PyTorch之九—cifar-10数据集图像分类--688IT编程网

PyTorch之九—cifar-10数据集图像分类

⽂章⽬录

这是一个入门级的图像分类模型，主要是用来熟悉模型。

⼀、准备

cifar-10 数据集

数据集下载：https://www.cs.toronto.edu/~kriz/cifar.html

若是下载太慢了，

数据集组成：本数据集包含了6万张分辨率为32x32的彩色图片，一共10类，分别为：飞机、汽车、鸟、猫、鹿、狗、青蛙、马、船、货车。

其中，5万张作为训练集，1万张作为测试集。

整个数据集被分为5个训练批次和1个测试批次，每一个批次都是1万张。

测试集是从每⼀种分类中随机抽取出来1000张组成。

训练集从10个分类中各⾃随机抽取5000张，⼀共5万张。

数据的结构

⾸先看⼀下数据集解压之后的样⼦：

那些没有后缀名的文件，其实都是用 Python 的 pickle 库（Python 2 中对应 cPickle）序列化打包好的。这个库用来把 Python 中的对象原封不动地保存到本地文件，之后需要时可以再读取还原出来。

读取⽅式：

def unpickle(file):
    """Load one pickled CIFAR-10 file and return the stored object.

    Args:
        file: Path to the pickled file (for example a ``data_batch_*`` file).

    Returns:
        The unpickled object. Because ``encoding='bytes'`` is used, string
        keys written by Python 2 come back as ``bytes`` (e.g. ``b'data'``).
    """
    import pickle

    # NOTE: pickle can execute arbitrary code while loading; only open
    # files that come from a trusted source.
    with open(file, 'rb') as fo:
        # Use a neutral local name instead of shadowing the builtin ``dict``.
        batch = pickle.load(fo, encoding='bytes')
    return batch

此外，数据集中的 batches.meta 文件同样是一个字典，其中的 label_names 是一个长度为10的列表，给出了数字标签对应的类别名称，如：label_names[0] == "airplane", label_names[1] == "automobile"

键值

data：一个10000x3072的numpy数组（uint8），每一行都存储着一张分辨率为32x32的彩色图片：前1024个值是红色通道，接着1024个是绿色通道，最后1024个是蓝色通道。

labels：标签，是一个长度为10000的列表，每个元素都是0-9的数字，其索引和data里面的行索引相互对应。

可视化保存

import pickle as p

import numpy as np

import matplotlib.image as plimg

from PIL import Image

def load_CIFAR_batch(filename):
    """Load one CIFAR-10 batch file as image and label arrays.

    Args:
        filename: Path to a pickled batch file whose dictionary has the
            keys ``'data'`` (rows of 3072 values) and ``'labels'``.

    Returns:
        A tuple ``(X, Y)``: ``X`` is the data reshaped to
        ``(N, 3, 32, 32)`` and ``Y`` is the labels as a NumPy array.
    """
    # NOTE: pickle can execute arbitrary code; only load trusted files.
    with open(filename, 'rb') as f:
        datadict = p.load(f, encoding='latin1')

    # -1 lets NumPy infer the number of images, so batches that do not hold
    # exactly 10000 rows (subsets, test fixtures) load as well.
    X = datadict['data'].reshape(-1, 3, 32, 32)
    Y = np.array(datadict['labels'])
    return X, Y

def load_CIFAR_Labels(filename):
    """Print the raw lines of a label/metadata file.

    The file is opened in binary mode, so every printed line is a ``bytes``
    object. Nothing is returned.

    Args:
        filename: Path of the file to dump.
    """
    with open(filename, 'rb') as f:
        # The scraped source had a garbled comprehension here
        # ("x for x adlines()"); reading all lines is the intended behavior.
        lines = f.readlines()
    print(lines)

if __name__ == "__main__":
    # Local import: this snippet's import list does not include ``os``.
    import os

    # NOTE(review): the scraped source showed ".../data/a" here, which looks
    # truncated; presumably it pointed at the dataset's batches.meta file.
    # Confirm the location on your machine.
    load_CIFAR_Labels("E:/pytorch_AI/data/cifar-10-batches-py/batches.meta")

    imgX, imgY = load_CIFAR_batch("E:/pytorch_AI/data/cifar-10-batches-py/data_batch_1")
    print(imgX.shape)

    # Create both output folders up front so the save calls cannot fail on a
    # missing directory.
    os.makedirs("./data/cifar-10-batches-py_img/", exist_ok=True)
    os.makedirs("./data/cifar-10-batches-py_img_0/", exist_ok=True)

    print("正在保存图⽚:")
    for i in range(30):
        imgs = imgX[i]  # one image as three 32x32 channel planes

        i0 = Image.fromarray(imgs[0])  # channel 1
        i1 = Image.fromarray(imgs[1])  # channel 2
        i2 = Image.fromarray(imgs[2])  # channel 3
        # Merge the three single-channel images into one RGB image.
        img = Image.merge("RGB", (i0, i1, i2))
        name = "img" + str(i) + '.png'
        # This folder receives the merged RGB images.
        img.save("./data/cifar-10-batches-py_img/" + name, "png")

        for j in range(imgs.shape[0]):
            img = imgs[j]
            name = "img" + str(i) + str(j) + ".png"
            print("正在保存图⽚" + name)
            # This folder receives the separated single-channel images.
            plimg.imsave("./data/cifar-10-batches-py_img_0/" + name, img)

    print("保存完毕.")

查看数据结构

def unpickle(file):
    """Load one pickled CIFAR-10 file and return the stored object.

    Args:
        file: Path to the pickled file (for example a ``data_batch_*`` file).

    Returns:
        The unpickled object. Because ``encoding='bytes'`` is used, string
        keys written by Python 2 come back as ``bytes`` (e.g. ``b'data'``).
    """
    import pickle

    # NOTE: pickle can execute arbitrary code while loading; only open
    # files that come from a trusted source.
    with open(file, 'rb') as fo:
        # Use a neutral local name instead of shadowing the builtin ``dict``.
        batch = pickle.load(fo, encoding='bytes')
    return batch

if __name__ == '__main__':
    # Inspect the structure of one training batch.
    batch_path = "E:/pytorch_AI/data/cifar-10-batches-py/data_batch_1"
    # Neutral names avoid shadowing the builtins ``dict`` and ``file``.
    batch = unpickle(batch_path)
    dict_keys = batch.keys()

    print("dict_keys: \t", dict_keys)  # all keys of the batch dictionary
    print("dict[b'batch_label']:\t", batch[b'batch_label'])  # content stored under b'batch_label'
    print("dict[b'labels']_type:\t", type(batch[b'labels']))  # data type of the labels entry
    print("dict[b'filenames'][0]:\t", batch[b'filenames'][0])  # file name of the first image
    print("dict[b'data'].shape():\t", batch[b'data'].shape)  # shape of the image data array
    print("dict[b'data'][0] :\t", batch[b'data'][0])  # first row of the image data

输出：

dict_keys: dict_keys([b'batch_label', b'labels', b'data', b'filenames'])

dict[b'batch_label']: b'training batch 1 of 5'

dict[b'labels']_type:<class'list'>

dict[b'filenames'][0]: b'leptodactylus_pentadactylus_s_000004.png'

dict[b'data'].shape():(10000,3072)

dict[b'data'][0]: [ 59  43  50 ... 140  84  72]

读取并保存所有数据为png图⽚，完整代码：

cifar2image.py

# !/usr/bin/env python

# -*- coding: utf-8 -*-

import pickle as p

import pickle

import os

import numpy as np

from PIL import Image

def load_CIFAR_batch(filename):
    """Load one CIFAR-10 batch file as image and label arrays.

    Args:
        filename: Path to a pickled batch file whose dictionary has the
            keys ``'data'`` (rows of 3072 values) and ``'labels'``.

    Returns:
        A tuple ``(X, Y)``: ``X`` is the data reshaped to
        ``(N, 3, 32, 32)`` and ``Y`` is the labels as a NumPy array.
    """
    # NOTE: pickle can execute arbitrary code; only load trusted files.
    with open(filename, 'rb') as f:
        datadict = p.load(f, encoding='latin1')

    # -1 lets NumPy infer the number of images instead of assuming that
    # every batch holds exactly 10000 rows.
    X = datadict['data'].reshape(-1, 3, 32, 32)
    Y = np.array(datadict['labels'])
    return X, Y

def load_CIFAR_Labels(filename):
    """Load the pickled label metadata file and return its dictionary.

    Args:
        filename: Path to the pickled metadata file.

    Returns:
        The unpickled object. ``encoding='bytes'`` is used, so the callers
        in this script index it with bytes keys such as ``b'label_names'``.
    """
    import pickle

    # NOTE: pickle can execute arbitrary code; only load trusted files.
    with open(filename, 'rb') as fo:
        dict2idx = pickle.load(fo, encoding='bytes')
    return dict2idx

# 所有图⽚放置⼀个⽂件夹内

def main(root_path, files, save_path):
    """Export every image of the given batches as PNG into one folder.

    Each file is named ``<label_name>_<batch_file>_<index>.png``. Class
    names are read from the module-level ``dict2idx`` mapping, which has to
    be set before this function is called.

    Args:
        root_path: Folder that contains the batch files.
        files: Iterable of batch file names inside ``root_path``.
        save_path: Folder that receives all PNG files.
    """
    # Make sure the target folder exists before the first save.
    os.makedirs(save_path, exist_ok=True)

    for file in files:
        temp_path = os.path.join(root_path, file)
        images, labels = load_CIFAR_batch(temp_path)
        print(images.shape)  # (10000, 3, 32, 32)

        image_num = images.shape[0]  # number of images in this batch
        for i in range(image_num):
            imgs = images[i]
            label = labels[i]
            label_name = dict2idx[b'label_names'][label].decode()

            i0 = Image.fromarray(imgs[0])  # channel 1
            i1 = Image.fromarray(imgs[1])  # channel 2
            i2 = Image.fromarray(imgs[2])  # channel 3
            # Merge the three single-channel images into one RGB image.
            img = Image.merge("RGB", (i0, i1, i2))

            name = str(label_name) + "_" + str(file) + "_" + str(i) + '.png'
            img.save(os.path.join(save_path, name), "png")

    print("done!.")

# 每类图⽚放置⼀个⽂件夹内

def main2(root_path, files, save_path):
    """Export every image of the given batches as PNG, one folder per class.

    Images go to ``<save_path>/<label_name>/<batch_file>_<index>.png``.
    Class names are read from the module-level ``dict2idx`` mapping, which
    has to be set before this function is called.

    Args:
        root_path: Folder that contains the batch files.
        files: Iterable of batch file names inside ``root_path``.
        save_path: Root folder under which the class folders are created.
    """
    for file in files:
        temp_path = os.path.join(root_path, file)
        images, labels = load_CIFAR_batch(temp_path)
        print(images.shape)  # (10000, 3, 32, 32)

        image_num = images.shape[0]  # number of images in this batch
        for i in range(image_num):
            imgs = images[i]
            label = labels[i]
            label_name = dict2idx[b'label_names'][label].decode()

            i0 = Image.fromarray(imgs[0])  # channel 1
            i1 = Image.fromarray(imgs[1])  # channel 2
            i2 = Image.fromarray(imgs[2])  # channel 3
            # Merge the three single-channel images into one RGB image.
            img = Image.merge("RGB", (i0, i1, i2))

            save_path_temp = os.path.join(save_path, label_name)
            # makedirs with exist_ok replaces the exists()/mkdir() pair and
            # also creates save_path itself when it is missing.
            os.makedirs(save_path_temp, exist_ok=True)

            name = str(file) + "_" + str(i) + '.png'
            img.save(os.path.join(save_path_temp, name), "png")

    print("done!.")

if __name__ == "__main__":
    root_path = r"E:\data\cifar-10-batches-py"
    save_path = r"E:\data\all_class"
    files = [
        "data_batch_1",
        "data_batch_2",
        "data_batch_3",
        "data_batch_4",
        "data_batch_5",
    ]

    # main() and main2() read this module-level mapping to turn numeric
    # labels into class names.
    # NOTE(review): the scraped source had the file name truncated to "a";
    # the code reads b'label_names' from it, which matches the dataset's
    # "batches.meta" file. Confirm against your local copy.
    dict2idx = load_CIFAR_Labels(os.path.join(root_path, "batches.meta"))

    # Alternative: call main(root_path, files, save_path) instead to put
    # all images into a single folder.
    main2(root_path, files, save_path)  # one folder per class

688IT编程网

PyTorch之九—cifar-10数据集图像分类

发表评论

推荐文章

java正则表达式选择题

一种基于正则表达式的DBC文件解析及报文分析方法[发明专利]

工龄小数点提取

非零金额正则表达式

提取文本中数字的函数

热门文章

利用正则表达式实现文本数据提取与处理

正则表达式零宽断言详解

文本匹配规则

excel中使用正则

1-31正则表达式

anki之高级筛选

BUAA_OO_2021_第一单元总结

insert语句递增写法

sublime text 3在行前插入递增数字序号的方法

字符串只允许数字和英文的正则

powerbuilder 正则表达式

Shell脚本编写的高级技巧利用正则表达式进行字符串匹配

JAVA正则表达式的三种模式:贪婪,勉强和占有的讨论

go regexp匹配规则

oracle regexp_substr 实现原理

基本的元字符回溯引用和前后查匹配模式

elasticsearch query dsl正则

oracle sql正则表达式

GA-设置目标

仅匹配全角片假名的正则表达式

最新文章

java正则表达式选择题

工龄小数点提取

非零金额正则表达式

提取文本中数字的函数

vue数字相加小数点变长-概述说明以及解释

vue validate 正则验证小数长度

标签列表

688IT编程网

PyTorch之九—cifar-10数据集图像分类

发表评论

推荐文章

java正则表达式 选择题

一种基于正则表达式的DBC文件解析及报文分析方法[发明专利]

工龄小数点提取

非零金额 正则表达式

提取文本中数字的函数

热门文章

利用正则表达式实现文本数据提取与处理

正则表达式零宽断言详解

文本匹配规则

excel中使用正则

1-31正则表达式

anki之高级筛选

BUAA_OO_2021_第一单元总结

insert语句递增写法

sublime text 3在行前插入递增数字序号的方法

字符串只允许数字和英文的正则

powerbuilder 正则表达式

Shell脚本编写的高级技巧利用正则表达式进行字符串匹配

JAVA正则表达式的三种模式:贪婪,勉强和占有的讨论

go regexp匹配规则

oracle regexp_substr 实现原理

基本的元字符 回溯引用和前后查 匹配模式

elasticsearch query dsl正则

oracle sql正则表达式

GA-设置目标

仅匹配全角片假名的正则表达式

最新文章

java正则表达式 选择题

工龄小数点提取

非零金额 正则表达式

提取文本中数字的函数

vue数字相加小数点变长-概述说明以及解释

vue validate 正则验证小数长度

标签列表

java正则表达式选择题

非零金额正则表达式

基本的元字符回溯引用和前后查匹配模式

java正则表达式选择题

非零金额正则表达式