PYQT5+Pytorch的猫狗分类(从数据集制作-⽹络模型搭建和训
练-界⾯演⽰)
1.猫狗数据集制作
利⽤python爬⾍爬取⽹上的猫狗图⽚,有关python爬取图⽚参考了这篇⽂章
https://blog.csdn.net/zhangjunp3/article/details/79665750
有关python爬取图⽚代码如下
# Import the required libraries (stdlib first, then third-party).
import json
import os

import requests
5
6
# Crawl Baidu Images: build the request parameters for each result page
# and fetch the JSON data of every page.
def getManyPages(keyword, pages):
    '''
    keyword: search keyword for the images to download
    pages: number of result pages to fetch (Baidu serves 30 images per page)

    Returns a list with one entry per page: the page's 'data' list of image
    records (may be None if a page carried no 'data' field).
    '''
    params = []
    # Baidu's acjson endpoint pages in steps of 30 via the 'pn' parameter.
    for i in range(30, 30 * pages + 30, 30):
        params.append({
            'tn': 'resultjson_com',
            'ipn': 'rj',
            'ct': 201326592,
            'is': '',
            'fp': 'result',
            'queryWord': keyword,
            'cl': 2,
            'lm': -1,
            'ie': 'utf-8',
            'oe': 'utf-8',
            'adpicid': '',
            'st': -1,
            'z': '',
            'ic': 0,
            'word': keyword,
            's': '',
            'se': '',
            'tab': '',
            'width': '',
            'height': '',
            'face': 0,
            'istype': 2,
            'qc': '',
            'nc': 1,
            'fr': '',
            'pn': i,
            'rn': 30,
            'gsm': '1e',
            '1488942260214': ''
        })
    url = 'https://image.baidu.com/search/acjson'
    urls = []
    for i in params:
        try:
            urls.append(requests.get(url, params=i).json().get('data'))
        except json.decoder.JSONDecodeError:
            # Some pages come back as malformed JSON when fetching many
            # pages; skip them instead of aborting the whole crawl.
            print("解析出错")
    return urls
55
56
# Download every thumbnail listed in dataList and save it under localPath.
def getImg(dataList, localPath):
    '''
    dataList: list of per-page image-record lists, as returned by getManyPages
    localPath: directory in which the downloaded images are saved
    '''
    if not os.path.exists(localPath):  # create the save directory on first use
        os.mkdir(localPath)
    x = 0  # running index used to name the saved files 0.jpg, 1.jpg, ...
    for page in dataList:
        for item in page:
            if item.get('thumbURL') is not None:
                print('正在下载:%s' % item.get('thumbURL'))
                ir = requests.get(item.get('thumbURL'))
                # Context manager guarantees the file handle is closed
                # (the original left it open).
                with open(localPath + '%d.jpg' % x, 'wb') as f:
                    f.write(ir.content)
                x += 1
            else:
                print('图⽚链接不存在')
75
76
# Script entry point: crawl dog images and save them locally.
if __name__ == '__main__':
    dataList = getManyPages('狗', 40)  # arg 1: keyword, arg 2: number of pages
    getImg(dataList, './dataset/dog/')  # arg 2: directory to save into
在这⾥与原⽂章不同之处是加了这段代码。
for i in params:
    try:
        urls.append(requests.get(url, params=i).json().get('data'))
    except json.decoder.JSONDecodeError:
        print("解析出错")
return urls
因为我发现若下载页数过多会报错,导致下载失败,插⼊try except可以解决这个问题。
这⾥猫和狗的图⽚我各爬取了40页,也就是1200张图⽚左右,最后剔除重复及模糊错误图⽚还剩800张左右。
因为网络训练和测试的原因还需要将我们的数据集分为训练集和测试集。这里需要编写一个分离脚本split_data.py。
import os
import random
from shutil import copy


def mkfile(file):
    # Create the directory (including parents) if it does not already exist.
    if not os.path.exists(file):
        os.makedirs(file)


def split_dataset(src='data/dataset', train_dir='data/train',
                  val_dir='data/val', split_rate=0.1):
    """Split the per-class image folders under *src* into a training set and
    a validation set.

    src: dataset root containing one sub-folder per class (e.g. cat/, dog/)
    train_dir: destination root for the training images
    val_dir: destination root for the validation images
    split_rate: fraction of each class randomly moved to the validation set

    Images are copied (not moved); a progress bar is printed per class.
    """
    # One sub-folder per class; skip stray .txt files in the dataset root.
    classes = [cla for cla in os.listdir(src) if ".txt" not in cla]

    mkfile(train_dir)
    for cla in classes:
        mkfile(train_dir + '/' + cla)

    mkfile(val_dir)
    for cla in classes:
        mkfile(val_dir + '/' + cla)

    for cla in classes:
        cla_path = src + '/' + cla + '/'
        images = os.listdir(cla_path)
        num = len(images)
        # Randomly pick split_rate of this class for validation.
        eval_index = random.sample(images, k=int(num * split_rate))
        for index, image in enumerate(images):
            dest = val_dir if image in eval_index else train_dir
            copy(cla_path + image, dest + '/' + cla)
            print("\r[{}] processing [{}/{}]".format(cla, index + 1, num), end="")  # processing bar
        print()

    print("processing done!")


if __name__ == '__main__':
    split_dataset()
⾸先我们要把数据集⽂件夹⽬录设置好。⽂件夹⽬录为data/dataset/cat dog。刚刚爬取的图⽚就放在cat 和 dog ⽂件夹内。然后在data⽂件夹⽬录下使⽤命令⾏窗⼝使⽤split_data.py脚本,按9:1的⼤⼩分离训练集和测试集。
这样就分离好了,可以看到我们的数据集分成了训练集和测试集,到此我们第⼀步制作数据集的⼯作就⼤功告成啦。
2.基于pytorch的⽹络模型的搭建及训练
这部分就是卷积神经⽹络分类的模型和训练了,采⽤的是pytorch框架,基本代码都差不多,这⾥就直接上代码了。⾸先是⽹络模型搭建,这⾥采⽤的经典的LeNet⽹络。
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
5
6
class LeNet(nn.Module):
    """LeNet-style CNN classifying 3x224x224 RGB images into 2 classes
    (cat / dog).  fc1's 32*53*53 input size fixes the expected spatial
    input at 224x224."""

    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, 5)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, 5)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(32 * 53 * 53, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 2)

    def forward(self, x):
        x = F.relu(self.conv1(x))          # input (3, 224, 224) -> (16, 220, 220)
        x = self.pool1(x)                  # -> (16, 110, 110)
        x = F.relu(self.conv2(x))          # -> (32, 106, 106)
        x = self.pool2(x)                  # -> (32, 53, 53)
        x = torch.flatten(x, start_dim=1)  # -> (32*53*53,)
        x = F.relu(self.fc1(x))            # -> (120,)
        x = F.relu(self.fc2(x))            # -> (84,)
        x = self.fc3(x)                    # -> (2,) raw class logits
        return x
其次是⽹络的训练
1import torch
2import torchvision
3import os
ansforms as transforms
5import torchvision.datasets as datasets
6import torch.optim as optim
7from alexnet_model import AlexNet
8from model import LeNet
as nn
10
11
12 device = torch.device('cuda:0'if torch.cuda.is_available() else'cpu')
13
14 data_tranform ={
15'train':transforms.Compose(
16 [transforms.RandomResizedCrop(224),
17 transforms.ToTensor(),
18 transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))]
19 ),
20'val':transforms.Compose(
21 [transforms.Resize((224, 224)),
22 transforms.ToTensor(),
23 transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))]
24 )
25 }
26
27 data_root = os.getcwd()
28
29 image_path = data_root + './data'
30
31 train_dataset = datasets.ImageFolder(root=image_path+'/train',transform=data_tranform['train'])
32 val_dataset = datasets.ImageFolder(root=image_path + './val',transform=data_tranform['val'])
33
34 category_list = train_dataset.class_to_idx
35 cla_dict = dict((value,key) for key,value in category_list.items() )
36
37 trainloader = torch.utils.data.DataLoader(train_dataset,batch_size=32,shuffle=True,num_workers=0)
38 valloader = torch.utils.data.DataLoader(val_dataset,batch_size=16,shuffle=True,num_workers=0)
39
40#model = AlexNet(num_classes=2,init_weights=True)
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。
发表评论