PyTorch构建迁移学习网络——以VGG16、ResNet18和MobileNetV2模型为例
本次迁移学习训练的是865种鱼的分类,使用的是WildFish数据集:
提取码:a9pl
导入一些包:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import Dataset
from torchvision import datasets, models, transforms
把数据集分成Training和Testing两个部分:
# Build the list files that the Dataset class reads.
# The dataset ships Training and Testing lists split 5:5; they are merged
# here and re-split 8:2.
# TODO(review): the input and output file names were reduced to './' when
# this snippet was extracted -- restore the real list-file names before running.
# NOTE(review): the separator was an empty string in the extracted text; a
# single space is assumed because the lists are written back with sep=' ' below.
tb1 = pd.read_table('./', sep=' ', names=['path', 'label'])
tb1['path'] = r'E:\data\wildfish\\' + tb1['path']
tb2 = pd.read_table('./', sep=' ', names=['path', 'label'])
tb2['path'] = r'E:\data\wildfish\\' + tb2['path']

# Merge both lists into one table with a fresh 0..n-1 index.
tb = pd.concat([tb1, tb2], ignore_index=True)
tb = tb.loc[:, ['path', 'label']]

# Rows whose position ends in 9 or 0 go to Testing, all others to Training.
test_rows = [i for i in range(tb.shape[0]) if i % 10 in (0, 9)]
train_rows = [i for i in range(tb.shape[0]) if i % 10 not in (0, 9)]
train_data = tb.iloc[train_rows]
test_data = tb.iloc[test_rows]

# Store both splits as space-separated "path label" lines without a header.
# NOTE(review): the receiver of each to_csv call was lost in extraction;
# training-then-testing order is assumed.
train_data.to_csv(r'./', sep=' ', header=None, index=False)
test_data.to_csv(r'./', sep=' ', header=None, index=False)
PyTorch最后可读取的图片名称(以绝对路径显示)和类别名称如下图所示:
定义一些超参数:
# Hyperparameters used by the rest of the script.
EPOCH = 10         # number of passes over the dataset
pre_epoch = 0      # number of passes already completed
BATCH_SIZE = 128   # mini-batch size
LR = 0.0001        # learning rate

# Use the GPU when CUDA is available, otherwise the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
对数据做预处理
# Per-channel (R, G, B) mean and standard deviation handed to Normalize.
_NORM_MEAN = (0.5, 0.5, 0.5)
_NORM_STD = (0.5, 0.5, 0.5)

# Training pipeline: resize, random augmentation, tensor conversion, normalize.
# (The ColorJitter brightness/contrast steps are left disabled.)
_train_steps = [
    transforms.Resize((150, 150)),
    transforms.RandomHorizontalFlip(0.5),  # horizontal flip with probability 0.5
    transforms.RandomVerticalFlip(0.5),    # vertical flip with probability 0.5
    transforms.RandomRotation(30),
    transforms.RandomCrop(128, padding=4),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
]
transform_train = transforms.Compose(_train_steps)

# Test pipeline: resize only, then tensor conversion and normalization.
_test_steps = [
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
]
transform_test = transforms.Compose(_test_steps)
将数据放到TrainLoader和TestLoader中
class MyDataset(Dataset):
    """Image dataset described by a whitespace-separated list file.

    Every non-blank line of the list file holds an image path followed by an
    integer class label. Images are opened only when an item is requested.

    Args:
        txt_path: Path of the UTF-8 encoded list file.
        transform: Optional callable applied to the loaded RGB image.
        target_transform: Optional callable applied to the integer label.
    """

    def __init__(self, txt_path, transform=None, target_transform=None):
        imgs = []
        # The context manager closes the list file even if parsing fails.
        with open(txt_path, 'r', encoding='utf-8') as fh:
            for line in fh:
                words = line.split()
                if not words:
                    # Skip blank lines (e.g. a trailing newline at end of file).
                    continue
                imgs.append((words[0], int(words[1])))
        self.imgs = imgs
        self.transform = transform
        self.target_transform = target_transform

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at position *index*."""
        fn, label = self.imgs[index]
        img = Image.open(fn).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            label = self.target_transform(label)
        return img, label

    def __len__(self):
        """Return the number of samples listed in the file."""
        return len(self.imgs)
# TODO(review): both list-file names were reduced to './' when this snippet
# was extracted -- point them at the training and testing list files.
train_datasets = MyDataset(r'./', transform=transform_train)
test_datasets = MyDataset(r'./', transform=transform_test)

# num_workers is 0 because the author ran this on Windows 10; on other
# systems it can be raised to speed up data loading.
trainloader = torch.utils.data.DataLoader(
    train_datasets, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
testloader = torch.utils.data.DataLoader(
    test_datasets, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
查看图片的代码,不执行不会影响后续的训练
# Preview the first image of the first three training batches.
for cnt, (image, label) in enumerate(trainloader):
    if cnt >= 3:  # show only 3 images
        break
    print(label)  # labels of the whole batch
    # image[0] drops the batch dimension; the transpose moves the channel
    # axis last, which is the layout plt.imshow expects.
    img = np.transpose(image[0].numpy(), (1, 2, 0))
    # Undo Normalize(mean=0.5, std=0.5) so the values are back in [0, 1];
    # otherwise imshow clips the negative half of the range.
    img = np.clip(img * 0.5 + 0.5, 0.0, 1.0)
    plt.imshow(img)
    plt.show()
调用VGG16的预训练模型
class VGGNet(nn.Module):
    """VGG16 with pretrained weights and a replacement classifier head.

    Args:
        num_classes: Output size of the final linear layer.
    """

    # NOTE(review): the default of 685 differs from the 865 species mentioned
    # in the accompanying article text -- confirm which matches the labels.
    def __init__(self, num_classes=685):
        super().__init__()
        net = models.vgg16(pretrained=True)  # VGG16 with pretrained parameters
        net.classifier = nn.Sequential()     # empty the stock head; ours follows
        self.features = net                  # pretrained VGG16 feature part
        self.classifier = nn.Sequential(     # custom classification head
            # 512 * 7 * 7 is fixed by the VGG16 network; the hidden width
            # (1024) can be tuned.
            nn.Linear(512 * 7 * 7, 1024),
            nn.ReLU(True),
            nn.Dropout(0.3),
            nn.Linear(1024, 1024),
            nn.ReLU(True),
            nn.Dropout(0.3),
            nn.Linear(1024, num_classes),
        )

    def forward(self, x):
        """Return class scores for the image batch *x*."""
        x = self.features(x)       # pretrained feature extraction
        x = x.view(x.size(0), -1)  # flatten everything after the batch axis
        x = self.classifier(x)     # custom classification head
        return x


net = VGGNet().to(device)
调用ResNet18的预训练模型
class ResNet(nn.Module):
    """ResNet18 with pretrained weights followed by a custom classifier head.

    Args:
        num_classes: Output size of the final linear layer.
    """

    # NOTE(review): the default of 685 differs from the 865 species mentioned
    # in the accompanying article text -- confirm which matches the labels.
    def __init__(self, num_classes=685):
        super().__init__()
        # torchvision's ResNet keeps its head in ``fc`` rather than
        # ``classifier``, so assigning ``net.classifier`` replaced nothing.
        # The backbone is therefore used unchanged and its 1000-value output
        # is what the first Linear layer below consumes.
        net = models.resnet18(pretrained=True)
        self.features = net
        self.classifier = nn.Sequential(  # custom classification head
            # The input size 1000 is fixed by the backbone output; the
            # hidden width can be tuned.
            nn.Linear(1000, 1000),
            nn.ReLU(True),
            nn.Dropout(0.5),
            nn.Linear(1000, num_classes),
        )

    def forward(self, x):
        """Return class scores for the image batch *x*."""
        x = self.features(x)       # pretrained backbone
        x = x.view(x.size(0), -1)  # flatten everything after the batch axis
        x = self.classifier(x)     # custom classification head
        return x


net = ResNet().to(device)
MobileNet V2的预训练模型
class MobileNet(nn.Module):
    """MobileNetV2 with pretrained weights and a replacement classifier head.

    Args:
        num_classes: Output size of the final linear layer.
    """

    # NOTE(review): the default of 685 differs from the 865 species mentioned
    # in the accompanying article text -- confirm which matches the labels.
    def __init__(self, num_classes=685):
        super().__init__()
        net = models.mobilenet_v2(pretrained=True)  # pretrained MobileNetV2
        net.classifier = nn.Sequential()  # empty the stock head; ours follows
        self.features = net               # pretrained feature part
        self.classifier = nn.Sequential(  # custom classification head
            # The input size 1280 is fixed by the MobileNetV2 backbone; the
            # hidden width can be tuned.
            nn.Linear(1280, 1000),
            nn.ReLU(True),
            nn.Dropout(0.5),
            nn.Linear(1000, num_classes),
        )

    def forward(self, x):
        """Return class scores for the image batch *x*."""
        x = self.features(x)       # pretrained feature extraction
        x = x.view(x.size(0), -1)  # flatten everything after the batch axis
        x = self.classifier(x)     # custom classification head
        return x


net = MobileNet().to(device)
选择优化器和Loss
# Adam over all model parameters, using the LR hyperparameter (0.0001)
# instead of repeating the literal.
optimizer = optim.Adam(net.parameters(), lr=LR)
# Cross-entropy loss over the class scores.
criterion = nn.CrossEntropyLoss()
# NOTE(review): the receiver of this call was lost in extraction;
# ``criterion`` is assumed.
criterion.to(device=device)
定义两个函数,一个可以冻住features层,只训练FC层,另一个把features层解冻,训练所有参数:
from collections.abc import Iterable
def set_freeze_by_names(model, layer_names, freeze=True):
    """Freeze or unfreeze the direct children of *model* selected by name.

    Args:
        model: Module whose ``named_children()`` are inspected.
        layer_names: One child name, or an iterable of child names.
        freeze: When true the selected parameters stop receiving gradients
            (``requires_grad = False``); when false they are re-enabled.
    """
    # A plain string is itself iterable, so it must be wrapped explicitly;
    # otherwise ``name in layer_names`` below would be a substring test.
    if isinstance(layer_names, str) or not isinstance(layer_names, Iterable):
        layer_names = [layer_names]
    # Materialize once so one-shot iterables survive repeated membership tests.
    wanted = tuple(layer_names)
    for name, child in model.named_children():
        if name not in wanted:
            continue
        for param in child.parameters():
            param.requires_grad = not freeze


def freeze_by_names(model, layer_names):
    """Disable gradients for the named direct children of *model*."""
    set_freeze_by_names(model, layer_names, True)


def unfreeze_by_names(model, layer_names):
    """Re-enable gradients for the named direct children of *model*."""
    set_freeze_by_names(model, layer_names, False)
# Freeze the features layer. The accompanying text describes training with
# the features frozen first and unfreezing them for a later run; executed
# back to back as written here, the second call undoes the first.
freeze_by_names(net, ('features',))
# Unfreeze the features layer.
unfreeze_by_names(net, ('features',))
定义两个数组,为了存储预测的y值和真实的y值
# Two separate lists: predicted y values and true y values.
y_predict, y_true = [], []
# The author reports an error when this import is left out; the flag below
# tells PIL to accept truncated image files.
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
训练过程
# Train from pre_epoch to EPOCH and test after every epoch. "Acc" is top-5:
# a sample counts as correct when its label is among the 5 highest scores.
print("Start Training!")
maxk = 5
length = len(trainloader)  # batches per epoch, used for the running iteration number
for epoch in range(pre_epoch, EPOCH):
    print('\nEpoch: %d' % (epoch + 1))
    # The test pass below switches to eval mode; switch back every epoch so
    # Dropout is active during training.
    net.train()
    sum_loss = 0.0
    correct = 0.0
    total = 0.0
    for i, (inputs, labels) in enumerate(trainloader):
        # Prepare the batch.
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()

        # Forward + backward.
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Print the running loss and top-5 accuracy after every batch.
        sum_loss += loss.item()
        _, predicted = outputs.topk(maxk, 1, True, True)
        total += labels.size(0)
        # Each label is compared with its 5 candidates; at most one matches.
        correct += torch.eq(predicted, labels.view(-1, 1)).sum().item()
        print('[epoch:%d, iter:%d] Loss: %.03f | Acc: %.3f%% '
              % (epoch + 1, (i + 1 + epoch * length), sum_loss / (i + 1), 100. * correct / total))

    # Measure the accuracy on the test set after every epoch.
    print("Waiting Test!")
    net.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            # Indices of the 5 highest-scoring classes per sample.
            _, predicted = outputs.topk(maxk, 1, True, True)
            total += labels.size(0)
            correct += torch.eq(predicted, labels.view(-1, 1)).sum().item()
            # Stored on the CPU so the history does not pin GPU memory; these
            # lists keep growing across epochs.
            y_predict.append(predicted.cpu())
            y_true.append(labels.cpu())
        print('测试分类准确率为:%.3f%%' % (100 * correct / total))
        acc = 100. * correct / total
print("Training Finished, TotalEPOCH=%d" % EPOCH)
保存模型
# Save the whole model object; create the target directory first so the
# save does not fail when ./model does not exist yet.
os.makedirs('./model', exist_ok=True)
torch.save(net, './model/mobileNet freeze.pth')
加载模型
# Load a previously saved model object and rebind ``net`` to it.
# NOTE(review): this path ('VGG16-2 freeze.pth') is not the file written by
# the save step in this article ('mobileNet freeze.pth') -- confirm it exists.
# NOTE(review): torch.load unpickles the file, so only load checkpoints that
# come from a trusted source.
net = torch.load('./model/VGG16-2 freeze.pth')
训练过程
我是先把特征层冻住训练10个epoch,再解冻训练20个epoch,各个模型在Training上的准确率基本在98%左右,在Testing上的准确率在88%左右。
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。
发表评论