[Image Classification] Hands-On: Image Classification with AlexNet (PyTorch)
AlexNet, proposed by Alex Krizhevsky, was the first deep convolutional neural network applied to image classification at scale. It won first place in the 2012 ILSVRC (ImageNet Large Scale Visual Recognition Challenge) classification competition with a top-5 test error rate of 15.3%. In the years that followed, ever deeper networks were proposed, such as the excellent VGG and GoogLeNet. Compared with traditional machine-learning classifiers, this performance was already remarkable. The network structure is as follows:
For a detailed introduction to AlexNet, see my blog post:
Below I walk through cat-vs-dog classification with AlexNet. Since the loss function used here is CrossEntropyLoss, you only need to change the number of classes to turn this into a multi-class classifier.
Creating the project
Create a new image-classification project: the dataset goes into data, and a custom data-loading method goes into the dataset folder (this time I won't use the default loading method; that would be too simple to be interesting). Then create train.py and test.py. A possible layout is sketched below.
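For reference, a minimal sketch of the assumed project layout (the folder and file names follow the text above and below; the root folder name is illustrative):

project/
├── data/
│   ├── train/        # training images, e.g. cat.0.jpg, dog.0.jpg
│   └── test/         # test images, e.g. 1.jpg
├── dataset/
│   ├── __init__.py
│   └── dataset.py    # custom DogCat dataset (defined later)
├── train.py
└── test.py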
Create train.py in the project root and write the training code in it.
Import the required libraries:
import torch.optim as optim
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
from dataset.dataset import DogCat
from torch.autograd import Variable
from torchvision.models import alexnet
Setting global parameters
Set the batch size, learning rate, and number of epochs, and check whether a CUDA device is available; if not, fall back to the CPU.
# Global parameters
modellr = 1e-4
BATCH_SIZE = 64
EPOCHS = 20
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
Image preprocessing
When preprocessing images, keep the train-set transform and the validation-set transform separate. Besides resizing and normalization, the training transform can also apply augmentation such as rotation or random erasing; the validation set needs no augmentation. Also, don't augment blindly: unreasonable augmentation can easily hurt performance, and in the worst case the loss may fail to converge. (A sketch of an augmented training transform follows the code below.)
# Data preprocessing
transform = transforms.Compose([
transforms.Resize((224, 224)),
transforms.ToTensor(),
transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])
transform_test = transforms.Compose([
transforms.Resize((224, 224)),
transforms.ToTensor(),
transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])
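As an illustration only (it is not used in the rest of this post), a training transform with the augmentations mentioned above might look like the following. Note that RandomErasing operates on tensors, so it must come after ToTensor:

# Hypothetical augmented training transform (illustrative; tune for your data)
transform_aug = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),   # flip left-right with p=0.5
    transforms.RandomRotation(15),       # rotate within +/-15 degrees
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
    transforms.RandomErasing(p=0.5),     # random erasing works on tensors
])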
Loading the data
Then create __init__.py and dataset.py under the dataset folder, and put the following code in dataset.py:
# coding:utf8
import os
from PIL import Image
from torch.utils import data
from torchvision import transforms as T
from sklearn.model_selection import train_test_split


class DogCat(data.Dataset):
    def __init__(self, root, transforms=None, train=True, test=False):
        """
        Main goal: collect the paths of all images and split them into
        train/val/test sets according to the flags.
        """
        self.test = test
        self.transforms = transforms
        imgs = [os.path.join(root, img) for img in os.listdir(root)]
        if self.test:
            # test filenames look like data/test/1234.jpg
            imgs = sorted(imgs, key=lambda x: int(x.split('.')[-2].split('/')[-1]))
        else:
            # train filenames look like data/train/cat.1234.jpg
            imgs = sorted(imgs, key=lambda x: int(x.split('.')[-2]))
        if self.test:
            self.imgs = imgs
        else:
            trainval_files, val_files = train_test_split(imgs, test_size=0.3, random_state=42)
            if train:
                self.imgs = trainval_files
            else:
                self.imgs = val_files

    def __getitem__(self, index):
        """
        Return the data of one image at a time.
        """
        img_path = self.imgs[index]
        if self.test:
            label = -1
        else:
            label = 1 if 'dog' in img_path.split('/')[-1] else 0
        data = Image.open(img_path)
        data = self.transforms(data)
        return data, label

    def __len__(self):
        return len(self.imgs)
Then call DogCat in train.py to load the data:
dataset_train = DogCat('data/train', transforms=transform, train=True)
dataset_test = DogCat("data/train", transforms=transform_test, train=False)
# Inspect the data
print(dataset_train.imgs)
# Wrap the datasets in DataLoaders
train_loader = torch.utils.data.DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset_test, batch_size=BATCH_SIZE, shuffle=False)
Setting up the model
Use CrossEntropyLoss as the loss. The model is alexnet with pretrained weights. Replace the fully connected classifier so that the last layer outputs 2 classes, then move the model to DEVICE. Adam is used as the optimizer.
# Instantiate the model and move it to the GPU
criterion = nn.CrossEntropyLoss()
model_ft = alexnet(pretrained=True)
model_ft.classifier = nn.Sequential(
    nn.Dropout(),
    nn.Linear(256 * 6 * 6, 4096),
    nn.ReLU(inplace=True),
    nn.Dropout(),
    nn.Linear(4096, 4096),
    nn.ReLU(inplace=True),
    nn.Linear(4096, 2),
)
model_ft.to(DEVICE)
# Adam: simple and effective; keep the learning rate low
optimizer = optim.Adam(model_ft.parameters(), lr=modellr)
def adjust_learning_rate(optimizer, epoch):
    """Sets the learning rate to the initial LR decayed by 10 every 50 epochs"""
    modellrnew = modellr * (0.1 ** (epoch // 50))
    print("lr:", modellrnew)
    for param_group in optimizer.param_groups:
        param_group['lr'] = modellrnew
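As noted at the start, because the loss is CrossEntropyLoss, adapting this script to N classes only requires changing the output size of the last linear layer. A minimal sketch, where num_classes is a placeholder you supply:

# e.g. for a 10-class problem, swap out the last layer of the classifier
num_classes = 10  # placeholder: set to the number of classes in your dataset
model_ft.classifier[6] = nn.Linear(4096, num_classes)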
Setting up training and validation
# Training loop
def train(model, device, train_loader, optimizer, epoch):
    model.train()  # re-enable train mode (Dropout) after validation
    sum_loss = 0
    total_num = len(train_loader.dataset)
    print(total_num, len(train_loader))
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = Variable(data).to(device), Variable(target).to(device)
        output = model(data)
        loss = criterion(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print_loss = loss.data.item()
        sum_loss += print_loss
        if (batch_idx + 1) % 50 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, (batch_idx + 1) * len(data), len(train_loader.dataset),
                100. * (batch_idx + 1) / len(train_loader), loss.item()))
    ave_loss = sum_loss / len(train_loader)
    print('epoch:{},loss:{}'.format(epoch, ave_loss))
# Validation loop
def val(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    total_num = len(test_loader.dataset)
    print(total_num, len(test_loader))
    with torch.no_grad():
        for data, target in test_loader:
            data, target = Variable(data).to(device), Variable(target).to(device)
            output = model(data)
            loss = criterion(output, target)
            _, pred = torch.max(output.data, 1)
            correct += torch.sum(pred == target)
            print_loss = loss.data.item()
            test_loss += print_loss
        correct = correct.data.item()
        acc = correct / total_num
        avgloss = test_loss / len(test_loader)
        print('\nVal set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            avgloss, correct, len(test_loader.dataset), 100 * acc))
# Train and validate
for epoch in range(1, EPOCHS + 1):
    adjust_learning_rate(optimizer, epoch)
    train(model_ft, DEVICE, train_loader, optimizer, epoch)
    val(model_ft, DEVICE, test_loader)
torch.save(model_ft, 'model.pth')
With the code above in place, click Run to start training.
Because we start from a pretrained model, training converges quickly.
Testing
I will introduce two common testing approaches. The first is generic: manually load the data yourself and run predictions, as follows:
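A minimal sketch of this first approach, under these assumptions: the whole model was saved to model.pth by train.py above, test images live in data/test, and the classes_names mapping follows the DogCat labels (0 = cat, 1 = dog):

# Minimal inference sketch (assumptions: model.pth saved above, images in data/test)
import os
import torch
import torchvision.transforms as transforms
from PIL import Image

classes_names = ['cat', 'dog']  # assumption: matches DogCat's 0/1 labels
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

# torch.save(model, ...) stored the whole model, so torch.load restores it directly
model = torch.load('model.pth', map_location=DEVICE)
model.eval()

for name in os.listdir('data/test'):
    img = Image.open(os.path.join('data/test', name)).convert('RGB')
    x = transform_test(img).unsqueeze(0).to(DEVICE)  # add the batch dimension
    with torch.no_grad():
        out = model(x)
    pred = out.argmax(dim=1).item()
    print(name, classes_names[pred])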