Pytorch实现基于卷积神经⽹络的⾯部表情识别(详细步骤)
⽂章⽬录
⼀、项⽬背景
数据集cnn_train.csv包含⼈类⾯部表情的图⽚的label和feature。在这⾥,⾯部表情识别相当于⼀个分类问题,共有7个类别。
其中label包括7种类型表情:
⼀共有28709个label,说明包含了28709张表情包嘿嘿。
每⼀⾏就是⼀张表情图,48*48=2304个像素,相当于48×48个灰度值(intensity)(0为⿊, 255为⽩)
⼆、数据处理
1、标签与特征分离
这⼀步为了后⾯⽅便读取数据集,对原数据进⾏处理,分离后分别保存为cnn_label.csv和cnn_data.csv.
# cnn_feature_label.py — split the raw dataset into a label file and a
# pixel (feature) file so later steps can read them independently.
import pandas as pd

# Path of the raw training data.
path = 'cnn_train.csv'
# Load the raw csv.
df = pd.read_csv(path)
# Extract the label column.
df_y = df[['label']]
# Extract the feature (pixel) column.
df_x = df[['feature']]
# Write the labels to cnn_label.csv (no index, no header).
df_y.to_csv('cnn_label.csv', index=False, header=False)
# Write the features to cnn_data.csv (no index, no header).
df_x.to_csv('cnn_data.csv', index=False, header=False)
执⾏之后⽣成结果⽂件:
2、数据可视化
完成与标签分离后,下⼀步我们对特征进⼀步处理,也就是将每个数据⾏的2304个像素值合成每张48*48的表情图。
# face_view.py — data visualization: render each csv pixel row
# as a 48x48 grayscale image file.
import cv2
import numpy as np

# Directory where the face images are written.
path = './/face'
# Load the pixel matrix: one row per image, 48*48=2304 values per row.
data = np.loadtxt('cnn_data.csv')
# Write every row out as a 48x48 jpg named after its row index.
for i in range(data.shape[0]):
    face_array = data[i, :].reshape((48, 48))  # flat row -> 48x48 image
    cv2.imwrite(path + '//' + '{}.jpg'.format(i), face_array)  # save image
这段代码将写⼊28709张表情图,执⾏需要⼀⼩段时间。
结果如下:
3、训练集和测试集
第⼀步,我们要训练模型,需要划分⼀下训练集和验证集。⼀共有28709张图⽚,我取前24000张图⽚作为训练集,其他图⽚作为验证集。新建⽂件夹cnn_train和cnn_val,将0.jpg到23999.jpg放进⽂件夹cnn_train,将其他图⽚放进⽂件夹cnn_val。
第⼆步,对每张图⽚标记属于哪⼀个类别,存放在dataset.csv中,分别在刚刚训练集和测试集执⾏标记任务。
# cnn_picture_label.py — label every face image in a folder and write
# the (filename, label) pairs to dataset.csv inside that folder.
import os
import pandas as pd


def data_label(path):
    """Scan *path* for jpg files and write <path>\\dataset.csv mapping each
    image file name to its label.

    The numeric file stem (e.g. "123" from "123.jpg") is used as the row
    index into cnn_label.csv, which holds one label per image.
    """
    # Labels indexed by image number, one per row, no header.
    df_label = pd.read_csv('cnn_label.csv', header=None)
    # All entries in the target folder.
    files_dir = os.listdir(path)
    # Image file names.
    path_list = []
    # Label for each image, in the same order as path_list.
    label_list = []
    for file_dir in files_dir:
        # Keep only jpg files; look their label up by the numeric file stem.
        if os.path.splitext(file_dir)[1] == ".jpg":
            path_list.append(file_dir)
            index = int(os.path.splitext(file_dir)[0])
            label_list.append(df_label.iat[index, 0])
    # Persist both columns as dataset.csv in the scanned folder.
    path_s = pd.Series(path_list)
    label_s = pd.Series(label_list)
    df = pd.DataFrame()
    df['path'] = path_s
    df['label'] = label_s
    df.to_csv(path + '\\dataset.csv', index=False, header=False)


def main():
    # Folders holding the training and validation images.
    train_path = 'D:\\PyCharm_Project\\deep learning\\model\\cnn_train'
    val_path = 'D:\\PyCharm_Project\\deep learning\\model\\cnn_val'
    data_label(train_path)
    data_label(val_path)


if __name__ == "__main__":
    main()
完成之后如图:
第三步,重写Dataset类,它是Pytorch中图像数据集加载的⼀个基类,源码如下,我们需要重写类来实现加载上⾯的图像数据集。
import bisect
import warnings
from torch._utils import _accumulate
from torch import randperm
class Dataset(object):
    r"""An abstract class representing a :class:`Dataset`.

    All datasets that represent a map from keys to data samples should subclass
    it. All subclasses should overwrite :meth:`__getitem__`, supporting fetching a
    data sample for a given key. Subclasses could also optionally overwrite
    :meth:`__len__`, which is expected to return the size of the dataset by many
    :class:`~torch.utils.data.Sampler` implementations and the default options
    of :class:`~torch.utils.data.DataLoader`.

    .. note::
      :class:`~torch.utils.data.DataLoader` by default constructs a index
      sampler that yields integral indices. To make it work with a map-style
      dataset with non-integral indices/keys, a custom sampler must be provided.
    """

    def __getitem__(self, index):
        # Subclasses must implement sample lookup.
        raise NotImplementedError

    def __add__(self, other):
        # `ds1 + ds2` concatenates two datasets.
        return ConcatDataset([self, other])

    # No `def __len__(self)` default?
    # See NOTE [ Lack of Default `__len__` in Python Abstract Base Classes ]
    # in pytorch/torch/utils/data/sampler.py
重写之后如下,⾃定义类名为FaceDataset:
class FaceDataset(data.Dataset):
    """Map-style dataset that serves the 48x48 face images listed in
    <root>\\dataset.csv together with their expression labels."""

    # Initialization: read the (file name, label) table once up front.
    def __init__(self, root):
        super(FaceDataset, self).__init__()
        self.root = root
        # Column 0 = image file name, column 1 = label.
        df_path = pd.read_csv(root + '\\dataset.csv', header=None, usecols=[0])
        df_label = pd.read_csv(root + '\\dataset.csv', header=None, usecols=[1])
        self.path = np.array(df_path)[:, 0]
        self.label = np.array(df_label)[:, 0]

    # Load one image; item is the sample index.
    def __getitem__(self, item):
        # Image data must be a tensor for training; the label can stay numeric.
        face = cv2.imread(self.root + '\\' + self.path[item])
        # Convert to a single-channel grayscale image.
        face_gray = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
        # Histogram equalization to improve contrast.
        face_hist = cv2.equalizeHist(face_gray)
        # Normalize pixels to [0, 1] and reshape 48x48 -> 1x48x48 so the
        # tensor matches nn.Conv2d's (channel, height, width) layout.
        face_normalized = face_hist.reshape(1, 48, 48) / 255.0
        face_tensor = torch.from_numpy(face_normalized)
        face_tensor = face_tensor.type('torch.FloatTensor')
        label = self.label[item]
        return face_tensor, label

    # Number of samples in the dataset.
    def __len__(self):
        return self.path.shape[0]
到此,就实现了数据集加载的过程,下⾯准备使⽤这个类将数据喂给模型训练了。
三、模型搭建
这是Github上⾯部表情识别的⼀个开源项⽬的模型结构,我们使⽤model B搭建⽹络模型。使⽤RRelu(随机修正线性单元)作为激活函数。卷积神经⽹络模型如下:
class FaceCNN(nn.Module):
    """CNN for 7-class facial expression recognition: three conv/pool
    stages followed by a fully connected classifier producing logits."""

    # Build the network structure.
    def __init__(self):
        super(FaceCNN, self).__init__()

        # Stage 1: (N, 1, 48, 48) -> (N, 64, 24, 24)
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=1),  # convolution
            nn.BatchNorm2d(num_features=64),        # batch normalization
            nn.RReLU(inplace=True),                 # randomized leaky ReLU activation
            nn.MaxPool2d(kernel_size=2, stride=2),  # max pooling
        )

        # Stage 2: (N, 64, 24, 24) -> (N, 128, 12, 12)
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(num_features=128),
            nn.RReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        # Stage 3: (N, 128, 12, 12) -> (N, 256, 6, 6)
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(num_features=256),
            nn.RReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        # Gaussian weight initialization for the conv stages.
        self.conv1.apply(gaussian_weights_init)
        self.conv2.apply(gaussian_weights_init)
        self.conv3.apply(gaussian_weights_init)

        # Fully connected classifier head (7 output classes).
        self.fc = nn.Sequential(
            nn.Dropout(p=0.2),
            nn.Linear(in_features=256 * 6 * 6, out_features=4096),
            nn.RReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=4096, out_features=1024),
            nn.RReLU(inplace=True),
            nn.Linear(in_features=1024, out_features=256),
            nn.RReLU(inplace=True),
            nn.Linear(in_features=256, out_features=7),
        )

    # Forward pass.
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        # Flatten to (N, 256*6*6) before the linear layers.
        x = x.view(x.shape[0], -1)
        y = self.fc(x)
        return y
参数解析:
输⼊通道数in_channels,输出通道数(即卷积核的通道数)out_channels,卷积核⼤⼩kernel_size,步长stride,对称填0⾏列数padding。第⼀层卷积:input:(batch_size, 1, 48, 48), output:(batch_size, 64, 24, 24)
第⼆层卷积:input:(batch_size, 64, 24, 24), output:(batch_size, 128, 12, 12)
第三层卷积:input:(batch_size, 128, 12, 12), output:(batch_size, 256, 6, 6)
四、模型训练
损失函数使⽤交叉熵,优化器是随机梯度下降SGD,其中weight_decay为正则项系数,每轮训练打印损失值,每5轮训练打印准确率。def train(train_dataset, val_dataset, batch_size, epochs, learning_rate, wt_decay):
# 载⼊数据并分割batch
train_loader = data.DataLoader(train_dataset, batch_size)
# 构建模型
model = FaceCNN()
# 损失函数
loss_function = nn.CrossEntropyLoss()
# 优化器
optimizer = optim.SGD(model.parameters(), lr=learning_rate, weight_decay=wt_decay)
# 逐轮训练
for epoch in range(epochs):
# 记录损失值
loss_rate = 0
# scheduler.step() # 学习率衰减
for images, labels in train_loader:
# 梯度清零
<_grad()
# 前向传播
output = model.forward(images)
# 误差计算
loss_rate = loss_function(output, labels)
# 误差的反向传播
loss_rate.backward()
# 更新参数
optimizer.step()
# 打印每轮的损失
print('After {} epochs , the loss_rate is : '.format(epoch + 1), loss_rate.item())
if epoch % 5 == 0:
model.eval() # 模型评估
acc_train = validate(model, train_dataset, batch_size)
acc_val = validate(model, val_dataset, batch_size)
print('After {} epochs , the acc_train is : '.format(epoch + 1), acc_train)
print('After {} epochs , the acc_val is : '.format(epoch + 1), acc_val)
return model
五、完整代码
"""
CNN_face.py — facial expression recognition with a convolutional
neural network (PyTorch implementation).
"""
import torch
import torch.utils.data as data
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import cv2


# Parameter initialization helper.
def gaussian_weights_init(m):
    """Initialize convolution-layer weights from N(0, 0.04).

    Intended to be passed to ``nn.Module.apply``; non-conv modules are
    left untouched.
    """
    classname = m.__class__.__name__
    # str.find returns -1 when not found, so != -1 means the class name
    # contains 'Conv', i.e. m is a convolution layer.
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.04)


# Measure the model's accuracy on a dataset.
def validate(model, dataset, batch_size):
    """Return the fraction of samples in *dataset* that *model* classifies
    correctly (predicted class = argmax of the output logits)."""
    val_loader = data.DataLoader(dataset, batch_size)
    result, num = 0.0, 0
    for images, labels in val_loader:
        pred = model.forward(images)
        # Predicted class = index of the highest logit per sample.
        pred = np.argmax(pred.data.numpy(), axis=1)
        labels = labels.data.numpy()
        result += np.sum((pred == labels))
        num += len(images)
    acc = result / num
    return acc


class FaceDataset(data.Dataset):
    """Map-style dataset that serves the 48x48 face images listed in
    <root>\\dataset.csv together with their expression labels."""

    # Initialization: read the (file name, label) table once up front.
    def __init__(self, root):
        super(FaceDataset, self).__init__()
        self.root = root
        # Column 0 = image file name, column 1 = label.
        df_path = pd.read_csv(root + '\\dataset.csv', header=None, usecols=[0])
        df_label = pd.read_csv(root + '\\dataset.csv', header=None, usecols=[1])
        self.path = np.array(df_path)[:, 0]
        self.label = np.array(df_label)[:, 0]

    # Load one image; item is the sample index.
    def __getitem__(self, item):
        # Image data must be a tensor for training; the label can stay numeric.
        face = cv2.imread(self.root + '\\' + self.path[item])
        # Convert to a single-channel grayscale image.
        face_gray = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
        # Histogram equalization to improve contrast.
        face_hist = cv2.equalizeHist(face_gray)
        # Normalize pixels to [0, 1] and reshape 48x48 -> 1x48x48 so the
        # tensor matches nn.Conv2d's (channel, height, width) layout.
        face_normalized = face_hist.reshape(1, 48, 48) / 255.0
        face_tensor = torch.from_numpy(face_normalized)
        face_tensor = face_tensor.type('torch.FloatTensor')
        label = self.label[item]
        return face_tensor, label

    # Number of samples in the dataset.
    def __len__(self):
        return self.path.shape[0]


68
class FaceCNN(nn.Module):
    """CNN for 7-class facial expression recognition: three conv/pool
    stages followed by a fully connected classifier producing logits."""

    # Build the network structure.
    def __init__(self):
        super(FaceCNN, self).__init__()

        # Stage 1: (N, 1, 48, 48) -> (N, 64, 24, 24)
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=1),  # convolution
            nn.BatchNorm2d(num_features=64),        # batch normalization
            nn.RReLU(inplace=True),                 # randomized leaky ReLU activation
            nn.MaxPool2d(kernel_size=2, stride=2),  # max pooling
        )

        # Stage 2: (N, 64, 24, 24) -> (N, 128, 12, 12)
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(num_features=128),
            nn.RReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        # Stage 3: (N, 128, 12, 12) -> (N, 256, 6, 6)
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(num_features=256),
            nn.RReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        # Gaussian weight initialization for the conv stages.
        self.conv1.apply(gaussian_weights_init)
        self.conv2.apply(gaussian_weights_init)
        self.conv3.apply(gaussian_weights_init)

        # Fully connected classifier head (7 output classes).
        self.fc = nn.Sequential(
            nn.Dropout(p=0.2),
            nn.Linear(in_features=256 * 6 * 6, out_features=4096),
            nn.RReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=4096, out_features=1024),
            nn.RReLU(inplace=True),
            nn.Linear(in_features=1024, out_features=256),
            nn.RReLU(inplace=True),
            nn.Linear(in_features=256, out_features=7),
        )

    # Forward pass.
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        # Flatten to (N, 256*6*6) before the linear layers.
        x = x.view(x.shape[0], -1)
        y = self.fc(x)
        return y


def train(train_dataset, val_dataset, batch_size, epochs, learning_rate, wt_decay):
    """Train FaceCNN with cross-entropy loss and SGD.

    wt_decay is the L2 regularization coefficient passed to the optimizer.
    Prints the last batch loss every epoch and the train/val accuracy
    every 5 epochs; returns the trained model.
    """
    # Load the data and split it into batches.
    train_loader = data.DataLoader(train_dataset, batch_size)
    # Build the model.
    model = FaceCNN()
    # Cross-entropy loss.
    loss_function = nn.CrossEntropyLoss()
    # SGD optimizer with weight decay as regularization.
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, weight_decay=wt_decay)
    # Learning-rate decay (disabled in the original run).
    # scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.8)
    # Train epoch by epoch.
    for epoch in range(epochs):
        # Holds the loss of the most recent batch.
        loss_rate = 0
        # scheduler.step()  # learning-rate decay
        model.train()  # training mode
        for images, labels in train_loader:
            # Clear accumulated gradients.
            optimizer.zero_grad()
            # Forward pass.
            output = model.forward(images)
            # Compute the loss.
            loss_rate = loss_function(output, labels)
            # Backpropagate the error.
            loss_rate.backward()
            # Update the parameters.
            optimizer.step()
        # Print the loss for this epoch.
        print('After {} epochs , the loss_rate is : '.format(epoch + 1), loss_rate.item())
        if epoch % 5 == 0:
            model.eval()  # evaluation mode
            acc_train = validate(model, train_dataset, batch_size)
            acc_val = validate(model, val_dataset, batch_size)
            print('After {} epochs , the acc_train is : '.format(epoch + 1), acc_train)
            print('After {} epochs , the acc_val is : '.format(epoch + 1), acc_val)
    return model
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。
发表评论