18.1: Training a TensorFlow classification model with MobileNetV2 (data augmentation, model saving, learning-rate decay, ...)
Once upon a time there was a kid learning TensorFlow. He bought a few TensorFlow books and noticed that the examples in every one of them used the MNIST dataset. Because the framework already wraps that dataset, a single command loads the data, so he never learned how the data actually gets read. As a result, when he went to train his own model, where the data was a pile of individual image files, he had no idea where to start (completely lost). Beyond that, he wanted a few extra features: how to adjust the learning rate dynamically, how to watch loss, accuracy, and learning rate change in TensorBoard, how to display the images used during training, how to visualize the model's structure, and how to save the model. He eventually sorted all of that out, but he still wasn't satisfied: he wanted to use the saved model (model.ckpt) to predict each new image. Then he found he had too many images to predict one at a time, which was slow, and wondered whether he could feed a batch and predict several at once. Next he noticed that predicting with the trained model required redefining the network structure (the .meta file avoids this, but that approach forces you to feed a full batch every time), and so he discovered that the saved model can be turned into a pb file; calling the pb file only requires naming the input and output nodes, with no need to redefine the network, and then it can predict images. Later still, when he used the model on new images, every image was predicted as the same class; after some analysis he traced the problem to batch normalization, because a BN layer behaves differently at training time and at test time...
I. Training
1. The dataset: reading batches of data, with data augmentation
import os
import glob
import numpy as np
import tensorflow as tf

def get_files(file_dir):
    image_list, label_list = [], []
    # each sub-folder of file_dir is one class; collect every jpg in it
    for label in os.listdir(file_dir):
        for img in glob.glob(os.path.join(file_dir, label, "*.jpg")):
            image_list.append(img)
            label_list.append(int(label_dict[label]))
    print('There are %d data' % (len(image_list)))
    # shuffle the image paths and labels together
    temp = np.array([image_list, label_list])
    temp = temp.transpose()
    np.random.shuffle(temp)
    image_list = list(temp[:, 0])
    label_list = list(temp[:, 1])
    label_list = [int(i) for i in label_list]
    return image_list, label_list
He noticed that the dataset's labels were all strings, while training needs integer labels 0, 1, 2, ..., so he made his own mapping from class name to label. He created a file in which each line holds one name-to-label mapping: daisy:0, dandelion:1, roses:2, sunflowers:3, tulips:4. To tie the names and labels together he wrote these lines:
label_dict, label_dict_res = {}, {}
# manually specify the mapping from class name to label
with open("", 'r') as f:
    for line in f.readlines():
        folder, label = line.strip().split(':')[0], line.strip().split(':')[1]
        label_dict[folder] = label
        label_dict_res[label] = folder
print(label_dict)
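Concretely, under this scheme the mapping file would contain exactly these five lines:

daisy:0
dandelion:1
roses:2
sunflowers:3
tulips:4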
He had used Caffe before and knew Caffe can read images directly or convert them to LMDB first. He found that TensorFlow usually converts the data to TFRecord files before reading it; converting the file format first struck him as a hassle (even though it wins on training speed and resource usage), and he wanted to train directly from the image files. That is how he came across tf.image.random_flip_left_right, tf.image.random_flip_up_down, tf.image.random_brightness, tf.image.random_contrast, tf.image.random_hue, and tf.image.random_saturation. He also wanted to see in TensorBoard what the augmented training images looked like, which led him to tf.summary.image. Finally he defined the function that fetches batches of data:
def get_batch(image, label, image_W, image_H, batch_size, capacity):
    image = tf.cast(image, tf.string)
    label = tf.cast(label, tf.int32)
    # make an input queue
    input_queue = tf.train.slice_input_producer([image, label], shuffle=False)
    label = input_queue[1]
    image_contents = tf.read_file(input_queue[0])
    image = tf.image.decode_jpeg(image_contents, channels=3)
    # data augmentation
    #image = tf.image.resize_image_with_pad(image, target_height=image_W, target_width=image_H)
    image = tf.image.resize_images(image, (image_W, image_H))
    # random horizontal flip
    image = tf.image.random_flip_left_right(image)
    # random vertical flip
    image = tf.image.random_flip_up_down(image)
    # random brightness
    image = tf.image.random_brightness(image, max_delta=32/255.0)
    # random contrast
    #image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
    # random hue
    image = tf.image.random_hue(image, max_delta=0.05)
    # random saturation
    #image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
    # standardize each image to zero mean and unit variance
    image = tf.image.per_image_standardization(image)
    image_batch, label_batch = tf.train.batch([image, label],
                                              batch_size=batch_size,
                                              num_threads=64,
                                              capacity=capacity)
    tf.summary.image("input_img", image_batch, max_outputs=5)
    label_batch = tf.reshape(label_batch, [batch_size])
    image_batch = tf.cast(image_batch, tf.float32)
    return image_batch, label_batch
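To make these two functions concrete, here is a minimal sketch of wiring them together in a TF1 session (the directory name and hyper-parameters are illustrative assumptions, not values from the original post). Since slice_input_producer builds a queue, queue runners must be started before pulling batches:

import tensorflow as tf

train_dir = "flower_photos"  # hypothetical dataset root: one sub-folder per class
image_list, label_list = get_files(train_dir)
image_batch, label_batch = get_batch(image_list, label_list,
                                     image_W=224, image_H=224,
                                     batch_size=32, capacity=256)
with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)  # feed the input queue
    try:
        imgs, labels = sess.run([image_batch, label_batch])
        print(imgs.shape, labels)  # (32, 224, 224, 3) and 32 integer labels
    finally:
        coord.request_stop()
        coord.join(threads)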
2. The MobileNetV2 network
He found a MobileNetV2 implementation online, created a new model.py file, and pasted the network into it (as class MobileNetV2). He only used MobileNetV2 here, but he could just as well paste the forward-pass code of some other network found online (ResNet, VGG, Inception, ...) into this file and train that network instead.
#coding:utf-8
import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.contrib.layers.python.layers import batch_norm
import tensorflow.contrib as tc
import numpy as np
import time
class MobileNetV1(object):
    def __init__(self, is_training=True, input_size=224):
        self.input_size = input_size
        self.is_training = is_training
        self.normalizer = tc.layers.batch_norm
        self.bn_params = {'is_training': self.is_training}
        with tf.variable_scope('MobileNetV1'):
            self._create_placeholders()
            self._build_model()

    def _create_placeholders(self):
        self.input = tf.placeholder(dtype=tf.float32, shape=[None, self.input_size, self.input_size, 3])

    def _build_model(self):
        i = 0
        with tf.variable_scope('init_conv'):
            self.conv1 = tc.layers.conv2d(self.input, num_outputs=32, kernel_size=3, stride=2,
                                          normalizer_fn=self.normalizer, normalizer_params=self.bn_params)
        # each block below: a 3x3 depthwise conv followed by a 1x1 pointwise conv
        # 1
        with tf.variable_scope('dconv_block{}'.format(i)):
            i += 1
            self.dconv1 = tc.layers.separable_conv2d(self.conv1, num_outputs=None, kernel_size=3, depth_multiplier=1,
                                                     normalizer_fn=self.normalizer, normalizer_params=self.bn_params)
            self.pconv1 = tc.layers.conv2d(self.dconv1, 64, 1,
                                           normalizer_fn=self.normalizer, normalizer_params=self.bn_params)
        # 2
        with tf.variable_scope('dconv_block{}'.format(i)):
            i += 1
            self.dconv2 = tc.layers.separable_conv2d(self.pconv1, None, 3, 1, 2,
                                                     normalizer_fn=self.normalizer, normalizer_params=self.bn_params)
            self.pconv2 = tc.layers.conv2d(self.dconv2, 128, 1,
                                           normalizer_fn=self.normalizer, normalizer_params=self.bn_params)
        # 3
        with tf.variable_scope('dconv_block{}'.format(i)):
            i += 1
            self.dconv3 = tc.layers.separable_conv2d(self.pconv2, None, 3, 1, 1,
                                                     normalizer_fn=self.normalizer, normalizer_params=self.bn_params)
            self.pconv3 = tc.layers.conv2d(self.dconv3, 128, 1,
                                           normalizer_fn=self.normalizer, normalizer_params=self.bn_params)
        # 4
        with tf.variable_scope('dconv_block{}'.format(i)):
            i += 1
            self.dconv4 = tc.layers.separable_conv2d(self.pconv3, None, 3, 1, 2,
                                                     normalizer_fn=self.normalizer, normalizer_params=self.bn_params)
            self.pconv4 = tc.layers.conv2d(self.dconv4, 256, 1,
                                           normalizer_fn=self.normalizer, normalizer_params=self.bn_params)
        # 5
        with tf.variable_scope('dconv_block{}'.format(i)):
            i += 1
            self.dconv5 = tc.layers.separable_conv2d(self.pconv4, None, 3, 1, 1,
                                                     normalizer_fn=self.normalizer, normalizer_params=self.bn_params)
            self.pconv5 = tc.layers.conv2d(self.dconv5, 256, 1,
                                           normalizer_fn=self.normalizer, normalizer_params=self.bn_params)
        # 6
        with tf.variable_scope('dconv_block{}'.format(i)):
            i += 1
            self.dconv6 = tc.layers.separable_conv2d(self.pconv5, None, 3, 1, 2,
                                                     normalizer_fn=self.normalizer, normalizer_params=self.bn_params)
            self.pconv6 = tc.layers.conv2d(self.dconv6, 512, 1,
                                           normalizer_fn=self.normalizer, normalizer_params=self.bn_params)
        # 7_1
        with tf.variable_scope('dconv_block{}'.format(i)):
            i += 1
            self.dconv71 = tc.layers.separable_conv2d(self.pconv6, None, 3, 1, 1,
                                                      normalizer_fn=self.normalizer, normalizer_params=self.bn_params)
            self.pconv71 = tc.layers.conv2d(self.dconv71, 512, 1,
                                            normalizer_fn=self.normalizer, normalizer_params=self.bn_params)
        # 7_2
        with tf.variable_scope('dconv_block{}'.format(i)):
            i += 1
            self.dconv72 = tc.layers.separable_conv2d(self.pconv71, None, 3, 1, 1,
                                                      normalizer_fn=self.normalizer, normalizer_params=self.bn_params)
            self.pconv72 = tc.layers.conv2d(self.dconv72, 512, 1,
                                            normalizer_fn=self.normalizer, normalizer_params=self.bn_params)
        # 7_3
        with tf.variable_scope('dconv_block{}'.format(i)):
            i += 1
            self.dconv73 = tc.layers.separable_conv2d(self.pconv72, None, 3, 1, 1,
                                                      normalizer_fn=self.normalizer, normalizer_params=self.bn_params)
            self.pconv73 = tc.layers.conv2d(self.dconv73, 512, 1,
                                            normalizer_fn=self.normalizer, normalizer_params=self.bn_params)
        # 7_4
        with tf.variable_scope('dconv_block{}'.format(i)):
            i += 1
            self.dconv74 = tc.layers.separable_conv2d(self.pconv73, None, 3, 1, 1,
                                                      normalizer_fn=self.normalizer, normalizer_params=self.bn_params)
            self.pconv74 = tc.layers.conv2d(self.dconv74, 512, 1,
                                            normalizer_fn=self.normalizer, normalizer_params=self.bn_params)
        # 7_5
        with tf.variable_scope('dconv_block{}'.format(i)):
            i += 1
            self.dconv75 = tc.layers.separable_conv2d(self.pconv74, None, 3, 1, 1,
                                                      normalizer_fn=self.normalizer, normalizer_params=self.bn_params)
            self.pconv75 = tc.layers.conv2d(self.dconv75, 512, 1,
                                            normalizer_fn=self.normalizer, normalizer_params=self.bn_params)
        # 8
        with tf.variable_scope('dconv_block{}'.format(i)):
            i += 1
            self.dconv8 = tc.layers.separable_conv2d(self.pconv75, None, 3, 1, 2,
                                                     normalizer_fn=self.normalizer, normalizer_params=self.bn_params)
            self.pconv8 = tc.layers.conv2d(self.dconv8, 1024, 1,
                                           normalizer_fn=self.normalizer, normalizer_params=self.bn_params)
        # 9
        with tf.variable_scope('dconv_block{}'.format(i)):
            i += 1
            self.dconv9 = tc.layers.separable_conv2d(self.pconv8, None, 3, 1, 1,
                                                     normalizer_fn=self.normalizer, normalizer_params=self.bn_params)
            self.pconv9 = tc.layers.conv2d(self.dconv9, 1024, 1,
                                           normalizer_fn=self.normalizer, normalizer_params=self.bn_params)
        with tf.variable_scope('global_max_pooling'):
            self.pool = tc.layers.max_pool2d(self.pconv9, kernel_size=7, stride=1)
        with tf.variable_scope('prediction'):
            self.output = tc.layers.conv2d(self.pool, 1000, 1, activation_fn=None)
class MobileNetV2(object):
    def __init__(self, input, num_classes=1000, is_training=True):
        self.input = input
        self.num_classes = num_classes
        self.is_training = is_training
        self.normalizer = tc.layers.batch_norm
        self.bn_params = {'is_training': self.is_training}
        with tf.variable_scope('MobileNetV2'):
            self._build_model()

    def _build_model(self):
        self.i = 0
        with tf.variable_scope('init_conv'):
            output = tc.layers.conv2d(self.input, 32, 3, 2,
                                      normalizer_fn=self.normalizer, normalizer_params=self.bn_params)
            # print(output.get_shape())
        self.output = self._inverted_bottleneck(output, 1, 16, 0)
        self.output = self._inverted_bottleneck(self.output, 6, 24, 1)
        self.output = self._inverted_bottleneck(self.output, 6, 24, 0)
        self.output = self._inverted_bottleneck(self.output, 6, 32, 1)
        self.output = self._inverted_bottleneck(self.output, 6, 32, 0)
        self.output = self._inverted_bottleneck(self.output, 6, 32, 0)
        self.output = self._inverted_bottleneck(self.output, 6, 64, 1)
        self.output = self._inverted_bottleneck(self.output, 6, 64, 0)
        self.output = self._inverted_bottleneck(self.output, 6, 64, 0)
        self.output = self._inverted_bottleneck(self.output, 6, 64, 0)
        self.output = self._inverted_bottleneck(self.output, 6, 96, 0)
        self.output = self._inverted_bottleneck(self.output, 6, 96, 0)
        self.output = self._inverted_bottleneck(self.output, 6, 96, 0)
        self.output = self._inverted_bottleneck(self.output, 6, 160, 1)
        self.output = self._inverted_bottleneck(self.output, 6, 160, 0)
        self.output = self._inverted_bottleneck(self.output, 6, 160, 0)
        self.output = self._inverted_bottleneck(self.output, 6, 320, 0)
        self.output = tc.layers.conv2d(self.output, 1280, 1,
                                       normalizer_fn=self.normalizer, normalizer_params=self.bn_params)
        self.output = tc.layers.avg_pool2d(self.output, 7)
        self.output = tc.layers.conv2d(self.output, self.num_classes, 1, activation_fn=None)
        self.output = tf.reshape(self.output, shape=[-1, self.num_classes], name="logit")

    def _inverted_bottleneck(self, input, up_sample_rate, channels, subsample):
        with tf.variable_scope('inverted_bottleneck{}_{}_{}'.format(self.i, up_sample_rate, subsample)):
            self.i += 1
            stride = 2 if subsample else 1
            # 1x1 expansion conv: widen the channels by up_sample_rate
            output = tc.layers.conv2d(input, up_sample_rate * input.get_shape().as_list()[-1], 1,
                                      activation_fn=tf.nn.relu6,
                                      normalizer_fn=self.normalizer, normalizer_params=self.bn_params)
            # 3x3 depthwise conv
            output = tc.layers.separable_conv2d(output, None, 3, 1, stride=stride,
                                                activation_fn=tf.nn.relu6,
                                                normalizer_fn=self.normalizer, normalizer_params=self.bn_params)
            # 1x1 linear projection back down to `channels`
            output = tc.layers.conv2d(output, channels, 1, activation_fn=None,
                                      normalizer_fn=self.normalizer, normalizer_params=self.bn_params)
            # residual connection when input and output widths match
            if input.get_shape().as_list()[-1] == channels:
                output = tf.add(input, output)
            return output
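Nothing above wires the network into a training graph yet. As a hedged sketch of the pieces the story mentions (loss, accuracy, learning-rate decay, TensorBoard summaries, saving), the following could sit in the training script; N_CLASSES, the placeholder names, and every hyper-parameter here are illustrative assumptions, not the author's values:

N_CLASSES = 5  # assumed: the five flower classes above
x = tf.placeholder(tf.float32, [None, 224, 224, 3], name="input")
y_ = tf.placeholder(tf.int32, [None], name="label")
logits = MobileNetV2(x, num_classes=N_CLASSES, is_training=True).output
loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_, logits=logits))
accuracy = tf.reduce_mean(tf.cast(tf.nn.in_top_k(logits, y_, 1), tf.float32))
global_step = tf.Variable(0, trainable=False)
# decay the learning rate every 1000 steps (illustrative schedule)
lr = tf.train.exponential_decay(0.1, global_step, decay_steps=1000, decay_rate=0.9, staircase=True)
# batch_norm's moving statistics are only refreshed if its update ops run with the train op
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = tf.train.GradientDescentOptimizer(lr).minimize(loss, global_step=global_step)
tf.summary.scalar("loss", loss)
tf.summary.scalar("accuracy", accuracy)
tf.summary.scalar("learning_rate", lr)
summary_op = tf.summary.merge_all()  # also picks up the tf.summary.image call in get_batch
saver = tf.train.Saver(max_to_keep=3)  # saver.save(sess, "model.ckpt") checkpoints the model

The control_dependencies on UPDATE_OPS matters because tc.layers.batch_norm defers its moving-average updates to that collection by default; skipping it is one way to end up with the "every image predicts the same class" symptom from the introduction.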
# small inception
def model4(x, N_CLASSES, is_train=False):
    x = tf.contrib.layers.conv2d(x, 64, [5, 5], 1, 'SAME', activation_fn=tf.nn.relu)
    # set is_training=True while training; set it to False when using the trained model
    x = batch_norm(x, decay=0.9, updates_collections=None, is_training=is_train)
    x = tf.contrib.layers.max_pool2d(x, [2, 2], stride=2, padding='SAME')
    x1_1 = tf.contrib.layers.conv2d(x, 64, [1, 1], 1, 'SAME', activation_fn=tf.nn.relu)  # 1x1 kernel
    x1_1 = batch_norm(x1_1, decay=0.9, updates_collections=None, is_training=is_train)
    x3_3 = tf.contrib.layers.conv2d(x, 64, [3, 3], 1, 'SAME', activation_fn=tf.nn.relu)  # 3x3 kernel
    x3_3 = batch_norm(x3_3, decay=0.9, updates_collections=None, is_training=is_train)
    x5_5 = tf.contrib.layers.conv2d(x, 64, [5, 5], 1, 'SAME', activation_fn=tf.nn.relu)  # 5x5 kernel
    x5_5 = batch_norm(x5_5, decay=0.9, updates_collections=None, is_training=is_train)
    x = tf.concat([x1_1, x3_3, x5_5], axis=-1)  # concatenate: 64*3 = 192 channels
    x = tf.contrib.layers.max_pool2d(x, [2, 2], stride=2, padding='SAME')
    x1_1 = tf.contrib.layers.conv2d(x, 128, [1, 1], 1, 'SAME', activation_fn=tf.nn.relu)
    x1_1 = batch_norm(x1_1, decay=0.9, updates_collections=None, is_training=is_train)
    x3_3 = tf.contrib.layers.conv2d(x, 128, [3, 3], 1, 'SAME', activation_fn=tf.nn.relu)
    x3_3 = batch_norm(x3_3, decay=0.9, updates_collections=None, is_training=is_train)
    x5_5 = tf.contrib.layers.conv2d(x, 128, [5, 5], 1, 'SAME', activation_fn=tf.nn.relu)
    x5_5 = batch_norm(x5_5, decay=0.9, updates_collections=None, is_training=is_train)
    x = tf.concat([x1_1, x3_3, x5_5], axis=-1)
    x = tf.contrib.layers.max_pool2d(x, [2, 2], stride=2, padding='SAME')
    shp = x.get_shape()
    x = tf.reshape(x, [-1, shp[1]*shp[2]*shp[3]])  # flatten
    logits = tf.contrib.layers.fully_connected(x, N_CLASSES, activation_fn=None)  # output logits without softmax
    return logits
# 2conv + 3fc
def model2(images, batch_size, n_classes):
'''Build the model