[Study Notes] Building a Dataset by Hand, and a First Convolutional Neural Network (CNN)
The dataset consists of cat and dog pictures, a subset of Kaggle's cats-vs-dogs classification images extracted by Google. This time we are not given a ready-made dataset; all we have are cat and dog photos sitting in two separate folders.
As a first step, we use PIL to standardize the images, resizing every picture to 150x150 pixels.
import os
from PIL import Image
base_dir = './dataset/cats_and_dogs_filtered'
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')
train_cats_dir = os.path.join(train_dir, 'cats/')
train_dogs_dir = os.path.join(train_dir, 'dogs/')
validation_cats_dir = os.path.join(validation_dir, 'cats/')
validation_dogs_dir = os.path.join(validation_dir, 'dogs/')
train_cat_fnames = os.listdir(train_cats_dir)
train_dog_fnames = os.listdir(train_dogs_dir)
validation_cat_fnames = os.listdir(validation_cats_dir)
validation_dog_fnames = os.listdir(validation_dogs_dir)
os.mkdir(train_dir + '/resize_cats')
os.mkdir(train_dir + '/resize_dogs')
os.mkdir(validation_dir + '/resize_cats')
os.mkdir(validation_dir + '/resize_dogs')
for i in train_cat_fnames:
    original_img = Image.open(train_cats_dir + i)
    clipping_img = original_img.resize((150, 150), Image.ANTIALIAS)
    clipping_img.save(train_dir + '/resize_cats/' + i)
for i in train_dog_fnames:
    original_img = Image.open(train_dogs_dir + i)
    clipping_img = original_img.resize((150, 150), Image.ANTIALIAS)
    clipping_img.save(train_dir + '/resize_dogs/' + i)
for i in validation_cat_fnames:
    original_img = Image.open(validation_cats_dir + i)
    clipping_img = original_img.resize((150, 150), Image.ANTIALIAS)
    clipping_img.save(validation_dir + '/resize_cats/' + i)
for i in validation_dog_fnames:
    original_img = Image.open(validation_dogs_dir + i)
    clipping_img = original_img.resize((150, 150), Image.ANTIALIAS)
    clipping_img.save(validation_dir + '/resize_dogs/' + i)
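One sanity check worth doing (an addition of mine, not in the original notes): the TFRecord pipeline below reshapes the raw bytes to [150, 150, 3], so every resized image should be 150x150 and in RGB mode. A minimal check could look like this:

import os
from PIL import Image

# Hypothetical check over the resized training cats; repeat for the other folders if needed.
resize_cats_dir = './dataset/cats_and_dogs_filtered/train/resize_cats/'
for fname in os.listdir(resize_cats_dir):
    with Image.open(resize_cats_dir + fname) as img:
        assert img.size == (150, 150), fname
        # A non-RGB image (e.g. grayscale) would break the later reshape to [150, 150, 3];
        # such images could be converted with img.convert('RGB') and saved again.
        if img.mode != 'RGB':
            print('not RGB:', fname, img.mode)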
Next, we package the cat and dog images into a TFRecords file.
import os
import tensorflow as tf
from PIL import Image
cwd = './dataset/cats_and_dogs_filtered/train/'
classes = ('resize_cats', 'resize_dogs')
writer = tf.python_io.TFRecordWriter('cats_and_dogs_train_onehot.tfrecords')
for index, name in enumerate(classes):
    class_path = cwd + name + '/'
    for img_name in os.listdir(class_path):
        img_path = class_path + img_name
        img = Image.open(img_path)
        img_raw = img.tobytes()  # raw RGB bytes of the 150x150 image
        example = tf.train.Example(features=tf.train.Features(feature={
            'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[1, 0] if index == 0 else [0, 1])),
            'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw]))
        }))
        writer.write(example.SerializeToString())
writer.close()
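To make sure the file was written correctly, one record can be read back and inspected; this check is my own addition, not part of the original notes (67500 = 150 * 150 * 3):

import tensorflow as tf

# Read the first serialized example back and check its fields.
record_iter = tf.python_io.tf_record_iterator('cats_and_dogs_train_onehot.tfrecords')
example = tf.train.Example()
example.ParseFromString(next(record_iter))
label = list(example.features.feature['label'].int64_list.value)
img_bytes = example.features.feature['img_raw'].bytes_list.value[0]
print(label, len(img_bytes))  # expect [1, 0] or [0, 1] and 67500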
Since cats vs. dogs is a binary classification problem, we can either use one-hot codes as labels, or simply use [0] and [1] to stand for cat and dog.
If we use [1,0] / [0,1] as the cat and dog labels, we can use softmax as the output layer's activation function and cross-entropy as the loss function; the advantage is that accuracy is very easy to compute.
If we use [0] / [1] as the cat and dog labels, we can use sigmoid as the output layer's activation function and log loss as the loss function; the advantage is that AUC is easy to compute and we can set a λ threshold (a classification threshold). A sketch of this variant follows the snippet below.
I'm going with one-hot labels here, mostly because I'm lazy (fiddling with thresholds and AUC is too much hassle).
If you don't want one-hot labels, change this part of the code above:
'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[1, 0] if index == 0 else [0, 1]))
# change to
'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[index]))
That way cat is 0 and dog is 1.
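For reference, here is a rough, untested sketch of the sigmoid + log-loss variant I did not use. It reuses weight_variable, biases_variable and the 10-unit hidden layer h_fc1 from the training script below, and assumes y_input holds the scalar label cast to float32 with shape [batch, 1]:

# Scalar labels in {0, 1}: one sigmoid output unit and log loss.
w_out = weight_variable([10, 1])
b_out = biases_variable([1])
logits = tf.matmul(h_fc1, w_out) + b_out
pred = tf.nn.sigmoid(logits)  # predicted probability of "dog"
loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=y_input, logits=logits))
# A classification threshold can then be chosen freely, e.g. 0.5:
predicted_class = tf.cast(pred > 0.5, tf.int64)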
The validation set is made the same way; just change train to validation in cwd.
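Concretely that amounts to two small edits; the validation output filename below is just a suggestion, not something fixed by the original notes:

cwd = './dataset/cats_and_dogs_filtered/validation/'
writer = tf.python_io.TFRecordWriter('cats_and_dogs_validation_onehot.tfrecords')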
For reading the TFRecords we can simply reuse the code given on the official site earlier; it is quite practical, and even if you can't memorize it, keep it somewhere you can reach so you can pull it up whenever you need it.
Goal:
We first build a simple network: one convolutional layer, one fully connected layer, and one classification output layer (softmax). We also save the training result so it can be ported to other networks.
Optional exercises (I'm skipping these here; feel free to add them if you're interested):
Dynamic learning rate: needs a global_step (a non-trainable variable); see the sketch after this list.
TensorBoard visualization: needs
tf.summary.scalar()
tf.summary.histogram()
tf.summary.merge_all()
tf.summary.FileWriter()
and so on.
Visualizing how the network processes the images: needs matplotlib.
Different training methods: (SGD, Adam, etc.)
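For the dynamic learning rate item, a minimal sketch assuming tf.train.exponential_decay (the decay numbers are placeholders, and loss refers to the cross-entropy defined in the training script below):

# Learning rate that decays as training progresses.
global_step = tf.Variable(0, trainable=False)  # the non-trainable step counter
learning_rate = tf.train.exponential_decay(
    learning_rate=0.005,  # starting rate, same as start_learning_rate below
    global_step=global_step,
    decay_steps=100,      # placeholder: decay every 100 steps
    decay_rate=0.96)
# Passing global_step to minimize() increments it on every training step.
train = tf.train.RMSPropOptimizer(learning_rate).minimize(loss, global_step=global_step)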
Here is the neural network used for training.
import tensorflow as tf
def _parse_function(record):
    """Extracts features and labels.

    Args:
      record: A serialized tf.train.Example from the TFRecord file.
    Returns:
      A `tuple` `(features, labels)`:
        features: A [150, 150, 3] uint8 image tensor.
        labels: A tensor with the corresponding one-hot label.
    """
    features = {
        "label": tf.FixedLenFeature([2], tf.int64),   # one-hot label, e.g. [1, 0] for cat
        "img_raw": tf.FixedLenFeature([], tf.string)  # raw RGB bytes of the 150x150 image
    }
    parsed_features = tf.parse_single_example(record, features)
    img_raw = parsed_features['img_raw']
    img_raw = tf.decode_raw(img_raw, tf.uint8)
    img_raw = tf.reshape(img_raw, [150, 150, 3])
    labels = parsed_features['label']
    return img_raw, labels
def my_input_fn(input_filenames, num_epochs=None, shuffle=True):
    # Create a dataset and map it to features and labels.
    ds = tf.data.TFRecordDataset(input_filenames)
    ds = ds.map(_parse_function)
    if shuffle:
        ds = ds.shuffle(10000)
    # Batch the examples (the images are a fixed 150x150x3, so padding is a no-op here).
    ds = ds.padded_batch(25, ds.output_shapes)
    ds = ds.repeat(num_epochs)
    # Return the next batch of data.
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=.1)
    return tf.Variable(initial)

def biases_variable(shape):
    initial = tf.constant(.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, w):
    return tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
def _loss(ys, pred):
    cross_entropy = tf.reduce_mean(
        -tf.reduce_sum(ys * tf.log(tf.clip_by_value(pred, 1e-10, 1.0)), reduction_indices=[1]))
    return cross_entropy

def train_step(learning_rate, loss):
    optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(loss)
    return optimizer

def accuracy(pred, ys):
    acc_bool = tf.equal(tf.argmax(pred, 1), tf.argmax(ys, 1))
    acc = tf.reduce_mean(tf.cast(acc_bool, tf.float32))
    return acc
train_path = my_input_fn('cats_and_dogs_train_onehot.tfrecords')
xs = train_path[0]
xs = tf.cast(xs, tf.float32)
x_input = xs/255
ys = train_path[1]
y_input = tf.cast(ys, tf.float32)
w_conv1 = weight_variable([3, 3, 3, 6])
b_conv1 = biases_variable([6])
h_conv1 = tf.nn.relu(conv2d(x_input, w_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
h_pool1_flat = tf.reshape(h_pool1, [-1, 75*75*6])
w_fc1 = weight_variable([75*75*6, 10])
b_fc1 = biases_variable([10])
h_fc1 = tf.nn.relu(tf.matmul(h_pool1_flat, w_fc1) + b_fc1)
w_fc2 = weight_variable([10, 2])
b_fc2 = biases_variable([2])
pred = tf.nn.softmax(tf.matmul(h_fc1, w_fc2) + b_fc2)
start_learning_rate = .005
loss = _loss(y_input, pred)
train = train_step(start_learning_rate, loss)
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
saver = tf.train.Saver()
for i in range(1000):
    sess.run(train)
    save_path = saver.save(sess, 'my_net/simple_cnn1.ckpt')
    if i % 50 == 0:
        acc = accuracy(pred, y_input)  # accuracy on the current training batch
        print('accuracy:', sess.run(acc))
Once the whole network is trained, we build a new one to see how it performs.
import tensorflow as tf
import numpy as np
def _parse_function(record):
    """Extracts features and labels.

    Args:
      record: A serialized tf.train.Example from the TFRecord file.
    Returns:
      A `tuple` `(features, labels)`:
        features: A [150, 150, 3] uint8 image tensor.
        labels: A tensor with the corresponding one-hot label.
    """
    features = {
        "label": tf.FixedLenFeature([2], tf.int64),   # one-hot label, e.g. [1, 0] for cat
        "img_raw": tf.FixedLenFeature([], tf.string)  # raw RGB bytes of the 150x150 image
    }
    parsed_features = tf.parse_single_example(record, features)
    img_raw = parsed_features['img_raw']
    img_raw = tf.decode_raw(img_raw, tf.uint8)
    img_raw = tf.reshape(img_raw, [150, 150, 3])
    labels = parsed_features['label']
    return img_raw, labels
def my_input_fn(input_filenames, num_epochs=None, shuffle=False):
    # Create a dataset and map it to features and labels.
    ds = tf.data.TFRecordDataset(input_filenames)
    ds = ds.map(_parse_function)
    if shuffle:
        ds = ds.shuffle(10000)
    # Batch the examples (the images are a fixed 150x150x3, so padding is a no-op here).
    ds = ds.padded_batch(25, ds.output_shapes)
    ds = ds.repeat(num_epochs)
    # Return the next batch of data.
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=.1)
    return tf.Variable(initial)

def biases_variable(shape):
    initial = tf.constant(.1, shape=shape)
    return tf.Variable(initial)
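A minimal sketch of how the evaluation could be finished, mirroring the training graph above and restoring the checkpoint. Treat the details (including the validation filename cats_and_dogs_validation_onehot.tfrecords) as my assumptions rather than the original code:

# Rebuild the same graph on the validation TFRecords and restore the trained weights.
val_batch = my_input_fn('cats_and_dogs_validation_onehot.tfrecords', num_epochs=1)
x_input = tf.cast(val_batch[0], tf.float32) / 255
y_input = tf.cast(val_batch[1], tf.float32)

w_conv1 = weight_variable([3, 3, 3, 6])
b_conv1 = biases_variable([6])
h_conv1 = tf.nn.relu(tf.nn.conv2d(x_input, w_conv1, strides=[1, 1, 1, 1], padding='SAME') + b_conv1)
h_pool1 = tf.nn.max_pool(h_conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
h_pool1_flat = tf.reshape(h_pool1, [-1, 75 * 75 * 6])

w_fc1 = weight_variable([75 * 75 * 6, 10])
b_fc1 = biases_variable([10])
h_fc1 = tf.nn.relu(tf.matmul(h_pool1_flat, w_fc1) + b_fc1)

w_fc2 = weight_variable([10, 2])
b_fc2 = biases_variable([2])
pred = tf.nn.softmax(tf.matmul(h_fc1, w_fc2) + b_fc2)

# Accuracy on one validation batch.
acc = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(pred, 1), tf.argmax(y_input, 1)), tf.float32))

sess = tf.Session()
saver = tf.train.Saver()  # the variables are created in the same order as in training
saver.restore(sess, 'my_net/simple_cnn1.ckpt')
print('validation accuracy:', sess.run(acc))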