Fully Convolutional Networks (FCN) in TensorFlow: a close reading of the code. This post walks through the entire source and adds detailed comments.
Notes:
The debug flag can be set for training to record extra information about activations, gradients, variables, and so on.
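Because these switches are defined as tf.flags in the script, they can also be overridden on the command line. A typical invocation (assuming the FCN.tensorflow dependencies and data layout are already in place):

python FCN.py --mode=train --debug=True --batch_size=2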
FCN.py
# coding=utf-8
from __future__ import print_function
import tensorflow as tf
import numpy as np
import TensorflowUtils as utils
import read_MITSceneParsingData as scene_parsing
import datetime
import BatchDatsetReader as dataset
from six.moves import xrange
# Flag and constant definitions
FLAGS = tf.flags.FLAGS
tf.flags.DEFINE_integer("batch_size", "2", "batch size for training")
tf.flags.DEFINE_string("logs_dir", "logs/", "path to logs directory")
tf.flags.DEFINE_string("data_dir", "Data_zoo/MIT_SceneParsing/", "path to dataset")
tf.flags.DEFINE_float("learning_rate", "1e-6", "Learning rate for Adam Optimizer")
tf.flags.DEFINE_string("model_dir", "Model_zoo/", "Path to vgg model mat")
tf.flags.DEFINE_bool('debug', "True", "Debug mode: True/ False")
tf.flags.DEFINE_string('mode', "train", "Mode train/ test/ visualize")
MODEL_URL = 'http://www.vlfeat.org/matconvnet/models/beta16/imagenet-vgg-verydeep-19.mat'
MAX_ITERATION = 20000        # number of training iterations
NUM_OF_CLASSESS = 151        # number of classes
IMAGE_SIZE = 224             # input image size
fine_tuning = False
# VGG network. weights is the collection of pretrained weights; image is the image tensor to push through the network.
def vgg_net(weights, image):
    # The first five convolutional blocks of VGG-19
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3',
        'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3',
        'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3',
        'relu5_3', 'conv5_4', 'relu5_4'
    )
    net = {}
    current = image    # running activation, starting from the input image
    for i, name in enumerate(layers):
        kind = name[:4]
        if kind == 'conv':
            kernels, bias = weights[i][0][0][0][0]
            # matconvnet: weights are [width, height, in_channels, out_channels]
            # tensorflow: weights are [height, width, in_channels, out_channels]
            kernels = utils.get_variable(np.transpose(kernels, (1, 0, 2, 3)), name=name + "_w")    # e.g. conv1_1_w
            bias = utils.get_variable(bias.reshape(-1), name=name + "_b")                          # e.g. conv1_1_b
            current = utils.conv2d_basic(current, kernels, bias)    # forward pass through this conv layer
        elif kind == 'relu':
            current = tf.nn.relu(current, name=name)    # e.g. relu1_1
            if FLAGS.debug:    # debug mode on or off
                utils.add_activation_summary(current)    # record an activation summary
        elif kind == 'pool':
            # All five VGG pooling stages use stride 2, so each one halves the spatial size.
            # The first four pools are handled here with average pooling;
            # the fifth pool is applied later, in inference(), with max pooling.
            # pool1 shrinks the size by 2x, pool2 by 4x, pool3 by 8x, pool4 by 16x.
            current = utils.avg_pool_2x2(current)
        net[name] = current    # store every layer's output in the net dict
    return net
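# Added sanity check, not in the original script: for a 224x224 input the feature
# maps shrink as 224 -> 112 (pool1) -> 56 (pool2) -> 28 (pool3) -> 14 (pool4);
# pool5, applied in inference() below, then gives 7, i.e. 1/32 of the input.
# for p in range(1, 6):
#     print("pool%d output: %d" % (p, IMAGE_SIZE // 2 ** p))    # 112, 56, 28, 14, 7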
# Inference pipeline. image is the input image; keep_prob is the dropout keep probability.
def inference(image, keep_prob):
    """
    Semantic segmentation network definition
    :param image: input image. Should have values in range 0-255
    :param keep_prob:
    :return:
    """
    # Fetch the pretrained VGG network
    print("setting up vgg initialized conv layers ...")
    # model_dir: Model_zoo/
    # MODEL_URL: download URL for VGG-19
    model_data = utils.get_model_data(FLAGS.model_dir, MODEL_URL)    # contents of the VGG-19 .mat model
    mean = model_data['normalization'][0][0][0]                      # image mean
    mean_pixel = np.mean(mean, axis=(0, 1))                          # per-channel (RGB) mean
    weights = np.squeeze(model_data['layers'])                       # squeeze out the singleton dims, leaving the weights
    processed_image = utils.process_image(image, mean_pixel)         # subtract the mean from the image
    with tf.variable_scope("inference"):                    # variable scope "inference"
        image_net = vgg_net(weights, processed_image)       # run VGG on the image; returns every layer's output
        conv_final_layer = image_net["conv5_3"]             # final VGG conv output
        pool5 = utils.max_pool_2x2(conv_final_layer)        # 1/32 of the input size

        W6 = utils.weight_variable([7, 7, 512, 4096], name="W6")    # layer 6 weights and bias
        b6 = utils.bias_variable([4096], name="b6")
        conv6 = utils.conv2d_basic(pool5, W6, b6)
        relu6 = tf.nn.relu(conv6, name="relu6")
        if FLAGS.debug:
            utils.add_activation_summary(relu6)
        relu_dropout6 = tf.nn.dropout(relu6, keep_prob=keep_prob)

        W7 = utils.weight_variable([1, 1, 4096, 4096], name="W7")    # layer 7: 1x1 convolution
        b7 = utils.bias_variable([4096], name="b7")
        conv7 = utils.conv2d_basic(relu_dropout6, W7, b7)
        relu7 = tf.nn.relu(conv7, name="relu7")
        if FLAGS.debug:
            utils.add_activation_summary(relu7)
        relu_dropout7 = tf.nn.dropout(relu7, keep_prob=keep_prob)

        W8 = utils.weight_variable([1, 1, 4096, NUM_OF_CLASSESS], name="W8")
        b8 = utils.bias_variable([NUM_OF_CLASSESS], name="b8")
        conv8 = utils.conv2d_basic(relu_dropout7, W8, b8)    # layer 8: 1x1 conv scoring the NUM_OF_CLASSESS classes
        # annotation_pred1 = tf.argmax(conv8, dimension=3, name="prediction1")

        # now to upscale to actual image size
        deconv_shape1 = image_net["pool4"].get_shape()    # shape of pool4 (1/16 of the input), [b, h, w, c], used for fusion
        # Deconvolution weights W_t1 have layout [H, W, out_channels, in_channels]:
        # out_channels matches pool4's channel count, in_channels matches conv8's.
        # To double the size: stride = 2, kernel_size = 4.
        W_t1 = utils.weight_variable([4, 4, deconv_shape1[3].value, NUM_OF_CLASSESS], name="W_t1")
        b_t1 = utils.bias_variable([deconv_shape1[3].value], name="b_t1")
        # Upsample conv8 2x so the feature map matches pool4's size and channel count
        conv_t1 = utils.conv2d_transpose_strided(conv8, W_t1, b_t1, output_shape=tf.shape(image_net["pool4"]))
        fuse_1 = tf.add(conv_t1, image_net["pool4"], name="fuse_1")    # fuse by element-wise addition

        # pool3 is 1/8 of the original size
        deconv_shape2 = image_net["pool3"].get_shape()
        # out_channels matches pool3's channel count, in_channels matches pool4's
        W_t2 = utils.weight_variable([4, 4, deconv_shape2[3].value, deconv_shape1[3].value], name="W_t2")
        b_t2 = utils.bias_variable([deconv_shape2[3].value], name="b_t2")
        # Upsample the previous fusion result fuse_1 2x, to the same size as pool3
        conv_t2 = utils.conv2d_transpose_strided(fuse_1, W_t2, b_t2, output_shape=tf.shape(image_net["pool3"]))
        # Fuse again: deconv(fuse_1) + pool3
        fuse_2 = tf.add(conv_t2, image_net["pool3"], name="fuse_2")

        shape = tf.shape(image)    # original image size
        # Stack the target output shape for the last deconvolution: [b, original H, original W, number of classes]
        deconv_shape3 = tf.stack([shape[0], shape[1], shape[2], NUM_OF_CLASSESS])
        # Deconvolution weights: kernel_size = 16 for the 8x upsampling;
        # out_channels is the number of classes, in_channels is pool3's channel count
        W_t3 = utils.weight_variable([16, 16, NUM_OF_CLASSESS, deconv_shape2[3].value], name="W_t3")
        b_t3 = utils.bias_variable([NUM_OF_CLASSESS], name="b_t3")
        # Deconvolve fuse_2; the output has shape [b, original H, original W, number of classes]
        conv_t3 = utils.conv2d_transpose_strided(fuse_2, W_t3, b_t3, output_shape=deconv_shape3, stride=8)

        # conv_t3 now has the same spatial size as the input image and one channel per class (151 here).
        # For each pixel, argmax over the channel axis (axis 3) picks the class with the highest
        # score, and the pixel is assigned to that class.
        # The result is one map per image with a class id at every position, shape [b, h, w].
        annotation_pred = tf.argmax(conv_t3, dimension=3, name="prediction")

    # expand along axis 3 to get [b, h, w, c] with c = 1; conv_t3 keeps the 151-channel score maps
    return tf.expand_dims(annotation_pred, dim=3), conv_t3
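# Reference sketch, not in this file: utils.conv2d_transpose_strided is defined in
# TensorflowUtils.py and presumably wraps tf.nn.conv2d_transpose roughly like this:
#
#     def conv2d_transpose_strided(x, W, b, output_shape=None, stride=2):
#         conv = tf.nn.conv2d_transpose(x, W, output_shape,
#                                       strides=[1, stride, stride, 1], padding="SAME")
#         return tf.nn.bias_add(conv, b)
#
# With padding="SAME" the output H/W is stride * input H/W, which is why kernel 4 /
# stride 2 doubles a map (conv_t1, conv_t2) and kernel 16 / stride 8 brings fuse_2
# back to the full image size (conv_t3).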
def train(loss_val, var_list):
    """
    :param loss_val: the loss to minimize
    :param var_list: the variables to optimize
    :return:
    """
    optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
    grads = optimizer.compute_gradients(loss_val, var_list=var_list)
    if FLAGS.debug:
        # print(len(var_list))
        for grad, var in grads:
            utils.add_gradient_summary(grad, var)
    return optimizer.apply_gradients(grads)    # op that applies the computed gradients
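# Note: compute_gradients + apply_gradients is split up here only so the per-variable
# gradients can be summarized in debug mode; with debug off it is equivalent to:
#     return tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(loss_val, var_list=var_list)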
def main(argv=None):
    # dropout keep probability
    keep_probability = tf.placeholder(tf.float32, name="keep_probabilty")
    # placeholder for the input images
    image = tf.placeholder(tf.float32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 3], name="input_image")
    # placeholder for the labels
    annotation = tf.placeholder(tf.int32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 1], name="annotation")

    # Run a batch through the network: prediction map [b, h, w, 1] and class score maps [b, h, w, 151]
    pred_annotation, logits = inference(image, keep_probability)
    tf.summary.image("input_image", image, max_outputs=2)
    tf.summary.image("ground_truth", tf.cast(annotation, tf.uint8), max_outputs=2)
    tf.summary.image("pred_annotation", tf.cast(pred_annotation, tf.uint8), max_outputs=2)
    # Spatial cross-entropy loss between logits [b, h, w, 151] and labels [b, h, w], compared per image
    loss = tf.reduce_mean((tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                                          labels=tf.squeeze(annotation, squeeze_dims=[3]),
                                                                          name="entropy")))
    tf.summary.scalar("entropy", loss)

    # list of trainable variables
    trainable_var = tf.trainable_variables()
    if FLAGS.debug:
        for var in trainable_var:
            utils.add_to_regularization_and_summary(var)
    # build the training op from the loss and the trainable variables
    train_op = train(loss, trainable_var)
print("Setting up ")
# ⽣成绘图数据
summary_op = _all()
print("Setting up ")
# data_dir = Data_zoo/MIT_SceneParsing/
# training: [{image: 图⽚全路径, annotation:标签全路径, filename:图⽚名字}] [{}][{}]
train_records, valid_records = ad_dataset(FLAGS.data_dir)
print(len(train_records))  # 长度
print(len(valid_records))
print("Setting up dataset reader")
image_options = {'resize': True, 'resize_size': IMAGE_SIZE}
de == 'train':
# 读取图⽚产⽣类对象其中包含所有图⽚信息
train_dataset_reader = dataset.BatchDatset(train_records, image_options)
validation_dataset_reader = dataset.BatchDatset(valid_records, image_options)
    sess = tf.Session()

    print("Setting up Saver...")
    saver = tf.train.Saver()
    summary_writer = tf.summary.FileWriter(FLAGS.logs_dir, sess.graph)

    sess.run(tf.global_variables_initializer())
    # logs/
    if fine_tuning:
        ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir)    # resume training from a checkpoint
        if ckpt and ckpt.model_checkpoint_path:                 # restore the session if a checkpoint file exists
            saver.restore(sess, ckpt.model_checkpoint_path)
            print("Model restored...")
de == "train":
for itr in range(MAX_ITERATION):
# 读取下⼀batch
train_images, train_annotations = train__batch(FLAGS.batch_size)
feed_dict = {image: train_images, annotation: train_annotations, keep_probability: 0.85}
# 迭代优化需要训练的变量
sess.run(train_op, feed_dict=feed_dict)
if itr % 10 == 0:
# 迭代10次打印显⽰
train_loss, summary_str = sess.run([loss, summary_op], feed_dict=feed_dict)
print("Step: %d, Train_loss:%g" % (itr, train_loss))
summary_writer.add_summary(summary_str, itr)
if itr % 500 == 0:
# 迭代500 次验证
valid_images, valid_annotations = validation__batch(FLAGS.batch_size)                valid_loss = sess.run(loss, feed_dict={image: valid_images, annotation: valid_annotations,                                                      keep_probability: 1.0})
print("%s ---> Validation_loss: %g" % (w(), valid_loss))
# 保存模型
saver.save(sess, FLAGS.logs_dir + "model.ckpt", itr)
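
if __name__ == "__main__":
    tf.app.run()    # standard TF1 entry point: parse the flags above, then call main()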
