【延伸阅读】让老照片重现光彩(五):Pix2PixHD模型源代码+中文注释
英伟达公司和加州大学伯克利分校于2018年发表的“基于有条件GAN的高分辨率图像合成及语义操控”项目,是本项目“让老照片重现光彩”的技术基础,算是一个前置开源项目。
“基于有条件GAN的高分辨率图像合成及语义操控”项目的技术核心是Pix2PixHD模型,我们在这里分享一下相关的源代码+中文注释,基于此可以加深对“让老照片重现光彩”项目的理解(尤其是,在老照片项目的模型与训练源代码尚未开源的情况下)。
Pix2PixHD模型使用PyTorch构建,代码清晰、整齐,相关的源代码主要是3个文件,分别是:./models/models.py、./models/pix2pixHD_model.py 和 ./models/networks.py
说明如下:
(1)./models/models.py
调用 Pix2PixHDModel() 创建模型。
import torch
# 创建模型,并返回模型
def create_model(opt):
    """Create, initialize and return the model selected by ``opt``.

    Args:
        opt: parsed options object. This function reads ``opt.model``,
            ``opt.isTrain``, ``opt.verbose``, ``opt.gpu_ids`` and ``opt.fp16``;
            the whole object is forwarded to ``model.initialize``.

    Returns:
        The initialized model. When training on at least one GPU without
        fp16, the model is wrapped in ``torch.nn.DataParallel``.
    """
    if opt.model == 'pix2pixHD':
        # Imported lazily so only the selected model's module is loaded.
        from .pix2pixHD_model import Pix2PixHDModel, InferenceModel
        if opt.isTrain:
            model = Pix2PixHDModel()
        else:
            # Forward-only variant used when not training.
            model = InferenceModel()
    else:
        from .ui_model import UIModel
        model = UIModel()

    model.initialize(opt)
    if opt.verbose:
        print("model [%s] was created" % (model.name()))

    # Multi-GPU data parallelism is only enabled for non-fp16 training.
    if opt.isTrain and len(opt.gpu_ids) and not opt.fp16:
        model = torch.nn.DataParallel(model, device_ids=opt.gpu_ids)
    return model
(2)./models/pix2pixHD_model.py
构建模型的核心内容:
定义有条件GAN(Pix2PixHDModel)的生成器、鉴别器、编码器(用于生成实例的低维特征);
定义损失函数(包括:GAN loss、VGG loss、特征匹配损失函数);
定义生成器和鉴别器的优化器(optimizer);
定义各模块的输入;
定义forward函数。
import numpy as np
import torch
import os
from torch.autograd import Variable
from util.image_pool import ImagePool
from .base_model import BaseModel
from . import networks
class Pix2PixHDModel(BaseModel):
def name(self):
    """Return the human-readable name of this model class."""
    return 'Pix2PixHDModel'
# Loss filter: g_gan, d_real and d_fake are always returned.
# g_gan_feat is the "feature matching loss" (equation (4) in the paper) and
# g_vgg is the VGG perceptual loss; each is returned only when its flag is set.
def init_loss_filter(self, use_gan_feat_loss, use_vgg_loss):
    """Build a function that drops the losses which are switched off.

    Args:
        use_gan_feat_loss: keep the GAN feature-matching loss when truthy.
        use_vgg_loss: keep the VGG loss when truthy.

    Returns:
        A callable ``loss_filter(g_gan, g_gan_feat, g_vgg, d_real, d_fake)``
        returning a list of the enabled entries, in that same order.
    """
    flags = (True, use_gan_feat_loss, use_vgg_loss, True, True)

    def loss_filter(g_gan, g_gan_feat, g_vgg, d_real, d_fake):
        losses = (g_gan, g_gan_feat, g_vgg, d_real, d_fake)
        return [loss for (loss, keep) in zip(losses, flags) if keep]

    return loss_filter
def initialize(self, opt):
    """Build the networks, loss functions and optimizers described by ``opt``.

    Creates the generator ``self.netG``; when training also the
    discriminator ``self.netD``; and, when features are generated rather than
    loaded, the encoder ``self.netE``. Optionally restores saved weights, and
    when training sets up the fake-image pool, the loss criteria and one Adam
    optimizer each for G (plus E) and D.

    Args:
        opt: parsed options object; forwarded to ``BaseModel.initialize``.

    Raises:
        NotImplementedError: if an image pool is requested
            (``opt.pool_size > 0``) together with more than one GPU.
    """
    # Presumably sets self.opt, self.gpu_ids and self.Tensor, which are read
    # below -- confirm against BaseModel.
    BaseModel.initialize(self, opt)
    # when training at full res this causes OOM
    if opt.resize_or_crop != 'none' or not opt.isTrain:
        torch.backends.cudnn.benchmark = True
    self.isTrain = opt.isTrain
    self.use_features = opt.instance_feat or opt.label_feat
    # Features are produced by the encoder unless precomputed ones are loaded.
    self.gen_features = self.use_features and not self.opt.load_features
    input_nc = opt.label_nc if opt.label_nc != 0 else opt.input_nc

    ##### define networks
    # Generator network
    netG_input_nc = input_nc
    if not opt.no_instance:
        netG_input_nc += 1  # extra channel for the instance map (separates instances)
    if self.use_features:
        netG_input_nc += opt.feat_num  # extra channels for the encoder feature map
    self.netG = networks.define_G(netG_input_nc, opt.output_nc, opt.ngf, opt.netG,
                                  opt.n_downsample_global, opt.n_blocks_global, opt.n_local_enhancers,
                                  opt.n_blocks_local, opt.norm, gpu_ids=self.gpu_ids)

    # Discriminator network
    if self.isTrain:
        use_sigmoid = opt.no_lsgan
        # The discriminator sees the conditioning input concatenated with an image.
        netD_input_nc = input_nc + opt.output_nc
        if not opt.no_instance:
            netD_input_nc += 1  # extra channel for the instance map
        self.netD = networks.define_D(netD_input_nc, opt.ndf, opt.n_layers_D, opt.norm, use_sigmoid,
                                      opt.num_D, not opt.no_ganFeat_loss, gpu_ids=self.gpu_ids)

    ### Encoder network (built through define_G with the 'encoder' type)
    if self.gen_features:
        self.netE = networks.define_G(opt.output_nc, opt.feat_num, opt.nef, 'encoder',
                                      opt.n_downsample_E, norm=opt.norm, gpu_ids=self.gpu_ids)
    if self.opt.verbose:
        print('---------- Networks initialized -------------')

    # load networks
    if not self.isTrain or opt.continue_train or opt.load_pretrain:
        pretrained_path = '' if not self.isTrain else opt.load_pretrain
        self.load_network(self.netG, 'G', opt.which_epoch, pretrained_path)
        if self.isTrain:
            self.load_network(self.netD, 'D', opt.which_epoch, pretrained_path)
        if self.gen_features:
            self.load_network(self.netE, 'E', opt.which_epoch, pretrained_path)

    # set loss functions and optimizers
    if self.isTrain:
        if opt.pool_size > 0 and (len(self.gpu_ids)) > 1:
            raise NotImplementedError("Fake Pool Not Implemented for MultiGPU")
        self.fake_pool = ImagePool(opt.pool_size)
        self.old_lr = opt.lr

        # define loss functions (used in forward()); the feature-matching and
        # VGG losses are on unless switched off through the options.
        self.loss_filter = self.init_loss_filter(not opt.no_ganFeat_loss, not opt.no_vgg_loss)

        # NOTE(review): the criterion definitions were missing from this
        # listing and are restored because forward() calls them; constructor
        # arguments follow the upstream pix2pixHD code -- confirm against
        # networks.py.
        self.criterionGAN = networks.GANLoss(use_lsgan=not opt.no_lsgan, tensor=self.Tensor)
        self.criterionFeat = torch.nn.L1Loss()
        if not opt.no_vgg_loss:
            self.criterionVGG = networks.VGGLoss(self.gpu_ids)

        # Names so we can breakout loss
        self.loss_names = self.loss_filter('G_GAN', 'G_GAN_Feat', 'G_VGG', 'D_real', 'D_fake')

        # initialize optimizers
        # optimizer G (also covers the encoder)
        if opt.niter_fix_global > 0:
            # Train only the parameters whose name starts with
            # 'model<n_local_enhancers>' for the first epochs.
            finetune_list = set()
            params = []
            for key, value in self.netG.named_parameters():
                if key.startswith('model' + str(opt.n_local_enhancers)):
                    params += [value]
                    finetune_list.add(key.split('.')[0])
            print('------------- Only training the local enhancer network (for %d epochs) ------------' % opt.niter_fix_global)
            print('The layers that are finetuned are ', sorted(finetune_list))
        else:
            params = list(self.netG.parameters())
        if self.gen_features:
            params += list(self.netE.parameters())
        self.optimizer_G = torch.optim.Adam(params, lr=opt.lr, betas=(opt.beta1, 0.999))

        # optimizer D
        params = list(self.netD.parameters())
        self.optimizer_D = torch.optim.Adam(params, lr=opt.lr, betas=(opt.beta1, 0.999))
# feat = feature, inst = instance.
# In a label map every pixel holds its object class; in an instance map every
# pixel holds the unique ID of the individual object it belongs to.
def encode_input(self, label_map, inst_map=None, real_image=None, feat_map=None, infer=False):
    """Move the raw inputs to the GPU and assemble the conditioning tensor.

    Args:
        label_map: label tensor, indexed here as 4-D. Used as-is when
            ``self.opt.label_nc == 0``, otherwise expanded to a one-hot
            tensor with ``self.opt.label_nc`` channels along dim 1.
        inst_map: instance map; required unless ``self.opt.no_instance`` is
            set, because its edge map is appended to the conditioning tensor.
        real_image: optional real image (needed for training).
        feat_map: optional precomputed feature map; only read when
            ``self.opt.load_features`` is set.
        infer: passed to ``Variable`` as ``volatile``.

    Returns:
        Tuple ``(input_label, inst_map, real_image, feat_map)``.
    """
    if self.opt.label_nc == 0:
        input_label = label_map.data.cuda()
    else:
        # create one-hot vector for label map
        size = label_map.size()
        oneHot_size = (size[0], self.opt.label_nc, size[2], size[3])
        input_label = torch.cuda.FloatTensor(torch.Size(oneHot_size)).zero_()
        input_label = input_label.scatter_(1, label_map.data.long().cuda(), 1.0)
        if self.opt.data_type == 16:
            input_label = input_label.half()

    # get edges from instance map and append them as one more channel
    if not self.opt.no_instance:
        inst_map = inst_map.data.cuda()
        edge_map = self.get_edges(inst_map)
        input_label = torch.cat((input_label, edge_map), dim=1)
    input_label = Variable(input_label, volatile=infer)

    # real images for training
    if real_image is not None:
        real_image = Variable(real_image.data.cuda())

    # instance map for feature encoding
    if self.use_features:
        # get precomputed feature maps
        if self.opt.load_features:
            feat_map = Variable(feat_map.data.cuda())
        # With label features the label map is returned in place of the instance map.
        if self.opt.label_feat:
            inst_map = label_map.cuda()

    return input_label, inst_map, real_image, feat_map
# Run the discriminator on (conditioning input, image).
def discriminate(self, input_label, test_image, use_pool=False):
    """Return the discriminator output for ``test_image`` given ``input_label``.

    The image is detached before concatenation, so no gradient flows back
    into whatever produced it.

    Args:
        input_label: conditioning tensor, concatenated along dim 1.
        test_image: image tensor to be judged.
        use_pool: when true, the concatenated input is first passed through
            ``self.fake_pool.query`` and its result is what gets judged.

    Returns:
        Whatever ``self.netD.forward`` returns.
    """
    input_concat = torch.cat((input_label, test_image.detach()), dim=1)
    if use_pool:
        fake_query = self.fake_pool.query(input_concat)
        return self.netD.forward(fake_query)
    return self.netD.forward(input_concat)
# Forward pass: run the model on the input data and compute all losses.
def forward(self, label, inst, image, feat, infer=False):
    """Generate a fake image and compute the generator/discriminator losses.

    Args:
        label: label map, passed to ``encode_input``.
        inst: instance map, passed to ``encode_input``.
        image: real image, passed to ``encode_input``.
        feat: precomputed feature map, passed to ``encode_input``.
        infer: when true the generated image is returned as well.

    Returns:
        A two-element list: the result of ``self.loss_filter`` applied to
        (G_GAN, G_GAN_Feat, G_VGG, D_real, D_fake), and the generated image,
        or ``None`` when ``infer`` is false.
    """
    # Encode Inputs
    input_label, inst_map, real_image, feat_map = self.encode_input(label, inst, image, feat)

    # Fake Generation
    if self.use_features:
        # Unless features are preloaded, the encoder extracts them from the real image.
        if not self.opt.load_features:
            feat_map = self.netE.forward(real_image, inst_map)
        # The generator input is the conditioning tensor plus the feature map.
        input_concat = torch.cat((input_label, feat_map), dim=1)
    else:
        input_concat = input_label
    fake_image = self.netG.forward(input_concat)

    # Fake Detection and Loss (discriminator side, goes through the image pool)
    pred_fake_pool = self.discriminate(input_label, fake_image, use_pool=True)
    loss_D_fake = self.criterionGAN(pred_fake_pool, False)

    # Real Detection and Loss
    pred_real = self.discriminate(input_label, real_image)
    loss_D_real = self.criterionGAN(pred_real, True)

    # GAN loss (Fake Passability Loss): fake_image is NOT detached here, so
    # this term can back-propagate into the generator.
    pred_fake = self.netD.forward(torch.cat((input_label, fake_image), dim=1))
    loss_G_GAN = self.criterionGAN(pred_fake, True)

    # GAN feature matching loss: summed over every discriminator scale i and
    # every output j of that discriminator except the last one.
    loss_G_GAN_Feat = 0
    if not self.opt.no_ganFeat_loss:
        feat_weights = 4.0 / (self.opt.n_layers_D + 1)
        D_weights = 1.0 / self.opt.num_D
        for i in range(self.opt.num_D):
            for j in range(len(pred_fake[i]) - 1):
                # NOTE(review): the second operand was missing from the
                # listing; restored from the upstream pix2pixHD code.
                # Real features are detached, so they get no gradient.
                loss_G_GAN_Feat += D_weights * feat_weights * \
                    self.criterionFeat(pred_fake[i][j], pred_real[i][j].detach()) * self.opt.lambda_feat

    # VGG feature matching loss between the fake and the real image
    loss_G_VGG = 0
    if not self.opt.no_vgg_loss:
        loss_G_VGG = self.criterionVGG(fake_image, real_image) * self.opt.lambda_feat

    # Only return the fake_B image if necessary to save BW
    return [self.loss_filter(loss_G_GAN, loss_G_GAN_Feat, loss_G_VGG, loss_D_real, loss_D_fake),
            None if not infer else fake_image]
# Inference: generate a fake image from labels, instance edges and features.
def inference(self, label, inst, image=None):
    """Generate an image for ``label``/``inst`` without computing any loss.

    Args:
        label: label map; wrapped in ``Variable`` and passed to ``encode_input``.
        inst: instance map; wrapped in ``Variable`` and passed to ``encode_input``.
        image: optional real image; it is what the encoder reads when
            ``self.opt.use_encoded_image`` is set.

    Returns:
        The generator output for the encoded input.
    """
    # Encode Inputs
    image = Variable(image) if image is not None else None
    input_label, inst_map, real_image, _ = self.encode_input(Variable(label), Variable(inst), image, infer=True)

    # Fake Generation
    if self.use_features:
        if self.opt.use_encoded_image:
            # encode the real image to get feature map
            feat_map = self.netE.forward(real_image, inst_map)
        else:
            # sample clusters from precomputed features
            feat_map = self.sample_features(inst_map)
        # The generator input is the conditioning tensor plus the feature map.
        input_concat = torch.cat((input_label, feat_map), dim=1)
    else:
        input_concat = input_label

    # On torch 0.4.x gradient tracking is switched off explicitly.
    if torch.__version__.startswith('0.4'):
        with torch.no_grad():
            fake_image = self.netG.forward(input_concat)
    else:
        fake_image = self.netG.forward(input_concat)
    return fake_image
def sample_features(self, inst):
# read precomputed feature clusters
cluster_path = os.path.join(self.opt.checkpoints_dir, self.opt.name, self.opt.cluster_path)
features_clustered = np.load(cluster_path, encoding='latin1').item()
# randomly sample from the feature clusters
# 从特征簇中随机采样
inst_np = inst.cpu().numpy().astype(int)
feat_map = self.Tensor(inst.size()[0], self.opt.feat_num, inst.size()[2], inst.size()[3])  # feat_map.size
for i in np.unique(inst_np):  # 对于⼀维数组或者列表,unique()去除其中重复的元素,并按元素由⼤到⼩返回⼀个新的⽆元素重复的元组或者列表                                      # 确定具有唯⼀性的特征代码,并将特征代码排序
label = i if i < 1000 else i//1000
if label in features_clustered:
feat = features_clustered[label]  # 从特征簇中取出当前特征代码对应的特征向量

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。

发表评论