基于VGG模型图像风格转换功能实现资源-CSDN文库

共1个文件

py：1个

深度学习

自然语言处理

需积分: 5 92 浏览量 2024-03-19 21:52:36 上传评论收藏 3KB ZIP 举报

资源推荐

资源详情

资源评论

收起资源包目录

基于VGG模型图像风格转换功能实现.zip （1个子文件）

基于VGG模型图像风格转换功能实现.py 5KB

# Neural style transfer with a pretrained VGG16 network (TensorFlow 2 / Keras).
#
# NOTE(review): this listing was recovered from a web page that had collapsed
# the whole script onto three physical lines and had pasted an uploader's
# advertisement over a stretch of code in the middle. The line structure has
# been restored below. The lost stretch is NOT reconstructed, except where it
# is explicitly marked; see the "MISSING SECTION" note further down for the
# names that still have to be supplied before the script can run.

import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
from PIL import Image

# Target length (in pixels) of the longest image side after resizing.
max_dim = 800
# Path of the content image.
content_path = 'content image.jpg'
# Path of the style image.
style_path = 'style image.jpg'
# Weight of the style loss.
style_weight = 10
# Weight of the content loss.
content_weight = 1
# Weight of the total-variation regularisation term.
total_variation_weight = 1e5
# Number of training iterations (identifier kept exactly as spelled in the
# original so that any external reference to it keeps working).
stpes = 301
# Whether to save the images produced during training.
# NOTE(review): the only code that reads this flag is commented out at the
# bottom of the script, so the flag currently has no effect.
save_img = True


def load_img(path_to_img):
    """Load an image file as a batched float32 tensor.

    The file is decoded to 3 channels, resized so that its longest side is
    ``max_dim`` pixels (aspect ratio kept, sizes truncated to int), and given
    a leading batch axis.

    Args:
        path_to_img: Path of the image file to read.

    Returns:
        A float32 tensor with a leading axis of size 1 followed by the
        resized image axes.
    """
    raw = tf.io.read_file(path_to_img)
    # Decode to a 3-channel float32 image.
    img = tf.image.decode_image(raw, channels=3, dtype=tf.float32)
    # Spatial size (everything except the channel axis) as floats.
    shape = tf.cast(tf.shape(img)[:-1], tf.float32)
    # Scale factor that maps the longest side onto max_dim.
    long_dim = tf.reduce_max(shape)
    scale = max_dim / long_dim
    new_shape = tf.cast(shape * scale, tf.int32)
    img = tf.image.resize(img, new_shape)
    # Add the batch axis expected by the network.
    return img[tf.newaxis, :]


def imshow(image, title=None):
    """Display an image with matplotlib, axes hidden.

    Args:
        image: Image tensor; if it has more than 3 axes the leading (batch)
            axis is removed before plotting.
        title: Optional title drawn above the image.
    """
    if len(image.shape) > 3:
        # Drop only the batch axis, e.g. (1, 300, 300, 3) -> (300, 300, 3).
        # Squeezing without an axis would also drop any other size-1 axis.
        image = tf.squeeze(image, axis=0)
    plt.imshow(image)
    if title:
        plt.title(title)
    plt.axis('off')
    plt.show()


# Load and display the content and style images.
content_image = load_img(content_path)
style_image = load_img(style_path)
imshow(content_image, 'Content Image')
imshow(style_image, 'Style Image')

# Layer used for the content loss. Only one layer is compared; according to
# the original author, using several layers changes the result very little.
content_layers = ['block5_conv2']
# Layers used for the style loss.
style_layers = [
    'block1_conv1',
    'block2_conv1',
    'block3_conv1',
    'block4_conv1',
    'block5_conv1',
]
# Layer counts.
num_content_layers = len(content_layers)
num_style_layers = len(style_layers)


def vgg_layers(layer_names):
    """Build a model that returns the outputs of the named VGG16 layers.

    Args:
        layer_names: Names of the VGG16 layers whose outputs are wanted.

    Returns:
        A ``tf.keras.Model`` with the same input as VGG16 and one output per
        entry of ``layer_names``, in the same order.
    """
    # Convolutional part of VGG16 only, ImageNet weights, frozen.
    vgg = tf.keras.applications.VGG16(include_top=False, weights='imagenet')
    vgg.trainable = False
    outputs = [vgg.get_layer(name).output for name in layer_names]
    return tf.keras.Model([vgg.input], outputs)


# Model that outputs the style-layer features.
style_extractor = vgg_layers(style_layers)
# VGG16 preprocessing of the style image scaled by 255.
preprocessed_input = tf.keras.applications.vgg16.preprocess_input(style_image*255)
# Style-layer outputs for the style image.
# NOTE(review): style_extractor / style_outputs are not used anywhere in the
# surviving part of the script.
style_outputs = style_extractor(preprocessed_input)


def gram_matrix(input_tensor):
    """Compute the Gram matrix of a batch of feature maps.

    Args:
        input_tensor: 4-axis feature tensor indexed as ``b, i, j, c`` in the
            einsum below; e.g. shape (1, 300, 200, 32).

    Returns:
        Tensor of shape ``(b, c, c)`` (e.g. (1, 32, 32)) holding the
        channel-to-channel correlations, divided by the number of spatial
        positions ``i * j``.
    """
    # Sum over both spatial axes of the product of every channel pair.
    result = tf.linalg.einsum('bijc,bijd->bcd', input_tensor, input_tensor)
    input_shape = tf.shape(input_tensor)
    # Number of spatial positions (second axis size times third axis size).
    num_locations = tf.cast(input_shape[1] * input_shape[2], tf.float32)
    return result / num_locations


class StyleContentModel(tf.keras.models.Model):
    """Model intended to return both style and content features of an image."""

    def __init__(self, style_layers, content_layers):
        """Create the feature extractor.

        Args:
            style_layers: Names of the layers used for the style loss.
            content_layers: Names of the layers used for the content loss.
        """
        super().__init__()
        # One VGG16-based model emitting style outputs first, then content.
        self.vgg = vgg_layers(style_layers + content_layers)
        self.style_layers = style_layers
        self.content_layers = content_layers
        self.num_style_layers = len(style_layers)

    def call(self, inputs):
        """Run the feature extractor on ``inputs``.

        Args:
            inputs: Image batch; it is multiplied by 255 before VGG16
                preprocessing.

        Raises:
            NotImplementedError: Always. The remainder of this method is
                missing from the recovered source (see note below).
        """
        # VGG16 preprocessing of the input scaled by 255.
        preprocessed_input = tf.keras.applications.vgg16.preprocess_input(inputs * 255.0)
        # Outputs of the style layers followed by the content layers.
        outputs = self.vgg(preprocessed_input)
        # The original went on to separate the style and content outputs;
        # that code was lost. Fail loudly instead of silently returning None.
        raise NotImplementedError(
            "StyleContentModel.call is truncated in the recovered source"
        )


# ---------------------------------------------------------------------------
# MISSING SECTION
# The source text is cut at this point. The following names are used further
# down but their definitions did not survive and must be restored from the
# original file before the script can run:
#   extractor, style_content_loss, clip_0_1, opt, image
# ---------------------------------------------------------------------------


def total_variation_loss(image):
    """Mean squared difference between neighbouring pixels along two axes.

    NOTE(review): only the ``y_deltas`` line and the ``return`` line of this
    function survived. The function name and parameter are taken from the
    call in ``train_step``; the ``x_deltas`` line is reconstructed by
    symmetry with ``y_deltas`` - TODO confirm against the original file.
    """
    # Reconstructed (see NOTE above): differences along the third axis.
    x_deltas = image[:, :, 1:, :] - image[:, :, :-1, :]
    # Differences along the second axis.
    y_deltas = image[:, 1:, :, :] - image[:, :-1, :, :]
    return tf.reduce_mean(x_deltas ** 2) + tf.reduce_mean(y_deltas ** 2)


# tf.function converts the Python function into a TensorFlow graph, which the
# original author uses to speed up execution.
@tf.function()
def train_step(image):
    """Perform one optimisation step on the generated image, in place.

    Args:
        image: The image being optimised. ``assign`` is called on it, so it
            is presumably a ``tf.Variable`` - its definition is in the
            missing section.
    """
    with tf.GradientTape() as tape:
        outputs = extractor(image)
        loss = style_content_loss(outputs)
        loss += total_variation_weight * total_variation_loss(image)
    # The "weights" being trained are the pixel values of the image itself.
    grad = tape.gradient(loss, image)
    opt.apply_gradients([(grad, image)])
    image.assign(clip_0_1(image))


for n in range(stpes):
    train_step(image)
    if n % 5 == 0:
        # Show the current state of the generated image.
        imshow(image.read_value(), "Train step: {}".format(n))
        # Saving of intermediate images (disabled in the original):
        # if save_img == True:
        #     s_image = tf.squeeze(image)
        #     s_image = Image.fromarray(np.uint8(s_image.numpy() * 255))
        #     s_image.save('temp/' + 'steps_' + str(n) + '.jpg')

评论收藏

内容反馈