import torch
from models import BaseVAE
from torch import nn
from torch.nn import functional as F
from .types_ import *
from math import exp
class MSSIMVAE(BaseVAE):
def __init__(self,
in_channels: int,
latent_dim: int,
hidden_dims: List = None,
window_size: int = 11,
size_average: bool = True,
**kwargs) -> None:
super(MSSIMVAE, self).__init__()
self.latent_dim = latent_dim
self.in_channels = in_channels
modules = []
if hidden_dims is None:
hidden_dims = [32, 64, 128, 256, 512]
# Build Encoder
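        # Each block halves the spatial resolution with its stride-2 conv.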
for h_dim in hidden_dims:
modules.append(
nn.Sequential(
                    nn.Conv2d(in_channels, out_channels=h_dim,
                              kernel_size=3, stride=2, padding=1),
nn.BatchNorm2d(h_dim),
nn.LeakyReLU())
)
in_channels = h_dim
self.encoder = nn.Sequential(*modules)
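        # With the default hidden_dims, five stride-2 convs reduce a 64x64 input
        # to a 2x2 feature map, hence the factor of 4 below (and the hard-coded
        # view in decode).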
self.fc_mu = nn.Linear(hidden_dims[-1]*4, latent_dim)
self.fc_var = nn.Linear(hidden_dims[-1]*4, latent_dim)
# Build Decoder
modules = []
self.decoder_input = nn.Linear(latent_dim, hidden_dims[-1] * 4)
hidden_dims.reverse()
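        # NOTE: reverse() mutates the caller's hidden_dims list in place.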
for i in range(len(hidden_dims) - 1):
modules.append(
nn.Sequential(
nn.ConvTranspose2d(hidden_dims[i],
hidden_dims[i + 1],
kernel_size=3,
                                       stride=2,
padding=1,
output_padding=1),
nn.BatchNorm2d(hidden_dims[i + 1]),
nn.LeakyReLU())
)
self.decoder = nn.Sequential(*modules)
self.final_layer = nn.Sequential(
nn.ConvTranspose2d(hidden_dims[-1],
hidden_dims[-1],
kernel_size=3,
stride=2,
padding=1,
output_padding=1),
nn.BatchNorm2d(hidden_dims[-1]),
nn.LeakyReLU(),
            nn.Conv2d(hidden_dims[-1], out_channels=3,
                      kernel_size=3, padding=1),
nn.Tanh())
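        # Tanh keeps reconstructions in [-1, 1]; inputs are assumed to be
        # normalised to the same range before the MS-SSIM loss is computed.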
self.mssim_loss = MSSIM(self.in_channels,
window_size,
size_average)
def encode(self, input: Tensor) -> List[Tensor]:
"""
Encodes the input by passing through the encoder network
and returns the latent codes.
:param input: (Tensor) Input tensor to encoder [N x C x H x W]
        :return: (List[Tensor]) List of latent codes [mu, log_var]
"""
result = self.encoder(input)
result = torch.flatten(result, start_dim=1)
# Split the result into mu and var components
# of the latent Gaussian distribution
mu = self.fc_mu(result)
log_var = self.fc_var(result)
return [mu, log_var]
def decode(self, z: Tensor) -> Tensor:
"""
Maps the given latent codes
onto the image space.
:param z: (Tensor) [B x D]
:return: (Tensor) [B x C x H x W]
"""
result = self.decoder_input(z)
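        # NOTE: hard-coded to the default hidden_dims (512 channels, 2x2 map).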
result = result.view(-1, 512, 2, 2)
result = self.decoder(result)
result = self.final_layer(result)
return result
def reparameterize(self, mu: Tensor, logvar: Tensor) -> Tensor:
"""
        Reparameterization trick: sample from N(mu, var) using N(0, 1).
        :param mu: (Tensor) Mean of the latent Gaussian [B x D]
        :param logvar: (Tensor) Log variance of the latent Gaussian [B x D]
:return: (Tensor) [B x D]
"""
std = torch.exp(0.5 * logvar)
eps = torch.randn_like(std)
return eps * std + mu
def forward(self, input: Tensor, **kwargs) -> List[Tensor]:
mu, log_var = self.encode(input)
z = self.reparameterize(mu, log_var)
return [self.decode(z), input, mu, log_var]
def loss_function(self,
*args: Any,
**kwargs) -> dict:
"""
Computes the VAE loss function.
KL(N(\mu, \sigma), N(0, 1)) = \log \frac{1}{\sigma} + \frac{\sigma^2 + \mu^2}{2} - \frac{1}{2}
:param args:
:param kwargs:
:return:
"""
recons = args[0]
input = args[1]
mu = args[2]
log_var = args[3]
kld_weight = kwargs['M_N'] # Account for the minibatch samples from the dataset
recons_loss = self.mssim_loss(recons, input)
kld_loss = torch.mean(-0.5 * torch.sum(1 + log_var - mu ** 2 - log_var.exp(), dim = 1), dim = 0)
loss = recons_loss + kld_weight * kld_loss
return {'loss': loss, 'Reconstruction_Loss':recons_loss, 'KLD':-kld_loss}
def sample(self,
num_samples:int,
current_device: int, **kwargs) -> Tensor:
"""
        Samples from the latent space and returns the corresponding
image space map.
:param num_samples: (Int) Number of samples
:param current_device: (Int) Device to run the model
:return: (Tensor)
"""
z = torch.randn(num_samples,
self.latent_dim)
z = z.cuda(current_device)
samples = self.decode(z)
return samples
def generate(self, x: Tensor, **kwargs) -> Tensor:
"""
Given an input image x, returns the reconstructed image
:param x: (Tensor) [B x C x H x W]
:return: (Tensor) [B x C x H x W]
"""
return self.forward(x)[0]
class MSSIM(nn.Module):
def __init__(self,
in_channels: int = 3,
                 window_size: int = 11,
                 size_average: bool = True) -> None:
"""
Computes the differentiable MS-SSIM loss
Reference:
[1] https://github.com/jorge-pessoa/pytorch-msssim/blob/dev/pytorch_msssim/__init__.py
(MIT License)
:param in_channels: (Int)
:param window_size: (Int)
:param size_average: (Bool)
"""
super(MSSIM, self).__init__()
self.in_channels = in_channels
self.window_size = window_size
self.size_average = size_average
    def gaussian_window(self, window_size: int, sigma: float) -> Tensor:
        # 1D Gaussian over the window (note the negative exponent), normalised
        # to sum to 1.
        kernel = torch.tensor([exp(-(x - window_size // 2) ** 2 / (2 * sigma ** 2))
                               for x in range(window_size)])
        return kernel / kernel.sum()
    def create_window(self, window_size: int, in_channels: int) -> Tensor:
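        # Outer product of the 1D Gaussian with itself gives the 2D window,
        # repeated once per channel for a grouped (depthwise) convolution.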
_1D_window = self.gaussian_window(window_size, 1.5).unsqueeze(1)
_2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0)
window = _2D_window.expand(in_channels, 1, window_size, window_size).contiguous()
return window
def ssim(self,
img1: Tensor,
img2: Tensor,
window_size: int,
in_channel: int,
             size_average: bool) -> Tuple[Tensor, Tensor]:
device = img1.device
window = self.create_window(window_size, in_channel).to(device)
        mu1 = F.conv2d(img1, window, padding=window_size // 2, groups=in_channel)
        mu2 = F.conv2d(img2, window, padding=window_size // 2, groups=in_channel)

        mu1_sq = mu1.pow(2)
        mu2_sq = mu2.pow(2)
        mu1_mu2 = mu1 * mu2

        sigma1_sq = F.conv2d(img1 * img1, window, padding=window_size // 2, groups=in_channel) - mu1_sq
        sigma2_sq = F.conv2d(img2 * img2, window, padding=window_size // 2, groups=in_channel) - mu2_sq
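        sigma12 = F.conv2d(img1 * img2, window, padding=window_size // 2, groups=in_channel) - mu1_mu2

        # The rest of this method and the forward pass below were truncated in
        # this copy of the file; they are reconstructed here as a sketch
        # following the pytorch-msssim implementation referenced in [1].
        img_range = img1.max() - img1.min()  # Dynamic range of the input
        C1 = (0.01 * img_range) ** 2  # Stability constants from the SSIM paper
        C2 = (0.03 * img_range) ** 2

        v1 = 2.0 * sigma12 + C2
        v2 = sigma1_sq + sigma2_sq + C2
        cs = torch.mean(v1 / v2)  # Contrast-sensitivity term used by MS-SSIM

        ssim_map = ((2 * mu1_mu2 + C1) * v1) / ((mu1_sq + mu2_sq + C1) * v2)

        if size_average:
            ret = ssim_map.mean()
        else:
            ret = ssim_map.mean(1).mean(1).mean(1)

        return ret, cs

    def forward(self, img1: Tensor, img2: Tensor) -> Tensor:
        device = img1.device
        # Per-scale weights from the MS-SSIM paper (Wang et al., 2003).
        weights = torch.tensor([0.0448, 0.2856, 0.3001, 0.2363, 0.1333],
                               device=device)
        levels = weights.size(0)
        mssim = []
        mcs = []

        # Evaluate SSIM at each scale, downsampling by 2x between scales.
        for _ in range(levels):
            sim, cs = self.ssim(img1, img2,
                                self.window_size,
                                self.in_channels,
                                self.size_average)
            mssim.append(sim)
            mcs.append(cs)
            img1 = F.avg_pool2d(img1, (2, 2))
            img2 = F.avg_pool2d(img2, (2, 2))

        mssim = torch.stack(mssim)
        mcs = torch.stack(mcs)

        # Contrast sensitivity at the finer scales, full SSIM at the coarsest;
        # returned as a loss (1 - similarity).
        output = torch.prod(mcs[:-1] ** weights[:-1]) * (mssim[-1] ** weights[-1])
        return 1 - output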
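

if __name__ == "__main__":
    # Minimal smoke test (an illustrative sketch, not part of the original
    # file). Assumes the surrounding repo layout, so run it as a module from
    # the repo root (e.g. `python -m models.mssim_vae`, path hypothetical),
    # since this file uses a relative import. Inputs are 3x64x64, which the
    # hard-coded shapes above expect.
    model = MSSIMVAE(in_channels=3, latent_dim=128)
    x = torch.randn(4, 3, 64, 64).clamp(-1, 1)  # Fake batch in [-1, 1]
    outputs = model(x)  # [reconstruction, input, mu, log_var]
    losses = model.loss_function(*outputs, M_N=0.005)
    print({k: v.item() for k, v in losses.items()})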