Path: blob/main/modules/parallel_wavegan/losses/stft_loss.py
695 views
# -*- coding: utf-8 -*-

# Copyright 2019 Tomoki Hayashi
# MIT License (https://opensource.org/licenses/MIT)

"""STFT-based Loss modules."""

import torch
import torch.nn.functional as F


def stft(x, fft_size, hop_size, win_length, window):
    """Perform STFT and convert to magnitude spectrogram.

    Args:
        x (Tensor): Input signal tensor (B, T).
        fft_size (int): FFT size.
        hop_size (int): Hop size.
        win_length (int): Window length.
        window (Tensor): Window tensor of length ``win_length``. It is passed
            straight to ``torch.stft`` and is expected to live on the same
            device as ``x``.

    Returns:
        Tensor: Magnitude spectrogram (B, #frames, fft_size // 2 + 1).

    """
    try:
        # Recent PyTorch releases refuse real-valued inputs unless
        # ``return_complex`` is given explicitly, so ask for complex output.
        x_stft = torch.stft(
            x, fft_size, hop_size, win_length, window, return_complex=True
        )
    except TypeError:
        # Old PyTorch releases do not know ``return_complex`` and return a
        # real tensor whose last dimension holds (real, imag).
        x_stft = torch.stft(x, fft_size, hop_size, win_length, window)
        real = x_stft[..., 0]
        imag = x_stft[..., 1]
    else:
        real = x_stft.real
        imag = x_stft.imag

    # NOTE(kan-bayashi): clamp is needed to avoid nan or inf
    return torch.sqrt(torch.clamp(real ** 2 + imag ** 2, min=1e-7)).transpose(2, 1)


class SpectralConvergengeLoss(torch.nn.Module):
    """Spectral convergence loss module."""

    def __init__(self):
        """Initialize spectral convergence loss module."""
        super(SpectralConvergengeLoss, self).__init__()

    def forward(self, x_mag, y_mag):
        """Calculate forward propagation.

        Args:
            x_mag (Tensor): Magnitude spectrogram of predicted signal (B, #frames, #freq_bins).
            y_mag (Tensor): Magnitude spectrogram of groundtruth signal (B, #frames, #freq_bins).

        Returns:
            Tensor: Spectral convergence loss value.

        """
        # Frobenius norm of the error, normalized by the norm of the target.
        return torch.norm(y_mag - x_mag, p="fro") / torch.norm(y_mag, p="fro")


class LogSTFTMagnitudeLoss(torch.nn.Module):
    """Log STFT magnitude loss module."""

    def __init__(self):
        """Initialize log STFT magnitude loss module."""
        super(LogSTFTMagnitudeLoss, self).__init__()

    def forward(self, x_mag, y_mag):
        """Calculate forward propagation.

        Args:
            x_mag (Tensor): Magnitude spectrogram of predicted signal (B, #frames, #freq_bins).
            y_mag (Tensor): Magnitude spectrogram of groundtruth signal (B, #frames, #freq_bins).

        Returns:
            Tensor: Log STFT magnitude loss value.

        """
        return F.l1_loss(torch.log(y_mag), torch.log(x_mag))


class STFTLoss(torch.nn.Module):
    """STFT loss module."""

    def __init__(self, fft_size=1024, shift_size=120, win_length=600, window="hann_window"):
        """Initialize STFT loss module.

        Args:
            fft_size (int): FFT size.
            shift_size (int): Hop size between successive frames.
            win_length (int): Window length.
            window (str): Name of a window factory in ``torch``
                (looked up with ``getattr(torch, window)``).

        """
        super(STFTLoss, self).__init__()
        self.fft_size = fft_size
        self.shift_size = shift_size
        self.win_length = win_length
        # Kept as a plain attribute (not a buffer) so that ``state_dict()``
        # keys stay exactly as before; ``forward`` moves it to the input's
        # device on demand.
        self.window = getattr(torch, window)(win_length)
        self.spectral_convergenge_loss = SpectralConvergengeLoss()
        self.log_stft_magnitude_loss = LogSTFTMagnitudeLoss()

    def forward(self, x, y):
        """Calculate forward propagation.

        Args:
            x (Tensor): Predicted signal (B, T).
            y (Tensor): Groundtruth signal (B, T).

        Returns:
            Tensor: Spectral convergence loss value.
            Tensor: Log STFT magnitude loss value.

        """
        # ``module.to(...)`` / ``module.cuda()`` do not touch plain tensor
        # attributes, so align the window with the input here. ``Tensor.to``
        # returns the same tensor when nothing has to change, which makes
        # this free after the first call.
        self.window = self.window.to(device=x.device, dtype=x.dtype)

        x_mag = stft(x, self.fft_size, self.shift_size, self.win_length, self.window)
        y_mag = stft(y, self.fft_size, self.shift_size, self.win_length, self.window)
        sc_loss = self.spectral_convergenge_loss(x_mag, y_mag)
        mag_loss = self.log_stft_magnitude_loss(x_mag, y_mag)

        return sc_loss, mag_loss


class MultiResolutionSTFTLoss(torch.nn.Module):
    """Multi resolution STFT loss module."""

    def __init__(self,
                 fft_sizes=(1024, 2048, 512),
                 hop_sizes=(120, 240, 50),
                 win_lengths=(600, 1200, 240),
                 window="hann_window"):
        """Initialize Multi resolution STFT loss module.

        Args:
            fft_sizes (list): List of FFT sizes.
            hop_sizes (list): List of hop sizes.
            win_lengths (list): List of window lengths.
            window (str): Window function type.

        """
        super(MultiResolutionSTFTLoss, self).__init__()
        assert len(fft_sizes) == len(hop_sizes) == len(win_lengths), (
            "fft_sizes, hop_sizes and win_lengths must have the same length"
        )
        # One single-resolution loss per (fft size, hop size, window length).
        self.stft_losses = torch.nn.ModuleList(
            [
                STFTLoss(fs, ss, wl, window)
                for fs, ss, wl in zip(fft_sizes, hop_sizes, win_lengths)
            ]
        )

    def forward(self, x, y):
        """Calculate forward propagation.

        Args:
            x (Tensor): Predicted signal (B, T).
            y (Tensor): Groundtruth signal (B, T).

        Returns:
            Tensor: Multi resolution spectral convergence loss value.
            Tensor: Multi resolution log STFT magnitude loss value.

        """
        sc_loss = 0.0
        mag_loss = 0.0
        for f in self.stft_losses:
            sc_l, mag_l = f(x, y)
            sc_loss += sc_l
            mag_loss += mag_l
        # Average over the resolutions.
        sc_loss /= len(self.stft_losses)
        mag_loss /= len(self.stft_losses)

        return sc_loss, mag_loss