Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
prophesier
GitHub Repository: prophesier/diff-svc
Path: blob/main/modules/parallel_wavegan/losses/stft_loss.py
695 views
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2019 Tomoki Hayashi
4
# MIT License (https://opensource.org/licenses/MIT)
5
6
"""STFT-based Loss modules."""
7
8
import torch
9
import torch.nn.functional as F
10
11
12
def stft(x, fft_size, hop_size, win_length, window):
    """Perform STFT and convert to magnitude spectrogram.

    Args:
        x (Tensor): Input signal tensor (B, T).
        fft_size (int): FFT size.
        hop_size (int): Hop size.
        win_length (int): Window length.
        window (Tensor): Window function tensor of length ``win_length``
            (e.g. the result of ``torch.hann_window(win_length)``).

    Returns:
        Tensor: Magnitude spectrogram (B, #frames, fft_size // 2 + 1).

    """
    # torch.stft needs the window on the same device as the signal; callers
    # may have created it on CPU while ``x`` lives on an accelerator.
    window = window.to(device=x.device)

    # Recent PyTorch releases require ``return_complex`` to be given explicitly
    # for real inputs, so request the complex output and read its real and
    # imaginary parts instead of indexing a trailing (..., 2) axis.
    x_stft = torch.stft(
        x, fft_size, hop_size, win_length, window, return_complex=True
    )
    real = x_stft.real
    imag = x_stft.imag

    # NOTE(kan-bayashi): clamp is needed to avoid nan or inf
    # (the gradient of sqrt is unbounded at zero).
    return torch.sqrt(torch.clamp(real ** 2 + imag ** 2, min=1e-7)).transpose(2, 1)
32
33
34
class SpectralConvergengeLoss(torch.nn.Module):
    """Spectral convergence loss module.

    Measures the Frobenius norm of the magnitude error relative to the
    Frobenius norm of the reference magnitude.
    """

    def __init__(self):
        """Initialize spectral convergence loss module (no parameters)."""
        super().__init__()

    def forward(self, x_mag, y_mag):
        """Calculate forward propagation.

        Args:
            x_mag (Tensor): Magnitude spectrogram of predicted signal (B, #frames, #freq_bins).
            y_mag (Tensor): Magnitude spectrogram of groundtruth signal (B, #frames, #freq_bins).

        Returns:
            Tensor: Spectral convergence loss value.

        """
        # Norm of the residual between reference and prediction.
        residual_norm = torch.norm(y_mag - x_mag, p="fro")
        # Norm of the reference, used as the normaliser.
        reference_norm = torch.norm(y_mag, p="fro")
        return residual_norm / reference_norm
53
54
55
class LogSTFTMagnitudeLoss(torch.nn.Module):
    """Log STFT magnitude loss module.

    Computes the mean absolute difference between the natural logarithms
    of two magnitude spectrograms.
    """

    def __init__(self):
        """Initialize log STFT magnitude loss module (no parameters)."""
        super().__init__()

    def forward(self, x_mag, y_mag):
        """Calculate forward propagation.

        Args:
            x_mag (Tensor): Magnitude spectrogram of predicted signal (B, #frames, #freq_bins).
            y_mag (Tensor): Magnitude spectrogram of groundtruth signal (B, #frames, #freq_bins).

        Returns:
            Tensor: Log STFT magnitude loss value.

        """
        log_reference = torch.log(y_mag)
        log_predicted = torch.log(x_mag)
        # Argument order (reference first, prediction second) is kept as-is.
        return F.l1_loss(log_reference, log_predicted)
74
75
76
class STFTLoss(torch.nn.Module):
    """STFT loss module.

    Combines the spectral convergence loss and the log STFT magnitude loss
    computed at a single STFT resolution.
    """

    def __init__(self, fft_size=1024, shift_size=120, win_length=600, window="hann_window"):
        """Initialize STFT loss module.

        Args:
            fft_size (int): FFT size.
            shift_size (int): Hop size passed to the STFT.
            win_length (int): Window length.
            window (str): Name of a window factory in the ``torch`` namespace
                (looked up with ``getattr(torch, window)``).

        """
        super(STFTLoss, self).__init__()
        self.fft_size = fft_size
        self.shift_size = shift_size
        self.win_length = win_length
        # Stored as a plain attribute (not a buffer), so it is absent from
        # ``state_dict`` and is not relocated by ``Module.to()``.
        self.window = getattr(torch, window)(win_length)
        self.spectral_convergenge_loss = SpectralConvergengeLoss()
        self.log_stft_magnitude_loss = LogSTFTMagnitudeLoss()

    def forward(self, x, y):
        """Calculate forward propagation.

        Args:
            x (Tensor): Predicted signal (B, T).
            y (Tensor): Groundtruth signal (B, T).

        Returns:
            Tensor: Spectral convergence loss value.
            Tensor: Log STFT magnitude loss value.

        """
        # The window does not follow the module across devices, so align it
        # with the input here; this is a no-op when it is already there.
        window = self.window.to(device=x.device)
        x_mag = stft(x, self.fft_size, self.shift_size, self.win_length, window)
        y_mag = stft(y, self.fft_size, self.shift_size, self.win_length, window)
        sc_loss = self.spectral_convergenge_loss(x_mag, y_mag)
        mag_loss = self.log_stft_magnitude_loss(x_mag, y_mag)

        return sc_loss, mag_loss
107
108
109
class MultiResolutionSTFTLoss(torch.nn.Module):
    """Multi resolution STFT loss module.

    Averages the spectral convergence and log STFT magnitude losses over
    several STFT resolutions.
    """

    def __init__(self,
                 fft_sizes=(1024, 2048, 512),
                 hop_sizes=(120, 240, 50),
                 win_lengths=(600, 1200, 240),
                 window="hann_window"):
        """Initialize Multi resolution STFT loss module.

        Args:
            fft_sizes (list): List of FFT sizes.
            hop_sizes (list): List of hop sizes.
            win_lengths (list): List of window lengths.
            window (str): Window function type.

        The three sequences must have the same length; element ``i`` of each
        defines one resolution. Defaults are tuples so that no mutable object
        is shared between instances; lists are still accepted.

        """
        super(MultiResolutionSTFTLoss, self).__init__()
        assert len(fft_sizes) == len(hop_sizes) == len(win_lengths), \
            "fft_sizes, hop_sizes and win_lengths must have the same length"
        # One STFTLoss per resolution, registered as sub-modules.
        self.stft_losses = torch.nn.ModuleList()
        for fs, ss, wl in zip(fft_sizes, hop_sizes, win_lengths):
            self.stft_losses.append(STFTLoss(fs, ss, wl, window))

    def forward(self, x, y):
        """Calculate forward propagation.

        Args:
            x (Tensor): Predicted signal (B, T).
            y (Tensor): Groundtruth signal (B, T).

        Returns:
            Tensor: Multi resolution spectral convergence loss value.
            Tensor: Multi resolution log STFT magnitude loss value.

        """
        sc_loss = 0.0
        mag_loss = 0.0
        # Accumulate both loss terms over every resolution.
        for f in self.stft_losses:
            sc_l, mag_l = f(x, y)
            sc_loss += sc_l
            mag_loss += mag_l
        # Average over the number of resolutions.
        sc_loss /= len(self.stft_losses)
        mag_loss /= len(self.stft_losses)

        return sc_loss, mag_loss
154
155