# Path: blob/master/examples/parallel_wavegan/conf/parallel_wavegan.v1.yaml
# (page artifact: 1559 views)
# This is the hyperparameter configuration file for ParallelWaveGAN.
# Please make sure this is adjusted for the LJSpeech dataset. If you want to
# apply to the other dataset, you might need to carefully change some parameters.
# This configuration performs 4000k iters.

# Original: https://github.com/kan-bayashi/ParallelWaveGAN/blob/master/egs/ljspeech/voc1/conf/parallel_wavegan.v1.yaml

###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################
sampling_rate: 22050  # Sampling rate of the audio.
hop_size: 256         # Hop size.
format: "npy"         # Feature file format.

###########################################################
#         GENERATOR NETWORK ARCHITECTURE SETTING          #
###########################################################
model_type: "parallel_wavegan_generator"

parallel_wavegan_generator_params:
    out_channels: 1        # Number of output channels.
    kernel_size: 3         # Kernel size of dilated convolution.
    n_layers: 30           # Number of residual block layers.
    stacks: 3              # Number of stacks i.e., dilation cycles.
    residual_channels: 64  # Number of channels in residual conv.
    gate_channels: 128     # Number of channels in gated conv.
    skip_channels: 64      # Number of channels in skip conv.
    aux_channels: 80       # Number of channels for auxiliary feature conv.
                           # Must be the same as num_mels.
    aux_context_window: 2  # Context window size for auxiliary feature.
                           # If set to 2, previous 2 and future 2 frames will be considered.
    dropout: 0.0           # Dropout rate. 0.0 means no dropout applied.
    upsample_params:       # Upsampling network parameters.
        upsample_scales: [4, 4, 4, 4]  # Upsampling scales. Product of these must be the same as hop_size.

###########################################################
#       DISCRIMINATOR NETWORK ARCHITECTURE SETTING        #
###########################################################
parallel_wavegan_discriminator_params:
    out_channels: 1                    # Number of output channels.
    kernel_size: 3                     # Kernel size of conv layers.
    n_layers: 10                       # Number of conv layers.
    conv_channels: 64                  # Number of conv channels.
    use_bias: true                     # Whether to use bias parameter in conv.
    nonlinear_activation: "LeakyReLU"  # Nonlinear function after each conv.
    nonlinear_activation_params:       # Nonlinear function parameters.
        alpha: 0.2                     # Alpha in LeakyReLU.

###########################################################
#                    STFT LOSS SETTING                    #
###########################################################
stft_loss_params:
    fft_lengths: [1024, 2048, 512]   # List of FFT size for STFT-based loss.
    frame_steps: [120, 240, 50]      # List of hop size for STFT-based loss.
    frame_lengths: [600, 1200, 240]  # List of window length for STFT-based loss.

###########################################################
#                 ADVERSARIAL LOSS SETTING                #
###########################################################
lambda_adv: 4.0  # Loss balancing coefficient.

###########################################################
#                   DATA LOADER SETTING                   #
###########################################################
batch_size: 6                 # Batch size for each GPU with assuming that gradient_accumulation_steps == 1.
batch_max_steps: 25600        # Length of each audio in batch for training. Make sure dividable by hop_size.
batch_max_steps_valid: 81920  # Length of each audio for validation. Make sure dividable by hop_size.
remove_short_samples: true    # Whether to remove samples the length of which are less than batch_max_steps.
allow_cache: true             # Whether to allow cache in dataset. If true, it requires cpu memory.
is_shuffle: true              # Whether to shuffle the dataset after each epoch.

###########################################################
#             OPTIMIZER & SCHEDULER SETTING               #
###########################################################
generator_optimizer_params:
    lr_fn: "ExponentialDecay"  # Learning-rate schedule class name.
    lr_params:
        initial_learning_rate: 0.0005
        decay_steps: 200000
        decay_rate: 0.5

discriminator_optimizer_params:
    lr_fn: "ExponentialDecay"  # Learning-rate schedule class name.
    lr_params:
        initial_learning_rate: 0.0005
        decay_steps: 200000
        decay_rate: 0.5

gradient_accumulation_steps: 1

###########################################################
#                     INTERVAL SETTING                    #
###########################################################
discriminator_train_start_steps: 100000  # Steps to begin training discriminator.
train_max_steps: 400000                  # Number of training steps.
save_interval_steps: 5000                # Interval steps to save checkpoint.
eval_interval_steps: 2000                # Interval steps to evaluate the network.
log_interval_steps: 200                  # Interval steps to record the training log.

###########################################################
#                      OTHER SETTING                      #
###########################################################
num_save_intermediate_results: 1  # Number of batch to be saved as intermediate results.