# Path: blob/master/examples/parallel_wavegan/conf/parallel_wavegan.v1.yaml
# (page artifact: 1559 views)
# This is the hyperparameter configuration file for ParallelWaveGAN.
# Please make sure this is adjusted for the LJSpeech dataset. If you want to
# apply to the other dataset, you might need to carefully change some parameters.
# This configuration performs 4000k iters.

# Original: https://github.com/kan-bayashi/ParallelWaveGAN/blob/master/egs/ljspeech/voc1/conf/parallel_wavegan.v1.yaml

###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################
sampling_rate: 22050  # Sampling rate of the audio.
hop_size: 256         # Hop size.
format: "npy"         # Feature file format.

###########################################################
#         GENERATOR NETWORK ARCHITECTURE SETTING          #
###########################################################
model_type: "parallel_wavegan_generator"

parallel_wavegan_generator_params:
    out_channels: 1        # Number of output channels.
    kernel_size: 3         # Kernel size of dilated convolution.
    n_layers: 30           # Number of residual block layers.
    stacks: 3              # Number of stacks i.e., dilation cycles.
    residual_channels: 64  # Number of channels in residual conv.
    gate_channels: 128     # Number of channels in gated conv.
    skip_channels: 64      # Number of channels in skip conv.
    aux_channels: 80       # Number of channels for auxiliary feature conv.
                           # Must be the same as num_mels.
    aux_context_window: 2  # Context window size for auxiliary feature.
                           # If set to 2, previous 2 and future 2 frames will be considered.
    dropout: 0.0           # Dropout rate. 0.0 means no dropout applied.
    upsample_params:       # Upsampling network parameters.
        upsample_scales: [4, 4, 4, 4]  # Upsampling scales. Product of these must be the same as hop_size.

###########################################################
#       DISCRIMINATOR NETWORK ARCHITECTURE SETTING        #
###########################################################
parallel_wavegan_discriminator_params:
    out_channels: 1                    # Number of output channels.
    kernel_size: 3                     # Kernel size of conv layers.
    n_layers: 10                       # Number of conv layers.
    conv_channels: 64                  # Number of conv channels.
    use_bias: true                     # Whether to use bias parameter in conv.
    nonlinear_activation: "LeakyReLU"  # Nonlinear function after each conv.
    nonlinear_activation_params:       # Nonlinear function parameters.
        alpha: 0.2                     # Alpha in LeakyReLU.

###########################################################
#                    STFT LOSS SETTING                    #
###########################################################
stft_loss_params:
    fft_lengths: [1024, 2048, 512]   # List of FFT size for STFT-based loss.
    frame_steps: [120, 240, 50]      # List of hop size for STFT-based loss.
    frame_lengths: [600, 1200, 240]  # List of window length for STFT-based loss.

###########################################################
#                 ADVERSARIAL LOSS SETTING                #
###########################################################
lambda_adv: 4.0  # Loss balancing coefficient.

###########################################################
#                   DATA LOADER SETTING                   #
###########################################################
batch_size: 6                 # Batch size for each GPU with assuming that gradient_accumulation_steps == 1.
batch_max_steps: 25600        # Length of each audio in batch for training. Make sure dividable by hop_size.
batch_max_steps_valid: 81920  # Length of each audio for validation. Make sure dividable by hop_size.
remove_short_samples: true    # Whether to remove samples the length of which are less than batch_max_steps.
allow_cache: true             # Whether to allow cache in dataset. If true, it requires cpu memory.
is_shuffle: true              # Whether to shuffle the dataset after each epoch.

###########################################################
#             OPTIMIZER & SCHEDULER SETTING               #
###########################################################
generator_optimizer_params:
    lr_fn: "ExponentialDecay"  # Learning-rate schedule class name.
    lr_params:
        initial_learning_rate: 0.0005
        decay_steps: 200000
        decay_rate: 0.5

discriminator_optimizer_params:
    lr_fn: "ExponentialDecay"  # Learning-rate schedule class name.
    lr_params:
        initial_learning_rate: 0.0005
        decay_steps: 200000
        decay_rate: 0.5

gradient_accumulation_steps: 1

###########################################################
#                     INTERVAL SETTING                    #
###########################################################
discriminator_train_start_steps: 100000  # Steps to begin training discriminator.
train_max_steps: 400000                  # Number of training steps.
save_interval_steps: 5000                # Interval steps to save checkpoint.
eval_interval_steps: 2000                # Interval steps to evaluate the network.
log_interval_steps: 200                  # Interval steps to record the training log.

###########################################################
#                      OTHER SETTING                      #
###########################################################
num_save_intermediate_results: 1  # Number of batch to be saved as intermediate results.