Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
prophesier
GitHub Repository: prophesier/diff-svc
Path: blob/main/training/config.yaml
694 views
# Top-level scalar settings (keys kept in the file's alphabetical order).
K_step: 1000
accumulate_grad_batches: 1
audio_num_mel_bins: 80
audio_sample_rate: 24000
# Nested mapping: the child keys must be indented under binarization_args.
# At column 0 they would parse as separate top-level keys and leave
# binarization_args itself null.
binarization_args:
  shuffle: false
  with_align: true
  with_f0: true
  with_hubert: true
  with_spk_embed: false
  with_wav: false
# Binarizer class path, data locations, validation cadence and cwt_* settings.
binarizer_cls: preprocessing.SVCpre.SVCBinarizer
binary_data_dir: data/binary/atri
check_val_every_n_epoch: 10
choose_test_manually: false
clip_grad_norm: 1
config_path: training/config.yaml
# Explicit empty list (a bare `key:` would be null instead).
content_cond_steps: []
cwt_add_f0_loss: false
cwt_hidden_size: 128
cwt_layers: 2
cwt_loss: l1
cwt_std_scale: 0.8
# Sequence with a single entry; flush (non-indented) list style is used
# for every sequence in this file.
datasets:
- opencpop
# Debug flag, dec_* / decoder settings, dropout and dataset worker count.
debug: false
dec_ffn_kernel_size: 9
dec_layers: 4
decay_steps: 40000
decoder_type: fft
# Empty string, not null.
dict_dir: ''
diff_decoder_type: wavenet
diff_loss_type: l2
dilation_cycle_length: 4
dropout: 0.1
ds_workers: 4
# Each entry is a comma-separated triple stored as a single string; quoted so
# the string type is explicit rather than relying on plain-scalar resolution.
dur_enc_hidden_stride_kernel:
- '0,2,3'
- '0,2,3'
- '0,1,3'
# dur_* / enc_* settings, f0 range, and FFT / frequency-band parameters.
dur_loss: mse
dur_predictor_kernel: 3
dur_predictor_layers: 5
enc_ffn_kernel_size: 9
enc_layers: 4
encoder_K: 8
encoder_type: fft
endless_ds: false
f0_bin: 256
f0_max: 1100.0
f0_min: 50.0
ffn_act: gelu
ffn_padding: SAME
fft_size: 512
fmax: 12000
fmin: 30
# Empty string, not null.
fs2_ckpt: ''
# Generation options, model width / hop size, hubert settings, lambda_*
# weights and logging.
gaussian_start: true
gen_dir_name: ''
gen_tgt_spk_id: -1
hidden_size: 256
hop_size: 128
hubert_gpu: true
hubert_path: checkpoints/hubert/hubert_soft.pt
infer: false
keep_bins: 80
lambda_commit: 0.25
lambda_energy: 0.0
lambda_f0: 1.0
lambda_ph_dur: 0.3
lambda_sent_dur: 1.0
lambda_uv: 1.0
lambda_word_dur: 1.0
# Empty string, not null.
load_ckpt: ''
log_interval: 100
loud_norm: false
# Learning rate, max_* limits, mel settings, num_* counts, optimizer betas
# and pe_* / pitch flags.
lr: 0.0004
max_beta: 0.02
max_epochs: 3000
max_eval_sentences: 1
max_eval_tokens: 60000
max_frames: 42000
max_input_tokens: 60000
max_sentences: 88
max_tokens: 128000
max_updates: 1000000
# Quoted: the value contains ':' and '|', both YAML indicator characters.
mel_loss: 'ssim:0.5|l1:0.5'
mel_vmax: 1.5
mel_vmin: -6.0
min_level_db: -120
norm_type: gn
num_ckpt_keep: 10
num_heads: 2
num_sanity_val_steps: 1
num_spk: 1
num_test_samples: 0
num_valid_plots: 10
optimizer_adam_beta1: 0.9
optimizer_adam_beta2: 0.98
out_wav_norm: false
pe_ckpt: checkpoints/0102_xiaoma_pe/model_ckpt_steps_60000.ckpt
pe_enable: false
perform_enhance: true
pitch_ar: false
# Each entry is a comma-separated triple stored as a single string; quoted so
# the string type is explicit rather than relying on plain-scalar resolution.
pitch_enc_hidden_stride_kernel:
- '0,2,5'
- '0,2,5'
- '0,2,5'
# pitch_* settings and pndm_speedup.
pitch_extractor: parselmouth
pitch_loss: l2
pitch_norm: log
pitch_type: frame
pndm_speedup: 10
# Nested mapping: the child keys must be indented under pre_align_args.
# At column 0 they would parse as separate top-level keys and leave
# pre_align_args itself null.
pre_align_args:
  allow_no_txt: false
  denoise: false
  forced_align: mfa
  txt_processor: zh_g2pM
  use_sox: true
  use_tone: false
# Pre-align class path, predictor_* / prenet_* settings, data directories,
# residual_* sizes and checkpoint-saving flags.
pre_align_cls: data_gen.singing.pre_align.SingingPreAlign
predictor_dropout: 0.5
predictor_grad: 0.1
predictor_hidden: -1
predictor_kernel: 5
predictor_layers: 5
prenet_dropout: 0.5
prenet_hidden_size: 256
# Empty string, not null.
pretrain_fs_ckpt: ''
# NOTE(review): 'xxx' looks like a placeholder value - confirm before use.
processed_data_dir: xxx
profile_infer: false
raw_data_dir: data/raw/atri
ref_norm_layer: bn
rel_pos: true
reset_phone_dict: true
residual_channels: 256
residual_layers: 20
save_best: false
save_ckpt: true
# Sequence of four names (flush list style, as elsewhere in this file).
save_codes:
- configs
- modules
- src
- utils
# Save flags, schedule type, seed, sorting flag and speaker id.
save_f0: true
save_gt: false
schedule_type: linear
seed: 1234
sort_by_len: true
speaker_id: atri
# 80 float entries - the same count as audio_num_mel_bins (80); presumably one
# value per mel bin (confirm against the consumer). Values are kept verbatim.
spec_max:
- -1.5451143980026245
- -1.5177826881408691
- -1.1807013750076294
- -0.6732071042060852
- -0.47006210684776306
- -0.271837055683136
- -0.27174147963523865
- -0.3395537734031677
- -0.2529868483543396
- -0.22453370690345764
- -0.24767500162124634
- -0.22861438989639282
- -0.28668588399887085
- -0.335957795381546
- -0.3118636906147003
- -0.34530898928642273
- -0.35274678468704224
- -0.4730182886123657
- -0.45395755767822266
- -0.4338522255420685
- -0.41395917534828186
- -0.29468369483947754
- -0.16852207481861115
- -0.3900046646595001
- -0.6241626739501953
- -0.5899035930633545
- -0.6534764170646667
- -0.6667397022247314
- -0.6992383003234863
- -0.7867978811264038
- -0.8457596302032471
- -0.43350857496261597
- -0.629216730594635
- -0.9135912656784058
- -0.9230040311813354
- -0.6756577491760254
- -0.8399246335029602
- -0.8495144248008728
- -0.781493067741394
- -1.0347247123718262
- -1.0051935911178589
- -1.1246198415756226
- -1.021154522895813
- -0.851677417755127
- -0.8443652987480164
- -0.9016147255897522
- -0.7618780732154846
- -1.0490750074386597
- -1.2046996355056763
- -1.2022035121917725
- -0.9753153324127197
- -1.2503044605255127
- -1.0664823055267334
- -1.1236635446548462
- -1.2223032712936401
- -1.0116488933563232
- -1.2263423204421997
- -1.2552075386047363
- -1.3846945762634277
- -1.2681812047958374
- -1.3416036367416382
- -1.264938235282898
- -1.2763726711273193
- -1.4651004076004028
- -1.4880361557006836
- -1.5735552310943604
- -1.4097294807434082
- -1.468385100364685
- -1.3768259286880493
- -1.3312186002731323
- -1.3547866344451904
- -1.4387739896774292
- -1.1861546039581299
- -1.1709729433059692
- -1.1812609434127808
- -1.1489264965057373
- -1.5605546236038208
- -2.2702553272247314
- -4.064557075500488
- -5.809507846832275
# 80 float entries - the same count as audio_num_mel_bins (80); presumably one
# value per mel bin (confirm against the consumer). Values are kept verbatim.
spec_min:
- -5.740882873535156
- -6.0
- -6.0
- -6.0
- -6.0
- -6.0
- -6.0
- -6.0
- -5.959110260009766
- -6.0
- -6.0
- -6.0
- -6.0
- -6.0
- -6.0
- -6.0
- -6.0
- -6.0
- -6.0
- -6.0
- -6.0
- -6.0
- -6.0
- -6.0
- -6.0
- -6.0
- -6.0
- -6.0
- -6.0
- -6.0
- -6.0
- -5.999546527862549
- -6.0
- -5.995517730712891
- -6.0
- -6.0
- -6.0
- -6.0
- -6.0
- -6.0
- -6.0
- -5.960205078125
- -5.93423318862915
- -6.0
- -5.933608531951904
- -6.0
- -6.0
- -6.0
- -5.953958511352539
- -5.908934593200684
- -5.911312580108643
- -5.882552623748779
- -5.932425498962402
- -5.91495943069458
- -5.826524257659912
- -5.777952671051025
- -5.775007724761963
- -5.849961280822754
- -5.7793660163879395
- -5.781087875366211
- -5.818603992462158
- -5.765895366668701
- -5.834509372711182
- -5.817623615264893
- -5.855445384979248
- -5.844409465789795
- -5.760529518127441
- -5.713063716888428
- -5.74588680267334
- -5.855954647064209
- -5.874588489532471
- -5.81571626663208
- -5.849369049072266
- -5.963766574859619
- -5.8541646003723145
- -5.922942161560059
- -6.0
- -6.0
- -6.0
- -6.0
# Empty lists and strings are written explicitly ([] / '') so they do not
# load as null.
spk_cond_steps: []
stop_token_weight: 5.0
task_cls: training.task.SVC_task.SVCTask
test_ids: []
test_input_dir: ''
test_num: 0
# Sequence with a single entry (flush list style, as elsewhere in this file).
test_prefixes:
- test
# Split names, timesteps, use_* feature flags, validation cadence, vocoder
# class / checkpoint and warmup length.
test_set_name: test
timesteps: 1000
train_set_name: train
use_crepe: true
use_denoise: false
use_energy_embed: false
use_gt_dur: false
use_gt_f0: false
use_midi: false
use_nsf: true
use_pitch_embed: true
use_pos_embed: true
use_spk_embed: false
use_spk_id: false
use_split_spk_id: false
use_uv: false
use_vec: false
use_var_enc: false
val_check_interval: 2000
valid_num: 0
valid_set_name: valid
vocoder: network.vocoders.hifigan.HifiGAN
vocoder_ckpt: checkpoints/0109_hifigan_bigpopcs_hop128
warmup_updates: 2000
# Written with an explicit mantissa dot: YAML 1.1 loaders (e.g. PyYAML) only
# resolve exponent notation as a float when the mantissa contains '.', so a
# bare `1e-6` would load as the string '1e-6'.
wav2spec_eps: 1.0e-6
# Weight decay, window size and output directory.
weight_decay: 0
win_size: 512
work_dir: checkpoints/atri
# Placed after the alphabetically sorted keys, as in the original file.
no_fs2: true