GitHub Repository: prophesier/diff-svc
Path: blob/main/infer.py
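# diff-svc batch inference script: loads a trained Svc checkpoint, slices each
# input clip on silence, runs the diffusion model segment by segment, and writes
# the concatenated converted audio to ./results.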
import io
import time
from pathlib import Path

import librosa
import numpy as np
import soundfile

from infer_tools import infer_tool
from infer_tools import slicer
from infer_tools.infer_tool import Svc
from utils.hparams import hparams

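# Silence-slicing results are cached on disk so repeated runs on the same input
# file can skip the slicer pass.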
chunks_dict = infer_tool.read_temp("./infer_tools/new_chunks_temp.json")


def run_clip(svc_model, key, acc, use_pe, use_crepe, thre, use_gt_mel, add_noise_step, project_name='', f_name=None,
             file_path=None, out_path=None, slice_db=-40, **kwargs):
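    # Parameter sketch (inferred from how the values are used below, not from
    # upstream documentation): `key` is the pitch shift in semitones, `acc` the
    # diffusion acceleration factor, `thre` the crepe confidence threshold, and
    # `use_gt_mel`/`add_noise_step` control shallow diffusion from a ground-truth mel.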
    print('code version:2022-12-04')
    use_pe = use_pe if hparams['audio_sample_rate'] == 24000 else False
    if file_path is None:
        raw_audio_path = f"./raw/{f_name}"
        clean_name = f_name[:-4]
    else:
        raw_audio_path = file_path
        clean_name = str(Path(file_path).name)[:-4]
    infer_tool.format_wav(raw_audio_path)
    wav_path = Path(raw_audio_path).with_suffix('.wav')
    global chunks_dict
    audio, sr = librosa.load(wav_path, mono=True, sr=None)
    wav_hash = infer_tool.get_md5(audio)
    if wav_hash in chunks_dict.keys():
        print("load chunks from temp")
        chunks = chunks_dict[wav_hash]["chunks"]
    else:
        chunks = slicer.cut(wav_path, db_thresh=slice_db)
        chunks_dict[wav_hash] = {"chunks": chunks, "time": int(time.time())}
        infer_tool.write_temp("./infer_tools/new_chunks_temp.json", chunks_dict)
    audio_data, audio_sr = slicer.chunks2audio(wav_path, chunks)
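    # audio_data is a list of (slice_tag, samples) pairs; slice_tag marks segments
    # the slicer judged to be silence.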

    count = 0
    f0_tst = []
    f0_pred = []
    audio = []
    for (slice_tag, data) in audio_data:
        print(f'#=====segment start, {round(len(data) / audio_sr, 3)}s======')
        length = int(np.ceil(len(data) / audio_sr * hparams['audio_sample_rate']))
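        # Write this segment to an in-memory WAV buffer that the model reads
        # directly, so no temporary files are created.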
        raw_path = io.BytesIO()
        soundfile.write(raw_path, data, audio_sr, format="wav")
        if hparams['debug']:
            print(np.mean(data), np.var(data))
        raw_path.seek(0)
        if slice_tag:
            print('skip empty segment')
            _f0_tst, _f0_pred, _audio = (
                np.zeros(int(np.ceil(length / hparams['hop_size']))),
                np.zeros(int(np.ceil(length / hparams['hop_size']))),
                np.zeros(length))
        else:
            _f0_tst, _f0_pred, _audio = svc_model.infer(raw_path, key=key, acc=acc, use_pe=use_pe, use_crepe=use_crepe,
                                                        thre=thre, use_gt_mel=use_gt_mel, add_noise_step=add_noise_step)
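        # Pad the model output up to the expected segment length (filling with its
        # mean value) or, if it came back longer, keep only the trailing `length` samples.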
        fix_audio = np.zeros(length)
        fix_audio[:] = np.mean(_audio)
        fix_audio[:len(_audio)] = _audio[0 if len(_audio) < len(fix_audio) else len(_audio) - len(fix_audio):]
        f0_tst.extend(_f0_tst)
        f0_pred.extend(_f0_pred)
        audio.extend(list(fix_audio))
        count += 1
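    # Note: the default output name below reads `step` and `accelerate` from
    # module-level variables set in the __main__ block and expects a `format`
    # keyword; pass an explicit out_path when calling run_clip from other code.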
    if out_path is None:
        out_path = f'./results/{clean_name}_{key}key_{project_name}_{hparams["residual_channels"]}_{hparams["residual_layers"]}_{int(step / 1000)}k_{accelerate}x.{kwargs["format"]}'
    soundfile.write(out_path, audio, hparams["audio_sample_rate"], 'PCM_16', format=out_path.split('.')[-1])
    return np.array(f0_tst), np.array(f0_pred), audio


if __name__ == '__main__':
    # Project folder name: the same one used during training
    project_name = "yilanqiu"
    model_path = f'./checkpoints/{project_name}/model_ckpt_steps_246000.ckpt'
    config_path = f'./checkpoints/{project_name}/config.yaml'

    # Multiple wav/ogg files are supported; put them in the ./raw folder, extension included
    file_names = ["青花瓷.wav"]
    # Pitch shift per file in semitones, positive or negative, matching file_names by position;
    # missing entries are padded with the first value
    trans = [0]
    # Diffusion acceleration factor
    accelerate = 20
    hubert_gpu = True
    format = 'flac'
    step = int(model_path.split("_")[-1].split(".")[0])

    # Do not modify anything below this line
    infer_tool.mkdir(["./raw", "./results"])
    infer_tool.fill_a_to_b(trans, file_names)

    model = Svc(project_name, config_path, hubert_gpu, model_path)
    for f_name, tran in zip(file_names, trans):
        if "." not in f_name:
            f_name += ".wav"
        run_clip(model, key=tran, acc=accelerate, use_crepe=True, thre=0.05, use_pe=True, use_gt_mel=False,
                 add_noise_step=500, f_name=f_name, project_name=project_name, format=format)
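
    # A minimal sketch of calling run_clip on a file outside ./raw by passing
    # file_path and out_path explicitly (both paths below are placeholders, not
    # files shipped with this repo):
    # run_clip(model, key=0, acc=accelerate, use_crepe=True, thre=0.05, use_pe=True,
    #          use_gt_mel=False, add_noise_step=500, project_name=project_name,
    #          file_path="/path/to/song.wav", out_path="./results/song_converted.flac")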