Kernel: Python 3 (ipykernel)
Install Dependencies
In [ ]:
# !pip install -q torchaudio

import shutil
import time
from pprint import pprint

import torch

# Sample rate (Hz) used when reading the example audio.
SAMPLING_RATE = 16000
NUM_PROCESS = 4  # set to the number of CPU cores in the machine
NUM_COPIES = 8  # total number of wav files to have on disk

# Restrict torch to a single intra-op thread per process.
torch.set_num_threads(1)

# Download the example wav once, then duplicate it so that NUM_COPIES files
# named en_example0.wav .. en_example{NUM_COPIES-1}.wav exist locally.
torch.hub.download_url_to_file(
    'https://models.silero.ai/vad_models/en.wav',
    "en_example0.wav",
)
for idx in range(NUM_COPIES - 1):
    shutil.copy("en_example0.wav", f"en_example{idx+1}.wav")
Load VAD model from torch hub
In [ ]:
# Fetch the Silero VAD model (PyTorch variant, not ONNX) from torch hub.
# force_reload=True asks torch hub for a fresh download instead of the cache.
model, utils = torch.hub.load(
    repo_or_dir='snakers4/silero-vad',
    model='silero_vad',
    force_reload=True,
    onnx=False,
)

# The hub entry point returns its helper callables as one sequence;
# unpack them into individually named functions/classes.
(
    get_speech_timestamps,
    save_audio,
    read_audio,
    VADIterator,
    collect_chunks,
) = utils
Define a VAD process function
In [ ]:
import multiprocessing

# Cache of loaded VAD models, keyed by the PID of the process that loaded them.
vad_models = {}


def init_model(model):
    """Load a Silero VAD model in the current process and cache it by PID.

    Meant to be used as a process-pool ``initializer`` so that every worker
    holds its own model instance in ``vad_models``.

    Args:
        model: Unused. Accepted only so callers that pass
            ``initargs=(model,)`` keep working; the model is loaded from
            torch hub here rather than taken from this argument.
    """
    pid = multiprocessing.current_process().pid
    # force_reload=False lets torch hub reuse an already-downloaded copy.
    worker_model, _ = torch.hub.load(repo_or_dir='snakers4/silero-vad',
                                     model='silero_vad',
                                     force_reload=False,
                                     onnx=False)
    vad_models[pid] = worker_model


def vad_process(audio_file: str):
    """Run voice-activity detection on one audio file.

    Must run in a process where ``init_model`` has already been called,
    otherwise the ``vad_models`` lookup raises ``KeyError``.

    Args:
        audio_file: Path of the audio file to analyse.

    Returns:
        The value returned by ``get_speech_timestamps`` for this file
        (presumably the detected speech segments -- confirm against the
        silero-vad documentation).
    """
    pid = multiprocessing.current_process().pid

    with torch.no_grad():
        wav = read_audio(audio_file, sampling_rate=SAMPLING_RATE)
        # Pass the tuning values by keyword: a positional call silently
        # binds them to the wrong parameters if the library's parameter
        # order differs between releases.
        return get_speech_timestamps(
            wav,
            vad_models[pid],
            threshold=0.46,               # speech prob threshold
            sampling_rate=SAMPLING_RATE,  # sample rate, same as read_audio
            min_speech_duration_ms=300,   # min speech duration in ms
            max_speech_duration_s=20,     # max speech duration in seconds
            min_silence_duration_ms=600,  # min silence duration in ms
            window_size_samples=512,      # window size
            speech_pad_ms=200,            # speech pad ms
        )
Parallelization
In [ ]:
from concurrent.futures import ProcessPoolExecutor, as_completed

# Fan the example files out over a pool of worker processes. Each worker runs
# init_model once at start-up before handling any vad_process task.
with ProcessPoolExecutor(max_workers=NUM_PROCESS,
                         initializer=init_model,
                         initargs=(model,)) as ex:
    # Build each file name from this loop's own index so that every copy
    # (en_example0.wav .. en_example{NUM_COPIES-1}.wav) is submitted once.
    futures = [
        ex.submit(vad_process, f"en_example{i}.wav")
        for i in range(NUM_COPIES)
    ]

# result() re-raises any exception that occurred inside the worker.
for finished in as_completed(futures):
    pprint(finished.result())