Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
snakers4
GitHub Repository: snakers4/silero-vad
Path: blob/master/examples/parallel_example.ipynb
1171 views
Kernel: Python 3 (ipykernel)

Install Dependencies

# !pip install -q torchaudio
import shutil
import time
from pprint import pprint

import torch

# Sampling rate (Hz) requested when audio files are read.
SAMPLING_RATE = 16000

# Restrict torch to a single intra-op thread in this process.
# NOTE(review): presumably to avoid CPU oversubscription once several
# worker processes run side by side -- confirm.
torch.set_num_threads(1)

NUM_PROCESS = 4  # set to the number of CPU cores in the machine
NUM_COPIES = 8

# download wav files, make multiple copies
first_wav = "en_example0.wav"
torch.hub.download_url_to_file(
    'https://models.silero.ai/vad_models/en.wav',
    first_wav,
)
# Duplicate the downloaded file as en_example1.wav .. en_example{NUM_COPIES-1}.wav
for idx in range(NUM_COPIES - 1):
    duplicate_wav = f"en_example{idx+1}.wav"
    shutil.copy(first_wav, duplicate_wav)

Load VAD model from torch hub

# Fetch the Silero VAD model together with its helper utilities from torch hub.
hub_options = dict(
    repo_or_dir='snakers4/silero-vad',
    model='silero_vad',
    force_reload=True,  # always re-download the repository instead of using the hub cache
    onnx=False,
)
model, utils = torch.hub.load(**hub_options)

# `utils` is a 5-item sequence; unpack it into individually named helpers.
get_speech_timestamps, save_audio, read_audio, VADIterator, collect_chunks = utils

Define a VAD process function

import multiprocessing

# Per-process registry of loaded VAD models, keyed by process id. Each
# process only ever reads and writes the entry for its own pid.
vad_models = dict()


def init_model(model):
    """Load a VAD model for the current process and register it by pid.

    Meant to be used as a process-pool ``initializer`` so that every worker
    gets its own model instance stored in ``vad_models``.

    Args:
        model: Accepted so the function can be called with the pool's
            ``initargs``; it is not used -- a fresh model is loaded from
            torch hub instead (``force_reload=False``, so the hub cache is
            reused when available).
    """
    pid = multiprocessing.current_process().pid
    # Use a separate local name rather than overwriting the ``model`` argument.
    worker_model, _ = torch.hub.load(repo_or_dir='snakers4/silero-vad',
                                     model='silero_vad',
                                     force_reload=False,
                                     onnx=False)
    vad_models[pid] = worker_model


def vad_process(audio_file: str):
    """Run voice activity detection on one audio file.

    Args:
        audio_file: Path of the audio file to analyse.

    Returns:
        Whatever ``get_speech_timestamps`` returns for the file.

    Raises:
        KeyError: If no model was registered for the current process
            (``init_model`` has not run in this process).
    """
    pid = multiprocessing.current_process().pid

    with torch.no_grad():
        wav = read_audio(audio_file, sampling_rate=SAMPLING_RATE)
        # Tuning values are passed by keyword so that they bind to the
        # intended parameters regardless of the positional order of the
        # hub-loaded function's signature.
        # NOTE(review): keyword names follow the silero-vad utility API --
        # confirm against the version actually loaded from the hub.
        return get_speech_timestamps(
            wav,
            vad_models[pid],
            threshold=0.46,                # speech prob threshold
            sampling_rate=SAMPLING_RATE,   # sample rate (same as used for read_audio)
            min_speech_duration_ms=300,    # min speech duration in ms
            max_speech_duration_s=20,      # max speech duration in seconds
            min_silence_duration_ms=600,   # min silence duration in ms
            window_size_samples=512,       # window size
            speech_pad_ms=200,             # speech pad ms
        )

Parallelization

from concurrent.futures import ProcessPoolExecutor, as_completed

# Fan the audio files out over a pool of worker processes. Each worker runs
# init_model once at start-up, then handles vad_process tasks.
with ProcessPoolExecutor(max_workers=NUM_PROCESS,
                         initializer=init_model,
                         initargs=(model,)) as ex:
    # One task per file copy: en_example0.wav .. en_example{NUM_COPIES-1}.wav.
    # The file name must be built from this loop's own index `i`; using a
    # stale variable from an earlier cell would submit the same file
    # NUM_COPIES times.
    futures = [
        ex.submit(vad_process, f"en_example{i}.wav")
        for i in range(NUM_COPIES)
    ]

# Leaving the `with` block waits for all submitted tasks; print each result.
for finished in as_completed(futures):
    pprint(finished.result())