// CoCalc -- main.rs
//
// GitHub Repository: snakers4/silero-vad
// Path: blob/master/examples/rust-wav-processing-with-wavekat-vad/src/main.rs
// 2380 views
1
use wavekat_vad::backends::silero::SileroVad;
2
use wavekat_vad::{FrameAdapter, VoiceActivityDetector};
3

4
fn main() {
5
    let audio_path = std::env::args()
6
        .nth(1)
7
        .unwrap_or_else(|| String::from("recorder.wav"));
8

9
    // Open WAV file
10
    let mut reader = hound::WavReader::open(&audio_path).expect("failed to open WAV file");
11
    let spec = reader.spec();
12
    println!(
13
        "File: {audio_path} ({}Hz, {}ch, {}bit)",
14
        spec.sample_rate, spec.channels, spec.bits_per_sample
15
    );
16

17
    if spec.sample_format != hound::SampleFormat::Int {
18
        panic!("Unsupported sample format. Expect Int.");
19
    }
20

21
    // Read samples (first channel only for multi-channel files)
22
    let samples: Vec<i16> = reader
23
        .samples::<i16>()
24
        .step_by(spec.channels as usize)
25
        .map(|s| s.expect("failed to read sample"))
26
        .collect();
27

28
    // Resample to 16kHz if needed
29
    let target_rate = 16000;
30
    let samples = if spec.sample_rate != target_rate {
31
        println!("Resampling {}Hz -> {}Hz", spec.sample_rate, target_rate);
32
        use wavekat_vad::preprocessing::AudioResampler;
33
        let mut resampler =
34
            AudioResampler::new(spec.sample_rate, target_rate).expect("failed to create resampler");
35
        resampler.process(&samples)
36
    } else {
37
        samples
38
    };
39

40
    let duration_s = samples.len() as f64 / target_rate as f64;
41
    println!(
42
        "Duration: {duration_s:.2}s ({} samples at {target_rate}Hz)\n",
43
        samples.len()
44
    );
45

46
    // Create Silero VAD — the ONNX model is embedded in the binary at compile time
47
    let vad = SileroVad::new(target_rate).expect("failed to create Silero VAD");
48
    let caps = vad.capabilities();
49
    println!(
50
        "Silero VAD — frame: {} samples ({}ms)\n",
51
        caps.frame_size, caps.frame_duration_ms
52
    );
53

54
    // FrameAdapter handles automatic frame buffering so you can feed any chunk size
55
    let mut adapter = FrameAdapter::new(vad);
56

57
    // Process in 20ms chunks (arbitrary — the adapter buffers to the required frame size)
58
    let chunk_size = target_rate as usize / 50; // 320 samples = 20ms
59
    let mut time_ms = 0.0;
60
    let step_ms = chunk_size as f64 * 1000.0 / target_rate as f64;
61

62
    for chunk in samples.chunks(chunk_size) {
63
        let results = adapter.process_all(chunk, target_rate).unwrap();
64
        for prob in results {
65
            let bar = "#".repeat((prob * 40.0) as usize);
66
            let label = if prob > 0.5 { " SPEECH" } else { "" };
67
            println!("{time_ms:8.0}ms  {prob:.3}  {bar}{label}");
68
        }
69
        time_ms += step_ms;
70
    }
71

72
    println!("\nFinished.");
73
}
74

75
Product

Resources

Company