Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
snakers4
GitHub Repository: snakers4/silero-vad
Path: blob/master/examples/rust-wav-processing-with-wavekat-vad/src/main.rs
2380 views
1
use wavekat_vad::backends::silero::SileroVad;
2
use wavekat_vad::{FrameAdapter, VoiceActivityDetector};
3
4
fn main() {
5
let audio_path = std::env::args()
6
.nth(1)
7
.unwrap_or_else(|| String::from("recorder.wav"));
8
9
// Open WAV file
10
let mut reader = hound::WavReader::open(&audio_path).expect("failed to open WAV file");
11
let spec = reader.spec();
12
println!(
13
"File: {audio_path} ({}Hz, {}ch, {}bit)",
14
spec.sample_rate, spec.channels, spec.bits_per_sample
15
);
16
17
if spec.sample_format != hound::SampleFormat::Int {
18
panic!("Unsupported sample format. Expect Int.");
19
}
20
21
// Read samples (first channel only for multi-channel files)
22
let samples: Vec<i16> = reader
23
.samples::<i16>()
24
.step_by(spec.channels as usize)
25
.map(|s| s.expect("failed to read sample"))
26
.collect();
27
28
// Resample to 16kHz if needed
29
let target_rate = 16000;
30
let samples = if spec.sample_rate != target_rate {
31
println!("Resampling {}Hz -> {}Hz", spec.sample_rate, target_rate);
32
use wavekat_vad::preprocessing::AudioResampler;
33
let mut resampler =
34
AudioResampler::new(spec.sample_rate, target_rate).expect("failed to create resampler");
35
resampler.process(&samples)
36
} else {
37
samples
38
};
39
40
let duration_s = samples.len() as f64 / target_rate as f64;
41
println!(
42
"Duration: {duration_s:.2}s ({} samples at {target_rate}Hz)\n",
43
samples.len()
44
);
45
46
// Create Silero VAD — the ONNX model is embedded in the binary at compile time
47
let vad = SileroVad::new(target_rate).expect("failed to create Silero VAD");
48
let caps = vad.capabilities();
49
println!(
50
"Silero VAD — frame: {} samples ({}ms)\n",
51
caps.frame_size, caps.frame_duration_ms
52
);
53
54
// FrameAdapter handles automatic frame buffering so you can feed any chunk size
55
let mut adapter = FrameAdapter::new(vad);
56
57
// Process in 20ms chunks (arbitrary — the adapter buffers to the required frame size)
58
let chunk_size = target_rate as usize / 50; // 320 samples = 20ms
59
let mut time_ms = 0.0;
60
let step_ms = chunk_size as f64 * 1000.0 / target_rate as f64;
61
62
for chunk in samples.chunks(chunk_size) {
63
let results = adapter.process_all(chunk, target_rate).unwrap();
64
for prob in results {
65
let bar = "#".repeat((prob * 40.0) as usize);
66
let label = if prob > 0.5 { " SPEECH" } else { "" };
67
println!("{time_ms:8.0}ms {prob:.3} {bar}{label}");
68
}
69
time_ms += step_ms;
70
}
71
72
println!("\nFinished.");
73
}
74
75