// Path: blob/master/examples/rust-wav-processing-with-wavekat-vad/src/main.rs
// 2380 views
use wavekat_vad::backends::silero::SileroVad;1use wavekat_vad::{FrameAdapter, VoiceActivityDetector};23fn main() {4let audio_path = std::env::args()5.nth(1)6.unwrap_or_else(|| String::from("recorder.wav"));78// Open WAV file9let mut reader = hound::WavReader::open(&audio_path).expect("failed to open WAV file");10let spec = reader.spec();11println!(12"File: {audio_path} ({}Hz, {}ch, {}bit)",13spec.sample_rate, spec.channels, spec.bits_per_sample14);1516if spec.sample_format != hound::SampleFormat::Int {17panic!("Unsupported sample format. Expect Int.");18}1920// Read samples (first channel only for multi-channel files)21let samples: Vec<i16> = reader22.samples::<i16>()23.step_by(spec.channels as usize)24.map(|s| s.expect("failed to read sample"))25.collect();2627// Resample to 16kHz if needed28let target_rate = 16000;29let samples = if spec.sample_rate != target_rate {30println!("Resampling {}Hz -> {}Hz", spec.sample_rate, target_rate);31use wavekat_vad::preprocessing::AudioResampler;32let mut resampler =33AudioResampler::new(spec.sample_rate, target_rate).expect("failed to create resampler");34resampler.process(&samples)35} else {36samples37};3839let duration_s = samples.len() as f64 / target_rate as f64;40println!(41"Duration: {duration_s:.2}s ({} samples at {target_rate}Hz)\n",42samples.len()43);4445// Create Silero VAD — the ONNX model is embedded in the binary at compile time46let vad = SileroVad::new(target_rate).expect("failed to create Silero VAD");47let caps = vad.capabilities();48println!(49"Silero VAD — frame: {} samples ({}ms)\n",50caps.frame_size, caps.frame_duration_ms51);5253// FrameAdapter handles automatic frame buffering so you can feed any chunk size54let mut adapter = FrameAdapter::new(vad);5556// Process in 20ms chunks (arbitrary — the adapter buffers to the required frame size)57let chunk_size = target_rate as usize / 50; // 320 samples = 20ms58let mut time_ms = 0.0;59let step_ms = chunk_size as f64 * 1000.0 / target_rate as f64;6061for chunk 
in samples.chunks(chunk_size) {62let results = adapter.process_all(chunk, target_rate).unwrap();63for prob in results {64let bar = "#".repeat((prob * 40.0) as usize);65let label = if prob > 0.5 { " SPEECH" } else { "" };66println!("{time_ms:8.0}ms {prob:.3} {bar}{label}");67}68time_ms += step_ms;69}7071println!("\nFinished.");72}737475