Path: blob/master/examples/cpp_libtorch/silero_torch.h
1179 views
//Author : Nathan Lee1//Created On : 2024-11-182//Description : silero 5.1 system for torch-script(c++).3//Version : 1.045#ifndef SILERO_TORCH_H6#define SILERO_TORCH_H78#include <string>9#include <memory>10#include <stdexcept>11#include <iostream>12#include <memory>13#include <vector>14#include <fstream>15#include <chrono>1617#include <torch/torch.h>18#include <torch/script.h>192021namespace silero{2223struct SpeechSegment{24int start;25int end;26};2728class VadIterator{29public:3031VadIterator(const std::string &model_path, float threshold = 0.5, int sample_rate = 16000,32int window_size_ms = 32, int speech_pad_ms = 30, int min_silence_duration_ms = 100,33int min_speech_duration_ms = 250, int max_duration_merge_ms = 300, bool print_as_samples = false);34~VadIterator();353637void SpeechProbs(std::vector<float>& input_wav);38std::vector<silero::SpeechSegment> GetSpeechTimestamps();39void SetVariables();4041float threshold;42int sample_rate;43int window_size_ms;44int min_speech_duration_ms;45int max_duration_merge_ms;46bool print_as_samples;4748private:49torch::jit::script::Module model;50std::vector<float> outputs_prob;51int min_silence_samples;52int min_speech_samples;53int speech_pad_samples;54int window_size_samples;55int duration_merge_samples;56int current_sample = 0;5758int total_sample_size=0;5960int min_silence_duration_ms;61int speech_pad_ms;62bool triggered = false;63int temp_end = 0;6465void init_engine(int window_size_ms);66void init_torch_model(const std::string& model_path);67void reset_states();68std::vector<SpeechSegment> DoVad();69std::vector<SpeechSegment> mergeSpeeches(const std::vector<SpeechSegment>& speeches, int duration_merge_samples);7071};7273}74#endif // SILERO_TORCH_H757677