CoCalc -- SlieroVadOnnxModel.java

GitHub Repository: snakers4/silero-vad
Path: blob/master/examples/java-example/src/main/java/org/example/SlieroVadOnnxModel.java
¹¹⁷¹ views
1
package org.example;
2

3
import ai.onnxruntime.OnnxTensor;
4
import ai.onnxruntime.OrtEnvironment;
5
import ai.onnxruntime.OrtException;
6
import ai.onnxruntime.OrtSession;
7
import java.util.Arrays;
8
import java.util.HashMap;
9
import java.util.List;
10
import java.util.Map;
11

12
public class SlieroVadOnnxModel {
13
    // Define private variable OrtSession
14
    private final OrtSession session;
15
    private float[][][] h;
16
    private float[][][] c;
17
    // Define the last sample rate
18
    private int lastSr = 0;
19
    // Define the last batch size
20
    private int lastBatchSize = 0;
21
    // Define a list of supported sample rates
22
    private static final List<Integer> SAMPLE_RATES = Arrays.asList(8000, 16000);
23

24
    // Constructor
25
    public SlieroVadOnnxModel(String modelPath) throws OrtException {
26
        // Get the ONNX runtime environment
27
        OrtEnvironment env = OrtEnvironment.getEnvironment();
28
        // Create an ONNX session options object
29
        OrtSession.SessionOptions opts = new OrtSession.SessionOptions();
30
        // Set the InterOp thread count to 1, InterOp threads are used for parallel processing of different computation graph operations
31
        opts.setInterOpNumThreads(1);
32
        // Set the IntraOp thread count to 1, IntraOp threads are used for parallel processing within a single operation
33
        opts.setIntraOpNumThreads(1);
34
        // Add a CPU device, setting to false disables CPU execution optimization
35
        opts.addCPU(true);
36
        // Create an ONNX session using the environment, model path, and options
37
        session = env.createSession(modelPath, opts);
38
        // Reset states
39
        resetStates();
40
    }
41

42
    /**
43
     * Reset states
44
     */
45
    void resetStates() {
46
        h = new float[2][1][64];
47
        c = new float[2][1][64];
48
        lastSr = 0;
49
        lastBatchSize = 0;
50
    }
51

52
    public void close() throws OrtException {
53
        session.close();
54
    }
55

56
    /**
57
     * Define inner class ValidationResult
58
     */
59
    public static class ValidationResult {
60
        public final float[][] x;
61
        public final int sr;
62

63
        // Constructor
64
        public ValidationResult(float[][] x, int sr) {
65
            this.x = x;
66
            this.sr = sr;
67
        }
68
    }
69

70
    /**
71
     * Function to validate input data
72
     */
73
    private ValidationResult validateInput(float[][] x, int sr) {
74
        // Process the input data with dimension 1
75
        if (x.length == 1) {
76
            x = new float[][]{x[0]};
77
        }
78
        // Throw an exception when the input data dimension is greater than 2
79
        if (x.length > 2) {
80
            throw new IllegalArgumentException("Incorrect audio data dimension: " + x[0].length);
81
        }
82

83
        // Process the input data when the sample rate is not equal to 16000 and is a multiple of 16000
84
        if (sr != 16000 && (sr % 16000 == 0)) {
85
            int step = sr / 16000;
86
            float[][] reducedX = new float[x.length][];
87

88
            for (int i = 0; i < x.length; i++) {
89
                float[] current = x[i];
90
                float[] newArr = new float[(current.length + step - 1) / step];
91

92
                for (int j = 0, index = 0; j < current.length; j += step, index++) {
93
                    newArr[index] = current[j];
94
                }
95

96
                reducedX[i] = newArr;
97
            }
98

99
            x = reducedX;
100
            sr = 16000;
101
        }
102

103
        // If the sample rate is not in the list of supported sample rates, throw an exception
104
        if (!SAMPLE_RATES.contains(sr)) {
105
            throw new IllegalArgumentException("Only supports sample rates " + SAMPLE_RATES + " (or multiples of 16000)");
106
        }
107

108
        // If the input audio block is too short, throw an exception
109
        if (((float) sr) / x[0].length > 31.25) {
110
            throw new IllegalArgumentException("Input audio is too short");
111
        }
112

113
        // Return the validated result
114
        return new ValidationResult(x, sr);
115
    }
116

117
    /**
118
     * Method to call the ONNX model
119
     */
120
    public float[] call(float[][] x, int sr) throws OrtException {
121
        ValidationResult result = validateInput(x, sr);
122
        x = result.x;
123
        sr = result.sr;
124

125
        int batchSize = x.length;
126

127
        if (lastBatchSize == 0 || lastSr != sr || lastBatchSize != batchSize) {
128
            resetStates();
129
        }
130

131
        OrtEnvironment env = OrtEnvironment.getEnvironment();
132

133
        OnnxTensor inputTensor = null;
134
        OnnxTensor hTensor = null;
135
        OnnxTensor cTensor = null;
136
        OnnxTensor srTensor = null;
137
        OrtSession.Result ortOutputs = null;
138

139
        try {
140
            // Create input tensors
141
            inputTensor = OnnxTensor.createTensor(env, x);
142
            hTensor = OnnxTensor.createTensor(env, h);
143
            cTensor = OnnxTensor.createTensor(env, c);
144
            srTensor = OnnxTensor.createTensor(env, new long[]{sr});
145

146
            Map<String, OnnxTensor> inputs = new HashMap<>();
147
            inputs.put("input", inputTensor);
148
            inputs.put("sr", srTensor);
149
            inputs.put("h", hTensor);
150
            inputs.put("c", cTensor);
151

152
            // Call the ONNX model for calculation
153
            ortOutputs = session.run(inputs);
154
            // Get the output results
155
            float[][] output = (float[][]) ortOutputs.get(0).getValue();
156
            h = (float[][][]) ortOutputs.get(1).getValue();
157
            c = (float[][][]) ortOutputs.get(2).getValue();
158

159
            lastSr = sr;
160
            lastBatchSize = batchSize;
161
            return output[0];
162
        } finally {
163
            if (inputTensor != null) {
164
                inputTensor.close();
165
            }
166
            if (hTensor != null) {
167
                hTensor.close();
168
            }
169
            if (cTensor != null) {
170
                cTensor.close();
171
            }
172
            if (srTensor != null) {
173
                srTensor.close();
174
            }
175
            if (ortOutputs != null) {
176
                ortOutputs.close();
177
            }
178
        }
179
    }
180
}
181

182
Product

Resources

Company