Ziyi Lin
committed on
Commit
·
b50f2a2
1
Parent(s):
a5398ec
ONNX open source and license terms update
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- LICENSE +47 -0
- NOTICES +68 -0
- README.md +205 -64
- examples/.gitattributes +0 -1
- examples/CMakeLists.txt +6 -0
- examples/build-and-deploy-android.sh +6 -0
- examples/build-and-deploy-ios.sh +6 -0
- examples/build-and-deploy-linux.sh +6 -0
- examples/build-and-deploy-mac.sh +6 -0
- examples/build-and-deploy-windows.bat +7 -0
- examples/images/.gitattributes +0 -2
- examples/main.c +14 -6
- examples/plot_pr_curves.py +5 -4
- examples/sample_array.h +6 -0
- examples/test.py +4 -3
- examples/test_node.js +529 -0
- examples_onnx/CMakeLists.txt +24 -0
- examples_onnx/build-and-deploy-linux.sh +37 -0
- include/ten_vad.h +4 -3
- include/ten_vad.py +4 -3
- lib/Web/ten_vad.d.ts +111 -0
- lib/Web/ten_vad.js +30 -0
- lib/{macOS/ten_vad.framework/Versions/Current/Headers/ten_vad.h → Web/ten_vad.wasm} +2 -2
- lib/Windows/x64/ten_vad.lib +0 -0
- lib/Windows/x86/ten_vad.lib +0 -0
- lib/iOS/ten_vad.framework/Headers/ten_vad.h +90 -3
- lib/iOS/ten_vad.framework/Info.plist +0 -0
- lib/iOS/ten_vad.framework/Modules/module.modulemap +5 -3
- lib/macOS/ten_vad.framework/Headers +1 -0
- lib/macOS/ten_vad.framework/Headers/ten_vad.h +0 -3
- lib/macOS/ten_vad.framework/Resources +1 -0
- lib/macOS/ten_vad.framework/Resources/Info.plist +0 -3
- lib/macOS/ten_vad.framework/Versions/A/Headers/ten_vad.h +90 -3
- lib/macOS/ten_vad.framework/Versions/A/Resources/Info.plist +44 -3
- lib/macOS/ten_vad.framework/Versions/Current +1 -0
- lib/macOS/ten_vad.framework/Versions/Current/ten_vad +0 -3
- lib/macOS/ten_vad.framework/ten_vad +0 -3
- lib/macOS/ten_vad.framework/ten_vad +1 -0
- setup.py +6 -0
- src/aed.cc +993 -0
- src/aed.h +226 -0
- src/aed_st.h +132 -0
- src/biquad.cc +354 -0
- src/biquad.h +190 -0
- src/biquad_st.h +37 -0
- src/coeff.h +246 -0
- src/fftw.c +0 -0
- src/fftw.h +47 -0
- src/fscvrt.cc +541 -0
- src/fscvrt.h +186 -0
LICENSE
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Open Source License
|
2 |
+
|
3 |
+
The ten-vad is licensed pursuant to the Apache License v2.0, with the
|
4 |
+
following additional conditions. You may reproduce, prepare Derivative Works
|
5 |
+
of, publicly display, publicly perform, sublicense, distribute, or otherwise
|
6 |
+
make available (together, "Deploy") the ten-vad, for commercial or
|
7 |
+
non-commercial purposes, provided that you agree to abide by the terms below:
|
8 |
+
|
9 |
+
1. You may not Deploy the ten-vad in a way that competes with Agora's
|
10 |
+
offerings and/or that allows others to compete with Agora's offerings,
|
11 |
+
including without limitation enabling any third party to develop or
|
12 |
+
deploy Applications.
|
13 |
+
|
14 |
+
2. You may Deploy the ten-vad solely to create and enable deployment
|
15 |
+
of your Application(s) solely for your benefit and the benefit of your
|
16 |
+
direct End Users. If you prefer, you may include the following notice in
|
17 |
+
the documentation of your Application(s): "Powered by ten-vad".
|
18 |
+
|
19 |
+
3. Derivative Works of the ten-vad remain subject to this Open Source
|
20 |
+
License.
|
21 |
+
|
22 |
+
4. "End Users" shall mean the end-users of your Application(s) who access
|
23 |
+
the ten-vad solely to the extent necessary to access and use the
|
24 |
+
Application(s) you create or deploy using ten-vad.
|
25 |
+
|
26 |
+
5. "Application(s)" shall mean your software programs designed or developed
|
27 |
+
by using the ten-vad or where deployment is enabled by the ten-vad.
|
28 |
+
|
29 |
+
Copyright © 2025 Agora
|
30 |
+
|
31 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
32 |
+
you may not use this file except in compliance with the License.
|
33 |
+
You may obtain a copy of the License at
|
34 |
+
|
35 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
36 |
+
|
37 |
+
Unless required by applicable law or agreed to in writing, software
|
38 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
39 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
40 |
+
See the License for the specific language governing permissions and
|
41 |
+
limitations under the License.
|
42 |
+
|
43 |
+
=======================================================================================
|
44 |
+
|
45 |
+
Note that the project contains derived code from other open source projects
|
46 |
+
with BSD-3-Clause and BSD-2-Clause licenses, refer to the "NOTICES"
|
47 |
+
file in the root directory for detailed information.
|
NOTICES
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
This project includes modified code from the following third-party components:
|
2 |
+
|
3 |
+
1. File: lpcnet_enc.c
|
4 |
+
- Source: LPCNet (https://github.com/xiph/LPCNet)
|
5 |
+
- License: BSD-2-Clause
|
6 |
+
- Copyright: 2017-2019, Mozilla
|
7 |
+
- Original License Text:
|
8 |
+
Copyright (c) 2017-2019 Mozilla
|
9 |
+
|
10 |
+
Redistribution and use in source and binary forms, with or without modification,
|
11 |
+
are permitted provided that the following conditions are met:
|
12 |
+
|
13 |
+
- Redistributions of source code must retain the above copyright notice,
|
14 |
+
this list of conditions and the following disclaimer.
|
15 |
+
|
16 |
+
- Redistributions in binary form must reproduce the above copyright notice,
|
17 |
+
this list of conditions and the following disclaimer in the documentation
|
18 |
+
and/or other materials provided with the distribution.
|
19 |
+
|
20 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
21 |
+
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
22 |
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
23 |
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
24 |
+
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
25 |
+
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
26 |
+
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
27 |
+
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
28 |
+
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
29 |
+
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
30 |
+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
31 |
+
|
32 |
+
2. Project: LPCNet
|
33 |
+
- Source: LPCNet (https://github.com/xiph/LPCNet)
|
34 |
+
- License: BSD-3-Clause
|
35 |
+
- Copyright: 2017-2018, Mozilla, 2007-2017, Jean-Marc Valin, 2005-2017, Xiph.Org Foundation, 2003-2004, Mark Borgerding
|
36 |
+
- Original License Text of LPCNet open source project:
|
37 |
+
Copyright (c) 2017-2018, Mozilla
|
38 |
+
Copyright (c) 2007-2017, Jean-Marc Valin
|
39 |
+
Copyright (c) 2005-2017, Xiph.Org Foundation
|
40 |
+
Copyright (c) 2003-2004, Mark Borgerding
|
41 |
+
|
42 |
+
Redistribution and use in source and binary forms, with or without
|
43 |
+
modification, are permitted provided that the following conditions
|
44 |
+
are met:
|
45 |
+
|
46 |
+
- Redistributions of source code must retain the above copyright
|
47 |
+
notice, this list of conditions and the following disclaimer.
|
48 |
+
|
49 |
+
- Redistributions in binary form must reproduce the above copyright
|
50 |
+
notice, this list of conditions and the following disclaimer in the
|
51 |
+
documentation and/or other materials provided with the distribution.
|
52 |
+
|
53 |
+
- Neither the name of the Xiph.Org Foundation nor the names of its
|
54 |
+
contributors may be used to endorse or promote products derived from
|
55 |
+
this software without specific prior written permission.
|
56 |
+
|
57 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
58 |
+
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
59 |
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
60 |
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION
|
61 |
+
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
62 |
+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
63 |
+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
64 |
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
65 |
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
66 |
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
67 |
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
68 |
+
|
README.md
CHANGED
@@ -1,26 +1,94 @@
|
|
1 |
-
|
2 |
-
tags:
|
3 |
-
- voice activity detection
|
4 |
-
- speech activity detection
|
5 |
-
- real time
|
6 |
-
- vad
|
7 |
-
- sad
|
8 |
-
- speech
|
9 |
-
- audio
|
10 |
-
- silero vad
|
11 |
-
- conversational
|
12 |
-
- automatic speech recognition
|
13 |
-
pipeline_tag: voice-activity-detection
|
14 |
-
---
|
15 |
-
# **TEN VAD**
|
16 |
-
***A Low-Latency, Lightweight and High-Performance Streaming VAD***
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
## **Introduction**
|
21 |
**TEN VAD** is a real-time voice activity detection system designed for enterprise use, providing accurate frame-level speech activity detection. It shows superior precision compared to both WebRTC VAD and Silero VAD, which are commonly used in the industry. Additionally, TEN VAD offers lower computational complexity and reduced memory usage compared to Silero VAD. Meanwhile, the architecture's temporal efficiency enables rapid voice activity detection, significantly reducing end-to-end response and turn detection latency in conversational AI systems.
|
22 |
|
23 |
|
|
|
24 |
|
25 |
## **Key Features**
|
26 |
|
@@ -28,6 +96,7 @@ pipeline_tag: voice-activity-detection
|
|
28 |
|
29 |
The precision-recall curves comparing the performance of WebRTC VAD (pitch-based), Silero VAD, and TEN VAD are shown below. The evaluation is conducted on the precisely manually annotated testset. The audio files are from librispeech, gigaspeech, DNS Challenge etc. As demonstrated, TEN VAD achieves the best performance. Additionally, cross-validation experiments conducted on large internal real-world datasets demonstrate the reproducibility of these findings. The **testset with annotated labels** is released in directory "testset" of this repository.
|
30 |
|
|
|
31 |
|
32 |
<div style="text-align:">
|
33 |
<img src="./examples/images/PR_Curves_testset.png" width="800">
|
@@ -39,14 +108,14 @@ Note that the default threshold of 0.5 is used to generate binary speech indicat
|
|
39 |
cd ./examples
|
40 |
python plot_pr_curves.py
|
41 |
```
|
42 |
-
|
43 |
|
44 |
### **2. Agent-Friendly:**
|
45 |
As illustrated in the figure below, TEN VAD rapidly detects speech-to-non-speech transitions, whereas Silero VAD suffers from a delay of several hundred milliseconds, resulting in increased end-to-end latency in human-agent interaction systems. In addition, as demonstrated in the 6.5s-7.0s audio segment, Silero VAD fails to identify short silent durations between adjacent speech segments.
|
46 |
<div style="text-align:">
|
47 |
<img src="./examples/images/Agent-Friendly-image.png" width="800">
|
48 |
</div>
|
49 |
-
|
50 |
|
51 |
### **3. Lightweight:**
|
52 |
We evaluated the RTF (Real-Time Factor) across five distinct platforms, each equipped with varying CPUs. TEN VAD demonstrates much lower computational complexity and smaller library size than Silero VAD.
|
@@ -57,6 +126,7 @@ We evaluated the RTF (Real-Time Factor) across five distinct platforms, each equ
|
|
57 |
<th align="center" rowspan="2" valign="middle"> CPU </th>
|
58 |
<th align="center" colspan="2"> RTF </th>
|
59 |
<th align="center" colspan="2"> Lib Size </th>
|
|
|
60 |
</tr>
|
61 |
<tr>
|
62 |
<th align="center" style="white-space: nowrap;"> TEN VAD </th>
|
@@ -68,16 +138,16 @@ We evaluated the RTF (Real-Time Factor) across five distinct platforms, each equ
|
|
68 |
<th align="center" rowspan="3"> Linux </th>
|
69 |
<td style="white-space: nowrap;"> AMD Ryzen 9 5900X 12-Core </td>
|
70 |
<td align="center"> 0.0150 </td>
|
71 |
-
<td rowspan="2"
|
72 |
-
<td rowspan="3"
|
73 |
-
<td rowspan="
|
74 |
</tr>
|
75 |
<tr>
|
76 |
-
<td > Intel(R) Xeon(R) Platinum 8253 </td>
|
77 |
<td align="center"> 0.0136 </td>
|
78 |
</tr>
|
79 |
<tr>
|
80 |
-
<td > Intel(R) Xeon(R) Gold 6348 CPU @ 2.60GHz </td>
|
81 |
<td align="center"> 0.0086 </td>
|
82 |
<td align="center"> 0.0127 </td>
|
83 |
</tr>
|
@@ -85,7 +155,7 @@ We evaluated the RTF (Real-Time Factor) across five distinct platforms, each equ
|
|
85 |
<th align="center"> Windows </th>
|
86 |
<td> Intel i7-10710U </td>
|
87 |
<td align="center"> 0.0150 </td>
|
88 |
-
<td rowspan="
|
89 |
<td align="center" style="white-space: nowrap;"> 464KB(x86) / 508KB(x64) </td>
|
90 |
</tr>
|
91 |
<tr>
|
@@ -94,11 +164,17 @@ We evaluated the RTF (Real-Time Factor) across five distinct platforms, each equ
|
|
94 |
<td align="center"> 0.0160 </td>
|
95 |
<td align="center"> 731KB </td>
|
96 |
</tr>
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
<tr>
|
98 |
<th align="center" rowspan="2"> Android </th>
|
99 |
<td> Galaxy J6+ (32bit, 425) </td>
|
100 |
<td align="center"> 0.0570 </td>
|
101 |
-
<td rowspan="2" style="
|
102 |
</tr>
|
103 |
<tr>
|
104 |
<td> Oppo A3s (450) </td>
|
@@ -108,33 +184,31 @@ We evaluated the RTF (Real-Time Factor) across five distinct platforms, each equ
|
|
108 |
<th align="center" rowspan="2"> iOS </th>
|
109 |
<td> iPhone6 (A8) </td>
|
110 |
<td align="center"> 0.0210 </td>
|
111 |
-
<td
|
112 |
</tr>
|
113 |
<tr>
|
114 |
<td> iPhone8 (A11) </td>
|
115 |
<td align="center"> 0.0050 </td>
|
116 |
</tr>
|
117 |
</table>
|
118 |
-
|
119 |
-
<style>
|
120 |
-
th, td {
|
121 |
-
border: 1px solid #ddd;
|
122 |
-
padding: 8px;
|
123 |
-
}
|
124 |
-
</style>
|
125 |
|
126 |
### **4. Multiple programming languages and platforms:**
|
127 |
-
TEN VAD provides cross-platform C compatibility across five operating systems (Linux x64, Windows, macOS, Android, iOS), with Python bindings optimized for Linux x64.
|
|
|
|
|
128 |
|
129 |
|
130 |
### **5. Supproted sampling rate and hop size:**
|
131 |
TEN VAD operates on 16kHz audio input with configurable hop sizes (optimized frame configurations: 160/256 samples=10/16ms). Other sampling rates must be resampled to 16kHz.
|
132 |
-
|
|
|
133 |
|
134 |
## **Installation**
|
135 |
```
|
136 |
-
git clone https://
|
137 |
```
|
|
|
138 |
|
139 |
## **Quick Start**
|
140 |
The project supports five major platforms with dynamic library linking.
|
@@ -152,7 +226,7 @@ The project supports five major platforms with dynamic library linking.
|
|
152 |
<td align="center"> libten_vad.so </td>
|
153 |
<td align="center"> x64 </td>
|
154 |
<td align="center"> Python, C </td>
|
155 |
-
<td rowspan="
|
156 |
<td> </td>
|
157 |
</tr>
|
158 |
<tr>
|
@@ -169,6 +243,13 @@ The project supports five major platforms with dynamic library linking.
|
|
169 |
<td align="center"> C </td>
|
170 |
<td> </td>
|
171 |
</tr>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
172 |
<tr>
|
173 |
<th align="center"> Android </th>
|
174 |
<td align="center"> libten_vad.so </td>
|
@@ -178,13 +259,14 @@ The project supports five major platforms with dynamic library linking.
|
|
178 |
</tr>
|
179 |
<tr>
|
180 |
<th align="center"> iOS </th>
|
181 |
-
<td align="center"
|
182 |
-
<td align="center"
|
183 |
<td align="center"> C </td>
|
184 |
<td> 1. not simulator <br> 2. not iPad </td>
|
185 |
</tr>
|
186 |
-
</table>
|
187 |
|
|
|
|
|
188 |
|
189 |
### **Python Usage**
|
190 |
#### **1. Linux**
|
@@ -201,7 +283,7 @@ Note: You could use other versions of above packages, but we didn't test other v
|
|
201 |
|
202 |
<br>
|
203 |
|
204 |
-
The **lib** only
|
205 |
|
206 |
```pip install -r requirements.txt```
|
207 |
|
@@ -219,6 +301,7 @@ sudo apt install libc++1
|
|
219 |
|
220 |
<br>
|
221 |
|
|
|
222 |
#### **Usage**
|
223 |
Note: For usage in python, you can either use it by **git clone** or **pip**.
|
224 |
|
@@ -226,7 +309,7 @@ Note: For usage in python, you can either use it by **git clone** or **pip**.
|
|
226 |
|
227 |
1. Clone the repository
|
228 |
```
|
229 |
-
git clone https://
|
230 |
```
|
231 |
|
232 |
2. Enter examples directory
|
@@ -238,6 +321,7 @@ cd ./examples
|
|
238 |
```
|
239 |
python test.py s0724-s0730.wav out.txt
|
240 |
```
|
|
|
241 |
|
242 |
##### **By using pip:**
|
243 |
|
@@ -252,10 +336,25 @@ pip install -U --force-reinstall -v git+https://github.com/TEN-framework/ten-vad
|
|
252 |
```
|
253 |
from ten_vad import TenVad
|
254 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
255 |
|
256 |
### **C Usage**
|
257 |
#### **Build Scripts**
|
258 |
-
Located in examples/ directory:
|
259 |
|
260 |
- Linux: build-and-deploy-linux.sh
|
261 |
- Windows: build-and-deploy-windows.bat
|
@@ -275,12 +374,13 @@ Runtime library path configuration:
|
|
275 |
- Configure toolchain and architecture settings
|
276 |
|
277 |
#### **Overview of Usage**
|
278 |
-
- Navigate to examples/
|
279 |
- Execute platform-specific build script
|
280 |
- Configure dynamic library path
|
281 |
- Run demo with sample audio s0724-s0730.wav
|
282 |
- Processed results saved to out.txt
|
283 |
|
|
|
284 |
|
285 |
The detailed usage methods of each platform are as follows <br>
|
286 |
|
@@ -296,12 +396,22 @@ sudo apt update
|
|
296 |
sudo apt install libc++1
|
297 |
```
|
298 |
|
299 |
-
##### **Usage**
|
300 |
```
|
301 |
1) cd ./examples
|
302 |
2) ./build-and-deploy-linux.sh
|
303 |
```
|
304 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
305 |
#### **2. Windows**
|
306 |
##### **Requirements**
|
307 |
- Visual Studio (2017, 2019, 2022 verified)
|
@@ -316,6 +426,7 @@ sudo apt install libc++1
|
|
316 |
- Visual Studio version (default: 2019)
|
317 |
3) ./build-and-deploy-windows.bat
|
318 |
```
|
|
|
319 |
|
320 |
#### **3. macOS**
|
321 |
##### **Requirements**
|
@@ -330,6 +441,7 @@ sudo apt install libc++1
|
|
330 |
- Alternative: x86_64 (Intel)
|
331 |
3) ./build-and-deploy-mac.sh
|
332 |
```
|
|
|
333 |
|
334 |
#### **4. Android**
|
335 |
##### **Requirements**
|
@@ -346,6 +458,7 @@ sudo apt install libc++1
|
|
346 |
- Toolchain: aarch64-linux-android-clang (default) or custom NDK toolchain
|
347 |
4) ./build-and-deploy-android.sh
|
348 |
```
|
|
|
349 |
|
350 |
#### **5. iOS**
|
351 |
##### **Requirements**
|
@@ -397,6 +510,29 @@ cd ./examples
|
|
397 |
|
398 |
3.5. Build in Xcode and run demo on your device.
|
399 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
400 |
## **Citations**
|
401 |
```
|
402 |
@misc{TEN VAD,
|
@@ -409,29 +545,34 @@ cd ./examples
|
|
409 |
email = {[email protected]}
|
410 |
}
|
411 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
412 |
|
413 |
-
## Usage Guidance
|
414 |
|
415 |
-
|
416 |
-
User devices, including but not limited to any mobile terminal devices
|
417 |
-
or (ii) Deploy the TEN VAD in a way that competes with Agora's
|
418 |
-
offerings and/or that allows others to compete with Agora's offerings,
|
419 |
-
including without limitation enabling any third party to develop or
|
420 |
-
deploy Applications.
|
421 |
|
422 |
-
|
423 |
-
|
424 |
-
|
425 |
-
the documentation of your Application(s): "Powered by TEN VAD".
|
426 |
|
427 |
-
|
428 |
-
|
429 |
-
|
430 |
|
431 |
-
|
432 |
-
|
433 |
-
|
434 |
|
435 |
-
|
|
|
|
|
436 |
|
437 |
-
|
|
|
|
|
|
1 |
+
![TEN VAD banner][ten-vad-banner]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
+
[](https://github.com/TEN-framework/ten-vad/discussions/)
|
4 |
+
[](https://github.com/TEN-framework/ten-vad/graphs/commit-activity)
|
5 |
+
[](https://github.com/TEN-framework/ten-vad/issues)
|
6 |
+

|
7 |
+
[](https://github.com/TEN-framework/ten-vad/pulls)
|
8 |
+
[](https://deepwiki.com/TEN-framework/TEN-vad)
|
9 |
|
10 |
+
[](https://GitHub.com/TEN-framework/ten-vad/watchers/?WT.mc_id=academic-105485-koreyst)
|
11 |
+
[](https://GitHub.com/TEN-framework/ten-vad/network/?WT.mc_id=academic-105485-koreyst)
|
12 |
+
[](https://GitHub.com/TEN-framework/ten-vad/stargazers/?WT.mc_id=academic-105485-koreyst)
|
13 |
+
|
14 |
+
<br>
|
15 |
+
|
16 |
+
*Latest News* 🔥
|
17 |
+
- [2025/06] We **finally** released and **open-sourced** the **ONNX** model and the corresponding **preprocessing code**! Now you can deploy **TEN VAD** on **any platform** and **any hardware architecture**!
|
18 |
+
- [2025/06] We are excited to announce the release of **WASM+JS** for Web WASM Support.
|
19 |
+
<br>
|
20 |
+
|
21 |
+
## Table of Contents
|
22 |
+
|
23 |
+
- [Welcome to TEN](#welcome-to-ten)
|
24 |
+
- [TEN Hugging Face Space](#ten-hugging-face-space)
|
25 |
+
- [Introduction](#introduction)
|
26 |
+
- [Key Features](#key-features)
|
27 |
+
- [High-Performance](#1-high-performance)
|
28 |
+
- [Agent-Friendly](#2-agent-friendly)
|
29 |
+
- [Lightweight](#3-lightweight)
|
30 |
+
- [Multiple Programming Languages and Platforms](#4-multiple-programming-languages-and-platforms)
|
31 |
+
- [Supported Sampling Rate and Hop Size](#5-supproted-sampling-rate-and-hop-size)
|
32 |
+
- [Installation](#installation)
|
33 |
+
- [Quick Start](#quick-start)
|
34 |
+
- [Python Usage](#python-usage)
|
35 |
+
- [Linux](#1-linux)
|
36 |
+
- [JS Usage](#js-usage)
|
37 |
+
- [Web](#1-web)
|
38 |
+
- [C Usage](#c-usage)
|
39 |
+
- [Linux](#1-linux-1)
|
40 |
+
- [Windows](#2-windows)
|
41 |
+
- [macOS](#3-macos)
|
42 |
+
- [Android](#4-android)
|
43 |
+
- [iOS](#5-ios)
|
44 |
+
- [TEN Ecosystem](#ten-ecosystem)
|
45 |
+
- [Ask Questions](#ask-questions)
|
46 |
+
- [Citations](#citations)
|
47 |
+
- [License](#license)
|
48 |
+
|
49 |
+
<br>
|
50 |
+
|
51 |
+
## Welcome to TEN
|
52 |
+
|
53 |
+
TEN is a collection of open-source projects for building real-time, multimodal conversational voice agents. It includes [ TEN Framework ](https://github.com/ten-framework/ten-framework), [ TEN VAD ](https://github.com/ten-framework/ten-vad), [ TEN Turn Detection ](https://github.com/ten-framework/ten-turn-detection), TEN Agent, TMAN Designer, and [ TEN Portal ](https://github.com/ten-framework/portal), all fully open-source.
|
54 |
+
|
55 |
+
<br>
|
56 |
+
|
57 |
+
| Community Channel | Purpose |
|
58 |
+
| ---------------- | ------- |
|
59 |
+
| [](https://twitter.com/intent/follow?screen_name=TenFramework) | Follow TEN Framework on X for updates and announcements |
|
60 |
+
| [](https://www.linkedin.com/company/ten-framework) | Follow TEN Framework on LinkedIn for updates and announcements |
|
61 |
+
| [](https://discord.gg/VnPftUzAMJ) | Join our Discord community to connect with developers |
|
62 |
+
| [](https://huggingface.co/TEN-framework) | Join our Hugging Face community to explore our spaces and models |
|
63 |
+
| [](https://github.com/TEN-framework/ten-agent/discussions/170) | Join our WeChat group for Chinese community discussions |
|
64 |
+
|
65 |
+
<br>
|
66 |
+
|
67 |
+
> \[!IMPORTANT]
|
68 |
+
>
|
69 |
+
> **Star TEN Repositories** ⭐️
|
70 |
+
>
|
71 |
+
> Get instant notifications for new releases and updates. Your support helps us grow and improve TEN!
|
72 |
+
|
73 |
+
<br>
|
74 |
+
|
75 |
+

|
76 |
+
|
77 |
+
<br>
|
78 |
+
|
79 |
+
## TEN Hugging Face Space
|
80 |
+
|
81 |
+
<https://github.com/user-attachments/assets/725a8318-d679-4b17-b9e4-e3dce999b298>
|
82 |
+
|
83 |
+
You are more than welcome to [Visit TEN Hugging Face Space](https://huggingface.co/spaces/TEN-framework/ten-agent-demo) to try VAD and Turn Detection together.
|
84 |
+
|
85 |
+
<br>
|
86 |
|
87 |
## **Introduction**
|
88 |
**TEN VAD** is a real-time voice activity detection system designed for enterprise use, providing accurate frame-level speech activity detection. It shows superior precision compared to both WebRTC VAD and Silero VAD, which are commonly used in the industry. Additionally, TEN VAD offers lower computational complexity and reduced memory usage compared to Silero VAD. Meanwhile, the architecture's temporal efficiency enables rapid voice activity detection, significantly reducing end-to-end response and turn detection latency in conversational AI systems.
|
89 |
|
90 |
|
91 |
+
<br>
|
92 |
|
93 |
## **Key Features**
|
94 |
|
|
|
96 |
|
97 |
The precision-recall curves comparing the performance of WebRTC VAD (pitch-based), Silero VAD, and TEN VAD are shown below. The evaluation is conducted on the precisely manually annotated testset. The audio files are from librispeech, gigaspeech, DNS Challenge etc. As demonstrated, TEN VAD achieves the best performance. Additionally, cross-validation experiments conducted on large internal real-world datasets demonstrate the reproducibility of these findings. The **testset with annotated labels** is released in directory "testset" of this repository.
|
98 |
|
99 |
+
<br>
|
100 |
|
101 |
<div style="text-align:">
|
102 |
<img src="./examples/images/PR_Curves_testset.png" width="800">
|
|
|
108 |
cd ./examples
|
109 |
python plot_pr_curves.py
|
110 |
```
|
111 |
+
<br>
|
112 |
|
113 |
### **2. Agent-Friendly:**
|
114 |
As illustrated in the figure below, TEN VAD rapidly detects speech-to-non-speech transitions, whereas Silero VAD suffers from a delay of several hundred milliseconds, resulting in increased end-to-end latency in human-agent interaction systems. In addition, as demonstrated in the 6.5s-7.0s audio segment, Silero VAD fails to identify short silent durations between adjacent speech segments.
|
115 |
<div style="text-align:">
|
116 |
<img src="./examples/images/Agent-Friendly-image.png" width="800">
|
117 |
</div>
|
118 |
+
<br>
|
119 |
|
120 |
### **3. Lightweight:**
|
121 |
We evaluated the RTF (Real-Time Factor) across five distinct platforms, each equipped with varying CPUs. TEN VAD demonstrates much lower computational complexity and smaller library size than Silero VAD.
|
|
|
126 |
<th align="center" rowspan="2" valign="middle"> CPU </th>
|
127 |
<th align="center" colspan="2"> RTF </th>
|
128 |
<th align="center" colspan="2"> Lib Size </th>
|
129 |
+
|
130 |
</tr>
|
131 |
<tr>
|
132 |
<th align="center" style="white-space: nowrap;"> TEN VAD </th>
|
|
|
138 |
<th align="center" rowspan="3"> Linux </th>
|
139 |
<td style="white-space: nowrap;"> AMD Ryzen 9 5900X 12-Core </td>
|
140 |
<td align="center"> 0.0150 </td>
|
141 |
+
<td align="center" rowspan="2" valign="middle"> / </td>
|
142 |
+
<td align="center" rowspan="3" valign="middle"> 306KB </td>
|
143 |
+
<td align="center" rowspan="10" style="white-space: nowrap;" valign="middle"> 2.16MB(JIT) / 2.22MB(ONNX) </td>
|
144 |
</tr>
|
145 |
<tr>
|
146 |
+
<td style="white-space: nowrap;"> Intel(R) Xeon(R) Platinum 8253 </td>
|
147 |
<td align="center"> 0.0136 </td>
|
148 |
</tr>
|
149 |
<tr>
|
150 |
+
<td style="white-space: nowrap;"> Intel(R) Xeon(R) Gold 6348 CPU @ 2.60GHz </td>
|
151 |
<td align="center"> 0.0086 </td>
|
152 |
<td align="center"> 0.0127 </td>
|
153 |
</tr>
|
|
|
155 |
<th align="center"> Windows </th>
|
156 |
<td> Intel i7-10710U </td>
|
157 |
<td align="center"> 0.0150 </td>
|
158 |
+
<td align="center" rowspan="7" valign="middle"> / </td>
|
159 |
<td align="center" style="white-space: nowrap;"> 464KB(x86) / 508KB(x64) </td>
|
160 |
</tr>
|
161 |
<tr>
|
|
|
164 |
<td align="center"> 0.0160 </td>
|
165 |
<td align="center"> 731KB </td>
|
166 |
</tr>
|
167 |
+
<tr>
|
168 |
+
<th align="center"> Web </th>
|
169 |
+
<td> macOS(M1) </td>
|
170 |
+
<td align="center"> 0.010 </td>
|
171 |
+
<td align="center"> 277KB </td>
|
172 |
+
</tr>
|
173 |
<tr>
|
174 |
<th align="center" rowspan="2"> Android </th>
|
175 |
<td> Galaxy J6+ (32bit, 425) </td>
|
176 |
<td align="center"> 0.0570 </td>
|
177 |
+
<td align="center" rowspan="2" style="white-space: nowrap;"> 373KB(v7a) / 532KB(v8a)</td>
|
178 |
</tr>
|
179 |
<tr>
|
180 |
<td> Oppo A3s (450) </td>
|
|
|
184 |
<th align="center" rowspan="2"> iOS </th>
|
185 |
<td> iPhone6 (A8) </td>
|
186 |
<td align="center"> 0.0210 </td>
|
187 |
+
<td align="center" rowspan="2"> 320KB</td>
|
188 |
</tr>
|
189 |
<tr>
|
190 |
<td> iPhone8 (A11) </td>
|
191 |
<td align="center"> 0.0050 </td>
|
192 |
</tr>
|
193 |
</table>
|
194 |
+
<br>
|
|
|
|
|
|
|
|
|
|
|
|
|
195 |
|
196 |
### **4. Multiple programming languages and platforms:**
|
197 |
+
TEN VAD provides cross-platform C compatibility across five operating systems (Linux x64, Windows, macOS, Android, iOS), with Python bindings optimized for Linux x64 and WASM support for Web.
|
198 |
+
<br>
|
199 |
+
<br>
|
200 |
|
201 |
|
202 |
### **5. Supproted sampling rate and hop size:**
|
203 |
TEN VAD operates on 16kHz audio input with configurable hop sizes (optimized frame configurations: 160/256 samples=10/16ms). Other sampling rates must be resampled to 16kHz.
|
204 |
+
<br>
|
205 |
+
<br>
|
206 |
|
207 |
## **Installation**
|
208 |
```
|
209 |
+
git clone https://github.com/TEN-framework/ten-vad.git
|
210 |
```
|
211 |
+
<br>
|
212 |
|
213 |
## **Quick Start**
|
214 |
The project supports five major platforms with dynamic library linking.
|
|
|
226 |
<td align="center"> libten_vad.so </td>
|
227 |
<td align="center"> x64 </td>
|
228 |
<td align="center"> Python, C </td>
|
229 |
+
<td rowspan="6">ten_vad.h <br> ten_vad.py <br> ten_vad.js</td>
|
230 |
<td> </td>
|
231 |
</tr>
|
232 |
<tr>
|
|
|
243 |
<td align="center"> C </td>
|
244 |
<td> </td>
|
245 |
</tr>
|
246 |
+
<tr>
|
247 |
+
<th align="center"> Web </th>
|
248 |
+
<td align="center"> ten_vad.wasm </td>
|
249 |
+
<td align="center"> / </td>
|
250 |
+
<td align="center"> JS </td>
|
251 |
+
<td> </td>
|
252 |
+
</tr>
|
253 |
<tr>
|
254 |
<th align="center"> Android </th>
|
255 |
<td align="center"> libten_vad.so </td>
|
|
|
259 |
</tr>
|
260 |
<tr>
|
261 |
<th align="center"> iOS </th>
|
262 |
+
<td align="center"> ten_vad.framework </td>
|
263 |
+
<td align="center"> arm64 </td>
|
264 |
<td align="center"> C </td>
|
265 |
<td> 1. not simulator <br> 2. not iPad </td>
|
266 |
</tr>
|
|
|
267 |
|
268 |
+
</table>
|
269 |
+
<br>
|
270 |
|
271 |
### **Python Usage**
|
272 |
#### **1. Linux**
|
|
|
283 |
|
284 |
<br>
|
285 |
|
286 |
+
The **lib** only depends on numpy; you have to install the dependency via requirements.txt:
|
287 |
|
288 |
```pip install -r requirements.txt```
|
289 |
|
|
|
301 |
|
302 |
<br>
|
303 |
|
304 |
+
|
305 |
#### **Usage**
|
306 |
Note: For usage in python, you can either use it by **git clone** or **pip**.
|
307 |
|
|
|
309 |
|
310 |
1. Clone the repository
|
311 |
```
|
312 |
+
git clone https://github.com/TEN-framework/ten-vad.git
|
313 |
```
|
314 |
|
315 |
2. Enter examples directory
|
|
|
321 |
```
|
322 |
python test.py s0724-s0730.wav out.txt
|
323 |
```
|
324 |
+
<br>
|
325 |
|
326 |
##### **By using pip:**
|
327 |
|
|
|
336 |
```
|
337 |
from ten_vad import TenVad
|
338 |
```
|
339 |
+
<br>
|
340 |
+
|
341 |
+
### **JS Usage**
|
342 |
+
|
343 |
+
#### **1. Web**
|
344 |
+
##### **Requirements**
|
345 |
+
- Node.js (macOS v14.18.2, Linux v16.20.2 verified)
|
346 |
+
- Terminal
|
347 |
+
|
348 |
+
##### **Usage**
|
349 |
+
```
|
350 |
+
1) cd ./examples
|
351 |
+
2) node test_node.js s0724-s0730.wav out.txt
|
352 |
+
```
|
353 |
+
<br>
|
354 |
|
355 |
### **C Usage**
|
356 |
#### **Build Scripts**
|
357 |
+
Located in the examples/ directory and the examples_onnx/ directory (for **ONNX** usage on Linux):
|
358 |
|
359 |
- Linux: build-and-deploy-linux.sh
|
360 |
- Windows: build-and-deploy-windows.bat
|
|
|
374 |
- Configure toolchain and architecture settings
|
375 |
|
376 |
#### **Overview of Usage**
|
377 |
+
- Navigate to examples/ or examples_onnx/ (for **ONNX** usage on Linux)
|
378 |
- Execute platform-specific build script
|
379 |
- Configure dynamic library path
|
380 |
- Run demo with sample audio s0724-s0730.wav
|
381 |
- Processed results saved to out.txt
|
382 |
|
383 |
+
<br>
|
384 |
|
385 |
The detailed usage methods of each platform are as follows <br>
|
386 |
|
|
|
396 |
sudo apt install libc++1
|
397 |
```
|
398 |
|
399 |
+
##### **Usage (prebuilt-lib)**
|
400 |
```
|
401 |
1) cd ./examples
|
402 |
2) ./build-and-deploy-linux.sh
|
403 |
```
|
404 |
|
405 |
+
##### **Usage (ONNX)**
|
406 |
+
You have to download the **onnxruntime** packages from the [official website](https://github.com/microsoft/onnxruntime). Note that the version of onnxruntime must be higher than or equal to 1.17.1 (e.g. onnxruntime-linux-x64-1.17.1.tgz).
|
407 |
+
```
|
408 |
+
1) cd examples_onnx/
|
409 |
+
2) ./build-and-deploy-linux.sh --ort-root /absolute/path/to/your/onnxruntime/root/dir
|
410 |
+
```
|
411 |
+
Note: If you execute the ONNX demo from a directory other than the one in which build-and-deploy-linux.sh was run, be sure to create a symbolic link to src/onnx_model/ to prevent ONNX model file loading failures.
|
412 |
+
|
413 |
+
<br>
|
414 |
+
|
415 |
#### **2. Windows**
|
416 |
##### **Requirements**
|
417 |
- Visual Studio (2017, 2019, 2022 verified)
|
|
|
426 |
- Visual Studio version (default: 2019)
|
427 |
3) ./build-and-deploy-windows.bat
|
428 |
```
|
429 |
+
<br>
|
430 |
|
431 |
#### **3. macOS**
|
432 |
##### **Requirements**
|
|
|
441 |
- Alternative: x86_64 (Intel)
|
442 |
3) ./build-and-deploy-mac.sh
|
443 |
```
|
444 |
+
<br>
|
445 |
|
446 |
#### **4. Android**
|
447 |
##### **Requirements**
|
|
|
458 |
- Toolchain: aarch64-linux-android-clang (default) or custom NDK toolchain
|
459 |
4) ./build-and-deploy-android.sh
|
460 |
```
|
461 |
+
<br>
|
462 |
|
463 |
#### **5. iOS**
|
464 |
##### **Requirements**
|
|
|
510 |
|
511 |
3.5. Build in Xcode and run demo on your device.
|
512 |
|
513 |
+
<br>
|
514 |
+
|
515 |
+
## TEN Ecosystem
|
516 |
+
|
517 |
+
| Project | Preview |
|
518 |
+
| ------- | ------- |
|
519 |
+
| [**🏚️ TEN Framework**][ten-framework-link]<br>TEN is an open-source framework for real-time, multimodal conversational AI.<br><br>![][ten-framework-shield] | ![][ten-framework-banner] |
|
520 |
+
| [**️🔂 TEN Turn Detection**][ten-turn-detection-link]<br>TEN is for full-duplex dialogue communication.<br><br>![][ten-turn-detection-shield] | ![][ten-turn-detection-banner] |
|
521 |
+
| [**🔉 TEN VAD**][ten-vad-link]<br>TEN VAD is a low-latency, lightweight and high-performance streaming voice activity detector (VAD).<br><br>![][ten-vad-shield] | ![][ten-vad-banner] |
|
522 |
+
| [**🎙️ TEN Agent**][ten-agent-link]<br>TEN Agent is a showcase of TEN Framework.<br><br> | ![][ten-agent-banner] |
|
523 |
+
| **🎨 TMAN Designer** <br>TMAN Designer is a low/no-code option for building a voice agent with an easy-to-use workflow UI.<br><br> | ![][tman-designer-banner] |
|
524 |
+
| [**📒 TEN Portal**][ten-portal-link]<br>The official site of the TEN Framework, with documentation and a blog.<br><br>![][ten-portal-shield] | ![][ten-portal-banner] |
|
525 |
+
|
526 |
+
<br>
|
527 |
+
|
528 |
+
## Ask Questions
|
529 |
+
|
530 |
+
[Ask DeepWiki](https://deepwiki.com/TEN-framework/TEN-vad)
|
531 |
+
|
532 |
+
Most questions can be answered by using DeepWiki; it is fast, intuitive to use, and supports multiple languages.
|
533 |
+
|
534 |
+
<br>
|
535 |
+
|
536 |
## **Citations**
|
537 |
```
|
538 |
@misc{TEN VAD,
|
|
|
545 |
email = {[email protected]}
|
546 |
}
|
547 |
```
|
548 |
+
<br>
|
549 |
+
|
550 |
+
## License
|
551 |
+
|
552 |
+
This project is licensed under Apache 2.0 with additional conditions. Refer to the "LICENSE" file in the root directory for detailed information. Note that `pitch_est.cc` contains modified code derived from [LPCNet](https://github.com/xiph/LPCNet), which is [BSD-2-Clause](https://spdx.org/licenses/BSD-2-Clause.html) and [BSD-3-Clause](https://spdx.org/licenses/BSD-3-Clause.html) licensed; refer to the NOTICES file in the root directory for detailed information.
|
553 |
+
|
554 |
+
|
555 |
+
<br>
|
556 |
|
|
|
557 |
|
558 |
+
[back-to-top]: https://img.shields.io/badge/-Back_to_top-gray?style=flat-square
|
|
|
|
|
|
|
|
|
|
|
559 |
|
560 |
+
[ten-framework-shield]: https://img.shields.io/github/stars/ten-framework/ten_framework?color=ffcb47&labelColor=gray&style=flat-square&logo=github
|
561 |
+
[ten-framework-banner]: https://github.com/user-attachments/assets/7c8f72d7-3993-4d01-8504-b71578a22944
|
562 |
+
[ten-framework-link]: https://github.com/ten-framework/ten_framework
|
|
|
563 |
|
564 |
+
[ten-vad-link]: https://github.com/ten-framework/ten-vad
|
565 |
+
[ten-vad-shield]: https://img.shields.io/github/stars/ten-framework/ten-vad?color=ffcb47&labelColor=gray&style=flat-square&logo=github
|
566 |
+
[ten-vad-banner]: https://github.com/user-attachments/assets/d45870e4-9453-4047-8163-08737f82863f
|
567 |
|
568 |
+
[ten-turn-detection-link]: https://github.com/ten-framework/ten-turn-detection
|
569 |
+
[ten-turn-detection-shield]: https://img.shields.io/github/stars/ten-framework/ten-turn-detection?color=ffcb47&labelColor=gray&style=flat-square&logo=github
|
570 |
+
[ten-turn-detection-banner]: https://github.com/user-attachments/assets/8d0ec716-5d0e-43e4-ad9a-d97b17305658
|
571 |
|
572 |
+
[ten-agent-link]: https://github.com/TEN-framework/ten-framework/tree/main/ai_agents
|
573 |
+
[ten-agent-banner]: https://github.com/user-attachments/assets/38de2207-939b-4702-a0aa-04491f5b5275
|
574 |
+
[tman-designer-banner]: https://github.com/user-attachments/assets/804c3543-0a47-42b7-b40b-ef32b742fb8f
|
575 |
|
576 |
+
[ten-portal-link]: https://github.com/ten-framework/portal
|
577 |
+
[ten-portal-shield]: https://img.shields.io/github/stars/ten-framework/portal?color=ffcb47&labelColor=gray&style=flat-square&logo=github
|
578 |
+
[ten-portal-banner]: https://github.com/user-attachments/assets/e17d8aaa-5928-45dd-ac71-814928e26a89
|
examples/.gitattributes
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
*.wav filter=lfs diff=lfs merge=lfs -text
|
|
|
|
examples/CMakeLists.txt
CHANGED
@@ -1,3 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
cmake_minimum_required(VERSION 3.10)
|
2 |
get_filename_component(ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../ ABSOLUTE)
|
3 |
|
|
|
1 |
+
#
|
2 |
+
# Copyright © 2025 Agora
|
3 |
+
# This file is part of TEN Framework, an open source project.
|
4 |
+
# Licensed under the Apache License, Version 2.0, with certain conditions.
|
5 |
+
# Refer to the "LICENSE" file in the root directory for more information.
|
6 |
+
#
|
7 |
cmake_minimum_required(VERSION 3.10)
|
8 |
get_filename_component(ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../ ABSOLUTE)
|
9 |
|
examples/build-and-deploy-android.sh
CHANGED
@@ -1,4 +1,10 @@
|
|
1 |
#!/bin/bash
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
set -eo pipefail
|
3 |
|
4 |
# Customize the arch and toolchain
|
|
|
1 |
#!/bin/bash
|
2 |
+
#
|
3 |
+
# Copyright © 2025 Agora
|
4 |
+
# This file is part of TEN Framework, an open source project.
|
5 |
+
# Licensed under the Apache License, Version 2.0, with certain conditions.
|
6 |
+
# Refer to the "LICENSE" file in the root directory for more information.
|
7 |
+
#
|
8 |
set -eo pipefail
|
9 |
|
10 |
# Customize the arch and toolchain
|
examples/build-and-deploy-ios.sh
CHANGED
@@ -1,4 +1,10 @@
|
|
1 |
#!/usr/bin/env bash
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
set -euo pipefail
|
3 |
|
4 |
work_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
|
|
1 |
#!/usr/bin/env bash
|
2 |
+
#
|
3 |
+
# Copyright © 2025 Agora
|
4 |
+
# This file is part of TEN Framework, an open source project.
|
5 |
+
# Licensed under the Apache License, Version 2.0, with certain conditions.
|
6 |
+
# Refer to the "LICENSE" file in the root directory for more information.
|
7 |
+
#
|
8 |
set -euo pipefail
|
9 |
|
10 |
work_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
examples/build-and-deploy-linux.sh
CHANGED
@@ -1,4 +1,10 @@
|
|
1 |
#!/bin/bash
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
set -euo pipefail
|
3 |
|
4 |
arch=x64
|
|
|
1 |
#!/bin/bash
|
2 |
+
#
|
3 |
+
# Copyright © 2025 Agora
|
4 |
+
# This file is part of TEN Framework, an open source project.
|
5 |
+
# Licensed under the Apache License, Version 2.0, with certain conditions.
|
6 |
+
# Refer to the "LICENSE" file in the root directory for more information.
|
7 |
+
#
|
8 |
set -euo pipefail
|
9 |
|
10 |
arch=x64
|
examples/build-and-deploy-mac.sh
CHANGED
@@ -1,4 +1,10 @@
|
|
1 |
#!/bin/bash
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
set -euo pipefail
|
3 |
|
4 |
# Customize the arch
|
|
|
1 |
#!/bin/bash
|
2 |
+
#
|
3 |
+
# Copyright © 2025 Agora
|
4 |
+
# This file is part of TEN Framework, an open source project.
|
5 |
+
# Licensed under the Apache License, Version 2.0, with certain conditions.
|
6 |
+
# Refer to the "LICENSE" file in the root directory for more information.
|
7 |
+
#
|
8 |
set -euo pipefail
|
9 |
|
10 |
# Customize the arch
|
examples/build-and-deploy-windows.bat
CHANGED
@@ -1,6 +1,13 @@
|
|
1 |
@echo off
|
2 |
setlocal
|
3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
@REM Customize the arch
|
5 |
set arch=x64
|
6 |
@REM set arch=x86
|
|
|
1 |
@echo off
|
2 |
setlocal
|
3 |
|
4 |
+
@REM
|
5 |
+
@REM Copyright © 2025 Agora
|
6 |
+
@REM This file is part of TEN Framework, an open source project.
|
7 |
+
@REM Licensed under the Apache License, Version 2.0, with certain conditions.
|
8 |
+
@REM Refer to the "LICENSE" file in the root directory for more information.
|
9 |
+
@REM
|
10 |
+
|
11 |
@REM Customize the arch
|
12 |
set arch=x64
|
13 |
@REM set arch=x86
|
examples/images/.gitattributes
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
*.jpg filter=lfs diff=lfs merge=lfs -text
|
2 |
-
*.png filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
examples/main.c
CHANGED
@@ -1,7 +1,8 @@
|
|
1 |
//
|
|
|
2 |
// This file is part of TEN Framework, an open source project.
|
3 |
-
// Licensed under the Apache License, Version 2.0.
|
4 |
-
//
|
5 |
//
|
6 |
#include <stdio.h>
|
7 |
#include <stdint.h>
|
@@ -86,9 +87,16 @@ int vad_process(int16_t *input_buf, uint32_t frame_num,
|
|
86 |
for (int i = 0; i < frame_num; ++i)
|
87 |
{
|
88 |
int16_t *audio_data = input_buf + i * hop_size;
|
89 |
-
ten_vad_process(ten_vad_handle, audio_data, hop_size,
|
90 |
-
|
91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
}
|
93 |
uint64_t end = get_timestamp_ms();
|
94 |
*use_time = (float)(end - start);
|
@@ -295,4 +303,4 @@ int read_wav_file(FILE *fp, wav_info_t *info)
|
|
295 |
// restore original file position
|
296 |
fseek(fp, orig_pos, SEEK_SET);
|
297 |
return 0;
|
298 |
-
}
|
|
|
1 |
//
|
2 |
+
// Copyright © 2025 Agora
|
3 |
// This file is part of TEN Framework, an open source project.
|
4 |
+
// Licensed under the Apache License, Version 2.0, with certain conditions.
|
5 |
+
// Refer to the "LICENSE" file in the root directory for more information.
|
6 |
//
|
7 |
#include <stdio.h>
|
8 |
#include <stdint.h>
|
|
|
87 |
for (int i = 0; i < frame_num; ++i)
|
88 |
{
|
89 |
int16_t *audio_data = input_buf + i * hop_size;
|
90 |
+
int res = ten_vad_process(ten_vad_handle, audio_data, hop_size,
|
91 |
+
&out_probs[i], &out_flags[i]);
|
92 |
+
if (res == 0)
|
93 |
+
{
|
94 |
+
printf("[%d] %0.6f, %d\n", i, out_probs[i], out_flags[i]);
|
95 |
+
}
|
96 |
+
else
|
97 |
+
{
|
98 |
+
printf("ten_vad_process failed res %d\n", res);
|
99 |
+
}
|
100 |
}
|
101 |
uint64_t end = get_timestamp_ms();
|
102 |
*use_time = (float)(end - start);
|
|
|
303 |
// restore original file position
|
304 |
fseek(fp, orig_pos, SEEK_SET);
|
305 |
return 0;
|
306 |
+
}
|
examples/plot_pr_curves.py
CHANGED
@@ -1,7 +1,8 @@
|
|
1 |
#
|
2 |
-
#
|
3 |
-
#
|
4 |
-
#
|
|
|
5 |
#
|
6 |
import os, glob, sys, torchaudio
|
7 |
import numpy as np
|
@@ -114,7 +115,7 @@ if __name__ == "__main__":
|
|
114 |
# Get the directory of the script
|
115 |
script_dir = os.path.dirname(os.path.abspath(__file__))
|
116 |
|
117 |
-
#
|
118 |
test_dir = f"{script_dir}/../testset"
|
119 |
|
120 |
# Initialization
|
|
|
1 |
#
|
2 |
+
# Copyright © 2025 Agora
|
3 |
+
# This file is part of TEN Framework, an open source project.
|
4 |
+
# Licensed under the Apache License, Version 2.0, with certain conditions.
|
5 |
+
# Refer to the "LICENSE" file in the root directory for more information.
|
6 |
#
|
7 |
import os, glob, sys, torchaudio
|
8 |
import numpy as np
|
|
|
115 |
# Get the directory of the script
|
116 |
script_dir = os.path.dirname(os.path.abspath(__file__))
|
117 |
|
118 |
+
# TEN-VAD-TestSet dir
|
119 |
test_dir = f"{script_dir}/../testset"
|
120 |
|
121 |
# Initialization
|
examples/sample_array.h
CHANGED
@@ -1,3 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
// Used for iOS APP demo
|
2 |
unsigned char sample_array[] = {
|
3 |
0xe3, 0xff, 0xd4, 0xff, 0xdc, 0xff, 0xe0, 0xff, 0xf6, 0xff, 0xf5, 0xff, 0xf6, 0xff, 0xfc, 0xff,
|
|
|
1 |
+
//
|
2 |
+
// Copyright © 2025 Agora
|
3 |
+
// This file is part of TEN Framework, an open source project.
|
4 |
+
// Licensed under the Apache License, Version 2.0, with certain conditions.
|
5 |
+
// Refer to the "LICENSE" file in the root directory for more information.
|
6 |
+
//
|
7 |
// Used for iOS APP demo
|
8 |
unsigned char sample_array[] = {
|
9 |
0xe3, 0xff, 0xd4, 0xff, 0xdc, 0xff, 0xe0, 0xff, 0xf6, 0xff, 0xf5, 0xff, 0xf6, 0xff, 0xfc, 0xff,
|
examples/test.py
CHANGED
@@ -1,7 +1,8 @@
|
|
1 |
#
|
2 |
-
#
|
3 |
-
#
|
4 |
-
#
|
|
|
5 |
#
|
6 |
import sys, os
|
7 |
|
|
|
1 |
#
|
2 |
+
# Copyright © 2025 Agora
|
3 |
+
# This file is part of TEN Framework, an open source project.
|
4 |
+
# Licensed under the Apache License, Version 2.0, with certain conditions.
|
5 |
+
# Refer to the "LICENSE" file in the root directory for more information.
|
6 |
#
|
7 |
import sys, os
|
8 |
|
examples/test_node.js
ADDED
@@ -0,0 +1,529 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env node
|
2 |
+
|
3 |
+
//
|
4 |
+
// Copyright © 2025 Agora
|
5 |
+
// This file is part of TEN Framework, an open source project.
|
6 |
+
// Licensed under the Apache License, Version 2.0, with certain conditions.
|
7 |
+
// Refer to the "LICENSE" file in the root directory for more information.
|
8 |
+
//
|
9 |
+
|
10 |
+
/**
|
11 |
+
* TEN VAD WebAssembly Node.js Test
|
12 |
+
* Simplified and clean version based on main.c
|
13 |
+
*/
|
14 |
+
|
15 |
+
const fs = require('fs');
|
16 |
+
const path = require('path');
|
17 |
+
|
18 |
+
// Configuration
|
19 |
+
const HOP_SIZE = 256; // 16ms per frame
|
20 |
+
const VOICE_THRESHOLD = 0.5; // Voice detection threshold
|
21 |
+
|
22 |
+
// WASM module paths
|
23 |
+
const WASM_DIR = './../lib/Web';
|
24 |
+
const WASM_JS_FILE = path.join(WASM_DIR, 'ten_vad.js');
|
25 |
+
const WASM_BINARY_FILE = path.join(WASM_DIR, 'ten_vad.wasm');
|
26 |
+
|
27 |
+
// Global state
|
28 |
+
let vadModule = null;
|
29 |
+
let vadHandle = null;
|
30 |
+
let vadHandlePtr = null;
|
31 |
+
|
32 |
+
// ============================================================================
|
33 |
+
// UTILITY FUNCTIONS
|
34 |
+
// ============================================================================
|
35 |
+
|
36 |
+
function getTimestamp() {
|
37 |
+
return Date.now();
|
38 |
+
}
|
39 |
+
|
40 |
+
function addHelperFunctions() {
|
41 |
+
if (!vadModule.getValue) {
|
42 |
+
vadModule.getValue = function(ptr, type) {
|
43 |
+
switch (type) {
|
44 |
+
case 'i32': return vadModule.HEAP32[ptr >> 2];
|
45 |
+
case 'float': return vadModule.HEAPF32[ptr >> 2];
|
46 |
+
default: throw new Error(`Unsupported type: ${type}`);
|
47 |
+
}
|
48 |
+
};
|
49 |
+
}
|
50 |
+
|
51 |
+
if (!vadModule.UTF8ToString) {
|
52 |
+
vadModule.UTF8ToString = function(ptr) {
|
53 |
+
if (!ptr) return '';
|
54 |
+
let result = '';
|
55 |
+
let i = ptr;
|
56 |
+
while (vadModule.HEAPU8[i]) {
|
57 |
+
result += String.fromCharCode(vadModule.HEAPU8[i++]);
|
58 |
+
}
|
59 |
+
return result;
|
60 |
+
};
|
61 |
+
}
|
62 |
+
}
|
63 |
+
|
64 |
+
// ============================================================================
|
65 |
+
// AUDIO GENERATION
|
66 |
+
// ============================================================================
|
67 |
+
|
68 |
+
function generateTestAudio(durationMs = 5000) {
|
69 |
+
const sampleRate = 16000;
|
70 |
+
const totalSamples = Math.floor(durationMs * sampleRate / 1000);
|
71 |
+
const audioData = new Int16Array(totalSamples);
|
72 |
+
|
73 |
+
console.log(`Generating ${totalSamples} samples for ${durationMs}ms audio...`);
|
74 |
+
|
75 |
+
for (let i = 0; i < totalSamples; i++) {
|
76 |
+
const t = i / sampleRate;
|
77 |
+
let sample = 0;
|
78 |
+
|
79 |
+
if (t < 2.0) {
|
80 |
+
// Voice frequencies (440Hz + 880Hz)
|
81 |
+
sample = Math.sin(2 * Math.PI * 440 * t) * 8000 +
|
82 |
+
Math.sin(2 * Math.PI * 880 * t) * 4000;
|
83 |
+
} else if (t < 3.0) {
|
84 |
+
// Noise
|
85 |
+
sample = (Math.random() - 0.5) * 3000;
|
86 |
+
} else if (t < 4.0) {
|
87 |
+
// Mixed voice (220Hz + 660Hz)
|
88 |
+
sample = Math.sin(2 * Math.PI * 220 * t) * 6000 +
|
89 |
+
Math.sin(2 * Math.PI * 660 * t) * 3000;
|
90 |
+
} else {
|
91 |
+
// Silence with minimal noise
|
92 |
+
sample = Math.random() * 50;
|
93 |
+
}
|
94 |
+
|
95 |
+
audioData[i] = Math.max(-32768, Math.min(32767, Math.floor(sample)));
|
96 |
+
}
|
97 |
+
|
98 |
+
return audioData;
|
99 |
+
}
|
100 |
+
|
101 |
+
// ============================================================================
|
102 |
+
// VAD OPERATIONS
|
103 |
+
// ============================================================================
|
104 |
+
|
105 |
+
function getVADVersion() {
|
106 |
+
if (!vadModule) return "unknown";
|
107 |
+
try {
|
108 |
+
const versionPtr = vadModule._ten_vad_get_version();
|
109 |
+
return vadModule.UTF8ToString(versionPtr);
|
110 |
+
} catch (error) {
|
111 |
+
return "unknown";
|
112 |
+
}
|
113 |
+
}
|
114 |
+
|
115 |
+
function createVADInstance() {
|
116 |
+
try {
|
117 |
+
vadHandlePtr = vadModule._malloc(4);
|
118 |
+
const result = vadModule._ten_vad_create(vadHandlePtr, HOP_SIZE, VOICE_THRESHOLD);
|
119 |
+
|
120 |
+
if (result === 0) {
|
121 |
+
vadHandle = vadModule.getValue(vadHandlePtr, 'i32');
|
122 |
+
return true;
|
123 |
+
} else {
|
124 |
+
console.error(`VAD creation failed with code: ${result}`);
|
125 |
+
vadModule._free(vadHandlePtr);
|
126 |
+
return false;
|
127 |
+
}
|
128 |
+
} catch (error) {
|
129 |
+
console.error(`Error creating VAD instance: ${error.message}`);
|
130 |
+
return false;
|
131 |
+
}
|
132 |
+
}
|
133 |
+
|
134 |
+
function destroyVADInstance() {
|
135 |
+
if (vadHandlePtr && vadModule) {
|
136 |
+
vadModule._ten_vad_destroy(vadHandlePtr);
|
137 |
+
vadModule._free(vadHandlePtr);
|
138 |
+
vadHandlePtr = null;
|
139 |
+
vadHandle = null;
|
140 |
+
}
|
141 |
+
}
|
142 |
+
|
143 |
+
async function processAudio(inputBuf, frameNum, outProbs, outFlags) {
|
144 |
+
console.log(`VAD version: ${getVADVersion()}`);
|
145 |
+
|
146 |
+
if (!createVADInstance()) {
|
147 |
+
return -1;
|
148 |
+
}
|
149 |
+
|
150 |
+
const startTime = getTimestamp();
|
151 |
+
|
152 |
+
for (let i = 0; i < frameNum; i++) {
|
153 |
+
const frameStart = i * HOP_SIZE;
|
154 |
+
const frameData = inputBuf.slice(frameStart, frameStart + HOP_SIZE);
|
155 |
+
|
156 |
+
const audioPtr = vadModule._malloc(HOP_SIZE * 2);
|
157 |
+
const probPtr = vadModule._malloc(4);
|
158 |
+
const flagPtr = vadModule._malloc(4);
|
159 |
+
|
160 |
+
try {
|
161 |
+
vadModule.HEAP16.set(frameData, audioPtr / 2);
|
162 |
+
|
163 |
+
const result = vadModule._ten_vad_process(
|
164 |
+
vadHandle, audioPtr, HOP_SIZE, probPtr, flagPtr
|
165 |
+
);
|
166 |
+
|
167 |
+
if (result === 0) {
|
168 |
+
const probability = vadModule.getValue(probPtr, 'float');
|
169 |
+
const flag = vadModule.getValue(flagPtr, 'i32');
|
170 |
+
|
171 |
+
outProbs[i] = probability;
|
172 |
+
outFlags[i] = flag;
|
173 |
+
|
174 |
+
console.log(`[${i}] ${probability.toFixed(6)}, ${flag}`);
|
175 |
+
} else {
|
176 |
+
console.error(`Frame ${i} processing failed with code: ${result}`);
|
177 |
+
outProbs[i] = 0.0;
|
178 |
+
outFlags[i] = 0;
|
179 |
+
}
|
180 |
+
} finally {
|
181 |
+
vadModule._free(audioPtr);
|
182 |
+
vadModule._free(probPtr);
|
183 |
+
vadModule._free(flagPtr);
|
184 |
+
}
|
185 |
+
}
|
186 |
+
|
187 |
+
const endTime = getTimestamp();
|
188 |
+
const processingTime = endTime - startTime;
|
189 |
+
|
190 |
+
destroyVADInstance();
|
191 |
+
return processingTime;
|
192 |
+
}
|
193 |
+
|
194 |
+
// ============================================================================
|
195 |
+
// RESULT HANDLING
|
196 |
+
// ============================================================================
|
197 |
+
|
198 |
+
function printResults(processingTime, totalAudioTime, outFlags, frameNum) {
|
199 |
+
const rtf = processingTime / totalAudioTime;
|
200 |
+
const voiceFrames = outFlags.filter(flag => flag === 1).length;
|
201 |
+
const voicePercentage = (voiceFrames / frameNum * 100).toFixed(1);
|
202 |
+
|
203 |
+
console.log(`\n=== Processing Results ===`);
|
204 |
+
console.log(`Time: ${processingTime}ms, Audio: ${totalAudioTime.toFixed(2)}ms, RTF: ${rtf.toFixed(6)}`);
|
205 |
+
console.log(`Voice frames: ${voiceFrames}/${frameNum} (${voicePercentage}%)`);
|
206 |
+
}
|
207 |
+
|
208 |
+
function saveResults(outProbs, outFlags, frameNum, filename = 'out.txt') {
|
209 |
+
let output = '';
|
210 |
+
for (let i = 0; i < frameNum; i++) {
|
211 |
+
output += `[${i}] ${outProbs[i].toFixed(6)}, ${outFlags[i]}\n`;
|
212 |
+
}
|
213 |
+
|
214 |
+
try {
|
215 |
+
fs.writeFileSync(filename, output);
|
216 |
+
console.log(`Results saved to ${filename}`);
|
217 |
+
} catch (error) {
|
218 |
+
console.error(`Failed to save results: ${error.message}`);
|
219 |
+
}
|
220 |
+
}
|
221 |
+
|
222 |
+
// ============================================================================
|
223 |
+
// TEST FUNCTIONS
|
224 |
+
// ============================================================================
|
225 |
+
|
226 |
+
async function testWithArray() {
|
227 |
+
console.log("=== Array Test ===\n");
|
228 |
+
|
229 |
+
const inputBuf = generateTestAudio(5000);
|
230 |
+
const byteNum = inputBuf.byteLength;
|
231 |
+
const sampleNum = byteNum / 2;
|
232 |
+
const totalAudioTime = sampleNum / 16.0;
|
233 |
+
const frameNum = Math.floor(sampleNum / HOP_SIZE);
|
234 |
+
|
235 |
+
console.log(`Audio info: ${byteNum} bytes, ${frameNum} frames, ${totalAudioTime.toFixed(2)}ms`);
|
236 |
+
|
237 |
+
const outProbs = new Float32Array(frameNum);
|
238 |
+
const outFlags = new Int32Array(frameNum);
|
239 |
+
|
240 |
+
const processingTime = await processAudio(inputBuf, frameNum, outProbs, outFlags);
|
241 |
+
|
242 |
+
if (processingTime > 0) {
|
243 |
+
printResults(processingTime, totalAudioTime, outFlags, frameNum);
|
244 |
+
}
|
245 |
+
|
246 |
+
return 0;
|
247 |
+
}
|
248 |
+
|
249 |
+
// WAV File parsing utilities
|
250 |
+
function parseWAVHeader(buffer) {
|
251 |
+
if (buffer.length < 44) {
|
252 |
+
throw new Error('Invalid WAV file: too small');
|
253 |
+
}
|
254 |
+
|
255 |
+
// Check RIFF header
|
256 |
+
const riffHeader = buffer.toString('ascii', 0, 4);
|
257 |
+
if (riffHeader !== 'RIFF') {
|
258 |
+
throw new Error('Invalid WAV file: missing RIFF header');
|
259 |
+
}
|
260 |
+
|
261 |
+
// Check WAVE format
|
262 |
+
const waveHeader = buffer.toString('ascii', 8, 12);
|
263 |
+
if (waveHeader !== 'WAVE') {
|
264 |
+
throw new Error('Invalid WAV file: not WAVE format');
|
265 |
+
}
|
266 |
+
|
267 |
+
let offset = 12;
|
268 |
+
let dataOffset = -1;
|
269 |
+
let dataSize = 0;
|
270 |
+
let sampleRate = 0;
|
271 |
+
let channels = 0;
|
272 |
+
let bitsPerSample = 0;
|
273 |
+
|
274 |
+
// Parse chunks
|
275 |
+
while (offset < buffer.length - 8) {
|
276 |
+
const chunkId = buffer.toString('ascii', offset, offset + 4);
|
277 |
+
const chunkSize = buffer.readUInt32LE(offset + 4);
|
278 |
+
|
279 |
+
if (chunkId === 'fmt ') {
|
280 |
+
// Format chunk
|
281 |
+
const audioFormat = buffer.readUInt16LE(offset + 8);
|
282 |
+
channels = buffer.readUInt16LE(offset + 10);
|
283 |
+
sampleRate = buffer.readUInt32LE(offset + 12);
|
284 |
+
bitsPerSample = buffer.readUInt16LE(offset + 22);
|
285 |
+
|
286 |
+
if (audioFormat !== 1) {
|
287 |
+
throw new Error('Unsupported WAV format: only PCM is supported');
|
288 |
+
}
|
289 |
+
|
290 |
+
if (bitsPerSample !== 16) {
|
291 |
+
throw new Error('Unsupported bit depth: only 16-bit is supported');
|
292 |
+
}
|
293 |
+
} else if (chunkId === 'data') {
|
294 |
+
// Data chunk
|
295 |
+
dataOffset = offset + 8;
|
296 |
+
dataSize = chunkSize;
|
297 |
+
break;
|
298 |
+
}
|
299 |
+
|
300 |
+
offset += 8 + chunkSize;
|
301 |
+
// Align to even byte boundary
|
302 |
+
if (chunkSize % 2 === 1) {
|
303 |
+
offset++;
|
304 |
+
}
|
305 |
+
}
|
306 |
+
|
307 |
+
if (dataOffset === -1) {
|
308 |
+
throw new Error('Invalid WAV file: no data chunk found');
|
309 |
+
}
|
310 |
+
|
311 |
+
return {
|
312 |
+
sampleRate,
|
313 |
+
channels,
|
314 |
+
bitsPerSample,
|
315 |
+
dataOffset,
|
316 |
+
dataSize,
|
317 |
+
totalSamples: dataSize / (bitsPerSample / 8),
|
318 |
+
samplesPerChannel: dataSize / (bitsPerSample / 8) / channels
|
319 |
+
};
|
320 |
+
}
|
321 |
+
|
322 |
+
async function testWithWAV(inputFile, outputFile) {
|
323 |
+
console.log("=== WAV File Test ===\n");
|
324 |
+
|
325 |
+
if (!fs.existsSync(inputFile)) {
|
326 |
+
console.error(`Input file not found: ${inputFile}`);
|
327 |
+
return 1;
|
328 |
+
}
|
329 |
+
|
330 |
+
try {
|
331 |
+
const buffer = fs.readFileSync(inputFile);
|
332 |
+
|
333 |
+
// Parse WAV header properly
|
334 |
+
const wavInfo = parseWAVHeader(buffer);
|
335 |
+
console.log(`WAV Format: ${wavInfo.channels} channel(s), ${wavInfo.sampleRate}Hz, ${wavInfo.bitsPerSample}-bit`);
|
336 |
+
console.log(`Total samples: ${wavInfo.totalSamples}, samples per channel: ${wavInfo.samplesPerChannel}`);
|
337 |
+
|
338 |
+
// Validate format requirements
|
339 |
+
if (wavInfo.sampleRate !== 16000) {
|
340 |
+
console.warn(`Warning: Sample rate is ${wavInfo.sampleRate}Hz, expected 16000Hz`);
|
341 |
+
}
|
342 |
+
|
343 |
+
if (wavInfo.channels !== 1) {
|
344 |
+
console.warn(`Warning: ${wavInfo.channels} channels detected, only first channel will be used`);
|
345 |
+
}
|
346 |
+
|
347 |
+
// Extract audio data
|
348 |
+
const audioBuffer = buffer.slice(wavInfo.dataOffset, wavInfo.dataOffset + wavInfo.dataSize);
|
349 |
+
const inputBuf = new Int16Array(audioBuffer.buffer.slice(audioBuffer.byteOffset));
|
350 |
+
|
351 |
+
// Calculate correct sample number (for mono audio)
|
352 |
+
const sampleNum = wavInfo.channels === 1 ?
|
353 |
+
wavInfo.samplesPerChannel :
|
354 |
+
Math.floor(wavInfo.samplesPerChannel); // Use only first channel if stereo
|
355 |
+
|
356 |
+
const totalAudioTime = sampleNum / wavInfo.sampleRate * 1000; // in milliseconds
|
357 |
+
const frameNum = Math.floor(sampleNum / HOP_SIZE);
|
358 |
+
|
359 |
+
console.log(`Audio info: ${audioBuffer.length} bytes, ${sampleNum} samples, ${frameNum} frames, ${totalAudioTime.toFixed(2)}ms`);
|
360 |
+
|
361 |
+
// If stereo, extract only the first channel
|
362 |
+
let processedInput = inputBuf;
|
363 |
+
if (wavInfo.channels > 1) {
|
364 |
+
console.log(`Extracting mono from ${wavInfo.channels} channels...`);
|
365 |
+
processedInput = new Int16Array(Math.floor(inputBuf.length / wavInfo.channels));
|
366 |
+
for (let i = 0; i < processedInput.length; i++) {
|
367 |
+
processedInput[i] = inputBuf[i * wavInfo.channels]; // Take first channel
|
368 |
+
}
|
369 |
+
}
|
370 |
+
|
371 |
+
const outProbs = new Float32Array(frameNum);
|
372 |
+
const outFlags = new Int32Array(frameNum);
|
373 |
+
|
374 |
+
const processingTime = await processAudio(processedInput, frameNum, outProbs, outFlags);
|
375 |
+
|
376 |
+
if (processingTime > 0) {
|
377 |
+
printResults(processingTime, totalAudioTime, outFlags, frameNum);
|
378 |
+
saveResults(outProbs, outFlags, frameNum, outputFile);
|
379 |
+
}
|
380 |
+
|
381 |
+
return 0;
|
382 |
+
} catch (error) {
|
383 |
+
console.error(`Error processing WAV file: ${error.message}`);
|
384 |
+
return 1;
|
385 |
+
}
|
386 |
+
}
|
387 |
+
|
388 |
+
/**
 * Benchmark raw per-frame processing cost of the VAD.
 *
 * Pushes one synthetic frame (HOP_SIZE samples of a 440 Hz sine sampled at
 * 16 kHz) through `_ten_vad_process` for 100, 1000 and 10000 iterations and
 * prints, for each run, the total time, the average time per frame and the
 * real-time factor (processing time divided by the duration of audio
 * processed).
 *
 * Returns without benchmarking when the VAD instance cannot be created.
 * WebAssembly buffers and the VAD instance are released even if a run throws.
 *
 * @returns {Promise<void>}
 */
async function runBenchmark() {
    console.log("=== Performance Benchmark ===\n");

    if (!createVADInstance()) return;

    try {
        const testData = new Int16Array(HOP_SIZE);
        for (let i = 0; i < HOP_SIZE; i++) {
            testData[i] = Math.sin(2 * Math.PI * 440 * i / 16000) * 8000;
        }

        // Duration of one frame in milliseconds at 16 kHz
        // (16 ms when HOP_SIZE is 256).
        const frameAudioTime = (HOP_SIZE / 16000) * 1000;

        const testCases = [100, 1000, 10000];

        for (const numFrames of testCases) {
            const audioPtr = vadModule._malloc(HOP_SIZE * 2); // int16 samples
            const probPtr = vadModule._malloc(4);             // one float
            const flagPtr = vadModule._malloc(4);             // one int32

            try {
                // _malloc returns 0 on failure; writing through a null
                // pointer would silently corrupt the wasm heap.
                if (!audioPtr || !probPtr || !flagPtr) {
                    throw new Error("Failed to allocate WebAssembly memory for benchmark");
                }

                // HEAP16 is indexed in 2-byte elements, hence the division.
                vadModule.HEAP16.set(testData, audioPtr / 2);

                const startTime = getTimestamp();

                for (let i = 0; i < numFrames; i++) {
                    vadModule._ten_vad_process(vadHandle, audioPtr, HOP_SIZE, probPtr, flagPtr);
                }

                const endTime = getTimestamp();
                const totalTime = endTime - startTime;
                const avgTime = totalTime / numFrames;

                // RTF (real-time factor): processing time / audio duration.
                const totalAudioTime = numFrames * frameAudioTime;
                const rtf = totalTime / totalAudioTime;

                console.log(`${numFrames} frames: ${totalTime}ms total, ${avgTime.toFixed(3)}ms/frame, RTF: ${rtf.toFixed(3)}`);
            } finally {
                // free(NULL) is a no-op, so this is safe after a failed _malloc.
                vadModule._free(audioPtr);
                vadModule._free(probPtr);
                vadModule._free(flagPtr);
            }
        }
    } finally {
        destroyVADInstance();
    }
}
|
432 |
+
|
433 |
+
// ============================================================================
|
434 |
+
// MODULE INITIALIZATION
|
435 |
+
// ============================================================================
|
436 |
+
|
437 |
+
/**
 * Load the TEN VAD WebAssembly module into the file-level `vadModule`.
 *
 * The shipped glue (`ten_vad.js`) is an ES module. To load it with
 * `require()`, a CommonJS copy is written to `./ten_vad_temp.js` with
 * `import.meta.url` replaced by the absolute path of the glue file and the
 * default export turned into `module.exports`. The temporary copy is removed
 * whether loading succeeds or fails.
 *
 * @returns {Promise<boolean>} true when the module was loaded; false after
 *   logging the reason on any failure.
 */
async function loadModule() {
    const tempPath = path.resolve('./ten_vad_temp.js');

    try {
        console.log("Loading WebAssembly module...");

        if (!fs.existsSync(WASM_JS_FILE)) {
            throw new Error(`ten_vad.js not found at ${WASM_JS_FILE}`);
        }

        if (!fs.existsSync(WASM_BINARY_FILE)) {
            throw new Error(`ten_vad.wasm not found at ${WASM_BINARY_FILE}`);
        }

        // Read and modify the module file for Node.js compatibility.
        // JSON.stringify produces a correctly escaped string literal even
        // when the path contains backslashes or quotes. Function replacers
        // keep `$` sequences in the path from being treated as replacement
        // patterns.
        const scriptPathLiteral = JSON.stringify(path.resolve(WASM_JS_FILE));
        const wasmJsContent = fs.readFileSync(WASM_JS_FILE, 'utf8');
        const modifiedContent = wasmJsContent
            .replace(/import\.meta\.url/g, () => scriptPathLiteral)
            .replace(/export default createVADModule;/, () => 'module.exports = createVADModule;');

        // Write temporary file
        fs.writeFileSync(tempPath, modifiedContent);

        // Load WASM binary
        const wasmBinary = fs.readFileSync(WASM_BINARY_FILE);

        // Load module
        const createVADModule = require(tempPath);
        vadModule = await createVADModule({
            wasmBinary: wasmBinary,
            locateFile: (filePath) => filePath.endsWith('.wasm') ? WASM_BINARY_FILE : filePath,
            noInitialRun: false,
            noExitRuntime: true
        });

        // Add missing helper functions
        addHelperFunctions();

        console.log(`Module loaded successfully. Version: ${getVADVersion()}\n`);
        return true;

    } catch (error) {
        console.error(`Failed to load module: ${error.message}`);
        return false;
    } finally {
        // Best-effort cleanup: never let it mask the real outcome.
        try {
            fs.unlinkSync(tempPath);
        } catch (cleanupError) {
            // ENOENT just means the file was never written.
            if (cleanupError.code !== 'ENOENT') {
                console.error(`Failed to remove temporary file ${tempPath}: ${cleanupError.message}`);
            }
        }
    }
}
|
485 |
+
|
486 |
+
// ============================================================================
|
487 |
+
// MAIN FUNCTION
|
488 |
+
// ============================================================================
|
489 |
+
|
490 |
+
/**
 * Command-line entry point.
 *
 * With two or more arguments (`<input.wav> <output file>`) the WAV test is
 * run; otherwise the generated-array test is run. The performance benchmark
 * runs afterwards in both cases.
 *
 * Exits the process with code 1 when the WebAssembly module cannot be loaded.
 *
 * @returns {Promise<number>} 0 on success; 1 when the WAV test reports
 *   failure through its return value or when any step throws.
 */
async function main() {
    const args = process.argv.slice(2);

    // Initialize module
    if (!await loadModule()) {
        process.exit(1);
    }

    try {
        let status = 0;

        if (args.length >= 2) {
            // Test with WAV file
            const [inputFile, outputFile] = args;
            console.log(`Input: ${inputFile}, Output: ${outputFile}\n`);

            // testWithWAV signals failure through a non-zero return value
            // rather than by throwing, so it must be checked explicitly.
            const wavStatus = await testWithWAV(inputFile, outputFile);
            if (typeof wavStatus === 'number' && wavStatus !== 0) {
                status = 1;
            }
        } else {
            // Test with generated array
            await testWithArray();
        }

        await runBenchmark();
        return status;
    } catch (error) {
        console.error(`Test failed: ${error.message}`);
        return 1;
    }
}
|
515 |
+
|
516 |
+
// ============================================================================
|
517 |
+
// EXECUTION
|
518 |
+
// ============================================================================
|
519 |
+
|
520 |
+
// Run the tests only when this file is the program entry point. When it is
// loaded with require(), only the exports below are made available.
if (require.main === module) {
    const exitWith = (exitCode) => {
        process.exit(exitCode);
    };
    const reportFatal = (error) => {
        console.error(`Fatal error: ${error.message}`);
        process.exit(1);
    };

    main().then(exitWith).catch(reportFatal);
}

module.exports = { main, testWithArray, testWithWAV, runBenchmark };
|
examples_onnx/CMakeLists.txt
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#
|
2 |
+
# Copyright © 2025 Agora
|
3 |
+
# This file is part of TEN Framework, an open source project.
|
4 |
+
# Licensed under the Apache License, Version 2.0, with certain conditions.
|
5 |
+
# Refer to the "LICENSE" file in the root directory for more information.
|
6 |
+
#
|
7 |
+
cmake_minimum_required(VERSION 3.10)
get_filename_component(ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../ ABSOLUTE)

project(ten_vad)

# ORT_ROOT locates the ONNX Runtime headers and shared library used below.
# Fail early instead of silently building paths such as "/include".
if(NOT ORT_ROOT)
  message(FATAL_ERROR "ORT_ROOT is not set; configure with -DORT_ROOT=<path_to_onnxruntime>")
endif()

# Default to a Release build, but respect an explicit -DCMAKE_BUILD_TYPE=...
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
endif()

add_compile_options(-Wno-write-strings -Wno-unused-result)
include_directories(${ROOT}/src)
include_directories(${ROOT}/include)
include_directories(${ORT_ROOT}/include)

# Shared library built from every C/C++ source under src/.
file(GLOB LIBRARY_SOURCES "${ROOT}/src/*.cc" "${ROOT}/src/*.c")
add_library(ten_vad SHARED ${LIBRARY_SOURCES})
link_directories(${ORT_ROOT}/lib)
target_link_libraries(ten_vad "${ORT_ROOT}/lib/libonnxruntime.so")

# Demo executable linked against the library above.
set(EXECUTABLE_SOURCES ${ROOT}/examples/main.c)
add_executable(ten_vad_demo ${EXECUTABLE_SOURCES})
target_link_libraries(ten_vad_demo ten_vad)
|
examples_onnx/build-and-deploy-linux.sh
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
|
2 |
+
#
|
3 |
+
# Copyright © 2025 Agora
|
4 |
+
# This file is part of TEN Framework, an open source project.
|
5 |
+
# Licensed under the Apache License, Version 2.0, with certain conditions.
|
6 |
+
# Refer to the "LICENSE" file in the root directory for more information.
|
7 |
+
#
|
8 |
+
set -euo pipefail

if [[ "$#" -lt 2 || "$1" != "--ort-path" ]]; then
  echo "usage: $0 --ort-path <path_to_onnxruntime>" >&2
  exit 1
fi

ORT_ROOT="$2"
shift 2

if [[ ! -d "$ORT_ROOT" || ! -d "$ORT_ROOT/lib" || ! -d "$ORT_ROOT/include" ]]; then
  echo "invalid onnxruntime library path: $ORT_ROOT" >&2
  exit 1
fi

# cmake is invoked from inside the build directory, so a relative path that
# was valid here would point somewhere else there. Make it absolute first.
ORT_ROOT="$(cd "$ORT_ROOT" && pwd)"

# Every path below is relative to this script's directory, so the script
# behaves the same no matter where it is invoked from.
cd "$(dirname "${BASH_SOURCE[0]}")"

arch=x64
build_dir="build-linux/$arch"
rm -rf "$build_dir"
mkdir -p "$build_dir"
cd "$build_dir"

# Step 1: Build the demo
cmake ../../ -DORT_ROOT="$ORT_ROOT"
cmake --build . --config Release

# Step 2: Run the demo
# The model directory is linked into the build directory before the run.
ln -s ../../../src/onnx_model/
./ten_vad_demo ../../../examples/s0724-s0730.wav out.txt

cd ../../
|
include/ten_vad.h
CHANGED
@@ -1,7 +1,8 @@
|
|
1 |
//
|
|
|
2 |
// This file is part of TEN Framework, an open source project.
|
3 |
-
// Licensed under the Apache License, Version 2.0.
|
4 |
-
//
|
5 |
//
|
6 |
#ifndef TEN_VAD_H
|
7 |
#define TEN_VAD_H
|
@@ -83,4 +84,4 @@ extern "C"
|
|
83 |
}
|
84 |
#endif
|
85 |
|
86 |
-
#endif /* TEN_VAD_H */
|
|
|
1 |
//
|
2 |
+
// Copyright © 2025 Agora
|
3 |
// This file is part of TEN Framework, an open source project.
|
4 |
+
// Licensed under the Apache License, Version 2.0, with certain conditions.
|
5 |
+
// Refer to the "LICENSE" file in the root directory for more information.
|
6 |
//
|
7 |
#ifndef TEN_VAD_H
|
8 |
#define TEN_VAD_H
|
|
|
84 |
}
|
85 |
#endif
|
86 |
|
87 |
+
#endif /* TEN_VAD_H */
|
include/ten_vad.py
CHANGED
@@ -1,7 +1,8 @@
|
|
1 |
#
|
2 |
-
#
|
3 |
-
#
|
4 |
-
#
|
|
|
5 |
#
|
6 |
from ctypes import c_int, c_int32, c_float, c_size_t, CDLL, c_void_p, POINTER
|
7 |
import numpy as np
|
|
|
1 |
#
|
2 |
+
# Copyright © 2025 Agora
|
3 |
+
# This file is part of TEN Framework, an open source project.
|
4 |
+
# Licensed under the Apache License, Version 2.0, with certain conditions.
|
5 |
+
# Refer to the "LICENSE" file in the root directory for more information.
|
6 |
#
|
7 |
from ctypes import c_int, c_int32, c_float, c_size_t, CDLL, c_void_p, POINTER
|
8 |
import numpy as np
|
lib/Web/ten_vad.d.ts
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/**
|
2 |
+
* This file is part of TEN Framework, an open source project.
|
3 |
+
* Licensed under the Apache License, Version 2.0.
|
4 |
+
* See the LICENSE file for more information.
|
5 |
+
*
|
6 |
+
* TEN VAD (Voice Activity Detection) WebAssembly Module
|
7 |
+
* TypeScript type definitions
|
8 |
+
*/
|
9 |
+
|
10 |
+
/**
 * Low-level view of the instantiated TEN VAD WebAssembly module.
 *
 * All pointers are byte offsets into the module's linear memory, which is
 * exposed through the typed-array HEAP views below.
 */
export interface TenVADModule {
  /**
   * Create and initialize a VAD instance
   * @param handlePtr Pointer to store the VAD handle
   * @param hopSize Number of samples between consecutive analysis frames (e.g., 256)
   * @param threshold VAD detection threshold [0.0, 1.0]
   * @returns 0 on success, -1 on error
   */
  _ten_vad_create(handlePtr: number, hopSize: number, threshold: number): number;

  /**
   * Process audio frame for voice activity detection
   * @param handle Valid VAD handle from ten_vad_create
   * @param audioDataPtr Pointer to int16 audio samples array
   * @param audioDataLength Length of audio data (should equal hopSize)
   * @param outProbabilityPtr Pointer to output probability [0.0, 1.0]
   * @param outFlagPtr Pointer to output flag (0: no voice, 1: voice detected)
   * @returns 0 on success, -1 on error
   */
  _ten_vad_process(
    handle: number,
    audioDataPtr: number,
    audioDataLength: number,
    outProbabilityPtr: number,
    outFlagPtr: number
  ): number;

  /**
   * Destroy VAD instance and release resources
   * @param handlePtr Pointer to the VAD handle
   * @returns 0 on success, -1 on error
   */
  _ten_vad_destroy(handlePtr: number): number;

  /**
   * Get library version string
   * @returns Version string pointer
   */
  _ten_vad_get_version(): number;

  // WebAssembly Memory Management
  _malloc(size: number): number;
  _free(ptr: number): void;

  // Memory access helpers: typed-array views over the module's linear
  // memory. The glue code creates one view per element type.
  HEAP8: Int8Array;
  HEAP16: Int16Array;
  HEAP32: Int32Array;
  HEAPU8: Uint8Array;
  HEAPU16: Uint16Array;
  HEAPU32: Uint32Array;
  HEAPF32: Float32Array;
  HEAPF64: Float64Array;

  // NOTE(review): the minified ten_vad.js glue does not appear to attach the
  // value/string helpers below to the module, and the Node example installs
  // "missing helper functions" itself. Confirm they exist at runtime before
  // relying on them.

  // Value access methods
  getValue(ptr: number, type: 'i8' | 'i16' | 'i32' | 'float' | 'double'): number;
  setValue(ptr: number, value: number, type: 'i8' | 'i16' | 'i32' | 'float' | 'double'): void;

  // String utilities
  UTF8ToString(ptr: number): string;
  lengthBytesUTF8(str: string): number;
  stringToUTF8(str: string, outPtr: number, maxBytesToWrite: number): void;
}
|
69 |
+
|
70 |
+
/**
 * Object-oriented convenience wrapper around a single TEN VAD instance.
 *
 * NOTE(review): the ten_vad.js glue shipped next to this file only has a
 * default export (createVADModule). Confirm that an implementation of this
 * class is actually provided at runtime.
 */
export class TenVAD {
  private module: TenVADModule;
  private handle: number | null;
  private hopSize: number;

  constructor(module: TenVADModule, hopSize: number, threshold: number);

  /**
   * Run voice activity detection on one frame of audio.
   * @param audioData Int16Array holding exactly hopSize samples
   * @returns The voice probability and detection flag for the frame, or null
   */
  process(audioData: Int16Array): { probability: number; isVoice: boolean } | null;

  /** Return the library version string. */
  getVersion(): string;

  /** Destroy the underlying VAD instance. */
  destroy(): void;

  /** Report whether the VAD instance is valid. */
  isValid(): boolean;
}
|
105 |
+
|
106 |
+
/**
 * Optional overrides read by the module factory.
 *
 * Only the commonly used settings are typed explicitly; the index signature
 * keeps other loader settings usable.
 */
export interface TenVADModuleOptions {
  /** Pre-loaded contents of ten_vad.wasm; when given, the binary is not fetched. */
  wasmBinary?: ArrayBuffer | Uint8Array;
  /** Map a requested file name (e.g. "ten_vad.wasm") to its actual location. */
  locateFile?: (path: string, scriptDirectory: string) => string;
  noInitialRun?: boolean;
  noExitRuntime?: boolean;
  /** Sink for text the module writes to standard output. */
  print?: (text: string) => void;
  /** Sink for text the module writes to standard error. */
  printErr?: (text: string) => void;
  /** Called when the runtime aborts. */
  onAbort?: (reason: unknown) => void;
  /** Called once the runtime has finished initializing. */
  onRuntimeInitialized?: () => void;
  [key: string]: unknown;
}

/**
 * Create TEN VAD WebAssembly module
 * @param options Optional loader overrides (see TenVADModuleOptions)
 * @returns Promise resolving to the initialized module
 */
declare function createVADModule(options?: TenVADModuleOptions): Promise<TenVADModule>;

export default createVADModule;
|
lib/Web/ten_vad.js
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
var createVADModule = (() => {
|
3 |
+
var _scriptDir = import.meta.url;
|
4 |
+
|
5 |
+
return (
|
6 |
+
function(createVADModule) {
|
7 |
+
createVADModule = createVADModule || {};
|
8 |
+
|
9 |
+
|
10 |
+
var a;a||(a=typeof createVADModule !== 'undefined' ? createVADModule : {});var k,l;a.ready=new Promise(function(b,c){k=b;l=c});var p=Object.assign({},a),r="object"==typeof window,u="function"==typeof importScripts,v="",w;
|
11 |
+
if(r||u)u?v=self.location.href:"undefined"!=typeof document&&document.currentScript&&(v=document.currentScript.src),_scriptDir&&(v=_scriptDir),0!==v.indexOf("blob:")?v=v.substr(0,v.replace(/[?#].*/,"").lastIndexOf("/")+1):v="",u&&(w=b=>{var c=new XMLHttpRequest;c.open("GET",b,!1);c.responseType="arraybuffer";c.send(null);return new Uint8Array(c.response)});var aa=a.print||console.log.bind(console),x=a.printErr||console.warn.bind(console);Object.assign(a,p);p=null;var y;a.wasmBinary&&(y=a.wasmBinary);
|
12 |
+
var noExitRuntime=a.noExitRuntime||!0;"object"!=typeof WebAssembly&&z("no native wasm support detected");var A,B=!1,C="undefined"!=typeof TextDecoder?new TextDecoder("utf8"):void 0,D,E,F;function J(){var b=A.buffer;D=b;a.HEAP8=new Int8Array(b);a.HEAP16=new Int16Array(b);a.HEAP32=new Int32Array(b);a.HEAPU8=E=new Uint8Array(b);a.HEAPU16=new Uint16Array(b);a.HEAPU32=F=new Uint32Array(b);a.HEAPF32=new Float32Array(b);a.HEAPF64=new Float64Array(b)}var K=[],L=[],M=[];
|
13 |
+
function ba(){var b=a.preRun.shift();K.unshift(b)}var N=0,O=null,P=null;function z(b){if(a.onAbort)a.onAbort(b);b="Aborted("+b+")";x(b);B=!0;b=new WebAssembly.RuntimeError(b+". Build with -sASSERTIONS for more info.");l(b);throw b;}function Q(){return R.startsWith("data:application/octet-stream;base64,")}var R;if(a.locateFile){if(R="ten_vad.wasm",!Q()){var S=R;R=a.locateFile?a.locateFile(S,v):v+S}}else R=(new URL("ten_vad.wasm",import.meta.url)).href;
|
14 |
+
function T(){var b=R;try{if(b==R&&y)return new Uint8Array(y);if(w)return w(b);throw"both async and sync fetching of the wasm failed";}catch(c){z(c)}}function ca(){return y||!r&&!u||"function"!=typeof fetch?Promise.resolve().then(function(){return T()}):fetch(R,{credentials:"same-origin"}).then(function(b){if(!b.ok)throw"failed to load wasm binary file at '"+R+"'";return b.arrayBuffer()}).catch(function(){return T()})}function U(b){for(;0<b.length;)b.shift()(a)}
|
15 |
+
var da=[null,[],[]],ea={a:function(){z("")},f:function(b,c,m){E.copyWithin(b,c,c+m)},c:function(b){var c=E.length;b>>>=0;if(2147483648<b)return!1;for(var m=1;4>=m;m*=2){var h=c*(1+.2/m);h=Math.min(h,b+100663296);var d=Math;h=Math.max(b,h);d=d.min.call(d,2147483648,h+(65536-h%65536)%65536);a:{try{A.grow(d-D.byteLength+65535>>>16);J();var e=1;break a}catch(W){}e=void 0}if(e)return!0}return!1},e:function(){return 52},b:function(){return 70},d:function(b,c,m,h){for(var d=0,e=0;e<m;e++){var W=F[c>>2],
|
16 |
+
X=F[c+4>>2];c+=8;for(var G=0;G<X;G++){var f=E[W+G],H=da[b];if(0===f||10===f){f=H;for(var n=0,q=n+NaN,t=n;f[t]&&!(t>=q);)++t;if(16<t-n&&f.buffer&&C)f=C.decode(f.subarray(n,t));else{for(q="";n<t;){var g=f[n++];if(g&128){var I=f[n++]&63;if(192==(g&224))q+=String.fromCharCode((g&31)<<6|I);else{var Y=f[n++]&63;g=224==(g&240)?(g&15)<<12|I<<6|Y:(g&7)<<18|I<<12|Y<<6|f[n++]&63;65536>g?q+=String.fromCharCode(g):(g-=65536,q+=String.fromCharCode(55296|g>>10,56320|g&1023))}}else q+=String.fromCharCode(g)}f=q}(1===
|
17 |
+
b?aa:x)(f);H.length=0}else H.push(f)}d+=X}F[h>>2]=d;return 0}};
|
18 |
+
(function(){function b(d){a.asm=d.exports;A=a.asm.g;J();L.unshift(a.asm.h);N--;a.monitorRunDependencies&&a.monitorRunDependencies(N);0==N&&(null!==O&&(clearInterval(O),O=null),P&&(d=P,P=null,d()))}function c(d){b(d.instance)}function m(d){return ca().then(function(e){return WebAssembly.instantiate(e,h)}).then(function(e){return e}).then(d,function(e){x("failed to asynchronously prepare wasm: "+e);z(e)})}var h={a:ea};N++;a.monitorRunDependencies&&a.monitorRunDependencies(N);if(a.instantiateWasm)try{return a.instantiateWasm(h,
|
19 |
+
b)}catch(d){x("Module.instantiateWasm callback failed with error: "+d),l(d)}(function(){return y||"function"!=typeof WebAssembly.instantiateStreaming||Q()||"function"!=typeof fetch?m(c):fetch(R,{credentials:"same-origin"}).then(function(d){return WebAssembly.instantiateStreaming(d,h).then(c,function(e){x("wasm streaming compile failed: "+e);x("falling back to ArrayBuffer instantiation");return m(c)})})})().catch(l);return{}})();
|
20 |
+
a.___wasm_call_ctors=function(){return(a.___wasm_call_ctors=a.asm.h).apply(null,arguments)};a._malloc=function(){return(a._malloc=a.asm.i).apply(null,arguments)};a._free=function(){return(a._free=a.asm.j).apply(null,arguments)};a._ten_vad_create=function(){return(a._ten_vad_create=a.asm.k).apply(null,arguments)};a._ten_vad_process=function(){return(a._ten_vad_process=a.asm.l).apply(null,arguments)};a._ten_vad_destroy=function(){return(a._ten_vad_destroy=a.asm.m).apply(null,arguments)};
|
21 |
+
a._ten_vad_get_version=function(){return(a._ten_vad_get_version=a.asm.n).apply(null,arguments)};var V;P=function fa(){V||Z();V||(P=fa)};
|
22 |
+
function Z(){function b(){if(!V&&(V=!0,a.calledRun=!0,!B)){U(L);k(a);if(a.onRuntimeInitialized)a.onRuntimeInitialized();if(a.postRun)for("function"==typeof a.postRun&&(a.postRun=[a.postRun]);a.postRun.length;){var c=a.postRun.shift();M.unshift(c)}U(M)}}if(!(0<N)){if(a.preRun)for("function"==typeof a.preRun&&(a.preRun=[a.preRun]);a.preRun.length;)ba();U(K);0<N||(a.setStatus?(a.setStatus("Running..."),setTimeout(function(){setTimeout(function(){a.setStatus("")},1);b()},1)):b())}}
|
23 |
+
if(a.preInit)for("function"==typeof a.preInit&&(a.preInit=[a.preInit]);0<a.preInit.length;)a.preInit.pop()();Z();
|
24 |
+
|
25 |
+
|
26 |
+
return createVADModule.ready
|
27 |
+
}
|
28 |
+
);
|
29 |
+
})();
|
30 |
+
export default createVADModule;
|
lib/{macOS/ten_vad.framework/Versions/Current/Headers/ten_vad.h → Web/ten_vad.wasm}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ec0b9640683987e15a4e54e4ce5642b2447c6e5d82b1be889b5099c75434fc3
|
3 |
+
size 283349
|
lib/Windows/x64/ten_vad.lib
CHANGED
Binary files a/lib/Windows/x64/ten_vad.lib and b/lib/Windows/x64/ten_vad.lib differ
|
|
lib/Windows/x86/ten_vad.lib
CHANGED
Binary files a/lib/Windows/x86/ten_vad.lib and b/lib/Windows/x86/ten_vad.lib differ
|
|
lib/iOS/ten_vad.framework/Headers/ten_vad.h
CHANGED
@@ -1,3 +1,90 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/*
 * @file ten_vad.h
 * @brief Ten Voice Activity Detection (ten_vad) C API
 * Version: 1.0.0
 *
 * Provides functions to create, process, and destroy a VAD instance.
 */
#ifndef TEN_VAD_H
#define TEN_VAD_H

/**
 * @def TENVAD_API
 * @brief Export/import macro for ten_vad shared library symbols.
 */
#if defined(__APPLE__) || defined(__ANDROID__) || defined(__linux__)
#define TENVAD_API __attribute__((visibility("default")))
#elif defined(_WIN32) || defined(__CYGWIN__)
#ifdef TENVAD_EXPORTS
#define TENVAD_API __declspec(dllexport)
#else
#define TENVAD_API __declspec(dllimport)
#endif
#else
#define TENVAD_API
#endif

#include <stddef.h> /* size_t */
#include <stdint.h> /* int16_t */

#ifdef __cplusplus
extern "C"
{
#endif

  /**
   * @typedef ten_vad_handle_t
   * @brief Opaque handle for ten_vad instance.
   */
  typedef void *ten_vad_handle_t;

  /**
   * @brief Create and initialize a ten_vad instance.
   *
   * @param[out] handle Pointer to receive the vad handle.
   * @param[in] hop_size The number of samples between the start points of
   * two consecutive analysis frames (e.g., 256).
   * @param[in] threshold VAD detection threshold in the range [0.0, 1.0]
   * (default: 0.5).
   * @return 0 on success, or -1 if an error occurs.
   */
  TENVAD_API int ten_vad_create(ten_vad_handle_t *handle, size_t hop_size,
                                float threshold);

  /**
   * @brief Process one audio frame for voice activity detection.
   * Must call ten_vad_create() before calling this, and ten_vad_destroy() when done.
   *
   * @param[in] handle Valid VAD handle returned by ten_vad_create().
   * @param[in] audio_data Pointer to an array of int16_t samples;
   * the buffer length must equal the hop size specified at ten_vad_create().
   * @param[in] audio_data_length Size of the audio_data buffer; should be equal to hop_size.
   * @param[out] out_probability Pointer to a float (size 1) that receives the
   * voice activity probability in the range [0.0, 1.0].
   * @param[out] out_flag Pointer to an int (size 1) that receives the
   * detection result: 0 = no voice, 1 = voice detected.
   * @return 0 on success, or -1 if an error occurs.
   */
  TENVAD_API int ten_vad_process(ten_vad_handle_t handle, const int16_t *audio_data, size_t audio_data_length,
                                 float *out_probability, int *out_flag);

  /**
   * @brief Destroy a ten_vad instance and release its resources.
   *
   * @param[in,out] handle Pointer to the ten_vad handle; set to NULL on return.
   * @return 0 on success, or -1 if an error occurs.
   */
  TENVAD_API int ten_vad_destroy(ten_vad_handle_t *handle);

  /**
   * @brief Get the ten_vad library version string.
   *
   * @return The version string (e.g., "1.0.0").
   */
  TENVAD_API const char *ten_vad_get_version(void);

#ifdef __cplusplus
}
#endif

#endif /* TEN_VAD_H */
|
lib/iOS/ten_vad.framework/Info.plist
CHANGED
Binary files a/lib/iOS/ten_vad.framework/Info.plist and b/lib/iOS/ten_vad.framework/Info.plist differ
|
|
lib/iOS/ten_vad.framework/Modules/module.modulemap
CHANGED
@@ -1,3 +1,5 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
|
|
|
|
|
1 |
+
framework module ten_vad {
|
2 |
+
umbrella header "ten_vad.h"
|
3 |
+
export *
|
4 |
+
module * { export * }
|
5 |
+
}
|
lib/macOS/ten_vad.framework/Headers
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
Versions/Current/Headers
|
lib/macOS/ten_vad.framework/Headers/ten_vad.h
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:9bbf0ab2d2ee30d9c170556efb9a7200a53725053cfa7c66a0dff79e7c9351e8
|
3 |
-
size 2885
|
|
|
|
|
|
|
|
lib/macOS/ten_vad.framework/Resources
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
Versions/Current/Resources
|
lib/macOS/ten_vad.framework/Resources/Info.plist
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:5aa8df4f544b3143b819d6ffd5c21574c02884bf41cb2b7a8df45c7f10f75c3a
|
3 |
-
size 1216
|
|
|
|
|
|
|
|
lib/macOS/ten_vad.framework/Versions/A/Headers/ten_vad.h
CHANGED
@@ -1,3 +1,90 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/*
 * @file ten_vad.h
 * @brief Ten Voice Activity Detection (ten_vad) C API
 * Version: 1.0.0
 *
 * Provides functions to create, process, and destroy a VAD instance.
 */
#ifndef TEN_VAD_H
#define TEN_VAD_H

/**
 * @def TENVAD_API
 * @brief Export/import macro for ten_vad shared library symbols.
 */
#if defined(__APPLE__) || defined(__ANDROID__) || defined(__linux__)
#define TENVAD_API __attribute__((visibility("default")))
#elif defined(_WIN32) || defined(__CYGWIN__)
#ifdef TENVAD_EXPORTS
#define TENVAD_API __declspec(dllexport)
#else
#define TENVAD_API __declspec(dllimport)
#endif
#else
#define TENVAD_API
#endif

#include <stddef.h> /* size_t */
#include <stdint.h> /* int16_t */

#ifdef __cplusplus
extern "C"
{
#endif

  /**
   * @typedef ten_vad_handle_t
   * @brief Opaque handle for ten_vad instance.
   */
  typedef void *ten_vad_handle_t;

  /**
   * @brief Create and initialize a ten_vad instance.
   *
   * @param[out] handle Pointer to receive the vad handle.
   * @param[in] hop_size The number of samples between the start points of
   * two consecutive analysis frames (e.g., 256).
   * @param[in] threshold VAD detection threshold in the range [0.0, 1.0]
   * (default: 0.5).
   * @return 0 on success, or -1 if an error occurs.
   */
  TENVAD_API int ten_vad_create(ten_vad_handle_t *handle, size_t hop_size,
                                float threshold);

  /**
   * @brief Process one audio frame for voice activity detection.
   * Must call ten_vad_create() before calling this, and ten_vad_destroy() when done.
   *
   * @param[in] handle Valid VAD handle returned by ten_vad_create().
   * @param[in] audio_data Pointer to an array of int16_t samples;
   * the buffer length must equal the hop size specified at ten_vad_create().
   * @param[in] audio_data_length Size of the audio_data buffer; should be equal to hop_size.
   * @param[out] out_probability Pointer to a float (size 1) that receives the
   * voice activity probability in the range [0.0, 1.0].
   * @param[out] out_flag Pointer to an int (size 1) that receives the
   * detection result: 0 = no voice, 1 = voice detected.
   * @return 0 on success, or -1 if an error occurs.
   */
  TENVAD_API int ten_vad_process(ten_vad_handle_t handle, const int16_t *audio_data, size_t audio_data_length,
                                 float *out_probability, int *out_flag);

  /**
   * @brief Destroy a ten_vad instance and release its resources.
   *
   * @param[in,out] handle Pointer to the ten_vad handle; set to NULL on return.
   * @return 0 on success, or -1 if an error occurs.
   */
  TENVAD_API int ten_vad_destroy(ten_vad_handle_t *handle);

  /**
   * @brief Get the ten_vad library version string.
   *
   * @return The version string (e.g., "1.0.0").
   */
  TENVAD_API const char *ten_vad_get_version(void);

#ifdef __cplusplus
}
#endif

#endif /* TEN_VAD_H */
|
lib/macOS/ten_vad.framework/Versions/A/Resources/Info.plist
CHANGED
@@ -1,3 +1,44 @@
|
|
1 |
-
version
|
2 |
-
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
3 |
+
<plist version="1.0">
|
4 |
+
<dict>
|
5 |
+
<key>BuildMachineOSBuild</key>
|
6 |
+
<string>23D60</string>
|
7 |
+
<key>CFBundleDevelopmentRegion</key>
|
8 |
+
<string>English</string>
|
9 |
+
<key>CFBundleExecutable</key>
|
10 |
+
<string>ten_vad</string>
|
11 |
+
<key>CFBundleIdentifier</key>
|
12 |
+
<string>com.yourcompany.ten_vad</string>
|
13 |
+
<key>CFBundleInfoDictionaryVersion</key>
|
14 |
+
<string>6.0</string>
|
15 |
+
<key>CFBundlePackageType</key>
|
16 |
+
<string>FMWK</string>
|
17 |
+
<key>CFBundleSignature</key>
|
18 |
+
<string>????</string>
|
19 |
+
<key>CFBundleSupportedPlatforms</key>
|
20 |
+
<array>
|
21 |
+
<string>MacOSX</string>
|
22 |
+
</array>
|
23 |
+
<key>CSResourcesFileMapped</key>
|
24 |
+
<true/>
|
25 |
+
<key>DTCompiler</key>
|
26 |
+
<string>com.apple.compilers.llvm.clang.1_0</string>
|
27 |
+
<key>DTPlatformBuild</key>
|
28 |
+
<string></string>
|
29 |
+
<key>DTPlatformName</key>
|
30 |
+
<string>macosx</string>
|
31 |
+
<key>DTPlatformVersion</key>
|
32 |
+
<string>14.2</string>
|
33 |
+
<key>DTSDKBuild</key>
|
34 |
+
<string>23C53</string>
|
35 |
+
<key>DTSDKName</key>
|
36 |
+
<string>macosx14.2</string>
|
37 |
+
<key>DTXcode</key>
|
38 |
+
<string>1520</string>
|
39 |
+
<key>DTXcodeBuild</key>
|
40 |
+
<string>15C500b</string>
|
41 |
+
<key>LSMinimumSystemVersion</key>
|
42 |
+
<string>10.10</string>
|
43 |
+
</dict>
|
44 |
+
</plist>
|
lib/macOS/ten_vad.framework/Versions/Current
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
A
|
lib/macOS/ten_vad.framework/Versions/Current/ten_vad
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:81b2de13710670bb94fef315ab50fedc903a21c04c4290c6c2ac28d8b42e715a
|
3 |
-
size 744600
|
|
|
|
|
|
|
|
lib/macOS/ten_vad.framework/ten_vad
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:81b2de13710670bb94fef315ab50fedc903a21c04c4290c6c2ac28d8b42e715a
|
3 |
-
size 744600
|
|
|
|
|
|
|
|
lib/macOS/ten_vad.framework/ten_vad
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
Versions/Current/ten_vad
|
setup.py
CHANGED
@@ -1,3 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
from setuptools import setup
|
2 |
import os, shutil
|
3 |
from setuptools.command.install import install
|
|
|
1 |
+
#
|
2 |
+
# Copyright © 2025 Agora
|
3 |
+
# This file is part of TEN Framework, an open source project.
|
4 |
+
# Licensed under the Apache License, Version 2.0, with certain conditions.
|
5 |
+
# Refer to the "LICENSE" file in the root directory for more information.
|
6 |
+
#
|
7 |
from setuptools import setup
|
8 |
import os, shutil
|
9 |
from setuptools.command.install import install
|
src/aed.cc
ADDED
@@ -0,0 +1,993 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
//
|
2 |
+
// Copyright © 2025 Agora
|
3 |
+
// This file is part of TEN Framework, an open source project.
|
4 |
+
// Licensed under the Apache License, Version 2.0, with certain conditions.
|
5 |
+
// Refer to the "LICENSE" file in the root directory for more information.
|
6 |
+
//
|
7 |
+
#include <string.h>
|
8 |
+
#include <stdlib.h>
|
9 |
+
#include <algorithm>
|
10 |
+
#include <math.h>
|
11 |
+
#include "aed.h"
|
12 |
+
#include "aed_st.h"
|
13 |
+
#include "coeff.h"
|
14 |
+
#include "pitch_est.h"
|
15 |
+
#include "stft.h"
|
16 |
+
#include <assert.h>
|
17 |
+
|
18 |
+
#define AUP_AED_ALIGN8(o) (((o) + 7) & (~7))
|
19 |
+
#define AUP_AED_MAX(x, y) (((x) > (y)) ? (x) : (y))
|
20 |
+
#define AUP_AED_MIN(x, y) (((x) > (y)) ? (y) : (x))
|
21 |
+
#define AUP_AED_EPS (1e-20f)
|
22 |
+
|
23 |
+
/// ///////////////////////////////////////////////////////////////////////
|
24 |
+
/// Internal Utils
|
25 |
+
/// ///////////////////////////////////////////////////////////////////////
|
26 |
+
|
27 |
+
// Loads the ONNX model at `onnx_path` and prepares everything Process()
// needs: the runtime environment, a single-threaded session, the cached
// input/output names and the pre-created input/output tensors.
//
// A constructor cannot report failure, so `inited` is set to 1 only when every
// step succeeded; Process() and Reset() refuse to run otherwise. On any
// failure every runtime object created so far is released again.
AUP_MODULE_AIVAD::AUP_MODULE_AIVAD(char* onnx_path) {
  // All locals are declared before the first `goto fail` so that no jump
  // bypasses an initialization.
  OrtStatus* status = NULL;
  OrtSessionOptions* session_options = NULL;
  OrtMemoryInfo* memory_info = NULL;
  size_t num_inputs = 0;
  size_t num_outputs = 0;
  size_t i = 0;
  int64_t input_shapes0[] = {1, AUP_AED_CONTEXT_WINDOW_LEN, AUP_AED_FEA_LEN};
  int64_t input_shapes1234[] = {1, AUP_AED_MODEL_HIDDEN_DIM};
  int64_t output_shapes0[] = {1, 1, 1};
  int64_t output_shapes1234[] = {1, AUP_AED_MODEL_HIDDEN_DIM};

  // Start from a known state so the cleanup path and the destructor only ever
  // see NULL or a live object.
  inited = 0;
  ort_env = NULL;
  ort_session = NULL;
  for (i = 0; i < AUP_AED_MODEL_IO_NUM; i++) {
    ort_input_tensors[i] = NULL;
    ort_output_tensors[i] = NULL;
  }

  ort_api = OrtGetApiBase()->GetApi(ORT_API_VERSION);
  if (ort_api == NULL) {
    // The linked runtime does not provide the API version we were built for.
    printf("Failed to get ONNX Runtime API version %d\n", (int)ORT_API_VERSION);
    return;
  }

  status = ort_api->CreateEnv(ORT_LOGGING_LEVEL_WARNING, "TEN-VAD", &ort_env);
  if (status != NULL) {
    printf("Failed to create env: %s\n", ort_api->GetErrorMessage(status));
    goto fail;
  }

  status = ort_api->CreateSessionOptions(&session_options);
  if (status != NULL) {
    printf("Failed to create session options: %s\n",
           ort_api->GetErrorMessage(status));
    goto fail;
  }
  status = ort_api->SetIntraOpNumThreads(session_options, 1);
  if (status != NULL) {
    printf("Failed to set intra-op thread number: %s\n",
           ort_api->GetErrorMessage(status));
    goto fail;
  }
  status =
      ort_api->CreateSession(ort_env, onnx_path, session_options, &ort_session);
  ort_api->ReleaseSessionOptions(session_options);
  session_options = NULL;
  if (status != NULL) {
    printf("Failed to create ort_session: %s\n",
           ort_api->GetErrorMessage(status));
    ort_session = NULL;  // nothing to release on the failure path
    goto fail;
  }

  status = ort_api->GetAllocatorWithDefaultOptions(&ort_allocator);
  if (status != NULL) {
    printf("Failed to get default allocator: %s\n",
           ort_api->GetErrorMessage(status));
    goto fail;
  }

  // Cache the input names; the name arrays are sized for exactly
  // AUP_AED_MODEL_IO_NUM entries, so any other count must be rejected at
  // run time (an assert would vanish in NDEBUG builds).
  status = ort_api->SessionGetInputCount(ort_session, &num_inputs);
  if (status != NULL) {
    printf("Failed to get input count: %s\n", ort_api->GetErrorMessage(status));
    goto fail;
  }
  if (num_inputs != AUP_AED_MODEL_IO_NUM) {
    printf("Unexpected model input count: %lu\n", (unsigned long)num_inputs);
    goto fail;
  }
  for (i = 0; i < num_inputs; i++) {
    char* input_name = NULL;
    status = ort_api->SessionGetInputName(ort_session, i, ort_allocator,
                                          &input_name);
    if (status != NULL) {
      printf("Failed to get input name %d: %s\n", (int)i,
             ort_api->GetErrorMessage(status));
      goto fail;
    }
    // strncpy does not terminate on truncation, so terminate explicitly.
    strncpy(input_names_buf[i], input_name, sizeof(input_names_buf[i]) - 1);
    input_names_buf[i][sizeof(input_names_buf[i]) - 1] = '\0';
    input_names[i] = input_names_buf[i];
    ort_api->AllocatorFree(ort_allocator, input_name);
  }

  // Same for the output names.
  status = ort_api->SessionGetOutputCount(ort_session, &num_outputs);
  if (status != NULL) {
    printf("Failed to get output count: %s\n",
           ort_api->GetErrorMessage(status));
    goto fail;
  }
  if (num_outputs != AUP_AED_MODEL_IO_NUM) {
    printf("Unexpected model output count: %lu\n", (unsigned long)num_outputs);
    goto fail;
  }
  for (i = 0; i < num_outputs; i++) {
    char* output_name = NULL;
    status = ort_api->SessionGetOutputName(ort_session, i, ort_allocator,
                                           &output_name);
    if (status != NULL) {
      printf("Failed to get output name %d: %s\n", (int)i,
             ort_api->GetErrorMessage(status));
      goto fail;
    }
    strncpy(output_names_buf[i], output_name, sizeof(output_names_buf[i]) - 1);
    output_names_buf[i][sizeof(output_names_buf[i]) - 1] = '\0';
    output_names[i] = output_names_buf[i];
    ort_api->AllocatorFree(ort_allocator, output_name);
  }

  // Input tensors wrap the member buffers directly: tensor 0 is the feature
  // window, tensors 1..N-1 are the recurrent hidden states.
  status = ort_api->CreateCpuMemoryInfo(OrtDeviceAllocator, OrtMemTypeDefault,
                                        &memory_info);
  if (status != NULL) {
    printf("Failed to create memory info: %s\n",
           ort_api->GetErrorMessage(status));
    goto fail;
  }
  for (i = 0; i < num_inputs; i++) {
    status = ort_api->CreateTensorWithDataAsOrtValue(
        memory_info, i == 0 ? input_data_buf_0 : input_data_buf_1234[i - 1],
        i == 0 ? sizeof(input_data_buf_0) : sizeof(input_data_buf_1234[i - 1]),
        i == 0 ? input_shapes0 : input_shapes1234,
        i == 0 ? sizeof(input_shapes0) / sizeof(input_shapes0[0])
               : sizeof(input_shapes1234) / sizeof(input_shapes1234[0]),
        ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, &ort_input_tensors[i]);
    if (status != NULL) {
      printf("Failed to create input tensor %d: %s\n", (int)i,
             ort_api->GetErrorMessage(status));
      ort_input_tensors[i] = NULL;
      goto fail;
    }
  }
  // The memory-info object is only needed while creating the tensors.
  ort_api->ReleaseMemoryInfo(memory_info);
  memory_info = NULL;

  // Output tensors own their storage (allocated by the default allocator).
  for (i = 0; i < num_outputs; i++) {
    status = ort_api->CreateTensorAsOrtValue(
        ort_allocator, i == 0 ? output_shapes0 : output_shapes1234,
        i == 0 ? sizeof(output_shapes0) / sizeof(output_shapes0[0])
               : sizeof(output_shapes1234) / sizeof(output_shapes1234[0]),
        ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, &ort_output_tensors[i]);
    if (status != NULL) {
      printf("Failed to create output tensor %d: %s\n", (int)i,
             ort_api->GetErrorMessage(status));
      ort_output_tensors[i] = NULL;
      goto fail;
    }
  }

  inited = 1;
  return;

fail:
  // Single cleanup path: release whatever exists, in reverse creation order.
  if (status != NULL) {
    ort_api->ReleaseStatus(status);
  }
  if (memory_info != NULL) {
    ort_api->ReleaseMemoryInfo(memory_info);
  }
  if (session_options != NULL) {
    ort_api->ReleaseSessionOptions(session_options);
  }
  for (i = 0; i < AUP_AED_MODEL_IO_NUM; i++) {
    if (ort_output_tensors[i] != NULL) {
      ort_api->ReleaseValue(ort_output_tensors[i]);
      ort_output_tensors[i] = NULL;
    }
    if (ort_input_tensors[i] != NULL) {
      ort_api->ReleaseValue(ort_input_tensors[i]);
      ort_input_tensors[i] = NULL;
    }
  }
  if (ort_session != NULL) {
    ort_api->ReleaseSession(ort_session);
    ort_session = NULL;
  }
  if (ort_env != NULL) {
    ort_api->ReleaseEnv(ort_env);
    ort_env = NULL;
  }
}
|
133 |
+
|
134 |
+
// Releases the ONNX Runtime objects owned by this instance: tensors first,
// then the session, then the environment.
AUP_MODULE_AIVAD::~AUP_MODULE_AIVAD() {
  if (ort_api == NULL) {
    // Without an API table nothing could have been created.
    return;
  }

  for (int i = 0; i < AUP_AED_MODEL_IO_NUM; i++) {
    if (ort_output_tensors[i]) {
      ort_api->ReleaseValue(ort_output_tensors[i]);
      ort_output_tensors[i] = NULL;
    }
  }

  // The input tensors are OrtValue objects as well and must be released.
  // They are only touched when construction completed (`inited`), because
  // only then is every slot known to hold a live tensor.
  // NOTE(review): assumes `inited` is zero-initialized in the class
  // declaration, as Process()/Reset() already rely on - confirm in aed.h.
  if (inited) {
    for (int i = 0; i < AUP_AED_MODEL_IO_NUM; i++) {
      if (ort_input_tensors[i]) {
        ort_api->ReleaseValue(ort_input_tensors[i]);
        ort_input_tensors[i] = NULL;
      }
    }
  }

  if (ort_session) {
    ort_api->ReleaseSession(ort_session);
    ort_session = NULL;
  }
  if (ort_env) {
    ort_api->ReleaseEnv(ort_env);
    ort_env = NULL;
  }
}
|
147 |
+
|
148 |
+
int AUP_MODULE_AIVAD::Process(float* input, float* output) {
|
149 |
+
if (!inited) {
|
150 |
+
printf("not inited!\n");
|
151 |
+
return -1;
|
152 |
+
}
|
153 |
+
|
154 |
+
memcpy(input_data_buf_0, input, sizeof(input_data_buf_0));
|
155 |
+
if (clear_hidden) {
|
156 |
+
memset(input_data_buf_1234, 0, sizeof(input_data_buf_1234));
|
157 |
+
clear_hidden = 0;
|
158 |
+
}
|
159 |
+
OrtStatus* status = ort_api->Run(
|
160 |
+
ort_session, NULL, input_names, ort_input_tensors, AUP_AED_MODEL_IO_NUM,
|
161 |
+
output_names, AUP_AED_MODEL_IO_NUM, ort_output_tensors);
|
162 |
+
float* output_data;
|
163 |
+
ort_api->GetTensorMutableData(ort_output_tensors[0], (void**)&output_data);
|
164 |
+
*output = output_data[0];
|
165 |
+
for (int i = 1; i < AUP_AED_MODEL_IO_NUM; i++) {
|
166 |
+
ort_api->GetTensorMutableData(ort_output_tensors[i], (void**)&output_data);
|
167 |
+
memcpy(input_data_buf_1234[i - 1], output_data,
|
168 |
+
sizeof(input_data_buf_1234[i - 1]));
|
169 |
+
}
|
170 |
+
|
171 |
+
return 0;
|
172 |
+
}
|
173 |
+
|
174 |
+
int AUP_MODULE_AIVAD::Reset() {
|
175 |
+
if (!inited) {
|
176 |
+
return -1;
|
177 |
+
}
|
178 |
+
|
179 |
+
clear_hidden = 1;
|
180 |
+
return 0;
|
181 |
+
}
|
182 |
+
|
183 |
+
// Validates a static configuration. Returns 0 when acceptable, -1 otherwise.
//
// Rules enforced:
//  - the feature length must not be smaller than the mel filter-bank size
//    (compile-time constants);
//  - hopSz must be at least 32;
//  - when frqInputAvailableFlag is 1: fftSz >= max(128, hopSz) and
//    hopSz <= anaWindowSz <= fftSz.
static int AUP_Aed_checkStatCfg(Aed_StaticCfg* pCfg) {
  if (pCfg == NULL) {
    return -1;
  }

#if AUP_AED_FEA_LEN < AUP_AED_MEL_FILTER_BANK_NUM
  return -1;
#endif

  if (pCfg->hopSz < 32) {
    return -1;
  }

  // The FFT/window constraints only apply when a spectrum is supplied.
  if (pCfg->frqInputAvailableFlag != 1) {
    return 0;
  }

  const bool fftTooSmall = (pCfg->fftSz < 128) || (pCfg->fftSz < pCfg->hopSz);
  const bool windowOutOfRange = (pCfg->anaWindowSz > pCfg->fftSz) ||
                                (pCfg->anaWindowSz < pCfg->hopSz);

  return (fftTooSmall || windowOutOfRange) ? -1 : 0;
}
|
207 |
+
|
208 |
+
// Derives the internal static registers of `stHdl` from its stored static
// configuration (stHdl->stCfg). Returns 0 on success, -1 for a NULL handle.
static int AUP_Aed_publishStaticCfg(Aed_St* stHdl) {
  if (stHdl == NULL) {
    return -1;
  }

  const Aed_StaticCfg* cfg = (const Aed_StaticCfg*)(&(stHdl->stCfg));

  // External (caller-side) STFT layout; only meaningful when the caller
  // provides a spectrum, otherwise zero.
  if (cfg->frqInputAvailableFlag == 1) {
    stHdl->extFftSz = cfg->fftSz;
    stHdl->extNBins = (stHdl->extFftSz >> 1) + 1;
    stHdl->extWinSz = cfg->anaWindowSz;
  } else {
    stHdl->extFftSz = 0;
    stHdl->extNBins = 0;
    stHdl->extWinSz = 0;
  }
  stHdl->extHopSz = cfg->hopSz;

  // Internal STFT layout is fixed by compile-time constants.
  stHdl->intFftSz = AUP_AED_ASSUMED_FFTSZ;
  stHdl->intHopSz = AUP_AED_ASSUMED_HOPSZ;
  stHdl->intWinSz = AUP_AED_ASSUMED_WINDOWSZ;
  stHdl->intNBins = (stHdl->intFftSz >> 1) + 1;
  stHdl->intAnalyWindowPtr = AUP_AED_STFTWindow_Hann768;

  // Analysis mode:
  //   0 - external spectrum matches the internal layout, use it directly
  //   1 - external spectrum usable after interpolation / extrapolation
  //   2 - external spectrum unusable, redo the analysis on the time signal
  if (cfg->frqInputAvailableFlag != 0 && stHdl->extHopSz == stHdl->intHopSz) {
    if (stHdl->extFftSz == stHdl->intFftSz) {
      stHdl->intAnalyFlag = 0;
    } else {
      stHdl->intAnalyFlag = 1;
    }
  } else {
    stHdl->intAnalyFlag = 2;
  }
  stHdl->inputTimeFIFOLen = stHdl->extHopSz + stHdl->intHopSz;

  // For aiaed release2.0.0 the input time signal must be pre-emphasized, so
  // the analysis is always redone internally on the pre-emphasized signal,
  // overriding the mode selected above.
  stHdl->intAnalyFlag = 2;

  stHdl->feaSz = (size_t)AUP_AED_FEA_LEN;
  stHdl->melFbSz = (size_t)AUP_AED_MEL_FILTER_BANK_NUM;
  stHdl->algDelay = (size_t)AUP_AED_LOOKAHEAD_NFRM;
  stHdl->algCtxtSz = (size_t)AUP_AED_CONTEXT_WINDOW_LEN;
  stHdl->frmRmsBufLen = AUP_AED_MAX(1, stHdl->algDelay);

  return 0;
}
|
262 |
+
|
263 |
+
// Copies the stored dynamic configuration (stHdl->dynamCfg) into the run-time
// registers and forwards the voiced threshold to the pitch estimator, if one
// exists. Returns 0 on success, -1 for a NULL handle.
static int AUP_Aed_publishDynamCfg(Aed_St* stHdl) {
  if (stHdl == NULL) {
    return -1;
  }

  const Aed_DynamCfg* dynCfg = (const Aed_DynamCfg*)(&(stHdl->dynamCfg));

  stHdl->aivadResetFrmNum = dynCfg->resetFrameNum;
  stHdl->voiceDecideThresh = dynCfg->extVoiceThr;

  if (stHdl->pitchEstStPtr == NULL) {
    return 0;
  }

  PE_DynamCfg pitchDynCfg;
  pitchDynCfg.voicedThr = dynCfg->pitchEstVoicedThr;
  AUP_PE_setDynamCfg(stHdl->pitchEstStPtr, &pitchDynCfg);

  return 0;
}
|
281 |
+
|
282 |
+
// Clears all run-time state of `stHdl`, regenerates the triangular mel
// filter-bank coefficients and re-initializes the sub-modules (pitch
// estimator, AI-VAD inference, time-domain analyzer).
//
// Returns 0 on success; -1 when the handle is NULL, when the dynamic memory
// has not been allocated yet, when two consecutive mel band edges fall on the
// same FFT bin, or when a sub-module fails to initialize.
static int AUP_Aed_resetVariables(Aed_St* stHdl) {
  if (stHdl == NULL) {
    return -1;
  }

  // The buffers below live in the dynamic memory block; without it (init
  // called before memAllocate) there is nothing valid to write into.
  if (stHdl->dynamMemPtr == NULL || stHdl->melFilterBankCoef == NULL ||
      stHdl->melFilterBinBuff == NULL) {
    return -1;
  }

  // First clear all the dynamic memory; every dynamic variable that is not
  // listed below is thereby reset to 0 (including the filter-bank
  // coefficients outside the triangles written further down).
  memset(stHdl->dynamMemPtr, 0, stHdl->dynamMemSize);

  float* melFbCoef = stHdl->melFilterBankCoef;
  size_t* melBinBuff = stHdl->melFilterBinBuff;
  size_t i, j;
  size_t nBins = stHdl->intNBins;
  size_t melFbSz = stHdl->melFbSz;

  stHdl->aedProcFrmCnt = 0;
  stHdl->inputTimeFIFOIdx = 0;
  stHdl->aivadResetCnt = 0;
  stHdl->timeSignalPre = 0.0f;
  // -1 is the default score, marking that no result is available yet.
  stHdl->aivadScore = -1.0f;
  stHdl->aivadScorePre = -1.0f;

  stHdl->pitchFreq = 0.0f;

  // Generate the mel filter-bank: melFbSz + 2 band edges equally spaced on
  // the mel scale between 0 Hz and 8000 Hz, each mapped to an FFT bin.
  float low_mel = 2595.0f * log10f(1.0f + 0.0f / 700.0f);
  float high_mel = 2595.0f * log10f(1.0f + 8000.0f / 700.0f);
  float mel_points = 0.0f;
  float hz_points = 0.0f;
  size_t idx = 0;

  for (i = 0; i < melFbSz + 2; i++) {
    mel_points = i * (high_mel - low_mel) / ((float)melFbSz + 1.0f) + low_mel;
    hz_points = 700.0f * (powf(10.0f, mel_points / 2595.0f) - 1.0f);
    melBinBuff[i] =
        (size_t)((stHdl->intFftSz + 1.0f) * hz_points / (float)AUP_AED_FS);
    if (i > 0 && melBinBuff[i] == melBinBuff[i - 1]) {
      // A zero-width band would divide by zero in the slopes below.
      return -1;
    }
  }

  // Band j rises linearly over [edge j, edge j+1) and falls linearly over
  // [edge j+1, edge j+2); row j of the coefficient matrix holds nBins values.
  for (j = 0; j < melFbSz; j++) {
    for (i = melBinBuff[j]; i < melBinBuff[j + 1]; i++) {
      idx = j * nBins + i;
      melFbCoef[idx] = (float)(i - melBinBuff[j]) /
                       (float)(melBinBuff[j + 1] - melBinBuff[j]);
    }
    for (i = melBinBuff[j + 1]; i < melBinBuff[j + 2]; i++) {
      idx = j * nBins + i;
      melFbCoef[idx] = (float)(melBinBuff[j + 2] - i) /
                       (float)(melBinBuff[j + 2] - melBinBuff[j + 1]);
    }
  }

  if (stHdl->pitchEstStPtr != NULL) {
    if (AUP_PE_init(stHdl->pitchEstStPtr) < 0) {
      return -1;
    }
  }

  if (stHdl->aivadInf != NULL) {
    stHdl->aivadInf->Reset();
  }

  if (stHdl->timeInAnalysis != NULL) {
    if (AUP_Analyzer_init(stHdl->timeInAnalysis) < 0) {
      return -1;
    }
  }

  return 0;
}
|
355 |
+
|
356 |
+
// Returns `cnter + 1`, wrapping back to 0 once the result would reach
// 1000000000. The comparison is done before the increment so that even
// INT_MAX cannot trigger signed overflow.
static int AUP_Aed_addOneCnter(int cnter) {
  if (cnter >= 1000000000 - 1) {
    return 0;
  }
  return cnter + 1;
}
|
363 |
+
|
364 |
+
// Resamples a per-bin power spectrum from `srcNBins` to `tgtNBins` bins by
// nearest-lower-index lookup: target bin k takes the value of source bin
// floor(k * (srcNBins - 1) / (tgtNBins - 1)), clamped to the valid range.
// Equal sizes are copied verbatim.
//
// Nothing is written when a pointer is NULL or a bin count is not positive.
// A single target bin receives source bin 0 (this avoids the division by
// zero the ratio would otherwise produce).
static void AUP_Aed_binPowerConvert(const float* src, float* tgt, int srcNBins,
                                    int tgtNBins) {
  float rate;
  int srcIdx, tgtIdx;

  if (src == NULL || tgt == NULL || srcNBins <= 0 || tgtNBins <= 0) {
    return;
  }

  if (srcNBins == tgtNBins) {
    memcpy(tgt, src, sizeof(float) * (size_t)tgtNBins);
    return;
  }

  rate = (tgtNBins > 1) ? (float)(srcNBins - 1) / (float)(tgtNBins - 1) : 0.0f;
  for (tgtIdx = 0; tgtIdx < tgtNBins; tgtIdx++) {
    srcIdx = (int)(tgtIdx * rate);
    // Clamp against rounding at the upper edge.
    if (srcIdx < 0) {
      srcIdx = 0;
    }
    if (srcIdx > srcNBins - 1) {
      srcIdx = srcNBins - 1;
    }
    tgt[tgtIdx] = src[srcIdx];
  }
}
|
384 |
+
|
385 |
+
// Computes the power of each of the `nBins` spectral bins.
//
// Layout read from `cmplxSpctr`: element 0 is squared into bin 0, element 1
// is squared into the last bin (nBins - 1), and for every bin k in between
// elements 2k and 2k+1 are taken as real and imaginary part.
//
// Nothing is written when a pointer is NULL or nBins is not positive.
static void AUP_Aed_CalcBinPow(int nBins, const float* cmplxSpctr,
                               float* binPow) {
  int idx, realIdx, imagIdx;

  if (cmplxSpctr == NULL || binPow == NULL || nBins <= 0) {
    return;  // would otherwise write binPow[-1]
  }

  // bin-0
  binPow[0] = cmplxSpctr[0] * cmplxSpctr[0];

  // bin-(NBins-1)
  binPow[nBins - 1] = cmplxSpctr[1] * cmplxSpctr[1];

  for (idx = 1; idx < (nBins - 1); idx++) {
    realIdx = idx << 1;
    imagIdx = realIdx + 1;

    binPow[idx] = cmplxSpctr[realIdx] * cmplxSpctr[realIdx] +
                  cmplxSpctr[imagIdx] * cmplxSpctr[imagIdx];
  }
}
|
404 |
+
|
405 |
+
static int AUP_Aed_pitch_proc(void* pitchModule, const float* timeSignal,
|
406 |
+
size_t timeLen, const float* binPow, size_t nBins,
|
407 |
+
PE_OutputData* pOut) {
|
408 |
+
PE_InputData peInData;
|
409 |
+
|
410 |
+
peInData.timeSignal = timeSignal;
|
411 |
+
peInData.hopSz = (int)timeLen;
|
412 |
+
peInData.inBinPow = binPow;
|
413 |
+
peInData.nBins = (int)nBins;
|
414 |
+
pOut->pitchFreq = 0;
|
415 |
+
pOut->voiced = -1;
|
416 |
+
return AUP_PE_proc(pitchModule, &peInData, pOut);
|
417 |
+
}
|
418 |
+
|
419 |
+
// Builds the feature vector for the current frame, appends it to the context
// window and runs the AI-VAD model on the whole window.
//
// Per frame: the feature stack is shifted by one frame; the newest slot gets
// melFbSz normalized log mel-band energies followed by (feaSz - melFbSz)
// normalized copies of the current pitch frequency. Normalization uses the
// AUP_AED_FEATURE_MEANS / AUP_AED_FEATURE_STDS tables.
//
// `*aivadScore` receives the model output, or -1.0f (the "no result" default
// used elsewhere in this file) when no inference instance exists.
// Returns 0 on success, -1 on a NULL argument or inference failure.
static int AUP_Aed_aivad_proc(Aed_St* stHdl, const float* inBinPow,
                              float* aivadScore) {
  if (stHdl == NULL || inBinPow == NULL || aivadScore == NULL) {
    return -1;
  }

  size_t i, j;
  size_t nBins = stHdl->intNBins;
  size_t melFbSz = stHdl->melFbSz;
  size_t srcOffset;
  size_t srcLen;

  float* aivadInputFeatStack = stHdl->aivadInputFeatStack;
  float* melFbCoef = stHdl->melFilterBankCoef;
  const float* aivadFeatMean = AUP_AED_FEATURE_MEANS;
  const float* aivadFeatStd = AUP_AED_FEATURE_STDS;
  float* curMelFbCoefPtr = NULL;
  float* curInputFeatPtr = NULL;
  float perBandValue = 0.0f;
  float powerNormal = 32768.0f * 32768.0f;  // 16-bit full scale, squared

  // Slide the context window: drop the oldest frame, the newest frame goes
  // into the last slot.
  srcOffset = stHdl->feaSz;
  srcLen = (stHdl->algCtxtSz - 1) * stHdl->feaSz;
  memmove(aivadInputFeatStack, aivadInputFeatStack + srcOffset,
          sizeof(float) * srcLen);
  curInputFeatPtr = aivadInputFeatStack + srcLen;

  // Mel filter-bank features: normalized log band energy.
  for (i = 0; i < melFbSz; i++) {
    perBandValue = 0.0f;
    curMelFbCoefPtr = melFbCoef + i * nBins;
    for (j = 0; j < nBins; j++) {
      perBandValue += (inBinPow[j] * curMelFbCoefPtr[j]);
    }
    perBandValue = perBandValue / powerNormal;
    perBandValue = logf(perBandValue + AUP_AED_EPS);
    curInputFeatPtr[i] =
        (perBandValue - aivadFeatMean[i]) / (aivadFeatStd[i] + AUP_AED_EPS);
  }

  // Extra features: the normalized pitch frequency.
  for (i = melFbSz; i < stHdl->feaSz; i++) {
    curInputFeatPtr[i] =
        (stHdl->pitchFreq - aivadFeatMean[i]) / (aivadFeatStd[i] + AUP_AED_EPS);
  }

  // Run the model. The default keeps the score well-defined when there is no
  // inference instance (it was previously read uninitialized in that case).
  float aivadOutput = -1.0f;
  if (stHdl->aivadInf != NULL &&
      stHdl->aivadInf->Process(stHdl->aivadInputFeatStack, &aivadOutput) != 0) {
    return -1;
  }

  (*aivadScore) = aivadOutput;

  // Periodically clear the model's hidden state.
  stHdl->aivadResetCnt += 1;
  if (stHdl->aivadResetCnt >= stHdl->aivadResetFrmNum) {
    if (stHdl->aivadInf != NULL) {
      // A failed reset is deliberately not treated as a frame error.
      (void)stHdl->aivadInf->Reset();
    }
    stHdl->aivadResetCnt = 0;
  }

  return 0;
}
|
485 |
+
|
486 |
+
// Sizes and, optionally, lays out the dynamic buffers of `stHdl`.
//
// With memPtrExt == NULL only the required byte count is returned. Otherwise
// the block of `memSize` bytes at memPtrExt is carved into 8-byte-aligned
// regions and the buffer pointers in `stHdl` are set to them.
//
// Returns the total size in bytes, or -1 for a NULL handle or a block that is
// too small.
static int AUP_Aed_dynamMemPrepare(Aed_St* stHdl, void* memPtrExt,
                                   size_t memSize) {
  if (stHdl == NULL) {
    return -1;
  }

  // Per-region sizes, each rounded up to a multiple of 8 bytes.
  const size_t timeFifoBytes =
      AUP_AED_ALIGN8(sizeof(float) * stHdl->inputTimeFIFOLen);
  const size_t emphFifoBytes =
      AUP_AED_ALIGN8(sizeof(float) * stHdl->inputTimeFIFOLen);
  const size_t spectrumBytes = AUP_AED_ALIGN8(sizeof(float) * stHdl->intFftSz);
  const size_t binPowBytes = AUP_AED_ALIGN8(sizeof(float) * stHdl->intNBins);
  const size_t featStackBytes =
      AUP_AED_ALIGN8(sizeof(float) * stHdl->algCtxtSz * stHdl->feaSz);
  // Counted in the total but never assigned to a pointer below; kept so the
  // reported size stays unchanged.
  const size_t reservedFeatStackBytes =
      AUP_AED_ALIGN8(sizeof(float) * stHdl->algCtxtSz * stHdl->feaSz);
  const size_t melCoefBytes =
      AUP_AED_ALIGN8(sizeof(float) * stHdl->intNBins * stHdl->feaSz);
  const size_t melBinBytes =
      AUP_AED_ALIGN8(sizeof(size_t) * (stHdl->feaSz + 2));
  const size_t frameRmsBytes =
      AUP_AED_ALIGN8(stHdl->frmRmsBufLen * sizeof(float));
  const size_t inputFloatBytes =
      AUP_AED_ALIGN8(stHdl->extHopSz * sizeof(float));

  const size_t requiredBytes = timeFifoBytes + emphFifoBytes + spectrumBytes +
                               binPowBytes + featStackBytes +
                               reservedFeatStackBytes + melCoefBytes +
                               melBinBytes + frameRmsBytes + inputFloatBytes;

  // Query mode: report the requirement only.
  if (memPtrExt == NULL) {
    return ((int)requiredBytes);
  }

  if (requiredBytes > memSize) {
    return -1;
  }

  // Carve the block front to back.
  char* const base = (char*)memPtrExt;
  char* cursor = base;

  stHdl->inputTimeFIFO = (float*)cursor;
  cursor += timeFifoBytes;

  stHdl->inputEmphTimeFIFO = (float*)cursor;
  cursor += emphFifoBytes;

  stHdl->aivadInputCmplxSptrm = (float*)cursor;
  cursor += spectrumBytes;

  stHdl->aivadInputBinPow = (float*)cursor;
  cursor += binPowBytes;

  stHdl->aivadInputFeatStack = (float*)cursor;
  cursor += featStackBytes;

  stHdl->melFilterBankCoef = (float*)cursor;
  cursor += melCoefBytes;

  stHdl->melFilterBinBuff = (size_t*)cursor;
  cursor += melBinBytes;

  stHdl->frameRmsBuff = (float*)cursor;
  cursor += frameRmsBytes;

  stHdl->inputFloatBuff = (float*)cursor;
  cursor += inputFloatBytes;

  // Sanity check: the carved regions must not exceed the computed total.
  if (((size_t)(cursor - base)) > requiredBytes) {
    return -1;
  }

  return ((int)requiredBytes);
}
|
588 |
+
|
589 |
+
// Processes one internal frame: estimates the pitch from the time signal and
// its bin powers, stores it in stHdl->pitchFreq, then runs the AI-VAD step
// and stores the resulting score in stHdl->aivadScore.
//
// Returns 0 on success, -1 for a NULL handle or when either stage fails.
static int AUP_Aed_runOneFrm(Aed_St* stHdl, const float* tSignal, int hopSz,
                             const float* binPowPtr, int nBins) {
  PE_OutputData peOutData = {0, 0};
  float aivadScore = -1.0f;

  if (stHdl == NULL) {
    return -1;
  }

  if (AUP_Aed_pitch_proc(stHdl->pitchEstStPtr, tSignal, hopSz, binPowPtr, nBins,
                         &peOutData) < 0) {
    return -1;
  }
  // The pitch is consumed as a feature by the AI-VAD step below.
  stHdl->pitchFreq = peOutData.pitchFreq;

  if (AUP_Aed_aivad_proc(stHdl, binPowPtr, &aivadScore) < 0) {
    return -1;
  }
  stHdl->aivadScore = aivadScore;

  return 0;
}
|
609 |
+
|
610 |
+
/// ///////////////////////////////////////////////////////////////////////
|
611 |
+
/// Public API
|
612 |
+
/// ///////////////////////////////////////////////////////////////////////
|
613 |
+
|
614 |
+
int AUP_Aed_create(void** stPtr) {
|
615 |
+
if (stPtr == NULL) {
|
616 |
+
return -1;
|
617 |
+
}
|
618 |
+
Aed_St* tmpPtr = (Aed_St*)malloc(sizeof(Aed_St));
|
619 |
+
if (tmpPtr == NULL) {
|
620 |
+
return -1;
|
621 |
+
}
|
622 |
+
memset(tmpPtr, 0, sizeof(Aed_St));
|
623 |
+
|
624 |
+
if (AUP_PE_create(&(tmpPtr->pitchEstStPtr)) < 0) {
|
625 |
+
return -1;
|
626 |
+
}
|
627 |
+
if (AUP_Analyzer_create(&(tmpPtr->timeInAnalysis)) < 0) {
|
628 |
+
return -1;
|
629 |
+
}
|
630 |
+
|
631 |
+
tmpPtr->stCfg.enableFlag = 1; // as default, module enabled
|
632 |
+
tmpPtr->stCfg.fftSz = 1024;
|
633 |
+
tmpPtr->stCfg.hopSz = 256;
|
634 |
+
tmpPtr->stCfg.anaWindowSz = 768;
|
635 |
+
tmpPtr->stCfg.frqInputAvailableFlag = 0;
|
636 |
+
|
637 |
+
tmpPtr->dynamCfg.extVoiceThr = 0.5f;
|
638 |
+
tmpPtr->dynamCfg.extMusicThr = 0.5f;
|
639 |
+
tmpPtr->dynamCfg.extEnergyThr = 10.0f;
|
640 |
+
tmpPtr->dynamCfg.resetFrameNum = 1875; // TODO
|
641 |
+
tmpPtr->dynamCfg.pitchEstVoicedThr = AUP_AED_PITCH_EST_DEFAULT_VOICEDTHR;
|
642 |
+
|
643 |
+
(*stPtr) = (void*)tmpPtr;
|
644 |
+
|
645 |
+
return 0;
|
646 |
+
}
|
647 |
+
|
648 |
+
int AUP_Aed_destroy(void** stPtr) {
|
649 |
+
if (stPtr == NULL || (*stPtr) == NULL) {
|
650 |
+
return -1;
|
651 |
+
}
|
652 |
+
Aed_St* stHdl = (Aed_St*)(*stPtr);
|
653 |
+
|
654 |
+
if (stHdl->aivadInf != NULL) {
|
655 |
+
delete stHdl->aivadInf;
|
656 |
+
}
|
657 |
+
stHdl->aivadInf = NULL;
|
658 |
+
|
659 |
+
if (AUP_PE_destroy(&(stHdl->pitchEstStPtr)) < 0) {
|
660 |
+
return -1;
|
661 |
+
}
|
662 |
+
if (AUP_Analyzer_destroy(&(stHdl->timeInAnalysis)) < 0) {
|
663 |
+
return -1;
|
664 |
+
}
|
665 |
+
|
666 |
+
if (stHdl->dynamMemPtr != NULL) {
|
667 |
+
free(stHdl->dynamMemPtr);
|
668 |
+
}
|
669 |
+
stHdl->dynamMemPtr = NULL;
|
670 |
+
|
671 |
+
if (stHdl != NULL) {
|
672 |
+
free(stHdl);
|
673 |
+
}
|
674 |
+
(*stPtr) = NULL;
|
675 |
+
|
676 |
+
return 0;
|
677 |
+
}
|
678 |
+
|
679 |
+
int AUP_Aed_memAllocate(void* stPtr, const Aed_StaticCfg* pCfg) {
|
680 |
+
Aed_St* stHdl = (Aed_St*)(stPtr);
|
681 |
+
Aed_StaticCfg aedStatCfg;
|
682 |
+
PE_StaticCfg pitchStatCfg;
|
683 |
+
Analyzer_StaticCfg analyzerStatCfg;
|
684 |
+
int totalMemSize = 0;
|
685 |
+
|
686 |
+
if (stPtr == NULL || pCfg == NULL) {
|
687 |
+
return -1;
|
688 |
+
}
|
689 |
+
|
690 |
+
// 1th: check static cfg.
|
691 |
+
memcpy(&aedStatCfg, pCfg, sizeof(Aed_StaticCfg));
|
692 |
+
if (AUP_Aed_checkStatCfg(&aedStatCfg) < 0) {
|
693 |
+
return -1;
|
694 |
+
}
|
695 |
+
|
696 |
+
memcpy(&(stHdl->stCfg), &aedStatCfg, sizeof(Aed_StaticCfg));
|
697 |
+
|
698 |
+
// 2th: publish static configuration to internal statical configuration
|
699 |
+
// registers
|
700 |
+
if (AUP_Aed_publishStaticCfg(stHdl) < 0) {
|
701 |
+
return -1;
|
702 |
+
}
|
703 |
+
|
704 |
+
// 3th: create aivad instance
|
705 |
+
if (stHdl->aivadInf == NULL) {
|
706 |
+
stHdl->aivadInf = new AUP_MODULE_AIVAD("onnx_model/ten-vad.onnx");
|
707 |
+
if (stHdl->aivadInf == NULL) {
|
708 |
+
return -1;
|
709 |
+
}
|
710 |
+
}
|
711 |
+
stHdl->aivadInf->Reset();
|
712 |
+
|
713 |
+
// 4th: memAllocate operation for Pitch-Estimator ............
|
714 |
+
if (AUP_PE_getStaticCfg(stHdl->pitchEstStPtr, &pitchStatCfg) < 0) {
|
715 |
+
return -1;
|
716 |
+
}
|
717 |
+
pitchStatCfg.fftSz = stHdl->intFftSz;
|
718 |
+
pitchStatCfg.anaWindowSz = stHdl->intWinSz;
|
719 |
+
pitchStatCfg.hopSz = stHdl->intHopSz;
|
720 |
+
pitchStatCfg.useLPCPreFiltering = AUP_AED_PITCH_EST_USE_LPC;
|
721 |
+
pitchStatCfg.procFs = AUP_AED_PITCH_EST_PROCFS;
|
722 |
+
if (AUP_PE_memAllocate(stHdl->pitchEstStPtr, &pitchStatCfg) < 0) {
|
723 |
+
return -1;
|
724 |
+
}
|
725 |
+
|
726 |
+
// creation and initialization with time-analysis module ......
|
727 |
+
AUP_Analyzer_getStaticCfg(stHdl->timeInAnalysis, &analyzerStatCfg);
|
728 |
+
analyzerStatCfg.win_len = (int)stHdl->intWinSz;
|
729 |
+
analyzerStatCfg.hop_size = (int)stHdl->intHopSz;
|
730 |
+
analyzerStatCfg.fft_size = (int)stHdl->intFftSz;
|
731 |
+
analyzerStatCfg.ana_win_coeff = stHdl->intAnalyWindowPtr;
|
732 |
+
if (AUP_Analyzer_memAllocate(stHdl->timeInAnalysis, &analyzerStatCfg) < 0) {
|
733 |
+
return -1;
|
734 |
+
}
|
735 |
+
|
736 |
+
// 5th: check memory requirement ..............................
|
737 |
+
totalMemSize = AUP_Aed_dynamMemPrepare(stHdl, NULL, 0);
|
738 |
+
if (totalMemSize < 0) {
|
739 |
+
return -1;
|
740 |
+
}
|
741 |
+
|
742 |
+
// 6th: allocate dynamic memory
|
743 |
+
if (totalMemSize > (int)stHdl->dynamMemSize) {
|
744 |
+
if (stHdl->dynamMemPtr != NULL) {
|
745 |
+
free(stHdl->dynamMemPtr);
|
746 |
+
stHdl->dynamMemPtr = NULL;
|
747 |
+
stHdl->dynamMemSize = 0;
|
748 |
+
}
|
749 |
+
stHdl->dynamMemPtr = malloc(totalMemSize);
|
750 |
+
if (stHdl->dynamMemPtr == NULL) {
|
751 |
+
return -1;
|
752 |
+
}
|
753 |
+
stHdl->dynamMemSize = totalMemSize;
|
754 |
+
}
|
755 |
+
memset(stHdl->dynamMemPtr, 0, stHdl->dynamMemSize);
|
756 |
+
|
757 |
+
// 7th: setup the pointers/variable
|
758 |
+
if (AUP_Aed_dynamMemPrepare(stHdl, stHdl->dynamMemPtr, stHdl->dynamMemSize) <
|
759 |
+
0) {
|
760 |
+
return -1;
|
761 |
+
}
|
762 |
+
|
763 |
+
// 8th: publish internal dynamic config registers
|
764 |
+
if (AUP_Aed_publishDynamCfg(stHdl) < 0) {
|
765 |
+
return -1;
|
766 |
+
}
|
767 |
+
|
768 |
+
return 0;
|
769 |
+
}
|
770 |
+
|
771 |
+
int AUP_Aed_init(void* stPtr) {
|
772 |
+
Aed_St* stHdl = (Aed_St*)(stPtr);
|
773 |
+
if (stPtr == NULL) {
|
774 |
+
return -1;
|
775 |
+
}
|
776 |
+
|
777 |
+
// publish internal dynamic config registers
|
778 |
+
if (AUP_Aed_publishDynamCfg(stHdl) < 0) {
|
779 |
+
return -1;
|
780 |
+
}
|
781 |
+
|
782 |
+
// clear/reset run-time variables
|
783 |
+
if (AUP_Aed_resetVariables(stHdl) < 0) {
|
784 |
+
return -1;
|
785 |
+
}
|
786 |
+
|
787 |
+
return 0;
|
788 |
+
}
|
789 |
+
|
790 |
+
int AUP_Aed_setDynamCfg(void* stPtr, const Aed_DynamCfg* pCfg) {
|
791 |
+
Aed_St* stHdl = (Aed_St*)(stPtr);
|
792 |
+
|
793 |
+
if (stPtr == NULL || pCfg == NULL) {
|
794 |
+
return -1;
|
795 |
+
}
|
796 |
+
|
797 |
+
memcpy(&(stHdl->dynamCfg), pCfg, sizeof(Aed_DynamCfg));
|
798 |
+
|
799 |
+
// publish internal dynamic configuration registers
|
800 |
+
if (AUP_Aed_publishDynamCfg(stHdl) < 0) {
|
801 |
+
return -1;
|
802 |
+
}
|
803 |
+
|
804 |
+
return 0;
|
805 |
+
}
|
806 |
+
|
807 |
+
int AUP_Aed_getStaticCfg(const void* stPtr, Aed_StaticCfg* pCfg) {
|
808 |
+
const Aed_St* stHdl = (const Aed_St*)(stPtr);
|
809 |
+
|
810 |
+
if (stPtr == NULL || pCfg == NULL) {
|
811 |
+
return -1;
|
812 |
+
}
|
813 |
+
|
814 |
+
memcpy(pCfg, &(stHdl->stCfg), sizeof(Aed_StaticCfg));
|
815 |
+
|
816 |
+
return 0;
|
817 |
+
}
|
818 |
+
|
819 |
+
int AUP_Aed_getDynamCfg(const void* stPtr, Aed_DynamCfg* pCfg) {
|
820 |
+
const Aed_St* stHdl = (const Aed_St*)(stPtr);
|
821 |
+
|
822 |
+
if (stPtr == NULL || pCfg == NULL) {
|
823 |
+
return -1;
|
824 |
+
}
|
825 |
+
|
826 |
+
memcpy(pCfg, &(stHdl->dynamCfg), sizeof(Aed_DynamCfg));
|
827 |
+
|
828 |
+
return 0;
|
829 |
+
}
|
830 |
+
|
831 |
+
int AUP_Aed_getAlgDelay(const void* stPtr, int* delayInFrms) {
|
832 |
+
const Aed_St* stHdl = (const Aed_St*)(stPtr);
|
833 |
+
|
834 |
+
if (stPtr == NULL || delayInFrms == NULL) {
|
835 |
+
return -1;
|
836 |
+
}
|
837 |
+
|
838 |
+
(*delayInFrms) = (int)stHdl->algDelay;
|
839 |
+
|
840 |
+
return 0;
|
841 |
+
}
|
842 |
+
|
843 |
+
int AUP_Aed_proc(void* stPtr, const Aed_InputData* pIn, Aed_OutputData* pOut) {
|
844 |
+
Analyzer_InputData analyzerInput;
|
845 |
+
Analyzer_OutputData analyzerOutput;
|
846 |
+
Aed_St* stHdl = (Aed_St*)(stPtr);
|
847 |
+
|
848 |
+
const float* binPowPtr = NULL;
|
849 |
+
float frameRms = 0.0f;
|
850 |
+
float frameEnergy = 0.0f;
|
851 |
+
float powerNormal = 32768.0f * 32768.0f;
|
852 |
+
int idx;
|
853 |
+
|
854 |
+
if (stPtr == NULL) {
|
855 |
+
return -1;
|
856 |
+
}
|
857 |
+
if (stHdl->stCfg.enableFlag == 0) { // this module is disabled
|
858 |
+
return 0;
|
859 |
+
}
|
860 |
+
if (pIn == NULL || pIn->timeSignal == NULL || pOut == NULL) {
|
861 |
+
return -1;
|
862 |
+
}
|
863 |
+
|
864 |
+
if (stHdl->intAnalyFlag != 2) { // the external spectra is going to be used
|
865 |
+
if (pIn->binPower == NULL) {
|
866 |
+
return -1;
|
867 |
+
}
|
868 |
+
if (pIn->nBins != (int)((stHdl->stCfg.fftSz >> 1) + 1) ||
|
869 |
+
pIn->hopSz != (int)(stHdl->stCfg.hopSz)) {
|
870 |
+
return -1;
|
871 |
+
}
|
872 |
+
}
|
873 |
+
|
874 |
+
// cal. input frame energy ....
|
875 |
+
for (idx = 0; idx < pIn->hopSz; idx++) {
|
876 |
+
frameRms += (pIn->timeSignal[idx] * pIn->timeSignal[idx]);
|
877 |
+
}
|
878 |
+
frameEnergy = frameRms;
|
879 |
+
frameRms = sqrtf(frameRms / (float)pIn->hopSz);
|
880 |
+
memmove(stHdl->frameRmsBuff, stHdl->frameRmsBuff + 1,
|
881 |
+
sizeof(float) * (stHdl->frmRmsBufLen - 1));
|
882 |
+
stHdl->frameRmsBuff[stHdl->frmRmsBufLen - 1] = frameRms;
|
883 |
+
|
884 |
+
// input signal conversion .........
|
885 |
+
if ((stHdl->inputTimeFIFOIdx + pIn->hopSz) > (int)stHdl->inputTimeFIFOLen) {
|
886 |
+
return -1;
|
887 |
+
}
|
888 |
+
|
889 |
+
// update pre-emphasis time signal FIFO
|
890 |
+
float* timeSigEphaPtr = stHdl->inputEmphTimeFIFO + stHdl->inputTimeFIFOIdx;
|
891 |
+
for (idx = 0; idx < pIn->hopSz; idx++) {
|
892 |
+
timeSigEphaPtr[idx] = pIn->timeSignal[idx] - 0.97f * stHdl->timeSignalPre;
|
893 |
+
stHdl->timeSignalPre = pIn->timeSignal[idx];
|
894 |
+
}
|
895 |
+
|
896 |
+
memcpy(stHdl->inputTimeFIFO + stHdl->inputTimeFIFOIdx, pIn->timeSignal,
|
897 |
+
sizeof(float) * (pIn->hopSz));
|
898 |
+
stHdl->inputTimeFIFOIdx += pIn->hopSz;
|
899 |
+
|
900 |
+
if (stHdl->intAnalyFlag == 0) { // directly use external spectra
|
901 |
+
if (stHdl->inputTimeFIFOIdx != (int)(stHdl->intHopSz) ||
|
902 |
+
(int)(stHdl->intNBins) != pIn->nBins) {
|
903 |
+
return -1;
|
904 |
+
}
|
905 |
+
|
906 |
+
// one-time processing ...
|
907 |
+
stHdl->aedProcFrmCnt = AUP_Aed_addOneCnter(stHdl->aedProcFrmCnt);
|
908 |
+
binPowPtr = pIn->binPower;
|
909 |
+
|
910 |
+
// update: stHdl->pitchFreq, stHdl->aivadScore
|
911 |
+
if (AUP_Aed_runOneFrm(stHdl, stHdl->inputTimeFIFO, (int)stHdl->intHopSz,
|
912 |
+
binPowPtr, (int)stHdl->intNBins) < 0) {
|
913 |
+
return -1;
|
914 |
+
}
|
915 |
+
|
916 |
+
// update the inputTimeFIFO
|
917 |
+
stHdl->inputTimeFIFOIdx = 0;
|
918 |
+
} else if (stHdl->intAnalyFlag ==
|
919 |
+
1) { // do interpolation or extrapolation with external spectra
|
920 |
+
if (stHdl->inputTimeFIFOIdx != (int)(stHdl->intHopSz) ||
|
921 |
+
(int)(stHdl->extNBins) != pIn->nBins) {
|
922 |
+
return -1;
|
923 |
+
}
|
924 |
+
|
925 |
+
// one-time processing ....
|
926 |
+
stHdl->aedProcFrmCnt = AUP_Aed_addOneCnter(stHdl->aedProcFrmCnt);
|
927 |
+
AUP_Aed_binPowerConvert(pIn->binPower, stHdl->aivadInputBinPow,
|
928 |
+
(int)stHdl->extNBins, (int)stHdl->intNBins);
|
929 |
+
binPowPtr = stHdl->aivadInputBinPow;
|
930 |
+
|
931 |
+
// update: stHdl->pitchFreq, stHdl->aivadScore
|
932 |
+
if (AUP_Aed_runOneFrm(stHdl, stHdl->inputTimeFIFO, (int)stHdl->intHopSz,
|
933 |
+
binPowPtr, (int)stHdl->intNBins) < 0) {
|
934 |
+
return -1;
|
935 |
+
}
|
936 |
+
|
937 |
+
// update the inputTimeFIFO
|
938 |
+
stHdl->inputTimeFIFOIdx = 0;
|
939 |
+
} else { // we need to do STFT on the input time-signal
|
940 |
+
if (stHdl->timeInAnalysis == NULL) {
|
941 |
+
return -1;
|
942 |
+
}
|
943 |
+
|
944 |
+
// loop processing .....
|
945 |
+
while (stHdl->inputTimeFIFOIdx >= (int)stHdl->intHopSz) {
|
946 |
+
stHdl->aedProcFrmCnt = AUP_Aed_addOneCnter(stHdl->aedProcFrmCnt);
|
947 |
+
|
948 |
+
analyzerInput.input = stHdl->inputEmphTimeFIFO;
|
949 |
+
analyzerInput.iLength = (int)stHdl->intHopSz;
|
950 |
+
analyzerOutput.output = stHdl->aivadInputCmplxSptrm;
|
951 |
+
analyzerOutput.oLength = (int)stHdl->intFftSz;
|
952 |
+
if (AUP_Analyzer_proc(stHdl->timeInAnalysis, &analyzerInput,
|
953 |
+
&analyzerOutput) < 0) {
|
954 |
+
return -1;
|
955 |
+
}
|
956 |
+
|
957 |
+
AUP_Aed_CalcBinPow((int)stHdl->intNBins, stHdl->aivadInputCmplxSptrm,
|
958 |
+
stHdl->aivadInputBinPow);
|
959 |
+
binPowPtr = stHdl->aivadInputBinPow;
|
960 |
+
|
961 |
+
// update: stHdl->pitchFreq, stHdl->aivadScore
|
962 |
+
if (AUP_Aed_runOneFrm(stHdl, stHdl->inputTimeFIFO, (int)stHdl->intHopSz,
|
963 |
+
binPowPtr, (int)stHdl->intNBins) < 0) {
|
964 |
+
return -1;
|
965 |
+
}
|
966 |
+
|
967 |
+
// update the inputTimeFIFO & inputEmphTimeFIFO.....
|
968 |
+
if (stHdl->inputTimeFIFOIdx > (int)stHdl->intHopSz) {
|
969 |
+
memcpy(stHdl->inputTimeFIFO, stHdl->inputTimeFIFO + stHdl->intHopSz,
|
970 |
+
sizeof(float) * (stHdl->inputTimeFIFOIdx - stHdl->intHopSz));
|
971 |
+
memcpy(stHdl->inputEmphTimeFIFO,
|
972 |
+
stHdl->inputEmphTimeFIFO + stHdl->intHopSz,
|
973 |
+
sizeof(float) * (stHdl->inputTimeFIFOIdx - stHdl->intHopSz));
|
974 |
+
}
|
975 |
+
stHdl->inputTimeFIFOIdx -= (int)stHdl->intHopSz;
|
976 |
+
}
|
977 |
+
}
|
978 |
+
|
979 |
+
// write to output res.
|
980 |
+
pOut->frameEnergy = frameEnergy / powerNormal;
|
981 |
+
pOut->frameRms = stHdl->frameRmsBuff[0];
|
982 |
+
pOut->pitchFreq = stHdl->pitchFreq;
|
983 |
+
pOut->voiceProb = stHdl->aivadScore;
|
984 |
+
if (pOut->voiceProb < 0.0f) {
|
985 |
+
pOut->vadRes = -1;
|
986 |
+
} else if (pOut->voiceProb <= stHdl->voiceDecideThresh) {
|
987 |
+
pOut->vadRes = 0;
|
988 |
+
} else {
|
989 |
+
pOut->vadRes = 1;
|
990 |
+
}
|
991 |
+
|
992 |
+
return 0;
|
993 |
+
}
|
src/aed.h
ADDED
@@ -0,0 +1,226 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
//
|
2 |
+
// Copyright © 2025 Agora
|
3 |
+
// This file is part of TEN Framework, an open source project.
|
4 |
+
// Licensed under the Apache License, Version 2.0, with certain conditions.
|
5 |
+
// Refer to the "LICENSE" file in the root directory for more information.
|
6 |
+
//
|
7 |
+
#ifndef __AED_H__
|
8 |
+
#define __AED_H__
|
9 |
+
|
10 |
+
#include <stdint.h>
|
11 |
+
#include <stdlib.h>
|
12 |
+
|
13 |
+
#define AUP_AED_MAX_FFT_SZ (1024) // the max. fft-size supported by VAD module
|
14 |
+
#define AUP_AED_MAX_NBINS ((AUP_AED_MAX_FFT_SZ >> 1) + 1)
|
15 |
+
|
16 |
+
#define AUP_AED_FS (16000) // assumed input freq.
|
17 |
+
|
18 |
+
// Configuration Parameters, which impacts dynamic memory occupation, can only
|
19 |
+
// be set during allocation
|
20 |
+
typedef struct Aed_StaticCfg_ {
  // Module on/off switch: 0 disables the module, any other value enables it.
  int enableFlag;

  // FFT size; only 128, 256, 512 and 1024 are supported.
  size_t fftSz;

  // FFT hop size; used to check the hop size of each input frame.
  size_t hopSz;

  // FFT analysis-window size; used for the RMS calculation.
  size_t anaWindowSz;

  // Whether Aed_InputData will carry an externally computed frequency-domain
  // power spectrum.
  int frqInputAvailableFlag;
} Aed_StaticCfg;
|
29 |
+
|
30 |
+
// Configuration parameters which can be modified/set every frame
|
31 |
+
typedef struct Aed_DynamCfg_ {
  // Threshold for the AI-based voice decision, range [0, 1].
  float extVoiceThr;

  // Threshold for the AI-based music decision, range [0, 1].
  float extMusicThr;

  // Threshold for the energy-based VAD decision, range [0, ---].
  float extEnergyThr;

  // Number of frames after which the AI-VAD is reset, range [1875, 75000].
  size_t resetFrameNum;

  // Threshold the pitch estimator uses to decide whether to output an
  // estimated pitch.
  float pitchEstVoicedThr;
} Aed_DynamCfg;
|
39 |
+
|
40 |
+
// Spectrum are assumed to be generated with time-domain samples in [-32768,
|
41 |
+
// 32767] with or without pre-emphasis operation
|
42 |
+
typedef struct Aed_InputData_ {
  // [nBins] power spectrum of the 16 kHz samples.
  const float* binPower;

  // Number of entries in binPower.
  int nBins;

  // [hopSz] this frame's input signal, samples in [-32768, 32767].
  const float* timeSignal;

  // Number of samples in timeSignal; should be equal to StaticCfg->hopSz.
  int hopSz;
} Aed_InputData;
|
49 |
+
|
50 |
+
// output data returned by AUP_Aed_proc
|
51 |
+
typedef struct Aed_OutputData_ {
  // Frame energy of the normalized input data.
  float frameEnergy;

  // RMS of the int16-range input data.
  float frameRms;

  // Energy-based VAD result (0/1), decided with extEnergyThr on the input
  // frame energy.
  int energyVadRes;

  // AI-based VAD score, range [0, 1].
  float voiceProb;

  // AI-based VAD result (0/1), decided with extVoiceThr. The result produced
  // at time t + 16 ms corresponds to the input at time t.
  int vadRes;

  // Estimated pitch frequency.
  float pitchFreq;
} Aed_OutputData;
|
61 |
+
|
62 |
+
#ifdef __cplusplus
|
63 |
+
extern "C" {
|
64 |
+
#endif
|
65 |
+
|
66 |
+
/****************************************************************************
|
67 |
+
* AUP_Aed_create(...)
|
68 |
+
*
|
69 |
+
* This function creates a state handler from nothing, which is NOT ready for
|
70 |
+
* processing
|
71 |
+
*
|
72 |
+
* Input:
|
73 |
+
*
|
74 |
+
* Output:
|
75 |
+
* - stPtr : buffer to store the returned state handler
|
76 |
+
*
|
77 |
+
* Return value : 0 - Ok
|
78 |
+
* -1 - Error
|
79 |
+
*/
|
80 |
+
int AUP_Aed_create(void** stPtr);
|
81 |
+
|
82 |
+
/****************************************************************************
|
83 |
+
* AUP_Aed_destroy(...)
|
84 |
+
*
|
85 |
+
* destroy VAD instance, and releasing all the dynamically allocated memory
|
86 |
+
* this interface will also release ainsFactory, which was
|
87 |
+
* created externally and passed to VAD module through memAllocate interface
|
88 |
+
*
|
89 |
+
* Input:
|
90 |
+
* - stPtr : buffer of State Handler, after this method, this
|
91 |
+
* handler won't be usable anymore
|
92 |
+
*
|
93 |
+
* Output:
|
94 |
+
*
|
95 |
+
* Return value : 0 - Ok
|
96 |
+
* -1 - Error
|
97 |
+
*/
|
98 |
+
int AUP_Aed_destroy(void** stPtr);
|
99 |
+
|
100 |
+
/****************************************************************************
|
101 |
+
* AUP_Aed_memAllocate(...)
|
102 |
+
*
|
103 |
+
* This function sets Static Config params and does memory allocation
|
104 |
+
* operation, will lose the dynamCfg values
|
105 |
+
*
|
106 |
+
* Input:
|
107 |
+
* - stPtr : State Handler which was returned by _create
|
108 |
+
* - pCfg : static configuration parameters
|
109 |
+
*
|
110 |
+
* Output:
|
111 |
+
*
|
112 |
+
* Return value : 0 - Ok
|
113 |
+
* -1 - Error
|
114 |
+
*/
|
115 |
+
int AUP_Aed_memAllocate(void* stPtr, const Aed_StaticCfg* pCfg);
|
116 |
+
|
117 |
+
/****************************************************************************
|
118 |
+
* AUP_Aed_init(...)
|
119 |
+
*
|
120 |
+
* This function resets (initialize) the VAD module and gets it prepared for
|
121 |
+
* processing
|
122 |
+
*
|
123 |
+
* Input:
|
124 |
+
* - stPtr : State Handler which has gone through create and
|
125 |
+
* memAllocate
|
126 |
+
*
|
127 |
+
* Output:
|
128 |
+
*
|
129 |
+
* Return value : 0 - Ok
|
130 |
+
* -1 - Error
|
131 |
+
*/
|
132 |
+
int AUP_Aed_init(void* stPtr);
|
133 |
+
|
134 |
+
/****************************************************************************
|
135 |
+
* AUP_Aed_setDynamCfg(...)
|
136 |
+
*
|
137 |
+
* This function set dynamic (per-frame variable) configuration
|
138 |
+
*
|
139 |
+
* Input:
|
140 |
+
* - stPtr : State Handler which has gone through create and
|
141 |
+
* memAllocate
|
142 |
+
* - pCfg : configuration content
|
143 |
+
*
|
144 |
+
* Output:
|
145 |
+
*
|
146 |
+
* Return value : 0 - Ok
|
147 |
+
* -1 - Error
|
148 |
+
*/
|
149 |
+
int AUP_Aed_setDynamCfg(void* stPtr, const Aed_DynamCfg* pCfg);
|
150 |
+
|
151 |
+
/****************************************************************************
|
152 |
+
* AUP_Aed_getStaticCfg(...)
|
153 |
+
*
|
154 |
+
* This function get static configuration status from VAD module
|
155 |
+
*
|
156 |
+
* Input:
|
157 |
+
* - stPtr : State Handler which has gone through create and
|
158 |
+
* memAllocate
|
159 |
+
*
|
160 |
+
* Output:
|
161 |
+
* - pCfg : configuration content
|
162 |
+
*
|
163 |
+
* Return value : 0 - Ok
|
164 |
+
* -1 - Error
|
165 |
+
*/
|
166 |
+
int AUP_Aed_getStaticCfg(const void* stPtr, Aed_StaticCfg* pCfg);
|
167 |
+
|
168 |
+
/****************************************************************************
|
169 |
+
* AUP_Aed_getDynamCfg(...)
|
170 |
+
*
|
171 |
+
* This function get dynamic (per-frame variable) configuration status from
|
172 |
+
* VAD module
|
173 |
+
*
|
174 |
+
* Input:
|
175 |
+
* - stPtr : State Handler which has gone through create and
|
176 |
+
* memAllocate
|
177 |
+
*
|
178 |
+
* Output:
|
179 |
+
* - pCfg : configuration content
|
180 |
+
*
|
181 |
+
* Return value : 0 - Ok
|
182 |
+
* -1 - Error
|
183 |
+
*/
|
184 |
+
int AUP_Aed_getDynamCfg(const void* stPtr, Aed_DynamCfg* pCfg);
|
185 |
+
|
186 |
+
/****************************************************************************
|
187 |
+
* AUP_Aed_getAlgDelay(...)
|
188 |
+
*
|
189 |
+
* This function get algorithm delay from VAD module
|
190 |
+
*
|
191 |
+
* Input:
|
192 |
+
* - stPtr : State Handler which has gone through create and
|
193 |
+
* memAllocate
|
194 |
+
*
|
195 |
+
* Output:
|
196 |
+
* - delayInFrms : algorithm delay in terms of frames
|
197 |
+
*
|
198 |
+
* Return value : 0 - Ok
|
199 |
+
* -1 - Error
|
200 |
+
*/
|
201 |
+
int AUP_Aed_getAlgDelay(const void* stPtr, int* delayInFrms);
|
202 |
+
|
203 |
+
/****************************************************************************
|
204 |
+
* AUP_Aed_proc(...)
|
205 |
+
*
|
206 |
+
* process a single frame
|
207 |
+
*
|
208 |
+
* Input:
|
209 |
+
* - stPtr : State Handler which has gone through create and
|
210 |
+
* memAllocate and reset
|
211 |
+
* - (no pCtrl argument) : per-frame variable control parameters are set through AUP_Aed_setDynamCfg
|
212 |
+
* - pIn : input data stream
|
213 |
+
*
|
214 |
+
* Output:
|
215 |
+
* - pOut : output data (frame energy, frame RMS, voice probability, VAD decision, pitch frequency)
|
216 |
+
*
|
217 |
+
* Return value : 0 - Ok
|
218 |
+
* -1 - Error
|
219 |
+
*/
|
220 |
+
int AUP_Aed_proc(void* stPtr, const Aed_InputData* pIn, Aed_OutputData* pOut);
|
221 |
+
|
222 |
+
#ifdef __cplusplus
|
223 |
+
}
|
224 |
+
#endif
|
225 |
+
|
226 |
+
#endif
|
src/aed_st.h
ADDED
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
//
|
2 |
+
// Copyright © 2025 Agora
|
3 |
+
// This file is part of TEN Framework, an open source project.
|
4 |
+
// Licensed under the Apache License, Version 2.0, with certain conditions.
|
5 |
+
// Refer to the "LICENSE" file in the root directory for more information.
|
6 |
+
//
|
7 |
+
#ifndef __AED_ST_H__
|
8 |
+
#define __AED_ST_H__
|
9 |
+
|
10 |
+
#include <stdio.h>
|
11 |
+
#include <onnxruntime_c_api.h>
|
12 |
+
|
13 |
+
#include "aed.h"
|
14 |
+
|
15 |
+
#define AUP_AED_FS (16000)
|
16 |
+
#define AUP_AED_MAX_IN_BUFF_SIZE (256)
|
17 |
+
#define AUP_AED_POWER_SPCTR_NORMALIZER (9.3132e-10f) // = 1/(32768^2)
|
18 |
+
#define AUP_AED_OUTPUT_SMOOTH_FILTER_LEN (10) // 160ms
|
19 |
+
|
20 |
+
#define AUP_AED_MEL_FILTER_BANK_NUM (40)
|
21 |
+
#define AUP_AED_LOOKAHEAD_NFRM (1)
|
22 |
+
#define AUP_AED_CONTEXT_WINDOW_LEN (3) // context window length of AIVAD
|
23 |
+
#define AUP_AED_FEA_LEN \
|
24 |
+
(AUP_AED_MEL_FILTER_BANK_NUM + 1) // feature length of AIVAD
|
25 |
+
|
26 |
+
#define AUP_AED_PITCH_EST_USE_LPC (1)
|
27 |
+
#define AUP_AED_PITCH_EST_PROCFS (4000)
|
28 |
+
#if AUP_AED_PITCH_EST_PROCFS == 2000
|
29 |
+
#define AUP_AED_PITCH_EST_DEFAULT_VOICEDTHR (0.45f)
|
30 |
+
#else
|
31 |
+
#define AUP_AED_PITCH_EST_DEFAULT_VOICEDTHR (0.4f)
|
32 |
+
#endif
|
33 |
+
|
34 |
+
#define AUP_AED_MODEL_IO_NUM (5)
|
35 |
+
#define AUP_AED_MODEL_NAME_LENGTH (32)
|
36 |
+
#define AUP_AED_MODEL_HIDDEN_DIM (64)
|
37 |
+
|
38 |
+
class AUP_MODULE_AIVAD {
|
39 |
+
public:
|
40 |
+
AUP_MODULE_AIVAD(char* onnx_path);
|
41 |
+
~AUP_MODULE_AIVAD();
|
42 |
+
int Process(float* input, float* output);
|
43 |
+
int Reset();
|
44 |
+
|
45 |
+
private:
|
46 |
+
const OrtApi* ort_api = NULL;
|
47 |
+
OrtAllocator* ort_allocator = NULL;
|
48 |
+
OrtEnv* ort_env = NULL;
|
49 |
+
OrtSession* ort_session = NULL;
|
50 |
+
int inited = 0;
|
51 |
+
int clear_hidden = 0;
|
52 |
+
|
53 |
+
char input_names_buf[AUP_AED_MODEL_IO_NUM][AUP_AED_MODEL_NAME_LENGTH] = {0};
|
54 |
+
const char* input_names[AUP_AED_MODEL_IO_NUM] = {NULL};
|
55 |
+
float input_data_buf_0[AUP_AED_CONTEXT_WINDOW_LEN * AUP_AED_FEA_LEN] = {0};
|
56 |
+
float input_data_buf_1234[AUP_AED_MODEL_IO_NUM - 1]
|
57 |
+
[AUP_AED_MODEL_HIDDEN_DIM] = {0};
|
58 |
+
OrtValue* ort_input_tensors[AUP_AED_MODEL_IO_NUM] = {NULL};
|
59 |
+
|
60 |
+
char output_names_buf[AUP_AED_MODEL_IO_NUM][AUP_AED_MODEL_NAME_LENGTH] = {0};
|
61 |
+
const char* output_names[AUP_AED_MODEL_IO_NUM] = {NULL};
|
62 |
+
OrtValue* ort_output_tensors[AUP_AED_MODEL_IO_NUM] = {NULL};
|
63 |
+
};
|
64 |
+
|
65 |
+
typedef struct Aed_St_ {
|
66 |
+
void* dynamMemPtr; // memory pointer holding the dynamic memory
|
67 |
+
size_t dynamMemSize; // size of the buffer *dynamMemPtr
|
68 |
+
|
69 |
+
Aed_StaticCfg stCfg;
|
70 |
+
|
71 |
+
Aed_DynamCfg dynamCfg;
|
72 |
+
|
73 |
+
// Internal Static Config Registers, which are generated from stCfg
|
74 |
+
size_t extFftSz; // externally decided FFT-Sz
|
75 |
+
size_t extHopSz; // externally decided FFT-Hop-Sz
|
76 |
+
size_t extNBins; // (FFTSz/2) + 1
|
77 |
+
size_t extWinSz; // externally decided FFT-Window-Sz
|
78 |
+
|
79 |
+
size_t intFftSz; // internal FFT Sz
|
80 |
+
size_t intHopSz; // internal Hop Sz
|
81 |
+
size_t intWinSz; // internal Window Sz
|
82 |
+
size_t intNBins; // internal NBins
|
83 |
+
const float* intAnalyWindowPtr; // internal analysis pointer
|
84 |
+
int intAnalyFlag; // whether to do internal analysis
|
85 |
+
// 0: directly use external spectrum
|
86 |
+
// 1: use external spectrum with interpolation / exterpolation
|
87 |
+
// 2: need to redo analysis based on input time-domain signal
|
88 |
+
size_t inputTimeFIFOLen; // length of input FIFO buffer
|
89 |
+
// if = 0: no need for input time-domain FIFO Queue
|
90 |
+
|
91 |
+
// Internal static config registers for pitch-est module
|
92 |
+
size_t feaSz;
|
93 |
+
size_t melFbSz;
|
94 |
+
size_t algDelay; // in terms of processing frames
|
95 |
+
size_t algCtxtSz;
|
96 |
+
size_t frmRmsBufLen; // frameRmsBuff: buffer-length of frameRmsBuff (FIFO)
|
97 |
+
|
98 |
+
// Internal dynamic Config Registers, which are generated from dynamCfg
|
99 |
+
size_t aivadResetFrmNum;
|
100 |
+
float voiceDecideThresh;
|
101 |
+
|
102 |
+
// SubModules
|
103 |
+
AUP_MODULE_AIVAD* aivadInf;
|
104 |
+
|
105 |
+
void* pitchEstStPtr; // pitch-estimation module handler
|
106 |
+
void* timeInAnalysis;
|
107 |
+
// state handler of STFT analysis module
|
108 |
+
|
109 |
+
// Variables
|
110 |
+
int aedProcFrmCnt; // counter of consecutive AI-VAD processed frames
|
111 |
+
int inputTimeFIFOIdx;
|
112 |
+
float* inputTimeFIFO; // [inputTimeFIFOLen]
|
113 |
+
// input fifo buffer of time-signal to adjust between extHopSz and intHopSz
|
114 |
+
float* inputEmphTimeFIFO; // [inputTimeFIFOLen]
|
115 |
+
float* aivadInputCmplxSptrm; // [intFftSz]
|
116 |
+
float* aivadInputBinPow; // [intNBins] // AIVAD input power spectrum
|
117 |
+
size_t aivadResetCnt;
|
118 |
+
float timeSignalPre;
|
119 |
+
float aivadScore;
|
120 |
+
float aivadScorePre;
|
121 |
+
|
122 |
+
float pitchFreq; // input audio pitch in Hz
|
123 |
+
float* frameRmsBuff; // [frmRmsBufLen], FIFO, to delay frmRms result so that
|
124 |
+
// it aligns with AIVAD result
|
125 |
+
float* aivadInputFeatStack; // [...] = [AUP_AED_CONTEXT_WINDOW_LEN *
|
126 |
+
// AUP_AED_FEA_LEN]
|
127 |
+
float* melFilterBankCoef; // [melFbSz][nBins]
|
128 |
+
size_t* melFilterBinBuff; // [melFbSz + 2]
|
129 |
+
float* inputFloatBuff; // [hopSz]
|
130 |
+
} Aed_St;
|
131 |
+
|
132 |
+
#endif
|
src/biquad.cc
ADDED
@@ -0,0 +1,354 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
//
|
2 |
+
// Copyright © 2025 Agora
|
3 |
+
// This file is part of TEN Framework, an open source project.
|
4 |
+
// Licensed under the Apache License, Version 2.0, with certain conditions.
|
5 |
+
// Refer to the "LICENSE" file in the root directory for more information.
|
6 |
+
//
|
7 |
+
#include "biquad.h"
|
8 |
+
|
9 |
+
#include <math.h>
|
10 |
+
#include <stdlib.h>
|
11 |
+
#include <string.h>
|
12 |
+
#include <stdio.h>
|
13 |
+
|
14 |
+
#include "biquad_st.h"
|
15 |
+
|
16 |
+
#define AUP_BIQUAD_NUM_DUMP_FILES (20)
|
17 |
+
#define AUP_BIQUAD_DUMP_FILENAMES (200)
|
18 |
+
|
19 |
+
// ==========================================================================================
|
20 |
+
// internal tools
|
21 |
+
// ==========================================================================================
|
22 |
+
|
23 |
+
static int AUP_Biquad_checkStatCfg(const Biquad_StaticCfg* pCfg) {
|
24 |
+
int secIdx;
|
25 |
+
if (pCfg == NULL) {
|
26 |
+
return -1;
|
27 |
+
}
|
28 |
+
|
29 |
+
if (pCfg->maxNSample == 0 ||
|
30 |
+
pCfg->maxNSample > AGORA_UAP_BIQUAD_MAX_INPUT_LEN) {
|
31 |
+
return -1;
|
32 |
+
}
|
33 |
+
if (pCfg->nsect > AGORA_UAP_BIQUAD_MAX_SECTION) {
|
34 |
+
return -1;
|
35 |
+
}
|
36 |
+
|
37 |
+
// if external filter coefficients are required, we need to check the
|
38 |
+
// external filter coeff pointers' validness
|
39 |
+
if (pCfg->nsect > 0) {
|
40 |
+
for (secIdx = 0; secIdx < pCfg->nsect; secIdx++) {
|
41 |
+
if (pCfg->B[secIdx] == NULL || pCfg->A[secIdx] == NULL) {
|
42 |
+
return -1;
|
43 |
+
}
|
44 |
+
}
|
45 |
+
if (pCfg->G == NULL) {
|
46 |
+
return -1;
|
47 |
+
}
|
48 |
+
}
|
49 |
+
|
50 |
+
return 0;
|
51 |
+
}
|
52 |
+
|
53 |
+
// Derives the internal filter registers from the stored static configuration.
//
// All AGORA_UAP_BIQUAD_MAX_SECTION sections are first set to the default
// (all-pass) coefficients B = A = {1, 0, 0}, G = 1. Then, when the
// configuration requests no external sections (nsect <= 0), the built-in
// _BIQUAD_DC_REMOVAL_* coefficients are loaded; otherwise the caller-supplied
// B/A/G coefficients are copied for the first nsect sections.
//
// Returns 0 on success, -1 when stHdl is NULL.
static int AUP_Biquad_publishStaticCfg(Biquad_St* stHdl) {
  if (stHdl == NULL) {
    return -1;
  }
  const Biquad_StaticCfg* cfg = (const Biquad_StaticCfg*)(&(stHdl->stCfg));
  int sect;
  int tap;

  stHdl->maxNSample = (int)cfg->maxNSample;

  // first, give default (all-pass-filter) values to filter coeffs
  for (sect = 0; sect < AGORA_UAP_BIQUAD_MAX_SECTION; ++sect) {
    stHdl->BCoeff[sect][0] = 1.0f;
    stHdl->ACoeff[sect][0] = 1.0f;
    for (tap = 1; tap < 3; ++tap) {
      stHdl->BCoeff[sect][tap] = 0;
      stHdl->ACoeff[sect][tap] = 0;
    }
    stHdl->GCoeff[sect] = 1.0f;
  }

  if (cfg->nsect <= 0) {
    // built-in DC-removal filter
    stHdl->nsect = _BIQUAD_DC_REMOVAL_NSECT;
    for (sect = 0; sect < stHdl->nsect; ++sect) {
      for (tap = 0; tap < 3; ++tap) {
        stHdl->BCoeff[sect][tap] = _BIQUAD_DC_REMOVAL_B[sect][tap];
        stHdl->ACoeff[sect][tap] = _BIQUAD_DC_REMOVAL_A[sect][tap];
      }
      stHdl->GCoeff[sect] = _BIQUAD_DC_REMOVAL_G[sect];
    }
    return 0;
  }

  // externally supplied filter
  stHdl->nsect = cfg->nsect;
  for (sect = 0; sect < stHdl->nsect; ++sect) {
    for (tap = 0; tap < 3; ++tap) {
      stHdl->BCoeff[sect][tap] = cfg->B[sect][tap];
      stHdl->ACoeff[sect][tap] = cfg->A[sect][tap];
    }
    stHdl->GCoeff[sect] = cfg->G[sect];
  }

  return 0;
}
|
103 |
+
|
104 |
+
// Clears the run-time state of a biquad instance: zeroes the dynamic memory
// block (when one has been allocated) and the per-section state array sectW.
//
// stHdl must be non-NULL; callers check this before calling.
// Always returns 0.
static int AUP_Biquad_resetVariables(Biquad_St* stHdl) {
  // dynamMemPtr is still NULL when init runs before memAllocate; passing a
  // null pointer to memset is undefined even with a zero length, so skip it.
  if (stHdl->dynamMemPtr != NULL && stHdl->dynamMemSize > 0) {
    memset(stHdl->dynamMemPtr, 0, stHdl->dynamMemSize);
  }
  memset(stHdl->sectW, 0, sizeof(stHdl->sectW));

  return 0;
}
|
110 |
+
|
111 |
+
// ==========================================================================================
|
112 |
+
// public APIS
|
113 |
+
// ==========================================================================================
|
114 |
+
|
115 |
+
int AUP_Biquad_create(void** stPtr) {
|
116 |
+
Biquad_St* tmpPtr;
|
117 |
+
|
118 |
+
if (stPtr == NULL) {
|
119 |
+
return -1;
|
120 |
+
}
|
121 |
+
*stPtr = (void*)malloc(sizeof(Biquad_St));
|
122 |
+
if (*stPtr == NULL) {
|
123 |
+
return -1;
|
124 |
+
}
|
125 |
+
memset(*stPtr, 0, sizeof(Biquad_St));
|
126 |
+
|
127 |
+
tmpPtr = (Biquad_St*)(*stPtr);
|
128 |
+
|
129 |
+
tmpPtr->dynamMemPtr = NULL;
|
130 |
+
tmpPtr->dynamMemSize = 0;
|
131 |
+
|
132 |
+
tmpPtr->stCfg.maxNSample = 768;
|
133 |
+
tmpPtr->stCfg.nsect = 0;
|
134 |
+
for (int idx = 0; idx < AGORA_UAP_BIQUAD_MAX_SECTION; idx++) {
|
135 |
+
tmpPtr->stCfg.A[idx] = NULL;
|
136 |
+
tmpPtr->stCfg.B[idx] = NULL;
|
137 |
+
}
|
138 |
+
tmpPtr->stCfg.G = NULL;
|
139 |
+
|
140 |
+
return 0;
|
141 |
+
}
|
142 |
+
|
143 |
+
int AUP_Biquad_destroy(void** stPtr) {
|
144 |
+
Biquad_St* stHdl;
|
145 |
+
|
146 |
+
if (stPtr == NULL) {
|
147 |
+
return 0;
|
148 |
+
}
|
149 |
+
|
150 |
+
stHdl = (Biquad_St*)(*stPtr);
|
151 |
+
if (stHdl == NULL) {
|
152 |
+
return 0;
|
153 |
+
}
|
154 |
+
|
155 |
+
if (stHdl->dynamMemPtr != NULL) {
|
156 |
+
free(stHdl->dynamMemPtr);
|
157 |
+
}
|
158 |
+
stHdl->dynamMemPtr = NULL;
|
159 |
+
|
160 |
+
free(stHdl);
|
161 |
+
|
162 |
+
(*stPtr) = NULL;
|
163 |
+
|
164 |
+
return 0;
|
165 |
+
}
|
166 |
+
|
167 |
+
int AUP_Biquad_memAllocate(void* stPtr, const Biquad_StaticCfg* pCfg) {
|
168 |
+
Biquad_St* stHdl = NULL;
|
169 |
+
char* memPtr = NULL;
|
170 |
+
int maxNSample, nsect, idx;
|
171 |
+
|
172 |
+
int inputTempBufMemSize = 0;
|
173 |
+
int sectOutputBufMemSize_EACH = 0;
|
174 |
+
int totalMemSize = 0;
|
175 |
+
|
176 |
+
if (stPtr == NULL || pCfg == NULL) {
|
177 |
+
return -1;
|
178 |
+
}
|
179 |
+
stHdl = (Biquad_St*)(stPtr);
|
180 |
+
|
181 |
+
if (AUP_Biquad_checkStatCfg(pCfg) < 0) {
|
182 |
+
return -1;
|
183 |
+
}
|
184 |
+
memcpy(&(stHdl->stCfg), pCfg, sizeof(Biquad_StaticCfg));
|
185 |
+
|
186 |
+
if (AUP_Biquad_publishStaticCfg(stHdl) < 0) {
|
187 |
+
return -1;
|
188 |
+
}
|
189 |
+
maxNSample = stHdl->maxNSample;
|
190 |
+
nsect = stHdl->nsect;
|
191 |
+
|
192 |
+
// check memory requirement
|
193 |
+
inputTempBufMemSize = AGORA_UAP_BIQUAD_ALIGN8(sizeof(float) * maxNSample);
|
194 |
+
totalMemSize += inputTempBufMemSize;
|
195 |
+
|
196 |
+
sectOutputBufMemSize_EACH =
|
197 |
+
AGORA_UAP_BIQUAD_ALIGN8(sizeof(float) * maxNSample);
|
198 |
+
totalMemSize += sectOutputBufMemSize_EACH * nsect;
|
199 |
+
|
200 |
+
// allocate dynamic memory
|
201 |
+
if ((size_t)totalMemSize > stHdl->dynamMemSize) {
|
202 |
+
if (stHdl->dynamMemPtr != NULL) {
|
203 |
+
free(stHdl->dynamMemPtr);
|
204 |
+
stHdl->dynamMemSize = 0;
|
205 |
+
}
|
206 |
+
stHdl->dynamMemPtr = malloc(totalMemSize);
|
207 |
+
if (stHdl->dynamMemPtr == NULL) {
|
208 |
+
return -1;
|
209 |
+
}
|
210 |
+
stHdl->dynamMemSize = totalMemSize;
|
211 |
+
}
|
212 |
+
memset(stHdl->dynamMemPtr, 0, stHdl->dynamMemSize);
|
213 |
+
|
214 |
+
// setup the pointers/variable
|
215 |
+
memPtr = (char*)(stHdl->dynamMemPtr);
|
216 |
+
|
217 |
+
stHdl->inputTempBuf = (float*)memPtr;
|
218 |
+
memPtr += inputTempBufMemSize;
|
219 |
+
|
220 |
+
for (idx = 0; idx < nsect; idx++) {
|
221 |
+
stHdl->sectOutputBuf[idx] = (float*)memPtr;
|
222 |
+
memPtr += sectOutputBufMemSize_EACH;
|
223 |
+
}
|
224 |
+
for (; idx < AGORA_UAP_BIQUAD_MAX_SECTION; idx++) {
|
225 |
+
stHdl->sectOutputBuf[idx] = NULL;
|
226 |
+
}
|
227 |
+
|
228 |
+
if (((int)(memPtr - (char*)(stHdl->dynamMemPtr))) > totalMemSize) {
|
229 |
+
return -1;
|
230 |
+
}
|
231 |
+
|
232 |
+
return 0;
|
233 |
+
}
|
234 |
+
|
235 |
+
int AUP_Biquad_init(void* stPtr) {
|
236 |
+
Biquad_St* stHdl;
|
237 |
+
|
238 |
+
if (stPtr == NULL) {
|
239 |
+
return -1;
|
240 |
+
}
|
241 |
+
stHdl = (Biquad_St*)(stPtr);
|
242 |
+
|
243 |
+
if (AUP_Biquad_resetVariables(stHdl) < 0) {
|
244 |
+
return -1;
|
245 |
+
}
|
246 |
+
|
247 |
+
return 0;
|
248 |
+
}
|
249 |
+
|
250 |
+
int AUP_Biquad_getStaticCfg(const void* stPtr, Biquad_StaticCfg* pCfg) {
|
251 |
+
const Biquad_St* stHdl;
|
252 |
+
|
253 |
+
if (stPtr == NULL || pCfg == NULL) {
|
254 |
+
return -1;
|
255 |
+
}
|
256 |
+
stHdl = (const Biquad_St*)(stPtr);
|
257 |
+
|
258 |
+
memcpy(pCfg, &(stHdl->stCfg), sizeof(Biquad_StaticCfg));
|
259 |
+
|
260 |
+
return 0;
|
261 |
+
}
|
262 |
+
|
263 |
+
int AUP_Biquad_getAlgDelay(const void* stPtr, int* delayInSamples) {
|
264 |
+
const Biquad_St* stHdl;
|
265 |
+
|
266 |
+
if (stPtr == NULL || delayInSamples == NULL) {
|
267 |
+
return -1;
|
268 |
+
}
|
269 |
+
stHdl = (const Biquad_St*)(stPtr);
|
270 |
+
|
271 |
+
*delayInSamples = stHdl->nsect;
|
272 |
+
|
273 |
+
return 0;
|
274 |
+
}
|
275 |
+
|
276 |
+
int AUP_Biquad_proc(void* stPtr, const Biquad_InputData* pIn,
|
277 |
+
Biquad_OutputData* pOut) {
|
278 |
+
Biquad_St* stHdl = NULL;
|
279 |
+
int isFloatIO = 0;
|
280 |
+
int inputNSamples, nSect;
|
281 |
+
int sectIdx, smplIdx;
|
282 |
+
float tmp1;
|
283 |
+
const short* pShortTemp;
|
284 |
+
float* src;
|
285 |
+
float* tgt;
|
286 |
+
|
287 |
+
if (stPtr == NULL || pIn == NULL || pOut == NULL) { // pCtrl == NULL
|
288 |
+
return -1;
|
289 |
+
}
|
290 |
+
if (pIn->samplesPtr == NULL || pOut->outputBuff == NULL) {
|
291 |
+
return -1;
|
292 |
+
}
|
293 |
+
|
294 |
+
stHdl = (Biquad_St*)(stPtr);
|
295 |
+
|
296 |
+
if (((int)pIn->nsamples) > stHdl->maxNSample) {
|
297 |
+
return -1;
|
298 |
+
}
|
299 |
+
|
300 |
+
isFloatIO = 0;
|
301 |
+
if (pIn->sampleType != 0) {
|
302 |
+
isFloatIO = 1;
|
303 |
+
}
|
304 |
+
|
305 |
+
inputNSamples = (int)pIn->nsamples;
|
306 |
+
nSect = stHdl->nsect;
|
307 |
+
|
308 |
+
// special handle for input
|
309 |
+
if (isFloatIO == 0) {
|
310 |
+
pShortTemp = (const short*)pIn->samplesPtr;
|
311 |
+
for (smplIdx = 0; smplIdx < inputNSamples; smplIdx++) {
|
312 |
+
stHdl->inputTempBuf[smplIdx] = (float)pShortTemp[smplIdx];
|
313 |
+
}
|
314 |
+
} else {
|
315 |
+
memcpy(stHdl->inputTempBuf, (const float*)pIn->samplesPtr,
|
316 |
+
sizeof(float) * inputNSamples);
|
317 |
+
}
|
318 |
+
|
319 |
+
for (sectIdx = 0; sectIdx < nSect; sectIdx++) {
|
320 |
+
if (sectIdx == 0) {
|
321 |
+
src = stHdl->inputTempBuf;
|
322 |
+
} else {
|
323 |
+
src = stHdl->sectOutputBuf[sectIdx - 1];
|
324 |
+
}
|
325 |
+
tgt = stHdl->sectOutputBuf[sectIdx];
|
326 |
+
|
327 |
+
for (smplIdx = 0; smplIdx < inputNSamples; smplIdx++) {
|
328 |
+
tmp1 = src[smplIdx] -
|
329 |
+
stHdl->ACoeff[sectIdx][1] * stHdl->sectW[sectIdx][0] -
|
330 |
+
stHdl->ACoeff[sectIdx][2] * stHdl->sectW[sectIdx][1];
|
331 |
+
|
332 |
+
tgt[smplIdx] = stHdl->GCoeff[sectIdx] *
|
333 |
+
(stHdl->BCoeff[sectIdx][0] * tmp1 +
|
334 |
+
stHdl->BCoeff[sectIdx][1] * stHdl->sectW[sectIdx][0] +
|
335 |
+
stHdl->BCoeff[sectIdx][2] * stHdl->sectW[sectIdx][1]);
|
336 |
+
|
337 |
+
stHdl->sectW[sectIdx][1] = stHdl->sectW[sectIdx][0];
|
338 |
+
stHdl->sectW[sectIdx][0] = tmp1;
|
339 |
+
}
|
340 |
+
}
|
341 |
+
|
342 |
+
// prepare output buffer
|
343 |
+
if (isFloatIO == 0) {
|
344 |
+
for (smplIdx = 0; smplIdx < inputNSamples; smplIdx++) {
|
345 |
+
((short*)pOut->outputBuff)[smplIdx] =
|
346 |
+
(short)_BIQUAD_FLOAT2SHORT(stHdl->sectOutputBuf[nSect - 1][smplIdx]);
|
347 |
+
}
|
348 |
+
} else {
|
349 |
+
memcpy(pOut->outputBuff, stHdl->sectOutputBuf[nSect - 1],
|
350 |
+
sizeof(float) * inputNSamples);
|
351 |
+
}
|
352 |
+
|
353 |
+
return 0;
|
354 |
+
}
|
src/biquad.h
ADDED
@@ -0,0 +1,190 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
//
|
2 |
+
// Copyright © 2025 Agora
|
3 |
+
// This file is part of TEN Framework, an open source project.
|
4 |
+
// Licensed under the Apache License, Version 2.0, with certain conditions.
|
5 |
+
// Refer to the "LICENSE" file in the root directory for more information.
|
6 |
+
//
|
7 |
+
#ifndef __BIQUAD_H__
|
8 |
+
#define __BIQUAD_H__
|
9 |
+
|
10 |
+
#include <stdio.h>
|
11 |
+
|
12 |
+
#define AGORA_UAP_BIQUAD_MAX_SECTION (20)
|
13 |
+
// the max. number of sections supported by this Biquad module
|
14 |
+
|
15 |
+
#define AGORA_UAP_BIQUAD_MAX_INPUT_LEN (3840)
|
16 |
+
// max. number of samples each time can be fed in
|
17 |
+
|
18 |
+
#define AGORA_UAP_BIQUAD_ALIGN8(o) (((o) + 7) & (~7))
|
19 |
+
#define _BIQUAD_FLOAT2SHORT(x) \
|
20 |
+
((x) < -32767.5f ? -32768 : ((x) > 32766.5f ? 32767 : (short)floor(.5 + (x))))
|
21 |
+
|
22 |
+
#define _BIQUAD_DC_REMOVAL_NSECT (2)
|
23 |
+
const float _BIQUAD_DC_REMOVAL_B[_BIQUAD_DC_REMOVAL_NSECT][3] = {
|
24 |
+
{1.0f, -2.0f, 1.0f}, {1.0f, -1.0f, 0.0f}};
|
25 |
+
const float _BIQUAD_DC_REMOVAL_A[_BIQUAD_DC_REMOVAL_NSECT][3] = {
|
26 |
+
{1.0f, -1.93944294f, 0.94281253f}, {1.0f, -0.94276431f, 0.0f}};
|
27 |
+
// const float _BIQUAD_DC_REMOVAL_G[_BIQUAD_DC_REMOVAL_NSECT] = {0.97056387f,
|
28 |
+
// 0.97138215f};
|
29 |
+
const float _BIQUAD_DC_REMOVAL_G[_BIQUAD_DC_REMOVAL_NSECT] = {0.97056387f,
|
30 |
+
0.8655014957f};
|
31 |
+
|
32 |
+
// Configuration Parameters, which impacts dynamic memory occupation, can only
|
33 |
+
// be set during allocation
|
34 |
+
typedef struct Biquad_StaticCfg_ {
|
35 |
+
size_t maxNSample; // max. number of samples each time can be fed in
|
36 |
+
// (0, AGORA_UAP_BIQUAD_MAX_INPUT_LEN]
|
37 |
+
|
38 |
+
int nsect; // the number of sections to be processed by this Biquad module
|
39 |
+
// (-inf, AGORA_UAP_BIQUAD_MAX_SECTION]
|
40 |
+
// if <= 0, use internal default filter coefficients
|
41 |
+
|
42 |
+
const float* B[AGORA_UAP_BIQUAD_MAX_SECTION];
|
43 |
+
const float* A[AGORA_UAP_BIQUAD_MAX_SECTION];
|
44 |
+
// always assume A[...][0] = 1.0f
|
45 |
+
const float* G;
|
46 |
+
} Biquad_StaticCfg;
|
47 |
+
|
48 |
+
typedef struct Biquad_InputData_ {
|
49 |
+
const void*
|
50 |
+
samplesPtr; // externally provided buffer containing input time samples
|
51 |
+
// either in short or float type
|
52 |
+
short sampleType; // = 0: samplesPtr = short*; o.w. samplesPtr = float*
|
53 |
+
size_t nsamples; // number of samples fed in this time
|
54 |
+
} Biquad_InputData;
|
55 |
+
|
56 |
+
typedef struct Biquad_OutputData_ {
|
57 |
+
void* outputBuff; // externally provided output buffer,
|
58 |
+
// assumed to be of enough size nsamples *
|
59 |
+
// sizeof(short)/sizeof(float); output data type is the same
|
60 |
+
// as input
|
61 |
+
} Biquad_OutputData;
|
62 |
+
|
63 |
+
#ifdef __cplusplus
|
64 |
+
extern "C" {
|
65 |
+
#endif
|
66 |
+
|
67 |
+
/****************************************************************************
|
68 |
+
* AUP_Biquad_create(...)
|
69 |
+
*
|
70 |
+
* This function creates a state handler from nothing, which is NOT ready for
|
71 |
+
* processing
|
72 |
+
*
|
73 |
+
* Input:
|
74 |
+
*
|
75 |
+
* Output:
|
76 |
+
* - stPtr : buffer to store the returned state handler
|
77 |
+
*
|
78 |
+
* Return value : 0 - Ok
|
79 |
+
* -1 - Error
|
80 |
+
*/
|
81 |
+
int AUP_Biquad_create(void** stPtr);
|
82 |
+
|
83 |
+
/****************************************************************************
|
84 |
+
* AUP_Biquad_destroy(...)
|
85 |
+
*
|
86 |
+
* destroys the biquad instance and releases all the dynamically allocated memory
|
87 |
+
*
|
88 |
+
* Input:
|
89 |
+
* - stPtr : buffer of State Handler, after this method, this
|
90 |
+
* handler won't be usable anymore
|
91 |
+
*
|
92 |
+
* Output:
|
93 |
+
*
|
94 |
+
* Return value : 0 - Ok
|
95 |
+
* -1 - Error
|
96 |
+
*/
|
97 |
+
int AUP_Biquad_destroy(void** stPtr);
|
98 |
+
|
99 |
+
/****************************************************************************
|
100 |
+
* AUP_Biquad_memAllocate(...)
|
101 |
+
*
|
102 |
+
* This function sets Static Config params and does memory allocation
|
103 |
+
* operation
|
104 |
+
*
|
105 |
+
* Input:
|
106 |
+
* - stPtr : State Handler which was returned by _create
|
107 |
+
* - pCfg : static configuration parameters
|
108 |
+
*
|
109 |
+
* Output:
|
110 |
+
*
|
111 |
+
* Return value : 0 - Ok
|
112 |
+
* -1 - Error
|
113 |
+
*/
|
114 |
+
int AUP_Biquad_memAllocate(void* stPtr, const Biquad_StaticCfg* pCfg);
|
115 |
+
|
116 |
+
/****************************************************************************
|
117 |
+
* AUP_Biquad_init(...)
|
118 |
+
*
|
119 |
+
* This function resets (initializes) the biquad module and gets it prepared for
|
120 |
+
* processing
|
121 |
+
*
|
122 |
+
* Input:
|
123 |
+
* - stPtr : State Handler which has gone through create and
|
124 |
+
* memAllocate
|
125 |
+
*
|
126 |
+
* Output:
|
127 |
+
*
|
128 |
+
* Return value : 0 - Ok
|
129 |
+
* -1 - Error
|
130 |
+
*/
|
131 |
+
int AUP_Biquad_init(void* stPtr);
|
132 |
+
|
133 |
+
/****************************************************************************
|
134 |
+
* AUP_Biquad_getStaticCfg(...)
|
135 |
+
*
|
136 |
+
* This function gets the static configuration from the Biquad module
|
137 |
+
*
|
138 |
+
* Input:
|
139 |
+
* - stPtr : State Handler which has gone through create and
|
140 |
+
* memAllocate
|
141 |
+
*
|
142 |
+
* Output:
|
143 |
+
* - pCfg : configuration content
|
144 |
+
*
|
145 |
+
* Return value : 0 - Ok
|
146 |
+
* -1 - Error
|
147 |
+
*/
|
148 |
+
int AUP_Biquad_getStaticCfg(const void* stPtr, Biquad_StaticCfg* pCfg);
|
149 |
+
|
150 |
+
/****************************************************************************
|
151 |
+
* AUP_Biquad_getAlgDelay(...)
|
152 |
+
*
|
153 |
+
* This function gets the algorithm delay from the biquad module
|
154 |
+
*
|
155 |
+
* Input:
|
156 |
+
* - stPtr : State Handler which has gone through create and
|
157 |
+
* memAllocate
|
158 |
+
*
|
159 |
+
* Output:
|
160 |
+
* - delayInSamples : algorithm delay in terms of samples
|
161 |
+
*
|
162 |
+
* Return value : 0 - Ok
|
163 |
+
* -1 - Error
|
164 |
+
*/
|
165 |
+
int AUP_Biquad_getAlgDelay(const void* stPtr, int* delayInSamples);
|
166 |
+
|
167 |
+
/****************************************************************************
|
168 |
+
* AUP_Biquad_proc(...)
|
169 |
+
*
|
170 |
+
* process a single frame
|
171 |
+
*
|
172 |
+
* Input:
|
173 |
+
* - stPtr : State Handler which has gone through create and
|
174 |
+
* memAllocate
|
175 |
+
* (no per-frame control parameter: this function takes only pIn and pOut)
|
176 |
+
* - pIn : input data stream
|
177 |
+
*
|
178 |
+
* Output:
|
179 |
+
* - pOut : output data (filtered samples, same sample type as the input)
|
180 |
+
*
|
181 |
+
* Return value : 0 - Ok
|
182 |
+
* -1 - Error
|
183 |
+
*/
|
184 |
+
int AUP_Biquad_proc(void* stPtr, const Biquad_InputData* pIn,
|
185 |
+
Biquad_OutputData* pOut);
|
186 |
+
|
187 |
+
#ifdef __cplusplus
|
188 |
+
}
|
189 |
+
#endif
|
190 |
+
#endif // __BIQUAD_H__
|
src/biquad_st.h
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
//
|
2 |
+
// Copyright © 2025 Agora
|
3 |
+
// This file is part of TEN Framework, an open source project.
|
4 |
+
// Licensed under the Apache License, Version 2.0, with certain conditions.
|
5 |
+
// Refer to the "LICENSE" file in the root directory for more information.
|
6 |
+
//
|
7 |
+
#ifndef __BIQUAD_ST_H__
|
8 |
+
#define __BIQUAD_ST_H__
|
9 |
+
|
10 |
+
#include <stdio.h>
|
11 |
+
#include "biquad.h"
|
12 |
+
|
13 |
+
typedef struct Biquad_St_ {
|
14 |
+
void* dynamMemPtr; // memory pointer holding the dynamic memory
|
15 |
+
size_t dynamMemSize; // size of the buffer *dynamMemPtr
|
16 |
+
|
17 |
+
// Static Configuration
|
18 |
+
Biquad_StaticCfg stCfg;
|
19 |
+
|
20 |
+
// ---------------------------------------------------------------
|
21 |
+
// Internal Static Config Registers, which are generated from stCfg
|
22 |
+
int maxNSample;
|
23 |
+
int nsect;
|
24 |
+
float BCoeff[AGORA_UAP_BIQUAD_MAX_SECTION][3];
|
25 |
+
float ACoeff[AGORA_UAP_BIQUAD_MAX_SECTION][3];
|
26 |
+
float GCoeff[AGORA_UAP_BIQUAD_MAX_SECTION]; // gain for each section
|
27 |
+
|
28 |
+
// Variables
|
29 |
+
float* inputTempBuf; // [maxNSample]
|
30 |
+
float sectW[AGORA_UAP_BIQUAD_MAX_SECTION][2];
|
31 |
+
// each section's register
|
32 |
+
float* sectOutputBuf
|
33 |
+
[AGORA_UAP_BIQUAD_MAX_SECTION]; //[AGORA_UAP_BIQUAD_MAX_SECTION][maxNSample]
|
34 |
+
// each section's output buffer
|
35 |
+
} Biquad_St;
|
36 |
+
|
37 |
+
#endif // __BIQUAD_ST_H__
|
src/coeff.h
ADDED
@@ -0,0 +1,246 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
//
|
2 |
+
// Copyright © 2025 Agora
|
3 |
+
// This file is part of TEN Framework, an open source project.
|
4 |
+
// Licensed under the Apache License, Version 2.0, with certain conditions.
|
5 |
+
// Refer to the "LICENSE" file in the root directory for more information.
|
6 |
+
//
|
7 |
+
#ifndef __COEFF_H__
|
8 |
+
#define __COEFF_H__
|
9 |
+
|
10 |
+
#include "aed_st.h"
|
11 |
+
|
12 |
+
#define AUP_AED_MEAN_STD_NBINS AUP_AED_FEA_LEN
|
13 |
+
|
14 |
+
#define AUP_AED_ASSUMED_HOPSZ (256)
|
15 |
+
#define AUP_AED_ASSUMED_WINDOWSZ (768)
|
16 |
+
#define AUP_AED_ASSUMED_FFTSZ (1024)
|
17 |
+
|
18 |
+
// means of input-mel-filterbank
|
19 |
+
const float AUP_AED_FEATURE_MEANS[AUP_AED_MEAN_STD_NBINS] = {
|
20 |
+
-8.198236465454e+00f, -6.265716552734e+00f, -5.483818531036e+00f,
|
21 |
+
-4.758691310883e+00f, -4.417088985443e+00f, -4.142892837524e+00f,
|
22 |
+
-3.912850379944e+00f, -3.845927953720e+00f, -3.657090425491e+00f,
|
23 |
+
-3.723418712616e+00f, -3.876134157181e+00f, -3.843890905380e+00f,
|
24 |
+
-3.690405130386e+00f, -3.756065845490e+00f, -3.698696136475e+00f,
|
25 |
+
-3.650463104248e+00f, -3.700468778610e+00f, -3.567321300507e+00f,
|
26 |
+
-3.498900175095e+00f, -3.477807044983e+00f, -3.458816051483e+00f,
|
27 |
+
-3.444923877716e+00f, -3.401328563690e+00f, -3.306261301041e+00f,
|
28 |
+
-3.278556823730e+00f, -3.233250856400e+00f, -3.198616027832e+00f,
|
29 |
+
-3.204526424408e+00f, -3.208798646927e+00f, -3.257838010788e+00f,
|
30 |
+
-3.381376743317e+00f, -3.534021377563e+00f, -3.640867948532e+00f,
|
31 |
+
-3.726858854294e+00f, -3.773730993271e+00f, -3.804667234421e+00f,
|
32 |
+
-3.832901000977e+00f, -3.871120452881e+00f, -3.990592956543e+00f,
|
33 |
+
-4.480289459229e+00f, 9.235690307617e+01f};
|
34 |
+
|
35 |
+
// stds of input-mel-filterbank
|
36 |
+
const float AUP_AED_FEATURE_STDS[AUP_AED_MEAN_STD_NBINS] = {
|
37 |
+
5.166063785553e+00f, 4.977209568024e+00f, 4.698895931244e+00f,
|
38 |
+
4.630621433258e+00f, 4.634347915649e+00f, 4.641156196594e+00f,
|
39 |
+
4.640676498413e+00f, 4.666367053986e+00f, 4.650534629822e+00f,
|
40 |
+
4.640020847321e+00f, 4.637400150299e+00f, 4.620099067688e+00f,
|
41 |
+
4.596316337585e+00f, 4.562654972076e+00f, 4.554360389709e+00f,
|
42 |
+
4.566910743713e+00f, 4.562489986420e+00f, 4.562412738800e+00f,
|
43 |
+
4.585299491882e+00f, 4.600179672241e+00f, 4.592845916748e+00f,
|
44 |
+
4.585922718048e+00f, 4.583496570587e+00f, 4.626092910767e+00f,
|
45 |
+
4.626957893372e+00f, 4.626289367676e+00f, 4.637005805969e+00f,
|
46 |
+
4.683015823364e+00f, 4.726813793182e+00f, 4.734289646149e+00f,
|
47 |
+
4.753227233887e+00f, 4.849722862244e+00f, 4.869434833527e+00f,
|
48 |
+
4.884482860565e+00f, 4.921327114105e+00f, 4.959212303162e+00f,
|
49 |
+
4.996619224548e+00f, 5.044823646545e+00f, 5.072216987610e+00f,
|
50 |
+
5.096439361572e+00f, 1.152136917114e+02f};
|
51 |
+
|
52 |
+
const float AUP_AED_STFTWindow_Hann768[768] = {
|
53 |
+
0.0000000e+00f, 1.6733041e-05f, 6.6931045e-05f, 1.5059065e-04f,
|
54 |
+
2.6770626e-04f, 4.1827004e-04f, 6.0227190e-04f, 8.1969953e-04f,
|
55 |
+
1.0705384e-03f, 1.3547717e-03f, 1.6723803e-03f, 2.0233432e-03f,
|
56 |
+
2.4076367e-03f, 2.8252351e-03f, 3.2761105e-03f, 3.7602327e-03f,
|
57 |
+
4.2775693e-03f, 4.8280857e-03f, 5.4117450e-03f, 6.0285082e-03f,
|
58 |
+
6.6783340e-03f, 7.3611788e-03f, 8.0769970e-03f, 8.8257407e-03f,
|
59 |
+
9.6073598e-03f, 1.0421802e-02f, 1.1269013e-02f, 1.2148935e-02f,
|
60 |
+
1.3061510e-02f, 1.4006678e-02f, 1.4984373e-02f, 1.5994532e-02f,
|
61 |
+
1.7037087e-02f, 1.8111967e-02f, 1.9219101e-02f, 2.0358415e-02f,
|
62 |
+
2.1529832e-02f, 2.2733274e-02f, 2.3968661e-02f, 2.5235910e-02f,
|
63 |
+
2.6534935e-02f, 2.7865651e-02f, 2.9227967e-02f, 3.0621794e-02f,
|
64 |
+
3.2047037e-02f, 3.3503601e-02f, 3.4991388e-02f, 3.6510300e-02f,
|
65 |
+
3.8060234e-02f, 3.9641086e-02f, 4.1252752e-02f, 4.2895122e-02f,
|
66 |
+
4.4568088e-02f, 4.6271536e-02f, 4.8005353e-02f, 4.9769424e-02f,
|
67 |
+
5.1563629e-02f, 5.3387849e-02f, 5.5241962e-02f, 5.7125844e-02f,
|
68 |
+
5.9039368e-02f, 6.0982406e-02f, 6.2954829e-02f, 6.4956504e-02f,
|
69 |
+
6.6987298e-02f, 6.9047074e-02f, 7.1135695e-02f, 7.3253021e-02f,
|
70 |
+
7.5398909e-02f, 7.7573217e-02f, 7.9775799e-02f, 8.2006508e-02f,
|
71 |
+
8.4265194e-02f, 8.6551706e-02f, 8.8865891e-02f, 9.1207593e-02f,
|
72 |
+
9.3576658e-02f, 9.5972925e-02f, 9.8396234e-02f, 1.0084642e-01f,
|
73 |
+
1.0332333e-01f, 1.0582679e-01f, 1.0835663e-01f, 1.1091268e-01f,
|
74 |
+
1.1349477e-01f, 1.1610274e-01f, 1.1873640e-01f, 1.2139558e-01f,
|
75 |
+
1.2408010e-01f, 1.2678978e-01f, 1.2952444e-01f, 1.3228389e-01f,
|
76 |
+
1.3506796e-01f, 1.3787646e-01f, 1.4070919e-01f, 1.4356597e-01f,
|
77 |
+
1.4644661e-01f, 1.4935091e-01f, 1.5227868e-01f, 1.5522973e-01f,
|
78 |
+
1.5820385e-01f, 1.6120085e-01f, 1.6422052e-01f, 1.6726267e-01f,
|
79 |
+
1.7032709e-01f, 1.7341358e-01f, 1.7652192e-01f, 1.7965192e-01f,
|
80 |
+
1.8280336e-01f, 1.8597603e-01f, 1.8916971e-01f, 1.9238420e-01f,
|
81 |
+
1.9561929e-01f, 1.9887474e-01f, 2.0215035e-01f, 2.0544589e-01f,
|
82 |
+
2.0876115e-01f, 2.1209590e-01f, 2.1544993e-01f, 2.1882300e-01f,
|
83 |
+
2.2221488e-01f, 2.2562536e-01f, 2.2905421e-01f, 2.3250119e-01f,
|
84 |
+
2.3596607e-01f, 2.3944863e-01f, 2.4294863e-01f, 2.4646583e-01f,
|
85 |
+
2.5000000e-01f, 2.5355090e-01f, 2.5711830e-01f, 2.6070196e-01f,
|
86 |
+
2.6430163e-01f, 2.6791708e-01f, 2.7154806e-01f, 2.7519434e-01f,
|
87 |
+
2.7885565e-01f, 2.8253178e-01f, 2.8622245e-01f, 2.8992744e-01f,
|
88 |
+
2.9364649e-01f, 2.9737934e-01f, 3.0112576e-01f, 3.0488549e-01f,
|
89 |
+
3.0865828e-01f, 3.1244388e-01f, 3.1624203e-01f, 3.2005248e-01f,
|
90 |
+
3.2387498e-01f, 3.2770926e-01f, 3.3155507e-01f, 3.3541216e-01f,
|
91 |
+
3.3928027e-01f, 3.4315913e-01f, 3.4704849e-01f, 3.5094809e-01f,
|
92 |
+
3.5485766e-01f, 3.5877695e-01f, 3.6270569e-01f, 3.6664362e-01f,
|
93 |
+
3.7059048e-01f, 3.7454600e-01f, 3.7850991e-01f, 3.8248196e-01f,
|
94 |
+
3.8646187e-01f, 3.9044938e-01f, 3.9444422e-01f, 3.9844613e-01f,
|
95 |
+
4.0245484e-01f, 4.0647007e-01f, 4.1049157e-01f, 4.1451906e-01f,
|
96 |
+
4.1855226e-01f, 4.2259092e-01f, 4.2663476e-01f, 4.3068351e-01f,
|
97 |
+
4.3473690e-01f, 4.3879466e-01f, 4.4285652e-01f, 4.4692220e-01f,
|
98 |
+
4.5099143e-01f, 4.5506394e-01f, 4.5913946e-01f, 4.6321772e-01f,
|
99 |
+
4.6729844e-01f, 4.7138134e-01f, 4.7546616e-01f, 4.7955263e-01f,
|
100 |
+
4.8364046e-01f, 4.8772939e-01f, 4.9181913e-01f, 4.9590943e-01f,
|
101 |
+
5.0000000e-01f, 5.0409057e-01f, 5.0818087e-01f, 5.1227061e-01f,
|
102 |
+
5.1635954e-01f, 5.2044737e-01f, 5.2453384e-01f, 5.2861866e-01f,
|
103 |
+
5.3270156e-01f, 5.3678228e-01f, 5.4086054e-01f, 5.4493606e-01f,
|
104 |
+
5.4900857e-01f, 5.5307780e-01f, 5.5714348e-01f, 5.6120534e-01f,
|
105 |
+
5.6526310e-01f, 5.6931649e-01f, 5.7336524e-01f, 5.7740908e-01f,
|
106 |
+
5.8144774e-01f, 5.8548094e-01f, 5.8950843e-01f, 5.9352993e-01f,
|
107 |
+
5.9754516e-01f, 6.0155387e-01f, 6.0555578e-01f, 6.0955062e-01f,
|
108 |
+
6.1353813e-01f, 6.1751804e-01f, 6.2149009e-01f, 6.2545400e-01f,
|
109 |
+
6.2940952e-01f, 6.3335638e-01f, 6.3729431e-01f, 6.4122305e-01f,
|
110 |
+
6.4514234e-01f, 6.4905191e-01f, 6.5295151e-01f, 6.5684087e-01f,
|
111 |
+
6.6071973e-01f, 6.6458784e-01f, 6.6844493e-01f, 6.7229074e-01f,
|
112 |
+
6.7612502e-01f, 6.7994752e-01f, 6.8375797e-01f, 6.8755612e-01f,
|
113 |
+
6.9134172e-01f, 6.9511451e-01f, 6.9887424e-01f, 7.0262066e-01f,
|
114 |
+
7.0635351e-01f, 7.1007256e-01f, 7.1377755e-01f, 7.1746822e-01f,
|
115 |
+
7.2114435e-01f, 7.2480566e-01f, 7.2845194e-01f, 7.3208292e-01f,
|
116 |
+
7.3569837e-01f, 7.3929804e-01f, 7.4288170e-01f, 7.4644910e-01f,
|
117 |
+
7.5000000e-01f, 7.5353417e-01f, 7.5705137e-01f, 7.6055137e-01f,
|
118 |
+
7.6403393e-01f, 7.6749881e-01f, 7.7094579e-01f, 7.7437464e-01f,
|
119 |
+
7.7778512e-01f, 7.8117700e-01f, 7.8455007e-01f, 7.8790410e-01f,
|
120 |
+
7.9123885e-01f, 7.9455411e-01f, 7.9784965e-01f, 8.0112526e-01f,
|
121 |
+
8.0438071e-01f, 8.0761580e-01f, 8.1083029e-01f, 8.1402397e-01f,
|
122 |
+
8.1719664e-01f, 8.2034808e-01f, 8.2347808e-01f, 8.2658642e-01f,
|
123 |
+
8.2967291e-01f, 8.3273733e-01f, 8.3577948e-01f, 8.3879915e-01f,
|
124 |
+
8.4179615e-01f, 8.4477027e-01f, 8.4772132e-01f, 8.5064909e-01f,
|
125 |
+
8.5355339e-01f, 8.5643403e-01f, 8.5929081e-01f, 8.6212354e-01f,
|
126 |
+
8.6493204e-01f, 8.6771611e-01f, 8.7047556e-01f, 8.7321022e-01f,
|
127 |
+
8.7591990e-01f, 8.7860442e-01f, 8.8126360e-01f, 8.8389726e-01f,
|
128 |
+
8.8650523e-01f, 8.8908732e-01f, 8.9164337e-01f, 8.9417321e-01f,
|
129 |
+
8.9667667e-01f, 8.9915358e-01f, 9.0160377e-01f, 9.0402708e-01f,
|
130 |
+
9.0642334e-01f, 9.0879241e-01f, 9.1113411e-01f, 9.1344829e-01f,
|
131 |
+
9.1573481e-01f, 9.1799349e-01f, 9.2022420e-01f, 9.2242678e-01f,
|
132 |
+
9.2460109e-01f, 9.2674698e-01f, 9.2886431e-01f, 9.3095293e-01f,
|
133 |
+
9.3301270e-01f, 9.3504350e-01f, 9.3704517e-01f, 9.3901759e-01f,
|
134 |
+
9.4096063e-01f, 9.4287416e-01f, 9.4475804e-01f, 9.4661215e-01f,
|
135 |
+
9.4843637e-01f, 9.5023058e-01f, 9.5199465e-01f, 9.5372846e-01f,
|
136 |
+
9.5543191e-01f, 9.5710488e-01f, 9.5874725e-01f, 9.6035891e-01f,
|
137 |
+
9.6193977e-01f, 9.6348970e-01f, 9.6500861e-01f, 9.6649640e-01f,
|
138 |
+
9.6795296e-01f, 9.6937821e-01f, 9.7077203e-01f, 9.7213435e-01f,
|
139 |
+
9.7346506e-01f, 9.7476409e-01f, 9.7603134e-01f, 9.7726673e-01f,
|
140 |
+
9.7847017e-01f, 9.7964159e-01f, 9.8078090e-01f, 9.8188803e-01f,
|
141 |
+
9.8296291e-01f, 9.8400547e-01f, 9.8501563e-01f, 9.8599332e-01f,
|
142 |
+
9.8693849e-01f, 9.8785107e-01f, 9.8873099e-01f, 9.8957820e-01f,
|
143 |
+
9.9039264e-01f, 9.9117426e-01f, 9.9192300e-01f, 9.9263882e-01f,
|
144 |
+
9.9332167e-01f, 9.9397149e-01f, 9.9458825e-01f, 9.9517191e-01f,
|
145 |
+
9.9572243e-01f, 9.9623977e-01f, 9.9672389e-01f, 9.9717476e-01f,
|
146 |
+
9.9759236e-01f, 9.9797666e-01f, 9.9832762e-01f, 9.9864523e-01f,
|
147 |
+
9.9892946e-01f, 9.9918030e-01f, 9.9939773e-01f, 9.9958173e-01f,
|
148 |
+
9.9973229e-01f, 9.9984941e-01f, 9.9993307e-01f, 9.9998327e-01f,
|
149 |
+
1.0000000e+00f, 9.9998327e-01f, 9.9993307e-01f, 9.9984941e-01f,
|
150 |
+
9.9973229e-01f, 9.9958173e-01f, 9.9939773e-01f, 9.9918030e-01f,
|
151 |
+
9.9892946e-01f, 9.9864523e-01f, 9.9832762e-01f, 9.9797666e-01f,
|
152 |
+
9.9759236e-01f, 9.9717476e-01f, 9.9672389e-01f, 9.9623977e-01f,
|
153 |
+
9.9572243e-01f, 9.9517191e-01f, 9.9458825e-01f, 9.9397149e-01f,
|
154 |
+
9.9332167e-01f, 9.9263882e-01f, 9.9192300e-01f, 9.9117426e-01f,
|
155 |
+
9.9039264e-01f, 9.8957820e-01f, 9.8873099e-01f, 9.8785107e-01f,
|
156 |
+
9.8693849e-01f, 9.8599332e-01f, 9.8501563e-01f, 9.8400547e-01f,
|
157 |
+
9.8296291e-01f, 9.8188803e-01f, 9.8078090e-01f, 9.7964159e-01f,
|
158 |
+
9.7847017e-01f, 9.7726673e-01f, 9.7603134e-01f, 9.7476409e-01f,
|
159 |
+
9.7346506e-01f, 9.7213435e-01f, 9.7077203e-01f, 9.6937821e-01f,
|
160 |
+
9.6795296e-01f, 9.6649640e-01f, 9.6500861e-01f, 9.6348970e-01f,
|
161 |
+
9.6193977e-01f, 9.6035891e-01f, 9.5874725e-01f, 9.5710488e-01f,
|
162 |
+
9.5543191e-01f, 9.5372846e-01f, 9.5199465e-01f, 9.5023058e-01f,
|
163 |
+
9.4843637e-01f, 9.4661215e-01f, 9.4475804e-01f, 9.4287416e-01f,
|
164 |
+
9.4096063e-01f, 9.3901759e-01f, 9.3704517e-01f, 9.3504350e-01f,
|
165 |
+
9.3301270e-01f, 9.3095293e-01f, 9.2886431e-01f, 9.2674698e-01f,
|
166 |
+
9.2460109e-01f, 9.2242678e-01f, 9.2022420e-01f, 9.1799349e-01f,
|
167 |
+
9.1573481e-01f, 9.1344829e-01f, 9.1113411e-01f, 9.0879241e-01f,
|
168 |
+
9.0642334e-01f, 9.0402708e-01f, 9.0160377e-01f, 8.9915358e-01f,
|
169 |
+
8.9667667e-01f, 8.9417321e-01f, 8.9164337e-01f, 8.8908732e-01f,
|
170 |
+
8.8650523e-01f, 8.8389726e-01f, 8.8126360e-01f, 8.7860442e-01f,
|
171 |
+
8.7591990e-01f, 8.7321022e-01f, 8.7047556e-01f, 8.6771611e-01f,
|
172 |
+
8.6493204e-01f, 8.6212354e-01f, 8.5929081e-01f, 8.5643403e-01f,
|
173 |
+
8.5355339e-01f, 8.5064909e-01f, 8.4772132e-01f, 8.4477027e-01f,
|
174 |
+
8.4179615e-01f, 8.3879915e-01f, 8.3577948e-01f, 8.3273733e-01f,
|
175 |
+
8.2967291e-01f, 8.2658642e-01f, 8.2347808e-01f, 8.2034808e-01f,
|
176 |
+
8.1719664e-01f, 8.1402397e-01f, 8.1083029e-01f, 8.0761580e-01f,
|
177 |
+
8.0438071e-01f, 8.0112526e-01f, 7.9784965e-01f, 7.9455411e-01f,
|
178 |
+
7.9123885e-01f, 7.8790410e-01f, 7.8455007e-01f, 7.8117700e-01f,
|
179 |
+
7.7778512e-01f, 7.7437464e-01f, 7.7094579e-01f, 7.6749881e-01f,
|
180 |
+
7.6403393e-01f, 7.6055137e-01f, 7.5705137e-01f, 7.5353417e-01f,
|
181 |
+
7.5000000e-01f, 7.4644910e-01f, 7.4288170e-01f, 7.3929804e-01f,
|
182 |
+
7.3569837e-01f, 7.3208292e-01f, 7.2845194e-01f, 7.2480566e-01f,
|
183 |
+
7.2114435e-01f, 7.1746822e-01f, 7.1377755e-01f, 7.1007256e-01f,
|
184 |
+
7.0635351e-01f, 7.0262066e-01f, 6.9887424e-01f, 6.9511451e-01f,
|
185 |
+
6.9134172e-01f, 6.8755612e-01f, 6.8375797e-01f, 6.7994752e-01f,
|
186 |
+
6.7612502e-01f, 6.7229074e-01f, 6.6844493e-01f, 6.6458784e-01f,
|
187 |
+
6.6071973e-01f, 6.5684087e-01f, 6.5295151e-01f, 6.4905191e-01f,
|
188 |
+
6.4514234e-01f, 6.4122305e-01f, 6.3729431e-01f, 6.3335638e-01f,
|
189 |
+
6.2940952e-01f, 6.2545400e-01f, 6.2149009e-01f, 6.1751804e-01f,
|
190 |
+
6.1353813e-01f, 6.0955062e-01f, 6.0555578e-01f, 6.0155387e-01f,
|
191 |
+
5.9754516e-01f, 5.9352993e-01f, 5.8950843e-01f, 5.8548094e-01f,
|
192 |
+
5.8144774e-01f, 5.7740908e-01f, 5.7336524e-01f, 5.6931649e-01f,
|
193 |
+
5.6526310e-01f, 5.6120534e-01f, 5.5714348e-01f, 5.5307780e-01f,
|
194 |
+
5.4900857e-01f, 5.4493606e-01f, 5.4086054e-01f, 5.3678228e-01f,
|
195 |
+
5.3270156e-01f, 5.2861866e-01f, 5.2453384e-01f, 5.2044737e-01f,
|
196 |
+
5.1635954e-01f, 5.1227061e-01f, 5.0818087e-01f, 5.0409057e-01f,
|
197 |
+
5.0000000e-01f, 4.9590943e-01f, 4.9181913e-01f, 4.8772939e-01f,
|
198 |
+
4.8364046e-01f, 4.7955263e-01f, 4.7546616e-01f, 4.7138134e-01f,
|
199 |
+
4.6729844e-01f, 4.6321772e-01f, 4.5913946e-01f, 4.5506394e-01f,
|
200 |
+
4.5099143e-01f, 4.4692220e-01f, 4.4285652e-01f, 4.3879466e-01f,
|
201 |
+
4.3473690e-01f, 4.3068351e-01f, 4.2663476e-01f, 4.2259092e-01f,
|
202 |
+
4.1855226e-01f, 4.1451906e-01f, 4.1049157e-01f, 4.0647007e-01f,
|
203 |
+
4.0245484e-01f, 3.9844613e-01f, 3.9444422e-01f, 3.9044938e-01f,
|
204 |
+
3.8646187e-01f, 3.8248196e-01f, 3.7850991e-01f, 3.7454600e-01f,
|
205 |
+
3.7059048e-01f, 3.6664362e-01f, 3.6270569e-01f, 3.5877695e-01f,
|
206 |
+
3.5485766e-01f, 3.5094809e-01f, 3.4704849e-01f, 3.4315913e-01f,
|
207 |
+
3.3928027e-01f, 3.3541216e-01f, 3.3155507e-01f, 3.2770926e-01f,
|
208 |
+
3.2387498e-01f, 3.2005248e-01f, 3.1624203e-01f, 3.1244388e-01f,
|
209 |
+
3.0865828e-01f, 3.0488549e-01f, 3.0112576e-01f, 2.9737934e-01f,
|
210 |
+
2.9364649e-01f, 2.8992744e-01f, 2.8622245e-01f, 2.8253178e-01f,
|
211 |
+
2.7885565e-01f, 2.7519434e-01f, 2.7154806e-01f, 2.6791708e-01f,
|
212 |
+
2.6430163e-01f, 2.6070196e-01f, 2.5711830e-01f, 2.5355090e-01f,
|
213 |
+
2.5000000e-01f, 2.4646583e-01f, 2.4294863e-01f, 2.3944863e-01f,
|
214 |
+
2.3596607e-01f, 2.3250119e-01f, 2.2905421e-01f, 2.2562536e-01f,
|
215 |
+
2.2221488e-01f, 2.1882300e-01f, 2.1544993e-01f, 2.1209590e-01f,
|
216 |
+
2.0876115e-01f, 2.0544589e-01f, 2.0215035e-01f, 1.9887474e-01f,
|
217 |
+
1.9561929e-01f, 1.9238420e-01f, 1.8916971e-01f, 1.8597603e-01f,
|
218 |
+
1.8280336e-01f, 1.7965192e-01f, 1.7652192e-01f, 1.7341358e-01f,
|
219 |
+
1.7032709e-01f, 1.6726267e-01f, 1.6422052e-01f, 1.6120085e-01f,
|
220 |
+
1.5820385e-01f, 1.5522973e-01f, 1.5227868e-01f, 1.4935091e-01f,
|
221 |
+
1.4644661e-01f, 1.4356597e-01f, 1.4070919e-01f, 1.3787646e-01f,
|
222 |
+
1.3506796e-01f, 1.3228389e-01f, 1.2952444e-01f, 1.2678978e-01f,
|
223 |
+
1.2408010e-01f, 1.2139558e-01f, 1.1873640e-01f, 1.1610274e-01f,
|
224 |
+
1.1349477e-01f, 1.1091268e-01f, 1.0835663e-01f, 1.0582679e-01f,
|
225 |
+
1.0332333e-01f, 1.0084642e-01f, 9.8396234e-02f, 9.5972925e-02f,
|
226 |
+
9.3576658e-02f, 9.1207593e-02f, 8.8865891e-02f, 8.6551706e-02f,
|
227 |
+
8.4265194e-02f, 8.2006508e-02f, 7.9775799e-02f, 7.7573217e-02f,
|
228 |
+
7.5398909e-02f, 7.3253021e-02f, 7.1135695e-02f, 6.9047074e-02f,
|
229 |
+
6.6987298e-02f, 6.4956504e-02f, 6.2954829e-02f, 6.0982406e-02f,
|
230 |
+
5.9039368e-02f, 5.7125844e-02f, 5.5241962e-02f, 5.3387849e-02f,
|
231 |
+
5.1563629e-02f, 4.9769424e-02f, 4.8005353e-02f, 4.6271536e-02f,
|
232 |
+
4.4568088e-02f, 4.2895122e-02f, 4.1252752e-02f, 3.9641086e-02f,
|
233 |
+
3.8060234e-02f, 3.6510300e-02f, 3.4991388e-02f, 3.3503601e-02f,
|
234 |
+
3.2047037e-02f, 3.0621794e-02f, 2.9227967e-02f, 2.7865651e-02f,
|
235 |
+
2.6534935e-02f, 2.5235910e-02f, 2.3968661e-02f, 2.2733274e-02f,
|
236 |
+
2.1529832e-02f, 2.0358415e-02f, 1.9219101e-02f, 1.8111967e-02f,
|
237 |
+
1.7037087e-02f, 1.5994532e-02f, 1.4984373e-02f, 1.4006678e-02f,
|
238 |
+
1.3061510e-02f, 1.2148935e-02f, 1.1269013e-02f, 1.0421802e-02f,
|
239 |
+
9.6073598e-03f, 8.8257407e-03f, 8.0769970e-03f, 7.3611788e-03f,
|
240 |
+
6.6783340e-03f, 6.0285082e-03f, 5.4117450e-03f, 4.8280857e-03f,
|
241 |
+
4.2775693e-03f, 3.7602327e-03f, 3.2761105e-03f, 2.8252351e-03f,
|
242 |
+
2.4076367e-03f, 2.0233432e-03f, 1.6723803e-03f, 1.3547717e-03f,
|
243 |
+
1.0705384e-03f, 8.1969953e-04f, 6.0227190e-04f, 4.1827004e-04f,
|
244 |
+
2.6770626e-04f, 1.5059065e-04f, 6.6931045e-05f, 1.6733041e-05f};
|
245 |
+
|
246 |
+
#endif
|
src/fftw.c
ADDED
The diff for this file is too large to render.
See raw diff
|
|
src/fftw.h
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
//
|
2 |
+
// Copyright © 2025 Agora
|
3 |
+
// This file is part of TEN Framework, an open source project.
|
4 |
+
// Licensed under the Apache License, Version 2.0, with certain conditions.
|
5 |
+
// Refer to the "LICENSE" file in the root directory for more information.
|
6 |
+
//
|
7 |
+
#ifndef __FFTW_H__
|
8 |
+
#define __FFTW_H__
|
9 |
+
|
10 |
+
#include <stdio.h>
|
11 |
+
|
12 |
+
#ifdef __cplusplus
|
13 |
+
extern "C" {
|
14 |
+
#endif /* __cplusplus */
|
15 |
+
// Spectrum Storage Format definition:
|
16 |
+
// format1: [Real-0, Real-Nyq, Real-1, Imag-1, Real-2, Imag-2, ...]
|
17 |
+
// format2: [Real-0, Real-1, (-1)*Imag-1, Real-2, (-1)*Imag-2, ..., Real-Nyq]
|
18 |
+
|
19 |
+
// the following functions assume input and output spectrum to be stored in
|
20 |
+
// format2
|
21 |
+
void AUP_FFTW_r2c_256(float* in, float* out);
|
22 |
+
void AUP_FFTW_c2r_256(float* in, float* out);
|
23 |
+
|
24 |
+
void AUP_FFTW_c2r_512(float* in, float* out);
|
25 |
+
void AUP_FFTW_r2c_512(float* in, float* out);
|
26 |
+
|
27 |
+
void AUP_FFTW_r2c_1024(float* in, float* out);
|
28 |
+
void AUP_FFTW_c2r_1024(float* in, float* out);
|
29 |
+
|
30 |
+
void AUP_FFTW_r2c_2048(float* in, float* out);
|
31 |
+
void AUP_FFTW_c2r_2048(float* in, float* out);
|
32 |
+
|
33 |
+
void AUP_FFTW_r2c_4096(float* in, float* out);
|
34 |
+
void AUP_FFTW_c2r_4096(float* in, float* out);
|
35 |
+
|
36 |
+
// if direction == 0: format1->format2
|
37 |
+
// if direction == 1: format2->format1
|
38 |
+
void AUP_FFTW_InplaceTransf(int direction, int fftSz, float* inplaceTranfBuf);
|
39 |
+
|
40 |
+
void AUP_FFTW_RescaleFFTOut(int fftSz, float* inplaceBuf);
|
41 |
+
void AUP_FFTW_RescaleIFFTOut(int fftSz, float* inplaceBuf);
|
42 |
+
|
43 |
+
#ifdef __cplusplus
|
44 |
+
}
|
45 |
+
#endif /* __cplusplus */
|
46 |
+
|
47 |
+
#endif // __FFTW_H__
|
src/fscvrt.cc
ADDED
@@ -0,0 +1,541 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
//
|
2 |
+
// Copyright © 2025 Agora
|
3 |
+
// This file is part of TEN Framework, an open source project.
|
4 |
+
// Licensed under the Apache License, Version 2.0, with certain conditions.
|
5 |
+
// Refer to the "LICENSE" file in the root directory for more information.
|
6 |
+
//
|
7 |
+
#include <math.h>
|
8 |
+
#include <stdlib.h>
|
9 |
+
#include <string.h>
|
10 |
+
#include <stdio.h>
|
11 |
+
|
12 |
+
#include "fscvrt.h"
|
13 |
+
#include "fscvrt_st.h"
|
14 |
+
#include "biquad.h"
|
15 |
+
|
16 |
+
// ==========================================================================================
|
17 |
+
// internal tools
|
18 |
+
// ==========================================================================================
|
19 |
+
|
20 |
+
static int AUP_Fscvrt_FilterSet(int resampleRate, int* nsect,
|
21 |
+
const float* B[_FSCVRT_MAXNSEC],
|
22 |
+
const float* A[_FSCVRT_MAXNSEC],
|
23 |
+
const float** G) {
|
24 |
+
int idx;
|
25 |
+
if (resampleRate == 2) {
|
26 |
+
*nsect = _FSCVRT_1over2_LOWPASS_NSEC;
|
27 |
+
for (idx = 0; idx < (*nsect); idx++) {
|
28 |
+
B[idx] = &(_FSCVRT_1over2_LOWPASS_B[idx][0]);
|
29 |
+
A[idx] = &(_FSCVRT_1over2_LOWPASS_A[idx][0]);
|
30 |
+
}
|
31 |
+
*G = _FSCVRT_1over2_LOWPASS_G;
|
32 |
+
} else if (resampleRate == 3) {
|
33 |
+
*nsect = _FSCVRT_1over3_LOWPASS_NSEC;
|
34 |
+
for (idx = 0; idx < (*nsect); idx++) {
|
35 |
+
B[idx] = &(_FSCVRT_1over3_LOWPASS_B[idx][0]);
|
36 |
+
A[idx] = &(_FSCVRT_1over3_LOWPASS_A[idx][0]);
|
37 |
+
}
|
38 |
+
*G = _FSCVRT_1over3_LOWPASS_G;
|
39 |
+
} else if (resampleRate == 4) {
|
40 |
+
*nsect = _FSCVRT_1over4_LOWPASS_NSEC;
|
41 |
+
for (idx = 0; idx < (*nsect); idx++) {
|
42 |
+
B[idx] = &(_FSCVRT_1over4_LOWPASS_B[idx][0]);
|
43 |
+
A[idx] = &(_FSCVRT_1over4_LOWPASS_A[idx][0]);
|
44 |
+
}
|
45 |
+
*G = _FSCVRT_1over4_LOWPASS_G;
|
46 |
+
} else if (resampleRate == 6) {
|
47 |
+
*nsect = _FSCVRT_1over6_LOWPASS_NSEC;
|
48 |
+
for (idx = 0; idx < (*nsect); idx++) {
|
49 |
+
B[idx] = &(_FSCVRT_1over6_LOWPASS_B[idx][0]);
|
50 |
+
A[idx] = &(_FSCVRT_1over6_LOWPASS_A[idx][0]);
|
51 |
+
}
|
52 |
+
*G = _FSCVRT_1over6_LOWPASS_G;
|
53 |
+
} else { // unknown resample rate
|
54 |
+
return -1;
|
55 |
+
}
|
56 |
+
|
57 |
+
return 0;
|
58 |
+
}
|
59 |
+
|
60 |
+
// Sizes the work memory needed for the biquad input/output buffers and,
// when a memory block is supplied, carves the two buffers out of it.
//
// Input:
//      - stHdl     : state handler; biquadInBufLen / biquadOutBufLen must
//                    already be set
//      - memPtrExt : memory block to carve up, or NULL to only query the
//                    required size
//      - memSize   : size of memPtrExt in bytes
//
// Return value      : >0 - required number of bytes (never below 80)
//                     -1 - Error (NULL handler, or block too small)
static int AUP_Fscvrt_dynamMemPrepare(FscvrtSt* stHdl, void* memPtrExt,
                                      size_t memSize) {
  int inBytes;
  int outBytes;
  int needBytes;
  char* cursor;

  if (stHdl == NULL) {
    return -1;
  }

  // each buffer is rounded up by _FSCVRT_ALIGN8 before being summed
  inBytes = _FSCVRT_ALIGN8(sizeof(float) * stHdl->biquadInBufLen);
  outBytes = _FSCVRT_ALIGN8(sizeof(float) * stHdl->biquadOutBufLen);
  needBytes = inBytes + outBytes;
  needBytes = _FSCVRT_MAX(needBytes, 80);

  // profiling call: no memory given, just report the requirement
  if (memPtrExt == NULL) {
    return needBytes;
  }

  // the supplied block cannot hold both buffers
  if ((size_t)needBytes > memSize) {
    return -1;
  }

  cursor = (char*)memPtrExt;

  if (inBytes != 0) {
    stHdl->biquadInBuf = (float*)cursor;
    cursor += inBytes;
  } else {
    stHdl->biquadInBuf = NULL;
  }

  if (outBytes != 0) {
    stHdl->biquadOutBuf = (float*)cursor;
    cursor += outBytes;
  } else {
    stHdl->biquadOutBuf = NULL;
  }

  // sanity check: never hand out more than was accounted for
  if (((int)(cursor - (char*)memPtrExt)) > needBytes) {
    return -1;
  }

  return needBytes;
}
|
109 |
+
|
110 |
+
// Validates a static configuration and normalizes its data-type flags.
//
// Input / Output:
//      - pCfg : configuration to check; on success inputType and
//               outputType are forced to exactly 0 or 1
//
// Return value :  0 - Ok
//                -1 - Error (NULL pointer, unsupported sampling rate, or
//                     stepSz outside [1, AUP_FSCVRT_MAX_INPUT_LEN])
static int AUP_Fscvrt_checkStatCfg(FscvrtStaticCfg* pCfg) {
  if (pCfg == NULL) {
    return -1;
  }

  // only these four sampling rates are accepted on the input side ...
  switch (pCfg->inputFs) {
    case 16000:
    case 24000:
    case 32000:
    case 48000:
      break;
    default:
      return -1;
  }

  // ... and on the output side
  switch (pCfg->outputFs) {
    case 16000:
    case 24000:
    case 32000:
    case 48000:
      break;
    default:
      return -1;
  }

  if (!(pCfg->stepSz >= 1 && pCfg->stepSz <= AUP_FSCVRT_MAX_INPUT_LEN)) {
    return -1;
  }

  // any non-zero type flag means "float"
  pCfg->inputType = (pCfg->inputType != 0) ? 1 : 0;
  pCfg->outputType = (pCfg->outputType != 0) ? 1 : 0;

  return 0;
}
|
139 |
+
|
140 |
+
// Derives the run-time parameters from stHdl->stCfg: conversion mode,
// up/down-sampling factors, biquad buffer lengths and the low-pass filter.
//
// mode 0: bypass (inputFs == outputFs)
// mode 1: integer up-sampling
// mode 2: integer down-sampling
// mode 3: up-sample to _FSCVRT_COMMON_FS, then down-sample
//
// Return value :  0 - Ok
//                -1 - Error (no filter table for the required factor)
static int AUP_Fscvrt_publishStaticCfg(FscvrtSt* stHdl) {
  const int inFs = stHdl->stCfg.inputFs;
  const int outFs = stHdl->stCfg.outputFs;
  int filterRate = 0;

  // start from bypass and refine below
  stHdl->mode = 0;
  stHdl->upSmplRate = 1;
  stHdl->downSmplRate = 1;

  if (outFs > inFs) {
    if ((outFs % inFs) == 0) {
      stHdl->mode = 1;
      stHdl->upSmplRate = outFs / inFs;
      stHdl->downSmplRate = 1;
    } else {
      stHdl->mode = 3;
      stHdl->upSmplRate = _FSCVRT_COMMON_FS / inFs;
      stHdl->downSmplRate = _FSCVRT_COMMON_FS / outFs;
    }
  } else if (outFs < inFs) {
    if ((inFs % outFs) == 0) {
      stHdl->mode = 2;
      stHdl->upSmplRate = 1;
      stHdl->downSmplRate = inFs / outFs;
    } else {
      stHdl->mode = 3;
      stHdl->upSmplRate = _FSCVRT_COMMON_FS / inFs;
      stHdl->downSmplRate = _FSCVRT_COMMON_FS / outFs;
    }
  }

  // the biquad works on the up-sampled frame; the output buffer holds two
  // such frames
  if (stHdl->mode != 0) {
    stHdl->biquadInBufLen = stHdl->stCfg.stepSz * stHdl->upSmplRate;
    stHdl->biquadOutBufLen = 2 * (stHdl->stCfg.stepSz * stHdl->upSmplRate);
  } else {
    stHdl->biquadInBufLen = 0;
    stHdl->biquadOutBufLen = 0;
  }

  // the filter is chosen for the larger of the two factors
  filterRate = _FSCVRT_MAX(stHdl->upSmplRate, stHdl->downSmplRate);

  // forget any previously selected filter
  stHdl->nSec = 0;
  memset(stHdl->biquadB, 0, sizeof(stHdl->biquadB));
  memset(stHdl->biquadA, 0, sizeof(stHdl->biquadA));
  stHdl->biquadG = NULL;  // gain for each section

  if (stHdl->mode == 0) {
    return 0;  // bypass needs no filter
  }

  if (AUP_Fscvrt_FilterSet(filterRate, &(stHdl->nSec), stHdl->biquadB,
                           stHdl->biquadA, &(stHdl->biquadG)) < 0) {
    return -1;
  }

  return 0;
}
|
199 |
+
|
200 |
+
// Clears the run-time state: both biquad buffer counters and, when work
// memory has been allocated, its whole content.
//
// Return value : always 0
static int AUP_Fscvrt_resetVariables(FscvrtSt* stHdl) {
  const int hasWorkMem =
      (stHdl->dynamMemPtr != NULL) && (stHdl->dynamMemSize > 0);

  stHdl->biquadInBufCnt = 0;
  stHdl->biquadOutBufCnt = 0;

  if (hasWorkMem) {
    memset(stHdl->dynamMemPtr, 0, stHdl->dynamMemSize);
  }

  return 0;
}
|
209 |
+
|
210 |
+
// ==========================================================================================
|
211 |
+
// public APIs
|
212 |
+
// ==========================================================================================
|
213 |
+
|
214 |
+
int AUP_Fscvrt_create(void** stPtr) {
|
215 |
+
FscvrtSt* tmpPtr;
|
216 |
+
|
217 |
+
if (stPtr == NULL) {
|
218 |
+
return -1;
|
219 |
+
}
|
220 |
+
*stPtr = (void*)malloc(sizeof(FscvrtSt));
|
221 |
+
if (*stPtr == NULL) {
|
222 |
+
return -1;
|
223 |
+
}
|
224 |
+
memset(*stPtr, 0, sizeof(FscvrtSt));
|
225 |
+
|
226 |
+
tmpPtr = (FscvrtSt*)(*stPtr);
|
227 |
+
|
228 |
+
tmpPtr->dynamMemPtr = NULL;
|
229 |
+
tmpPtr->dynamMemSize = 0;
|
230 |
+
|
231 |
+
tmpPtr->stCfg.inputFs = 24000;
|
232 |
+
tmpPtr->stCfg.outputFs = 32000;
|
233 |
+
tmpPtr->stCfg.stepSz = 240; // 10ms processing step
|
234 |
+
tmpPtr->stCfg.inputType = 0; // short in
|
235 |
+
tmpPtr->stCfg.outputType = 0; // short out
|
236 |
+
|
237 |
+
if (AUP_Biquad_create(&(tmpPtr->biquadSt)) < 0) {
|
238 |
+
return -1;
|
239 |
+
}
|
240 |
+
|
241 |
+
return 0;
|
242 |
+
}
|
243 |
+
|
244 |
+
int AUP_Fscvrt_destroy(void** stPtr) {
|
245 |
+
FscvrtSt* stHdl;
|
246 |
+
|
247 |
+
if (stPtr == NULL) {
|
248 |
+
return 0;
|
249 |
+
}
|
250 |
+
|
251 |
+
stHdl = (FscvrtSt*)(*stPtr);
|
252 |
+
if (stHdl == NULL) {
|
253 |
+
return 0;
|
254 |
+
}
|
255 |
+
|
256 |
+
AUP_Biquad_destroy(&(stHdl->biquadSt));
|
257 |
+
if (stHdl->dynamMemPtr != NULL) {
|
258 |
+
free(stHdl->dynamMemPtr);
|
259 |
+
}
|
260 |
+
stHdl->dynamMemPtr = NULL;
|
261 |
+
|
262 |
+
free(stHdl);
|
263 |
+
(*stPtr) = NULL;
|
264 |
+
|
265 |
+
return 0;
|
266 |
+
}
|
267 |
+
|
268 |
+
int AUP_Fscvrt_memAllocate(void* stPtr, const FscvrtStaticCfg* pCfg) {
|
269 |
+
FscvrtSt* stHdl = NULL;
|
270 |
+
FscvrtStaticCfg tmpStatCfg = {0};
|
271 |
+
Biquad_StaticCfg bqStatCfg;
|
272 |
+
int idx, ret;
|
273 |
+
int totalMemSize = 0;
|
274 |
+
|
275 |
+
if (stPtr == NULL || pCfg == NULL) {
|
276 |
+
return -1;
|
277 |
+
}
|
278 |
+
stHdl = (FscvrtSt*)(stPtr);
|
279 |
+
|
280 |
+
memcpy(&tmpStatCfg, pCfg, sizeof(FscvrtStaticCfg));
|
281 |
+
if (AUP_Fscvrt_checkStatCfg(&tmpStatCfg) < 0) {
|
282 |
+
return -1;
|
283 |
+
}
|
284 |
+
memcpy(&(stHdl->stCfg), &tmpStatCfg, sizeof(FscvrtStaticCfg));
|
285 |
+
|
286 |
+
if (AUP_Fscvrt_publishStaticCfg(stHdl) < 0) {
|
287 |
+
return -1;
|
288 |
+
}
|
289 |
+
|
290 |
+
// check memory requirement
|
291 |
+
totalMemSize = AUP_Fscvrt_dynamMemPrepare(stHdl, NULL, 0);
|
292 |
+
if (totalMemSize < 0) {
|
293 |
+
return -1;
|
294 |
+
}
|
295 |
+
|
296 |
+
// allocate dynamic memory
|
297 |
+
if ((size_t)totalMemSize > stHdl->dynamMemSize) {
|
298 |
+
if (stHdl->dynamMemPtr != NULL) {
|
299 |
+
free(stHdl->dynamMemPtr);
|
300 |
+
stHdl->dynamMemSize = 0;
|
301 |
+
}
|
302 |
+
stHdl->dynamMemPtr = (void*)malloc(totalMemSize);
|
303 |
+
if (stHdl->dynamMemPtr == NULL) {
|
304 |
+
return -1;
|
305 |
+
}
|
306 |
+
stHdl->dynamMemSize = totalMemSize;
|
307 |
+
}
|
308 |
+
memset(stHdl->dynamMemPtr, 0, stHdl->dynamMemSize);
|
309 |
+
|
310 |
+
// setup the pointers/variable
|
311 |
+
if (AUP_Fscvrt_dynamMemPrepare(stHdl, stHdl->dynamMemPtr,
|
312 |
+
stHdl->dynamMemSize) < 0) {
|
313 |
+
return -1;
|
314 |
+
}
|
315 |
+
|
316 |
+
// memAllocation for upSmplBiquadSt and downSmplBiquadSt
|
317 |
+
if (stHdl->nSec != 0) {
|
318 |
+
if (stHdl->nSec > AGORA_UAP_BIQUAD_MAX_SECTION) {
|
319 |
+
return -1;
|
320 |
+
}
|
321 |
+
memset(&bqStatCfg, 0, sizeof(Biquad_StaticCfg));
|
322 |
+
bqStatCfg.maxNSample = (size_t)(stHdl->biquadInBufLen);
|
323 |
+
bqStatCfg.nsect = stHdl->nSec;
|
324 |
+
for (idx = 0; idx < stHdl->nSec; idx++) {
|
325 |
+
bqStatCfg.B[idx] = stHdl->biquadB[idx];
|
326 |
+
bqStatCfg.A[idx] = stHdl->biquadA[idx];
|
327 |
+
}
|
328 |
+
bqStatCfg.G = stHdl->biquadG;
|
329 |
+
|
330 |
+
ret = AUP_Biquad_memAllocate(stHdl->biquadSt, &bqStatCfg);
|
331 |
+
if (ret < 0) {
|
332 |
+
return -1;
|
333 |
+
}
|
334 |
+
}
|
335 |
+
|
336 |
+
return 0;
|
337 |
+
}
|
338 |
+
|
339 |
+
int AUP_Fscvrt_init(void* stPtr) {
|
340 |
+
FscvrtSt* stHdl;
|
341 |
+
|
342 |
+
if (stPtr == NULL) {
|
343 |
+
return -1;
|
344 |
+
}
|
345 |
+
stHdl = (FscvrtSt*)(stPtr);
|
346 |
+
|
347 |
+
// clear/reset run-time variables
|
348 |
+
if (AUP_Fscvrt_resetVariables(stHdl) < 0) {
|
349 |
+
return -1;
|
350 |
+
}
|
351 |
+
|
352 |
+
// init submodules ...
|
353 |
+
if (stHdl->biquadSt != NULL && stHdl->nSec != 0) {
|
354 |
+
if (AUP_Biquad_init(stHdl->biquadSt) < 0) {
|
355 |
+
return -1;
|
356 |
+
}
|
357 |
+
}
|
358 |
+
|
359 |
+
return 0;
|
360 |
+
}
|
361 |
+
|
362 |
+
// Copies the handler's current static configuration to the caller.
//
// Input:
//      - stPtr : state handler
//
// Output:
//      - pCfg  : receives a copy of the configuration
//
// Return value :  0 - Ok
//                -1 - Error (NULL argument)
int AUP_Fscvrt_getStaticCfg(const void* stPtr, FscvrtStaticCfg* pCfg) {
  const FscvrtSt* stHdl = (const FscvrtSt*)stPtr;

  if (stHdl == NULL || pCfg == NULL) {
    return -1;
  }

  *pCfg = stHdl->stCfg;

  return 0;
}
|
374 |
+
|
375 |
+
// Reports the converter's algorithmic delay and the largest number of
// output samples a single AUP_Fscvrt_proc call can produce.
//
// Input:
//      - stPtr : state handler
//
// Output:
//      - buff  : delayInInputFs  - biquad delay expressed in input samples
//                maxOutputStepSz - ceil(stepSz * upSmplRate / downSmplRate)
//
// Return value :  0 - Ok
//                -1 - Error (NULL argument, handler without valid
//                     resampling factors, or the biquad delay query failed)
int AUP_Fscvrt_getInfor(const void* stPtr, FscvrtGetData* buff) {
  const FscvrtSt* stHdl = (const FscvrtSt*)stPtr;
  int delayBiquad = 0;
  int nInterp;
  int nOutMax;

  if (stHdl == NULL || buff == NULL) {
    return -1;
  }

  // Both factors are used as divisors below. A handler whose factors were
  // never set (all-zero state) must be rejected instead of dividing by zero.
  if (stHdl->upSmplRate <= 0 || stHdl->downSmplRate <= 0) {
    return -1;
  }

  if (stHdl->nSec != 0 &&
      AUP_Biquad_getAlgDelay(stHdl->biquadSt, &delayBiquad) < 0) {
    return -1;
  }

  if (stHdl->mode == 0) {
    // bypass: no filtering, no delay
    buff->delayInInputFs = 0;
  } else if (stHdl->mode == 2) {
    // direct downsampling: the filter already runs at the input rate
    buff->delayInInputFs = delayBiquad;
  } else {
    // modes 1 and 3: the filter runs at the up-sampled rate, so scale the
    // delay back to the input rate
    buff->delayInInputFs =
        (int)roundf(delayBiquad / (float)(stHdl->upSmplRate));
  }

  // round the per-call output count up when the ratio is not an integer
  nInterp = stHdl->stCfg.stepSz * stHdl->upSmplRate;
  nOutMax = nInterp / stHdl->downSmplRate;
  if (nOutMax * stHdl->downSmplRate != nInterp) {
    nOutMax += 1;
  }
  buff->maxOutputStepSz = nOutMax;

  return 0;
}
|
411 |
+
|
412 |
+
int AUP_Fscvrt_proc(void* stPtr, const FscvrtInData* pIn, FscvrtOutData* pOut) {
|
413 |
+
FscvrtSt* stHdl = NULL;
|
414 |
+
const FscvrtStaticCfg* pCfg;
|
415 |
+
Biquad_InputData bqdInData;
|
416 |
+
Biquad_OutputData bqdOutData;
|
417 |
+
const short* shortSrcPtr = NULL;
|
418 |
+
const float* floatSrcPtr = NULL;
|
419 |
+
short* shortTgtPtr = NULL;
|
420 |
+
float* floatTgtPtr = NULL;
|
421 |
+
int idx, tgtIdx;
|
422 |
+
int nOutSamples = 0, samplesTaken = 0, samplesLeft = 0;
|
423 |
+
int jumpRate;
|
424 |
+
|
425 |
+
if (stPtr == NULL || pIn == NULL || pOut == NULL || pIn->inDataSeq == NULL ||
|
426 |
+
pOut->outDataSeq == NULL) { // pCtrl == NULL
|
427 |
+
return -1;
|
428 |
+
}
|
429 |
+
|
430 |
+
stHdl = (FscvrtSt*)(stPtr);
|
431 |
+
pCfg = (const FscvrtStaticCfg*)&(stHdl->stCfg);
|
432 |
+
shortSrcPtr = (const short*)(pIn->inDataSeq);
|
433 |
+
floatSrcPtr = (const float*)(pIn->inDataSeq);
|
434 |
+
// ==============================================================================
|
435 |
+
// mode-0: bypass
|
436 |
+
if (stHdl->mode == 0) { // direct bypass
|
437 |
+
if (pIn->outDataSeqLen < pCfg->stepSz) {
|
438 |
+
return -1;
|
439 |
+
}
|
440 |
+
pOut->nOutData = pCfg->stepSz;
|
441 |
+
pOut->outDataType = pCfg->outputType;
|
442 |
+
if (pIn->inDataSeq == pOut->outDataSeq) {
|
443 |
+
if (pCfg->outputType == pCfg->inputType)
|
444 |
+
return 0; // we don't need to do anything
|
445 |
+
return -1;
|
446 |
+
// if input buffer and the output buffer are the same, but required
|
447 |
+
// different data type: error, we currently do not support such usecase
|
448 |
+
}
|
449 |
+
|
450 |
+
if (pCfg->inputType == 0 && pCfg->outputType == 0) {
|
451 |
+
memcpy(pOut->outDataSeq, pIn->inDataSeq, sizeof(short) * pCfg->stepSz);
|
452 |
+
} else if (pCfg->inputType == 1 && pCfg->outputType == 1) {
|
453 |
+
memcpy(pOut->outDataSeq, pIn->inDataSeq, sizeof(float) * pCfg->stepSz);
|
454 |
+
} else if (pCfg->inputType == 0 && pCfg->outputType == 1) {
|
455 |
+
for (idx = 0; idx < pCfg->stepSz; idx++) {
|
456 |
+
((float*)pOut->outDataSeq)[idx] = ((short*)pIn->inDataSeq)[idx];
|
457 |
+
}
|
458 |
+
} else { // if (pCfg->inputType == 1 && pCfg->outputType == 0)
|
459 |
+
for (idx = 0; idx < pCfg->stepSz; idx++) {
|
460 |
+
((short*)pOut->outDataSeq)[idx] =
|
461 |
+
(short)_FSCVRT_FLOAT2SHORT(((float*)pIn->inDataSeq)[idx]);
|
462 |
+
}
|
463 |
+
}
|
464 |
+
|
465 |
+
return 0;
|
466 |
+
}
|
467 |
+
|
468 |
+
// prepare input buffer for Biquad .....
|
469 |
+
memset(stHdl->biquadInBuf, 0, sizeof(float) * stHdl->biquadInBufLen);
|
470 |
+
if (pCfg->inputType == 0) {
|
471 |
+
for (idx = 0; idx < pCfg->stepSz; idx++) {
|
472 |
+
stHdl->biquadInBuf[idx * (stHdl->upSmplRate)] =
|
473 |
+
((float)shortSrcPtr[idx]) * stHdl->upSmplRate;
|
474 |
+
}
|
475 |
+
} else {
|
476 |
+
for (idx = 0; idx < pCfg->stepSz; idx++) {
|
477 |
+
stHdl->biquadInBuf[idx * (stHdl->upSmplRate)] =
|
478 |
+
floatSrcPtr[idx] * stHdl->upSmplRate;
|
479 |
+
}
|
480 |
+
}
|
481 |
+
|
482 |
+
// biquad filtering ......
|
483 |
+
memset(&bqdInData, 0, sizeof(Biquad_InputData));
|
484 |
+
memset(&bqdOutData, 0, sizeof(Biquad_OutputData));
|
485 |
+
bqdInData.samplesPtr = (const void*)(stHdl->biquadInBuf);
|
486 |
+
bqdInData.sampleType = 1;
|
487 |
+
bqdInData.nsamples = (size_t)(pCfg->stepSz * stHdl->upSmplRate);
|
488 |
+
bqdOutData.outputBuff = (void*)&(stHdl->biquadOutBuf[stHdl->biquadOutBufCnt]);
|
489 |
+
if (stHdl->biquadOutBufCnt + (pCfg->stepSz * stHdl->upSmplRate) >
|
490 |
+
stHdl->biquadOutBufLen) {
|
491 |
+
return -1;
|
492 |
+
}
|
493 |
+
if (AUP_Biquad_proc(stHdl->biquadSt, &bqdInData, &bqdOutData) < 0) {
|
494 |
+
return -1;
|
495 |
+
}
|
496 |
+
stHdl->biquadOutBufCnt += (pCfg->stepSz * stHdl->upSmplRate);
|
497 |
+
|
498 |
+
// checking the output buffer .........
|
499 |
+
nOutSamples = stHdl->biquadOutBufCnt / stHdl->downSmplRate;
|
500 |
+
if (pIn->outDataSeqLen < nOutSamples) {
|
501 |
+
return -1;
|
502 |
+
}
|
503 |
+
|
504 |
+
// prepare output data, downsampling and throwing out ......
|
505 |
+
pOut->nOutData = nOutSamples;
|
506 |
+
pOut->outDataType = pCfg->outputType;
|
507 |
+
|
508 |
+
shortTgtPtr = (short*)pOut->outDataSeq;
|
509 |
+
floatTgtPtr = (float*)pOut->outDataSeq;
|
510 |
+
jumpRate = stHdl->downSmplRate;
|
511 |
+
if (pCfg->outputType == 0) { // -> shortTgtPtr
|
512 |
+
for (idx = (jumpRate - 1), tgtIdx = 0; idx < stHdl->biquadOutBufCnt;
|
513 |
+
idx += jumpRate, tgtIdx++) {
|
514 |
+
shortTgtPtr[tgtIdx] = _FSCVRT_FLOAT2SHORT(stHdl->biquadOutBuf[idx]);
|
515 |
+
}
|
516 |
+
} else { // -> floatTgtPtr
|
517 |
+
for (idx = (jumpRate - 1), tgtIdx = 0; idx < stHdl->biquadOutBufCnt;
|
518 |
+
idx += jumpRate, tgtIdx++) {
|
519 |
+
floatTgtPtr[tgtIdx] = stHdl->biquadOutBuf[idx];
|
520 |
+
}
|
521 |
+
}
|
522 |
+
if (nOutSamples != tgtIdx) {
|
523 |
+
return -1;
|
524 |
+
}
|
525 |
+
|
526 |
+
// update the stHdl->biquadOutBuf and stHdl->biquadOutBufCnt
|
527 |
+
samplesTaken = nOutSamples * jumpRate;
|
528 |
+
samplesLeft = stHdl->biquadOutBufCnt - samplesTaken;
|
529 |
+
if (samplesLeft == 0) {
|
530 |
+
stHdl->biquadOutBufCnt = 0;
|
531 |
+
} else if (samplesLeft > 0) {
|
532 |
+
stHdl->biquadOutBufCnt = samplesLeft;
|
533 |
+
memmove(stHdl->biquadOutBuf, &(stHdl->biquadOutBuf[samplesTaken]),
|
534 |
+
sizeof(float) * samplesLeft);
|
535 |
+
} else { // samplesLeft < 0
|
536 |
+
stHdl->biquadOutBufCnt = 0;
|
537 |
+
return -1;
|
538 |
+
}
|
539 |
+
|
540 |
+
return 0;
|
541 |
+
}
|
src/fscvrt.h
ADDED
@@ -0,0 +1,186 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
//
|
2 |
+
// Copyright © 2025 Agora
|
3 |
+
// This file is part of TEN Framework, an open source project.
|
4 |
+
// Licensed under the Apache License, Version 2.0, with certain conditions.
|
5 |
+
// Refer to the "LICENSE" file in the root directory for more information.
|
6 |
+
//
|
7 |
+
#ifndef __FSCVRT_H__
|
8 |
+
#define __FSCVRT_H__
|
9 |
+
|
10 |
+
#define AUP_FSCVRT_MAX_INPUT_LEN (2400)
|
11 |
+
// max. number of samples each time can be fed in
|
12 |
+
|
13 |
+
#include <stdio.h>
|
14 |
+
|
15 |
+
typedef struct FscvrtStaticCfg_ {
|
16 |
+
int inputFs; // input stream sampling freq.
|
17 |
+
int outputFs; // output stream sampling freq.
|
18 |
+
int stepSz; // number of input samples per each proc.
|
19 |
+
int inputType; // input data type, 0: short, 1: float
|
20 |
+
int outputType; // output data type, 0: short, 1: float
|
21 |
+
} FscvrtStaticCfg;
|
22 |
+
|
23 |
+
typedef struct FscvrtInData_ {
|
24 |
+
const void* inDataSeq; // [stepSz], externally provided buffer
|
25 |
+
int outDataSeqLen;
|
26 |
+
// the length of externally provided buffer outDataSeq in OutData
|
27 |
+
} FscvrtInData;
|
28 |
+
|
29 |
+
typedef struct FscvrtOutData_ {
|
30 |
+
int nOutData; // number of samples in outDataSeq
|
31 |
+
// this value may vary by +-1 from frame-to-frame
|
32 |
+
// and the user needs to check if nOutData <= outDataSeqLen
|
33 |
+
// o.w. the buffer outDataSeq is not long enough
|
34 |
+
int outDataType; // output data type, 0: short, 1: float
|
35 |
+
void* outDataSeq; // [outDataSeqLen], externally provided buffer
|
36 |
+
} FscvrtOutData;
|
37 |
+
|
38 |
+
typedef struct FscvrtGetData_ {
|
39 |
+
int maxOutputStepSz; // max. number of output samples per each proc.
|
40 |
+
int delayInInputFs; // algorithm delay in terms of samples @ input fs
|
41 |
+
} FscvrtGetData;
|
42 |
+
|
43 |
+
#ifdef __cplusplus
|
44 |
+
extern "C" {
|
45 |
+
#endif
|
46 |
+
|
47 |
+
/****************************************************************************
|
48 |
+
* AUP_Fscvrt_create(...)
|
49 |
+
*
|
50 |
+
* This function creates a state handler from nothing, which is NOT ready for
|
51 |
+
* processing
|
52 |
+
*
|
53 |
+
* Input:
|
54 |
+
*
|
55 |
+
* Output:
|
56 |
+
* - stPtr : buffer to store the returned state handler
|
57 |
+
*
|
58 |
+
* Return value : 0 - Ok
|
59 |
+
* -1 - Error
|
60 |
+
*/
|
61 |
+
int AUP_Fscvrt_create(void** stPtr);
|
62 |
+
|
63 |
+
/****************************************************************************
|
64 |
+
* AUP_Fscvrt_destroy(...)
|
65 |
+
*
|
66 |
+
* destroys the Fs-Converter instance and releases all the dynamically allocated memory
|
67 |
+
*
|
68 |
+
* Input:
|
69 |
+
* - stPtr : buffer of State Handler, after this method, this
|
70 |
+
* handler won't be usable anymore
|
71 |
+
*
|
72 |
+
* Output:
|
73 |
+
*
|
74 |
+
* Return value : 0 - Ok
|
75 |
+
* -1 - Error
|
76 |
+
*/
|
77 |
+
int AUP_Fscvrt_destroy(void** stPtr);
|
78 |
+
|
79 |
+
/****************************************************************************
|
80 |
+
* AUP_Fscvrt_memAllocate(...)
|
81 |
+
*
|
82 |
+
* This function sets Static Config params and does memory allocation
|
83 |
+
* operation
|
84 |
+
*
|
85 |
+
* Input:
|
86 |
+
* - stPtr : State Handler which was returned by _create
|
87 |
+
* - pCfg : static configuration parameters
|
88 |
+
*
|
89 |
+
* Output:
|
90 |
+
*
|
91 |
+
* Return value : 0 - Ok
|
92 |
+
* -1 - Error
|
93 |
+
*/
|
94 |
+
int AUP_Fscvrt_memAllocate(void* stPtr, const FscvrtStaticCfg* pCfg);
|
95 |
+
|
96 |
+
/****************************************************************************
|
97 |
+
* AUP_Fscvrt_init(...)
|
98 |
+
*
|
99 |
+
* This function resets (initializes) the Fs-Converter module and gets it prepared for
|
100 |
+
* processing
|
101 |
+
*
|
102 |
+
* Input:
|
103 |
+
* - stPtr : State Handler which has gone through create and
|
104 |
+
* memAllocate
|
105 |
+
*
|
106 |
+
* Output:
|
107 |
+
*
|
108 |
+
* Return value : 0 - Ok
|
109 |
+
* -1 - Error
|
110 |
+
*/
|
111 |
+
int AUP_Fscvrt_init(void* stPtr);
|
112 |
+
|
113 |
+
/****************************************************************************
|
114 |
+
* AUP_Fscvrt_setDynamCfg(...)
|
115 |
+
*
|
116 |
+
* This function sets the dynamic (per-frame variable) configuration
|
117 |
+
*
|
118 |
+
* Input:
|
119 |
+
* - stPtr : State Handler which has gone through create and
|
120 |
+
* memAllocate
|
121 |
+
* - pCfg : configuration content
|
122 |
+
*
|
123 |
+
* Output:
|
124 |
+
*
|
125 |
+
* Return value : 0 - Ok
|
126 |
+
* -1 - Error
|
127 |
+
*/
|
128 |
+
int AUP_Fscvrt_setDynamCfg(void* stPtr);
|
129 |
+
|
130 |
+
/****************************************************************************
|
131 |
+
* AUP_Fscvrt_getStaticCfg(...)
|
132 |
+
*
|
133 |
+
* This function gets the static configuration from the Fs-Converter module
|
134 |
+
*
|
135 |
+
* Input:
|
136 |
+
* - stPtr : State Handler which has gone through create and
|
137 |
+
* memAllocate
|
138 |
+
*
|
139 |
+
* Output:
|
140 |
+
* - pCfg : configuration content
|
141 |
+
*
|
142 |
+
* Return value : 0 - Ok
|
143 |
+
* -1 - Error
|
144 |
+
*/
|
145 |
+
int AUP_Fscvrt_getStaticCfg(const void* stPtr, FscvrtStaticCfg* pCfg);
|
146 |
+
|
147 |
+
/****************************************************************************
|
148 |
+
* AUP_Fscvrt_getInfor(...)
|
149 |
+
*
|
150 |
+
* This function gets subsidiary information from the Fs-Converter module
|
151 |
+
*
|
152 |
+
* Input:
|
153 |
+
* - stPtr : State Handler which has gone through create and
|
154 |
+
* memAllocate
|
155 |
+
*
|
156 |
+
* Output:
|
157 |
+
* - FscvrtGetData : returned information
|
158 |
+
*
|
159 |
+
* Return value : 0 - Ok
|
160 |
+
* -1 - Error
|
161 |
+
*/
|
162 |
+
int AUP_Fscvrt_getInfor(const void* stPtr, FscvrtGetData* buff);
|
163 |
+
|
164 |
+
/****************************************************************************
|
165 |
+
* AUP_Fscvrt_proc(...)
|
166 |
+
*
|
167 |
+
* process a single frame
|
168 |
+
*
|
169 |
+
* Input:
|
170 |
+
* - stPtr : State Handler which has gone through create and
|
171 |
+
* memAllocate
|
172 |
+
* - pCtrl : per-frame variable control parameters
|
173 |
+
* - pIn : input data stream
|
174 |
+
*
|
175 |
+
* Output:
|
176 |
+
* - pOut : output data (resampled samples, their count and data type)
|
177 |
+
*
|
178 |
+
* Return value : 0 - Ok
|
179 |
+
* -1 - Error
|
180 |
+
*/
|
181 |
+
int AUP_Fscvrt_proc(void* stPtr, const FscvrtInData* pIn, FscvrtOutData* pOut);
|
182 |
+
|
183 |
+
#ifdef __cplusplus
|
184 |
+
}
|
185 |
+
#endif
|
186 |
+
#endif // __FSCVRT_H__
|