Ziyi Lin committed on
Commit
b50f2a2
·
1 Parent(s): a5398ec

ONNX open source and license terms update

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. LICENSE +47 -0
  2. NOTICES +68 -0
  3. README.md +205 -64
  4. examples/.gitattributes +0 -1
  5. examples/CMakeLists.txt +6 -0
  6. examples/build-and-deploy-android.sh +6 -0
  7. examples/build-and-deploy-ios.sh +6 -0
  8. examples/build-and-deploy-linux.sh +6 -0
  9. examples/build-and-deploy-mac.sh +6 -0
  10. examples/build-and-deploy-windows.bat +7 -0
  11. examples/images/.gitattributes +0 -2
  12. examples/main.c +14 -6
  13. examples/plot_pr_curves.py +5 -4
  14. examples/sample_array.h +6 -0
  15. examples/test.py +4 -3
  16. examples/test_node.js +529 -0
  17. examples_onnx/CMakeLists.txt +24 -0
  18. examples_onnx/build-and-deploy-linux.sh +37 -0
  19. include/ten_vad.h +4 -3
  20. include/ten_vad.py +4 -3
  21. lib/Web/ten_vad.d.ts +111 -0
  22. lib/Web/ten_vad.js +30 -0
  23. lib/{macOS/ten_vad.framework/Versions/Current/Headers/ten_vad.h → Web/ten_vad.wasm} +2 -2
  24. lib/Windows/x64/ten_vad.lib +0 -0
  25. lib/Windows/x86/ten_vad.lib +0 -0
  26. lib/iOS/ten_vad.framework/Headers/ten_vad.h +90 -3
  27. lib/iOS/ten_vad.framework/Info.plist +0 -0
  28. lib/iOS/ten_vad.framework/Modules/module.modulemap +5 -3
  29. lib/macOS/ten_vad.framework/Headers +1 -0
  30. lib/macOS/ten_vad.framework/Headers/ten_vad.h +0 -3
  31. lib/macOS/ten_vad.framework/Resources +1 -0
  32. lib/macOS/ten_vad.framework/Resources/Info.plist +0 -3
  33. lib/macOS/ten_vad.framework/Versions/A/Headers/ten_vad.h +90 -3
  34. lib/macOS/ten_vad.framework/Versions/A/Resources/Info.plist +44 -3
  35. lib/macOS/ten_vad.framework/Versions/Current +1 -0
  36. lib/macOS/ten_vad.framework/Versions/Current/ten_vad +0 -3
  37. lib/macOS/ten_vad.framework/ten_vad +0 -3
  38. lib/macOS/ten_vad.framework/ten_vad +1 -0
  39. setup.py +6 -0
  40. src/aed.cc +993 -0
  41. src/aed.h +226 -0
  42. src/aed_st.h +132 -0
  43. src/biquad.cc +354 -0
  44. src/biquad.h +190 -0
  45. src/biquad_st.h +37 -0
  46. src/coeff.h +246 -0
  47. src/fftw.c +0 -0
  48. src/fftw.h +47 -0
  49. src/fscvrt.cc +541 -0
  50. src/fscvrt.h +186 -0
LICENSE ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Open Source License
2
+
3
+ The ten-vad is licensed pursuant to the Apache License v2.0, with the
4
+ following additional conditions. You may reproduce, prepare Derivative Works
5
+ of, publicly display, publicly perform, sublicense, distribute, or otherwise
6
+ make available (together, "Deploy") the ten-vad, for commercial or
7
+ non-commercial purposes, provided that you agree to abide by the terms below:
8
+
9
+ 1. You may not Deploy the ten-vad in a way that competes with Agora's
10
+ offerings and/or that allows others to compete with Agora's offerings,
11
+ including without limitation enabling any third party to develop or
12
+ deploy Applications.
13
+
14
+ 2. You may Deploy the ten-vad solely to create and enable deployment
15
+ of your Application(s) solely for your benefit and the benefit of your
16
+ direct End Users. If you prefer, you may include the following notice in
17
+ the documentation of your Application(s): "Powered by ten-vad".
18
+
19
+ 3. Derivative Works of the ten-vad remain subject to this Open Source
20
+ License.
21
+
22
+ 4. "End Users" shall mean the end-users of your Application(s) who access
23
+ the ten-vad solely to the extent necessary to access and use the
24
+ Application(s) you create or deploy using ten-vad.
25
+
26
+ 5. "Application(s)" shall mean your software programs designed or developed
27
+ by using the ten-vad or where deployment is enabled by the ten-vad.
28
+
29
+ Copyright © 2025 Agora
30
+
31
+ Licensed under the Apache License, Version 2.0 (the "License");
32
+ you may not use this file except in compliance with the License.
33
+ You may obtain a copy of the License at
34
+
35
+ http://www.apache.org/licenses/LICENSE-2.0
36
+
37
+ Unless required by applicable law or agreed to in writing, software
38
+ distributed under the License is distributed on an "AS IS" BASIS,
39
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
40
+ See the License for the specific language governing permissions and
41
+ limitations under the License.
42
+
43
+ =======================================================================================
44
+
45
+ Note that the project contains derived code from other open source projects
46
+ with BSD-3-Clause and BSD-2-Clause licenses; refer to the "NOTICES"
47
+ file in the root directory for detailed information.
NOTICES ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ This project includes modified code from the following third-party component:
2
+
3
+ 1. File: lpcnet_enc.c
4
+ - Source: LPCNet (https://github.com/xiph/LPCNet)
5
+ - License: BSD-2-Clause
6
+ - Copyright: 2017-2019, Mozilla
7
+ - Original License Text:
8
+ Copyright (c) 2017-2019 Mozilla
9
+
10
+ Redistribution and use in source and binary forms, with or without modification,
11
+ are permitted provided that the following conditions are met:
12
+
13
+ - Redistributions of source code must retain the above copyright notice,
14
+ this list of conditions and the following disclaimer.
15
+
16
+ - Redistributions in binary form must reproduce the above copyright notice,
17
+ this list of conditions and the following disclaimer in the documentation
18
+ and/or other materials provided with the distribution.
19
+
20
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
24
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
+
32
+ 2. Project: LPCNet
33
+ - Source: LPCNet (https://github.com/xiph/LPCNet)
34
+ - License: BSD-3-Clause
35
+ - Copyright: 2017-2018, Mozilla, 2007-2017, Jean-Marc Valin, 2005-2017, Xiph.Org Foundation, 2003-2004, Mark Borgerding
36
+ - Original License Text of LPCNet open source project:
37
+ Copyright (c) 2017-2018, Mozilla
38
+ Copyright (c) 2007-2017, Jean-Marc Valin
39
+ Copyright (c) 2005-2017, Xiph.Org Foundation
40
+ Copyright (c) 2003-2004, Mark Borgerding
41
+
42
+ Redistribution and use in source and binary forms, with or without
43
+ modification, are permitted provided that the following conditions
44
+ are met:
45
+
46
+ - Redistributions of source code must retain the above copyright
47
+ notice, this list of conditions and the following disclaimer.
48
+
49
+ - Redistributions in binary form must reproduce the above copyright
50
+ notice, this list of conditions and the following disclaimer in the
51
+ documentation and/or other materials provided with the distribution.
52
+
53
+ - Neither the name of the Xiph.Org Foundation nor the names of its
54
+ contributors may be used to endorse or promote products derived from
55
+ this software without specific prior written permission.
56
+
57
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
58
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
59
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
60
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION
61
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
62
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
63
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
64
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
65
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
66
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
67
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
68
+
README.md CHANGED
@@ -1,26 +1,94 @@
1
- ---
2
- tags:
3
- - voice activity detection
4
- - speech activity detection
5
- - real time
6
- - vad
7
- - sad
8
- - speech
9
- - audio
10
- - silero vad
11
- - conversational
12
- - automatic speech recognition
13
- pipeline_tag: voice-activity-detection
14
- ---
15
- # **TEN VAD**
16
- ***A Low-Latency, Lightweight and High-Performance Streaming VAD***
17
 
 
 
 
 
 
 
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  ## **Introduction**
21
  **TEN VAD** is a real-time voice activity detection system designed for enterprise use, providing accurate frame-level speech activity detection. It shows superior precision compared to both WebRTC VAD and Silero VAD, which are commonly used in the industry. Additionally, TEN VAD offers lower computational complexity and reduced memory usage compared to Silero VAD. Meanwhile, the architecture's temporal efficiency enables rapid voice activity detection, significantly reducing end-to-end response and turn detection latency in conversational AI systems.
22
 
23
 
 
24
 
25
  ## **Key Features**
26
 
@@ -28,6 +96,7 @@ pipeline_tag: voice-activity-detection
28
 
29
  The precision-recall curves comparing the performance of WebRTC VAD (pitch-based), Silero VAD, and TEN VAD are shown below. The evaluation is conducted on the precisely manually annotated testset. The audio files are from librispeech, gigaspeech, DNS Challenge etc. As demonstrated, TEN VAD achieves the best performance. Additionally, cross-validation experiments conducted on large internal real-world datasets demonstrate the reproducibility of these findings. The **testset with annotated labels** is released in directory "testset" of this repository.
30
 
 
31
 
32
  <div style="text-align:">
33
  <img src="./examples/images/PR_Curves_testset.png" width="800">
@@ -39,14 +108,14 @@ Note that the default threshold of 0.5 is used to generate binary speech indicat
39
  cd ./examples
40
  python plot_pr_curves.py
41
  ```
42
-
43
 
44
  ### **2. Agent-Friendly:**
45
  As illustrated in the figure below, TEN VAD rapidly detects speech-to-non-speech transitions, whereas Silero VAD suffers from a delay of several hundred milliseconds, resulting in increased end-to-end latency in human-agent interaction systems. In addition, as demonstrated in the 6.5s-7.0s audio segment, Silero VAD fails to identify short silent durations between adjacent speech segments.
46
  <div style="text-align:">
47
  <img src="./examples/images/Agent-Friendly-image.png" width="800">
48
  </div>
49
-
50
 
51
  ### **3. Lightweight:**
52
  We evaluated the RTF (Real-Time Factor) across five distinct platforms, each equipped with varying CPUs. TEN VAD demonstrates much lower computational complexity and smaller library size than Silero VAD.
@@ -57,6 +126,7 @@ We evaluated the RTF (Real-Time Factor) across five distinct platforms, each equ
57
  <th align="center" rowspan="2" valign="middle"> CPU </th>
58
  <th align="center" colspan="2"> RTF </th>
59
  <th align="center" colspan="2"> Lib Size </th>
 
60
  </tr>
61
  <tr>
62
  <th align="center" style="white-space: nowrap;"> TEN VAD </th>
@@ -68,16 +138,16 @@ We evaluated the RTF (Real-Time Factor) across five distinct platforms, each equ
68
  <th align="center" rowspan="3"> Linux </th>
69
  <td style="white-space: nowrap;"> AMD Ryzen 9 5900X 12-Core </td>
70
  <td align="center"> 0.0150 </td>
71
- <td rowspan="2" style="text-align: center; vertical-align: middle;"> / </td>
72
- <td rowspan="3" style="text-align: center; vertical-align: middle;"> 306KB </td>
73
- <td rowspan="9" style="text-align: center; vertical-align: middle;"> 2.16MB(JIT) / 2.22MB(ONNX) </td>
74
  </tr>
75
  <tr>
76
- <td > Intel(R) Xeon(R) Platinum 8253 </td>
77
  <td align="center"> 0.0136 </td>
78
  </tr>
79
  <tr>
80
- <td > Intel(R) Xeon(R) Gold 6348 CPU @ 2.60GHz </td>
81
  <td align="center"> 0.0086 </td>
82
  <td align="center"> 0.0127 </td>
83
  </tr>
@@ -85,7 +155,7 @@ We evaluated the RTF (Real-Time Factor) across five distinct platforms, each equ
85
  <th align="center"> Windows </th>
86
  <td> Intel i7-10710U </td>
87
  <td align="center"> 0.0150 </td>
88
- <td rowspan="6" style="text-align: center; vertical-align: middle;"> / </td>
89
  <td align="center" style="white-space: nowrap;"> 464KB(x86) / 508KB(x64) </td>
90
  </tr>
91
  <tr>
@@ -94,11 +164,17 @@ We evaluated the RTF (Real-Time Factor) across five distinct platforms, each equ
94
  <td align="center"> 0.0160 </td>
95
  <td align="center"> 731KB </td>
96
  </tr>
 
 
 
 
 
 
97
  <tr>
98
  <th align="center" rowspan="2"> Android </th>
99
  <td> Galaxy J6+ (32bit, 425) </td>
100
  <td align="center"> 0.0570 </td>
101
- <td rowspan="2" style="text-align: center; vertical-align: middle;"> 373KB(v7a) / 532KB(v8a)</td>
102
  </tr>
103
  <tr>
104
  <td> Oppo A3s (450) </td>
@@ -108,33 +184,31 @@ We evaluated the RTF (Real-Time Factor) across five distinct platforms, each equ
108
  <th align="center" rowspan="2"> iOS </th>
109
  <td> iPhone6 (A8) </td>
110
  <td align="center"> 0.0210 </td>
111
- <td rowspan="2" style="text-align: center; vertical-align: middle;"> 320KB</td>
112
  </tr>
113
  <tr>
114
  <td> iPhone8 (A11) </td>
115
  <td align="center"> 0.0050 </td>
116
  </tr>
117
  </table>
118
-
119
- <style>
120
- th, td {
121
- border: 1px solid #ddd;
122
- padding: 8px;
123
- }
124
- </style>
125
 
126
  ### **4. Multiple programming languages and platforms:**
127
- TEN VAD provides cross-platform C compatibility across five operating systems (Linux x64, Windows, macOS, Android, iOS), with Python bindings optimized for Linux x64.
 
 
128
 
129
 
130
  ### **5. Supproted sampling rate and hop size:**
131
  TEN VAD operates on 16kHz audio input with configurable hop sizes (optimized frame configurations: 160/256 samples=10/16ms). Other sampling rates must be resampled to 16kHz.
132
-
 
133
 
134
  ## **Installation**
135
  ```
136
- git clone https://huggingface.co/TEN-framework/ten-vad
137
  ```
 
138
 
139
  ## **Quick Start**
140
  The project supports five major platforms with dynamic library linking.
@@ -152,7 +226,7 @@ The project supports five major platforms with dynamic library linking.
152
  <td align="center"> libten_vad.so </td>
153
  <td align="center"> x64 </td>
154
  <td align="center"> Python, C </td>
155
- <td rowspan="5" style="text-align: center; vertical-align: middle;">ten_vad.h <br> ten_vad.py</td>
156
  <td> </td>
157
  </tr>
158
  <tr>
@@ -169,6 +243,13 @@ The project supports five major platforms with dynamic library linking.
169
  <td align="center"> C </td>
170
  <td> </td>
171
  </tr>
 
 
 
 
 
 
 
172
  <tr>
173
  <th align="center"> Android </th>
174
  <td align="center"> libten_vad.so </td>
@@ -178,13 +259,14 @@ The project supports five major platforms with dynamic library linking.
178
  </tr>
179
  <tr>
180
  <th align="center"> iOS </th>
181
- <td align="center" style="text-align: center; vertical-align: middle;"> ten_vad.framework </td>
182
- <td align="center" style="text-align: center; vertical-align: middle;"> arm64 </td>
183
  <td align="center"> C </td>
184
  <td> 1. not simulator <br> 2. not iPad </td>
185
  </tr>
186
- </table>
187
 
 
 
188
 
189
  ### **Python Usage**
190
  #### **1. Linux**
@@ -201,7 +283,7 @@ Note: You could use other versions of above packages, but we didn't test other v
201
 
202
  <br>
203
 
204
- The **lib** only depends on numpy, you have to install the dependency via requirements.txt:
205
 
206
  ```pip install -r requirements.txt```
207
 
@@ -219,6 +301,7 @@ sudo apt install libc++1
219
 
220
  <br>
221
 
 
222
  #### **Usage**
223
  Note: For usage in python, you can either use it by **git clone** or **pip**.
224
 
@@ -226,7 +309,7 @@ Note: For usage in python, you can either use it by **git clone** or **pip**.
226
 
227
  1. Clone the repository
228
  ```
229
- git clone https://huggingface.co/TEN-framework/ten-vad
230
  ```
231
 
232
  2. Enter examples directory
@@ -238,6 +321,7 @@ cd ./examples
238
  ```
239
  python test.py s0724-s0730.wav out.txt
240
  ```
 
241
 
242
  ##### **By using pip:**
243
 
@@ -252,10 +336,25 @@ pip install -U --force-reinstall -v git+https://github.com/TEN-framework/ten-vad
252
  ```
253
  from ten_vad import TenVad
254
  ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
 
256
  ### **C Usage**
257
  #### **Build Scripts**
258
- Located in examples/ directory:
259
 
260
  - Linux: build-and-deploy-linux.sh
261
  - Windows: build-and-deploy-windows.bat
@@ -275,12 +374,13 @@ Runtime library path configuration:
275
  - Configure toolchain and architecture settings
276
 
277
  #### **Overview of Usage**
278
- - Navigate to examples/
279
  - Execute platform-specific build script
280
  - Configure dynamic library path
281
  - Run demo with sample audio s0724-s0730.wav
282
  - Processed results saved to out.txt
283
 
 
284
 
285
  The detailed usage methods of each platform are as follows <br>
286
 
@@ -296,12 +396,22 @@ sudo apt update
296
  sudo apt install libc++1
297
  ```
298
 
299
- ##### **Usage**
300
  ```
301
  1) cd ./examples
302
  2) ./build-and-deploy-linux.sh
303
  ```
304
 
 
 
 
 
 
 
 
 
 
 
305
  #### **2. Windows**
306
  ##### **Requirements**
307
  - Visual Studio (2017, 2019, 2022 verified)
@@ -316,6 +426,7 @@ sudo apt install libc++1
316
  - Visual Studio version (default: 2019)
317
  3) ./build-and-deploy-windows.bat
318
  ```
 
319
 
320
  #### **3. macOS**
321
  ##### **Requirements**
@@ -330,6 +441,7 @@ sudo apt install libc++1
330
  - Alternative: x86_64 (Intel)
331
  3) ./build-and-deploy-mac.sh
332
  ```
 
333
 
334
  #### **4. Android**
335
  ##### **Requirements**
@@ -346,6 +458,7 @@ sudo apt install libc++1
346
  - Toolchain: aarch64-linux-android-clang (default) or custom NDK toolchain
347
  4) ./build-and-deploy-android.sh
348
  ```
 
349
 
350
  #### **5. iOS**
351
  ##### **Requirements**
@@ -397,6 +510,29 @@ cd ./examples
397
 
398
  3.5. Build in Xcode and run demo on your device.
399
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
400
  ## **Citations**
401
  ```
402
  @misc{TEN VAD,
@@ -409,29 +545,34 @@ cd ./examples
409
  email = {[email protected]}
410
  }
411
  ```
 
 
 
 
 
 
 
 
412
 
413
- ## Usage Guidance
414
 
415
- 1. You may not (i) host the TEN VAD or the Derivative Works on any End
416
- User devices, including but not limited to any mobile terminal devices
417
- or (ii) Deploy the TEN VAD in a way that competes with Agora's
418
- offerings and/or that allows others to compete with Agora's offerings,
419
- including without limitation enabling any third party to develop or
420
- deploy Applications.
421
 
422
- 2. You may Deploy the TEN VAD solely to create and enable deployment
423
- of your Application(s) solely for your benefit and the benefit of your
424
- direct End Users. If you prefer, you may include the following notice in
425
- the documentation of your Application(s): "Powered by TEN VAD".
426
 
427
- 3. "End Users" shall mean the end-users of your Application(s) who access
428
- the TEN VAD solely to the extent necessary to access and use the
429
- Application(s) you create or deploy using TEN VAD.
430
 
431
- 4. "Application(s)" shall mean your software programs designed or developed
432
- by using the TEN VAD or where deployment is enabled by the TEN
433
- VAD.
434
 
435
- ## Future Open Source Plan
 
 
436
 
437
- TEN-VAD is currently released as a binary. Based on community feedback and interest, we plan to progressively open source the internal components of the binary.
 
 
 
1
+ ![TEN VAD banner][ten-vad-banner]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
+ [![Discussion posts](https://img.shields.io/github/discussions/TEN-framework/ten-vad?labelColor=gray&color=%20%23f79009)](https://github.com/TEN-framework/ten-vad/discussions/)
4
+ [![Commits](https://img.shields.io/github/commit-activity/m/TEN-framework/ten-vad?labelColor=gray&color=pink)](https://github.com/TEN-framework/ten-vad/graphs/commit-activity)
5
+ [![Issues closed](https://img.shields.io/github/issues-search?query=repo%3ATEN-framework%2Ften-vad%20is%3Aclosed&label=issues%20closed&labelColor=gray&color=green)](https://github.com/TEN-framework/ten-vad/issues)
6
+ ![](https://img.shields.io/github/contributors/ten-framework/ten-vad?color=c4f042&labelColor=gray&style=flat-square)
7
+ [![PRs Welcome](https://img.shields.io/badge/PRs-welcome!-brightgreen.svg?style=flat-square)](https://github.com/TEN-framework/ten-vad/pulls)
8
+ [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/TEN-framework/TEN-vad)
9
 
10
+ [![GitHub watchers](https://img.shields.io/github/watchers/TEN-framework/ten-vad?style=social&label=Watch)](https://GitHub.com/TEN-framework/ten-vad/watchers/?WT.mc_id=academic-105485-koreyst)
11
+ [![GitHub forks](https://img.shields.io/github/forks/TEN-framework/ten-vad?style=social&label=Fork)](https://GitHub.com/TEN-framework/ten-vad/network/?WT.mc_id=academic-105485-koreyst)
12
+ [![GitHub stars](https://img.shields.io/github/stars/TEN-framework/ten-vad?style=social&label=Star)](https://GitHub.com/TEN-framework/ten-vad/stargazers/?WT.mc_id=academic-105485-koreyst)
13
+
14
+ <br>
15
+
16
+ *Latest News* 🔥
17
+ - [2025/06] We **finally** released and **open-sourced** the **ONNX** model and the corresponding **preprocessing code**! Now you can deploy **TEN VAD** on **any platform** and **any hardware architecture**!
18
+ - [2025/06] We are excited to announce the release of **WASM+JS** for Web WASM Support.
19
+ <br>
20
+
21
+ ## Table of Contents
22
+
23
+ - [Welcome to TEN](#welcome-to-ten)
24
+ - [TEN Hugging Face Space](#ten-hugging-face-space)
25
+ - [Introduction](#introduction)
26
+ - [Key Features](#key-features)
27
+ - [High-Performance](#1-high-performance)
28
+ - [Agent-Friendly](#2-agent-friendly)
29
+ - [Lightweight](#3-lightweight)
30
+ - [Multiple Programming Languages and Platforms](#4-multiple-programming-languages-and-platforms)
31
+ - [Supported Sampling Rate and Hop Size](#5-supproted-sampling-rate-and-hop-size)
32
+ - [Installation](#installation)
33
+ - [Quick Start](#quick-start)
34
+ - [Python Usage](#python-usage)
35
+ - [Linux](#1-linux)
36
+ - [JS Usage](#js-usage)
37
+ - [Web](#1-web)
38
+ - [C Usage](#c-usage)
39
+ - [Linux](#1-linux-1)
40
+ - [Windows](#2-windows)
41
+ - [macOS](#3-macos)
42
+ - [Android](#4-android)
43
+ - [iOS](#5-ios)
44
+ - [TEN Ecosystem](#ten-ecosystem)
45
+ - [Ask Questions](#ask-questions)
46
+ - [Citations](#citations)
47
+ - [License](#license)
48
+
49
+ <br>
50
+
51
+ ## Welcome to TEN
52
+
53
+ TEN is a collection of open-source projects for building real-time, multimodal conversational voice agents. It includes [ TEN Framework ](https://github.com/ten-framework/ten-framework), [ TEN VAD ](https://github.com/ten-framework/ten-vad), [ TEN Turn Detection ](https://github.com/ten-framework/ten-turn-detection), TEN Agent, TMAN Designer, and [ TEN Portal ](https://github.com/ten-framework/portal), all fully open-source.
54
+
55
+ <br>
56
+
57
+ | Community Channel | Purpose |
58
+ | ---------------- | ------- |
59
+ | [![Follow on X](https://img.shields.io/twitter/follow/TenFramework?logo=X&color=%20%23f5f5f5)](https://twitter.com/intent/follow?screen_name=TenFramework) | Follow TEN Framework on X for updates and announcements |
60
+ | [![Follow on LinkedIn](https://custom-icon-badges.demolab.com/badge/LinkedIn-TEN_Framework-0A66C2?logo=linkedin-white&logoColor=fff)](https://www.linkedin.com/company/ten-framework) | Follow TEN Framework on LinkedIn for updates and announcements |
61
+ | [![Discord TEN Community](https://dcbadge.vercel.app/api/server/VnPftUzAMJ?&style=flat&theme=light&color=lightgray)](https://discord.gg/VnPftUzAMJ) | Join our Discord community to connect with developers |
62
+ | [![Hugging Face Space](https://img.shields.io/badge/Hugging%20Face-TEN%20Framework-yellow?style=flat&logo=huggingface)](https://huggingface.co/TEN-framework) | Join our Hugging Face community to explore our spaces and models |
63
+ | [![WeChat](https://img.shields.io/badge/TEN_Framework-WeChat_Group-%2307C160?logo=wechat&labelColor=darkgreen&color=gray)](https://github.com/TEN-framework/ten-agent/discussions/170) | Join our WeChat group for Chinese community discussions |
64
+
65
+ <br>
66
+
67
+ > \[!IMPORTANT]
68
+ >
69
+ > **Star TEN Repositories** ⭐️
70
+ >
71
+ > Get instant notifications for new releases and updates. Your support helps us grow and improve TEN!
72
+
73
+ <br>
74
+
75
+ ![TEN star us gif](https://github.com/user-attachments/assets/eeebe996-8c14-4bf7-82ae-f1a1f7e30705)
76
+
77
+ <br>
78
+
79
+ ## TEN Hugging Face Space
80
+
81
+ <https://github.com/user-attachments/assets/725a8318-d679-4b17-b9e4-e3dce999b298>
82
+
83
+ You are more than welcome to [Visit TEN Hugging Face Space](https://huggingface.co/spaces/TEN-framework/ten-agent-demo) to try VAD and Turn Detection together.
84
+
85
+ <br>
86
 
87
  ## **Introduction**
88
  **TEN VAD** is a real-time voice activity detection system designed for enterprise use, providing accurate frame-level speech activity detection. It shows superior precision compared to both WebRTC VAD and Silero VAD, which are commonly used in the industry. Additionally, TEN VAD offers lower computational complexity and reduced memory usage compared to Silero VAD. Meanwhile, the architecture's temporal efficiency enables rapid voice activity detection, significantly reducing end-to-end response and turn detection latency in conversational AI systems.
89
 
90
 
91
+ <br>
92
 
93
  ## **Key Features**
94
 
 
96
 
97
  The precision-recall curves comparing the performance of WebRTC VAD (pitch-based), Silero VAD, and TEN VAD are shown below. The evaluation is conducted on the precisely manually annotated testset. The audio files are from librispeech, gigaspeech, DNS Challenge etc. As demonstrated, TEN VAD achieves the best performance. Additionally, cross-validation experiments conducted on large internal real-world datasets demonstrate the reproducibility of these findings. The **testset with annotated labels** is released in directory "testset" of this repository.
98
 
99
+ <br>
100
 
101
  <div style="text-align:">
102
  <img src="./examples/images/PR_Curves_testset.png" width="800">
 
108
  cd ./examples
109
  python plot_pr_curves.py
110
  ```
111
+ <br>
112
 
113
  ### **2. Agent-Friendly:**
114
  As illustrated in the figure below, TEN VAD rapidly detects speech-to-non-speech transitions, whereas Silero VAD suffers from a delay of several hundred milliseconds, resulting in increased end-to-end latency in human-agent interaction systems. In addition, as demonstrated in the 6.5s-7.0s audio segment, Silero VAD fails to identify short silent durations between adjacent speech segments.
115
  <div style="text-align:">
116
  <img src="./examples/images/Agent-Friendly-image.png" width="800">
117
  </div>
118
+ <br>
119
 
120
  ### **3. Lightweight:**
121
  We evaluated the RTF (Real-Time Factor) across five distinct platforms, each equipped with varying CPUs. TEN VAD demonstrates much lower computational complexity and smaller library size than Silero VAD.
 
126
  <th align="center" rowspan="2" valign="middle"> CPU </th>
127
  <th align="center" colspan="2"> RTF </th>
128
  <th align="center" colspan="2"> Lib Size </th>
129
+
130
  </tr>
131
  <tr>
132
  <th align="center" style="white-space: nowrap;"> TEN VAD </th>
 
138
  <th align="center" rowspan="3"> Linux </th>
139
  <td style="white-space: nowrap;"> AMD Ryzen 9 5900X 12-Core </td>
140
  <td align="center"> 0.0150 </td>
141
+ <td align="center" rowspan="2" valign="middle"> / </td>
142
+ <td align="center" rowspan="3" valign="middle"> 306KB </td>
143
+ <td align="center" rowspan="10" style="white-space: nowrap;" valign="middle"> 2.16MB(JIT) / 2.22MB(ONNX) </td>
144
  </tr>
145
  <tr>
146
+ <td style="white-space: nowrap;"> Intel(R) Xeon(R) Platinum 8253 </td>
147
  <td align="center"> 0.0136 </td>
148
  </tr>
149
  <tr>
150
+ <td style="white-space: nowrap;"> Intel(R) Xeon(R) Gold 6348 CPU @ 2.60GHz </td>
151
  <td align="center"> 0.0086 </td>
152
  <td align="center"> 0.0127 </td>
153
  </tr>
 
155
  <th align="center"> Windows </th>
156
  <td> Intel i7-10710U </td>
157
  <td align="center"> 0.0150 </td>
158
+ <td align="center" rowspan="7" valign="middle"> / </td>
159
  <td align="center" style="white-space: nowrap;"> 464KB(x86) / 508KB(x64) </td>
160
  </tr>
161
  <tr>
 
164
  <td align="center"> 0.0160 </td>
165
  <td align="center"> 731KB </td>
166
  </tr>
167
+ <tr>
168
+ <th align="center"> Web </th>
169
+ <td> macOS(M1) </td>
170
+ <td align="center"> 0.010 </td>
171
+ <td align="center"> 277KB </td>
172
+ </tr>
173
  <tr>
174
  <th align="center" rowspan="2"> Android </th>
175
  <td> Galaxy J6+ (32bit, 425) </td>
176
  <td align="center"> 0.0570 </td>
177
+ <td align="center" rowspan="2" style="white-space: nowrap;"> 373KB(v7a) / 532KB(v8a)</td>
178
  </tr>
179
  <tr>
180
  <td> Oppo A3s (450) </td>
 
184
  <th align="center" rowspan="2"> iOS </th>
185
  <td> iPhone6 (A8) </td>
186
  <td align="center"> 0.0210 </td>
187
+ <td align="center" rowspan="2"> 320KB</td>
188
  </tr>
189
  <tr>
190
  <td> iPhone8 (A11) </td>
191
  <td align="center"> 0.0050 </td>
192
  </tr>
193
  </table>
194
+ <br>
 
 
 
 
 
 
195
 
196
  ### **4. Multiple programming languages and platforms:**
197
+ TEN VAD provides cross-platform C compatibility across five operating systems (Linux x64, Windows, macOS, Android, iOS), with Python bindings optimized for Linux x64 and WASM support for the Web.
198
+ <br>
199
+ <br>
200
 
201
 
202
  ### **5. Supproted sampling rate and hop size:**
203
  TEN VAD operates on 16kHz audio input with configurable hop sizes (optimized frame configurations: 160/256 samples=10/16ms). Other sampling rates must be resampled to 16kHz.
204
+ <br>
205
+ <br>
206
 
207
  ## **Installation**
208
  ```
209
+ git clone https://github.com/TEN-framework/ten-vad.git
210
  ```
211
+ <br>
212
 
213
  ## **Quick Start**
214
  The project supports five major platforms with dynamic library linking.
 
226
  <td align="center"> libten_vad.so </td>
227
  <td align="center"> x64 </td>
228
  <td align="center"> Python, C </td>
229
+ <td rowspan="6">ten_vad.h <br> ten_vad.py <br> ten_vad.js</td>
230
  <td> </td>
231
  </tr>
232
  <tr>
 
243
  <td align="center"> C </td>
244
  <td> </td>
245
  </tr>
246
+ <tr>
247
+ <th align="center"> Web </th>
248
+ <td align="center"> ten_vad.wasm </td>
249
+ <td align="center"> / </td>
250
+ <td align="center"> JS </td>
251
+ <td> </td>
252
+ </tr>
253
  <tr>
254
  <th align="center"> Android </th>
255
  <td align="center"> libten_vad.so </td>
 
259
  </tr>
260
  <tr>
261
  <th align="center"> iOS </th>
262
+ <td align="center"> ten_vad.framework </td>
263
+ <td align="center"> arm64 </td>
264
  <td align="center"> C </td>
265
  <td> 1. not simulator <br> 2. not iPad </td>
266
  </tr>
 
267
 
268
+ </table>
269
+ <br>
270
 
271
  ### **Python Usage**
272
  #### **1. Linux**
 
283
 
284
  <br>
285
 
286
+ The **lib** only depends on numpy; you have to install the dependency via requirements.txt:
287
 
288
  ```pip install -r requirements.txt```
289
 
 
301
 
302
  <br>
303
 
304
+
305
  #### **Usage**
306
  Note: For usage in python, you can either use it by **git clone** or **pip**.
307
 
 
309
 
310
  1. Clone the repository
311
  ```
312
+ git clone https://github.com/TEN-framework/ten-vad.git
313
  ```
314
 
315
  2. Enter examples directory
 
321
  ```
322
  python test.py s0724-s0730.wav out.txt
323
  ```
324
+ <br>
325
 
326
  ##### **By using pip:**
327
 
 
336
  ```
337
  from ten_vad import TenVad
338
  ```
339
+ <br>
340
+
341
+ ### **JS Usage**
342
+
343
+ #### **1. Web**
344
+ ##### **Requirements**
345
+ - Node.js (macOS v14.18.2, Linux v16.20.2 verified)
346
+ - Terminal
347
+
348
+ ##### **Usage**
349
+ ```
350
+ 1) cd ./examples
351
+ 2) node test_node.js s0724-s0730.wav out.txt
352
+ ```
353
+ <br>
354
 
355
  ### **C Usage**
356
  #### **Build Scripts**
357
+ Located in the examples/ directory and in examples_onnx/ (for **ONNX** usage on Linux):
358
 
359
  - Linux: build-and-deploy-linux.sh
360
  - Windows: build-and-deploy-windows.bat
 
374
  - Configure toolchain and architecture settings
375
 
376
  #### **Overview of Usage**
377
+ - Navigate to examples/ or examples_onnx/ (for **ONNX** usage on Linux)
378
  - Execute platform-specific build script
379
  - Configure dynamic library path
380
  - Run demo with sample audio s0724-s0730.wav
381
  - Processed results saved to out.txt
382
 
383
+ <br>
384
 
385
  The detailed usage methods of each platform are as follows <br>
386
 
 
396
  sudo apt install libc++1
397
  ```
398
 
399
+ ##### **Usage (prebuilt-lib)**
400
  ```
401
  1) cd ./examples
402
  2) ./build-and-deploy-linux.sh
403
  ```
404
 
405
+ ##### **Usage (ONNX)**
406
+ You have to download the **onnxruntime** packages from the [official website](https://github.com/microsoft/onnxruntime). Note that the version of onnxruntime must be higher than or equal to 1.17.1 (e.g. onnxruntime-linux-x64-1.17.1.tgz).
407
+ ```
408
+ 1) cd examples_onnx/
409
+ 2) ./build-and-deploy-linux.sh --ort-path /absolute/path/to/your/onnxruntime/root/dir
410
+ ```
411
+ Note: If you run the ONNX demo from a directory other than the one where build-and-deploy-linux.sh was executed, create a symbolic link to src/onnx_model/ so that the ONNX model file can still be loaded.
412
+
413
+ <br>
414
+
415
  #### **2. Windows**
416
  ##### **Requirements**
417
  - Visual Studio (2017, 2019, 2022 verified)
 
426
  - Visual Studio version (default: 2019)
427
  3) ./build-and-deploy-windows.bat
428
  ```
429
+ <br>
430
 
431
  #### **3. macOS**
432
  ##### **Requirements**
 
441
  - Alternative: x86_64 (Intel)
442
  3) ./build-and-deploy-mac.sh
443
  ```
444
+ <br>
445
 
446
  #### **4. Android**
447
  ##### **Requirements**
 
458
  - Toolchain: aarch64-linux-android-clang (default) or custom NDK toolchain
459
  4) ./build-and-deploy-android.sh
460
  ```
461
+ <br>
462
 
463
  #### **5. iOS**
464
  ##### **Requirements**
 
510
 
511
  3.5. Build in Xcode and run demo on your device.
512
 
513
+ <br>
514
+
515
+ ## TEN Ecosystem
516
+
517
+ | Project | Preview |
518
+ | ------- | ------- |
519
+ | [**🏚️ TEN Framework**][ten-framework-link]<br>TEN is an open-source framework for real-time, multimodal conversational AI.<br><br>![][ten-framework-shield] | ![][ten-framework-banner] |
520
+ | [**️🔂 TEN Turn Detection**][ten-turn-detection-link]<br>TEN is for full-duplex dialogue communication.<br><br>![][ten-turn-detection-shield] | ![][ten-turn-detection-banner] |
521
+ | [**🔉 TEN VAD**][ten-vad-link]<br>TEN VAD is a low-latency, lightweight and high-performance streaming voice activity detector (VAD).<br><br>![][ten-vad-shield] | ![][ten-vad-banner] |
522
+ | [**🎙️ TEN Agent**][ten-agent-link]<br>TEN Agent is a showcase of TEN Framework.<br><br> | ![][ten-agent-banner] |
523
+ | **🎨 TMAN Designer** <br>TMAN Designer is a low/no-code option for building a voice agent with an easy-to-use workflow UI.<br><br> | ![][tman-designer-banner] |
524
+ | [**📒 TEN Portal**][ten-portal-link]<br>The official site of the TEN Framework, with documentation and a blog.<br><br>![][ten-portal-shield] | ![][ten-portal-banner] |
525
+
526
+ <br>
527
+
528
+ ## Ask Questions
529
+
530
+ [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/TEN-framework/TEN-vad)
531
+
532
+ Most questions can be answered by using DeepWiki; it is fast, intuitive to use, and supports multiple languages.
533
+
534
+ <br>
535
+
536
  ## **Citations**
537
  ```
538
  @misc{TEN VAD,
 
545
  email = {[email protected]}
546
  }
547
  ```
548
+ <br>
549
+
550
+ ## License
551
+
552
+ This project is licensed under Apache 2.0 with additional conditions. Refer to the "LICENSE" file in the root directory for detailed information. Note that `pitch_est.cc` contains modified code derived from [LPCNet](https://github.com/xiph/LPCNet), which is [BSD-2-Clause](https://spdx.org/licenses/BSD-2-Clause.html) and [BSD-3-Clause](https://spdx.org/licenses/BSD-3-Clause.html) licensed; refer to the NOTICES file in the root directory for detailed information.
553
+
554
+
555
+ <br>
556
 
 
557
 
558
+ [back-to-top]: https://img.shields.io/badge/-Back_to_top-gray?style=flat-square
 
 
 
 
 
559
 
560
+ [ten-framework-shield]: https://img.shields.io/github/stars/ten-framework/ten_framework?color=ffcb47&labelColor=gray&style=flat-square&logo=github
561
+ [ten-framework-banner]: https://github.com/user-attachments/assets/7c8f72d7-3993-4d01-8504-b71578a22944
562
+ [ten-framework-link]: https://github.com/ten-framework/ten_framework
 
563
 
564
+ [ten-vad-link]: https://github.com/ten-framework/ten-vad
565
+ [ten-vad-shield]: https://img.shields.io/github/stars/ten-framework/ten-vad?color=ffcb47&labelColor=gray&style=flat-square&logo=github
566
+ [ten-vad-banner]: https://github.com/user-attachments/assets/d45870e4-9453-4047-8163-08737f82863f
567
 
568
+ [ten-turn-detection-link]: https://github.com/ten-framework/ten-turn-detection
569
+ [ten-turn-detection-shield]: https://img.shields.io/github/stars/ten-framework/ten-turn-detection?color=ffcb47&labelColor=gray&style=flat-square&logo=github
570
+ [ten-turn-detection-banner]: https://github.com/user-attachments/assets/8d0ec716-5d0e-43e4-ad9a-d97b17305658
571
 
572
+ [ten-agent-link]: https://github.com/TEN-framework/ten-framework/tree/main/ai_agents
573
+ [ten-agent-banner]: https://github.com/user-attachments/assets/38de2207-939b-4702-a0aa-04491f5b5275
574
+ [tman-designer-banner]: https://github.com/user-attachments/assets/804c3543-0a47-42b7-b40b-ef32b742fb8f
575
 
576
+ [ten-portal-link]: https://github.com/ten-framework/portal
577
+ [ten-portal-shield]: https://img.shields.io/github/stars/ten-framework/portal?color=ffcb47&labelColor=gray&style=flat-square&logo=github
578
+ [ten-portal-banner]: https://github.com/user-attachments/assets/e17d8aaa-5928-45dd-ac71-814928e26a89
examples/.gitattributes DELETED
@@ -1 +0,0 @@
1
- *.wav filter=lfs diff=lfs merge=lfs -text
 
 
examples/CMakeLists.txt CHANGED
@@ -1,3 +1,9 @@
 
 
 
 
 
 
1
  cmake_minimum_required(VERSION 3.10)
2
  get_filename_component(ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../ ABSOLUTE)
3
 
 
1
+ #
2
+ # Copyright © 2025 Agora
3
+ # This file is part of TEN Framework, an open source project.
4
+ # Licensed under the Apache License, Version 2.0, with certain conditions.
5
+ # Refer to the "LICENSE" file in the root directory for more information.
6
+ #
7
  cmake_minimum_required(VERSION 3.10)
8
  get_filename_component(ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../ ABSOLUTE)
9
 
examples/build-and-deploy-android.sh CHANGED
@@ -1,4 +1,10 @@
1
  #!/bin/bash
 
 
 
 
 
 
2
  set -eo pipefail
3
 
4
  # Customize the arch and toolchain
 
1
  #!/bin/bash
2
+ #
3
+ # Copyright © 2025 Agora
4
+ # This file is part of TEN Framework, an open source project.
5
+ # Licensed under the Apache License, Version 2.0, with certain conditions.
6
+ # Refer to the "LICENSE" file in the root directory for more information.
7
+ #
8
  set -eo pipefail
9
 
10
  # Customize the arch and toolchain
examples/build-and-deploy-ios.sh CHANGED
@@ -1,4 +1,10 @@
1
  #!/usr/bin/env bash
 
 
 
 
 
 
2
  set -euo pipefail
3
 
4
  work_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 
1
  #!/usr/bin/env bash
2
+ #
3
+ # Copyright © 2025 Agora
4
+ # This file is part of TEN Framework, an open source project.
5
+ # Licensed under the Apache License, Version 2.0, with certain conditions.
6
+ # Refer to the "LICENSE" file in the root directory for more information.
7
+ #
8
  set -euo pipefail
9
 
10
  work_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
examples/build-and-deploy-linux.sh CHANGED
@@ -1,4 +1,10 @@
1
  #!/bin/bash
 
 
 
 
 
 
2
  set -euo pipefail
3
 
4
  arch=x64
 
1
  #!/bin/bash
2
+ #
3
+ # Copyright © 2025 Agora
4
+ # This file is part of TEN Framework, an open source project.
5
+ # Licensed under the Apache License, Version 2.0, with certain conditions.
6
+ # Refer to the "LICENSE" file in the root directory for more information.
7
+ #
8
  set -euo pipefail
9
 
10
  arch=x64
examples/build-and-deploy-mac.sh CHANGED
@@ -1,4 +1,10 @@
1
  #!/bin/bash
 
 
 
 
 
 
2
  set -euo pipefail
3
 
4
  # Customize the arch
 
1
  #!/bin/bash
2
+ #
3
+ # Copyright © 2025 Agora
4
+ # This file is part of TEN Framework, an open source project.
5
+ # Licensed under the Apache License, Version 2.0, with certain conditions.
6
+ # Refer to the "LICENSE" file in the root directory for more information.
7
+ #
8
  set -euo pipefail
9
 
10
  # Customize the arch
examples/build-and-deploy-windows.bat CHANGED
@@ -1,6 +1,13 @@
1
  @echo off
2
  setlocal
3
 
 
 
 
 
 
 
 
4
  @REM Customize the arch
5
  set arch=x64
6
  @REM set arch=x86
 
1
  @echo off
2
  setlocal
3
 
4
+ @REM
5
+ @REM Copyright © 2025 Agora
6
+ @REM This file is part of TEN Framework, an open source project.
7
+ @REM Licensed under the Apache License, Version 2.0, with certain conditions.
8
+ @REM Refer to the "LICENSE" file in the root directory for more information.
9
+ @REM
10
+
11
  @REM Customize the arch
12
  set arch=x64
13
  @REM set arch=x86
examples/images/.gitattributes DELETED
@@ -1,2 +0,0 @@
1
- *.jpg filter=lfs diff=lfs merge=lfs -text
2
- *.png filter=lfs diff=lfs merge=lfs -text
 
 
 
examples/main.c CHANGED
@@ -1,7 +1,8 @@
1
  //
 
2
  // This file is part of TEN Framework, an open source project.
3
- // Licensed under the Apache License, Version 2.0.
4
- // See the LICENSE file for more information.
5
  //
6
  #include <stdio.h>
7
  #include <stdint.h>
@@ -86,9 +87,16 @@ int vad_process(int16_t *input_buf, uint32_t frame_num,
86
  for (int i = 0; i < frame_num; ++i)
87
  {
88
  int16_t *audio_data = input_buf + i * hop_size;
89
- ten_vad_process(ten_vad_handle, audio_data, hop_size,
90
- &out_probs[i], &out_flags[i]);
91
- printf("[%d] %0.6f, %d\n", i, out_probs[i], out_flags[i]);
 
 
 
 
 
 
 
92
  }
93
  uint64_t end = get_timestamp_ms();
94
  *use_time = (float)(end - start);
@@ -295,4 +303,4 @@ int read_wav_file(FILE *fp, wav_info_t *info)
295
  // restore original file position
296
  fseek(fp, orig_pos, SEEK_SET);
297
  return 0;
298
- }
 
1
  //
2
+ // Copyright © 2025 Agora
3
  // This file is part of TEN Framework, an open source project.
4
+ // Licensed under the Apache License, Version 2.0, with certain conditions.
5
+ // Refer to the "LICENSE" file in the root directory for more information.
6
  //
7
  #include <stdio.h>
8
  #include <stdint.h>
 
87
  for (int i = 0; i < frame_num; ++i)
88
  {
89
  int16_t *audio_data = input_buf + i * hop_size;
90
+ int res = ten_vad_process(ten_vad_handle, audio_data, hop_size,
91
+ &out_probs[i], &out_flags[i]);
92
+ if (res == 0)
93
+ {
94
+ printf("[%d] %0.6f, %d\n", i, out_probs[i], out_flags[i]);
95
+ }
96
+ else
97
+ {
98
+ printf("ten_vad_process failed res %d\n", res);
99
+ }
100
  }
101
  uint64_t end = get_timestamp_ms();
102
  *use_time = (float)(end - start);
 
303
  // restore original file position
304
  fseek(fp, orig_pos, SEEK_SET);
305
  return 0;
306
+ }
examples/plot_pr_curves.py CHANGED
@@ -1,7 +1,8 @@
1
  #
2
- # This file is part of TEN Framework, an open source project.
3
- # Licensed under the Apache License, Version 2.0.
4
- # See the LICENSE file for more information.
 
5
  #
6
  import os, glob, sys, torchaudio
7
  import numpy as np
@@ -114,7 +115,7 @@ if __name__ == "__main__":
114
  # Get the directory of the script
115
  script_dir = os.path.dirname(os.path.abspath(__file__))
116
 
117
- # testset dir
118
  test_dir = f"{script_dir}/../testset"
119
 
120
  # Initialization
 
1
  #
2
+ # Copyright © 2025 Agora
3
+ # This file is part of TEN Framework, an open source project.
4
+ # Licensed under the Apache License, Version 2.0, with certain conditions.
5
+ # Refer to the "LICENSE" file in the root directory for more information.
6
  #
7
  import os, glob, sys, torchaudio
8
  import numpy as np
 
115
  # Get the directory of the script
116
  script_dir = os.path.dirname(os.path.abspath(__file__))
117
 
118
+ # TEN-VAD-TestSet dir
119
  test_dir = f"{script_dir}/../testset"
120
 
121
  # Initialization
examples/sample_array.h CHANGED
@@ -1,3 +1,9 @@
 
 
 
 
 
 
1
  // Used for iOS APP demo
2
  unsigned char sample_array[] = {
3
  0xe3, 0xff, 0xd4, 0xff, 0xdc, 0xff, 0xe0, 0xff, 0xf6, 0xff, 0xf5, 0xff, 0xf6, 0xff, 0xfc, 0xff,
 
1
+ //
2
+ // Copyright © 2025 Agora
3
+ // This file is part of TEN Framework, an open source project.
4
+ // Licensed under the Apache License, Version 2.0, with certain conditions.
5
+ // Refer to the "LICENSE" file in the root directory for more information.
6
+ //
7
  // Used for iOS APP demo
8
  unsigned char sample_array[] = {
9
  0xe3, 0xff, 0xd4, 0xff, 0xdc, 0xff, 0xe0, 0xff, 0xf6, 0xff, 0xf5, 0xff, 0xf6, 0xff, 0xfc, 0xff,
examples/test.py CHANGED
@@ -1,7 +1,8 @@
1
  #
2
- # This file is part of TEN Framework, an open source project.
3
- # Licensed under the Apache License, Version 2.0.
4
- # See the LICENSE file for more information.
 
5
  #
6
  import sys, os
7
 
 
1
  #
2
+ # Copyright © 2025 Agora
3
+ # This file is part of TEN Framework, an open source project.
4
+ # Licensed under the Apache License, Version 2.0, with certain conditions.
5
+ # Refer to the "LICENSE" file in the root directory for more information.
6
  #
7
  import sys, os
8
 
examples/test_node.js ADDED
@@ -0,0 +1,529 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env node
2
+
3
+ //
4
+ // Copyright © 2025 Agora
5
+ // This file is part of TEN Framework, an open source project.
6
+ // Licensed under the Apache License, Version 2.0, with certain conditions.
7
+ // Refer to the "LICENSE" file in the root directory for more information.
8
+ //
9
+
10
+ /**
11
+ * TEN VAD WebAssembly Node.js Test
12
+ * Simplified and clean version based on main.c
13
+ */
14
+
15
+ const fs = require('fs');
16
+ const path = require('path');
17
+
18
+ // Configuration
19
+ const HOP_SIZE = 256; // 16ms per frame
20
+ const VOICE_THRESHOLD = 0.5; // Voice detection threshold
21
+
22
+ // WASM module paths
23
+ const WASM_DIR = './../lib/Web';
24
+ const WASM_JS_FILE = path.join(WASM_DIR, 'ten_vad.js');
25
+ const WASM_BINARY_FILE = path.join(WASM_DIR, 'ten_vad.wasm');
26
+
27
+ // Global state
28
+ let vadModule = null;
29
+ let vadHandle = null;
30
+ let vadHandlePtr = null;
31
+
32
+ // ============================================================================
33
+ // UTILITY FUNCTIONS
34
+ // ============================================================================
35
+
36
/**
 * Returns the current wall-clock time.
 * @returns {number} Milliseconds since the Unix epoch, as reported by Date.now().
 */
function getTimestamp() {
  const nowMs = Date.now();
  return nowMs;
}
39
+
40
/**
 * Installs fallback `getValue` and `UTF8ToString` helpers on `vadModule` when
 * the loaded module object does not already expose them. Helpers that are
 * already present are left untouched.
 */
function addHelperFunctions() {
  if (!vadModule.getValue) {
    /**
     * Reads a 32-bit value from the WASM heap.
     * @param {number} ptr  Byte address inside the heap.
     * @param {string} type Either 'i32' or 'float'.
     */
    vadModule.getValue = function(ptr, type) {
      // The typed heap views are indexed per 4-byte element, hence ptr >> 2.
      switch (type) {
        case 'i32': return vadModule.HEAP32[ptr >> 2];
        case 'float': return vadModule.HEAPF32[ptr >> 2];
        default: throw new Error(`Unsupported type: ${type}`);
      }
    };
  }

  if (!vadModule.UTF8ToString) {
    /**
     * Reads a NUL-terminated string starting at `ptr` and decodes it as UTF-8.
     * Decoding the whole byte run at once (instead of mapping each byte to a
     * char code) keeps multi-byte sequences intact.
     * @param {number} ptr Byte address of the string; 0/null yields ''.
     */
    vadModule.UTF8ToString = function(ptr) {
      if (!ptr) return '';
      const heap = vadModule.HEAPU8;
      let end = ptr;
      while (end < heap.length && heap[end]) {
        end++;
      }
      return Buffer.from(heap.subarray(ptr, end)).toString('utf8');
    };
  }
}
63
+
64
+ // ============================================================================
65
+ // AUDIO GENERATION
66
+ // ============================================================================
67
+
68
/**
 * Synthesizes a 16 kHz, 16-bit test signal made of four segments:
 *   [0s, 2s)  sum of 440 Hz and 880 Hz sines,
 *   [2s, 3s)  uniform random noise,
 *   [3s, 4s)  sum of 220 Hz and 660 Hz sines,
 *   [4s, ..)  near-silence with a small random offset.
 *
 * @param {number} [durationMs=5000] Length of the signal in milliseconds.
 * @returns {Int16Array} The generated samples, clamped to the int16 range.
 */
function generateTestAudio(durationMs = 5000) {
  const sampleRate = 16000;
  const totalSamples = Math.floor(durationMs * sampleRate / 1000);
  const audioData = new Int16Array(totalSamples);

  console.log(`Generating ${totalSamples} samples for ${durationMs}ms audio...`);

  // Raw (unclamped, unrounded) sample value at time t, in seconds.
  const synthesize = (t) => {
    if (t < 2.0) {
      // Voice frequencies (440Hz + 880Hz)
      return Math.sin(2 * Math.PI * 440 * t) * 8000 +
             Math.sin(2 * Math.PI * 880 * t) * 4000;
    }
    if (t < 3.0) {
      // Noise
      return (Math.random() - 0.5) * 3000;
    }
    if (t < 4.0) {
      // Mixed voice (220Hz + 660Hz)
      return Math.sin(2 * Math.PI * 220 * t) * 6000 +
             Math.sin(2 * Math.PI * 660 * t) * 3000;
    }
    // Silence with minimal noise
    return Math.random() * 50;
  };

  for (let idx = 0; idx < totalSamples; idx += 1) {
    const rounded = Math.floor(synthesize(idx / sampleRate));
    const capped = Math.min(32767, rounded);
    audioData[idx] = Math.max(-32768, capped);
  }

  return audioData;
}
100
+
101
+ // ============================================================================
102
+ // VAD OPERATIONS
103
+ // ============================================================================
104
+
105
/**
 * Queries the library version string through `_ten_vad_get_version`.
 * @returns {string} The version string, or "unknown" when the module is not
 *                   loaded or the lookup throws.
 */
function getVADVersion() {
  const fallback = "unknown";
  if (!vadModule) {
    return fallback;
  }

  let version = fallback;
  try {
    version = vadModule.UTF8ToString(vadModule._ten_vad_get_version());
  } catch (error) {
    version = fallback;
  }
  return version;
}
114
+
115
/**
 * Creates a VAD instance with HOP_SIZE and VOICE_THRESHOLD and stores the
 * handle slot address and handle value in the module-level `vadHandlePtr` /
 * `vadHandle`.
 *
 * The globals are only written on success, so they never refer to a slot that
 * has already been freed (which would make a later destroyVADInstance() free
 * it a second time).
 *
 * @returns {boolean} true when the instance was created, false otherwise.
 */
function createVADInstance() {
  let handleSlot = 0;
  try {
    // 4-byte slot that ten_vad_create fills with the new handle.
    handleSlot = vadModule._malloc(4);
    if (!handleSlot) {
      console.error('Error creating VAD instance: WASM heap allocation failed');
      return false;
    }

    const result = vadModule._ten_vad_create(handleSlot, HOP_SIZE, VOICE_THRESHOLD);
    if (result !== 0) {
      console.error(`VAD creation failed with code: ${result}`);
      vadModule._free(handleSlot);
      return false;
    }

    vadHandlePtr = handleSlot;
    vadHandle = vadModule.getValue(handleSlot, 'i32');
    return true;
  } catch (error) {
    console.error(`Error creating VAD instance: ${error.message}`);
    // Release the slot unless it has already been published to the globals.
    if (handleSlot && handleSlot !== vadHandlePtr && vadModule) {
      try {
        vadModule._free(handleSlot);
      } catch (freeError) {
        // Best effort: the module itself is unusable at this point.
      }
    }
    return false;
  }
}
133
+
134
/**
 * Destroys the current VAD instance, frees its handle slot and clears the
 * module-level handle state. Does nothing when no instance or module exists.
 */
function destroyVADInstance() {
  if (!vadHandlePtr || !vadModule) {
    return;
  }

  const handleSlot = vadHandlePtr;
  vadModule._ten_vad_destroy(handleSlot);
  vadModule._free(handleSlot);

  vadHandlePtr = null;
  vadHandle = null;
}
142
+
143
/**
 * Runs the VAD over `frameNum` consecutive frames of HOP_SIZE samples.
 *
 * A fresh VAD instance is created for the run and is always destroyed again,
 * even when processing throws. The WASM scratch buffers are allocated once
 * for the whole run instead of once per frame.
 *
 * @param {Int16Array} inputBuf  16-bit PCM samples.
 * @param {number}     frameNum  Number of frames to process.
 * @param {Float32Array} outProbs Receives one probability per frame.
 * @param {Int32Array}   outFlags Receives one flag per frame.
 * @returns {Promise<number>} Elapsed processing time in milliseconds, or -1
 *                            when the VAD instance or buffers could not be created.
 */
async function processAudio(inputBuf, frameNum, outProbs, outFlags) {
  console.log(`VAD version: ${getVADVersion()}`);

  if (!createVADInstance()) {
    return -1;
  }

  let audioPtr = 0;
  let probPtr = 0;
  let flagPtr = 0;

  try {
    audioPtr = vadModule._malloc(HOP_SIZE * 2);
    probPtr = vadModule._malloc(4);
    flagPtr = vadModule._malloc(4);
    if (!audioPtr || !probPtr || !flagPtr) {
      console.error('Failed to allocate WASM buffers for audio processing');
      return -1;
    }

    const startTime = getTimestamp();

    for (let i = 0; i < frameNum; i++) {
      const frameStart = i * HOP_SIZE;
      const frameData = inputBuf.slice(frameStart, frameStart + HOP_SIZE);

      // HEAP16 is indexed per 2-byte element.
      const heapIndex = audioPtr / 2;
      vadModule.HEAP16.set(frameData, heapIndex);
      if (frameData.length < HOP_SIZE) {
        // Input ran out: pad with silence instead of reusing stale samples
        // left in the buffer by the previous frame.
        vadModule.HEAP16.fill(0, heapIndex + frameData.length, heapIndex + HOP_SIZE);
      }

      const result = vadModule._ten_vad_process(
        vadHandle, audioPtr, HOP_SIZE, probPtr, flagPtr
      );

      if (result === 0) {
        const probability = vadModule.getValue(probPtr, 'float');
        const flag = vadModule.getValue(flagPtr, 'i32');

        outProbs[i] = probability;
        outFlags[i] = flag;

        console.log(`[${i}] ${probability.toFixed(6)}, ${flag}`);
      } else {
        console.error(`Frame ${i} processing failed with code: ${result}`);
        outProbs[i] = 0.0;
        outFlags[i] = 0;
      }
    }

    return getTimestamp() - startTime;
  } finally {
    if (audioPtr) vadModule._free(audioPtr);
    if (probPtr) vadModule._free(probPtr);
    if (flagPtr) vadModule._free(flagPtr);
    destroyVADInstance();
  }
}
193
+
194
+ // ============================================================================
195
+ // RESULT HANDLING
196
+ // ============================================================================
197
+
198
/**
 * Prints a processing summary: elapsed time, audio duration, real-time factor
 * and the share of frames flagged as voice.
 *
 * @param {number} processingTime Elapsed processing time in milliseconds.
 * @param {number} totalAudioTime Audio duration in milliseconds.
 * @param {Int32Array} outFlags   Per-frame flags; a value of 1 counts as voice.
 * @param {number} frameNum       Total number of frames.
 */
function printResults(processingTime, totalAudioTime, outFlags, frameNum) {
  let voiceFrames = 0;
  for (const flag of outFlags) {
    if (flag === 1) {
      voiceFrames += 1;
    }
  }

  const rtf = processingTime / totalAudioTime;
  const voicePercentage = (voiceFrames / frameNum * 100).toFixed(1);

  const summary = [
    `\n=== Processing Results ===`,
    `Time: ${processingTime}ms, Audio: ${totalAudioTime.toFixed(2)}ms, RTF: ${rtf.toFixed(6)}`,
    `Voice frames: ${voiceFrames}/${frameNum} (${voicePercentage}%)`
  ];
  for (const line of summary) {
    console.log(line);
  }
}
207
+
208
/**
 * Writes one "[index] probability, flag" line per frame to a text file.
 * A write failure is reported on stderr and not rethrown.
 *
 * @param {Float32Array} outProbs Per-frame probabilities.
 * @param {Int32Array}   outFlags Per-frame flags.
 * @param {number}       frameNum Number of frames to write.
 * @param {string} [filename='out.txt'] Destination path.
 */
function saveResults(outProbs, outFlags, frameNum, filename = 'out.txt') {
  const rows = [];
  for (let frame = 0; frame < frameNum; frame += 1) {
    rows.push(`[${frame}] ${outProbs[frame].toFixed(6)}, ${outFlags[frame]}\n`);
  }
  const output = rows.join('');

  try {
    fs.writeFileSync(filename, output);
    console.log(`Results saved to ${filename}`);
  } catch (error) {
    console.error(`Failed to save results: ${error.message}`);
  }
}
221
+
222
+ // ============================================================================
223
+ // TEST FUNCTIONS
224
+ // ============================================================================
225
+
226
/**
 * Runs the VAD over 5 seconds of generated test audio and prints a summary.
 *
 * @returns {Promise<number>} 0 on success, 1 when processing could not be run
 *                            (processAudio reported -1).
 */
async function testWithArray() {
  console.log("=== Array Test ===\n");

  const inputBuf = generateTestAudio(5000);
  const byteNum = inputBuf.byteLength;
  const sampleNum = byteNum / 2;             // 16-bit samples
  const totalAudioTime = sampleNum / 16.0;   // 16 samples per millisecond at 16kHz
  const frameNum = Math.floor(sampleNum / HOP_SIZE);

  console.log(`Audio info: ${byteNum} bytes, ${frameNum} frames, ${totalAudioTime.toFixed(2)}ms`);

  const outProbs = new Float32Array(frameNum);
  const outFlags = new Int32Array(frameNum);

  const processingTime = await processAudio(inputBuf, frameNum, outProbs, outFlags);

  // A negative value signals failure; 0 is a valid result because the timer
  // only has millisecond resolution.
  if (processingTime < 0) {
    return 1;
  }

  printResults(processingTime, totalAudioTime, outFlags, frameNum);
  return 0;
}
248
+
249
+ // WAV File parsing utilities
250
/**
 * Parses the RIFF/WAVE chunk list of a 16-bit PCM WAV file.
 *
 * Walks the chunks after the 12-byte RIFF header, reads the format from the
 * 'fmt ' chunk and locates the first 'data' chunk. The reported data size is
 * clamped to the bytes actually present in `buffer`, so a header that claims
 * more data than the file contains cannot cause reads past the end.
 *
 * @param {Buffer} buffer Entire file contents.
 * @returns {{sampleRate: number, channels: number, bitsPerSample: number,
 *            dataOffset: number, dataSize: number, totalSamples: number,
 *            samplesPerChannel: number}}
 * @throws {Error} When the file is too small, is not RIFF/WAVE, is not 16-bit
 *                 PCM, or lacks a usable 'fmt ' or 'data' chunk.
 */
function parseWAVHeader(buffer) {
  if (buffer.length < 44) {
    throw new Error('Invalid WAV file: too small');
  }

  // Check RIFF header
  if (buffer.toString('ascii', 0, 4) !== 'RIFF') {
    throw new Error('Invalid WAV file: missing RIFF header');
  }

  // Check WAVE format
  if (buffer.toString('ascii', 8, 12) !== 'WAVE') {
    throw new Error('Invalid WAV file: not WAVE format');
  }

  let offset = 12;
  let dataOffset = -1;
  let dataSize = 0;
  let sampleRate = 0;
  let channels = 0;
  let bitsPerSample = 0;
  let fmtFound = false;

  // Parse chunks. A chunk header is 8 bytes (id + size); "<=" lets a header
  // that ends exactly at the end of the file (an empty data chunk) be seen.
  while (offset + 8 <= buffer.length) {
    const chunkId = buffer.toString('ascii', offset, offset + 4);
    const chunkSize = buffer.readUInt32LE(offset + 4);
    const bodyStart = offset + 8;

    if (chunkId === 'fmt ') {
      // Format chunk: the PCM fields occupy the first 16 bytes of the body.
      if (chunkSize < 16 || bodyStart + 16 > buffer.length) {
        throw new Error('Invalid WAV file: truncated fmt chunk');
      }

      const audioFormat = buffer.readUInt16LE(bodyStart);
      channels = buffer.readUInt16LE(bodyStart + 2);
      sampleRate = buffer.readUInt32LE(bodyStart + 4);
      bitsPerSample = buffer.readUInt16LE(bodyStart + 14);

      if (audioFormat !== 1) {
        throw new Error('Unsupported WAV format: only PCM is supported');
      }

      if (bitsPerSample !== 16) {
        throw new Error('Unsupported bit depth: only 16-bit is supported');
      }

      if (channels === 0 || sampleRate === 0) {
        throw new Error('Invalid WAV file: fmt chunk has zero channels or sample rate');
      }

      fmtFound = true;
    } else if (chunkId === 'data' && dataOffset === -1) {
      // Data chunk: remember the first one only.
      dataOffset = bodyStart;
      dataSize = Math.min(chunkSize, buffer.length - bodyStart);
    }

    if (fmtFound && dataOffset !== -1) {
      break;
    }

    // Chunks are padded to an even byte boundary.
    offset = bodyStart + chunkSize + (chunkSize % 2);
  }

  if (dataOffset === -1) {
    throw new Error('Invalid WAV file: no data chunk found');
  }

  if (!fmtFound) {
    throw new Error('Invalid WAV file: no fmt chunk found');
  }

  const bytesPerSample = bitsPerSample / 8;
  const totalSamples = Math.floor(dataSize / bytesPerSample);

  return {
    sampleRate,
    channels,
    bitsPerSample,
    dataOffset,
    dataSize,
    totalSamples,
    samplesPerChannel: Math.floor(totalSamples / channels)
  };
}
321
+
322
/**
 * Runs the VAD over a 16-bit PCM WAV file and writes per-frame results.
 *
 * Only the bytes of the data chunk are converted to samples (trailing chunks
 * are excluded and an odd trailing byte is dropped), and the frame count is
 * derived from the samples actually present. For multi-channel input only the
 * first channel is processed.
 *
 * @param {string} inputFile  Path of the WAV file to read.
 * @param {string} outputFile Path of the text file receiving the results.
 * @returns {Promise<number>} 0 on success, 1 on any failure.
 */
async function testWithWAV(inputFile, outputFile) {
  console.log("=== WAV File Test ===\n");

  if (!fs.existsSync(inputFile)) {
    console.error(`Input file not found: ${inputFile}`);
    return 1;
  }

  try {
    const buffer = fs.readFileSync(inputFile);

    // Parse WAV header properly
    const wavInfo = parseWAVHeader(buffer);
    console.log(`WAV Format: ${wavInfo.channels} channel(s), ${wavInfo.sampleRate}Hz, ${wavInfo.bitsPerSample}-bit`);
    console.log(`Total samples: ${wavInfo.totalSamples}, samples per channel: ${wavInfo.samplesPerChannel}`);

    // Guard the divisions below against a missing or malformed fmt chunk.
    if (!(wavInfo.channels >= 1) || !(wavInfo.sampleRate > 0)) {
      throw new Error('Invalid WAV file: missing or malformed fmt chunk');
    }

    // Validate format requirements
    if (wavInfo.sampleRate !== 16000) {
      console.warn(`Warning: Sample rate is ${wavInfo.sampleRate}Hz, expected 16000Hz`);
    }

    if (wavInfo.channels !== 1) {
      console.warn(`Warning: ${wavInfo.channels} channels detected, only first channel will be used`);
    }

    // Extract audio data: limit to the data chunk and to whole 16-bit samples.
    const dataEnd = Math.min(buffer.length, wavInfo.dataOffset + wavInfo.dataSize);
    let pcmBytes = Math.max(0, dataEnd - wavInfo.dataOffset);
    pcmBytes -= pcmBytes % 2;
    const audioBuffer = buffer.subarray(wavInfo.dataOffset, wavInfo.dataOffset + pcmBytes);

    // Copy into a fresh ArrayBuffer so the Int16Array view starts aligned and
    // ends exactly where the PCM data ends.
    const pcmStart = audioBuffer.byteOffset;
    const inputBuf = new Int16Array(audioBuffer.buffer.slice(pcmStart, pcmStart + pcmBytes));

    // Samples per channel, based on the data actually available.
    const sampleNum = Math.floor(inputBuf.length / wavInfo.channels);

    const totalAudioTime = sampleNum / wavInfo.sampleRate * 1000; // in milliseconds
    const frameNum = Math.floor(sampleNum / HOP_SIZE);

    console.log(`Audio info: ${audioBuffer.length} bytes, ${sampleNum} samples, ${frameNum} frames, ${totalAudioTime.toFixed(2)}ms`);

    // If stereo, extract only the first channel
    let processedInput = inputBuf;
    if (wavInfo.channels > 1) {
      console.log(`Extracting mono from ${wavInfo.channels} channels...`);
      processedInput = new Int16Array(sampleNum);
      for (let i = 0; i < processedInput.length; i++) {
        processedInput[i] = inputBuf[i * wavInfo.channels]; // Take first channel
      }
    }

    const outProbs = new Float32Array(frameNum);
    const outFlags = new Int32Array(frameNum);

    const processingTime = await processAudio(processedInput, frameNum, outProbs, outFlags);

    // A negative value signals failure; 0 is a valid (sub-millisecond) result.
    if (processingTime < 0) {
      return 1;
    }

    printResults(processingTime, totalAudioTime, outFlags, frameNum);
    saveResults(outProbs, outFlags, frameNum, outputFile);
    return 0;
  } catch (error) {
    console.error(`Error processing WAV file: ${error.message}`);
    return 1;
  }
}
387
+
388
/**
 * Times repeated ten_vad_process calls on one fixed 440 Hz frame for 100,
 * 1000 and 10000 iterations and prints total time, time per frame and the
 * real-time factor for each run.
 *
 * The WASM buffers and the VAD instance are released in a `finally` block so
 * they are not leaked when a call throws.
 *
 * @returns {Promise<void>}
 */
async function runBenchmark() {
  console.log("=== Performance Benchmark ===\n");

  if (!createVADInstance()) return;

  let audioPtr = 0;
  let probPtr = 0;
  let flagPtr = 0;

  try {
    const testData = new Int16Array(HOP_SIZE);
    for (let i = 0; i < HOP_SIZE; i++) {
      testData[i] = Math.sin(2 * Math.PI * 440 * i / 16000) * 8000;
    }

    audioPtr = vadModule._malloc(HOP_SIZE * 2);
    probPtr = vadModule._malloc(4);
    flagPtr = vadModule._malloc(4);
    if (!audioPtr || !probPtr || !flagPtr) {
      console.error('Failed to allocate WASM buffers for benchmark');
      return;
    }

    // Each frame represents 16ms of audio (HOP_SIZE=256 samples at 16kHz)
    const frameAudioTime = (HOP_SIZE / 16000) * 1000;

    const testCases = [100, 1000, 10000];

    for (const numFrames of testCases) {
      // Reload the frame before every run so each case starts from the same input.
      vadModule.HEAP16.set(testData, audioPtr / 2);

      const startTime = getTimestamp();

      for (let i = 0; i < numFrames; i++) {
        vadModule._ten_vad_process(vadHandle, audioPtr, HOP_SIZE, probPtr, flagPtr);
      }

      const endTime = getTimestamp();
      const totalTime = endTime - startTime;
      const avgTime = totalTime / numFrames;

      // Calculate RTF (Real-time Factor)
      const totalAudioTime = numFrames * frameAudioTime;
      const rtf = totalTime / totalAudioTime;

      console.log(`${numFrames} frames: ${totalTime}ms total, ${avgTime.toFixed(3)}ms/frame, RTF: ${rtf.toFixed(3)}`);
    }
  } finally {
    if (audioPtr) vadModule._free(audioPtr);
    if (probPtr) vadModule._free(probPtr);
    if (flagPtr) vadModule._free(flagPtr);
    destroyVADInstance();
  }
}
432
+
433
+ // ============================================================================
434
+ // MODULE INITIALIZATION
435
+ // ============================================================================
436
+
437
/**
 * Loads the TEN VAD WebAssembly module into the module-level `vadModule`.
 *
 * The generated ten_vad.js is an ES module; a CommonJS-compatible copy is
 * written to a temporary file in the current working directory, required,
 * and removed again, including when loading fails.
 *
 * @returns {Promise<boolean>} true when the module was loaded, false otherwise.
 */
async function loadModule() {
  const tempPath = path.resolve('./ten_vad_temp.js');
  let tempCreated = false;

  try {
    console.log("Loading WebAssembly module...");

    if (!fs.existsSync(WASM_JS_FILE)) {
      throw new Error(`ten_vad.js not found at ${WASM_JS_FILE}`);
    }

    if (!fs.existsSync(WASM_BINARY_FILE)) {
      throw new Error(`ten_vad.wasm not found at ${WASM_BINARY_FILE}`);
    }

    // Read and modify the module file for Node.js compatibility.
    // JSON.stringify yields a correctly escaped JS string literal (backslashes
    // in Windows paths, quotes), and the replacer function stops "$" sequences
    // in the path from being read as replacement patterns.
    const scriptPathLiteral = JSON.stringify(path.resolve(WASM_JS_FILE));
    const wasmJsContent = fs.readFileSync(WASM_JS_FILE, 'utf8');
    const modifiedContent = wasmJsContent
      .replace(/import\.meta\.url/g, () => scriptPathLiteral)
      .replace(/export default createVADModule;/, 'module.exports = createVADModule;');

    // Write temporary file
    tempCreated = true;
    fs.writeFileSync(tempPath, modifiedContent);

    // Load WASM binary
    const wasmBinary = fs.readFileSync(WASM_BINARY_FILE);

    // Load module
    const createVADModule = require(tempPath);
    vadModule = await createVADModule({
      wasmBinary: wasmBinary,
      locateFile: (filePath) => filePath.endsWith('.wasm') ? WASM_BINARY_FILE : filePath,
      noInitialRun: false,
      noExitRuntime: true
    });

    // Add missing helper functions
    addHelperFunctions();

    console.log(`Module loaded successfully. Version: ${getVADVersion()}\n`);
    return true;

  } catch (error) {
    console.error(`Failed to load module: ${error.message}`);
    return false;
  } finally {
    // Cleanup: runs on both the success and the failure path.
    if (tempCreated) {
      try {
        fs.unlinkSync(tempPath);
      } catch (cleanupError) {
        // The file may never have been created; nothing left to remove.
      }
    }
  }
}
485
+
486
+ // ============================================================================
487
+ // MAIN FUNCTION
488
+ // ============================================================================
489
+
490
/**
 * Command-line entry point.
 *
 * With two or more arguments the first two are taken as input WAV file and
 * output text file; otherwise the generated-array test is run. The benchmark
 * runs afterwards in either case.
 *
 * @returns {Promise<number>} 0 when the selected test succeeded, 1 when it
 *                            failed or an exception was thrown.
 */
async function main() {
  const args = process.argv.slice(2);

  // Initialize module
  if (!await loadModule()) {
    process.exit(1);
  }

  try {
    let status;
    if (args.length >= 2) {
      // Test with WAV file
      const [inputFile, outputFile] = args;
      console.log(`Input: ${inputFile}, Output: ${outputFile}\n`);
      status = await testWithWAV(inputFile, outputFile);
    } else {
      if (args.length === 1) {
        console.warn('Expected <input.wav> <output.txt>; output file missing, running the generated-array test instead.');
      }
      // Test with generated array
      status = await testWithArray();
    }
    await runBenchmark();
    // Propagate the test outcome so a failed test yields a non-zero exit code.
    return status === 0 ? 0 : 1;
  } catch (error) {
    console.error(`Test failed: ${error.message}`);
    return 1;
  }
}
515
+
516
+ // ============================================================================
517
+ // EXECUTION
518
+ // ============================================================================
519
+
520
+ if (require.main === module) {
521
+ main().then(exitCode => {
522
+ process.exit(exitCode);
523
+ }).catch(error => {
524
+ console.error(`Fatal error: ${error.message}`);
525
+ process.exit(1);
526
+ });
527
+ }
528
+
529
+ module.exports = { main, testWithArray, testWithWAV, runBenchmark };
examples_onnx/CMakeLists.txt ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # Copyright © 2025 Agora
3
+ # This file is part of TEN Framework, an open source project.
4
+ # Licensed under the Apache License, Version 2.0, with certain conditions.
5
+ # Refer to the "LICENSE" file in the root directory for more information.
6
+ #
7
cmake_minimum_required(VERSION 3.10)
get_filename_component(ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../ ABSOLUTE)

project(ten_vad)

# ORT_ROOT must point at an ONNX Runtime distribution containing include/ and
# lib/. Without it every path below silently degrades to "/include" and
# "/lib/...", so fail early with a clear message instead.
if(NOT ORT_ROOT)
  message(FATAL_ERROR
    "ORT_ROOT is not set. Configure with -DORT_ROOT=<path_to_onnxruntime>.")
endif()

# Default to Release, but respect a build type chosen on the command line.
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
  set(CMAKE_BUILD_TYPE Release)
endif()

add_compile_options(-Wno-write-strings -Wno-unused-result)
include_directories(${ROOT}/src)
include_directories(${ROOT}/include)
include_directories(${ORT_ROOT}/include)

# link_directories() only affects targets created after the call, so it has to
# come before add_library()/add_executable().
link_directories(${ORT_ROOT}/lib)

file(GLOB LIBRARY_SOURCES "${ROOT}/src/*.cc" "${ROOT}/src/*.c")
add_library(ten_vad SHARED ${LIBRARY_SOURCES})
target_link_libraries(ten_vad "${ORT_ROOT}/lib/libonnxruntime.so")

set(EXECUTABLE_SOURCES ${ROOT}/examples/main.c)
add_executable(ten_vad_demo ${EXECUTABLE_SOURCES})
target_link_libraries(ten_vad_demo ten_vad)
examples_onnx/build-and-deploy-linux.sh ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ #
3
+ # Copyright © 2025 Agora
4
+ # This file is part of TEN Framework, an open source project.
5
+ # Licensed under the Apache License, Version 2.0, with certain conditions.
6
+ # Refer to the "LICENSE" file in the root directory for more information.
7
+ #
8
set -euo pipefail

if [[ "$#" -lt 2 || "$1" != "--ort-path" ]]; then
  echo "usage: $0 --ort-path <path_to_onnxruntime>" >&2
  exit 1
fi

ORT_ROOT="$2"
shift 2

if [[ ! -d "$ORT_ROOT" || ! -d "$ORT_ROOT/lib" || ! -d "$ORT_ROOT/include" ]]; then
  echo "invalid onnxruntime library path: $ORT_ROOT" >&2
  exit 1
fi

# Resolve to an absolute path while still in the caller's directory: the build
# runs from a different directory, where a relative path would point elsewhere.
ORT_ROOT="$(cd "$ORT_ROOT" && pwd)"

# All relative paths below are relative to this script's directory, so make
# the script independent of where it is invoked from.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$script_dir"

arch=x64
build_dir="build-linux/$arch"
rm -rf "$build_dir"
mkdir -p "$build_dir"
cd "$build_dir"

# Step 1: Build the demo
cmake ../../ -DORT_ROOT="$ORT_ROOT"
cmake --build . --config Release

# Step 2: Run the demo
# Link the model directory into the build directory before running the demo.
ln -s ../../../src/onnx_model/
./ten_vad_demo ../../../examples/s0724-s0730.wav out.txt

cd ../../
include/ten_vad.h CHANGED
@@ -1,7 +1,8 @@
1
  //
 
2
  // This file is part of TEN Framework, an open source project.
3
- // Licensed under the Apache License, Version 2.0.
4
- // See the LICENSE file for more information.
5
  //
6
  #ifndef TEN_VAD_H
7
  #define TEN_VAD_H
@@ -83,4 +84,4 @@ extern "C"
83
  }
84
  #endif
85
 
86
- #endif /* TEN_VAD_H */
 
1
  //
2
+ // Copyright © 2025 Agora
3
  // This file is part of TEN Framework, an open source project.
4
+ // Licensed under the Apache License, Version 2.0, with certain conditions.
5
+ // Refer to the "LICENSE" file in the root directory for more information.
6
  //
7
  #ifndef TEN_VAD_H
8
  #define TEN_VAD_H
 
84
  }
85
  #endif
86
 
87
+ #endif /* TEN_VAD_H */
include/ten_vad.py CHANGED
@@ -1,7 +1,8 @@
1
  #
2
- # This file is part of TEN Framework, an open source project.
3
- # Licensed under the Apache License, Version 2.0.
4
- # See the LICENSE file for more information.
 
5
  #
6
  from ctypes import c_int, c_int32, c_float, c_size_t, CDLL, c_void_p, POINTER
7
  import numpy as np
 
1
  #
2
+ # Copyright © 2025 Agora
3
+ # This file is part of TEN Framework, an open source project.
4
+ # Licensed under the Apache License, Version 2.0, with certain conditions.
5
+ # Refer to the "LICENSE" file in the root directory for more information.
6
  #
7
  from ctypes import c_int, c_int32, c_float, c_size_t, CDLL, c_void_p, POINTER
8
  import numpy as np
lib/Web/ten_vad.d.ts ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * This file is part of TEN Framework, an open source project.
3
+ * Licensed under the Apache License, Version 2.0.
4
+ * See the LICENSE file for more information.
5
+ *
6
+ * TEN VAD (Voice Activity Detection) WebAssembly Module
7
+ * TypeScript type definitions
8
+ */
9
+
10
/**
 * Raw module object produced by the WebAssembly loader.
 *
 * Functions prefixed with `_` are the exported C entry points; every pointer
 * argument and pointer return value is a byte offset into the module heap.
 */
export interface TenVADModule {
  /**
   * Create and initialize a VAD instance
   * @param handlePtr Pointer to store the VAD handle
   * @param hopSize Number of samples between consecutive analysis frames (e.g., 256)
   * @param threshold VAD detection threshold [0.0, 1.0]
   * @returns 0 on success, -1 on error
   */
  _ten_vad_create(handlePtr: number, hopSize: number, threshold: number): number;

  /**
   * Process audio frame for voice activity detection
   * @param handle Valid VAD handle from ten_vad_create
   * @param audioDataPtr Pointer to int16 audio samples array
   * @param audioDataLength Length of audio data (should equal hopSize)
   * @param outProbabilityPtr Pointer to output probability [0.0, 1.0]
   * @param outFlagPtr Pointer to output flag (0: no voice, 1: voice detected)
   * @returns 0 on success, -1 on error
   */
  _ten_vad_process(
    handle: number,
    audioDataPtr: number,
    audioDataLength: number,
    outProbabilityPtr: number,
    outFlagPtr: number
  ): number;

  /**
   * Destroy VAD instance and release resources
   * @param handlePtr Pointer to the VAD handle
   * @returns 0 on success, -1 on error
   */
  _ten_vad_destroy(handlePtr: number): number;

  /**
   * Get library version string
   * @returns Version string pointer
   */
  _ten_vad_get_version(): number;

  // WebAssembly Memory Management
  _malloc(size: number): number;
  _free(ptr: number): void;

  // Typed-array views over the module heap. The loader re-creates all of them
  // when the heap grows, so read them from the module on each use rather than
  // caching a reference.
  HEAP8: Int8Array;
  HEAP16: Int16Array;
  HEAP32: Int32Array;
  HEAPU8: Uint8Array;
  HEAPU16: Uint16Array;
  HEAPU32: Uint32Array;
  HEAPF32: Float32Array;
  HEAPF64: Float64Array;

  // Value access methods
  // NOTE(review): the bundled lib/Web/ten_vad.js does not appear to attach
  // getValue/setValue or the string utilities below to the module object
  // (examples/test_node.js installs its own helpers after loading). Confirm
  // they exist at runtime before relying on these declarations.
  getValue(ptr: number, type: 'i8' | 'i16' | 'i32' | 'float' | 'double'): number;
  setValue(ptr: number, value: number, type: 'i8' | 'i16' | 'i32' | 'float' | 'double'): void;

  // String utilities
  UTF8ToString(ptr: number): string;
  lengthBytesUTF8(str: string): number;
  stringToUTF8(str: string, outPtr: number, maxBytesToWrite: number): void;
}
69
+
70
+ /**
71
+ * High-level TypeScript wrapper for TEN VAD
+ *
+ * NOTE(review): the bundled lib/Web/ten_vad.js only has a default export
+ * (createVADModule); no TenVAD class is visibly exported there. Confirm that
+ * an implementation of this class actually ships before importing it.
72
+ */
73
+ export class TenVAD {
74
+ private module: TenVADModule;
75
+ private handle: number | null;
76
+ private hopSize: number;
77
+
+ /**
+ * @param module Loaded module object
+ * @param hopSize Number of samples per processed frame
+ * @param threshold Detection threshold; presumably forwarded to
+ * _ten_vad_create together with hopSize (TODO confirm)
+ */
78
+ constructor(module: TenVADModule, hopSize: number, threshold: number);
79
+
80
+ /**
81
+ * Process audio samples for voice activity detection
82
+ * @param audioData Int16Array of audio samples (length must equal hopSize)
83
+ * @returns Object with probability and voice detection flag, or null
+ * (presumably when processing fails or the instance is invalid - TODO confirm)
84
+ */
85
+ process(audioData: Int16Array): {
86
+ probability: number;
87
+ isVoice: boolean;
88
+ } | null;
89
+
90
+ /**
91
+ * Get library version
92
+ */
93
+ getVersion(): string;
94
+
95
+ /**
96
+ * Destroy VAD instance
97
+ */
98
+ destroy(): void;
99
+
100
+ /**
101
+ * Check if VAD instance is valid
102
+ */
103
+ isValid(): boolean;
104
+ }
105
+
106
/**
 * Optional overrides read by the module factory while it starts up.
 * Only the commonly used hooks are typed; any other loader setting
 * (e.g. `noExitRuntime`) is accepted through the index signature.
 */
export interface TenVADModuleOptions {
  /** Pre-loaded contents of ten_vad.wasm; when given, no fetch is attempted. */
  wasmBinary?: ArrayBuffer | ArrayBufferView;
  /** Maps a requested file name (e.g. "ten_vad.wasm") to the URL/path to load. */
  locateFile?: (path: string, scriptDirectory: string) => string;
  /** Receives text the module writes to stdout (default: console.log). */
  print?: (text: string) => void;
  /** Receives text the module writes to stderr (default: console.warn). */
  printErr?: (text: string) => void;
  /** Called once the runtime is initialized. */
  onRuntimeInitialized?: () => void;
  /** Called when the runtime aborts. */
  onAbort?: (reason: any) => void;
  [key: string]: any;
}

/**
 * Create TEN VAD WebAssembly module
 * @param moduleOverrides Optional loader settings (see TenVADModuleOptions)
 * @returns Promise resolved with the initialized module
 */
declare function createVADModule(moduleOverrides?: TenVADModuleOptions): Promise<TenVADModule>;

export default createVADModule;
lib/Web/ten_vad.js ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ // Minified, machine-generated WebAssembly loader for ten_vad.wasm (appears to
+ // be Emscripten output, judging by the "-sASSERTIONS" abort message). Do not
+ // edit by hand; regenerate it from the build instead.
+ //
+ // ES module: uses import.meta.url and `export default`.
+ // The default export is a factory taking an optional overrides object and
+ // returning that object's `ready` promise, which resolves with the module
+ // once the runtime has initialized.
+ // Overrides read below include: wasmBinary, locateFile, print, printErr,
+ // onAbort, instantiateWasm, onRuntimeInitialized, preInit, preRun, postRun,
+ // setStatus, monitorRunDependencies.
+ // Members attached to the module: HEAP8/16/32, HEAPU8/16/32, HEAPF32/64,
+ // _malloc, _free, _ten_vad_create, _ten_vad_process, _ten_vad_destroy,
+ // _ten_vad_get_version.
+ var createVADModule = (() => {
3
+ var _scriptDir = import.meta.url;
4
+
5
+ return (
6
+ function(createVADModule) {
7
+ createVADModule = createVADModule || {};
8
+
9
+
10
+ var a;a||(a=typeof createVADModule !== 'undefined' ? createVADModule : {});var k,l;a.ready=new Promise(function(b,c){k=b;l=c});var p=Object.assign({},a),r="object"==typeof window,u="function"==typeof importScripts,v="",w;
11
+ if(r||u)u?v=self.location.href:"undefined"!=typeof document&&document.currentScript&&(v=document.currentScript.src),_scriptDir&&(v=_scriptDir),0!==v.indexOf("blob:")?v=v.substr(0,v.replace(/[?#].*/,"").lastIndexOf("/")+1):v="",u&&(w=b=>{var c=new XMLHttpRequest;c.open("GET",b,!1);c.responseType="arraybuffer";c.send(null);return new Uint8Array(c.response)});var aa=a.print||console.log.bind(console),x=a.printErr||console.warn.bind(console);Object.assign(a,p);p=null;var y;a.wasmBinary&&(y=a.wasmBinary);
12
+ var noExitRuntime=a.noExitRuntime||!0;"object"!=typeof WebAssembly&&z("no native wasm support detected");var A,B=!1,C="undefined"!=typeof TextDecoder?new TextDecoder("utf8"):void 0,D,E,F;function J(){var b=A.buffer;D=b;a.HEAP8=new Int8Array(b);a.HEAP16=new Int16Array(b);a.HEAP32=new Int32Array(b);a.HEAPU8=E=new Uint8Array(b);a.HEAPU16=new Uint16Array(b);a.HEAPU32=F=new Uint32Array(b);a.HEAPF32=new Float32Array(b);a.HEAPF64=new Float64Array(b)}var K=[],L=[],M=[];
13
+ function ba(){var b=a.preRun.shift();K.unshift(b)}var N=0,O=null,P=null;function z(b){if(a.onAbort)a.onAbort(b);b="Aborted("+b+")";x(b);B=!0;b=new WebAssembly.RuntimeError(b+". Build with -sASSERTIONS for more info.");l(b);throw b;}function Q(){return R.startsWith("data:application/octet-stream;base64,")}var R;if(a.locateFile){if(R="ten_vad.wasm",!Q()){var S=R;R=a.locateFile?a.locateFile(S,v):v+S}}else R=(new URL("ten_vad.wasm",import.meta.url)).href;
14
+ function T(){var b=R;try{if(b==R&&y)return new Uint8Array(y);if(w)return w(b);throw"both async and sync fetching of the wasm failed";}catch(c){z(c)}}function ca(){return y||!r&&!u||"function"!=typeof fetch?Promise.resolve().then(function(){return T()}):fetch(R,{credentials:"same-origin"}).then(function(b){if(!b.ok)throw"failed to load wasm binary file at '"+R+"'";return b.arrayBuffer()}).catch(function(){return T()})}function U(b){for(;0<b.length;)b.shift()(a)}
15
+ var da=[null,[],[]],ea={a:function(){z("")},f:function(b,c,m){E.copyWithin(b,c,c+m)},c:function(b){var c=E.length;b>>>=0;if(2147483648<b)return!1;for(var m=1;4>=m;m*=2){var h=c*(1+.2/m);h=Math.min(h,b+100663296);var d=Math;h=Math.max(b,h);d=d.min.call(d,2147483648,h+(65536-h%65536)%65536);a:{try{A.grow(d-D.byteLength+65535>>>16);J();var e=1;break a}catch(W){}e=void 0}if(e)return!0}return!1},e:function(){return 52},b:function(){return 70},d:function(b,c,m,h){for(var d=0,e=0;e<m;e++){var W=F[c>>2],
16
+ X=F[c+4>>2];c+=8;for(var G=0;G<X;G++){var f=E[W+G],H=da[b];if(0===f||10===f){f=H;for(var n=0,q=n+NaN,t=n;f[t]&&!(t>=q);)++t;if(16<t-n&&f.buffer&&C)f=C.decode(f.subarray(n,t));else{for(q="";n<t;){var g=f[n++];if(g&128){var I=f[n++]&63;if(192==(g&224))q+=String.fromCharCode((g&31)<<6|I);else{var Y=f[n++]&63;g=224==(g&240)?(g&15)<<12|I<<6|Y:(g&7)<<18|I<<12|Y<<6|f[n++]&63;65536>g?q+=String.fromCharCode(g):(g-=65536,q+=String.fromCharCode(55296|g>>10,56320|g&1023))}}else q+=String.fromCharCode(g)}f=q}(1===
17
+ b?aa:x)(f);H.length=0}else H.push(f)}d+=X}F[h>>2]=d;return 0}};
18
+ (function(){function b(d){a.asm=d.exports;A=a.asm.g;J();L.unshift(a.asm.h);N--;a.monitorRunDependencies&&a.monitorRunDependencies(N);0==N&&(null!==O&&(clearInterval(O),O=null),P&&(d=P,P=null,d()))}function c(d){b(d.instance)}function m(d){return ca().then(function(e){return WebAssembly.instantiate(e,h)}).then(function(e){return e}).then(d,function(e){x("failed to asynchronously prepare wasm: "+e);z(e)})}var h={a:ea};N++;a.monitorRunDependencies&&a.monitorRunDependencies(N);if(a.instantiateWasm)try{return a.instantiateWasm(h,
19
+ b)}catch(d){x("Module.instantiateWasm callback failed with error: "+d),l(d)}(function(){return y||"function"!=typeof WebAssembly.instantiateStreaming||Q()||"function"!=typeof fetch?m(c):fetch(R,{credentials:"same-origin"}).then(function(d){return WebAssembly.instantiateStreaming(d,h).then(c,function(e){x("wasm streaming compile failed: "+e);x("falling back to ArrayBuffer instantiation");return m(c)})})})().catch(l);return{}})();
20
+ a.___wasm_call_ctors=function(){return(a.___wasm_call_ctors=a.asm.h).apply(null,arguments)};a._malloc=function(){return(a._malloc=a.asm.i).apply(null,arguments)};a._free=function(){return(a._free=a.asm.j).apply(null,arguments)};a._ten_vad_create=function(){return(a._ten_vad_create=a.asm.k).apply(null,arguments)};a._ten_vad_process=function(){return(a._ten_vad_process=a.asm.l).apply(null,arguments)};a._ten_vad_destroy=function(){return(a._ten_vad_destroy=a.asm.m).apply(null,arguments)};
21
+ a._ten_vad_get_version=function(){return(a._ten_vad_get_version=a.asm.n).apply(null,arguments)};var V;P=function fa(){V||Z();V||(P=fa)};
22
+ function Z(){function b(){if(!V&&(V=!0,a.calledRun=!0,!B)){U(L);k(a);if(a.onRuntimeInitialized)a.onRuntimeInitialized();if(a.postRun)for("function"==typeof a.postRun&&(a.postRun=[a.postRun]);a.postRun.length;){var c=a.postRun.shift();M.unshift(c)}U(M)}}if(!(0<N)){if(a.preRun)for("function"==typeof a.preRun&&(a.preRun=[a.preRun]);a.preRun.length;)ba();U(K);0<N||(a.setStatus?(a.setStatus("Running..."),setTimeout(function(){setTimeout(function(){a.setStatus("")},1);b()},1)):b())}}
23
+ if(a.preInit)for("function"==typeof a.preInit&&(a.preInit=[a.preInit]);0<a.preInit.length;)a.preInit.pop()();Z();
24
+
25
+
26
+ return createVADModule.ready
27
+ }
28
+ );
29
+ })();
30
+ export default createVADModule;
lib/{macOS/ten_vad.framework/Versions/Current/Headers/ten_vad.h → Web/ten_vad.wasm} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9bbf0ab2d2ee30d9c170556efb9a7200a53725053cfa7c66a0dff79e7c9351e8
3
- size 2885
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ec0b9640683987e15a4e54e4ce5642b2447c6e5d82b1be889b5099c75434fc3
3
+ size 283349
lib/Windows/x64/ten_vad.lib CHANGED
Binary files a/lib/Windows/x64/ten_vad.lib and b/lib/Windows/x64/ten_vad.lib differ
 
lib/Windows/x86/ten_vad.lib CHANGED
Binary files a/lib/Windows/x86/ten_vad.lib and b/lib/Windows/x86/ten_vad.lib differ
 
lib/iOS/ten_vad.framework/Headers/ten_vad.h CHANGED
@@ -1,3 +1,90 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9bbf0ab2d2ee30d9c170556efb9a7200a53725053cfa7c66a0dff79e7c9351e8
3
- size 2885
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * @file ten_vad.h
3
+ * @brief Ten Voice Activity Detection (ten_vad) C API
4
+ * Version: 1.0.0
5
+ *
6
+ * Provides functions to create, process, and destroy a VAD instance.
7
+ */
8
+ #ifndef TEN_VAD_H
9
+ #define TEN_VAD_H
10
+
11
+ #if defined(__APPLE__) || defined(__ANDROID__) || defined(__linux__)
12
+ #define TENVAD_API __attribute__((visibility("default")))
13
+ #elif defined(_WIN32) || defined(__CYGWIN__)
14
+ /**
15
+ * @def TENVAD_API
16
+ * @brief Export/import macro for ten_vad shared library symbols.
17
+ */
18
+ #ifdef TENVAD_EXPORTS
19
+ #define TENVAD_API __declspec(dllexport)
20
+ #else
21
+ #define TENVAD_API __declspec(dllimport)
22
+ #endif
23
+ #else
24
+ #define TENVAD_API
25
+ #endif
26
+
27
+ #include <stddef.h> /* size_t */
28
+ #include <stdint.h> /* int16_t */
29
+
30
+ #ifdef __cplusplus
31
+ extern "C"
32
+ {
33
+ #endif
34
+
35
+ /**
36
+ * @typedef ten_vad_handle_t
37
+ * @brief Opaque handle for ten_vad instance.
38
+ */
39
+ typedef void *ten_vad_handle_t;
40
+
41
+ /**
42
+ * @brief Create and initialize a ten_vad instance.
43
+ *
44
+ * @param[out] handle Pointer to receive the vad handle.
45
+ * @param[in] hop_size The number of samples between the start points of
46
+ * two consecutive analysis frames. (e.g., 256).
47
+ * @param[in] threshold VAD detection threshold ranging from [0.0, 1.0]
48
+ * (default: 0.5).
49
+ * @return 0 on success, or -1 if an error occurs.
50
+ */
51
+ TENVAD_API int ten_vad_create(ten_vad_handle_t *handle, size_t hop_size,
52
+ float threshold);
53
+
54
+ /**
55
+ * @brief Process one audio frame for voice activity detection.
56
+ * Must call ten_vad_create() before calling this, and ten_vad_destroy() when done.
57
+ *
58
+ * @param[in] handle Valid VAD handle returned by ten_vad_create().
59
+ * @param[in] audio_data Pointer to an array of int16_t samples,
60
+ * buffer length must equal the hop size specified at ten_vad_create.
61
+ * @param[in] audio_data_length Number of samples in audio_data; should be equal to hop_size.
62
+ * @param[out] out_probability Pointer to a float (size 1) that receives the
63
+ * voice activity probability in the range [0.0, 1.0].
64
+ * @param[out] out_flag Pointer to an int (size 1) that receives the
65
+ * detection result: 0 = no voice, 1 = voice detected.
66
+ * @return 0 on success, or -1 if an error occurs.
67
+ */
68
+ TENVAD_API int ten_vad_process(ten_vad_handle_t handle, const int16_t *audio_data, size_t audio_data_length,
69
+ float *out_probability, int *out_flag);
70
+
71
+ /**
72
+ * @brief Destroy a ten_vad instance and release its resources.
73
+ *
74
+ * @param[in,out] handle Pointer to the ten_vad handle; set to NULL on return.
75
+ * @return 0 on success, or -1 if an error occurs.
76
+ */
77
+ TENVAD_API int ten_vad_destroy(ten_vad_handle_t *handle);
78
+
79
+ /**
80
+ * @brief Get the ten_vad library version string.
81
+ *
82
+ * @return The version string (e.g., "1.0.0").
83
+ */
84
+ TENVAD_API const char *ten_vad_get_version(void);
85
+
86
+ #ifdef __cplusplus
87
+ }
88
+ #endif
89
+
90
+ #endif /* TEN_VAD_H */
lib/iOS/ten_vad.framework/Info.plist CHANGED
Binary files a/lib/iOS/ten_vad.framework/Info.plist and b/lib/iOS/ten_vad.framework/Info.plist differ
 
lib/iOS/ten_vad.framework/Modules/module.modulemap CHANGED
@@ -1,3 +1,5 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac22f2ff0291876b7d5069f957825d01abc06a3da84c9f4385154a8e99964096
3
- size 115
 
 
 
1
+ framework module ten_vad {
2
+ umbrella header "ten_vad.h"
3
+ export *
4
+ module * { export * }
5
+ }
lib/macOS/ten_vad.framework/Headers ADDED
@@ -0,0 +1 @@
 
 
1
+ Versions/Current/Headers
lib/macOS/ten_vad.framework/Headers/ten_vad.h DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9bbf0ab2d2ee30d9c170556efb9a7200a53725053cfa7c66a0dff79e7c9351e8
3
- size 2885
 
 
 
 
lib/macOS/ten_vad.framework/Resources ADDED
@@ -0,0 +1 @@
 
 
1
+ Versions/Current/Resources
lib/macOS/ten_vad.framework/Resources/Info.plist DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5aa8df4f544b3143b819d6ffd5c21574c02884bf41cb2b7a8df45c7f10f75c3a
3
- size 1216
 
 
 
 
lib/macOS/ten_vad.framework/Versions/A/Headers/ten_vad.h CHANGED
@@ -1,3 +1,90 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9bbf0ab2d2ee30d9c170556efb9a7200a53725053cfa7c66a0dff79e7c9351e8
3
- size 2885
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * @file ten_vad.h
3
+ * @brief Ten Voice Activity Detection (ten_vad) C API
4
+ * Version: 1.0.0
5
+ *
6
+ * Provides functions to create, process, and destroy a VAD instance.
7
+ */
8
+ #ifndef TEN_VAD_H
9
+ #define TEN_VAD_H
10
+
11
+ #if defined(__APPLE__) || defined(__ANDROID__) || defined(__linux__)
12
+ #define TENVAD_API __attribute__((visibility("default")))
13
+ #elif defined(_WIN32) || defined(__CYGWIN__)
14
+ /**
15
+ * @def TENVAD_API
16
+ * @brief Export/import macro for ten_vad shared library symbols.
17
+ */
18
+ #ifdef TENVAD_EXPORTS
19
+ #define TENVAD_API __declspec(dllexport)
20
+ #else
21
+ #define TENVAD_API __declspec(dllimport)
22
+ #endif
23
+ #else
24
+ #define TENVAD_API
25
+ #endif
26
+
27
+ #include <stddef.h> /* size_t */
28
+ #include <stdint.h> /* int16_t */
29
+
30
+ #ifdef __cplusplus
31
+ extern "C"
32
+ {
33
+ #endif
34
+
35
+ /**
36
+ * @typedef ten_vad_handle_t
37
+ * @brief Opaque handle for ten_vad instance.
38
+ */
39
+ typedef void *ten_vad_handle_t;
40
+
41
+ /**
42
+ * @brief Create and initialize a ten_vad instance.
43
+ *
44
+ * @param[out] handle Pointer to receive the vad handle.
45
+ * @param[in] hop_size The number of samples between the start points of
46
+ * two consecutive analysis frames. (e.g., 256).
47
+ * @param[in] threshold VAD detection threshold ranging from [0.0, 1.0]
48
+ * (default: 0.5).
49
+ * @return 0 on success, or -1 if an error occurs.
50
+ */
51
+ TENVAD_API int ten_vad_create(ten_vad_handle_t *handle, size_t hop_size,
52
+ float threshold);
53
+
54
+ /**
55
+ * @brief Process one audio frame for voice activity detection.
56
+ * Must call ten_vad_create() before calling this, and ten_vad_destroy() when done.
57
+ *
58
+ * @param[in] handle Valid VAD handle returned by ten_vad_create().
59
+ * @param[in] audio_data Pointer to an array of int16_t samples,
60
+ * buffer length must equal the hop size specified at ten_vad_create.
61
+ * @param[in] audio_data_length Number of samples in audio_data; should be equal to hop_size.
62
+ * @param[out] out_probability Pointer to a float (size 1) that receives the
63
+ * voice activity probability in the range [0.0, 1.0].
64
+ * @param[out] out_flag Pointer to an int (size 1) that receives the
65
+ * detection result: 0 = no voice, 1 = voice detected.
66
+ * @return 0 on success, or -1 if an error occurs.
67
+ */
68
+ TENVAD_API int ten_vad_process(ten_vad_handle_t handle, const int16_t *audio_data, size_t audio_data_length,
69
+ float *out_probability, int *out_flag);
70
+
71
+ /**
72
+ * @brief Destroy a ten_vad instance and release its resources.
73
+ *
74
+ * @param[in,out] handle Pointer to the ten_vad handle; set to NULL on return.
75
+ * @return 0 on success, or -1 if an error occurs.
76
+ */
77
+ TENVAD_API int ten_vad_destroy(ten_vad_handle_t *handle);
78
+
79
+ /**
80
+ * @brief Get the ten_vad library version string.
81
+ *
82
+ * @return The version string (e.g., "1.0.0").
83
+ */
84
+ TENVAD_API const char *ten_vad_get_version(void);
85
+
86
+ #ifdef __cplusplus
87
+ }
88
+ #endif
89
+
90
+ #endif /* TEN_VAD_H */
lib/macOS/ten_vad.framework/Versions/A/Resources/Info.plist CHANGED
@@ -1,3 +1,44 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5aa8df4f544b3143b819d6ffd5c21574c02884bf41cb2b7a8df45c7f10f75c3a
3
- size 1216
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
3
+ <plist version="1.0">
4
+ <dict>
5
+ <key>BuildMachineOSBuild</key>
6
+ <string>23D60</string>
7
+ <key>CFBundleDevelopmentRegion</key>
8
+ <string>English</string>
9
+ <key>CFBundleExecutable</key>
10
+ <string>ten_vad</string>
11
+ <key>CFBundleIdentifier</key>
12
+ <string>com.yourcompany.ten_vad</string>
13
+ <key>CFBundleInfoDictionaryVersion</key>
14
+ <string>6.0</string>
15
+ <key>CFBundlePackageType</key>
16
+ <string>FMWK</string>
17
+ <key>CFBundleSignature</key>
18
+ <string>????</string>
19
+ <key>CFBundleSupportedPlatforms</key>
20
+ <array>
21
+ <string>MacOSX</string>
22
+ </array>
23
+ <key>CSResourcesFileMapped</key>
24
+ <true/>
25
+ <key>DTCompiler</key>
26
+ <string>com.apple.compilers.llvm.clang.1_0</string>
27
+ <key>DTPlatformBuild</key>
28
+ <string></string>
29
+ <key>DTPlatformName</key>
30
+ <string>macosx</string>
31
+ <key>DTPlatformVersion</key>
32
+ <string>14.2</string>
33
+ <key>DTSDKBuild</key>
34
+ <string>23C53</string>
35
+ <key>DTSDKName</key>
36
+ <string>macosx14.2</string>
37
+ <key>DTXcode</key>
38
+ <string>1520</string>
39
+ <key>DTXcodeBuild</key>
40
+ <string>15C500b</string>
41
+ <key>LSMinimumSystemVersion</key>
42
+ <string>10.10</string>
43
+ </dict>
44
+ </plist>
lib/macOS/ten_vad.framework/Versions/Current ADDED
@@ -0,0 +1 @@
 
 
1
+ A
lib/macOS/ten_vad.framework/Versions/Current/ten_vad DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:81b2de13710670bb94fef315ab50fedc903a21c04c4290c6c2ac28d8b42e715a
3
- size 744600
 
 
 
 
lib/macOS/ten_vad.framework/ten_vad DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:81b2de13710670bb94fef315ab50fedc903a21c04c4290c6c2ac28d8b42e715a
3
- size 744600
 
 
 
 
lib/macOS/ten_vad.framework/ten_vad ADDED
@@ -0,0 +1 @@
 
 
1
+ Versions/Current/ten_vad
setup.py CHANGED
@@ -1,3 +1,9 @@
 
 
 
 
 
 
1
  from setuptools import setup
2
  import os, shutil
3
  from setuptools.command.install import install
 
1
+ #
2
+ # Copyright © 2025 Agora
3
+ # This file is part of TEN Framework, an open source project.
4
+ # Licensed under the Apache License, Version 2.0, with certain conditions.
5
+ # Refer to the "LICENSE" file in the root directory for more information.
6
+ #
7
  from setuptools import setup
8
  import os, shutil
9
  from setuptools.command.install import install
src/aed.cc ADDED
@@ -0,0 +1,993 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //
2
+ // Copyright © 2025 Agora
3
+ // This file is part of TEN Framework, an open source project.
4
+ // Licensed under the Apache License, Version 2.0, with certain conditions.
5
+ // Refer to the "LICENSE" file in the root directory for more information.
6
+ //
7
+ #include <string.h>
8
+ #include <stdlib.h>
9
+ #include <algorithm>
10
+ #include <math.h>
11
+ #include "aed.h"
12
+ #include "aed_st.h"
13
+ #include "coeff.h"
14
+ #include "pitch_est.h"
15
+ #include "stft.h"
16
+ #include <assert.h>
17
+
18
+ // Rounds o up to the next multiple of 8 (intended for non-negative integers).
+ #define AUP_AED_ALIGN8(o) (((o) + 7) & (~7))
19
+ // Larger / smaller of two values. These are function-like macros: the selected
+ // argument is evaluated twice, so do not pass expressions with side effects.
+ #define AUP_AED_MAX(x, y) (((x) > (y)) ? (x) : (y))
20
+ #define AUP_AED_MIN(x, y) (((x) > (y)) ? (y) : (x))
21
+ // Very small positive float constant.
+ // NOTE(review): presumably used to keep divisions/logs away from zero - confirm at the use sites.
+ #define AUP_AED_EPS (1e-20f)
22
+
23
+ /// ///////////////////////////////////////////////////////////////////////
24
+ /// Internal Utils
25
+ /// ///////////////////////////////////////////////////////////////////////
26
+
27
+ AUP_MODULE_AIVAD::AUP_MODULE_AIVAD(char* onnx_path) {
28
+ ort_api = OrtGetApiBase()->GetApi(ORT_API_VERSION);
29
+ OrtStatus* status =
30
+ ort_api->CreateEnv(ORT_LOGGING_LEVEL_WARNING, "TEN-VAD", &ort_env);
31
+ if (status) {
32
+ printf("Failed to create env: %s\n", ort_api->GetErrorMessage(status));
33
+ ort_api->ReleaseStatus(status);
34
+ ort_api->ReleaseEnv(ort_env);
35
+ ort_env = NULL;
36
+ return;
37
+ }
38
+
39
+ OrtSessionOptions* session_options;
40
+ ort_api->CreateSessionOptions(&session_options);
41
+ ort_api->SetIntraOpNumThreads(session_options, 1);
42
+ status =
43
+ ort_api->CreateSession(ort_env, onnx_path, session_options, &ort_session);
44
+ ort_api->ReleaseSessionOptions(session_options);
45
+ if (status) {
46
+ printf("Failed to create ort_session: %s\n",
47
+ ort_api->GetErrorMessage(status));
48
+ ort_api->ReleaseStatus(status);
49
+ ort_api->ReleaseEnv(ort_env);
50
+ ort_env = NULL;
51
+ return;
52
+ }
53
+
54
+ ort_api->GetAllocatorWithDefaultOptions(&ort_allocator);
55
+ size_t num_inputs;
56
+ ort_api->SessionGetInputCount(ort_session, &num_inputs);
57
+ assert(num_inputs == AUP_AED_MODEL_IO_NUM);
58
+ for (size_t i = 0; i < num_inputs; i++) {
59
+ char* input_name;
60
+ ort_api->SessionGetInputName(ort_session, i, ort_allocator, &input_name);
61
+ strncpy(input_names_buf[i], input_name, sizeof(input_names_buf[i]));
62
+ input_names[i] = input_names_buf[i];
63
+ ort_api->AllocatorFree(ort_allocator, input_name);
64
+ }
65
+
66
+ size_t num_outputs;
67
+ ort_api->SessionGetOutputCount(ort_session, &num_outputs);
68
+ assert(num_outputs == AUP_AED_MODEL_IO_NUM);
69
+ for (size_t i = 0; i < num_outputs; i++) {
70
+ char* output_name;
71
+ ort_api->SessionGetOutputName(ort_session, i, ort_allocator, &output_name);
72
+ strncpy(output_names_buf[i], output_name, sizeof(output_names_buf[i]));
73
+ output_names[i] = output_names_buf[i];
74
+ ort_api->AllocatorFree(ort_allocator, output_name);
75
+ }
76
+
77
+ OrtMemoryInfo* memory_info;
78
+ status = ort_api->CreateCpuMemoryInfo(OrtDeviceAllocator, OrtMemTypeDefault,
79
+ &memory_info);
80
+ if (status != NULL) {
81
+ printf("Failed to create memory info: %s\n",
82
+ ort_api->GetErrorMessage(status));
83
+ ort_api->ReleaseStatus(status);
84
+ ort_api->ReleaseSession(ort_session);
85
+ ort_api->ReleaseEnv(ort_env);
86
+ ort_session = NULL;
87
+ ort_env = NULL;
88
+ return;
89
+ }
90
+ int64_t input_shapes0[] = {1, AUP_AED_CONTEXT_WINDOW_LEN, AUP_AED_FEA_LEN};
91
+ int64_t input_shapes1234[] = {1, AUP_AED_MODEL_HIDDEN_DIM};
92
+ for (int i = 0; i < num_inputs; i++) {
93
+ status = ort_api->CreateTensorWithDataAsOrtValue(
94
+ memory_info, i == 0 ? input_data_buf_0 : input_data_buf_1234[i - 1],
95
+ i == 0 ? sizeof(input_data_buf_0) : sizeof(input_data_buf_1234[i - 1]),
96
+ i == 0 ? input_shapes0 : input_shapes1234,
97
+ i == 0 ? sizeof(input_shapes0) / sizeof(input_shapes0[0])
98
+ : sizeof(input_shapes1234) / sizeof(input_shapes1234[0]),
99
+ ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, &ort_input_tensors[i]);
100
+ if (status != NULL) {
101
+ printf("Failed to create input tensor %d: %s\n", i,
102
+ ort_api->GetErrorMessage(status));
103
+ ort_api->ReleaseStatus(status);
104
+ ort_api->ReleaseSession(ort_session);
105
+ ort_api->ReleaseEnv(ort_env);
106
+ ort_session = NULL;
107
+ ort_env = NULL;
108
+ return;
109
+ }
110
+ }
111
+
112
+ int64_t output_shapes0[] = {1, 1, 1};
113
+ int64_t output_shapes1234[] = {1, AUP_AED_MODEL_HIDDEN_DIM};
114
+ for (int i = 0; i < num_outputs; i++) {
115
+ status = ort_api->CreateTensorAsOrtValue(
116
+ ort_allocator, i == 0 ? output_shapes0 : output_shapes1234,
117
+ i == 0 ? sizeof(output_shapes0) / sizeof(output_shapes0[0])
118
+ : sizeof(output_shapes1234) / sizeof(output_shapes1234[0]),
119
+ ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, &ort_output_tensors[i]);
120
+ if (status != NULL) {
121
+ printf("Failed to create output tensor %d: %s\n", i,
122
+ ort_api->GetErrorMessage(status));
123
+ ort_api->ReleaseStatus(status);
124
+ ort_api->ReleaseSession(ort_session);
125
+ ort_api->ReleaseEnv(ort_env);
126
+ ort_session = NULL;
127
+ ort_env = NULL;
128
+ return;
129
+ }
130
+ }
131
+ inited = 1;
132
+ }
133
+
134
+ AUP_MODULE_AIVAD::~AUP_MODULE_AIVAD() {
135
+ for (int i = 0; i < AUP_AED_MODEL_IO_NUM; i++) {
136
+ if (ort_output_tensors[i]) {
137
+ ort_api->ReleaseValue(ort_output_tensors[i]);
138
+ }
139
+ }
140
+ if (ort_session) {
141
+ ort_api->ReleaseSession(ort_session);
142
+ }
143
+ if (ort_env) {
144
+ ort_api->ReleaseEnv(ort_env);
145
+ }
146
+ }
147
+
148
+ int AUP_MODULE_AIVAD::Process(float* input, float* output) {
149
+ if (!inited) {
150
+ printf("not inited!\n");
151
+ return -1;
152
+ }
153
+
154
+ memcpy(input_data_buf_0, input, sizeof(input_data_buf_0));
155
+ if (clear_hidden) {
156
+ memset(input_data_buf_1234, 0, sizeof(input_data_buf_1234));
157
+ clear_hidden = 0;
158
+ }
159
+ OrtStatus* status = ort_api->Run(
160
+ ort_session, NULL, input_names, ort_input_tensors, AUP_AED_MODEL_IO_NUM,
161
+ output_names, AUP_AED_MODEL_IO_NUM, ort_output_tensors);
162
+ float* output_data;
163
+ ort_api->GetTensorMutableData(ort_output_tensors[0], (void**)&output_data);
164
+ *output = output_data[0];
165
+ for (int i = 1; i < AUP_AED_MODEL_IO_NUM; i++) {
166
+ ort_api->GetTensorMutableData(ort_output_tensors[i], (void**)&output_data);
167
+ memcpy(input_data_buf_1234[i - 1], output_data,
168
+ sizeof(input_data_buf_1234[i - 1]));
169
+ }
170
+
171
+ return 0;
172
+ }
173
+
174
+ int AUP_MODULE_AIVAD::Reset() {
175
+ if (!inited) {
176
+ return -1;
177
+ }
178
+
179
+ clear_hidden = 1;
180
+ return 0;
181
+ }
182
+
183
+ static int AUP_Aed_checkStatCfg(Aed_StaticCfg* pCfg) {
184
+ if (pCfg == NULL) {
185
+ return -1;
186
+ }
187
+
188
+ #if AUP_AED_FEA_LEN < AUP_AED_MEL_FILTER_BANK_NUM
189
+ return -1;
190
+ #endif
191
+
192
+ if (pCfg->hopSz < 32) {
193
+ return -1;
194
+ }
195
+
196
+ if (pCfg->frqInputAvailableFlag == 1) {
197
+ if (pCfg->fftSz < 128 || pCfg->fftSz < pCfg->hopSz) {
198
+ return -1;
199
+ }
200
+ if (pCfg->anaWindowSz > pCfg->fftSz || pCfg->anaWindowSz < pCfg->hopSz) {
201
+ return -1;
202
+ }
203
+ }
204
+
205
+ return 0;
206
+ }
207
+
208
+ static int AUP_Aed_publishStaticCfg(Aed_St* stHdl) {
209
+ const Aed_StaticCfg* pStatCfg;
210
+
211
+ if (stHdl == NULL) {
212
+ return -1;
213
+ }
214
+ pStatCfg = (const Aed_StaticCfg*)(&(stHdl->stCfg));
215
+
216
+ stHdl->extFftSz = 0;
217
+ stHdl->extNBins = 0;
218
+ stHdl->extWinSz = 0;
219
+ if (pStatCfg->frqInputAvailableFlag == 1) {
220
+ stHdl->extFftSz = pStatCfg->fftSz;
221
+ stHdl->extNBins = (stHdl->extFftSz >> 1) + 1;
222
+ stHdl->extWinSz = pStatCfg->anaWindowSz;
223
+ }
224
+ stHdl->extHopSz = pStatCfg->hopSz;
225
+
226
+ stHdl->intFftSz = AUP_AED_ASSUMED_FFTSZ;
227
+ stHdl->intHopSz = AUP_AED_ASSUMED_HOPSZ;
228
+ stHdl->intWinSz = AUP_AED_ASSUMED_WINDOWSZ;
229
+ stHdl->intNBins = (stHdl->intFftSz >> 1) + 1;
230
+ stHdl->intAnalyWindowPtr = AUP_AED_STFTWindow_Hann768;
231
+
232
+ if (pStatCfg->frqInputAvailableFlag == 0 ||
233
+ stHdl->extHopSz != stHdl->intHopSz) {
234
+ // external STFT analysis framework is not supported at all
235
+ stHdl->intAnalyFlag =
236
+ 2; // internally redo analysis based on input time signal
237
+ } else if (stHdl->extFftSz == stHdl->intFftSz) {
238
+ // external STFT analysis framework completely match with internal
239
+ // requirement
240
+ stHdl->intAnalyFlag = 0; // directly use external spectrum
241
+ } else { // external spectrum need to be interpolated or extrapolated before
242
+ // AIVAD
243
+ stHdl->intAnalyFlag =
244
+ 1; // use external spectrum with interpolation / exterpolation
245
+ }
246
+ stHdl->inputTimeFIFOLen = stHdl->extHopSz + stHdl->intHopSz;
247
+
248
+ // for aiaed release2.0.0, pre-emphasis for input time-signal is needed,
249
+ // therefore, we need redo analysis based on input time signal preprocessed by
250
+ // pre-emphasis.
251
+ stHdl->intAnalyFlag =
252
+ 2; // internally redo analysis based on input time signal
253
+
254
+ stHdl->feaSz = (size_t)AUP_AED_FEA_LEN;
255
+ stHdl->melFbSz = (size_t)AUP_AED_MEL_FILTER_BANK_NUM;
256
+ stHdl->algDelay = (size_t)AUP_AED_LOOKAHEAD_NFRM;
257
+ stHdl->algCtxtSz = (size_t)AUP_AED_CONTEXT_WINDOW_LEN;
258
+ stHdl->frmRmsBufLen = AUP_AED_MAX(1, stHdl->algDelay);
259
+
260
+ return 0;
261
+ }
262
+
263
+ static int AUP_Aed_publishDynamCfg(Aed_St* stHdl) {
264
+ const Aed_DynamCfg* pDynmCfg;
265
+ PE_DynamCfg peDynmCfg;
266
+ if (stHdl == NULL) {
267
+ return -1;
268
+ }
269
+
270
+ pDynmCfg = (const Aed_DynamCfg*)(&(stHdl->dynamCfg));
271
+ stHdl->aivadResetFrmNum = pDynmCfg->resetFrameNum;
272
+ stHdl->voiceDecideThresh = pDynmCfg->extVoiceThr;
273
+
274
+ if (stHdl->pitchEstStPtr != NULL) {
275
+ peDynmCfg.voicedThr = pDynmCfg->pitchEstVoicedThr;
276
+ AUP_PE_setDynamCfg(stHdl->pitchEstStPtr, &peDynmCfg);
277
+ }
278
+
279
+ return 0;
280
+ }
281
+
282
+ static int AUP_Aed_resetVariables(Aed_St* stHdl) {
283
+ if (stHdl == NULL) {
284
+ return -1;
285
+ }
286
+
287
+ // first clear all the dynamic memory, all the dynamic variables which are
288
+ // not listed bellow are cleared to 0 by this step
289
+ memset(stHdl->dynamMemPtr, 0, stHdl->dynamMemSize);
290
+
291
+ float* melFbCoef = stHdl->melFilterBankCoef;
292
+ size_t* melBinBuff = stHdl->melFilterBinBuff;
293
+ size_t i, j;
294
+ size_t nBins = stHdl->intNBins;
295
+ size_t melFbSz = stHdl->melFbSz;
296
+
297
+ stHdl->aedProcFrmCnt = 0;
298
+ stHdl->inputTimeFIFOIdx = 0;
299
+ stHdl->aivadResetCnt = 0;
300
+ stHdl->timeSignalPre = 0.0f;
301
+ stHdl->aivadScore =
302
+ -1.0f; // as default value, labeling as aed is not working yet
303
+ stHdl->aivadScorePre = -1.0f;
304
+
305
+ stHdl->pitchFreq = 0.0f;
306
+
307
+ // generate mel filter-bank coefficients
308
+ float low_mel = 2595.0f * log10f(1.0f + 0.0f / 700.0f);
309
+ float high_mel = 2595.0f * log10f(1.0f + 8000.0f / 700.0f);
310
+ float mel_points = 0.0f;
311
+ float hz_points = 0.0f;
312
+ size_t idx = 0;
313
+
314
+ for (i = 0; i < melFbSz + 2; i++) {
315
+ mel_points = i * (high_mel - low_mel) / ((float)melFbSz + 1.0f) + low_mel;
316
+ hz_points = 700.0f * (powf(10.0f, mel_points / 2595.0f) - 1.0f);
317
+ melBinBuff[i] =
318
+ (size_t)((stHdl->intFftSz + 1.0f) * hz_points / (float)AUP_AED_FS);
319
+ if (i > 0 && melBinBuff[i] == melBinBuff[i - 1]) {
320
+ return -1;
321
+ }
322
+ }
323
+
324
+ for (j = 0; j < melFbSz; j++) {
325
+ for (i = melBinBuff[j]; i < melBinBuff[j + 1]; i++) {
326
+ idx = j * nBins + i;
327
+ melFbCoef[idx] = (float)(i - melBinBuff[j]) /
328
+ (float)(melBinBuff[j + 1] - melBinBuff[j]);
329
+ }
330
+ for (i = melBinBuff[j + 1]; i < melBinBuff[j + 2]; i++) {
331
+ idx = j * nBins + i;
332
+ melFbCoef[idx] = (float)(melBinBuff[j + 2] - i) /
333
+ (float)(melBinBuff[j + 2] - melBinBuff[j + 1]);
334
+ }
335
+ }
336
+
337
+ if (stHdl->pitchEstStPtr != NULL) {
338
+ if (AUP_PE_init(stHdl->pitchEstStPtr) < 0) {
339
+ return -1;
340
+ }
341
+ }
342
+
343
+ if (stHdl->aivadInf != NULL) {
344
+ stHdl->aivadInf->Reset();
345
+ }
346
+
347
+ if (stHdl->timeInAnalysis != NULL) {
348
+ if (AUP_Analyzer_init(stHdl->timeInAnalysis) < 0) {
349
+ return -1;
350
+ }
351
+ }
352
+
353
+ return 0;
354
+ }
355
+
356
// Returns cnter + 1, wrapping back to 0 once the incremented value reaches
// 1000000000 (commented in the original as "reset every half year").
static int AUP_Aed_addOneCnter(int cnter) {
  const int next = cnter + 1;
  return (next >= 1000000000) ? 0 : next;
}
363
+
364
// Resamples a power spectrum from srcNBins bins to tgtNBins bins by picking,
// for each target bin, the source bin at the proportional position
// (truncated towards zero, clamped to the valid source range).
//
// Parameters:
//   src      - source bin powers, srcNBins elements
//   tgt      - destination, tgtNBins elements
//   srcNBins - number of source bins
//   tgtNBins - number of target bins
//
// Edge cases:
//   - NULL pointers or tgtNBins <= 0: nothing is written.
//   - srcNBins <= 0: the target is zero-filled.
//   - tgtNBins == 1: the single target bin takes src[0] (avoids the 0/0
//     ratio the general formula would produce).
static void AUP_Aed_binPowerConvert(const float* src, float* tgt, int srcNBins,
                                    int tgtNBins) {
  if (src == NULL || tgt == NULL || tgtNBins <= 0) {
    return;
  }
  if (srcNBins <= 0) {
    memset(tgt, 0, sizeof(float) * (size_t)tgtNBins);
    return;
  }
  if (srcNBins == tgtNBins) {
    memcpy(tgt, src, sizeof(float) * (size_t)tgtNBins);
    return;
  }
  if (tgtNBins == 1) {
    tgt[0] = src[0];
    return;
  }

  const int lastSrc = srcNBins - 1;
  const float rate = (float)(srcNBins - 1) / (float)(tgtNBins - 1);
  for (int tgtIdx = 0; tgtIdx < tgtNBins; tgtIdx++) {
    int srcIdx = (int)(tgtIdx * rate);
    if (srcIdx < 0) {
      srcIdx = 0;
    }
    if (srcIdx > lastSrc) {
      srcIdx = lastSrc;
    }
    tgt[tgtIdx] = src[srcIdx];
  }
}
384
+
385
// Converts a packed complex spectrum into per-bin power.
//
// Layout read from cmplxSpctr: element 0 is used for bin 0, element 1 for
// the last bin (nBins - 1), and for every bin k in between the pair
// (2k, 2k + 1) is used as real and imaginary part.
//
// Parameters:
//   nBins      - number of bins to produce
//   cmplxSpctr - packed spectrum as described above
//   binPow     - output, nBins elements
static void AUP_Aed_CalcBinPow(int nBins, const float* cmplxSpctr,
                               float* binPow) {
  const int lastBin = nBins - 1;

  // The two purely real bins are stored in the first two slots.
  binPow[0] = cmplxSpctr[0] * cmplxSpctr[0];
  binPow[lastBin] = cmplxSpctr[1] * cmplxSpctr[1];

  for (int bin = 1; bin < lastBin; bin++) {
    const float re = cmplxSpctr[2 * bin];
    const float im = cmplxSpctr[2 * bin + 1];
    binPow[bin] = re * re + im * im;
  }
}
404
+
405
+ static int AUP_Aed_pitch_proc(void* pitchModule, const float* timeSignal,
406
+ size_t timeLen, const float* binPow, size_t nBins,
407
+ PE_OutputData* pOut) {
408
+ PE_InputData peInData;
409
+
410
+ peInData.timeSignal = timeSignal;
411
+ peInData.hopSz = (int)timeLen;
412
+ peInData.inBinPow = binPow;
413
+ peInData.nBins = (int)nBins;
414
+ pOut->pitchFreq = 0;
415
+ pOut->voiced = -1;
416
+ return AUP_PE_proc(pitchModule, &peInData, pOut);
417
+ }
418
+
419
+ static int AUP_Aed_aivad_proc(Aed_St* stHdl, const float* inBinPow,
420
+ float* aivadScore) {
421
+ if (stHdl == NULL || inBinPow == NULL || aivadScore == NULL) {
422
+ return -1;
423
+ }
424
+
425
+ size_t i, j;
426
+ size_t nBins = stHdl->intNBins;
427
+ size_t melFbSz = stHdl->melFbSz;
428
+ size_t srcOffset;
429
+ size_t srcLen;
430
+
431
+ float* aivadInputFeatStack = stHdl->aivadInputFeatStack;
432
+ float* melFbCoef = stHdl->melFilterBankCoef;
433
+ const float* aivadFeatMean = AUP_AED_FEATURE_MEANS;
434
+ const float* aivadFeatStd = AUP_AED_FEATURE_STDS;
435
+ float* curMelFbCoefPtr = NULL;
436
+ float* curInputFeatPtr = NULL;
437
+ float perBandValue = 0.0f;
438
+ float powerNormal = 32768.0f * 32768.0f;
439
+
440
+ // update aivad feature buff.
441
+ srcOffset = stHdl->feaSz;
442
+ srcLen = (stHdl->algCtxtSz - 1) * stHdl->feaSz;
443
+ memmove(aivadInputFeatStack, aivadInputFeatStack + srcOffset,
444
+ sizeof(float) * srcLen);
445
+ curInputFeatPtr = aivadInputFeatStack + srcLen;
446
+
447
+ // cal. mel-filter-bank feature
448
+ for (i = 0; i < melFbSz; i++) {
449
+ perBandValue = 0.0f;
450
+ curMelFbCoefPtr = melFbCoef + i * nBins;
451
+ for (j = 0; j < nBins; j++) {
452
+ perBandValue += (inBinPow[j] * curMelFbCoefPtr[j]);
453
+ }
454
+ perBandValue = perBandValue / powerNormal;
455
+ perBandValue = logf(perBandValue + AUP_AED_EPS);
456
+ curInputFeatPtr[i] =
457
+ (perBandValue - aivadFeatMean[i]) / (aivadFeatStd[i] + AUP_AED_EPS);
458
+ }
459
+
460
+ // extra feat.
461
+ for (i = melFbSz; i < stHdl->feaSz; i++) {
462
+ curInputFeatPtr[i] =
463
+ (stHdl->pitchFreq - aivadFeatMean[i]) / (aivadFeatStd[i] + AUP_AED_EPS);
464
+ }
465
+
466
+ // exe. aivad
467
+ // exe. aivad
468
+ float aivadOutput;
469
+ if (stHdl->aivadInf != NULL &&
470
+ stHdl->aivadInf->Process(stHdl->aivadInputFeatStack, &aivadOutput) != 0) {
471
+ return -1;
472
+ }
473
+
474
+ (*aivadScore) = aivadOutput;
475
+
476
+ stHdl->aivadResetCnt += 1;
477
+ if (stHdl->aivadResetCnt >= stHdl->aivadResetFrmNum) {
478
+ if (stHdl->aivadInf != NULL && stHdl->aivadInf->Reset() != 0) {
479
+ }
480
+ stHdl->aivadResetCnt = 0;
481
+ }
482
+
483
+ return 0;
484
+ }
485
+
486
+ static int AUP_Aed_dynamMemPrepare(Aed_St* stHdl, void* memPtrExt,
487
+ size_t memSize) {
488
+ if (stHdl == NULL) {
489
+ return -1;
490
+ }
491
+ size_t pitchInNBins = stHdl->intNBins;
492
+ size_t totalMemSize = 0;
493
+ size_t inputTimeFIFOMemSize = 0;
494
+ size_t inputEmphTimeFIFOMemSize = 0;
495
+ size_t aivadInputCmplxSptrmMemSize = 0;
496
+ size_t aivadInputBinPowMemSize = 0;
497
+ size_t frameRmsBuffMemSize = 0;
498
+ size_t aivadInputFeatStackMemSize = 0;
499
+ size_t aimdInputFeatStackMemSize = 0;
500
+ size_t melFilterBankCoefMemSize = 0;
501
+ size_t melFilterBinBuffMemSize = 0;
502
+ size_t inputFloatBuffMemSize = 0;
503
+
504
+ // size_t vadScoreOutputBuffDelaySample = 384; // buff. delay for output
505
+ char* memPtr = NULL;
506
+
507
+ // size_t nBinsBufferMemSize = AUP_AED_ALIGN8(sizeof(float) * nBins);
508
+ // size_t spctrmMemSize = AUP_AED_ALIGN8(sizeof(float) * (nBins - 1) * 2);
509
+
510
+ inputTimeFIFOMemSize =
511
+ AUP_AED_ALIGN8(sizeof(float) * stHdl->inputTimeFIFOLen);
512
+ totalMemSize += inputTimeFIFOMemSize;
513
+
514
+ inputEmphTimeFIFOMemSize =
515
+ AUP_AED_ALIGN8(sizeof(float) * stHdl->inputTimeFIFOLen);
516
+ totalMemSize += inputEmphTimeFIFOMemSize;
517
+
518
+ aivadInputCmplxSptrmMemSize = AUP_AED_ALIGN8(sizeof(float) * stHdl->intFftSz);
519
+ totalMemSize += aivadInputCmplxSptrmMemSize;
520
+
521
+ aivadInputBinPowMemSize = AUP_AED_ALIGN8(sizeof(float) * stHdl->intNBins);
522
+ totalMemSize += aivadInputBinPowMemSize;
523
+
524
+ aivadInputFeatStackMemSize =
525
+ AUP_AED_ALIGN8(sizeof(float) * stHdl->algCtxtSz * stHdl->feaSz);
526
+ totalMemSize += aivadInputFeatStackMemSize;
527
+
528
+ aimdInputFeatStackMemSize =
529
+ AUP_AED_ALIGN8(sizeof(float) * stHdl->algCtxtSz * stHdl->feaSz);
530
+ totalMemSize += aimdInputFeatStackMemSize;
531
+
532
+ melFilterBankCoefMemSize =
533
+ AUP_AED_ALIGN8(sizeof(float) * pitchInNBins * stHdl->feaSz);
534
+ totalMemSize += melFilterBankCoefMemSize;
535
+
536
+ melFilterBinBuffMemSize = AUP_AED_ALIGN8(sizeof(size_t) * (stHdl->feaSz + 2));
537
+ totalMemSize += melFilterBinBuffMemSize;
538
+
539
+ frameRmsBuffMemSize = AUP_AED_ALIGN8(stHdl->frmRmsBufLen * sizeof(float));
540
+ totalMemSize += frameRmsBuffMemSize;
541
+
542
+ inputFloatBuffMemSize = AUP_AED_ALIGN8(stHdl->extHopSz * sizeof(float));
543
+ totalMemSize += inputFloatBuffMemSize;
544
+
545
+ if (memPtrExt == NULL) {
546
+ return ((int)totalMemSize);
547
+ }
548
+
549
+ if (totalMemSize > memSize) {
550
+ return -1;
551
+ }
552
+
553
+ memPtr = (char*)memPtrExt;
554
+
555
+ stHdl->inputTimeFIFO = (float*)memPtr;
556
+ memPtr += inputTimeFIFOMemSize;
557
+
558
+ stHdl->inputEmphTimeFIFO = (float*)memPtr;
559
+ memPtr += inputEmphTimeFIFOMemSize;
560
+
561
+ stHdl->aivadInputCmplxSptrm = (float*)memPtr;
562
+ memPtr += aivadInputCmplxSptrmMemSize;
563
+
564
+ stHdl->aivadInputBinPow = (float*)memPtr;
565
+ memPtr += aivadInputBinPowMemSize;
566
+
567
+ stHdl->aivadInputFeatStack = (float*)memPtr;
568
+ memPtr += aivadInputFeatStackMemSize;
569
+
570
+ stHdl->melFilterBankCoef = (float*)memPtr;
571
+ memPtr += melFilterBankCoefMemSize;
572
+
573
+ stHdl->melFilterBinBuff = (size_t*)memPtr;
574
+ memPtr += melFilterBinBuffMemSize;
575
+
576
+ stHdl->frameRmsBuff = (float*)memPtr;
577
+ memPtr += frameRmsBuffMemSize;
578
+
579
+ stHdl->inputFloatBuff = (float*)memPtr;
580
+ memPtr += inputFloatBuffMemSize;
581
+
582
+ if (((size_t)(memPtr - (char*)memPtrExt)) > totalMemSize) {
583
+ return -1;
584
+ }
585
+
586
+ return ((int)totalMemSize);
587
+ }
588
+
589
+ static int AUP_Aed_runOneFrm(Aed_St* stHdl, const float* tSignal, int hopSz,
590
+ const float* binPowPtr, int nBins) {
591
+ PE_OutputData peOutData = {0, 0};
592
+ float aivadScore = -1.0f;
593
+ float mediaFilterout = 0;
594
+ int mediaIdx = (int)(AUP_AED_OUTPUT_SMOOTH_FILTER_LEN) / 2;
595
+ int i;
596
+
597
+ if (AUP_Aed_pitch_proc(stHdl->pitchEstStPtr, tSignal, hopSz, binPowPtr, nBins,
598
+ &peOutData) < 0) {
599
+ return -1;
600
+ }
601
+ stHdl->pitchFreq = peOutData.pitchFreq;
602
+ if (AUP_Aed_aivad_proc(stHdl, binPowPtr, &aivadScore) < 0) {
603
+ return -1;
604
+ }
605
+ stHdl->aivadScore = aivadScore;
606
+
607
+ return 0;
608
+ }
609
+
610
+ /// ///////////////////////////////////////////////////////////////////////
611
+ /// Public API
612
+ /// ///////////////////////////////////////////////////////////////////////
613
+
614
+ int AUP_Aed_create(void** stPtr) {
615
+ if (stPtr == NULL) {
616
+ return -1;
617
+ }
618
+ Aed_St* tmpPtr = (Aed_St*)malloc(sizeof(Aed_St));
619
+ if (tmpPtr == NULL) {
620
+ return -1;
621
+ }
622
+ memset(tmpPtr, 0, sizeof(Aed_St));
623
+
624
+ if (AUP_PE_create(&(tmpPtr->pitchEstStPtr)) < 0) {
625
+ return -1;
626
+ }
627
+ if (AUP_Analyzer_create(&(tmpPtr->timeInAnalysis)) < 0) {
628
+ return -1;
629
+ }
630
+
631
+ tmpPtr->stCfg.enableFlag = 1; // as default, module enabled
632
+ tmpPtr->stCfg.fftSz = 1024;
633
+ tmpPtr->stCfg.hopSz = 256;
634
+ tmpPtr->stCfg.anaWindowSz = 768;
635
+ tmpPtr->stCfg.frqInputAvailableFlag = 0;
636
+
637
+ tmpPtr->dynamCfg.extVoiceThr = 0.5f;
638
+ tmpPtr->dynamCfg.extMusicThr = 0.5f;
639
+ tmpPtr->dynamCfg.extEnergyThr = 10.0f;
640
+ tmpPtr->dynamCfg.resetFrameNum = 1875; // TODO
641
+ tmpPtr->dynamCfg.pitchEstVoicedThr = AUP_AED_PITCH_EST_DEFAULT_VOICEDTHR;
642
+
643
+ (*stPtr) = (void*)tmpPtr;
644
+
645
+ return 0;
646
+ }
647
+
648
+ int AUP_Aed_destroy(void** stPtr) {
649
+ if (stPtr == NULL || (*stPtr) == NULL) {
650
+ return -1;
651
+ }
652
+ Aed_St* stHdl = (Aed_St*)(*stPtr);
653
+
654
+ if (stHdl->aivadInf != NULL) {
655
+ delete stHdl->aivadInf;
656
+ }
657
+ stHdl->aivadInf = NULL;
658
+
659
+ if (AUP_PE_destroy(&(stHdl->pitchEstStPtr)) < 0) {
660
+ return -1;
661
+ }
662
+ if (AUP_Analyzer_destroy(&(stHdl->timeInAnalysis)) < 0) {
663
+ return -1;
664
+ }
665
+
666
+ if (stHdl->dynamMemPtr != NULL) {
667
+ free(stHdl->dynamMemPtr);
668
+ }
669
+ stHdl->dynamMemPtr = NULL;
670
+
671
+ if (stHdl != NULL) {
672
+ free(stHdl);
673
+ }
674
+ (*stPtr) = NULL;
675
+
676
+ return 0;
677
+ }
678
+
679
+ int AUP_Aed_memAllocate(void* stPtr, const Aed_StaticCfg* pCfg) {
680
+ Aed_St* stHdl = (Aed_St*)(stPtr);
681
+ Aed_StaticCfg aedStatCfg;
682
+ PE_StaticCfg pitchStatCfg;
683
+ Analyzer_StaticCfg analyzerStatCfg;
684
+ int totalMemSize = 0;
685
+
686
+ if (stPtr == NULL || pCfg == NULL) {
687
+ return -1;
688
+ }
689
+
690
+ // 1th: check static cfg.
691
+ memcpy(&aedStatCfg, pCfg, sizeof(Aed_StaticCfg));
692
+ if (AUP_Aed_checkStatCfg(&aedStatCfg) < 0) {
693
+ return -1;
694
+ }
695
+
696
+ memcpy(&(stHdl->stCfg), &aedStatCfg, sizeof(Aed_StaticCfg));
697
+
698
+ // 2th: publish static configuration to internal statical configuration
699
+ // registers
700
+ if (AUP_Aed_publishStaticCfg(stHdl) < 0) {
701
+ return -1;
702
+ }
703
+
704
+ // 3th: create aivad instance
705
+ if (stHdl->aivadInf == NULL) {
706
+ stHdl->aivadInf = new AUP_MODULE_AIVAD("onnx_model/ten-vad.onnx");
707
+ if (stHdl->aivadInf == NULL) {
708
+ return -1;
709
+ }
710
+ }
711
+ stHdl->aivadInf->Reset();
712
+
713
+ // 4th: memAllocate operation for Pitch-Estimator ............
714
+ if (AUP_PE_getStaticCfg(stHdl->pitchEstStPtr, &pitchStatCfg) < 0) {
715
+ return -1;
716
+ }
717
+ pitchStatCfg.fftSz = stHdl->intFftSz;
718
+ pitchStatCfg.anaWindowSz = stHdl->intWinSz;
719
+ pitchStatCfg.hopSz = stHdl->intHopSz;
720
+ pitchStatCfg.useLPCPreFiltering = AUP_AED_PITCH_EST_USE_LPC;
721
+ pitchStatCfg.procFs = AUP_AED_PITCH_EST_PROCFS;
722
+ if (AUP_PE_memAllocate(stHdl->pitchEstStPtr, &pitchStatCfg) < 0) {
723
+ return -1;
724
+ }
725
+
726
+ // creation and initialization with time-analysis module ......
727
+ AUP_Analyzer_getStaticCfg(stHdl->timeInAnalysis, &analyzerStatCfg);
728
+ analyzerStatCfg.win_len = (int)stHdl->intWinSz;
729
+ analyzerStatCfg.hop_size = (int)stHdl->intHopSz;
730
+ analyzerStatCfg.fft_size = (int)stHdl->intFftSz;
731
+ analyzerStatCfg.ana_win_coeff = stHdl->intAnalyWindowPtr;
732
+ if (AUP_Analyzer_memAllocate(stHdl->timeInAnalysis, &analyzerStatCfg) < 0) {
733
+ return -1;
734
+ }
735
+
736
+ // 5th: check memory requirement ..............................
737
+ totalMemSize = AUP_Aed_dynamMemPrepare(stHdl, NULL, 0);
738
+ if (totalMemSize < 0) {
739
+ return -1;
740
+ }
741
+
742
+ // 6th: allocate dynamic memory
743
+ if (totalMemSize > (int)stHdl->dynamMemSize) {
744
+ if (stHdl->dynamMemPtr != NULL) {
745
+ free(stHdl->dynamMemPtr);
746
+ stHdl->dynamMemPtr = NULL;
747
+ stHdl->dynamMemSize = 0;
748
+ }
749
+ stHdl->dynamMemPtr = malloc(totalMemSize);
750
+ if (stHdl->dynamMemPtr == NULL) {
751
+ return -1;
752
+ }
753
+ stHdl->dynamMemSize = totalMemSize;
754
+ }
755
+ memset(stHdl->dynamMemPtr, 0, stHdl->dynamMemSize);
756
+
757
+ // 7th: setup the pointers/variable
758
+ if (AUP_Aed_dynamMemPrepare(stHdl, stHdl->dynamMemPtr, stHdl->dynamMemSize) <
759
+ 0) {
760
+ return -1;
761
+ }
762
+
763
+ // 8th: publish internal dynamic config registers
764
+ if (AUP_Aed_publishDynamCfg(stHdl) < 0) {
765
+ return -1;
766
+ }
767
+
768
+ return 0;
769
+ }
770
+
771
+ int AUP_Aed_init(void* stPtr) {
772
+ Aed_St* stHdl = (Aed_St*)(stPtr);
773
+ if (stPtr == NULL) {
774
+ return -1;
775
+ }
776
+
777
+ // publish internal dynamic config registers
778
+ if (AUP_Aed_publishDynamCfg(stHdl) < 0) {
779
+ return -1;
780
+ }
781
+
782
+ // clear/reset run-time variables
783
+ if (AUP_Aed_resetVariables(stHdl) < 0) {
784
+ return -1;
785
+ }
786
+
787
+ return 0;
788
+ }
789
+
790
+ int AUP_Aed_setDynamCfg(void* stPtr, const Aed_DynamCfg* pCfg) {
791
+ Aed_St* stHdl = (Aed_St*)(stPtr);
792
+
793
+ if (stPtr == NULL || pCfg == NULL) {
794
+ return -1;
795
+ }
796
+
797
+ memcpy(&(stHdl->dynamCfg), pCfg, sizeof(Aed_DynamCfg));
798
+
799
+ // publish internal dynamic configuration registers
800
+ if (AUP_Aed_publishDynamCfg(stHdl) < 0) {
801
+ return -1;
802
+ }
803
+
804
+ return 0;
805
+ }
806
+
807
+ int AUP_Aed_getStaticCfg(const void* stPtr, Aed_StaticCfg* pCfg) {
808
+ const Aed_St* stHdl = (const Aed_St*)(stPtr);
809
+
810
+ if (stPtr == NULL || pCfg == NULL) {
811
+ return -1;
812
+ }
813
+
814
+ memcpy(pCfg, &(stHdl->stCfg), sizeof(Aed_StaticCfg));
815
+
816
+ return 0;
817
+ }
818
+
819
+ int AUP_Aed_getDynamCfg(const void* stPtr, Aed_DynamCfg* pCfg) {
820
+ const Aed_St* stHdl = (const Aed_St*)(stPtr);
821
+
822
+ if (stPtr == NULL || pCfg == NULL) {
823
+ return -1;
824
+ }
825
+
826
+ memcpy(pCfg, &(stHdl->dynamCfg), sizeof(Aed_DynamCfg));
827
+
828
+ return 0;
829
+ }
830
+
831
+ int AUP_Aed_getAlgDelay(const void* stPtr, int* delayInFrms) {
832
+ const Aed_St* stHdl = (const Aed_St*)(stPtr);
833
+
834
+ if (stPtr == NULL || delayInFrms == NULL) {
835
+ return -1;
836
+ }
837
+
838
+ (*delayInFrms) = (int)stHdl->algDelay;
839
+
840
+ return 0;
841
+ }
842
+
843
+ int AUP_Aed_proc(void* stPtr, const Aed_InputData* pIn, Aed_OutputData* pOut) {
844
+ Analyzer_InputData analyzerInput;
845
+ Analyzer_OutputData analyzerOutput;
846
+ Aed_St* stHdl = (Aed_St*)(stPtr);
847
+
848
+ const float* binPowPtr = NULL;
849
+ float frameRms = 0.0f;
850
+ float frameEnergy = 0.0f;
851
+ float powerNormal = 32768.0f * 32768.0f;
852
+ int idx;
853
+
854
+ if (stPtr == NULL) {
855
+ return -1;
856
+ }
857
+ if (stHdl->stCfg.enableFlag == 0) { // this module is disabled
858
+ return 0;
859
+ }
860
+ if (pIn == NULL || pIn->timeSignal == NULL || pOut == NULL) {
861
+ return -1;
862
+ }
863
+
864
+ if (stHdl->intAnalyFlag != 2) { // the external spectra is going to be used
865
+ if (pIn->binPower == NULL) {
866
+ return -1;
867
+ }
868
+ if (pIn->nBins != (int)((stHdl->stCfg.fftSz >> 1) + 1) ||
869
+ pIn->hopSz != (int)(stHdl->stCfg.hopSz)) {
870
+ return -1;
871
+ }
872
+ }
873
+
874
+ // cal. input frame energy ....
875
+ for (idx = 0; idx < pIn->hopSz; idx++) {
876
+ frameRms += (pIn->timeSignal[idx] * pIn->timeSignal[idx]);
877
+ }
878
+ frameEnergy = frameRms;
879
+ frameRms = sqrtf(frameRms / (float)pIn->hopSz);
880
+ memmove(stHdl->frameRmsBuff, stHdl->frameRmsBuff + 1,
881
+ sizeof(float) * (stHdl->frmRmsBufLen - 1));
882
+ stHdl->frameRmsBuff[stHdl->frmRmsBufLen - 1] = frameRms;
883
+
884
+ // input signal conversion .........
885
+ if ((stHdl->inputTimeFIFOIdx + pIn->hopSz) > (int)stHdl->inputTimeFIFOLen) {
886
+ return -1;
887
+ }
888
+
889
+ // update pre-emphasis time signal FIFO
890
+ float* timeSigEphaPtr = stHdl->inputEmphTimeFIFO + stHdl->inputTimeFIFOIdx;
891
+ for (idx = 0; idx < pIn->hopSz; idx++) {
892
+ timeSigEphaPtr[idx] = pIn->timeSignal[idx] - 0.97f * stHdl->timeSignalPre;
893
+ stHdl->timeSignalPre = pIn->timeSignal[idx];
894
+ }
895
+
896
+ memcpy(stHdl->inputTimeFIFO + stHdl->inputTimeFIFOIdx, pIn->timeSignal,
897
+ sizeof(float) * (pIn->hopSz));
898
+ stHdl->inputTimeFIFOIdx += pIn->hopSz;
899
+
900
+ if (stHdl->intAnalyFlag == 0) { // directly use external spectra
901
+ if (stHdl->inputTimeFIFOIdx != (int)(stHdl->intHopSz) ||
902
+ (int)(stHdl->intNBins) != pIn->nBins) {
903
+ return -1;
904
+ }
905
+
906
+ // one-time processing ...
907
+ stHdl->aedProcFrmCnt = AUP_Aed_addOneCnter(stHdl->aedProcFrmCnt);
908
+ binPowPtr = pIn->binPower;
909
+
910
+ // update: stHdl->pitchFreq, stHdl->aivadScore
911
+ if (AUP_Aed_runOneFrm(stHdl, stHdl->inputTimeFIFO, (int)stHdl->intHopSz,
912
+ binPowPtr, (int)stHdl->intNBins) < 0) {
913
+ return -1;
914
+ }
915
+
916
+ // update the inputTimeFIFO
917
+ stHdl->inputTimeFIFOIdx = 0;
918
+ } else if (stHdl->intAnalyFlag ==
919
+ 1) { // do interpolation or extrapolation with external spectra
920
+ if (stHdl->inputTimeFIFOIdx != (int)(stHdl->intHopSz) ||
921
+ (int)(stHdl->extNBins) != pIn->nBins) {
922
+ return -1;
923
+ }
924
+
925
+ // one-time processing ....
926
+ stHdl->aedProcFrmCnt = AUP_Aed_addOneCnter(stHdl->aedProcFrmCnt);
927
+ AUP_Aed_binPowerConvert(pIn->binPower, stHdl->aivadInputBinPow,
928
+ (int)stHdl->extNBins, (int)stHdl->intNBins);
929
+ binPowPtr = stHdl->aivadInputBinPow;
930
+
931
+ // update: stHdl->pitchFreq, stHdl->aivadScore
932
+ if (AUP_Aed_runOneFrm(stHdl, stHdl->inputTimeFIFO, (int)stHdl->intHopSz,
933
+ binPowPtr, (int)stHdl->intNBins) < 0) {
934
+ return -1;
935
+ }
936
+
937
+ // update the inputTimeFIFO
938
+ stHdl->inputTimeFIFOIdx = 0;
939
+ } else { // we need to do STFT on the input time-signal
940
+ if (stHdl->timeInAnalysis == NULL) {
941
+ return -1;
942
+ }
943
+
944
+ // loop processing .....
945
+ while (stHdl->inputTimeFIFOIdx >= (int)stHdl->intHopSz) {
946
+ stHdl->aedProcFrmCnt = AUP_Aed_addOneCnter(stHdl->aedProcFrmCnt);
947
+
948
+ analyzerInput.input = stHdl->inputEmphTimeFIFO;
949
+ analyzerInput.iLength = (int)stHdl->intHopSz;
950
+ analyzerOutput.output = stHdl->aivadInputCmplxSptrm;
951
+ analyzerOutput.oLength = (int)stHdl->intFftSz;
952
+ if (AUP_Analyzer_proc(stHdl->timeInAnalysis, &analyzerInput,
953
+ &analyzerOutput) < 0) {
954
+ return -1;
955
+ }
956
+
957
+ AUP_Aed_CalcBinPow((int)stHdl->intNBins, stHdl->aivadInputCmplxSptrm,
958
+ stHdl->aivadInputBinPow);
959
+ binPowPtr = stHdl->aivadInputBinPow;
960
+
961
+ // update: stHdl->pitchFreq, stHdl->aivadScore
962
+ if (AUP_Aed_runOneFrm(stHdl, stHdl->inputTimeFIFO, (int)stHdl->intHopSz,
963
+ binPowPtr, (int)stHdl->intNBins) < 0) {
964
+ return -1;
965
+ }
966
+
967
+ // update the inputTimeFIFO & inputEmphTimeFIFO.....
968
+ if (stHdl->inputTimeFIFOIdx > (int)stHdl->intHopSz) {
969
+ memcpy(stHdl->inputTimeFIFO, stHdl->inputTimeFIFO + stHdl->intHopSz,
970
+ sizeof(float) * (stHdl->inputTimeFIFOIdx - stHdl->intHopSz));
971
+ memcpy(stHdl->inputEmphTimeFIFO,
972
+ stHdl->inputEmphTimeFIFO + stHdl->intHopSz,
973
+ sizeof(float) * (stHdl->inputTimeFIFOIdx - stHdl->intHopSz));
974
+ }
975
+ stHdl->inputTimeFIFOIdx -= (int)stHdl->intHopSz;
976
+ }
977
+ }
978
+
979
+ // write to output res.
980
+ pOut->frameEnergy = frameEnergy / powerNormal;
981
+ pOut->frameRms = stHdl->frameRmsBuff[0];
982
+ pOut->pitchFreq = stHdl->pitchFreq;
983
+ pOut->voiceProb = stHdl->aivadScore;
984
+ if (pOut->voiceProb < 0.0f) {
985
+ pOut->vadRes = -1;
986
+ } else if (pOut->voiceProb <= stHdl->voiceDecideThresh) {
987
+ pOut->vadRes = 0;
988
+ } else {
989
+ pOut->vadRes = 1;
990
+ }
991
+
992
+ return 0;
993
+ }
src/aed.h ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //
2
+ // Copyright © 2025 Agora
3
+ // This file is part of TEN Framework, an open source project.
4
+ // Licensed under the Apache License, Version 2.0, with certain conditions.
5
+ // Refer to the "LICENSE" file in the root directory for more information.
6
+ //
7
+ #ifndef __AED_H__
8
+ #define __AED_H__
9
+
10
+ #include <stdint.h>
11
+ #include <stdlib.h>
12
+
13
+ #define AUP_AED_MAX_FFT_SZ (1024) // the max. fft-size supported by VAD module
14
+ #define AUP_AED_MAX_NBINS ((AUP_AED_MAX_FFT_SZ >> 1) + 1)
15
+
16
+ #define AUP_AED_FS (16000) // assumed input freq.
17
+
18
// Static configuration: these parameters affect the dynamic memory
// occupation and can only be set during allocation.
typedef struct Aed_StaticCfg_ {
  // Module switch: 0 disables the module, any other value enables it.
  int enableFlag;
  // FFT size; only 128, 256, 512 and 1024 are supported.
  size_t fftSz;
  // FFT hop size, will be used to check.
  size_t hopSz;
  // FFT window size, will be used to calculate the rms.
  size_t anaWindowSz;
  // Whether Aed_InputData will contain an external frequency power spectrum.
  int frqInputAvailableFlag;
} Aed_StaticCfg;
29
+
30
// Dynamic configuration: parameters that can be modified / set every frame.
typedef struct Aed_DynamCfg_ {
  // Threshold for the AI based voice decision, in [0, 1].
  float extVoiceThr;
  // Threshold for the AI based music decision, in [0, 1].
  float extMusicThr;
  // Threshold for the energy based vad decision, in [0, ---].
  float extEnergyThr;
  // Frame number for the aivad reset, in [1875, 75000].
  size_t resetFrameNum;
  // Threshold for the pitch estimator to output an estimated pitch.
  float pitchEstVoicedThr;
} Aed_DynamCfg;
39
+
40
// Per-frame input. Spectra are assumed to be generated from time-domain
// samples in [-32768, 32767], with or without pre-emphasis.
typedef struct Aed_InputData_ {
  // [nBins] power spectrum of the 16 kHz samples.
  const float* binPower;
  // Number of bins in binPower.
  int nBins;
  // [hopSz] this frame's input signal, in [-32768, 32767].
  const float* timeSignal;
  // Number of samples in timeSignal; should be equal to StaticCfg->hopSz.
  int hopSz;
} Aed_InputData;
49
+
50
// Per-frame result returned by the module.
typedef struct Aed_OutputData_ {
  // Frame energy of the normalized input data.
  float frameEnergy;
  // Rms of the input int16 data.
  float frameRms;
  // Vad result 0/1 with extEnergyThreshold, based on the input frame energy.
  int energyVadRes;
  // Vad score, in [0, 1].
  float voiceProb;
  // Vad result 0/1 with extVoiceThr, based on the AI method; the result at
  // t + 16ms corresponds to the input at t.
  int vadRes;
  // Estimated pitch frequency.
  float pitchFreq;
} Aed_OutputData;
61
+
62
+ #ifdef __cplusplus
63
+ extern "C" {
64
+ #endif
65
+
66
+ /****************************************************************************
67
+ * AUP_Aed_create(...)
68
+ *
69
+ * This function creates a state handler from nothing, which is NOT ready for
70
+ * processing
71
+ *
72
+ * Input:
73
+ *
74
+ * Output:
75
+ * - stPtr : buffer to store the returned state handler
76
+ *
77
+ * Return value : 0 - Ok
78
+ * -1 - Error
79
+ */
80
+ int AUP_Aed_create(void** stPtr);
81
+
82
+ /****************************************************************************
83
+ * AUP_Aed_destroy(...)
84
+ *
85
+ * destroy VAD instance, and releasing all the dynamically allocated memory
86
+ * this interface will also release ainsFactory, which was
87
+ * created externally and passed to VAD module through memAllocate interface
88
+ *
89
+ * Input:
90
+ * - stPtr : buffer of State Handler, after this method, this
91
+ * handler won't be usable anymore
92
+ *
93
+ * Output:
94
+ *
95
+ * Return value : 0 - Ok
96
+ * -1 - Error
97
+ */
98
+ int AUP_Aed_destroy(void** stPtr);
99
+
100
+ /****************************************************************************
101
+ * AUP_Aed_memAllocate(...)
102
+ *
103
+ * This function sets Static Config params and does memory allocation
104
+ * operation, will lose the dynamCfg values
105
+ *
106
+ * Input:
107
+ * - stPtr : State Handler which was returned by _create
108
+ * - pCfg : static configuration parameters
109
+ *
110
+ * Output:
111
+ *
112
+ * Return value : 0 - Ok
113
+ * -1 - Error
114
+ */
115
+ int AUP_Aed_memAllocate(void* stPtr, const Aed_StaticCfg* pCfg);
116
+
117
+ /****************************************************************************
118
+ * AUP_Aed_init(...)
119
+ *
120
+ * This function resets (initialize) the VAD module and gets it prepared for
121
+ * processing
122
+ *
123
+ * Input:
124
+ * - stPtr : State Handler which has gone through create and
125
+ * memAllocate
126
+ *
127
+ * Output:
128
+ *
129
+ * Return value : 0 - Ok
130
+ * -1 - Error
131
+ */
132
+ int AUP_Aed_init(void* stPtr);
133
+
134
+ /****************************************************************************
135
+ * AUP_Aed_setDynamCfg(...)
136
+ *
137
+ * This function set dynamic (per-frame variable) configuration
138
+ *
139
+ * Input:
140
+ * - stPtr : State Handler which has gone through create and
141
+ * memAllocate
142
+ * - pCfg : configuration content
143
+ *
144
+ * Output:
145
+ *
146
+ * Return value : 0 - Ok
147
+ * -1 - Error
148
+ */
149
+ int AUP_Aed_setDynamCfg(void* stPtr, const Aed_DynamCfg* pCfg);
150
+
151
+ /****************************************************************************
152
+ * AUP_Aed_getStaticCfg(...)
153
+ *
154
+ * This function get static configuration status from VAD module
155
+ *
156
+ * Input:
157
+ * - stPtr : State Handler which has gone through create and
158
+ * memAllocate
159
+ *
160
+ * Output:
161
+ * - pCfg : configuration content
162
+ *
163
+ * Return value : 0 - Ok
164
+ * -1 - Error
165
+ */
166
+ int AUP_Aed_getStaticCfg(const void* stPtr, Aed_StaticCfg* pCfg);
167
+
168
+ /****************************************************************************
169
+ * AUP_Aed_getDynamCfg(...)
170
+ *
171
+ * This function get dynamic (per-frame variable) configuration status from
172
+ * VAD module
173
+ *
174
+ * Input:
175
+ * - stPtr : State Handler which has gone through create and
176
+ * memAllocate
177
+ *
178
+ * Output:
179
+ * - pCfg : configuration content
180
+ *
181
+ * Return value : 0 - Ok
182
+ * -1 - Error
183
+ */
184
+ int AUP_Aed_getDynamCfg(const void* stPtr, Aed_DynamCfg* pCfg);
185
+
186
+ /****************************************************************************
187
+ * AUP_Aed_getAlgDelay(...)
188
+ *
189
+ * This function get algorithm delay from VAD module
190
+ *
191
+ * Input:
192
+ * - stPtr : State Handler which has gone through create and
193
+ * memAllocate
194
+ *
195
+ * Output:
196
+ * - delayInFrms : algorithm delay in terms of frames
197
+ *
198
+ * Return value : 0 - Ok
199
+ * -1 - Error
200
+ */
201
+ int AUP_Aed_getAlgDelay(const void* stPtr, int* delayInFrms);
202
+
203
+ /****************************************************************************
204
+ * AUP_Aed_proc(...)
205
+ *
206
+ * process a single frame
207
+ *
208
+ * Input:
209
+ * - stPtr : State Handler which has gone through create and
210
+ * memAllocate and reset
211
+ * - pCtrl : per-frame variable control parameters
212
+ * - pIn : input data stream
213
+ *
214
+ * Output:
215
+ * - pOut : output data (mask, highband time-domain gain etc.)
216
+ *
217
+ * Return value : 0 - Ok
218
+ * -1 - Error
219
+ */
220
+ int AUP_Aed_proc(void* stPtr, const Aed_InputData* pIn, Aed_OutputData* pOut);
221
+
222
+ #ifdef __cplusplus
223
+ }
224
+ #endif
225
+
226
+ #endif
src/aed_st.h ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //
2
+ // Copyright © 2025 Agora
3
+ // This file is part of TEN Framework, an open source project.
4
+ // Licensed under the Apache License, Version 2.0, with certain conditions.
5
+ // Refer to the "LICENSE" file in the root directory for more information.
6
+ //
7
+ #ifndef __AED_ST_H__
8
+ #define __AED_ST_H__
9
+
10
+ #include <stdio.h>
11
+ #include <onnxruntime_c_api.h>
12
+
13
+ #include "aed.h"
14
+
15
+ #define AUP_AED_FS (16000)
16
+ #define AUP_AED_MAX_IN_BUFF_SIZE (256)
17
+ #define AUP_AED_POWER_SPCTR_NORMALIZER (9.3132e-10f) // = 1/(32768^2)
18
+ #define AUP_AED_OUTPUT_SMOOTH_FILTER_LEN (10) // 160ms
19
+
20
+ #define AUP_AED_MEL_FILTER_BANK_NUM (40)
21
+ #define AUP_AED_LOOKAHEAD_NFRM (1)
22
+ #define AUP_AED_CONTEXT_WINDOW_LEN (3) // context window length of AIVAD
23
+ #define AUP_AED_FEA_LEN \
24
+ (AUP_AED_MEL_FILTER_BANK_NUM + 1) // feature length of AIVAD
25
+
26
+ #define AUP_AED_PITCH_EST_USE_LPC (1)
27
+ #define AUP_AED_PITCH_EST_PROCFS (4000)
28
+ #if AUP_AED_PITCH_EST_PROCFS == 2000
29
+ #define AUP_AED_PITCH_EST_DEFAULT_VOICEDTHR (0.45f)
30
+ #else
31
+ #define AUP_AED_PITCH_EST_DEFAULT_VOICEDTHR (0.4f)
32
+ #endif
33
+
34
+ #define AUP_AED_MODEL_IO_NUM (5)
35
+ #define AUP_AED_MODEL_NAME_LENGTH (32)
36
+ #define AUP_AED_MODEL_HIDDEN_DIM (64)
37
+
38
+ class AUP_MODULE_AIVAD {
39
+ public:
40
+ AUP_MODULE_AIVAD(char* onnx_path);
41
+ ~AUP_MODULE_AIVAD();
42
+ int Process(float* input, float* output);
43
+ int Reset();
44
+
45
+ private:
46
+ const OrtApi* ort_api = NULL;
47
+ OrtAllocator* ort_allocator = NULL;
48
+ OrtEnv* ort_env = NULL;
49
+ OrtSession* ort_session = NULL;
50
+ int inited = 0;
51
+ int clear_hidden = 0;
52
+
53
+ char input_names_buf[AUP_AED_MODEL_IO_NUM][AUP_AED_MODEL_NAME_LENGTH] = {0};
54
+ const char* input_names[AUP_AED_MODEL_IO_NUM] = {NULL};
55
+ float input_data_buf_0[AUP_AED_CONTEXT_WINDOW_LEN * AUP_AED_FEA_LEN] = {0};
56
+ float input_data_buf_1234[AUP_AED_MODEL_IO_NUM - 1]
57
+ [AUP_AED_MODEL_HIDDEN_DIM] = {0};
58
+ OrtValue* ort_input_tensors[AUP_AED_MODEL_IO_NUM] = {NULL};
59
+
60
+ char output_names_buf[AUP_AED_MODEL_IO_NUM][AUP_AED_MODEL_NAME_LENGTH] = {0};
61
+ const char* output_names[AUP_AED_MODEL_IO_NUM] = {NULL};
62
+ OrtValue* ort_output_tensors[AUP_AED_MODEL_IO_NUM] = {NULL};
63
+ };
64
+
65
+ typedef struct Aed_St_ {
66
+ void* dynamMemPtr; // memory pointer holding the dynamic memory
67
+ size_t dynamMemSize; // size of the buffer *dynamMemPtr
68
+
69
+ Aed_StaticCfg stCfg;
70
+
71
+ Aed_DynamCfg dynamCfg;
72
+
73
+ // Internal Static Config Registers, which are generated from stCfg
74
+ size_t extFftSz; // externally decided FFT-Sz
75
+ size_t extHopSz; // externally decided FFT-Hop-Sz
76
+ size_t extNBins; // (FFTSz/2) + 1
77
+ size_t extWinSz; // externally decided FFT-Window-Sz
78
+
79
+ size_t intFftSz; // internal FFT Sz
80
+ size_t intHopSz; // internal Hop Sz
81
+ size_t intWinSz; // internal Window Sz
82
+ size_t intNBins; // internal NBins
83
+ const float* intAnalyWindowPtr; // internal analysis pointer
84
+ int intAnalyFlag; // whether to do internal analysis
85
+ // 0: directly use external spectrum
86
+ // 1: use external spectrum with interpolation / extrapolation
87
+ // 2: need to redo analysis based on input time-domain signal
88
+ size_t inputTimeFIFOLen; // length of input FIFO buffer
89
+ // if = 0: no need for input time-domain FIFO Queue
90
+
91
+ // Internal static config registers for pitch-est module
92
+ size_t feaSz;
93
+ size_t melFbSz;
94
+ size_t algDelay; // in terms of processing frames
95
+ size_t algCtxtSz;
96
+ size_t frmRmsBufLen; // frameRmsBuff: buffer-length of frameRmsBuff (FIFO)
97
+
98
+ // Internal dynamic Config Registers, which are generated from dynamCfg
99
+ size_t aivadResetFrmNum;
100
+ float voiceDecideThresh;
101
+
102
+ // SubModules
103
+ AUP_MODULE_AIVAD* aivadInf;
104
+
105
+ void* pitchEstStPtr; // pitch-estimation module handler
106
+ void* timeInAnalysis;
107
+ // state handler of STFT analysis module
108
+
109
+ // Variables
110
+ int aedProcFrmCnt; // counter of consecutive AI-VAD processed frames
111
+ int inputTimeFIFOIdx;
112
+ float* inputTimeFIFO; // [inputTimeFIFOLen]
113
+ // input fifo buffer of time-signal to adjust between extHopSz and intHopSz
114
+ float* inputEmphTimeFIFO; // [inputTimeFIFOLen]
115
+ float* aivadInputCmplxSptrm; // [intFftSz]
116
+ float* aivadInputBinPow; // [intNBins] // AIVAD input power spectrum
117
+ size_t aivadResetCnt;
118
+ float timeSignalPre;
119
+ float aivadScore;
120
+ float aivadScorePre;
121
+
122
+ float pitchFreq; // input audio pitch in Hz
123
+ float* frameRmsBuff; // [frmRmsBufLen], FIFO, to delay frmRms result so that
124
+ // it aligns with AIVAD result
125
+ float* aivadInputFeatStack; // [...] = [AUP_AED_CONTEXT_WINDOW_LEN *
126
+ // AUP_AED_FEA_LEN]
127
+ float* melFilterBankCoef; // [melFbSz][nBins]
128
+ size_t* melFilterBinBuff; // [melFbSz + 2]
129
+ float* inputFloatBuff; // [hopSz]
130
+ } Aed_St;
131
+
132
+ #endif
src/biquad.cc ADDED
@@ -0,0 +1,354 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //
2
+ // Copyright © 2025 Agora
3
+ // This file is part of TEN Framework, an open source project.
4
+ // Licensed under the Apache License, Version 2.0, with certain conditions.
5
+ // Refer to the "LICENSE" file in the root directory for more information.
6
+ //
7
+ #include "biquad.h"
8
+
9
+ #include <math.h>
10
+ #include <stdlib.h>
11
+ #include <string.h>
12
+ #include <stdio.h>
13
+
14
+ #include "biquad_st.h"
15
+
16
+ #define AUP_BIQUAD_NUM_DUMP_FILES (20)
17
+ #define AUP_BIQUAD_DUMP_FILENAMES (200)
18
+
19
+ // ==========================================================================================
20
+ // internal tools
21
+ // ==========================================================================================
22
+
23
+ static int AUP_Biquad_checkStatCfg(const Biquad_StaticCfg* pCfg) {
24
+ int secIdx;
25
+ if (pCfg == NULL) {
26
+ return -1;
27
+ }
28
+
29
+ if (pCfg->maxNSample == 0 ||
30
+ pCfg->maxNSample > AGORA_UAP_BIQUAD_MAX_INPUT_LEN) {
31
+ return -1;
32
+ }
33
+ if (pCfg->nsect > AGORA_UAP_BIQUAD_MAX_SECTION) {
34
+ return -1;
35
+ }
36
+
37
+ // if external filter coefficients are required, we need to check the
38
+ // external filter coeff pointers' validness
39
+ if (pCfg->nsect > 0) {
40
+ for (secIdx = 0; secIdx < pCfg->nsect; secIdx++) {
41
+ if (pCfg->B[secIdx] == NULL || pCfg->A[secIdx] == NULL) {
42
+ return -1;
43
+ }
44
+ }
45
+ if (pCfg->G == NULL) {
46
+ return -1;
47
+ }
48
+ }
49
+
50
+ return 0;
51
+ }
52
+
53
+ static int AUP_Biquad_publishStaticCfg(Biquad_St* stHdl) {
54
+ const Biquad_StaticCfg* pStatCfg;
55
+ int idx;
56
+
57
+ if (stHdl == NULL) {
58
+ return -1;
59
+ }
60
+ pStatCfg = (const Biquad_StaticCfg*)(&(stHdl->stCfg));
61
+
62
+ stHdl->maxNSample = (int)pStatCfg->maxNSample;
63
+
64
+ // first, give default (all-pass-filter) values to filter coeffs
65
+ for (idx = 0; idx < AGORA_UAP_BIQUAD_MAX_SECTION; idx++) {
66
+ stHdl->BCoeff[idx][0] = 1.0f;
67
+ stHdl->BCoeff[idx][1] = 0;
68
+ stHdl->BCoeff[idx][2] = 0;
69
+ stHdl->ACoeff[idx][0] = 1.0f;
70
+ stHdl->ACoeff[idx][1] = 0;
71
+ stHdl->ACoeff[idx][2] = 0;
72
+ stHdl->GCoeff[idx] = 1.0f;
73
+ }
74
+
75
+ if (pStatCfg->nsect <= 0) {
76
+ stHdl->nsect = _BIQUAD_DC_REMOVAL_NSECT;
77
+ for (idx = 0; idx < stHdl->nsect; idx++) {
78
+ stHdl->BCoeff[idx][0] = _BIQUAD_DC_REMOVAL_B[idx][0];
79
+ stHdl->BCoeff[idx][1] = _BIQUAD_DC_REMOVAL_B[idx][1];
80
+ stHdl->BCoeff[idx][2] = _BIQUAD_DC_REMOVAL_B[idx][2];
81
+ stHdl->ACoeff[idx][0] = _BIQUAD_DC_REMOVAL_A[idx][0];
82
+ stHdl->ACoeff[idx][1] = _BIQUAD_DC_REMOVAL_A[idx][1];
83
+ stHdl->ACoeff[idx][2] = _BIQUAD_DC_REMOVAL_A[idx][2];
84
+ stHdl->GCoeff[idx] = _BIQUAD_DC_REMOVAL_G[idx];
85
+ }
86
+ } else {
87
+ stHdl->nsect = pStatCfg->nsect;
88
+ for (idx = 0; idx < stHdl->nsect; idx++) {
89
+ stHdl->BCoeff[idx][0] = pStatCfg->B[idx][0];
90
+ stHdl->BCoeff[idx][1] = pStatCfg->B[idx][1];
91
+ stHdl->BCoeff[idx][2] = pStatCfg->B[idx][2];
92
+
93
+ stHdl->ACoeff[idx][0] = pStatCfg->A[idx][0];
94
+ stHdl->ACoeff[idx][1] = pStatCfg->A[idx][1];
95
+ stHdl->ACoeff[idx][2] = pStatCfg->A[idx][2];
96
+
97
+ stHdl->GCoeff[idx] = pStatCfg->G[idx];
98
+ }
99
+ }
100
+
101
+ return 0;
102
+ }
103
+
104
+ static int AUP_Biquad_resetVariables(Biquad_St* stHdl) {
105
+ memset(stHdl->dynamMemPtr, 0, stHdl->dynamMemSize);
106
+ memset(stHdl->sectW, 0, sizeof(stHdl->sectW));
107
+
108
+ return 0;
109
+ }
110
+
111
+ // ==========================================================================================
112
+ // public APIs
113
+ // ==========================================================================================
114
+
115
+ int AUP_Biquad_create(void** stPtr) {
116
+ Biquad_St* tmpPtr;
117
+
118
+ if (stPtr == NULL) {
119
+ return -1;
120
+ }
121
+ *stPtr = (void*)malloc(sizeof(Biquad_St));
122
+ if (*stPtr == NULL) {
123
+ return -1;
124
+ }
125
+ memset(*stPtr, 0, sizeof(Biquad_St));
126
+
127
+ tmpPtr = (Biquad_St*)(*stPtr);
128
+
129
+ tmpPtr->dynamMemPtr = NULL;
130
+ tmpPtr->dynamMemSize = 0;
131
+
132
+ tmpPtr->stCfg.maxNSample = 768;
133
+ tmpPtr->stCfg.nsect = 0;
134
+ for (int idx = 0; idx < AGORA_UAP_BIQUAD_MAX_SECTION; idx++) {
135
+ tmpPtr->stCfg.A[idx] = NULL;
136
+ tmpPtr->stCfg.B[idx] = NULL;
137
+ }
138
+ tmpPtr->stCfg.G = NULL;
139
+
140
+ return 0;
141
+ }
142
+
143
+ int AUP_Biquad_destroy(void** stPtr) {
144
+ Biquad_St* stHdl;
145
+
146
+ if (stPtr == NULL) {
147
+ return 0;
148
+ }
149
+
150
+ stHdl = (Biquad_St*)(*stPtr);
151
+ if (stHdl == NULL) {
152
+ return 0;
153
+ }
154
+
155
+ if (stHdl->dynamMemPtr != NULL) {
156
+ free(stHdl->dynamMemPtr);
157
+ }
158
+ stHdl->dynamMemPtr = NULL;
159
+
160
+ free(stHdl);
161
+
162
+ (*stPtr) = NULL;
163
+
164
+ return 0;
165
+ }
166
+
167
+ int AUP_Biquad_memAllocate(void* stPtr, const Biquad_StaticCfg* pCfg) {
168
+ Biquad_St* stHdl = NULL;
169
+ char* memPtr = NULL;
170
+ int maxNSample, nsect, idx;
171
+
172
+ int inputTempBufMemSize = 0;
173
+ int sectOutputBufMemSize_EACH = 0;
174
+ int totalMemSize = 0;
175
+
176
+ if (stPtr == NULL || pCfg == NULL) {
177
+ return -1;
178
+ }
179
+ stHdl = (Biquad_St*)(stPtr);
180
+
181
+ if (AUP_Biquad_checkStatCfg(pCfg) < 0) {
182
+ return -1;
183
+ }
184
+ memcpy(&(stHdl->stCfg), pCfg, sizeof(Biquad_StaticCfg));
185
+
186
+ if (AUP_Biquad_publishStaticCfg(stHdl) < 0) {
187
+ return -1;
188
+ }
189
+ maxNSample = stHdl->maxNSample;
190
+ nsect = stHdl->nsect;
191
+
192
+ // check memory requirement
193
+ inputTempBufMemSize = AGORA_UAP_BIQUAD_ALIGN8(sizeof(float) * maxNSample);
194
+ totalMemSize += inputTempBufMemSize;
195
+
196
+ sectOutputBufMemSize_EACH =
197
+ AGORA_UAP_BIQUAD_ALIGN8(sizeof(float) * maxNSample);
198
+ totalMemSize += sectOutputBufMemSize_EACH * nsect;
199
+
200
+ // allocate dynamic memory
201
+ if ((size_t)totalMemSize > stHdl->dynamMemSize) {
202
+ if (stHdl->dynamMemPtr != NULL) {
203
+ free(stHdl->dynamMemPtr);
204
+ stHdl->dynamMemSize = 0;
205
+ }
206
+ stHdl->dynamMemPtr = malloc(totalMemSize);
207
+ if (stHdl->dynamMemPtr == NULL) {
208
+ return -1;
209
+ }
210
+ stHdl->dynamMemSize = totalMemSize;
211
+ }
212
+ memset(stHdl->dynamMemPtr, 0, stHdl->dynamMemSize);
213
+
214
+ // setup the pointers/variable
215
+ memPtr = (char*)(stHdl->dynamMemPtr);
216
+
217
+ stHdl->inputTempBuf = (float*)memPtr;
218
+ memPtr += inputTempBufMemSize;
219
+
220
+ for (idx = 0; idx < nsect; idx++) {
221
+ stHdl->sectOutputBuf[idx] = (float*)memPtr;
222
+ memPtr += sectOutputBufMemSize_EACH;
223
+ }
224
+ for (; idx < AGORA_UAP_BIQUAD_MAX_SECTION; idx++) {
225
+ stHdl->sectOutputBuf[idx] = NULL;
226
+ }
227
+
228
+ if (((int)(memPtr - (char*)(stHdl->dynamMemPtr))) > totalMemSize) {
229
+ return -1;
230
+ }
231
+
232
+ return 0;
233
+ }
234
+
235
+ int AUP_Biquad_init(void* stPtr) {
236
+ Biquad_St* stHdl;
237
+
238
+ if (stPtr == NULL) {
239
+ return -1;
240
+ }
241
+ stHdl = (Biquad_St*)(stPtr);
242
+
243
+ if (AUP_Biquad_resetVariables(stHdl) < 0) {
244
+ return -1;
245
+ }
246
+
247
+ return 0;
248
+ }
249
+
250
+ int AUP_Biquad_getStaticCfg(const void* stPtr, Biquad_StaticCfg* pCfg) {
251
+ const Biquad_St* stHdl;
252
+
253
+ if (stPtr == NULL || pCfg == NULL) {
254
+ return -1;
255
+ }
256
+ stHdl = (const Biquad_St*)(stPtr);
257
+
258
+ memcpy(pCfg, &(stHdl->stCfg), sizeof(Biquad_StaticCfg));
259
+
260
+ return 0;
261
+ }
262
+
263
+ int AUP_Biquad_getAlgDelay(const void* stPtr, int* delayInSamples) {
264
+ const Biquad_St* stHdl;
265
+
266
+ if (stPtr == NULL || delayInSamples == NULL) {
267
+ return -1;
268
+ }
269
+ stHdl = (const Biquad_St*)(stPtr);
270
+
271
+ *delayInSamples = stHdl->nsect;
272
+
273
+ return 0;
274
+ }
275
+
276
+ int AUP_Biquad_proc(void* stPtr, const Biquad_InputData* pIn,
277
+ Biquad_OutputData* pOut) {
278
+ Biquad_St* stHdl = NULL;
279
+ int isFloatIO = 0;
280
+ int inputNSamples, nSect;
281
+ int sectIdx, smplIdx;
282
+ float tmp1;
283
+ const short* pShortTemp;
284
+ float* src;
285
+ float* tgt;
286
+
287
+ if (stPtr == NULL || pIn == NULL || pOut == NULL) { // pCtrl == NULL
288
+ return -1;
289
+ }
290
+ if (pIn->samplesPtr == NULL || pOut->outputBuff == NULL) {
291
+ return -1;
292
+ }
293
+
294
+ stHdl = (Biquad_St*)(stPtr);
295
+
296
+ if (((int)pIn->nsamples) > stHdl->maxNSample) {
297
+ return -1;
298
+ }
299
+
300
+ isFloatIO = 0;
301
+ if (pIn->sampleType != 0) {
302
+ isFloatIO = 1;
303
+ }
304
+
305
+ inputNSamples = (int)pIn->nsamples;
306
+ nSect = stHdl->nsect;
307
+
308
+ // special handle for input
309
+ if (isFloatIO == 0) {
310
+ pShortTemp = (const short*)pIn->samplesPtr;
311
+ for (smplIdx = 0; smplIdx < inputNSamples; smplIdx++) {
312
+ stHdl->inputTempBuf[smplIdx] = (float)pShortTemp[smplIdx];
313
+ }
314
+ } else {
315
+ memcpy(stHdl->inputTempBuf, (const float*)pIn->samplesPtr,
316
+ sizeof(float) * inputNSamples);
317
+ }
318
+
319
+ for (sectIdx = 0; sectIdx < nSect; sectIdx++) {
320
+ if (sectIdx == 0) {
321
+ src = stHdl->inputTempBuf;
322
+ } else {
323
+ src = stHdl->sectOutputBuf[sectIdx - 1];
324
+ }
325
+ tgt = stHdl->sectOutputBuf[sectIdx];
326
+
327
+ for (smplIdx = 0; smplIdx < inputNSamples; smplIdx++) {
328
+ tmp1 = src[smplIdx] -
329
+ stHdl->ACoeff[sectIdx][1] * stHdl->sectW[sectIdx][0] -
330
+ stHdl->ACoeff[sectIdx][2] * stHdl->sectW[sectIdx][1];
331
+
332
+ tgt[smplIdx] = stHdl->GCoeff[sectIdx] *
333
+ (stHdl->BCoeff[sectIdx][0] * tmp1 +
334
+ stHdl->BCoeff[sectIdx][1] * stHdl->sectW[sectIdx][0] +
335
+ stHdl->BCoeff[sectIdx][2] * stHdl->sectW[sectIdx][1]);
336
+
337
+ stHdl->sectW[sectIdx][1] = stHdl->sectW[sectIdx][0];
338
+ stHdl->sectW[sectIdx][0] = tmp1;
339
+ }
340
+ }
341
+
342
+ // prepare output buffer
343
+ if (isFloatIO == 0) {
344
+ for (smplIdx = 0; smplIdx < inputNSamples; smplIdx++) {
345
+ ((short*)pOut->outputBuff)[smplIdx] =
346
+ (short)_BIQUAD_FLOAT2SHORT(stHdl->sectOutputBuf[nSect - 1][smplIdx]);
347
+ }
348
+ } else {
349
+ memcpy(pOut->outputBuff, stHdl->sectOutputBuf[nSect - 1],
350
+ sizeof(float) * inputNSamples);
351
+ }
352
+
353
+ return 0;
354
+ }
src/biquad.h ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //
2
+ // Copyright © 2025 Agora
3
+ // This file is part of TEN Framework, an open source project.
4
+ // Licensed under the Apache License, Version 2.0, with certain conditions.
5
+ // Refer to the "LICENSE" file in the root directory for more information.
6
+ //
7
+ #ifndef __BIQUAD_H__
8
+ #define __BIQUAD_H__
9
+
10
+ #include <stdio.h>
11
+
12
+ #define AGORA_UAP_BIQUAD_MAX_SECTION (20)
13
+ // the max. number of sections supported by this Biquad module
14
+
15
+ #define AGORA_UAP_BIQUAD_MAX_INPUT_LEN (3840)
16
+ // max. number of samples each time can be fed in
17
+
18
+ #define AGORA_UAP_BIQUAD_ALIGN8(o) (((o) + 7) & (~7))
19
+ #define _BIQUAD_FLOAT2SHORT(x) \
20
+ ((x) < -32767.5f ? -32768 : ((x) > 32766.5f ? 32767 : (short)floor(.5 + (x))))
21
+
22
+ #define _BIQUAD_DC_REMOVAL_NSECT (2)
23
+ const float _BIQUAD_DC_REMOVAL_B[_BIQUAD_DC_REMOVAL_NSECT][3] = {
24
+ {1.0f, -2.0f, 1.0f}, {1.0f, -1.0f, 0.0f}};
25
+ const float _BIQUAD_DC_REMOVAL_A[_BIQUAD_DC_REMOVAL_NSECT][3] = {
26
+ {1.0f, -1.93944294f, 0.94281253f}, {1.0f, -0.94276431f, 0.0f}};
27
+ // const float _BIQUAD_DC_REMOVAL_G[_BIQUAD_DC_REMOVAL_NSECT] = {0.97056387f,
28
+ // 0.97138215f};
29
+ const float _BIQUAD_DC_REMOVAL_G[_BIQUAD_DC_REMOVAL_NSECT] = {0.97056387f,
30
+ 0.8655014957f};
31
+
32
+ // Configuration Parameters, which impacts dynamic memory occupation, can only
33
+ // be set during allocation
34
+ typedef struct Biquad_StaticCfg_ {
35
+ size_t maxNSample; // max. number of samples each time can be fed in
36
+ // (0, AGORA_UAP_BIQUAD_MAX_INPUT_LEN]
37
+
38
+ int nsect; // the number of sections to be processed by this Biquad module
39
+ // (-inf, AGORA_UAP_BIQUAD_MAX_SECTION]
40
+ // if <= 0, use internal default filter coefficients
41
+
42
+ const float* B[AGORA_UAP_BIQUAD_MAX_SECTION];
43
+ const float* A[AGORA_UAP_BIQUAD_MAX_SECTION];
44
+ // always assume A[...][0] = 1.0f
45
+ const float* G;
46
+ } Biquad_StaticCfg;
47
+
48
+ typedef struct Biquad_InputData_ {
49
+ const void*
50
+ samplesPtr; // externally provided buffer containing input time samples
51
+ // either in short or float type
52
+ short sampleType; // = 0: samplesPtr = short*; o.w. samplesPtr = float*
53
+ size_t nsamples; // number of samples fed in this time
54
+ } Biquad_InputData;
55
+
56
+ typedef struct Biquad_OutputData_ {
57
+ void* outputBuff; // externally provided output buffer,
58
+ // assumed to be of enough size nsamples *
59
+ // sizeof(short)/sizeof(float); output data type is the same
60
+ // as input
61
+ } Biquad_OutputData;
62
+
63
+ #ifdef __cplusplus
64
+ extern "C" {
65
+ #endif
66
+
67
+ /****************************************************************************
68
+ * AUP_Biquad_create(...)
69
+ *
70
+ * This function creates a state handler from nothing, which is NOT ready for
71
+ * processing
72
+ *
73
+ * Input:
74
+ *
75
+ * Output:
76
+ * - stPtr : buffer to store the returned state handler
77
+ *
78
+ * Return value : 0 - Ok
79
+ * -1 - Error
80
+ */
81
+ int AUP_Biquad_create(void** stPtr);
82
+
83
+ /****************************************************************************
84
+ * AUP_Biquad_destroy(...)
85
+ *
86
+ * destroy biquad instance, and releasing all the dynamically allocated memory
87
+ *
88
+ * Input:
89
+ * - stPtr : buffer of State Handler, after this method, this
90
+ * handler won't be usable anymore
91
+ *
92
+ * Output:
93
+ *
94
+ * Return value : 0 - Ok
95
+ * -1 - Error
96
+ */
97
+ int AUP_Biquad_destroy(void** stPtr);
98
+
99
+ /****************************************************************************
100
+ * AUP_Biquad_memAllocate(...)
101
+ *
102
+ * This function sets Static Config params and does memory allocation
103
+ * operation
104
+ *
105
+ * Input:
106
+ * - stPtr : State Handler which was returned by _create
107
+ * - pCfg : static configuration parameters
108
+ *
109
+ * Output:
110
+ *
111
+ * Return value : 0 - Ok
112
+ * -1 - Error
113
+ */
114
+ int AUP_Biquad_memAllocate(void* stPtr, const Biquad_StaticCfg* pCfg);
115
+
116
+ /****************************************************************************
117
+ * AUP_Biquad_init(...)
118
+ *
119
+ * This function resets (initialize) the biquad module and gets it prepared for
120
+ * processing
121
+ *
122
+ * Input:
123
+ * - stPtr : State Handler which has gone through create and
124
+ * memAllocate
125
+ *
126
+ * Output:
127
+ *
128
+ * Return value : 0 - Ok
129
+ * -1 - Error
130
+ */
131
+ int AUP_Biquad_init(void* stPtr);
132
+
133
+ /****************************************************************************
134
+ * AUP_Biquad_getStaticCfg(...)
135
+ *
136
+ * This function gets static configuration status from Biquad module
137
+ *
138
+ * Input:
139
+ * - stPtr : State Handler which has gone through create and
140
+ * memAllocate
141
+ *
142
+ * Output:
143
+ * - pCfg : configuration content
144
+ *
145
+ * Return value : 0 - Ok
146
+ * -1 - Error
147
+ */
148
+ int AUP_Biquad_getStaticCfg(const void* stPtr, Biquad_StaticCfg* pCfg);
149
+
150
+ /****************************************************************************
151
+ * AUP_Biquad_getAlgDelay(...)
152
+ *
153
+ * This function gets algorithm delay from biquad module
154
+ *
155
+ * Input:
156
+ * - stPtr : State Handler which has gone through create and
157
+ * memAllocate
158
+ *
159
+ * Output:
160
+ * - delayInSamples : algorithm delay in terms of samples
161
+ *
162
+ * Return value : 0 - Ok
163
+ * -1 - Error
164
+ */
165
+ int AUP_Biquad_getAlgDelay(const void* stPtr, int* delayInSamples);
166
+
167
+ /****************************************************************************
168
+ * AUP_Biquad_proc(...)
169
+ *
170
+ * process a single frame
171
+ *
172
+ * Input:
173
+ * - stPtr : State Handler which has gone through create and
174
+ * memAllocate
175
+ * - pCtrl : per-frame variable control parameters
176
+ * - pIn : input data stream
177
+ *
178
+ * Output:
179
+ * - pOut : output data (filtered samples, same sample type as the input)
180
+ *
181
+ * Return value : 0 - Ok
182
+ * -1 - Error
183
+ */
184
+ int AUP_Biquad_proc(void* stPtr, const Biquad_InputData* pIn,
185
+ Biquad_OutputData* pOut);
186
+
187
+ #ifdef __cplusplus
188
+ }
189
+ #endif
190
+ #endif // __BIQUAD_H__
src/biquad_st.h ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //
2
+ // Copyright © 2025 Agora
3
+ // This file is part of TEN Framework, an open source project.
4
+ // Licensed under the Apache License, Version 2.0, with certain conditions.
5
+ // Refer to the "LICENSE" file in the root directory for more information.
6
+ //
7
+ #ifndef __BIQUAD_ST_H__
8
+ #define __BIQUAD_ST_H__
9
+
10
+ #include <stdio.h>
11
+ #include "biquad.h"
12
+
13
+ typedef struct Biquad_St_ {
14
+ void* dynamMemPtr; // memory pointer holding the dynamic memory
15
+ size_t dynamMemSize; // size of the buffer *dynamMemPtr
16
+
17
+ // Static Configuration
18
+ Biquad_StaticCfg stCfg;
19
+
20
+ // ---------------------------------------------------------------
21
+ // Internal Static Config Registers, which are generated from stCfg
22
+ int maxNSample;
23
+ int nsect;
24
+ float BCoeff[AGORA_UAP_BIQUAD_MAX_SECTION][3];
25
+ float ACoeff[AGORA_UAP_BIQUAD_MAX_SECTION][3];
26
+ float GCoeff[AGORA_UAP_BIQUAD_MAX_SECTION]; // gain for each section
27
+
28
+ // Variables
29
+ float* inputTempBuf; // [maxNSample]
30
+ float sectW[AGORA_UAP_BIQUAD_MAX_SECTION][2];
31
+ // each section's register
32
+ float* sectOutputBuf
33
+ [AGORA_UAP_BIQUAD_MAX_SECTION]; //[AGORA_UAP_BIQUAD_MAX_SECTION][maxNSample]
34
+ // each section's output buffer
35
+ } Biquad_St;
36
+
37
+ #endif // __BIQUAD_ST_H__
src/coeff.h ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //
2
+ // Copyright © 2025 Agora
3
+ // This file is part of TEN Framework, an open source project.
4
+ // Licensed under the Apache License, Version 2.0, with certain conditions.
5
+ // Refer to the "LICENSE" file in the root directory for more information.
6
+ //
7
+ #ifndef __COEFF_H__
8
+ #define __COEFF_H__
9
+
10
+ #include "aed_st.h"
11
+
12
+ #define AUP_AED_MEAN_STD_NBINS AUP_AED_FEA_LEN
13
+
14
+ #define AUP_AED_ASSUMED_HOPSZ (256)
15
+ #define AUP_AED_ASSUMED_WINDOWSZ (768)
16
+ #define AUP_AED_ASSUMED_FFTSZ (1024)
17
+
18
+ // means of input-mel-filterbank
19
+ const float AUP_AED_FEATURE_MEANS[AUP_AED_MEAN_STD_NBINS] = {
20
+ -8.198236465454e+00f, -6.265716552734e+00f, -5.483818531036e+00f,
21
+ -4.758691310883e+00f, -4.417088985443e+00f, -4.142892837524e+00f,
22
+ -3.912850379944e+00f, -3.845927953720e+00f, -3.657090425491e+00f,
23
+ -3.723418712616e+00f, -3.876134157181e+00f, -3.843890905380e+00f,
24
+ -3.690405130386e+00f, -3.756065845490e+00f, -3.698696136475e+00f,
25
+ -3.650463104248e+00f, -3.700468778610e+00f, -3.567321300507e+00f,
26
+ -3.498900175095e+00f, -3.477807044983e+00f, -3.458816051483e+00f,
27
+ -3.444923877716e+00f, -3.401328563690e+00f, -3.306261301041e+00f,
28
+ -3.278556823730e+00f, -3.233250856400e+00f, -3.198616027832e+00f,
29
+ -3.204526424408e+00f, -3.208798646927e+00f, -3.257838010788e+00f,
30
+ -3.381376743317e+00f, -3.534021377563e+00f, -3.640867948532e+00f,
31
+ -3.726858854294e+00f, -3.773730993271e+00f, -3.804667234421e+00f,
32
+ -3.832901000977e+00f, -3.871120452881e+00f, -3.990592956543e+00f,
33
+ -4.480289459229e+00f, 9.235690307617e+01f};
34
+
35
+ // stds of input-mel-filterbank
36
+ const float AUP_AED_FEATURE_STDS[AUP_AED_MEAN_STD_NBINS] = {
37
+ 5.166063785553e+00f, 4.977209568024e+00f, 4.698895931244e+00f,
38
+ 4.630621433258e+00f, 4.634347915649e+00f, 4.641156196594e+00f,
39
+ 4.640676498413e+00f, 4.666367053986e+00f, 4.650534629822e+00f,
40
+ 4.640020847321e+00f, 4.637400150299e+00f, 4.620099067688e+00f,
41
+ 4.596316337585e+00f, 4.562654972076e+00f, 4.554360389709e+00f,
42
+ 4.566910743713e+00f, 4.562489986420e+00f, 4.562412738800e+00f,
43
+ 4.585299491882e+00f, 4.600179672241e+00f, 4.592845916748e+00f,
44
+ 4.585922718048e+00f, 4.583496570587e+00f, 4.626092910767e+00f,
45
+ 4.626957893372e+00f, 4.626289367676e+00f, 4.637005805969e+00f,
46
+ 4.683015823364e+00f, 4.726813793182e+00f, 4.734289646149e+00f,
47
+ 4.753227233887e+00f, 4.849722862244e+00f, 4.869434833527e+00f,
48
+ 4.884482860565e+00f, 4.921327114105e+00f, 4.959212303162e+00f,
49
+ 4.996619224548e+00f, 5.044823646545e+00f, 5.072216987610e+00f,
50
+ 5.096439361572e+00f, 1.152136917114e+02f};
51
+
52
+ const float AUP_AED_STFTWindow_Hann768[768] = {
53
+ 0.0000000e+00f, 1.6733041e-05f, 6.6931045e-05f, 1.5059065e-04f,
54
+ 2.6770626e-04f, 4.1827004e-04f, 6.0227190e-04f, 8.1969953e-04f,
55
+ 1.0705384e-03f, 1.3547717e-03f, 1.6723803e-03f, 2.0233432e-03f,
56
+ 2.4076367e-03f, 2.8252351e-03f, 3.2761105e-03f, 3.7602327e-03f,
57
+ 4.2775693e-03f, 4.8280857e-03f, 5.4117450e-03f, 6.0285082e-03f,
58
+ 6.6783340e-03f, 7.3611788e-03f, 8.0769970e-03f, 8.8257407e-03f,
59
+ 9.6073598e-03f, 1.0421802e-02f, 1.1269013e-02f, 1.2148935e-02f,
60
+ 1.3061510e-02f, 1.4006678e-02f, 1.4984373e-02f, 1.5994532e-02f,
61
+ 1.7037087e-02f, 1.8111967e-02f, 1.9219101e-02f, 2.0358415e-02f,
62
+ 2.1529832e-02f, 2.2733274e-02f, 2.3968661e-02f, 2.5235910e-02f,
63
+ 2.6534935e-02f, 2.7865651e-02f, 2.9227967e-02f, 3.0621794e-02f,
64
+ 3.2047037e-02f, 3.3503601e-02f, 3.4991388e-02f, 3.6510300e-02f,
65
+ 3.8060234e-02f, 3.9641086e-02f, 4.1252752e-02f, 4.2895122e-02f,
66
+ 4.4568088e-02f, 4.6271536e-02f, 4.8005353e-02f, 4.9769424e-02f,
67
+ 5.1563629e-02f, 5.3387849e-02f, 5.5241962e-02f, 5.7125844e-02f,
68
+ 5.9039368e-02f, 6.0982406e-02f, 6.2954829e-02f, 6.4956504e-02f,
69
+ 6.6987298e-02f, 6.9047074e-02f, 7.1135695e-02f, 7.3253021e-02f,
70
+ 7.5398909e-02f, 7.7573217e-02f, 7.9775799e-02f, 8.2006508e-02f,
71
+ 8.4265194e-02f, 8.6551706e-02f, 8.8865891e-02f, 9.1207593e-02f,
72
+ 9.3576658e-02f, 9.5972925e-02f, 9.8396234e-02f, 1.0084642e-01f,
73
+ 1.0332333e-01f, 1.0582679e-01f, 1.0835663e-01f, 1.1091268e-01f,
74
+ 1.1349477e-01f, 1.1610274e-01f, 1.1873640e-01f, 1.2139558e-01f,
75
+ 1.2408010e-01f, 1.2678978e-01f, 1.2952444e-01f, 1.3228389e-01f,
76
+ 1.3506796e-01f, 1.3787646e-01f, 1.4070919e-01f, 1.4356597e-01f,
77
+ 1.4644661e-01f, 1.4935091e-01f, 1.5227868e-01f, 1.5522973e-01f,
78
+ 1.5820385e-01f, 1.6120085e-01f, 1.6422052e-01f, 1.6726267e-01f,
79
+ 1.7032709e-01f, 1.7341358e-01f, 1.7652192e-01f, 1.7965192e-01f,
80
+ 1.8280336e-01f, 1.8597603e-01f, 1.8916971e-01f, 1.9238420e-01f,
81
+ 1.9561929e-01f, 1.9887474e-01f, 2.0215035e-01f, 2.0544589e-01f,
82
+ 2.0876115e-01f, 2.1209590e-01f, 2.1544993e-01f, 2.1882300e-01f,
83
+ 2.2221488e-01f, 2.2562536e-01f, 2.2905421e-01f, 2.3250119e-01f,
84
+ 2.3596607e-01f, 2.3944863e-01f, 2.4294863e-01f, 2.4646583e-01f,
85
+ 2.5000000e-01f, 2.5355090e-01f, 2.5711830e-01f, 2.6070196e-01f,
86
+ 2.6430163e-01f, 2.6791708e-01f, 2.7154806e-01f, 2.7519434e-01f,
87
+ 2.7885565e-01f, 2.8253178e-01f, 2.8622245e-01f, 2.8992744e-01f,
88
+ 2.9364649e-01f, 2.9737934e-01f, 3.0112576e-01f, 3.0488549e-01f,
89
+ 3.0865828e-01f, 3.1244388e-01f, 3.1624203e-01f, 3.2005248e-01f,
90
+ 3.2387498e-01f, 3.2770926e-01f, 3.3155507e-01f, 3.3541216e-01f,
91
+ 3.3928027e-01f, 3.4315913e-01f, 3.4704849e-01f, 3.5094809e-01f,
92
+ 3.5485766e-01f, 3.5877695e-01f, 3.6270569e-01f, 3.6664362e-01f,
93
+ 3.7059048e-01f, 3.7454600e-01f, 3.7850991e-01f, 3.8248196e-01f,
94
+ 3.8646187e-01f, 3.9044938e-01f, 3.9444422e-01f, 3.9844613e-01f,
95
+ 4.0245484e-01f, 4.0647007e-01f, 4.1049157e-01f, 4.1451906e-01f,
96
+ 4.1855226e-01f, 4.2259092e-01f, 4.2663476e-01f, 4.3068351e-01f,
97
+ 4.3473690e-01f, 4.3879466e-01f, 4.4285652e-01f, 4.4692220e-01f,
98
+ 4.5099143e-01f, 4.5506394e-01f, 4.5913946e-01f, 4.6321772e-01f,
99
+ 4.6729844e-01f, 4.7138134e-01f, 4.7546616e-01f, 4.7955263e-01f,
100
+ 4.8364046e-01f, 4.8772939e-01f, 4.9181913e-01f, 4.9590943e-01f,
101
+ 5.0000000e-01f, 5.0409057e-01f, 5.0818087e-01f, 5.1227061e-01f,
102
+ 5.1635954e-01f, 5.2044737e-01f, 5.2453384e-01f, 5.2861866e-01f,
103
+ 5.3270156e-01f, 5.3678228e-01f, 5.4086054e-01f, 5.4493606e-01f,
104
+ 5.4900857e-01f, 5.5307780e-01f, 5.5714348e-01f, 5.6120534e-01f,
105
+ 5.6526310e-01f, 5.6931649e-01f, 5.7336524e-01f, 5.7740908e-01f,
106
+ 5.8144774e-01f, 5.8548094e-01f, 5.8950843e-01f, 5.9352993e-01f,
107
+ 5.9754516e-01f, 6.0155387e-01f, 6.0555578e-01f, 6.0955062e-01f,
108
+ 6.1353813e-01f, 6.1751804e-01f, 6.2149009e-01f, 6.2545400e-01f,
109
+ 6.2940952e-01f, 6.3335638e-01f, 6.3729431e-01f, 6.4122305e-01f,
110
+ 6.4514234e-01f, 6.4905191e-01f, 6.5295151e-01f, 6.5684087e-01f,
111
+ 6.6071973e-01f, 6.6458784e-01f, 6.6844493e-01f, 6.7229074e-01f,
112
+ 6.7612502e-01f, 6.7994752e-01f, 6.8375797e-01f, 6.8755612e-01f,
113
+ 6.9134172e-01f, 6.9511451e-01f, 6.9887424e-01f, 7.0262066e-01f,
114
+ 7.0635351e-01f, 7.1007256e-01f, 7.1377755e-01f, 7.1746822e-01f,
115
+ 7.2114435e-01f, 7.2480566e-01f, 7.2845194e-01f, 7.3208292e-01f,
116
+ 7.3569837e-01f, 7.3929804e-01f, 7.4288170e-01f, 7.4644910e-01f,
117
+ 7.5000000e-01f, 7.5353417e-01f, 7.5705137e-01f, 7.6055137e-01f,
118
+ 7.6403393e-01f, 7.6749881e-01f, 7.7094579e-01f, 7.7437464e-01f,
119
+ 7.7778512e-01f, 7.8117700e-01f, 7.8455007e-01f, 7.8790410e-01f,
120
+ 7.9123885e-01f, 7.9455411e-01f, 7.9784965e-01f, 8.0112526e-01f,
121
+ 8.0438071e-01f, 8.0761580e-01f, 8.1083029e-01f, 8.1402397e-01f,
122
+ 8.1719664e-01f, 8.2034808e-01f, 8.2347808e-01f, 8.2658642e-01f,
123
+ 8.2967291e-01f, 8.3273733e-01f, 8.3577948e-01f, 8.3879915e-01f,
124
+ 8.4179615e-01f, 8.4477027e-01f, 8.4772132e-01f, 8.5064909e-01f,
125
+ 8.5355339e-01f, 8.5643403e-01f, 8.5929081e-01f, 8.6212354e-01f,
126
+ 8.6493204e-01f, 8.6771611e-01f, 8.7047556e-01f, 8.7321022e-01f,
127
+ 8.7591990e-01f, 8.7860442e-01f, 8.8126360e-01f, 8.8389726e-01f,
128
+ 8.8650523e-01f, 8.8908732e-01f, 8.9164337e-01f, 8.9417321e-01f,
129
+ 8.9667667e-01f, 8.9915358e-01f, 9.0160377e-01f, 9.0402708e-01f,
130
+ 9.0642334e-01f, 9.0879241e-01f, 9.1113411e-01f, 9.1344829e-01f,
131
+ 9.1573481e-01f, 9.1799349e-01f, 9.2022420e-01f, 9.2242678e-01f,
132
+ 9.2460109e-01f, 9.2674698e-01f, 9.2886431e-01f, 9.3095293e-01f,
133
+ 9.3301270e-01f, 9.3504350e-01f, 9.3704517e-01f, 9.3901759e-01f,
134
+ 9.4096063e-01f, 9.4287416e-01f, 9.4475804e-01f, 9.4661215e-01f,
135
+ 9.4843637e-01f, 9.5023058e-01f, 9.5199465e-01f, 9.5372846e-01f,
136
+ 9.5543191e-01f, 9.5710488e-01f, 9.5874725e-01f, 9.6035891e-01f,
137
+ 9.6193977e-01f, 9.6348970e-01f, 9.6500861e-01f, 9.6649640e-01f,
138
+ 9.6795296e-01f, 9.6937821e-01f, 9.7077203e-01f, 9.7213435e-01f,
139
+ 9.7346506e-01f, 9.7476409e-01f, 9.7603134e-01f, 9.7726673e-01f,
140
+ 9.7847017e-01f, 9.7964159e-01f, 9.8078090e-01f, 9.8188803e-01f,
141
+ 9.8296291e-01f, 9.8400547e-01f, 9.8501563e-01f, 9.8599332e-01f,
142
+ 9.8693849e-01f, 9.8785107e-01f, 9.8873099e-01f, 9.8957820e-01f,
143
+ 9.9039264e-01f, 9.9117426e-01f, 9.9192300e-01f, 9.9263882e-01f,
144
+ 9.9332167e-01f, 9.9397149e-01f, 9.9458825e-01f, 9.9517191e-01f,
145
+ 9.9572243e-01f, 9.9623977e-01f, 9.9672389e-01f, 9.9717476e-01f,
146
+ 9.9759236e-01f, 9.9797666e-01f, 9.9832762e-01f, 9.9864523e-01f,
147
+ 9.9892946e-01f, 9.9918030e-01f, 9.9939773e-01f, 9.9958173e-01f,
148
+ 9.9973229e-01f, 9.9984941e-01f, 9.9993307e-01f, 9.9998327e-01f,
149
+ 1.0000000e+00f, 9.9998327e-01f, 9.9993307e-01f, 9.9984941e-01f,
150
+ 9.9973229e-01f, 9.9958173e-01f, 9.9939773e-01f, 9.9918030e-01f,
151
+ 9.9892946e-01f, 9.9864523e-01f, 9.9832762e-01f, 9.9797666e-01f,
152
+ 9.9759236e-01f, 9.9717476e-01f, 9.9672389e-01f, 9.9623977e-01f,
153
+ 9.9572243e-01f, 9.9517191e-01f, 9.9458825e-01f, 9.9397149e-01f,
154
+ 9.9332167e-01f, 9.9263882e-01f, 9.9192300e-01f, 9.9117426e-01f,
155
+ 9.9039264e-01f, 9.8957820e-01f, 9.8873099e-01f, 9.8785107e-01f,
156
+ 9.8693849e-01f, 9.8599332e-01f, 9.8501563e-01f, 9.8400547e-01f,
157
+ 9.8296291e-01f, 9.8188803e-01f, 9.8078090e-01f, 9.7964159e-01f,
158
+ 9.7847017e-01f, 9.7726673e-01f, 9.7603134e-01f, 9.7476409e-01f,
159
+ 9.7346506e-01f, 9.7213435e-01f, 9.7077203e-01f, 9.6937821e-01f,
160
+ 9.6795296e-01f, 9.6649640e-01f, 9.6500861e-01f, 9.6348970e-01f,
161
+ 9.6193977e-01f, 9.6035891e-01f, 9.5874725e-01f, 9.5710488e-01f,
162
+ 9.5543191e-01f, 9.5372846e-01f, 9.5199465e-01f, 9.5023058e-01f,
163
+ 9.4843637e-01f, 9.4661215e-01f, 9.4475804e-01f, 9.4287416e-01f,
164
+ 9.4096063e-01f, 9.3901759e-01f, 9.3704517e-01f, 9.3504350e-01f,
165
+ 9.3301270e-01f, 9.3095293e-01f, 9.2886431e-01f, 9.2674698e-01f,
166
+ 9.2460109e-01f, 9.2242678e-01f, 9.2022420e-01f, 9.1799349e-01f,
167
+ 9.1573481e-01f, 9.1344829e-01f, 9.1113411e-01f, 9.0879241e-01f,
168
+ 9.0642334e-01f, 9.0402708e-01f, 9.0160377e-01f, 8.9915358e-01f,
169
+ 8.9667667e-01f, 8.9417321e-01f, 8.9164337e-01f, 8.8908732e-01f,
170
+ 8.8650523e-01f, 8.8389726e-01f, 8.8126360e-01f, 8.7860442e-01f,
171
+ 8.7591990e-01f, 8.7321022e-01f, 8.7047556e-01f, 8.6771611e-01f,
172
+ 8.6493204e-01f, 8.6212354e-01f, 8.5929081e-01f, 8.5643403e-01f,
173
+ 8.5355339e-01f, 8.5064909e-01f, 8.4772132e-01f, 8.4477027e-01f,
174
+ 8.4179615e-01f, 8.3879915e-01f, 8.3577948e-01f, 8.3273733e-01f,
175
+ 8.2967291e-01f, 8.2658642e-01f, 8.2347808e-01f, 8.2034808e-01f,
176
+ 8.1719664e-01f, 8.1402397e-01f, 8.1083029e-01f, 8.0761580e-01f,
177
+ 8.0438071e-01f, 8.0112526e-01f, 7.9784965e-01f, 7.9455411e-01f,
178
+ 7.9123885e-01f, 7.8790410e-01f, 7.8455007e-01f, 7.8117700e-01f,
179
+ 7.7778512e-01f, 7.7437464e-01f, 7.7094579e-01f, 7.6749881e-01f,
180
+ 7.6403393e-01f, 7.6055137e-01f, 7.5705137e-01f, 7.5353417e-01f,
181
+ 7.5000000e-01f, 7.4644910e-01f, 7.4288170e-01f, 7.3929804e-01f,
182
+ 7.3569837e-01f, 7.3208292e-01f, 7.2845194e-01f, 7.2480566e-01f,
183
+ 7.2114435e-01f, 7.1746822e-01f, 7.1377755e-01f, 7.1007256e-01f,
184
+ 7.0635351e-01f, 7.0262066e-01f, 6.9887424e-01f, 6.9511451e-01f,
185
+ 6.9134172e-01f, 6.8755612e-01f, 6.8375797e-01f, 6.7994752e-01f,
186
+ 6.7612502e-01f, 6.7229074e-01f, 6.6844493e-01f, 6.6458784e-01f,
187
+ 6.6071973e-01f, 6.5684087e-01f, 6.5295151e-01f, 6.4905191e-01f,
188
+ 6.4514234e-01f, 6.4122305e-01f, 6.3729431e-01f, 6.3335638e-01f,
189
+ 6.2940952e-01f, 6.2545400e-01f, 6.2149009e-01f, 6.1751804e-01f,
190
+ 6.1353813e-01f, 6.0955062e-01f, 6.0555578e-01f, 6.0155387e-01f,
191
+ 5.9754516e-01f, 5.9352993e-01f, 5.8950843e-01f, 5.8548094e-01f,
192
+ 5.8144774e-01f, 5.7740908e-01f, 5.7336524e-01f, 5.6931649e-01f,
193
+ 5.6526310e-01f, 5.6120534e-01f, 5.5714348e-01f, 5.5307780e-01f,
194
+ 5.4900857e-01f, 5.4493606e-01f, 5.4086054e-01f, 5.3678228e-01f,
195
+ 5.3270156e-01f, 5.2861866e-01f, 5.2453384e-01f, 5.2044737e-01f,
196
+ 5.1635954e-01f, 5.1227061e-01f, 5.0818087e-01f, 5.0409057e-01f,
197
+ 5.0000000e-01f, 4.9590943e-01f, 4.9181913e-01f, 4.8772939e-01f,
198
+ 4.8364046e-01f, 4.7955263e-01f, 4.7546616e-01f, 4.7138134e-01f,
199
+ 4.6729844e-01f, 4.6321772e-01f, 4.5913946e-01f, 4.5506394e-01f,
200
+ 4.5099143e-01f, 4.4692220e-01f, 4.4285652e-01f, 4.3879466e-01f,
201
+ 4.3473690e-01f, 4.3068351e-01f, 4.2663476e-01f, 4.2259092e-01f,
202
+ 4.1855226e-01f, 4.1451906e-01f, 4.1049157e-01f, 4.0647007e-01f,
203
+ 4.0245484e-01f, 3.9844613e-01f, 3.9444422e-01f, 3.9044938e-01f,
204
+ 3.8646187e-01f, 3.8248196e-01f, 3.7850991e-01f, 3.7454600e-01f,
205
+ 3.7059048e-01f, 3.6664362e-01f, 3.6270569e-01f, 3.5877695e-01f,
206
+ 3.5485766e-01f, 3.5094809e-01f, 3.4704849e-01f, 3.4315913e-01f,
207
+ 3.3928027e-01f, 3.3541216e-01f, 3.3155507e-01f, 3.2770926e-01f,
208
+ 3.2387498e-01f, 3.2005248e-01f, 3.1624203e-01f, 3.1244388e-01f,
209
+ 3.0865828e-01f, 3.0488549e-01f, 3.0112576e-01f, 2.9737934e-01f,
210
+ 2.9364649e-01f, 2.8992744e-01f, 2.8622245e-01f, 2.8253178e-01f,
211
+ 2.7885565e-01f, 2.7519434e-01f, 2.7154806e-01f, 2.6791708e-01f,
212
+ 2.6430163e-01f, 2.6070196e-01f, 2.5711830e-01f, 2.5355090e-01f,
213
+ 2.5000000e-01f, 2.4646583e-01f, 2.4294863e-01f, 2.3944863e-01f,
214
+ 2.3596607e-01f, 2.3250119e-01f, 2.2905421e-01f, 2.2562536e-01f,
215
+ 2.2221488e-01f, 2.1882300e-01f, 2.1544993e-01f, 2.1209590e-01f,
216
+ 2.0876115e-01f, 2.0544589e-01f, 2.0215035e-01f, 1.9887474e-01f,
217
+ 1.9561929e-01f, 1.9238420e-01f, 1.8916971e-01f, 1.8597603e-01f,
218
+ 1.8280336e-01f, 1.7965192e-01f, 1.7652192e-01f, 1.7341358e-01f,
219
+ 1.7032709e-01f, 1.6726267e-01f, 1.6422052e-01f, 1.6120085e-01f,
220
+ 1.5820385e-01f, 1.5522973e-01f, 1.5227868e-01f, 1.4935091e-01f,
221
+ 1.4644661e-01f, 1.4356597e-01f, 1.4070919e-01f, 1.3787646e-01f,
222
+ 1.3506796e-01f, 1.3228389e-01f, 1.2952444e-01f, 1.2678978e-01f,
223
+ 1.2408010e-01f, 1.2139558e-01f, 1.1873640e-01f, 1.1610274e-01f,
224
+ 1.1349477e-01f, 1.1091268e-01f, 1.0835663e-01f, 1.0582679e-01f,
225
+ 1.0332333e-01f, 1.0084642e-01f, 9.8396234e-02f, 9.5972925e-02f,
226
+ 9.3576658e-02f, 9.1207593e-02f, 8.8865891e-02f, 8.6551706e-02f,
227
+ 8.4265194e-02f, 8.2006508e-02f, 7.9775799e-02f, 7.7573217e-02f,
228
+ 7.5398909e-02f, 7.3253021e-02f, 7.1135695e-02f, 6.9047074e-02f,
229
+ 6.6987298e-02f, 6.4956504e-02f, 6.2954829e-02f, 6.0982406e-02f,
230
+ 5.9039368e-02f, 5.7125844e-02f, 5.5241962e-02f, 5.3387849e-02f,
231
+ 5.1563629e-02f, 4.9769424e-02f, 4.8005353e-02f, 4.6271536e-02f,
232
+ 4.4568088e-02f, 4.2895122e-02f, 4.1252752e-02f, 3.9641086e-02f,
233
+ 3.8060234e-02f, 3.6510300e-02f, 3.4991388e-02f, 3.3503601e-02f,
234
+ 3.2047037e-02f, 3.0621794e-02f, 2.9227967e-02f, 2.7865651e-02f,
235
+ 2.6534935e-02f, 2.5235910e-02f, 2.3968661e-02f, 2.2733274e-02f,
236
+ 2.1529832e-02f, 2.0358415e-02f, 1.9219101e-02f, 1.8111967e-02f,
237
+ 1.7037087e-02f, 1.5994532e-02f, 1.4984373e-02f, 1.4006678e-02f,
238
+ 1.3061510e-02f, 1.2148935e-02f, 1.1269013e-02f, 1.0421802e-02f,
239
+ 9.6073598e-03f, 8.8257407e-03f, 8.0769970e-03f, 7.3611788e-03f,
240
+ 6.6783340e-03f, 6.0285082e-03f, 5.4117450e-03f, 4.8280857e-03f,
241
+ 4.2775693e-03f, 3.7602327e-03f, 3.2761105e-03f, 2.8252351e-03f,
242
+ 2.4076367e-03f, 2.0233432e-03f, 1.6723803e-03f, 1.3547717e-03f,
243
+ 1.0705384e-03f, 8.1969953e-04f, 6.0227190e-04f, 4.1827004e-04f,
244
+ 2.6770626e-04f, 1.5059065e-04f, 6.6931045e-05f, 1.6733041e-05f};
245
+
246
+ #endif
src/fftw.c ADDED
The diff for this file is too large to render. See raw diff
 
src/fftw.h ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //
2
+ // Copyright © 2025 Agora
3
+ // This file is part of TEN Framework, an open source project.
4
+ // Licensed under the Apache License, Version 2.0, with certain conditions.
5
+ // Refer to the "LICENSE" file in the root directory for more information.
6
+ //
7
#ifndef __FFTW_H__
#define __FFTW_H__

#include <stdio.h>

#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */

// Two packed layouts are used to store a spectrum in a float array:
//   format1: [Real-0, Real-Nyq, Real-1, Imag-1, Real-2, Imag-2, ...]
//   format2: [Real-0, Real-1, (-1)*Imag-1, Real-2, (-1)*Imag-2, ..., Real-Nyq]

// Fixed-size transforms, one r2c/c2r pair per supported size (the numeric
// suffix). Every function in this group expects/produces its spectrum in
// format2.
// NOTE(review): r2c/c2r presumably stand for real-to-complex (forward) and
// complex-to-real (inverse) - confirm against fftw.c.
void AUP_FFTW_r2c_256(float* in, float* out);
void AUP_FFTW_c2r_256(float* in, float* out);

void AUP_FFTW_r2c_512(float* in, float* out);
void AUP_FFTW_c2r_512(float* in, float* out);

void AUP_FFTW_r2c_1024(float* in, float* out);
void AUP_FFTW_c2r_1024(float* in, float* out);

void AUP_FFTW_r2c_2048(float* in, float* out);
void AUP_FFTW_c2r_2048(float* in, float* out);

void AUP_FFTW_r2c_4096(float* in, float* out);
void AUP_FFTW_c2r_4096(float* in, float* out);

// Converts a spectrum between the two layouts, in place:
//   direction == 0: format1 -> format2
//   direction == 1: format2 -> format1
void AUP_FFTW_InplaceTransf(int direction, int fftSz, float* inplaceTranfBuf);

// In-place rescaling of a forward / inverse transform result of size fftSz.
void AUP_FFTW_RescaleFFTOut(int fftSz, float* inplaceBuf);
void AUP_FFTW_RescaleIFFTOut(int fftSz, float* inplaceBuf);

#ifdef __cplusplus
}
#endif /* __cplusplus */

#endif  // __FFTW_H__
src/fscvrt.cc ADDED
@@ -0,0 +1,541 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //
2
+ // Copyright © 2025 Agora
3
+ // This file is part of TEN Framework, an open source project.
4
+ // Licensed under the Apache License, Version 2.0, with certain conditions.
5
+ // Refer to the "LICENSE" file in the root directory for more information.
6
+ //
7
+ #include <math.h>
8
+ #include <stdlib.h>
9
+ #include <string.h>
10
+ #include <stdio.h>
11
+
12
+ #include "fscvrt.h"
13
+ #include "fscvrt_st.h"
14
+ #include "biquad.h"
15
+
16
+ // ==========================================================================================
17
+ // internal tools
18
+ // ==========================================================================================
19
+
20
+ static int AUP_Fscvrt_FilterSet(int resampleRate, int* nsect,
21
+ const float* B[_FSCVRT_MAXNSEC],
22
+ const float* A[_FSCVRT_MAXNSEC],
23
+ const float** G) {
24
+ int idx;
25
+ if (resampleRate == 2) {
26
+ *nsect = _FSCVRT_1over2_LOWPASS_NSEC;
27
+ for (idx = 0; idx < (*nsect); idx++) {
28
+ B[idx] = &(_FSCVRT_1over2_LOWPASS_B[idx][0]);
29
+ A[idx] = &(_FSCVRT_1over2_LOWPASS_A[idx][0]);
30
+ }
31
+ *G = _FSCVRT_1over2_LOWPASS_G;
32
+ } else if (resampleRate == 3) {
33
+ *nsect = _FSCVRT_1over3_LOWPASS_NSEC;
34
+ for (idx = 0; idx < (*nsect); idx++) {
35
+ B[idx] = &(_FSCVRT_1over3_LOWPASS_B[idx][0]);
36
+ A[idx] = &(_FSCVRT_1over3_LOWPASS_A[idx][0]);
37
+ }
38
+ *G = _FSCVRT_1over3_LOWPASS_G;
39
+ } else if (resampleRate == 4) {
40
+ *nsect = _FSCVRT_1over4_LOWPASS_NSEC;
41
+ for (idx = 0; idx < (*nsect); idx++) {
42
+ B[idx] = &(_FSCVRT_1over4_LOWPASS_B[idx][0]);
43
+ A[idx] = &(_FSCVRT_1over4_LOWPASS_A[idx][0]);
44
+ }
45
+ *G = _FSCVRT_1over4_LOWPASS_G;
46
+ } else if (resampleRate == 6) {
47
+ *nsect = _FSCVRT_1over6_LOWPASS_NSEC;
48
+ for (idx = 0; idx < (*nsect); idx++) {
49
+ B[idx] = &(_FSCVRT_1over6_LOWPASS_B[idx][0]);
50
+ A[idx] = &(_FSCVRT_1over6_LOWPASS_A[idx][0]);
51
+ }
52
+ *G = _FSCVRT_1over6_LOWPASS_G;
53
+ } else { // unknown resample rate
54
+ return -1;
55
+ }
56
+
57
+ return 0;
58
+ }
59
+
60
+ static int AUP_Fscvrt_dynamMemPrepare(FscvrtSt* stHdl, void* memPtrExt,
61
+ size_t memSize) {
62
+ char* memPtr = NULL;
63
+ int biquadInBufMemSize = 0;
64
+ int biquadOutBufMemSize = 0;
65
+ int totalMemSize = 0;
66
+
67
+ if (stHdl == NULL) {
68
+ return -1;
69
+ }
70
+
71
+ biquadInBufMemSize = _FSCVRT_ALIGN8(sizeof(float) * stHdl->biquadInBufLen);
72
+ totalMemSize += biquadInBufMemSize;
73
+
74
+ biquadOutBufMemSize = _FSCVRT_ALIGN8(sizeof(float) * stHdl->biquadOutBufLen);
75
+ totalMemSize += biquadOutBufMemSize;
76
+
77
+ totalMemSize = _FSCVRT_MAX(totalMemSize, 80);
78
+
79
+ // if no external memory provided, we are only profiling the memory
80
+ // requirement
81
+ if (memPtrExt == NULL) {
82
+ return (totalMemSize);
83
+ }
84
+
85
+ // if required memory is more than provided, error
86
+ if ((size_t)totalMemSize > memSize) {
87
+ return -1;
88
+ }
89
+ memPtr = (char*)memPtrExt;
90
+
91
+ stHdl->biquadInBuf = NULL;
92
+ if (biquadInBufMemSize != 0) {
93
+ stHdl->biquadInBuf = (float*)memPtr;
94
+ memPtr += biquadInBufMemSize;
95
+ }
96
+
97
+ stHdl->biquadOutBuf = NULL;
98
+ if (biquadOutBufMemSize != 0) {
99
+ stHdl->biquadOutBuf = (float*)memPtr;
100
+ memPtr += biquadOutBufMemSize;
101
+ }
102
+
103
+ if (((int)(memPtr - (char*)memPtrExt)) > totalMemSize) {
104
+ return -1;
105
+ }
106
+
107
+ return (totalMemSize);
108
+ }
109
+
110
+ static int AUP_Fscvrt_checkStatCfg(FscvrtStaticCfg* pCfg) {
111
+ if (pCfg == NULL) {
112
+ return -1;
113
+ }
114
+
115
+ if (pCfg->inputFs != 16000 && pCfg->inputFs != 24000 &&
116
+ pCfg->inputFs != 32000 && pCfg->inputFs != 48000) {
117
+ return -1;
118
+ }
119
+
120
+ if (pCfg->outputFs != 16000 && pCfg->outputFs != 24000 &&
121
+ pCfg->outputFs != 32000 && pCfg->outputFs != 48000) {
122
+ return -1;
123
+ }
124
+
125
+ if (pCfg->stepSz > AUP_FSCVRT_MAX_INPUT_LEN || pCfg->stepSz < 1) {
126
+ return -1;
127
+ }
128
+
129
+ if (pCfg->inputType != 0) {
130
+ pCfg->inputType = 1;
131
+ }
132
+
133
+ if (pCfg->outputType != 0) {
134
+ pCfg->outputType = 1;
135
+ }
136
+
137
+ return 0;
138
+ }
139
+
140
+ static int AUP_Fscvrt_publishStaticCfg(FscvrtSt* stHdl) {
141
+ int tmpRatio;
142
+ int ret;
143
+ int maxResmplRate = 0;
144
+
145
+ stHdl->mode = 0;
146
+ stHdl->upSmplRate = 1;
147
+ stHdl->downSmplRate = 1;
148
+ if (stHdl->stCfg.inputFs != stHdl->stCfg.outputFs) {
149
+ if (stHdl->stCfg.outputFs > stHdl->stCfg.inputFs) {
150
+ tmpRatio = (stHdl->stCfg.outputFs / stHdl->stCfg.inputFs);
151
+ if (stHdl->stCfg.outputFs == tmpRatio * stHdl->stCfg.inputFs) {
152
+ stHdl->mode = 1;
153
+ stHdl->upSmplRate = tmpRatio;
154
+ stHdl->downSmplRate = 1;
155
+ } else {
156
+ stHdl->mode = 3;
157
+ stHdl->upSmplRate = _FSCVRT_COMMON_FS / stHdl->stCfg.inputFs;
158
+ stHdl->downSmplRate = _FSCVRT_COMMON_FS / stHdl->stCfg.outputFs;
159
+ }
160
+ } else { // stHdl->stCfg.outputFs < stHdl->stCfg.inputFs
161
+ tmpRatio = (stHdl->stCfg.inputFs / stHdl->stCfg.outputFs);
162
+ if (stHdl->stCfg.inputFs == tmpRatio * stHdl->stCfg.outputFs) {
163
+ stHdl->mode = 2;
164
+ stHdl->upSmplRate = 1;
165
+ stHdl->downSmplRate = tmpRatio;
166
+ } else {
167
+ stHdl->mode = 3;
168
+ stHdl->upSmplRate = _FSCVRT_COMMON_FS / stHdl->stCfg.inputFs;
169
+ stHdl->downSmplRate = _FSCVRT_COMMON_FS / stHdl->stCfg.outputFs;
170
+ }
171
+ }
172
+ }
173
+
174
+ if (stHdl->mode == 0) {
175
+ stHdl->biquadInBufLen = 0;
176
+ stHdl->biquadOutBufLen = 0;
177
+ } else {
178
+ stHdl->biquadInBufLen = stHdl->stCfg.stepSz * stHdl->upSmplRate;
179
+ stHdl->biquadOutBufLen = 2 * (stHdl->stCfg.stepSz * stHdl->upSmplRate);
180
+ }
181
+
182
+ maxResmplRate = _FSCVRT_MAX(stHdl->upSmplRate, stHdl->downSmplRate);
183
+
184
+ stHdl->nSec = 0;
185
+ memset(stHdl->biquadB, 0, sizeof(stHdl->biquadB));
186
+ memset(stHdl->biquadA, 0, sizeof(stHdl->biquadA));
187
+ stHdl->biquadG = NULL; // gain for each section
188
+
189
+ if (stHdl->mode != 0) {
190
+ ret = AUP_Fscvrt_FilterSet(maxResmplRate, &(stHdl->nSec), stHdl->biquadB,
191
+ stHdl->biquadA, &(stHdl->biquadG));
192
+ if (ret < 0) {
193
+ return -1;
194
+ }
195
+ }
196
+
197
+ return 0;
198
+ }
199
+
200
+ static int AUP_Fscvrt_resetVariables(FscvrtSt* stHdl) {
201
+ stHdl->biquadInBufCnt = 0;
202
+ stHdl->biquadOutBufCnt = 0;
203
+
204
+ if (stHdl->dynamMemPtr != NULL && stHdl->dynamMemSize > 0) {
205
+ memset(stHdl->dynamMemPtr, 0, stHdl->dynamMemSize);
206
+ }
207
+ return 0;
208
+ }
209
+
210
+ // ==========================================================================================
211
+ // public APIs
212
+ // ==========================================================================================
213
+
214
+ int AUP_Fscvrt_create(void** stPtr) {
215
+ FscvrtSt* tmpPtr;
216
+
217
+ if (stPtr == NULL) {
218
+ return -1;
219
+ }
220
+ *stPtr = (void*)malloc(sizeof(FscvrtSt));
221
+ if (*stPtr == NULL) {
222
+ return -1;
223
+ }
224
+ memset(*stPtr, 0, sizeof(FscvrtSt));
225
+
226
+ tmpPtr = (FscvrtSt*)(*stPtr);
227
+
228
+ tmpPtr->dynamMemPtr = NULL;
229
+ tmpPtr->dynamMemSize = 0;
230
+
231
+ tmpPtr->stCfg.inputFs = 24000;
232
+ tmpPtr->stCfg.outputFs = 32000;
233
+ tmpPtr->stCfg.stepSz = 240; // 10ms processing step
234
+ tmpPtr->stCfg.inputType = 0; // short in
235
+ tmpPtr->stCfg.outputType = 0; // short out
236
+
237
+ if (AUP_Biquad_create(&(tmpPtr->biquadSt)) < 0) {
238
+ return -1;
239
+ }
240
+
241
+ return 0;
242
+ }
243
+
244
+ int AUP_Fscvrt_destroy(void** stPtr) {
245
+ FscvrtSt* stHdl;
246
+
247
+ if (stPtr == NULL) {
248
+ return 0;
249
+ }
250
+
251
+ stHdl = (FscvrtSt*)(*stPtr);
252
+ if (stHdl == NULL) {
253
+ return 0;
254
+ }
255
+
256
+ AUP_Biquad_destroy(&(stHdl->biquadSt));
257
+ if (stHdl->dynamMemPtr != NULL) {
258
+ free(stHdl->dynamMemPtr);
259
+ }
260
+ stHdl->dynamMemPtr = NULL;
261
+
262
+ free(stHdl);
263
+ (*stPtr) = NULL;
264
+
265
+ return 0;
266
+ }
267
+
268
+ int AUP_Fscvrt_memAllocate(void* stPtr, const FscvrtStaticCfg* pCfg) {
269
+ FscvrtSt* stHdl = NULL;
270
+ FscvrtStaticCfg tmpStatCfg = {0};
271
+ Biquad_StaticCfg bqStatCfg;
272
+ int idx, ret;
273
+ int totalMemSize = 0;
274
+
275
+ if (stPtr == NULL || pCfg == NULL) {
276
+ return -1;
277
+ }
278
+ stHdl = (FscvrtSt*)(stPtr);
279
+
280
+ memcpy(&tmpStatCfg, pCfg, sizeof(FscvrtStaticCfg));
281
+ if (AUP_Fscvrt_checkStatCfg(&tmpStatCfg) < 0) {
282
+ return -1;
283
+ }
284
+ memcpy(&(stHdl->stCfg), &tmpStatCfg, sizeof(FscvrtStaticCfg));
285
+
286
+ if (AUP_Fscvrt_publishStaticCfg(stHdl) < 0) {
287
+ return -1;
288
+ }
289
+
290
+ // check memory requirement
291
+ totalMemSize = AUP_Fscvrt_dynamMemPrepare(stHdl, NULL, 0);
292
+ if (totalMemSize < 0) {
293
+ return -1;
294
+ }
295
+
296
+ // allocate dynamic memory
297
+ if ((size_t)totalMemSize > stHdl->dynamMemSize) {
298
+ if (stHdl->dynamMemPtr != NULL) {
299
+ free(stHdl->dynamMemPtr);
300
+ stHdl->dynamMemSize = 0;
301
+ }
302
+ stHdl->dynamMemPtr = (void*)malloc(totalMemSize);
303
+ if (stHdl->dynamMemPtr == NULL) {
304
+ return -1;
305
+ }
306
+ stHdl->dynamMemSize = totalMemSize;
307
+ }
308
+ memset(stHdl->dynamMemPtr, 0, stHdl->dynamMemSize);
309
+
310
+ // setup the pointers/variable
311
+ if (AUP_Fscvrt_dynamMemPrepare(stHdl, stHdl->dynamMemPtr,
312
+ stHdl->dynamMemSize) < 0) {
313
+ return -1;
314
+ }
315
+
316
+ // memAllocation for upSmplBiquadSt and downSmplBiquadSt
317
+ if (stHdl->nSec != 0) {
318
+ if (stHdl->nSec > AGORA_UAP_BIQUAD_MAX_SECTION) {
319
+ return -1;
320
+ }
321
+ memset(&bqStatCfg, 0, sizeof(Biquad_StaticCfg));
322
+ bqStatCfg.maxNSample = (size_t)(stHdl->biquadInBufLen);
323
+ bqStatCfg.nsect = stHdl->nSec;
324
+ for (idx = 0; idx < stHdl->nSec; idx++) {
325
+ bqStatCfg.B[idx] = stHdl->biquadB[idx];
326
+ bqStatCfg.A[idx] = stHdl->biquadA[idx];
327
+ }
328
+ bqStatCfg.G = stHdl->biquadG;
329
+
330
+ ret = AUP_Biquad_memAllocate(stHdl->biquadSt, &bqStatCfg);
331
+ if (ret < 0) {
332
+ return -1;
333
+ }
334
+ }
335
+
336
+ return 0;
337
+ }
338
+
339
+ int AUP_Fscvrt_init(void* stPtr) {
340
+ FscvrtSt* stHdl;
341
+
342
+ if (stPtr == NULL) {
343
+ return -1;
344
+ }
345
+ stHdl = (FscvrtSt*)(stPtr);
346
+
347
+ // clear/reset run-time variables
348
+ if (AUP_Fscvrt_resetVariables(stHdl) < 0) {
349
+ return -1;
350
+ }
351
+
352
+ // init submodules ...
353
+ if (stHdl->biquadSt != NULL && stHdl->nSec != 0) {
354
+ if (AUP_Biquad_init(stHdl->biquadSt) < 0) {
355
+ return -1;
356
+ }
357
+ }
358
+
359
+ return 0;
360
+ }
361
+
362
+ int AUP_Fscvrt_getStaticCfg(const void* stPtr, FscvrtStaticCfg* pCfg) {
363
+ const FscvrtSt* stHdl;
364
+
365
+ if (stPtr == NULL || pCfg == NULL) {
366
+ return -1;
367
+ }
368
+ stHdl = (const FscvrtSt*)(stPtr);
369
+
370
+ memcpy(pCfg, &(stHdl->stCfg), sizeof(FscvrtStaticCfg));
371
+
372
+ return 0;
373
+ }
374
+
375
+ int AUP_Fscvrt_getInfor(const void* stPtr, FscvrtGetData* buff) {
376
+ const FscvrtSt* stHdl;
377
+ int delayBiquad = 0;
378
+ int tmp;
379
+
380
+ if (stPtr == NULL || buff == NULL) {
381
+ return -1;
382
+ }
383
+ stHdl = (const FscvrtSt*)(stPtr);
384
+
385
+ if (stHdl->nSec != 0) {
386
+ if (AUP_Biquad_getAlgDelay(stHdl->biquadSt, &delayBiquad) < 0) {
387
+ return -1;
388
+ }
389
+ }
390
+
391
+ if (stHdl->mode == 0) {
392
+ buff->delayInInputFs = 0;
393
+ } else if (stHdl->mode == 1) {
394
+ buff->delayInInputFs =
395
+ (int)roundf(delayBiquad / (float)(stHdl->upSmplRate));
396
+ } else if (stHdl->mode == 2) { // direct downsampling
397
+ buff->delayInInputFs = delayBiquad;
398
+ } else { // stHdl->mode == 3
399
+ buff->delayInInputFs =
400
+ (int)roundf(delayBiquad / (float)(stHdl->upSmplRate));
401
+ }
402
+ tmp = stHdl->stCfg.stepSz * stHdl->upSmplRate / stHdl->downSmplRate;
403
+ if (tmp * stHdl->downSmplRate == stHdl->stCfg.stepSz * stHdl->upSmplRate) {
404
+ buff->maxOutputStepSz = tmp;
405
+ } else {
406
+ buff->maxOutputStepSz = tmp + 1;
407
+ }
408
+
409
+ return 0;
410
+ }
411
+
412
+ int AUP_Fscvrt_proc(void* stPtr, const FscvrtInData* pIn, FscvrtOutData* pOut) {
413
+ FscvrtSt* stHdl = NULL;
414
+ const FscvrtStaticCfg* pCfg;
415
+ Biquad_InputData bqdInData;
416
+ Biquad_OutputData bqdOutData;
417
+ const short* shortSrcPtr = NULL;
418
+ const float* floatSrcPtr = NULL;
419
+ short* shortTgtPtr = NULL;
420
+ float* floatTgtPtr = NULL;
421
+ int idx, tgtIdx;
422
+ int nOutSamples = 0, samplesTaken = 0, samplesLeft = 0;
423
+ int jumpRate;
424
+
425
+ if (stPtr == NULL || pIn == NULL || pOut == NULL || pIn->inDataSeq == NULL ||
426
+ pOut->outDataSeq == NULL) { // pCtrl == NULL
427
+ return -1;
428
+ }
429
+
430
+ stHdl = (FscvrtSt*)(stPtr);
431
+ pCfg = (const FscvrtStaticCfg*)&(stHdl->stCfg);
432
+ shortSrcPtr = (const short*)(pIn->inDataSeq);
433
+ floatSrcPtr = (const float*)(pIn->inDataSeq);
434
+ // ==============================================================================
435
+ // mode-0: bypass
436
+ if (stHdl->mode == 0) { // direct bypass
437
+ if (pIn->outDataSeqLen < pCfg->stepSz) {
438
+ return -1;
439
+ }
440
+ pOut->nOutData = pCfg->stepSz;
441
+ pOut->outDataType = pCfg->outputType;
442
+ if (pIn->inDataSeq == pOut->outDataSeq) {
443
+ if (pCfg->outputType == pCfg->inputType)
444
+ return 0; // we don't need to do anything
445
+ return -1;
446
+ // if input buffer and the output buffer are the same, but required
447
+ // different data type: error, we currently do not support such usecase
448
+ }
449
+
450
+ if (pCfg->inputType == 0 && pCfg->outputType == 0) {
451
+ memcpy(pOut->outDataSeq, pIn->inDataSeq, sizeof(short) * pCfg->stepSz);
452
+ } else if (pCfg->inputType == 1 && pCfg->outputType == 1) {
453
+ memcpy(pOut->outDataSeq, pIn->inDataSeq, sizeof(float) * pCfg->stepSz);
454
+ } else if (pCfg->inputType == 0 && pCfg->outputType == 1) {
455
+ for (idx = 0; idx < pCfg->stepSz; idx++) {
456
+ ((float*)pOut->outDataSeq)[idx] = ((short*)pIn->inDataSeq)[idx];
457
+ }
458
+ } else { // if (pCfg->inputType == 1 && pCfg->outputType == 0)
459
+ for (idx = 0; idx < pCfg->stepSz; idx++) {
460
+ ((short*)pOut->outDataSeq)[idx] =
461
+ (short)_FSCVRT_FLOAT2SHORT(((float*)pIn->inDataSeq)[idx]);
462
+ }
463
+ }
464
+
465
+ return 0;
466
+ }
467
+
468
+ // prepare input buffer for Biquad .....
469
+ memset(stHdl->biquadInBuf, 0, sizeof(float) * stHdl->biquadInBufLen);
470
+ if (pCfg->inputType == 0) {
471
+ for (idx = 0; idx < pCfg->stepSz; idx++) {
472
+ stHdl->biquadInBuf[idx * (stHdl->upSmplRate)] =
473
+ ((float)shortSrcPtr[idx]) * stHdl->upSmplRate;
474
+ }
475
+ } else {
476
+ for (idx = 0; idx < pCfg->stepSz; idx++) {
477
+ stHdl->biquadInBuf[idx * (stHdl->upSmplRate)] =
478
+ floatSrcPtr[idx] * stHdl->upSmplRate;
479
+ }
480
+ }
481
+
482
+ // biquad filtering ......
483
+ memset(&bqdInData, 0, sizeof(Biquad_InputData));
484
+ memset(&bqdOutData, 0, sizeof(Biquad_OutputData));
485
+ bqdInData.samplesPtr = (const void*)(stHdl->biquadInBuf);
486
+ bqdInData.sampleType = 1;
487
+ bqdInData.nsamples = (size_t)(pCfg->stepSz * stHdl->upSmplRate);
488
+ bqdOutData.outputBuff = (void*)&(stHdl->biquadOutBuf[stHdl->biquadOutBufCnt]);
489
+ if (stHdl->biquadOutBufCnt + (pCfg->stepSz * stHdl->upSmplRate) >
490
+ stHdl->biquadOutBufLen) {
491
+ return -1;
492
+ }
493
+ if (AUP_Biquad_proc(stHdl->biquadSt, &bqdInData, &bqdOutData) < 0) {
494
+ return -1;
495
+ }
496
+ stHdl->biquadOutBufCnt += (pCfg->stepSz * stHdl->upSmplRate);
497
+
498
+ // checking the output buffer .........
499
+ nOutSamples = stHdl->biquadOutBufCnt / stHdl->downSmplRate;
500
+ if (pIn->outDataSeqLen < nOutSamples) {
501
+ return -1;
502
+ }
503
+
504
+ // prepare output data, downsampling and throwing out ......
505
+ pOut->nOutData = nOutSamples;
506
+ pOut->outDataType = pCfg->outputType;
507
+
508
+ shortTgtPtr = (short*)pOut->outDataSeq;
509
+ floatTgtPtr = (float*)pOut->outDataSeq;
510
+ jumpRate = stHdl->downSmplRate;
511
+ if (pCfg->outputType == 0) { // -> shortTgtPtr
512
+ for (idx = (jumpRate - 1), tgtIdx = 0; idx < stHdl->biquadOutBufCnt;
513
+ idx += jumpRate, tgtIdx++) {
514
+ shortTgtPtr[tgtIdx] = _FSCVRT_FLOAT2SHORT(stHdl->biquadOutBuf[idx]);
515
+ }
516
+ } else { // -> floatTgtPtr
517
+ for (idx = (jumpRate - 1), tgtIdx = 0; idx < stHdl->biquadOutBufCnt;
518
+ idx += jumpRate, tgtIdx++) {
519
+ floatTgtPtr[tgtIdx] = stHdl->biquadOutBuf[idx];
520
+ }
521
+ }
522
+ if (nOutSamples != tgtIdx) {
523
+ return -1;
524
+ }
525
+
526
+ // update the stHdl->biquadOutBuf and stHdl->biquadOutBufCnt
527
+ samplesTaken = nOutSamples * jumpRate;
528
+ samplesLeft = stHdl->biquadOutBufCnt - samplesTaken;
529
+ if (samplesLeft == 0) {
530
+ stHdl->biquadOutBufCnt = 0;
531
+ } else if (samplesLeft > 0) {
532
+ stHdl->biquadOutBufCnt = samplesLeft;
533
+ memmove(stHdl->biquadOutBuf, &(stHdl->biquadOutBuf[samplesTaken]),
534
+ sizeof(float) * samplesLeft);
535
+ } else { // samplesLeft < 0
536
+ stHdl->biquadOutBufCnt = 0;
537
+ return -1;
538
+ }
539
+
540
+ return 0;
541
+ }
src/fscvrt.h ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //
2
+ // Copyright © 2025 Agora
3
+ // This file is part of TEN Framework, an open source project.
4
+ // Licensed under the Apache License, Version 2.0, with certain conditions.
5
+ // Refer to the "LICENSE" file in the root directory for more information.
6
+ //
7
+ #ifndef __FSCVRT_H__
8
+ #define __FSCVRT_H__
9
+
10
+ #define AUP_FSCVRT_MAX_INPUT_LEN (2400)
11
+ // max. number of input samples that can be fed in per proc call
12
+
13
+ #include <stdio.h>
14
+
15
+ typedef struct FscvrtStaticCfg_ {
16
+ int inputFs; // input stream sampling freq.
17
+ int outputFs; // output stream sampling freq.
18
+ int stepSz; // number of input samples per each proc.
19
+ int inputType; // input data type, 0: short, 1: float
20
+ int outputType; // output data type, 0: short, 1: float
21
+ } FscvrtStaticCfg;
22
+
23
+ typedef struct FscvrtInData_ {
24
+ const void* inDataSeq; // [stepSz], externally provided buffer
25
+ int outDataSeqLen;
26
+ // the length of externally provided buffer outDataSeq in OutData
27
+ } FscvrtInData;
28
+
29
+ typedef struct FscvrtOutData_ {
30
+ int nOutData; // number of samples in outDataSeq
31
+ // this value may vary by +-1 from frame-to-frame
32
+ // and the user needs to check if nOutData <= outDataSeqLen
33
+ // o.w. the buffer outDataSeq is not long enough
34
+ int outDataType; // output data type, 0: short, 1: float
35
+ void* outDataSeq; // [outDataSeqLen], externally provided buffer
36
+ } FscvrtOutData;
37
+
38
+ typedef struct FscvrtGetData_ {
39
+ int maxOutputStepSz; // max. number of output samples per each proc.
40
+ int delayInInputFs; // algorithm delay in terms of samples @ input fs
41
+ } FscvrtGetData;
42
+
43
+ #ifdef __cplusplus
44
+ extern "C" {
45
+ #endif
46
+
47
+ /****************************************************************************
48
+ * AUP_Fscvrt_create(...)
49
+ *
50
+ * This function creates a state handler from nothing, which is NOT ready for
51
+ * processing
52
+ *
53
+ * Input:
54
+ *
55
+ * Output:
56
+ * - stPtr : buffer to store the returned state handler
57
+ *
58
+ * Return value : 0 - Ok
59
+ * -1 - Error
60
+ */
61
+ int AUP_Fscvrt_create(void** stPtr);
62
+
63
+ /****************************************************************************
64
+ * AUP_Fscvrt_destroy(...)
65
+ *
66
+ * destroys the Fs-Converter instance and releases all of its dynamically allocated memory
67
+ *
68
+ * Input:
69
+ * - stPtr : buffer of State Handler, after this method, this
70
+ * handler won't be usable anymore
71
+ *
72
+ * Output:
73
+ *
74
+ * Return value : 0 - Ok
75
+ * -1 - Error
76
+ */
77
+ int AUP_Fscvrt_destroy(void** stPtr);
78
+
79
+ /****************************************************************************
80
+ * AUP_Fscvrt_memAllocate(...)
81
+ *
82
+ * This function sets Static Config params and does memory allocation
83
+ * operation
84
+ *
85
+ * Input:
86
+ * - stPtr : State Handler which was returned by _create
87
+ * - pCfg : static configuration parameters
88
+ *
89
+ * Output:
90
+ *
91
+ * Return value : 0 - Ok
92
+ * -1 - Error
93
+ */
94
+ int AUP_Fscvrt_memAllocate(void* stPtr, const FscvrtStaticCfg* pCfg);
95
+
96
+ /****************************************************************************
97
+ * AUP_Fscvrt_init(...)
98
+ *
99
+ * This function resets (initializes) the Fs-Converter module and gets it prepared for
100
+ * processing
101
+ *
102
+ * Input:
103
+ * - stPtr : State Handler which has gone through create and
104
+ * memAllocate
105
+ *
106
+ * Output:
107
+ *
108
+ * Return value : 0 - Ok
109
+ * -1 - Error
110
+ */
111
+ int AUP_Fscvrt_init(void* stPtr);
112
+
113
+ /****************************************************************************
114
+ * AUP_Fscvrt_setDynamCfg(...)
115
+ *
116
+ * This function sets the dynamic (per-frame variable) configuration
117
+ *
118
+ * Input:
119
+ * - stPtr : State Handler which has gone through create and
120
+ * memAllocate
121
+ * (note: the current prototype takes no configuration argument, only stPtr)
122
+ *
123
+ * Output:
124
+ *
125
+ * Return value : 0 - Ok
126
+ * -1 - Error
127
+ */
128
+ int AUP_Fscvrt_setDynamCfg(void* stPtr);
129
+
130
+ /****************************************************************************
131
+ * AUP_Fscvrt_getStaticCfg(...)
132
+ *
133
+ * This function gets the static configuration status from the Fs-Converter module
134
+ *
135
+ * Input:
136
+ * - stPtr : State Handler which has gone through create and
137
+ * memAllocate
138
+ *
139
+ * Output:
140
+ * - pCfg : configuration content
141
+ *
142
+ * Return value : 0 - Ok
143
+ * -1 - Error
144
+ */
145
+ int AUP_Fscvrt_getStaticCfg(const void* stPtr, FscvrtStaticCfg* pCfg);
146
+
147
+ /****************************************************************************
148
+ * AUP_Fscvrt_getInfor(...)
149
+ *
150
+ * This function gets subsidiary information from the Fs-Converter module
151
+ *
152
+ * Input:
153
+ * - stPtr : State Handler which has gone through create and
154
+ * memAllocate
155
+ *
156
+ * Output:
157
+ * - buff : returned information (FscvrtGetData)
158
+ *
159
+ * Return value : 0 - Ok
160
+ * -1 - Error
161
+ */
162
+ int AUP_Fscvrt_getInfor(const void* stPtr, FscvrtGetData* buff);
163
+
164
+ /****************************************************************************
165
+ * AUP_Fscvrt_proc(...)
166
+ *
167
+ * process a single frame
168
+ *
169
+ * Input:
170
+ * - stPtr : State Handler which has gone through create and
171
+ * memAllocate
172
+ * (note: there is no pCtrl argument; the prototype takes only stPtr, pIn and pOut)
173
+ * - pIn : input data stream
174
+ *
175
+ * Output:
176
+ * - pOut : output data (converted samples in outDataSeq, their count in nOutData)
177
+ *
178
+ * Return value : 0 - Ok
179
+ * -1 - Error
180
+ */
181
+ int AUP_Fscvrt_proc(void* stPtr, const FscvrtInData* pIn, FscvrtOutData* pOut);
182
+
183
+ #ifdef __cplusplus
184
+ }
185
+ #endif
186
+ #endif // __FSCVRT_H__