Upload examples

Files changed (12) hide show

examples/.gitattributes +1 -0
examples/CMakeLists.txt +40 -0
examples/build-and-deploy-android.sh +34 -0
examples/build-and-deploy-ios.sh +24 -0
examples/build-and-deploy-linux.sh +19 -0
examples/build-and-deploy-mac.sh +26 -0
examples/build-and-deploy-windows.bat +67 -0
examples/main.c +298 -0
examples/plot_pr_curves.py +212 -0
examples/s0724-s0730.wav +3 -0
examples/sample_array.h +0 -0
examples/test.py +26 -0

examples/.gitattributes ADDED Viewed

	@@ -0,0 +1 @@


1	+ *.wav filter=lfs diff=lfs merge=lfs -text

examples/CMakeLists.txt ADDED Viewed

	@@ -0,0 +1,40 @@

+# Build configuration for the TEN VAD command-line demo (main.c).
+# The demo is linked against the prebuilt ten_vad binaries shipped under
+# <repo>/lib/<platform>/...; only the demo executable itself is compiled here.
+cmake_minimum_required(VERSION 3.10)
+# ROOT is the repository root, i.e. the parent directory of examples/.
+get_filename_component(ROOT "${CMAKE_CURRENT_SOURCE_DIR}/.." ABSOLUTE)
+# main.c is the only source file, so only a C toolchain is needed.
+project(ten_vad_demo LANGUAGES C)
+# The IOS variable tested below was introduced in CMake 3.14; fail loudly
+# instead of silently skipping the iOS branch with an older CMake.
+if("${CMAKE_SYSTEM_NAME}" STREQUAL "iOS" AND CMAKE_VERSION VERSION_LESS "3.14")
+  message(FATAL_ERROR "ten_vad_demo: building for iOS requires CMake 3.14 or newer")
+endif()
+add_executable(ten_vad_demo "${ROOT}/examples/main.c")
+target_include_directories(ten_vad_demo PRIVATE "${ROOT}/include")
+# Pick the prebuilt library that matches the target platform/architecture.
+if(WIN32)
+  # 8-byte pointers => 64-bit build, otherwise 32-bit.
+  if(CMAKE_SIZEOF_VOID_P EQUAL 8)
+    target_link_libraries(ten_vad_demo PRIVATE "${ROOT}/lib/Windows/x64/ten_vad.lib")
+  else()
+    target_link_libraries(ten_vad_demo PRIVATE "${ROOT}/lib/Windows/x86/ten_vad.lib")
+  endif()
+elseif(ANDROID)
+  # Every processor other than aarch64 falls back to the armeabi-v7a binary.
+  if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64")
+    target_link_libraries(ten_vad_demo PRIVATE "${ROOT}/lib/Android/arm64-v8a/libten_vad.so")
+  else()
+    target_link_libraries(ten_vad_demo PRIVATE "${ROOT}/lib/Android/armeabi-v7a/libten_vad.so")
+  endif()
+elseif(IOS)
+  target_link_libraries(ten_vad_demo PRIVATE "${ROOT}/lib/iOS/ten_vad.framework")
+  # Xcode-only settings: framework lookup, runtime search path and signing.
+  # DEVELOPMENT_TEAM is forwarded from the command line / cache if provided.
+  set_target_properties(ten_vad_demo PROPERTIES
+    XCODE_ATTRIBUTE_FRAMEWORK_SEARCH_PATHS "${ROOT}/lib/iOS"
+    XCODE_ATTRIBUTE_LD_RUNPATH_SEARCH_PATHS "@executable_path/Frameworks"
+    XCODE_ATTRIBUTE_CODE_SIGN_STYLE "Manual"
+    XCODE_ATTRIBUTE_DEVELOPMENT_TEAM "${CMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM}"
+    XCODE_ATTRIBUTE_PRODUCT_BUNDLE_IDENTIFIER "com.yourcompany.ten_vad_demo"
+    XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS "iphoneos"
+    XCODE_ATTRIBUTE_ARCHS "arm64"
+  )
+elseif(APPLE)
+  target_link_libraries(ten_vad_demo PRIVATE "${ROOT}/lib/macOS/ten_vad.framework")
+  # Bake the install rpath into the binary at build time.
+  set_target_properties(ten_vad_demo PROPERTIES
+    INSTALL_RPATH "@loader_path"
+    BUILD_WITH_INSTALL_RPATH TRUE
+  )
+elseif(UNIX)
+  # Only an x64 Linux binary is referenced here.
+  target_link_libraries(ten_vad_demo PRIVATE "${ROOT}/lib/Linux/x64/libten_vad.so")
+else()
+  # Without this the configure step succeeds and the link step fails later
+  # with unresolved ten_vad_* symbols.
+  message(FATAL_ERROR "ten_vad_demo: no prebuilt ten_vad library is configured for this platform")
+endif()

examples/build-and-deploy-android.sh ADDED Viewed

	@@ -0,0 +1,34 @@

+#!/bin/bash
+# Build the TEN VAD demo for Android with the NDK, push it to a device with
+# adb, run it on the bundled sample WAV and pull the resulting out.txt back.
+# Requirements: ANDROID_NDK set to the NDK directory, cmake and adb on PATH.
+# Run from the examples/ directory; all paths below are relative to it.
+set -eo pipefail
+# Fail early with a clear message instead of handing cmake a broken path.
+: "${ANDROID_NDK:?ANDROID_NDK must point to the Android NDK directory}"
+# Customize the arch and toolchain
+arch=arm64-v8a
+toolchain=aarch64-linux-android-clang
+# arch=armeabi-v7a
+# toolchain=arm-linux-android-clang
+build_dir="build-android/$arch"
+rm -rf "$build_dir"
+mkdir -p "$build_dir"
+cd "$build_dir"
+# Step 1: Build the demo
+# ANDROID_ABI is passed explicitly so the ABI that gets built always matches
+# the libten_vad.so pushed below. CMAKE_BUILD_TYPE is set at configure time
+# because "Unix Makefiles" is single-config and ignores --config.
+cmake ../../ \
+  -DANDROID_ABI="$arch" \
+  -DANDROID_TOOLCHAIN_NAME="$toolchain" \
+  -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \
+  -DCMAKE_BUILD_TYPE=Release \
+  -G "Unix Makefiles"
+cmake --build . --config Release
+# Step 2: Run the demo
+# Each adb call is its own command: inside an `a && b` list, `set -e` does
+# not abort when a non-final command fails, which would hide push errors.
+adb push ../../s0724-s0730.wav /data/local/tmp/
+adb push "../../../lib/Android/${arch}/libten_vad.so" /data/local/tmp/libten_vad.so
+adb push ten_vad_demo /data/local/tmp/
+adb shell "cd /data/local/tmp && chmod +x ten_vad_demo && \
+LD_LIBRARY_PATH=/data/local/tmp ./ten_vad_demo ./s0724-s0730.wav ./out.txt && \
+exit 0"
+adb pull /data/local/tmp/out.txt ./
+cd ../../

examples/build-and-deploy-ios.sh ADDED Viewed

	@@ -0,0 +1,24 @@

+#!/usr/bin/env bash
+set -euo pipefail
+work_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+build_dir="${work_dir}/build-ios"
+mkdir -p "${build_dir}"
+cd "${build_dir}"
+# Step 1: Generate Xcode project for iOS device
+echo "[Info] Generating Xcode project"
+cmake "${work_dir}" \
+  -DCMAKE_SYSTEM_NAME=iOS \
+  -DCMAKE_OSX_SYSROOT="iphoneos" \
+  -DCMAKE_OSX_ARCHITECTURES="arm64" \
+  -DCMAKE_XCODE_ATTRIBUTE_CODE_SIGN_IDENTITY="Apple Development" \
+  -DCMAKE_OSX_DEPLOYMENT_TARGET=12.1 \
+  -DCMAKE_INSTALL_RPATH="@executable_path/Frameworks" \
+  -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
+  -G Xcode
+# Step 2: Use Xcode to open the project in build-ios directory
+# Step 3: Build and run the project in Xcode IDE

examples/build-and-deploy-linux.sh ADDED Viewed

	@@ -0,0 +1,19 @@

+#!/bin/bash
+set -euo pipefail
+arch=x64
+build_dir=build-linux/$arch
+rm -rf $build_dir
+mkdir -p $build_dir
+cd $build_dir
+# Step 1: Build the demo
+cmake ../../
+cmake --build . --config Release
+# Step 2: Run the demo
+export LD_LIBRARY_PATH=../../../lib/Linux/$arch
+./ten_vad_demo ../../s0724-s0730.wav out.txt
+cd ../../

examples/build-and-deploy-mac.sh ADDED Viewed

	@@ -0,0 +1,26 @@

+#!/bin/bash
+set -euo pipefail
+# Customize the arch
+arch=arm64
+# arch=x86_64
+build_dir=build-mac/$arch
+rm -rf $build_dir
+mkdir -p $build_dir
+cd $build_dir
+# Step 1: Build the demo
+cmake ../../ \
+  -DCMAKE_CXX_COMPILER=/usr/bin/clang++ \
+  -DCMAKE_C_COMPILER=/usr/bin/clang \
+  -DCMAKE_OSX_ARCHITECTURES=${arch} \
+  -G Xcode
+cmake --build . --config Release -- -UseModernBuildSystem=NO
+# Step 2: Run the demo
+export DYLD_FRAMEWORK_PATH="../../../lib/macOS/"
+Release/ten_vad_demo ../../s0724-s0730.wav out.txt
+cd ../../

examples/build-and-deploy-windows.bat ADDED Viewed

	@@ -0,0 +1,67 @@

+@echo off
+setlocal
+@REM Builds the TEN VAD demo with Visual Studio and runs it on the sample WAV.
+@REM Every build/copy step is checked so a failure stops the script at once.
+@REM Customize the arch
+set arch=x64
+@REM set arch=x86
+@REM step 1: Build the demo
+set "build_dir=%~dp0\build-windows"
+if exist "%build_dir%" rmdir /s /q "%build_dir%"
+mkdir "%build_dir%"
+cd /d "%build_dir%"
+@REM Customize the Visual Studio version
+@REM REM VS 2017
+@REM if %arch% == x64 (
+@REM   cmake .. -G "Visual Studio 15 2017" -A x64
+@REM ) else if %arch% == x86 (
+@REM   cmake .. -G "Visual Studio 15 2017" -A Win32
+@REM )
+REM VS 2019
+if %arch% == x64 (
+  cmake .. -G "Visual Studio 16 2019" -A x64
+) else if %arch% == x86 (
+  cmake .. -G "Visual Studio 16 2019" -A Win32
+) else (
+  echo [Error] unsupported arch: %arch%
+  exit /b 1
+)
+if errorlevel 1 (
+  echo [Error] cmake configure failed
+  exit /b 1
+)
+@REM REM VS 2022
+@REM if %arch% == x64 (
+@REM   cmake .. -G "Visual Studio 17 2022" -A x64
+@REM ) else if %arch% == x86 (
+@REM   cmake .. -G "Visual Studio 17 2022" -A Win32
+@REM )
+cmake --build . --config Release
+if errorlevel 1 (
+  echo [Error] cmake build failed
+  exit /b 1
+)
+cd ..
+@REM step 2: Run the demo
+pushd "%~dp0"
+@REM The sample audio and the runtime DLL must sit next to the executable.
+copy /Y "s0724-s0730.wav" "%build_dir%\Release"
+if errorlevel 1 (
+  echo [Error] copy file failed
+  popd
+  exit /b 1
+)
+copy /Y "..\lib\Windows\%arch%\ten_vad.dll" "%build_dir%\Release"
+if errorlevel 1 (
+  echo [Error] copy file failed
+  popd
+  exit /b 1
+)
+cd /d "%build_dir%\Release"
+if not exist "ten_vad_demo.exe" (
+    echo Error: ten_vad_demo.exe not found
+    popd
+    exit /b 1
+)
+if not exist "s0724-s0730.wav" (
+    echo Error: s0724-s0730.wav not found
+    popd
+    exit /b 1
+)
+ten_vad_demo.exe "s0724-s0730.wav" out.txt
+if errorlevel 1 (
+    echo Error: ten_vad_demo.exe failed
+    popd
+    exit /b 1
+)
+cd /d "%~dp0"
+popd
+exit /b 0

examples/main.c ADDED Viewed

	@@ -0,0 +1,298 @@

+//
+// This file is part of TEN Framework, an open source project.
+// Licensed under the Apache License, Version 2.0.
+// See the LICENSE file for more information.
+//
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <time.h>
+#include <inttypes.h>
+#include <string.h> // memcmp
+#ifdef _WIN32
+#include <windows.h>
+#endif
+#include "ten_vad.h"
+#if defined(__APPLE__)
+#include <TargetConditionals.h>
+#if TARGET_OS_IPHONE
+#include "sample_array.h"
+#endif
+#endif
+const int hop_size = 256; // 16 ms per frame
+// Returns a timestamp in milliseconds taken from a monotonic clock
+// (QueryPerformanceCounter on Windows, CLOCK_MONOTONIC elsewhere).
+// Only the difference between two calls is meaningful.
+uint64_t get_timestamp_ms(void)
+{
+#ifdef _WIN32
+  LARGE_INTEGER frequency;
+  LARGE_INTEGER counter;
+  QueryPerformanceFrequency(&frequency);
+  QueryPerformanceCounter(&counter);
+  // ticks * 1000 / ticks-per-second => milliseconds
+  return (uint64_t)(counter.QuadPart * 1000 / frequency.QuadPart);
+#else
+  struct timespec ts;
+  clock_gettime(CLOCK_MONOTONIC, &ts);
+  // Widen to 64 bits before multiplying: where time_t is 32 bits wide,
+  // tv_sec * 1000 would otherwise be computed in 32 bits and overflow.
+  return (uint64_t)ts.tv_sec * 1000u + (uint64_t)ts.tv_nsec / 1000000u;
+#endif
+}
+// On-disk layouts used while parsing a WAV file. The two header structs are
+// read straight from the file with fread(), so they are packed to 1-byte
+// alignment to guarantee the compiler inserts no padding between fields.
+// RIFF header: the first 12 bytes of the file.
+#pragma pack(push, 1)
+typedef struct
+{
+  char chunk_id[4];    // should be "RIFF"
+  uint32_t chunk_size; // file total size - 8
+  char format[4];      // should be "WAVE"
+} riff_header_t;
+// Header that precedes every sub-chunk: 4-character id + payload size.
+typedef struct
+{
+  char id[4];    // should be "fmt " or "data"
+  uint32_t size; // chunk data size
+} chunk_header_t;
+#pragma pack(pop)
+// In-memory summary of the WAV fields this demo cares about. It is filled
+// field by field by read_wav_file(), so it does not need to be packed.
+typedef struct
+{
+  uint16_t audio_format;    // audio format (e.g. PCM=1)
+  uint16_t num_channels;    // number of channels
+  uint32_t sample_rate;     // sample rate
+  uint32_t byte_rate;       // byte rate
+  uint16_t block_align;     // block align
+  uint16_t bits_per_sample; // bits per sample
+  uint32_t data_size;       // size in bytes of the "data" chunk payload
+  long data_offset;         // file offset where the "data" payload starts
+} wav_info_t;
+// Parses the chunk list of fp into *info; returns 0 on success, -1 on error.
+// Defined at the end of this file.
+int read_wav_file(FILE *fp, wav_info_t *info);
+// Runs TEN VAD over frame_num consecutive frames of hop_size samples taken
+// from input_buf and prints one "[frame] probability, flag" line per frame.
+//   input_buf : at least frame_num * hop_size 16-bit samples
+//   out_probs : receives one probability per frame (frame_num entries)
+//   out_flags : receives one flag per frame (frame_num entries)
+//   use_time  : receives the elapsed processing time in milliseconds
+// Returns 0 on success, -1 if the VAD instance cannot be created or a
+// frame fails to process.
+// NOTE(review): assumes the ten_vad_* calls return 0 on success - confirm
+// against ten_vad.h.
+int vad_process(int16_t *input_buf, uint32_t frame_num,
+                float *out_probs, int32_t *out_flags,
+                float *use_time)
+{
+  printf("tenvadsrc version: %s\n", ten_vad_get_version());
+  void *ten_vad_handle = NULL;
+  float voice_threshold = 0.5f;
+  int ret = 0;
+  *use_time = 0.0f;
+  if (ten_vad_create(&ten_vad_handle, hop_size, voice_threshold) != 0 ||
+      ten_vad_handle == NULL)
+  {
+    fprintf(stderr, "Failed to create TEN VAD instance\n");
+    return -1;
+  }
+  uint64_t start = get_timestamp_ms();
+  // Unsigned index: frame_num is uint32_t, so a signed int would be compared
+  // across signedness.
+  for (uint32_t i = 0; i < frame_num; ++i)
+  {
+    int16_t *audio_data = input_buf + (size_t)i * (size_t)hop_size;
+    if (ten_vad_process(ten_vad_handle, audio_data, hop_size,
+                        &out_probs[i], &out_flags[i]) != 0)
+    {
+      fprintf(stderr, "ten_vad_process failed at frame %" PRIu32 "\n", i);
+      ret = -1;
+      break;
+    }
+    printf("[%" PRIu32 "] %0.6f, %" PRId32 "\n", i, out_probs[i], out_flags[i]);
+  }
+  uint64_t end = get_timestamp_ms();
+  *use_time = (float)(end - start);
+  ten_vad_destroy(&ten_vad_handle);
+  ten_vad_handle = NULL;
+  return ret;
+}
+// Runs the VAD over the WAV file argv[1] and writes one
+// "[frame] probability, flag" line per frame to argv[2].
+// The timing arithmetic below (samples / 16 = milliseconds) and the int16_t
+// view of the data assume 16 kHz, mono, 16-bit PCM input; other formats only
+// trigger a warning.
+// Returns 0 on success (and when only the usage hint is printed), 1 on error.
+int test_with_wav(int argc, char *argv[])
+{
+  if (argc < 3)
+  {
+    printf("Warning: Test.exe input.wav output.txt\n");
+    return 0;
+  }
+  char *input_file = argv[1];
+  char *out_file = argv[2];
+  // Everything released in the cleanup section is declared up front so the
+  // error paths can jump there safely.
+  int ret = 1;
+  char *input_buf = NULL;
+  float *out_probs = NULL;
+  int32_t *out_flags = NULL;
+  FILE *fout = NULL;
+  wav_info_t info;
+  uint32_t byte_num = 0;
+  uint32_t sample_num = 0;
+  uint32_t frame_num = 0;
+  size_t bytes_read = 0;
+  float total_audio_time = 0.0f;
+  float use_time = .0;
+  float rtf = 0.0f;
+  FILE *fp = fopen(input_file, "rb");
+  if (fp == NULL)
+  {
+    printf("Failed to open input file: %s\n", input_file);
+    return 1;
+  }
+  if (read_wav_file(fp, &info) != 0)
+  {
+    printf("Failed to read WAV file header\n");
+    goto cleanup;
+  }
+  if (info.sample_rate != 16000 || info.num_channels != 1 ||
+      info.bits_per_sample != 16)
+  {
+    fprintf(stderr,
+            "Warning: expected 16 kHz mono 16-bit PCM, got %" PRIu32
+            " Hz, %u channel(s), %u bits; results may be meaningless\n",
+            info.sample_rate, (unsigned)info.num_channels,
+            (unsigned)info.bits_per_sample);
+  }
+  byte_num = info.data_size;
+  printf("WAV file byte num: %" PRIu32 "\n", byte_num);
+  // malloc(0) may legally return NULL, so always request at least one byte.
+  input_buf = (char *)malloc(byte_num > 0 ? byte_num : 1);
+  if (input_buf == NULL)
+  {
+    fprintf(stderr, "Failed to allocate %" PRIu32 " bytes for audio data\n", byte_num);
+    goto cleanup;
+  }
+  if (fseek(fp, info.data_offset, SEEK_SET) != 0)
+  {
+    fprintf(stderr, "Failed to seek to audio data\n");
+    goto cleanup;
+  }
+  bytes_read = fread(input_buf, 1, byte_num, fp);
+  fclose(fp);
+  fp = NULL;
+  if (bytes_read < byte_num)
+  {
+    // The header promised more data than the file holds: only process what
+    // was actually read instead of feeding uninitialised memory to the VAD.
+    fprintf(stderr, "Warning: data chunk truncated, read %lu of %" PRIu32 " bytes\n",
+            (unsigned long)bytes_read, byte_num);
+    byte_num = (uint32_t)bytes_read;
+  }
+  sample_num = byte_num / sizeof(int16_t);
+  total_audio_time = (float)sample_num / 16.0;
+  printf("total_audio_time: %.2f(ms)\n", total_audio_time);
+  frame_num = sample_num / hop_size;
+  printf("Audio frame Num: %" PRIu32 "\n", frame_num);
+  out_probs = (float *)malloc((frame_num > 0 ? frame_num : 1) * sizeof(float));
+  out_flags = (int32_t *)malloc((frame_num > 0 ? frame_num : 1) * sizeof(int32_t));
+  if (out_probs == NULL || out_flags == NULL)
+  {
+    fprintf(stderr, "Failed to allocate result buffers\n");
+    goto cleanup;
+  }
+  if (vad_process((int16_t *)input_buf, frame_num,
+                  out_probs, out_flags,
+                  &use_time) != 0)
+  {
+    goto cleanup;
+  }
+  // Real-time factor; guarded so an empty file does not divide by zero.
+  rtf = (total_audio_time > 0.0f) ? use_time / total_audio_time : 0.0f;
+  printf("Consuming time: %f(ms), audio-time: %.2f(ms), =====> RTF: %0.6f\n",
+          use_time, total_audio_time, rtf);
+  fout = fopen(out_file, "w");
+  if (fout == NULL)
+  {
+    fprintf(stderr, "Failed to open output file: %s\n", out_file);
+    goto cleanup;
+  }
+  for (uint32_t i = 0; i < frame_num; i++)
+  {
+    fprintf(fout, "[%" PRIu32 "] %0.6f, %" PRId32 "\n", i, out_probs[i], out_flags[i]);
+  }
+  ret = 0;
+cleanup:
+  if (fout != NULL)
+    fclose(fout);
+  if (fp != NULL)
+    fclose(fp);
+  free(input_buf);
+  free(out_probs);
+  free(out_flags);
+  return ret;
+}
+#if TARGET_OS_IPHONE
+// Used for iOS APP demo
+// Runs the VAD over the audio compiled into sample_array (sample_array.h)
+// and prints per-frame results plus the real-time factor.
+// Returns 0 on success, 1 on allocation or VAD failure.
+int test_with_array()
+{
+  char *input_buf = (char *)sample_array;
+  // Size in BYTES of the whole array. Dividing by the element size would
+  // yield an element count, which only equals the byte count for 1-byte
+  // elements.
+  uint32_t byte_num = (uint32_t)sizeof(sample_array);
+  printf("WAV file byte num: %" PRIu32 "\n", byte_num);
+  uint32_t sample_num = byte_num / sizeof(int16_t);
+  float total_audio_time = (float)sample_num / 16.0;
+  printf("total_audio_time: %.2f(ms)\n", total_audio_time);
+  uint32_t frame_num = sample_num / hop_size;
+  printf("Audio frame Num: %" PRIu32 "\n", frame_num);
+  int ret = 1;
+  // malloc(0) may legally return NULL, so request at least one element.
+  float *out_probs = (float *)malloc((frame_num > 0 ? frame_num : 1) * sizeof(float));
+  int32_t *out_flags = (int32_t *)malloc((frame_num > 0 ? frame_num : 1) * sizeof(int32_t));
+  float use_time = .0;
+  if (out_probs == NULL || out_flags == NULL)
+  {
+    fprintf(stderr, "Failed to allocate result buffers\n");
+  }
+  else if (vad_process((int16_t *)input_buf, frame_num,
+                       out_probs, out_flags,
+                       &use_time) == 0)
+  {
+    float rtf = (total_audio_time > 0.0f) ? use_time / total_audio_time : 0.0f;
+    printf("Consuming time: %f(ms), audio-time: %.2f(ms), =====> RTF: %0.6f\n",
+            use_time, total_audio_time, rtf);
+    ret = 0;
+  }
+  // The result buffers were previously never released.
+  free(out_probs);
+  free(out_flags);
+  return ret;
+}
+#endif
+// Program entry point. On iOS the built-in sample array is processed;
+// everywhere else the WAV file named on the command line is processed.
+// The exit status is whatever the selected test routine returns.
+int main(int argc, char *argv[])
+{
+  int status;
+#if TARGET_OS_IPHONE
+  status = test_with_array();
+#else
+  status = test_with_wav(argc, argv);
+#endif
+  return status;
+}
+// function to read WAV file info
+// Walks the RIFF chunk list of fp, copies the fields of the "fmt " chunk
+// into *info and records size and file offset of the "data" chunk payload.
+// The stream position is restored to where it was on entry, on success and
+// on failure alike.
+// Multi-byte fields are read directly into host integers, so this presumes a
+// little-endian host (WAV stores them little-endian).
+// Returns 0 on success, -1 on any error (details are printed to stderr).
+int read_wav_file(FILE *fp, wav_info_t *info)
+{
+  if (fp == NULL || info == NULL)
+    return -1;
+  int ret = -1;
+  int fmt_found = 0, data_found = 0;
+  riff_header_t riff;
+  // save current file position
+  long orig_pos = ftell(fp);
+  if (orig_pos < 0)
+    return -1;
+  if (fseek(fp, 0, SEEK_SET) != 0)
+    goto done;
+  // read RIFF header
+  if (fread(&riff, sizeof(riff_header_t), 1, fp) != 1)
+  {
+    fprintf(stderr, "Can not read RIFF head\n");
+    goto done;
+  }
+  // verify RIFF/WAVE format
+  if (memcmp(riff.chunk_id, "RIFF", 4) != 0 ||
+      memcmp(riff.format, "WAVE", 4) != 0)
+  {
+    fprintf(stderr, "not a valid RIFF/WAVE file\n");
+    goto done;
+  }
+  memset(info, 0, sizeof(wav_info_t));
+  // iterate all chunks until both fmt and data have been seen
+  while (!(fmt_found && data_found))
+  {
+    chunk_header_t chunk;
+    long skip = 0;
+    if (fread(&chunk, sizeof(chunk_header_t), 1, fp) != 1)
+    {
+      break; // read failed, maybe end of file
+    }
+    if (memcmp(chunk.id, "fmt ", 4) == 0)
+    {
+      fmt_found = 1;
+      if (chunk.size < 16)
+      {
+        fprintf(stderr, "fmt chunk size is abnormal\n");
+        goto done;
+      }
+      // read fmt parameters
+      if (fread(&info->audio_format, 2, 1, fp) != 1 ||
+          fread(&info->num_channels, 2, 1, fp) != 1 ||
+          fread(&info->sample_rate, 4, 1, fp) != 1 ||
+          fread(&info->byte_rate, 4, 1, fp) != 1 ||
+          fread(&info->block_align, 2, 1, fp) != 1 ||
+          fread(&info->bits_per_sample, 2, 1, fp) != 1)
+      {
+        fprintf(stderr, "failed to read fmt data\n");
+        goto done;
+      }
+      // 16 payload bytes were consumed above; whatever is left still has to
+      // be skipped (fmt extension data plus the pad byte, if any).
+      chunk.size -= 16;
+    }
+    else if (memcmp(chunk.id, "data", 4) == 0)
+    {
+      data_found = 1;
+      info->data_size = chunk.size;
+      info->data_offset = ftell(fp); // record data start position
+      if (fmt_found)
+        break; // both chunks known, the payload itself is not read here
+      // data came before fmt: step over the payload and keep looking
+    }
+    // Skip the (remaining) payload. Chunks are word-aligned, so an odd size
+    // is followed by one pad byte. The bound keeps the value inside the
+    // range every C implementation guarantees for long.
+    if (chunk.size > 0x7FFFFFFEu)
+    {
+      fprintf(stderr, "chunk size is abnormal\n");
+      goto done;
+    }
+    skip = (long)chunk.size + (long)(chunk.size & 1u);
+    if (skip > 0 && fseek(fp, skip, SEEK_CUR) != 0)
+    {
+      break; // cannot advance any further, treat like end of file
+    }
+  }
+  // check if necessary chunks are found
+  if (!fmt_found)
+  {
+    fprintf(stderr, "fmt chunk not found\n");
+    goto done;
+  }
+  if (!data_found)
+  {
+    fprintf(stderr, "data chunk not found\n");
+    goto done;
+  }
+  ret = 0;
+done:
+  // restore original file position
+  fseek(fp, orig_pos, SEEK_SET);
+  return ret;
+}

examples/plot_pr_curves.py ADDED Viewed

	@@ -0,0 +1,212 @@

+#
+# This file is part of TEN Framework, an open source project.
+# Licensed under the Apache License, Version 2.0.
+# See the LICENSE file for more information.
+#
+import os, glob, sys, torchaudio
+import numpy as np
+import scipy.io.wavfile as Wavfile
+import matplotlib.pyplot as plt
+from sklearn.metrics import confusion_matrix
+os.system('git clone https://github.com/snakers4/silero-vad.git')  # Clone the silero-vad repo, using Silero V5
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "./silero-vad/src")))
+from silero_vad.utils_vad import VADIterator, init_jit_model
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../include")))
+from ten_vad import TenVad
+def convert_label_to_framewise(label_file, hop_size):
+    frame_duration = hop_size / 16000
+    with open(label_file, "r") as f:
+        lines = f.readlines()
+    content = lines[0].strip().split(",")[1:]
+    start = np.array(
+        content[::3], dtype=float
+    )  # Start point of each audio segment
+    end = np.array(
+        content[1:][::3], dtype=float
+    )  # End point of each audio segment
+    lab_manual = np.array(
+        content[2:][::3], dtype=int
+    )  # label, 0/1 stands for non-speech or speech, respectively
+    assert (
+        len(start) == len(end)
+        and len(start) == len(lab_manual)
+        and len(end) == len(lab_manual)
+    )
+    num = np.array(
+        np.round(((end - start) / frame_duration)), dtype=np.int32
+    )  # get number of frames of each audio segment
+    label_framewise = np.array([])
+    for segment_idx in range(len(num)):
+        cur_lab = int(lab_manual[segment_idx])
+        num_segment = num[segment_idx]
+        if cur_lab == 1:
+            vad_result_this_segment = np.ones(num_segment)
+        elif cur_lab == 0:
+            vad_result_this_segment = np.zeros(num_segment)
+        label_framewise = np.append(label_framewise, vad_result_this_segment)
+    frame_num = min(
+        label_framewise.__len__(), int((end[-1] - start[0]) / frame_duration)
+    )
+    label_framewise = label_framewise[:frame_num]
+    return label_framewise
+def read_file(file_path):
+    with open(file_path, "r") as f:
+        lines = f.readlines()
+    lines_arr = np.array([])
+    for line in lines:
+        lines_arr = np.append(lines_arr, float(line.strip()))
+    return lines_arr
+def get_precision_recall(VAD_result, label, threshold):
+    vad_result_hard = np.where(VAD_result >= threshold, 1, 0)
+    # Compute confusion matrix
+    TN, FP, FN, TP = confusion_matrix(label, vad_result_hard).ravel()
+    # Compute precision, recall, false positive rate and false negative rate
+    precision = TP / (TP + FP) if (TP + FP) > 0 else 0
+    recall = TP / (TP + FN) if (TP + FN) > 0 else 0
+    FPR = FP / (FP + TN) if (FP + TN) > 0 else 0
+    FNR = FN / (TP + FN) if (TP + FN) > 0 else 0
+    return precision, recall, FPR, FNR
+def silero_vad_inference_single_file(wav_path):
+    current_directory = os.path.dirname(os.path.abspath(__file__))
+    model = init_jit_model(f'{current_directory}/silero-vad/src/silero_vad/data/silero_vad.jit')
+    vad_iterator = VADIterator(model)
+    window_size_samples = 512
+    speech_probs = np.array([])
+    wav, sr = torchaudio.load(wav_path)
+    wav = wav.squeeze(0)
+    for i in range(0, len(wav), window_size_samples):
+        chunk = wav[i: i+ window_size_samples]
+        if len(chunk) < window_size_samples:
+            break
+        speech_prob = model(chunk, sr).item()
+        speech_probs = np.append(speech_probs, speech_prob)
+    vad_iterator.reset_states()  # reset model states after each audio
+    return speech_probs, window_size_samples
+def ten_vad_process_wav(ten_vad_instance, wav_path, hop_size=256):
+    _, data = Wavfile.read(wav_path)
+    num_frames = data.shape[0] // hop_size
+    voice_prob_arr = np.array([])
+    for i in range(num_frames):
+        input_data = data[i * hop_size: (i + 1) * hop_size]
+        voice_prob, _ = ten_vad_instance.process(input_data)
+        voice_prob_arr = np.append(voice_prob_arr, voice_prob)
+    return voice_prob_arr
+# Script entry point: run TEN VAD and Silero VAD over every WAV file of the
+# test set, sweep the decision threshold from 0 to 1, plot both
+# precision-recall curves to PR_Curves.png and save the TEN VAD numbers to
+# PR_data_TEN_VAD.txt (both next to this script).
+if __name__ == "__main__":
+    # Get the directory of the script
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    # TEN-VAD-TestSet dir
+    test_dir = f"{script_dir}/../TEN-VAD-TestSet"
+    # Initialization
+    hop_size = 256
+    threshold = 0.5
+    # Accumulators over all files: labels and scores for TEN VAD (hop 256)
+    # and, separately, for Silero VAD (hop 512).
+    label_all, vad_result_ten_vad_all = np.array([]), np.array([])
+    label_hop_512_all, vad_result_silero_vad_all = np.array([]), np.array([])
+    wav_list = glob.glob(f"{test_dir}/*.wav")
+    # Running TEN VAD
+    print("Start processing")
+    for wav_path in wav_list:
+        # A fresh instance per file so no state carries over between files.
+        ten_vad_instance = TenVad(hop_size, threshold)
+        # The label file sits next to the audio: same name, ".scv" extension.
+        label_file = wav_path.replace(".wav", ".scv")
+        label = convert_label_to_framewise(
+            label_file, hop_size=hop_size
+        )  # Convert the VAD label to frame-wise one
+        vad_result_ten_vad = ten_vad_process_wav(
+            ten_vad_instance, wav_path, hop_size=hop_size
+        )
+        frame_num = min(label.__len__(), vad_result_ten_vad.__len__())
+        # VAD frame k+1 is paired with label frame k (first VAD frame and
+        # last label dropped) - presumably to compensate a one-frame delay
+        # of the VAD output; TODO confirm.
+        vad_result_ten_vad_all = np.append(
+            vad_result_ten_vad_all, vad_result_ten_vad[1:frame_num]
+        )
+        label_all = np.append(label_all, label[:frame_num - 1])
+        del ten_vad_instance  # To prevent getting different results of each run
+        label_hop_512 = convert_label_to_framewise(
+            label_file, hop_size=512
+        )  # Convert the VAD label to frame-wise one for Silero VAD
+        vad_result_silero_vad, _ = silero_vad_inference_single_file(wav_path)
+        # Silero scores and labels are paired index by index, with no shift.
+        frame_num_silero_vad = min(label_hop_512.__len__(), vad_result_silero_vad.__len__())
+        vad_result_silero_vad_all = np.append(vad_result_silero_vad_all, vad_result_silero_vad[:frame_num_silero_vad])
+        label_hop_512_all = np.append(label_hop_512_all, label_hop_512[:frame_num_silero_vad])
+    # Compute Precision and Recall
+    # One row per threshold: (precision, recall, threshold).
+    threshold_arr = np.arange(0, 1.01, 0.01)
+    pr_data_arr = np.zeros((threshold_arr.__len__(), 3))
+    pr_data_silero_vad_arr = np.zeros((threshold_arr.__len__(), 3))
+    # Note: the loop variable reuses (and overwrites) `threshold` from above.
+    for ind, threshold in enumerate(threshold_arr):
+        precision, recall, FPR, FNR = get_precision_recall(vad_result_ten_vad_all, label_all, threshold)
+        pr_data_arr[ind] = precision, recall, threshold
+        precision_silero_vad, recall_silero_vad, FPR_silero_vad, FNR_silero_vad = get_precision_recall(vad_result_silero_vad_all, label_hop_512_all, threshold)
+        pr_data_silero_vad_arr[ind] = precision_silero_vad, recall_silero_vad, threshold
+    # Plot PR Curve
+    print("Plotting PR Curve")
+    # The last row (highest threshold) is left out of both plotted curves.
+    pr_data_arr_to_plot = pr_data_arr[:-1]
+    plt.plot(
+        pr_data_arr_to_plot[:, 1],
+        pr_data_arr_to_plot[:, 0],
+        color="red",
+        label="TEN VAD",
+    )  # Precision on y-axis, Recall on x-axis
+    pr_data_silero_vad_arr_to_plot = pr_data_silero_vad_arr[:-1]
+    plt.plot(
+        pr_data_silero_vad_arr_to_plot[:, 1],  # Recall (x-axis)
+        pr_data_silero_vad_arr_to_plot[:, 0],  # Precision (y-axis)
+        color="blue",
+        label="Silero VAD",
+    )
+    plt.xlabel("Recall", fontsize=14, fontweight="bold", color="black")
+    plt.ylabel("Precision", fontsize=14, fontweight="bold", color="black")
+    legend = plt.legend()
+    legend.get_texts()[0].set_fontweight("bold")
+    legend.get_texts()[1].set_fontweight("bold")
+    plt.grid(True)
+    # Zoom in on the upper-right region of the PR plane.
+    plt.xlim(0.65, 1)
+    plt.ylim(0.7, 1)
+    plt.title(
+        "Precision-Recall Curve of TEN VAD on TEN-VAD-TestSet",
+        fontsize=12,
+        color="black",
+        fontweight="bold",
+    )
+    save_path = f"{script_dir}/PR_Curves.png"
+    plt.savefig(save_path, dpi=300, bbox_inches="tight")
+    print(f"PR Curves png file saved, save path: {save_path}")
+    # Save the PR data to txt file
+    # One line per threshold: "<threshold> <precision> <recall>" (TEN VAD only).
+    pr_data_save_path = f"{script_dir}/PR_data_TEN_VAD.txt"
+    with open(pr_data_save_path, "w") as f:
+        for ind in range(pr_data_arr.shape[0]):
+            precision, recall, threshold = (
+                pr_data_arr[ind, 0],
+                pr_data_arr[ind, 1],
+                pr_data_arr[ind, 2],
+            )
+            f.write(f"{threshold:.2f} {precision:.4f} {recall:.4f}\n")
+    print("Processing done!")

examples/s0724-s0730.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d45912ad2eb5ce25e65a69ed9f110d0c9a382b2644e38341d8fecbcc6900d59a
+size 249732

examples/sample_array.h ADDED Viewed

The diff for this file is too large to render. See raw diff

examples/test.py ADDED Viewed

	@@ -0,0 +1,26 @@

+#
+# This file is part of TEN Framework, an open source project.
+# Licensed under the Apache License, Version 2.0.
+# See the LICENSE file for more information.
+#
+import sys, os
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../include")))
+from ten_vad import TenVad
+import scipy.io.wavfile as Wavfile
+if __name__ == "__main__":
+    input_file, out_path = sys.argv[1], sys.argv[2]
+    sr, data = Wavfile.read(input_file)
+    hop_size = 256  # 16 ms per frame
+    threshold = 0.5
+    ten_vad_instance = TenVad(hop_size, threshold)  # Create a TenVad instance
+    num_frames = data.shape[0] // hop_size
+    # Streaming inference
+    with open(out_path, "w") as f:
+        for i in range(num_frames):
+            audio_data = data[i * hop_size: (i + 1) * hop_size]
+            out_probability, out_flags = ten_vad_instance.process(audio_data)
+            print("[%d] %0.6f, %d" % (i, out_probability, out_flags))
+            f.write("[%d] %0.6f, %d\n" % (i, out_probability, out_flags))