|
#!/usr/bin/env node |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const fs = require('fs'); |
|
const path = require('path'); |
|
|
|
|
|
/** Number of 16-bit samples handed to the VAD per processing call. */
const HOP_SIZE = 256;

/** Threshold value passed to `_ten_vad_create` when the detector is created. */
const VOICE_THRESHOLD = 0.5;

/**
 * Directory holding the WebAssembly build artifacts.
 *
 * Resolved against this script's own directory (not the current working
 * directory) so the script can be launched from anywhere.
 */
const WASM_DIR = path.join(__dirname, '..', 'lib', 'Web');

/** JavaScript glue file that is patched and loaded by loadModule(). */
const WASM_JS_FILE = path.join(WASM_DIR, 'ten_vad.js');

/** Compiled WebAssembly binary handed to the module factory. */
const WASM_BINARY_FILE = path.join(WASM_DIR, 'ten_vad.wasm');

/** Instantiated module object; assigned by loadModule(). */
let vadModule = null;

/** Handle value read back from the handle cell after a successful create. */
let vadHandle = null;

/** Address of the 4-byte cell that `_ten_vad_create` writes the handle into. */
let vadHandlePtr = null;
|
|
|
|
|
|
|
|
|
|
|
/**
 * Reads the wall clock.
 *
 * @returns {number} Milliseconds since the Unix epoch, as reported by `Date.now()`.
 */
function getTimestamp() {
  const nowMs = Date.now();
  return nowMs;
}
|
|
|
/**
 * Installs fallback `getValue` and `UTF8ToString` helpers on `vadModule`
 * when the loaded module does not already provide them.
 *
 * Helpers that already exist on the module are left untouched.
 */
function addHelperFunctions() {
  if (!vadModule.getValue) {
    /**
     * Reads a 32-bit value from the module heap.
     *
     * @param {number} ptr - Byte address inside the module heap.
     * @param {string} type - Either 'i32' or 'float'.
     * @returns {number} The value stored at `ptr`.
     * @throws {Error} For any other `type`.
     */
    vadModule.getValue = function (ptr, type) {
      // Unsigned shift: a signed `>> 2` would yield a negative index for
      // addresses at or above 2 GiB.
      const index = ptr >>> 2;
      switch (type) {
        case 'i32':
          return vadModule.HEAP32[index];
        case 'float':
          return vadModule.HEAPF32[index];
        default:
          throw new Error(`Unsupported type: ${type}`);
      }
    };
  }

  if (!vadModule.UTF8ToString) {
    /**
     * Decodes a NUL-terminated UTF-8 string from the module heap.
     *
     * @param {number} ptr - Byte address of the first character.
     * @returns {string} The decoded text, or '' for a null pointer.
     */
    vadModule.UTF8ToString = function (ptr) {
      if (!ptr) return '';

      const heap = vadModule.HEAPU8;
      const start = ptr >>> 0;
      let end = start;
      while (heap[end]) {
        end++;
      }

      // Decode the whole byte run at once so multi-byte sequences are
      // combined into single characters instead of one char per byte.
      return new TextDecoder('utf-8').decode(heap.subarray(start, end));
    };
  }
}
|
|
|
|
|
|
|
|
|
|
|
/**
 * Synthesizes a 16 kHz, 16-bit mono test signal made of four segments:
 *   - t < 2 s        : 440 Hz + 880 Hz sine mix
 *   - 2 s <= t < 3 s : uniform random noise in [-1500, 1500)
 *   - 3 s <= t < 4 s : 220 Hz + 660 Hz sine mix
 *   - t >= 4 s       : low-level random values in [0, 50)
 *
 * @param {number} [durationMs=5000] - Length of the generated signal in milliseconds.
 * @returns {Int16Array} The generated samples, clamped to the int16 range.
 */
function generateTestAudio(durationMs = 5000) {
  const sampleRate = 16000;
  const totalSamples = Math.floor(durationMs * sampleRate / 1000);
  const audioData = new Int16Array(totalSamples);

  console.log(`Generating ${totalSamples} samples for ${durationMs}ms audio...`);

  // One sine partial at the given frequency and amplitude.
  const tone = (freq, amp, t) => Math.sin(2 * Math.PI * freq * t) * amp;

  // Picks the segment that time `t` (seconds) falls into.
  const synthesize = (t) => {
    if (t >= 4.0) {
      return Math.random() * 50;
    }
    if (t >= 3.0) {
      return tone(220, 6000, t) + tone(660, 3000, t);
    }
    if (t >= 2.0) {
      return (Math.random() - 0.5) * 3000;
    }
    return tone(440, 8000, t) + tone(880, 4000, t);
  };

  audioData.forEach((_, index) => {
    const raw = Math.floor(synthesize(index / sampleRate));
    audioData[index] = Math.min(32767, Math.max(-32768, raw));
  });

  return audioData;
}
|
|
|
|
|
|
|
|
|
|
|
/**
 * Asks the loaded module for its version string.
 *
 * @returns {string} The version reported by `_ten_vad_get_version`, or
 *   "unknown" when no module is loaded or the lookup throws.
 */
function getVADVersion() {
  const fallback = "unknown";

  if (!vadModule) {
    return fallback;
  }

  let version = fallback;
  try {
    version = vadModule.UTF8ToString(vadModule._ten_vad_get_version());
  } catch (error) {
    // Best effort only: any failure is reported as the fallback value.
    version = fallback;
  }
  return version;
}
|
|
|
/**
 * Creates a VAD instance configured with HOP_SIZE and VOICE_THRESHOLD.
 *
 * On success `vadHandlePtr` holds the address of the handle cell and
 * `vadHandle` holds the handle value read from it. On any failure the
 * handle cell is released and both globals are reset to null, so a later
 * destroy call never touches a freed pointer.
 *
 * @returns {boolean} true when the instance was created, false otherwise.
 */
function createVADInstance() {
  let handleCell = 0;

  try {
    handleCell = vadModule._malloc(4);
    const result = vadModule._ten_vad_create(handleCell, HOP_SIZE, VOICE_THRESHOLD);

    if (result === 0) {
      vadHandlePtr = handleCell;
      vadHandle = vadModule.getValue(handleCell, 'i32');
      return true;
    }

    console.error(`VAD creation failed with code: ${result}`);
  } catch (error) {
    console.error(`Error creating VAD instance: ${error.message}`);
  }

  // Failure path: release the cell (if it was allocated) and clear the
  // globals so nothing keeps pointing at freed memory.
  if (handleCell) {
    try {
      vadModule._free(handleCell);
    } catch (freeError) {
      console.error(`Error releasing VAD handle cell: ${freeError.message}`);
    }
  }
  vadHandlePtr = null;
  vadHandle = null;
  return false;
}
|
|
|
/**
 * Tears down the current VAD instance, if there is one.
 *
 * Passes the handle cell address to `_ten_vad_destroy`, frees the cell and
 * clears both `vadHandlePtr` and `vadHandle`. Does nothing when no handle
 * cell or no module is present.
 */
function destroyVADInstance() {
  if (!vadHandlePtr || !vadModule) {
    return;
  }

  const handleCell = vadHandlePtr;
  vadModule._ten_vad_destroy(handleCell);
  vadModule._free(handleCell);

  vadHandle = null;
  vadHandlePtr = null;
}
|
|
|
/**
 * Runs the VAD over `frameNum` consecutive frames of HOP_SIZE samples.
 *
 * A VAD instance is created for the run and is always destroyed again,
 * including when a frame throws. The three scratch buffers in the module
 * heap are allocated once for the whole run instead of once per frame.
 *
 * @param {Int16Array} inputBuf - Mono 16-bit samples.
 * @param {number} frameNum - Number of HOP_SIZE-sample frames to process.
 * @param {Float32Array} outProbs - Receives one probability per frame.
 * @param {Int32Array} outFlags - Receives one flag per frame.
 * @returns {Promise<number>} Elapsed processing time in milliseconds, or -1
 *   when the VAD instance could not be created.
 */
async function processAudio(inputBuf, frameNum, outProbs, outFlags) {
  console.log(`VAD version: ${getVADVersion()}`);

  if (!createVADInstance()) {
    return -1;
  }

  let audioPtr = 0;
  let probPtr = 0;
  let flagPtr = 0;

  try {
    audioPtr = vadModule._malloc(HOP_SIZE * 2);
    probPtr = vadModule._malloc(4);
    flagPtr = vadModule._malloc(4);

    const startTime = getTimestamp();

    for (let i = 0; i < frameNum; i++) {
      const frameStart = i * HOP_SIZE;
      const frameData = inputBuf.subarray(frameStart, frameStart + HOP_SIZE);

      // HEAP16 is looked up on every frame rather than cached in a local.
      vadModule.HEAP16.set(frameData, audioPtr / 2);

      const result = vadModule._ten_vad_process(
        vadHandle, audioPtr, HOP_SIZE, probPtr, flagPtr
      );

      if (result === 0) {
        const probability = vadModule.getValue(probPtr, 'float');
        const flag = vadModule.getValue(flagPtr, 'i32');

        outProbs[i] = probability;
        outFlags[i] = flag;

        console.log(`[${i}] ${probability.toFixed(6)}, ${flag}`);
      } else {
        console.error(`Frame ${i} processing failed with code: ${result}`);
        outProbs[i] = 0.0;
        outFlags[i] = 0;
      }
    }

    return getTimestamp() - startTime;
  } finally {
    // Release everything even when a frame threw.
    try {
      for (const ptr of [audioPtr, probPtr, flagPtr]) {
        if (ptr) {
          vadModule._free(ptr);
        }
      }
    } finally {
      destroyVADInstance();
    }
  }
}
|
|
|
|
|
|
|
|
|
|
|
/**
 * Prints a short summary of a processing run.
 *
 * @param {number} processingTime - Elapsed processing time in milliseconds.
 * @param {number} totalAudioTime - Duration of the processed audio in milliseconds.
 * @param {Int32Array|number[]} outFlags - Per-frame flags; frames equal to 1 are counted as voice.
 * @param {number} frameNum - Number of frames that were processed.
 */
function printResults(processingTime, totalAudioTime, outFlags, frameNum) {
  // Guard both divisions so empty input prints zeros instead of NaN/Infinity.
  const rtf = totalAudioTime > 0 ? processingTime / totalAudioTime : 0;

  let voiceFrames = 0;
  for (const flag of outFlags) {
    if (flag === 1) {
      voiceFrames++;
    }
  }
  const voiceRatio = frameNum > 0 ? voiceFrames / frameNum : 0;
  const voicePercentage = (voiceRatio * 100).toFixed(1);

  console.log(`\n=== Processing Results ===`);
  console.log(`Time: ${processingTime}ms, Audio: ${totalAudioTime.toFixed(2)}ms, RTF: ${rtf.toFixed(6)}`);
  console.log(`Voice frames: ${voiceFrames}/${frameNum} (${voicePercentage}%)`);
}
|
|
|
/**
 * Writes the per-frame results to a text file, one `[index] prob, flag`
 * line per frame. A write failure is logged, not thrown.
 *
 * @param {Float32Array|number[]} outProbs - Per-frame probabilities.
 * @param {Int32Array|number[]} outFlags - Per-frame flags.
 * @param {number} frameNum - Number of frames to write.
 * @param {string} [filename='out.txt'] - Destination file.
 */
function saveResults(outProbs, outFlags, frameNum, filename = 'out.txt') {
  const lines = [];
  for (let frame = 0; frame < frameNum; frame += 1) {
    const probability = outProbs[frame].toFixed(6);
    lines.push(`[${frame}] ${probability}, ${outFlags[frame]}\n`);
  }
  const output = lines.join('');

  try {
    fs.writeFileSync(filename, output);
    console.log(`Results saved to ${filename}`);
  } catch (error) {
    console.error(`Failed to save results: ${error.message}`);
  }
}
|
|
|
|
|
|
|
|
|
|
|
/**
 * Runs the VAD over five seconds of synthetic audio and prints a summary.
 *
 * @returns {Promise<number>} 0 when the audio was processed, 1 when the VAD
 *   instance could not be created.
 */
async function testWithArray() {
  console.log("=== Array Test ===\n");

  const inputBuf = generateTestAudio(5000);
  const byteNum = inputBuf.byteLength;
  const sampleNum = byteNum / 2;
  // 16 samples per millisecond at the 16 kHz rate of the synthetic audio.
  const totalAudioTime = sampleNum / 16.0;
  const frameNum = Math.floor(sampleNum / HOP_SIZE);

  console.log(`Audio info: ${byteNum} bytes, ${frameNum} frames, ${totalAudioTime.toFixed(2)}ms`);

  const outProbs = new Float32Array(frameNum);
  const outFlags = new Int32Array(frameNum);

  const processingTime = await processAudio(inputBuf, frameNum, outProbs, outFlags);

  // processAudio signals failure with a negative value; 0 ms is a valid
  // (very fast) run and must still be reported.
  if (processingTime < 0) {
    return 1;
  }

  printResults(processingTime, totalAudioTime, outFlags, frameNum);
  return 0;
}
|
|
|
|
|
/**
 * Parses the RIFF/WAVE chunk structure of a 16-bit PCM WAV file.
 *
 * Chunks are walked from offset 12 until the `data` chunk is found. The
 * `fmt ` chunk must appear before it. The reported data size is clamped to
 * the bytes actually present in `buffer`, so a truncated file (or an
 * oversized size field) never yields more samples than exist.
 *
 * @param {Buffer} buffer - Complete contents of the WAV file.
 * @returns {{sampleRate: number, channels: number, bitsPerSample: number,
 *   dataOffset: number, dataSize: number, totalSamples: number,
 *   samplesPerChannel: number}} Format and data location information.
 * @throws {Error} When the file is not a RIFF/WAVE file, is not 16-bit PCM,
 *   or lacks a usable `fmt ` or `data` chunk.
 */
function parseWAVHeader(buffer) {
  if (buffer.length < 44) {
    throw new Error('Invalid WAV file: too small');
  }

  if (buffer.toString('ascii', 0, 4) !== 'RIFF') {
    throw new Error('Invalid WAV file: missing RIFF header');
  }

  if (buffer.toString('ascii', 8, 12) !== 'WAVE') {
    throw new Error('Invalid WAV file: not WAVE format');
  }

  let offset = 12;
  let dataOffset = -1;
  let dataSize = 0;
  let sampleRate = 0;
  let channels = 0;
  let bitsPerSample = 0;
  let hasFormat = false;

  // `<=` so that a chunk header ending exactly at the end of the buffer
  // (for example an empty data chunk) is still seen.
  while (offset + 8 <= buffer.length) {
    const chunkId = buffer.toString('ascii', offset, offset + 4);
    const chunkSize = buffer.readUInt32LE(offset + 4);
    const bodyStart = offset + 8;

    if (chunkId === 'fmt ') {
      // The fields read below occupy the first 16 bytes of the chunk body.
      if (chunkSize < 16 || bodyStart + 16 > buffer.length) {
        throw new Error('Invalid WAV file: truncated fmt chunk');
      }

      const audioFormat = buffer.readUInt16LE(bodyStart);
      channels = buffer.readUInt16LE(bodyStart + 2);
      sampleRate = buffer.readUInt32LE(bodyStart + 4);
      bitsPerSample = buffer.readUInt16LE(bodyStart + 14);

      if (audioFormat !== 1) {
        throw new Error('Unsupported WAV format: only PCM is supported');
      }

      if (bitsPerSample !== 16) {
        throw new Error('Unsupported bit depth: only 16-bit is supported');
      }

      hasFormat = true;
    } else if (chunkId === 'data') {
      dataOffset = bodyStart;
      dataSize = Math.min(chunkSize, buffer.length - bodyStart);
      break;
    }

    // Chunk bodies are padded to an even number of bytes.
    offset = bodyStart + chunkSize + (chunkSize % 2);
  }

  if (dataOffset === -1) {
    throw new Error('Invalid WAV file: no data chunk found');
  }

  if (!hasFormat) {
    throw new Error('Invalid WAV file: no fmt chunk found before data chunk');
  }

  if (channels < 1) {
    throw new Error('Invalid WAV file: channel count must be at least 1');
  }

  const bytesPerSample = bitsPerSample / 8;
  const totalSamples = Math.floor(dataSize / bytesPerSample);

  return {
    sampleRate,
    channels,
    bitsPerSample,
    dataOffset,
    dataSize,
    totalSamples,
    samplesPerChannel: Math.floor(totalSamples / channels)
  };
}
|
|
|
/**
 * Runs the VAD over a 16-bit PCM WAV file and saves the per-frame results.
 *
 * For multi-channel files only the first channel is processed. Only the
 * bytes of the data chunk are converted to samples; anything stored after
 * it in the file is ignored.
 *
 * @param {string} inputFile - Path of the WAV file to read.
 * @param {string} outputFile - Path of the text file that receives the results.
 * @returns {Promise<number>} 0 on success, 1 when the file is missing, cannot
 *   be parsed, or the VAD instance could not be created.
 */
async function testWithWAV(inputFile, outputFile) {
  console.log("=== WAV File Test ===\n");

  if (!fs.existsSync(inputFile)) {
    console.error(`Input file not found: ${inputFile}`);
    return 1;
  }

  try {
    const buffer = fs.readFileSync(inputFile);

    const wavInfo = parseWAVHeader(buffer);
    console.log(`WAV Format: ${wavInfo.channels} channel(s), ${wavInfo.sampleRate}Hz, ${wavInfo.bitsPerSample}-bit`);
    console.log(`Total samples: ${wavInfo.totalSamples}, samples per channel: ${wavInfo.samplesPerChannel}`);

    if (wavInfo.sampleRate !== 16000) {
      console.warn(`Warning: Sample rate is ${wavInfo.sampleRate}Hz, expected 16000Hz`);
    }

    if (wavInfo.channels !== 1) {
      console.warn(`Warning: ${wavInfo.channels} channels detected, only first channel will be used`);
    }

    // View of the data chunk only, bounded by what the file really contains.
    const dataEnd = Math.min(wavInfo.dataOffset + wavInfo.dataSize, buffer.length);
    const audioBuffer = buffer.subarray(wavInfo.dataOffset, dataEnd);

    // Copy exactly the data chunk (rounded down to whole samples) into a
    // fresh ArrayBuffer. Copying also guarantees the 2-byte alignment that
    // Int16Array requires.
    const interleavedCount = Math.floor(audioBuffer.length / 2);
    const byteStart = audioBuffer.byteOffset;
    const inputBuf = new Int16Array(
      audioBuffer.buffer.slice(byteStart, byteStart + interleavedCount * 2)
    );

    // Never trust the header for more samples than were actually read.
    const sampleNum = Math.min(
      Math.floor(wavInfo.samplesPerChannel),
      Math.floor(inputBuf.length / wavInfo.channels)
    );

    const totalAudioTime = sampleNum / wavInfo.sampleRate * 1000;
    const frameNum = Math.floor(sampleNum / HOP_SIZE);

    console.log(`Audio info: ${audioBuffer.length} bytes, ${sampleNum} samples, ${frameNum} frames, ${totalAudioTime.toFixed(2)}ms`);

    let processedInput = inputBuf;
    if (wavInfo.channels > 1) {
      console.log(`Extracting mono from ${wavInfo.channels} channels...`);
      processedInput = new Int16Array(Math.floor(inputBuf.length / wavInfo.channels));
      for (let i = 0; i < processedInput.length; i++) {
        processedInput[i] = inputBuf[i * wavInfo.channels];
      }
    }

    const outProbs = new Float32Array(frameNum);
    const outFlags = new Int32Array(frameNum);

    const processingTime = await processAudio(processedInput, frameNum, outProbs, outFlags);

    // processAudio signals failure with a negative value; 0 ms is a valid run.
    if (processingTime < 0) {
      return 1;
    }

    printResults(processingTime, totalAudioTime, outFlags, frameNum);
    saveResults(outProbs, outFlags, frameNum, outputFile);
    return 0;
  } catch (error) {
    console.error(`Error processing WAV file: ${error.message}`);
    return 1;
  }
}
|
|
|
/**
 * Measures how long the VAD takes to process 100, 1000 and 10000 frames of
 * a 440 Hz test tone and prints total time, per-frame time and real-time
 * factor for each case.
 *
 * The scratch buffers and the VAD instance are released even when
 * processing throws. Returns without output when the VAD instance cannot
 * be created.
 *
 * @returns {Promise<void>}
 */
async function runBenchmark() {
  console.log("=== Performance Benchmark ===\n");

  if (!createVADInstance()) return;

  let audioPtr = 0;
  let probPtr = 0;
  let flagPtr = 0;

  try {
    const testData = new Int16Array(HOP_SIZE);
    for (let i = 0; i < HOP_SIZE; i++) {
      testData[i] = Math.sin(2 * Math.PI * 440 * i / 16000) * 8000;
    }

    const testCases = [100, 1000, 10000];
    // Duration of one frame in milliseconds at 16 kHz.
    const frameAudioTime = (HOP_SIZE / 16000) * 1000;

    audioPtr = vadModule._malloc(HOP_SIZE * 2);
    probPtr = vadModule._malloc(4);
    flagPtr = vadModule._malloc(4);

    for (const numFrames of testCases) {
      // Re-copy the tone for every case so each one starts from the same input.
      vadModule.HEAP16.set(testData, audioPtr / 2);

      const startTime = getTimestamp();

      for (let i = 0; i < numFrames; i++) {
        vadModule._ten_vad_process(vadHandle, audioPtr, HOP_SIZE, probPtr, flagPtr);
      }

      const totalTime = getTimestamp() - startTime;
      const avgTime = totalTime / numFrames;
      const totalAudioTime = numFrames * frameAudioTime;
      const rtf = totalTime / totalAudioTime;

      console.log(`${numFrames} frames: ${totalTime}ms total, ${avgTime.toFixed(3)}ms/frame, RTF: ${rtf.toFixed(3)}`);
    }
  } finally {
    try {
      for (const ptr of [audioPtr, probPtr, flagPtr]) {
        if (ptr) {
          vadModule._free(ptr);
        }
      }
    } finally {
      destroyVADInstance();
    }
  }
}
|
|
|
|
|
|
|
|
|
|
|
/**
 * Loads the WebAssembly VAD module and stores it in `vadModule`.
 *
 * The glue file is an ES module, so a patched copy is written to
 * `./ten_vad_temp.js` (its `import.meta.url` references replaced by the
 * absolute path of the glue file and its default export turned into
 * `module.exports`) and loaded with `require`. The temporary copy is removed
 * again whether loading succeeds or fails.
 *
 * @returns {Promise<boolean>} true when the module was loaded, false otherwise
 *   (the reason is logged).
 */
async function loadModule() {
  const tempPath = path.resolve('./ten_vad_temp.js');

  try {
    console.log("Loading WebAssembly module...");

    if (!fs.existsSync(WASM_JS_FILE)) {
      throw new Error(`ten_vad.js not found at ${WASM_JS_FILE}`);
    }

    if (!fs.existsSync(WASM_BINARY_FILE)) {
      throw new Error(`ten_vad.wasm not found at ${WASM_BINARY_FILE}`);
    }

    // JSON.stringify yields a correctly escaped string literal (backslashes,
    // quotes). The function form of the replacement keeps `$` sequences in
    // the path from being interpreted as replacement patterns.
    const scriptPathLiteral = JSON.stringify(path.resolve(WASM_JS_FILE));
    const wasmJsContent = fs.readFileSync(WASM_JS_FILE, 'utf8');
    const modifiedContent = wasmJsContent
      .replace(/import\.meta\.url/g, () => scriptPathLiteral)
      .replace(/export default createVADModule;/, 'module.exports = createVADModule;');

    const wasmBinary = fs.readFileSync(WASM_BINARY_FILE);

    fs.writeFileSync(tempPath, modifiedContent);

    const createVADModule = require(tempPath);
    vadModule = await createVADModule({
      wasmBinary: wasmBinary,
      locateFile: (filePath) => filePath.endsWith('.wasm') ? WASM_BINARY_FILE : filePath,
      noInitialRun: false,
      noExitRuntime: true
    });

    addHelperFunctions();

    console.log(`Module loaded successfully. Version: ${getVADVersion()}\n`);
    return true;
  } catch (error) {
    console.error(`Failed to load module: ${error.message}`);
    return false;
  } finally {
    // Always remove the patched copy; `force` makes a missing file a no-op.
    try {
      fs.rmSync(tempPath, { force: true });
    } catch (cleanupError) {
      console.error(`Failed to remove temporary file ${tempPath}: ${cleanupError.message}`);
    }
  }
}
|
|
|
|
|
|
|
|
|
|
|
/**
 * Command-line entry point.
 *
 * With two or more arguments the first two are taken as the input WAV file
 * and the output text file; otherwise the synthetic-audio test runs. The
 * benchmark runs afterwards in both cases.
 *
 * The function never terminates the process itself; it reports the outcome
 * through its return value so it can also be called from other modules.
 *
 * @returns {Promise<number>} 0 when everything succeeded; non-zero when the
 *   module could not be loaded, the selected test reported a failure, or an
 *   error was thrown.
 */
async function main() {
  const args = process.argv.slice(2);

  if (!await loadModule()) {
    return 1;
  }

  try {
    let status;
    if (args.length >= 2) {
      const [inputFile, outputFile] = args;
      console.log(`Input: ${inputFile}, Output: ${outputFile}\n`);
      status = await testWithWAV(inputFile, outputFile);
    } else {
      status = await testWithArray();
    }

    await runBenchmark();

    // Propagate a failure reported by the test; anything falsy counts as success.
    return status || 0;
  } catch (error) {
    console.error(`Test failed: ${error.message}`);
    return 1;
  }
}
|
|
|
|
|
|
|
|
|
|
|
// Run the command-line flow only when this file is executed directly; when
// it is loaded through require() only the exports below are provided.
if (require.main === module) {
  const exitWith = (code) => process.exit(code);

  main()
    .then(exitWith)
    .catch((fatal) => {
      console.error(`Fatal error: ${fatal.message}`);
      exitWith(1);
    });
}

module.exports = { main, testWithArray, testWithWAV, runBenchmark };