|
|
|
|
|
|
|
|
|
|
|
|
|
#ifndef __AED_ST_H__
|
|
#define __AED_ST_H__
|
|
|
|
#include <stdio.h>
|
|
#include <onnxruntime_c_api.h>
|
|
|
|
#include "aed.h"
|
|
|
|
// ---------------------------------------------------------------------------
// Compile-time constants of the AED module.
// NOTE(review): units and intent noted as "presumably" are inferred from the
// identifier names only; confirm against the implementation.
// ---------------------------------------------------------------------------

// Sampling rate (presumably in Hz).
#define AUP_AED_FS (16000)
// Upper bound of the input buffer size (presumably samples per call).
#define AUP_AED_MAX_IN_BUFF_SIZE (256)
// Power-spectrum scale factor; numerically about 2^-30 = 1 / (32768 * 32768).
#define AUP_AED_POWER_SPCTR_NORMALIZER (9.3132e-10f)
// Length of the output smoothing filter (presumably counted in frames).
#define AUP_AED_OUTPUT_SMOOTH_FILTER_LEN (10)

// Number of mel filter banks.
#define AUP_AED_MEL_FILTER_BANK_NUM (40)
// Number of look-ahead frames.
#define AUP_AED_LOOKAHEAD_NFRM (1)
// Number of frames in the context window; together with AUP_AED_FEA_LEN it
// sizes AUP_MODULE_AIVAD::input_data_buf_0.
#define AUP_AED_CONTEXT_WINDOW_LEN (3)
// Per-frame feature length: one value more than the mel filter bank count
// (presumably the extra slot carries the pitch feature -- TODO confirm).
// Kept on a single line on purpose: a trailing '\' continuation splices the
// NEXT physical line into the macro, so nothing may sit between the name and
// its replacement text.
#define AUP_AED_FEA_LEN (AUP_AED_MEL_FILTER_BANK_NUM + 1)

// Non-zero selects the LPC-based path of the pitch estimator (presumably).
#define AUP_AED_PITCH_EST_USE_LPC (1)
// Processing rate of the pitch estimator (presumably in Hz).
#define AUP_AED_PITCH_EST_PROCFS (4000)
// Default voiced threshold depends on the pitch-estimator processing rate:
// 0.45 when it is 2000, 0.4 for every other value.
#if AUP_AED_PITCH_EST_PROCFS == 2000
#define AUP_AED_PITCH_EST_DEFAULT_VOICEDTHR (0.45f)
#else
#define AUP_AED_PITCH_EST_DEFAULT_VOICEDTHR (0.4f)
#endif

// Number of model inputs and of model outputs; sizes the name tables and
// tensor arrays in AUP_MODULE_AIVAD.
#define AUP_AED_MODEL_IO_NUM (5)
// Capacity in bytes of each stored input/output name buffer.
#define AUP_AED_MODEL_NAME_LENGTH (32)
// Element count of each of the (AUP_AED_MODEL_IO_NUM - 1) secondary input
// buffers (presumably the recurrent hidden-state width).
#define AUP_AED_MODEL_HIDDEN_DIM (64)
|
|
|
|
class AUP_MODULE_AIVAD {
|
|
public:
|
|
AUP_MODULE_AIVAD(char* onnx_path);
|
|
~AUP_MODULE_AIVAD();
|
|
int Process(float* input, float* output);
|
|
int Reset();
|
|
|
|
private:
|
|
const OrtApi* ort_api = NULL;
|
|
OrtAllocator* ort_allocator = NULL;
|
|
OrtEnv* ort_env = NULL;
|
|
OrtSession* ort_session = NULL;
|
|
int inited = 0;
|
|
int clear_hidden = 0;
|
|
|
|
char input_names_buf[AUP_AED_MODEL_IO_NUM][AUP_AED_MODEL_NAME_LENGTH] = {0};
|
|
const char* input_names[AUP_AED_MODEL_IO_NUM] = {NULL};
|
|
float input_data_buf_0[AUP_AED_CONTEXT_WINDOW_LEN * AUP_AED_FEA_LEN] = {0};
|
|
float input_data_buf_1234[AUP_AED_MODEL_IO_NUM - 1]
|
|
[AUP_AED_MODEL_HIDDEN_DIM] = {0};
|
|
OrtValue* ort_input_tensors[AUP_AED_MODEL_IO_NUM] = {NULL};
|
|
|
|
char output_names_buf[AUP_AED_MODEL_IO_NUM][AUP_AED_MODEL_NAME_LENGTH] = {0};
|
|
const char* output_names[AUP_AED_MODEL_IO_NUM] = {NULL};
|
|
OrtValue* ort_output_tensors[AUP_AED_MODEL_IO_NUM] = {NULL};
|
|
};
|
|
|
|
typedef struct Aed_St_ {
|
|
void* dynamMemPtr;
|
|
size_t dynamMemSize;
|
|
|
|
Aed_StaticCfg stCfg;
|
|
|
|
Aed_DynamCfg dynamCfg;
|
|
|
|
|
|
size_t extFftSz;
|
|
size_t extHopSz;
|
|
size_t extNBins;
|
|
size_t extWinSz;
|
|
|
|
size_t intFftSz;
|
|
size_t intHopSz;
|
|
size_t intWinSz;
|
|
size_t intNBins;
|
|
const float* intAnalyWindowPtr;
|
|
int intAnalyFlag;
|
|
|
|
|
|
|
|
size_t inputTimeFIFOLen;
|
|
|
|
|
|
|
|
size_t feaSz;
|
|
size_t melFbSz;
|
|
size_t algDelay;
|
|
size_t algCtxtSz;
|
|
size_t frmRmsBufLen;
|
|
|
|
|
|
size_t aivadResetFrmNum;
|
|
float voiceDecideThresh;
|
|
|
|
|
|
AUP_MODULE_AIVAD* aivadInf;
|
|
|
|
void* pitchEstStPtr;
|
|
void* timeInAnalysis;
|
|
|
|
|
|
|
|
int aedProcFrmCnt;
|
|
int inputTimeFIFOIdx;
|
|
float* inputTimeFIFO;
|
|
|
|
float* inputEmphTimeFIFO;
|
|
float* aivadInputCmplxSptrm;
|
|
float* aivadInputBinPow;
|
|
size_t aivadResetCnt;
|
|
float timeSignalPre;
|
|
float aivadScore;
|
|
float aivadScorePre;
|
|
|
|
float pitchFreq;
|
|
float* frameRmsBuff;
|
|
|
|
float* aivadInputFeatStack;
|
|
|
|
float* melFilterBankCoef;
|
|
size_t* melFilterBinBuff;
|
|
float* inputFloatBuff;
|
|
} Aed_St;
|
|
|
|
#endif
|
|
|