webrtc抽离vad算法+testdemo_webrtcvad,webrtcvad算法资源-CSDN文库

共67个文件

c：20个

o：19个

h：13个

webrtc

语音交互

5星 · 超过95%的资源需积分: 50 157 浏览量 2018-01-08 17:56:49 上传评论 11 收藏 4.11MB ZIP 举报

资源推荐

资源详情

资源评论

收起资源包目录

webrtc_vad_extract.zip （67个子文件）

webrtc_vad_extract

division_operations.c 4KB

downsample_fast.c 2KB

cross_correlation.o 5KB

complex_fft.o 14KB

spl_sqrt.o 6KB

energy.c 1KB

complex_bit_reverse.c 1KB

vad_sp.c 6KB

vad_core.o 35KB

get_hanning_window.o 5KB

vector_scaling_operations.o 12KB

get_hanning_window.c 3KB

get_scaling_square.c 1KB

fft.o 31KB

division_operations.o 7KB

webrtc_vad.o 12KB

cross_correlation.c 8KB

downsample_fast.o 6KB

spl_sqrt_floor.o 5KB

spl_sqrt_floor.c 2KB

spl_sqrt.c 5KB

spl_version.o 5KB

vad_filterbank.c 10KB

complex_fft.c 19KB

spl_version.c 764B

webrtc_vad.c 4KB

vad_core.c 27KB

vad_gmm.c 3KB

Android.mk 2KB

libs

shared_file_no_found~ 119B

libwebrtc_vad_my.so 142KB

shared_file_no_found 119B

min_max_operations.c 7KB

makefile 2KB

vector_scaling_operations.c 4KB

fft.c 26KB

vad_gmm.o 6KB

min_max_operations.o 13KB

test_code

vad_test.c 5KB

speech_noisy.wav 1.1MB

q-0dB-15.wav 879KB

vad_test 13KB

audio_1

Untitled Document~ 1KB

compile_order 70B

speech_noisy_20cm.wav 1.26MB

speech.wav 690KB

q-10dB-15.wav 827KB

compile_order~ 70B

vad_filterbank.o 20KB

get_scaling_square.o 13KB

complex_bit_reverse.o 4KB

energy.o 5KB

include

fft.h 2KB

webrtc_vad.h 5KB

vad_defines.h 3KB

isac.h 25KB

vad_core.h 4KB

signal_processing_library.h 59KB

structs.h 13KB

structs.h~ 12KB

vad_gmm.h 1KB

typedefs.h 5KB

vad_sp.h 2KB

spl_inl.h 4KB

settings.h 8KB

vad_filterbank.h 5KB

vad_sp.o 11KB

/*
 *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

/*
 * This file includes the implementation of the core functionality in VAD.
 * For function description, see vad_core.h.
 */

#include "vad_core.h"
#include "signal_processing_library.h"
#include "typedefs.h"
#include "vad_defines.h"
#include "vad_filterbank.h"
#include "vad_gmm.h"
#include "vad_sp.h"

// Spectrum Weighting
static const WebRtc_Word16 kSpectrumWeight[6] = { 6, 8, 10, 12, 14, 16 };
static const WebRtc_Word16 kNoiseUpdateConst = 655; // Q15
static const WebRtc_Word16 kSpeechUpdateConst = 6554; // Q15
static const WebRtc_Word16 kBackEta = 154; // Q8

// Minimum difference between the two models, Q5
static const WebRtc_Word16 kMinimumDifference[6] = {
    544, 544, 576, 576, 576, 576 };

// Upper limit of mean value for speech model, Q7
static const WebRtc_Word16 kMaximumSpeech[6] = {
    11392, 11392, 11520, 11520, 11520, 11520 };

// Minimum value for mean value
static const WebRtc_Word16 kMinimumMean[2] = { 640, 768 };

// Upper limit of mean value for noise model, Q7
static const WebRtc_Word16 kMaximumNoise[6] = {
    9216, 9088, 8960, 8832, 8704, 8576 };

// Start values for the Gaussian models, Q7

// Weights for the two Gaussians for the six channels (noise)
static const WebRtc_Word16 kNoiseDataWeights[12] = {
    34, 62, 72, 66, 53, 25, 94, 66, 56, 62, 75, 103 };

// Weights for the two Gaussians for the six channels (speech)
static const WebRtc_Word16 kSpeechDataWeights[12] = {
    48, 82, 45, 87, 50, 47, 80, 46, 83, 41, 78, 81 };

// Means for the two Gaussians for the six channels (noise)
static const WebRtc_Word16 kNoiseDataMeans[12] = {
    6738, 4892, 7065, 6715, 6771, 3369, 7646, 3863, 7820, 7266, 5020, 4362 };

// Means for the two Gaussians for the six channels (speech)
static const WebRtc_Word16 kSpeechDataMeans[12] = {
    8306, 10085, 10078, 11823, 11843, 6309, 9473, 9571, 10879, 7581, 8180,
    7483 };

// Stds for the two Gaussians for the six channels (noise)
static const WebRtc_Word16 kNoiseDataStds[12] = {
    378, 1064, 493, 582, 688, 593, 474, 697, 475, 688, 421, 455 };

// Stds for the two Gaussians for the six channels (speech)
static const WebRtc_Word16 kSpeechDataStds[12] = {
    555, 505, 567, 524, 585, 1231, 509, 828, 492, 1540, 1079, 850 };

// Value stored in init_flag once WebRtcVad_InitCore() has completed.
static const int kInitCheck = 42;

// Initialize VAD
//
// Resets every state field of |inst| that this function touches, loads the
// start values of the Gaussian models from the tables above, and selects the
// overhang / threshold constants for the requested aggressiveness |mode|.
//
// - inst : VAD instance to initialize.
// - mode : 0 = quality, 1 = low bitrate, 2 = aggressive; any other value
//          selects the very aggressive settings.
//
// Returns 0 on success, -1 if |inst| is a null pointer.
int WebRtcVad_InitCore(VadInstT *inst, short mode)
{
    int i;

    // Refuse a null instance instead of dereferencing it.
    if (!inst)
    {
        return -1;
    }

    // Initialization of struct
    inst->vad = 1;
    inst->frame_counter = 0;
    inst->over_hang = 0;
    inst->num_of_speech = 0;

    // Initialization of downsampling filter state
    inst->downsampling_filter_states[0] = 0;
    inst->downsampling_filter_states[1] = 0;
    inst->downsampling_filter_states[2] = 0;
    inst->downsampling_filter_states[3] = 0;

    // Read initial PDF parameters
    // NOTE(review): the source tables hold 12 entries each; this assumes
    // NUM_TABLE_VALUES (declared outside this file) is at most 12 - confirm.
    for (i = 0; i < NUM_TABLE_VALUES; i++)
    {
        inst->noise_means[i] = kNoiseDataMeans[i];
        inst->speech_means[i] = kSpeechDataMeans[i];
        inst->noise_stds[i] = kNoiseDataStds[i];
        inst->speech_stds[i] = kSpeechDataStds[i];
    }

    // Index and Minimum value vectors are initialized
    for (i = 0; i < 16 * NUM_CHANNELS; i++)
    {
        inst->low_value_vector[i] = 10000;
        inst->index_vector[i] = 0;
    }

    for (i = 0; i < 5; i++)
    {
        inst->upper_state[i] = 0;
        inst->lower_state[i] = 0;
    }

    for (i = 0; i < 4; i++)
    {
        inst->hp_filter_state[i] = 0;
    }

    // Init mean value memory, for FindMin function
    inst->mean_value[0] = 1600;
    inst->mean_value[1] = 1600;
    inst->mean_value[2] = 1600;
    inst->mean_value[3] = 1600;
    inst->mean_value[4] = 1600;
    inst->mean_value[5] = 1600;

    // Index 0/1/2 of each array below holds the 10 ms / 20 ms / 30 ms constant.
    if (mode == 0)
    {
        // Quality mode
        inst->over_hang_max_1[0] = OHMAX1_10MS_Q; // Overhang short speech burst
        inst->over_hang_max_1[1] = OHMAX1_20MS_Q; // Overhang short speech burst
        inst->over_hang_max_1[2] = OHMAX1_30MS_Q; // Overhang short speech burst
        inst->over_hang_max_2[0] = OHMAX2_10MS_Q; // Overhang long speech burst
        inst->over_hang_max_2[1] = OHMAX2_20MS_Q; // Overhang long speech burst
        inst->over_hang_max_2[2] = OHMAX2_30MS_Q; // Overhang long speech burst

        inst->individual[0] = INDIVIDUAL_10MS_Q;
        inst->individual[1] = INDIVIDUAL_20MS_Q;
        inst->individual[2] = INDIVIDUAL_30MS_Q;

        inst->total[0] = TOTAL_10MS_Q;
        inst->total[1] = TOTAL_20MS_Q;
        inst->total[2] = TOTAL_30MS_Q;
    } else if (mode == 1)
    {
        // Low bitrate mode
        inst->over_hang_max_1[0] = OHMAX1_10MS_LBR; // Overhang short speech burst
        inst->over_hang_max_1[1] = OHMAX1_20MS_LBR; // Overhang short speech burst
        inst->over_hang_max_1[2] = OHMAX1_30MS_LBR; // Overhang short speech burst
        inst->over_hang_max_2[0] = OHMAX2_10MS_LBR; // Overhang long speech burst
        inst->over_hang_max_2[1] = OHMAX2_20MS_LBR; // Overhang long speech burst
        inst->over_hang_max_2[2] = OHMAX2_30MS_LBR; // Overhang long speech burst

        inst->individual[0] = INDIVIDUAL_10MS_LBR;
        inst->individual[1] = INDIVIDUAL_20MS_LBR;
        inst->individual[2] = INDIVIDUAL_30MS_LBR;

        inst->total[0] = TOTAL_10MS_LBR;
        inst->total[1] = TOTAL_20MS_LBR;
        inst->total[2] = TOTAL_30MS_LBR;
    } else if (mode == 2)
    {
        // Aggressive mode
        inst->over_hang_max_1[0] = OHMAX1_10MS_AGG; // Overhang short speech burst
        inst->over_hang_max_1[1] = OHMAX1_20MS_AGG; // Overhang short speech burst
        inst->over_hang_max_1[2] = OHMAX1_30MS_AGG; // Overhang short speech burst
        inst->over_hang_max_2[0] = OHMAX2_10MS_AGG; // Overhang long speech burst
        inst->over_hang_max_2[1] = OHMAX2_20MS_AGG; // Overhang long speech burst
        inst->over_hang_max_2[2] = OHMAX2_30MS_AGG; // Overhang long speech burst

        inst->individual[0] = INDIVIDUAL_10MS_AGG;
        inst->individual[1] = INDIVIDUAL_20MS_AGG;
        inst->individual[2] = INDIVIDUAL_30MS_AGG;

        inst->total[0] = TOTAL_10MS_AGG;
        inst->total[1] = TOTAL_20MS_AGG;
        inst->total[2] = TOTAL_30MS_AGG;
    } else
    {
        // Very aggressive mode
        inst->over_hang_max_1[0] = OHMAX1_10MS_VAG; // Overhang short speech burst
        inst->over_hang_max_1[1] = OHMAX1_20MS_VAG; // Overhang short speech burst
        inst->over_hang_max_1[2] = OHMAX1_30MS_VAG; // Overhang short speech burst
        inst->over_hang_max_2[0] = OHMAX2_10MS_VAG; // Overhang long speech burst
        inst->over_hang_max_2[1] = OHMAX2_20MS_VAG; // Overhang long speech burst
        inst->over_hang_max_2[2] = OHMAX2_30MS_VAG; // Overhang long speech burst

        inst->individual[0] = INDIVIDUAL_10MS_VAG;
        inst->individual[1] = INDIVIDUAL_20MS_VAG;
        inst->individual[2] = INDIVIDUAL_30MS_VAG;

        inst->total[0] = TOTAL_10MS_VAG;
        inst->total[1] = TOTAL_20MS_VAG;
        inst->total[2] = TOTAL_30MS_VAG;
    }

    // Mark the instance as initialized.
    inst->init_flag = kInitCheck;

    return 0;
}

// Set aggressiveness mode
int WebRtcVad_set_mode_core(VadInstT *inst, short mode)
{
    if (mode == 0)
    {
        // Quality mode
        inst->over_hang_max_1[0] = OHMAX1_10MS_Q; // Overhang short speech burst
        inst->over_hang_max_1[1] = OHMAX1_20MS_Q; // Overhang short speech burst
        inst->over_hang_max_1[2] = OHMAX1_30MS_Q; // Overhang short speech burst
        inst->over_hang_max_2[0] = OHMAX2_10MS_Q; // Overhang long speech burst
        inst->over_hang_max_2[1] = OHMAX2_20MS_Q; // Overhang long speech burst
        inst->over_hang_max_2[2] = OHMAX2_30MS_Q; // Overhang long speech burst
        // NOTE(review): the listing ends here; the rest of this function is
        // not present in this excerpt and has deliberately not been guessed.

评论收藏

内容反馈