can bayar
/
can_mfcc
sa
Embed:
(wiki syntax)
Show/hide line numbers
mfcc.cpp
00001 /* 00002 * Copyright (C) 2018 Arm Limited or its affiliates. All rights reserved. 00003 * 00004 * SPDX-License-Identifier: Apache-2.0 00005 * 00006 * Licensed under the Apache License, Version 2.0 (the License); you may 00007 * not use this file except in compliance with the License. 00008 * You may obtain a copy of the License at 00009 * 00010 * www.apache.org/licenses/LICENSE-2.0 00011 * 00012 * Unless required by applicable law or agreed to in writing, software 00013 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 00014 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 * See the License for the specific language governing permissions and 00016 * limitations under the License. 00017 */ 00018 00019 /* 00020 * Description: MFCC feature extraction to match with TensorFlow MFCC Op 00021 */ 00022 00023 #include <string.h> 00024 00025 #include "mfcc.h" 00026 #include "float.h" 00027 00028 #define _USE_MATH_DEFINES 00029 #include <math.h> 00030 00031 MFCC::MFCC(int num_mfcc_features, int frame_len, int mfcc_dec_bits) 00032 :num_mfcc_features(num_mfcc_features), 00033 frame_len(frame_len), 00034 mfcc_dec_bits(mfcc_dec_bits) 00035 { 00036 00037 // Round-up to nearest power of 2. 00038 frame_len_padded = pow(2,ceil((log(frame_len)/log(2)))); 00039 00040 frame = new float[frame_len_padded]; 00041 buffer = new float[frame_len_padded]; 00042 mel_energies = new float[NUM_FBANK_BINS]; 00043 00044 //create window function 00045 window_func = new float[frame_len]; 00046 for (int i = 0; i < frame_len; i++) 00047 window_func[i] = 0.5 - 0.5*cos(M_2PI * ((float)i) / (frame_len)); 00048 00049 //create mel filterbank 00050 fbank_filter_first = new int32_t[NUM_FBANK_BINS]; 00051 fbank_filter_last = new int32_t[NUM_FBANK_BINS];; 00052 mel_fbank = create_mel_fbank(); 00053 00054 //create DCT matrix 00055 dct_matrix = create_dct_matrix(NUM_FBANK_BINS, num_mfcc_features); 00056 00057 //initialize FFT 00058 rfft = new arm_rfft_fast_instance_f32; 00059 arm_rfft_fast_init_f32(rfft, frame_len_padded); 00060 00061 } 00062 00063 MFCC::~MFCC() { 00064 delete []frame; 00065 delete [] buffer; 00066 delete []mel_energies; 00067 delete []window_func; 00068 delete []fbank_filter_first; 00069 delete []fbank_filter_last; 00070 delete []dct_matrix; 00071 delete rfft; 00072 for(int i=0;i<NUM_FBANK_BINS;i++) 00073 delete mel_fbank[i]; 00074 delete mel_fbank; 00075 } 00076 00077 float * MFCC::create_dct_matrix(int32_t input_length, int32_t coefficient_count) { 00078 int32_t k, n; 00079 float * M = new float[input_length*coefficient_count]; 00080 float normalizer; 00081 arm_sqrt_f32(2.0/(float)input_length,&normalizer); 00082 for (k = 0; k < coefficient_count; k++) { 00083 for (n = 0; n < input_length; n++) { 00084 M[k*input_length+n] = normalizer * cos( ((double)M_PI)/input_length * (n + 0.5) * k ); 00085 } 00086 } 00087 return M; 00088 } 00089 00090 float ** MFCC::create_mel_fbank() { 00091 00092 int32_t bin, i; 00093 00094 int32_t num_fft_bins = frame_len_padded/2; 00095 float fft_bin_width = ((float)SAMP_FREQ) / frame_len_padded; 00096 float mel_low_freq = MelScale(MEL_LOW_FREQ); 00097 float mel_high_freq = MelScale(MEL_HIGH_FREQ); 00098 float mel_freq_delta = (mel_high_freq - mel_low_freq) / (NUM_FBANK_BINS+1); 00099 00100 float *this_bin = new float[num_fft_bins]; 00101 00102 float ** mel_fbank = new float*[NUM_FBANK_BINS]; 00103 00104 for (bin = 0; bin < NUM_FBANK_BINS; bin++) { 00105 00106 float left_mel = mel_low_freq + bin * mel_freq_delta; 00107 float center_mel = mel_low_freq + (bin + 1) * mel_freq_delta; 00108 float right_mel = mel_low_freq + (bin + 2) * mel_freq_delta; 00109 00110 int32_t first_index = -1, last_index = -1; 00111 00112 for (i = 0; i < num_fft_bins; i++) { 00113 00114 float freq = (fft_bin_width * i); // center freq of this fft bin. 00115 float mel = MelScale(freq); 00116 this_bin[i] = 0.0; 00117 00118 if (mel > left_mel && mel < right_mel) { 00119 float weight; 00120 if (mel <= center_mel) { 00121 weight = (mel - left_mel) / (center_mel - left_mel); 00122 } else { 00123 weight = (right_mel-mel) / (right_mel-center_mel); 00124 } 00125 this_bin[i] = weight; 00126 if (first_index == -1) 00127 first_index = i; 00128 last_index = i; 00129 } 00130 } 00131 00132 fbank_filter_first[bin] = first_index; 00133 fbank_filter_last[bin] = last_index; 00134 mel_fbank[bin] = new float[last_index-first_index+1]; 00135 00136 int32_t j = 0; 00137 //copy the part we care about 00138 for (i = first_index; i <= last_index; i++) { 00139 mel_fbank[bin][j++] = this_bin[i]; 00140 } 00141 } 00142 delete []this_bin; 00143 return mel_fbank; 00144 } 00145 00146 void MFCC::mfcc_compute(const int16_t * audio_data, q7_t* mfcc_out) { 00147 00148 int32_t i, j, bin; 00149 00150 //TensorFlow way of normalizing .wav data to (-1,1) 00151 for (i = 0; i < frame_len; i++) { 00152 frame[i] = (float)audio_data[i]/(1<<15); 00153 } 00154 //Fill up remaining with zeros 00155 memset(&frame[frame_len], 0, sizeof(float) * (frame_len_padded-frame_len)); 00156 00157 for (i = 0; i < frame_len; i++) { 00158 frame[i] *= window_func[i]; 00159 } 00160 00161 //Compute FFT 00162 arm_rfft_fast_f32(rfft, frame, buffer, 0); 00163 00164 //Convert to power spectrum 00165 //frame is stored as [real0, realN/2-1, real1, im1, real2, im2, ...] 00166 int32_t half_dim = frame_len_padded/2; 00167 float first_energy = buffer[0] * buffer[0], 00168 last_energy = buffer[1] * buffer[1]; // handle this special case 00169 for (i = 1; i < half_dim; i++) { 00170 float real = buffer[i*2], im = buffer[i*2 + 1]; 00171 buffer[i] = real*real + im*im; 00172 } 00173 buffer[0] = first_energy; 00174 buffer[half_dim] = last_energy; 00175 00176 float sqrt_data; 00177 //Apply mel filterbanks 00178 for (bin = 0; bin < NUM_FBANK_BINS; bin++) { 00179 j = 0; 00180 float mel_energy = 0; 00181 int32_t first_index = fbank_filter_first[bin]; 00182 int32_t last_index = fbank_filter_last[bin]; 00183 for (i = first_index; i <= last_index; i++) { 00184 arm_sqrt_f32(buffer[i],&sqrt_data); 00185 mel_energy += (sqrt_data) * mel_fbank[bin][j++]; 00186 } 00187 mel_energies[bin] = mel_energy; 00188 00189 //avoid log of zero 00190 if (mel_energy == 0.0) 00191 mel_energies[bin] = FLT_MIN; 00192 } 00193 00194 //Take log 00195 for (bin = 0; bin < NUM_FBANK_BINS; bin++) 00196 mel_energies[bin] = logf(mel_energies[bin]); 00197 00198 //Take DCT. Uses matrix mul. 00199 for (i = 0; i < num_mfcc_features; i++) { 00200 float sum = 0.0; 00201 for (j = 0; j < NUM_FBANK_BINS; j++) { 00202 sum += dct_matrix[i*NUM_FBANK_BINS+j] * mel_energies[j]; 00203 } 00204 00205 //Input is Qx.mfcc_dec_bits (from quantization step) 00206 sum *= (0x1<<mfcc_dec_bits); 00207 sum = round(sum); 00208 if(sum >= 127) 00209 mfcc_out[i] = 127; 00210 else if(sum <= -128) 00211 mfcc_out[i] = -128; 00212 else 00213 mfcc_out[i] = sum; 00214 } 00215 00216 }
Generated on Sat Aug 13 2022 06:12:26 by 1.7.2