![](/media/cache/img/default_profile.jpg.50x50_q85.jpg)
sa
mfcc.cpp@0:540cf00460b2, 2020-04-29 (annotated)
- Committer:
- magicTurtle866
- Date:
- Wed Apr 29 19:54:31 2020 +0000
- Revision:
- 0:540cf00460b2
andac bul beni
Who changed what in which revision?
User | Revision | Line number | New contents of line |
---|---|---|---|
magicTurtle866 | 0:540cf00460b2 | 1 | /* |
magicTurtle866 | 0:540cf00460b2 | 2 | * Copyright (C) 2018 Arm Limited or its affiliates. All rights reserved. |
magicTurtle866 | 0:540cf00460b2 | 3 | * |
magicTurtle866 | 0:540cf00460b2 | 4 | * SPDX-License-Identifier: Apache-2.0 |
magicTurtle866 | 0:540cf00460b2 | 5 | * |
magicTurtle866 | 0:540cf00460b2 | 6 | * Licensed under the Apache License, Version 2.0 (the License); you may |
magicTurtle866 | 0:540cf00460b2 | 7 | * not use this file except in compliance with the License. |
magicTurtle866 | 0:540cf00460b2 | 8 | * You may obtain a copy of the License at |
magicTurtle866 | 0:540cf00460b2 | 9 | * |
magicTurtle866 | 0:540cf00460b2 | 10 | * www.apache.org/licenses/LICENSE-2.0 |
magicTurtle866 | 0:540cf00460b2 | 11 | * |
magicTurtle866 | 0:540cf00460b2 | 12 | * Unless required by applicable law or agreed to in writing, software |
magicTurtle866 | 0:540cf00460b2 | 13 | * distributed under the License is distributed on an AS IS BASIS, WITHOUT |
magicTurtle866 | 0:540cf00460b2 | 14 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
magicTurtle866 | 0:540cf00460b2 | 15 | * See the License for the specific language governing permissions and |
magicTurtle866 | 0:540cf00460b2 | 16 | * limitations under the License. |
magicTurtle866 | 0:540cf00460b2 | 17 | */ |
magicTurtle866 | 0:540cf00460b2 | 18 | |
magicTurtle866 | 0:540cf00460b2 | 19 | /* |
magicTurtle866 | 0:540cf00460b2 | 20 | * Description: MFCC feature extraction to match with TensorFlow MFCC Op |
magicTurtle866 | 0:540cf00460b2 | 21 | */ |
magicTurtle866 | 0:540cf00460b2 | 22 | |
magicTurtle866 | 0:540cf00460b2 | 23 | #include <string.h> |
magicTurtle866 | 0:540cf00460b2 | 24 | |
magicTurtle866 | 0:540cf00460b2 | 25 | #include "mfcc.h" |
magicTurtle866 | 0:540cf00460b2 | 26 | #include "float.h" |
magicTurtle866 | 0:540cf00460b2 | 27 | |
magicTurtle866 | 0:540cf00460b2 | 28 | #define _USE_MATH_DEFINES |
magicTurtle866 | 0:540cf00460b2 | 29 | #include <math.h> |
magicTurtle866 | 0:540cf00460b2 | 30 | |
magicTurtle866 | 0:540cf00460b2 | 31 | MFCC::MFCC(int num_mfcc_features, int frame_len, int mfcc_dec_bits) |
magicTurtle866 | 0:540cf00460b2 | 32 | :num_mfcc_features(num_mfcc_features), |
magicTurtle866 | 0:540cf00460b2 | 33 | frame_len(frame_len), |
magicTurtle866 | 0:540cf00460b2 | 34 | mfcc_dec_bits(mfcc_dec_bits) |
magicTurtle866 | 0:540cf00460b2 | 35 | { |
magicTurtle866 | 0:540cf00460b2 | 36 | |
magicTurtle866 | 0:540cf00460b2 | 37 | // Round-up to nearest power of 2. |
magicTurtle866 | 0:540cf00460b2 | 38 | frame_len_padded = pow(2,ceil((log(frame_len)/log(2)))); |
magicTurtle866 | 0:540cf00460b2 | 39 | |
magicTurtle866 | 0:540cf00460b2 | 40 | frame = new float[frame_len_padded]; |
magicTurtle866 | 0:540cf00460b2 | 41 | buffer = new float[frame_len_padded]; |
magicTurtle866 | 0:540cf00460b2 | 42 | mel_energies = new float[NUM_FBANK_BINS]; |
magicTurtle866 | 0:540cf00460b2 | 43 | |
magicTurtle866 | 0:540cf00460b2 | 44 | //create window function |
magicTurtle866 | 0:540cf00460b2 | 45 | window_func = new float[frame_len]; |
magicTurtle866 | 0:540cf00460b2 | 46 | for (int i = 0; i < frame_len; i++) |
magicTurtle866 | 0:540cf00460b2 | 47 | window_func[i] = 0.5 - 0.5*cos(M_2PI * ((float)i) / (frame_len)); |
magicTurtle866 | 0:540cf00460b2 | 48 | |
magicTurtle866 | 0:540cf00460b2 | 49 | //create mel filterbank |
magicTurtle866 | 0:540cf00460b2 | 50 | fbank_filter_first = new int32_t[NUM_FBANK_BINS]; |
magicTurtle866 | 0:540cf00460b2 | 51 | fbank_filter_last = new int32_t[NUM_FBANK_BINS];; |
magicTurtle866 | 0:540cf00460b2 | 52 | mel_fbank = create_mel_fbank(); |
magicTurtle866 | 0:540cf00460b2 | 53 | |
magicTurtle866 | 0:540cf00460b2 | 54 | //create DCT matrix |
magicTurtle866 | 0:540cf00460b2 | 55 | dct_matrix = create_dct_matrix(NUM_FBANK_BINS, num_mfcc_features); |
magicTurtle866 | 0:540cf00460b2 | 56 | |
magicTurtle866 | 0:540cf00460b2 | 57 | //initialize FFT |
magicTurtle866 | 0:540cf00460b2 | 58 | rfft = new arm_rfft_fast_instance_f32; |
magicTurtle866 | 0:540cf00460b2 | 59 | arm_rfft_fast_init_f32(rfft, frame_len_padded); |
magicTurtle866 | 0:540cf00460b2 | 60 | |
magicTurtle866 | 0:540cf00460b2 | 61 | } |
magicTurtle866 | 0:540cf00460b2 | 62 | |
magicTurtle866 | 0:540cf00460b2 | 63 | MFCC::~MFCC() { |
magicTurtle866 | 0:540cf00460b2 | 64 | delete []frame; |
magicTurtle866 | 0:540cf00460b2 | 65 | delete [] buffer; |
magicTurtle866 | 0:540cf00460b2 | 66 | delete []mel_energies; |
magicTurtle866 | 0:540cf00460b2 | 67 | delete []window_func; |
magicTurtle866 | 0:540cf00460b2 | 68 | delete []fbank_filter_first; |
magicTurtle866 | 0:540cf00460b2 | 69 | delete []fbank_filter_last; |
magicTurtle866 | 0:540cf00460b2 | 70 | delete []dct_matrix; |
magicTurtle866 | 0:540cf00460b2 | 71 | delete rfft; |
magicTurtle866 | 0:540cf00460b2 | 72 | for(int i=0;i<NUM_FBANK_BINS;i++) |
magicTurtle866 | 0:540cf00460b2 | 73 | delete mel_fbank[i]; |
magicTurtle866 | 0:540cf00460b2 | 74 | delete mel_fbank; |
magicTurtle866 | 0:540cf00460b2 | 75 | } |
magicTurtle866 | 0:540cf00460b2 | 76 | |
magicTurtle866 | 0:540cf00460b2 | 77 | float * MFCC::create_dct_matrix(int32_t input_length, int32_t coefficient_count) { |
magicTurtle866 | 0:540cf00460b2 | 78 | int32_t k, n; |
magicTurtle866 | 0:540cf00460b2 | 79 | float * M = new float[input_length*coefficient_count]; |
magicTurtle866 | 0:540cf00460b2 | 80 | float normalizer; |
magicTurtle866 | 0:540cf00460b2 | 81 | arm_sqrt_f32(2.0/(float)input_length,&normalizer); |
magicTurtle866 | 0:540cf00460b2 | 82 | for (k = 0; k < coefficient_count; k++) { |
magicTurtle866 | 0:540cf00460b2 | 83 | for (n = 0; n < input_length; n++) { |
magicTurtle866 | 0:540cf00460b2 | 84 | M[k*input_length+n] = normalizer * cos( ((double)M_PI)/input_length * (n + 0.5) * k ); |
magicTurtle866 | 0:540cf00460b2 | 85 | } |
magicTurtle866 | 0:540cf00460b2 | 86 | } |
magicTurtle866 | 0:540cf00460b2 | 87 | return M; |
magicTurtle866 | 0:540cf00460b2 | 88 | } |
magicTurtle866 | 0:540cf00460b2 | 89 | |
magicTurtle866 | 0:540cf00460b2 | 90 | float ** MFCC::create_mel_fbank() { |
magicTurtle866 | 0:540cf00460b2 | 91 | |
magicTurtle866 | 0:540cf00460b2 | 92 | int32_t bin, i; |
magicTurtle866 | 0:540cf00460b2 | 93 | |
magicTurtle866 | 0:540cf00460b2 | 94 | int32_t num_fft_bins = frame_len_padded/2; |
magicTurtle866 | 0:540cf00460b2 | 95 | float fft_bin_width = ((float)SAMP_FREQ) / frame_len_padded; |
magicTurtle866 | 0:540cf00460b2 | 96 | float mel_low_freq = MelScale(MEL_LOW_FREQ); |
magicTurtle866 | 0:540cf00460b2 | 97 | float mel_high_freq = MelScale(MEL_HIGH_FREQ); |
magicTurtle866 | 0:540cf00460b2 | 98 | float mel_freq_delta = (mel_high_freq - mel_low_freq) / (NUM_FBANK_BINS+1); |
magicTurtle866 | 0:540cf00460b2 | 99 | |
magicTurtle866 | 0:540cf00460b2 | 100 | float *this_bin = new float[num_fft_bins]; |
magicTurtle866 | 0:540cf00460b2 | 101 | |
magicTurtle866 | 0:540cf00460b2 | 102 | float ** mel_fbank = new float*[NUM_FBANK_BINS]; |
magicTurtle866 | 0:540cf00460b2 | 103 | |
magicTurtle866 | 0:540cf00460b2 | 104 | for (bin = 0; bin < NUM_FBANK_BINS; bin++) { |
magicTurtle866 | 0:540cf00460b2 | 105 | |
magicTurtle866 | 0:540cf00460b2 | 106 | float left_mel = mel_low_freq + bin * mel_freq_delta; |
magicTurtle866 | 0:540cf00460b2 | 107 | float center_mel = mel_low_freq + (bin + 1) * mel_freq_delta; |
magicTurtle866 | 0:540cf00460b2 | 108 | float right_mel = mel_low_freq + (bin + 2) * mel_freq_delta; |
magicTurtle866 | 0:540cf00460b2 | 109 | |
magicTurtle866 | 0:540cf00460b2 | 110 | int32_t first_index = -1, last_index = -1; |
magicTurtle866 | 0:540cf00460b2 | 111 | |
magicTurtle866 | 0:540cf00460b2 | 112 | for (i = 0; i < num_fft_bins; i++) { |
magicTurtle866 | 0:540cf00460b2 | 113 | |
magicTurtle866 | 0:540cf00460b2 | 114 | float freq = (fft_bin_width * i); // center freq of this fft bin. |
magicTurtle866 | 0:540cf00460b2 | 115 | float mel = MelScale(freq); |
magicTurtle866 | 0:540cf00460b2 | 116 | this_bin[i] = 0.0; |
magicTurtle866 | 0:540cf00460b2 | 117 | |
magicTurtle866 | 0:540cf00460b2 | 118 | if (mel > left_mel && mel < right_mel) { |
magicTurtle866 | 0:540cf00460b2 | 119 | float weight; |
magicTurtle866 | 0:540cf00460b2 | 120 | if (mel <= center_mel) { |
magicTurtle866 | 0:540cf00460b2 | 121 | weight = (mel - left_mel) / (center_mel - left_mel); |
magicTurtle866 | 0:540cf00460b2 | 122 | } else { |
magicTurtle866 | 0:540cf00460b2 | 123 | weight = (right_mel-mel) / (right_mel-center_mel); |
magicTurtle866 | 0:540cf00460b2 | 124 | } |
magicTurtle866 | 0:540cf00460b2 | 125 | this_bin[i] = weight; |
magicTurtle866 | 0:540cf00460b2 | 126 | if (first_index == -1) |
magicTurtle866 | 0:540cf00460b2 | 127 | first_index = i; |
magicTurtle866 | 0:540cf00460b2 | 128 | last_index = i; |
magicTurtle866 | 0:540cf00460b2 | 129 | } |
magicTurtle866 | 0:540cf00460b2 | 130 | } |
magicTurtle866 | 0:540cf00460b2 | 131 | |
magicTurtle866 | 0:540cf00460b2 | 132 | fbank_filter_first[bin] = first_index; |
magicTurtle866 | 0:540cf00460b2 | 133 | fbank_filter_last[bin] = last_index; |
magicTurtle866 | 0:540cf00460b2 | 134 | mel_fbank[bin] = new float[last_index-first_index+1]; |
magicTurtle866 | 0:540cf00460b2 | 135 | |
magicTurtle866 | 0:540cf00460b2 | 136 | int32_t j = 0; |
magicTurtle866 | 0:540cf00460b2 | 137 | //copy the part we care about |
magicTurtle866 | 0:540cf00460b2 | 138 | for (i = first_index; i <= last_index; i++) { |
magicTurtle866 | 0:540cf00460b2 | 139 | mel_fbank[bin][j++] = this_bin[i]; |
magicTurtle866 | 0:540cf00460b2 | 140 | } |
magicTurtle866 | 0:540cf00460b2 | 141 | } |
magicTurtle866 | 0:540cf00460b2 | 142 | delete []this_bin; |
magicTurtle866 | 0:540cf00460b2 | 143 | return mel_fbank; |
magicTurtle866 | 0:540cf00460b2 | 144 | } |
magicTurtle866 | 0:540cf00460b2 | 145 | |
magicTurtle866 | 0:540cf00460b2 | 146 | void MFCC::mfcc_compute(const int16_t * audio_data, q7_t* mfcc_out) { |
magicTurtle866 | 0:540cf00460b2 | 147 | |
magicTurtle866 | 0:540cf00460b2 | 148 | int32_t i, j, bin; |
magicTurtle866 | 0:540cf00460b2 | 149 | |
magicTurtle866 | 0:540cf00460b2 | 150 | //TensorFlow way of normalizing .wav data to (-1,1) |
magicTurtle866 | 0:540cf00460b2 | 151 | for (i = 0; i < frame_len; i++) { |
magicTurtle866 | 0:540cf00460b2 | 152 | frame[i] = (float)audio_data[i]/(1<<15); |
magicTurtle866 | 0:540cf00460b2 | 153 | } |
magicTurtle866 | 0:540cf00460b2 | 154 | //Fill up remaining with zeros |
magicTurtle866 | 0:540cf00460b2 | 155 | memset(&frame[frame_len], 0, sizeof(float) * (frame_len_padded-frame_len)); |
magicTurtle866 | 0:540cf00460b2 | 156 | |
magicTurtle866 | 0:540cf00460b2 | 157 | for (i = 0; i < frame_len; i++) { |
magicTurtle866 | 0:540cf00460b2 | 158 | frame[i] *= window_func[i]; |
magicTurtle866 | 0:540cf00460b2 | 159 | } |
magicTurtle866 | 0:540cf00460b2 | 160 | |
magicTurtle866 | 0:540cf00460b2 | 161 | //Compute FFT |
magicTurtle866 | 0:540cf00460b2 | 162 | arm_rfft_fast_f32(rfft, frame, buffer, 0); |
magicTurtle866 | 0:540cf00460b2 | 163 | |
magicTurtle866 | 0:540cf00460b2 | 164 | //Convert to power spectrum |
magicTurtle866 | 0:540cf00460b2 | 165 | //frame is stored as [real0, realN/2-1, real1, im1, real2, im2, ...] |
magicTurtle866 | 0:540cf00460b2 | 166 | int32_t half_dim = frame_len_padded/2; |
magicTurtle866 | 0:540cf00460b2 | 167 | float first_energy = buffer[0] * buffer[0], |
magicTurtle866 | 0:540cf00460b2 | 168 | last_energy = buffer[1] * buffer[1]; // handle this special case |
magicTurtle866 | 0:540cf00460b2 | 169 | for (i = 1; i < half_dim; i++) { |
magicTurtle866 | 0:540cf00460b2 | 170 | float real = buffer[i*2], im = buffer[i*2 + 1]; |
magicTurtle866 | 0:540cf00460b2 | 171 | buffer[i] = real*real + im*im; |
magicTurtle866 | 0:540cf00460b2 | 172 | } |
magicTurtle866 | 0:540cf00460b2 | 173 | buffer[0] = first_energy; |
magicTurtle866 | 0:540cf00460b2 | 174 | buffer[half_dim] = last_energy; |
magicTurtle866 | 0:540cf00460b2 | 175 | |
magicTurtle866 | 0:540cf00460b2 | 176 | float sqrt_data; |
magicTurtle866 | 0:540cf00460b2 | 177 | //Apply mel filterbanks |
magicTurtle866 | 0:540cf00460b2 | 178 | for (bin = 0; bin < NUM_FBANK_BINS; bin++) { |
magicTurtle866 | 0:540cf00460b2 | 179 | j = 0; |
magicTurtle866 | 0:540cf00460b2 | 180 | float mel_energy = 0; |
magicTurtle866 | 0:540cf00460b2 | 181 | int32_t first_index = fbank_filter_first[bin]; |
magicTurtle866 | 0:540cf00460b2 | 182 | int32_t last_index = fbank_filter_last[bin]; |
magicTurtle866 | 0:540cf00460b2 | 183 | for (i = first_index; i <= last_index; i++) { |
magicTurtle866 | 0:540cf00460b2 | 184 | arm_sqrt_f32(buffer[i],&sqrt_data); |
magicTurtle866 | 0:540cf00460b2 | 185 | mel_energy += (sqrt_data) * mel_fbank[bin][j++]; |
magicTurtle866 | 0:540cf00460b2 | 186 | } |
magicTurtle866 | 0:540cf00460b2 | 187 | mel_energies[bin] = mel_energy; |
magicTurtle866 | 0:540cf00460b2 | 188 | |
magicTurtle866 | 0:540cf00460b2 | 189 | //avoid log of zero |
magicTurtle866 | 0:540cf00460b2 | 190 | if (mel_energy == 0.0) |
magicTurtle866 | 0:540cf00460b2 | 191 | mel_energies[bin] = FLT_MIN; |
magicTurtle866 | 0:540cf00460b2 | 192 | } |
magicTurtle866 | 0:540cf00460b2 | 193 | |
magicTurtle866 | 0:540cf00460b2 | 194 | //Take log |
magicTurtle866 | 0:540cf00460b2 | 195 | for (bin = 0; bin < NUM_FBANK_BINS; bin++) |
magicTurtle866 | 0:540cf00460b2 | 196 | mel_energies[bin] = logf(mel_energies[bin]); |
magicTurtle866 | 0:540cf00460b2 | 197 | |
magicTurtle866 | 0:540cf00460b2 | 198 | //Take DCT. Uses matrix mul. |
magicTurtle866 | 0:540cf00460b2 | 199 | for (i = 0; i < num_mfcc_features; i++) { |
magicTurtle866 | 0:540cf00460b2 | 200 | float sum = 0.0; |
magicTurtle866 | 0:540cf00460b2 | 201 | for (j = 0; j < NUM_FBANK_BINS; j++) { |
magicTurtle866 | 0:540cf00460b2 | 202 | sum += dct_matrix[i*NUM_FBANK_BINS+j] * mel_energies[j]; |
magicTurtle866 | 0:540cf00460b2 | 203 | } |
magicTurtle866 | 0:540cf00460b2 | 204 | |
magicTurtle866 | 0:540cf00460b2 | 205 | //Input is Qx.mfcc_dec_bits (from quantization step) |
magicTurtle866 | 0:540cf00460b2 | 206 | sum *= (0x1<<mfcc_dec_bits); |
magicTurtle866 | 0:540cf00460b2 | 207 | sum = round(sum); |
magicTurtle866 | 0:540cf00460b2 | 208 | if(sum >= 127) |
magicTurtle866 | 0:540cf00460b2 | 209 | mfcc_out[i] = 127; |
magicTurtle866 | 0:540cf00460b2 | 210 | else if(sum <= -128) |
magicTurtle866 | 0:540cf00460b2 | 211 | mfcc_out[i] = -128; |
magicTurtle866 | 0:540cf00460b2 | 212 | else |
magicTurtle866 | 0:540cf00460b2 | 213 | mfcc_out[i] = sum; |
magicTurtle866 | 0:540cf00460b2 | 214 | } |
magicTurtle866 | 0:540cf00460b2 | 215 | |
magicTurtle866 | 0:540cf00460b2 | 216 | } |