can bayar
/
mfcc_test_1
andac icin mfcc
mfcc.cpp@0:534d6a8e9c27, 2020-04-29 (annotated)
- Committer:
- magicTurtle866
- Date:
- Wed Apr 29 19:57:04 2020 +0000
- Revision:
- 0:534d6a8e9c27
olasi mfcc; ;
Who changed what in which revision?
User | Revision | Line number | New contents of line |
---|---|---|---|
magicTurtle866 | 0:534d6a8e9c27 | 1 | /* |
magicTurtle866 | 0:534d6a8e9c27 | 2 | * Copyright (C) 2018 Arm Limited or its affiliates. All rights reserved. |
magicTurtle866 | 0:534d6a8e9c27 | 3 | * |
magicTurtle866 | 0:534d6a8e9c27 | 4 | * SPDX-License-Identifier: Apache-2.0 |
magicTurtle866 | 0:534d6a8e9c27 | 5 | * |
magicTurtle866 | 0:534d6a8e9c27 | 6 | * Licensed under the Apache License, Version 2.0 (the License); you may |
magicTurtle866 | 0:534d6a8e9c27 | 7 | * not use this file except in compliance with the License. |
magicTurtle866 | 0:534d6a8e9c27 | 8 | * You may obtain a copy of the License at |
magicTurtle866 | 0:534d6a8e9c27 | 9 | * |
magicTurtle866 | 0:534d6a8e9c27 | 10 | * www.apache.org/licenses/LICENSE-2.0 |
magicTurtle866 | 0:534d6a8e9c27 | 11 | * |
magicTurtle866 | 0:534d6a8e9c27 | 12 | * Unless required by applicable law or agreed to in writing, software |
magicTurtle866 | 0:534d6a8e9c27 | 13 | * distributed under the License is distributed on an AS IS BASIS, WITHOUT |
magicTurtle866 | 0:534d6a8e9c27 | 14 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
magicTurtle866 | 0:534d6a8e9c27 | 15 | * See the License for the specific language governing permissions and |
magicTurtle866 | 0:534d6a8e9c27 | 16 | * limitations under the License. |
magicTurtle866 | 0:534d6a8e9c27 | 17 | */ |
magicTurtle866 | 0:534d6a8e9c27 | 18 | |
magicTurtle866 | 0:534d6a8e9c27 | 19 | /* |
magicTurtle866 | 0:534d6a8e9c27 | 20 | * Description: MFCC feature extraction to match with TensorFlow MFCC Op |
magicTurtle866 | 0:534d6a8e9c27 | 21 | */ |
magicTurtle866 | 0:534d6a8e9c27 | 22 | |
magicTurtle866 | 0:534d6a8e9c27 | 23 | #include <string.h> |
magicTurtle866 | 0:534d6a8e9c27 | 24 | |
magicTurtle866 | 0:534d6a8e9c27 | 25 | #include "mfcc.h" |
magicTurtle866 | 0:534d6a8e9c27 | 26 | #include "float.h" |
magicTurtle866 | 0:534d6a8e9c27 | 27 | |
magicTurtle866 | 0:534d6a8e9c27 | 28 | #define M_PI 3.141592653589 |
magicTurtle866 | 0:534d6a8e9c27 | 29 | |
magicTurtle866 | 0:534d6a8e9c27 | 30 | MFCC::MFCC(int num_mfcc_features, int frame_len, int mfcc_dec_bits) |
magicTurtle866 | 0:534d6a8e9c27 | 31 | :num_mfcc_features(num_mfcc_features), |
magicTurtle866 | 0:534d6a8e9c27 | 32 | frame_len(frame_len), |
magicTurtle866 | 0:534d6a8e9c27 | 33 | mfcc_dec_bits(mfcc_dec_bits) |
magicTurtle866 | 0:534d6a8e9c27 | 34 | { |
magicTurtle866 | 0:534d6a8e9c27 | 35 | |
magicTurtle866 | 0:534d6a8e9c27 | 36 | // Round-up to nearest power of 2. |
magicTurtle866 | 0:534d6a8e9c27 | 37 | frame_len_padded = pow(2,ceil((log(static_cast<double>(frame_len))/log(static_cast<double>(2))))); |
magicTurtle866 | 0:534d6a8e9c27 | 38 | |
magicTurtle866 | 0:534d6a8e9c27 | 39 | frame = new float[frame_len_padded]; |
magicTurtle866 | 0:534d6a8e9c27 | 40 | buffer = new float[frame_len_padded]; |
magicTurtle866 | 0:534d6a8e9c27 | 41 | mel_energies = new float[NUM_FBANK_BINS]; |
magicTurtle866 | 0:534d6a8e9c27 | 42 | |
magicTurtle866 | 0:534d6a8e9c27 | 43 | //create window function |
magicTurtle866 | 0:534d6a8e9c27 | 44 | window_func = new float[frame_len]; |
magicTurtle866 | 0:534d6a8e9c27 | 45 | for (int i = 0; i < frame_len; i++) |
magicTurtle866 | 0:534d6a8e9c27 | 46 | window_func[i] = 0.5 - 0.5*cos(M_2PI * ((float)i) / (frame_len)); |
magicTurtle866 | 0:534d6a8e9c27 | 47 | |
magicTurtle866 | 0:534d6a8e9c27 | 48 | //create mel filterbank |
magicTurtle866 | 0:534d6a8e9c27 | 49 | fbank_filter_first = new int32_t[NUM_FBANK_BINS]; |
magicTurtle866 | 0:534d6a8e9c27 | 50 | fbank_filter_last = new int32_t[NUM_FBANK_BINS];; |
magicTurtle866 | 0:534d6a8e9c27 | 51 | mel_fbank = create_mel_fbank(); |
magicTurtle866 | 0:534d6a8e9c27 | 52 | |
magicTurtle866 | 0:534d6a8e9c27 | 53 | //create DCT matrix |
magicTurtle866 | 0:534d6a8e9c27 | 54 | dct_matrix = create_dct_matrix(NUM_FBANK_BINS, num_mfcc_features); |
magicTurtle866 | 0:534d6a8e9c27 | 55 | |
magicTurtle866 | 0:534d6a8e9c27 | 56 | //initialize FFT |
magicTurtle866 | 0:534d6a8e9c27 | 57 | rfft = new arm_rfft_fast_instance_f32; |
magicTurtle866 | 0:534d6a8e9c27 | 58 | arm_rfft_fast_init_f32(rfft, frame_len_padded); |
magicTurtle866 | 0:534d6a8e9c27 | 59 | |
magicTurtle866 | 0:534d6a8e9c27 | 60 | } |
magicTurtle866 | 0:534d6a8e9c27 | 61 | |
magicTurtle866 | 0:534d6a8e9c27 | 62 | MFCC::~MFCC() { |
magicTurtle866 | 0:534d6a8e9c27 | 63 | delete []frame; |
magicTurtle866 | 0:534d6a8e9c27 | 64 | delete [] buffer; |
magicTurtle866 | 0:534d6a8e9c27 | 65 | delete []mel_energies; |
magicTurtle866 | 0:534d6a8e9c27 | 66 | delete []window_func; |
magicTurtle866 | 0:534d6a8e9c27 | 67 | delete []fbank_filter_first; |
magicTurtle866 | 0:534d6a8e9c27 | 68 | delete []fbank_filter_last; |
magicTurtle866 | 0:534d6a8e9c27 | 69 | delete []dct_matrix; |
magicTurtle866 | 0:534d6a8e9c27 | 70 | delete rfft; |
magicTurtle866 | 0:534d6a8e9c27 | 71 | for(int i=0;i<NUM_FBANK_BINS;i++) |
magicTurtle866 | 0:534d6a8e9c27 | 72 | delete mel_fbank[i]; |
magicTurtle866 | 0:534d6a8e9c27 | 73 | delete mel_fbank; |
magicTurtle866 | 0:534d6a8e9c27 | 74 | } |
magicTurtle866 | 0:534d6a8e9c27 | 75 | |
magicTurtle866 | 0:534d6a8e9c27 | 76 | double round(double number) |
magicTurtle866 | 0:534d6a8e9c27 | 77 | { |
magicTurtle866 | 0:534d6a8e9c27 | 78 | return number < 0.0 ? ceil(number - 0.5) : floor(number + 0.5); |
magicTurtle866 | 0:534d6a8e9c27 | 79 | } |
magicTurtle866 | 0:534d6a8e9c27 | 80 | |
magicTurtle866 | 0:534d6a8e9c27 | 81 | float * MFCC::create_dct_matrix(int32_t input_length, int32_t coefficient_count) { |
magicTurtle866 | 0:534d6a8e9c27 | 82 | int32_t k, n; |
magicTurtle866 | 0:534d6a8e9c27 | 83 | float * M = new float[input_length*coefficient_count]; |
magicTurtle866 | 0:534d6a8e9c27 | 84 | float normalizer; |
magicTurtle866 | 0:534d6a8e9c27 | 85 | arm_sqrt_f32(2.0/(float)input_length,&normalizer); |
magicTurtle866 | 0:534d6a8e9c27 | 86 | for (k = 0; k < coefficient_count; k++) { |
magicTurtle866 | 0:534d6a8e9c27 | 87 | for (n = 0; n < input_length; n++) { |
magicTurtle866 | 0:534d6a8e9c27 | 88 | M[k*input_length+n] = normalizer * cos( ((double)M_PI)/input_length * (n + 0.5) * k ); |
magicTurtle866 | 0:534d6a8e9c27 | 89 | } |
magicTurtle866 | 0:534d6a8e9c27 | 90 | } |
magicTurtle866 | 0:534d6a8e9c27 | 91 | return M; |
magicTurtle866 | 0:534d6a8e9c27 | 92 | } |
magicTurtle866 | 0:534d6a8e9c27 | 93 | |
magicTurtle866 | 0:534d6a8e9c27 | 94 | float ** MFCC::create_mel_fbank() { |
magicTurtle866 | 0:534d6a8e9c27 | 95 | |
magicTurtle866 | 0:534d6a8e9c27 | 96 | int32_t bin, i; |
magicTurtle866 | 0:534d6a8e9c27 | 97 | |
magicTurtle866 | 0:534d6a8e9c27 | 98 | int32_t num_fft_bins = frame_len_padded/2; |
magicTurtle866 | 0:534d6a8e9c27 | 99 | float fft_bin_width = ((float)SAMP_FREQ) / frame_len_padded; |
magicTurtle866 | 0:534d6a8e9c27 | 100 | float mel_low_freq = MelScale(MEL_LOW_FREQ); |
magicTurtle866 | 0:534d6a8e9c27 | 101 | float mel_high_freq = MelScale(MEL_HIGH_FREQ); |
magicTurtle866 | 0:534d6a8e9c27 | 102 | float mel_freq_delta = (mel_high_freq - mel_low_freq) / (NUM_FBANK_BINS+1); |
magicTurtle866 | 0:534d6a8e9c27 | 103 | |
magicTurtle866 | 0:534d6a8e9c27 | 104 | float *this_bin = new float[num_fft_bins]; |
magicTurtle866 | 0:534d6a8e9c27 | 105 | |
magicTurtle866 | 0:534d6a8e9c27 | 106 | float ** mel_fbank = new float*[NUM_FBANK_BINS]; |
magicTurtle866 | 0:534d6a8e9c27 | 107 | |
magicTurtle866 | 0:534d6a8e9c27 | 108 | for (bin = 0; bin < NUM_FBANK_BINS; bin++) { |
magicTurtle866 | 0:534d6a8e9c27 | 109 | |
magicTurtle866 | 0:534d6a8e9c27 | 110 | float left_mel = mel_low_freq + bin * mel_freq_delta; |
magicTurtle866 | 0:534d6a8e9c27 | 111 | float center_mel = mel_low_freq + (bin + 1) * mel_freq_delta; |
magicTurtle866 | 0:534d6a8e9c27 | 112 | float right_mel = mel_low_freq + (bin + 2) * mel_freq_delta; |
magicTurtle866 | 0:534d6a8e9c27 | 113 | |
magicTurtle866 | 0:534d6a8e9c27 | 114 | int32_t first_index = -1, last_index = -1; |
magicTurtle866 | 0:534d6a8e9c27 | 115 | |
magicTurtle866 | 0:534d6a8e9c27 | 116 | for (i = 0; i < num_fft_bins; i++) { |
magicTurtle866 | 0:534d6a8e9c27 | 117 | |
magicTurtle866 | 0:534d6a8e9c27 | 118 | float freq = (fft_bin_width * i); // center freq of this fft bin. |
magicTurtle866 | 0:534d6a8e9c27 | 119 | float mel = MelScale(freq); |
magicTurtle866 | 0:534d6a8e9c27 | 120 | this_bin[i] = 0.0; |
magicTurtle866 | 0:534d6a8e9c27 | 121 | |
magicTurtle866 | 0:534d6a8e9c27 | 122 | if (mel > left_mel && mel < right_mel) { |
magicTurtle866 | 0:534d6a8e9c27 | 123 | float weight; |
magicTurtle866 | 0:534d6a8e9c27 | 124 | if (mel <= center_mel) { |
magicTurtle866 | 0:534d6a8e9c27 | 125 | weight = (mel - left_mel) / (center_mel - left_mel); |
magicTurtle866 | 0:534d6a8e9c27 | 126 | } else { |
magicTurtle866 | 0:534d6a8e9c27 | 127 | weight = (right_mel-mel) / (right_mel-center_mel); |
magicTurtle866 | 0:534d6a8e9c27 | 128 | } |
magicTurtle866 | 0:534d6a8e9c27 | 129 | this_bin[i] = weight; |
magicTurtle866 | 0:534d6a8e9c27 | 130 | if (first_index == -1) |
magicTurtle866 | 0:534d6a8e9c27 | 131 | first_index = i; |
magicTurtle866 | 0:534d6a8e9c27 | 132 | last_index = i; |
magicTurtle866 | 0:534d6a8e9c27 | 133 | } |
magicTurtle866 | 0:534d6a8e9c27 | 134 | } |
magicTurtle866 | 0:534d6a8e9c27 | 135 | |
magicTurtle866 | 0:534d6a8e9c27 | 136 | fbank_filter_first[bin] = first_index; |
magicTurtle866 | 0:534d6a8e9c27 | 137 | fbank_filter_last[bin] = last_index; |
magicTurtle866 | 0:534d6a8e9c27 | 138 | mel_fbank[bin] = new float[last_index-first_index+1]; |
magicTurtle866 | 0:534d6a8e9c27 | 139 | |
magicTurtle866 | 0:534d6a8e9c27 | 140 | int32_t j = 0; |
magicTurtle866 | 0:534d6a8e9c27 | 141 | //copy the part we care about |
magicTurtle866 | 0:534d6a8e9c27 | 142 | for (i = first_index; i <= last_index; i++) { |
magicTurtle866 | 0:534d6a8e9c27 | 143 | mel_fbank[bin][j++] = this_bin[i]; |
magicTurtle866 | 0:534d6a8e9c27 | 144 | } |
magicTurtle866 | 0:534d6a8e9c27 | 145 | } |
magicTurtle866 | 0:534d6a8e9c27 | 146 | delete []this_bin; |
magicTurtle866 | 0:534d6a8e9c27 | 147 | return mel_fbank; |
magicTurtle866 | 0:534d6a8e9c27 | 148 | } |
magicTurtle866 | 0:534d6a8e9c27 | 149 | |
magicTurtle866 | 0:534d6a8e9c27 | 150 | void MFCC::mfcc_compute(const int16_t * audio_data, q7_t* mfcc_out) { |
magicTurtle866 | 0:534d6a8e9c27 | 151 | |
magicTurtle866 | 0:534d6a8e9c27 | 152 | int32_t i, j, bin; |
magicTurtle866 | 0:534d6a8e9c27 | 153 | |
magicTurtle866 | 0:534d6a8e9c27 | 154 | //TensorFlow way of normalizing .wav data to (-1,1) |
magicTurtle866 | 0:534d6a8e9c27 | 155 | for (i = 0; i < frame_len; i++) { |
magicTurtle866 | 0:534d6a8e9c27 | 156 | frame[i] = (float)audio_data[i]/(1<<15); |
magicTurtle866 | 0:534d6a8e9c27 | 157 | } |
magicTurtle866 | 0:534d6a8e9c27 | 158 | //Fill up remaining with zeros |
magicTurtle866 | 0:534d6a8e9c27 | 159 | memset(&frame[frame_len], 0, sizeof(float) * (frame_len_padded-frame_len)); |
magicTurtle866 | 0:534d6a8e9c27 | 160 | |
magicTurtle866 | 0:534d6a8e9c27 | 161 | for (i = 0; i < frame_len; i++) { |
magicTurtle866 | 0:534d6a8e9c27 | 162 | frame[i] *= window_func[i]; |
magicTurtle866 | 0:534d6a8e9c27 | 163 | } |
magicTurtle866 | 0:534d6a8e9c27 | 164 | |
magicTurtle866 | 0:534d6a8e9c27 | 165 | //Compute FFT |
magicTurtle866 | 0:534d6a8e9c27 | 166 | arm_rfft_fast_f32(rfft, frame, buffer, 0); |
magicTurtle866 | 0:534d6a8e9c27 | 167 | |
magicTurtle866 | 0:534d6a8e9c27 | 168 | //Convert to power spectrum |
magicTurtle866 | 0:534d6a8e9c27 | 169 | //frame is stored as [real0, realN/2-1, real1, im1, real2, im2, ...] |
magicTurtle866 | 0:534d6a8e9c27 | 170 | int32_t half_dim = frame_len_padded/2; |
magicTurtle866 | 0:534d6a8e9c27 | 171 | float first_energy = buffer[0] * buffer[0], |
magicTurtle866 | 0:534d6a8e9c27 | 172 | last_energy = buffer[1] * buffer[1]; // handle this special case |
magicTurtle866 | 0:534d6a8e9c27 | 173 | for (i = 1; i < half_dim; i++) { |
magicTurtle866 | 0:534d6a8e9c27 | 174 | float real = buffer[i*2], im = buffer[i*2 + 1]; |
magicTurtle866 | 0:534d6a8e9c27 | 175 | buffer[i] = real*real + im*im; |
magicTurtle866 | 0:534d6a8e9c27 | 176 | } |
magicTurtle866 | 0:534d6a8e9c27 | 177 | buffer[0] = first_energy; |
magicTurtle866 | 0:534d6a8e9c27 | 178 | buffer[half_dim] = last_energy; |
magicTurtle866 | 0:534d6a8e9c27 | 179 | |
magicTurtle866 | 0:534d6a8e9c27 | 180 | float sqrt_data; |
magicTurtle866 | 0:534d6a8e9c27 | 181 | //Apply mel filterbanks |
magicTurtle866 | 0:534d6a8e9c27 | 182 | for (bin = 0; bin < NUM_FBANK_BINS; bin++) { |
magicTurtle866 | 0:534d6a8e9c27 | 183 | j = 0; |
magicTurtle866 | 0:534d6a8e9c27 | 184 | float mel_energy = 0; |
magicTurtle866 | 0:534d6a8e9c27 | 185 | int32_t first_index = fbank_filter_first[bin]; |
magicTurtle866 | 0:534d6a8e9c27 | 186 | int32_t last_index = fbank_filter_last[bin]; |
magicTurtle866 | 0:534d6a8e9c27 | 187 | for (i = first_index; i <= last_index; i++) { |
magicTurtle866 | 0:534d6a8e9c27 | 188 | arm_sqrt_f32(buffer[i],&sqrt_data); |
magicTurtle866 | 0:534d6a8e9c27 | 189 | mel_energy += (sqrt_data) * mel_fbank[bin][j++]; |
magicTurtle866 | 0:534d6a8e9c27 | 190 | } |
magicTurtle866 | 0:534d6a8e9c27 | 191 | mel_energies[bin] = mel_energy; |
magicTurtle866 | 0:534d6a8e9c27 | 192 | |
magicTurtle866 | 0:534d6a8e9c27 | 193 | //avoid log of zero |
magicTurtle866 | 0:534d6a8e9c27 | 194 | if (mel_energy == 0.0) |
magicTurtle866 | 0:534d6a8e9c27 | 195 | mel_energies[bin] = FLT_MIN; |
magicTurtle866 | 0:534d6a8e9c27 | 196 | } |
magicTurtle866 | 0:534d6a8e9c27 | 197 | |
magicTurtle866 | 0:534d6a8e9c27 | 198 | //Take log |
magicTurtle866 | 0:534d6a8e9c27 | 199 | for (bin = 0; bin < NUM_FBANK_BINS; bin++) |
magicTurtle866 | 0:534d6a8e9c27 | 200 | mel_energies[bin] = logf(mel_energies[bin]); |
magicTurtle866 | 0:534d6a8e9c27 | 201 | |
magicTurtle866 | 0:534d6a8e9c27 | 202 | //Take DCT. Uses matrix mul. |
magicTurtle866 | 0:534d6a8e9c27 | 203 | for (i = 0; i < num_mfcc_features; i++) { |
magicTurtle866 | 0:534d6a8e9c27 | 204 | float sum = 0.0; |
magicTurtle866 | 0:534d6a8e9c27 | 205 | for (j = 0; j < NUM_FBANK_BINS; j++) { |
magicTurtle866 | 0:534d6a8e9c27 | 206 | sum += dct_matrix[i*NUM_FBANK_BINS+j] * mel_energies[j]; |
magicTurtle866 | 0:534d6a8e9c27 | 207 | } |
magicTurtle866 | 0:534d6a8e9c27 | 208 | |
magicTurtle866 | 0:534d6a8e9c27 | 209 | //Input is Qx.mfcc_dec_bits (from quantization step) |
magicTurtle866 | 0:534d6a8e9c27 | 210 | sum *= (0x1<<mfcc_dec_bits); |
magicTurtle866 | 0:534d6a8e9c27 | 211 | sum = round(sum); |
magicTurtle866 | 0:534d6a8e9c27 | 212 | if(sum >= 127) |
magicTurtle866 | 0:534d6a8e9c27 | 213 | mfcc_out[i] = 127; |
magicTurtle866 | 0:534d6a8e9c27 | 214 | else if(sum <= -128) |
magicTurtle866 | 0:534d6a8e9c27 | 215 | mfcc_out[i] = -128; |
magicTurtle866 | 0:534d6a8e9c27 | 216 | else |
magicTurtle866 | 0:534d6a8e9c27 | 217 | mfcc_out[i] = sum; |
magicTurtle866 | 0:534d6a8e9c27 | 218 | } |
magicTurtle866 | 0:534d6a8e9c27 | 219 | |
magicTurtle866 | 0:534d6a8e9c27 | 220 | } |
magicTurtle866 | 0:534d6a8e9c27 | 221 |