andac icin mfcc

Dependencies:   mbed mbed-dsp

Committer:
magicTurtle866
Date:
Wed Apr 29 19:57:04 2020 +0000
Revision:
0:534d6a8e9c27
olasi mfcc; ;

Who changed what in which revision?

UserRevisionLine numberNew contents of line
magicTurtle866 0:534d6a8e9c27 1 /*
magicTurtle866 0:534d6a8e9c27 2 * Copyright (C) 2018 Arm Limited or its affiliates. All rights reserved.
magicTurtle866 0:534d6a8e9c27 3 *
magicTurtle866 0:534d6a8e9c27 4 * SPDX-License-Identifier: Apache-2.0
magicTurtle866 0:534d6a8e9c27 5 *
magicTurtle866 0:534d6a8e9c27 6 * Licensed under the Apache License, Version 2.0 (the License); you may
magicTurtle866 0:534d6a8e9c27 7 * not use this file except in compliance with the License.
magicTurtle866 0:534d6a8e9c27 8 * You may obtain a copy of the License at
magicTurtle866 0:534d6a8e9c27 9 *
magicTurtle866 0:534d6a8e9c27 10 * www.apache.org/licenses/LICENSE-2.0
magicTurtle866 0:534d6a8e9c27 11 *
magicTurtle866 0:534d6a8e9c27 12 * Unless required by applicable law or agreed to in writing, software
magicTurtle866 0:534d6a8e9c27 13 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
magicTurtle866 0:534d6a8e9c27 14 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
magicTurtle866 0:534d6a8e9c27 15 * See the License for the specific language governing permissions and
magicTurtle866 0:534d6a8e9c27 16 * limitations under the License.
magicTurtle866 0:534d6a8e9c27 17 */
magicTurtle866 0:534d6a8e9c27 18
magicTurtle866 0:534d6a8e9c27 19 /*
magicTurtle866 0:534d6a8e9c27 20 * Description: MFCC feature extraction to match with TensorFlow MFCC Op
magicTurtle866 0:534d6a8e9c27 21 */
magicTurtle866 0:534d6a8e9c27 22
magicTurtle866 0:534d6a8e9c27 23 #include <string.h>
magicTurtle866 0:534d6a8e9c27 24
magicTurtle866 0:534d6a8e9c27 25 #include "mfcc.h"
magicTurtle866 0:534d6a8e9c27 26 #include "float.h"
magicTurtle866 0:534d6a8e9c27 27
magicTurtle866 0:534d6a8e9c27 28 #define M_PI 3.141592653589
magicTurtle866 0:534d6a8e9c27 29
magicTurtle866 0:534d6a8e9c27 30 MFCC::MFCC(int num_mfcc_features, int frame_len, int mfcc_dec_bits)
magicTurtle866 0:534d6a8e9c27 31 :num_mfcc_features(num_mfcc_features),
magicTurtle866 0:534d6a8e9c27 32 frame_len(frame_len),
magicTurtle866 0:534d6a8e9c27 33 mfcc_dec_bits(mfcc_dec_bits)
magicTurtle866 0:534d6a8e9c27 34 {
magicTurtle866 0:534d6a8e9c27 35
magicTurtle866 0:534d6a8e9c27 36 // Round-up to nearest power of 2.
magicTurtle866 0:534d6a8e9c27 37 frame_len_padded = pow(2,ceil((log(static_cast<double>(frame_len))/log(static_cast<double>(2)))));
magicTurtle866 0:534d6a8e9c27 38
magicTurtle866 0:534d6a8e9c27 39 frame = new float[frame_len_padded];
magicTurtle866 0:534d6a8e9c27 40 buffer = new float[frame_len_padded];
magicTurtle866 0:534d6a8e9c27 41 mel_energies = new float[NUM_FBANK_BINS];
magicTurtle866 0:534d6a8e9c27 42
magicTurtle866 0:534d6a8e9c27 43 //create window function
magicTurtle866 0:534d6a8e9c27 44 window_func = new float[frame_len];
magicTurtle866 0:534d6a8e9c27 45 for (int i = 0; i < frame_len; i++)
magicTurtle866 0:534d6a8e9c27 46 window_func[i] = 0.5 - 0.5*cos(M_2PI * ((float)i) / (frame_len));
magicTurtle866 0:534d6a8e9c27 47
magicTurtle866 0:534d6a8e9c27 48 //create mel filterbank
magicTurtle866 0:534d6a8e9c27 49 fbank_filter_first = new int32_t[NUM_FBANK_BINS];
magicTurtle866 0:534d6a8e9c27 50 fbank_filter_last = new int32_t[NUM_FBANK_BINS];;
magicTurtle866 0:534d6a8e9c27 51 mel_fbank = create_mel_fbank();
magicTurtle866 0:534d6a8e9c27 52
magicTurtle866 0:534d6a8e9c27 53 //create DCT matrix
magicTurtle866 0:534d6a8e9c27 54 dct_matrix = create_dct_matrix(NUM_FBANK_BINS, num_mfcc_features);
magicTurtle866 0:534d6a8e9c27 55
magicTurtle866 0:534d6a8e9c27 56 //initialize FFT
magicTurtle866 0:534d6a8e9c27 57 rfft = new arm_rfft_fast_instance_f32;
magicTurtle866 0:534d6a8e9c27 58 arm_rfft_fast_init_f32(rfft, frame_len_padded);
magicTurtle866 0:534d6a8e9c27 59
magicTurtle866 0:534d6a8e9c27 60 }
magicTurtle866 0:534d6a8e9c27 61
magicTurtle866 0:534d6a8e9c27 62 MFCC::~MFCC() {
magicTurtle866 0:534d6a8e9c27 63 delete []frame;
magicTurtle866 0:534d6a8e9c27 64 delete [] buffer;
magicTurtle866 0:534d6a8e9c27 65 delete []mel_energies;
magicTurtle866 0:534d6a8e9c27 66 delete []window_func;
magicTurtle866 0:534d6a8e9c27 67 delete []fbank_filter_first;
magicTurtle866 0:534d6a8e9c27 68 delete []fbank_filter_last;
magicTurtle866 0:534d6a8e9c27 69 delete []dct_matrix;
magicTurtle866 0:534d6a8e9c27 70 delete rfft;
magicTurtle866 0:534d6a8e9c27 71 for(int i=0;i<NUM_FBANK_BINS;i++)
magicTurtle866 0:534d6a8e9c27 72 delete mel_fbank[i];
magicTurtle866 0:534d6a8e9c27 73 delete mel_fbank;
magicTurtle866 0:534d6a8e9c27 74 }
magicTurtle866 0:534d6a8e9c27 75
magicTurtle866 0:534d6a8e9c27 76 double round(double number)
magicTurtle866 0:534d6a8e9c27 77 {
magicTurtle866 0:534d6a8e9c27 78 return number < 0.0 ? ceil(number - 0.5) : floor(number + 0.5);
magicTurtle866 0:534d6a8e9c27 79 }
magicTurtle866 0:534d6a8e9c27 80
magicTurtle866 0:534d6a8e9c27 81 float * MFCC::create_dct_matrix(int32_t input_length, int32_t coefficient_count) {
magicTurtle866 0:534d6a8e9c27 82 int32_t k, n;
magicTurtle866 0:534d6a8e9c27 83 float * M = new float[input_length*coefficient_count];
magicTurtle866 0:534d6a8e9c27 84 float normalizer;
magicTurtle866 0:534d6a8e9c27 85 arm_sqrt_f32(2.0/(float)input_length,&normalizer);
magicTurtle866 0:534d6a8e9c27 86 for (k = 0; k < coefficient_count; k++) {
magicTurtle866 0:534d6a8e9c27 87 for (n = 0; n < input_length; n++) {
magicTurtle866 0:534d6a8e9c27 88 M[k*input_length+n] = normalizer * cos( ((double)M_PI)/input_length * (n + 0.5) * k );
magicTurtle866 0:534d6a8e9c27 89 }
magicTurtle866 0:534d6a8e9c27 90 }
magicTurtle866 0:534d6a8e9c27 91 return M;
magicTurtle866 0:534d6a8e9c27 92 }
magicTurtle866 0:534d6a8e9c27 93
magicTurtle866 0:534d6a8e9c27 94 float ** MFCC::create_mel_fbank() {
magicTurtle866 0:534d6a8e9c27 95
magicTurtle866 0:534d6a8e9c27 96 int32_t bin, i;
magicTurtle866 0:534d6a8e9c27 97
magicTurtle866 0:534d6a8e9c27 98 int32_t num_fft_bins = frame_len_padded/2;
magicTurtle866 0:534d6a8e9c27 99 float fft_bin_width = ((float)SAMP_FREQ) / frame_len_padded;
magicTurtle866 0:534d6a8e9c27 100 float mel_low_freq = MelScale(MEL_LOW_FREQ);
magicTurtle866 0:534d6a8e9c27 101 float mel_high_freq = MelScale(MEL_HIGH_FREQ);
magicTurtle866 0:534d6a8e9c27 102 float mel_freq_delta = (mel_high_freq - mel_low_freq) / (NUM_FBANK_BINS+1);
magicTurtle866 0:534d6a8e9c27 103
magicTurtle866 0:534d6a8e9c27 104 float *this_bin = new float[num_fft_bins];
magicTurtle866 0:534d6a8e9c27 105
magicTurtle866 0:534d6a8e9c27 106 float ** mel_fbank = new float*[NUM_FBANK_BINS];
magicTurtle866 0:534d6a8e9c27 107
magicTurtle866 0:534d6a8e9c27 108 for (bin = 0; bin < NUM_FBANK_BINS; bin++) {
magicTurtle866 0:534d6a8e9c27 109
magicTurtle866 0:534d6a8e9c27 110 float left_mel = mel_low_freq + bin * mel_freq_delta;
magicTurtle866 0:534d6a8e9c27 111 float center_mel = mel_low_freq + (bin + 1) * mel_freq_delta;
magicTurtle866 0:534d6a8e9c27 112 float right_mel = mel_low_freq + (bin + 2) * mel_freq_delta;
magicTurtle866 0:534d6a8e9c27 113
magicTurtle866 0:534d6a8e9c27 114 int32_t first_index = -1, last_index = -1;
magicTurtle866 0:534d6a8e9c27 115
magicTurtle866 0:534d6a8e9c27 116 for (i = 0; i < num_fft_bins; i++) {
magicTurtle866 0:534d6a8e9c27 117
magicTurtle866 0:534d6a8e9c27 118 float freq = (fft_bin_width * i); // center freq of this fft bin.
magicTurtle866 0:534d6a8e9c27 119 float mel = MelScale(freq);
magicTurtle866 0:534d6a8e9c27 120 this_bin[i] = 0.0;
magicTurtle866 0:534d6a8e9c27 121
magicTurtle866 0:534d6a8e9c27 122 if (mel > left_mel && mel < right_mel) {
magicTurtle866 0:534d6a8e9c27 123 float weight;
magicTurtle866 0:534d6a8e9c27 124 if (mel <= center_mel) {
magicTurtle866 0:534d6a8e9c27 125 weight = (mel - left_mel) / (center_mel - left_mel);
magicTurtle866 0:534d6a8e9c27 126 } else {
magicTurtle866 0:534d6a8e9c27 127 weight = (right_mel-mel) / (right_mel-center_mel);
magicTurtle866 0:534d6a8e9c27 128 }
magicTurtle866 0:534d6a8e9c27 129 this_bin[i] = weight;
magicTurtle866 0:534d6a8e9c27 130 if (first_index == -1)
magicTurtle866 0:534d6a8e9c27 131 first_index = i;
magicTurtle866 0:534d6a8e9c27 132 last_index = i;
magicTurtle866 0:534d6a8e9c27 133 }
magicTurtle866 0:534d6a8e9c27 134 }
magicTurtle866 0:534d6a8e9c27 135
magicTurtle866 0:534d6a8e9c27 136 fbank_filter_first[bin] = first_index;
magicTurtle866 0:534d6a8e9c27 137 fbank_filter_last[bin] = last_index;
magicTurtle866 0:534d6a8e9c27 138 mel_fbank[bin] = new float[last_index-first_index+1];
magicTurtle866 0:534d6a8e9c27 139
magicTurtle866 0:534d6a8e9c27 140 int32_t j = 0;
magicTurtle866 0:534d6a8e9c27 141 //copy the part we care about
magicTurtle866 0:534d6a8e9c27 142 for (i = first_index; i <= last_index; i++) {
magicTurtle866 0:534d6a8e9c27 143 mel_fbank[bin][j++] = this_bin[i];
magicTurtle866 0:534d6a8e9c27 144 }
magicTurtle866 0:534d6a8e9c27 145 }
magicTurtle866 0:534d6a8e9c27 146 delete []this_bin;
magicTurtle866 0:534d6a8e9c27 147 return mel_fbank;
magicTurtle866 0:534d6a8e9c27 148 }
magicTurtle866 0:534d6a8e9c27 149
magicTurtle866 0:534d6a8e9c27 150 void MFCC::mfcc_compute(const int16_t * audio_data, q7_t* mfcc_out) {
magicTurtle866 0:534d6a8e9c27 151
magicTurtle866 0:534d6a8e9c27 152 int32_t i, j, bin;
magicTurtle866 0:534d6a8e9c27 153
magicTurtle866 0:534d6a8e9c27 154 //TensorFlow way of normalizing .wav data to (-1,1)
magicTurtle866 0:534d6a8e9c27 155 for (i = 0; i < frame_len; i++) {
magicTurtle866 0:534d6a8e9c27 156 frame[i] = (float)audio_data[i]/(1<<15);
magicTurtle866 0:534d6a8e9c27 157 }
magicTurtle866 0:534d6a8e9c27 158 //Fill up remaining with zeros
magicTurtle866 0:534d6a8e9c27 159 memset(&frame[frame_len], 0, sizeof(float) * (frame_len_padded-frame_len));
magicTurtle866 0:534d6a8e9c27 160
magicTurtle866 0:534d6a8e9c27 161 for (i = 0; i < frame_len; i++) {
magicTurtle866 0:534d6a8e9c27 162 frame[i] *= window_func[i];
magicTurtle866 0:534d6a8e9c27 163 }
magicTurtle866 0:534d6a8e9c27 164
magicTurtle866 0:534d6a8e9c27 165 //Compute FFT
magicTurtle866 0:534d6a8e9c27 166 arm_rfft_fast_f32(rfft, frame, buffer, 0);
magicTurtle866 0:534d6a8e9c27 167
magicTurtle866 0:534d6a8e9c27 168 //Convert to power spectrum
magicTurtle866 0:534d6a8e9c27 169 //frame is stored as [real0, realN/2-1, real1, im1, real2, im2, ...]
magicTurtle866 0:534d6a8e9c27 170 int32_t half_dim = frame_len_padded/2;
magicTurtle866 0:534d6a8e9c27 171 float first_energy = buffer[0] * buffer[0],
magicTurtle866 0:534d6a8e9c27 172 last_energy = buffer[1] * buffer[1]; // handle this special case
magicTurtle866 0:534d6a8e9c27 173 for (i = 1; i < half_dim; i++) {
magicTurtle866 0:534d6a8e9c27 174 float real = buffer[i*2], im = buffer[i*2 + 1];
magicTurtle866 0:534d6a8e9c27 175 buffer[i] = real*real + im*im;
magicTurtle866 0:534d6a8e9c27 176 }
magicTurtle866 0:534d6a8e9c27 177 buffer[0] = first_energy;
magicTurtle866 0:534d6a8e9c27 178 buffer[half_dim] = last_energy;
magicTurtle866 0:534d6a8e9c27 179
magicTurtle866 0:534d6a8e9c27 180 float sqrt_data;
magicTurtle866 0:534d6a8e9c27 181 //Apply mel filterbanks
magicTurtle866 0:534d6a8e9c27 182 for (bin = 0; bin < NUM_FBANK_BINS; bin++) {
magicTurtle866 0:534d6a8e9c27 183 j = 0;
magicTurtle866 0:534d6a8e9c27 184 float mel_energy = 0;
magicTurtle866 0:534d6a8e9c27 185 int32_t first_index = fbank_filter_first[bin];
magicTurtle866 0:534d6a8e9c27 186 int32_t last_index = fbank_filter_last[bin];
magicTurtle866 0:534d6a8e9c27 187 for (i = first_index; i <= last_index; i++) {
magicTurtle866 0:534d6a8e9c27 188 arm_sqrt_f32(buffer[i],&sqrt_data);
magicTurtle866 0:534d6a8e9c27 189 mel_energy += (sqrt_data) * mel_fbank[bin][j++];
magicTurtle866 0:534d6a8e9c27 190 }
magicTurtle866 0:534d6a8e9c27 191 mel_energies[bin] = mel_energy;
magicTurtle866 0:534d6a8e9c27 192
magicTurtle866 0:534d6a8e9c27 193 //avoid log of zero
magicTurtle866 0:534d6a8e9c27 194 if (mel_energy == 0.0)
magicTurtle866 0:534d6a8e9c27 195 mel_energies[bin] = FLT_MIN;
magicTurtle866 0:534d6a8e9c27 196 }
magicTurtle866 0:534d6a8e9c27 197
magicTurtle866 0:534d6a8e9c27 198 //Take log
magicTurtle866 0:534d6a8e9c27 199 for (bin = 0; bin < NUM_FBANK_BINS; bin++)
magicTurtle866 0:534d6a8e9c27 200 mel_energies[bin] = logf(mel_energies[bin]);
magicTurtle866 0:534d6a8e9c27 201
magicTurtle866 0:534d6a8e9c27 202 //Take DCT. Uses matrix mul.
magicTurtle866 0:534d6a8e9c27 203 for (i = 0; i < num_mfcc_features; i++) {
magicTurtle866 0:534d6a8e9c27 204 float sum = 0.0;
magicTurtle866 0:534d6a8e9c27 205 for (j = 0; j < NUM_FBANK_BINS; j++) {
magicTurtle866 0:534d6a8e9c27 206 sum += dct_matrix[i*NUM_FBANK_BINS+j] * mel_energies[j];
magicTurtle866 0:534d6a8e9c27 207 }
magicTurtle866 0:534d6a8e9c27 208
magicTurtle866 0:534d6a8e9c27 209 //Input is Qx.mfcc_dec_bits (from quantization step)
magicTurtle866 0:534d6a8e9c27 210 sum *= (0x1<<mfcc_dec_bits);
magicTurtle866 0:534d6a8e9c27 211 sum = round(sum);
magicTurtle866 0:534d6a8e9c27 212 if(sum >= 127)
magicTurtle866 0:534d6a8e9c27 213 mfcc_out[i] = 127;
magicTurtle866 0:534d6a8e9c27 214 else if(sum <= -128)
magicTurtle866 0:534d6a8e9c27 215 mfcc_out[i] = -128;
magicTurtle866 0:534d6a8e9c27 216 else
magicTurtle866 0:534d6a8e9c27 217 mfcc_out[i] = sum;
magicTurtle866 0:534d6a8e9c27 218 }
magicTurtle866 0:534d6a8e9c27 219
magicTurtle866 0:534d6a8e9c27 220 }
magicTurtle866 0:534d6a8e9c27 221