sa

Dependencies:   mbed-dsp

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers mfcc.cpp Source File

mfcc.cpp

00001 /*
00002  * Copyright (C) 2018 Arm Limited or its affiliates. All rights reserved.
00003  *
00004  * SPDX-License-Identifier: Apache-2.0
00005  *
00006  * Licensed under the Apache License, Version 2.0 (the License); you may
00007  * not use this file except in compliance with the License.
00008  * You may obtain a copy of the License at
00009  *
00010  * www.apache.org/licenses/LICENSE-2.0
00011  *
00012  * Unless required by applicable law or agreed to in writing, software
00013  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
00014  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  * See the License for the specific language governing permissions and
00016  * limitations under the License.
00017  */
00018 
00019 /*
00020  * Description: MFCC feature extraction to match with TensorFlow MFCC Op
00021  */
00022 
00023 #include <string.h>
00024 
00025 #include "mfcc.h"
00026 #include "float.h"
00027 
00028 #define _USE_MATH_DEFINES
00029 #include <math.h>
00030 
00031 MFCC::MFCC(int num_mfcc_features, int frame_len, int mfcc_dec_bits) 
00032 :num_mfcc_features(num_mfcc_features), 
00033  frame_len(frame_len), 
00034  mfcc_dec_bits(mfcc_dec_bits)
00035 {
00036 
00037   // Round-up to nearest power of 2.
00038   frame_len_padded = pow(2,ceil((log(frame_len)/log(2))));
00039 
00040   frame = new float[frame_len_padded];
00041   buffer = new float[frame_len_padded];
00042   mel_energies = new float[NUM_FBANK_BINS];
00043 
00044   //create window function
00045   window_func = new float[frame_len];
00046   for (int i = 0; i < frame_len; i++)
00047     window_func[i] = 0.5 - 0.5*cos(M_2PI * ((float)i) / (frame_len));
00048 
00049   //create mel filterbank
00050   fbank_filter_first = new int32_t[NUM_FBANK_BINS];
00051   fbank_filter_last = new int32_t[NUM_FBANK_BINS];;
00052   mel_fbank = create_mel_fbank();
00053   
00054   //create DCT matrix
00055   dct_matrix = create_dct_matrix(NUM_FBANK_BINS, num_mfcc_features);
00056 
00057   //initialize FFT
00058   rfft = new arm_rfft_fast_instance_f32;
00059   arm_rfft_fast_init_f32(rfft, frame_len_padded);
00060 
00061 }
00062 
00063 MFCC::~MFCC() {
00064   delete []frame;
00065   delete [] buffer;
00066   delete []mel_energies;
00067   delete []window_func;
00068   delete []fbank_filter_first;
00069   delete []fbank_filter_last;
00070   delete []dct_matrix;
00071   delete rfft;
00072   for(int i=0;i<NUM_FBANK_BINS;i++)
00073     delete mel_fbank[i];
00074   delete mel_fbank;
00075 }
00076 
00077 float * MFCC::create_dct_matrix(int32_t input_length, int32_t coefficient_count) {
00078   int32_t k, n;
00079   float * M = new float[input_length*coefficient_count];
00080   float normalizer;
00081   arm_sqrt_f32(2.0/(float)input_length,&normalizer);
00082   for (k = 0; k < coefficient_count; k++) {
00083     for (n = 0; n < input_length; n++) {
00084       M[k*input_length+n] = normalizer * cos( ((double)M_PI)/input_length * (n + 0.5) * k );
00085     }
00086   }
00087   return M;
00088 }
00089 
00090 float ** MFCC::create_mel_fbank() {
00091 
00092   int32_t bin, i;
00093 
00094   int32_t num_fft_bins = frame_len_padded/2;
00095   float fft_bin_width = ((float)SAMP_FREQ) / frame_len_padded;
00096   float mel_low_freq = MelScale(MEL_LOW_FREQ);
00097   float mel_high_freq = MelScale(MEL_HIGH_FREQ); 
00098   float mel_freq_delta = (mel_high_freq - mel_low_freq) / (NUM_FBANK_BINS+1);
00099 
00100   float *this_bin = new float[num_fft_bins];
00101 
00102   float ** mel_fbank =  new float*[NUM_FBANK_BINS];
00103 
00104   for (bin = 0; bin < NUM_FBANK_BINS; bin++) {
00105 
00106     float left_mel = mel_low_freq + bin * mel_freq_delta;
00107     float center_mel = mel_low_freq + (bin + 1) * mel_freq_delta;
00108     float right_mel = mel_low_freq + (bin + 2) * mel_freq_delta;
00109 
00110     int32_t first_index = -1, last_index = -1;
00111 
00112     for (i = 0; i < num_fft_bins; i++) {
00113 
00114       float freq = (fft_bin_width * i);  // center freq of this fft bin.
00115       float mel = MelScale(freq);
00116       this_bin[i] = 0.0;
00117 
00118       if (mel > left_mel && mel < right_mel) {
00119         float weight;
00120         if (mel <= center_mel) {
00121           weight = (mel - left_mel) / (center_mel - left_mel);
00122         } else {
00123           weight = (right_mel-mel) / (right_mel-center_mel);
00124         }
00125         this_bin[i] = weight;
00126         if (first_index == -1)
00127           first_index = i;
00128         last_index = i;
00129       }
00130     }
00131 
00132     fbank_filter_first[bin] = first_index;
00133     fbank_filter_last[bin] = last_index;
00134     mel_fbank[bin] = new float[last_index-first_index+1]; 
00135 
00136     int32_t j = 0;
00137     //copy the part we care about
00138     for (i = first_index; i <= last_index; i++) {
00139       mel_fbank[bin][j++] = this_bin[i];
00140     }
00141   }
00142   delete []this_bin;
00143   return mel_fbank;
00144 }
00145 
00146 void MFCC::mfcc_compute(const int16_t * audio_data, q7_t* mfcc_out) {
00147 
00148   int32_t i, j, bin;
00149 
00150   //TensorFlow way of normalizing .wav data to (-1,1)
00151   for (i = 0; i < frame_len; i++) {
00152     frame[i] = (float)audio_data[i]/(1<<15); 
00153   }
00154   //Fill up remaining with zeros
00155   memset(&frame[frame_len], 0, sizeof(float) * (frame_len_padded-frame_len));
00156 
00157   for (i = 0; i < frame_len; i++) {
00158     frame[i] *= window_func[i];
00159   }
00160 
00161   //Compute FFT
00162   arm_rfft_fast_f32(rfft, frame, buffer, 0);
00163 
00164   //Convert to power spectrum
00165   //frame is stored as [real0, realN/2-1, real1, im1, real2, im2, ...]
00166   int32_t half_dim = frame_len_padded/2;
00167   float first_energy = buffer[0] * buffer[0],
00168         last_energy =  buffer[1] * buffer[1];  // handle this special case
00169   for (i = 1; i < half_dim; i++) {
00170     float real = buffer[i*2], im = buffer[i*2 + 1];
00171     buffer[i] = real*real + im*im;
00172   }
00173   buffer[0] = first_energy;
00174   buffer[half_dim] = last_energy;  
00175  
00176   float sqrt_data;
00177   //Apply mel filterbanks
00178   for (bin = 0; bin < NUM_FBANK_BINS; bin++) {
00179     j = 0;
00180     float mel_energy = 0;
00181     int32_t first_index = fbank_filter_first[bin];
00182     int32_t last_index = fbank_filter_last[bin];
00183     for (i = first_index; i <= last_index; i++) {
00184       arm_sqrt_f32(buffer[i],&sqrt_data);
00185       mel_energy += (sqrt_data) * mel_fbank[bin][j++];
00186     }
00187     mel_energies[bin] = mel_energy;
00188 
00189     //avoid log of zero
00190     if (mel_energy == 0.0)
00191       mel_energies[bin] = FLT_MIN;
00192   }
00193 
00194   //Take log
00195   for (bin = 0; bin < NUM_FBANK_BINS; bin++)
00196     mel_energies[bin] = logf(mel_energies[bin]);
00197 
00198   //Take DCT. Uses matrix mul.
00199   for (i = 0; i < num_mfcc_features; i++) {
00200     float sum = 0.0;
00201     for (j = 0; j < NUM_FBANK_BINS; j++) {
00202       sum += dct_matrix[i*NUM_FBANK_BINS+j] * mel_energies[j];
00203     }
00204 
00205     //Input is Qx.mfcc_dec_bits (from quantization step)
00206     sum *= (0x1<<mfcc_dec_bits);
00207     sum = round(sum);
00208     if(sum >= 127)
00209       mfcc_out[i] = 127;
00210     else if(sum <= -128)
00211       mfcc_out[i] = -128;
00212     else
00213       mfcc_out[i] = sum; 
00214   }
00215 
00216 }