can bayar
/
can_mfcc
sa
Diff: mfcc.cpp
- Revision:
- 0:540cf00460b2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mfcc.cpp Wed Apr 29 19:54:31 2020 +0000 @@ -0,0 +1,216 @@ +/* + * Copyright (C) 2018 Arm Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Description: MFCC feature extraction to match with TensorFlow MFCC Op + */ + +#include <string.h> + +#include "mfcc.h" +#include "float.h" + +#define _USE_MATH_DEFINES +#include <math.h> + +MFCC::MFCC(int num_mfcc_features, int frame_len, int mfcc_dec_bits) +:num_mfcc_features(num_mfcc_features), + frame_len(frame_len), + mfcc_dec_bits(mfcc_dec_bits) +{ + + // Round-up to nearest power of 2. + frame_len_padded = pow(2,ceil((log(frame_len)/log(2)))); + + frame = new float[frame_len_padded]; + buffer = new float[frame_len_padded]; + mel_energies = new float[NUM_FBANK_BINS]; + + //create window function + window_func = new float[frame_len]; + for (int i = 0; i < frame_len; i++) + window_func[i] = 0.5 - 0.5*cos(M_2PI * ((float)i) / (frame_len)); + + //create mel filterbank + fbank_filter_first = new int32_t[NUM_FBANK_BINS]; + fbank_filter_last = new int32_t[NUM_FBANK_BINS];; + mel_fbank = create_mel_fbank(); + + //create DCT matrix + dct_matrix = create_dct_matrix(NUM_FBANK_BINS, num_mfcc_features); + + //initialize FFT + rfft = new arm_rfft_fast_instance_f32; + arm_rfft_fast_init_f32(rfft, frame_len_padded); + +} + +MFCC::~MFCC() { + delete []frame; + delete [] buffer; + delete []mel_energies; + delete []window_func; + delete []fbank_filter_first; + delete []fbank_filter_last; + delete []dct_matrix; + delete rfft; + for(int i=0;i<NUM_FBANK_BINS;i++) + delete mel_fbank[i]; + delete mel_fbank; +} + +float * MFCC::create_dct_matrix(int32_t input_length, int32_t coefficient_count) { + int32_t k, n; + float * M = new float[input_length*coefficient_count]; + float normalizer; + arm_sqrt_f32(2.0/(float)input_length,&normalizer); + for (k = 0; k < coefficient_count; k++) { + for (n = 0; n < input_length; n++) { + M[k*input_length+n] = normalizer * cos( ((double)M_PI)/input_length * (n + 0.5) * k ); + } + } + return M; +} + +float ** MFCC::create_mel_fbank() { + + int32_t bin, i; + + int32_t num_fft_bins = frame_len_padded/2; + float fft_bin_width = ((float)SAMP_FREQ) / frame_len_padded; + float mel_low_freq = MelScale(MEL_LOW_FREQ); + float mel_high_freq = MelScale(MEL_HIGH_FREQ); + float mel_freq_delta = (mel_high_freq - mel_low_freq) / (NUM_FBANK_BINS+1); + + float *this_bin = new float[num_fft_bins]; + + float ** mel_fbank = new float*[NUM_FBANK_BINS]; + + for (bin = 0; bin < NUM_FBANK_BINS; bin++) { + + float left_mel = mel_low_freq + bin * mel_freq_delta; + float center_mel = mel_low_freq + (bin + 1) * mel_freq_delta; + float right_mel = mel_low_freq + (bin + 2) * mel_freq_delta; + + int32_t first_index = -1, last_index = -1; + + for (i = 0; i < num_fft_bins; i++) { + + float freq = (fft_bin_width * i); // center freq of this fft bin. + float mel = MelScale(freq); + this_bin[i] = 0.0; + + if (mel > left_mel && mel < right_mel) { + float weight; + if (mel <= center_mel) { + weight = (mel - left_mel) / (center_mel - left_mel); + } else { + weight = (right_mel-mel) / (right_mel-center_mel); + } + this_bin[i] = weight; + if (first_index == -1) + first_index = i; + last_index = i; + } + } + + fbank_filter_first[bin] = first_index; + fbank_filter_last[bin] = last_index; + mel_fbank[bin] = new float[last_index-first_index+1]; + + int32_t j = 0; + //copy the part we care about + for (i = first_index; i <= last_index; i++) { + mel_fbank[bin][j++] = this_bin[i]; + } + } + delete []this_bin; + return mel_fbank; +} + +void MFCC::mfcc_compute(const int16_t * audio_data, q7_t* mfcc_out) { + + int32_t i, j, bin; + + //TensorFlow way of normalizing .wav data to (-1,1) + for (i = 0; i < frame_len; i++) { + frame[i] = (float)audio_data[i]/(1<<15); + } + //Fill up remaining with zeros + memset(&frame[frame_len], 0, sizeof(float) * (frame_len_padded-frame_len)); + + for (i = 0; i < frame_len; i++) { + frame[i] *= window_func[i]; + } + + //Compute FFT + arm_rfft_fast_f32(rfft, frame, buffer, 0); + + //Convert to power spectrum + //frame is stored as [real0, realN/2-1, real1, im1, real2, im2, ...] + int32_t half_dim = frame_len_padded/2; + float first_energy = buffer[0] * buffer[0], + last_energy = buffer[1] * buffer[1]; // handle this special case + for (i = 1; i < half_dim; i++) { + float real = buffer[i*2], im = buffer[i*2 + 1]; + buffer[i] = real*real + im*im; + } + buffer[0] = first_energy; + buffer[half_dim] = last_energy; + + float sqrt_data; + //Apply mel filterbanks + for (bin = 0; bin < NUM_FBANK_BINS; bin++) { + j = 0; + float mel_energy = 0; + int32_t first_index = fbank_filter_first[bin]; + int32_t last_index = fbank_filter_last[bin]; + for (i = first_index; i <= last_index; i++) { + arm_sqrt_f32(buffer[i],&sqrt_data); + mel_energy += (sqrt_data) * mel_fbank[bin][j++]; + } + mel_energies[bin] = mel_energy; + + //avoid log of zero + if (mel_energy == 0.0) + mel_energies[bin] = FLT_MIN; + } + + //Take log + for (bin = 0; bin < NUM_FBANK_BINS; bin++) + mel_energies[bin] = logf(mel_energies[bin]); + + //Take DCT. Uses matrix mul. + for (i = 0; i < num_mfcc_features; i++) { + float sum = 0.0; + for (j = 0; j < NUM_FBANK_BINS; j++) { + sum += dct_matrix[i*NUM_FBANK_BINS+j] * mel_energies[j]; + } + + //Input is Qx.mfcc_dec_bits (from quantization step) + sum *= (0x1<<mfcc_dec_bits); + sum = round(sum); + if(sum >= 127) + mfcc_out[i] = 127; + else if(sum <= -128) + mfcc_out[i] = -128; + else + mfcc_out[i] = sum; + } + +} \ No newline at end of file