/**
  ******************************************************************************
  * @file    signal_processing.c
  * @author  Brian Mazzeo
  * @date    2020
  * @brief   This file provides a set of code for signal processing in 487.
  *          Parts are taken from example code from STMIcroelectronics
  ******************************************************************************
  * @attention
  *          This code was specifically developed for BYU ECEn 487 course 
  *          Introduction to Digital Signal Processing.
  *
  *
  ******************************************************************************
  */ 

#include "mbed.h"
#include "stm32746g_discovery_lcd.h"
#include "arm_math.h"
#include "arm_const_structs.h"
#include "filter_coefficients.h"
#include "our_filter.h"
#include "windowed.h"
#include "signal_processing.h"


/* ---------------------------------------------------------------------- 
** Defines for signal processing
** ------------------------------------------------------------------- */ 

#define AUDIO_BLOCK_SAMPLES             ((uint32_t)128)         // Number of samples (L and R) in audio block (each samples is 16 bits)
#define CONV_LENGTH                     (WIN_NUM_TAPS + AUDIO_BLOCK_SAMPLES - 1)
#define BUFFER_LENGTH                   (WIN_NUM_TAPS - 1)
#define FFT_BUFFER_LENGTH               2048
#define FFT_LENGTH                      1024
#define PADDED_FILTER_LENGTH            (WIN_NUM_TAPS + 2*(AUDIO_BLOCK_SAMPLES - 1))

/* For Lab Exercise */
#define Lab_Execution_Type              6

float32_t padded_filter[PADDED_FILTER_LENGTH];

float32_t lState[NUM_TAPS + AUDIO_BLOCK_SAMPLES - 1];
float32_t rState[NUM_TAPS + AUDIO_BLOCK_SAMPLES - 1];

float32_t l_buf[BUFFER_LENGTH];
float32_t r_buf[BUFFER_LENGTH];

float32_t l_buf2[FFT_LENGTH];
float32_t r_buf2[FFT_LENGTH];

arm_fir_instance_f32 filter_left;
arm_fir_instance_f32 filter_right;

float32_t fft_buf[FFT_BUFFER_LENGTH];
float32_t ifft_buf[FFT_BUFFER_LENGTH];

float32_t fft_of_filter[FFT_BUFFER_LENGTH];


/* FUNCTION DEFINITIONS BELOW */

/**
  * @brief  Initialize filter structures to be used in loops later
  * @retval None
  */
void initalize_signal_processing(void)
{

  switch (Lab_Execution_Type)
  {
  case 0: // Passthrough case
    break;
  
  case 1: // FIR case (ARM)
  arm_fir_init_f32(&filter_left, NUM_TAPS, (float32_t *)&Filter_coeffs, (float32_t *)&lState, AUDIO_BLOCK_SAMPLES);
  arm_fir_init_f32(&filter_right, NUM_TAPS, (float32_t *)&Filter_coeffs, (float32_t *)&rState, AUDIO_BLOCK_SAMPLES);
    break;
  
  case 2: // FIR case (student)
  arm_fir_init_f32(&filter_left, WIN_NUM_TAPS, (float32_t *)&win_filter_coeffs, (float32_t *)&lState, AUDIO_BLOCK_SAMPLES);
  arm_fir_init_f32(&filter_right, WIN_NUM_TAPS, (float32_t *)&win_filter_coeffs, (float32_t *)&rState, AUDIO_BLOCK_SAMPLES);
    break;
  
  case 3: // conv Overlap-add 
  filter_conv_init();
    break;
    
  case 4: // FFT Overlap-add
    filter_fft_init();
    break;

  case 5: // FFT Overlap-add with real-imag efficiency
    filter_fft_init();
    break;
    
  case 6: // OS FFT RI
    filter_fft_init();
    break;


  }
}

/**
  * @brief  Process audio channel signals
  * @param  L_channel_in: Pointer to Left channel data input (float32_t)
  * @param  R_channel_in: Pointer to Right channel data input (float32_t)
  * @param  L_channel_out: Pointer to Left channel data output (float32_t)
  * @param  R_channel_out: Pointer to Right channel data output (float32_t)
  * @param  Signal_Length: length of data to process
  * @retval None
  */
void process_audio_channel_signals(float32_t* L_channel_in, float32_t* R_channel_in, float32_t* L_channel_out, float32_t* R_channel_out, uint16_t Signal_Length)
{
    char buf[70];    
    BSP_LCD_SetFont(&Font8);
    BSP_LCD_SetTextColor(LCD_COLOR_CYAN);
    sprintf(buf, "Processing Signals" );
    BSP_LCD_DisplayStringAt(0, 200, (uint8_t *) buf, LEFT_MODE);

  switch(Lab_Execution_Type)
  {
  case 0: // Passthrough case
    arm_copy_f32(L_channel_in, L_channel_out, Signal_Length);
    arm_copy_f32(R_channel_in, R_channel_out, Signal_Length);
    break;
  
  case 1: // FIR case (ARM)
  arm_fir_f32(&filter_left, L_channel_in, L_channel_out, Signal_Length);
  arm_fir_f32(&filter_right, R_channel_in, R_channel_out, Signal_Length);
    break;
  
  case 2: // FIR case (student)
  arm_fir_f32(&filter_left, L_channel_in, L_channel_out, Signal_Length);
  arm_fir_f32(&filter_right, R_channel_in, R_channel_out, Signal_Length);
    break;
  
  case 3: // OA CONV
  filter_OA_CONV(l_buf, L_channel_in, L_channel_out, Signal_Length);
  filter_OA_CONV(r_buf, R_channel_in, R_channel_out, Signal_Length);
    break;
    
  case 4: // OA FFT Overlap-add
  filter_OA_FFT(l_buf, fft_buf, ifft_buf, L_channel_in, L_channel_out, Signal_Length);
  filter_OA_FFT(r_buf, fft_buf, ifft_buf, R_channel_in, R_channel_out, Signal_Length);
    break;

  case 5: // FFT Overlap-add with real-imag efficiency
  filter_OA_FFT_RI(l_buf, r_buf, fft_buf, ifft_buf, L_channel_in, R_channel_in, L_channel_out, R_channel_out, Signal_Length);
    break;
    
  case 6: // OS FFT RI
  filter_OS_FFT_RI(l_buf2, r_buf2, fft_buf, ifft_buf, L_channel_in, R_channel_in, L_channel_out, R_channel_out, Signal_Length);
    break;

    
  }
    /* Change font back */  
    BSP_LCD_SetFont(&Font16);
}

//buffer: pointer to the storage buffer for the filter output
//buf_length: the length of the storage buffer (len_filter + len_batch - 1)

void filter_OA_CONV(float32_t* overlap_buffer, float32_t* d_in, float32_t* d_out, uint16_t sig_length)
{
    float32_t result = 0;
    
    //shift the data sample and convolve
    for(uint16_t shift = 0; shift < CONV_LENGTH; shift++)
    {
        result = 0;

        //multiply-add the shifted, reversed data sample to the padded filter
        for(int i=0; i<sig_length; i++)
        {
            result += padded_filter[i + shift] * d_in[sig_length - i - 1];
        }
        
        // overlap-add to the buffer      
        if(shift < sig_length)
        {
            d_out[shift] = overlap_buffer[shift] + result;
        }
        else if(shift < BUFFER_LENGTH)
        {
            overlap_buffer[shift - sig_length] = overlap_buffer[shift] + result;
        }
        else
        {
            overlap_buffer[shift - sig_length] = result;
        }
    }
    return;
}



void filter_OA_FFT(
float32_t* overlap_buffer, 
float32_t* fft_buffer, 
float32_t* ifft_buffer, 
float32_t* d_in, 
float32_t* d_out, 
uint16_t sig_length)
{
    //fill the FFT buffer
    for(int i=0; i < FFT_LENGTH; i++)
    {
        if(i < sig_length)
        {
        fft_buffer[2*i] = d_in[i];
        fft_buffer[2*i+1] = 0;
        }
        else
        {
        fft_buffer[2*i] = 0;
        fft_buffer[2*i+1] = 0;
        }
    }
    
    //perform FFT in place
    arm_cfft_f32(&arm_cfft_sR_f32_len1024, fft_buffer, 0, 1);
    
    
    //multiply with filter FFT
    arm_cmplx_mult_cmplx_f32(fft_buffer, fft_of_filter, ifft_buffer, FFT_LENGTH);

    //perform inverse FFT in place
    arm_cfft_f32(&arm_cfft_sR_f32_len1024, ifft_buffer, 1, 1);
    
    // overlap-add to the buffer
    for(uint16_t i = 0; i < CONV_LENGTH; i++)
    {
        if(i < sig_length)
        {
            d_out[i] = ifft_buffer[2*i] + overlap_buffer[i];
        }
        else if(i < BUFFER_LENGTH)
        {
            overlap_buffer[i - sig_length] = overlap_buffer[i] + ifft_buffer[2*i];
        }
        else
        {
            overlap_buffer[i - sig_length] = ifft_buffer[2*i];
        }
    }    
    return;
}


//The overlap-add method uses previous outputs to adjust the filtered
//output to be more representative of the continuous version of the signal
void filter_OA_FFT_RI(
    float32_t* overlap_buffer1,
    float32_t* overlap_buffer2,
    float32_t* fft_buffer,
    float32_t* ifft_buffer,
    float32_t* d_in1,
    float32_t* d_in2,
    float32_t* d_out1,
    float32_t* d_out2,
    uint16_t sig_length)
{
    //fill the FFT buffer
    for(int i=0; i < FFT_LENGTH; i++)
    {
        if(i < sig_length)
        {
        fft_buffer[2*i] = d_in1[i];
        fft_buffer[2*i+1] = d_in2[i];
        }
        else
        {
        fft_buffer[2*i] = 0;
        fft_buffer[2*i+1] = 0;
        }
    }
    
    //perform FFT in place
    arm_cfft_f32(&arm_cfft_sR_f32_len1024, fft_buffer, 0, 1);
    
    
    //multiply with filter FFT
    arm_cmplx_mult_cmplx_f32(fft_buffer, fft_of_filter, ifft_buffer, FFT_LENGTH);
    
    
    //perform inverse FFT in place
    arm_cfft_f32(&arm_cfft_sR_f32_len1024, ifft_buffer, 1, 1);
    
    // overlap-add to the buffer
    for(uint16_t i = 0; i < CONV_LENGTH; i++)
    {
        if(i < sig_length)
        {
            d_out1[i] = ifft_buffer[2*i] + overlap_buffer1[i];
            d_out2[i] = ifft_buffer[2*i+1] + overlap_buffer2[i];
        }
        else if(i < BUFFER_LENGTH)
        {
            overlap_buffer1[i - sig_length] = overlap_buffer1[i] + ifft_buffer[2*i];
            overlap_buffer2[i - sig_length] = overlap_buffer2[i] + ifft_buffer[2*i+1];
        }
        else
        {
            overlap_buffer1[i - sig_length] = ifft_buffer[2*i];
            overlap_buffer2[i - sig_length] = ifft_buffer[2*i+1];
        }
    }    
    return;
}


//The overlap-save method uses previous inputs to perform a 
//more representative filtering operation
void filter_OS_FFT_RI(
    float32_t* save_buffer1,
    float32_t* save_buffer2,
    float32_t* fft_buffer, 
    float32_t* ifft_buffer,
    float32_t* d_in1, 
    float32_t* d_in2, 
    float32_t* d_out1, 
    float32_t* d_out2, 
    uint16_t sig_length)
{
    //shift the save buffers left by the input data size
    for(int i=0; i < FFT_LENGTH; i++)
    {
        if(i < (FFT_LENGTH - sig_length))
        {
            save_buffer1[i] = save_buffer1[i+sig_length];
            save_buffer2[i] = save_buffer2[i+sig_length];
        }
        else
        {
            save_buffer1[i] = d_in1[i - (FFT_LENGTH - sig_length)];   
            save_buffer2[i] = d_in2[i - (FFT_LENGTH - sig_length)]; 
        }
    }
    
    //fill the FFT buffer
    for(int i=0; i < FFT_LENGTH; i++)
    {
            fft_buffer[2*i] = save_buffer1[i];
            fft_buffer[2*i+1] = save_buffer2[i];
    }
    
    //perform FFT in place
    arm_cfft_f32(&arm_cfft_sR_f32_len1024, fft_buffer, 0, 1);
    
    //multiply with filter FFT
    arm_cmplx_mult_cmplx_f32(fft_buffer, fft_of_filter, ifft_buffer, FFT_LENGTH);
    
    //perform inverse FFT in place
    arm_cfft_f32(&arm_cfft_sR_f32_len1024, ifft_buffer, 1, 1);
    
    // copy to output buffer
    for(uint16_t i = FFT_LENGTH - sig_length; i < FFT_LENGTH; i++)
    {
        d_out1[i - FFT_LENGTH + sig_length] = ifft_buffer[2*i];
        d_out2[i - FFT_LENGTH + sig_length] = ifft_buffer[2*i+1];
    }    
    return;
}






void filter_conv_init()
{
    for(int i=0; i < BUFFER_LENGTH; i++)
    {
        l_buf[i] = 0;
        r_buf[i] = 0;
    }
    for(int i=0; i < PADDED_FILTER_LENGTH; i++)
    {
        if((i < AUDIO_BLOCK_SAMPLES - 1) || (i >= WIN_NUM_TAPS + AUDIO_BLOCK_SAMPLES - 1))
        {
         padded_filter[i] = 0;
        }
        else
        {
         padded_filter[i] = win_filter_coeffs[i - AUDIO_BLOCK_SAMPLES - 1];   
        }
    }
    
    return;
}



void filter_fft_init()
{
    for(int i=0; i < FFT_LENGTH; i++)
    {
        if(i < WIN_NUM_TAPS)
        {
            fft_of_filter[2*i] = win_filter_coeffs[i];
            fft_of_filter[2*i+1] = 0;
        }
        else
        {
            fft_of_filter[2*i] = 0;   
            fft_of_filter[2*i+1] = 0;
        }
    }
    
    arm_cfft_f32(&arm_cfft_sR_f32_len1024, fft_of_filter, 0, 1);
    
    return;
}


