CMSIS DSP library

Dependents:   KL25Z_FFT_Demo Hat_Board_v5_1 KL25Z_FFT_Demo_tony KL25Z_FFT_Demo_tony ... more

Fork of mbed-dsp by mbed official

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_rfft_fast_f32.c Source File

arm_rfft_fast_f32.c

00001 /* ----------------------------------------------------------------------
00002 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
00003 *
00004 * $Date:        17. January 2013
00005 * $Revision:    V1.4.1
00006 *
00007 * Project:      CMSIS DSP Library
00008 * Title:        arm_rfft_f32.c
00009 *
00010 * Description:  RFFT & RIFFT Floating point process function
00011 *
00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
00013 *
00014 * Redistribution and use in source and binary forms, with or without
00015 * modification, are permitted provided that the following conditions
00016 * are met:
00017 *   - Redistributions of source code must retain the above copyright
00018 *     notice, this list of conditions and the following disclaimer.
00019 *   - Redistributions in binary form must reproduce the above copyright
00020 *     notice, this list of conditions and the following disclaimer in
00021 *     the documentation and/or other materials provided with the
00022 *     distribution.
00023 *   - Neither the name of ARM LIMITED nor the names of its contributors
00024 *     may be used to endorse or promote products derived from this
00025 *     software without specific prior written permission.
00026 *
00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00038 * POSSIBILITY OF SUCH DAMAGE.
00039 * -------------------------------------------------------------------- */
00040 
00041 #include "arm_math.h"
00042 
00043 void stage_rfft_f32(
00044   arm_rfft_fast_instance_f32 * S,
00045   float32_t * p, float32_t * pOut)
00046 {
00047    uint32_t  k;                                /* Loop Counter                     */
00048    float32_t twR, twI;                         /* RFFT Twiddle coefficients        */
00049    float32_t * pCoeff = S->pTwiddleRFFT;  /* Points to RFFT Twiddle factors   */
00050    float32_t *pA = p;                          /* increasing pointer               */
00051    float32_t *pB = p;                          /* decreasing pointer               */
00052    float32_t xAR, xAI, xBR, xBI;                /* temporary variables              */
00053    float32_t t1a, t1b;                       /* temporary variables              */
00054    float32_t p0, p1, p2, p3;                   /* temporary variables              */
00055 
00056 
00057    k = (S->Sint).fftLen - 1;                    
00058 
00059    /* Pack first and last sample of the frequency domain together */
00060 
00061    xBR = pB[0];
00062    xBI = pB[1];
00063    xAR = pA[0];
00064    xAI = pA[1];
00065 
00066    twR = *pCoeff++ ;
00067    twI = *pCoeff++ ;
00068    
00069    // U1 = XA(1) + XB(1); % It is real
00070    t1a = xBR + xAR  ;
00071    
00072    // U2 = XB(1) - XA(1); % It is imaginary
00073    t1b = xBI + xAI  ;
00074 
00075    // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
00076    // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
00077    *pOut++ = 0.5f * ( t1a + t1b );
00078    *pOut++ = 0.5f * ( t1a - t1b );
00079 
00080    // XA(1) = 1/2*( U1 - imag(U2) +  i*( U1 +imag(U2) ));
00081    pB  = p + 2*k;
00082    pA += 2;
00083 
00084    do
00085    {
00086       /*
00087          function X = my_split_rfft(X, ifftFlag)
00088          % X is a series of real numbers
00089          L  = length(X);
00090          XC = X(1:2:end) +i*X(2:2:end);
00091          XA = fft(XC);
00092          XB = conj(XA([1 end:-1:2]));
00093          TW = i*exp(-2*pi*i*[0:L/2-1]/L).';
00094          for l = 2:L/2
00095             XA(l) = 1/2 * (XA(l) + XB(l) + TW(l) * (XB(l) - XA(l)));
00096          end
00097          XA(1) = 1/2* (XA(1) + XB(1) + TW(1) * (XB(1) - XA(1))) + i*( 1/2*( XA(1) + XB(1) + i*( XA(1) - XB(1))));
00098          X = XA;
00099       */
00100 
00101       xBI = pB[1];
00102       xBR = pB[0];
00103       xAR = pA[0];
00104       xAI = pA[1];
00105 
00106       twR = *pCoeff++;
00107       twI = *pCoeff++;
00108 
00109       t1a = xBR - xAR ;
00110       t1b = xBI + xAI ;
00111 
00112       // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
00113       // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
00114       p0 = twR * t1a;
00115       p1 = twI * t1a;
00116       p2 = twR * t1b;
00117       p3 = twI * t1b;
00118 
00119       *pOut++ = 0.5f * (xAR + xBR + p0 + p3 ); //xAR
00120       *pOut++ = 0.5f * (xAI - xBI + p1 - p2 ); //xAI
00121 
00122       pA += 2;
00123       pB -= 2;
00124       k--;
00125    } while(k > 0u);
00126 }
00127 
00128 /* Prepares data for inverse cfft */
00129 void merge_rfft_f32(
00130 arm_rfft_fast_instance_f32 * S,
00131 float32_t * p, float32_t * pOut)
00132 {
00133    uint32_t  k;                             /* Loop Counter                     */
00134    float32_t twR, twI;                      /* RFFT Twiddle coefficients        */
00135    float32_t *pCoeff = S->pTwiddleRFFT;     /* Points to RFFT Twiddle factors   */
00136    float32_t *pA = p;                       /* increasing pointer               */
00137    float32_t *pB = p;                       /* decreasing pointer               */
00138    float32_t xAR, xAI, xBR, xBI;            /* temporary variables              */
00139    float32_t t1a, t1b, r, s, t, u;          /* temporary variables              */
00140 
00141    k = (S->Sint).fftLen - 1;                    
00142 
00143    xAR = pA[0];
00144    xAI = pA[1];
00145 
00146    pCoeff += 2 ;
00147 
00148    *pOut++ = 0.5f * ( xAR + xAI );
00149    *pOut++ = 0.5f * ( xAR - xAI );
00150 
00151    pB  =  p + 2*k ;
00152    pA +=  2    ;
00153 
00154    while(k > 0u)
00155    {
00156       /* G is half of the frequency complex spectrum */
00157       //for k = 2:N
00158       //    Xk(k) = 1/2 * (G(k) + conj(G(N-k+2)) + Tw(k)*( G(k) - conj(G(N-k+2))));
00159       xBI =   pB[1]    ;
00160       xBR =   pB[0]    ;
00161       xAR =  pA[0];
00162       xAI =  pA[1];
00163 
00164       twR = *pCoeff++;
00165       twI = *pCoeff++;
00166 
00167       t1a = xAR - xBR ;
00168       t1b = xAI + xBI ;
00169 
00170       r = twR * t1a;
00171       s = twI * t1b;
00172       t = twI * t1a;
00173       u = twR * t1b;
00174 
00175       // real(tw * (xA - xB)) = twR * (xAR - xBR) - twI * (xAI - xBI);
00176       // imag(tw * (xA - xB)) = twI * (xAR - xBR) + twR * (xAI - xBI);
00177       *pOut++ = 0.5f * (xAR + xBR - r - s ); //xAR
00178       *pOut++ = 0.5f * (xAI - xBI + t - u ); //xAI
00179 
00180       pA += 2;
00181       pB -= 2;
00182       k--;
00183    }
00184 
00185 }
00186 
00187 /**
00188 * @ingroup groupTransforms
00189 */
00190 
00191 /**
00192  * @defgroup Fast Real FFT Functions
00193  *
00194  * \par
00195  * The CMSIS DSP library includes specialized algorithms for computing the
00196  * FFT of real data sequences.  The FFT is defined over complex data but
00197  * in many applications the input is real.  Real FFT algorithms take advantage
00198  * of the symmetry properties of the FFT and have a speed advantage over complex
00199  * algorithms of the same length.
00200  * \par
00201  * The Fast RFFT algorith relays on the mixed radix CFFT that save processor usage.
00202  * \par
00203  * The real length N forward FFT of a sequence is computed using the steps shown below.
00204  * \par
00205  * \image html RFFT.gif "Real Fast Fourier Transform"
00206  * \par
00207  * The real sequence is initially treated as if it were complex to perform a CFFT.
00208  * Later, a processing stage reshapes the data to obtain half of the frequency spectrum
00209  * in complex format. Except the first complex number that contains the two real numbers
00210  * X[0] and X[N/2] all the data is complex. In other words, the first complex sample
00211  * contains two real values packed.
00212  * \par
00213  * The input for the inverse RFFT should keep the same format as the output of the 
00214  * forward RFFT. A first processing stage pre-process the data to later perform an
00215  * inverse CFFT.
00216  * \par    
00217  * \image html RIFFT.gif "Real Inverse Fast Fourier Transform"    
00218  * \par    
00219  * The algorithms for floating-point, Q15, and Q31 data are slightly different
00220  * and we describe each algorithm in turn.
00221  * \par Floating-point
00222  * The main functions are <code>arm_rfft_fast_f32()</code>
00223  * and <code>arm_rfft_fast_init_f32()</code>.  The older functions
00224  * <code>arm_rfft_f32()</code> and <code>arm_rfft_init_f32()</code> have been
00225  * deprecated but are still documented.
00226  * \par
00227  * The FFT of a real N-point sequence has even symmetry in the frequency
00228  * domain.  The second half of the data equals the conjugate of the first half
00229  * flipped in frequency:
00230  * <pre>
00231  *X[0] - real data
00232  *X[1] - complex data
00233  *X[2] - complex data
00234  *... 
00235  *X[fftLen/2-1] - complex data
00236  *X[fftLen/2] - real data
00237  *X[fftLen/2+1] - conjugate of X[fftLen/2-1]
00238  *X[fftLen/2+2] - conjugate of X[fftLen/2-2]
00239  *... 
00240  *X[fftLen-1] - conjugate of X[1]
00241  * </pre>
00242  * Looking at the data, we see that we can uniquely represent the FFT using only
00243  * <pre>
00244  *N/2+1 samples:
00245  *X[0] - real data
00246  *X[1] - complex data
00247  *X[2] - complex data
00248  *... 
00249  *X[fftLen/2-1] - complex data
00250  *X[fftLen/2] - real data
00251  * </pre>
00252  * Looking more closely we see that the first and last samples are real valued.
00253  * They can be packed together and we can thus represent the FFT of an N-point
00254  * real sequence by N/2 complex values:
00255  * <pre>
00256  *X[0],X[N/2] - packed real data: X[0] + jX[N/2]
00257  *X[1] - complex data
00258  *X[2] - complex data
00259  *... 
00260  *X[fftLen/2-1] - complex data
00261  * </pre>
00262  * The real FFT functions pack the frequency domain data in this fashion.  The
00263  * forward transform outputs the data in this form and the inverse transform
00264  * expects input data in this form.  The function always performs the needed
00265  * bitreversal so that the input and output data is always in normal order.  The 
00266  * functions support lengths of [32, 64, 128, ..., 4096] samples.
00267  * \par
00268  * The forward and inverse real FFT functions apply the standard FFT scaling; no
00269  * scaling on the forward transform and 1/fftLen scaling on the inverse
00270  * transform.
00271  * \par Q15 and Q31
00272  * The real algorithms are defined in a similar manner and utilize N/2 complex
00273  * transforms behind the scenes.  In the case of fixed-point data, a radix-4
00274  * complex transform is performed and this limits the allows sequence lengths to
00275  * 128, 512, and 2048 samples.
00276  * \par
00277  * TBD.  We need to document input and output order of data.
00278  * \par
00279  * The complex transforms used internally include scaling to prevent fixed-point
00280  * overflows.  The overall scaling equals 1/(fftLen/2).
00281  * \par
00282  * A separate instance structure must be defined for each transform used but 
00283  * twiddle factor and bit reversal tables can be reused.
00284  * \par
00285  * There is also an associated initialization function for each data type. 
00286  * The initialization function performs the following operations:
00287  * - Sets the values of the internal structure fields.   
00288  * - Initializes twiddle factor table and bit reversal table pointers.
00289  * - Initializes the internal complex FFT data structure.
00290  * \par   
00291  * Use of the initialization function is optional.   
00292  * However, if the initialization function is used, then the instance structure 
00293  * cannot be placed into a const data section. To place an instance structure 
00294  * into a const data section, the instance structure should be manually 
00295  * initialized as follows:
00296  * <pre>
00297  *arm_rfft_instance_q31 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};    
00298  *arm_rfft_instance_q15 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};    
00299  * </pre>
00300  * where <code>fftLenReal</code> is the length of the real transform;
00301  * <code>fftLenBy2</code> length of  the internal complex transform.
00302  * <code>ifftFlagR</code> Selects forward (=0) or inverse (=1) transform.
00303  * <code>bitReverseFlagR</code> Selects bit reversed output (=0) or normal order
00304  * output (=1).
00305  * <code>twidCoefRModifier</code> stride modifier for the twiddle factor table.
00306  * The value is based on the FFT length;
00307  * <code>pTwiddleAReal</code>points to the A array of twiddle coefficients; 
00308  * <code>pTwiddleBReal</code>points to the B array of twiddle coefficients;    
00309  * <code>pCfft</code> points to the CFFT Instance structure. The CFFT structure
00310  * must also be initialized.  Refer to arm_cfft_radix4_f32() for details regarding    
00311  * static initialization of the complex FFT instance structure.    
00312  */
00313 
00314 /**
00315 * @addtogroup RealFFT
00316 * @{
00317 */
00318 
00319 /**
00320 * @brief Processing function for the floating-point real FFT.
00321 * @param[in]  *S              points to an arm_rfft_fast_instance_f32 structure.
00322 * @param[in]  *p              points to the input buffer.
00323 * @param[in]  *pOut           points to an arm_rfft_fast_instance_f32 structure.
00324 * @param[in]  ifftFlag        RFFT if flag is 0, RIFFT if flag is 1
00325 * @return none.
00326 */
00327 
00328 void arm_rfft_fast_f32(
00329 arm_rfft_fast_instance_f32 * S,
00330 float32_t * p, float32_t * pOut,
00331 uint8_t ifftFlag)
00332 {
00333    arm_cfft_instance_f32 * Sint = &(S->Sint);
00334    Sint->fftLen = S->fftLenRFFT / 2;
00335 
00336    /* Calculation of Real FFT */
00337    if(ifftFlag)
00338    {
00339       /*  Real FFT comression */
00340       merge_rfft_f32(S, p, pOut);
00341 
00342       /* Complex radix-4 IFFT process */
00343       arm_cfft_f32( Sint, pOut, ifftFlag, 1);
00344    }
00345    else
00346    {
00347       /* Calculation of RFFT of input */
00348       arm_cfft_f32( Sint, p, ifftFlag, 1);
00349    
00350       /*  Real FFT extraction */
00351       stage_rfft_f32(S, p, pOut);
00352    }
00353 }
00354