CMSIS DSP library

Dependents:   performance_timer Surfboard_ gps2rtty Capstone ... more

Legacy Warning

This is an mbed 2 library. To learn more about mbed OS 5, visit the docs.

Revision:
3:7a284390b0ce
Parent:
2:da51fb522205
Child:
5:3762170b6d4d
--- a/cmsis_dsp/TransformFunctions/arm_cfft_radix2_f32.c	Thu May 30 17:10:11 2013 +0100
+++ b/cmsis_dsp/TransformFunctions/arm_cfft_radix2_f32.c	Fri Nov 08 13:45:10 2013 +0000
@@ -1,8 +1,8 @@
 /* ----------------------------------------------------------------------   
-* Copyright (C) 2010 ARM Limited. All rights reserved.   
+* Copyright (C) 2010-2013 ARM Limited. All rights reserved.   
 *   
-* $Date:        15. February 2012  
-* $Revision: 	V1.1.0  
+* $Date:        17. January 2013  
+* $Revision: 	V1.4.1  
 *   
 * Project: 	    CMSIS DSP Library   
 * Title:	    arm_cfft_radix2_f32.c   
@@ -12,169 +12,103 @@
 *   
 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
 *  
-* Version 1.1.0 2012/02/15 
-*    Updated with more optimizations, bug fixes and minor API changes.  
-*   
-* Version 1.0.3 2010/11/29  
-*    Initial version   
+* Redistribution and use in source and binary forms, with or without 
+* modification, are permitted provided that the following conditions
+* are met:
+*   - Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*   - Redistributions in binary form must reproduce the above copyright
+*     notice, this list of conditions and the following disclaimer in
+*     the documentation and/or other materials provided with the 
+*     distribution.
+*   - Neither the name of ARM LIMITED nor the names of its contributors
+*     may be used to endorse or promote products derived from this
+*     software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+* POSSIBILITY OF SUCH DAMAGE.   
 * -------------------------------------------------------------------- */
 
 #include "arm_math.h"
 
+void arm_radix2_butterfly_f32(
+  float32_t * pSrc,
+  uint32_t fftLen,
+  float32_t * pCoef,
+  uint16_t twidCoefModifier);
+
+void arm_radix2_butterfly_inverse_f32(
+  float32_t * pSrc,
+  uint32_t fftLen,
+  float32_t * pCoef,
+  uint16_t twidCoefModifier,
+  float32_t onebyfftLen);
+
+extern void arm_bitreversal_f32(
+    float32_t * pSrc,
+    uint16_t fftSize,
+    uint16_t bitRevFactor,
+    uint16_t * pBitRevTab);
+
 /**   
- * @ingroup groupTransforms   
- */
+* @ingroup groupTransforms   
+*/
+
+/**   
+* @addtogroup ComplexFFT   
+* @{   
+*/
 
 /**   
- * @defgroup Radix2_CFFT_CIFFT Radix-2 Complex FFT Functions   
- *   
- * \par   
- * Complex Fast Fourier Transform(CFFT) and Complex Inverse Fast Fourier Transform(CIFFT) is an efficient algorithm to compute Discrete Fourier Transform(DFT) and Inverse Discrete Fourier Transform(IDFT).   
- * Computational complexity of CFFT reduces drastically when compared to DFT.   
- * \par   
- * This set of functions implements CFFT/CIFFT   
- * for Q15, Q31, and floating-point data types.  The functions operates on in-place buffer which uses same buffer for input and output.   
- * Complex input is stored in input buffer in an interleaved fashion.   
- *   
- * \par   
- * The functions operate on blocks of input and output data and each call to the function processes   
- * <code>2*fftLen</code> samples through the transform.  <code>pSrc</code>  points to In-place arrays containing <code>2*fftLen</code> values.   
- * \par  
- * The <code>pSrc</code> points to the array of in-place buffer of size <code>2*fftLen</code> and inputs and outputs are stored in an interleaved fashion as shown below.   
- * <pre> {real[0], imag[0], real[1], imag[1],..} </pre>   
- *   
- * \par Lengths supported by the transform:  
- * \par   
- * Internally, the function utilize a radix-2 decimation in frequency(DIF) algorithm   
- * and the size of the FFT supported are of the lengths [16, 32, 64, 128, 256, 512, 1024, 2048, 4096].  
- *    
- *   
- * \par Algorithm:   
- *   
- * <b>Complex Fast Fourier Transform:</b>   
- * \par    
- * Input real and imaginary data:   
- * <pre>   
- * x(n) = xa + j * ya   
- * x(n+N/2 ) = xb + j * yb   
- * </pre>   
- * where N is length of FFT   
- * \par   
- * Output real and imaginary data:   
- * <pre>   
- * X(2r) = xa'+ j * ya'   
- * X(2r+1) = xb'+ j * yb'   
- * </pre>   
- * \par   
- * Twiddle factors for radix-2 FFT:   
- * <pre>   
- * Wn = cosVal + j * (- sinVal)   
- * </pre>   
- *   
- * \par   
- * \image html CFFT_Radix2.gif "Radix-2 Decimation-in Frequency Complex Fast Fourier Transform"   
- *   
- * \par   
- * Output from Radix-2 CFFT Results in Digit reversal order. Interchange middle two branches of every butterfly results in Bit reversed output.   
- * \par   
- * <b> Butterfly CFFT equations:</b>   
- * <pre>   
- * xa' = xa + xb  
- * ya' = ya + yb  
- * xb' = (xa-xb)* cosVal + (ya-yb) * sinVal   
- * yb' = (ya-yb)* cosVal - (xa-xb) * sinVal   
- * </pre>   
- *   
- *   
- * <b>Complex Inverse Fast Fourier Transform:</b>   
- * \par   
- * CIFFT uses same twiddle factor table as CFFT with modifications in the design equation as shown below.   
- *   
- * \par   
- * <b> Modified Butterfly CIFFT equations:</b>   
- * <pre>   
- * xa' = xa + xb  
- * ya' = ya + yb  
- * xb' = (xa-xb)* cosVal - (ya-yb) * sinVal   
- * yb' = (ya-yb)* cosVal + (xa-xb) * sinVal   
- * </pre>   
- *   
- * \par Instance Structure   
- * A separate instance structure must be defined for each Instance but the twiddle factors and bit reversal tables can be reused.   
- * There are separate instance structure declarations for each of the 3 supported data types.   
- *   
- * \par Initialization Functions   
- * There is also an associated initialization function for each data type.   
- * The initialization function performs the following operations:   
- * - Sets the values of the internal structure fields.   
- * - Initializes twiddle factor table and bit reversal table pointers   
- * \par   
- * Use of the initialization function is optional.   
- * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.   
- * To place an instance structure into a const data section, the instance structure must be manually initialized.   
- * Manually initialize the instance structure as follows:   
- * <pre>   
- *arm_cfft_radix2_instance_f32 S = {fftLen, ifftFlag, bitReverseFlag, pTwiddle, pBitRevTable, twidCoefModifier, bitRevFactor, onebyfftLen};   
- *arm_cfft_radix2_instance_q31 S = {fftLen, ifftFlag, bitReverseFlag, pTwiddle, pBitRevTable, twidCoefModifier, bitRevFactor};   
- *arm_cfft_radix2_instance_q15 S = {fftLen, ifftFlag, bitReverseFlag, pTwiddle, pBitRevTable, twidCoefModifier, bitRevFactor};   
- * </pre>   
- * \par   
- * where <code>fftLen</code> length of CFFT/CIFFT; <code>ifftFlag</code> Flag for selection of CFFT or CIFFT(Set ifftFlag to calculate CIFFT otherwise calculates CFFT);   
- * <code>bitReverseFlag</code> Flag for selection of output order(Set bitReverseFlag to output in normal order otherwise output in bit reversed order);    
- * <code>pTwiddle</code>points to array of twiddle coefficients; <code>pBitRevTable</code> points to the array of bit reversal table.   
- * <code>twidCoefModifier</code> modifier for twiddle factor table which supports all FFT lengths with same table;    
- * <code>pBitRevTable</code> modifier for bit reversal table which supports all FFT lengths with same table.   
- * <code>onebyfftLen</code> value of 1/fftLen to calculate CIFFT;   
- *  
- * \par Fixed-Point Behavior   
- * Care must be taken when using the fixed-point versions of the CFFT/CIFFT function.   
- * Refer to the function specific documentation below for usage guidelines.   
- */
-
-
-/**   
- * @addtogroup Radix2_CFFT_CIFFT   
- * @{   
- */
-
-/**   
- * @details   
- * @brief Processing function for the floating-point Radix-2 CFFT/CIFFT.  
- * @param[in]      *S    points to an instance of the floating-point Radix-2 CFFT/CIFFT structure.  
- * @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.  
- * @return none.  
- */
+* @details
+* @brief Radix-2 CFFT/CIFFT.
+* @deprecated Do not use this function.  It has been superseded by \ref arm_cfft_f32 and will be removed
+* in the future.
+* @param[in]      *S    points to an instance of the floating-point Radix-2 CFFT/CIFFT structure.  
+* @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.  
+* @return none.
+*/
 
 void arm_cfft_radix2_f32(
-  const arm_cfft_radix2_instance_f32 * S,
-  float32_t * pSrc)
+const arm_cfft_radix2_instance_f32 * S,
+float32_t * pSrc)
 {
 
-  if(S->ifftFlag == 1u)
-  {
-    /*  Complex IFFT radix-2  */
-    arm_radix2_butterfly_inverse_f32(pSrc, S->fftLen, S->pTwiddle,
-                                     S->twidCoefModifier, S->onebyfftLen);
-  }
-  else
-  {
-    /*  Complex FFT radix-2  */
-    arm_radix2_butterfly_f32(pSrc, S->fftLen, S->pTwiddle,
-                             S->twidCoefModifier);
-  }
+   if(S->ifftFlag == 1u)
+   {
+      /*  Complex IFFT radix-2  */
+      arm_radix2_butterfly_inverse_f32(pSrc, S->fftLen, S->pTwiddle,
+      S->twidCoefModifier, S->onebyfftLen);
+   }
+   else
+   {
+      /*  Complex FFT radix-2  */
+      arm_radix2_butterfly_f32(pSrc, S->fftLen, S->pTwiddle,
+      S->twidCoefModifier);
+   }
 
-  if(S->bitReverseFlag == 1u)
-  {
-    /*  Bit Reversal */
-    arm_bitreversal_f32(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
-  }
+   if(S->bitReverseFlag == 1u)
+   {
+      /*  Bit Reversal */
+      arm_bitreversal_f32(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
+   }
 
 }
 
 
 /**    
- * @} end of Radix2_CFFT_CIFFT group    
- */
+* @} end of ComplexFFT group    
+*/
 
 
 
@@ -183,329 +117,369 @@
 ** ------------------------------------------------------------------- */
 
 /*    
- * @brief  Core function for the floating-point CFFT butterfly process.   
- * @param[in, out] *pSrc            points to the in-place buffer of floating-point data type.   
- * @param[in]      fftLen           length of the FFT.   
- * @param[in]      *pCoef           points to the twiddle coefficient buffer.   
- * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.   
- * @return none.   
- */
+* @brief  Core function for the floating-point CFFT butterfly process.   
+* @param[in, out] *pSrc            points to the in-place buffer of floating-point data type.   
+* @param[in]      fftLen           length of the FFT.   
+* @param[in]      *pCoef           points to the twiddle coefficient buffer.   
+* @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.   
+* @return none.   
+*/
 
 void arm_radix2_butterfly_f32(
-  float32_t * pSrc,
-  uint32_t fftLen,
-  float32_t * pCoef,
-  uint16_t twidCoefModifier)
+float32_t * pSrc,
+uint32_t fftLen,
+float32_t * pCoef,
+uint16_t twidCoefModifier)
 {
 
-  int i, j, k, l;
-  int n1, n2, ia;
-  float32_t xt, yt, cosVal, sinVal;
-
-#ifndef ARM_MATH_CM0
-
-  /*  Initializations for the first stage */
-  n2 = fftLen;
-
-  n1 = n2;
-  n2 = n2 >> 1;
-  ia = 0;
+   uint32_t i, j, k, l;
+   uint32_t n1, n2, ia;
+   float32_t xt, yt, cosVal, sinVal;
+   float32_t p0, p1, p2, p3;
+   float32_t a0, a1;
 
-  // loop for groups 
-  for (i = 0; i < n2; i++)
-  {
-    cosVal = pCoef[ia * 2];
-    sinVal = pCoef[(ia * 2) + 1];
-
-    /*  Twiddle coefficients index modifier */
-    ia = ia + twidCoefModifier;
-
-    /*  index calculation for the input as, */
-    /*  pSrc[i + 0], pSrc[i + fftLen/1] */
-    l = i + n2;
+#ifndef ARM_MATH_CM0_FAMILY
 
-    /*  Butterfly implementation */
-    xt = pSrc[2 * i] - pSrc[2 * l];
-    pSrc[2 * i] = pSrc[2 * i] + pSrc[2 * l];
-
-    yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
-    pSrc[2 * i + 1] = pSrc[2 * l + 1] + pSrc[2 * i + 1];
-
-    pSrc[2u * l] = xt * cosVal + yt * sinVal;
-
-    pSrc[2u * l + 1u] = yt * cosVal - xt * sinVal;
-
-  }                             // groups loop end 
+   /*  Initializations for the first stage */
+   n2 = fftLen >> 1;
+   ia = 0;
+   i = 0;
 
-  twidCoefModifier = twidCoefModifier << 1u;
-
-  // loop for stage 
-  for (k = fftLen / 2; k > 2; k = k >> 1)
-  {
-    n1 = n2;
-    n2 = n2 >> 1;
-    ia = 0;
-
-    // loop for groups 
-    for (j = 0; j < n2; j++)
-    {
+   // loop for groups 
+   for (k = n2; k > 0; k--)
+   {
       cosVal = pCoef[ia * 2];
       sinVal = pCoef[(ia * 2) + 1];
-      ia = ia + twidCoefModifier;
+
+      /*  Twiddle coefficients index modifier */
+      ia += twidCoefModifier;
 
-      // loop for butterfly 
-      for (i = j; i < fftLen; i += n1)
-      {
-        l = i + n2;
-        xt = pSrc[2 * i] - pSrc[2 * l];
-        pSrc[2 * i] = pSrc[2 * i] + pSrc[2 * l];
+      /*  index calculation for the input as, */
+      /*  pSrc[i + 0], pSrc[i + fftLen/2] */
+      l = i + n2;
+
+      /*  Butterfly implementation */
+      a0 = pSrc[2 * i] + pSrc[2 * l];
+      xt = pSrc[2 * i] - pSrc[2 * l];
 
-        yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
-        pSrc[2 * i + 1] = pSrc[2 * l + 1] + pSrc[2 * i + 1];
-
-        pSrc[2u * l] = xt * cosVal + yt * sinVal;
-
-        pSrc[2u * l + 1u] = yt * cosVal - xt * sinVal;
+      yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+      a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+      
+      p0 = xt * cosVal;
+      p1 = yt * sinVal;
+      p2 = yt * cosVal;
+      p3 = xt * sinVal;  
+      
+      pSrc[2 * i]     = a0;   
+      pSrc[2 * i + 1] = a1;       
+      
+      pSrc[2 * l]     = p0 + p1;
+      pSrc[2 * l + 1] = p2 - p3;
+      
+      i++;
+   }                             // groups loop end 
 
-      }                         // butterfly loop end 
+   twidCoefModifier <<= 1u;
 
-    }                           // groups loop end 
-
-    twidCoefModifier = twidCoefModifier << 1u;
-  }                             // stages loop end 
+   // loop for stage 
+   for (k = n2; k > 2; k = k >> 1)
+   {
+      n1 = n2;
+      n2 = n2 >> 1;
+      ia = 0;
 
-  n1 = n2;
-  n2 = n2 >> 1;
-  ia = 0;
+      // loop for groups 
+      j = 0;
+      do
+      {
+         cosVal = pCoef[ia * 2];
+         sinVal = pCoef[(ia * 2) + 1];
+         ia += twidCoefModifier;
 
-  cosVal = pCoef[ia * 2];
-  sinVal = pCoef[(ia * 2) + 1];
-  ia = ia + twidCoefModifier;
+         // loop for butterfly 
+         i = j;
+         do
+         {
+            l = i + n2;
+            a0 = pSrc[2 * i] + pSrc[2 * l];
+            xt = pSrc[2 * i] - pSrc[2 * l];
 
-  // loop for butterfly 
-  for (i = 0; i < fftLen; i += n1)
-  {
-    l = i + n2;
-    xt = pSrc[2 * i] - pSrc[2 * l];
-    pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
+            yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+            a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+            
+            p0 = xt * cosVal;
+            p1 = yt * sinVal;
+            p2 = yt * cosVal;
+            p3 = xt * sinVal;  
+            
+            pSrc[2 * i] = a0;   
+            pSrc[2 * i + 1] = a1;       
+            
+            pSrc[2 * l]     = p0 + p1;
+            pSrc[2 * l + 1] = p2 - p3;
+            
+            i += n1;
+         } while( i < fftLen );                        // butterfly loop end 
+         j++;
+      } while( j < n2);                          // groups loop end 
+      twidCoefModifier <<= 1u;
+   }                             // stages loop end 
 
-    yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
-    pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
+   // loop for butterfly 
+   for (i = 0; i < fftLen; i += 2)
+   {
+      a0 = pSrc[2 * i] + pSrc[2 * i + 2];
+      xt = pSrc[2 * i] - pSrc[2 * i + 2];
 
-    pSrc[2u * l] = xt;
-
-    pSrc[2u * l + 1u] = yt;
-
-  }                             // groups loop end 
+      yt = pSrc[2 * i + 1] - pSrc[2 * i + 3];
+      a1 = pSrc[2 * i + 3] + pSrc[2 * i + 1];
+      
+      pSrc[2 * i] = a0;   
+      pSrc[2 * i + 1] = a1;
+      pSrc[2 * i + 2] = xt;
+      pSrc[2 * i + 3] = yt;
+   }                             // butterfly loop end 
 
 #else
-
-  //N = fftLen; 
-  n2 = fftLen;
+ 
+   n2 = fftLen;
 
-  // loop for stage 
-  for (k = fftLen; k > 1; k = k >> 1)
-  {
-    n1 = n2;
-    n2 = n2 >> 1;
-    ia = 0;
+   // loop for stage 
+   for (k = fftLen; k > 1; k = k >> 1)
+   {
+      n1 = n2;
+      n2 = n2 >> 1;
+      ia = 0;
 
-    // loop for groups 
-    for (j = 0; j < n2; j++)
-    {
-      cosVal = pCoef[ia * 2];
-      sinVal = pCoef[(ia * 2) + 1];
-      ia = ia + twidCoefModifier;
+      // loop for groups 
+      j = 0;
+      do
+      {
+         cosVal = pCoef[ia * 2];
+         sinVal = pCoef[(ia * 2) + 1];
+         ia += twidCoefModifier;
 
-      // loop for butterfly 
-      for (i = j; i < fftLen; i += n1)
-      {
-        l = i + n2;
-        xt = pSrc[2 * i] - pSrc[2 * l];
-        pSrc[2 * i] = pSrc[2 * i] + pSrc[2 * l];
-
-        yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
-        pSrc[2 * i + 1] = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+         // loop for butterfly 
+         i = j;
+         do
+         {
+            l = i + n2;
+            a0 = pSrc[2 * i] + pSrc[2 * l];
+            xt = pSrc[2 * i] - pSrc[2 * l];
 
-        pSrc[2 * l] = (cosVal * xt + sinVal * yt);      // >> 15; 
-        pSrc[2 * l + 1] = (cosVal * yt - sinVal * xt);  // >> 15; 
+            yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+            a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+            
+            p0 = xt * cosVal;
+            p1 = yt * sinVal;
+            p2 = yt * cosVal;
+            p3 = xt * sinVal;  
+            
+            pSrc[2 * i] = a0;   
+            pSrc[2 * i + 1] = a1;       
+            
+            pSrc[2 * l]     = p0 + p1;
+            pSrc[2 * l + 1] = p2 - p3;
+            
+            i += n1;
+         } while(i < fftLen);
+         j++;
+      } while(j < n2);
+      twidCoefModifier <<= 1u;
+   }
 
-      }
-    }
-    twidCoefModifier = twidCoefModifier << 1u;
-  }
-
-#endif //    #ifndef ARM_MATH_CM0
+#endif //    #ifndef ARM_MATH_CM0_FAMILY
 
 }
 
 
 void arm_radix2_butterfly_inverse_f32(
-  float32_t * pSrc,
-  uint32_t fftLen,
-  float32_t * pCoef,
-  uint16_t twidCoefModifier,
-  float32_t onebyfftLen)
+float32_t * pSrc,
+uint32_t fftLen,
+float32_t * pCoef,
+uint16_t twidCoefModifier,
+float32_t onebyfftLen)
 {
 
-  int i, j, k, l;
-  int n1, n2, ia;
-  float32_t xt, yt, cosVal, sinVal;
-
-#ifndef ARM_MATH_CM0
-
-  //N = fftLen; 
-  n2 = fftLen;
-
-  n1 = n2;
-  n2 = n2 >> 1;
-  ia = 0;
-
-  // loop for groups 
-  for (i = 0; i < n2; i++)
-  {
-    cosVal = pCoef[ia * 2];
-    sinVal = pCoef[(ia * 2) + 1];
-    ia = ia + twidCoefModifier;
+   uint32_t i, j, k, l;
+   uint32_t n1, n2, ia;
+   float32_t xt, yt, cosVal, sinVal;
+   float32_t p0, p1, p2, p3;
+   float32_t a0, a1;
 
-    l = i + n2;
-    xt = pSrc[2 * i] - pSrc[2 * l];
-    pSrc[2 * i] = pSrc[2 * i] + pSrc[2 * l];
-
-    yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
-    pSrc[2 * i + 1] = pSrc[2 * l + 1] + pSrc[2 * i + 1];
-
-    pSrc[2u * l] = xt * cosVal - yt * sinVal;
-
-    pSrc[2u * l + 1u] = yt * cosVal + xt * sinVal;
-
-  }                             // groups loop end 
+#ifndef ARM_MATH_CM0_FAMILY
 
-  twidCoefModifier = twidCoefModifier << 1u;
+   n2 = fftLen >> 1;
+   ia = 0;
 
-  // loop for stage 
-  for (k = fftLen / 2; k > 2; k = k >> 1)
-  {
-    n1 = n2;
-    n2 = n2 >> 1;
-    ia = 0;
-
-    // loop for groups 
-    for (j = 0; j < n2; j++)
-    {
+   // loop for groups 
+   for (i = 0; i < n2; i++)
+   {
       cosVal = pCoef[ia * 2];
       sinVal = pCoef[(ia * 2) + 1];
-      ia = ia + twidCoefModifier;
+      ia += twidCoefModifier;
 
-      // loop for butterfly 
-      for (i = j; i < fftLen; i += n1)
-      {
-        l = i + n2;
-        xt = pSrc[2 * i] - pSrc[2 * l];
-        pSrc[2 * i] = pSrc[2 * i] + pSrc[2 * l];
+      l = i + n2;
+      a0 = pSrc[2 * i] + pSrc[2 * l];
+      xt = pSrc[2 * i] - pSrc[2 * l];
 
-        yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
-        pSrc[2 * i + 1] = pSrc[2 * l + 1] + pSrc[2 * i + 1];
-
-        pSrc[2u * l] = xt * cosVal - yt * sinVal;
-
-        pSrc[2u * l + 1u] = yt * cosVal + xt * sinVal;
+      yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+      a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+      
+      p0 = xt * cosVal;
+      p1 = yt * sinVal;
+      p2 = yt * cosVal;
+      p3 = xt * sinVal;  
+      
+      pSrc[2 * i] = a0;   
+      pSrc[2 * i + 1] = a1;       
+      
+      pSrc[2 * l]     = p0 - p1;
+      pSrc[2 * l + 1] = p2 + p3;  
+   }                             // groups loop end 
 
-      }                         // butterfly loop end 
+   twidCoefModifier <<= 1u;
 
-    }                           // groups loop end 
+   // loop for stage 
+   for (k = fftLen / 2; k > 2; k = k >> 1)
+   {
+      n1 = n2;
+      n2 = n2 >> 1;
+      ia = 0;
 
-    twidCoefModifier = twidCoefModifier << 1u;
-  }                             // stages loop end 
+      // loop for groups 
+      j = 0;
+      do
+      {
+         cosVal = pCoef[ia * 2];
+         sinVal = pCoef[(ia * 2) + 1];
+         ia += twidCoefModifier;
 
-  n1 = n2;
-  n2 = n2 >> 1;
-  ia = 0;
+         // loop for butterfly 
+         i = j;
+         do
+         {
+            l = i + n2;
+            a0 = pSrc[2 * i] + pSrc[2 * l];
+            xt = pSrc[2 * i] - pSrc[2 * l];
 
-  cosVal = pCoef[ia * 2];
-  sinVal = pCoef[(ia * 2) + 1];
-  ia = ia + twidCoefModifier;
+            yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+            a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+            
+            p0 = xt * cosVal;
+            p1 = yt * sinVal;
+            p2 = yt * cosVal;
+            p3 = xt * sinVal;  
+            
+            pSrc[2 * i] = a0;   
+            pSrc[2 * i + 1] = a1;       
+            
+            pSrc[2 * l]     = p0 - p1;
+            pSrc[2 * l + 1] = p2 + p3; 
 
-  // loop for butterfly 
-  for (i = 0; i < fftLen; i += n1)
-  {
-    l = i + n2;
-    xt = pSrc[2 * i] - pSrc[2 * l];
-    pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]) * onebyfftLen;
+            i += n1;
+         } while( i < fftLen );                 // butterfly loop end 
+         j++;
+      } while(j < n2);                      // groups loop end 
+
+      twidCoefModifier <<= 1u;
+   }                             // stages loop end 
 
-    yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
-    pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) * onebyfftLen;
-
-    pSrc[2u * l] = xt * onebyfftLen;
-
-    pSrc[2u * l + 1u] = yt * onebyfftLen;
-
-  }                             // butterfly loop end 
+   // loop for butterfly 
+   for (i = 0; i < fftLen; i += 2)
+   {   
+      a0 = pSrc[2 * i] + pSrc[2 * i + 2];
+      xt = pSrc[2 * i] - pSrc[2 * i + 2];
+      
+      a1 = pSrc[2 * i + 3] + pSrc[2 * i + 1];
+      yt = pSrc[2 * i + 1] - pSrc[2 * i + 3];
+      
+      p0 = a0 * onebyfftLen;
+      p2 = xt * onebyfftLen;
+      p1 = a1 * onebyfftLen;
+      p3 = yt * onebyfftLen; 
+      
+      pSrc[2 * i] = p0;
+      pSrc[2 * i + 1] = p1;  
+      pSrc[2 * i + 2] = p2;       
+      pSrc[2 * i + 3] = p3;
+   }                             // butterfly loop end 
 
 #else
 
-  //N = fftLen; 
-  n2 = fftLen;
+   n2 = fftLen;
 
-  // loop for stage 
-  for (k = fftLen; k > 2; k = k >> 1)
-  {
-    n1 = n2;
-    n2 = n2 >> 1;
-    ia = 0;
+   // loop for stage 
+   for (k = fftLen; k > 2; k = k >> 1)
+   {
+      n1 = n2;
+      n2 = n2 >> 1;
+      ia = 0;
 
-    // loop for groups 
-    for (j = 0; j < n2; j++)
-    {
-      cosVal = pCoef[ia * 2];
-      sinVal = pCoef[(ia * 2) + 1];
-      ia = ia + twidCoefModifier;
-
-      // loop for butterfly 
-      for (i = j; i < fftLen; i += n1)
+      // loop for groups 
+      j = 0;
+      do
       {
-        l = i + n2;
-        xt = pSrc[2 * i] - pSrc[2 * l];
-        pSrc[2 * i] = pSrc[2 * i] + pSrc[2 * l];
+         cosVal = pCoef[ia * 2];
+         sinVal = pCoef[(ia * 2) + 1];
+         ia = ia + twidCoefModifier;
 
-        yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
-        pSrc[2 * i + 1] = pSrc[2 * l + 1] + pSrc[2 * i + 1];
-
-        pSrc[2u * l] = xt * cosVal - yt * sinVal;
-
-        pSrc[2u * l + 1u] = yt * cosVal + xt * sinVal;
+         // loop for butterfly 
+         i = j;
+         do
+         {
+            l = i + n2;
+            a0 = pSrc[2 * i] + pSrc[2 * l];
+            xt = pSrc[2 * i] - pSrc[2 * l];
 
-      }                         // butterfly loop end 
-
-    }                           // groups loop end 
-
-    twidCoefModifier = twidCoefModifier << 1u;
-  }                             // stages loop end 
+            yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+            a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+            
+            p0 = xt * cosVal;
+            p1 = yt * sinVal;
+            p2 = yt * cosVal;
+            p3 = xt * sinVal;  
+            
+            pSrc[2 * i] = a0;   
+            pSrc[2 * i + 1] = a1;       
+            
+            pSrc[2 * l]     = p0 - p1;
+            pSrc[2 * l + 1] = p2 + p3;  
+            
+            i += n1;
+         } while( i < fftLen );                    // butterfly loop end 
+         j++;
+      } while( j < n2 );                      // groups loop end 
 
-  n1 = n2;
-  n2 = n2 >> 1;
-  ia = 0;
+      twidCoefModifier = twidCoefModifier << 1u;
+   }                             // stages loop end 
 
-  cosVal = pCoef[ia * 2];
-  sinVal = pCoef[(ia * 2) + 1];
-  ia = ia + twidCoefModifier;
+   n1 = n2;
+   n2 = n2 >> 1;
 
-  // loop for butterfly 
-  for (i = 0; i < fftLen; i += n1)
-  {
-    l = i + n2;
-    xt = pSrc[2 * i] - pSrc[2 * l];
-    pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]) * onebyfftLen;
+   // loop for butterfly 
+   for (i = 0; i < fftLen; i += n1)
+   {
+      l = i + n2;
+      
+      a0 = pSrc[2 * i] + pSrc[2 * l];
+      xt = pSrc[2 * i] - pSrc[2 * l];
+      
+      a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+      yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+      
+      p0 = a0 * onebyfftLen;
+      p2 = xt * onebyfftLen;
+      p1 = a1 * onebyfftLen;
+      p3 = yt * onebyfftLen; 
+      
+      pSrc[2 * i] = p0;
+      pSrc[2u * l] = p2;
+     
+      pSrc[2 * i + 1] = p1;    
+      pSrc[2u * l + 1u] = p3;
+   }                             // butterfly loop end 
 
-    yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
-    pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) * onebyfftLen;
-
-    pSrc[2u * l] = xt * onebyfftLen;
-
-    pSrc[2u * l + 1u] = yt * onebyfftLen;
-
-  }                             // butterfly loop end 
-
-#endif //      #ifndef ARM_MATH_CM0
+#endif //      #ifndef ARM_MATH_CM0_FAMILY
 
 }