CMSIS DSP Library from CMSIS 2.0. See http://www.onarm.com/cmsis/ for full details

Dependents:   K22F_DSP_Matrix_least_square BNO055-ELEC3810 1BNO055 ECE4180Project--Slave2 ... more

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_cfft_radix4_q31.c Source File

arm_cfft_radix4_q31.c

00001 /* ----------------------------------------------------------------------  
00002 * Copyright (C) 2010 ARM Limited. All rights reserved.  
00003 *  
00004 * $Date:        29. November 2010  
00005 * $Revision:    V1.0.3  
00006 *  
00007 * Project:      CMSIS DSP Library  
00008 * Title:        arm_cfft_radix4_q31.c  
00009 *  
00010 * Description:  This file has function definition of Radix-4 FFT & IFFT function and  
00011 *               In-place bit reversal using bit reversal table  
00012 *  
00013 * Target Processor: Cortex-M4/Cortex-M3
00014 *  
00015 * Version 1.0.3 2010/11/29 
00016 *    Re-organized the CMSIS folders and updated documentation.  
00017 *   
00018 * Version 1.0.2 2010/11/11  
00019 *    Documentation updated.   
00020 *  
00021 * Version 1.0.1 2010/10/05   
00022 *    Production release and review comments incorporated.  
00023 *  
00024 * Version 1.0.0 2010/09/20   
00025 *    Production release and review comments incorporated.  
00026 *  
00027 * Version 0.0.5  2010/04/26   
00028 *    incorporated review comments and updated with latest CMSIS layer  
00029 *  
00030 * Version 0.0.3  2010/03/10   
00031 *    Initial version  
00032 * -------------------------------------------------------------------- */ 
00033 #include "arm_math.h" 
00034  
00035  
00036 /**  
00037  * @ingroup groupTransforms  
00038  */ 
00039  
00040 /**  
00041  * @addtogroup CFFT_CIFFT  
00042  * @{  
00043  */ 
00044  
00045 /**  
00046  * @details  
00047  * @brief Processing function for the Q31 CFFT/CIFFT.  
00048  * @param[in]      *S    points to an instance of the Q31 CFFT/CIFFT structure. 
00049  * @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place. 
00050  * @return none.  
00051  *   
00052  * \par Input and output formats:  
00053  * \par  
00054  * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process. 
00055  * Hence the output format is different for different FFT sizes.  
00056  * The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT: 
00057  * \par 
00058  * \image html CFFTQ31.gif "Input and Output Formats for Q31 CFFT"  
00059  * \image html CIFFTQ31.gif "Input and Output Formats for Q31 CIFFT"  
00060  *  
00061  */ 
00062  
00063 void arm_cfft_radix4_q31( 
00064   const arm_cfft_radix4_instance_q31 * S, 
00065   q31_t * pSrc) 
00066 { 
00067   if(S->ifftFlag == 1u) 
00068   { 
00069     /* Complex IFFT radix-4 */ 
00070     arm_radix4_butterfly_inverse_q31(pSrc, S->fftLen, S->pTwiddle, 
00071                                      S->twidCoefModifier); 
00072   } 
00073   else 
00074   { 
00075     /* Complex FFT radix-4 */ 
00076     arm_radix4_butterfly_q31(pSrc, S->fftLen, S->pTwiddle, 
00077                              S->twidCoefModifier); 
00078   } 
00079  
00080  
00081   if(S->bitReverseFlag == 1u) 
00082   { 
00083     /*  Bit Reversal */ 
00084     arm_bitreversal_q31(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable); 
00085   } 
00086  
00087 } 
00088  
00089 /**  
00090  * @} end of CFFT_CIFFT group  
00091  */ 
00092  
00093 /*  
00094 * Radix-4 FFT algorithm used is :  
00095 *  
00096 * Input real and imaginary data:  
00097 * x(n) = xa + j * ya  
00098 * x(n+N/4 ) = xb + j * yb  
00099 * x(n+N/2 ) = xc + j * yc  
00100 * x(n+3N 4) = xd + j * yd  
00101 *  
00102 *  
00103 * Output real and imaginary data:  
00104 * x(4r) = xa'+ j * ya'  
00105 * x(4r+1) = xb'+ j * yb'  
00106 * x(4r+2) = xc'+ j * yc'  
00107 * x(4r+3) = xd'+ j * yd'  
00108 *  
00109 *  
00110 * Twiddle factors for radix-4 FFT:  
00111 * Wn = co1 + j * (- si1)  
00112 * W2n = co2 + j * (- si2)  
00113 * W3n = co3 + j * (- si3)  
00114 *  
00115 *  Butterfly implementation:  
00116 * xa' = xa + xb + xc + xd  
00117 * ya' = ya + yb + yc + yd  
00118 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)  
00119 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)  
00120 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)  
00121 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)  
00122 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)  
00123 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)  
00124 *  
00125 */ 
00126  
00127 /**  
00128  * @brief  Core function for the Q31 CFFT butterfly process. 
00129  * @param[in, out] *pSrc            points to the in-place buffer of Q31 data type. 
00130  * @param[in]      fftLen           length of the FFT. 
00131  * @param[in]      *pCoef           points to twiddle coefficient buffer. 
00132  * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 
00133  * @return none. 
00134  */ 
00135  
00136 void arm_radix4_butterfly_q31( 
00137   q31_t * pSrc, 
00138   uint32_t fftLen, 
00139   q31_t * pCoef, 
00140   uint32_t twidCoefModifier) 
00141 { 
00142   uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k; 
00143   q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3; 
00144  
00145  
00146   /* Total process is divided into three stages */ 
00147  
00148   /* process first stage, middle stages, & last stage */ 
00149  
00150  
00151   /* start of first stage process */ 
00152  
00153   /*  Initializations for the first stage */ 
00154   n2 = fftLen; 
00155   n1 = n2; 
00156   /* n2 = fftLen/4 */ 
00157   n2 >>= 2u; 
00158   i0 = 0u; 
00159   ia1 = 0u; 
00160  
00161   j = n2; 
00162  
00163   /*  Calculation of first stage */ 
00164   do 
00165   { 
00166     /*  index calculation for the input as, */ 
00167     /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */ 
00168     i1 = i0 + n2; 
00169     i2 = i1 + n2; 
00170     i3 = i2 + n2; 
00171  
00172     /* input is in 1.31(q31) format and provide 4 guard bits for the input */ 
00173  
00174     /*  Butterfly implementation */ 
00175     /* xa + xc */ 
00176     r1 = (pSrc[(2u * i0)] >> 4u) + (pSrc[(2u * i2)] >> 4u); 
00177     /* xa - xc */ 
00178     r2 = (pSrc[2u * i0] >> 4u) - (pSrc[2u * i2] >> 4u); 
00179  
00180     /* ya + yc */ 
00181     s1 = (pSrc[(2u * i0) + 1u] >> 4u) + (pSrc[(2u * i2) + 1u] >> 4u); 
00182     /* ya - yc */ 
00183     s2 = (pSrc[(2u * i0) + 1u] >> 4u) - (pSrc[(2u * i2) + 1u] >> 4u); 
00184  
00185     /* xb + xd */ 
00186     t1 = (pSrc[2u * i1] >> 4u) + (pSrc[2u * i3] >> 4u); 
00187  
00188     /* xa' = xa + xb + xc + xd */ 
00189     pSrc[2u * i0] = (r1 + t1); 
00190     /* (xa + xc) - (xb + xd) */ 
00191     r1 = r1 - t1; 
00192     /* yb + yd */ 
00193     t2 = (pSrc[(2u * i1) + 1u] >> 4u) + (pSrc[(2u * i3) + 1u] >> 4u); 
00194     /* ya' = ya + yb + yc + yd */ 
00195     pSrc[(2u * i0) + 1u] = (s1 + t2); 
00196  
00197     /* (ya + yc) - (yb + yd) */ 
00198     s1 = s1 - t2; 
00199  
00200     /* yb - yd */ 
00201     t1 = (pSrc[(2u * i1) + 1u] >> 4u) - (pSrc[(2u * i3) + 1u] >> 4u); 
00202     /* xb - xd */ 
00203     t2 = (pSrc[2u * i1] >> 4u) - (pSrc[2u * i3] >> 4u); 
00204  
00205     /*  index calculation for the coefficients */ 
00206     ia2 = 2u * ia1; 
00207     co2 = pCoef[ia2 * 2u]; 
00208     si2 = pCoef[(ia2 * 2u) + 1u]; 
00209  
00210     /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */ 
00211     pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) + 
00212                      ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u; 
00213  
00214     /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */ 
00215     pSrc[(2u * i1) + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) - 
00216                             ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u; 
00217  
00218     /* (xa - xc) + (yb - yd) */ 
00219     r1 = r2 + t1; 
00220     /* (xa - xc) - (yb - yd) */ 
00221     r2 = r2 - t1; 
00222  
00223     /* (ya - yc) - (xb - xd) */ 
00224     s1 = s2 - t2; 
00225     /* (ya - yc) + (xb - xd) */ 
00226     s2 = s2 + t2; 
00227  
00228     co1 = pCoef[ia1 * 2u]; 
00229     si1 = pCoef[(ia1 * 2u) + 1u]; 
00230  
00231     /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */ 
00232     pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) + 
00233                      ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u; 
00234  
00235     /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */ 
00236     pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) - 
00237                             ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u; 
00238  
00239     /*  index calculation for the coefficients */ 
00240     ia3 = 3u * ia1; 
00241     co3 = pCoef[ia3 * 2u]; 
00242     si3 = pCoef[(ia3 * 2u) + 1u]; 
00243  
00244     /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */ 
00245     pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) + 
00246                      ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u; 
00247  
00248     /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */ 
00249     pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) - 
00250                             ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u; 
00251  
00252     /*  Twiddle coefficients index modifier */ 
00253     ia1 = ia1 + twidCoefModifier; 
00254  
00255     /*  Updating input index */ 
00256     i0 = i0 + 1u; 
00257  
00258   } while(--j); 
00259  
00260   /* end of first stage process */ 
00261  
00262   /* data is in 5.27(q27) format */ 
00263  
00264  
00265   /* start of Middle stages process */ 
00266  
00267  
00268   /* each stage in middle stages provides two down scaling of the input */ 
00269  
00270   twidCoefModifier <<= 2u; 
00271  
00272  
00273   for (k = fftLen / 4u; k > 4u; k >>= 2u) 
00274   { 
00275     /*  Initializations for the first stage */ 
00276     n1 = n2; 
00277     n2 >>= 2u; 
00278     ia1 = 0u; 
00279  
00280     /*  Calculation of first stage */ 
00281     for (j = 0u; j <= (n2 - 1u); j++) 
00282     { 
00283       /*  index calculation for the coefficients */ 
00284       ia2 = ia1 + ia1; 
00285       ia3 = ia2 + ia1; 
00286       co1 = pCoef[ia1 * 2u]; 
00287       si1 = pCoef[(ia1 * 2u) + 1u]; 
00288       co2 = pCoef[ia2 * 2u]; 
00289       si2 = pCoef[(ia2 * 2u) + 1u]; 
00290       co3 = pCoef[ia3 * 2u]; 
00291       si3 = pCoef[(ia3 * 2u) + 1u]; 
00292       /*  Twiddle coefficients index modifier */ 
00293       ia1 = ia1 + twidCoefModifier; 
00294  
00295       for (i0 = j; i0 < fftLen; i0 += n1) 
00296       { 
00297         /*  index calculation for the input as, */ 
00298         /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */ 
00299         i1 = i0 + n2; 
00300         i2 = i1 + n2; 
00301         i3 = i2 + n2; 
00302  
00303         /*  Butterfly implementation */ 
00304         /* xa + xc */ 
00305         r1 = pSrc[2u * i0] + pSrc[2u * i2]; 
00306         /* xa - xc */ 
00307         r2 = pSrc[2u * i0] - pSrc[2u * i2]; 
00308  
00309         /* ya + yc */ 
00310         s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u]; 
00311         /* ya - yc */ 
00312         s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u]; 
00313  
00314         /* xb + xd */ 
00315         t1 = pSrc[2u * i1] + pSrc[2u * i3]; 
00316  
00317         /* xa' = xa + xb + xc + xd */ 
00318         pSrc[2u * i0] = (r1 + t1) >> 2u; 
00319         /* xa + xc -(xb + xd) */ 
00320         r1 = r1 - t1; 
00321  
00322         /* yb + yd */ 
00323         t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u]; 
00324         /* ya' = ya + yb + yc + yd */ 
00325         pSrc[(2u * i0) + 1u] = (s1 + t2) >> 2u; 
00326  
00327         /* (ya + yc) - (yb + yd) */ 
00328         s1 = s1 - t2; 
00329  
00330         /* (yb - yd) */ 
00331         t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u]; 
00332         /* (xb - xd) */ 
00333         t2 = pSrc[2u * i1] - pSrc[2u * i3]; 
00334  
00335         /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */ 
00336         pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) + 
00337                          ((int32_t) (((q63_t) s1 * si2) >> 32))) >> 1u; 
00338  
00339         /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */ 
00340         pSrc[(2u * i1) + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) - 
00341                                 ((int32_t) (((q63_t) r1 * si2) >> 32))) >> 1u; 
00342  
00343         /* (xa - xc) + (yb - yd) */ 
00344         r1 = r2 + t1; 
00345         /* (xa - xc) - (yb - yd) */ 
00346         r2 = r2 - t1; 
00347  
00348         /* (ya - yc) -  (xb - xd) */ 
00349         s1 = s2 - t2; 
00350         /* (ya - yc) +  (xb - xd) */ 
00351         s2 = s2 + t2; 
00352  
00353         /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */ 
00354         pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) + 
00355                          ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u; 
00356  
00357         /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */ 
00358         pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) - 
00359                                 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u; 
00360  
00361         /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */ 
00362         pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) + 
00363                          ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u; 
00364  
00365         /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */ 
00366         pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) - 
00367                                 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u; 
00368       } 
00369     } 
00370     twidCoefModifier <<= 2u; 
00371   } 
00372  
00373   /* End of Middle stages process */ 
00374  
00375   /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */ 
00376   /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */ 
00377   /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */ 
00378   /* data is in 5.27(q27) format for the 16 point as there are no middle stages */ 
00379  
00380  
00381   /* start of Last stage process */ 
00382  
00383   /*  Initializations of last stage */ 
00384   n1 = n2; 
00385   n2 >>= 2u; 
00386  
00387   /*  Calculations of last stage */ 
00388   for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1) 
00389   { 
00390     /*  index calculation for the input as, */ 
00391     /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */ 
00392     i1 = i0 + n2; 
00393     i2 = i1 + n2; 
00394     i3 = i2 + n2; 
00395  
00396     /*  Butterfly implementation */ 
00397     /* xa + xb */ 
00398     r1 = pSrc[2u * i0] + pSrc[2u * i2]; 
00399     /* xa - xb */ 
00400     r2 = pSrc[2u * i0] - pSrc[2u * i2]; 
00401  
00402     /* ya + yc */ 
00403     s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u]; 
00404     /* ya - yc */ 
00405     s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u]; 
00406  
00407     /* xc + xd */ 
00408     t1 = pSrc[2u * i1] + pSrc[2u * i3]; 
00409     /* xa' = xa + xb + xc + xd */ 
00410     pSrc[2u * i0] = (r1 + t1); 
00411     /* (xa + xb) - (xc + xd) */ 
00412     r1 = r1 - t1; 
00413  
00414     /* yb + yd */ 
00415     t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u]; 
00416     /* ya' = ya + yb + yc + yd */ 
00417     pSrc[(2u * i0) + 1u] = (s1 + t2); 
00418     /* (ya + yc) - (yb + yd) */ 
00419     s1 = s1 - t2; 
00420  
00421     /* (yb-yd) */ 
00422     t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u]; 
00423     /* (xb-xd) */ 
00424     t2 = pSrc[2u * i1] - pSrc[2u * i3]; 
00425  
00426     /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */ 
00427     pSrc[2u * i1] = r1; 
00428     /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */ 
00429     pSrc[(2u * i1) + 1u] = s1; 
00430  
00431     /* (xa+yb-xc-yd) */ 
00432     r1 = r2 + t1; 
00433     /* (xa-yb-xc+yd) */ 
00434     r2 = r2 - t1; 
00435  
00436     /* (ya-xb-yc+xd) */ 
00437     s1 = s2 - t2; 
00438     /* (ya+xb-yc-xd) */ 
00439     s2 = s2 + t2; 
00440  
00441     /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */ 
00442     pSrc[2u * i2] = r1; 
00443     /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */ 
00444     pSrc[(2u * i2) + 1u] = s1; 
00445  
00446     /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */ 
00447     pSrc[2u * i3] = r2; 
00448     /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */ 
00449     pSrc[(2u * i3) + 1u] = s2; 
00450  
00451  
00452   } 
00453  
00454   /* output is in 11.21(q21) format for the 1024 point */ 
00455   /* output is in 9.23(q23) format for the 256 point */ 
00456   /* output is in 7.25(q25) format for the 64 point */ 
00457   /* output is in 5.27(q27) format for the 16 point */ 
00458  
00459   /* End of last stage process */ 
00460  
00461 } 
00462  
00463  
00464 /**  
00465  * @brief  Core function for the Q31 CIFFT butterfly process. 
00466  * @param[in, out] *pSrc            points to the in-place buffer of Q31 data type. 
00467  * @param[in]      fftLen           length of the FFT. 
00468  * @param[in]      *pCoef           points to twiddle coefficient buffer. 
00469  * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 
00470  * @return none. 
00471  */ 
00472  
00473  
00474 /*  
00475 * Radix-4 IFFT algorithm used is :  
00476 *  
00477 * CIFFT uses same twiddle coefficients as CFFT Function  
00478 *  x[k] = x[n] + (j)k * x[n + fftLen/4] + (-1)k * x[n+fftLen/2] + (-j)k * x[n+3*fftLen/4]  
00479 *  
00480 *  
00481 * IFFT is implemented with following changes in equations from FFT  
00482 *  
00483 * Input real and imaginary data:  
00484 * x(n) = xa + j * ya  
00485 * x(n+N/4 ) = xb + j * yb  
00486 * x(n+N/2 ) = xc + j * yc  
00487 * x(n+3N 4) = xd + j * yd  
00488 *  
00489 *  
00490 * Output real and imaginary data:  
00491 * x(4r) = xa'+ j * ya'  
00492 * x(4r+1) = xb'+ j * yb'  
00493 * x(4r+2) = xc'+ j * yc'  
00494 * x(4r+3) = xd'+ j * yd'  
00495 *  
00496 *  
00497 * Twiddle factors for radix-4 IFFT:  
00498 * Wn = co1 + j * (si1)  
00499 * W2n = co2 + j * (si2)  
00500 * W3n = co3 + j * (si3)  
00501   
00502 * The real and imaginary output values for the radix-4 butterfly are  
00503 * xa' = xa + xb + xc + xd  
00504 * ya' = ya + yb + yc + yd  
00505 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)  
00506 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)  
00507 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)  
00508 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)  
00509 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)  
00510 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)  
00511 *  
00512 */ 
00513  
00514 void arm_radix4_butterfly_inverse_q31( 
00515   q31_t * pSrc, 
00516   uint32_t fftLen, 
00517   q31_t * pCoef, 
00518   uint32_t twidCoefModifier) 
00519 { 
00520   uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k; 
00521   q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3; 
00522  
00523   /* input is be 1.31(q31) format for all FFT sizes */ 
00524   /* Total process is divided into three stages */ 
00525   /* process first stage, middle stages, & last stage */ 
00526  
00527   /* Start of first stage process */ 
00528  
00529   /* Initializations for the first stage */ 
00530   n2 = fftLen; 
00531   n1 = n2; 
00532   /* n2 = fftLen/4 */ 
00533   n2 >>= 2u; 
00534   i0 = 0u; 
00535   ia1 = 0u; 
00536  
00537   j = n2; 
00538  
00539   do 
00540   { 
00541  
00542     /* input is in 1.31(q31) format and provide 4 guard bits for the input */ 
00543  
00544     /*  index calculation for the input as, */ 
00545     /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */ 
00546     i1 = i0 + n2; 
00547     i2 = i1 + n2; 
00548     i3 = i2 + n2; 
00549  
00550     /*  Butterfly implementation */ 
00551     /* xa + xc */ 
00552     r1 = (pSrc[2u * i0] >> 4u) + (pSrc[2u * i2] >> 4u); 
00553     /* xa - xc */ 
00554     r2 = (pSrc[2u * i0] >> 4u) - (pSrc[2u * i2] >> 4u); 
00555  
00556     /* ya + yc */ 
00557     s1 = (pSrc[(2u * i0) + 1u] >> 4u) + (pSrc[(2u * i2) + 1u] >> 4u); 
00558     /* ya - yc */ 
00559     s2 = (pSrc[(2u * i0) + 1u] >> 4u) - (pSrc[(2u * i2) + 1u] >> 4u); 
00560  
00561     /* xb + xd */ 
00562     t1 = (pSrc[2u * i1] >> 4u) + (pSrc[2u * i3] >> 4u); 
00563  
00564     /* xa' = xa + xb + xc + xd */ 
00565     pSrc[2u * i0] = (r1 + t1); 
00566     /* (xa + xc) - (xb + xd) */ 
00567     r1 = r1 - t1; 
00568     /* yb + yd */ 
00569     t2 = (pSrc[(2u * i1) + 1u] >> 4u) + (pSrc[(2u * i3) + 1u] >> 4u); 
00570     /* ya' = ya + yb + yc + yd */ 
00571     pSrc[(2u * i0) + 1u] = (s1 + t2); 
00572  
00573     /* (ya + yc) - (yb + yd) */ 
00574     s1 = s1 - t2; 
00575  
00576     /* yb - yd */ 
00577     t1 = (pSrc[(2u * i1) + 1u] >> 4u) - (pSrc[(2u * i3) + 1u] >> 4u); 
00578     /* xb - xd */ 
00579     t2 = (pSrc[2u * i1] >> 4u) - (pSrc[2u * i3] >> 4u); 
00580  
00581     /*  index calculation for the coefficients */ 
00582     ia2 = 2u * ia1; 
00583     co2 = pCoef[ia2 * 2u]; 
00584     si2 = pCoef[(ia2 * 2u) + 1u]; 
00585  
00586     /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ 
00587     pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) - 
00588                      ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u; 
00589  
00590     /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ 
00591     pSrc[2u * i1 + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) + 
00592                           ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u; 
00593  
00594     /* (xa - xc) - (yb - yd) */ 
00595     r1 = r2 - t1; 
00596     /* (xa - xc) + (yb - yd) */ 
00597     r2 = r2 + t1; 
00598  
00599     /* (ya - yc) + (xb - xd) */ 
00600     s1 = s2 + t2; 
00601     /* (ya - yc) - (xb - xd) */ 
00602     s2 = s2 - t2; 
00603  
00604     co1 = pCoef[ia1 * 2u]; 
00605     si1 = pCoef[(ia1 * 2u) + 1u]; 
00606  
00607     /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ 
00608     pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) - 
00609                      ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u; 
00610  
00611     /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ 
00612     pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) + 
00613                             ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u; 
00614  
00615     /*  index calculation for the coefficients */ 
00616     ia3 = 3u * ia1; 
00617     co3 = pCoef[ia3 * 2u]; 
00618     si3 = pCoef[(ia3 * 2u) + 1u]; 
00619  
00620     /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ 
00621     pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) - 
00622                      ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u; 
00623  
00624     /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ 
00625     pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) + 
00626                             ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u; 
00627  
00628     /*  Twiddle coefficients index modifier */ 
00629     ia1 = ia1 + twidCoefModifier; 
00630  
00631     /*  Updating input index */ 
00632     i0 = i0 + 1u; 
00633  
00634   } while(--j); 
00635  
00636   /* data is in 5.27(q27) format */ 
00637   /* each stage provides two down scaling of the input */ 
00638  
00639  
00640   /* Start of Middle stages process */ 
00641  
00642   twidCoefModifier <<= 2u; 
00643  
00644   /*  Calculation of second stage to excluding last stage */ 
00645   for (k = fftLen / 4u; k > 4u; k >>= 2u) 
00646   { 
00647     /*  Initializations for the first stage */ 
00648     n1 = n2; 
00649     n2 >>= 2u; 
00650     ia1 = 0u; 
00651  
00652     for (j = 0; j <= (n2 - 1u); j++) 
00653     { 
00654       /*  index calculation for the coefficients */ 
00655       ia2 = ia1 + ia1; 
00656       ia3 = ia2 + ia1; 
00657       co1 = pCoef[ia1 * 2u]; 
00658       si1 = pCoef[(ia1 * 2u) + 1u]; 
00659       co2 = pCoef[ia2 * 2u]; 
00660       si2 = pCoef[(ia2 * 2u) + 1u]; 
00661       co3 = pCoef[ia3 * 2u]; 
00662       si3 = pCoef[(ia3 * 2u) + 1u]; 
00663       /*  Twiddle coefficients index modifier */ 
00664       ia1 = ia1 + twidCoefModifier; 
00665  
00666       for (i0 = j; i0 < fftLen; i0 += n1) 
00667       { 
00668         /*  index calculation for the input as, */ 
00669         /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */ 
00670         i1 = i0 + n2; 
00671         i2 = i1 + n2; 
00672         i3 = i2 + n2; 
00673  
00674         /*  Butterfly implementation */ 
00675         /* xa + xc */ 
00676         r1 = pSrc[2u * i0] + pSrc[2u * i2]; 
00677         /* xa - xc */ 
00678         r2 = pSrc[2u * i0] - pSrc[2u * i2]; 
00679  
00680         /* ya + yc */ 
00681         s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u]; 
00682         /* ya - yc */ 
00683         s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u]; 
00684  
00685         /* xb + xd */ 
00686         t1 = pSrc[2u * i1] + pSrc[2u * i3]; 
00687  
00688         /* xa' = xa + xb + xc + xd */ 
00689         pSrc[2u * i0] = (r1 + t1) >> 2u; 
00690         /* xa + xc -(xb + xd) */ 
00691         r1 = r1 - t1; 
00692         /* yb + yd */ 
00693         t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u]; 
00694         /* ya' = ya + yb + yc + yd */ 
00695         pSrc[(2u * i0) + 1u] = (s1 + t2) >> 2u; 
00696  
00697         /* (ya + yc) - (yb + yd) */ 
00698         s1 = s1 - t2; 
00699  
00700         /* (yb - yd) */ 
00701         t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u]; 
00702         /* (xb - xd) */ 
00703         t2 = pSrc[2u * i1] - pSrc[2u * i3]; 
00704  
00705         /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ 
00706         pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32u)) - 
00707                          ((int32_t) (((q63_t) s1 * si2) >> 32u))) >> 1u; 
00708  
00709         /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ 
00710         pSrc[(2u * i1) + 1u] = 
00711           (((int32_t) (((q63_t) s1 * co2) >> 32u)) + 
00712            ((int32_t) (((q63_t) r1 * si2) >> 32u))) >> 1u; 
00713  
00714         /* (xa - xc) - (yb - yd) */ 
00715         r1 = r2 - t1; 
00716         /* (xa - xc) + (yb - yd) */ 
00717         r2 = r2 + t1; 
00718  
00719         /* (ya - yc) +  (xb - xd) */ 
00720         s1 = s2 + t2; 
00721         /* (ya - yc) -  (xb - xd) */ 
00722         s2 = s2 - t2; 
00723  
00724         /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ 
00725         pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) - 
00726                          ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u; 
00727  
00728         /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ 
00729         pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) + 
00730                                 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u; 
00731  
00732         /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ 
00733         pSrc[(2u * i3)] = (((int32_t) (((q63_t) r2 * co3) >> 32)) - 
00734                            ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u; 
00735  
00736         /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ 
00737         pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) + 
00738                                 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u; 
00739       } 
00740     } 
00741     twidCoefModifier <<= 2u; 
00742   } 
00743  
00744   /* End of Middle stages process */ 
00745  
00746   /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */ 
00747   /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */ 
00748   /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */ 
00749   /* data is in 5.27(q27) format for the 16 point as there are no middle stages */ 
00750  
00751  
00752   /* Start of last stage process */ 
00753  
00754  
00755   /*  Initializations of last stage */ 
00756   n1 = n2; 
00757   n2 >>= 2u; 
00758  
00759   /*  Calculations of last stage */ 
00760   for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1) 
00761   { 
00762     /*  index calculation for the input as, */ 
00763     /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */ 
00764     i1 = i0 + n2; 
00765     i2 = i1 + n2; 
00766     i3 = i2 + n2; 
00767  
00768     /*  Butterfly implementation */ 
00769     /* xa + xc */ 
00770     r1 = pSrc[2u * i0] + pSrc[2u * i2]; 
00771     /* xa - xc */ 
00772     r2 = pSrc[2u * i0] - pSrc[2u * i2]; 
00773  
00774     /* ya + yc */ 
00775     s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u]; 
00776     /* ya - yc */ 
00777     s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u]; 
00778  
00779     /* xc + xd */ 
00780     t1 = pSrc[2u * i1] + pSrc[2u * i3]; 
00781     /* xa' = xa + xb + xc + xd */ 
00782     pSrc[2u * i0] = (r1 + t1); 
00783     /* (xa + xb) - (xc + xd) */ 
00784     r1 = r1 - t1; 
00785  
00786     /* yb + yd */ 
00787     t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u]; 
00788     /* ya' = ya + yb + yc + yd */ 
00789     pSrc[(2u * i0) + 1u] = (s1 + t2); 
00790     /* (ya + yc) - (yb + yd) */ 
00791     s1 = s1 - t2; 
00792  
00793     /* (yb-yd) */ 
00794     t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u]; 
00795     /* (xb-xd) */ 
00796     t2 = pSrc[2u * i1] - pSrc[2u * i3]; 
00797  
00798     /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ 
00799     pSrc[2u * i1] = r1; 
00800     /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ 
00801     pSrc[(2u * i1) + 1u] = s1; 
00802  
00803     /* (xa - xc) - (yb-yd) */ 
00804     r1 = r2 - t1; 
00805  
00806     /* (xa - xc) + (yb-yd) */ 
00807     r2 = r2 + t1; 
00808  
00809     /* (ya - yc) + (xb-xd) */ 
00810     s1 = s2 + t2; 
00811  
00812     /* (ya - yc) - (xb-xd) */ 
00813     s2 = s2 - t2; 
00814  
00815     /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ 
00816     pSrc[2u * i2] = r1; 
00817     /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ 
00818     pSrc[(2u * i2) + 1u] = s1; 
00819  
00820     /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ 
00821     pSrc[2u * i3] = r2; 
00822     /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ 
00823     pSrc[(2u * i3) + 1u] = s2; 
00824  
00825   } 
00826  
00827   /* output is in 11.21(q21) format for the 1024 point */ 
00828   /* output is in 9.23(q23) format for the 256 point */ 
00829   /* output is in 7.25(q25) format for the 64 point */ 
00830   /* output is in 5.27(q27) format for the 16 point */ 
00831  
00832   /* End of last stage process */ 
00833 } 
00834  
00835  
00836 /*  
00837  * @brief  In-place bit reversal function. 
00838  * @param[in, out] *pSrc        points to the in-place buffer of Q31 data type. 
00839  * @param[in]      fftLen       length of the FFT. 
00840  * @param[in]      bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table 
00841  * @param[in]      *pBitRevTab  points to bit reversal table. 
00842  * @return none. 
00843  */ 
00844  
00845 void arm_bitreversal_q31( 
00846   q31_t * pSrc, 
00847   uint32_t fftLen, 
00848   uint16_t bitRevFactor, 
00849   uint16_t * pBitRevTable) 
00850 { 
00851   uint32_t fftLenBy2, fftLenBy2p1, i, j; 
00852   q31_t in; 
00853  
00854   /*  Initializations      */ 
00855   j = 0u; 
00856   fftLenBy2 = fftLen / 2u; 
00857   fftLenBy2p1 = (fftLen / 2u) + 1u; 
00858  
00859   /* Bit Reversal Implementation */ 
00860   for (i = 0u; i <= (fftLenBy2 - 2u); i += 2u) 
00861   { 
00862     if(i < j) 
00863     { 
00864       /*  pSrc[i] <-> pSrc[j]; */ 
00865       in = pSrc[2u * i]; 
00866       pSrc[2u * i] = pSrc[2u * j]; 
00867       pSrc[2u * j] = in; 
00868  
00869       /*  pSrc[i+1u] <-> pSrc[j+1u] */ 
00870       in = pSrc[(2u * i) + 1u]; 
00871       pSrc[(2u * i) + 1u] = pSrc[(2u * j) + 1u]; 
00872       pSrc[(2u * j) + 1u] = in; 
00873  
00874       /*  pSrc[i+fftLenBy2p1] <-> pSrc[j+fftLenBy2p1] */ 
00875       in = pSrc[2u * (i + fftLenBy2p1)]; 
00876       pSrc[2u * (i + fftLenBy2p1)] = pSrc[2u * (j + fftLenBy2p1)]; 
00877       pSrc[2u * (j + fftLenBy2p1)] = in; 
00878  
00879       /*  pSrc[i+fftLenBy2p1+1u] <-> pSrc[j+fftLenBy2p1+1u] */ 
00880       in = pSrc[(2u * (i + fftLenBy2p1)) + 1u]; 
00881       pSrc[(2u * (i + fftLenBy2p1)) + 1u] = 
00882         pSrc[(2u * (j + fftLenBy2p1)) + 1u]; 
00883       pSrc[(2u * (j + fftLenBy2p1)) + 1u] = in; 
00884  
00885     } 
00886  
00887     /*  pSrc[i+1u] <-> pSrc[j+1u] */ 
00888     in = pSrc[2u * (i + 1u)]; 
00889     pSrc[2u * (i + 1u)] = pSrc[2u * (j + fftLenBy2)]; 
00890     pSrc[2u * (j + fftLenBy2)] = in; 
00891  
00892     /*  pSrc[i+2u] <-> pSrc[j+2u] */ 
00893     in = pSrc[(2u * (i + 1u)) + 1u]; 
00894     pSrc[(2u * (i + 1u)) + 1u] = pSrc[(2u * (j + fftLenBy2)) + 1u]; 
00895     pSrc[(2u * (j + fftLenBy2)) + 1u] = in; 
00896  
00897     /*  Reading the index for the bit reversal */ 
00898     j = *pBitRevTable; 
00899  
00900     /*  Updating the bit reversal index depending on the fft length */ 
00901     pBitRevTable += bitRevFactor; 
00902   } 
00903 }