CMSIS DSP library

Dependents:   KL25Z_FFT_Demo Hat_Board_v5_1 KL25Z_FFT_Demo_tony KL25Z_FFT_Demo_tony ... more

Fork of mbed-dsp by mbed official

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_cfft_radix4_q31.c Source File

arm_cfft_radix4_q31.c

00001 /* ----------------------------------------------------------------------    
00002 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.    
00003 *    
00004 * $Date:        17. January 2013  
00005 * $Revision:    V1.4.1  
00006 *    
00007 * Project:      CMSIS DSP Library    
00008 * Title:        arm_cfft_radix4_q31.c    
00009 *    
00010 * Description:  This file has function definition of Radix-4 FFT & IFFT function and    
00011 *               In-place bit reversal using bit reversal table    
00012 *    
00013 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
00014 *  
00015 * Redistribution and use in source and binary forms, with or without 
00016 * modification, are permitted provided that the following conditions
00017 * are met:
00018 *   - Redistributions of source code must retain the above copyright
00019 *     notice, this list of conditions and the following disclaimer.
00020 *   - Redistributions in binary form must reproduce the above copyright
00021 *     notice, this list of conditions and the following disclaimer in
00022 *     the documentation and/or other materials provided with the 
00023 *     distribution.
00024 *   - Neither the name of ARM LIMITED nor the names of its contributors
00025 *     may be used to endorse or promote products derived from this
00026 *     software without specific prior written permission.
00027 *
00028 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00029 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00030 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
00031 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
00032 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
00033 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
00034 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00035 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
00036 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00037 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
00038 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00039 * POSSIBILITY OF SUCH DAMAGE.     
00040 * -------------------------------------------------------------------- */
00041 
00042 #include "arm_math.h"
00043 
00044 void arm_radix4_butterfly_inverse_q31(
00045 q31_t * pSrc,
00046 uint32_t fftLen,
00047 q31_t * pCoef,
00048 uint32_t twidCoefModifier);
00049 
00050 void arm_radix4_butterfly_q31(
00051 q31_t * pSrc,
00052 uint32_t fftLen,
00053 q31_t * pCoef,
00054 uint32_t twidCoefModifier);
00055 
00056 void arm_bitreversal_q31(
00057 q31_t * pSrc,
00058 uint32_t fftLen,
00059 uint16_t bitRevFactor,
00060 uint16_t * pBitRevTab);
00061 
00062 /**    
00063  * @ingroup groupTransforms    
00064  */
00065 
00066 /**    
00067  * @addtogroup ComplexFFT    
00068  * @{    
00069  */
00070 
00071 /**    
00072  * @details    
00073  * @brief Processing function for the Q31 CFFT/CIFFT.    
00074  * @param[in]      *S    points to an instance of the Q31 CFFT/CIFFT structure.   
00075  * @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.   
00076  * @return none.    
00077  *     
00078  * \par Input and output formats:    
00079  * \par    
00080  * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.   
00081  * Hence the output format is different for different FFT sizes.    
00082  * The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT:   
00083  * \par   
00084  * \image html CFFTQ31.gif "Input and Output Formats for Q31 CFFT"    
00085  * \image html CIFFTQ31.gif "Input and Output Formats for Q31 CIFFT"    
00086  *    
00087  */
00088 
00089 void arm_cfft_radix4_q31(
00090   const arm_cfft_radix4_instance_q31 * S,
00091   q31_t * pSrc)
00092 {
00093   if(S->ifftFlag == 1u)
00094   {
00095     /* Complex IFFT radix-4 */
00096     arm_radix4_butterfly_inverse_q31(pSrc, S->fftLen, S->pTwiddle,
00097                                      S->twidCoefModifier);
00098   }
00099   else
00100   {
00101     /* Complex FFT radix-4 */
00102     arm_radix4_butterfly_q31(pSrc, S->fftLen, S->pTwiddle,
00103                              S->twidCoefModifier);
00104   }
00105 
00106 
00107   if(S->bitReverseFlag == 1u)
00108   {
00109     /*  Bit Reversal */
00110     arm_bitreversal_q31(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
00111   }
00112 
00113 }
00114 
00115 /**    
00116  * @} end of ComplexFFT group    
00117  */
00118 
00119 /*    
00120 * Radix-4 FFT algorithm used is :    
00121 *    
00122 * Input real and imaginary data:    
00123 * x(n) = xa + j * ya    
00124 * x(n+N/4 ) = xb + j * yb    
00125 * x(n+N/2 ) = xc + j * yc    
00126 * x(n+3N 4) = xd + j * yd    
00127 *    
00128 *    
00129 * Output real and imaginary data:    
00130 * x(4r) = xa'+ j * ya'    
00131 * x(4r+1) = xb'+ j * yb'    
00132 * x(4r+2) = xc'+ j * yc'    
00133 * x(4r+3) = xd'+ j * yd'    
00134 *    
00135 *    
00136 * Twiddle factors for radix-4 FFT:    
00137 * Wn = co1 + j * (- si1)    
00138 * W2n = co2 + j * (- si2)    
00139 * W3n = co3 + j * (- si3)    
00140 *    
00141 *  Butterfly implementation:    
00142 * xa' = xa + xb + xc + xd    
00143 * ya' = ya + yb + yc + yd    
00144 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)    
00145 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)    
00146 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)    
00147 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)    
00148 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)    
00149 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)    
00150 *    
00151 */
00152 
00153 /**    
00154  * @brief  Core function for the Q31 CFFT butterfly process.   
00155  * @param[in, out] *pSrc            points to the in-place buffer of Q31 data type.   
00156  * @param[in]      fftLen           length of the FFT.   
00157  * @param[in]      *pCoef           points to twiddle coefficient buffer.   
00158  * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.   
00159  * @return none.   
00160  */
00161 
00162 void arm_radix4_butterfly_q31(
00163   q31_t * pSrc,
00164   uint32_t fftLen,
00165   q31_t * pCoef,
00166   uint32_t twidCoefModifier)
00167 {
00168   uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k;
00169   q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
00170 
00171   q31_t xa, xb, xc, xd;
00172   q31_t ya, yb, yc, yd;
00173   q31_t xa_out, xb_out, xc_out, xd_out;
00174   q31_t ya_out, yb_out, yc_out, yd_out;
00175 
00176   q31_t *ptr1;
00177   q63_t xaya, xbyb, xcyc, xdyd;
00178   /* Total process is divided into three stages */
00179 
00180   /* process first stage, middle stages, & last stage */
00181 
00182 
00183   /* start of first stage process */
00184 
00185   /*  Initializations for the first stage */
00186   n2 = fftLen;
00187   n1 = n2;
00188   /* n2 = fftLen/4 */
00189   n2 >>= 2u;
00190   i0 = 0u;
00191   ia1 = 0u;
00192 
00193   j = n2;
00194 
00195   /*  Calculation of first stage */
00196   do
00197   {
00198     /*  index calculation for the input as, */
00199     /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
00200     i1 = i0 + n2;
00201     i2 = i1 + n2;
00202     i3 = i2 + n2;
00203 
00204     /* input is in 1.31(q31) format and provide 4 guard bits for the input */
00205 
00206     /*  Butterfly implementation */
00207     /* xa + xc */
00208     r1 = (pSrc[(2u * i0)] >> 4u) + (pSrc[(2u * i2)] >> 4u);
00209     /* xa - xc */
00210     r2 = (pSrc[2u * i0] >> 4u) - (pSrc[2u * i2] >> 4u);
00211 
00212     /* xb + xd */
00213     t1 = (pSrc[2u * i1] >> 4u) + (pSrc[2u * i3] >> 4u);
00214 
00215     /* ya + yc */
00216     s1 = (pSrc[(2u * i0) + 1u] >> 4u) + (pSrc[(2u * i2) + 1u] >> 4u);
00217     /* ya - yc */
00218     s2 = (pSrc[(2u * i0) + 1u] >> 4u) - (pSrc[(2u * i2) + 1u] >> 4u);
00219 
00220     /* xa' = xa + xb + xc + xd */
00221     pSrc[2u * i0] = (r1 + t1);
00222     /* (xa + xc) - (xb + xd) */
00223     r1 = r1 - t1;
00224     /* yb + yd */
00225     t2 = (pSrc[(2u * i1) + 1u] >> 4u) + (pSrc[(2u * i3) + 1u] >> 4u);
00226 
00227     /* ya' = ya + yb + yc + yd */
00228     pSrc[(2u * i0) + 1u] = (s1 + t2);
00229 
00230     /* (ya + yc) - (yb + yd) */
00231     s1 = s1 - t2;
00232 
00233     /* yb - yd */
00234     t1 = (pSrc[(2u * i1) + 1u] >> 4u) - (pSrc[(2u * i3) + 1u] >> 4u);
00235     /* xb - xd */
00236     t2 = (pSrc[2u * i1] >> 4u) - (pSrc[2u * i3] >> 4u);
00237 
00238     /*  index calculation for the coefficients */
00239     ia2 = 2u * ia1;
00240     co2 = pCoef[ia2 * 2u];
00241     si2 = pCoef[(ia2 * 2u) + 1u];
00242 
00243     /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
00244     pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
00245                      ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u;
00246 
00247     /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
00248     pSrc[(2u * i1) + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
00249                             ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u;
00250 
00251     /* (xa - xc) + (yb - yd) */
00252     r1 = r2 + t1;
00253     /* (xa - xc) - (yb - yd) */
00254     r2 = r2 - t1;
00255 
00256     /* (ya - yc) - (xb - xd) */
00257     s1 = s2 - t2;
00258     /* (ya - yc) + (xb - xd) */
00259     s2 = s2 + t2;
00260 
00261     co1 = pCoef[ia1 * 2u];
00262     si1 = pCoef[(ia1 * 2u) + 1u];
00263 
00264     /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
00265     pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
00266                      ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u;
00267 
00268     /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
00269     pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
00270                             ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u;
00271 
00272     /*  index calculation for the coefficients */
00273     ia3 = 3u * ia1;
00274     co3 = pCoef[ia3 * 2u];
00275     si3 = pCoef[(ia3 * 2u) + 1u];
00276 
00277     /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
00278     pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
00279                      ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u;
00280 
00281     /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
00282     pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
00283                             ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u;
00284 
00285     /*  Twiddle coefficients index modifier */
00286     ia1 = ia1 + twidCoefModifier;
00287 
00288     /*  Updating input index */
00289     i0 = i0 + 1u;
00290 
00291   } while(--j);
00292 
00293   /* end of first stage process */
00294 
00295   /* data is in 5.27(q27) format */
00296 
00297 
00298   /* start of Middle stages process */
00299 
00300 
00301   /* each stage in middle stages provides two down scaling of the input */
00302 
00303   twidCoefModifier <<= 2u;
00304 
00305 
00306   for (k = fftLen / 4u; k > 4u; k >>= 2u)
00307   {
00308     /*  Initializations for the first stage */
00309     n1 = n2;
00310     n2 >>= 2u;
00311     ia1 = 0u;
00312 
00313     /*  Calculation of first stage */
00314     for (j = 0u; j <= (n2 - 1u); j++)
00315     {
00316       /*  index calculation for the coefficients */
00317       ia2 = ia1 + ia1;
00318       ia3 = ia2 + ia1;
00319       co1 = pCoef[ia1 * 2u];
00320       si1 = pCoef[(ia1 * 2u) + 1u];
00321       co2 = pCoef[ia2 * 2u];
00322       si2 = pCoef[(ia2 * 2u) + 1u];
00323       co3 = pCoef[ia3 * 2u];
00324       si3 = pCoef[(ia3 * 2u) + 1u];
00325       /*  Twiddle coefficients index modifier */
00326       ia1 = ia1 + twidCoefModifier;
00327 
00328       for (i0 = j; i0 < fftLen; i0 += n1)
00329       {
00330         /*  index calculation for the input as, */
00331         /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
00332         i1 = i0 + n2;
00333         i2 = i1 + n2;
00334         i3 = i2 + n2;
00335 
00336         /*  Butterfly implementation */
00337         /* xa + xc */
00338         r1 = pSrc[2u * i0] + pSrc[2u * i2];
00339         /* xa - xc */
00340         r2 = pSrc[2u * i0] - pSrc[2u * i2];
00341 
00342         /* ya + yc */
00343         s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u];
00344         /* ya - yc */
00345         s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u];
00346 
00347         /* xb + xd */
00348         t1 = pSrc[2u * i1] + pSrc[2u * i3];
00349 
00350         /* xa' = xa + xb + xc + xd */
00351         pSrc[2u * i0] = (r1 + t1) >> 2u;
00352         /* xa + xc -(xb + xd) */
00353         r1 = r1 - t1;
00354 
00355         /* yb + yd */
00356         t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u];
00357         /* ya' = ya + yb + yc + yd */
00358         pSrc[(2u * i0) + 1u] = (s1 + t2) >> 2u;
00359 
00360         /* (ya + yc) - (yb + yd) */
00361         s1 = s1 - t2;
00362 
00363         /* (yb - yd) */
00364         t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u];
00365         /* (xb - xd) */
00366         t2 = pSrc[2u * i1] - pSrc[2u * i3];
00367 
00368         /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
00369         pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
00370                          ((int32_t) (((q63_t) s1 * si2) >> 32))) >> 1u;
00371 
00372         /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
00373         pSrc[(2u * i1) + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
00374                                 ((int32_t) (((q63_t) r1 * si2) >> 32))) >> 1u;
00375 
00376         /* (xa - xc) + (yb - yd) */
00377         r1 = r2 + t1;
00378         /* (xa - xc) - (yb - yd) */
00379         r2 = r2 - t1;
00380 
00381         /* (ya - yc) -  (xb - xd) */
00382         s1 = s2 - t2;
00383         /* (ya - yc) +  (xb - xd) */
00384         s2 = s2 + t2;
00385 
00386         /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
00387         pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
00388                          ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u;
00389 
00390         /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
00391         pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
00392                                 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u;
00393 
00394         /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
00395         pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
00396                          ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u;
00397 
00398         /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
00399         pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
00400                                 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u;
00401       }
00402     }
00403     twidCoefModifier <<= 2u;
00404   }
00405 
00406   /* End of Middle stages process */
00407 
00408   /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
00409   /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
00410   /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */
00411   /* data is in 5.27(q27) format for the 16 point as there are no middle stages */
00412 
00413 
00414   /* start of Last stage process */
00415   /*  Initializations for the last stage */
00416   j = fftLen >> 2;
00417   ptr1 = &pSrc[0];
00418 
00419   /*  Calculations of last stage */
00420   do
00421   {
00422 
00423 #ifndef ARM_MATH_BIG_ENDIAN
00424 
00425     /* Read xa (real), ya(imag) input */
00426     xaya = *__SIMD64(ptr1)++;
00427     xa = (q31_t) xaya;
00428     ya = (q31_t) (xaya >> 32);
00429 
00430     /* Read xb (real), yb(imag) input */
00431     xbyb = *__SIMD64(ptr1)++;
00432     xb = (q31_t) xbyb;
00433     yb = (q31_t) (xbyb >> 32);
00434 
00435     /* Read xc (real), yc(imag) input */
00436     xcyc = *__SIMD64(ptr1)++;
00437     xc = (q31_t) xcyc;
00438     yc = (q31_t) (xcyc >> 32);
00439 
00440     /* Read xc (real), yc(imag) input */
00441     xdyd = *__SIMD64(ptr1)++;
00442     xd = (q31_t) xdyd;
00443     yd = (q31_t) (xdyd >> 32);
00444 
00445 #else
00446 
00447     /* Read xa (real), ya(imag) input */
00448     xaya = *__SIMD64(ptr1)++;
00449     ya = (q31_t) xaya;
00450     xa = (q31_t) (xaya >> 32);
00451 
00452     /* Read xb (real), yb(imag) input */
00453     xbyb = *__SIMD64(ptr1)++;
00454     yb = (q31_t) xbyb;
00455     xb = (q31_t) (xbyb >> 32);
00456 
00457     /* Read xc (real), yc(imag) input */
00458     xcyc = *__SIMD64(ptr1)++;
00459     yc = (q31_t) xcyc;
00460     xc = (q31_t) (xcyc >> 32);
00461 
00462     /* Read xc (real), yc(imag) input */
00463     xdyd = *__SIMD64(ptr1)++;
00464     yd = (q31_t) xdyd;
00465     xd = (q31_t) (xdyd >> 32);
00466 
00467 
00468 #endif
00469 
00470     /* xa' = xa + xb + xc + xd */
00471     xa_out = xa + xb + xc + xd;
00472 
00473     /* ya' = ya + yb + yc + yd */
00474     ya_out = ya + yb + yc + yd;
00475 
00476     /* pointer updation for writing */
00477     ptr1 = ptr1 - 8u;
00478 
00479     /* writing xa' and ya' */
00480     *ptr1++ = xa_out;
00481     *ptr1++ = ya_out;
00482 
00483     xc_out = (xa - xb + xc - xd);
00484     yc_out = (ya - yb + yc - yd);
00485 
00486     /* writing xc' and yc' */
00487     *ptr1++ = xc_out;
00488     *ptr1++ = yc_out;
00489 
00490     xb_out = (xa + yb - xc - yd);
00491     yb_out = (ya - xb - yc + xd);
00492 
00493     /* writing xb' and yb' */
00494     *ptr1++ = xb_out;
00495     *ptr1++ = yb_out;
00496 
00497     xd_out = (xa - yb - xc + yd);
00498     yd_out = (ya + xb - yc - xd);
00499 
00500     /* writing xd' and yd' */
00501     *ptr1++ = xd_out;
00502     *ptr1++ = yd_out;
00503 
00504 
00505   } while(--j);
00506 
00507   /* output is in 11.21(q21) format for the 1024 point */
00508   /* output is in 9.23(q23) format for the 256 point */
00509   /* output is in 7.25(q25) format for the 64 point */
00510   /* output is in 5.27(q27) format for the 16 point */
00511 
00512   /* End of last stage process */
00513 
00514 }
00515 
00516 
00517 /**    
00518  * @brief  Core function for the Q31 CIFFT butterfly process.   
00519  * @param[in, out] *pSrc            points to the in-place buffer of Q31 data type.   
00520  * @param[in]      fftLen           length of the FFT.   
00521  * @param[in]      *pCoef           points to twiddle coefficient buffer.   
00522  * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.   
00523  * @return none.   
00524  */
00525 
00526 
00527 /*    
00528 * Radix-4 IFFT algorithm used is :    
00529 *    
00530 * CIFFT uses same twiddle coefficients as CFFT Function    
00531 *  x[k] = x[n] + (j)k * x[n + fftLen/4] + (-1)k * x[n+fftLen/2] + (-j)k * x[n+3*fftLen/4]    
00532 *    
00533 *    
00534 * IFFT is implemented with following changes in equations from FFT    
00535 *    
00536 * Input real and imaginary data:    
00537 * x(n) = xa + j * ya    
00538 * x(n+N/4 ) = xb + j * yb    
00539 * x(n+N/2 ) = xc + j * yc    
00540 * x(n+3N 4) = xd + j * yd    
00541 *    
00542 *    
00543 * Output real and imaginary data:    
00544 * x(4r) = xa'+ j * ya'    
00545 * x(4r+1) = xb'+ j * yb'    
00546 * x(4r+2) = xc'+ j * yc'    
00547 * x(4r+3) = xd'+ j * yd'    
00548 *    
00549 *    
00550 * Twiddle factors for radix-4 IFFT:    
00551 * Wn = co1 + j * (si1)    
00552 * W2n = co2 + j * (si2)    
00553 * W3n = co3 + j * (si3)    
00554     
00555 * The real and imaginary output values for the radix-4 butterfly are    
00556 * xa' = xa + xb + xc + xd    
00557 * ya' = ya + yb + yc + yd    
00558 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)    
00559 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)    
00560 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)    
00561 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)    
00562 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)    
00563 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)    
00564 *    
00565 */
00566 
00567 void arm_radix4_butterfly_inverse_q31(
00568   q31_t * pSrc,
00569   uint32_t fftLen,
00570   q31_t * pCoef,
00571   uint32_t twidCoefModifier)
00572 {
00573   uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k;
00574   q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
00575   q31_t xa, xb, xc, xd;
00576   q31_t ya, yb, yc, yd;
00577   q31_t xa_out, xb_out, xc_out, xd_out;
00578   q31_t ya_out, yb_out, yc_out, yd_out;
00579 
00580   q31_t *ptr1;
00581   q63_t xaya, xbyb, xcyc, xdyd;
00582 
00583   /* input is be 1.31(q31) format for all FFT sizes */
00584   /* Total process is divided into three stages */
00585   /* process first stage, middle stages, & last stage */
00586 
00587   /* Start of first stage process */
00588 
00589   /* Initializations for the first stage */
00590   n2 = fftLen;
00591   n1 = n2;
00592   /* n2 = fftLen/4 */
00593   n2 >>= 2u;
00594   i0 = 0u;
00595   ia1 = 0u;
00596 
00597   j = n2;
00598 
00599   do
00600   {
00601 
00602     /* input is in 1.31(q31) format and provide 4 guard bits for the input */
00603 
00604     /*  index calculation for the input as, */
00605     /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
00606     i1 = i0 + n2;
00607     i2 = i1 + n2;
00608     i3 = i2 + n2;
00609 
00610     /*  Butterfly implementation */
00611     /* xa + xc */
00612     r1 = (pSrc[2u * i0] >> 4u) + (pSrc[2u * i2] >> 4u);
00613     /* xa - xc */
00614     r2 = (pSrc[2u * i0] >> 4u) - (pSrc[2u * i2] >> 4u);
00615 
00616     /* xb + xd */
00617     t1 = (pSrc[2u * i1] >> 4u) + (pSrc[2u * i3] >> 4u);
00618 
00619     /* ya + yc */
00620     s1 = (pSrc[(2u * i0) + 1u] >> 4u) + (pSrc[(2u * i2) + 1u] >> 4u);
00621     /* ya - yc */
00622     s2 = (pSrc[(2u * i0) + 1u] >> 4u) - (pSrc[(2u * i2) + 1u] >> 4u);
00623 
00624     /* xa' = xa + xb + xc + xd */
00625     pSrc[2u * i0] = (r1 + t1);
00626     /* (xa + xc) - (xb + xd) */
00627     r1 = r1 - t1;
00628     /* yb + yd */
00629     t2 = (pSrc[(2u * i1) + 1u] >> 4u) + (pSrc[(2u * i3) + 1u] >> 4u);
00630     /* ya' = ya + yb + yc + yd */
00631     pSrc[(2u * i0) + 1u] = (s1 + t2);
00632 
00633     /* (ya + yc) - (yb + yd) */
00634     s1 = s1 - t2;
00635 
00636     /* yb - yd */
00637     t1 = (pSrc[(2u * i1) + 1u] >> 4u) - (pSrc[(2u * i3) + 1u] >> 4u);
00638     /* xb - xd */
00639     t2 = (pSrc[2u * i1] >> 4u) - (pSrc[2u * i3] >> 4u);
00640 
00641     /*  index calculation for the coefficients */
00642     ia2 = 2u * ia1;
00643     co2 = pCoef[ia2 * 2u];
00644     si2 = pCoef[(ia2 * 2u) + 1u];
00645 
00646     /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
00647     pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) -
00648                      ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u;
00649 
00650     /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
00651     pSrc[2u * i1 + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) +
00652                           ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u;
00653 
00654     /* (xa - xc) - (yb - yd) */
00655     r1 = r2 - t1;
00656     /* (xa - xc) + (yb - yd) */
00657     r2 = r2 + t1;
00658 
00659     /* (ya - yc) + (xb - xd) */
00660     s1 = s2 + t2;
00661     /* (ya - yc) - (xb - xd) */
00662     s2 = s2 - t2;
00663 
00664     co1 = pCoef[ia1 * 2u];
00665     si1 = pCoef[(ia1 * 2u) + 1u];
00666 
00667     /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
00668     pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
00669                      ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u;
00670 
00671     /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
00672     pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
00673                             ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u;
00674 
00675     /*  index calculation for the coefficients */
00676     ia3 = 3u * ia1;
00677     co3 = pCoef[ia3 * 2u];
00678     si3 = pCoef[(ia3 * 2u) + 1u];
00679 
00680     /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
00681     pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
00682                      ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u;
00683 
00684     /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
00685     pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
00686                             ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u;
00687 
00688     /*  Twiddle coefficients index modifier */
00689     ia1 = ia1 + twidCoefModifier;
00690 
00691     /*  Updating input index */
00692     i0 = i0 + 1u;
00693 
00694   } while(--j);
00695 
00696   /* data is in 5.27(q27) format */
00697   /* each stage provides two down scaling of the input */
00698 
00699 
00700   /* Start of Middle stages process */
00701 
00702   twidCoefModifier <<= 2u;
00703 
00704   /*  Calculation of second stage to excluding last stage */
00705   for (k = fftLen / 4u; k > 4u; k >>= 2u)
00706   {
00707     /*  Initializations for the first stage */
00708     n1 = n2;
00709     n2 >>= 2u;
00710     ia1 = 0u;
00711 
00712     for (j = 0; j <= (n2 - 1u); j++)
00713     {
00714       /*  index calculation for the coefficients */
00715       ia2 = ia1 + ia1;
00716       ia3 = ia2 + ia1;
00717       co1 = pCoef[ia1 * 2u];
00718       si1 = pCoef[(ia1 * 2u) + 1u];
00719       co2 = pCoef[ia2 * 2u];
00720       si2 = pCoef[(ia2 * 2u) + 1u];
00721       co3 = pCoef[ia3 * 2u];
00722       si3 = pCoef[(ia3 * 2u) + 1u];
00723       /*  Twiddle coefficients index modifier */
00724       ia1 = ia1 + twidCoefModifier;
00725 
00726       for (i0 = j; i0 < fftLen; i0 += n1)
00727       {
00728         /*  index calculation for the input as, */
00729         /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
00730         i1 = i0 + n2;
00731         i2 = i1 + n2;
00732         i3 = i2 + n2;
00733 
00734         /*  Butterfly implementation */
00735         /* xa + xc */
00736         r1 = pSrc[2u * i0] + pSrc[2u * i2];
00737         /* xa - xc */
00738         r2 = pSrc[2u * i0] - pSrc[2u * i2];
00739 
00740         /* ya + yc */
00741         s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u];
00742         /* ya - yc */
00743         s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u];
00744 
00745         /* xb + xd */
00746         t1 = pSrc[2u * i1] + pSrc[2u * i3];
00747 
00748         /* xa' = xa + xb + xc + xd */
00749         pSrc[2u * i0] = (r1 + t1) >> 2u;
00750         /* xa + xc -(xb + xd) */
00751         r1 = r1 - t1;
00752         /* yb + yd */
00753         t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u];
00754         /* ya' = ya + yb + yc + yd */
00755         pSrc[(2u * i0) + 1u] = (s1 + t2) >> 2u;
00756 
00757         /* (ya + yc) - (yb + yd) */
00758         s1 = s1 - t2;
00759 
00760         /* (yb - yd) */
00761         t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u];
00762         /* (xb - xd) */
00763         t2 = pSrc[2u * i1] - pSrc[2u * i3];
00764 
00765         /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
00766         pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32u)) -
00767                          ((int32_t) (((q63_t) s1 * si2) >> 32u))) >> 1u;
00768 
00769         /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
00770         pSrc[(2u * i1) + 1u] =
00771           (((int32_t) (((q63_t) s1 * co2) >> 32u)) +
00772            ((int32_t) (((q63_t) r1 * si2) >> 32u))) >> 1u;
00773 
00774         /* (xa - xc) - (yb - yd) */
00775         r1 = r2 - t1;
00776         /* (xa - xc) + (yb - yd) */
00777         r2 = r2 + t1;
00778 
00779         /* (ya - yc) +  (xb - xd) */
00780         s1 = s2 + t2;
00781         /* (ya - yc) -  (xb - xd) */
00782         s2 = s2 - t2;
00783 
00784         /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
00785         pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
00786                          ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u;
00787 
00788         /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
00789         pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
00790                                 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u;
00791 
00792         /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
00793         pSrc[(2u * i3)] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
00794                            ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u;
00795 
00796         /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
00797         pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
00798                                 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u;
00799       }
00800     }
00801     twidCoefModifier <<= 2u;
00802   }
00803 
00804   /* End of Middle stages process */
00805 
00806   /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
00807   /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
00808   /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */
00809   /* data is in 5.27(q27) format for the 16 point as there are no middle stages */
00810 
00811 
00812   /* Start of last stage process */
00813 
00814 
00815   /*  Initializations for the last stage */
00816   j = fftLen >> 2;
00817   ptr1 = &pSrc[0];
00818 
00819   /*  Calculations of last stage */
00820   do
00821   {
00822 #ifndef ARM_MATH_BIG_ENDIAN
00823     /* Read xa (real), ya(imag) input */
00824     xaya = *__SIMD64(ptr1)++;
00825     xa = (q31_t) xaya;
00826     ya = (q31_t) (xaya >> 32);
00827 
00828     /* Read xb (real), yb(imag) input */
00829     xbyb = *__SIMD64(ptr1)++;
00830     xb = (q31_t) xbyb;
00831     yb = (q31_t) (xbyb >> 32);
00832 
00833     /* Read xc (real), yc(imag) input */
00834     xcyc = *__SIMD64(ptr1)++;
00835     xc = (q31_t) xcyc;
00836     yc = (q31_t) (xcyc >> 32);
00837 
00838     /* Read xc (real), yc(imag) input */
00839     xdyd = *__SIMD64(ptr1)++;
00840     xd = (q31_t) xdyd;
00841     yd = (q31_t) (xdyd >> 32);
00842 
00843 #else
00844 
00845     /* Read xa (real), ya(imag) input */
00846     xaya = *__SIMD64(ptr1)++;
00847     ya = (q31_t) xaya;
00848     xa = (q31_t) (xaya >> 32);
00849 
00850     /* Read xb (real), yb(imag) input */
00851     xbyb = *__SIMD64(ptr1)++;
00852     yb = (q31_t) xbyb;
00853     xb = (q31_t) (xbyb >> 32);
00854 
00855     /* Read xc (real), yc(imag) input */
00856     xcyc = *__SIMD64(ptr1)++;
00857     yc = (q31_t) xcyc;
00858     xc = (q31_t) (xcyc >> 32);
00859 
00860     /* Read xc (real), yc(imag) input */
00861     xdyd = *__SIMD64(ptr1)++;
00862     yd = (q31_t) xdyd;
00863     xd = (q31_t) (xdyd >> 32);
00864 
00865 
00866 #endif
00867 
00868     /* xa' = xa + xb + xc + xd */
00869     xa_out = xa + xb + xc + xd;
00870 
00871     /* ya' = ya + yb + yc + yd */
00872     ya_out = ya + yb + yc + yd;
00873 
00874     /* pointer updation for writing */
00875     ptr1 = ptr1 - 8u;
00876 
00877     /* writing xa' and ya' */
00878     *ptr1++ = xa_out;
00879     *ptr1++ = ya_out;
00880 
00881     xc_out = (xa - xb + xc - xd);
00882     yc_out = (ya - yb + yc - yd);
00883 
00884     /* writing xc' and yc' */
00885     *ptr1++ = xc_out;
00886     *ptr1++ = yc_out;
00887 
00888     xb_out = (xa - yb - xc + yd);
00889     yb_out = (ya + xb - yc - xd);
00890 
00891     /* writing xb' and yb' */
00892     *ptr1++ = xb_out;
00893     *ptr1++ = yb_out;
00894 
00895     xd_out = (xa + yb - xc - yd);
00896     yd_out = (ya - xb - yc + xd);
00897 
00898     /* writing xd' and yd' */
00899     *ptr1++ = xd_out;
00900     *ptr1++ = yd_out;
00901 
00902 
00903   } while(--j);
00904 
00905   /* output is in 11.21(q21) format for the 1024 point */
00906   /* output is in 9.23(q23) format for the 256 point */
00907   /* output is in 7.25(q25) format for the 64 point */
00908   /* output is in 5.27(q27) format for the 16 point */
00909 
00910   /* End of last stage process */
00911 }