CMSIS DSP library

Dependents:   performance_timer Surfboard_ gps2rtty Capstone ... more

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_cfft_radix4_q15.c Source File

arm_cfft_radix4_q15.c

00001 /* ----------------------------------------------------------------------    
00002 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.    
00003 *    
00004 * $Date:        19. March 2015 
00005 * $Revision:    V.1.4.5  
00006 *    
00007 * Project:      CMSIS DSP Library    
00008 * Title:        arm_cfft_radix4_q15.c    
00009 *    
00010 * Description:  This file has function definition of Radix-4 FFT & IFFT function and    
00011 *               In-place bit reversal using bit reversal table    
00012 *    
00013 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
00014 *  
00015 * Redistribution and use in source and binary forms, with or without 
00016 * modification, are permitted provided that the following conditions
00017 * are met:
00018 *   - Redistributions of source code must retain the above copyright
00019 *     notice, this list of conditions and the following disclaimer.
00020 *   - Redistributions in binary form must reproduce the above copyright
00021 *     notice, this list of conditions and the following disclaimer in
00022 *     the documentation and/or other materials provided with the 
00023 *     distribution.
00024 *   - Neither the name of ARM LIMITED nor the names of its contributors
00025 *     may be used to endorse or promote products derived from this
00026 *     software without specific prior written permission.
00027 *
00028 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00029 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00030 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
00031 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
00032 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
00033 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
00034 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00035 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
00036 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00037 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
00038 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00039 * POSSIBILITY OF SUCH DAMAGE.     
00040 * -------------------------------------------------------------------- */
00041 
00042 #include "arm_math.h"
00043 
00044 
00045 void arm_radix4_butterfly_q15(
00046   q15_t * pSrc16,
00047   uint32_t fftLen,
00048   q15_t * pCoef16,
00049   uint32_t twidCoefModifier);
00050 
00051 void arm_radix4_butterfly_inverse_q15(
00052   q15_t * pSrc16,
00053   uint32_t fftLen,
00054   q15_t * pCoef16,
00055   uint32_t twidCoefModifier);
00056 
00057 void arm_bitreversal_q15(
00058   q15_t * pSrc,
00059   uint32_t fftLen,
00060   uint16_t bitRevFactor,
00061   uint16_t * pBitRevTab);
00062 
00063 /**    
00064  * @ingroup groupTransforms    
00065  */
00066 
00067 /**    
00068  * @addtogroup ComplexFFT    
00069  * @{    
00070  */
00071 
00072 
00073 /**    
00074  * @details    
00075  * @brief Processing function for the Q15 CFFT/CIFFT.   
00076  * @deprecated Do not use this function.  It has been superseded by \ref arm_cfft_q15 and will be removed
00077  * @param[in]      *S    points to an instance of the Q15 CFFT/CIFFT structure.   
00078  * @param[in, out] *pSrc points to the complex data buffer. Processing occurs in-place.   
00079  * @return none.   
00080  *     
00081  * \par Input and output formats:    
00082  * \par    
00083  * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.   
00084  * Hence the output format is different for different FFT sizes.    
00085  * The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT:   
00086  * \par   
00087  * \image html CFFTQ15.gif "Input and Output Formats for Q15 CFFT"    
00088  * \image html CIFFTQ15.gif "Input and Output Formats for Q15 CIFFT"    
00089  */
00090 
00091 void arm_cfft_radix4_q15(
00092   const arm_cfft_radix4_instance_q15 * S,
00093   q15_t * pSrc)
00094 {
00095   if(S->ifftFlag == 1u)
00096   {
00097     /*  Complex IFFT radix-4  */
00098     arm_radix4_butterfly_inverse_q15(pSrc, S->fftLen, S->pTwiddle,
00099                                      S->twidCoefModifier);
00100   }
00101   else
00102   {
00103     /*  Complex FFT radix-4  */
00104     arm_radix4_butterfly_q15(pSrc, S->fftLen, S->pTwiddle,
00105                              S->twidCoefModifier);
00106   }
00107 
00108   if(S->bitReverseFlag == 1u)
00109   {
00110     /*  Bit Reversal */
00111     arm_bitreversal_q15(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
00112   }
00113 
00114 }
00115 
00116 /**    
00117  * @} end of ComplexFFT group    
00118  */
00119 
00120 /*    
00121 * Radix-4 FFT algorithm used is :    
00122 *    
00123 * Input real and imaginary data:    
00124 * x(n) = xa + j * ya    
00125 * x(n+N/4 ) = xb + j * yb    
00126 * x(n+N/2 ) = xc + j * yc    
00127 * x(n+3N/4) = xd + j * yd    
00128 *    
00129 *    
00130 * Output real and imaginary data:    
00131 * x(4r) = xa'+ j * ya'    
00132 * x(4r+1) = xb'+ j * yb'    
00133 * x(4r+2) = xc'+ j * yc'    
00134 * x(4r+3) = xd'+ j * yd'    
00135 *    
00136 *    
00137 * Twiddle factors for radix-4 FFT:    
00138 * Wn = co1 + j * (- si1)    
00139 * W2n = co2 + j * (- si2)    
00140 * W3n = co3 + j * (- si3)    
00141     
00142 * The real and imaginary output values for the radix-4 butterfly are    
00143 * xa' = xa + xb + xc + xd    
00144 * ya' = ya + yb + yc + yd    
00145 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)    
00146 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)    
00147 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)    
00148 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)    
00149 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)    
00150 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)    
00151 *    
00152 */
00153 
00154 /**    
00155  * @brief  Core function for the Q15 CFFT butterfly process.   
00156  * @param[in, out] *pSrc16          points to the in-place buffer of Q15 data type.   
00157  * @param[in]      fftLen           length of the FFT.   
00158  * @param[in]      *pCoef16         points to twiddle coefficient buffer.   
00159  * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.   
00160  * @return none.   
00161  */
00162 
00163 void arm_radix4_butterfly_q15(
00164   q15_t * pSrc16,
00165   uint32_t fftLen,
00166   q15_t * pCoef16,
00167   uint32_t twidCoefModifier)
00168 {
00169 
00170 #ifndef ARM_MATH_CM0_FAMILY
00171 
00172   /* Run the below code for Cortex-M4 and Cortex-M3 */
00173 
00174   q31_t R, S, T, U;
00175   q31_t C1, C2, C3, out1, out2;
00176   uint32_t n1, n2, ic, i0, j, k;
00177 
00178   q15_t *ptr1;
00179   q15_t *pSi0;
00180   q15_t *pSi1;
00181   q15_t *pSi2;
00182   q15_t *pSi3;
00183 
00184   q31_t xaya, xbyb, xcyc, xdyd;
00185 
00186   /* Total process is divided into three stages */
00187 
00188   /* process first stage, middle stages, & last stage */
00189 
00190   /*  Initializations for the first stage */
00191   n2 = fftLen;
00192   n1 = n2;
00193 
00194   /* n2 = fftLen/4 */
00195   n2 >>= 2u;
00196 
00197   /* Index for twiddle coefficient */
00198   ic = 0u;
00199 
00200   /* Index for input read and output write */
00201   j = n2;
00202   
00203   pSi0 = pSrc16;
00204   pSi1 = pSi0 + 2 * n2;
00205   pSi2 = pSi1 + 2 * n2;
00206   pSi3 = pSi2 + 2 * n2;
00207 
00208   /* Input is in 1.15(q15) format */
00209 
00210   /*  start of first stage process */
00211   do
00212   {
00213     /*  Butterfly implementation */
00214 
00215     /*  Reading i0, i0+fftLen/2 inputs */
00216     /* Read ya (real), xa(imag) input */
00217     T = _SIMD32_OFFSET(pSi0);
00218     T = __SHADD16(T, 0); // this is just a SIMD arithmetic shift right by 1
00219     T = __SHADD16(T, 0); // it turns out doing this twice is 2 cycles, the alternative takes 3 cycles
00220     //in = ((int16_t) (T & 0xFFFF)) >> 2;       // alternative code that takes 3 cycles
00221     //T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
00222 
00223     /* Read yc (real), xc(imag) input */
00224     S = _SIMD32_OFFSET(pSi2);
00225     S = __SHADD16(S, 0);
00226     S = __SHADD16(S, 0);
00227 
00228     /* R = packed((ya + yc), (xa + xc) ) */
00229     R = __QADD16(T, S);
00230 
00231     /* S = packed((ya - yc), (xa - xc) ) */
00232     S = __QSUB16(T, S);
00233 
00234     /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
00235     /* Read yb (real), xb(imag) input */
00236     T = _SIMD32_OFFSET(pSi1);
00237     T = __SHADD16(T, 0);
00238     T = __SHADD16(T, 0);
00239 
00240     /* Read yd (real), xd(imag) input */
00241     U = _SIMD32_OFFSET(pSi3);
00242     U = __SHADD16(U, 0);
00243     U = __SHADD16(U, 0);
00244 
00245     /* T = packed((yb + yd), (xb + xd) ) */
00246     T = __QADD16(T, U);
00247 
00248     /*  writing the butterfly processed i0 sample */
00249     /* xa' = xa + xb + xc + xd */
00250     /* ya' = ya + yb + yc + yd */
00251     _SIMD32_OFFSET(pSi0) = __SHADD16(R, T);
00252     pSi0 += 2;
00253 
00254     /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */
00255     R = __QSUB16(R, T);
00256 
00257     /* co2 & si2 are read from SIMD Coefficient pointer */
00258     C2 = _SIMD32_OFFSET(pCoef16 + (4u * ic));
00259 
00260 #ifndef ARM_MATH_BIG_ENDIAN
00261 
00262     /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
00263     out1 = __SMUAD(C2, R) >> 16u;
00264     /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
00265     out2 = __SMUSDX(C2, R);
00266 
00267 #else
00268 
00269     /* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
00270     out1 = __SMUSDX(R, C2) >> 16u;
00271     /* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
00272     out2 = __SMUAD(C2, R);
00273 
00274 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
00275 
00276     /*  Reading i0+fftLen/4 */
00277     /* T = packed(yb, xb) */
00278     T = _SIMD32_OFFSET(pSi1);
00279     T = __SHADD16(T, 0);
00280     T = __SHADD16(T, 0);
00281 
00282     /* writing the butterfly processed i0 + fftLen/4 sample */
00283     /* writing output(xc', yc') in little endian format */
00284     _SIMD32_OFFSET(pSi1) =
00285       (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
00286     pSi1 += 2;
00287 
00288     /*  Butterfly calculations */
00289     /* U = packed(yd, xd) */
00290     U = _SIMD32_OFFSET(pSi3);
00291     U = __SHADD16(U, 0);
00292     U = __SHADD16(U, 0);
00293 
00294     /* T = packed(yb-yd, xb-xd) */
00295     T = __QSUB16(T, U);
00296 
00297 #ifndef ARM_MATH_BIG_ENDIAN
00298 
00299     /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
00300     R = __QASX(S, T);
00301     /* S = packed((ya-yc) - (xb- xd),  (xa-xc) + (yb-yd)) */
00302     S = __QSAX(S, T);
00303 
00304 #else
00305 
00306     /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
00307     R = __QSAX(S, T);
00308     /* S = packed((ya-yc) - (xb- xd),  (xa-xc) + (yb-yd)) */
00309     S = __QASX(S, T);
00310 
00311 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
00312 
00313     /* co1 & si1 are read from SIMD Coefficient pointer */
00314     C1 = _SIMD32_OFFSET(pCoef16 + (2u * ic));
00315     /*  Butterfly process for the i0+fftLen/2 sample */
00316 
00317 #ifndef ARM_MATH_BIG_ENDIAN
00318 
00319     /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
00320     out1 = __SMUAD(C1, S) >> 16u;
00321     /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
00322     out2 = __SMUSDX(C1, S);
00323 
00324 #else
00325 
00326     /* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
00327     out1 = __SMUSDX(S, C1) >> 16u;
00328     /* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
00329     out2 = __SMUAD(C1, S);
00330 
00331 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
00332 
00333     /* writing output(xb', yb') in little endian format */
00334     _SIMD32_OFFSET(pSi2) =
00335       ((out2) & 0xFFFF0000) | ((out1) & 0x0000FFFF);
00336     pSi2 += 2;
00337 
00338 
00339     /* co3 & si3 are read from SIMD Coefficient pointer */
00340     C3 = _SIMD32_OFFSET(pCoef16 + (6u * ic));
00341     /*  Butterfly process for the i0+3fftLen/4 sample */
00342 
00343 #ifndef ARM_MATH_BIG_ENDIAN
00344 
00345     /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
00346     out1 = __SMUAD(C3, R) >> 16u;
00347     /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
00348     out2 = __SMUSDX(C3, R);
00349 
00350 #else
00351 
00352     /* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
00353     out1 = __SMUSDX(R, C3) >> 16u;
00354     /* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
00355     out2 = __SMUAD(C3, R);
00356 
00357 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
00358 
00359     /* writing output(xd', yd') in little endian format */
00360     _SIMD32_OFFSET(pSi3) =
00361       ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
00362     pSi3 += 2;
00363 
00364     /*  Twiddle coefficients index modifier */
00365     ic = ic + twidCoefModifier;
00366 
00367   } while(--j);
00368   /* data is in 4.11(q11) format */
00369 
00370   /* end of first stage process */
00371 
00372 
00373   /* start of middle stage process */
00374 
00375   /*  Twiddle coefficients index modifier */
00376   twidCoefModifier <<= 2u;
00377 
00378   /*  Calculation of Middle stage */
00379   for (k = fftLen / 4u; k > 4u; k >>= 2u)
00380   {
00381     /*  Initializations for the middle stage */
00382     n1 = n2;
00383     n2 >>= 2u;
00384     ic = 0u;
00385 
00386     for (j = 0u; j <= (n2 - 1u); j++)
00387     {
00388       /*  index calculation for the coefficients */
00389       C1 = _SIMD32_OFFSET(pCoef16 + (2u * ic));
00390       C2 = _SIMD32_OFFSET(pCoef16 + (4u * ic));
00391       C3 = _SIMD32_OFFSET(pCoef16 + (6u * ic));
00392 
00393       /*  Twiddle coefficients index modifier */
00394       ic = ic + twidCoefModifier;
00395       
00396       pSi0 = pSrc16 + 2 * j;
00397       pSi1 = pSi0 + 2 * n2;
00398       pSi2 = pSi1 + 2 * n2;
00399       pSi3 = pSi2 + 2 * n2;
00400 
00401       /*  Butterfly implementation */
00402       for (i0 = j; i0 < fftLen; i0 += n1)
00403       {
00404         /*  Reading i0, i0+fftLen/2 inputs */
00405         /* Read ya (real), xa(imag) input */
00406         T = _SIMD32_OFFSET(pSi0);
00407 
00408         /* Read yc (real), xc(imag) input */
00409         S = _SIMD32_OFFSET(pSi2);
00410 
00411         /* R = packed( (ya + yc), (xa + xc)) */
00412         R = __QADD16(T, S);
00413 
00414         /* S = packed((ya - yc), (xa - xc)) */
00415         S = __QSUB16(T, S);
00416 
00417         /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
00418         /* Read yb (real), xb(imag) input */
00419         T = _SIMD32_OFFSET(pSi1);
00420 
00421         /* Read yd (real), xd(imag) input */
00422         U = _SIMD32_OFFSET(pSi3);
00423 
00424         /* T = packed( (yb + yd), (xb + xd)) */
00425         T = __QADD16(T, U);
00426 
00427         /*  writing the butterfly processed i0 sample */
00428 
00429         /* xa' = xa + xb + xc + xd */
00430         /* ya' = ya + yb + yc + yd */
00431         out1 = __SHADD16(R, T);
00432         out1 = __SHADD16(out1, 0);
00433         _SIMD32_OFFSET(pSi0) = out1;
00434         pSi0 += 2 * n1;
00435 
00436         /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */
00437         R = __SHSUB16(R, T);
00438 
00439 #ifndef ARM_MATH_BIG_ENDIAN
00440 
00441         /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
00442         out1 = __SMUAD(C2, R) >> 16u;
00443 
00444         /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
00445         out2 = __SMUSDX(C2, R);
00446 
00447 #else
00448 
00449         /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
00450         out1 = __SMUSDX(R, C2) >> 16u;
00451 
00452         /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
00453         out2 = __SMUAD(C2, R);
00454 
00455 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
00456 
00457         /*  Reading i0+3fftLen/4 */
00458         /* Read yb (real), xb(imag) input */
00459         T = _SIMD32_OFFSET(pSi1);
00460 
00461         /*  writing the butterfly processed i0 + fftLen/4 sample */
00462         /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
00463         /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
00464         _SIMD32_OFFSET(pSi1) =
00465           ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
00466         pSi1 += 2 * n1;
00467 
00468         /*  Butterfly calculations */
00469 
00470         /* Read yd (real), xd(imag) input */
00471         U = _SIMD32_OFFSET(pSi3);
00472 
00473         /* T = packed(yb-yd, xb-xd) */
00474         T = __QSUB16(T, U);
00475 
00476 #ifndef ARM_MATH_BIG_ENDIAN
00477 
00478         /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
00479         R = __SHASX(S, T);
00480 
00481         /* S = packed((ya-yc) - (xb- xd),  (xa-xc) + (yb-yd)) */
00482         S = __SHSAX(S, T);
00483 
00484 
00485         /*  Butterfly process for the i0+fftLen/2 sample */
00486         out1 = __SMUAD(C1, S) >> 16u;
00487         out2 = __SMUSDX(C1, S);
00488 
00489 #else
00490 
00491         /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
00492         R = __SHSAX(S, T);
00493 
00494         /* S = packed((ya-yc) - (xb- xd),  (xa-xc) + (yb-yd)) */
00495         S = __SHASX(S, T);
00496 
00497 
00498         /*  Butterfly process for the i0+fftLen/2 sample */
00499         out1 = __SMUSDX(S, C1) >> 16u;
00500         out2 = __SMUAD(C1, S);
00501 
00502 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
00503 
00504         /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
00505         /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
00506         _SIMD32_OFFSET(pSi2) =
00507           ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
00508         pSi2 += 2 * n1;
00509 
00510         /*  Butterfly process for the i0+3fftLen/4 sample */
00511 
00512 #ifndef ARM_MATH_BIG_ENDIAN
00513 
00514         out1 = __SMUAD(C3, R) >> 16u;
00515         out2 = __SMUSDX(C3, R);
00516 
00517 #else
00518 
00519         out1 = __SMUSDX(R, C3) >> 16u;
00520         out2 = __SMUAD(C3, R);
00521 
00522 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
00523 
00524         /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
00525         /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
00526         _SIMD32_OFFSET(pSi3) =
00527           ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
00528         pSi3 += 2 * n1;
00529       }
00530     }
00531     /*  Twiddle coefficients index modifier */
00532     twidCoefModifier <<= 2u;
00533   }
00534   /* end of middle stage process */
00535 
00536 
00537   /* data is in 10.6(q6) format for the 1024 point */
00538   /* data is in 8.8(q8) format for the 256 point */
00539   /* data is in 6.10(q10) format for the 64 point */
00540   /* data is in 4.12(q12) format for the 16 point */
00541 
00542   /*  Initializations for the last stage */
00543   j = fftLen >> 2;
00544 
00545   ptr1 = &pSrc16[0];
00546 
00547   /* start of last stage process */
00548 
00549   /*  Butterfly implementation */
00550   do
00551   {
00552     /* Read xa (real), ya(imag) input */
00553     xaya = *__SIMD32(ptr1)++;
00554 
00555     /* Read xb (real), yb(imag) input */
00556     xbyb = *__SIMD32(ptr1)++;
00557 
00558     /* Read xc (real), yc(imag) input */
00559     xcyc = *__SIMD32(ptr1)++;
00560 
00561     /* Read xd (real), yd(imag) input */
00562     xdyd = *__SIMD32(ptr1)++;
00563 
00564     /* R = packed((ya + yc), (xa + xc)) */
00565     R = __QADD16(xaya, xcyc);
00566 
00567     /* T = packed((yb + yd), (xb + xd)) */
00568     T = __QADD16(xbyb, xdyd);
00569 
00570     /* pointer updation for writing */
00571     ptr1 = ptr1 - 8u;
00572 
00573 
00574     /* xa' = xa + xb + xc + xd */
00575     /* ya' = ya + yb + yc + yd */
00576     *__SIMD32(ptr1)++ = __SHADD16(R, T);
00577 
00578     /* T = packed((yb + yd), (xb + xd)) */
00579     T = __QADD16(xbyb, xdyd);
00580 
00581     /* xc' = (xa-xb+xc-xd) */
00582     /* yc' = (ya-yb+yc-yd) */
00583     *__SIMD32(ptr1)++ = __SHSUB16(R, T);
00584 
00585     /* S = packed((ya - yc), (xa - xc)) */
00586     S = __QSUB16(xaya, xcyc);
00587 
00588     /* Read yd (real), xd(imag) input */
00589     /* T = packed( (yb - yd), (xb - xd))  */
00590     U = __QSUB16(xbyb, xdyd);
00591 
00592 #ifndef ARM_MATH_BIG_ENDIAN
00593 
00594     /* xb' = (xa+yb-xc-yd) */
00595     /* yb' = (ya-xb-yc+xd) */
00596     *__SIMD32(ptr1)++ = __SHSAX(S, U);
00597 
00598 
00599     /* xd' = (xa-yb-xc+yd) */
00600     /* yd' = (ya+xb-yc-xd) */
00601     *__SIMD32(ptr1)++ = __SHASX(S, U);
00602 
00603 #else
00604 
00605     /* xb' = (xa+yb-xc-yd) */
00606     /* yb' = (ya-xb-yc+xd) */
00607     *__SIMD32(ptr1)++ = __SHASX(S, U);
00608 
00609 
00610     /* xd' = (xa-yb-xc+yd) */
00611     /* yd' = (ya+xb-yc-xd) */
00612     *__SIMD32(ptr1)++ = __SHSAX(S, U);
00613 
00614 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
00615 
00616   } while(--j);
00617 
00618   /* end of last stage process */
00619 
00620   /* output is in 11.5(q5) format for the 1024 point */
00621   /* output is in 9.7(q7) format for the 256 point   */
00622   /* output is in 7.9(q9) format for the 64 point  */
00623   /* output is in 5.11(q11) format for the 16 point  */
00624 
00625 
00626 #else
00627 
00628   /* Run the below code for Cortex-M0 */
00629 
00630   q15_t R0, R1, S0, S1, T0, T1, U0, U1;
00631   q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2;
00632   uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;
00633 
00634   /* Total process is divided into three stages */
00635 
00636   /* process first stage, middle stages, & last stage */
00637 
00638   /*  Initializations for the first stage */
00639   n2 = fftLen;
00640   n1 = n2;
00641 
00642   /* n2 = fftLen/4 */
00643   n2 >>= 2u;
00644 
00645   /* Index for twiddle coefficient */
00646   ic = 0u;
00647 
00648   /* Index for input read and output write */
00649   i0 = 0u;
00650   j = n2;
00651 
00652   /* Input is in 1.15(q15) format */
00653 
00654   /*  start of first stage process */
00655   do
00656   {
00657     /*  Butterfly implementation */
00658 
00659     /*  index calculation for the input as, */
00660     /*  pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
00661     i1 = i0 + n2;
00662     i2 = i1 + n2;
00663     i3 = i2 + n2;
00664 
00665     /*  Reading i0, i0+fftLen/2 inputs */
00666 
00667     /* input is down scale by 4 to avoid overflow */
00668     /* Read ya (real), xa(imag) input */
00669     T0 = pSrc16[i0 * 2u] >> 2u;
00670     T1 = pSrc16[(i0 * 2u) + 1u] >> 2u;
00671 
00672     /* input is down scale by 4 to avoid overflow */
00673     /* Read yc (real), xc(imag) input */
00674     S0 = pSrc16[i2 * 2u] >> 2u;
00675     S1 = pSrc16[(i2 * 2u) + 1u] >> 2u;
00676 
00677     /* R0 = (ya + yc) */
00678     R0 = __SSAT(T0 + S0, 16u);
00679     /* R1 = (xa + xc) */
00680     R1 = __SSAT(T1 + S1, 16u);
00681 
00682     /* S0 = (ya - yc) */
00683     S0 = __SSAT(T0 - S0, 16);
00684     /* S1 = (xa - xc) */
00685     S1 = __SSAT(T1 - S1, 16);
00686 
00687     /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
00688     /* input is down scale by 4 to avoid overflow */
00689     /* Read yb (real), xb(imag) input */
00690     T0 = pSrc16[i1 * 2u] >> 2u;
00691     T1 = pSrc16[(i1 * 2u) + 1u] >> 2u;
00692 
00693     /* input is down scale by 4 to avoid overflow */
00694     /* Read yd (real), xd(imag) input */
00695     U0 = pSrc16[i3 * 2u] >> 2u;
00696     U1 = pSrc16[(i3 * 2u) + 1] >> 2u;
00697 
00698     /* T0 = (yb + yd) */
00699     T0 = __SSAT(T0 + U0, 16u);
00700     /* T1 = (xb + xd) */
00701     T1 = __SSAT(T1 + U1, 16u);
00702 
00703     /*  writing the butterfly processed i0 sample */
00704     /* ya' = ya + yb + yc + yd */
00705     /* xa' = xa + xb + xc + xd */
00706     pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u);
00707     pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u);
00708 
00709     /* R0 = (ya + yc) - (yb + yd) */
00710     /* R1 = (xa + xc) - (xb + xd) */
00711     R0 = __SSAT(R0 - T0, 16u);
00712     R1 = __SSAT(R1 - T1, 16u);
00713 
00714     /* co2 & si2 are read from Coefficient pointer */
00715     Co2 = pCoef16[2u * ic * 2u];
00716     Si2 = pCoef16[(2u * ic * 2u) + 1];
00717 
00718     /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
00719     out1 = (q15_t) ((Co2 * R0 + Si2 * R1) >> 16u);
00720     /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
00721     out2 = (q15_t) ((-Si2 * R0 + Co2 * R1) >> 16u);
00722 
00723     /*  Reading i0+fftLen/4 */
00724     /* input is down scale by 4 to avoid overflow */
00725     /* T0 = yb, T1 =  xb */
00726     T0 = pSrc16[i1 * 2u] >> 2;
00727     T1 = pSrc16[(i1 * 2u) + 1] >> 2;
00728 
00729     /* writing the butterfly processed i0 + fftLen/4 sample */
00730     /* writing output(xc', yc') in little endian format */
00731     pSrc16[i1 * 2u] = out1;
00732     pSrc16[(i1 * 2u) + 1] = out2;
00733 
00734     /*  Butterfly calculations */
00735     /* input is down scale by 4 to avoid overflow */
00736     /* U0 = yd, U1 = xd */
00737     U0 = pSrc16[i3 * 2u] >> 2;
00738     U1 = pSrc16[(i3 * 2u) + 1] >> 2;
00739     /* T0 = yb-yd */
00740     T0 = __SSAT(T0 - U0, 16);
00741     /* T1 = xb-xd */
00742     T1 = __SSAT(T1 - U1, 16);
00743 
00744     /* R1 = (ya-yc) + (xb- xd),  R0 = (xa-xc) - (yb-yd)) */
00745     R0 = (q15_t) __SSAT((q31_t) (S0 - T1), 16);
00746     R1 = (q15_t) __SSAT((q31_t) (S1 + T0), 16);
00747 
00748     /* S1 = (ya-yc) - (xb- xd), S0 = (xa-xc) + (yb-yd)) */
00749     S0 = (q15_t) __SSAT(((q31_t) S0 + T1), 16u);
00750     S1 = (q15_t) __SSAT(((q31_t) S1 - T0), 16u);
00751 
00752     /* co1 & si1 are read from Coefficient pointer */
00753     Co1 = pCoef16[ic * 2u];
00754     Si1 = pCoef16[(ic * 2u) + 1];
00755     /*  Butterfly process for the i0+fftLen/2 sample */
00756     /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
00757     out1 = (q15_t) ((Si1 * S1 + Co1 * S0) >> 16);
00758     /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
00759     out2 = (q15_t) ((-Si1 * S0 + Co1 * S1) >> 16);
00760 
00761     /* writing output(xb', yb') in little endian format */
00762     pSrc16[i2 * 2u] = out1;
00763     pSrc16[(i2 * 2u) + 1] = out2;
00764 
00765     /* Co3 & si3 are read from Coefficient pointer */
00766     Co3 = pCoef16[3u * (ic * 2u)];
00767     Si3 = pCoef16[(3u * (ic * 2u)) + 1];
00768     /*  Butterfly process for the i0+3fftLen/4 sample */
00769     /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */
00770     out1 = (q15_t) ((Si3 * R1 + Co3 * R0) >> 16u);
00771     /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */
00772     out2 = (q15_t) ((-Si3 * R0 + Co3 * R1) >> 16u);
00773     /* writing output(xd', yd') in little endian format */
00774     pSrc16[i3 * 2u] = out1;
00775     pSrc16[(i3 * 2u) + 1] = out2;
00776 
00777     /*  Twiddle coefficients index modifier */
00778     ic = ic + twidCoefModifier;
00779 
00780     /*  Updating input index */
00781     i0 = i0 + 1u;
00782 
00783   } while(--j);
00784   /* data is in 4.11(q11) format */
00785 
00786   /* end of first stage process */
00787 
00788 
00789   /* start of middle stage process */
00790 
00791   /*  Twiddle coefficients index modifier */
00792   twidCoefModifier <<= 2u;
00793 
00794   /*  Calculation of Middle stage */
00795   for (k = fftLen / 4u; k > 4u; k >>= 2u)
00796   {
00797     /*  Initializations for the middle stage */
00798     n1 = n2;
00799     n2 >>= 2u;
00800     ic = 0u;
00801 
00802     for (j = 0u; j <= (n2 - 1u); j++)
00803     {
00804       /*  index calculation for the coefficients */
00805       Co1 = pCoef16[ic * 2u];
00806       Si1 = pCoef16[(ic * 2u) + 1u];
00807       Co2 = pCoef16[2u * (ic * 2u)];
00808       Si2 = pCoef16[(2u * (ic * 2u)) + 1u];
00809       Co3 = pCoef16[3u * (ic * 2u)];
00810       Si3 = pCoef16[(3u * (ic * 2u)) + 1u];
00811 
00812       /*  Twiddle coefficients index modifier */
00813       ic = ic + twidCoefModifier;
00814 
00815       /*  Butterfly implementation */
00816       for (i0 = j; i0 < fftLen; i0 += n1)
00817       {
00818         /*  index calculation for the input as, */
00819         /*  pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
00820         i1 = i0 + n2;
00821         i2 = i1 + n2;
00822         i3 = i2 + n2;
00823 
00824         /*  Reading i0, i0+fftLen/2 inputs */
00825         /* Read ya (real), xa(imag) input */
00826         T0 = pSrc16[i0 * 2u];
00827         T1 = pSrc16[(i0 * 2u) + 1u];
00828 
00829         /* Read yc (real), xc(imag) input */
00830         S0 = pSrc16[i2 * 2u];
00831         S1 = pSrc16[(i2 * 2u) + 1u];
00832 
00833         /* R0 = (ya + yc), R1 = (xa + xc) */
00834         R0 = __SSAT(T0 + S0, 16);
00835         R1 = __SSAT(T1 + S1, 16);
00836 
00837         /* S0 = (ya - yc), S1 =(xa - xc) */
00838         S0 = __SSAT(T0 - S0, 16);
00839         S1 = __SSAT(T1 - S1, 16);
00840 
00841         /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
00842         /* Read yb (real), xb(imag) input */
00843         T0 = pSrc16[i1 * 2u];
00844         T1 = pSrc16[(i1 * 2u) + 1u];
00845 
00846         /* Read yd (real), xd(imag) input */
00847         U0 = pSrc16[i3 * 2u];
00848         U1 = pSrc16[(i3 * 2u) + 1u];
00849 
00850 
00851         /* T0 = (yb + yd), T1 = (xb + xd) */
00852         T0 = __SSAT(T0 + U0, 16);
00853         T1 = __SSAT(T1 + U1, 16);
00854 
00855         /*  writing the butterfly processed i0 sample */
00856 
00857         /* xa' = xa + xb + xc + xd */
00858         /* ya' = ya + yb + yc + yd */
00859         out1 = ((R0 >> 1u) + (T0 >> 1u)) >> 1u;
00860         out2 = ((R1 >> 1u) + (T1 >> 1u)) >> 1u;
00861 
00862         pSrc16[i0 * 2u] = out1;
00863         pSrc16[(2u * i0) + 1u] = out2;
00864 
00865         /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
00866         R0 = (R0 >> 1u) - (T0 >> 1u);
00867         R1 = (R1 >> 1u) - (T1 >> 1u);
00868 
00869         /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
00870         out1 = (q15_t) ((Co2 * R0 + Si2 * R1) >> 16u);
00871 
00872         /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
00873         out2 = (q15_t) ((-Si2 * R0 + Co2 * R1) >> 16u);
00874 
00875         /*  Reading i0+3fftLen/4 */
00876         /* Read yb (real), xb(imag) input */
00877         T0 = pSrc16[i1 * 2u];
00878         T1 = pSrc16[(i1 * 2u) + 1u];
00879 
00880         /*  writing the butterfly processed i0 + fftLen/4 sample */
00881         /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
00882         /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
00883         pSrc16[i1 * 2u] = out1;
00884         pSrc16[(i1 * 2u) + 1u] = out2;
00885 
00886         /*  Butterfly calculations */
00887 
00888         /* Read yd (real), xd(imag) input */
00889         U0 = pSrc16[i3 * 2u];
00890         U1 = pSrc16[(i3 * 2u) + 1u];
00891 
00892         /* T0 = yb-yd, T1 = xb-xd */
00893         T0 = __SSAT(T0 - U0, 16);
00894         T1 = __SSAT(T1 - U1, 16);
00895 
00896         /* R0 = (ya-yc) + (xb- xd), R1 = (xa-xc) - (yb-yd)) */
00897         R0 = (S0 >> 1u) - (T1 >> 1u);
00898         R1 = (S1 >> 1u) + (T0 >> 1u);
00899 
00900         /* S0 = (ya-yc) - (xb- xd), S1 = (xa-xc) + (yb-yd)) */
00901         S0 = (S0 >> 1u) + (T1 >> 1u);
00902         S1 = (S1 >> 1u) - (T0 >> 1u);
00903 
00904         /*  Butterfly process for the i0+fftLen/2 sample */
00905         out1 = (q15_t) ((Co1 * S0 + Si1 * S1) >> 16u);
00906 
00907         out2 = (q15_t) ((-Si1 * S0 + Co1 * S1) >> 16u);
00908 
00909         /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
00910         /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
00911         pSrc16[i2 * 2u] = out1;
00912         pSrc16[(i2 * 2u) + 1u] = out2;
00913 
00914         /*  Butterfly process for the i0+3fftLen/4 sample */
00915         out1 = (q15_t) ((Si3 * R1 + Co3 * R0) >> 16u);
00916 
00917         out2 = (q15_t) ((-Si3 * R0 + Co3 * R1) >> 16u);
00918         /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */
00919         /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */
00920         pSrc16[i3 * 2u] = out1;
00921         pSrc16[(i3 * 2u) + 1u] = out2;
00922       }
00923     }
00924     /*  Twiddle coefficients index modifier */
00925     twidCoefModifier <<= 2u;
00926   }
00927   /* end of middle stage process */
00928 
00929 
00930   /* data is in 10.6(q6) format for the 1024 point */
00931   /* data is in 8.8(q8) format for the 256 point */
00932   /* data is in 6.10(q10) format for the 64 point */
00933   /* data is in 4.12(q12) format for the 16 point */
00934 
00935   /*  Initializations for the last stage */
00936   n1 = n2;
00937   n2 >>= 2u;
00938 
00939   /* start of last stage process */
00940 
00941   /*  Butterfly implementation */
00942   for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1)
00943   {
00944     /*  index calculation for the input as, */
00945     /*  pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
00946     i1 = i0 + n2;
00947     i2 = i1 + n2;
00948     i3 = i2 + n2;
00949 
00950     /*  Reading i0, i0+fftLen/2 inputs */
00951     /* Read ya (real), xa(imag) input */
00952     T0 = pSrc16[i0 * 2u];
00953     T1 = pSrc16[(i0 * 2u) + 1u];
00954 
00955     /* Read yc (real), xc(imag) input */
00956     S0 = pSrc16[i2 * 2u];
00957     S1 = pSrc16[(i2 * 2u) + 1u];
00958 
00959     /* R0 = (ya + yc), R1 = (xa + xc) */
00960     R0 = __SSAT(T0 + S0, 16u);
00961     R1 = __SSAT(T1 + S1, 16u);
00962 
00963     /* S0 = (ya - yc), S1 = (xa - xc) */
00964     S0 = __SSAT(T0 - S0, 16u);
00965     S1 = __SSAT(T1 - S1, 16u);
00966 
00967     /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
00968     /* Read yb (real), xb(imag) input */
00969     T0 = pSrc16[i1 * 2u];
00970     T1 = pSrc16[(i1 * 2u) + 1u];
00971     /* Read yd (real), xd(imag) input */
00972     U0 = pSrc16[i3 * 2u];
00973     U1 = pSrc16[(i3 * 2u) + 1u];
00974 
00975     /* T0 = (yb + yd), T1 = (xb + xd)) */
00976     T0 = __SSAT(T0 + U0, 16u);
00977     T1 = __SSAT(T1 + U1, 16u);
00978 
00979     /*  writing the butterfly processed i0 sample */
00980     /* xa' = xa + xb + xc + xd */
00981     /* ya' = ya + yb + yc + yd */
00982     pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u);
00983     pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u);
00984 
00985     /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
00986     R0 = (R0 >> 1u) - (T0 >> 1u);
00987     R1 = (R1 >> 1u) - (T1 >> 1u);
00988     /* Read yb (real), xb(imag) input */
00989     T0 = pSrc16[i1 * 2u];
00990     T1 = pSrc16[(i1 * 2u) + 1u];
00991 
00992     /*  writing the butterfly processed i0 + fftLen/4 sample */
00993     /* xc' = (xa-xb+xc-xd) */
00994     /* yc' = (ya-yb+yc-yd) */
00995     pSrc16[i1 * 2u] = R0;
00996     pSrc16[(i1 * 2u) + 1u] = R1;
00997 
00998     /* Read yd (real), xd(imag) input */
00999     U0 = pSrc16[i3 * 2u];
01000     U1 = pSrc16[(i3 * 2u) + 1u];
01001     /* T0 = (yb - yd), T1 = (xb - xd)  */
01002     T0 = __SSAT(T0 - U0, 16u);
01003     T1 = __SSAT(T1 - U1, 16u);
01004 
01005     /*  writing the butterfly processed i0 + fftLen/2 sample */
01006     /* xb' = (xa+yb-xc-yd) */
01007     /* yb' = (ya-xb-yc+xd) */
01008     pSrc16[i2 * 2u] = (S0 >> 1u) + (T1 >> 1u);
01009     pSrc16[(i2 * 2u) + 1u] = (S1 >> 1u) - (T0 >> 1u);
01010 
01011     /*  writing the butterfly processed i0 + 3fftLen/4 sample */
01012     /* xd' = (xa-yb-xc+yd) */
01013     /* yd' = (ya+xb-yc-xd) */
01014     pSrc16[i3 * 2u] = (S0 >> 1u) - (T1 >> 1u);
01015     pSrc16[(i3 * 2u) + 1u] = (S1 >> 1u) + (T0 >> 1u);
01016 
01017   }
01018 
01019   /* end of last stage process */
01020 
01021   /* output is in 11.5(q5) format for the 1024 point */
01022   /* output is in 9.7(q7) format for the 256 point   */
01023   /* output is in 7.9(q9) format for the 64 point  */
01024   /* output is in 5.11(q11) format for the 16 point  */
01025 
01026 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
01027 
01028 }
01029 
01030 
01031 /**    
01032  * @brief  Core function for the Q15 CIFFT butterfly process.   
01033  * @param[in, out] *pSrc16          points to the in-place buffer of Q15 data type.   
01034  * @param[in]      fftLen           length of the FFT.   
01035  * @param[in]      *pCoef16         points to twiddle coefficient buffer.   
01036  * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.   
01037  * @return none.   
01038  */
01039 
01040 /*    
01041 * Radix-4 IFFT algorithm used is :    
01042 *    
01043 * CIFFT uses same twiddle coefficients as CFFT function    
01044 *  x[k] = x[n] + (j)^k * x[n + fftLen/4] + (-1)^k * x[n+fftLen/2] + (-j)^k * x[n+3*fftLen/4]    
01045 *    
01046 *    
01047 * IFFT is implemented with following changes in equations from FFT    
01048 *    
01049 * Input real and imaginary data:    
01050 * x(n) = xa + j * ya    
01051 * x(n+N/4 ) = xb + j * yb    
01052 * x(n+N/2 ) = xc + j * yc    
01053 * x(n+3N/4) = xd + j * yd    
01054 *    
01055 *    
01056 * Output real and imaginary data:    
01057 * x(4r) = xa'+ j * ya'    
01058 * x(4r+1) = xb'+ j * yb'    
01059 * x(4r+2) = xc'+ j * yc'    
01060 * x(4r+3) = xd'+ j * yd'    
01061 *    
01062 *    
01063 * Twiddle factors for radix-4 IFFT:    
01064 * Wn = co1 + j * (si1)    
01065 * W2n = co2 + j * (si2)    
01066 * W3n = co3 + j * (si3)    
01067 *    
01068 * The real and imaginary output values for the radix-4 butterfly are    
01069 * xa' = xa + xb + xc + xd    
01070 * ya' = ya + yb + yc + yd    
01071 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)    
01072 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)    
01073 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)    
01074 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)    
01075 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)    
01076 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)    
01077 *    
01078 */
01079 
01080 void arm_radix4_butterfly_inverse_q15(
01081   q15_t * pSrc16,
01082   uint32_t fftLen,
01083   q15_t * pCoef16,
01084   uint32_t twidCoefModifier)
01085 {
01086 
01087 #ifndef ARM_MATH_CM0_FAMILY
01088 
01089   /* Run the below code for Cortex-M4 and Cortex-M3 */
01090 
01091   q31_t R, S, T, U;
01092   q31_t C1, C2, C3, out1, out2;
01093   uint32_t n1, n2, ic, i0, j, k;
01094 
01095   q15_t *ptr1;
01096   q15_t *pSi0;
01097   q15_t *pSi1;
01098   q15_t *pSi2;
01099   q15_t *pSi3;
01100 
01101   q31_t xaya, xbyb, xcyc, xdyd;
01102 
01103   /* Total process is divided into three stages */
01104 
01105   /* process first stage, middle stages, & last stage */
01106 
01107   /*  Initializations for the first stage */
01108   n2 = fftLen;
01109   n1 = n2;
01110 
01111   /* n2 = fftLen/4 */
01112   n2 >>= 2u;
01113 
01114   /* Index for twiddle coefficient */
01115   ic = 0u;
01116 
01117   /* Index for input read and output write */
01118   j = n2;
01119   
01120   pSi0 = pSrc16;
01121   pSi1 = pSi0 + 2 * n2;
01122   pSi2 = pSi1 + 2 * n2;
01123   pSi3 = pSi2 + 2 * n2;
01124 
01125   /* Input is in 1.15(q15) format */
01126 
01127   /*  start of first stage process */
01128   do
01129   {
01130     /*  Butterfly implementation */
01131 
01132     /*  Reading i0, i0+fftLen/2 inputs */
01133     /* Read ya (real), xa(imag) input */
01134     T = _SIMD32_OFFSET(pSi0);
01135     T = __SHADD16(T, 0);
01136     T = __SHADD16(T, 0);
01137 
01138     /* Read yc (real), xc(imag) input */
01139     S = _SIMD32_OFFSET(pSi2);
01140     S = __SHADD16(S, 0);
01141     S = __SHADD16(S, 0);
01142 
01143     /* R = packed((ya + yc), (xa + xc) ) */
01144     R = __QADD16(T, S);
01145 
01146     /* S = packed((ya - yc), (xa - xc) ) */
01147     S = __QSUB16(T, S);
01148 
01149     /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
01150     /* Read yb (real), xb(imag) input */
01151     T = _SIMD32_OFFSET(pSi1);
01152     T = __SHADD16(T, 0);
01153     T = __SHADD16(T, 0);
01154 
01155     /* Read yd (real), xd(imag) input */
01156     U = _SIMD32_OFFSET(pSi3);
01157     U = __SHADD16(U, 0);
01158     U = __SHADD16(U, 0);
01159 
01160     /* T = packed((yb + yd), (xb + xd) ) */
01161     T = __QADD16(T, U);
01162 
01163     /*  writing the butterfly processed i0 sample */
01164     /* xa' = xa + xb + xc + xd */
01165     /* ya' = ya + yb + yc + yd */
01166     _SIMD32_OFFSET(pSi0) = __SHADD16(R, T);
01167     pSi0 += 2;
01168 
01169     /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */
01170     R = __QSUB16(R, T);
01171 
01172     /* co2 & si2 are read from SIMD Coefficient pointer */
01173     C2 = _SIMD32_OFFSET(pCoef16 + (4u * ic));
01174 
01175 #ifndef ARM_MATH_BIG_ENDIAN
01176 
01177     /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
01178     out1 = __SMUSD(C2, R) >> 16u;
01179     /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
01180     out2 = __SMUADX(C2, R);
01181 
01182 #else
01183 
01184     /* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
01185     out1 = __SMUADX(C2, R) >> 16u;
01186     /* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
01187     out2 = __SMUSD(__QSUB16(0, C2), R);
01188 
01189 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
01190 
01191     /*  Reading i0+fftLen/4 */
01192     /* T = packed(yb, xb) */
01193     T = _SIMD32_OFFSET(pSi1);
01194     T = __SHADD16(T, 0);
01195     T = __SHADD16(T, 0);
01196 
01197     /* writing the butterfly processed i0 + fftLen/4 sample */
01198     /* writing output(xc', yc') in little endian format */
01199     _SIMD32_OFFSET(pSi1) =
01200       (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
01201     pSi1 += 2;
01202 
01203     /*  Butterfly calculations */
01204     /* U = packed(yd, xd) */
01205     U = _SIMD32_OFFSET(pSi3);
01206     U = __SHADD16(U, 0);
01207     U = __SHADD16(U, 0);
01208 
01209     /* T = packed(yb-yd, xb-xd) */
01210     T = __QSUB16(T, U);
01211 
01212 #ifndef ARM_MATH_BIG_ENDIAN
01213 
01214     /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
01215     R = __QSAX(S, T);
01216     /* S = packed((ya-yc) + (xb- xd),  (xa-xc) - (yb-yd)) */
01217     S = __QASX(S, T);
01218 
01219 #else
01220 
01221     /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
01222     R = __QASX(S, T);
01223     /* S = packed((ya-yc) - (xb- xd),  (xa-xc) + (yb-yd)) */
01224     S = __QSAX(S, T);
01225 
01226 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
01227 
01228     /* co1 & si1 are read from SIMD Coefficient pointer */
01229     C1 = _SIMD32_OFFSET(pCoef16 + (2u * ic));
01230     /*  Butterfly process for the i0+fftLen/2 sample */
01231 
01232 #ifndef ARM_MATH_BIG_ENDIAN
01233 
01234     /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
01235     out1 = __SMUSD(C1, S) >> 16u;
01236     /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
01237     out2 = __SMUADX(C1, S);
01238 
01239 #else
01240 
01241     /* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
01242     out1 = __SMUADX(C1, S) >> 16u;
01243     /* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
01244     out2 = __SMUSD(__QSUB16(0, C1), S);
01245 
01246 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
01247 
01248     /* writing output(xb', yb') in little endian format */
01249     _SIMD32_OFFSET(pSi2) =
01250       ((out2) & 0xFFFF0000) | ((out1) & 0x0000FFFF);
01251     pSi2 += 2;
01252 
01253 
01254     /* co3 & si3 are read from SIMD Coefficient pointer */
01255     C3 = _SIMD32_OFFSET(pCoef16 + (6u * ic));
01256     /*  Butterfly process for the i0+3fftLen/4 sample */
01257 
01258 #ifndef ARM_MATH_BIG_ENDIAN
01259 
01260     /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
01261     out1 = __SMUSD(C3, R) >> 16u;
01262     /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
01263     out2 = __SMUADX(C3, R);
01264 
01265 #else
01266 
01267     /* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
01268     out1 = __SMUADX(C3, R) >> 16u;
01269     /* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
01270     out2 = __SMUSD(__QSUB16(0, C3), R);
01271 
01272 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
01273 
01274     /* writing output(xd', yd') in little endian format */
01275     _SIMD32_OFFSET(pSi3) =
01276       ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
01277     pSi3 += 2;
01278 
01279     /*  Twiddle coefficients index modifier */
01280     ic = ic + twidCoefModifier;
01281 
01282   } while(--j);
01283   /* data is in 4.11(q11) format */
01284 
01285   /* end of first stage process */
01286 
01287 
01288   /* start of middle stage process */
01289 
01290   /*  Twiddle coefficients index modifier */
01291   twidCoefModifier <<= 2u;
01292 
01293   /*  Calculation of Middle stage */
01294   for (k = fftLen / 4u; k > 4u; k >>= 2u)
01295   {
01296     /*  Initializations for the middle stage */
01297     n1 = n2;
01298     n2 >>= 2u;
01299     ic = 0u;
01300 
01301     for (j = 0u; j <= (n2 - 1u); j++)
01302     {
01303       /*  index calculation for the coefficients */
01304       C1 = _SIMD32_OFFSET(pCoef16 + (2u * ic));
01305       C2 = _SIMD32_OFFSET(pCoef16 + (4u * ic));
01306       C3 = _SIMD32_OFFSET(pCoef16 + (6u * ic));
01307 
01308       /*  Twiddle coefficients index modifier */
01309       ic = ic + twidCoefModifier;
01310       
01311       pSi0 = pSrc16 + 2 * j;
01312       pSi1 = pSi0 + 2 * n2;
01313       pSi2 = pSi1 + 2 * n2;
01314       pSi3 = pSi2 + 2 * n2;
01315 
01316       /*  Butterfly implementation */
01317       for (i0 = j; i0 < fftLen; i0 += n1)
01318       {
01319         /*  Reading i0, i0+fftLen/2 inputs */
01320         /* Read ya (real), xa(imag) input */
01321         T = _SIMD32_OFFSET(pSi0);
01322 
01323         /* Read yc (real), xc(imag) input */
01324         S = _SIMD32_OFFSET(pSi2);
01325 
01326         /* R = packed( (ya + yc), (xa + xc)) */
01327         R = __QADD16(T, S);
01328 
01329         /* S = packed((ya - yc), (xa - xc)) */
01330         S = __QSUB16(T, S);
01331 
01332         /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
01333         /* Read yb (real), xb(imag) input */
01334         T = _SIMD32_OFFSET(pSi1);
01335 
01336         /* Read yd (real), xd(imag) input */
01337         U = _SIMD32_OFFSET(pSi3);
01338 
01339         /* T = packed( (yb + yd), (xb + xd)) */
01340         T = __QADD16(T, U);
01341 
01342         /*  writing the butterfly processed i0 sample */
01343 
01344         /* xa' = xa + xb + xc + xd */
01345         /* ya' = ya + yb + yc + yd */
01346         out1 = __SHADD16(R, T);
01347         out1 = __SHADD16(out1, 0);
01348         _SIMD32_OFFSET(pSi0) = out1;
01349         pSi0 += 2 * n1;
01350 
01351         /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */
01352         R = __SHSUB16(R, T);
01353 
01354 #ifndef ARM_MATH_BIG_ENDIAN
01355 
01356         /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
01357         out1 = __SMUSD(C2, R) >> 16u;
01358 
01359         /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
01360         out2 = __SMUADX(C2, R);
01361 
01362 #else
01363 
01364         /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
01365         out1 = __SMUADX(R, C2) >> 16u;
01366 
01367         /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
01368         out2 = __SMUSD(__QSUB16(0, C2), R);
01369 
01370 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
01371 
01372         /*  Reading i0+3fftLen/4 */
01373         /* Read yb (real), xb(imag) input */
01374         T = _SIMD32_OFFSET(pSi1);
01375 
01376         /*  writing the butterfly processed i0 + fftLen/4 sample */
01377         /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
01378         /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
01379         _SIMD32_OFFSET(pSi1) =
01380           ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
01381         pSi1 += 2 * n1;
01382 
01383         /*  Butterfly calculations */
01384 
01385         /* Read yd (real), xd(imag) input */
01386         U = _SIMD32_OFFSET(pSi3);
01387 
01388         /* T = packed(yb-yd, xb-xd) */
01389         T = __QSUB16(T, U);
01390 
01391 #ifndef ARM_MATH_BIG_ENDIAN
01392 
01393         /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
01394         R = __SHSAX(S, T);
01395 
01396         /* S = packed((ya-yc) - (xb- xd),  (xa-xc) + (yb-yd)) */
01397         S = __SHASX(S, T);
01398 
01399 
01400         /*  Butterfly process for the i0+fftLen/2 sample */
01401         out1 = __SMUSD(C1, S) >> 16u;
01402         out2 = __SMUADX(C1, S);
01403 
01404 #else
01405 
01406         /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
01407         R = __SHASX(S, T);
01408 
01409         /* S = packed((ya-yc) - (xb- xd),  (xa-xc) + (yb-yd)) */
01410         S = __SHSAX(S, T);
01411 
01412 
01413         /*  Butterfly process for the i0+fftLen/2 sample */
01414         out1 = __SMUADX(S, C1) >> 16u;
01415         out2 = __SMUSD(__QSUB16(0, C1), S);
01416 
01417 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
01418 
01419         /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
01420         /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
01421         _SIMD32_OFFSET(pSi2) =
01422           ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
01423         pSi2 += 2 * n1;
01424 
01425         /*  Butterfly process for the i0+3fftLen/4 sample */
01426 
01427 #ifndef ARM_MATH_BIG_ENDIAN
01428 
01429         out1 = __SMUSD(C3, R) >> 16u;
01430         out2 = __SMUADX(C3, R);
01431 
01432 #else
01433 
01434         out1 = __SMUADX(C3, R) >> 16u;
01435         out2 = __SMUSD(__QSUB16(0, C3), R);
01436 
01437 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
01438 
01439         /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
01440         /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
01441         _SIMD32_OFFSET(pSi3) =
01442           ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
01443         pSi3 += 2 * n1;
01444       }
01445     }
01446     /*  Twiddle coefficients index modifier */
01447     twidCoefModifier <<= 2u;
01448   }
01449   /* end of middle stage process */
01450 
01451   /* data is in 10.6(q6) format for the 1024 point */
01452   /* data is in 8.8(q8) format for the 256 point */
01453   /* data is in 6.10(q10) format for the 64 point */
01454   /* data is in 4.12(q12) format for the 16 point */
01455 
01456   /*  Initializations for the last stage */
01457   j = fftLen >> 2;
01458 
01459   ptr1 = &pSrc16[0];
01460 
01461   /* start of last stage process */
01462 
01463   /*  Butterfly implementation */
01464   do
01465   {
01466     /* Read xa (real), ya(imag) input */
01467     xaya = *__SIMD32(ptr1)++;
01468 
01469     /* Read xb (real), yb(imag) input */
01470     xbyb = *__SIMD32(ptr1)++;
01471 
01472     /* Read xc (real), yc(imag) input */
01473     xcyc = *__SIMD32(ptr1)++;
01474 
01475     /* Read xd (real), yd(imag) input */
01476     xdyd = *__SIMD32(ptr1)++;
01477 
01478     /* R = packed((ya + yc), (xa + xc)) */
01479     R = __QADD16(xaya, xcyc);
01480 
01481     /* T = packed((yb + yd), (xb + xd)) */
01482     T = __QADD16(xbyb, xdyd);
01483 
01484     /* pointer updation for writing */
01485     ptr1 = ptr1 - 8u;
01486 
01487 
01488     /* xa' = xa + xb + xc + xd */
01489     /* ya' = ya + yb + yc + yd */
01490     *__SIMD32(ptr1)++ = __SHADD16(R, T);
01491 
01492     /* T = packed((yb + yd), (xb + xd)) */
01493     T = __QADD16(xbyb, xdyd);
01494 
01495     /* xc' = (xa-xb+xc-xd) */
01496     /* yc' = (ya-yb+yc-yd) */
01497     *__SIMD32(ptr1)++ = __SHSUB16(R, T);
01498 
01499     /* S = packed((ya - yc), (xa - xc)) */
01500     S = __QSUB16(xaya, xcyc);
01501 
01502     /* Read yd (real), xd(imag) input */
01503     /* T = packed( (yb - yd), (xb - xd))  */
01504     U = __QSUB16(xbyb, xdyd);
01505 
01506 #ifndef ARM_MATH_BIG_ENDIAN
01507 
01508     /* xb' = (xa+yb-xc-yd) */
01509     /* yb' = (ya-xb-yc+xd) */
01510     *__SIMD32(ptr1)++ = __SHASX(S, U);
01511 
01512 
01513     /* xd' = (xa-yb-xc+yd) */
01514     /* yd' = (ya+xb-yc-xd) */
01515     *__SIMD32(ptr1)++ = __SHSAX(S, U);
01516 
01517 #else
01518 
01519     /* xb' = (xa+yb-xc-yd) */
01520     /* yb' = (ya-xb-yc+xd) */
01521     *__SIMD32(ptr1)++ = __SHSAX(S, U);
01522 
01523 
01524     /* xd' = (xa-yb-xc+yd) */
01525     /* yd' = (ya+xb-yc-xd) */
01526     *__SIMD32(ptr1)++ = __SHASX(S, U);
01527 
01528 
01529 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
01530 
01531   } while(--j);
01532 
01533   /* end of last stage  process */
01534 
01535   /* output is in 11.5(q5) format for the 1024 point */
01536   /* output is in 9.7(q7) format for the 256 point   */
01537   /* output is in 7.9(q9) format for the 64 point  */
01538   /* output is in 5.11(q11) format for the 16 point  */
01539 
01540 
01541 #else
01542 
01543   /* Run the below code for Cortex-M0 */
01544 
01545   q15_t R0, R1, S0, S1, T0, T1, U0, U1;
01546   q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2;
01547   uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;
01548 
01549   /* Total process is divided into three stages */
01550 
01551   /* process first stage, middle stages, & last stage */
01552 
01553   /*  Initializations for the first stage */
01554   n2 = fftLen;
01555   n1 = n2;
01556 
01557   /* n2 = fftLen/4 */
01558   n2 >>= 2u;
01559 
01560   /* Index for twiddle coefficient */
01561   ic = 0u;
01562 
01563   /* Index for input read and output write */
01564   i0 = 0u;
01565 
01566   j = n2;
01567 
01568   /* Input is in 1.15(q15) format */
01569 
01570   /*  Start of first stage process */
01571   do
01572   {
01573     /*  Butterfly implementation */
01574 
01575     /*  index calculation for the input as, */
01576     /*  pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
01577     i1 = i0 + n2;
01578     i2 = i1 + n2;
01579     i3 = i2 + n2;
01580 
01581     /*  Reading i0, i0+fftLen/2 inputs */
01582     /* input is down scale by 4 to avoid overflow */
01583     /* Read ya (real), xa(imag) input */
01584     T0 = pSrc16[i0 * 2u] >> 2u;
01585     T1 = pSrc16[(i0 * 2u) + 1u] >> 2u;
01586     /* input is down scale by 4 to avoid overflow */
01587     /* Read yc (real), xc(imag) input */
01588     S0 = pSrc16[i2 * 2u] >> 2u;
01589     S1 = pSrc16[(i2 * 2u) + 1u] >> 2u;
01590 
01591     /* R0 = (ya + yc), R1 = (xa + xc) */
01592     R0 = __SSAT(T0 + S0, 16u);
01593     R1 = __SSAT(T1 + S1, 16u);
01594     /* S0 = (ya - yc), S1 = (xa - xc) */
01595     S0 = __SSAT(T0 - S0, 16u);
01596     S1 = __SSAT(T1 - S1, 16u);
01597 
01598     /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
01599     /* input is down scale by 4 to avoid overflow */
01600     /* Read yb (real), xb(imag) input */
01601     T0 = pSrc16[i1 * 2u] >> 2u;
01602     T1 = pSrc16[(i1 * 2u) + 1u] >> 2u;
01603     /* Read yd (real), xd(imag) input */
01604     /* input is down scale by 4 to avoid overflow */
01605     U0 = pSrc16[i3 * 2u] >> 2u;
01606     U1 = pSrc16[(i3 * 2u) + 1u] >> 2u;
01607 
01608     /* T0 = (yb + yd), T1 = (xb + xd) */
01609     T0 = __SSAT(T0 + U0, 16u);
01610     T1 = __SSAT(T1 + U1, 16u);
01611 
01612     /*  writing the butterfly processed i0 sample */
01613     /* xa' = xa + xb + xc + xd */
01614     /* ya' = ya + yb + yc + yd */
01615     pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u);
01616     pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u);
01617 
01618     /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc)- (xb + xd) */
01619     R0 = __SSAT(R0 - T0, 16u);
01620     R1 = __SSAT(R1 - T1, 16u);
01621     /* co2 & si2 are read from Coefficient pointer */
01622     Co2 = pCoef16[2u * ic * 2u];
01623     Si2 = pCoef16[(2u * ic * 2u) + 1u];
01624     /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
01625     out1 = (q15_t) ((Co2 * R0 - Si2 * R1) >> 16u);
01626     /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
01627     out2 = (q15_t) ((Si2 * R0 + Co2 * R1) >> 16u);
01628 
01629     /*  Reading i0+fftLen/4 */
01630     /* input is down scale by 4 to avoid overflow */
01631     /* T0 = yb, T1 = xb */
01632     T0 = pSrc16[i1 * 2u] >> 2u;
01633     T1 = pSrc16[(i1 * 2u) + 1u] >> 2u;
01634 
01635     /* writing the butterfly processed i0 + fftLen/4 sample */
01636     /* writing output(xc', yc') in little endian format */
01637     pSrc16[i1 * 2u] = out1;
01638     pSrc16[(i1 * 2u) + 1u] = out2;
01639 
01640     /*  Butterfly calculations */
01641     /* input is down scale by 4 to avoid overflow */
01642     /* U0 = yd, U1 = xd) */
01643     U0 = pSrc16[i3 * 2u] >> 2u;
01644     U1 = pSrc16[(i3 * 2u) + 1u] >> 2u;
01645 
01646     /* T0 = yb-yd, T1 = xb-xd) */
01647     T0 = __SSAT(T0 - U0, 16u);
01648     T1 = __SSAT(T1 - U1, 16u);
01649     /* R0 = (ya-yc) - (xb- xd) , R1 = (xa-xc) + (yb-yd) */
01650     R0 = (q15_t) __SSAT((q31_t) (S0 + T1), 16);
01651     R1 = (q15_t) __SSAT((q31_t) (S1 - T0), 16);
01652     /* S = (ya-yc) + (xb- xd), S1 = (xa-xc) - (yb-yd) */
01653     S0 = (q15_t) __SSAT((q31_t) (S0 - T1), 16);
01654     S1 = (q15_t) __SSAT((q31_t) (S1 + T0), 16);
01655 
01656     /* co1 & si1 are read from Coefficient pointer */
01657     Co1 = pCoef16[ic * 2u];
01658     Si1 = pCoef16[(ic * 2u) + 1u];
01659     /*  Butterfly process for the i0+fftLen/2 sample */
01660     /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
01661     out1 = (q15_t) ((Co1 * S0 - Si1 * S1) >> 16u);
01662     /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
01663     out2 = (q15_t) ((Si1 * S0 + Co1 * S1) >> 16u);
01664     /* writing output(xb', yb') in little endian format */
01665     pSrc16[i2 * 2u] = out1;
01666     pSrc16[(i2 * 2u) + 1u] = out2;
01667 
01668     /* Co3 & si3 are read from Coefficient pointer */
01669     Co3 = pCoef16[3u * ic * 2u];
01670     Si3 = pCoef16[(3u * ic * 2u) + 1u];
01671     /*  Butterfly process for the i0+3fftLen/4 sample */
01672     /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */
01673     out1 = (q15_t) ((Co3 * R0 - Si3 * R1) >> 16u);
01674     /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */
01675     out2 = (q15_t) ((Si3 * R0 + Co3 * R1) >> 16u);
01676     /* writing output(xd', yd') in little endian format */
01677     pSrc16[i3 * 2u] = out1;
01678     pSrc16[(i3 * 2u) + 1u] = out2;
01679 
01680     /*  Twiddle coefficients index modifier */
01681     ic = ic + twidCoefModifier;
01682 
01683     /*  Updating input index */
01684     i0 = i0 + 1u;
01685 
01686   } while(--j);
01687 
01688   /*  End of first stage process */
01689 
01690   /* data is in 4.11(q11) format */
01691 
01692 
01693   /*  Start of Middle stage process */
01694 
01695   /*  Twiddle coefficients index modifier */
01696   twidCoefModifier <<= 2u;
01697 
01698   /*  Calculation of Middle stage */
01699   for (k = fftLen / 4u; k > 4u; k >>= 2u)
01700   {
01701     /*  Initializations for the middle stage */
01702     n1 = n2;
01703     n2 >>= 2u;
01704     ic = 0u;
01705 
01706     for (j = 0u; j <= (n2 - 1u); j++)
01707     {
01708       /*  index calculation for the coefficients */
01709       Co1 = pCoef16[ic * 2u];
01710       Si1 = pCoef16[(ic * 2u) + 1u];
01711       Co2 = pCoef16[2u * ic * 2u];
01712       Si2 = pCoef16[2u * ic * 2u + 1u];
01713       Co3 = pCoef16[3u * ic * 2u];
01714       Si3 = pCoef16[(3u * ic * 2u) + 1u];
01715 
01716       /*  Twiddle coefficients index modifier */
01717       ic = ic + twidCoefModifier;
01718 
01719       /*  Butterfly implementation */
01720       for (i0 = j; i0 < fftLen; i0 += n1)
01721       {
01722         /*  index calculation for the input as, */
01723         /*  pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
01724         i1 = i0 + n2;
01725         i2 = i1 + n2;
01726         i3 = i2 + n2;
01727 
01728         /*  Reading i0, i0+fftLen/2 inputs */
01729         /* Read ya (real), xa(imag) input */
01730         T0 = pSrc16[i0 * 2u];
01731         T1 = pSrc16[(i0 * 2u) + 1u];
01732 
01733         /* Read yc (real), xc(imag) input */
01734         S0 = pSrc16[i2 * 2u];
01735         S1 = pSrc16[(i2 * 2u) + 1u];
01736 
01737 
01738         /* R0 = (ya + yc), R1 = (xa + xc) */
01739         R0 = __SSAT(T0 + S0, 16u);
01740         R1 = __SSAT(T1 + S1, 16u);
01741         /* S0 = (ya - yc), S1 = (xa - xc) */
01742         S0 = __SSAT(T0 - S0, 16u);
01743         S1 = __SSAT(T1 - S1, 16u);
01744 
01745         /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
01746         /* Read yb (real), xb(imag) input */
01747         T0 = pSrc16[i1 * 2u];
01748         T1 = pSrc16[(i1 * 2u) + 1u];
01749 
01750         /* Read yd (real), xd(imag) input */
01751         U0 = pSrc16[i3 * 2u];
01752         U1 = pSrc16[(i3 * 2u) + 1u];
01753 
01754         /* T0 = (yb + yd), T1 = (xb + xd) */
01755         T0 = __SSAT(T0 + U0, 16u);
01756         T1 = __SSAT(T1 + U1, 16u);
01757 
01758         /*  writing the butterfly processed i0 sample */
01759         /* xa' = xa + xb + xc + xd */
01760         /* ya' = ya + yb + yc + yd */
01761         pSrc16[i0 * 2u] = ((R0 >> 1u) + (T0 >> 1u)) >> 1u;
01762         pSrc16[(i0 * 2u) + 1u] = ((R1 >> 1u) + (T1 >> 1u)) >> 1u;
01763 
01764         /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
01765         R0 = (R0 >> 1u) - (T0 >> 1u);
01766         R1 = (R1 >> 1u) - (T1 >> 1u);
01767 
01768         /* (ya-yb+yc-yd)* (si2) - (xa-xb+xc-xd)* co2 */
01769         out1 = (q15_t) ((Co2 * R0 - Si2 * R1) >> 16);
01770         /* (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
01771         out2 = (q15_t) ((Si2 * R0 + Co2 * R1) >> 16);
01772 
01773         /*  Reading i0+3fftLen/4 */
01774         /* Read yb (real), xb(imag) input */
01775         T0 = pSrc16[i1 * 2u];
01776         T1 = pSrc16[(i1 * 2u) + 1u];
01777 
01778         /*  writing the butterfly processed i0 + fftLen/4 sample */
01779         /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
01780         /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
01781         pSrc16[i1 * 2u] = out1;
01782         pSrc16[(i1 * 2u) + 1u] = out2;
01783 
01784         /*  Butterfly calculations */
01785         /* Read yd (real), xd(imag) input */
01786         U0 = pSrc16[i3 * 2u];
01787         U1 = pSrc16[(i3 * 2u) + 1u];
01788 
01789         /* T0 = yb-yd, T1 = xb-xd) */
01790         T0 = __SSAT(T0 - U0, 16u);
01791         T1 = __SSAT(T1 - U1, 16u);
01792 
01793         /* R0 = (ya-yc) - (xb- xd) , R1 = (xa-xc) + (yb-yd) */
01794         R0 = (S0 >> 1u) + (T1 >> 1u);
01795         R1 = (S1 >> 1u) - (T0 >> 1u);
01796 
01797         /* S1 = (ya-yc) + (xb- xd), S1 = (xa-xc) - (yb-yd) */
01798         S0 = (S0 >> 1u) - (T1 >> 1u);
01799         S1 = (S1 >> 1u) + (T0 >> 1u);
01800 
01801         /*  Butterfly process for the i0+fftLen/2 sample */
01802         out1 = (q15_t) ((Co1 * S0 - Si1 * S1) >> 16u);
01803         out2 = (q15_t) ((Si1 * S0 + Co1 * S1) >> 16u);
01804         /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
01805         /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
01806         pSrc16[i2 * 2u] = out1;
01807         pSrc16[(i2 * 2u) + 1u] = out2;
01808 
01809         /*  Butterfly process for the i0+3fftLen/4 sample */
01810         out1 = (q15_t) ((Co3 * R0 - Si3 * R1) >> 16u);
01811 
01812         out2 = (q15_t) ((Si3 * R0 + Co3 * R1) >> 16u);
01813         /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */
01814         /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */
01815         pSrc16[i3 * 2u] = out1;
01816         pSrc16[(i3 * 2u) + 1u] = out2;
01817 
01818 
01819       }
01820     }
01821     /*  Twiddle coefficients index modifier */
01822     twidCoefModifier <<= 2u;
01823   }
01824   /*  End of Middle stages process */
01825 
01826 
01827   /* data is in 10.6(q6) format for the 1024 point */
01828   /* data is in 8.8(q8) format for the 256 point   */
01829   /* data is in 6.10(q10) format for the 64 point  */
01830   /* data is in 4.12(q12) format for the 16 point  */
01831 
01832   /* start of last stage process */
01833 
01834 
01835   /*  Initializations for the last stage */
01836   n1 = n2;
01837   n2 >>= 2u;
01838 
01839   /*  Butterfly implementation */
01840   for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1)
01841   {
01842     /*  index calculation for the input as, */
01843     /*  pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
01844     i1 = i0 + n2;
01845     i2 = i1 + n2;
01846     i3 = i2 + n2;
01847 
01848     /*  Reading i0, i0+fftLen/2 inputs */
01849     /* Read ya (real), xa(imag) input */
01850     T0 = pSrc16[i0 * 2u];
01851     T1 = pSrc16[(i0 * 2u) + 1u];
01852     /* Read yc (real), xc(imag) input */
01853     S0 = pSrc16[i2 * 2u];
01854     S1 = pSrc16[(i2 * 2u) + 1u];
01855 
01856     /* R0 = (ya + yc), R1 = (xa + xc) */
01857     R0 = __SSAT(T0 + S0, 16u);
01858     R1 = __SSAT(T1 + S1, 16u);
01859     /* S0 = (ya - yc), S1 = (xa - xc) */
01860     S0 = __SSAT(T0 - S0, 16u);
01861     S1 = __SSAT(T1 - S1, 16u);
01862 
01863     /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
01864     /* Read yb (real), xb(imag) input */
01865     T0 = pSrc16[i1 * 2u];
01866     T1 = pSrc16[(i1 * 2u) + 1u];
01867     /* Read yd (real), xd(imag) input */
01868     U0 = pSrc16[i3 * 2u];
01869     U1 = pSrc16[(i3 * 2u) + 1u];
01870 
01871     /* T0 = (yb + yd), T1 = (xb + xd) */
01872     T0 = __SSAT(T0 + U0, 16u);
01873     T1 = __SSAT(T1 + U1, 16u);
01874 
01875     /*  writing the butterfly processed i0 sample */
01876     /* xa' = xa + xb + xc + xd */
01877     /* ya' = ya + yb + yc + yd */
01878     pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u);
01879     pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u);
01880 
01881     /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
01882     R0 = (R0 >> 1u) - (T0 >> 1u);
01883     R1 = (R1 >> 1u) - (T1 >> 1u);
01884 
01885     /* Read yb (real), xb(imag) input */
01886     T0 = pSrc16[i1 * 2u];
01887     T1 = pSrc16[(i1 * 2u) + 1u];
01888 
01889     /*  writing the butterfly processed i0 + fftLen/4 sample */
01890     /* xc' = (xa-xb+xc-xd) */
01891     /* yc' = (ya-yb+yc-yd) */
01892     pSrc16[i1 * 2u] = R0;
01893     pSrc16[(i1 * 2u) + 1u] = R1;
01894 
01895     /* Read yd (real), xd(imag) input */
01896     U0 = pSrc16[i3 * 2u];
01897     U1 = pSrc16[(i3 * 2u) + 1u];
01898     /* T0 = (yb - yd), T1 = (xb - xd) */
01899     T0 = __SSAT(T0 - U0, 16u);
01900     T1 = __SSAT(T1 - U1, 16u);
01901 
01902     /*  writing the butterfly processed i0 + fftLen/2 sample */
01903     /* xb' = (xa-yb-xc+yd) */
01904     /* yb' = (ya+xb-yc-xd) */
01905     pSrc16[i2 * 2u] = (S0 >> 1u) - (T1 >> 1u);
01906     pSrc16[(i2 * 2u) + 1u] = (S1 >> 1u) + (T0 >> 1u);
01907 
01908 
01909     /*  writing the butterfly processed i0 + 3fftLen/4 sample */
01910     /* xd' = (xa+yb-xc-yd) */
01911     /* yd' = (ya-xb-yc+xd) */
01912     pSrc16[i3 * 2u] = (S0 >> 1u) + (T1 >> 1u);
01913     pSrc16[(i3 * 2u) + 1u] = (S1 >> 1u) - (T0 >> 1u);
01914   }
01915   /* end of last stage  process */
01916 
01917   /* output is in 11.5(q5) format for the 1024 point */
01918   /* output is in 9.7(q7) format for the 256 point   */
01919   /* output is in 7.9(q9) format for the 64 point  */
01920   /* output is in 5.11(q11) format for the 16 point  */
01921 
01922 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
01923 
01924 }