V4.0.1 of the ARM CMSIS DSP libraries. Note that arm_bitreversal2.s, arm_cfft_f32.c and arm_rfft_fast_f32.c had to be removed, because arm_bitreversal2.s will not assemble with the online tools. As a result, the fast f32 FFT functions are not yet available; all the other FFT functions are available.

Dependents:   MPU9150_Example fir_f32 fir_f32 MPU9150_nucleo_noni2cdev ... more

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_cfft_radix4_q15.c Source File

arm_cfft_radix4_q15.c

00001 /* ----------------------------------------------------------------------    
00002 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.    
00003 *    
00004 * $Date:        12. March 2014  
00005 * $Revision:    V1.4.3  
00006 *    
00007 * Project:      CMSIS DSP Library    
00008 * Title:        arm_cfft_radix4_q15.c    
00009 *    
00010 * Description:  This file has function definition of Radix-4 FFT & IFFT function and    
00011 *               In-place bit reversal using bit reversal table    
00012 *    
00013 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
00014 *  
00015 * Redistribution and use in source and binary forms, with or without 
00016 * modification, are permitted provided that the following conditions
00017 * are met:
00018 *   - Redistributions of source code must retain the above copyright
00019 *     notice, this list of conditions and the following disclaimer.
00020 *   - Redistributions in binary form must reproduce the above copyright
00021 *     notice, this list of conditions and the following disclaimer in
00022 *     the documentation and/or other materials provided with the 
00023 *     distribution.
00024 *   - Neither the name of ARM LIMITED nor the names of its contributors
00025 *     may be used to endorse or promote products derived from this
00026 *     software without specific prior written permission.
00027 *
00028 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00029 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00030 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
00031 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
00032 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
00033 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
00034 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00035 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
00036 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00037 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
00038 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00039 * POSSIBILITY OF SUCH DAMAGE.     
00040 * -------------------------------------------------------------------- */
00041 
00042 #include "arm_math.h"
00043 
00044 /* Forward Q15 radix-4 butterfly; its definition appears later in this file. */
00045 void arm_radix4_butterfly_q15(
00046   q15_t * pSrc16,
00047   uint32_t fftLen,
00048   q15_t * pCoef16,
00049   uint32_t twidCoefModifier);
00050 /* Inverse Q15 radix-4 butterfly; arm_cfft_radix4_q15 calls it when S->ifftFlag is 1. */
00051 void arm_radix4_butterfly_inverse_q15(
00052   q15_t * pSrc16,
00053   uint32_t fftLen,
00054   q15_t * pCoef16,
00055   uint32_t twidCoefModifier);
00056 /* Bit reversal of the data buffer; arm_cfft_radix4_q15 calls it when S->bitReverseFlag is 1. */
00057 void arm_bitreversal_q15(
00058   q15_t * pSrc,
00059   uint32_t fftLen,
00060   uint16_t bitRevFactor,
00061   uint16_t * pBitRevTab);
00062 
00063 /**    
00064  * @ingroup groupTransforms    
00065  */
00066 
00067 /**    
00068  * @addtogroup ComplexFFT    
00069  * @{    
00070  */
00071 
00072 
00073 /**
00074  * @brief Processing function for the Q15 CFFT/CIFFT.
00075  * @details Runs the inverse radix-4 butterfly when S->ifftFlag is 1 and the forward one otherwise, then bit-reverses the buffer when S->bitReverseFlag is 1.
00076  * @param[in]      *S    points to an instance of the Q15 CFFT/CIFFT structure.
00077  * @param[in, out] *pSrc points to the complex data buffer. Processing occurs in-place.
00078  * @return none.
00079  *
00080  * \par Input and output formats:
00081  * \par
00082  * Internally, the input is downscaled by 2 for every stage to avoid saturation inside the CFFT/CIFFT process.
00083  * Hence the output format differs between FFT sizes.
00084  * The tables below list, for CFFT and CIFFT, the input and output formats for each FFT size and the number of bits to upscale:
00085  * \par
00086  * \image html CFFTQ15.gif "Input and Output Formats for Q15 CFFT"
00087  * \image html CIFFTQ15.gif "Input and Output Formats for Q15 CIFFT"
00088  */
00089 
00090 void arm_cfft_radix4_q15(
00091   const arm_cfft_radix4_instance_q15 * S,
00092   q15_t * pSrc)
00093 {
00094   if(S->ifftFlag != 1u)
00095   {
00096     /* Any flag value other than 1 selects the forward radix-4 transform */
00097     arm_radix4_butterfly_q15(pSrc, S->fftLen, S->pTwiddle,
00098                              S->twidCoefModifier);
00099   }
00100   else
00101   {
00102     /* A flag value of exactly 1 selects the inverse radix-4 transform */
00103     arm_radix4_butterfly_inverse_q15(pSrc, S->fftLen, S->pTwiddle,
00104                                      S->twidCoefModifier);
00105   }
00106 
00107   /* Bit reversal is optional; skip the reordering unless the flag is exactly 1 */
00108   if(S->bitReverseFlag != 1u)
00109   {
00110     return;
00111   }
00112   arm_bitreversal_q15(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
00113 }
00114 
00115 /**    
00116  * @} end of ComplexFFT group    
00117  */
00118 
00119 /*    
00120 * Radix-4 FFT algorithm used is :    
00121 *    
00122 * Input real and imaginary data:    
00123 * x(n) = xa + j * ya    
00124 * x(n+N/4 ) = xb + j * yb    
00125 * x(n+N/2 ) = xc + j * yc    
00126 * x(n+3N/4) = xd + j * yd    
00127 *    
00128 *    
00129 * Output real and imaginary data:    
00130 * x(4r) = xa'+ j * ya'    
00131 * x(4r+1) = xb'+ j * yb'    
00132 * x(4r+2) = xc'+ j * yc'    
00133 * x(4r+3) = xd'+ j * yd'    
00134 *    
00135 *    
00136 * Twiddle factors for radix-4 FFT:    
00137 * Wn = co1 + j * (- si1)    
00138 * W2n = co2 + j * (- si2)    
00139 * W3n = co3 + j * (- si3)    
00140     
00141 * The real and imaginary output values for the radix-4 butterfly are    
00142 * xa' = xa + xb + xc + xd    
00143 * ya' = ya + yb + yc + yd    
00144 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)    
00145 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)    
00146 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)    
00147 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)    
00148 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)    
00149 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)    
00150 *    
00151 */
00152 
00153 /**    
00154  * @brief  Core function for the Q15 CFFT butterfly process.   
00155  * @param[in, out] *pSrc16          points to the in-place buffer of Q15 data type.   
00156  * @param[in]      fftLen           length of the FFT.   
00157  * @param[in]      *pCoef16         points to twiddle coefficient buffer.   
00158  * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.   
00159  * @return none.   
00160  */
00161 
00162 void arm_radix4_butterfly_q15(
00163   q15_t * pSrc16,
00164   uint32_t fftLen,
00165   q15_t * pCoef16,
00166   uint32_t twidCoefModifier)
00167 {
00168 
00169 #ifndef ARM_MATH_CM0_FAMILY
00170 
00171   /* Run the below code for Cortex-M4 and Cortex-M3 */
00172 
00173   q31_t R, S, T, U;
00174   q31_t C1, C2, C3, out1, out2;
00175   uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;
00176   q15_t in;
00177 
00178   q15_t *ptr1;
00179 
00180 
00181 
00182   q31_t xaya, xbyb, xcyc, xdyd;
00183 
00184   /* Total process is divided into three stages */
00185 
00186   /* process first stage, middle stages, & last stage */
00187 
00188   /*  Initializations for the first stage */
00189   n2 = fftLen;
00190   n1 = n2;
00191 
00192   /* n2 = fftLen/4 */
00193   n2 >>= 2u;
00194 
00195   /* Index for twiddle coefficient */
00196   ic = 0u;
00197 
00198   /* Index for input read and output write */
00199   i0 = 0u;
00200   j = n2;
00201 
00202   /* Input is in 1.15(q15) format */
00203 
00204   /*  start of first stage process */
00205   do
00206   {
00207     /*  Butterfly implementation */
00208 
00209     /*  index calculation for the input as, */
00210     /*  pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
00211     i1 = i0 + n2;
00212     i2 = i1 + n2;
00213     i3 = i2 + n2;
00214 
00215     /*  Reading i0, i0+fftLen/2 inputs */
00216     /* Read ya (real), xa(imag) input */
00217     T = _SIMD32_OFFSET(pSrc16 + (2u * i0));
00218     in = ((int16_t) (T & 0xFFFF)) >> 2;
00219     T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
00220 
00221     /* Read yc (real), xc(imag) input */
00222     S = _SIMD32_OFFSET(pSrc16 + (2u * i2));
00223     in = ((int16_t) (S & 0xFFFF)) >> 2;
00224     S = ((S >> 2) & 0xFFFF0000) | (in & 0xFFFF);
00225 
00226     /* R = packed((ya + yc), (xa + xc) ) */
00227     R = __QADD16(T, S);
00228 
00229     /* S = packed((ya - yc), (xa - xc) ) */
00230     S = __QSUB16(T, S);
00231 
00232     /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
00233     /* Read yb (real), xb(imag) input */
00234     T = _SIMD32_OFFSET(pSrc16 + (2u * i1));
00235     in = ((int16_t) (T & 0xFFFF)) >> 2;
00236     T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
00237 
00238     /* Read yd (real), xd(imag) input */
00239     U = _SIMD32_OFFSET(pSrc16 + (2u * i3));
00240     in = ((int16_t) (U & 0xFFFF)) >> 2;
00241     U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF);
00242 
00243     /* T = packed((yb + yd), (xb + xd) ) */
00244     T = __QADD16(T, U);
00245 
00246     /*  writing the butterfly processed i0 sample */
00247     /* xa' = xa + xb + xc + xd */
00248     /* ya' = ya + yb + yc + yd */
00249     _SIMD32_OFFSET(pSrc16 + (2u * i0)) = __SHADD16(R, T);
00250 
00251     /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */
00252     R = __QSUB16(R, T);
00253 
00254     /* co2 & si2 are read from SIMD Coefficient pointer */
00255     C2 = _SIMD32_OFFSET(pCoef16 + (4u * ic));
00256 
00257 #ifndef ARM_MATH_BIG_ENDIAN
00258 
00259     /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
00260     out1 = __SMUAD(C2, R) >> 16u;
00261     /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
00262     out2 = __SMUSDX(C2, R);
00263 
00264 #else
00265 
00266     /* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
00267     out1 = __SMUSDX(R, C2) >> 16u;
00268     /* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
00269     out2 = __SMUAD(C2, R);
00270 
00271 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
00272 
00273     /*  Reading i0+fftLen/4 */
00274     /* T = packed(yb, xb) */
00275     T = _SIMD32_OFFSET(pSrc16 + (2u * i1));
00276     in = ((int16_t) (T & 0xFFFF)) >> 2;
00277     T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
00278 
00279     /* writing the butterfly processed i0 + fftLen/4 sample */
00280     /* writing output(xc', yc') in little endian format */
00281     _SIMD32_OFFSET(pSrc16 + (2u * i1)) =
00282       (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
00283 
00284     /*  Butterfly calculations */
00285     /* U = packed(yd, xd) */
00286     U = _SIMD32_OFFSET(pSrc16 + (2u * i3));
00287     in = ((int16_t) (U & 0xFFFF)) >> 2;
00288     U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF);
00289 
00290     /* T = packed(yb-yd, xb-xd) */
00291     T = __QSUB16(T, U);
00292 
00293 #ifndef ARM_MATH_BIG_ENDIAN
00294 
00295     /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
00296     R = __QASX(S, T);
00297     /* S = packed((ya-yc) - (xb- xd),  (xa-xc) + (yb-yd)) */
00298     S = __QSAX(S, T);
00299 
00300 #else
00301 
00302     /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
00303     R = __QSAX(S, T);
00304     /* S = packed((ya-yc) - (xb- xd),  (xa-xc) + (yb-yd)) */
00305     S = __QASX(S, T);
00306 
00307 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
00308 
00309     /* co1 & si1 are read from SIMD Coefficient pointer */
00310     C1 = _SIMD32_OFFSET(pCoef16 + (2u * ic));
00311     /*  Butterfly process for the i0+fftLen/2 sample */
00312 
00313 #ifndef ARM_MATH_BIG_ENDIAN
00314 
00315     /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
00316     out1 = __SMUAD(C1, S) >> 16u;
00317     /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
00318     out2 = __SMUSDX(C1, S);
00319 
00320 #else
00321 
00322     /* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
00323     out1 = __SMUSDX(S, C1) >> 16u;
00324     /* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
00325     out2 = __SMUAD(C1, S);
00326 
00327 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
00328 
00329     /* writing output(xb', yb') in little endian format */
00330     _SIMD32_OFFSET(pSrc16 + (2u * i2)) =
00331       ((out2) & 0xFFFF0000) | ((out1) & 0x0000FFFF);
00332 
00333 
00334     /* co3 & si3 are read from SIMD Coefficient pointer */
00335     C3 = _SIMD32_OFFSET(pCoef16 + (6u * ic));
00336     /*  Butterfly process for the i0+3fftLen/4 sample */
00337 
00338 #ifndef ARM_MATH_BIG_ENDIAN
00339 
00340     /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
00341     out1 = __SMUAD(C3, R) >> 16u;
00342     /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
00343     out2 = __SMUSDX(C3, R);
00344 
00345 #else
00346 
00347     /* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
00348     out1 = __SMUSDX(R, C3) >> 16u;
00349     /* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
00350     out2 = __SMUAD(C3, R);
00351 
00352 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
00353 
00354     /* writing output(xd', yd') in little endian format */
00355     _SIMD32_OFFSET(pSrc16 + (2u * i3)) =
00356       ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
00357 
00358     /*  Twiddle coefficients index modifier */
00359     ic = ic + twidCoefModifier;
00360 
00361     /*  Updating input index */
00362     i0 = i0 + 1u;
00363 
00364   } while(--j);
00365   /* data is in 4.11(q11) format */
00366 
00367   /* end of first stage process */
00368 
00369 
00370   /* start of middle stage process */
00371 
00372   /*  Twiddle coefficients index modifier */
00373   twidCoefModifier <<= 2u;
00374 
00375   /*  Calculation of Middle stage */
00376   for (k = fftLen / 4u; k > 4u; k >>= 2u)
00377   {
00378     /*  Initializations for the middle stage */
00379     n1 = n2;
00380     n2 >>= 2u;
00381     ic = 0u;
00382 
00383     for (j = 0u; j <= (n2 - 1u); j++)
00384     {
00385       /*  index calculation for the coefficients */
00386       C1 = _SIMD32_OFFSET(pCoef16 + (2u * ic));
00387       C2 = _SIMD32_OFFSET(pCoef16 + (4u * ic));
00388       C3 = _SIMD32_OFFSET(pCoef16 + (6u * ic));
00389 
00390       /*  Twiddle coefficients index modifier */
00391       ic = ic + twidCoefModifier;
00392 
00393       /*  Butterfly implementation */
00394       for (i0 = j; i0 < fftLen; i0 += n1)
00395       {
00396         /*  index calculation for the input as, */
00397         /*  pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
00398         i1 = i0 + n2;
00399         i2 = i1 + n2;
00400         i3 = i2 + n2;
00401 
00402         /*  Reading i0, i0+fftLen/2 inputs */
00403         /* Read ya (real), xa(imag) input */
00404         T = _SIMD32_OFFSET(pSrc16 + (2u * i0));
00405 
00406         /* Read yc (real), xc(imag) input */
00407         S = _SIMD32_OFFSET(pSrc16 + (2u * i2));
00408 
00409         /* R = packed( (ya + yc), (xa + xc)) */
00410         R = __QADD16(T, S);
00411 
00412         /* S = packed((ya - yc), (xa - xc)) */
00413         S = __QSUB16(T, S);
00414 
00415         /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
00416         /* Read yb (real), xb(imag) input */
00417         T = _SIMD32_OFFSET(pSrc16 + (2u * i1));
00418 
00419         /* Read yd (real), xd(imag) input */
00420         U = _SIMD32_OFFSET(pSrc16 + (2u * i3));
00421 
00422         /* T = packed( (yb + yd), (xb + xd)) */
00423         T = __QADD16(T, U);
00424 
00425         /*  writing the butterfly processed i0 sample */
00426 
00427         /* xa' = xa + xb + xc + xd */
00428         /* ya' = ya + yb + yc + yd */
00429         out1 = __SHADD16(R, T);
00430         in = ((int16_t) (out1 & 0xFFFF)) >> 1;
00431         out1 = ((out1 >> 1) & 0xFFFF0000) | (in & 0xFFFF);
00432         _SIMD32_OFFSET(pSrc16 + (2u * i0)) = out1;
00433 
00434         /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */
00435         R = __SHSUB16(R, T);
00436 
00437 #ifndef ARM_MATH_BIG_ENDIAN
00438 
00439         /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
00440         out1 = __SMUAD(C2, R) >> 16u;
00441 
00442         /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
00443         out2 = __SMUSDX(C2, R);
00444 
00445 #else
00446 
00447         /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
00448         out1 = __SMUSDX(R, C2) >> 16u;
00449 
00450         /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
00451         out2 = __SMUAD(C2, R);
00452 
00453 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
00454 
00455         /*  Reading i0+3fftLen/4 */
00456         /* Read yb (real), xb(imag) input */
00457         T = _SIMD32_OFFSET(pSrc16 + (2u * i1));
00458 
00459         /*  writing the butterfly processed i0 + fftLen/4 sample */
00460         /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
00461         /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
00462         _SIMD32_OFFSET(pSrc16 + (2u * i1)) =
00463           ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
00464 
00465         /*  Butterfly calculations */
00466 
00467         /* Read yd (real), xd(imag) input */
00468         U = _SIMD32_OFFSET(pSrc16 + (2u * i3));
00469 
00470         /* T = packed(yb-yd, xb-xd) */
00471         T = __QSUB16(T, U);
00472 
00473 #ifndef ARM_MATH_BIG_ENDIAN
00474 
00475         /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
00476         R = __SHASX(S, T);
00477 
00478         /* S = packed((ya-yc) - (xb- xd),  (xa-xc) + (yb-yd)) */
00479         S = __SHSAX(S, T);
00480 
00481 
00482         /*  Butterfly process for the i0+fftLen/2 sample */
00483         out1 = __SMUAD(C1, S) >> 16u;
00484         out2 = __SMUSDX(C1, S);
00485 
00486 #else
00487 
00488         /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
00489         R = __SHSAX(S, T);
00490 
00491         /* S = packed((ya-yc) - (xb- xd),  (xa-xc) + (yb-yd)) */
00492         S = __SHASX(S, T);
00493 
00494 
00495         /*  Butterfly process for the i0+fftLen/2 sample */
00496         out1 = __SMUSDX(S, C1) >> 16u;
00497         out2 = __SMUAD(C1, S);
00498 
00499 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
00500 
00501         /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
00502         /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
00503         _SIMD32_OFFSET(pSrc16 + (2u * i2)) =
00504           ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
00505 
00506         /*  Butterfly process for the i0+3fftLen/4 sample */
00507 
00508 #ifndef ARM_MATH_BIG_ENDIAN
00509 
00510         out1 = __SMUAD(C3, R) >> 16u;
00511         out2 = __SMUSDX(C3, R);
00512 
00513 #else
00514 
00515         out1 = __SMUSDX(R, C3) >> 16u;
00516         out2 = __SMUAD(C3, R);
00517 
00518 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
00519 
00520         /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
00521         /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
00522         _SIMD32_OFFSET(pSrc16 + (2u * i3)) =
00523           ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
00524       }
00525     }
00526     /*  Twiddle coefficients index modifier */
00527     twidCoefModifier <<= 2u;
00528   }
00529   /* end of middle stage process */
00530 
00531 
00532   /* data is in 10.6(q6) format for the 1024 point */
00533   /* data is in 8.8(q8) format for the 256 point */
00534   /* data is in 6.10(q10) format for the 64 point */
00535   /* data is in 4.12(q12) format for the 16 point */
00536 
00537   /*  Initializations for the last stage */
00538   j = fftLen >> 2;
00539 
00540   ptr1 = &pSrc16[0];
00541 
00542   /* start of last stage process */
00543 
00544   /*  Butterfly implementation */
00545   do
00546   {
00547     /* Read xa (real), ya(imag) input */
00548     xaya = *__SIMD32(ptr1)++;
00549 
00550     /* Read xb (real), yb(imag) input */
00551     xbyb = *__SIMD32(ptr1)++;
00552 
00553     /* Read xc (real), yc(imag) input */
00554     xcyc = *__SIMD32(ptr1)++;
00555 
00556     /* Read xd (real), yd(imag) input */
00557     xdyd = *__SIMD32(ptr1)++;
00558 
00559     /* R = packed((ya + yc), (xa + xc)) */
00560     R = __QADD16(xaya, xcyc);
00561 
00562     /* T = packed((yb + yd), (xb + xd)) */
00563     T = __QADD16(xbyb, xdyd);
00564 
00565     /* pointer updation for writing */
00566     ptr1 = ptr1 - 8u;
00567 
00568 
00569     /* xa' = xa + xb + xc + xd */
00570     /* ya' = ya + yb + yc + yd */
00571     *__SIMD32(ptr1)++ = __SHADD16(R, T);
00572 
00573     /* T = packed((yb + yd), (xb + xd)) */
00574     T = __QADD16(xbyb, xdyd);
00575 
00576     /* xc' = (xa-xb+xc-xd) */
00577     /* yc' = (ya-yb+yc-yd) */
00578     *__SIMD32(ptr1)++ = __SHSUB16(R, T);
00579 
00580     /* S = packed((ya - yc), (xa - xc)) */
00581     S = __QSUB16(xaya, xcyc);
00582 
00583     /* Read yd (real), xd(imag) input */
00584     /* T = packed( (yb - yd), (xb - xd))  */
00585     U = __QSUB16(xbyb, xdyd);
00586 
00587 #ifndef ARM_MATH_BIG_ENDIAN
00588 
00589     /* xb' = (xa+yb-xc-yd) */
00590     /* yb' = (ya-xb-yc+xd) */
00591     *__SIMD32(ptr1)++ = __SHSAX(S, U);
00592 
00593 
00594     /* xd' = (xa-yb-xc+yd) */
00595     /* yd' = (ya+xb-yc-xd) */
00596     *__SIMD32(ptr1)++ = __SHASX(S, U);
00597 
00598 #else
00599 
00600     /* xb' = (xa+yb-xc-yd) */
00601     /* yb' = (ya-xb-yc+xd) */
00602     *__SIMD32(ptr1)++ = __SHASX(S, U);
00603 
00604 
00605     /* xd' = (xa-yb-xc+yd) */
00606     /* yd' = (ya+xb-yc-xd) */
00607     *__SIMD32(ptr1)++ = __SHSAX(S, U);
00608 
00609 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
00610 
00611   } while(--j);
00612 
00613   /* end of last stage process */
00614 
00615   /* output is in 11.5(q5) format for the 1024 point */
00616   /* output is in 9.7(q7) format for the 256 point   */
00617   /* output is in 7.9(q9) format for the 64 point  */
00618   /* output is in 5.11(q11) format for the 16 point  */
00619 
00620 
00621 #else
00622 
00623   /* Run the below code for Cortex-M0 */
00624 
00625   q15_t R0, R1, S0, S1, T0, T1, U0, U1;
00626   q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2;
00627   uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;
00628 
00629   /* Total process is divided into three stages */
00630 
00631   /* process first stage, middle stages, & last stage */
00632 
00633   /*  Initializations for the first stage */
00634   n2 = fftLen;
00635   n1 = n2;
00636 
00637   /* n2 = fftLen/4 */
00638   n2 >>= 2u;
00639 
00640   /* Index for twiddle coefficient */
00641   ic = 0u;
00642 
00643   /* Index for input read and output write */
00644   i0 = 0u;
00645   j = n2;
00646 
00647   /* Input is in 1.15(q15) format */
00648 
00649   /*  start of first stage process */
00650   do
00651   {
00652     /*  Butterfly implementation */
00653 
00654     /*  index calculation for the input as, */
00655     /*  pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
00656     i1 = i0 + n2;
00657     i2 = i1 + n2;
00658     i3 = i2 + n2;
00659 
00660     /*  Reading i0, i0+fftLen/2 inputs */
00661 
00662     /* input is down scale by 4 to avoid overflow */
00663     /* Read ya (real), xa(imag) input */
00664     T0 = pSrc16[i0 * 2u] >> 2u;
00665     T1 = pSrc16[(i0 * 2u) + 1u] >> 2u;
00666 
00667     /* input is down scale by 4 to avoid overflow */
00668     /* Read yc (real), xc(imag) input */
00669     S0 = pSrc16[i2 * 2u] >> 2u;
00670     S1 = pSrc16[(i2 * 2u) + 1u] >> 2u;
00671 
00672     /* R0 = (ya + yc) */
00673     R0 = __SSAT(T0 + S0, 16u);
00674     /* R1 = (xa + xc) */
00675     R1 = __SSAT(T1 + S1, 16u);
00676 
00677     /* S0 = (ya - yc) */
00678     S0 = __SSAT(T0 - S0, 16);
00679     /* S1 = (xa - xc) */
00680     S1 = __SSAT(T1 - S1, 16);
00681 
00682     /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
00683     /* input is down scale by 4 to avoid overflow */
00684     /* Read yb (real), xb(imag) input */
00685     T0 = pSrc16[i1 * 2u] >> 2u;
00686     T1 = pSrc16[(i1 * 2u) + 1u] >> 2u;
00687 
00688     /* input is down scale by 4 to avoid overflow */
00689     /* Read yd (real), xd(imag) input */
00690     U0 = pSrc16[i3 * 2u] >> 2u;
00691     U1 = pSrc16[(i3 * 2u) + 1] >> 2u;
00692 
00693     /* T0 = (yb + yd) */
00694     T0 = __SSAT(T0 + U0, 16u);
00695     /* T1 = (xb + xd) */
00696     T1 = __SSAT(T1 + U1, 16u);
00697 
00698     /*  writing the butterfly processed i0 sample */
00699     /* ya' = ya + yb + yc + yd */
00700     /* xa' = xa + xb + xc + xd */
00701     pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u);
00702     pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u);
00703 
00704     /* R0 = (ya + yc) - (yb + yd) */
00705     /* R1 = (xa + xc) - (xb + xd) */
00706     R0 = __SSAT(R0 - T0, 16u);
00707     R1 = __SSAT(R1 - T1, 16u);
00708 
00709     /* co2 & si2 are read from Coefficient pointer */
00710     Co2 = pCoef16[2u * ic * 2u];
00711     Si2 = pCoef16[(2u * ic * 2u) + 1];
00712 
00713     /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
00714     out1 = (q15_t) ((Co2 * R0 + Si2 * R1) >> 16u);
00715     /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
00716     out2 = (q15_t) ((-Si2 * R0 + Co2 * R1) >> 16u);
00717 
00718     /*  Reading i0+fftLen/4 */
00719     /* input is down scale by 4 to avoid overflow */
00720     /* T0 = yb, T1 =  xb */
00721     T0 = pSrc16[i1 * 2u] >> 2;
00722     T1 = pSrc16[(i1 * 2u) + 1] >> 2;
00723 
00724     /* writing the butterfly processed i0 + fftLen/4 sample */
00725     /* writing output(xc', yc') in little endian format */
00726     pSrc16[i1 * 2u] = out1;
00727     pSrc16[(i1 * 2u) + 1] = out2;
00728 
00729     /*  Butterfly calculations */
00730     /* input is down scale by 4 to avoid overflow */
00731     /* U0 = yd, U1 = xd */
00732     U0 = pSrc16[i3 * 2u] >> 2;
00733     U1 = pSrc16[(i3 * 2u) + 1] >> 2;
00734     /* T0 = yb-yd */
00735     T0 = __SSAT(T0 - U0, 16);
00736     /* T1 = xb-xd */
00737     T1 = __SSAT(T1 - U1, 16);
00738 
00739     /* R1 = (ya-yc) + (xb- xd),  R0 = (xa-xc) - (yb-yd)) */
00740     R0 = (q15_t) __SSAT((q31_t) (S0 - T1), 16);
00741     R1 = (q15_t) __SSAT((q31_t) (S1 + T0), 16);
00742 
00743     /* S1 = (ya-yc) - (xb- xd), S0 = (xa-xc) + (yb-yd)) */
00744     S0 = (q15_t) __SSAT(((q31_t) S0 + T1), 16u);
00745     S1 = (q15_t) __SSAT(((q31_t) S1 - T0), 16u);
00746 
00747     /* co1 & si1 are read from Coefficient pointer */
00748     Co1 = pCoef16[ic * 2u];
00749     Si1 = pCoef16[(ic * 2u) + 1];
00750     /*  Butterfly process for the i0+fftLen/2 sample */
00751     /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
00752     out1 = (q15_t) ((Si1 * S1 + Co1 * S0) >> 16);
00753     /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
00754     out2 = (q15_t) ((-Si1 * S0 + Co1 * S1) >> 16);
00755 
00756     /* writing output(xb', yb') in little endian format */
00757     pSrc16[i2 * 2u] = out1;
00758     pSrc16[(i2 * 2u) + 1] = out2;
00759 
00760     /* Co3 & si3 are read from Coefficient pointer */
00761     Co3 = pCoef16[3u * (ic * 2u)];
00762     Si3 = pCoef16[(3u * (ic * 2u)) + 1];
00763     /*  Butterfly process for the i0+3fftLen/4 sample */
00764     /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */
00765     out1 = (q15_t) ((Si3 * R1 + Co3 * R0) >> 16u);
00766     /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */
00767     out2 = (q15_t) ((-Si3 * R0 + Co3 * R1) >> 16u);
00768     /* writing output(xd', yd') in little endian format */
00769     pSrc16[i3 * 2u] = out1;
00770     pSrc16[(i3 * 2u) + 1] = out2;
00771 
00772     /*  Twiddle coefficients index modifier */
00773     ic = ic + twidCoefModifier;
00774 
00775     /*  Updating input index */
00776     i0 = i0 + 1u;
00777 
00778   } while(--j);
00779   /* data is in 4.11(q11) format */
00780 
00781   /* end of first stage process */
00782 
00783 
00784   /* start of middle stage process */
00785 
00786   /*  Twiddle coefficients index modifier */
00787   twidCoefModifier <<= 2u;
00788 
00789   /*  Calculation of Middle stage */
00790   for (k = fftLen / 4u; k > 4u; k >>= 2u)
00791   {
00792     /*  Initializations for the middle stage */
00793     n1 = n2;
00794     n2 >>= 2u;
00795     ic = 0u;
00796 
00797     for (j = 0u; j <= (n2 - 1u); j++)
00798     {
00799       /*  index calculation for the coefficients */
00800       Co1 = pCoef16[ic * 2u];
00801       Si1 = pCoef16[(ic * 2u) + 1u];
00802       Co2 = pCoef16[2u * (ic * 2u)];
00803       Si2 = pCoef16[(2u * (ic * 2u)) + 1u];
00804       Co3 = pCoef16[3u * (ic * 2u)];
00805       Si3 = pCoef16[(3u * (ic * 2u)) + 1u];
00806 
00807       /*  Twiddle coefficients index modifier */
00808       ic = ic + twidCoefModifier;
00809 
00810       /*  Butterfly implementation */
00811       for (i0 = j; i0 < fftLen; i0 += n1)
00812       {
00813         /*  index calculation for the input as, */
00814         /*  pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
00815         i1 = i0 + n2;
00816         i2 = i1 + n2;
00817         i3 = i2 + n2;
00818 
00819         /*  Reading i0, i0+fftLen/2 inputs */
00820         /* Read ya (real), xa(imag) input */
00821         T0 = pSrc16[i0 * 2u];
00822         T1 = pSrc16[(i0 * 2u) + 1u];
00823 
00824         /* Read yc (real), xc(imag) input */
00825         S0 = pSrc16[i2 * 2u];
00826         S1 = pSrc16[(i2 * 2u) + 1u];
00827 
00828         /* R0 = (ya + yc), R1 = (xa + xc) */
00829         R0 = __SSAT(T0 + S0, 16);
00830         R1 = __SSAT(T1 + S1, 16);
00831 
00832         /* S0 = (ya - yc), S1 =(xa - xc) */
00833         S0 = __SSAT(T0 - S0, 16);
00834         S1 = __SSAT(T1 - S1, 16);
00835 
00836         /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
00837         /* Read yb (real), xb(imag) input */
00838         T0 = pSrc16[i1 * 2u];
00839         T1 = pSrc16[(i1 * 2u) + 1u];
00840 
00841         /* Read yd (real), xd(imag) input */
00842         U0 = pSrc16[i3 * 2u];
00843         U1 = pSrc16[(i3 * 2u) + 1u];
00844 
00845 
00846         /* T0 = (yb + yd), T1 = (xb + xd) */
00847         T0 = __SSAT(T0 + U0, 16);
00848         T1 = __SSAT(T1 + U1, 16);
00849 
00850         /*  writing the butterfly processed i0 sample */
00851 
00852         /* xa' = xa + xb + xc + xd */
00853         /* ya' = ya + yb + yc + yd */
00854         out1 = ((R0 >> 1u) + (T0 >> 1u)) >> 1u;
00855         out2 = ((R1 >> 1u) + (T1 >> 1u)) >> 1u;
00856 
00857         pSrc16[i0 * 2u] = out1;
00858         pSrc16[(2u * i0) + 1u] = out2;
00859 
00860         /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
00861         R0 = (R0 >> 1u) - (T0 >> 1u);
00862         R1 = (R1 >> 1u) - (T1 >> 1u);
00863 
00864         /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
00865         out1 = (q15_t) ((Co2 * R0 + Si2 * R1) >> 16u);
00866 
00867         /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
00868         out2 = (q15_t) ((-Si2 * R0 + Co2 * R1) >> 16u);
00869 
00870         /*  Reading i0+3fftLen/4 */
00871         /* Read yb (real), xb(imag) input */
00872         T0 = pSrc16[i1 * 2u];
00873         T1 = pSrc16[(i1 * 2u) + 1u];
00874 
00875         /*  writing the butterfly processed i0 + fftLen/4 sample */
00876         /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
00877         /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
00878         pSrc16[i1 * 2u] = out1;
00879         pSrc16[(i1 * 2u) + 1u] = out2;
00880 
00881         /*  Butterfly calculations */
00882 
00883         /* Read yd (real), xd(imag) input */
00884         U0 = pSrc16[i3 * 2u];
00885         U1 = pSrc16[(i3 * 2u) + 1u];
00886 
00887         /* T0 = yb-yd, T1 = xb-xd */
00888         T0 = __SSAT(T0 - U0, 16);
00889         T1 = __SSAT(T1 - U1, 16);
00890 
00891         /* R0 = (ya-yc) + (xb- xd), R1 = (xa-xc) - (yb-yd)) */
00892         R0 = (S0 >> 1u) - (T1 >> 1u);
00893         R1 = (S1 >> 1u) + (T0 >> 1u);
00894 
00895         /* S0 = (ya-yc) - (xb- xd), S1 = (xa-xc) + (yb-yd)) */
00896         S0 = (S0 >> 1u) + (T1 >> 1u);
00897         S1 = (S1 >> 1u) - (T0 >> 1u);
00898 
00899         /*  Butterfly process for the i0+fftLen/2 sample */
00900         out1 = (q15_t) ((Co1 * S0 + Si1 * S1) >> 16u);
00901 
00902         out2 = (q15_t) ((-Si1 * S0 + Co1 * S1) >> 16u);
00903 
00904         /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
00905         /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
00906         pSrc16[i2 * 2u] = out1;
00907         pSrc16[(i2 * 2u) + 1u] = out2;
00908 
00909         /*  Butterfly process for the i0+3fftLen/4 sample */
00910         out1 = (q15_t) ((Si3 * R1 + Co3 * R0) >> 16u);
00911 
00912         out2 = (q15_t) ((-Si3 * R0 + Co3 * R1) >> 16u);
00913         /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */
00914         /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */
00915         pSrc16[i3 * 2u] = out1;
00916         pSrc16[(i3 * 2u) + 1u] = out2;
00917       }
00918     }
00919     /*  Twiddle coefficients index modifier */
00920     twidCoefModifier <<= 2u;
00921   }
00922   /* end of middle stage process */
00923 
00924 
00925   /* data is in 10.6(q6) format for the 1024 point */
00926   /* data is in 8.8(q8) format for the 256 point */
00927   /* data is in 6.10(q10) format for the 64 point */
00928   /* data is in 4.12(q12) format for the 16 point */
00929 
00930   /*  Initializations for the last stage */
00931   n1 = n2;
00932   n2 >>= 2u;
00933 
00934   /* start of last stage process */
00935 
00936   /*  Butterfly implementation */
00937   for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1)
00938   {
00939     /*  index calculation for the input as, */
00940     /*  pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
00941     i1 = i0 + n2;
00942     i2 = i1 + n2;
00943     i3 = i2 + n2;
00944 
00945     /*  Reading i0, i0+fftLen/2 inputs */
00946     /* Read ya (real), xa(imag) input */
00947     T0 = pSrc16[i0 * 2u];
00948     T1 = pSrc16[(i0 * 2u) + 1u];
00949 
00950     /* Read yc (real), xc(imag) input */
00951     S0 = pSrc16[i2 * 2u];
00952     S1 = pSrc16[(i2 * 2u) + 1u];
00953 
00954     /* R0 = (ya + yc), R1 = (xa + xc) */
00955     R0 = __SSAT(T0 + S0, 16u);
00956     R1 = __SSAT(T1 + S1, 16u);
00957 
00958     /* S0 = (ya - yc), S1 = (xa - xc) */
00959     S0 = __SSAT(T0 - S0, 16u);
00960     S1 = __SSAT(T1 - S1, 16u);
00961 
00962     /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
00963     /* Read yb (real), xb(imag) input */
00964     T0 = pSrc16[i1 * 2u];
00965     T1 = pSrc16[(i1 * 2u) + 1u];
00966     /* Read yd (real), xd(imag) input */
00967     U0 = pSrc16[i3 * 2u];
00968     U1 = pSrc16[(i3 * 2u) + 1u];
00969 
00970     /* T0 = (yb + yd), T1 = (xb + xd)) */
00971     T0 = __SSAT(T0 + U0, 16u);
00972     T1 = __SSAT(T1 + U1, 16u);
00973 
00974     /*  writing the butterfly processed i0 sample */
00975     /* xa' = xa + xb + xc + xd */
00976     /* ya' = ya + yb + yc + yd */
00977     pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u);
00978     pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u);
00979 
00980     /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
00981     R0 = (R0 >> 1u) - (T0 >> 1u);
00982     R1 = (R1 >> 1u) - (T1 >> 1u);
00983     /* Read yb (real), xb(imag) input */
00984     T0 = pSrc16[i1 * 2u];
00985     T1 = pSrc16[(i1 * 2u) + 1u];
00986 
00987     /*  writing the butterfly processed i0 + fftLen/4 sample */
00988     /* xc' = (xa-xb+xc-xd) */
00989     /* yc' = (ya-yb+yc-yd) */
00990     pSrc16[i1 * 2u] = R0;
00991     pSrc16[(i1 * 2u) + 1u] = R1;
00992 
00993     /* Read yd (real), xd(imag) input */
00994     U0 = pSrc16[i3 * 2u];
00995     U1 = pSrc16[(i3 * 2u) + 1u];
00996     /* T0 = (yb - yd), T1 = (xb - xd)  */
00997     T0 = __SSAT(T0 - U0, 16u);
00998     T1 = __SSAT(T1 - U1, 16u);
00999 
01000     /*  writing the butterfly processed i0 + fftLen/2 sample */
01001     /* xb' = (xa+yb-xc-yd) */
01002     /* yb' = (ya-xb-yc+xd) */
01003     pSrc16[i2 * 2u] = (S0 >> 1u) + (T1 >> 1u);
01004     pSrc16[(i2 * 2u) + 1u] = (S1 >> 1u) - (T0 >> 1u);
01005 
01006     /*  writing the butterfly processed i0 + 3fftLen/4 sample */
01007     /* xd' = (xa-yb-xc+yd) */
01008     /* yd' = (ya+xb-yc-xd) */
01009     pSrc16[i3 * 2u] = (S0 >> 1u) - (T1 >> 1u);
01010     pSrc16[(i3 * 2u) + 1u] = (S1 >> 1u) + (T0 >> 1u);
01011 
01012   }
01013 
01014   /* end of last stage process */
01015 
01016   /* output is in 11.5(q5) format for the 1024 point */
01017   /* output is in 9.7(q7) format for the 256 point   */
01018   /* output is in 7.9(q9) format for the 64 point  */
01019   /* output is in 5.11(q11) format for the 16 point  */
01020 
01021 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
01022 
01023 }
01024 
01025 
01026 /**    
01027  * @brief  Core function for the Q15 CIFFT butterfly process.   
01028  * @param[in, out] *pSrc16          points to the in-place buffer of Q15 data type.   
01029  * @param[in]      fftLen           length of the FFT.   
01030  * @param[in]      *pCoef16         points to twiddle coefficient buffer.   
01031  * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.   
01032  * @return none.   
01033  */
01034 
01035 /*    
01036 * Radix-4 IFFT algorithm used is :    
01037 *    
01038 * CIFFT uses same twiddle coefficients as CFFT function    
01039 *  x[k] = x[n] + (j)^k * x[n + fftLen/4] + (-1)^k * x[n+fftLen/2] + (-j)^k * x[n+3*fftLen/4]    
01040 *    
01041 *    
01042 * IFFT is implemented with following changes in equations from FFT    
01043 *    
01044 * Input real and imaginary data:    
01045 * x(n) = xa + j * ya    
01046 * x(n+N/4 ) = xb + j * yb    
01047 * x(n+N/2 ) = xc + j * yc    
01048 * x(n+3N/4) = xd + j * yd    
01049 *    
01050 *    
01051 * Output real and imaginary data:    
01052 * x(4r) = xa'+ j * ya'    
01053 * x(4r+1) = xb'+ j * yb'    
01054 * x(4r+2) = xc'+ j * yc'    
01055 * x(4r+3) = xd'+ j * yd'    
01056 *    
01057 *    
01058 * Twiddle factors for radix-4 IFFT:    
01059 * Wn = co1 + j * (si1)    
01060 * W2n = co2 + j * (si2)    
01061 * W3n = co3 + j * (si3)    
01062 *    
01063 * The real and imaginary output values for the radix-4 butterfly are    
01064 * xa' = xa + xb + xc + xd    
01065 * ya' = ya + yb + yc + yd    
01066 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)    
01067 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)    
01068 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)    
01069 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)    
01070 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)    
01071 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)    
01072 *    
01073 */
01074 
01075 void arm_radix4_butterfly_inverse_q15(
01076   q15_t * pSrc16,
01077   uint32_t fftLen,
01078   q15_t * pCoef16,
01079   uint32_t twidCoefModifier)
01080 {
01081 
01082 #ifndef ARM_MATH_CM0_FAMILY
01083 
01084   /* Run the below code for Cortex-M4 and Cortex-M3 */
01085 
01086   q31_t R, S, T, U;
01087   q31_t C1, C2, C3, out1, out2;
01088   uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;
01089   q15_t in;
01090 
01091   q15_t *ptr1;
01092 
01093 
01094 
01095   q31_t xaya, xbyb, xcyc, xdyd;
01096 
01097   /* Total process is divided into three stages */
01098 
01099   /* process first stage, middle stages, & last stage */
01100 
01101   /*  Initializations for the first stage */
01102   n2 = fftLen;
01103   n1 = n2;
01104 
01105   /* n2 = fftLen/4 */
01106   n2 >>= 2u;
01107 
01108   /* Index for twiddle coefficient */
01109   ic = 0u;
01110 
01111   /* Index for input read and output write */
01112   i0 = 0u;
01113   j = n2;
01114 
01115   /* Input is in 1.15(q15) format */
01116 
01117   /*  start of first stage process */
01118   do
01119   {
01120     /*  Butterfly implementation */
01121 
01122     /*  index calculation for the input as, */
01123     /*  pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
01124     i1 = i0 + n2;
01125     i2 = i1 + n2;
01126     i3 = i2 + n2;
01127 
01128     /*  Reading i0, i0+fftLen/2 inputs */
01129     /* Read ya (real), xa(imag) input */
01130     T = _SIMD32_OFFSET(pSrc16 + (2u * i0));
01131     in = ((int16_t) (T & 0xFFFF)) >> 2;
01132     T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
01133 
01134     /* Read yc (real), xc(imag) input */
01135     S = _SIMD32_OFFSET(pSrc16 + (2u * i2));
01136     in = ((int16_t) (S & 0xFFFF)) >> 2;
01137     S = ((S >> 2) & 0xFFFF0000) | (in & 0xFFFF);
01138 
01139     /* R = packed((ya + yc), (xa + xc) ) */
01140     R = __QADD16(T, S);
01141 
01142     /* S = packed((ya - yc), (xa - xc) ) */
01143     S = __QSUB16(T, S);
01144 
01145     /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
01146     /* Read yb (real), xb(imag) input */
01147     T = _SIMD32_OFFSET(pSrc16 + (2u * i1));
01148     in = ((int16_t) (T & 0xFFFF)) >> 2;
01149     T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
01150 
01151     /* Read yd (real), xd(imag) input */
01152     U = _SIMD32_OFFSET(pSrc16 + (2u * i3));
01153     in = ((int16_t) (U & 0xFFFF)) >> 2;
01154     U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF);
01155 
01156     /* T = packed((yb + yd), (xb + xd) ) */
01157     T = __QADD16(T, U);
01158 
01159     /*  writing the butterfly processed i0 sample */
01160     /* xa' = xa + xb + xc + xd */
01161     /* ya' = ya + yb + yc + yd */
01162     _SIMD32_OFFSET(pSrc16 + (2u * i0)) = __SHADD16(R, T);
01163 
01164     /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */
01165     R = __QSUB16(R, T);
01166 
01167     /* co2 & si2 are read from SIMD Coefficient pointer */
01168     C2 = _SIMD32_OFFSET(pCoef16 + (4u * ic));
01169 
01170 #ifndef ARM_MATH_BIG_ENDIAN
01171 
01172     /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
01173     out1 = __SMUSD(C2, R) >> 16u;
01174     /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
01175     out2 = __SMUADX(C2, R);
01176 
01177 #else
01178 
01179     /* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
01180     out1 = __SMUADX(C2, R) >> 16u;
01181     /* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
01182     out2 = __SMUSD(__QSUB16(0, C2), R);
01183 
01184 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
01185 
01186     /*  Reading i0+fftLen/4 */
01187     /* T = packed(yb, xb) */
01188     T = _SIMD32_OFFSET(pSrc16 + (2u * i1));
01189     in = ((int16_t) (T & 0xFFFF)) >> 2;
01190     T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
01191 
01192     /* writing the butterfly processed i0 + fftLen/4 sample */
01193     /* writing output(xc', yc') in little endian format */
01194     _SIMD32_OFFSET(pSrc16 + (2u * i1)) =
01195       (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
01196 
01197     /*  Butterfly calculations */
01198     /* U = packed(yd, xd) */
01199     U = _SIMD32_OFFSET(pSrc16 + (2u * i3));
01200     in = ((int16_t) (U & 0xFFFF)) >> 2;
01201     U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF);
01202 
01203     /* T = packed(yb-yd, xb-xd) */
01204     T = __QSUB16(T, U);
01205 
01206 #ifndef ARM_MATH_BIG_ENDIAN
01207 
01208     /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
01209     R = __QSAX(S, T);
01210     /* S = packed((ya-yc) - (xb- xd),  (xa-xc) + (yb-yd)) */
01211     S = __QASX(S, T);
01212 
01213 #else
01214 
01215     /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
01216     R = __QASX(S, T);
01217     /* S = packed((ya-yc) - (xb- xd),  (xa-xc) + (yb-yd)) */
01218     S = __QSAX(S, T);
01219 
01220 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
01221 
01222     /* co1 & si1 are read from SIMD Coefficient pointer */
01223     C1 = _SIMD32_OFFSET(pCoef16 + (2u * ic));
01224     /*  Butterfly process for the i0+fftLen/2 sample */
01225 
01226 #ifndef ARM_MATH_BIG_ENDIAN
01227 
01228     /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
01229     out1 = __SMUSD(C1, S) >> 16u;
01230     /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
01231     out2 = __SMUADX(C1, S);
01232 
01233 #else
01234 
01235     /* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
01236     out1 = __SMUADX(C1, S) >> 16u;
01237     /* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
01238     out2 = __SMUSD(__QSUB16(0, C1), S);
01239 
01240 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
01241 
01242     /* writing output(xb', yb') in little endian format */
01243     _SIMD32_OFFSET(pSrc16 + (2u * i2)) =
01244       ((out2) & 0xFFFF0000) | ((out1) & 0x0000FFFF);
01245 
01246 
01247     /* co3 & si3 are read from SIMD Coefficient pointer */
01248     C3 = _SIMD32_OFFSET(pCoef16 + (6u * ic));
01249     /*  Butterfly process for the i0+3fftLen/4 sample */
01250 
01251 #ifndef ARM_MATH_BIG_ENDIAN
01252 
01253     /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
01254     out1 = __SMUSD(C3, R) >> 16u;
01255     /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
01256     out2 = __SMUADX(C3, R);
01257 
01258 #else
01259 
01260     /* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
01261     out1 = __SMUADX(C3, R) >> 16u;
01262     /* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
01263     out2 = __SMUSD(__QSUB16(0, C3), R);
01264 
01265 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
01266 
01267     /* writing output(xd', yd') in little endian format */
01268     _SIMD32_OFFSET(pSrc16 + (2u * i3)) =
01269       ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
01270 
01271     /*  Twiddle coefficients index modifier */
01272     ic = ic + twidCoefModifier;
01273 
01274     /*  Updating input index */
01275     i0 = i0 + 1u;
01276 
01277   } while(--j);
01278   /* data is in 4.11(q11) format */
01279 
01280   /* end of first stage process */
01281 
01282 
01283   /* start of middle stage process */
01284 
01285   /*  Twiddle coefficients index modifier */
01286   twidCoefModifier <<= 2u;
01287 
01288   /*  Calculation of Middle stage */
01289   for (k = fftLen / 4u; k > 4u; k >>= 2u)
01290   {
01291     /*  Initializations for the middle stage */
01292     n1 = n2;
01293     n2 >>= 2u;
01294     ic = 0u;
01295 
01296     for (j = 0u; j <= (n2 - 1u); j++)
01297     {
01298       /*  index calculation for the coefficients */
01299       C1 = _SIMD32_OFFSET(pCoef16 + (2u * ic));
01300       C2 = _SIMD32_OFFSET(pCoef16 + (4u * ic));
01301       C3 = _SIMD32_OFFSET(pCoef16 + (6u * ic));
01302 
01303       /*  Twiddle coefficients index modifier */
01304       ic = ic + twidCoefModifier;
01305 
01306       /*  Butterfly implementation */
01307       for (i0 = j; i0 < fftLen; i0 += n1)
01308       {
01309         /*  index calculation for the input as, */
01310         /*  pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
01311         i1 = i0 + n2;
01312         i2 = i1 + n2;
01313         i3 = i2 + n2;
01314 
01315         /*  Reading i0, i0+fftLen/2 inputs */
01316         /* Read ya (real), xa(imag) input */
01317         T = _SIMD32_OFFSET(pSrc16 + (2u * i0));
01318 
01319         /* Read yc (real), xc(imag) input */
01320         S = _SIMD32_OFFSET(pSrc16 + (2u * i2));
01321 
01322         /* R = packed( (ya + yc), (xa + xc)) */
01323         R = __QADD16(T, S);
01324 
01325         /* S = packed((ya - yc), (xa - xc)) */
01326         S = __QSUB16(T, S);
01327 
01328         /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
01329         /* Read yb (real), xb(imag) input */
01330         T = _SIMD32_OFFSET(pSrc16 + (2u * i1));
01331 
01332         /* Read yd (real), xd(imag) input */
01333         U = _SIMD32_OFFSET(pSrc16 + (2u * i3));
01334 
01335         /* T = packed( (yb + yd), (xb + xd)) */
01336         T = __QADD16(T, U);
01337 
01338         /*  writing the butterfly processed i0 sample */
01339 
01340         /* xa' = xa + xb + xc + xd */
01341         /* ya' = ya + yb + yc + yd */
01342         out1 = __SHADD16(R, T);
01343         in = ((int16_t) (out1 & 0xFFFF)) >> 1;
01344         out1 = ((out1 >> 1) & 0xFFFF0000) | (in & 0xFFFF);
01345         _SIMD32_OFFSET(pSrc16 + (2u * i0)) = out1;
01346 
01347         /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */
01348         R = __SHSUB16(R, T);
01349 
01350 #ifndef ARM_MATH_BIG_ENDIAN
01351 
01352         /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
01353         out1 = __SMUSD(C2, R) >> 16u;
01354 
01355         /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
01356         out2 = __SMUADX(C2, R);
01357 
01358 #else
01359 
01360         /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
01361         out1 = __SMUADX(R, C2) >> 16u;
01362 
01363         /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
01364         out2 = __SMUSD(__QSUB16(0, C2), R);
01365 
01366 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
01367 
01368         /*  Reading i0+fftLen/4 */
01369         /* Read yb (real), xb(imag) input */
01370         T = _SIMD32_OFFSET(pSrc16 + (2u * i1));
01371 
01372         /*  writing the butterfly processed i0 + fftLen/4 sample */
01373         /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
01374         /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
01375         _SIMD32_OFFSET(pSrc16 + (2u * i1)) =
01376           ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
01377 
01378         /*  Butterfly calculations */
01379 
01380         /* Read yd (real), xd(imag) input */
01381         U = _SIMD32_OFFSET(pSrc16 + (2u * i3));
01382 
01383         /* T = packed(yb-yd, xb-xd) */
01384         T = __QSUB16(T, U);
01385 
01386 #ifndef ARM_MATH_BIG_ENDIAN
01387 
01388         /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
01389         R = __SHSAX(S, T);
01390 
01391         /* S = packed((ya-yc) - (xb- xd),  (xa-xc) + (yb-yd)) */
01392         S = __SHASX(S, T);
01393 
01394 
01395         /*  Butterfly process for the i0+fftLen/2 sample */
01396         out1 = __SMUSD(C1, S) >> 16u;
01397         out2 = __SMUADX(C1, S);
01398 
01399 #else
01400 
01401         /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
01402         R = __SHASX(S, T);
01403 
01404         /* S = packed((ya-yc) - (xb- xd),  (xa-xc) + (yb-yd)) */
01405         S = __SHSAX(S, T);
01406 
01407 
01408         /*  Butterfly process for the i0+fftLen/2 sample */
01409         out1 = __SMUADX(S, C1) >> 16u;
01410         out2 = __SMUSD(__QSUB16(0, C1), S);
01411 
01412 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
01413 
01414         /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
01415         /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
01416         _SIMD32_OFFSET(pSrc16 + (2u * i2)) =
01417           ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
01418 
01419         /*  Butterfly process for the i0+3fftLen/4 sample */
01420 
01421 #ifndef ARM_MATH_BIG_ENDIAN
01422 
01423         out1 = __SMUSD(C3, R) >> 16u;
01424         out2 = __SMUADX(C3, R);
01425 
01426 #else
01427 
01428         out1 = __SMUADX(C3, R) >> 16u;
01429         out2 = __SMUSD(__QSUB16(0, C3), R);
01430 
01431 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
01432 
01433         /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
01434         /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
01435         _SIMD32_OFFSET(pSrc16 + (2u * i3)) =
01436           ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
01437       }
01438     }
01439     /*  Twiddle coefficients index modifier */
01440     twidCoefModifier <<= 2u;
01441   }
01442   /* end of middle stage process */
01443 
01444   /* data is in 10.6(q6) format for the 1024 point */
01445   /* data is in 8.8(q8) format for the 256 point */
01446   /* data is in 6.10(q10) format for the 64 point */
01447   /* data is in 4.12(q12) format for the 16 point */
01448 
01449   /*  Initializations for the last stage */
01450   j = fftLen >> 2;
01451 
01452   ptr1 = &pSrc16[0];
01453 
01454   /* start of last stage process */
01455 
01456   /*  Butterfly implementation */
01457   do
01458   {
01459     /* Read xa (real), ya(imag) input */
01460     xaya = *__SIMD32(ptr1)++;
01461 
01462     /* Read xb (real), yb(imag) input */
01463     xbyb = *__SIMD32(ptr1)++;
01464 
01465     /* Read xc (real), yc(imag) input */
01466     xcyc = *__SIMD32(ptr1)++;
01467 
01468     /* Read xd (real), yd(imag) input */
01469     xdyd = *__SIMD32(ptr1)++;
01470 
01471     /* R = packed((ya + yc), (xa + xc)) */
01472     R = __QADD16(xaya, xcyc);
01473 
01474     /* T = packed((yb + yd), (xb + xd)) */
01475     T = __QADD16(xbyb, xdyd);
01476 
01477     /* pointer updation for writing */
01478     ptr1 = ptr1 - 8u;
01479 
01480 
01481     /* xa' = xa + xb + xc + xd */
01482     /* ya' = ya + yb + yc + yd */
01483     *__SIMD32(ptr1)++ = __SHADD16(R, T);
01484 
01485     /* T = packed((yb + yd), (xb + xd)) */
01486     T = __QADD16(xbyb, xdyd);
01487 
01488     /* xc' = (xa-xb+xc-xd) */
01489     /* yc' = (ya-yb+yc-yd) */
01490     *__SIMD32(ptr1)++ = __SHSUB16(R, T);
01491 
01492     /* S = packed((ya - yc), (xa - xc)) */
01493     S = __QSUB16(xaya, xcyc);
01494 
01495     /* Read yd (real), xd(imag) input */
01496     /* U = packed( (yb - yd), (xb - xd))  */
01497     U = __QSUB16(xbyb, xdyd);
01498 
01499 #ifndef ARM_MATH_BIG_ENDIAN
01500 
01501     /* xb' = (xa+yb-xc-yd) */
01502     /* yb' = (ya-xb-yc+xd) */
01503     *__SIMD32(ptr1)++ = __SHASX(S, U);
01504 
01505 
01506     /* xd' = (xa-yb-xc+yd) */
01507     /* yd' = (ya+xb-yc-xd) */
01508     *__SIMD32(ptr1)++ = __SHSAX(S, U);
01509 
01510 #else
01511 
01512     /* xb' = (xa+yb-xc-yd) */
01513     /* yb' = (ya-xb-yc+xd) */
01514     *__SIMD32(ptr1)++ = __SHSAX(S, U);
01515 
01516 
01517     /* xd' = (xa-yb-xc+yd) */
01518     /* yd' = (ya+xb-yc-xd) */
01519     *__SIMD32(ptr1)++ = __SHASX(S, U);
01520 
01521 
01522 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
01523 
01524   } while(--j);
01525 
01526   /* end of last stage  process */
01527 
01528   /* output is in 11.5(q5) format for the 1024 point */
01529   /* output is in 9.7(q7) format for the 256 point   */
01530   /* output is in 7.9(q9) format for the 64 point  */
01531   /* output is in 5.11(q11) format for the 16 point  */
01532 
01533 
01534 #else
01535 
01536   /* Run the below code for Cortex-M0 */
01537 
01538   q15_t R0, R1, S0, S1, T0, T1, U0, U1;
01539   q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2;
01540   uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;
01541 
01542   /* Total process is divided into three stages */
01543 
01544   /* process first stage, middle stages, & last stage */
01545 
01546   /*  Initializations for the first stage */
01547   n2 = fftLen;
01548   n1 = n2;
01549 
01550   /* n2 = fftLen/4 */
01551   n2 >>= 2u;
01552 
01553   /* Index for twiddle coefficient */
01554   ic = 0u;
01555 
01556   /* Index for input read and output write */
01557   i0 = 0u;
01558 
01559   j = n2;
01560 
01561   /* Input is in 1.15(q15) format */
01562 
01563   /*  Start of first stage process */
01564   do
01565   {
01566     /*  Butterfly implementation */
01567 
01568     /*  index calculation for the input as, */
01569     /*  pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
01570     i1 = i0 + n2;
01571     i2 = i1 + n2;
01572     i3 = i2 + n2;
01573 
01574     /*  Reading i0, i0+fftLen/2 inputs */
01575     /* input is down scale by 4 to avoid overflow */
01576     /* Read ya (real), xa(imag) input */
01577     T0 = pSrc16[i0 * 2u] >> 2u;
01578     T1 = pSrc16[(i0 * 2u) + 1u] >> 2u;
01579     /* input is down scale by 4 to avoid overflow */
01580     /* Read yc (real), xc(imag) input */
01581     S0 = pSrc16[i2 * 2u] >> 2u;
01582     S1 = pSrc16[(i2 * 2u) + 1u] >> 2u;
01583 
01584     /* R0 = (ya + yc), R1 = (xa + xc) */
01585     R0 = __SSAT(T0 + S0, 16u);
01586     R1 = __SSAT(T1 + S1, 16u);
01587     /* S0 = (ya - yc), S1 = (xa - xc) */
01588     S0 = __SSAT(T0 - S0, 16u);
01589     S1 = __SSAT(T1 - S1, 16u);
01590 
01591     /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
01592     /* input is down scale by 4 to avoid overflow */
01593     /* Read yb (real), xb(imag) input */
01594     T0 = pSrc16[i1 * 2u] >> 2u;
01595     T1 = pSrc16[(i1 * 2u) + 1u] >> 2u;
01596     /* Read yd (real), xd(imag) input */
01597     /* input is down scale by 4 to avoid overflow */
01598     U0 = pSrc16[i3 * 2u] >> 2u;
01599     U1 = pSrc16[(i3 * 2u) + 1u] >> 2u;
01600 
01601     /* T0 = (yb + yd), T1 = (xb + xd) */
01602     T0 = __SSAT(T0 + U0, 16u);
01603     T1 = __SSAT(T1 + U1, 16u);
01604 
01605     /*  writing the butterfly processed i0 sample */
01606     /* xa' = xa + xb + xc + xd */
01607     /* ya' = ya + yb + yc + yd */
01608     pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u);
01609     pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u);
01610 
01611     /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc)- (xb + xd) */
01612     R0 = __SSAT(R0 - T0, 16u);
01613     R1 = __SSAT(R1 - T1, 16u);
01614     /* co2 & si2 are read from Coefficient pointer */
01615     Co2 = pCoef16[2u * ic * 2u];
01616     Si2 = pCoef16[(2u * ic * 2u) + 1u];
01617     /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
01618     out1 = (q15_t) ((Co2 * R0 - Si2 * R1) >> 16u);
01619     /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
01620     out2 = (q15_t) ((Si2 * R0 + Co2 * R1) >> 16u);
01621 
01622     /*  Reading i0+fftLen/4 */
01623     /* input is down scale by 4 to avoid overflow */
01624     /* T0 = yb, T1 = xb */
01625     T0 = pSrc16[i1 * 2u] >> 2u;
01626     T1 = pSrc16[(i1 * 2u) + 1u] >> 2u;
01627 
01628     /* writing the butterfly processed i0 + fftLen/4 sample */
01629     /* writing output(xc', yc') in little endian format */
01630     pSrc16[i1 * 2u] = out1;
01631     pSrc16[(i1 * 2u) + 1u] = out2;
01632 
01633     /*  Butterfly calculations */
01634     /* input is down scale by 4 to avoid overflow */
01635     /* U0 = yd, U1 = xd) */
01636     U0 = pSrc16[i3 * 2u] >> 2u;
01637     U1 = pSrc16[(i3 * 2u) + 1u] >> 2u;
01638 
01639     /* T0 = yb-yd, T1 = xb-xd) */
01640     T0 = __SSAT(T0 - U0, 16u);
01641     T1 = __SSAT(T1 - U1, 16u);
01642     /* R0 = (ya-yc) + (xb- xd) , R1 = (xa-xc) - (yb-yd) */
01643     R0 = (q15_t) __SSAT((q31_t) (S0 + T1), 16);
01644     R1 = (q15_t) __SSAT((q31_t) (S1 - T0), 16);
01645     /* S0 = (ya-yc) - (xb- xd), S1 = (xa-xc) + (yb-yd) */
01646     S0 = (q15_t) __SSAT((q31_t) (S0 - T1), 16);
01647     S1 = (q15_t) __SSAT((q31_t) (S1 + T0), 16);
01648 
01649     /* co1 & si1 are read from Coefficient pointer */
01650     Co1 = pCoef16[ic * 2u];
01651     Si1 = pCoef16[(ic * 2u) + 1u];
01652     /*  Butterfly process for the i0+fftLen/2 sample */
01653     /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
01654     out1 = (q15_t) ((Co1 * S0 - Si1 * S1) >> 16u);
01655     /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
01656     out2 = (q15_t) ((Si1 * S0 + Co1 * S1) >> 16u);
01657     /* writing output(xb', yb') in little endian format */
01658     pSrc16[i2 * 2u] = out1;
01659     pSrc16[(i2 * 2u) + 1u] = out2;
01660 
01661     /* Co3 & si3 are read from Coefficient pointer */
01662     Co3 = pCoef16[3u * ic * 2u];
01663     Si3 = pCoef16[(3u * ic * 2u) + 1u];
01664     /*  Butterfly process for the i0+3fftLen/4 sample */
01665     /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */
01666     out1 = (q15_t) ((Co3 * R0 - Si3 * R1) >> 16u);
01667     /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */
01668     out2 = (q15_t) ((Si3 * R0 + Co3 * R1) >> 16u);
01669     /* writing output(xd', yd') in little endian format */
01670     pSrc16[i3 * 2u] = out1;
01671     pSrc16[(i3 * 2u) + 1u] = out2;
01672 
01673     /*  Twiddle coefficients index modifier */
01674     ic = ic + twidCoefModifier;
01675 
01676     /*  Updating input index */
01677     i0 = i0 + 1u;
01678 
01679   } while(--j);
01680 
01681   /*  End of first stage process */
01682 
01683   /* data is in 4.11(q11) format */
01684 
01685 
01686   /*  Start of Middle stage process */
01687 
01688   /*  Twiddle coefficients index modifier */
01689   twidCoefModifier <<= 2u;
01690 
01691   /*  Calculation of Middle stage */
01692   for (k = fftLen / 4u; k > 4u; k >>= 2u)
01693   {
01694     /*  Initializations for the middle stage */
01695     n1 = n2;
01696     n2 >>= 2u;
01697     ic = 0u;
01698 
01699     for (j = 0u; j <= (n2 - 1u); j++)
01700     {
01701       /*  index calculation for the coefficients */
01702       Co1 = pCoef16[ic * 2u];
01703       Si1 = pCoef16[(ic * 2u) + 1u];
01704       Co2 = pCoef16[2u * ic * 2u];
01705       Si2 = pCoef16[2u * ic * 2u + 1u];
01706       Co3 = pCoef16[3u * ic * 2u];
01707       Si3 = pCoef16[(3u * ic * 2u) + 1u];
01708 
01709       /*  Twiddle coefficients index modifier */
01710       ic = ic + twidCoefModifier;
01711 
01712       /*  Butterfly implementation */
01713       for (i0 = j; i0 < fftLen; i0 += n1)
01714       {
01715         /*  index calculation for the input as, */
01716         /*  pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
01717         i1 = i0 + n2;
01718         i2 = i1 + n2;
01719         i3 = i2 + n2;
01720 
01721         /*  Reading i0, i0+fftLen/2 inputs */
01722         /* Read ya (real), xa(imag) input */
01723         T0 = pSrc16[i0 * 2u];
01724         T1 = pSrc16[(i0 * 2u) + 1u];
01725 
01726         /* Read yc (real), xc(imag) input */
01727         S0 = pSrc16[i2 * 2u];
01728         S1 = pSrc16[(i2 * 2u) + 1u];
01729 
01730 
01731         /* R0 = (ya + yc), R1 = (xa + xc) */
01732         R0 = __SSAT(T0 + S0, 16u);
01733         R1 = __SSAT(T1 + S1, 16u);
01734         /* S0 = (ya - yc), S1 = (xa - xc) */
01735         S0 = __SSAT(T0 - S0, 16u);
01736         S1 = __SSAT(T1 - S1, 16u);
01737 
01738         /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
01739         /* Read yb (real), xb(imag) input */
01740         T0 = pSrc16[i1 * 2u];
01741         T1 = pSrc16[(i1 * 2u) + 1u];
01742 
01743         /* Read yd (real), xd(imag) input */
01744         U0 = pSrc16[i3 * 2u];
01745         U1 = pSrc16[(i3 * 2u) + 1u];
01746 
01747         /* T0 = (yb + yd), T1 = (xb + xd) */
01748         T0 = __SSAT(T0 + U0, 16u);
01749         T1 = __SSAT(T1 + U1, 16u);
01750 
01751         /*  writing the butterfly processed i0 sample */
01752         /* xa' = xa + xb + xc + xd */
01753         /* ya' = ya + yb + yc + yd */
01754         pSrc16[i0 * 2u] = ((R0 >> 1u) + (T0 >> 1u)) >> 1u;
01755         pSrc16[(i0 * 2u) + 1u] = ((R1 >> 1u) + (T1 >> 1u)) >> 1u;
01756 
01757         /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
01758         R0 = (R0 >> 1u) - (T0 >> 1u);
01759         R1 = (R1 >> 1u) - (T1 >> 1u);
01760 
01761         /* (ya-yb+yc-yd)* (si2) - (xa-xb+xc-xd)* co2 */
01762         out1 = (q15_t) ((Co2 * R0 - Si2 * R1) >> 16);
01763         /* (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
01764         out2 = (q15_t) ((Si2 * R0 + Co2 * R1) >> 16);
01765 
01766         /*  Reading i0+fftLen/4 */
01767         /* Read yb (real), xb(imag) input */
01768         T0 = pSrc16[i1 * 2u];
01769         T1 = pSrc16[(i1 * 2u) + 1u];
01770 
01771         /*  writing the butterfly processed i0 + fftLen/4 sample */
01772         /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
01773         /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
01774         pSrc16[i1 * 2u] = out1;
01775         pSrc16[(i1 * 2u) + 1u] = out2;
01776 
01777         /*  Butterfly calculations */
01778         /* Read yd (real), xd(imag) input */
01779         U0 = pSrc16[i3 * 2u];
01780         U1 = pSrc16[(i3 * 2u) + 1u];
01781 
01782         /* T0 = yb-yd, T1 = xb-xd) */
01783         T0 = __SSAT(T0 - U0, 16u);
01784         T1 = __SSAT(T1 - U1, 16u);
01785 
01786         /* R0 = (ya-yc) + (xb- xd) , R1 = (xa-xc) - (yb-yd) */
01787         R0 = (S0 >> 1u) + (T1 >> 1u);
01788         R1 = (S1 >> 1u) - (T0 >> 1u);
01789 
01790         /* S0 = (ya-yc) - (xb- xd), S1 = (xa-xc) + (yb-yd) */
01791         S0 = (S0 >> 1u) - (T1 >> 1u);
01792         S1 = (S1 >> 1u) + (T0 >> 1u);
01793 
01794         /*  Butterfly process for the i0+fftLen/2 sample */
01795         out1 = (q15_t) ((Co1 * S0 - Si1 * S1) >> 16u);
01796         out2 = (q15_t) ((Si1 * S0 + Co1 * S1) >> 16u);
01797         /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
01798         /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
01799         pSrc16[i2 * 2u] = out1;
01800         pSrc16[(i2 * 2u) + 1u] = out2;
01801 
01802         /*  Butterfly process for the i0+3fftLen/4 sample */
01803         out1 = (q15_t) ((Co3 * R0 - Si3 * R1) >> 16u);
01804 
01805         out2 = (q15_t) ((Si3 * R0 + Co3 * R1) >> 16u);
01806         /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */
01807         /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */
01808         pSrc16[i3 * 2u] = out1;
01809         pSrc16[(i3 * 2u) + 1u] = out2;
01810 
01811 
01812       }
01813     }
01814     /*  Twiddle coefficients index modifier */
01815     twidCoefModifier <<= 2u;
01816   }
01817   /*  End of Middle stages process */
01818 
01819 
01820   /* data is in 10.6(q6) format for the 1024 point */
01821   /* data is in 8.8(q8) format for the 256 point   */
01822   /* data is in 6.10(q10) format for the 64 point  */
01823   /* data is in 4.12(q12) format for the 16 point  */
01824 
01825   /* start of last stage process */
01826 
01827 
01828   /*  Initializations for the last stage */
01829   n1 = n2;
01830   n2 >>= 2u;
01831 
01832   /*  Butterfly implementation */
01833   for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1)
01834   {
01835     /*  index calculation for the input as, */
01836     /*  pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
01837     i1 = i0 + n2;
01838     i2 = i1 + n2;
01839     i3 = i2 + n2;
01840 
01841     /*  Reading i0, i0+fftLen/2 inputs */
01842     /* Read ya (real), xa(imag) input */
01843     T0 = pSrc16[i0 * 2u];
01844     T1 = pSrc16[(i0 * 2u) + 1u];
01845     /* Read yc (real), xc(imag) input */
01846     S0 = pSrc16[i2 * 2u];
01847     S1 = pSrc16[(i2 * 2u) + 1u];
01848 
01849     /* R0 = (ya + yc), R1 = (xa + xc) */
01850     R0 = __SSAT(T0 + S0, 16u);
01851     R1 = __SSAT(T1 + S1, 16u);
01852     /* S0 = (ya - yc), S1 = (xa - xc) */
01853     S0 = __SSAT(T0 - S0, 16u);
01854     S1 = __SSAT(T1 - S1, 16u);
01855 
01856     /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
01857     /* Read yb (real), xb(imag) input */
01858     T0 = pSrc16[i1 * 2u];
01859     T1 = pSrc16[(i1 * 2u) + 1u];
01860     /* Read yd (real), xd(imag) input */
01861     U0 = pSrc16[i3 * 2u];
01862     U1 = pSrc16[(i3 * 2u) + 1u];
01863 
01864     /* T0 = (yb + yd), T1 = (xb + xd) */
01865     T0 = __SSAT(T0 + U0, 16u);
01866     T1 = __SSAT(T1 + U1, 16u);
01867 
01868     /*  writing the butterfly processed i0 sample */
01869     /* xa' = xa + xb + xc + xd */
01870     /* ya' = ya + yb + yc + yd */
01871     pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u);
01872     pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u);
01873 
01874     /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
01875     R0 = (R0 >> 1u) - (T0 >> 1u);
01876     R1 = (R1 >> 1u) - (T1 >> 1u);
01877 
01878     /* Read yb (real), xb(imag) input */
01879     T0 = pSrc16[i1 * 2u];
01880     T1 = pSrc16[(i1 * 2u) + 1u];
01881 
01882     /*  writing the butterfly processed i0 + fftLen/4 sample */
01883     /* xc' = (xa-xb+xc-xd) */
01884     /* yc' = (ya-yb+yc-yd) */
01885     pSrc16[i1 * 2u] = R0;
01886     pSrc16[(i1 * 2u) + 1u] = R1;
01887 
01888     /* Read yd (real), xd(imag) input */
01889     U0 = pSrc16[i3 * 2u];
01890     U1 = pSrc16[(i3 * 2u) + 1u];
01891     /* T0 = (yb - yd), T1 = (xb - xd) */
01892     T0 = __SSAT(T0 - U0, 16u);
01893     T1 = __SSAT(T1 - U1, 16u);
01894 
01895     /*  writing the butterfly processed i0 + fftLen/2 sample */
01896     /* xb' = (xa-yb-xc+yd) */
01897     /* yb' = (ya+xb-yc-xd) */
01898     pSrc16[i2 * 2u] = (S0 >> 1u) - (T1 >> 1u);
01899     pSrc16[(i2 * 2u) + 1u] = (S1 >> 1u) + (T0 >> 1u);
01900 
01901 
01902     /*  writing the butterfly processed i0 + 3fftLen/4 sample */
01903     /* xd' = (xa+yb-xc-yd) */
01904     /* yd' = (ya-xb-yc+xd) */
01905     pSrc16[i3 * 2u] = (S0 >> 1u) + (T1 >> 1u);
01906     pSrc16[(i3 * 2u) + 1u] = (S1 >> 1u) - (T0 >> 1u);
01907   }
01908   /* end of last stage  process */
01909 
01910   /* output is in 11.5(q5) format for the 1024 point */
01911   /* output is in 9.7(q7) format for the 256 point   */
01912   /* output is in 7.9(q9) format for the 64 point  */
01913   /* output is in 5.11(q11) format for the 16 point  */
01914 
01915 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
01916 
01917 }