Daniel Konegen / MNIST_example

Dependencies:   mbed-os

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_cfft_radix4_q15.c Source File

arm_cfft_radix4_q15.c

00001 /* ----------------------------------------------------------------------
00002  * Project:      CMSIS DSP Library
00003  * Title:        arm_cfft_radix4_q15.c
00004  * Description:  This file has function definition of Radix-4 FFT & IFFT function and
00005  *               In-place bit reversal using bit reversal table
00006  *
00007  * $Date:        27. January 2017
00008  * $Revision:    V.1.5.1
00009  *
00010  * Target Processor: Cortex-M cores
00011  * -------------------------------------------------------------------- */
00012 /*
00013  * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
00014  *
00015  * SPDX-License-Identifier: Apache-2.0
00016  *
00017  * Licensed under the Apache License, Version 2.0 (the License); you may
00018  * not use this file except in compliance with the License.
00019  * You may obtain a copy of the License at
00020  *
00021  * www.apache.org/licenses/LICENSE-2.0
00022  *
00023  * Unless required by applicable law or agreed to in writing, software
00024  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
00025  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00026  * See the License for the specific language governing permissions and
00027  * limitations under the License.
00028  */
00029 
00030 #include "arm_math.h"
00031 
00032 
00033 void arm_radix4_butterfly_q15(
00034   q15_t * pSrc16,
00035   uint32_t fftLen,
00036   q15_t * pCoef16,
00037   uint32_t twidCoefModifier);
00038 
00039 void arm_radix4_butterfly_inverse_q15(
00040   q15_t * pSrc16,
00041   uint32_t fftLen,
00042   q15_t * pCoef16,
00043   uint32_t twidCoefModifier);
00044 
00045 void arm_bitreversal_q15(
00046   q15_t * pSrc,
00047   uint32_t fftLen,
00048   uint16_t bitRevFactor,
00049   uint16_t * pBitRevTab);
00050 
00051 /**
00052  * @ingroup groupTransforms
00053  */
00054 
00055 /**
00056  * @addtogroup ComplexFFT
00057  * @{
00058  */
00059 
00060 
00061 /**
00062  * @details
00063  * @brief Processing function for the Q15 CFFT/CIFFT.
00064  * @deprecated Do not use this function.  It has been superseded by \ref arm_cfft_q15 and will be removed in the future.
00065  * @param[in]      *S    points to an instance of the Q15 CFFT/CIFFT structure.
00066  * @param[in, out] *pSrc points to the complex data buffer. Processing occurs in-place.
00067  * @return none.
00068  *
00069  * \par Input and output formats:
00070  * \par
00071  * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
00072  * Hence the output format is different for different FFT sizes.
00073  * The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT:
00074  * \par
00075  * \image html CFFTQ15.gif "Input and Output Formats for Q15 CFFT"
00076  * \image html CIFFTQ15.gif "Input and Output Formats for Q15 CIFFT"
00077  */
00078 
00079 void arm_cfft_radix4_q15(
00080   const arm_cfft_radix4_instance_q15 * S,
00081   q15_t * pSrc)
00082 {
00083   if (S->ifftFlag == 1U)
00084   {
00085     /*  Complex IFFT radix-4  */
00086     arm_radix4_butterfly_inverse_q15(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
00087   }
00088   else
00089   {
00090     /*  Complex FFT radix-4  */
00091     arm_radix4_butterfly_q15(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
00092   }
00093 
00094   if (S->bitReverseFlag == 1U)
00095   {
00096     /*  Bit Reversal */
00097     arm_bitreversal_q15(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
00098   }
00099 
00100 }
00101 
00102 /**
00103  * @} end of ComplexFFT group
00104  */
00105 
00106 /*
00107 * Radix-4 FFT algorithm used is :
00108 *
00109 * Input real and imaginary data:
00110 * x(n) = xa + j * ya
00111 * x(n+N/4 ) = xb + j * yb
00112 * x(n+N/2 ) = xc + j * yc
00113 * x(n+3N/4) = xd + j * yd
00114 *
00115 *
00116 * Output real and imaginary data:
00117 * x(4r) = xa'+ j * ya'
00118 * x(4r+1) = xb'+ j * yb'
00119 * x(4r+2) = xc'+ j * yc'
00120 * x(4r+3) = xd'+ j * yd'
00121 *
00122 *
00123 * Twiddle factors for radix-4 FFT:
00124 * Wn = co1 + j * (- si1)
00125 * W2n = co2 + j * (- si2)
00126 * W3n = co3 + j * (- si3)
00127 
00128 * The real and imaginary output values for the radix-4 butterfly are
00129 * xa' = xa + xb + xc + xd
00130 * ya' = ya + yb + yc + yd
00131 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
00132 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
00133 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
00134 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
00135 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
00136 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
00137 *
00138 */
00139 
00140 /**
00141  * @brief  Core function for the Q15 CFFT butterfly process.
00142  * @param[in, out] *pSrc16          points to the in-place buffer of Q15 data type.
00143  * @param[in]      fftLen           length of the FFT.
00144  * @param[in]      *pCoef16         points to twiddle coefficient buffer.
00145  * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
00146  * @return none.
00147  */
00148 
00149 void arm_radix4_butterfly_q15(
00150   q15_t * pSrc16,
00151   uint32_t fftLen,
00152   q15_t * pCoef16,
00153   uint32_t twidCoefModifier)
00154 {
00155 
00156 #if defined (ARM_MATH_DSP)
00157 
00158   /* Run the below code for Cortex-M4 and Cortex-M3 */
00159 
00160   q31_t R, S, T, U;
00161   q31_t C1, C2, C3, out1, out2;
00162   uint32_t n1, n2, ic, i0, j, k;
00163 
00164   q15_t *ptr1;
00165   q15_t *pSi0;
00166   q15_t *pSi1;
00167   q15_t *pSi2;
00168   q15_t *pSi3;
00169 
00170   q31_t xaya, xbyb, xcyc, xdyd;
00171 
00172   /* Total process is divided into three stages */
00173 
00174   /* process first stage, middle stages, & last stage */
00175 
00176   /*  Initializations for the first stage */
00177   n2 = fftLen;
00178   n1 = n2;
00179 
00180   /* n2 = fftLen/4 */
00181   n2 >>= 2U;
00182 
00183   /* Index for twiddle coefficient */
00184   ic = 0U;
00185 
00186   /* Index for input read and output write */
00187   j = n2;
00188 
00189   pSi0 = pSrc16;
00190   pSi1 = pSi0 + 2 * n2;
00191   pSi2 = pSi1 + 2 * n2;
00192   pSi3 = pSi2 + 2 * n2;
00193 
00194   /* Input is in 1.15(q15) format */
00195 
00196   /*  start of first stage process */
00197   do
00198   {
00199     /*  Butterfly implementation */
00200 
00201     /*  Reading i0, i0+fftLen/2 inputs */
00202     /* Read ya (real), xa(imag) input */
00203     T = _SIMD32_OFFSET(pSi0);
00204     T = __SHADD16(T, 0); // this is just a SIMD arithmetic shift right by 1
00205     T = __SHADD16(T, 0); // it turns out doing this twice is 2 cycles, the alternative takes 3 cycles
00206     //in = ((int16_t) (T & 0xFFFF)) >> 2;       // alternative code that takes 3 cycles
00207     //T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
00208 
00209     /* Read yc (real), xc(imag) input */
00210     S = _SIMD32_OFFSET(pSi2);
00211     S = __SHADD16(S, 0);
00212     S = __SHADD16(S, 0);
00213 
00214     /* R = packed((ya + yc), (xa + xc) ) */
00215     R = __QADD16(T, S);
00216 
00217     /* S = packed((ya - yc), (xa - xc) ) */
00218     S = __QSUB16(T, S);
00219 
00220     /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
00221     /* Read yb (real), xb(imag) input */
00222     T = _SIMD32_OFFSET(pSi1);
00223     T = __SHADD16(T, 0);
00224     T = __SHADD16(T, 0);
00225 
00226     /* Read yd (real), xd(imag) input */
00227     U = _SIMD32_OFFSET(pSi3);
00228     U = __SHADD16(U, 0);
00229     U = __SHADD16(U, 0);
00230 
00231     /* T = packed((yb + yd), (xb + xd) ) */
00232     T = __QADD16(T, U);
00233 
00234     /*  writing the butterfly processed i0 sample */
00235     /* xa' = xa + xb + xc + xd */
00236     /* ya' = ya + yb + yc + yd */
00237     _SIMD32_OFFSET(pSi0) = __SHADD16(R, T);
00238     pSi0 += 2;
00239 
00240     /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */
00241     R = __QSUB16(R, T);
00242 
00243     /* co2 & si2 are read from SIMD Coefficient pointer */
00244     C2 = _SIMD32_OFFSET(pCoef16 + (4U * ic));
00245 
00246 #ifndef ARM_MATH_BIG_ENDIAN
00247 
00248     /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
00249     out1 = __SMUAD(C2, R) >> 16U;
00250     /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
00251     out2 = __SMUSDX(C2, R);
00252 
00253 #else
00254 
00255     /* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
00256     out1 = __SMUSDX(R, C2) >> 16U;
00257     /* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
00258     out2 = __SMUAD(C2, R);
00259 
00260 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
00261 
00262     /*  Reading i0+fftLen/4 */
00263     /* T = packed(yb, xb) */
00264     T = _SIMD32_OFFSET(pSi1);
00265     T = __SHADD16(T, 0);
00266     T = __SHADD16(T, 0);
00267 
00268     /* writing the butterfly processed i0 + fftLen/4 sample */
00269     /* writing output(xc', yc') in little endian format */
00270     _SIMD32_OFFSET(pSi1) =
00271       (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
00272     pSi1 += 2;
00273 
00274     /*  Butterfly calculations */
00275     /* U = packed(yd, xd) */
00276     U = _SIMD32_OFFSET(pSi3);
00277     U = __SHADD16(U, 0);
00278     U = __SHADD16(U, 0);
00279 
00280     /* T = packed(yb-yd, xb-xd) */
00281     T = __QSUB16(T, U);
00282 
00283 #ifndef ARM_MATH_BIG_ENDIAN
00284 
00285     /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
00286     R = __QASX(S, T);
00287     /* S = packed((ya-yc) - (xb- xd),  (xa-xc) + (yb-yd)) */
00288     S = __QSAX(S, T);
00289 
00290 #else
00291 
00292     /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
00293     R = __QSAX(S, T);
00294     /* S = packed((ya-yc) - (xb- xd),  (xa-xc) + (yb-yd)) */
00295     S = __QASX(S, T);
00296 
00297 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
00298 
00299     /* co1 & si1 are read from SIMD Coefficient pointer */
00300     C1 = _SIMD32_OFFSET(pCoef16 + (2U * ic));
00301     /*  Butterfly process for the i0+fftLen/2 sample */
00302 
00303 #ifndef ARM_MATH_BIG_ENDIAN
00304 
00305     /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
00306     out1 = __SMUAD(C1, S) >> 16U;
00307     /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
00308     out2 = __SMUSDX(C1, S);
00309 
00310 #else
00311 
00312     /* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
00313     out1 = __SMUSDX(S, C1) >> 16U;
00314     /* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
00315     out2 = __SMUAD(C1, S);
00316 
00317 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
00318 
00319     /* writing output(xb', yb') in little endian format */
00320     _SIMD32_OFFSET(pSi2) =
00321       ((out2) & 0xFFFF0000) | ((out1) & 0x0000FFFF);
00322     pSi2 += 2;
00323 
00324 
00325     /* co3 & si3 are read from SIMD Coefficient pointer */
00326     C3 = _SIMD32_OFFSET(pCoef16 + (6U * ic));
00327     /*  Butterfly process for the i0+3fftLen/4 sample */
00328 
00329 #ifndef ARM_MATH_BIG_ENDIAN
00330 
00331     /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
00332     out1 = __SMUAD(C3, R) >> 16U;
00333     /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
00334     out2 = __SMUSDX(C3, R);
00335 
00336 #else
00337 
00338     /* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
00339     out1 = __SMUSDX(R, C3) >> 16U;
00340     /* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
00341     out2 = __SMUAD(C3, R);
00342 
00343 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
00344 
00345     /* writing output(xd', yd') in little endian format */
00346     _SIMD32_OFFSET(pSi3) =
00347       ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
00348     pSi3 += 2;
00349 
00350     /*  Twiddle coefficients index modifier */
00351     ic = ic + twidCoefModifier;
00352 
00353   } while (--j);
00354   /* data is in 4.11(q11) format */
00355 
00356   /* end of first stage process */
00357 
00358 
00359   /* start of middle stage process */
00360 
00361   /*  Twiddle coefficients index modifier */
00362   twidCoefModifier <<= 2U;
00363 
00364   /*  Calculation of Middle stage */
00365   for (k = fftLen / 4U; k > 4U; k >>= 2U)
00366   {
00367     /*  Initializations for the middle stage */
00368     n1 = n2;
00369     n2 >>= 2U;
00370     ic = 0U;
00371 
00372     for (j = 0U; j <= (n2 - 1U); j++)
00373     {
00374       /*  index calculation for the coefficients */
00375       C1 = _SIMD32_OFFSET(pCoef16 + (2U * ic));
00376       C2 = _SIMD32_OFFSET(pCoef16 + (4U * ic));
00377       C3 = _SIMD32_OFFSET(pCoef16 + (6U * ic));
00378 
00379       /*  Twiddle coefficients index modifier */
00380       ic = ic + twidCoefModifier;
00381 
00382       pSi0 = pSrc16 + 2 * j;
00383       pSi1 = pSi0 + 2 * n2;
00384       pSi2 = pSi1 + 2 * n2;
00385       pSi3 = pSi2 + 2 * n2;
00386 
00387       /*  Butterfly implementation */
00388       for (i0 = j; i0 < fftLen; i0 += n1)
00389       {
00390         /*  Reading i0, i0+fftLen/2 inputs */
00391         /* Read ya (real), xa(imag) input */
00392         T = _SIMD32_OFFSET(pSi0);
00393 
00394         /* Read yc (real), xc(imag) input */
00395         S = _SIMD32_OFFSET(pSi2);
00396 
00397         /* R = packed( (ya + yc), (xa + xc)) */
00398         R = __QADD16(T, S);
00399 
00400         /* S = packed((ya - yc), (xa - xc)) */
00401         S = __QSUB16(T, S);
00402 
00403         /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
00404         /* Read yb (real), xb(imag) input */
00405         T = _SIMD32_OFFSET(pSi1);
00406 
00407         /* Read yd (real), xd(imag) input */
00408         U = _SIMD32_OFFSET(pSi3);
00409 
00410         /* T = packed( (yb + yd), (xb + xd)) */
00411         T = __QADD16(T, U);
00412 
00413         /*  writing the butterfly processed i0 sample */
00414 
00415         /* xa' = xa + xb + xc + xd */
00416         /* ya' = ya + yb + yc + yd */
00417         out1 = __SHADD16(R, T);
00418         out1 = __SHADD16(out1, 0);
00419         _SIMD32_OFFSET(pSi0) = out1;
00420         pSi0 += 2 * n1;
00421 
00422         /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */
00423         R = __SHSUB16(R, T);
00424 
00425 #ifndef ARM_MATH_BIG_ENDIAN
00426 
00427         /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
00428         out1 = __SMUAD(C2, R) >> 16U;
00429 
00430         /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
00431         out2 = __SMUSDX(C2, R);
00432 
00433 #else
00434 
00435         /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
00436         out1 = __SMUSDX(R, C2) >> 16U;
00437 
00438         /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
00439         out2 = __SMUAD(C2, R);
00440 
00441 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
00442 
00443         /*  Reading i0+3fftLen/4 */
00444         /* Read yb (real), xb(imag) input */
00445         T = _SIMD32_OFFSET(pSi1);
00446 
00447         /*  writing the butterfly processed i0 + fftLen/4 sample */
00448         /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
00449         /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
00450         _SIMD32_OFFSET(pSi1) =
00451           ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
00452         pSi1 += 2 * n1;
00453 
00454         /*  Butterfly calculations */
00455 
00456         /* Read yd (real), xd(imag) input */
00457         U = _SIMD32_OFFSET(pSi3);
00458 
00459         /* T = packed(yb-yd, xb-xd) */
00460         T = __QSUB16(T, U);
00461 
00462 #ifndef ARM_MATH_BIG_ENDIAN
00463 
00464         /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
00465         R = __SHASX(S, T);
00466 
00467         /* S = packed((ya-yc) - (xb- xd),  (xa-xc) + (yb-yd)) */
00468         S = __SHSAX(S, T);
00469 
00470 
00471         /*  Butterfly process for the i0+fftLen/2 sample */
00472         out1 = __SMUAD(C1, S) >> 16U;
00473         out2 = __SMUSDX(C1, S);
00474 
00475 #else
00476 
00477         /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
00478         R = __SHSAX(S, T);
00479 
00480         /* S = packed((ya-yc) - (xb- xd),  (xa-xc) + (yb-yd)) */
00481         S = __SHASX(S, T);
00482 
00483 
00484         /*  Butterfly process for the i0+fftLen/2 sample */
00485         out1 = __SMUSDX(S, C1) >> 16U;
00486         out2 = __SMUAD(C1, S);
00487 
00488 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
00489 
00490         /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
00491         /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
00492         _SIMD32_OFFSET(pSi2) =
00493           ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
00494         pSi2 += 2 * n1;
00495 
00496         /*  Butterfly process for the i0+3fftLen/4 sample */
00497 
00498 #ifndef ARM_MATH_BIG_ENDIAN
00499 
00500         out1 = __SMUAD(C3, R) >> 16U;
00501         out2 = __SMUSDX(C3, R);
00502 
00503 #else
00504 
00505         out1 = __SMUSDX(R, C3) >> 16U;
00506         out2 = __SMUAD(C3, R);
00507 
00508 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
00509 
00510         /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
00511         /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
00512         _SIMD32_OFFSET(pSi3) =
00513           ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
00514         pSi3 += 2 * n1;
00515       }
00516     }
00517     /*  Twiddle coefficients index modifier */
00518     twidCoefModifier <<= 2U;
00519   }
00520   /* end of middle stage process */
00521 
00522 
00523   /* data is in 10.6(q6) format for the 1024 point */
00524   /* data is in 8.8(q8) format for the 256 point */
00525   /* data is in 6.10(q10) format for the 64 point */
00526   /* data is in 4.12(q12) format for the 16 point */
00527 
00528   /*  Initializations for the last stage */
00529   j = fftLen >> 2;
00530 
00531   ptr1 = &pSrc16[0];
00532 
00533   /* start of last stage process */
00534 
00535   /*  Butterfly implementation */
00536   do
00537   {
00538     /* Read xa (real), ya(imag) input */
00539     xaya = *__SIMD32(ptr1)++;
00540 
00541     /* Read xb (real), yb(imag) input */
00542     xbyb = *__SIMD32(ptr1)++;
00543 
00544     /* Read xc (real), yc(imag) input */
00545     xcyc = *__SIMD32(ptr1)++;
00546 
00547     /* Read xd (real), yd(imag) input */
00548     xdyd = *__SIMD32(ptr1)++;
00549 
00550     /* R = packed((ya + yc), (xa + xc)) */
00551     R = __QADD16(xaya, xcyc);
00552 
00553     /* T = packed((yb + yd), (xb + xd)) */
00554     T = __QADD16(xbyb, xdyd);
00555 
00556     /* pointer updation for writing */
00557     ptr1 = ptr1 - 8U;
00558 
00559 
00560     /* xa' = xa + xb + xc + xd */
00561     /* ya' = ya + yb + yc + yd */
00562     *__SIMD32(ptr1)++ = __SHADD16(R, T);
00563 
00564     /* T = packed((yb + yd), (xb + xd)) */
00565     T = __QADD16(xbyb, xdyd);
00566 
00567     /* xc' = (xa-xb+xc-xd) */
00568     /* yc' = (ya-yb+yc-yd) */
00569     *__SIMD32(ptr1)++ = __SHSUB16(R, T);
00570 
00571     /* S = packed((ya - yc), (xa - xc)) */
00572     S = __QSUB16(xaya, xcyc);
00573 
00574     /* Read yd (real), xd(imag) input */
00575     /* T = packed( (yb - yd), (xb - xd))  */
00576     U = __QSUB16(xbyb, xdyd);
00577 
00578 #ifndef ARM_MATH_BIG_ENDIAN
00579 
00580     /* xb' = (xa+yb-xc-yd) */
00581     /* yb' = (ya-xb-yc+xd) */
00582     *__SIMD32(ptr1)++ = __SHSAX(S, U);
00583 
00584 
00585     /* xd' = (xa-yb-xc+yd) */
00586     /* yd' = (ya+xb-yc-xd) */
00587     *__SIMD32(ptr1)++ = __SHASX(S, U);
00588 
00589 #else
00590 
00591     /* xb' = (xa+yb-xc-yd) */
00592     /* yb' = (ya-xb-yc+xd) */
00593     *__SIMD32(ptr1)++ = __SHASX(S, U);
00594 
00595 
00596     /* xd' = (xa-yb-xc+yd) */
00597     /* yd' = (ya+xb-yc-xd) */
00598     *__SIMD32(ptr1)++ = __SHSAX(S, U);
00599 
00600 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
00601 
00602   } while (--j);
00603 
00604   /* end of last stage process */
00605 
00606   /* output is in 11.5(q5) format for the 1024 point */
00607   /* output is in 9.7(q7) format for the 256 point   */
00608   /* output is in 7.9(q9) format for the 64 point  */
00609   /* output is in 5.11(q11) format for the 16 point  */
00610 
00611 
00612 #else
00613 
00614   /* Run the below code for Cortex-M0 */
00615 
00616   q15_t R0, R1, S0, S1, T0, T1, U0, U1;
00617   q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2;
00618   uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;
00619 
00620   /* Total process is divided into three stages */
00621 
00622   /* process first stage, middle stages, & last stage */
00623 
00624   /*  Initializations for the first stage */
00625   n2 = fftLen;
00626   n1 = n2;
00627 
00628   /* n2 = fftLen/4 */
00629   n2 >>= 2U;
00630 
00631   /* Index for twiddle coefficient */
00632   ic = 0U;
00633 
00634   /* Index for input read and output write */
00635   i0 = 0U;
00636   j = n2;
00637 
00638   /* Input is in 1.15(q15) format */
00639 
00640   /*  start of first stage process */
00641   do
00642   {
00643     /*  Butterfly implementation */
00644 
00645     /*  index calculation for the input as, */
00646     /*  pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
00647     i1 = i0 + n2;
00648     i2 = i1 + n2;
00649     i3 = i2 + n2;
00650 
00651     /*  Reading i0, i0+fftLen/2 inputs */
00652 
00653     /* input is down scale by 4 to avoid overflow */
00654     /* Read ya (real), xa(imag) input */
00655     T0 = pSrc16[i0 * 2U] >> 2U;
00656     T1 = pSrc16[(i0 * 2U) + 1U] >> 2U;
00657 
00658     /* input is down scale by 4 to avoid overflow */
00659     /* Read yc (real), xc(imag) input */
00660     S0 = pSrc16[i2 * 2U] >> 2U;
00661     S1 = pSrc16[(i2 * 2U) + 1U] >> 2U;
00662 
00663     /* R0 = (ya + yc) */
00664     R0 = __SSAT(T0 + S0, 16U);
00665     /* R1 = (xa + xc) */
00666     R1 = __SSAT(T1 + S1, 16U);
00667 
00668     /* S0 = (ya - yc) */
00669     S0 = __SSAT(T0 - S0, 16);
00670     /* S1 = (xa - xc) */
00671     S1 = __SSAT(T1 - S1, 16);
00672 
00673     /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
00674     /* input is down scale by 4 to avoid overflow */
00675     /* Read yb (real), xb(imag) input */
00676     T0 = pSrc16[i1 * 2U] >> 2U;
00677     T1 = pSrc16[(i1 * 2U) + 1U] >> 2U;
00678 
00679     /* input is down scale by 4 to avoid overflow */
00680     /* Read yd (real), xd(imag) input */
00681     U0 = pSrc16[i3 * 2U] >> 2U;
00682     U1 = pSrc16[(i3 * 2U) + 1] >> 2U;
00683 
00684     /* T0 = (yb + yd) */
00685     T0 = __SSAT(T0 + U0, 16U);
00686     /* T1 = (xb + xd) */
00687     T1 = __SSAT(T1 + U1, 16U);
00688 
00689     /*  writing the butterfly processed i0 sample */
00690     /* ya' = ya + yb + yc + yd */
00691     /* xa' = xa + xb + xc + xd */
00692     pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U);
00693     pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U);
00694 
00695     /* R0 = (ya + yc) - (yb + yd) */
00696     /* R1 = (xa + xc) - (xb + xd) */
00697     R0 = __SSAT(R0 - T0, 16U);
00698     R1 = __SSAT(R1 - T1, 16U);
00699 
00700     /* co2 & si2 are read from Coefficient pointer */
00701     Co2 = pCoef16[2U * ic * 2U];
00702     Si2 = pCoef16[(2U * ic * 2U) + 1];
00703 
00704     /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
00705     out1 = (q15_t) ((Co2 * R0 + Si2 * R1) >> 16U);
00706     /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
00707     out2 = (q15_t) ((-Si2 * R0 + Co2 * R1) >> 16U);
00708 
00709     /*  Reading i0+fftLen/4 */
00710     /* input is down scale by 4 to avoid overflow */
00711     /* T0 = yb, T1 =  xb */
00712     T0 = pSrc16[i1 * 2U] >> 2;
00713     T1 = pSrc16[(i1 * 2U) + 1] >> 2;
00714 
00715     /* writing the butterfly processed i0 + fftLen/4 sample */
00716     /* writing output(xc', yc') in little endian format */
00717     pSrc16[i1 * 2U] = out1;
00718     pSrc16[(i1 * 2U) + 1] = out2;
00719 
00720     /*  Butterfly calculations */
00721     /* input is down scale by 4 to avoid overflow */
00722     /* U0 = yd, U1 = xd */
00723     U0 = pSrc16[i3 * 2U] >> 2;
00724     U1 = pSrc16[(i3 * 2U) + 1] >> 2;
00725     /* T0 = yb-yd */
00726     T0 = __SSAT(T0 - U0, 16);
00727     /* T1 = xb-xd */
00728     T1 = __SSAT(T1 - U1, 16);
00729 
00730     /* R1 = (ya-yc) + (xb- xd),  R0 = (xa-xc) - (yb-yd)) */
00731     R0 = (q15_t) __SSAT((q31_t) (S0 - T1), 16);
00732     R1 = (q15_t) __SSAT((q31_t) (S1 + T0), 16);
00733 
00734     /* S1 = (ya-yc) - (xb- xd), S0 = (xa-xc) + (yb-yd)) */
00735     S0 = (q15_t) __SSAT(((q31_t) S0 + T1), 16U);
00736     S1 = (q15_t) __SSAT(((q31_t) S1 - T0), 16U);
00737 
00738     /* co1 & si1 are read from Coefficient pointer */
00739     Co1 = pCoef16[ic * 2U];
00740     Si1 = pCoef16[(ic * 2U) + 1];
00741     /*  Butterfly process for the i0+fftLen/2 sample */
00742     /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
00743     out1 = (q15_t) ((Si1 * S1 + Co1 * S0) >> 16);
00744     /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
00745     out2 = (q15_t) ((-Si1 * S0 + Co1 * S1) >> 16);
00746 
00747     /* writing output(xb', yb') in little endian format */
00748     pSrc16[i2 * 2U] = out1;
00749     pSrc16[(i2 * 2U) + 1] = out2;
00750 
00751     /* Co3 & si3 are read from Coefficient pointer */
00752     Co3 = pCoef16[3U * (ic * 2U)];
00753     Si3 = pCoef16[(3U * (ic * 2U)) + 1];
00754     /*  Butterfly process for the i0+3fftLen/4 sample */
00755     /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */
00756     out1 = (q15_t) ((Si3 * R1 + Co3 * R0) >> 16U);
00757     /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */
00758     out2 = (q15_t) ((-Si3 * R0 + Co3 * R1) >> 16U);
00759     /* writing output(xd', yd') in little endian format */
00760     pSrc16[i3 * 2U] = out1;
00761     pSrc16[(i3 * 2U) + 1] = out2;
00762 
00763     /*  Twiddle coefficients index modifier */
00764     ic = ic + twidCoefModifier;
00765 
00766     /*  Updating input index */
00767     i0 = i0 + 1U;
00768 
00769   } while (--j);
00770   /* data is in 4.11(q11) format */
00771 
00772   /* end of first stage process */
00773 
00774 
00775   /* start of middle stage process */
00776 
00777   /*  Twiddle coefficients index modifier */
00778   twidCoefModifier <<= 2U;
00779 
00780   /*  Calculation of Middle stage */
00781   for (k = fftLen / 4U; k > 4U; k >>= 2U)
00782   {
00783     /*  Initializations for the middle stage */
00784     n1 = n2;
00785     n2 >>= 2U;
00786     ic = 0U;
00787 
00788     for (j = 0U; j <= (n2 - 1U); j++)
00789     {
00790       /*  index calculation for the coefficients */
00791       Co1 = pCoef16[ic * 2U];
00792       Si1 = pCoef16[(ic * 2U) + 1U];
00793       Co2 = pCoef16[2U * (ic * 2U)];
00794       Si2 = pCoef16[(2U * (ic * 2U)) + 1U];
00795       Co3 = pCoef16[3U * (ic * 2U)];
00796       Si3 = pCoef16[(3U * (ic * 2U)) + 1U];
00797 
00798       /*  Twiddle coefficients index modifier */
00799       ic = ic + twidCoefModifier;
00800 
00801       /*  Butterfly implementation */
00802       for (i0 = j; i0 < fftLen; i0 += n1)
00803       {
00804         /*  index calculation for the input as, */
00805         /*  pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
00806         i1 = i0 + n2;
00807         i2 = i1 + n2;
00808         i3 = i2 + n2;
00809 
00810         /*  Reading i0, i0+fftLen/2 inputs */
00811         /* Read ya (real), xa(imag) input */
00812         T0 = pSrc16[i0 * 2U];
00813         T1 = pSrc16[(i0 * 2U) + 1U];
00814 
00815         /* Read yc (real), xc(imag) input */
00816         S0 = pSrc16[i2 * 2U];
00817         S1 = pSrc16[(i2 * 2U) + 1U];
00818 
00819         /* R0 = (ya + yc), R1 = (xa + xc) */
00820         R0 = __SSAT(T0 + S0, 16);
00821         R1 = __SSAT(T1 + S1, 16);
00822 
00823         /* S0 = (ya - yc), S1 =(xa - xc) */
00824         S0 = __SSAT(T0 - S0, 16);
00825         S1 = __SSAT(T1 - S1, 16);
00826 
00827         /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
00828         /* Read yb (real), xb(imag) input */
00829         T0 = pSrc16[i1 * 2U];
00830         T1 = pSrc16[(i1 * 2U) + 1U];
00831 
00832         /* Read yd (real), xd(imag) input */
00833         U0 = pSrc16[i3 * 2U];
00834         U1 = pSrc16[(i3 * 2U) + 1U];
00835 
00836 
00837         /* T0 = (yb + yd), T1 = (xb + xd) */
00838         T0 = __SSAT(T0 + U0, 16);
00839         T1 = __SSAT(T1 + U1, 16);
00840 
00841         /*  writing the butterfly processed i0 sample */
00842 
00843         /* xa' = xa + xb + xc + xd */
00844         /* ya' = ya + yb + yc + yd */
00845         out1 = ((R0 >> 1U) + (T0 >> 1U)) >> 1U;
00846         out2 = ((R1 >> 1U) + (T1 >> 1U)) >> 1U;
00847 
00848         pSrc16[i0 * 2U] = out1;
00849         pSrc16[(2U * i0) + 1U] = out2;
00850 
00851         /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
00852         R0 = (R0 >> 1U) - (T0 >> 1U);
00853         R1 = (R1 >> 1U) - (T1 >> 1U);
00854 
00855         /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
00856         out1 = (q15_t) ((Co2 * R0 + Si2 * R1) >> 16U);
00857 
00858         /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
00859         out2 = (q15_t) ((-Si2 * R0 + Co2 * R1) >> 16U);
00860 
00861         /*  Reading i0+3fftLen/4 */
00862         /* Read yb (real), xb(imag) input */
00863         T0 = pSrc16[i1 * 2U];
00864         T1 = pSrc16[(i1 * 2U) + 1U];
00865 
00866         /*  writing the butterfly processed i0 + fftLen/4 sample */
00867         /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
00868         /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
00869         pSrc16[i1 * 2U] = out1;
00870         pSrc16[(i1 * 2U) + 1U] = out2;
00871 
00872         /*  Butterfly calculations */
00873 
00874         /* Read yd (real), xd(imag) input */
00875         U0 = pSrc16[i3 * 2U];
00876         U1 = pSrc16[(i3 * 2U) + 1U];
00877 
00878         /* T0 = yb-yd, T1 = xb-xd */
00879         T0 = __SSAT(T0 - U0, 16);
00880         T1 = __SSAT(T1 - U1, 16);
00881 
00882         /* R0 = (ya-yc) + (xb- xd), R1 = (xa-xc) - (yb-yd)) */
00883         R0 = (S0 >> 1U) - (T1 >> 1U);
00884         R1 = (S1 >> 1U) + (T0 >> 1U);
00885 
00886         /* S0 = (ya-yc) - (xb- xd), S1 = (xa-xc) + (yb-yd)) */
00887         S0 = (S0 >> 1U) + (T1 >> 1U);
00888         S1 = (S1 >> 1U) - (T0 >> 1U);
00889 
00890         /*  Butterfly process for the i0+fftLen/2 sample */
00891         out1 = (q15_t) ((Co1 * S0 + Si1 * S1) >> 16U);
00892 
00893         out2 = (q15_t) ((-Si1 * S0 + Co1 * S1) >> 16U);
00894 
00895         /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
00896         /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
00897         pSrc16[i2 * 2U] = out1;
00898         pSrc16[(i2 * 2U) + 1U] = out2;
00899 
00900         /*  Butterfly process for the i0+3fftLen/4 sample */
00901         out1 = (q15_t) ((Si3 * R1 + Co3 * R0) >> 16U);
00902 
00903         out2 = (q15_t) ((-Si3 * R0 + Co3 * R1) >> 16U);
00904         /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */
00905         /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */
00906         pSrc16[i3 * 2U] = out1;
00907         pSrc16[(i3 * 2U) + 1U] = out2;
00908       }
00909     }
00910     /*  Twiddle coefficients index modifier */
00911     twidCoefModifier <<= 2U;
00912   }
00913   /* end of middle stage process */
00914 
00915 
00916   /* data is in 10.6(q6) format for the 1024 point */
00917   /* data is in 8.8(q8) format for the 256 point */
00918   /* data is in 6.10(q10) format for the 64 point */
00919   /* data is in 4.12(q12) format for the 16 point */
00920 
00921   /*  Initializations for the last stage */
00922   n1 = n2;
00923   n2 >>= 2U;
00924 
00925   /* start of last stage process */
00926 
00927   /*  Butterfly implementation */
00928   for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
00929   {
00930     /*  index calculation for the input as, */
00931     /*  pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
00932     i1 = i0 + n2;
00933     i2 = i1 + n2;
00934     i3 = i2 + n2;
00935 
00936     /*  Reading i0, i0+fftLen/2 inputs */
00937     /* Read ya (real), xa(imag) input */
00938     T0 = pSrc16[i0 * 2U];
00939     T1 = pSrc16[(i0 * 2U) + 1U];
00940 
00941     /* Read yc (real), xc(imag) input */
00942     S0 = pSrc16[i2 * 2U];
00943     S1 = pSrc16[(i2 * 2U) + 1U];
00944 
00945     /* R0 = (ya + yc), R1 = (xa + xc) */
00946     R0 = __SSAT(T0 + S0, 16U);
00947     R1 = __SSAT(T1 + S1, 16U);
00948 
00949     /* S0 = (ya - yc), S1 = (xa - xc) */
00950     S0 = __SSAT(T0 - S0, 16U);
00951     S1 = __SSAT(T1 - S1, 16U);
00952 
00953     /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
00954     /* Read yb (real), xb(imag) input */
00955     T0 = pSrc16[i1 * 2U];
00956     T1 = pSrc16[(i1 * 2U) + 1U];
00957     /* Read yd (real), xd(imag) input */
00958     U0 = pSrc16[i3 * 2U];
00959     U1 = pSrc16[(i3 * 2U) + 1U];
00960 
00961     /* T0 = (yb + yd), T1 = (xb + xd)) */
00962     T0 = __SSAT(T0 + U0, 16U);
00963     T1 = __SSAT(T1 + U1, 16U);
00964 
00965     /*  writing the butterfly processed i0 sample */
00966     /* xa' = xa + xb + xc + xd */
00967     /* ya' = ya + yb + yc + yd */
00968     pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U);
00969     pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U);
00970 
00971     /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
00972     R0 = (R0 >> 1U) - (T0 >> 1U);
00973     R1 = (R1 >> 1U) - (T1 >> 1U);
00974     /* Read yb (real), xb(imag) input */
00975     T0 = pSrc16[i1 * 2U];
00976     T1 = pSrc16[(i1 * 2U) + 1U];
00977 
00978     /*  writing the butterfly processed i0 + fftLen/4 sample */
00979     /* xc' = (xa-xb+xc-xd) */
00980     /* yc' = (ya-yb+yc-yd) */
00981     pSrc16[i1 * 2U] = R0;
00982     pSrc16[(i1 * 2U) + 1U] = R1;
00983 
00984     /* Read yd (real), xd(imag) input */
00985     U0 = pSrc16[i3 * 2U];
00986     U1 = pSrc16[(i3 * 2U) + 1U];
00987     /* T0 = (yb - yd), T1 = (xb - xd)  */
00988     T0 = __SSAT(T0 - U0, 16U);
00989     T1 = __SSAT(T1 - U1, 16U);
00990 
00991     /*  writing the butterfly processed i0 + fftLen/2 sample */
00992     /* xb' = (xa+yb-xc-yd) */
00993     /* yb' = (ya-xb-yc+xd) */
00994     pSrc16[i2 * 2U] = (S0 >> 1U) + (T1 >> 1U);
00995     pSrc16[(i2 * 2U) + 1U] = (S1 >> 1U) - (T0 >> 1U);
00996 
00997     /*  writing the butterfly processed i0 + 3fftLen/4 sample */
00998     /* xd' = (xa-yb-xc+yd) */
00999     /* yd' = (ya+xb-yc-xd) */
01000     pSrc16[i3 * 2U] = (S0 >> 1U) - (T1 >> 1U);
01001     pSrc16[(i3 * 2U) + 1U] = (S1 >> 1U) + (T0 >> 1U);
01002 
01003   }
01004 
01005   /* end of last stage process */
01006 
01007   /* output is in 11.5(q5) format for the 1024 point */
01008   /* output is in 9.7(q7) format for the 256 point   */
01009   /* output is in 7.9(q9) format for the 64 point  */
01010   /* output is in 5.11(q11) format for the 16 point  */
01011 
01012 #endif /* #if defined (ARM_MATH_DSP) */
01013 
01014 }
01015 
01016 
01017 /**
01018  * @brief  Core function for the Q15 CIFFT butterfly process.
01019  * @param[in, out] *pSrc16          points to the in-place buffer of Q15 data type.
01020  * @param[in]      fftLen           length of the FFT.
01021  * @param[in]      *pCoef16         points to twiddle coefficient buffer.
01022  * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
01023  * @return none.
01024  */
01025 
01026 /*
01027 * Radix-4 IFFT algorithm used is :
01028 *
01029 * CIFFT uses same twiddle coefficients as CFFT function
01030 *  x[k] = x[n] + (j)^k * x[n + fftLen/4] + (-1)^k * x[n+fftLen/2] + (-j)^k * x[n+3*fftLen/4]
01031 *
01032 *
01033 * IFFT is implemented with following changes in equations from FFT
01034 *
01035 * Input real and imaginary data:
01036 * x(n) = xa + j * ya
01037 * x(n+N/4 ) = xb + j * yb
01038 * x(n+N/2 ) = xc + j * yc
01039 * x(n+3N/4) = xd + j * yd
01040 *
01041 *
01042 * Output real and imaginary data:
01043 * x(4r) = xa'+ j * ya'
01044 * x(4r+1) = xb'+ j * yb'
01045 * x(4r+2) = xc'+ j * yc'
01046 * x(4r+3) = xd'+ j * yd'
01047 *
01048 *
01049 * Twiddle factors for radix-4 IFFT:
01050 * Wn = co1 + j * (si1)
01051 * W2n = co2 + j * (si2)
01052 * W3n = co3 + j * (si3)
01053 
01054 * The real and imaginary output values for the radix-4 butterfly are
01055 * xa' = xa + xb + xc + xd
01056 * ya' = ya + yb + yc + yd
01057 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)
01058 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)
01059 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)
01060 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)
01061 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)
01062 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)
01063 *
01064 */
01065 
01066 void arm_radix4_butterfly_inverse_q15(
01067   q15_t * pSrc16,
01068   uint32_t fftLen,
01069   q15_t * pCoef16,
01070   uint32_t twidCoefModifier)
01071 {
01072   /* In-place radix-4 inverse-FFT butterflies on interleaved complex Q15 data; in the comments below x is the real and y the imaginary part */
01073 #if defined (ARM_MATH_DSP)
01074 
01075   /* Run the below code when ARM_MATH_DSP is defined: each 32-bit word holds one complex Q15 sample and is processed with SIMD intrinsics */
01076 
01077   q31_t R, S, T, U;
01078   q31_t C1, C2, C3, out1, out2;
01079   uint32_t n1, n2, ic, i0, j, k;
01080 
01081   q15_t *ptr1;
01082   q15_t *pSi0;
01083   q15_t *pSi1;
01084   q15_t *pSi2;
01085   q15_t *pSi3;
01086 
01087   q31_t xaya, xbyb, xcyc, xdyd;
01088 
01089   /* Total process is divided into three stages */
01090 
01091   /* process first stage, middle stages, & last stage */
01092 
01093   /*  Initializations for the first stage */
01094   n2 = fftLen;
01095   n1 = n2;
01096 
01097   /* n2 = fftLen/4 */
01098   n2 >>= 2U;
01099 
01100   /* Index for twiddle coefficient */
01101   ic = 0U;
01102 
01103   /* Number of first-stage butterflies (loop counter) */
01104   j = n2;
01105 
01106   pSi0 = pSrc16;
01107   pSi1 = pSi0 + 2 * n2;
01108   pSi2 = pSi1 + 2 * n2;
01109   pSi3 = pSi2 + 2 * n2;
01110 
01111   /* Input is in 1.15(q15) format */
01112 
01113   /*  start of first stage process */
01114   do
01115   {
01116     /*  Butterfly implementation */
01117 
01118     /*  Reading i0, i0+fftLen/2 inputs */
01119     /* Read xa (real), ya (imag) input and scale it down by 4 (two halving adds with 0) */
01120     T = _SIMD32_OFFSET(pSi0);
01121     T = __SHADD16(T, 0);
01122     T = __SHADD16(T, 0);
01123 
01124     /* Read xc (real), yc (imag) input, scaled down by 4 */
01125     S = _SIMD32_OFFSET(pSi2);
01126     S = __SHADD16(S, 0);
01127     S = __SHADD16(S, 0);
01128 
01129     /* R = packed((ya + yc), (xa + xc) ) */
01130     R = __QADD16(T, S);
01131 
01132     /* S = packed((ya - yc), (xa - xc) ) */
01133     S = __QSUB16(T, S);
01134 
01135     /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
01136     /* Read xb (real), yb (imag) input, scaled down by 4 */
01137     T = _SIMD32_OFFSET(pSi1);
01138     T = __SHADD16(T, 0);
01139     T = __SHADD16(T, 0);
01140 
01141     /* Read xd (real), yd (imag) input, scaled down by 4 */
01142     U = _SIMD32_OFFSET(pSi3);
01143     U = __SHADD16(U, 0);
01144     U = __SHADD16(U, 0);
01145 
01146     /* T = packed((yb + yd), (xb + xd) ) */
01147     T = __QADD16(T, U);
01148 
01149     /*  writing the butterfly processed i0 sample */
01150     /* xa' = xa + xb + xc + xd */
01151     /* ya' = ya + yb + yc + yd */
01152     _SIMD32_OFFSET(pSi0) = __SHADD16(R, T);
01153     pSi0 += 2;
01154 
01155     /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */
01156     R = __QSUB16(R, T);
01157 
01158     /* co2 & si2 are read from SIMD Coefficient pointer */
01159     C2 = _SIMD32_OFFSET(pCoef16 + (4U * ic));
01160 
01161 #ifndef ARM_MATH_BIG_ENDIAN
01162 
01163     /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
01164     out1 = __SMUSD(C2, R) >> 16U;
01165     /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
01166     out2 = __SMUADX(C2, R);
01167 
01168 #else
01169 
01170     /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
01171     out1 = __SMUADX(C2, R) >> 16U;
01172     /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
01173     out2 = __SMUSD(__QSUB16(0, C2), R);
01174 
01175 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
01176 
01177     /*  Reading i0+fftLen/4 */
01178     /* T = packed(yb, xb) */
01179     T = _SIMD32_OFFSET(pSi1);
01180     T = __SHADD16(T, 0);
01181     T = __SHADD16(T, 0);
01182 
01183     /* writing the butterfly processed i0 + fftLen/4 sample */
01184     /* writing output(xc', yc'): upper half of the word from out2, lower half from out1 */
01185     _SIMD32_OFFSET(pSi1) =
01186       (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
01187     pSi1 += 2;
01188 
01189     /*  Butterfly calculations */
01190     /* U = packed(yd, xd) */
01191     U = _SIMD32_OFFSET(pSi3);
01192     U = __SHADD16(U, 0);
01193     U = __SHADD16(U, 0);
01194 
01195     /* T = packed(yb-yd, xb-xd) */
01196     T = __QSUB16(T, U);
01197 
01198 #ifndef ARM_MATH_BIG_ENDIAN
01199 
01200     /* R = packed((ya-yc) - (xb- xd) , (xa-xc) + (yb-yd)) */
01201     R = __QSAX(S, T);
01202     /* S = packed((ya-yc) + (xb- xd),  (xa-xc) - (yb-yd)) */
01203     S = __QASX(S, T);
01204 
01205 #else
01206 
01207     /* R = packed((ya-yc) - (xb- xd) , (xa-xc) + (yb-yd)) */
01208     R = __QASX(S, T);
01209     /* S = packed((ya-yc) + (xb- xd),  (xa-xc) - (yb-yd)) */
01210     S = __QSAX(S, T);
01211 
01212 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
01213 
01214     /* co1 & si1 are read from SIMD Coefficient pointer */
01215     C1 = _SIMD32_OFFSET(pCoef16 + (2U * ic));
01216     /*  Butterfly process for the i0+fftLen/2 sample */
01217 
01218 #ifndef ARM_MATH_BIG_ENDIAN
01219 
01220     /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
01221     out1 = __SMUSD(C1, S) >> 16U;
01222     /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
01223     out2 = __SMUADX(C1, S);
01224 
01225 #else
01226 
01227     /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
01228     out1 = __SMUADX(C1, S) >> 16U;
01229     /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
01230     out2 = __SMUSD(__QSUB16(0, C1), S);
01231 
01232 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
01233 
01234     /* writing output(xb', yb'): upper half of the word from out2, lower half from out1 */
01235     _SIMD32_OFFSET(pSi2) =
01236       ((out2) & 0xFFFF0000) | ((out1) & 0x0000FFFF);
01237     pSi2 += 2;
01238 
01239 
01240     /* co3 & si3 are read from SIMD Coefficient pointer */
01241     C3 = _SIMD32_OFFSET(pCoef16 + (6U * ic));
01242     /*  Butterfly process for the i0+3fftLen/4 sample */
01243 
01244 #ifndef ARM_MATH_BIG_ENDIAN
01245 
01246     /* xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) */
01247     out1 = __SMUSD(C3, R) >> 16U;
01248     /* yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) */
01249     out2 = __SMUADX(C3, R);
01250 
01251 #else
01252 
01253     /* yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) */
01254     out1 = __SMUADX(C3, R) >> 16U;
01255     /* xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) */
01256     out2 = __SMUSD(__QSUB16(0, C3), R);
01257 
01258 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
01259 
01260     /* writing output(xd', yd'): upper half of the word from out2, lower half from out1 */
01261     _SIMD32_OFFSET(pSi3) =
01262       ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
01263     pSi3 += 2;
01264 
01265     /*  Twiddle coefficients index modifier */
01266     ic = ic + twidCoefModifier;
01267 
01268   } while (--j);
01269   /* data is in 4.11(q11) format */
01270 
01271   /* end of first stage process */
01272 
01273 
01274   /* start of middle stage process */
01275 
01276   /*  Twiddle coefficients index modifier */
01277   twidCoefModifier <<= 2U;
01278 
01279   /*  Calculation of Middle stage */
01280   for (k = fftLen / 4U; k > 4U; k >>= 2U)
01281   {
01282     /*  Initializations for the middle stage */
01283     n1 = n2;
01284     n2 >>= 2U;
01285     ic = 0U;
01286 
01287     for (j = 0U; j <= (n2 - 1U); j++)
01288     {
01289       /*  index calculation for the coefficients */
01290       C1 = _SIMD32_OFFSET(pCoef16 + (2U * ic));
01291       C2 = _SIMD32_OFFSET(pCoef16 + (4U * ic));
01292       C3 = _SIMD32_OFFSET(pCoef16 + (6U * ic));
01293 
01294       /*  Twiddle coefficients index modifier */
01295       ic = ic + twidCoefModifier;
01296 
01297       pSi0 = pSrc16 + 2 * j;
01298       pSi1 = pSi0 + 2 * n2;
01299       pSi2 = pSi1 + 2 * n2;
01300       pSi3 = pSi2 + 2 * n2;
01301 
01302       /*  Butterfly implementation */
01303       for (i0 = j; i0 < fftLen; i0 += n1)
01304       {
01305         /*  Reading the pSi0 and pSi2 inputs */
01306         /* Read xa (real), ya (imag) input */
01307         T = _SIMD32_OFFSET(pSi0);
01308 
01309         /* Read xc (real), yc (imag) input */
01310         S = _SIMD32_OFFSET(pSi2);
01311 
01312         /* R = packed( (ya + yc), (xa + xc)) */
01313         R = __QADD16(T, S);
01314 
01315         /* S = packed((ya - yc), (xa - xc)) */
01316         S = __QSUB16(T, S);
01317 
01318         /*  Reading the pSi1 and pSi3 inputs */
01319         /* Read xb (real), yb (imag) input */
01320         T = _SIMD32_OFFSET(pSi1);
01321 
01322         /* Read xd (real), yd (imag) input */
01323         U = _SIMD32_OFFSET(pSi3);
01324 
01325         /* T = packed( (yb + yd), (xb + xd)) */
01326         T = __QADD16(T, U);
01327 
01328         /*  writing the butterfly processed i0 sample */
01329 
01330         /* xa' = xa + xb + xc + xd */
01331         /* ya' = ya + yb + yc + yd (both halved twice by the two halving adds) */
01332         out1 = __SHADD16(R, T);
01333         out1 = __SHADD16(out1, 0);
01334         _SIMD32_OFFSET(pSi0) = out1;
01335         pSi0 += 2 * n1;
01336 
01337         /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)), each halved */
01338         R = __SHSUB16(R, T);
01339 
01340 #ifndef ARM_MATH_BIG_ENDIAN
01341 
01342         /* (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
01343         out1 = __SMUSD(C2, R) >> 16U;
01344 
01345         /* (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
01346         out2 = __SMUADX(C2, R);
01347 
01348 #else
01349 
01350         /* (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
01351         out1 = __SMUADX(R, C2) >> 16U;
01352 
01353         /* (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
01354         out2 = __SMUSD(__QSUB16(0, C2), R);
01355 
01356 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
01357 
01358         /*  Re-reading the pSi1 sample before it is overwritten below */
01359         /* Read xb (real), yb (imag) input */
01360         T = _SIMD32_OFFSET(pSi1);
01361 
01362         /*  writing the butterfly processed pSi1 sample */
01363         /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
01364         /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
01365         _SIMD32_OFFSET(pSi1) =
01366           ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
01367         pSi1 += 2 * n1;
01368 
01369         /*  Butterfly calculations */
01370 
01371         /* Read xd (real), yd (imag) input */
01372         U = _SIMD32_OFFSET(pSi3);
01373 
01374         /* T = packed(yb-yd, xb-xd) */
01375         T = __QSUB16(T, U);
01376 
01377 #ifndef ARM_MATH_BIG_ENDIAN
01378 
01379         /* R = packed((ya-yc) - (xb- xd) , (xa-xc) + (yb-yd)), each halved */
01380         R = __SHSAX(S, T);
01381 
01382         /* S = packed((ya-yc) + (xb- xd),  (xa-xc) - (yb-yd)), each halved */
01383         S = __SHASX(S, T);
01384 
01385 
01386         /*  Butterfly process for the pSi2 sample */
01387         out1 = __SMUSD(C1, S) >> 16U;
01388         out2 = __SMUADX(C1, S);
01389 
01390 #else
01391 
01392         /* R = packed((ya-yc) - (xb- xd) , (xa-xc) + (yb-yd)), each halved */
01393         R = __SHASX(S, T);
01394 
01395         /* S = packed((ya-yc) + (xb- xd),  (xa-xc) - (yb-yd)), each halved */
01396         S = __SHSAX(S, T);
01397 
01398 
01399         /*  Butterfly process for the pSi2 sample */
01400         out1 = __SMUADX(S, C1) >> 16U;
01401         out2 = __SMUSD(__QSUB16(0, C1), S);
01402 
01403 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
01404 
01405         /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
01406         /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
01407         _SIMD32_OFFSET(pSi2) =
01408           ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
01409         pSi2 += 2 * n1;
01410 
01411         /*  Butterfly process for the pSi3 sample */
01412 
01413 #ifndef ARM_MATH_BIG_ENDIAN
01414 
01415         out1 = __SMUSD(C3, R) >> 16U;
01416         out2 = __SMUADX(C3, R);
01417 
01418 #else
01419 
01420         out1 = __SMUADX(C3, R) >> 16U;
01421         out2 = __SMUSD(__QSUB16(0, C3), R);
01422 
01423 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
01424 
01425         /* xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) */
01426         /* yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) */
01427         _SIMD32_OFFSET(pSi3) =
01428           ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
01429         pSi3 += 2 * n1;
01430       }
01431     }
01432     /*  Twiddle coefficients index modifier */
01433     twidCoefModifier <<= 2U;
01434   }
01435   /* end of middle stage process */
01436 
01437   /* data is in 10.6(q6) format for the 1024 point */
01438   /* data is in 8.8(q8) format for the 256 point */
01439   /* data is in 6.10(q10) format for the 64 point */
01440   /* data is in 4.12(q12) format for the 16 point */
01441 
01442   /*  Initializations for the last stage: one butterfly per four consecutive samples */
01443   j = fftLen >> 2;
01444 
01445   ptr1 = &pSrc16[0];
01446 
01447   /* start of last stage process (no twiddle multiplications) */
01448 
01449   /*  Butterfly implementation */
01450   do
01451   {
01452     /* Read xa (real), ya(imag) input */
01453     xaya = *__SIMD32(ptr1)++;
01454 
01455     /* Read xb (real), yb(imag) input */
01456     xbyb = *__SIMD32(ptr1)++;
01457 
01458     /* Read xc (real), yc(imag) input */
01459     xcyc = *__SIMD32(ptr1)++;
01460 
01461     /* Read xd (real), yd(imag) input */
01462     xdyd = *__SIMD32(ptr1)++;
01463 
01464     /* R = packed((ya + yc), (xa + xc)) */
01465     R = __QADD16(xaya, xcyc);
01466 
01467     /* T = packed((yb + yd), (xb + xd)) */
01468     T = __QADD16(xbyb, xdyd);
01469 
01470     /* step the pointer back over the four samples just read, for writing */
01471     ptr1 = ptr1 - 8U;
01472 
01473 
01474     /* xa' = xa + xb + xc + xd */
01475     /* ya' = ya + yb + yc + yd */
01476     *__SIMD32(ptr1)++ = __SHADD16(R, T);
01477 
01478     /* T = packed((yb + yd), (xb + xd)) */
01479     T = __QADD16(xbyb, xdyd);
01480 
01481     /* xc' = (xa-xb+xc-xd) */
01482     /* yc' = (ya-yb+yc-yd) */
01483     *__SIMD32(ptr1)++ = __SHSUB16(R, T);
01484 
01485     /* S = packed((ya - yc), (xa - xc)) */
01486     S = __QSUB16(xaya, xcyc);
01487 
01488     /* Uses the xb/yb and xd/yd values already read above */
01489     /* U = packed( (yb - yd), (xb - xd))  */
01490     U = __QSUB16(xbyb, xdyd);
01491 
01492 #ifndef ARM_MATH_BIG_ENDIAN
01493 
01494     /* xb' = (xa-yb-xc+yd) */
01495     /* yb' = (ya+xb-yc-xd) */
01496     *__SIMD32(ptr1)++ = __SHASX(S, U);
01497 
01498 
01499     /* xd' = (xa+yb-xc-yd) */
01500     /* yd' = (ya-xb-yc+xd) */
01501     *__SIMD32(ptr1)++ = __SHSAX(S, U);
01502 
01503 #else
01504 
01505     /* xb' = (xa-yb-xc+yd) */
01506     /* yb' = (ya+xb-yc-xd) */
01507     *__SIMD32(ptr1)++ = __SHSAX(S, U);
01508 
01509 
01510     /* xd' = (xa+yb-xc-yd) */
01511     /* yd' = (ya-xb-yc+xd) */
01512     *__SIMD32(ptr1)++ = __SHASX(S, U);
01513 
01514 
01515 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
01516 
01517   } while (--j);
01518 
01519   /* end of last stage  process */
01520 
01521   /* output is in 11.5(q5) format for the 1024 point */
01522   /* output is in 9.7(q7) format for the 256 point   */
01523   /* output is in 7.9(q9) format for the 64 point  */
01524   /* output is in 5.11(q11) format for the 16 point  */
01525 
01526 
01527 #else
01528 
01529   /* Run the below code when ARM_MATH_DSP is not defined: scalar implementation, one Q15 value at a time */
01530 
01531   q15_t R0, R1, S0, S1, T0, T1, U0, U1;
01532   q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2;
01533   uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;
01534 
01535   /* Total process is divided into three stages */
01536 
01537   /* process first stage, middle stages, & last stage */
01538 
01539   /*  Initializations for the first stage */
01540   n2 = fftLen;
01541   n1 = n2;
01542 
01543   /* n2 = fftLen/4 */
01544   n2 >>= 2U;
01545 
01546   /* Index for twiddle coefficient */
01547   ic = 0U;
01548 
01549   /* Index for input read and output write */
01550   i0 = 0U;
01551 
01552   j = n2;
01553 
01554   /* Input is in 1.15(q15) format */
01555 
01556   /*  Start of first stage process */
01557   do
01558   {
01559     /*  Butterfly implementation */
01560 
01561     /*  index calculation for the input as, */
01562     /*  pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
01563     i1 = i0 + n2;
01564     i2 = i1 + n2;
01565     i3 = i2 + n2;
01566 
01567     /*  Reading i0, i0+fftLen/2 inputs */
01568     /* input is down scale by 4 to avoid overflow */
01569     /* Read xa (real), ya (imag) input */
01570     T0 = pSrc16[i0 * 2U] >> 2U;
01571     T1 = pSrc16[(i0 * 2U) + 1U] >> 2U;
01572     /* input is down scale by 4 to avoid overflow */
01573     /* Read xc (real), yc (imag) input */
01574     S0 = pSrc16[i2 * 2U] >> 2U;
01575     S1 = pSrc16[(i2 * 2U) + 1U] >> 2U;
01576 
01577     /* R0 = (xa + xc), R1 = (ya + yc) */
01578     R0 = __SSAT(T0 + S0, 16U);
01579     R1 = __SSAT(T1 + S1, 16U);
01580     /* S0 = (xa - xc), S1 = (ya - yc) */
01581     S0 = __SSAT(T0 - S0, 16U);
01582     S1 = __SSAT(T1 - S1, 16U);
01583 
01584     /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
01585     /* input is down scale by 4 to avoid overflow */
01586     /* Read xb (real), yb (imag) input */
01587     T0 = pSrc16[i1 * 2U] >> 2U;
01588     T1 = pSrc16[(i1 * 2U) + 1U] >> 2U;
01589     /* Read xd (real), yd (imag) input */
01590     /* input is down scale by 4 to avoid overflow */
01591     U0 = pSrc16[i3 * 2U] >> 2U;
01592     U1 = pSrc16[(i3 * 2U) + 1U] >> 2U;
01593 
01594     /* T0 = (xb + xd), T1 = (yb + yd) */
01595     T0 = __SSAT(T0 + U0, 16U);
01596     T1 = __SSAT(T1 + U1, 16U);
01597 
01598     /*  writing the butterfly processed i0 sample */
01599     /* xa' = xa + xb + xc + xd */
01600     /* ya' = ya + yb + yc + yd */
01601     pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U);
01602     pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U);
01603 
01604     /* R0 = (xa + xc) - (xb + xd), R1 = (ya + yc) - (yb + yd) */
01605     R0 = __SSAT(R0 - T0, 16U);
01606     R1 = __SSAT(R1 - T1, 16U);
01607     /* co2 & si2 are read from Coefficient pointer */
01608     Co2 = pCoef16[2U * ic * 2U];
01609     Si2 = pCoef16[(2U * ic * 2U) + 1U];
01610     /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
01611     out1 = (q15_t) ((Co2 * R0 - Si2 * R1) >> 16U);
01612     /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
01613     out2 = (q15_t) ((Si2 * R0 + Co2 * R1) >> 16U);
01614 
01615     /*  Reading i0+fftLen/4 */
01616     /* input is down scale by 4 to avoid overflow */
01617     /* T0 = xb, T1 = yb */
01618     T0 = pSrc16[i1 * 2U] >> 2U;
01619     T1 = pSrc16[(i1 * 2U) + 1U] >> 2U;
01620 
01621     /* writing the butterfly processed i0 + fftLen/4 sample */
01622     /* writing output(xc', yc'): real part first, then imaginary part */
01623     pSrc16[i1 * 2U] = out1;
01624     pSrc16[(i1 * 2U) + 1U] = out2;
01625 
01626     /*  Butterfly calculations */
01627     /* input is down scale by 4 to avoid overflow */
01628     /* U0 = xd, U1 = yd */
01629     U0 = pSrc16[i3 * 2U] >> 2U;
01630     U1 = pSrc16[(i3 * 2U) + 1U] >> 2U;
01631 
01632     /* T0 = xb-xd, T1 = yb-yd */
01633     T0 = __SSAT(T0 - U0, 16U);
01634     T1 = __SSAT(T1 - U1, 16U);
01635     /* R0 = (xa-xc) + (yb-yd) , R1 = (ya-yc) - (xb- xd) */
01636     R0 = (q15_t) __SSAT((q31_t) (S0 + T1), 16);
01637     R1 = (q15_t) __SSAT((q31_t) (S1 - T0), 16);
01638     /* S0 = (xa-xc) - (yb-yd), S1 = (ya-yc) + (xb- xd) */
01639     S0 = (q15_t) __SSAT((q31_t) (S0 - T1), 16);
01640     S1 = (q15_t) __SSAT((q31_t) (S1 + T0), 16);
01641 
01642     /* co1 & si1 are read from Coefficient pointer */
01643     Co1 = pCoef16[ic * 2U];
01644     Si1 = pCoef16[(ic * 2U) + 1U];
01645     /*  Butterfly process for the i0+fftLen/2 sample */
01646     /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
01647     out1 = (q15_t) ((Co1 * S0 - Si1 * S1) >> 16U);
01648     /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
01649     out2 = (q15_t) ((Si1 * S0 + Co1 * S1) >> 16U);
01650     /* writing output(xb', yb'): real part first, then imaginary part */
01651     pSrc16[i2 * 2U] = out1;
01652     pSrc16[(i2 * 2U) + 1U] = out2;
01653 
01654     /* Co3 & si3 are read from Coefficient pointer */
01655     Co3 = pCoef16[3U * ic * 2U];
01656     Si3 = pCoef16[(3U * ic * 2U) + 1U];
01657     /*  Butterfly process for the i0+3fftLen/4 sample */
01658     /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */
01659     out1 = (q15_t) ((Co3 * R0 - Si3 * R1) >> 16U);
01660     /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */
01661     out2 = (q15_t) ((Si3 * R0 + Co3 * R1) >> 16U);
01662     /* writing output(xd', yd'): real part first, then imaginary part */
01663     pSrc16[i3 * 2U] = out1;
01664     pSrc16[(i3 * 2U) + 1U] = out2;
01665 
01666     /*  Twiddle coefficients index modifier */
01667     ic = ic + twidCoefModifier;
01668 
01669     /*  Updating input index */
01670     i0 = i0 + 1U;
01671 
01672   } while (--j);
01673 
01674   /*  End of first stage process */
01675 
01676   /* data is in 4.11(q11) format */
01677 
01678 
01679   /*  Start of Middle stage process */
01680 
01681   /*  Twiddle coefficients index modifier */
01682   twidCoefModifier <<= 2U;
01683 
01684   /*  Calculation of Middle stage */
01685   for (k = fftLen / 4U; k > 4U; k >>= 2U)
01686   {
01687     /*  Initializations for the middle stage */
01688     n1 = n2;
01689     n2 >>= 2U;
01690     ic = 0U;
01691 
01692     for (j = 0U; j <= (n2 - 1U); j++)
01693     {
01694       /*  index calculation for the coefficients */
01695       Co1 = pCoef16[ic * 2U];
01696       Si1 = pCoef16[(ic * 2U) + 1U];
01697       Co2 = pCoef16[2U * ic * 2U];
01698       Si2 = pCoef16[2U * ic * 2U + 1U];
01699       Co3 = pCoef16[3U * ic * 2U];
01700       Si3 = pCoef16[(3U * ic * 2U) + 1U];
01701 
01702       /*  Twiddle coefficients index modifier */
01703       ic = ic + twidCoefModifier;
01704 
01705       /*  Butterfly implementation */
01706       for (i0 = j; i0 < fftLen; i0 += n1)
01707       {
01708         /*  index calculation for the input as, */
01709         /*  pSrc16[i0 + 0], pSrc16[i0 + n2], pSrc16[i0 + 2*n2], pSrc16[i0 + 3*n2] */
01710         i1 = i0 + n2;
01711         i2 = i1 + n2;
01712         i3 = i2 + n2;
01713 
01714         /*  Reading i0, i2 inputs */
01715         /* Read xa (real), ya (imag) input */
01716         T0 = pSrc16[i0 * 2U];
01717         T1 = pSrc16[(i0 * 2U) + 1U];
01718 
01719         /* Read xc (real), yc (imag) input */
01720         S0 = pSrc16[i2 * 2U];
01721         S1 = pSrc16[(i2 * 2U) + 1U];
01722 
01723 
01724         /* R0 = (xa + xc), R1 = (ya + yc) */
01725         R0 = __SSAT(T0 + S0, 16U);
01726         R1 = __SSAT(T1 + S1, 16U);
01727         /* S0 = (xa - xc), S1 = (ya - yc) */
01728         S0 = __SSAT(T0 - S0, 16U);
01729         S1 = __SSAT(T1 - S1, 16U);
01730 
01731         /*  Reading i1, i3 inputs */
01732         /* Read xb (real), yb (imag) input */
01733         T0 = pSrc16[i1 * 2U];
01734         T1 = pSrc16[(i1 * 2U) + 1U];
01735 
01736         /* Read xd (real), yd (imag) input */
01737         U0 = pSrc16[i3 * 2U];
01738         U1 = pSrc16[(i3 * 2U) + 1U];
01739 
01740         /* T0 = (xb + xd), T1 = (yb + yd) */
01741         T0 = __SSAT(T0 + U0, 16U);
01742         T1 = __SSAT(T1 + U1, 16U);
01743 
01744         /*  writing the butterfly processed i0 sample */
01745         /* xa' = xa + xb + xc + xd */
01746         /* ya' = ya + yb + yc + yd */
01747         pSrc16[i0 * 2U] = ((R0 >> 1U) + (T0 >> 1U)) >> 1U;
01748         pSrc16[(i0 * 2U) + 1U] = ((R1 >> 1U) + (T1 >> 1U)) >> 1U;
01749 
01750         /* R0 = (xa + xc) - (xb + xd), R1 = (ya + yc) - (yb + yd), each operand halved first */
01751         R0 = (R0 >> 1U) - (T0 >> 1U);
01752         R1 = (R1 >> 1U) - (T1 >> 1U);
01753 
01754         /* (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
01755         out1 = (q15_t) ((Co2 * R0 - Si2 * R1) >> 16);
01756         /* (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
01757         out2 = (q15_t) ((Si2 * R0 + Co2 * R1) >> 16);
01758 
01759         /*  Re-reading the i1 sample before it is overwritten below */
01760         /* Read xb (real), yb (imag) input */
01761         T0 = pSrc16[i1 * 2U];
01762         T1 = pSrc16[(i1 * 2U) + 1U];
01763 
01764         /*  writing the butterfly processed i1 sample */
01765         /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
01766         /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
01767         pSrc16[i1 * 2U] = out1;
01768         pSrc16[(i1 * 2U) + 1U] = out2;
01769 
01770         /*  Butterfly calculations */
01771         /* Read xd (real), yd (imag) input */
01772         U0 = pSrc16[i3 * 2U];
01773         U1 = pSrc16[(i3 * 2U) + 1U];
01774 
01775         /* T0 = xb-xd, T1 = yb-yd */
01776         T0 = __SSAT(T0 - U0, 16U);
01777         T1 = __SSAT(T1 - U1, 16U);
01778 
01779         /* R0 = (xa-xc) + (yb-yd) , R1 = (ya-yc) - (xb- xd), each operand halved first */
01780         R0 = (S0 >> 1U) + (T1 >> 1U);
01781         R1 = (S1 >> 1U) - (T0 >> 1U);
01782 
01783         /* S0 = (xa-xc) - (yb-yd), S1 = (ya-yc) + (xb- xd), each operand halved first */
01784         S0 = (S0 >> 1U) - (T1 >> 1U);
01785         S1 = (S1 >> 1U) + (T0 >> 1U);
01786 
01787         /*  Butterfly process for the i2 sample */
01788         out1 = (q15_t) ((Co1 * S0 - Si1 * S1) >> 16U);
01789         out2 = (q15_t) ((Si1 * S0 + Co1 * S1) >> 16U);
01790         /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
01791         /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
01792         pSrc16[i2 * 2U] = out1;
01793         pSrc16[(i2 * 2U) + 1U] = out2;
01794 
01795         /*  Butterfly process for the i3 sample */
01796         out1 = (q15_t) ((Co3 * R0 - Si3 * R1) >> 16U);
01797 
01798         out2 = (q15_t) ((Si3 * R0 + Co3 * R1) >> 16U);
01799         /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */
01800         /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */
01801         pSrc16[i3 * 2U] = out1;
01802         pSrc16[(i3 * 2U) + 1U] = out2;
01803 
01804 
01805       }
01806     }
01807     /*  Twiddle coefficients index modifier */
01808     twidCoefModifier <<= 2U;
01809   }
01810   /*  End of Middle stages process */
01811 
01812 
01813   /* data is in 10.6(q6) format for the 1024 point */
01814   /* data is in 8.8(q8) format for the 256 point   */
01815   /* data is in 6.10(q10) format for the 64 point  */
01816   /* data is in 4.12(q12) format for the 16 point  */
01817 
01818   /* start of last stage process (no twiddle multiplications) */
01819 
01820 
01821   /*  Initializations for the last stage */
01822   n1 = n2;
01823   n2 >>= 2U;
01824 
01825   /*  Butterfly implementation */
01826   for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
01827   {
01828     /*  index calculation for the input as, */
01829     /*  pSrc16[i0 + 0], pSrc16[i0 + n2], pSrc16[i0 + 2*n2], pSrc16[i0 + 3*n2] */
01830     i1 = i0 + n2;
01831     i2 = i1 + n2;
01832     i3 = i2 + n2;
01833 
01834     /*  Reading i0, i2 inputs */
01835     /* Read xa (real), ya (imag) input */
01836     T0 = pSrc16[i0 * 2U];
01837     T1 = pSrc16[(i0 * 2U) + 1U];
01838     /* Read xc (real), yc (imag) input */
01839     S0 = pSrc16[i2 * 2U];
01840     S1 = pSrc16[(i2 * 2U) + 1U];
01841 
01842     /* R0 = (xa + xc), R1 = (ya + yc) */
01843     R0 = __SSAT(T0 + S0, 16U);
01844     R1 = __SSAT(T1 + S1, 16U);
01845     /* S0 = (xa - xc), S1 = (ya - yc) */
01846     S0 = __SSAT(T0 - S0, 16U);
01847     S1 = __SSAT(T1 - S1, 16U);
01848 
01849     /*  Reading i1, i3 inputs */
01850     /* Read xb (real), yb (imag) input */
01851     T0 = pSrc16[i1 * 2U];
01852     T1 = pSrc16[(i1 * 2U) + 1U];
01853     /* Read xd (real), yd (imag) input */
01854     U0 = pSrc16[i3 * 2U];
01855     U1 = pSrc16[(i3 * 2U) + 1U];
01856 
01857     /* T0 = (xb + xd), T1 = (yb + yd) */
01858     T0 = __SSAT(T0 + U0, 16U);
01859     T1 = __SSAT(T1 + U1, 16U);
01860 
01861     /*  writing the butterfly processed i0 sample */
01862     /* xa' = xa + xb + xc + xd */
01863     /* ya' = ya + yb + yc + yd */
01864     pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U);
01865     pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U);
01866 
01867     /* R0 = (xa + xc) - (xb + xd), R1 = (ya + yc) - (yb + yd), each operand halved first */
01868     R0 = (R0 >> 1U) - (T0 >> 1U);
01869     R1 = (R1 >> 1U) - (T1 >> 1U);
01870 
01871     /* Read xb (real), yb (imag) input again before the i1 sample is overwritten */
01872     T0 = pSrc16[i1 * 2U];
01873     T1 = pSrc16[(i1 * 2U) + 1U];
01874 
01875     /*  writing the butterfly processed i1 sample */
01876     /* xc' = (xa-xb+xc-xd) */
01877     /* yc' = (ya-yb+yc-yd) */
01878     pSrc16[i1 * 2U] = R0;
01879     pSrc16[(i1 * 2U) + 1U] = R1;
01880 
01881     /* Read xd (real), yd (imag) input */
01882     U0 = pSrc16[i3 * 2U];
01883     U1 = pSrc16[(i3 * 2U) + 1U];
01884     /* T0 = (xb - xd), T1 = (yb - yd) */
01885     T0 = __SSAT(T0 - U0, 16U);
01886     T1 = __SSAT(T1 - U1, 16U);
01887 
01888     /*  writing the butterfly processed i2 sample */
01889     /* xb' = (xa-yb-xc+yd) */
01890     /* yb' = (ya+xb-yc-xd) */
01891     pSrc16[i2 * 2U] = (S0 >> 1U) - (T1 >> 1U);
01892     pSrc16[(i2 * 2U) + 1U] = (S1 >> 1U) + (T0 >> 1U);
01893 
01894 
01895     /*  writing the butterfly processed i3 sample */
01896     /* xd' = (xa+yb-xc-yd) */
01897     /* yd' = (ya-xb-yc+xd) */
01898     pSrc16[i3 * 2U] = (S0 >> 1U) + (T1 >> 1U);
01899     pSrc16[(i3 * 2U) + 1U] = (S1 >> 1U) - (T0 >> 1U);
01900   }
01901   /* end of last stage  process */
01902 
01903   /* output is in 11.5(q5) format for the 1024 point */
01904   /* output is in 9.7(q7) format for the 256 point   */
01905   /* output is in 7.9(q9) format for the 64 point  */
01906   /* output is in 5.11(q11) format for the 16 point  */
01907 
01908 #endif /* #if defined (ARM_MATH_DSP) */
01909 
01910 }