Robert Lopez / CMSIS5
Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_cfft_radix4_q31.c Source File

arm_cfft_radix4_q31.c

00001 /* ----------------------------------------------------------------------
00002  * Project:      CMSIS DSP Library
00003  * Title:        arm_cfft_radix4_q31.c
00004  * Description:  This file has function definition of Radix-4 FFT & IFFT function and
00005  *               In-place bit reversal using bit reversal table
00006  *
00007  * $Date:        27. January 2017
00008  * $Revision:    V.1.5.1
00009  *
00010  * Target Processor: Cortex-M cores
00011  * -------------------------------------------------------------------- */
00012 /*
00013  * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
00014  *
00015  * SPDX-License-Identifier: Apache-2.0
00016  *
00017  * Licensed under the Apache License, Version 2.0 (the License); you may
00018  * not use this file except in compliance with the License.
00019  * You may obtain a copy of the License at
00020  *
00021  * www.apache.org/licenses/LICENSE-2.0
00022  *
00023  * Unless required by applicable law or agreed to in writing, software
00024  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
00025  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00026  * See the License for the specific language governing permissions and
00027  * limitations under the License.
00028  */
00029 
00030 #include "arm_math.h"
00031 
00032 void arm_radix4_butterfly_inverse_q31(
00033 q31_t * pSrc,
00034 uint32_t fftLen,
00035 q31_t * pCoef,
00036 uint32_t twidCoefModifier);
00037 
00038 void arm_radix4_butterfly_q31(
00039 q31_t * pSrc,
00040 uint32_t fftLen,
00041 q31_t * pCoef,
00042 uint32_t twidCoefModifier);
00043 
00044 void arm_bitreversal_q31(
00045 q31_t * pSrc,
00046 uint32_t fftLen,
00047 uint16_t bitRevFactor,
00048 uint16_t * pBitRevTab);
00049 
00050 /**
00051  * @ingroup groupTransforms
00052  */
00053 
00054 /**
00055  * @addtogroup ComplexFFT
00056  * @{
00057  */
00058 
00059 /**
00060  * @details
00061  * @brief Processing function for the Q31 CFFT/CIFFT.
00062  * @deprecated Do not use this function.  It has been superseded by \ref arm_cfft_q31 and will be removed
00063  * @param[in]      *S    points to an instance of the Q31 CFFT/CIFFT structure.
00064  * @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
00065  * @return none.
00066  *
00067  * \par Input and output formats:
00068  * \par
00069  * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
00070  * Hence the output format is different for different FFT sizes.
00071  * The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT:
00072  * \par
00073  * \image html CFFTQ31.gif "Input and Output Formats for Q31 CFFT"
00074  * \image html CIFFTQ31.gif "Input and Output Formats for Q31 CIFFT"
00075  *
00076  */
00077 
00078 void arm_cfft_radix4_q31(
00079   const arm_cfft_radix4_instance_q31 * S,
00080   q31_t * pSrc)
00081 {
00082   if (S->ifftFlag == 1U)
00083   {
00084     /* Complex IFFT radix-4 */
00085     arm_radix4_butterfly_inverse_q31(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
00086   }
00087   else
00088   {
00089     /* Complex FFT radix-4 */
00090     arm_radix4_butterfly_q31(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
00091   }
00092 
00093   if (S->bitReverseFlag == 1U)
00094   {
00095     /*  Bit Reversal */
00096     arm_bitreversal_q31(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
00097   }
00098 
00099 }
00100 
00101 /**
00102  * @} end of ComplexFFT group
00103  */
00104 
00105 /*
00106 * Radix-4 FFT algorithm used is :
00107 *
00108 * Input real and imaginary data:
00109 * x(n) = xa + j * ya
00110 * x(n+N/4 ) = xb + j * yb
00111 * x(n+N/2 ) = xc + j * yc
00112 * x(n+3N 4) = xd + j * yd
00113 *
00114 *
00115 * Output real and imaginary data:
00116 * x(4r) = xa'+ j * ya'
00117 * x(4r+1) = xb'+ j * yb'
00118 * x(4r+2) = xc'+ j * yc'
00119 * x(4r+3) = xd'+ j * yd'
00120 *
00121 *
00122 * Twiddle factors for radix-4 FFT:
00123 * Wn = co1 + j * (- si1)
00124 * W2n = co2 + j * (- si2)
00125 * W3n = co3 + j * (- si3)
00126 *
00127 *  Butterfly implementation:
00128 * xa' = xa + xb + xc + xd
00129 * ya' = ya + yb + yc + yd
00130 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
00131 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
00132 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
00133 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
00134 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
00135 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
00136 *
00137 */
00138 
00139 /**
00140  * @brief  Core function for the Q31 CFFT butterfly process.
00141  * @param[in, out] *pSrc            points to the in-place buffer of Q31 data type.
00142  * @param[in]      fftLen           length of the FFT.
00143  * @param[in]      *pCoef           points to twiddle coefficient buffer.
00144  * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
00145  * @return none.
00146  */
00147 
00148 void arm_radix4_butterfly_q31(
00149   q31_t * pSrc,
00150   uint32_t fftLen,
00151   q31_t * pCoef,
00152   uint32_t twidCoefModifier)
00153 {
00154 #if defined(ARM_MATH_CM7)
00155   uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k;
00156   q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
00157 
00158   q31_t xa, xb, xc, xd;
00159   q31_t ya, yb, yc, yd;
00160   q31_t xa_out, xb_out, xc_out, xd_out;
00161   q31_t ya_out, yb_out, yc_out, yd_out;
00162 
00163   q31_t *ptr1;
00164   q63_t xaya, xbyb, xcyc, xdyd;
00165   /* Total process is divided into three stages */
00166 
00167   /* process first stage, middle stages, & last stage */
00168 
00169 
00170   /* start of first stage process */
00171 
00172   /*  Initializations for the first stage */
00173   n2 = fftLen;
00174   n1 = n2;
00175   /* n2 = fftLen/4 */
00176   n2 >>= 2U;
00177   i0 = 0U;
00178   ia1 = 0U;
00179 
00180   j = n2;
00181 
00182   /*  Calculation of first stage */
00183   do
00184   {
00185     /*  index calculation for the input as, */
00186     /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */
00187     i1 = i0 + n2;
00188     i2 = i1 + n2;
00189     i3 = i2 + n2;
00190 
00191     /* input is in 1.31(q31) format and provide 4 guard bits for the input */
00192 
00193     /*  Butterfly implementation */
00194     /* xa + xc */
00195     r1 = (pSrc[(2U * i0)] >> 4U) + (pSrc[(2U * i2)] >> 4U);
00196     /* xa - xc */
00197     r2 = (pSrc[2U * i0] >> 4U) - (pSrc[2U * i2] >> 4U);
00198 
00199     /* xb + xd */
00200     t1 = (pSrc[2U * i1] >> 4U) + (pSrc[2U * i3] >> 4U);
00201 
00202     /* ya + yc */
00203     s1 = (pSrc[(2U * i0) + 1U] >> 4U) + (pSrc[(2U * i2) + 1U] >> 4U);
00204     /* ya - yc */
00205     s2 = (pSrc[(2U * i0) + 1U] >> 4U) - (pSrc[(2U * i2) + 1U] >> 4U);
00206 
00207     /* xa' = xa + xb + xc + xd */
00208     pSrc[2U * i0] = (r1 + t1);
00209     /* (xa + xc) - (xb + xd) */
00210     r1 = r1 - t1;
00211     /* yb + yd */
00212     t2 = (pSrc[(2U * i1) + 1U] >> 4U) + (pSrc[(2U * i3) + 1U] >> 4U);
00213 
00214     /* ya' = ya + yb + yc + yd */
00215     pSrc[(2U * i0) + 1U] = (s1 + t2);
00216 
00217     /* (ya + yc) - (yb + yd) */
00218     s1 = s1 - t2;
00219 
00220     /* yb - yd */
00221     t1 = (pSrc[(2U * i1) + 1U] >> 4U) - (pSrc[(2U * i3) + 1U] >> 4U);
00222     /* xb - xd */
00223     t2 = (pSrc[2U * i1] >> 4U) - (pSrc[2U * i3] >> 4U);
00224 
00225     /*  index calculation for the coefficients */
00226     ia2 = 2U * ia1;
00227     co2 = pCoef[ia2 * 2U];
00228     si2 = pCoef[(ia2 * 2U) + 1U];
00229 
00230     /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
00231     pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
00232                      ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1U;
00233 
00234     /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
00235     pSrc[(2U * i1) + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
00236                             ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1U;
00237 
00238     /* (xa - xc) + (yb - yd) */
00239     r1 = r2 + t1;
00240     /* (xa - xc) - (yb - yd) */
00241     r2 = r2 - t1;
00242 
00243     /* (ya - yc) - (xb - xd) */
00244     s1 = s2 - t2;
00245     /* (ya - yc) + (xb - xd) */
00246     s2 = s2 + t2;
00247 
00248     co1 = pCoef[ia1 * 2U];
00249     si1 = pCoef[(ia1 * 2U) + 1U];
00250 
00251     /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
00252     pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
00253                      ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1U;
00254 
00255     /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
00256     pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
00257                             ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1U;
00258 
00259     /*  index calculation for the coefficients */
00260     ia3 = 3U * ia1;
00261     co3 = pCoef[ia3 * 2U];
00262     si3 = pCoef[(ia3 * 2U) + 1U];
00263 
00264     /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
00265     pSrc[2U * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
00266                      ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1U;
00267 
00268     /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
00269     pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
00270                             ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1U;
00271 
00272     /*  Twiddle coefficients index modifier */
00273     ia1 = ia1 + twidCoefModifier;
00274 
00275     /*  Updating input index */
00276     i0 = i0 + 1U;
00277 
00278   } while (--j);
00279 
00280   /* end of first stage process */
00281 
00282   /* data is in 5.27(q27) format */
00283 
00284 
00285   /* start of Middle stages process */
00286 
00287 
00288   /* each stage in middle stages provides two down scaling of the input */
00289 
00290   twidCoefModifier <<= 2U;
00291 
00292 
00293   for (k = fftLen / 4U; k > 4U; k >>= 2U)
00294   {
00295     /*  Initializations for the first stage */
00296     n1 = n2;
00297     n2 >>= 2U;
00298     ia1 = 0U;
00299 
00300     /*  Calculation of first stage */
00301     for (j = 0U; j <= (n2 - 1U); j++)
00302     {
00303       /*  index calculation for the coefficients */
00304       ia2 = ia1 + ia1;
00305       ia3 = ia2 + ia1;
00306       co1 = pCoef[ia1 * 2U];
00307       si1 = pCoef[(ia1 * 2U) + 1U];
00308       co2 = pCoef[ia2 * 2U];
00309       si2 = pCoef[(ia2 * 2U) + 1U];
00310       co3 = pCoef[ia3 * 2U];
00311       si3 = pCoef[(ia3 * 2U) + 1U];
00312       /*  Twiddle coefficients index modifier */
00313       ia1 = ia1 + twidCoefModifier;
00314 
00315       for (i0 = j; i0 < fftLen; i0 += n1)
00316       {
00317         /*  index calculation for the input as, */
00318         /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */
00319         i1 = i0 + n2;
00320         i2 = i1 + n2;
00321         i3 = i2 + n2;
00322 
00323         /*  Butterfly implementation */
00324         /* xa + xc */
00325         r1 = pSrc[2U * i0] + pSrc[2U * i2];
00326         /* xa - xc */
00327         r2 = pSrc[2U * i0] - pSrc[2U * i2];
00328 
00329         /* ya + yc */
00330         s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
00331         /* ya - yc */
00332         s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
00333 
00334         /* xb + xd */
00335         t1 = pSrc[2U * i1] + pSrc[2U * i3];
00336 
00337         /* xa' = xa + xb + xc + xd */
00338         pSrc[2U * i0] = (r1 + t1) >> 2U;
00339         /* xa + xc -(xb + xd) */
00340         r1 = r1 - t1;
00341 
00342         /* yb + yd */
00343         t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
00344         /* ya' = ya + yb + yc + yd */
00345         pSrc[(2U * i0) + 1U] = (s1 + t2) >> 2U;
00346 
00347         /* (ya + yc) - (yb + yd) */
00348         s1 = s1 - t2;
00349 
00350         /* (yb - yd) */
00351         t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
00352         /* (xb - xd) */
00353         t2 = pSrc[2U * i1] - pSrc[2U * i3];
00354 
00355         /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
00356         pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
00357                          ((int32_t) (((q63_t) s1 * si2) >> 32))) >> 1U;
00358 
00359         /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
00360         pSrc[(2U * i1) + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
00361                                 ((int32_t) (((q63_t) r1 * si2) >> 32))) >> 1U;
00362 
00363         /* (xa - xc) + (yb - yd) */
00364         r1 = r2 + t1;
00365         /* (xa - xc) - (yb - yd) */
00366         r2 = r2 - t1;
00367 
00368         /* (ya - yc) -  (xb - xd) */
00369         s1 = s2 - t2;
00370         /* (ya - yc) +  (xb - xd) */
00371         s2 = s2 + t2;
00372 
00373         /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
00374         pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
00375                          ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1U;
00376 
00377         /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
00378         pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
00379                                 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1U;
00380 
00381         /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
00382         pSrc[2U * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
00383                          ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1U;
00384 
00385         /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
00386         pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
00387                                 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1U;
00388       }
00389     }
00390     twidCoefModifier <<= 2U;
00391   }
00392 #else
00393   uint32_t n1, n2, ia1, ia2, ia3, i0, j, k;
00394   q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
00395 
00396   q31_t xa, xb, xc, xd;
00397   q31_t ya, yb, yc, yd;
00398   q31_t xa_out, xb_out, xc_out, xd_out;
00399   q31_t ya_out, yb_out, yc_out, yd_out;
00400 
00401   q31_t *ptr1;
00402   q31_t *pSi0;
00403   q31_t *pSi1;
00404   q31_t *pSi2;
00405   q31_t *pSi3;
00406   q63_t xaya, xbyb, xcyc, xdyd;
00407   /* Total process is divided into three stages */
00408 
00409   /* process first stage, middle stages, & last stage */
00410 
00411 
00412   /* start of first stage process */
00413 
00414   /*  Initializations for the first stage */
00415   n2 = fftLen;
00416   n1 = n2;
00417   /* n2 = fftLen/4 */
00418   n2 >>= 2U;
00419 
00420   ia1 = 0U;
00421 
00422   j = n2;
00423 
00424   pSi0 = pSrc;
00425   pSi1 = pSi0 + 2 * n2;
00426   pSi2 = pSi1 + 2 * n2;
00427   pSi3 = pSi2 + 2 * n2;
00428 
00429   /*  Calculation of first stage */
00430   do
00431   {
00432     /* input is in 1.31(q31) format and provide 4 guard bits for the input */
00433 
00434     /*  Butterfly implementation */
00435     /* xa + xc */
00436     r1 = (pSi0[0] >> 4U) + (pSi2[0] >> 4U);
00437     /* xa - xc */
00438     r2 = (pSi0[0] >> 4U) - (pSi2[0] >> 4U);
00439 
00440     /* xb + xd */
00441     t1 = (pSi1[0] >> 4U) + (pSi3[0] >> 4U);
00442 
00443     /* ya + yc */
00444     s1 = (pSi0[1] >> 4U) + (pSi2[1] >> 4U);
00445     /* ya - yc */
00446     s2 = (pSi0[1] >> 4U) - (pSi2[1] >> 4U);
00447 
00448     /* xa' = xa + xb + xc + xd */
00449     *pSi0++ = (r1 + t1);
00450     /* (xa + xc) - (xb + xd) */
00451     r1 = r1 - t1;
00452     /* yb + yd */
00453     t2 = (pSi1[1] >> 4U) + (pSi3[1] >> 4U);
00454 
00455     /* ya' = ya + yb + yc + yd */
00456     *pSi0++ = (s1 + t2);
00457 
00458     /* (ya + yc) - (yb + yd) */
00459     s1 = s1 - t2;
00460 
00461     /* yb - yd */
00462     t1 = (pSi1[1] >> 4U) - (pSi3[1] >> 4U);
00463     /* xb - xd */
00464     t2 = (pSi1[0] >> 4U) - (pSi3[0] >> 4U);
00465 
00466     /*  index calculation for the coefficients */
00467     ia2 = 2U * ia1;
00468     co2 = pCoef[ia2 * 2U];
00469     si2 = pCoef[(ia2 * 2U) + 1U];
00470 
00471     /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
00472     *pSi1++ = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
00473                      ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1U;
00474 
00475     /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
00476     *pSi1++ = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
00477                             ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1U;
00478 
00479     /* (xa - xc) + (yb - yd) */
00480     r1 = r2 + t1;
00481     /* (xa - xc) - (yb - yd) */
00482     r2 = r2 - t1;
00483 
00484     /* (ya - yc) - (xb - xd) */
00485     s1 = s2 - t2;
00486     /* (ya - yc) + (xb - xd) */
00487     s2 = s2 + t2;
00488 
00489     co1 = pCoef[ia1 * 2U];
00490     si1 = pCoef[(ia1 * 2U) + 1U];
00491 
00492     /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
00493     *pSi2++ = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
00494                      ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1U;
00495 
00496     /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
00497     *pSi2++ = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
00498                             ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1U;
00499 
00500     /*  index calculation for the coefficients */
00501     ia3 = 3U * ia1;
00502     co3 = pCoef[ia3 * 2U];
00503     si3 = pCoef[(ia3 * 2U) + 1U];
00504 
00505     /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
00506     *pSi3++ = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
00507                      ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1U;
00508 
00509     /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
00510     *pSi3++ = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
00511                             ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1U;
00512 
00513     /*  Twiddle coefficients index modifier */
00514     ia1 = ia1 + twidCoefModifier;
00515 
00516   } while (--j);
00517 
00518   /* end of first stage process */
00519 
00520   /* data is in 5.27(q27) format */
00521 
00522 
00523   /* start of Middle stages process */
00524 
00525 
00526   /* each stage in middle stages provides two down scaling of the input */
00527 
00528   twidCoefModifier <<= 2U;
00529 
00530 
00531   for (k = fftLen / 4U; k > 4U; k >>= 2U)
00532   {
00533     /*  Initializations for the first stage */
00534     n1 = n2;
00535     n2 >>= 2U;
00536     ia1 = 0U;
00537 
00538     /*  Calculation of first stage */
00539     for (j = 0U; j <= (n2 - 1U); j++)
00540     {
00541       /*  index calculation for the coefficients */
00542       ia2 = ia1 + ia1;
00543       ia3 = ia2 + ia1;
00544       co1 = pCoef[ia1 * 2U];
00545       si1 = pCoef[(ia1 * 2U) + 1U];
00546       co2 = pCoef[ia2 * 2U];
00547       si2 = pCoef[(ia2 * 2U) + 1U];
00548       co3 = pCoef[ia3 * 2U];
00549       si3 = pCoef[(ia3 * 2U) + 1U];
00550       /*  Twiddle coefficients index modifier */
00551       ia1 = ia1 + twidCoefModifier;
00552 
00553       pSi0 = pSrc + 2 * j;
00554       pSi1 = pSi0 + 2 * n2;
00555       pSi2 = pSi1 + 2 * n2;
00556       pSi3 = pSi2 + 2 * n2;
00557 
00558       for (i0 = j; i0 < fftLen; i0 += n1)
00559       {
00560         /*  Butterfly implementation */
00561         /* xa + xc */
00562         r1 = pSi0[0] + pSi2[0];
00563 
00564         /* xa - xc */
00565         r2 = pSi0[0] - pSi2[0];
00566 
00567 
00568         /* ya + yc */
00569         s1 = pSi0[1] + pSi2[1];
00570 
00571         /* ya - yc */
00572         s2 = pSi0[1] - pSi2[1];
00573 
00574 
00575         /* xb + xd */
00576         t1 = pSi1[0] + pSi3[0];
00577 
00578 
00579         /* xa' = xa + xb + xc + xd */
00580         pSi0[0] = (r1 + t1) >> 2U;
00581         /* xa + xc -(xb + xd) */
00582         r1 = r1 - t1;
00583 
00584         /* yb + yd */
00585         t2 = pSi1[1] + pSi3[1];
00586 
00587         /* ya' = ya + yb + yc + yd */
00588         pSi0[1] = (s1 + t2) >> 2U;
00589         pSi0 += 2 * n1;
00590 
00591         /* (ya + yc) - (yb + yd) */
00592         s1 = s1 - t2;
00593 
00594         /* (yb - yd) */
00595         t1 = pSi1[1] - pSi3[1];
00596 
00597         /* (xb - xd) */
00598         t2 = pSi1[0] - pSi3[0];
00599 
00600 
00601         /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
00602         pSi1[0] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
00603                          ((int32_t) (((q63_t) s1 * si2) >> 32))) >> 1U;
00604 
00605         /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
00606         pSi1[1] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
00607                                 ((int32_t) (((q63_t) r1 * si2) >> 32))) >> 1U;
00608         pSi1 += 2 * n1;
00609 
00610         /* (xa - xc) + (yb - yd) */
00611         r1 = r2 + t1;
00612         /* (xa - xc) - (yb - yd) */
00613         r2 = r2 - t1;
00614 
00615         /* (ya - yc) -  (xb - xd) */
00616         s1 = s2 - t2;
00617         /* (ya - yc) +  (xb - xd) */
00618         s2 = s2 + t2;
00619 
00620         /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
00621         pSi2[0] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
00622                          ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1U;
00623 
00624         /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
00625         pSi2[1] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
00626                                 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1U;
00627         pSi2 += 2 * n1;
00628 
00629         /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
00630         pSi3[0] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
00631                          ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1U;
00632 
00633         /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
00634         pSi3[1] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
00635                                 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1U;
00636         pSi3 += 2 * n1;
00637       }
00638     }
00639     twidCoefModifier <<= 2U;
00640   }
00641 #endif
00642 
00643   /* End of Middle stages process */
00644 
00645   /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
00646   /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
00647   /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */
00648   /* data is in 5.27(q27) format for the 16 point as there are no middle stages */
00649 
00650 
00651   /* start of Last stage process */
00652   /*  Initializations for the last stage */
00653   j = fftLen >> 2;
00654   ptr1 = &pSrc[0];
00655 
00656   /*  Calculations of last stage */
00657   do
00658   {
00659 
00660 #ifndef ARM_MATH_BIG_ENDIAN
00661 
00662     /* Read xa (real), ya(imag) input */
00663     xaya = *__SIMD64(ptr1)++;
00664     xa = (q31_t) xaya;
00665     ya = (q31_t) (xaya >> 32);
00666 
00667     /* Read xb (real), yb(imag) input */
00668     xbyb = *__SIMD64(ptr1)++;
00669     xb = (q31_t) xbyb;
00670     yb = (q31_t) (xbyb >> 32);
00671 
00672     /* Read xc (real), yc(imag) input */
00673     xcyc = *__SIMD64(ptr1)++;
00674     xc = (q31_t) xcyc;
00675     yc = (q31_t) (xcyc >> 32);
00676 
00677     /* Read xc (real), yc(imag) input */
00678     xdyd = *__SIMD64(ptr1)++;
00679     xd = (q31_t) xdyd;
00680     yd = (q31_t) (xdyd >> 32);
00681 
00682 #else
00683 
00684     /* Read xa (real), ya(imag) input */
00685     xaya = *__SIMD64(ptr1)++;
00686     ya = (q31_t) xaya;
00687     xa = (q31_t) (xaya >> 32);
00688 
00689     /* Read xb (real), yb(imag) input */
00690     xbyb = *__SIMD64(ptr1)++;
00691     yb = (q31_t) xbyb;
00692     xb = (q31_t) (xbyb >> 32);
00693 
00694     /* Read xc (real), yc(imag) input */
00695     xcyc = *__SIMD64(ptr1)++;
00696     yc = (q31_t) xcyc;
00697     xc = (q31_t) (xcyc >> 32);
00698 
00699     /* Read xc (real), yc(imag) input */
00700     xdyd = *__SIMD64(ptr1)++;
00701     yd = (q31_t) xdyd;
00702     xd = (q31_t) (xdyd >> 32);
00703 
00704 
00705 #endif
00706 
00707     /* xa' = xa + xb + xc + xd */
00708     xa_out = xa + xb + xc + xd;
00709 
00710     /* ya' = ya + yb + yc + yd */
00711     ya_out = ya + yb + yc + yd;
00712 
00713     /* pointer updation for writing */
00714     ptr1 = ptr1 - 8U;
00715 
00716     /* writing xa' and ya' */
00717     *ptr1++ = xa_out;
00718     *ptr1++ = ya_out;
00719 
00720     xc_out = (xa - xb + xc - xd);
00721     yc_out = (ya - yb + yc - yd);
00722 
00723     /* writing xc' and yc' */
00724     *ptr1++ = xc_out;
00725     *ptr1++ = yc_out;
00726 
00727     xb_out = (xa + yb - xc - yd);
00728     yb_out = (ya - xb - yc + xd);
00729 
00730     /* writing xb' and yb' */
00731     *ptr1++ = xb_out;
00732     *ptr1++ = yb_out;
00733 
00734     xd_out = (xa - yb - xc + yd);
00735     yd_out = (ya + xb - yc - xd);
00736 
00737     /* writing xd' and yd' */
00738     *ptr1++ = xd_out;
00739     *ptr1++ = yd_out;
00740 
00741 
00742   } while (--j);
00743 
00744   /* output is in 11.21(q21) format for the 1024 point */
00745   /* output is in 9.23(q23) format for the 256 point */
00746   /* output is in 7.25(q25) format for the 64 point */
00747   /* output is in 5.27(q27) format for the 16 point */
00748 
00749   /* End of last stage process */
00750 
00751 }
00752 
00753 
00754 /**
00755  * @brief  Core function for the Q31 CIFFT butterfly process.
00756  * @param[in, out] *pSrc            points to the in-place buffer of Q31 data type.
00757  * @param[in]      fftLen           length of the FFT.
00758  * @param[in]      *pCoef           points to twiddle coefficient buffer.
00759  * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
00760  * @return none.
00761  */
00762 
00763 
00764 /*
00765 * Radix-4 IFFT algorithm used is :
00766 *
00767 * CIFFT uses same twiddle coefficients as CFFT Function
00768 *  x[k] = x[n] + (j)k * x[n + fftLen/4] + (-1)k * x[n+fftLen/2] + (-j)k * x[n+3*fftLen/4]
00769 *
00770 *
00771 * IFFT is implemented with following changes in equations from FFT
00772 *
00773 * Input real and imaginary data:
00774 * x(n) = xa + j * ya
00775 * x(n+N/4 ) = xb + j * yb
00776 * x(n+N/2 ) = xc + j * yc
00777 * x(n+3N 4) = xd + j * yd
00778 *
00779 *
00780 * Output real and imaginary data:
00781 * x(4r) = xa'+ j * ya'
00782 * x(4r+1) = xb'+ j * yb'
00783 * x(4r+2) = xc'+ j * yc'
00784 * x(4r+3) = xd'+ j * yd'
00785 *
00786 *
00787 * Twiddle factors for radix-4 IFFT:
00788 * Wn = co1 + j * (si1)
00789 * W2n = co2 + j * (si2)
00790 * W3n = co3 + j * (si3)
00791 
00792 * The real and imaginary output values for the radix-4 butterfly are
00793 * xa' = xa + xb + xc + xd
00794 * ya' = ya + yb + yc + yd
00795 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)
00796 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)
00797 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)
00798 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)
00799 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)
00800 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)
00801 *
00802 */
00803 
00804 void arm_radix4_butterfly_inverse_q31(
00805   q31_t * pSrc,
00806   uint32_t fftLen,
00807   q31_t * pCoef,
00808   uint32_t twidCoefModifier)
00809 {
00810 #if defined(ARM_MATH_CM7)
00811   uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k;
00812   q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
00813   q31_t xa, xb, xc, xd;
00814   q31_t ya, yb, yc, yd;
00815   q31_t xa_out, xb_out, xc_out, xd_out;
00816   q31_t ya_out, yb_out, yc_out, yd_out;
00817 
00818   q31_t *ptr1;
00819   q63_t xaya, xbyb, xcyc, xdyd;
00820 
00821   /* input is be 1.31(q31) format for all FFT sizes */
00822   /* Total process is divided into three stages */
00823   /* process first stage, middle stages, & last stage */
00824 
00825   /* Start of first stage process */
00826 
00827   /* Initializations for the first stage */
00828   n2 = fftLen;
00829   n1 = n2;
00830   /* n2 = fftLen/4 */
00831   n2 >>= 2U;
00832   i0 = 0U;
00833   ia1 = 0U;
00834 
00835   j = n2;
00836 
00837   do
00838   {
00839 
00840     /* input is in 1.31(q31) format and provide 4 guard bits for the input */
00841 
00842     /*  index calculation for the input as, */
00843     /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */
00844     i1 = i0 + n2;
00845     i2 = i1 + n2;
00846     i3 = i2 + n2;
00847 
00848     /*  Butterfly implementation */
00849     /* xa + xc */
00850     r1 = (pSrc[2U * i0] >> 4U) + (pSrc[2U * i2] >> 4U);
00851     /* xa - xc */
00852     r2 = (pSrc[2U * i0] >> 4U) - (pSrc[2U * i2] >> 4U);
00853 
00854     /* xb + xd */
00855     t1 = (pSrc[2U * i1] >> 4U) + (pSrc[2U * i3] >> 4U);
00856 
00857     /* ya + yc */
00858     s1 = (pSrc[(2U * i0) + 1U] >> 4U) + (pSrc[(2U * i2) + 1U] >> 4U);
00859     /* ya - yc */
00860     s2 = (pSrc[(2U * i0) + 1U] >> 4U) - (pSrc[(2U * i2) + 1U] >> 4U);
00861 
00862     /* xa' = xa + xb + xc + xd */
00863     pSrc[2U * i0] = (r1 + t1);
00864     /* (xa + xc) - (xb + xd) */
00865     r1 = r1 - t1;
00866     /* yb + yd */
00867     t2 = (pSrc[(2U * i1) + 1U] >> 4U) + (pSrc[(2U * i3) + 1U] >> 4U);
00868     /* ya' = ya + yb + yc + yd */
00869     pSrc[(2U * i0) + 1U] = (s1 + t2);
00870 
00871     /* (ya + yc) - (yb + yd) */
00872     s1 = s1 - t2;
00873 
00874     /* yb - yd */
00875     t1 = (pSrc[(2U * i1) + 1U] >> 4U) - (pSrc[(2U * i3) + 1U] >> 4U);
00876     /* xb - xd */
00877     t2 = (pSrc[2U * i1] >> 4U) - (pSrc[2U * i3] >> 4U);
00878 
00879     /*  index calculation for the coefficients */
00880     ia2 = 2U * ia1;
00881     co2 = pCoef[ia2 * 2U];
00882     si2 = pCoef[(ia2 * 2U) + 1U];
00883 
00884     /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
00885     pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) -
00886                      ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1U;
00887 
00888     /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
00889     pSrc[2U * i1 + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32)) +
00890                           ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1U;
00891 
00892     /* (xa - xc) - (yb - yd) */
00893     r1 = r2 - t1;
00894     /* (xa - xc) + (yb - yd) */
00895     r2 = r2 + t1;
00896 
00897     /* (ya - yc) + (xb - xd) */
00898     s1 = s2 + t2;
00899     /* (ya - yc) - (xb - xd) */
00900     s2 = s2 - t2;
00901 
00902     co1 = pCoef[ia1 * 2U];
00903     si1 = pCoef[(ia1 * 2U) + 1U];
00904 
00905     /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
00906     pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
00907                      ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1U;
00908 
00909     /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
00910     pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
00911                             ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1U;
00912 
00913     /*  index calculation for the coefficients */
00914     ia3 = 3U * ia1;
00915     co3 = pCoef[ia3 * 2U];
00916     si3 = pCoef[(ia3 * 2U) + 1U];
00917 
00918     /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
00919     pSrc[2U * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
00920                      ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1U;
00921 
00922     /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
00923     pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
00924                             ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1U;
00925 
00926     /*  Twiddle coefficients index modifier */
00927     ia1 = ia1 + twidCoefModifier;
00928 
00929     /*  Updating input index */
00930     i0 = i0 + 1U;
00931 
00932   } while (--j);
00933 
00934   /* data is in 5.27(q27) format */
00935   /* each stage provides two down scaling of the input */
00936 
00937 
00938   /* Start of Middle stages process */
00939 
00940   twidCoefModifier <<= 2U;
00941 
00942   /*  Calculation of second stage to excluding last stage */
00943   for (k = fftLen / 4U; k > 4U; k >>= 2U)
00944   {
00945     /*  Initializations for the first stage */
00946     n1 = n2;
00947     n2 >>= 2U;
00948     ia1 = 0U;
00949 
00950     for (j = 0; j <= (n2 - 1U); j++)
00951     {
00952       /*  index calculation for the coefficients */
00953       ia2 = ia1 + ia1;
00954       ia3 = ia2 + ia1;
00955       co1 = pCoef[ia1 * 2U];
00956       si1 = pCoef[(ia1 * 2U) + 1U];
00957       co2 = pCoef[ia2 * 2U];
00958       si2 = pCoef[(ia2 * 2U) + 1U];
00959       co3 = pCoef[ia3 * 2U];
00960       si3 = pCoef[(ia3 * 2U) + 1U];
00961       /*  Twiddle coefficients index modifier */
00962       ia1 = ia1 + twidCoefModifier;
00963 
00964       for (i0 = j; i0 < fftLen; i0 += n1)
00965       {
00966         /*  index calculation for the input as, */
00967         /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */
00968         i1 = i0 + n2;
00969         i2 = i1 + n2;
00970         i3 = i2 + n2;
00971 
00972         /*  Butterfly implementation */
00973         /* xa + xc */
00974         r1 = pSrc[2U * i0] + pSrc[2U * i2];
00975         /* xa - xc */
00976         r2 = pSrc[2U * i0] - pSrc[2U * i2];
00977 
00978         /* ya + yc */
00979         s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
00980         /* ya - yc */
00981         s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
00982 
00983         /* xb + xd */
00984         t1 = pSrc[2U * i1] + pSrc[2U * i3];
00985 
00986         /* xa' = xa + xb + xc + xd */
00987         pSrc[2U * i0] = (r1 + t1) >> 2U;
00988         /* xa + xc -(xb + xd) */
00989         r1 = r1 - t1;
00990         /* yb + yd */
00991         t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
00992         /* ya' = ya + yb + yc + yd */
00993         pSrc[(2U * i0) + 1U] = (s1 + t2) >> 2U;
00994 
00995         /* (ya + yc) - (yb + yd) */
00996         s1 = s1 - t2;
00997 
00998         /* (yb - yd) */
00999         t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
01000         /* (xb - xd) */
01001         t2 = pSrc[2U * i1] - pSrc[2U * i3];
01002 
01003         /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
01004         pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32U)) -
01005                          ((int32_t) (((q63_t) s1 * si2) >> 32U))) >> 1U;
01006 
01007         /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
01008         pSrc[(2U * i1) + 1U] =
01009           (((int32_t) (((q63_t) s1 * co2) >> 32U)) +
01010            ((int32_t) (((q63_t) r1 * si2) >> 32U))) >> 1U;
01011 
01012         /* (xa - xc) - (yb - yd) */
01013         r1 = r2 - t1;
01014         /* (xa - xc) + (yb - yd) */
01015         r2 = r2 + t1;
01016 
01017         /* (ya - yc) +  (xb - xd) */
01018         s1 = s2 + t2;
01019         /* (ya - yc) -  (xb - xd) */
01020         s2 = s2 - t2;
01021 
01022         /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
01023         pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
01024                          ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1U;
01025 
01026         /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
01027         pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
01028                                 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1U;
01029 
01030         /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
01031         pSrc[(2U * i3)] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
01032                            ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1U;
01033 
01034         /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
01035         pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
01036                                 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1U;
01037       }
01038     }
01039     twidCoefModifier <<= 2U;
01040   }
01041 #else
01042   uint32_t n1, n2, ia1, ia2, ia3, i0, j, k;
01043   q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
01044   q31_t xa, xb, xc, xd;
01045   q31_t ya, yb, yc, yd;
01046   q31_t xa_out, xb_out, xc_out, xd_out;
01047   q31_t ya_out, yb_out, yc_out, yd_out;
01048 
01049   q31_t *ptr1;
01050   q31_t *pSi0;
01051   q31_t *pSi1;
01052   q31_t *pSi2;
01053   q31_t *pSi3;
01054   q63_t xaya, xbyb, xcyc, xdyd;
01055 
01056   /* input is be 1.31(q31) format for all FFT sizes */
01057   /* Total process is divided into three stages */
01058   /* process first stage, middle stages, & last stage */
01059 
01060   /* Start of first stage process */
01061 
01062   /* Initializations for the first stage */
01063   n2 = fftLen;
01064   n1 = n2;
01065   /* n2 = fftLen/4 */
01066   n2 >>= 2U;
01067 
01068   ia1 = 0U;
01069 
01070   j = n2;
01071 
01072   pSi0 = pSrc;
01073   pSi1 = pSi0 + 2 * n2;
01074   pSi2 = pSi1 + 2 * n2;
01075   pSi3 = pSi2 + 2 * n2;
01076 
01077   do
01078   {
01079     /*  Butterfly implementation */
01080     /* xa + xc */
01081     r1 = (pSi0[0] >> 4U) + (pSi2[0] >> 4U);
01082     /* xa - xc */
01083     r2 = (pSi0[0] >> 4U) - (pSi2[0] >> 4U);
01084 
01085     /* xb + xd */
01086     t1 = (pSi1[0] >> 4U) + (pSi3[0] >> 4U);
01087 
01088     /* ya + yc */
01089     s1 = (pSi0[1] >> 4U) + (pSi2[1] >> 4U);
01090     /* ya - yc */
01091     s2 = (pSi0[1] >> 4U) - (pSi2[1] >> 4U);
01092 
01093     /* xa' = xa + xb + xc + xd */
01094     *pSi0++ = (r1 + t1);
01095     /* (xa + xc) - (xb + xd) */
01096     r1 = r1 - t1;
01097     /* yb + yd */
01098     t2 = (pSi1[1] >> 4U) + (pSi3[1] >> 4U);
01099     /* ya' = ya + yb + yc + yd */
01100     *pSi0++ = (s1 + t2);
01101 
01102     /* (ya + yc) - (yb + yd) */
01103     s1 = s1 - t2;
01104 
01105     /* yb - yd */
01106     t1 = (pSi1[1] >> 4U) - (pSi3[1] >> 4U);
01107     /* xb - xd */
01108     t2 = (pSi1[0] >> 4U) - (pSi3[0] >> 4U);
01109 
01110     /*  index calculation for the coefficients */
01111     ia2 = 2U * ia1;
01112     co2 = pCoef[ia2 * 2U];
01113     si2 = pCoef[(ia2 * 2U) + 1U];
01114 
01115     /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
01116     *pSi1++ = (((int32_t) (((q63_t) r1 * co2) >> 32)) -
01117                      ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1U;
01118 
01119     /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
01120     *pSi1++ = (((int32_t) (((q63_t) s1 * co2) >> 32)) +
01121                           ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1U;
01122 
01123     /* (xa - xc) - (yb - yd) */
01124     r1 = r2 - t1;
01125     /* (xa - xc) + (yb - yd) */
01126     r2 = r2 + t1;
01127 
01128     /* (ya - yc) + (xb - xd) */
01129     s1 = s2 + t2;
01130     /* (ya - yc) - (xb - xd) */
01131     s2 = s2 - t2;
01132 
01133     co1 = pCoef[ia1 * 2U];
01134     si1 = pCoef[(ia1 * 2U) + 1U];
01135 
01136     /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
01137     *pSi2++ = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
01138                      ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1U;
01139 
01140     /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
01141     *pSi2++ = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
01142                             ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1U;
01143 
01144     /*  index calculation for the coefficients */
01145     ia3 = 3U * ia1;
01146     co3 = pCoef[ia3 * 2U];
01147     si3 = pCoef[(ia3 * 2U) + 1U];
01148 
01149     /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
01150     *pSi3++ = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
01151                      ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1U;
01152 
01153     /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
01154     *pSi3++ = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
01155                             ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1U;
01156 
01157     /*  Twiddle coefficients index modifier */
01158     ia1 = ia1 + twidCoefModifier;
01159 
01160   } while (--j);
01161 
01162   /* data is in 5.27(q27) format */
01163   /* each stage provides two down scaling of the input */
01164 
01165 
01166   /* Start of Middle stages process */
01167 
01168   twidCoefModifier <<= 2U;
01169 
01170   /*  Calculation of second stage to excluding last stage */
01171   for (k = fftLen / 4U; k > 4U; k >>= 2U)
01172   {
01173     /*  Initializations for the first stage */
01174     n1 = n2;
01175     n2 >>= 2U;
01176     ia1 = 0U;
01177 
01178     for (j = 0; j <= (n2 - 1U); j++)
01179     {
01180       /*  index calculation for the coefficients */
01181       ia2 = ia1 + ia1;
01182       ia3 = ia2 + ia1;
01183       co1 = pCoef[ia1 * 2U];
01184       si1 = pCoef[(ia1 * 2U) + 1U];
01185       co2 = pCoef[ia2 * 2U];
01186       si2 = pCoef[(ia2 * 2U) + 1U];
01187       co3 = pCoef[ia3 * 2U];
01188       si3 = pCoef[(ia3 * 2U) + 1U];
01189       /*  Twiddle coefficients index modifier */
01190       ia1 = ia1 + twidCoefModifier;
01191 
01192       pSi0 = pSrc + 2 * j;
01193       pSi1 = pSi0 + 2 * n2;
01194       pSi2 = pSi1 + 2 * n2;
01195       pSi3 = pSi2 + 2 * n2;
01196 
01197       for (i0 = j; i0 < fftLen; i0 += n1)
01198       {
01199         /*  Butterfly implementation */
01200         /* xa + xc */
01201         r1 = pSi0[0] + pSi2[0];
01202 
01203         /* xa - xc */
01204         r2 = pSi0[0] - pSi2[0];
01205 
01206 
01207         /* ya + yc */
01208         s1 = pSi0[1] + pSi2[1];
01209 
01210         /* ya - yc */
01211         s2 = pSi0[1] - pSi2[1];
01212 
01213 
01214         /* xb + xd */
01215         t1 = pSi1[0] + pSi3[0];
01216 
01217 
01218         /* xa' = xa + xb + xc + xd */
01219         pSi0[0] = (r1 + t1) >> 2U;
01220         /* xa + xc -(xb + xd) */
01221         r1 = r1 - t1;
01222         /* yb + yd */
01223         t2 = pSi1[1] + pSi3[1];
01224 
01225         /* ya' = ya + yb + yc + yd */
01226         pSi0[1] = (s1 + t2) >> 2U;
01227         pSi0 += 2 * n1;
01228 
01229         /* (ya + yc) - (yb + yd) */
01230         s1 = s1 - t2;
01231 
01232         /* (yb - yd) */
01233         t1 = pSi1[1] - pSi3[1];
01234 
01235         /* (xb - xd) */
01236         t2 = pSi1[0] - pSi3[0];
01237 
01238 
01239         /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
01240         pSi1[0] = (((int32_t) (((q63_t) r1 * co2) >> 32U)) -
01241                          ((int32_t) (((q63_t) s1 * si2) >> 32U))) >> 1U;
01242 
01243         /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
01244         pSi1[1] =
01245 
01246           (((int32_t) (((q63_t) s1 * co2) >> 32U)) +
01247            ((int32_t) (((q63_t) r1 * si2) >> 32U))) >> 1U;
01248         pSi1 += 2 * n1;
01249 
01250         /* (xa - xc) - (yb - yd) */
01251         r1 = r2 - t1;
01252         /* (xa - xc) + (yb - yd) */
01253         r2 = r2 + t1;
01254 
01255         /* (ya - yc) +  (xb - xd) */
01256         s1 = s2 + t2;
01257         /* (ya - yc) -  (xb - xd) */
01258         s2 = s2 - t2;
01259 
01260         /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
01261         pSi2[0] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
01262                          ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1U;
01263 
01264         /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
01265         pSi2[1] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
01266                                 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1U;
01267         pSi2 += 2 * n1;
01268 
01269         /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
01270         pSi3[0] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
01271                            ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1U;
01272 
01273         /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
01274         pSi3[1] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
01275                                 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1U;
01276         pSi3 += 2 * n1;
01277       }
01278     }
01279     twidCoefModifier <<= 2U;
01280   }
01281 #endif
01282 
01283   /* End of Middle stages process */
01284 
01285   /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
01286   /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
01287   /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */
01288   /* data is in 5.27(q27) format for the 16 point as there are no middle stages */
01289 
01290 
01291   /* Start of last stage process */
01292 
01293 
01294   /*  Initializations for the last stage */
01295   j = fftLen >> 2;
01296   ptr1 = &pSrc[0];
01297 
01298   /*  Calculations of last stage */
01299   do
01300   {
01301 #ifndef ARM_MATH_BIG_ENDIAN
01302     /* Read xa (real), ya(imag) input */
01303     xaya = *__SIMD64(ptr1)++;
01304     xa = (q31_t) xaya;
01305     ya = (q31_t) (xaya >> 32);
01306 
01307     /* Read xb (real), yb(imag) input */
01308     xbyb = *__SIMD64(ptr1)++;
01309     xb = (q31_t) xbyb;
01310     yb = (q31_t) (xbyb >> 32);
01311 
01312     /* Read xc (real), yc(imag) input */
01313     xcyc = *__SIMD64(ptr1)++;
01314     xc = (q31_t) xcyc;
01315     yc = (q31_t) (xcyc >> 32);
01316 
01317     /* Read xc (real), yc(imag) input */
01318     xdyd = *__SIMD64(ptr1)++;
01319     xd = (q31_t) xdyd;
01320     yd = (q31_t) (xdyd >> 32);
01321 
01322 #else
01323 
01324     /* Read xa (real), ya(imag) input */
01325     xaya = *__SIMD64(ptr1)++;
01326     ya = (q31_t) xaya;
01327     xa = (q31_t) (xaya >> 32);
01328 
01329     /* Read xb (real), yb(imag) input */
01330     xbyb = *__SIMD64(ptr1)++;
01331     yb = (q31_t) xbyb;
01332     xb = (q31_t) (xbyb >> 32);
01333 
01334     /* Read xc (real), yc(imag) input */
01335     xcyc = *__SIMD64(ptr1)++;
01336     yc = (q31_t) xcyc;
01337     xc = (q31_t) (xcyc >> 32);
01338 
01339     /* Read xc (real), yc(imag) input */
01340     xdyd = *__SIMD64(ptr1)++;
01341     yd = (q31_t) xdyd;
01342     xd = (q31_t) (xdyd >> 32);
01343 
01344 
01345 #endif
01346 
01347     /* xa' = xa + xb + xc + xd */
01348     xa_out = xa + xb + xc + xd;
01349 
01350     /* ya' = ya + yb + yc + yd */
01351     ya_out = ya + yb + yc + yd;
01352 
01353     /* pointer updation for writing */
01354     ptr1 = ptr1 - 8U;
01355 
01356     /* writing xa' and ya' */
01357     *ptr1++ = xa_out;
01358     *ptr1++ = ya_out;
01359 
01360     xc_out = (xa - xb + xc - xd);
01361     yc_out = (ya - yb + yc - yd);
01362 
01363     /* writing xc' and yc' */
01364     *ptr1++ = xc_out;
01365     *ptr1++ = yc_out;
01366 
01367     xb_out = (xa - yb - xc + yd);
01368     yb_out = (ya + xb - yc - xd);
01369 
01370     /* writing xb' and yb' */
01371     *ptr1++ = xb_out;
01372     *ptr1++ = yb_out;
01373 
01374     xd_out = (xa + yb - xc - yd);
01375     yd_out = (ya - xb - yc + xd);
01376 
01377     /* writing xd' and yd' */
01378     *ptr1++ = xd_out;
01379     *ptr1++ = yd_out;
01380 
01381   } while (--j);
01382 
01383   /* output is in 11.21(q21) format for the 1024 point */
01384   /* output is in 9.23(q23) format for the 256 point */
01385   /* output is in 7.25(q25) format for the 64 point */
01386   /* output is in 5.27(q27) format for the 16 point */
01387 
01388   /* End of last stage process */
01389 }
01390