CMSIS DSP library
Dependents: performance_timer Surfboard_ gps2rtty Capstone ... more
arm_cfft_radix4_q31.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2014 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 19. March 2015 00005 * $Revision: V.1.4.5 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_cfft_radix4_q31.c 00009 * 00010 * Description: This file has function definition of Radix-4 FFT & IFFT function and 00011 * In-place bit reversal using bit reversal table 00012 * 00013 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00014 * 00015 * Redistribution and use in source and binary forms, with or without 00016 * modification, are permitted provided that the following conditions 00017 * are met: 00018 * - Redistributions of source code must retain the above copyright 00019 * notice, this list of conditions and the following disclaimer. 00020 * - Redistributions in binary form must reproduce the above copyright 00021 * notice, this list of conditions and the following disclaimer in 00022 * the documentation and/or other materials provided with the 00023 * distribution. 00024 * - Neither the name of ARM LIMITED nor the names of its contributors 00025 * may be used to endorse or promote products derived from this 00026 * software without specific prior written permission. 00027 * 00028 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00029 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00030 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00031 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 00032 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00033 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00034 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00035 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00036 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00037 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00038 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00039 * POSSIBILITY OF SUCH DAMAGE. 00040 * -------------------------------------------------------------------- */ 00041 00042 #include "arm_math.h" 00043 00044 void arm_radix4_butterfly_inverse_q31( 00045 q31_t * pSrc, 00046 uint32_t fftLen, 00047 q31_t * pCoef, 00048 uint32_t twidCoefModifier); 00049 00050 void arm_radix4_butterfly_q31( 00051 q31_t * pSrc, 00052 uint32_t fftLen, 00053 q31_t * pCoef, 00054 uint32_t twidCoefModifier); 00055 00056 void arm_bitreversal_q31( 00057 q31_t * pSrc, 00058 uint32_t fftLen, 00059 uint16_t bitRevFactor, 00060 uint16_t * pBitRevTab); 00061 00062 /** 00063 * @ingroup groupTransforms 00064 */ 00065 00066 /** 00067 * @addtogroup ComplexFFT 00068 * @{ 00069 */ 00070 00071 /** 00072 * @details 00073 * @brief Processing function for the Q31 CFFT/CIFFT. 00074 * @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q31 and will be removed 00075 * @param[in] *S points to an instance of the Q31 CFFT/CIFFT structure. 00076 * @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place. 00077 * @return none. 00078 * 00079 * \par Input and output formats: 00080 * \par 00081 * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process. 00082 * Hence the output format is different for different FFT sizes. 00083 * The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT: 00084 * \par 00085 * \image html CFFTQ31.gif "Input and Output Formats for Q31 CFFT" 00086 * \image html CIFFTQ31.gif "Input and Output Formats for Q31 CIFFT" 00087 * 00088 */ 00089 00090 void arm_cfft_radix4_q31( 00091 const arm_cfft_radix4_instance_q31 * S, 00092 q31_t * pSrc) 00093 { 00094 if(S->ifftFlag == 1u) 00095 { 00096 /* Complex IFFT radix-4 */ 00097 arm_radix4_butterfly_inverse_q31(pSrc, S->fftLen, S->pTwiddle, 00098 S->twidCoefModifier); 00099 } 00100 else 00101 { 00102 /* Complex FFT radix-4 */ 00103 arm_radix4_butterfly_q31(pSrc, S->fftLen, S->pTwiddle, 00104 S->twidCoefModifier); 00105 } 00106 00107 00108 if(S->bitReverseFlag == 1u) 00109 { 00110 /* Bit Reversal */ 00111 arm_bitreversal_q31(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable); 00112 } 00113 00114 } 00115 00116 /** 00117 * @} end of ComplexFFT group 00118 */ 00119 00120 /* 00121 * Radix-4 FFT algorithm used is : 00122 * 00123 * Input real and imaginary data: 00124 * x(n) = xa + j * ya 00125 * x(n+N/4 ) = xb + j * yb 00126 * x(n+N/2 ) = xc + j * yc 00127 * x(n+3N 4) = xd + j * yd 00128 * 00129 * 00130 * Output real and imaginary data: 00131 * x(4r) = xa'+ j * ya' 00132 * x(4r+1) = xb'+ j * yb' 00133 * x(4r+2) = xc'+ j * yc' 00134 * x(4r+3) = xd'+ j * yd' 00135 * 00136 * 00137 * Twiddle factors for radix-4 FFT: 00138 * Wn = co1 + j * (- si1) 00139 * W2n = co2 + j * (- si2) 00140 * W3n = co3 + j * (- si3) 00141 * 00142 * Butterfly implementation: 00143 * xa' = xa + xb + xc + xd 00144 * ya' = ya + yb + yc + yd 00145 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) 00146 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) 00147 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) 00148 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) 00149 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) 00150 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) 00151 * 00152 */ 00153 00154 /** 00155 * @brief Core function for the Q31 CFFT butterfly process. 00156 * @param[in, out] *pSrc points to the in-place buffer of Q31 data type. 00157 * @param[in] fftLen length of the FFT. 00158 * @param[in] *pCoef points to twiddle coefficient buffer. 00159 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 00160 * @return none. 00161 */ 00162 00163 void arm_radix4_butterfly_q31( 00164 q31_t * pSrc, 00165 uint32_t fftLen, 00166 q31_t * pCoef, 00167 uint32_t twidCoefModifier) 00168 { 00169 #if defined(ARM_MATH_CM7) 00170 uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k; 00171 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3; 00172 00173 q31_t xa, xb, xc, xd; 00174 q31_t ya, yb, yc, yd; 00175 q31_t xa_out, xb_out, xc_out, xd_out; 00176 q31_t ya_out, yb_out, yc_out, yd_out; 00177 00178 q31_t *ptr1; 00179 q63_t xaya, xbyb, xcyc, xdyd; 00180 /* Total process is divided into three stages */ 00181 00182 /* process first stage, middle stages, & last stage */ 00183 00184 00185 /* start of first stage process */ 00186 00187 /* Initializations for the first stage */ 00188 n2 = fftLen; 00189 n1 = n2; 00190 /* n2 = fftLen/4 */ 00191 n2 >>= 2u; 00192 i0 = 0u; 00193 ia1 = 0u; 00194 00195 j = n2; 00196 00197 /* Calculation of first stage */ 00198 do 00199 { 00200 /* index calculation for the input as, */ 00201 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */ 00202 i1 = i0 + n2; 00203 i2 = i1 + n2; 00204 i3 = i2 + n2; 00205 00206 /* input is in 1.31(q31) format and provide 4 guard bits for the input */ 00207 00208 /* Butterfly implementation */ 00209 /* xa + xc */ 00210 r1 = (pSrc[(2u * i0)] >> 4u) + (pSrc[(2u * i2)] >> 4u); 00211 /* xa - xc */ 00212 r2 = (pSrc[2u * i0] >> 4u) - (pSrc[2u * i2] >> 4u); 00213 00214 /* xb + xd */ 00215 t1 = (pSrc[2u * i1] >> 4u) + (pSrc[2u * i3] >> 4u); 00216 00217 /* ya + yc */ 00218 s1 = (pSrc[(2u * i0) + 1u] >> 4u) + (pSrc[(2u * i2) + 1u] >> 4u); 00219 /* ya - yc */ 00220 s2 = (pSrc[(2u * i0) + 1u] >> 4u) - (pSrc[(2u * i2) + 1u] >> 4u); 00221 00222 /* xa' = xa + xb + xc + xd */ 00223 pSrc[2u * i0] = (r1 + t1); 00224 /* (xa + xc) - (xb + xd) */ 00225 r1 = r1 - t1; 00226 /* yb + yd */ 00227 t2 = (pSrc[(2u * i1) + 1u] >> 4u) + (pSrc[(2u * i3) + 1u] >> 4u); 00228 00229 /* ya' = ya + yb + yc + yd */ 00230 pSrc[(2u * i0) + 1u] = (s1 + t2); 00231 00232 /* (ya + yc) - (yb + yd) */ 00233 s1 = s1 - t2; 00234 00235 /* yb - yd */ 00236 t1 = (pSrc[(2u * i1) + 1u] >> 4u) - (pSrc[(2u * i3) + 1u] >> 4u); 00237 /* xb - xd */ 00238 t2 = (pSrc[2u * i1] >> 4u) - (pSrc[2u * i3] >> 4u); 00239 00240 /* index calculation for the coefficients */ 00241 ia2 = 2u * ia1; 00242 co2 = pCoef[ia2 * 2u]; 00243 si2 = pCoef[(ia2 * 2u) + 1u]; 00244 00245 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */ 00246 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) + 00247 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u; 00248 00249 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */ 00250 pSrc[(2u * i1) + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) - 00251 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u; 00252 00253 /* (xa - xc) + (yb - yd) */ 00254 r1 = r2 + t1; 00255 /* (xa - xc) - (yb - yd) */ 00256 r2 = r2 - t1; 00257 00258 /* (ya - yc) - (xb - xd) */ 00259 s1 = s2 - t2; 00260 /* (ya - yc) + (xb - xd) */ 00261 s2 = s2 + t2; 00262 00263 co1 = pCoef[ia1 * 2u]; 00264 si1 = pCoef[(ia1 * 2u) + 1u]; 00265 00266 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */ 00267 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) + 00268 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u; 00269 00270 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */ 00271 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) - 00272 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u; 00273 00274 /* index calculation for the coefficients */ 00275 ia3 = 3u * ia1; 00276 co3 = pCoef[ia3 * 2u]; 00277 si3 = pCoef[(ia3 * 2u) + 1u]; 00278 00279 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */ 00280 pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) + 00281 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u; 00282 00283 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */ 00284 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) - 00285 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u; 00286 00287 /* Twiddle coefficients index modifier */ 00288 ia1 = ia1 + twidCoefModifier; 00289 00290 /* Updating input index */ 00291 i0 = i0 + 1u; 00292 00293 } while(--j); 00294 00295 /* end of first stage process */ 00296 00297 /* data is in 5.27(q27) format */ 00298 00299 00300 /* start of Middle stages process */ 00301 00302 00303 /* each stage in middle stages provides two down scaling of the input */ 00304 00305 twidCoefModifier <<= 2u; 00306 00307 00308 for (k = fftLen / 4u; k > 4u; k >>= 2u) 00309 { 00310 /* Initializations for the first stage */ 00311 n1 = n2; 00312 n2 >>= 2u; 00313 ia1 = 0u; 00314 00315 /* Calculation of first stage */ 00316 for (j = 0u; j <= (n2 - 1u); j++) 00317 { 00318 /* index calculation for the coefficients */ 00319 ia2 = ia1 + ia1; 00320 ia3 = ia2 + ia1; 00321 co1 = pCoef[ia1 * 2u]; 00322 si1 = pCoef[(ia1 * 2u) + 1u]; 00323 co2 = pCoef[ia2 * 2u]; 00324 si2 = pCoef[(ia2 * 2u) + 1u]; 00325 co3 = pCoef[ia3 * 2u]; 00326 si3 = pCoef[(ia3 * 2u) + 1u]; 00327 /* Twiddle coefficients index modifier */ 00328 ia1 = ia1 + twidCoefModifier; 00329 00330 for (i0 = j; i0 < fftLen; i0 += n1) 00331 { 00332 /* index calculation for the input as, */ 00333 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */ 00334 i1 = i0 + n2; 00335 i2 = i1 + n2; 00336 i3 = i2 + n2; 00337 00338 /* Butterfly implementation */ 00339 /* xa + xc */ 00340 r1 = pSrc[2u * i0] + pSrc[2u * i2]; 00341 /* xa - xc */ 00342 r2 = pSrc[2u * i0] - pSrc[2u * i2]; 00343 00344 /* ya + yc */ 00345 s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u]; 00346 /* ya - yc */ 00347 s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u]; 00348 00349 /* xb + xd */ 00350 t1 = pSrc[2u * i1] + pSrc[2u * i3]; 00351 00352 /* xa' = xa + xb + xc + xd */ 00353 pSrc[2u * i0] = (r1 + t1) >> 2u; 00354 /* xa + xc -(xb + xd) */ 00355 r1 = r1 - t1; 00356 00357 /* yb + yd */ 00358 t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u]; 00359 /* ya' = ya + yb + yc + yd */ 00360 pSrc[(2u * i0) + 1u] = (s1 + t2) >> 2u; 00361 00362 /* (ya + yc) - (yb + yd) */ 00363 s1 = s1 - t2; 00364 00365 /* (yb - yd) */ 00366 t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u]; 00367 /* (xb - xd) */ 00368 t2 = pSrc[2u * i1] - pSrc[2u * i3]; 00369 00370 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */ 00371 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) + 00372 ((int32_t) (((q63_t) s1 * si2) >> 32))) >> 1u; 00373 00374 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */ 00375 pSrc[(2u * i1) + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) - 00376 ((int32_t) (((q63_t) r1 * si2) >> 32))) >> 1u; 00377 00378 /* (xa - xc) + (yb - yd) */ 00379 r1 = r2 + t1; 00380 /* (xa - xc) - (yb - yd) */ 00381 r2 = r2 - t1; 00382 00383 /* (ya - yc) - (xb - xd) */ 00384 s1 = s2 - t2; 00385 /* (ya - yc) + (xb - xd) */ 00386 s2 = s2 + t2; 00387 00388 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */ 00389 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) + 00390 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u; 00391 00392 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */ 00393 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) - 00394 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u; 00395 00396 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */ 00397 pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) + 00398 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u; 00399 00400 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */ 00401 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) - 00402 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u; 00403 } 00404 } 00405 twidCoefModifier <<= 2u; 00406 } 00407 #else 00408 uint32_t n1, n2, ia1, ia2, ia3, i0, j, k; 00409 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3; 00410 00411 q31_t xa, xb, xc, xd; 00412 q31_t ya, yb, yc, yd; 00413 q31_t xa_out, xb_out, xc_out, xd_out; 00414 q31_t ya_out, yb_out, yc_out, yd_out; 00415 00416 q31_t *ptr1; 00417 q31_t *pSi0; 00418 q31_t *pSi1; 00419 q31_t *pSi2; 00420 q31_t *pSi3; 00421 q63_t xaya, xbyb, xcyc, xdyd; 00422 /* Total process is divided into three stages */ 00423 00424 /* process first stage, middle stages, & last stage */ 00425 00426 00427 /* start of first stage process */ 00428 00429 /* Initializations for the first stage */ 00430 n2 = fftLen; 00431 n1 = n2; 00432 /* n2 = fftLen/4 */ 00433 n2 >>= 2u; 00434 00435 ia1 = 0u; 00436 00437 j = n2; 00438 00439 pSi0 = pSrc; 00440 pSi1 = pSi0 + 2 * n2; 00441 pSi2 = pSi1 + 2 * n2; 00442 pSi3 = pSi2 + 2 * n2; 00443 00444 /* Calculation of first stage */ 00445 do 00446 { 00447 /* input is in 1.31(q31) format and provide 4 guard bits for the input */ 00448 00449 /* Butterfly implementation */ 00450 /* xa + xc */ 00451 r1 = (pSi0[0] >> 4u) + (pSi2[0] >> 4u); 00452 /* xa - xc */ 00453 r2 = (pSi0[0] >> 4u) - (pSi2[0] >> 4u); 00454 00455 /* xb + xd */ 00456 t1 = (pSi1[0] >> 4u) + (pSi3[0] >> 4u); 00457 00458 /* ya + yc */ 00459 s1 = (pSi0[1] >> 4u) + (pSi2[1] >> 4u); 00460 /* ya - yc */ 00461 s2 = (pSi0[1] >> 4u) - (pSi2[1] >> 4u); 00462 00463 /* xa' = xa + xb + xc + xd */ 00464 *pSi0++ = (r1 + t1); 00465 /* (xa + xc) - (xb + xd) */ 00466 r1 = r1 - t1; 00467 /* yb + yd */ 00468 t2 = (pSi1[1] >> 4u) + (pSi3[1] >> 4u); 00469 00470 /* ya' = ya + yb + yc + yd */ 00471 *pSi0++ = (s1 + t2); 00472 00473 /* (ya + yc) - (yb + yd) */ 00474 s1 = s1 - t2; 00475 00476 /* yb - yd */ 00477 t1 = (pSi1[1] >> 4u) - (pSi3[1] >> 4u); 00478 /* xb - xd */ 00479 t2 = (pSi1[0] >> 4u) - (pSi3[0] >> 4u); 00480 00481 /* index calculation for the coefficients */ 00482 ia2 = 2u * ia1; 00483 co2 = pCoef[ia2 * 2u]; 00484 si2 = pCoef[(ia2 * 2u) + 1u]; 00485 00486 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */ 00487 *pSi1++ = (((int32_t) (((q63_t) r1 * co2) >> 32)) + 00488 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u; 00489 00490 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */ 00491 *pSi1++ = (((int32_t) (((q63_t) s1 * co2) >> 32)) - 00492 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u; 00493 00494 /* (xa - xc) + (yb - yd) */ 00495 r1 = r2 + t1; 00496 /* (xa - xc) - (yb - yd) */ 00497 r2 = r2 - t1; 00498 00499 /* (ya - yc) - (xb - xd) */ 00500 s1 = s2 - t2; 00501 /* (ya - yc) + (xb - xd) */ 00502 s2 = s2 + t2; 00503 00504 co1 = pCoef[ia1 * 2u]; 00505 si1 = pCoef[(ia1 * 2u) + 1u]; 00506 00507 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */ 00508 *pSi2++ = (((int32_t) (((q63_t) r1 * co1) >> 32)) + 00509 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u; 00510 00511 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */ 00512 *pSi2++ = (((int32_t) (((q63_t) s1 * co1) >> 32)) - 00513 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u; 00514 00515 /* index calculation for the coefficients */ 00516 ia3 = 3u * ia1; 00517 co3 = pCoef[ia3 * 2u]; 00518 si3 = pCoef[(ia3 * 2u) + 1u]; 00519 00520 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */ 00521 *pSi3++ = (((int32_t) (((q63_t) r2 * co3) >> 32)) + 00522 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u; 00523 00524 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */ 00525 *pSi3++ = (((int32_t) (((q63_t) s2 * co3) >> 32)) - 00526 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u; 00527 00528 /* Twiddle coefficients index modifier */ 00529 ia1 = ia1 + twidCoefModifier; 00530 00531 } while(--j); 00532 00533 /* end of first stage process */ 00534 00535 /* data is in 5.27(q27) format */ 00536 00537 00538 /* start of Middle stages process */ 00539 00540 00541 /* each stage in middle stages provides two down scaling of the input */ 00542 00543 twidCoefModifier <<= 2u; 00544 00545 00546 for (k = fftLen / 4u; k > 4u; k >>= 2u) 00547 { 00548 /* Initializations for the first stage */ 00549 n1 = n2; 00550 n2 >>= 2u; 00551 ia1 = 0u; 00552 00553 /* Calculation of first stage */ 00554 for (j = 0u; j <= (n2 - 1u); j++) 00555 { 00556 /* index calculation for the coefficients */ 00557 ia2 = ia1 + ia1; 00558 ia3 = ia2 + ia1; 00559 co1 = pCoef[ia1 * 2u]; 00560 si1 = pCoef[(ia1 * 2u) + 1u]; 00561 co2 = pCoef[ia2 * 2u]; 00562 si2 = pCoef[(ia2 * 2u) + 1u]; 00563 co3 = pCoef[ia3 * 2u]; 00564 si3 = pCoef[(ia3 * 2u) + 1u]; 00565 /* Twiddle coefficients index modifier */ 00566 ia1 = ia1 + twidCoefModifier; 00567 00568 pSi0 = pSrc + 2 * j; 00569 pSi1 = pSi0 + 2 * n2; 00570 pSi2 = pSi1 + 2 * n2; 00571 pSi3 = pSi2 + 2 * n2; 00572 00573 for (i0 = j; i0 < fftLen; i0 += n1) 00574 { 00575 /* Butterfly implementation */ 00576 /* xa + xc */ 00577 r1 = pSi0[0] + pSi2[0]; 00578 00579 /* xa - xc */ 00580 r2 = pSi0[0] - pSi2[0]; 00581 00582 00583 /* ya + yc */ 00584 s1 = pSi0[1] + pSi2[1]; 00585 00586 /* ya - yc */ 00587 s2 = pSi0[1] - pSi2[1]; 00588 00589 00590 /* xb + xd */ 00591 t1 = pSi1[0] + pSi3[0]; 00592 00593 00594 /* xa' = xa + xb + xc + xd */ 00595 pSi0[0] = (r1 + t1) >> 2u; 00596 /* xa + xc -(xb + xd) */ 00597 r1 = r1 - t1; 00598 00599 /* yb + yd */ 00600 t2 = pSi1[1] + pSi3[1]; 00601 00602 /* ya' = ya + yb + yc + yd */ 00603 pSi0[1] = (s1 + t2) >> 2u; 00604 pSi0 += 2 * n1; 00605 00606 /* (ya + yc) - (yb + yd) */ 00607 s1 = s1 - t2; 00608 00609 /* (yb - yd) */ 00610 t1 = pSi1[1] - pSi3[1]; 00611 00612 /* (xb - xd) */ 00613 t2 = pSi1[0] - pSi3[0]; 00614 00615 00616 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */ 00617 pSi1[0] = (((int32_t) (((q63_t) r1 * co2) >> 32)) + 00618 ((int32_t) (((q63_t) s1 * si2) >> 32))) >> 1u; 00619 00620 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */ 00621 pSi1[1] = (((int32_t) (((q63_t) s1 * co2) >> 32)) - 00622 ((int32_t) (((q63_t) r1 * si2) >> 32))) >> 1u; 00623 pSi1 += 2 * n1; 00624 00625 /* (xa - xc) + (yb - yd) */ 00626 r1 = r2 + t1; 00627 /* (xa - xc) - (yb - yd) */ 00628 r2 = r2 - t1; 00629 00630 /* (ya - yc) - (xb - xd) */ 00631 s1 = s2 - t2; 00632 /* (ya - yc) + (xb - xd) */ 00633 s2 = s2 + t2; 00634 00635 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */ 00636 pSi2[0] = (((int32_t) (((q63_t) r1 * co1) >> 32)) + 00637 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u; 00638 00639 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */ 00640 pSi2[1] = (((int32_t) (((q63_t) s1 * co1) >> 32)) - 00641 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u; 00642 pSi2 += 2 * n1; 00643 00644 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */ 00645 pSi3[0] = (((int32_t) (((q63_t) r2 * co3) >> 32)) + 00646 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u; 00647 00648 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */ 00649 pSi3[1] = (((int32_t) (((q63_t) s2 * co3) >> 32)) - 00650 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u; 00651 pSi3 += 2 * n1; 00652 } 00653 } 00654 twidCoefModifier <<= 2u; 00655 } 00656 #endif 00657 00658 /* End of Middle stages process */ 00659 00660 /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */ 00661 /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */ 00662 /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */ 00663 /* data is in 5.27(q27) format for the 16 point as there are no middle stages */ 00664 00665 00666 /* start of Last stage process */ 00667 /* Initializations for the last stage */ 00668 j = fftLen >> 2; 00669 ptr1 = &pSrc[0]; 00670 00671 /* Calculations of last stage */ 00672 do 00673 { 00674 00675 #ifndef ARM_MATH_BIG_ENDIAN 00676 00677 /* Read xa (real), ya(imag) input */ 00678 xaya = *__SIMD64(ptr1)++; 00679 xa = (q31_t) xaya; 00680 ya = (q31_t) (xaya >> 32); 00681 00682 /* Read xb (real), yb(imag) input */ 00683 xbyb = *__SIMD64(ptr1)++; 00684 xb = (q31_t) xbyb; 00685 yb = (q31_t) (xbyb >> 32); 00686 00687 /* Read xc (real), yc(imag) input */ 00688 xcyc = *__SIMD64(ptr1)++; 00689 xc = (q31_t) xcyc; 00690 yc = (q31_t) (xcyc >> 32); 00691 00692 /* Read xc (real), yc(imag) input */ 00693 xdyd = *__SIMD64(ptr1)++; 00694 xd = (q31_t) xdyd; 00695 yd = (q31_t) (xdyd >> 32); 00696 00697 #else 00698 00699 /* Read xa (real), ya(imag) input */ 00700 xaya = *__SIMD64(ptr1)++; 00701 ya = (q31_t) xaya; 00702 xa = (q31_t) (xaya >> 32); 00703 00704 /* Read xb (real), yb(imag) input */ 00705 xbyb = *__SIMD64(ptr1)++; 00706 yb = (q31_t) xbyb; 00707 xb = (q31_t) (xbyb >> 32); 00708 00709 /* Read xc (real), yc(imag) input */ 00710 xcyc = *__SIMD64(ptr1)++; 00711 yc = (q31_t) xcyc; 00712 xc = (q31_t) (xcyc >> 32); 00713 00714 /* Read xc (real), yc(imag) input */ 00715 xdyd = *__SIMD64(ptr1)++; 00716 yd = (q31_t) xdyd; 00717 xd = (q31_t) (xdyd >> 32); 00718 00719 00720 #endif 00721 00722 /* xa' = xa + xb + xc + xd */ 00723 xa_out = xa + xb + xc + xd; 00724 00725 /* ya' = ya + yb + yc + yd */ 00726 ya_out = ya + yb + yc + yd; 00727 00728 /* pointer updation for writing */ 00729 ptr1 = ptr1 - 8u; 00730 00731 /* writing xa' and ya' */ 00732 *ptr1++ = xa_out; 00733 *ptr1++ = ya_out; 00734 00735 xc_out = (xa - xb + xc - xd); 00736 yc_out = (ya - yb + yc - yd); 00737 00738 /* writing xc' and yc' */ 00739 *ptr1++ = xc_out; 00740 *ptr1++ = yc_out; 00741 00742 xb_out = (xa + yb - xc - yd); 00743 yb_out = (ya - xb - yc + xd); 00744 00745 /* writing xb' and yb' */ 00746 *ptr1++ = xb_out; 00747 *ptr1++ = yb_out; 00748 00749 xd_out = (xa - yb - xc + yd); 00750 yd_out = (ya + xb - yc - xd); 00751 00752 /* writing xd' and yd' */ 00753 *ptr1++ = xd_out; 00754 *ptr1++ = yd_out; 00755 00756 00757 } while(--j); 00758 00759 /* output is in 11.21(q21) format for the 1024 point */ 00760 /* output is in 9.23(q23) format for the 256 point */ 00761 /* output is in 7.25(q25) format for the 64 point */ 00762 /* output is in 5.27(q27) format for the 16 point */ 00763 00764 /* End of last stage process */ 00765 00766 } 00767 00768 00769 /** 00770 * @brief Core function for the Q31 CIFFT butterfly process. 00771 * @param[in, out] *pSrc points to the in-place buffer of Q31 data type. 00772 * @param[in] fftLen length of the FFT. 00773 * @param[in] *pCoef points to twiddle coefficient buffer. 00774 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 00775 * @return none. 00776 */ 00777 00778 00779 /* 00780 * Radix-4 IFFT algorithm used is : 00781 * 00782 * CIFFT uses same twiddle coefficients as CFFT Function 00783 * x[k] = x[n] + (j)k * x[n + fftLen/4] + (-1)k * x[n+fftLen/2] + (-j)k * x[n+3*fftLen/4] 00784 * 00785 * 00786 * IFFT is implemented with following changes in equations from FFT 00787 * 00788 * Input real and imaginary data: 00789 * x(n) = xa + j * ya 00790 * x(n+N/4 ) = xb + j * yb 00791 * x(n+N/2 ) = xc + j * yc 00792 * x(n+3N 4) = xd + j * yd 00793 * 00794 * 00795 * Output real and imaginary data: 00796 * x(4r) = xa'+ j * ya' 00797 * x(4r+1) = xb'+ j * yb' 00798 * x(4r+2) = xc'+ j * yc' 00799 * x(4r+3) = xd'+ j * yd' 00800 * 00801 * 00802 * Twiddle factors for radix-4 IFFT: 00803 * Wn = co1 + j * (si1) 00804 * W2n = co2 + j * (si2) 00805 * W3n = co3 + j * (si3) 00806 00807 * The real and imaginary output values for the radix-4 butterfly are 00808 * xa' = xa + xb + xc + xd 00809 * ya' = ya + yb + yc + yd 00810 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) 00811 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) 00812 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) 00813 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) 00814 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) 00815 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) 00816 * 00817 */ 00818 00819 void arm_radix4_butterfly_inverse_q31( 00820 q31_t * pSrc, 00821 uint32_t fftLen, 00822 q31_t * pCoef, 00823 uint32_t twidCoefModifier) 00824 { 00825 #if defined(ARM_MATH_CM7) 00826 uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k; 00827 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3; 00828 q31_t xa, xb, xc, xd; 00829 q31_t ya, yb, yc, yd; 00830 q31_t xa_out, xb_out, xc_out, xd_out; 00831 q31_t ya_out, yb_out, yc_out, yd_out; 00832 00833 q31_t *ptr1; 00834 q63_t xaya, xbyb, xcyc, xdyd; 00835 00836 /* input is be 1.31(q31) format for all FFT sizes */ 00837 /* Total process is divided into three stages */ 00838 /* process first stage, middle stages, & last stage */ 00839 00840 /* Start of first stage process */ 00841 00842 /* Initializations for the first stage */ 00843 n2 = fftLen; 00844 n1 = n2; 00845 /* n2 = fftLen/4 */ 00846 n2 >>= 2u; 00847 i0 = 0u; 00848 ia1 = 0u; 00849 00850 j = n2; 00851 00852 do 00853 { 00854 00855 /* input is in 1.31(q31) format and provide 4 guard bits for the input */ 00856 00857 /* index calculation for the input as, */ 00858 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */ 00859 i1 = i0 + n2; 00860 i2 = i1 + n2; 00861 i3 = i2 + n2; 00862 00863 /* Butterfly implementation */ 00864 /* xa + xc */ 00865 r1 = (pSrc[2u * i0] >> 4u) + (pSrc[2u * i2] >> 4u); 00866 /* xa - xc */ 00867 r2 = (pSrc[2u * i0] >> 4u) - (pSrc[2u * i2] >> 4u); 00868 00869 /* xb + xd */ 00870 t1 = (pSrc[2u * i1] >> 4u) + (pSrc[2u * i3] >> 4u); 00871 00872 /* ya + yc */ 00873 s1 = (pSrc[(2u * i0) + 1u] >> 4u) + (pSrc[(2u * i2) + 1u] >> 4u); 00874 /* ya - yc */ 00875 s2 = (pSrc[(2u * i0) + 1u] >> 4u) - (pSrc[(2u * i2) + 1u] >> 4u); 00876 00877 /* xa' = xa + xb + xc + xd */ 00878 pSrc[2u * i0] = (r1 + t1); 00879 /* (xa + xc) - (xb + xd) */ 00880 r1 = r1 - t1; 00881 /* yb + yd */ 00882 t2 = (pSrc[(2u * i1) + 1u] >> 4u) + (pSrc[(2u * i3) + 1u] >> 4u); 00883 /* ya' = ya + yb + yc + yd */ 00884 pSrc[(2u * i0) + 1u] = (s1 + t2); 00885 00886 /* (ya + yc) - (yb + yd) */ 00887 s1 = s1 - t2; 00888 00889 /* yb - yd */ 00890 t1 = (pSrc[(2u * i1) + 1u] >> 4u) - (pSrc[(2u * i3) + 1u] >> 4u); 00891 /* xb - xd */ 00892 t2 = (pSrc[2u * i1] >> 4u) - (pSrc[2u * i3] >> 4u); 00893 00894 /* index calculation for the coefficients */ 00895 ia2 = 2u * ia1; 00896 co2 = pCoef[ia2 * 2u]; 00897 si2 = pCoef[(ia2 * 2u) + 1u]; 00898 00899 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ 00900 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) - 00901 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u; 00902 00903 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ 00904 pSrc[2u * i1 + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) + 00905 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u; 00906 00907 /* (xa - xc) - (yb - yd) */ 00908 r1 = r2 - t1; 00909 /* (xa - xc) + (yb - yd) */ 00910 r2 = r2 + t1; 00911 00912 /* (ya - yc) + (xb - xd) */ 00913 s1 = s2 + t2; 00914 /* (ya - yc) - (xb - xd) */ 00915 s2 = s2 - t2; 00916 00917 co1 = pCoef[ia1 * 2u]; 00918 si1 = pCoef[(ia1 * 2u) + 1u]; 00919 00920 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ 00921 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) - 00922 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u; 00923 00924 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ 00925 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) + 00926 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u; 00927 00928 /* index calculation for the coefficients */ 00929 ia3 = 3u * ia1; 00930 co3 = pCoef[ia3 * 2u]; 00931 si3 = pCoef[(ia3 * 2u) + 1u]; 00932 00933 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ 00934 pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) - 00935 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u; 00936 00937 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ 00938 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) + 00939 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u; 00940 00941 /* Twiddle coefficients index modifier */ 00942 ia1 = ia1 + twidCoefModifier; 00943 00944 /* Updating input index */ 00945 i0 = i0 + 1u; 00946 00947 } while(--j); 00948 00949 /* data is in 5.27(q27) format */ 00950 /* each stage provides two down scaling of the input */ 00951 00952 00953 /* Start of Middle stages process */ 00954 00955 twidCoefModifier <<= 2u; 00956 00957 /* Calculation of second stage to excluding last stage */ 00958 for (k = fftLen / 4u; k > 4u; k >>= 2u) 00959 { 00960 /* Initializations for the first stage */ 00961 n1 = n2; 00962 n2 >>= 2u; 00963 ia1 = 0u; 00964 00965 for (j = 0; j <= (n2 - 1u); j++) 00966 { 00967 /* index calculation for the coefficients */ 00968 ia2 = ia1 + ia1; 00969 ia3 = ia2 + ia1; 00970 co1 = pCoef[ia1 * 2u]; 00971 si1 = pCoef[(ia1 * 2u) + 1u]; 00972 co2 = pCoef[ia2 * 2u]; 00973 si2 = pCoef[(ia2 * 2u) + 1u]; 00974 co3 = pCoef[ia3 * 2u]; 00975 si3 = pCoef[(ia3 * 2u) + 1u]; 00976 /* Twiddle coefficients index modifier */ 00977 ia1 = ia1 + twidCoefModifier; 00978 00979 for (i0 = j; i0 < fftLen; i0 += n1) 00980 { 00981 /* index calculation for the input as, */ 00982 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */ 00983 i1 = i0 + n2; 00984 i2 = i1 + n2; 00985 i3 = i2 + n2; 00986 00987 /* Butterfly implementation */ 00988 /* xa + xc */ 00989 r1 = pSrc[2u * i0] + pSrc[2u * i2]; 00990 /* xa - xc */ 00991 r2 = pSrc[2u * i0] - pSrc[2u * i2]; 00992 00993 /* ya + yc */ 00994 s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u]; 00995 /* ya - yc */ 00996 s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u]; 00997 00998 /* xb + xd */ 00999 t1 = pSrc[2u * i1] + pSrc[2u * i3]; 01000 01001 /* xa' = xa + xb + xc + xd */ 01002 pSrc[2u * i0] = (r1 + t1) >> 2u; 01003 /* xa + xc -(xb + xd) */ 01004 r1 = r1 - t1; 01005 /* yb + yd */ 01006 t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u]; 01007 /* ya' = ya + yb + yc + yd */ 01008 pSrc[(2u * i0) + 1u] = (s1 + t2) >> 2u; 01009 01010 /* (ya + yc) - (yb + yd) */ 01011 s1 = s1 - t2; 01012 01013 /* (yb - yd) */ 01014 t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u]; 01015 /* (xb - xd) */ 01016 t2 = pSrc[2u * i1] - pSrc[2u * i3]; 01017 01018 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ 01019 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32u)) - 01020 ((int32_t) (((q63_t) s1 * si2) >> 32u))) >> 1u; 01021 01022 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ 01023 pSrc[(2u * i1) + 1u] = 01024 (((int32_t) (((q63_t) s1 * co2) >> 32u)) + 01025 ((int32_t) (((q63_t) r1 * si2) >> 32u))) >> 1u; 01026 01027 /* (xa - xc) - (yb - yd) */ 01028 r1 = r2 - t1; 01029 /* (xa - xc) + (yb - yd) */ 01030 r2 = r2 + t1; 01031 01032 /* (ya - yc) + (xb - xd) */ 01033 s1 = s2 + t2; 01034 /* (ya - yc) - (xb - xd) */ 01035 s2 = s2 - t2; 01036 01037 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ 01038 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) - 01039 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u; 01040 01041 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ 01042 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) + 01043 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u; 01044 01045 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ 01046 pSrc[(2u * i3)] = (((int32_t) (((q63_t) r2 * co3) >> 32)) - 01047 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u; 01048 01049 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ 01050 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) + 01051 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u; 01052 } 01053 } 01054 twidCoefModifier <<= 2u; 01055 } 01056 #else 01057 uint32_t n1, n2, ia1, ia2, ia3, i0, j, k; 01058 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3; 01059 q31_t xa, xb, xc, xd; 01060 q31_t ya, yb, yc, yd; 01061 q31_t xa_out, xb_out, xc_out, xd_out; 01062 q31_t ya_out, yb_out, yc_out, yd_out; 01063 01064 q31_t *ptr1; 01065 q31_t *pSi0; 01066 q31_t *pSi1; 01067 q31_t *pSi2; 01068 q31_t *pSi3; 01069 q63_t xaya, xbyb, xcyc, xdyd; 01070 01071 /* input is be 1.31(q31) format for all FFT sizes */ 01072 /* Total process is divided into three stages */ 01073 /* process first stage, middle stages, & last stage */ 01074 01075 /* Start of first stage process */ 01076 01077 /* Initializations for the first stage */ 01078 n2 = fftLen; 01079 n1 = n2; 01080 /* n2 = fftLen/4 */ 01081 n2 >>= 2u; 01082 01083 ia1 = 0u; 01084 01085 j = n2; 01086 01087 pSi0 = pSrc; 01088 pSi1 = pSi0 + 2 * n2; 01089 pSi2 = pSi1 + 2 * n2; 01090 pSi3 = pSi2 + 2 * n2; 01091 01092 do 01093 { 01094 /* Butterfly implementation */ 01095 /* xa + xc */ 01096 r1 = (pSi0[0] >> 4u) + (pSi2[0] >> 4u); 01097 /* xa - xc */ 01098 r2 = (pSi0[0] >> 4u) - (pSi2[0] >> 4u); 01099 01100 /* xb + xd */ 01101 t1 = (pSi1[0] >> 4u) + (pSi3[0] >> 4u); 01102 01103 /* ya + yc */ 01104 s1 = (pSi0[1] >> 4u) + (pSi2[1] >> 4u); 01105 /* ya - yc */ 01106 s2 = (pSi0[1] >> 4u) - (pSi2[1] >> 4u); 01107 01108 /* xa' = xa + xb + xc + xd */ 01109 *pSi0++ = (r1 + t1); 01110 /* (xa + xc) - (xb + xd) */ 01111 r1 = r1 - t1; 01112 /* yb + yd */ 01113 t2 = (pSi1[1] >> 4u) + (pSi3[1] >> 4u); 01114 /* ya' = ya + yb + yc + yd */ 01115 *pSi0++ = (s1 + t2); 01116 01117 /* (ya + yc) - (yb + yd) */ 01118 s1 = s1 - t2; 01119 01120 /* yb - yd */ 01121 t1 = (pSi1[1] >> 4u) - (pSi3[1] >> 4u); 01122 /* xb - xd */ 01123 t2 = (pSi1[0] >> 4u) - (pSi3[0] >> 4u); 01124 01125 /* index calculation for the coefficients */ 01126 ia2 = 2u * ia1; 01127 co2 = pCoef[ia2 * 2u]; 01128 si2 = pCoef[(ia2 * 2u) + 1u]; 01129 01130 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ 01131 *pSi1++ = (((int32_t) (((q63_t) r1 * co2) >> 32)) - 01132 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u; 01133 01134 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ 01135 *pSi1++ = (((int32_t) (((q63_t) s1 * co2) >> 32)) + 01136 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u; 01137 01138 /* (xa - xc) - (yb - yd) */ 01139 r1 = r2 - t1; 01140 /* (xa - xc) + (yb - yd) */ 01141 r2 = r2 + t1; 01142 01143 /* (ya - yc) + (xb - xd) */ 01144 s1 = s2 + t2; 01145 /* (ya - yc) - (xb - xd) */ 01146 s2 = s2 - t2; 01147 01148 co1 = pCoef[ia1 * 2u]; 01149 si1 = pCoef[(ia1 * 2u) + 1u]; 01150 01151 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ 01152 *pSi2++ = (((int32_t) (((q63_t) r1 * co1) >> 32)) - 01153 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u; 01154 01155 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ 01156 *pSi2++ = (((int32_t) (((q63_t) s1 * co1) >> 32)) + 01157 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u; 01158 01159 /* index calculation for the coefficients */ 01160 ia3 = 3u * ia1; 01161 co3 = pCoef[ia3 * 2u]; 01162 si3 = pCoef[(ia3 * 2u) + 1u]; 01163 01164 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ 01165 *pSi3++ = (((int32_t) (((q63_t) r2 * co3) >> 32)) - 01166 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u; 01167 01168 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ 01169 *pSi3++ = (((int32_t) (((q63_t) s2 * co3) >> 32)) + 01170 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u; 01171 01172 /* Twiddle coefficients index modifier */ 01173 ia1 = ia1 + twidCoefModifier; 01174 01175 } while(--j); 01176 01177 /* data is in 5.27(q27) format */ 01178 /* each stage provides two down scaling of the input */ 01179 01180 01181 /* Start of Middle stages process */ 01182 01183 twidCoefModifier <<= 2u; 01184 01185 /* Calculation of second stage to excluding last stage */ 01186 for (k = fftLen / 4u; k > 4u; k >>= 2u) 01187 { 01188 /* Initializations for the first stage */ 01189 n1 = n2; 01190 n2 >>= 2u; 01191 ia1 = 0u; 01192 01193 for (j = 0; j <= (n2 - 1u); j++) 01194 { 01195 /* index calculation for the coefficients */ 01196 ia2 = ia1 + ia1; 01197 ia3 = ia2 + ia1; 01198 co1 = pCoef[ia1 * 2u]; 01199 si1 = pCoef[(ia1 * 2u) + 1u]; 01200 co2 = pCoef[ia2 * 2u]; 01201 si2 = pCoef[(ia2 * 2u) + 1u]; 01202 co3 = pCoef[ia3 * 2u]; 01203 si3 = pCoef[(ia3 * 2u) + 1u]; 01204 /* Twiddle coefficients index modifier */ 01205 ia1 = ia1 + twidCoefModifier; 01206 01207 pSi0 = pSrc + 2 * j; 01208 pSi1 = pSi0 + 2 * n2; 01209 pSi2 = pSi1 + 2 * n2; 01210 pSi3 = pSi2 + 2 * n2; 01211 01212 for (i0 = j; i0 < fftLen; i0 += n1) 01213 { 01214 /* Butterfly implementation */ 01215 /* xa + xc */ 01216 r1 = pSi0[0] + pSi2[0]; 01217 01218 /* xa - xc */ 01219 r2 = pSi0[0] - pSi2[0]; 01220 01221 01222 /* ya + yc */ 01223 s1 = pSi0[1] + pSi2[1]; 01224 01225 /* ya - yc */ 01226 s2 = pSi0[1] - pSi2[1]; 01227 01228 01229 /* xb + xd */ 01230 t1 = pSi1[0] + pSi3[0]; 01231 01232 01233 /* xa' = xa + xb + xc + xd */ 01234 pSi0[0] = (r1 + t1) >> 2u; 01235 /* xa + xc -(xb + xd) */ 01236 r1 = r1 - t1; 01237 /* yb + yd */ 01238 t2 = pSi1[1] + pSi3[1]; 01239 01240 /* ya' = ya + yb + yc + yd */ 01241 pSi0[1] = (s1 + t2) >> 2u; 01242 pSi0 += 2 * n1; 01243 01244 /* (ya + yc) - (yb + yd) */ 01245 s1 = s1 - t2; 01246 01247 /* (yb - yd) */ 01248 t1 = pSi1[1] - pSi3[1]; 01249 01250 /* (xb - xd) */ 01251 t2 = pSi1[0] - pSi3[0]; 01252 01253 01254 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ 01255 pSi1[0] = (((int32_t) (((q63_t) r1 * co2) >> 32u)) - 01256 ((int32_t) (((q63_t) s1 * si2) >> 32u))) >> 1u; 01257 01258 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ 01259 pSi1[1] = 01260 01261 (((int32_t) (((q63_t) s1 * co2) >> 32u)) + 01262 ((int32_t) (((q63_t) r1 * si2) >> 32u))) >> 1u; 01263 pSi1 += 2 * n1; 01264 01265 /* (xa - xc) - (yb - yd) */ 01266 r1 = r2 - t1; 01267 /* (xa - xc) + (yb - yd) */ 01268 r2 = r2 + t1; 01269 01270 /* (ya - yc) + (xb - xd) */ 01271 s1 = s2 + t2; 01272 /* (ya - yc) - (xb - xd) */ 01273 s2 = s2 - t2; 01274 01275 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ 01276 pSi2[0] = (((int32_t) (((q63_t) r1 * co1) >> 32)) - 01277 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u; 01278 01279 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ 01280 pSi2[1] = (((int32_t) (((q63_t) s1 * co1) >> 32)) + 01281 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u; 01282 pSi2 += 2 * n1; 01283 01284 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ 01285 pSi3[0] = (((int32_t) (((q63_t) r2 * co3) >> 32)) - 01286 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u; 01287 01288 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ 01289 pSi3[1] = (((int32_t) (((q63_t) s2 * co3) >> 32)) + 01290 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u; 01291 pSi3 += 2 * n1; 01292 } 01293 } 01294 twidCoefModifier <<= 2u; 01295 } 01296 #endif 01297 01298 /* End of Middle stages process */ 01299 01300 /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */ 01301 /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */ 01302 /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */ 01303 /* data is in 5.27(q27) format for the 16 point as there are no middle stages */ 01304 01305 01306 /* Start of last stage process */ 01307 01308 01309 /* Initializations for the last stage */ 01310 j = fftLen >> 2; 01311 ptr1 = &pSrc[0]; 01312 01313 /* Calculations of last stage */ 01314 do 01315 { 01316 #ifndef ARM_MATH_BIG_ENDIAN 01317 /* Read xa (real), ya(imag) input */ 01318 xaya = *__SIMD64(ptr1)++; 01319 xa = (q31_t) xaya; 01320 ya = (q31_t) (xaya >> 32); 01321 01322 /* Read xb (real), yb(imag) input */ 01323 xbyb = *__SIMD64(ptr1)++; 01324 xb = (q31_t) xbyb; 01325 yb = (q31_t) (xbyb >> 32); 01326 01327 /* Read xc (real), yc(imag) input */ 01328 xcyc = *__SIMD64(ptr1)++; 01329 xc = (q31_t) xcyc; 01330 yc = (q31_t) (xcyc >> 32); 01331 01332 /* Read xc (real), yc(imag) input */ 01333 xdyd = *__SIMD64(ptr1)++; 01334 xd = (q31_t) xdyd; 01335 yd = (q31_t) (xdyd >> 32); 01336 01337 #else 01338 01339 /* Read xa (real), ya(imag) input */ 01340 xaya = *__SIMD64(ptr1)++; 01341 ya = (q31_t) xaya; 01342 xa = (q31_t) (xaya >> 32); 01343 01344 /* Read xb (real), yb(imag) input */ 01345 xbyb = *__SIMD64(ptr1)++; 01346 yb = (q31_t) xbyb; 01347 xb = (q31_t) (xbyb >> 32); 01348 01349 /* Read xc (real), yc(imag) input */ 01350 xcyc = *__SIMD64(ptr1)++; 01351 yc = (q31_t) xcyc; 01352 xc = (q31_t) (xcyc >> 32); 01353 01354 /* Read xc (real), yc(imag) input */ 01355 xdyd = *__SIMD64(ptr1)++; 01356 yd = (q31_t) xdyd; 01357 xd = (q31_t) (xdyd >> 32); 01358 01359 01360 #endif 01361 01362 /* xa' = xa + xb + xc + xd */ 01363 xa_out = xa + xb + xc + xd; 01364 01365 /* ya' = ya + yb + yc + yd */ 01366 ya_out = ya + yb + yc + yd; 01367 01368 /* pointer updation for writing */ 01369 ptr1 = ptr1 - 8u; 01370 01371 /* writing xa' and ya' */ 01372 *ptr1++ = xa_out; 01373 *ptr1++ = ya_out; 01374 01375 xc_out = (xa - xb + xc - xd); 01376 yc_out = (ya - yb + yc - yd); 01377 01378 /* writing xc' and yc' */ 01379 *ptr1++ = xc_out; 01380 *ptr1++ = yc_out; 01381 01382 xb_out = (xa - yb - xc + yd); 01383 yb_out = (ya + xb - yc - xd); 01384 01385 /* writing xb' and yb' */ 01386 *ptr1++ = xb_out; 01387 *ptr1++ = yb_out; 01388 01389 xd_out = (xa + yb - xc - yd); 01390 yd_out = (ya - xb - yc + xd); 01391 01392 /* writing xd' and yd' */ 01393 *ptr1++ = xd_out; 01394 *ptr1++ = yd_out; 01395 01396 } while(--j); 01397 01398 /* output is in 11.21(q21) format for the 1024 point */ 01399 /* output is in 9.23(q23) format for the 256 point */ 01400 /* output is in 7.25(q25) format for the 64 point */ 01401 /* output is in 5.27(q27) format for the 16 point */ 01402 01403 /* End of last stage process */ 01404 }
Generated on Tue Jul 12 2022 11:59:15 by 1.7.2