Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of OmniWheels by
arm_cfft_radix4_q31.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2014 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 19. March 2015 00005 * $Revision: V.1.4.5 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_cfft_radix4_q31.c 00009 * 00010 * Description: This file has function definition of Radix-4 FFT & IFFT function and 00011 * In-place bit reversal using bit reversal table 00012 * 00013 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00014 * 00015 * Redistribution and use in source and binary forms, with or without 00016 * modification, are permitted provided that the following conditions 00017 * are met: 00018 * - Redistributions of source code must retain the above copyright 00019 * notice, this list of conditions and the following disclaimer. 00020 * - Redistributions in binary form must reproduce the above copyright 00021 * notice, this list of conditions and the following disclaimer in 00022 * the documentation and/or other materials provided with the 00023 * distribution. 00024 * - Neither the name of ARM LIMITED nor the names of its contributors 00025 * may be used to endorse or promote products derived from this 00026 * software without specific prior written permission. 00027 * 00028 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00029 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00030 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00031 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 00032 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00033 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00034 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00035 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00036 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00037 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00038 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00039 * POSSIBILITY OF SUCH DAMAGE. 00040 * -------------------------------------------------------------------- */ 00041 00042 #include "arm_math.h" 00043 00044 void arm_radix4_butterfly_inverse_q31( 00045 q31_t * pSrc, 00046 uint32_t fftLen, 00047 q31_t * pCoef, 00048 uint32_t twidCoefModifier); 00049 00050 void arm_radix4_butterfly_q31( 00051 q31_t * pSrc, 00052 uint32_t fftLen, 00053 q31_t * pCoef, 00054 uint32_t twidCoefModifier); 00055 00056 void arm_bitreversal_q31( 00057 q31_t * pSrc, 00058 uint32_t fftLen, 00059 uint16_t bitRevFactor, 00060 uint16_t * pBitRevTab); 00061 00062 /** 00063 * @ingroup groupTransforms 00064 */ 00065 00066 /** 00067 * @addtogroup ComplexFFT 00068 * @{ 00069 */ 00070 00071 /** 00072 * @details 00073 * @brief Processing function for the Q31 CFFT/CIFFT. 00074 * @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q31 and will be removed 00075 * @param[in] *S points to an instance of the Q31 CFFT/CIFFT structure. 00076 * @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place. 00077 * @return none. 00078 * 00079 * \par Input and output formats: 00080 * \par 00081 * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process. 00082 * Hence the output format is different for different FFT sizes. 00083 * The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT: 00084 * \par 00085 * \image html CFFTQ31.gif "Input and Output Formats for Q31 CFFT" 00086 * \image html CIFFTQ31.gif "Input and Output Formats for Q31 CIFFT" 00087 * 00088 */ 00089 00090 void arm_cfft_radix4_q31( 00091 const arm_cfft_radix4_instance_q31 * S, 00092 q31_t * pSrc) 00093 { 00094 if(S->ifftFlag == 1u) 00095 { 00096 /* Complex IFFT radix-4 */ 00097 arm_radix4_butterfly_inverse_q31(pSrc, S->fftLen, S->pTwiddle, 00098 S->twidCoefModifier); 00099 } 00100 else 00101 { 00102 /* Complex FFT radix-4 */ 00103 arm_radix4_butterfly_q31(pSrc, S->fftLen, S->pTwiddle, 00104 S->twidCoefModifier); 00105 } 00106 00107 00108 if(S->bitReverseFlag == 1u) 00109 { 00110 /* Bit Reversal */ 00111 arm_bitreversal_q31(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable); 00112 } 00113 00114 } 00115 00116 /** 00117 * @} end of ComplexFFT group 00118 */ 00119 00120 /* 00121 * Radix-4 FFT algorithm used is : 00122 * 00123 * Input real and imaginary data: 00124 * x(n) = xa + j * ya 00125 * x(n+N/4 ) = xb + j * yb 00126 * x(n+N/2 ) = xc + j * yc 00127 * x(n+3N 4) = xd + j * yd 00128 * 00129 * 00130 * Output real and imaginary data: 00131 * x(4r) = xa'+ j * ya' 00132 * x(4r+1) = xb'+ j * yb' 00133 * x(4r+2) = xc'+ j * yc' 00134 * x(4r+3) = xd'+ j * yd' 00135 * 00136 * 00137 * Twiddle factors for radix-4 FFT: 00138 * Wn = co1 + j * (- si1) 00139 * W2n = co2 + j * (- si2) 00140 * W3n = co3 + j * (- si3) 00141 * 00142 * Butterfly implementation: 00143 * xa' = xa + xb + xc + xd 00144 * ya' = ya + yb + yc + yd 00145 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) 00146 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) 00147 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) 00148 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) 00149 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) 00150 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) 00151 * 00152 */ 00153 00154 /** 00155 * @brief Core function for the Q31 CFFT butterfly process. 00156 * @param[in, out] *pSrc points to the in-place buffer of Q31 data type. 00157 * @param[in] fftLen length of the FFT. 00158 * @param[in] *pCoef points to twiddle coefficient buffer. 00159 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 00160 * @return none. 00161 */ 00162 00163 void arm_radix4_butterfly_q31( 00164 q31_t * pSrc, 00165 uint32_t fftLen, 00166 q31_t * pCoef, 00167 uint32_t twidCoefModifier) 00168 { 00169 #if defined(ARM_MATH_CM7) 00170 uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k; 00171 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3; 00172 00173 q31_t xa, xb, xc, xd; 00174 q31_t ya, yb, yc, yd; 00175 q31_t xa_out, xb_out, xc_out, xd_out; 00176 q31_t ya_out, yb_out, yc_out, yd_out; 00177 00178 q31_t *ptr1; 00179 q63_t xaya, xbyb, xcyc, xdyd; 00180 /* Total process is divided into three stages */ 00181 00182 /* process first stage, middle stages, & last stage */ 00183 00184 00185 /* start of first stage process */ 00186 00187 /* Initializations for the first stage */ 00188 n2 = fftLen; 00189 n1 = n2; 00190 /* n2 = fftLen/4 */ 00191 n2 >>= 2u; 00192 i0 = 0u; 00193 ia1 = 0u; 00194 00195 j = n2; 00196 00197 /* Calculation of first stage */ 00198 do 00199 { 00200 /* index calculation for the input as, */ 00201 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */ 00202 i1 = i0 + n2; 00203 i2 = i1 + n2; 00204 i3 = i2 + n2; 00205 00206 /* input is in 1.31(q31) format and provide 4 guard bits for the input */ 00207 00208 /* Butterfly implementation */ 00209 /* xa + xc */ 00210 r1 = (pSrc[(2u * i0)] >> 4u) + (pSrc[(2u * i2)] >> 4u); 00211 /* xa - xc */ 00212 r2 = (pSrc[2u * i0] >> 4u) - (pSrc[2u * i2] >> 4u); 00213 00214 /* xb + xd */ 00215 t1 = (pSrc[2u * i1] >> 4u) + (pSrc[2u * i3] >> 4u); 00216 00217 /* ya + yc */ 00218 s1 = (pSrc[(2u * i0) + 1u] >> 4u) + (pSrc[(2u * i2) + 1u] >> 4u); 00219 /* ya - yc */ 00220 s2 = (pSrc[(2u * i0) + 1u] >> 4u) - (pSrc[(2u * i2) + 1u] >> 4u); 00221 00222 /* xa' = xa + xb + xc + xd */ 00223 pSrc[2u * i0] = (r1 + t1); 00224 /* (xa + xc) - (xb + xd) */ 00225 r1 = r1 - t1; 00226 /* yb + yd */ 00227 t2 = (pSrc[(2u * i1) + 1u] >> 4u) + (pSrc[(2u * i3) + 1u] >> 4u); 00228 00229 /* ya' = ya + yb + yc + yd */ 00230 pSrc[(2u * i0) + 1u] = (s1 + t2); 00231 00232 /* (ya + yc) - (yb + yd) */ 00233 s1 = s1 - t2; 00234 00235 /* yb - yd */ 00236 t1 = (pSrc[(2u * i1) + 1u] >> 4u) - (pSrc[(2u * i3) + 1u] >> 4u); 00237 /* xb - xd */ 00238 t2 = (pSrc[2u * i1] >> 4u) - (pSrc[2u * i3] >> 4u); 00239 00240 /* index calculation for the coefficients */ 00241 ia2 = 2u * ia1; 00242 co2 = pCoef[ia2 * 2u]; 00243 si2 = pCoef[(ia2 * 2u) + 1u]; 00244 00245 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */ 00246 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) + 00247 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u; 00248 00249 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */ 00250 pSrc[(2u * i1) + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) - 00251 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u; 00252 00253 /* (xa - xc) + (yb - yd) */ 00254 r1 = r2 + t1; 00255 /* (xa - xc) - (yb - yd) */ 00256 r2 = r2 - t1; 00257 00258 /* (ya - yc) - (xb - xd) */ 00259 s1 = s2 - t2; 00260 /* (ya - yc) + (xb - xd) */ 00261 s2 = s2 + t2; 00262 00263 co1 = pCoef[ia1 * 2u]; 00264 si1 = pCoef[(ia1 * 2u) + 1u]; 00265 00266 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */ 00267 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) + 00268 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u; 00269 00270 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */ 00271 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) - 00272 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u; 00273 00274 /* index calculation for the coefficients */ 00275 ia3 = 3u * ia1; 00276 co3 = pCoef[ia3 * 2u]; 00277 si3 = pCoef[(ia3 * 2u) + 1u]; 00278 00279 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */ 00280 pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) + 00281 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u; 00282 00283 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */ 00284 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) - 00285 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u; 00286 00287 /* Twiddle coefficients index modifier */ 00288 ia1 = ia1 + twidCoefModifier; 00289 00290 /* Updating input index */ 00291 i0 = i0 + 1u; 00292 00293 } while(--j); 00294 00295 /* end of first stage process */ 00296 00297 /* data is in 5.27(q27) format */ 00298 00299 00300 /* start of Middle stages process */ 00301 00302 00303 /* each stage in middle stages provides two down scaling of the input */ 00304 00305 twidCoefModifier <<= 2u; 00306 00307 00308 for (k = fftLen / 4u; k > 4u; k >>= 2u) 00309 { 00310 /* Initializations for the first stage */ 00311 n1 = n2; 00312 n2 >>= 2u; 00313 ia1 = 0u; 00314 00315 /* Calculation of first stage */ 00316 for (j = 0u; j <= (n2 - 1u); j++) 00317 { 00318 /* index calculation for the coefficients */ 00319 ia2 = ia1 + ia1; 00320 ia3 = ia2 + ia1; 00321 co1 = pCoef[ia1 * 2u]; 00322 si1 = pCoef[(ia1 * 2u) + 1u]; 00323 co2 = pCoef[ia2 * 2u]; 00324 si2 = pCoef[(ia2 * 2u) + 1u]; 00325 co3 = pCoef[ia3 * 2u]; 00326 si3 = pCoef[(ia3 * 2u) + 1u]; 00327 /* Twiddle coefficients index modifier */ 00328 ia1 = ia1 + twidCoefModifier; 00329 00330 for (i0 = j; i0 < fftLen; i0 += n1) 00331 { 00332 /* index calculation for the input as, */ 00333 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */ 00334 i1 = i0 + n2; 00335 i2 = i1 + n2; 00336 i3 = i2 + n2; 00337 00338 /* Butterfly implementation */ 00339 /* xa + xc */ 00340 r1 = pSrc[2u * i0] + pSrc[2u * i2]; 00341 /* xa - xc */ 00342 r2 = pSrc[2u * i0] - pSrc[2u * i2]; 00343 00344 /* ya + yc */ 00345 s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u]; 00346 /* ya - yc */ 00347 s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u]; 00348 00349 /* xb + xd */ 00350 t1 = pSrc[2u * i1] + pSrc[2u * i3]; 00351 00352 /* xa' = xa + xb + xc + xd */ 00353 pSrc[2u * i0] = (r1 + t1) >> 2u; 00354 /* xa + xc -(xb + xd) */ 00355 r1 = r1 - t1; 00356 00357 /* yb + yd */ 00358 t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u]; 00359 /* ya' = ya + yb + yc + yd */ 00360 pSrc[(2u * i0) + 1u] = (s1 + t2) >> 2u; 00361 00362 /* (ya + yc) - (yb + yd) */ 00363 s1 = s1 - t2; 00364 00365 /* (yb - yd) */ 00366 t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u]; 00367 /* (xb - xd) */ 00368 t2 = pSrc[2u * i1] - pSrc[2u * i3]; 00369 00370 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */ 00371 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) + 00372 ((int32_t) (((q63_t) s1 * si2) >> 32))) >> 1u; 00373 00374 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */ 00375 pSrc[(2u * i1) + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) - 00376 ((int32_t) (((q63_t) r1 * si2) >> 32))) >> 1u; 00377 00378 /* (xa - xc) + (yb - yd) */ 00379 r1 = r2 + t1; 00380 /* (xa - xc) - (yb - yd) */ 00381 r2 = r2 - t1; 00382 00383 /* (ya - yc) - (xb - xd) */ 00384 s1 = s2 - t2; 00385 /* (ya - yc) + (xb - xd) */ 00386 s2 = s2 + t2; 00387 00388 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */ 00389 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) + 00390 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u; 00391 00392 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */ 00393 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) - 00394 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u; 00395 00396 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */ 00397 pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) + 00398 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u; 00399 00400 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */ 00401 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) - 00402 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u; 00403 } 00404 } 00405 twidCoefModifier <<= 2u; 00406 } 00407 #else 00408 uint32_t n1, n2, ia1, ia2, ia3, i0, j, k; 00409 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3; 00410 00411 q31_t xa, xb, xc, xd; 00412 q31_t ya, yb, yc, yd; 00413 q31_t xa_out, xb_out, xc_out, xd_out; 00414 q31_t ya_out, yb_out, yc_out, yd_out; 00415 00416 q31_t *ptr1; 00417 q31_t *pSi0; 00418 q31_t *pSi1; 00419 q31_t *pSi2; 00420 q31_t *pSi3; 00421 q63_t xaya, xbyb, xcyc, xdyd; 00422 /* Total process is divided into three stages */ 00423 00424 /* process first stage, middle stages, & last stage */ 00425 00426 00427 /* start of first stage process */ 00428 00429 /* Initializations for the first stage */ 00430 n2 = fftLen; 00431 n1 = n2; 00432 /* n2 = fftLen/4 */ 00433 n2 >>= 2u; 00434 00435 ia1 = 0u; 00436 00437 j = n2; 00438 00439 pSi0 = pSrc; 00440 pSi1 = pSi0 + 2 * n2; 00441 pSi2 = pSi1 + 2 * n2; 00442 pSi3 = pSi2 + 2 * n2; 00443 00444 /* Calculation of first stage */ 00445 do 00446 { 00447 /* input is in 1.31(q31) format and provide 4 guard bits for the input */ 00448 00449 /* Butterfly implementation */ 00450 /* xa + xc */ 00451 r1 = (pSi0[0] >> 4u) + (pSi2[0] >> 4u); 00452 /* xa - xc */ 00453 r2 = (pSi0[0] >> 4u) - (pSi2[0] >> 4u); 00454 00455 /* xb + xd */ 00456 t1 = (pSi1[0] >> 4u) + (pSi3[0] >> 4u); 00457 00458 /* ya + yc */ 00459 s1 = (pSi0[1] >> 4u) + (pSi2[1] >> 4u); 00460 /* ya - yc */ 00461 s2 = (pSi0[1] >> 4u) - (pSi2[1] >> 4u); 00462 00463 /* xa' = xa + xb + xc + xd */ 00464 *pSi0++ = (r1 + t1); 00465 /* (xa + xc) - (xb + xd) */ 00466 r1 = r1 - t1; 00467 /* yb + yd */ 00468 t2 = (pSi1[1] >> 4u) + (pSi3[1] >> 4u); 00469 00470 /* ya' = ya + yb + yc + yd */ 00471 *pSi0++ = (s1 + t2); 00472 00473 /* (ya + yc) - (yb + yd) */ 00474 s1 = s1 - t2; 00475 00476 /* yb - yd */ 00477 t1 = (pSi1[1] >> 4u) - (pSi3[1] >> 4u); 00478 /* xb - xd */ 00479 t2 = (pSi1[0] >> 4u) - (pSi3[0] >> 4u); 00480 00481 /* index calculation for the coefficients */ 00482 ia2 = 2u * ia1; 00483 co2 = pCoef[ia2 * 2u]; 00484 si2 = pCoef[(ia2 * 2u) + 1u]; 00485 00486 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */ 00487 *pSi1++ = (((int32_t) (((q63_t) r1 * co2) >> 32)) + 00488 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u; 00489 00490 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */ 00491 *pSi1++ = (((int32_t) (((q63_t) s1 * co2) >> 32)) - 00492 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u; 00493 00494 /* (xa - xc) + (yb - yd) */ 00495 r1 = r2 + t1; 00496 /* (xa - xc) - (yb - yd) */ 00497 r2 = r2 - t1; 00498 00499 /* (ya - yc) - (xb - xd) */ 00500 s1 = s2 - t2; 00501 /* (ya - yc) + (xb - xd) */ 00502 s2 = s2 + t2; 00503 00504 co1 = pCoef[ia1 * 2u]; 00505 si1 = pCoef[(ia1 * 2u) + 1u]; 00506 00507 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */ 00508 *pSi2++ = (((int32_t) (((q63_t) r1 * co1) >> 32)) + 00509 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u; 00510 00511 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */ 00512 *pSi2++ = (((int32_t) (((q63_t) s1 * co1) >> 32)) - 00513 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u; 00514 00515 /* index calculation for the coefficients */ 00516 ia3 = 3u * ia1; 00517 co3 = pCoef[ia3 * 2u]; 00518 si3 = pCoef[(ia3 * 2u) + 1u]; 00519 00520 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */ 00521 *pSi3++ = (((int32_t) (((q63_t) r2 * co3) >> 32)) + 00522 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u; 00523 00524 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */ 00525 *pSi3++ = (((int32_t) (((q63_t) s2 * co3) >> 32)) - 00526 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u; 00527 00528 /* Twiddle coefficients index modifier */ 00529 ia1 = ia1 + twidCoefModifier; 00530 00531 } while(--j); 00532 00533 /* end of first stage process */ 00534 00535 /* data is in 5.27(q27) format */ 00536 00537 00538 /* start of Middle stages process */ 00539 00540 00541 /* each stage in middle stages provides two down scaling of the input */ 00542 00543 twidCoefModifier <<= 2u; 00544 00545 00546 for (k = fftLen / 4u; k > 4u; k >>= 2u) 00547 { 00548 /* Initializations for the first stage */ 00549 n1 = n2; 00550 n2 >>= 2u; 00551 ia1 = 0u; 00552 00553 /* Calculation of first stage */ 00554 for (j = 0u; j <= (n2 - 1u); j++) 00555 { 00556 /* index calculation for the coefficients */ 00557 ia2 = ia1 + ia1; 00558 ia3 = ia2 + ia1; 00559 co1 = pCoef[ia1 * 2u]; 00560 si1 = pCoef[(ia1 * 2u) + 1u]; 00561 co2 = pCoef[ia2 * 2u]; 00562 si2 = pCoef[(ia2 * 2u) + 1u]; 00563 co3 = pCoef[ia3 * 2u]; 00564 si3 = pCoef[(ia3 * 2u) + 1u]; 00565 /* Twiddle coefficients index modifier */ 00566 ia1 = ia1 + twidCoefModifier; 00567 00568 pSi0 = pSrc + 2 * j; 00569 pSi1 = pSi0 + 2 * n2; 00570 pSi2 = pSi1 + 2 * n2; 00571 pSi3 = pSi2 + 2 * n2; 00572 00573 for (i0 = j; i0 < fftLen; i0 += n1) 00574 { 00575 /* Butterfly implementation */ 00576 /* xa + xc */ 00577 r1 = pSi0[0] + pSi2[0]; 00578 00579 /* xa - xc */ 00580 r2 = pSi0[0] - pSi2[0]; 00581 00582 00583 /* ya + yc */ 00584 s1 = pSi0[1] + pSi2[1]; 00585 00586 /* ya - yc */ 00587 s2 = pSi0[1] - pSi2[1]; 00588 00589 00590 /* xb + xd */ 00591 t1 = pSi1[0] + pSi3[0]; 00592 00593 00594 /* xa' = xa + xb + xc + xd */ 00595 pSi0[0] = (r1 + t1) >> 2u; 00596 /* xa + xc -(xb + xd) */ 00597 r1 = r1 - t1; 00598 00599 /* yb + yd */ 00600 t2 = pSi1[1] + pSi3[1]; 00601 00602 /* ya' = ya + yb + yc + yd */ 00603 pSi0[1] = (s1 + t2) >> 2u; 00604 pSi0 += 2 * n1; 00605 00606 /* (ya + yc) - (yb + yd) */ 00607 s1 = s1 - t2; 00608 00609 /* (yb - yd) */ 00610 t1 = pSi1[1] - pSi3[1]; 00611 00612 /* (xb - xd) */ 00613 t2 = pSi1[0] - pSi3[0]; 00614 00615 00616 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */ 00617 pSi1[0] = (((int32_t) (((q63_t) r1 * co2) >> 32)) + 00618 ((int32_t) (((q63_t) s1 * si2) >> 32))) >> 1u; 00619 00620 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */ 00621 pSi1[1] = (((int32_t) (((q63_t) s1 * co2) >> 32)) - 00622 ((int32_t) (((q63_t) r1 * si2) >> 32))) >> 1u; 00623 pSi1 += 2 * n1; 00624 00625 /* (xa - xc) + (yb - yd) */ 00626 r1 = r2 + t1; 00627 /* (xa - xc) - (yb - yd) */ 00628 r2 = r2 - t1; 00629 00630 /* (ya - yc) - (xb - xd) */ 00631 s1 = s2 - t2; 00632 /* (ya - yc) + (xb - xd) */ 00633 s2 = s2 + t2; 00634 00635 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */ 00636 pSi2[0] = (((int32_t) (((q63_t) r1 * co1) >> 32)) + 00637 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u; 00638 00639 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */ 00640 pSi2[1] = (((int32_t) (((q63_t) s1 * co1) >> 32)) - 00641 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u; 00642 pSi2 += 2 * n1; 00643 00644 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */ 00645 pSi3[0] = (((int32_t) (((q63_t) r2 * co3) >> 32)) + 00646 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u; 00647 00648 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */ 00649 pSi3[1] = (((int32_t) (((q63_t) s2 * co3) >> 32)) - 00650 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u; 00651 pSi3 += 2 * n1; 00652 } 00653 } 00654 twidCoefModifier <<= 2u; 00655 } 00656 #endif 00657 00658 /* End of Middle stages process */ 00659 00660 /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */ 00661 /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */ 00662 /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */ 00663 /* data is in 5.27(q27) format for the 16 point as there are no middle stages */ 00664 00665 00666 /* start of Last stage process */ 00667 /* Initializations for the last stage */ 00668 j = fftLen >> 2; 00669 ptr1 = &pSrc[0]; 00670 00671 /* Calculations of last stage */ 00672 do 00673 { 00674 00675 #ifndef ARM_MATH_BIG_ENDIAN 00676 00677 /* Read xa (real), ya(imag) input */ 00678 xaya = *__SIMD64(ptr1)++; 00679 xa = (q31_t) xaya; 00680 ya = (q31_t) (xaya >> 32); 00681 00682 /* Read xb (real), yb(imag) input */ 00683 xbyb = *__SIMD64(ptr1)++; 00684 xb = (q31_t) xbyb; 00685 yb = (q31_t) (xbyb >> 32); 00686 00687 /* Read xc (real), yc(imag) input */ 00688 xcyc = *__SIMD64(ptr1)++; 00689 xc = (q31_t) xcyc; 00690 yc = (q31_t) (xcyc >> 32); 00691 00692 /* Read xc (real), yc(imag) input */ 00693 xdyd = *__SIMD64(ptr1)++; 00694 xd = (q31_t) xdyd; 00695 yd = (q31_t) (xdyd >> 32); 00696 00697 #else 00698 00699 /* Read xa (real), ya(imag) input */ 00700 xaya = *__SIMD64(ptr1)++; 00701 ya = (q31_t) xaya; 00702 xa = (q31_t) (xaya >> 32); 00703 00704 /* Read xb (real), yb(imag) input */ 00705 xbyb = *__SIMD64(ptr1)++; 00706 yb = (q31_t) xbyb; 00707 xb = (q31_t) (xbyb >> 32); 00708 00709 /* Read xc (real), yc(imag) input */ 00710 xcyc = *__SIMD64(ptr1)++; 00711 yc = (q31_t) xcyc; 00712 xc = (q31_t) (xcyc >> 32); 00713 00714 /* Read xc (real), yc(imag) input */ 00715 xdyd = *__SIMD64(ptr1)++; 00716 yd = (q31_t) xdyd; 00717 xd = (q31_t) (xdyd >> 32); 00718 00719 00720 #endif 00721 00722 /* xa' = xa + xb + xc + xd */ 00723 xa_out = xa + xb + xc + xd; 00724 00725 /* ya' = ya + yb + yc + yd */ 00726 ya_out = ya + yb + yc + yd; 00727 00728 /* pointer updation for writing */ 00729 ptr1 = ptr1 - 8u; 00730 00731 /* writing xa' and ya' */ 00732 *ptr1++ = xa_out; 00733 *ptr1++ = ya_out; 00734 00735 xc_out = (xa - xb + xc - xd); 00736 yc_out = (ya - yb + yc - yd); 00737 00738 /* writing xc' and yc' */ 00739 *ptr1++ = xc_out; 00740 *ptr1++ = yc_out; 00741 00742 xb_out = (xa + yb - xc - yd); 00743 yb_out = (ya - xb - yc + xd); 00744 00745 /* writing xb' and yb' */ 00746 *ptr1++ = xb_out; 00747 *ptr1++ = yb_out; 00748 00749 xd_out = (xa - yb - xc + yd); 00750 yd_out = (ya + xb - yc - xd); 00751 00752 /* writing xd' and yd' */ 00753 *ptr1++ = xd_out; 00754 *ptr1++ = yd_out; 00755 00756 00757 } while(--j); 00758 00759 /* output is in 11.21(q21) format for the 1024 point */ 00760 /* output is in 9.23(q23) format for the 256 point */ 00761 /* output is in 7.25(q25) format for the 64 point */ 00762 /* output is in 5.27(q27) format for the 16 point */ 00763 00764 /* End of last stage process */ 00765 00766 } 00767 00768 00769 /** 00770 * @brief Core function for the Q31 CIFFT butterfly process. 00771 * @param[in, out] *pSrc points to the in-place buffer of Q31 data type. 00772 * @param[in] fftLen length of the FFT. 00773 * @param[in] *pCoef points to twiddle coefficient buffer. 00774 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 00775 * @return none. 00776 */ 00777 00778 00779 /* 00780 * Radix-4 IFFT algorithm used is : 00781 * 00782 * CIFFT uses same twiddle coefficients as CFFT Function 00783 * x[k] = x[n] + (j)k * x[n + fftLen/4] + (-1)k * x[n+fftLen/2] + (-j)k * x[n+3*fftLen/4] 00784 * 00785 * 00786 * IFFT is implemented with following changes in equations from FFT 00787 * 00788 * Input real and imaginary data: 00789 * x(n) = xa + j * ya 00790 * x(n+N/4 ) = xb + j * yb 00791 * x(n+N/2 ) = xc + j * yc 00792 * x(n+3N 4) = xd + j * yd 00793 * 00794 * 00795 * Output real and imaginary data: 00796 * x(4r) = xa'+ j * ya' 00797 * x(4r+1) = xb'+ j * yb' 00798 * x(4r+2) = xc'+ j * yc' 00799 * x(4r+3) = xd'+ j * yd' 00800 * 00801 * 00802 * Twiddle factors for radix-4 IFFT: 00803 * Wn = co1 + j * (si1) 00804 * W2n = co2 + j * (si2) 00805 * W3n = co3 + j * (si3) 00806 00807 * The real and imaginary output values for the radix-4 butterfly are 00808 * xa' = xa + xb + xc + xd 00809 * ya' = ya + yb + yc + yd 00810 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) 00811 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) 00812 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) 00813 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) 00814 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) 00815 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) 00816 * 00817 */ 00818 00819 void arm_radix4_butterfly_inverse_q31( 00820 q31_t * pSrc, 00821 uint32_t fftLen, 00822 q31_t * pCoef, 00823 uint32_t twidCoefModifier) 00824 { 00825 #if defined(ARM_MATH_CM7) 00826 uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k; 00827 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3; 00828 q31_t xa, xb, xc, xd; 00829 q31_t ya, yb, yc, yd; 00830 q31_t xa_out, xb_out, xc_out, xd_out; 00831 q31_t ya_out, yb_out, yc_out, yd_out; 00832 00833 q31_t *ptr1; 00834 q63_t xaya, xbyb, xcyc, xdyd; 00835 00836 /* input is be 1.31(q31) format for all FFT sizes */ 00837 /* Total process is divided into three stages */ 00838 /* process first stage, middle stages, & last stage */ 00839 00840 /* Start of first stage process */ 00841 00842 /* Initializations for the first stage */ 00843 n2 = fftLen; 00844 n1 = n2; 00845 /* n2 = fftLen/4 */ 00846 n2 >>= 2u; 00847 i0 = 0u; 00848 ia1 = 0u; 00849 00850 j = n2; 00851 00852 do 00853 { 00854 00855 /* input is in 1.31(q31) format and provide 4 guard bits for the input */ 00856 00857 /* index calculation for the input as, */ 00858 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */ 00859 i1 = i0 + n2; 00860 i2 = i1 + n2; 00861 i3 = i2 + n2; 00862 00863 /* Butterfly implementation */ 00864 /* xa + xc */ 00865 r1 = (pSrc[2u * i0] >> 4u) + (pSrc[2u * i2] >> 4u); 00866 /* xa - xc */ 00867 r2 = (pSrc[2u * i0] >> 4u) - (pSrc[2u * i2] >> 4u); 00868 00869 /* xb + xd */ 00870 t1 = (pSrc[2u * i1] >> 4u) + (pSrc[2u * i3] >> 4u); 00871 00872 /* ya + yc */ 00873 s1 = (pSrc[(2u * i0) + 1u] >> 4u) + (pSrc[(2u * i2) + 1u] >> 4u); 00874 /* ya - yc */ 00875 s2 = (pSrc[(2u * i0) + 1u] >> 4u) - (pSrc[(2u * i2) + 1u] >> 4u); 00876 00877 /* xa' = xa + xb + xc + xd */ 00878 pSrc[2u * i0] = (r1 + t1); 00879 /* (xa + xc) - (xb + xd) */ 00880 r1 = r1 - t1; 00881 /* yb + yd */ 00882 t2 = (pSrc[(2u * i1) + 1u] >> 4u) + (pSrc[(2u * i3) + 1u] >> 4u); 00883 /* ya' = ya + yb + yc + yd */ 00884 pSrc[(2u * i0) + 1u] = (s1 + t2); 00885 00886 /* (ya + yc) - (yb + yd) */ 00887 s1 = s1 - t2; 00888 00889 /* yb - yd */ 00890 t1 = (pSrc[(2u * i1) + 1u] >> 4u) - (pSrc[(2u * i3) + 1u] >> 4u); 00891 /* xb - xd */ 00892 t2 = (pSrc[2u * i1] >> 4u) - (pSrc[2u * i3] >> 4u); 00893 00894 /* index calculation for the coefficients */ 00895 ia2 = 2u * ia1; 00896 co2 = pCoef[ia2 * 2u]; 00897 si2 = pCoef[(ia2 * 2u) + 1u]; 00898 00899 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ 00900 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) - 00901 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u; 00902 00903 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ 00904 pSrc[2u * i1 + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) + 00905 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u; 00906 00907 /* (xa - xc) - (yb - yd) */ 00908 r1 = r2 - t1; 00909 /* (xa - xc) + (yb - yd) */ 00910 r2 = r2 + t1; 00911 00912 /* (ya - yc) + (xb - xd) */ 00913 s1 = s2 + t2; 00914 /* (ya - yc) - (xb - xd) */ 00915 s2 = s2 - t2; 00916 00917 co1 = pCoef[ia1 * 2u]; 00918 si1 = pCoef[(ia1 * 2u) + 1u]; 00919 00920 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ 00921 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) - 00922 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u; 00923 00924 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ 00925 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) + 00926 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u; 00927 00928 /* index calculation for the coefficients */ 00929 ia3 = 3u * ia1; 00930 co3 = pCoef[ia3 * 2u]; 00931 si3 = pCoef[(ia3 * 2u) + 1u]; 00932 00933 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ 00934 pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) - 00935 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u; 00936 00937 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ 00938 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) + 00939 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u; 00940 00941 /* Twiddle coefficients index modifier */ 00942 ia1 = ia1 + twidCoefModifier; 00943 00944 /* Updating input index */ 00945 i0 = i0 + 1u; 00946 00947 } while(--j); 00948 00949 /* data is in 5.27(q27) format */ 00950 /* each stage provides two down scaling of the input */ 00951 00952 00953 /* Start of Middle stages process */ 00954 00955 twidCoefModifier <<= 2u; 00956 00957 /* Calculation of second stage to excluding last stage */ 00958 for (k = fftLen / 4u; k > 4u; k >>= 2u) 00959 { 00960 /* Initializations for the first stage */ 00961 n1 = n2; 00962 n2 >>= 2u; 00963 ia1 = 0u; 00964 00965 for (j = 0; j <= (n2 - 1u); j++) 00966 { 00967 /* index calculation for the coefficients */ 00968 ia2 = ia1 + ia1; 00969 ia3 = ia2 + ia1; 00970 co1 = pCoef[ia1 * 2u]; 00971 si1 = pCoef[(ia1 * 2u) + 1u]; 00972 co2 = pCoef[ia2 * 2u]; 00973 si2 = pCoef[(ia2 * 2u) + 1u]; 00974 co3 = pCoef[ia3 * 2u]; 00975 si3 = pCoef[(ia3 * 2u) + 1u]; 00976 /* Twiddle coefficients index modifier */ 00977 ia1 = ia1 + twidCoefModifier; 00978 00979 for (i0 = j; i0 < fftLen; i0 += n1) 00980 { 00981 /* index calculation for the input as, */ 00982 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */ 00983 i1 = i0 + n2; 00984 i2 = i1 + n2; 00985 i3 = i2 + n2; 00986 00987 /* Butterfly implementation */ 00988 /* xa + xc */ 00989 r1 = pSrc[2u * i0] + pSrc[2u * i2]; 00990 /* xa - xc */ 00991 r2 = pSrc[2u * i0] - pSrc[2u * i2]; 00992 00993 /* ya + yc */ 00994 s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u]; 00995 /* ya - yc */ 00996 s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u]; 00997 00998 /* xb + xd */ 00999 t1 = pSrc[2u * i1] + pSrc[2u * i3]; 01000 01001 /* xa' = xa + xb + xc + xd */ 01002 pSrc[2u * i0] = (r1 + t1) >> 2u; 01003 /* xa + xc -(xb + xd) */ 01004 r1 = r1 - t1; 01005 /* yb + yd */ 01006 t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u]; 01007 /* ya' = ya + yb + yc + yd */ 01008 pSrc[(2u * i0) + 1u] = (s1 + t2) >> 2u; 01009 01010 /* (ya + yc) - (yb + yd) */ 01011 s1 = s1 - t2; 01012 01013 /* (yb - yd) */ 01014 t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u]; 01015 /* (xb - xd) */ 01016 t2 = pSrc[2u * i1] - pSrc[2u * i3]; 01017 01018 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ 01019 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32u)) - 01020 ((int32_t) (((q63_t) s1 * si2) >> 32u))) >> 1u; 01021 01022 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ 01023 pSrc[(2u * i1) + 1u] = 01024 (((int32_t) (((q63_t) s1 * co2) >> 32u)) + 01025 ((int32_t) (((q63_t) r1 * si2) >> 32u))) >> 1u; 01026 01027 /* (xa - xc) - (yb - yd) */ 01028 r1 = r2 - t1; 01029 /* (xa - xc) + (yb - yd) */ 01030 r2 = r2 + t1; 01031 01032 /* (ya - yc) + (xb - xd) */ 01033 s1 = s2 + t2; 01034 /* (ya - yc) - (xb - xd) */ 01035 s2 = s2 - t2; 01036 01037 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ 01038 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) - 01039 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u; 01040 01041 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ 01042 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) + 01043 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u; 01044 01045 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ 01046 pSrc[(2u * i3)] = (((int32_t) (((q63_t) r2 * co3) >> 32)) - 01047 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u; 01048 01049 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ 01050 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) + 01051 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u; 01052 } 01053 } 01054 twidCoefModifier <<= 2u; 01055 } 01056 #else 01057 uint32_t n1, n2, ia1, ia2, ia3, i0, j, k; 01058 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3; 01059 q31_t xa, xb, xc, xd; 01060 q31_t ya, yb, yc, yd; 01061 q31_t xa_out, xb_out, xc_out, xd_out; 01062 q31_t ya_out, yb_out, yc_out, yd_out; 01063 01064 q31_t *ptr1; 01065 q31_t *pSi0; 01066 q31_t *pSi1; 01067 q31_t *pSi2; 01068 q31_t *pSi3; 01069 q63_t xaya, xbyb, xcyc, xdyd; 01070 01071 /* input is be 1.31(q31) format for all FFT sizes */ 01072 /* Total process is divided into three stages */ 01073 /* process first stage, middle stages, & last stage */ 01074 01075 /* Start of first stage process */ 01076 01077 /* Initializations for the first stage */ 01078 n2 = fftLen; 01079 n1 = n2; 01080 /* n2 = fftLen/4 */ 01081 n2 >>= 2u; 01082 01083 ia1 = 0u; 01084 01085 j = n2; 01086 01087 pSi0 = pSrc; 01088 pSi1 = pSi0 + 2 * n2; 01089 pSi2 = pSi1 + 2 * n2; 01090 pSi3 = pSi2 + 2 * n2; 01091 01092 do 01093 { 01094 /* Butterfly implementation */ 01095 /* xa + xc */ 01096 r1 = (pSi0[0] >> 4u) + (pSi2[0] >> 4u); 01097 /* xa - xc */ 01098 r2 = (pSi0[0] >> 4u) - (pSi2[0] >> 4u); 01099 01100 /* xb + xd */ 01101 t1 = (pSi1[0] >> 4u) + (pSi3[0] >> 4u); 01102 01103 /* ya + yc */ 01104 s1 = (pSi0[1] >> 4u) + (pSi2[1] >> 4u); 01105 /* ya - yc */ 01106 s2 = (pSi0[1] >> 4u) - (pSi2[1] >> 4u); 01107 01108 /* xa' = xa + xb + xc + xd */ 01109 *pSi0++ = (r1 + t1); 01110 /* (xa + xc) - (xb + xd) */ 01111 r1 = r1 - t1; 01112 /* yb + yd */ 01113 t2 = (pSi1[1] >> 4u) + (pSi3[1] >> 4u); 01114 /* ya' = ya + yb + yc + yd */ 01115 *pSi0++ = (s1 + t2); 01116 01117 /* (ya + yc) - (yb + yd) */ 01118 s1 = s1 - t2; 01119 01120 /* yb - yd */ 01121 t1 = (pSi1[1] >> 4u) - (pSi3[1] >> 4u); 01122 /* xb - xd */ 01123 t2 = (pSi1[0] >> 4u) - (pSi3[0] >> 4u); 01124 01125 /* index calculation for the coefficients */ 01126 ia2 = 2u * ia1; 01127 co2 = pCoef[ia2 * 2u]; 01128 si2 = pCoef[(ia2 * 2u) + 1u]; 01129 01130 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ 01131 *pSi1++ = (((int32_t) (((q63_t) r1 * co2) >> 32)) - 01132 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u; 01133 01134 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ 01135 *pSi1++ = (((int32_t) (((q63_t) s1 * co2) >> 32)) + 01136 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u; 01137 01138 /* (xa - xc) - (yb - yd) */ 01139 r1 = r2 - t1; 01140 /* (xa - xc) + (yb - yd) */ 01141 r2 = r2 + t1; 01142 01143 /* (ya - yc) + (xb - xd) */ 01144 s1 = s2 + t2; 01145 /* (ya - yc) - (xb - xd) */ 01146 s2 = s2 - t2; 01147 01148 co1 = pCoef[ia1 * 2u]; 01149 si1 = pCoef[(ia1 * 2u) + 1u]; 01150 01151 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ 01152 *pSi2++ = (((int32_t) (((q63_t) r1 * co1) >> 32)) - 01153 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u; 01154 01155 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ 01156 *pSi2++ = (((int32_t) (((q63_t) s1 * co1) >> 32)) + 01157 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u; 01158 01159 /* index calculation for the coefficients */ 01160 ia3 = 3u * ia1; 01161 co3 = pCoef[ia3 * 2u]; 01162 si3 = pCoef[(ia3 * 2u) + 1u]; 01163 01164 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ 01165 *pSi3++ = (((int32_t) (((q63_t) r2 * co3) >> 32)) - 01166 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u; 01167 01168 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ 01169 *pSi3++ = (((int32_t) (((q63_t) s2 * co3) >> 32)) + 01170 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u; 01171 01172 /* Twiddle coefficients index modifier */ 01173 ia1 = ia1 + twidCoefModifier; 01174 01175 } while(--j); 01176 01177 /* data is in 5.27(q27) format */ 01178 /* each stage provides two down scaling of the input */ 01179 01180 01181 /* Start of Middle stages process */ 01182 01183 twidCoefModifier <<= 2u; 01184 01185 /* Calculation of second stage to excluding last stage */ 01186 for (k = fftLen / 4u; k > 4u; k >>= 2u) 01187 { 01188 /* Initializations for the first stage */ 01189 n1 = n2; 01190 n2 >>= 2u; 01191 ia1 = 0u; 01192 01193 for (j = 0; j <= (n2 - 1u); j++) 01194 { 01195 /* index calculation for the coefficients */ 01196 ia2 = ia1 + ia1; 01197 ia3 = ia2 + ia1; 01198 co1 = pCoef[ia1 * 2u]; 01199 si1 = pCoef[(ia1 * 2u) + 1u]; 01200 co2 = pCoef[ia2 * 2u]; 01201 si2 = pCoef[(ia2 * 2u) + 1u]; 01202 co3 = pCoef[ia3 * 2u]; 01203 si3 = pCoef[(ia3 * 2u) + 1u]; 01204 /* Twiddle coefficients index modifier */ 01205 ia1 = ia1 + twidCoefModifier; 01206 01207 pSi0 = pSrc + 2 * j; 01208 pSi1 = pSi0 + 2 * n2; 01209 pSi2 = pSi1 + 2 * n2; 01210 pSi3 = pSi2 + 2 * n2; 01211 01212 for (i0 = j; i0 < fftLen; i0 += n1) 01213 { 01214 /* Butterfly implementation */ 01215 /* xa + xc */ 01216 r1 = pSi0[0] + pSi2[0]; 01217 01218 /* xa - xc */ 01219 r2 = pSi0[0] - pSi2[0]; 01220 01221 01222 /* ya + yc */ 01223 s1 = pSi0[1] + pSi2[1]; 01224 01225 /* ya - yc */ 01226 s2 = pSi0[1] - pSi2[1]; 01227 01228 01229 /* xb + xd */ 01230 t1 = pSi1[0] + pSi3[0]; 01231 01232 01233 /* xa' = xa + xb + xc + xd */ 01234 pSi0[0] = (r1 + t1) >> 2u; 01235 /* xa + xc -(xb + xd) */ 01236 r1 = r1 - t1; 01237 /* yb + yd */ 01238 t2 = pSi1[1] + pSi3[1]; 01239 01240 /* ya' = ya + yb + yc + yd */ 01241 pSi0[1] = (s1 + t2) >> 2u; 01242 pSi0 += 2 * n1; 01243 01244 /* (ya + yc) - (yb + yd) */ 01245 s1 = s1 - t2; 01246 01247 /* (yb - yd) */ 01248 t1 = pSi1[1] - pSi3[1]; 01249 01250 /* (xb - xd) */ 01251 t2 = pSi1[0] - pSi3[0]; 01252 01253 01254 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ 01255 pSi1[0] = (((int32_t) (((q63_t) r1 * co2) >> 32u)) - 01256 ((int32_t) (((q63_t) s1 * si2) >> 32u))) >> 1u; 01257 01258 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ 01259 pSi1[1] = 01260 01261 (((int32_t) (((q63_t) s1 * co2) >> 32u)) + 01262 ((int32_t) (((q63_t) r1 * si2) >> 32u))) >> 1u; 01263 pSi1 += 2 * n1; 01264 01265 /* (xa - xc) - (yb - yd) */ 01266 r1 = r2 - t1; 01267 /* (xa - xc) + (yb - yd) */ 01268 r2 = r2 + t1; 01269 01270 /* (ya - yc) + (xb - xd) */ 01271 s1 = s2 + t2; 01272 /* (ya - yc) - (xb - xd) */ 01273 s2 = s2 - t2; 01274 01275 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ 01276 pSi2[0] = (((int32_t) (((q63_t) r1 * co1) >> 32)) - 01277 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u; 01278 01279 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ 01280 pSi2[1] = (((int32_t) (((q63_t) s1 * co1) >> 32)) + 01281 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u; 01282 pSi2 += 2 * n1; 01283 01284 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ 01285 pSi3[0] = (((int32_t) (((q63_t) r2 * co3) >> 32)) - 01286 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u; 01287 01288 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ 01289 pSi3[1] = (((int32_t) (((q63_t) s2 * co3) >> 32)) + 01290 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u; 01291 pSi3 += 2 * n1; 01292 } 01293 } 01294 twidCoefModifier <<= 2u; 01295 } 01296 #endif 01297 01298 /* End of Middle stages process */ 01299 01300 /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */ 01301 /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */ 01302 /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */ 01303 /* data is in 5.27(q27) format for the 16 point as there are no middle stages */ 01304 01305 01306 /* Start of last stage process */ 01307 01308 01309 /* Initializations for the last stage */ 01310 j = fftLen >> 2; 01311 ptr1 = &pSrc[0]; 01312 01313 /* Calculations of last stage */ 01314 do 01315 { 01316 #ifndef ARM_MATH_BIG_ENDIAN 01317 /* Read xa (real), ya(imag) input */ 01318 xaya = *__SIMD64(ptr1)++; 01319 xa = (q31_t) xaya; 01320 ya = (q31_t) (xaya >> 32); 01321 01322 /* Read xb (real), yb(imag) input */ 01323 xbyb = *__SIMD64(ptr1)++; 01324 xb = (q31_t) xbyb; 01325 yb = (q31_t) (xbyb >> 32); 01326 01327 /* Read xc (real), yc(imag) input */ 01328 xcyc = *__SIMD64(ptr1)++; 01329 xc = (q31_t) xcyc; 01330 yc = (q31_t) (xcyc >> 32); 01331 01332 /* Read xc (real), yc(imag) input */ 01333 xdyd = *__SIMD64(ptr1)++; 01334 xd = (q31_t) xdyd; 01335 yd = (q31_t) (xdyd >> 32); 01336 01337 #else 01338 01339 /* Read xa (real), ya(imag) input */ 01340 xaya = *__SIMD64(ptr1)++; 01341 ya = (q31_t) xaya; 01342 xa = (q31_t) (xaya >> 32); 01343 01344 /* Read xb (real), yb(imag) input */ 01345 xbyb = *__SIMD64(ptr1)++; 01346 yb = (q31_t) xbyb; 01347 xb = (q31_t) (xbyb >> 32); 01348 01349 /* Read xc (real), yc(imag) input */ 01350 xcyc = *__SIMD64(ptr1)++; 01351 yc = (q31_t) xcyc; 01352 xc = (q31_t) (xcyc >> 32); 01353 01354 /* Read xc (real), yc(imag) input */ 01355 xdyd = *__SIMD64(ptr1)++; 01356 yd = (q31_t) xdyd; 01357 xd = (q31_t) (xdyd >> 32); 01358 01359 01360 #endif 01361 01362 /* xa' = xa + xb + xc + xd */ 01363 xa_out = xa + xb + xc + xd; 01364 01365 /* ya' = ya + yb + yc + yd */ 01366 ya_out = ya + yb + yc + yd; 01367 01368 /* pointer updation for writing */ 01369 ptr1 = ptr1 - 8u; 01370 01371 /* writing xa' and ya' */ 01372 *ptr1++ = xa_out; 01373 *ptr1++ = ya_out; 01374 01375 xc_out = (xa - xb + xc - xd); 01376 yc_out = (ya - yb + yc - yd); 01377 01378 /* writing xc' and yc' */ 01379 *ptr1++ = xc_out; 01380 *ptr1++ = yc_out; 01381 01382 xb_out = (xa - yb - xc + yd); 01383 yb_out = (ya + xb - yc - xd); 01384 01385 /* writing xb' and yb' */ 01386 *ptr1++ = xb_out; 01387 *ptr1++ = yb_out; 01388 01389 xd_out = (xa + yb - xc - yd); 01390 yd_out = (ya - xb - yc + xd); 01391 01392 /* writing xd' and yd' */ 01393 *ptr1++ = xd_out; 01394 *ptr1++ = yd_out; 01395 01396 } while(--j); 01397 01398 /* output is in 11.21(q21) format for the 1024 point */ 01399 /* output is in 9.23(q23) format for the 256 point */ 01400 /* output is in 7.25(q25) format for the 64 point */ 01401 /* output is in 5.27(q27) format for the 16 point */ 01402 01403 /* End of last stage process */ 01404 }
Generated on Fri Jul 22 2022 04:53:43 by
1.7.2
