CMSIS DSP Library from CMSIS 2.0. See http://www.onarm.com/cmsis/ for full details
Dependents: K22F_DSP_Matrix_least_square BNO055-ELEC3810 1BNO055 ECE4180Project--Slave2 ... more
arm_cfft_radix4_q31.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 29. November 2010 00005 * $Revision: V1.0.3 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_cfft_radix4_q31.c 00009 * 00010 * Description: This file has function definition of Radix-4 FFT & IFFT function and 00011 * In-place bit reversal using bit reversal table 00012 * 00013 * Target Processor: Cortex-M4/Cortex-M3 00014 * 00015 * Version 1.0.3 2010/11/29 00016 * Re-organized the CMSIS folders and updated documentation. 00017 * 00018 * Version 1.0.2 2010/11/11 00019 * Documentation updated. 00020 * 00021 * Version 1.0.1 2010/10/05 00022 * Production release and review comments incorporated. 00023 * 00024 * Version 1.0.0 2010/09/20 00025 * Production release and review comments incorporated. 00026 * 00027 * Version 0.0.5 2010/04/26 00028 * incorporated review comments and updated with latest CMSIS layer 00029 * 00030 * Version 0.0.3 2010/03/10 00031 * Initial version 00032 * -------------------------------------------------------------------- */ 00033 #include "arm_math.h" 00034 00035 00036 /** 00037 * @ingroup groupTransforms 00038 */ 00039 00040 /** 00041 * @addtogroup CFFT_CIFFT 00042 * @{ 00043 */ 00044 00045 /** 00046 * @details 00047 * @brief Processing function for the Q31 CFFT/CIFFT. 00048 * @param[in] *S points to an instance of the Q31 CFFT/CIFFT structure. 00049 * @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place. 00050 * @return none. 00051 * 00052 * \par Input and output formats: 00053 * \par 00054 * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process. 00055 * Hence the output format is different for different FFT sizes. 00056 * The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT: 00057 * \par 00058 * \image html CFFTQ31.gif "Input and Output Formats for Q31 CFFT" 00059 * \image html CIFFTQ31.gif "Input and Output Formats for Q31 CIFFT" 00060 * 00061 */ 00062 00063 void arm_cfft_radix4_q31( 00064 const arm_cfft_radix4_instance_q31 * S, 00065 q31_t * pSrc) 00066 { 00067 if(S->ifftFlag == 1u) 00068 { 00069 /* Complex IFFT radix-4 */ 00070 arm_radix4_butterfly_inverse_q31(pSrc, S->fftLen, S->pTwiddle, 00071 S->twidCoefModifier); 00072 } 00073 else 00074 { 00075 /* Complex FFT radix-4 */ 00076 arm_radix4_butterfly_q31(pSrc, S->fftLen, S->pTwiddle, 00077 S->twidCoefModifier); 00078 } 00079 00080 00081 if(S->bitReverseFlag == 1u) 00082 { 00083 /* Bit Reversal */ 00084 arm_bitreversal_q31(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable); 00085 } 00086 00087 } 00088 00089 /** 00090 * @} end of CFFT_CIFFT group 00091 */ 00092 00093 /* 00094 * Radix-4 FFT algorithm used is : 00095 * 00096 * Input real and imaginary data: 00097 * x(n) = xa + j * ya 00098 * x(n+N/4 ) = xb + j * yb 00099 * x(n+N/2 ) = xc + j * yc 00100 * x(n+3N 4) = xd + j * yd 00101 * 00102 * 00103 * Output real and imaginary data: 00104 * x(4r) = xa'+ j * ya' 00105 * x(4r+1) = xb'+ j * yb' 00106 * x(4r+2) = xc'+ j * yc' 00107 * x(4r+3) = xd'+ j * yd' 00108 * 00109 * 00110 * Twiddle factors for radix-4 FFT: 00111 * Wn = co1 + j * (- si1) 00112 * W2n = co2 + j * (- si2) 00113 * W3n = co3 + j * (- si3) 00114 * 00115 * Butterfly implementation: 00116 * xa' = xa + xb + xc + xd 00117 * ya' = ya + yb + yc + yd 00118 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) 00119 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) 00120 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) 00121 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) 00122 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) 00123 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) 00124 * 00125 */ 00126 00127 /** 00128 * @brief Core function for the Q31 CFFT butterfly process. 00129 * @param[in, out] *pSrc points to the in-place buffer of Q31 data type. 00130 * @param[in] fftLen length of the FFT. 00131 * @param[in] *pCoef points to twiddle coefficient buffer. 00132 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 00133 * @return none. 00134 */ 00135 00136 void arm_radix4_butterfly_q31( 00137 q31_t * pSrc, 00138 uint32_t fftLen, 00139 q31_t * pCoef, 00140 uint32_t twidCoefModifier) 00141 { 00142 uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k; 00143 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3; 00144 00145 00146 /* Total process is divided into three stages */ 00147 00148 /* process first stage, middle stages, & last stage */ 00149 00150 00151 /* start of first stage process */ 00152 00153 /* Initializations for the first stage */ 00154 n2 = fftLen; 00155 n1 = n2; 00156 /* n2 = fftLen/4 */ 00157 n2 >>= 2u; 00158 i0 = 0u; 00159 ia1 = 0u; 00160 00161 j = n2; 00162 00163 /* Calculation of first stage */ 00164 do 00165 { 00166 /* index calculation for the input as, */ 00167 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */ 00168 i1 = i0 + n2; 00169 i2 = i1 + n2; 00170 i3 = i2 + n2; 00171 00172 /* input is in 1.31(q31) format and provide 4 guard bits for the input */ 00173 00174 /* Butterfly implementation */ 00175 /* xa + xc */ 00176 r1 = (pSrc[(2u * i0)] >> 4u) + (pSrc[(2u * i2)] >> 4u); 00177 /* xa - xc */ 00178 r2 = (pSrc[2u * i0] >> 4u) - (pSrc[2u * i2] >> 4u); 00179 00180 /* ya + yc */ 00181 s1 = (pSrc[(2u * i0) + 1u] >> 4u) + (pSrc[(2u * i2) + 1u] >> 4u); 00182 /* ya - yc */ 00183 s2 = (pSrc[(2u * i0) + 1u] >> 4u) - (pSrc[(2u * i2) + 1u] >> 4u); 00184 00185 /* xb + xd */ 00186 t1 = (pSrc[2u * i1] >> 4u) + (pSrc[2u * i3] >> 4u); 00187 00188 /* xa' = xa + xb + xc + xd */ 00189 pSrc[2u * i0] = (r1 + t1); 00190 /* (xa + xc) - (xb + xd) */ 00191 r1 = r1 - t1; 00192 /* yb + yd */ 00193 t2 = (pSrc[(2u * i1) + 1u] >> 4u) + (pSrc[(2u * i3) + 1u] >> 4u); 00194 /* ya' = ya + yb + yc + yd */ 00195 pSrc[(2u * i0) + 1u] = (s1 + t2); 00196 00197 /* (ya + yc) - (yb + yd) */ 00198 s1 = s1 - t2; 00199 00200 /* yb - yd */ 00201 t1 = (pSrc[(2u * i1) + 1u] >> 4u) - (pSrc[(2u * i3) + 1u] >> 4u); 00202 /* xb - xd */ 00203 t2 = (pSrc[2u * i1] >> 4u) - (pSrc[2u * i3] >> 4u); 00204 00205 /* index calculation for the coefficients */ 00206 ia2 = 2u * ia1; 00207 co2 = pCoef[ia2 * 2u]; 00208 si2 = pCoef[(ia2 * 2u) + 1u]; 00209 00210 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */ 00211 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) + 00212 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u; 00213 00214 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */ 00215 pSrc[(2u * i1) + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) - 00216 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u; 00217 00218 /* (xa - xc) + (yb - yd) */ 00219 r1 = r2 + t1; 00220 /* (xa - xc) - (yb - yd) */ 00221 r2 = r2 - t1; 00222 00223 /* (ya - yc) - (xb - xd) */ 00224 s1 = s2 - t2; 00225 /* (ya - yc) + (xb - xd) */ 00226 s2 = s2 + t2; 00227 00228 co1 = pCoef[ia1 * 2u]; 00229 si1 = pCoef[(ia1 * 2u) + 1u]; 00230 00231 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */ 00232 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) + 00233 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u; 00234 00235 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */ 00236 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) - 00237 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u; 00238 00239 /* index calculation for the coefficients */ 00240 ia3 = 3u * ia1; 00241 co3 = pCoef[ia3 * 2u]; 00242 si3 = pCoef[(ia3 * 2u) + 1u]; 00243 00244 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */ 00245 pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) + 00246 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u; 00247 00248 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */ 00249 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) - 00250 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u; 00251 00252 /* Twiddle coefficients index modifier */ 00253 ia1 = ia1 + twidCoefModifier; 00254 00255 /* Updating input index */ 00256 i0 = i0 + 1u; 00257 00258 } while(--j); 00259 00260 /* end of first stage process */ 00261 00262 /* data is in 5.27(q27) format */ 00263 00264 00265 /* start of Middle stages process */ 00266 00267 00268 /* each stage in middle stages provides two down scaling of the input */ 00269 00270 twidCoefModifier <<= 2u; 00271 00272 00273 for (k = fftLen / 4u; k > 4u; k >>= 2u) 00274 { 00275 /* Initializations for the first stage */ 00276 n1 = n2; 00277 n2 >>= 2u; 00278 ia1 = 0u; 00279 00280 /* Calculation of first stage */ 00281 for (j = 0u; j <= (n2 - 1u); j++) 00282 { 00283 /* index calculation for the coefficients */ 00284 ia2 = ia1 + ia1; 00285 ia3 = ia2 + ia1; 00286 co1 = pCoef[ia1 * 2u]; 00287 si1 = pCoef[(ia1 * 2u) + 1u]; 00288 co2 = pCoef[ia2 * 2u]; 00289 si2 = pCoef[(ia2 * 2u) + 1u]; 00290 co3 = pCoef[ia3 * 2u]; 00291 si3 = pCoef[(ia3 * 2u) + 1u]; 00292 /* Twiddle coefficients index modifier */ 00293 ia1 = ia1 + twidCoefModifier; 00294 00295 for (i0 = j; i0 < fftLen; i0 += n1) 00296 { 00297 /* index calculation for the input as, */ 00298 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */ 00299 i1 = i0 + n2; 00300 i2 = i1 + n2; 00301 i3 = i2 + n2; 00302 00303 /* Butterfly implementation */ 00304 /* xa + xc */ 00305 r1 = pSrc[2u * i0] + pSrc[2u * i2]; 00306 /* xa - xc */ 00307 r2 = pSrc[2u * i0] - pSrc[2u * i2]; 00308 00309 /* ya + yc */ 00310 s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u]; 00311 /* ya - yc */ 00312 s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u]; 00313 00314 /* xb + xd */ 00315 t1 = pSrc[2u * i1] + pSrc[2u * i3]; 00316 00317 /* xa' = xa + xb + xc + xd */ 00318 pSrc[2u * i0] = (r1 + t1) >> 2u; 00319 /* xa + xc -(xb + xd) */ 00320 r1 = r1 - t1; 00321 00322 /* yb + yd */ 00323 t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u]; 00324 /* ya' = ya + yb + yc + yd */ 00325 pSrc[(2u * i0) + 1u] = (s1 + t2) >> 2u; 00326 00327 /* (ya + yc) - (yb + yd) */ 00328 s1 = s1 - t2; 00329 00330 /* (yb - yd) */ 00331 t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u]; 00332 /* (xb - xd) */ 00333 t2 = pSrc[2u * i1] - pSrc[2u * i3]; 00334 00335 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */ 00336 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) + 00337 ((int32_t) (((q63_t) s1 * si2) >> 32))) >> 1u; 00338 00339 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */ 00340 pSrc[(2u * i1) + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) - 00341 ((int32_t) (((q63_t) r1 * si2) >> 32))) >> 1u; 00342 00343 /* (xa - xc) + (yb - yd) */ 00344 r1 = r2 + t1; 00345 /* (xa - xc) - (yb - yd) */ 00346 r2 = r2 - t1; 00347 00348 /* (ya - yc) - (xb - xd) */ 00349 s1 = s2 - t2; 00350 /* (ya - yc) + (xb - xd) */ 00351 s2 = s2 + t2; 00352 00353 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */ 00354 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) + 00355 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u; 00356 00357 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */ 00358 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) - 00359 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u; 00360 00361 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */ 00362 pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) + 00363 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u; 00364 00365 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */ 00366 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) - 00367 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u; 00368 } 00369 } 00370 twidCoefModifier <<= 2u; 00371 } 00372 00373 /* End of Middle stages process */ 00374 00375 /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */ 00376 /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */ 00377 /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */ 00378 /* data is in 5.27(q27) format for the 16 point as there are no middle stages */ 00379 00380 00381 /* start of Last stage process */ 00382 00383 /* Initializations of last stage */ 00384 n1 = n2; 00385 n2 >>= 2u; 00386 00387 /* Calculations of last stage */ 00388 for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1) 00389 { 00390 /* index calculation for the input as, */ 00391 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */ 00392 i1 = i0 + n2; 00393 i2 = i1 + n2; 00394 i3 = i2 + n2; 00395 00396 /* Butterfly implementation */ 00397 /* xa + xb */ 00398 r1 = pSrc[2u * i0] + pSrc[2u * i2]; 00399 /* xa - xb */ 00400 r2 = pSrc[2u * i0] - pSrc[2u * i2]; 00401 00402 /* ya + yc */ 00403 s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u]; 00404 /* ya - yc */ 00405 s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u]; 00406 00407 /* xc + xd */ 00408 t1 = pSrc[2u * i1] + pSrc[2u * i3]; 00409 /* xa' = xa + xb + xc + xd */ 00410 pSrc[2u * i0] = (r1 + t1); 00411 /* (xa + xb) - (xc + xd) */ 00412 r1 = r1 - t1; 00413 00414 /* yb + yd */ 00415 t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u]; 00416 /* ya' = ya + yb + yc + yd */ 00417 pSrc[(2u * i0) + 1u] = (s1 + t2); 00418 /* (ya + yc) - (yb + yd) */ 00419 s1 = s1 - t2; 00420 00421 /* (yb-yd) */ 00422 t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u]; 00423 /* (xb-xd) */ 00424 t2 = pSrc[2u * i1] - pSrc[2u * i3]; 00425 00426 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */ 00427 pSrc[2u * i1] = r1; 00428 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */ 00429 pSrc[(2u * i1) + 1u] = s1; 00430 00431 /* (xa+yb-xc-yd) */ 00432 r1 = r2 + t1; 00433 /* (xa-yb-xc+yd) */ 00434 r2 = r2 - t1; 00435 00436 /* (ya-xb-yc+xd) */ 00437 s1 = s2 - t2; 00438 /* (ya+xb-yc-xd) */ 00439 s2 = s2 + t2; 00440 00441 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */ 00442 pSrc[2u * i2] = r1; 00443 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */ 00444 pSrc[(2u * i2) + 1u] = s1; 00445 00446 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */ 00447 pSrc[2u * i3] = r2; 00448 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */ 00449 pSrc[(2u * i3) + 1u] = s2; 00450 00451 00452 } 00453 00454 /* output is in 11.21(q21) format for the 1024 point */ 00455 /* output is in 9.23(q23) format for the 256 point */ 00456 /* output is in 7.25(q25) format for the 64 point */ 00457 /* output is in 5.27(q27) format for the 16 point */ 00458 00459 /* End of last stage process */ 00460 00461 } 00462 00463 00464 /** 00465 * @brief Core function for the Q31 CIFFT butterfly process. 00466 * @param[in, out] *pSrc points to the in-place buffer of Q31 data type. 00467 * @param[in] fftLen length of the FFT. 00468 * @param[in] *pCoef points to twiddle coefficient buffer. 00469 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 00470 * @return none. 00471 */ 00472 00473 00474 /* 00475 * Radix-4 IFFT algorithm used is : 00476 * 00477 * CIFFT uses same twiddle coefficients as CFFT Function 00478 * x[k] = x[n] + (j)k * x[n + fftLen/4] + (-1)k * x[n+fftLen/2] + (-j)k * x[n+3*fftLen/4] 00479 * 00480 * 00481 * IFFT is implemented with following changes in equations from FFT 00482 * 00483 * Input real and imaginary data: 00484 * x(n) = xa + j * ya 00485 * x(n+N/4 ) = xb + j * yb 00486 * x(n+N/2 ) = xc + j * yc 00487 * x(n+3N 4) = xd + j * yd 00488 * 00489 * 00490 * Output real and imaginary data: 00491 * x(4r) = xa'+ j * ya' 00492 * x(4r+1) = xb'+ j * yb' 00493 * x(4r+2) = xc'+ j * yc' 00494 * x(4r+3) = xd'+ j * yd' 00495 * 00496 * 00497 * Twiddle factors for radix-4 IFFT: 00498 * Wn = co1 + j * (si1) 00499 * W2n = co2 + j * (si2) 00500 * W3n = co3 + j * (si3) 00501 00502 * The real and imaginary output values for the radix-4 butterfly are 00503 * xa' = xa + xb + xc + xd 00504 * ya' = ya + yb + yc + yd 00505 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) 00506 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) 00507 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) 00508 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) 00509 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) 00510 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) 00511 * 00512 */ 00513 00514 void arm_radix4_butterfly_inverse_q31( 00515 q31_t * pSrc, 00516 uint32_t fftLen, 00517 q31_t * pCoef, 00518 uint32_t twidCoefModifier) 00519 { 00520 uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k; 00521 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3; 00522 00523 /* input is be 1.31(q31) format for all FFT sizes */ 00524 /* Total process is divided into three stages */ 00525 /* process first stage, middle stages, & last stage */ 00526 00527 /* Start of first stage process */ 00528 00529 /* Initializations for the first stage */ 00530 n2 = fftLen; 00531 n1 = n2; 00532 /* n2 = fftLen/4 */ 00533 n2 >>= 2u; 00534 i0 = 0u; 00535 ia1 = 0u; 00536 00537 j = n2; 00538 00539 do 00540 { 00541 00542 /* input is in 1.31(q31) format and provide 4 guard bits for the input */ 00543 00544 /* index calculation for the input as, */ 00545 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */ 00546 i1 = i0 + n2; 00547 i2 = i1 + n2; 00548 i3 = i2 + n2; 00549 00550 /* Butterfly implementation */ 00551 /* xa + xc */ 00552 r1 = (pSrc[2u * i0] >> 4u) + (pSrc[2u * i2] >> 4u); 00553 /* xa - xc */ 00554 r2 = (pSrc[2u * i0] >> 4u) - (pSrc[2u * i2] >> 4u); 00555 00556 /* ya + yc */ 00557 s1 = (pSrc[(2u * i0) + 1u] >> 4u) + (pSrc[(2u * i2) + 1u] >> 4u); 00558 /* ya - yc */ 00559 s2 = (pSrc[(2u * i0) + 1u] >> 4u) - (pSrc[(2u * i2) + 1u] >> 4u); 00560 00561 /* xb + xd */ 00562 t1 = (pSrc[2u * i1] >> 4u) + (pSrc[2u * i3] >> 4u); 00563 00564 /* xa' = xa + xb + xc + xd */ 00565 pSrc[2u * i0] = (r1 + t1); 00566 /* (xa + xc) - (xb + xd) */ 00567 r1 = r1 - t1; 00568 /* yb + yd */ 00569 t2 = (pSrc[(2u * i1) + 1u] >> 4u) + (pSrc[(2u * i3) + 1u] >> 4u); 00570 /* ya' = ya + yb + yc + yd */ 00571 pSrc[(2u * i0) + 1u] = (s1 + t2); 00572 00573 /* (ya + yc) - (yb + yd) */ 00574 s1 = s1 - t2; 00575 00576 /* yb - yd */ 00577 t1 = (pSrc[(2u * i1) + 1u] >> 4u) - (pSrc[(2u * i3) + 1u] >> 4u); 00578 /* xb - xd */ 00579 t2 = (pSrc[2u * i1] >> 4u) - (pSrc[2u * i3] >> 4u); 00580 00581 /* index calculation for the coefficients */ 00582 ia2 = 2u * ia1; 00583 co2 = pCoef[ia2 * 2u]; 00584 si2 = pCoef[(ia2 * 2u) + 1u]; 00585 00586 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ 00587 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) - 00588 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u; 00589 00590 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ 00591 pSrc[2u * i1 + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) + 00592 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u; 00593 00594 /* (xa - xc) - (yb - yd) */ 00595 r1 = r2 - t1; 00596 /* (xa - xc) + (yb - yd) */ 00597 r2 = r2 + t1; 00598 00599 /* (ya - yc) + (xb - xd) */ 00600 s1 = s2 + t2; 00601 /* (ya - yc) - (xb - xd) */ 00602 s2 = s2 - t2; 00603 00604 co1 = pCoef[ia1 * 2u]; 00605 si1 = pCoef[(ia1 * 2u) + 1u]; 00606 00607 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ 00608 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) - 00609 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u; 00610 00611 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ 00612 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) + 00613 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u; 00614 00615 /* index calculation for the coefficients */ 00616 ia3 = 3u * ia1; 00617 co3 = pCoef[ia3 * 2u]; 00618 si3 = pCoef[(ia3 * 2u) + 1u]; 00619 00620 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ 00621 pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) - 00622 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u; 00623 00624 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ 00625 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) + 00626 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u; 00627 00628 /* Twiddle coefficients index modifier */ 00629 ia1 = ia1 + twidCoefModifier; 00630 00631 /* Updating input index */ 00632 i0 = i0 + 1u; 00633 00634 } while(--j); 00635 00636 /* data is in 5.27(q27) format */ 00637 /* each stage provides two down scaling of the input */ 00638 00639 00640 /* Start of Middle stages process */ 00641 00642 twidCoefModifier <<= 2u; 00643 00644 /* Calculation of second stage to excluding last stage */ 00645 for (k = fftLen / 4u; k > 4u; k >>= 2u) 00646 { 00647 /* Initializations for the first stage */ 00648 n1 = n2; 00649 n2 >>= 2u; 00650 ia1 = 0u; 00651 00652 for (j = 0; j <= (n2 - 1u); j++) 00653 { 00654 /* index calculation for the coefficients */ 00655 ia2 = ia1 + ia1; 00656 ia3 = ia2 + ia1; 00657 co1 = pCoef[ia1 * 2u]; 00658 si1 = pCoef[(ia1 * 2u) + 1u]; 00659 co2 = pCoef[ia2 * 2u]; 00660 si2 = pCoef[(ia2 * 2u) + 1u]; 00661 co3 = pCoef[ia3 * 2u]; 00662 si3 = pCoef[(ia3 * 2u) + 1u]; 00663 /* Twiddle coefficients index modifier */ 00664 ia1 = ia1 + twidCoefModifier; 00665 00666 for (i0 = j; i0 < fftLen; i0 += n1) 00667 { 00668 /* index calculation for the input as, */ 00669 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */ 00670 i1 = i0 + n2; 00671 i2 = i1 + n2; 00672 i3 = i2 + n2; 00673 00674 /* Butterfly implementation */ 00675 /* xa + xc */ 00676 r1 = pSrc[2u * i0] + pSrc[2u * i2]; 00677 /* xa - xc */ 00678 r2 = pSrc[2u * i0] - pSrc[2u * i2]; 00679 00680 /* ya + yc */ 00681 s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u]; 00682 /* ya - yc */ 00683 s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u]; 00684 00685 /* xb + xd */ 00686 t1 = pSrc[2u * i1] + pSrc[2u * i3]; 00687 00688 /* xa' = xa + xb + xc + xd */ 00689 pSrc[2u * i0] = (r1 + t1) >> 2u; 00690 /* xa + xc -(xb + xd) */ 00691 r1 = r1 - t1; 00692 /* yb + yd */ 00693 t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u]; 00694 /* ya' = ya + yb + yc + yd */ 00695 pSrc[(2u * i0) + 1u] = (s1 + t2) >> 2u; 00696 00697 /* (ya + yc) - (yb + yd) */ 00698 s1 = s1 - t2; 00699 00700 /* (yb - yd) */ 00701 t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u]; 00702 /* (xb - xd) */ 00703 t2 = pSrc[2u * i1] - pSrc[2u * i3]; 00704 00705 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ 00706 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32u)) - 00707 ((int32_t) (((q63_t) s1 * si2) >> 32u))) >> 1u; 00708 00709 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ 00710 pSrc[(2u * i1) + 1u] = 00711 (((int32_t) (((q63_t) s1 * co2) >> 32u)) + 00712 ((int32_t) (((q63_t) r1 * si2) >> 32u))) >> 1u; 00713 00714 /* (xa - xc) - (yb - yd) */ 00715 r1 = r2 - t1; 00716 /* (xa - xc) + (yb - yd) */ 00717 r2 = r2 + t1; 00718 00719 /* (ya - yc) + (xb - xd) */ 00720 s1 = s2 + t2; 00721 /* (ya - yc) - (xb - xd) */ 00722 s2 = s2 - t2; 00723 00724 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ 00725 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) - 00726 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u; 00727 00728 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ 00729 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) + 00730 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u; 00731 00732 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ 00733 pSrc[(2u * i3)] = (((int32_t) (((q63_t) r2 * co3) >> 32)) - 00734 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u; 00735 00736 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ 00737 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) + 00738 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u; 00739 } 00740 } 00741 twidCoefModifier <<= 2u; 00742 } 00743 00744 /* End of Middle stages process */ 00745 00746 /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */ 00747 /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */ 00748 /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */ 00749 /* data is in 5.27(q27) format for the 16 point as there are no middle stages */ 00750 00751 00752 /* Start of last stage process */ 00753 00754 00755 /* Initializations of last stage */ 00756 n1 = n2; 00757 n2 >>= 2u; 00758 00759 /* Calculations of last stage */ 00760 for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1) 00761 { 00762 /* index calculation for the input as, */ 00763 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */ 00764 i1 = i0 + n2; 00765 i2 = i1 + n2; 00766 i3 = i2 + n2; 00767 00768 /* Butterfly implementation */ 00769 /* xa + xc */ 00770 r1 = pSrc[2u * i0] + pSrc[2u * i2]; 00771 /* xa - xc */ 00772 r2 = pSrc[2u * i0] - pSrc[2u * i2]; 00773 00774 /* ya + yc */ 00775 s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u]; 00776 /* ya - yc */ 00777 s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u]; 00778 00779 /* xc + xd */ 00780 t1 = pSrc[2u * i1] + pSrc[2u * i3]; 00781 /* xa' = xa + xb + xc + xd */ 00782 pSrc[2u * i0] = (r1 + t1); 00783 /* (xa + xb) - (xc + xd) */ 00784 r1 = r1 - t1; 00785 00786 /* yb + yd */ 00787 t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u]; 00788 /* ya' = ya + yb + yc + yd */ 00789 pSrc[(2u * i0) + 1u] = (s1 + t2); 00790 /* (ya + yc) - (yb + yd) */ 00791 s1 = s1 - t2; 00792 00793 /* (yb-yd) */ 00794 t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u]; 00795 /* (xb-xd) */ 00796 t2 = pSrc[2u * i1] - pSrc[2u * i3]; 00797 00798 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ 00799 pSrc[2u * i1] = r1; 00800 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ 00801 pSrc[(2u * i1) + 1u] = s1; 00802 00803 /* (xa - xc) - (yb-yd) */ 00804 r1 = r2 - t1; 00805 00806 /* (xa - xc) + (yb-yd) */ 00807 r2 = r2 + t1; 00808 00809 /* (ya - yc) + (xb-xd) */ 00810 s1 = s2 + t2; 00811 00812 /* (ya - yc) - (xb-xd) */ 00813 s2 = s2 - t2; 00814 00815 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ 00816 pSrc[2u * i2] = r1; 00817 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ 00818 pSrc[(2u * i2) + 1u] = s1; 00819 00820 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ 00821 pSrc[2u * i3] = r2; 00822 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ 00823 pSrc[(2u * i3) + 1u] = s2; 00824 00825 } 00826 00827 /* output is in 11.21(q21) format for the 1024 point */ 00828 /* output is in 9.23(q23) format for the 256 point */ 00829 /* output is in 7.25(q25) format for the 64 point */ 00830 /* output is in 5.27(q27) format for the 16 point */ 00831 00832 /* End of last stage process */ 00833 } 00834 00835 00836 /* 00837 * @brief In-place bit reversal function. 00838 * @param[in, out] *pSrc points to the in-place buffer of Q31 data type. 00839 * @param[in] fftLen length of the FFT. 00840 * @param[in] bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table 00841 * @param[in] *pBitRevTab points to bit reversal table. 00842 * @return none. 00843 */ 00844 00845 void arm_bitreversal_q31( 00846 q31_t * pSrc, 00847 uint32_t fftLen, 00848 uint16_t bitRevFactor, 00849 uint16_t * pBitRevTable) 00850 { 00851 uint32_t fftLenBy2, fftLenBy2p1, i, j; 00852 q31_t in; 00853 00854 /* Initializations */ 00855 j = 0u; 00856 fftLenBy2 = fftLen / 2u; 00857 fftLenBy2p1 = (fftLen / 2u) + 1u; 00858 00859 /* Bit Reversal Implementation */ 00860 for (i = 0u; i <= (fftLenBy2 - 2u); i += 2u) 00861 { 00862 if(i < j) 00863 { 00864 /* pSrc[i] <-> pSrc[j]; */ 00865 in = pSrc[2u * i]; 00866 pSrc[2u * i] = pSrc[2u * j]; 00867 pSrc[2u * j] = in; 00868 00869 /* pSrc[i+1u] <-> pSrc[j+1u] */ 00870 in = pSrc[(2u * i) + 1u]; 00871 pSrc[(2u * i) + 1u] = pSrc[(2u * j) + 1u]; 00872 pSrc[(2u * j) + 1u] = in; 00873 00874 /* pSrc[i+fftLenBy2p1] <-> pSrc[j+fftLenBy2p1] */ 00875 in = pSrc[2u * (i + fftLenBy2p1)]; 00876 pSrc[2u * (i + fftLenBy2p1)] = pSrc[2u * (j + fftLenBy2p1)]; 00877 pSrc[2u * (j + fftLenBy2p1)] = in; 00878 00879 /* pSrc[i+fftLenBy2p1+1u] <-> pSrc[j+fftLenBy2p1+1u] */ 00880 in = pSrc[(2u * (i + fftLenBy2p1)) + 1u]; 00881 pSrc[(2u * (i + fftLenBy2p1)) + 1u] = 00882 pSrc[(2u * (j + fftLenBy2p1)) + 1u]; 00883 pSrc[(2u * (j + fftLenBy2p1)) + 1u] = in; 00884 00885 } 00886 00887 /* pSrc[i+1u] <-> pSrc[j+1u] */ 00888 in = pSrc[2u * (i + 1u)]; 00889 pSrc[2u * (i + 1u)] = pSrc[2u * (j + fftLenBy2)]; 00890 pSrc[2u * (j + fftLenBy2)] = in; 00891 00892 /* pSrc[i+2u] <-> pSrc[j+2u] */ 00893 in = pSrc[(2u * (i + 1u)) + 1u]; 00894 pSrc[(2u * (i + 1u)) + 1u] = pSrc[(2u * (j + fftLenBy2)) + 1u]; 00895 pSrc[(2u * (j + fftLenBy2)) + 1u] = in; 00896 00897 /* Reading the index for the bit reversal */ 00898 j = *pBitRevTable; 00899 00900 /* Updating the bit reversal index depending on the fft length */ 00901 pBitRevTable += bitRevFactor; 00902 } 00903 }
Generated on Tue Jul 12 2022 14:13:52 by 1.7.2