CMSIS DSP Library from CMSIS 2.0. See http://www.onarm.com/cmsis/ for full details
Dependents: K22F_DSP_Matrix_least_square BNO055-ELEC3810 1BNO055 ECE4180Project--Slave2 ... more
arm_cfft_radix4_q15.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 29. November 2010 00005 * $Revision: V1.0.3 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_cfft_radix4_q15.c 00009 * 00010 * Description: This file has function definition of Radix-4 FFT & IFFT function and 00011 * In-place bit reversal using bit reversal table 00012 * 00013 * Target Processor: Cortex-M4/Cortex-M3 00014 * 00015 * Version 1.0.3 2010/11/29 00016 * Re-organized the CMSIS folders and updated documentation. 00017 * 00018 * Version 1.0.2 2010/11/11 00019 * Documentation updated. 00020 * 00021 * Version 1.0.1 2010/10/05 00022 * Production release and review comments incorporated. 00023 * 00024 * Version 1.0.0 2010/09/20 00025 * Production release and review comments incorporated. 00026 * 00027 * Version 0.0.5 2010/04/26 00028 * incorporated review comments and updated with latest CMSIS layer 00029 * 00030 * Version 0.0.3 2010/03/10 00031 * Initial version 00032 * -------------------------------------------------------------------- */ 00033 00034 #include "arm_math.h" 00035 00036 /** 00037 * @ingroup groupTransforms 00038 */ 00039 00040 /** 00041 * @addtogroup CFFT_CIFFT 00042 * @{ 00043 */ 00044 00045 00046 /** 00047 * @details 00048 * @brief Processing function for the Q15 CFFT/CIFFT. 00049 * @param[in] *S points to an instance of the Q15 CFFT/CIFFT structure. 00050 * @param[in, out] *pSrc points to the complex data buffer. Processing occurs in-place. 00051 * @return none. 00052 * 00053 * \par Input and output formats: 00054 * \par 00055 * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process. 00056 * Hence the output format is different for different FFT sizes. 
00057 * The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT: 00058 * \par 00059 * \image html CFFTQ15.gif "Input and Output Formats for Q15 CFFT" 00060 * \image html CIFFTQ15.gif "Input and Output Formats for Q15 CIFFT" 00061 */ 00062 00063 void arm_cfft_radix4_q15( 00064 const arm_cfft_radix4_instance_q15 * S, 00065 q15_t * pSrc) 00066 { 00067 if(S->ifftFlag == 1u) 00068 { 00069 /* Complex IFFT radix-4 */ 00070 arm_radix4_butterfly_inverse_q15(pSrc, S->fftLen, S->pTwiddle, 00071 S->twidCoefModifier); 00072 } 00073 else 00074 { 00075 /* Complex FFT radix-4 */ 00076 arm_radix4_butterfly_q15(pSrc, S->fftLen, S->pTwiddle, 00077 S->twidCoefModifier); 00078 } 00079 00080 if(S->bitReverseFlag == 1u) 00081 { 00082 /* Bit Reversal */ 00083 arm_bitreversal_q15(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable); 00084 } 00085 00086 } 00087 00088 /** 00089 * @} end of CFFT_CIFFT group 00090 */ 00091 00092 /* 00093 * Radix-4 FFT algorithm used is : 00094 * 00095 * Input real and imaginary data: 00096 * x(n) = xa + j * ya 00097 * x(n+N/4 ) = xb + j * yb 00098 * x(n+N/2 ) = xc + j * yc 00099 * x(n+3N 4) = xd + j * yd 00100 * 00101 * 00102 * Output real and imaginary data: 00103 * x(4r) = xa'+ j * ya' 00104 * x(4r+1) = xb'+ j * yb' 00105 * x(4r+2) = xc'+ j * yc' 00106 * x(4r+3) = xd'+ j * yd' 00107 * 00108 * 00109 * Twiddle factors for radix-4 FFT: 00110 * Wn = co1 + j * (- si1) 00111 * W2n = co2 + j * (- si2) 00112 * W3n = co3 + j * (- si3) 00113 00114 * The real and imaginary output values for the radix-4 butterfly are 00115 * xa' = xa + xb + xc + xd 00116 * ya' = ya + yb + yc + yd 00117 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) 00118 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) 00119 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) 00120 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) 00121 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) 00122 * yd' = (ya+xb-yc-xd)* co3 - 
(xa-yb-xc+yd)* (si3) 00123 * 00124 */ 00125 00126 /** 00127 * @brief Core function for the Q15 CFFT butterfly process. 00128 * @param[in, out] *pSrc16 points to the in-place buffer of Q15 data type. 00129 * @param[in] fftLen length of the FFT. 00130 * @param[in] *pCoef16 points to twiddle coefficient buffer. 00131 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 00132 * @return none. 00133 */ 00134 00135 void arm_radix4_butterfly_q15( 00136 q15_t * pSrc16, 00137 uint32_t fftLen, 00138 q15_t * pCoef16, 00139 uint32_t twidCoefModifier) 00140 { 00141 q31_t R, S, T, U; 00142 q31_t C1, C2, C3, out1, out2; 00143 q31_t *pSrc, *pCoeff; 00144 uint32_t n1, n2, ic, i0, i1, i2, i3, j, k; 00145 q15_t in; 00146 00147 /* Total process is divided into three stages */ 00148 00149 /* process first stage, middle stages, & last stage */ 00150 00151 /* pointer initializations for SIMD calculations */ 00152 pSrc = (q31_t *) pSrc16; 00153 pCoeff = (q31_t *) pCoef16; 00154 00155 /* Initializations for the first stage */ 00156 n2 = fftLen; 00157 n1 = n2; 00158 00159 /* n2 = fftLen/4 */ 00160 n2 >>= 2u; 00161 00162 /* Index for twiddle coefficient */ 00163 ic = 0u; 00164 00165 /* Index for input read and output write */ 00166 i0 = 0u; 00167 j = n2; 00168 00169 /* Input is in 1.15(q15) format */ 00170 00171 /* start of first stage process */ 00172 do 00173 { 00174 /* Butterfly implementation */ 00175 00176 /* index calculation for the input as, */ 00177 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */ 00178 i1 = i0 + n2; 00179 i2 = i1 + n2; 00180 i3 = i2 + n2; 00181 00182 /* Reading i0, i0+fftLen/2 inputs */ 00183 /* Read ya (real), xa(imag) input */ 00184 T = pSrc[i0]; 00185 in = ((int16_t) (T & 0xFFFF)) >> 2; 00186 T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF); 00187 /* Read yc (real), xc(imag) input */ 00188 S = pSrc[i2]; 00189 in = ((int16_t) (S & 0xFFFF)) >> 2; 00190 S = ((S 
>> 2) & 0xFFFF0000) | (in & 0xFFFF); 00191 /* R = packed((ya + yc), (xa + xc) ) */ 00192 R = __QADD16(T, S); 00193 /* S = packed((ya - yc), (xa - xc) ) */ 00194 S = __QSUB16(T, S); 00195 00196 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 00197 /* Read yb (real), xb(imag) input */ 00198 T = pSrc[i1]; 00199 in = ((int16_t) (T & 0xFFFF)) >> 2; 00200 T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF); 00201 /* Read yd (real), xd(imag) input */ 00202 U = pSrc[i3]; 00203 in = ((int16_t) (U & 0xFFFF)) >> 2; 00204 U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF); 00205 /* T = packed((yb + yd), (xb + xd) ) */ 00206 T = __QADD16(T, U); 00207 00208 /* writing the butterfly processed i0 sample */ 00209 /* xa' = xa + xb + xc + xd */ 00210 /* ya' = ya + yb + yc + yd */ 00211 pSrc[i0] = __SHADD16(R, T); 00212 00213 /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */ 00214 R = __QSUB16(R, T); 00215 00216 /* co2 & si2 are read from SIMD Coefficient pointer */ 00217 C2 = pCoeff[2u * ic]; 00218 00219 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 00220 out1 = __SMUAD(C2, R) >> 16u; 00221 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00222 out2 = __SMUSDX(C2, R); 00223 00224 /* Reading i0+fftLen/4 */ 00225 /* T = packed(yb, xb) */ 00226 T = pSrc[i1]; 00227 in = ((int16_t) (T & 0xFFFF)) >> 2; 00228 T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF); 00229 00230 /* writing the butterfly processed i0 + fftLen/4 sample */ 00231 /* writing output(xc', yc') in little endian format */ 00232 pSrc[i1] = (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00233 00234 /* Butterfly calculations */ 00235 /* U = packed(yd, xd) */ 00236 U = pSrc[i3]; 00237 in = ((int16_t) (U & 0xFFFF)) >> 2; 00238 U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF); 00239 /* T = packed(yb-yd, xb-xd) */ 00240 T = __QSUB16(T, U); 00241 00242 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 00243 R = __QASX(S, T); 00244 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 00245 S = __QSAX(S, T); 00246 
00247 /* co1 & si1 are read from SIMD Coefficient pointer */ 00248 C1 = pCoeff[ic]; 00249 /* Butterfly process for the i0+fftLen/2 sample */ 00250 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 00251 out1 = __SMUAD(C1, S) >> 16u; 00252 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 00253 out2 = __SMUSDX(C1, S); 00254 /* writing output(xb', yb') in little endian format */ 00255 pSrc[i2] = ((out2) & 0xFFFF0000) | ((out1) & 0x0000FFFF); 00256 00257 00258 /* co3 & si3 are read from SIMD Coefficient pointer */ 00259 C3 = pCoeff[3u * ic]; 00260 /* Butterfly process for the i0+3fftLen/4 sample */ 00261 /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ 00262 out1 = __SMUAD(C3, R) >> 16u; 00263 /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ 00264 out2 = __SMUSDX(C3, R); 00265 /* writing output(xd', yd') in little endian format */ 00266 pSrc[i3] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00267 00268 /* Twiddle coefficients index modifier */ 00269 ic = ic + twidCoefModifier; 00270 00271 /* Updating input index */ 00272 i0 = i0 + 1u; 00273 00274 } while(--j); 00275 /* data is in 4.11(q11) format */ 00276 00277 /* end of first stage process */ 00278 00279 00280 /* start of middle stage process */ 00281 00282 /* Twiddle coefficients index modifier */ 00283 twidCoefModifier <<= 2u; 00284 00285 /* Calculation of Middle stage */ 00286 for (k = fftLen / 4u; k > 4u; k >>= 2u) 00287 { 00288 /* Initializations for the middle stage */ 00289 n1 = n2; 00290 n2 >>= 2u; 00291 ic = 0u; 00292 00293 for (j = 0u; j <= (n2 - 1u); j++) 00294 { 00295 /* index calculation for the coefficients */ 00296 C1 = pCoeff[ic]; 00297 C2 = pCoeff[2u * ic]; 00298 C3 = pCoeff[3u * ic]; 00299 00300 /* Twiddle coefficients index modifier */ 00301 ic = ic + twidCoefModifier; 00302 00303 /* Butterfly implementation */ 00304 for (i0 = j; i0 < fftLen; i0 += n1) 00305 { 00306 /* index calculation for the input as, */ 00307 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 
+ 3fftLen/4] */ 00308 i1 = i0 + n2; 00309 i2 = i1 + n2; 00310 i3 = i2 + n2; 00311 00312 /* Reading i0, i0+fftLen/2 inputs */ 00313 /* Read ya (real), xa(imag) input */ 00314 T = pSrc[i0]; 00315 00316 /* Read yc (real), xc(imag) input */ 00317 S = pSrc[i2]; 00318 00319 /* R = packed( (ya + yc), (xa + xc)) */ 00320 R = __QADD16(T, S); 00321 00322 /* S = packed((ya - yc), (xa - xc)) */ 00323 S = __QSUB16(T, S); 00324 00325 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 00326 /* Read yb (real), xb(imag) input */ 00327 T = pSrc[i1]; 00328 00329 /* Read yd (real), xd(imag) input */ 00330 U = pSrc[i3]; 00331 00332 00333 /* T = packed( (yb + yd), (xb + xd)) */ 00334 T = __QADD16(T, U); 00335 00336 00337 /* writing the butterfly processed i0 sample */ 00338 00339 /* xa' = xa + xb + xc + xd */ 00340 /* ya' = ya + yb + yc + yd */ 00341 out1 = __SHADD16(R, T); 00342 in = ((int16_t) (out1 & 0xFFFF)) >> 1; 00343 out1 = ((out1 >> 1) & 0xFFFF0000) | (in & 0xFFFF); 00344 pSrc[i0] = out1; 00345 00346 /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */ 00347 R = __SHSUB16(R, T); 00348 00349 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ 00350 out1 = __SMUAD(C2, R) >> 16u; 00351 00352 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00353 out2 = __SMUSDX(C2, R); 00354 00355 /* Reading i0+3fftLen/4 */ 00356 /* Read yb (real), xb(imag) input */ 00357 T = pSrc[i1]; 00358 00359 /* writing the butterfly processed i0 + fftLen/4 sample */ 00360 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 00361 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00362 pSrc[i1] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00363 00364 /* Butterfly calculations */ 00365 00366 /* Read yd (real), xd(imag) input */ 00367 U = pSrc[i3]; 00368 00369 /* T = packed(yb-yd, xb-xd) */ 00370 T = __QSUB16(T, U); 00371 00372 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 00373 R = __SHASX(S, T); 00374 00375 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 00376 S = __SHSAX(S, T); 
00377 00378 00379 /* Butterfly process for the i0+fftLen/2 sample */ 00380 out1 = __SMUAD(C1, S) >> 16u; 00381 out2 = __SMUSDX(C1, S); 00382 00383 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 00384 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 00385 pSrc[i2] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00386 00387 /* Butterfly process for the i0+3fftLen/4 sample */ 00388 out1 = __SMUAD(C3, R) >> 16u; 00389 out2 = __SMUSDX(C3, R); 00390 /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ 00391 /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ 00392 pSrc[i3] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00393 } 00394 } 00395 /* Twiddle coefficients index modifier */ 00396 twidCoefModifier <<= 2u; 00397 } 00398 /* end of middle stage process */ 00399 00400 00401 /* data is in 10.6(q6) format for the 1024 point */ 00402 /* data is in 8.8(q8) format for the 256 point */ 00403 /* data is in 6.10(q10) format for the 64 point */ 00404 /* data is in 4.12(q12) format for the 16 point */ 00405 00406 /* Initializations for the last stage */ 00407 n1 = n2; 00408 n2 >>= 2u; 00409 00410 /* start of last stage process */ 00411 00412 /* Butterfly implementation */ 00413 for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1) 00414 { 00415 /* index calculation for the input as, */ 00416 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */ 00417 i1 = i0 + n2; 00418 i2 = i1 + n2; 00419 i3 = i2 + n2; 00420 00421 /* Reading i0, i0+fftLen/2 inputs */ 00422 /* Read ya (real), xa(imag) input */ 00423 T = pSrc[i0]; 00424 /* Read yc (real), xc(imag) input */ 00425 S = pSrc[i2]; 00426 00427 /* R = packed((ya + yc), (xa + xc)) */ 00428 R = __QADD16(T, S); 00429 /* S = packed((ya - yc), (xa - xc)) */ 00430 S = __QSUB16(T, S); 00431 00432 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 00433 /* Read yb (real), xb(imag) input */ 00434 T = pSrc[i1]; 00435 /* Read yd (real), xd(imag) input */ 00436 U = pSrc[i3]; 00437 00438 /* T = packed((yb + yd), (xb + 
xd)) */ 00439 T = __QADD16(T, U); 00440 00441 /* writing the butterfly processed i0 sample */ 00442 /* xa' = xa + xb + xc + xd */ 00443 /* ya' = ya + yb + yc + yd */ 00444 pSrc[i0] = __SHADD16(R, T); 00445 00446 /* R = packed((ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */ 00447 R = __SHSUB16(R, T); 00448 00449 /* Read yb (real), xb(imag) input */ 00450 T = pSrc[i1]; 00451 00452 /* writing the butterfly processed i0 + fftLen/4 sample */ 00453 /* xc' = (xa-xb+xc-xd) */ 00454 /* yc' = (ya-yb+yc-yd) */ 00455 pSrc[i1] = R; 00456 00457 /* Read yd (real), xd(imag) input */ 00458 U = pSrc[i3]; 00459 /* T = packed( (yb - yd), (xb - xd)) */ 00460 T = __QSUB16(T, U); 00461 00462 /* writing the butterfly processed i0 + fftLen/2 sample */ 00463 /* xb' = (xa+yb-xc-yd) */ 00464 /* yb' = (ya-xb-yc+xd) */ 00465 pSrc[i2] = __SHSAX(S, T); 00466 00467 /* writing the butterfly processed i0 + 3fftLen/4 sample */ 00468 /* xd' = (xa-yb-xc+yd) */ 00469 /* yd' = (ya+xb-yc-xd) */ 00470 pSrc[i3] = __SHASX(S, T); 00471 00472 } 00473 00474 /* end of last stage process */ 00475 00476 /* output is in 11.5(q5) format for the 1024 point */ 00477 /* output is in 9.7(q7) format for the 256 point */ 00478 /* output is in 7.9(q9) format for the 64 point */ 00479 /* output is in 5.11(q11) format for the 16 point */ 00480 00481 } 00482 00483 00484 /** 00485 * @brief Core function for the Q15 CIFFT butterfly process. 00486 * @param[in, out] *pSrc16 points to the in-place buffer of Q15 data type. 00487 * @param[in] fftLen length of the FFT. 00488 * @param[in] *pCoef16 points to twiddle coefficient buffer. 00489 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 00490 * @return none. 
00491 */ 00492 00493 /* 00494 * Radix-4 IFFT algorithm used is : 00495 * 00496 * CIFFT uses same twiddle coefficients as CFFT function 00497 * x[k] = x[n] + (j)k * x[n + fftLen/4] + (-1)k * x[n+fftLen/2] + (-j)k * x[n+3*fftLen/4] 00498 * 00499 * 00500 * IFFT is implemented with following changes in equations from FFT 00501 * 00502 * Input real and imaginary data: 00503 * x(n) = xa + j * ya 00504 * x(n+N/4 ) = xb + j * yb 00505 * x(n+N/2 ) = xc + j * yc 00506 * x(n+3N 4) = xd + j * yd 00507 * 00508 * 00509 * Output real and imaginary data: 00510 * x(4r) = xa'+ j * ya' 00511 * x(4r+1) = xb'+ j * yb' 00512 * x(4r+2) = xc'+ j * yc' 00513 * x(4r+3) = xd'+ j * yd' 00514 * 00515 * 00516 * Twiddle factors for radix-4 IFFT: 00517 * Wn = co1 + j * (si1) 00518 * W2n = co2 + j * (si2) 00519 * W3n = co3 + j * (si3) 00520 00521 * The real and imaginary output values for the radix-4 butterfly are 00522 * xa' = xa + xb + xc + xd 00523 * ya' = ya + yb + yc + yd 00524 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) 00525 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) 00526 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) 00527 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) 00528 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) 00529 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) 00530 * 00531 */ 00532 00533 void arm_radix4_butterfly_inverse_q15( 00534 q15_t * pSrc16, 00535 uint32_t fftLen, 00536 q15_t * pCoef16, 00537 uint32_t twidCoefModifier) 00538 { 00539 q31_t R, S, T, U; 00540 q31_t C1, C2, C3, out1, out2; 00541 q31_t *pSrc, *pCoeff; 00542 uint32_t n1, n2, ic, i0, i1, i2, i3, j, k; 00543 q15_t in; 00544 00545 /* Total process is divided into three stages */ 00546 00547 /* process first stage, middle stages, & last stage */ 00548 00549 /* pointer initializations for SIMD calculations */ 00550 pSrc = (q31_t *) pSrc16; 00551 pCoeff = (q31_t *) pCoef16; 00552 00553 /* Initializations for the first stage */ 00554 n2 = fftLen; 00555 n1 = n2; 00556 00557 /* n2 = fftLen/4 
*/ 00558 n2 >>= 2u; 00559 00560 /* Index for twiddle coefficient */ 00561 ic = 0u; 00562 00563 /* Index for input read and output write */ 00564 i0 = 0u; 00565 00566 j = n2; 00567 00568 /* Input is in 1.15(q15) format */ 00569 00570 /* Start of first stage process */ 00571 do 00572 { 00573 /* Butterfly implementation */ 00574 00575 /* index calculation for the input as, */ 00576 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */ 00577 i1 = i0 + n2; 00578 i2 = i1 + n2; 00579 i3 = i2 + n2; 00580 00581 /* Reading i0, i0+fftLen/2 inputs */ 00582 /* Read ya (real), xa(imag) input */ 00583 T = pSrc[i0]; 00584 in = ((int16_t) (T & 0xFFFF)) >> 2; 00585 T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF); 00586 /* Read yc (real), xc(imag) input */ 00587 S = pSrc[i2]; 00588 in = ((int16_t) (S & 0xFFFF)) >> 2; 00589 S = ((S >> 2) & 0xFFFF0000) | (in & 0xFFFF); 00590 00591 /* R = packed((ya + yc), (xa + xc) ) */ 00592 R = __QADD16(T, S); 00593 /* S = packed((ya - yc), (xa - xc) ) */ 00594 S = __QSUB16(T, S); 00595 00596 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 00597 /* Read yb (real), xb(imag) input */ 00598 T = pSrc[i1]; 00599 in = ((int16_t) (T & 0xFFFF)) >> 2; 00600 T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF); 00601 /* Read yd (real), xd(imag) input */ 00602 U = pSrc[i3]; 00603 in = ((int16_t) (U & 0xFFFF)) >> 2; 00604 U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF); 00605 00606 /* T = packed((yb + yd), (xb + xd) ) */ 00607 T = __QADD16(T, U); 00608 00609 /* writing the butterfly processed i0 sample */ 00610 /* xa' = xa + xb + xc + xd */ 00611 /* ya' = ya + yb + yc + yd */ 00612 pSrc[i0] = __SHADD16(R, T); 00613 00614 /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */ 00615 R = __QSUB16(R, T); 00616 /* co2 & si2 are read from SIMD Coefficient pointer */ 00617 C2 = pCoeff[2u * ic]; 00618 /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */ 00619 out1 = __SMUSD(C2, R) >> 16u; 00620 /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */ 
00621 out2 = __SMUADX(C2, R); 00622 00623 /* Reading i0+fftLen/4 */ 00624 /* T = packed(yb, xb) */ 00625 T = pSrc[i1]; 00626 in = ((int16_t) (T & 0xFFFF)) >> 2; 00627 T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF); 00628 00629 /* writing the butterfly processed i0 + fftLen/4 sample */ 00630 /* writing output(xc', yc') in little endian format */ 00631 pSrc[i1] = (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00632 00633 /* Butterfly calculations */ 00634 /* U = packed(yd, xd) */ 00635 U = pSrc[i3]; 00636 in = ((int16_t) (U & 0xFFFF)) >> 2; 00637 U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF); 00638 00639 /* T = packed(yb-yd, xb-xd) */ 00640 T = __QSUB16(T, U); 00641 /* R = packed((ya-yc) - (xb- xd) , (xa-xc) + (yb-yd)) */ 00642 R = __QSAX(S, T); 00643 /* S = packed((ya-yc) + (xb- xd), (xa-xc) - (yb-yd)) */ 00644 S = __QASX(S, T); 00645 00646 /* co1 & si1 are read from SIMD Coefficient pointer */ 00647 C1 = pCoeff[ic]; 00648 /* Butterfly process for the i0+fftLen/2 sample */ 00649 /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */ 00650 out1 = __SMUSD(C1, S) >> 16u; 00651 /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */ 00652 out2 = __SMUADX(C1, S); 00653 /* writing output(xb', yb') in little endian format */ 00654 pSrc[i2] = ((out2) & 0xFFFF0000) | ((out1) & 0x0000FFFF); 00655 00656 /* co3 & si3 are read from SIMD Coefficient pointer */ 00657 C3 = pCoeff[3u * ic]; 00658 /* Butterfly process for the i0+3fftLen/4 sample */ 00659 /* xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) */ 00660 out1 = __SMUSD(C3, R) >> 16u; 00661 /* yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) */ 00662 out2 = __SMUADX(C3, R); 00663 /* writing output(xd', yd') in little endian format */ 00664 pSrc[i3] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00665 00666 /* Twiddle coefficients index modifier */ 00667 ic = ic + twidCoefModifier; 00668 00669 /* Updating input index */ 00670 i0 = i0 + 1u; 00671 00672 } while(--j); 00673 00674 /* End of first stage process */ 00675 00676 /* data 
is in 4.11(q11) format */ 00677 00678 00679 /* Start of Middle stage process */ 00680 00681 /* Twiddle coefficients index modifier */ 00682 twidCoefModifier <<= 2u; 00683 00684 /* Calculation of Middle stage */ 00685 for (k = fftLen / 4u; k > 4u; k >>= 2u) 00686 { 00687 /* Initializations for the middle stage */ 00688 n1 = n2; 00689 n2 >>= 2u; 00690 ic = 0u; 00691 00692 for (j = 0u; j <= (n2 - 1u); j++) 00693 { 00694 /* index calculation for the coefficients */ 00695 C1 = pCoeff[ic]; 00696 C2 = pCoeff[2u * ic]; 00697 C3 = pCoeff[3u * ic]; 00698 00699 /* Twiddle coefficients index modifier */ 00700 ic = ic + twidCoefModifier; 00701 00702 /* Butterfly implementation */ 00703 for (i0 = j; i0 < fftLen; i0 += n1) 00704 { 00705 /* index calculation for the input as, */ 00706 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */ 00707 i1 = i0 + n2; 00708 i2 = i1 + n2; 00709 i3 = i2 + n2; 00710 00711 /* Reading i0, i0+fftLen/2 inputs */ 00712 /* Read ya (real), xa(imag) input */ 00713 T = pSrc[i0]; 00714 00715 /* Read yc (real), xc(imag) input */ 00716 S = pSrc[i2]; 00717 00718 00719 /* R = packed( (ya + yc), (xa + xc)) */ 00720 R = __QADD16(T, S); 00721 /* S = packed((ya - yc), (xa - xc)) */ 00722 S = __QSUB16(T, S); 00723 00724 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 00725 /* Read yb (real), xb(imag) input */ 00726 T = pSrc[i1]; 00727 00728 /* Read yd (real), xd(imag) input */ 00729 U = pSrc[i3]; 00730 00731 00732 /* T = packed( (yb + yd), (xb + xd)) */ 00733 T = __QADD16(T, U); 00734 00735 /* writing the butterfly processed i0 sample */ 00736 /* xa' = xa + xb + xc + xd */ 00737 /* ya' = ya + yb + yc + yd */ 00738 out1 = __SHADD16(R, T); 00739 in = ((int16_t) (out1 & 0xFFFF)) >> 1; 00740 out1 = ((out1 >> 1) & 0xFFFF0000) | (in & 0xFFFF); 00741 pSrc[i0] = out1; 00742 00743 00744 00745 /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */ 00746 R = __SHSUB16(R, T); 00747 00748 /* (ya-yb+yc-yd)* (si2) - (xa-xb+xc-xd)* co2 */ 
00749 out1 = __SMUSD(C2, R) >> 16u; 00750 /* (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */ 00751 out2 = __SMUADX(C2, R); 00752 00753 /* Reading i0+3fftLen/4 */ 00754 /* Read yb (real), xb(imag) input */ 00755 T = pSrc[i1]; 00756 00757 /* writing the butterfly processed i0 + fftLen/4 sample */ 00758 /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */ 00759 /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */ 00760 pSrc[i1] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00761 00762 /* Butterfly calculations */ 00763 /* Read yd (real), xd(imag) input */ 00764 U = pSrc[i3]; 00765 00766 /* T = packed(yb-yd, xb-xd) */ 00767 T = __QSUB16(T, U); 00768 00769 /* R = packed((ya-yc) - (xb- xd) , (xa-xc) + (yb-yd)) */ 00770 R = __SHSAX(S, T); 00771 00772 /* S = packed((ya-yc) + (xb- xd), (xa-xc) - (yb-yd)) */ 00773 S = __SHASX(S, T); 00774 00775 /* Butterfly process for the i0+fftLen/2 sample */ 00776 out1 = __SMUSD(C1, S) >> 16u; 00777 out2 = __SMUADX(C1, S); 00778 /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */ 00779 /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */ 00780 pSrc[i2] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00781 00782 /* Butterfly process for the i0+3fftLen/4 sample */ 00783 out1 = __SMUSD(C3, R) >> 16u; 00784 out2 = __SMUADX(C3, R); 00785 /* xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) */ 00786 /* yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) */ 00787 pSrc[i3] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00788 00789 00790 } 00791 } 00792 /* Twiddle coefficients index modifier */ 00793 twidCoefModifier <<= 2u; 00794 } 00795 /* End of Middle stages process */ 00796 00797 00798 /* data is in 10.6(q6) format for the 1024 point */ 00799 /* data is in 8.8(q8) format for the 256 point */ 00800 /* data is in 6.10(q10) format for the 64 point */ 00801 /* data is in 4.12(q12) format for the 16 point */ 00802 00803 /* start of last stage process */ 00804 00805 00806 /* Initializations for the last stage */ 00807 n1 = n2; 00808 n2 >>= 2u; 00809 
00810 /* Butterfly implementation */ 00811 for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1) 00812 { 00813 /* index calculation for the input as, */ 00814 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */ 00815 i1 = i0 + n2; 00816 i2 = i1 + n2; 00817 i3 = i2 + n2; 00818 00819 /* Reading i0, i0+fftLen/2 inputs */ 00820 /* Read ya (real), xa(imag) input */ 00821 T = pSrc[i0]; 00822 /* Read yc (real), xc(imag) input */ 00823 S = pSrc[i2]; 00824 00825 /* R = packed((ya + yc), (xa + xc)) */ 00826 R = __QADD16(T, S); 00827 /* S = packed((ya - yc), (xa - xc)) */ 00828 S = __QSUB16(T, S); 00829 00830 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 00831 /* Read yb (real), xb(imag) input */ 00832 T = pSrc[i1]; 00833 /* Read yd (real), xd(imag) input */ 00834 U = pSrc[i3]; 00835 00836 /* T = packed((yb + yd), (xb + xd)) */ 00837 T = __QADD16(T, U); 00838 00839 /* writing the butterfly processed i0 sample */ 00840 /* xa' = xa + xb + xc + xd */ 00841 /* ya' = ya + yb + yc + yd */ 00842 pSrc[i0] = __SHADD16(R, T); 00843 00844 /* R = packed((ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */ 00845 R = __SHSUB16(R, T); 00846 00847 /* Read yb (real), xb(imag) input */ 00848 T = pSrc[i1]; 00849 00850 /* writing the butterfly processed i0 + fftLen/4 sample */ 00851 /* xc' = (xa-xb+xc-xd) */ 00852 /* yc' = (ya-yb+yc-yd) */ 00853 pSrc[i1] = R; 00854 00855 /* Read yd (real), xd(imag) input */ 00856 U = pSrc[i3]; 00857 /* T = packed( (yb - yd), (xb - xd)) */ 00858 T = __QSUB16(T, U); 00859 00860 /* writing the butterfly processed i0 + fftLen/2 sample */ 00861 /* xb' = (xa-yb-xc+yd) */ 00862 /* yb' = (ya+xb-yc-xd) */ 00863 pSrc[i2] = __SHASX(S, T); 00864 00865 /* writing the butterfly processed i0 + 3fftLen/4 sample */ 00866 /* xd' = (xa+yb-xc-yd) */ 00867 /* yd' = (ya-xb-yc+xd) */ 00868 pSrc[i3] = __SHSAX(S, T); 00869 } 00870 /* end of last stage process */ 00871 00872 /* output is in 11.5(q5) format for the 1024 point */ 00873 /* output is in 9.7(q7) format for 
the 256 point */ 00874 /* output is in 7.9(q9) format for the 64 point */ 00875 /* output is in 5.11(q11) format for the 16 point */ 00876 } 00877 00878 00879 /* 00880 * @brief In-place bit reversal function. 00881 * @param[in, out] *pSrc points to the in-place buffer of Q15 data type. 00882 * @param[in] fftLen length of the FFT. 00883 * @param[in] bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table 00884 * @param[in] *pBitRevTab points to bit reversal table. 00885 * @return none. 00886 */ 00887 00888 void arm_bitreversal_q15( 00889 q15_t * pSrc16, 00890 uint32_t fftLen, 00891 uint16_t bitRevFactor, 00892 uint16_t * pBitRevTab) 00893 { 00894 q31_t *pSrc = (q31_t *) pSrc16; 00895 q31_t in; 00896 uint32_t fftLenBy2, fftLenBy2p1; 00897 uint32_t i, j; 00898 00899 /* Initializations */ 00900 j = 0u; 00901 fftLenBy2 = fftLen / 2u; 00902 fftLenBy2p1 = (fftLen / 2u) + 1u; 00903 00904 /* Bit Reversal Implementation */ 00905 for (i = 0u; i <= (fftLenBy2 - 2u); i += 2u) 00906 { 00907 if(i < j) 00908 { 00909 /* pSrc[i] <-> pSrc[j]; */ 00910 /* pSrc[i+1u] <-> pSrc[j+1u] */ 00911 in = pSrc[i]; 00912 pSrc[i] = pSrc[j]; 00913 pSrc[j] = in; 00914 00915 /* pSrc[i + fftLenBy2p1] <-> pSrc[j + fftLenBy2p1]; */ 00916 /* pSrc[i + fftLenBy2p1+1u] <-> pSrc[j + fftLenBy2p1+1u] */ 00917 in = pSrc[i + fftLenBy2p1]; 00918 pSrc[i + fftLenBy2p1] = pSrc[j + fftLenBy2p1]; 00919 pSrc[j + fftLenBy2p1] = in; 00920 } 00921 00922 /* pSrc[i+1u] <-> pSrc[j+fftLenBy2]; */ 00923 /* pSrc[i+2] <-> pSrc[j+fftLenBy2+1u] */ 00924 in = pSrc[i + 1u]; 00925 pSrc[i + 1u] = pSrc[j + fftLenBy2]; 00926 pSrc[j + fftLenBy2] = in; 00927 00928 /* Reading the index for the bit reversal */ 00929 j = *pBitRevTab; 00930 00931 /* Updating the bit reversal index depending on the fft length */ 00932 pBitRevTab += bitRevFactor; 00933 } 00934 }
Generated on Tue Jul 12 2022 14:13:52 by Doxygen 1.7.2