CMSIS DSP library
Dependents: performance_timer Surfboard_ gps2rtty Capstone ... more
arm_cfft_radix4_q15.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2014 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 19. March 2015 00005 * $Revision: V.1.4.5 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_cfft_radix4_q15.c 00009 * 00010 * Description: This file has function definition of Radix-4 FFT & IFFT function and 00011 * In-place bit reversal using bit reversal table 00012 * 00013 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00014 * 00015 * Redistribution and use in source and binary forms, with or without 00016 * modification, are permitted provided that the following conditions 00017 * are met: 00018 * - Redistributions of source code must retain the above copyright 00019 * notice, this list of conditions and the following disclaimer. 00020 * - Redistributions in binary form must reproduce the above copyright 00021 * notice, this list of conditions and the following disclaimer in 00022 * the documentation and/or other materials provided with the 00023 * distribution. 00024 * - Neither the name of ARM LIMITED nor the names of its contributors 00025 * may be used to endorse or promote products derived from this 00026 * software without specific prior written permission. 00027 * 00028 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00029 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00030 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00031 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 00032 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00033 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00034 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00035 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00036 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00037 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00038 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00039 * POSSIBILITY OF SUCH DAMAGE. 00040 * -------------------------------------------------------------------- */ 00041 00042 #include "arm_math.h" 00043 00044 00045 void arm_radix4_butterfly_q15( 00046 q15_t * pSrc16, 00047 uint32_t fftLen, 00048 q15_t * pCoef16, 00049 uint32_t twidCoefModifier); 00050 00051 void arm_radix4_butterfly_inverse_q15( 00052 q15_t * pSrc16, 00053 uint32_t fftLen, 00054 q15_t * pCoef16, 00055 uint32_t twidCoefModifier); 00056 00057 void arm_bitreversal_q15( 00058 q15_t * pSrc, 00059 uint32_t fftLen, 00060 uint16_t bitRevFactor, 00061 uint16_t * pBitRevTab); 00062 00063 /** 00064 * @ingroup groupTransforms 00065 */ 00066 00067 /** 00068 * @addtogroup ComplexFFT 00069 * @{ 00070 */ 00071 00072 00073 /** 00074 * @details 00075 * @brief Processing function for the Q15 CFFT/CIFFT. 00076 * @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q15 and will be removed 00077 * @param[in] *S points to an instance of the Q15 CFFT/CIFFT structure. 00078 * @param[in, out] *pSrc points to the complex data buffer. Processing occurs in-place. 00079 * @return none. 00080 * 00081 * \par Input and output formats: 00082 * \par 00083 * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process. 00084 * Hence the output format is different for different FFT sizes. 
00085 * The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT: 00086 * \par 00087 * \image html CFFTQ15.gif "Input and Output Formats for Q15 CFFT" 00088 * \image html CIFFTQ15.gif "Input and Output Formats for Q15 CIFFT" 00089 */ 00090 00091 void arm_cfft_radix4_q15( 00092 const arm_cfft_radix4_instance_q15 * S, 00093 q15_t * pSrc) 00094 { 00095 if(S->ifftFlag == 1u) 00096 { 00097 /* Complex IFFT radix-4 */ 00098 arm_radix4_butterfly_inverse_q15(pSrc, S->fftLen, S->pTwiddle, 00099 S->twidCoefModifier); 00100 } 00101 else 00102 { 00103 /* Complex FFT radix-4 */ 00104 arm_radix4_butterfly_q15(pSrc, S->fftLen, S->pTwiddle, 00105 S->twidCoefModifier); 00106 } 00107 00108 if(S->bitReverseFlag == 1u) 00109 { 00110 /* Bit Reversal */ 00111 arm_bitreversal_q15(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable); 00112 } 00113 00114 } 00115 00116 /** 00117 * @} end of ComplexFFT group 00118 */ 00119 00120 /* 00121 * Radix-4 FFT algorithm used is : 00122 * 00123 * Input real and imaginary data: 00124 * x(n) = xa + j * ya 00125 * x(n+N/4 ) = xb + j * yb 00126 * x(n+N/2 ) = xc + j * yc 00127 * x(n+3N 4) = xd + j * yd 00128 * 00129 * 00130 * Output real and imaginary data: 00131 * x(4r) = xa'+ j * ya' 00132 * x(4r+1) = xb'+ j * yb' 00133 * x(4r+2) = xc'+ j * yc' 00134 * x(4r+3) = xd'+ j * yd' 00135 * 00136 * 00137 * Twiddle factors for radix-4 FFT: 00138 * Wn = co1 + j * (- si1) 00139 * W2n = co2 + j * (- si2) 00140 * W3n = co3 + j * (- si3) 00141 00142 * The real and imaginary output values for the radix-4 butterfly are 00143 * xa' = xa + xb + xc + xd 00144 * ya' = ya + yb + yc + yd 00145 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) 00146 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) 00147 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) 00148 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) 00149 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) 00150 * yd' = (ya+xb-yc-xd)* co3 - 
(xa-yb-xc+yd)* (si3) 00151 * 00152 */ 00153 00154 /** 00155 * @brief Core function for the Q15 CFFT butterfly process. 00156 * @param[in, out] *pSrc16 points to the in-place buffer of Q15 data type. 00157 * @param[in] fftLen length of the FFT. 00158 * @param[in] *pCoef16 points to twiddle coefficient buffer. 00159 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. * @details Works in place in three phases: a first stage that pre-scales every input by 4, the middle stages, and a last stage that uses no twiddle multiplications. Two implementations are selected at compile time by ARM_MATH_CM0_FAMILY: a packed 32-bit SIMD-intrinsic version and a scalar q15 version. No bit reversal is done here. 00160 * @return none. 00161 */ 00162 00163 void arm_radix4_butterfly_q15( 00164 q15_t * pSrc16, 00165 uint32_t fftLen, 00166 q15_t * pCoef16, 00167 uint32_t twidCoefModifier) 00168 { 00169 00170 #ifndef ARM_MATH_CM0_FAMILY 00171 00172 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00173 00174 q31_t R, S, T, U; 00175 q31_t C1, C2, C3, out1, out2; 00176 uint32_t n1, n2, ic, i0, j, k; 00177 00178 q15_t *ptr1; 00179 q15_t *pSi0; 00180 q15_t *pSi1; 00181 q15_t *pSi2; 00182 q15_t *pSi3; 00183 00184 q31_t xaya, xbyb, xcyc, xdyd; 00185 00186 /* Total process is divided into three stages */ 00187 00188 /* process first stage, middle stages, & last stage */ 00189 00190 /* Initializations for the first stage */ 00191 n2 = fftLen; 00192 n1 = n2; 00193 00194 /* n2 = fftLen/4 */ 00195 n2 >>= 2u; 00196 00197 /* Index for twiddle coefficient */ 00198 ic = 0u; 00199 00200 /* Index for input read and output write */ 00201 j = n2; 00202 /* pSi0..pSi3 walk the four butterfly inputs, which start 2 * n2 q15 elements apart */ 00203 pSi0 = pSrc16; 00204 pSi1 = pSi0 + 2 * n2; 00205 pSi2 = pSi1 + 2 * n2; 00206 pSi3 = pSi2 + 2 * n2; 00207 00208 /* Input is in 1.15(q15) format */ 00209 00210 /* start of first stage process */ 00211 do 00212 { 00213 /* Butterfly implementation */ 00214 00215 /* Reading i0, i0+fftLen/2 inputs */ 00216 /* Read ya (real), xa(imag) input */ 00217 T = _SIMD32_OFFSET(pSi0); 00218 T = __SHADD16(T, 0); // this is just a SIMD arithmetic shift right by 1 00219 T = __SHADD16(T, 0); // it turns out doing this twice is 2 cycles, the alternative takes 3 cycles 00220 //in = ((int16_t) (T & 0xFFFF)) >> 2; // 
alternative code that takes 3 cycles 00221 //T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF); 00222 00223 /* Read yc (real), xc(imag) input */ 00224 S = _SIMD32_OFFSET(pSi2); 00225 S = __SHADD16(S, 0); 00226 S = __SHADD16(S, 0); 00227 00228 /* R = packed((ya + yc), (xa + xc) ) */ 00229 R = __QADD16(T, S); 00230 00231 /* S = packed((ya - yc), (xa - xc) ) */ 00232 S = __QSUB16(T, S); 00233 00234 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 00235 /* Read yb (real), xb(imag) input */ 00236 T = _SIMD32_OFFSET(pSi1); 00237 T = __SHADD16(T, 0); 00238 T = __SHADD16(T, 0); 00239 00240 /* Read yd (real), xd(imag) input */ 00241 U = _SIMD32_OFFSET(pSi3); 00242 U = __SHADD16(U, 0); 00243 U = __SHADD16(U, 0); 00244 00245 /* T = packed((yb + yd), (xb + xd) ) */ 00246 T = __QADD16(T, U); 00247 00248 /* writing the butterfly processed i0 sample */ 00249 /* xa' = xa + xb + xc + xd */ 00250 /* ya' = ya + yb + yc + yd */ 00251 _SIMD32_OFFSET(pSi0) = __SHADD16(R, T); 00252 pSi0 += 2; 00253 00254 /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */ 00255 R = __QSUB16(R, T); 00256 00257 /* co2 & si2 are read from SIMD Coefficient pointer */ 00258 C2 = _SIMD32_OFFSET(pCoef16 + (4u * ic)); 00259 00260 #ifndef ARM_MATH_BIG_ENDIAN 00261 00262 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 00263 out1 = __SMUAD(C2, R) >> 16u; 00264 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00265 out2 = __SMUSDX(C2, R); 00266 00267 #else 00268 00269 /* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00270 out1 = __SMUSDX(R, C2) >> 16u; 00271 /* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 00272 out2 = __SMUAD(C2, R); 00273 00274 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00275 00276 /* Reading i0+fftLen/4 */ 00277 /* T = packed(yb, xb) */ 00278 T = _SIMD32_OFFSET(pSi1); 00279 T = __SHADD16(T, 0); 00280 T = __SHADD16(T, 0); 00281 00282 /* writing the butterfly processed i0 + fftLen/4 sample */ 00283 /* writing output(xc', yc') in little endian format */ 00284 
_SIMD32_OFFSET(pSi1) = 00285 (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00286 pSi1 += 2; 00287 00288 /* Butterfly calculations */ 00289 /* U = packed(yd, xd) */ 00290 U = _SIMD32_OFFSET(pSi3); 00291 U = __SHADD16(U, 0); 00292 U = __SHADD16(U, 0); 00293 00294 /* T = packed(yb-yd, xb-xd) */ 00295 T = __QSUB16(T, U); 00296 00297 #ifndef ARM_MATH_BIG_ENDIAN 00298 00299 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 00300 R = __QASX(S, T); 00301 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 00302 S = __QSAX(S, T); 00303 00304 #else 00305 00306 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 00307 R = __QSAX(S, T); 00308 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 00309 S = __QASX(S, T); 00310 00311 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00312 00313 /* co1 & si1 are read from SIMD Coefficient pointer */ 00314 C1 = _SIMD32_OFFSET(pCoef16 + (2u * ic)); 00315 /* Butterfly process for the i0+fftLen/2 sample */ 00316 00317 #ifndef ARM_MATH_BIG_ENDIAN 00318 00319 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 00320 out1 = __SMUAD(C1, S) >> 16u; 00321 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 00322 out2 = __SMUSDX(C1, S); 00323 00324 #else 00325 00326 /* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 00327 out1 = __SMUSDX(S, C1) >> 16u; 00328 /* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 00329 out2 = __SMUAD(C1, S); 00330 00331 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00332 00333 /* writing output(xb', yb') in little endian format */ 00334 _SIMD32_OFFSET(pSi2) = 00335 ((out2) & 0xFFFF0000) | ((out1) & 0x0000FFFF); 00336 pSi2 += 2; 00337 00338 00339 /* co3 & si3 are read from SIMD Coefficient pointer */ 00340 C3 = _SIMD32_OFFSET(pCoef16 + (6u * ic)); 00341 /* Butterfly process for the i0+3fftLen/4 sample */ 00342 00343 #ifndef ARM_MATH_BIG_ENDIAN 00344 00345 /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ 00346 out1 = __SMUAD(C3, R) >> 16u; 00347 /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* 
(si3) */ 00348 out2 = __SMUSDX(C3, R); 00349 00350 #else 00351 00352 /* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ 00353 out1 = __SMUSDX(R, C3) >> 16u; 00354 /* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ 00355 out2 = __SMUAD(C3, R); 00356 00357 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00358 00359 /* writing output(xd', yd') in little endian format */ 00360 _SIMD32_OFFSET(pSi3) = 00361 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00362 pSi3 += 2; 00363 00364 /* Twiddle coefficients index modifier */ 00365 ic = ic + twidCoefModifier; 00366 00367 } while(--j); 00368 /* data is in 4.11(q11) format */ 00369 00370 /* end of first stage process */ 00371 00372 00373 /* start of middle stage process */ 00374 00375 /* Twiddle coefficients index modifier: from here on ic advances through the twiddle table in steps four times larger */ 00376 twidCoefModifier <<= 2u; 00377 00378 /* Calculation of Middle stage: k only counts passes (it is not read inside the loop); it starts at fftLen/4 and is divided by 4 while it stays above 4 */ 00379 for (k = fftLen / 4u; k > 4u; k >>= 2u) 00380 { 00381 /* Initializations for the middle stage */ 00382 n1 = n2; 00383 n2 >>= 2u; 00384 ic = 0u; 00385 00386 for (j = 0u; j <= (n2 - 1u); j++) 00387 { 00388 /* index calculation for the coefficients */ 00389 C1 = _SIMD32_OFFSET(pCoef16 + (2u * ic)); 00390 C2 = _SIMD32_OFFSET(pCoef16 + (4u * ic)); 00391 C3 = _SIMD32_OFFSET(pCoef16 + (6u * ic)); 00392 00393 /* Twiddle coefficients index modifier */ 00394 ic = ic + twidCoefModifier; 00395 00396 pSi0 = pSrc16 + 2 * j; 00397 pSi1 = pSi0 + 2 * n2; 00398 pSi2 = pSi1 + 2 * n2; 00399 pSi3 = pSi2 + 2 * n2; 00400 00401 /* Butterfly implementation */ 00402 for (i0 = j; i0 < fftLen; i0 += n1) 00403 { 00404 /* Reading i0, i0+fftLen/2 inputs */ 00405 /* Read ya (real), xa(imag) input */ 00406 T = _SIMD32_OFFSET(pSi0); 00407 00408 /* Read yc (real), xc(imag) input */ 00409 S = _SIMD32_OFFSET(pSi2); 00410 00411 /* R = packed( (ya + yc), (xa + xc)) */ 00412 R = __QADD16(T, S); 00413 00414 /* S = packed((ya - yc), (xa - xc)) */ 00415 S = __QSUB16(T, S); 00416 00417 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 00418 /* Read yb (real), xb(imag) 
input */ 00419 T = _SIMD32_OFFSET(pSi1); 00420 00421 /* Read yd (real), xd(imag) input */ 00422 U = _SIMD32_OFFSET(pSi3); 00423 00424 /* T = packed( (yb + yd), (xb + xd)) */ 00425 T = __QADD16(T, U); 00426 00427 /* writing the butterfly processed i0 sample: the halving add followed by a second halving (add with 0) stores the sum scaled by 1/4 */ 00428 00429 /* xa' = xa + xb + xc + xd */ 00430 /* ya' = ya + yb + yc + yd */ 00431 out1 = __SHADD16(R, T); 00432 out1 = __SHADD16(out1, 0); 00433 _SIMD32_OFFSET(pSi0) = out1; 00434 pSi0 += 2 * n1; 00435 00436 /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */ 00437 R = __SHSUB16(R, T); 00438 00439 #ifndef ARM_MATH_BIG_ENDIAN 00440 00441 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ 00442 out1 = __SMUAD(C2, R) >> 16u; 00443 00444 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00445 out2 = __SMUSDX(C2, R); 00446 00447 #else 00448 00449 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00450 out1 = __SMUSDX(R, C2) >> 16u; 00451 00452 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ 00453 out2 = __SMUAD(C2, R); 00454 00455 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00456 00457 /* Reading i0+3fftLen/4 */ 00458 /* Read yb (real), xb(imag) input */ 00459 T = _SIMD32_OFFSET(pSi1); 00460 00461 /* writing the butterfly processed i0 + fftLen/4 sample */ 00462 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 00463 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00464 _SIMD32_OFFSET(pSi1) = 00465 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00466 pSi1 += 2 * n1; 00467 00468 /* Butterfly calculations */ 00469 00470 /* Read yd (real), xd(imag) input */ 00471 U = _SIMD32_OFFSET(pSi3); 00472 00473 /* T = packed(yb-yd, xb-xd) */ 00474 T = __QSUB16(T, U); 00475 00476 #ifndef ARM_MATH_BIG_ENDIAN 00477 00478 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 00479 R = __SHASX(S, T); 00480 00481 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 00482 S = __SHSAX(S, T); 00483 00484 00485 /* Butterfly process for the i0+fftLen/2 sample */ 00486 out1 = __SMUAD(C1, S) >> 16u; 00487 out2 = __SMUSDX(C1, 
S); 00488 00489 #else 00490 00491 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 00492 R = __SHSAX(S, T); 00493 00494 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 00495 S = __SHASX(S, T); 00496 00497 00498 /* Butterfly process for the i0+fftLen/2 sample */ 00499 out1 = __SMUSDX(S, C1) >> 16u; 00500 out2 = __SMUAD(C1, S); 00501 00502 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00503 00504 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 00505 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 00506 _SIMD32_OFFSET(pSi2) = 00507 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00508 pSi2 += 2 * n1; 00509 00510 /* Butterfly process for the i0+3fftLen/4 sample */ 00511 00512 #ifndef ARM_MATH_BIG_ENDIAN 00513 00514 out1 = __SMUAD(C3, R) >> 16u; 00515 out2 = __SMUSDX(C3, R); 00516 00517 #else 00518 00519 out1 = __SMUSDX(R, C3) >> 16u; 00520 out2 = __SMUAD(C3, R); 00521 00522 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00523 00524 /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ 00525 /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ 00526 _SIMD32_OFFSET(pSi3) = 00527 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00528 pSi3 += 2 * n1; 00529 } 00530 } 00531 /* Twiddle coefficients index modifier */ 00532 twidCoefModifier <<= 2u; 00533 } 00534 /* end of middle stage process */ 00535 00536 00537 /* data is in 10.6(q6) format for the 1024 point */ 00538 /* data is in 8.8(q8) format for the 256 point */ 00539 /* data is in 6.10(q10) format for the 64 point */ 00540 /* data is in 4.12(q12) format for the 16 point */ 00541 00542 /* Initializations for the last stage */ 00543 j = fftLen >> 2; 00544 00545 ptr1 = &pSrc16[0]; 00546 00547 /* start of last stage process */ 00548 00549 /* Butterfly implementation */ 00550 do 00551 { 00552 /* Read xa (real), ya(imag) input */ 00553 xaya = *__SIMD32(ptr1)++; 00554 00555 /* Read xb (real), yb(imag) input */ 00556 xbyb = *__SIMD32(ptr1)++; 00557 00558 /* Read xc (real), yc(imag) input */ 00559 xcyc = 
*__SIMD32(ptr1)++; 00560 00561 /* Read xd (real), yd(imag) input */ 00562 xdyd = *__SIMD32(ptr1)++; 00563 00564 /* R = packed((ya + yc), (xa + xc)) */ 00565 R = __QADD16(xaya, xcyc); 00566 00567 /* T = packed((yb + yd), (xb + xd)) */ 00568 T = __QADD16(xbyb, xdyd); 00569 00570 /* pointer update for writing: step back 8 q15 elements so the results overwrite the inputs in place (presumably exactly the four packed words just read; confirm against the __SIMD32 macro) */ 00571 ptr1 = ptr1 - 8u; 00572 00573 00574 /* xa' = xa + xb + xc + xd */ 00575 /* ya' = ya + yb + yc + yd */ 00576 *__SIMD32(ptr1)++ = __SHADD16(R, T); 00577 00578 /* T = packed((yb + yd), (xb + xd)); NOTE(review): T was assigned this same expression above and is not modified in between, so this recomputation looks redundant */ 00579 T = __QADD16(xbyb, xdyd); 00580 00581 /* xc' = (xa-xb+xc-xd) */ 00582 /* yc' = (ya-yb+yc-yd) */ 00583 *__SIMD32(ptr1)++ = __SHSUB16(R, T); 00584 00585 /* S = packed((ya - yc), (xa - xc)) */ 00586 S = __QSUB16(xaya, xcyc); 00587 00588 /* Read yd (real), xd(imag) input */ 00589 /* U = packed( (yb - yd), (xb - xd)) */ 00590 U = __QSUB16(xbyb, xdyd); 00591 00592 #ifndef ARM_MATH_BIG_ENDIAN 00593 00594 /* xb' = (xa+yb-xc-yd) */ 00595 /* yb' = (ya-xb-yc+xd) */ 00596 *__SIMD32(ptr1)++ = __SHSAX(S, U); 00597 00598 00599 /* xd' = (xa-yb-xc+yd) */ 00600 /* yd' = (ya+xb-yc-xd) */ 00601 *__SIMD32(ptr1)++ = __SHASX(S, U); 00602 00603 #else 00604 00605 /* xb' = (xa+yb-xc-yd) */ 00606 /* yb' = (ya-xb-yc+xd) */ 00607 *__SIMD32(ptr1)++ = __SHASX(S, U); 00608 00609 00610 /* xd' = (xa-yb-xc+yd) */ 00611 /* yd' = (ya+xb-yc-xd) */ 00612 *__SIMD32(ptr1)++ = __SHSAX(S, U); 00613 00614 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00615 00616 } while(--j); 00617 00618 /* end of last stage process */ 00619 00620 /* output is in 11.5(q5) format for the 1024 point */ 00621 /* output is in 9.7(q7) format for the 256 point */ 00622 /* output is in 7.9(q9) format for the 64 point */ 00623 /* output is in 5.11(q11) format for the 16 point */ 00624 00625 00626 #else 00627 00628 /* Run the below code for Cortex-M0 */ 00629 00630 q15_t R0, R1, S0, S1, T0, T1, U0, U1; 00631 q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2; 00632 uint32_t n1, n2, ic, i0, i1, i2, i3, j, k; 00633 00634 /* Total 
process is divided into three stages */ 00635 00636 /* process first stage, middle stages, & last stage */ 00637 00638 /* Initializations for the first stage */ 00639 n2 = fftLen; 00640 n1 = n2; 00641 00642 /* n2 = fftLen/4 */ 00643 n2 >>= 2u; 00644 00645 /* Index for twiddle coefficient */ 00646 ic = 0u; 00647 00648 /* Index for input read and output write */ 00649 i0 = 0u; 00650 j = n2; 00651 00652 /* Input is in 1.15(q15) format */ 00653 00654 /* start of first stage process */ 00655 do 00656 { 00657 /* Butterfly implementation */ 00658 00659 /* index calculation for the input as, */ 00660 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 00661 i1 = i0 + n2; 00662 i2 = i1 + n2; 00663 i3 = i2 + n2; 00664 00665 /* Reading i0, i0+fftLen/2 inputs */ 00666 00667 /* input is down scale by 4 to avoid overflow */ 00668 /* Read ya (real), xa(imag) input */ 00669 T0 = pSrc16[i0 * 2u] >> 2u; 00670 T1 = pSrc16[(i0 * 2u) + 1u] >> 2u; 00671 00672 /* input is down scale by 4 to avoid overflow */ 00673 /* Read yc (real), xc(imag) input */ 00674 S0 = pSrc16[i2 * 2u] >> 2u; 00675 S1 = pSrc16[(i2 * 2u) + 1u] >> 2u; 00676 00677 /* R0 = (ya + yc) */ 00678 R0 = __SSAT(T0 + S0, 16u); 00679 /* R1 = (xa + xc) */ 00680 R1 = __SSAT(T1 + S1, 16u); 00681 00682 /* S0 = (ya - yc) */ 00683 S0 = __SSAT(T0 - S0, 16); 00684 /* S1 = (xa - xc) */ 00685 S1 = __SSAT(T1 - S1, 16); 00686 00687 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 00688 /* input is down scale by 4 to avoid overflow */ 00689 /* Read yb (real), xb(imag) input */ 00690 T0 = pSrc16[i1 * 2u] >> 2u; 00691 T1 = pSrc16[(i1 * 2u) + 1u] >> 2u; 00692 00693 /* input is down scale by 4 to avoid overflow */ 00694 /* Read yd (real), xd(imag) input */ 00695 U0 = pSrc16[i3 * 2u] >> 2u; 00696 U1 = pSrc16[(i3 * 2u) + 1] >> 2u; 00697 00698 /* T0 = (yb + yd) */ 00699 T0 = __SSAT(T0 + U0, 16u); 00700 /* T1 = (xb + xd) */ 00701 T1 = __SSAT(T1 + U1, 16u); 00702 00703 /* writing the butterfly processed i0 
sample */ 00704 /* ya' = ya + yb + yc + yd */ 00705 /* xa' = xa + xb + xc + xd */ 00706 pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u); 00707 pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u); 00708 00709 /* R0 = (ya + yc) - (yb + yd) */ 00710 /* R1 = (xa + xc) - (xb + xd) */ 00711 R0 = __SSAT(R0 - T0, 16u); 00712 R1 = __SSAT(R1 - T1, 16u); 00713 00714 /* co2 & si2 are read from Coefficient pointer */ 00715 Co2 = pCoef16[2u * ic * 2u]; 00716 Si2 = pCoef16[(2u * ic * 2u) + 1]; 00717 00718 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 00719 out1 = (q15_t) ((Co2 * R0 + Si2 * R1) >> 16u); 00720 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00721 out2 = (q15_t) ((-Si2 * R0 + Co2 * R1) >> 16u); 00722 00723 /* Reading i0+fftLen/4 */ 00724 /* input is down scale by 4 to avoid overflow */ 00725 /* T0 = yb, T1 = xb */ 00726 T0 = pSrc16[i1 * 2u] >> 2; 00727 T1 = pSrc16[(i1 * 2u) + 1] >> 2; 00728 00729 /* writing the butterfly processed i0 + fftLen/4 sample */ 00730 /* writing output(xc', yc') in little endian format */ 00731 pSrc16[i1 * 2u] = out1; 00732 pSrc16[(i1 * 2u) + 1] = out2; 00733 00734 /* Butterfly calculations */ 00735 /* input is down scale by 4 to avoid overflow */ 00736 /* U0 = yd, U1 = xd */ 00737 U0 = pSrc16[i3 * 2u] >> 2; 00738 U1 = pSrc16[(i3 * 2u) + 1] >> 2; 00739 /* T0 = yb-yd */ 00740 T0 = __SSAT(T0 - U0, 16); 00741 /* T1 = xb-xd */ 00742 T1 = __SSAT(T1 - U1, 16); 00743 00744 /* R1 = (ya-yc) + (xb- xd), R0 = (xa-xc) - (yb-yd)) */ 00745 R0 = (q15_t) __SSAT((q31_t) (S0 - T1), 16); 00746 R1 = (q15_t) __SSAT((q31_t) (S1 + T0), 16); 00747 00748 /* S1 = (ya-yc) - (xb- xd), S0 = (xa-xc) + (yb-yd)) */ 00749 S0 = (q15_t) __SSAT(((q31_t) S0 + T1), 16u); 00750 S1 = (q15_t) __SSAT(((q31_t) S1 - T0), 16u); 00751 00752 /* co1 & si1 are read from Coefficient pointer */ 00753 Co1 = pCoef16[ic * 2u]; 00754 Si1 = pCoef16[(ic * 2u) + 1]; 00755 /* Butterfly process for the i0+fftLen/2 sample */ 00756 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 00757 out1 
= (q15_t) ((Si1 * S1 + Co1 * S0) >> 16); 00758 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 00759 out2 = (q15_t) ((-Si1 * S0 + Co1 * S1) >> 16); 00760 00761 /* writing output(xb', yb') in little endian format */ 00762 pSrc16[i2 * 2u] = out1; 00763 pSrc16[(i2 * 2u) + 1] = out2; 00764 00765 /* Co3 & si3 are read from Coefficient pointer */ 00766 Co3 = pCoef16[3u * (ic * 2u)]; 00767 Si3 = pCoef16[(3u * (ic * 2u)) + 1]; 00768 /* Butterfly process for the i0+3fftLen/4 sample */ 00769 /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */ 00770 out1 = (q15_t) ((Si3 * R1 + Co3 * R0) >> 16u); 00771 /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */ 00772 out2 = (q15_t) ((-Si3 * R0 + Co3 * R1) >> 16u); 00773 /* writing output(xd', yd') in little endian format */ 00774 pSrc16[i3 * 2u] = out1; 00775 pSrc16[(i3 * 2u) + 1] = out2; 00776 00777 /* Twiddle coefficients index modifier */ 00778 ic = ic + twidCoefModifier; 00779 00780 /* Updating input index */ 00781 i0 = i0 + 1u; 00782 00783 } while(--j); 00784 /* data is in 4.11(q11) format */ 00785 00786 /* end of first stage process */ 00787 00788 00789 /* start of middle stage process */ 00790 00791 /* Twiddle coefficients index modifier */ 00792 twidCoefModifier <<= 2u; 00793 00794 /* Calculation of Middle stage */ 00795 for (k = fftLen / 4u; k > 4u; k >>= 2u) 00796 { 00797 /* Initializations for the middle stage */ 00798 n1 = n2; 00799 n2 >>= 2u; 00800 ic = 0u; 00801 00802 for (j = 0u; j <= (n2 - 1u); j++) 00803 { 00804 /* index calculation for the coefficients */ 00805 Co1 = pCoef16[ic * 2u]; 00806 Si1 = pCoef16[(ic * 2u) + 1u]; 00807 Co2 = pCoef16[2u * (ic * 2u)]; 00808 Si2 = pCoef16[(2u * (ic * 2u)) + 1u]; 00809 Co3 = pCoef16[3u * (ic * 2u)]; 00810 Si3 = pCoef16[(3u * (ic * 2u)) + 1u]; 00811 00812 /* Twiddle coefficients index modifier */ 00813 ic = ic + twidCoefModifier; 00814 00815 /* Butterfly implementation */ 00816 for (i0 = j; i0 < fftLen; i0 += n1) 00817 { 00818 /* index calculation for the input as, */ 
00819 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 00820 i1 = i0 + n2; 00821 i2 = i1 + n2; 00822 i3 = i2 + n2; 00823 00824 /* Reading i0, i0+fftLen/2 inputs */ 00825 /* Read ya (real), xa(imag) input */ 00826 T0 = pSrc16[i0 * 2u]; 00827 T1 = pSrc16[(i0 * 2u) + 1u]; 00828 00829 /* Read yc (real), xc(imag) input */ 00830 S0 = pSrc16[i2 * 2u]; 00831 S1 = pSrc16[(i2 * 2u) + 1u]; 00832 00833 /* R0 = (ya + yc), R1 = (xa + xc) */ 00834 R0 = __SSAT(T0 + S0, 16); 00835 R1 = __SSAT(T1 + S1, 16); 00836 00837 /* S0 = (ya - yc), S1 =(xa - xc) */ 00838 S0 = __SSAT(T0 - S0, 16); 00839 S1 = __SSAT(T1 - S1, 16); 00840 00841 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 00842 /* Read yb (real), xb(imag) input */ 00843 T0 = pSrc16[i1 * 2u]; 00844 T1 = pSrc16[(i1 * 2u) + 1u]; 00845 00846 /* Read yd (real), xd(imag) input */ 00847 U0 = pSrc16[i3 * 2u]; 00848 U1 = pSrc16[(i3 * 2u) + 1u]; 00849 00850 00851 /* T0 = (yb + yd), T1 = (xb + xd) */ 00852 T0 = __SSAT(T0 + U0, 16); 00853 T1 = __SSAT(T1 + U1, 16); 00854 00855 /* writing the butterfly processed i0 sample */ 00856 00857 /* xa' = xa + xb + xc + xd */ 00858 /* ya' = ya + yb + yc + yd */ 00859 out1 = ((R0 >> 1u) + (T0 >> 1u)) >> 1u; 00860 out2 = ((R1 >> 1u) + (T1 >> 1u)) >> 1u; 00861 00862 pSrc16[i0 * 2u] = out1; 00863 pSrc16[(2u * i0) + 1u] = out2; 00864 00865 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */ 00866 R0 = (R0 >> 1u) - (T0 >> 1u); 00867 R1 = (R1 >> 1u) - (T1 >> 1u); 00868 00869 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ 00870 out1 = (q15_t) ((Co2 * R0 + Si2 * R1) >> 16u); 00871 00872 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00873 out2 = (q15_t) ((-Si2 * R0 + Co2 * R1) >> 16u); 00874 00875 /* Reading i0+3fftLen/4 */ 00876 /* Read yb (real), xb(imag) input */ 00877 T0 = pSrc16[i1 * 2u]; 00878 T1 = pSrc16[(i1 * 2u) + 1u]; 00879 00880 /* writing the butterfly processed i0 + fftLen/4 sample */ 00881 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 
00882 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00883 pSrc16[i1 * 2u] = out1; 00884 pSrc16[(i1 * 2u) + 1u] = out2; 00885 00886 /* Butterfly calculations */ 00887 00888 /* Read yd (real), xd(imag) input */ 00889 U0 = pSrc16[i3 * 2u]; 00890 U1 = pSrc16[(i3 * 2u) + 1u]; 00891 00892 /* T0 = yb-yd, T1 = xb-xd */ 00893 T0 = __SSAT(T0 - U0, 16); 00894 T1 = __SSAT(T1 - U1, 16); 00895 00896 /* R0 = (ya-yc) + (xb- xd), R1 = (xa-xc) - (yb-yd)) */ 00897 R0 = (S0 >> 1u) - (T1 >> 1u); 00898 R1 = (S1 >> 1u) + (T0 >> 1u); 00899 00900 /* S0 = (ya-yc) - (xb- xd), S1 = (xa-xc) + (yb-yd)) */ 00901 S0 = (S0 >> 1u) + (T1 >> 1u); 00902 S1 = (S1 >> 1u) - (T0 >> 1u); 00903 00904 /* Butterfly process for the i0+fftLen/2 sample */ 00905 out1 = (q15_t) ((Co1 * S0 + Si1 * S1) >> 16u); 00906 00907 out2 = (q15_t) ((-Si1 * S0 + Co1 * S1) >> 16u); 00908 00909 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 00910 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 00911 pSrc16[i2 * 2u] = out1; 00912 pSrc16[(i2 * 2u) + 1u] = out2; 00913 00914 /* Butterfly process for the i0+3fftLen/4 sample */ 00915 out1 = (q15_t) ((Si3 * R1 + Co3 * R0) >> 16u); 00916 00917 out2 = (q15_t) ((-Si3 * R0 + Co3 * R1) >> 16u); 00918 /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */ 00919 /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */ 00920 pSrc16[i3 * 2u] = out1; 00921 pSrc16[(i3 * 2u) + 1u] = out2; 00922 } 00923 } 00924 /* Twiddle coefficients index modifier */ 00925 twidCoefModifier <<= 2u; 00926 } 00927 /* end of middle stage process */ 00928 00929 00930 /* data is in 10.6(q6) format for the 1024 point */ 00931 /* data is in 8.8(q8) format for the 256 point */ 00932 /* data is in 6.10(q10) format for the 64 point */ 00933 /* data is in 4.12(q12) format for the 16 point */ 00934 00935 /* Initializations for the last stage: n1 becomes the step between butterflies (i0 += n1) and n2 the distance between the four inputs of one butterfly */ 00936 n1 = n2; 00937 n2 >>= 2u; 00938 00939 /* start of last stage process */ 00940 00941 /* Butterfly implementation */ 00942 for (i0 = 0u; i0 <= (fftLen - n1); i0 += 
n1) 00943 { 00944 /* index calculation for the input as, */ 00945 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 00946 i1 = i0 + n2; 00947 i2 = i1 + n2; 00948 i3 = i2 + n2; 00949 00950 /* Reading i0, i0+fftLen/2 inputs */ 00951 /* Read ya (real), xa(imag) input */ 00952 T0 = pSrc16[i0 * 2u]; 00953 T1 = pSrc16[(i0 * 2u) + 1u]; 00954 00955 /* Read yc (real), xc(imag) input */ 00956 S0 = pSrc16[i2 * 2u]; 00957 S1 = pSrc16[(i2 * 2u) + 1u]; 00958 00959 /* R0 = (ya + yc), R1 = (xa + xc) */ 00960 R0 = __SSAT(T0 + S0, 16u); 00961 R1 = __SSAT(T1 + S1, 16u); 00962 00963 /* S0 = (ya - yc), S1 = (xa - xc) */ 00964 S0 = __SSAT(T0 - S0, 16u); 00965 S1 = __SSAT(T1 - S1, 16u); 00966 00967 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 00968 /* Read yb (real), xb(imag) input */ 00969 T0 = pSrc16[i1 * 2u]; 00970 T1 = pSrc16[(i1 * 2u) + 1u]; 00971 /* Read yd (real), xd(imag) input */ 00972 U0 = pSrc16[i3 * 2u]; 00973 U1 = pSrc16[(i3 * 2u) + 1u]; 00974 00975 /* T0 = (yb + yd), T1 = (xb + xd)) */ 00976 T0 = __SSAT(T0 + U0, 16u); 00977 T1 = __SSAT(T1 + U1, 16u); 00978 00979 /* writing the butterfly processed i0 sample */ 00980 /* xa' = xa + xb + xc + xd */ 00981 /* ya' = ya + yb + yc + yd */ 00982 pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u); 00983 pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u); 00984 00985 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */ 00986 R0 = (R0 >> 1u) - (T0 >> 1u); 00987 R1 = (R1 >> 1u) - (T1 >> 1u); 00988 /* Read yb (real), xb(imag) input */ 00989 T0 = pSrc16[i1 * 2u]; 00990 T1 = pSrc16[(i1 * 2u) + 1u]; 00991 00992 /* writing the butterfly processed i0 + fftLen/4 sample */ 00993 /* xc' = (xa-xb+xc-xd) */ 00994 /* yc' = (ya-yb+yc-yd) */ 00995 pSrc16[i1 * 2u] = R0; 00996 pSrc16[(i1 * 2u) + 1u] = R1; 00997 00998 /* Read yd (real), xd(imag) input */ 00999 U0 = pSrc16[i3 * 2u]; 01000 U1 = pSrc16[(i3 * 2u) + 1u]; 01001 /* T0 = (yb - yd), T1 = (xb - xd) */ 01002 T0 = __SSAT(T0 - U0, 16u); 01003 T1 = __SSAT(T1 - 
U1, 16u); 01004 01005 /* writing the butterfly processed i0 + fftLen/2 sample */ 01006 /* xb' = (xa+yb-xc-yd) */ 01007 /* yb' = (ya-xb-yc+xd) */ 01008 pSrc16[i2 * 2u] = (S0 >> 1u) + (T1 >> 1u); 01009 pSrc16[(i2 * 2u) + 1u] = (S1 >> 1u) - (T0 >> 1u); 01010 01011 /* writing the butterfly processed i0 + 3fftLen/4 sample */ 01012 /* xd' = (xa-yb-xc+yd) */ 01013 /* yd' = (ya+xb-yc-xd) */ 01014 pSrc16[i3 * 2u] = (S0 >> 1u) - (T1 >> 1u); 01015 pSrc16[(i3 * 2u) + 1u] = (S1 >> 1u) + (T0 >> 1u); 01016 01017 } 01018 01019 /* end of last stage process */ 01020 01021 /* output is in 11.5(q5) format for the 1024 point */ 01022 /* output is in 9.7(q7) format for the 256 point */ 01023 /* output is in 7.9(q9) format for the 64 point */ 01024 /* output is in 5.11(q11) format for the 16 point */ 01025 01026 #endif /* #ifndef ARM_MATH_CM0_FAMILY */ 01027 01028 } 01029 01030 01031 /** 01032 * @brief Core function for the Q15 CIFFT butterfly process. 01033 * @param[in, out] *pSrc16 points to the in-place buffer of Q15 data type. 01034 * @param[in] fftLen length of the FFT. 01035 * @param[in] *pCoef16 points to twiddle coefficient buffer. 01036 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 01037 * @return none. 
01038 */ 01039 01040 /* 01041 * Radix-4 IFFT algorithm used is : 01042 * 01043 * CIFFT uses same twiddle coefficients as CFFT function 01044 * x[k] = x[n] + (j)k * x[n + fftLen/4] + (-1)k * x[n+fftLen/2] + (-j)k * x[n+3*fftLen/4] 01045 * 01046 * 01047 * IFFT is implemented with following changes in equations from FFT 01048 * 01049 * Input real and imaginary data: 01050 * x(n) = xa + j * ya 01051 * x(n+N/4 ) = xb + j * yb 01052 * x(n+N/2 ) = xc + j * yc 01053 * x(n+3N 4) = xd + j * yd 01054 * 01055 * 01056 * Output real and imaginary data: 01057 * x(4r) = xa'+ j * ya' 01058 * x(4r+1) = xb'+ j * yb' 01059 * x(4r+2) = xc'+ j * yc' 01060 * x(4r+3) = xd'+ j * yd' 01061 * 01062 * 01063 * Twiddle factors for radix-4 IFFT: 01064 * Wn = co1 + j * (si1) 01065 * W2n = co2 + j * (si2) 01066 * W3n = co3 + j * (si3) 01067 01068 * The real and imaginary output values for the radix-4 butterfly are 01069 * xa' = xa + xb + xc + xd 01070 * ya' = ya + yb + yc + yd 01071 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) 01072 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) 01073 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) 01074 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) 01075 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) 01076 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) 01077 * 01078 */ 01079 01080 void arm_radix4_butterfly_inverse_q15( 01081 q15_t * pSrc16, 01082 uint32_t fftLen, 01083 q15_t * pCoef16, 01084 uint32_t twidCoefModifier) 01085 { 01086 01087 #ifndef ARM_MATH_CM0_FAMILY 01088 01089 /* Run the below code for Cortex-M4 and Cortex-M3 */ 01090 01091 q31_t R, S, T, U; 01092 q31_t C1, C2, C3, out1, out2; 01093 uint32_t n1, n2, ic, i0, j, k; 01094 01095 q15_t *ptr1; 01096 q15_t *pSi0; 01097 q15_t *pSi1; 01098 q15_t *pSi2; 01099 q15_t *pSi3; 01100 01101 q31_t xaya, xbyb, xcyc, xdyd; 01102 01103 /* Total process is divided into three stages */ 01104 01105 /* process first stage, middle stages, & last stage */ 01106 01107 /* Initializations for the first 
stage */ 01108 n2 = fftLen; 01109 n1 = n2; 01110 01111 /* n2 = fftLen/4 */ 01112 n2 >>= 2u; 01113 01114 /* Index for twiddle coefficient */ 01115 ic = 0u; 01116 01117 /* Index for input read and output write */ 01118 j = n2; 01119 01120 pSi0 = pSrc16; 01121 pSi1 = pSi0 + 2 * n2; 01122 pSi2 = pSi1 + 2 * n2; 01123 pSi3 = pSi2 + 2 * n2; 01124 01125 /* Input is in 1.15(q15) format */ 01126 01127 /* start of first stage process */ 01128 do 01129 { 01130 /* Butterfly implementation */ 01131 01132 /* Reading i0, i0+fftLen/2 inputs */ 01133 /* Read ya (real), xa(imag) input */ 01134 T = _SIMD32_OFFSET(pSi0); 01135 T = __SHADD16(T, 0); 01136 T = __SHADD16(T, 0); 01137 01138 /* Read yc (real), xc(imag) input */ 01139 S = _SIMD32_OFFSET(pSi2); 01140 S = __SHADD16(S, 0); 01141 S = __SHADD16(S, 0); 01142 01143 /* R = packed((ya + yc), (xa + xc) ) */ 01144 R = __QADD16(T, S); 01145 01146 /* S = packed((ya - yc), (xa - xc) ) */ 01147 S = __QSUB16(T, S); 01148 01149 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 01150 /* Read yb (real), xb(imag) input */ 01151 T = _SIMD32_OFFSET(pSi1); 01152 T = __SHADD16(T, 0); 01153 T = __SHADD16(T, 0); 01154 01155 /* Read yd (real), xd(imag) input */ 01156 U = _SIMD32_OFFSET(pSi3); 01157 U = __SHADD16(U, 0); 01158 U = __SHADD16(U, 0); 01159 01160 /* T = packed((yb + yd), (xb + xd) ) */ 01161 T = __QADD16(T, U); 01162 01163 /* writing the butterfly processed i0 sample */ 01164 /* xa' = xa + xb + xc + xd */ 01165 /* ya' = ya + yb + yc + yd */ 01166 _SIMD32_OFFSET(pSi0) = __SHADD16(R, T); 01167 pSi0 += 2; 01168 01169 /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */ 01170 R = __QSUB16(R, T); 01171 01172 /* co2 & si2 are read from SIMD Coefficient pointer */ 01173 C2 = _SIMD32_OFFSET(pCoef16 + (4u * ic)); 01174 01175 #ifndef ARM_MATH_BIG_ENDIAN 01176 01177 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 01178 out1 = __SMUSD(C2, R) >> 16u; 01179 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 01180 out2 = __SMUADX(C2, R); 
01181 01182 #else 01183 01184 /* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 01185 out1 = __SMUADX(C2, R) >> 16u; 01186 /* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 01187 out2 = __SMUSD(__QSUB16(0, C2), R); 01188 01189 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01190 01191 /* Reading i0+fftLen/4 */ 01192 /* T = packed(yb, xb) */ 01193 T = _SIMD32_OFFSET(pSi1); 01194 T = __SHADD16(T, 0); 01195 T = __SHADD16(T, 0); 01196 01197 /* writing the butterfly processed i0 + fftLen/4 sample */ 01198 /* writing output(xc', yc') in little endian format */ 01199 _SIMD32_OFFSET(pSi1) = 01200 (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 01201 pSi1 += 2; 01202 01203 /* Butterfly calculations */ 01204 /* U = packed(yd, xd) */ 01205 U = _SIMD32_OFFSET(pSi3); 01206 U = __SHADD16(U, 0); 01207 U = __SHADD16(U, 0); 01208 01209 /* T = packed(yb-yd, xb-xd) */ 01210 T = __QSUB16(T, U); 01211 01212 #ifndef ARM_MATH_BIG_ENDIAN 01213 01214 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 01215 R = __QSAX(S, T); 01216 /* S = packed((ya-yc) + (xb- xd), (xa-xc) - (yb-yd)) */ 01217 S = __QASX(S, T); 01218 01219 #else 01220 01221 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 01222 R = __QASX(S, T); 01223 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 01224 S = __QSAX(S, T); 01225 01226 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01227 01228 /* co1 & si1 are read from SIMD Coefficient pointer */ 01229 C1 = _SIMD32_OFFSET(pCoef16 + (2u * ic)); 01230 /* Butterfly process for the i0+fftLen/2 sample */ 01231 01232 #ifndef ARM_MATH_BIG_ENDIAN 01233 01234 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 01235 out1 = __SMUSD(C1, S) >> 16u; 01236 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 01237 out2 = __SMUADX(C1, S); 01238 01239 #else 01240 01241 /* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 01242 out1 = __SMUADX(C1, S) >> 16u; 01243 /* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 01244 out2 = __SMUSD(__QSUB16(0, C1), S); 01245 
01246 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01247 01248 /* writing output(xb', yb') in little endian format */ 01249 _SIMD32_OFFSET(pSi2) = 01250 ((out2) & 0xFFFF0000) | ((out1) & 0x0000FFFF); 01251 pSi2 += 2; 01252 01253 01254 /* co3 & si3 are read from SIMD Coefficient pointer */ 01255 C3 = _SIMD32_OFFSET(pCoef16 + (6u * ic)); 01256 /* Butterfly process for the i0+3fftLen/4 sample */ 01257 01258 #ifndef ARM_MATH_BIG_ENDIAN 01259 01260 /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ 01261 out1 = __SMUSD(C3, R) >> 16u; 01262 /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ 01263 out2 = __SMUADX(C3, R); 01264 01265 #else 01266 01267 /* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ 01268 out1 = __SMUADX(C3, R) >> 16u; 01269 /* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ 01270 out2 = __SMUSD(__QSUB16(0, C3), R); 01271 01272 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01273 01274 /* writing output(xd', yd') in little endian format */ 01275 _SIMD32_OFFSET(pSi3) = 01276 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 01277 pSi3 += 2; 01278 01279 /* Twiddle coefficients index modifier */ 01280 ic = ic + twidCoefModifier; 01281 01282 } while(--j); 01283 /* data is in 4.11(q11) format */ 01284 01285 /* end of first stage process */ 01286 01287 01288 /* start of middle stage process */ 01289 01290 /* Twiddle coefficients index modifier */ 01291 twidCoefModifier <<= 2u; 01292 01293 /* Calculation of Middle stage */ 01294 for (k = fftLen / 4u; k > 4u; k >>= 2u) 01295 { 01296 /* Initializations for the middle stage */ 01297 n1 = n2; 01298 n2 >>= 2u; 01299 ic = 0u; 01300 01301 for (j = 0u; j <= (n2 - 1u); j++) 01302 { 01303 /* index calculation for the coefficients */ 01304 C1 = _SIMD32_OFFSET(pCoef16 + (2u * ic)); 01305 C2 = _SIMD32_OFFSET(pCoef16 + (4u * ic)); 01306 C3 = _SIMD32_OFFSET(pCoef16 + (6u * ic)); 01307 01308 /* Twiddle coefficients index modifier */ 01309 ic = ic + twidCoefModifier; 01310 01311 pSi0 = pSrc16 + 2 * j; 01312 pSi1 = pSi0 + 2 * n2; 
01313 pSi2 = pSi1 + 2 * n2; 01314 pSi3 = pSi2 + 2 * n2; 01315 01316 /* Butterfly implementation */ 01317 for (i0 = j; i0 < fftLen; i0 += n1) 01318 { 01319 /* Reading i0, i0+fftLen/2 inputs */ 01320 /* Read ya (real), xa(imag) input */ 01321 T = _SIMD32_OFFSET(pSi0); 01322 01323 /* Read yc (real), xc(imag) input */ 01324 S = _SIMD32_OFFSET(pSi2); 01325 01326 /* R = packed( (ya + yc), (xa + xc)) */ 01327 R = __QADD16(T, S); 01328 01329 /* S = packed((ya - yc), (xa - xc)) */ 01330 S = __QSUB16(T, S); 01331 01332 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 01333 /* Read yb (real), xb(imag) input */ 01334 T = _SIMD32_OFFSET(pSi1); 01335 01336 /* Read yd (real), xd(imag) input */ 01337 U = _SIMD32_OFFSET(pSi3); 01338 01339 /* T = packed( (yb + yd), (xb + xd)) */ 01340 T = __QADD16(T, U); 01341 01342 /* writing the butterfly processed i0 sample */ 01343 01344 /* xa' = xa + xb + xc + xd */ 01345 /* ya' = ya + yb + yc + yd */ 01346 out1 = __SHADD16(R, T); 01347 out1 = __SHADD16(out1, 0); 01348 _SIMD32_OFFSET(pSi0) = out1; 01349 pSi0 += 2 * n1; 01350 01351 /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */ 01352 R = __SHSUB16(R, T); 01353 01354 #ifndef ARM_MATH_BIG_ENDIAN 01355 01356 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ 01357 out1 = __SMUSD(C2, R) >> 16u; 01358 01359 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 01360 out2 = __SMUADX(C2, R); 01361 01362 #else 01363 01364 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 01365 out1 = __SMUADX(R, C2) >> 16u; 01366 01367 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ 01368 out2 = __SMUSD(__QSUB16(0, C2), R); 01369 01370 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01371 01372 /* Reading i0+3fftLen/4 */ 01373 /* Read yb (real), xb(imag) input */ 01374 T = _SIMD32_OFFSET(pSi1); 01375 01376 /* writing the butterfly processed i0 + fftLen/4 sample */ 01377 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 01378 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 01379 _SIMD32_OFFSET(pSi1) = 01380 
((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 01381 pSi1 += 2 * n1; 01382 01383 /* Butterfly calculations */ 01384 01385 /* Read yd (real), xd(imag) input */ 01386 U = _SIMD32_OFFSET(pSi3); 01387 01388 /* T = packed(yb-yd, xb-xd) */ 01389 T = __QSUB16(T, U); 01390 01391 #ifndef ARM_MATH_BIG_ENDIAN 01392 01393 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 01394 R = __SHSAX(S, T); 01395 01396 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 01397 S = __SHASX(S, T); 01398 01399 01400 /* Butterfly process for the i0+fftLen/2 sample */ 01401 out1 = __SMUSD(C1, S) >> 16u; 01402 out2 = __SMUADX(C1, S); 01403 01404 #else 01405 01406 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 01407 R = __SHASX(S, T); 01408 01409 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 01410 S = __SHSAX(S, T); 01411 01412 01413 /* Butterfly process for the i0+fftLen/2 sample */ 01414 out1 = __SMUADX(S, C1) >> 16u; 01415 out2 = __SMUSD(__QSUB16(0, C1), S); 01416 01417 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01418 01419 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 01420 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 01421 _SIMD32_OFFSET(pSi2) = 01422 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 01423 pSi2 += 2 * n1; 01424 01425 /* Butterfly process for the i0+3fftLen/4 sample */ 01426 01427 #ifndef ARM_MATH_BIG_ENDIAN 01428 01429 out1 = __SMUSD(C3, R) >> 16u; 01430 out2 = __SMUADX(C3, R); 01431 01432 #else 01433 01434 out1 = __SMUADX(C3, R) >> 16u; 01435 out2 = __SMUSD(__QSUB16(0, C3), R); 01436 01437 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01438 01439 /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ 01440 /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ 01441 _SIMD32_OFFSET(pSi3) = 01442 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 01443 pSi3 += 2 * n1; 01444 } 01445 } 01446 /* Twiddle coefficients index modifier */ 01447 twidCoefModifier <<= 2u; 01448 } 01449 /* end of middle stage process */ 01450 01451 /* data is in 10.6(q6) format 
for the 1024 point */ 01452 /* data is in 8.8(q8) format for the 256 point */ 01453 /* data is in 6.10(q10) format for the 64 point */ 01454 /* data is in 4.12(q12) format for the 16 point */ 01455 01456 /* Initializations for the last stage */ 01457 j = fftLen >> 2; 01458 01459 ptr1 = &pSrc16[0]; 01460 01461 /* start of last stage process */ 01462 01463 /* Butterfly implementation */ 01464 do 01465 { 01466 /* Read xa (real), ya(imag) input */ 01467 xaya = *__SIMD32(ptr1)++; 01468 01469 /* Read xb (real), yb(imag) input */ 01470 xbyb = *__SIMD32(ptr1)++; 01471 01472 /* Read xc (real), yc(imag) input */ 01473 xcyc = *__SIMD32(ptr1)++; 01474 01475 /* Read xd (real), yd(imag) input */ 01476 xdyd = *__SIMD32(ptr1)++; 01477 01478 /* R = packed((ya + yc), (xa + xc)) */ 01479 R = __QADD16(xaya, xcyc); 01480 01481 /* T = packed((yb + yd), (xb + xd)) */ 01482 T = __QADD16(xbyb, xdyd); 01483 01484 /* pointer updation for writing */ 01485 ptr1 = ptr1 - 8u; 01486 01487 01488 /* xa' = xa + xb + xc + xd */ 01489 /* ya' = ya + yb + yc + yd */ 01490 *__SIMD32(ptr1)++ = __SHADD16(R, T); 01491 01492 /* T = packed((yb + yd), (xb + xd)) */ 01493 T = __QADD16(xbyb, xdyd); 01494 01495 /* xc' = (xa-xb+xc-xd) */ 01496 /* yc' = (ya-yb+yc-yd) */ 01497 *__SIMD32(ptr1)++ = __SHSUB16(R, T); 01498 01499 /* S = packed((ya - yc), (xa - xc)) */ 01500 S = __QSUB16(xaya, xcyc); 01501 01502 /* Read yd (real), xd(imag) input */ 01503 /* T = packed( (yb - yd), (xb - xd)) */ 01504 U = __QSUB16(xbyb, xdyd); 01505 01506 #ifndef ARM_MATH_BIG_ENDIAN 01507 01508 /* xb' = (xa+yb-xc-yd) */ 01509 /* yb' = (ya-xb-yc+xd) */ 01510 *__SIMD32(ptr1)++ = __SHASX(S, U); 01511 01512 01513 /* xd' = (xa-yb-xc+yd) */ 01514 /* yd' = (ya+xb-yc-xd) */ 01515 *__SIMD32(ptr1)++ = __SHSAX(S, U); 01516 01517 #else 01518 01519 /* xb' = (xa+yb-xc-yd) */ 01520 /* yb' = (ya-xb-yc+xd) */ 01521 *__SIMD32(ptr1)++ = __SHSAX(S, U); 01522 01523 01524 /* xd' = (xa-yb-xc+yd) */ 01525 /* yd' = (ya+xb-yc-xd) */ 01526 *__SIMD32(ptr1)++ = 
__SHASX(S, U); 01527 01528 01529 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01530 01531 } while(--j); 01532 01533 /* end of last stage process */ 01534 01535 /* output is in 11.5(q5) format for the 1024 point */ 01536 /* output is in 9.7(q7) format for the 256 point */ 01537 /* output is in 7.9(q9) format for the 64 point */ 01538 /* output is in 5.11(q11) format for the 16 point */ 01539 01540 01541 #else 01542 01543 /* Run the below code for Cortex-M0 */ 01544 01545 q15_t R0, R1, S0, S1, T0, T1, U0, U1; 01546 q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2; 01547 uint32_t n1, n2, ic, i0, i1, i2, i3, j, k; 01548 01549 /* Total process is divided into three stages */ 01550 01551 /* process first stage, middle stages, & last stage */ 01552 01553 /* Initializations for the first stage */ 01554 n2 = fftLen; 01555 n1 = n2; 01556 01557 /* n2 = fftLen/4 */ 01558 n2 >>= 2u; 01559 01560 /* Index for twiddle coefficient */ 01561 ic = 0u; 01562 01563 /* Index for input read and output write */ 01564 i0 = 0u; 01565 01566 j = n2; 01567 01568 /* Input is in 1.15(q15) format */ 01569 01570 /* Start of first stage process */ 01571 do 01572 { 01573 /* Butterfly implementation */ 01574 01575 /* index calculation for the input as, */ 01576 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 01577 i1 = i0 + n2; 01578 i2 = i1 + n2; 01579 i3 = i2 + n2; 01580 01581 /* Reading i0, i0+fftLen/2 inputs */ 01582 /* input is down scale by 4 to avoid overflow */ 01583 /* Read ya (real), xa(imag) input */ 01584 T0 = pSrc16[i0 * 2u] >> 2u; 01585 T1 = pSrc16[(i0 * 2u) + 1u] >> 2u; 01586 /* input is down scale by 4 to avoid overflow */ 01587 /* Read yc (real), xc(imag) input */ 01588 S0 = pSrc16[i2 * 2u] >> 2u; 01589 S1 = pSrc16[(i2 * 2u) + 1u] >> 2u; 01590 01591 /* R0 = (ya + yc), R1 = (xa + xc) */ 01592 R0 = __SSAT(T0 + S0, 16u); 01593 R1 = __SSAT(T1 + S1, 16u); 01594 /* S0 = (ya - yc), S1 = (xa - xc) */ 01595 S0 = __SSAT(T0 - S0, 16u); 01596 S1 = __SSAT(T1 - 
S1, 16u); 01597 01598 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 01599 /* input is down scale by 4 to avoid overflow */ 01600 /* Read yb (real), xb(imag) input */ 01601 T0 = pSrc16[i1 * 2u] >> 2u; 01602 T1 = pSrc16[(i1 * 2u) + 1u] >> 2u; 01603 /* Read yd (real), xd(imag) input */ 01604 /* input is down scale by 4 to avoid overflow */ 01605 U0 = pSrc16[i3 * 2u] >> 2u; 01606 U1 = pSrc16[(i3 * 2u) + 1u] >> 2u; 01607 01608 /* T0 = (yb + yd), T1 = (xb + xd) */ 01609 T0 = __SSAT(T0 + U0, 16u); 01610 T1 = __SSAT(T1 + U1, 16u); 01611 01612 /* writing the butterfly processed i0 sample */ 01613 /* xa' = xa + xb + xc + xd */ 01614 /* ya' = ya + yb + yc + yd */ 01615 pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u); 01616 pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u); 01617 01618 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc)- (xb + xd) */ 01619 R0 = __SSAT(R0 - T0, 16u); 01620 R1 = __SSAT(R1 - T1, 16u); 01621 /* co2 & si2 are read from Coefficient pointer */ 01622 Co2 = pCoef16[2u * ic * 2u]; 01623 Si2 = pCoef16[(2u * ic * 2u) + 1u]; 01624 /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */ 01625 out1 = (q15_t) ((Co2 * R0 - Si2 * R1) >> 16u); 01626 /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */ 01627 out2 = (q15_t) ((Si2 * R0 + Co2 * R1) >> 16u); 01628 01629 /* Reading i0+fftLen/4 */ 01630 /* input is down scale by 4 to avoid overflow */ 01631 /* T0 = yb, T1 = xb */ 01632 T0 = pSrc16[i1 * 2u] >> 2u; 01633 T1 = pSrc16[(i1 * 2u) + 1u] >> 2u; 01634 01635 /* writing the butterfly processed i0 + fftLen/4 sample */ 01636 /* writing output(xc', yc') in little endian format */ 01637 pSrc16[i1 * 2u] = out1; 01638 pSrc16[(i1 * 2u) + 1u] = out2; 01639 01640 /* Butterfly calculations */ 01641 /* input is down scale by 4 to avoid overflow */ 01642 /* U0 = yd, U1 = xd) */ 01643 U0 = pSrc16[i3 * 2u] >> 2u; 01644 U1 = pSrc16[(i3 * 2u) + 1u] >> 2u; 01645 01646 /* T0 = yb-yd, T1 = xb-xd) */ 01647 T0 = __SSAT(T0 - U0, 16u); 01648 T1 = __SSAT(T1 - U1, 16u); 01649 /* R0 = (ya-yc) - (xb- 
xd) , R1 = (xa-xc) + (yb-yd) */ 01650 R0 = (q15_t) __SSAT((q31_t) (S0 + T1), 16); 01651 R1 = (q15_t) __SSAT((q31_t) (S1 - T0), 16); 01652 /* S = (ya-yc) + (xb- xd), S1 = (xa-xc) - (yb-yd) */ 01653 S0 = (q15_t) __SSAT((q31_t) (S0 - T1), 16); 01654 S1 = (q15_t) __SSAT((q31_t) (S1 + T0), 16); 01655 01656 /* co1 & si1 are read from Coefficient pointer */ 01657 Co1 = pCoef16[ic * 2u]; 01658 Si1 = pCoef16[(ic * 2u) + 1u]; 01659 /* Butterfly process for the i0+fftLen/2 sample */ 01660 /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */ 01661 out1 = (q15_t) ((Co1 * S0 - Si1 * S1) >> 16u); 01662 /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */ 01663 out2 = (q15_t) ((Si1 * S0 + Co1 * S1) >> 16u); 01664 /* writing output(xb', yb') in little endian format */ 01665 pSrc16[i2 * 2u] = out1; 01666 pSrc16[(i2 * 2u) + 1u] = out2; 01667 01668 /* Co3 & si3 are read from Coefficient pointer */ 01669 Co3 = pCoef16[3u * ic * 2u]; 01670 Si3 = pCoef16[(3u * ic * 2u) + 1u]; 01671 /* Butterfly process for the i0+3fftLen/4 sample */ 01672 /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */ 01673 out1 = (q15_t) ((Co3 * R0 - Si3 * R1) >> 16u); 01674 /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */ 01675 out2 = (q15_t) ((Si3 * R0 + Co3 * R1) >> 16u); 01676 /* writing output(xd', yd') in little endian format */ 01677 pSrc16[i3 * 2u] = out1; 01678 pSrc16[(i3 * 2u) + 1u] = out2; 01679 01680 /* Twiddle coefficients index modifier */ 01681 ic = ic + twidCoefModifier; 01682 01683 /* Updating input index */ 01684 i0 = i0 + 1u; 01685 01686 } while(--j); 01687 01688 /* End of first stage process */ 01689 01690 /* data is in 4.11(q11) format */ 01691 01692 01693 /* Start of Middle stage process */ 01694 01695 /* Twiddle coefficients index modifier */ 01696 twidCoefModifier <<= 2u; 01697 01698 /* Calculation of Middle stage */ 01699 for (k = fftLen / 4u; k > 4u; k >>= 2u) 01700 { 01701 /* Initializations for the middle stage */ 01702 n1 = n2; 01703 n2 >>= 2u; 01704 ic = 0u; 01705 01706 for (j 
= 0u; j <= (n2 - 1u); j++) 01707 { 01708 /* index calculation for the coefficients */ 01709 Co1 = pCoef16[ic * 2u]; 01710 Si1 = pCoef16[(ic * 2u) + 1u]; 01711 Co2 = pCoef16[2u * ic * 2u]; 01712 Si2 = pCoef16[2u * ic * 2u + 1u]; 01713 Co3 = pCoef16[3u * ic * 2u]; 01714 Si3 = pCoef16[(3u * ic * 2u) + 1u]; 01715 01716 /* Twiddle coefficients index modifier */ 01717 ic = ic + twidCoefModifier; 01718 01719 /* Butterfly implementation */ 01720 for (i0 = j; i0 < fftLen; i0 += n1) 01721 { 01722 /* index calculation for the input as, */ 01723 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 01724 i1 = i0 + n2; 01725 i2 = i1 + n2; 01726 i3 = i2 + n2; 01727 01728 /* Reading i0, i0+fftLen/2 inputs */ 01729 /* Read ya (real), xa(imag) input */ 01730 T0 = pSrc16[i0 * 2u]; 01731 T1 = pSrc16[(i0 * 2u) + 1u]; 01732 01733 /* Read yc (real), xc(imag) input */ 01734 S0 = pSrc16[i2 * 2u]; 01735 S1 = pSrc16[(i2 * 2u) + 1u]; 01736 01737 01738 /* R0 = (ya + yc), R1 = (xa + xc) */ 01739 R0 = __SSAT(T0 + S0, 16u); 01740 R1 = __SSAT(T1 + S1, 16u); 01741 /* S0 = (ya - yc), S1 = (xa - xc) */ 01742 S0 = __SSAT(T0 - S0, 16u); 01743 S1 = __SSAT(T1 - S1, 16u); 01744 01745 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 01746 /* Read yb (real), xb(imag) input */ 01747 T0 = pSrc16[i1 * 2u]; 01748 T1 = pSrc16[(i1 * 2u) + 1u]; 01749 01750 /* Read yd (real), xd(imag) input */ 01751 U0 = pSrc16[i3 * 2u]; 01752 U1 = pSrc16[(i3 * 2u) + 1u]; 01753 01754 /* T0 = (yb + yd), T1 = (xb + xd) */ 01755 T0 = __SSAT(T0 + U0, 16u); 01756 T1 = __SSAT(T1 + U1, 16u); 01757 01758 /* writing the butterfly processed i0 sample */ 01759 /* xa' = xa + xb + xc + xd */ 01760 /* ya' = ya + yb + yc + yd */ 01761 pSrc16[i0 * 2u] = ((R0 >> 1u) + (T0 >> 1u)) >> 1u; 01762 pSrc16[(i0 * 2u) + 1u] = ((R1 >> 1u) + (T1 >> 1u)) >> 1u; 01763 01764 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */ 01765 R0 = (R0 >> 1u) - (T0 >> 1u); 01766 R1 = (R1 >> 1u) - (T1 >> 1u); 01767 01768 /* 
(ya-yb+yc-yd)* (si2) - (xa-xb+xc-xd)* co2 */ 01769 out1 = (q15_t) ((Co2 * R0 - Si2 * R1) >> 16); 01770 /* (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */ 01771 out2 = (q15_t) ((Si2 * R0 + Co2 * R1) >> 16); 01772 01773 /* Reading i0+3fftLen/4 */ 01774 /* Read yb (real), xb(imag) input */ 01775 T0 = pSrc16[i1 * 2u]; 01776 T1 = pSrc16[(i1 * 2u) + 1u]; 01777 01778 /* writing the butterfly processed i0 + fftLen/4 sample */ 01779 /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */ 01780 /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */ 01781 pSrc16[i1 * 2u] = out1; 01782 pSrc16[(i1 * 2u) + 1u] = out2; 01783 01784 /* Butterfly calculations */ 01785 /* Read yd (real), xd(imag) input */ 01786 U0 = pSrc16[i3 * 2u]; 01787 U1 = pSrc16[(i3 * 2u) + 1u]; 01788 01789 /* T0 = yb-yd, T1 = xb-xd) */ 01790 T0 = __SSAT(T0 - U0, 16u); 01791 T1 = __SSAT(T1 - U1, 16u); 01792 01793 /* R0 = (ya-yc) - (xb- xd) , R1 = (xa-xc) + (yb-yd) */ 01794 R0 = (S0 >> 1u) + (T1 >> 1u); 01795 R1 = (S1 >> 1u) - (T0 >> 1u); 01796 01797 /* S1 = (ya-yc) + (xb- xd), S1 = (xa-xc) - (yb-yd) */ 01798 S0 = (S0 >> 1u) - (T1 >> 1u); 01799 S1 = (S1 >> 1u) + (T0 >> 1u); 01800 01801 /* Butterfly process for the i0+fftLen/2 sample */ 01802 out1 = (q15_t) ((Co1 * S0 - Si1 * S1) >> 16u); 01803 out2 = (q15_t) ((Si1 * S0 + Co1 * S1) >> 16u); 01804 /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */ 01805 /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */ 01806 pSrc16[i2 * 2u] = out1; 01807 pSrc16[(i2 * 2u) + 1u] = out2; 01808 01809 /* Butterfly process for the i0+3fftLen/4 sample */ 01810 out1 = (q15_t) ((Co3 * R0 - Si3 * R1) >> 16u); 01811 01812 out2 = (q15_t) ((Si3 * R0 + Co3 * R1) >> 16u); 01813 /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */ 01814 /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */ 01815 pSrc16[i3 * 2u] = out1; 01816 pSrc16[(i3 * 2u) + 1u] = out2; 01817 01818 01819 } 01820 } 01821 /* Twiddle coefficients index modifier */ 01822 twidCoefModifier <<= 2u; 01823 } 01824 /* End of Middle 
stages process */ 01825 01826 01827 /* data is in 10.6(q6) format for the 1024 point */ 01828 /* data is in 8.8(q8) format for the 256 point */ 01829 /* data is in 6.10(q10) format for the 64 point */ 01830 /* data is in 4.12(q12) format for the 16 point */ 01831 01832 /* start of last stage process */ 01833 01834 01835 /* Initializations for the last stage */ 01836 n1 = n2; 01837 n2 >>= 2u; 01838 01839 /* Butterfly implementation */ 01840 for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1) 01841 { 01842 /* index calculation for the input as, */ 01843 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 01844 i1 = i0 + n2; 01845 i2 = i1 + n2; 01846 i3 = i2 + n2; 01847 01848 /* Reading i0, i0+fftLen/2 inputs */ 01849 /* Read ya (real), xa(imag) input */ 01850 T0 = pSrc16[i0 * 2u]; 01851 T1 = pSrc16[(i0 * 2u) + 1u]; 01852 /* Read yc (real), xc(imag) input */ 01853 S0 = pSrc16[i2 * 2u]; 01854 S1 = pSrc16[(i2 * 2u) + 1u]; 01855 01856 /* R0 = (ya + yc), R1 = (xa + xc) */ 01857 R0 = __SSAT(T0 + S0, 16u); 01858 R1 = __SSAT(T1 + S1, 16u); 01859 /* S0 = (ya - yc), S1 = (xa - xc) */ 01860 S0 = __SSAT(T0 - S0, 16u); 01861 S1 = __SSAT(T1 - S1, 16u); 01862 01863 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 01864 /* Read yb (real), xb(imag) input */ 01865 T0 = pSrc16[i1 * 2u]; 01866 T1 = pSrc16[(i1 * 2u) + 1u]; 01867 /* Read yd (real), xd(imag) input */ 01868 U0 = pSrc16[i3 * 2u]; 01869 U1 = pSrc16[(i3 * 2u) + 1u]; 01870 01871 /* T0 = (yb + yd), T1 = (xb + xd) */ 01872 T0 = __SSAT(T0 + U0, 16u); 01873 T1 = __SSAT(T1 + U1, 16u); 01874 01875 /* writing the butterfly processed i0 sample */ 01876 /* xa' = xa + xb + xc + xd */ 01877 /* ya' = ya + yb + yc + yd */ 01878 pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u); 01879 pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u); 01880 01881 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */ 01882 R0 = (R0 >> 1u) - (T0 >> 1u); 01883 R1 = (R1 >> 1u) - (T1 >> 1u); 01884 01885 /* Read yb (real), xb(imag) 
input */ 01886 T0 = pSrc16[i1 * 2u]; 01887 T1 = pSrc16[(i1 * 2u) + 1u]; 01888 01889 /* writing the butterfly processed i0 + fftLen/4 sample */ 01890 /* xc' = (xa-xb+xc-xd) */ 01891 /* yc' = (ya-yb+yc-yd) */ 01892 pSrc16[i1 * 2u] = R0; 01893 pSrc16[(i1 * 2u) + 1u] = R1; 01894 01895 /* Read yd (real), xd(imag) input */ 01896 U0 = pSrc16[i3 * 2u]; 01897 U1 = pSrc16[(i3 * 2u) + 1u]; 01898 /* T0 = (yb - yd), T1 = (xb - xd) */ 01899 T0 = __SSAT(T0 - U0, 16u); 01900 T1 = __SSAT(T1 - U1, 16u); 01901 01902 /* writing the butterfly processed i0 + fftLen/2 sample */ 01903 /* xb' = (xa-yb-xc+yd) */ 01904 /* yb' = (ya+xb-yc-xd) */ 01905 pSrc16[i2 * 2u] = (S0 >> 1u) - (T1 >> 1u); 01906 pSrc16[(i2 * 2u) + 1u] = (S1 >> 1u) + (T0 >> 1u); 01907 01908 01909 /* writing the butterfly processed i0 + 3fftLen/4 sample */ 01910 /* xd' = (xa+yb-xc-yd) */ 01911 /* yd' = (ya-xb-yc+xd) */ 01912 pSrc16[i3 * 2u] = (S0 >> 1u) + (T1 >> 1u); 01913 pSrc16[(i3 * 2u) + 1u] = (S1 >> 1u) - (T0 >> 1u); 01914 } 01915 /* end of last stage process */ 01916 01917 /* output is in 11.5(q5) format for the 1024 point */ 01918 /* output is in 9.7(q7) format for the 256 point */ 01919 /* output is in 7.9(q9) format for the 64 point */ 01920 /* output is in 5.11(q11) format for the 16 point */ 01921 01922 #endif /* #ifndef ARM_MATH_CM0_FAMILY */ 01923 01924 }
/* Listing generated on Tue Jul 12 2022 11:59:15 by Doxygen 1.7.2 */