Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository ZIP archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of mbed-os by
arm_cfft_radix4_q15.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2014 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 19. March 2015 00005 * $Revision: V.1.4.5 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_cfft_radix4_q15.c 00009 * 00010 * Description: This file has function definition of Radix-4 FFT & IFFT function and 00011 * In-place bit reversal using bit reversal table 00012 * 00013 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00014 * 00015 * Redistribution and use in source and binary forms, with or without 00016 * modification, are permitted provided that the following conditions 00017 * are met: 00018 * - Redistributions of source code must retain the above copyright 00019 * notice, this list of conditions and the following disclaimer. 00020 * - Redistributions in binary form must reproduce the above copyright 00021 * notice, this list of conditions and the following disclaimer in 00022 * the documentation and/or other materials provided with the 00023 * distribution. 00024 * - Neither the name of ARM LIMITED nor the names of its contributors 00025 * may be used to endorse or promote products derived from this 00026 * software without specific prior written permission. 00027 * 00028 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00029 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00030 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00031 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 00032 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00033 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00034 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00035 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00036 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00037 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00038 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00039 * POSSIBILITY OF SUCH DAMAGE. 00040 * -------------------------------------------------------------------- */ 00041 00042 #include "arm_math.h" 00043 00044 00045 void arm_radix4_butterfly_q15( 00046 q15_t * pSrc16, 00047 uint32_t fftLen, 00048 q15_t * pCoef16, 00049 uint32_t twidCoefModifier); 00050 00051 void arm_radix4_butterfly_inverse_q15( 00052 q15_t * pSrc16, 00053 uint32_t fftLen, 00054 q15_t * pCoef16, 00055 uint32_t twidCoefModifier); 00056 00057 void arm_bitreversal_q15( 00058 q15_t * pSrc, 00059 uint32_t fftLen, 00060 uint16_t bitRevFactor, 00061 uint16_t * pBitRevTab); 00062 00063 /** 00064 * @ingroup groupTransforms 00065 */ 00066 00067 /** 00068 * @addtogroup ComplexFFT 00069 * @{ 00070 */ 00071 00072 00073 /** 00074 * @details 00075 * @brief Processing function for the Q15 CFFT/CIFFT. 00076 * @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q15 and will be removed 00077 * @param[in] *S points to an instance of the Q15 CFFT/CIFFT structure. 00078 * @param[in, out] *pSrc points to the complex data buffer. Processing occurs in-place. 00079 * @return none. 00080 * 00081 * \par Input and output formats: 00082 * \par 00083 * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process. 00084 * Hence the output format is different for different FFT sizes. 
00085 * The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT: 00086 * \par 00087 * \image html CFFTQ15.gif "Input and Output Formats for Q15 CFFT" 00088 * \image html CIFFTQ15.gif "Input and Output Formats for Q15 CIFFT" 00089 */ 00090 00091 void arm_cfft_radix4_q15( 00092 const arm_cfft_radix4_instance_q15 * S, 00093 q15_t * pSrc) 00094 { 00095 if(S->ifftFlag == 1u) 00096 { 00097 /* Complex IFFT radix-4 */ 00098 arm_radix4_butterfly_inverse_q15(pSrc, S->fftLen, S->pTwiddle, 00099 S->twidCoefModifier); 00100 } 00101 else 00102 { 00103 /* Complex FFT radix-4 */ 00104 arm_radix4_butterfly_q15(pSrc, S->fftLen, S->pTwiddle, 00105 S->twidCoefModifier); 00106 } 00107 00108 if(S->bitReverseFlag == 1u) 00109 { 00110 /* Bit Reversal */ 00111 arm_bitreversal_q15(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable); 00112 } 00113 00114 } 00115 00116 /** 00117 * @} end of ComplexFFT group 00118 */ 00119 00120 /* 00121 * Radix-4 FFT algorithm used is : 00122 * 00123 * Input real and imaginary data: 00124 * x(n) = xa + j * ya 00125 * x(n+N/4 ) = xb + j * yb 00126 * x(n+N/2 ) = xc + j * yc 00127 * x(n+3N 4) = xd + j * yd 00128 * 00129 * 00130 * Output real and imaginary data: 00131 * x(4r) = xa'+ j * ya' 00132 * x(4r+1) = xb'+ j * yb' 00133 * x(4r+2) = xc'+ j * yc' 00134 * x(4r+3) = xd'+ j * yd' 00135 * 00136 * 00137 * Twiddle factors for radix-4 FFT: 00138 * Wn = co1 + j * (- si1) 00139 * W2n = co2 + j * (- si2) 00140 * W3n = co3 + j * (- si3) 00141 00142 * The real and imaginary output values for the radix-4 butterfly are 00143 * xa' = xa + xb + xc + xd 00144 * ya' = ya + yb + yc + yd 00145 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) 00146 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) 00147 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) 00148 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) 00149 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) 00150 * yd' = (ya+xb-yc-xd)* co3 - 
(xa-yb-xc+yd)* (si3) 00151 * 00152 */ 00153 00154 /** 00155 * @brief Core function for the Q15 CFFT butterfly process. 00156 * @param[in, out] *pSrc16 points to the in-place buffer of Q15 data type. 00157 * @param[in] fftLen length of the FFT. 00158 * @param[in] *pCoef16 points to twiddle coefficient buffer. 00159 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 00160 * @return none. * @details The transform is done in place in three parts: a first stage that scales each input down by 4 before the butterfly, a loop of middle stages, and a last stage that uses no twiddle multiplications. The code under ifndef ARM_MATH_CM0_FAMILY works on packed pairs of 16-bit values through SIMD intrinsics; the other branch is a scalar version of the same stages. * @note NOTE(review): fftLen is presumably a power of 4 (the format comments mention 16, 64, 256 and 1024 points); with fftLen below 4 the first-stage counter j starts at 0 and the do-while decrement wraps around -- confirm that callers validate the length. * @note NOTE(review): the inline comments in the first and middle stages call the first element of each pair 'ya (real)', while the algorithm description and the last-stage comments use xa for the real part -- the labels look swapped, confirm before relying on them. 00161 */ 00162 00163 void arm_radix4_butterfly_q15( 00164 q15_t * pSrc16, 00165 uint32_t fftLen, 00166 q15_t * pCoef16, 00167 uint32_t twidCoefModifier) 00168 { 00169 00170 #ifndef ARM_MATH_CM0_FAMILY 00171 00172 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00173 00174 q31_t R, S, T, U; 00175 q31_t C1, C2, C3, out1, out2; 00176 uint32_t n1, n2, ic, i0, j, k; 00177 00178 q15_t *ptr1; 00179 q15_t *pSi0; 00180 q15_t *pSi1; 00181 q15_t *pSi2; 00182 q15_t *pSi3; 00183 00184 q31_t xaya, xbyb, xcyc, xdyd; 00185 00186 /* Total process is divided into three stages */ 00187 00188 /* process first stage, middle stages, & last stage */ 00189 00190 /* Initializations for the first stage */ 00191 n2 = fftLen; 00192 n1 = n2; 00193 00194 /* n2 = fftLen/4 */ 00195 n2 >>= 2u; 00196 00197 /* Index for twiddle coefficient */ 00198 ic = 0u; 00199 00200 /* Index for input read and output write */ 00201 j = n2; 00202 00203 pSi0 = pSrc16; 00204 pSi1 = pSi0 + 2 * n2; 00205 pSi2 = pSi1 + 2 * n2; 00206 pSi3 = pSi2 + 2 * n2; 00207 00208 /* Input is in 1.15(q15) format */ 00209 00210 /* start of first stage process */ 00211 do 00212 { 00213 /* Butterfly implementation */ 00214 00215 /* Reading i0, i0+fftLen/2 inputs */ 00216 /* Read ya (real), xa(imag) input */ 00217 T = _SIMD32_OFFSET(pSi0); 00218 T = __SHADD16(T, 0); // this is just a SIMD arithmetic shift right by 1 00219 T = __SHADD16(T, 0); // it turns out doing this twice is 2 cycles, the alternative takes 3 cycles 00220 //in = ((int16_t) (T & 0xFFFF)) >> 2; // 
alternative code that takes 3 cycles 00221 //T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF); 00222 00223 /* Read yc (real), xc(imag) input */ 00224 S = _SIMD32_OFFSET(pSi2); 00225 S = __SHADD16(S, 0); 00226 S = __SHADD16(S, 0); 00227 00228 /* R = packed((ya + yc), (xa + xc) ) */ 00229 R = __QADD16(T, S); 00230 00231 /* S = packed((ya - yc), (xa - xc) ) */ 00232 S = __QSUB16(T, S); 00233 00234 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 00235 /* Read yb (real), xb(imag) input */ 00236 T = _SIMD32_OFFSET(pSi1); 00237 T = __SHADD16(T, 0); 00238 T = __SHADD16(T, 0); 00239 00240 /* Read yd (real), xd(imag) input */ 00241 U = _SIMD32_OFFSET(pSi3); 00242 U = __SHADD16(U, 0); 00243 U = __SHADD16(U, 0); 00244 00245 /* T = packed((yb + yd), (xb + xd) ) */ 00246 T = __QADD16(T, U); 00247 00248 /* writing the butterfly processed i0 sample */ 00249 /* xa' = xa + xb + xc + xd */ 00250 /* ya' = ya + yb + yc + yd */ 00251 _SIMD32_OFFSET(pSi0) = __SHADD16(R, T); 00252 pSi0 += 2; 00253 00254 /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */ 00255 R = __QSUB16(R, T); 00256 00257 /* co2 & si2 are read from SIMD Coefficient pointer */ 00258 C2 = _SIMD32_OFFSET(pCoef16 + (4u * ic)); 00259 00260 #ifndef ARM_MATH_BIG_ENDIAN 00261 00262 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 00263 out1 = __SMUAD(C2, R) >> 16u; 00264 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00265 out2 = __SMUSDX(C2, R); 00266 00267 #else 00268 00269 /* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00270 out1 = __SMUSDX(R, C2) >> 16u; 00271 /* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 00272 out2 = __SMUAD(C2, R); 00273 00274 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00275 00276 /* Reading i0+fftLen/4 */ 00277 /* T = packed(yb, xb) */ 00278 T = _SIMD32_OFFSET(pSi1); 00279 T = __SHADD16(T, 0); 00280 T = __SHADD16(T, 0); 00281 00282 /* writing the butterfly processed i0 + fftLen/4 sample */ 00283 /* writing output(xc', yc') in little endian format */ 00284 
_SIMD32_OFFSET(pSi1) = 00285 (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00286 pSi1 += 2; 00287 00288 /* Butterfly calculations */ 00289 /* U = packed(yd, xd) */ 00290 U = _SIMD32_OFFSET(pSi3); 00291 U = __SHADD16(U, 0); 00292 U = __SHADD16(U, 0); 00293 00294 /* T = packed(yb-yd, xb-xd) */ 00295 T = __QSUB16(T, U); 00296 00297 #ifndef ARM_MATH_BIG_ENDIAN 00298 00299 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 00300 R = __QASX(S, T); 00301 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 00302 S = __QSAX(S, T); 00303 00304 #else 00305 00306 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 00307 R = __QSAX(S, T); 00308 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 00309 S = __QASX(S, T); 00310 00311 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00312 00313 /* co1 & si1 are read from SIMD Coefficient pointer */ 00314 C1 = _SIMD32_OFFSET(pCoef16 + (2u * ic)); 00315 /* Butterfly process for the i0+fftLen/2 sample */ 00316 00317 #ifndef ARM_MATH_BIG_ENDIAN 00318 00319 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 00320 out1 = __SMUAD(C1, S) >> 16u; 00321 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 00322 out2 = __SMUSDX(C1, S); 00323 00324 #else 00325 00326 /* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 00327 out1 = __SMUSDX(S, C1) >> 16u; 00328 /* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 00329 out2 = __SMUAD(C1, S); 00330 00331 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00332 00333 /* writing output(xb', yb') in little endian format */ 00334 _SIMD32_OFFSET(pSi2) = 00335 ((out2) & 0xFFFF0000) | ((out1) & 0x0000FFFF); 00336 pSi2 += 2; 00337 00338 00339 /* co3 & si3 are read from SIMD Coefficient pointer */ 00340 C3 = _SIMD32_OFFSET(pCoef16 + (6u * ic)); 00341 /* Butterfly process for the i0+3fftLen/4 sample */ 00342 00343 #ifndef ARM_MATH_BIG_ENDIAN 00344 00345 /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ 00346 out1 = __SMUAD(C3, R) >> 16u; 00347 /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* 
(si3) */ 00348 out2 = __SMUSDX(C3, R); 00349 00350 #else 00351 00352 /* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ 00353 out1 = __SMUSDX(R, C3) >> 16u; 00354 /* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ 00355 out2 = __SMUAD(C3, R); 00356 00357 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00358 00359 /* writing output(xd', yd') in little endian format */ 00360 _SIMD32_OFFSET(pSi3) = 00361 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00362 pSi3 += 2; 00363 00364 /* Twiddle coefficients index modifier */ 00365 ic = ic + twidCoefModifier; 00366 00367 } while(--j); 00368 /* data is in 4.11(q11) format */ 00369 00370 /* end of first stage process */ 00371 00372 00373 /* start of middle stage process */ 00374 00375 /* Twiddle coefficients index modifier */ 00376 twidCoefModifier <<= 2u; 00377 00378 /* Calculation of Middle stage */ 00379 for (k = fftLen / 4u; k > 4u; k >>= 2u) 00380 { 00381 /* Initializations for the middle stage */ 00382 n1 = n2; 00383 n2 >>= 2u; 00384 ic = 0u; 00385 00386 for (j = 0u; j <= (n2 - 1u); j++) 00387 { 00388 /* index calculation for the coefficients */ 00389 C1 = _SIMD32_OFFSET(pCoef16 + (2u * ic)); 00390 C2 = _SIMD32_OFFSET(pCoef16 + (4u * ic)); 00391 C3 = _SIMD32_OFFSET(pCoef16 + (6u * ic)); 00392 00393 /* Twiddle coefficients index modifier */ 00394 ic = ic + twidCoefModifier; 00395 00396 pSi0 = pSrc16 + 2 * j; 00397 pSi1 = pSi0 + 2 * n2; 00398 pSi2 = pSi1 + 2 * n2; 00399 pSi3 = pSi2 + 2 * n2; 00400 00401 /* Butterfly implementation */ 00402 for (i0 = j; i0 < fftLen; i0 += n1) 00403 { 00404 /* Reading i0, i0+fftLen/2 inputs */ 00405 /* Read ya (real), xa(imag) input */ 00406 T = _SIMD32_OFFSET(pSi0); 00407 00408 /* Read yc (real), xc(imag) input */ 00409 S = _SIMD32_OFFSET(pSi2); 00410 00411 /* R = packed( (ya + yc), (xa + xc)) */ 00412 R = __QADD16(T, S); 00413 00414 /* S = packed((ya - yc), (xa - xc)) */ 00415 S = __QSUB16(T, S); 00416 00417 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 00418 /* Read yb (real), xb(imag) 
input */ 00419 T = _SIMD32_OFFSET(pSi1); 00420 00421 /* Read yd (real), xd(imag) input */ 00422 U = _SIMD32_OFFSET(pSi3); 00423 00424 /* T = packed( (yb + yd), (xb + xd)) */ 00425 T = __QADD16(T, U); 00426 00427 /* writing the butterfly processed i0 sample */ 00428 00429 /* xa' = xa + xb + xc + xd */ 00430 /* ya' = ya + yb + yc + yd */ 00431 out1 = __SHADD16(R, T); 00432 out1 = __SHADD16(out1, 0); 00433 _SIMD32_OFFSET(pSi0) = out1; 00434 pSi0 += 2 * n1; 00435 00436 /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */ 00437 R = __SHSUB16(R, T); 00438 00439 #ifndef ARM_MATH_BIG_ENDIAN 00440 00441 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ 00442 out1 = __SMUAD(C2, R) >> 16u; 00443 00444 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00445 out2 = __SMUSDX(C2, R); 00446 00447 #else 00448 00449 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00450 out1 = __SMUSDX(R, C2) >> 16u; 00451 00452 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ 00453 out2 = __SMUAD(C2, R); 00454 00455 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00456 00457 /* Reading i0+3fftLen/4 */ 00458 /* Read yb (real), xb(imag) input */ 00459 T = _SIMD32_OFFSET(pSi1); 00460 00461 /* writing the butterfly processed i0 + fftLen/4 sample */ 00462 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 00463 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00464 _SIMD32_OFFSET(pSi1) = 00465 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00466 pSi1 += 2 * n1; 00467 00468 /* Butterfly calculations */ 00469 00470 /* Read yd (real), xd(imag) input */ 00471 U = _SIMD32_OFFSET(pSi3); 00472 00473 /* T = packed(yb-yd, xb-xd) */ 00474 T = __QSUB16(T, U); 00475 00476 #ifndef ARM_MATH_BIG_ENDIAN 00477 00478 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 00479 R = __SHASX(S, T); 00480 00481 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 00482 S = __SHSAX(S, T); 00483 00484 00485 /* Butterfly process for the i0+fftLen/2 sample */ 00486 out1 = __SMUAD(C1, S) >> 16u; 00487 out2 = __SMUSDX(C1, 
S); 00488 00489 #else 00490 00491 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 00492 R = __SHSAX(S, T); 00493 00494 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 00495 S = __SHASX(S, T); 00496 00497 00498 /* Butterfly process for the i0+fftLen/2 sample */ 00499 out1 = __SMUSDX(S, C1) >> 16u; 00500 out2 = __SMUAD(C1, S); 00501 00502 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00503 00504 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 00505 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 00506 _SIMD32_OFFSET(pSi2) = 00507 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00508 pSi2 += 2 * n1; 00509 00510 /* Butterfly process for the i0+3fftLen/4 sample */ 00511 00512 #ifndef ARM_MATH_BIG_ENDIAN 00513 00514 out1 = __SMUAD(C3, R) >> 16u; 00515 out2 = __SMUSDX(C3, R); 00516 00517 #else 00518 00519 out1 = __SMUSDX(R, C3) >> 16u; 00520 out2 = __SMUAD(C3, R); 00521 00522 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00523 00524 /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ 00525 /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ 00526 _SIMD32_OFFSET(pSi3) = 00527 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00528 pSi3 += 2 * n1; 00529 } 00530 } 00531 /* Twiddle coefficients index modifier */ 00532 twidCoefModifier <<= 2u; 00533 } 00534 /* end of middle stage process */ 00535 00536 00537 /* data is in 10.6(q6) format for the 1024 point */ 00538 /* data is in 8.8(q8) format for the 256 point */ 00539 /* data is in 6.10(q10) format for the 64 point */ 00540 /* data is in 4.12(q12) format for the 16 point */ 00541 00542 /* Initializations for the last stage */ 00543 j = fftLen >> 2; 00544 00545 ptr1 = &pSrc16[0]; 00546 00547 /* start of last stage process */ 00548 00549 /* Butterfly implementation */ 00550 do 00551 { 00552 /* Read xa (real), ya(imag) input */ 00553 xaya = *__SIMD32(ptr1)++; 00554 00555 /* Read xb (real), yb(imag) input */ 00556 xbyb = *__SIMD32(ptr1)++; 00557 00558 /* Read xc (real), yc(imag) input */ 00559 xcyc = 
*__SIMD32(ptr1)++; 00560 00561 /* Read xd (real), yd(imag) input */ 00562 xdyd = *__SIMD32(ptr1)++; 00563 00564 /* R = packed((ya + yc), (xa + xc)) */ 00565 R = __QADD16(xaya, xcyc); 00566 00567 /* T = packed((yb + yd), (xb + xd)) */ 00568 T = __QADD16(xbyb, xdyd); 00569 00570 /* move the pointer back by 8 q15 values so the results overwrite the inputs */ 00571 ptr1 = ptr1 - 8u; 00572 00573 00574 /* xa' = xa + xb + xc + xd */ 00575 /* ya' = ya + yb + yc + yd */ 00576 *__SIMD32(ptr1)++ = __SHADD16(R, T); 00577 00578 /* T = packed((yb + yd), (xb + xd)) -- recomputed from the same operands as above */ 00579 T = __QADD16(xbyb, xdyd); 00580 00581 /* xc' = (xa-xb+xc-xd) */ 00582 /* yc' = (ya-yb+yc-yd) */ 00583 *__SIMD32(ptr1)++ = __SHSUB16(R, T); 00584 00585 /* S = packed((ya - yc), (xa - xc)) */ 00586 S = __QSUB16(xaya, xcyc); 00587 00588 /* xbyb and xdyd were already read at the top of the loop */ 00589 /* U = packed( (yb - yd), (xb - xd)) */ 00590 U = __QSUB16(xbyb, xdyd); 00591 00592 #ifndef ARM_MATH_BIG_ENDIAN 00593 00594 /* xb' = (xa+yb-xc-yd) */ 00595 /* yb' = (ya-xb-yc+xd) */ 00596 *__SIMD32(ptr1)++ = __SHSAX(S, U); 00597 00598 00599 /* xd' = (xa-yb-xc+yd) */ 00600 /* yd' = (ya+xb-yc-xd) */ 00601 *__SIMD32(ptr1)++ = __SHASX(S, U); 00602 00603 #else 00604 00605 /* xb' = (xa+yb-xc-yd) */ 00606 /* yb' = (ya-xb-yc+xd) */ 00607 *__SIMD32(ptr1)++ = __SHASX(S, U); 00608 00609 00610 /* xd' = (xa-yb-xc+yd) */ 00611 /* yd' = (ya+xb-yc-xd) */ 00612 *__SIMD32(ptr1)++ = __SHSAX(S, U); 00613 00614 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00615 00616 } while(--j); 00617 00618 /* end of last stage process */ 00619 00620 /* output is in 11.5(q5) format for the 1024 point */ 00621 /* output is in 9.7(q7) format for the 256 point */ 00622 /* output is in 7.9(q9) format for the 64 point */ 00623 /* output is in 5.11(q11) format for the 16 point */ 00624 00625 00626 #else 00627 00628 /* Run the below code for Cortex-M0 */ 00629 00630 q15_t R0, R1, S0, S1, T0, T1, U0, U1; 00631 q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2; 00632 uint32_t n1, n2, ic, i0, i1, i2, i3, j, k; 00633 00634 /* Total 
process is divided into three stages */ 00635 00636 /* process first stage, middle stages, & last stage */ 00637 00638 /* Initializations for the first stage */ 00639 n2 = fftLen; 00640 n1 = n2; 00641 00642 /* n2 = fftLen/4 */ 00643 n2 >>= 2u; 00644 00645 /* Index for twiddle coefficient */ 00646 ic = 0u; 00647 00648 /* Index for input read and output write */ 00649 i0 = 0u; 00650 j = n2; 00651 00652 /* Input is in 1.15(q15) format */ 00653 00654 /* start of first stage process */ 00655 do 00656 { 00657 /* Butterfly implementation */ 00658 00659 /* index calculation for the input as, */ 00660 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 00661 i1 = i0 + n2; 00662 i2 = i1 + n2; 00663 i3 = i2 + n2; 00664 00665 /* Reading i0, i0+fftLen/2 inputs */ 00666 00667 /* input is down scale by 4 to avoid overflow */ 00668 /* Read ya (real), xa(imag) input */ 00669 T0 = pSrc16[i0 * 2u] >> 2u; 00670 T1 = pSrc16[(i0 * 2u) + 1u] >> 2u; 00671 00672 /* input is down scale by 4 to avoid overflow */ 00673 /* Read yc (real), xc(imag) input */ 00674 S0 = pSrc16[i2 * 2u] >> 2u; 00675 S1 = pSrc16[(i2 * 2u) + 1u] >> 2u; 00676 00677 /* R0 = (ya + yc) */ 00678 R0 = __SSAT(T0 + S0, 16u); 00679 /* R1 = (xa + xc) */ 00680 R1 = __SSAT(T1 + S1, 16u); 00681 00682 /* S0 = (ya - yc) */ 00683 S0 = __SSAT(T0 - S0, 16); 00684 /* S1 = (xa - xc) */ 00685 S1 = __SSAT(T1 - S1, 16); 00686 00687 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 00688 /* input is down scale by 4 to avoid overflow */ 00689 /* Read yb (real), xb(imag) input */ 00690 T0 = pSrc16[i1 * 2u] >> 2u; 00691 T1 = pSrc16[(i1 * 2u) + 1u] >> 2u; 00692 00693 /* input is down scale by 4 to avoid overflow */ 00694 /* Read yd (real), xd(imag) input */ 00695 U0 = pSrc16[i3 * 2u] >> 2u; 00696 U1 = pSrc16[(i3 * 2u) + 1] >> 2u; 00697 00698 /* T0 = (yb + yd) */ 00699 T0 = __SSAT(T0 + U0, 16u); 00700 /* T1 = (xb + xd) */ 00701 T1 = __SSAT(T1 + U1, 16u); 00702 00703 /* writing the butterfly processed i0 
sample */ 00704 /* ya' = ya + yb + yc + yd */ 00705 /* xa' = xa + xb + xc + xd */ 00706 pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u); 00707 pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u); 00708 00709 /* R0 = (ya + yc) - (yb + yd) */ 00710 /* R1 = (xa + xc) - (xb + xd) */ 00711 R0 = __SSAT(R0 - T0, 16u); 00712 R1 = __SSAT(R1 - T1, 16u); 00713 00714 /* co2 & si2 are read from Coefficient pointer */ 00715 Co2 = pCoef16[2u * ic * 2u]; 00716 Si2 = pCoef16[(2u * ic * 2u) + 1]; 00717 00718 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 00719 out1 = (q15_t) ((Co2 * R0 + Si2 * R1) >> 16u); 00720 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00721 out2 = (q15_t) ((-Si2 * R0 + Co2 * R1) >> 16u); 00722 00723 /* Reading i0+fftLen/4 */ 00724 /* input is down scale by 4 to avoid overflow */ 00725 /* T0 = yb, T1 = xb */ 00726 T0 = pSrc16[i1 * 2u] >> 2; 00727 T1 = pSrc16[(i1 * 2u) + 1] >> 2; 00728 00729 /* writing the butterfly processed i0 + fftLen/4 sample */ 00730 /* writing output(xc', yc') in little endian format */ 00731 pSrc16[i1 * 2u] = out1; 00732 pSrc16[(i1 * 2u) + 1] = out2; 00733 00734 /* Butterfly calculations */ 00735 /* input is down scale by 4 to avoid overflow */ 00736 /* U0 = yd, U1 = xd */ 00737 U0 = pSrc16[i3 * 2u] >> 2; 00738 U1 = pSrc16[(i3 * 2u) + 1] >> 2; 00739 /* T0 = yb-yd */ 00740 T0 = __SSAT(T0 - U0, 16); 00741 /* T1 = xb-xd */ 00742 T1 = __SSAT(T1 - U1, 16); 00743 00744 /* R1 = (ya-yc) + (xb- xd), R0 = (xa-xc) - (yb-yd)) */ 00745 R0 = (q15_t) __SSAT((q31_t) (S0 - T1), 16); 00746 R1 = (q15_t) __SSAT((q31_t) (S1 + T0), 16); 00747 00748 /* S1 = (ya-yc) - (xb- xd), S0 = (xa-xc) + (yb-yd)) */ 00749 S0 = (q15_t) __SSAT(((q31_t) S0 + T1), 16u); 00750 S1 = (q15_t) __SSAT(((q31_t) S1 - T0), 16u); 00751 00752 /* co1 & si1 are read from Coefficient pointer */ 00753 Co1 = pCoef16[ic * 2u]; 00754 Si1 = pCoef16[(ic * 2u) + 1]; 00755 /* Butterfly process for the i0+fftLen/2 sample */ 00756 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 00757 out1 
= (q15_t) ((Si1 * S1 + Co1 * S0) >> 16); 00758 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 00759 out2 = (q15_t) ((-Si1 * S0 + Co1 * S1) >> 16); 00760 00761 /* writing output(xb', yb') in little endian format */ 00762 pSrc16[i2 * 2u] = out1; 00763 pSrc16[(i2 * 2u) + 1] = out2; 00764 00765 /* Co3 & si3 are read from Coefficient pointer */ 00766 Co3 = pCoef16[3u * (ic * 2u)]; 00767 Si3 = pCoef16[(3u * (ic * 2u)) + 1]; 00768 /* Butterfly process for the i0+3fftLen/4 sample */ 00769 /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */ 00770 out1 = (q15_t) ((Si3 * R1 + Co3 * R0) >> 16u); 00771 /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */ 00772 out2 = (q15_t) ((-Si3 * R0 + Co3 * R1) >> 16u); 00773 /* writing output(xd', yd') in little endian format */ 00774 pSrc16[i3 * 2u] = out1; 00775 pSrc16[(i3 * 2u) + 1] = out2; 00776 00777 /* Twiddle coefficients index modifier */ 00778 ic = ic + twidCoefModifier; 00779 00780 /* Updating input index */ 00781 i0 = i0 + 1u; 00782 00783 } while(--j); 00784 /* data is in 4.11(q11) format */ 00785 00786 /* end of first stage process */ 00787 00788 00789 /* start of middle stage process */ 00790 00791 /* Twiddle coefficients index modifier */ 00792 twidCoefModifier <<= 2u; 00793 00794 /* Calculation of Middle stage */ 00795 for (k = fftLen / 4u; k > 4u; k >>= 2u) 00796 { 00797 /* Initializations for the middle stage */ 00798 n1 = n2; 00799 n2 >>= 2u; 00800 ic = 0u; 00801 00802 for (j = 0u; j <= (n2 - 1u); j++) 00803 { 00804 /* index calculation for the coefficients */ 00805 Co1 = pCoef16[ic * 2u]; 00806 Si1 = pCoef16[(ic * 2u) + 1u]; 00807 Co2 = pCoef16[2u * (ic * 2u)]; 00808 Si2 = pCoef16[(2u * (ic * 2u)) + 1u]; 00809 Co3 = pCoef16[3u * (ic * 2u)]; 00810 Si3 = pCoef16[(3u * (ic * 2u)) + 1u]; 00811 00812 /* Twiddle coefficients index modifier */ 00813 ic = ic + twidCoefModifier; 00814 00815 /* Butterfly implementation */ 00816 for (i0 = j; i0 < fftLen; i0 += n1) 00817 { 00818 /* index calculation for the input as, */ 
00819 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 00820 i1 = i0 + n2; 00821 i2 = i1 + n2; 00822 i3 = i2 + n2; 00823 00824 /* Reading i0, i0+fftLen/2 inputs */ 00825 /* Read ya (real), xa(imag) input */ 00826 T0 = pSrc16[i0 * 2u]; 00827 T1 = pSrc16[(i0 * 2u) + 1u]; 00828 00829 /* Read yc (real), xc(imag) input */ 00830 S0 = pSrc16[i2 * 2u]; 00831 S1 = pSrc16[(i2 * 2u) + 1u]; 00832 00833 /* R0 = (ya + yc), R1 = (xa + xc) */ 00834 R0 = __SSAT(T0 + S0, 16); 00835 R1 = __SSAT(T1 + S1, 16); 00836 00837 /* S0 = (ya - yc), S1 =(xa - xc) */ 00838 S0 = __SSAT(T0 - S0, 16); 00839 S1 = __SSAT(T1 - S1, 16); 00840 00841 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 00842 /* Read yb (real), xb(imag) input */ 00843 T0 = pSrc16[i1 * 2u]; 00844 T1 = pSrc16[(i1 * 2u) + 1u]; 00845 00846 /* Read yd (real), xd(imag) input */ 00847 U0 = pSrc16[i3 * 2u]; 00848 U1 = pSrc16[(i3 * 2u) + 1u]; 00849 00850 00851 /* T0 = (yb + yd), T1 = (xb + xd) */ 00852 T0 = __SSAT(T0 + U0, 16); 00853 T1 = __SSAT(T1 + U1, 16); 00854 00855 /* writing the butterfly processed i0 sample */ 00856 00857 /* xa' = xa + xb + xc + xd */ 00858 /* ya' = ya + yb + yc + yd */ 00859 out1 = ((R0 >> 1u) + (T0 >> 1u)) >> 1u; 00860 out2 = ((R1 >> 1u) + (T1 >> 1u)) >> 1u; 00861 00862 pSrc16[i0 * 2u] = out1; 00863 pSrc16[(2u * i0) + 1u] = out2; 00864 00865 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */ 00866 R0 = (R0 >> 1u) - (T0 >> 1u); 00867 R1 = (R1 >> 1u) - (T1 >> 1u); 00868 00869 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ 00870 out1 = (q15_t) ((Co2 * R0 + Si2 * R1) >> 16u); 00871 00872 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00873 out2 = (q15_t) ((-Si2 * R0 + Co2 * R1) >> 16u); 00874 00875 /* Reading i0+3fftLen/4 */ 00876 /* Read yb (real), xb(imag) input */ 00877 T0 = pSrc16[i1 * 2u]; 00878 T1 = pSrc16[(i1 * 2u) + 1u]; 00879 00880 /* writing the butterfly processed i0 + fftLen/4 sample */ 00881 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 
00882 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00883 pSrc16[i1 * 2u] = out1; 00884 pSrc16[(i1 * 2u) + 1u] = out2; 00885 00886 /* Butterfly calculations */ 00887 00888 /* Read yd (real), xd(imag) input */ 00889 U0 = pSrc16[i3 * 2u]; 00890 U1 = pSrc16[(i3 * 2u) + 1u]; 00891 00892 /* T0 = yb-yd, T1 = xb-xd */ 00893 T0 = __SSAT(T0 - U0, 16); 00894 T1 = __SSAT(T1 - U1, 16); 00895 00896 /* R0 = (ya-yc) + (xb- xd), R1 = (xa-xc) - (yb-yd)) */ 00897 R0 = (S0 >> 1u) - (T1 >> 1u); 00898 R1 = (S1 >> 1u) + (T0 >> 1u); 00899 00900 /* S0 = (ya-yc) - (xb- xd), S1 = (xa-xc) + (yb-yd)) */ 00901 S0 = (S0 >> 1u) + (T1 >> 1u); 00902 S1 = (S1 >> 1u) - (T0 >> 1u); 00903 00904 /* Butterfly process for the i0+fftLen/2 sample */ 00905 out1 = (q15_t) ((Co1 * S0 + Si1 * S1) >> 16u); 00906 00907 out2 = (q15_t) ((-Si1 * S0 + Co1 * S1) >> 16u); 00908 00909 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 00910 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 00911 pSrc16[i2 * 2u] = out1; 00912 pSrc16[(i2 * 2u) + 1u] = out2; 00913 00914 /* Butterfly process for the i0+3fftLen/4 sample */ 00915 out1 = (q15_t) ((Si3 * R1 + Co3 * R0) >> 16u); 00916 00917 out2 = (q15_t) ((-Si3 * R0 + Co3 * R1) >> 16u); 00918 /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */ 00919 /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */ 00920 pSrc16[i3 * 2u] = out1; 00921 pSrc16[(i3 * 2u) + 1u] = out2; 00922 } 00923 } 00924 /* Twiddle coefficients index modifier */ 00925 twidCoefModifier <<= 2u; 00926 } 00927 /* end of middle stage process */ 00928 00929 00930 /* data is in 10.6(q6) format for the 1024 point */ 00931 /* data is in 8.8(q8) format for the 256 point */ 00932 /* data is in 6.10(q10) format for the 64 point */ 00933 /* data is in 4.12(q12) format for the 16 point */ 00934 00935 /* Initializations for the last stage */ 00936 n1 = n2; 00937 n2 >>= 2u; 00938 00939 /* start of last stage process */ 00940 00941 /* Butterfly implementation */ 00942 for (i0 = 0u; i0 <= (fftLen - n1); i0 += 
n1) 00943 { 00944 /* index calculation for the input as, */ 00945 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 00946 i1 = i0 + n2; 00947 i2 = i1 + n2; 00948 i3 = i2 + n2; 00949 00950 /* Reading i0, i0+fftLen/2 inputs */ 00951 /* Read ya (real), xa(imag) input */ 00952 T0 = pSrc16[i0 * 2u]; 00953 T1 = pSrc16[(i0 * 2u) + 1u]; 00954 00955 /* Read yc (real), xc(imag) input */ 00956 S0 = pSrc16[i2 * 2u]; 00957 S1 = pSrc16[(i2 * 2u) + 1u]; 00958 00959 /* R0 = (ya + yc), R1 = (xa + xc) */ 00960 R0 = __SSAT(T0 + S0, 16u); 00961 R1 = __SSAT(T1 + S1, 16u); 00962 00963 /* S0 = (ya - yc), S1 = (xa - xc) */ 00964 S0 = __SSAT(T0 - S0, 16u); 00965 S1 = __SSAT(T1 - S1, 16u); 00966 00967 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 00968 /* Read yb (real), xb(imag) input */ 00969 T0 = pSrc16[i1 * 2u]; 00970 T1 = pSrc16[(i1 * 2u) + 1u]; 00971 /* Read yd (real), xd(imag) input */ 00972 U0 = pSrc16[i3 * 2u]; 00973 U1 = pSrc16[(i3 * 2u) + 1u]; 00974 00975 /* T0 = (yb + yd), T1 = (xb + xd)) */ 00976 T0 = __SSAT(T0 + U0, 16u); 00977 T1 = __SSAT(T1 + U1, 16u); 00978 00979 /* writing the butterfly processed i0 sample */ 00980 /* xa' = xa + xb + xc + xd */ 00981 /* ya' = ya + yb + yc + yd */ 00982 pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u); 00983 pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u); 00984 00985 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */ 00986 R0 = (R0 >> 1u) - (T0 >> 1u); 00987 R1 = (R1 >> 1u) - (T1 >> 1u); 00988 /* Read yb (real), xb(imag) input */ 00989 T0 = pSrc16[i1 * 2u]; 00990 T1 = pSrc16[(i1 * 2u) + 1u]; 00991 00992 /* writing the butterfly processed i0 + fftLen/4 sample */ 00993 /* xc' = (xa-xb+xc-xd) */ 00994 /* yc' = (ya-yb+yc-yd) */ 00995 pSrc16[i1 * 2u] = R0; 00996 pSrc16[(i1 * 2u) + 1u] = R1; 00997 00998 /* Read yd (real), xd(imag) input */ 00999 U0 = pSrc16[i3 * 2u]; 01000 U1 = pSrc16[(i3 * 2u) + 1u]; 01001 /* T0 = (yb - yd), T1 = (xb - xd) */ 01002 T0 = __SSAT(T0 - U0, 16u); 01003 T1 = __SSAT(T1 - 
U1, 16u); 01004 01005 /* writing the butterfly processed i0 + fftLen/2 sample */ 01006 /* xb' = (xa+yb-xc-yd) */ 01007 /* yb' = (ya-xb-yc+xd) */ 01008 pSrc16[i2 * 2u] = (S0 >> 1u) + (T1 >> 1u); 01009 pSrc16[(i2 * 2u) + 1u] = (S1 >> 1u) - (T0 >> 1u); 01010 01011 /* writing the butterfly processed i0 + 3fftLen/4 sample */ 01012 /* xd' = (xa-yb-xc+yd) */ 01013 /* yd' = (ya+xb-yc-xd) */ 01014 pSrc16[i3 * 2u] = (S0 >> 1u) - (T1 >> 1u); 01015 pSrc16[(i3 * 2u) + 1u] = (S1 >> 1u) + (T0 >> 1u); 01016 01017 } 01018 01019 /* end of last stage process */ 01020 01021 /* output is in 11.5(q5) format for the 1024 point */ 01022 /* output is in 9.7(q7) format for the 256 point */ 01023 /* output is in 7.9(q9) format for the 64 point */ 01024 /* output is in 5.11(q11) format for the 16 point */ 01025 01026 #endif /* #ifndef ARM_MATH_CM0_FAMILY */ 01027 01028 } 01029 01030 01031 /** 01032 * @brief Core function for the Q15 CIFFT butterfly process. 01033 * @param[in, out] *pSrc16 points to the in-place buffer of Q15 data type. 01034 * @param[in] fftLen length of the FFT. 01035 * @param[in] *pCoef16 points to twiddle coefficient buffer. 01036 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 01037 * @return none. 
01038 */ 01039 01040 /* 01041 * Radix-4 IFFT algorithm used is : 01042 * 01043 * CIFFT uses same twiddle coefficients as CFFT function 01044 * x[k] = x[n] + (j)k * x[n + fftLen/4] + (-1)k * x[n+fftLen/2] + (-j)k * x[n+3*fftLen/4] 01045 * 01046 * 01047 * IFFT is implemented with following changes in equations from FFT 01048 * 01049 * Input real and imaginary data: 01050 * x(n) = xa + j * ya 01051 * x(n+N/4 ) = xb + j * yb 01052 * x(n+N/2 ) = xc + j * yc 01053 * x(n+3N 4) = xd + j * yd 01054 * 01055 * 01056 * Output real and imaginary data: 01057 * x(4r) = xa'+ j * ya' 01058 * x(4r+1) = xb'+ j * yb' 01059 * x(4r+2) = xc'+ j * yc' 01060 * x(4r+3) = xd'+ j * yd' 01061 * 01062 * 01063 * Twiddle factors for radix-4 IFFT: 01064 * Wn = co1 + j * (si1) 01065 * W2n = co2 + j * (si2) 01066 * W3n = co3 + j * (si3) 01067 01068 * The real and imaginary output values for the radix-4 butterfly are 01069 * xa' = xa + xb + xc + xd 01070 * ya' = ya + yb + yc + yd 01071 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) 01072 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) 01073 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) 01074 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) 01075 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) 01076 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) 01077 * 01078 */ 01079 01080 void arm_radix4_butterfly_inverse_q15( 01081 q15_t * pSrc16, 01082 uint32_t fftLen, 01083 q15_t * pCoef16, 01084 uint32_t twidCoefModifier) 01085 { 01086 01087 #ifndef ARM_MATH_CM0_FAMILY 01088 01089 /* Run the below code for Cortex-M4 and Cortex-M3 */ 01090 01091 q31_t R, S, T, U; 01092 q31_t C1, C2, C3, out1, out2; 01093 uint32_t n1, n2, ic, i0, j, k; 01094 01095 q15_t *ptr1; 01096 q15_t *pSi0; 01097 q15_t *pSi1; 01098 q15_t *pSi2; 01099 q15_t *pSi3; 01100 01101 q31_t xaya, xbyb, xcyc, xdyd; 01102 01103 /* Total process is divided into three stages */ 01104 01105 /* process first stage, middle stages, & last stage */ 01106 01107 /* Initializations for the first 
stage */ 01108 n2 = fftLen; 01109 n1 = n2; 01110 01111 /* n2 = fftLen/4 */ 01112 n2 >>= 2u; 01113 01114 /* Index for twiddle coefficient */ 01115 ic = 0u; 01116 01117 /* Index for input read and output write */ 01118 j = n2; 01119 01120 pSi0 = pSrc16; 01121 pSi1 = pSi0 + 2 * n2; 01122 pSi2 = pSi1 + 2 * n2; 01123 pSi3 = pSi2 + 2 * n2; 01124 01125 /* Input is in 1.15(q15) format */ 01126 01127 /* start of first stage process */ 01128 do 01129 { 01130 /* Butterfly implementation */ 01131 01132 /* Reading i0, i0+fftLen/2 inputs */ 01133 /* Read ya (real), xa(imag) input */ 01134 T = _SIMD32_OFFSET(pSi0); 01135 T = __SHADD16(T, 0); 01136 T = __SHADD16(T, 0); 01137 01138 /* Read yc (real), xc(imag) input */ 01139 S = _SIMD32_OFFSET(pSi2); 01140 S = __SHADD16(S, 0); 01141 S = __SHADD16(S, 0); 01142 01143 /* R = packed((ya + yc), (xa + xc) ) */ 01144 R = __QADD16(T, S); 01145 01146 /* S = packed((ya - yc), (xa - xc) ) */ 01147 S = __QSUB16(T, S); 01148 01149 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 01150 /* Read yb (real), xb(imag) input */ 01151 T = _SIMD32_OFFSET(pSi1); 01152 T = __SHADD16(T, 0); 01153 T = __SHADD16(T, 0); 01154 01155 /* Read yd (real), xd(imag) input */ 01156 U = _SIMD32_OFFSET(pSi3); 01157 U = __SHADD16(U, 0); 01158 U = __SHADD16(U, 0); 01159 01160 /* T = packed((yb + yd), (xb + xd) ) */ 01161 T = __QADD16(T, U); 01162 01163 /* writing the butterfly processed i0 sample */ 01164 /* xa' = xa + xb + xc + xd */ 01165 /* ya' = ya + yb + yc + yd */ 01166 _SIMD32_OFFSET(pSi0) = __SHADD16(R, T); 01167 pSi0 += 2; 01168 01169 /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */ 01170 R = __QSUB16(R, T); 01171 01172 /* co2 & si2 are read from SIMD Coefficient pointer */ 01173 C2 = _SIMD32_OFFSET(pCoef16 + (4u * ic)); 01174 01175 #ifndef ARM_MATH_BIG_ENDIAN 01176 01177 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 01178 out1 = __SMUSD(C2, R) >> 16u; 01179 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 01180 out2 = __SMUADX(C2, R); 
01181 01182 #else 01183 01184 /* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 01185 out1 = __SMUADX(C2, R) >> 16u; 01186 /* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 01187 out2 = __SMUSD(__QSUB16(0, C2), R); 01188 01189 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01190 01191 /* Reading i0+fftLen/4 */ 01192 /* T = packed(yb, xb) */ 01193 T = _SIMD32_OFFSET(pSi1); 01194 T = __SHADD16(T, 0); 01195 T = __SHADD16(T, 0); 01196 01197 /* writing the butterfly processed i0 + fftLen/4 sample */ 01198 /* writing output(xc', yc') in little endian format */ 01199 _SIMD32_OFFSET(pSi1) = 01200 (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 01201 pSi1 += 2; 01202 01203 /* Butterfly calculations */ 01204 /* U = packed(yd, xd) */ 01205 U = _SIMD32_OFFSET(pSi3); 01206 U = __SHADD16(U, 0); 01207 U = __SHADD16(U, 0); 01208 01209 /* T = packed(yb-yd, xb-xd) */ 01210 T = __QSUB16(T, U); 01211 01212 #ifndef ARM_MATH_BIG_ENDIAN 01213 01214 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 01215 R = __QSAX(S, T); 01216 /* S = packed((ya-yc) + (xb- xd), (xa-xc) - (yb-yd)) */ 01217 S = __QASX(S, T); 01218 01219 #else 01220 01221 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 01222 R = __QASX(S, T); 01223 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 01224 S = __QSAX(S, T); 01225 01226 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01227 01228 /* co1 & si1 are read from SIMD Coefficient pointer */ 01229 C1 = _SIMD32_OFFSET(pCoef16 + (2u * ic)); 01230 /* Butterfly process for the i0+fftLen/2 sample */ 01231 01232 #ifndef ARM_MATH_BIG_ENDIAN 01233 01234 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 01235 out1 = __SMUSD(C1, S) >> 16u; 01236 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 01237 out2 = __SMUADX(C1, S); 01238 01239 #else 01240 01241 /* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 01242 out1 = __SMUADX(C1, S) >> 16u; 01243 /* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 01244 out2 = __SMUSD(__QSUB16(0, C1), S); 01245 
01246 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01247 01248 /* writing output(xb', yb') in little endian format */ 01249 _SIMD32_OFFSET(pSi2) = 01250 ((out2) & 0xFFFF0000) | ((out1) & 0x0000FFFF); 01251 pSi2 += 2; 01252 01253 01254 /* co3 & si3 are read from SIMD Coefficient pointer */ 01255 C3 = _SIMD32_OFFSET(pCoef16 + (6u * ic)); 01256 /* Butterfly process for the i0+3fftLen/4 sample */ 01257 01258 #ifndef ARM_MATH_BIG_ENDIAN 01259 01260 /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ 01261 out1 = __SMUSD(C3, R) >> 16u; 01262 /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ 01263 out2 = __SMUADX(C3, R); 01264 01265 #else 01266 01267 /* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ 01268 out1 = __SMUADX(C3, R) >> 16u; 01269 /* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ 01270 out2 = __SMUSD(__QSUB16(0, C3), R); 01271 01272 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01273 01274 /* writing output(xd', yd') in little endian format */ 01275 _SIMD32_OFFSET(pSi3) = 01276 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 01277 pSi3 += 2; 01278 01279 /* Twiddle coefficients index modifier */ 01280 ic = ic + twidCoefModifier; 01281 01282 } while(--j); 01283 /* data is in 4.11(q11) format */ 01284 01285 /* end of first stage process */ 01286 01287 01288 /* start of middle stage process */ 01289 01290 /* Twiddle coefficients index modifier */ 01291 twidCoefModifier <<= 2u; 01292 01293 /* Calculation of Middle stage */ 01294 for (k = fftLen / 4u; k > 4u; k >>= 2u) 01295 { 01296 /* Initializations for the middle stage */ 01297 n1 = n2; 01298 n2 >>= 2u; 01299 ic = 0u; 01300 01301 for (j = 0u; j <= (n2 - 1u); j++) 01302 { 01303 /* index calculation for the coefficients */ 01304 C1 = _SIMD32_OFFSET(pCoef16 + (2u * ic)); 01305 C2 = _SIMD32_OFFSET(pCoef16 + (4u * ic)); 01306 C3 = _SIMD32_OFFSET(pCoef16 + (6u * ic)); 01307 01308 /* Twiddle coefficients index modifier */ 01309 ic = ic + twidCoefModifier; 01310 01311 pSi0 = pSrc16 + 2 * j; 01312 pSi1 = pSi0 + 2 * n2; 
01313 pSi2 = pSi1 + 2 * n2; 01314 pSi3 = pSi2 + 2 * n2; 01315 01316 /* Butterfly implementation */ 01317 for (i0 = j; i0 < fftLen; i0 += n1) 01318 { 01319 /* Reading i0, i0+fftLen/2 inputs */ 01320 /* Read ya (real), xa(imag) input */ 01321 T = _SIMD32_OFFSET(pSi0); 01322 01323 /* Read yc (real), xc(imag) input */ 01324 S = _SIMD32_OFFSET(pSi2); 01325 01326 /* R = packed( (ya + yc), (xa + xc)) */ 01327 R = __QADD16(T, S); 01328 01329 /* S = packed((ya - yc), (xa - xc)) */ 01330 S = __QSUB16(T, S); 01331 01332 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 01333 /* Read yb (real), xb(imag) input */ 01334 T = _SIMD32_OFFSET(pSi1); 01335 01336 /* Read yd (real), xd(imag) input */ 01337 U = _SIMD32_OFFSET(pSi3); 01338 01339 /* T = packed( (yb + yd), (xb + xd)) */ 01340 T = __QADD16(T, U); 01341 01342 /* writing the butterfly processed i0 sample */ 01343 01344 /* xa' = xa + xb + xc + xd */ 01345 /* ya' = ya + yb + yc + yd */ 01346 out1 = __SHADD16(R, T); 01347 out1 = __SHADD16(out1, 0); 01348 _SIMD32_OFFSET(pSi0) = out1; 01349 pSi0 += 2 * n1; 01350 01351 /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */ 01352 R = __SHSUB16(R, T); 01353 01354 #ifndef ARM_MATH_BIG_ENDIAN 01355 01356 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ 01357 out1 = __SMUSD(C2, R) >> 16u; 01358 01359 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 01360 out2 = __SMUADX(C2, R); 01361 01362 #else 01363 01364 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 01365 out1 = __SMUADX(R, C2) >> 16u; 01366 01367 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ 01368 out2 = __SMUSD(__QSUB16(0, C2), R); 01369 01370 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01371 01372 /* Reading i0+3fftLen/4 */ 01373 /* Read yb (real), xb(imag) input */ 01374 T = _SIMD32_OFFSET(pSi1); 01375 01376 /* writing the butterfly processed i0 + fftLen/4 sample */ 01377 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 01378 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 01379 _SIMD32_OFFSET(pSi1) = 01380 
((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 01381 pSi1 += 2 * n1; 01382 01383 /* Butterfly calculations */ 01384 01385 /* Read yd (real), xd(imag) input */ 01386 U = _SIMD32_OFFSET(pSi3); 01387 01388 /* T = packed(yb-yd, xb-xd) */ 01389 T = __QSUB16(T, U); 01390 01391 #ifndef ARM_MATH_BIG_ENDIAN 01392 01393 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 01394 R = __SHSAX(S, T); 01395 01396 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 01397 S = __SHASX(S, T); 01398 01399 01400 /* Butterfly process for the i0+fftLen/2 sample */ 01401 out1 = __SMUSD(C1, S) >> 16u; 01402 out2 = __SMUADX(C1, S); 01403 01404 #else 01405 01406 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 01407 R = __SHASX(S, T); 01408 01409 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 01410 S = __SHSAX(S, T); 01411 01412 01413 /* Butterfly process for the i0+fftLen/2 sample */ 01414 out1 = __SMUADX(S, C1) >> 16u; 01415 out2 = __SMUSD(__QSUB16(0, C1), S); 01416 01417 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01418 01419 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 01420 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 01421 _SIMD32_OFFSET(pSi2) = 01422 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 01423 pSi2 += 2 * n1; 01424 01425 /* Butterfly process for the i0+3fftLen/4 sample */ 01426 01427 #ifndef ARM_MATH_BIG_ENDIAN 01428 01429 out1 = __SMUSD(C3, R) >> 16u; 01430 out2 = __SMUADX(C3, R); 01431 01432 #else 01433 01434 out1 = __SMUADX(C3, R) >> 16u; 01435 out2 = __SMUSD(__QSUB16(0, C3), R); 01436 01437 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01438 01439 /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ 01440 /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ 01441 _SIMD32_OFFSET(pSi3) = 01442 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 01443 pSi3 += 2 * n1; 01444 } 01445 } 01446 /* Twiddle coefficients index modifier */ 01447 twidCoefModifier <<= 2u; 01448 } 01449 /* end of middle stage process */ 01450 01451 /* data is in 10.6(q6) format 
for the 1024 point */ 01452 /* data is in 8.8(q8) format for the 256 point */ 01453 /* data is in 6.10(q10) format for the 64 point */ 01454 /* data is in 4.12(q12) format for the 16 point */ 01455 01456 /* Initializations for the last stage */ 01457 j = fftLen >> 2; 01458 01459 ptr1 = &pSrc16[0]; 01460 01461 /* start of last stage process */ 01462 01463 /* Butterfly implementation */ 01464 do 01465 { 01466 /* Read xa (real), ya(imag) input */ 01467 xaya = *__SIMD32(ptr1)++; 01468 01469 /* Read xb (real), yb(imag) input */ 01470 xbyb = *__SIMD32(ptr1)++; 01471 01472 /* Read xc (real), yc(imag) input */ 01473 xcyc = *__SIMD32(ptr1)++; 01474 01475 /* Read xd (real), yd(imag) input */ 01476 xdyd = *__SIMD32(ptr1)++; 01477 01478 /* R = packed((ya + yc), (xa + xc)) */ 01479 R = __QADD16(xaya, xcyc); 01480 01481 /* T = packed((yb + yd), (xb + xd)) */ 01482 T = __QADD16(xbyb, xdyd); 01483 01484 /* pointer updation for writing */ 01485 ptr1 = ptr1 - 8u; 01486 01487 01488 /* xa' = xa + xb + xc + xd */ 01489 /* ya' = ya + yb + yc + yd */ 01490 *__SIMD32(ptr1)++ = __SHADD16(R, T); 01491 01492 /* T = packed((yb + yd), (xb + xd)) */ 01493 T = __QADD16(xbyb, xdyd); 01494 01495 /* xc' = (xa-xb+xc-xd) */ 01496 /* yc' = (ya-yb+yc-yd) */ 01497 *__SIMD32(ptr1)++ = __SHSUB16(R, T); 01498 01499 /* S = packed((ya - yc), (xa - xc)) */ 01500 S = __QSUB16(xaya, xcyc); 01501 01502 /* Read yd (real), xd(imag) input */ 01503 /* T = packed( (yb - yd), (xb - xd)) */ 01504 U = __QSUB16(xbyb, xdyd); 01505 01506 #ifndef ARM_MATH_BIG_ENDIAN 01507 01508 /* xb' = (xa+yb-xc-yd) */ 01509 /* yb' = (ya-xb-yc+xd) */ 01510 *__SIMD32(ptr1)++ = __SHASX(S, U); 01511 01512 01513 /* xd' = (xa-yb-xc+yd) */ 01514 /* yd' = (ya+xb-yc-xd) */ 01515 *__SIMD32(ptr1)++ = __SHSAX(S, U); 01516 01517 #else 01518 01519 /* xb' = (xa+yb-xc-yd) */ 01520 /* yb' = (ya-xb-yc+xd) */ 01521 *__SIMD32(ptr1)++ = __SHSAX(S, U); 01522 01523 01524 /* xd' = (xa-yb-xc+yd) */ 01525 /* yd' = (ya+xb-yc-xd) */ 01526 *__SIMD32(ptr1)++ = 
__SHASX(S, U); 01527 01528 01529 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01530 01531 } while(--j); 01532 01533 /* end of last stage process */ 01534 01535 /* output is in 11.5(q5) format for the 1024 point */ 01536 /* output is in 9.7(q7) format for the 256 point */ 01537 /* output is in 7.9(q9) format for the 64 point */ 01538 /* output is in 5.11(q11) format for the 16 point */ 01539 01540 01541 #else 01542 01543 /* Run the below code for Cortex-M0 */ 01544 01545 q15_t R0, R1, S0, S1, T0, T1, U0, U1; 01546 q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2; 01547 uint32_t n1, n2, ic, i0, i1, i2, i3, j, k; 01548 01549 /* Total process is divided into three stages */ 01550 01551 /* process first stage, middle stages, & last stage */ 01552 01553 /* Initializations for the first stage */ 01554 n2 = fftLen; 01555 n1 = n2; 01556 01557 /* n2 = fftLen/4 */ 01558 n2 >>= 2u; 01559 01560 /* Index for twiddle coefficient */ 01561 ic = 0u; 01562 01563 /* Index for input read and output write */ 01564 i0 = 0u; 01565 01566 j = n2; 01567 01568 /* Input is in 1.15(q15) format */ 01569 01570 /* Start of first stage process */ 01571 do 01572 { 01573 /* Butterfly implementation */ 01574 01575 /* index calculation for the input as, */ 01576 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 01577 i1 = i0 + n2; 01578 i2 = i1 + n2; 01579 i3 = i2 + n2; 01580 01581 /* Reading i0, i0+fftLen/2 inputs */ 01582 /* input is down scale by 4 to avoid overflow */ 01583 /* Read ya (real), xa(imag) input */ 01584 T0 = pSrc16[i0 * 2u] >> 2u; 01585 T1 = pSrc16[(i0 * 2u) + 1u] >> 2u; 01586 /* input is down scale by 4 to avoid overflow */ 01587 /* Read yc (real), xc(imag) input */ 01588 S0 = pSrc16[i2 * 2u] >> 2u; 01589 S1 = pSrc16[(i2 * 2u) + 1u] >> 2u; 01590 01591 /* R0 = (ya + yc), R1 = (xa + xc) */ 01592 R0 = __SSAT(T0 + S0, 16u); 01593 R1 = __SSAT(T1 + S1, 16u); 01594 /* S0 = (ya - yc), S1 = (xa - xc) */ 01595 S0 = __SSAT(T0 - S0, 16u); 01596 S1 = __SSAT(T1 - 
S1, 16u); 01597 01598 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 01599 /* input is down scale by 4 to avoid overflow */ 01600 /* Read yb (real), xb(imag) input */ 01601 T0 = pSrc16[i1 * 2u] >> 2u; 01602 T1 = pSrc16[(i1 * 2u) + 1u] >> 2u; 01603 /* Read yd (real), xd(imag) input */ 01604 /* input is down scale by 4 to avoid overflow */ 01605 U0 = pSrc16[i3 * 2u] >> 2u; 01606 U1 = pSrc16[(i3 * 2u) + 1u] >> 2u; 01607 01608 /* T0 = (yb + yd), T1 = (xb + xd) */ 01609 T0 = __SSAT(T0 + U0, 16u); 01610 T1 = __SSAT(T1 + U1, 16u); 01611 01612 /* writing the butterfly processed i0 sample */ 01613 /* xa' = xa + xb + xc + xd */ 01614 /* ya' = ya + yb + yc + yd */ 01615 pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u); 01616 pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u); 01617 01618 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc)- (xb + xd) */ 01619 R0 = __SSAT(R0 - T0, 16u); 01620 R1 = __SSAT(R1 - T1, 16u); 01621 /* co2 & si2 are read from Coefficient pointer */ 01622 Co2 = pCoef16[2u * ic * 2u]; 01623 Si2 = pCoef16[(2u * ic * 2u) + 1u]; 01624 /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */ 01625 out1 = (q15_t) ((Co2 * R0 - Si2 * R1) >> 16u); 01626 /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */ 01627 out2 = (q15_t) ((Si2 * R0 + Co2 * R1) >> 16u); 01628 01629 /* Reading i0+fftLen/4 */ 01630 /* input is down scale by 4 to avoid overflow */ 01631 /* T0 = yb, T1 = xb */ 01632 T0 = pSrc16[i1 * 2u] >> 2u; 01633 T1 = pSrc16[(i1 * 2u) + 1u] >> 2u; 01634 01635 /* writing the butterfly processed i0 + fftLen/4 sample */ 01636 /* writing output(xc', yc') in little endian format */ 01637 pSrc16[i1 * 2u] = out1; 01638 pSrc16[(i1 * 2u) + 1u] = out2; 01639 01640 /* Butterfly calculations */ 01641 /* input is down scale by 4 to avoid overflow */ 01642 /* U0 = yd, U1 = xd) */ 01643 U0 = pSrc16[i3 * 2u] >> 2u; 01644 U1 = pSrc16[(i3 * 2u) + 1u] >> 2u; 01645 01646 /* T0 = yb-yd, T1 = xb-xd) */ 01647 T0 = __SSAT(T0 - U0, 16u); 01648 T1 = __SSAT(T1 - U1, 16u); 01649 /* R0 = (ya-yc) - (xb- 
xd) , R1 = (xa-xc) + (yb-yd) */ 01650 R0 = (q15_t) __SSAT((q31_t) (S0 + T1), 16); 01651 R1 = (q15_t) __SSAT((q31_t) (S1 - T0), 16); 01652 /* S = (ya-yc) + (xb- xd), S1 = (xa-xc) - (yb-yd) */ 01653 S0 = (q15_t) __SSAT((q31_t) (S0 - T1), 16); 01654 S1 = (q15_t) __SSAT((q31_t) (S1 + T0), 16); 01655 01656 /* co1 & si1 are read from Coefficient pointer */ 01657 Co1 = pCoef16[ic * 2u]; 01658 Si1 = pCoef16[(ic * 2u) + 1u]; 01659 /* Butterfly process for the i0+fftLen/2 sample */ 01660 /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */ 01661 out1 = (q15_t) ((Co1 * S0 - Si1 * S1) >> 16u); 01662 /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */ 01663 out2 = (q15_t) ((Si1 * S0 + Co1 * S1) >> 16u); 01664 /* writing output(xb', yb') in little endian format */ 01665 pSrc16[i2 * 2u] = out1; 01666 pSrc16[(i2 * 2u) + 1u] = out2; 01667 01668 /* Co3 & si3 are read from Coefficient pointer */ 01669 Co3 = pCoef16[3u * ic * 2u]; 01670 Si3 = pCoef16[(3u * ic * 2u) + 1u]; 01671 /* Butterfly process for the i0+3fftLen/4 sample */ 01672 /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */ 01673 out1 = (q15_t) ((Co3 * R0 - Si3 * R1) >> 16u); 01674 /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */ 01675 out2 = (q15_t) ((Si3 * R0 + Co3 * R1) >> 16u); 01676 /* writing output(xd', yd') in little endian format */ 01677 pSrc16[i3 * 2u] = out1; 01678 pSrc16[(i3 * 2u) + 1u] = out2; 01679 01680 /* Twiddle coefficients index modifier */ 01681 ic = ic + twidCoefModifier; 01682 01683 /* Updating input index */ 01684 i0 = i0 + 1u; 01685 01686 } while(--j); 01687 01688 /* End of first stage process */ 01689 01690 /* data is in 4.11(q11) format */ 01691 01692 01693 /* Start of Middle stage process */ 01694 01695 /* Twiddle coefficients index modifier */ 01696 twidCoefModifier <<= 2u; 01697 01698 /* Calculation of Middle stage */ 01699 for (k = fftLen / 4u; k > 4u; k >>= 2u) 01700 { 01701 /* Initializations for the middle stage */ 01702 n1 = n2; 01703 n2 >>= 2u; 01704 ic = 0u; 01705 01706 for (j 
= 0u; j <= (n2 - 1u); j++) 01707 { 01708 /* index calculation for the coefficients */ 01709 Co1 = pCoef16[ic * 2u]; 01710 Si1 = pCoef16[(ic * 2u) + 1u]; 01711 Co2 = pCoef16[2u * ic * 2u]; 01712 Si2 = pCoef16[2u * ic * 2u + 1u]; 01713 Co3 = pCoef16[3u * ic * 2u]; 01714 Si3 = pCoef16[(3u * ic * 2u) + 1u]; 01715 01716 /* Twiddle coefficients index modifier */ 01717 ic = ic + twidCoefModifier; 01718 01719 /* Butterfly implementation */ 01720 for (i0 = j; i0 < fftLen; i0 += n1) 01721 { 01722 /* index calculation for the input as, */ 01723 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 01724 i1 = i0 + n2; 01725 i2 = i1 + n2; 01726 i3 = i2 + n2; 01727 01728 /* Reading i0, i0+fftLen/2 inputs */ 01729 /* Read ya (real), xa(imag) input */ 01730 T0 = pSrc16[i0 * 2u]; 01731 T1 = pSrc16[(i0 * 2u) + 1u]; 01732 01733 /* Read yc (real), xc(imag) input */ 01734 S0 = pSrc16[i2 * 2u]; 01735 S1 = pSrc16[(i2 * 2u) + 1u]; 01736 01737 01738 /* R0 = (ya + yc), R1 = (xa + xc) */ 01739 R0 = __SSAT(T0 + S0, 16u); 01740 R1 = __SSAT(T1 + S1, 16u); 01741 /* S0 = (ya - yc), S1 = (xa - xc) */ 01742 S0 = __SSAT(T0 - S0, 16u); 01743 S1 = __SSAT(T1 - S1, 16u); 01744 01745 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 01746 /* Read yb (real), xb(imag) input */ 01747 T0 = pSrc16[i1 * 2u]; 01748 T1 = pSrc16[(i1 * 2u) + 1u]; 01749 01750 /* Read yd (real), xd(imag) input */ 01751 U0 = pSrc16[i3 * 2u]; 01752 U1 = pSrc16[(i3 * 2u) + 1u]; 01753 01754 /* T0 = (yb + yd), T1 = (xb + xd) */ 01755 T0 = __SSAT(T0 + U0, 16u); 01756 T1 = __SSAT(T1 + U1, 16u); 01757 01758 /* writing the butterfly processed i0 sample */ 01759 /* xa' = xa + xb + xc + xd */ 01760 /* ya' = ya + yb + yc + yd */ 01761 pSrc16[i0 * 2u] = ((R0 >> 1u) + (T0 >> 1u)) >> 1u; 01762 pSrc16[(i0 * 2u) + 1u] = ((R1 >> 1u) + (T1 >> 1u)) >> 1u; 01763 01764 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */ 01765 R0 = (R0 >> 1u) - (T0 >> 1u); 01766 R1 = (R1 >> 1u) - (T1 >> 1u); 01767 01768 /* 
(ya-yb+yc-yd)* (si2) - (xa-xb+xc-xd)* co2 */ 01769 out1 = (q15_t) ((Co2 * R0 - Si2 * R1) >> 16); 01770 /* (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */ 01771 out2 = (q15_t) ((Si2 * R0 + Co2 * R1) >> 16); 01772 01773 /* Reading i0+3fftLen/4 */ 01774 /* Read yb (real), xb(imag) input */ 01775 T0 = pSrc16[i1 * 2u]; 01776 T1 = pSrc16[(i1 * 2u) + 1u]; 01777 01778 /* writing the butterfly processed i0 + fftLen/4 sample */ 01779 /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */ 01780 /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */ 01781 pSrc16[i1 * 2u] = out1; 01782 pSrc16[(i1 * 2u) + 1u] = out2; 01783 01784 /* Butterfly calculations */ 01785 /* Read yd (real), xd(imag) input */ 01786 U0 = pSrc16[i3 * 2u]; 01787 U1 = pSrc16[(i3 * 2u) + 1u]; 01788 01789 /* T0 = yb-yd, T1 = xb-xd) */ 01790 T0 = __SSAT(T0 - U0, 16u); 01791 T1 = __SSAT(T1 - U1, 16u); 01792 01793 /* R0 = (ya-yc) - (xb- xd) , R1 = (xa-xc) + (yb-yd) */ 01794 R0 = (S0 >> 1u) + (T1 >> 1u); 01795 R1 = (S1 >> 1u) - (T0 >> 1u); 01796 01797 /* S1 = (ya-yc) + (xb- xd), S1 = (xa-xc) - (yb-yd) */ 01798 S0 = (S0 >> 1u) - (T1 >> 1u); 01799 S1 = (S1 >> 1u) + (T0 >> 1u); 01800 01801 /* Butterfly process for the i0+fftLen/2 sample */ 01802 out1 = (q15_t) ((Co1 * S0 - Si1 * S1) >> 16u); 01803 out2 = (q15_t) ((Si1 * S0 + Co1 * S1) >> 16u); 01804 /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */ 01805 /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */ 01806 pSrc16[i2 * 2u] = out1; 01807 pSrc16[(i2 * 2u) + 1u] = out2; 01808 01809 /* Butterfly process for the i0+3fftLen/4 sample */ 01810 out1 = (q15_t) ((Co3 * R0 - Si3 * R1) >> 16u); 01811 01812 out2 = (q15_t) ((Si3 * R0 + Co3 * R1) >> 16u); 01813 /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */ 01814 /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */ 01815 pSrc16[i3 * 2u] = out1; 01816 pSrc16[(i3 * 2u) + 1u] = out2; 01817 01818 01819 } 01820 } 01821 /* Twiddle coefficients index modifier */ 01822 twidCoefModifier <<= 2u; 01823 } 01824 /* End of Middle 
stages process */ 01825 01826 01827 /* data is in 10.6(q6) format for the 1024 point */ 01828 /* data is in 8.8(q8) format for the 256 point */ 01829 /* data is in 6.10(q10) format for the 64 point */ 01830 /* data is in 4.12(q12) format for the 16 point */ 01831 01832 /* start of last stage process */ 01833 01834 01835 /* Initializations for the last stage */ 01836 n1 = n2; 01837 n2 >>= 2u; 01838 01839 /* Butterfly implementation */ 01840 for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1) 01841 { 01842 /* index calculation for the input as, */ 01843 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 01844 i1 = i0 + n2; 01845 i2 = i1 + n2; 01846 i3 = i2 + n2; 01847 01848 /* Reading i0, i0+fftLen/2 inputs */ 01849 /* Read ya (real), xa(imag) input */ 01850 T0 = pSrc16[i0 * 2u]; 01851 T1 = pSrc16[(i0 * 2u) + 1u]; 01852 /* Read yc (real), xc(imag) input */ 01853 S0 = pSrc16[i2 * 2u]; 01854 S1 = pSrc16[(i2 * 2u) + 1u]; 01855 01856 /* R0 = (ya + yc), R1 = (xa + xc) */ 01857 R0 = __SSAT(T0 + S0, 16u); 01858 R1 = __SSAT(T1 + S1, 16u); 01859 /* S0 = (ya - yc), S1 = (xa - xc) */ 01860 S0 = __SSAT(T0 - S0, 16u); 01861 S1 = __SSAT(T1 - S1, 16u); 01862 01863 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 01864 /* Read yb (real), xb(imag) input */ 01865 T0 = pSrc16[i1 * 2u]; 01866 T1 = pSrc16[(i1 * 2u) + 1u]; 01867 /* Read yd (real), xd(imag) input */ 01868 U0 = pSrc16[i3 * 2u]; 01869 U1 = pSrc16[(i3 * 2u) + 1u]; 01870 01871 /* T0 = (yb + yd), T1 = (xb + xd) */ 01872 T0 = __SSAT(T0 + U0, 16u); 01873 T1 = __SSAT(T1 + U1, 16u); 01874 01875 /* writing the butterfly processed i0 sample */ 01876 /* xa' = xa + xb + xc + xd */ 01877 /* ya' = ya + yb + yc + yd */ 01878 pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u); 01879 pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u); 01880 01881 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */ 01882 R0 = (R0 >> 1u) - (T0 >> 1u); 01883 R1 = (R1 >> 1u) - (T1 >> 1u); 01884 01885 /* Read yb (real), xb(imag) 
input */ 01886 T0 = pSrc16[i1 * 2u]; 01887 T1 = pSrc16[(i1 * 2u) + 1u]; 01888 01889 /* writing the butterfly processed i0 + fftLen/4 sample */ 01890 /* xc' = (xa-xb+xc-xd) */ 01891 /* yc' = (ya-yb+yc-yd) */ 01892 pSrc16[i1 * 2u] = R0; 01893 pSrc16[(i1 * 2u) + 1u] = R1; 01894 01895 /* Read yd (real), xd(imag) input */ 01896 U0 = pSrc16[i3 * 2u]; 01897 U1 = pSrc16[(i3 * 2u) + 1u]; 01898 /* T0 = (yb - yd), T1 = (xb - xd) */ 01899 T0 = __SSAT(T0 - U0, 16u); 01900 T1 = __SSAT(T1 - U1, 16u); 01901 01902 /* writing the butterfly processed i0 + fftLen/2 sample */ 01903 /* xb' = (xa-yb-xc+yd) */ 01904 /* yb' = (ya+xb-yc-xd) */ 01905 pSrc16[i2 * 2u] = (S0 >> 1u) - (T1 >> 1u); 01906 pSrc16[(i2 * 2u) + 1u] = (S1 >> 1u) + (T0 >> 1u); 01907 01908 01909 /* writing the butterfly processed i0 + 3fftLen/4 sample */ 01910 /* xd' = (xa+yb-xc-yd) */ 01911 /* yd' = (ya-xb-yc+xd) */ 01912 pSrc16[i3 * 2u] = (S0 >> 1u) + (T1 >> 1u); 01913 pSrc16[(i3 * 2u) + 1u] = (S1 >> 1u) - (T0 >> 1u); 01914 } 01915 /* end of last stage process */ 01916 01917 /* output is in 11.5(q5) format for the 1024 point */ 01918 /* output is in 9.7(q7) format for the 256 point */ 01919 /* output is in 7.9(q9) format for the 64 point */ 01920 /* output is in 5.11(q11) format for the 16 point */ 01921 01922 #endif /* #ifndef ARM_MATH_CM0_FAMILY */ 01923 01924 }
Generated on Tue Jul 12 2022 13:15:20 by
