CMSIS DSP library
Dependents: KL25Z_FFT_Demo Hat_Board_v5_1 KL25Z_FFT_Demo_tony KL25Z_FFT_Demo_tony ... more
Fork of mbed-dsp by
arm_cfft_radix4_q15.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2013 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 17. January 2013 00005 * $Revision: V1.4.1 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_cfft_radix4_q15.c 00009 * 00010 * Description: This file has function definition of Radix-4 FFT & IFFT function and 00011 * In-place bit reversal using bit reversal table 00012 * 00013 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00014 * 00015 * Redistribution and use in source and binary forms, with or without 00016 * modification, are permitted provided that the following conditions 00017 * are met: 00018 * - Redistributions of source code must retain the above copyright 00019 * notice, this list of conditions and the following disclaimer. 00020 * - Redistributions in binary form must reproduce the above copyright 00021 * notice, this list of conditions and the following disclaimer in 00022 * the documentation and/or other materials provided with the 00023 * distribution. 00024 * - Neither the name of ARM LIMITED nor the names of its contributors 00025 * may be used to endorse or promote products derived from this 00026 * software without specific prior written permission. 00027 * 00028 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00029 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00030 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00031 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 00032 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00033 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00034 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00035 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00036 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00037 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00038 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00039 * POSSIBILITY OF SUCH DAMAGE. 00040 * -------------------------------------------------------------------- */ 00041 00042 #include "arm_math.h" 00043 00044 00045 void arm_radix4_butterfly_q15( 00046 q15_t * pSrc16, 00047 uint32_t fftLen, 00048 q15_t * pCoef16, 00049 uint32_t twidCoefModifier); 00050 00051 void arm_radix4_butterfly_inverse_q15( 00052 q15_t * pSrc16, 00053 uint32_t fftLen, 00054 q15_t * pCoef16, 00055 uint32_t twidCoefModifier); 00056 00057 void arm_bitreversal_q15( 00058 q15_t * pSrc, 00059 uint32_t fftLen, 00060 uint16_t bitRevFactor, 00061 uint16_t * pBitRevTab); 00062 00063 /** 00064 * @ingroup groupTransforms 00065 */ 00066 00067 /** 00068 * @addtogroup ComplexFFT 00069 * @{ 00070 */ 00071 00072 00073 /** 00074 * @details 00075 * @brief Processing function for the Q15 CFFT/CIFFT. 00076 * @param[in] *S points to an instance of the Q15 CFFT/CIFFT structure. 00077 * @param[in, out] *pSrc points to the complex data buffer. Processing occurs in-place. 00078 * @return none. 00079 * 00080 * \par Input and output formats: 00081 * \par 00082 * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process. 00083 * Hence the output format is different for different FFT sizes. 00084 * The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT: 00085 * \par 00086 * \image html CFFTQ15.gif "Input and Output Formats for Q15 CFFT" 00087 * \image html CIFFTQ15.gif "Input and Output Formats for Q15 CIFFT" 00088 */ 00089 00090 void arm_cfft_radix4_q15( 00091 const arm_cfft_radix4_instance_q15 * S, 00092 q15_t * pSrc) 00093 { 00094 if(S->ifftFlag == 1u) 00095 { 00096 /* Complex IFFT radix-4 */ 00097 arm_radix4_butterfly_inverse_q15(pSrc, S->fftLen, S->pTwiddle, 00098 S->twidCoefModifier); 00099 } 00100 else 00101 { 00102 /* Complex FFT radix-4 */ 00103 arm_radix4_butterfly_q15(pSrc, S->fftLen, S->pTwiddle, 00104 S->twidCoefModifier); 00105 } 00106 00107 if(S->bitReverseFlag == 1u) 00108 { 00109 /* Bit Reversal */ 00110 arm_bitreversal_q15(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable); 00111 } 00112 00113 } 00114 00115 /** 00116 * @} end of ComplexFFT group 00117 */ 00118 00119 /* 00120 * Radix-4 FFT algorithm used is : 00121 * 00122 * Input real and imaginary data: 00123 * x(n) = xa + j * ya 00124 * x(n+N/4 ) = xb + j * yb 00125 * x(n+N/2 ) = xc + j * yc 00126 * x(n+3N 4) = xd + j * yd 00127 * 00128 * 00129 * Output real and imaginary data: 00130 * x(4r) = xa'+ j * ya' 00131 * x(4r+1) = xb'+ j * yb' 00132 * x(4r+2) = xc'+ j * yc' 00133 * x(4r+3) = xd'+ j * yd' 00134 * 00135 * 00136 * Twiddle factors for radix-4 FFT: 00137 * Wn = co1 + j * (- si1) 00138 * W2n = co2 + j * (- si2) 00139 * W3n = co3 + j * (- si3) 00140 00141 * The real and imaginary output values for the radix-4 butterfly are 00142 * xa' = xa + xb + xc + xd 00143 * ya' = ya + yb + yc + yd 00144 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) 00145 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) 00146 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) 00147 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) 00148 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) 00149 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) 00150 * 00151 */ 00152 00153 /** 00154 * @brief Core function for the Q15 CFFT butterfly process. 00155 * @param[in, out] *pSrc16 points to the in-place buffer of Q15 data type. 00156 * @param[in] fftLen length of the FFT. 00157 * @param[in] *pCoef16 points to twiddle coefficient buffer. 00158 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 00159 * @return none. 00160 */ 00161 00162 void arm_radix4_butterfly_q15( 00163 q15_t * pSrc16, 00164 uint32_t fftLen, 00165 q15_t * pCoef16, 00166 uint32_t twidCoefModifier) 00167 { 00168 00169 #ifndef ARM_MATH_CM0_FAMILY 00170 00171 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00172 00173 q31_t R, S, T, U; 00174 q31_t C1, C2, C3, out1, out2; 00175 uint32_t n1, n2, ic, i0, i1, i2, i3, j, k; 00176 q15_t in; 00177 00178 q15_t *ptr1; 00179 00180 00181 00182 q31_t xaya, xbyb, xcyc, xdyd; 00183 00184 /* Total process is divided into three stages */ 00185 00186 /* process first stage, middle stages, & last stage */ 00187 00188 /* Initializations for the first stage */ 00189 n2 = fftLen; 00190 n1 = n2; 00191 00192 /* n2 = fftLen/4 */ 00193 n2 >>= 2u; 00194 00195 /* Index for twiddle coefficient */ 00196 ic = 0u; 00197 00198 /* Index for input read and output write */ 00199 i0 = 0u; 00200 j = n2; 00201 00202 /* Input is in 1.15(q15) format */ 00203 00204 /* start of first stage process */ 00205 do 00206 { 00207 /* Butterfly implementation */ 00208 00209 /* index calculation for the input as, */ 00210 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 00211 i1 = i0 + n2; 00212 i2 = i1 + n2; 00213 i3 = i2 + n2; 00214 00215 /* Reading i0, i0+fftLen/2 inputs */ 00216 /* Read ya (real), xa(imag) input */ 00217 T = _SIMD32_OFFSET(pSrc16 + (2u * i0)); 00218 in = ((int16_t) (T & 0xFFFF)) >> 2; 00219 T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF); 00220 00221 /* Read yc (real), xc(imag) input */ 00222 S = _SIMD32_OFFSET(pSrc16 + (2u * i2)); 00223 in = ((int16_t) (S & 0xFFFF)) >> 2; 00224 S = ((S >> 2) & 0xFFFF0000) | (in & 0xFFFF); 00225 00226 /* R = packed((ya + yc), (xa + xc) ) */ 00227 R = __QADD16(T, S); 00228 00229 /* S = packed((ya - yc), (xa - xc) ) */ 00230 S = __QSUB16(T, S); 00231 00232 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 00233 /* Read yb (real), xb(imag) input */ 00234 T = _SIMD32_OFFSET(pSrc16 + (2u * i1)); 00235 in = ((int16_t) (T & 0xFFFF)) >> 2; 00236 T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF); 00237 00238 /* Read yd (real), xd(imag) input */ 00239 U = _SIMD32_OFFSET(pSrc16 + (2u * i3)); 00240 in = ((int16_t) (U & 0xFFFF)) >> 2; 00241 U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF); 00242 00243 /* T = packed((yb + yd), (xb + xd) ) */ 00244 T = __QADD16(T, U); 00245 00246 /* writing the butterfly processed i0 sample */ 00247 /* xa' = xa + xb + xc + xd */ 00248 /* ya' = ya + yb + yc + yd */ 00249 _SIMD32_OFFSET(pSrc16 + (2u * i0)) = __SHADD16(R, T); 00250 00251 /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */ 00252 R = __QSUB16(R, T); 00253 00254 /* co2 & si2 are read from SIMD Coefficient pointer */ 00255 C2 = _SIMD32_OFFSET(pCoef16 + (4u * ic)); 00256 00257 #ifndef ARM_MATH_BIG_ENDIAN 00258 00259 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 00260 out1 = __SMUAD(C2, R) >> 16u; 00261 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00262 out2 = __SMUSDX(C2, R); 00263 00264 #else 00265 00266 /* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00267 out1 = __SMUSDX(R, C2) >> 16u; 00268 /* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 00269 out2 = __SMUAD(C2, R); 00270 00271 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00272 00273 /* Reading i0+fftLen/4 */ 00274 /* T = packed(yb, xb) */ 00275 T = _SIMD32_OFFSET(pSrc16 + (2u * i1)); 00276 in = ((int16_t) (T & 0xFFFF)) >> 2; 00277 T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF); 00278 00279 /* writing the butterfly processed i0 + fftLen/4 sample */ 00280 /* writing output(xc', yc') in little endian format */ 00281 _SIMD32_OFFSET(pSrc16 + (2u * i1)) = 00282 (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00283 00284 /* Butterfly calculations */ 00285 /* U = packed(yd, xd) */ 00286 U = _SIMD32_OFFSET(pSrc16 + (2u * i3)); 00287 in = ((int16_t) (U & 0xFFFF)) >> 2; 00288 U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF); 00289 00290 /* T = packed(yb-yd, xb-xd) */ 00291 T = __QSUB16(T, U); 00292 00293 #ifndef ARM_MATH_BIG_ENDIAN 00294 00295 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 00296 R = __QASX(S, T); 00297 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 00298 S = __QSAX(S, T); 00299 00300 #else 00301 00302 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 00303 R = __QSAX(S, T); 00304 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 00305 S = __QASX(S, T); 00306 00307 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00308 00309 /* co1 & si1 are read from SIMD Coefficient pointer */ 00310 C1 = _SIMD32_OFFSET(pCoef16 + (2u * ic)); 00311 /* Butterfly process for the i0+fftLen/2 sample */ 00312 00313 #ifndef ARM_MATH_BIG_ENDIAN 00314 00315 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 00316 out1 = __SMUAD(C1, S) >> 16u; 00317 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 00318 out2 = __SMUSDX(C1, S); 00319 00320 #else 00321 00322 /* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 00323 out1 = __SMUSDX(S, C1) >> 16u; 00324 /* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 00325 out2 = __SMUAD(C1, S); 00326 00327 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00328 00329 /* writing output(xb', yb') in little endian format */ 00330 _SIMD32_OFFSET(pSrc16 + (2u * i2)) = 00331 ((out2) & 0xFFFF0000) | ((out1) & 0x0000FFFF); 00332 00333 00334 /* co3 & si3 are read from SIMD Coefficient pointer */ 00335 C3 = _SIMD32_OFFSET(pCoef16 + (6u * ic)); 00336 /* Butterfly process for the i0+3fftLen/4 sample */ 00337 00338 #ifndef ARM_MATH_BIG_ENDIAN 00339 00340 /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ 00341 out1 = __SMUAD(C3, R) >> 16u; 00342 /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ 00343 out2 = __SMUSDX(C3, R); 00344 00345 #else 00346 00347 /* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ 00348 out1 = __SMUSDX(R, C3) >> 16u; 00349 /* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ 00350 out2 = __SMUAD(C3, R); 00351 00352 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00353 00354 /* writing output(xd', yd') in little endian format */ 00355 _SIMD32_OFFSET(pSrc16 + (2u * i3)) = 00356 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00357 00358 /* Twiddle coefficients index modifier */ 00359 ic = ic + twidCoefModifier; 00360 00361 /* Updating input index */ 00362 i0 = i0 + 1u; 00363 00364 } while(--j); 00365 /* data is in 4.11(q11) format */ 00366 00367 /* end of first stage process */ 00368 00369 00370 /* start of middle stage process */ 00371 00372 /* Twiddle coefficients index modifier */ 00373 twidCoefModifier <<= 2u; 00374 00375 /* Calculation of Middle stage */ 00376 for (k = fftLen / 4u; k > 4u; k >>= 2u) 00377 { 00378 /* Initializations for the middle stage */ 00379 n1 = n2; 00380 n2 >>= 2u; 00381 ic = 0u; 00382 00383 for (j = 0u; j <= (n2 - 1u); j++) 00384 { 00385 /* index calculation for the coefficients */ 00386 C1 = _SIMD32_OFFSET(pCoef16 + (2u * ic)); 00387 C2 = _SIMD32_OFFSET(pCoef16 + (4u * ic)); 00388 C3 = _SIMD32_OFFSET(pCoef16 + (6u * ic)); 00389 00390 /* Twiddle coefficients index modifier */ 00391 ic = ic + twidCoefModifier; 00392 00393 /* Butterfly implementation */ 00394 for (i0 = j; i0 < fftLen; i0 += n1) 00395 { 00396 /* index calculation for the input as, */ 00397 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 00398 i1 = i0 + n2; 00399 i2 = i1 + n2; 00400 i3 = i2 + n2; 00401 00402 /* Reading i0, i0+fftLen/2 inputs */ 00403 /* Read ya (real), xa(imag) input */ 00404 T = _SIMD32_OFFSET(pSrc16 + (2u * i0)); 00405 00406 /* Read yc (real), xc(imag) input */ 00407 S = _SIMD32_OFFSET(pSrc16 + (2u * i2)); 00408 00409 /* R = packed( (ya + yc), (xa + xc)) */ 00410 R = __QADD16(T, S); 00411 00412 /* S = packed((ya - yc), (xa - xc)) */ 00413 S = __QSUB16(T, S); 00414 00415 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 00416 /* Read yb (real), xb(imag) input */ 00417 T = _SIMD32_OFFSET(pSrc16 + (2u * i1)); 00418 00419 /* Read yd (real), xd(imag) input */ 00420 U = _SIMD32_OFFSET(pSrc16 + (2u * i3)); 00421 00422 /* T = packed( (yb + yd), (xb + xd)) */ 00423 T = __QADD16(T, U); 00424 00425 /* writing the butterfly processed i0 sample */ 00426 00427 /* xa' = xa + xb + xc + xd */ 00428 /* ya' = ya + yb + yc + yd */ 00429 out1 = __SHADD16(R, T); 00430 in = ((int16_t) (out1 & 0xFFFF)) >> 1; 00431 out1 = ((out1 >> 1) & 0xFFFF0000) | (in & 0xFFFF); 00432 _SIMD32_OFFSET(pSrc16 + (2u * i0)) = out1; 00433 00434 /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */ 00435 R = __SHSUB16(R, T); 00436 00437 #ifndef ARM_MATH_BIG_ENDIAN 00438 00439 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ 00440 out1 = __SMUAD(C2, R) >> 16u; 00441 00442 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00443 out2 = __SMUSDX(C2, R); 00444 00445 #else 00446 00447 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00448 out1 = __SMUSDX(R, C2) >> 16u; 00449 00450 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ 00451 out2 = __SMUAD(C2, R); 00452 00453 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00454 00455 /* Reading i0+3fftLen/4 */ 00456 /* Read yb (real), xb(imag) input */ 00457 T = _SIMD32_OFFSET(pSrc16 + (2u * i1)); 00458 00459 /* writing the butterfly processed i0 + fftLen/4 sample */ 00460 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 00461 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00462 _SIMD32_OFFSET(pSrc16 + (2u * i1)) = 00463 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00464 00465 /* Butterfly calculations */ 00466 00467 /* Read yd (real), xd(imag) input */ 00468 U = _SIMD32_OFFSET(pSrc16 + (2u * i3)); 00469 00470 /* T = packed(yb-yd, xb-xd) */ 00471 T = __QSUB16(T, U); 00472 00473 #ifndef ARM_MATH_BIG_ENDIAN 00474 00475 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 00476 R = __SHASX(S, T); 00477 00478 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 00479 S = __SHSAX(S, T); 00480 00481 00482 /* Butterfly process for the i0+fftLen/2 sample */ 00483 out1 = __SMUAD(C1, S) >> 16u; 00484 out2 = __SMUSDX(C1, S); 00485 00486 #else 00487 00488 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 00489 R = __SHSAX(S, T); 00490 00491 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 00492 S = __SHASX(S, T); 00493 00494 00495 /* Butterfly process for the i0+fftLen/2 sample */ 00496 out1 = __SMUSDX(S, C1) >> 16u; 00497 out2 = __SMUAD(C1, S); 00498 00499 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00500 00501 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 00502 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 00503 _SIMD32_OFFSET(pSrc16 + (2u * i2)) = 00504 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00505 00506 /* Butterfly process for the i0+3fftLen/4 sample */ 00507 00508 #ifndef ARM_MATH_BIG_ENDIAN 00509 00510 out1 = __SMUAD(C3, R) >> 16u; 00511 out2 = __SMUSDX(C3, R); 00512 00513 #else 00514 00515 out1 = __SMUSDX(R, C3) >> 16u; 00516 out2 = __SMUAD(C3, R); 00517 00518 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00519 00520 /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ 00521 /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ 00522 _SIMD32_OFFSET(pSrc16 + (2u * i3)) = 00523 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00524 } 00525 } 00526 /* Twiddle coefficients index modifier */ 00527 twidCoefModifier <<= 2u; 00528 } 00529 /* end of middle stage process */ 00530 00531 00532 /* data is in 10.6(q6) format for the 1024 point */ 00533 /* data is in 8.8(q8) format for the 256 point */ 00534 /* data is in 6.10(q10) format for the 64 point */ 00535 /* data is in 4.12(q12) format for the 16 point */ 00536 00537 /* Initializations for the last stage */ 00538 j = fftLen >> 2; 00539 00540 ptr1 = &pSrc16[0]; 00541 00542 /* start of last stage process */ 00543 00544 /* Butterfly implementation */ 00545 do 00546 { 00547 /* Read xa (real), ya(imag) input */ 00548 xaya = *__SIMD32(ptr1)++; 00549 00550 /* Read xb (real), yb(imag) input */ 00551 xbyb = *__SIMD32(ptr1)++; 00552 00553 /* Read xc (real), yc(imag) input */ 00554 xcyc = *__SIMD32(ptr1)++; 00555 00556 /* Read xd (real), yd(imag) input */ 00557 xdyd = *__SIMD32(ptr1)++; 00558 00559 /* R = packed((ya + yc), (xa + xc)) */ 00560 R = __QADD16(xaya, xcyc); 00561 00562 /* T = packed((yb + yd), (xb + xd)) */ 00563 T = __QADD16(xbyb, xdyd); 00564 00565 /* pointer updation for writing */ 00566 ptr1 = ptr1 - 8u; 00567 00568 00569 /* xa' = xa + xb + xc + xd */ 00570 /* ya' = ya + yb + yc + yd */ 00571 *__SIMD32(ptr1)++ = __SHADD16(R, T); 00572 00573 /* T = packed((yb + yd), (xb + xd)) */ 00574 T = __QADD16(xbyb, xdyd); 00575 00576 /* xc' = (xa-xb+xc-xd) */ 00577 /* yc' = (ya-yb+yc-yd) */ 00578 *__SIMD32(ptr1)++ = __SHSUB16(R, T); 00579 00580 /* S = packed((ya - yc), (xa - xc)) */ 00581 S = __QSUB16(xaya, xcyc); 00582 00583 /* Read yd (real), xd(imag) input */ 00584 /* T = packed( (yb - yd), (xb - xd)) */ 00585 U = __QSUB16(xbyb, xdyd); 00586 00587 #ifndef ARM_MATH_BIG_ENDIAN 00588 00589 /* xb' = (xa+yb-xc-yd) */ 00590 /* yb' = (ya-xb-yc+xd) */ 00591 *__SIMD32(ptr1)++ = __SHSAX(S, U); 00592 00593 00594 /* xd' = (xa-yb-xc+yd) */ 00595 /* yd' = (ya+xb-yc-xd) */ 00596 *__SIMD32(ptr1)++ = __SHASX(S, U); 00597 00598 #else 00599 00600 /* xb' = (xa+yb-xc-yd) */ 00601 /* yb' = (ya-xb-yc+xd) */ 00602 *__SIMD32(ptr1)++ = __SHASX(S, U); 00603 00604 00605 /* xd' = (xa-yb-xc+yd) */ 00606 /* yd' = (ya+xb-yc-xd) */ 00607 *__SIMD32(ptr1)++ = __SHSAX(S, U); 00608 00609 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00610 00611 } while(--j); 00612 00613 /* end of last stage process */ 00614 00615 /* output is in 11.5(q5) format for the 1024 point */ 00616 /* output is in 9.7(q7) format for the 256 point */ 00617 /* output is in 7.9(q9) format for the 64 point */ 00618 /* output is in 5.11(q11) format for the 16 point */ 00619 00620 00621 #else 00622 00623 /* Run the below code for Cortex-M0 */ 00624 00625 q15_t R0, R1, S0, S1, T0, T1, U0, U1; 00626 q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2; 00627 uint32_t n1, n2, ic, i0, i1, i2, i3, j, k; 00628 00629 /* Total process is divided into three stages */ 00630 00631 /* process first stage, middle stages, & last stage */ 00632 00633 /* Initializations for the first stage */ 00634 n2 = fftLen; 00635 n1 = n2; 00636 00637 /* n2 = fftLen/4 */ 00638 n2 >>= 2u; 00639 00640 /* Index for twiddle coefficient */ 00641 ic = 0u; 00642 00643 /* Index for input read and output write */ 00644 i0 = 0u; 00645 j = n2; 00646 00647 /* Input is in 1.15(q15) format */ 00648 00649 /* start of first stage process */ 00650 do 00651 { 00652 /* Butterfly implementation */ 00653 00654 /* index calculation for the input as, */ 00655 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 00656 i1 = i0 + n2; 00657 i2 = i1 + n2; 00658 i3 = i2 + n2; 00659 00660 /* Reading i0, i0+fftLen/2 inputs */ 00661 00662 /* input is down scale by 4 to avoid overflow */ 00663 /* Read ya (real), xa(imag) input */ 00664 T0 = pSrc16[i0 * 2u] >> 2u; 00665 T1 = pSrc16[(i0 * 2u) + 1u] >> 2u; 00666 00667 /* input is down scale by 4 to avoid overflow */ 00668 /* Read yc (real), xc(imag) input */ 00669 S0 = pSrc16[i2 * 2u] >> 2u; 00670 S1 = pSrc16[(i2 * 2u) + 1u] >> 2u; 00671 00672 /* R0 = (ya + yc) */ 00673 R0 = __SSAT(T0 + S0, 16u); 00674 /* R1 = (xa + xc) */ 00675 R1 = __SSAT(T1 + S1, 16u); 00676 00677 /* S0 = (ya - yc) */ 00678 S0 = __SSAT(T0 - S0, 16); 00679 /* S1 = (xa - xc) */ 00680 S1 = __SSAT(T1 - S1, 16); 00681 00682 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 00683 /* input is down scale by 4 to avoid overflow */ 00684 /* Read yb (real), xb(imag) input */ 00685 T0 = pSrc16[i1 * 2u] >> 2u; 00686 T1 = pSrc16[(i1 * 2u) + 1u] >> 2u; 00687 00688 /* input is down scale by 4 to avoid overflow */ 00689 /* Read yd (real), xd(imag) input */ 00690 U0 = pSrc16[i3 * 2u] >> 2u; 00691 U1 = pSrc16[(i3 * 2u) + 1] >> 2u; 00692 00693 /* T0 = (yb + yd) */ 00694 T0 = __SSAT(T0 + U0, 16u); 00695 /* T1 = (xb + xd) */ 00696 T1 = __SSAT(T1 + U1, 16u); 00697 00698 /* writing the butterfly processed i0 sample */ 00699 /* ya' = ya + yb + yc + yd */ 00700 /* xa' = xa + xb + xc + xd */ 00701 pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u); 00702 pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u); 00703 00704 /* R0 = (ya + yc) - (yb + yd) */ 00705 /* R1 = (xa + xc) - (xb + xd) */ 00706 R0 = __SSAT(R0 - T0, 16u); 00707 R1 = __SSAT(R1 - T1, 16u); 00708 00709 /* co2 & si2 are read from Coefficient pointer */ 00710 Co2 = pCoef16[2u * ic * 2u]; 00711 Si2 = pCoef16[(2u * ic * 2u) + 1]; 00712 00713 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 00714 out1 = (short) ((Co2 * R0 + Si2 * R1) >> 16u); 00715 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00716 out2 = (short) ((-Si2 * R0 + Co2 * R1) >> 16u); 00717 00718 /* Reading i0+fftLen/4 */ 00719 /* input is down scale by 4 to avoid overflow */ 00720 /* T0 = yb, T1 = xb */ 00721 T0 = pSrc16[i1 * 2u] >> 2; 00722 T1 = pSrc16[(i1 * 2u) + 1] >> 2; 00723 00724 /* writing the butterfly processed i0 + fftLen/4 sample */ 00725 /* writing output(xc', yc') in little endian format */ 00726 pSrc16[i1 * 2u] = out1; 00727 pSrc16[(i1 * 2u) + 1] = out2; 00728 00729 /* Butterfly calculations */ 00730 /* input is down scale by 4 to avoid overflow */ 00731 /* U0 = yd, U1 = xd */ 00732 U0 = pSrc16[i3 * 2u] >> 2; 00733 U1 = pSrc16[(i3 * 2u) + 1] >> 2; 00734 /* T0 = yb-yd */ 00735 T0 = __SSAT(T0 - U0, 16); 00736 /* T1 = xb-xd */ 00737 T1 = __SSAT(T1 - U1, 16); 00738 00739 /* R1 = (ya-yc) + (xb- xd), R0 = (xa-xc) - (yb-yd)) */ 00740 R0 = (short) __SSAT((q31_t) (S0 - T1), 16); 00741 R1 = (short) __SSAT((q31_t) (S1 + T0), 16); 00742 00743 /* S1 = (ya-yc) - (xb- xd), S0 = (xa-xc) + (yb-yd)) */ 00744 S0 = (short) __SSAT(((q31_t) S0 + T1), 16u); 00745 S1 = (short) __SSAT(((q31_t) S1 - T0), 16u); 00746 00747 /* co1 & si1 are read from Coefficient pointer */ 00748 Co1 = pCoef16[ic * 2u]; 00749 Si1 = pCoef16[(ic * 2u) + 1]; 00750 /* Butterfly process for the i0+fftLen/2 sample */ 00751 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 00752 out1 = (short) ((Si1 * S1 + Co1 * S0) >> 16); 00753 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 00754 out2 = (short) ((-Si1 * S0 + Co1 * S1) >> 16); 00755 00756 /* writing output(xb', yb') in little endian format */ 00757 pSrc16[i2 * 2u] = out1; 00758 pSrc16[(i2 * 2u) + 1] = out2; 00759 00760 /* Co3 & si3 are read from Coefficient pointer */ 00761 Co3 = pCoef16[3u * (ic * 2u)]; 00762 Si3 = pCoef16[(3u * (ic * 2u)) + 1]; 00763 /* Butterfly process for the i0+3fftLen/4 sample */ 00764 /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */ 00765 out1 = (short) ((Si3 * R1 + Co3 * R0) >> 16u); 00766 /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */ 00767 out2 = (short) ((-Si3 * R0 + Co3 * R1) >> 16u); 00768 /* writing output(xd', yd') in little endian format */ 00769 pSrc16[i3 * 2u] = out1; 00770 pSrc16[(i3 * 2u) + 1] = out2; 00771 00772 /* Twiddle coefficients index modifier */ 00773 ic = ic + twidCoefModifier; 00774 00775 /* Updating input index */ 00776 i0 = i0 + 1u; 00777 00778 } while(--j); 00779 /* data is in 4.11(q11) format */ 00780 00781 /* end of first stage process */ 00782 00783 00784 /* start of middle stage process */ 00785 00786 /* Twiddle coefficients index modifier */ 00787 twidCoefModifier <<= 2u; 00788 00789 /* Calculation of Middle stage */ 00790 for (k = fftLen / 4u; k > 4u; k >>= 2u) 00791 { 00792 /* Initializations for the middle stage */ 00793 n1 = n2; 00794 n2 >>= 2u; 00795 ic = 0u; 00796 00797 for (j = 0u; j <= (n2 - 1u); j++) 00798 { 00799 /* index calculation for the coefficients */ 00800 Co1 = pCoef16[ic * 2u]; 00801 Si1 = pCoef16[(ic * 2u) + 1u]; 00802 Co2 = pCoef16[2u * (ic * 2u)]; 00803 Si2 = pCoef16[(2u * (ic * 2u)) + 1u]; 00804 Co3 = pCoef16[3u * (ic * 2u)]; 00805 Si3 = pCoef16[(3u * (ic * 2u)) + 1u]; 00806 00807 /* Twiddle coefficients index modifier */ 00808 ic = ic + twidCoefModifier; 00809 00810 /* Butterfly implementation */ 00811 for (i0 = j; i0 < fftLen; i0 += n1) 00812 { 00813 /* index calculation for the input as, */ 00814 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 00815 i1 = i0 + n2; 00816 i2 = i1 + n2; 00817 i3 = i2 + n2; 00818 00819 /* Reading i0, i0+fftLen/2 inputs */ 00820 /* Read ya (real), xa(imag) input */ 00821 T0 = pSrc16[i0 * 2u]; 00822 T1 = pSrc16[(i0 * 2u) + 1u]; 00823 00824 /* Read yc (real), xc(imag) input */ 00825 S0 = pSrc16[i2 * 2u]; 00826 S1 = pSrc16[(i2 * 2u) + 1u]; 00827 00828 /* R0 = (ya + yc), R1 = (xa + xc) */ 00829 R0 = __SSAT(T0 + S0, 16); 00830 R1 = __SSAT(T1 + S1, 16); 00831 00832 /* S0 = (ya - yc), S1 =(xa - xc) */ 00833 S0 = __SSAT(T0 - S0, 16); 00834 S1 = __SSAT(T1 - S1, 16); 00835 00836 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 00837 /* Read yb (real), xb(imag) input */ 00838 T0 = pSrc16[i1 * 2u]; 00839 T1 = pSrc16[(i1 * 2u) + 1u]; 00840 00841 /* Read yd (real), xd(imag) input */ 00842 U0 = pSrc16[i3 * 2u]; 00843 U1 = pSrc16[(i3 * 2u) + 1u]; 00844 00845 00846 /* T0 = (yb + yd), T1 = (xb + xd) */ 00847 T0 = __SSAT(T0 + U0, 16); 00848 T1 = __SSAT(T1 + U1, 16); 00849 00850 /* writing the butterfly processed i0 sample */ 00851 00852 /* xa' = xa + xb + xc + xd */ 00853 /* ya' = ya + yb + yc + yd */ 00854 out1 = ((R0 >> 1u) + (T0 >> 1u)) >> 1u; 00855 out2 = ((R1 >> 1u) + (T1 >> 1u)) >> 1u; 00856 00857 pSrc16[i0 * 2u] = out1; 00858 pSrc16[(2u * i0) + 1u] = out2; 00859 00860 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */ 00861 R0 = (R0 >> 1u) - (T0 >> 1u); 00862 R1 = (R1 >> 1u) - (T1 >> 1u); 00863 00864 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ 00865 out1 = (short) ((Co2 * R0 + Si2 * R1) >> 16u); 00866 00867 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00868 out2 = (short) ((-Si2 * R0 + Co2 * R1) >> 16u); 00869 00870 /* Reading i0+3fftLen/4 */ 00871 /* Read yb (real), xb(imag) input */ 00872 T0 = pSrc16[i1 * 2u]; 00873 T1 = pSrc16[(i1 * 2u) + 1u]; 00874 00875 /* writing the butterfly processed i0 + fftLen/4 sample */ 00876 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 00877 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00878 pSrc16[i1 * 2u] = out1; 00879 pSrc16[(i1 * 2u) + 1u] = out2; 00880 00881 /* Butterfly calculations */ 00882 00883 /* Read yd (real), xd(imag) input */ 00884 U0 = pSrc16[i3 * 2u]; 00885 U1 = pSrc16[(i3 * 2u) + 1u]; 00886 00887 /* T0 = yb-yd, T1 = xb-xd */ 00888 T0 = __SSAT(T0 - U0, 16); 00889 T1 = __SSAT(T1 - U1, 16); 00890 00891 /* R0 = (ya-yc) + (xb- xd), R1 = (xa-xc) - (yb-yd)) */ 00892 R0 = (S0 >> 1u) - (T1 >> 1u); 00893 R1 = (S1 >> 1u) + (T0 >> 1u); 00894 00895 /* S0 = (ya-yc) - (xb- xd), S1 = (xa-xc) + (yb-yd)) */ 00896 S0 = (S0 >> 1u) + (T1 >> 1u); 00897 S1 = (S1 >> 1u) - (T0 >> 1u); 00898 00899 /* Butterfly process for the i0+fftLen/2 sample */ 00900 out1 = (short) ((Co1 * S0 + Si1 * S1) >> 16u); 00901 00902 out2 = (short) ((-Si1 * S0 + Co1 * S1) >> 16u); 00903 00904 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 00905 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 00906 pSrc16[i2 * 2u] = out1; 00907 pSrc16[(i2 * 2u) + 1u] = out2; 00908 00909 /* Butterfly process for the i0+3fftLen/4 sample */ 00910 out1 = (short) ((Si3 * R1 + Co3 * R0) >> 16u); 00911 00912 out2 = (short) ((-Si3 * R0 + Co3 * R1) >> 16u); 00913 /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */ 00914 /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */ 00915 pSrc16[i3 * 2u] = out1; 00916 pSrc16[(i3 * 2u) + 1u] = out2; 00917 } 00918 } 00919 /* Twiddle coefficients index modifier */ 00920 twidCoefModifier <<= 2u; 00921 } 00922 /* end of middle stage process */ 00923 00924 00925 /* data is in 10.6(q6) format for the 1024 point */ 00926 /* data is in 8.8(q8) format for the 256 point */ 00927 /* data is in 6.10(q10) format for the 64 point */ 00928 /* data is in 4.12(q12) format for the 16 point */ 00929 00930 /* Initializations for the last stage */ 00931 n1 = n2; 00932 n2 >>= 2u; 00933 00934 /* start of last stage process */ 00935 00936 /* Butterfly implementation */ 00937 for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1) 00938 { 00939 /* index calculation for the input as, */ 00940 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 00941 i1 = i0 + n2; 00942 i2 = i1 + n2; 00943 i3 = i2 + n2; 00944 00945 /* Reading i0, i0+fftLen/2 inputs */ 00946 /* Read ya (real), xa(imag) input */ 00947 T0 = pSrc16[i0 * 2u]; 00948 T1 = pSrc16[(i0 * 2u) + 1u]; 00949 00950 /* Read yc (real), xc(imag) input */ 00951 S0 = pSrc16[i2 * 2u]; 00952 S1 = pSrc16[(i2 * 2u) + 1u]; 00953 00954 /* R0 = (ya + yc), R1 = (xa + xc) */ 00955 R0 = __SSAT(T0 + S0, 16u); 00956 R1 = __SSAT(T1 + S1, 16u); 00957 00958 /* S0 = (ya - yc), S1 = (xa - xc) */ 00959 S0 = __SSAT(T0 - S0, 16u); 00960 S1 = __SSAT(T1 - S1, 16u); 00961 00962 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 00963 /* Read yb (real), xb(imag) input */ 00964 T0 = pSrc16[i1 * 2u]; 00965 T1 = pSrc16[(i1 * 2u) + 1u]; 00966 /* Read yd (real), xd(imag) input */ 00967 U0 = pSrc16[i3 * 2u]; 00968 U1 = pSrc16[(i3 * 2u) + 1u]; 00969 00970 /* T0 = (yb + yd), T1 = (xb + xd)) */ 00971 T0 = __SSAT(T0 + U0, 16u); 00972 T1 = __SSAT(T1 + U1, 16u); 00973 00974 /* writing the butterfly processed i0 sample */ 00975 /* xa' = xa + xb + xc + xd */ 00976 /* ya' = ya + yb + yc + yd */ 00977 pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u); 00978 pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u); 00979 00980 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */ 00981 R0 = (R0 >> 1u) - (T0 >> 1u); 00982 R1 = (R1 >> 1u) - (T1 >> 1u); 00983 /* Read yb (real), xb(imag) input */ 00984 T0 = pSrc16[i1 * 2u]; 00985 T1 = pSrc16[(i1 * 2u) + 1u]; 00986 00987 /* writing the butterfly processed i0 + fftLen/4 sample */ 00988 /* xc' = (xa-xb+xc-xd) */ 00989 /* yc' = (ya-yb+yc-yd) */ 00990 pSrc16[i1 * 2u] = R0; 00991 pSrc16[(i1 * 2u) + 1u] = R1; 00992 00993 /* Read yd (real), xd(imag) input */ 00994 U0 = pSrc16[i3 * 2u]; 00995 U1 = pSrc16[(i3 * 2u) + 1u]; 00996 /* T0 = (yb - yd), T1 = (xb - xd) */ 00997 T0 = __SSAT(T0 - U0, 16u); 00998 T1 = __SSAT(T1 - U1, 16u); 00999 01000 /* writing the butterfly processed i0 + fftLen/2 sample */ 01001 /* xb' = (xa+yb-xc-yd) */ 01002 /* yb' = (ya-xb-yc+xd) */ 01003 pSrc16[i2 * 2u] = (S0 >> 1u) + (T1 >> 1u); 01004 pSrc16[(i2 * 2u) + 1u] = (S1 >> 1u) - (T0 >> 1u); 01005 01006 /* writing the butterfly processed i0 + 3fftLen/4 sample */ 01007 /* xd' = (xa-yb-xc+yd) */ 01008 /* yd' = (ya+xb-yc-xd) */ 01009 pSrc16[i3 * 2u] = (S0 >> 1u) - (T1 >> 1u); 01010 pSrc16[(i3 * 2u) + 1u] = (S1 >> 1u) + (T0 >> 1u); 01011 01012 } 01013 01014 /* end of last stage process */ 01015 01016 /* output is in 11.5(q5) format for the 1024 point */ 01017 /* output is in 9.7(q7) format for the 256 point */ 01018 /* output is in 7.9(q9) format for the 64 point */ 01019 /* output is in 5.11(q11) format for the 16 point */ 01020 01021 #endif /* #ifndef ARM_MATH_CM0_FAMILY */ 01022 01023 } 01024 01025 01026 /** 01027 * @brief Core function for the Q15 CIFFT butterfly process. 01028 * @param[in, out] *pSrc16 points to the in-place buffer of Q15 data type. 01029 * @param[in] fftLen length of the FFT. 01030 * @param[in] *pCoef16 points to twiddle coefficient buffer. 01031 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 01032 * @return none. 01033 */ 01034 01035 /* 01036 * Radix-4 IFFT algorithm used is : 01037 * 01038 * CIFFT uses same twiddle coefficients as CFFT function 01039 * x[k] = x[n] + (j)k * x[n + fftLen/4] + (-1)k * x[n+fftLen/2] + (-j)k * x[n+3*fftLen/4] 01040 * 01041 * 01042 * IFFT is implemented with following changes in equations from FFT 01043 * 01044 * Input real and imaginary data: 01045 * x(n) = xa + j * ya 01046 * x(n+N/4 ) = xb + j * yb 01047 * x(n+N/2 ) = xc + j * yc 01048 * x(n+3N 4) = xd + j * yd 01049 * 01050 * 01051 * Output real and imaginary data: 01052 * x(4r) = xa'+ j * ya' 01053 * x(4r+1) = xb'+ j * yb' 01054 * x(4r+2) = xc'+ j * yc' 01055 * x(4r+3) = xd'+ j * yd' 01056 * 01057 * 01058 * Twiddle factors for radix-4 IFFT: 01059 * Wn = co1 + j * (si1) 01060 * W2n = co2 + j * (si2) 01061 * W3n = co3 + j * (si3) 01062 01063 * The real and imaginary output values for the radix-4 butterfly are 01064 * xa' = xa + xb + xc + xd 01065 * ya' = ya + yb + yc + yd 01066 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) 01067 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) 01068 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) 01069 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) 01070 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) 01071 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) 01072 * 01073 */ 01074 01075 void arm_radix4_butterfly_inverse_q15( 01076 q15_t * pSrc16, 01077 uint32_t fftLen, 01078 q15_t * pCoef16, 01079 uint32_t twidCoefModifier) 01080 { 01081 01082 #ifndef ARM_MATH_CM0_FAMILY 01083 01084 /* Run the below code for Cortex-M4 and Cortex-M3 */ 01085 01086 q31_t R, S, T, U; 01087 q31_t C1, C2, C3, out1, out2; 01088 uint32_t n1, n2, ic, i0, i1, i2, i3, j, k; 01089 q15_t in; 01090 01091 q15_t *ptr1; 01092 01093 01094 01095 q31_t xaya, xbyb, xcyc, xdyd; 01096 01097 /* Total process is divided into three stages */ 01098 01099 /* process first stage, middle stages, & last stage */ 01100 01101 /* Initializations for the first stage */ 01102 n2 = fftLen; 01103 n1 = n2; 01104 01105 /* n2 = fftLen/4 */ 01106 n2 >>= 2u; 01107 01108 /* Index for twiddle coefficient */ 01109 ic = 0u; 01110 01111 /* Index for input read and output write */ 01112 i0 = 0u; 01113 j = n2; 01114 01115 /* Input is in 1.15(q15) format */ 01116 01117 /* start of first stage process */ 01118 do 01119 { 01120 /* Butterfly implementation */ 01121 01122 /* index calculation for the input as, */ 01123 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 01124 i1 = i0 + n2; 01125 i2 = i1 + n2; 01126 i3 = i2 + n2; 01127 01128 /* Reading i0, i0+fftLen/2 inputs */ 01129 /* Read ya (real), xa(imag) input */ 01130 T = _SIMD32_OFFSET(pSrc16 + (2u * i0)); 01131 in = ((int16_t) (T & 0xFFFF)) >> 2; 01132 T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF); 01133 01134 /* Read yc (real), xc(imag) input */ 01135 S = _SIMD32_OFFSET(pSrc16 + (2u * i2)); 01136 in = ((int16_t) (S & 0xFFFF)) >> 2; 01137 S = ((S >> 2) & 0xFFFF0000) | (in & 0xFFFF); 01138 01139 /* R = packed((ya + yc), (xa + xc) ) */ 01140 R = __QADD16(T, S); 01141 01142 /* S = packed((ya - yc), (xa - xc) ) */ 01143 S = __QSUB16(T, S); 01144 01145 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 01146 /* Read yb (real), xb(imag) input */ 01147 T = _SIMD32_OFFSET(pSrc16 + (2u * i1)); 01148 in = ((int16_t) (T & 0xFFFF)) >> 2; 01149 T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF); 01150 01151 /* Read yd (real), xd(imag) input */ 01152 U = _SIMD32_OFFSET(pSrc16 + (2u * i3)); 01153 in = ((int16_t) (U & 0xFFFF)) >> 2; 01154 U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF); 01155 01156 /* T = packed((yb + yd), (xb + xd) ) */ 01157 T = __QADD16(T, U); 01158 01159 /* writing the butterfly processed i0 sample */ 01160 /* xa' = xa + xb + xc + xd */ 01161 /* ya' = ya + yb + yc + yd */ 01162 _SIMD32_OFFSET(pSrc16 + (2u * i0)) = __SHADD16(R, T); 01163 01164 /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */ 01165 R = __QSUB16(R, T); 01166 01167 /* co2 & si2 are read from SIMD Coefficient pointer */ 01168 C2 = _SIMD32_OFFSET(pCoef16 + (4u * ic)); 01169 01170 #ifndef ARM_MATH_BIG_ENDIAN 01171 01172 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 01173 out1 = __SMUSD(C2, R) >> 16u; 01174 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 01175 out2 = __SMUADX(C2, R); 01176 01177 #else 01178 01179 /* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 01180 out1 = __SMUADX(C2, R) >> 16u; 01181 /* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 01182 out2 = __SMUSD(__QSUB16(0, C2), R); 01183 01184 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01185 01186 /* Reading i0+fftLen/4 */ 01187 /* T = packed(yb, xb) */ 01188 T = _SIMD32_OFFSET(pSrc16 + (2u * i1)); 01189 in = ((int16_t) (T & 0xFFFF)) >> 2; 01190 T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF); 01191 01192 /* writing the butterfly processed i0 + fftLen/4 sample */ 01193 /* writing output(xc', yc') in little endian format */ 01194 _SIMD32_OFFSET(pSrc16 + (2u * i1)) = 01195 (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 01196 01197 /* Butterfly calculations */ 01198 /* U = packed(yd, xd) */ 01199 U = _SIMD32_OFFSET(pSrc16 + (2u * i3)); 01200 in = ((int16_t) (U & 0xFFFF)) >> 2; 01201 U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF); 01202 01203 /* T = packed(yb-yd, xb-xd) */ 01204 T = __QSUB16(T, U); 01205 01206 #ifndef ARM_MATH_BIG_ENDIAN 01207 01208 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 01209 R = __QSAX(S, T); 01210 /* S = packed((ya-yc) + (xb- xd), (xa-xc) - (yb-yd)) */ 01211 S = __QASX(S, T); 01212 01213 #else 01214 01215 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 01216 R = __QASX(S, T); 01217 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 01218 S = __QSAX(S, T); 01219 01220 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01221 01222 /* co1 & si1 are read from SIMD Coefficient pointer */ 01223 C1 = _SIMD32_OFFSET(pCoef16 + (2u * ic)); 01224 /* Butterfly process for the i0+fftLen/2 sample */ 01225 01226 #ifndef ARM_MATH_BIG_ENDIAN 01227 01228 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 01229 out1 = __SMUSD(C1, S) >> 16u; 01230 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 01231 out2 = __SMUADX(C1, S); 01232 01233 #else 01234 01235 /* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 01236 out1 = __SMUADX(C1, S) >> 16u; 01237 /* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 01238 out2 = __SMUSD(__QSUB16(0, C1), S); 01239 01240 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01241 01242 /* writing output(xb', yb') in little endian format */ 01243 _SIMD32_OFFSET(pSrc16 + (2u * i2)) = 01244 ((out2) & 0xFFFF0000) | ((out1) & 0x0000FFFF); 01245 01246 01247 /* co3 & si3 are read from SIMD Coefficient pointer */ 01248 C3 = _SIMD32_OFFSET(pCoef16 + (6u * ic)); 01249 /* Butterfly process for the i0+3fftLen/4 sample */ 01250 01251 #ifndef ARM_MATH_BIG_ENDIAN 01252 01253 /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ 01254 out1 = __SMUSD(C3, R) >> 16u; 01255 /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ 01256 out2 = __SMUADX(C3, R); 01257 01258 #else 01259 01260 /* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ 01261 out1 = __SMUADX(C3, R) >> 16u; 01262 /* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ 01263 out2 = __SMUSD(__QSUB16(0, C3), R); 01264 01265 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01266 01267 /* writing output(xd', yd') in little endian format */ 01268 _SIMD32_OFFSET(pSrc16 + (2u * i3)) = 01269 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 01270 01271 /* Twiddle coefficients index modifier */ 01272 ic = ic + twidCoefModifier; 01273 01274 /* Updating input index */ 01275 i0 = i0 + 1u; 01276 01277 } while(--j); 01278 /* data is in 4.11(q11) format */ 01279 01280 /* end of first stage process */ 01281 01282 01283 /* start of middle stage process */ 01284 01285 /* Twiddle coefficients index modifier */ 01286 twidCoefModifier <<= 2u; 01287 01288 /* Calculation of Middle stage */ 01289 for (k = fftLen / 4u; k > 4u; k >>= 2u) 01290 { 01291 /* Initializations for the middle stage */ 01292 n1 = n2; 01293 n2 >>= 2u; 01294 ic = 0u; 01295 01296 for (j = 0u; j <= (n2 - 1u); j++) 01297 { 01298 /* index calculation for the coefficients */ 01299 C1 = _SIMD32_OFFSET(pCoef16 + (2u * ic)); 01300 C2 = _SIMD32_OFFSET(pCoef16 + (4u * ic)); 01301 C3 = _SIMD32_OFFSET(pCoef16 + (6u * ic)); 01302 01303 /* Twiddle coefficients index modifier */ 01304 ic = ic + twidCoefModifier; 01305 01306 /* Butterfly implementation */ 01307 for (i0 = j; i0 < fftLen; i0 += n1) 01308 { 01309 /* index calculation for the input as, */ 01310 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 01311 i1 = i0 + n2; 01312 i2 = i1 + n2; 01313 i3 = i2 + n2; 01314 01315 /* Reading i0, i0+fftLen/2 inputs */ 01316 /* Read ya (real), xa(imag) input */ 01317 T = _SIMD32_OFFSET(pSrc16 + (2u * i0)); 01318 01319 /* Read yc (real), xc(imag) input */ 01320 S = _SIMD32_OFFSET(pSrc16 + (2u * i2)); 01321 01322 /* R = packed( (ya + yc), (xa + xc)) */ 01323 R = __QADD16(T, S); 01324 01325 /* S = packed((ya - yc), (xa - xc)) */ 01326 S = __QSUB16(T, S); 01327 01328 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 01329 /* Read yb (real), xb(imag) input */ 01330 T = _SIMD32_OFFSET(pSrc16 + (2u * i1)); 01331 01332 /* Read yd (real), xd(imag) input */ 01333 U = _SIMD32_OFFSET(pSrc16 + (2u * i3)); 01334 01335 /* T = packed( (yb + yd), (xb + xd)) */ 01336 T = __QADD16(T, U); 01337 01338 /* writing the butterfly processed i0 sample */ 01339 01340 /* xa' = xa + xb + xc + xd */ 01341 /* ya' = ya + yb + yc + yd */ 01342 out1 = __SHADD16(R, T); 01343 in = ((int16_t) (out1 & 0xFFFF)) >> 1; 01344 out1 = ((out1 >> 1) & 0xFFFF0000) | (in & 0xFFFF); 01345 _SIMD32_OFFSET(pSrc16 + (2u * i0)) = out1; 01346 01347 /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */ 01348 R = __SHSUB16(R, T); 01349 01350 #ifndef ARM_MATH_BIG_ENDIAN 01351 01352 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ 01353 out1 = __SMUSD(C2, R) >> 16u; 01354 01355 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 01356 out2 = __SMUADX(C2, R); 01357 01358 #else 01359 01360 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 01361 out1 = __SMUADX(R, C2) >> 16u; 01362 01363 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ 01364 out2 = __SMUSD(__QSUB16(0, C2), R); 01365 01366 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01367 01368 /* Reading i0+3fftLen/4 */ 01369 /* Read yb (real), xb(imag) input */ 01370 T = _SIMD32_OFFSET(pSrc16 + (2u * i1)); 01371 01372 /* writing the butterfly processed i0 + fftLen/4 sample */ 01373 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 01374 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 01375 _SIMD32_OFFSET(pSrc16 + (2u * i1)) = 01376 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 01377 01378 /* Butterfly calculations */ 01379 01380 /* Read yd (real), xd(imag) input */ 01381 U = _SIMD32_OFFSET(pSrc16 + (2u * i3)); 01382 01383 /* T = packed(yb-yd, xb-xd) */ 01384 T = __QSUB16(T, U); 01385 01386 #ifndef ARM_MATH_BIG_ENDIAN 01387 01388 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 01389 R = __SHSAX(S, T); 01390 01391 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 01392 S = __SHASX(S, T); 01393 01394 01395 /* Butterfly process for the i0+fftLen/2 sample */ 01396 out1 = __SMUSD(C1, S) >> 16u; 01397 out2 = __SMUADX(C1, S); 01398 01399 #else 01400 01401 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 01402 R = __SHASX(S, T); 01403 01404 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 01405 S = __SHSAX(S, T); 01406 01407 01408 /* Butterfly process for the i0+fftLen/2 sample */ 01409 out1 = __SMUADX(S, C1) >> 16u; 01410 out2 = __SMUSD(__QSUB16(0, C1), S); 01411 01412 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01413 01414 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 01415 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 01416 _SIMD32_OFFSET(pSrc16 + (2u * i2)) = 01417 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 01418 01419 /* Butterfly process for the i0+3fftLen/4 sample */ 01420 01421 #ifndef ARM_MATH_BIG_ENDIAN 01422 01423 out1 = __SMUSD(C3, R) >> 16u; 01424 out2 = __SMUADX(C3, R); 01425 01426 #else 01427 01428 out1 = __SMUADX(C3, R) >> 16u; 01429 out2 = __SMUSD(__QSUB16(0, C3), R); 01430 01431 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01432 01433 /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ 01434 /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ 01435 _SIMD32_OFFSET(pSrc16 + (2u * i3)) = 01436 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 01437 } 01438 } 01439 /* Twiddle coefficients index modifier */ 01440 twidCoefModifier <<= 2u; 01441 } 01442 /* end of middle stage process */ 01443 01444 /* data is in 10.6(q6) format for the 1024 point */ 01445 /* data is in 8.8(q8) format for the 256 point */ 01446 /* data is in 6.10(q10) format for the 64 point */ 01447 /* data is in 4.12(q12) format for the 16 point */ 01448 01449 /* Initializations for the last stage */ 01450 j = fftLen >> 2; 01451 01452 ptr1 = &pSrc16[0]; 01453 01454 /* start of last stage process */ 01455 01456 /* Butterfly implementation */ 01457 do 01458 { 01459 /* Read xa (real), ya(imag) input */ 01460 xaya = *__SIMD32(ptr1)++; 01461 01462 /* Read xb (real), yb(imag) input */ 01463 xbyb = *__SIMD32(ptr1)++; 01464 01465 /* Read xc (real), yc(imag) input */ 01466 xcyc = *__SIMD32(ptr1)++; 01467 01468 /* Read xd (real), yd(imag) input */ 01469 xdyd = *__SIMD32(ptr1)++; 01470 01471 /* R = packed((ya + yc), (xa + xc)) */ 01472 R = __QADD16(xaya, xcyc); 01473 01474 /* T = packed((yb + yd), (xb + xd)) */ 01475 T = __QADD16(xbyb, xdyd); 01476 01477 /* pointer updation for writing */ 01478 ptr1 = ptr1 - 8u; 01479 01480 01481 /* xa' = xa + xb + xc + xd */ 01482 /* ya' = ya + yb + yc + yd */ 01483 *__SIMD32(ptr1)++ = __SHADD16(R, T); 01484 01485 /* T = packed((yb + yd), (xb + xd)) */ 01486 T = __QADD16(xbyb, xdyd); 01487 01488 /* xc' = (xa-xb+xc-xd) */ 01489 /* yc' = (ya-yb+yc-yd) */ 01490 *__SIMD32(ptr1)++ = __SHSUB16(R, T); 01491 01492 /* S = packed((ya - yc), (xa - xc)) */ 01493 S = __QSUB16(xaya, xcyc); 01494 01495 /* Read yd (real), xd(imag) input */ 01496 /* T = packed( (yb - yd), (xb - xd)) */ 01497 U = __QSUB16(xbyb, xdyd); 01498 01499 #ifndef ARM_MATH_BIG_ENDIAN 01500 01501 /* xb' = (xa+yb-xc-yd) */ 01502 /* yb' = (ya-xb-yc+xd) */ 01503 *__SIMD32(ptr1)++ = __SHASX(S, U); 01504 01505 01506 /* xd' = (xa-yb-xc+yd) */ 01507 /* yd' = (ya+xb-yc-xd) */ 01508 *__SIMD32(ptr1)++ = __SHSAX(S, U); 01509 01510 #else 01511 01512 /* xb' = (xa+yb-xc-yd) */ 01513 /* yb' = (ya-xb-yc+xd) */ 01514 *__SIMD32(ptr1)++ = __SHSAX(S, U); 01515 01516 01517 /* xd' = (xa-yb-xc+yd) */ 01518 /* yd' = (ya+xb-yc-xd) */ 01519 *__SIMD32(ptr1)++ = __SHASX(S, U); 01520 01521 01522 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01523 01524 } while(--j); 01525 01526 /* end of last stage process */ 01527 01528 /* output is in 11.5(q5) format for the 1024 point */ 01529 /* output is in 9.7(q7) format for the 256 point */ 01530 /* output is in 7.9(q9) format for the 64 point */ 01531 /* output is in 5.11(q11) format for the 16 point */ 01532 01533 01534 #else 01535 01536 /* Run the below code for Cortex-M0 */ 01537 01538 q15_t R0, R1, S0, S1, T0, T1, U0, U1; 01539 q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2; 01540 uint32_t n1, n2, ic, i0, i1, i2, i3, j, k; 01541 01542 /* Total process is divided into three stages */ 01543 01544 /* process first stage, middle stages, & last stage */ 01545 01546 /* Initializations for the first stage */ 01547 n2 = fftLen; 01548 n1 = n2; 01549 01550 /* n2 = fftLen/4 */ 01551 n2 >>= 2u; 01552 01553 /* Index for twiddle coefficient */ 01554 ic = 0u; 01555 01556 /* Index for input read and output write */ 01557 i0 = 0u; 01558 01559 j = n2; 01560 01561 /* Input is in 1.15(q15) format */ 01562 01563 /* Start of first stage process */ 01564 do 01565 { 01566 /* Butterfly implementation */ 01567 01568 /* index calculation for the input as, */ 01569 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 01570 i1 = i0 + n2; 01571 i2 = i1 + n2; 01572 i3 = i2 + n2; 01573 01574 /* Reading i0, i0+fftLen/2 inputs */ 01575 /* input is down scale by 4 to avoid overflow */ 01576 /* Read ya (real), xa(imag) input */ 01577 T0 = pSrc16[i0 * 2u] >> 2u; 01578 T1 = pSrc16[(i0 * 2u) + 1u] >> 2u; 01579 /* input is down scale by 4 to avoid overflow */ 01580 /* Read yc (real), xc(imag) input */ 01581 S0 = pSrc16[i2 * 2u] >> 2u; 01582 S1 = pSrc16[(i2 * 2u) + 1u] >> 2u; 01583 01584 /* R0 = (ya + yc), R1 = (xa + xc) */ 01585 R0 = __SSAT(T0 + S0, 16u); 01586 R1 = __SSAT(T1 + S1, 16u); 01587 /* S0 = (ya - yc), S1 = (xa - xc) */ 01588 S0 = __SSAT(T0 - S0, 16u); 01589 S1 = __SSAT(T1 - S1, 16u); 01590 01591 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 01592 /* input is down scale by 4 to avoid overflow */ 01593 /* Read yb (real), xb(imag) input */ 01594 T0 = pSrc16[i1 * 2u] >> 2u; 01595 T1 = pSrc16[(i1 * 2u) + 1u] >> 2u; 01596 /* Read yd (real), xd(imag) input */ 01597 /* input is down scale by 4 to avoid overflow */ 01598 U0 = pSrc16[i3 * 2u] >> 2u; 01599 U1 = pSrc16[(i3 * 2u) + 1u] >> 2u; 01600 01601 /* T0 = (yb + yd), T1 = (xb + xd) */ 01602 T0 = __SSAT(T0 + U0, 16u); 01603 T1 = __SSAT(T1 + U1, 16u); 01604 01605 /* writing the butterfly processed i0 sample */ 01606 /* xa' = xa + xb + xc + xd */ 01607 /* ya' = ya + yb + yc + yd */ 01608 pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u); 01609 pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u); 01610 01611 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc)- (xb + xd) */ 01612 R0 = __SSAT(R0 - T0, 16u); 01613 R1 = __SSAT(R1 - T1, 16u); 01614 /* co2 & si2 are read from Coefficient pointer */ 01615 Co2 = pCoef16[2u * ic * 2u]; 01616 Si2 = pCoef16[(2u * ic * 2u) + 1u]; 01617 /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */ 01618 out1 = (short) ((Co2 * R0 - Si2 * R1) >> 16u); 01619 /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */ 01620 out2 = (short) ((Si2 * R0 + Co2 * R1) >> 16u); 01621 01622 /* Reading i0+fftLen/4 */ 01623 /* input is down scale by 4 to avoid overflow */ 01624 /* T0 = yb, T1 = xb */ 01625 T0 = pSrc16[i1 * 2u] >> 2u; 01626 T1 = pSrc16[(i1 * 2u) + 1u] >> 2u; 01627 01628 /* writing the butterfly processed i0 + fftLen/4 sample */ 01629 /* writing output(xc', yc') in little endian format */ 01630 pSrc16[i1 * 2u] = out1; 01631 pSrc16[(i1 * 2u) + 1u] = out2; 01632 01633 /* Butterfly calculations */ 01634 /* input is down scale by 4 to avoid overflow */ 01635 /* U0 = yd, U1 = xd) */ 01636 U0 = pSrc16[i3 * 2u] >> 2u; 01637 U1 = pSrc16[(i3 * 2u) + 1u] >> 2u; 01638 01639 /* T0 = yb-yd, T1 = xb-xd) */ 01640 T0 = __SSAT(T0 - U0, 16u); 01641 T1 = __SSAT(T1 - U1, 16u); 01642 /* R0 = (ya-yc) - (xb- xd) , R1 = (xa-xc) + (yb-yd) */ 01643 R0 = (short) __SSAT((q31_t) (S0 + T1), 16); 01644 R1 = (short) __SSAT((q31_t) (S1 - T0), 16); 01645 /* S = (ya-yc) + (xb- xd), S1 = (xa-xc) - (yb-yd) */ 01646 S0 = (short) __SSAT((q31_t) (S0 - T1), 16); 01647 S1 = (short) __SSAT((q31_t) (S1 + T0), 16); 01648 01649 /* co1 & si1 are read from Coefficient pointer */ 01650 Co1 = pCoef16[ic * 2u]; 01651 Si1 = pCoef16[(ic * 2u) + 1u]; 01652 /* Butterfly process for the i0+fftLen/2 sample */ 01653 /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */ 01654 out1 = (short) ((Co1 * S0 - Si1 * S1) >> 16u); 01655 /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */ 01656 out2 = (short) ((Si1 * S0 + Co1 * S1) >> 16u); 01657 /* writing output(xb', yb') in little endian format */ 01658 pSrc16[i2 * 2u] = out1; 01659 pSrc16[(i2 * 2u) + 1u] = out2; 01660 01661 /* Co3 & si3 are read from Coefficient pointer */ 01662 Co3 = pCoef16[3u * ic * 2u]; 01663 Si3 = pCoef16[(3u * ic * 2u) + 1u]; 01664 /* Butterfly process for the i0+3fftLen/4 sample */ 01665 /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */ 01666 out1 = (short) ((Co3 * R0 - Si3 * R1) >> 16u); 01667 /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */ 01668 out2 = (short) ((Si3 * R0 + Co3 * R1) >> 16u); 01669 /* writing output(xd', yd') in little endian format */ 01670 pSrc16[i3 * 2u] = out1; 01671 pSrc16[(i3 * 2u) + 1u] = out2; 01672 01673 /* Twiddle coefficients index modifier */ 01674 ic = ic + twidCoefModifier; 01675 01676 /* Updating input index */ 01677 i0 = i0 + 1u; 01678 01679 } while(--j); 01680 01681 /* End of first stage process */ 01682 01683 /* data is in 4.11(q11) format */ 01684 01685 01686 /* Start of Middle stage process */ 01687 01688 /* Twiddle coefficients index modifier */ 01689 twidCoefModifier <<= 2u; 01690 01691 /* Calculation of Middle stage */ 01692 for (k = fftLen / 4u; k > 4u; k >>= 2u) 01693 { 01694 /* Initializations for the middle stage */ 01695 n1 = n2; 01696 n2 >>= 2u; 01697 ic = 0u; 01698 01699 for (j = 0u; j <= (n2 - 1u); j++) 01700 { 01701 /* index calculation for the coefficients */ 01702 Co1 = pCoef16[ic * 2u]; 01703 Si1 = pCoef16[(ic * 2u) + 1u]; 01704 Co2 = pCoef16[2u * ic * 2u]; 01705 Si2 = pCoef16[2u * ic * 2u + 1u]; 01706 Co3 = pCoef16[3u * ic * 2u]; 01707 Si3 = pCoef16[(3u * ic * 2u) + 1u]; 01708 01709 /* Twiddle coefficients index modifier */ 01710 ic = ic + twidCoefModifier; 01711 01712 /* Butterfly implementation */ 01713 for (i0 = j; i0 < fftLen; i0 += n1) 01714 { 01715 /* index calculation for the input as, */ 01716 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 01717 i1 = i0 + n2; 01718 i2 = i1 + n2; 01719 i3 = i2 + n2; 01720 01721 /* Reading i0, i0+fftLen/2 inputs */ 01722 /* Read ya (real), xa(imag) input */ 01723 T0 = pSrc16[i0 * 2u]; 01724 T1 = pSrc16[(i0 * 2u) + 1u]; 01725 01726 /* Read yc (real), xc(imag) input */ 01727 S0 = pSrc16[i2 * 2u]; 01728 S1 = pSrc16[(i2 * 2u) + 1u]; 01729 01730 01731 /* R0 = (ya + yc), R1 = (xa + xc) */ 01732 R0 = __SSAT(T0 + S0, 16u); 01733 R1 = __SSAT(T1 + S1, 16u); 01734 /* S0 = (ya - yc), S1 = (xa - xc) */ 01735 S0 = __SSAT(T0 - S0, 16u); 01736 S1 = __SSAT(T1 - S1, 16u); 01737 01738 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 01739 /* Read yb (real), xb(imag) input */ 01740 T0 = pSrc16[i1 * 2u]; 01741 T1 = pSrc16[(i1 * 2u) + 1u]; 01742 01743 /* Read yd (real), xd(imag) input */ 01744 U0 = pSrc16[i3 * 2u]; 01745 U1 = pSrc16[(i3 * 2u) + 1u]; 01746 01747 /* T0 = (yb + yd), T1 = (xb + xd) */ 01748 T0 = __SSAT(T0 + U0, 16u); 01749 T1 = __SSAT(T1 + U1, 16u); 01750 01751 /* writing the butterfly processed i0 sample */ 01752 /* xa' = xa + xb + xc + xd */ 01753 /* ya' = ya + yb + yc + yd */ 01754 pSrc16[i0 * 2u] = ((R0 >> 1u) + (T0 >> 1u)) >> 1u; 01755 pSrc16[(i0 * 2u) + 1u] = ((R1 >> 1u) + (T1 >> 1u)) >> 1u; 01756 01757 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */ 01758 R0 = (R0 >> 1u) - (T0 >> 1u); 01759 R1 = (R1 >> 1u) - (T1 >> 1u); 01760 01761 /* (ya-yb+yc-yd)* (si2) - (xa-xb+xc-xd)* co2 */ 01762 out1 = (short) ((Co2 * R0 - Si2 * R1) >> 16); 01763 /* (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */ 01764 out2 = (short) ((Si2 * R0 + Co2 * R1) >> 16); 01765 01766 /* Reading i0+3fftLen/4 */ 01767 /* Read yb (real), xb(imag) input */ 01768 T0 = pSrc16[i1 * 2u]; 01769 T1 = pSrc16[(i1 * 2u) + 1u]; 01770 01771 /* writing the butterfly processed i0 + fftLen/4 sample */ 01772 /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */ 01773 /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */ 01774 pSrc16[i1 * 2u] = out1; 01775 pSrc16[(i1 * 2u) + 1u] = out2; 01776 01777 /* Butterfly calculations */ 01778 /* Read yd (real), xd(imag) input */ 01779 U0 = pSrc16[i3 * 2u]; 01780 U1 = pSrc16[(i3 * 2u) + 1u]; 01781 01782 /* T0 = yb-yd, T1 = xb-xd) */ 01783 T0 = __SSAT(T0 - U0, 16u); 01784 T1 = __SSAT(T1 - U1, 16u); 01785 01786 /* R0 = (ya-yc) - (xb- xd) , R1 = (xa-xc) + (yb-yd) */ 01787 R0 = (S0 >> 1u) + (T1 >> 1u); 01788 R1 = (S1 >> 1u) - (T0 >> 1u); 01789 01790 /* S1 = (ya-yc) + (xb- xd), S1 = (xa-xc) - (yb-yd) */ 01791 S0 = (S0 >> 1u) - (T1 >> 1u); 01792 S1 = (S1 >> 1u) + (T0 >> 1u); 01793 01794 /* Butterfly process for the i0+fftLen/2 sample */ 01795 out1 = (short) ((Co1 * S0 - Si1 * S1) >> 16u); 01796 out2 = (short) ((Si1 * S0 + Co1 * S1) >> 16u); 01797 /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */ 01798 /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */ 01799 pSrc16[i2 * 2u] = out1; 01800 pSrc16[(i2 * 2u) + 1u] = out2; 01801 01802 /* Butterfly process for the i0+3fftLen/4 sample */ 01803 out1 = (short) ((Co3 * R0 - Si3 * R1) >> 16u); 01804 01805 out2 = (short) ((Si3 * R0 + Co3 * R1) >> 16u); 01806 /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */ 01807 /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */ 01808 pSrc16[i3 * 2u] = out1; 01809 pSrc16[(i3 * 2u) + 1u] = out2; 01810 01811 01812 } 01813 } 01814 /* Twiddle coefficients index modifier */ 01815 twidCoefModifier <<= 2u; 01816 } 01817 /* End of Middle stages process */ 01818 01819 01820 /* data is in 10.6(q6) format for the 1024 point */ 01821 /* data is in 8.8(q8) format for the 256 point */ 01822 /* data is in 6.10(q10) format for the 64 point */ 01823 /* data is in 4.12(q12) format for the 16 point */ 01824 01825 /* start of last stage process */ 01826 01827 01828 /* Initializations for the last stage */ 01829 n1 = n2; 01830 n2 >>= 2u; 01831 01832 /* Butterfly implementation */ 01833 for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1) 01834 { 01835 /* index calculation for the input as, */ 01836 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 01837 i1 = i0 + n2; 01838 i2 = i1 + n2; 01839 i3 = i2 + n2; 01840 01841 /* Reading i0, i0+fftLen/2 inputs */ 01842 /* Read ya (real), xa(imag) input */ 01843 T0 = pSrc16[i0 * 2u]; 01844 T1 = pSrc16[(i0 * 2u) + 1u]; 01845 /* Read yc (real), xc(imag) input */ 01846 S0 = pSrc16[i2 * 2u]; 01847 S1 = pSrc16[(i2 * 2u) + 1u]; 01848 01849 /* R0 = (ya + yc), R1 = (xa + xc) */ 01850 R0 = __SSAT(T0 + S0, 16u); 01851 R1 = __SSAT(T1 + S1, 16u); 01852 /* S0 = (ya - yc), S1 = (xa - xc) */ 01853 S0 = __SSAT(T0 - S0, 16u); 01854 S1 = __SSAT(T1 - S1, 16u); 01855 01856 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 01857 /* Read yb (real), xb(imag) input */ 01858 T0 = pSrc16[i1 * 2u]; 01859 T1 = pSrc16[(i1 * 2u) + 1u]; 01860 /* Read yd (real), xd(imag) input */ 01861 U0 = pSrc16[i3 * 2u]; 01862 U1 = pSrc16[(i3 * 2u) + 1u]; 01863 01864 /* T0 = (yb + yd), T1 = (xb + xd) */ 01865 T0 = __SSAT(T0 + U0, 16u); 01866 T1 = __SSAT(T1 + U1, 16u); 01867 01868 /* writing the butterfly processed i0 sample */ 01869 /* xa' = xa + xb + xc + xd */ 01870 /* ya' = ya + yb + yc + yd */ 01871 pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u); 01872 pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u); 01873 01874 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */ 01875 R0 = (R0 >> 1u) - (T0 >> 1u); 01876 R1 = (R1 >> 1u) - (T1 >> 1u); 01877 01878 /* Read yb (real), xb(imag) input */ 01879 T0 = pSrc16[i1 * 2u]; 01880 T1 = pSrc16[(i1 * 2u) + 1u]; 01881 01882 /* writing the butterfly processed i0 + fftLen/4 sample */ 01883 /* xc' = (xa-xb+xc-xd) */ 01884 /* yc' = (ya-yb+yc-yd) */ 01885 pSrc16[i1 * 2u] = R0; 01886 pSrc16[(i1 * 2u) + 1u] = R1; 01887 01888 /* Read yd (real), xd(imag) input */ 01889 U0 = pSrc16[i3 * 2u]; 01890 U1 = pSrc16[(i3 * 2u) + 1u]; 01891 /* T0 = (yb - yd), T1 = (xb - xd) */ 01892 T0 = __SSAT(T0 - U0, 16u); 01893 T1 = __SSAT(T1 - U1, 16u); 01894 01895 /* writing the butterfly processed i0 + fftLen/2 sample */ 01896 /* xb' = (xa-yb-xc+yd) */ 01897 /* yb' = (ya+xb-yc-xd) */ 01898 pSrc16[i2 * 2u] = (S0 >> 1u) - (T1 >> 1u); 01899 pSrc16[(i2 * 2u) + 1u] = (S1 >> 1u) + (T0 >> 1u); 01900 01901 01902 /* writing the butterfly processed i0 + 3fftLen/4 sample */ 01903 /* xd' = (xa+yb-xc-yd) */ 01904 /* yd' = (ya-xb-yc+xd) */ 01905 pSrc16[i3 * 2u] = (S0 >> 1u) + (T1 >> 1u); 01906 pSrc16[(i3 * 2u) + 1u] = (S1 >> 1u) - (T0 >> 1u); 01907 } 01908 /* end of last stage process */ 01909 01910 /* output is in 11.5(q5) format for the 1024 point */ 01911 /* output is in 9.7(q7) format for the 256 point */ 01912 /* output is in 7.9(q9) format for the 64 point */ 01913 /* output is in 5.11(q11) format for the 16 point */ 01914 01915 #endif /* #ifndef ARM_MATH_CM0_FAMILY */ 01916 01917 }
Generated on Tue Jul 12 2022 12:36:53 by 1.7.2