Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository Zip archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
arm_cfft_radix4_q15.c
00001 /* ---------------------------------------------------------------------- 00002 * Project: CMSIS DSP Library 00003 * Title: arm_cfft_radix4_q15.c 00004 * Description: This file has function definition of Radix-4 FFT & IFFT function and 00005 * In-place bit reversal using bit reversal table 00006 * 00007 * $Date: 27. January 2017 00008 * $Revision: V.1.5.1 00009 * 00010 * Target Processor: Cortex-M cores 00011 * -------------------------------------------------------------------- */ 00012 /* 00013 * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. 00014 * 00015 * SPDX-License-Identifier: Apache-2.0 00016 * 00017 * Licensed under the Apache License, Version 2.0 (the License); you may 00018 * not use this file except in compliance with the License. 00019 * You may obtain a copy of the License at 00020 * 00021 * www.apache.org/licenses/LICENSE-2.0 00022 * 00023 * Unless required by applicable law or agreed to in writing, software 00024 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 00025 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00026 * See the License for the specific language governing permissions and 00027 * limitations under the License. 00028 */ 00029 00030 #include "arm_math.h" 00031 00032 00033 void arm_radix4_butterfly_q15( 00034 q15_t * pSrc16, 00035 uint32_t fftLen, 00036 q15_t * pCoef16, 00037 uint32_t twidCoefModifier); 00038 00039 void arm_radix4_butterfly_inverse_q15( 00040 q15_t * pSrc16, 00041 uint32_t fftLen, 00042 q15_t * pCoef16, 00043 uint32_t twidCoefModifier); 00044 00045 void arm_bitreversal_q15( 00046 q15_t * pSrc, 00047 uint32_t fftLen, 00048 uint16_t bitRevFactor, 00049 uint16_t * pBitRevTab); 00050 00051 /** 00052 * @ingroup groupTransforms 00053 */ 00054 00055 /** 00056 * @addtogroup ComplexFFT 00057 * @{ 00058 */ 00059 00060 00061 /** 00062 * @details 00063 * @brief Processing function for the Q15 CFFT/CIFFT. 00064 * @deprecated Do not use this function. 
It has been superseded by \ref arm_cfft_q15 and will be removed 00065 * @param[in] *S points to an instance of the Q15 CFFT/CIFFT structure. 00066 * @param[in, out] *pSrc points to the complex data buffer. Processing occurs in-place. 00067 * @return none. 00068 * 00069 * \par Input and output formats: 00070 * \par 00071 * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process. 00072 * Hence the output format is different for different FFT sizes. 00073 * The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT: 00074 * \par 00075 * \image html CFFTQ15.gif "Input and Output Formats for Q15 CFFT" 00076 * \image html CIFFTQ15.gif "Input and Output Formats for Q15 CIFFT" 00077 */ 00078 00079 void arm_cfft_radix4_q15( 00080 const arm_cfft_radix4_instance_q15 * S, 00081 q15_t * pSrc) 00082 { 00083 if (S->ifftFlag == 1U) 00084 { 00085 /* Complex IFFT radix-4 */ 00086 arm_radix4_butterfly_inverse_q15(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier); 00087 } 00088 else 00089 { 00090 /* Complex FFT radix-4 */ 00091 arm_radix4_butterfly_q15(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier); 00092 } 00093 00094 if (S->bitReverseFlag == 1U) 00095 { 00096 /* Bit Reversal */ 00097 arm_bitreversal_q15(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable); 00098 } 00099 00100 } 00101 00102 /** 00103 * @} end of ComplexFFT group 00104 */ 00105 00106 /* 00107 * Radix-4 FFT algorithm used is : 00108 * 00109 * Input real and imaginary data: 00110 * x(n) = xa + j * ya 00111 * x(n+N/4 ) = xb + j * yb 00112 * x(n+N/2 ) = xc + j * yc 00113 * x(n+3N 4) = xd + j * yd 00114 * 00115 * 00116 * Output real and imaginary data: 00117 * x(4r) = xa'+ j * ya' 00118 * x(4r+1) = xb'+ j * yb' 00119 * x(4r+2) = xc'+ j * yc' 00120 * x(4r+3) = xd'+ j * yd' 00121 * 00122 * 00123 * Twiddle factors for radix-4 FFT: 00124 * Wn = co1 + j * (- si1) 00125 * W2n = co2 + j * (- si2) 00126 * W3n 
= co3 + j * (- si3) 00127 00128 * The real and imaginary output values for the radix-4 butterfly are 00129 * xa' = xa + xb + xc + xd 00130 * ya' = ya + yb + yc + yd 00131 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) 00132 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) 00133 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) 00134 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) 00135 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) 00136 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) 00137 * 00138 */ 00139 00140 /** 00141 * @brief Core function for the Q15 CFFT butterfly process. 00142 * @param[in, out] *pSrc16 points to the in-place buffer of Q15 data type. 00143 * @param[in] fftLen length of the FFT. 00144 * @param[in] *pCoef16 points to twiddle coefficient buffer. 00145 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 00146 * @return none. 00147 */ 00148 00149 void arm_radix4_butterfly_q15( 00150 q15_t * pSrc16, 00151 uint32_t fftLen, 00152 q15_t * pCoef16, 00153 uint32_t twidCoefModifier) 00154 { 00155 00156 #if defined (ARM_MATH_DSP) 00157 00158 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00159 00160 q31_t R, S, T, U; 00161 q31_t C1, C2, C3, out1, out2; 00162 uint32_t n1, n2, ic, i0, j, k; 00163 00164 q15_t *ptr1; 00165 q15_t *pSi0; 00166 q15_t *pSi1; 00167 q15_t *pSi2; 00168 q15_t *pSi3; 00169 00170 q31_t xaya, xbyb, xcyc, xdyd; 00171 00172 /* Total process is divided into three stages */ 00173 00174 /* process first stage, middle stages, & last stage */ 00175 00176 /* Initializations for the first stage */ 00177 n2 = fftLen; 00178 n1 = n2; 00179 00180 /* n2 = fftLen/4 */ 00181 n2 >>= 2U; 00182 00183 /* Index for twiddle coefficient */ 00184 ic = 0U; 00185 00186 /* Index for input read and output write */ 00187 j = n2; 00188 00189 pSi0 = pSrc16; 00190 pSi1 = pSi0 + 2 * n2; 00191 pSi2 = pSi1 + 2 * n2; 00192 pSi3 = pSi2 + 2 * n2; 00193 00194 /* Input is in 
1.15(q15) format */ 00195 00196 /* start of first stage process */ 00197 do 00198 { 00199 /* Butterfly implementation */ 00200 00201 /* Reading i0, i0+fftLen/2 inputs */ 00202 /* Read ya (real), xa(imag) input */ 00203 T = _SIMD32_OFFSET(pSi0); 00204 T = __SHADD16(T, 0); // this is just a SIMD arithmetic shift right by 1 00205 T = __SHADD16(T, 0); // it turns out doing this twice is 2 cycles, the alternative takes 3 cycles 00206 //in = ((int16_t) (T & 0xFFFF)) >> 2; // alternative code that takes 3 cycles 00207 //T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF); 00208 00209 /* Read yc (real), xc(imag) input */ 00210 S = _SIMD32_OFFSET(pSi2); 00211 S = __SHADD16(S, 0); 00212 S = __SHADD16(S, 0); 00213 00214 /* R = packed((ya + yc), (xa + xc) ) */ 00215 R = __QADD16(T, S); 00216 00217 /* S = packed((ya - yc), (xa - xc) ) */ 00218 S = __QSUB16(T, S); 00219 00220 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 00221 /* Read yb (real), xb(imag) input */ 00222 T = _SIMD32_OFFSET(pSi1); 00223 T = __SHADD16(T, 0); 00224 T = __SHADD16(T, 0); 00225 00226 /* Read yd (real), xd(imag) input */ 00227 U = _SIMD32_OFFSET(pSi3); 00228 U = __SHADD16(U, 0); 00229 U = __SHADD16(U, 0); 00230 00231 /* T = packed((yb + yd), (xb + xd) ) */ 00232 T = __QADD16(T, U); 00233 00234 /* writing the butterfly processed i0 sample */ 00235 /* xa' = xa + xb + xc + xd */ 00236 /* ya' = ya + yb + yc + yd */ 00237 _SIMD32_OFFSET(pSi0) = __SHADD16(R, T); 00238 pSi0 += 2; 00239 00240 /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */ 00241 R = __QSUB16(R, T); 00242 00243 /* co2 & si2 are read from SIMD Coefficient pointer */ 00244 C2 = _SIMD32_OFFSET(pCoef16 + (4U * ic)); 00245 00246 #ifndef ARM_MATH_BIG_ENDIAN 00247 00248 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 00249 out1 = __SMUAD(C2, R) >> 16U; 00250 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00251 out2 = __SMUSDX(C2, R); 00252 00253 #else 00254 00255 /* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00256 out1 = 
__SMUSDX(R, C2) >> 16U; 00257 /* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 00258 out2 = __SMUAD(C2, R); 00259 00260 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00261 00262 /* Reading i0+fftLen/4 */ 00263 /* T = packed(yb, xb) */ 00264 T = _SIMD32_OFFSET(pSi1); 00265 T = __SHADD16(T, 0); 00266 T = __SHADD16(T, 0); 00267 00268 /* writing the butterfly processed i0 + fftLen/4 sample */ 00269 /* writing output(xc', yc') in little endian format */ 00270 _SIMD32_OFFSET(pSi1) = 00271 (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00272 pSi1 += 2; 00273 00274 /* Butterfly calculations */ 00275 /* U = packed(yd, xd) */ 00276 U = _SIMD32_OFFSET(pSi3); 00277 U = __SHADD16(U, 0); 00278 U = __SHADD16(U, 0); 00279 00280 /* T = packed(yb-yd, xb-xd) */ 00281 T = __QSUB16(T, U); 00282 00283 #ifndef ARM_MATH_BIG_ENDIAN 00284 00285 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 00286 R = __QASX(S, T); 00287 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 00288 S = __QSAX(S, T); 00289 00290 #else 00291 00292 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 00293 R = __QSAX(S, T); 00294 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 00295 S = __QASX(S, T); 00296 00297 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00298 00299 /* co1 & si1 are read from SIMD Coefficient pointer */ 00300 C1 = _SIMD32_OFFSET(pCoef16 + (2U * ic)); 00301 /* Butterfly process for the i0+fftLen/2 sample */ 00302 00303 #ifndef ARM_MATH_BIG_ENDIAN 00304 00305 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 00306 out1 = __SMUAD(C1, S) >> 16U; 00307 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 00308 out2 = __SMUSDX(C1, S); 00309 00310 #else 00311 00312 /* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 00313 out1 = __SMUSDX(S, C1) >> 16U; 00314 /* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 00315 out2 = __SMUAD(C1, S); 00316 00317 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00318 00319 /* writing output(xb', yb') in little endian format */ 00320 
_SIMD32_OFFSET(pSi2) = 00321 ((out2) & 0xFFFF0000) | ((out1) & 0x0000FFFF); 00322 pSi2 += 2; 00323 00324 00325 /* co3 & si3 are read from SIMD Coefficient pointer */ 00326 C3 = _SIMD32_OFFSET(pCoef16 + (6U * ic)); 00327 /* Butterfly process for the i0+3fftLen/4 sample */ 00328 00329 #ifndef ARM_MATH_BIG_ENDIAN 00330 00331 /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ 00332 out1 = __SMUAD(C3, R) >> 16U; 00333 /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ 00334 out2 = __SMUSDX(C3, R); 00335 00336 #else 00337 00338 /* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ 00339 out1 = __SMUSDX(R, C3) >> 16U; 00340 /* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ 00341 out2 = __SMUAD(C3, R); 00342 00343 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00344 00345 /* writing output(xd', yd') in little endian format */ 00346 _SIMD32_OFFSET(pSi3) = 00347 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00348 pSi3 += 2; 00349 00350 /* Twiddle coefficients index modifier */ 00351 ic = ic + twidCoefModifier; 00352 00353 } while (--j); 00354 /* data is in 4.11(q11) format */ 00355 00356 /* end of first stage process */ 00357 00358 00359 /* start of middle stage process */ 00360 00361 /* Twiddle coefficients index modifier */ 00362 twidCoefModifier <<= 2U; 00363 00364 /* Calculation of Middle stage */ 00365 for (k = fftLen / 4U; k > 4U; k >>= 2U) 00366 { 00367 /* Initializations for the middle stage */ 00368 n1 = n2; 00369 n2 >>= 2U; 00370 ic = 0U; 00371 00372 for (j = 0U; j <= (n2 - 1U); j++) 00373 { 00374 /* index calculation for the coefficients */ 00375 C1 = _SIMD32_OFFSET(pCoef16 + (2U * ic)); 00376 C2 = _SIMD32_OFFSET(pCoef16 + (4U * ic)); 00377 C3 = _SIMD32_OFFSET(pCoef16 + (6U * ic)); 00378 00379 /* Twiddle coefficients index modifier */ 00380 ic = ic + twidCoefModifier; 00381 00382 pSi0 = pSrc16 + 2 * j; 00383 pSi1 = pSi0 + 2 * n2; 00384 pSi2 = pSi1 + 2 * n2; 00385 pSi3 = pSi2 + 2 * n2; 00386 00387 /* Butterfly implementation */ 00388 for (i0 = j; i0 < fftLen; i0 
+= n1) 00389 { 00390 /* Reading i0, i0+fftLen/2 inputs */ 00391 /* Read ya (real), xa(imag) input */ 00392 T = _SIMD32_OFFSET(pSi0); 00393 00394 /* Read yc (real), xc(imag) input */ 00395 S = _SIMD32_OFFSET(pSi2); 00396 00397 /* R = packed( (ya + yc), (xa + xc)) */ 00398 R = __QADD16(T, S); 00399 00400 /* S = packed((ya - yc), (xa - xc)) */ 00401 S = __QSUB16(T, S); 00402 00403 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 00404 /* Read yb (real), xb(imag) input */ 00405 T = _SIMD32_OFFSET(pSi1); 00406 00407 /* Read yd (real), xd(imag) input */ 00408 U = _SIMD32_OFFSET(pSi3); 00409 00410 /* T = packed( (yb + yd), (xb + xd)) */ 00411 T = __QADD16(T, U); 00412 00413 /* writing the butterfly processed i0 sample */ 00414 00415 /* xa' = xa + xb + xc + xd */ 00416 /* ya' = ya + yb + yc + yd */ 00417 out1 = __SHADD16(R, T); 00418 out1 = __SHADD16(out1, 0); 00419 _SIMD32_OFFSET(pSi0) = out1; 00420 pSi0 += 2 * n1; 00421 00422 /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */ 00423 R = __SHSUB16(R, T); 00424 00425 #ifndef ARM_MATH_BIG_ENDIAN 00426 00427 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ 00428 out1 = __SMUAD(C2, R) >> 16U; 00429 00430 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00431 out2 = __SMUSDX(C2, R); 00432 00433 #else 00434 00435 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00436 out1 = __SMUSDX(R, C2) >> 16U; 00437 00438 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ 00439 out2 = __SMUAD(C2, R); 00440 00441 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00442 00443 /* Reading i0+3fftLen/4 */ 00444 /* Read yb (real), xb(imag) input */ 00445 T = _SIMD32_OFFSET(pSi1); 00446 00447 /* writing the butterfly processed i0 + fftLen/4 sample */ 00448 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 00449 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00450 _SIMD32_OFFSET(pSi1) = 00451 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00452 pSi1 += 2 * n1; 00453 00454 /* Butterfly calculations */ 00455 00456 /* Read yd (real), xd(imag) 
input */ 00457 U = _SIMD32_OFFSET(pSi3); 00458 00459 /* T = packed(yb-yd, xb-xd) */ 00460 T = __QSUB16(T, U); 00461 00462 #ifndef ARM_MATH_BIG_ENDIAN 00463 00464 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 00465 R = __SHASX(S, T); 00466 00467 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 00468 S = __SHSAX(S, T); 00469 00470 00471 /* Butterfly process for the i0+fftLen/2 sample */ 00472 out1 = __SMUAD(C1, S) >> 16U; 00473 out2 = __SMUSDX(C1, S); 00474 00475 #else 00476 00477 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 00478 R = __SHSAX(S, T); 00479 00480 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 00481 S = __SHASX(S, T); 00482 00483 00484 /* Butterfly process for the i0+fftLen/2 sample */ 00485 out1 = __SMUSDX(S, C1) >> 16U; 00486 out2 = __SMUAD(C1, S); 00487 00488 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00489 00490 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 00491 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 00492 _SIMD32_OFFSET(pSi2) = 00493 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00494 pSi2 += 2 * n1; 00495 00496 /* Butterfly process for the i0+3fftLen/4 sample */ 00497 00498 #ifndef ARM_MATH_BIG_ENDIAN 00499 00500 out1 = __SMUAD(C3, R) >> 16U; 00501 out2 = __SMUSDX(C3, R); 00502 00503 #else 00504 00505 out1 = __SMUSDX(R, C3) >> 16U; 00506 out2 = __SMUAD(C3, R); 00507 00508 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00509 00510 /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ 00511 /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ 00512 _SIMD32_OFFSET(pSi3) = 00513 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 00514 pSi3 += 2 * n1; 00515 } 00516 } 00517 /* Twiddle coefficients index modifier */ 00518 twidCoefModifier <<= 2U; 00519 } 00520 /* end of middle stage process */ 00521 00522 00523 /* data is in 10.6(q6) format for the 1024 point */ 00524 /* data is in 8.8(q8) format for the 256 point */ 00525 /* data is in 6.10(q10) format for the 64 point */ 00526 /* data is in 4.12(q12) 
format for the 16 point */ 00527 00528 /* Initializations for the last stage */ 00529 j = fftLen >> 2; 00530 00531 ptr1 = &pSrc16[0]; 00532 00533 /* start of last stage process */ 00534 00535 /* Butterfly implementation */ 00536 do 00537 { 00538 /* Read xa (real), ya(imag) input */ 00539 xaya = *__SIMD32(ptr1)++; 00540 00541 /* Read xb (real), yb(imag) input */ 00542 xbyb = *__SIMD32(ptr1)++; 00543 00544 /* Read xc (real), yc(imag) input */ 00545 xcyc = *__SIMD32(ptr1)++; 00546 00547 /* Read xd (real), yd(imag) input */ 00548 xdyd = *__SIMD32(ptr1)++; 00549 00550 /* R = packed((ya + yc), (xa + xc)) */ 00551 R = __QADD16(xaya, xcyc); 00552 00553 /* T = packed((yb + yd), (xb + xd)) */ 00554 T = __QADD16(xbyb, xdyd); 00555 00556 /* pointer updation for writing */ 00557 ptr1 = ptr1 - 8U; 00558 00559 00560 /* xa' = xa + xb + xc + xd */ 00561 /* ya' = ya + yb + yc + yd */ 00562 *__SIMD32(ptr1)++ = __SHADD16(R, T); 00563 00564 /* T = packed((yb + yd), (xb + xd)) */ 00565 T = __QADD16(xbyb, xdyd); 00566 00567 /* xc' = (xa-xb+xc-xd) */ 00568 /* yc' = (ya-yb+yc-yd) */ 00569 *__SIMD32(ptr1)++ = __SHSUB16(R, T); 00570 00571 /* S = packed((ya - yc), (xa - xc)) */ 00572 S = __QSUB16(xaya, xcyc); 00573 00574 /* Read yd (real), xd(imag) input */ 00575 /* T = packed( (yb - yd), (xb - xd)) */ 00576 U = __QSUB16(xbyb, xdyd); 00577 00578 #ifndef ARM_MATH_BIG_ENDIAN 00579 00580 /* xb' = (xa+yb-xc-yd) */ 00581 /* yb' = (ya-xb-yc+xd) */ 00582 *__SIMD32(ptr1)++ = __SHSAX(S, U); 00583 00584 00585 /* xd' = (xa-yb-xc+yd) */ 00586 /* yd' = (ya+xb-yc-xd) */ 00587 *__SIMD32(ptr1)++ = __SHASX(S, U); 00588 00589 #else 00590 00591 /* xb' = (xa+yb-xc-yd) */ 00592 /* yb' = (ya-xb-yc+xd) */ 00593 *__SIMD32(ptr1)++ = __SHASX(S, U); 00594 00595 00596 /* xd' = (xa-yb-xc+yd) */ 00597 /* yd' = (ya+xb-yc-xd) */ 00598 *__SIMD32(ptr1)++ = __SHSAX(S, U); 00599 00600 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00601 00602 } while (--j); 00603 00604 /* end of last stage process */ 00605 00606 /* output is in 
11.5(q5) format for the 1024 point */ 00607 /* output is in 9.7(q7) format for the 256 point */ 00608 /* output is in 7.9(q9) format for the 64 point */ 00609 /* output is in 5.11(q11) format for the 16 point */ 00610 00611 00612 #else 00613 00614 /* Run the below code for Cortex-M0 */ 00615 00616 q15_t R0, R1, S0, S1, T0, T1, U0, U1; 00617 q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2; 00618 uint32_t n1, n2, ic, i0, i1, i2, i3, j, k; 00619 00620 /* Total process is divided into three stages */ 00621 00622 /* process first stage, middle stages, & last stage */ 00623 00624 /* Initializations for the first stage */ 00625 n2 = fftLen; 00626 n1 = n2; 00627 00628 /* n2 = fftLen/4 */ 00629 n2 >>= 2U; 00630 00631 /* Index for twiddle coefficient */ 00632 ic = 0U; 00633 00634 /* Index for input read and output write */ 00635 i0 = 0U; 00636 j = n2; 00637 00638 /* Input is in 1.15(q15) format */ 00639 00640 /* start of first stage process */ 00641 do 00642 { 00643 /* Butterfly implementation */ 00644 00645 /* index calculation for the input as, */ 00646 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 00647 i1 = i0 + n2; 00648 i2 = i1 + n2; 00649 i3 = i2 + n2; 00650 00651 /* Reading i0, i0+fftLen/2 inputs */ 00652 00653 /* input is down scale by 4 to avoid overflow */ 00654 /* Read ya (real), xa(imag) input */ 00655 T0 = pSrc16[i0 * 2U] >> 2U; 00656 T1 = pSrc16[(i0 * 2U) + 1U] >> 2U; 00657 00658 /* input is down scale by 4 to avoid overflow */ 00659 /* Read yc (real), xc(imag) input */ 00660 S0 = pSrc16[i2 * 2U] >> 2U; 00661 S1 = pSrc16[(i2 * 2U) + 1U] >> 2U; 00662 00663 /* R0 = (ya + yc) */ 00664 R0 = __SSAT(T0 + S0, 16U); 00665 /* R1 = (xa + xc) */ 00666 R1 = __SSAT(T1 + S1, 16U); 00667 00668 /* S0 = (ya - yc) */ 00669 S0 = __SSAT(T0 - S0, 16); 00670 /* S1 = (xa - xc) */ 00671 S1 = __SSAT(T1 - S1, 16); 00672 00673 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 00674 /* input is down scale by 4 to avoid overflow */ 00675 /* Read 
yb (real), xb(imag) input */ 00676 T0 = pSrc16[i1 * 2U] >> 2U; 00677 T1 = pSrc16[(i1 * 2U) + 1U] >> 2U; 00678 00679 /* input is down scale by 4 to avoid overflow */ 00680 /* Read yd (real), xd(imag) input */ 00681 U0 = pSrc16[i3 * 2U] >> 2U; 00682 U1 = pSrc16[(i3 * 2U) + 1] >> 2U; 00683 00684 /* T0 = (yb + yd) */ 00685 T0 = __SSAT(T0 + U0, 16U); 00686 /* T1 = (xb + xd) */ 00687 T1 = __SSAT(T1 + U1, 16U); 00688 00689 /* writing the butterfly processed i0 sample */ 00690 /* ya' = ya + yb + yc + yd */ 00691 /* xa' = xa + xb + xc + xd */ 00692 pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U); 00693 pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U); 00694 00695 /* R0 = (ya + yc) - (yb + yd) */ 00696 /* R1 = (xa + xc) - (xb + xd) */ 00697 R0 = __SSAT(R0 - T0, 16U); 00698 R1 = __SSAT(R1 - T1, 16U); 00699 00700 /* co2 & si2 are read from Coefficient pointer */ 00701 Co2 = pCoef16[2U * ic * 2U]; 00702 Si2 = pCoef16[(2U * ic * 2U) + 1]; 00703 00704 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 00705 out1 = (q15_t) ((Co2 * R0 + Si2 * R1) >> 16U); 00706 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00707 out2 = (q15_t) ((-Si2 * R0 + Co2 * R1) >> 16U); 00708 00709 /* Reading i0+fftLen/4 */ 00710 /* input is down scale by 4 to avoid overflow */ 00711 /* T0 = yb, T1 = xb */ 00712 T0 = pSrc16[i1 * 2U] >> 2; 00713 T1 = pSrc16[(i1 * 2U) + 1] >> 2; 00714 00715 /* writing the butterfly processed i0 + fftLen/4 sample */ 00716 /* writing output(xc', yc') in little endian format */ 00717 pSrc16[i1 * 2U] = out1; 00718 pSrc16[(i1 * 2U) + 1] = out2; 00719 00720 /* Butterfly calculations */ 00721 /* input is down scale by 4 to avoid overflow */ 00722 /* U0 = yd, U1 = xd */ 00723 U0 = pSrc16[i3 * 2U] >> 2; 00724 U1 = pSrc16[(i3 * 2U) + 1] >> 2; 00725 /* T0 = yb-yd */ 00726 T0 = __SSAT(T0 - U0, 16); 00727 /* T1 = xb-xd */ 00728 T1 = __SSAT(T1 - U1, 16); 00729 00730 /* R1 = (ya-yc) + (xb- xd), R0 = (xa-xc) - (yb-yd)) */ 00731 R0 = (q15_t) __SSAT((q31_t) (S0 - T1), 16); 00732 R1 = 
(q15_t) __SSAT((q31_t) (S1 + T0), 16); 00733 00734 /* S1 = (ya-yc) - (xb- xd), S0 = (xa-xc) + (yb-yd)) */ 00735 S0 = (q15_t) __SSAT(((q31_t) S0 + T1), 16U); 00736 S1 = (q15_t) __SSAT(((q31_t) S1 - T0), 16U); 00737 00738 /* co1 & si1 are read from Coefficient pointer */ 00739 Co1 = pCoef16[ic * 2U]; 00740 Si1 = pCoef16[(ic * 2U) + 1]; 00741 /* Butterfly process for the i0+fftLen/2 sample */ 00742 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 00743 out1 = (q15_t) ((Si1 * S1 + Co1 * S0) >> 16); 00744 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 00745 out2 = (q15_t) ((-Si1 * S0 + Co1 * S1) >> 16); 00746 00747 /* writing output(xb', yb') in little endian format */ 00748 pSrc16[i2 * 2U] = out1; 00749 pSrc16[(i2 * 2U) + 1] = out2; 00750 00751 /* Co3 & si3 are read from Coefficient pointer */ 00752 Co3 = pCoef16[3U * (ic * 2U)]; 00753 Si3 = pCoef16[(3U * (ic * 2U)) + 1]; 00754 /* Butterfly process for the i0+3fftLen/4 sample */ 00755 /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */ 00756 out1 = (q15_t) ((Si3 * R1 + Co3 * R0) >> 16U); 00757 /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */ 00758 out2 = (q15_t) ((-Si3 * R0 + Co3 * R1) >> 16U); 00759 /* writing output(xd', yd') in little endian format */ 00760 pSrc16[i3 * 2U] = out1; 00761 pSrc16[(i3 * 2U) + 1] = out2; 00762 00763 /* Twiddle coefficients index modifier */ 00764 ic = ic + twidCoefModifier; 00765 00766 /* Updating input index */ 00767 i0 = i0 + 1U; 00768 00769 } while (--j); 00770 /* data is in 4.11(q11) format */ 00771 00772 /* end of first stage process */ 00773 00774 00775 /* start of middle stage process */ 00776 00777 /* Twiddle coefficients index modifier */ 00778 twidCoefModifier <<= 2U; 00779 00780 /* Calculation of Middle stage */ 00781 for (k = fftLen / 4U; k > 4U; k >>= 2U) 00782 { 00783 /* Initializations for the middle stage */ 00784 n1 = n2; 00785 n2 >>= 2U; 00786 ic = 0U; 00787 00788 for (j = 0U; j <= (n2 - 1U); j++) 00789 { 00790 /* index calculation for the 
coefficients */ 00791 Co1 = pCoef16[ic * 2U]; 00792 Si1 = pCoef16[(ic * 2U) + 1U]; 00793 Co2 = pCoef16[2U * (ic * 2U)]; 00794 Si2 = pCoef16[(2U * (ic * 2U)) + 1U]; 00795 Co3 = pCoef16[3U * (ic * 2U)]; 00796 Si3 = pCoef16[(3U * (ic * 2U)) + 1U]; 00797 00798 /* Twiddle coefficients index modifier */ 00799 ic = ic + twidCoefModifier; 00800 00801 /* Butterfly implementation */ 00802 for (i0 = j; i0 < fftLen; i0 += n1) 00803 { 00804 /* index calculation for the input as, */ 00805 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 00806 i1 = i0 + n2; 00807 i2 = i1 + n2; 00808 i3 = i2 + n2; 00809 00810 /* Reading i0, i0+fftLen/2 inputs */ 00811 /* Read ya (real), xa(imag) input */ 00812 T0 = pSrc16[i0 * 2U]; 00813 T1 = pSrc16[(i0 * 2U) + 1U]; 00814 00815 /* Read yc (real), xc(imag) input */ 00816 S0 = pSrc16[i2 * 2U]; 00817 S1 = pSrc16[(i2 * 2U) + 1U]; 00818 00819 /* R0 = (ya + yc), R1 = (xa + xc) */ 00820 R0 = __SSAT(T0 + S0, 16); 00821 R1 = __SSAT(T1 + S1, 16); 00822 00823 /* S0 = (ya - yc), S1 =(xa - xc) */ 00824 S0 = __SSAT(T0 - S0, 16); 00825 S1 = __SSAT(T1 - S1, 16); 00826 00827 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 00828 /* Read yb (real), xb(imag) input */ 00829 T0 = pSrc16[i1 * 2U]; 00830 T1 = pSrc16[(i1 * 2U) + 1U]; 00831 00832 /* Read yd (real), xd(imag) input */ 00833 U0 = pSrc16[i3 * 2U]; 00834 U1 = pSrc16[(i3 * 2U) + 1U]; 00835 00836 00837 /* T0 = (yb + yd), T1 = (xb + xd) */ 00838 T0 = __SSAT(T0 + U0, 16); 00839 T1 = __SSAT(T1 + U1, 16); 00840 00841 /* writing the butterfly processed i0 sample */ 00842 00843 /* xa' = xa + xb + xc + xd */ 00844 /* ya' = ya + yb + yc + yd */ 00845 out1 = ((R0 >> 1U) + (T0 >> 1U)) >> 1U; 00846 out2 = ((R1 >> 1U) + (T1 >> 1U)) >> 1U; 00847 00848 pSrc16[i0 * 2U] = out1; 00849 pSrc16[(2U * i0) + 1U] = out2; 00850 00851 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */ 00852 R0 = (R0 >> 1U) - (T0 >> 1U); 00853 R1 = (R1 >> 1U) - (T1 >> 1U); 00854 00855 /* (ya-yb+yc-yd)* 
(si2) + (xa-xb+xc-xd)* co2 */ 00856 out1 = (q15_t) ((Co2 * R0 + Si2 * R1) >> 16U); 00857 00858 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00859 out2 = (q15_t) ((-Si2 * R0 + Co2 * R1) >> 16U); 00860 00861 /* Reading i0+3fftLen/4 */ 00862 /* Read yb (real), xb(imag) input */ 00863 T0 = pSrc16[i1 * 2U]; 00864 T1 = pSrc16[(i1 * 2U) + 1U]; 00865 00866 /* writing the butterfly processed i0 + fftLen/4 sample */ 00867 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 00868 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 00869 pSrc16[i1 * 2U] = out1; 00870 pSrc16[(i1 * 2U) + 1U] = out2; 00871 00872 /* Butterfly calculations */ 00873 00874 /* Read yd (real), xd(imag) input */ 00875 U0 = pSrc16[i3 * 2U]; 00876 U1 = pSrc16[(i3 * 2U) + 1U]; 00877 00878 /* T0 = yb-yd, T1 = xb-xd */ 00879 T0 = __SSAT(T0 - U0, 16); 00880 T1 = __SSAT(T1 - U1, 16); 00881 00882 /* R0 = (ya-yc) + (xb- xd), R1 = (xa-xc) - (yb-yd)) */ 00883 R0 = (S0 >> 1U) - (T1 >> 1U); 00884 R1 = (S1 >> 1U) + (T0 >> 1U); 00885 00886 /* S0 = (ya-yc) - (xb- xd), S1 = (xa-xc) + (yb-yd)) */ 00887 S0 = (S0 >> 1U) + (T1 >> 1U); 00888 S1 = (S1 >> 1U) - (T0 >> 1U); 00889 00890 /* Butterfly process for the i0+fftLen/2 sample */ 00891 out1 = (q15_t) ((Co1 * S0 + Si1 * S1) >> 16U); 00892 00893 out2 = (q15_t) ((-Si1 * S0 + Co1 * S1) >> 16U); 00894 00895 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 00896 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 00897 pSrc16[i2 * 2U] = out1; 00898 pSrc16[(i2 * 2U) + 1U] = out2; 00899 00900 /* Butterfly process for the i0+3fftLen/4 sample */ 00901 out1 = (q15_t) ((Si3 * R1 + Co3 * R0) >> 16U); 00902 00903 out2 = (q15_t) ((-Si3 * R0 + Co3 * R1) >> 16U); 00904 /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */ 00905 /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */ 00906 pSrc16[i3 * 2U] = out1; 00907 pSrc16[(i3 * 2U) + 1U] = out2; 00908 } 00909 } 00910 /* Twiddle coefficients index modifier */ 00911 twidCoefModifier <<= 2U; 00912 } 00913 /* end of middle 
stage process */ 00914 00915 00916 /* data is in 10.6(q6) format for the 1024 point */ 00917 /* data is in 8.8(q8) format for the 256 point */ 00918 /* data is in 6.10(q10) format for the 64 point */ 00919 /* data is in 4.12(q12) format for the 16 point */ 00920 00921 /* Initializations for the last stage */ 00922 n1 = n2; 00923 n2 >>= 2U; 00924 00925 /* start of last stage process */ 00926 00927 /* Butterfly implementation */ 00928 for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1) 00929 { 00930 /* index calculation for the input as, */ 00931 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 00932 i1 = i0 + n2; 00933 i2 = i1 + n2; 00934 i3 = i2 + n2; 00935 00936 /* Reading i0, i0+fftLen/2 inputs */ 00937 /* Read ya (real), xa(imag) input */ 00938 T0 = pSrc16[i0 * 2U]; 00939 T1 = pSrc16[(i0 * 2U) + 1U]; 00940 00941 /* Read yc (real), xc(imag) input */ 00942 S0 = pSrc16[i2 * 2U]; 00943 S1 = pSrc16[(i2 * 2U) + 1U]; 00944 00945 /* R0 = (ya + yc), R1 = (xa + xc) */ 00946 R0 = __SSAT(T0 + S0, 16U); 00947 R1 = __SSAT(T1 + S1, 16U); 00948 00949 /* S0 = (ya - yc), S1 = (xa - xc) */ 00950 S0 = __SSAT(T0 - S0, 16U); 00951 S1 = __SSAT(T1 - S1, 16U); 00952 00953 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 00954 /* Read yb (real), xb(imag) input */ 00955 T0 = pSrc16[i1 * 2U]; 00956 T1 = pSrc16[(i1 * 2U) + 1U]; 00957 /* Read yd (real), xd(imag) input */ 00958 U0 = pSrc16[i3 * 2U]; 00959 U1 = pSrc16[(i3 * 2U) + 1U]; 00960 00961 /* T0 = (yb + yd), T1 = (xb + xd)) */ 00962 T0 = __SSAT(T0 + U0, 16U); 00963 T1 = __SSAT(T1 + U1, 16U); 00964 00965 /* writing the butterfly processed i0 sample */ 00966 /* xa' = xa + xb + xc + xd */ 00967 /* ya' = ya + yb + yc + yd */ 00968 pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U); 00969 pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U); 00970 00971 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */ 00972 R0 = (R0 >> 1U) - (T0 >> 1U); 00973 R1 = (R1 >> 1U) - (T1 >> 1U); 00974 /* Read yb (real), xb(imag) 
input */ 00975 T0 = pSrc16[i1 * 2U]; 00976 T1 = pSrc16[(i1 * 2U) + 1U]; 00977 00978 /* writing the butterfly processed i0 + fftLen/4 sample */ 00979 /* xc' = (xa-xb+xc-xd) */ 00980 /* yc' = (ya-yb+yc-yd) */ 00981 pSrc16[i1 * 2U] = R0; 00982 pSrc16[(i1 * 2U) + 1U] = R1; 00983 00984 /* Read yd (real), xd(imag) input */ 00985 U0 = pSrc16[i3 * 2U]; 00986 U1 = pSrc16[(i3 * 2U) + 1U]; 00987 /* T0 = (yb - yd), T1 = (xb - xd) */ 00988 T0 = __SSAT(T0 - U0, 16U); 00989 T1 = __SSAT(T1 - U1, 16U); 00990 00991 /* writing the butterfly processed i0 + fftLen/2 sample */ 00992 /* xb' = (xa+yb-xc-yd) */ 00993 /* yb' = (ya-xb-yc+xd) */ 00994 pSrc16[i2 * 2U] = (S0 >> 1U) + (T1 >> 1U); 00995 pSrc16[(i2 * 2U) + 1U] = (S1 >> 1U) - (T0 >> 1U); 00996 00997 /* writing the butterfly processed i0 + 3fftLen/4 sample */ 00998 /* xd' = (xa-yb-xc+yd) */ 00999 /* yd' = (ya+xb-yc-xd) */ 01000 pSrc16[i3 * 2U] = (S0 >> 1U) - (T1 >> 1U); 01001 pSrc16[(i3 * 2U) + 1U] = (S1 >> 1U) + (T0 >> 1U); 01002 01003 } 01004 01005 /* end of last stage process */ 01006 01007 /* output is in 11.5(q5) format for the 1024 point */ 01008 /* output is in 9.7(q7) format for the 256 point */ 01009 /* output is in 7.9(q9) format for the 64 point */ 01010 /* output is in 5.11(q11) format for the 16 point */ 01011 01012 #endif /* #if defined (ARM_MATH_DSP) */ 01013 01014 } 01015 01016 01017 /** 01018 * @brief Core function for the Q15 CIFFT butterfly process. 01019 * @param[in, out] *pSrc16 points to the in-place buffer of Q15 data type. 01020 * @param[in] fftLen length of the FFT. 01021 * @param[in] *pCoef16 points to twiddle coefficient buffer. 01022 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 01023 * @return none. 
01024 */ 01025 01026 /* 01027 * Radix-4 IFFT algorithm used is : 01028 * 01029 * CIFFT uses same twiddle coefficients as CFFT function 01030 * x[k] = x[n] + (j)k * x[n + fftLen/4] + (-1)k * x[n+fftLen/2] + (-j)k * x[n+3*fftLen/4] 01031 * 01032 * 01033 * IFFT is implemented with following changes in equations from FFT 01034 * 01035 * Input real and imaginary data: 01036 * x(n) = xa + j * ya 01037 * x(n+N/4 ) = xb + j * yb 01038 * x(n+N/2 ) = xc + j * yc 01039 * x(n+3N 4) = xd + j * yd 01040 * 01041 * 01042 * Output real and imaginary data: 01043 * x(4r) = xa'+ j * ya' 01044 * x(4r+1) = xb'+ j * yb' 01045 * x(4r+2) = xc'+ j * yc' 01046 * x(4r+3) = xd'+ j * yd' 01047 * 01048 * 01049 * Twiddle factors for radix-4 IFFT: 01050 * Wn = co1 + j * (si1) 01051 * W2n = co2 + j * (si2) 01052 * W3n = co3 + j * (si3) 01053 01054 * The real and imaginary output values for the radix-4 butterfly are 01055 * xa' = xa + xb + xc + xd 01056 * ya' = ya + yb + yc + yd 01057 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) 01058 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) 01059 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) 01060 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) 01061 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) 01062 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) 01063 * 01064 */ 01065 01066 void arm_radix4_butterfly_inverse_q15( 01067 q15_t * pSrc16, 01068 uint32_t fftLen, 01069 q15_t * pCoef16, 01070 uint32_t twidCoefModifier) 01071 { 01072 01073 #if defined (ARM_MATH_DSP) 01074 01075 /* Run the below code for Cortex-M4 and Cortex-M3 */ 01076 01077 q31_t R, S, T, U; 01078 q31_t C1, C2, C3, out1, out2; 01079 uint32_t n1, n2, ic, i0, j, k; 01080 01081 q15_t *ptr1; 01082 q15_t *pSi0; 01083 q15_t *pSi1; 01084 q15_t *pSi2; 01085 q15_t *pSi3; 01086 01087 q31_t xaya, xbyb, xcyc, xdyd; 01088 01089 /* Total process is divided into three stages */ 01090 01091 /* process first stage, middle stages, & last stage */ 01092 01093 /* Initializations for the first 
stage */ 01094 n2 = fftLen; 01095 n1 = n2; 01096 01097 /* n2 = fftLen/4 */ 01098 n2 >>= 2U; 01099 01100 /* Index for twiddle coefficient */ 01101 ic = 0U; 01102 01103 /* Index for input read and output write */ 01104 j = n2; 01105 01106 pSi0 = pSrc16; 01107 pSi1 = pSi0 + 2 * n2; 01108 pSi2 = pSi1 + 2 * n2; 01109 pSi3 = pSi2 + 2 * n2; 01110 01111 /* Input is in 1.15(q15) format */ 01112 01113 /* start of first stage process */ 01114 do 01115 { 01116 /* Butterfly implementation */ 01117 01118 /* Reading i0, i0+fftLen/2 inputs */ 01119 /* Read ya (real), xa(imag) input */ 01120 T = _SIMD32_OFFSET(pSi0); 01121 T = __SHADD16(T, 0); 01122 T = __SHADD16(T, 0); 01123 01124 /* Read yc (real), xc(imag) input */ 01125 S = _SIMD32_OFFSET(pSi2); 01126 S = __SHADD16(S, 0); 01127 S = __SHADD16(S, 0); 01128 01129 /* R = packed((ya + yc), (xa + xc) ) */ 01130 R = __QADD16(T, S); 01131 01132 /* S = packed((ya - yc), (xa - xc) ) */ 01133 S = __QSUB16(T, S); 01134 01135 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 01136 /* Read yb (real), xb(imag) input */ 01137 T = _SIMD32_OFFSET(pSi1); 01138 T = __SHADD16(T, 0); 01139 T = __SHADD16(T, 0); 01140 01141 /* Read yd (real), xd(imag) input */ 01142 U = _SIMD32_OFFSET(pSi3); 01143 U = __SHADD16(U, 0); 01144 U = __SHADD16(U, 0); 01145 01146 /* T = packed((yb + yd), (xb + xd) ) */ 01147 T = __QADD16(T, U); 01148 01149 /* writing the butterfly processed i0 sample */ 01150 /* xa' = xa + xb + xc + xd */ 01151 /* ya' = ya + yb + yc + yd */ 01152 _SIMD32_OFFSET(pSi0) = __SHADD16(R, T); 01153 pSi0 += 2; 01154 01155 /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */ 01156 R = __QSUB16(R, T); 01157 01158 /* co2 & si2 are read from SIMD Coefficient pointer */ 01159 C2 = _SIMD32_OFFSET(pCoef16 + (4U * ic)); 01160 01161 #ifndef ARM_MATH_BIG_ENDIAN 01162 01163 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 01164 out1 = __SMUSD(C2, R) >> 16U; 01165 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 01166 out2 = __SMUADX(C2, R); 
01167 01168 #else 01169 01170 /* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 01171 out1 = __SMUADX(C2, R) >> 16U; 01172 /* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 01173 out2 = __SMUSD(__QSUB16(0, C2), R); 01174 01175 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01176 01177 /* Reading i0+fftLen/4 */ 01178 /* T = packed(yb, xb) */ 01179 T = _SIMD32_OFFSET(pSi1); 01180 T = __SHADD16(T, 0); 01181 T = __SHADD16(T, 0); 01182 01183 /* writing the butterfly processed i0 + fftLen/4 sample */ 01184 /* writing output(xc', yc') in little endian format */ 01185 _SIMD32_OFFSET(pSi1) = 01186 (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 01187 pSi1 += 2; 01188 01189 /* Butterfly calculations */ 01190 /* U = packed(yd, xd) */ 01191 U = _SIMD32_OFFSET(pSi3); 01192 U = __SHADD16(U, 0); 01193 U = __SHADD16(U, 0); 01194 01195 /* T = packed(yb-yd, xb-xd) */ 01196 T = __QSUB16(T, U); 01197 01198 #ifndef ARM_MATH_BIG_ENDIAN 01199 01200 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 01201 R = __QSAX(S, T); 01202 /* S = packed((ya-yc) + (xb- xd), (xa-xc) - (yb-yd)) */ 01203 S = __QASX(S, T); 01204 01205 #else 01206 01207 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 01208 R = __QASX(S, T); 01209 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 01210 S = __QSAX(S, T); 01211 01212 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01213 01214 /* co1 & si1 are read from SIMD Coefficient pointer */ 01215 C1 = _SIMD32_OFFSET(pCoef16 + (2U * ic)); 01216 /* Butterfly process for the i0+fftLen/2 sample */ 01217 01218 #ifndef ARM_MATH_BIG_ENDIAN 01219 01220 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 01221 out1 = __SMUSD(C1, S) >> 16U; 01222 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 01223 out2 = __SMUADX(C1, S); 01224 01225 #else 01226 01227 /* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 01228 out1 = __SMUADX(C1, S) >> 16U; 01229 /* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 01230 out2 = __SMUSD(__QSUB16(0, C1), S); 01231 
01232 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01233 01234 /* writing output(xb', yb') in little endian format */ 01235 _SIMD32_OFFSET(pSi2) = 01236 ((out2) & 0xFFFF0000) | ((out1) & 0x0000FFFF); 01237 pSi2 += 2; 01238 01239 01240 /* co3 & si3 are read from SIMD Coefficient pointer */ 01241 C3 = _SIMD32_OFFSET(pCoef16 + (6U * ic)); 01242 /* Butterfly process for the i0+3fftLen/4 sample */ 01243 01244 #ifndef ARM_MATH_BIG_ENDIAN 01245 01246 /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ 01247 out1 = __SMUSD(C3, R) >> 16U; 01248 /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ 01249 out2 = __SMUADX(C3, R); 01250 01251 #else 01252 01253 /* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ 01254 out1 = __SMUADX(C3, R) >> 16U; 01255 /* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ 01256 out2 = __SMUSD(__QSUB16(0, C3), R); 01257 01258 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01259 01260 /* writing output(xd', yd') in little endian format */ 01261 _SIMD32_OFFSET(pSi3) = 01262 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 01263 pSi3 += 2; 01264 01265 /* Twiddle coefficients index modifier */ 01266 ic = ic + twidCoefModifier; 01267 01268 } while (--j); 01269 /* data is in 4.11(q11) format */ 01270 01271 /* end of first stage process */ 01272 01273 01274 /* start of middle stage process */ 01275 01276 /* Twiddle coefficients index modifier */ 01277 twidCoefModifier <<= 2U; 01278 01279 /* Calculation of Middle stage */ 01280 for (k = fftLen / 4U; k > 4U; k >>= 2U) 01281 { 01282 /* Initializations for the middle stage */ 01283 n1 = n2; 01284 n2 >>= 2U; 01285 ic = 0U; 01286 01287 for (j = 0U; j <= (n2 - 1U); j++) 01288 { 01289 /* index calculation for the coefficients */ 01290 C1 = _SIMD32_OFFSET(pCoef16 + (2U * ic)); 01291 C2 = _SIMD32_OFFSET(pCoef16 + (4U * ic)); 01292 C3 = _SIMD32_OFFSET(pCoef16 + (6U * ic)); 01293 01294 /* Twiddle coefficients index modifier */ 01295 ic = ic + twidCoefModifier; 01296 01297 pSi0 = pSrc16 + 2 * j; 01298 pSi1 = pSi0 + 2 * n2; 
01299 pSi2 = pSi1 + 2 * n2; 01300 pSi3 = pSi2 + 2 * n2; 01301 01302 /* Butterfly implementation */ 01303 for (i0 = j; i0 < fftLen; i0 += n1) 01304 { 01305 /* Reading i0, i0+fftLen/2 inputs */ 01306 /* Read ya (real), xa(imag) input */ 01307 T = _SIMD32_OFFSET(pSi0); 01308 01309 /* Read yc (real), xc(imag) input */ 01310 S = _SIMD32_OFFSET(pSi2); 01311 01312 /* R = packed( (ya + yc), (xa + xc)) */ 01313 R = __QADD16(T, S); 01314 01315 /* S = packed((ya - yc), (xa - xc)) */ 01316 S = __QSUB16(T, S); 01317 01318 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 01319 /* Read yb (real), xb(imag) input */ 01320 T = _SIMD32_OFFSET(pSi1); 01321 01322 /* Read yd (real), xd(imag) input */ 01323 U = _SIMD32_OFFSET(pSi3); 01324 01325 /* T = packed( (yb + yd), (xb + xd)) */ 01326 T = __QADD16(T, U); 01327 01328 /* writing the butterfly processed i0 sample */ 01329 01330 /* xa' = xa + xb + xc + xd */ 01331 /* ya' = ya + yb + yc + yd */ 01332 out1 = __SHADD16(R, T); 01333 out1 = __SHADD16(out1, 0); 01334 _SIMD32_OFFSET(pSi0) = out1; 01335 pSi0 += 2 * n1; 01336 01337 /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */ 01338 R = __SHSUB16(R, T); 01339 01340 #ifndef ARM_MATH_BIG_ENDIAN 01341 01342 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ 01343 out1 = __SMUSD(C2, R) >> 16U; 01344 01345 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 01346 out2 = __SMUADX(C2, R); 01347 01348 #else 01349 01350 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 01351 out1 = __SMUADX(R, C2) >> 16U; 01352 01353 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ 01354 out2 = __SMUSD(__QSUB16(0, C2), R); 01355 01356 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01357 01358 /* Reading i0+3fftLen/4 */ 01359 /* Read yb (real), xb(imag) input */ 01360 T = _SIMD32_OFFSET(pSi1); 01361 01362 /* writing the butterfly processed i0 + fftLen/4 sample */ 01363 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 01364 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 01365 _SIMD32_OFFSET(pSi1) = 01366 
((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 01367 pSi1 += 2 * n1; 01368 01369 /* Butterfly calculations */ 01370 01371 /* Read yd (real), xd(imag) input */ 01372 U = _SIMD32_OFFSET(pSi3); 01373 01374 /* T = packed(yb-yd, xb-xd) */ 01375 T = __QSUB16(T, U); 01376 01377 #ifndef ARM_MATH_BIG_ENDIAN 01378 01379 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 01380 R = __SHSAX(S, T); 01381 01382 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 01383 S = __SHASX(S, T); 01384 01385 01386 /* Butterfly process for the i0+fftLen/2 sample */ 01387 out1 = __SMUSD(C1, S) >> 16U; 01388 out2 = __SMUADX(C1, S); 01389 01390 #else 01391 01392 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 01393 R = __SHASX(S, T); 01394 01395 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ 01396 S = __SHSAX(S, T); 01397 01398 01399 /* Butterfly process for the i0+fftLen/2 sample */ 01400 out1 = __SMUADX(S, C1) >> 16U; 01401 out2 = __SMUSD(__QSUB16(0, C1), S); 01402 01403 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01404 01405 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 01406 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 01407 _SIMD32_OFFSET(pSi2) = 01408 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 01409 pSi2 += 2 * n1; 01410 01411 /* Butterfly process for the i0+3fftLen/4 sample */ 01412 01413 #ifndef ARM_MATH_BIG_ENDIAN 01414 01415 out1 = __SMUSD(C3, R) >> 16U; 01416 out2 = __SMUADX(C3, R); 01417 01418 #else 01419 01420 out1 = __SMUADX(C3, R) >> 16U; 01421 out2 = __SMUSD(__QSUB16(0, C3), R); 01422 01423 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01424 01425 /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ 01426 /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ 01427 _SIMD32_OFFSET(pSi3) = 01428 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 01429 pSi3 += 2 * n1; 01430 } 01431 } 01432 /* Twiddle coefficients index modifier */ 01433 twidCoefModifier <<= 2U; 01434 } 01435 /* end of middle stage process */ 01436 01437 /* data is in 10.6(q6) format 
for the 1024 point */ 01438 /* data is in 8.8(q8) format for the 256 point */ 01439 /* data is in 6.10(q10) format for the 64 point */ 01440 /* data is in 4.12(q12) format for the 16 point */ 01441 01442 /* Initializations for the last stage */ 01443 j = fftLen >> 2; 01444 01445 ptr1 = &pSrc16[0]; 01446 01447 /* start of last stage process */ 01448 01449 /* Butterfly implementation */ 01450 do 01451 { 01452 /* Read xa (real), ya(imag) input */ 01453 xaya = *__SIMD32(ptr1)++; 01454 01455 /* Read xb (real), yb(imag) input */ 01456 xbyb = *__SIMD32(ptr1)++; 01457 01458 /* Read xc (real), yc(imag) input */ 01459 xcyc = *__SIMD32(ptr1)++; 01460 01461 /* Read xd (real), yd(imag) input */ 01462 xdyd = *__SIMD32(ptr1)++; 01463 01464 /* R = packed((ya + yc), (xa + xc)) */ 01465 R = __QADD16(xaya, xcyc); 01466 01467 /* T = packed((yb + yd), (xb + xd)) */ 01468 T = __QADD16(xbyb, xdyd); 01469 01470 /* pointer updation for writing */ 01471 ptr1 = ptr1 - 8U; 01472 01473 01474 /* xa' = xa + xb + xc + xd */ 01475 /* ya' = ya + yb + yc + yd */ 01476 *__SIMD32(ptr1)++ = __SHADD16(R, T); 01477 01478 /* T = packed((yb + yd), (xb + xd)) */ 01479 T = __QADD16(xbyb, xdyd); 01480 01481 /* xc' = (xa-xb+xc-xd) */ 01482 /* yc' = (ya-yb+yc-yd) */ 01483 *__SIMD32(ptr1)++ = __SHSUB16(R, T); 01484 01485 /* S = packed((ya - yc), (xa - xc)) */ 01486 S = __QSUB16(xaya, xcyc); 01487 01488 /* Read yd (real), xd(imag) input */ 01489 /* T = packed( (yb - yd), (xb - xd)) */ 01490 U = __QSUB16(xbyb, xdyd); 01491 01492 #ifndef ARM_MATH_BIG_ENDIAN 01493 01494 /* xb' = (xa+yb-xc-yd) */ 01495 /* yb' = (ya-xb-yc+xd) */ 01496 *__SIMD32(ptr1)++ = __SHASX(S, U); 01497 01498 01499 /* xd' = (xa-yb-xc+yd) */ 01500 /* yd' = (ya+xb-yc-xd) */ 01501 *__SIMD32(ptr1)++ = __SHSAX(S, U); 01502 01503 #else 01504 01505 /* xb' = (xa+yb-xc-yd) */ 01506 /* yb' = (ya-xb-yc+xd) */ 01507 *__SIMD32(ptr1)++ = __SHSAX(S, U); 01508 01509 01510 /* xd' = (xa-yb-xc+yd) */ 01511 /* yd' = (ya+xb-yc-xd) */ 01512 *__SIMD32(ptr1)++ = 
__SHASX(S, U); 01513 01514 01515 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 01516 01517 } while (--j); 01518 01519 /* end of last stage process */ 01520 01521 /* output is in 11.5(q5) format for the 1024 point */ 01522 /* output is in 9.7(q7) format for the 256 point */ 01523 /* output is in 7.9(q9) format for the 64 point */ 01524 /* output is in 5.11(q11) format for the 16 point */ 01525 01526 01527 #else 01528 01529 /* Run the below code for Cortex-M0 */ 01530 01531 q15_t R0, R1, S0, S1, T0, T1, U0, U1; 01532 q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2; 01533 uint32_t n1, n2, ic, i0, i1, i2, i3, j, k; 01534 01535 /* Total process is divided into three stages */ 01536 01537 /* process first stage, middle stages, & last stage */ 01538 01539 /* Initializations for the first stage */ 01540 n2 = fftLen; 01541 n1 = n2; 01542 01543 /* n2 = fftLen/4 */ 01544 n2 >>= 2U; 01545 01546 /* Index for twiddle coefficient */ 01547 ic = 0U; 01548 01549 /* Index for input read and output write */ 01550 i0 = 0U; 01551 01552 j = n2; 01553 01554 /* Input is in 1.15(q15) format */ 01555 01556 /* Start of first stage process */ 01557 do 01558 { 01559 /* Butterfly implementation */ 01560 01561 /* index calculation for the input as, */ 01562 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 01563 i1 = i0 + n2; 01564 i2 = i1 + n2; 01565 i3 = i2 + n2; 01566 01567 /* Reading i0, i0+fftLen/2 inputs */ 01568 /* input is down scale by 4 to avoid overflow */ 01569 /* Read ya (real), xa(imag) input */ 01570 T0 = pSrc16[i0 * 2U] >> 2U; 01571 T1 = pSrc16[(i0 * 2U) + 1U] >> 2U; 01572 /* input is down scale by 4 to avoid overflow */ 01573 /* Read yc (real), xc(imag) input */ 01574 S0 = pSrc16[i2 * 2U] >> 2U; 01575 S1 = pSrc16[(i2 * 2U) + 1U] >> 2U; 01576 01577 /* R0 = (ya + yc), R1 = (xa + xc) */ 01578 R0 = __SSAT(T0 + S0, 16U); 01579 R1 = __SSAT(T1 + S1, 16U); 01580 /* S0 = (ya - yc), S1 = (xa - xc) */ 01581 S0 = __SSAT(T0 - S0, 16U); 01582 S1 = __SSAT(T1 - 
S1, 16U); 01583 01584 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 01585 /* input is down scale by 4 to avoid overflow */ 01586 /* Read yb (real), xb(imag) input */ 01587 T0 = pSrc16[i1 * 2U] >> 2U; 01588 T1 = pSrc16[(i1 * 2U) + 1U] >> 2U; 01589 /* Read yd (real), xd(imag) input */ 01590 /* input is down scale by 4 to avoid overflow */ 01591 U0 = pSrc16[i3 * 2U] >> 2U; 01592 U1 = pSrc16[(i3 * 2U) + 1U] >> 2U; 01593 01594 /* T0 = (yb + yd), T1 = (xb + xd) */ 01595 T0 = __SSAT(T0 + U0, 16U); 01596 T1 = __SSAT(T1 + U1, 16U); 01597 01598 /* writing the butterfly processed i0 sample */ 01599 /* xa' = xa + xb + xc + xd */ 01600 /* ya' = ya + yb + yc + yd */ 01601 pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U); 01602 pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U); 01603 01604 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc)- (xb + xd) */ 01605 R0 = __SSAT(R0 - T0, 16U); 01606 R1 = __SSAT(R1 - T1, 16U); 01607 /* co2 & si2 are read from Coefficient pointer */ 01608 Co2 = pCoef16[2U * ic * 2U]; 01609 Si2 = pCoef16[(2U * ic * 2U) + 1U]; 01610 /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */ 01611 out1 = (q15_t) ((Co2 * R0 - Si2 * R1) >> 16U); 01612 /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */ 01613 out2 = (q15_t) ((Si2 * R0 + Co2 * R1) >> 16U); 01614 01615 /* Reading i0+fftLen/4 */ 01616 /* input is down scale by 4 to avoid overflow */ 01617 /* T0 = yb, T1 = xb */ 01618 T0 = pSrc16[i1 * 2U] >> 2U; 01619 T1 = pSrc16[(i1 * 2U) + 1U] >> 2U; 01620 01621 /* writing the butterfly processed i0 + fftLen/4 sample */ 01622 /* writing output(xc', yc') in little endian format */ 01623 pSrc16[i1 * 2U] = out1; 01624 pSrc16[(i1 * 2U) + 1U] = out2; 01625 01626 /* Butterfly calculations */ 01627 /* input is down scale by 4 to avoid overflow */ 01628 /* U0 = yd, U1 = xd) */ 01629 U0 = pSrc16[i3 * 2U] >> 2U; 01630 U1 = pSrc16[(i3 * 2U) + 1U] >> 2U; 01631 01632 /* T0 = yb-yd, T1 = xb-xd) */ 01633 T0 = __SSAT(T0 - U0, 16U); 01634 T1 = __SSAT(T1 - U1, 16U); 01635 /* R0 = (ya-yc) - (xb- 
xd) , R1 = (xa-xc) + (yb-yd) */ 01636 R0 = (q15_t) __SSAT((q31_t) (S0 + T1), 16); 01637 R1 = (q15_t) __SSAT((q31_t) (S1 - T0), 16); 01638 /* S = (ya-yc) + (xb- xd), S1 = (xa-xc) - (yb-yd) */ 01639 S0 = (q15_t) __SSAT((q31_t) (S0 - T1), 16); 01640 S1 = (q15_t) __SSAT((q31_t) (S1 + T0), 16); 01641 01642 /* co1 & si1 are read from Coefficient pointer */ 01643 Co1 = pCoef16[ic * 2U]; 01644 Si1 = pCoef16[(ic * 2U) + 1U]; 01645 /* Butterfly process for the i0+fftLen/2 sample */ 01646 /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */ 01647 out1 = (q15_t) ((Co1 * S0 - Si1 * S1) >> 16U); 01648 /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */ 01649 out2 = (q15_t) ((Si1 * S0 + Co1 * S1) >> 16U); 01650 /* writing output(xb', yb') in little endian format */ 01651 pSrc16[i2 * 2U] = out1; 01652 pSrc16[(i2 * 2U) + 1U] = out2; 01653 01654 /* Co3 & si3 are read from Coefficient pointer */ 01655 Co3 = pCoef16[3U * ic * 2U]; 01656 Si3 = pCoef16[(3U * ic * 2U) + 1U]; 01657 /* Butterfly process for the i0+3fftLen/4 sample */ 01658 /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */ 01659 out1 = (q15_t) ((Co3 * R0 - Si3 * R1) >> 16U); 01660 /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */ 01661 out2 = (q15_t) ((Si3 * R0 + Co3 * R1) >> 16U); 01662 /* writing output(xd', yd') in little endian format */ 01663 pSrc16[i3 * 2U] = out1; 01664 pSrc16[(i3 * 2U) + 1U] = out2; 01665 01666 /* Twiddle coefficients index modifier */ 01667 ic = ic + twidCoefModifier; 01668 01669 /* Updating input index */ 01670 i0 = i0 + 1U; 01671 01672 } while (--j); 01673 01674 /* End of first stage process */ 01675 01676 /* data is in 4.11(q11) format */ 01677 01678 01679 /* Start of Middle stage process */ 01680 01681 /* Twiddle coefficients index modifier */ 01682 twidCoefModifier <<= 2U; 01683 01684 /* Calculation of Middle stage */ 01685 for (k = fftLen / 4U; k > 4U; k >>= 2U) 01686 { 01687 /* Initializations for the middle stage */ 01688 n1 = n2; 01689 n2 >>= 2U; 01690 ic = 0U; 01691 01692 for 
(j = 0U; j <= (n2 - 1U); j++) 01693 { 01694 /* index calculation for the coefficients */ 01695 Co1 = pCoef16[ic * 2U]; 01696 Si1 = pCoef16[(ic * 2U) + 1U]; 01697 Co2 = pCoef16[2U * ic * 2U]; 01698 Si2 = pCoef16[2U * ic * 2U + 1U]; 01699 Co3 = pCoef16[3U * ic * 2U]; 01700 Si3 = pCoef16[(3U * ic * 2U) + 1U]; 01701 01702 /* Twiddle coefficients index modifier */ 01703 ic = ic + twidCoefModifier; 01704 01705 /* Butterfly implementation */ 01706 for (i0 = j; i0 < fftLen; i0 += n1) 01707 { 01708 /* index calculation for the input as, */ 01709 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 01710 i1 = i0 + n2; 01711 i2 = i1 + n2; 01712 i3 = i2 + n2; 01713 01714 /* Reading i0, i0+fftLen/2 inputs */ 01715 /* Read ya (real), xa(imag) input */ 01716 T0 = pSrc16[i0 * 2U]; 01717 T1 = pSrc16[(i0 * 2U) + 1U]; 01718 01719 /* Read yc (real), xc(imag) input */ 01720 S0 = pSrc16[i2 * 2U]; 01721 S1 = pSrc16[(i2 * 2U) + 1U]; 01722 01723 01724 /* R0 = (ya + yc), R1 = (xa + xc) */ 01725 R0 = __SSAT(T0 + S0, 16U); 01726 R1 = __SSAT(T1 + S1, 16U); 01727 /* S0 = (ya - yc), S1 = (xa - xc) */ 01728 S0 = __SSAT(T0 - S0, 16U); 01729 S1 = __SSAT(T1 - S1, 16U); 01730 01731 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 01732 /* Read yb (real), xb(imag) input */ 01733 T0 = pSrc16[i1 * 2U]; 01734 T1 = pSrc16[(i1 * 2U) + 1U]; 01735 01736 /* Read yd (real), xd(imag) input */ 01737 U0 = pSrc16[i3 * 2U]; 01738 U1 = pSrc16[(i3 * 2U) + 1U]; 01739 01740 /* T0 = (yb + yd), T1 = (xb + xd) */ 01741 T0 = __SSAT(T0 + U0, 16U); 01742 T1 = __SSAT(T1 + U1, 16U); 01743 01744 /* writing the butterfly processed i0 sample */ 01745 /* xa' = xa + xb + xc + xd */ 01746 /* ya' = ya + yb + yc + yd */ 01747 pSrc16[i0 * 2U] = ((R0 >> 1U) + (T0 >> 1U)) >> 1U; 01748 pSrc16[(i0 * 2U) + 1U] = ((R1 >> 1U) + (T1 >> 1U)) >> 1U; 01749 01750 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */ 01751 R0 = (R0 >> 1U) - (T0 >> 1U); 01752 R1 = (R1 >> 1U) - (T1 >> 1U); 01753 01754 /* 
(ya-yb+yc-yd)* (si2) - (xa-xb+xc-xd)* co2 */ 01755 out1 = (q15_t) ((Co2 * R0 - Si2 * R1) >> 16); 01756 /* (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */ 01757 out2 = (q15_t) ((Si2 * R0 + Co2 * R1) >> 16); 01758 01759 /* Reading i0+3fftLen/4 */ 01760 /* Read yb (real), xb(imag) input */ 01761 T0 = pSrc16[i1 * 2U]; 01762 T1 = pSrc16[(i1 * 2U) + 1U]; 01763 01764 /* writing the butterfly processed i0 + fftLen/4 sample */ 01765 /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */ 01766 /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */ 01767 pSrc16[i1 * 2U] = out1; 01768 pSrc16[(i1 * 2U) + 1U] = out2; 01769 01770 /* Butterfly calculations */ 01771 /* Read yd (real), xd(imag) input */ 01772 U0 = pSrc16[i3 * 2U]; 01773 U1 = pSrc16[(i3 * 2U) + 1U]; 01774 01775 /* T0 = yb-yd, T1 = xb-xd) */ 01776 T0 = __SSAT(T0 - U0, 16U); 01777 T1 = __SSAT(T1 - U1, 16U); 01778 01779 /* R0 = (ya-yc) - (xb- xd) , R1 = (xa-xc) + (yb-yd) */ 01780 R0 = (S0 >> 1U) + (T1 >> 1U); 01781 R1 = (S1 >> 1U) - (T0 >> 1U); 01782 01783 /* S1 = (ya-yc) + (xb- xd), S1 = (xa-xc) - (yb-yd) */ 01784 S0 = (S0 >> 1U) - (T1 >> 1U); 01785 S1 = (S1 >> 1U) + (T0 >> 1U); 01786 01787 /* Butterfly process for the i0+fftLen/2 sample */ 01788 out1 = (q15_t) ((Co1 * S0 - Si1 * S1) >> 16U); 01789 out2 = (q15_t) ((Si1 * S0 + Co1 * S1) >> 16U); 01790 /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */ 01791 /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */ 01792 pSrc16[i2 * 2U] = out1; 01793 pSrc16[(i2 * 2U) + 1U] = out2; 01794 01795 /* Butterfly process for the i0+3fftLen/4 sample */ 01796 out1 = (q15_t) ((Co3 * R0 - Si3 * R1) >> 16U); 01797 01798 out2 = (q15_t) ((Si3 * R0 + Co3 * R1) >> 16U); 01799 /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */ 01800 /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */ 01801 pSrc16[i3 * 2U] = out1; 01802 pSrc16[(i3 * 2U) + 1U] = out2; 01803 01804 01805 } 01806 } 01807 /* Twiddle coefficients index modifier */ 01808 twidCoefModifier <<= 2U; 01809 } 01810 /* End of Middle 
stages process */ 01811 01812 01813 /* data is in 10.6(q6) format for the 1024 point */ 01814 /* data is in 8.8(q8) format for the 256 point */ 01815 /* data is in 6.10(q10) format for the 64 point */ 01816 /* data is in 4.12(q12) format for the 16 point */ 01817 01818 /* start of last stage process */ 01819 01820 01821 /* Initializations for the last stage */ 01822 n1 = n2; 01823 n2 >>= 2U; 01824 01825 /* Butterfly implementation */ 01826 for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1) 01827 { 01828 /* index calculation for the input as, */ 01829 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ 01830 i1 = i0 + n2; 01831 i2 = i1 + n2; 01832 i3 = i2 + n2; 01833 01834 /* Reading i0, i0+fftLen/2 inputs */ 01835 /* Read ya (real), xa(imag) input */ 01836 T0 = pSrc16[i0 * 2U]; 01837 T1 = pSrc16[(i0 * 2U) + 1U]; 01838 /* Read yc (real), xc(imag) input */ 01839 S0 = pSrc16[i2 * 2U]; 01840 S1 = pSrc16[(i2 * 2U) + 1U]; 01841 01842 /* R0 = (ya + yc), R1 = (xa + xc) */ 01843 R0 = __SSAT(T0 + S0, 16U); 01844 R1 = __SSAT(T1 + S1, 16U); 01845 /* S0 = (ya - yc), S1 = (xa - xc) */ 01846 S0 = __SSAT(T0 - S0, 16U); 01847 S1 = __SSAT(T1 - S1, 16U); 01848 01849 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 01850 /* Read yb (real), xb(imag) input */ 01851 T0 = pSrc16[i1 * 2U]; 01852 T1 = pSrc16[(i1 * 2U) + 1U]; 01853 /* Read yd (real), xd(imag) input */ 01854 U0 = pSrc16[i3 * 2U]; 01855 U1 = pSrc16[(i3 * 2U) + 1U]; 01856 01857 /* T0 = (yb + yd), T1 = (xb + xd) */ 01858 T0 = __SSAT(T0 + U0, 16U); 01859 T1 = __SSAT(T1 + U1, 16U); 01860 01861 /* writing the butterfly processed i0 sample */ 01862 /* xa' = xa + xb + xc + xd */ 01863 /* ya' = ya + yb + yc + yd */ 01864 pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U); 01865 pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U); 01866 01867 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */ 01868 R0 = (R0 >> 1U) - (T0 >> 1U); 01869 R1 = (R1 >> 1U) - (T1 >> 1U); 01870 01871 /* Read yb (real), xb(imag) 
input */ 01872 T0 = pSrc16[i1 * 2U]; 01873 T1 = pSrc16[(i1 * 2U) + 1U]; 01874 01875 /* writing the butterfly processed i0 + fftLen/4 sample */ 01876 /* xc' = (xa-xb+xc-xd) */ 01877 /* yc' = (ya-yb+yc-yd) */ 01878 pSrc16[i1 * 2U] = R0; 01879 pSrc16[(i1 * 2U) + 1U] = R1; 01880 01881 /* Read yd (real), xd(imag) input */ 01882 U0 = pSrc16[i3 * 2U]; 01883 U1 = pSrc16[(i3 * 2U) + 1U]; 01884 /* T0 = (yb - yd), T1 = (xb - xd) */ 01885 T0 = __SSAT(T0 - U0, 16U); 01886 T1 = __SSAT(T1 - U1, 16U); 01887 01888 /* writing the butterfly processed i0 + fftLen/2 sample */ 01889 /* xb' = (xa-yb-xc+yd) */ 01890 /* yb' = (ya+xb-yc-xd) */ 01891 pSrc16[i2 * 2U] = (S0 >> 1U) - (T1 >> 1U); 01892 pSrc16[(i2 * 2U) + 1U] = (S1 >> 1U) + (T0 >> 1U); 01893 01894 01895 /* writing the butterfly processed i0 + 3fftLen/4 sample */ 01896 /* xd' = (xa+yb-xc-yd) */ 01897 /* yd' = (ya-xb-yc+xd) */ 01898 pSrc16[i3 * 2U] = (S0 >> 1U) + (T1 >> 1U); 01899 pSrc16[(i3 * 2U) + 1U] = (S1 >> 1U) - (T0 >> 1U); 01900 } 01901 /* end of last stage process */ 01902 01903 /* output is in 11.5(q5) format for the 1024 point */ 01904 /* output is in 9.7(q7) format for the 256 point */ 01905 /* output is in 7.9(q9) format for the 64 point */ 01906 /* output is in 5.11(q11) format for the 16 point */ 01907 01908 #endif /* #if defined (ARM_MATH_DSP) */ 01909 01910 }
Generated on Wed Jul 13 2022 16:03:34 by
