Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
arm_cfft_radix4_q31.c
00001 /* ---------------------------------------------------------------------- 00002 * Project: CMSIS DSP Library 00003 * Title: arm_cfft_radix4_q31.c 00004 * Description: This file has function definition of Radix-4 FFT & IFFT function and 00005 * In-place bit reversal using bit reversal table 00006 * 00007 * $Date: 27. January 2017 00008 * $Revision: V.1.5.1 00009 * 00010 * Target Processor: Cortex-M cores 00011 * -------------------------------------------------------------------- */ 00012 /* 00013 * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. 00014 * 00015 * SPDX-License-Identifier: Apache-2.0 00016 * 00017 * Licensed under the Apache License, Version 2.0 (the License); you may 00018 * not use this file except in compliance with the License. 00019 * You may obtain a copy of the License at 00020 * 00021 * www.apache.org/licenses/LICENSE-2.0 00022 * 00023 * Unless required by applicable law or agreed to in writing, software 00024 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 00025 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00026 * See the License for the specific language governing permissions and 00027 * limitations under the License. 00028 */ 00029 00030 #include "arm_math.h" 00031 00032 void arm_radix4_butterfly_inverse_q31( 00033 q31_t * pSrc, 00034 uint32_t fftLen, 00035 q31_t * pCoef, 00036 uint32_t twidCoefModifier); 00037 00038 void arm_radix4_butterfly_q31( 00039 q31_t * pSrc, 00040 uint32_t fftLen, 00041 q31_t * pCoef, 00042 uint32_t twidCoefModifier); 00043 00044 void arm_bitreversal_q31( 00045 q31_t * pSrc, 00046 uint32_t fftLen, 00047 uint16_t bitRevFactor, 00048 uint16_t * pBitRevTab); 00049 00050 /** 00051 * @ingroup groupTransforms 00052 */ 00053 00054 /** 00055 * @addtogroup ComplexFFT 00056 * @{ 00057 */ 00058 00059 /** 00060 * @details 00061 * @brief Processing function for the Q31 CFFT/CIFFT. 00062 * @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q31 and will be removed 00063 * @param[in] *S points to an instance of the Q31 CFFT/CIFFT structure. 00064 * @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place. 00065 * @return none. 00066 * 00067 * \par Input and output formats: 00068 * \par 00069 * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process. 00070 * Hence the output format is different for different FFT sizes. 00071 * The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT: 00072 * \par 00073 * \image html CFFTQ31.gif "Input and Output Formats for Q31 CFFT" 00074 * \image html CIFFTQ31.gif "Input and Output Formats for Q31 CIFFT" 00075 * 00076 */ 00077 00078 void arm_cfft_radix4_q31( 00079 const arm_cfft_radix4_instance_q31 * S, 00080 q31_t * pSrc) 00081 { 00082 if (S->ifftFlag == 1U) 00083 { 00084 /* Complex IFFT radix-4 */ 00085 arm_radix4_butterfly_inverse_q31(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier); 00086 } 00087 else 00088 { 00089 /* Complex FFT radix-4 */ 00090 arm_radix4_butterfly_q31(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier); 00091 } 00092 00093 if (S->bitReverseFlag == 1U) 00094 { 00095 /* Bit Reversal */ 00096 arm_bitreversal_q31(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable); 00097 } 00098 00099 } 00100 00101 /** 00102 * @} end of ComplexFFT group 00103 */ 00104 00105 /* 00106 * Radix-4 FFT algorithm used is : 00107 * 00108 * Input real and imaginary data: 00109 * x(n) = xa + j * ya 00110 * x(n+N/4 ) = xb + j * yb 00111 * x(n+N/2 ) = xc + j * yc 00112 * x(n+3N 4) = xd + j * yd 00113 * 00114 * 00115 * Output real and imaginary data: 00116 * x(4r) = xa'+ j * ya' 00117 * x(4r+1) = xb'+ j * yb' 00118 * x(4r+2) = xc'+ j * yc' 00119 * x(4r+3) = xd'+ j * yd' 00120 * 00121 * 00122 * Twiddle factors for radix-4 FFT: 00123 * Wn = co1 + j * (- si1) 00124 * W2n = co2 + j * (- si2) 00125 * W3n = co3 + j * (- si3) 00126 * 00127 * Butterfly implementation: 00128 * xa' = xa + xb + xc + xd 00129 * ya' = ya + yb + yc + yd 00130 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) 00131 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) 00132 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) 00133 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) 00134 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) 00135 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) 00136 * 00137 */ 00138 00139 /** 00140 * @brief Core function for the Q31 CFFT butterfly process. 00141 * @param[in, out] *pSrc points to the in-place buffer of Q31 data type. 00142 * @param[in] fftLen length of the FFT. 00143 * @param[in] *pCoef points to twiddle coefficient buffer. 00144 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 00145 * @return none. 00146 */ 00147 00148 void arm_radix4_butterfly_q31( 00149 q31_t * pSrc, 00150 uint32_t fftLen, 00151 q31_t * pCoef, 00152 uint32_t twidCoefModifier) 00153 { 00154 #if defined(ARM_MATH_CM7) 00155 uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k; 00156 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3; 00157 00158 q31_t xa, xb, xc, xd; 00159 q31_t ya, yb, yc, yd; 00160 q31_t xa_out, xb_out, xc_out, xd_out; 00161 q31_t ya_out, yb_out, yc_out, yd_out; 00162 00163 q31_t *ptr1; 00164 q63_t xaya, xbyb, xcyc, xdyd; 00165 /* Total process is divided into three stages */ 00166 00167 /* process first stage, middle stages, & last stage */ 00168 00169 00170 /* start of first stage process */ 00171 00172 /* Initializations for the first stage */ 00173 n2 = fftLen; 00174 n1 = n2; 00175 /* n2 = fftLen/4 */ 00176 n2 >>= 2U; 00177 i0 = 0U; 00178 ia1 = 0U; 00179 00180 j = n2; 00181 00182 /* Calculation of first stage */ 00183 do 00184 { 00185 /* index calculation for the input as, */ 00186 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */ 00187 i1 = i0 + n2; 00188 i2 = i1 + n2; 00189 i3 = i2 + n2; 00190 00191 /* input is in 1.31(q31) format and provide 4 guard bits for the input */ 00192 00193 /* Butterfly implementation */ 00194 /* xa + xc */ 00195 r1 = (pSrc[(2U * i0)] >> 4U) + (pSrc[(2U * i2)] >> 4U); 00196 /* xa - xc */ 00197 r2 = (pSrc[2U * i0] >> 4U) - (pSrc[2U * i2] >> 4U); 00198 00199 /* xb + xd */ 00200 t1 = (pSrc[2U * i1] >> 4U) + (pSrc[2U * i3] >> 4U); 00201 00202 /* ya + yc */ 00203 s1 = (pSrc[(2U * i0) + 1U] >> 4U) + (pSrc[(2U * i2) + 1U] >> 4U); 00204 /* ya - yc */ 00205 s2 = (pSrc[(2U * i0) + 1U] >> 4U) - (pSrc[(2U * i2) + 1U] >> 4U); 00206 00207 /* xa' = xa + xb + xc + xd */ 00208 pSrc[2U * i0] = (r1 + t1); 00209 /* (xa + xc) - (xb + xd) */ 00210 r1 = r1 - t1; 00211 /* yb + yd */ 00212 t2 = (pSrc[(2U * i1) + 1U] >> 4U) + (pSrc[(2U * i3) + 1U] >> 4U); 00213 00214 /* ya' = ya + yb + yc + yd */ 00215 pSrc[(2U * i0) + 1U] = (s1 + t2); 00216 00217 /* (ya + yc) - (yb + yd) */ 00218 s1 = s1 - t2; 00219 00220 /* yb - yd */ 00221 t1 = (pSrc[(2U * i1) + 1U] >> 4U) - (pSrc[(2U * i3) + 1U] >> 4U); 00222 /* xb - xd */ 00223 t2 = (pSrc[2U * i1] >> 4U) - (pSrc[2U * i3] >> 4U); 00224 00225 /* index calculation for the coefficients */ 00226 ia2 = 2U * ia1; 00227 co2 = pCoef[ia2 * 2U]; 00228 si2 = pCoef[(ia2 * 2U) + 1U]; 00229 00230 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */ 00231 pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) + 00232 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1U; 00233 00234 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */ 00235 pSrc[(2U * i1) + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32)) - 00236 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1U; 00237 00238 /* (xa - xc) + (yb - yd) */ 00239 r1 = r2 + t1; 00240 /* (xa - xc) - (yb - yd) */ 00241 r2 = r2 - t1; 00242 00243 /* (ya - yc) - (xb - xd) */ 00244 s1 = s2 - t2; 00245 /* (ya - yc) + (xb - xd) */ 00246 s2 = s2 + t2; 00247 00248 co1 = pCoef[ia1 * 2U]; 00249 si1 = pCoef[(ia1 * 2U) + 1U]; 00250 00251 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */ 00252 pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) + 00253 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1U; 00254 00255 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */ 00256 pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) - 00257 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1U; 00258 00259 /* index calculation for the coefficients */ 00260 ia3 = 3U * ia1; 00261 co3 = pCoef[ia3 * 2U]; 00262 si3 = pCoef[(ia3 * 2U) + 1U]; 00263 00264 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */ 00265 pSrc[2U * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) + 00266 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1U; 00267 00268 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */ 00269 pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) - 00270 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1U; 00271 00272 /* Twiddle coefficients index modifier */ 00273 ia1 = ia1 + twidCoefModifier; 00274 00275 /* Updating input index */ 00276 i0 = i0 + 1U; 00277 00278 } while (--j); 00279 00280 /* end of first stage process */ 00281 00282 /* data is in 5.27(q27) format */ 00283 00284 00285 /* start of Middle stages process */ 00286 00287 00288 /* each stage in middle stages provides two down scaling of the input */ 00289 00290 twidCoefModifier <<= 2U; 00291 00292 00293 for (k = fftLen / 4U; k > 4U; k >>= 2U) 00294 { 00295 /* Initializations for the first stage */ 00296 n1 = n2; 00297 n2 >>= 2U; 00298 ia1 = 0U; 00299 00300 /* Calculation of first stage */ 00301 for (j = 0U; j <= (n2 - 1U); j++) 00302 { 00303 /* index calculation for the coefficients */ 00304 ia2 = ia1 + ia1; 00305 ia3 = ia2 + ia1; 00306 co1 = pCoef[ia1 * 2U]; 00307 si1 = pCoef[(ia1 * 2U) + 1U]; 00308 co2 = pCoef[ia2 * 2U]; 00309 si2 = pCoef[(ia2 * 2U) + 1U]; 00310 co3 = pCoef[ia3 * 2U]; 00311 si3 = pCoef[(ia3 * 2U) + 1U]; 00312 /* Twiddle coefficients index modifier */ 00313 ia1 = ia1 + twidCoefModifier; 00314 00315 for (i0 = j; i0 < fftLen; i0 += n1) 00316 { 00317 /* index calculation for the input as, */ 00318 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */ 00319 i1 = i0 + n2; 00320 i2 = i1 + n2; 00321 i3 = i2 + n2; 00322 00323 /* Butterfly implementation */ 00324 /* xa + xc */ 00325 r1 = pSrc[2U * i0] + pSrc[2U * i2]; 00326 /* xa - xc */ 00327 r2 = pSrc[2U * i0] - pSrc[2U * i2]; 00328 00329 /* ya + yc */ 00330 s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U]; 00331 /* ya - yc */ 00332 s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U]; 00333 00334 /* xb + xd */ 00335 t1 = pSrc[2U * i1] + pSrc[2U * i3]; 00336 00337 /* xa' = xa + xb + xc + xd */ 00338 pSrc[2U * i0] = (r1 + t1) >> 2U; 00339 /* xa + xc -(xb + xd) */ 00340 r1 = r1 - t1; 00341 00342 /* yb + yd */ 00343 t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U]; 00344 /* ya' = ya + yb + yc + yd */ 00345 pSrc[(2U * i0) + 1U] = (s1 + t2) >> 2U; 00346 00347 /* (ya + yc) - (yb + yd) */ 00348 s1 = s1 - t2; 00349 00350 /* (yb - yd) */ 00351 t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U]; 00352 /* (xb - xd) */ 00353 t2 = pSrc[2U * i1] - pSrc[2U * i3]; 00354 00355 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */ 00356 pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) + 00357 ((int32_t) (((q63_t) s1 * si2) >> 32))) >> 1U; 00358 00359 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */ 00360 pSrc[(2U * i1) + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32)) - 00361 ((int32_t) (((q63_t) r1 * si2) >> 32))) >> 1U; 00362 00363 /* (xa - xc) + (yb - yd) */ 00364 r1 = r2 + t1; 00365 /* (xa - xc) - (yb - yd) */ 00366 r2 = r2 - t1; 00367 00368 /* (ya - yc) - (xb - xd) */ 00369 s1 = s2 - t2; 00370 /* (ya - yc) + (xb - xd) */ 00371 s2 = s2 + t2; 00372 00373 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */ 00374 pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) + 00375 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1U; 00376 00377 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */ 00378 pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) - 00379 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1U; 00380 00381 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */ 00382 pSrc[2U * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) + 00383 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1U; 00384 00385 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */ 00386 pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) - 00387 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1U; 00388 } 00389 } 00390 twidCoefModifier <<= 2U; 00391 } 00392 #else 00393 uint32_t n1, n2, ia1, ia2, ia3, i0, j, k; 00394 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3; 00395 00396 q31_t xa, xb, xc, xd; 00397 q31_t ya, yb, yc, yd; 00398 q31_t xa_out, xb_out, xc_out, xd_out; 00399 q31_t ya_out, yb_out, yc_out, yd_out; 00400 00401 q31_t *ptr1; 00402 q31_t *pSi0; 00403 q31_t *pSi1; 00404 q31_t *pSi2; 00405 q31_t *pSi3; 00406 q63_t xaya, xbyb, xcyc, xdyd; 00407 /* Total process is divided into three stages */ 00408 00409 /* process first stage, middle stages, & last stage */ 00410 00411 00412 /* start of first stage process */ 00413 00414 /* Initializations for the first stage */ 00415 n2 = fftLen; 00416 n1 = n2; 00417 /* n2 = fftLen/4 */ 00418 n2 >>= 2U; 00419 00420 ia1 = 0U; 00421 00422 j = n2; 00423 00424 pSi0 = pSrc; 00425 pSi1 = pSi0 + 2 * n2; 00426 pSi2 = pSi1 + 2 * n2; 00427 pSi3 = pSi2 + 2 * n2; 00428 00429 /* Calculation of first stage */ 00430 do 00431 { 00432 /* input is in 1.31(q31) format and provide 4 guard bits for the input */ 00433 00434 /* Butterfly implementation */ 00435 /* xa + xc */ 00436 r1 = (pSi0[0] >> 4U) + (pSi2[0] >> 4U); 00437 /* xa - xc */ 00438 r2 = (pSi0[0] >> 4U) - (pSi2[0] >> 4U); 00439 00440 /* xb + xd */ 00441 t1 = (pSi1[0] >> 4U) + (pSi3[0] >> 4U); 00442 00443 /* ya + yc */ 00444 s1 = (pSi0[1] >> 4U) + (pSi2[1] >> 4U); 00445 /* ya - yc */ 00446 s2 = (pSi0[1] >> 4U) - (pSi2[1] >> 4U); 00447 00448 /* xa' = xa + xb + xc + xd */ 00449 *pSi0++ = (r1 + t1); 00450 /* (xa + xc) - (xb + xd) */ 00451 r1 = r1 - t1; 00452 /* yb + yd */ 00453 t2 = (pSi1[1] >> 4U) + (pSi3[1] >> 4U); 00454 00455 /* ya' = ya + yb + yc + yd */ 00456 *pSi0++ = (s1 + t2); 00457 00458 /* (ya + yc) - (yb + yd) */ 00459 s1 = s1 - t2; 00460 00461 /* yb - yd */ 00462 t1 = (pSi1[1] >> 4U) - (pSi3[1] >> 4U); 00463 /* xb - xd */ 00464 t2 = (pSi1[0] >> 4U) - (pSi3[0] >> 4U); 00465 00466 /* index calculation for the coefficients */ 00467 ia2 = 2U * ia1; 00468 co2 = pCoef[ia2 * 2U]; 00469 si2 = pCoef[(ia2 * 2U) + 1U]; 00470 00471 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */ 00472 *pSi1++ = (((int32_t) (((q63_t) r1 * co2) >> 32)) + 00473 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1U; 00474 00475 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */ 00476 *pSi1++ = (((int32_t) (((q63_t) s1 * co2) >> 32)) - 00477 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1U; 00478 00479 /* (xa - xc) + (yb - yd) */ 00480 r1 = r2 + t1; 00481 /* (xa - xc) - (yb - yd) */ 00482 r2 = r2 - t1; 00483 00484 /* (ya - yc) - (xb - xd) */ 00485 s1 = s2 - t2; 00486 /* (ya - yc) + (xb - xd) */ 00487 s2 = s2 + t2; 00488 00489 co1 = pCoef[ia1 * 2U]; 00490 si1 = pCoef[(ia1 * 2U) + 1U]; 00491 00492 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */ 00493 *pSi2++ = (((int32_t) (((q63_t) r1 * co1) >> 32)) + 00494 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1U; 00495 00496 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */ 00497 *pSi2++ = (((int32_t) (((q63_t) s1 * co1) >> 32)) - 00498 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1U; 00499 00500 /* index calculation for the coefficients */ 00501 ia3 = 3U * ia1; 00502 co3 = pCoef[ia3 * 2U]; 00503 si3 = pCoef[(ia3 * 2U) + 1U]; 00504 00505 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */ 00506 *pSi3++ = (((int32_t) (((q63_t) r2 * co3) >> 32)) + 00507 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1U; 00508 00509 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */ 00510 *pSi3++ = (((int32_t) (((q63_t) s2 * co3) >> 32)) - 00511 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1U; 00512 00513 /* Twiddle coefficients index modifier */ 00514 ia1 = ia1 + twidCoefModifier; 00515 00516 } while (--j); 00517 00518 /* end of first stage process */ 00519 00520 /* data is in 5.27(q27) format */ 00521 00522 00523 /* start of Middle stages process */ 00524 00525 00526 /* each stage in middle stages provides two down scaling of the input */ 00527 00528 twidCoefModifier <<= 2U; 00529 00530 00531 for (k = fftLen / 4U; k > 4U; k >>= 2U) 00532 { 00533 /* Initializations for the first stage */ 00534 n1 = n2; 00535 n2 >>= 2U; 00536 ia1 = 0U; 00537 00538 /* Calculation of first stage */ 00539 for (j = 0U; j <= (n2 - 1U); j++) 00540 { 00541 /* index calculation for the coefficients */ 00542 ia2 = ia1 + ia1; 00543 ia3 = ia2 + ia1; 00544 co1 = pCoef[ia1 * 2U]; 00545 si1 = pCoef[(ia1 * 2U) + 1U]; 00546 co2 = pCoef[ia2 * 2U]; 00547 si2 = pCoef[(ia2 * 2U) + 1U]; 00548 co3 = pCoef[ia3 * 2U]; 00549 si3 = pCoef[(ia3 * 2U) + 1U]; 00550 /* Twiddle coefficients index modifier */ 00551 ia1 = ia1 + twidCoefModifier; 00552 00553 pSi0 = pSrc + 2 * j; 00554 pSi1 = pSi0 + 2 * n2; 00555 pSi2 = pSi1 + 2 * n2; 00556 pSi3 = pSi2 + 2 * n2; 00557 00558 for (i0 = j; i0 < fftLen; i0 += n1) 00559 { 00560 /* Butterfly implementation */ 00561 /* xa + xc */ 00562 r1 = pSi0[0] + pSi2[0]; 00563 00564 /* xa - xc */ 00565 r2 = pSi0[0] - pSi2[0]; 00566 00567 00568 /* ya + yc */ 00569 s1 = pSi0[1] + pSi2[1]; 00570 00571 /* ya - yc */ 00572 s2 = pSi0[1] - pSi2[1]; 00573 00574 00575 /* xb + xd */ 00576 t1 = pSi1[0] + pSi3[0]; 00577 00578 00579 /* xa' = xa + xb + xc + xd */ 00580 pSi0[0] = (r1 + t1) >> 2U; 00581 /* xa + xc -(xb + xd) */ 00582 r1 = r1 - t1; 00583 00584 /* yb + yd */ 00585 t2 = pSi1[1] + pSi3[1]; 00586 00587 /* ya' = ya + yb + yc + yd */ 00588 pSi0[1] = (s1 + t2) >> 2U; 00589 pSi0 += 2 * n1; 00590 00591 /* (ya + yc) - (yb + yd) */ 00592 s1 = s1 - t2; 00593 00594 /* (yb - yd) */ 00595 t1 = pSi1[1] - pSi3[1]; 00596 00597 /* (xb - xd) */ 00598 t2 = pSi1[0] - pSi3[0]; 00599 00600 00601 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */ 00602 pSi1[0] = (((int32_t) (((q63_t) r1 * co2) >> 32)) + 00603 ((int32_t) (((q63_t) s1 * si2) >> 32))) >> 1U; 00604 00605 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */ 00606 pSi1[1] = (((int32_t) (((q63_t) s1 * co2) >> 32)) - 00607 ((int32_t) (((q63_t) r1 * si2) >> 32))) >> 1U; 00608 pSi1 += 2 * n1; 00609 00610 /* (xa - xc) + (yb - yd) */ 00611 r1 = r2 + t1; 00612 /* (xa - xc) - (yb - yd) */ 00613 r2 = r2 - t1; 00614 00615 /* (ya - yc) - (xb - xd) */ 00616 s1 = s2 - t2; 00617 /* (ya - yc) + (xb - xd) */ 00618 s2 = s2 + t2; 00619 00620 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */ 00621 pSi2[0] = (((int32_t) (((q63_t) r1 * co1) >> 32)) + 00622 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1U; 00623 00624 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */ 00625 pSi2[1] = (((int32_t) (((q63_t) s1 * co1) >> 32)) - 00626 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1U; 00627 pSi2 += 2 * n1; 00628 00629 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */ 00630 pSi3[0] = (((int32_t) (((q63_t) r2 * co3) >> 32)) + 00631 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1U; 00632 00633 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */ 00634 pSi3[1] = (((int32_t) (((q63_t) s2 * co3) >> 32)) - 00635 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1U; 00636 pSi3 += 2 * n1; 00637 } 00638 } 00639 twidCoefModifier <<= 2U; 00640 } 00641 #endif 00642 00643 /* End of Middle stages process */ 00644 00645 /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */ 00646 /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */ 00647 /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */ 00648 /* data is in 5.27(q27) format for the 16 point as there are no middle stages */ 00649 00650 00651 /* start of Last stage process */ 00652 /* Initializations for the last stage */ 00653 j = fftLen >> 2; 00654 ptr1 = &pSrc[0]; 00655 00656 /* Calculations of last stage */ 00657 do 00658 { 00659 00660 #ifndef ARM_MATH_BIG_ENDIAN 00661 00662 /* Read xa (real), ya(imag) input */ 00663 xaya = *__SIMD64(ptr1)++; 00664 xa = (q31_t) xaya; 00665 ya = (q31_t) (xaya >> 32); 00666 00667 /* Read xb (real), yb(imag) input */ 00668 xbyb = *__SIMD64(ptr1)++; 00669 xb = (q31_t) xbyb; 00670 yb = (q31_t) (xbyb >> 32); 00671 00672 /* Read xc (real), yc(imag) input */ 00673 xcyc = *__SIMD64(ptr1)++; 00674 xc = (q31_t) xcyc; 00675 yc = (q31_t) (xcyc >> 32); 00676 00677 /* Read xc (real), yc(imag) input */ 00678 xdyd = *__SIMD64(ptr1)++; 00679 xd = (q31_t) xdyd; 00680 yd = (q31_t) (xdyd >> 32); 00681 00682 #else 00683 00684 /* Read xa (real), ya(imag) input */ 00685 xaya = *__SIMD64(ptr1)++; 00686 ya = (q31_t) xaya; 00687 xa = (q31_t) (xaya >> 32); 00688 00689 /* Read xb (real), yb(imag) input */ 00690 xbyb = *__SIMD64(ptr1)++; 00691 yb = (q31_t) xbyb; 00692 xb = (q31_t) (xbyb >> 32); 00693 00694 /* Read xc (real), yc(imag) input */ 00695 xcyc = *__SIMD64(ptr1)++; 00696 yc = (q31_t) xcyc; 00697 xc = (q31_t) (xcyc >> 32); 00698 00699 /* Read xc (real), yc(imag) input */ 00700 xdyd = *__SIMD64(ptr1)++; 00701 yd = (q31_t) xdyd; 00702 xd = (q31_t) (xdyd >> 32); 00703 00704 00705 #endif 00706 00707 /* xa' = xa + xb + xc + xd */ 00708 xa_out = xa + xb + xc + xd; 00709 00710 /* ya' = ya + yb + yc + yd */ 00711 ya_out = ya + yb + yc + yd; 00712 00713 /* pointer updation for writing */ 00714 ptr1 = ptr1 - 8U; 00715 00716 /* writing xa' and ya' */ 00717 *ptr1++ = xa_out; 00718 *ptr1++ = ya_out; 00719 00720 xc_out = (xa - xb + xc - xd); 00721 yc_out = (ya - yb + yc - yd); 00722 00723 /* writing xc' and yc' */ 00724 *ptr1++ = xc_out; 00725 *ptr1++ = yc_out; 00726 00727 xb_out = (xa + yb - xc - yd); 00728 yb_out = (ya - xb - yc + xd); 00729 00730 /* writing xb' and yb' */ 00731 *ptr1++ = xb_out; 00732 *ptr1++ = yb_out; 00733 00734 xd_out = (xa - yb - xc + yd); 00735 yd_out = (ya + xb - yc - xd); 00736 00737 /* writing xd' and yd' */ 00738 *ptr1++ = xd_out; 00739 *ptr1++ = yd_out; 00740 00741 00742 } while (--j); 00743 00744 /* output is in 11.21(q21) format for the 1024 point */ 00745 /* output is in 9.23(q23) format for the 256 point */ 00746 /* output is in 7.25(q25) format for the 64 point */ 00747 /* output is in 5.27(q27) format for the 16 point */ 00748 00749 /* End of last stage process */ 00750 00751 } 00752 00753 00754 /** 00755 * @brief Core function for the Q31 CIFFT butterfly process. 00756 * @param[in, out] *pSrc points to the in-place buffer of Q31 data type. 00757 * @param[in] fftLen length of the FFT. 00758 * @param[in] *pCoef points to twiddle coefficient buffer. 00759 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 00760 * @return none. 00761 */ 00762 00763 00764 /* 00765 * Radix-4 IFFT algorithm used is : 00766 * 00767 * CIFFT uses same twiddle coefficients as CFFT Function 00768 * x[k] = x[n] + (j)k * x[n + fftLen/4] + (-1)k * x[n+fftLen/2] + (-j)k * x[n+3*fftLen/4] 00769 * 00770 * 00771 * IFFT is implemented with following changes in equations from FFT 00772 * 00773 * Input real and imaginary data: 00774 * x(n) = xa + j * ya 00775 * x(n+N/4 ) = xb + j * yb 00776 * x(n+N/2 ) = xc + j * yc 00777 * x(n+3N 4) = xd + j * yd 00778 * 00779 * 00780 * Output real and imaginary data: 00781 * x(4r) = xa'+ j * ya' 00782 * x(4r+1) = xb'+ j * yb' 00783 * x(4r+2) = xc'+ j * yc' 00784 * x(4r+3) = xd'+ j * yd' 00785 * 00786 * 00787 * Twiddle factors for radix-4 IFFT: 00788 * Wn = co1 + j * (si1) 00789 * W2n = co2 + j * (si2) 00790 * W3n = co3 + j * (si3) 00791 00792 * The real and imaginary output values for the radix-4 butterfly are 00793 * xa' = xa + xb + xc + xd 00794 * ya' = ya + yb + yc + yd 00795 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) 00796 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) 00797 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) 00798 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) 00799 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) 00800 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) 00801 * 00802 */ 00803 00804 void arm_radix4_butterfly_inverse_q31( 00805 q31_t * pSrc, 00806 uint32_t fftLen, 00807 q31_t * pCoef, 00808 uint32_t twidCoefModifier) 00809 { 00810 #if defined(ARM_MATH_CM7) 00811 uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k; 00812 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3; 00813 q31_t xa, xb, xc, xd; 00814 q31_t ya, yb, yc, yd; 00815 q31_t xa_out, xb_out, xc_out, xd_out; 00816 q31_t ya_out, yb_out, yc_out, yd_out; 00817 00818 q31_t *ptr1; 00819 q63_t xaya, xbyb, xcyc, xdyd; 00820 00821 /* input is be 1.31(q31) format for all FFT sizes */ 00822 /* Total process is divided into three stages */ 00823 /* process first stage, middle stages, & last stage */ 00824 00825 /* Start of first stage process */ 00826 00827 /* Initializations for the first stage */ 00828 n2 = fftLen; 00829 n1 = n2; 00830 /* n2 = fftLen/4 */ 00831 n2 >>= 2U; 00832 i0 = 0U; 00833 ia1 = 0U; 00834 00835 j = n2; 00836 00837 do 00838 { 00839 00840 /* input is in 1.31(q31) format and provide 4 guard bits for the input */ 00841 00842 /* index calculation for the input as, */ 00843 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */ 00844 i1 = i0 + n2; 00845 i2 = i1 + n2; 00846 i3 = i2 + n2; 00847 00848 /* Butterfly implementation */ 00849 /* xa + xc */ 00850 r1 = (pSrc[2U * i0] >> 4U) + (pSrc[2U * i2] >> 4U); 00851 /* xa - xc */ 00852 r2 = (pSrc[2U * i0] >> 4U) - (pSrc[2U * i2] >> 4U); 00853 00854 /* xb + xd */ 00855 t1 = (pSrc[2U * i1] >> 4U) + (pSrc[2U * i3] >> 4U); 00856 00857 /* ya + yc */ 00858 s1 = (pSrc[(2U * i0) + 1U] >> 4U) + (pSrc[(2U * i2) + 1U] >> 4U); 00859 /* ya - yc */ 00860 s2 = (pSrc[(2U * i0) + 1U] >> 4U) - (pSrc[(2U * i2) + 1U] >> 4U); 00861 00862 /* xa' = xa + xb + xc + xd */ 00863 pSrc[2U * i0] = (r1 + t1); 00864 /* (xa + xc) - (xb + xd) */ 00865 r1 = r1 - t1; 00866 /* yb + yd */ 00867 t2 = (pSrc[(2U * i1) + 1U] >> 4U) + (pSrc[(2U * i3) + 1U] >> 4U); 00868 /* ya' = ya + yb + yc + yd */ 00869 pSrc[(2U * i0) + 1U] = (s1 + t2); 00870 00871 /* (ya + yc) - (yb + yd) */ 00872 s1 = s1 - t2; 00873 00874 /* yb - yd */ 00875 t1 = (pSrc[(2U * i1) + 1U] >> 4U) - (pSrc[(2U * i3) + 1U] >> 4U); 00876 /* xb - xd */ 00877 t2 = (pSrc[2U * i1] >> 4U) - (pSrc[2U * i3] >> 4U); 00878 00879 /* index calculation for the coefficients */ 00880 ia2 = 2U * ia1; 00881 co2 = pCoef[ia2 * 2U]; 00882 si2 = pCoef[(ia2 * 2U) + 1U]; 00883 00884 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ 00885 pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) - 00886 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1U; 00887 00888 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ 00889 pSrc[2U * i1 + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32)) + 00890 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1U; 00891 00892 /* (xa - xc) - (yb - yd) */ 00893 r1 = r2 - t1; 00894 /* (xa - xc) + (yb - yd) */ 00895 r2 = r2 + t1; 00896 00897 /* (ya - yc) + (xb - xd) */ 00898 s1 = s2 + t2; 00899 /* (ya - yc) - (xb - xd) */ 00900 s2 = s2 - t2; 00901 00902 co1 = pCoef[ia1 * 2U]; 00903 si1 = pCoef[(ia1 * 2U) + 1U]; 00904 00905 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ 00906 pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) - 00907 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1U; 00908 00909 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ 00910 pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) + 00911 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1U; 00912 00913 /* index calculation for the coefficients */ 00914 ia3 = 3U * ia1; 00915 co3 = pCoef[ia3 * 2U]; 00916 si3 = pCoef[(ia3 * 2U) + 1U]; 00917 00918 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ 00919 pSrc[2U * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) - 00920 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1U; 00921 00922 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ 00923 pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) + 00924 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1U; 00925 00926 /* Twiddle coefficients index modifier */ 00927 ia1 = ia1 + twidCoefModifier; 00928 00929 /* Updating input index */ 00930 i0 = i0 + 1U; 00931 00932 } while (--j); 00933 00934 /* data is in 5.27(q27) format */ 00935 /* each stage provides two down scaling of the input */ 00936 00937 00938 /* Start of Middle stages process */ 00939 00940 twidCoefModifier <<= 2U; 00941 00942 /* Calculation of second stage to excluding last stage */ 00943 for (k = fftLen / 4U; k > 4U; k >>= 2U) 00944 { 00945 /* Initializations for the first stage */ 00946 n1 = n2; 00947 n2 >>= 2U; 00948 ia1 = 0U; 00949 00950 for (j = 0; j <= (n2 - 1U); j++) 00951 { 00952 /* index calculation for the coefficients */ 00953 ia2 = ia1 + ia1; 00954 ia3 = ia2 + ia1; 00955 co1 = pCoef[ia1 * 2U]; 00956 si1 = pCoef[(ia1 * 2U) + 1U]; 00957 co2 = pCoef[ia2 * 2U]; 00958 si2 = pCoef[(ia2 * 2U) + 1U]; 00959 co3 = pCoef[ia3 * 2U]; 00960 si3 = pCoef[(ia3 * 2U) + 1U]; 00961 /* Twiddle coefficients index modifier */ 00962 ia1 = ia1 + twidCoefModifier; 00963 00964 for (i0 = j; i0 < fftLen; i0 += n1) 00965 { 00966 /* index calculation for the input as, */ 00967 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */ 00968 i1 = i0 + n2; 00969 i2 = i1 + n2; 00970 i3 = i2 + n2; 00971 00972 /* Butterfly implementation */ 00973 /* xa + xc */ 00974 r1 = pSrc[2U * i0] + pSrc[2U * i2]; 00975 /* xa - xc */ 00976 r2 = pSrc[2U * i0] - pSrc[2U * i2]; 00977 00978 /* ya + yc */ 00979 s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U]; 00980 /* ya - yc */ 00981 s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U]; 00982 00983 /* xb + xd */ 00984 t1 = pSrc[2U * i1] + pSrc[2U * i3]; 00985 00986 /* xa' = xa + xb + xc + xd */ 00987 pSrc[2U * i0] = (r1 + t1) >> 2U; 00988 /* xa + xc -(xb + xd) */ 00989 r1 = r1 - t1; 00990 /* yb + yd */ 00991 t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U]; 00992 /* ya' = ya + yb + yc + yd */ 00993 pSrc[(2U * i0) + 1U] = (s1 + t2) >> 2U; 00994 00995 /* (ya + yc) - (yb + yd) */ 00996 s1 = s1 - t2; 00997 00998 /* (yb - yd) */ 00999 t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U]; 01000 /* (xb - xd) */ 01001 t2 = pSrc[2U * i1] - pSrc[2U * i3]; 01002 01003 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ 01004 pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32U)) - 01005 ((int32_t) (((q63_t) s1 * si2) >> 32U))) >> 1U; 01006 01007 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ 01008 pSrc[(2U * i1) + 1U] = 01009 (((int32_t) (((q63_t) s1 * co2) >> 32U)) + 01010 ((int32_t) (((q63_t) r1 * si2) >> 32U))) >> 1U; 01011 01012 /* (xa - xc) - (yb - yd) */ 01013 r1 = r2 - t1; 01014 /* (xa - xc) + (yb - yd) */ 01015 r2 = r2 + t1; 01016 01017 /* (ya - yc) + (xb - xd) */ 01018 s1 = s2 + t2; 01019 /* (ya - yc) - (xb - xd) */ 01020 s2 = s2 - t2; 01021 01022 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ 01023 pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) - 01024 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1U; 01025 01026 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ 01027 pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) + 01028 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1U; 01029 01030 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ 01031 pSrc[(2U * i3)] = (((int32_t) (((q63_t) r2 * co3) >> 32)) - 01032 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1U; 01033 01034 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ 01035 pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) + 01036 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1U; 01037 } 01038 } 01039 twidCoefModifier <<= 2U; 01040 } 01041 #else 01042 uint32_t n1, n2, ia1, ia2, ia3, i0, j, k; 01043 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3; 01044 q31_t xa, xb, xc, xd; 01045 q31_t ya, yb, yc, yd; 01046 q31_t xa_out, xb_out, xc_out, xd_out; 01047 q31_t ya_out, yb_out, yc_out, yd_out; 01048 01049 q31_t *ptr1; 01050 q31_t *pSi0; 01051 q31_t *pSi1; 01052 q31_t *pSi2; 01053 q31_t *pSi3; 01054 q63_t xaya, xbyb, xcyc, xdyd; 01055 01056 /* input is be 1.31(q31) format for all FFT sizes */ 01057 /* Total process is divided into three stages */ 01058 /* process first stage, middle stages, & last stage */ 01059 01060 /* Start of first stage process */ 01061 01062 /* Initializations for the first stage */ 01063 n2 = fftLen; 01064 n1 = n2; 01065 /* n2 = fftLen/4 */ 01066 n2 >>= 2U; 01067 01068 ia1 = 0U; 01069 01070 j = n2; 01071 01072 pSi0 = pSrc; 01073 pSi1 = pSi0 + 2 * n2; 01074 pSi2 = pSi1 + 2 * n2; 01075 pSi3 = pSi2 + 2 * n2; 01076 01077 do 01078 { 01079 /* Butterfly implementation */ 01080 /* xa + xc */ 01081 r1 = (pSi0[0] >> 4U) + (pSi2[0] >> 4U); 01082 /* xa - xc */ 01083 r2 = (pSi0[0] >> 4U) - (pSi2[0] >> 4U); 01084 01085 /* xb + xd */ 01086 t1 = (pSi1[0] >> 4U) + (pSi3[0] >> 4U); 01087 01088 /* ya + yc */ 01089 s1 = (pSi0[1] >> 4U) + (pSi2[1] >> 4U); 01090 /* ya - yc */ 01091 s2 = (pSi0[1] >> 4U) - (pSi2[1] >> 4U); 01092 01093 /* xa' = xa + xb + xc + xd */ 01094 *pSi0++ = (r1 + t1); 01095 /* (xa + xc) - (xb + xd) */ 01096 r1 = r1 - t1; 01097 /* yb + yd */ 01098 t2 = (pSi1[1] >> 4U) + (pSi3[1] >> 4U); 01099 /* ya' = ya + yb + yc + yd */ 01100 *pSi0++ = (s1 + t2); 01101 01102 /* (ya + yc) - (yb + yd) */ 01103 s1 = s1 - t2; 01104 01105 /* yb - yd */ 01106 t1 = (pSi1[1] >> 4U) - (pSi3[1] >> 4U); 01107 /* xb - xd */ 01108 t2 = (pSi1[0] >> 4U) - (pSi3[0] >> 4U); 01109 01110 /* index calculation for the coefficients */ 01111 ia2 = 2U * ia1; 01112 co2 = pCoef[ia2 * 2U]; 01113 si2 = pCoef[(ia2 * 2U) + 1U]; 01114 01115 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ 01116 *pSi1++ = (((int32_t) (((q63_t) r1 * co2) >> 32)) - 01117 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1U; 01118 01119 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ 01120 *pSi1++ = (((int32_t) (((q63_t) s1 * co2) >> 32)) + 01121 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1U; 01122 01123 /* (xa - xc) - (yb - yd) */ 01124 r1 = r2 - t1; 01125 /* (xa - xc) + (yb - yd) */ 01126 r2 = r2 + t1; 01127 01128 /* (ya - yc) + (xb - xd) */ 01129 s1 = s2 + t2; 01130 /* (ya - yc) - (xb - xd) */ 01131 s2 = s2 - t2; 01132 01133 co1 = pCoef[ia1 * 2U]; 01134 si1 = pCoef[(ia1 * 2U) + 1U]; 01135 01136 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ 01137 *pSi2++ = (((int32_t) (((q63_t) r1 * co1) >> 32)) - 01138 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1U; 01139 01140 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ 01141 *pSi2++ = (((int32_t) (((q63_t) s1 * co1) >> 32)) + 01142 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1U; 01143 01144 /* index calculation for the coefficients */ 01145 ia3 = 3U * ia1; 01146 co3 = pCoef[ia3 * 2U]; 01147 si3 = pCoef[(ia3 * 2U) + 1U]; 01148 01149 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ 01150 *pSi3++ = (((int32_t) (((q63_t) r2 * co3) >> 32)) - 01151 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1U; 01152 01153 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ 01154 *pSi3++ = (((int32_t) (((q63_t) s2 * co3) >> 32)) + 01155 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1U; 01156 01157 /* Twiddle coefficients index modifier */ 01158 ia1 = ia1 + twidCoefModifier; 01159 01160 } while (--j); 01161 01162 /* data is in 5.27(q27) format */ 01163 /* each stage provides two down scaling of the input */ 01164 01165 01166 /* Start of Middle stages process */ 01167 01168 twidCoefModifier <<= 2U; 01169 01170 /* Calculation of second stage to excluding last stage */ 01171 for (k = fftLen / 4U; k > 4U; k >>= 2U) 01172 { 01173 /* Initializations for the first stage */ 01174 n1 = n2; 01175 n2 >>= 2U; 01176 ia1 = 0U; 01177 01178 for (j = 0; j <= (n2 - 1U); j++) 01179 { 01180 /* index calculation for the coefficients */ 01181 ia2 = ia1 + ia1; 01182 ia3 = ia2 + ia1; 01183 co1 = pCoef[ia1 * 2U]; 01184 si1 = pCoef[(ia1 * 2U) + 1U]; 01185 co2 = pCoef[ia2 * 2U]; 01186 si2 = pCoef[(ia2 * 2U) + 1U]; 01187 co3 = pCoef[ia3 * 2U]; 01188 si3 = pCoef[(ia3 * 2U) + 1U]; 01189 /* Twiddle coefficients index modifier */ 01190 ia1 = ia1 + twidCoefModifier; 01191 01192 pSi0 = pSrc + 2 * j; 01193 pSi1 = pSi0 + 2 * n2; 01194 pSi2 = pSi1 + 2 * n2; 01195 pSi3 = pSi2 + 2 * n2; 01196 01197 for (i0 = j; i0 < fftLen; i0 += n1) 01198 { 01199 /* Butterfly implementation */ 01200 /* xa + xc */ 01201 r1 = pSi0[0] + pSi2[0]; 01202 01203 /* xa - xc */ 01204 r2 = pSi0[0] - pSi2[0]; 01205 01206 01207 /* ya + yc */ 01208 s1 = pSi0[1] + pSi2[1]; 01209 01210 /* ya - yc */ 01211 s2 = pSi0[1] - pSi2[1]; 01212 01213 01214 /* xb + xd */ 01215 t1 = pSi1[0] + pSi3[0]; 01216 01217 01218 /* xa' = xa + xb + xc + xd */ 01219 pSi0[0] = (r1 + t1) >> 2U; 01220 /* xa + xc -(xb + xd) */ 01221 r1 = r1 - t1; 01222 /* yb + yd */ 01223 t2 = pSi1[1] + pSi3[1]; 01224 01225 /* ya' = ya + yb + yc + yd */ 01226 pSi0[1] = (s1 + t2) >> 2U; 01227 pSi0 += 2 * n1; 01228 01229 /* (ya + yc) - (yb + yd) */ 01230 s1 = s1 - t2; 01231 01232 /* (yb - yd) */ 01233 t1 = pSi1[1] - pSi3[1]; 01234 01235 /* (xb - xd) */ 01236 t2 = pSi1[0] - pSi3[0]; 01237 01238 01239 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ 01240 pSi1[0] = (((int32_t) (((q63_t) r1 * co2) >> 32U)) - 01241 ((int32_t) (((q63_t) s1 * si2) >> 32U))) >> 1U; 01242 01243 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ 01244 pSi1[1] = 01245 01246 (((int32_t) (((q63_t) s1 * co2) >> 32U)) + 01247 ((int32_t) (((q63_t) r1 * si2) >> 32U))) >> 1U; 01248 pSi1 += 2 * n1; 01249 01250 /* (xa - xc) - (yb - yd) */ 01251 r1 = r2 - t1; 01252 /* (xa - xc) + (yb - yd) */ 01253 r2 = r2 + t1; 01254 01255 /* (ya - yc) + (xb - xd) */ 01256 s1 = s2 + t2; 01257 /* (ya - yc) - (xb - xd) */ 01258 s2 = s2 - t2; 01259 01260 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ 01261 pSi2[0] = (((int32_t) (((q63_t) r1 * co1) >> 32)) - 01262 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1U; 01263 01264 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ 01265 pSi2[1] = (((int32_t) (((q63_t) s1 * co1) >> 32)) + 01266 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1U; 01267 pSi2 += 2 * n1; 01268 01269 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ 01270 pSi3[0] = (((int32_t) (((q63_t) r2 * co3) >> 32)) - 01271 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1U; 01272 01273 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ 01274 pSi3[1] = (((int32_t) (((q63_t) s2 * co3) >> 32)) + 01275 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1U; 01276 pSi3 += 2 * n1; 01277 } 01278 } 01279 twidCoefModifier <<= 2U; 01280 } 01281 #endif 01282 01283 /* End of Middle stages process */ 01284 01285 /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */ 01286 /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */ 01287 /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */ 01288 /* data is in 5.27(q27) format for the 16 point as there are no middle stages */ 01289 01290 01291 /* Start of last stage process */ 01292 01293 01294 /* Initializations for the last stage */ 01295 j = fftLen >> 2; 01296 ptr1 = &pSrc[0]; 01297 01298 /* Calculations of last stage */ 01299 do 01300 { 01301 #ifndef ARM_MATH_BIG_ENDIAN 01302 /* Read xa (real), ya(imag) input */ 01303 xaya = *__SIMD64(ptr1)++; 01304 xa = (q31_t) xaya; 01305 ya = (q31_t) (xaya >> 32); 01306 01307 /* Read xb (real), yb(imag) input */ 01308 xbyb = *__SIMD64(ptr1)++; 01309 xb = (q31_t) xbyb; 01310 yb = (q31_t) (xbyb >> 32); 01311 01312 /* Read xc (real), yc(imag) input */ 01313 xcyc = *__SIMD64(ptr1)++; 01314 xc = (q31_t) xcyc; 01315 yc = (q31_t) (xcyc >> 32); 01316 01317 /* Read xc (real), yc(imag) input */ 01318 xdyd = *__SIMD64(ptr1)++; 01319 xd = (q31_t) xdyd; 01320 yd = (q31_t) (xdyd >> 32); 01321 01322 #else 01323 01324 /* Read xa (real), ya(imag) input */ 01325 xaya = *__SIMD64(ptr1)++; 01326 ya = (q31_t) xaya; 01327 xa = (q31_t) (xaya >> 32); 01328 01329 /* Read xb (real), yb(imag) input */ 01330 xbyb = *__SIMD64(ptr1)++; 01331 yb = (q31_t) xbyb; 01332 xb = (q31_t) (xbyb >> 32); 01333 01334 /* Read xc (real), yc(imag) input */ 01335 xcyc = *__SIMD64(ptr1)++; 01336 yc = (q31_t) xcyc; 01337 xc = (q31_t) (xcyc >> 32); 01338 01339 /* Read xc (real), yc(imag) input */ 01340 xdyd = *__SIMD64(ptr1)++; 01341 yd = (q31_t) xdyd; 01342 xd = (q31_t) (xdyd >> 32); 01343 01344 01345 #endif 01346 01347 /* xa' = xa + xb + xc + xd */ 01348 xa_out = xa + xb + xc + xd; 01349 01350 /* ya' = ya + yb + yc + yd */ 01351 ya_out = ya + yb + yc + yd; 01352 01353 /* pointer updation for writing */ 01354 ptr1 = ptr1 - 8U; 01355 01356 /* writing xa' and ya' */ 01357 *ptr1++ = xa_out; 01358 *ptr1++ = ya_out; 01359 01360 xc_out = (xa - xb + xc - xd); 01361 yc_out = (ya - yb + yc - yd); 01362 01363 /* writing xc' and yc' */ 01364 *ptr1++ = xc_out; 01365 *ptr1++ = yc_out; 01366 01367 xb_out = (xa - yb - xc + yd); 01368 yb_out = (ya + xb - yc - xd); 01369 01370 /* writing xb' and yb' */ 01371 *ptr1++ = xb_out; 01372 *ptr1++ = yb_out; 01373 01374 xd_out = (xa + yb - xc - yd); 01375 yd_out = (ya - xb - yc + xd); 01376 01377 /* writing xd' and yd' */ 01378 *ptr1++ = xd_out; 01379 *ptr1++ = yd_out; 01380 01381 } while (--j); 01382 01383 /* output is in 11.21(q21) format for the 1024 point */ 01384 /* output is in 9.23(q23) format for the 256 point */ 01385 /* output is in 7.25(q25) format for the 64 point */ 01386 /* output is in 5.27(q27) format for the 16 point */ 01387 01388 /* End of last stage process */ 01389 } 01390
Generated on Tue Jul 12 2022 16:46:23 by
1.7.2