Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of mbed-dsp by
arm_cfft_radix4_q31.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2013 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 17. January 2013 00005 * $Revision: V1.4.1 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_cfft_radix4_q31.c 00009 * 00010 * Description: This file has function definition of Radix-4 FFT & IFFT function and 00011 * In-place bit reversal using bit reversal table 00012 * 00013 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00014 * 00015 * Redistribution and use in source and binary forms, with or without 00016 * modification, are permitted provided that the following conditions 00017 * are met: 00018 * - Redistributions of source code must retain the above copyright 00019 * notice, this list of conditions and the following disclaimer. 00020 * - Redistributions in binary form must reproduce the above copyright 00021 * notice, this list of conditions and the following disclaimer in 00022 * the documentation and/or other materials provided with the 00023 * distribution. 00024 * - Neither the name of ARM LIMITED nor the names of its contributors 00025 * may be used to endorse or promote products derived from this 00026 * software without specific prior written permission. 00027 * 00028 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00029 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00030 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00031 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 00032 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00033 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00034 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00035 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00036 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00037 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00038 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00039 * POSSIBILITY OF SUCH DAMAGE. 00040 * -------------------------------------------------------------------- */ 00041 00042 #include "arm_math.h" 00043 00044 void arm_radix4_butterfly_inverse_q31( 00045 q31_t * pSrc, 00046 uint32_t fftLen, 00047 q31_t * pCoef, 00048 uint32_t twidCoefModifier); 00049 00050 void arm_radix4_butterfly_q31( 00051 q31_t * pSrc, 00052 uint32_t fftLen, 00053 q31_t * pCoef, 00054 uint32_t twidCoefModifier); 00055 00056 void arm_bitreversal_q31( 00057 q31_t * pSrc, 00058 uint32_t fftLen, 00059 uint16_t bitRevFactor, 00060 uint16_t * pBitRevTab); 00061 00062 /** 00063 * @ingroup groupTransforms 00064 */ 00065 00066 /** 00067 * @addtogroup ComplexFFT 00068 * @{ 00069 */ 00070 00071 /** 00072 * @details 00073 * @brief Processing function for the Q31 CFFT/CIFFT. 00074 * @param[in] *S points to an instance of the Q31 CFFT/CIFFT structure. 00075 * @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place. 00076 * @return none. 00077 * 00078 * \par Input and output formats: 00079 * \par 00080 * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process. 00081 * Hence the output format is different for different FFT sizes. 00082 * The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT: 00083 * \par 00084 * \image html CFFTQ31.gif "Input and Output Formats for Q31 CFFT" 00085 * \image html CIFFTQ31.gif "Input and Output Formats for Q31 CIFFT" 00086 * 00087 */ 00088 00089 void arm_cfft_radix4_q31( 00090 const arm_cfft_radix4_instance_q31 * S, 00091 q31_t * pSrc) 00092 { 00093 if(S->ifftFlag == 1u) 00094 { 00095 /* Complex IFFT radix-4 */ 00096 arm_radix4_butterfly_inverse_q31(pSrc, S->fftLen, S->pTwiddle, 00097 S->twidCoefModifier); 00098 } 00099 else 00100 { 00101 /* Complex FFT radix-4 */ 00102 arm_radix4_butterfly_q31(pSrc, S->fftLen, S->pTwiddle, 00103 S->twidCoefModifier); 00104 } 00105 00106 00107 if(S->bitReverseFlag == 1u) 00108 { 00109 /* Bit Reversal */ 00110 arm_bitreversal_q31(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable); 00111 } 00112 00113 } 00114 00115 /** 00116 * @} end of ComplexFFT group 00117 */ 00118 00119 /* 00120 * Radix-4 FFT algorithm used is : 00121 * 00122 * Input real and imaginary data: 00123 * x(n) = xa + j * ya 00124 * x(n+N/4 ) = xb + j * yb 00125 * x(n+N/2 ) = xc + j * yc 00126 * x(n+3N 4) = xd + j * yd 00127 * 00128 * 00129 * Output real and imaginary data: 00130 * x(4r) = xa'+ j * ya' 00131 * x(4r+1) = xb'+ j * yb' 00132 * x(4r+2) = xc'+ j * yc' 00133 * x(4r+3) = xd'+ j * yd' 00134 * 00135 * 00136 * Twiddle factors for radix-4 FFT: 00137 * Wn = co1 + j * (- si1) 00138 * W2n = co2 + j * (- si2) 00139 * W3n = co3 + j * (- si3) 00140 * 00141 * Butterfly implementation: 00142 * xa' = xa + xb + xc + xd 00143 * ya' = ya + yb + yc + yd 00144 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) 00145 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) 00146 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) 00147 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) 00148 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) 00149 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) 00150 * 00151 */ 00152 00153 /** 00154 * @brief Core function for the Q31 CFFT butterfly process. 00155 * @param[in, out] *pSrc points to the in-place buffer of Q31 data type. 00156 * @param[in] fftLen length of the FFT. 00157 * @param[in] *pCoef points to twiddle coefficient buffer. 00158 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 00159 * @return none. 00160 */ 00161 00162 void arm_radix4_butterfly_q31( 00163 q31_t * pSrc, 00164 uint32_t fftLen, 00165 q31_t * pCoef, 00166 uint32_t twidCoefModifier) 00167 { 00168 uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k; 00169 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3; 00170 00171 q31_t xa, xb, xc, xd; 00172 q31_t ya, yb, yc, yd; 00173 q31_t xa_out, xb_out, xc_out, xd_out; 00174 q31_t ya_out, yb_out, yc_out, yd_out; 00175 00176 q31_t *ptr1; 00177 q63_t xaya, xbyb, xcyc, xdyd; 00178 /* Total process is divided into three stages */ 00179 00180 /* process first stage, middle stages, & last stage */ 00181 00182 00183 /* start of first stage process */ 00184 00185 /* Initializations for the first stage */ 00186 n2 = fftLen; 00187 n1 = n2; 00188 /* n2 = fftLen/4 */ 00189 n2 >>= 2u; 00190 i0 = 0u; 00191 ia1 = 0u; 00192 00193 j = n2; 00194 00195 /* Calculation of first stage */ 00196 do 00197 { 00198 /* index calculation for the input as, */ 00199 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */ 00200 i1 = i0 + n2; 00201 i2 = i1 + n2; 00202 i3 = i2 + n2; 00203 00204 /* input is in 1.31(q31) format and provide 4 guard bits for the input */ 00205 00206 /* Butterfly implementation */ 00207 /* xa + xc */ 00208 r1 = (pSrc[(2u * i0)] >> 4u) + (pSrc[(2u * i2)] >> 4u); 00209 /* xa - xc */ 00210 r2 = (pSrc[2u * i0] >> 4u) - (pSrc[2u * i2] >> 4u); 00211 00212 /* xb + xd */ 00213 t1 = (pSrc[2u * i1] >> 4u) + (pSrc[2u * i3] >> 4u); 00214 00215 /* ya + yc */ 00216 s1 = (pSrc[(2u * i0) + 1u] >> 4u) + (pSrc[(2u * i2) + 1u] >> 4u); 00217 /* ya - yc */ 00218 s2 = (pSrc[(2u * i0) + 1u] >> 4u) - (pSrc[(2u * i2) + 1u] >> 4u); 00219 00220 /* xa' = xa + xb + xc + xd */ 00221 pSrc[2u * i0] = (r1 + t1); 00222 /* (xa + xc) - (xb + xd) */ 00223 r1 = r1 - t1; 00224 /* yb + yd */ 00225 t2 = (pSrc[(2u * i1) + 1u] >> 4u) + (pSrc[(2u * i3) + 1u] >> 4u); 00226 00227 /* ya' = ya + yb + yc + yd */ 00228 pSrc[(2u * i0) + 1u] = (s1 + t2); 00229 00230 /* (ya + yc) - (yb + yd) */ 00231 s1 = s1 - t2; 00232 00233 /* yb - yd */ 00234 t1 = (pSrc[(2u * i1) + 1u] >> 4u) - (pSrc[(2u * i3) + 1u] >> 4u); 00235 /* xb - xd */ 00236 t2 = (pSrc[2u * i1] >> 4u) - (pSrc[2u * i3] >> 4u); 00237 00238 /* index calculation for the coefficients */ 00239 ia2 = 2u * ia1; 00240 co2 = pCoef[ia2 * 2u]; 00241 si2 = pCoef[(ia2 * 2u) + 1u]; 00242 00243 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */ 00244 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) + 00245 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u; 00246 00247 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */ 00248 pSrc[(2u * i1) + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) - 00249 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u; 00250 00251 /* (xa - xc) + (yb - yd) */ 00252 r1 = r2 + t1; 00253 /* (xa - xc) - (yb - yd) */ 00254 r2 = r2 - t1; 00255 00256 /* (ya - yc) - (xb - xd) */ 00257 s1 = s2 - t2; 00258 /* (ya - yc) + (xb - xd) */ 00259 s2 = s2 + t2; 00260 00261 co1 = pCoef[ia1 * 2u]; 00262 si1 = pCoef[(ia1 * 2u) + 1u]; 00263 00264 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */ 00265 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) + 00266 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u; 00267 00268 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */ 00269 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) - 00270 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u; 00271 00272 /* index calculation for the coefficients */ 00273 ia3 = 3u * ia1; 00274 co3 = pCoef[ia3 * 2u]; 00275 si3 = pCoef[(ia3 * 2u) + 1u]; 00276 00277 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */ 00278 pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) + 00279 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u; 00280 00281 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */ 00282 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) - 00283 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u; 00284 00285 /* Twiddle coefficients index modifier */ 00286 ia1 = ia1 + twidCoefModifier; 00287 00288 /* Updating input index */ 00289 i0 = i0 + 1u; 00290 00291 } while(--j); 00292 00293 /* end of first stage process */ 00294 00295 /* data is in 5.27(q27) format */ 00296 00297 00298 /* start of Middle stages process */ 00299 00300 00301 /* each stage in middle stages provides two down scaling of the input */ 00302 00303 twidCoefModifier <<= 2u; 00304 00305 00306 for (k = fftLen / 4u; k > 4u; k >>= 2u) 00307 { 00308 /* Initializations for the first stage */ 00309 n1 = n2; 00310 n2 >>= 2u; 00311 ia1 = 0u; 00312 00313 /* Calculation of first stage */ 00314 for (j = 0u; j <= (n2 - 1u); j++) 00315 { 00316 /* index calculation for the coefficients */ 00317 ia2 = ia1 + ia1; 00318 ia3 = ia2 + ia1; 00319 co1 = pCoef[ia1 * 2u]; 00320 si1 = pCoef[(ia1 * 2u) + 1u]; 00321 co2 = pCoef[ia2 * 2u]; 00322 si2 = pCoef[(ia2 * 2u) + 1u]; 00323 co3 = pCoef[ia3 * 2u]; 00324 si3 = pCoef[(ia3 * 2u) + 1u]; 00325 /* Twiddle coefficients index modifier */ 00326 ia1 = ia1 + twidCoefModifier; 00327 00328 for (i0 = j; i0 < fftLen; i0 += n1) 00329 { 00330 /* index calculation for the input as, */ 00331 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */ 00332 i1 = i0 + n2; 00333 i2 = i1 + n2; 00334 i3 = i2 + n2; 00335 00336 /* Butterfly implementation */ 00337 /* xa + xc */ 00338 r1 = pSrc[2u * i0] + pSrc[2u * i2]; 00339 /* xa - xc */ 00340 r2 = pSrc[2u * i0] - pSrc[2u * i2]; 00341 00342 /* ya + yc */ 00343 s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u]; 00344 /* ya - yc */ 00345 s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u]; 00346 00347 /* xb + xd */ 00348 t1 = pSrc[2u * i1] + pSrc[2u * i3]; 00349 00350 /* xa' = xa + xb + xc + xd */ 00351 pSrc[2u * i0] = (r1 + t1) >> 2u; 00352 /* xa + xc -(xb + xd) */ 00353 r1 = r1 - t1; 00354 00355 /* yb + yd */ 00356 t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u]; 00357 /* ya' = ya + yb + yc + yd */ 00358 pSrc[(2u * i0) + 1u] = (s1 + t2) >> 2u; 00359 00360 /* (ya + yc) - (yb + yd) */ 00361 s1 = s1 - t2; 00362 00363 /* (yb - yd) */ 00364 t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u]; 00365 /* (xb - xd) */ 00366 t2 = pSrc[2u * i1] - pSrc[2u * i3]; 00367 00368 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */ 00369 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) + 00370 ((int32_t) (((q63_t) s1 * si2) >> 32))) >> 1u; 00371 00372 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */ 00373 pSrc[(2u * i1) + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) - 00374 ((int32_t) (((q63_t) r1 * si2) >> 32))) >> 1u; 00375 00376 /* (xa - xc) + (yb - yd) */ 00377 r1 = r2 + t1; 00378 /* (xa - xc) - (yb - yd) */ 00379 r2 = r2 - t1; 00380 00381 /* (ya - yc) - (xb - xd) */ 00382 s1 = s2 - t2; 00383 /* (ya - yc) + (xb - xd) */ 00384 s2 = s2 + t2; 00385 00386 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */ 00387 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) + 00388 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u; 00389 00390 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */ 00391 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) - 00392 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u; 00393 00394 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */ 00395 pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) + 00396 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u; 00397 00398 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */ 00399 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) - 00400 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u; 00401 } 00402 } 00403 twidCoefModifier <<= 2u; 00404 } 00405 00406 /* End of Middle stages process */ 00407 00408 /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */ 00409 /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */ 00410 /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */ 00411 /* data is in 5.27(q27) format for the 16 point as there are no middle stages */ 00412 00413 00414 /* start of Last stage process */ 00415 /* Initializations for the last stage */ 00416 j = fftLen >> 2; 00417 ptr1 = &pSrc[0]; 00418 00419 /* Calculations of last stage */ 00420 do 00421 { 00422 00423 #ifndef ARM_MATH_BIG_ENDIAN 00424 00425 /* Read xa (real), ya(imag) input */ 00426 xaya = *__SIMD64(ptr1)++; 00427 xa = (q31_t) xaya; 00428 ya = (q31_t) (xaya >> 32); 00429 00430 /* Read xb (real), yb(imag) input */ 00431 xbyb = *__SIMD64(ptr1)++; 00432 xb = (q31_t) xbyb; 00433 yb = (q31_t) (xbyb >> 32); 00434 00435 /* Read xc (real), yc(imag) input */ 00436 xcyc = *__SIMD64(ptr1)++; 00437 xc = (q31_t) xcyc; 00438 yc = (q31_t) (xcyc >> 32); 00439 00440 /* Read xc (real), yc(imag) input */ 00441 xdyd = *__SIMD64(ptr1)++; 00442 xd = (q31_t) xdyd; 00443 yd = (q31_t) (xdyd >> 32); 00444 00445 #else 00446 00447 /* Read xa (real), ya(imag) input */ 00448 xaya = *__SIMD64(ptr1)++; 00449 ya = (q31_t) xaya; 00450 xa = (q31_t) (xaya >> 32); 00451 00452 /* Read xb (real), yb(imag) input */ 00453 xbyb = *__SIMD64(ptr1)++; 00454 yb = (q31_t) xbyb; 00455 xb = (q31_t) (xbyb >> 32); 00456 00457 /* Read xc (real), yc(imag) input */ 00458 xcyc = *__SIMD64(ptr1)++; 00459 yc = (q31_t) xcyc; 00460 xc = (q31_t) (xcyc >> 32); 00461 00462 /* Read xc (real), yc(imag) input */ 00463 xdyd = *__SIMD64(ptr1)++; 00464 yd = (q31_t) xdyd; 00465 xd = (q31_t) (xdyd >> 32); 00466 00467 00468 #endif 00469 00470 /* xa' = xa + xb + xc + xd */ 00471 xa_out = xa + xb + xc + xd; 00472 00473 /* ya' = ya + yb + yc + yd */ 00474 ya_out = ya + yb + yc + yd; 00475 00476 /* pointer updation for writing */ 00477 ptr1 = ptr1 - 8u; 00478 00479 /* writing xa' and ya' */ 00480 *ptr1++ = xa_out; 00481 *ptr1++ = ya_out; 00482 00483 xc_out = (xa - xb + xc - xd); 00484 yc_out = (ya - yb + yc - yd); 00485 00486 /* writing xc' and yc' */ 00487 *ptr1++ = xc_out; 00488 *ptr1++ = yc_out; 00489 00490 xb_out = (xa + yb - xc - yd); 00491 yb_out = (ya - xb - yc + xd); 00492 00493 /* writing xb' and yb' */ 00494 *ptr1++ = xb_out; 00495 *ptr1++ = yb_out; 00496 00497 xd_out = (xa - yb - xc + yd); 00498 yd_out = (ya + xb - yc - xd); 00499 00500 /* writing xd' and yd' */ 00501 *ptr1++ = xd_out; 00502 *ptr1++ = yd_out; 00503 00504 00505 } while(--j); 00506 00507 /* output is in 11.21(q21) format for the 1024 point */ 00508 /* output is in 9.23(q23) format for the 256 point */ 00509 /* output is in 7.25(q25) format for the 64 point */ 00510 /* output is in 5.27(q27) format for the 16 point */ 00511 00512 /* End of last stage process */ 00513 00514 } 00515 00516 00517 /** 00518 * @brief Core function for the Q31 CIFFT butterfly process. 00519 * @param[in, out] *pSrc points to the in-place buffer of Q31 data type. 00520 * @param[in] fftLen length of the FFT. 00521 * @param[in] *pCoef points to twiddle coefficient buffer. 00522 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 00523 * @return none. 00524 */ 00525 00526 00527 /* 00528 * Radix-4 IFFT algorithm used is : 00529 * 00530 * CIFFT uses same twiddle coefficients as CFFT Function 00531 * x[k] = x[n] + (j)k * x[n + fftLen/4] + (-1)k * x[n+fftLen/2] + (-j)k * x[n+3*fftLen/4] 00532 * 00533 * 00534 * IFFT is implemented with following changes in equations from FFT 00535 * 00536 * Input real and imaginary data: 00537 * x(n) = xa + j * ya 00538 * x(n+N/4 ) = xb + j * yb 00539 * x(n+N/2 ) = xc + j * yc 00540 * x(n+3N 4) = xd + j * yd 00541 * 00542 * 00543 * Output real and imaginary data: 00544 * x(4r) = xa'+ j * ya' 00545 * x(4r+1) = xb'+ j * yb' 00546 * x(4r+2) = xc'+ j * yc' 00547 * x(4r+3) = xd'+ j * yd' 00548 * 00549 * 00550 * Twiddle factors for radix-4 IFFT: 00551 * Wn = co1 + j * (si1) 00552 * W2n = co2 + j * (si2) 00553 * W3n = co3 + j * (si3) 00554 00555 * The real and imaginary output values for the radix-4 butterfly are 00556 * xa' = xa + xb + xc + xd 00557 * ya' = ya + yb + yc + yd 00558 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) 00559 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) 00560 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) 00561 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) 00562 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) 00563 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) 00564 * 00565 */ 00566 00567 void arm_radix4_butterfly_inverse_q31( 00568 q31_t * pSrc, 00569 uint32_t fftLen, 00570 q31_t * pCoef, 00571 uint32_t twidCoefModifier) 00572 { 00573 uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k; 00574 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3; 00575 q31_t xa, xb, xc, xd; 00576 q31_t ya, yb, yc, yd; 00577 q31_t xa_out, xb_out, xc_out, xd_out; 00578 q31_t ya_out, yb_out, yc_out, yd_out; 00579 00580 q31_t *ptr1; 00581 q63_t xaya, xbyb, xcyc, xdyd; 00582 00583 /* input is be 1.31(q31) format for all FFT sizes */ 00584 /* Total process is divided into three stages */ 00585 /* process first stage, middle stages, & last stage */ 00586 00587 /* Start of first stage process */ 00588 00589 /* Initializations for the first stage */ 00590 n2 = fftLen; 00591 n1 = n2; 00592 /* n2 = fftLen/4 */ 00593 n2 >>= 2u; 00594 i0 = 0u; 00595 ia1 = 0u; 00596 00597 j = n2; 00598 00599 do 00600 { 00601 00602 /* input is in 1.31(q31) format and provide 4 guard bits for the input */ 00603 00604 /* index calculation for the input as, */ 00605 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */ 00606 i1 = i0 + n2; 00607 i2 = i1 + n2; 00608 i3 = i2 + n2; 00609 00610 /* Butterfly implementation */ 00611 /* xa + xc */ 00612 r1 = (pSrc[2u * i0] >> 4u) + (pSrc[2u * i2] >> 4u); 00613 /* xa - xc */ 00614 r2 = (pSrc[2u * i0] >> 4u) - (pSrc[2u * i2] >> 4u); 00615 00616 /* xb + xd */ 00617 t1 = (pSrc[2u * i1] >> 4u) + (pSrc[2u * i3] >> 4u); 00618 00619 /* ya + yc */ 00620 s1 = (pSrc[(2u * i0) + 1u] >> 4u) + (pSrc[(2u * i2) + 1u] >> 4u); 00621 /* ya - yc */ 00622 s2 = (pSrc[(2u * i0) + 1u] >> 4u) - (pSrc[(2u * i2) + 1u] >> 4u); 00623 00624 /* xa' = xa + xb + xc + xd */ 00625 pSrc[2u * i0] = (r1 + t1); 00626 /* (xa + xc) - (xb + xd) */ 00627 r1 = r1 - t1; 00628 /* yb + yd */ 00629 t2 = (pSrc[(2u * i1) + 1u] >> 4u) + (pSrc[(2u * i3) + 1u] >> 4u); 00630 /* ya' = ya + yb + yc + yd */ 00631 pSrc[(2u * i0) + 1u] = (s1 + t2); 00632 00633 /* (ya + yc) - (yb + yd) */ 00634 s1 = s1 - t2; 00635 00636 /* yb - yd */ 00637 t1 = (pSrc[(2u * i1) + 1u] >> 4u) - (pSrc[(2u * i3) + 1u] >> 4u); 00638 /* xb - xd */ 00639 t2 = (pSrc[2u * i1] >> 4u) - (pSrc[2u * i3] >> 4u); 00640 00641 /* index calculation for the coefficients */ 00642 ia2 = 2u * ia1; 00643 co2 = pCoef[ia2 * 2u]; 00644 si2 = pCoef[(ia2 * 2u) + 1u]; 00645 00646 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ 00647 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) - 00648 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u; 00649 00650 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ 00651 pSrc[2u * i1 + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) + 00652 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u; 00653 00654 /* (xa - xc) - (yb - yd) */ 00655 r1 = r2 - t1; 00656 /* (xa - xc) + (yb - yd) */ 00657 r2 = r2 + t1; 00658 00659 /* (ya - yc) + (xb - xd) */ 00660 s1 = s2 + t2; 00661 /* (ya - yc) - (xb - xd) */ 00662 s2 = s2 - t2; 00663 00664 co1 = pCoef[ia1 * 2u]; 00665 si1 = pCoef[(ia1 * 2u) + 1u]; 00666 00667 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ 00668 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) - 00669 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u; 00670 00671 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ 00672 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) + 00673 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u; 00674 00675 /* index calculation for the coefficients */ 00676 ia3 = 3u * ia1; 00677 co3 = pCoef[ia3 * 2u]; 00678 si3 = pCoef[(ia3 * 2u) + 1u]; 00679 00680 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ 00681 pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) - 00682 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u; 00683 00684 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ 00685 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) + 00686 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u; 00687 00688 /* Twiddle coefficients index modifier */ 00689 ia1 = ia1 + twidCoefModifier; 00690 00691 /* Updating input index */ 00692 i0 = i0 + 1u; 00693 00694 } while(--j); 00695 00696 /* data is in 5.27(q27) format */ 00697 /* each stage provides two down scaling of the input */ 00698 00699 00700 /* Start of Middle stages process */ 00701 00702 twidCoefModifier <<= 2u; 00703 00704 /* Calculation of second stage to excluding last stage */ 00705 for (k = fftLen / 4u; k > 4u; k >>= 2u) 00706 { 00707 /* Initializations for the first stage */ 00708 n1 = n2; 00709 n2 >>= 2u; 00710 ia1 = 0u; 00711 00712 for (j = 0; j <= (n2 - 1u); j++) 00713 { 00714 /* index calculation for the coefficients */ 00715 ia2 = ia1 + ia1; 00716 ia3 = ia2 + ia1; 00717 co1 = pCoef[ia1 * 2u]; 00718 si1 = pCoef[(ia1 * 2u) + 1u]; 00719 co2 = pCoef[ia2 * 2u]; 00720 si2 = pCoef[(ia2 * 2u) + 1u]; 00721 co3 = pCoef[ia3 * 2u]; 00722 si3 = pCoef[(ia3 * 2u) + 1u]; 00723 /* Twiddle coefficients index modifier */ 00724 ia1 = ia1 + twidCoefModifier; 00725 00726 for (i0 = j; i0 < fftLen; i0 += n1) 00727 { 00728 /* index calculation for the input as, */ 00729 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */ 00730 i1 = i0 + n2; 00731 i2 = i1 + n2; 00732 i3 = i2 + n2; 00733 00734 /* Butterfly implementation */ 00735 /* xa + xc */ 00736 r1 = pSrc[2u * i0] + pSrc[2u * i2]; 00737 /* xa - xc */ 00738 r2 = pSrc[2u * i0] - pSrc[2u * i2]; 00739 00740 /* ya + yc */ 00741 s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u]; 00742 /* ya - yc */ 00743 s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u]; 00744 00745 /* xb + xd */ 00746 t1 = pSrc[2u * i1] + pSrc[2u * i3]; 00747 00748 /* xa' = xa + xb + xc + xd */ 00749 pSrc[2u * i0] = (r1 + t1) >> 2u; 00750 /* xa + xc -(xb + xd) */ 00751 r1 = r1 - t1; 00752 /* yb + yd */ 00753 t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u]; 00754 /* ya' = ya + yb + yc + yd */ 00755 pSrc[(2u * i0) + 1u] = (s1 + t2) >> 2u; 00756 00757 /* (ya + yc) - (yb + yd) */ 00758 s1 = s1 - t2; 00759 00760 /* (yb - yd) */ 00761 t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u]; 00762 /* (xb - xd) */ 00763 t2 = pSrc[2u * i1] - pSrc[2u * i3]; 00764 00765 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ 00766 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32u)) - 00767 ((int32_t) (((q63_t) s1 * si2) >> 32u))) >> 1u; 00768 00769 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ 00770 pSrc[(2u * i1) + 1u] = 00771 (((int32_t) (((q63_t) s1 * co2) >> 32u)) + 00772 ((int32_t) (((q63_t) r1 * si2) >> 32u))) >> 1u; 00773 00774 /* (xa - xc) - (yb - yd) */ 00775 r1 = r2 - t1; 00776 /* (xa - xc) + (yb - yd) */ 00777 r2 = r2 + t1; 00778 00779 /* (ya - yc) + (xb - xd) */ 00780 s1 = s2 + t2; 00781 /* (ya - yc) - (xb - xd) */ 00782 s2 = s2 - t2; 00783 00784 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ 00785 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) - 00786 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u; 00787 00788 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ 00789 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) + 00790 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u; 00791 00792 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ 00793 pSrc[(2u * i3)] = (((int32_t) (((q63_t) r2 * co3) >> 32)) - 00794 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u; 00795 00796 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ 00797 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) + 00798 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u; 00799 } 00800 } 00801 twidCoefModifier <<= 2u; 00802 } 00803 00804 /* End of Middle stages process */ 00805 00806 /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */ 00807 /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */ 00808 /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */ 00809 /* data is in 5.27(q27) format for the 16 point as there are no middle stages */ 00810 00811 00812 /* Start of last stage process */ 00813 00814 00815 /* Initializations for the last stage */ 00816 j = fftLen >> 2; 00817 ptr1 = &pSrc[0]; 00818 00819 /* Calculations of last stage */ 00820 do 00821 { 00822 #ifndef ARM_MATH_BIG_ENDIAN 00823 /* Read xa (real), ya(imag) input */ 00824 xaya = *__SIMD64(ptr1)++; 00825 xa = (q31_t) xaya; 00826 ya = (q31_t) (xaya >> 32); 00827 00828 /* Read xb (real), yb(imag) input */ 00829 xbyb = *__SIMD64(ptr1)++; 00830 xb = (q31_t) xbyb; 00831 yb = (q31_t) (xbyb >> 32); 00832 00833 /* Read xc (real), yc(imag) input */ 00834 xcyc = *__SIMD64(ptr1)++; 00835 xc = (q31_t) xcyc; 00836 yc = (q31_t) (xcyc >> 32); 00837 00838 /* Read xc (real), yc(imag) input */ 00839 xdyd = *__SIMD64(ptr1)++; 00840 xd = (q31_t) xdyd; 00841 yd = (q31_t) (xdyd >> 32); 00842 00843 #else 00844 00845 /* Read xa (real), ya(imag) input */ 00846 xaya = *__SIMD64(ptr1)++; 00847 ya = (q31_t) xaya; 00848 xa = (q31_t) (xaya >> 32); 00849 00850 /* Read xb (real), yb(imag) input */ 00851 xbyb = *__SIMD64(ptr1)++; 00852 yb = (q31_t) xbyb; 00853 xb = (q31_t) (xbyb >> 32); 00854 00855 /* Read xc (real), yc(imag) input */ 00856 xcyc = *__SIMD64(ptr1)++; 00857 yc = (q31_t) xcyc; 00858 xc = (q31_t) (xcyc >> 32); 00859 00860 /* Read xc (real), yc(imag) input */ 00861 xdyd = *__SIMD64(ptr1)++; 00862 yd = (q31_t) xdyd; 00863 xd = (q31_t) (xdyd >> 32); 00864 00865 00866 #endif 00867 00868 /* xa' = xa + xb + xc + xd */ 00869 xa_out = xa + xb + xc + xd; 00870 00871 /* ya' = ya + yb + yc + yd */ 00872 ya_out = ya + yb + yc + yd; 00873 00874 /* pointer updation for writing */ 00875 ptr1 = ptr1 - 8u; 00876 00877 /* writing xa' and ya' */ 00878 *ptr1++ = xa_out; 00879 *ptr1++ = ya_out; 00880 00881 xc_out = (xa - xb + xc - xd); 00882 yc_out = (ya - yb + yc - yd); 00883 00884 /* writing xc' and yc' */ 00885 *ptr1++ = xc_out; 00886 *ptr1++ = yc_out; 00887 00888 xb_out = (xa - yb - xc + yd); 00889 yb_out = (ya + xb - yc - xd); 00890 00891 /* writing xb' and yb' */ 00892 *ptr1++ = xb_out; 00893 *ptr1++ = yb_out; 00894 00895 xd_out = (xa + yb - xc - yd); 00896 yd_out = (ya - xb - yc + xd); 00897 00898 /* writing xd' and yd' */ 00899 *ptr1++ = xd_out; 00900 *ptr1++ = yd_out; 00901 00902 00903 } while(--j); 00904 00905 /* output is in 11.21(q21) format for the 1024 point */ 00906 /* output is in 9.23(q23) format for the 256 point */ 00907 /* output is in 7.25(q25) format for the 64 point */ 00908 /* output is in 5.27(q27) format for the 16 point */ 00909 00910 /* End of last stage process */ 00911 }
Generated on Tue Jul 12 2022 18:44:08 by
1.7.2
