CMSIS DSP library
Dependents: KL25Z_FFT_Demo Hat_Board_v5_1 KL25Z_FFT_Demo_tony KL25Z_FFT_Demo_tony ... more
Fork of mbed-dsp by
arm_cfft_radix4_q31.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2013 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 17. January 2013 00005 * $Revision: V1.4.1 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_cfft_radix4_q31.c 00009 * 00010 * Description: This file has function definition of Radix-4 FFT & IFFT function and 00011 * In-place bit reversal using bit reversal table 00012 * 00013 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00014 * 00015 * Redistribution and use in source and binary forms, with or without 00016 * modification, are permitted provided that the following conditions 00017 * are met: 00018 * - Redistributions of source code must retain the above copyright 00019 * notice, this list of conditions and the following disclaimer. 00020 * - Redistributions in binary form must reproduce the above copyright 00021 * notice, this list of conditions and the following disclaimer in 00022 * the documentation and/or other materials provided with the 00023 * distribution. 00024 * - Neither the name of ARM LIMITED nor the names of its contributors 00025 * may be used to endorse or promote products derived from this 00026 * software without specific prior written permission. 00027 * 00028 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00029 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00030 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00031 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 00032 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00033 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00034 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00035 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00036 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00037 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00038 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00039 * POSSIBILITY OF SUCH DAMAGE. 00040 * -------------------------------------------------------------------- */ 00041 00042 #include "arm_math.h" 00043 00044 void arm_radix4_butterfly_inverse_q31( 00045 q31_t * pSrc, 00046 uint32_t fftLen, 00047 q31_t * pCoef, 00048 uint32_t twidCoefModifier); 00049 00050 void arm_radix4_butterfly_q31( 00051 q31_t * pSrc, 00052 uint32_t fftLen, 00053 q31_t * pCoef, 00054 uint32_t twidCoefModifier); 00055 00056 void arm_bitreversal_q31( 00057 q31_t * pSrc, 00058 uint32_t fftLen, 00059 uint16_t bitRevFactor, 00060 uint16_t * pBitRevTab); 00061 00062 /** 00063 * @ingroup groupTransforms 00064 */ 00065 00066 /** 00067 * @addtogroup ComplexFFT 00068 * @{ 00069 */ 00070 00071 /** 00072 * @details 00073 * @brief Processing function for the Q31 CFFT/CIFFT. 00074 * @param[in] *S points to an instance of the Q31 CFFT/CIFFT structure. 00075 * @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place. 00076 * @return none. 00077 * 00078 * \par Input and output formats: 00079 * \par 00080 * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process. 00081 * Hence the output format is different for different FFT sizes. 00082 * The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT: 00083 * \par 00084 * \image html CFFTQ31.gif "Input and Output Formats for Q31 CFFT" 00085 * \image html CIFFTQ31.gif "Input and Output Formats for Q31 CIFFT" 00086 * 00087 */ 00088 00089 void arm_cfft_radix4_q31( 00090 const arm_cfft_radix4_instance_q31 * S, 00091 q31_t * pSrc) 00092 { 00093 if(S->ifftFlag == 1u) 00094 { 00095 /* Complex IFFT radix-4 */ 00096 arm_radix4_butterfly_inverse_q31(pSrc, S->fftLen, S->pTwiddle, 00097 S->twidCoefModifier); 00098 } 00099 else 00100 { 00101 /* Complex FFT radix-4 */ 00102 arm_radix4_butterfly_q31(pSrc, S->fftLen, S->pTwiddle, 00103 S->twidCoefModifier); 00104 } 00105 00106 00107 if(S->bitReverseFlag == 1u) 00108 { 00109 /* Bit Reversal */ 00110 arm_bitreversal_q31(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable); 00111 } 00112 00113 } 00114 00115 /** 00116 * @} end of ComplexFFT group 00117 */ 00118 00119 /* 00120 * Radix-4 FFT algorithm used is : 00121 * 00122 * Input real and imaginary data: 00123 * x(n) = xa + j * ya 00124 * x(n+N/4 ) = xb + j * yb 00125 * x(n+N/2 ) = xc + j * yc 00126 * x(n+3N 4) = xd + j * yd 00127 * 00128 * 00129 * Output real and imaginary data: 00130 * x(4r) = xa'+ j * ya' 00131 * x(4r+1) = xb'+ j * yb' 00132 * x(4r+2) = xc'+ j * yc' 00133 * x(4r+3) = xd'+ j * yd' 00134 * 00135 * 00136 * Twiddle factors for radix-4 FFT: 00137 * Wn = co1 + j * (- si1) 00138 * W2n = co2 + j * (- si2) 00139 * W3n = co3 + j * (- si3) 00140 * 00141 * Butterfly implementation: 00142 * xa' = xa + xb + xc + xd 00143 * ya' = ya + yb + yc + yd 00144 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) 00145 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) 00146 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) 00147 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) 00148 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) 00149 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) 00150 * 00151 */ 00152 00153 /** 00154 * @brief Core function for the Q31 CFFT butterfly process. 00155 * @param[in, out] *pSrc points to the in-place buffer of Q31 data type. 00156 * @param[in] fftLen length of the FFT. 00157 * @param[in] *pCoef points to twiddle coefficient buffer. 00158 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 00159 * @return none. 00160 */ 00161 00162 void arm_radix4_butterfly_q31( 00163 q31_t * pSrc, 00164 uint32_t fftLen, 00165 q31_t * pCoef, 00166 uint32_t twidCoefModifier) 00167 { 00168 uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k; 00169 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3; 00170 00171 q31_t xa, xb, xc, xd; 00172 q31_t ya, yb, yc, yd; 00173 q31_t xa_out, xb_out, xc_out, xd_out; 00174 q31_t ya_out, yb_out, yc_out, yd_out; 00175 00176 q31_t *ptr1; 00177 q63_t xaya, xbyb, xcyc, xdyd; 00178 /* Total process is divided into three stages */ 00179 00180 /* process first stage, middle stages, & last stage */ 00181 00182 00183 /* start of first stage process */ 00184 00185 /* Initializations for the first stage */ 00186 n2 = fftLen; 00187 n1 = n2; 00188 /* n2 = fftLen/4 */ 00189 n2 >>= 2u; 00190 i0 = 0u; 00191 ia1 = 0u; 00192 00193 j = n2; 00194 00195 /* Calculation of first stage */ 00196 do 00197 { 00198 /* index calculation for the input as, */ 00199 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */ 00200 i1 = i0 + n2; 00201 i2 = i1 + n2; 00202 i3 = i2 + n2; 00203 00204 /* input is in 1.31(q31) format and provide 4 guard bits for the input */ 00205 00206 /* Butterfly implementation */ 00207 /* xa + xc */ 00208 r1 = (pSrc[(2u * i0)] >> 4u) + (pSrc[(2u * i2)] >> 4u); 00209 /* xa - xc */ 00210 r2 = (pSrc[2u * i0] >> 4u) - (pSrc[2u * i2] >> 4u); 00211 00212 /* xb + xd */ 00213 t1 = (pSrc[2u * i1] >> 4u) + (pSrc[2u * i3] >> 4u); 00214 00215 /* ya + yc */ 00216 s1 = (pSrc[(2u * i0) + 1u] >> 4u) + (pSrc[(2u * i2) + 1u] >> 4u); 00217 /* ya - yc */ 00218 s2 = (pSrc[(2u * i0) + 1u] >> 4u) - (pSrc[(2u * i2) + 1u] >> 4u); 00219 00220 /* xa' = xa + xb + xc + xd */ 00221 pSrc[2u * i0] = (r1 + t1); 00222 /* (xa + xc) - (xb + xd) */ 00223 r1 = r1 - t1; 00224 /* yb + yd */ 00225 t2 = (pSrc[(2u * i1) + 1u] >> 4u) + (pSrc[(2u * i3) + 1u] >> 4u); 00226 00227 /* ya' = ya + yb + yc + yd */ 00228 pSrc[(2u * i0) + 1u] = (s1 + t2); 00229 00230 /* (ya + yc) - (yb + yd) */ 00231 s1 = s1 - t2; 00232 00233 /* yb - yd */ 00234 t1 = (pSrc[(2u * i1) + 1u] >> 4u) - (pSrc[(2u * i3) + 1u] >> 4u); 00235 /* xb - xd */ 00236 t2 = (pSrc[2u * i1] >> 4u) - (pSrc[2u * i3] >> 4u); 00237 00238 /* index calculation for the coefficients */ 00239 ia2 = 2u * ia1; 00240 co2 = pCoef[ia2 * 2u]; 00241 si2 = pCoef[(ia2 * 2u) + 1u]; 00242 00243 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */ 00244 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) + 00245 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u; 00246 00247 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */ 00248 pSrc[(2u * i1) + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) - 00249 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u; 00250 00251 /* (xa - xc) + (yb - yd) */ 00252 r1 = r2 + t1; 00253 /* (xa - xc) - (yb - yd) */ 00254 r2 = r2 - t1; 00255 00256 /* (ya - yc) - (xb - xd) */ 00257 s1 = s2 - t2; 00258 /* (ya - yc) + (xb - xd) */ 00259 s2 = s2 + t2; 00260 00261 co1 = pCoef[ia1 * 2u]; 00262 si1 = pCoef[(ia1 * 2u) + 1u]; 00263 00264 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */ 00265 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) + 00266 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u; 00267 00268 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */ 00269 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) - 00270 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u; 00271 00272 /* index calculation for the coefficients */ 00273 ia3 = 3u * ia1; 00274 co3 = pCoef[ia3 * 2u]; 00275 si3 = pCoef[(ia3 * 2u) + 1u]; 00276 00277 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */ 00278 pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) + 00279 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u; 00280 00281 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */ 00282 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) - 00283 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u; 00284 00285 /* Twiddle coefficients index modifier */ 00286 ia1 = ia1 + twidCoefModifier; 00287 00288 /* Updating input index */ 00289 i0 = i0 + 1u; 00290 00291 } while(--j); 00292 00293 /* end of first stage process */ 00294 00295 /* data is in 5.27(q27) format */ 00296 00297 00298 /* start of Middle stages process */ 00299 00300 00301 /* each stage in middle stages provides two down scaling of the input */ 00302 00303 twidCoefModifier <<= 2u; 00304 00305 00306 for (k = fftLen / 4u; k > 4u; k >>= 2u) 00307 { 00308 /* Initializations for the first stage */ 00309 n1 = n2; 00310 n2 >>= 2u; 00311 ia1 = 0u; 00312 00313 /* Calculation of first stage */ 00314 for (j = 0u; j <= (n2 - 1u); j++) 00315 { 00316 /* index calculation for the coefficients */ 00317 ia2 = ia1 + ia1; 00318 ia3 = ia2 + ia1; 00319 co1 = pCoef[ia1 * 2u]; 00320 si1 = pCoef[(ia1 * 2u) + 1u]; 00321 co2 = pCoef[ia2 * 2u]; 00322 si2 = pCoef[(ia2 * 2u) + 1u]; 00323 co3 = pCoef[ia3 * 2u]; 00324 si3 = pCoef[(ia3 * 2u) + 1u]; 00325 /* Twiddle coefficients index modifier */ 00326 ia1 = ia1 + twidCoefModifier; 00327 00328 for (i0 = j; i0 < fftLen; i0 += n1) 00329 { 00330 /* index calculation for the input as, */ 00331 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */ 00332 i1 = i0 + n2; 00333 i2 = i1 + n2; 00334 i3 = i2 + n2; 00335 00336 /* Butterfly implementation */ 00337 /* xa + xc */ 00338 r1 = pSrc[2u * i0] + pSrc[2u * i2]; 00339 /* xa - xc */ 00340 r2 = pSrc[2u * i0] - pSrc[2u * i2]; 00341 00342 /* ya + yc */ 00343 s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u]; 00344 /* ya - yc */ 00345 s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u]; 00346 00347 /* xb + xd */ 00348 t1 = pSrc[2u * i1] + pSrc[2u * i3]; 00349 00350 /* xa' = xa + xb + xc + xd */ 00351 pSrc[2u * i0] = (r1 + t1) >> 2u; 00352 /* xa + xc -(xb + xd) */ 00353 r1 = r1 - t1; 00354 00355 /* yb + yd */ 00356 t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u]; 00357 /* ya' = ya + yb + yc + yd */ 00358 pSrc[(2u * i0) + 1u] = (s1 + t2) >> 2u; 00359 00360 /* (ya + yc) - (yb + yd) */ 00361 s1 = s1 - t2; 00362 00363 /* (yb - yd) */ 00364 t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u]; 00365 /* (xb - xd) */ 00366 t2 = pSrc[2u * i1] - pSrc[2u * i3]; 00367 00368 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */ 00369 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) + 00370 ((int32_t) (((q63_t) s1 * si2) >> 32))) >> 1u; 00371 00372 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */ 00373 pSrc[(2u * i1) + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) - 00374 ((int32_t) (((q63_t) r1 * si2) >> 32))) >> 1u; 00375 00376 /* (xa - xc) + (yb - yd) */ 00377 r1 = r2 + t1; 00378 /* (xa - xc) - (yb - yd) */ 00379 r2 = r2 - t1; 00380 00381 /* (ya - yc) - (xb - xd) */ 00382 s1 = s2 - t2; 00383 /* (ya - yc) + (xb - xd) */ 00384 s2 = s2 + t2; 00385 00386 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */ 00387 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) + 00388 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u; 00389 00390 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */ 00391 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) - 00392 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u; 00393 00394 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */ 00395 pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) + 00396 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u; 00397 00398 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */ 00399 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) - 00400 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u; 00401 } 00402 } 00403 twidCoefModifier <<= 2u; 00404 } 00405 00406 /* End of Middle stages process */ 00407 00408 /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */ 00409 /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */ 00410 /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */ 00411 /* data is in 5.27(q27) format for the 16 point as there are no middle stages */ 00412 00413 00414 /* start of Last stage process */ 00415 /* Initializations for the last stage */ 00416 j = fftLen >> 2; 00417 ptr1 = &pSrc[0]; 00418 00419 /* Calculations of last stage */ 00420 do 00421 { 00422 00423 #ifndef ARM_MATH_BIG_ENDIAN 00424 00425 /* Read xa (real), ya(imag) input */ 00426 xaya = *__SIMD64(ptr1)++; 00427 xa = (q31_t) xaya; 00428 ya = (q31_t) (xaya >> 32); 00429 00430 /* Read xb (real), yb(imag) input */ 00431 xbyb = *__SIMD64(ptr1)++; 00432 xb = (q31_t) xbyb; 00433 yb = (q31_t) (xbyb >> 32); 00434 00435 /* Read xc (real), yc(imag) input */ 00436 xcyc = *__SIMD64(ptr1)++; 00437 xc = (q31_t) xcyc; 00438 yc = (q31_t) (xcyc >> 32); 00439 00440 /* Read xc (real), yc(imag) input */ 00441 xdyd = *__SIMD64(ptr1)++; 00442 xd = (q31_t) xdyd; 00443 yd = (q31_t) (xdyd >> 32); 00444 00445 #else 00446 00447 /* Read xa (real), ya(imag) input */ 00448 xaya = *__SIMD64(ptr1)++; 00449 ya = (q31_t) xaya; 00450 xa = (q31_t) (xaya >> 32); 00451 00452 /* Read xb (real), yb(imag) input */ 00453 xbyb = *__SIMD64(ptr1)++; 00454 yb = (q31_t) xbyb; 00455 xb = (q31_t) (xbyb >> 32); 00456 00457 /* Read xc (real), yc(imag) input */ 00458 xcyc = *__SIMD64(ptr1)++; 00459 yc = (q31_t) xcyc; 00460 xc = (q31_t) (xcyc >> 32); 00461 00462 /* Read xc (real), yc(imag) input */ 00463 xdyd = *__SIMD64(ptr1)++; 00464 yd = (q31_t) xdyd; 00465 xd = (q31_t) (xdyd >> 32); 00466 00467 00468 #endif 00469 00470 /* xa' = xa + xb + xc + xd */ 00471 xa_out = xa + xb + xc + xd; 00472 00473 /* ya' = ya + yb + yc + yd */ 00474 ya_out = ya + yb + yc + yd; 00475 00476 /* pointer updation for writing */ 00477 ptr1 = ptr1 - 8u; 00478 00479 /* writing xa' and ya' */ 00480 *ptr1++ = xa_out; 00481 *ptr1++ = ya_out; 00482 00483 xc_out = (xa - xb + xc - xd); 00484 yc_out = (ya - yb + yc - yd); 00485 00486 /* writing xc' and yc' */ 00487 *ptr1++ = xc_out; 00488 *ptr1++ = yc_out; 00489 00490 xb_out = (xa + yb - xc - yd); 00491 yb_out = (ya - xb - yc + xd); 00492 00493 /* writing xb' and yb' */ 00494 *ptr1++ = xb_out; 00495 *ptr1++ = yb_out; 00496 00497 xd_out = (xa - yb - xc + yd); 00498 yd_out = (ya + xb - yc - xd); 00499 00500 /* writing xd' and yd' */ 00501 *ptr1++ = xd_out; 00502 *ptr1++ = yd_out; 00503 00504 00505 } while(--j); 00506 00507 /* output is in 11.21(q21) format for the 1024 point */ 00508 /* output is in 9.23(q23) format for the 256 point */ 00509 /* output is in 7.25(q25) format for the 64 point */ 00510 /* output is in 5.27(q27) format for the 16 point */ 00511 00512 /* End of last stage process */ 00513 00514 } 00515 00516 00517 /** 00518 * @brief Core function for the Q31 CIFFT butterfly process. 00519 * @param[in, out] *pSrc points to the in-place buffer of Q31 data type. 00520 * @param[in] fftLen length of the FFT. 00521 * @param[in] *pCoef points to twiddle coefficient buffer. 00522 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 00523 * @return none. 00524 */ 00525 00526 00527 /* 00528 * Radix-4 IFFT algorithm used is : 00529 * 00530 * CIFFT uses same twiddle coefficients as CFFT Function 00531 * x[k] = x[n] + (j)k * x[n + fftLen/4] + (-1)k * x[n+fftLen/2] + (-j)k * x[n+3*fftLen/4] 00532 * 00533 * 00534 * IFFT is implemented with following changes in equations from FFT 00535 * 00536 * Input real and imaginary data: 00537 * x(n) = xa + j * ya 00538 * x(n+N/4 ) = xb + j * yb 00539 * x(n+N/2 ) = xc + j * yc 00540 * x(n+3N 4) = xd + j * yd 00541 * 00542 * 00543 * Output real and imaginary data: 00544 * x(4r) = xa'+ j * ya' 00545 * x(4r+1) = xb'+ j * yb' 00546 * x(4r+2) = xc'+ j * yc' 00547 * x(4r+3) = xd'+ j * yd' 00548 * 00549 * 00550 * Twiddle factors for radix-4 IFFT: 00551 * Wn = co1 + j * (si1) 00552 * W2n = co2 + j * (si2) 00553 * W3n = co3 + j * (si3) 00554 00555 * The real and imaginary output values for the radix-4 butterfly are 00556 * xa' = xa + xb + xc + xd 00557 * ya' = ya + yb + yc + yd 00558 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) 00559 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) 00560 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) 00561 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) 00562 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) 00563 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) 00564 * 00565 */ 00566 00567 void arm_radix4_butterfly_inverse_q31( 00568 q31_t * pSrc, 00569 uint32_t fftLen, 00570 q31_t * pCoef, 00571 uint32_t twidCoefModifier) 00572 { 00573 uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k; 00574 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3; 00575 q31_t xa, xb, xc, xd; 00576 q31_t ya, yb, yc, yd; 00577 q31_t xa_out, xb_out, xc_out, xd_out; 00578 q31_t ya_out, yb_out, yc_out, yd_out; 00579 00580 q31_t *ptr1; 00581 q63_t xaya, xbyb, xcyc, xdyd; 00582 00583 /* input is be 1.31(q31) format for all FFT sizes */ 00584 /* Total process is divided into three stages */ 00585 /* process first stage, middle stages, & last stage */ 00586 00587 /* Start of first stage process */ 00588 00589 /* Initializations for the first stage */ 00590 n2 = fftLen; 00591 n1 = n2; 00592 /* n2 = fftLen/4 */ 00593 n2 >>= 2u; 00594 i0 = 0u; 00595 ia1 = 0u; 00596 00597 j = n2; 00598 00599 do 00600 { 00601 00602 /* input is in 1.31(q31) format and provide 4 guard bits for the input */ 00603 00604 /* index calculation for the input as, */ 00605 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */ 00606 i1 = i0 + n2; 00607 i2 = i1 + n2; 00608 i3 = i2 + n2; 00609 00610 /* Butterfly implementation */ 00611 /* xa + xc */ 00612 r1 = (pSrc[2u * i0] >> 4u) + (pSrc[2u * i2] >> 4u); 00613 /* xa - xc */ 00614 r2 = (pSrc[2u * i0] >> 4u) - (pSrc[2u * i2] >> 4u); 00615 00616 /* xb + xd */ 00617 t1 = (pSrc[2u * i1] >> 4u) + (pSrc[2u * i3] >> 4u); 00618 00619 /* ya + yc */ 00620 s1 = (pSrc[(2u * i0) + 1u] >> 4u) + (pSrc[(2u * i2) + 1u] >> 4u); 00621 /* ya - yc */ 00622 s2 = (pSrc[(2u * i0) + 1u] >> 4u) - (pSrc[(2u * i2) + 1u] >> 4u); 00623 00624 /* xa' = xa + xb + xc + xd */ 00625 pSrc[2u * i0] = (r1 + t1); 00626 /* (xa + xc) - (xb + xd) */ 00627 r1 = r1 - t1; 00628 /* yb + yd */ 00629 t2 = (pSrc[(2u * i1) + 1u] >> 4u) + (pSrc[(2u * i3) + 1u] >> 4u); 00630 /* ya' = ya + yb + yc + yd */ 00631 pSrc[(2u * i0) + 1u] = (s1 + t2); 00632 00633 /* (ya + yc) - (yb + yd) */ 00634 s1 = s1 - t2; 00635 00636 /* yb - yd */ 00637 t1 = (pSrc[(2u * i1) + 1u] >> 4u) - (pSrc[(2u * i3) + 1u] >> 4u); 00638 /* xb - xd */ 00639 t2 = (pSrc[2u * i1] >> 4u) - (pSrc[2u * i3] >> 4u); 00640 00641 /* index calculation for the coefficients */ 00642 ia2 = 2u * ia1; 00643 co2 = pCoef[ia2 * 2u]; 00644 si2 = pCoef[(ia2 * 2u) + 1u]; 00645 00646 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ 00647 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) - 00648 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u; 00649 00650 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ 00651 pSrc[2u * i1 + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) + 00652 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u; 00653 00654 /* (xa - xc) - (yb - yd) */ 00655 r1 = r2 - t1; 00656 /* (xa - xc) + (yb - yd) */ 00657 r2 = r2 + t1; 00658 00659 /* (ya - yc) + (xb - xd) */ 00660 s1 = s2 + t2; 00661 /* (ya - yc) - (xb - xd) */ 00662 s2 = s2 - t2; 00663 00664 co1 = pCoef[ia1 * 2u]; 00665 si1 = pCoef[(ia1 * 2u) + 1u]; 00666 00667 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ 00668 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) - 00669 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u; 00670 00671 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ 00672 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) + 00673 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u; 00674 00675 /* index calculation for the coefficients */ 00676 ia3 = 3u * ia1; 00677 co3 = pCoef[ia3 * 2u]; 00678 si3 = pCoef[(ia3 * 2u) + 1u]; 00679 00680 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ 00681 pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) - 00682 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u; 00683 00684 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ 00685 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) + 00686 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u; 00687 00688 /* Twiddle coefficients index modifier */ 00689 ia1 = ia1 + twidCoefModifier; 00690 00691 /* Updating input index */ 00692 i0 = i0 + 1u; 00693 00694 } while(--j); 00695 00696 /* data is in 5.27(q27) format */ 00697 /* each stage provides two down scaling of the input */ 00698 00699 00700 /* Start of Middle stages process */ 00701 00702 twidCoefModifier <<= 2u; 00703 00704 /* Calculation of second stage to excluding last stage */ 00705 for (k = fftLen / 4u; k > 4u; k >>= 2u) 00706 { 00707 /* Initializations for the first stage */ 00708 n1 = n2; 00709 n2 >>= 2u; 00710 ia1 = 0u; 00711 00712 for (j = 0; j <= (n2 - 1u); j++) 00713 { 00714 /* index calculation for the coefficients */ 00715 ia2 = ia1 + ia1; 00716 ia3 = ia2 + ia1; 00717 co1 = pCoef[ia1 * 2u]; 00718 si1 = pCoef[(ia1 * 2u) + 1u]; 00719 co2 = pCoef[ia2 * 2u]; 00720 si2 = pCoef[(ia2 * 2u) + 1u]; 00721 co3 = pCoef[ia3 * 2u]; 00722 si3 = pCoef[(ia3 * 2u) + 1u]; 00723 /* Twiddle coefficients index modifier */ 00724 ia1 = ia1 + twidCoefModifier; 00725 00726 for (i0 = j; i0 < fftLen; i0 += n1) 00727 { 00728 /* index calculation for the input as, */ 00729 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */ 00730 i1 = i0 + n2; 00731 i2 = i1 + n2; 00732 i3 = i2 + n2; 00733 00734 /* Butterfly implementation */ 00735 /* xa + xc */ 00736 r1 = pSrc[2u * i0] + pSrc[2u * i2]; 00737 /* xa - xc */ 00738 r2 = pSrc[2u * i0] - pSrc[2u * i2]; 00739 00740 /* ya + yc */ 00741 s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u]; 00742 /* ya - yc */ 00743 s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u]; 00744 00745 /* xb + xd */ 00746 t1 = pSrc[2u * i1] + pSrc[2u * i3]; 00747 00748 /* xa' = xa + xb + xc + xd */ 00749 pSrc[2u * i0] = (r1 + t1) >> 2u; 00750 /* xa + xc -(xb + xd) */ 00751 r1 = r1 - t1; 00752 /* yb + yd */ 00753 t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u]; 00754 /* ya' = ya + yb + yc + yd */ 00755 pSrc[(2u * i0) + 1u] = (s1 + t2) >> 2u; 00756 00757 /* (ya + yc) - (yb + yd) */ 00758 s1 = s1 - t2; 00759 00760 /* (yb - yd) */ 00761 t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u]; 00762 /* (xb - xd) */ 00763 t2 = pSrc[2u * i1] - pSrc[2u * i3]; 00764 00765 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ 00766 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32u)) - 00767 ((int32_t) (((q63_t) s1 * si2) >> 32u))) >> 1u; 00768 00769 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ 00770 pSrc[(2u * i1) + 1u] = 00771 (((int32_t) (((q63_t) s1 * co2) >> 32u)) + 00772 ((int32_t) (((q63_t) r1 * si2) >> 32u))) >> 1u; 00773 00774 /* (xa - xc) - (yb - yd) */ 00775 r1 = r2 - t1; 00776 /* (xa - xc) + (yb - yd) */ 00777 r2 = r2 + t1; 00778 00779 /* (ya - yc) + (xb - xd) */ 00780 s1 = s2 + t2; 00781 /* (ya - yc) - (xb - xd) */ 00782 s2 = s2 - t2; 00783 00784 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ 00785 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) - 00786 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u; 00787 00788 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ 00789 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) + 00790 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u; 00791 00792 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ 00793 pSrc[(2u * i3)] = (((int32_t) (((q63_t) r2 * co3) >> 32)) - 00794 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u; 00795 00796 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ 00797 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) + 00798 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u; 00799 } 00800 } 00801 twidCoefModifier <<= 2u; 00802 } 00803 00804 /* End of Middle stages process */ 00805 00806 /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */ 00807 /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */ 00808 /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */ 00809 /* data is in 5.27(q27) format for the 16 point as there are no middle stages */ 00810 00811 00812 /* Start of last stage process */ 00813 00814 00815 /* Initializations for the last stage */ 00816 j = fftLen >> 2; 00817 ptr1 = &pSrc[0]; 00818 00819 /* Calculations of last stage */ 00820 do 00821 { 00822 #ifndef ARM_MATH_BIG_ENDIAN 00823 /* Read xa (real), ya(imag) input */ 00824 xaya = *__SIMD64(ptr1)++; 00825 xa = (q31_t) xaya; 00826 ya = (q31_t) (xaya >> 32); 00827 00828 /* Read xb (real), yb(imag) input */ 00829 xbyb = *__SIMD64(ptr1)++; 00830 xb = (q31_t) xbyb; 00831 yb = (q31_t) (xbyb >> 32); 00832 00833 /* Read xc (real), yc(imag) input */ 00834 xcyc = *__SIMD64(ptr1)++; 00835 xc = (q31_t) xcyc; 00836 yc = (q31_t) (xcyc >> 32); 00837 00838 /* Read xc (real), yc(imag) input */ 00839 xdyd = *__SIMD64(ptr1)++; 00840 xd = (q31_t) xdyd; 00841 yd = (q31_t) (xdyd >> 32); 00842 00843 #else 00844 00845 /* Read xa (real), ya(imag) input */ 00846 xaya = *__SIMD64(ptr1)++; 00847 ya = (q31_t) xaya; 00848 xa = (q31_t) (xaya >> 32); 00849 00850 /* Read xb (real), yb(imag) input */ 00851 xbyb = *__SIMD64(ptr1)++; 00852 yb = (q31_t) xbyb; 00853 xb = (q31_t) (xbyb >> 32); 00854 00855 /* Read xc (real), yc(imag) input */ 00856 xcyc = *__SIMD64(ptr1)++; 00857 yc = (q31_t) xcyc; 00858 xc = (q31_t) (xcyc >> 32); 00859 00860 /* Read xc (real), yc(imag) input */ 00861 xdyd = *__SIMD64(ptr1)++; 00862 yd = (q31_t) xdyd; 00863 xd = (q31_t) (xdyd >> 32); 00864 00865 00866 #endif 00867 00868 /* xa' = xa + xb + xc + xd */ 00869 xa_out = xa + xb + xc + xd; 00870 00871 /* ya' = ya + yb + yc + yd */ 00872 ya_out = ya + yb + yc + yd; 00873 00874 /* pointer updation for writing */ 00875 ptr1 = ptr1 - 8u; 00876 00877 /* writing xa' and ya' */ 00878 *ptr1++ = xa_out; 00879 *ptr1++ = ya_out; 00880 00881 xc_out = (xa - xb + xc - xd); 00882 yc_out = (ya - yb + yc - yd); 00883 00884 /* writing xc' and yc' */ 00885 *ptr1++ = xc_out; 00886 *ptr1++ = yc_out; 00887 00888 xb_out = (xa - yb - xc + yd); 00889 yb_out = (ya + xb - yc - xd); 00890 00891 /* writing xb' and yb' */ 00892 *ptr1++ = xb_out; 00893 *ptr1++ = yb_out; 00894 00895 xd_out = (xa + yb - xc - yd); 00896 yd_out = (ya - xb - yc + xd); 00897 00898 /* writing xd' and yd' */ 00899 *ptr1++ = xd_out; 00900 *ptr1++ = yd_out; 00901 00902 00903 } while(--j); 00904 00905 /* output is in 11.21(q21) format for the 1024 point */ 00906 /* output is in 9.23(q23) format for the 256 point */ 00907 /* output is in 7.25(q25) format for the 64 point */ 00908 /* output is in 5.27(q27) format for the 16 point */ 00909 00910 /* End of last stage process */ 00911 }
Generated on Tue Jul 12 2022 12:36:53 by 1.7.2