CMSIS DSP library

Dependents:   performance_timer Surfboard_ gps2rtty Capstone ... more

Legacy Warning

This is an mbed 2 library. To learn more about mbed OS 5, visit the docs.

Committer:
mbed_official
Date:
Fri Nov 20 08:45:18 2015 +0000
Revision:
5:3762170b6d4d
Parent:
3:7a284390b0ce
Synchronized with git revision 2eb940b9a73af188d3004a2575fdfbb05febe62b

Full URL: https://github.com/mbedmicro/mbed/commit/2eb940b9a73af188d3004a2575fdfbb05febe62b/

Added option to build rpc library. closes #1426

Who changed what in which revision?

UserRevisionLine numberNew contents of line
emilmont 1:fdd22bb7aa52 1 /* ----------------------------------------------------------------------
mbed_official 5:3762170b6d4d 2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
emilmont 1:fdd22bb7aa52 3 *
mbed_official 5:3762170b6d4d 4 * $Date: 19. March 2015
mbed_official 5:3762170b6d4d 5 * $Revision: V.1.4.5
emilmont 1:fdd22bb7aa52 6 *
emilmont 2:da51fb522205 7 * Project: CMSIS DSP Library
emilmont 2:da51fb522205 8 * Title: arm_cfft_radix4_q31.c
emilmont 1:fdd22bb7aa52 9 *
emilmont 2:da51fb522205 10 * Description: This file has function definition of Radix-4 FFT & IFFT function and
emilmont 2:da51fb522205 11 * In-place bit reversal using bit reversal table
emilmont 1:fdd22bb7aa52 12 *
emilmont 1:fdd22bb7aa52 13 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
emilmont 1:fdd22bb7aa52 14 *
mbed_official 3:7a284390b0ce 15 * Redistribution and use in source and binary forms, with or without
mbed_official 3:7a284390b0ce 16 * modification, are permitted provided that the following conditions
mbed_official 3:7a284390b0ce 17 * are met:
mbed_official 3:7a284390b0ce 18 * - Redistributions of source code must retain the above copyright
mbed_official 3:7a284390b0ce 19 * notice, this list of conditions and the following disclaimer.
mbed_official 3:7a284390b0ce 20 * - Redistributions in binary form must reproduce the above copyright
mbed_official 3:7a284390b0ce 21 * notice, this list of conditions and the following disclaimer in
mbed_official 3:7a284390b0ce 22 * the documentation and/or other materials provided with the
mbed_official 3:7a284390b0ce 23 * distribution.
mbed_official 3:7a284390b0ce 24 * - Neither the name of ARM LIMITED nor the names of its contributors
mbed_official 3:7a284390b0ce 25 * may be used to endorse or promote products derived from this
mbed_official 3:7a284390b0ce 26 * software without specific prior written permission.
mbed_official 3:7a284390b0ce 27 *
mbed_official 3:7a284390b0ce 28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
mbed_official 3:7a284390b0ce 29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
mbed_official 3:7a284390b0ce 30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
mbed_official 3:7a284390b0ce 31 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
mbed_official 3:7a284390b0ce 32 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
mbed_official 3:7a284390b0ce 33 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
mbed_official 3:7a284390b0ce 34 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
mbed_official 3:7a284390b0ce 35 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
mbed_official 3:7a284390b0ce 36 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
mbed_official 3:7a284390b0ce 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
mbed_official 3:7a284390b0ce 38 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
mbed_official 3:7a284390b0ce 39 * POSSIBILITY OF SUCH DAMAGE.
emilmont 1:fdd22bb7aa52 40 * -------------------------------------------------------------------- */
mbed_official 3:7a284390b0ce 41
emilmont 1:fdd22bb7aa52 42 #include "arm_math.h"
emilmont 1:fdd22bb7aa52 43
mbed_official 3:7a284390b0ce 44 void arm_radix4_butterfly_inverse_q31(
mbed_official 3:7a284390b0ce 45 q31_t * pSrc,
mbed_official 3:7a284390b0ce 46 uint32_t fftLen,
mbed_official 3:7a284390b0ce 47 q31_t * pCoef,
mbed_official 3:7a284390b0ce 48 uint32_t twidCoefModifier);
mbed_official 3:7a284390b0ce 49
mbed_official 3:7a284390b0ce 50 void arm_radix4_butterfly_q31(
mbed_official 3:7a284390b0ce 51 q31_t * pSrc,
mbed_official 3:7a284390b0ce 52 uint32_t fftLen,
mbed_official 3:7a284390b0ce 53 q31_t * pCoef,
mbed_official 3:7a284390b0ce 54 uint32_t twidCoefModifier);
mbed_official 3:7a284390b0ce 55
mbed_official 3:7a284390b0ce 56 void arm_bitreversal_q31(
mbed_official 3:7a284390b0ce 57 q31_t * pSrc,
mbed_official 3:7a284390b0ce 58 uint32_t fftLen,
mbed_official 3:7a284390b0ce 59 uint16_t bitRevFactor,
mbed_official 3:7a284390b0ce 60 uint16_t * pBitRevTab);
emilmont 1:fdd22bb7aa52 61
emilmont 1:fdd22bb7aa52 62 /**
emilmont 1:fdd22bb7aa52 63 * @ingroup groupTransforms
emilmont 1:fdd22bb7aa52 64 */
emilmont 1:fdd22bb7aa52 65
emilmont 1:fdd22bb7aa52 66 /**
mbed_official 3:7a284390b0ce 67 * @addtogroup ComplexFFT
emilmont 1:fdd22bb7aa52 68 * @{
emilmont 1:fdd22bb7aa52 69 */
emilmont 1:fdd22bb7aa52 70
emilmont 1:fdd22bb7aa52 71 /**
emilmont 1:fdd22bb7aa52 72 * @details
emilmont 1:fdd22bb7aa52 73 * @brief Processing function for the Q31 CFFT/CIFFT.
mbed_official 5:3762170b6d4d 74 * @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q31 and will be removed
emilmont 1:fdd22bb7aa52 75 * @param[in] *S points to an instance of the Q31 CFFT/CIFFT structure.
emilmont 1:fdd22bb7aa52 76 * @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
emilmont 1:fdd22bb7aa52 77 * @return none.
emilmont 1:fdd22bb7aa52 78 *
emilmont 1:fdd22bb7aa52 79 * \par Input and output formats:
emilmont 1:fdd22bb7aa52 80 * \par
emilmont 1:fdd22bb7aa52 81 * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
emilmont 1:fdd22bb7aa52 82 * Hence the output format is different for different FFT sizes.
emilmont 1:fdd22bb7aa52 83 * The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT:
emilmont 1:fdd22bb7aa52 84 * \par
emilmont 1:fdd22bb7aa52 85 * \image html CFFTQ31.gif "Input and Output Formats for Q31 CFFT"
emilmont 1:fdd22bb7aa52 86 * \image html CIFFTQ31.gif "Input and Output Formats for Q31 CIFFT"
emilmont 1:fdd22bb7aa52 87 *
emilmont 1:fdd22bb7aa52 88 */
emilmont 1:fdd22bb7aa52 89
emilmont 1:fdd22bb7aa52 90 void arm_cfft_radix4_q31(
emilmont 1:fdd22bb7aa52 91 const arm_cfft_radix4_instance_q31 * S,
emilmont 1:fdd22bb7aa52 92 q31_t * pSrc)
emilmont 1:fdd22bb7aa52 93 {
emilmont 1:fdd22bb7aa52 94 if(S->ifftFlag == 1u)
emilmont 1:fdd22bb7aa52 95 {
emilmont 1:fdd22bb7aa52 96 /* Complex IFFT radix-4 */
emilmont 1:fdd22bb7aa52 97 arm_radix4_butterfly_inverse_q31(pSrc, S->fftLen, S->pTwiddle,
emilmont 1:fdd22bb7aa52 98 S->twidCoefModifier);
emilmont 1:fdd22bb7aa52 99 }
emilmont 1:fdd22bb7aa52 100 else
emilmont 1:fdd22bb7aa52 101 {
emilmont 1:fdd22bb7aa52 102 /* Complex FFT radix-4 */
emilmont 1:fdd22bb7aa52 103 arm_radix4_butterfly_q31(pSrc, S->fftLen, S->pTwiddle,
emilmont 1:fdd22bb7aa52 104 S->twidCoefModifier);
emilmont 1:fdd22bb7aa52 105 }
emilmont 1:fdd22bb7aa52 106
emilmont 1:fdd22bb7aa52 107
emilmont 1:fdd22bb7aa52 108 if(S->bitReverseFlag == 1u)
emilmont 1:fdd22bb7aa52 109 {
emilmont 1:fdd22bb7aa52 110 /* Bit Reversal */
emilmont 1:fdd22bb7aa52 111 arm_bitreversal_q31(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
emilmont 1:fdd22bb7aa52 112 }
emilmont 1:fdd22bb7aa52 113
emilmont 1:fdd22bb7aa52 114 }
emilmont 1:fdd22bb7aa52 115
emilmont 1:fdd22bb7aa52 116 /**
mbed_official 3:7a284390b0ce 117 * @} end of ComplexFFT group
emilmont 1:fdd22bb7aa52 118 */
emilmont 1:fdd22bb7aa52 119
emilmont 1:fdd22bb7aa52 120 /*
emilmont 1:fdd22bb7aa52 121 * Radix-4 FFT algorithm used is :
emilmont 1:fdd22bb7aa52 122 *
emilmont 1:fdd22bb7aa52 123 * Input real and imaginary data:
emilmont 1:fdd22bb7aa52 124 * x(n) = xa + j * ya
emilmont 1:fdd22bb7aa52 125 * x(n+N/4 ) = xb + j * yb
emilmont 1:fdd22bb7aa52 126 * x(n+N/2 ) = xc + j * yc
emilmont 1:fdd22bb7aa52 127 * x(n+3N 4) = xd + j * yd
emilmont 1:fdd22bb7aa52 128 *
emilmont 1:fdd22bb7aa52 129 *
emilmont 1:fdd22bb7aa52 130 * Output real and imaginary data:
emilmont 1:fdd22bb7aa52 131 * x(4r) = xa'+ j * ya'
emilmont 1:fdd22bb7aa52 132 * x(4r+1) = xb'+ j * yb'
emilmont 1:fdd22bb7aa52 133 * x(4r+2) = xc'+ j * yc'
emilmont 1:fdd22bb7aa52 134 * x(4r+3) = xd'+ j * yd'
emilmont 1:fdd22bb7aa52 135 *
emilmont 1:fdd22bb7aa52 136 *
emilmont 1:fdd22bb7aa52 137 * Twiddle factors for radix-4 FFT:
emilmont 1:fdd22bb7aa52 138 * Wn = co1 + j * (- si1)
emilmont 1:fdd22bb7aa52 139 * W2n = co2 + j * (- si2)
emilmont 1:fdd22bb7aa52 140 * W3n = co3 + j * (- si3)
emilmont 1:fdd22bb7aa52 141 *
emilmont 1:fdd22bb7aa52 142 * Butterfly implementation:
emilmont 1:fdd22bb7aa52 143 * xa' = xa + xb + xc + xd
emilmont 1:fdd22bb7aa52 144 * ya' = ya + yb + yc + yd
emilmont 1:fdd22bb7aa52 145 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
emilmont 1:fdd22bb7aa52 146 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
emilmont 1:fdd22bb7aa52 147 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
emilmont 1:fdd22bb7aa52 148 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
emilmont 1:fdd22bb7aa52 149 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
emilmont 1:fdd22bb7aa52 150 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
emilmont 1:fdd22bb7aa52 151 *
emilmont 1:fdd22bb7aa52 152 */
emilmont 1:fdd22bb7aa52 153
emilmont 1:fdd22bb7aa52 154 /**
emilmont 1:fdd22bb7aa52 155 * @brief Core function for the Q31 CFFT butterfly process.
emilmont 1:fdd22bb7aa52 156 * @param[in, out] *pSrc points to the in-place buffer of Q31 data type.
emilmont 1:fdd22bb7aa52 157 * @param[in] fftLen length of the FFT.
emilmont 1:fdd22bb7aa52 158 * @param[in] *pCoef points to twiddle coefficient buffer.
emilmont 1:fdd22bb7aa52 159 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
emilmont 1:fdd22bb7aa52 160 * @return none.
emilmont 1:fdd22bb7aa52 161 */
emilmont 1:fdd22bb7aa52 162
emilmont 1:fdd22bb7aa52 163 void arm_radix4_butterfly_q31(
emilmont 1:fdd22bb7aa52 164 q31_t * pSrc,
emilmont 1:fdd22bb7aa52 165 uint32_t fftLen,
emilmont 1:fdd22bb7aa52 166 q31_t * pCoef,
emilmont 1:fdd22bb7aa52 167 uint32_t twidCoefModifier)
emilmont 1:fdd22bb7aa52 168 {
mbed_official 5:3762170b6d4d 169 #if defined(ARM_MATH_CM7)
emilmont 1:fdd22bb7aa52 170 uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k;
emilmont 1:fdd22bb7aa52 171 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
emilmont 1:fdd22bb7aa52 172
emilmont 1:fdd22bb7aa52 173 q31_t xa, xb, xc, xd;
emilmont 1:fdd22bb7aa52 174 q31_t ya, yb, yc, yd;
emilmont 1:fdd22bb7aa52 175 q31_t xa_out, xb_out, xc_out, xd_out;
emilmont 1:fdd22bb7aa52 176 q31_t ya_out, yb_out, yc_out, yd_out;
emilmont 1:fdd22bb7aa52 177
emilmont 1:fdd22bb7aa52 178 q31_t *ptr1;
emilmont 1:fdd22bb7aa52 179 q63_t xaya, xbyb, xcyc, xdyd;
emilmont 1:fdd22bb7aa52 180 /* Total process is divided into three stages */
emilmont 1:fdd22bb7aa52 181
emilmont 1:fdd22bb7aa52 182 /* process first stage, middle stages, & last stage */
emilmont 1:fdd22bb7aa52 183
emilmont 1:fdd22bb7aa52 184
emilmont 1:fdd22bb7aa52 185 /* start of first stage process */
emilmont 1:fdd22bb7aa52 186
emilmont 1:fdd22bb7aa52 187 /* Initializations for the first stage */
emilmont 1:fdd22bb7aa52 188 n2 = fftLen;
emilmont 1:fdd22bb7aa52 189 n1 = n2;
emilmont 1:fdd22bb7aa52 190 /* n2 = fftLen/4 */
emilmont 1:fdd22bb7aa52 191 n2 >>= 2u;
emilmont 1:fdd22bb7aa52 192 i0 = 0u;
emilmont 1:fdd22bb7aa52 193 ia1 = 0u;
emilmont 1:fdd22bb7aa52 194
emilmont 1:fdd22bb7aa52 195 j = n2;
emilmont 1:fdd22bb7aa52 196
emilmont 1:fdd22bb7aa52 197 /* Calculation of first stage */
emilmont 1:fdd22bb7aa52 198 do
emilmont 1:fdd22bb7aa52 199 {
emilmont 1:fdd22bb7aa52 200 /* index calculation for the input as, */
emilmont 1:fdd22bb7aa52 201 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
emilmont 1:fdd22bb7aa52 202 i1 = i0 + n2;
emilmont 1:fdd22bb7aa52 203 i2 = i1 + n2;
emilmont 1:fdd22bb7aa52 204 i3 = i2 + n2;
emilmont 1:fdd22bb7aa52 205
emilmont 1:fdd22bb7aa52 206 /* input is in 1.31(q31) format and provide 4 guard bits for the input */
emilmont 1:fdd22bb7aa52 207
emilmont 1:fdd22bb7aa52 208 /* Butterfly implementation */
emilmont 1:fdd22bb7aa52 209 /* xa + xc */
emilmont 1:fdd22bb7aa52 210 r1 = (pSrc[(2u * i0)] >> 4u) + (pSrc[(2u * i2)] >> 4u);
emilmont 1:fdd22bb7aa52 211 /* xa - xc */
emilmont 1:fdd22bb7aa52 212 r2 = (pSrc[2u * i0] >> 4u) - (pSrc[2u * i2] >> 4u);
emilmont 1:fdd22bb7aa52 213
emilmont 1:fdd22bb7aa52 214 /* xb + xd */
emilmont 1:fdd22bb7aa52 215 t1 = (pSrc[2u * i1] >> 4u) + (pSrc[2u * i3] >> 4u);
emilmont 1:fdd22bb7aa52 216
emilmont 1:fdd22bb7aa52 217 /* ya + yc */
emilmont 1:fdd22bb7aa52 218 s1 = (pSrc[(2u * i0) + 1u] >> 4u) + (pSrc[(2u * i2) + 1u] >> 4u);
emilmont 1:fdd22bb7aa52 219 /* ya - yc */
emilmont 1:fdd22bb7aa52 220 s2 = (pSrc[(2u * i0) + 1u] >> 4u) - (pSrc[(2u * i2) + 1u] >> 4u);
emilmont 1:fdd22bb7aa52 221
emilmont 1:fdd22bb7aa52 222 /* xa' = xa + xb + xc + xd */
emilmont 1:fdd22bb7aa52 223 pSrc[2u * i0] = (r1 + t1);
emilmont 1:fdd22bb7aa52 224 /* (xa + xc) - (xb + xd) */
emilmont 1:fdd22bb7aa52 225 r1 = r1 - t1;
emilmont 1:fdd22bb7aa52 226 /* yb + yd */
emilmont 1:fdd22bb7aa52 227 t2 = (pSrc[(2u * i1) + 1u] >> 4u) + (pSrc[(2u * i3) + 1u] >> 4u);
emilmont 1:fdd22bb7aa52 228
emilmont 1:fdd22bb7aa52 229 /* ya' = ya + yb + yc + yd */
emilmont 1:fdd22bb7aa52 230 pSrc[(2u * i0) + 1u] = (s1 + t2);
emilmont 1:fdd22bb7aa52 231
emilmont 1:fdd22bb7aa52 232 /* (ya + yc) - (yb + yd) */
emilmont 1:fdd22bb7aa52 233 s1 = s1 - t2;
emilmont 1:fdd22bb7aa52 234
emilmont 1:fdd22bb7aa52 235 /* yb - yd */
emilmont 1:fdd22bb7aa52 236 t1 = (pSrc[(2u * i1) + 1u] >> 4u) - (pSrc[(2u * i3) + 1u] >> 4u);
emilmont 1:fdd22bb7aa52 237 /* xb - xd */
emilmont 1:fdd22bb7aa52 238 t2 = (pSrc[2u * i1] >> 4u) - (pSrc[2u * i3] >> 4u);
emilmont 1:fdd22bb7aa52 239
emilmont 1:fdd22bb7aa52 240 /* index calculation for the coefficients */
emilmont 1:fdd22bb7aa52 241 ia2 = 2u * ia1;
emilmont 1:fdd22bb7aa52 242 co2 = pCoef[ia2 * 2u];
emilmont 1:fdd22bb7aa52 243 si2 = pCoef[(ia2 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 244
emilmont 1:fdd22bb7aa52 245 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
emilmont 1:fdd22bb7aa52 246 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
emilmont 1:fdd22bb7aa52 247 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u;
emilmont 1:fdd22bb7aa52 248
emilmont 1:fdd22bb7aa52 249 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
emilmont 1:fdd22bb7aa52 250 pSrc[(2u * i1) + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
emilmont 1:fdd22bb7aa52 251 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u;
emilmont 1:fdd22bb7aa52 252
emilmont 1:fdd22bb7aa52 253 /* (xa - xc) + (yb - yd) */
emilmont 1:fdd22bb7aa52 254 r1 = r2 + t1;
emilmont 1:fdd22bb7aa52 255 /* (xa - xc) - (yb - yd) */
emilmont 1:fdd22bb7aa52 256 r2 = r2 - t1;
emilmont 1:fdd22bb7aa52 257
emilmont 1:fdd22bb7aa52 258 /* (ya - yc) - (xb - xd) */
emilmont 1:fdd22bb7aa52 259 s1 = s2 - t2;
emilmont 1:fdd22bb7aa52 260 /* (ya - yc) + (xb - xd) */
emilmont 1:fdd22bb7aa52 261 s2 = s2 + t2;
emilmont 1:fdd22bb7aa52 262
emilmont 1:fdd22bb7aa52 263 co1 = pCoef[ia1 * 2u];
emilmont 1:fdd22bb7aa52 264 si1 = pCoef[(ia1 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 265
emilmont 1:fdd22bb7aa52 266 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
emilmont 1:fdd22bb7aa52 267 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
emilmont 1:fdd22bb7aa52 268 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u;
emilmont 1:fdd22bb7aa52 269
emilmont 1:fdd22bb7aa52 270 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
emilmont 1:fdd22bb7aa52 271 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
emilmont 1:fdd22bb7aa52 272 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u;
emilmont 1:fdd22bb7aa52 273
emilmont 1:fdd22bb7aa52 274 /* index calculation for the coefficients */
emilmont 1:fdd22bb7aa52 275 ia3 = 3u * ia1;
emilmont 1:fdd22bb7aa52 276 co3 = pCoef[ia3 * 2u];
emilmont 1:fdd22bb7aa52 277 si3 = pCoef[(ia3 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 278
emilmont 1:fdd22bb7aa52 279 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
emilmont 1:fdd22bb7aa52 280 pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
emilmont 1:fdd22bb7aa52 281 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u;
emilmont 1:fdd22bb7aa52 282
emilmont 1:fdd22bb7aa52 283 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
emilmont 1:fdd22bb7aa52 284 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
emilmont 1:fdd22bb7aa52 285 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u;
emilmont 1:fdd22bb7aa52 286
emilmont 1:fdd22bb7aa52 287 /* Twiddle coefficients index modifier */
emilmont 1:fdd22bb7aa52 288 ia1 = ia1 + twidCoefModifier;
emilmont 1:fdd22bb7aa52 289
emilmont 1:fdd22bb7aa52 290 /* Updating input index */
emilmont 1:fdd22bb7aa52 291 i0 = i0 + 1u;
emilmont 1:fdd22bb7aa52 292
emilmont 1:fdd22bb7aa52 293 } while(--j);
emilmont 1:fdd22bb7aa52 294
emilmont 1:fdd22bb7aa52 295 /* end of first stage process */
emilmont 1:fdd22bb7aa52 296
emilmont 1:fdd22bb7aa52 297 /* data is in 5.27(q27) format */
emilmont 1:fdd22bb7aa52 298
emilmont 1:fdd22bb7aa52 299
emilmont 1:fdd22bb7aa52 300 /* start of Middle stages process */
emilmont 1:fdd22bb7aa52 301
emilmont 1:fdd22bb7aa52 302
emilmont 1:fdd22bb7aa52 303 /* each stage in middle stages provides two down scaling of the input */
emilmont 1:fdd22bb7aa52 304
emilmont 1:fdd22bb7aa52 305 twidCoefModifier <<= 2u;
emilmont 1:fdd22bb7aa52 306
emilmont 1:fdd22bb7aa52 307
emilmont 1:fdd22bb7aa52 308 for (k = fftLen / 4u; k > 4u; k >>= 2u)
emilmont 1:fdd22bb7aa52 309 {
emilmont 1:fdd22bb7aa52 310 /* Initializations for the first stage */
emilmont 1:fdd22bb7aa52 311 n1 = n2;
emilmont 1:fdd22bb7aa52 312 n2 >>= 2u;
emilmont 1:fdd22bb7aa52 313 ia1 = 0u;
emilmont 1:fdd22bb7aa52 314
emilmont 1:fdd22bb7aa52 315 /* Calculation of first stage */
emilmont 1:fdd22bb7aa52 316 for (j = 0u; j <= (n2 - 1u); j++)
emilmont 1:fdd22bb7aa52 317 {
emilmont 1:fdd22bb7aa52 318 /* index calculation for the coefficients */
emilmont 1:fdd22bb7aa52 319 ia2 = ia1 + ia1;
emilmont 1:fdd22bb7aa52 320 ia3 = ia2 + ia1;
emilmont 1:fdd22bb7aa52 321 co1 = pCoef[ia1 * 2u];
emilmont 1:fdd22bb7aa52 322 si1 = pCoef[(ia1 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 323 co2 = pCoef[ia2 * 2u];
emilmont 1:fdd22bb7aa52 324 si2 = pCoef[(ia2 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 325 co3 = pCoef[ia3 * 2u];
emilmont 1:fdd22bb7aa52 326 si3 = pCoef[(ia3 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 327 /* Twiddle coefficients index modifier */
emilmont 1:fdd22bb7aa52 328 ia1 = ia1 + twidCoefModifier;
emilmont 1:fdd22bb7aa52 329
emilmont 1:fdd22bb7aa52 330 for (i0 = j; i0 < fftLen; i0 += n1)
emilmont 1:fdd22bb7aa52 331 {
emilmont 1:fdd22bb7aa52 332 /* index calculation for the input as, */
emilmont 1:fdd22bb7aa52 333 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
emilmont 1:fdd22bb7aa52 334 i1 = i0 + n2;
emilmont 1:fdd22bb7aa52 335 i2 = i1 + n2;
emilmont 1:fdd22bb7aa52 336 i3 = i2 + n2;
emilmont 1:fdd22bb7aa52 337
emilmont 1:fdd22bb7aa52 338 /* Butterfly implementation */
emilmont 1:fdd22bb7aa52 339 /* xa + xc */
emilmont 1:fdd22bb7aa52 340 r1 = pSrc[2u * i0] + pSrc[2u * i2];
emilmont 1:fdd22bb7aa52 341 /* xa - xc */
emilmont 1:fdd22bb7aa52 342 r2 = pSrc[2u * i0] - pSrc[2u * i2];
emilmont 1:fdd22bb7aa52 343
emilmont 1:fdd22bb7aa52 344 /* ya + yc */
emilmont 1:fdd22bb7aa52 345 s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u];
emilmont 1:fdd22bb7aa52 346 /* ya - yc */
emilmont 1:fdd22bb7aa52 347 s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u];
emilmont 1:fdd22bb7aa52 348
emilmont 1:fdd22bb7aa52 349 /* xb + xd */
emilmont 1:fdd22bb7aa52 350 t1 = pSrc[2u * i1] + pSrc[2u * i3];
emilmont 1:fdd22bb7aa52 351
emilmont 1:fdd22bb7aa52 352 /* xa' = xa + xb + xc + xd */
emilmont 1:fdd22bb7aa52 353 pSrc[2u * i0] = (r1 + t1) >> 2u;
emilmont 1:fdd22bb7aa52 354 /* xa + xc -(xb + xd) */
emilmont 1:fdd22bb7aa52 355 r1 = r1 - t1;
emilmont 1:fdd22bb7aa52 356
emilmont 1:fdd22bb7aa52 357 /* yb + yd */
emilmont 1:fdd22bb7aa52 358 t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u];
emilmont 1:fdd22bb7aa52 359 /* ya' = ya + yb + yc + yd */
emilmont 1:fdd22bb7aa52 360 pSrc[(2u * i0) + 1u] = (s1 + t2) >> 2u;
emilmont 1:fdd22bb7aa52 361
emilmont 1:fdd22bb7aa52 362 /* (ya + yc) - (yb + yd) */
emilmont 1:fdd22bb7aa52 363 s1 = s1 - t2;
emilmont 1:fdd22bb7aa52 364
emilmont 1:fdd22bb7aa52 365 /* (yb - yd) */
emilmont 1:fdd22bb7aa52 366 t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u];
emilmont 1:fdd22bb7aa52 367 /* (xb - xd) */
emilmont 1:fdd22bb7aa52 368 t2 = pSrc[2u * i1] - pSrc[2u * i3];
emilmont 1:fdd22bb7aa52 369
emilmont 1:fdd22bb7aa52 370 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
emilmont 1:fdd22bb7aa52 371 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
emilmont 1:fdd22bb7aa52 372 ((int32_t) (((q63_t) s1 * si2) >> 32))) >> 1u;
emilmont 1:fdd22bb7aa52 373
emilmont 1:fdd22bb7aa52 374 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
emilmont 1:fdd22bb7aa52 375 pSrc[(2u * i1) + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
emilmont 1:fdd22bb7aa52 376 ((int32_t) (((q63_t) r1 * si2) >> 32))) >> 1u;
emilmont 1:fdd22bb7aa52 377
emilmont 1:fdd22bb7aa52 378 /* (xa - xc) + (yb - yd) */
emilmont 1:fdd22bb7aa52 379 r1 = r2 + t1;
emilmont 1:fdd22bb7aa52 380 /* (xa - xc) - (yb - yd) */
emilmont 1:fdd22bb7aa52 381 r2 = r2 - t1;
emilmont 1:fdd22bb7aa52 382
emilmont 1:fdd22bb7aa52 383 /* (ya - yc) - (xb - xd) */
emilmont 1:fdd22bb7aa52 384 s1 = s2 - t2;
emilmont 1:fdd22bb7aa52 385 /* (ya - yc) + (xb - xd) */
emilmont 1:fdd22bb7aa52 386 s2 = s2 + t2;
emilmont 1:fdd22bb7aa52 387
emilmont 1:fdd22bb7aa52 388 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
emilmont 1:fdd22bb7aa52 389 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
emilmont 1:fdd22bb7aa52 390 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u;
emilmont 1:fdd22bb7aa52 391
emilmont 1:fdd22bb7aa52 392 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
emilmont 1:fdd22bb7aa52 393 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
emilmont 1:fdd22bb7aa52 394 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u;
emilmont 1:fdd22bb7aa52 395
emilmont 1:fdd22bb7aa52 396 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
emilmont 1:fdd22bb7aa52 397 pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
emilmont 1:fdd22bb7aa52 398 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u;
emilmont 1:fdd22bb7aa52 399
emilmont 1:fdd22bb7aa52 400 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
emilmont 1:fdd22bb7aa52 401 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
emilmont 1:fdd22bb7aa52 402 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u;
emilmont 1:fdd22bb7aa52 403 }
emilmont 1:fdd22bb7aa52 404 }
emilmont 1:fdd22bb7aa52 405 twidCoefModifier <<= 2u;
emilmont 1:fdd22bb7aa52 406 }
mbed_official 5:3762170b6d4d 407 #else
mbed_official 5:3762170b6d4d 408 uint32_t n1, n2, ia1, ia2, ia3, i0, j, k;
mbed_official 5:3762170b6d4d 409 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
mbed_official 5:3762170b6d4d 410
mbed_official 5:3762170b6d4d 411 q31_t xa, xb, xc, xd;
mbed_official 5:3762170b6d4d 412 q31_t ya, yb, yc, yd;
mbed_official 5:3762170b6d4d 413 q31_t xa_out, xb_out, xc_out, xd_out;
mbed_official 5:3762170b6d4d 414 q31_t ya_out, yb_out, yc_out, yd_out;
mbed_official 5:3762170b6d4d 415
mbed_official 5:3762170b6d4d 416 q31_t *ptr1;
mbed_official 5:3762170b6d4d 417 q31_t *pSi0;
mbed_official 5:3762170b6d4d 418 q31_t *pSi1;
mbed_official 5:3762170b6d4d 419 q31_t *pSi2;
mbed_official 5:3762170b6d4d 420 q31_t *pSi3;
mbed_official 5:3762170b6d4d 421 q63_t xaya, xbyb, xcyc, xdyd;
mbed_official 5:3762170b6d4d 422 /* Total process is divided into three stages */
mbed_official 5:3762170b6d4d 423
mbed_official 5:3762170b6d4d 424 /* process first stage, middle stages, & last stage */
mbed_official 5:3762170b6d4d 425
mbed_official 5:3762170b6d4d 426
mbed_official 5:3762170b6d4d 427 /* start of first stage process */
mbed_official 5:3762170b6d4d 428
mbed_official 5:3762170b6d4d 429 /* Initializations for the first stage */
mbed_official 5:3762170b6d4d 430 n2 = fftLen;
mbed_official 5:3762170b6d4d 431 n1 = n2;
mbed_official 5:3762170b6d4d 432 /* n2 = fftLen/4 */
mbed_official 5:3762170b6d4d 433 n2 >>= 2u;
mbed_official 5:3762170b6d4d 434
mbed_official 5:3762170b6d4d 435 ia1 = 0u;
mbed_official 5:3762170b6d4d 436
mbed_official 5:3762170b6d4d 437 j = n2;
mbed_official 5:3762170b6d4d 438
mbed_official 5:3762170b6d4d 439 pSi0 = pSrc;
mbed_official 5:3762170b6d4d 440 pSi1 = pSi0 + 2 * n2;
mbed_official 5:3762170b6d4d 441 pSi2 = pSi1 + 2 * n2;
mbed_official 5:3762170b6d4d 442 pSi3 = pSi2 + 2 * n2;
mbed_official 5:3762170b6d4d 443
mbed_official 5:3762170b6d4d 444 /* Calculation of first stage */
mbed_official 5:3762170b6d4d 445 do
mbed_official 5:3762170b6d4d 446 {
mbed_official 5:3762170b6d4d 447 /* input is in 1.31(q31) format and provide 4 guard bits for the input */
mbed_official 5:3762170b6d4d 448
mbed_official 5:3762170b6d4d 449 /* Butterfly implementation */
mbed_official 5:3762170b6d4d 450 /* xa + xc */
mbed_official 5:3762170b6d4d 451 r1 = (pSi0[0] >> 4u) + (pSi2[0] >> 4u);
mbed_official 5:3762170b6d4d 452 /* xa - xc */
mbed_official 5:3762170b6d4d 453 r2 = (pSi0[0] >> 4u) - (pSi2[0] >> 4u);
mbed_official 5:3762170b6d4d 454
mbed_official 5:3762170b6d4d 455 /* xb + xd */
mbed_official 5:3762170b6d4d 456 t1 = (pSi1[0] >> 4u) + (pSi3[0] >> 4u);
mbed_official 5:3762170b6d4d 457
mbed_official 5:3762170b6d4d 458 /* ya + yc */
mbed_official 5:3762170b6d4d 459 s1 = (pSi0[1] >> 4u) + (pSi2[1] >> 4u);
mbed_official 5:3762170b6d4d 460 /* ya - yc */
mbed_official 5:3762170b6d4d 461 s2 = (pSi0[1] >> 4u) - (pSi2[1] >> 4u);
mbed_official 5:3762170b6d4d 462
mbed_official 5:3762170b6d4d 463 /* xa' = xa + xb + xc + xd */
mbed_official 5:3762170b6d4d 464 *pSi0++ = (r1 + t1);
mbed_official 5:3762170b6d4d 465 /* (xa + xc) - (xb + xd) */
mbed_official 5:3762170b6d4d 466 r1 = r1 - t1;
mbed_official 5:3762170b6d4d 467 /* yb + yd */
mbed_official 5:3762170b6d4d 468 t2 = (pSi1[1] >> 4u) + (pSi3[1] >> 4u);
mbed_official 5:3762170b6d4d 469
mbed_official 5:3762170b6d4d 470 /* ya' = ya + yb + yc + yd */
mbed_official 5:3762170b6d4d 471 *pSi0++ = (s1 + t2);
mbed_official 5:3762170b6d4d 472
mbed_official 5:3762170b6d4d 473 /* (ya + yc) - (yb + yd) */
mbed_official 5:3762170b6d4d 474 s1 = s1 - t2;
mbed_official 5:3762170b6d4d 475
mbed_official 5:3762170b6d4d 476 /* yb - yd */
mbed_official 5:3762170b6d4d 477 t1 = (pSi1[1] >> 4u) - (pSi3[1] >> 4u);
mbed_official 5:3762170b6d4d 478 /* xb - xd */
mbed_official 5:3762170b6d4d 479 t2 = (pSi1[0] >> 4u) - (pSi3[0] >> 4u);
mbed_official 5:3762170b6d4d 480
mbed_official 5:3762170b6d4d 481 /* index calculation for the coefficients */
mbed_official 5:3762170b6d4d 482 ia2 = 2u * ia1;
mbed_official 5:3762170b6d4d 483 co2 = pCoef[ia2 * 2u];
mbed_official 5:3762170b6d4d 484 si2 = pCoef[(ia2 * 2u) + 1u];
mbed_official 5:3762170b6d4d 485
mbed_official 5:3762170b6d4d 486 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
mbed_official 5:3762170b6d4d 487 *pSi1++ = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
mbed_official 5:3762170b6d4d 488 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u;
mbed_official 5:3762170b6d4d 489
mbed_official 5:3762170b6d4d 490 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
mbed_official 5:3762170b6d4d 491 *pSi1++ = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
mbed_official 5:3762170b6d4d 492 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u;
mbed_official 5:3762170b6d4d 493
mbed_official 5:3762170b6d4d 494 /* (xa - xc) + (yb - yd) */
mbed_official 5:3762170b6d4d 495 r1 = r2 + t1;
mbed_official 5:3762170b6d4d 496 /* (xa - xc) - (yb - yd) */
mbed_official 5:3762170b6d4d 497 r2 = r2 - t1;
mbed_official 5:3762170b6d4d 498
mbed_official 5:3762170b6d4d 499 /* (ya - yc) - (xb - xd) */
mbed_official 5:3762170b6d4d 500 s1 = s2 - t2;
mbed_official 5:3762170b6d4d 501 /* (ya - yc) + (xb - xd) */
mbed_official 5:3762170b6d4d 502 s2 = s2 + t2;
mbed_official 5:3762170b6d4d 503
mbed_official 5:3762170b6d4d 504 co1 = pCoef[ia1 * 2u];
mbed_official 5:3762170b6d4d 505 si1 = pCoef[(ia1 * 2u) + 1u];
mbed_official 5:3762170b6d4d 506
mbed_official 5:3762170b6d4d 507 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
mbed_official 5:3762170b6d4d 508 *pSi2++ = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
mbed_official 5:3762170b6d4d 509 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u;
mbed_official 5:3762170b6d4d 510
mbed_official 5:3762170b6d4d 511 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
mbed_official 5:3762170b6d4d 512 *pSi2++ = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
mbed_official 5:3762170b6d4d 513 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u;
mbed_official 5:3762170b6d4d 514
mbed_official 5:3762170b6d4d 515 /* index calculation for the coefficients */
mbed_official 5:3762170b6d4d 516 ia3 = 3u * ia1;
mbed_official 5:3762170b6d4d 517 co3 = pCoef[ia3 * 2u];
mbed_official 5:3762170b6d4d 518 si3 = pCoef[(ia3 * 2u) + 1u];
mbed_official 5:3762170b6d4d 519
mbed_official 5:3762170b6d4d 520 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
mbed_official 5:3762170b6d4d 521 *pSi3++ = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
mbed_official 5:3762170b6d4d 522 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u;
mbed_official 5:3762170b6d4d 523
mbed_official 5:3762170b6d4d 524 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
mbed_official 5:3762170b6d4d 525 *pSi3++ = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
mbed_official 5:3762170b6d4d 526 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u;
mbed_official 5:3762170b6d4d 527
mbed_official 5:3762170b6d4d 528 /* Twiddle coefficients index modifier */
mbed_official 5:3762170b6d4d 529 ia1 = ia1 + twidCoefModifier;
mbed_official 5:3762170b6d4d 530
mbed_official 5:3762170b6d4d 531 } while(--j);
mbed_official 5:3762170b6d4d 532
mbed_official 5:3762170b6d4d 533 /* end of first stage process */
mbed_official 5:3762170b6d4d 534
mbed_official 5:3762170b6d4d 535 /* data is in 5.27(q27) format */
mbed_official 5:3762170b6d4d 536
mbed_official 5:3762170b6d4d 537
mbed_official 5:3762170b6d4d 538 /* start of Middle stages process */
mbed_official 5:3762170b6d4d 539
mbed_official 5:3762170b6d4d 540
mbed_official 5:3762170b6d4d 541 /* each stage in middle stages provides two down scaling of the input */
mbed_official 5:3762170b6d4d 542
mbed_official 5:3762170b6d4d 543 twidCoefModifier <<= 2u;
mbed_official 5:3762170b6d4d 544
mbed_official 5:3762170b6d4d 545
mbed_official 5:3762170b6d4d 546 for (k = fftLen / 4u; k > 4u; k >>= 2u)
mbed_official 5:3762170b6d4d 547 {
mbed_official 5:3762170b6d4d 548 /* Initializations for the first stage */
mbed_official 5:3762170b6d4d 549 n1 = n2;
mbed_official 5:3762170b6d4d 550 n2 >>= 2u;
mbed_official 5:3762170b6d4d 551 ia1 = 0u;
mbed_official 5:3762170b6d4d 552
mbed_official 5:3762170b6d4d 553 /* Calculation of first stage */
mbed_official 5:3762170b6d4d 554 for (j = 0u; j <= (n2 - 1u); j++)
mbed_official 5:3762170b6d4d 555 {
mbed_official 5:3762170b6d4d 556 /* index calculation for the coefficients */
mbed_official 5:3762170b6d4d 557 ia2 = ia1 + ia1;
mbed_official 5:3762170b6d4d 558 ia3 = ia2 + ia1;
mbed_official 5:3762170b6d4d 559 co1 = pCoef[ia1 * 2u];
mbed_official 5:3762170b6d4d 560 si1 = pCoef[(ia1 * 2u) + 1u];
mbed_official 5:3762170b6d4d 561 co2 = pCoef[ia2 * 2u];
mbed_official 5:3762170b6d4d 562 si2 = pCoef[(ia2 * 2u) + 1u];
mbed_official 5:3762170b6d4d 563 co3 = pCoef[ia3 * 2u];
mbed_official 5:3762170b6d4d 564 si3 = pCoef[(ia3 * 2u) + 1u];
mbed_official 5:3762170b6d4d 565 /* Twiddle coefficients index modifier */
mbed_official 5:3762170b6d4d 566 ia1 = ia1 + twidCoefModifier;
mbed_official 5:3762170b6d4d 567
mbed_official 5:3762170b6d4d 568 pSi0 = pSrc + 2 * j;
mbed_official 5:3762170b6d4d 569 pSi1 = pSi0 + 2 * n2;
mbed_official 5:3762170b6d4d 570 pSi2 = pSi1 + 2 * n2;
mbed_official 5:3762170b6d4d 571 pSi3 = pSi2 + 2 * n2;
mbed_official 5:3762170b6d4d 572
mbed_official 5:3762170b6d4d 573 for (i0 = j; i0 < fftLen; i0 += n1)
mbed_official 5:3762170b6d4d 574 {
mbed_official 5:3762170b6d4d 575 /* Butterfly implementation */
mbed_official 5:3762170b6d4d 576 /* xa + xc */
mbed_official 5:3762170b6d4d 577 r1 = pSi0[0] + pSi2[0];
mbed_official 5:3762170b6d4d 578
mbed_official 5:3762170b6d4d 579 /* xa - xc */
mbed_official 5:3762170b6d4d 580 r2 = pSi0[0] - pSi2[0];
mbed_official 5:3762170b6d4d 581
mbed_official 5:3762170b6d4d 582
mbed_official 5:3762170b6d4d 583 /* ya + yc */
mbed_official 5:3762170b6d4d 584 s1 = pSi0[1] + pSi2[1];
mbed_official 5:3762170b6d4d 585
mbed_official 5:3762170b6d4d 586 /* ya - yc */
mbed_official 5:3762170b6d4d 587 s2 = pSi0[1] - pSi2[1];
mbed_official 5:3762170b6d4d 588
mbed_official 5:3762170b6d4d 589
mbed_official 5:3762170b6d4d 590 /* xb + xd */
mbed_official 5:3762170b6d4d 591 t1 = pSi1[0] + pSi3[0];
mbed_official 5:3762170b6d4d 592
mbed_official 5:3762170b6d4d 593
mbed_official 5:3762170b6d4d 594 /* xa' = xa + xb + xc + xd */
mbed_official 5:3762170b6d4d 595 pSi0[0] = (r1 + t1) >> 2u;
mbed_official 5:3762170b6d4d 596 /* xa + xc -(xb + xd) */
mbed_official 5:3762170b6d4d 597 r1 = r1 - t1;
mbed_official 5:3762170b6d4d 598
mbed_official 5:3762170b6d4d 599 /* yb + yd */
mbed_official 5:3762170b6d4d 600 t2 = pSi1[1] + pSi3[1];
mbed_official 5:3762170b6d4d 601
mbed_official 5:3762170b6d4d 602 /* ya' = ya + yb + yc + yd */
mbed_official 5:3762170b6d4d 603 pSi0[1] = (s1 + t2) >> 2u;
mbed_official 5:3762170b6d4d 604 pSi0 += 2 * n1;
mbed_official 5:3762170b6d4d 605
mbed_official 5:3762170b6d4d 606 /* (ya + yc) - (yb + yd) */
mbed_official 5:3762170b6d4d 607 s1 = s1 - t2;
mbed_official 5:3762170b6d4d 608
mbed_official 5:3762170b6d4d 609 /* (yb - yd) */
mbed_official 5:3762170b6d4d 610 t1 = pSi1[1] - pSi3[1];
mbed_official 5:3762170b6d4d 611
mbed_official 5:3762170b6d4d 612 /* (xb - xd) */
mbed_official 5:3762170b6d4d 613 t2 = pSi1[0] - pSi3[0];
mbed_official 5:3762170b6d4d 614
mbed_official 5:3762170b6d4d 615
mbed_official 5:3762170b6d4d 616 /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
mbed_official 5:3762170b6d4d 617 pSi1[0] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
mbed_official 5:3762170b6d4d 618 ((int32_t) (((q63_t) s1 * si2) >> 32))) >> 1u;
mbed_official 5:3762170b6d4d 619
mbed_official 5:3762170b6d4d 620 /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
mbed_official 5:3762170b6d4d 621 pSi1[1] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
mbed_official 5:3762170b6d4d 622 ((int32_t) (((q63_t) r1 * si2) >> 32))) >> 1u;
mbed_official 5:3762170b6d4d 623 pSi1 += 2 * n1;
mbed_official 5:3762170b6d4d 624
mbed_official 5:3762170b6d4d 625 /* (xa - xc) + (yb - yd) */
mbed_official 5:3762170b6d4d 626 r1 = r2 + t1;
mbed_official 5:3762170b6d4d 627 /* (xa - xc) - (yb - yd) */
mbed_official 5:3762170b6d4d 628 r2 = r2 - t1;
mbed_official 5:3762170b6d4d 629
mbed_official 5:3762170b6d4d 630 /* (ya - yc) - (xb - xd) */
mbed_official 5:3762170b6d4d 631 s1 = s2 - t2;
mbed_official 5:3762170b6d4d 632 /* (ya - yc) + (xb - xd) */
mbed_official 5:3762170b6d4d 633 s2 = s2 + t2;
mbed_official 5:3762170b6d4d 634
mbed_official 5:3762170b6d4d 635 /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
mbed_official 5:3762170b6d4d 636 pSi2[0] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
mbed_official 5:3762170b6d4d 637 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u;
mbed_official 5:3762170b6d4d 638
mbed_official 5:3762170b6d4d 639 /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
mbed_official 5:3762170b6d4d 640 pSi2[1] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
mbed_official 5:3762170b6d4d 641 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u;
mbed_official 5:3762170b6d4d 642 pSi2 += 2 * n1;
mbed_official 5:3762170b6d4d 643
mbed_official 5:3762170b6d4d 644 /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
mbed_official 5:3762170b6d4d 645 pSi3[0] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
mbed_official 5:3762170b6d4d 646 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u;
mbed_official 5:3762170b6d4d 647
mbed_official 5:3762170b6d4d 648 /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
mbed_official 5:3762170b6d4d 649 pSi3[1] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
mbed_official 5:3762170b6d4d 650 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u;
mbed_official 5:3762170b6d4d 651 pSi3 += 2 * n1;
mbed_official 5:3762170b6d4d 652 }
mbed_official 5:3762170b6d4d 653 }
mbed_official 5:3762170b6d4d 654 twidCoefModifier <<= 2u;
mbed_official 5:3762170b6d4d 655 }
mbed_official 5:3762170b6d4d 656 #endif
emilmont 1:fdd22bb7aa52 657
emilmont 1:fdd22bb7aa52 658 /* End of Middle stages process */
emilmont 1:fdd22bb7aa52 659
emilmont 1:fdd22bb7aa52 660 /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
emilmont 1:fdd22bb7aa52 661 /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
emilmont 1:fdd22bb7aa52 662 /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */
emilmont 1:fdd22bb7aa52 663 /* data is in 5.27(q27) format for the 16 point as there are no middle stages */
emilmont 1:fdd22bb7aa52 664
emilmont 1:fdd22bb7aa52 665
emilmont 1:fdd22bb7aa52 666 /* start of Last stage process */
emilmont 1:fdd22bb7aa52 667 /* Initializations for the last stage */
emilmont 1:fdd22bb7aa52 668 j = fftLen >> 2;
emilmont 1:fdd22bb7aa52 669 ptr1 = &pSrc[0];
emilmont 1:fdd22bb7aa52 670
emilmont 1:fdd22bb7aa52 671 /* Calculations of last stage */
emilmont 1:fdd22bb7aa52 672 do
emilmont 1:fdd22bb7aa52 673 {
emilmont 1:fdd22bb7aa52 674
emilmont 1:fdd22bb7aa52 675 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 676
emilmont 1:fdd22bb7aa52 677 /* Read xa (real), ya(imag) input */
emilmont 1:fdd22bb7aa52 678 xaya = *__SIMD64(ptr1)++;
emilmont 1:fdd22bb7aa52 679 xa = (q31_t) xaya;
emilmont 1:fdd22bb7aa52 680 ya = (q31_t) (xaya >> 32);
emilmont 1:fdd22bb7aa52 681
emilmont 1:fdd22bb7aa52 682 /* Read xb (real), yb(imag) input */
emilmont 1:fdd22bb7aa52 683 xbyb = *__SIMD64(ptr1)++;
emilmont 1:fdd22bb7aa52 684 xb = (q31_t) xbyb;
emilmont 1:fdd22bb7aa52 685 yb = (q31_t) (xbyb >> 32);
emilmont 1:fdd22bb7aa52 686
emilmont 1:fdd22bb7aa52 687 /* Read xc (real), yc(imag) input */
emilmont 1:fdd22bb7aa52 688 xcyc = *__SIMD64(ptr1)++;
emilmont 1:fdd22bb7aa52 689 xc = (q31_t) xcyc;
emilmont 1:fdd22bb7aa52 690 yc = (q31_t) (xcyc >> 32);
emilmont 1:fdd22bb7aa52 691
emilmont 1:fdd22bb7aa52 692 /* Read xc (real), yc(imag) input */
emilmont 1:fdd22bb7aa52 693 xdyd = *__SIMD64(ptr1)++;
emilmont 1:fdd22bb7aa52 694 xd = (q31_t) xdyd;
emilmont 1:fdd22bb7aa52 695 yd = (q31_t) (xdyd >> 32);
emilmont 1:fdd22bb7aa52 696
emilmont 1:fdd22bb7aa52 697 #else
emilmont 1:fdd22bb7aa52 698
emilmont 1:fdd22bb7aa52 699 /* Read xa (real), ya(imag) input */
emilmont 1:fdd22bb7aa52 700 xaya = *__SIMD64(ptr1)++;
emilmont 1:fdd22bb7aa52 701 ya = (q31_t) xaya;
emilmont 1:fdd22bb7aa52 702 xa = (q31_t) (xaya >> 32);
emilmont 1:fdd22bb7aa52 703
emilmont 1:fdd22bb7aa52 704 /* Read xb (real), yb(imag) input */
emilmont 1:fdd22bb7aa52 705 xbyb = *__SIMD64(ptr1)++;
emilmont 1:fdd22bb7aa52 706 yb = (q31_t) xbyb;
emilmont 1:fdd22bb7aa52 707 xb = (q31_t) (xbyb >> 32);
emilmont 1:fdd22bb7aa52 708
emilmont 1:fdd22bb7aa52 709 /* Read xc (real), yc(imag) input */
emilmont 1:fdd22bb7aa52 710 xcyc = *__SIMD64(ptr1)++;
emilmont 1:fdd22bb7aa52 711 yc = (q31_t) xcyc;
emilmont 1:fdd22bb7aa52 712 xc = (q31_t) (xcyc >> 32);
emilmont 1:fdd22bb7aa52 713
emilmont 1:fdd22bb7aa52 714 /* Read xc (real), yc(imag) input */
emilmont 1:fdd22bb7aa52 715 xdyd = *__SIMD64(ptr1)++;
emilmont 1:fdd22bb7aa52 716 yd = (q31_t) xdyd;
emilmont 1:fdd22bb7aa52 717 xd = (q31_t) (xdyd >> 32);
emilmont 1:fdd22bb7aa52 718
emilmont 1:fdd22bb7aa52 719
emilmont 1:fdd22bb7aa52 720 #endif
emilmont 1:fdd22bb7aa52 721
emilmont 1:fdd22bb7aa52 722 /* xa' = xa + xb + xc + xd */
emilmont 1:fdd22bb7aa52 723 xa_out = xa + xb + xc + xd;
emilmont 1:fdd22bb7aa52 724
emilmont 1:fdd22bb7aa52 725 /* ya' = ya + yb + yc + yd */
emilmont 1:fdd22bb7aa52 726 ya_out = ya + yb + yc + yd;
emilmont 1:fdd22bb7aa52 727
emilmont 1:fdd22bb7aa52 728 /* pointer updation for writing */
emilmont 1:fdd22bb7aa52 729 ptr1 = ptr1 - 8u;
emilmont 1:fdd22bb7aa52 730
emilmont 1:fdd22bb7aa52 731 /* writing xa' and ya' */
emilmont 1:fdd22bb7aa52 732 *ptr1++ = xa_out;
emilmont 1:fdd22bb7aa52 733 *ptr1++ = ya_out;
emilmont 1:fdd22bb7aa52 734
emilmont 1:fdd22bb7aa52 735 xc_out = (xa - xb + xc - xd);
emilmont 1:fdd22bb7aa52 736 yc_out = (ya - yb + yc - yd);
emilmont 1:fdd22bb7aa52 737
emilmont 1:fdd22bb7aa52 738 /* writing xc' and yc' */
emilmont 1:fdd22bb7aa52 739 *ptr1++ = xc_out;
emilmont 1:fdd22bb7aa52 740 *ptr1++ = yc_out;
emilmont 1:fdd22bb7aa52 741
emilmont 1:fdd22bb7aa52 742 xb_out = (xa + yb - xc - yd);
emilmont 1:fdd22bb7aa52 743 yb_out = (ya - xb - yc + xd);
emilmont 1:fdd22bb7aa52 744
emilmont 1:fdd22bb7aa52 745 /* writing xb' and yb' */
emilmont 1:fdd22bb7aa52 746 *ptr1++ = xb_out;
emilmont 1:fdd22bb7aa52 747 *ptr1++ = yb_out;
emilmont 1:fdd22bb7aa52 748
emilmont 1:fdd22bb7aa52 749 xd_out = (xa - yb - xc + yd);
emilmont 1:fdd22bb7aa52 750 yd_out = (ya + xb - yc - xd);
emilmont 1:fdd22bb7aa52 751
emilmont 1:fdd22bb7aa52 752 /* writing xd' and yd' */
emilmont 1:fdd22bb7aa52 753 *ptr1++ = xd_out;
emilmont 1:fdd22bb7aa52 754 *ptr1++ = yd_out;
emilmont 1:fdd22bb7aa52 755
emilmont 1:fdd22bb7aa52 756
emilmont 1:fdd22bb7aa52 757 } while(--j);
emilmont 1:fdd22bb7aa52 758
emilmont 1:fdd22bb7aa52 759 /* output is in 11.21(q21) format for the 1024 point */
emilmont 1:fdd22bb7aa52 760 /* output is in 9.23(q23) format for the 256 point */
emilmont 1:fdd22bb7aa52 761 /* output is in 7.25(q25) format for the 64 point */
emilmont 1:fdd22bb7aa52 762 /* output is in 5.27(q27) format for the 16 point */
emilmont 1:fdd22bb7aa52 763
emilmont 1:fdd22bb7aa52 764 /* End of last stage process */
emilmont 1:fdd22bb7aa52 765
emilmont 1:fdd22bb7aa52 766 }
emilmont 1:fdd22bb7aa52 767
emilmont 1:fdd22bb7aa52 768
emilmont 1:fdd22bb7aa52 769 /**
emilmont 1:fdd22bb7aa52 770 * @brief Core function for the Q31 CIFFT butterfly process.
emilmont 1:fdd22bb7aa52 771 * @param[in, out] *pSrc points to the in-place buffer of Q31 data type.
emilmont 1:fdd22bb7aa52 772 * @param[in] fftLen length of the FFT.
emilmont 1:fdd22bb7aa52 773 * @param[in] *pCoef points to twiddle coefficient buffer.
emilmont 1:fdd22bb7aa52 774 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
emilmont 1:fdd22bb7aa52 775 * @return none.
emilmont 1:fdd22bb7aa52 776 */
emilmont 1:fdd22bb7aa52 777
emilmont 1:fdd22bb7aa52 778
emilmont 1:fdd22bb7aa52 779 /*
emilmont 1:fdd22bb7aa52 780 * Radix-4 IFFT algorithm used is :
emilmont 1:fdd22bb7aa52 781 *
emilmont 1:fdd22bb7aa52 782 * CIFFT uses same twiddle coefficients as CFFT Function
emilmont 1:fdd22bb7aa52 783 * x[k] = x[n] + (j)k * x[n + fftLen/4] + (-1)k * x[n+fftLen/2] + (-j)k * x[n+3*fftLen/4]
emilmont 1:fdd22bb7aa52 784 *
emilmont 1:fdd22bb7aa52 785 *
emilmont 1:fdd22bb7aa52 786 * IFFT is implemented with following changes in equations from FFT
emilmont 1:fdd22bb7aa52 787 *
emilmont 1:fdd22bb7aa52 788 * Input real and imaginary data:
emilmont 1:fdd22bb7aa52 789 * x(n) = xa + j * ya
emilmont 1:fdd22bb7aa52 790 * x(n+N/4 ) = xb + j * yb
emilmont 1:fdd22bb7aa52 791 * x(n+N/2 ) = xc + j * yc
emilmont 1:fdd22bb7aa52 792 * x(n+3N 4) = xd + j * yd
emilmont 1:fdd22bb7aa52 793 *
emilmont 1:fdd22bb7aa52 794 *
emilmont 1:fdd22bb7aa52 795 * Output real and imaginary data:
emilmont 1:fdd22bb7aa52 796 * x(4r) = xa'+ j * ya'
emilmont 1:fdd22bb7aa52 797 * x(4r+1) = xb'+ j * yb'
emilmont 1:fdd22bb7aa52 798 * x(4r+2) = xc'+ j * yc'
emilmont 1:fdd22bb7aa52 799 * x(4r+3) = xd'+ j * yd'
emilmont 1:fdd22bb7aa52 800 *
emilmont 1:fdd22bb7aa52 801 *
emilmont 1:fdd22bb7aa52 802 * Twiddle factors for radix-4 IFFT:
emilmont 1:fdd22bb7aa52 803 * Wn = co1 + j * (si1)
emilmont 1:fdd22bb7aa52 804 * W2n = co2 + j * (si2)
emilmont 1:fdd22bb7aa52 805 * W3n = co3 + j * (si3)
emilmont 1:fdd22bb7aa52 806
emilmont 1:fdd22bb7aa52 807 * The real and imaginary output values for the radix-4 butterfly are
emilmont 1:fdd22bb7aa52 808 * xa' = xa + xb + xc + xd
emilmont 1:fdd22bb7aa52 809 * ya' = ya + yb + yc + yd
emilmont 1:fdd22bb7aa52 810 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)
emilmont 1:fdd22bb7aa52 811 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)
emilmont 1:fdd22bb7aa52 812 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)
emilmont 1:fdd22bb7aa52 813 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)
emilmont 1:fdd22bb7aa52 814 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)
emilmont 1:fdd22bb7aa52 815 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)
emilmont 1:fdd22bb7aa52 816 *
emilmont 1:fdd22bb7aa52 817 */
emilmont 1:fdd22bb7aa52 818
emilmont 1:fdd22bb7aa52 819 void arm_radix4_butterfly_inverse_q31(
emilmont 1:fdd22bb7aa52 820 q31_t * pSrc,
emilmont 1:fdd22bb7aa52 821 uint32_t fftLen,
emilmont 1:fdd22bb7aa52 822 q31_t * pCoef,
emilmont 1:fdd22bb7aa52 823 uint32_t twidCoefModifier)
emilmont 1:fdd22bb7aa52 824 {
mbed_official 5:3762170b6d4d 825 #if defined(ARM_MATH_CM7)
emilmont 1:fdd22bb7aa52 826 uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k;
emilmont 1:fdd22bb7aa52 827 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
emilmont 1:fdd22bb7aa52 828 q31_t xa, xb, xc, xd;
emilmont 1:fdd22bb7aa52 829 q31_t ya, yb, yc, yd;
emilmont 1:fdd22bb7aa52 830 q31_t xa_out, xb_out, xc_out, xd_out;
emilmont 1:fdd22bb7aa52 831 q31_t ya_out, yb_out, yc_out, yd_out;
emilmont 1:fdd22bb7aa52 832
emilmont 1:fdd22bb7aa52 833 q31_t *ptr1;
emilmont 1:fdd22bb7aa52 834 q63_t xaya, xbyb, xcyc, xdyd;
emilmont 1:fdd22bb7aa52 835
emilmont 1:fdd22bb7aa52 836 /* input is be 1.31(q31) format for all FFT sizes */
emilmont 1:fdd22bb7aa52 837 /* Total process is divided into three stages */
emilmont 1:fdd22bb7aa52 838 /* process first stage, middle stages, & last stage */
emilmont 1:fdd22bb7aa52 839
emilmont 1:fdd22bb7aa52 840 /* Start of first stage process */
emilmont 1:fdd22bb7aa52 841
emilmont 1:fdd22bb7aa52 842 /* Initializations for the first stage */
emilmont 1:fdd22bb7aa52 843 n2 = fftLen;
emilmont 1:fdd22bb7aa52 844 n1 = n2;
emilmont 1:fdd22bb7aa52 845 /* n2 = fftLen/4 */
emilmont 1:fdd22bb7aa52 846 n2 >>= 2u;
emilmont 1:fdd22bb7aa52 847 i0 = 0u;
emilmont 1:fdd22bb7aa52 848 ia1 = 0u;
emilmont 1:fdd22bb7aa52 849
emilmont 1:fdd22bb7aa52 850 j = n2;
emilmont 1:fdd22bb7aa52 851
emilmont 1:fdd22bb7aa52 852 do
emilmont 1:fdd22bb7aa52 853 {
emilmont 1:fdd22bb7aa52 854
emilmont 1:fdd22bb7aa52 855 /* input is in 1.31(q31) format and provide 4 guard bits for the input */
emilmont 1:fdd22bb7aa52 856
emilmont 1:fdd22bb7aa52 857 /* index calculation for the input as, */
emilmont 1:fdd22bb7aa52 858 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
emilmont 1:fdd22bb7aa52 859 i1 = i0 + n2;
emilmont 1:fdd22bb7aa52 860 i2 = i1 + n2;
emilmont 1:fdd22bb7aa52 861 i3 = i2 + n2;
emilmont 1:fdd22bb7aa52 862
emilmont 1:fdd22bb7aa52 863 /* Butterfly implementation */
emilmont 1:fdd22bb7aa52 864 /* xa + xc */
emilmont 1:fdd22bb7aa52 865 r1 = (pSrc[2u * i0] >> 4u) + (pSrc[2u * i2] >> 4u);
emilmont 1:fdd22bb7aa52 866 /* xa - xc */
emilmont 1:fdd22bb7aa52 867 r2 = (pSrc[2u * i0] >> 4u) - (pSrc[2u * i2] >> 4u);
emilmont 1:fdd22bb7aa52 868
emilmont 1:fdd22bb7aa52 869 /* xb + xd */
emilmont 1:fdd22bb7aa52 870 t1 = (pSrc[2u * i1] >> 4u) + (pSrc[2u * i3] >> 4u);
emilmont 1:fdd22bb7aa52 871
emilmont 1:fdd22bb7aa52 872 /* ya + yc */
emilmont 1:fdd22bb7aa52 873 s1 = (pSrc[(2u * i0) + 1u] >> 4u) + (pSrc[(2u * i2) + 1u] >> 4u);
emilmont 1:fdd22bb7aa52 874 /* ya - yc */
emilmont 1:fdd22bb7aa52 875 s2 = (pSrc[(2u * i0) + 1u] >> 4u) - (pSrc[(2u * i2) + 1u] >> 4u);
emilmont 1:fdd22bb7aa52 876
emilmont 1:fdd22bb7aa52 877 /* xa' = xa + xb + xc + xd */
emilmont 1:fdd22bb7aa52 878 pSrc[2u * i0] = (r1 + t1);
emilmont 1:fdd22bb7aa52 879 /* (xa + xc) - (xb + xd) */
emilmont 1:fdd22bb7aa52 880 r1 = r1 - t1;
emilmont 1:fdd22bb7aa52 881 /* yb + yd */
emilmont 1:fdd22bb7aa52 882 t2 = (pSrc[(2u * i1) + 1u] >> 4u) + (pSrc[(2u * i3) + 1u] >> 4u);
emilmont 1:fdd22bb7aa52 883 /* ya' = ya + yb + yc + yd */
emilmont 1:fdd22bb7aa52 884 pSrc[(2u * i0) + 1u] = (s1 + t2);
emilmont 1:fdd22bb7aa52 885
emilmont 1:fdd22bb7aa52 886 /* (ya + yc) - (yb + yd) */
emilmont 1:fdd22bb7aa52 887 s1 = s1 - t2;
emilmont 1:fdd22bb7aa52 888
emilmont 1:fdd22bb7aa52 889 /* yb - yd */
emilmont 1:fdd22bb7aa52 890 t1 = (pSrc[(2u * i1) + 1u] >> 4u) - (pSrc[(2u * i3) + 1u] >> 4u);
emilmont 1:fdd22bb7aa52 891 /* xb - xd */
emilmont 1:fdd22bb7aa52 892 t2 = (pSrc[2u * i1] >> 4u) - (pSrc[2u * i3] >> 4u);
emilmont 1:fdd22bb7aa52 893
emilmont 1:fdd22bb7aa52 894 /* index calculation for the coefficients */
emilmont 1:fdd22bb7aa52 895 ia2 = 2u * ia1;
emilmont 1:fdd22bb7aa52 896 co2 = pCoef[ia2 * 2u];
emilmont 1:fdd22bb7aa52 897 si2 = pCoef[(ia2 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 898
emilmont 1:fdd22bb7aa52 899 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
emilmont 1:fdd22bb7aa52 900 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) -
emilmont 1:fdd22bb7aa52 901 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u;
emilmont 1:fdd22bb7aa52 902
emilmont 1:fdd22bb7aa52 903 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
emilmont 1:fdd22bb7aa52 904 pSrc[2u * i1 + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) +
emilmont 1:fdd22bb7aa52 905 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u;
emilmont 1:fdd22bb7aa52 906
emilmont 1:fdd22bb7aa52 907 /* (xa - xc) - (yb - yd) */
emilmont 1:fdd22bb7aa52 908 r1 = r2 - t1;
emilmont 1:fdd22bb7aa52 909 /* (xa - xc) + (yb - yd) */
emilmont 1:fdd22bb7aa52 910 r2 = r2 + t1;
emilmont 1:fdd22bb7aa52 911
emilmont 1:fdd22bb7aa52 912 /* (ya - yc) + (xb - xd) */
emilmont 1:fdd22bb7aa52 913 s1 = s2 + t2;
emilmont 1:fdd22bb7aa52 914 /* (ya - yc) - (xb - xd) */
emilmont 1:fdd22bb7aa52 915 s2 = s2 - t2;
emilmont 1:fdd22bb7aa52 916
emilmont 1:fdd22bb7aa52 917 co1 = pCoef[ia1 * 2u];
emilmont 1:fdd22bb7aa52 918 si1 = pCoef[(ia1 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 919
emilmont 1:fdd22bb7aa52 920 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
emilmont 1:fdd22bb7aa52 921 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
emilmont 1:fdd22bb7aa52 922 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u;
emilmont 1:fdd22bb7aa52 923
emilmont 1:fdd22bb7aa52 924 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
emilmont 1:fdd22bb7aa52 925 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
emilmont 1:fdd22bb7aa52 926 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u;
emilmont 1:fdd22bb7aa52 927
emilmont 1:fdd22bb7aa52 928 /* index calculation for the coefficients */
emilmont 1:fdd22bb7aa52 929 ia3 = 3u * ia1;
emilmont 1:fdd22bb7aa52 930 co3 = pCoef[ia3 * 2u];
emilmont 1:fdd22bb7aa52 931 si3 = pCoef[(ia3 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 932
emilmont 1:fdd22bb7aa52 933 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
emilmont 1:fdd22bb7aa52 934 pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
emilmont 1:fdd22bb7aa52 935 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u;
emilmont 1:fdd22bb7aa52 936
emilmont 1:fdd22bb7aa52 937 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
emilmont 1:fdd22bb7aa52 938 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
emilmont 1:fdd22bb7aa52 939 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u;
emilmont 1:fdd22bb7aa52 940
emilmont 1:fdd22bb7aa52 941 /* Twiddle coefficients index modifier */
emilmont 1:fdd22bb7aa52 942 ia1 = ia1 + twidCoefModifier;
emilmont 1:fdd22bb7aa52 943
emilmont 1:fdd22bb7aa52 944 /* Updating input index */
emilmont 1:fdd22bb7aa52 945 i0 = i0 + 1u;
emilmont 1:fdd22bb7aa52 946
emilmont 1:fdd22bb7aa52 947 } while(--j);
emilmont 1:fdd22bb7aa52 948
emilmont 1:fdd22bb7aa52 949 /* data is in 5.27(q27) format */
emilmont 1:fdd22bb7aa52 950 /* each stage provides two down scaling of the input */
emilmont 1:fdd22bb7aa52 951
emilmont 1:fdd22bb7aa52 952
emilmont 1:fdd22bb7aa52 953 /* Start of Middle stages process */
emilmont 1:fdd22bb7aa52 954
emilmont 1:fdd22bb7aa52 955 twidCoefModifier <<= 2u;
emilmont 1:fdd22bb7aa52 956
emilmont 1:fdd22bb7aa52 957 /* Calculation of second stage to excluding last stage */
emilmont 1:fdd22bb7aa52 958 for (k = fftLen / 4u; k > 4u; k >>= 2u)
emilmont 1:fdd22bb7aa52 959 {
emilmont 1:fdd22bb7aa52 960 /* Initializations for the first stage */
emilmont 1:fdd22bb7aa52 961 n1 = n2;
emilmont 1:fdd22bb7aa52 962 n2 >>= 2u;
emilmont 1:fdd22bb7aa52 963 ia1 = 0u;
emilmont 1:fdd22bb7aa52 964
emilmont 1:fdd22bb7aa52 965 for (j = 0; j <= (n2 - 1u); j++)
emilmont 1:fdd22bb7aa52 966 {
emilmont 1:fdd22bb7aa52 967 /* index calculation for the coefficients */
emilmont 1:fdd22bb7aa52 968 ia2 = ia1 + ia1;
emilmont 1:fdd22bb7aa52 969 ia3 = ia2 + ia1;
emilmont 1:fdd22bb7aa52 970 co1 = pCoef[ia1 * 2u];
emilmont 1:fdd22bb7aa52 971 si1 = pCoef[(ia1 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 972 co2 = pCoef[ia2 * 2u];
emilmont 1:fdd22bb7aa52 973 si2 = pCoef[(ia2 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 974 co3 = pCoef[ia3 * 2u];
emilmont 1:fdd22bb7aa52 975 si3 = pCoef[(ia3 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 976 /* Twiddle coefficients index modifier */
emilmont 1:fdd22bb7aa52 977 ia1 = ia1 + twidCoefModifier;
emilmont 1:fdd22bb7aa52 978
emilmont 1:fdd22bb7aa52 979 for (i0 = j; i0 < fftLen; i0 += n1)
emilmont 1:fdd22bb7aa52 980 {
emilmont 1:fdd22bb7aa52 981 /* index calculation for the input as, */
emilmont 1:fdd22bb7aa52 982 /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
emilmont 1:fdd22bb7aa52 983 i1 = i0 + n2;
emilmont 1:fdd22bb7aa52 984 i2 = i1 + n2;
emilmont 1:fdd22bb7aa52 985 i3 = i2 + n2;
emilmont 1:fdd22bb7aa52 986
emilmont 1:fdd22bb7aa52 987 /* Butterfly implementation */
emilmont 1:fdd22bb7aa52 988 /* xa + xc */
emilmont 1:fdd22bb7aa52 989 r1 = pSrc[2u * i0] + pSrc[2u * i2];
emilmont 1:fdd22bb7aa52 990 /* xa - xc */
emilmont 1:fdd22bb7aa52 991 r2 = pSrc[2u * i0] - pSrc[2u * i2];
emilmont 1:fdd22bb7aa52 992
emilmont 1:fdd22bb7aa52 993 /* ya + yc */
emilmont 1:fdd22bb7aa52 994 s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u];
emilmont 1:fdd22bb7aa52 995 /* ya - yc */
emilmont 1:fdd22bb7aa52 996 s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u];
emilmont 1:fdd22bb7aa52 997
emilmont 1:fdd22bb7aa52 998 /* xb + xd */
emilmont 1:fdd22bb7aa52 999 t1 = pSrc[2u * i1] + pSrc[2u * i3];
emilmont 1:fdd22bb7aa52 1000
emilmont 1:fdd22bb7aa52 1001 /* xa' = xa + xb + xc + xd */
emilmont 1:fdd22bb7aa52 1002 pSrc[2u * i0] = (r1 + t1) >> 2u;
emilmont 1:fdd22bb7aa52 1003 /* xa + xc -(xb + xd) */
emilmont 1:fdd22bb7aa52 1004 r1 = r1 - t1;
emilmont 1:fdd22bb7aa52 1005 /* yb + yd */
emilmont 1:fdd22bb7aa52 1006 t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u];
emilmont 1:fdd22bb7aa52 1007 /* ya' = ya + yb + yc + yd */
emilmont 1:fdd22bb7aa52 1008 pSrc[(2u * i0) + 1u] = (s1 + t2) >> 2u;
emilmont 1:fdd22bb7aa52 1009
emilmont 1:fdd22bb7aa52 1010 /* (ya + yc) - (yb + yd) */
emilmont 1:fdd22bb7aa52 1011 s1 = s1 - t2;
emilmont 1:fdd22bb7aa52 1012
emilmont 1:fdd22bb7aa52 1013 /* (yb - yd) */
emilmont 1:fdd22bb7aa52 1014 t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u];
emilmont 1:fdd22bb7aa52 1015 /* (xb - xd) */
emilmont 1:fdd22bb7aa52 1016 t2 = pSrc[2u * i1] - pSrc[2u * i3];
emilmont 1:fdd22bb7aa52 1017
emilmont 1:fdd22bb7aa52 1018 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
emilmont 1:fdd22bb7aa52 1019 pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32u)) -
emilmont 1:fdd22bb7aa52 1020 ((int32_t) (((q63_t) s1 * si2) >> 32u))) >> 1u;
emilmont 1:fdd22bb7aa52 1021
emilmont 1:fdd22bb7aa52 1022 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
emilmont 1:fdd22bb7aa52 1023 pSrc[(2u * i1) + 1u] =
emilmont 1:fdd22bb7aa52 1024 (((int32_t) (((q63_t) s1 * co2) >> 32u)) +
emilmont 1:fdd22bb7aa52 1025 ((int32_t) (((q63_t) r1 * si2) >> 32u))) >> 1u;
emilmont 1:fdd22bb7aa52 1026
emilmont 1:fdd22bb7aa52 1027 /* (xa - xc) - (yb - yd) */
emilmont 1:fdd22bb7aa52 1028 r1 = r2 - t1;
emilmont 1:fdd22bb7aa52 1029 /* (xa - xc) + (yb - yd) */
emilmont 1:fdd22bb7aa52 1030 r2 = r2 + t1;
emilmont 1:fdd22bb7aa52 1031
emilmont 1:fdd22bb7aa52 1032 /* (ya - yc) + (xb - xd) */
emilmont 1:fdd22bb7aa52 1033 s1 = s2 + t2;
emilmont 1:fdd22bb7aa52 1034 /* (ya - yc) - (xb - xd) */
emilmont 1:fdd22bb7aa52 1035 s2 = s2 - t2;
emilmont 1:fdd22bb7aa52 1036
emilmont 1:fdd22bb7aa52 1037 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
emilmont 1:fdd22bb7aa52 1038 pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
emilmont 1:fdd22bb7aa52 1039 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u;
emilmont 1:fdd22bb7aa52 1040
emilmont 1:fdd22bb7aa52 1041 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
emilmont 1:fdd22bb7aa52 1042 pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
emilmont 1:fdd22bb7aa52 1043 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u;
emilmont 1:fdd22bb7aa52 1044
emilmont 1:fdd22bb7aa52 1045 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
emilmont 1:fdd22bb7aa52 1046 pSrc[(2u * i3)] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
emilmont 1:fdd22bb7aa52 1047 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u;
emilmont 1:fdd22bb7aa52 1048
emilmont 1:fdd22bb7aa52 1049 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
emilmont 1:fdd22bb7aa52 1050 pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
emilmont 1:fdd22bb7aa52 1051 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u;
emilmont 1:fdd22bb7aa52 1052 }
emilmont 1:fdd22bb7aa52 1053 }
emilmont 1:fdd22bb7aa52 1054 twidCoefModifier <<= 2u;
emilmont 1:fdd22bb7aa52 1055 }
mbed_official 5:3762170b6d4d 1056 #else
mbed_official 5:3762170b6d4d 1057 uint32_t n1, n2, ia1, ia2, ia3, i0, j, k;
mbed_official 5:3762170b6d4d 1058 q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
mbed_official 5:3762170b6d4d 1059 q31_t xa, xb, xc, xd;
mbed_official 5:3762170b6d4d 1060 q31_t ya, yb, yc, yd;
mbed_official 5:3762170b6d4d 1061 q31_t xa_out, xb_out, xc_out, xd_out;
mbed_official 5:3762170b6d4d 1062 q31_t ya_out, yb_out, yc_out, yd_out;
mbed_official 5:3762170b6d4d 1063
mbed_official 5:3762170b6d4d 1064 q31_t *ptr1;
mbed_official 5:3762170b6d4d 1065 q31_t *pSi0;
mbed_official 5:3762170b6d4d 1066 q31_t *pSi1;
mbed_official 5:3762170b6d4d 1067 q31_t *pSi2;
mbed_official 5:3762170b6d4d 1068 q31_t *pSi3;
mbed_official 5:3762170b6d4d 1069 q63_t xaya, xbyb, xcyc, xdyd;
mbed_official 5:3762170b6d4d 1070
mbed_official 5:3762170b6d4d 1071 /* input is be 1.31(q31) format for all FFT sizes */
mbed_official 5:3762170b6d4d 1072 /* Total process is divided into three stages */
mbed_official 5:3762170b6d4d 1073 /* process first stage, middle stages, & last stage */
mbed_official 5:3762170b6d4d 1074
mbed_official 5:3762170b6d4d 1075 /* Start of first stage process */
mbed_official 5:3762170b6d4d 1076
mbed_official 5:3762170b6d4d 1077 /* Initializations for the first stage */
mbed_official 5:3762170b6d4d 1078 n2 = fftLen;
mbed_official 5:3762170b6d4d 1079 n1 = n2;
mbed_official 5:3762170b6d4d 1080 /* n2 = fftLen/4 */
mbed_official 5:3762170b6d4d 1081 n2 >>= 2u;
mbed_official 5:3762170b6d4d 1082
mbed_official 5:3762170b6d4d 1083 ia1 = 0u;
mbed_official 5:3762170b6d4d 1084
mbed_official 5:3762170b6d4d 1085 j = n2;
mbed_official 5:3762170b6d4d 1086
mbed_official 5:3762170b6d4d 1087 pSi0 = pSrc;
mbed_official 5:3762170b6d4d 1088 pSi1 = pSi0 + 2 * n2;
mbed_official 5:3762170b6d4d 1089 pSi2 = pSi1 + 2 * n2;
mbed_official 5:3762170b6d4d 1090 pSi3 = pSi2 + 2 * n2;
mbed_official 5:3762170b6d4d 1091
mbed_official 5:3762170b6d4d 1092 do
mbed_official 5:3762170b6d4d 1093 {
mbed_official 5:3762170b6d4d 1094 /* Butterfly implementation */
mbed_official 5:3762170b6d4d 1095 /* xa + xc */
mbed_official 5:3762170b6d4d 1096 r1 = (pSi0[0] >> 4u) + (pSi2[0] >> 4u);
mbed_official 5:3762170b6d4d 1097 /* xa - xc */
mbed_official 5:3762170b6d4d 1098 r2 = (pSi0[0] >> 4u) - (pSi2[0] >> 4u);
mbed_official 5:3762170b6d4d 1099
mbed_official 5:3762170b6d4d 1100 /* xb + xd */
mbed_official 5:3762170b6d4d 1101 t1 = (pSi1[0] >> 4u) + (pSi3[0] >> 4u);
mbed_official 5:3762170b6d4d 1102
mbed_official 5:3762170b6d4d 1103 /* ya + yc */
mbed_official 5:3762170b6d4d 1104 s1 = (pSi0[1] >> 4u) + (pSi2[1] >> 4u);
mbed_official 5:3762170b6d4d 1105 /* ya - yc */
mbed_official 5:3762170b6d4d 1106 s2 = (pSi0[1] >> 4u) - (pSi2[1] >> 4u);
mbed_official 5:3762170b6d4d 1107
mbed_official 5:3762170b6d4d 1108 /* xa' = xa + xb + xc + xd */
mbed_official 5:3762170b6d4d 1109 *pSi0++ = (r1 + t1);
mbed_official 5:3762170b6d4d 1110 /* (xa + xc) - (xb + xd) */
mbed_official 5:3762170b6d4d 1111 r1 = r1 - t1;
mbed_official 5:3762170b6d4d 1112 /* yb + yd */
mbed_official 5:3762170b6d4d 1113 t2 = (pSi1[1] >> 4u) + (pSi3[1] >> 4u);
mbed_official 5:3762170b6d4d 1114 /* ya' = ya + yb + yc + yd */
mbed_official 5:3762170b6d4d 1115 *pSi0++ = (s1 + t2);
mbed_official 5:3762170b6d4d 1116
mbed_official 5:3762170b6d4d 1117 /* (ya + yc) - (yb + yd) */
mbed_official 5:3762170b6d4d 1118 s1 = s1 - t2;
mbed_official 5:3762170b6d4d 1119
mbed_official 5:3762170b6d4d 1120 /* yb - yd */
mbed_official 5:3762170b6d4d 1121 t1 = (pSi1[1] >> 4u) - (pSi3[1] >> 4u);
mbed_official 5:3762170b6d4d 1122 /* xb - xd */
mbed_official 5:3762170b6d4d 1123 t2 = (pSi1[0] >> 4u) - (pSi3[0] >> 4u);
mbed_official 5:3762170b6d4d 1124
mbed_official 5:3762170b6d4d 1125 /* index calculation for the coefficients */
mbed_official 5:3762170b6d4d 1126 ia2 = 2u * ia1;
mbed_official 5:3762170b6d4d 1127 co2 = pCoef[ia2 * 2u];
mbed_official 5:3762170b6d4d 1128 si2 = pCoef[(ia2 * 2u) + 1u];
mbed_official 5:3762170b6d4d 1129
mbed_official 5:3762170b6d4d 1130 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
mbed_official 5:3762170b6d4d 1131 *pSi1++ = (((int32_t) (((q63_t) r1 * co2) >> 32)) -
mbed_official 5:3762170b6d4d 1132 ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u;
mbed_official 5:3762170b6d4d 1133
mbed_official 5:3762170b6d4d 1134 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
mbed_official 5:3762170b6d4d 1135 *pSi1++ = (((int32_t) (((q63_t) s1 * co2) >> 32)) +
mbed_official 5:3762170b6d4d 1136 ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u;
mbed_official 5:3762170b6d4d 1137
mbed_official 5:3762170b6d4d 1138 /* (xa - xc) - (yb - yd) */
mbed_official 5:3762170b6d4d 1139 r1 = r2 - t1;
mbed_official 5:3762170b6d4d 1140 /* (xa - xc) + (yb - yd) */
mbed_official 5:3762170b6d4d 1141 r2 = r2 + t1;
mbed_official 5:3762170b6d4d 1142
mbed_official 5:3762170b6d4d 1143 /* (ya - yc) + (xb - xd) */
mbed_official 5:3762170b6d4d 1144 s1 = s2 + t2;
mbed_official 5:3762170b6d4d 1145 /* (ya - yc) - (xb - xd) */
mbed_official 5:3762170b6d4d 1146 s2 = s2 - t2;
mbed_official 5:3762170b6d4d 1147
mbed_official 5:3762170b6d4d 1148 co1 = pCoef[ia1 * 2u];
mbed_official 5:3762170b6d4d 1149 si1 = pCoef[(ia1 * 2u) + 1u];
mbed_official 5:3762170b6d4d 1150
mbed_official 5:3762170b6d4d 1151 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
mbed_official 5:3762170b6d4d 1152 *pSi2++ = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
mbed_official 5:3762170b6d4d 1153 ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u;
mbed_official 5:3762170b6d4d 1154
mbed_official 5:3762170b6d4d 1155 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
mbed_official 5:3762170b6d4d 1156 *pSi2++ = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
mbed_official 5:3762170b6d4d 1157 ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u;
mbed_official 5:3762170b6d4d 1158
mbed_official 5:3762170b6d4d 1159 /* index calculation for the coefficients */
mbed_official 5:3762170b6d4d 1160 ia3 = 3u * ia1;
mbed_official 5:3762170b6d4d 1161 co3 = pCoef[ia3 * 2u];
mbed_official 5:3762170b6d4d 1162 si3 = pCoef[(ia3 * 2u) + 1u];
mbed_official 5:3762170b6d4d 1163
mbed_official 5:3762170b6d4d 1164 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
mbed_official 5:3762170b6d4d 1165 *pSi3++ = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
mbed_official 5:3762170b6d4d 1166 ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u;
mbed_official 5:3762170b6d4d 1167
mbed_official 5:3762170b6d4d 1168 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
mbed_official 5:3762170b6d4d 1169 *pSi3++ = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
mbed_official 5:3762170b6d4d 1170 ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u;
mbed_official 5:3762170b6d4d 1171
mbed_official 5:3762170b6d4d 1172 /* Twiddle coefficients index modifier */
mbed_official 5:3762170b6d4d 1173 ia1 = ia1 + twidCoefModifier;
mbed_official 5:3762170b6d4d 1174
mbed_official 5:3762170b6d4d 1175 } while(--j);
mbed_official 5:3762170b6d4d 1176
mbed_official 5:3762170b6d4d 1177 /* data is in 5.27(q27) format */
mbed_official 5:3762170b6d4d 1178 /* each stage provides two down scaling of the input */
mbed_official 5:3762170b6d4d 1179
mbed_official 5:3762170b6d4d 1180
mbed_official 5:3762170b6d4d 1181 /* Start of Middle stages process */
mbed_official 5:3762170b6d4d 1182
mbed_official 5:3762170b6d4d 1183 twidCoefModifier <<= 2u;
mbed_official 5:3762170b6d4d 1184
mbed_official 5:3762170b6d4d 1185 /* Calculation of second stage to excluding last stage */
mbed_official 5:3762170b6d4d 1186 for (k = fftLen / 4u; k > 4u; k >>= 2u)
mbed_official 5:3762170b6d4d 1187 {
mbed_official 5:3762170b6d4d 1188 /* Initializations for the first stage */
mbed_official 5:3762170b6d4d 1189 n1 = n2;
mbed_official 5:3762170b6d4d 1190 n2 >>= 2u;
mbed_official 5:3762170b6d4d 1191 ia1 = 0u;
mbed_official 5:3762170b6d4d 1192
mbed_official 5:3762170b6d4d 1193 for (j = 0; j <= (n2 - 1u); j++)
mbed_official 5:3762170b6d4d 1194 {
mbed_official 5:3762170b6d4d 1195 /* index calculation for the coefficients */
mbed_official 5:3762170b6d4d 1196 ia2 = ia1 + ia1;
mbed_official 5:3762170b6d4d 1197 ia3 = ia2 + ia1;
mbed_official 5:3762170b6d4d 1198 co1 = pCoef[ia1 * 2u];
mbed_official 5:3762170b6d4d 1199 si1 = pCoef[(ia1 * 2u) + 1u];
mbed_official 5:3762170b6d4d 1200 co2 = pCoef[ia2 * 2u];
mbed_official 5:3762170b6d4d 1201 si2 = pCoef[(ia2 * 2u) + 1u];
mbed_official 5:3762170b6d4d 1202 co3 = pCoef[ia3 * 2u];
mbed_official 5:3762170b6d4d 1203 si3 = pCoef[(ia3 * 2u) + 1u];
mbed_official 5:3762170b6d4d 1204 /* Twiddle coefficients index modifier */
mbed_official 5:3762170b6d4d 1205 ia1 = ia1 + twidCoefModifier;
mbed_official 5:3762170b6d4d 1206
mbed_official 5:3762170b6d4d 1207 pSi0 = pSrc + 2 * j;
mbed_official 5:3762170b6d4d 1208 pSi1 = pSi0 + 2 * n2;
mbed_official 5:3762170b6d4d 1209 pSi2 = pSi1 + 2 * n2;
mbed_official 5:3762170b6d4d 1210 pSi3 = pSi2 + 2 * n2;
mbed_official 5:3762170b6d4d 1211
mbed_official 5:3762170b6d4d 1212 for (i0 = j; i0 < fftLen; i0 += n1)
mbed_official 5:3762170b6d4d 1213 {
mbed_official 5:3762170b6d4d 1214 /* Butterfly implementation */
mbed_official 5:3762170b6d4d 1215 /* xa + xc */
mbed_official 5:3762170b6d4d 1216 r1 = pSi0[0] + pSi2[0];
mbed_official 5:3762170b6d4d 1217
mbed_official 5:3762170b6d4d 1218 /* xa - xc */
mbed_official 5:3762170b6d4d 1219 r2 = pSi0[0] - pSi2[0];
mbed_official 5:3762170b6d4d 1220
mbed_official 5:3762170b6d4d 1221
mbed_official 5:3762170b6d4d 1222 /* ya + yc */
mbed_official 5:3762170b6d4d 1223 s1 = pSi0[1] + pSi2[1];
mbed_official 5:3762170b6d4d 1224
mbed_official 5:3762170b6d4d 1225 /* ya - yc */
mbed_official 5:3762170b6d4d 1226 s2 = pSi0[1] - pSi2[1];
mbed_official 5:3762170b6d4d 1227
mbed_official 5:3762170b6d4d 1228
mbed_official 5:3762170b6d4d 1229 /* xb + xd */
mbed_official 5:3762170b6d4d 1230 t1 = pSi1[0] + pSi3[0];
mbed_official 5:3762170b6d4d 1231
mbed_official 5:3762170b6d4d 1232
mbed_official 5:3762170b6d4d 1233 /* xa' = xa + xb + xc + xd */
mbed_official 5:3762170b6d4d 1234 pSi0[0] = (r1 + t1) >> 2u;
mbed_official 5:3762170b6d4d 1235 /* xa + xc -(xb + xd) */
mbed_official 5:3762170b6d4d 1236 r1 = r1 - t1;
mbed_official 5:3762170b6d4d 1237 /* yb + yd */
mbed_official 5:3762170b6d4d 1238 t2 = pSi1[1] + pSi3[1];
mbed_official 5:3762170b6d4d 1239
mbed_official 5:3762170b6d4d 1240 /* ya' = ya + yb + yc + yd */
mbed_official 5:3762170b6d4d 1241 pSi0[1] = (s1 + t2) >> 2u;
mbed_official 5:3762170b6d4d 1242 pSi0 += 2 * n1;
mbed_official 5:3762170b6d4d 1243
mbed_official 5:3762170b6d4d 1244 /* (ya + yc) - (yb + yd) */
mbed_official 5:3762170b6d4d 1245 s1 = s1 - t2;
mbed_official 5:3762170b6d4d 1246
mbed_official 5:3762170b6d4d 1247 /* (yb - yd) */
mbed_official 5:3762170b6d4d 1248 t1 = pSi1[1] - pSi3[1];
mbed_official 5:3762170b6d4d 1249
mbed_official 5:3762170b6d4d 1250 /* (xb - xd) */
mbed_official 5:3762170b6d4d 1251 t2 = pSi1[0] - pSi3[0];
mbed_official 5:3762170b6d4d 1252
mbed_official 5:3762170b6d4d 1253
mbed_official 5:3762170b6d4d 1254 /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
mbed_official 5:3762170b6d4d 1255 pSi1[0] = (((int32_t) (((q63_t) r1 * co2) >> 32u)) -
mbed_official 5:3762170b6d4d 1256 ((int32_t) (((q63_t) s1 * si2) >> 32u))) >> 1u;
mbed_official 5:3762170b6d4d 1257
mbed_official 5:3762170b6d4d 1258 /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
mbed_official 5:3762170b6d4d 1259 pSi1[1] =
mbed_official 5:3762170b6d4d 1260
mbed_official 5:3762170b6d4d 1261 (((int32_t) (((q63_t) s1 * co2) >> 32u)) +
mbed_official 5:3762170b6d4d 1262 ((int32_t) (((q63_t) r1 * si2) >> 32u))) >> 1u;
mbed_official 5:3762170b6d4d 1263 pSi1 += 2 * n1;
mbed_official 5:3762170b6d4d 1264
mbed_official 5:3762170b6d4d 1265 /* (xa - xc) - (yb - yd) */
mbed_official 5:3762170b6d4d 1266 r1 = r2 - t1;
mbed_official 5:3762170b6d4d 1267 /* (xa - xc) + (yb - yd) */
mbed_official 5:3762170b6d4d 1268 r2 = r2 + t1;
mbed_official 5:3762170b6d4d 1269
mbed_official 5:3762170b6d4d 1270 /* (ya - yc) + (xb - xd) */
mbed_official 5:3762170b6d4d 1271 s1 = s2 + t2;
mbed_official 5:3762170b6d4d 1272 /* (ya - yc) - (xb - xd) */
mbed_official 5:3762170b6d4d 1273 s2 = s2 - t2;
mbed_official 5:3762170b6d4d 1274
mbed_official 5:3762170b6d4d 1275 /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
mbed_official 5:3762170b6d4d 1276 pSi2[0] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
mbed_official 5:3762170b6d4d 1277 ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u;
mbed_official 5:3762170b6d4d 1278
mbed_official 5:3762170b6d4d 1279 /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
mbed_official 5:3762170b6d4d 1280 pSi2[1] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
mbed_official 5:3762170b6d4d 1281 ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u;
mbed_official 5:3762170b6d4d 1282 pSi2 += 2 * n1;
mbed_official 5:3762170b6d4d 1283
mbed_official 5:3762170b6d4d 1284 /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
mbed_official 5:3762170b6d4d 1285 pSi3[0] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
mbed_official 5:3762170b6d4d 1286 ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u;
mbed_official 5:3762170b6d4d 1287
mbed_official 5:3762170b6d4d 1288 /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
mbed_official 5:3762170b6d4d 1289 pSi3[1] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
mbed_official 5:3762170b6d4d 1290 ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u;
mbed_official 5:3762170b6d4d 1291 pSi3 += 2 * n1;
mbed_official 5:3762170b6d4d 1292 }
mbed_official 5:3762170b6d4d 1293 }
mbed_official 5:3762170b6d4d 1294 twidCoefModifier <<= 2u;
mbed_official 5:3762170b6d4d 1295 }
mbed_official 5:3762170b6d4d 1296 #endif
emilmont 1:fdd22bb7aa52 1297
emilmont 1:fdd22bb7aa52 1298 /* End of Middle stages process */
emilmont 1:fdd22bb7aa52 1299
emilmont 1:fdd22bb7aa52 1300 /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
emilmont 1:fdd22bb7aa52 1301 /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
emilmont 1:fdd22bb7aa52 1302 /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */
emilmont 1:fdd22bb7aa52 1303 /* data is in 5.27(q27) format for the 16 point as there are no middle stages */
emilmont 1:fdd22bb7aa52 1304
emilmont 1:fdd22bb7aa52 1305
emilmont 1:fdd22bb7aa52 1306 /* Start of last stage process */
emilmont 1:fdd22bb7aa52 1307
emilmont 1:fdd22bb7aa52 1308
emilmont 1:fdd22bb7aa52 1309 /* Initializations for the last stage */
emilmont 1:fdd22bb7aa52 1310 j = fftLen >> 2;
emilmont 1:fdd22bb7aa52 1311 ptr1 = &pSrc[0];
emilmont 1:fdd22bb7aa52 1312
emilmont 1:fdd22bb7aa52 1313 /* Calculations of last stage */
emilmont 1:fdd22bb7aa52 1314 do
emilmont 1:fdd22bb7aa52 1315 {
emilmont 1:fdd22bb7aa52 1316 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 1317 /* Read xa (real), ya(imag) input */
emilmont 1:fdd22bb7aa52 1318 xaya = *__SIMD64(ptr1)++;
emilmont 1:fdd22bb7aa52 1319 xa = (q31_t) xaya;
emilmont 1:fdd22bb7aa52 1320 ya = (q31_t) (xaya >> 32);
emilmont 1:fdd22bb7aa52 1321
emilmont 1:fdd22bb7aa52 1322 /* Read xb (real), yb(imag) input */
emilmont 1:fdd22bb7aa52 1323 xbyb = *__SIMD64(ptr1)++;
emilmont 1:fdd22bb7aa52 1324 xb = (q31_t) xbyb;
emilmont 1:fdd22bb7aa52 1325 yb = (q31_t) (xbyb >> 32);
emilmont 1:fdd22bb7aa52 1326
emilmont 1:fdd22bb7aa52 1327 /* Read xc (real), yc(imag) input */
emilmont 1:fdd22bb7aa52 1328 xcyc = *__SIMD64(ptr1)++;
emilmont 1:fdd22bb7aa52 1329 xc = (q31_t) xcyc;
emilmont 1:fdd22bb7aa52 1330 yc = (q31_t) (xcyc >> 32);
emilmont 1:fdd22bb7aa52 1331
emilmont 1:fdd22bb7aa52 1332 /* Read xc (real), yc(imag) input */
emilmont 1:fdd22bb7aa52 1333 xdyd = *__SIMD64(ptr1)++;
emilmont 1:fdd22bb7aa52 1334 xd = (q31_t) xdyd;
emilmont 1:fdd22bb7aa52 1335 yd = (q31_t) (xdyd >> 32);
emilmont 1:fdd22bb7aa52 1336
emilmont 1:fdd22bb7aa52 1337 #else
emilmont 1:fdd22bb7aa52 1338
emilmont 1:fdd22bb7aa52 1339 /* Read xa (real), ya(imag) input */
emilmont 1:fdd22bb7aa52 1340 xaya = *__SIMD64(ptr1)++;
emilmont 1:fdd22bb7aa52 1341 ya = (q31_t) xaya;
emilmont 1:fdd22bb7aa52 1342 xa = (q31_t) (xaya >> 32);
emilmont 1:fdd22bb7aa52 1343
emilmont 1:fdd22bb7aa52 1344 /* Read xb (real), yb(imag) input */
emilmont 1:fdd22bb7aa52 1345 xbyb = *__SIMD64(ptr1)++;
emilmont 1:fdd22bb7aa52 1346 yb = (q31_t) xbyb;
emilmont 1:fdd22bb7aa52 1347 xb = (q31_t) (xbyb >> 32);
emilmont 1:fdd22bb7aa52 1348
emilmont 1:fdd22bb7aa52 1349 /* Read xc (real), yc(imag) input */
emilmont 1:fdd22bb7aa52 1350 xcyc = *__SIMD64(ptr1)++;
emilmont 1:fdd22bb7aa52 1351 yc = (q31_t) xcyc;
emilmont 1:fdd22bb7aa52 1352 xc = (q31_t) (xcyc >> 32);
emilmont 1:fdd22bb7aa52 1353
emilmont 1:fdd22bb7aa52 1354 /* Read xc (real), yc(imag) input */
emilmont 1:fdd22bb7aa52 1355 xdyd = *__SIMD64(ptr1)++;
emilmont 1:fdd22bb7aa52 1356 yd = (q31_t) xdyd;
emilmont 1:fdd22bb7aa52 1357 xd = (q31_t) (xdyd >> 32);
emilmont 1:fdd22bb7aa52 1358
emilmont 1:fdd22bb7aa52 1359
emilmont 1:fdd22bb7aa52 1360 #endif
emilmont 1:fdd22bb7aa52 1361
emilmont 1:fdd22bb7aa52 1362 /* xa' = xa + xb + xc + xd */
emilmont 1:fdd22bb7aa52 1363 xa_out = xa + xb + xc + xd;
emilmont 1:fdd22bb7aa52 1364
emilmont 1:fdd22bb7aa52 1365 /* ya' = ya + yb + yc + yd */
emilmont 1:fdd22bb7aa52 1366 ya_out = ya + yb + yc + yd;
emilmont 1:fdd22bb7aa52 1367
emilmont 1:fdd22bb7aa52 1368 /* pointer updation for writing */
emilmont 1:fdd22bb7aa52 1369 ptr1 = ptr1 - 8u;
emilmont 1:fdd22bb7aa52 1370
emilmont 1:fdd22bb7aa52 1371 /* writing xa' and ya' */
emilmont 1:fdd22bb7aa52 1372 *ptr1++ = xa_out;
emilmont 1:fdd22bb7aa52 1373 *ptr1++ = ya_out;
emilmont 1:fdd22bb7aa52 1374
emilmont 1:fdd22bb7aa52 1375 xc_out = (xa - xb + xc - xd);
emilmont 1:fdd22bb7aa52 1376 yc_out = (ya - yb + yc - yd);
emilmont 1:fdd22bb7aa52 1377
emilmont 1:fdd22bb7aa52 1378 /* writing xc' and yc' */
emilmont 1:fdd22bb7aa52 1379 *ptr1++ = xc_out;
emilmont 1:fdd22bb7aa52 1380 *ptr1++ = yc_out;
emilmont 1:fdd22bb7aa52 1381
emilmont 1:fdd22bb7aa52 1382 xb_out = (xa - yb - xc + yd);
emilmont 1:fdd22bb7aa52 1383 yb_out = (ya + xb - yc - xd);
emilmont 1:fdd22bb7aa52 1384
emilmont 1:fdd22bb7aa52 1385 /* writing xb' and yb' */
emilmont 1:fdd22bb7aa52 1386 *ptr1++ = xb_out;
emilmont 1:fdd22bb7aa52 1387 *ptr1++ = yb_out;
emilmont 1:fdd22bb7aa52 1388
emilmont 1:fdd22bb7aa52 1389 xd_out = (xa + yb - xc - yd);
emilmont 1:fdd22bb7aa52 1390 yd_out = (ya - xb - yc + xd);
emilmont 1:fdd22bb7aa52 1391
emilmont 1:fdd22bb7aa52 1392 /* writing xd' and yd' */
emilmont 1:fdd22bb7aa52 1393 *ptr1++ = xd_out;
emilmont 1:fdd22bb7aa52 1394 *ptr1++ = yd_out;
emilmont 1:fdd22bb7aa52 1395
emilmont 1:fdd22bb7aa52 1396 } while(--j);
emilmont 1:fdd22bb7aa52 1397
emilmont 1:fdd22bb7aa52 1398 /* output is in 11.21(q21) format for the 1024 point */
emilmont 1:fdd22bb7aa52 1399 /* output is in 9.23(q23) format for the 256 point */
emilmont 1:fdd22bb7aa52 1400 /* output is in 7.25(q25) format for the 64 point */
emilmont 1:fdd22bb7aa52 1401 /* output is in 5.27(q27) format for the 16 point */
emilmont 1:fdd22bb7aa52 1402
emilmont 1:fdd22bb7aa52 1403 /* End of last stage process */
emilmont 1:fdd22bb7aa52 1404 }