CMSIS DSP library

Dependents:   performance_timer Surfboard_ gps2rtty Capstone ... more

Legacy Warning

This is an mbed 2 library. To learn more about mbed OS 5, visit the docs.

Committer:
emilmont
Date:
Thu May 30 17:10:11 2013 +0100
Revision:
2:da51fb522205
Parent:
1:fdd22bb7aa52
Child:
3:7a284390b0ce
Keep "cmsis-dsp" module in synch with its source

Who changed what in which revision?

User | Revision | Line number | New contents of line
emilmont 1:fdd22bb7aa52 1 /* ----------------------------------------------------------------------
emilmont 1:fdd22bb7aa52 2 * Copyright (C) 2010 ARM Limited. All rights reserved.
emilmont 1:fdd22bb7aa52 3 *
emilmont 1:fdd22bb7aa52 4 * $Date: 15. February 2012
emilmont 2:da51fb522205 5 * $Revision: V1.1.0
emilmont 1:fdd22bb7aa52 6 *
emilmont 2:da51fb522205 7 * Project: CMSIS DSP Library
emilmont 2:da51fb522205 8 * Title: arm_cfft_radix4_q15.c
emilmont 1:fdd22bb7aa52 9 *
emilmont 2:da51fb522205 10 * Description: This file contains the function definitions of the Radix-4 FFT & IFFT and
emilmont 2:da51fb522205 11 * the in-place bit reversal that uses a bit reversal table
emilmont 1:fdd22bb7aa52 12 *
emilmont 1:fdd22bb7aa52 13 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
emilmont 1:fdd22bb7aa52 14 *
emilmont 1:fdd22bb7aa52 15 * Version 1.1.0 2012/02/15
emilmont 1:fdd22bb7aa52 16 * Updated with more optimizations, bug fixes and minor API changes.
emilmont 1:fdd22bb7aa52 17 *
emilmont 1:fdd22bb7aa52 18 * Version 1.0.10 2011/7/15
emilmont 1:fdd22bb7aa52 19 * Big Endian support added and Merged M0 and M3/M4 Source code.
emilmont 1:fdd22bb7aa52 20 *
emilmont 1:fdd22bb7aa52 21 * Version 1.0.3 2010/11/29
emilmont 1:fdd22bb7aa52 22 * Re-organized the CMSIS folders and updated documentation.
emilmont 1:fdd22bb7aa52 23 *
emilmont 1:fdd22bb7aa52 24 * Version 1.0.2 2010/11/11
emilmont 1:fdd22bb7aa52 25 * Documentation updated.
emilmont 1:fdd22bb7aa52 26 *
emilmont 1:fdd22bb7aa52 27 * Version 1.0.1 2010/10/05
emilmont 1:fdd22bb7aa52 28 * Production release and review comments incorporated.
emilmont 1:fdd22bb7aa52 29 *
emilmont 1:fdd22bb7aa52 30 * Version 1.0.0 2010/09/20
emilmont 1:fdd22bb7aa52 31 * Production release and review comments incorporated.
emilmont 1:fdd22bb7aa52 32 *
emilmont 1:fdd22bb7aa52 33 * Version 0.0.5 2010/04/26
emilmont 2:da51fb522205 34 * incorporated review comments and updated with latest CMSIS layer
emilmont 1:fdd22bb7aa52 35 *
emilmont 1:fdd22bb7aa52 36 * Version 0.0.3 2010/03/10
emilmont 1:fdd22bb7aa52 37 * Initial version
emilmont 1:fdd22bb7aa52 38 * -------------------------------------------------------------------- */
emilmont 1:fdd22bb7aa52 39
emilmont 1:fdd22bb7aa52 40 #include "arm_math.h"
emilmont 1:fdd22bb7aa52 41
emilmont 1:fdd22bb7aa52 42 /**
emilmont 1:fdd22bb7aa52 43 * @ingroup groupTransforms
emilmont 1:fdd22bb7aa52 44 */
emilmont 1:fdd22bb7aa52 45
emilmont 1:fdd22bb7aa52 46 /**
emilmont 1:fdd22bb7aa52 47 * @addtogroup Radix4_CFFT_CIFFT
emilmont 1:fdd22bb7aa52 48 * @{
emilmont 1:fdd22bb7aa52 49 */
emilmont 1:fdd22bb7aa52 50
emilmont 1:fdd22bb7aa52 51
emilmont 1:fdd22bb7aa52 52 /**
emilmont 1:fdd22bb7aa52 53 * @details
emilmont 1:fdd22bb7aa52 54 * @brief Processing function for the Q15 CFFT/CIFFT: runs the radix-4 inverse or forward butterfly selected by S->ifftFlag, then bit reversal when S->bitReverseFlag is 1.
emilmont 1:fdd22bb7aa52 55 * @param[in] *S points to an instance of the Q15 CFFT/CIFFT structure; its ifftFlag, bitReverseFlag, fftLen, pTwiddle, twidCoefModifier, bitRevFactor and pBitRevTable fields are read.
emilmont 1:fdd22bb7aa52 56 * @param[in, out] *pSrc points to the complex data buffer. Processing occurs in-place.
emilmont 1:fdd22bb7aa52 57 * @return none.
emilmont 1:fdd22bb7aa52 58 * @note Neither S nor pSrc is checked for NULL before being used.
emilmont 1:fdd22bb7aa52 59 * \par Input and output formats:
emilmont 1:fdd22bb7aa52 60 * \par
emilmont 1:fdd22bb7aa52 61 * Internally the input is downscaled by 2 at every stage to avoid saturation inside the CFFT/CIFFT process.
emilmont 1:fdd22bb7aa52 62 * Hence the output format is different for different FFT sizes.
emilmont 1:fdd22bb7aa52 63 * The input and output formats for different FFT sizes and the number of bits to upscale are given in the tables below for CFFT and CIFFT:
emilmont 1:fdd22bb7aa52 64 * \par
emilmont 1:fdd22bb7aa52 65 * \image html CFFTQ15.gif "Input and Output Formats for Q15 CFFT"
emilmont 1:fdd22bb7aa52 66 * \image html CIFFTQ15.gif "Input and Output Formats for Q15 CIFFT"
emilmont 1:fdd22bb7aa52 67 */
emilmont 1:fdd22bb7aa52 68
emilmont 1:fdd22bb7aa52 69 void arm_cfft_radix4_q15(
emilmont 1:fdd22bb7aa52 70 const arm_cfft_radix4_instance_q15 * S,
emilmont 1:fdd22bb7aa52 71 q15_t * pSrc)
emilmont 1:fdd22bb7aa52 72 {
emilmont 1:fdd22bb7aa52 73 if(S->ifftFlag == 1u)
emilmont 1:fdd22bb7aa52 74 {
emilmont 1:fdd22bb7aa52 75 /* ifftFlag == 1: complex inverse FFT, radix-4 butterfly applied to pSrc */
emilmont 1:fdd22bb7aa52 76 arm_radix4_butterfly_inverse_q15(pSrc, S->fftLen, S->pTwiddle,
emilmont 1:fdd22bb7aa52 77 S->twidCoefModifier);
emilmont 1:fdd22bb7aa52 78 }
emilmont 1:fdd22bb7aa52 79 else
emilmont 1:fdd22bb7aa52 80 {
emilmont 1:fdd22bb7aa52 81 /* any other ifftFlag value: complex forward FFT, radix-4 butterfly applied to pSrc */
emilmont 1:fdd22bb7aa52 82 arm_radix4_butterfly_q15(pSrc, S->fftLen, S->pTwiddle,
emilmont 1:fdd22bb7aa52 83 S->twidCoefModifier);
emilmont 1:fdd22bb7aa52 84 }
emilmont 1:fdd22bb7aa52 85
emilmont 1:fdd22bb7aa52 86 if(S->bitReverseFlag == 1u)
emilmont 1:fdd22bb7aa52 87 {
emilmont 1:fdd22bb7aa52 88 /* bitReverseFlag == 1: bit reversal of the transformed pSrc buffer using S->pBitRevTable; skipped for any other flag value */
emilmont 1:fdd22bb7aa52 89 arm_bitreversal_q15(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
emilmont 1:fdd22bb7aa52 90 }
emilmont 1:fdd22bb7aa52 91
emilmont 1:fdd22bb7aa52 92 }
emilmont 1:fdd22bb7aa52 93
emilmont 1:fdd22bb7aa52 94 /**
emilmont 1:fdd22bb7aa52 95 * @} end of Radix4_CFFT_CIFFT group
emilmont 1:fdd22bb7aa52 96 */
emilmont 1:fdd22bb7aa52 97
emilmont 1:fdd22bb7aa52 98 /*
emilmont 1:fdd22bb7aa52 99 * Radix-4 FFT algorithm used is :
emilmont 1:fdd22bb7aa52 100 *
emilmont 1:fdd22bb7aa52 101 * Input real and imaginary data:
emilmont 1:fdd22bb7aa52 102 * x(n) = xa + j * ya
emilmont 1:fdd22bb7aa52 103 * x(n+N/4 ) = xb + j * yb
emilmont 1:fdd22bb7aa52 104 * x(n+N/2 ) = xc + j * yc
emilmont 1:fdd22bb7aa52 105 * x(n+3N/4) = xd + j * yd
emilmont 1:fdd22bb7aa52 106 *
emilmont 1:fdd22bb7aa52 107 *
emilmont 1:fdd22bb7aa52 108 * Output real and imaginary data:
emilmont 1:fdd22bb7aa52 109 * x(4r) = xa'+ j * ya'
emilmont 1:fdd22bb7aa52 110 * x(4r+1) = xb'+ j * yb'
emilmont 1:fdd22bb7aa52 111 * x(4r+2) = xc'+ j * yc'
emilmont 1:fdd22bb7aa52 112 * x(4r+3) = xd'+ j * yd'
emilmont 1:fdd22bb7aa52 113 *
emilmont 1:fdd22bb7aa52 114 *
emilmont 1:fdd22bb7aa52 115 * Twiddle factors for radix-4 FFT:
emilmont 1:fdd22bb7aa52 116 * Wn = co1 + j * (- si1)
emilmont 1:fdd22bb7aa52 117 * W2n = co2 + j * (- si2)
emilmont 1:fdd22bb7aa52 118 * W3n = co3 + j * (- si3)
emilmont 1:fdd22bb7aa52 119
emilmont 1:fdd22bb7aa52 120 * The real and imaginary output values for the radix-4 butterfly are
emilmont 1:fdd22bb7aa52 121 * xa' = xa + xb + xc + xd
emilmont 1:fdd22bb7aa52 122 * ya' = ya + yb + yc + yd
emilmont 1:fdd22bb7aa52 123 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
emilmont 1:fdd22bb7aa52 124 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
emilmont 1:fdd22bb7aa52 125 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
emilmont 1:fdd22bb7aa52 126 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
emilmont 1:fdd22bb7aa52 127 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
emilmont 1:fdd22bb7aa52 128 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
emilmont 1:fdd22bb7aa52 129 *
emilmont 1:fdd22bb7aa52 130 */
emilmont 1:fdd22bb7aa52 131
emilmont 1:fdd22bb7aa52 132 /**
emilmont 1:fdd22bb7aa52 133 * @brief Core function for the Q15 CFFT butterfly process.
emilmont 1:fdd22bb7aa52 134 * @param[in, out] *pSrc16 points to the in-place buffer of Q15 data type.
emilmont 1:fdd22bb7aa52 135 * @param[in] fftLen length of the FFT.
emilmont 1:fdd22bb7aa52 136 * @param[in] *pCoef16 points to twiddle coefficient buffer.
emilmont 1:fdd22bb7aa52 137 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
emilmont 1:fdd22bb7aa52 138 * @return none.
emilmont 1:fdd22bb7aa52 139 */
emilmont 1:fdd22bb7aa52 140
emilmont 1:fdd22bb7aa52 141 void arm_radix4_butterfly_q15(
emilmont 1:fdd22bb7aa52 142 q15_t * pSrc16,
emilmont 1:fdd22bb7aa52 143 uint32_t fftLen,
emilmont 1:fdd22bb7aa52 144 q15_t * pCoef16,
emilmont 1:fdd22bb7aa52 145 uint32_t twidCoefModifier)
emilmont 1:fdd22bb7aa52 146 {
emilmont 1:fdd22bb7aa52 147
emilmont 1:fdd22bb7aa52 148 #ifndef ARM_MATH_CM0
emilmont 1:fdd22bb7aa52 149
emilmont 1:fdd22bb7aa52 150 /* Run the below code for Cortex-M4 and Cortex-M3 */
emilmont 1:fdd22bb7aa52 151
emilmont 1:fdd22bb7aa52 152 q31_t R, S, T, U;
emilmont 1:fdd22bb7aa52 153 q31_t C1, C2, C3, out1, out2;
emilmont 1:fdd22bb7aa52 154 uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;
emilmont 1:fdd22bb7aa52 155 q15_t in;
emilmont 1:fdd22bb7aa52 156
emilmont 1:fdd22bb7aa52 157 q15_t *ptr1;
emilmont 1:fdd22bb7aa52 158
emilmont 1:fdd22bb7aa52 159
emilmont 1:fdd22bb7aa52 160
emilmont 1:fdd22bb7aa52 161 q31_t xaya, xbyb, xcyc, xdyd;
emilmont 1:fdd22bb7aa52 162
emilmont 1:fdd22bb7aa52 163 /* Total process is divided into three stages */
emilmont 1:fdd22bb7aa52 164
emilmont 1:fdd22bb7aa52 165 /* process first stage, middle stages, & last stage */
emilmont 1:fdd22bb7aa52 166
emilmont 1:fdd22bb7aa52 167 /* Initializations for the first stage */
emilmont 1:fdd22bb7aa52 168 n2 = fftLen;
emilmont 1:fdd22bb7aa52 169 n1 = n2;
emilmont 1:fdd22bb7aa52 170
emilmont 1:fdd22bb7aa52 171 /* n2 = fftLen/4 */
emilmont 1:fdd22bb7aa52 172 n2 >>= 2u;
emilmont 1:fdd22bb7aa52 173
emilmont 1:fdd22bb7aa52 174 /* Index for twiddle coefficient */
emilmont 1:fdd22bb7aa52 175 ic = 0u;
emilmont 1:fdd22bb7aa52 176
emilmont 1:fdd22bb7aa52 177 /* Index for input read and output write */
emilmont 1:fdd22bb7aa52 178 i0 = 0u;
emilmont 1:fdd22bb7aa52 179 j = n2;
emilmont 1:fdd22bb7aa52 180
emilmont 1:fdd22bb7aa52 181 /* Input is in 1.15(q15) format */
emilmont 1:fdd22bb7aa52 182
emilmont 1:fdd22bb7aa52 183 /* start of first stage process */
emilmont 1:fdd22bb7aa52 184 do
emilmont 1:fdd22bb7aa52 185 {
emilmont 1:fdd22bb7aa52 186 /* Butterfly implementation */
emilmont 1:fdd22bb7aa52 187
emilmont 1:fdd22bb7aa52 188 /* index calculation for the input as, */
emilmont 1:fdd22bb7aa52 189 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
emilmont 1:fdd22bb7aa52 190 i1 = i0 + n2;
emilmont 1:fdd22bb7aa52 191 i2 = i1 + n2;
emilmont 1:fdd22bb7aa52 192 i3 = i2 + n2;
emilmont 1:fdd22bb7aa52 193
emilmont 1:fdd22bb7aa52 194 /* Reading i0, i0+fftLen/2 inputs */
emilmont 1:fdd22bb7aa52 195 /* Read ya (real), xa(imag) input */
emilmont 1:fdd22bb7aa52 196 T = _SIMD32_OFFSET(pSrc16 + (2u * i0));
emilmont 1:fdd22bb7aa52 197 in = ((int16_t) (T & 0xFFFF)) >> 2;
emilmont 1:fdd22bb7aa52 198 T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
emilmont 1:fdd22bb7aa52 199
emilmont 1:fdd22bb7aa52 200 /* Read yc (real), xc(imag) input */
emilmont 1:fdd22bb7aa52 201 S = _SIMD32_OFFSET(pSrc16 + (2u * i2));
emilmont 1:fdd22bb7aa52 202 in = ((int16_t) (S & 0xFFFF)) >> 2;
emilmont 1:fdd22bb7aa52 203 S = ((S >> 2) & 0xFFFF0000) | (in & 0xFFFF);
emilmont 1:fdd22bb7aa52 204
emilmont 1:fdd22bb7aa52 205 /* R = packed((ya + yc), (xa + xc) ) */
emilmont 1:fdd22bb7aa52 206 R = __QADD16(T, S);
emilmont 1:fdd22bb7aa52 207
emilmont 1:fdd22bb7aa52 208 /* S = packed((ya - yc), (xa - xc) ) */
emilmont 1:fdd22bb7aa52 209 S = __QSUB16(T, S);
emilmont 1:fdd22bb7aa52 210
emilmont 1:fdd22bb7aa52 211 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
emilmont 1:fdd22bb7aa52 212 /* Read yb (real), xb(imag) input */
emilmont 1:fdd22bb7aa52 213 T = _SIMD32_OFFSET(pSrc16 + (2u * i1));
emilmont 1:fdd22bb7aa52 214 in = ((int16_t) (T & 0xFFFF)) >> 2;
emilmont 1:fdd22bb7aa52 215 T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
emilmont 1:fdd22bb7aa52 216
emilmont 1:fdd22bb7aa52 217 /* Read yd (real), xd(imag) input */
emilmont 1:fdd22bb7aa52 218 U = _SIMD32_OFFSET(pSrc16 + (2u * i3));
emilmont 1:fdd22bb7aa52 219 in = ((int16_t) (U & 0xFFFF)) >> 2;
emilmont 1:fdd22bb7aa52 220 U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF);
emilmont 1:fdd22bb7aa52 221
emilmont 1:fdd22bb7aa52 222 /* T = packed((yb + yd), (xb + xd) ) */
emilmont 1:fdd22bb7aa52 223 T = __QADD16(T, U);
emilmont 1:fdd22bb7aa52 224
emilmont 1:fdd22bb7aa52 225 /* writing the butterfly processed i0 sample */
emilmont 1:fdd22bb7aa52 226 /* xa' = xa + xb + xc + xd */
emilmont 1:fdd22bb7aa52 227 /* ya' = ya + yb + yc + yd */
emilmont 1:fdd22bb7aa52 228 _SIMD32_OFFSET(pSrc16 + (2u * i0)) = __SHADD16(R, T);
emilmont 1:fdd22bb7aa52 229
emilmont 1:fdd22bb7aa52 230 /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */
emilmont 1:fdd22bb7aa52 231 R = __QSUB16(R, T);
emilmont 1:fdd22bb7aa52 232
emilmont 1:fdd22bb7aa52 233 /* co2 & si2 are read from SIMD Coefficient pointer */
emilmont 1:fdd22bb7aa52 234 C2 = _SIMD32_OFFSET(pCoef16 + (4u * ic));
emilmont 1:fdd22bb7aa52 235
emilmont 1:fdd22bb7aa52 236 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 237
emilmont 1:fdd22bb7aa52 238 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
emilmont 1:fdd22bb7aa52 239 out1 = __SMUAD(C2, R) >> 16u;
emilmont 1:fdd22bb7aa52 240 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
emilmont 1:fdd22bb7aa52 241 out2 = __SMUSDX(C2, R);
emilmont 1:fdd22bb7aa52 242
emilmont 1:fdd22bb7aa52 243 #else
emilmont 1:fdd22bb7aa52 244
emilmont 1:fdd22bb7aa52 245 /* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
emilmont 1:fdd22bb7aa52 246 out1 = __SMUSDX(R, C2) >> 16u;
emilmont 1:fdd22bb7aa52 247 /* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
emilmont 1:fdd22bb7aa52 248 out2 = __SMUAD(C2, R);
emilmont 1:fdd22bb7aa52 249
emilmont 1:fdd22bb7aa52 250 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
emilmont 1:fdd22bb7aa52 251
emilmont 1:fdd22bb7aa52 252 /* Reading i0+fftLen/4 */
emilmont 1:fdd22bb7aa52 253 /* T = packed(yb, xb) */
emilmont 1:fdd22bb7aa52 254 T = _SIMD32_OFFSET(pSrc16 + (2u * i1));
emilmont 1:fdd22bb7aa52 255 in = ((int16_t) (T & 0xFFFF)) >> 2;
emilmont 1:fdd22bb7aa52 256 T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
emilmont 1:fdd22bb7aa52 257
emilmont 1:fdd22bb7aa52 258 /* writing the butterfly processed i0 + fftLen/4 sample */
emilmont 1:fdd22bb7aa52 259 /* writing output(xc', yc') in little endian format */
emilmont 1:fdd22bb7aa52 260 _SIMD32_OFFSET(pSrc16 + (2u * i1)) =
emilmont 1:fdd22bb7aa52 261 (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
emilmont 1:fdd22bb7aa52 262
emilmont 1:fdd22bb7aa52 263 /* Butterfly calculations */
emilmont 1:fdd22bb7aa52 264 /* U = packed(yd, xd) */
emilmont 1:fdd22bb7aa52 265 U = _SIMD32_OFFSET(pSrc16 + (2u * i3));
emilmont 1:fdd22bb7aa52 266 in = ((int16_t) (U & 0xFFFF)) >> 2;
emilmont 1:fdd22bb7aa52 267 U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF);
emilmont 1:fdd22bb7aa52 268
emilmont 1:fdd22bb7aa52 269 /* T = packed(yb-yd, xb-xd) */
emilmont 1:fdd22bb7aa52 270 T = __QSUB16(T, U);
emilmont 1:fdd22bb7aa52 271
emilmont 1:fdd22bb7aa52 272 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 273
emilmont 1:fdd22bb7aa52 274 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
emilmont 1:fdd22bb7aa52 275 R = __QASX(S, T);
emilmont 1:fdd22bb7aa52 276 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
emilmont 1:fdd22bb7aa52 277 S = __QSAX(S, T);
emilmont 1:fdd22bb7aa52 278
emilmont 1:fdd22bb7aa52 279 #else
emilmont 1:fdd22bb7aa52 280
emilmont 1:fdd22bb7aa52 281 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
emilmont 1:fdd22bb7aa52 282 R = __QSAX(S, T);
emilmont 1:fdd22bb7aa52 283 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
emilmont 1:fdd22bb7aa52 284 S = __QASX(S, T);
emilmont 1:fdd22bb7aa52 285
emilmont 1:fdd22bb7aa52 286 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
emilmont 1:fdd22bb7aa52 287
emilmont 1:fdd22bb7aa52 288 /* co1 & si1 are read from SIMD Coefficient pointer */
emilmont 1:fdd22bb7aa52 289 C1 = _SIMD32_OFFSET(pCoef16 + (2u * ic));
emilmont 1:fdd22bb7aa52 290 /* Butterfly process for the i0+fftLen/2 sample */
emilmont 1:fdd22bb7aa52 291
emilmont 1:fdd22bb7aa52 292 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 293
emilmont 1:fdd22bb7aa52 294 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
emilmont 1:fdd22bb7aa52 295 out1 = __SMUAD(C1, S) >> 16u;
emilmont 1:fdd22bb7aa52 296 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
emilmont 1:fdd22bb7aa52 297 out2 = __SMUSDX(C1, S);
emilmont 1:fdd22bb7aa52 298
emilmont 1:fdd22bb7aa52 299 #else
emilmont 1:fdd22bb7aa52 300
emilmont 1:fdd22bb7aa52 301 /* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
emilmont 1:fdd22bb7aa52 302 out1 = __SMUSDX(S, C1) >> 16u;
emilmont 1:fdd22bb7aa52 303 /* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
emilmont 1:fdd22bb7aa52 304 out2 = __SMUAD(C1, S);
emilmont 1:fdd22bb7aa52 305
emilmont 1:fdd22bb7aa52 306 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
emilmont 1:fdd22bb7aa52 307
emilmont 1:fdd22bb7aa52 308 /* writing output(xb', yb') in little endian format */
emilmont 1:fdd22bb7aa52 309 _SIMD32_OFFSET(pSrc16 + (2u * i2)) =
emilmont 1:fdd22bb7aa52 310 ((out2) & 0xFFFF0000) | ((out1) & 0x0000FFFF);
emilmont 1:fdd22bb7aa52 311
emilmont 1:fdd22bb7aa52 312
emilmont 1:fdd22bb7aa52 313 /* co3 & si3 are read from SIMD Coefficient pointer */
emilmont 1:fdd22bb7aa52 314 C3 = _SIMD32_OFFSET(pCoef16 + (6u * ic));
emilmont 1:fdd22bb7aa52 315 /* Butterfly process for the i0+3fftLen/4 sample */
emilmont 1:fdd22bb7aa52 316
emilmont 1:fdd22bb7aa52 317 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 318
emilmont 1:fdd22bb7aa52 319 /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
emilmont 1:fdd22bb7aa52 320 out1 = __SMUAD(C3, R) >> 16u;
emilmont 1:fdd22bb7aa52 321 /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
emilmont 1:fdd22bb7aa52 322 out2 = __SMUSDX(C3, R);
emilmont 1:fdd22bb7aa52 323
emilmont 1:fdd22bb7aa52 324 #else
emilmont 1:fdd22bb7aa52 325
emilmont 1:fdd22bb7aa52 326 /* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
emilmont 1:fdd22bb7aa52 327 out1 = __SMUSDX(R, C3) >> 16u;
emilmont 1:fdd22bb7aa52 328 /* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
emilmont 1:fdd22bb7aa52 329 out2 = __SMUAD(C3, R);
emilmont 1:fdd22bb7aa52 330
emilmont 1:fdd22bb7aa52 331 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
emilmont 1:fdd22bb7aa52 332
emilmont 1:fdd22bb7aa52 333 /* writing output(xd', yd') in little endian format */
emilmont 1:fdd22bb7aa52 334 _SIMD32_OFFSET(pSrc16 + (2u * i3)) =
emilmont 1:fdd22bb7aa52 335 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
emilmont 1:fdd22bb7aa52 336
emilmont 1:fdd22bb7aa52 337 /* Twiddle coefficients index modifier */
emilmont 1:fdd22bb7aa52 338 ic = ic + twidCoefModifier;
emilmont 1:fdd22bb7aa52 339
emilmont 1:fdd22bb7aa52 340 /* Updating input index */
emilmont 1:fdd22bb7aa52 341 i0 = i0 + 1u;
emilmont 1:fdd22bb7aa52 342
emilmont 1:fdd22bb7aa52 343 } while(--j);
emilmont 1:fdd22bb7aa52 344 /* data is in 4.11(q11) format */
emilmont 1:fdd22bb7aa52 345
emilmont 1:fdd22bb7aa52 346 /* end of first stage process */
emilmont 1:fdd22bb7aa52 347
emilmont 1:fdd22bb7aa52 348
emilmont 1:fdd22bb7aa52 349 /* start of middle stage process */
emilmont 1:fdd22bb7aa52 350
emilmont 1:fdd22bb7aa52 351 /* Twiddle coefficients index modifier */
emilmont 1:fdd22bb7aa52 352 twidCoefModifier <<= 2u;
emilmont 1:fdd22bb7aa52 353
emilmont 1:fdd22bb7aa52 354 /* Calculation of Middle stage */
emilmont 1:fdd22bb7aa52 355 for (k = fftLen / 4u; k > 4u; k >>= 2u)
emilmont 1:fdd22bb7aa52 356 {
emilmont 1:fdd22bb7aa52 357 /* Initializations for the middle stage */
emilmont 1:fdd22bb7aa52 358 n1 = n2;
emilmont 1:fdd22bb7aa52 359 n2 >>= 2u;
emilmont 1:fdd22bb7aa52 360 ic = 0u;
emilmont 1:fdd22bb7aa52 361
emilmont 1:fdd22bb7aa52 362 for (j = 0u; j <= (n2 - 1u); j++)
emilmont 1:fdd22bb7aa52 363 {
emilmont 1:fdd22bb7aa52 364 /* index calculation for the coefficients */
emilmont 1:fdd22bb7aa52 365 C1 = _SIMD32_OFFSET(pCoef16 + (2u * ic));
emilmont 1:fdd22bb7aa52 366 C2 = _SIMD32_OFFSET(pCoef16 + (4u * ic));
emilmont 1:fdd22bb7aa52 367 C3 = _SIMD32_OFFSET(pCoef16 + (6u * ic));
emilmont 1:fdd22bb7aa52 368
emilmont 1:fdd22bb7aa52 369 /* Twiddle coefficients index modifier */
emilmont 1:fdd22bb7aa52 370 ic = ic + twidCoefModifier;
emilmont 1:fdd22bb7aa52 371
emilmont 1:fdd22bb7aa52 372 /* Butterfly implementation */
emilmont 1:fdd22bb7aa52 373 for (i0 = j; i0 < fftLen; i0 += n1)
emilmont 1:fdd22bb7aa52 374 {
emilmont 1:fdd22bb7aa52 375 /* index calculation for the input as, */
emilmont 1:fdd22bb7aa52 376 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
emilmont 1:fdd22bb7aa52 377 i1 = i0 + n2;
emilmont 1:fdd22bb7aa52 378 i2 = i1 + n2;
emilmont 1:fdd22bb7aa52 379 i3 = i2 + n2;
emilmont 1:fdd22bb7aa52 380
emilmont 1:fdd22bb7aa52 381 /* Reading i0, i0+fftLen/2 inputs */
emilmont 1:fdd22bb7aa52 382 /* Read ya (real), xa(imag) input */
emilmont 1:fdd22bb7aa52 383 T = _SIMD32_OFFSET(pSrc16 + (2u * i0));
emilmont 1:fdd22bb7aa52 384
emilmont 1:fdd22bb7aa52 385 /* Read yc (real), xc(imag) input */
emilmont 1:fdd22bb7aa52 386 S = _SIMD32_OFFSET(pSrc16 + (2u * i2));
emilmont 1:fdd22bb7aa52 387
emilmont 1:fdd22bb7aa52 388 /* R = packed( (ya + yc), (xa + xc)) */
emilmont 1:fdd22bb7aa52 389 R = __QADD16(T, S);
emilmont 1:fdd22bb7aa52 390
emilmont 1:fdd22bb7aa52 391 /* S = packed((ya - yc), (xa - xc)) */
emilmont 1:fdd22bb7aa52 392 S = __QSUB16(T, S);
emilmont 1:fdd22bb7aa52 393
emilmont 1:fdd22bb7aa52 394 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
emilmont 1:fdd22bb7aa52 395 /* Read yb (real), xb(imag) input */
emilmont 1:fdd22bb7aa52 396 T = _SIMD32_OFFSET(pSrc16 + (2u * i1));
emilmont 1:fdd22bb7aa52 397
emilmont 1:fdd22bb7aa52 398 /* Read yd (real), xd(imag) input */
emilmont 1:fdd22bb7aa52 399 U = _SIMD32_OFFSET(pSrc16 + (2u * i3));
emilmont 1:fdd22bb7aa52 400
emilmont 1:fdd22bb7aa52 401 /* T = packed( (yb + yd), (xb + xd)) */
emilmont 1:fdd22bb7aa52 402 T = __QADD16(T, U);
emilmont 1:fdd22bb7aa52 403
emilmont 1:fdd22bb7aa52 404 /* writing the butterfly processed i0 sample */
emilmont 1:fdd22bb7aa52 405
emilmont 1:fdd22bb7aa52 406 /* xa' = xa + xb + xc + xd */
emilmont 1:fdd22bb7aa52 407 /* ya' = ya + yb + yc + yd */
emilmont 1:fdd22bb7aa52 408 out1 = __SHADD16(R, T);
emilmont 1:fdd22bb7aa52 409 in = ((int16_t) (out1 & 0xFFFF)) >> 1;
emilmont 1:fdd22bb7aa52 410 out1 = ((out1 >> 1) & 0xFFFF0000) | (in & 0xFFFF);
emilmont 1:fdd22bb7aa52 411 _SIMD32_OFFSET(pSrc16 + (2u * i0)) = out1;
emilmont 1:fdd22bb7aa52 412
emilmont 1:fdd22bb7aa52 413 /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */
emilmont 1:fdd22bb7aa52 414 R = __SHSUB16(R, T);
emilmont 1:fdd22bb7aa52 415
emilmont 1:fdd22bb7aa52 416 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 417
emilmont 1:fdd22bb7aa52 418 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
emilmont 1:fdd22bb7aa52 419 out1 = __SMUAD(C2, R) >> 16u;
emilmont 1:fdd22bb7aa52 420
emilmont 1:fdd22bb7aa52 421 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
emilmont 1:fdd22bb7aa52 422 out2 = __SMUSDX(C2, R);
emilmont 1:fdd22bb7aa52 423
emilmont 1:fdd22bb7aa52 424 #else
emilmont 1:fdd22bb7aa52 425
emilmont 1:fdd22bb7aa52 426 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
emilmont 1:fdd22bb7aa52 427 out1 = __SMUSDX(R, C2) >> 16u;
emilmont 1:fdd22bb7aa52 428
emilmont 1:fdd22bb7aa52 429 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
emilmont 1:fdd22bb7aa52 430 out2 = __SMUAD(C2, R);
emilmont 1:fdd22bb7aa52 431
emilmont 1:fdd22bb7aa52 432 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
emilmont 1:fdd22bb7aa52 433
emilmont 1:fdd22bb7aa52 434 /* Reading i0+3fftLen/4 */
emilmont 1:fdd22bb7aa52 435 /* Read yb (real), xb(imag) input */
emilmont 1:fdd22bb7aa52 436 T = _SIMD32_OFFSET(pSrc16 + (2u * i1));
emilmont 1:fdd22bb7aa52 437
emilmont 1:fdd22bb7aa52 438 /* writing the butterfly processed i0 + fftLen/4 sample */
emilmont 1:fdd22bb7aa52 439 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
emilmont 1:fdd22bb7aa52 440 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
emilmont 1:fdd22bb7aa52 441 _SIMD32_OFFSET(pSrc16 + (2u * i1)) =
emilmont 1:fdd22bb7aa52 442 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
emilmont 1:fdd22bb7aa52 443
emilmont 1:fdd22bb7aa52 444 /* Butterfly calculations */
emilmont 1:fdd22bb7aa52 445
emilmont 1:fdd22bb7aa52 446 /* Read yd (real), xd(imag) input */
emilmont 1:fdd22bb7aa52 447 U = _SIMD32_OFFSET(pSrc16 + (2u * i3));
emilmont 1:fdd22bb7aa52 448
emilmont 1:fdd22bb7aa52 449 /* T = packed(yb-yd, xb-xd) */
emilmont 1:fdd22bb7aa52 450 T = __QSUB16(T, U);
emilmont 1:fdd22bb7aa52 451
emilmont 1:fdd22bb7aa52 452 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 453
emilmont 1:fdd22bb7aa52 454 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
emilmont 1:fdd22bb7aa52 455 R = __SHASX(S, T);
emilmont 1:fdd22bb7aa52 456
emilmont 1:fdd22bb7aa52 457 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
emilmont 1:fdd22bb7aa52 458 S = __SHSAX(S, T);
emilmont 1:fdd22bb7aa52 459
emilmont 1:fdd22bb7aa52 460
emilmont 1:fdd22bb7aa52 461 /* Butterfly process for the i0+fftLen/2 sample */
emilmont 1:fdd22bb7aa52 462 out1 = __SMUAD(C1, S) >> 16u;
emilmont 1:fdd22bb7aa52 463 out2 = __SMUSDX(C1, S);
emilmont 1:fdd22bb7aa52 464
emilmont 1:fdd22bb7aa52 465 #else
emilmont 1:fdd22bb7aa52 466
emilmont 1:fdd22bb7aa52 467 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
emilmont 1:fdd22bb7aa52 468 R = __SHSAX(S, T);
emilmont 1:fdd22bb7aa52 469
emilmont 1:fdd22bb7aa52 470 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
emilmont 1:fdd22bb7aa52 471 S = __SHASX(S, T);
emilmont 1:fdd22bb7aa52 472
emilmont 1:fdd22bb7aa52 473
emilmont 1:fdd22bb7aa52 474 /* Butterfly process for the i0+fftLen/2 sample */
emilmont 1:fdd22bb7aa52 475 out1 = __SMUSDX(S, C1) >> 16u;
emilmont 1:fdd22bb7aa52 476 out2 = __SMUAD(C1, S);
emilmont 1:fdd22bb7aa52 477
emilmont 1:fdd22bb7aa52 478 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
emilmont 1:fdd22bb7aa52 479
emilmont 1:fdd22bb7aa52 480 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
emilmont 1:fdd22bb7aa52 481 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
emilmont 1:fdd22bb7aa52 482 _SIMD32_OFFSET(pSrc16 + (2u * i2)) =
emilmont 1:fdd22bb7aa52 483 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
emilmont 1:fdd22bb7aa52 484
emilmont 1:fdd22bb7aa52 485 /* Butterfly process for the i0+3fftLen/4 sample */
emilmont 1:fdd22bb7aa52 486
emilmont 1:fdd22bb7aa52 487 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 488
emilmont 1:fdd22bb7aa52 489 out1 = __SMUAD(C3, R) >> 16u;
emilmont 1:fdd22bb7aa52 490 out2 = __SMUSDX(C3, R);
emilmont 1:fdd22bb7aa52 491
emilmont 1:fdd22bb7aa52 492 #else
emilmont 1:fdd22bb7aa52 493
emilmont 1:fdd22bb7aa52 494 out1 = __SMUSDX(R, C3) >> 16u;
emilmont 1:fdd22bb7aa52 495 out2 = __SMUAD(C3, R);
emilmont 1:fdd22bb7aa52 496
emilmont 1:fdd22bb7aa52 497 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
emilmont 1:fdd22bb7aa52 498
emilmont 1:fdd22bb7aa52 499 /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
emilmont 1:fdd22bb7aa52 500 /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
emilmont 1:fdd22bb7aa52 501 _SIMD32_OFFSET(pSrc16 + (2u * i3)) =
emilmont 1:fdd22bb7aa52 502 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
emilmont 1:fdd22bb7aa52 503 }
emilmont 1:fdd22bb7aa52 504 }
emilmont 1:fdd22bb7aa52 505 /* Twiddle coefficients index modifier */
emilmont 1:fdd22bb7aa52 506 twidCoefModifier <<= 2u;
emilmont 1:fdd22bb7aa52 507 }
emilmont 1:fdd22bb7aa52 508 /* end of middle stage process */
emilmont 1:fdd22bb7aa52 509
emilmont 1:fdd22bb7aa52 510
emilmont 1:fdd22bb7aa52 511 /* data is in 10.6(q6) format for the 1024 point */
emilmont 1:fdd22bb7aa52 512 /* data is in 8.8(q8) format for the 256 point */
emilmont 1:fdd22bb7aa52 513 /* data is in 6.10(q10) format for the 64 point */
emilmont 1:fdd22bb7aa52 514 /* data is in 4.12(q12) format for the 16 point */
emilmont 1:fdd22bb7aa52 515
emilmont 1:fdd22bb7aa52 516 /* Initializations for the last stage */
emilmont 1:fdd22bb7aa52 517 j = fftLen >> 2;
emilmont 1:fdd22bb7aa52 518
emilmont 1:fdd22bb7aa52 519 ptr1 = &pSrc16[0];
emilmont 1:fdd22bb7aa52 520
emilmont 1:fdd22bb7aa52 521 /* start of last stage process */
emilmont 1:fdd22bb7aa52 522
emilmont 1:fdd22bb7aa52 523 /* Butterfly implementation */
emilmont 1:fdd22bb7aa52 524 do
emilmont 1:fdd22bb7aa52 525 {
emilmont 1:fdd22bb7aa52 526 /* Read xa (real), ya(imag) input */
emilmont 1:fdd22bb7aa52 527 xaya = *__SIMD32(ptr1)++;
emilmont 1:fdd22bb7aa52 528
emilmont 1:fdd22bb7aa52 529 /* Read xb (real), yb(imag) input */
emilmont 1:fdd22bb7aa52 530 xbyb = *__SIMD32(ptr1)++;
emilmont 1:fdd22bb7aa52 531
emilmont 1:fdd22bb7aa52 532 /* Read xc (real), yc(imag) input */
emilmont 1:fdd22bb7aa52 533 xcyc = *__SIMD32(ptr1)++;
emilmont 1:fdd22bb7aa52 534
emilmont 1:fdd22bb7aa52 535 /* Read xd (real), yd(imag) input */
emilmont 1:fdd22bb7aa52 536 xdyd = *__SIMD32(ptr1)++;
emilmont 1:fdd22bb7aa52 537
emilmont 1:fdd22bb7aa52 538 /* R = packed((ya + yc), (xa + xc)) */
emilmont 1:fdd22bb7aa52 539 R = __QADD16(xaya, xcyc);
emilmont 1:fdd22bb7aa52 540
emilmont 1:fdd22bb7aa52 541 /* T = packed((yb + yd), (xb + xd)) */
emilmont 1:fdd22bb7aa52 542 T = __QADD16(xbyb, xdyd);
emilmont 1:fdd22bb7aa52 543
emilmont 1:fdd22bb7aa52 544 /* pointer updation for writing */
emilmont 1:fdd22bb7aa52 545 ptr1 = ptr1 - 8u;
emilmont 1:fdd22bb7aa52 546
emilmont 1:fdd22bb7aa52 547
emilmont 1:fdd22bb7aa52 548 /* xa' = xa + xb + xc + xd */
emilmont 1:fdd22bb7aa52 549 /* ya' = ya + yb + yc + yd */
emilmont 1:fdd22bb7aa52 550 *__SIMD32(ptr1)++ = __SHADD16(R, T);
emilmont 1:fdd22bb7aa52 551
emilmont 1:fdd22bb7aa52 552 /* T = packed((yb + yd), (xb + xd)) */
emilmont 1:fdd22bb7aa52 553 T = __QADD16(xbyb, xdyd);
emilmont 1:fdd22bb7aa52 554
emilmont 1:fdd22bb7aa52 555 /* xc' = (xa-xb+xc-xd) */
emilmont 1:fdd22bb7aa52 556 /* yc' = (ya-yb+yc-yd) */
emilmont 1:fdd22bb7aa52 557 *__SIMD32(ptr1)++ = __SHSUB16(R, T);
emilmont 1:fdd22bb7aa52 558
emilmont 1:fdd22bb7aa52 559 /* S = packed((ya - yc), (xa - xc)) */
emilmont 1:fdd22bb7aa52 560 S = __QSUB16(xaya, xcyc);
emilmont 1:fdd22bb7aa52 561
emilmont 1:fdd22bb7aa52 562 /* Read yd (real), xd(imag) input */
emilmont 1:fdd22bb7aa52 563 /* T = packed( (yb - yd), (xb - xd)) */
emilmont 1:fdd22bb7aa52 564 U = __QSUB16(xbyb, xdyd);
emilmont 1:fdd22bb7aa52 565
emilmont 1:fdd22bb7aa52 566 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 567
emilmont 1:fdd22bb7aa52 568 /* xb' = (xa+yb-xc-yd) */
emilmont 1:fdd22bb7aa52 569 /* yb' = (ya-xb-yc+xd) */
emilmont 1:fdd22bb7aa52 570 *__SIMD32(ptr1)++ = __SHSAX(S, U);
emilmont 1:fdd22bb7aa52 571
emilmont 1:fdd22bb7aa52 572
emilmont 1:fdd22bb7aa52 573 /* xd' = (xa-yb-xc+yd) */
emilmont 1:fdd22bb7aa52 574 /* yd' = (ya+xb-yc-xd) */
emilmont 1:fdd22bb7aa52 575 *__SIMD32(ptr1)++ = __SHASX(S, U);
emilmont 1:fdd22bb7aa52 576
emilmont 1:fdd22bb7aa52 577 #else
emilmont 1:fdd22bb7aa52 578
emilmont 1:fdd22bb7aa52 579 /* xb' = (xa+yb-xc-yd) */
emilmont 1:fdd22bb7aa52 580 /* yb' = (ya-xb-yc+xd) */
emilmont 1:fdd22bb7aa52 581 *__SIMD32(ptr1)++ = __SHASX(S, U);
emilmont 1:fdd22bb7aa52 582
emilmont 1:fdd22bb7aa52 583
emilmont 1:fdd22bb7aa52 584 /* xd' = (xa-yb-xc+yd) */
emilmont 1:fdd22bb7aa52 585 /* yd' = (ya+xb-yc-xd) */
emilmont 1:fdd22bb7aa52 586 *__SIMD32(ptr1)++ = __SHSAX(S, U);
emilmont 1:fdd22bb7aa52 587
emilmont 1:fdd22bb7aa52 588 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
emilmont 1:fdd22bb7aa52 589
emilmont 1:fdd22bb7aa52 590 } while(--j);
emilmont 1:fdd22bb7aa52 591
emilmont 1:fdd22bb7aa52 592 /* end of last stage process */
emilmont 1:fdd22bb7aa52 593
emilmont 1:fdd22bb7aa52 594 /* output is in 11.5(q5) format for the 1024 point */
emilmont 1:fdd22bb7aa52 595 /* output is in 9.7(q7) format for the 256 point */
emilmont 1:fdd22bb7aa52 596 /* output is in 7.9(q9) format for the 64 point */
emilmont 1:fdd22bb7aa52 597 /* output is in 5.11(q11) format for the 16 point */
emilmont 1:fdd22bb7aa52 598
emilmont 1:fdd22bb7aa52 599
emilmont 1:fdd22bb7aa52 600 #else
emilmont 1:fdd22bb7aa52 601
emilmont 1:fdd22bb7aa52 602 /* Run the below code for Cortex-M0 */
emilmont 1:fdd22bb7aa52 603
emilmont 1:fdd22bb7aa52 604 q15_t R0, R1, S0, S1, T0, T1, U0, U1;
emilmont 1:fdd22bb7aa52 605 q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2;
emilmont 1:fdd22bb7aa52 606 uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;
emilmont 1:fdd22bb7aa52 607
emilmont 1:fdd22bb7aa52 608 /* Total process is divided into three stages */
emilmont 1:fdd22bb7aa52 609
emilmont 1:fdd22bb7aa52 610 /* process first stage, middle stages, & last stage */
emilmont 1:fdd22bb7aa52 611
emilmont 1:fdd22bb7aa52 612 /* Initializations for the first stage */
emilmont 1:fdd22bb7aa52 613 n2 = fftLen;
emilmont 1:fdd22bb7aa52 614 n1 = n2;
emilmont 1:fdd22bb7aa52 615
emilmont 1:fdd22bb7aa52 616 /* n2 = fftLen/4 */
emilmont 1:fdd22bb7aa52 617 n2 >>= 2u;
emilmont 1:fdd22bb7aa52 618
emilmont 1:fdd22bb7aa52 619 /* Index for twiddle coefficient */
emilmont 1:fdd22bb7aa52 620 ic = 0u;
emilmont 1:fdd22bb7aa52 621
emilmont 1:fdd22bb7aa52 622 /* Index for input read and output write */
emilmont 1:fdd22bb7aa52 623 i0 = 0u;
emilmont 1:fdd22bb7aa52 624 j = n2;
emilmont 1:fdd22bb7aa52 625
emilmont 1:fdd22bb7aa52 626 /* Input is in 1.15(q15) format */
emilmont 1:fdd22bb7aa52 627
emilmont 1:fdd22bb7aa52 628 /* start of first stage process */
emilmont 1:fdd22bb7aa52 629 do
emilmont 1:fdd22bb7aa52 630 {
emilmont 1:fdd22bb7aa52 631 /* Butterfly implementation */
emilmont 1:fdd22bb7aa52 632
emilmont 1:fdd22bb7aa52 633 /* index calculation for the input as, */
emilmont 1:fdd22bb7aa52 634 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
emilmont 1:fdd22bb7aa52 635 i1 = i0 + n2;
emilmont 1:fdd22bb7aa52 636 i2 = i1 + n2;
emilmont 1:fdd22bb7aa52 637 i3 = i2 + n2;
emilmont 1:fdd22bb7aa52 638
emilmont 1:fdd22bb7aa52 639 /* Reading i0, i0+fftLen/2 inputs */
emilmont 1:fdd22bb7aa52 640
emilmont 1:fdd22bb7aa52 641 /* input is down scale by 4 to avoid overflow */
emilmont 1:fdd22bb7aa52 642 /* Read ya (real), xa(imag) input */
emilmont 1:fdd22bb7aa52 643 T0 = pSrc16[i0 * 2u] >> 2u;
emilmont 1:fdd22bb7aa52 644 T1 = pSrc16[(i0 * 2u) + 1u] >> 2u;
emilmont 1:fdd22bb7aa52 645
emilmont 1:fdd22bb7aa52 646 /* input is down scale by 4 to avoid overflow */
emilmont 1:fdd22bb7aa52 647 /* Read yc (real), xc(imag) input */
emilmont 1:fdd22bb7aa52 648 S0 = pSrc16[i2 * 2u] >> 2u;
emilmont 1:fdd22bb7aa52 649 S1 = pSrc16[(i2 * 2u) + 1u] >> 2u;
emilmont 1:fdd22bb7aa52 650
emilmont 1:fdd22bb7aa52 651 /* R0 = (ya + yc) */
emilmont 1:fdd22bb7aa52 652 R0 = __SSAT(T0 + S0, 16u);
emilmont 1:fdd22bb7aa52 653 /* R1 = (xa + xc) */
emilmont 1:fdd22bb7aa52 654 R1 = __SSAT(T1 + S1, 16u);
emilmont 1:fdd22bb7aa52 655
emilmont 1:fdd22bb7aa52 656 /* S0 = (ya - yc) */
emilmont 1:fdd22bb7aa52 657 S0 = __SSAT(T0 - S0, 16);
emilmont 1:fdd22bb7aa52 658 /* S1 = (xa - xc) */
emilmont 1:fdd22bb7aa52 659 S1 = __SSAT(T1 - S1, 16);
emilmont 1:fdd22bb7aa52 660
emilmont 1:fdd22bb7aa52 661 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
emilmont 1:fdd22bb7aa52 662 /* input is down scale by 4 to avoid overflow */
emilmont 1:fdd22bb7aa52 663 /* Read yb (real), xb(imag) input */
emilmont 1:fdd22bb7aa52 664 T0 = pSrc16[i1 * 2u] >> 2u;
emilmont 1:fdd22bb7aa52 665 T1 = pSrc16[(i1 * 2u) + 1u] >> 2u;
emilmont 1:fdd22bb7aa52 666
emilmont 1:fdd22bb7aa52 667 /* input is down scale by 4 to avoid overflow */
emilmont 1:fdd22bb7aa52 668 /* Read yd (real), xd(imag) input */
emilmont 1:fdd22bb7aa52 669 U0 = pSrc16[i3 * 2u] >> 2u;
emilmont 1:fdd22bb7aa52 670 U1 = pSrc16[(i3 * 2u) + 1] >> 2u;
emilmont 1:fdd22bb7aa52 671
emilmont 1:fdd22bb7aa52 672 /* T0 = (yb + yd) */
emilmont 1:fdd22bb7aa52 673 T0 = __SSAT(T0 + U0, 16u);
emilmont 1:fdd22bb7aa52 674 /* T1 = (xb + xd) */
emilmont 1:fdd22bb7aa52 675 T1 = __SSAT(T1 + U1, 16u);
emilmont 1:fdd22bb7aa52 676
emilmont 1:fdd22bb7aa52 677 /* writing the butterfly processed i0 sample */
emilmont 1:fdd22bb7aa52 678 /* ya' = ya + yb + yc + yd */
emilmont 1:fdd22bb7aa52 679 /* xa' = xa + xb + xc + xd */
emilmont 1:fdd22bb7aa52 680 pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u);
emilmont 1:fdd22bb7aa52 681 pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u);
emilmont 1:fdd22bb7aa52 682
emilmont 1:fdd22bb7aa52 683 /* R0 = (ya + yc) - (yb + yd) */
emilmont 1:fdd22bb7aa52 684 /* R1 = (xa + xc) - (xb + xd) */
emilmont 1:fdd22bb7aa52 685 R0 = __SSAT(R0 - T0, 16u);
emilmont 1:fdd22bb7aa52 686 R1 = __SSAT(R1 - T1, 16u);
emilmont 1:fdd22bb7aa52 687
emilmont 1:fdd22bb7aa52 688 /* co2 & si2 are read from Coefficient pointer */
emilmont 1:fdd22bb7aa52 689 Co2 = pCoef16[2u * ic * 2u];
emilmont 1:fdd22bb7aa52 690 Si2 = pCoef16[(2u * ic * 2u) + 1];
emilmont 1:fdd22bb7aa52 691
emilmont 1:fdd22bb7aa52 692 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
emilmont 1:fdd22bb7aa52 693 out1 = (short) ((Co2 * R0 + Si2 * R1) >> 16u);
emilmont 1:fdd22bb7aa52 694 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
emilmont 1:fdd22bb7aa52 695 out2 = (short) ((-Si2 * R0 + Co2 * R1) >> 16u);
emilmont 1:fdd22bb7aa52 696
emilmont 1:fdd22bb7aa52 697 /* Reading i0+fftLen/4 */
emilmont 1:fdd22bb7aa52 698 /* input is down scale by 4 to avoid overflow */
emilmont 1:fdd22bb7aa52 699 /* T0 = yb, T1 = xb */
emilmont 1:fdd22bb7aa52 700 T0 = pSrc16[i1 * 2u] >> 2;
emilmont 1:fdd22bb7aa52 701 T1 = pSrc16[(i1 * 2u) + 1] >> 2;
emilmont 1:fdd22bb7aa52 702
emilmont 1:fdd22bb7aa52 703 /* writing the butterfly processed i0 + fftLen/4 sample */
emilmont 1:fdd22bb7aa52 704 /* writing output(xc', yc') in little endian format */
emilmont 1:fdd22bb7aa52 705 pSrc16[i1 * 2u] = out1;
emilmont 1:fdd22bb7aa52 706 pSrc16[(i1 * 2u) + 1] = out2;
emilmont 1:fdd22bb7aa52 707
emilmont 1:fdd22bb7aa52 708 /* Butterfly calculations */
emilmont 1:fdd22bb7aa52 709 /* input is down scale by 4 to avoid overflow */
emilmont 1:fdd22bb7aa52 710 /* U0 = yd, U1 = xd */
emilmont 1:fdd22bb7aa52 711 U0 = pSrc16[i3 * 2u] >> 2;
emilmont 1:fdd22bb7aa52 712 U1 = pSrc16[(i3 * 2u) + 1] >> 2;
emilmont 1:fdd22bb7aa52 713 /* T0 = yb-yd */
emilmont 1:fdd22bb7aa52 714 T0 = __SSAT(T0 - U0, 16);
emilmont 1:fdd22bb7aa52 715 /* T1 = xb-xd */
emilmont 1:fdd22bb7aa52 716 T1 = __SSAT(T1 - U1, 16);
emilmont 1:fdd22bb7aa52 717
emilmont 1:fdd22bb7aa52 718 /* R1 = (ya-yc) + (xb- xd), R0 = (xa-xc) - (yb-yd)) */
emilmont 1:fdd22bb7aa52 719 R0 = (short) __SSAT((q31_t) (S0 - T1), 16);
emilmont 1:fdd22bb7aa52 720 R1 = (short) __SSAT((q31_t) (S1 + T0), 16);
emilmont 1:fdd22bb7aa52 721
emilmont 1:fdd22bb7aa52 722 /* S1 = (ya-yc) - (xb- xd), S0 = (xa-xc) + (yb-yd)) */
emilmont 1:fdd22bb7aa52 723 S0 = (short) __SSAT(((q31_t) S0 + T1), 16u);
emilmont 1:fdd22bb7aa52 724 S1 = (short) __SSAT(((q31_t) S1 - T0), 16u);
emilmont 1:fdd22bb7aa52 725
emilmont 1:fdd22bb7aa52 726 /* co1 & si1 are read from Coefficient pointer */
emilmont 1:fdd22bb7aa52 727 Co1 = pCoef16[ic * 2u];
emilmont 1:fdd22bb7aa52 728 Si1 = pCoef16[(ic * 2u) + 1];
emilmont 1:fdd22bb7aa52 729 /* Butterfly process for the i0+fftLen/2 sample */
emilmont 1:fdd22bb7aa52 730 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
emilmont 1:fdd22bb7aa52 731 out1 = (short) ((Si1 * S1 + Co1 * S0) >> 16);
emilmont 1:fdd22bb7aa52 732 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
emilmont 1:fdd22bb7aa52 733 out2 = (short) ((-Si1 * S0 + Co1 * S1) >> 16);
emilmont 1:fdd22bb7aa52 734
emilmont 1:fdd22bb7aa52 735 /* writing output(xb', yb') in little endian format */
emilmont 1:fdd22bb7aa52 736 pSrc16[i2 * 2u] = out1;
emilmont 1:fdd22bb7aa52 737 pSrc16[(i2 * 2u) + 1] = out2;
emilmont 1:fdd22bb7aa52 738
emilmont 1:fdd22bb7aa52 739 /* Co3 & si3 are read from Coefficient pointer */
emilmont 1:fdd22bb7aa52 740 Co3 = pCoef16[3u * (ic * 2u)];
emilmont 1:fdd22bb7aa52 741 Si3 = pCoef16[(3u * (ic * 2u)) + 1];
emilmont 1:fdd22bb7aa52 742 /* Butterfly process for the i0+3fftLen/4 sample */
emilmont 1:fdd22bb7aa52 743 /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */
emilmont 1:fdd22bb7aa52 744 out1 = (short) ((Si3 * R1 + Co3 * R0) >> 16u);
emilmont 1:fdd22bb7aa52 745 /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */
emilmont 1:fdd22bb7aa52 746 out2 = (short) ((-Si3 * R0 + Co3 * R1) >> 16u);
emilmont 1:fdd22bb7aa52 747 /* writing output(xd', yd') in little endian format */
emilmont 1:fdd22bb7aa52 748 pSrc16[i3 * 2u] = out1;
emilmont 1:fdd22bb7aa52 749 pSrc16[(i3 * 2u) + 1] = out2;
emilmont 1:fdd22bb7aa52 750
emilmont 1:fdd22bb7aa52 751 /* Twiddle coefficients index modifier */
emilmont 1:fdd22bb7aa52 752 ic = ic + twidCoefModifier;
emilmont 1:fdd22bb7aa52 753
emilmont 1:fdd22bb7aa52 754 /* Updating input index */
emilmont 1:fdd22bb7aa52 755 i0 = i0 + 1u;
emilmont 1:fdd22bb7aa52 756
emilmont 1:fdd22bb7aa52 757 } while(--j);
emilmont 1:fdd22bb7aa52 758 /* data is in 4.11(q11) format */
emilmont 1:fdd22bb7aa52 759
emilmont 1:fdd22bb7aa52 760 /* end of first stage process */
emilmont 1:fdd22bb7aa52 761
emilmont 1:fdd22bb7aa52 762
emilmont 1:fdd22bb7aa52 763 /* start of middle stage process */
emilmont 1:fdd22bb7aa52 764
emilmont 1:fdd22bb7aa52 765 /* Twiddle coefficients index modifier */
emilmont 1:fdd22bb7aa52 766 twidCoefModifier <<= 2u;
emilmont 1:fdd22bb7aa52 767
emilmont 1:fdd22bb7aa52 768 /* Calculation of Middle stage */
emilmont 1:fdd22bb7aa52 769 for (k = fftLen / 4u; k > 4u; k >>= 2u)
emilmont 1:fdd22bb7aa52 770 {
emilmont 1:fdd22bb7aa52 771 /* Initializations for the middle stage */
emilmont 1:fdd22bb7aa52 772 n1 = n2;
emilmont 1:fdd22bb7aa52 773 n2 >>= 2u;
emilmont 1:fdd22bb7aa52 774 ic = 0u;
emilmont 1:fdd22bb7aa52 775
emilmont 1:fdd22bb7aa52 776 for (j = 0u; j <= (n2 - 1u); j++)
emilmont 1:fdd22bb7aa52 777 {
emilmont 1:fdd22bb7aa52 778 /* index calculation for the coefficients */
emilmont 1:fdd22bb7aa52 779 Co1 = pCoef16[ic * 2u];
emilmont 1:fdd22bb7aa52 780 Si1 = pCoef16[(ic * 2u) + 1u];
emilmont 1:fdd22bb7aa52 781 Co2 = pCoef16[2u * (ic * 2u)];
emilmont 1:fdd22bb7aa52 782 Si2 = pCoef16[(2u * (ic * 2u)) + 1u];
emilmont 1:fdd22bb7aa52 783 Co3 = pCoef16[3u * (ic * 2u)];
emilmont 1:fdd22bb7aa52 784 Si3 = pCoef16[(3u * (ic * 2u)) + 1u];
emilmont 1:fdd22bb7aa52 785
emilmont 1:fdd22bb7aa52 786 /* Twiddle coefficients index modifier */
emilmont 1:fdd22bb7aa52 787 ic = ic + twidCoefModifier;
emilmont 1:fdd22bb7aa52 788
emilmont 1:fdd22bb7aa52 789 /* Butterfly implementation */
emilmont 1:fdd22bb7aa52 790 for (i0 = j; i0 < fftLen; i0 += n1)
emilmont 1:fdd22bb7aa52 791 {
emilmont 1:fdd22bb7aa52 792 /* index calculation for the input as, */
emilmont 1:fdd22bb7aa52 793 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
emilmont 1:fdd22bb7aa52 794 i1 = i0 + n2;
emilmont 1:fdd22bb7aa52 795 i2 = i1 + n2;
emilmont 1:fdd22bb7aa52 796 i3 = i2 + n2;
emilmont 1:fdd22bb7aa52 797
emilmont 1:fdd22bb7aa52 798 /* Reading i0, i0+fftLen/2 inputs */
emilmont 1:fdd22bb7aa52 799 /* Read ya (real), xa(imag) input */
emilmont 1:fdd22bb7aa52 800 T0 = pSrc16[i0 * 2u];
emilmont 1:fdd22bb7aa52 801 T1 = pSrc16[(i0 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 802
emilmont 1:fdd22bb7aa52 803 /* Read yc (real), xc(imag) input */
emilmont 1:fdd22bb7aa52 804 S0 = pSrc16[i2 * 2u];
emilmont 1:fdd22bb7aa52 805 S1 = pSrc16[(i2 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 806
emilmont 1:fdd22bb7aa52 807 /* R0 = (ya + yc), R1 = (xa + xc) */
emilmont 1:fdd22bb7aa52 808 R0 = __SSAT(T0 + S0, 16);
emilmont 1:fdd22bb7aa52 809 R1 = __SSAT(T1 + S1, 16);
emilmont 1:fdd22bb7aa52 810
emilmont 1:fdd22bb7aa52 811 /* S0 = (ya - yc), S1 =(xa - xc) */
emilmont 1:fdd22bb7aa52 812 S0 = __SSAT(T0 - S0, 16);
emilmont 1:fdd22bb7aa52 813 S1 = __SSAT(T1 - S1, 16);
emilmont 1:fdd22bb7aa52 814
emilmont 1:fdd22bb7aa52 815 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
emilmont 1:fdd22bb7aa52 816 /* Read yb (real), xb(imag) input */
emilmont 1:fdd22bb7aa52 817 T0 = pSrc16[i1 * 2u];
emilmont 1:fdd22bb7aa52 818 T1 = pSrc16[(i1 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 819
emilmont 1:fdd22bb7aa52 820 /* Read yd (real), xd(imag) input */
emilmont 1:fdd22bb7aa52 821 U0 = pSrc16[i3 * 2u];
emilmont 1:fdd22bb7aa52 822 U1 = pSrc16[(i3 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 823
emilmont 1:fdd22bb7aa52 824
emilmont 1:fdd22bb7aa52 825 /* T0 = (yb + yd), T1 = (xb + xd) */
emilmont 1:fdd22bb7aa52 826 T0 = __SSAT(T0 + U0, 16);
emilmont 1:fdd22bb7aa52 827 T1 = __SSAT(T1 + U1, 16);
emilmont 1:fdd22bb7aa52 828
emilmont 1:fdd22bb7aa52 829 /* writing the butterfly processed i0 sample */
emilmont 1:fdd22bb7aa52 830
emilmont 1:fdd22bb7aa52 831 /* xa' = xa + xb + xc + xd */
emilmont 1:fdd22bb7aa52 832 /* ya' = ya + yb + yc + yd */
emilmont 1:fdd22bb7aa52 833 out1 = ((R0 >> 1u) + (T0 >> 1u)) >> 1u;
emilmont 1:fdd22bb7aa52 834 out2 = ((R1 >> 1u) + (T1 >> 1u)) >> 1u;
emilmont 1:fdd22bb7aa52 835
emilmont 1:fdd22bb7aa52 836 pSrc16[i0 * 2u] = out1;
emilmont 1:fdd22bb7aa52 837 pSrc16[(2u * i0) + 1u] = out2;
emilmont 1:fdd22bb7aa52 838
emilmont 1:fdd22bb7aa52 839 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
emilmont 1:fdd22bb7aa52 840 R0 = (R0 >> 1u) - (T0 >> 1u);
emilmont 1:fdd22bb7aa52 841 R1 = (R1 >> 1u) - (T1 >> 1u);
emilmont 1:fdd22bb7aa52 842
emilmont 1:fdd22bb7aa52 843 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
emilmont 1:fdd22bb7aa52 844 out1 = (short) ((Co2 * R0 + Si2 * R1) >> 16u);
emilmont 1:fdd22bb7aa52 845
emilmont 1:fdd22bb7aa52 846 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
emilmont 1:fdd22bb7aa52 847 out2 = (short) ((-Si2 * R0 + Co2 * R1) >> 16u);
emilmont 1:fdd22bb7aa52 848
emilmont 1:fdd22bb7aa52 849 /* Reading i0+3fftLen/4 */
emilmont 1:fdd22bb7aa52 850 /* Read yb (real), xb(imag) input */
emilmont 1:fdd22bb7aa52 851 T0 = pSrc16[i1 * 2u];
emilmont 1:fdd22bb7aa52 852 T1 = pSrc16[(i1 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 853
emilmont 1:fdd22bb7aa52 854 /* writing the butterfly processed i0 + fftLen/4 sample */
emilmont 1:fdd22bb7aa52 855 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
emilmont 1:fdd22bb7aa52 856 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
emilmont 1:fdd22bb7aa52 857 pSrc16[i1 * 2u] = out1;
emilmont 1:fdd22bb7aa52 858 pSrc16[(i1 * 2u) + 1u] = out2;
emilmont 1:fdd22bb7aa52 859
emilmont 1:fdd22bb7aa52 860 /* Butterfly calculations */
emilmont 1:fdd22bb7aa52 861
emilmont 1:fdd22bb7aa52 862 /* Read yd (real), xd(imag) input */
emilmont 1:fdd22bb7aa52 863 U0 = pSrc16[i3 * 2u];
emilmont 1:fdd22bb7aa52 864 U1 = pSrc16[(i3 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 865
emilmont 1:fdd22bb7aa52 866 /* T0 = yb-yd, T1 = xb-xd */
emilmont 1:fdd22bb7aa52 867 T0 = __SSAT(T0 - U0, 16);
emilmont 1:fdd22bb7aa52 868 T1 = __SSAT(T1 - U1, 16);
emilmont 1:fdd22bb7aa52 869
emilmont 1:fdd22bb7aa52 870 /* R0 = (ya-yc) + (xb- xd), R1 = (xa-xc) - (yb-yd)) */
emilmont 1:fdd22bb7aa52 871 R0 = (S0 >> 1u) - (T1 >> 1u);
emilmont 1:fdd22bb7aa52 872 R1 = (S1 >> 1u) + (T0 >> 1u);
emilmont 1:fdd22bb7aa52 873
emilmont 1:fdd22bb7aa52 874 /* S0 = (ya-yc) - (xb- xd), S1 = (xa-xc) + (yb-yd)) */
emilmont 1:fdd22bb7aa52 875 S0 = (S0 >> 1u) + (T1 >> 1u);
emilmont 1:fdd22bb7aa52 876 S1 = (S1 >> 1u) - (T0 >> 1u);
emilmont 1:fdd22bb7aa52 877
emilmont 1:fdd22bb7aa52 878 /* Butterfly process for the i0+fftLen/2 sample */
emilmont 1:fdd22bb7aa52 879 out1 = (short) ((Co1 * S0 + Si1 * S1) >> 16u);
emilmont 1:fdd22bb7aa52 880
emilmont 1:fdd22bb7aa52 881 out2 = (short) ((-Si1 * S0 + Co1 * S1) >> 16u);
emilmont 1:fdd22bb7aa52 882
emilmont 1:fdd22bb7aa52 883 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
emilmont 1:fdd22bb7aa52 884 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
emilmont 1:fdd22bb7aa52 885 pSrc16[i2 * 2u] = out1;
emilmont 1:fdd22bb7aa52 886 pSrc16[(i2 * 2u) + 1u] = out2;
emilmont 1:fdd22bb7aa52 887
emilmont 1:fdd22bb7aa52 888 /* Butterfly process for the i0+3fftLen/4 sample */
emilmont 1:fdd22bb7aa52 889 out1 = (short) ((Si3 * R1 + Co3 * R0) >> 16u);
emilmont 1:fdd22bb7aa52 890
emilmont 1:fdd22bb7aa52 891 out2 = (short) ((-Si3 * R0 + Co3 * R1) >> 16u);
emilmont 1:fdd22bb7aa52 892 /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */
emilmont 1:fdd22bb7aa52 893 /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */
emilmont 1:fdd22bb7aa52 894 pSrc16[i3 * 2u] = out1;
emilmont 1:fdd22bb7aa52 895 pSrc16[(i3 * 2u) + 1u] = out2;
emilmont 1:fdd22bb7aa52 896 }
emilmont 1:fdd22bb7aa52 897 }
emilmont 1:fdd22bb7aa52 898 /* Twiddle coefficients index modifier */
emilmont 1:fdd22bb7aa52 899 twidCoefModifier <<= 2u;
emilmont 1:fdd22bb7aa52 900 }
emilmont 1:fdd22bb7aa52 901 /* end of middle stage process */
emilmont 1:fdd22bb7aa52 902
emilmont 1:fdd22bb7aa52 903
emilmont 1:fdd22bb7aa52 904 /* data is in 10.6(q6) format for the 1024 point */
emilmont 1:fdd22bb7aa52 905 /* data is in 8.8(q8) format for the 256 point */
emilmont 1:fdd22bb7aa52 906 /* data is in 6.10(q10) format for the 64 point */
emilmont 1:fdd22bb7aa52 907 /* data is in 4.12(q12) format for the 16 point */
emilmont 1:fdd22bb7aa52 908
emilmont 1:fdd22bb7aa52 909 /* Initializations for the last stage */
emilmont 1:fdd22bb7aa52 910 n1 = n2;
emilmont 1:fdd22bb7aa52 911 n2 >>= 2u;
emilmont 1:fdd22bb7aa52 912
emilmont 1:fdd22bb7aa52 913 /* start of last stage process */
emilmont 1:fdd22bb7aa52 914
emilmont 1:fdd22bb7aa52 915 /* Butterfly implementation */
emilmont 1:fdd22bb7aa52 916 for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1)
emilmont 1:fdd22bb7aa52 917 {
emilmont 1:fdd22bb7aa52 918 /* index calculation for the input as, */
emilmont 1:fdd22bb7aa52 919 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
emilmont 1:fdd22bb7aa52 920 i1 = i0 + n2;
emilmont 1:fdd22bb7aa52 921 i2 = i1 + n2;
emilmont 1:fdd22bb7aa52 922 i3 = i2 + n2;
emilmont 1:fdd22bb7aa52 923
emilmont 1:fdd22bb7aa52 924 /* Reading i0, i0+fftLen/2 inputs */
emilmont 1:fdd22bb7aa52 925 /* Read ya (real), xa(imag) input */
emilmont 1:fdd22bb7aa52 926 T0 = pSrc16[i0 * 2u];
emilmont 1:fdd22bb7aa52 927 T1 = pSrc16[(i0 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 928
emilmont 1:fdd22bb7aa52 929 /* Read yc (real), xc(imag) input */
emilmont 1:fdd22bb7aa52 930 S0 = pSrc16[i2 * 2u];
emilmont 1:fdd22bb7aa52 931 S1 = pSrc16[(i2 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 932
emilmont 1:fdd22bb7aa52 933 /* R0 = (ya + yc), R1 = (xa + xc) */
emilmont 1:fdd22bb7aa52 934 R0 = __SSAT(T0 + S0, 16u);
emilmont 1:fdd22bb7aa52 935 R1 = __SSAT(T1 + S1, 16u);
emilmont 1:fdd22bb7aa52 936
emilmont 1:fdd22bb7aa52 937 /* S0 = (ya - yc), S1 = (xa - xc) */
emilmont 1:fdd22bb7aa52 938 S0 = __SSAT(T0 - S0, 16u);
emilmont 1:fdd22bb7aa52 939 S1 = __SSAT(T1 - S1, 16u);
emilmont 1:fdd22bb7aa52 940
emilmont 1:fdd22bb7aa52 941 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
emilmont 1:fdd22bb7aa52 942 /* Read yb (real), xb(imag) input */
emilmont 1:fdd22bb7aa52 943 T0 = pSrc16[i1 * 2u];
emilmont 1:fdd22bb7aa52 944 T1 = pSrc16[(i1 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 945 /* Read yd (real), xd(imag) input */
emilmont 1:fdd22bb7aa52 946 U0 = pSrc16[i3 * 2u];
emilmont 1:fdd22bb7aa52 947 U1 = pSrc16[(i3 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 948
emilmont 1:fdd22bb7aa52 949 /* T0 = (yb + yd), T1 = (xb + xd)) */
emilmont 1:fdd22bb7aa52 950 T0 = __SSAT(T0 + U0, 16u);
emilmont 1:fdd22bb7aa52 951 T1 = __SSAT(T1 + U1, 16u);
emilmont 1:fdd22bb7aa52 952
emilmont 1:fdd22bb7aa52 953 /* writing the butterfly processed i0 sample */
emilmont 1:fdd22bb7aa52 954 /* xa' = xa + xb + xc + xd */
emilmont 1:fdd22bb7aa52 955 /* ya' = ya + yb + yc + yd */
emilmont 1:fdd22bb7aa52 956 pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u);
emilmont 1:fdd22bb7aa52 957 pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u);
emilmont 1:fdd22bb7aa52 958
emilmont 1:fdd22bb7aa52 959 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
emilmont 1:fdd22bb7aa52 960 R0 = (R0 >> 1u) - (T0 >> 1u);
emilmont 1:fdd22bb7aa52 961 R1 = (R1 >> 1u) - (T1 >> 1u);
emilmont 1:fdd22bb7aa52 962 /* Read yb (real), xb(imag) input */
emilmont 1:fdd22bb7aa52 963 T0 = pSrc16[i1 * 2u];
emilmont 1:fdd22bb7aa52 964 T1 = pSrc16[(i1 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 965
emilmont 1:fdd22bb7aa52 966 /* writing the butterfly processed i0 + fftLen/4 sample */
emilmont 1:fdd22bb7aa52 967 /* xc' = (xa-xb+xc-xd) */
emilmont 1:fdd22bb7aa52 968 /* yc' = (ya-yb+yc-yd) */
emilmont 1:fdd22bb7aa52 969 pSrc16[i1 * 2u] = R0;
emilmont 1:fdd22bb7aa52 970 pSrc16[(i1 * 2u) + 1u] = R1;
emilmont 1:fdd22bb7aa52 971
emilmont 1:fdd22bb7aa52 972 /* Read yd (real), xd(imag) input */
emilmont 1:fdd22bb7aa52 973 U0 = pSrc16[i3 * 2u];
emilmont 1:fdd22bb7aa52 974 U1 = pSrc16[(i3 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 975 /* T0 = (yb - yd), T1 = (xb - xd) */
emilmont 1:fdd22bb7aa52 976 T0 = __SSAT(T0 - U0, 16u);
emilmont 1:fdd22bb7aa52 977 T1 = __SSAT(T1 - U1, 16u);
emilmont 1:fdd22bb7aa52 978
emilmont 1:fdd22bb7aa52 979 /* writing the butterfly processed i0 + fftLen/2 sample */
emilmont 1:fdd22bb7aa52 980 /* xb' = (xa+yb-xc-yd) */
emilmont 1:fdd22bb7aa52 981 /* yb' = (ya-xb-yc+xd) */
emilmont 1:fdd22bb7aa52 982 pSrc16[i2 * 2u] = (S0 >> 1u) + (T1 >> 1u);
emilmont 1:fdd22bb7aa52 983 pSrc16[(i2 * 2u) + 1u] = (S1 >> 1u) - (T0 >> 1u);
emilmont 1:fdd22bb7aa52 984
emilmont 1:fdd22bb7aa52 985 /* writing the butterfly processed i0 + 3fftLen/4 sample */
emilmont 1:fdd22bb7aa52 986 /* xd' = (xa-yb-xc+yd) */
emilmont 1:fdd22bb7aa52 987 /* yd' = (ya+xb-yc-xd) */
emilmont 1:fdd22bb7aa52 988 pSrc16[i3 * 2u] = (S0 >> 1u) - (T1 >> 1u);
emilmont 1:fdd22bb7aa52 989 pSrc16[(i3 * 2u) + 1u] = (S1 >> 1u) + (T0 >> 1u);
emilmont 1:fdd22bb7aa52 990
emilmont 1:fdd22bb7aa52 991 }
emilmont 1:fdd22bb7aa52 992
emilmont 1:fdd22bb7aa52 993 /* end of last stage process */
emilmont 1:fdd22bb7aa52 994
emilmont 1:fdd22bb7aa52 995 /* output is in 11.5(q5) format for the 1024 point */
emilmont 1:fdd22bb7aa52 996 /* output is in 9.7(q7) format for the 256 point */
emilmont 1:fdd22bb7aa52 997 /* output is in 7.9(q9) format for the 64 point */
emilmont 1:fdd22bb7aa52 998 /* output is in 5.11(q11) format for the 16 point */
emilmont 1:fdd22bb7aa52 999
emilmont 1:fdd22bb7aa52 1000 #endif /* #ifndef ARM_MATH_CM0 */
emilmont 1:fdd22bb7aa52 1001
emilmont 1:fdd22bb7aa52 1002 }
emilmont 1:fdd22bb7aa52 1003
emilmont 1:fdd22bb7aa52 1004
emilmont 1:fdd22bb7aa52 1005 /**
emilmont 1:fdd22bb7aa52 1006 * @brief Core function for the Q15 CIFFT butterfly process.
emilmont 1:fdd22bb7aa52 1007 * @param[in, out] *pSrc16 points to the in-place buffer of Q15 data type.
emilmont 1:fdd22bb7aa52 1008 * @param[in] fftLen length of the FFT.
emilmont 1:fdd22bb7aa52 1009 * @param[in] *pCoef16 points to twiddle coefficient buffer.
emilmont 1:fdd22bb7aa52 1010 * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
emilmont 1:fdd22bb7aa52 1011 * @return none.
emilmont 1:fdd22bb7aa52 1012 */
emilmont 1:fdd22bb7aa52 1013
emilmont 1:fdd22bb7aa52 1014 /*
emilmont 1:fdd22bb7aa52 1015 * Radix-4 IFFT algorithm used is :
emilmont 1:fdd22bb7aa52 1016 *
emilmont 1:fdd22bb7aa52 1017 * CIFFT uses same twiddle coefficients as CFFT function
emilmont 1:fdd22bb7aa52 1018 * x[k] = x[n] + (j)^k * x[n + fftLen/4] + (-1)^k * x[n+fftLen/2] + (-j)^k * x[n+3*fftLen/4]
emilmont 1:fdd22bb7aa52 1019 *
emilmont 1:fdd22bb7aa52 1020 *
emilmont 1:fdd22bb7aa52 1021 * IFFT is implemented with following changes in equations from FFT
emilmont 1:fdd22bb7aa52 1022 *
emilmont 1:fdd22bb7aa52 1023 * Input real and imaginary data:
emilmont 1:fdd22bb7aa52 1024 * x(n) = xa + j * ya
emilmont 1:fdd22bb7aa52 1025 * x(n+N/4 ) = xb + j * yb
emilmont 1:fdd22bb7aa52 1026 * x(n+N/2 ) = xc + j * yc
emilmont 1:fdd22bb7aa52 1027 * x(n+3N/4) = xd + j * yd
emilmont 1:fdd22bb7aa52 1028 *
emilmont 1:fdd22bb7aa52 1029 *
emilmont 1:fdd22bb7aa52 1030 * Output real and imaginary data:
emilmont 1:fdd22bb7aa52 1031 * x(4r) = xa'+ j * ya'
emilmont 1:fdd22bb7aa52 1032 * x(4r+1) = xb'+ j * yb'
emilmont 1:fdd22bb7aa52 1033 * x(4r+2) = xc'+ j * yc'
emilmont 1:fdd22bb7aa52 1034 * x(4r+3) = xd'+ j * yd'
emilmont 1:fdd22bb7aa52 1035 *
emilmont 1:fdd22bb7aa52 1036 *
emilmont 1:fdd22bb7aa52 1037 * Twiddle factors for radix-4 IFFT:
emilmont 1:fdd22bb7aa52 1038 * Wn = co1 + j * (si1)
emilmont 1:fdd22bb7aa52 1039 * W2n = co2 + j * (si2)
emilmont 1:fdd22bb7aa52 1040 * W3n = co3 + j * (si3)
emilmont 1:fdd22bb7aa52 1041 *
emilmont 1:fdd22bb7aa52 1042 * The real and imaginary output values for the radix-4 butterfly are
emilmont 1:fdd22bb7aa52 1043 * xa' = xa + xb + xc + xd
emilmont 1:fdd22bb7aa52 1044 * ya' = ya + yb + yc + yd
emilmont 1:fdd22bb7aa52 1045 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)
emilmont 1:fdd22bb7aa52 1046 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)
emilmont 1:fdd22bb7aa52 1047 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)
emilmont 1:fdd22bb7aa52 1048 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)
emilmont 1:fdd22bb7aa52 1049 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)
emilmont 1:fdd22bb7aa52 1050 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)
emilmont 1:fdd22bb7aa52 1051 *
emilmont 1:fdd22bb7aa52 1052 */
emilmont 1:fdd22bb7aa52 1053
emilmont 1:fdd22bb7aa52 1054 void arm_radix4_butterfly_inverse_q15(
emilmont 1:fdd22bb7aa52 1055 q15_t * pSrc16,
emilmont 1:fdd22bb7aa52 1056 uint32_t fftLen,
emilmont 1:fdd22bb7aa52 1057 q15_t * pCoef16,
emilmont 1:fdd22bb7aa52 1058 uint32_t twidCoefModifier)
emilmont 1:fdd22bb7aa52 1059 {
emilmont 1:fdd22bb7aa52 1060
emilmont 1:fdd22bb7aa52 1061 #ifndef ARM_MATH_CM0
emilmont 1:fdd22bb7aa52 1062
emilmont 1:fdd22bb7aa52 1063 /* Run the below code for Cortex-M4 and Cortex-M3 */
emilmont 1:fdd22bb7aa52 1064
emilmont 1:fdd22bb7aa52 1065 q31_t R, S, T, U;
emilmont 1:fdd22bb7aa52 1066 q31_t C1, C2, C3, out1, out2;
emilmont 1:fdd22bb7aa52 1067 uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;
emilmont 1:fdd22bb7aa52 1068 q15_t in;
emilmont 1:fdd22bb7aa52 1069
emilmont 1:fdd22bb7aa52 1070 q15_t *ptr1;
emilmont 1:fdd22bb7aa52 1071
emilmont 1:fdd22bb7aa52 1072
emilmont 1:fdd22bb7aa52 1073
emilmont 1:fdd22bb7aa52 1074 q31_t xaya, xbyb, xcyc, xdyd;
emilmont 1:fdd22bb7aa52 1075
emilmont 1:fdd22bb7aa52 1076 /* Total process is divided into three stages */
emilmont 1:fdd22bb7aa52 1077
emilmont 1:fdd22bb7aa52 1078 /* process first stage, middle stages, & last stage */
emilmont 1:fdd22bb7aa52 1079
emilmont 1:fdd22bb7aa52 1080 /* Initializations for the first stage */
emilmont 1:fdd22bb7aa52 1081 n2 = fftLen;
emilmont 1:fdd22bb7aa52 1082 n1 = n2;
emilmont 1:fdd22bb7aa52 1083
emilmont 1:fdd22bb7aa52 1084 /* n2 = fftLen/4 */
emilmont 1:fdd22bb7aa52 1085 n2 >>= 2u;
emilmont 1:fdd22bb7aa52 1086
emilmont 1:fdd22bb7aa52 1087 /* Index for twiddle coefficient */
emilmont 1:fdd22bb7aa52 1088 ic = 0u;
emilmont 1:fdd22bb7aa52 1089
emilmont 1:fdd22bb7aa52 1090 /* Index for input read and output write */
emilmont 1:fdd22bb7aa52 1091 i0 = 0u;
emilmont 1:fdd22bb7aa52 1092 j = n2;
emilmont 1:fdd22bb7aa52 1093
emilmont 1:fdd22bb7aa52 1094 /* Input is in 1.15(q15) format */
emilmont 1:fdd22bb7aa52 1095
emilmont 1:fdd22bb7aa52 1096 /* start of first stage process */
emilmont 1:fdd22bb7aa52 1097 do
emilmont 1:fdd22bb7aa52 1098 {
emilmont 1:fdd22bb7aa52 1099 /* Butterfly implementation */
emilmont 1:fdd22bb7aa52 1100
emilmont 1:fdd22bb7aa52 1101 /* index calculation for the input as, */
emilmont 1:fdd22bb7aa52 1102 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
emilmont 1:fdd22bb7aa52 1103 i1 = i0 + n2;
emilmont 1:fdd22bb7aa52 1104 i2 = i1 + n2;
emilmont 1:fdd22bb7aa52 1105 i3 = i2 + n2;
emilmont 1:fdd22bb7aa52 1106
emilmont 1:fdd22bb7aa52 1107 /* Reading i0, i0+fftLen/2 inputs */
emilmont 1:fdd22bb7aa52 1108 /* Read ya (real), xa(imag) input */
emilmont 1:fdd22bb7aa52 1109 T = _SIMD32_OFFSET(pSrc16 + (2u * i0));
emilmont 1:fdd22bb7aa52 1110 in = ((int16_t) (T & 0xFFFF)) >> 2;
emilmont 1:fdd22bb7aa52 1111 T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
emilmont 1:fdd22bb7aa52 1112
emilmont 1:fdd22bb7aa52 1113 /* Read yc (real), xc(imag) input */
emilmont 1:fdd22bb7aa52 1114 S = _SIMD32_OFFSET(pSrc16 + (2u * i2));
emilmont 1:fdd22bb7aa52 1115 in = ((int16_t) (S & 0xFFFF)) >> 2;
emilmont 1:fdd22bb7aa52 1116 S = ((S >> 2) & 0xFFFF0000) | (in & 0xFFFF);
emilmont 1:fdd22bb7aa52 1117
emilmont 1:fdd22bb7aa52 1118 /* R = packed((ya + yc), (xa + xc) ) */
emilmont 1:fdd22bb7aa52 1119 R = __QADD16(T, S);
emilmont 1:fdd22bb7aa52 1120
emilmont 1:fdd22bb7aa52 1121 /* S = packed((ya - yc), (xa - xc) ) */
emilmont 1:fdd22bb7aa52 1122 S = __QSUB16(T, S);
emilmont 1:fdd22bb7aa52 1123
emilmont 1:fdd22bb7aa52 1124 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
emilmont 1:fdd22bb7aa52 1125 /* Read yb (real), xb(imag) input */
emilmont 1:fdd22bb7aa52 1126 T = _SIMD32_OFFSET(pSrc16 + (2u * i1));
emilmont 1:fdd22bb7aa52 1127 in = ((int16_t) (T & 0xFFFF)) >> 2;
emilmont 1:fdd22bb7aa52 1128 T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
emilmont 1:fdd22bb7aa52 1129
emilmont 1:fdd22bb7aa52 1130 /* Read yd (real), xd(imag) input */
emilmont 1:fdd22bb7aa52 1131 U = _SIMD32_OFFSET(pSrc16 + (2u * i3));
emilmont 1:fdd22bb7aa52 1132 in = ((int16_t) (U & 0xFFFF)) >> 2;
emilmont 1:fdd22bb7aa52 1133 U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF);
emilmont 1:fdd22bb7aa52 1134
emilmont 1:fdd22bb7aa52 1135 /* T = packed((yb + yd), (xb + xd) ) */
emilmont 1:fdd22bb7aa52 1136 T = __QADD16(T, U);
emilmont 1:fdd22bb7aa52 1137
emilmont 1:fdd22bb7aa52 1138 /* writing the butterfly processed i0 sample */
emilmont 1:fdd22bb7aa52 1139 /* xa' = xa + xb + xc + xd */
emilmont 1:fdd22bb7aa52 1140 /* ya' = ya + yb + yc + yd */
emilmont 1:fdd22bb7aa52 1141 _SIMD32_OFFSET(pSrc16 + (2u * i0)) = __SHADD16(R, T);
emilmont 1:fdd22bb7aa52 1142
emilmont 1:fdd22bb7aa52 1143 /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */
emilmont 1:fdd22bb7aa52 1144 R = __QSUB16(R, T);
emilmont 1:fdd22bb7aa52 1145
emilmont 1:fdd22bb7aa52 1146 /* co2 & si2 are read from SIMD Coefficient pointer */
emilmont 1:fdd22bb7aa52 1147 C2 = _SIMD32_OFFSET(pCoef16 + (4u * ic));
emilmont 1:fdd22bb7aa52 1148
emilmont 1:fdd22bb7aa52 1149 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 1150
emilmont 1:fdd22bb7aa52 1151 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
emilmont 1:fdd22bb7aa52 1152 out1 = __SMUSD(C2, R) >> 16u;
emilmont 1:fdd22bb7aa52 1153 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
emilmont 1:fdd22bb7aa52 1154 out2 = __SMUADX(C2, R);
emilmont 1:fdd22bb7aa52 1155
emilmont 1:fdd22bb7aa52 1156 #else
emilmont 1:fdd22bb7aa52 1157
emilmont 1:fdd22bb7aa52 1158 /* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
emilmont 1:fdd22bb7aa52 1159 out1 = __SMUADX(C2, R) >> 16u;
emilmont 1:fdd22bb7aa52 1160 /* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
emilmont 1:fdd22bb7aa52 1161 out2 = __SMUSD(__QSUB16(0, C2), R);
emilmont 1:fdd22bb7aa52 1162
emilmont 1:fdd22bb7aa52 1163 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
emilmont 1:fdd22bb7aa52 1164
emilmont 1:fdd22bb7aa52 1165 /* Reading i0+fftLen/4 */
emilmont 1:fdd22bb7aa52 1166 /* T = packed(yb, xb) */
emilmont 1:fdd22bb7aa52 1167 T = _SIMD32_OFFSET(pSrc16 + (2u * i1));
emilmont 1:fdd22bb7aa52 1168 in = ((int16_t) (T & 0xFFFF)) >> 2;
emilmont 1:fdd22bb7aa52 1169 T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
emilmont 1:fdd22bb7aa52 1170
emilmont 1:fdd22bb7aa52 1171 /* writing the butterfly processed i0 + fftLen/4 sample */
emilmont 1:fdd22bb7aa52 1172 /* writing output(xc', yc') in little endian format */
emilmont 1:fdd22bb7aa52 1173 _SIMD32_OFFSET(pSrc16 + (2u * i1)) =
emilmont 1:fdd22bb7aa52 1174 (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
emilmont 1:fdd22bb7aa52 1175
emilmont 1:fdd22bb7aa52 1176 /* Butterfly calculations */
emilmont 1:fdd22bb7aa52 1177 /* U = packed(yd, xd) */
emilmont 1:fdd22bb7aa52 1178 U = _SIMD32_OFFSET(pSrc16 + (2u * i3));
emilmont 1:fdd22bb7aa52 1179 in = ((int16_t) (U & 0xFFFF)) >> 2;
emilmont 1:fdd22bb7aa52 1180 U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF);
emilmont 1:fdd22bb7aa52 1181
emilmont 1:fdd22bb7aa52 1182 /* T = packed(yb-yd, xb-xd) */
emilmont 1:fdd22bb7aa52 1183 T = __QSUB16(T, U);
emilmont 1:fdd22bb7aa52 1184
emilmont 1:fdd22bb7aa52 1185 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 1186
emilmont 1:fdd22bb7aa52 1187 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
emilmont 1:fdd22bb7aa52 1188 R = __QSAX(S, T);
emilmont 1:fdd22bb7aa52 1189 /* S = packed((ya-yc) + (xb- xd), (xa-xc) - (yb-yd)) */
emilmont 1:fdd22bb7aa52 1190 S = __QASX(S, T);
emilmont 1:fdd22bb7aa52 1191
emilmont 1:fdd22bb7aa52 1192 #else
emilmont 1:fdd22bb7aa52 1193
emilmont 1:fdd22bb7aa52 1194 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
emilmont 1:fdd22bb7aa52 1195 R = __QASX(S, T);
emilmont 1:fdd22bb7aa52 1196 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
emilmont 1:fdd22bb7aa52 1197 S = __QSAX(S, T);
emilmont 1:fdd22bb7aa52 1198
emilmont 1:fdd22bb7aa52 1199 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
emilmont 1:fdd22bb7aa52 1200
emilmont 1:fdd22bb7aa52 1201 /* co1 & si1 are read from SIMD Coefficient pointer */
emilmont 1:fdd22bb7aa52 1202 C1 = _SIMD32_OFFSET(pCoef16 + (2u * ic));
emilmont 1:fdd22bb7aa52 1203 /* Butterfly process for the i0+fftLen/2 sample */
emilmont 1:fdd22bb7aa52 1204
emilmont 1:fdd22bb7aa52 1205 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 1206
emilmont 1:fdd22bb7aa52 1207 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
emilmont 1:fdd22bb7aa52 1208 out1 = __SMUSD(C1, S) >> 16u;
emilmont 1:fdd22bb7aa52 1209 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
emilmont 1:fdd22bb7aa52 1210 out2 = __SMUADX(C1, S);
emilmont 1:fdd22bb7aa52 1211
emilmont 1:fdd22bb7aa52 1212 #else
emilmont 1:fdd22bb7aa52 1213
emilmont 1:fdd22bb7aa52 1214 /* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
emilmont 1:fdd22bb7aa52 1215 out1 = __SMUADX(C1, S) >> 16u;
emilmont 1:fdd22bb7aa52 1216 /* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
emilmont 1:fdd22bb7aa52 1217 out2 = __SMUSD(__QSUB16(0, C1), S);
emilmont 1:fdd22bb7aa52 1218
emilmont 1:fdd22bb7aa52 1219 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
emilmont 1:fdd22bb7aa52 1220
emilmont 1:fdd22bb7aa52 1221 /* writing output(xb', yb') in little endian format */
emilmont 1:fdd22bb7aa52 1222 _SIMD32_OFFSET(pSrc16 + (2u * i2)) =
emilmont 1:fdd22bb7aa52 1223 ((out2) & 0xFFFF0000) | ((out1) & 0x0000FFFF);
emilmont 1:fdd22bb7aa52 1224
emilmont 1:fdd22bb7aa52 1225
emilmont 1:fdd22bb7aa52 1226 /* co3 & si3 are read from SIMD Coefficient pointer */
emilmont 1:fdd22bb7aa52 1227 C3 = _SIMD32_OFFSET(pCoef16 + (6u * ic));
emilmont 1:fdd22bb7aa52 1228 /* Butterfly process for the i0+3fftLen/4 sample */
emilmont 1:fdd22bb7aa52 1229
emilmont 1:fdd22bb7aa52 1230 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 1231
emilmont 1:fdd22bb7aa52 1232 /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
emilmont 1:fdd22bb7aa52 1233 out1 = __SMUSD(C3, R) >> 16u;
emilmont 1:fdd22bb7aa52 1234 /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
emilmont 1:fdd22bb7aa52 1235 out2 = __SMUADX(C3, R);
emilmont 1:fdd22bb7aa52 1236
emilmont 1:fdd22bb7aa52 1237 #else
emilmont 1:fdd22bb7aa52 1238
emilmont 1:fdd22bb7aa52 1239 /* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
emilmont 1:fdd22bb7aa52 1240 out1 = __SMUADX(C3, R) >> 16u;
emilmont 1:fdd22bb7aa52 1241 /* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
emilmont 1:fdd22bb7aa52 1242 out2 = __SMUSD(__QSUB16(0, C3), R);
emilmont 1:fdd22bb7aa52 1243
emilmont 1:fdd22bb7aa52 1244 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
emilmont 1:fdd22bb7aa52 1245
emilmont 1:fdd22bb7aa52 1246 /* writing output(xd', yd') in little endian format */
emilmont 1:fdd22bb7aa52 1247 _SIMD32_OFFSET(pSrc16 + (2u * i3)) =
emilmont 1:fdd22bb7aa52 1248 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
emilmont 1:fdd22bb7aa52 1249
emilmont 1:fdd22bb7aa52 1250 /* Twiddle coefficients index modifier */
emilmont 1:fdd22bb7aa52 1251 ic = ic + twidCoefModifier;
emilmont 1:fdd22bb7aa52 1252
emilmont 1:fdd22bb7aa52 1253 /* Updating input index */
emilmont 1:fdd22bb7aa52 1254 i0 = i0 + 1u;
emilmont 1:fdd22bb7aa52 1255
emilmont 1:fdd22bb7aa52 1256 } while(--j);
emilmont 1:fdd22bb7aa52 1257 /* data is in 4.11(q11) format */
emilmont 1:fdd22bb7aa52 1258
emilmont 1:fdd22bb7aa52 1259 /* end of first stage process */
emilmont 1:fdd22bb7aa52 1260
emilmont 1:fdd22bb7aa52 1261
emilmont 1:fdd22bb7aa52 1262 /* start of middle stage process */
emilmont 1:fdd22bb7aa52 1263
emilmont 1:fdd22bb7aa52 1264 /* Twiddle coefficients index modifier */
emilmont 1:fdd22bb7aa52 1265 twidCoefModifier <<= 2u;
emilmont 1:fdd22bb7aa52 1266
emilmont 1:fdd22bb7aa52 1267 /* Calculation of Middle stage */
emilmont 1:fdd22bb7aa52 1268 for (k = fftLen / 4u; k > 4u; k >>= 2u)
emilmont 1:fdd22bb7aa52 1269 {
emilmont 1:fdd22bb7aa52 1270 /* Initializations for the middle stage */
emilmont 1:fdd22bb7aa52 1271 n1 = n2;
emilmont 1:fdd22bb7aa52 1272 n2 >>= 2u;
emilmont 1:fdd22bb7aa52 1273 ic = 0u;
emilmont 1:fdd22bb7aa52 1274
emilmont 1:fdd22bb7aa52 1275 for (j = 0u; j <= (n2 - 1u); j++)
emilmont 1:fdd22bb7aa52 1276 {
emilmont 1:fdd22bb7aa52 1277 /* index calculation for the coefficients */
emilmont 1:fdd22bb7aa52 1278 C1 = _SIMD32_OFFSET(pCoef16 + (2u * ic));
emilmont 1:fdd22bb7aa52 1279 C2 = _SIMD32_OFFSET(pCoef16 + (4u * ic));
emilmont 1:fdd22bb7aa52 1280 C3 = _SIMD32_OFFSET(pCoef16 + (6u * ic));
emilmont 1:fdd22bb7aa52 1281
emilmont 1:fdd22bb7aa52 1282 /* Twiddle coefficients index modifier */
emilmont 1:fdd22bb7aa52 1283 ic = ic + twidCoefModifier;
emilmont 1:fdd22bb7aa52 1284
emilmont 1:fdd22bb7aa52 1285 /* Butterfly implementation */
emilmont 1:fdd22bb7aa52 1286 for (i0 = j; i0 < fftLen; i0 += n1)
emilmont 1:fdd22bb7aa52 1287 {
emilmont 1:fdd22bb7aa52 1288 /* index calculation for the input as, */
emilmont 1:fdd22bb7aa52 1289 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
emilmont 1:fdd22bb7aa52 1290 i1 = i0 + n2;
emilmont 1:fdd22bb7aa52 1291 i2 = i1 + n2;
emilmont 1:fdd22bb7aa52 1292 i3 = i2 + n2;
emilmont 1:fdd22bb7aa52 1293
emilmont 1:fdd22bb7aa52 1294 /* Reading i0, i0+fftLen/2 inputs */
emilmont 1:fdd22bb7aa52 1295 /* Read ya (real), xa(imag) input */
emilmont 1:fdd22bb7aa52 1296 T = _SIMD32_OFFSET(pSrc16 + (2u * i0));
emilmont 1:fdd22bb7aa52 1297
emilmont 1:fdd22bb7aa52 1298 /* Read yc (real), xc(imag) input */
emilmont 1:fdd22bb7aa52 1299 S = _SIMD32_OFFSET(pSrc16 + (2u * i2));
emilmont 1:fdd22bb7aa52 1300
emilmont 1:fdd22bb7aa52 1301 /* R = packed( (ya + yc), (xa + xc)) */
emilmont 1:fdd22bb7aa52 1302 R = __QADD16(T, S);
emilmont 1:fdd22bb7aa52 1303
emilmont 1:fdd22bb7aa52 1304 /* S = packed((ya - yc), (xa - xc)) */
emilmont 1:fdd22bb7aa52 1305 S = __QSUB16(T, S);
emilmont 1:fdd22bb7aa52 1306
emilmont 1:fdd22bb7aa52 1307 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
emilmont 1:fdd22bb7aa52 1308 /* Read yb (real), xb(imag) input */
emilmont 1:fdd22bb7aa52 1309 T = _SIMD32_OFFSET(pSrc16 + (2u * i1));
emilmont 1:fdd22bb7aa52 1310
emilmont 1:fdd22bb7aa52 1311 /* Read yd (real), xd(imag) input */
emilmont 1:fdd22bb7aa52 1312 U = _SIMD32_OFFSET(pSrc16 + (2u * i3));
emilmont 1:fdd22bb7aa52 1313
emilmont 1:fdd22bb7aa52 1314 /* T = packed( (yb + yd), (xb + xd)) */
emilmont 1:fdd22bb7aa52 1315 T = __QADD16(T, U);
emilmont 1:fdd22bb7aa52 1316
emilmont 1:fdd22bb7aa52 1317 /* writing the butterfly processed i0 sample */
emilmont 1:fdd22bb7aa52 1318
emilmont 1:fdd22bb7aa52 1319 /* xa' = xa + xb + xc + xd */
emilmont 1:fdd22bb7aa52 1320 /* ya' = ya + yb + yc + yd */
emilmont 1:fdd22bb7aa52 1321 out1 = __SHADD16(R, T);
emilmont 1:fdd22bb7aa52 1322 in = ((int16_t) (out1 & 0xFFFF)) >> 1;
emilmont 1:fdd22bb7aa52 1323 out1 = ((out1 >> 1) & 0xFFFF0000) | (in & 0xFFFF);
emilmont 1:fdd22bb7aa52 1324 _SIMD32_OFFSET(pSrc16 + (2u * i0)) = out1;
emilmont 1:fdd22bb7aa52 1325
emilmont 1:fdd22bb7aa52 1326 /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */
emilmont 1:fdd22bb7aa52 1327 R = __SHSUB16(R, T);
emilmont 1:fdd22bb7aa52 1328
emilmont 1:fdd22bb7aa52 1329 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 1330
emilmont 1:fdd22bb7aa52 1331 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
emilmont 1:fdd22bb7aa52 1332 out1 = __SMUSD(C2, R) >> 16u;
emilmont 1:fdd22bb7aa52 1333
emilmont 1:fdd22bb7aa52 1334 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
emilmont 1:fdd22bb7aa52 1335 out2 = __SMUADX(C2, R);
emilmont 1:fdd22bb7aa52 1336
emilmont 1:fdd22bb7aa52 1337 #else
emilmont 1:fdd22bb7aa52 1338
emilmont 1:fdd22bb7aa52 1339 /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
emilmont 1:fdd22bb7aa52 1340 out1 = __SMUADX(R, C2) >> 16u;
emilmont 1:fdd22bb7aa52 1341
emilmont 1:fdd22bb7aa52 1342 /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
emilmont 1:fdd22bb7aa52 1343 out2 = __SMUSD(__QSUB16(0, C2), R);
emilmont 1:fdd22bb7aa52 1344
emilmont 1:fdd22bb7aa52 1345 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
emilmont 1:fdd22bb7aa52 1346
emilmont 1:fdd22bb7aa52 1347 /* Reading i0+fftLen/4 */
emilmont 1:fdd22bb7aa52 1348 /* Read yb (real), xb(imag) input */
emilmont 1:fdd22bb7aa52 1349 T = _SIMD32_OFFSET(pSrc16 + (2u * i1));
emilmont 1:fdd22bb7aa52 1350
emilmont 1:fdd22bb7aa52 1351 /* writing the butterfly processed i0 + fftLen/4 sample */
emilmont 1:fdd22bb7aa52 1352 /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
emilmont 1:fdd22bb7aa52 1353 /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
emilmont 1:fdd22bb7aa52 1354 _SIMD32_OFFSET(pSrc16 + (2u * i1)) =
emilmont 1:fdd22bb7aa52 1355 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
emilmont 1:fdd22bb7aa52 1356
emilmont 1:fdd22bb7aa52 1357 /* Butterfly calculations */
emilmont 1:fdd22bb7aa52 1358
emilmont 1:fdd22bb7aa52 1359 /* Read yd (real), xd(imag) input */
emilmont 1:fdd22bb7aa52 1360 U = _SIMD32_OFFSET(pSrc16 + (2u * i3));
emilmont 1:fdd22bb7aa52 1361
emilmont 1:fdd22bb7aa52 1362 /* T = packed(yb-yd, xb-xd) */
emilmont 1:fdd22bb7aa52 1363 T = __QSUB16(T, U);
emilmont 1:fdd22bb7aa52 1364
emilmont 1:fdd22bb7aa52 1365 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 1366
emilmont 1:fdd22bb7aa52 1367 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
emilmont 1:fdd22bb7aa52 1368 R = __SHSAX(S, T);
emilmont 1:fdd22bb7aa52 1369
emilmont 1:fdd22bb7aa52 1370 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
emilmont 1:fdd22bb7aa52 1371 S = __SHASX(S, T);
emilmont 1:fdd22bb7aa52 1372
emilmont 1:fdd22bb7aa52 1373
emilmont 1:fdd22bb7aa52 1374 /* Butterfly process for the i0+fftLen/2 sample */
emilmont 1:fdd22bb7aa52 1375 out1 = __SMUSD(C1, S) >> 16u;
emilmont 1:fdd22bb7aa52 1376 out2 = __SMUADX(C1, S);
emilmont 1:fdd22bb7aa52 1377
emilmont 1:fdd22bb7aa52 1378 #else
emilmont 1:fdd22bb7aa52 1379
emilmont 1:fdd22bb7aa52 1380 /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
emilmont 1:fdd22bb7aa52 1381 R = __SHASX(S, T);
emilmont 1:fdd22bb7aa52 1382
emilmont 1:fdd22bb7aa52 1383 /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
emilmont 1:fdd22bb7aa52 1384 S = __SHSAX(S, T);
emilmont 1:fdd22bb7aa52 1385
emilmont 1:fdd22bb7aa52 1386
emilmont 1:fdd22bb7aa52 1387 /* Butterfly process for the i0+fftLen/2 sample */
emilmont 1:fdd22bb7aa52 1388 out1 = __SMUADX(S, C1) >> 16u;
emilmont 1:fdd22bb7aa52 1389 out2 = __SMUSD(__QSUB16(0, C1), S);
emilmont 1:fdd22bb7aa52 1390
emilmont 1:fdd22bb7aa52 1391 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
emilmont 1:fdd22bb7aa52 1392
emilmont 1:fdd22bb7aa52 1393 /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
emilmont 1:fdd22bb7aa52 1394 /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
emilmont 1:fdd22bb7aa52 1395 _SIMD32_OFFSET(pSrc16 + (2u * i2)) =
emilmont 1:fdd22bb7aa52 1396 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
emilmont 1:fdd22bb7aa52 1397
emilmont 1:fdd22bb7aa52 1398 /* Butterfly process for the i0+3fftLen/4 sample */
emilmont 1:fdd22bb7aa52 1399
emilmont 1:fdd22bb7aa52 1400 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 1401
emilmont 1:fdd22bb7aa52 1402 out1 = __SMUSD(C3, R) >> 16u;
emilmont 1:fdd22bb7aa52 1403 out2 = __SMUADX(C3, R);
emilmont 1:fdd22bb7aa52 1404
emilmont 1:fdd22bb7aa52 1405 #else
emilmont 1:fdd22bb7aa52 1406
emilmont 1:fdd22bb7aa52 1407 out1 = __SMUADX(C3, R) >> 16u;
emilmont 1:fdd22bb7aa52 1408 out2 = __SMUSD(__QSUB16(0, C3), R);
emilmont 1:fdd22bb7aa52 1409
emilmont 1:fdd22bb7aa52 1410 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
emilmont 1:fdd22bb7aa52 1411
emilmont 1:fdd22bb7aa52 1412 /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
emilmont 1:fdd22bb7aa52 1413 /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
emilmont 1:fdd22bb7aa52 1414 _SIMD32_OFFSET(pSrc16 + (2u * i3)) =
emilmont 1:fdd22bb7aa52 1415 ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
emilmont 1:fdd22bb7aa52 1416 }
emilmont 1:fdd22bb7aa52 1417 }
emilmont 1:fdd22bb7aa52 1418 /* Twiddle coefficients index modifier */
emilmont 1:fdd22bb7aa52 1419 twidCoefModifier <<= 2u;
emilmont 1:fdd22bb7aa52 1420 }
emilmont 1:fdd22bb7aa52 1421 /* end of middle stage process */
emilmont 1:fdd22bb7aa52 1422
emilmont 1:fdd22bb7aa52 1423 /* data is in 10.6(q6) format for the 1024 point */
emilmont 1:fdd22bb7aa52 1424 /* data is in 8.8(q8) format for the 256 point */
emilmont 1:fdd22bb7aa52 1425 /* data is in 6.10(q10) format for the 64 point */
emilmont 1:fdd22bb7aa52 1426 /* data is in 4.12(q12) format for the 16 point */
emilmont 1:fdd22bb7aa52 1427
emilmont 1:fdd22bb7aa52 1428 /* Initializations for the last stage */
emilmont 1:fdd22bb7aa52 1429 j = fftLen >> 2;
emilmont 1:fdd22bb7aa52 1430
emilmont 1:fdd22bb7aa52 1431 ptr1 = &pSrc16[0];
emilmont 1:fdd22bb7aa52 1432
emilmont 1:fdd22bb7aa52 1433 /* start of last stage process */
emilmont 1:fdd22bb7aa52 1434
emilmont 1:fdd22bb7aa52 1435 /* Butterfly implementation */
emilmont 1:fdd22bb7aa52 1436 do
emilmont 1:fdd22bb7aa52 1437 {
emilmont 1:fdd22bb7aa52 1438 /* Read xa (real), ya(imag) input */
emilmont 1:fdd22bb7aa52 1439 xaya = *__SIMD32(ptr1)++;
emilmont 1:fdd22bb7aa52 1440
emilmont 1:fdd22bb7aa52 1441 /* Read xb (real), yb(imag) input */
emilmont 1:fdd22bb7aa52 1442 xbyb = *__SIMD32(ptr1)++;
emilmont 1:fdd22bb7aa52 1443
emilmont 1:fdd22bb7aa52 1444 /* Read xc (real), yc(imag) input */
emilmont 1:fdd22bb7aa52 1445 xcyc = *__SIMD32(ptr1)++;
emilmont 1:fdd22bb7aa52 1446
emilmont 1:fdd22bb7aa52 1447 /* Read xd (real), yd(imag) input */
emilmont 1:fdd22bb7aa52 1448 xdyd = *__SIMD32(ptr1)++;
emilmont 1:fdd22bb7aa52 1449
emilmont 1:fdd22bb7aa52 1450 /* R = packed((ya + yc), (xa + xc)) */
emilmont 1:fdd22bb7aa52 1451 R = __QADD16(xaya, xcyc);
emilmont 1:fdd22bb7aa52 1452
emilmont 1:fdd22bb7aa52 1453 /* T = packed((yb + yd), (xb + xd)) */
emilmont 1:fdd22bb7aa52 1454 T = __QADD16(xbyb, xdyd);
emilmont 1:fdd22bb7aa52 1455
emilmont 1:fdd22bb7aa52 1456 /* pointer update for writing */
emilmont 1:fdd22bb7aa52 1457 ptr1 = ptr1 - 8u;
emilmont 1:fdd22bb7aa52 1458
emilmont 1:fdd22bb7aa52 1459
emilmont 1:fdd22bb7aa52 1460 /* xa' = xa + xb + xc + xd */
emilmont 1:fdd22bb7aa52 1461 /* ya' = ya + yb + yc + yd */
emilmont 1:fdd22bb7aa52 1462 *__SIMD32(ptr1)++ = __SHADD16(R, T);
emilmont 1:fdd22bb7aa52 1463
emilmont 1:fdd22bb7aa52 1464 /* T = packed((yb + yd), (xb + xd)) */
emilmont 1:fdd22bb7aa52 1465 T = __QADD16(xbyb, xdyd);
emilmont 1:fdd22bb7aa52 1466
emilmont 1:fdd22bb7aa52 1467 /* xc' = (xa-xb+xc-xd) */
emilmont 1:fdd22bb7aa52 1468 /* yc' = (ya-yb+yc-yd) */
emilmont 1:fdd22bb7aa52 1469 *__SIMD32(ptr1)++ = __SHSUB16(R, T);
emilmont 1:fdd22bb7aa52 1470
emilmont 1:fdd22bb7aa52 1471 /* S = packed((ya - yc), (xa - xc)) */
emilmont 1:fdd22bb7aa52 1472 S = __QSUB16(xaya, xcyc);
emilmont 1:fdd22bb7aa52 1473
emilmont 1:fdd22bb7aa52 1474 /* Read yd (real), xd(imag) input */
emilmont 1:fdd22bb7aa52 1475 /* U = packed( (yb - yd), (xb - xd)) */
emilmont 1:fdd22bb7aa52 1476 U = __QSUB16(xbyb, xdyd);
emilmont 1:fdd22bb7aa52 1477
emilmont 1:fdd22bb7aa52 1478 #ifndef ARM_MATH_BIG_ENDIAN
emilmont 1:fdd22bb7aa52 1479
emilmont 1:fdd22bb7aa52 1480 /* xb' = (xa+yb-xc-yd) */
emilmont 1:fdd22bb7aa52 1481 /* yb' = (ya-xb-yc+xd) */
emilmont 1:fdd22bb7aa52 1482 *__SIMD32(ptr1)++ = __SHASX(S, U);
emilmont 1:fdd22bb7aa52 1483
emilmont 1:fdd22bb7aa52 1484
emilmont 1:fdd22bb7aa52 1485 /* xd' = (xa-yb-xc+yd) */
emilmont 1:fdd22bb7aa52 1486 /* yd' = (ya+xb-yc-xd) */
emilmont 1:fdd22bb7aa52 1487 *__SIMD32(ptr1)++ = __SHSAX(S, U);
emilmont 1:fdd22bb7aa52 1488
emilmont 1:fdd22bb7aa52 1489 #else
emilmont 1:fdd22bb7aa52 1490
emilmont 1:fdd22bb7aa52 1491 /* xb' = (xa+yb-xc-yd) */
emilmont 1:fdd22bb7aa52 1492 /* yb' = (ya-xb-yc+xd) */
emilmont 1:fdd22bb7aa52 1493 *__SIMD32(ptr1)++ = __SHSAX(S, U);
emilmont 1:fdd22bb7aa52 1494
emilmont 1:fdd22bb7aa52 1495
emilmont 1:fdd22bb7aa52 1496 /* xd' = (xa-yb-xc+yd) */
emilmont 1:fdd22bb7aa52 1497 /* yd' = (ya+xb-yc-xd) */
emilmont 1:fdd22bb7aa52 1498 *__SIMD32(ptr1)++ = __SHASX(S, U);
emilmont 1:fdd22bb7aa52 1499
emilmont 1:fdd22bb7aa52 1500
emilmont 1:fdd22bb7aa52 1501 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
emilmont 1:fdd22bb7aa52 1502
emilmont 1:fdd22bb7aa52 1503 } while(--j);
emilmont 1:fdd22bb7aa52 1504
emilmont 1:fdd22bb7aa52 1505 /* end of last stage process */
emilmont 1:fdd22bb7aa52 1506
emilmont 1:fdd22bb7aa52 1507 /* output is in 11.5(q5) format for the 1024 point */
emilmont 1:fdd22bb7aa52 1508 /* output is in 9.7(q7) format for the 256 point */
emilmont 1:fdd22bb7aa52 1509 /* output is in 7.9(q9) format for the 64 point */
emilmont 1:fdd22bb7aa52 1510 /* output is in 5.11(q11) format for the 16 point */
emilmont 1:fdd22bb7aa52 1511
emilmont 1:fdd22bb7aa52 1512
emilmont 1:fdd22bb7aa52 1513 #else
emilmont 1:fdd22bb7aa52 1514
emilmont 1:fdd22bb7aa52 1515 /* Run the below code for Cortex-M0 */
emilmont 1:fdd22bb7aa52 1516
emilmont 1:fdd22bb7aa52 1517 q15_t R0, R1, S0, S1, T0, T1, U0, U1;
emilmont 1:fdd22bb7aa52 1518 q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2;
emilmont 1:fdd22bb7aa52 1519 uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;
emilmont 1:fdd22bb7aa52 1520
emilmont 1:fdd22bb7aa52 1521 /* Total process is divided into three stages */
emilmont 1:fdd22bb7aa52 1522
emilmont 1:fdd22bb7aa52 1523 /* process first stage, middle stages, & last stage */
emilmont 1:fdd22bb7aa52 1524
emilmont 1:fdd22bb7aa52 1525 /* Initializations for the first stage */
emilmont 1:fdd22bb7aa52 1526 n2 = fftLen;
emilmont 1:fdd22bb7aa52 1527 n1 = n2;
emilmont 1:fdd22bb7aa52 1528
emilmont 1:fdd22bb7aa52 1529 /* n2 = fftLen/4 */
emilmont 1:fdd22bb7aa52 1530 n2 >>= 2u;
emilmont 1:fdd22bb7aa52 1531
emilmont 1:fdd22bb7aa52 1532 /* Index for twiddle coefficient */
emilmont 1:fdd22bb7aa52 1533 ic = 0u;
emilmont 1:fdd22bb7aa52 1534
emilmont 1:fdd22bb7aa52 1535 /* Index for input read and output write */
emilmont 1:fdd22bb7aa52 1536 i0 = 0u;
emilmont 1:fdd22bb7aa52 1537
emilmont 1:fdd22bb7aa52 1538 j = n2;
emilmont 1:fdd22bb7aa52 1539
emilmont 1:fdd22bb7aa52 1540 /* Input is in 1.15(q15) format */
emilmont 1:fdd22bb7aa52 1541
emilmont 1:fdd22bb7aa52 1542 /* Start of first stage process */
emilmont 1:fdd22bb7aa52 1543 do
emilmont 1:fdd22bb7aa52 1544 {
emilmont 1:fdd22bb7aa52 1545 /* Butterfly implementation */
emilmont 1:fdd22bb7aa52 1546
emilmont 1:fdd22bb7aa52 1547 /* index calculation for the input as, */
emilmont 1:fdd22bb7aa52 1548 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
emilmont 1:fdd22bb7aa52 1549 i1 = i0 + n2;
emilmont 1:fdd22bb7aa52 1550 i2 = i1 + n2;
emilmont 1:fdd22bb7aa52 1551 i3 = i2 + n2;
emilmont 1:fdd22bb7aa52 1552
emilmont 1:fdd22bb7aa52 1553 /* Reading i0, i0+fftLen/2 inputs */
emilmont 1:fdd22bb7aa52 1554 /* input is down scale by 4 to avoid overflow */
emilmont 1:fdd22bb7aa52 1555 /* Read ya (real), xa(imag) input */
emilmont 1:fdd22bb7aa52 1556 T0 = pSrc16[i0 * 2u] >> 2u;
emilmont 1:fdd22bb7aa52 1557 T1 = pSrc16[(i0 * 2u) + 1u] >> 2u;
emilmont 1:fdd22bb7aa52 1558 /* input is down scale by 4 to avoid overflow */
emilmont 1:fdd22bb7aa52 1559 /* Read yc (real), xc(imag) input */
emilmont 1:fdd22bb7aa52 1560 S0 = pSrc16[i2 * 2u] >> 2u;
emilmont 1:fdd22bb7aa52 1561 S1 = pSrc16[(i2 * 2u) + 1u] >> 2u;
emilmont 1:fdd22bb7aa52 1562
emilmont 1:fdd22bb7aa52 1563 /* R0 = (ya + yc), R1 = (xa + xc) */
emilmont 1:fdd22bb7aa52 1564 R0 = __SSAT(T0 + S0, 16u);
emilmont 1:fdd22bb7aa52 1565 R1 = __SSAT(T1 + S1, 16u);
emilmont 1:fdd22bb7aa52 1566 /* S0 = (ya - yc), S1 = (xa - xc) */
emilmont 1:fdd22bb7aa52 1567 S0 = __SSAT(T0 - S0, 16u);
emilmont 1:fdd22bb7aa52 1568 S1 = __SSAT(T1 - S1, 16u);
emilmont 1:fdd22bb7aa52 1569
emilmont 1:fdd22bb7aa52 1570 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
emilmont 1:fdd22bb7aa52 1571 /* input is down scale by 4 to avoid overflow */
emilmont 1:fdd22bb7aa52 1572 /* Read yb (real), xb(imag) input */
emilmont 1:fdd22bb7aa52 1573 T0 = pSrc16[i1 * 2u] >> 2u;
emilmont 1:fdd22bb7aa52 1574 T1 = pSrc16[(i1 * 2u) + 1u] >> 2u;
emilmont 1:fdd22bb7aa52 1575 /* Read yd (real), xd(imag) input */
emilmont 1:fdd22bb7aa52 1576 /* input is down scale by 4 to avoid overflow */
emilmont 1:fdd22bb7aa52 1577 U0 = pSrc16[i3 * 2u] >> 2u;
emilmont 1:fdd22bb7aa52 1578 U1 = pSrc16[(i3 * 2u) + 1u] >> 2u;
emilmont 1:fdd22bb7aa52 1579
emilmont 1:fdd22bb7aa52 1580 /* T0 = (yb + yd), T1 = (xb + xd) */
emilmont 1:fdd22bb7aa52 1581 T0 = __SSAT(T0 + U0, 16u);
emilmont 1:fdd22bb7aa52 1582 T1 = __SSAT(T1 + U1, 16u);
emilmont 1:fdd22bb7aa52 1583
emilmont 1:fdd22bb7aa52 1584 /* writing the butterfly processed i0 sample */
emilmont 1:fdd22bb7aa52 1585 /* xa' = xa + xb + xc + xd */
emilmont 1:fdd22bb7aa52 1586 /* ya' = ya + yb + yc + yd */
emilmont 1:fdd22bb7aa52 1587 pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u);
emilmont 1:fdd22bb7aa52 1588 pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u);
emilmont 1:fdd22bb7aa52 1589
emilmont 1:fdd22bb7aa52 1590 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc)- (xb + xd) */
emilmont 1:fdd22bb7aa52 1591 R0 = __SSAT(R0 - T0, 16u);
emilmont 1:fdd22bb7aa52 1592 R1 = __SSAT(R1 - T1, 16u);
emilmont 1:fdd22bb7aa52 1593 /* co2 & si2 are read from Coefficient pointer */
emilmont 1:fdd22bb7aa52 1594 Co2 = pCoef16[2u * ic * 2u];
emilmont 1:fdd22bb7aa52 1595 Si2 = pCoef16[(2u * ic * 2u) + 1u];
emilmont 1:fdd22bb7aa52 1596 /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
emilmont 1:fdd22bb7aa52 1597 out1 = (short) ((Co2 * R0 - Si2 * R1) >> 16u);
emilmont 1:fdd22bb7aa52 1598 /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
emilmont 1:fdd22bb7aa52 1599 out2 = (short) ((Si2 * R0 + Co2 * R1) >> 16u);
emilmont 1:fdd22bb7aa52 1600
emilmont 1:fdd22bb7aa52 1601 /* Reading i0+fftLen/4 */
emilmont 1:fdd22bb7aa52 1602 /* input is down scale by 4 to avoid overflow */
emilmont 1:fdd22bb7aa52 1603 /* T0 = yb, T1 = xb */
emilmont 1:fdd22bb7aa52 1604 T0 = pSrc16[i1 * 2u] >> 2u;
emilmont 1:fdd22bb7aa52 1605 T1 = pSrc16[(i1 * 2u) + 1u] >> 2u;
emilmont 1:fdd22bb7aa52 1606
emilmont 1:fdd22bb7aa52 1607 /* writing the butterfly processed i0 + fftLen/4 sample */
emilmont 1:fdd22bb7aa52 1608 /* writing output(xc', yc') in little endian format */
emilmont 1:fdd22bb7aa52 1609 pSrc16[i1 * 2u] = out1;
emilmont 1:fdd22bb7aa52 1610 pSrc16[(i1 * 2u) + 1u] = out2;
emilmont 1:fdd22bb7aa52 1611
emilmont 1:fdd22bb7aa52 1612 /* Butterfly calculations */
emilmont 1:fdd22bb7aa52 1613 /* input is down scale by 4 to avoid overflow */
emilmont 1:fdd22bb7aa52 1614 /* U0 = yd, U1 = xd */
emilmont 1:fdd22bb7aa52 1615 U0 = pSrc16[i3 * 2u] >> 2u;
emilmont 1:fdd22bb7aa52 1616 U1 = pSrc16[(i3 * 2u) + 1u] >> 2u;
emilmont 1:fdd22bb7aa52 1617
emilmont 1:fdd22bb7aa52 1618 /* T0 = yb-yd, T1 = xb-xd */
emilmont 1:fdd22bb7aa52 1619 T0 = __SSAT(T0 - U0, 16u);
emilmont 1:fdd22bb7aa52 1620 T1 = __SSAT(T1 - U1, 16u);
emilmont 1:fdd22bb7aa52 1621 /* R0 = (ya-yc) + (xb- xd) , R1 = (xa-xc) - (yb-yd) */
emilmont 1:fdd22bb7aa52 1622 R0 = (short) __SSAT((q31_t) (S0 + T1), 16);
emilmont 1:fdd22bb7aa52 1623 R1 = (short) __SSAT((q31_t) (S1 - T0), 16);
emilmont 1:fdd22bb7aa52 1624 /* S0 = (ya-yc) - (xb- xd), S1 = (xa-xc) + (yb-yd) */
emilmont 1:fdd22bb7aa52 1625 S0 = (short) __SSAT((q31_t) (S0 - T1), 16);
emilmont 1:fdd22bb7aa52 1626 S1 = (short) __SSAT((q31_t) (S1 + T0), 16);
emilmont 1:fdd22bb7aa52 1627
emilmont 1:fdd22bb7aa52 1628 /* co1 & si1 are read from Coefficient pointer */
emilmont 1:fdd22bb7aa52 1629 Co1 = pCoef16[ic * 2u];
emilmont 1:fdd22bb7aa52 1630 Si1 = pCoef16[(ic * 2u) + 1u];
emilmont 1:fdd22bb7aa52 1631 /* Butterfly process for the i0+fftLen/2 sample */
emilmont 1:fdd22bb7aa52 1632 /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
emilmont 1:fdd22bb7aa52 1633 out1 = (short) ((Co1 * S0 - Si1 * S1) >> 16u);
emilmont 1:fdd22bb7aa52 1634 /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
emilmont 1:fdd22bb7aa52 1635 out2 = (short) ((Si1 * S0 + Co1 * S1) >> 16u);
emilmont 1:fdd22bb7aa52 1636 /* writing output(xb', yb') in little endian format */
emilmont 1:fdd22bb7aa52 1637 pSrc16[i2 * 2u] = out1;
emilmont 1:fdd22bb7aa52 1638 pSrc16[(i2 * 2u) + 1u] = out2;
emilmont 1:fdd22bb7aa52 1639
emilmont 1:fdd22bb7aa52 1640 /* Co3 & si3 are read from Coefficient pointer */
emilmont 1:fdd22bb7aa52 1641 Co3 = pCoef16[3u * ic * 2u];
emilmont 1:fdd22bb7aa52 1642 Si3 = pCoef16[(3u * ic * 2u) + 1u];
emilmont 1:fdd22bb7aa52 1643 /* Butterfly process for the i0+3fftLen/4 sample */
emilmont 1:fdd22bb7aa52 1644 /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */
emilmont 1:fdd22bb7aa52 1645 out1 = (short) ((Co3 * R0 - Si3 * R1) >> 16u);
emilmont 1:fdd22bb7aa52 1646 /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */
emilmont 1:fdd22bb7aa52 1647 out2 = (short) ((Si3 * R0 + Co3 * R1) >> 16u);
emilmont 1:fdd22bb7aa52 1648 /* writing output(xd', yd') in little endian format */
emilmont 1:fdd22bb7aa52 1649 pSrc16[i3 * 2u] = out1;
emilmont 1:fdd22bb7aa52 1650 pSrc16[(i3 * 2u) + 1u] = out2;
emilmont 1:fdd22bb7aa52 1651
emilmont 1:fdd22bb7aa52 1652 /* Twiddle coefficients index modifier */
emilmont 1:fdd22bb7aa52 1653 ic = ic + twidCoefModifier;
emilmont 1:fdd22bb7aa52 1654
emilmont 1:fdd22bb7aa52 1655 /* Updating input index */
emilmont 1:fdd22bb7aa52 1656 i0 = i0 + 1u;
emilmont 1:fdd22bb7aa52 1657
emilmont 1:fdd22bb7aa52 1658 } while(--j);
emilmont 1:fdd22bb7aa52 1659
emilmont 1:fdd22bb7aa52 1660 /* End of first stage process */
emilmont 1:fdd22bb7aa52 1661
emilmont 1:fdd22bb7aa52 1662 /* data is in 4.11(q11) format */
emilmont 1:fdd22bb7aa52 1663
emilmont 1:fdd22bb7aa52 1664
emilmont 1:fdd22bb7aa52 1665 /* Start of Middle stage process */
emilmont 1:fdd22bb7aa52 1666
emilmont 1:fdd22bb7aa52 1667 /* Twiddle coefficients index modifier */
emilmont 1:fdd22bb7aa52 1668 twidCoefModifier <<= 2u;
emilmont 1:fdd22bb7aa52 1669
emilmont 1:fdd22bb7aa52 1670 /* Calculation of Middle stage */
emilmont 1:fdd22bb7aa52 1671 for (k = fftLen / 4u; k > 4u; k >>= 2u)
emilmont 1:fdd22bb7aa52 1672 {
emilmont 1:fdd22bb7aa52 1673 /* Initializations for the middle stage */
emilmont 1:fdd22bb7aa52 1674 n1 = n2;
emilmont 1:fdd22bb7aa52 1675 n2 >>= 2u;
emilmont 1:fdd22bb7aa52 1676 ic = 0u;
emilmont 1:fdd22bb7aa52 1677
emilmont 1:fdd22bb7aa52 1678 for (j = 0u; j <= (n2 - 1u); j++)
emilmont 1:fdd22bb7aa52 1679 {
emilmont 1:fdd22bb7aa52 1680 /* index calculation for the coefficients */
emilmont 1:fdd22bb7aa52 1681 Co1 = pCoef16[ic * 2u];
emilmont 1:fdd22bb7aa52 1682 Si1 = pCoef16[(ic * 2u) + 1u];
emilmont 1:fdd22bb7aa52 1683 Co2 = pCoef16[2u * ic * 2u];
emilmont 1:fdd22bb7aa52 1684 Si2 = pCoef16[2u * ic * 2u + 1u];
emilmont 1:fdd22bb7aa52 1685 Co3 = pCoef16[3u * ic * 2u];
emilmont 1:fdd22bb7aa52 1686 Si3 = pCoef16[(3u * ic * 2u) + 1u];
emilmont 1:fdd22bb7aa52 1687
emilmont 1:fdd22bb7aa52 1688 /* Twiddle coefficients index modifier */
emilmont 1:fdd22bb7aa52 1689 ic = ic + twidCoefModifier;
emilmont 1:fdd22bb7aa52 1690
emilmont 1:fdd22bb7aa52 1691 /* Butterfly implementation */
emilmont 1:fdd22bb7aa52 1692 for (i0 = j; i0 < fftLen; i0 += n1)
emilmont 1:fdd22bb7aa52 1693 {
emilmont 1:fdd22bb7aa52 1694 /* index calculation for the input as, */
emilmont 1:fdd22bb7aa52 1695 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
emilmont 1:fdd22bb7aa52 1696 i1 = i0 + n2;
emilmont 1:fdd22bb7aa52 1697 i2 = i1 + n2;
emilmont 1:fdd22bb7aa52 1698 i3 = i2 + n2;
emilmont 1:fdd22bb7aa52 1699
emilmont 1:fdd22bb7aa52 1700 /* Reading i0, i0+fftLen/2 inputs */
emilmont 1:fdd22bb7aa52 1701 /* Read ya (real), xa(imag) input */
emilmont 1:fdd22bb7aa52 1702 T0 = pSrc16[i0 * 2u];
emilmont 1:fdd22bb7aa52 1703 T1 = pSrc16[(i0 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 1704
emilmont 1:fdd22bb7aa52 1705 /* Read yc (real), xc(imag) input */
emilmont 1:fdd22bb7aa52 1706 S0 = pSrc16[i2 * 2u];
emilmont 1:fdd22bb7aa52 1707 S1 = pSrc16[(i2 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 1708
emilmont 1:fdd22bb7aa52 1709
emilmont 1:fdd22bb7aa52 1710 /* R0 = (ya + yc), R1 = (xa + xc) */
emilmont 1:fdd22bb7aa52 1711 R0 = __SSAT(T0 + S0, 16u);
emilmont 1:fdd22bb7aa52 1712 R1 = __SSAT(T1 + S1, 16u);
emilmont 1:fdd22bb7aa52 1713 /* S0 = (ya - yc), S1 = (xa - xc) */
emilmont 1:fdd22bb7aa52 1714 S0 = __SSAT(T0 - S0, 16u);
emilmont 1:fdd22bb7aa52 1715 S1 = __SSAT(T1 - S1, 16u);
emilmont 1:fdd22bb7aa52 1716
emilmont 1:fdd22bb7aa52 1717 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
emilmont 1:fdd22bb7aa52 1718 /* Read yb (real), xb(imag) input */
emilmont 1:fdd22bb7aa52 1719 T0 = pSrc16[i1 * 2u];
emilmont 1:fdd22bb7aa52 1720 T1 = pSrc16[(i1 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 1721
emilmont 1:fdd22bb7aa52 1722 /* Read yd (real), xd(imag) input */
emilmont 1:fdd22bb7aa52 1723 U0 = pSrc16[i3 * 2u];
emilmont 1:fdd22bb7aa52 1724 U1 = pSrc16[(i3 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 1725
emilmont 1:fdd22bb7aa52 1726 /* T0 = (yb + yd), T1 = (xb + xd) */
emilmont 1:fdd22bb7aa52 1727 T0 = __SSAT(T0 + U0, 16u);
emilmont 1:fdd22bb7aa52 1728 T1 = __SSAT(T1 + U1, 16u);
emilmont 1:fdd22bb7aa52 1729
emilmont 1:fdd22bb7aa52 1730 /* writing the butterfly processed i0 sample */
emilmont 1:fdd22bb7aa52 1731 /* xa' = xa + xb + xc + xd */
emilmont 1:fdd22bb7aa52 1732 /* ya' = ya + yb + yc + yd */
emilmont 1:fdd22bb7aa52 1733 pSrc16[i0 * 2u] = ((R0 >> 1u) + (T0 >> 1u)) >> 1u;
emilmont 1:fdd22bb7aa52 1734 pSrc16[(i0 * 2u) + 1u] = ((R1 >> 1u) + (T1 >> 1u)) >> 1u;
emilmont 1:fdd22bb7aa52 1735
emilmont 1:fdd22bb7aa52 1736 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
emilmont 1:fdd22bb7aa52 1737 R0 = (R0 >> 1u) - (T0 >> 1u);
emilmont 1:fdd22bb7aa52 1738 R1 = (R1 >> 1u) - (T1 >> 1u);
emilmont 1:fdd22bb7aa52 1739
emilmont 1:fdd22bb7aa52 1740 /* (ya-yb+yc-yd)* (si2) - (xa-xb+xc-xd)* co2 */
emilmont 1:fdd22bb7aa52 1741 out1 = (short) ((Co2 * R0 - Si2 * R1) >> 16);
emilmont 1:fdd22bb7aa52 1742 /* (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
emilmont 1:fdd22bb7aa52 1743 out2 = (short) ((Si2 * R0 + Co2 * R1) >> 16);
emilmont 1:fdd22bb7aa52 1744
emilmont 1:fdd22bb7aa52 1745 /* Reading i0+fftLen/4 */
emilmont 1:fdd22bb7aa52 1746 /* Read yb (real), xb(imag) input */
emilmont 1:fdd22bb7aa52 1747 T0 = pSrc16[i1 * 2u];
emilmont 1:fdd22bb7aa52 1748 T1 = pSrc16[(i1 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 1749
emilmont 1:fdd22bb7aa52 1750 /* writing the butterfly processed i0 + fftLen/4 sample */
emilmont 1:fdd22bb7aa52 1751 /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
emilmont 1:fdd22bb7aa52 1752 /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
emilmont 1:fdd22bb7aa52 1753 pSrc16[i1 * 2u] = out1;
emilmont 1:fdd22bb7aa52 1754 pSrc16[(i1 * 2u) + 1u] = out2;
emilmont 1:fdd22bb7aa52 1755
emilmont 1:fdd22bb7aa52 1756 /* Butterfly calculations */
emilmont 1:fdd22bb7aa52 1757 /* Read yd (real), xd(imag) input */
emilmont 1:fdd22bb7aa52 1758 U0 = pSrc16[i3 * 2u];
emilmont 1:fdd22bb7aa52 1759 U1 = pSrc16[(i3 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 1760
emilmont 1:fdd22bb7aa52 1761 /* T0 = yb-yd, T1 = xb-xd */
emilmont 1:fdd22bb7aa52 1762 T0 = __SSAT(T0 - U0, 16u);
emilmont 1:fdd22bb7aa52 1763 T1 = __SSAT(T1 - U1, 16u);
emilmont 1:fdd22bb7aa52 1764
emilmont 1:fdd22bb7aa52 1765 /* R0 = (ya-yc) + (xb- xd) , R1 = (xa-xc) - (yb-yd) */
emilmont 1:fdd22bb7aa52 1766 R0 = (S0 >> 1u) + (T1 >> 1u);
emilmont 1:fdd22bb7aa52 1767 R1 = (S1 >> 1u) - (T0 >> 1u);
emilmont 1:fdd22bb7aa52 1768
emilmont 1:fdd22bb7aa52 1769 /* S0 = (ya-yc) - (xb- xd), S1 = (xa-xc) + (yb-yd) */
emilmont 1:fdd22bb7aa52 1770 S0 = (S0 >> 1u) - (T1 >> 1u);
emilmont 1:fdd22bb7aa52 1771 S1 = (S1 >> 1u) + (T0 >> 1u);
emilmont 1:fdd22bb7aa52 1772
emilmont 1:fdd22bb7aa52 1773 /* Butterfly process for the i0+fftLen/2 sample */
emilmont 1:fdd22bb7aa52 1774 out1 = (short) ((Co1 * S0 - Si1 * S1) >> 16u);
emilmont 1:fdd22bb7aa52 1775 out2 = (short) ((Si1 * S0 + Co1 * S1) >> 16u);
emilmont 1:fdd22bb7aa52 1776 /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
emilmont 1:fdd22bb7aa52 1777 /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
emilmont 1:fdd22bb7aa52 1778 pSrc16[i2 * 2u] = out1;
emilmont 1:fdd22bb7aa52 1779 pSrc16[(i2 * 2u) + 1u] = out2;
emilmont 1:fdd22bb7aa52 1780
emilmont 1:fdd22bb7aa52 1781 /* Butterfly process for the i0+3fftLen/4 sample */
emilmont 1:fdd22bb7aa52 1782 out1 = (short) ((Co3 * R0 - Si3 * R1) >> 16u);
emilmont 1:fdd22bb7aa52 1783
emilmont 1:fdd22bb7aa52 1784 out2 = (short) ((Si3 * R0 + Co3 * R1) >> 16u);
emilmont 1:fdd22bb7aa52 1785 /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */
emilmont 1:fdd22bb7aa52 1786 /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */
emilmont 1:fdd22bb7aa52 1787 pSrc16[i3 * 2u] = out1;
emilmont 1:fdd22bb7aa52 1788 pSrc16[(i3 * 2u) + 1u] = out2;
emilmont 1:fdd22bb7aa52 1789
emilmont 1:fdd22bb7aa52 1790
emilmont 1:fdd22bb7aa52 1791 }
emilmont 1:fdd22bb7aa52 1792 }
emilmont 1:fdd22bb7aa52 1793 /* Twiddle coefficients index modifier */
emilmont 1:fdd22bb7aa52 1794 twidCoefModifier <<= 2u;
emilmont 1:fdd22bb7aa52 1795 }
emilmont 1:fdd22bb7aa52 1796 /* End of Middle stages process */
emilmont 1:fdd22bb7aa52 1797
emilmont 1:fdd22bb7aa52 1798
emilmont 1:fdd22bb7aa52 1799 /* data is in 10.6(q6) format for the 1024 point */
emilmont 1:fdd22bb7aa52 1800 /* data is in 8.8(q8) format for the 256 point */
emilmont 1:fdd22bb7aa52 1801 /* data is in 6.10(q10) format for the 64 point */
emilmont 1:fdd22bb7aa52 1802 /* data is in 4.12(q12) format for the 16 point */
emilmont 1:fdd22bb7aa52 1803
emilmont 1:fdd22bb7aa52 1804 /* start of last stage process */
emilmont 1:fdd22bb7aa52 1805
emilmont 1:fdd22bb7aa52 1806
emilmont 1:fdd22bb7aa52 1807 /* Initializations for the last stage */
emilmont 1:fdd22bb7aa52 1808 n1 = n2;
emilmont 1:fdd22bb7aa52 1809 n2 >>= 2u;
emilmont 1:fdd22bb7aa52 1810
emilmont 1:fdd22bb7aa52 1811 /* Butterfly implementation */
emilmont 1:fdd22bb7aa52 1812 for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1)
emilmont 1:fdd22bb7aa52 1813 {
emilmont 1:fdd22bb7aa52 1814 /* index calculation for the input as, */
emilmont 1:fdd22bb7aa52 1815 /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
emilmont 1:fdd22bb7aa52 1816 i1 = i0 + n2;
emilmont 1:fdd22bb7aa52 1817 i2 = i1 + n2;
emilmont 1:fdd22bb7aa52 1818 i3 = i2 + n2;
emilmont 1:fdd22bb7aa52 1819
emilmont 1:fdd22bb7aa52 1820 /* Reading i0, i0+fftLen/2 inputs */
emilmont 1:fdd22bb7aa52 1821 /* Read ya (real), xa(imag) input */
emilmont 1:fdd22bb7aa52 1822 T0 = pSrc16[i0 * 2u];
emilmont 1:fdd22bb7aa52 1823 T1 = pSrc16[(i0 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 1824 /* Read yc (real), xc(imag) input */
emilmont 1:fdd22bb7aa52 1825 S0 = pSrc16[i2 * 2u];
emilmont 1:fdd22bb7aa52 1826 S1 = pSrc16[(i2 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 1827
emilmont 1:fdd22bb7aa52 1828 /* R0 = (ya + yc), R1 = (xa + xc) */
emilmont 1:fdd22bb7aa52 1829 R0 = __SSAT(T0 + S0, 16u);
emilmont 1:fdd22bb7aa52 1830 R1 = __SSAT(T1 + S1, 16u);
emilmont 1:fdd22bb7aa52 1831 /* S0 = (ya - yc), S1 = (xa - xc) */
emilmont 1:fdd22bb7aa52 1832 S0 = __SSAT(T0 - S0, 16u);
emilmont 1:fdd22bb7aa52 1833 S1 = __SSAT(T1 - S1, 16u);
emilmont 1:fdd22bb7aa52 1834
emilmont 1:fdd22bb7aa52 1835 /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
emilmont 1:fdd22bb7aa52 1836 /* Read yb (real), xb(imag) input */
emilmont 1:fdd22bb7aa52 1837 T0 = pSrc16[i1 * 2u];
emilmont 1:fdd22bb7aa52 1838 T1 = pSrc16[(i1 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 1839 /* Read yd (real), xd(imag) input */
emilmont 1:fdd22bb7aa52 1840 U0 = pSrc16[i3 * 2u];
emilmont 1:fdd22bb7aa52 1841 U1 = pSrc16[(i3 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 1842
emilmont 1:fdd22bb7aa52 1843 /* T0 = (yb + yd), T1 = (xb + xd) */
emilmont 1:fdd22bb7aa52 1844 T0 = __SSAT(T0 + U0, 16u);
emilmont 1:fdd22bb7aa52 1845 T1 = __SSAT(T1 + U1, 16u);
emilmont 1:fdd22bb7aa52 1846
emilmont 1:fdd22bb7aa52 1847 /* writing the butterfly processed i0 sample */
emilmont 1:fdd22bb7aa52 1848 /* xa' = xa + xb + xc + xd */
emilmont 1:fdd22bb7aa52 1849 /* ya' = ya + yb + yc + yd */
emilmont 1:fdd22bb7aa52 1850 pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u);
emilmont 1:fdd22bb7aa52 1851 pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u);
emilmont 1:fdd22bb7aa52 1852
emilmont 1:fdd22bb7aa52 1853 /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
emilmont 1:fdd22bb7aa52 1854 R0 = (R0 >> 1u) - (T0 >> 1u);
emilmont 1:fdd22bb7aa52 1855 R1 = (R1 >> 1u) - (T1 >> 1u);
emilmont 1:fdd22bb7aa52 1856
emilmont 1:fdd22bb7aa52 1857 /* Read yb (real), xb(imag) input */
emilmont 1:fdd22bb7aa52 1858 T0 = pSrc16[i1 * 2u];
emilmont 1:fdd22bb7aa52 1859 T1 = pSrc16[(i1 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 1860
emilmont 1:fdd22bb7aa52 1861 /* writing the butterfly processed i0 + fftLen/4 sample */
emilmont 1:fdd22bb7aa52 1862 /* xc' = (xa-xb+xc-xd) */
emilmont 1:fdd22bb7aa52 1863 /* yc' = (ya-yb+yc-yd) */
emilmont 1:fdd22bb7aa52 1864 pSrc16[i1 * 2u] = R0;
emilmont 1:fdd22bb7aa52 1865 pSrc16[(i1 * 2u) + 1u] = R1;
emilmont 1:fdd22bb7aa52 1866
emilmont 1:fdd22bb7aa52 1867 /* Read yd (real), xd(imag) input */
emilmont 1:fdd22bb7aa52 1868 U0 = pSrc16[i3 * 2u];
emilmont 1:fdd22bb7aa52 1869 U1 = pSrc16[(i3 * 2u) + 1u];
emilmont 1:fdd22bb7aa52 1870 /* T0 = (yb - yd), T1 = (xb - xd) */
emilmont 1:fdd22bb7aa52 1871 T0 = __SSAT(T0 - U0, 16u);
emilmont 1:fdd22bb7aa52 1872 T1 = __SSAT(T1 - U1, 16u);
emilmont 1:fdd22bb7aa52 1873
emilmont 1:fdd22bb7aa52 1874 /* writing the butterfly processed i0 + fftLen/2 sample */
emilmont 1:fdd22bb7aa52 1875 /* xb' = (xa-yb-xc+yd) */
emilmont 1:fdd22bb7aa52 1876 /* yb' = (ya+xb-yc-xd) */
emilmont 1:fdd22bb7aa52 1877 pSrc16[i2 * 2u] = (S0 >> 1u) - (T1 >> 1u);
emilmont 1:fdd22bb7aa52 1878 pSrc16[(i2 * 2u) + 1u] = (S1 >> 1u) + (T0 >> 1u);
emilmont 1:fdd22bb7aa52 1879
emilmont 1:fdd22bb7aa52 1880
emilmont 1:fdd22bb7aa52 1881 /* writing the butterfly processed i0 + 3fftLen/4 sample */
emilmont 1:fdd22bb7aa52 1882 /* xd' = (xa+yb-xc-yd) */
emilmont 1:fdd22bb7aa52 1883 /* yd' = (ya-xb-yc+xd) */
emilmont 1:fdd22bb7aa52 1884 pSrc16[i3 * 2u] = (S0 >> 1u) + (T1 >> 1u);
emilmont 1:fdd22bb7aa52 1885 pSrc16[(i3 * 2u) + 1u] = (S1 >> 1u) - (T0 >> 1u);
emilmont 1:fdd22bb7aa52 1886 }
emilmont 1:fdd22bb7aa52 1887 /* end of last stage process */
emilmont 1:fdd22bb7aa52 1888
emilmont 1:fdd22bb7aa52 1889 /* output is in 11.5(q5) format for the 1024 point */
emilmont 1:fdd22bb7aa52 1890 /* output is in 9.7(q7) format for the 256 point */
emilmont 1:fdd22bb7aa52 1891 /* output is in 7.9(q9) format for the 64 point */
emilmont 1:fdd22bb7aa52 1892 /* output is in 5.11(q11) format for the 16 point */
emilmont 1:fdd22bb7aa52 1893
emilmont 1:fdd22bb7aa52 1894 #endif /* #ifndef ARM_MATH_CM0 */
emilmont 1:fdd22bb7aa52 1895
emilmont 1:fdd22bb7aa52 1896 }