The CMSIS DSP 5 library
Dependents: Nucleo-Heart-Rate ejercicioVrms2 PROYECTOFINAL ejercicioVrms ... more
functions/TransformFunctions/arm_cfft_radix4_q15.c@3:4098b9d3d571, 2018-06-21 (annotated)
- Committer:
- xorjoep
- Date:
- Thu Jun 21 11:56:27 2018 +0000
- Revision:
- 3:4098b9d3d571
- Parent:
- 1:24714b45cd1b
headers is a folder not a library
Who changed what in which revision?
User | Revision | Line number | New contents of line |
---|---|---|---|
xorjoep | 1:24714b45cd1b | 1 | /* ---------------------------------------------------------------------- |
xorjoep | 1:24714b45cd1b | 2 | * Project: CMSIS DSP Library |
xorjoep | 1:24714b45cd1b | 3 | * Title: arm_cfft_radix4_q15.c |
xorjoep | 1:24714b45cd1b | 4 | * Description: This file has function definition of Radix-4 FFT & IFFT function and |
xorjoep | 1:24714b45cd1b | 5 | * In-place bit reversal using bit reversal table |
xorjoep | 1:24714b45cd1b | 6 | * |
xorjoep | 1:24714b45cd1b | 7 | * $Date: 27. January 2017 |
xorjoep | 1:24714b45cd1b | 8 | * $Revision: V.1.5.1 |
xorjoep | 1:24714b45cd1b | 9 | * |
xorjoep | 1:24714b45cd1b | 10 | * Target Processor: Cortex-M cores |
xorjoep | 1:24714b45cd1b | 11 | * -------------------------------------------------------------------- */ |
xorjoep | 1:24714b45cd1b | 12 | /* |
xorjoep | 1:24714b45cd1b | 13 | * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. |
xorjoep | 1:24714b45cd1b | 14 | * |
xorjoep | 1:24714b45cd1b | 15 | * SPDX-License-Identifier: Apache-2.0 |
xorjoep | 1:24714b45cd1b | 16 | * |
xorjoep | 1:24714b45cd1b | 17 | * Licensed under the Apache License, Version 2.0 (the License); you may |
xorjoep | 1:24714b45cd1b | 18 | * not use this file except in compliance with the License. |
xorjoep | 1:24714b45cd1b | 19 | * You may obtain a copy of the License at |
xorjoep | 1:24714b45cd1b | 20 | * |
xorjoep | 1:24714b45cd1b | 21 | * www.apache.org/licenses/LICENSE-2.0 |
xorjoep | 1:24714b45cd1b | 22 | * |
xorjoep | 1:24714b45cd1b | 23 | * Unless required by applicable law or agreed to in writing, software |
xorjoep | 1:24714b45cd1b | 24 | * distributed under the License is distributed on an AS IS BASIS, WITHOUT |
xorjoep | 1:24714b45cd1b | 25 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
xorjoep | 1:24714b45cd1b | 26 | * See the License for the specific language governing permissions and |
xorjoep | 1:24714b45cd1b | 27 | * limitations under the License. |
xorjoep | 1:24714b45cd1b | 28 | */ |
xorjoep | 1:24714b45cd1b | 29 | |
xorjoep | 1:24714b45cd1b | 30 | #include "arm_math.h" |
xorjoep | 1:24714b45cd1b | 31 | |
xorjoep | 1:24714b45cd1b | 32 | /* Forward radix-4 butterfly; arm_cfft_radix4_q15() calls it when ifftFlag is not 1. Defined further down in this file. */ |
xorjoep | 1:24714b45cd1b | 33 | void arm_radix4_butterfly_q15( |
xorjoep | 1:24714b45cd1b | 34 | q15_t * pSrc16, |
xorjoep | 1:24714b45cd1b | 35 | uint32_t fftLen, |
xorjoep | 1:24714b45cd1b | 36 | q15_t * pCoef16, |
xorjoep | 1:24714b45cd1b | 37 | uint32_t twidCoefModifier); |
xorjoep | 1:24714b45cd1b | 38 | /* Inverse radix-4 butterfly; arm_cfft_radix4_q15() calls it when ifftFlag == 1. Takes the same arguments as the forward butterfly. */ |
xorjoep | 1:24714b45cd1b | 39 | void arm_radix4_butterfly_inverse_q15( |
xorjoep | 1:24714b45cd1b | 40 | q15_t * pSrc16, |
xorjoep | 1:24714b45cd1b | 41 | uint32_t fftLen, |
xorjoep | 1:24714b45cd1b | 42 | q15_t * pCoef16, |
xorjoep | 1:24714b45cd1b | 43 | uint32_t twidCoefModifier); |
xorjoep | 1:24714b45cd1b | 44 | /* In-place bit reversal using a bit reversal table; arm_cfft_radix4_q15() calls it when bitReverseFlag == 1. */ |
xorjoep | 1:24714b45cd1b | 45 | void arm_bitreversal_q15( |
xorjoep | 1:24714b45cd1b | 46 | q15_t * pSrc, |
xorjoep | 1:24714b45cd1b | 47 | uint32_t fftLen, |
xorjoep | 1:24714b45cd1b | 48 | uint16_t bitRevFactor, |
xorjoep | 1:24714b45cd1b | 49 | uint16_t * pBitRevTab); |
xorjoep | 1:24714b45cd1b | 50 | |
xorjoep | 1:24714b45cd1b | 51 | /** |
xorjoep | 1:24714b45cd1b | 52 | * @ingroup groupTransforms |
xorjoep | 1:24714b45cd1b | 53 | */ |
xorjoep | 1:24714b45cd1b | 54 | |
xorjoep | 1:24714b45cd1b | 55 | /** |
xorjoep | 1:24714b45cd1b | 56 | * @addtogroup ComplexFFT |
xorjoep | 1:24714b45cd1b | 57 | * @{ |
xorjoep | 1:24714b45cd1b | 58 | */ |
xorjoep | 1:24714b45cd1b | 59 | |
xorjoep | 1:24714b45cd1b | 60 | |
xorjoep | 1:24714b45cd1b | 61 | /** |
xorjoep | 1:24714b45cd1b | 62 | * @details Runs the inverse radix-4 butterfly when S->ifftFlag == 1 and the forward radix-4 butterfly otherwise, then calls arm_bitreversal_q15 on pSrc only when S->bitReverseFlag == 1. |
xorjoep | 1:24714b45cd1b | 63 | * @brief Processing function for the Q15 CFFT/CIFFT. |
xorjoep | 1:24714b45cd1b | 64 | * @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q15 and will be removed in the future. |
xorjoep | 1:24714b45cd1b | 65 | * @param[in] *S points to an instance of the Q15 CFFT/CIFFT structure; its ifftFlag, bitReverseFlag, fftLen, pTwiddle, twidCoefModifier, bitRevFactor and pBitRevTable fields are read. |
xorjoep | 1:24714b45cd1b | 66 | * @param[in, out] *pSrc points to the complex data buffer. Processing occurs in-place. |
xorjoep | 1:24714b45cd1b | 67 | * @return none. |
xorjoep | 1:24714b45cd1b | 68 | * |
xorjoep | 1:24714b45cd1b | 69 | * \par Input and output formats: |
xorjoep | 1:24714b45cd1b | 70 | * \par |
xorjoep | 1:24714b45cd1b | 71 | * Internally, the input is downscaled by 2 at every stage to avoid saturation inside the CFFT/CIFFT process. |
xorjoep | 1:24714b45cd1b | 72 | * Hence the output format differs between FFT sizes. |
xorjoep | 1:24714b45cd1b | 73 | * The input and output formats for the different FFT sizes, and the number of bits to upscale, are given in the tables below for CFFT and CIFFT: |
xorjoep | 1:24714b45cd1b | 74 | * \par |
xorjoep | 1:24714b45cd1b | 75 | * \image html CFFTQ15.gif "Input and Output Formats for Q15 CFFT" |
xorjoep | 1:24714b45cd1b | 76 | * \image html CIFFTQ15.gif "Input and Output Formats for Q15 CIFFT" |
xorjoep | 1:24714b45cd1b | 77 | */ |
xorjoep | 1:24714b45cd1b | 78 | |
xorjoep | 1:24714b45cd1b | 79 | void arm_cfft_radix4_q15( |
xorjoep | 1:24714b45cd1b | 80 | const arm_cfft_radix4_instance_q15 * S, |
xorjoep | 1:24714b45cd1b | 81 | q15_t * pSrc) |
xorjoep | 1:24714b45cd1b | 82 | { |
xorjoep | 1:24714b45cd1b | 83 | if (S->ifftFlag == 1U) |
xorjoep | 1:24714b45cd1b | 84 | { |
xorjoep | 1:24714b45cd1b | 85 | /* ifftFlag == 1: complex inverse FFT (CIFFT), radix-4 butterfly run in place on pSrc */ |
xorjoep | 1:24714b45cd1b | 86 | arm_radix4_butterfly_inverse_q15(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier); |
xorjoep | 1:24714b45cd1b | 87 | } |
xorjoep | 1:24714b45cd1b | 88 | else |
xorjoep | 1:24714b45cd1b | 89 | { |
xorjoep | 1:24714b45cd1b | 90 | /* Any other ifftFlag value: complex forward FFT, radix-4 butterfly run in place on pSrc */ |
xorjoep | 1:24714b45cd1b | 91 | arm_radix4_butterfly_q15(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier); |
xorjoep | 1:24714b45cd1b | 92 | } |
xorjoep | 1:24714b45cd1b | 93 | |
xorjoep | 1:24714b45cd1b | 94 | if (S->bitReverseFlag == 1U) |
xorjoep | 1:24714b45cd1b | 95 | { |
xorjoep | 1:24714b45cd1b | 96 | /* Bit reversal of the same buffer, performed only when bitReverseFlag == 1; uses the instance's bitRevFactor and pBitRevTable */ |
xorjoep | 1:24714b45cd1b | 97 | arm_bitreversal_q15(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable); |
xorjoep | 1:24714b45cd1b | 98 | } |
xorjoep | 1:24714b45cd1b | 99 | |
xorjoep | 1:24714b45cd1b | 100 | } |
xorjoep | 1:24714b45cd1b | 101 | |
xorjoep | 1:24714b45cd1b | 102 | /** |
xorjoep | 1:24714b45cd1b | 103 | * @} end of ComplexFFT group |
xorjoep | 1:24714b45cd1b | 104 | */ |
xorjoep | 1:24714b45cd1b | 105 | |
xorjoep | 1:24714b45cd1b | 106 | /* |
xorjoep | 1:24714b45cd1b | 107 | * Radix-4 FFT algorithm used is : |
xorjoep | 1:24714b45cd1b | 108 | * |
xorjoep | 1:24714b45cd1b | 109 | * Input real and imaginary data: |
xorjoep | 1:24714b45cd1b | 110 | * x(n) = xa + j * ya |
xorjoep | 1:24714b45cd1b | 111 | * x(n+N/4 ) = xb + j * yb |
xorjoep | 1:24714b45cd1b | 112 | * x(n+N/2 ) = xc + j * yc |
xorjoep | 1:24714b45cd1b | 113 | * x(n+3N/4) = xd + j * yd |
xorjoep | 1:24714b45cd1b | 114 | * |
xorjoep | 1:24714b45cd1b | 115 | * |
xorjoep | 1:24714b45cd1b | 116 | * Output real and imaginary data: |
xorjoep | 1:24714b45cd1b | 117 | * x(4r) = xa'+ j * ya' |
xorjoep | 1:24714b45cd1b | 118 | * x(4r+1) = xb'+ j * yb' |
xorjoep | 1:24714b45cd1b | 119 | * x(4r+2) = xc'+ j * yc' |
xorjoep | 1:24714b45cd1b | 120 | * x(4r+3) = xd'+ j * yd' |
xorjoep | 1:24714b45cd1b | 121 | * |
xorjoep | 1:24714b45cd1b | 122 | * |
xorjoep | 1:24714b45cd1b | 123 | * Twiddle factors for radix-4 FFT: |
xorjoep | 1:24714b45cd1b | 124 | * Wn = co1 + j * (- si1) |
xorjoep | 1:24714b45cd1b | 125 | * W2n = co2 + j * (- si2) |
xorjoep | 1:24714b45cd1b | 126 | * W3n = co3 + j * (- si3) |
xorjoep | 1:24714b45cd1b | 127 | |
xorjoep | 1:24714b45cd1b | 128 | * The real and imaginary output values for the radix-4 butterfly are |
xorjoep | 1:24714b45cd1b | 129 | * xa' = xa + xb + xc + xd |
xorjoep | 1:24714b45cd1b | 130 | * ya' = ya + yb + yc + yd |
xorjoep | 1:24714b45cd1b | 131 | * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) |
xorjoep | 1:24714b45cd1b | 132 | * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) |
xorjoep | 1:24714b45cd1b | 133 | * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) |
xorjoep | 1:24714b45cd1b | 134 | * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) |
xorjoep | 1:24714b45cd1b | 135 | * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) |
xorjoep | 1:24714b45cd1b | 136 | * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) |
xorjoep | 1:24714b45cd1b | 137 | * |
xorjoep | 1:24714b45cd1b | 138 | */ |
xorjoep | 1:24714b45cd1b | 139 | |
xorjoep | 1:24714b45cd1b | 140 | /** |
xorjoep | 1:24714b45cd1b | 141 | * @brief Core function for the Q15 CFFT butterfly process. |
xorjoep | 1:24714b45cd1b | 142 | * @param[in, out] *pSrc16 points to the in-place buffer of Q15 data type. |
xorjoep | 1:24714b45cd1b | 143 | * @param[in] fftLen length of the FFT. |
xorjoep | 1:24714b45cd1b | 144 | * @param[in] *pCoef16 points to twiddle coefficient buffer. |
xorjoep | 1:24714b45cd1b | 145 | * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. |
xorjoep | 1:24714b45cd1b | 146 | * @return none. |
xorjoep | 1:24714b45cd1b | 147 | */ |
xorjoep | 1:24714b45cd1b | 148 | |
xorjoep | 1:24714b45cd1b | 149 | void arm_radix4_butterfly_q15( |
xorjoep | 1:24714b45cd1b | 150 | q15_t * pSrc16, |
xorjoep | 1:24714b45cd1b | 151 | uint32_t fftLen, |
xorjoep | 1:24714b45cd1b | 152 | q15_t * pCoef16, |
xorjoep | 1:24714b45cd1b | 153 | uint32_t twidCoefModifier) |
xorjoep | 1:24714b45cd1b | 154 | { |
xorjoep | 1:24714b45cd1b | 155 | |
xorjoep | 1:24714b45cd1b | 156 | #if defined (ARM_MATH_DSP) |
xorjoep | 1:24714b45cd1b | 157 | |
xorjoep | 1:24714b45cd1b | 158 | /* Run the below code for Cortex-M4 and Cortex-M3 */ |
xorjoep | 1:24714b45cd1b | 159 | |
xorjoep | 1:24714b45cd1b | 160 | q31_t R, S, T, U; |
xorjoep | 1:24714b45cd1b | 161 | q31_t C1, C2, C3, out1, out2; |
xorjoep | 1:24714b45cd1b | 162 | uint32_t n1, n2, ic, i0, j, k; |
xorjoep | 1:24714b45cd1b | 163 | |
xorjoep | 1:24714b45cd1b | 164 | q15_t *ptr1; |
xorjoep | 1:24714b45cd1b | 165 | q15_t *pSi0; |
xorjoep | 1:24714b45cd1b | 166 | q15_t *pSi1; |
xorjoep | 1:24714b45cd1b | 167 | q15_t *pSi2; |
xorjoep | 1:24714b45cd1b | 168 | q15_t *pSi3; |
xorjoep | 1:24714b45cd1b | 169 | |
xorjoep | 1:24714b45cd1b | 170 | q31_t xaya, xbyb, xcyc, xdyd; |
xorjoep | 1:24714b45cd1b | 171 | |
xorjoep | 1:24714b45cd1b | 172 | /* Total process is divided into three stages */ |
xorjoep | 1:24714b45cd1b | 173 | |
xorjoep | 1:24714b45cd1b | 174 | /* process first stage, middle stages, & last stage */ |
xorjoep | 1:24714b45cd1b | 175 | |
xorjoep | 1:24714b45cd1b | 176 | /* Initializations for the first stage */ |
xorjoep | 1:24714b45cd1b | 177 | n2 = fftLen; |
xorjoep | 1:24714b45cd1b | 178 | n1 = n2; |
xorjoep | 1:24714b45cd1b | 179 | |
xorjoep | 1:24714b45cd1b | 180 | /* n2 = fftLen/4 */ |
xorjoep | 1:24714b45cd1b | 181 | n2 >>= 2U; |
xorjoep | 1:24714b45cd1b | 182 | |
xorjoep | 1:24714b45cd1b | 183 | /* Index for twiddle coefficient */ |
xorjoep | 1:24714b45cd1b | 184 | ic = 0U; |
xorjoep | 1:24714b45cd1b | 185 | |
xorjoep | 1:24714b45cd1b | 186 | /* Index for input read and output write */ |
xorjoep | 1:24714b45cd1b | 187 | j = n2; |
xorjoep | 1:24714b45cd1b | 188 | |
xorjoep | 1:24714b45cd1b | 189 | pSi0 = pSrc16; |
xorjoep | 1:24714b45cd1b | 190 | pSi1 = pSi0 + 2 * n2; |
xorjoep | 1:24714b45cd1b | 191 | pSi2 = pSi1 + 2 * n2; |
xorjoep | 1:24714b45cd1b | 192 | pSi3 = pSi2 + 2 * n2; |
xorjoep | 1:24714b45cd1b | 193 | |
xorjoep | 1:24714b45cd1b | 194 | /* Input is in 1.15(q15) format */ |
xorjoep | 1:24714b45cd1b | 195 | |
xorjoep | 1:24714b45cd1b | 196 | /* start of first stage process */ |
xorjoep | 1:24714b45cd1b | 197 | do |
xorjoep | 1:24714b45cd1b | 198 | { |
xorjoep | 1:24714b45cd1b | 199 | /* Butterfly implementation */ |
xorjoep | 1:24714b45cd1b | 200 | |
xorjoep | 1:24714b45cd1b | 201 | /* Reading i0, i0+fftLen/2 inputs */ |
xorjoep | 1:24714b45cd1b | 202 | /* Read ya (real), xa(imag) input */ |
xorjoep | 1:24714b45cd1b | 203 | T = _SIMD32_OFFSET(pSi0); |
xorjoep | 1:24714b45cd1b | 204 | T = __SHADD16(T, 0); // this is just a SIMD arithmetic shift right by 1 |
xorjoep | 1:24714b45cd1b | 205 | T = __SHADD16(T, 0); // it turns out doing this twice is 2 cycles, the alternative takes 3 cycles |
xorjoep | 1:24714b45cd1b | 206 | //in = ((int16_t) (T & 0xFFFF)) >> 2; // alternative code that takes 3 cycles |
xorjoep | 1:24714b45cd1b | 207 | //T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF); |
xorjoep | 1:24714b45cd1b | 208 | |
xorjoep | 1:24714b45cd1b | 209 | /* Read yc (real), xc(imag) input */ |
xorjoep | 1:24714b45cd1b | 210 | S = _SIMD32_OFFSET(pSi2); |
xorjoep | 1:24714b45cd1b | 211 | S = __SHADD16(S, 0); |
xorjoep | 1:24714b45cd1b | 212 | S = __SHADD16(S, 0); |
xorjoep | 1:24714b45cd1b | 213 | |
xorjoep | 1:24714b45cd1b | 214 | /* R = packed((ya + yc), (xa + xc) ) */ |
xorjoep | 1:24714b45cd1b | 215 | R = __QADD16(T, S); |
xorjoep | 1:24714b45cd1b | 216 | |
xorjoep | 1:24714b45cd1b | 217 | /* S = packed((ya - yc), (xa - xc) ) */ |
xorjoep | 1:24714b45cd1b | 218 | S = __QSUB16(T, S); |
xorjoep | 1:24714b45cd1b | 219 | |
xorjoep | 1:24714b45cd1b | 220 | /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ |
xorjoep | 1:24714b45cd1b | 221 | /* Read yb (real), xb(imag) input */ |
xorjoep | 1:24714b45cd1b | 222 | T = _SIMD32_OFFSET(pSi1); |
xorjoep | 1:24714b45cd1b | 223 | T = __SHADD16(T, 0); |
xorjoep | 1:24714b45cd1b | 224 | T = __SHADD16(T, 0); |
xorjoep | 1:24714b45cd1b | 225 | |
xorjoep | 1:24714b45cd1b | 226 | /* Read yd (real), xd(imag) input */ |
xorjoep | 1:24714b45cd1b | 227 | U = _SIMD32_OFFSET(pSi3); |
xorjoep | 1:24714b45cd1b | 228 | U = __SHADD16(U, 0); |
xorjoep | 1:24714b45cd1b | 229 | U = __SHADD16(U, 0); |
xorjoep | 1:24714b45cd1b | 230 | |
xorjoep | 1:24714b45cd1b | 231 | /* T = packed((yb + yd), (xb + xd) ) */ |
xorjoep | 1:24714b45cd1b | 232 | T = __QADD16(T, U); |
xorjoep | 1:24714b45cd1b | 233 | |
xorjoep | 1:24714b45cd1b | 234 | /* writing the butterfly processed i0 sample */ |
xorjoep | 1:24714b45cd1b | 235 | /* xa' = xa + xb + xc + xd */ |
xorjoep | 1:24714b45cd1b | 236 | /* ya' = ya + yb + yc + yd */ |
xorjoep | 1:24714b45cd1b | 237 | _SIMD32_OFFSET(pSi0) = __SHADD16(R, T); |
xorjoep | 1:24714b45cd1b | 238 | pSi0 += 2; |
xorjoep | 1:24714b45cd1b | 239 | |
xorjoep | 1:24714b45cd1b | 240 | /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */ |
xorjoep | 1:24714b45cd1b | 241 | R = __QSUB16(R, T); |
xorjoep | 1:24714b45cd1b | 242 | |
xorjoep | 1:24714b45cd1b | 243 | /* co2 & si2 are read from SIMD Coefficient pointer */ |
xorjoep | 1:24714b45cd1b | 244 | C2 = _SIMD32_OFFSET(pCoef16 + (4U * ic)); |
xorjoep | 1:24714b45cd1b | 245 | |
xorjoep | 1:24714b45cd1b | 246 | #ifndef ARM_MATH_BIG_ENDIAN |
xorjoep | 1:24714b45cd1b | 247 | |
xorjoep | 1:24714b45cd1b | 248 | /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ |
xorjoep | 1:24714b45cd1b | 249 | out1 = __SMUAD(C2, R) >> 16U; |
xorjoep | 1:24714b45cd1b | 250 | /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ |
xorjoep | 1:24714b45cd1b | 251 | out2 = __SMUSDX(C2, R); |
xorjoep | 1:24714b45cd1b | 252 | |
xorjoep | 1:24714b45cd1b | 253 | #else |
xorjoep | 1:24714b45cd1b | 254 | |
xorjoep | 1:24714b45cd1b | 255 | /* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ |
xorjoep | 1:24714b45cd1b | 256 | out1 = __SMUSDX(R, C2) >> 16U; |
xorjoep | 1:24714b45cd1b | 257 | /* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ |
xorjoep | 1:24714b45cd1b | 258 | out2 = __SMUAD(C2, R); |
xorjoep | 1:24714b45cd1b | 259 | |
xorjoep | 1:24714b45cd1b | 260 | #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ |
xorjoep | 1:24714b45cd1b | 261 | |
xorjoep | 1:24714b45cd1b | 262 | /* Reading i0+fftLen/4 */ |
xorjoep | 1:24714b45cd1b | 263 | /* T = packed(yb, xb) */ |
xorjoep | 1:24714b45cd1b | 264 | T = _SIMD32_OFFSET(pSi1); |
xorjoep | 1:24714b45cd1b | 265 | T = __SHADD16(T, 0); |
xorjoep | 1:24714b45cd1b | 266 | T = __SHADD16(T, 0); |
xorjoep | 1:24714b45cd1b | 267 | |
xorjoep | 1:24714b45cd1b | 268 | /* writing the butterfly processed i0 + fftLen/4 sample */ |
xorjoep | 1:24714b45cd1b | 269 | /* writing output(xc', yc') in little endian format */ |
xorjoep | 1:24714b45cd1b | 270 | _SIMD32_OFFSET(pSi1) = |
xorjoep | 1:24714b45cd1b | 271 | (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); |
xorjoep | 1:24714b45cd1b | 272 | pSi1 += 2; |
xorjoep | 1:24714b45cd1b | 273 | |
xorjoep | 1:24714b45cd1b | 274 | /* Butterfly calculations */ |
xorjoep | 1:24714b45cd1b | 275 | /* U = packed(yd, xd) */ |
xorjoep | 1:24714b45cd1b | 276 | U = _SIMD32_OFFSET(pSi3); |
xorjoep | 1:24714b45cd1b | 277 | U = __SHADD16(U, 0); |
xorjoep | 1:24714b45cd1b | 278 | U = __SHADD16(U, 0); |
xorjoep | 1:24714b45cd1b | 279 | |
xorjoep | 1:24714b45cd1b | 280 | /* T = packed(yb-yd, xb-xd) */ |
xorjoep | 1:24714b45cd1b | 281 | T = __QSUB16(T, U); |
xorjoep | 1:24714b45cd1b | 282 | |
xorjoep | 1:24714b45cd1b | 283 | #ifndef ARM_MATH_BIG_ENDIAN |
xorjoep | 1:24714b45cd1b | 284 | |
xorjoep | 1:24714b45cd1b | 285 | /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ |
xorjoep | 1:24714b45cd1b | 286 | R = __QASX(S, T); |
xorjoep | 1:24714b45cd1b | 287 | /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ |
xorjoep | 1:24714b45cd1b | 288 | S = __QSAX(S, T); |
xorjoep | 1:24714b45cd1b | 289 | |
xorjoep | 1:24714b45cd1b | 290 | #else |
xorjoep | 1:24714b45cd1b | 291 | |
xorjoep | 1:24714b45cd1b | 292 | /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ |
xorjoep | 1:24714b45cd1b | 293 | R = __QSAX(S, T); |
xorjoep | 1:24714b45cd1b | 294 | /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ |
xorjoep | 1:24714b45cd1b | 295 | S = __QASX(S, T); |
xorjoep | 1:24714b45cd1b | 296 | |
xorjoep | 1:24714b45cd1b | 297 | #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ |
xorjoep | 1:24714b45cd1b | 298 | |
xorjoep | 1:24714b45cd1b | 299 | /* co1 & si1 are read from SIMD Coefficient pointer */ |
xorjoep | 1:24714b45cd1b | 300 | C1 = _SIMD32_OFFSET(pCoef16 + (2U * ic)); |
xorjoep | 1:24714b45cd1b | 301 | /* Butterfly process for the i0+fftLen/2 sample */ |
xorjoep | 1:24714b45cd1b | 302 | |
xorjoep | 1:24714b45cd1b | 303 | #ifndef ARM_MATH_BIG_ENDIAN |
xorjoep | 1:24714b45cd1b | 304 | |
xorjoep | 1:24714b45cd1b | 305 | /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ |
xorjoep | 1:24714b45cd1b | 306 | out1 = __SMUAD(C1, S) >> 16U; |
xorjoep | 1:24714b45cd1b | 307 | /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ |
xorjoep | 1:24714b45cd1b | 308 | out2 = __SMUSDX(C1, S); |
xorjoep | 1:24714b45cd1b | 309 | |
xorjoep | 1:24714b45cd1b | 310 | #else |
xorjoep | 1:24714b45cd1b | 311 | |
xorjoep | 1:24714b45cd1b | 312 | /* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ |
xorjoep | 1:24714b45cd1b | 313 | out1 = __SMUSDX(S, C1) >> 16U; |
xorjoep | 1:24714b45cd1b | 314 | /* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ |
xorjoep | 1:24714b45cd1b | 315 | out2 = __SMUAD(C1, S); |
xorjoep | 1:24714b45cd1b | 316 | |
xorjoep | 1:24714b45cd1b | 317 | #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ |
xorjoep | 1:24714b45cd1b | 318 | |
xorjoep | 1:24714b45cd1b | 319 | /* writing output(xb', yb') in little endian format */ |
xorjoep | 1:24714b45cd1b | 320 | _SIMD32_OFFSET(pSi2) = |
xorjoep | 1:24714b45cd1b | 321 | ((out2) & 0xFFFF0000) | ((out1) & 0x0000FFFF); |
xorjoep | 1:24714b45cd1b | 322 | pSi2 += 2; |
xorjoep | 1:24714b45cd1b | 323 | |
xorjoep | 1:24714b45cd1b | 324 | |
xorjoep | 1:24714b45cd1b | 325 | /* co3 & si3 are read from SIMD Coefficient pointer */ |
xorjoep | 1:24714b45cd1b | 326 | C3 = _SIMD32_OFFSET(pCoef16 + (6U * ic)); |
xorjoep | 1:24714b45cd1b | 327 | /* Butterfly process for the i0+3fftLen/4 sample */ |
xorjoep | 1:24714b45cd1b | 328 | |
xorjoep | 1:24714b45cd1b | 329 | #ifndef ARM_MATH_BIG_ENDIAN |
xorjoep | 1:24714b45cd1b | 330 | |
xorjoep | 1:24714b45cd1b | 331 | /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ |
xorjoep | 1:24714b45cd1b | 332 | out1 = __SMUAD(C3, R) >> 16U; |
xorjoep | 1:24714b45cd1b | 333 | /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ |
xorjoep | 1:24714b45cd1b | 334 | out2 = __SMUSDX(C3, R); |
xorjoep | 1:24714b45cd1b | 335 | |
xorjoep | 1:24714b45cd1b | 336 | #else |
xorjoep | 1:24714b45cd1b | 337 | |
xorjoep | 1:24714b45cd1b | 338 | /* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ |
xorjoep | 1:24714b45cd1b | 339 | out1 = __SMUSDX(R, C3) >> 16U; |
xorjoep | 1:24714b45cd1b | 340 | /* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ |
xorjoep | 1:24714b45cd1b | 341 | out2 = __SMUAD(C3, R); |
xorjoep | 1:24714b45cd1b | 342 | |
xorjoep | 1:24714b45cd1b | 343 | #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ |
xorjoep | 1:24714b45cd1b | 344 | |
xorjoep | 1:24714b45cd1b | 345 | /* writing output(xd', yd') in little endian format */ |
xorjoep | 1:24714b45cd1b | 346 | _SIMD32_OFFSET(pSi3) = |
xorjoep | 1:24714b45cd1b | 347 | ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); |
xorjoep | 1:24714b45cd1b | 348 | pSi3 += 2; |
xorjoep | 1:24714b45cd1b | 349 | |
xorjoep | 1:24714b45cd1b | 350 | /* Twiddle coefficients index modifier */ |
xorjoep | 1:24714b45cd1b | 351 | ic = ic + twidCoefModifier; |
xorjoep | 1:24714b45cd1b | 352 | |
xorjoep | 1:24714b45cd1b | 353 | } while (--j); |
xorjoep | 1:24714b45cd1b | 354 | /* data is in 4.11(q11) format */ |
xorjoep | 1:24714b45cd1b | 355 | |
xorjoep | 1:24714b45cd1b | 356 | /* end of first stage process */ |
xorjoep | 1:24714b45cd1b | 357 | |
xorjoep | 1:24714b45cd1b | 358 | |
xorjoep | 1:24714b45cd1b | 359 | /* start of middle stage process */ |
xorjoep | 1:24714b45cd1b | 360 | |
xorjoep | 1:24714b45cd1b | 361 | /* Twiddle coefficients index modifier */ |
xorjoep | 1:24714b45cd1b | 362 | twidCoefModifier <<= 2U; |
xorjoep | 1:24714b45cd1b | 363 | |
xorjoep | 1:24714b45cd1b | 364 | /* Calculation of Middle stage */ |
xorjoep | 1:24714b45cd1b | 365 | for (k = fftLen / 4U; k > 4U; k >>= 2U) |
xorjoep | 1:24714b45cd1b | 366 | { |
xorjoep | 1:24714b45cd1b | 367 | /* Initializations for the middle stage */ |
xorjoep | 1:24714b45cd1b | 368 | n1 = n2; |
xorjoep | 1:24714b45cd1b | 369 | n2 >>= 2U; |
xorjoep | 1:24714b45cd1b | 370 | ic = 0U; |
xorjoep | 1:24714b45cd1b | 371 | |
xorjoep | 1:24714b45cd1b | 372 | for (j = 0U; j <= (n2 - 1U); j++) |
xorjoep | 1:24714b45cd1b | 373 | { |
xorjoep | 1:24714b45cd1b | 374 | /* index calculation for the coefficients */ |
xorjoep | 1:24714b45cd1b | 375 | C1 = _SIMD32_OFFSET(pCoef16 + (2U * ic)); |
xorjoep | 1:24714b45cd1b | 376 | C2 = _SIMD32_OFFSET(pCoef16 + (4U * ic)); |
xorjoep | 1:24714b45cd1b | 377 | C3 = _SIMD32_OFFSET(pCoef16 + (6U * ic)); |
xorjoep | 1:24714b45cd1b | 378 | |
xorjoep | 1:24714b45cd1b | 379 | /* Twiddle coefficients index modifier */ |
xorjoep | 1:24714b45cd1b | 380 | ic = ic + twidCoefModifier; |
xorjoep | 1:24714b45cd1b | 381 | |
xorjoep | 1:24714b45cd1b | 382 | pSi0 = pSrc16 + 2 * j; |
xorjoep | 1:24714b45cd1b | 383 | pSi1 = pSi0 + 2 * n2; |
xorjoep | 1:24714b45cd1b | 384 | pSi2 = pSi1 + 2 * n2; |
xorjoep | 1:24714b45cd1b | 385 | pSi3 = pSi2 + 2 * n2; |
xorjoep | 1:24714b45cd1b | 386 | |
xorjoep | 1:24714b45cd1b | 387 | /* Butterfly implementation */ |
xorjoep | 1:24714b45cd1b | 388 | for (i0 = j; i0 < fftLen; i0 += n1) |
xorjoep | 1:24714b45cd1b | 389 | { |
xorjoep | 1:24714b45cd1b | 390 | /* Reading i0, i0+fftLen/2 inputs */ |
xorjoep | 1:24714b45cd1b | 391 | /* Read ya (real), xa(imag) input */ |
xorjoep | 1:24714b45cd1b | 392 | T = _SIMD32_OFFSET(pSi0); |
xorjoep | 1:24714b45cd1b | 393 | |
xorjoep | 1:24714b45cd1b | 394 | /* Read yc (real), xc(imag) input */ |
xorjoep | 1:24714b45cd1b | 395 | S = _SIMD32_OFFSET(pSi2); |
xorjoep | 1:24714b45cd1b | 396 | |
xorjoep | 1:24714b45cd1b | 397 | /* R = packed( (ya + yc), (xa + xc)) */ |
xorjoep | 1:24714b45cd1b | 398 | R = __QADD16(T, S); |
xorjoep | 1:24714b45cd1b | 399 | |
xorjoep | 1:24714b45cd1b | 400 | /* S = packed((ya - yc), (xa - xc)) */ |
xorjoep | 1:24714b45cd1b | 401 | S = __QSUB16(T, S); |
xorjoep | 1:24714b45cd1b | 402 | |
xorjoep | 1:24714b45cd1b | 403 | /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ |
xorjoep | 1:24714b45cd1b | 404 | /* Read yb (real), xb(imag) input */ |
xorjoep | 1:24714b45cd1b | 405 | T = _SIMD32_OFFSET(pSi1); |
xorjoep | 1:24714b45cd1b | 406 | |
xorjoep | 1:24714b45cd1b | 407 | /* Read yd (real), xd(imag) input */ |
xorjoep | 1:24714b45cd1b | 408 | U = _SIMD32_OFFSET(pSi3); |
xorjoep | 1:24714b45cd1b | 409 | |
xorjoep | 1:24714b45cd1b | 410 | /* T = packed( (yb + yd), (xb + xd)) */ |
xorjoep | 1:24714b45cd1b | 411 | T = __QADD16(T, U); |
xorjoep | 1:24714b45cd1b | 412 | |
xorjoep | 1:24714b45cd1b | 413 | /* writing the butterfly processed i0 sample */ |
xorjoep | 1:24714b45cd1b | 414 | |
xorjoep | 1:24714b45cd1b | 415 | /* xa' = xa + xb + xc + xd */ |
xorjoep | 1:24714b45cd1b | 416 | /* ya' = ya + yb + yc + yd */ |
xorjoep | 1:24714b45cd1b | 417 | out1 = __SHADD16(R, T); |
xorjoep | 1:24714b45cd1b | 418 | out1 = __SHADD16(out1, 0); |
xorjoep | 1:24714b45cd1b | 419 | _SIMD32_OFFSET(pSi0) = out1; |
xorjoep | 1:24714b45cd1b | 420 | pSi0 += 2 * n1; |
xorjoep | 1:24714b45cd1b | 421 | |
xorjoep | 1:24714b45cd1b | 422 | /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */ |
xorjoep | 1:24714b45cd1b | 423 | R = __SHSUB16(R, T); |
xorjoep | 1:24714b45cd1b | 424 | |
xorjoep | 1:24714b45cd1b | 425 | #ifndef ARM_MATH_BIG_ENDIAN |
xorjoep | 1:24714b45cd1b | 426 | |
xorjoep | 1:24714b45cd1b | 427 | /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ |
xorjoep | 1:24714b45cd1b | 428 | out1 = __SMUAD(C2, R) >> 16U; |
xorjoep | 1:24714b45cd1b | 429 | |
xorjoep | 1:24714b45cd1b | 430 | /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ |
xorjoep | 1:24714b45cd1b | 431 | out2 = __SMUSDX(C2, R); |
xorjoep | 1:24714b45cd1b | 432 | |
xorjoep | 1:24714b45cd1b | 433 | #else |
xorjoep | 1:24714b45cd1b | 434 | |
xorjoep | 1:24714b45cd1b | 435 | /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ |
xorjoep | 1:24714b45cd1b | 436 | out1 = __SMUSDX(R, C2) >> 16U; |
xorjoep | 1:24714b45cd1b | 437 | |
xorjoep | 1:24714b45cd1b | 438 | /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ |
xorjoep | 1:24714b45cd1b | 439 | out2 = __SMUAD(C2, R); |
xorjoep | 1:24714b45cd1b | 440 | |
xorjoep | 1:24714b45cd1b | 441 | #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ |
xorjoep | 1:24714b45cd1b | 442 | |
xorjoep | 1:24714b45cd1b | 443 | /* Reading i0+3fftLen/4 */ |
xorjoep | 1:24714b45cd1b | 444 | /* Read yb (real), xb(imag) input */ |
xorjoep | 1:24714b45cd1b | 445 | T = _SIMD32_OFFSET(pSi1); |
xorjoep | 1:24714b45cd1b | 446 | |
xorjoep | 1:24714b45cd1b | 447 | /* writing the butterfly processed i0 + fftLen/4 sample */ |
xorjoep | 1:24714b45cd1b | 448 | /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ |
xorjoep | 1:24714b45cd1b | 449 | /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ |
xorjoep | 1:24714b45cd1b | 450 | _SIMD32_OFFSET(pSi1) = |
xorjoep | 1:24714b45cd1b | 451 | ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); |
xorjoep | 1:24714b45cd1b | 452 | pSi1 += 2 * n1; |
xorjoep | 1:24714b45cd1b | 453 | |
xorjoep | 1:24714b45cd1b | 454 | /* Butterfly calculations */ |
xorjoep | 1:24714b45cd1b | 455 | |
xorjoep | 1:24714b45cd1b | 456 | /* Read yd (real), xd(imag) input */ |
xorjoep | 1:24714b45cd1b | 457 | U = _SIMD32_OFFSET(pSi3); |
xorjoep | 1:24714b45cd1b | 458 | |
xorjoep | 1:24714b45cd1b | 459 | /* T = packed(yb-yd, xb-xd) */ |
xorjoep | 1:24714b45cd1b | 460 | T = __QSUB16(T, U); |
xorjoep | 1:24714b45cd1b | 461 | |
xorjoep | 1:24714b45cd1b | 462 | #ifndef ARM_MATH_BIG_ENDIAN |
xorjoep | 1:24714b45cd1b | 463 | |
xorjoep | 1:24714b45cd1b | 464 | /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ |
xorjoep | 1:24714b45cd1b | 465 | R = __SHASX(S, T); |
xorjoep | 1:24714b45cd1b | 466 | |
xorjoep | 1:24714b45cd1b | 467 | /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ |
xorjoep | 1:24714b45cd1b | 468 | S = __SHSAX(S, T); |
xorjoep | 1:24714b45cd1b | 469 | |
xorjoep | 1:24714b45cd1b | 470 | |
xorjoep | 1:24714b45cd1b | 471 | /* Butterfly process for the i0+fftLen/2 sample */ |
xorjoep | 1:24714b45cd1b | 472 | out1 = __SMUAD(C1, S) >> 16U; |
xorjoep | 1:24714b45cd1b | 473 | out2 = __SMUSDX(C1, S); |
xorjoep | 1:24714b45cd1b | 474 | |
xorjoep | 1:24714b45cd1b | 475 | #else |
xorjoep | 1:24714b45cd1b | 476 | |
xorjoep | 1:24714b45cd1b | 477 | /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ |
xorjoep | 1:24714b45cd1b | 478 | R = __SHSAX(S, T); |
xorjoep | 1:24714b45cd1b | 479 | |
xorjoep | 1:24714b45cd1b | 480 | /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ |
xorjoep | 1:24714b45cd1b | 481 | S = __SHASX(S, T); |
xorjoep | 1:24714b45cd1b | 482 | |
xorjoep | 1:24714b45cd1b | 483 | |
xorjoep | 1:24714b45cd1b | 484 | /* Butterfly process for the i0+fftLen/2 sample */ |
xorjoep | 1:24714b45cd1b | 485 | out1 = __SMUSDX(S, C1) >> 16U; |
xorjoep | 1:24714b45cd1b | 486 | out2 = __SMUAD(C1, S); |
xorjoep | 1:24714b45cd1b | 487 | |
xorjoep | 1:24714b45cd1b | 488 | #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ |
xorjoep | 1:24714b45cd1b | 489 | |
xorjoep | 1:24714b45cd1b | 490 | /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ |
xorjoep | 1:24714b45cd1b | 491 | /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ |
xorjoep | 1:24714b45cd1b | 492 | _SIMD32_OFFSET(pSi2) = |
xorjoep | 1:24714b45cd1b | 493 | ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); |
xorjoep | 1:24714b45cd1b | 494 | pSi2 += 2 * n1; |
xorjoep | 1:24714b45cd1b | 495 | |
xorjoep | 1:24714b45cd1b | 496 | /* Butterfly process for the i0+3fftLen/4 sample */ |
xorjoep | 1:24714b45cd1b | 497 | |
xorjoep | 1:24714b45cd1b | 498 | #ifndef ARM_MATH_BIG_ENDIAN |
xorjoep | 1:24714b45cd1b | 499 | |
xorjoep | 1:24714b45cd1b | 500 | out1 = __SMUAD(C3, R) >> 16U; |
xorjoep | 1:24714b45cd1b | 501 | out2 = __SMUSDX(C3, R); |
xorjoep | 1:24714b45cd1b | 502 | |
xorjoep | 1:24714b45cd1b | 503 | #else |
xorjoep | 1:24714b45cd1b | 504 | |
xorjoep | 1:24714b45cd1b | 505 | out1 = __SMUSDX(R, C3) >> 16U; |
xorjoep | 1:24714b45cd1b | 506 | out2 = __SMUAD(C3, R); |
xorjoep | 1:24714b45cd1b | 507 | |
xorjoep | 1:24714b45cd1b | 508 | #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ |
xorjoep | 1:24714b45cd1b | 509 | |
xorjoep | 1:24714b45cd1b | 510 | /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ |
xorjoep | 1:24714b45cd1b | 511 | /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ |
xorjoep | 1:24714b45cd1b | 512 | _SIMD32_OFFSET(pSi3) = |
xorjoep | 1:24714b45cd1b | 513 | ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); |
xorjoep | 1:24714b45cd1b | 514 | pSi3 += 2 * n1; |
xorjoep | 1:24714b45cd1b | 515 | } |
xorjoep | 1:24714b45cd1b | 516 | } |
xorjoep | 1:24714b45cd1b | 517 | /* Twiddle coefficients index modifier */ |
xorjoep | 1:24714b45cd1b | 518 | twidCoefModifier <<= 2U; |
xorjoep | 1:24714b45cd1b | 519 | } |
xorjoep | 1:24714b45cd1b | 520 | /* end of middle stage process */ |
xorjoep | 1:24714b45cd1b | 521 | |
xorjoep | 1:24714b45cd1b | 522 | |
xorjoep | 1:24714b45cd1b | 523 | /* data is in 10.6(q6) format for the 1024 point */ |
xorjoep | 1:24714b45cd1b | 524 | /* data is in 8.8(q8) format for the 256 point */ |
xorjoep | 1:24714b45cd1b | 525 | /* data is in 6.10(q10) format for the 64 point */ |
xorjoep | 1:24714b45cd1b | 526 | /* data is in 4.12(q12) format for the 16 point */ |
xorjoep | 1:24714b45cd1b | 527 | |
xorjoep | 1:24714b45cd1b | 528 | /* Initializations for the last stage */ |
xorjoep | 1:24714b45cd1b | 529 | j = fftLen >> 2; |
xorjoep | 1:24714b45cd1b | 530 | |
xorjoep | 1:24714b45cd1b | 531 | ptr1 = &pSrc16[0]; |
xorjoep | 1:24714b45cd1b | 532 | |
xorjoep | 1:24714b45cd1b | 533 | /* start of last stage process */ |
xorjoep | 1:24714b45cd1b | 534 | |
xorjoep | 1:24714b45cd1b | 535 | /* Butterfly implementation */ |
xorjoep | 1:24714b45cd1b | 536 | do |
xorjoep | 1:24714b45cd1b | 537 | { |
xorjoep | 1:24714b45cd1b | 538 | /* Read xa (real), ya(imag) input */ |
xorjoep | 1:24714b45cd1b | 539 | xaya = *__SIMD32(ptr1)++; |
xorjoep | 1:24714b45cd1b | 540 | |
xorjoep | 1:24714b45cd1b | 541 | /* Read xb (real), yb(imag) input */ |
xorjoep | 1:24714b45cd1b | 542 | xbyb = *__SIMD32(ptr1)++; |
xorjoep | 1:24714b45cd1b | 543 | |
xorjoep | 1:24714b45cd1b | 544 | /* Read xc (real), yc(imag) input */ |
xorjoep | 1:24714b45cd1b | 545 | xcyc = *__SIMD32(ptr1)++; |
xorjoep | 1:24714b45cd1b | 546 | |
xorjoep | 1:24714b45cd1b | 547 | /* Read xd (real), yd(imag) input */ |
xorjoep | 1:24714b45cd1b | 548 | xdyd = *__SIMD32(ptr1)++; |
xorjoep | 1:24714b45cd1b | 549 | |
xorjoep | 1:24714b45cd1b | 550 | /* R = packed((ya + yc), (xa + xc)) */ |
xorjoep | 1:24714b45cd1b | 551 | R = __QADD16(xaya, xcyc); |
xorjoep | 1:24714b45cd1b | 552 | |
xorjoep | 1:24714b45cd1b | 553 | /* T = packed((yb + yd), (xb + xd)) */ |
xorjoep | 1:24714b45cd1b | 554 | T = __QADD16(xbyb, xdyd); |
xorjoep | 1:24714b45cd1b | 555 | |
xorjoep | 1:24714b45cd1b | 556 | /* pointer updation for writing */ |
xorjoep | 1:24714b45cd1b | 557 | ptr1 = ptr1 - 8U; |
xorjoep | 1:24714b45cd1b | 558 | |
xorjoep | 1:24714b45cd1b | 559 | |
xorjoep | 1:24714b45cd1b | 560 | /* xa' = xa + xb + xc + xd */ |
xorjoep | 1:24714b45cd1b | 561 | /* ya' = ya + yb + yc + yd */ |
xorjoep | 1:24714b45cd1b | 562 | *__SIMD32(ptr1)++ = __SHADD16(R, T); |
xorjoep | 1:24714b45cd1b | 563 | |
xorjoep | 1:24714b45cd1b | 564 | /* T = packed((yb + yd), (xb + xd)) */ |
xorjoep | 1:24714b45cd1b | 565 | T = __QADD16(xbyb, xdyd); |
xorjoep | 1:24714b45cd1b | 566 | |
xorjoep | 1:24714b45cd1b | 567 | /* xc' = (xa-xb+xc-xd) */ |
xorjoep | 1:24714b45cd1b | 568 | /* yc' = (ya-yb+yc-yd) */ |
xorjoep | 1:24714b45cd1b | 569 | *__SIMD32(ptr1)++ = __SHSUB16(R, T); |
xorjoep | 1:24714b45cd1b | 570 | |
xorjoep | 1:24714b45cd1b | 571 | /* S = packed((ya - yc), (xa - xc)) */ |
xorjoep | 1:24714b45cd1b | 572 | S = __QSUB16(xaya, xcyc); |
xorjoep | 1:24714b45cd1b | 573 | |
xorjoep | 1:24714b45cd1b | 574 | /* Read yd (real), xd(imag) input */ |
xorjoep | 1:24714b45cd1b | 575 | /* T = packed( (yb - yd), (xb - xd)) */ |
xorjoep | 1:24714b45cd1b | 576 | U = __QSUB16(xbyb, xdyd); |
xorjoep | 1:24714b45cd1b | 577 | |
xorjoep | 1:24714b45cd1b | 578 | #ifndef ARM_MATH_BIG_ENDIAN |
xorjoep | 1:24714b45cd1b | 579 | |
xorjoep | 1:24714b45cd1b | 580 | /* xb' = (xa+yb-xc-yd) */ |
xorjoep | 1:24714b45cd1b | 581 | /* yb' = (ya-xb-yc+xd) */ |
xorjoep | 1:24714b45cd1b | 582 | *__SIMD32(ptr1)++ = __SHSAX(S, U); |
xorjoep | 1:24714b45cd1b | 583 | |
xorjoep | 1:24714b45cd1b | 584 | |
xorjoep | 1:24714b45cd1b | 585 | /* xd' = (xa-yb-xc+yd) */ |
xorjoep | 1:24714b45cd1b | 586 | /* yd' = (ya+xb-yc-xd) */ |
xorjoep | 1:24714b45cd1b | 587 | *__SIMD32(ptr1)++ = __SHASX(S, U); |
xorjoep | 1:24714b45cd1b | 588 | |
xorjoep | 1:24714b45cd1b | 589 | #else |
xorjoep | 1:24714b45cd1b | 590 | |
xorjoep | 1:24714b45cd1b | 591 | /* xb' = (xa+yb-xc-yd) */ |
xorjoep | 1:24714b45cd1b | 592 | /* yb' = (ya-xb-yc+xd) */ |
xorjoep | 1:24714b45cd1b | 593 | *__SIMD32(ptr1)++ = __SHASX(S, U); |
xorjoep | 1:24714b45cd1b | 594 | |
xorjoep | 1:24714b45cd1b | 595 | |
xorjoep | 1:24714b45cd1b | 596 | /* xd' = (xa-yb-xc+yd) */ |
xorjoep | 1:24714b45cd1b | 597 | /* yd' = (ya+xb-yc-xd) */ |
xorjoep | 1:24714b45cd1b | 598 | *__SIMD32(ptr1)++ = __SHSAX(S, U); |
xorjoep | 1:24714b45cd1b | 599 | |
xorjoep | 1:24714b45cd1b | 600 | #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ |
xorjoep | 1:24714b45cd1b | 601 | |
xorjoep | 1:24714b45cd1b | 602 | } while (--j); |
xorjoep | 1:24714b45cd1b | 603 | |
xorjoep | 1:24714b45cd1b | 604 | /* end of last stage process */ |
xorjoep | 1:24714b45cd1b | 605 | |
xorjoep | 1:24714b45cd1b | 606 | /* output is in 11.5(q5) format for the 1024 point */ |
xorjoep | 1:24714b45cd1b | 607 | /* output is in 9.7(q7) format for the 256 point */ |
xorjoep | 1:24714b45cd1b | 608 | /* output is in 7.9(q9) format for the 64 point */ |
xorjoep | 1:24714b45cd1b | 609 | /* output is in 5.11(q11) format for the 16 point */ |
xorjoep | 1:24714b45cd1b | 610 | |
xorjoep | 1:24714b45cd1b | 611 | |
xorjoep | 1:24714b45cd1b | 612 | #else |
xorjoep | 1:24714b45cd1b | 613 | |
xorjoep | 1:24714b45cd1b | 614 | /* Run the below code for Cortex-M0 */ |
xorjoep | 1:24714b45cd1b | 615 | |
xorjoep | 1:24714b45cd1b | 616 | q15_t R0, R1, S0, S1, T0, T1, U0, U1; |
xorjoep | 1:24714b45cd1b | 617 | q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2; |
xorjoep | 1:24714b45cd1b | 618 | uint32_t n1, n2, ic, i0, i1, i2, i3, j, k; |
xorjoep | 1:24714b45cd1b | 619 | |
xorjoep | 1:24714b45cd1b | 620 | /* Total process is divided into three stages */ |
xorjoep | 1:24714b45cd1b | 621 | |
xorjoep | 1:24714b45cd1b | 622 | /* process first stage, middle stages, & last stage */ |
xorjoep | 1:24714b45cd1b | 623 | |
xorjoep | 1:24714b45cd1b | 624 | /* Initializations for the first stage */ |
xorjoep | 1:24714b45cd1b | 625 | n2 = fftLen; |
xorjoep | 1:24714b45cd1b | 626 | n1 = n2; |
xorjoep | 1:24714b45cd1b | 627 | |
xorjoep | 1:24714b45cd1b | 628 | /* n2 = fftLen/4 */ |
xorjoep | 1:24714b45cd1b | 629 | n2 >>= 2U; |
xorjoep | 1:24714b45cd1b | 630 | |
xorjoep | 1:24714b45cd1b | 631 | /* Index for twiddle coefficient */ |
xorjoep | 1:24714b45cd1b | 632 | ic = 0U; |
xorjoep | 1:24714b45cd1b | 633 | |
xorjoep | 1:24714b45cd1b | 634 | /* Index for input read and output write */ |
xorjoep | 1:24714b45cd1b | 635 | i0 = 0U; |
xorjoep | 1:24714b45cd1b | 636 | j = n2; |
xorjoep | 1:24714b45cd1b | 637 | |
xorjoep | 1:24714b45cd1b | 638 | /* Input is in 1.15(q15) format */ |
xorjoep | 1:24714b45cd1b | 639 | |
xorjoep | 1:24714b45cd1b | 640 | /* start of first stage process */ |
xorjoep | 1:24714b45cd1b | 641 | do |
xorjoep | 1:24714b45cd1b | 642 | { |
xorjoep | 1:24714b45cd1b | 643 | /* Butterfly implementation */ |
xorjoep | 1:24714b45cd1b | 644 | |
xorjoep | 1:24714b45cd1b | 645 | /* index calculation for the input as, */ |
xorjoep | 1:24714b45cd1b | 646 | /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ |
xorjoep | 1:24714b45cd1b | 647 | i1 = i0 + n2; |
xorjoep | 1:24714b45cd1b | 648 | i2 = i1 + n2; |
xorjoep | 1:24714b45cd1b | 649 | i3 = i2 + n2; |
xorjoep | 1:24714b45cd1b | 650 | |
xorjoep | 1:24714b45cd1b | 651 | /* Reading i0, i0+fftLen/2 inputs */ |
xorjoep | 1:24714b45cd1b | 652 | |
xorjoep | 1:24714b45cd1b | 653 | /* input is down scale by 4 to avoid overflow */ |
xorjoep | 1:24714b45cd1b | 654 | /* Read ya (real), xa(imag) input */ |
xorjoep | 1:24714b45cd1b | 655 | T0 = pSrc16[i0 * 2U] >> 2U; |
xorjoep | 1:24714b45cd1b | 656 | T1 = pSrc16[(i0 * 2U) + 1U] >> 2U; |
xorjoep | 1:24714b45cd1b | 657 | |
xorjoep | 1:24714b45cd1b | 658 | /* input is down scale by 4 to avoid overflow */ |
xorjoep | 1:24714b45cd1b | 659 | /* Read yc (real), xc(imag) input */ |
xorjoep | 1:24714b45cd1b | 660 | S0 = pSrc16[i2 * 2U] >> 2U; |
xorjoep | 1:24714b45cd1b | 661 | S1 = pSrc16[(i2 * 2U) + 1U] >> 2U; |
xorjoep | 1:24714b45cd1b | 662 | |
xorjoep | 1:24714b45cd1b | 663 | /* R0 = (ya + yc) */ |
xorjoep | 1:24714b45cd1b | 664 | R0 = __SSAT(T0 + S0, 16U); |
xorjoep | 1:24714b45cd1b | 665 | /* R1 = (xa + xc) */ |
xorjoep | 1:24714b45cd1b | 666 | R1 = __SSAT(T1 + S1, 16U); |
xorjoep | 1:24714b45cd1b | 667 | |
xorjoep | 1:24714b45cd1b | 668 | /* S0 = (ya - yc) */ |
xorjoep | 1:24714b45cd1b | 669 | S0 = __SSAT(T0 - S0, 16); |
xorjoep | 1:24714b45cd1b | 670 | /* S1 = (xa - xc) */ |
xorjoep | 1:24714b45cd1b | 671 | S1 = __SSAT(T1 - S1, 16); |
xorjoep | 1:24714b45cd1b | 672 | |
xorjoep | 1:24714b45cd1b | 673 | /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ |
xorjoep | 1:24714b45cd1b | 674 | /* input is down scale by 4 to avoid overflow */ |
xorjoep | 1:24714b45cd1b | 675 | /* Read yb (real), xb(imag) input */ |
xorjoep | 1:24714b45cd1b | 676 | T0 = pSrc16[i1 * 2U] >> 2U; |
xorjoep | 1:24714b45cd1b | 677 | T1 = pSrc16[(i1 * 2U) + 1U] >> 2U; |
xorjoep | 1:24714b45cd1b | 678 | |
xorjoep | 1:24714b45cd1b | 679 | /* input is down scale by 4 to avoid overflow */ |
xorjoep | 1:24714b45cd1b | 680 | /* Read yd (real), xd(imag) input */ |
xorjoep | 1:24714b45cd1b | 681 | U0 = pSrc16[i3 * 2U] >> 2U; |
xorjoep | 1:24714b45cd1b | 682 | U1 = pSrc16[(i3 * 2U) + 1] >> 2U; |
xorjoep | 1:24714b45cd1b | 683 | |
xorjoep | 1:24714b45cd1b | 684 | /* T0 = (yb + yd) */ |
xorjoep | 1:24714b45cd1b | 685 | T0 = __SSAT(T0 + U0, 16U); |
xorjoep | 1:24714b45cd1b | 686 | /* T1 = (xb + xd) */ |
xorjoep | 1:24714b45cd1b | 687 | T1 = __SSAT(T1 + U1, 16U); |
xorjoep | 1:24714b45cd1b | 688 | |
xorjoep | 1:24714b45cd1b | 689 | /* writing the butterfly processed i0 sample */ |
xorjoep | 1:24714b45cd1b | 690 | /* ya' = ya + yb + yc + yd */ |
xorjoep | 1:24714b45cd1b | 691 | /* xa' = xa + xb + xc + xd */ |
xorjoep | 1:24714b45cd1b | 692 | pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U); |
xorjoep | 1:24714b45cd1b | 693 | pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U); |
xorjoep | 1:24714b45cd1b | 694 | |
xorjoep | 1:24714b45cd1b | 695 | /* R0 = (ya + yc) - (yb + yd) */ |
xorjoep | 1:24714b45cd1b | 696 | /* R1 = (xa + xc) - (xb + xd) */ |
xorjoep | 1:24714b45cd1b | 697 | R0 = __SSAT(R0 - T0, 16U); |
xorjoep | 1:24714b45cd1b | 698 | R1 = __SSAT(R1 - T1, 16U); |
xorjoep | 1:24714b45cd1b | 699 | |
xorjoep | 1:24714b45cd1b | 700 | /* co2 & si2 are read from Coefficient pointer */ |
xorjoep | 1:24714b45cd1b | 701 | Co2 = pCoef16[2U * ic * 2U]; |
xorjoep | 1:24714b45cd1b | 702 | Si2 = pCoef16[(2U * ic * 2U) + 1]; |
xorjoep | 1:24714b45cd1b | 703 | |
xorjoep | 1:24714b45cd1b | 704 | /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ |
xorjoep | 1:24714b45cd1b | 705 | out1 = (q15_t) ((Co2 * R0 + Si2 * R1) >> 16U); |
xorjoep | 1:24714b45cd1b | 706 | /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ |
xorjoep | 1:24714b45cd1b | 707 | out2 = (q15_t) ((-Si2 * R0 + Co2 * R1) >> 16U); |
xorjoep | 1:24714b45cd1b | 708 | |
xorjoep | 1:24714b45cd1b | 709 | /* Reading i0+fftLen/4 */ |
xorjoep | 1:24714b45cd1b | 710 | /* input is down scale by 4 to avoid overflow */ |
xorjoep | 1:24714b45cd1b | 711 | /* T0 = yb, T1 = xb */ |
xorjoep | 1:24714b45cd1b | 712 | T0 = pSrc16[i1 * 2U] >> 2; |
xorjoep | 1:24714b45cd1b | 713 | T1 = pSrc16[(i1 * 2U) + 1] >> 2; |
xorjoep | 1:24714b45cd1b | 714 | |
xorjoep | 1:24714b45cd1b | 715 | /* writing the butterfly processed i0 + fftLen/4 sample */ |
xorjoep | 1:24714b45cd1b | 716 | /* writing output(xc', yc') in little endian format */ |
xorjoep | 1:24714b45cd1b | 717 | pSrc16[i1 * 2U] = out1; |
xorjoep | 1:24714b45cd1b | 718 | pSrc16[(i1 * 2U) + 1] = out2; |
xorjoep | 1:24714b45cd1b | 719 | |
xorjoep | 1:24714b45cd1b | 720 | /* Butterfly calculations */ |
xorjoep | 1:24714b45cd1b | 721 | /* input is down scale by 4 to avoid overflow */ |
xorjoep | 1:24714b45cd1b | 722 | /* U0 = yd, U1 = xd */ |
xorjoep | 1:24714b45cd1b | 723 | U0 = pSrc16[i3 * 2U] >> 2; |
xorjoep | 1:24714b45cd1b | 724 | U1 = pSrc16[(i3 * 2U) + 1] >> 2; |
xorjoep | 1:24714b45cd1b | 725 | /* T0 = yb-yd */ |
xorjoep | 1:24714b45cd1b | 726 | T0 = __SSAT(T0 - U0, 16); |
xorjoep | 1:24714b45cd1b | 727 | /* T1 = xb-xd */ |
xorjoep | 1:24714b45cd1b | 728 | T1 = __SSAT(T1 - U1, 16); |
xorjoep | 1:24714b45cd1b | 729 | |
xorjoep | 1:24714b45cd1b | 730 | /* R1 = (ya-yc) + (xb- xd), R0 = (xa-xc) - (yb-yd)) */ |
xorjoep | 1:24714b45cd1b | 731 | R0 = (q15_t) __SSAT((q31_t) (S0 - T1), 16); |
xorjoep | 1:24714b45cd1b | 732 | R1 = (q15_t) __SSAT((q31_t) (S1 + T0), 16); |
xorjoep | 1:24714b45cd1b | 733 | |
xorjoep | 1:24714b45cd1b | 734 | /* S1 = (ya-yc) - (xb- xd), S0 = (xa-xc) + (yb-yd)) */ |
xorjoep | 1:24714b45cd1b | 735 | S0 = (q15_t) __SSAT(((q31_t) S0 + T1), 16U); |
xorjoep | 1:24714b45cd1b | 736 | S1 = (q15_t) __SSAT(((q31_t) S1 - T0), 16U); |
xorjoep | 1:24714b45cd1b | 737 | |
xorjoep | 1:24714b45cd1b | 738 | /* co1 & si1 are read from Coefficient pointer */ |
xorjoep | 1:24714b45cd1b | 739 | Co1 = pCoef16[ic * 2U]; |
xorjoep | 1:24714b45cd1b | 740 | Si1 = pCoef16[(ic * 2U) + 1]; |
xorjoep | 1:24714b45cd1b | 741 | /* Butterfly process for the i0+fftLen/2 sample */ |
xorjoep | 1:24714b45cd1b | 742 | /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ |
xorjoep | 1:24714b45cd1b | 743 | out1 = (q15_t) ((Si1 * S1 + Co1 * S0) >> 16); |
xorjoep | 1:24714b45cd1b | 744 | /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ |
xorjoep | 1:24714b45cd1b | 745 | out2 = (q15_t) ((-Si1 * S0 + Co1 * S1) >> 16); |
xorjoep | 1:24714b45cd1b | 746 | |
xorjoep | 1:24714b45cd1b | 747 | /* writing output(xb', yb') in little endian format */ |
xorjoep | 1:24714b45cd1b | 748 | pSrc16[i2 * 2U] = out1; |
xorjoep | 1:24714b45cd1b | 749 | pSrc16[(i2 * 2U) + 1] = out2; |
xorjoep | 1:24714b45cd1b | 750 | |
xorjoep | 1:24714b45cd1b | 751 | /* Co3 & si3 are read from Coefficient pointer */ |
xorjoep | 1:24714b45cd1b | 752 | Co3 = pCoef16[3U * (ic * 2U)]; |
xorjoep | 1:24714b45cd1b | 753 | Si3 = pCoef16[(3U * (ic * 2U)) + 1]; |
xorjoep | 1:24714b45cd1b | 754 | /* Butterfly process for the i0+3fftLen/4 sample */ |
xorjoep | 1:24714b45cd1b | 755 | /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */ |
xorjoep | 1:24714b45cd1b | 756 | out1 = (q15_t) ((Si3 * R1 + Co3 * R0) >> 16U); |
xorjoep | 1:24714b45cd1b | 757 | /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */ |
xorjoep | 1:24714b45cd1b | 758 | out2 = (q15_t) ((-Si3 * R0 + Co3 * R1) >> 16U); |
xorjoep | 1:24714b45cd1b | 759 | /* writing output(xd', yd') in little endian format */ |
xorjoep | 1:24714b45cd1b | 760 | pSrc16[i3 * 2U] = out1; |
xorjoep | 1:24714b45cd1b | 761 | pSrc16[(i3 * 2U) + 1] = out2; |
xorjoep | 1:24714b45cd1b | 762 | |
xorjoep | 1:24714b45cd1b | 763 | /* Twiddle coefficients index modifier */ |
xorjoep | 1:24714b45cd1b | 764 | ic = ic + twidCoefModifier; |
xorjoep | 1:24714b45cd1b | 765 | |
xorjoep | 1:24714b45cd1b | 766 | /* Updating input index */ |
xorjoep | 1:24714b45cd1b | 767 | i0 = i0 + 1U; |
xorjoep | 1:24714b45cd1b | 768 | |
xorjoep | 1:24714b45cd1b | 769 | } while (--j); |
xorjoep | 1:24714b45cd1b | 770 | /* data is in 4.11(q11) format */ |
xorjoep | 1:24714b45cd1b | 771 | |
xorjoep | 1:24714b45cd1b | 772 | /* end of first stage process */ |
xorjoep | 1:24714b45cd1b | 773 | |
xorjoep | 1:24714b45cd1b | 774 | |
xorjoep | 1:24714b45cd1b | 775 | /* start of middle stage process */ |
xorjoep | 1:24714b45cd1b | 776 | |
xorjoep | 1:24714b45cd1b | 777 | /* Twiddle coefficients index modifier */ |
xorjoep | 1:24714b45cd1b | 778 | twidCoefModifier <<= 2U; |
xorjoep | 1:24714b45cd1b | 779 | |
xorjoep | 1:24714b45cd1b | 780 | /* Calculation of Middle stage */ |
xorjoep | 1:24714b45cd1b | 781 | for (k = fftLen / 4U; k > 4U; k >>= 2U) |
xorjoep | 1:24714b45cd1b | 782 | { |
xorjoep | 1:24714b45cd1b | 783 | /* Initializations for the middle stage */ |
xorjoep | 1:24714b45cd1b | 784 | n1 = n2; |
xorjoep | 1:24714b45cd1b | 785 | n2 >>= 2U; |
xorjoep | 1:24714b45cd1b | 786 | ic = 0U; |
xorjoep | 1:24714b45cd1b | 787 | |
xorjoep | 1:24714b45cd1b | 788 | for (j = 0U; j <= (n2 - 1U); j++) |
xorjoep | 1:24714b45cd1b | 789 | { |
xorjoep | 1:24714b45cd1b | 790 | /* index calculation for the coefficients */ |
xorjoep | 1:24714b45cd1b | 791 | Co1 = pCoef16[ic * 2U]; |
xorjoep | 1:24714b45cd1b | 792 | Si1 = pCoef16[(ic * 2U) + 1U]; |
xorjoep | 1:24714b45cd1b | 793 | Co2 = pCoef16[2U * (ic * 2U)]; |
xorjoep | 1:24714b45cd1b | 794 | Si2 = pCoef16[(2U * (ic * 2U)) + 1U]; |
xorjoep | 1:24714b45cd1b | 795 | Co3 = pCoef16[3U * (ic * 2U)]; |
xorjoep | 1:24714b45cd1b | 796 | Si3 = pCoef16[(3U * (ic * 2U)) + 1U]; |
xorjoep | 1:24714b45cd1b | 797 | |
xorjoep | 1:24714b45cd1b | 798 | /* Twiddle coefficients index modifier */ |
xorjoep | 1:24714b45cd1b | 799 | ic = ic + twidCoefModifier; |
xorjoep | 1:24714b45cd1b | 800 | |
xorjoep | 1:24714b45cd1b | 801 | /* Butterfly implementation */ |
xorjoep | 1:24714b45cd1b | 802 | for (i0 = j; i0 < fftLen; i0 += n1) |
xorjoep | 1:24714b45cd1b | 803 | { |
xorjoep | 1:24714b45cd1b | 804 | /* index calculation for the input as, */ |
xorjoep | 1:24714b45cd1b | 805 | /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ |
xorjoep | 1:24714b45cd1b | 806 | i1 = i0 + n2; |
xorjoep | 1:24714b45cd1b | 807 | i2 = i1 + n2; |
xorjoep | 1:24714b45cd1b | 808 | i3 = i2 + n2; |
xorjoep | 1:24714b45cd1b | 809 | |
xorjoep | 1:24714b45cd1b | 810 | /* Reading i0, i0+fftLen/2 inputs */ |
xorjoep | 1:24714b45cd1b | 811 | /* Read ya (real), xa(imag) input */ |
xorjoep | 1:24714b45cd1b | 812 | T0 = pSrc16[i0 * 2U]; |
xorjoep | 1:24714b45cd1b | 813 | T1 = pSrc16[(i0 * 2U) + 1U]; |
xorjoep | 1:24714b45cd1b | 814 | |
xorjoep | 1:24714b45cd1b | 815 | /* Read yc (real), xc(imag) input */ |
xorjoep | 1:24714b45cd1b | 816 | S0 = pSrc16[i2 * 2U]; |
xorjoep | 1:24714b45cd1b | 817 | S1 = pSrc16[(i2 * 2U) + 1U]; |
xorjoep | 1:24714b45cd1b | 818 | |
xorjoep | 1:24714b45cd1b | 819 | /* R0 = (ya + yc), R1 = (xa + xc) */ |
xorjoep | 1:24714b45cd1b | 820 | R0 = __SSAT(T0 + S0, 16); |
xorjoep | 1:24714b45cd1b | 821 | R1 = __SSAT(T1 + S1, 16); |
xorjoep | 1:24714b45cd1b | 822 | |
xorjoep | 1:24714b45cd1b | 823 | /* S0 = (ya - yc), S1 =(xa - xc) */ |
xorjoep | 1:24714b45cd1b | 824 | S0 = __SSAT(T0 - S0, 16); |
xorjoep | 1:24714b45cd1b | 825 | S1 = __SSAT(T1 - S1, 16); |
xorjoep | 1:24714b45cd1b | 826 | |
xorjoep | 1:24714b45cd1b | 827 | /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ |
xorjoep | 1:24714b45cd1b | 828 | /* Read yb (real), xb(imag) input */ |
xorjoep | 1:24714b45cd1b | 829 | T0 = pSrc16[i1 * 2U]; |
xorjoep | 1:24714b45cd1b | 830 | T1 = pSrc16[(i1 * 2U) + 1U]; |
xorjoep | 1:24714b45cd1b | 831 | |
xorjoep | 1:24714b45cd1b | 832 | /* Read yd (real), xd(imag) input */ |
xorjoep | 1:24714b45cd1b | 833 | U0 = pSrc16[i3 * 2U]; |
xorjoep | 1:24714b45cd1b | 834 | U1 = pSrc16[(i3 * 2U) + 1U]; |
xorjoep | 1:24714b45cd1b | 835 | |
xorjoep | 1:24714b45cd1b | 836 | |
xorjoep | 1:24714b45cd1b | 837 | /* T0 = (yb + yd), T1 = (xb + xd) */ |
xorjoep | 1:24714b45cd1b | 838 | T0 = __SSAT(T0 + U0, 16); |
xorjoep | 1:24714b45cd1b | 839 | T1 = __SSAT(T1 + U1, 16); |
xorjoep | 1:24714b45cd1b | 840 | |
xorjoep | 1:24714b45cd1b | 841 | /* writing the butterfly processed i0 sample */ |
xorjoep | 1:24714b45cd1b | 842 | |
xorjoep | 1:24714b45cd1b | 843 | /* xa' = xa + xb + xc + xd */ |
xorjoep | 1:24714b45cd1b | 844 | /* ya' = ya + yb + yc + yd */ |
xorjoep | 1:24714b45cd1b | 845 | out1 = ((R0 >> 1U) + (T0 >> 1U)) >> 1U; |
xorjoep | 1:24714b45cd1b | 846 | out2 = ((R1 >> 1U) + (T1 >> 1U)) >> 1U; |
xorjoep | 1:24714b45cd1b | 847 | |
xorjoep | 1:24714b45cd1b | 848 | pSrc16[i0 * 2U] = out1; |
xorjoep | 1:24714b45cd1b | 849 | pSrc16[(2U * i0) + 1U] = out2; |
xorjoep | 1:24714b45cd1b | 850 | |
xorjoep | 1:24714b45cd1b | 851 | /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */ |
xorjoep | 1:24714b45cd1b | 852 | R0 = (R0 >> 1U) - (T0 >> 1U); |
xorjoep | 1:24714b45cd1b | 853 | R1 = (R1 >> 1U) - (T1 >> 1U); |
xorjoep | 1:24714b45cd1b | 854 | |
xorjoep | 1:24714b45cd1b | 855 | /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ |
xorjoep | 1:24714b45cd1b | 856 | out1 = (q15_t) ((Co2 * R0 + Si2 * R1) >> 16U); |
xorjoep | 1:24714b45cd1b | 857 | |
xorjoep | 1:24714b45cd1b | 858 | /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ |
xorjoep | 1:24714b45cd1b | 859 | out2 = (q15_t) ((-Si2 * R0 + Co2 * R1) >> 16U); |
xorjoep | 1:24714b45cd1b | 860 | |
xorjoep | 1:24714b45cd1b | 861 | /* Reading i0+3fftLen/4 */ |
xorjoep | 1:24714b45cd1b | 862 | /* Read yb (real), xb(imag) input */ |
xorjoep | 1:24714b45cd1b | 863 | T0 = pSrc16[i1 * 2U]; |
xorjoep | 1:24714b45cd1b | 864 | T1 = pSrc16[(i1 * 2U) + 1U]; |
xorjoep | 1:24714b45cd1b | 865 | |
xorjoep | 1:24714b45cd1b | 866 | /* writing the butterfly processed i0 + fftLen/4 sample */ |
xorjoep | 1:24714b45cd1b | 867 | /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ |
xorjoep | 1:24714b45cd1b | 868 | /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ |
xorjoep | 1:24714b45cd1b | 869 | pSrc16[i1 * 2U] = out1; |
xorjoep | 1:24714b45cd1b | 870 | pSrc16[(i1 * 2U) + 1U] = out2; |
xorjoep | 1:24714b45cd1b | 871 | |
xorjoep | 1:24714b45cd1b | 872 | /* Butterfly calculations */ |
xorjoep | 1:24714b45cd1b | 873 | |
xorjoep | 1:24714b45cd1b | 874 | /* Read yd (real), xd(imag) input */ |
xorjoep | 1:24714b45cd1b | 875 | U0 = pSrc16[i3 * 2U]; |
xorjoep | 1:24714b45cd1b | 876 | U1 = pSrc16[(i3 * 2U) + 1U]; |
xorjoep | 1:24714b45cd1b | 877 | |
xorjoep | 1:24714b45cd1b | 878 | /* T0 = yb-yd, T1 = xb-xd */ |
xorjoep | 1:24714b45cd1b | 879 | T0 = __SSAT(T0 - U0, 16); |
xorjoep | 1:24714b45cd1b | 880 | T1 = __SSAT(T1 - U1, 16); |
xorjoep | 1:24714b45cd1b | 881 | |
xorjoep | 1:24714b45cd1b | 882 | /* R0 = (ya-yc) + (xb- xd), R1 = (xa-xc) - (yb-yd)) */ |
xorjoep | 1:24714b45cd1b | 883 | R0 = (S0 >> 1U) - (T1 >> 1U); |
xorjoep | 1:24714b45cd1b | 884 | R1 = (S1 >> 1U) + (T0 >> 1U); |
xorjoep | 1:24714b45cd1b | 885 | |
xorjoep | 1:24714b45cd1b | 886 | /* S0 = (ya-yc) - (xb- xd), S1 = (xa-xc) + (yb-yd)) */ |
xorjoep | 1:24714b45cd1b | 887 | S0 = (S0 >> 1U) + (T1 >> 1U); |
xorjoep | 1:24714b45cd1b | 888 | S1 = (S1 >> 1U) - (T0 >> 1U); |
xorjoep | 1:24714b45cd1b | 889 | |
xorjoep | 1:24714b45cd1b | 890 | /* Butterfly process for the i0+fftLen/2 sample */ |
xorjoep | 1:24714b45cd1b | 891 | out1 = (q15_t) ((Co1 * S0 + Si1 * S1) >> 16U); |
xorjoep | 1:24714b45cd1b | 892 | |
xorjoep | 1:24714b45cd1b | 893 | out2 = (q15_t) ((-Si1 * S0 + Co1 * S1) >> 16U); |
xorjoep | 1:24714b45cd1b | 894 | |
xorjoep | 1:24714b45cd1b | 895 | /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ |
xorjoep | 1:24714b45cd1b | 896 | /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ |
xorjoep | 1:24714b45cd1b | 897 | pSrc16[i2 * 2U] = out1; |
xorjoep | 1:24714b45cd1b | 898 | pSrc16[(i2 * 2U) + 1U] = out2; |
xorjoep | 1:24714b45cd1b | 899 | |
xorjoep | 1:24714b45cd1b | 900 | /* Butterfly process for the i0+3fftLen/4 sample */ |
xorjoep | 1:24714b45cd1b | 901 | out1 = (q15_t) ((Si3 * R1 + Co3 * R0) >> 16U); |
xorjoep | 1:24714b45cd1b | 902 | |
xorjoep | 1:24714b45cd1b | 903 | out2 = (q15_t) ((-Si3 * R0 + Co3 * R1) >> 16U); |
xorjoep | 1:24714b45cd1b | 904 | /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */ |
xorjoep | 1:24714b45cd1b | 905 | /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */ |
xorjoep | 1:24714b45cd1b | 906 | pSrc16[i3 * 2U] = out1; |
xorjoep | 1:24714b45cd1b | 907 | pSrc16[(i3 * 2U) + 1U] = out2; |
xorjoep | 1:24714b45cd1b | 908 | } |
xorjoep | 1:24714b45cd1b | 909 | } |
xorjoep | 1:24714b45cd1b | 910 | /* Twiddle coefficients index modifier */ |
xorjoep | 1:24714b45cd1b | 911 | twidCoefModifier <<= 2U; |
xorjoep | 1:24714b45cd1b | 912 | } |
xorjoep | 1:24714b45cd1b | 913 | /* end of middle stage process */ |
xorjoep | 1:24714b45cd1b | 914 | |
xorjoep | 1:24714b45cd1b | 915 | |
xorjoep | 1:24714b45cd1b | 916 | /* data is in 10.6(q6) format for the 1024 point */ |
xorjoep | 1:24714b45cd1b | 917 | /* data is in 8.8(q8) format for the 256 point */ |
xorjoep | 1:24714b45cd1b | 918 | /* data is in 6.10(q10) format for the 64 point */ |
xorjoep | 1:24714b45cd1b | 919 | /* data is in 4.12(q12) format for the 16 point */ |
xorjoep | 1:24714b45cd1b | 920 | |
xorjoep | 1:24714b45cd1b | 921 | /* Initializations for the last stage */ |
xorjoep | 1:24714b45cd1b | 922 | n1 = n2; |
xorjoep | 1:24714b45cd1b | 923 | n2 >>= 2U; |
xorjoep | 1:24714b45cd1b | 924 | |
xorjoep | 1:24714b45cd1b | 925 | /* start of last stage process */ |
xorjoep | 1:24714b45cd1b | 926 | |
xorjoep | 1:24714b45cd1b | 927 | /* Butterfly implementation */ |
xorjoep | 1:24714b45cd1b | 928 | for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1) |
xorjoep | 1:24714b45cd1b | 929 | { |
xorjoep | 1:24714b45cd1b | 930 | /* index calculation for the input as, */ |
xorjoep | 1:24714b45cd1b | 931 | /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ |
xorjoep | 1:24714b45cd1b | 932 | i1 = i0 + n2; |
xorjoep | 1:24714b45cd1b | 933 | i2 = i1 + n2; |
xorjoep | 1:24714b45cd1b | 934 | i3 = i2 + n2; |
xorjoep | 1:24714b45cd1b | 935 | |
xorjoep | 1:24714b45cd1b | 936 | /* Reading i0, i0+fftLen/2 inputs */ |
xorjoep | 1:24714b45cd1b | 937 | /* Read ya (real), xa(imag) input */ |
xorjoep | 1:24714b45cd1b | 938 | T0 = pSrc16[i0 * 2U]; |
xorjoep | 1:24714b45cd1b | 939 | T1 = pSrc16[(i0 * 2U) + 1U]; |
xorjoep | 1:24714b45cd1b | 940 | |
xorjoep | 1:24714b45cd1b | 941 | /* Read yc (real), xc(imag) input */ |
xorjoep | 1:24714b45cd1b | 942 | S0 = pSrc16[i2 * 2U]; |
xorjoep | 1:24714b45cd1b | 943 | S1 = pSrc16[(i2 * 2U) + 1U]; |
xorjoep | 1:24714b45cd1b | 944 | |
xorjoep | 1:24714b45cd1b | 945 | /* R0 = (ya + yc), R1 = (xa + xc) */ |
xorjoep | 1:24714b45cd1b | 946 | R0 = __SSAT(T0 + S0, 16U); |
xorjoep | 1:24714b45cd1b | 947 | R1 = __SSAT(T1 + S1, 16U); |
xorjoep | 1:24714b45cd1b | 948 | |
xorjoep | 1:24714b45cd1b | 949 | /* S0 = (ya - yc), S1 = (xa - xc) */ |
xorjoep | 1:24714b45cd1b | 950 | S0 = __SSAT(T0 - S0, 16U); |
xorjoep | 1:24714b45cd1b | 951 | S1 = __SSAT(T1 - S1, 16U); |
xorjoep | 1:24714b45cd1b | 952 | |
xorjoep | 1:24714b45cd1b | 953 | /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ |
xorjoep | 1:24714b45cd1b | 954 | /* Read yb (real), xb(imag) input */ |
xorjoep | 1:24714b45cd1b | 955 | T0 = pSrc16[i1 * 2U]; |
xorjoep | 1:24714b45cd1b | 956 | T1 = pSrc16[(i1 * 2U) + 1U]; |
xorjoep | 1:24714b45cd1b | 957 | /* Read yd (real), xd(imag) input */ |
xorjoep | 1:24714b45cd1b | 958 | U0 = pSrc16[i3 * 2U]; |
xorjoep | 1:24714b45cd1b | 959 | U1 = pSrc16[(i3 * 2U) + 1U]; |
xorjoep | 1:24714b45cd1b | 960 | |
xorjoep | 1:24714b45cd1b | 961 | /* T0 = (yb + yd), T1 = (xb + xd)) */ |
xorjoep | 1:24714b45cd1b | 962 | T0 = __SSAT(T0 + U0, 16U); |
xorjoep | 1:24714b45cd1b | 963 | T1 = __SSAT(T1 + U1, 16U); |
xorjoep | 1:24714b45cd1b | 964 | |
xorjoep | 1:24714b45cd1b | 965 | /* writing the butterfly processed i0 sample */ |
xorjoep | 1:24714b45cd1b | 966 | /* xa' = xa + xb + xc + xd */ |
xorjoep | 1:24714b45cd1b | 967 | /* ya' = ya + yb + yc + yd */ |
xorjoep | 1:24714b45cd1b | 968 | pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U); |
xorjoep | 1:24714b45cd1b | 969 | pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U); |
xorjoep | 1:24714b45cd1b | 970 | |
xorjoep | 1:24714b45cd1b | 971 | /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */ |
xorjoep | 1:24714b45cd1b | 972 | R0 = (R0 >> 1U) - (T0 >> 1U); |
xorjoep | 1:24714b45cd1b | 973 | R1 = (R1 >> 1U) - (T1 >> 1U); |
xorjoep | 1:24714b45cd1b | 974 | /* Read yb (real), xb(imag) input */ |
xorjoep | 1:24714b45cd1b | 975 | T0 = pSrc16[i1 * 2U]; |
xorjoep | 1:24714b45cd1b | 976 | T1 = pSrc16[(i1 * 2U) + 1U]; |
xorjoep | 1:24714b45cd1b | 977 | |
xorjoep | 1:24714b45cd1b | 978 | /* writing the butterfly processed i0 + fftLen/4 sample */ |
xorjoep | 1:24714b45cd1b | 979 | /* xc' = (xa-xb+xc-xd) */ |
xorjoep | 1:24714b45cd1b | 980 | /* yc' = (ya-yb+yc-yd) */ |
xorjoep | 1:24714b45cd1b | 981 | pSrc16[i1 * 2U] = R0; |
xorjoep | 1:24714b45cd1b | 982 | pSrc16[(i1 * 2U) + 1U] = R1; |
xorjoep | 1:24714b45cd1b | 983 | |
xorjoep | 1:24714b45cd1b | 984 | /* Read yd (real), xd(imag) input */ |
xorjoep | 1:24714b45cd1b | 985 | U0 = pSrc16[i3 * 2U]; |
xorjoep | 1:24714b45cd1b | 986 | U1 = pSrc16[(i3 * 2U) + 1U]; |
xorjoep | 1:24714b45cd1b | 987 | /* T0 = (yb - yd), T1 = (xb - xd) */ |
xorjoep | 1:24714b45cd1b | 988 | T0 = __SSAT(T0 - U0, 16U); |
xorjoep | 1:24714b45cd1b | 989 | T1 = __SSAT(T1 - U1, 16U); |
xorjoep | 1:24714b45cd1b | 990 | |
xorjoep | 1:24714b45cd1b | 991 | /* writing the butterfly processed i0 + fftLen/2 sample */ |
xorjoep | 1:24714b45cd1b | 992 | /* xb' = (xa+yb-xc-yd) */ |
xorjoep | 1:24714b45cd1b | 993 | /* yb' = (ya-xb-yc+xd) */ |
xorjoep | 1:24714b45cd1b | 994 | pSrc16[i2 * 2U] = (S0 >> 1U) + (T1 >> 1U); |
xorjoep | 1:24714b45cd1b | 995 | pSrc16[(i2 * 2U) + 1U] = (S1 >> 1U) - (T0 >> 1U); |
xorjoep | 1:24714b45cd1b | 996 | |
xorjoep | 1:24714b45cd1b | 997 | /* writing the butterfly processed i0 + 3fftLen/4 sample */ |
xorjoep | 1:24714b45cd1b | 998 | /* xd' = (xa-yb-xc+yd) */ |
xorjoep | 1:24714b45cd1b | 999 | /* yd' = (ya+xb-yc-xd) */ |
xorjoep | 1:24714b45cd1b | 1000 | pSrc16[i3 * 2U] = (S0 >> 1U) - (T1 >> 1U); |
xorjoep | 1:24714b45cd1b | 1001 | pSrc16[(i3 * 2U) + 1U] = (S1 >> 1U) + (T0 >> 1U); |
xorjoep | 1:24714b45cd1b | 1002 | |
xorjoep | 1:24714b45cd1b | 1003 | } |
xorjoep | 1:24714b45cd1b | 1004 | |
xorjoep | 1:24714b45cd1b | 1005 | /* end of last stage process */ |
xorjoep | 1:24714b45cd1b | 1006 | |
xorjoep | 1:24714b45cd1b | 1007 | /* output is in 11.5(q5) format for the 1024 point */ |
xorjoep | 1:24714b45cd1b | 1008 | /* output is in 9.7(q7) format for the 256 point */ |
xorjoep | 1:24714b45cd1b | 1009 | /* output is in 7.9(q9) format for the 64 point */ |
xorjoep | 1:24714b45cd1b | 1010 | /* output is in 5.11(q11) format for the 16 point */ |
xorjoep | 1:24714b45cd1b | 1011 | |
xorjoep | 1:24714b45cd1b | 1012 | #endif /* #if defined (ARM_MATH_DSP) */ |
xorjoep | 1:24714b45cd1b | 1013 | |
xorjoep | 1:24714b45cd1b | 1014 | } |
xorjoep | 1:24714b45cd1b | 1015 | |
xorjoep | 1:24714b45cd1b | 1016 | |
xorjoep | 1:24714b45cd1b | 1017 | /** |
xorjoep | 1:24714b45cd1b | 1018 | * @brief Core function for the Q15 CIFFT butterfly process. |
xorjoep | 1:24714b45cd1b | 1019 | * @param[in, out] *pSrc16 points to the in-place buffer of Q15 data type. |
xorjoep | 1:24714b45cd1b | 1020 | * @param[in] fftLen length of the FFT. |
xorjoep | 1:24714b45cd1b | 1021 | * @param[in] *pCoef16 points to twiddle coefficient buffer. |
xorjoep | 1:24714b45cd1b | 1022 | * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. |
xorjoep | 1:24714b45cd1b | 1023 | * @return none. |
xorjoep | 1:24714b45cd1b | 1024 | */ |
xorjoep | 1:24714b45cd1b | 1025 | |
xorjoep | 1:24714b45cd1b | 1026 | /* |
xorjoep | 1:24714b45cd1b | 1027 | * Radix-4 IFFT algorithm used is : |
xorjoep | 1:24714b45cd1b | 1028 | * |
xorjoep | 1:24714b45cd1b | 1029 | * CIFFT uses same twiddle coefficients as CFFT function |
xorjoep | 1:24714b45cd1b | 1030 | * x[k] = x[n] + (j)^k * x[n + fftLen/4] + (-1)^k * x[n+fftLen/2] + (-j)^k * x[n+3*fftLen/4] |
xorjoep | 1:24714b45cd1b | 1031 | * |
xorjoep | 1:24714b45cd1b | 1032 | * |
xorjoep | 1:24714b45cd1b | 1033 | * IFFT is implemented with following changes in equations from FFT |
xorjoep | 1:24714b45cd1b | 1034 | * |
xorjoep | 1:24714b45cd1b | 1035 | * Input real and imaginary data: |
xorjoep | 1:24714b45cd1b | 1036 | * x(n) = xa + j * ya |
xorjoep | 1:24714b45cd1b | 1037 | * x(n+N/4 ) = xb + j * yb |
xorjoep | 1:24714b45cd1b | 1038 | * x(n+N/2 ) = xc + j * yc |
xorjoep | 1:24714b45cd1b | 1039 | * x(n+3N/4) = xd + j * yd |
xorjoep | 1:24714b45cd1b | 1040 | * |
xorjoep | 1:24714b45cd1b | 1041 | * |
xorjoep | 1:24714b45cd1b | 1042 | * Output real and imaginary data: |
xorjoep | 1:24714b45cd1b | 1043 | * x(4r) = xa'+ j * ya' |
xorjoep | 1:24714b45cd1b | 1044 | * x(4r+1) = xb'+ j * yb' |
xorjoep | 1:24714b45cd1b | 1045 | * x(4r+2) = xc'+ j * yc' |
xorjoep | 1:24714b45cd1b | 1046 | * x(4r+3) = xd'+ j * yd' |
xorjoep | 1:24714b45cd1b | 1047 | * |
xorjoep | 1:24714b45cd1b | 1048 | * |
xorjoep | 1:24714b45cd1b | 1049 | * Twiddle factors for radix-4 IFFT: |
xorjoep | 1:24714b45cd1b | 1050 | * Wn = co1 + j * (si1) |
xorjoep | 1:24714b45cd1b | 1051 | * W2n = co2 + j * (si2) |
xorjoep | 1:24714b45cd1b | 1052 | * W3n = co3 + j * (si3) |
xorjoep | 1:24714b45cd1b | 1053 | |
xorjoep | 1:24714b45cd1b | 1054 | * The real and imaginary output values for the radix-4 butterfly are |
xorjoep | 1:24714b45cd1b | 1055 | * xa' = xa + xb + xc + xd |
xorjoep | 1:24714b45cd1b | 1056 | * ya' = ya + yb + yc + yd |
xorjoep | 1:24714b45cd1b | 1057 | * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) |
xorjoep | 1:24714b45cd1b | 1058 | * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) |
xorjoep | 1:24714b45cd1b | 1059 | * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) |
xorjoep | 1:24714b45cd1b | 1060 | * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) |
xorjoep | 1:24714b45cd1b | 1061 | * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) |
xorjoep | 1:24714b45cd1b | 1062 | * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) |
xorjoep | 1:24714b45cd1b | 1063 | * |
xorjoep | 1:24714b45cd1b | 1064 | */ |
xorjoep | 1:24714b45cd1b | 1065 | |
xorjoep | 1:24714b45cd1b | 1066 | void arm_radix4_butterfly_inverse_q15( |
xorjoep | 1:24714b45cd1b | 1067 | q15_t * pSrc16, |
xorjoep | 1:24714b45cd1b | 1068 | uint32_t fftLen, |
xorjoep | 1:24714b45cd1b | 1069 | q15_t * pCoef16, |
xorjoep | 1:24714b45cd1b | 1070 | uint32_t twidCoefModifier) |
xorjoep | 1:24714b45cd1b | 1071 | { |
xorjoep | 1:24714b45cd1b | 1072 | |
xorjoep | 1:24714b45cd1b | 1073 | #if defined (ARM_MATH_DSP) |
xorjoep | 1:24714b45cd1b | 1074 | |
xorjoep | 1:24714b45cd1b | 1075 | /* Run the below code for Cortex-M4 and Cortex-M3 */ |
xorjoep | 1:24714b45cd1b | 1076 | |
xorjoep | 1:24714b45cd1b | 1077 | q31_t R, S, T, U; |
xorjoep | 1:24714b45cd1b | 1078 | q31_t C1, C2, C3, out1, out2; |
xorjoep | 1:24714b45cd1b | 1079 | uint32_t n1, n2, ic, i0, j, k; |
xorjoep | 1:24714b45cd1b | 1080 | |
xorjoep | 1:24714b45cd1b | 1081 | q15_t *ptr1; |
xorjoep | 1:24714b45cd1b | 1082 | q15_t *pSi0; |
xorjoep | 1:24714b45cd1b | 1083 | q15_t *pSi1; |
xorjoep | 1:24714b45cd1b | 1084 | q15_t *pSi2; |
xorjoep | 1:24714b45cd1b | 1085 | q15_t *pSi3; |
xorjoep | 1:24714b45cd1b | 1086 | |
xorjoep | 1:24714b45cd1b | 1087 | q31_t xaya, xbyb, xcyc, xdyd; |
xorjoep | 1:24714b45cd1b | 1088 | |
xorjoep | 1:24714b45cd1b | 1089 | /* Total process is divided into three stages */ |
xorjoep | 1:24714b45cd1b | 1090 | |
xorjoep | 1:24714b45cd1b | 1091 | /* process first stage, middle stages, & last stage */ |
xorjoep | 1:24714b45cd1b | 1092 | |
xorjoep | 1:24714b45cd1b | 1093 | /* Initializations for the first stage */ |
xorjoep | 1:24714b45cd1b | 1094 | n2 = fftLen; |
xorjoep | 1:24714b45cd1b | 1095 | n1 = n2; |
xorjoep | 1:24714b45cd1b | 1096 | |
xorjoep | 1:24714b45cd1b | 1097 | /* n2 = fftLen/4 */ |
xorjoep | 1:24714b45cd1b | 1098 | n2 >>= 2U; |
xorjoep | 1:24714b45cd1b | 1099 | |
xorjoep | 1:24714b45cd1b | 1100 | /* Index for twiddle coefficient */ |
xorjoep | 1:24714b45cd1b | 1101 | ic = 0U; |
xorjoep | 1:24714b45cd1b | 1102 | |
xorjoep | 1:24714b45cd1b | 1103 | /* Index for input read and output write */ |
xorjoep | 1:24714b45cd1b | 1104 | j = n2; |
xorjoep | 1:24714b45cd1b | 1105 | |
xorjoep | 1:24714b45cd1b | 1106 | pSi0 = pSrc16; |
xorjoep | 1:24714b45cd1b | 1107 | pSi1 = pSi0 + 2 * n2; |
xorjoep | 1:24714b45cd1b | 1108 | pSi2 = pSi1 + 2 * n2; |
xorjoep | 1:24714b45cd1b | 1109 | pSi3 = pSi2 + 2 * n2; |
xorjoep | 1:24714b45cd1b | 1110 | |
xorjoep | 1:24714b45cd1b | 1111 | /* Input is in 1.15(q15) format */ |
xorjoep | 1:24714b45cd1b | 1112 | |
xorjoep | 1:24714b45cd1b | 1113 | /* start of first stage process */ |
xorjoep | 1:24714b45cd1b | 1114 | do |
xorjoep | 1:24714b45cd1b | 1115 | { |
xorjoep | 1:24714b45cd1b | 1116 | /* Butterfly implementation */ |
xorjoep | 1:24714b45cd1b | 1117 | |
xorjoep | 1:24714b45cd1b | 1118 | /* Reading i0, i0+fftLen/2 inputs */ |
xorjoep | 1:24714b45cd1b | 1119 | /* Read ya (real), xa(imag) input */ |
xorjoep | 1:24714b45cd1b | 1120 | T = _SIMD32_OFFSET(pSi0); |
xorjoep | 1:24714b45cd1b | 1121 | T = __SHADD16(T, 0); |
xorjoep | 1:24714b45cd1b | 1122 | T = __SHADD16(T, 0); |
xorjoep | 1:24714b45cd1b | 1123 | |
xorjoep | 1:24714b45cd1b | 1124 | /* Read yc (real), xc(imag) input */ |
xorjoep | 1:24714b45cd1b | 1125 | S = _SIMD32_OFFSET(pSi2); |
xorjoep | 1:24714b45cd1b | 1126 | S = __SHADD16(S, 0); |
xorjoep | 1:24714b45cd1b | 1127 | S = __SHADD16(S, 0); |
xorjoep | 1:24714b45cd1b | 1128 | |
xorjoep | 1:24714b45cd1b | 1129 | /* R = packed((ya + yc), (xa + xc) ) */ |
xorjoep | 1:24714b45cd1b | 1130 | R = __QADD16(T, S); |
xorjoep | 1:24714b45cd1b | 1131 | |
xorjoep | 1:24714b45cd1b | 1132 | /* S = packed((ya - yc), (xa - xc) ) */ |
xorjoep | 1:24714b45cd1b | 1133 | S = __QSUB16(T, S); |
xorjoep | 1:24714b45cd1b | 1134 | |
xorjoep | 1:24714b45cd1b | 1135 | /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ |
xorjoep | 1:24714b45cd1b | 1136 | /* Read yb (real), xb(imag) input */ |
xorjoep | 1:24714b45cd1b | 1137 | T = _SIMD32_OFFSET(pSi1); |
xorjoep | 1:24714b45cd1b | 1138 | T = __SHADD16(T, 0); |
xorjoep | 1:24714b45cd1b | 1139 | T = __SHADD16(T, 0); |
xorjoep | 1:24714b45cd1b | 1140 | |
xorjoep | 1:24714b45cd1b | 1141 | /* Read yd (real), xd(imag) input */ |
xorjoep | 1:24714b45cd1b | 1142 | U = _SIMD32_OFFSET(pSi3); |
xorjoep | 1:24714b45cd1b | 1143 | U = __SHADD16(U, 0); |
xorjoep | 1:24714b45cd1b | 1144 | U = __SHADD16(U, 0); |
xorjoep | 1:24714b45cd1b | 1145 | |
xorjoep | 1:24714b45cd1b | 1146 | /* T = packed((yb + yd), (xb + xd) ) */ |
xorjoep | 1:24714b45cd1b | 1147 | T = __QADD16(T, U); |
xorjoep | 1:24714b45cd1b | 1148 | |
xorjoep | 1:24714b45cd1b | 1149 | /* writing the butterfly processed i0 sample */ |
xorjoep | 1:24714b45cd1b | 1150 | /* xa' = xa + xb + xc + xd */ |
xorjoep | 1:24714b45cd1b | 1151 | /* ya' = ya + yb + yc + yd */ |
xorjoep | 1:24714b45cd1b | 1152 | _SIMD32_OFFSET(pSi0) = __SHADD16(R, T); |
xorjoep | 1:24714b45cd1b | 1153 | pSi0 += 2; |
xorjoep | 1:24714b45cd1b | 1154 | |
xorjoep | 1:24714b45cd1b | 1155 | /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */ |
xorjoep | 1:24714b45cd1b | 1156 | R = __QSUB16(R, T); |
xorjoep | 1:24714b45cd1b | 1157 | |
xorjoep | 1:24714b45cd1b | 1158 | /* co2 & si2 are read from SIMD Coefficient pointer */ |
xorjoep | 1:24714b45cd1b | 1159 | C2 = _SIMD32_OFFSET(pCoef16 + (4U * ic)); |
xorjoep | 1:24714b45cd1b | 1160 | |
xorjoep | 1:24714b45cd1b | 1161 | #ifndef ARM_MATH_BIG_ENDIAN |
xorjoep | 1:24714b45cd1b | 1162 | |
xorjoep | 1:24714b45cd1b | 1163 | /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ |
xorjoep | 1:24714b45cd1b | 1164 | out1 = __SMUSD(C2, R) >> 16U; |
xorjoep | 1:24714b45cd1b | 1165 | /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ |
xorjoep | 1:24714b45cd1b | 1166 | out2 = __SMUADX(C2, R); |
xorjoep | 1:24714b45cd1b | 1167 | |
xorjoep | 1:24714b45cd1b | 1168 | #else |
xorjoep | 1:24714b45cd1b | 1169 | |
xorjoep | 1:24714b45cd1b | 1170 | /* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ |
xorjoep | 1:24714b45cd1b | 1171 | out1 = __SMUADX(C2, R) >> 16U; |
xorjoep | 1:24714b45cd1b | 1172 | /* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ |
xorjoep | 1:24714b45cd1b | 1173 | out2 = __SMUSD(__QSUB16(0, C2), R); |
xorjoep | 1:24714b45cd1b | 1174 | |
xorjoep | 1:24714b45cd1b | 1175 | #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ |
xorjoep | 1:24714b45cd1b | 1176 | |
xorjoep | 1:24714b45cd1b | 1177 | /* Reading i0+fftLen/4 */ |
xorjoep | 1:24714b45cd1b | 1178 | /* T = packed(yb, xb) */ |
xorjoep | 1:24714b45cd1b | 1179 | T = _SIMD32_OFFSET(pSi1); |
xorjoep | 1:24714b45cd1b | 1180 | T = __SHADD16(T, 0); |
xorjoep | 1:24714b45cd1b | 1181 | T = __SHADD16(T, 0); |
xorjoep | 1:24714b45cd1b | 1182 | |
xorjoep | 1:24714b45cd1b | 1183 | /* writing the butterfly processed i0 + fftLen/4 sample */ |
xorjoep | 1:24714b45cd1b | 1184 | /* writing output(xc', yc') in little endian format */ |
xorjoep | 1:24714b45cd1b | 1185 | _SIMD32_OFFSET(pSi1) = |
xorjoep | 1:24714b45cd1b | 1186 | (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); |
xorjoep | 1:24714b45cd1b | 1187 | pSi1 += 2; |
xorjoep | 1:24714b45cd1b | 1188 | |
xorjoep | 1:24714b45cd1b | 1189 | /* Butterfly calculations */ |
xorjoep | 1:24714b45cd1b | 1190 | /* U = packed(yd, xd) */ |
xorjoep | 1:24714b45cd1b | 1191 | U = _SIMD32_OFFSET(pSi3); |
xorjoep | 1:24714b45cd1b | 1192 | U = __SHADD16(U, 0); |
xorjoep | 1:24714b45cd1b | 1193 | U = __SHADD16(U, 0); |
xorjoep | 1:24714b45cd1b | 1194 | |
xorjoep | 1:24714b45cd1b | 1195 | /* T = packed(yb-yd, xb-xd) */ |
xorjoep | 1:24714b45cd1b | 1196 | T = __QSUB16(T, U); |
xorjoep | 1:24714b45cd1b | 1197 | |
xorjoep | 1:24714b45cd1b | 1198 | #ifndef ARM_MATH_BIG_ENDIAN |
xorjoep | 1:24714b45cd1b | 1199 | |
xorjoep | 1:24714b45cd1b | 1200 | /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ |
xorjoep | 1:24714b45cd1b | 1201 | R = __QSAX(S, T); |
xorjoep | 1:24714b45cd1b | 1202 | /* S = packed((ya-yc) + (xb- xd), (xa-xc) - (yb-yd)) */ |
xorjoep | 1:24714b45cd1b | 1203 | S = __QASX(S, T); |
xorjoep | 1:24714b45cd1b | 1204 | |
xorjoep | 1:24714b45cd1b | 1205 | #else |
xorjoep | 1:24714b45cd1b | 1206 | |
xorjoep | 1:24714b45cd1b | 1207 | /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ |
xorjoep | 1:24714b45cd1b | 1208 | R = __QASX(S, T); |
xorjoep | 1:24714b45cd1b | 1209 | /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ |
xorjoep | 1:24714b45cd1b | 1210 | S = __QSAX(S, T); |
xorjoep | 1:24714b45cd1b | 1211 | |
xorjoep | 1:24714b45cd1b | 1212 | #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ |
xorjoep | 1:24714b45cd1b | 1213 | |
xorjoep | 1:24714b45cd1b | 1214 | /* co1 & si1 are read from SIMD Coefficient pointer */ |
xorjoep | 1:24714b45cd1b | 1215 | C1 = _SIMD32_OFFSET(pCoef16 + (2U * ic)); |
xorjoep | 1:24714b45cd1b | 1216 | /* Butterfly process for the i0+fftLen/2 sample */ |
xorjoep | 1:24714b45cd1b | 1217 | |
xorjoep | 1:24714b45cd1b | 1218 | #ifndef ARM_MATH_BIG_ENDIAN |
xorjoep | 1:24714b45cd1b | 1219 | |
xorjoep | 1:24714b45cd1b | 1220 | /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ |
xorjoep | 1:24714b45cd1b | 1221 | out1 = __SMUSD(C1, S) >> 16U; |
xorjoep | 1:24714b45cd1b | 1222 | /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ |
xorjoep | 1:24714b45cd1b | 1223 | out2 = __SMUADX(C1, S); |
xorjoep | 1:24714b45cd1b | 1224 | |
xorjoep | 1:24714b45cd1b | 1225 | #else |
xorjoep | 1:24714b45cd1b | 1226 | |
xorjoep | 1:24714b45cd1b | 1227 | /* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ |
xorjoep | 1:24714b45cd1b | 1228 | out1 = __SMUADX(C1, S) >> 16U; |
xorjoep | 1:24714b45cd1b | 1229 | /* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ |
xorjoep | 1:24714b45cd1b | 1230 | out2 = __SMUSD(__QSUB16(0, C1), S); |
xorjoep | 1:24714b45cd1b | 1231 | |
xorjoep | 1:24714b45cd1b | 1232 | #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ |
xorjoep | 1:24714b45cd1b | 1233 | |
xorjoep | 1:24714b45cd1b | 1234 | /* writing output(xb', yb') in little endian format */ |
xorjoep | 1:24714b45cd1b | 1235 | _SIMD32_OFFSET(pSi2) = |
xorjoep | 1:24714b45cd1b | 1236 | ((out2) & 0xFFFF0000) | ((out1) & 0x0000FFFF); |
xorjoep | 1:24714b45cd1b | 1237 | pSi2 += 2; |
xorjoep | 1:24714b45cd1b | 1238 | |
xorjoep | 1:24714b45cd1b | 1239 | |
xorjoep | 1:24714b45cd1b | 1240 | /* co3 & si3 are read from SIMD Coefficient pointer */ |
xorjoep | 1:24714b45cd1b | 1241 | C3 = _SIMD32_OFFSET(pCoef16 + (6U * ic)); |
xorjoep | 1:24714b45cd1b | 1242 | /* Butterfly process for the i0+3fftLen/4 sample */ |
xorjoep | 1:24714b45cd1b | 1243 | |
xorjoep | 1:24714b45cd1b | 1244 | #ifndef ARM_MATH_BIG_ENDIAN |
xorjoep | 1:24714b45cd1b | 1245 | |
xorjoep | 1:24714b45cd1b | 1246 | /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ |
xorjoep | 1:24714b45cd1b | 1247 | out1 = __SMUSD(C3, R) >> 16U; |
xorjoep | 1:24714b45cd1b | 1248 | /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ |
xorjoep | 1:24714b45cd1b | 1249 | out2 = __SMUADX(C3, R); |
xorjoep | 1:24714b45cd1b | 1250 | |
xorjoep | 1:24714b45cd1b | 1251 | #else |
xorjoep | 1:24714b45cd1b | 1252 | |
xorjoep | 1:24714b45cd1b | 1253 | /* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ |
xorjoep | 1:24714b45cd1b | 1254 | out1 = __SMUADX(C3, R) >> 16U; |
xorjoep | 1:24714b45cd1b | 1255 | /* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ |
xorjoep | 1:24714b45cd1b | 1256 | out2 = __SMUSD(__QSUB16(0, C3), R); |
xorjoep | 1:24714b45cd1b | 1257 | |
xorjoep | 1:24714b45cd1b | 1258 | #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ |
xorjoep | 1:24714b45cd1b | 1259 | |
xorjoep | 1:24714b45cd1b | 1260 | /* writing output(xd', yd') in little endian format */ |
xorjoep | 1:24714b45cd1b | 1261 | _SIMD32_OFFSET(pSi3) = |
xorjoep | 1:24714b45cd1b | 1262 | ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); |
xorjoep | 1:24714b45cd1b | 1263 | pSi3 += 2; |
xorjoep | 1:24714b45cd1b | 1264 | |
xorjoep | 1:24714b45cd1b | 1265 | /* Twiddle coefficients index modifier */ |
xorjoep | 1:24714b45cd1b | 1266 | ic = ic + twidCoefModifier; |
xorjoep | 1:24714b45cd1b | 1267 | |
xorjoep | 1:24714b45cd1b | 1268 | } while (--j); |
xorjoep | 1:24714b45cd1b | 1269 | /* data is in 4.11(q11) format */ |
xorjoep | 1:24714b45cd1b | 1270 | |
xorjoep | 1:24714b45cd1b | 1271 | /* end of first stage process */ |
xorjoep | 1:24714b45cd1b | 1272 | |
xorjoep | 1:24714b45cd1b | 1273 | |
xorjoep | 1:24714b45cd1b | 1274 | /* start of middle stage process */ |
xorjoep | 1:24714b45cd1b | 1275 | |
xorjoep | 1:24714b45cd1b | 1276 | /* Twiddle coefficients index modifier */ |
xorjoep | 1:24714b45cd1b | 1277 | twidCoefModifier <<= 2U; |
xorjoep | 1:24714b45cd1b | 1278 | |
xorjoep | 1:24714b45cd1b | 1279 | /* Calculation of Middle stage */ |
xorjoep | 1:24714b45cd1b | 1280 | for (k = fftLen / 4U; k > 4U; k >>= 2U) |
xorjoep | 1:24714b45cd1b | 1281 | { |
xorjoep | 1:24714b45cd1b | 1282 | /* Initializations for the middle stage */ |
xorjoep | 1:24714b45cd1b | 1283 | n1 = n2; |
xorjoep | 1:24714b45cd1b | 1284 | n2 >>= 2U; |
xorjoep | 1:24714b45cd1b | 1285 | ic = 0U; |
xorjoep | 1:24714b45cd1b | 1286 | |
xorjoep | 1:24714b45cd1b | 1287 | for (j = 0U; j <= (n2 - 1U); j++) |
xorjoep | 1:24714b45cd1b | 1288 | { |
xorjoep | 1:24714b45cd1b | 1289 | /* index calculation for the coefficients */ |
xorjoep | 1:24714b45cd1b | 1290 | C1 = _SIMD32_OFFSET(pCoef16 + (2U * ic)); |
xorjoep | 1:24714b45cd1b | 1291 | C2 = _SIMD32_OFFSET(pCoef16 + (4U * ic)); |
xorjoep | 1:24714b45cd1b | 1292 | C3 = _SIMD32_OFFSET(pCoef16 + (6U * ic)); |
xorjoep | 1:24714b45cd1b | 1293 | |
xorjoep | 1:24714b45cd1b | 1294 | /* Twiddle coefficients index modifier */ |
xorjoep | 1:24714b45cd1b | 1295 | ic = ic + twidCoefModifier; |
xorjoep | 1:24714b45cd1b | 1296 | |
xorjoep | 1:24714b45cd1b | 1297 | pSi0 = pSrc16 + 2 * j; |
xorjoep | 1:24714b45cd1b | 1298 | pSi1 = pSi0 + 2 * n2; |
xorjoep | 1:24714b45cd1b | 1299 | pSi2 = pSi1 + 2 * n2; |
xorjoep | 1:24714b45cd1b | 1300 | pSi3 = pSi2 + 2 * n2; |
xorjoep | 1:24714b45cd1b | 1301 | |
xorjoep | 1:24714b45cd1b | 1302 | /* Butterfly implementation */ |
xorjoep | 1:24714b45cd1b | 1303 | for (i0 = j; i0 < fftLen; i0 += n1) |
xorjoep | 1:24714b45cd1b | 1304 | { |
xorjoep | 1:24714b45cd1b | 1305 | /* Reading i0, i0+fftLen/2 inputs */ |
xorjoep | 1:24714b45cd1b | 1306 | /* Read ya (real), xa(imag) input */ |
xorjoep | 1:24714b45cd1b | 1307 | T = _SIMD32_OFFSET(pSi0); |
xorjoep | 1:24714b45cd1b | 1308 | |
xorjoep | 1:24714b45cd1b | 1309 | /* Read yc (real), xc(imag) input */ |
xorjoep | 1:24714b45cd1b | 1310 | S = _SIMD32_OFFSET(pSi2); |
xorjoep | 1:24714b45cd1b | 1311 | |
xorjoep | 1:24714b45cd1b | 1312 | /* R = packed( (ya + yc), (xa + xc)) */ |
xorjoep | 1:24714b45cd1b | 1313 | R = __QADD16(T, S); |
xorjoep | 1:24714b45cd1b | 1314 | |
xorjoep | 1:24714b45cd1b | 1315 | /* S = packed((ya - yc), (xa - xc)) */ |
xorjoep | 1:24714b45cd1b | 1316 | S = __QSUB16(T, S); |
xorjoep | 1:24714b45cd1b | 1317 | |
xorjoep | 1:24714b45cd1b | 1318 | /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ |
xorjoep | 1:24714b45cd1b | 1319 | /* Read yb (real), xb(imag) input */ |
xorjoep | 1:24714b45cd1b | 1320 | T = _SIMD32_OFFSET(pSi1); |
xorjoep | 1:24714b45cd1b | 1321 | |
xorjoep | 1:24714b45cd1b | 1322 | /* Read yd (real), xd(imag) input */ |
xorjoep | 1:24714b45cd1b | 1323 | U = _SIMD32_OFFSET(pSi3); |
xorjoep | 1:24714b45cd1b | 1324 | |
xorjoep | 1:24714b45cd1b | 1325 | /* T = packed( (yb + yd), (xb + xd)) */ |
xorjoep | 1:24714b45cd1b | 1326 | T = __QADD16(T, U); |
xorjoep | 1:24714b45cd1b | 1327 | |
xorjoep | 1:24714b45cd1b | 1328 | /* writing the butterfly processed i0 sample */ |
xorjoep | 1:24714b45cd1b | 1329 | |
xorjoep | 1:24714b45cd1b | 1330 | /* xa' = xa + xb + xc + xd */ |
xorjoep | 1:24714b45cd1b | 1331 | /* ya' = ya + yb + yc + yd */ |
xorjoep | 1:24714b45cd1b | 1332 | out1 = __SHADD16(R, T); |
xorjoep | 1:24714b45cd1b | 1333 | out1 = __SHADD16(out1, 0); |
xorjoep | 1:24714b45cd1b | 1334 | _SIMD32_OFFSET(pSi0) = out1; |
xorjoep | 1:24714b45cd1b | 1335 | pSi0 += 2 * n1; |
xorjoep | 1:24714b45cd1b | 1336 | |
xorjoep | 1:24714b45cd1b | 1337 | /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */ |
xorjoep | 1:24714b45cd1b | 1338 | R = __SHSUB16(R, T); |
xorjoep | 1:24714b45cd1b | 1339 | |
xorjoep | 1:24714b45cd1b | 1340 | #ifndef ARM_MATH_BIG_ENDIAN |
xorjoep | 1:24714b45cd1b | 1341 | |
xorjoep | 1:24714b45cd1b | 1342 | /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ |
xorjoep | 1:24714b45cd1b | 1343 | out1 = __SMUSD(C2, R) >> 16U; |
xorjoep | 1:24714b45cd1b | 1344 | |
xorjoep | 1:24714b45cd1b | 1345 | /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ |
xorjoep | 1:24714b45cd1b | 1346 | out2 = __SMUADX(C2, R); |
xorjoep | 1:24714b45cd1b | 1347 | |
xorjoep | 1:24714b45cd1b | 1348 | #else |
xorjoep | 1:24714b45cd1b | 1349 | |
xorjoep | 1:24714b45cd1b | 1350 | /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ |
xorjoep | 1:24714b45cd1b | 1351 | out1 = __SMUADX(R, C2) >> 16U; |
xorjoep | 1:24714b45cd1b | 1352 | |
xorjoep | 1:24714b45cd1b | 1353 | /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ |
xorjoep | 1:24714b45cd1b | 1354 | out2 = __SMUSD(__QSUB16(0, C2), R); |
xorjoep | 1:24714b45cd1b | 1355 | |
xorjoep | 1:24714b45cd1b | 1356 | #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ |
xorjoep | 1:24714b45cd1b | 1357 | |
xorjoep | 1:24714b45cd1b | 1358 | /* Reading i0+3fftLen/4 */ |
xorjoep | 1:24714b45cd1b | 1359 | /* Read yb (real), xb(imag) input */ |
xorjoep | 1:24714b45cd1b | 1360 | T = _SIMD32_OFFSET(pSi1); |
xorjoep | 1:24714b45cd1b | 1361 | |
xorjoep | 1:24714b45cd1b | 1362 | /* writing the butterfly processed i0 + fftLen/4 sample */ |
xorjoep | 1:24714b45cd1b | 1363 | /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ |
xorjoep | 1:24714b45cd1b | 1364 | /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ |
xorjoep | 1:24714b45cd1b | 1365 | _SIMD32_OFFSET(pSi1) = |
xorjoep | 1:24714b45cd1b | 1366 | ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); |
xorjoep | 1:24714b45cd1b | 1367 | pSi1 += 2 * n1; |
xorjoep | 1:24714b45cd1b | 1368 | |
xorjoep | 1:24714b45cd1b | 1369 | /* Butterfly calculations */ |
xorjoep | 1:24714b45cd1b | 1370 | |
xorjoep | 1:24714b45cd1b | 1371 | /* Read yd (real), xd(imag) input */ |
xorjoep | 1:24714b45cd1b | 1372 | U = _SIMD32_OFFSET(pSi3); |
xorjoep | 1:24714b45cd1b | 1373 | |
xorjoep | 1:24714b45cd1b | 1374 | /* T = packed(yb-yd, xb-xd) */ |
xorjoep | 1:24714b45cd1b | 1375 | T = __QSUB16(T, U); |
xorjoep | 1:24714b45cd1b | 1376 | |
xorjoep | 1:24714b45cd1b | 1377 | #ifndef ARM_MATH_BIG_ENDIAN |
xorjoep | 1:24714b45cd1b | 1378 | |
xorjoep | 1:24714b45cd1b | 1379 | /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ |
xorjoep | 1:24714b45cd1b | 1380 | R = __SHSAX(S, T); |
xorjoep | 1:24714b45cd1b | 1381 | |
xorjoep | 1:24714b45cd1b | 1382 | /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ |
xorjoep | 1:24714b45cd1b | 1383 | S = __SHASX(S, T); |
xorjoep | 1:24714b45cd1b | 1384 | |
xorjoep | 1:24714b45cd1b | 1385 | |
xorjoep | 1:24714b45cd1b | 1386 | /* Butterfly process for the i0+fftLen/2 sample */ |
xorjoep | 1:24714b45cd1b | 1387 | out1 = __SMUSD(C1, S) >> 16U; |
xorjoep | 1:24714b45cd1b | 1388 | out2 = __SMUADX(C1, S); |
xorjoep | 1:24714b45cd1b | 1389 | |
xorjoep | 1:24714b45cd1b | 1390 | #else |
xorjoep | 1:24714b45cd1b | 1391 | |
xorjoep | 1:24714b45cd1b | 1392 | /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ |
xorjoep | 1:24714b45cd1b | 1393 | R = __SHASX(S, T); |
xorjoep | 1:24714b45cd1b | 1394 | |
xorjoep | 1:24714b45cd1b | 1395 | /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ |
xorjoep | 1:24714b45cd1b | 1396 | S = __SHSAX(S, T); |
xorjoep | 1:24714b45cd1b | 1397 | |
xorjoep | 1:24714b45cd1b | 1398 | |
xorjoep | 1:24714b45cd1b | 1399 | /* Butterfly process for the i0+fftLen/2 sample */ |
xorjoep | 1:24714b45cd1b | 1400 | out1 = __SMUADX(S, C1) >> 16U; |
xorjoep | 1:24714b45cd1b | 1401 | out2 = __SMUSD(__QSUB16(0, C1), S); |
xorjoep | 1:24714b45cd1b | 1402 | |
xorjoep | 1:24714b45cd1b | 1403 | #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ |
xorjoep | 1:24714b45cd1b | 1404 | |
xorjoep | 1:24714b45cd1b | 1405 | /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ |
xorjoep | 1:24714b45cd1b | 1406 | /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ |
xorjoep | 1:24714b45cd1b | 1407 | _SIMD32_OFFSET(pSi2) = |
xorjoep | 1:24714b45cd1b | 1408 | ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); |
xorjoep | 1:24714b45cd1b | 1409 | pSi2 += 2 * n1; |
xorjoep | 1:24714b45cd1b | 1410 | |
xorjoep | 1:24714b45cd1b | 1411 | /* Butterfly process for the i0+3fftLen/4 sample */ |
xorjoep | 1:24714b45cd1b | 1412 | |
xorjoep | 1:24714b45cd1b | 1413 | #ifndef ARM_MATH_BIG_ENDIAN |
xorjoep | 1:24714b45cd1b | 1414 | |
xorjoep | 1:24714b45cd1b | 1415 | out1 = __SMUSD(C3, R) >> 16U; |
xorjoep | 1:24714b45cd1b | 1416 | out2 = __SMUADX(C3, R); |
xorjoep | 1:24714b45cd1b | 1417 | |
xorjoep | 1:24714b45cd1b | 1418 | #else |
xorjoep | 1:24714b45cd1b | 1419 | |
xorjoep | 1:24714b45cd1b | 1420 | out1 = __SMUADX(C3, R) >> 16U; |
xorjoep | 1:24714b45cd1b | 1421 | out2 = __SMUSD(__QSUB16(0, C3), R); |
xorjoep | 1:24714b45cd1b | 1422 | |
xorjoep | 1:24714b45cd1b | 1423 | #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ |
xorjoep | 1:24714b45cd1b | 1424 | |
xorjoep | 1:24714b45cd1b | 1425 | /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ |
xorjoep | 1:24714b45cd1b | 1426 | /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ |
xorjoep | 1:24714b45cd1b | 1427 | _SIMD32_OFFSET(pSi3) = |
xorjoep | 1:24714b45cd1b | 1428 | ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); |
xorjoep | 1:24714b45cd1b | 1429 | pSi3 += 2 * n1; |
xorjoep | 1:24714b45cd1b | 1430 | } |
xorjoep | 1:24714b45cd1b | 1431 | } |
xorjoep | 1:24714b45cd1b | 1432 | /* Twiddle coefficients index modifier */ |
xorjoep | 1:24714b45cd1b | 1433 | twidCoefModifier <<= 2U; |
xorjoep | 1:24714b45cd1b | 1434 | } |
xorjoep | 1:24714b45cd1b | 1435 | /* end of middle stage process */ |
xorjoep | 1:24714b45cd1b | 1436 | |
xorjoep | 1:24714b45cd1b | 1437 | /* data is in 10.6(q6) format for the 1024 point */ |
xorjoep | 1:24714b45cd1b | 1438 | /* data is in 8.8(q8) format for the 256 point */ |
xorjoep | 1:24714b45cd1b | 1439 | /* data is in 6.10(q10) format for the 64 point */ |
xorjoep | 1:24714b45cd1b | 1440 | /* data is in 4.12(q12) format for the 16 point */ |
xorjoep | 1:24714b45cd1b | 1441 | |
xorjoep | 1:24714b45cd1b | 1442 | /* Initializations for the last stage */ |
xorjoep | 1:24714b45cd1b | 1443 | j = fftLen >> 2; |
xorjoep | 1:24714b45cd1b | 1444 | |
xorjoep | 1:24714b45cd1b | 1445 | ptr1 = &pSrc16[0]; |
xorjoep | 1:24714b45cd1b | 1446 | |
xorjoep | 1:24714b45cd1b | 1447 | /* start of last stage process */ |
xorjoep | 1:24714b45cd1b | 1448 | |
xorjoep | 1:24714b45cd1b | 1449 | /* Butterfly implementation */ |
xorjoep | 1:24714b45cd1b | 1450 | do |
xorjoep | 1:24714b45cd1b | 1451 | { |
xorjoep | 1:24714b45cd1b | 1452 | /* Read xa (real), ya(imag) input */ |
xorjoep | 1:24714b45cd1b | 1453 | xaya = *__SIMD32(ptr1)++; |
xorjoep | 1:24714b45cd1b | 1454 | |
xorjoep | 1:24714b45cd1b | 1455 | /* Read xb (real), yb(imag) input */ |
xorjoep | 1:24714b45cd1b | 1456 | xbyb = *__SIMD32(ptr1)++; |
xorjoep | 1:24714b45cd1b | 1457 | |
xorjoep | 1:24714b45cd1b | 1458 | /* Read xc (real), yc(imag) input */ |
xorjoep | 1:24714b45cd1b | 1459 | xcyc = *__SIMD32(ptr1)++; |
xorjoep | 1:24714b45cd1b | 1460 | |
xorjoep | 1:24714b45cd1b | 1461 | /* Read xd (real), yd(imag) input */ |
xorjoep | 1:24714b45cd1b | 1462 | xdyd = *__SIMD32(ptr1)++; |
xorjoep | 1:24714b45cd1b | 1463 | |
xorjoep | 1:24714b45cd1b | 1464 | /* R = packed((ya + yc), (xa + xc)) */ |
xorjoep | 1:24714b45cd1b | 1465 | R = __QADD16(xaya, xcyc); |
xorjoep | 1:24714b45cd1b | 1466 | |
xorjoep | 1:24714b45cd1b | 1467 | /* T = packed((yb + yd), (xb + xd)) */ |
xorjoep | 1:24714b45cd1b | 1468 | T = __QADD16(xbyb, xdyd); |
xorjoep | 1:24714b45cd1b | 1469 | |
xorjoep | 1:24714b45cd1b | 1470 | /* pointer updation for writing */ |
xorjoep | 1:24714b45cd1b | 1471 | ptr1 = ptr1 - 8U; |
xorjoep | 1:24714b45cd1b | 1472 | |
xorjoep | 1:24714b45cd1b | 1473 | |
xorjoep | 1:24714b45cd1b | 1474 | /* xa' = xa + xb + xc + xd */ |
xorjoep | 1:24714b45cd1b | 1475 | /* ya' = ya + yb + yc + yd */ |
xorjoep | 1:24714b45cd1b | 1476 | *__SIMD32(ptr1)++ = __SHADD16(R, T); |
xorjoep | 1:24714b45cd1b | 1477 | |
xorjoep | 1:24714b45cd1b | 1478 | /* T = packed((yb + yd), (xb + xd)) */ |
xorjoep | 1:24714b45cd1b | 1479 | T = __QADD16(xbyb, xdyd); |
xorjoep | 1:24714b45cd1b | 1480 | |
xorjoep | 1:24714b45cd1b | 1481 | /* xc' = (xa-xb+xc-xd) */ |
xorjoep | 1:24714b45cd1b | 1482 | /* yc' = (ya-yb+yc-yd) */ |
xorjoep | 1:24714b45cd1b | 1483 | *__SIMD32(ptr1)++ = __SHSUB16(R, T); |
xorjoep | 1:24714b45cd1b | 1484 | |
xorjoep | 1:24714b45cd1b | 1485 | /* S = packed((ya - yc), (xa - xc)) */ |
xorjoep | 1:24714b45cd1b | 1486 | S = __QSUB16(xaya, xcyc); |
xorjoep | 1:24714b45cd1b | 1487 | |
xorjoep | 1:24714b45cd1b | 1488 | /* Read yd (real), xd(imag) input */ |
xorjoep | 1:24714b45cd1b | 1489 | /* T = packed( (yb - yd), (xb - xd)) */ |
xorjoep | 1:24714b45cd1b | 1490 | U = __QSUB16(xbyb, xdyd); |
xorjoep | 1:24714b45cd1b | 1491 | |
xorjoep | 1:24714b45cd1b | 1492 | #ifndef ARM_MATH_BIG_ENDIAN |
xorjoep | 1:24714b45cd1b | 1493 | |
xorjoep | 1:24714b45cd1b | 1494 | /* xb' = (xa+yb-xc-yd) */ |
xorjoep | 1:24714b45cd1b | 1495 | /* yb' = (ya-xb-yc+xd) */ |
xorjoep | 1:24714b45cd1b | 1496 | *__SIMD32(ptr1)++ = __SHASX(S, U); |
xorjoep | 1:24714b45cd1b | 1497 | |
xorjoep | 1:24714b45cd1b | 1498 | |
xorjoep | 1:24714b45cd1b | 1499 | /* xd' = (xa-yb-xc+yd) */ |
xorjoep | 1:24714b45cd1b | 1500 | /* yd' = (ya+xb-yc-xd) */ |
xorjoep | 1:24714b45cd1b | 1501 | *__SIMD32(ptr1)++ = __SHSAX(S, U); |
xorjoep | 1:24714b45cd1b | 1502 | |
xorjoep | 1:24714b45cd1b | 1503 | #else |
xorjoep | 1:24714b45cd1b | 1504 | |
xorjoep | 1:24714b45cd1b | 1505 | /* xb' = (xa+yb-xc-yd) */ |
xorjoep | 1:24714b45cd1b | 1506 | /* yb' = (ya-xb-yc+xd) */ |
xorjoep | 1:24714b45cd1b | 1507 | *__SIMD32(ptr1)++ = __SHSAX(S, U); |
xorjoep | 1:24714b45cd1b | 1508 | |
xorjoep | 1:24714b45cd1b | 1509 | |
xorjoep | 1:24714b45cd1b | 1510 | /* xd' = (xa-yb-xc+yd) */ |
xorjoep | 1:24714b45cd1b | 1511 | /* yd' = (ya+xb-yc-xd) */ |
xorjoep | 1:24714b45cd1b | 1512 | *__SIMD32(ptr1)++ = __SHASX(S, U); |
xorjoep | 1:24714b45cd1b | 1513 | |
xorjoep | 1:24714b45cd1b | 1514 | |
xorjoep | 1:24714b45cd1b | 1515 | #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ |
xorjoep | 1:24714b45cd1b | 1516 | |
xorjoep | 1:24714b45cd1b | 1517 | } while (--j); |
xorjoep | 1:24714b45cd1b | 1518 | |
xorjoep | 1:24714b45cd1b | 1519 | /* end of last stage process */ |
xorjoep | 1:24714b45cd1b | 1520 | |
xorjoep | 1:24714b45cd1b | 1521 | /* output is in 11.5(q5) format for the 1024 point */ |
xorjoep | 1:24714b45cd1b | 1522 | /* output is in 9.7(q7) format for the 256 point */ |
xorjoep | 1:24714b45cd1b | 1523 | /* output is in 7.9(q9) format for the 64 point */ |
xorjoep | 1:24714b45cd1b | 1524 | /* output is in 5.11(q11) format for the 16 point */ |
xorjoep | 1:24714b45cd1b | 1525 | |
xorjoep | 1:24714b45cd1b | 1526 | |
xorjoep | 1:24714b45cd1b | 1527 | #else |
xorjoep | 1:24714b45cd1b | 1528 | |
xorjoep | 1:24714b45cd1b | 1529 | /* Run the below code for Cortex-M0 */ |
xorjoep | 1:24714b45cd1b | 1530 | |
xorjoep | 1:24714b45cd1b | 1531 | q15_t R0, R1, S0, S1, T0, T1, U0, U1; |
xorjoep | 1:24714b45cd1b | 1532 | q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2; |
xorjoep | 1:24714b45cd1b | 1533 | uint32_t n1, n2, ic, i0, i1, i2, i3, j, k; |
xorjoep | 1:24714b45cd1b | 1534 | |
xorjoep | 1:24714b45cd1b | 1535 | /* Total process is divided into three stages */ |
xorjoep | 1:24714b45cd1b | 1536 | |
xorjoep | 1:24714b45cd1b | 1537 | /* process first stage, middle stages, & last stage */ |
xorjoep | 1:24714b45cd1b | 1538 | |
xorjoep | 1:24714b45cd1b | 1539 | /* Initializations for the first stage */ |
xorjoep | 1:24714b45cd1b | 1540 | n2 = fftLen; |
xorjoep | 1:24714b45cd1b | 1541 | n1 = n2; |
xorjoep | 1:24714b45cd1b | 1542 | |
xorjoep | 1:24714b45cd1b | 1543 | /* n2 = fftLen/4 */ |
xorjoep | 1:24714b45cd1b | 1544 | n2 >>= 2U; |
xorjoep | 1:24714b45cd1b | 1545 | |
xorjoep | 1:24714b45cd1b | 1546 | /* Index for twiddle coefficient */ |
xorjoep | 1:24714b45cd1b | 1547 | ic = 0U; |
xorjoep | 1:24714b45cd1b | 1548 | |
xorjoep | 1:24714b45cd1b | 1549 | /* Index for input read and output write */ |
xorjoep | 1:24714b45cd1b | 1550 | i0 = 0U; |
xorjoep | 1:24714b45cd1b | 1551 | |
xorjoep | 1:24714b45cd1b | 1552 | j = n2; |
xorjoep | 1:24714b45cd1b | 1553 | |
xorjoep | 1:24714b45cd1b | 1554 | /* Input is in 1.15(q15) format */ |
xorjoep | 1:24714b45cd1b | 1555 | |
xorjoep | 1:24714b45cd1b | 1556 | /* Start of first stage process */ |
xorjoep | 1:24714b45cd1b | 1557 | do |
xorjoep | 1:24714b45cd1b | 1558 | { |
xorjoep | 1:24714b45cd1b | 1559 | /* Butterfly implementation */ |
xorjoep | 1:24714b45cd1b | 1560 | |
xorjoep | 1:24714b45cd1b | 1561 | /* index calculation for the input as, */ |
xorjoep | 1:24714b45cd1b | 1562 | /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ |
xorjoep | 1:24714b45cd1b | 1563 | i1 = i0 + n2; |
xorjoep | 1:24714b45cd1b | 1564 | i2 = i1 + n2; |
xorjoep | 1:24714b45cd1b | 1565 | i3 = i2 + n2; |
xorjoep | 1:24714b45cd1b | 1566 | |
xorjoep | 1:24714b45cd1b | 1567 | /* Reading i0, i0+fftLen/2 inputs */ |
xorjoep | 1:24714b45cd1b | 1568 | /* input is down scale by 4 to avoid overflow */ |
xorjoep | 1:24714b45cd1b | 1569 | /* Read ya (real), xa(imag) input */ |
xorjoep | 1:24714b45cd1b | 1570 | T0 = pSrc16[i0 * 2U] >> 2U; |
xorjoep | 1:24714b45cd1b | 1571 | T1 = pSrc16[(i0 * 2U) + 1U] >> 2U; |
xorjoep | 1:24714b45cd1b | 1572 | /* input is down scale by 4 to avoid overflow */ |
xorjoep | 1:24714b45cd1b | 1573 | /* Read yc (real), xc(imag) input */ |
xorjoep | 1:24714b45cd1b | 1574 | S0 = pSrc16[i2 * 2U] >> 2U; |
xorjoep | 1:24714b45cd1b | 1575 | S1 = pSrc16[(i2 * 2U) + 1U] >> 2U; |
xorjoep | 1:24714b45cd1b | 1576 | |
xorjoep | 1:24714b45cd1b | 1577 | /* R0 = (ya + yc), R1 = (xa + xc) */ |
xorjoep | 1:24714b45cd1b | 1578 | R0 = __SSAT(T0 + S0, 16U); |
xorjoep | 1:24714b45cd1b | 1579 | R1 = __SSAT(T1 + S1, 16U); |
xorjoep | 1:24714b45cd1b | 1580 | /* S0 = (ya - yc), S1 = (xa - xc) */ |
xorjoep | 1:24714b45cd1b | 1581 | S0 = __SSAT(T0 - S0, 16U); |
xorjoep | 1:24714b45cd1b | 1582 | S1 = __SSAT(T1 - S1, 16U); |
xorjoep | 1:24714b45cd1b | 1583 | |
xorjoep | 1:24714b45cd1b | 1584 | /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ |
xorjoep | 1:24714b45cd1b | 1585 | /* input is down scale by 4 to avoid overflow */ |
xorjoep | 1:24714b45cd1b | 1586 | /* Read yb (real), xb(imag) input */ |
xorjoep | 1:24714b45cd1b | 1587 | T0 = pSrc16[i1 * 2U] >> 2U; |
xorjoep | 1:24714b45cd1b | 1588 | T1 = pSrc16[(i1 * 2U) + 1U] >> 2U; |
xorjoep | 1:24714b45cd1b | 1589 | /* Read yd (real), xd(imag) input */ |
xorjoep | 1:24714b45cd1b | 1590 | /* input is down scale by 4 to avoid overflow */ |
xorjoep | 1:24714b45cd1b | 1591 | U0 = pSrc16[i3 * 2U] >> 2U; |
xorjoep | 1:24714b45cd1b | 1592 | U1 = pSrc16[(i3 * 2U) + 1U] >> 2U; |
xorjoep | 1:24714b45cd1b | 1593 | |
xorjoep | 1:24714b45cd1b | 1594 | /* T0 = (yb + yd), T1 = (xb + xd) */ |
xorjoep | 1:24714b45cd1b | 1595 | T0 = __SSAT(T0 + U0, 16U); |
xorjoep | 1:24714b45cd1b | 1596 | T1 = __SSAT(T1 + U1, 16U); |
xorjoep | 1:24714b45cd1b | 1597 | |
xorjoep | 1:24714b45cd1b | 1598 | /* writing the butterfly processed i0 sample */ |
xorjoep | 1:24714b45cd1b | 1599 | /* xa' = xa + xb + xc + xd */ |
xorjoep | 1:24714b45cd1b | 1600 | /* ya' = ya + yb + yc + yd */ |
xorjoep | 1:24714b45cd1b | 1601 | pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U); |
xorjoep | 1:24714b45cd1b | 1602 | pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U); |
xorjoep | 1:24714b45cd1b | 1603 | |
xorjoep | 1:24714b45cd1b | 1604 | /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc)- (xb + xd) */ |
xorjoep | 1:24714b45cd1b | 1605 | R0 = __SSAT(R0 - T0, 16U); |
xorjoep | 1:24714b45cd1b | 1606 | R1 = __SSAT(R1 - T1, 16U); |
xorjoep | 1:24714b45cd1b | 1607 | /* co2 & si2 are read from Coefficient pointer */ |
xorjoep | 1:24714b45cd1b | 1608 | Co2 = pCoef16[2U * ic * 2U]; |
xorjoep | 1:24714b45cd1b | 1609 | Si2 = pCoef16[(2U * ic * 2U) + 1U]; |
xorjoep | 1:24714b45cd1b | 1610 | /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */ |
xorjoep | 1:24714b45cd1b | 1611 | out1 = (q15_t) ((Co2 * R0 - Si2 * R1) >> 16U); |
xorjoep | 1:24714b45cd1b | 1612 | /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */ |
xorjoep | 1:24714b45cd1b | 1613 | out2 = (q15_t) ((Si2 * R0 + Co2 * R1) >> 16U); |
xorjoep | 1:24714b45cd1b | 1614 | |
xorjoep | 1:24714b45cd1b | 1615 | /* Reading i0+fftLen/4 */ |
xorjoep | 1:24714b45cd1b | 1616 | /* input is down scale by 4 to avoid overflow */ |
xorjoep | 1:24714b45cd1b | 1617 | /* T0 = yb, T1 = xb */ |
xorjoep | 1:24714b45cd1b | 1618 | T0 = pSrc16[i1 * 2U] >> 2U; |
xorjoep | 1:24714b45cd1b | 1619 | T1 = pSrc16[(i1 * 2U) + 1U] >> 2U; |
xorjoep | 1:24714b45cd1b | 1620 | |
xorjoep | 1:24714b45cd1b | 1621 | /* writing the butterfly processed i0 + fftLen/4 sample */ |
xorjoep | 1:24714b45cd1b | 1622 | /* writing output(xc', yc') in little endian format */ |
xorjoep | 1:24714b45cd1b | 1623 | pSrc16[i1 * 2U] = out1; |
xorjoep | 1:24714b45cd1b | 1624 | pSrc16[(i1 * 2U) + 1U] = out2; |
xorjoep | 1:24714b45cd1b | 1625 | |
xorjoep | 1:24714b45cd1b | 1626 | /* Butterfly calculations */ |
xorjoep | 1:24714b45cd1b | 1627 | /* input is down scale by 4 to avoid overflow */ |
xorjoep | 1:24714b45cd1b | 1628 | /* U0 = yd, U1 = xd */ |
xorjoep | 1:24714b45cd1b | 1629 | U0 = pSrc16[i3 * 2U] >> 2U; |
xorjoep | 1:24714b45cd1b | 1630 | U1 = pSrc16[(i3 * 2U) + 1U] >> 2U; |
xorjoep | 1:24714b45cd1b | 1631 | |
xorjoep | 1:24714b45cd1b | 1632 | /* T0 = yb-yd, T1 = xb-xd */ |
xorjoep | 1:24714b45cd1b | 1633 | T0 = __SSAT(T0 - U0, 16U); |
xorjoep | 1:24714b45cd1b | 1634 | T1 = __SSAT(T1 - U1, 16U); |
xorjoep | 1:24714b45cd1b | 1635 | /* R0 = (ya-yc) + (xb-xd), R1 = (xa-xc) - (yb-yd) */ |
xorjoep | 1:24714b45cd1b | 1636 | R0 = (q15_t) __SSAT((q31_t) (S0 + T1), 16); |
xorjoep | 1:24714b45cd1b | 1637 | R1 = (q15_t) __SSAT((q31_t) (S1 - T0), 16); |
xorjoep | 1:24714b45cd1b | 1638 | /* S0 = (ya-yc) - (xb-xd), S1 = (xa-xc) + (yb-yd) */ |
xorjoep | 1:24714b45cd1b | 1639 | S0 = (q15_t) __SSAT((q31_t) (S0 - T1), 16); |
xorjoep | 1:24714b45cd1b | 1640 | S1 = (q15_t) __SSAT((q31_t) (S1 + T0), 16); |
xorjoep | 1:24714b45cd1b | 1641 | |
xorjoep | 1:24714b45cd1b | 1642 | /* co1 & si1 are read from Coefficient pointer */ |
xorjoep | 1:24714b45cd1b | 1643 | Co1 = pCoef16[ic * 2U]; |
xorjoep | 1:24714b45cd1b | 1644 | Si1 = pCoef16[(ic * 2U) + 1U]; |
xorjoep | 1:24714b45cd1b | 1645 | /* Butterfly process for the i0+fftLen/2 sample */ |
xorjoep | 1:24714b45cd1b | 1646 | /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */ |
xorjoep | 1:24714b45cd1b | 1647 | out1 = (q15_t) ((Co1 * S0 - Si1 * S1) >> 16U); |
xorjoep | 1:24714b45cd1b | 1648 | /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */ |
xorjoep | 1:24714b45cd1b | 1649 | out2 = (q15_t) ((Si1 * S0 + Co1 * S1) >> 16U); |
xorjoep | 1:24714b45cd1b | 1650 | /* writing output(xb', yb') in little endian format */ |
xorjoep | 1:24714b45cd1b | 1651 | pSrc16[i2 * 2U] = out1; |
xorjoep | 1:24714b45cd1b | 1652 | pSrc16[(i2 * 2U) + 1U] = out2; |
xorjoep | 1:24714b45cd1b | 1653 | |
xorjoep | 1:24714b45cd1b | 1654 | /* Co3 & si3 are read from Coefficient pointer */ |
xorjoep | 1:24714b45cd1b | 1655 | Co3 = pCoef16[3U * ic * 2U]; |
xorjoep | 1:24714b45cd1b | 1656 | Si3 = pCoef16[(3U * ic * 2U) + 1U]; |
xorjoep | 1:24714b45cd1b | 1657 | /* Butterfly process for the i0+3fftLen/4 sample */ |
xorjoep | 1:24714b45cd1b | 1658 | /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */ |
xorjoep | 1:24714b45cd1b | 1659 | out1 = (q15_t) ((Co3 * R0 - Si3 * R1) >> 16U); |
xorjoep | 1:24714b45cd1b | 1660 | /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */ |
xorjoep | 1:24714b45cd1b | 1661 | out2 = (q15_t) ((Si3 * R0 + Co3 * R1) >> 16U); |
xorjoep | 1:24714b45cd1b | 1662 | /* writing output(xd', yd') in little endian format */ |
xorjoep | 1:24714b45cd1b | 1663 | pSrc16[i3 * 2U] = out1; |
xorjoep | 1:24714b45cd1b | 1664 | pSrc16[(i3 * 2U) + 1U] = out2; |
xorjoep | 1:24714b45cd1b | 1665 | |
xorjoep | 1:24714b45cd1b | 1666 | /* Twiddle coefficients index modifier */ |
xorjoep | 1:24714b45cd1b | 1667 | ic = ic + twidCoefModifier; |
xorjoep | 1:24714b45cd1b | 1668 | |
xorjoep | 1:24714b45cd1b | 1669 | /* Updating input index */ |
xorjoep | 1:24714b45cd1b | 1670 | i0 = i0 + 1U; |
xorjoep | 1:24714b45cd1b | 1671 | |
xorjoep | 1:24714b45cd1b | 1672 | } while (--j); |
xorjoep | 1:24714b45cd1b | 1673 | |
xorjoep | 1:24714b45cd1b | 1674 | /* End of first stage process */ |
xorjoep | 1:24714b45cd1b | 1675 | |
xorjoep | 1:24714b45cd1b | 1676 | /* data is in 4.11(q11) format */ |
xorjoep | 1:24714b45cd1b | 1677 | |
xorjoep | 1:24714b45cd1b | 1678 | |
xorjoep | 1:24714b45cd1b | 1679 | /* Start of Middle stage process */ |
xorjoep | 1:24714b45cd1b | 1680 | |
xorjoep | 1:24714b45cd1b | 1681 | /* Twiddle coefficients index modifier */ |
xorjoep | 1:24714b45cd1b | 1682 | twidCoefModifier <<= 2U; |
xorjoep | 1:24714b45cd1b | 1683 | |
xorjoep | 1:24714b45cd1b | 1684 | /* Calculation of Middle stage */ |
xorjoep | 1:24714b45cd1b | 1685 | for (k = fftLen / 4U; k > 4U; k >>= 2U) |
xorjoep | 1:24714b45cd1b | 1686 | { |
xorjoep | 1:24714b45cd1b | 1687 | /* Initializations for the middle stage */ |
xorjoep | 1:24714b45cd1b | 1688 | n1 = n2; |
xorjoep | 1:24714b45cd1b | 1689 | n2 >>= 2U; |
xorjoep | 1:24714b45cd1b | 1690 | ic = 0U; |
xorjoep | 1:24714b45cd1b | 1691 | |
xorjoep | 1:24714b45cd1b | 1692 | for (j = 0U; j <= (n2 - 1U); j++) |
xorjoep | 1:24714b45cd1b | 1693 | { |
xorjoep | 1:24714b45cd1b | 1694 | /* index calculation for the coefficients */ |
xorjoep | 1:24714b45cd1b | 1695 | Co1 = pCoef16[ic * 2U]; |
xorjoep | 1:24714b45cd1b | 1696 | Si1 = pCoef16[(ic * 2U) + 1U]; |
xorjoep | 1:24714b45cd1b | 1697 | Co2 = pCoef16[2U * ic * 2U]; |
xorjoep | 1:24714b45cd1b | 1698 | Si2 = pCoef16[2U * ic * 2U + 1U]; |
xorjoep | 1:24714b45cd1b | 1699 | Co3 = pCoef16[3U * ic * 2U]; |
xorjoep | 1:24714b45cd1b | 1700 | Si3 = pCoef16[(3U * ic * 2U) + 1U]; |
xorjoep | 1:24714b45cd1b | 1701 | |
xorjoep | 1:24714b45cd1b | 1702 | /* Twiddle coefficients index modifier */ |
xorjoep | 1:24714b45cd1b | 1703 | ic = ic + twidCoefModifier; |
xorjoep | 1:24714b45cd1b | 1704 | |
xorjoep | 1:24714b45cd1b | 1705 | /* Butterfly implementation */ |
xorjoep | 1:24714b45cd1b | 1706 | for (i0 = j; i0 < fftLen; i0 += n1) |
xorjoep | 1:24714b45cd1b | 1707 | { |
xorjoep | 1:24714b45cd1b | 1708 | /* index calculation for the input as, */ |
xorjoep | 1:24714b45cd1b | 1709 | /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ |
xorjoep | 1:24714b45cd1b | 1710 | i1 = i0 + n2; |
xorjoep | 1:24714b45cd1b | 1711 | i2 = i1 + n2; |
xorjoep | 1:24714b45cd1b | 1712 | i3 = i2 + n2; |
xorjoep | 1:24714b45cd1b | 1713 | |
xorjoep | 1:24714b45cd1b | 1714 | /* Reading i0, i0+fftLen/2 inputs */ |
xorjoep | 1:24714b45cd1b | 1715 | /* Read ya (real), xa(imag) input */ |
xorjoep | 1:24714b45cd1b | 1716 | T0 = pSrc16[i0 * 2U]; |
xorjoep | 1:24714b45cd1b | 1717 | T1 = pSrc16[(i0 * 2U) + 1U]; |
xorjoep | 1:24714b45cd1b | 1718 | |
xorjoep | 1:24714b45cd1b | 1719 | /* Read yc (real), xc(imag) input */ |
xorjoep | 1:24714b45cd1b | 1720 | S0 = pSrc16[i2 * 2U]; |
xorjoep | 1:24714b45cd1b | 1721 | S1 = pSrc16[(i2 * 2U) + 1U]; |
xorjoep | 1:24714b45cd1b | 1722 | |
xorjoep | 1:24714b45cd1b | 1723 | |
xorjoep | 1:24714b45cd1b | 1724 | /* R0 = (ya + yc), R1 = (xa + xc) */ |
xorjoep | 1:24714b45cd1b | 1725 | R0 = __SSAT(T0 + S0, 16U); |
xorjoep | 1:24714b45cd1b | 1726 | R1 = __SSAT(T1 + S1, 16U); |
xorjoep | 1:24714b45cd1b | 1727 | /* S0 = (ya - yc), S1 = (xa - xc) */ |
xorjoep | 1:24714b45cd1b | 1728 | S0 = __SSAT(T0 - S0, 16U); |
xorjoep | 1:24714b45cd1b | 1729 | S1 = __SSAT(T1 - S1, 16U); |
xorjoep | 1:24714b45cd1b | 1730 | |
xorjoep | 1:24714b45cd1b | 1731 | /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ |
xorjoep | 1:24714b45cd1b | 1732 | /* Read yb (real), xb(imag) input */ |
xorjoep | 1:24714b45cd1b | 1733 | T0 = pSrc16[i1 * 2U]; |
xorjoep | 1:24714b45cd1b | 1734 | T1 = pSrc16[(i1 * 2U) + 1U]; |
xorjoep | 1:24714b45cd1b | 1735 | |
xorjoep | 1:24714b45cd1b | 1736 | /* Read yd (real), xd(imag) input */ |
xorjoep | 1:24714b45cd1b | 1737 | U0 = pSrc16[i3 * 2U]; |
xorjoep | 1:24714b45cd1b | 1738 | U1 = pSrc16[(i3 * 2U) + 1U]; |
xorjoep | 1:24714b45cd1b | 1739 | |
xorjoep | 1:24714b45cd1b | 1740 | /* T0 = (yb + yd), T1 = (xb + xd) */ |
xorjoep | 1:24714b45cd1b | 1741 | T0 = __SSAT(T0 + U0, 16U); |
xorjoep | 1:24714b45cd1b | 1742 | T1 = __SSAT(T1 + U1, 16U); |
xorjoep | 1:24714b45cd1b | 1743 | |
xorjoep | 1:24714b45cd1b | 1744 | /* writing the butterfly processed i0 sample */ |
xorjoep | 1:24714b45cd1b | 1745 | /* xa' = xa + xb + xc + xd */ |
xorjoep | 1:24714b45cd1b | 1746 | /* ya' = ya + yb + yc + yd */ |
xorjoep | 1:24714b45cd1b | 1747 | pSrc16[i0 * 2U] = ((R0 >> 1U) + (T0 >> 1U)) >> 1U; |
xorjoep | 1:24714b45cd1b | 1748 | pSrc16[(i0 * 2U) + 1U] = ((R1 >> 1U) + (T1 >> 1U)) >> 1U; |
xorjoep | 1:24714b45cd1b | 1749 | |
xorjoep | 1:24714b45cd1b | 1750 | /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */ |
xorjoep | 1:24714b45cd1b | 1751 | R0 = (R0 >> 1U) - (T0 >> 1U); |
xorjoep | 1:24714b45cd1b | 1752 | R1 = (R1 >> 1U) - (T1 >> 1U); |
xorjoep | 1:24714b45cd1b | 1753 | |
xorjoep | 1:24714b45cd1b | 1754 | /* (ya-yb+yc-yd)* (si2) - (xa-xb+xc-xd)* co2 */ |
xorjoep | 1:24714b45cd1b | 1755 | out1 = (q15_t) ((Co2 * R0 - Si2 * R1) >> 16); |
xorjoep | 1:24714b45cd1b | 1756 | /* (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */ |
xorjoep | 1:24714b45cd1b | 1757 | out2 = (q15_t) ((Si2 * R0 + Co2 * R1) >> 16); |
xorjoep | 1:24714b45cd1b | 1758 | |
xorjoep | 1:24714b45cd1b | 1759 | /* Reading i0+fftLen/4 */ |
xorjoep | 1:24714b45cd1b | 1760 | /* Read yb (real), xb(imag) input */ |
xorjoep | 1:24714b45cd1b | 1761 | T0 = pSrc16[i1 * 2U]; |
xorjoep | 1:24714b45cd1b | 1762 | T1 = pSrc16[(i1 * 2U) + 1U]; |
xorjoep | 1:24714b45cd1b | 1763 | |
xorjoep | 1:24714b45cd1b | 1764 | /* writing the butterfly processed i0 + fftLen/4 sample */ |
xorjoep | 1:24714b45cd1b | 1765 | /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */ |
xorjoep | 1:24714b45cd1b | 1766 | /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */ |
xorjoep | 1:24714b45cd1b | 1767 | pSrc16[i1 * 2U] = out1; |
xorjoep | 1:24714b45cd1b | 1768 | pSrc16[(i1 * 2U) + 1U] = out2; |
xorjoep | 1:24714b45cd1b | 1769 | |
xorjoep | 1:24714b45cd1b | 1770 | /* Butterfly calculations */ |
xorjoep | 1:24714b45cd1b | 1771 | /* Read yd (real), xd(imag) input */ |
xorjoep | 1:24714b45cd1b | 1772 | U0 = pSrc16[i3 * 2U]; |
xorjoep | 1:24714b45cd1b | 1773 | U1 = pSrc16[(i3 * 2U) + 1U]; |
xorjoep | 1:24714b45cd1b | 1774 | |
xorjoep | 1:24714b45cd1b | 1775 | /* T0 = yb-yd, T1 = xb-xd */ |
xorjoep | 1:24714b45cd1b | 1776 | T0 = __SSAT(T0 - U0, 16U); |
xorjoep | 1:24714b45cd1b | 1777 | T1 = __SSAT(T1 - U1, 16U); |
xorjoep | 1:24714b45cd1b | 1778 | |
xorjoep | 1:24714b45cd1b | 1779 | /* R0 = ((ya-yc) >> 1) + ((xb-xd) >> 1), R1 = ((xa-xc) >> 1) - ((yb-yd) >> 1) */ |
xorjoep | 1:24714b45cd1b | 1780 | R0 = (S0 >> 1U) + (T1 >> 1U); |
xorjoep | 1:24714b45cd1b | 1781 | R1 = (S1 >> 1U) - (T0 >> 1U); |
xorjoep | 1:24714b45cd1b | 1782 | |
xorjoep | 1:24714b45cd1b | 1783 | /* S0 = ((ya-yc) >> 1) - ((xb-xd) >> 1), S1 = ((xa-xc) >> 1) + ((yb-yd) >> 1) */ |
xorjoep | 1:24714b45cd1b | 1784 | S0 = (S0 >> 1U) - (T1 >> 1U); |
xorjoep | 1:24714b45cd1b | 1785 | S1 = (S1 >> 1U) + (T0 >> 1U); |
xorjoep | 1:24714b45cd1b | 1786 | |
xorjoep | 1:24714b45cd1b | 1787 | /* Butterfly process for the i0+fftLen/2 sample */ |
xorjoep | 1:24714b45cd1b | 1788 | out1 = (q15_t) ((Co1 * S0 - Si1 * S1) >> 16U); |
xorjoep | 1:24714b45cd1b | 1789 | out2 = (q15_t) ((Si1 * S0 + Co1 * S1) >> 16U); |
xorjoep | 1:24714b45cd1b | 1790 | /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */ |
xorjoep | 1:24714b45cd1b | 1791 | /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */ |
xorjoep | 1:24714b45cd1b | 1792 | pSrc16[i2 * 2U] = out1; |
xorjoep | 1:24714b45cd1b | 1793 | pSrc16[(i2 * 2U) + 1U] = out2; |
xorjoep | 1:24714b45cd1b | 1794 | |
xorjoep | 1:24714b45cd1b | 1795 | /* Butterfly process for the i0+3fftLen/4 sample */ |
xorjoep | 1:24714b45cd1b | 1796 | out1 = (q15_t) ((Co3 * R0 - Si3 * R1) >> 16U); |
xorjoep | 1:24714b45cd1b | 1797 | |
xorjoep | 1:24714b45cd1b | 1798 | out2 = (q15_t) ((Si3 * R0 + Co3 * R1) >> 16U); |
xorjoep | 1:24714b45cd1b | 1799 | /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */ |
xorjoep | 1:24714b45cd1b | 1800 | /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */ |
xorjoep | 1:24714b45cd1b | 1801 | pSrc16[i3 * 2U] = out1; |
xorjoep | 1:24714b45cd1b | 1802 | pSrc16[(i3 * 2U) + 1U] = out2; |
xorjoep | 1:24714b45cd1b | 1803 | |
xorjoep | 1:24714b45cd1b | 1804 | |
xorjoep | 1:24714b45cd1b | 1805 | } |
xorjoep | 1:24714b45cd1b | 1806 | } |
xorjoep | 1:24714b45cd1b | 1807 | /* Twiddle coefficients index modifier */ |
xorjoep | 1:24714b45cd1b | 1808 | twidCoefModifier <<= 2U; |
xorjoep | 1:24714b45cd1b | 1809 | } |
xorjoep | 1:24714b45cd1b | 1810 | /* End of Middle stages process */ |
xorjoep | 1:24714b45cd1b | 1811 | |
xorjoep | 1:24714b45cd1b | 1812 | |
xorjoep | 1:24714b45cd1b | 1813 | /* data is in 10.6(q6) format for the 1024 point */ |
xorjoep | 1:24714b45cd1b | 1814 | /* data is in 8.8(q8) format for the 256 point */ |
xorjoep | 1:24714b45cd1b | 1815 | /* data is in 6.10(q10) format for the 64 point */ |
xorjoep | 1:24714b45cd1b | 1816 | /* data is in 4.12(q12) format for the 16 point */ |
xorjoep | 1:24714b45cd1b | 1817 | |
xorjoep | 1:24714b45cd1b | 1818 | /* start of last stage process */ |
xorjoep | 1:24714b45cd1b | 1819 | |
xorjoep | 1:24714b45cd1b | 1820 | |
xorjoep | 1:24714b45cd1b | 1821 | /* Initializations for the last stage */ |
xorjoep | 1:24714b45cd1b | 1822 | n1 = n2; |
xorjoep | 1:24714b45cd1b | 1823 | n2 >>= 2U; |
xorjoep | 1:24714b45cd1b | 1824 | |
xorjoep | 1:24714b45cd1b | 1825 | /* Butterfly implementation */ |
xorjoep | 1:24714b45cd1b | 1826 | for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1) |
xorjoep | 1:24714b45cd1b | 1827 | { |
xorjoep | 1:24714b45cd1b | 1828 | /* index calculation for the input as, */ |
xorjoep | 1:24714b45cd1b | 1829 | /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ |
xorjoep | 1:24714b45cd1b | 1830 | i1 = i0 + n2; |
xorjoep | 1:24714b45cd1b | 1831 | i2 = i1 + n2; |
xorjoep | 1:24714b45cd1b | 1832 | i3 = i2 + n2; |
xorjoep | 1:24714b45cd1b | 1833 | |
xorjoep | 1:24714b45cd1b | 1834 | /* Reading i0, i0+fftLen/2 inputs */ |
xorjoep | 1:24714b45cd1b | 1835 | /* Read ya (real), xa(imag) input */ |
xorjoep | 1:24714b45cd1b | 1836 | T0 = pSrc16[i0 * 2U]; |
xorjoep | 1:24714b45cd1b | 1837 | T1 = pSrc16[(i0 * 2U) + 1U]; |
xorjoep | 1:24714b45cd1b | 1838 | /* Read yc (real), xc(imag) input */ |
xorjoep | 1:24714b45cd1b | 1839 | S0 = pSrc16[i2 * 2U]; |
xorjoep | 1:24714b45cd1b | 1840 | S1 = pSrc16[(i2 * 2U) + 1U]; |
xorjoep | 1:24714b45cd1b | 1841 | |
xorjoep | 1:24714b45cd1b | 1842 | /* R0 = (ya + yc), R1 = (xa + xc) */ |
xorjoep | 1:24714b45cd1b | 1843 | R0 = __SSAT(T0 + S0, 16U); |
xorjoep | 1:24714b45cd1b | 1844 | R1 = __SSAT(T1 + S1, 16U); |
xorjoep | 1:24714b45cd1b | 1845 | /* S0 = (ya - yc), S1 = (xa - xc) */ |
xorjoep | 1:24714b45cd1b | 1846 | S0 = __SSAT(T0 - S0, 16U); |
xorjoep | 1:24714b45cd1b | 1847 | S1 = __SSAT(T1 - S1, 16U); |
xorjoep | 1:24714b45cd1b | 1848 | |
xorjoep | 1:24714b45cd1b | 1849 | /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ |
xorjoep | 1:24714b45cd1b | 1850 | /* Read yb (real), xb(imag) input */ |
xorjoep | 1:24714b45cd1b | 1851 | T0 = pSrc16[i1 * 2U]; |
xorjoep | 1:24714b45cd1b | 1852 | T1 = pSrc16[(i1 * 2U) + 1U]; |
xorjoep | 1:24714b45cd1b | 1853 | /* Read yd (real), xd(imag) input */ |
xorjoep | 1:24714b45cd1b | 1854 | U0 = pSrc16[i3 * 2U]; |
xorjoep | 1:24714b45cd1b | 1855 | U1 = pSrc16[(i3 * 2U) + 1U]; |
xorjoep | 1:24714b45cd1b | 1856 | |
xorjoep | 1:24714b45cd1b | 1857 | /* T0 = (yb + yd), T1 = (xb + xd) */ |
xorjoep | 1:24714b45cd1b | 1858 | T0 = __SSAT(T0 + U0, 16U); |
xorjoep | 1:24714b45cd1b | 1859 | T1 = __SSAT(T1 + U1, 16U); |
xorjoep | 1:24714b45cd1b | 1860 | |
xorjoep | 1:24714b45cd1b | 1861 | /* writing the butterfly processed i0 sample */ |
xorjoep | 1:24714b45cd1b | 1862 | /* xa' = xa + xb + xc + xd */ |
xorjoep | 1:24714b45cd1b | 1863 | /* ya' = ya + yb + yc + yd */ |
xorjoep | 1:24714b45cd1b | 1864 | pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U); |
xorjoep | 1:24714b45cd1b | 1865 | pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U); |
xorjoep | 1:24714b45cd1b | 1866 | |
xorjoep | 1:24714b45cd1b | 1867 | /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */ |
xorjoep | 1:24714b45cd1b | 1868 | R0 = (R0 >> 1U) - (T0 >> 1U); |
xorjoep | 1:24714b45cd1b | 1869 | R1 = (R1 >> 1U) - (T1 >> 1U); |
xorjoep | 1:24714b45cd1b | 1870 | |
xorjoep | 1:24714b45cd1b | 1871 | /* Read yb (real), xb(imag) input */ |
xorjoep | 1:24714b45cd1b | 1872 | T0 = pSrc16[i1 * 2U]; |
xorjoep | 1:24714b45cd1b | 1873 | T1 = pSrc16[(i1 * 2U) + 1U]; |
xorjoep | 1:24714b45cd1b | 1874 | |
xorjoep | 1:24714b45cd1b | 1875 | /* writing the butterfly processed i0 + fftLen/4 sample */ |
xorjoep | 1:24714b45cd1b | 1876 | /* xc' = (xa-xb+xc-xd) */ |
xorjoep | 1:24714b45cd1b | 1877 | /* yc' = (ya-yb+yc-yd) */ |
xorjoep | 1:24714b45cd1b | 1878 | pSrc16[i1 * 2U] = R0; |
xorjoep | 1:24714b45cd1b | 1879 | pSrc16[(i1 * 2U) + 1U] = R1; |
xorjoep | 1:24714b45cd1b | 1880 | |
xorjoep | 1:24714b45cd1b | 1881 | /* Read yd (real), xd(imag) input */ |
xorjoep | 1:24714b45cd1b | 1882 | U0 = pSrc16[i3 * 2U]; |
xorjoep | 1:24714b45cd1b | 1883 | U1 = pSrc16[(i3 * 2U) + 1U]; |
xorjoep | 1:24714b45cd1b | 1884 | /* T0 = (yb - yd), T1 = (xb - xd) */ |
xorjoep | 1:24714b45cd1b | 1885 | T0 = __SSAT(T0 - U0, 16U); |
xorjoep | 1:24714b45cd1b | 1886 | T1 = __SSAT(T1 - U1, 16U); |
xorjoep | 1:24714b45cd1b | 1887 | |
xorjoep | 1:24714b45cd1b | 1888 | /* writing the butterfly processed i0 + fftLen/2 sample */ |
xorjoep | 1:24714b45cd1b | 1889 | /* xb' = (xa-yb-xc+yd) */ |
xorjoep | 1:24714b45cd1b | 1890 | /* yb' = (ya+xb-yc-xd) */ |
xorjoep | 1:24714b45cd1b | 1891 | pSrc16[i2 * 2U] = (S0 >> 1U) - (T1 >> 1U); |
xorjoep | 1:24714b45cd1b | 1892 | pSrc16[(i2 * 2U) + 1U] = (S1 >> 1U) + (T0 >> 1U); |
xorjoep | 1:24714b45cd1b | 1893 | |
xorjoep | 1:24714b45cd1b | 1894 | |
xorjoep | 1:24714b45cd1b | 1895 | /* writing the butterfly processed i0 + 3fftLen/4 sample */ |
xorjoep | 1:24714b45cd1b | 1896 | /* xd' = (xa+yb-xc-yd) */ |
xorjoep | 1:24714b45cd1b | 1897 | /* yd' = (ya-xb-yc+xd) */ |
xorjoep | 1:24714b45cd1b | 1898 | pSrc16[i3 * 2U] = (S0 >> 1U) + (T1 >> 1U); |
xorjoep | 1:24714b45cd1b | 1899 | pSrc16[(i3 * 2U) + 1U] = (S1 >> 1U) - (T0 >> 1U); |
xorjoep | 1:24714b45cd1b | 1900 | } |
xorjoep | 1:24714b45cd1b | 1901 | /* end of last stage process */ |
xorjoep | 1:24714b45cd1b | 1902 | |
xorjoep | 1:24714b45cd1b | 1903 | /* output is in 11.5(q5) format for the 1024 point */ |
xorjoep | 1:24714b45cd1b | 1904 | /* output is in 9.7(q7) format for the 256 point */ |
xorjoep | 1:24714b45cd1b | 1905 | /* output is in 7.9(q9) format for the 64 point */ |
xorjoep | 1:24714b45cd1b | 1906 | /* output is in 5.11(q11) format for the 16 point */ |
xorjoep | 1:24714b45cd1b | 1907 | |
xorjoep | 1:24714b45cd1b | 1908 | #endif /* #if defined (ARM_MATH_DSP) */ |
xorjoep | 1:24714b45cd1b | 1909 | |
xorjoep | 1:24714b45cd1b | 1910 | } |