CMSIS DSP Lib
Fork of mbed-dsp by
Diff: cmsis_dsp/TransformFunctions/arm_cfft_f32.c
- Revision:
- 3:7a284390b0ce
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cmsis_dsp/TransformFunctions/arm_cfft_f32.c Fri Nov 08 13:45:10 2013 +0000 @@ -0,0 +1,616 @@ +/* ---------------------------------------------------------------------- +* Copyright (C) 2010-2013 ARM Limited. All rights reserved. +* +* $Date: 17. January 2013 +* $Revision: V1.4.1 +* +* Project: CMSIS DSP Library +* Title: arm_cfft_f32.c +* +* Description: Combined Radix Decimation in Frequency CFFT Floating point processing function +* +* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions +* are met: +* - Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* - Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in +* the documentation and/or other materials provided with the +* distribution. +* - Neither the name of ARM LIMITED nor the names of its contributors +* may be used to endorse or promote products derived from this +* software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +* POSSIBILITY OF SUCH DAMAGE. +* -------------------------------------------------------------------- */ + + +#include "arm_math.h" +#include "arm_common_tables.h" + +extern void arm_radix8_butterfly_f32( + float32_t * pSrc, + uint16_t fftLen, + const float32_t * pCoef, + uint16_t twidCoefModifier); + +extern void arm_bitreversal_32( + uint32_t * pSrc, + const uint16_t bitRevLen, + const uint16_t * pBitRevTable); + +/** +* @ingroup groupTransforms +*/ + +/** +* @defgroup ComplexFFT Complex FFT Functions +* +* \par +* The Fast Fourier Transform (FFT) is an efficient algorithm for computing the +* Discrete Fourier Transform (DFT). The FFT can be orders of magnitude faster +* than the DFT, especially for long lengths. +* The algorithms described in this section +* operate on complex data. A separate set of functions is devoted to handling +* of real sequences. +* \par +* There are separate algorithms for handling floating-point, Q15, and Q31 data +* types. The algorithms available for each data type are described next. +* \par +* The FFT functions operate in-place. That is, the array holding the input data +* will also be used to hold the corresponding result. The input data is complex +* and contains <code>2*fftLen</code> interleaved values as shown below. +* <pre> {real[0], imag[0], real[1], imag[1],..} </pre> +* The FFT result will be contained in the same array and the frequency domain +* values will have the same interleaving. +* +* \par Floating-point +* The floating-point complex FFT uses a mixed-radix algorithm. Multiple radix-8 +* stages are performed along with a single radix-2 or radix-4 stage, as needed. +* The algorithm supports lengths of [16, 32, 64, ..., 4096] and each length uses +* a different twiddle factor table. +* \par +* The function uses the standard FFT definition and output values may grow by a +* factor of <code>fftLen</code> when computing the forward transform. The +* inverse transform includes a scale of <code>1/fftLen</code> as part of the +* calculation and this matches the textbook definition of the inverse FFT. +* \par +* Preinitialized data structures containing twiddle factors and bit reversal +* tables are provided and defined in <code>arm_const_structs.h</code>. Include +* this header in your function and then pass one of the constant structures as +* an argument to arm_cfft_f32. For example: +* \par +* <code>arm_cfft_f32(arm_cfft_sR_f32_len64, pSrc, 1, 1)</code> +* \par +* computes a 64-point inverse complex FFT including bit reversal. +* The data structures are treated as constant data and not modified during the +* calculation. The same data structure can be reused for multiple transforms +* including mixing forward and inverse transforms. +* \par +* Earlier releases of the library provided separate radix-2 and radix-4 +* algorithms that operated on floating-point data. These functions are still +* provided but are deprecated. The older functions are slower and less general +* than the new functions. +* \par +* An example of initialization of the constants for the arm_cfft_f32 function follows: +* \par +* const static arm_cfft_instance_f32 *S; +* ... +* switch (length) { +* case 16: +* S = & arm_cfft_sR_f32_len16; +* break; +* case 32: +* S = & arm_cfft_sR_f32_len32; +* break; +* case 64: +* S = & arm_cfft_sR_f32_len64; +* break; +* case 128: +* S = & arm_cfft_sR_f32_len128; +* break; +* case 256: +* S = & arm_cfft_sR_f32_len256; +* break; +* case 512: +* S = & arm_cfft_sR_f32_len512; +* break; +* case 1024: +* S = & arm_cfft_sR_f32_len1024; +* break; +* case 2048: +* S = & arm_cfft_sR_f32_len2048; +* break; +* case 4096: +* S = & arm_cfft_sR_f32_len4096; +* break; +* } +* \par Q15 and Q31 +* The library provides radix-2 and radix-4 FFT algorithms for fixed-point data. The +* radix-2 algorithm supports lengths of [16, 32, 64, ..., 4096]. The radix-4 +* algorithm supports lengths of [16, 64, 256, ..., 4096]. When possible, you +* should use the radix-4 algorithm since it is faster than the radix-2 of the +* same length. +* \par +* The forward FFTs include scaling in order to prevent results from overflowing. +* Intermediate results are scaled down during each butterfly stage. In the +* radix-2 algorithm, a scale of 0.5 is applied during each butterfly. In the +* radix-4 algorithm, a scale of 0.25 is applied. The scaling applies to both +* the forward and the inverse FFTs. Thus the forward FFT contains an additional +* scale factor of <code>1/fftLen</code> as compared to the standard textbook +* definition of the FFT. The inverse FFT also scales down during each butterfly +* stage and this corresponds to the standard textbook definition. +* \par +* A separate instance structure must be defined for each transform used but +* twiddle factor and bit reversal tables can be reused. +* \par +* There is also an associated initialization function for each data type. +* The initialization function performs the following operations: +* - Sets the values of the internal structure fields. +* - Initializes twiddle factor table and bit reversal table pointers. +* \par +* Use of the initialization function is optional. +* However, if the initialization function is used, then the instance structure +* cannot be placed into a const data section. To place an instance structure +* into a const data section, the instance structure should be manually +* initialized as follows: +* <pre> +*arm_cfft_radix2_instance_q31 S = {fftLen, ifftFlag, bitReverseFlag, pTwiddle, pBitRevTable, twidCoefModifier, bitRevFactor}; +*arm_cfft_radix2_instance_q15 S = {fftLen, ifftFlag, bitReverseFlag, pTwiddle, pBitRevTable, twidCoefModifier, bitRevFactor}; +*arm_cfft_radix4_instance_q31 S = {fftLen, ifftFlag, bitReverseFlag, pTwiddle, pBitRevTable, twidCoefModifier, bitRevFactor}; +*arm_cfft_radix4_instance_q15 S = {fftLen, ifftFlag, bitReverseFlag, pTwiddle, pBitRevTable, twidCoefModifier, bitRevFactor}; +*arm_cfft_instance_f32 S = {fftLen, pTwiddle, pBitRevTable, bitRevLength}; +* </pre> +* \par +* where <code>fftLen</code> length of CFFT/CIFFT; <code>ifftFlag</code> Flag for +* selection of forward or inverse transform. When ifftFlag is set the inverse +* transform is calculated. +* <code>bitReverseFlag</code> Flag for selection of output order (Set bitReverseFlag to output in normal order otherwise output in bit reversed order); +* <code>pTwiddle</code>points to array of twiddle coefficients; <code>pBitRevTable</code> points to the bit reversal table. +* <code>twidCoefModifier</code> modifier for twiddle factor table which supports all FFT lengths with same table; +* <code>pBitRevTable</code> modifier for bit reversal table which supports all FFT lengths with same table. +* <code>onebyfftLen</code> value of 1/fftLen to calculate CIFFT; +* \par +* The Q15 and Q31 FFT functions use a large bit reversal and twiddle factor +* table. The tables are defined for the maximum length transform and a subset +* of the coefficients are used in shorter transforms. +* +*/ + +void arm_cfft_radix8by2_f32( arm_cfft_instance_f32 * S, float32_t * p1) +{ + uint32_t L = S->fftLen; + float32_t * pCol1, * pCol2, * pMid1, * pMid2; + float32_t * p2 = p1 + L; + const float32_t * tw = (float32_t *) S->pTwiddle; + float32_t t1[4], t2[4], t3[4], t4[4], twR, twI; + float32_t m0, m1, m2, m3; + uint32_t l; + + pCol1 = p1; + pCol2 = p2; + + // Define new length + L >>= 1; + // Initialize mid pointers + pMid1 = p1 + L; + pMid2 = p2 + L; + + // do two dot Fourier transform + for ( l = L >> 2; l > 0; l-- ) + { + t1[0] = p1[0]; + t1[1] = p1[1]; + t1[2] = p1[2]; + t1[3] = p1[3]; + + t2[0] = p2[0]; + t2[1] = p2[1]; + t2[2] = p2[2]; + t2[3] = p2[3]; + + t3[0] = pMid1[0]; + t3[1] = pMid1[1]; + t3[2] = pMid1[2]; + t3[3] = pMid1[3]; + + t4[0] = pMid2[0]; + t4[1] = pMid2[1]; + t4[2] = pMid2[2]; + t4[3] = pMid2[3]; + + *p1++ = t1[0] + t2[0]; + *p1++ = t1[1] + t2[1]; + *p1++ = t1[2] + t2[2]; + *p1++ = t1[3] + t2[3]; // col 1 + + t2[0] = t1[0] - t2[0]; + t2[1] = t1[1] - t2[1]; + t2[2] = t1[2] - t2[2]; + t2[3] = t1[3] - t2[3]; // for col 2 + + *pMid1++ = t3[0] + t4[0]; + *pMid1++ = t3[1] + t4[1]; + *pMid1++ = t3[2] + t4[2]; + *pMid1++ = t3[3] + t4[3]; // col 1 + + t4[0] = t4[0] - t3[0]; + t4[1] = t4[1] - t3[1]; + t4[2] = t4[2] - t3[2]; + t4[3] = t4[3] - t3[3]; // for col 2 + + twR = *tw++; + twI = *tw++; + + // multiply by twiddle factors + m0 = t2[0] * twR; + m1 = t2[1] * twI; + m2 = t2[1] * twR; + m3 = t2[0] * twI; + + // R = R * Tr - I * Ti + *p2++ = m0 + m1; + // I = I * Tr + R * Ti + *p2++ = m2 - m3; + + // use vertical symmetry + // 0.9988 - 0.0491i <==> -0.0491 - 0.9988i + m0 = t4[0] * twI; + m1 = t4[1] * twR; + m2 = t4[1] * twI; + m3 = t4[0] * twR; + + *pMid2++ = m0 - m1; + *pMid2++ = m2 + m3; + + twR = *tw++; + twI = *tw++; + + m0 = t2[2] * twR; + m1 = t2[3] * twI; + m2 = t2[3] * twR; + m3 = t2[2] * twI; + + *p2++ = m0 + m1; + *p2++ = m2 - m3; + + m0 = t4[2] * twI; + m1 = t4[3] * twR; + m2 = t4[3] * twI; + m3 = t4[2] * twR; + + *pMid2++ = m0 - m1; + *pMid2++ = m2 + m3; + } + + // first col + arm_radix8_butterfly_f32( pCol1, L, (float32_t *) S->pTwiddle, 2u); + // second col + arm_radix8_butterfly_f32( pCol2, L, (float32_t *) S->pTwiddle, 2u); + +} + +void arm_cfft_radix8by4_f32( arm_cfft_instance_f32 * S, float32_t * p1) +{ + uint32_t L = S->fftLen >> 1; + float32_t * pCol1, *pCol2, *pCol3, *pCol4, *pEnd1, *pEnd2, *pEnd3, *pEnd4; + const float32_t *tw2, *tw3, *tw4; + float32_t * p2 = p1 + L; + float32_t * p3 = p2 + L; + float32_t * p4 = p3 + L; + float32_t t2[4], t3[4], t4[4], twR, twI; + float32_t p1ap3_0, p1sp3_0, p1ap3_1, p1sp3_1; + float32_t m0, m1, m2, m3; + uint32_t l, twMod2, twMod3, twMod4; + + pCol1 = p1; // points to real values by default + pCol2 = p2; + pCol3 = p3; + pCol4 = p4; + pEnd1 = p2 - 1; // points to imaginary values by default + pEnd2 = p3 - 1; + pEnd3 = p4 - 1; + pEnd4 = pEnd3 + L; + + tw2 = tw3 = tw4 = (float32_t *) S->pTwiddle; + + L >>= 1; + + // do four dot Fourier transform + + twMod2 = 2; + twMod3 = 4; + twMod4 = 6; + + // TOP + p1ap3_0 = p1[0] + p3[0]; + p1sp3_0 = p1[0] - p3[0]; + p1ap3_1 = p1[1] + p3[1]; + p1sp3_1 = p1[1] - p3[1]; + + // col 2 + t2[0] = p1sp3_0 + p2[1] - p4[1]; + t2[1] = p1sp3_1 - p2[0] + p4[0]; + // col 3 + t3[0] = p1ap3_0 - p2[0] - p4[0]; + t3[1] = p1ap3_1 - p2[1] - p4[1]; + // col 4 + t4[0] = p1sp3_0 - p2[1] + p4[1]; + t4[1] = p1sp3_1 + p2[0] - p4[0]; + // col 1 + *p1++ = p1ap3_0 + p2[0] + p4[0]; + *p1++ = p1ap3_1 + p2[1] + p4[1]; + + // Twiddle factors are ones + *p2++ = t2[0]; + *p2++ = t2[1]; + *p3++ = t3[0]; + *p3++ = t3[1]; + *p4++ = t4[0]; + *p4++ = t4[1]; + + tw2 += twMod2; + tw3 += twMod3; + tw4 += twMod4; + + for (l = (L - 2) >> 1; l > 0; l-- ) + { + + // TOP + p1ap3_0 = p1[0] + p3[0]; + p1sp3_0 = p1[0] - p3[0]; + p1ap3_1 = p1[1] + p3[1]; + p1sp3_1 = p1[1] - p3[1]; + // col 2 + t2[0] = p1sp3_0 + p2[1] - p4[1]; + t2[1] = p1sp3_1 - p2[0] + p4[0]; + // col 3 + t3[0] = p1ap3_0 - p2[0] - p4[0]; + t3[1] = p1ap3_1 - p2[1] - p4[1]; + // col 4 + t4[0] = p1sp3_0 - p2[1] + p4[1]; + t4[1] = p1sp3_1 + p2[0] - p4[0]; + // col 1 - top + *p1++ = p1ap3_0 + p2[0] + p4[0]; + *p1++ = p1ap3_1 + p2[1] + p4[1]; + + // BOTTOM + p1ap3_1 = pEnd1[-1] + pEnd3[-1]; + p1sp3_1 = pEnd1[-1] - pEnd3[-1]; + p1ap3_0 = pEnd1[0] + pEnd3[0]; + p1sp3_0 = pEnd1[0] - pEnd3[0]; + // col 2 + t2[2] = pEnd2[0] - pEnd4[0] + p1sp3_1; + t2[3] = pEnd1[0] - pEnd3[0] - pEnd2[-1] + pEnd4[-1]; + // col 3 + t3[2] = p1ap3_1 - pEnd2[-1] - pEnd4[-1]; + t3[3] = p1ap3_0 - pEnd2[0] - pEnd4[0]; + // col 4 + t4[2] = pEnd2[0] - pEnd4[0] - p1sp3_1; + t4[3] = pEnd4[-1] - pEnd2[-1] - p1sp3_0; + // col 1 - Bottom + *pEnd1-- = p1ap3_0 + pEnd2[0] + pEnd4[0]; + *pEnd1-- = p1ap3_1 + pEnd2[-1] + pEnd4[-1]; + + // COL 2 + // read twiddle factors + twR = *tw2++; + twI = *tw2++; + // multiply by twiddle factors + // let Z1 = a + i(b), Z2 = c + i(d) + // => Z1 * Z2 = (a*c - b*d) + i(b*c + a*d) + // Top + m0 = t2[0] * twR; + m1 = t2[1] * twI; + m2 = t2[1] * twR; + m3 = t2[0] * twI; + + *p2++ = m0 + m1; + *p2++ = m2 - m3; + // use vertical symmetry col 2 + // 0.9997 - 0.0245i <==> 0.0245 - 0.9997i + // Bottom + m0 = t2[3] * twI; + m1 = t2[2] * twR; + m2 = t2[2] * twI; + m3 = t2[3] * twR; + + *pEnd2-- = m0 - m1; + *pEnd2-- = m2 + m3; + + // COL 3 + twR = tw3[0]; + twI = tw3[1]; + tw3 += twMod3; + // Top + m0 = t3[0] * twR; + m1 = t3[1] * twI; + m2 = t3[1] * twR; + m3 = t3[0] * twI; + + *p3++ = m0 + m1; + *p3++ = m2 - m3; + // use vertical symmetry col 3 + // 0.9988 - 0.0491i <==> -0.9988 - 0.0491i + // Bottom + m0 = -t3[3] * twR; + m1 = t3[2] * twI; + m2 = t3[2] * twR; + m3 = t3[3] * twI; + + *pEnd3-- = m0 - m1; + *pEnd3-- = m3 - m2; + + // COL 4 + twR = tw4[0]; + twI = tw4[1]; + tw4 += twMod4; + // Top + m0 = t4[0] * twR; + m1 = t4[1] * twI; + m2 = t4[1] * twR; + m3 = t4[0] * twI; + + *p4++ = m0 + m1; + *p4++ = m2 - m3; + // use vertical symmetry col 4 + // 0.9973 - 0.0736i <==> -0.0736 + 0.9973i + // Bottom + m0 = t4[3] * twI; + m1 = t4[2] * twR; + m2 = t4[2] * twI; + m3 = t4[3] * twR; + + *pEnd4-- = m0 - m1; + *pEnd4-- = m2 + m3; + } + + //MIDDLE + // Twiddle factors are + // 1.0000 0.7071-0.7071i -1.0000i -0.7071-0.7071i + p1ap3_0 = p1[0] + p3[0]; + p1sp3_0 = p1[0] - p3[0]; + p1ap3_1 = p1[1] + p3[1]; + p1sp3_1 = p1[1] - p3[1]; + + // col 2 + t2[0] = p1sp3_0 + p2[1] - p4[1]; + t2[1] = p1sp3_1 - p2[0] + p4[0]; + // col 3 + t3[0] = p1ap3_0 - p2[0] - p4[0]; + t3[1] = p1ap3_1 - p2[1] - p4[1]; + // col 4 + t4[0] = p1sp3_0 - p2[1] + p4[1]; + t4[1] = p1sp3_1 + p2[0] - p4[0]; + // col 1 - Top + *p1++ = p1ap3_0 + p2[0] + p4[0]; + *p1++ = p1ap3_1 + p2[1] + p4[1]; + + // COL 2 + twR = tw2[0]; + twI = tw2[1]; + + m0 = t2[0] * twR; + m1 = t2[1] * twI; + m2 = t2[1] * twR; + m3 = t2[0] * twI; + + *p2++ = m0 + m1; + *p2++ = m2 - m3; + // COL 3 + twR = tw3[0]; + twI = tw3[1]; + + m0 = t3[0] * twR; + m1 = t3[1] * twI; + m2 = t3[1] * twR; + m3 = t3[0] * twI; + + *p3++ = m0 + m1; + *p3++ = m2 - m3; + // COL 4 + twR = tw4[0]; + twI = tw4[1]; + + m0 = t4[0] * twR; + m1 = t4[1] * twI; + m2 = t4[1] * twR; + m3 = t4[0] * twI; + + *p4++ = m0 + m1; + *p4++ = m2 - m3; + + // first col + arm_radix8_butterfly_f32( pCol1, L, (float32_t *) S->pTwiddle, 4u); + // second col + arm_radix8_butterfly_f32( pCol2, L, (float32_t *) S->pTwiddle, 4u); + // third col + arm_radix8_butterfly_f32( pCol3, L, (float32_t *) S->pTwiddle, 4u); + // fourth col + arm_radix8_butterfly_f32( pCol4, L, (float32_t *) S->pTwiddle, 4u); + +} + +/** +* @addtogroup ComplexFFT +* @{ +*/ + +/** +* @details +* @brief Processing function for the floating-point complex FFT. +* @param[in] *S points to an instance of the floating-point CFFT structure. +* @param[in, out] *p1 points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place. +* @param[in] ifftFlag flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. +* @param[in] bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. +* @return none. +*/ + +void arm_cfft_f32( + const arm_cfft_instance_f32 * S, + float32_t * p1, + uint8_t ifftFlag, + uint8_t bitReverseFlag) +{ + + uint32_t L = S->fftLen, l; + float32_t invL, * pSrc; + + if(ifftFlag == 1u) + { + /* Conjugate input data */ + pSrc = p1 + 1; + for(l=0; l<L; l++) { + *pSrc = -*pSrc; + pSrc += 2; + } + } + + switch (L) { + case 16: + case 128: + case 1024: + arm_cfft_radix8by2_f32 ( (arm_cfft_instance_f32 *) S, p1); + break; + case 32: + case 256: + case 2048: + arm_cfft_radix8by4_f32 ( (arm_cfft_instance_f32 *) S, p1); + break; + case 64: + case 512: + case 4096: + arm_radix8_butterfly_f32( p1, L, (float32_t *) S->pTwiddle, 1); + break; + } + + if( bitReverseFlag ) + arm_bitreversal_32((uint32_t*)p1,S->bitRevLength,S->pBitRevTable); + + if(ifftFlag == 1u) + { + invL = 1.0f/(float32_t)L; + /* Conjugate and scale output data */ + pSrc = p1; + for(l=0; l<L; l++) { + *pSrc++ *= invL ; + *pSrc = -(*pSrc) * invL; + pSrc++; + } + } +} +