огшор

Dependencies:   CMSIS_DSP_401 mbed

Fork of fir_f32 by Nikolay Sergeev

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_cfft_f32.c Source File

arm_cfft_f32.c

00001 /* ----------------------------------------------------------------------    
00002 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.    
00003 *    
00004 * $Date:        17. January 2013  
00005 * $Revision:    V1.4.1  
00006 *    
00007 * Project:      CMSIS DSP Library    
00008 * Title:        arm_cfft_f32.c   
00009 *    
00010 * Description:  Combined Radix Decimation in Frequency CFFT Floating point processing function
00011 *    
00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
00013 *  
00014 * Redistribution and use in source and binary forms, with or without 
00015 * modification, are permitted provided that the following conditions
00016 * are met:
00017 *   - Redistributions of source code must retain the above copyright
00018 *     notice, this list of conditions and the following disclaimer.
00019 *   - Redistributions in binary form must reproduce the above copyright
00020 *     notice, this list of conditions and the following disclaimer in
00021 *     the documentation and/or other materials provided with the 
00022 *     distribution.
00023 *   - Neither the name of ARM LIMITED nor the names of its contributors
00024 *     may be used to endorse or promote products derived from this
00025 *     software without specific prior written permission.
00026 *
00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00038 * POSSIBILITY OF SUCH DAMAGE.   
00039 * -------------------------------------------------------------------- */
00040 
00041 
00042 #include "arm_math.h"
00043 #include "arm_common_tables.h"
00044 
00045 extern void arm_radix8_butterfly_f32(
00046   float32_t * pSrc,
00047   uint16_t fftLen,
00048   const float32_t * pCoef,
00049   uint16_t twidCoefModifier);
00050 
00051 
/**
 * @brief  Reorders a buffer of 32-bit words in place using a table of swap pairs.
 *
 * The table is consumed two entries at a time.  Each entry is a byte offset
 * from pSrc (hence the shift by 2 to obtain a 32-bit word index), and each
 * pair names two positions whose contents are exchanged.  Two consecutive
 * words are exchanged at each position, so a (real, imaginary) pair moves
 * together.
 *
 * @param[in,out] pSrc          buffer of 32-bit words, reordered in place.
 * @param[in]     bitRevLen     number of entries in pBitRevTable; a trailing
 *                              unpaired entry is ignored.
 * @param[in]     pBitRevTable  swap table holding pairs of byte offsets into pSrc.
 * @return none.
 */
void arm_bitreversal_32(
        uint32_t * pSrc,
        const uint16_t bitRevLen,
        const uint16_t * pBitRevTable)
{
    uint32_t i;

    /* i + 1 < bitRevLen guarantees both entries of the pair are inside the table. */
    for (i = 0u; i + 1u < bitRevLen; i += 2u)
    {
        const uint32_t a = (uint32_t) pBitRevTable[i] >> 2;
        const uint32_t b = (uint32_t) pBitRevTable[i + 1u] >> 2;
        uint32_t tmp;

        /* first word of the pair (real part of a complex sample) */
        tmp = pSrc[a];
        pSrc[a] = pSrc[b];
        pSrc[b] = tmp;

        /* second word of the pair (imaginary part of a complex sample) */
        tmp = pSrc[a + 1u];
        pSrc[a + 1u] = pSrc[b + 1u];
        pSrc[b + 1u] = tmp;
    }
}
00063 
00064 /**   
00065 * @ingroup groupTransforms   
00066 */
00067 
00068 /**   
00069 * @defgroup ComplexFFT Complex FFT Functions   
00070 *   
00071 * \par
00072 * The Fast Fourier Transform (FFT) is an efficient algorithm for computing the
00073 * Discrete Fourier Transform (DFT).  The FFT can be orders of magnitude faster
00074 * than the DFT, especially for long lengths.
00075 * The algorithms described in this section
00076 * operate on complex data.  A separate set of functions is devoted to handling
00077 * of real sequences.
00078 * \par
00079 * There are separate algorithms for handling floating-point, Q15, and Q31 data
00080 * types.  The algorithms available for each data type are described next.
00081 * \par
00082 * The FFT functions operate in-place.  That is, the array holding the input data
00083 * will also be used to hold the corresponding result.  The input data is complex
00084 * and contains <code>2*fftLen</code> interleaved values as shown below.
00085 * <pre> {real[0], imag[0], real[1], imag[1],..} </pre>
00086 * The FFT result will be contained in the same array and the frequency domain
00087 * values will have the same interleaving.
00088 *
00089 * \par Floating-point
00090 * The floating-point complex FFT uses a mixed-radix algorithm.  Multiple radix-8
00091 * stages are performed along with a single radix-2 or radix-4 stage, as needed.
00092 * The algorithm supports lengths of [16, 32, 64, ..., 4096] and each length uses
00093 * a different twiddle factor table.  
00094 * \par
00095 * The function uses the standard FFT definition and output values may grow by a
00096 * factor of <code>fftLen</code> when computing the forward transform.  The
00097 * inverse transform includes a scale of <code>1/fftLen</code> as part of the
00098 * calculation and this matches the textbook definition of the inverse FFT.
00099 * \par
00100 * Preinitialized data structures containing twiddle factors and bit reversal
00101 * tables are provided and defined in <code>arm_const_structs.h</code>.  Include 
00102 * this header in your function and then pass one of the constant structures as 
00103 * an argument to arm_cfft_f32.  For example:
00104 * \par
00105 * <code>arm_cfft_f32(&arm_cfft_sR_f32_len64, pSrc, 1, 1)</code>
00106 * \par
00107 * computes a 64-point inverse complex FFT including bit reversal.
00108 * The data structures are treated as constant data and not modified during the
00109 * calculation.  The same data structure can be reused for multiple transforms
00110 * including mixing forward and inverse transforms.
00111 * \par
00112 * Earlier releases of the library provided separate radix-2 and radix-4
00113 * algorithms that operated on floating-point data.  These functions are still
00114 * provided but are deprecated.  The older functions are slower and less general
00115 * than the new functions.
00116 * \par
00117 * An example of initialization of the constants for the arm_cfft_f32 function follows:
00118 * \par
00119 * const static arm_cfft_instance_f32 *S;
00120 * ...
00121 *       switch (length) {
00122 *           case 16:
00123 *               S = & arm_cfft_sR_f32_len16;
00124 *           break;
00125 *           case 32:
00126 *               S = & arm_cfft_sR_f32_len32;
00127 *           break;
00128 *           case 64:
00129 *               S = & arm_cfft_sR_f32_len64;
00130 *           break;
00131 *           case 128:
00132 *               S = & arm_cfft_sR_f32_len128;
00133 *           break;
00134 *           case 256:
00135 *               S = & arm_cfft_sR_f32_len256;
00136 *           break;
00137 *           case 512:
00138 *               S = & arm_cfft_sR_f32_len512;
00139 *           break;
00140 *           case 1024:
00141 *               S = & arm_cfft_sR_f32_len1024;
00142 *           break;
00143 *           case 2048:
00144 *               S = & arm_cfft_sR_f32_len2048;
00145 *           break;
00146 *           case 4096:
00147 *               S = & arm_cfft_sR_f32_len4096;
00148 *           break;
00149 *           }
00150 * \par Q15 and Q31
00151 * The library provides radix-2 and radix-4 FFT algorithms for fixed-point data.  The
00152 * radix-2 algorithm supports lengths of [16, 32, 64, ..., 4096].  The radix-4
00153 * algorithm supports lengths of [16, 64, 256, ..., 4096].  When possible, you
00154 * should use the radix-4 algorithm since it is faster than the radix-2 of the
00155 * same length.
00156 * \par
00157 * The forward FFTs include scaling in order to prevent results from overflowing.
00158 * Intermediate results are scaled down during each butterfly stage.  In the
00159 * radix-2 algorithm, a scale of 0.5 is applied during each butterfly.  In the
00160 * radix-4 algorithm, a scale of 0.25 is applied.  The scaling applies to both
00161 * the forward and the inverse FFTs.  Thus the forward FFT contains an additional
00162 * scale factor of <code>1/fftLen</code> as compared to the standard textbook
00163 * definition of the FFT.  The inverse FFT also scales down during each butterfly
00164 * stage and this corresponds to the standard textbook definition.
00165 * \par
00166 * A separate instance structure must be defined for each transform used but
00167 * twiddle factor and bit reversal tables can be reused.
00168 * \par 
00169 * There is also an associated initialization function for each data type.   
00170 * The initialization function performs the following operations:   
00171 * - Sets the values of the internal structure fields.   
00172 * - Initializes twiddle factor table and bit reversal table pointers.
00173 * \par   
00174 * Use of the initialization function is optional.   
00175 * However, if the initialization function is used, then the instance structure 
00176 * cannot be placed into a const data section. To place an instance structure 
00177 * into a const data section, the instance structure should be manually 
00178 * initialized as follows:
00179 * <pre>   
00180 *arm_cfft_radix2_instance_q31 S = {fftLen, ifftFlag, bitReverseFlag, pTwiddle, pBitRevTable, twidCoefModifier, bitRevFactor};   
00181 *arm_cfft_radix2_instance_q15 S = {fftLen, ifftFlag, bitReverseFlag, pTwiddle, pBitRevTable, twidCoefModifier, bitRevFactor};   
00182 *arm_cfft_radix4_instance_q31 S = {fftLen, ifftFlag, bitReverseFlag, pTwiddle, pBitRevTable, twidCoefModifier, bitRevFactor};    
00183 *arm_cfft_radix4_instance_q15 S = {fftLen, ifftFlag, bitReverseFlag, pTwiddle, pBitRevTable, twidCoefModifier, bitRevFactor};    
00184 *arm_cfft_instance_f32 S = {fftLen, pTwiddle, pBitRevTable, bitRevLength};
00185 * </pre>   
00186 * \par   
00187 * where <code>fftLen</code> is the length of the CFFT/CIFFT; <code>ifftFlag</code> is the flag for
00188 * selection of forward or inverse transform.  When ifftFlag is set the inverse
00189 * transform is calculated.
00190 * <code>bitReverseFlag</code> is the flag for selection of output order (set bitReverseFlag to output in normal order, otherwise output is in bit-reversed order);
00191 * <code>pTwiddle</code> points to the array of twiddle coefficients; <code>pBitRevTable</code> points to the bit reversal table;
00192 * <code>twidCoefModifier</code> is the modifier for the twiddle factor table which supports all FFT lengths with the same table;
00193 * <code>bitRevFactor</code> is the modifier for the bit reversal table which supports all FFT lengths with the same table;
00194 * <code>onebyfftLen</code> is the value of 1/fftLen used to calculate the CIFFT.
00195 * \par
00196 * The Q15 and Q31 FFT functions use a large bit reversal and twiddle factor
00197 * table.  The tables are defined for the maximum length transform and a subset
00198 * of the coefficients are used in shorter transforms.
00199 * 
00200 */
00201 
00202 void arm_cfft_radix8by2_f32( arm_cfft_instance_f32 * S, float32_t * p1) 
00203 {
00204    uint32_t    L  = S->fftLen;
00205    float32_t * pCol1, * pCol2, * pMid1, * pMid2;
00206    float32_t * p2 = p1 + L;
00207    const float32_t * tw = (float32_t *) S->pTwiddle;
00208    float32_t t1[4], t2[4], t3[4], t4[4], twR, twI;
00209    float32_t m0, m1, m2, m3;
00210    uint32_t l;
00211 
00212    pCol1 = p1;
00213    pCol2 = p2;
00214 
00215    //    Define new length
00216    L >>= 1;
00217    //    Initialize mid pointers
00218    pMid1 = p1 + L;
00219    pMid2 = p2 + L;
00220 
00221    // do two dot Fourier transform
00222    for ( l = L >> 2; l > 0; l-- ) 
00223    {
00224       t1[0] = p1[0];
00225       t1[1] = p1[1];
00226       t1[2] = p1[2];
00227       t1[3] = p1[3];
00228 
00229       t2[0] = p2[0];
00230       t2[1] = p2[1];
00231       t2[2] = p2[2];
00232       t2[3] = p2[3];
00233 
00234       t3[0] = pMid1[0];
00235       t3[1] = pMid1[1];
00236       t3[2] = pMid1[2];
00237       t3[3] = pMid1[3];
00238 
00239       t4[0] = pMid2[0];
00240       t4[1] = pMid2[1];
00241       t4[2] = pMid2[2];
00242       t4[3] = pMid2[3];
00243 
00244       *p1++ = t1[0] + t2[0];
00245       *p1++ = t1[1] + t2[1];
00246       *p1++ = t1[2] + t2[2];
00247       *p1++ = t1[3] + t2[3];    // col 1
00248 
00249       t2[0] = t1[0] - t2[0];
00250       t2[1] = t1[1] - t2[1];
00251       t2[2] = t1[2] - t2[2];
00252       t2[3] = t1[3] - t2[3];    // for col 2
00253 
00254       *pMid1++ = t3[0] + t4[0];
00255       *pMid1++ = t3[1] + t4[1];
00256       *pMid1++ = t3[2] + t4[2];
00257       *pMid1++ = t3[3] + t4[3]; // col 1
00258 
00259       t4[0] = t4[0] - t3[0];
00260       t4[1] = t4[1] - t3[1];
00261       t4[2] = t4[2] - t3[2];
00262       t4[3] = t4[3] - t3[3];    // for col 2
00263 
00264       twR = *tw++;
00265       twI = *tw++;
00266 
00267       // multiply by twiddle factors
00268       m0 = t2[0] * twR;
00269       m1 = t2[1] * twI;
00270       m2 = t2[1] * twR;
00271       m3 = t2[0] * twI;
00272       
00273       // R  =  R  *  Tr - I * Ti
00274       *p2++ = m0 + m1;
00275       // I  =  I  *  Tr + R * Ti
00276       *p2++ = m2 - m3;
00277       
00278       // use vertical symmetry
00279       //  0.9988 - 0.0491i <==> -0.0491 - 0.9988i
00280       m0 = t4[0] * twI;
00281       m1 = t4[1] * twR;
00282       m2 = t4[1] * twI;
00283       m3 = t4[0] * twR;
00284       
00285       *pMid2++ = m0 - m1;
00286       *pMid2++ = m2 + m3;
00287 
00288       twR = *tw++;
00289       twI = *tw++;
00290       
00291       m0 = t2[2] * twR;
00292       m1 = t2[3] * twI;
00293       m2 = t2[3] * twR;
00294       m3 = t2[2] * twI;
00295       
00296       *p2++ = m0 + m1;
00297       *p2++ = m2 - m3;
00298          
00299       m0 = t4[2] * twI;
00300       m1 = t4[3] * twR;
00301       m2 = t4[3] * twI;
00302       m3 = t4[2] * twR;
00303       
00304       *pMid2++ = m0 - m1;
00305       *pMid2++ = m2 + m3;
00306    }
00307 
00308    // first col
00309    arm_radix8_butterfly_f32( pCol1, L, (float32_t *) S->pTwiddle, 2u);
00310    // second col
00311    arm_radix8_butterfly_f32( pCol2, L, (float32_t *) S->pTwiddle, 2u);
00312    
00313 }
00314 
/**
 * Splits the buffer at p1 into four equal columns, applies a 4-point
 * butterfly across the columns with twiddle multiplication, then runs
 * arm_radix8_butterfly_f32 on each column (length fftLen/4, twiddle modifier 4).
 *
 * Each column is processed in three phases:
 *   - TOP:    the first complex point, written back without twiddle multiplication;
 *   - loop:   one point from the front ("Top") and one from the back ("Bottom")
 *             of every column per iteration, sharing one twiddle read per column;
 *   - MIDDLE: the single remaining point, reached by the front pointers.
 *
 * With P1..P4 the complex values read from the four columns, the front-side
 * butterfly computes
 *   col 1 = P1 + P2 + P3 + P4
 *   t2    = (P1 - P3) - j*(P2 - P4)
 *   t3    = (P1 + P3) - (P2 + P4)
 *   t4    = (P1 - P3) + j*(P2 - P4)
 * and the front-side twiddle products are formed as
 *   (t.re*twR + t.im*twI) + j*(t.im*twR - t.re*twI), i.e. t * (twR - j*twI).
 *
 * S  : CFFT instance; fftLen and pTwiddle are read.
 * p1 : interleaved complex buffer {re, im, re, im, ...}, processed in place.
 */
00315 void arm_cfft_radix8by4_f32( arm_cfft_instance_f32 * S, float32_t * p1) 
00316 {
00317    uint32_t    L  = S->fftLen >> 1;   // floats per column at this point
00318    float32_t * pCol1, *pCol2, *pCol3, *pCol4, *pEnd1, *pEnd2, *pEnd3, *pEnd4;
00319     const float32_t *tw2, *tw3, *tw4;
00320    float32_t * p2 = p1 + L;
00321    float32_t * p3 = p2 + L;
00322    float32_t * p4 = p3 + L;
00323    float32_t t2[4], t3[4], t4[4], twR, twI;   // t?[0..1]: front-side result, t?[2..3]: back-side result
00324    float32_t p1ap3_0, p1sp3_0, p1ap3_1, p1sp3_1;   // "a" = P1 + P3, "s" = P1 - P3
00325    float32_t m0, m1, m2, m3;
00326    uint32_t l, twMod2, twMod3, twMod4;
00327 
00328    pCol1 = p1;         // column starts; each points to a real value
00329    pCol2 = p2;
00330    pCol3 = p3;
00331    pCol4 = p4;
00332    pEnd1 = p2 - 1;     // last float of each column, i.e. an imaginary value
00333    pEnd2 = p3 - 1;
00334    pEnd3 = p4 - 1;
00335    pEnd4 = pEnd3 + L;
00336    
00337    tw2 = tw3 = tw4 = (float32_t *) S->pTwiddle;
00338    
00339    L >>= 1;            // from here on L counts complex points per column
00340 
00341    // 4-point butterfly across the four columns
00342 
00343    // twiddle strides in floats for columns 2, 3 and 4
00344    twMod2 = 2;
00345    twMod3 = 4;
00346    twMod4 = 6;
00347    // TOP: first complex point of every column
00348    p1ap3_0 = p1[0] + p3[0];
00349    p1sp3_0 = p1[0] - p3[0];
00350    p1ap3_1 = p1[1] + p3[1];
00351    p1sp3_1 = p1[1] - p3[1];
00352 
00353    // col 2
00354    t2[0] = p1sp3_0 + p2[1] - p4[1];
00355    t2[1] = p1sp3_1 - p2[0] + p4[0];
00356    // col 3
00357    t3[0] = p1ap3_0 - p2[0] - p4[0];
00358    t3[1] = p1ap3_1 - p2[1] - p4[1];
00359    // col 4
00360    t4[0] = p1sp3_0 - p2[1] + p4[1];
00361    t4[1] = p1sp3_1 + p2[0] - p4[0];
00362    // col 1
00363    *p1++ = p1ap3_0 + p2[0] + p4[0];
00364    *p1++ = p1ap3_1 + p2[1] + p4[1];
00365 
00366    // Twiddle factors are ones, so the results are stored unmodified
00367    *p2++ = t2[0];
00368    *p2++ = t2[1];
00369    *p3++ = t3[0];
00370    *p3++ = t3[1];
00371    *p4++ = t4[0];
00372    *p4++ = t4[1];
00373    
00374    // skip the table entries that belong to the point just handled
00375    tw2 += twMod2;
00376    tw3 += twMod3;
00377    tw4 += twMod4;
00378    for (l = (L - 2) >> 1; l > 0; l-- ) 
00379    {
00380       // each iteration consumes one point from the front and one from the back of every column
00381       // TOP
00382       p1ap3_0 = p1[0] + p3[0];
00383       p1sp3_0 = p1[0] - p3[0];
00384       p1ap3_1 = p1[1] + p3[1];
00385       p1sp3_1 = p1[1] - p3[1];
00386       // col 2
00387       t2[0] = p1sp3_0 + p2[1] - p4[1];
00388       t2[1] = p1sp3_1 - p2[0] + p4[0];
00389       // col 3
00390       t3[0] = p1ap3_0 - p2[0] - p4[0];
00391       t3[1] = p1ap3_1 - p2[1] - p4[1];
00392       // col 4
00393       t4[0] = p1sp3_0 - p2[1] + p4[1];
00394       t4[1] = p1sp3_1 + p2[0] - p4[0];
00395       // col 1 - top
00396       *p1++ = p1ap3_0 + p2[0] + p4[0];
00397       *p1++ = p1ap3_1 + p2[1] + p4[1];
00398 
00399       // BOTTOM: pEnd?[0] is the imaginary part and pEnd?[-1] the real part, so here _1 holds real and _0 holds imaginary
00400       p1ap3_1 = pEnd1[-1] + pEnd3[-1];
00401       p1sp3_1 = pEnd1[-1] - pEnd3[-1];
00402       p1ap3_0 = pEnd1[0] + pEnd3[0];
00403       p1sp3_0 = pEnd1[0] - pEnd3[0];
00404       // col 2
00405       t2[2] = pEnd2[0]  - pEnd4[0] + p1sp3_1;
00406       t2[3] = pEnd1[0] - pEnd3[0] - pEnd2[-1] + pEnd4[-1];
00407       // col 3
00408       t3[2] = p1ap3_1 - pEnd2[-1] - pEnd4[-1];
00409       t3[3] = p1ap3_0 - pEnd2[0]  - pEnd4[0];
00410       // col 4
00411       t4[2] = pEnd2[0]  - pEnd4[0]  - p1sp3_1;
00412       t4[3] = pEnd4[-1] - pEnd2[-1] - p1sp3_0;
00413       // col 1 - Bottom (imaginary part first, then real, walking backwards)
00414       *pEnd1-- = p1ap3_0 + pEnd2[0] + pEnd4[0];
00415       *pEnd1-- = p1ap3_1 + pEnd2[-1] + pEnd4[-1];
00416 
00417       // COL 2
00418       // read twiddle factors (tw2 advances two floats per iteration)
00419       twR = *tw2++;
00420       twI = *tw2++;
00421       // multiply by twiddle factors
00422       //  the products below form (a*c + b*d) + i(b*c - a*d)
00423       //  for t = a + i(b) and twiddle (c, d), i.e. t * (c - i(d))
00424       // Top
00425       m0 = t2[0] * twR;
00426       m1 = t2[1] * twI;
00427       m2 = t2[1] * twR;
00428       m3 = t2[0] * twI;
00429       
00430       *p2++ = m0 + m1;
00431       *p2++ = m2 - m3;
00432       // use vertical symmetry col 2: the same twiddle pair serves the back-side point
00433       // 0.9997 - 0.0245i  <==>  0.0245 - 0.9997i
00434       // Bottom
00435       m0 = t2[3] * twI;
00436       m1 = t2[2] * twR;
00437       m2 = t2[2] * twI;
00438       m3 = t2[3] * twR;
00439       
00440       *pEnd2-- = m0 - m1;
00441       *pEnd2-- = m2 + m3;
00442 
00443       // COL 3 (tw3 advances twMod3 floats per iteration)
00444       twR = tw3[0];
00445       twI = tw3[1];
00446       tw3 += twMod3;
00447       // Top
00448       m0 = t3[0] * twR;
00449       m1 = t3[1] * twI;
00450       m2 = t3[1] * twR;
00451       m3 = t3[0] * twI;
00452       
00453       *p3++ = m0 + m1;
00454       *p3++ = m2 - m3;
00455       // use vertical symmetry col 3
00456       // 0.9988 - 0.0491i  <==>  -0.9988 - 0.0491i
00457       // Bottom
00458       m0 = -t3[3] * twR;
00459       m1 = t3[2] * twI;
00460       m2 = t3[2] * twR;
00461       m3 = t3[3] * twI;
00462       
00463       *pEnd3-- = m0 - m1;
00464       *pEnd3-- = m3 - m2;
00465       
00466       // COL 4 (tw4 advances twMod4 floats per iteration)
00467       twR = tw4[0];
00468       twI = tw4[1];
00469       tw4 += twMod4;
00470       // Top
00471       m0 = t4[0] * twR;
00472       m1 = t4[1] * twI;
00473       m2 = t4[1] * twR;
00474       m3 = t4[0] * twI;
00475       
00476       *p4++ = m0 + m1;
00477       *p4++ = m2 - m3;
00478       // use vertical symmetry col 4
00479       // 0.9973 - 0.0736i  <==>  -0.0736 + 0.9973i
00480       // Bottom
00481       m0 = t4[3] * twI;
00482       m1 = t4[2] * twR;
00483       m2 = t4[2] * twI;
00484       m3 = t4[3] * twR;
00485       
00486       *pEnd4-- = m0 - m1;
00487       *pEnd4-- = m2 + m3;
00488    }
00489 
00490    // MIDDLE: the one point per column not covered by the TOP step or the loop
00491    // Twiddle factors are 
00492    //  1.0000  0.7071-0.7071i  -1.0000i  -0.7071-0.7071i
00493    p1ap3_0 = p1[0] + p3[0];
00494    p1sp3_0 = p1[0] - p3[0];
00495    p1ap3_1 = p1[1] + p3[1];
00496    p1sp3_1 = p1[1] - p3[1];
00497 
00498    // col 2
00499    t2[0] = p1sp3_0 + p2[1] - p4[1];
00500    t2[1] = p1sp3_1 - p2[0] + p4[0];
00501    // col 3
00502    t3[0] = p1ap3_0 - p2[0] - p4[0];
00503    t3[1] = p1ap3_1 - p2[1] - p4[1];
00504    // col 4
00505    t4[0] = p1sp3_0 - p2[1] + p4[1];
00506    t4[1] = p1sp3_1 + p2[0] - p4[0];
00507    // col 1 - Top
00508    *p1++ = p1ap3_0 + p2[0] + p4[0];
00509    *p1++ = p1ap3_1 + p2[1] + p4[1];
00510    
00511    // COL 2
00512    twR = tw2[0];
00513    twI = tw2[1];
00514    
00515    m0 = t2[0] * twR;
00516    m1 = t2[1] * twI;
00517    m2 = t2[1] * twR;
00518    m3 = t2[0] * twI;
00519    
00520    *p2++ = m0 + m1;
00521    *p2++ = m2 - m3;
00522       // COL 3
00523    twR = tw3[0];
00524    twI = tw3[1];
00525    
00526    m0 = t3[0] * twR;
00527    m1 = t3[1] * twI;
00528    m2 = t3[1] * twR;
00529    m3 = t3[0] * twI;
00530    
00531    *p3++ = m0 + m1;
00532    *p3++ = m2 - m3;
00533    // COL 4
00534    twR = tw4[0];
00535    twI = tw4[1];
00536    
00537    m0 = t4[0] * twR;
00538    m1 = t4[1] * twI;
00539    m2 = t4[1] * twR;
00540    m3 = t4[0] * twI;
00541    
00542    *p4++ = m0 + m1;
00543    *p4++ = m2 - m3;
00544 
00545    // first col: radix-8 butterfly over L complex points
00546    arm_radix8_butterfly_f32( pCol1, L, (float32_t *) S->pTwiddle, 4u);
00547    // second col
00548    arm_radix8_butterfly_f32( pCol2, L, (float32_t *) S->pTwiddle, 4u);
00549    // third col
00550    arm_radix8_butterfly_f32( pCol3, L, (float32_t *) S->pTwiddle, 4u);
00551    // fourth col
00552    arm_radix8_butterfly_f32( pCol4, L, (float32_t *) S->pTwiddle, 4u);
00553 
00554 }
00555 
00556 /**
00557 * @addtogroup ComplexFFT   
00558 * @{   
00559 */
00560 
00561 /**   
00562 * @details   
00563 * @brief       Processing function for the floating-point complex FFT.
00564 * @param[in]      *S    points to an instance of the floating-point CFFT structure.  
00565 * @param[in, out] *p1   points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.  
00566 * @param[in]     ifftFlag       flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.  
00567 * @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.  
00568 * @return none.  
00569 */
00570 
00571 void arm_cfft_f32( 
00572    const arm_cfft_instance_f32 * S, 
00573    float32_t * p1,
00574    uint8_t ifftFlag,
00575    uint8_t bitReverseFlag)
00576 {
00577 
00578    uint32_t  L = S->fftLen, l;
00579    float32_t invL, * pSrc;
00580 
00581   if(ifftFlag == 1u)
00582   {
00583       /*  Conjugate input data  */
00584       pSrc = p1 + 1;
00585       for(l=0; l<L; l++) {
00586           *pSrc = -*pSrc;
00587            pSrc += 2;
00588       }
00589   }
00590 
00591         switch (L) {
00592         case 16: 
00593         case 128:
00594         case 1024:
00595              arm_cfft_radix8by2_f32  ( (arm_cfft_instance_f32 *) S, p1);
00596              break;
00597         case 32:
00598         case 256:
00599         case 2048:
00600              arm_cfft_radix8by4_f32  ( (arm_cfft_instance_f32 *) S, p1);
00601              break;
00602         case 64:
00603         case 512:
00604         case 4096:
00605           arm_radix8_butterfly_f32( p1, L, (float32_t *) S->pTwiddle, 1);
00606              break;
00607         }  
00608 
00609     if( bitReverseFlag )
00610         arm_bitreversal_32((uint32_t*)p1,S->bitRevLength,S->pBitRevTable);
00611 
00612   if(ifftFlag == 1u)
00613   {
00614       invL = 1.0f/(float32_t)L;
00615       /*  Conjugate and scale output data */
00616       pSrc = p1;
00617       for(l=0; l<L; l++) {
00618          *pSrc++ *=   invL ;
00619          *pSrc  = -(*pSrc) * invL;
00620                  pSrc++;
00621       }
00622   }
00623 }