mbed port of the FFT routines from the STM32 DSP library and of Ivan Mellen's implementation. Tested on the LPC2368 mbed, but it should work on the LPC1768 too (the original code was written for Cortex-M3).
Diff: cr4_fft_256_stm32.s
- Revision:
- 0:90ade34a3b71
; Provenance of this text (unified-diff hunk header, kept for reference only):
;   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
;   +++ b/cr4_fft_256_stm32.s Sun Dec 13 07:14:57 2009 +0000
;   @@ -0,0 +1,395 @@
; The leading '+' diff markers have been removed and the statements are laid
; out one per line again so that the text is valid armasm source.
;******************** (C) COPYRIGHT 2009 STMicroelectronics ********************
;* File Name : cr4_fft_256_stm32.s
;* Author : MCD Application Team
;* Version : V2.0.0
;* Date : 04/27/2009
;* Description : Optimized 256-point radix-4 complex FFT for Cortex-M3
;********************************************************************************
;* THE PRESENT FIRMWARE WHICH IS FOR GUIDANCE ONLY AIMS AT PROVIDING CUSTOMERS
;* WITH CODING INFORMATION REGARDING THEIR PRODUCTS IN ORDER FOR THEM TO SAVE TIME.
;* AS A RESULT, STMICROELECTRONICS SHALL NOT BE HELD LIABLE FOR ANY DIRECT,
;* INDIRECT OR CONSEQUENTIAL DAMAGES WITH RESPECT TO ANY CLAIMS ARISING FROM THE
;* CONTENT OF SUCH SOFTWARE AND/OR THE USE MADE BY CUSTOMERS OF THE CODING
;* INFORMATION CONTAINED HEREIN IN CONNECTION WITH THEIR PRODUCTS.
;*******************************************************************************/

; Syntax: ARM armasm (MACRO/MEND, RN, EQU, DCW; ';' starts a comment).
; The THUMB directive below is disabled in this port.
; NOTE(review): with THUMB disabled the instruction set is whatever the
; assembler defaults to for the selected CPU - confirm this is what is wanted
; for a Cortex-M3 build (the code checks TARGET_LPC1768 further down).
;       THUMB
        REQUIRE8
        PRESERVE8

        AREA    |.text|, CODE, READONLY, ALIGN=2

        EXPORT  cr4_fft_256_stm32
        EXTERN  TableFFT        ; only referenced by the commented-out "Coeff in RAM" option


;----------------------------- REGISTER ALIASES ------------------------------
; Several names deliberately share one physical register; the two names of a
; pair are used in different phases of the routine.
pssK            RN R0           ; twiddle-table pointer      (shares R0 with pssOUT)
pssOUT          RN R0           ; output pointer             (shares R0 with pssK)
pssX            RN R1           ; working data pointer       (shares R1 with pssIN)
pssIN           RN R1           ; input pointer              (shares R1 with pssX)
butternbr       RN R2           ; packed loop counters       (shares R2 with Nbin)
Nbin            RN R2           ; number of points argument  (shares R2 with butternbr)
index           RN R3           ; loop index / leg stride
Ar              RN R4           ; butterfly operand A, real part
Ai              RN R5           ; butterfly operand A, imaginary part
Br              RN R6           ; butterfly operand B, real part
Bi              RN R7           ; butterfly operand B, imaginary part
Cr              RN R8           ; butterfly operand C, real part
Ci              RN R9           ; butterfly operand C, imaginary part
Dr              RN R10          ; butterfly operand D, real part
Di              RN R11          ; butterfly operand D, imaginary part
cntrbitrev      RN R12          ; bit-reversed counter       (shares R12 with tmp)
tmp             RN R12          ; scratch                    (shares R12 with cntrbitrev)
pssIN2          RN R14          ; per-butterfly input ptr    (shares R14/LR with tmp2)
tmp2            RN R14          ; scratch                    (shares R14/LR with pssIN2)

NPT             EQU 256         ; transform size; also used as a byte stride by BUTFLY4ZERO_OPT

;----------------------------- MACROS ----------------------------------------

; DEC reg : reg = reg - 1 (flags are not updated)
        MACRO
        DEC     $reg
        SUB     $reg,$reg,#1
        MEND

; INC reg : reg = reg + 1 (flags are not updated)
        MACRO
        INC     $reg
        ADD     $reg,$reg,#1
        MEND


; QUAD reg : reg = reg * 4 (logical shift left by 2, flags are not updated)
        MACRO
        QUAD    $reg
        MOV     $reg,$reg,LSL#2
        MEND

;sXi = *(PssX+1); sXr = *PssX; PssX += offset; PssX= R1
; LDR2Q sXr, sXi, PssX, offset
;   Loads one complex sample stored as two signed halfwords: the real part
;   from [PssX] and the imaginary part from [PssX+2] (both sign-extended),
;   then advances PssX by offset bytes.

        MACRO
        LDR2Q   $sXr,$sXi, $PssX, $offset
        LDRSH   $sXi, [$PssX, #2]
        LDRSH   $sXr, [$PssX]
        ADD     $PssX, $PssX, $offset
        MEND

;!! Same macro, to be used when passing negative offset value !!
; LDR2Qm sXr, sXi, PssX, offset
;   Same loads as LDR2Q (real part from [PssX], imaginary part from [PssX+2],
;   both as signed halfwords) but PssX is then moved BACKWARDS by offset bytes.
        MACRO
        LDR2Qm  $sXr, $sXi, $PssX, $offset
        LDRSH   $sXi, [$PssX, #2]
        LDRSH   $sXr, [$PssX]
        SUB     $PssX, $PssX, $offset
        MEND

;(PssX+1)= sXi; *PssX=sXr; PssX += offset;
; STR2Q sXr, sXi, PssX, offset
;   Stores the low halfwords of sXr to [PssX] and of sXi to [PssX+2], then
;   advances PssX by offset bytes.
        MACRO
        STR2Q   $sXr, $sXi, $PssX, $offset
        STRH    $sXi, [$PssX, #2]
        STRH    $sXr, [$PssX]
        ADD     $PssX, $PssX, $offset
        MEND

; YY = Cplx_conjugate_mul(Y,K)
; Y = YYr + i*YYi
; use the following trick
; K = (Kr-Ki) + i*Ki
;
; CXMUL_V7 YYr, YYi, Yr, Yi, Kr, Ki, tmp, tmp2
;   $Kr must hold the pre-combined value (Kr-Ki) and $Ki the plain Ki; the
;   comments on the instructions below are written in terms of the true Kr.
;   With that convention three multiplies give:
;       tmp  = (Yi - Yr) * Ki
;       tmp2 = (Kr-Ki) + 2*Ki           = Kr + Ki
;       YYi  = Yi*(Kr-Ki) + tmp         = Yi*Kr - Yr*Ki
;       YYr  = Yr*(Kr+Ki) + tmp         = Yr*Kr + Yi*Ki
;   The products are not rescaled here.
;   Instruction order matters: BUTFLY4_V7 passes the same registers for
;   $YYr/$Kr and for $YYi/$Ki, so $Ki and $Kr must be fully consumed before
;   $YYi and $YYr are written.
;   Clobbers: $tmp, $tmp2.
        MACRO
        CXMUL_V7 $YYr, $YYi, $Yr, $Yi, $Kr, $Ki,$tmp,$tmp2
        SUB     $tmp2, $Yi, $Yr                 ; sYi-sYr
        MUL     $tmp, $tmp2, $Ki                ; (sYi-sYr)*sKi
        ADD     $tmp2, $Kr, $Ki, LSL#1          ; (sKr+sKi)
        MLA     $YYi, $Yi, $Kr, $tmp            ; lYYi = sYi*sKr-sYr*sKi
        MLA     $YYr, $Yr, $tmp2, $tmp          ; lYYr = sYr*sKr+sYi*sKi
        MEND

; Four point complex Fast Fourier Transform
; CXADDA4 s
;   In-place radix-4 butterfly on the register set A,B,C,D (Ar/Ai .. Dr/Di).
;   A is taken as is; B, C and D are expected to carry s extra fractional
;   bits (they are shifted right by s on use). The result is divided by 4.
;   "X, X, Y, LSL#1" after an ADD is the idiom (X+Y) - 2*Y = X - Y; the
;   ASR#(1+s) subtractions use the same idiom on values already divided by 4.
;   On exit the D pair is swapped: the real part of the fourth output is in
;   Di and its imaginary part is in Dr (see the stores marked "inversion
;   here" in the butterfly macros).
        MACRO
        CXADDA4 $s
        ; (C,D) = (C+D, C-D)
        ADD     Cr, Cr, Dr
        ADD     Ci, Ci, Di
        SUB     Dr, Cr, Dr, LSL#1
        SUB     Di, Ci, Di, LSL#1
        ; (A,B) = (A+(B>>s), A-(B>>s))/4
        MOV     Ar, Ar, ASR#2
        MOV     Ai, Ai, ASR#2
        ADD     Ar, Ar, Br, ASR#(2+$s)
        ADD     Ai, Ai, Bi, ASR#(2+$s)
        SUB     Br, Ar, Br, ASR#(1+$s)
        SUB     Bi, Ai, Bi, ASR#(1+$s)
        ; (A,C) = (A+(C>>s)/4, A-(C>>s)/4)
        ADD     Ar, Ar, Cr, ASR#(2+$s)
        ADD     Ai, Ai, Ci, ASR#(2+$s)
        SUB     Cr, Ar, Cr, ASR#(1+$s)
        SUB     Ci, Ai, Ci, ASR#(1+$s)
        ; (B,D) = (B-i*(D>>s)/4, B+i*(D>>s)/4)
        ADD     Br, Br, Di, ASR#(2+$s)
        SUB     Bi, Bi, Dr, ASR#(2+$s)
        SUB     Di, Br, Di, ASR#(1+$s)
        ADD     Dr, Bi, Dr, ASR#(1+$s)
        MEND


; BUTFLY4ZERO_OPT pIN, offset, pOUT
;   First-stage butterfly without twiddle multiplication.
;   Reads four complex samples starting at $pIN, spaced NPT bytes apart
;   (each sample is 4 bytes, so NPT bytes = NPT/4 samples). Note the register
;   assignment of the loads: A, then C, then B, then D.
;   Writes the four results to consecutive samples at $pOUT, which is advanced
;   by 16 bytes in total. $pIN is left advanced by 4*NPT bytes.
;   $offset is accepted but not used by the macro body.
        MACRO
        BUTFLY4ZERO_OPT $pIN,$offset, $pOUT
        LDRSH   Ai, [$pIN, #2]
        LDRSH   Ar, [$pIN]
        ADD     $pIN, #NPT
        LDRSH   Ci, [$pIN, #2]
        LDRSH   Cr, [$pIN]
        ADD     $pIN, #NPT
        LDRSH   Bi, [$pIN, #2]
        LDRSH   Br, [$pIN]
        ADD     $pIN, #NPT
        LDRSH   Di, [$pIN, #2]
        LDRSH   Dr, [$pIN]
        ADD     $pIN, #NPT

        ; (C,D) = (C+D, C-D)
        ADD     Cr, Cr, Dr
        ADD     Ci, Ci, Di
        SUB     Dr, Cr, Dr, LSL#1               ; trick
        SUB     Di, Ci, Di, LSL#1               ;trick
        ; (A,B) = (A+B)/4, (A-B)/4
        MOV     Ar, Ar, ASR#2
        MOV     Ai, Ai, ASR#2
        ADD     Ar, Ar, Br, ASR#2
        ADD     Ai, Ai, Bi, ASR#2
        SUB     Br, Ar, Br, ASR#1
        SUB     Bi, Ai, Bi, ASR#1
        ; (A,C) = (A+C)/4, (A-C)/4
        ADD     Ar, Ar, Cr, ASR#2
        ADD     Ai, Ai, Ci, ASR#2
        SUB     Cr, Ar, Cr, ASR#1
        SUB     Ci, Ai, Ci, ASR#1
        ; (B,D) = (B-i*D)/4, (B+i*D)/4
        ADD     Br, Br, Di, ASR#2
        SUB     Bi, Bi, Dr, ASR#2
        SUB     Di, Br, Di, ASR#1
        ADD     Dr, Bi, Dr, ASR#1
        ;
        STRH    Ai, [$pOUT, #2]
        STRH    Ar, [$pOUT], #4
        STRH    Bi, [$pOUT, #2]
        STRH    Br, [$pOUT], #4
        STRH    Ci, [$pOUT, #2]
        STRH    Cr, [$pOUT], #4
        STRH    Dr, [$pOUT, #2]                 ; inversion here
        STRH    Di, [$pOUT], #4
        MEND

; BUTFLY4_V7 pssDin, offset, pssDout, qformat, pssK
;   Butterfly with twiddle multiplication, used by the later stages.
;   On entry $pssDin points at the LAST of four legs spaced $offset bytes
;   apart. The legs are read walking backwards; the last three read are
;   multiplied (CXMUL_V7) by three consecutive coefficient pairs fetched from
;   $pssK, which advances by 12 bytes. The products land in D, C, B in that
;   order; the first leg is loaded unmodified into A.
;   CXADDA4 $qformat then combines them, and the results are stored walking
;   forwards from $pssDout in the order A, B, C, D. The final store
;   post-increments by 4, so $pssDout ends one sample past the last leg.
;   Clobbers: Ar..Di, tmp, tmp2.
        MACRO
        BUTFLY4_V7 $pssDin,$offset,$pssDout,$qformat,$pssK
        LDR2Qm  Ar,Ai,$pssDin, $offset;-$offset
        LDR2Q   Dr,Di,$pssK, #4
        ; format CXMUL_V7 YYr, YYi, Yr, Yi, Kr, Ki,tmp,tmp2
        CXMUL_V7 Dr,Di,Ar,Ai,Dr,Di,tmp,tmp2
        LDR2Qm  Ar,Ai,$pssDin,$offset;-$offset
        LDR2Q   Cr,Ci,$pssK,#4
        CXMUL_V7 Cr,Ci,Ar,Ai,Cr,Ci,tmp,tmp2
        LDR2Qm  Ar,Ai, $pssDin, $offset;-$offset
        LDR2Q   Br,Bi, $pssK, #4
        CXMUL_V7 Br,Bi,Ar,Ai,Br,Bi,tmp,tmp2
        LDR2Q   Ar,Ai, $pssDin, #0
        CXADDA4 $qformat
        STRH    Ai, [$pssDout, #2]
        STRH    Ar, [$pssDout]
        ADD     $pssDout, $pssDout, $offset
        STRH    Bi, [$pssDout, #2]
        STRH    Br, [$pssDout]
        ADD     $pssDout, $pssDout, $offset
        STRH    Ci, [$pssDout, #2]
        STRH    Cr, [$pssDout]
        ADD     $pssDout, $pssDout, $offset
        STRH    Dr, [$pssDout, #2]              ; inversion here
        STRH    Di, [$pssDout], #4
        MEND

;------------------- CODE --------------------------------
;===============================================================================
;*******************************************************************************
;* Function Name : cr4_fft_256_stm32
;* Description : complex radix-4 256 points FFT
;* Input : - R0 = pssOUT: Output array .
;* - R1 = pssIN: Input array
;* - R2 = Nbin: =256 number of points, this optimized FFT function
;* can only convert 256 points.
;* Output : None
;* Return : None
;*******************************************************************************
; Implementation notes
;   * Samples are pairs of signed halfwords (real at +0, imaginary at +2).
;   * Stage 1 (preloop_v7) runs NPT/4 twiddle-free butterflies, reading the
;     input at bit-reversed positions and writing the output sequentially.
;   * Stages 2..4 (passloop_v7) work in place on the output buffer with leg
;     strides of 16, 64 and 256 bytes, using TableFFT_V7.
;   * Every stage divides by 4 (see CXADDA4 / BUTFLY4ZERO_OPT).
;   * R4-R11 and LR are saved and restored; R0-R3 and R12 are overwritten.
;   * The transform size is fixed: stage 1 is hard-wired to NPT points, so
;     Nbin (R2) is forced to NPT on entry. A caller passing 256 sees no
;     difference; any other value used to make the later stages operate on a
;     region that did not match what stage 1 had written.
cr4_fft_256_stm32

        STMFD   SP!, {R4-R11, LR}

        MOV     Nbin, #NPT                      ; only NPT points are supported: ignore any other request
        MOV     cntrbitrev, #0
        MOV     index,#0

preloop_v7
        ; cntrbitrev holds the bit-reversed butterfly number in its top bits;
        ; LSR#24 turns it into a byte offset that is a multiple of 4.
        ADD     pssIN2, pssIN, cntrbitrev, LSR#24 ;256-pts
        BUTFLY4ZERO_OPT pssIN2,Nbin,pssOUT
        INC     index
        IF :DEF:TARGET_LPC1768
        RBIT    cntrbitrev,index                ; 32-bit bit reversal of the new index
        ELSE
        ; add 1 to cntrbitrev "backwards"
        ; start looking from top, toggling all bits until we hit a 0,
        ; which we toggle to 1 and then stop
        ; tmp2 = 0x80000000;
        ; bit1 = true;
        ; while ( bit1 ) {
        ;   bit1 = (cntrbitrev & tmp2) != 0;
        ;   cntrbitrev ^= tmp2;
        ;   if ( bit1 ) tmp2 >>= 1;
        ; }
        ; (tmp2 is R14, the same register as pssIN2, which is dead by now)
        MOV     tmp2, #(1<<31)
Lrev
        TST     cntrbitrev, tmp2                ; is the current bit set?
        EOR     cntrbitrev, tmp2                ; toggle it regardless of result
        MOVNE   tmp2, tmp2, LSR#1               ; if set, shift mask
        BNE     Lrev                            ; and loop again
        ENDIF

        CMP     index,#64                       ;256-pts
        BNE     preloop_v7


        ; pssOUT has advanced by 16 bytes per butterfly; step back Nbin*4
        ; bytes to the start of the output buffer for the in-place stages.
        SUB     pssX, pssOUT, Nbin, LSL#2
        MOV     index, #16                      ; leg stride in bytes for the first in-place stage
        MOVS    butternbr, Nbin, LSR#4          ;dual use of register

;------------------------------------------------------------------------------
; The FFT coefficients table can be stored into Flash or RAM.
; The following two lines of code allow selecting the method for coefficients
; storage.
; In the case of choosing coefficients in RAM, you have to:
; 1. Include the file table_fft.h, which is a part of the DSP library,
;    in your main file.
; 2. Uncomment the line LDR.W pssK, =TableFFT and comment out the line
;    ADR pssK, TableFFT_V7
; 3. Comment out all the TableFFT_V7 data.
;------------------------------------------------------------------------------
        ADR     pssK, TableFFT_V7               ; Coeff in Flash
        ;LDR.W  pssK, =TableFFT                 ; Coeff in RAM

; butternbr packs two counters: the low halfword counts the groups left in
; the current stage, the high halfword counts butterflies inside a group
; (biased by -1 so that the SUBS/BGE pair below can terminate on sign).
;................................
passloop_v7
        STMFD   SP!, {pssX,butternbr}
        ADD     tmp, index, index, LSL#1        ; tmp = 3*index
        ADD     pssX, pssX, tmp                 ; point at the last leg: BUTFLY4_V7 reads backwards
        SUB     butternbr, butternbr, #1<<16    ; bias the butterfly counter by -1
;................
grouploop_v7
        ADD     butternbr,butternbr,index,LSL#(16-2) ; high halfword += index/4 butterflies per group
;.......
butterloop_v7
        BUTFLY4_V7 pssX,index,pssX,14,pssK
        SUBS    butternbr,butternbr, #1<<16
        BGE     butterloop_v7
;.......
        ADD     tmp, index, index, LSL#1        ; tmp = 3*index
        ADD     pssX, pssX, tmp                 ; last leg of the next group
        DEC     butternbr                       ; one group done
        MOVS    tmp2, butternbr, LSL#16         ; Z set when the group counter (low halfword) is 0
        IT      NE
        SUBNE   pssK, pssK, tmp                 ; more groups: rewind the coefficients used by this group
        BNE     grouploop_v7
;................
        LDMFD   sp!, {pssX, butternbr}
        QUAD    index                           ; next stage: stride *= 4
        MOVS    butternbr, butternbr, LSR#2     ; loop nbr /= radix
        BNE     passloop_v7
;................................
        LDMFD   SP!, {R4-R11, PC}

;=============================================================================

; Coefficient table used by BUTFLY4_V7, one sub-table per in-place stage.
; Each row holds the three halfword pairs consumed by one butterfly, in the
; order they are fetched (for the D, C and B legs). Following the CXMUL_V7
; convention, the first halfword of a pair is (Kr-Ki) and the second is Ki.
; The values appear to be scaled so that 0x4000 is 1.0, which matches the
; shift of 14 passed to BUTFLY4_V7 above.
TableFFT_V7
        ;N=16
        DCW     0x4000,0x0000, 0x4000,0x0000, 0x4000,0x0000
        DCW     0xdd5d,0x3b21, 0x22a3,0x187e, 0x0000,0x2d41
        DCW     0xa57e,0x2d41, 0x0000,0x2d41, 0xc000,0x4000
        DCW     0xdd5d,0xe782, 0xdd5d,0x3b21, 0xa57e,0x2d41
        ; N=64
        DCW     0x4000,0x0000, 0x4000,0x0000, 0x4000,0x0000
        DCW     0x2aaa,0x1294, 0x396b,0x0646, 0x3249,0x0c7c
        DCW     0x11a8,0x238e, 0x3249,0x0c7c, 0x22a3,0x187e
        DCW     0xf721,0x3179, 0x2aaa,0x1294, 0x11a8,0x238e
        DCW     0xdd5d,0x3b21, 0x22a3,0x187e, 0x0000,0x2d41
        DCW     0xc695,0x3fb1, 0x1a46,0x1e2b, 0xee58,0x3537
        DCW     0xb4be,0x3ec5, 0x11a8,0x238e, 0xdd5d,0x3b21
        DCW     0xa963,0x3871, 0x08df,0x289a, 0xcdb7,0x3ec5
        DCW     0xa57e,0x2d41, 0x0000,0x2d41, 0xc000,0x4000
        DCW     0xa963,0x1e2b, 0xf721,0x3179, 0xb4be,0x3ec5
        DCW     0xb4be,0x0c7c, 0xee58,0x3537, 0xac61,0x3b21
        DCW     0xc695,0xf9ba, 0xe5ba,0x3871, 0xa73b,0x3537
        DCW     0xdd5d,0xe782, 0xdd5d,0x3b21, 0xa57e,0x2d41
        DCW     0xf721,0xd766, 0xd556,0x3d3f, 0xa73b,0x238e
        DCW     0x11a8,0xcac9, 0xcdb7,0x3ec5, 0xac61,0x187e
        DCW     0x2aaa,0xc2c1, 0xc695,0x3fb1, 0xb4be,0x0c7c
        ; N=256
        DCW     0x4000,0x0000, 0x4000,0x0000, 0x4000,0x0000
        DCW     0x3b1e,0x04b5, 0x3e69,0x0192, 0x3cc8,0x0324
        DCW     0x35eb,0x0964, 0x3cc8,0x0324, 0x396b,0x0646
        DCW     0x306c,0x0e06, 0x3b1e,0x04b5, 0x35eb,0x0964
        DCW     0x2aaa,0x1294, 0x396b,0x0646, 0x3249,0x0c7c
        DCW     0x24ae,0x1709, 0x37af,0x07d6, 0x2e88,0x0f8d
        DCW     0x1e7e,0x1b5d, 0x35eb,0x0964, 0x2aaa,0x1294
        DCW     0x1824,0x1f8c, 0x341e,0x0af1, 0x26b3,0x1590
        DCW     0x11a8,0x238e, 0x3249,0x0c7c, 0x22a3,0x187e
        DCW     0x0b14,0x2760, 0x306c,0x0e06, 0x1e7e,0x1b5d
        DCW     0x0471,0x2afb, 0x2e88,0x0f8d, 0x1a46,0x1e2b
        DCW     0xfdc7,0x2e5a, 0x2c9d,0x1112, 0x15fe,0x20e7
        DCW     0xf721,0x3179, 0x2aaa,0x1294, 0x11a8,0x238e
        DCW     0xf087,0x3453, 0x28b2,0x1413, 0x0d48,0x2620
        DCW     0xea02,0x36e5, 0x26b3,0x1590, 0x08df,0x289a
        DCW     0xe39c,0x392b, 0x24ae,0x1709, 0x0471,0x2afb
        DCW     0xdd5d,0x3b21, 0x22a3,0x187e, 0x0000,0x2d41
        DCW     0xd74e,0x3cc5, 0x2093,0x19ef, 0xfb8f,0x2f6c
        DCW     0xd178,0x3e15, 0x1e7e,0x1b5d, 0xf721,0x3179
        DCW     0xcbe2,0x3f0f, 0x1c64,0x1cc6, 0xf2b8,0x3368
        DCW     0xc695,0x3fb1, 0x1a46,0x1e2b, 0xee58,0x3537
        DCW     0xc197,0x3ffb, 0x1824,0x1f8c, 0xea02,0x36e5
        DCW     0xbcf0,0x3fec, 0x15fe,0x20e7, 0xe5ba,0x3871
        DCW     0xb8a6,0x3f85, 0x13d5,0x223d, 0xe182,0x39db
        DCW     0xb4be,0x3ec5, 0x11a8,0x238e, 0xdd5d,0x3b21
        DCW     0xb140,0x3daf, 0x0f79,0x24da, 0xd94d,0x3c42
        DCW     0xae2e,0x3c42, 0x0d48,0x2620, 0xd556,0x3d3f
        DCW     0xab8e,0x3a82, 0x0b14,0x2760, 0xd178,0x3e15
        DCW     0xa963,0x3871, 0x08df,0x289a, 0xcdb7,0x3ec5
        DCW     0xa7b1,0x3612, 0x06a9,0x29ce, 0xca15,0x3f4f
        DCW     0xa678,0x3368, 0x0471,0x2afb, 0xc695,0x3fb1
        DCW     0xa5bc,0x3076, 0x0239,0x2c21, 0xc338,0x3fec
        DCW     0xa57e,0x2d41, 0x0000,0x2d41, 0xc000,0x4000
        DCW     0xa5bc,0x29ce, 0xfdc7,0x2e5a, 0xbcf0,0x3fec
        DCW     0xa678,0x2620, 0xfb8f,0x2f6c, 0xba09,0x3fb1
        DCW     0xa7b1,0x223d, 0xf957,0x3076, 0xb74d,0x3f4f
        DCW     0xa963,0x1e2b, 0xf721,0x3179, 0xb4be,0x3ec5
        DCW     0xab8e,0x19ef, 0xf4ec,0x3274, 0xb25e,0x3e15
        DCW     0xae2e,0x1590, 0xf2b8,0x3368, 0xb02d,0x3d3f
        DCW     0xb140,0x1112, 0xf087,0x3453, 0xae2e,0x3c42
        DCW     0xb4be,0x0c7c, 0xee58,0x3537, 0xac61,0x3b21
        DCW     0xb8a6,0x07d6, 0xec2b,0x3612, 0xaac8,0x39db
        DCW     0xbcf0,0x0324, 0xea02,0x36e5, 0xa963,0x3871
        DCW     0xc197,0xfe6e, 0xe7dc,0x37b0, 0xa834,0x36e5
        DCW     0xc695,0xf9ba, 0xe5ba,0x3871, 0xa73b,0x3537
        DCW     0xcbe2,0xf50f, 0xe39c,0x392b, 0xa678,0x3368
        DCW     0xd178,0xf073, 0xe182,0x39db, 0xa5ed,0x3179
        DCW     0xd74e,0xebed, 0xdf6d,0x3a82, 0xa599,0x2f6c
        DCW     0xdd5d,0xe782, 0xdd5d,0x3b21, 0xa57e,0x2d41
        DCW     0xe39c,0xe33a, 0xdb52,0x3bb6, 0xa599,0x2afb
        DCW     0xea02,0xdf19, 0xd94d,0x3c42, 0xa5ed,0x289a
        DCW     0xf087,0xdb26, 0xd74e,0x3cc5, 0xa678,0x2620
        DCW     0xf721,0xd766, 0xd556,0x3d3f, 0xa73b,0x238e
        DCW     0xfdc7,0xd3df, 0xd363,0x3daf, 0xa834,0x20e7
        DCW     0x0471,0xd094, 0xd178,0x3e15, 0xa963,0x1e2b
        DCW     0x0b14,0xcd8c, 0xcf94,0x3e72, 0xaac8,0x1b5d
        DCW     0x11a8,0xcac9, 0xcdb7,0x3ec5, 0xac61,0x187e
        DCW     0x1824,0xc850, 0xcbe2,0x3f0f, 0xae2e,0x1590
        DCW     0x1e7e,0xc625, 0xca15,0x3f4f, 0xb02d,0x1294
        DCW     0x24ae,0xc44a, 0xc851,0x3f85, 0xb25e,0x0f8d
        DCW     0x2aaa,0xc2c1, 0xc695,0x3fb1, 0xb4be,0x0c7c
        DCW     0x306c,0xc18e, 0xc4e2,0x3fd4, 0xb74d,0x0964
        DCW     0x35eb,0xc0b1, 0xc338,0x3fec, 0xba09,0x0646
        DCW     0x3b1e,0xc02c, 0xc197,0x3ffb, 0xbcf0,0x0324

        END
;******************* (C) COPYRIGHT 2009 STMicroelectronics *****END OF FILE****