mbed port of FFT routines from STM32 DSP library and Ivan Mellen's implementation. Tested on LPC2368 mbed but should work on 1768 too (original code was written for Cortex-M3)

Dependencies:   mbed

cr4_fft_64_stm32.s

Committer:
igorsk
Date:
2009-12-13
Revision:
0:90ade34a3b71

File content as of revision 0:90ade34a3b71:

;******************** (C) COPYRIGHT 2009  STMicroelectronics ********************
;* File Name          : cr4_fft_64_stm32.s
;* Author             : MCD Application Team
;* Version            : V2.0.0
;* Date               : 04/27/2009
;* Description        : Optimized 64-point radix-4 complex FFT for Cortex-M3
;********************************************************************************
;* THE PRESENT FIRMWARE WHICH IS FOR GUIDANCE ONLY AIMS AT PROVIDING CUSTOMERS
;* WITH CODING INFORMATION REGARDING THEIR PRODUCTS IN ORDER FOR THEM TO SAVE TIME.
;* AS A RESULT, STMICROELECTRONICS SHALL NOT BE HELD LIABLE FOR ANY DIRECT,
;* INDIRECT OR CONSEQUENTIAL DAMAGES WITH RESPECT TO ANY CLAIMS ARISING FROM THE
;* CONTENT OF SUCH SOFTWARE AND/OR THE USE MADE BY CUSTOMERS OF THE CODING
;* INFORMATION CONTAINED HEREIN IN CONNECTION WITH THEIR PRODUCTS.
;*******************************************************************************/

    ;THUMB
    REQUIRE8
    PRESERVE8

    AREA |.text|, CODE, READONLY, ALIGN=4
                
    EXPORT cr4_fft_64_stm32      
    EXTERN TableFFT


pssK          RN R0
pssOUT        RN R0
pssX          RN R1
pssIN         RN R1
butternbr     RN R2
Nbin          RN R2
index         RN R3
Ar            RN R4
Ai            RN R5
Br            RN R6
Bi            RN R7
Cr            RN R8
Ci            RN R9
Dr            RN R10
Di            RN R11
cntrbitrev     RN R12
tmp           RN R12
pssIN2        RN R14
tmp2          RN R14

NPT EQU 64

;----------------------------- MACROS ----------------------------------------
     
         MACRO
         DEC  $reg
         SUB  $reg,$reg,#1
         MEND

         MACRO
         INC  $reg
         ADD  $reg,$reg,#1
         MEND


          MACRO
         QUAD  $reg
         MOV  $reg,$reg,LSL#2
         MEND

;sXi = *(PssX+1); sXr = *PssX; PssX += offset; PssX= R1

          MACRO
          LDR2Q  $sXr,$sXi, $PssX, $offset
          LDRSH $sXi, [$PssX, #2]
          LDRSH $sXr, [$PssX]
          ADD $PssX, $PssX, $offset
          MEND

;!! Same macro, to be used when passing negative offset value !!
        MACRO
        LDR2Qm  $sXr, $sXi, $PssX, $offset
        LDRSH $sXi, [$PssX, #2]
        LDRSH $sXr, [$PssX]
        SUB $PssX, $PssX, $offset
        MEND

;(PssX+1)= sXi;  *PssX=sXr; PssX += offset;
        MACRO
        STR2Q  $sXr, $sXi, $PssX, $offset
        STRH  $sXi, [$PssX, #2]
        STRH  $sXr, [$PssX]
        ADD $PssX, $PssX, $offset
        MEND

; YY = Cplx_conjugate_mul(Y,K)
;  Y = YYr + i*YYi
; use the following trick
;  K = (Kr-Ki) + i*Ki
        MACRO
        CXMUL_V7  $YYr, $YYi, $Yr, $Yi, $Kr, $Ki,$tmp,$tmp2
        SUB  $tmp2, $Yi, $Yr         ; sYi-sYr
        MUL  $tmp, $tmp2, $Ki        ; (sYi-sYr)*sKi
        ADD  $tmp2, $Kr, $Ki, LSL#1  ; (sKr+sKi)
        MLA  $YYi, $Yi, $Kr, $tmp     ; lYYi = sYi*sKr-sYr*sKi
        MLA  $YYr, $Yr, $tmp2, $tmp   ; lYYr = sYr*sKr+sYi*sKi
        MEND

; Four point complex Fast Fourier Transform        
        MACRO
        CXADDA4  $s
        ; (C,D) = (C+D, C-D)
        ADD     Cr, Cr, Dr
        ADD     Ci, Ci, Di
        SUB     Dr, Cr, Dr, LSL#1
        SUB     Di, Ci, Di, LSL#1
        ; (A,B) = (A+(B>>s), A-(B>>s))/4
        MOV     Ar, Ar, ASR#2
        MOV     Ai, Ai, ASR#2
        ADD     Ar, Ar, Br, ASR#(2+$s)
        ADD     Ai, Ai, Bi, ASR#(2+$s)
        SUB     Br, Ar, Br, ASR#(1+$s)
        SUB     Bi, Ai, Bi, ASR#(1+$s)
        ; (A,C) = (A+(C>>s)/4, A-(C>>s)/4)
        ADD     Ar, Ar, Cr, ASR#(2+$s)
        ADD     Ai, Ai, Ci, ASR#(2+$s)
        SUB     Cr, Ar, Cr, ASR#(1+$s)
        SUB     Ci, Ai, Ci, ASR#(1+$s)
        ; (B,D) = (B-i*(D>>s)/4, B+i*(D>>s)/4)
        ADD     Br, Br, Di, ASR#(2+$s)
        SUB     Bi, Bi, Dr, ASR#(2+$s)
        SUB     Di, Br, Di, ASR#(1+$s)
        ADD     Dr, Bi, Dr, ASR#(1+$s)
        MEND

        
        MACRO
        BUTFLY4ZERO_OPT  $pIN,$offset, $pOUT
        LDRSH Ai, [$pIN, #2]
        LDRSH Ar, [$pIN],#NPT
        LDRSH Ci, [$pIN, #2]
        LDRSH Cr, [$pIN],#NPT
        LDRSH Bi, [$pIN, #2]
        LDRSH Br, [$pIN],#NPT
        LDRSH Di, [$pIN, #2]
        LDRSH Dr, [$pIN],#NPT
        ; (C,D) = (C+D, C-D)
        ADD     Cr, Cr, Dr
        ADD     Ci, Ci, Di
        SUB     Dr, Cr, Dr, LSL#1  ; trick
        SUB     Di, Ci, Di, LSL#1  ;trick
        ; (A,B) = (A+B)/4, (A-B)/4
        MOV     Ar, Ar, ASR#2
        MOV     Ai, Ai, ASR#2
        ADD     Ar, Ar, Br, ASR#2
        ADD     Ai, Ai, Bi, ASR#2
        SUB     Br, Ar, Br, ASR#1
        SUB     Bi, Ai, Bi, ASR#1
        ; (A,C) = (A+C)/4, (A-C)/4
        ADD     Ar, Ar, Cr, ASR#2
        ADD     Ai, Ai, Ci, ASR#2
        SUB     Cr, Ar, Cr, ASR#1
        SUB     Ci, Ai, Ci, ASR#1
        ; (B,D) = (B-i*D)/4, (B+i*D)/4
        ADD     Br, Br, Di, ASR#2
        SUB     Bi, Bi, Dr, ASR#2
        SUB     Di, Br, Di, ASR#1
        ADD     Dr, Bi, Dr, ASR#1
        ;
        STRH    Ai, [$pOUT, #2]
        STRH    Ar, [$pOUT], #4
        STRH    Bi, [$pOUT, #2]
        STRH    Br, [$pOUT], #4
        STRH    Ci, [$pOUT, #2]
        STRH    Cr, [$pOUT], #4
        STRH    Dr, [$pOUT, #2]  ; inversion here
        STRH    Di, [$pOUT], #4
        MEND

        MACRO
        BUTFLY4_V7   $pssDin,$offset,$pssDout,$qformat,$pssK
        LDR2Qm   Ar,Ai,$pssDin, $offset;-$offset
        LDR2Q    Dr,Di,$pssK, #4
        ; format CXMUL_V7 YYr, YYi, Yr, Yi, Kr, Ki,tmp,tmp2
        CXMUL_V7 Dr,Di,Ar,Ai,Dr,Di,tmp,tmp2
        LDR2Qm   Ar,Ai,$pssDin,$offset;-$offset
        LDR2Q    Cr,Ci,$pssK,#4
        CXMUL_V7 Cr,Ci,Ar,Ai,Cr,Ci,tmp,tmp2
        LDR2Qm    Ar,Ai, $pssDin, $offset;-$offset
        LDR2Q    Br,Bi, $pssK, #4
        CXMUL_V7  Br,Bi,Ar,Ai,Br,Bi,tmp,tmp2
        LDR2Q    Ar,Ai, $pssDin, #0
        CXADDA4  $qformat
        STRH    Ai, [$pssDout, #2]
        STRH    Ar, [$pssDout]
        ADD     $pssDout, $pssDout, $offset
        STRH    Bi, [$pssDout, #2]
        STRH    Br, [$pssDout]
        ADD     $pssDout, $pssDout, $offset
        STRH    Ci, [$pssDout, #2]
        STRH    Cr, [$pssDout]
        ADD     $pssDout, $pssDout, $offset
        STRH    Dr, [$pssDout, #2]  ; inversion here
        STRH    Di, [$pssDout], #4
        MEND

;-------------------             CODE             --------------------------------
;===============================================================================
;*******************************************************************************
;* Function Name  : cr4_fft_64_stm32
;* Description    : complex radix-4 64 points FFT
;* Input          : - R0 = pssOUT: Output array .
;*                  - R1 = pssIN: Input array 
;*                  - R2 = Nbin: =64 number of points, this optimized FFT function  
;*                    can only convert 64 points.
;* Output         : None 
;* Return         : None
;*******************************************************************************
cr4_fft_64_stm32

        STMFD   SP!, {R4-R11, LR}
        
        MOV cntrbitrev, #0
        MOV index,#0
preloop_v7
        ADD     pssIN2, pssIN, cntrbitrev, LSR#26 ;64-pts
        BUTFLY4ZERO_OPT pssIN2,Nbin,pssOUT
        INC index
     IF :DEF:TARGET_LPC1768        
        RBIT cntrbitrev,index
     ELSE
        ; since we need only 16 values, use a lookup table
        ADR cntrbitrev, BITREV16
        LDR cntrbitrev, [cntrbitrev, index, LSL #2]
     ENDIF   
        CMP index,#16  ;64-pts
        BNE  preloop_v7


        SUB     pssX, pssOUT, Nbin, LSL#2
        MOV     index, #16
        MOVS    butternbr, Nbin, LSR#4   ;dual use of register 
        
;------------------------------------------------------------------------------
;   The FFT coefficients table can be stored into Flash or RAM. 
;   The following two lines of code allow selecting the method for coefficients 
;   storage. 
;   In the case of choosing coefficients in RAM, you have to:
;   1. Include the file table_fft.h, which is a part of the DSP library, 
;      in your main file.
;   2. Decomment the line LDR.W pssK, =TableFFT and comment the line 
;      ADRL    pssK, TableFFT_V7
;   3. Comment all the TableFFT_V7 data.
;------------------------------------------------------------------------------
        ADR    pssK, TableFFT_V7    ; Coeff in Flash 
        ;LDR.W pssK, =TableFFT      ; Coeff in RAM 

;................................
passloop_v7
        STMFD   SP!, {pssX,butternbr}
        ADD     tmp, index, index, LSL#1
        ADD     pssX, pssX, tmp
        SUB     butternbr, butternbr, #1<<16
;................
grouploop_v7
        ADD     butternbr,butternbr,index,LSL#(16-2)
;.......
butterloop_v7
        BUTFLY4_V7  pssX,index,pssX,14,pssK
        SUBS        butternbr,butternbr, #1<<16
        BGE     butterloop_v7
;.......
        ADD     tmp, index, index, LSL#1
        ADD     pssX, pssX, tmp
        DEC     butternbr
        MOVS    tmp2, butternbr, LSL#16
        IT      NE
        SUBNE   pssK, pssK, tmp
        BNE     grouploop_v7
;................
        LDMFD   sp!, {pssX, butternbr}
        QUAD    index
        MOVS    butternbr, butternbr, LSR#2    ; loop nbr /= radix 
        BNE     passloop_v7
;................................
       LDMFD   SP!, {R4-R11, PC}

;=============================================================================

TableFFT_V7
         ;N=16
        DCW 0x4000,0x0000, 0x4000,0x0000, 0x4000,0x0000
        DCW 0xdd5d,0x3b21, 0x22a3,0x187e, 0x0000,0x2d41
        DCW 0xa57e,0x2d41, 0x0000,0x2d41, 0xc000,0x4000
        DCW 0xdd5d,0xe782, 0xdd5d,0x3b21, 0xa57e,0x2d41
        ; N=64
        DCW 0x4000,0x0000, 0x4000,0x0000, 0x4000,0x0000
        DCW 0x2aaa,0x1294, 0x396b,0x0646, 0x3249,0x0c7c
        DCW 0x11a8,0x238e, 0x3249,0x0c7c, 0x22a3,0x187e
        DCW 0xf721,0x3179, 0x2aaa,0x1294, 0x11a8,0x238e
        DCW 0xdd5d,0x3b21, 0x22a3,0x187e, 0x0000,0x2d41
        DCW 0xc695,0x3fb1, 0x1a46,0x1e2b, 0xee58,0x3537
        DCW 0xb4be,0x3ec5, 0x11a8,0x238e, 0xdd5d,0x3b21
        DCW 0xa963,0x3871, 0x08df,0x289a, 0xcdb7,0x3ec5
        DCW 0xa57e,0x2d41, 0x0000,0x2d41, 0xc000,0x4000
        DCW 0xa963,0x1e2b, 0xf721,0x3179, 0xb4be,0x3ec5
        DCW 0xb4be,0x0c7c, 0xee58,0x3537, 0xac61,0x3b21
        DCW 0xc695,0xf9ba, 0xe5ba,0x3871, 0xa73b,0x3537
        DCW 0xdd5d,0xe782, 0xdd5d,0x3b21, 0xa57e,0x2d41
        DCW 0xf721,0xd766, 0xd556,0x3d3f, 0xa73b,0x238e
        DCW 0x11a8,0xcac9, 0xcdb7,0x3ec5, 0xac61,0x187e
        DCW 0x2aaa,0xc2c1, 0xc695,0x3fb1, 0xb4be,0x0c7c

; bit reversal table values from 0 to 16
BITREV16
        DCD 2_0000 << 28
        DCD 2_1000 << 28
        DCD 2_0100 << 28
        DCD 2_1100 << 28
        DCD 2_0010 << 28
        DCD 2_1010 << 28
        DCD 2_0110 << 28
        DCD 2_1110 << 28
        DCD 2_0001 << 28
        DCD 2_1001 << 28
        DCD 2_0101 << 28
        DCD 2_1101 << 28
        DCD 2_0011 << 28
        DCD 2_1011 << 28
        DCD 2_0111 << 28
        DCD 2_1111 << 28
        DCD 2_0000 << 28
       END
;******************* (C) COPYRIGHT 2009  STMicroelectronics *****END OF FILE****