Gonzalo Brusco
/
QuiPAD
Real Time FIR Filter - Distinctive Excellence award winner :)
cr_dsplib_blockfir32.s@0:b3e50e98acac, 2011-08-13 (annotated)
- Committer:
- Gonzakpo
- Date:
- Sat Aug 13 17:35:52 2011 +0000
- Revision:
- 0:b3e50e98acac
Who changed what in which revision?
User | Revision | Line number | New contents of line |
---|---|---|---|
Gonzakpo | 0:b3e50e98acac | 1 | |
Gonzakpo | 0:b3e50e98acac | 2 | ; vF_dspl_blockfir32: block FIR filter that computes four output samples per LoopSample pass. Its four arguments arrive in r0, r1, r2 and r3. |
Gonzakpo | 0:b3e50e98acac | 3 | ; r0 = Pointer to the output samples. |
Gonzakpo | 0:b3e50e98acac | 4 | ; r1 = Pointer to the input samples. |
Gonzakpo | 0:b3e50e98acac | 5 | ; r2 = Pointer to the coefficient structure (word 0 = pointer to the coefficients, word 1 = number of coefficients). |
Gonzakpo | 0:b3e50e98acac | 6 | ; r3 = Number of input samples. |
Gonzakpo | 0:b3e50e98acac | 7 | ; r4 = Number of coefficients (loaded from the structure). |
Gonzakpo | 0:b3e50e98acac | 8 | ; r3 and r5 = Temporaries holding the coefficients being applied (r3 is reused for this inside the loop). |
Gonzakpo | 0:b3e50e98acac | 9 | ; r6,r7,r8,r9 = Accumulators, one per output sample of the current group of four. |
Gonzakpo | 0:b3e50e98acac | 10 | ; r10,r11,r12,r14 = Temporaries holding the input samples. |
Gonzakpo | 0:b3e50e98acac | 11 | ; r0 = Also used as a constant mask inside the loop (r0 is reused). |
Gonzakpo | 0:b3e50e98acac | 12 | |
Gonzakpo | 0:b3e50e98acac | 13 | AREA asm_func, CODE, READONLY |
Gonzakpo | 0:b3e50e98acac | 14 | |
Gonzakpo | 0:b3e50e98acac | 15 | EXPORT vF_dspl_blockfir32 |
Gonzakpo | 0:b3e50e98acac | 16 | |
Gonzakpo | 0:b3e50e98acac | 17 | vF_dspl_blockfir32 |
Gonzakpo | 0:b3e50e98acac | 18 | |
Gonzakpo | 0:b3e50e98acac | 19 | push {r4-r12,lr} ; Save r4-r12 and lr on the stack (lr/r14 is used as a sample temporary below). |
Gonzakpo | 0:b3e50e98acac | 20 | ldr r4,[r2,#4] ; r4 = number of coefficients (second word of the structure). |
Gonzakpo | 0:b3e50e98acac | 21 | ldr r2,[r2] ; r2 = pointer to the coefficients (first word of the structure). |
Gonzakpo | 0:b3e50e98acac | 22 | |
Gonzakpo | 0:b3e50e98acac | 23 | ; Subtract the coefficient count from r3 so that the trailing transient is not generated. |
Gonzakpo | 0:b3e50e98acac | 24 | ; r3 = r3 - r4 + 4 |
Gonzakpo | 0:b3e50e98acac | 25 | ; Remember that the last three output samples must be discarded. NOTE(review): they appear to be computed from samples read past the end of the input block - confirm the input buffer has that slack. |
Gonzakpo | 0:b3e50e98acac | 26 | |
Gonzakpo | 0:b3e50e98acac | 27 | sub r3,r3,r4 |
Gonzakpo | 0:b3e50e98acac | 28 | add r3,r3,#4 |
Gonzakpo | 0:b3e50e98acac | 29 | |
Gonzakpo | 0:b3e50e98acac | 30 | LoopSample |
Gonzakpo | 0:b3e50e98acac | 31 | mov r6,#0 ; Clear the four accumulators (r6-r9). |
Gonzakpo | 0:b3e50e98acac | 32 | mov r7,r6 |
Gonzakpo | 0:b3e50e98acac | 33 | mov r8,r6 |
Gonzakpo | 0:b3e50e98acac | 34 | mov r9,r6 |
Gonzakpo | 0:b3e50e98acac | 35 | |
Gonzakpo | 0:b3e50e98acac | 36 | push {r0, r3, r4} ; Save r0 (output pointer), r3 (outputs remaining) and r4 (coefficient count); all three are overwritten inside the tap loop. |
Gonzakpo | 0:b3e50e98acac | 37 | |
Gonzakpo | 0:b3e50e98acac | 38 | ldr r0,=0x80000000 |
Gonzakpo | 0:b3e50e98acac | 39 | |
Gonzakpo | 0:b3e50e98acac | 40 | ldmia r1!,{r10,r11,r12,r14} ; Load 4 samples into r10, r11, r12 and r14; r1 is left pointing at the next sample. |
Gonzakpo | 0:b3e50e98acac | 41 | |
Gonzakpo | 0:b3e50e98acac | 42 | ; Shift each sample left by 16 and complement the resulting MSB (bit 15 of the loaded word) using the 0x80000000 mask in r0. |
Gonzakpo | 0:b3e50e98acac | 43 | eor r10,r0,r10,LSL#16 |
Gonzakpo | 0:b3e50e98acac | 44 | eor r11,r0,r11,LSL#16 |
Gonzakpo | 0:b3e50e98acac | 45 | eor r12,r0,r12,LSL#16 |
Gonzakpo | 0:b3e50e98acac | 46 | eor r14,r0,r14,LSL#16 |
Gonzakpo | 0:b3e50e98acac | 47 | |
Gonzakpo | 0:b3e50e98acac | 48 | ; Arithmetic shift right by 22: sign-extends, leaving what were bits 15..6 of the loaded word as a signed 10-bit value. |
Gonzakpo | 0:b3e50e98acac | 49 | mov r10,r10,ASR#22 |
Gonzakpo | 0:b3e50e98acac | 50 | mov r11,r11,ASR#22 |
Gonzakpo | 0:b3e50e98acac | 51 | mov r12,r12,ASR#22 |
Gonzakpo | 0:b3e50e98acac | 52 | mov r14,r14,ASR#22 |
Gonzakpo | 0:b3e50e98acac | 53 | |
Gonzakpo | 0:b3e50e98acac | 54 | LoopTaps |
Gonzakpo | 0:b3e50e98acac | 55 | ldmia r2!,{r3,r5} ; Load 2 coefficients into r3 and r5; r2 is left pointing at the next coefficient. |
Gonzakpo | 0:b3e50e98acac | 56 | |
Gonzakpo | 0:b3e50e98acac | 57 | ; mla = multiply and accumulate |
Gonzakpo | 0:b3e50e98acac | 58 | ; Multiply one coefficient by the four samples currently held and accumulate into r6-r9. |
Gonzakpo | 0:b3e50e98acac | 59 | mla r6,r10,r3,r6 ; r6 = (r6 + (r10 * r3))[31:0] |
Gonzakpo | 0:b3e50e98acac | 60 | mla r7,r11,r3,r7 ; r7 = (r7 + (r11 * r3))[31:0] |
Gonzakpo | 0:b3e50e98acac | 61 | mla r8,r12,r3,r8 ; r8 = (r8 + (r12 * r3))[31:0] |
Gonzakpo | 0:b3e50e98acac | 62 | mla r9,r14,r3,r9 ; r9 = (r9 + (r14 * r3))[31:0] |
Gonzakpo | 0:b3e50e98acac | 63 | |
Gonzakpo | 0:b3e50e98acac | 64 | ldr r10,[r1],#4 ; Load the word at r1 into r10 (replacing the oldest sample), then post-increment r1 by 4 (r1 = r1 + 4). |
Gonzakpo | 0:b3e50e98acac | 65 | |
Gonzakpo | 0:b3e50e98acac | 66 | ; Same conversion as for the first four samples: shift left 16, complement the MSB, then sign-extend with ASR #22. |
Gonzakpo | 0:b3e50e98acac | 67 | eor r10,r0,r10,LSL#16 |
Gonzakpo | 0:b3e50e98acac | 68 | mov r10,r10,ASR#22 |
Gonzakpo | 0:b3e50e98acac | 69 | |
Gonzakpo | 0:b3e50e98acac | 70 | ldr r3,[r2],#4 ; Load the word at r2 into r3, then post-increment r2 by 4 (r2 = r2 + 4). |
Gonzakpo | 0:b3e50e98acac | 71 | |
Gonzakpo | 0:b3e50e98acac | 72 | mla r6,r11,r5,r6 ; r6 = (r6 + (r11 * r5))[31:0] |
Gonzakpo | 0:b3e50e98acac | 73 | mla r7,r12,r5,r7 ; r7 = (r7 + (r12 * r5))[31:0] |
Gonzakpo | 0:b3e50e98acac | 74 | mla r8,r14,r5,r8 ; r8 = (r8 + (r14 * r5))[31:0] |
Gonzakpo | 0:b3e50e98acac | 75 | mla r9,r10,r5,r9 ; r9 = (r9 + (r10 * r5))[31:0] |
Gonzakpo | 0:b3e50e98acac | 76 | |
Gonzakpo | 0:b3e50e98acac | 77 | ldr r11,[r1],#4 ; Load the word at r1 into r11, then post-increment r1 by 4 (r1 = r1 + 4). |
Gonzakpo | 0:b3e50e98acac | 78 | |
Gonzakpo | 0:b3e50e98acac | 79 | ; Shift left 16, complement the MSB and sign-extend. |
Gonzakpo | 0:b3e50e98acac | 80 | eor r11,r0,r11,LSL#16 |
Gonzakpo | 0:b3e50e98acac | 81 | mov r11,r11,ASR#22 |
Gonzakpo | 0:b3e50e98acac | 82 | |
Gonzakpo | 0:b3e50e98acac | 83 | ldr r5,[r2],#4 ; Load the word at r2 into r5, then post-increment r2 by 4 (r2 = r2 + 4). |
Gonzakpo | 0:b3e50e98acac | 84 | ; Memory is byte-addressed, so incrementing a pointer by 4 steps |
Gonzakpo | 0:b3e50e98acac | 85 | ; to the next 32-bit word (each ldr here reads one 32-bit word). |
Gonzakpo | 0:b3e50e98acac | 86 | |
Gonzakpo | 0:b3e50e98acac | 87 | mla r6,r12,r3,r6 ; r6 = (r6 + (r12 * r3))[31:0] |
Gonzakpo | 0:b3e50e98acac | 88 | mla r7,r14,r3,r7 ; r7 = (r7 + (r14 * r3))[31:0] |
Gonzakpo | 0:b3e50e98acac | 89 | mla r8,r10,r3,r8 ; r8 = (r8 + (r10 * r3))[31:0] |
Gonzakpo | 0:b3e50e98acac | 90 | mla r9,r11,r3,r9 ; r9 = (r9 + (r11 * r3))[31:0] |
Gonzakpo | 0:b3e50e98acac | 91 | |
Gonzakpo | 0:b3e50e98acac | 92 | ldr r12,[r1],#4 ; Load the word at r1 into r12, then post-increment r1 by 4 (r1 = r1 + 4). |
Gonzakpo | 0:b3e50e98acac | 93 | |
Gonzakpo | 0:b3e50e98acac | 94 | ; Shift left 16, complement the MSB and sign-extend. |
Gonzakpo | 0:b3e50e98acac | 95 | eor r12,r0,r12,LSL#16 |
Gonzakpo | 0:b3e50e98acac | 96 | mov r12,r12,ASR#22 |
Gonzakpo | 0:b3e50e98acac | 97 | |
Gonzakpo | 0:b3e50e98acac | 98 | subs r4,r4,#4 ; This subtraction is placed here, early, to avoid a bubble before the branch; none of the instructions between here and the bne has an S suffix, so the flags it sets are still intact there. |
Gonzakpo | 0:b3e50e98acac | 99 | ;r4 = r4 - 4 (four coefficients are consumed per LoopTaps pass) |
Gonzakpo | 0:b3e50e98acac | 100 | |
Gonzakpo | 0:b3e50e98acac | 101 | mla r6,r14,r5,r6 ; r6 = (r6 + (r14 * r5))[31:0] |
Gonzakpo | 0:b3e50e98acac | 102 | mla r7,r10,r5,r7 ; r7 = (r7 + (r10 * r5))[31:0] |
Gonzakpo | 0:b3e50e98acac | 103 | mla r8,r11,r5,r8 ; r8 = (r8 + (r11 * r5))[31:0] |
Gonzakpo | 0:b3e50e98acac | 104 | mla r9,r12,r5,r9 ; r9 = (r9 + (r12 * r5))[31:0] |
Gonzakpo | 0:b3e50e98acac | 105 | |
Gonzakpo | 0:b3e50e98acac | 106 | ldr r14,[r1],#4 ; Load the word at r1 into r14, then post-increment r1 by 4 (r1 = r1 + 4). |
Gonzakpo | 0:b3e50e98acac | 107 | |
Gonzakpo | 0:b3e50e98acac | 108 | ; Shift left 16, complement the MSB and sign-extend. |
Gonzakpo | 0:b3e50e98acac | 109 | eor r14,r0,r14,LSL#16 |
Gonzakpo | 0:b3e50e98acac | 110 | mov r14,r14,ASR#22 |
Gonzakpo | 0:b3e50e98acac | 111 | |
Gonzakpo | 0:b3e50e98acac | 112 | bne LoopTaps ; Repeat until the coefficient counter reaches zero (flags from the subs on r4 above). NOTE(review): only terminates as intended if the coefficient count is a non-zero multiple of 4 - confirm callers guarantee this. |
Gonzakpo | 0:b3e50e98acac | 113 | |
Gonzakpo | 0:b3e50e98acac | 114 | ; Take the upper 16 bits of each accumulator, keep bits 15..6 (mask 0xFFC0) and complement bit 15 (back to DAC format). |
Gonzakpo | 0:b3e50e98acac | 115 | ldr r0,=0xFFC0 |
Gonzakpo | 0:b3e50e98acac | 116 | and r6,r0,r6,LSR#16 |
Gonzakpo | 0:b3e50e98acac | 117 | and r7,r0,r7,LSR#16 |
Gonzakpo | 0:b3e50e98acac | 118 | and r8,r0,r8,LSR#16 |
Gonzakpo | 0:b3e50e98acac | 119 | and r9,r0,r9,LSR#16 |
Gonzakpo | 0:b3e50e98acac | 120 | eor r6,r6,#0x8000 |
Gonzakpo | 0:b3e50e98acac | 121 | eor r7,r7,#0x8000 |
Gonzakpo | 0:b3e50e98acac | 122 | eor r8,r8,#0x8000 |
Gonzakpo | 0:b3e50e98acac | 123 | eor r9,r9,#0x8000 |
Gonzakpo | 0:b3e50e98acac | 124 | |
Gonzakpo | 0:b3e50e98acac | 125 | pop {r0, r3, r4} ; Restore r0 (output pointer), r3 (outputs remaining) and r4 (coefficient count) from the stack. |
Gonzakpo | 0:b3e50e98acac | 126 | |
Gonzakpo | 0:b3e50e98acac | 127 | ; Store the outputs |
Gonzakpo | 0:b3e50e98acac | 128 | stmia r0!,{r6,r7,r8,r9} ; Store r6-r9 at the address in r0; r0 is left pointing at the next output slot. |
Gonzakpo | 0:b3e50e98acac | 129 | |
Gonzakpo | 0:b3e50e98acac | 130 | subs r3,r3,#4 ; hoisted up to avoid pipe delay before branch; the two sub instructions below have no S suffix, so these flags reach the bne |
Gonzakpo | 0:b3e50e98acac | 131 | ; r3 = r3 - 4 (four outputs were produced this pass) |
Gonzakpo | 0:b3e50e98acac | 132 | |
Gonzakpo | 0:b3e50e98acac | 133 | ; Reset Coeffs to start |
Gonzakpo | 0:b3e50e98acac | 134 | sub r2,r2,r4,LSL#2 ; r2 = r2 - (r4 * 4) |
Gonzakpo | 0:b3e50e98acac | 135 | |
Gonzakpo | 0:b3e50e98acac | 136 | ; Walk along the input data by 4 samples each LoopSample iteration |
Gonzakpo | 0:b3e50e98acac | 137 | sub r1,r1,r4,LSL#2 ; r1 = r1 - (r4 * 4); r1 was advanced by 4 + r4 words during this pass, so the net step is 4 samples |
Gonzakpo | 0:b3e50e98acac | 138 | |
Gonzakpo | 0:b3e50e98acac | 139 | bne LoopSample ; Repeat until r3 reaches zero, i.e. all outputs have been produced. NOTE(review): only terminates as intended if (samples - coefficients + 4) is a positive multiple of 4 - confirm callers guarantee this. |
Gonzakpo | 0:b3e50e98acac | 140 | |
Gonzakpo | 0:b3e50e98acac | 141 | pop {r4-r12,lr} ; Restore the registers saved on entry. |
Gonzakpo | 0:b3e50e98acac | 142 | bx lr ; Return |
Gonzakpo | 0:b3e50e98acac | 143 | |
Gonzakpo | 0:b3e50e98acac | 144 | end |