change some parameters in the library to meet the needs of the website httpbin.org
Fork of MiniTLS-GPL by
math/sqr/fp_sqr_comba.c@5:95f70ebfe61f, 2015-02-06 (annotated)
- Committer:
- shiyilei
- Date:
- Fri Feb 06 06:17:33 2015 +0000
- Revision:
- 5:95f70ebfe61f
- Parent:
- 0:35aa5be3b78d
change some parameters in the library to meet the needs of httpbin.org
Who changed what in which revision?
User | Revision | Line number | New contents of line |
---|---|---|---|
MiniTLS | 0:35aa5be3b78d | 1 | /* |
MiniTLS | 0:35aa5be3b78d | 2 | * |
MiniTLS | 0:35aa5be3b78d | 3 | * This project is meant to fill in where LibTomMath |
MiniTLS | 0:35aa5be3b78d | 4 | * falls short. That is speed ;-) |
MiniTLS | 0:35aa5be3b78d | 5 | * |
MiniTLS | 0:35aa5be3b78d | 6 | * This project is public domain and free for all purposes. |
MiniTLS | 0:35aa5be3b78d | 7 | * |
MiniTLS | 0:35aa5be3b78d | 8 | * Tom St Denis, tomstdenis@gmail.com |
MiniTLS | 0:35aa5be3b78d | 9 | */ |
MiniTLS | 0:35aa5be3b78d | 10 | #include <tfm.h> |
MiniTLS | 0:35aa5be3b78d | 11 | |
MiniTLS | 0:35aa5be3b78d | 12 | #if defined(TFM_PRESCOTT) && defined(TFM_SSE2) |
MiniTLS | 0:35aa5be3b78d | 13 | #undef TFM_SSE2 |
MiniTLS | 0:35aa5be3b78d | 14 | #define TFM_X86 |
MiniTLS | 0:35aa5be3b78d | 15 | #endif |
MiniTLS | 0:35aa5be3b78d | 16 | |
MiniTLS | 0:35aa5be3b78d | 17 | #if defined(TFM_X86) |
MiniTLS | 0:35aa5be3b78d | 18 | |
MiniTLS | 0:35aa5be3b78d | 19 | /* x86-32 optimized */ |
MiniTLS | 0:35aa5be3b78d | 20 | |
MiniTLS | 0:35aa5be3b78d | 21 | #define COMBA_START |
MiniTLS | 0:35aa5be3b78d | 22 | |
MiniTLS | 0:35aa5be3b78d | 23 | #define CLEAR_CARRY \ |
MiniTLS | 0:35aa5be3b78d | 24 | c0 = c1 = c2 = 0; |
MiniTLS | 0:35aa5be3b78d | 25 | |
MiniTLS | 0:35aa5be3b78d | 26 | #define COMBA_STORE(x) \ |
MiniTLS | 0:35aa5be3b78d | 27 | x = c0; |
MiniTLS | 0:35aa5be3b78d | 28 | |
MiniTLS | 0:35aa5be3b78d | 29 | #define COMBA_STORE2(x) \ |
MiniTLS | 0:35aa5be3b78d | 30 | x = c1; |
MiniTLS | 0:35aa5be3b78d | 31 | |
MiniTLS | 0:35aa5be3b78d | 32 | #define CARRY_FORWARD \ |
MiniTLS | 0:35aa5be3b78d | 33 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); |
MiniTLS | 0:35aa5be3b78d | 34 | |
MiniTLS | 0:35aa5be3b78d | 35 | #define COMBA_FINI |
MiniTLS | 0:35aa5be3b78d | 36 | |
MiniTLS | 0:35aa5be3b78d | 37 | #define SQRADD(i, j) \ |
MiniTLS | 0:35aa5be3b78d | 38 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 39 | "movl %6,%%eax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 40 | "mull %%eax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 41 | "addl %%eax,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 42 | "adcl %%edx,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 43 | "adcl $0,%2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 44 | :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","%cc"); |
MiniTLS | 0:35aa5be3b78d | 45 | |
MiniTLS | 0:35aa5be3b78d | 46 | #define SQRADD2(i, j) \ |
MiniTLS | 0:35aa5be3b78d | 47 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 48 | "movl %6,%%eax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 49 | "mull %7 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 50 | "addl %%eax,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 51 | "adcl %%edx,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 52 | "adcl $0,%2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 53 | "addl %%eax,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 54 | "adcl %%edx,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 55 | "adcl $0,%2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 56 | :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc"); |
MiniTLS | 0:35aa5be3b78d | 57 | |
MiniTLS | 0:35aa5be3b78d | 58 | #define SQRADDSC(i, j) \ |
MiniTLS | 0:35aa5be3b78d | 59 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 60 | "movl %6,%%eax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 61 | "mull %7 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 62 | "movl %%eax,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 63 | "movl %%edx,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 64 | "xorl %2,%2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 65 | :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc"); |
MiniTLS | 0:35aa5be3b78d | 66 | |
MiniTLS | 0:35aa5be3b78d | 67 | #define SQRADDAC(i, j) \ |
MiniTLS | 0:35aa5be3b78d | 68 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 69 | "movl %6,%%eax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 70 | "mull %7 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 71 | "addl %%eax,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 72 | "adcl %%edx,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 73 | "adcl $0,%2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 74 | :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc"); |
MiniTLS | 0:35aa5be3b78d | 75 | |
MiniTLS | 0:35aa5be3b78d | 76 | #define SQRADDDB \ |
MiniTLS | 0:35aa5be3b78d | 77 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 78 | "addl %6,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 79 | "adcl %7,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 80 | "adcl %8,%2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 81 | "addl %6,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 82 | "adcl %7,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 83 | "adcl %8,%2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 84 | :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "%cc"); |
MiniTLS | 0:35aa5be3b78d | 85 | |
MiniTLS | 0:35aa5be3b78d | 86 | #elif defined(TFM_X86_64) |
MiniTLS | 0:35aa5be3b78d | 87 | /* x86-64 optimized */ |
MiniTLS | 0:35aa5be3b78d | 88 | |
MiniTLS | 0:35aa5be3b78d | 89 | #define COMBA_START |
MiniTLS | 0:35aa5be3b78d | 90 | |
MiniTLS | 0:35aa5be3b78d | 91 | #define CLEAR_CARRY \ |
MiniTLS | 0:35aa5be3b78d | 92 | c0 = c1 = c2 = 0; |
MiniTLS | 0:35aa5be3b78d | 93 | |
MiniTLS | 0:35aa5be3b78d | 94 | #define COMBA_STORE(x) \ |
MiniTLS | 0:35aa5be3b78d | 95 | x = c0; |
MiniTLS | 0:35aa5be3b78d | 96 | |
MiniTLS | 0:35aa5be3b78d | 97 | #define COMBA_STORE2(x) \ |
MiniTLS | 0:35aa5be3b78d | 98 | x = c1; |
MiniTLS | 0:35aa5be3b78d | 99 | |
MiniTLS | 0:35aa5be3b78d | 100 | #define CARRY_FORWARD \ |
MiniTLS | 0:35aa5be3b78d | 101 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); |
MiniTLS | 0:35aa5be3b78d | 102 | |
MiniTLS | 0:35aa5be3b78d | 103 | #define COMBA_FINI |
MiniTLS | 0:35aa5be3b78d | 104 | |
MiniTLS | 0:35aa5be3b78d | 105 | #define SQRADD(i, j) \ |
MiniTLS | 0:35aa5be3b78d | 106 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 107 | "movq %6,%%rax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 108 | "mulq %%rax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 109 | "addq %%rax,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 110 | "adcq %%rdx,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 111 | "adcq $0,%2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 112 | :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i) :"%rax","%rdx","%cc"); |
MiniTLS | 0:35aa5be3b78d | 113 | |
MiniTLS | 0:35aa5be3b78d | 114 | #define SQRADD2(i, j) \ |
MiniTLS | 0:35aa5be3b78d | 115 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 116 | "movq %6,%%rax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 117 | "mulq %7 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 118 | "addq %%rax,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 119 | "adcq %%rdx,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 120 | "adcq $0,%2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 121 | "addq %%rax,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 122 | "adcq %%rdx,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 123 | "adcq $0,%2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 124 | :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","%cc"); |
MiniTLS | 0:35aa5be3b78d | 125 | |
MiniTLS | 0:35aa5be3b78d | 126 | #define SQRADDSC(i, j) \ |
MiniTLS | 0:35aa5be3b78d | 127 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 128 | "movq %6,%%rax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 129 | "mulq %7 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 130 | "movq %%rax,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 131 | "movq %%rdx,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 132 | "xorq %2,%2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 133 | :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","%cc"); |
MiniTLS | 0:35aa5be3b78d | 134 | |
MiniTLS | 0:35aa5be3b78d | 135 | #define SQRADDAC(i, j) \ |
MiniTLS | 0:35aa5be3b78d | 136 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 137 | "movq %6,%%rax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 138 | "mulq %7 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 139 | "addq %%rax,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 140 | "adcq %%rdx,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 141 | "adcq $0,%2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 142 | :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","%cc"); |
MiniTLS | 0:35aa5be3b78d | 143 | |
MiniTLS | 0:35aa5be3b78d | 144 | #define SQRADDDB \ |
MiniTLS | 0:35aa5be3b78d | 145 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 146 | "addq %6,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 147 | "adcq %7,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 148 | "adcq %8,%2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 149 | "addq %6,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 150 | "adcq %7,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 151 | "adcq %8,%2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 152 | :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "%cc"); |
MiniTLS | 0:35aa5be3b78d | 153 | |
MiniTLS | 0:35aa5be3b78d | 154 | #elif defined(TFM_SSE2) |
MiniTLS | 0:35aa5be3b78d | 155 | |
MiniTLS | 0:35aa5be3b78d | 156 | /* SSE2 Optimized */ |
MiniTLS | 0:35aa5be3b78d | 157 | #define COMBA_START |
MiniTLS | 0:35aa5be3b78d | 158 | |
MiniTLS | 0:35aa5be3b78d | 159 | #define CLEAR_CARRY \ |
MiniTLS | 0:35aa5be3b78d | 160 | c0 = c1 = c2 = 0; |
MiniTLS | 0:35aa5be3b78d | 161 | |
MiniTLS | 0:35aa5be3b78d | 162 | #define COMBA_STORE(x) \ |
MiniTLS | 0:35aa5be3b78d | 163 | x = c0; |
MiniTLS | 0:35aa5be3b78d | 164 | |
MiniTLS | 0:35aa5be3b78d | 165 | #define COMBA_STORE2(x) \ |
MiniTLS | 0:35aa5be3b78d | 166 | x = c1; |
MiniTLS | 0:35aa5be3b78d | 167 | |
MiniTLS | 0:35aa5be3b78d | 168 | #define CARRY_FORWARD \ |
MiniTLS | 0:35aa5be3b78d | 169 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); |
MiniTLS | 0:35aa5be3b78d | 170 | |
MiniTLS | 0:35aa5be3b78d | 171 | #define COMBA_FINI \ |
MiniTLS | 0:35aa5be3b78d | 172 | asm("emms"); |
MiniTLS | 0:35aa5be3b78d | 173 | |
MiniTLS | 0:35aa5be3b78d | 174 | #define SQRADD(i, j) \ |
MiniTLS | 0:35aa5be3b78d | 175 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 176 | "movd %6,%%mm0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 177 | "pmuludq %%mm0,%%mm0\n\t" \ |
MiniTLS | 0:35aa5be3b78d | 178 | "movd %%mm0,%%eax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 179 | "psrlq $32,%%mm0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 180 | "addl %%eax,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 181 | "movd %%mm0,%%eax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 182 | "adcl %%eax,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 183 | "adcl $0,%2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 184 | :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%cc"); |
MiniTLS | 0:35aa5be3b78d | 185 | |
MiniTLS | 0:35aa5be3b78d | 186 | #define SQRADD2(i, j) \ |
MiniTLS | 0:35aa5be3b78d | 187 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 188 | "movd %6,%%mm0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 189 | "movd %7,%%mm1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 190 | "pmuludq %%mm1,%%mm0\n\t" \ |
MiniTLS | 0:35aa5be3b78d | 191 | "movd %%mm0,%%eax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 192 | "psrlq $32,%%mm0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 193 | "movd %%mm0,%%edx \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 194 | "addl %%eax,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 195 | "adcl %%edx,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 196 | "adcl $0,%2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 197 | "addl %%eax,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 198 | "adcl %%edx,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 199 | "adcl $0,%2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 200 | :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc"); |
MiniTLS | 0:35aa5be3b78d | 201 | /* SQRADDSC: start the secondary accumulator: sc0 = low word of i*j, sc1 = high word, sc2 = 0. xorl writes EFLAGS, so "%cc" is listed as a clobber like the other SSE2 macros. */ |
MiniTLS | 0:35aa5be3b78d | 202 | #define SQRADDSC(i, j) \ |
MiniTLS | 0:35aa5be3b78d | 203 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 204 | "movd %6,%%mm0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 205 | "movd %7,%%mm1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 206 | "pmuludq %%mm1,%%mm0\n\t" \ |
MiniTLS | 0:35aa5be3b78d | 207 | "movd %%mm0,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 208 | "psrlq $32,%%mm0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 209 | "movd %%mm0,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 210 | "xorl %2,%2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 211 | :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j) : "%cc"); |
MiniTLS | 0:35aa5be3b78d | 212 | |
MiniTLS | 0:35aa5be3b78d | 213 | #define SQRADDAC(i, j) \ |
MiniTLS | 0:35aa5be3b78d | 214 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 215 | "movd %6,%%mm0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 216 | "movd %7,%%mm1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 217 | "pmuludq %%mm1,%%mm0\n\t" \ |
MiniTLS | 0:35aa5be3b78d | 218 | "movd %%mm0,%%eax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 219 | "psrlq $32,%%mm0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 220 | "movd %%mm0,%%edx \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 221 | "addl %%eax,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 222 | "adcl %%edx,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 223 | "adcl $0,%2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 224 | :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j) :"%eax","%edx","%cc"); |
MiniTLS | 0:35aa5be3b78d | 225 | |
MiniTLS | 0:35aa5be3b78d | 226 | #define SQRADDDB \ |
MiniTLS | 0:35aa5be3b78d | 227 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 228 | "addl %6,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 229 | "adcl %7,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 230 | "adcl %8,%2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 231 | "addl %6,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 232 | "adcl %7,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 233 | "adcl %8,%2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 234 | :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "%cc"); |
MiniTLS | 0:35aa5be3b78d | 235 | |
MiniTLS | 0:35aa5be3b78d | 236 | #elif defined(TFM_ARM) |
MiniTLS | 0:35aa5be3b78d | 237 | |
MiniTLS | 0:35aa5be3b78d | 238 | /* ARM code */ |
MiniTLS | 0:35aa5be3b78d | 239 | |
MiniTLS | 0:35aa5be3b78d | 240 | #define COMBA_START |
MiniTLS | 0:35aa5be3b78d | 241 | |
MiniTLS | 0:35aa5be3b78d | 242 | #define CLEAR_CARRY \ |
MiniTLS | 0:35aa5be3b78d | 243 | c0 = c1 = c2 = 0; |
MiniTLS | 0:35aa5be3b78d | 244 | |
MiniTLS | 0:35aa5be3b78d | 245 | #define COMBA_STORE(x) \ |
MiniTLS | 0:35aa5be3b78d | 246 | x = c0; |
MiniTLS | 0:35aa5be3b78d | 247 | |
MiniTLS | 0:35aa5be3b78d | 248 | #define COMBA_STORE2(x) \ |
MiniTLS | 0:35aa5be3b78d | 249 | x = c1; |
MiniTLS | 0:35aa5be3b78d | 250 | |
MiniTLS | 0:35aa5be3b78d | 251 | #define CARRY_FORWARD \ |
MiniTLS | 0:35aa5be3b78d | 252 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); |
MiniTLS | 0:35aa5be3b78d | 253 | |
MiniTLS | 0:35aa5be3b78d | 254 | #define COMBA_FINI |
MiniTLS | 0:35aa5be3b78d | 255 | |
MiniTLS | 0:35aa5be3b78d | 256 | /* squares digit i and adds the double-width result into the accumulator: c0 (low word), c1 (high word), c2 (carry); j is unused */ |
MiniTLS | 0:35aa5be3b78d | 257 | #define SQRADD(i, j) \ |
MiniTLS | 0:35aa5be3b78d | 258 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 259 | " UMULL r0,r1,%6,%6 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 260 | " ADDS %0,%0,r0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 261 | " ADCS %1,%1,r1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 262 | " ADC %2,%2,#0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 263 | :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i) : "r0", "r1", "%cc"); |
MiniTLS | 0:35aa5be3b78d | 264 | |
MiniTLS | 0:35aa5be3b78d | 265 | /* for squaring some of the terms are doubled... */ |
MiniTLS | 0:35aa5be3b78d | 266 | #define SQRADD2(i, j) \ |
MiniTLS | 0:35aa5be3b78d | 267 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 268 | " UMULL r0,r1,%6,%7 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 269 | " ADDS %0,%0,r0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 270 | " ADCS %1,%1,r1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 271 | " ADC %2,%2,#0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 272 | " ADDS %0,%0,r0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 273 | " ADCS %1,%1,r1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 274 | " ADC %2,%2,#0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 275 | :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "%cc"); |
MiniTLS | 0:35aa5be3b78d | 276 | |
MiniTLS | 0:35aa5be3b78d | 277 | #define SQRADDSC(i, j) \ |
MiniTLS | 0:35aa5be3b78d | 278 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 279 | " UMULL %0,%1,%6,%7 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 280 | " SUB %2,%2,%2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 281 | :"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "%cc"); |
MiniTLS | 0:35aa5be3b78d | 282 | |
MiniTLS | 0:35aa5be3b78d | 283 | #define SQRADDAC(i, j) \ |
MiniTLS | 0:35aa5be3b78d | 284 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 285 | " UMULL r0,r1,%6,%7 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 286 | " ADDS %0,%0,r0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 287 | " ADCS %1,%1,r1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 288 | " ADC %2,%2,#0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 289 | :"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "r0", "r1", "%cc"); |
MiniTLS | 0:35aa5be3b78d | 290 | |
MiniTLS | 0:35aa5be3b78d | 291 | #define SQRADDDB \ |
MiniTLS | 0:35aa5be3b78d | 292 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 293 | " ADDS %0,%0,%3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 294 | " ADCS %1,%1,%4 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 295 | " ADC %2,%2,%5 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 296 | " ADDS %0,%0,%3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 297 | " ADCS %1,%1,%4 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 298 | " ADC %2,%2,%5 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 299 | :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc"); |
MiniTLS | 0:35aa5be3b78d | 300 | |
MiniTLS | 0:35aa5be3b78d | 301 | #elif defined(TFM_PPC32) |
MiniTLS | 0:35aa5be3b78d | 302 | |
MiniTLS | 0:35aa5be3b78d | 303 | /* PPC32 */ |
MiniTLS | 0:35aa5be3b78d | 304 | |
MiniTLS | 0:35aa5be3b78d | 305 | #define COMBA_START |
MiniTLS | 0:35aa5be3b78d | 306 | |
MiniTLS | 0:35aa5be3b78d | 307 | #define CLEAR_CARRY \ |
MiniTLS | 0:35aa5be3b78d | 308 | c0 = c1 = c2 = 0; |
MiniTLS | 0:35aa5be3b78d | 309 | |
MiniTLS | 0:35aa5be3b78d | 310 | #define COMBA_STORE(x) \ |
MiniTLS | 0:35aa5be3b78d | 311 | x = c0; |
MiniTLS | 0:35aa5be3b78d | 312 | |
MiniTLS | 0:35aa5be3b78d | 313 | #define COMBA_STORE2(x) \ |
MiniTLS | 0:35aa5be3b78d | 314 | x = c1; |
MiniTLS | 0:35aa5be3b78d | 315 | |
MiniTLS | 0:35aa5be3b78d | 316 | #define CARRY_FORWARD \ |
MiniTLS | 0:35aa5be3b78d | 317 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); |
MiniTLS | 0:35aa5be3b78d | 318 | |
MiniTLS | 0:35aa5be3b78d | 319 | #define COMBA_FINI |
MiniTLS | 0:35aa5be3b78d | 320 | |
MiniTLS | 0:35aa5be3b78d | 321 | /* squares digit i and adds the double-width result into the accumulator: c0 (low word), c1 (high word), c2 (carry); j is unused */ |
MiniTLS | 0:35aa5be3b78d | 322 | #define SQRADD(i, j) \ |
MiniTLS | 0:35aa5be3b78d | 323 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 324 | " mullw 16,%6,%6 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 325 | " addc %0,%0,16 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 326 | " mulhwu 16,%6,%6 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 327 | " adde %1,%1,16 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 328 | " addze %2,%2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 329 | :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","%cc"); |
MiniTLS | 0:35aa5be3b78d | 330 | |
MiniTLS | 0:35aa5be3b78d | 331 | /* for squaring some of the terms are doubled... */ |
MiniTLS | 0:35aa5be3b78d | 332 | #define SQRADD2(i, j) \ |
MiniTLS | 0:35aa5be3b78d | 333 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 334 | " mullw 16,%6,%7 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 335 | " mulhwu 17,%6,%7 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 336 | " addc %0,%0,16 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 337 | " adde %1,%1,17 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 338 | " addze %2,%2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 339 | " addc %0,%0,16 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 340 | " adde %1,%1,17 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 341 | " addze %2,%2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 342 | :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","%cc"); |
MiniTLS | 0:35aa5be3b78d | 343 | |
MiniTLS | 0:35aa5be3b78d | 344 | #define SQRADDSC(i, j) \ |
MiniTLS | 0:35aa5be3b78d | 345 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 346 | " mullw %0,%6,%7 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 347 | " mulhwu %1,%6,%7 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 348 | " xor %2,%2,%2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 349 | :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc"); |
MiniTLS | 0:35aa5be3b78d | 350 | |
MiniTLS | 0:35aa5be3b78d | 351 | #define SQRADDAC(i, j) \ |
MiniTLS | 0:35aa5be3b78d | 352 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 353 | " mullw 16,%6,%7 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 354 | " addc %0,%0,16 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 355 | " mulhwu 16,%6,%7 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 356 | " adde %1,%1,16 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 357 | " addze %2,%2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 358 | :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "%cc"); |
MiniTLS | 0:35aa5be3b78d | 359 | |
MiniTLS | 0:35aa5be3b78d | 360 | #define SQRADDDB \ |
MiniTLS | 0:35aa5be3b78d | 361 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 362 | " addc %0,%0,%3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 363 | " adde %1,%1,%4 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 364 | " adde %2,%2,%5 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 365 | " addc %0,%0,%3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 366 | " adde %1,%1,%4 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 367 | " adde %2,%2,%5 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 368 | :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc"); |
MiniTLS | 0:35aa5be3b78d | 369 | |
MiniTLS | 0:35aa5be3b78d | 370 | #elif defined(TFM_PPC64) |
MiniTLS | 0:35aa5be3b78d | 371 | /* PPC64 */ |
MiniTLS | 0:35aa5be3b78d | 372 | |
MiniTLS | 0:35aa5be3b78d | 373 | #define COMBA_START |
MiniTLS | 0:35aa5be3b78d | 374 | |
MiniTLS | 0:35aa5be3b78d | 375 | #define CLEAR_CARRY \ |
MiniTLS | 0:35aa5be3b78d | 376 | c0 = c1 = c2 = 0; |
MiniTLS | 0:35aa5be3b78d | 377 | |
MiniTLS | 0:35aa5be3b78d | 378 | #define COMBA_STORE(x) \ |
MiniTLS | 0:35aa5be3b78d | 379 | x = c0; |
MiniTLS | 0:35aa5be3b78d | 380 | |
MiniTLS | 0:35aa5be3b78d | 381 | #define COMBA_STORE2(x) \ |
MiniTLS | 0:35aa5be3b78d | 382 | x = c1; |
MiniTLS | 0:35aa5be3b78d | 383 | |
MiniTLS | 0:35aa5be3b78d | 384 | #define CARRY_FORWARD \ |
MiniTLS | 0:35aa5be3b78d | 385 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); |
MiniTLS | 0:35aa5be3b78d | 386 | |
MiniTLS | 0:35aa5be3b78d | 387 | #define COMBA_FINI |
MiniTLS | 0:35aa5be3b78d | 388 | |
MiniTLS | 0:35aa5be3b78d | 389 | /* squares digit i and adds the double-width result into the accumulator: c0 (low word), c1 (high word), c2 (carry); j is unused */ |
MiniTLS | 0:35aa5be3b78d | 390 | #define SQRADD(i, j) \ |
MiniTLS | 0:35aa5be3b78d | 391 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 392 | " mulld r16,%6,%6 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 393 | " addc %0,%0,r16 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 394 | " mulhdu r16,%6,%6 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 395 | " adde %1,%1,r16 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 396 | " addze %2,%2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 397 | :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r16","%cc"); |
MiniTLS | 0:35aa5be3b78d | 398 | |
MiniTLS | 0:35aa5be3b78d | 399 | /* for squaring some of the terms are doubled... */ |
MiniTLS | 0:35aa5be3b78d | 400 | #define SQRADD2(i, j) \ |
MiniTLS | 0:35aa5be3b78d | 401 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 402 | " mulld r16,%6,%7 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 403 | " mulhdu r17,%6,%7 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 404 | " addc %0,%0,r16 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 405 | " adde %1,%1,r17 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 406 | " addze %2,%2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 407 | " addc %0,%0,r16 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 408 | " adde %1,%1,r17 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 409 | " addze %2,%2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 410 | :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16", "r17","%cc"); |
MiniTLS | 0:35aa5be3b78d | 411 | |
MiniTLS | 0:35aa5be3b78d | 412 | #define SQRADDSC(i, j) \ |
MiniTLS | 0:35aa5be3b78d | 413 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 414 | " mulld %0,%6,%7 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 415 | " mulhdu %1,%6,%7 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 416 | " xor %2,%2,%2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 417 | :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc"); |
MiniTLS | 0:35aa5be3b78d | 418 | |
MiniTLS | 0:35aa5be3b78d | 419 | #define SQRADDAC(i, j) \ |
MiniTLS | 0:35aa5be3b78d | 420 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 421 | " mulld r16,%6,%7 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 422 | " addc %0,%0,r16 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 423 | " mulhdu r16,%6,%7 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 424 | " adde %1,%1,r16 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 425 | " addze %2,%2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 426 | :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r16", "%cc"); |
MiniTLS | 0:35aa5be3b78d | 427 | |
MiniTLS | 0:35aa5be3b78d | 428 | #define SQRADDDB \ |
MiniTLS | 0:35aa5be3b78d | 429 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 430 | " addc %0,%0,%3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 431 | " adde %1,%1,%4 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 432 | " adde %2,%2,%5 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 433 | " addc %0,%0,%3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 434 | " adde %1,%1,%4 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 435 | " adde %2,%2,%5 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 436 | :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc"); |
MiniTLS | 0:35aa5be3b78d | 437 | |
MiniTLS | 0:35aa5be3b78d | 438 | |
MiniTLS | 0:35aa5be3b78d | 439 | #elif defined(TFM_AVR32) |
MiniTLS | 0:35aa5be3b78d | 440 | |
MiniTLS | 0:35aa5be3b78d | 441 | /* AVR32 */ |
MiniTLS | 0:35aa5be3b78d | 442 | |
MiniTLS | 0:35aa5be3b78d | 443 | #define COMBA_START |
MiniTLS | 0:35aa5be3b78d | 444 | |
MiniTLS | 0:35aa5be3b78d | 445 | #define CLEAR_CARRY \ |
MiniTLS | 0:35aa5be3b78d | 446 | c0 = c1 = c2 = 0; |
MiniTLS | 0:35aa5be3b78d | 447 | |
MiniTLS | 0:35aa5be3b78d | 448 | #define COMBA_STORE(x) \ |
MiniTLS | 0:35aa5be3b78d | 449 | x = c0; |
MiniTLS | 0:35aa5be3b78d | 450 | |
MiniTLS | 0:35aa5be3b78d | 451 | #define COMBA_STORE2(x) \ |
MiniTLS | 0:35aa5be3b78d | 452 | x = c1; |
MiniTLS | 0:35aa5be3b78d | 453 | |
MiniTLS | 0:35aa5be3b78d | 454 | #define CARRY_FORWARD \ |
MiniTLS | 0:35aa5be3b78d | 455 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); |
MiniTLS | 0:35aa5be3b78d | 456 | |
MiniTLS | 0:35aa5be3b78d | 457 | #define COMBA_FINI |
MiniTLS | 0:35aa5be3b78d | 458 | |
MiniTLS | 0:35aa5be3b78d | 459 | /* squares digit i and adds the double-width result into the accumulator: c0 (low word), c1 (high word), c2 (carry); j is unused */ |
MiniTLS | 0:35aa5be3b78d | 460 | #define SQRADD(i, j) \ |
MiniTLS | 0:35aa5be3b78d | 461 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 462 | " mulu.d r2,%6,%6 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 463 | " add %0,%0,r2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 464 | " adc %1,%1,r3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 465 | " acr %2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 466 | :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r2","r3"); |
MiniTLS | 0:35aa5be3b78d | 467 | |
MiniTLS | 0:35aa5be3b78d | 468 | /* SQRADD2: adds the product i*j into c0 (low), c1 (high), c2 (carry) twice, because off-diagonal terms of a square are doubled. acr takes a single operand, so no trailing comma. */ |
MiniTLS | 0:35aa5be3b78d | 469 | #define SQRADD2(i, j) \ |
MiniTLS | 0:35aa5be3b78d | 470 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 471 | " mulu.d r2,%6,%7 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 472 | " add %0,%0,r2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 473 | " adc %1,%1,r3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 474 | " acr %2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 475 | " add %0,%0,r2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 476 | " adc %1,%1,r3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 477 | " acr %2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 478 | :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2", "r3"); |
MiniTLS | 0:35aa5be3b78d | 479 | |
MiniTLS | 0:35aa5be3b78d | 480 | #define SQRADDSC(i, j) \ |
MiniTLS | 0:35aa5be3b78d | 481 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 482 | " mulu.d r2,%6,%7 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 483 | " mov %0,r2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 484 | " mov %1,r3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 485 | " eor %2,%2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 486 | :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "r2", "r3"); |
MiniTLS | 0:35aa5be3b78d | 487 | |
MiniTLS | 0:35aa5be3b78d | 488 | #define SQRADDAC(i, j) \ |
MiniTLS | 0:35aa5be3b78d | 489 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 490 | " mulu.d r2,%6,%7 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 491 | " add %0,%0,r2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 492 | " adc %1,%1,r3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 493 | " acr %2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 494 | :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r2", "r3"); |
MiniTLS | 0:35aa5be3b78d | 495 | |
MiniTLS | 0:35aa5be3b78d | 496 | #define SQRADDDB \ |
MiniTLS | 0:35aa5be3b78d | 497 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 498 | " add %0,%0,%3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 499 | " adc %1,%1,%4 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 500 | " adc %2,%2,%5 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 501 | " add %0,%0,%3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 502 | " adc %1,%1,%4 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 503 | " adc %2,%2,%5 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 504 | :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc"); |
MiniTLS | 0:35aa5be3b78d | 505 | |
MiniTLS | 0:35aa5be3b78d | 506 | #elif defined(TFM_MIPS) |
MiniTLS | 0:35aa5be3b78d | 507 | |
MiniTLS | 0:35aa5be3b78d | 508 | /* MIPS */ |
MiniTLS | 0:35aa5be3b78d | 509 | |
MiniTLS | 0:35aa5be3b78d | 510 | #define COMBA_START |
MiniTLS | 0:35aa5be3b78d | 511 | |
MiniTLS | 0:35aa5be3b78d | 512 | #define CLEAR_CARRY \ |
MiniTLS | 0:35aa5be3b78d | 513 | c0 = c1 = c2 = 0; |
MiniTLS | 0:35aa5be3b78d | 514 | |
MiniTLS | 0:35aa5be3b78d | 515 | #define COMBA_STORE(x) \ |
MiniTLS | 0:35aa5be3b78d | 516 | x = c0; |
MiniTLS | 0:35aa5be3b78d | 517 | |
MiniTLS | 0:35aa5be3b78d | 518 | #define COMBA_STORE2(x) \ |
MiniTLS | 0:35aa5be3b78d | 519 | x = c1; |
MiniTLS | 0:35aa5be3b78d | 520 | |
MiniTLS | 0:35aa5be3b78d | 521 | #define CARRY_FORWARD \ |
MiniTLS | 0:35aa5be3b78d | 522 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); |
MiniTLS | 0:35aa5be3b78d | 523 | |
MiniTLS | 0:35aa5be3b78d | 524 | #define COMBA_FINI |
MiniTLS | 0:35aa5be3b78d | 525 | |
MiniTLS | 0:35aa5be3b78d | 526 | /* squares digit i and adds the double-width result into the accumulator: c0 (low word), c1 (high word), c2 (carry); j is unused */ |
MiniTLS | 0:35aa5be3b78d | 527 | #define SQRADD(i, j) \ |
MiniTLS | 0:35aa5be3b78d | 528 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 529 | " multu %6,%6 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 530 | " mflo $12 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 531 | " mfhi $13 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 532 | " addu %0,%0,$12 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 533 | " sltu $12,%0,$12 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 534 | " addu %1,%1,$13 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 535 | " sltu $13,%1,$13 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 536 | " addu %1,%1,$12 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 537 | " sltu $12,%1,$12 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 538 | " addu %2,%2,$13 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 539 | " addu %2,%2,$12 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 540 | :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"$12","$13"); |
MiniTLS | 0:35aa5be3b78d | 541 | |
MiniTLS | 0:35aa5be3b78d | 542 | /* c2:c1:c0 += 2 * i * j: the product is formed once and added in two identical passes */ |
MiniTLS | 0:35aa5be3b78d | 543 | #define SQRADD2(i, j) \ |
MiniTLS | 0:35aa5be3b78d | 544 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 545 | " multu %6,%7 \n\t" /* HI:LO = i * j */ \ |
MiniTLS | 0:35aa5be3b78d | 546 | " mflo $12 \n\t" /* $12 = low product word, kept for both passes */ \ |
MiniTLS | 0:35aa5be3b78d | 547 | " mfhi $13 \n\t" /* $13 = high product word, kept for both passes */ \ |
MiniTLS | 0:35aa5be3b78d | 548 | \ |
MiniTLS | 0:35aa5be3b78d | 549 | " addu %0,%0,$12 \n\t" /* pass 1: c0 += low word */ \ |
MiniTLS | 0:35aa5be3b78d | 550 | " sltu $14,%0,$12 \n\t" /* $14 = carry out of c0 */ \ |
MiniTLS | 0:35aa5be3b78d | 551 | " addu %1,%1,$13 \n\t" /* c1 += high word */ \ |
MiniTLS | 0:35aa5be3b78d | 552 | " sltu $15,%1,$13 \n\t" /* $15 = carry out of that add */ \ |
MiniTLS | 0:35aa5be3b78d | 553 | " addu %1,%1,$14 \n\t" /* c1 += carry from c0 */ \ |
MiniTLS | 0:35aa5be3b78d | 554 | " sltu $14,%1,$14 \n\t" /* $14 = carry out of that add */ \ |
MiniTLS | 0:35aa5be3b78d | 555 | " addu %2,%2,$15 \n\t" /* c2 += both carries out of c1 */ \ |
MiniTLS | 0:35aa5be3b78d | 556 | " addu %2,%2,$14 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 557 | \ |
MiniTLS | 0:35aa5be3b78d | 558 | " addu %0,%0,$12 \n\t" /* pass 2: the same sequence again */ \ |
MiniTLS | 0:35aa5be3b78d | 559 | " sltu $14,%0,$12 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 560 | " addu %1,%1,$13 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 561 | " sltu $15,%1,$13 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 562 | " addu %1,%1,$14 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 563 | " sltu $14,%1,$14 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 564 | " addu %2,%2,$15 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 565 | " addu %2,%2,$14 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 566 | :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12", "$13", "$14", "$15"); |
MiniTLS | 0:35aa5be3b78d | 567 | |
MiniTLS | 0:35aa5be3b78d | 568 | #define SQRADDSC(i, j) /* start a partial sum: sc1:sc0 = i * j, sc2 = 0 */ \ |
MiniTLS | 0:35aa5be3b78d | 569 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 570 | " multu %6,%7 \n\t" /* HI:LO = i * j */ \ |
MiniTLS | 0:35aa5be3b78d | 571 | " mflo %0 \n\t" /* sc0 = low product word */ \ |
MiniTLS | 0:35aa5be3b78d | 572 | " mfhi %1 \n\t" /* sc1 = high product word */ \ |
MiniTLS | 0:35aa5be3b78d | 573 | " xor %2,%2,%2 \n\t" /* sc2 = 0 */ \ |
MiniTLS | 0:35aa5be3b78d | 574 | :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc"); /* NOTE(review): sc0/sc1 are tied inputs yet are overwritten without being read */ |
MiniTLS | 0:35aa5be3b78d | 575 | |
MiniTLS | 0:35aa5be3b78d | 576 | #define SQRADDAC(i, j) /* accumulate into the partial sum: sc2:sc1:sc0 += i * j */ \ |
MiniTLS | 0:35aa5be3b78d | 577 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 578 | " multu %6,%7 \n\t" /* HI:LO = i * j */ \ |
MiniTLS | 0:35aa5be3b78d | 579 | " mflo $12 \n\t" /* $12 = low product word */ \ |
MiniTLS | 0:35aa5be3b78d | 580 | " mfhi $13 \n\t" /* $13 = high product word */ \ |
MiniTLS | 0:35aa5be3b78d | 581 | " addu %0,%0,$12 \n\t" /* sc0 += low word */ \ |
MiniTLS | 0:35aa5be3b78d | 582 | " sltu $12,%0,$12 \n\t" /* $12 = carry out of sc0 */ \ |
MiniTLS | 0:35aa5be3b78d | 583 | " addu %1,%1,$13 \n\t" /* sc1 += high word */ \ |
MiniTLS | 0:35aa5be3b78d | 584 | " sltu $13,%1,$13 \n\t" /* $13 = carry out of that add */ \ |
MiniTLS | 0:35aa5be3b78d | 585 | " addu %1,%1,$12 \n\t" /* sc1 += carry from sc0 */ \ |
MiniTLS | 0:35aa5be3b78d | 586 | " sltu $12,%1,$12 \n\t" /* $12 = carry out of that add */ \ |
MiniTLS | 0:35aa5be3b78d | 587 | " addu %2,%2,$13 \n\t" /* sc2 += both carries out of sc1 */ \ |
MiniTLS | 0:35aa5be3b78d | 588 | " addu %2,%2,$12 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 589 | :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"$12", "$13", "$14"); /* NOTE(review): $14 is listed as clobbered but the asm text never names it */ |
MiniTLS | 0:35aa5be3b78d | 590 | |
MiniTLS | 0:35aa5be3b78d | 591 | #define SQRADDDB /* c2:c1:c0 += 2 * sc2:sc1:sc0, done as two identical add passes */ \ |
MiniTLS | 0:35aa5be3b78d | 592 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 593 | " addu %0,%0,%3 \n\t" /* pass 1: c0 += sc0 */ \ |
MiniTLS | 0:35aa5be3b78d | 594 | " sltu $10,%0,%3 \n\t" /* $10 = carry out of c0 */ \ |
MiniTLS | 0:35aa5be3b78d | 595 | " addu %1,%1,$10 \n\t" /* c1 += carry from c0 */ \ |
MiniTLS | 0:35aa5be3b78d | 596 | " sltu $10,%1,$10 \n\t" /* $10 = carry out of that add */ \ |
MiniTLS | 0:35aa5be3b78d | 597 | " addu %1,%1,%4 \n\t" /* c1 += sc1 */ \ |
MiniTLS | 0:35aa5be3b78d | 598 | " sltu $11,%1,%4 \n\t" /* $11 = carry out of that add */ \ |
MiniTLS | 0:35aa5be3b78d | 599 | " addu %2,%2,$10 \n\t" /* c2 += both carries out of c1 */ \ |
MiniTLS | 0:35aa5be3b78d | 600 | " addu %2,%2,$11 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 601 | " addu %2,%2,%5 \n\t" /* c2 += sc2 */ \ |
MiniTLS | 0:35aa5be3b78d | 602 | \ |
MiniTLS | 0:35aa5be3b78d | 603 | " addu %0,%0,%3 \n\t" /* pass 2: the same sequence again */ \ |
MiniTLS | 0:35aa5be3b78d | 604 | " sltu $10,%0,%3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 605 | " addu %1,%1,$10 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 606 | " sltu $10,%1,$10 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 607 | " addu %1,%1,%4 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 608 | " sltu $11,%1,%4 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 609 | " addu %2,%2,$10 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 610 | " addu %2,%2,$11 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 611 | " addu %2,%2,%5 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 612 | :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "$10", "$11"); |
MiniTLS | 0:35aa5be3b78d | 613 | |
MiniTLS | 0:35aa5be3b78d | 614 | #else |
MiniTLS | 0:35aa5be3b78d | 615 | |
MiniTLS | 0:35aa5be3b78d | 616 | #define TFM_ISO /* defined only when this portable fallback branch is compiled */ |
MiniTLS | 0:35aa5be3b78d | 617 | |
MiniTLS | 0:35aa5be3b78d | 618 | /* ISO C portable code */ |
MiniTLS | 0:35aa5be3b78d | 619 | |
MiniTLS | 0:35aa5be3b78d | 620 | #define COMBA_START /* expands to nothing in the portable variant */ |
MiniTLS | 0:35aa5be3b78d | 621 | |
MiniTLS | 0:35aa5be3b78d | 622 | #define CLEAR_CARRY /* zero all three accumulator words; the expansion already ends in ';' */ \ |
MiniTLS | 0:35aa5be3b78d | 623 | c0 = c1 = c2 = 0; |
MiniTLS | 0:35aa5be3b78d | 624 | |
MiniTLS | 0:35aa5be3b78d | 625 | #define COMBA_STORE(x) /* assign accumulator word c0 to x; the expansion already ends in ';' */ \ |
MiniTLS | 0:35aa5be3b78d | 626 | x = c0; |
MiniTLS | 0:35aa5be3b78d | 627 | |
MiniTLS | 0:35aa5be3b78d | 628 | #define COMBA_STORE2(x) /* assign accumulator word c1 to x; the expansion already ends in ';' */ \ |
MiniTLS | 0:35aa5be3b78d | 629 | x = c1; |
MiniTLS | 0:35aa5be3b78d | 630 | |
MiniTLS | 0:35aa5be3b78d | 631 | #define CARRY_FORWARD /* shift the accumulator down one word: c0 <- c1, c1 <- c2, c2 <- 0 */ \ |
MiniTLS | 0:35aa5be3b78d | 632 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); |
MiniTLS | 0:35aa5be3b78d | 633 | |
MiniTLS | 0:35aa5be3b78d | 634 | #define COMBA_FINI /* expands to nothing in the portable variant */ |
MiniTLS | 0:35aa5be3b78d | 635 | |
MiniTLS | 0:35aa5be3b78d | 636 | /* c2:c1:c0 += i * j: low word goes to c0, carries ripple into c1 and c2; i and j are parenthesised so an expression argument is evaluated before the fp_word cast */ |
MiniTLS | 0:35aa5be3b78d | 637 | #define SQRADD(i, j) \ |
MiniTLS | 0:35aa5be3b78d | 638 | do { fp_word t; \ |
MiniTLS | 0:35aa5be3b78d | 639 | t = c0 + ((fp_word)(i)) * ((fp_word)(j)); c0 = t; \ |
MiniTLS | 0:35aa5be3b78d | 640 | t = c1 + (t >> DIGIT_BIT); c1 = t; c2 += t >> DIGIT_BIT; \ |
MiniTLS | 0:35aa5be3b78d | 641 | } while (0); |
MiniTLS | 0:35aa5be3b78d | 642 | |
MiniTLS | 0:35aa5be3b78d | 643 | |
MiniTLS | 0:35aa5be3b78d | 644 | /* c2:c1:c0 += 2 * i * j (the product is added twice); writes a variable tt that must already exist at the expansion site, since only t is declared here */ |
MiniTLS | 0:35aa5be3b78d | 645 | #define SQRADD2(i, j) \ |
MiniTLS | 0:35aa5be3b78d | 646 | do { fp_word t; \ |
MiniTLS | 0:35aa5be3b78d | 647 | t = ((fp_word)(i)) * ((fp_word)(j)); /* arguments parenthesised so the cast applies to the whole expression */ \ |
MiniTLS | 0:35aa5be3b78d | 648 | tt = (fp_word)c0 + t; c0 = tt; \ |
MiniTLS | 0:35aa5be3b78d | 649 | tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = tt; c2 += tt >> DIGIT_BIT; \ |
MiniTLS | 0:35aa5be3b78d | 650 | tt = (fp_word)c0 + t; c0 = tt; \ |
MiniTLS | 0:35aa5be3b78d | 651 | tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = tt; c2 += tt >> DIGIT_BIT; \ |
MiniTLS | 0:35aa5be3b78d | 652 | } while (0); |
MiniTLS | 0:35aa5be3b78d | 653 | |
MiniTLS | 0:35aa5be3b78d | 654 | #define SQRADDSC(i, j) /* start a partial sum: sc1:sc0 = i * j, sc2 = 0 */ \ |
MiniTLS | 0:35aa5be3b78d | 655 | do { fp_word t; \ |
MiniTLS | 0:35aa5be3b78d | 656 | t = ((fp_word)(i)) * ((fp_word)(j)); /* arguments parenthesised so the cast applies to the whole expression */ \ |
MiniTLS | 0:35aa5be3b78d | 657 | sc0 = (fp_digit)t; sc1 = (t >> DIGIT_BIT); sc2 = 0; \ |
MiniTLS | 0:35aa5be3b78d | 658 | } while (0); |
MiniTLS | 0:35aa5be3b78d | 659 | |
MiniTLS | 0:35aa5be3b78d | 660 | #define SQRADDAC(i, j) /* accumulate into the partial sum: sc2:sc1:sc0 += i * j */ \ |
MiniTLS | 0:35aa5be3b78d | 661 | do { fp_word t; \ |
MiniTLS | 0:35aa5be3b78d | 662 | t = sc0 + ((fp_word)(i)) * ((fp_word)(j)); sc0 = t; /* arguments parenthesised so the cast applies to the whole expression */ \ |
MiniTLS | 0:35aa5be3b78d | 663 | t = sc1 + (t >> DIGIT_BIT); sc1 = t; sc2 += t >> DIGIT_BIT; \ |
MiniTLS | 0:35aa5be3b78d | 664 | } while (0); |
MiniTLS | 0:35aa5be3b78d | 665 | |
MiniTLS | 0:35aa5be3b78d | 666 | #define SQRADDDB /* c2:c1:c0 += 2 * sc2:sc1:sc0, one word at a time with the carry taken from t >> DIGIT_BIT */ \ |
MiniTLS | 0:35aa5be3b78d | 667 | do { fp_word t; \ |
MiniTLS | 0:35aa5be3b78d | 668 | t = ((fp_word)sc0) + ((fp_word)sc0) + c0; c0 = t; \ |
MiniTLS | 0:35aa5be3b78d | 669 | t = ((fp_word)sc1) + ((fp_word)sc1) + c1 + (t >> DIGIT_BIT); c1 = t; \ |
MiniTLS | 0:35aa5be3b78d | 670 | c2 = c2 + ((fp_word)sc2) + ((fp_word)sc2) + (t >> DIGIT_BIT); \ |
MiniTLS | 0:35aa5be3b78d | 671 | } while (0); |
MiniTLS | 0:35aa5be3b78d | 672 | |
MiniTLS | 0:35aa5be3b78d | 673 | #endif |
MiniTLS | 0:35aa5be3b78d | 674 | |
MiniTLS | 0:35aa5be3b78d | 675 | /* $Source: /cvs/libtom/tomsfastmath/src/sqr/fp_sqr_comba.c,v $ */ |
MiniTLS | 0:35aa5be3b78d | 676 | /* $Revision: 1.4 $ */ |
MiniTLS | 0:35aa5be3b78d | 677 | /* $Date: 2007/03/14 23:47:42 $ */ |