Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository Zip archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of MiniTLS-GPL by
fp_sqr_comba.c
00001 /* 00002 * 00003 * This project is meant to fill in where LibTomMath 00004 * falls short. That is speed ;-) 00005 * 00006 * This project is public domain and free for all purposes. 00007 * 00008 * Tom St Denis, tomstdenis@gmail.com 00009 */ 00010 #include <tfm.h> 00011 00012 #if defined(TFM_PRESCOTT) && defined(TFM_SSE2) 00013 #undef TFM_SSE2 00014 #define TFM_X86 00015 #endif 00016 00017 #if defined(TFM_X86) 00018 00019 /* x86-32 optimized */ 00020 00021 #define COMBA_START 00022 00023 #define CLEAR_CARRY \ 00024 c0 = c1 = c2 = 0; 00025 00026 #define COMBA_STORE(x) \ 00027 x = c0; 00028 00029 #define COMBA_STORE2(x) \ 00030 x = c1; 00031 00032 #define CARRY_FORWARD \ 00033 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00034 00035 #define COMBA_FINI 00036 00037 #define SQRADD(i, j) \ 00038 asm( \ 00039 "movl %6,%%eax \n\t" \ 00040 "mull %%eax \n\t" \ 00041 "addl %%eax,%0 \n\t" \ 00042 "adcl %%edx,%1 \n\t" \ 00043 "adcl $0,%2 \n\t" \ 00044 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","%cc"); 00045 00046 #define SQRADD2(i, j) \ 00047 asm( \ 00048 "movl %6,%%eax \n\t" \ 00049 "mull %7 \n\t" \ 00050 "addl %%eax,%0 \n\t" \ 00051 "adcl %%edx,%1 \n\t" \ 00052 "adcl $0,%2 \n\t" \ 00053 "addl %%eax,%0 \n\t" \ 00054 "adcl %%edx,%1 \n\t" \ 00055 "adcl $0,%2 \n\t" \ 00056 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc"); 00057 00058 #define SQRADDSC(i, j) \ 00059 asm( \ 00060 "movl %6,%%eax \n\t" \ 00061 "mull %7 \n\t" \ 00062 "movl %%eax,%0 \n\t" \ 00063 "movl %%edx,%1 \n\t" \ 00064 "xorl %2,%2 \n\t" \ 00065 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc"); 00066 00067 #define SQRADDAC(i, j) \ 00068 asm( \ 00069 "movl %6,%%eax \n\t" \ 00070 "mull %7 \n\t" \ 00071 "addl %%eax,%0 \n\t" \ 00072 "adcl %%edx,%1 \n\t" \ 00073 "adcl $0,%2 \n\t" \ 00074 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) 
:"%eax","%edx","%cc"); 00075 00076 #define SQRADDDB \ 00077 asm( \ 00078 "addl %6,%0 \n\t" \ 00079 "adcl %7,%1 \n\t" \ 00080 "adcl %8,%2 \n\t" \ 00081 "addl %6,%0 \n\t" \ 00082 "adcl %7,%1 \n\t" \ 00083 "adcl %8,%2 \n\t" \ 00084 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "%cc"); 00085 00086 #elif defined(TFM_X86_64) 00087 /* x86-64 optimized */ 00088 00089 #define COMBA_START 00090 00091 #define CLEAR_CARRY \ 00092 c0 = c1 = c2 = 0; 00093 00094 #define COMBA_STORE(x) \ 00095 x = c0; 00096 00097 #define COMBA_STORE2(x) \ 00098 x = c1; 00099 00100 #define CARRY_FORWARD \ 00101 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00102 00103 #define COMBA_FINI 00104 00105 #define SQRADD(i, j) \ 00106 asm( \ 00107 "movq %6,%%rax \n\t" \ 00108 "mulq %%rax \n\t" \ 00109 "addq %%rax,%0 \n\t" \ 00110 "adcq %%rdx,%1 \n\t" \ 00111 "adcq $0,%2 \n\t" \ 00112 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i) :"%rax","%rdx","%cc"); 00113 00114 #define SQRADD2(i, j) \ 00115 asm( \ 00116 "movq %6,%%rax \n\t" \ 00117 "mulq %7 \n\t" \ 00118 "addq %%rax,%0 \n\t" \ 00119 "adcq %%rdx,%1 \n\t" \ 00120 "adcq $0,%2 \n\t" \ 00121 "addq %%rax,%0 \n\t" \ 00122 "adcq %%rdx,%1 \n\t" \ 00123 "adcq $0,%2 \n\t" \ 00124 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","%cc"); 00125 00126 #define SQRADDSC(i, j) \ 00127 asm( \ 00128 "movq %6,%%rax \n\t" \ 00129 "mulq %7 \n\t" \ 00130 "movq %%rax,%0 \n\t" \ 00131 "movq %%rdx,%1 \n\t" \ 00132 "xorq %2,%2 \n\t" \ 00133 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","%cc"); 00134 00135 #define SQRADDAC(i, j) \ 00136 asm( \ 00137 "movq %6,%%rax \n\t" \ 00138 "mulq %7 \n\t" \ 00139 "addq %%rax,%0 \n\t" \ 00140 "adcq %%rdx,%1 \n\t" \ 00141 "adcq $0,%2 \n\t" \ 00142 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","%cc"); 00143 00144 #define SQRADDDB \ 00145 asm( \ 00146 "addq 
%6,%0 \n\t" \ 00147 "adcq %7,%1 \n\t" \ 00148 "adcq %8,%2 \n\t" \ 00149 "addq %6,%0 \n\t" \ 00150 "adcq %7,%1 \n\t" \ 00151 "adcq %8,%2 \n\t" \ 00152 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "%cc"); 00153 00154 #elif defined(TFM_SSE2) 00155 00156 /* SSE2 Optimized */ 00157 #define COMBA_START 00158 00159 #define CLEAR_CARRY \ 00160 c0 = c1 = c2 = 0; 00161 00162 #define COMBA_STORE(x) \ 00163 x = c0; 00164 00165 #define COMBA_STORE2(x) \ 00166 x = c1; 00167 00168 #define CARRY_FORWARD \ 00169 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00170 00171 #define COMBA_FINI \ 00172 asm("emms"); 00173 00174 #define SQRADD(i, j) \ 00175 asm( \ 00176 "movd %6,%%mm0 \n\t" \ 00177 "pmuludq %%mm0,%%mm0\n\t" \ 00178 "movd %%mm0,%%eax \n\t" \ 00179 "psrlq $32,%%mm0 \n\t" \ 00180 "addl %%eax,%0 \n\t" \ 00181 "movd %%mm0,%%eax \n\t" \ 00182 "adcl %%eax,%1 \n\t" \ 00183 "adcl $0,%2 \n\t" \ 00184 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%cc"); 00185 00186 #define SQRADD2(i, j) \ 00187 asm( \ 00188 "movd %6,%%mm0 \n\t" \ 00189 "movd %7,%%mm1 \n\t" \ 00190 "pmuludq %%mm1,%%mm0\n\t" \ 00191 "movd %%mm0,%%eax \n\t" \ 00192 "psrlq $32,%%mm0 \n\t" \ 00193 "movd %%mm0,%%edx \n\t" \ 00194 "addl %%eax,%0 \n\t" \ 00195 "adcl %%edx,%1 \n\t" \ 00196 "adcl $0,%2 \n\t" \ 00197 "addl %%eax,%0 \n\t" \ 00198 "adcl %%edx,%1 \n\t" \ 00199 "adcl $0,%2 \n\t" \ 00200 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc"); 00201 00202 #define SQRADDSC(i, j) \ 00203 asm( \ 00204 "movd %6,%%mm0 \n\t" \ 00205 "movd %7,%%mm1 \n\t" \ 00206 "pmuludq %%mm1,%%mm0\n\t" \ 00207 "movd %%mm0,%0 \n\t" \ 00208 "psrlq $32,%%mm0 \n\t" \ 00209 "movd %%mm0,%1 \n\t" \ 00210 "xorl %2,%2 \n\t" \ 00211 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j)); 00212 00213 #define SQRADDAC(i, j) \ 00214 asm( \ 00215 "movd %6,%%mm0 \n\t" \ 00216 "movd %7,%%mm1 \n\t" \ 00217 "pmuludq 
%%mm1,%%mm0\n\t" \ 00218 "movd %%mm0,%%eax \n\t" \ 00219 "psrlq $32,%%mm0 \n\t" \ 00220 "movd %%mm0,%%edx \n\t" \ 00221 "addl %%eax,%0 \n\t" \ 00222 "adcl %%edx,%1 \n\t" \ 00223 "adcl $0,%2 \n\t" \ 00224 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j) :"%eax","%edx","%cc"); 00225 00226 #define SQRADDDB \ 00227 asm( \ 00228 "addl %6,%0 \n\t" \ 00229 "adcl %7,%1 \n\t" \ 00230 "adcl %8,%2 \n\t" \ 00231 "addl %6,%0 \n\t" \ 00232 "adcl %7,%1 \n\t" \ 00233 "adcl %8,%2 \n\t" \ 00234 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "%cc"); 00235 00236 #elif defined(TFM_ARM) 00237 00238 /* ARM code */ 00239 00240 #define COMBA_START 00241 00242 #define CLEAR_CARRY \ 00243 c0 = c1 = c2 = 0; 00244 00245 #define COMBA_STORE(x) \ 00246 x = c0; 00247 00248 #define COMBA_STORE2(x) \ 00249 x = c1; 00250 00251 #define CARRY_FORWARD \ 00252 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00253 00254 #define COMBA_FINI 00255 00256 /* multiplies point i and j, updates carry "c1" and digit c2 */ 00257 #define SQRADD(i, j) \ 00258 asm( \ 00259 " UMULL r0,r1,%6,%6 \n\t" \ 00260 " ADDS %0,%0,r0 \n\t" \ 00261 " ADCS %1,%1,r1 \n\t" \ 00262 " ADC %2,%2,#0 \n\t" \ 00263 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i) : "r0", "r1", "%cc"); 00264 00265 /* for squaring some of the terms are doubled... 
*/ 00266 #define SQRADD2(i, j) \ 00267 asm( \ 00268 " UMULL r0,r1,%6,%7 \n\t" \ 00269 " ADDS %0,%0,r0 \n\t" \ 00270 " ADCS %1,%1,r1 \n\t" \ 00271 " ADC %2,%2,#0 \n\t" \ 00272 " ADDS %0,%0,r0 \n\t" \ 00273 " ADCS %1,%1,r1 \n\t" \ 00274 " ADC %2,%2,#0 \n\t" \ 00275 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "%cc"); 00276 00277 #define SQRADDSC(i, j) \ 00278 asm( \ 00279 " UMULL %0,%1,%6,%7 \n\t" \ 00280 " SUB %2,%2,%2 \n\t" \ 00281 :"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "%cc"); 00282 00283 #define SQRADDAC(i, j) \ 00284 asm( \ 00285 " UMULL r0,r1,%6,%7 \n\t" \ 00286 " ADDS %0,%0,r0 \n\t" \ 00287 " ADCS %1,%1,r1 \n\t" \ 00288 " ADC %2,%2,#0 \n\t" \ 00289 :"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "r0", "r1", "%cc"); 00290 00291 #define SQRADDDB \ 00292 asm( \ 00293 " ADDS %0,%0,%3 \n\t" \ 00294 " ADCS %1,%1,%4 \n\t" \ 00295 " ADC %2,%2,%5 \n\t" \ 00296 " ADDS %0,%0,%3 \n\t" \ 00297 " ADCS %1,%1,%4 \n\t" \ 00298 " ADC %2,%2,%5 \n\t" \ 00299 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc"); 00300 00301 #elif defined(TFM_PPC32) 00302 00303 /* PPC32 */ 00304 00305 #define COMBA_START 00306 00307 #define CLEAR_CARRY \ 00308 c0 = c1 = c2 = 0; 00309 00310 #define COMBA_STORE(x) \ 00311 x = c0; 00312 00313 #define COMBA_STORE2(x) \ 00314 x = c1; 00315 00316 #define CARRY_FORWARD \ 00317 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00318 00319 #define COMBA_FINI 00320 00321 /* multiplies point i and j, updates carry "c1" and digit c2 */ 00322 #define SQRADD(i, j) \ 00323 asm( \ 00324 " mullw 16,%6,%6 \n\t" \ 00325 " addc %0,%0,16 \n\t" \ 00326 " mulhwu 16,%6,%6 \n\t" \ 00327 " adde %1,%1,16 \n\t" \ 00328 " addze %2,%2 \n\t" \ 00329 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","%cc"); 00330 00331 /* for squaring some of the terms are doubled... 
*/ 00332 #define SQRADD2(i, j) \ 00333 asm( \ 00334 " mullw 16,%6,%7 \n\t" \ 00335 " mulhwu 17,%6,%7 \n\t" \ 00336 " addc %0,%0,16 \n\t" \ 00337 " adde %1,%1,17 \n\t" \ 00338 " addze %2,%2 \n\t" \ 00339 " addc %0,%0,16 \n\t" \ 00340 " adde %1,%1,17 \n\t" \ 00341 " addze %2,%2 \n\t" \ 00342 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","%cc"); 00343 00344 #define SQRADDSC(i, j) \ 00345 asm( \ 00346 " mullw %0,%6,%7 \n\t" \ 00347 " mulhwu %1,%6,%7 \n\t" \ 00348 " xor %2,%2,%2 \n\t" \ 00349 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc"); 00350 00351 #define SQRADDAC(i, j) \ 00352 asm( \ 00353 " mullw 16,%6,%7 \n\t" \ 00354 " addc %0,%0,16 \n\t" \ 00355 " mulhwu 16,%6,%7 \n\t" \ 00356 " adde %1,%1,16 \n\t" \ 00357 " addze %2,%2 \n\t" \ 00358 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "%cc"); 00359 00360 #define SQRADDDB \ 00361 asm( \ 00362 " addc %0,%0,%3 \n\t" \ 00363 " adde %1,%1,%4 \n\t" \ 00364 " adde %2,%2,%5 \n\t" \ 00365 " addc %0,%0,%3 \n\t" \ 00366 " adde %1,%1,%4 \n\t" \ 00367 " adde %2,%2,%5 \n\t" \ 00368 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc"); 00369 00370 #elif defined(TFM_PPC64) 00371 /* PPC64 */ 00372 00373 #define COMBA_START 00374 00375 #define CLEAR_CARRY \ 00376 c0 = c1 = c2 = 0; 00377 00378 #define COMBA_STORE(x) \ 00379 x = c0; 00380 00381 #define COMBA_STORE2(x) \ 00382 x = c1; 00383 00384 #define CARRY_FORWARD \ 00385 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00386 00387 #define COMBA_FINI 00388 00389 /* multiplies point i and j, updates carry "c1" and digit c2 */ 00390 #define SQRADD(i, j) \ 00391 asm( \ 00392 " mulld r16,%6,%6 \n\t" \ 00393 " addc %0,%0,r16 \n\t" \ 00394 " mulhdu r16,%6,%6 \n\t" \ 00395 " adde %1,%1,r16 \n\t" \ 00396 " addze %2,%2 \n\t" \ 00397 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r16","%cc"); 00398 00399 /* for squaring some 
of the terms are doubled... */ 00400 #define SQRADD2(i, j) \ 00401 asm( \ 00402 " mulld r16,%6,%7 \n\t" \ 00403 " mulhdu r17,%6,%7 \n\t" \ 00404 " addc %0,%0,r16 \n\t" \ 00405 " adde %1,%1,r17 \n\t" \ 00406 " addze %2,%2 \n\t" \ 00407 " addc %0,%0,r16 \n\t" \ 00408 " adde %1,%1,r17 \n\t" \ 00409 " addze %2,%2 \n\t" \ 00410 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16", "r17","%cc"); 00411 00412 #define SQRADDSC(i, j) \ 00413 asm( \ 00414 " mulld %0,%6,%7 \n\t" \ 00415 " mulhdu %1,%6,%7 \n\t" \ 00416 " xor %2,%2,%2 \n\t" \ 00417 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc"); 00418 00419 #define SQRADDAC(i, j) \ 00420 asm( \ 00421 " mulld r16,%6,%7 \n\t" \ 00422 " addc %0,%0,r16 \n\t" \ 00423 " mulhdu r16,%6,%7 \n\t" \ 00424 " adde %1,%1,r16 \n\t" \ 00425 " addze %2,%2 \n\t" \ 00426 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r16", "%cc"); 00427 00428 #define SQRADDDB \ 00429 asm( \ 00430 " addc %0,%0,%3 \n\t" \ 00431 " adde %1,%1,%4 \n\t" \ 00432 " adde %2,%2,%5 \n\t" \ 00433 " addc %0,%0,%3 \n\t" \ 00434 " adde %1,%1,%4 \n\t" \ 00435 " adde %2,%2,%5 \n\t" \ 00436 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc"); 00437 00438 00439 #elif defined(TFM_AVR32) 00440 00441 /* AVR32 */ 00442 00443 #define COMBA_START 00444 00445 #define CLEAR_CARRY \ 00446 c0 = c1 = c2 = 0; 00447 00448 #define COMBA_STORE(x) \ 00449 x = c0; 00450 00451 #define COMBA_STORE2(x) \ 00452 x = c1; 00453 00454 #define CARRY_FORWARD \ 00455 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00456 00457 #define COMBA_FINI 00458 00459 /* multiplies point i and j, updates carry "c1" and digit c2 */ 00460 #define SQRADD(i, j) \ 00461 asm( \ 00462 " mulu.d r2,%6,%6 \n\t" \ 00463 " add %0,%0,r2 \n\t" \ 00464 " adc %1,%1,r3 \n\t" \ 00465 " acr %2 \n\t" \ 00466 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r2","r3"); 00467 00468 /* for 
squaring some of the terms are doubled... */ 00469 #define SQRADD2(i, j) \ 00470 asm( \ 00471 " mulu.d r2,%6,%7 \n\t" \ 00472 " add %0,%0,r2 \n\t" \ 00473 " adc %1,%1,r3 \n\t" \ 00474 " acr %2, \n\t" \ 00475 " add %0,%0,r2 \n\t" \ 00476 " adc %1,%1,r3 \n\t" \ 00477 " acr %2, \n\t" \ 00478 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2", "r3"); 00479 00480 #define SQRADDSC(i, j) \ 00481 asm( \ 00482 " mulu.d r2,%6,%7 \n\t" \ 00483 " mov %0,r2 \n\t" \ 00484 " mov %1,r3 \n\t" \ 00485 " eor %2,%2 \n\t" \ 00486 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "r2", "r3"); 00487 00488 #define SQRADDAC(i, j) \ 00489 asm( \ 00490 " mulu.d r2,%6,%7 \n\t" \ 00491 " add %0,%0,r2 \n\t" \ 00492 " adc %1,%1,r3 \n\t" \ 00493 " acr %2 \n\t" \ 00494 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r2", "r3"); 00495 00496 #define SQRADDDB \ 00497 asm( \ 00498 " add %0,%0,%3 \n\t" \ 00499 " adc %1,%1,%4 \n\t" \ 00500 " adc %2,%2,%5 \n\t" \ 00501 " add %0,%0,%3 \n\t" \ 00502 " adc %1,%1,%4 \n\t" \ 00503 " adc %2,%2,%5 \n\t" \ 00504 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc"); 00505 00506 #elif defined(TFM_MIPS) 00507 00508 /* MIPS */ 00509 00510 #define COMBA_START 00511 00512 #define CLEAR_CARRY \ 00513 c0 = c1 = c2 = 0; 00514 00515 #define COMBA_STORE(x) \ 00516 x = c0; 00517 00518 #define COMBA_STORE2(x) \ 00519 x = c1; 00520 00521 #define CARRY_FORWARD \ 00522 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00523 00524 #define COMBA_FINI 00525 00526 /* multiplies point i and j, updates carry "c1" and digit c2 */ 00527 #define SQRADD(i, j) \ 00528 asm( \ 00529 " multu %6,%6 \n\t" \ 00530 " mflo $12 \n\t" \ 00531 " mfhi $13 \n\t" \ 00532 " addu %0,%0,$12 \n\t" \ 00533 " sltu $12,%0,$12 \n\t" \ 00534 " addu %1,%1,$13 \n\t" \ 00535 " sltu $13,%1,$13 \n\t" \ 00536 " addu %1,%1,$12 \n\t" \ 00537 " sltu $12,%1,$12 \n\t" \ 00538 " addu %2,%2,$13 \n\t" \ 
00539 " addu %2,%2,$12 \n\t" \ 00540 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"$12","$13"); 00541 00542 /* for squaring some of the terms are doubled... */ 00543 #define SQRADD2(i, j) \ 00544 asm( \ 00545 " multu %6,%7 \n\t" \ 00546 " mflo $12 \n\t" \ 00547 " mfhi $13 \n\t" \ 00548 \ 00549 " addu %0,%0,$12 \n\t" \ 00550 " sltu $14,%0,$12 \n\t" \ 00551 " addu %1,%1,$13 \n\t" \ 00552 " sltu $15,%1,$13 \n\t" \ 00553 " addu %1,%1,$14 \n\t" \ 00554 " sltu $14,%1,$14 \n\t" \ 00555 " addu %2,%2,$15 \n\t" \ 00556 " addu %2,%2,$14 \n\t" \ 00557 \ 00558 " addu %0,%0,$12 \n\t" \ 00559 " sltu $14,%0,$12 \n\t" \ 00560 " addu %1,%1,$13 \n\t" \ 00561 " sltu $15,%1,$13 \n\t" \ 00562 " addu %1,%1,$14 \n\t" \ 00563 " sltu $14,%1,$14 \n\t" \ 00564 " addu %2,%2,$15 \n\t" \ 00565 " addu %2,%2,$14 \n\t" \ 00566 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12", "$13", "$14", "$15"); 00567 00568 #define SQRADDSC(i, j) \ 00569 asm( \ 00570 " multu %6,%7 \n\t" \ 00571 " mflo %0 \n\t" \ 00572 " mfhi %1 \n\t" \ 00573 " xor %2,%2,%2 \n\t" \ 00574 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc"); 00575 00576 #define SQRADDAC(i, j) \ 00577 asm( \ 00578 " multu %6,%7 \n\t" \ 00579 " mflo $12 \n\t" \ 00580 " mfhi $13 \n\t" \ 00581 " addu %0,%0,$12 \n\t" \ 00582 " sltu $12,%0,$12 \n\t" \ 00583 " addu %1,%1,$13 \n\t" \ 00584 " sltu $13,%1,$13 \n\t" \ 00585 " addu %1,%1,$12 \n\t" \ 00586 " sltu $12,%1,$12 \n\t" \ 00587 " addu %2,%2,$13 \n\t" \ 00588 " addu %2,%2,$12 \n\t" \ 00589 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"$12", "$13", "$14"); 00590 00591 #define SQRADDDB \ 00592 asm( \ 00593 " addu %0,%0,%3 \n\t" \ 00594 " sltu $10,%0,%3 \n\t" \ 00595 " addu %1,%1,$10 \n\t" \ 00596 " sltu $10,%1,$10 \n\t" \ 00597 " addu %1,%1,%4 \n\t" \ 00598 " sltu $11,%1,%4 \n\t" \ 00599 " addu %2,%2,$10 \n\t" \ 00600 " addu %2,%2,$11 \n\t" \ 00601 " addu %2,%2,%5 \n\t" \ 00602 \ 00603 " addu 
%0,%0,%3 \n\t" \ 00604 " sltu $10,%0,%3 \n\t" \ 00605 " addu %1,%1,$10 \n\t" \ 00606 " sltu $10,%1,$10 \n\t" \ 00607 " addu %1,%1,%4 \n\t" \ 00608 " sltu $11,%1,%4 \n\t" \ 00609 " addu %2,%2,$10 \n\t" \ 00610 " addu %2,%2,$11 \n\t" \ 00611 " addu %2,%2,%5 \n\t" \ 00612 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "$10", "$11"); 00613 00614 #else 00615 00616 #define TFM_ISO 00617 00618 /* ISO C portable code */ 00619 00620 #define COMBA_START 00621 00622 #define CLEAR_CARRY \ 00623 c0 = c1 = c2 = 0; 00624 00625 #define COMBA_STORE(x) \ 00626 x = c0; 00627 00628 #define COMBA_STORE2(x) \ 00629 x = c1; 00630 00631 #define CARRY_FORWARD \ 00632 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00633 00634 #define COMBA_FINI 00635 00636 /* multiplies point i and j, updates carry "c1" and digit c2 */ 00637 #define SQRADD(i, j) \ 00638 do { fp_word t; \ 00639 t = c0 + ((fp_word)i) * ((fp_word)j); c0 = t; \ 00640 t = c1 + (t >> DIGIT_BIT); c1 = t; c2 += t >> DIGIT_BIT; \ 00641 } while (0); 00642 00643 00644 /* for squaring some of the terms are doubled... 
*/ 00645 #define SQRADD2(i, j) \ 00646 do { fp_word t; \ 00647 t = ((fp_word)i) * ((fp_word)j); \ 00648 tt = (fp_word)c0 + t; c0 = tt; \ 00649 tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = tt; c2 += tt >> DIGIT_BIT; \ 00650 tt = (fp_word)c0 + t; c0 = tt; \ 00651 tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = tt; c2 += tt >> DIGIT_BIT; \ 00652 } while (0); 00653 00654 #define SQRADDSC(i, j) \ 00655 do { fp_word t; \ 00656 t = ((fp_word)i) * ((fp_word)j); \ 00657 sc0 = (fp_digit)t; sc1 = (t >> DIGIT_BIT); sc2 = 0; \ 00658 } while (0); 00659 00660 #define SQRADDAC(i, j) \ 00661 do { fp_word t; \ 00662 t = sc0 + ((fp_word)i) * ((fp_word)j); sc0 = t; \ 00663 t = sc1 + (t >> DIGIT_BIT); sc1 = t; sc2 += t >> DIGIT_BIT; \ 00664 } while (0); 00665 00666 #define SQRADDDB \ 00667 do { fp_word t; \ 00668 t = ((fp_word)sc0) + ((fp_word)sc0) + c0; c0 = t; \ 00669 t = ((fp_word)sc1) + ((fp_word)sc1) + c1 + (t >> DIGIT_BIT); c1 = t; \ 00670 c2 = c2 + ((fp_word)sc2) + ((fp_word)sc2) + (t >> DIGIT_BIT); \ 00671 } while (0); 00672 00673 #endif 00674 00675 /* $Source: /cvs/libtom/tomsfastmath/src/sqr/fp_sqr_comba.c,v $ */ 00676 /* $Revision: 1.4 $ */ 00677 /* $Date: 2007/03/14 23:47:42 $ */
Generated on Tue Jul 12 2022 19:20:10 by
1.7.2
