Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository ZIP archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Dependents: MiniTLS-HTTPS-Example
fp_sqr_comba.c
00001 /* 00002 * 00003 * This project is meant to fill in where LibTomMath 00004 * falls short. That is speed ;-) 00005 * 00006 * This project is public domain and free for all purposes. 00007 * 00008 * Tom St Denis, tomstdenis@gmail.com 00009 */ 00010 #include <tfm.h> 00011 00012 #if defined(TFM_PRESCOTT) && defined(TFM_SSE2) 00013 #undef TFM_SSE2 00014 #define TFM_X86 00015 #endif 00016 00017 #if defined(TFM_X86) 00018 00019 /* x86-32 optimized */ 00020 00021 #define COMBA_START 00022 00023 #define CLEAR_CARRY \ 00024 c0 = c1 = c2 = 0; 00025 00026 #define COMBA_STORE(x) \ 00027 x = c0; 00028 00029 #define COMBA_STORE2(x) \ 00030 x = c1; 00031 00032 #define CARRY_FORWARD \ 00033 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00034 00035 #define COMBA_FINI 00036 00037 #define SQRADD(i, j) \ 00038 asm( \ 00039 "movl %6,%%eax \n\t" \ 00040 "mull %%eax \n\t" \ 00041 "addl %%eax,%0 \n\t" \ 00042 "adcl %%edx,%1 \n\t" \ 00043 "adcl $0,%2 \n\t" \ 00044 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","%cc"); 00045 00046 #define SQRADD2(i, j) \ 00047 asm( \ 00048 "movl %6,%%eax \n\t" \ 00049 "mull %7 \n\t" \ 00050 "addl %%eax,%0 \n\t" \ 00051 "adcl %%edx,%1 \n\t" \ 00052 "adcl $0,%2 \n\t" \ 00053 "addl %%eax,%0 \n\t" \ 00054 "adcl %%edx,%1 \n\t" \ 00055 "adcl $0,%2 \n\t" \ 00056 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc"); 00057 00058 #define SQRADDSC(i, j) \ 00059 asm( \ 00060 "movl %6,%%eax \n\t" \ 00061 "mull %7 \n\t" \ 00062 "movl %%eax,%0 \n\t" \ 00063 "movl %%edx,%1 \n\t" \ 00064 "xorl %2,%2 \n\t" \ 00065 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc"); 00066 00067 #define SQRADDAC(i, j) \ 00068 asm( \ 00069 "movl %6,%%eax \n\t" \ 00070 "mull %7 \n\t" \ 00071 "addl %%eax,%0 \n\t" \ 00072 "adcl %%edx,%1 \n\t" \ 00073 "adcl $0,%2 \n\t" \ 00074 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) 
:"%eax","%edx","%cc"); 00075 00076 #define SQRADDDB \ 00077 asm( \ 00078 "addl %6,%0 \n\t" \ 00079 "adcl %7,%1 \n\t" \ 00080 "adcl %8,%2 \n\t" \ 00081 "addl %6,%0 \n\t" \ 00082 "adcl %7,%1 \n\t" \ 00083 "adcl %8,%2 \n\t" \ 00084 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "%cc"); 00085 00086 #elif defined(TFM_X86_64) 00087 /* x86-64 optimized */ 00088 00089 #define COMBA_START 00090 00091 #define CLEAR_CARRY \ 00092 c0 = c1 = c2 = 0; 00093 00094 #define COMBA_STORE(x) \ 00095 x = c0; 00096 00097 #define COMBA_STORE2(x) \ 00098 x = c1; 00099 00100 #define CARRY_FORWARD \ 00101 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00102 00103 #define COMBA_FINI 00104 00105 #define SQRADD(i, j) \ 00106 asm( \ 00107 "movq %6,%%rax \n\t" \ 00108 "mulq %%rax \n\t" \ 00109 "addq %%rax,%0 \n\t" \ 00110 "adcq %%rdx,%1 \n\t" \ 00111 "adcq $0,%2 \n\t" \ 00112 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i) :"%rax","%rdx","%cc"); 00113 00114 #define SQRADD2(i, j) \ 00115 asm( \ 00116 "movq %6,%%rax \n\t" \ 00117 "mulq %7 \n\t" \ 00118 "addq %%rax,%0 \n\t" \ 00119 "adcq %%rdx,%1 \n\t" \ 00120 "adcq $0,%2 \n\t" \ 00121 "addq %%rax,%0 \n\t" \ 00122 "adcq %%rdx,%1 \n\t" \ 00123 "adcq $0,%2 \n\t" \ 00124 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","%cc"); 00125 00126 #define SQRADDSC(i, j) \ 00127 asm( \ 00128 "movq %6,%%rax \n\t" \ 00129 "mulq %7 \n\t" \ 00130 "movq %%rax,%0 \n\t" \ 00131 "movq %%rdx,%1 \n\t" \ 00132 "xorq %2,%2 \n\t" \ 00133 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","%cc"); 00134 00135 #define SQRADDAC(i, j) \ 00136 asm( \ 00137 "movq %6,%%rax \n\t" \ 00138 "mulq %7 \n\t" \ 00139 "addq %%rax,%0 \n\t" \ 00140 "adcq %%rdx,%1 \n\t" \ 00141 "adcq $0,%2 \n\t" \ 00142 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","%cc"); 00143 00144 #define SQRADDDB \ 00145 asm( \ 00146 "addq 
%6,%0 \n\t" \ 00147 "adcq %7,%1 \n\t" \ 00148 "adcq %8,%2 \n\t" \ 00149 "addq %6,%0 \n\t" \ 00150 "adcq %7,%1 \n\t" \ 00151 "adcq %8,%2 \n\t" \ 00152 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "%cc"); 00153 00154 #elif defined(TFM_SSE2) 00155 00156 /* SSE2 Optimized */ 00157 #define COMBA_START 00158 00159 #define CLEAR_CARRY \ 00160 c0 = c1 = c2 = 0; 00161 00162 #define COMBA_STORE(x) \ 00163 x = c0; 00164 00165 #define COMBA_STORE2(x) \ 00166 x = c1; 00167 00168 #define CARRY_FORWARD \ 00169 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00170 00171 #define COMBA_FINI \ 00172 asm("emms"); 00173 00174 #define SQRADD(i, j) \ 00175 asm( \ 00176 "movd %6,%%mm0 \n\t" \ 00177 "pmuludq %%mm0,%%mm0\n\t" \ 00178 "movd %%mm0,%%eax \n\t" \ 00179 "psrlq $32,%%mm0 \n\t" \ 00180 "addl %%eax,%0 \n\t" \ 00181 "movd %%mm0,%%eax \n\t" \ 00182 "adcl %%eax,%1 \n\t" \ 00183 "adcl $0,%2 \n\t" \ 00184 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%cc"); 00185 00186 #define SQRADD2(i, j) \ 00187 asm( \ 00188 "movd %6,%%mm0 \n\t" \ 00189 "movd %7,%%mm1 \n\t" \ 00190 "pmuludq %%mm1,%%mm0\n\t" \ 00191 "movd %%mm0,%%eax \n\t" \ 00192 "psrlq $32,%%mm0 \n\t" \ 00193 "movd %%mm0,%%edx \n\t" \ 00194 "addl %%eax,%0 \n\t" \ 00195 "adcl %%edx,%1 \n\t" \ 00196 "adcl $0,%2 \n\t" \ 00197 "addl %%eax,%0 \n\t" \ 00198 "adcl %%edx,%1 \n\t" \ 00199 "adcl $0,%2 \n\t" \ 00200 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc"); 00201 00202 #define SQRADDSC(i, j) \ 00203 asm( \ 00204 "movd %6,%%mm0 \n\t" \ 00205 "movd %7,%%mm1 \n\t" \ 00206 "pmuludq %%mm1,%%mm0\n\t" \ 00207 "movd %%mm0,%0 \n\t" \ 00208 "psrlq $32,%%mm0 \n\t" \ 00209 "movd %%mm0,%1 \n\t" \ 00210 "xorl %2,%2 \n\t" \ 00211 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j)); 00212 00213 #define SQRADDAC(i, j) \ 00214 asm( \ 00215 "movd %6,%%mm0 \n\t" \ 00216 "movd %7,%%mm1 \n\t" \ 00217 "pmuludq 
%%mm1,%%mm0\n\t" \ 00218 "movd %%mm0,%%eax \n\t" \ 00219 "psrlq $32,%%mm0 \n\t" \ 00220 "movd %%mm0,%%edx \n\t" \ 00221 "addl %%eax,%0 \n\t" \ 00222 "adcl %%edx,%1 \n\t" \ 00223 "adcl $0,%2 \n\t" \ 00224 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j) :"%eax","%edx","%cc"); 00225 00226 #define SQRADDDB \ 00227 asm( \ 00228 "addl %6,%0 \n\t" \ 00229 "adcl %7,%1 \n\t" \ 00230 "adcl %8,%2 \n\t" \ 00231 "addl %6,%0 \n\t" \ 00232 "adcl %7,%1 \n\t" \ 00233 "adcl %8,%2 \n\t" \ 00234 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "%cc"); 00235 00236 #elif defined(TFM_ARM) 00237 00238 /* ARM code */ 00239 00240 #define COMBA_START 00241 00242 #define CLEAR_CARRY \ 00243 c0 = c1 = c2 = 0; 00244 00245 #define COMBA_STORE(x) \ 00246 x = c0; 00247 00248 #define COMBA_STORE2(x) \ 00249 x = c1; 00250 00251 #define CARRY_FORWARD \ 00252 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00253 00254 #define COMBA_FINI 00255 00256 /* multiplies point i and j, updates carry "c1" and digit c2 */ 00257 #define SQRADD(i, j) \ 00258 asm( \ 00259 " UMULL r0,r1,%6,%6 \n\t" \ 00260 " ADDS %0,%0,r0 \n\t" \ 00261 " ADCS %1,%1,r1 \n\t" \ 00262 " ADC %2,%2,#0 \n\t" \ 00263 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i) : "r0", "r1", "%cc"); 00264 00265 /* for squaring some of the terms are doubled... 
*/ 00266 #define SQRADD2(i, j) \ 00267 asm( \ 00268 " UMULL r0,r1,%6,%7 \n\t" \ 00269 " ADDS %0,%0,r0 \n\t" \ 00270 " ADCS %1,%1,r1 \n\t" \ 00271 " ADC %2,%2,#0 \n\t" \ 00272 " ADDS %0,%0,r0 \n\t" \ 00273 " ADCS %1,%1,r1 \n\t" \ 00274 " ADC %2,%2,#0 \n\t" \ 00275 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "%cc"); 00276 00277 #define SQRADDSC(i, j) \ 00278 asm( \ 00279 " UMULL %0,%1,%6,%7 \n\t" \ 00280 " SUB %2,%2,%2 \n\t" \ 00281 :"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "%cc"); 00282 00283 #define SQRADDAC(i, j) \ 00284 asm( \ 00285 " UMULL r0,r1,%6,%7 \n\t" \ 00286 " ADDS %0,%0,r0 \n\t" \ 00287 " ADCS %1,%1,r1 \n\t" \ 00288 " ADC %2,%2,#0 \n\t" \ 00289 :"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "r0", "r1", "%cc"); 00290 00291 #define SQRADDDB \ 00292 asm( \ 00293 " ADDS %0,%0,%3 \n\t" \ 00294 " ADCS %1,%1,%4 \n\t" \ 00295 " ADC %2,%2,%5 \n\t" \ 00296 " ADDS %0,%0,%3 \n\t" \ 00297 " ADCS %1,%1,%4 \n\t" \ 00298 " ADC %2,%2,%5 \n\t" \ 00299 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc"); 00300 00301 #elif defined(TFM_PPC32) 00302 00303 /* PPC32 */ 00304 00305 #define COMBA_START 00306 00307 #define CLEAR_CARRY \ 00308 c0 = c1 = c2 = 0; 00309 00310 #define COMBA_STORE(x) \ 00311 x = c0; 00312 00313 #define COMBA_STORE2(x) \ 00314 x = c1; 00315 00316 #define CARRY_FORWARD \ 00317 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00318 00319 #define COMBA_FINI 00320 00321 /* multiplies point i and j, updates carry "c1" and digit c2 */ 00322 #define SQRADD(i, j) \ 00323 asm( \ 00324 " mullw 16,%6,%6 \n\t" \ 00325 " addc %0,%0,16 \n\t" \ 00326 " mulhwu 16,%6,%6 \n\t" \ 00327 " adde %1,%1,16 \n\t" \ 00328 " addze %2,%2 \n\t" \ 00329 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","%cc"); 00330 00331 /* for squaring some of the terms are doubled... 
*/ 00332 #define SQRADD2(i, j) \ 00333 asm( \ 00334 " mullw 16,%6,%7 \n\t" \ 00335 " mulhwu 17,%6,%7 \n\t" \ 00336 " addc %0,%0,16 \n\t" \ 00337 " adde %1,%1,17 \n\t" \ 00338 " addze %2,%2 \n\t" \ 00339 " addc %0,%0,16 \n\t" \ 00340 " adde %1,%1,17 \n\t" \ 00341 " addze %2,%2 \n\t" \ 00342 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","%cc"); 00343 00344 #define SQRADDSC(i, j) \ 00345 asm( \ 00346 " mullw %0,%6,%7 \n\t" \ 00347 " mulhwu %1,%6,%7 \n\t" \ 00348 " xor %2,%2,%2 \n\t" \ 00349 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc"); 00350 00351 #define SQRADDAC(i, j) \ 00352 asm( \ 00353 " mullw 16,%6,%7 \n\t" \ 00354 " addc %0,%0,16 \n\t" \ 00355 " mulhwu 16,%6,%7 \n\t" \ 00356 " adde %1,%1,16 \n\t" \ 00357 " addze %2,%2 \n\t" \ 00358 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "%cc"); 00359 00360 #define SQRADDDB \ 00361 asm( \ 00362 " addc %0,%0,%3 \n\t" \ 00363 " adde %1,%1,%4 \n\t" \ 00364 " adde %2,%2,%5 \n\t" \ 00365 " addc %0,%0,%3 \n\t" \ 00366 " adde %1,%1,%4 \n\t" \ 00367 " adde %2,%2,%5 \n\t" \ 00368 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc"); 00369 00370 #elif defined(TFM_PPC64) 00371 /* PPC64 */ 00372 00373 #define COMBA_START 00374 00375 #define CLEAR_CARRY \ 00376 c0 = c1 = c2 = 0; 00377 00378 #define COMBA_STORE(x) \ 00379 x = c0; 00380 00381 #define COMBA_STORE2(x) \ 00382 x = c1; 00383 00384 #define CARRY_FORWARD \ 00385 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00386 00387 #define COMBA_FINI 00388 00389 /* multiplies point i and j, updates carry "c1" and digit c2 */ 00390 #define SQRADD(i, j) \ 00391 asm( \ 00392 " mulld r16,%6,%6 \n\t" \ 00393 " addc %0,%0,r16 \n\t" \ 00394 " mulhdu r16,%6,%6 \n\t" \ 00395 " adde %1,%1,r16 \n\t" \ 00396 " addze %2,%2 \n\t" \ 00397 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r16","%cc"); 00398 00399 /* for squaring some 
of the terms are doubled... */ 00400 #define SQRADD2(i, j) \ 00401 asm( \ 00402 " mulld r16,%6,%7 \n\t" \ 00403 " mulhdu r17,%6,%7 \n\t" \ 00404 " addc %0,%0,r16 \n\t" \ 00405 " adde %1,%1,r17 \n\t" \ 00406 " addze %2,%2 \n\t" \ 00407 " addc %0,%0,r16 \n\t" \ 00408 " adde %1,%1,r17 \n\t" \ 00409 " addze %2,%2 \n\t" \ 00410 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16", "r17","%cc"); 00411 00412 #define SQRADDSC(i, j) \ 00413 asm( \ 00414 " mulld %0,%6,%7 \n\t" \ 00415 " mulhdu %1,%6,%7 \n\t" \ 00416 " xor %2,%2,%2 \n\t" \ 00417 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc"); 00418 00419 #define SQRADDAC(i, j) \ 00420 asm( \ 00421 " mulld r16,%6,%7 \n\t" \ 00422 " addc %0,%0,r16 \n\t" \ 00423 " mulhdu r16,%6,%7 \n\t" \ 00424 " adde %1,%1,r16 \n\t" \ 00425 " addze %2,%2 \n\t" \ 00426 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r16", "%cc"); 00427 00428 #define SQRADDDB \ 00429 asm( \ 00430 " addc %0,%0,%3 \n\t" \ 00431 " adde %1,%1,%4 \n\t" \ 00432 " adde %2,%2,%5 \n\t" \ 00433 " addc %0,%0,%3 \n\t" \ 00434 " adde %1,%1,%4 \n\t" \ 00435 " adde %2,%2,%5 \n\t" \ 00436 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc"); 00437 00438 00439 #elif defined(TFM_AVR32) 00440 00441 /* AVR32 */ 00442 00443 #define COMBA_START 00444 00445 #define CLEAR_CARRY \ 00446 c0 = c1 = c2 = 0; 00447 00448 #define COMBA_STORE(x) \ 00449 x = c0; 00450 00451 #define COMBA_STORE2(x) \ 00452 x = c1; 00453 00454 #define CARRY_FORWARD \ 00455 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00456 00457 #define COMBA_FINI 00458 00459 /* multiplies point i and j, updates carry "c1" and digit c2 */ 00460 #define SQRADD(i, j) \ 00461 asm( \ 00462 " mulu.d r2,%6,%6 \n\t" \ 00463 " add %0,%0,r2 \n\t" \ 00464 " adc %1,%1,r3 \n\t" \ 00465 " acr %2 \n\t" \ 00466 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r2","r3"); 00467 00468 /* for 
squaring some of the terms are doubled... */ 00469 #define SQRADD2(i, j) \ 00470 asm( \ 00471 " mulu.d r2,%6,%7 \n\t" \ 00472 " add %0,%0,r2 \n\t" \ 00473 " adc %1,%1,r3 \n\t" \ 00474 " acr %2, \n\t" \ 00475 " add %0,%0,r2 \n\t" \ 00476 " adc %1,%1,r3 \n\t" \ 00477 " acr %2, \n\t" \ 00478 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2", "r3"); 00479 00480 #define SQRADDSC(i, j) \ 00481 asm( \ 00482 " mulu.d r2,%6,%7 \n\t" \ 00483 " mov %0,r2 \n\t" \ 00484 " mov %1,r3 \n\t" \ 00485 " eor %2,%2 \n\t" \ 00486 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "r2", "r3"); 00487 00488 #define SQRADDAC(i, j) \ 00489 asm( \ 00490 " mulu.d r2,%6,%7 \n\t" \ 00491 " add %0,%0,r2 \n\t" \ 00492 " adc %1,%1,r3 \n\t" \ 00493 " acr %2 \n\t" \ 00494 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r2", "r3"); 00495 00496 #define SQRADDDB \ 00497 asm( \ 00498 " add %0,%0,%3 \n\t" \ 00499 " adc %1,%1,%4 \n\t" \ 00500 " adc %2,%2,%5 \n\t" \ 00501 " add %0,%0,%3 \n\t" \ 00502 " adc %1,%1,%4 \n\t" \ 00503 " adc %2,%2,%5 \n\t" \ 00504 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc"); 00505 00506 #elif defined(TFM_MIPS) 00507 00508 /* MIPS */ 00509 00510 #define COMBA_START 00511 00512 #define CLEAR_CARRY \ 00513 c0 = c1 = c2 = 0; 00514 00515 #define COMBA_STORE(x) \ 00516 x = c0; 00517 00518 #define COMBA_STORE2(x) \ 00519 x = c1; 00520 00521 #define CARRY_FORWARD \ 00522 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00523 00524 #define COMBA_FINI 00525 00526 /* multiplies point i and j, updates carry "c1" and digit c2 */ 00527 #define SQRADD(i, j) \ 00528 asm( \ 00529 " multu %6,%6 \n\t" \ 00530 " mflo $12 \n\t" \ 00531 " mfhi $13 \n\t" \ 00532 " addu %0,%0,$12 \n\t" \ 00533 " sltu $12,%0,$12 \n\t" \ 00534 " addu %1,%1,$13 \n\t" \ 00535 " sltu $13,%1,$13 \n\t" \ 00536 " addu %1,%1,$12 \n\t" \ 00537 " sltu $12,%1,$12 \n\t" \ 00538 " addu %2,%2,$13 \n\t" \ 
00539 " addu %2,%2,$12 \n\t" \ 00540 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"$12","$13"); 00541 00542 /* for squaring some of the terms are doubled... */ 00543 #define SQRADD2(i, j) \ 00544 asm( \ 00545 " multu %6,%7 \n\t" \ 00546 " mflo $12 \n\t" \ 00547 " mfhi $13 \n\t" \ 00548 \ 00549 " addu %0,%0,$12 \n\t" \ 00550 " sltu $14,%0,$12 \n\t" \ 00551 " addu %1,%1,$13 \n\t" \ 00552 " sltu $15,%1,$13 \n\t" \ 00553 " addu %1,%1,$14 \n\t" \ 00554 " sltu $14,%1,$14 \n\t" \ 00555 " addu %2,%2,$15 \n\t" \ 00556 " addu %2,%2,$14 \n\t" \ 00557 \ 00558 " addu %0,%0,$12 \n\t" \ 00559 " sltu $14,%0,$12 \n\t" \ 00560 " addu %1,%1,$13 \n\t" \ 00561 " sltu $15,%1,$13 \n\t" \ 00562 " addu %1,%1,$14 \n\t" \ 00563 " sltu $14,%1,$14 \n\t" \ 00564 " addu %2,%2,$15 \n\t" \ 00565 " addu %2,%2,$14 \n\t" \ 00566 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12", "$13", "$14", "$15"); 00567 00568 #define SQRADDSC(i, j) \ 00569 asm( \ 00570 " multu %6,%7 \n\t" \ 00571 " mflo %0 \n\t" \ 00572 " mfhi %1 \n\t" \ 00573 " xor %2,%2,%2 \n\t" \ 00574 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc"); 00575 00576 #define SQRADDAC(i, j) \ 00577 asm( \ 00578 " multu %6,%7 \n\t" \ 00579 " mflo $12 \n\t" \ 00580 " mfhi $13 \n\t" \ 00581 " addu %0,%0,$12 \n\t" \ 00582 " sltu $12,%0,$12 \n\t" \ 00583 " addu %1,%1,$13 \n\t" \ 00584 " sltu $13,%1,$13 \n\t" \ 00585 " addu %1,%1,$12 \n\t" \ 00586 " sltu $12,%1,$12 \n\t" \ 00587 " addu %2,%2,$13 \n\t" \ 00588 " addu %2,%2,$12 \n\t" \ 00589 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"$12", "$13", "$14"); 00590 00591 #define SQRADDDB \ 00592 asm( \ 00593 " addu %0,%0,%3 \n\t" \ 00594 " sltu $10,%0,%3 \n\t" \ 00595 " addu %1,%1,$10 \n\t" \ 00596 " sltu $10,%1,$10 \n\t" \ 00597 " addu %1,%1,%4 \n\t" \ 00598 " sltu $11,%1,%4 \n\t" \ 00599 " addu %2,%2,$10 \n\t" \ 00600 " addu %2,%2,$11 \n\t" \ 00601 " addu %2,%2,%5 \n\t" \ 00602 \ 00603 " addu 
%0,%0,%3 \n\t" \ 00604 " sltu $10,%0,%3 \n\t" \ 00605 " addu %1,%1,$10 \n\t" \ 00606 " sltu $10,%1,$10 \n\t" \ 00607 " addu %1,%1,%4 \n\t" \ 00608 " sltu $11,%1,%4 \n\t" \ 00609 " addu %2,%2,$10 \n\t" \ 00610 " addu %2,%2,$11 \n\t" \ 00611 " addu %2,%2,%5 \n\t" \ 00612 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "$10", "$11"); 00613 00614 #else 00615 00616 #define TFM_ISO 00617 00618 /* ISO C portable code */ 00619 00620 #define COMBA_START 00621 00622 #define CLEAR_CARRY \ 00623 c0 = c1 = c2 = 0; 00624 00625 #define COMBA_STORE(x) \ 00626 x = c0; 00627 00628 #define COMBA_STORE2(x) \ 00629 x = c1; 00630 00631 #define CARRY_FORWARD \ 00632 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00633 00634 #define COMBA_FINI 00635 00636 /* multiplies point i and j, updates carry "c1" and digit c2 */ 00637 #define SQRADD(i, j) \ 00638 do { fp_word t; \ 00639 t = c0 + ((fp_word)i) * ((fp_word)j); c0 = t; \ 00640 t = c1 + (t >> DIGIT_BIT); c1 = t; c2 += t >> DIGIT_BIT; \ 00641 } while (0); 00642 00643 00644 /* for squaring some of the terms are doubled... 
*/ 00645 #define SQRADD2(i, j) \ 00646 do { fp_word t; \ 00647 t = ((fp_word)i) * ((fp_word)j); \ 00648 tt = (fp_word)c0 + t; c0 = tt; \ 00649 tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = tt; c2 += tt >> DIGIT_BIT; \ 00650 tt = (fp_word)c0 + t; c0 = tt; \ 00651 tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = tt; c2 += tt >> DIGIT_BIT; \ 00652 } while (0); 00653 00654 #define SQRADDSC(i, j) \ 00655 do { fp_word t; \ 00656 t = ((fp_word)i) * ((fp_word)j); \ 00657 sc0 = (fp_digit)t; sc1 = (t >> DIGIT_BIT); sc2 = 0; \ 00658 } while (0); 00659 00660 #define SQRADDAC(i, j) \ 00661 do { fp_word t; \ 00662 t = sc0 + ((fp_word)i) * ((fp_word)j); sc0 = t; \ 00663 t = sc1 + (t >> DIGIT_BIT); sc1 = t; sc2 += t >> DIGIT_BIT; \ 00664 } while (0); 00665 00666 #define SQRADDDB \ 00667 do { fp_word t; \ 00668 t = ((fp_word)sc0) + ((fp_word)sc0) + c0; c0 = t; \ 00669 t = ((fp_word)sc1) + ((fp_word)sc1) + c1 + (t >> DIGIT_BIT); c1 = t; \ 00670 c2 = c2 + ((fp_word)sc2) + ((fp_word)sc2) + (t >> DIGIT_BIT); \ 00671 } while (0); 00672 00673 #endif 00674 00675 /* $Source: /cvs/libtom/tomsfastmath/src/sqr/fp_sqr_comba.c,v $ */ 00676 /* $Revision: 1.4 $ */ 00677 /* $Date: 2007/03/14 23:47:42 $ */
Generated on Wed Jul 13 2022 00:22:54 by
1.7.2
