Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Dependents: HTTPClient-SSL HTTPClient HTTPClient-SSL http_access ... more
asm.c
00001 /* asm.c 00002 * 00003 * Copyright (C) 2006-2014 wolfSSL Inc. 00004 * 00005 * This file is part of CyaSSL. 00006 * 00007 * CyaSSL is free software; you can redistribute it and/or modify 00008 * it under the terms of the GNU General Public License as published by 00009 * the Free Software Foundation; either version 2 of the License, or 00010 * (at your option) any later version. 00011 * 00012 * CyaSSL is distributed in the hope that it will be useful, 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 * GNU General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU General Public License 00018 * along with this program; if not, write to the Free Software 00019 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA 00020 */ 00021 00022 #ifdef HAVE_CONFIG_H 00023 #include <config.h> 00024 #endif 00025 00026 #include <cyassl/ctaocrypt/settings.h> 00027 00028 /* 00029 * Based on public domain TomsFastMath 0.10 by Tom St Denis, tomstdenis@iahu.ca, 00030 * http://math.libtomcrypt.com 00031 */ 00032 00033 00034 /******************************************************************/ 00035 /* fp_montgomery_reduce.c asm or generic */ 00036 #if defined(TFM_X86) && !defined(TFM_SSE2) 00037 /* x86-32 code */ 00038 00039 #define MONT_START 00040 #define MONT_FINI 00041 #define LOOP_END 00042 #define LOOP_START \ 00043 mu = c[x] * mp 00044 00045 #define INNERMUL \ 00046 __asm__( \ 00047 "movl %5,%%eax \n\t" \ 00048 "mull %4 \n\t" \ 00049 "addl %1,%%eax \n\t" \ 00050 "adcl $0,%%edx \n\t" \ 00051 "addl %%eax,%0 \n\t" \ 00052 "adcl $0,%%edx \n\t" \ 00053 "movl %%edx,%1 \n\t" \ 00054 :"=g"(_c[LO]), "=r"(cy) \ 00055 :"0"(_c[LO]), "1"(cy), "g"(mu), "g"(*tmpm++) \ 00056 : "%eax", "%edx", "cc") 00057 00058 #define PROPCARRY \ 00059 __asm__( \ 00060 "addl %1,%0 \n\t" \ 00061 "setb %%al \n\t" \ 00062 "movzbl %%al,%1 \n\t" \ 00063 
:"=g"(_c[LO]), "=r"(cy) \ 00064 :"0"(_c[LO]), "1"(cy) \ 00065 : "%eax", "cc") 00066 00067 /******************************************************************/ 00068 #elif defined(TFM_X86_64) 00069 /* x86-64 code */ 00070 00071 #define MONT_START 00072 #define MONT_FINI 00073 #define LOOP_END 00074 #define LOOP_START \ 00075 mu = c[x] * mp 00076 00077 #define INNERMUL \ 00078 __asm__( \ 00079 "movq %5,%%rax \n\t" \ 00080 "mulq %4 \n\t" \ 00081 "addq %1,%%rax \n\t" \ 00082 "adcq $0,%%rdx \n\t" \ 00083 "addq %%rax,%0 \n\t" \ 00084 "adcq $0,%%rdx \n\t" \ 00085 "movq %%rdx,%1 \n\t" \ 00086 :"=g"(_c[LO]), "=r"(cy) \ 00087 :"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \ 00088 : "%rax", "%rdx", "cc") 00089 00090 #define INNERMUL8 \ 00091 __asm__( \ 00092 "movq 0(%5),%%rax \n\t" \ 00093 "movq 0(%2),%%r10 \n\t" \ 00094 "movq 0x8(%5),%%r11 \n\t" \ 00095 "mulq %4 \n\t" \ 00096 "addq %%r10,%%rax \n\t" \ 00097 "adcq $0,%%rdx \n\t" \ 00098 "movq 0x8(%2),%%r10 \n\t" \ 00099 "addq %3,%%rax \n\t" \ 00100 "adcq $0,%%rdx \n\t" \ 00101 "movq %%rax,0(%0) \n\t" \ 00102 "movq %%rdx,%1 \n\t" \ 00103 \ 00104 "movq %%r11,%%rax \n\t" \ 00105 "movq 0x10(%5),%%r11 \n\t" \ 00106 "mulq %4 \n\t" \ 00107 "addq %%r10,%%rax \n\t" \ 00108 "adcq $0,%%rdx \n\t" \ 00109 "movq 0x10(%2),%%r10 \n\t" \ 00110 "addq %3,%%rax \n\t" \ 00111 "adcq $0,%%rdx \n\t" \ 00112 "movq %%rax,0x8(%0) \n\t" \ 00113 "movq %%rdx,%1 \n\t" \ 00114 \ 00115 "movq %%r11,%%rax \n\t" \ 00116 "movq 0x18(%5),%%r11 \n\t" \ 00117 "mulq %4 \n\t" \ 00118 "addq %%r10,%%rax \n\t" \ 00119 "adcq $0,%%rdx \n\t" \ 00120 "movq 0x18(%2),%%r10 \n\t" \ 00121 "addq %3,%%rax \n\t" \ 00122 "adcq $0,%%rdx \n\t" \ 00123 "movq %%rax,0x10(%0) \n\t" \ 00124 "movq %%rdx,%1 \n\t" \ 00125 \ 00126 "movq %%r11,%%rax \n\t" \ 00127 "movq 0x20(%5),%%r11 \n\t" \ 00128 "mulq %4 \n\t" \ 00129 "addq %%r10,%%rax \n\t" \ 00130 "adcq $0,%%rdx \n\t" \ 00131 "movq 0x20(%2),%%r10 \n\t" \ 00132 "addq %3,%%rax \n\t" \ 00133 "adcq $0,%%rdx \n\t" \ 00134 "movq %%rax,0x18(%0) 
\n\t" \ 00135 "movq %%rdx,%1 \n\t" \ 00136 \ 00137 "movq %%r11,%%rax \n\t" \ 00138 "movq 0x28(%5),%%r11 \n\t" \ 00139 "mulq %4 \n\t" \ 00140 "addq %%r10,%%rax \n\t" \ 00141 "adcq $0,%%rdx \n\t" \ 00142 "movq 0x28(%2),%%r10 \n\t" \ 00143 "addq %3,%%rax \n\t" \ 00144 "adcq $0,%%rdx \n\t" \ 00145 "movq %%rax,0x20(%0) \n\t" \ 00146 "movq %%rdx,%1 \n\t" \ 00147 \ 00148 "movq %%r11,%%rax \n\t" \ 00149 "movq 0x30(%5),%%r11 \n\t" \ 00150 "mulq %4 \n\t" \ 00151 "addq %%r10,%%rax \n\t" \ 00152 "adcq $0,%%rdx \n\t" \ 00153 "movq 0x30(%2),%%r10 \n\t" \ 00154 "addq %3,%%rax \n\t" \ 00155 "adcq $0,%%rdx \n\t" \ 00156 "movq %%rax,0x28(%0) \n\t" \ 00157 "movq %%rdx,%1 \n\t" \ 00158 \ 00159 "movq %%r11,%%rax \n\t" \ 00160 "movq 0x38(%5),%%r11 \n\t" \ 00161 "mulq %4 \n\t" \ 00162 "addq %%r10,%%rax \n\t" \ 00163 "adcq $0,%%rdx \n\t" \ 00164 "movq 0x38(%2),%%r10 \n\t" \ 00165 "addq %3,%%rax \n\t" \ 00166 "adcq $0,%%rdx \n\t" \ 00167 "movq %%rax,0x30(%0) \n\t" \ 00168 "movq %%rdx,%1 \n\t" \ 00169 \ 00170 "movq %%r11,%%rax \n\t" \ 00171 "mulq %4 \n\t" \ 00172 "addq %%r10,%%rax \n\t" \ 00173 "adcq $0,%%rdx \n\t" \ 00174 "addq %3,%%rax \n\t" \ 00175 "adcq $0,%%rdx \n\t" \ 00176 "movq %%rax,0x38(%0) \n\t" \ 00177 "movq %%rdx,%1 \n\t" \ 00178 \ 00179 :"=r"(_c), "=r"(cy) \ 00180 : "0"(_c), "1"(cy), "g"(mu), "r"(tmpm)\ 00181 : "%rax", "%rdx", "%r10", "%r11", "cc") 00182 00183 00184 #define PROPCARRY \ 00185 __asm__( \ 00186 "addq %1,%0 \n\t" \ 00187 "setb %%al \n\t" \ 00188 "movzbq %%al,%1 \n\t" \ 00189 :"=g"(_c[LO]), "=r"(cy) \ 00190 :"0"(_c[LO]), "1"(cy) \ 00191 : "%rax", "cc") 00192 00193 /******************************************************************/ 00194 #elif defined(TFM_SSE2) 00195 /* SSE2 code (assumes 32-bit fp_digits) */ 00196 /* XMM register assignments: 00197 * xmm0 *tmpm++, then Mu * (*tmpm++) 00198 * xmm1 c[x], then Mu 00199 * xmm2 mp 00200 * xmm3 cy 00201 * xmm4 _c[LO] 00202 */ 00203 00204 #define MONT_START \ 00205 __asm__("movd %0,%%mm2"::"g"(mp)) 00206 00207 #define 
MONT_FINI \ 00208 __asm__("emms") 00209 00210 #define LOOP_START \ 00211 __asm__( \ 00212 "movd %0,%%mm1 \n\t" \ 00213 "pxor %%mm3,%%mm3 \n\t" \ 00214 "pmuludq %%mm2,%%mm1 \n\t" \ 00215 :: "g"(c[x])) 00216 00217 /* pmuludq on mmx registers does a 32x32->64 multiply. */ 00218 #define INNERMUL \ 00219 __asm__( \ 00220 "movd %1,%%mm4 \n\t" \ 00221 "movd %2,%%mm0 \n\t" \ 00222 "paddq %%mm4,%%mm3 \n\t" \ 00223 "pmuludq %%mm1,%%mm0 \n\t" \ 00224 "paddq %%mm0,%%mm3 \n\t" \ 00225 "movd %%mm3,%0 \n\t" \ 00226 "psrlq $32, %%mm3 \n\t" \ 00227 :"=g"(_c[LO]) : "0"(_c[LO]), "g"(*tmpm++) ); 00228 00229 #define INNERMUL8 \ 00230 __asm__( \ 00231 "movd 0(%1),%%mm4 \n\t" \ 00232 "movd 0(%2),%%mm0 \n\t" \ 00233 "paddq %%mm4,%%mm3 \n\t" \ 00234 "pmuludq %%mm1,%%mm0 \n\t" \ 00235 "movd 4(%2),%%mm5 \n\t" \ 00236 "paddq %%mm0,%%mm3 \n\t" \ 00237 "movd 4(%1),%%mm6 \n\t" \ 00238 "movd %%mm3,0(%0) \n\t" \ 00239 "psrlq $32, %%mm3 \n\t" \ 00240 \ 00241 "paddq %%mm6,%%mm3 \n\t" \ 00242 "pmuludq %%mm1,%%mm5 \n\t" \ 00243 "movd 8(%2),%%mm6 \n\t" \ 00244 "paddq %%mm5,%%mm3 \n\t" \ 00245 "movd 8(%1),%%mm7 \n\t" \ 00246 "movd %%mm3,4(%0) \n\t" \ 00247 "psrlq $32, %%mm3 \n\t" \ 00248 \ 00249 "paddq %%mm7,%%mm3 \n\t" \ 00250 "pmuludq %%mm1,%%mm6 \n\t" \ 00251 "movd 12(%2),%%mm7 \n\t" \ 00252 "paddq %%mm6,%%mm3 \n\t" \ 00253 "movd 12(%1),%%mm5 \n\t" \ 00254 "movd %%mm3,8(%0) \n\t" \ 00255 "psrlq $32, %%mm3 \n\t" \ 00256 \ 00257 "paddq %%mm5,%%mm3 \n\t" \ 00258 "pmuludq %%mm1,%%mm7 \n\t" \ 00259 "movd 16(%2),%%mm5 \n\t" \ 00260 "paddq %%mm7,%%mm3 \n\t" \ 00261 "movd 16(%1),%%mm6 \n\t" \ 00262 "movd %%mm3,12(%0) \n\t" \ 00263 "psrlq $32, %%mm3 \n\t" \ 00264 \ 00265 "paddq %%mm6,%%mm3 \n\t" \ 00266 "pmuludq %%mm1,%%mm5 \n\t" \ 00267 "movd 20(%2),%%mm6 \n\t" \ 00268 "paddq %%mm5,%%mm3 \n\t" \ 00269 "movd 20(%1),%%mm7 \n\t" \ 00270 "movd %%mm3,16(%0) \n\t" \ 00271 "psrlq $32, %%mm3 \n\t" \ 00272 \ 00273 "paddq %%mm7,%%mm3 \n\t" \ 00274 "pmuludq %%mm1,%%mm6 \n\t" \ 00275 "movd 24(%2),%%mm7 \n\t" \ 00276 
"paddq %%mm6,%%mm3 \n\t" \ 00277 "movd 24(%1),%%mm5 \n\t" \ 00278 "movd %%mm3,20(%0) \n\t" \ 00279 "psrlq $32, %%mm3 \n\t" \ 00280 \ 00281 "paddq %%mm5,%%mm3 \n\t" \ 00282 "pmuludq %%mm1,%%mm7 \n\t" \ 00283 "movd 28(%2),%%mm5 \n\t" \ 00284 "paddq %%mm7,%%mm3 \n\t" \ 00285 "movd 28(%1),%%mm6 \n\t" \ 00286 "movd %%mm3,24(%0) \n\t" \ 00287 "psrlq $32, %%mm3 \n\t" \ 00288 \ 00289 "paddq %%mm6,%%mm3 \n\t" \ 00290 "pmuludq %%mm1,%%mm5 \n\t" \ 00291 "paddq %%mm5,%%mm3 \n\t" \ 00292 "movd %%mm3,28(%0) \n\t" \ 00293 "psrlq $32, %%mm3 \n\t" \ 00294 :"=r"(_c) : "0"(_c), "r"(tmpm) ); 00295 00296 /* TAO switched tmpm from "g" to "r" after gcc tried to index the indexed stack 00297 pointer */ 00298 00299 #define LOOP_END \ 00300 __asm__( "movd %%mm3,%0 \n" :"=r"(cy)) 00301 00302 #define PROPCARRY \ 00303 __asm__( \ 00304 "addl %1,%0 \n\t" \ 00305 "setb %%al \n\t" \ 00306 "movzbl %%al,%1 \n\t" \ 00307 :"=g"(_c[LO]), "=r"(cy) \ 00308 :"0"(_c[LO]), "1"(cy) \ 00309 : "%eax", "cc") 00310 00311 /******************************************************************/ 00312 #elif defined(TFM_ARM) 00313 /* ARMv4 code */ 00314 00315 #define MONT_START 00316 #define MONT_FINI 00317 #define LOOP_END 00318 #define LOOP_START \ 00319 mu = c[x] * mp 00320 00321 00322 #ifdef __thumb__ 00323 00324 #define INNERMUL \ 00325 __asm__( \ 00326 " LDR r0,%1 \n\t" \ 00327 " ADDS r0,r0,%0 \n\t" \ 00328 " ITE CS \n\t" \ 00329 " MOVCS %0,#1 \n\t" \ 00330 " MOVCC %0,#0 \n\t" \ 00331 " UMLAL r0,%0,%3,%4 \n\t" \ 00332 " STR r0,%1 \n\t" \ 00333 :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"m"(_c[0]):"r0","cc"); 00334 00335 #define PROPCARRY \ 00336 __asm__( \ 00337 " LDR r0,%1 \n\t" \ 00338 " ADDS r0,r0,%0 \n\t" \ 00339 " STR r0,%1 \n\t" \ 00340 " ITE CS \n\t" \ 00341 " MOVCS %0,#1 \n\t" \ 00342 " MOVCC %0,#0 \n\t" \ 00343 :"=r"(cy),"=m"(_c[0]):"0"(cy),"m"(_c[0]):"r0","cc"); 00344 00345 00346 /* TAO thumb mode uses ite (if then else) to detect carry directly 00347 * fixed unmatched constraint warning by 
changing 1 to m */ 00348 00349 #else /* __thumb__ */ 00350 00351 #define INNERMUL \ 00352 __asm__( \ 00353 " LDR r0,%1 \n\t" \ 00354 " ADDS r0,r0,%0 \n\t" \ 00355 " MOVCS %0,#1 \n\t" \ 00356 " MOVCC %0,#0 \n\t" \ 00357 " UMLAL r0,%0,%3,%4 \n\t" \ 00358 " STR r0,%1 \n\t" \ 00359 :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r0","cc"); 00360 00361 #define PROPCARRY \ 00362 __asm__( \ 00363 " LDR r0,%1 \n\t" \ 00364 " ADDS r0,r0,%0 \n\t" \ 00365 " STR r0,%1 \n\t" \ 00366 " MOVCS %0,#1 \n\t" \ 00367 " MOVCC %0,#0 \n\t" \ 00368 :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r0","cc"); 00369 00370 #endif /* __thumb__ */ 00371 00372 #elif defined(TFM_PPC32) 00373 00374 /* PPC32 */ 00375 #define MONT_START 00376 #define MONT_FINI 00377 #define LOOP_END 00378 #define LOOP_START \ 00379 mu = c[x] * mp 00380 00381 #define INNERMUL \ 00382 __asm__( \ 00383 " mullw 16,%3,%4 \n\t" \ 00384 " mulhwu 17,%3,%4 \n\t" \ 00385 " addc 16,16,%0 \n\t" \ 00386 " addze 17,17 \n\t" \ 00387 " lwz 18,%1 \n\t" \ 00388 " addc 16,16,18 \n\t" \ 00389 " addze %0,17 \n\t" \ 00390 " stw 16,%1 \n\t" \ 00391 :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "18","cc"); ++tmpm; 00392 00393 #define PROPCARRY \ 00394 __asm__( \ 00395 " lwz 16,%1 \n\t" \ 00396 " addc 16,16,%0 \n\t" \ 00397 " stw 16,%1 \n\t" \ 00398 " xor %0,%0,%0 \n\t" \ 00399 " addze %0,%0 \n\t" \ 00400 :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"16","cc"); 00401 00402 #elif defined(TFM_PPC64) 00403 00404 /* PPC64 */ 00405 #define MONT_START 00406 #define MONT_FINI 00407 #define LOOP_END 00408 #define LOOP_START \ 00409 mu = c[x] * mp 00410 00411 #define INNERMUL \ 00412 __asm__( \ 00413 " mulld 16,%3,%4 \n\t" \ 00414 " mulhdu 17,%3,%4 \n\t" \ 00415 " addc 16,16,%0 \n\t" \ 00416 " addze 17,17 \n\t" \ 00417 " ldx 18,0,%1 \n\t" \ 00418 " addc 16,16,18 \n\t" \ 00419 " addze %0,17 \n\t" \ 00420 " sdx 16,0,%1 \n\t" \ 00421 :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "18","cc"); 
++tmpm; 00422 00423 #define PROPCARRY \ 00424 __asm__( \ 00425 " ldx 16,0,%1 \n\t" \ 00426 " addc 16,16,%0 \n\t" \ 00427 " sdx 16,0,%1 \n\t" \ 00428 " xor %0,%0,%0 \n\t" \ 00429 " addze %0,%0 \n\t" \ 00430 :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"16","cc"); 00431 00432 /******************************************************************/ 00433 00434 #elif defined(TFM_AVR32) 00435 00436 /* AVR32 */ 00437 #define MONT_START 00438 #define MONT_FINI 00439 #define LOOP_END 00440 #define LOOP_START \ 00441 mu = c[x] * mp 00442 00443 #define INNERMUL \ 00444 __asm__( \ 00445 " ld.w r2,%1 \n\t" \ 00446 " add r2,%0 \n\t" \ 00447 " eor r3,r3 \n\t" \ 00448 " acr r3 \n\t" \ 00449 " macu.d r2,%3,%4 \n\t" \ 00450 " st.w %1,r2 \n\t" \ 00451 " mov %0,r3 \n\t" \ 00452 :"=r"(cy),"=r"(_c):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c):"r2","r3"); 00453 00454 #define PROPCARRY \ 00455 __asm__( \ 00456 " ld.w r2,%1 \n\t" \ 00457 " add r2,%0 \n\t" \ 00458 " st.w %1,r2 \n\t" \ 00459 " eor %0,%0 \n\t" \ 00460 " acr %0 \n\t" \ 00461 :"=r"(cy),"=r"(&_c[0]):"0"(cy),"1"(&_c[0]):"r2","cc"); 00462 00463 #else 00464 00465 /* ISO C code */ 00466 #define MONT_START 00467 #define MONT_FINI 00468 #define LOOP_END 00469 #define LOOP_START \ 00470 mu = c[x] * mp 00471 00472 #define INNERMUL \ 00473 do { fp_word t; \ 00474 t = ((fp_word)_c[0] + (fp_word)cy) + \ 00475 (((fp_word)mu) * ((fp_word)*tmpm++)); \ 00476 _c[0] = (fp_digit)t; \ 00477 cy = (fp_digit)(t >> DIGIT_BIT); \ 00478 } while (0) 00479 00480 #define PROPCARRY \ 00481 do { fp_digit t = _c[0] += cy; cy = (t < cy); } while (0) 00482 00483 #endif 00484 /******************************************************************/ 00485 00486 00487 #define LO 0 00488 /* end fp_montogomery_reduce.c asm */ 00489 00490 00491 /* start fp_sqr_comba.c asm */ 00492 #if defined(TFM_X86) 00493 00494 /* x86-32 optimized */ 00495 00496 #define COMBA_START 00497 00498 #define CLEAR_CARRY \ 00499 c0 = c1 = c2 = 0; 00500 00501 #define COMBA_STORE(x) \ 00502 x = c0; 00503 00504 
#define COMBA_STORE2(x) \
   x = c1;

/* shift the column accumulator down one digit */
#define CARRY_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_FINI

/* (c2,c1,c0) += i * i */
#define SQRADD(i, j)                                 \
__asm__(                                             \
    "movl  %6,%%eax     \n\t"                        \
    "mull  %%eax        \n\t"                        \
    "addl  %%eax,%0     \n\t"                        \
    "adcl  %%edx,%1     \n\t"                        \
    "adcl  $0,%2        \n\t"                        \
    :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","cc");

/* (c2,c1,c0) += 2 * i * j  (product added twice) */
#define SQRADD2(i, j)                                \
__asm__(                                             \
    "movl  %6,%%eax     \n\t"                        \
    "mull  %7           \n\t"                        \
    "addl  %%eax,%0     \n\t"                        \
    "adcl  %%edx,%1     \n\t"                        \
    "adcl  $0,%2        \n\t"                        \
    "addl  %%eax,%0     \n\t"                        \
    "adcl  %%edx,%1     \n\t"                        \
    "adcl  $0,%2        \n\t"                        \
    :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx", "cc");

/* (sc2,sc1,sc0) = i * j  (secondary accumulator initialized) */
#define SQRADDSC(i, j)                               \
__asm__(                                             \
    "movl  %3,%%eax     \n\t"                        \
    "mull  %4           \n\t"                        \
    "movl  %%eax,%0     \n\t"                        \
    "movl  %%edx,%1     \n\t"                        \
    "xorl  %2,%2        \n\t"                        \
    :"=r"(sc0), "=r"(sc1), "=r"(sc2): "g"(i), "g"(j) :"%eax","%edx","cc");

/* TAO removed sc0,1,2 as input to remove warning so %6,%7 become %3,%4 */

/* (sc2,sc1,sc0) += i * j */
#define SQRADDAC(i, j)                               \
__asm__(                                             \
    "movl  %6,%%eax     \n\t"                        \
    "mull  %7           \n\t"                        \
    "addl  %%eax,%0     \n\t"                        \
    "adcl  %%edx,%1     \n\t"                        \
    "adcl  $0,%2        \n\t"                        \
    :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","cc");

/* (c2,c1,c0) += 2 * (sc2,sc1,sc0) */
#define SQRADDDB                                     \
__asm__(                                             \
    "addl %6,%0         \n\t"                        \
    "adcl %7,%1         \n\t"                        \
    "adcl %8,%2         \n\t"                        \
    "addl %6,%0         \n\t"                        \
    "adcl %7,%1         \n\t"                        \
    "adcl %8,%2         \n\t"                        \
    :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");

#elif defined(TFM_X86_64)
/* x86-64 optimized */

#define COMBA_START

#define CLEAR_CARRY \
   c0 = c1 = c2 = 0;

#define COMBA_STORE(x) \
   x = c0;

#define COMBA_STORE2(x) \
   x = c1;

/* shift the column accumulator down one digit */
#define CARRY_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_FINI

/* (c2,c1,c0) += i * i */
#define SQRADD(i, j)                                 \
__asm__(                                             \
    "movq  %6,%%rax     \n\t"                        \
    "mulq  %%rax        \n\t"                        \
    "addq  %%rax,%0     \n\t"                        \
    "adcq  %%rdx,%1     \n\t"                        \
    "adcq  $0,%2        \n\t"                        \
    :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i) :"%rax","%rdx","cc");

/* (c2,c1,c0) += 2 * i * j */
#define SQRADD2(i, j)                                \
__asm__(                                             \
    "movq  %6,%%rax     \n\t"                        \
    "mulq  %7           \n\t"                        \
    "addq  %%rax,%0     \n\t"                        \
    "adcq  %%rdx,%1     \n\t"                        \
    "adcq  $0,%2        \n\t"                        \
    "addq  %%rax,%0     \n\t"                        \
    "adcq  %%rdx,%1     \n\t"                        \
    "adcq  $0,%2        \n\t"                        \
    :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","cc");

/* (sc2,sc1,sc0) = i * j */
#define SQRADDSC(i, j)                               \
__asm__(                                             \
    "movq  %3,%%rax     \n\t"                        \
    "mulq  %4           \n\t"                        \
    "movq  %%rax,%0     \n\t"                        \
    "movq  %%rdx,%1     \n\t"                        \
    "xorq  %2,%2        \n\t"                        \
    :"=r"(sc0), "=r"(sc1), "=r"(sc2): "g"(i), "g"(j) :"%rax","%rdx","cc");

/* TAO removed sc0,1,2 as input to remove warning so %6,%7 become %3,%4 */

/* (sc2,sc1,sc0) += i * j */
#define SQRADDAC(i, j)                               \
__asm__(                                             \
    "movq  %6,%%rax     \n\t"                        \
    "mulq  %7           \n\t"                        \
    "addq  %%rax,%0     \n\t"                        \
    "adcq  %%rdx,%1     \n\t"                        \
    "adcq  $0,%2        \n\t"                        \
    :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","cc");

/* (c2,c1,c0) += 2 * (sc2,sc1,sc0) */
#define SQRADDDB                                     \
__asm__(                                             \
    "addq %6,%0         \n\t"                        \
    "adcq %7,%1         \n\t"                        \
    "adcq %8,%2         \n\t"                        \
    "addq %6,%0         \n\t"                        \
    "adcq %7,%1         \n\t"                        \
    "adcq %8,%2         \n\t"                        \
    :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");

#elif defined(TFM_SSE2)

/* SSE2 Optimized */
#define COMBA_START

#define CLEAR_CARRY \
   c0 = c1 = c2 = 0;

#define COMBA_STORE(x) \
   x = c0;

#define COMBA_STORE2(x) \
   x = c1;

/* shift the column accumulator down one digit */
#define CARRY_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* leave MMX state so following x87 code is safe */
#define COMBA_FINI \
   __asm__("emms");

/* (c2,c1,c0) += i * i  (64-bit product built with pmuludq) */
#define SQRADD(i, j)                                 \
__asm__(                                             \
    "movd  %6,%%mm0     \n\t"                        \
    "pmuludq %%mm0,%%mm0\n\t"                        \
    "movd  %%mm0,%%eax  \n\t"                        \
    "psrlq $32,%%mm0    \n\t"                        \
    "addl  %%eax,%0     \n\t"                        \
    "movd  %%mm0,%%eax  \n\t"                        \
    "adcl  %%eax,%1     \n\t"                        \
    "adcl  $0,%2        \n\t"                        \
    :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","cc");

/* (c2,c1,c0) += 2 * i * j */
#define SQRADD2(i, j)                                \
__asm__(                                             \
    "movd  %6,%%mm0     \n\t"                        \
    "movd  %7,%%mm1     \n\t"                        \
    "pmuludq %%mm1,%%mm0\n\t"                        \
    "movd  %%mm0,%%eax  \n\t"                        \
    "psrlq $32,%%mm0    \n\t"                        \
    "movd  %%mm0,%%edx  \n\t"                        \
    "addl  %%eax,%0     \n\t"                        \
    "adcl  %%edx,%1     \n\t"                        \
    "adcl  $0,%2        \n\t"                        \
    "addl  %%eax,%0     \n\t"                        \
    "adcl  %%edx,%1     \n\t"                        \
    "adcl  $0,%2        \n\t"                        \
    :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc");

/* (sc2,sc1,sc0) = i * j */
#define SQRADDSC(i, j)                               \
__asm__(                                             \
    "movd  %3,%%mm0     \n\t"                        \
    "movd  %4,%%mm1     \n\t"                        \
    "pmuludq %%mm1,%%mm0\n\t"                        \
    "movd  %%mm0,%0     \n\t"                        \
    "psrlq $32,%%mm0    \n\t"                        \
    "movd  %%mm0,%1     \n\t"                        \
    "xorl  %2,%2        \n\t"                        \
    :"=r"(sc0), "=r"(sc1), "=r"(sc2): "m"(i), "m"(j));

/* TAO removed sc0,1,2 as input to remove warning so %6,%7 become %3,%4 */

/* (sc2,sc1,sc0) += i * j */
#define SQRADDAC(i, j)                               \
__asm__(                                             \
    "movd  %6,%%mm0     \n\t"                        \
    "movd  %7,%%mm1     \n\t"                        \
    "pmuludq %%mm1,%%mm0\n\t"                        \
    "movd  %%mm0,%%eax  \n\t"                        \
    "psrlq $32,%%mm0    \n\t"                        \
    "movd  %%mm0,%%edx  \n\t"                        \
    "addl  %%eax,%0     \n\t"                        \
    "adcl  %%edx,%1     \n\t"                        \
    "adcl  $0,%2        \n\t"                        \
    :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j) :"%eax","%edx","cc");

/* (c2,c1,c0) += 2 * (sc2,sc1,sc0) */
#define SQRADDDB                                     \
__asm__(                                             \
    "addl %6,%0         \n\t"                        \
    "adcl %7,%1         \n\t"                        \
    "adcl %8,%2         \n\t"                        \
    "addl %6,%0         \n\t"                        \
    "adcl %7,%1         \n\t"                        \
    "adcl %8,%2         \n\t"                        \
    :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");

#elif defined(TFM_ARM)

/* ARM code */

#define COMBA_START

#define CLEAR_CARRY \
   c0 = c1 = c2 = 0;

#define COMBA_STORE(x) \
   x = c0;

#define COMBA_STORE2(x) \
   x = c1;

/* shift the column accumulator down one digit */
#define CARRY_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_FINI

/* (c2,c1,c0) += i * i */
#define SQRADD(i, j)                                 \
__asm__(                                             \
    "  UMULL  r0,r1,%6,%6              \n\t"         \
    "  ADDS   %0,%0,r0                 \n\t"         \
    "  ADCS   %1,%1,r1                 \n\t"         \
    "  ADC    %2,%2,#0                 \n\t"         \
    :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i) : "r0", "r1", "cc");

/* for squaring some of the terms are doubled... */
#define SQRADD2(i, j)                                \
__asm__(                                             \
    "  UMULL  r0,r1,%6,%7              \n\t"         \
    "  ADDS   %0,%0,r0                 \n\t"         \
    "  ADCS   %1,%1,r1                 \n\t"         \
    "  ADC    %2,%2,#0                 \n\t"         \
    "  ADDS   %0,%0,r0                 \n\t"         \
    "  ADCS   %1,%1,r1                 \n\t"         \
    "  ADC    %2,%2,#0                 \n\t"         \
    :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "cc");

/* (sc2,sc1,sc0) = i * j  (SUB x,x,x zeroes sc2) */
#define SQRADDSC(i, j)                               \
__asm__(                                             \
    "  UMULL  %0,%1,%3,%4              \n\t"         \
    "  SUB    %2,%2,%2                 \n\t"         \
    :"=r"(sc0), "=r"(sc1), "=r"(sc2) : "r"(i), "r"(j) : "cc");

/* TAO removed sc0,1,2 as input to remove warning so %6,%7 become %3,%4 */

/* (sc2,sc1,sc0) += i * j */
#define SQRADDAC(i, j)                               \
__asm__(                                             \
    "  UMULL  r0,r1,%6,%7              \n\t"         \
    "  ADDS   %0,%0,r0                 \n\t"         \
    "  ADCS   %1,%1,r1                 \n\t"         \
    "  ADC    %2,%2,#0                 \n\t"         \
    :"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "r0", "r1", "cc");

/* (c2,c1,c0) += 2 * (sc2,sc1,sc2) -- sc triple added twice */
#define SQRADDDB                                     \
__asm__(                                             \
    "  ADDS  %0,%0,%3                  \n\t"         \
    "  ADCS  %1,%1,%4                  \n\t"         \
    "  ADC   %2,%2,%5                  \n\t"         \
    "  ADDS  %0,%0,%3                  \n\t"         \
    "  ADCS  %1,%1,%4                  \n\t"         \
    "  ADC   %2,%2,%5                  \n\t"         \
    :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");

#elif defined(TFM_PPC32)

/* PPC32 */

#define COMBA_START

#define CLEAR_CARRY \
   c0 = c1 = c2 = 0;

#define COMBA_STORE(x) \
   x = c0;

#define COMBA_STORE2(x) \
   x = c1;

/* shift the column accumulator down one digit */
#define CARRY_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_FINI

/* (c2,c1,c0) += i * i  (r16 reused for low then high halves) */
#define SQRADD(i, j)                                 \
__asm__(                                             \
   " mullw  16,%6,%6       \n\t"                     \
   " addc   %0,%0,16       \n\t"                     \
   " mulhwu 16,%6,%6       \n\t"                     \
   " adde   %1,%1,16       \n\t"                     \
   " addze  %2,%2          \n\t"                     \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","cc");

/* for squaring some of the terms are doubled... */
#define SQRADD2(i, j)                                \
__asm__(                                             \
   " mullw  16,%6,%7       \n\t"                     \
   " mulhwu 17,%6,%7       \n\t"                     \
   " addc   %0,%0,16       \n\t"                     \
   " adde   %1,%1,17       \n\t"                     \
   " addze  %2,%2          \n\t"                     \
   " addc   %0,%0,16       \n\t"                     \
   " adde   %1,%1,17       \n\t"                     \
   " addze  %2,%2          \n\t"                     \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","cc");

/* (sc2,sc1,sc0) = i * j */
#define SQRADDSC(i, j)                               \
__asm__(                                             \
   " mullw  %0,%6,%7        \n\t"                    \
   " mulhwu %1,%6,%7        \n\t"                    \
   " xor    %2,%2,%2        \n\t"                    \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc");

/* (sc2,sc1,sc0) += i * j */
#define SQRADDAC(i, j)                               \
__asm__(                                             \
   " mullw  16,%6,%7       \n\t"                     \
   " addc   %0,%0,16       \n\t"                     \
   " mulhwu 16,%6,%7       \n\t"                     \
   " adde   %1,%1,16       \n\t"                     \
   " addze  %2,%2          \n\t"                     \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "cc");

/* (c2,c1,c0) += 2 * (sc2,sc1,sc0) */
#define SQRADDDB                                     \
__asm__(                                             \
   " addc   %0,%0,%3        \n\t"                    \
   " adde   %1,%1,%4        \n\t"                    \
   " adde   %2,%2,%5        \n\t"                    \
   " addc   %0,%0,%3        \n\t"                    \
   " adde   %1,%1,%4        \n\t"                    \
   " adde   %2,%2,%5        \n\t"                    \
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");

#elif defined(TFM_PPC64)
/* PPC64 */

#define COMBA_START

#define CLEAR_CARRY \
   c0 = c1 = c2 = 0;

#define COMBA_STORE(x) \
   x = c0;

#define COMBA_STORE2(x) \
   x = c1;

/* shift the column accumulator down one digit */
#define CARRY_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_FINI

/* (c2,c1,c0) += i * i */
#define SQRADD(i, j)                                 \
__asm__(                                             \
   " mulld  16,%6,%6       \n\t"                     \
   " addc   %0,%0,16       \n\t"                     \
   " mulhdu 16,%6,%6       \n\t"                     \
   " adde   %1,%1,16       \n\t"                     \
   " addze  %2,%2          \n\t"                     \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","cc");

/* for squaring some of the terms are doubled... */
#define SQRADD2(i, j)                                \
__asm__(                                             \
   " mulld  16,%6,%7       \n\t"                     \
   " mulhdu 17,%6,%7       \n\t"                     \
   " addc   %0,%0,16       \n\t"                     \
   " adde   %1,%1,17       \n\t"                     \
   " addze  %2,%2          \n\t"                     \
   " addc   %0,%0,16       \n\t"                     \
   " adde   %1,%1,17       \n\t"                     \
   " addze  %2,%2          \n\t"                     \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","cc");

/* (sc2,sc1,sc0) = i * j */
#define SQRADDSC(i, j)                               \
__asm__(                                             \
   " mulld  %0,%6,%7        \n\t"                    \
   " mulhdu %1,%6,%7        \n\t"                    \
   " xor    %2,%2,%2        \n\t"                    \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc");

/* (sc2,sc1,sc0) += i * j */
#define SQRADDAC(i, j)                               \
__asm__(                                             \
   " mulld  16,%6,%7       \n\t"                     \
   " addc   %0,%0,16       \n\t"                     \
   " mulhdu 16,%6,%7       \n\t"                     \
   " adde   %1,%1,16       \n\t"                     \
   " addze  %2,%2          \n\t"                     \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "cc");

/* (c2,c1,c0) += 2 * (sc2,sc1,sc0) */
#define SQRADDDB                                     \
__asm__(                                             \
   " addc   %0,%0,%3        \n\t"                    \
   " adde   %1,%1,%4        \n\t"                    \
   " adde   %2,%2,%5        \n\t"                    \
   " addc   %0,%0,%3        \n\t"                    \
   " adde   %1,%1,%4        \n\t"                    \
   " adde   %2,%2,%5        \n\t"                    \
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");


#elif defined(TFM_AVR32)

/* AVR32 */

#define COMBA_START

#define CLEAR_CARRY \
   c0 = c1 = c2 = 0;

#define COMBA_STORE(x) \
   x = c0;

#define COMBA_STORE2(x) \
   x = c1;

/* shift the column accumulator down one digit */
#define CARRY_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_FINI

/* (c2,c1,c0) += i * i  (mulu.d writes r3:r2; acr adds the carry into c2) */
#define SQRADD(i, j)                                 \
__asm__(                                             \
   " mulu.d r2,%6,%6       \n\t"                     \
   " add    %0,%0,r2       \n\t"                     \
   " adc    %1,%1,r3       \n\t"                     \
   " acr    %2             \n\t"                     \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r2","r3");
00951 /* for squaring some of the terms are doubled... */ 00952 #define SQRADD2(i, j) \ 00953 __asm__( \ 00954 " mulu.d r2,%6,%7 \n\t" \ 00955 " add %0,%0,r2 \n\t" \ 00956 " adc %1,%1,r3 \n\t" \ 00957 " acr %2, \n\t" \ 00958 " add %0,%0,r2 \n\t" \ 00959 " adc %1,%1,r3 \n\t" \ 00960 " acr %2, \n\t" \ 00961 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2", "r3"); 00962 00963 #define SQRADDSC(i, j) \ 00964 __asm__( \ 00965 " mulu.d r2,%6,%7 \n\t" \ 00966 " mov %0,r2 \n\t" \ 00967 " mov %1,r3 \n\t" \ 00968 " eor %2,%2 \n\t" \ 00969 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "r2", "r3"); 00970 00971 #define SQRADDAC(i, j) \ 00972 __asm__( \ 00973 " mulu.d r2,%6,%7 \n\t" \ 00974 " add %0,%0,r2 \n\t" \ 00975 " adc %1,%1,r3 \n\t" \ 00976 " acr %2 \n\t" \ 00977 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r2", "r3"); 00978 00979 #define SQRADDDB \ 00980 __asm__( \ 00981 " add %0,%0,%3 \n\t" \ 00982 " adc %1,%1,%4 \n\t" \ 00983 " adc %2,%2,%5 \n\t" \ 00984 " add %0,%0,%3 \n\t" \ 00985 " adc %1,%1,%4 \n\t" \ 00986 " adc %2,%2,%5 \n\t" \ 00987 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc"); 00988 00989 00990 #else 00991 00992 #define TFM_ISO 00993 00994 /* ISO C portable code */ 00995 00996 #define COMBA_START 00997 00998 #define CLEAR_CARRY \ 00999 c0 = c1 = c2 = 0; 01000 01001 #define COMBA_STORE(x) \ 01002 x = c0; 01003 01004 #define COMBA_STORE2(x) \ 01005 x = c1; 01006 01007 #define CARRY_FORWARD \ 01008 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 01009 01010 #define COMBA_FINI 01011 01012 /* multiplies point i and j, updates carry "c1" and digit c2 */ 01013 #define SQRADD(i, j) \ 01014 do { fp_word t; \ 01015 t = c0 + ((fp_word)i) * ((fp_word)j); c0 = (fp_digit)t; \ 01016 t = c1 + (t >> DIGIT_BIT); c1 = (fp_digit)t; \ 01017 c2 +=(fp_digit) (t >> DIGIT_BIT); \ 01018 } while (0); 01019 01020 01021 /* for squaring some of the 
terms are doubled... */ 01022 #define SQRADD2(i, j) \ 01023 do { fp_word t; \ 01024 t = ((fp_word)i) * ((fp_word)j); \ 01025 tt = (fp_word)c0 + t; c0 = (fp_digit)tt; \ 01026 tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = (fp_digit)tt; \ 01027 c2 +=(fp_digit)( tt >> DIGIT_BIT); \ 01028 tt = (fp_word)c0 + t; c0 = (fp_digit)tt; \ 01029 tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = (fp_digit)tt; \ 01030 c2 +=(fp_digit) (tt >> DIGIT_BIT); \ 01031 } while (0); 01032 01033 #define SQRADDSC(i, j) \ 01034 do { fp_word t; \ 01035 t = ((fp_word)i) * ((fp_word)j); \ 01036 sc0 = (fp_digit)t; sc1 = (t >> DIGIT_BIT); sc2 = 0; \ 01037 } while (0); 01038 01039 #define SQRADDAC(i, j) \ 01040 do { fp_word t; \ 01041 t = sc0 + ((fp_word)i) * ((fp_word)j); sc0 = (fp_digit)t; \ 01042 t = sc1 + (t >> DIGIT_BIT); sc1 = (fp_digit)t; \ 01043 sc2 += (fp_digit)(t >> DIGIT_BIT); \ 01044 } while (0); 01045 01046 #define SQRADDDB \ 01047 do { fp_word t; \ 01048 t = ((fp_word)sc0) + ((fp_word)sc0) + c0; c0 = (fp_digit)t; \ 01049 t = ((fp_word)sc1) + ((fp_word)sc1) + c1 + (t >> DIGIT_BIT); \ 01050 c1 = (fp_digit)t; \ 01051 c2 = c2 + (fp_digit)(((fp_word)sc2) + ((fp_word)sc2) + (t >> DIGIT_BIT)); \ 01052 } while (0); 01053 01054 #endif 01055 01056 #ifdef TFM_SMALL_SET 01057 #include "fp_sqr_comba_small_set.i" 01058 #endif 01059 01060 #if defined(TFM_SQR3) 01061 #include "fp_sqr_comba_3.i" 01062 #endif 01063 #if defined(TFM_SQR4) 01064 #include "fp_sqr_comba_4.i" 01065 #endif 01066 #if defined(TFM_SQR6) 01067 #include "fp_sqr_comba_6.i" 01068 #endif 01069 #if defined(TFM_SQR7) 01070 #include "fp_sqr_comba_7.i" 01071 #endif 01072 #if defined(TFM_SQR8) 01073 #include "fp_sqr_comba_8.i" 01074 #endif 01075 #if defined(TFM_SQR9) 01076 #include "fp_sqr_comba_9.i" 01077 #endif 01078 #if defined(TFM_SQR12) 01079 #include "fp_sqr_comba_12.i" 01080 #endif 01081 #if defined(TFM_SQR17) 01082 #include "fp_sqr_comba_17.i" 01083 #endif 01084 #if defined(TFM_SQR20) 01085 #include "fp_sqr_comba_20.i" 01086 #endif 01087 #if 
defined(TFM_SQR24) 01088 #include "fp_sqr_comba_24.i" 01089 #endif 01090 #if defined(TFM_SQR28) 01091 #include "fp_sqr_comba_28.i" 01092 #endif 01093 #if defined(TFM_SQR32) 01094 #include "fp_sqr_comba_32.i" 01095 #endif 01096 #if defined(TFM_SQR48) 01097 #include "fp_sqr_comba_48.i" 01098 #endif 01099 #if defined(TFM_SQR64) 01100 #include "fp_sqr_comba_64.i" 01101 #endif 01102 /* end fp_sqr_comba.c asm */ 01103 01104 /* start fp_mul_comba.c asm */ 01105 /* these are the combas. Worship them. */ 01106 #if defined(TFM_X86) 01107 /* Generic x86 optimized code */ 01108 01109 /* anything you need at the start */ 01110 #define COMBA_START 01111 01112 /* clear the chaining variables */ 01113 #define COMBA_CLEAR \ 01114 c0 = c1 = c2 = 0; 01115 01116 /* forward the carry to the next digit */ 01117 #define COMBA_FORWARD \ 01118 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 01119 01120 /* store the first sum */ 01121 #define COMBA_STORE(x) \ 01122 x = c0; 01123 01124 /* store the second sum [carry] */ 01125 #define COMBA_STORE2(x) \ 01126 x = c1; 01127 01128 /* anything you need at the end */ 01129 #define COMBA_FINI 01130 01131 /* this should multiply i and j */ 01132 #define MULADD(i, j) \ 01133 __asm__( \ 01134 "movl %6,%%eax \n\t" \ 01135 "mull %7 \n\t" \ 01136 "addl %%eax,%0 \n\t" \ 01137 "adcl %%edx,%1 \n\t" \ 01138 "adcl $0,%2 \n\t" \ 01139 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc"); 01140 01141 #elif defined(TFM_X86_64) 01142 /* x86-64 optimized */ 01143 01144 /* anything you need at the start */ 01145 #define COMBA_START 01146 01147 /* clear the chaining variables */ 01148 #define COMBA_CLEAR \ 01149 c0 = c1 = c2 = 0; 01150 01151 /* forward the carry to the next digit */ 01152 #define COMBA_FORWARD \ 01153 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 01154 01155 /* store the first sum */ 01156 #define COMBA_STORE(x) \ 01157 x = c0; 01158 01159 /* store the second sum [carry] */ 01160 #define COMBA_STORE2(x) \ 01161 x = 
c1; 01162 01163 /* anything you need at the end */ 01164 #define COMBA_FINI 01165 01166 /* this should multiply i and j */ 01167 #define MULADD(i, j) \ 01168 __asm__ ( \ 01169 "movq %6,%%rax \n\t" \ 01170 "mulq %7 \n\t" \ 01171 "addq %%rax,%0 \n\t" \ 01172 "adcq %%rdx,%1 \n\t" \ 01173 "adcq $0,%2 \n\t" \ 01174 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","cc"); 01175 01176 #elif defined(TFM_SSE2) 01177 /* use SSE2 optimizations */ 01178 01179 /* anything you need at the start */ 01180 #define COMBA_START 01181 01182 /* clear the chaining variables */ 01183 #define COMBA_CLEAR \ 01184 c0 = c1 = c2 = 0; 01185 01186 /* forward the carry to the next digit */ 01187 #define COMBA_FORWARD \ 01188 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 01189 01190 /* store the first sum */ 01191 #define COMBA_STORE(x) \ 01192 x = c0; 01193 01194 /* store the second sum [carry] */ 01195 #define COMBA_STORE2(x) \ 01196 x = c1; 01197 01198 /* anything you need at the end */ 01199 #define COMBA_FINI \ 01200 __asm__("emms"); 01201 01202 /* this should multiply i and j */ 01203 #define MULADD(i, j) \ 01204 __asm__( \ 01205 "movd %6,%%mm0 \n\t" \ 01206 "movd %7,%%mm1 \n\t" \ 01207 "pmuludq %%mm1,%%mm0\n\t" \ 01208 "movd %%mm0,%%eax \n\t" \ 01209 "psrlq $32,%%mm0 \n\t" \ 01210 "addl %%eax,%0 \n\t" \ 01211 "movd %%mm0,%%eax \n\t" \ 01212 "adcl %%eax,%1 \n\t" \ 01213 "adcl $0,%2 \n\t" \ 01214 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","cc"); 01215 01216 #elif defined(TFM_ARM) 01217 /* ARM code */ 01218 01219 #define COMBA_START 01220 01221 #define COMBA_CLEAR \ 01222 c0 = c1 = c2 = 0; 01223 01224 #define COMBA_FORWARD \ 01225 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 01226 01227 #define COMBA_STORE(x) \ 01228 x = c0; 01229 01230 #define COMBA_STORE2(x) \ 01231 x = c1; 01232 01233 #define COMBA_FINI 01234 01235 #define MULADD(i, j) \ 01236 __asm__( \ 01237 " UMULL r0,r1,%6,%7 \n\t" \ 01238 " ADDS %0,%0,r0 \n\t" \ 
01239 " ADCS %1,%1,r1 \n\t" \ 01240 " ADC %2,%2,#0 \n\t" \ 01241 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "cc"); 01242 01243 #elif defined(TFM_PPC32) 01244 /* For 32-bit PPC */ 01245 01246 #define COMBA_START 01247 01248 #define COMBA_CLEAR \ 01249 c0 = c1 = c2 = 0; 01250 01251 #define COMBA_FORWARD \ 01252 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 01253 01254 #define COMBA_STORE(x) \ 01255 x = c0; 01256 01257 #define COMBA_STORE2(x) \ 01258 x = c1; 01259 01260 #define COMBA_FINI 01261 01262 /* untested: will mulhwu change the flags? Docs say no */ 01263 #define MULADD(i, j) \ 01264 __asm__( \ 01265 " mullw 16,%6,%7 \n\t" \ 01266 " addc %0,%0,16 \n\t" \ 01267 " mulhwu 16,%6,%7 \n\t" \ 01268 " adde %1,%1,16 \n\t" \ 01269 " addze %2,%2 \n\t" \ 01270 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16"); 01271 01272 #elif defined(TFM_PPC64) 01273 /* For 64-bit PPC */ 01274 01275 #define COMBA_START 01276 01277 #define COMBA_CLEAR \ 01278 c0 = c1 = c2 = 0; 01279 01280 #define COMBA_FORWARD \ 01281 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 01282 01283 #define COMBA_STORE(x) \ 01284 x = c0; 01285 01286 #define COMBA_STORE2(x) \ 01287 x = c1; 01288 01289 #define COMBA_FINI 01290 01291 /* untested: will mulhwu change the flags? 
Docs say no */ 01292 #define MULADD(i, j) \ 01293 ____asm__( \ 01294 " mulld 16,%6,%7 \n\t" \ 01295 " addc %0,%0,16 \n\t" \ 01296 " mulhdu 16,%6,%7 \n\t" \ 01297 " adde %1,%1,16 \n\t" \ 01298 " addze %2,%2 \n\t" \ 01299 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16"); 01300 01301 #elif defined(TFM_AVR32) 01302 01303 /* ISO C code */ 01304 01305 #define COMBA_START 01306 01307 #define COMBA_CLEAR \ 01308 c0 = c1 = c2 = 0; 01309 01310 #define COMBA_FORWARD \ 01311 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 01312 01313 #define COMBA_STORE(x) \ 01314 x = c0; 01315 01316 #define COMBA_STORE2(x) \ 01317 x = c1; 01318 01319 #define COMBA_FINI 01320 01321 #define MULADD(i, j) \ 01322 ____asm__( \ 01323 " mulu.d r2,%6,%7 \n\t"\ 01324 " add %0,r2 \n\t"\ 01325 " adc %1,%1,r3 \n\t"\ 01326 " acr %2 \n\t"\ 01327 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2","r3"); 01328 01329 #else 01330 /* ISO C code */ 01331 01332 #define COMBA_START 01333 01334 #define COMBA_CLEAR \ 01335 c0 = c1 = c2 = 0; 01336 01337 #define COMBA_FORWARD \ 01338 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 01339 01340 #define COMBA_STORE(x) \ 01341 x = c0; 01342 01343 #define COMBA_STORE2(x) \ 01344 x = c1; 01345 01346 #define COMBA_FINI 01347 01348 #define MULADD(i, j) \ 01349 do { fp_word t; \ 01350 t = (fp_word)c0 + ((fp_word)i) * ((fp_word)j); c0 = (fp_digit)t; \ 01351 t = (fp_word)c1 + (t >> DIGIT_BIT); \ 01352 c1 = (fp_digit)t; c2 += (fp_digit)(t >> DIGIT_BIT); \ 01353 } while (0); 01354 01355 #endif 01356 01357 01358 #ifdef TFM_SMALL_SET 01359 #include "fp_mul_comba_small_set.i" 01360 #endif 01361 01362 #if defined(TFM_MUL3) 01363 #include "fp_mul_comba_3.i" 01364 #endif 01365 #if defined(TFM_MUL4) 01366 #include "fp_mul_comba_4.i" 01367 #endif 01368 #if defined(TFM_MUL6) 01369 #include "fp_mul_comba_6.i" 01370 #endif 01371 #if defined(TFM_MUL7) 01372 #include "fp_mul_comba_7.i" 01373 #endif 01374 #if defined(TFM_MUL8) 01375 #include 
"fp_mul_comba_8.i" 01376 #endif 01377 #if defined(TFM_MUL9) 01378 #include "fp_mul_comba_9.i" 01379 #endif 01380 #if defined(TFM_MUL12) 01381 #include "fp_mul_comba_12.i" 01382 #endif 01383 #if defined(TFM_MUL17) 01384 #include "fp_mul_comba_17.i" 01385 #endif 01386 #if defined(TFM_MUL20) 01387 #include "fp_mul_comba_20.i" 01388 #endif 01389 #if defined(TFM_MUL24) 01390 #include "fp_mul_comba_24.i" 01391 #endif 01392 #if defined(TFM_MUL28) 01393 #include "fp_mul_comba_28.i" 01394 #endif 01395 #if defined(TFM_MUL32) 01396 #include "fp_mul_comba_32.i" 01397 #endif 01398 #if defined(TFM_MUL48) 01399 #include "fp_mul_comba_48.i" 01400 #endif 01401 #if defined(TFM_MUL64) 01402 #include "fp_mul_comba_64.i" 01403 #endif 01404 01405 /* end fp_mul_comba.c asm */ 01406 01407
Generated on Wed Jul 13 2022 02:18:38 by doxygen 1.7.2