asm.c
/* asm.c
 *
 * Copyright (C) 2006-2016 wolfSSL Inc.
 *
 * This file is part of wolfSSL.
 *
 * wolfSSL is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * wolfSSL is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
 */


#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <wolfssl/wolfcrypt/settings.h>

/*
 * Based on public domain TomsFastMath 0.10 by Tom St Denis, tomstdenis@iahu.ca,
 * http://math.libtomcrypt.com
 */


/******************************************************************/
/* fp_montgomery_reduce.c asm or generic */


/* Each platform needs to query cpuid to see which extensions are supported
 * (here leaf 7: BMI2 for MULX, ADX for ADCX/ADOX). Also, let's set up a
 * macro for proper linkage w/o ABI conflicts.
 */

#if defined(HAVE_INTEL_MULX)
#ifndef _MSC_VER
    #define cpuid(reg, leaf, sub)\
            __asm__ __volatile__ ("cpuid":\
             "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3]) :\
             "a" (leaf), "c"(sub));

    #define XASM_LINK(f) asm(f)
#else

    #include <intrin.h>
    /* __cpuidex takes the sub-leaf as well, matching the three-argument
     * call sites below (the two-argument __cpuid form would not) */
    #define cpuid(reg, leaf, sub) __cpuidex((int*)(reg), leaf, sub)

    #define XASM_LINK(f)

#endif /* _MSC_VER */

#define EAX 0
#define EBX 1
#define ECX 2
#define EDX 3

#define CPUID_AVX1   0x1
#define CPUID_AVX2   0x2
#define CPUID_RDRAND 0x4
#define CPUID_RDSEED 0x8
#define CPUID_BMI2   0x10   /* MULX, RORX */
#define CPUID_ADX    0x20   /* ADCX, ADOX */

#define IS_INTEL_AVX1   (cpuid_flags & CPUID_AVX1)
#define IS_INTEL_AVX2   (cpuid_flags & CPUID_AVX2)
#define IS_INTEL_BMI2   (cpuid_flags & CPUID_BMI2)
#define IS_INTEL_ADX    (cpuid_flags & CPUID_ADX)
#define IS_INTEL_RDRAND (cpuid_flags & CPUID_RDRAND)
#define IS_INTEL_RDSEED (cpuid_flags & CPUID_RDSEED)
#define SET_FLAGS

static word32 cpuid_check = 0;
static word32 cpuid_flags = 0;

static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit) {
    int got_intel_cpu = 0;
    unsigned int reg[5];

    reg[4] = '\0';
    cpuid(reg, 0, 0);
    if (memcmp((char *)&(reg[EBX]), "Genu", 4) == 0 &&
        memcmp((char *)&(reg[EDX]), "ineI", 4) == 0 &&
        memcmp((char *)&(reg[ECX]), "ntel", 4) == 0) {
        got_intel_cpu = 1;
    }
    if (got_intel_cpu) {
        cpuid(reg, leaf, sub);
        return ((reg[num] >> bit) & 0x1);
    }
    return 0;
}

INLINE static int set_cpuid_flags(void) {
    if (cpuid_check == 0) {
        if (cpuid_flag(7, 0, EBX,  8)) { cpuid_flags |= CPUID_BMI2; }
        if (cpuid_flag(7, 0, EBX, 19)) { cpuid_flags |= CPUID_ADX;  }
        cpuid_check = 1;
        return 0;
    }
    return 1;
}

#define RETURN return
#define IF_HAVE_INTEL_MULX(func, ret)             \
    if (cpuid_check == 0) set_cpuid_flags();      \
    if (IS_INTEL_BMI2 && IS_INTEL_ADX) { func; ret; }

#else
#define IF_HAVE_INTEL_MULX(func, ret)
#endif
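/* Illustration (not part of the original TFM code): how the
 * IF_HAVE_INTEL_MULX dispatch above is meant to be used. The function names
 * here are hypothetical; the real call sites live in tfm.c. If CPUID leaf 7
 * reports both BMI2 (EBX bit 8, MULX) and ADX (EBX bit 19, ADCX/ADOX), the
 * accelerated routine runs and the wrapper returns; otherwise control falls
 * through to the portable code.
 */
#if 0 /* sketch only, not compiled */
static void mul_comba_dispatch(fp_int* A, fp_int* B, fp_int* C)
{
    /* first invocation probes CPUID, later ones reuse cpuid_flags */
    IF_HAVE_INTEL_MULX(mul_comba_mulx(A, B, C), RETURN);
    mul_comba_portable(A, B, C);   /* generic fallback */
}
#endif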
#if defined(TFM_X86) && !defined(TFM_SSE2)
/* x86-32 code */

#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
   mu = c[x] * mp

#define INNERMUL                     \
__asm__(                             \
   "movl %5,%%eax \n\t"              \
   "mull %4       \n\t"              \
   "addl %1,%%eax \n\t"              \
   "adcl $0,%%edx \n\t"              \
   "addl %%eax,%0 \n\t"              \
   "adcl $0,%%edx \n\t"              \
   "movl %%edx,%1 \n\t"              \
:"=g"(_c[LO]), "=r"(cy)              \
:"0"(_c[LO]), "1"(cy), "g"(mu), "g"(*tmpm++) \
: "%eax", "%edx", "cc")

#define PROPCARRY                    \
__asm__(                             \
   "addl   %1,%0   \n\t"             \
   "setb   %%al    \n\t"             \
   "movzbl %%al,%1 \n\t"             \
:"=g"(_c[LO]), "=r"(cy)              \
:"0"(_c[LO]), "1"(cy)                \
: "%eax", "cc")

/******************************************************************/
#elif defined(TFM_X86_64)
/* x86-64 code */

#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
   mu = c[x] * mp;

#define INNERMUL                     \
__asm__(                             \
   "movq %5,%%rax \n\t"              \
   "mulq %4       \n\t"              \
   "addq %1,%%rax \n\t"              \
   "adcq $0,%%rdx \n\t"              \
   "addq %%rax,%0 \n\t"              \
   "adcq $0,%%rdx \n\t"              \
   "movq %%rdx,%1 \n\t"              \
:"=g"(_c[LO]), "=r"(cy)              \
:"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \
: "%rax", "%rdx", "cc")

#if defined(HAVE_INTEL_MULX)
#define MULX_INIT(a0, c0, cy)\
    __asm__ volatile(                                     \
        "xorq  %%r10, %%r10\n\t"                          \
        "movq  %1,%%rdx\n\t"                              \
        "addq  %2, %0\n\t"       /* c0+=cy; Set CF, OF */ \
        "adoxq %%r10, %%r10\n\t" /* Reset OF */           \
        :"+m"(c0):"r"(a0),"r"(cy):"%r8","%r9","%r10","%r11","%r12","%rdx") ;

#define MULX_INNERMUL_R1(c0, c1, pre, rdx)\
    {                                    \
    __asm__ volatile (                   \
        "movq  %3, %%rdx\n\t"            \
        "mulx  %%r11,%%r9, %%r8 \n\t"    \
        "movq  %2, %%r12\n\t"            \
        "adoxq %%r9,%0 \n\t"             \
        "adcxq %%r8,%1 \n\t"             \
        :"+r"(c0),"+r"(c1):"m"(pre),"r"(rdx):"%r8","%r9","%r10","%r11","%r12","%rdx" \
    ); }

#define MULX_INNERMUL_R2(c0, c1, pre, rdx)\
    {                                    \
    __asm__ volatile (                   \
        "movq  %3, %%rdx\n\t"            \
        "mulx  %%r12,%%r9, %%r8 \n\t"    \
        "movq  %2, %%r11\n\t"            \
        "adoxq %%r9,%0 \n\t"             \
        "adcxq %%r8,%1 \n\t"             \
        :"+r"(c0),"+r"(c1):"m"(pre),"r"(rdx):"%r8","%r9","%r10","%r11","%r12","%rdx" \
    ); }

#define MULX_LOAD_R1(val)\
    __asm__ volatile (                   \
        "movq %0, %%r11\n\t"             \
        ::"m"(val):"%r8","%r9","%r10","%r11","%r12","%rdx"\
    ) ;

#define MULX_INNERMUL_LAST(c0, c1, rdx)\
    {                                    \
    __asm__ volatile (                   \
        "movq  %2, %%rdx\n\t"            \
        "mulx  %%r12,%%r9, %%r8 \n\t"    \
        "movq  $0, %%r10 \n\t"           \
        "adoxq %%r10, %%r9 \n\t"         \
        "adcq  $0,%%r8 \n\t"             \
        "addq  %%r9,%0 \n\t"             \
        "adcq  $0,%%r8 \n\t"             \
        "movq  %%r8,%1 \n\t"             \
        :"+m"(c0),"=m"(c1):"r"(rdx):"%r8","%r9","%r10","%r11","%r12","%rdx"\
    ); }

#define MULX_INNERMUL8(x, y, z, cy)\
{   word64 rdx = y ;\
    MULX_LOAD_R1(x[0]) ;\
    MULX_INIT(y, _c0, cy) ; /* rdx=y; z0+=cy; */ \
    MULX_INNERMUL_R1(_c0, _c1, x[1], rdx) ;\
    MULX_INNERMUL_R2(_c1, _c2, x[2], rdx) ;\
    MULX_INNERMUL_R1(_c2, _c3, x[3], rdx) ;\
    MULX_INNERMUL_R2(_c3, _c4, x[4], rdx) ;\
    MULX_INNERMUL_R1(_c4, _c5, x[5], rdx) ;\
    MULX_INNERMUL_R2(_c5, _c6, x[6], rdx) ;\
    MULX_INNERMUL_R1(_c6, _c7, x[7], rdx) ;\
    MULX_INNERMUL_LAST(_c7, cy, rdx) ;\
}
#define INNERMUL8_MULX \
{\
    MULX_INNERMUL8(tmpm, mu, _c, cy);\
}
#endif
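/* Illustration (not part of the original TFM code): what one MULX step above
 * computes, in portable C. MULX produces a full double-width product without
 * touching flags, so the ADOX chain (OF) and the ADCX chain (CF) can carry
 * the low and high halves independently across all eight digits. This
 * simplified scalar equivalent folds both carry chains at each step; the
 * assembly keeps them separate until MULX_INNERMUL_LAST. It assumes fp_word
 * is the double-width type from tfm.h.
 */
#if 0 /* sketch only, not compiled */
static void mulx_step_equiv(fp_digit* c0, fp_digit* c1,
                            fp_digit x_i, fp_digit y)
{
    fp_word t  = (fp_word)x_i * (fp_word)y;         /* what mulx computes  */
    fp_word lo = (fp_word)*c0 + (fp_digit)t;        /* the adoxq chain     */
    fp_word hi = (fp_word)*c1 + (fp_digit)(t >> DIGIT_BIT)  /* the adcxq   */
               + (fp_digit)(lo >> DIGIT_BIT);       /* chains merged here  */
    *c0 = (fp_digit)lo;
    *c1 = (fp_digit)hi;
}
#endif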
#define INNERMUL8                    \
__asm__(                             \
   "movq 0(%5),%%rax    \n\t"        \
   "movq 0(%2),%%r10    \n\t"        \
   "movq 0x8(%5),%%r11  \n\t"        \
   "mulq %4             \n\t"        \
   "addq %%r10,%%rax    \n\t"        \
   "adcq $0,%%rdx       \n\t"        \
   "movq 0x8(%2),%%r10  \n\t"        \
   "addq %3,%%rax       \n\t"        \
   "adcq $0,%%rdx       \n\t"        \
   "movq %%rax,0(%0)    \n\t"        \
   "movq %%rdx,%1       \n\t"        \
\
   "movq %%r11,%%rax    \n\t"        \
   "movq 0x10(%5),%%r11 \n\t"        \
   "mulq %4             \n\t"        \
   "addq %%r10,%%rax    \n\t"        \
   "adcq $0,%%rdx       \n\t"        \
   "movq 0x10(%2),%%r10 \n\t"        \
   "addq %3,%%rax       \n\t"        \
   "adcq $0,%%rdx       \n\t"        \
   "movq %%rax,0x8(%0)  \n\t"        \
   "movq %%rdx,%1       \n\t"        \
\
   "movq %%r11,%%rax    \n\t"        \
   "movq 0x18(%5),%%r11 \n\t"        \
   "mulq %4             \n\t"        \
   "addq %%r10,%%rax    \n\t"        \
   "adcq $0,%%rdx       \n\t"        \
   "movq 0x18(%2),%%r10 \n\t"        \
   "addq %3,%%rax       \n\t"        \
   "adcq $0,%%rdx       \n\t"        \
   "movq %%rax,0x10(%0) \n\t"        \
   "movq %%rdx,%1       \n\t"        \
\
   "movq %%r11,%%rax    \n\t"        \
   "movq 0x20(%5),%%r11 \n\t"        \
   "mulq %4             \n\t"        \
   "addq %%r10,%%rax    \n\t"        \
   "adcq $0,%%rdx       \n\t"        \
   "movq 0x20(%2),%%r10 \n\t"        \
   "addq %3,%%rax       \n\t"        \
   "adcq $0,%%rdx       \n\t"        \
   "movq %%rax,0x18(%0) \n\t"        \
   "movq %%rdx,%1       \n\t"        \
\
   "movq %%r11,%%rax    \n\t"        \
   "movq 0x28(%5),%%r11 \n\t"        \
   "mulq %4             \n\t"        \
   "addq %%r10,%%rax    \n\t"        \
   "adcq $0,%%rdx       \n\t"        \
   "movq 0x28(%2),%%r10 \n\t"        \
   "addq %3,%%rax       \n\t"        \
   "adcq $0,%%rdx       \n\t"        \
   "movq %%rax,0x20(%0) \n\t"        \
   "movq %%rdx,%1       \n\t"        \
\
   "movq %%r11,%%rax    \n\t"        \
   "movq 0x30(%5),%%r11 \n\t"        \
   "mulq %4             \n\t"        \
   "addq %%r10,%%rax    \n\t"        \
   "adcq $0,%%rdx       \n\t"        \
   "movq 0x30(%2),%%r10 \n\t"        \
   "addq %3,%%rax       \n\t"        \
   "adcq $0,%%rdx       \n\t"        \
   "movq %%rax,0x28(%0) \n\t"        \
   "movq %%rdx,%1       \n\t"        \
\
   "movq %%r11,%%rax    \n\t"        \
   "movq 0x38(%5),%%r11 \n\t"        \
   "mulq %4             \n\t"        \
   "addq %%r10,%%rax    \n\t"        \
   "adcq $0,%%rdx       \n\t"        \
   "movq 0x38(%2),%%r10 \n\t"        \
   "addq %3,%%rax       \n\t"        \
   "adcq $0,%%rdx       \n\t"        \
   "movq %%rax,0x30(%0) \n\t"        \
   "movq %%rdx,%1       \n\t"        \
\
   "movq %%r11,%%rax    \n\t"        \
   "mulq %4             \n\t"        \
   "addq %%r10,%%rax    \n\t"        \
   "adcq $0,%%rdx       \n\t"        \
   "addq %3,%%rax       \n\t"        \
   "adcq $0,%%rdx       \n\t"        \
   "movq %%rax,0x38(%0) \n\t"        \
   "movq %%rdx,%1       \n\t"        \
\
:"=r"(_c), "=r"(cy)                  \
: "0"(_c), "1"(cy), "g"(mu), "r"(tmpm)\
: "%rax", "%rdx", "%r10", "%r11", "cc")

#define PROPCARRY                    \
__asm__(                             \
   "addq   %1,%0   \n\t"             \
   "setb   %%al    \n\t"             \
   "movzbq %%al,%1 \n\t"             \
:"=g"(_c[LO]), "=r"(cy)              \
:"0"(_c[LO]), "1"(cy)                \
: "%rax", "cc")

/******************************************************************/
#elif defined(TFM_SSE2)
/* SSE2 code (assumes 32-bit fp_digits) */
/* XMM register assignments:
 * xmm0  *tmpm++, then Mu * (*tmpm++)
 * xmm1  c[x], then Mu
 * xmm2  mp
 * xmm3  cy
 * xmm4  _c[LO]
 */

#define MONT_START \
   __asm__("movd %0,%%mm2"::"g"(mp))

#define MONT_FINI \
   __asm__("emms")

#define LOOP_START              \
__asm__(                        \
   "movd %0,%%mm1       \n\t"   \
   "pxor %%mm3,%%mm3    \n\t"   \
   "pmuludq %%mm2,%%mm1 \n\t"   \
   :: "g"(c[x]))

/* pmuludq on mmx registers does a 32x32->64 multiply. */
#define INNERMUL                \
__asm__(                        \
   "movd %1,%%mm4       \n\t"   \
   "movd %2,%%mm0       \n\t"   \
   "paddq %%mm4,%%mm3   \n\t"   \
   "pmuludq %%mm1,%%mm0 \n\t"   \
   "paddq %%mm0,%%mm3   \n\t"   \
   "movd %%mm3,%0       \n\t"   \
   "psrlq $32, %%mm3    \n\t"   \
:"=g"(_c[LO]) : "0"(_c[LO]), "g"(*tmpm++) );

#define INNERMUL8               \
__asm__(                        \
   "movd 0(%1),%%mm4    \n\t"   \
   "movd 0(%2),%%mm0    \n\t"   \
   "paddq %%mm4,%%mm3   \n\t"   \
   "pmuludq %%mm1,%%mm0 \n\t"   \
   "movd 4(%2),%%mm5    \n\t"   \
   "paddq %%mm0,%%mm3   \n\t"   \
   "movd 4(%1),%%mm6    \n\t"   \
   "movd %%mm3,0(%0)    \n\t"   \
   "psrlq $32, %%mm3    \n\t"   \
\
   "paddq %%mm6,%%mm3   \n\t"   \
   "pmuludq %%mm1,%%mm5 \n\t"   \
   "movd 8(%2),%%mm6    \n\t"   \
   "paddq %%mm5,%%mm3   \n\t"   \
   "movd 8(%1),%%mm7    \n\t"   \
   "movd %%mm3,4(%0)    \n\t"   \
   "psrlq $32, %%mm3    \n\t"   \
\
   "paddq %%mm7,%%mm3   \n\t"   \
   "pmuludq %%mm1,%%mm6 \n\t"   \
   "movd 12(%2),%%mm7   \n\t"   \
   "paddq %%mm6,%%mm3   \n\t"   \
   "movd 12(%1),%%mm5   \n\t"   \
   "movd %%mm3,8(%0)    \n\t"   \
   "psrlq $32, %%mm3    \n\t"   \
\
   "paddq %%mm5,%%mm3   \n\t"   \
   "pmuludq %%mm1,%%mm7 \n\t"   \
   "movd 16(%2),%%mm5   \n\t"   \
   "paddq %%mm7,%%mm3   \n\t"   \
   "movd 16(%1),%%mm6   \n\t"   \
   "movd %%mm3,12(%0)   \n\t"   \
   "psrlq $32, %%mm3    \n\t"   \
\
   "paddq %%mm6,%%mm3   \n\t"   \
   "pmuludq %%mm1,%%mm5 \n\t"   \
   "movd 20(%2),%%mm6   \n\t"   \
   "paddq %%mm5,%%mm3   \n\t"   \
   "movd 20(%1),%%mm7   \n\t"   \
   "movd %%mm3,16(%0)   \n\t"   \
   "psrlq $32, %%mm3    \n\t"   \
\
   "paddq %%mm7,%%mm3   \n\t"   \
   "pmuludq %%mm1,%%mm6 \n\t"   \
   "movd 24(%2),%%mm7   \n\t"   \
   "paddq %%mm6,%%mm3   \n\t"   \
   "movd 24(%1),%%mm5   \n\t"   \
   "movd %%mm3,20(%0)   \n\t"   \
   "psrlq $32, %%mm3    \n\t"   \
\
   "paddq %%mm5,%%mm3   \n\t"   \
   "pmuludq %%mm1,%%mm7 \n\t"   \
   "movd 28(%2),%%mm5   \n\t"   \
   "paddq %%mm7,%%mm3   \n\t"   \
   "movd 28(%1),%%mm6   \n\t"   \
   "movd %%mm3,24(%0)   \n\t"   \
   "psrlq $32, %%mm3    \n\t"   \
\
   "paddq %%mm6,%%mm3   \n\t"   \
   "pmuludq %%mm1,%%mm5 \n\t"   \
   "paddq %%mm5,%%mm3   \n\t"   \
   "movd %%mm3,28(%0)   \n\t"   \
   "psrlq $32, %%mm3    \n\t"   \
:"=r"(_c) : "0"(_c), "r"(tmpm) );

/* TAO switched tmpm from "g" to "r" after gcc tried to index the indexed stack
   pointer */

#define LOOP_END \
__asm__( "movd %%mm3,%0  \n" :"=r"(cy))

#define PROPCARRY               \
__asm__(                        \
   "addl   %1,%0   \n\t"        \
   "setb   %%al    \n\t"        \
   "movzbl %%al,%1 \n\t"        \
:"=g"(_c[LO]), "=r"(cy)         \
:"0"(_c[LO]), "1"(cy)           \
: "%eax", "cc")
/******************************************************************/
#elif defined(TFM_ARM)
/* ARMv4 code */

#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
   mu = c[x] * mp


#ifdef __thumb__

#define INNERMUL                  \
__asm__(                          \
    " LDR    r0,%1        \n\t"   \
    " ADDS   r0,r0,%0     \n\t"   \
    " ITE    CS           \n\t"   \
    " MOVCS  %0,#1        \n\t"   \
    " MOVCC  %0,#0        \n\t"   \
    " UMLAL  r0,%0,%3,%4  \n\t"   \
    " STR    r0,%1        \n\t"   \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"m"(_c[0]):"r0","cc");

#define PROPCARRY                 \
__asm__(                          \
    " LDR   r0,%1        \n\t"    \
    " ADDS  r0,r0,%0     \n\t"    \
    " STR   r0,%1        \n\t"    \
    " ITE   CS           \n\t"    \
    " MOVCS %0,#1        \n\t"    \
    " MOVCC %0,#0        \n\t"    \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"m"(_c[0]):"r0","cc");


/* TAO thumb mode uses ite (if then else) to detect carry directly
 * fixed unmatched constraint warning by changing 1 to m */

#else /* __thumb__ */

#define INNERMUL                  \
__asm__(                          \
    " LDR    r0,%1        \n\t"   \
    " ADDS   r0,r0,%0     \n\t"   \
    " MOVCS  %0,#1        \n\t"   \
    " MOVCC  %0,#0        \n\t"   \
    " UMLAL  r0,%0,%3,%4  \n\t"   \
    " STR    r0,%1        \n\t"   \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r0","cc");

#define PROPCARRY                 \
__asm__(                          \
    " LDR   r0,%1        \n\t"    \
    " ADDS  r0,r0,%0     \n\t"    \
    " STR   r0,%1        \n\t"    \
    " MOVCS %0,#1        \n\t"    \
    " MOVCC %0,#0        \n\t"    \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r0","cc");

#endif /* __thumb__ */

#elif defined(TFM_PPC32)

/* PPC32 */
#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
   mu = c[x] * mp

#define INNERMUL                  \
__asm__(                          \
    " mullw  16,%3,%4    \n\t"    \
    " mulhwu 17,%3,%4    \n\t"    \
    " addc   16,16,%0    \n\t"    \
    " addze  17,17       \n\t"    \
    " lwz    18,%1       \n\t"    \
    " addc   16,16,18    \n\t"    \
    " addze  %0,17       \n\t"    \
    " stw    16,%1       \n\t"    \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16","17","18","cc"); ++tmpm;

#define PROPCARRY                 \
__asm__(                          \
    " lwz   16,%1       \n\t"     \
    " addc  16,16,%0    \n\t"     \
    " stw   16,%1       \n\t"     \
    " xor   %0,%0,%0    \n\t"     \
    " addze %0,%0       \n\t"     \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"16","cc");
#elif defined(TFM_PPC64)

/* PPC64 */
#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
   mu = c[x] * mp

#define INNERMUL                  \
__asm__(                          \
    " mulld  16,%3,%4    \n\t"    \
    " mulhdu 17,%3,%4    \n\t"    \
    " addc   16,16,%0    \n\t"    \
    " addze  17,17       \n\t"    \
    " ldx    18,0,%1     \n\t"    \
    " addc   16,16,18    \n\t"    \
    " addze  %0,17       \n\t"    \
    " stdx   16,0,%1     \n\t"    \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16","17","18","cc"); ++tmpm;

#define PROPCARRY                 \
__asm__(                          \
    " ldx   16,0,%1     \n\t"     \
    " addc  16,16,%0    \n\t"     \
    " stdx  16,0,%1     \n\t"     \
    " xor   %0,%0,%0    \n\t"     \
    " addze %0,%0       \n\t"     \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"16","cc");

/******************************************************************/

#elif defined(TFM_AVR32)

/* AVR32 */
#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
   mu = c[x] * mp

#define INNERMUL                  \
__asm__(                          \
    " ld.w   r2,%1       \n\t"    \
    " add    r2,%0       \n\t"    \
    " eor    r3,r3       \n\t"    \
    " acr    r3          \n\t"    \
    " macu.d r2,%3,%4    \n\t"    \
    " st.w   %1,r2       \n\t"    \
    " mov    %0,r3       \n\t"    \
:"=r"(cy),"=r"(_c):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c):"r2","r3");

#define PROPCARRY                 \
__asm__(                          \
    " ld.w  r2,%1       \n\t"     \
    " add   r2,%0       \n\t"     \
    " st.w  %1,r2       \n\t"     \
    " eor   %0,%0       \n\t"     \
    " acr   %0          \n\t"     \
:"=r"(cy),"=r"(_c):"0"(cy),"1"(_c):"r2","cc");


#else

/* ISO C code */
#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
   mu = c[x] * mp

#define INNERMUL                                     \
   do { fp_word t;                                   \
   t = ((fp_word)_c[0] + (fp_word)cy) +              \
       (((fp_word)mu) * ((fp_word)*tmpm++));         \
   _c[0] = (fp_digit)t;                              \
   cy = (fp_digit)(t >> DIGIT_BIT);                  \
   } while (0)

#define PROPCARRY \
   do { fp_digit t = _c[0] += cy; cy = (t < cy); } while (0)

#endif
/******************************************************************/


#define LO 0
/* end fp_montgomery_reduce.c asm */
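/* Illustration (not part of the original TFM code): how the macros above are
 * consumed. This mirrors the structure of fp_montgomery_reduce() in tfm.c,
 * simplified: the real function also takes the INNERMUL8 fast path and ends
 * with the conditional subtraction of m. Variable names follow what the
 * macros expect to find in scope.
 */
#if 0 /* sketch only, not compiled */
static void mont_reduce_sketch(fp_digit* c, fp_int* m, fp_digit mp, int pa)
{
    fp_digit *_c, *tmpm, mu, cy;
    int x, y;

    MONT_START;
    for (x = 0; x < pa; x++) {
        cy = 0;
        LOOP_START;             /* mu = c[x] * mp, cancels the low digit  */
        _c   = c + x;
        tmpm = m->dp;
        for (y = 0; y < m->used; y++) {
            INNERMUL;           /* _c[0] += mu * *tmpm++, carry into cy   */
            ++_c;
        }
        LOOP_END;               /* SSE2 variant moves cy out of mm3 here  */
        while (cy) {
            PROPCARRY;          /* ripple the leftover carry upward       */
            ++_c;
        }
    }
    MONT_FINI;
}
#endif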
/* start fp_sqr_comba.c asm */
#if defined(TFM_X86)

/* x86-32 optimized */

#define COMBA_START

#define CLEAR_CARRY \
   c0 = c1 = c2 = 0;

#define COMBA_STORE(x) \
   x = c0;

#define COMBA_STORE2(x) \
   x = c1;

#define CARRY_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_FINI

#define SQRADD(i, j)              \
__asm__(                          \
    "movl %6,%%eax     \n\t"      \
    "mull %%eax        \n\t"      \
    "addl %%eax,%0     \n\t"      \
    "adcl %%edx,%1     \n\t"      \
    "adcl $0,%2        \n\t"      \
    :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","cc");

#define SQRADD2(i, j)             \
__asm__(                          \
    "movl %6,%%eax     \n\t"      \
    "mull %7           \n\t"      \
    "addl %%eax,%0     \n\t"      \
    "adcl %%edx,%1     \n\t"      \
    "adcl $0,%2        \n\t"      \
    "addl %%eax,%0     \n\t"      \
    "adcl %%edx,%1     \n\t"      \
    "adcl $0,%2        \n\t"      \
    :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc");

#define SQRADDSC(i, j)            \
__asm__(                          \
    "movl %3,%%eax     \n\t"      \
    "mull %4           \n\t"      \
    "movl %%eax,%0     \n\t"      \
    "movl %%edx,%1     \n\t"      \
    "xorl %2,%2        \n\t"      \
    :"=r"(sc0), "=r"(sc1), "=r"(sc2): "g"(i), "g"(j) :"%eax","%edx","cc");

/* TAO removed sc0,1,2 as input to remove warning so %6,%7 become %3,%4 */

#define SQRADDAC(i, j)            \
__asm__(                          \
    "movl %6,%%eax     \n\t"      \
    "mull %7           \n\t"      \
    "addl %%eax,%0     \n\t"      \
    "adcl %%edx,%1     \n\t"      \
    "adcl $0,%2        \n\t"      \
    :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","cc");

#define SQRADDDB                  \
__asm__(                          \
    "addl %6,%0        \n\t"      \
    "adcl %7,%1        \n\t"      \
    "adcl %8,%2        \n\t"      \
    "addl %6,%0        \n\t"      \
    "adcl %7,%1        \n\t"      \
    "adcl %8,%2        \n\t"      \
    :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");
#elif defined(TFM_X86_64)
/* x86-64 optimized */

#define COMBA_START

#define CLEAR_CARRY \
   c0 = c1 = c2 = 0;

#define COMBA_STORE(x) \
   x = c0;

#define COMBA_STORE2(x) \
   x = c1;

#define CARRY_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_FINI

#define SQRADD(i, j)              \
__asm__(                          \
    "movq %6,%%rax     \n\t"      \
    "mulq %%rax        \n\t"      \
    "addq %%rax,%0     \n\t"      \
    "adcq %%rdx,%1     \n\t"      \
    "adcq $0,%2        \n\t"      \
    :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i) :"%rax","%rdx","cc");

#define SQRADD2(i, j)             \
__asm__(                          \
    "movq %6,%%rax     \n\t"      \
    "mulq %7           \n\t"      \
    "addq %%rax,%0     \n\t"      \
    "adcq %%rdx,%1     \n\t"      \
    "adcq $0,%2        \n\t"      \
    "addq %%rax,%0     \n\t"      \
    "adcq %%rdx,%1     \n\t"      \
    "adcq $0,%2        \n\t"      \
    :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","cc");

#define SQRADDSC(i, j)            \
__asm__(                          \
    "movq %3,%%rax     \n\t"      \
    "mulq %4           \n\t"      \
    "movq %%rax,%0     \n\t"      \
    "movq %%rdx,%1     \n\t"      \
    "xorq %2,%2        \n\t"      \
    :"=r"(sc0), "=r"(sc1), "=r"(sc2): "g"(i), "g"(j) :"%rax","%rdx","cc");

/* TAO removed sc0,1,2 as input to remove warning so %6,%7 become %3,%4 */

#define SQRADDAC(i, j)            \
__asm__(                          \
    "movq %6,%%rax     \n\t"      \
    "mulq %7           \n\t"      \
    "addq %%rax,%0     \n\t"      \
    "adcq %%rdx,%1     \n\t"      \
    "adcq $0,%2        \n\t"      \
    :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","cc");

#define SQRADDDB                  \
__asm__(                          \
    "addq %6,%0        \n\t"      \
    "adcq %7,%1        \n\t"      \
    "adcq %8,%2        \n\t"      \
    "addq %6,%0        \n\t"      \
    "adcq %7,%1        \n\t"      \
    "adcq %8,%2        \n\t"      \
    :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");

#elif defined(TFM_SSE2)

/* SSE2 Optimized */
#define COMBA_START

#define CLEAR_CARRY \
   c0 = c1 = c2 = 0;

#define COMBA_STORE(x) \
   x = c0;

#define COMBA_STORE2(x) \
   x = c1;

#define CARRY_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_FINI \
   __asm__("emms");

#define SQRADD(i, j)              \
__asm__(                          \
    "movd %6,%%mm0      \n\t"     \
    "pmuludq %%mm0,%%mm0\n\t"     \
    "movd %%mm0,%%eax   \n\t"     \
    "psrlq $32,%%mm0    \n\t"     \
    "addl %%eax,%0      \n\t"     \
    "movd %%mm0,%%eax   \n\t"     \
    "adcl %%eax,%1      \n\t"     \
    "adcl $0,%2         \n\t"     \
    :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","cc");

#define SQRADD2(i, j)             \
__asm__(                          \
    "movd %6,%%mm0      \n\t"     \
    "movd %7,%%mm1      \n\t"     \
    "pmuludq %%mm1,%%mm0\n\t"     \
    "movd %%mm0,%%eax   \n\t"     \
    "psrlq $32,%%mm0    \n\t"     \
    "movd %%mm0,%%edx   \n\t"     \
    "addl %%eax,%0      \n\t"     \
    "adcl %%edx,%1      \n\t"     \
    "adcl $0,%2         \n\t"     \
    "addl %%eax,%0      \n\t"     \
    "adcl %%edx,%1      \n\t"     \
    "adcl $0,%2         \n\t"     \
    :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc");

#define SQRADDSC(i, j)            \
__asm__(                          \
    "movd %3,%%mm0      \n\t"     \
    "movd %4,%%mm1      \n\t"     \
    "pmuludq %%mm1,%%mm0\n\t"     \
    "movd %%mm0,%0      \n\t"     \
    "psrlq $32,%%mm0    \n\t"     \
    "movd %%mm0,%1      \n\t"     \
    "xorl %2,%2         \n\t"     \
    :"=r"(sc0), "=r"(sc1), "=r"(sc2): "m"(i), "m"(j));

/* TAO removed sc0,1,2 as input to remove warning so %6,%7 become %3,%4 */

#define SQRADDAC(i, j)            \
__asm__(                          \
    "movd %6,%%mm0      \n\t"     \
    "movd %7,%%mm1      \n\t"     \
    "pmuludq %%mm1,%%mm0\n\t"     \
    "movd %%mm0,%%eax   \n\t"     \
    "psrlq $32,%%mm0    \n\t"     \
    "movd %%mm0,%%edx   \n\t"     \
    "addl %%eax,%0      \n\t"     \
    "adcl %%edx,%1      \n\t"     \
    "adcl $0,%2         \n\t"     \
    :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j) :"%eax","%edx","cc");

#define SQRADDDB                  \
__asm__(                          \
    "addl %6,%0        \n\t"      \
    "adcl %7,%1        \n\t"      \
    "adcl %8,%2        \n\t"      \
    "addl %6,%0        \n\t"      \
    "adcl %7,%1        \n\t"      \
    "adcl %8,%2        \n\t"      \
    :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");

#elif defined(TFM_ARM)

/* ARM code */

#define COMBA_START

#define CLEAR_CARRY \
   c0 = c1 = c2 = 0;

#define COMBA_STORE(x) \
   x = c0;

#define COMBA_STORE2(x) \
   x = c1;

#define CARRY_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_FINI

/* multiplies point i and j, updates carry "c1" and digit c2 */
#define SQRADD(i, j)                 \
__asm__(                             \
    " UMULL r0,r1,%6,%6   \n\t"      \
    " ADDS  %0,%0,r0      \n\t"      \
    " ADCS  %1,%1,r1      \n\t"      \
    " ADC   %2,%2,#0      \n\t"      \
    :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i) : "r0", "r1", "cc");

/* for squaring some of the terms are doubled... */
#define SQRADD2(i, j)                \
__asm__(                             \
    " UMULL r0,r1,%6,%7   \n\t"      \
    " ADDS  %0,%0,r0      \n\t"      \
    " ADCS  %1,%1,r1      \n\t"      \
    " ADC   %2,%2,#0      \n\t"      \
    " ADDS  %0,%0,r0      \n\t"      \
    " ADCS  %1,%1,r1      \n\t"      \
    " ADC   %2,%2,#0      \n\t"      \
    :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "cc");

#define SQRADDSC(i, j)               \
__asm__(                             \
    " UMULL %0,%1,%3,%4   \n\t"      \
    " SUB   %2,%2,%2      \n\t"      \
    :"=r"(sc0), "=r"(sc1), "=r"(sc2) : "r"(i), "r"(j) : "cc");

/* TAO removed sc0,1,2 as input to remove warning so %6,%7 become %3,%4 */

#define SQRADDAC(i, j)               \
__asm__(                             \
    " UMULL r0,r1,%6,%7   \n\t"      \
    " ADDS  %0,%0,r0      \n\t"      \
    " ADCS  %1,%1,r1      \n\t"      \
    " ADC   %2,%2,#0      \n\t"      \
    :"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "r0", "r1", "cc");

#define SQRADDDB                     \
__asm__(                             \
    " ADDS  %0,%0,%3      \n\t"      \
    " ADCS  %1,%1,%4      \n\t"      \
    " ADC   %2,%2,%5      \n\t"      \
    " ADDS  %0,%0,%3      \n\t"      \
    " ADCS  %1,%1,%4      \n\t"      \
    " ADC   %2,%2,%5      \n\t"      \
    :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");

#elif defined(TFM_PPC32)

/* PPC32 */

#define COMBA_START

#define CLEAR_CARRY \
   c0 = c1 = c2 = 0;

#define COMBA_STORE(x) \
   x = c0;

#define COMBA_STORE2(x) \
   x = c1;

#define CARRY_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_FINI

/* multiplies point i and j, updates carry "c1" and digit c2 */
#define SQRADD(i, j)                 \
__asm__(                             \
    " mullw  16,%6,%6     \n\t"      \
    " addc   %0,%0,16     \n\t"      \
    " mulhwu 16,%6,%6     \n\t"      \
    " adde   %1,%1,16     \n\t"      \
    " addze  %2,%2        \n\t"      \
    :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","cc");

/* for squaring some of the terms are doubled... */
#define SQRADD2(i, j)                \
__asm__(                             \
    " mullw  16,%6,%7     \n\t"      \
    " mulhwu 17,%6,%7     \n\t"      \
    " addc   %0,%0,16     \n\t"      \
    " adde   %1,%1,17     \n\t"      \
    " addze  %2,%2        \n\t"      \
    " addc   %0,%0,16     \n\t"      \
    " adde   %1,%1,17     \n\t"      \
    " addze  %2,%2        \n\t"      \
    :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","cc");

#define SQRADDSC(i, j)               \
__asm__(                             \
    " mullw  %0,%6,%7     \n\t"      \
    " mulhwu %1,%6,%7     \n\t"      \
    " xor    %2,%2,%2     \n\t"      \
    :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc");

#define SQRADDAC(i, j)               \
__asm__(                             \
    " mullw  16,%6,%7     \n\t"      \
    " addc   %0,%0,16     \n\t"      \
    " mulhwu 16,%6,%7     \n\t"      \
    " adde   %1,%1,16     \n\t"      \
    " addze  %2,%2        \n\t"      \
    :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "cc");

#define SQRADDDB                     \
__asm__(                             \
    " addc   %0,%0,%3     \n\t"      \
    " adde   %1,%1,%4     \n\t"      \
    " adde   %2,%2,%5     \n\t"      \
    " addc   %0,%0,%3     \n\t"      \
    " adde   %1,%1,%4     \n\t"      \
    " adde   %2,%2,%5     \n\t"      \
    :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
#elif defined(TFM_PPC64)
/* PPC64 */

#define COMBA_START

#define CLEAR_CARRY \
   c0 = c1 = c2 = 0;

#define COMBA_STORE(x) \
   x = c0;

#define COMBA_STORE2(x) \
   x = c1;

#define CARRY_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_FINI

/* multiplies point i and j, updates carry "c1" and digit c2 */
#define SQRADD(i, j)                 \
__asm__(                             \
    " mulld  16,%6,%6     \n\t"      \
    " addc   %0,%0,16     \n\t"      \
    " mulhdu 16,%6,%6     \n\t"      \
    " adde   %1,%1,16     \n\t"      \
    " addze  %2,%2        \n\t"      \
    :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","cc");

/* for squaring some of the terms are doubled... */
#define SQRADD2(i, j)                \
__asm__(                             \
    " mulld  16,%6,%7     \n\t"      \
    " mulhdu 17,%6,%7     \n\t"      \
    " addc   %0,%0,16     \n\t"      \
    " adde   %1,%1,17     \n\t"      \
    " addze  %2,%2        \n\t"      \
    " addc   %0,%0,16     \n\t"      \
    " adde   %1,%1,17     \n\t"      \
    " addze  %2,%2        \n\t"      \
    :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","cc");

#define SQRADDSC(i, j)               \
__asm__(                             \
    " mulld  %0,%6,%7     \n\t"      \
    " mulhdu %1,%6,%7     \n\t"      \
    " xor    %2,%2,%2     \n\t"      \
    :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc");

#define SQRADDAC(i, j)               \
__asm__(                             \
    " mulld  16,%6,%7     \n\t"      \
    " addc   %0,%0,16     \n\t"      \
    " mulhdu 16,%6,%7     \n\t"      \
    " adde   %1,%1,16     \n\t"      \
    " addze  %2,%2        \n\t"      \
    :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "cc");

#define SQRADDDB                     \
__asm__(                             \
    " addc   %0,%0,%3     \n\t"      \
    " adde   %1,%1,%4     \n\t"      \
    " adde   %2,%2,%5     \n\t"      \
    " addc   %0,%0,%3     \n\t"      \
    " adde   %1,%1,%4     \n\t"      \
    " adde   %2,%2,%5     \n\t"      \
    :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
#elif defined(TFM_AVR32)

/* AVR32 */

#define COMBA_START

#define CLEAR_CARRY \
   c0 = c1 = c2 = 0;

#define COMBA_STORE(x) \
   x = c0;

#define COMBA_STORE2(x) \
   x = c1;

#define CARRY_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_FINI

/* multiplies point i and j, updates carry "c1" and digit c2 */
#define SQRADD(i, j)                 \
__asm__(                             \
    " mulu.d r2,%6,%6     \n\t"      \
    " add    %0,%0,r2     \n\t"      \
    " adc    %1,%1,r3     \n\t"      \
    " acr    %2           \n\t"      \
    :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r2","r3");

/* for squaring some of the terms are doubled... */
#define SQRADD2(i, j)                \
__asm__(                             \
    " mulu.d r2,%6,%7     \n\t"      \
    " add    %0,%0,r2     \n\t"      \
    " adc    %1,%1,r3     \n\t"      \
    " acr    %2           \n\t"      \
    " add    %0,%0,r2     \n\t"      \
    " adc    %1,%1,r3     \n\t"      \
    " acr    %2           \n\t"      \
    :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2", "r3");

#define SQRADDSC(i, j)               \
__asm__(                             \
    " mulu.d r2,%6,%7     \n\t"      \
    " mov    %0,r2        \n\t"      \
    " mov    %1,r3        \n\t"      \
    " eor    %2,%2        \n\t"      \
    :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "r2", "r3");

#define SQRADDAC(i, j)               \
__asm__(                             \
    " mulu.d r2,%6,%7     \n\t"      \
    " add    %0,%0,r2     \n\t"      \
    " adc    %1,%1,r3     \n\t"      \
    " acr    %2           \n\t"      \
    :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r2", "r3");

#define SQRADDDB                     \
__asm__(                             \
    " add    %0,%0,%3     \n\t"      \
    " adc    %1,%1,%4     \n\t"      \
    " adc    %2,%2,%5     \n\t"      \
    " add    %0,%0,%3     \n\t"      \
    " adc    %1,%1,%4     \n\t"      \
    " adc    %2,%2,%5     \n\t"      \
    :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
#else

#define TFM_ISO

/* ISO C portable code */

#define COMBA_START

#define CLEAR_CARRY \
   c0 = c1 = c2 = 0;

#define COMBA_STORE(x) \
   x = c0;

#define COMBA_STORE2(x) \
   x = c1;

#define CARRY_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_FINI

/* multiplies point i and j, updates carry "c1" and digit c2 */
#define SQRADD(i, j)                                             \
   do { fp_word t;                                               \
      t = c0 + ((fp_word)i) * ((fp_word)j);  c0 = (fp_digit)t;   \
      t = c1 + (t >> DIGIT_BIT);             c1 = (fp_digit)t;   \
      c2 += (fp_digit)(t >> DIGIT_BIT);                          \
   } while (0);


/* for squaring some of the terms are doubled... */
#define SQRADD2(i, j)                                            \
   do { fp_word t, tt;                                           \
      t  = ((fp_word)i) * ((fp_word)j);                          \
      tt = (fp_word)c0 + t;                 c0 = (fp_digit)tt;   \
      tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = (fp_digit)tt;   \
      c2 += (fp_digit)(tt >> DIGIT_BIT);                         \
      tt = (fp_word)c0 + t;                 c0 = (fp_digit)tt;   \
      tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = (fp_digit)tt;   \
      c2 += (fp_digit)(tt >> DIGIT_BIT);                         \
   } while (0);

#define SQRADDSC(i, j)                                           \
   do { fp_word t;                                               \
      t = ((fp_word)i) * ((fp_word)j);                           \
      sc0 = (fp_digit)t; sc1 = (t >> DIGIT_BIT); sc2 = 0;        \
   } while (0);

#define SQRADDAC(i, j)                                           \
   do { fp_word t;                                               \
      t = sc0 + ((fp_word)i) * ((fp_word)j);  sc0 = (fp_digit)t; \
      t = sc1 + (t >> DIGIT_BIT);             sc1 = (fp_digit)t; \
      sc2 += (fp_digit)(t >> DIGIT_BIT);                         \
   } while (0);

#define SQRADDDB                                                 \
   do { fp_word t;                                               \
      t = ((fp_word)sc0) + ((fp_word)sc0) + c0; c0 = (fp_digit)t;\
      t = ((fp_word)sc1) + ((fp_word)sc1) + c1 + (t >> DIGIT_BIT); \
      c1 = (fp_digit)t;                                          \
      c2 = c2 + (fp_digit)(((fp_word)sc2) + ((fp_word)sc2) + (t >> DIGIT_BIT)); \
   } while (0);

#endif
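/* Illustration (not part of the original TFM code): how a comba squaring is
 * assembled from the macros above. This hand-written 2-digit case mirrors
 * the generated fp_sqr_comba_2.i; a[] holds the operand digits and r[]
 * receives the 4-digit result. Each column gathers its partial products,
 * then CARRY_FORWARD shifts the three-digit accumulator one column up.
 */
#if 0 /* sketch only, not compiled */
static void sqr_comba_2_sketch(fp_digit a[2], fp_digit r[4])
{
    fp_digit c0, c1, c2;

    COMBA_START;
    CLEAR_CARRY;                               /* c0 = c1 = c2 = 0   */
    SQRADD(a[0], a[0]);   COMBA_STORE(r[0]);   /* column 0: a0*a0    */
    CARRY_FORWARD;
    SQRADD2(a[0], a[1]);  COMBA_STORE(r[1]);   /* column 1: 2*a0*a1  */
    CARRY_FORWARD;
    SQRADD(a[1], a[1]);   COMBA_STORE(r[2]);   /* column 2: a1*a1    */
    COMBA_STORE2(r[3]);                        /* final carry digit  */
    COMBA_FINI;
}
#endif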
#ifdef TFM_SMALL_SET
    #include "fp_sqr_comba_small_set.i"
#endif

#if defined(TFM_SQR3)
    #include "fp_sqr_comba_3.i"
#endif
#if defined(TFM_SQR4)
    #include "fp_sqr_comba_4.i"
#endif
#if defined(TFM_SQR6)
    #include "fp_sqr_comba_6.i"
#endif
#if defined(TFM_SQR7)
    #include "fp_sqr_comba_7.i"
#endif
#if defined(TFM_SQR8)
    #include "fp_sqr_comba_8.i"
#endif
#if defined(TFM_SQR9)
    #include "fp_sqr_comba_9.i"
#endif
#if defined(TFM_SQR12)
    #include "fp_sqr_comba_12.i"
#endif
#if defined(TFM_SQR17)
    #include "fp_sqr_comba_17.i"
#endif
#if defined(TFM_SQR20)
    #include "fp_sqr_comba_20.i"
#endif
#if defined(TFM_SQR24)
    #include "fp_sqr_comba_24.i"
#endif
#if defined(TFM_SQR28)
    #include "fp_sqr_comba_28.i"
#endif
#if defined(TFM_SQR32)
    #include "fp_sqr_comba_32.i"
#endif
#if defined(TFM_SQR48)
    #include "fp_sqr_comba_48.i"
#endif
#if defined(TFM_SQR64)
    #include "fp_sqr_comba_64.i"
#endif
/* end fp_sqr_comba.c asm */

/* start fp_mul_comba.c asm */
/* these are the combas. Worship them. */
#if defined(TFM_X86)
/* Generic x86 optimized code */

/* anything you need at the start */
#define COMBA_START

/* clear the chaining variables */
#define COMBA_CLEAR \
   c0 = c1 = c2 = 0;

/* forward the carry to the next digit */
#define COMBA_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* store the first sum */
#define COMBA_STORE(x) \
   x = c0;

/* store the second sum [carry] */
#define COMBA_STORE2(x) \
   x = c1;

/* anything you need at the end */
#define COMBA_FINI

/* this should multiply i and j */
#define MULADD(i, j)                 \
__asm__(                             \
    "movl %6,%%eax     \n\t"         \
    "mull %7           \n\t"         \
    "addl %%eax,%0     \n\t"         \
    "adcl %%edx,%1     \n\t"         \
    "adcl $0,%2        \n\t"         \
    :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc");
#elif defined(TFM_X86_64)
/* x86-64 optimized */

/* anything you need at the start */
#define COMBA_START

/* clear the chaining variables */
#define COMBA_CLEAR \
   c0 = c1 = c2 = 0;

/* forward the carry to the next digit */
#define COMBA_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* store the first sum */
#define COMBA_STORE(x) \
   x = c0;

/* store the second sum [carry] */
#define COMBA_STORE2(x) \
   x = c1;

/* anything you need at the end */
#define COMBA_FINI

/* this should multiply i and j */
#define MULADD(i, j)                 \
__asm__ (                            \
    "movq %6,%%rax     \n\t"         \
    "mulq %7           \n\t"         \
    "addq %%rax,%0     \n\t"         \
    "adcq %%rdx,%1     \n\t"         \
    "adcq $0,%2        \n\t"         \
    :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","cc");


#if defined(HAVE_INTEL_MULX)
#define MULADD_MULX(b0, c0, c1, rdx)\
    __asm__ volatile (               \
       "movq  %3, %%rdx\n\t"         \
       "mulx  %2,%%r9, %%r8 \n\t"    \
       "adoxq %%r9,%0 \n\t"          \
       "adcxq %%r8,%1 \n\t"          \
       :"+r"(c0),"+r"(c1):"r"(b0),"r"(rdx):"%r8","%r9","%r10","%rdx"\
    )


#define MULADD_MULX_ADD_CARRY(c0, c1)\
    __asm__ volatile(\
       "mov  $0, %%r10\n\t"\
       "movq %1, %%r8\n\t"\
       "adox %%r10, %0\n\t"\
       "adcx %%r10, %1\n\t"\
       :"+r"(c0),"+r"(c1)::"%r8","%r9","%r10","%rdx") ;

#define MULADD_SET_A(a0)\
    __asm__ volatile("add  $0, %%r8\n\t"  \
                     "movq %0,%%rdx\n\t"  \
    ::"r"(a0):"%r8","%r9","%r10","%rdx") ;

#define MULADD_BODY(a,b,c)\
    {   word64 rdx = a->dp[ix] ;     \
        cp = &(c->dp[iz]) ;          \
        c0 = cp[0] ; c1 = cp[1];     \
        MULADD_SET_A(rdx) ;          \
        MULADD_MULX(b0, c0, c1, rdx) ;\
        cp[0]=c0; c0=cp[2];          \
        MULADD_MULX(b1, c1, c0, rdx) ;\
        cp[1]=c1; c1=cp[3];          \
        MULADD_MULX(b2, c0, c1, rdx) ;\
        cp[2]=c0; c0=cp[4];          \
        MULADD_MULX(b3, c1, c0, rdx) ;\
        cp[3]=c1; c1=cp[5];          \
        MULADD_MULX_ADD_CARRY(c0, c1);\
        cp[4]=c0; cp[5]=c1;          \
    }

#define TFM_INTEL_MUL_COMBA(a, b, c)\
    for (ix = 0; ix < pa; ix++) c->dp[ix] = 0 ;  \
    for (iy = 0; (iy < b->used); iy += 4) {      \
        fp_digit *bp ;                           \
        bp = &(b->dp[iy+0]) ;                    \
        fp_digit b0 = bp[0], b1 = bp[1],         \
                 b2 = bp[2], b3 = bp[3];         \
        ix = 0, iz = iy;                         \
        while (ix < a->used) {                   \
            fp_digit c0, c1;                     \
            fp_digit *cp ;                       \
            MULADD_BODY(a,b,c);                  \
            ix++ ; iz++ ;                        \
        }                                        \
    };
#endif
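/* Illustration (not part of the original TFM code): TFM_INTEL_MUL_COMBA
 * above processes b four digits at a time (b0..b3) against every digit of a,
 * using the flag-preserving MULX plus the ADOX/ADCX dual carry chains, then
 * folds the remaining carries with MULADD_MULX_ADD_CARRY. A hypothetical
 * wrapper (names and padding assumptions are ours) would look like this; it
 * assumes b->used is a multiple of four and c->dp has headroom past pa, as
 * the real caller must arrange.
 */
#if 0 /* sketch only, not compiled */
static void mul_comba_mulx_sketch(fp_int* a, fp_int* b, fp_int* c)
{
    int ix, iy, iz;                 /* free variables used by the macro */
    int pa = a->used + b->used;     /* result digits to zero first      */

    TFM_INTEL_MUL_COMBA(a, b, c);
    c->used = pa;                   /* caller then clamps leading zeros */
}
#endif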
#elif defined(TFM_SSE2)
/* use SSE2 optimizations */

/* anything you need at the start */
#define COMBA_START

/* clear the chaining variables */
#define COMBA_CLEAR \
   c0 = c1 = c2 = 0;

/* forward the carry to the next digit */
#define COMBA_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* store the first sum */
#define COMBA_STORE(x) \
   x = c0;

/* store the second sum [carry] */
#define COMBA_STORE2(x) \
   x = c1;

/* anything you need at the end */
#define COMBA_FINI \
   __asm__("emms");

/* this should multiply i and j */
#define MULADD(i, j)                 \
__asm__(                             \
    "movd %6,%%mm0      \n\t"        \
    "movd %7,%%mm1      \n\t"        \
    "pmuludq %%mm1,%%mm0\n\t"        \
    "movd %%mm0,%%eax   \n\t"        \
    "psrlq $32,%%mm0    \n\t"        \
    "addl %%eax,%0      \n\t"        \
    "movd %%mm0,%%eax   \n\t"        \
    "adcl %%eax,%1      \n\t"        \
    "adcl $0,%2         \n\t"        \
    :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","cc");

#elif defined(TFM_ARM)
/* ARM code */

#define COMBA_START

#define COMBA_CLEAR \
   c0 = c1 = c2 = 0;

#define COMBA_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_STORE(x) \
   x = c0;

#define COMBA_STORE2(x) \
   x = c1;

#define COMBA_FINI

#define MULADD(i, j)                 \
__asm__(                             \
    " UMULL r0,r1,%6,%7   \n\t"      \
    " ADDS  %0,%0,r0      \n\t"      \
    " ADCS  %1,%1,r1      \n\t"      \
    " ADC   %2,%2,#0      \n\t"      \
    :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "cc");

#elif defined(TFM_PPC32)
/* For 32-bit PPC */

#define COMBA_START

#define COMBA_CLEAR \
   c0 = c1 = c2 = 0;

#define COMBA_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_STORE(x) \
   x = c0;

#define COMBA_STORE2(x) \
   x = c1;

#define COMBA_FINI

/* untested: will mulhwu change the flags? Docs say no */
#define MULADD(i, j)                 \
__asm__(                             \
    " mullw  16,%6,%7     \n\t"      \
    " addc   %0,%0,16     \n\t"      \
    " mulhwu 16,%6,%7     \n\t"      \
    " adde   %1,%1,16     \n\t"      \
    " addze  %2,%2        \n\t"      \
    :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16");

#elif defined(TFM_PPC64)
/* For 64-bit PPC */

#define COMBA_START

#define COMBA_CLEAR \
   c0 = c1 = c2 = 0;

#define COMBA_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_STORE(x) \
   x = c0;

#define COMBA_STORE2(x) \
   x = c1;

#define COMBA_FINI

/* untested: will mulhwu change the flags? Docs say no */
#define MULADD(i, j)                 \
__asm__(                             \
    " mulld  16,%6,%7     \n\t"      \
    " addc   %0,%0,16     \n\t"      \
    " mulhdu 16,%6,%7     \n\t"      \
    " adde   %1,%1,16     \n\t"      \
    " addze  %2,%2        \n\t"      \
    :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16");

#elif defined(TFM_AVR32)

/* AVR32 */

#define COMBA_START

#define COMBA_CLEAR \
   c0 = c1 = c2 = 0;

#define COMBA_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_STORE(x) \
   x = c0;

#define COMBA_STORE2(x) \
   x = c1;

#define COMBA_FINI

#define MULADD(i, j)                 \
__asm__(                             \
    " mulu.d r2,%6,%7     \n\t"      \
    " add    %0,r2        \n\t"      \
    " adc    %1,%1,r3     \n\t"      \
    " acr    %2           \n\t"      \
    :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2","r3");

#else
/* ISO C code */

#define COMBA_START

#define COMBA_CLEAR \
   c0 = c1 = c2 = 0;

#define COMBA_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_STORE(x) \
   x = c0;

#define COMBA_STORE2(x) \
   x = c1;

#define COMBA_FINI

#define MULADD(i, j)                                                  \
   do { fp_word t;                                                    \
      t = (fp_word)c0 + ((fp_word)i) * ((fp_word)j); c0 = (fp_digit)t;\
      t = (fp_word)c1 + (t >> DIGIT_BIT);                             \
      c1 = (fp_digit)t; c2 += (fp_digit)(t >> DIGIT_BIT);             \
   } while (0);

#endif
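/* Illustration (not part of the original TFM code): a comba multiply column
 * loop built from MULADD, following the generic fp_mul_comba() pattern in
 * tfm.c (simplified; clamping and workspace aliasing are omitted, and MIN is
 * the usual minimum macro). Column ix of the result sums A->dp[iy] *
 * B->dp[ix-iy] over all valid iy, with c0/c1/c2 as the running three-digit
 * accumulator.
 */
#if 0 /* sketch only, not compiled */
static void mul_comba_sketch(fp_int* A, fp_int* B, fp_int* dst)
{
    int ix, iy, iz, tx, ty;
    int pa = A->used + B->used;
    fp_digit c0, c1, c2, *tA, *tB;

    COMBA_START;
    COMBA_CLEAR;
    for (ix = 0; ix < pa; ix++) {
        /* offsets into A and B for this output column */
        ty = MIN(ix, B->used - 1);
        tx = ix - ty;
        tA = A->dp + tx;
        tB = B->dp + ty;
        /* number of digit pairs that land in column ix */
        iy = MIN(A->used - tx, ty + 1);
        for (iz = 0; iz < iy; ++iz) {
            fp_digit i = *tA++;
            fp_digit j = *tB--;
            MULADD(i, j);            /* c0:c1:c2 += i * j */
        }
        COMBA_STORE(dst->dp[ix]);
        COMBA_FORWARD;               /* shift accumulator one column up */
    }
    COMBA_FINI;
    dst->used = pa;                  /* caller would clamp leading zeros */
}
#endif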
#ifdef TFM_SMALL_SET
    #include "fp_mul_comba_small_set.i"
#endif

#if defined(TFM_MUL3)
    #include "fp_mul_comba_3.i"
#endif
#if defined(TFM_MUL4)
    #include "fp_mul_comba_4.i"
#endif
#if defined(TFM_MUL6)
    #include "fp_mul_comba_6.i"
#endif
#if defined(TFM_MUL7)
    #include "fp_mul_comba_7.i"
#endif
#if defined(TFM_MUL8)
    #include "fp_mul_comba_8.i"
#endif
#if defined(TFM_MUL9)
    #include "fp_mul_comba_9.i"
#endif
#if defined(TFM_MUL12)
    #include "fp_mul_comba_12.i"
#endif
#if defined(TFM_MUL17)
    #include "fp_mul_comba_17.i"
#endif
#if defined(TFM_MUL20)
    #include "fp_mul_comba_20.i"
#endif
#if defined(TFM_MUL24)
    #include "fp_mul_comba_24.i"
#endif
#if defined(TFM_MUL28)
    #include "fp_mul_comba_28.i"
#endif
#if defined(TFM_MUL32)
    #include "fp_mul_comba_32.i"
#endif
#if defined(TFM_MUL48)
    #include "fp_mul_comba_48.i"
#endif
#if defined(TFM_MUL64)
    #include "fp_mul_comba_64.i"
#endif

/* end fp_mul_comba.c asm */