Fork of wolfSSL
asm.c
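Platform-specific assembly macros, with portable ISO C fallbacks, for the TomsFastMath-based fast math code: Montgomery reduction (fp_montgomery_reduce.c), Comba squaring (fp_sqr_comba.c), and Comba multiplication (fp_mul_comba.c). The active variant is selected by the TFM_* platform defines (TFM_X86, TFM_X86_64, TFM_SSE2, TFM_ARM, TFM_PPC32, TFM_PPC64, TFM_AVR32), normally set through wolfssl/wolfcrypt/settings.h or the build flags.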
/* asm.c
 *
 * Copyright (C) 2006-2016 wolfSSL Inc.
 *
 * This file is part of wolfSSL.
 *
 * wolfSSL is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * wolfSSL is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
 */


#ifdef HAVE_CONFIG_H
    #include <config.h>
#endif

#include <wolfssl/wolfcrypt/settings.h>

/*
 * Based on public domain TomsFastMath 0.10 by Tom St Denis, tomstdenis@iahu.ca,
 * http://math.libtomcrypt.com
 */


/******************************************************************/
/* fp_montgomery_reduce.c asm or generic */


/* Each platform needs to query info type 1 from cpuid to see if aesni is
 * supported. Also, let's setup a macro for proper linkage w/o ABI conflicts
 */

#if defined(HAVE_INTEL_MULX)
#ifndef _MSC_VER
    #define cpuid(reg, leaf, sub)\
        __asm__ __volatile__ ("cpuid":\
            "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3]) :\
            "a" (leaf), "c"(sub));

    #define XASM_LINK(f) asm(f)
#else

    #include <intrin.h>
    #define cpuid(a,b) __cpuid((int*)a,b)

    #define XASM_LINK(f)

#endif /* _MSC_VER */

#define EAX 0
#define EBX 1
#define ECX 2
#define EDX 3

#define CPUID_AVX1   0x1
#define CPUID_AVX2   0x2
#define CPUID_RDRAND 0x4
#define CPUID_RDSEED 0x8
#define CPUID_BMI2   0x10   /* MULX, RORX */
#define CPUID_ADX    0x20   /* ADCX, ADOX */

#define IS_INTEL_AVX1   (cpuid_flags&CPUID_AVX1)
#define IS_INTEL_AVX2   (cpuid_flags&CPUID_AVX2)
#define IS_INTEL_BMI2   (cpuid_flags&CPUID_BMI2)
#define IS_INTEL_ADX    (cpuid_flags&CPUID_ADX)
#define IS_INTEL_RDRAND (cpuid_flags&CPUID_RDRAND)
#define IS_INTEL_RDSEED (cpuid_flags&CPUID_RDSEED)
#define SET_FLAGS

static word32 cpuid_check = 0 ;
static word32 cpuid_flags = 0 ;

static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit) {
    int got_intel_cpu = 0;
    unsigned int reg[5];

    reg[4] = '\0' ;
    cpuid(reg, 0, 0);
    if (memcmp((char *)&(reg[EBX]), "Genu", 4) == 0 &&
        memcmp((char *)&(reg[EDX]), "ineI", 4) == 0 &&
        memcmp((char *)&(reg[ECX]), "ntel", 4) == 0) {
        got_intel_cpu = 1;
    }
    if (got_intel_cpu) {
        cpuid(reg, leaf, sub);
        return ((reg[num]>>bit)&0x1) ;
    }
    return 0 ;
}

INLINE static int set_cpuid_flags(void) {
    if (cpuid_check == 0) {
        if (cpuid_flag(7, 0, EBX,  8)) { cpuid_flags |= CPUID_BMI2 ; }
        if (cpuid_flag(7, 0, EBX, 19)) { cpuid_flags |= CPUID_ADX  ; }
        cpuid_check = 1 ;
        return 0 ;
    }
    return 1 ;
}

#define RETURN return
#define IF_HAVE_INTEL_MULX(func, ret) \
    if (cpuid_check == 0) set_cpuid_flags() ; \
    if (IS_INTEL_BMI2 && IS_INTEL_ADX) { func; ret ; }

#else
#define IF_HAVE_INTEL_MULX(func, ret)
#endif

#if defined(TFM_X86) && !defined(TFM_SSE2)
/* x86-32 code */

#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
    mu = c[x] * mp

#define INNERMUL \
__asm__( \
    "movl %5,%%eax \n\t" \
    "mull %4       \n\t" \
    "addl %1,%%eax \n\t" \
    "adcl $0,%%edx \n\t" \
    "addl %%eax,%0 \n\t" \
    "adcl $0,%%edx \n\t" \
    "movl %%edx,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
:"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \
: "%eax", "%edx", "cc")

#define PROPCARRY \
__asm__( \
    "addl   %1,%0   \n\t" \
    "setb   %%al    \n\t" \
    "movzbl %%al,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
:"0"(_c[LO]), "1"(cy) \
: "%eax", "cc")

/******************************************************************/
#elif defined(TFM_X86_64)
/* x86-64 code */

#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
    mu = c[x] * mp;

#define INNERMUL \
__asm__( \
    "movq %5,%%rax \n\t" \
    "mulq %4       \n\t" \
    "addq %1,%%rax \n\t" \
    "adcq $0,%%rdx \n\t" \
    "addq %%rax,%0 \n\t" \
    "adcq $0,%%rdx \n\t" \
    "movq %%rdx,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
:"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \
: "%rax", "%rdx", "cc")

#if defined(HAVE_INTEL_MULX)
#define MULX_INIT(a0, c0, cy)\
    __asm__ volatile( \
        "xorq %%r10, %%r10\n\t" \
        "movq %1,%%rdx\n\t" \
        "addq %2, %0\n\t"        /* c0+=cy; Set CF, OF */ \
        "adoxq %%r10, %%r10\n\t" /* Reset OF */ \
        :"+m"(c0):"r"(a0),"r"(cy):"%r8","%r9", "%r10","%r11","%r12","%rdx") ; \

#define MULX_INNERMUL_R1(c0, c1, pre, rdx)\
    { \
    __asm__ volatile ( \
        "movq %3, %%rdx\n\t" \
        "mulx %%r11,%%r9, %%r8 \n\t" \
        "movq %2, %%r12\n\t" \
        "adoxq %%r9,%0 \n\t" \
        "adcxq %%r8,%1 \n\t" \
        :"+r"(c0),"+r"(c1):"m"(pre),"r"(rdx):"%r8","%r9", "%r10", "%r11","%r12","%rdx" \
    ); }


#define MULX_INNERMUL_R2(c0, c1, pre, rdx)\
    { \
    __asm__ volatile ( \
        "movq %3, %%rdx\n\t" \
        "mulx %%r12,%%r9, %%r8 \n\t" \
        "movq %2, %%r11\n\t" \
        "adoxq %%r9,%0 \n\t" \
        "adcxq %%r8,%1 \n\t" \
        :"+r"(c0),"+r"(c1):"m"(pre),"r"(rdx):"%r8","%r9", "%r10", "%r11","%r12","%rdx" \
    ); }

#define MULX_LOAD_R1(val)\
    __asm__ volatile ( \
        "movq %0, %%r11\n\t"\
        ::"m"(val):"%r8","%r9", "%r10", "%r11","%r12","%rdx"\
) ;

#define MULX_INNERMUL_LAST(c0, c1, rdx)\
    { \
    __asm__ volatile ( \
        "movq %2, %%rdx\n\t" \
        "mulx %%r12,%%r9, %%r8 \n\t" \
        "movq $0, %%r10 \n\t" \
        "adoxq %%r10, %%r9 \n\t" \
        "adcq  $0,%%r8 \n\t" \
        "addq  %%r9,%0 \n\t" \
        "adcq  $0,%%r8 \n\t" \
        "movq %%r8,%1 \n\t" \
        :"+m"(c0),"=m"(c1):"r"(rdx):"%r8","%r9","%r10", "%r11", "%r12","%rdx"\
    ); }

#define MULX_INNERMUL8(x,y,z,cy)\
{    word64 rdx = y ;\
    MULX_LOAD_R1(x[0]) ;\
    MULX_INIT(y, _c0, cy) ; /* rdx=y; z0+=cy; */ \
    MULX_INNERMUL_R1(_c0, _c1, x[1], rdx) ;\
    MULX_INNERMUL_R2(_c1, _c2, x[2], rdx) ;\
    MULX_INNERMUL_R1(_c2, _c3, x[3], rdx) ;\
    MULX_INNERMUL_R2(_c3, _c4, x[4], rdx) ;\
    MULX_INNERMUL_R1(_c4, _c5, x[5], rdx) ;\
    MULX_INNERMUL_R2(_c5, _c6, x[6], rdx) ;\
    MULX_INNERMUL_R1(_c6, _c7, x[7], rdx) ;\
    MULX_INNERMUL_LAST(_c7, cy, rdx) ;\
}
#define INNERMUL8_MULX \
{\
    MULX_INNERMUL8(tmpm, mu, _c, cy);\
}
#endif

#define INNERMUL8 \
__asm__( \
    "movq 0(%5),%%rax    \n\t" \
    "movq 0(%2),%%r10    \n\t" \
    "movq 0x8(%5),%%r11  \n\t" \
    "mulq %4             \n\t" \
    "addq %%r10,%%rax    \n\t" \
    "adcq $0,%%rdx       \n\t" \
    "movq 0x8(%2),%%r10  \n\t" \
    "addq %3,%%rax       \n\t" \
    "adcq $0,%%rdx       \n\t" \
    "movq %%rax,0(%0)    \n\t" \
    "movq %%rdx,%1       \n\t" \
\
    "movq %%r11,%%rax    \n\t" \
    "movq 0x10(%5),%%r11 \n\t" \
    "mulq %4             \n\t" \
    "addq %%r10,%%rax    \n\t" \
    "adcq $0,%%rdx       \n\t" \
    "movq 0x10(%2),%%r10 \n\t" \
    "addq %3,%%rax       \n\t" \
    "adcq $0,%%rdx       \n\t" \
    "movq %%rax,0x8(%0)  \n\t" \
    "movq %%rdx,%1       \n\t" \
\
    "movq %%r11,%%rax    \n\t" \
    "movq 0x18(%5),%%r11 \n\t" \
    "mulq %4             \n\t" \
    "addq %%r10,%%rax    \n\t" \
    "adcq $0,%%rdx       \n\t" \
    "movq 0x18(%2),%%r10 \n\t" \
    "addq %3,%%rax       \n\t" \
    "adcq $0,%%rdx       \n\t" \
    "movq %%rax,0x10(%0) \n\t" \
    "movq %%rdx,%1       \n\t" \
\
    "movq %%r11,%%rax    \n\t" \
    "movq 0x20(%5),%%r11 \n\t" \
    "mulq %4             \n\t" \
    "addq %%r10,%%rax    \n\t" \
    "adcq $0,%%rdx       \n\t" \
    "movq 0x20(%2),%%r10 \n\t" \
    "addq %3,%%rax       \n\t" \
    "adcq $0,%%rdx       \n\t" \
    "movq %%rax,0x18(%0) \n\t" \
    "movq %%rdx,%1       \n\t" \
\
    "movq %%r11,%%rax    \n\t" \
    "movq 0x28(%5),%%r11 \n\t" \
    "mulq %4             \n\t" \
    "addq %%r10,%%rax    \n\t" \
    "adcq $0,%%rdx       \n\t" \
    "movq 0x28(%2),%%r10 \n\t" \
    "addq %3,%%rax       \n\t" \
    "adcq $0,%%rdx       \n\t" \
    "movq %%rax,0x20(%0) \n\t" \
    "movq %%rdx,%1       \n\t" \
\
    "movq %%r11,%%rax    \n\t" \
    "movq 0x30(%5),%%r11 \n\t" \
    "mulq %4             \n\t" \
    "addq %%r10,%%rax    \n\t" \
    "adcq $0,%%rdx       \n\t" \
    "movq 0x30(%2),%%r10 \n\t" \
    "addq %3,%%rax       \n\t" \
    "adcq $0,%%rdx       \n\t" \
    "movq %%rax,0x28(%0) \n\t" \
    "movq %%rdx,%1       \n\t" \
\
    "movq %%r11,%%rax    \n\t" \
    "movq 0x38(%5),%%r11 \n\t" \
    "mulq %4             \n\t" \
    "addq %%r10,%%rax    \n\t" \
    "adcq $0,%%rdx       \n\t" \
    "movq 0x38(%2),%%r10 \n\t" \
    "addq %3,%%rax       \n\t" \
    "adcq $0,%%rdx       \n\t" \
    "movq %%rax,0x30(%0) \n\t" \
    "movq %%rdx,%1       \n\t" \
\
    "movq %%r11,%%rax    \n\t" \
    "mulq %4             \n\t" \
    "addq %%r10,%%rax    \n\t" \
    "adcq $0,%%rdx       \n\t" \
    "addq %3,%%rax       \n\t" \
    "adcq $0,%%rdx       \n\t" \
    "movq %%rax,0x38(%0) \n\t" \
    "movq %%rdx,%1       \n\t" \
\
:"=r"(_c), "=r"(cy) \
: "0"(_c), "1"(cy), "g"(mu), "r"(tmpm)\
: "%rax", "%rdx", "%r10", "%r11", "cc")\

#define PROPCARRY \
__asm__( \
    "addq   %1,%0   \n\t" \
    "setb   %%al    \n\t" \
    "movzbq %%al,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
:"0"(_c[LO]), "1"(cy) \
: "%rax", "cc")

/******************************************************************/
#elif defined(TFM_SSE2)
/* SSE2 code (assumes 32-bit fp_digits) */
/* XMM register assignments:
 * xmm0  *tmpm++, then Mu * (*tmpm++)
 * xmm1  c[x], then Mu
 * xmm2  mp
 * xmm3  cy
 * xmm4  _c[LO]
 */

#define MONT_START \
    __asm__("movd %0,%%mm2"::"g"(mp))

#define MONT_FINI \
    __asm__("emms")

#define LOOP_START \
__asm__( \
    "movd %0,%%mm1 \n\t" \
    "pxor %%mm3,%%mm3 \n\t" \
    "pmuludq %%mm2,%%mm1 \n\t" \
    :: "g"(c[x]))

/* pmuludq on mmx registers does a 32x32->64 multiply. */
#define INNERMUL \
__asm__( \
    "movd %1,%%mm4 \n\t" \
    "movd %2,%%mm0 \n\t" \
    "paddq %%mm4,%%mm3 \n\t" \
    "pmuludq %%mm1,%%mm0 \n\t" \
    "paddq %%mm0,%%mm3 \n\t" \
    "movd %%mm3,%0 \n\t" \
    "psrlq $32, %%mm3 \n\t" \
:"=g"(_c[LO]) : "0"(_c[LO]), "g"(*tmpm++) );

#define INNERMUL8 \
__asm__( \
    "movd 0(%1),%%mm4 \n\t" \
    "movd 0(%2),%%mm0 \n\t" \
    "paddq %%mm4,%%mm3 \n\t" \
    "pmuludq %%mm1,%%mm0 \n\t" \
    "movd 4(%2),%%mm5 \n\t" \
    "paddq %%mm0,%%mm3 \n\t" \
    "movd 4(%1),%%mm6 \n\t" \
    "movd %%mm3,0(%0) \n\t" \
    "psrlq $32, %%mm3 \n\t" \
\
    "paddq %%mm6,%%mm3 \n\t" \
    "pmuludq %%mm1,%%mm5 \n\t" \
    "movd 8(%2),%%mm6 \n\t" \
    "paddq %%mm5,%%mm3 \n\t" \
    "movd 8(%1),%%mm7 \n\t" \
    "movd %%mm3,4(%0) \n\t" \
    "psrlq $32, %%mm3 \n\t" \
\
    "paddq %%mm7,%%mm3 \n\t" \
    "pmuludq %%mm1,%%mm6 \n\t" \
    "movd 12(%2),%%mm7 \n\t" \
    "paddq %%mm6,%%mm3 \n\t" \
    "movd 12(%1),%%mm5 \n\t" \
    "movd %%mm3,8(%0) \n\t" \
    "psrlq $32, %%mm3 \n\t" \
\
    "paddq %%mm5,%%mm3 \n\t" \
    "pmuludq %%mm1,%%mm7 \n\t" \
    "movd 16(%2),%%mm5 \n\t" \
    "paddq %%mm7,%%mm3 \n\t" \
    "movd 16(%1),%%mm6 \n\t" \
    "movd %%mm3,12(%0) \n\t" \
    "psrlq $32, %%mm3 \n\t" \
\
    "paddq %%mm6,%%mm3 \n\t" \
    "pmuludq %%mm1,%%mm5 \n\t" \
    "movd 20(%2),%%mm6 \n\t" \
    "paddq %%mm5,%%mm3 \n\t" \
    "movd 20(%1),%%mm7 \n\t" \
    "movd %%mm3,16(%0) \n\t" \
    "psrlq $32, %%mm3 \n\t" \
\
    "paddq %%mm7,%%mm3 \n\t" \
    "pmuludq %%mm1,%%mm6 \n\t" \
    "movd 24(%2),%%mm7 \n\t" \
    "paddq %%mm6,%%mm3 \n\t" \
    "movd 24(%1),%%mm5 \n\t" \
    "movd %%mm3,20(%0) \n\t" \
    "psrlq $32, %%mm3 \n\t" \
\
    "paddq %%mm5,%%mm3 \n\t" \
    "pmuludq %%mm1,%%mm7 \n\t" \
    "movd 28(%2),%%mm5 \n\t" \
    "paddq %%mm7,%%mm3 \n\t" \
    "movd 28(%1),%%mm6 \n\t" \
    "movd %%mm3,24(%0) \n\t" \
    "psrlq $32, %%mm3 \n\t" \
\
    "paddq %%mm6,%%mm3 \n\t" \
    "pmuludq %%mm1,%%mm5 \n\t" \
    "paddq %%mm5,%%mm3 \n\t" \
    "movd %%mm3,28(%0) \n\t" \
    "psrlq $32, %%mm3 \n\t" \
:"=r"(_c) : "0"(_c), "r"(tmpm) );

/* TAO switched tmpm from "g" to "r" after gcc tried to index the indexed stack
   pointer */

#define LOOP_END \
__asm__( "movd %%mm3,%0  \n" :"=r"(cy))

#define PROPCARRY \
__asm__( \
    "addl   %1,%0   \n\t" \
    "setb   %%al    \n\t" \
    "movzbl %%al,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
:"0"(_c[LO]), "1"(cy) \
: "%eax", "cc")

/******************************************************************/
#elif defined(TFM_ARM)
/* ARMv4 code */

#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
    mu = c[x] * mp


#ifdef __thumb__

#define INNERMUL \
__asm__( \
    " LDR    r0,%1            \n\t" \
    " ADDS   r0,r0,%0         \n\t" \
    " ITE CS                  \n\t" \
    " MOVCS  %0,#1            \n\t" \
    " MOVCC  %0,#0            \n\t" \
    " UMLAL  r0,%0,%3,%4      \n\t" \
    " STR    r0,%1            \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"m"(_c[0]):"r0","cc");

#define PROPCARRY \
__asm__( \
    " LDR   r0,%1            \n\t" \
    " ADDS  r0,r0,%0         \n\t" \
    " STR   r0,%1            \n\t" \
    " ITE CS                 \n\t" \
    " MOVCS %0,#1            \n\t" \
    " MOVCC %0,#0            \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"m"(_c[0]):"r0","cc");


/* TAO thumb mode uses ite (if then else) to detect carry directly
 * fixed unmatched constraint warning by changing 1 to m */

#else /* __thumb__ */

#define INNERMUL \
__asm__( \
    " LDR    r0,%1            \n\t" \
    " ADDS   r0,r0,%0         \n\t" \
    " MOVCS  %0,#1            \n\t" \
    " MOVCC  %0,#0            \n\t" \
    " UMLAL  r0,%0,%3,%4      \n\t" \
    " STR    r0,%1            \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r0","cc");

#define PROPCARRY \
__asm__( \
    " LDR   r0,%1            \n\t" \
    " ADDS  r0,r0,%0         \n\t" \
    " STR   r0,%1            \n\t" \
    " MOVCS %0,#1            \n\t" \
    " MOVCC %0,#0            \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r0","cc");

#endif /* __thumb__ */

#elif defined(TFM_PPC32)

/* PPC32 */
#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
    mu = c[x] * mp

#define INNERMUL \
__asm__( \
    " mullw    16,%3,%4       \n\t" \
    " mulhwu   17,%3,%4       \n\t" \
    " addc     16,16,%2       \n\t" \
    " addze    17,17          \n\t" \
    " addc     %1,16,%5       \n\t" \
    " addze    %0,17          \n\t" \
:"=r"(cy),"=r"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "cc"); ++tmpm;

#define PROPCARRY \
__asm__( \
    " addc     %1,%3,%2      \n\t" \
    " xor      %0,%2,%2      \n\t" \
    " addze    %0,%2         \n\t" \
:"=r"(cy),"=r"(_c[0]):"0"(cy),"1"(_c[0]):"cc");

#elif defined(TFM_PPC64)

/* PPC64 */
#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
    mu = c[x] * mp

#define INNERMUL \
__asm__( \
    " mulld    16,%3,%4       \n\t" \
    " mulhdu   17,%3,%4       \n\t" \
    " addc     16,16,%0       \n\t" \
    " addze    17,17          \n\t" \
    " ldx      18,0,%1        \n\t" \
    " addc     16,16,18       \n\t" \
    " addze    %0,17          \n\t" \
    " sdx      16,0,%1        \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "18","cc"); ++tmpm;

#define PROPCARRY \
__asm__( \
    " ldx      16,0,%1       \n\t" \
    " addc     16,16,%0      \n\t" \
    " sdx      16,0,%1       \n\t" \
    " xor      %0,%0,%0      \n\t" \
    " addze    %0,%0         \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"16","cc");

/******************************************************************/

#elif defined(TFM_AVR32)

/* AVR32 */
#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
    mu = c[x] * mp

#define INNERMUL \
__asm__( \
    " ld.w     r2,%1         \n\t" \
    " add      r2,%0         \n\t" \
    " eor      r3,r3         \n\t" \
    " acr      r3            \n\t" \
    " macu.d   r2,%3,%4      \n\t" \
    " st.w     %1,r2         \n\t" \
    " mov      %0,r3         \n\t" \
:"=r"(cy),"=r"(_c):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c):"r2","r3");

#define PROPCARRY \
__asm__( \
    " ld.w     r2,%1        \n\t" \
    " add      r2,%0        \n\t" \
    " st.w     %1,r2        \n\t" \
    " eor      %0,%0        \n\t" \
    " acr      %0           \n\t" \
:"=r"(cy),"=r"(&_c[0]):"0"(cy),"1"(&_c[0]):"r2","cc");

#else

/* ISO C code */
#define MONT_START
#define MONT_FINI
#define LOOP_END
#define LOOP_START \
    mu = c[x] * mp

#define INNERMUL \
    do { fp_word t; \
    t = ((fp_word)_c[0] + (fp_word)cy) + \
        (((fp_word)mu) * ((fp_word)*tmpm++)); \
    _c[0] = (fp_digit)t; \
    cy = (fp_digit)(t >> DIGIT_BIT); \
    } while (0)

#define PROPCARRY \
    do { fp_digit t = _c[0] += cy; cy = (t < cy); } while (0)

#endif
/******************************************************************/


#define LO 0
/* end fp_montogomery_reduce.c asm */


/* start fp_sqr_comba.c asm */
#if defined(TFM_X86)

/* x86-32 optimized */

#define COMBA_START

#define CLEAR_CARRY \
    c0 = c1 = c2 = 0;

#define COMBA_STORE(x) \
    x = c0;

#define COMBA_STORE2(x) \
    x = c1;

#define CARRY_FORWARD \
    do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_FINI

#define SQRADD(i, j) \
__asm__( \
    "movl %6,%%eax     \n\t" \
    "mull %%eax        \n\t" \
    "addl %%eax,%0     \n\t" \
    "adcl %%edx,%1     \n\t" \
    "adcl $0,%2        \n\t" \
    :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","cc");

#define SQRADD2(i, j) \
__asm__( \
    "movl %6,%%eax     \n\t" \
    "mull %7           \n\t" \
    "addl %%eax,%0     \n\t" \
    "adcl %%edx,%1     \n\t" \
    "adcl $0,%2        \n\t" \
    "addl %%eax,%0     \n\t" \
    "adcl %%edx,%1     \n\t" \
    "adcl $0,%2        \n\t" \
    :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx", "cc");

#define SQRADDSC(i, j) \
__asm__( \
    "movl  %3,%%eax    \n\t" \
    "mull  %4          \n\t" \
    "movl  %%eax,%0    \n\t" \
    "movl  %%edx,%1    \n\t" \
    "xorl  %2,%2       \n\t" \
    :"=r"(sc0), "=r"(sc1), "=r"(sc2): "g"(i), "g"(j) :"%eax","%edx","cc");

/* TAO removed sc0,1,2 as input to remove warning so %6,%7 become %3,%4 */

#define SQRADDAC(i, j) \
__asm__( \
    "movl  %6,%%eax    \n\t" \
    "mull  %7          \n\t" \
    "addl  %%eax,%0    \n\t" \
    "adcl  %%edx,%1    \n\t" \
    "adcl  $0,%2       \n\t" \
    :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","cc");

#define SQRADDDB \
__asm__( \
    "addl %6,%0        \n\t" \
    "adcl %7,%1        \n\t" \
    "adcl %8,%2        \n\t" \
    "addl %6,%0        \n\t" \
    "adcl %7,%1        \n\t" \
    "adcl %8,%2        \n\t" \
    :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");

#elif defined(TFM_X86_64)
/* x86-64 optimized */

#define COMBA_START

#define CLEAR_CARRY \
    c0 = c1 = c2 = 0;

#define COMBA_STORE(x) \
    x = c0;

#define COMBA_STORE2(x) \
    x = c1;

#define CARRY_FORWARD \
    do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_FINI

#define SQRADD(i, j) \
__asm__( \
    "movq %6,%%rax     \n\t" \
    "mulq %%rax        \n\t" \
    "addq %%rax,%0     \n\t" \
    "adcq %%rdx,%1     \n\t" \
    "adcq $0,%2        \n\t" \
    :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "x"(i) :"%rax","%rdx","cc");

#define SQRADD2(i, j) \
__asm__( \
    "movq %6,%%rax     \n\t" \
    "mulq %7           \n\t" \
    "addq %%rax,%0     \n\t" \
    "adcq %%rdx,%1     \n\t" \
    "adcq $0,%2        \n\t" \
    "addq %%rax,%0     \n\t" \
    "adcq %%rdx,%1     \n\t" \
    "adcq $0,%2        \n\t" \
    :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","cc");

#define SQRADDSC(i, j) \
__asm__( \
    "movq  %3,%%rax    \n\t" \
    "mulq  %4          \n\t" \
    "movq  %%rax,%0    \n\t" \
    "movq  %%rdx,%1    \n\t" \
    "xorq  %2,%2       \n\t" \
    :"=r"(sc0), "=r"(sc1), "=r"(sc2): "g"(i), "g"(j) :"%rax","%rdx","cc");

/* TAO removed sc0,1,2 as input to remove warning so %6,%7 become %3,%4 */

#define SQRADDAC(i, j) \
__asm__( \
    "movq  %6,%%rax    \n\t" \
    "mulq  %7          \n\t" \
    "addq  %%rax,%0    \n\t" \
    "adcq  %%rdx,%1    \n\t" \
    "adcq  $0,%2       \n\t" \
    :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","cc");

#define SQRADDDB \
__asm__( \
    "addq %6,%0        \n\t" \
    "adcq %7,%1        \n\t" \
    "adcq %8,%2        \n\t" \
    "addq %6,%0        \n\t" \
    "adcq %7,%1        \n\t" \
    "adcq %8,%2        \n\t" \
    :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");

#elif defined(TFM_SSE2)

/* SSE2 Optimized */
#define COMBA_START

#define CLEAR_CARRY \
    c0 = c1 = c2 = 0;

#define COMBA_STORE(x) \
    x = c0;

#define COMBA_STORE2(x) \
    x = c1;

#define CARRY_FORWARD \
    do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_FINI \
    __asm__("emms");

#define SQRADD(i, j) \
__asm__( \
    "movd %6,%%mm0        \n\t" \
    "pmuludq %%mm0,%%mm0  \n\t" \
    "movd %%mm0,%%eax     \n\t" \
    "psrlq $32,%%mm0      \n\t" \
    "addl %%eax,%0        \n\t" \
    "movd %%mm0,%%eax     \n\t" \
    "adcl %%eax,%1        \n\t" \
    "adcl $0,%2           \n\t" \
    :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","cc");

#define SQRADD2(i, j) \
__asm__( \
    "movd %6,%%mm0        \n\t" \
    "movd %7,%%mm1        \n\t" \
    "pmuludq %%mm1,%%mm0  \n\t" \
    "movd %%mm0,%%eax     \n\t" \
    "psrlq $32,%%mm0      \n\t" \
    "movd %%mm0,%%edx     \n\t" \
    "addl %%eax,%0        \n\t" \
    "adcl %%edx,%1        \n\t" \
    "adcl $0,%2           \n\t" \
    "addl %%eax,%0        \n\t" \
    "adcl %%edx,%1        \n\t" \
    "adcl $0,%2           \n\t" \
    :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc");

#define SQRADDSC(i, j) \
__asm__( \
    "movd %3,%%mm0        \n\t" \
    "movd %4,%%mm1        \n\t" \
    "pmuludq %%mm1,%%mm0  \n\t" \
    "movd %%mm0,%0        \n\t" \
    "psrlq $32,%%mm0      \n\t" \
    "movd %%mm0,%1        \n\t" \
    "xorl %2,%2           \n\t" \
    :"=r"(sc0), "=r"(sc1), "=r"(sc2): "m"(i), "m"(j));

/* TAO removed sc0,1,2 as input to remove warning so %6,%7 become %3,%4 */

#define SQRADDAC(i, j) \
__asm__( \
    "movd %6,%%mm0        \n\t" \
    "movd %7,%%mm1        \n\t" \
    "pmuludq %%mm1,%%mm0  \n\t" \
    "movd %%mm0,%%eax     \n\t" \
    "psrlq $32,%%mm0      \n\t" \
    "movd %%mm0,%%edx     \n\t" \
    "addl %%eax,%0        \n\t" \
    "adcl %%edx,%1        \n\t" \
    "adcl $0,%2           \n\t" \
    :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j) :"%eax","%edx","cc");

#define SQRADDDB \
__asm__( \
    "addl %6,%0           \n\t" \
    "adcl %7,%1           \n\t" \
    "adcl %8,%2           \n\t" \
    "addl %6,%0           \n\t" \
    "adcl %7,%1           \n\t" \
    "adcl %8,%2           \n\t" \
    :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");

#elif defined(TFM_ARM)

/* ARM code */

#define COMBA_START

#define CLEAR_CARRY \
    c0 = c1 = c2 = 0;

#define COMBA_STORE(x) \
    x = c0;

#define COMBA_STORE2(x) \
    x = c1;

#define CARRY_FORWARD \
    do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_FINI

/* multiplies point i and j, updates carry "c1" and digit c2 */
#define SQRADD(i, j) \
__asm__( \
    " UMULL  r0,r1,%6,%6              \n\t" \
    " ADDS   %0,%0,r0                 \n\t" \
    " ADCS   %1,%1,r1                 \n\t" \
    " ADC    %2,%2,#0                 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i) : "r0", "r1", "cc");

/* for squaring some of the terms are doubled... */
#define SQRADD2(i, j) \
__asm__( \
    " UMULL  r0,r1,%6,%7              \n\t" \
    " ADDS   %0,%0,r0                 \n\t" \
    " ADCS   %1,%1,r1                 \n\t" \
    " ADC    %2,%2,#0                 \n\t" \
    " ADDS   %0,%0,r0                 \n\t" \
    " ADCS   %1,%1,r1                 \n\t" \
    " ADC    %2,%2,#0                 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "cc");

#define SQRADDSC(i, j) \
__asm__( \
    " UMULL  %0,%1,%3,%4              \n\t" \
    " SUB    %2,%2,%2                 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2) : "r"(i), "r"(j) : "cc");

/* TAO removed sc0,1,2 as input to remove warning so %6,%7 become %3,%4 */

#define SQRADDAC(i, j) \
__asm__( \
    " UMULL  r0,r1,%6,%7              \n\t" \
    " ADDS   %0,%0,r0                 \n\t" \
    " ADCS   %1,%1,r1                 \n\t" \
    " ADC    %2,%2,#0                 \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "r0", "r1", "cc");

#define SQRADDDB \
__asm__( \
    " ADDS  %0,%0,%3                  \n\t" \
    " ADCS  %1,%1,%4                  \n\t" \
    " ADC   %2,%2,%5                  \n\t" \
    " ADDS  %0,%0,%3                  \n\t" \
    " ADCS  %1,%1,%4                  \n\t" \
    " ADC   %2,%2,%5                  \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");

#elif defined(TFM_PPC32)

/* PPC32 */

#define COMBA_START

#define CLEAR_CARRY \
    c0 = c1 = c2 = 0;

#define COMBA_STORE(x) \
    x = c0;

#define COMBA_STORE2(x) \
    x = c1;

#define CARRY_FORWARD \
    do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_FINI

/* multiplies point i and j, updates carry "c1" and digit c2 */
#define SQRADD(i, j) \
__asm__( \
    " mullw  16,%6,%6       \n\t" \
    " addc   %0,%0,16       \n\t" \
    " mulhwu 16,%6,%6       \n\t" \
    " adde   %1,%1,16       \n\t" \
    " addze  %2,%2          \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","cc");

/* for squaring some of the terms are doubled... */
#define SQRADD2(i, j) \
__asm__( \
    " mullw  16,%6,%7       \n\t" \
    " mulhwu 17,%6,%7       \n\t" \
    " addc   %0,%0,16       \n\t" \
    " adde   %1,%1,17       \n\t" \
    " addze  %2,%2          \n\t" \
    " addc   %0,%0,16       \n\t" \
    " adde   %1,%1,17       \n\t" \
    " addze  %2,%2          \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","cc");

#define SQRADDSC(i, j) \
__asm__( \
    " mullw  %0,%6,%7       \n\t" \
    " mulhwu %1,%6,%7       \n\t" \
    " xor    %2,%2,%2       \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc");

#define SQRADDAC(i, j) \
__asm__( \
    " mullw  16,%6,%7       \n\t" \
    " addc   %0,%0,16       \n\t" \
    " mulhwu 16,%6,%7       \n\t" \
    " adde   %1,%1,16       \n\t" \
    " addze  %2,%2          \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "cc");

#define SQRADDDB \
__asm__( \
    " addc   %0,%0,%3       \n\t" \
    " adde   %1,%1,%4       \n\t" \
    " adde   %2,%2,%5       \n\t" \
    " addc   %0,%0,%3       \n\t" \
    " adde   %1,%1,%4       \n\t" \
    " adde   %2,%2,%5       \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");

#elif defined(TFM_PPC64)
/* PPC64 */

#define COMBA_START

#define CLEAR_CARRY \
    c0 = c1 = c2 = 0;

#define COMBA_STORE(x) \
    x = c0;

#define COMBA_STORE2(x) \
    x = c1;

#define CARRY_FORWARD \
    do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_FINI

/* multiplies point i and j, updates carry "c1" and digit c2 */
#define SQRADD(i, j) \
__asm__( \
    " mulld  16,%6,%6       \n\t" \
    " addc   %0,%0,16       \n\t" \
    " mulhdu 16,%6,%6       \n\t" \
    " adde   %1,%1,16       \n\t" \
    " addze  %2,%2          \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","cc");

/* for squaring some of the terms are doubled... */
#define SQRADD2(i, j) \
__asm__( \
    " mulld  16,%6,%7       \n\t" \
    " mulhdu 17,%6,%7       \n\t" \
    " addc   %0,%0,16       \n\t" \
    " adde   %1,%1,17       \n\t" \
    " addze  %2,%2          \n\t" \
    " addc   %0,%0,16       \n\t" \
    " adde   %1,%1,17       \n\t" \
    " addze  %2,%2          \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","cc");

#define SQRADDSC(i, j) \
__asm__( \
    " mulld  %0,%6,%7       \n\t" \
    " mulhdu %1,%6,%7       \n\t" \
    " xor    %2,%2,%2       \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc");

#define SQRADDAC(i, j) \
__asm__( \
    " mulld  16,%6,%7       \n\t" \
    " addc   %0,%0,16       \n\t" \
    " mulhdu 16,%6,%7       \n\t" \
    " adde   %1,%1,16       \n\t" \
    " addze  %2,%2          \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "cc");

#define SQRADDDB \
__asm__( \
    " addc   %0,%0,%3       \n\t" \
    " adde   %1,%1,%4       \n\t" \
    " adde   %2,%2,%5       \n\t" \
    " addc   %0,%0,%3       \n\t" \
    " adde   %1,%1,%4       \n\t" \
    " adde   %2,%2,%5       \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");


#elif defined(TFM_AVR32)

/* AVR32 */

#define COMBA_START

#define CLEAR_CARRY \
    c0 = c1 = c2 = 0;

#define COMBA_STORE(x) \
    x = c0;

#define COMBA_STORE2(x) \
    x = c1;

#define CARRY_FORWARD \
    do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_FINI

/* multiplies point i and j, updates carry "c1" and digit c2 */
#define SQRADD(i, j) \
__asm__( \
    " mulu.d r2,%6,%6       \n\t" \
    " add    %0,%0,r2       \n\t" \
    " adc    %1,%1,r3       \n\t" \
    " acr    %2             \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r2","r3");

/* for squaring some of the terms are doubled... */
#define SQRADD2(i, j) \
__asm__( \
    " mulu.d r2,%6,%7       \n\t" \
    " add    %0,%0,r2       \n\t" \
    " adc    %1,%1,r3       \n\t" \
    " acr    %2,            \n\t" \
    " add    %0,%0,r2       \n\t" \
    " adc    %1,%1,r3       \n\t" \
    " acr    %2,            \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2", "r3");

#define SQRADDSC(i, j) \
__asm__( \
    " mulu.d r2,%6,%7       \n\t" \
    " mov    %0,r2          \n\t" \
    " mov    %1,r3          \n\t" \
    " eor    %2,%2          \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "r2", "r3");

#define SQRADDAC(i, j) \
__asm__( \
    " mulu.d r2,%6,%7       \n\t" \
    " add    %0,%0,r2       \n\t" \
    " adc    %1,%1,r3       \n\t" \
    " acr    %2             \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r2", "r3");

#define SQRADDDB \
__asm__( \
    " add    %0,%0,%3       \n\t" \
    " adc    %1,%1,%4       \n\t" \
    " adc    %2,%2,%5       \n\t" \
    " add    %0,%0,%3       \n\t" \
    " adc    %1,%1,%4       \n\t" \
    " adc    %2,%2,%5       \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");


#else

#define TFM_ISO

/* ISO C portable code */

#define COMBA_START

#define CLEAR_CARRY \
    c0 = c1 = c2 = 0;

#define COMBA_STORE(x) \
    x = c0;

#define COMBA_STORE2(x) \
    x = c1;

#define CARRY_FORWARD \
    do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_FINI

/* multiplies point i and j, updates carry "c1" and digit c2 */
#define SQRADD(i, j) \
    do { fp_word t; \
    t = c0 + ((fp_word)i) * ((fp_word)j); c0 = (fp_digit)t; \
    t = c1 + (t >> DIGIT_BIT); c1 = (fp_digit)t; \
    c2 += (fp_digit)(t >> DIGIT_BIT); \
    } while (0);


/* for squaring some of the terms are doubled... */
#define SQRADD2(i, j) \
    do { fp_word t; \
    t  = ((fp_word)i) * ((fp_word)j); \
    tt = (fp_word)c0 + t; c0 = (fp_digit)tt; \
    tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = (fp_digit)tt; \
    c2 += (fp_digit)(tt >> DIGIT_BIT); \
    tt = (fp_word)c0 + t; c0 = (fp_digit)tt; \
    tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = (fp_digit)tt; \
    c2 += (fp_digit)(tt >> DIGIT_BIT); \
    } while (0);

#define SQRADDSC(i, j) \
    do { fp_word t; \
    t = ((fp_word)i) * ((fp_word)j); \
    sc0 = (fp_digit)t; sc1 = (t >> DIGIT_BIT); sc2 = 0; \
    } while (0);

#define SQRADDAC(i, j) \
    do { fp_word t; \
    t = sc0 + ((fp_word)i) * ((fp_word)j); sc0 = (fp_digit)t; \
    t = sc1 + (t >> DIGIT_BIT); sc1 = (fp_digit)t; \
    sc2 += (fp_digit)(t >> DIGIT_BIT); \
    } while (0);

#define SQRADDDB \
    do { fp_word t; \
    t = ((fp_word)sc0) + ((fp_word)sc0) + c0; c0 = (fp_digit)t; \
    t = ((fp_word)sc1) + ((fp_word)sc1) + c1 + (t >> DIGIT_BIT); \
    c1 = (fp_digit)t; \
    c2 = c2 + (fp_digit)(((fp_word)sc2) + ((fp_word)sc2) + (t >> DIGIT_BIT)); \
    } while (0);

#endif

#ifdef TFM_SMALL_SET
#include "fp_sqr_comba_small_set.i"
#endif

#if defined(TFM_SQR3) && FP_SIZE >= 6
#include "fp_sqr_comba_3.i"
#endif
#if defined(TFM_SQR4) && FP_SIZE >= 8
#include "fp_sqr_comba_4.i"
#endif
#if defined(TFM_SQR6) && FP_SIZE >= 12
#include "fp_sqr_comba_6.i"
#endif
#if defined(TFM_SQR7) && FP_SIZE >= 14
#include "fp_sqr_comba_7.i"
#endif
#if defined(TFM_SQR8) && FP_SIZE >= 16
#include "fp_sqr_comba_8.i"
#endif
#if defined(TFM_SQR9) && FP_SIZE >= 18
#include "fp_sqr_comba_9.i"
#endif
#if defined(TFM_SQR12) && FP_SIZE >= 24
#include "fp_sqr_comba_12.i"
#endif
#if defined(TFM_SQR17) && FP_SIZE >= 34
#include "fp_sqr_comba_17.i"
#endif
#if defined(TFM_SQR20) && FP_SIZE >= 40
#include "fp_sqr_comba_20.i"
#endif
#if defined(TFM_SQR24) && FP_SIZE >= 48
#include "fp_sqr_comba_24.i"
#endif
#if defined(TFM_SQR28) && FP_SIZE >= 56
#include "fp_sqr_comba_28.i"
#endif
#if defined(TFM_SQR32) && FP_SIZE >= 64
#include "fp_sqr_comba_32.i"
#endif
#if defined(TFM_SQR48) && FP_SIZE >= 96
#include "fp_sqr_comba_48.i"
#endif
#if defined(TFM_SQR64) && FP_SIZE >= 128
#include "fp_sqr_comba_64.i"
#endif
/* end fp_sqr_comba.c asm */

/* start fp_mul_comba.c asm */
/* these are the combas. Worship them. */
#if defined(TFM_X86)
/* Generic x86 optimized code */

/* anything you need at the start */
#define COMBA_START

/* clear the chaining variables */
#define COMBA_CLEAR \
    c0 = c1 = c2 = 0;

/* forward the carry to the next digit */
#define COMBA_FORWARD \
    do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* store the first sum */
#define COMBA_STORE(x) \
    x = c0;

/* store the second sum [carry] */
#define COMBA_STORE2(x) \
    x = c1;

/* anything you need at the end */
#define COMBA_FINI

/* this should multiply i and j */
#define MULADD(i, j) \
__asm__( \
    "movl %6,%%eax     \n\t" \
    "mull %7           \n\t" \
    "addl %%eax,%0     \n\t" \
    "adcl %%edx,%1     \n\t" \
    "adcl $0,%2        \n\t" \
    :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc");

#elif defined(TFM_X86_64)
/* x86-64 optimized */

/* anything you need at the start */
#define COMBA_START

/* clear the chaining variables */
#define COMBA_CLEAR \
    c0 = c1 = c2 = 0;

/* forward the carry to the next digit */
#define COMBA_FORWARD \
    do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* store the first sum */
#define COMBA_STORE(x) \
    x = c0;

/* store the second sum [carry] */
#define COMBA_STORE2(x) \
    x = c1;

/* anything you need at the end */
#define COMBA_FINI

/* this should multiply i and j */
#define MULADD(i, j) \
__asm__ ( \
    "movq %6,%%rax     \n\t" \
    "mulq %7           \n\t" \
    "addq %%rax,%0     \n\t" \
    "adcq %%rdx,%1     \n\t" \
    "adcq $0,%2        \n\t" \
    :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","cc");


#if defined(HAVE_INTEL_MULX)
#define MULADD_MULX(b0, c0, c1, rdx)\
    __asm__ volatile ( \
        "movq %3, %%rdx\n\t" \
        "mulx %2,%%r9, %%r8 \n\t" \
        "adoxq %%r9,%0 \n\t" \
        "adcxq %%r8,%1 \n\t" \
        :"+r"(c0),"+r"(c1):"r"(b0), "r"(rdx):"%r8","%r9","%r10","%rdx"\
    )


#define MULADD_MULX_ADD_CARRY(c0, c1)\
    __asm__ volatile(\
        "mov $0, %%r10\n\t"\
        "movq %1, %%r8\n\t"\
        "adox %%r10, %0\n\t"\
        "adcx %%r10, %1\n\t"\
        :"+r"(c0),"+r"(c1)::"%r8","%r9","%r10","%rdx") ;

#define MULADD_SET_A(a0)\
    __asm__ volatile("add $0, %%r8\n\t" \
        "movq %0,%%rdx\n\t" \
        ::"r"(a0):"%r8","%r9","%r10","%rdx") ;

#define MULADD_BODY(a,b,c)\
    {   word64 rdx = a->dp[ix] ; \
        cp = &(c->dp[iz]) ; \
        c0 = cp[0] ; c1 = cp[1]; \
        MULADD_SET_A(rdx) ; \
        MULADD_MULX(b0, c0, c1, rdx) ;\
        cp[0]=c0; c0=cp[2]; \
        MULADD_MULX(b1, c1, c0, rdx) ;\
        cp[1]=c1; c1=cp[3]; \
        MULADD_MULX(b2, c0, c1, rdx) ;\
        cp[2]=c0; c0=cp[4]; \
        MULADD_MULX(b3, c1, c0, rdx) ;\
        cp[3]=c1; c1=cp[5]; \
        MULADD_MULX_ADD_CARRY(c0, c1);\
        cp[4]=c0; cp[5]=c1; \
    }

#define TFM_INTEL_MUL_COMBA(a, b, c)\
    for(ix=0; ix<pa; ix++)c->dp[ix]=0 ; \
    for(iy=0; (iy<b->used); iy+=4) { \
        fp_digit *bp ; \
        bp = &(b->dp[iy+0]) ; \
        fp_digit b0 = bp[0] , b1= bp[1], \
                 b2= bp[2], b3= bp[3]; \
        ix=0, iz=iy; \
        while(ix<a->used) { \
            fp_digit c0, c1; \
            fp_digit *cp ; \
            MULADD_BODY(a,b,c); \
            ix++ ; iz++ ; \
        } \
    };
#endif

#elif defined(TFM_SSE2)
/* use SSE2 optimizations */

/* anything you need at the start */
#define COMBA_START

/* clear the chaining variables */
#define COMBA_CLEAR \
    c0 = c1 = c2 = 0;

/* forward the carry to the next digit */
#define COMBA_FORWARD \
    do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* store the first sum */
#define COMBA_STORE(x) \
    x = c0;

/* store the second sum [carry] */
#define COMBA_STORE2(x) \
    x = c1;

/* anything you need at the end */
#define COMBA_FINI \
    __asm__("emms");

/* this should multiply i and j */
#define MULADD(i, j) \
__asm__( \
    "movd %6,%%mm0        \n\t" \
    "movd %7,%%mm1        \n\t" \
    "pmuludq %%mm1,%%mm0  \n\t" \
    "movd %%mm0,%%eax     \n\t" \
    "psrlq $32,%%mm0      \n\t" \
    "addl %%eax,%0        \n\t" \
    "movd %%mm0,%%eax     \n\t" \
    "adcl %%eax,%1        \n\t" \
    "adcl $0,%2           \n\t" \
    :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","cc");

#elif defined(TFM_ARM)
/* ARM code */

#define COMBA_START

#define COMBA_CLEAR \
    c0 = c1 = c2 = 0;

#define COMBA_FORWARD \
    do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_STORE(x) \
    x = c0;

#define COMBA_STORE2(x) \
    x = c1;

#define COMBA_FINI

#define MULADD(i, j) \
__asm__( \
    " UMULL  r0,r1,%6,%7            \n\t" \
    " ADDS   %0,%0,r0               \n\t" \
    " ADCS   %1,%1,r1               \n\t" \
    " ADC    %2,%2,#0               \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "cc");

#elif defined(TFM_PPC32)
/* For 32-bit PPC */

#define COMBA_START

#define COMBA_CLEAR \
    c0 = c1 = c2 = 0;

#define COMBA_FORWARD \
    do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_STORE(x) \
    x = c0;

#define COMBA_STORE2(x) \
    x = c1;

#define COMBA_FINI

/* untested: will mulhwu change the flags? Docs say no */
#define MULADD(i, j) \
__asm__( \
    " mullw  16,%6,%7       \n\t" \
    " addc   %0,%0,16       \n\t" \
    " mulhwu 16,%6,%7       \n\t" \
    " adde   %1,%1,16       \n\t" \
    " addze  %2,%2          \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16");

#elif defined(TFM_PPC64)
/* For 64-bit PPC */

#define COMBA_START

#define COMBA_CLEAR \
    c0 = c1 = c2 = 0;

#define COMBA_FORWARD \
    do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_STORE(x) \
    x = c0;

#define COMBA_STORE2(x) \
    x = c1;

#define COMBA_FINI

/* untested: will mulhwu change the flags? Docs say no */
#define MULADD(i, j) \
__asm__( \
    " mulld  16,%6,%7       \n\t" \
    " addc   %0,%0,16       \n\t" \
    " mulhdu 16,%6,%7       \n\t" \
    " adde   %1,%1,16       \n\t" \
    " addze  %2,%2          \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16");

#elif defined(TFM_AVR32)

/* AVR32 */

#define COMBA_START

#define COMBA_CLEAR \
    c0 = c1 = c2 = 0;

#define COMBA_FORWARD \
    do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_STORE(x) \
    x = c0;

#define COMBA_STORE2(x) \
    x = c1;

#define COMBA_FINI

#define MULADD(i, j) \
__asm__( \
    " mulu.d r2,%6,%7       \n\t"\
    " add    %0,r2          \n\t"\
    " adc    %1,%1,r3       \n\t"\
    " acr    %2             \n\t"\
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2","r3");

#else
/* ISO C code */

#define COMBA_START

#define COMBA_CLEAR \
    c0 = c1 = c2 = 0;

#define COMBA_FORWARD \
    do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_STORE(x) \
    x = c0;

#define COMBA_STORE2(x) \
    x = c1;

#define COMBA_FINI

#define MULADD(i, j) \
    do { fp_word t; \
    t = (fp_word)c0 + ((fp_word)i) * ((fp_word)j); c0 = (fp_digit)t; \
    t = (fp_word)c1 + (t >> DIGIT_BIT); \
    c1 = (fp_digit)t; c2 += (fp_digit)(t >> DIGIT_BIT); \
    } while (0);

#endif


#ifdef TFM_SMALL_SET
#include "fp_mul_comba_small_set.i"
#endif

#if defined(TFM_MUL3) && FP_SIZE >= 6
#include "fp_mul_comba_3.i"
#endif
#if defined(TFM_MUL4) && FP_SIZE >= 8
#include "fp_mul_comba_4.i"
#endif
#if defined(TFM_MUL6) && FP_SIZE >= 12
#include "fp_mul_comba_6.i"
#endif
#if defined(TFM_MUL7) && FP_SIZE >= 14
#include "fp_mul_comba_7.i"
#endif
#if defined(TFM_MUL8) && FP_SIZE >= 16
#include "fp_mul_comba_8.i"
#endif
#if defined(TFM_MUL9) && FP_SIZE >= 18
#include "fp_mul_comba_9.i"
#endif
#if defined(TFM_MUL12) && FP_SIZE >= 24
#include "fp_mul_comba_12.i"
#endif
#if defined(TFM_MUL17) && FP_SIZE >= 34
#include "fp_mul_comba_17.i"
#endif
#if defined(TFM_MUL20) && FP_SIZE >= 40
#include "fp_mul_comba_20.i"
#endif
#if defined(TFM_MUL24) && FP_SIZE >= 48
#include "fp_mul_comba_24.i"
#endif
#if defined(TFM_MUL28) && FP_SIZE >= 56
#include "fp_mul_comba_28.i"
#endif
#if defined(TFM_MUL32) && FP_SIZE >= 64
#include "fp_mul_comba_32.i"
#endif
#if defined(TFM_MUL48) && FP_SIZE >= 96
#include "fp_mul_comba_48.i"
#endif
#if defined(TFM_MUL64) && FP_SIZE >= 128
#include "fp_mul_comba_64.i"
#endif

/* end fp_mul_comba.c asm */
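/*
 * Illustrative sketch (not part of the original file): the macros above are
 * consumed by the fast-math routines that include this file (tfm-style code
 * such as fp_montgomery_reduce). Simplified shape of the Montgomery reduction
 * inner loop, assuming the generic (non-INNERMUL8) path and the usual tfm
 * locals: c[] working array, _c, tmpm, mu, cy as used by the macros, m the
 * modulus, pa its digit count, x and y loop counters.
 *
 *   MONT_START;
 *   for (x = 0; x < pa; x++) {
 *       fp_digit cy = 0;
 *       LOOP_START;            // mu = c[x] * mp
 *       _c   = c + x;
 *       tmpm = m->dp;
 *       for (y = 0; y < pa; y++) {
 *           INNERMUL;          // c[x+y] += mu * m->dp[y], carry kept in cy
 *           ++_c;
 *       }
 *       LOOP_END;              // flush the running carry into cy
 *       while (cy) {           // propagate any remaining carry upward
 *           PROPCARRY;
 *           ++_c;
 *       }
 *   }
 *   MONT_FINI;
 *
 * The Comba MULADD/SQRADD* macros are used the same way by the generated
 * fp_mul_comba_N.i / fp_sqr_comba_N.i files included above: COMBA_CLEAR or
 * CLEAR_CARRY once, one MULADD (or SQRADD/SQRADD2/SQRADDAC/SQRADDDB) per
 * partial product in a column, COMBA_STORE plus COMBA_FORWARD/CARRY_FORWARD
 * between columns, and COMBA_FINI at the end.
 */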