wolfSSL SSL/TLS library, support up to TLS1.3

Dependents:   CyaSSL-Twitter-OAuth4Tw Example-client-tls-cert TwitterReader TweetTest ... more

Committer:
wolfSSL
Date:
Tue May 02 08:44:47 2017 +0000
Revision:
7:481bce714567
wolfSSL3.10.2

Who changed what in which revision?

User | Revision | Line number | New contents of line
wolfSSL 7:481bce714567 1 /* asm.c
wolfSSL 7:481bce714567 2 *
wolfSSL 7:481bce714567 3 * Copyright (C) 2006-2016 wolfSSL Inc.
wolfSSL 7:481bce714567 4 *
wolfSSL 7:481bce714567 5 * This file is part of wolfSSL.
wolfSSL 7:481bce714567 6 *
wolfSSL 7:481bce714567 7 * wolfSSL is free software; you can redistribute it and/or modify
wolfSSL 7:481bce714567 8 * it under the terms of the GNU General Public License as published by
wolfSSL 7:481bce714567 9 * the Free Software Foundation; either version 2 of the License, or
wolfSSL 7:481bce714567 10 * (at your option) any later version.
wolfSSL 7:481bce714567 11 *
wolfSSL 7:481bce714567 12 * wolfSSL is distributed in the hope that it will be useful,
wolfSSL 7:481bce714567 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
wolfSSL 7:481bce714567 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
wolfSSL 7:481bce714567 15 * GNU General Public License for more details.
wolfSSL 7:481bce714567 16 *
wolfSSL 7:481bce714567 17 * You should have received a copy of the GNU General Public License
wolfSSL 7:481bce714567 18 * along with this program; if not, write to the Free Software
wolfSSL 7:481bce714567 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
wolfSSL 7:481bce714567 20 */
wolfSSL 7:481bce714567 21
wolfSSL 7:481bce714567 22
wolfSSL 7:481bce714567 23 #ifdef HAVE_CONFIG_H
wolfSSL 7:481bce714567 24 #include <config.h>
wolfSSL 7:481bce714567 25 #endif
wolfSSL 7:481bce714567 26
wolfSSL 7:481bce714567 27 #include <wolfssl/wolfcrypt/settings.h>
wolfSSL 7:481bce714567 28
wolfSSL 7:481bce714567 29 /*
wolfSSL 7:481bce714567 30 * Based on public domain TomsFastMath 0.10 by Tom St Denis, tomstdenis@iahu.ca,
wolfSSL 7:481bce714567 31 * http://math.libtomcrypt.com
wolfSSL 7:481bce714567 32 */
wolfSSL 7:481bce714567 33
wolfSSL 7:481bce714567 34
wolfSSL 7:481bce714567 35 /******************************************************************/
wolfSSL 7:481bce714567 36 /* fp_montgomery_reduce.c asm or generic */
wolfSSL 7:481bce714567 37
wolfSSL 7:481bce714567 38
wolfSSL 7:481bce714567 39 /* Each platform needs to query leaf 7 from cpuid to see if BMI2 (MULX) and
wolfSSL 7:481bce714567 40 * ADX (ADCX, ADOX) are supported. Also, let's set up a macro for proper linkage w/o ABI conflicts
wolfSSL 7:481bce714567 41 */
wolfSSL 7:481bce714567 42
wolfSSL 7:481bce714567 43 #if defined(HAVE_INTEL_MULX)
/* cpuid(reg, leaf, sub): execute CPUID with EAX = leaf and ECX = sub and
 * store the resulting EAX, EBX, ECX, EDX in reg[0..3].
 * XASM_LINK(f): GCC-style asm label for a declaration; expands to nothing
 * when building with MSVC. */
#ifndef _MSC_VER
    #define cpuid(reg, leaf, sub)\
            __asm__ __volatile__ ("cpuid":\
             "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3]) :\
             "a" (leaf), "c"(sub));

    #define XASM_LINK(f) asm(f)
#else

    #include <intrin.h>
    /* Every call site passes (reg, leaf, sub), so the MSVC wrapper has to
     * accept three arguments as well; __cpuidex forwards the sub-leaf in
     * ECX, which leaf 7 requires. */
    #define cpuid(a,b,c) __cpuidex((int*)a,b,c)

    #define XASM_LINK(f)

#endif /* _MSC_VER */
wolfSSL 7:481bce714567 59
/* Indices into the register array filled by cpuid(): reg[EAX] .. reg[EDX]. */
wolfSSL 7:481bce714567 60 #define EAX 0
wolfSSL 7:481bce714567 61 #define EBX 1
wolfSSL 7:481bce714567 62 #define ECX 2
wolfSSL 7:481bce714567 63 #define EDX 3
wolfSSL 7:481bce714567 64
/* Bit masks kept in cpuid_flags. In the part of the file visible here only
 * CPUID_BMI2 and CPUID_ADX are ever set (by set_cpuid_flags()). */
wolfSSL 7:481bce714567 65 #define CPUID_AVX1 0x1
wolfSSL 7:481bce714567 66 #define CPUID_AVX2 0x2
wolfSSL 7:481bce714567 67 #define CPUID_RDRAND 0x4
wolfSSL 7:481bce714567 68 #define CPUID_RDSEED 0x8
wolfSSL 7:481bce714567 69 #define CPUID_BMI2 0x10 /* MULX, RORX */
wolfSSL 7:481bce714567 70 #define CPUID_ADX 0x20 /* ADCX, ADOX */
wolfSSL 7:481bce714567 71
/* Each IS_INTEL_* test is non-zero when the matching bit is set in the
 * cached cpuid_flags mask; none of them runs CPUID itself. */
wolfSSL 7:481bce714567 72 #define IS_INTEL_AVX1 (cpuid_flags&CPUID_AVX1)
wolfSSL 7:481bce714567 73 #define IS_INTEL_AVX2 (cpuid_flags&CPUID_AVX2)
wolfSSL 7:481bce714567 74 #define IS_INTEL_BMI2 (cpuid_flags&CPUID_BMI2)
wolfSSL 7:481bce714567 75 #define IS_INTEL_ADX (cpuid_flags&CPUID_ADX)
wolfSSL 7:481bce714567 76 #define IS_INTEL_RDRAND (cpuid_flags&CPUID_RDRAND)
wolfSSL 7:481bce714567 77 #define IS_INTEL_RDSEED (cpuid_flags&CPUID_RDSEED)
/* SET_FLAGS expands to nothing; no use of it is visible in this part of the file. */
wolfSSL 7:481bce714567 78 #define SET_FLAGS
wolfSSL 7:481bce714567 79
/* cpuid_check: 0 until set_cpuid_flags() has probed the CPU, 1 afterwards.
 * cpuid_flags: OR of the CPUID_* masks found by that probe. */
wolfSSL 7:481bce714567 80 static word32 cpuid_check = 0 ;
wolfSSL 7:481bce714567 81 static word32 cpuid_flags = 0 ;
wolfSSL 7:481bce714567 82
wolfSSL 7:481bce714567 83 static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit) {
wolfSSL 7:481bce714567 84 int got_intel_cpu=0;
wolfSSL 7:481bce714567 85 unsigned int reg[5];
wolfSSL 7:481bce714567 86
wolfSSL 7:481bce714567 87 reg[4] = '\0' ;
wolfSSL 7:481bce714567 88 cpuid(reg, 0, 0);
wolfSSL 7:481bce714567 89 if(memcmp((char *)&(reg[EBX]), "Genu", 4) == 0 &&
wolfSSL 7:481bce714567 90 memcmp((char *)&(reg[EDX]), "ineI", 4) == 0 &&
wolfSSL 7:481bce714567 91 memcmp((char *)&(reg[ECX]), "ntel", 4) == 0) {
wolfSSL 7:481bce714567 92 got_intel_cpu = 1;
wolfSSL 7:481bce714567 93 }
wolfSSL 7:481bce714567 94 if (got_intel_cpu) {
wolfSSL 7:481bce714567 95 cpuid(reg, leaf, sub);
wolfSSL 7:481bce714567 96 return((reg[num]>>bit)&0x1) ;
wolfSSL 7:481bce714567 97 }
wolfSSL 7:481bce714567 98 return 0 ;
wolfSSL 7:481bce714567 99 }
wolfSSL 7:481bce714567 100
wolfSSL 7:481bce714567 101 INLINE static int set_cpuid_flags(void) {
wolfSSL 7:481bce714567 102 if(cpuid_check == 0) {
wolfSSL 7:481bce714567 103 if(cpuid_flag(7, 0, EBX, 8)){ cpuid_flags |= CPUID_BMI2 ; }
wolfSSL 7:481bce714567 104 if(cpuid_flag(7, 0, EBX,19)){ cpuid_flags |= CPUID_ADX ; }
wolfSSL 7:481bce714567 105 cpuid_check = 1 ;
wolfSSL 7:481bce714567 106 return 0 ;
wolfSSL 7:481bce714567 107 }
wolfSSL 7:481bce714567 108 return 1 ;
wolfSSL 7:481bce714567 109 }
wolfSSL 7:481bce714567 110
/* RETURN is a plain alias for the return keyword (presumably so that call
 * sites can pass it as the ret argument below - confirm against callers). */
wolfSSL 7:481bce714567 111 #define RETURN return
/* IF_HAVE_INTEL_MULX(func, ret): run set_cpuid_flags() if the CPU has not
 * been probed yet, then, when both the BMI2 and ADX flags are set, execute
 * func followed by ret.
 * The expansion is two consecutive bare if statements (no do/while wrapper),
 * so it must not be used as the body of an unbraced if/else. */
wolfSSL 7:481bce714567 112 #define IF_HAVE_INTEL_MULX(func, ret) \
wolfSSL 7:481bce714567 113 if(cpuid_check==0)set_cpuid_flags() ; \
wolfSSL 7:481bce714567 114 if(IS_INTEL_BMI2 && IS_INTEL_ADX){ func; ret ; }
wolfSSL 7:481bce714567 115
wolfSSL 7:481bce714567 116 #else
/* Without HAVE_INTEL_MULX the macro expands to nothing. */
wolfSSL 7:481bce714567 117 #define IF_HAVE_INTEL_MULX(func, ret)
wolfSSL 7:481bce714567 118 #endif
wolfSSL 7:481bce714567 119
wolfSSL 7:481bce714567 120 #if defined(TFM_X86) && !defined(TFM_SSE2)
wolfSSL 7:481bce714567 121 /* x86-32 code */
wolfSSL 7:481bce714567 122
/* Montgomery-reduction hooks for x86-32. The start/finish/loop-end hooks
 * expand to nothing on this target. All macros use the variables c, x, mp,
 * mu, _c, cy and tmpm of the function they are expanded in. */
wolfSSL 7:481bce714567 123 #define MONT_START
wolfSSL 7:481bce714567 124 #define MONT_FINI
wolfSSL 7:481bce714567 125 #define LOOP_END
/* Per-row multiplier: mu = c[x] * mp (no trailing semicolon). */
wolfSSL 7:481bce714567 126 #define LOOP_START \
wolfSSL 7:481bce714567 127 mu = c[x] * mp
wolfSSL 7:481bce714567 128
/* One inner step: edx:eax = mu * (*tmpm++); the old cy and _c[LO] are added
 * with carry into edx; the low word is stored to _c[LO] and edx becomes the
 * new cy. */
wolfSSL 7:481bce714567 129 #define INNERMUL \
wolfSSL 7:481bce714567 130 __asm__( \
wolfSSL 7:481bce714567 131 "movl %5,%%eax \n\t" \
wolfSSL 7:481bce714567 132 "mull %4 \n\t" \
wolfSSL 7:481bce714567 133 "addl %1,%%eax \n\t" \
wolfSSL 7:481bce714567 134 "adcl $0,%%edx \n\t" \
wolfSSL 7:481bce714567 135 "addl %%eax,%0 \n\t" \
wolfSSL 7:481bce714567 136 "adcl $0,%%edx \n\t" \
wolfSSL 7:481bce714567 137 "movl %%edx,%1 \n\t" \
wolfSSL 7:481bce714567 138 :"=g"(_c[LO]), "=r"(cy) \
wolfSSL 7:481bce714567 139 :"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \
wolfSSL 7:481bce714567 140 : "%eax", "%edx", "cc")
wolfSSL 7:481bce714567 141
/* Carry propagation: _c[LO] += cy, then cy = carry-out of that addition
 * (setb / movzbl produce 0 or 1). */
wolfSSL 7:481bce714567 142 #define PROPCARRY \
wolfSSL 7:481bce714567 143 __asm__( \
wolfSSL 7:481bce714567 144 "addl %1,%0 \n\t" \
wolfSSL 7:481bce714567 145 "setb %%al \n\t" \
wolfSSL 7:481bce714567 146 "movzbl %%al,%1 \n\t" \
wolfSSL 7:481bce714567 147 :"=g"(_c[LO]), "=r"(cy) \
wolfSSL 7:481bce714567 148 :"0"(_c[LO]), "1"(cy) \
wolfSSL 7:481bce714567 149 : "%eax", "cc")
wolfSSL 7:481bce714567 150
wolfSSL 7:481bce714567 151 /******************************************************************/
wolfSSL 7:481bce714567 152 #elif defined(TFM_X86_64)
wolfSSL 7:481bce714567 153 /* x86-64 code */
wolfSSL 7:481bce714567 154
/* Montgomery-reduction hooks for x86-64; the start/finish/loop-end hooks
 * expand to nothing on this target. */
wolfSSL 7:481bce714567 155 #define MONT_START
wolfSSL 7:481bce714567 156 #define MONT_FINI
wolfSSL 7:481bce714567 157 #define LOOP_END
/* Per-row multiplier: mu = c[x] * mp. Unlike the other targets, this
 * definition already ends in a semicolon. */
wolfSSL 7:481bce714567 158 #define LOOP_START \
wolfSSL 7:481bce714567 159 mu = c[x] * mp;
wolfSSL 7:481bce714567 160
/* One inner step: rdx:rax = mu * (*tmpm++); the old cy and _c[LO] are added
 * with carry into rdx; the low word is stored to _c[LO] and rdx becomes the
 * new cy. */
wolfSSL 7:481bce714567 161 #define INNERMUL \
wolfSSL 7:481bce714567 162 __asm__( \
wolfSSL 7:481bce714567 163 "movq %5,%%rax \n\t" \
wolfSSL 7:481bce714567 164 "mulq %4 \n\t" \
wolfSSL 7:481bce714567 165 "addq %1,%%rax \n\t" \
wolfSSL 7:481bce714567 166 "adcq $0,%%rdx \n\t" \
wolfSSL 7:481bce714567 167 "addq %%rax,%0 \n\t" \
wolfSSL 7:481bce714567 168 "adcq $0,%%rdx \n\t" \
wolfSSL 7:481bce714567 169 "movq %%rdx,%1 \n\t" \
wolfSSL 7:481bce714567 170 :"=g"(_c[LO]), "=r"(cy) \
wolfSSL 7:481bce714567 171 :"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \
wolfSSL 7:481bce714567 172 : "%rax", "%rdx", "cc")
wolfSSL 7:481bce714567 173
/* MULX/ADCX/ADOX variant of the 8-digit inner multiply.
 * The macros below are separate asm statements that hand values to each
 * other through fixed registers: r11/r12 hold the alternating multiplicand
 * digits, rdx holds the multiplier, and the CF and OF flags carry two
 * independent addition chains.
 * NOTE(review): those registers are also listed as clobbers and the flags
 * are not declared at all, so the code depends on the compiler emitting
 * nothing in between that disturbs them - confirm with the generated code. */
wolfSSL 7:481bce714567 174 #if defined(HAVE_INTEL_MULX)
/* MULX_INIT(a0, c0, cy): r10 = 0, rdx = a0, c0 += cy (in memory), then adox
 * of the zeroed r10 with itself (per the inline comment: reset OF). */
wolfSSL 7:481bce714567 175 #define MULX_INIT(a0, c0, cy)\
wolfSSL 7:481bce714567 176 __asm__ volatile( \
wolfSSL 7:481bce714567 177 "xorq %%r10, %%r10\n\t" \
wolfSSL 7:481bce714567 178 "movq %1,%%rdx\n\t" \
wolfSSL 7:481bce714567 179 "addq %2, %0\n\t" /* c0+=cy; Set CF, OF */ \
wolfSSL 7:481bce714567 180 "adoxq %%r10, %%r10\n\t" /* Reset OF */ \
wolfSSL 7:481bce714567 181 :"+m"(c0):"r"(a0),"r"(cy):"%r8","%r9", "%r10","%r11","%r12","%rdx") ; \
wolfSSL 7:481bce714567 182
/* MULX_INNERMUL_R1(c0, c1, pre, rdx): r8:r9 = rdx * r11; preload pre into
 * r12 for the following R2 step; c0 += r9 on the OF chain (adox) and
 * c1 += r8 on the CF chain (adcx). */
wolfSSL 7:481bce714567 183 #define MULX_INNERMUL_R1(c0, c1, pre, rdx)\
wolfSSL 7:481bce714567 184 { \
wolfSSL 7:481bce714567 185 __asm__ volatile ( \
wolfSSL 7:481bce714567 186 "movq %3, %%rdx\n\t" \
wolfSSL 7:481bce714567 187 "mulx %%r11,%%r9, %%r8 \n\t" \
wolfSSL 7:481bce714567 188 "movq %2, %%r12\n\t" \
wolfSSL 7:481bce714567 189 "adoxq %%r9,%0 \n\t" \
wolfSSL 7:481bce714567 190 "adcxq %%r8,%1 \n\t" \
wolfSSL 7:481bce714567 191 :"+r"(c0),"+r"(c1):"m"(pre),"r"(rdx):"%r8","%r9", "%r10", "%r11","%r12","%rdx" \
wolfSSL 7:481bce714567 192 ); }
wolfSSL 7:481bce714567 193
wolfSSL 7:481bce714567 194
/* MULX_INNERMUL_R2: same as R1 with the register roles swapped - multiplies
 * by r12 and preloads pre into r11. */
wolfSSL 7:481bce714567 195 #define MULX_INNERMUL_R2(c0, c1, pre, rdx)\
wolfSSL 7:481bce714567 196 { \
wolfSSL 7:481bce714567 197 __asm__ volatile ( \
wolfSSL 7:481bce714567 198 "movq %3, %%rdx\n\t" \
wolfSSL 7:481bce714567 199 "mulx %%r12,%%r9, %%r8 \n\t" \
wolfSSL 7:481bce714567 200 "movq %2, %%r11\n\t" \
wolfSSL 7:481bce714567 201 "adoxq %%r9,%0 \n\t" \
wolfSSL 7:481bce714567 202 "adcxq %%r8,%1 \n\t" \
wolfSSL 7:481bce714567 203 :"+r"(c0),"+r"(c1):"m"(pre),"r"(rdx):"%r8","%r9", "%r10", "%r11","%r12","%rdx" \
wolfSSL 7:481bce714567 204 ); }
wolfSSL 7:481bce714567 205
/* MULX_LOAD_R1(val): r11 = val, the first multiplicand digit. */
wolfSSL 7:481bce714567 206 #define MULX_LOAD_R1(val)\
wolfSSL 7:481bce714567 207 __asm__ volatile ( \
wolfSSL 7:481bce714567 208 "movq %0, %%r11\n\t"\
wolfSSL 7:481bce714567 209 ::"m"(val):"%r8","%r9", "%r10", "%r11","%r12","%rdx"\
wolfSSL 7:481bce714567 210 ) ;
wolfSSL 7:481bce714567 211
/* MULX_INNERMUL_LAST(c0, c1, rdx): r8:r9 = rdx * r12; fold the pending OF
 * into r9 and the pending CF into r8; c0 += r9 with the carry going into
 * r8; finally c1 = r8 (the outgoing carry word). */
wolfSSL 7:481bce714567 212 #define MULX_INNERMUL_LAST(c0, c1, rdx)\
wolfSSL 7:481bce714567 213 { \
wolfSSL 7:481bce714567 214 __asm__ volatile ( \
wolfSSL 7:481bce714567 215 "movq %2, %%rdx\n\t" \
wolfSSL 7:481bce714567 216 "mulx %%r12,%%r9, %%r8 \n\t" \
wolfSSL 7:481bce714567 217 "movq $0, %%r10 \n\t" \
wolfSSL 7:481bce714567 218 "adoxq %%r10, %%r9 \n\t" \
wolfSSL 7:481bce714567 219 "adcq $0,%%r8 \n\t" \
wolfSSL 7:481bce714567 220 "addq %%r9,%0 \n\t" \
wolfSSL 7:481bce714567 221 "adcq $0,%%r8 \n\t" \
wolfSSL 7:481bce714567 222 "movq %%r8,%1 \n\t" \
wolfSSL 7:481bce714567 223 :"+m"(c0),"=m"(c1):"r"(rdx):"%r8","%r9","%r10", "%r11", "%r12","%rdx"\
wolfSSL 7:481bce714567 224 ); }
wolfSSL 7:481bce714567 225
/* MULX_INNERMUL8(x, y, z, cy): chains the steps above over x[0..7] with
 * multiplier y. It accumulates into the variables _c0 .. _c7 of the
 * expansion site and leaves the final carry word in cy. The z parameter is
 * not referenced in the expansion. */
wolfSSL 7:481bce714567 226 #define MULX_INNERMUL8(x,y,z,cy)\
wolfSSL 7:481bce714567 227 { word64 rdx = y ;\
wolfSSL 7:481bce714567 228 MULX_LOAD_R1(x[0]) ;\
wolfSSL 7:481bce714567 229 MULX_INIT(y, _c0, cy) ; /* rdx=y; z0+=cy; */ \
wolfSSL 7:481bce714567 230 MULX_INNERMUL_R1(_c0, _c1, x[1], rdx) ;\
wolfSSL 7:481bce714567 231 MULX_INNERMUL_R2(_c1, _c2, x[2], rdx) ;\
wolfSSL 7:481bce714567 232 MULX_INNERMUL_R1(_c2, _c3, x[3], rdx) ;\
wolfSSL 7:481bce714567 233 MULX_INNERMUL_R2(_c3, _c4, x[4], rdx) ;\
wolfSSL 7:481bce714567 234 MULX_INNERMUL_R1(_c4, _c5, x[5], rdx) ;\
wolfSSL 7:481bce714567 235 MULX_INNERMUL_R2(_c5, _c6, x[6], rdx) ;\
wolfSSL 7:481bce714567 236 MULX_INNERMUL_R1(_c6, _c7, x[7], rdx) ;\
wolfSSL 7:481bce714567 237 MULX_INNERMUL_LAST(_c7, cy, rdx) ;\
wolfSSL 7:481bce714567 238 }
/* INNERMUL8_MULX: MULX_INNERMUL8 applied to the tmpm, mu, _c and cy of the
 * expansion site. */
wolfSSL 7:481bce714567 239 #define INNERMUL8_MULX \
wolfSSL 7:481bce714567 240 {\
wolfSSL 7:481bce714567 241 MULX_INNERMUL8(tmpm, mu, _c, cy);\
wolfSSL 7:481bce714567 242 }
wolfSSL 7:481bce714567 243 #endif
wolfSSL 7:481bce714567 244
/* Eight INNERMUL steps unrolled into one asm statement.
 * Operands: %0/%2 = _c (pointer), %1/%3 = cy (same register), %4 = mu,
 * %5 = tmpm (pointer). For i = 0..7, at byte offset 8*i:
 *   rdx:rax = tmpm[i] * mu + _c[i] + cy;  _c[i] = rax;  cy = rdx.
 * r10 and r11 preload the next _c and tmpm digits. Neither pointer is
 * advanced by the asm.
 * NOTE(review): the stores go through the pointer in %0 without a "memory"
 * clobber or volatile - confirm the compiler cannot cache _c[] across it. */
wolfSSL 7:481bce714567 245 #define INNERMUL8 \
wolfSSL 7:481bce714567 246 __asm__( \
wolfSSL 7:481bce714567 247 "movq 0(%5),%%rax \n\t" \
wolfSSL 7:481bce714567 248 "movq 0(%2),%%r10 \n\t" \
wolfSSL 7:481bce714567 249 "movq 0x8(%5),%%r11 \n\t" \
wolfSSL 7:481bce714567 250 "mulq %4 \n\t" \
wolfSSL 7:481bce714567 251 "addq %%r10,%%rax \n\t" \
wolfSSL 7:481bce714567 252 "adcq $0,%%rdx \n\t" \
wolfSSL 7:481bce714567 253 "movq 0x8(%2),%%r10 \n\t" \
wolfSSL 7:481bce714567 254 "addq %3,%%rax \n\t" \
wolfSSL 7:481bce714567 255 "adcq $0,%%rdx \n\t" \
wolfSSL 7:481bce714567 256 "movq %%rax,0(%0) \n\t" \
wolfSSL 7:481bce714567 257 "movq %%rdx,%1 \n\t" \
wolfSSL 7:481bce714567 258 \
wolfSSL 7:481bce714567 259 "movq %%r11,%%rax \n\t" \
wolfSSL 7:481bce714567 260 "movq 0x10(%5),%%r11 \n\t" \
wolfSSL 7:481bce714567 261 "mulq %4 \n\t" \
wolfSSL 7:481bce714567 262 "addq %%r10,%%rax \n\t" \
wolfSSL 7:481bce714567 263 "adcq $0,%%rdx \n\t" \
wolfSSL 7:481bce714567 264 "movq 0x10(%2),%%r10 \n\t" \
wolfSSL 7:481bce714567 265 "addq %3,%%rax \n\t" \
wolfSSL 7:481bce714567 266 "adcq $0,%%rdx \n\t" \
wolfSSL 7:481bce714567 267 "movq %%rax,0x8(%0) \n\t" \
wolfSSL 7:481bce714567 268 "movq %%rdx,%1 \n\t" \
wolfSSL 7:481bce714567 269 \
wolfSSL 7:481bce714567 270 "movq %%r11,%%rax \n\t" \
wolfSSL 7:481bce714567 271 "movq 0x18(%5),%%r11 \n\t" \
wolfSSL 7:481bce714567 272 "mulq %4 \n\t" \
wolfSSL 7:481bce714567 273 "addq %%r10,%%rax \n\t" \
wolfSSL 7:481bce714567 274 "adcq $0,%%rdx \n\t" \
wolfSSL 7:481bce714567 275 "movq 0x18(%2),%%r10 \n\t" \
wolfSSL 7:481bce714567 276 "addq %3,%%rax \n\t" \
wolfSSL 7:481bce714567 277 "adcq $0,%%rdx \n\t" \
wolfSSL 7:481bce714567 278 "movq %%rax,0x10(%0) \n\t" \
wolfSSL 7:481bce714567 279 "movq %%rdx,%1 \n\t" \
wolfSSL 7:481bce714567 280 \
wolfSSL 7:481bce714567 281 "movq %%r11,%%rax \n\t" \
wolfSSL 7:481bce714567 282 "movq 0x20(%5),%%r11 \n\t" \
wolfSSL 7:481bce714567 283 "mulq %4 \n\t" \
wolfSSL 7:481bce714567 284 "addq %%r10,%%rax \n\t" \
wolfSSL 7:481bce714567 285 "adcq $0,%%rdx \n\t" \
wolfSSL 7:481bce714567 286 "movq 0x20(%2),%%r10 \n\t" \
wolfSSL 7:481bce714567 287 "addq %3,%%rax \n\t" \
wolfSSL 7:481bce714567 288 "adcq $0,%%rdx \n\t" \
wolfSSL 7:481bce714567 289 "movq %%rax,0x18(%0) \n\t" \
wolfSSL 7:481bce714567 290 "movq %%rdx,%1 \n\t" \
wolfSSL 7:481bce714567 291 \
wolfSSL 7:481bce714567 292 "movq %%r11,%%rax \n\t" \
wolfSSL 7:481bce714567 293 "movq 0x28(%5),%%r11 \n\t" \
wolfSSL 7:481bce714567 294 "mulq %4 \n\t" \
wolfSSL 7:481bce714567 295 "addq %%r10,%%rax \n\t" \
wolfSSL 7:481bce714567 296 "adcq $0,%%rdx \n\t" \
wolfSSL 7:481bce714567 297 "movq 0x28(%2),%%r10 \n\t" \
wolfSSL 7:481bce714567 298 "addq %3,%%rax \n\t" \
wolfSSL 7:481bce714567 299 "adcq $0,%%rdx \n\t" \
wolfSSL 7:481bce714567 300 "movq %%rax,0x20(%0) \n\t" \
wolfSSL 7:481bce714567 301 "movq %%rdx,%1 \n\t" \
wolfSSL 7:481bce714567 302 \
wolfSSL 7:481bce714567 303 "movq %%r11,%%rax \n\t" \
wolfSSL 7:481bce714567 304 "movq 0x30(%5),%%r11 \n\t" \
wolfSSL 7:481bce714567 305 "mulq %4 \n\t" \
wolfSSL 7:481bce714567 306 "addq %%r10,%%rax \n\t" \
wolfSSL 7:481bce714567 307 "adcq $0,%%rdx \n\t" \
wolfSSL 7:481bce714567 308 "movq 0x30(%2),%%r10 \n\t" \
wolfSSL 7:481bce714567 309 "addq %3,%%rax \n\t" \
wolfSSL 7:481bce714567 310 "adcq $0,%%rdx \n\t" \
wolfSSL 7:481bce714567 311 "movq %%rax,0x28(%0) \n\t" \
wolfSSL 7:481bce714567 312 "movq %%rdx,%1 \n\t" \
wolfSSL 7:481bce714567 313 \
wolfSSL 7:481bce714567 314 "movq %%r11,%%rax \n\t" \
wolfSSL 7:481bce714567 315 "movq 0x38(%5),%%r11 \n\t" \
wolfSSL 7:481bce714567 316 "mulq %4 \n\t" \
wolfSSL 7:481bce714567 317 "addq %%r10,%%rax \n\t" \
wolfSSL 7:481bce714567 318 "adcq $0,%%rdx \n\t" \
wolfSSL 7:481bce714567 319 "movq 0x38(%2),%%r10 \n\t" \
wolfSSL 7:481bce714567 320 "addq %3,%%rax \n\t" \
wolfSSL 7:481bce714567 321 "adcq $0,%%rdx \n\t" \
wolfSSL 7:481bce714567 322 "movq %%rax,0x30(%0) \n\t" \
wolfSSL 7:481bce714567 323 "movq %%rdx,%1 \n\t" \
wolfSSL 7:481bce714567 324 \
wolfSSL 7:481bce714567 325 "movq %%r11,%%rax \n\t" \
wolfSSL 7:481bce714567 326 "mulq %4 \n\t" \
wolfSSL 7:481bce714567 327 "addq %%r10,%%rax \n\t" \
wolfSSL 7:481bce714567 328 "adcq $0,%%rdx \n\t" \
wolfSSL 7:481bce714567 329 "addq %3,%%rax \n\t" \
wolfSSL 7:481bce714567 330 "adcq $0,%%rdx \n\t" \
wolfSSL 7:481bce714567 331 "movq %%rax,0x38(%0) \n\t" \
wolfSSL 7:481bce714567 332 "movq %%rdx,%1 \n\t" \
wolfSSL 7:481bce714567 333 \
wolfSSL 7:481bce714567 334 :"=r"(_c), "=r"(cy) \
wolfSSL 7:481bce714567 335 : "0"(_c), "1"(cy), "g"(mu), "r"(tmpm)\
wolfSSL 7:481bce714567 336 : "%rax", "%rdx", "%r10", "%r11", "cc")\
wolfSSL 7:481bce714567 337
/* Carry propagation: _c[LO] += cy, then cy = carry-out of that addition
 * (setb / movzbq produce 0 or 1). */
wolfSSL 7:481bce714567 338 #define PROPCARRY \
wolfSSL 7:481bce714567 339 __asm__( \
wolfSSL 7:481bce714567 340 "addq %1,%0 \n\t" \
wolfSSL 7:481bce714567 341 "setb %%al \n\t" \
wolfSSL 7:481bce714567 342 "movzbq %%al,%1 \n\t" \
wolfSSL 7:481bce714567 343 :"=g"(_c[LO]), "=r"(cy) \
wolfSSL 7:481bce714567 344 :"0"(_c[LO]), "1"(cy) \
wolfSSL 7:481bce714567 345 : "%rax", "cc")
wolfSSL 7:481bce714567 346
wolfSSL 7:481bce714567 347 /******************************************************************/
wolfSSL 7:481bce714567 348 #elif defined(TFM_SSE2)
wolfSSL 7:481bce714567 349 /* SSE2 code (assumes 32-bit fp_digits) */
wolfSSL 7:481bce714567 350 /* MMX register assignments (the code below uses mm0..mm4, not xmm):
wolfSSL 7:481bce714567 351 * mm0 *tmpm++, then Mu * (*tmpm++)
wolfSSL 7:481bce714567 352 * mm1 c[x], then Mu
wolfSSL 7:481bce714567 353 * mm2 mp
wolfSSL 7:481bce714567 354 * mm3 cy
wolfSSL 7:481bce714567 355 * mm4 _c[LO]
wolfSSL 7:481bce714567 356 */
wolfSSL 7:481bce714567 357
/* Load mp into mm2 once, before the outer loop. */
wolfSSL 7:481bce714567 358 #define MONT_START \
wolfSSL 7:481bce714567 359 __asm__("movd %0,%%mm2"::"g"(mp))
wolfSSL 7:481bce714567 360
/* Issue emms after the MMX registers have been used. */
wolfSSL 7:481bce714567 361 #define MONT_FINI \
wolfSSL 7:481bce714567 362 __asm__("emms")
wolfSSL 7:481bce714567 363
/* Per-row setup: mm1 = c[x] * mp (Mu) and mm3 (the running carry) = 0. */
wolfSSL 7:481bce714567 364 #define LOOP_START \
wolfSSL 7:481bce714567 365 __asm__( \
wolfSSL 7:481bce714567 366 "movd %0,%%mm1 \n\t" \
wolfSSL 7:481bce714567 367 "pxor %%mm3,%%mm3 \n\t" \
wolfSSL 7:481bce714567 368 "pmuludq %%mm2,%%mm1 \n\t" \
wolfSSL 7:481bce714567 369 :: "g"(c[x]))
wolfSSL 7:481bce714567 370
wolfSSL 7:481bce714567 371 /* pmuludq on mmx registers does a 32x32->64 multiply. */
/* One inner step: mm3 += _c[LO] + Mu * (*tmpm++); the low 32 bits are stored
 * to _c[LO] and mm3 is shifted right by 32 so it holds the next carry.
 * The definition already ends in a semicolon. */
wolfSSL 7:481bce714567 372 #define INNERMUL \
wolfSSL 7:481bce714567 373 __asm__( \
wolfSSL 7:481bce714567 374 "movd %1,%%mm4 \n\t" \
wolfSSL 7:481bce714567 375 "movd %2,%%mm0 \n\t" \
wolfSSL 7:481bce714567 376 "paddq %%mm4,%%mm3 \n\t" \
wolfSSL 7:481bce714567 377 "pmuludq %%mm1,%%mm0 \n\t" \
wolfSSL 7:481bce714567 378 "paddq %%mm0,%%mm3 \n\t" \
wolfSSL 7:481bce714567 379 "movd %%mm3,%0 \n\t" \
wolfSSL 7:481bce714567 380 "psrlq $32, %%mm3 \n\t" \
wolfSSL 7:481bce714567 381 :"=g"(_c[LO]) : "0"(_c[LO]), "g"(*tmpm++) );
wolfSSL 7:481bce714567 382
/* Eight INNERMUL steps unrolled into one asm statement.
 * Operands: %0/%1 = _c (pointer), %2 = tmpm (pointer). For i = 0..7, at
 * byte offset 4*i: mm3 += _c[i] + Mu * tmpm[i]; _c[i] = low 32 bits of mm3;
 * mm3 >>= 32. mm5..mm7 preload the following digits. Neither pointer is
 * advanced by the asm, and no clobbers are declared. */
wolfSSL 7:481bce714567 383 #define INNERMUL8 \
wolfSSL 7:481bce714567 384 __asm__( \
wolfSSL 7:481bce714567 385 "movd 0(%1),%%mm4 \n\t" \
wolfSSL 7:481bce714567 386 "movd 0(%2),%%mm0 \n\t" \
wolfSSL 7:481bce714567 387 "paddq %%mm4,%%mm3 \n\t" \
wolfSSL 7:481bce714567 388 "pmuludq %%mm1,%%mm0 \n\t" \
wolfSSL 7:481bce714567 389 "movd 4(%2),%%mm5 \n\t" \
wolfSSL 7:481bce714567 390 "paddq %%mm0,%%mm3 \n\t" \
wolfSSL 7:481bce714567 391 "movd 4(%1),%%mm6 \n\t" \
wolfSSL 7:481bce714567 392 "movd %%mm3,0(%0) \n\t" \
wolfSSL 7:481bce714567 393 "psrlq $32, %%mm3 \n\t" \
wolfSSL 7:481bce714567 394 \
wolfSSL 7:481bce714567 395 "paddq %%mm6,%%mm3 \n\t" \
wolfSSL 7:481bce714567 396 "pmuludq %%mm1,%%mm5 \n\t" \
wolfSSL 7:481bce714567 397 "movd 8(%2),%%mm6 \n\t" \
wolfSSL 7:481bce714567 398 "paddq %%mm5,%%mm3 \n\t" \
wolfSSL 7:481bce714567 399 "movd 8(%1),%%mm7 \n\t" \
wolfSSL 7:481bce714567 400 "movd %%mm3,4(%0) \n\t" \
wolfSSL 7:481bce714567 401 "psrlq $32, %%mm3 \n\t" \
wolfSSL 7:481bce714567 402 \
wolfSSL 7:481bce714567 403 "paddq %%mm7,%%mm3 \n\t" \
wolfSSL 7:481bce714567 404 "pmuludq %%mm1,%%mm6 \n\t" \
wolfSSL 7:481bce714567 405 "movd 12(%2),%%mm7 \n\t" \
wolfSSL 7:481bce714567 406 "paddq %%mm6,%%mm3 \n\t" \
wolfSSL 7:481bce714567 407 "movd 12(%1),%%mm5 \n\t" \
wolfSSL 7:481bce714567 408 "movd %%mm3,8(%0) \n\t" \
wolfSSL 7:481bce714567 409 "psrlq $32, %%mm3 \n\t" \
wolfSSL 7:481bce714567 410 \
wolfSSL 7:481bce714567 411 "paddq %%mm5,%%mm3 \n\t" \
wolfSSL 7:481bce714567 412 "pmuludq %%mm1,%%mm7 \n\t" \
wolfSSL 7:481bce714567 413 "movd 16(%2),%%mm5 \n\t" \
wolfSSL 7:481bce714567 414 "paddq %%mm7,%%mm3 \n\t" \
wolfSSL 7:481bce714567 415 "movd 16(%1),%%mm6 \n\t" \
wolfSSL 7:481bce714567 416 "movd %%mm3,12(%0) \n\t" \
wolfSSL 7:481bce714567 417 "psrlq $32, %%mm3 \n\t" \
wolfSSL 7:481bce714567 418 \
wolfSSL 7:481bce714567 419 "paddq %%mm6,%%mm3 \n\t" \
wolfSSL 7:481bce714567 420 "pmuludq %%mm1,%%mm5 \n\t" \
wolfSSL 7:481bce714567 421 "movd 20(%2),%%mm6 \n\t" \
wolfSSL 7:481bce714567 422 "paddq %%mm5,%%mm3 \n\t" \
wolfSSL 7:481bce714567 423 "movd 20(%1),%%mm7 \n\t" \
wolfSSL 7:481bce714567 424 "movd %%mm3,16(%0) \n\t" \
wolfSSL 7:481bce714567 425 "psrlq $32, %%mm3 \n\t" \
wolfSSL 7:481bce714567 426 \
wolfSSL 7:481bce714567 427 "paddq %%mm7,%%mm3 \n\t" \
wolfSSL 7:481bce714567 428 "pmuludq %%mm1,%%mm6 \n\t" \
wolfSSL 7:481bce714567 429 "movd 24(%2),%%mm7 \n\t" \
wolfSSL 7:481bce714567 430 "paddq %%mm6,%%mm3 \n\t" \
wolfSSL 7:481bce714567 431 "movd 24(%1),%%mm5 \n\t" \
wolfSSL 7:481bce714567 432 "movd %%mm3,20(%0) \n\t" \
wolfSSL 7:481bce714567 433 "psrlq $32, %%mm3 \n\t" \
wolfSSL 7:481bce714567 434 \
wolfSSL 7:481bce714567 435 "paddq %%mm5,%%mm3 \n\t" \
wolfSSL 7:481bce714567 436 "pmuludq %%mm1,%%mm7 \n\t" \
wolfSSL 7:481bce714567 437 "movd 28(%2),%%mm5 \n\t" \
wolfSSL 7:481bce714567 438 "paddq %%mm7,%%mm3 \n\t" \
wolfSSL 7:481bce714567 439 "movd 28(%1),%%mm6 \n\t" \
wolfSSL 7:481bce714567 440 "movd %%mm3,24(%0) \n\t" \
wolfSSL 7:481bce714567 441 "psrlq $32, %%mm3 \n\t" \
wolfSSL 7:481bce714567 442 \
wolfSSL 7:481bce714567 443 "paddq %%mm6,%%mm3 \n\t" \
wolfSSL 7:481bce714567 444 "pmuludq %%mm1,%%mm5 \n\t" \
wolfSSL 7:481bce714567 445 "paddq %%mm5,%%mm3 \n\t" \
wolfSSL 7:481bce714567 446 "movd %%mm3,28(%0) \n\t" \
wolfSSL 7:481bce714567 447 "psrlq $32, %%mm3 \n\t" \
wolfSSL 7:481bce714567 448 :"=r"(_c) : "0"(_c), "r"(tmpm) );
wolfSSL 7:481bce714567 449
wolfSSL 7:481bce714567 450 /* TAO switched tmpm from "g" to "r" after gcc tried to index the indexed stack
wolfSSL 7:481bce714567 451 pointer */
wolfSSL 7:481bce714567 452
/* End of a row: copy the low 32 bits of the running carry in mm3 into cy. */
wolfSSL 7:481bce714567 453 #define LOOP_END \
wolfSSL 7:481bce714567 454 __asm__( "movd %%mm3,%0 \n" :"=r"(cy))
wolfSSL 7:481bce714567 455
/* Carry propagation in integer registers: _c[LO] += cy, then cy = carry-out
 * of that addition (0 or 1). */
wolfSSL 7:481bce714567 456 #define PROPCARRY \
wolfSSL 7:481bce714567 457 __asm__( \
wolfSSL 7:481bce714567 458 "addl %1,%0 \n\t" \
wolfSSL 7:481bce714567 459 "setb %%al \n\t" \
wolfSSL 7:481bce714567 460 "movzbl %%al,%1 \n\t" \
wolfSSL 7:481bce714567 461 :"=g"(_c[LO]), "=r"(cy) \
wolfSSL 7:481bce714567 462 :"0"(_c[LO]), "1"(cy) \
wolfSSL 7:481bce714567 463 : "%eax", "cc")
wolfSSL 7:481bce714567 464
wolfSSL 7:481bce714567 465 /******************************************************************/
wolfSSL 7:481bce714567 466 #elif defined(TFM_ARM)
wolfSSL 7:481bce714567 467 /* ARMv4 code */
wolfSSL 7:481bce714567 468
/* Montgomery-reduction hooks for ARM; the start/finish/loop-end hooks expand
 * to nothing on this target. */
wolfSSL 7:481bce714567 469 #define MONT_START
wolfSSL 7:481bce714567 470 #define MONT_FINI
wolfSSL 7:481bce714567 471 #define LOOP_END
/* Per-row multiplier: mu = c[x] * mp (no trailing semicolon). */
wolfSSL 7:481bce714567 472 #define LOOP_START \
wolfSSL 7:481bce714567 473 mu = c[x] * mp
wolfSSL 7:481bce714567 474
wolfSSL 7:481bce714567 475
wolfSSL 7:481bce714567 476 #ifdef __thumb__
wolfSSL 7:481bce714567 477
/* Thumb inner step: r0 = _c[0] + cy; cy = carry of that add (0/1, selected
 * through an ITE block); UMLAL then adds mu * (*tmpm++) into cy:r0; r0 is
 * stored back to _c[0]. _c[0] is passed as an "m" input here. */
wolfSSL 7:481bce714567 478 #define INNERMUL \
wolfSSL 7:481bce714567 479 __asm__( \
wolfSSL 7:481bce714567 480 " LDR r0,%1 \n\t" \
wolfSSL 7:481bce714567 481 " ADDS r0,r0,%0 \n\t" \
wolfSSL 7:481bce714567 482 " ITE CS \n\t" \
wolfSSL 7:481bce714567 483 " MOVCS %0,#1 \n\t" \
wolfSSL 7:481bce714567 484 " MOVCC %0,#0 \n\t" \
wolfSSL 7:481bce714567 485 " UMLAL r0,%0,%3,%4 \n\t" \
wolfSSL 7:481bce714567 486 " STR r0,%1 \n\t" \
wolfSSL 7:481bce714567 487 :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"m"(_c[0]):"r0","cc");
wolfSSL 7:481bce714567 488
/* Thumb carry propagation: _c[0] += cy, then cy = carry-out (0 or 1). */
wolfSSL 7:481bce714567 489 #define PROPCARRY \
wolfSSL 7:481bce714567 490 __asm__( \
wolfSSL 7:481bce714567 491 " LDR r0,%1 \n\t" \
wolfSSL 7:481bce714567 492 " ADDS r0,r0,%0 \n\t" \
wolfSSL 7:481bce714567 493 " STR r0,%1 \n\t" \
wolfSSL 7:481bce714567 494 " ITE CS \n\t" \
wolfSSL 7:481bce714567 495 " MOVCS %0,#1 \n\t" \
wolfSSL 7:481bce714567 496 " MOVCC %0,#0 \n\t" \
wolfSSL 7:481bce714567 497 :"=r"(cy),"=m"(_c[0]):"0"(cy),"m"(_c[0]):"r0","cc");
wolfSSL 7:481bce714567 498
wolfSSL 7:481bce714567 499
wolfSSL 7:481bce714567 500 /* TAO thumb mode uses ite (if then else) to detect carry directly
wolfSSL 7:481bce714567 501 * fixed unmatched constraint warning by changing 1 to m */
wolfSSL 7:481bce714567 502
wolfSSL 7:481bce714567 503 #else /* __thumb__ */
wolfSSL 7:481bce714567 504
/* ARM-mode inner step: same sequence as the Thumb version without the ITE
 * instruction; _c[0] is passed as a matching "1" input. */
wolfSSL 7:481bce714567 505 #define INNERMUL \
wolfSSL 7:481bce714567 506 __asm__( \
wolfSSL 7:481bce714567 507 " LDR r0,%1 \n\t" \
wolfSSL 7:481bce714567 508 " ADDS r0,r0,%0 \n\t" \
wolfSSL 7:481bce714567 509 " MOVCS %0,#1 \n\t" \
wolfSSL 7:481bce714567 510 " MOVCC %0,#0 \n\t" \
wolfSSL 7:481bce714567 511 " UMLAL r0,%0,%3,%4 \n\t" \
wolfSSL 7:481bce714567 512 " STR r0,%1 \n\t" \
wolfSSL 7:481bce714567 513 :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r0","cc");
wolfSSL 7:481bce714567 514
/* ARM-mode carry propagation: _c[0] += cy, then cy = carry-out (0 or 1). */
wolfSSL 7:481bce714567 515 #define PROPCARRY \
wolfSSL 7:481bce714567 516 __asm__( \
wolfSSL 7:481bce714567 517 " LDR r0,%1 \n\t" \
wolfSSL 7:481bce714567 518 " ADDS r0,r0,%0 \n\t" \
wolfSSL 7:481bce714567 519 " STR r0,%1 \n\t" \
wolfSSL 7:481bce714567 520 " MOVCS %0,#1 \n\t" \
wolfSSL 7:481bce714567 521 " MOVCC %0,#0 \n\t" \
wolfSSL 7:481bce714567 522 :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r0","cc");
wolfSSL 7:481bce714567 523
wolfSSL 7:481bce714567 524 #endif /* __thumb__ */
wolfSSL 7:481bce714567 525
wolfSSL 7:481bce714567 526 #elif defined(TFM_PPC32)
wolfSSL 7:481bce714567 527
wolfSSL 7:481bce714567 528 /* PPC32 */
/* Montgomery-reduction hooks for PPC32; the start/finish/loop-end hooks
 * expand to nothing on this target. */
wolfSSL 7:481bce714567 529 #define MONT_START
wolfSSL 7:481bce714567 530 #define MONT_FINI
wolfSSL 7:481bce714567 531 #define LOOP_END
/* Per-row multiplier: mu = c[x] * mp (no trailing semicolon). */
wolfSSL 7:481bce714567 532 #define LOOP_START \
wolfSSL 7:481bce714567 533 mu = c[x] * mp
wolfSSL 7:481bce714567 534
/* One inner step: r17:r16 = mu * tmpm[0]; the old cy and _c[0] are added
 * with carry into r17; the low word goes to _c[0] and r17 becomes the new
 * cy. tmpm is advanced by the trailing ++tmpm, which is a separate C
 * statement after the asm. */
wolfSSL 7:481bce714567 535 #define INNERMUL \
wolfSSL 7:481bce714567 536 __asm__( \
wolfSSL 7:481bce714567 537 " mullw 16,%3,%4 \n\t" \
wolfSSL 7:481bce714567 538 " mulhwu 17,%3,%4 \n\t" \
wolfSSL 7:481bce714567 539 " addc 16,16,%2 \n\t" \
wolfSSL 7:481bce714567 540 " addze 17,17 \n\t" \
wolfSSL 7:481bce714567 541 " addc %1,16,%5 \n\t" \
wolfSSL 7:481bce714567 542 " addze %0,17 \n\t" \
wolfSSL 7:481bce714567 543 :"=r"(cy),"=r"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "cc"); ++tmpm;
wolfSSL 7:481bce714567 544
/* Carry propagation: _c[0] += cy; the cy register is then zeroed (xor with
 * itself; %0 and %2 are the same register) and addze adds the carry bit, so
 * cy ends up 0 or 1. */
wolfSSL 7:481bce714567 545 #define PROPCARRY \
wolfSSL 7:481bce714567 546 __asm__( \
wolfSSL 7:481bce714567 547 " addc %1,%3,%2 \n\t" \
wolfSSL 7:481bce714567 548 " xor %0,%2,%2 \n\t" \
wolfSSL 7:481bce714567 549 " addze %0,%2 \n\t" \
wolfSSL 7:481bce714567 550 :"=r"(cy),"=r"(_c[0]):"0"(cy),"1"(_c[0]):"cc");
wolfSSL 7:481bce714567 551
wolfSSL 7:481bce714567 552 #elif defined(TFM_PPC64)
wolfSSL 7:481bce714567 553
wolfSSL 7:481bce714567 554 /* PPC64 */
/* Montgomery-reduction hooks for PPC64; the start/finish/loop-end hooks
 * expand to nothing on this target. */
wolfSSL 7:481bce714567 555 #define MONT_START
wolfSSL 7:481bce714567 556 #define MONT_FINI
wolfSSL 7:481bce714567 557 #define LOOP_END
/* Per-row multiplier: mu = c[x] * mp (no trailing semicolon). */
wolfSSL 7:481bce714567 558 #define LOOP_START \
wolfSSL 7:481bce714567 559 mu = c[x] * mp
wolfSSL 7:481bce714567 560
/* One inner step: r17:r16 = mu * tmpm[0]; the old cy and the digit loaded
 * through %1 are added with carry into r17; r16 is stored back through %1
 * and r17 becomes the new cy. tmpm is advanced by the trailing ++tmpm.
 * NOTE(review): "sdx" does not look like a standard PowerPC mnemonic (the
 * doubleword indexed store is normally "stdx"), and %1 is an "=m" operand
 * used as an index register - confirm this assembles on a real toolchain. */
wolfSSL 7:481bce714567 561 #define INNERMUL \
wolfSSL 7:481bce714567 562 __asm__( \
wolfSSL 7:481bce714567 563 " mulld 16,%3,%4 \n\t" \
wolfSSL 7:481bce714567 564 " mulhdu 17,%3,%4 \n\t" \
wolfSSL 7:481bce714567 565 " addc 16,16,%0 \n\t" \
wolfSSL 7:481bce714567 566 " addze 17,17 \n\t" \
wolfSSL 7:481bce714567 567 " ldx 18,0,%1 \n\t" \
wolfSSL 7:481bce714567 568 " addc 16,16,18 \n\t" \
wolfSSL 7:481bce714567 569 " addze %0,17 \n\t" \
wolfSSL 7:481bce714567 570 " sdx 16,0,%1 \n\t" \
wolfSSL 7:481bce714567 571 :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "18","cc"); ++tmpm;
wolfSSL 7:481bce714567 572
/* Carry propagation: the digit behind %1 gets cy added and is stored back;
 * cy is then zeroed and receives the carry bit via addze (0 or 1).
 * NOTE(review): same "sdx" / "=m" operand question as in INNERMUL. */
wolfSSL 7:481bce714567 573 #define PROPCARRY \
wolfSSL 7:481bce714567 574 __asm__( \
wolfSSL 7:481bce714567 575 " ldx 16,0,%1 \n\t" \
wolfSSL 7:481bce714567 576 " addc 16,16,%0 \n\t" \
wolfSSL 7:481bce714567 577 " sdx 16,0,%1 \n\t" \
wolfSSL 7:481bce714567 578 " xor %0,%0,%0 \n\t" \
wolfSSL 7:481bce714567 579 " addze %0,%0 \n\t" \
wolfSSL 7:481bce714567 580 :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"16","cc");
wolfSSL 7:481bce714567 581
wolfSSL 7:481bce714567 582 /******************************************************************/
wolfSSL 7:481bce714567 583
wolfSSL 7:481bce714567 584 #elif defined(TFM_AVR32)
wolfSSL 7:481bce714567 585
wolfSSL 7:481bce714567 586 /* AVR32 */
/* Montgomery-reduction hooks for AVR32; the start/finish/loop-end hooks
 * expand to nothing on this target. */
wolfSSL 7:481bce714567 587 #define MONT_START
wolfSSL 7:481bce714567 588 #define MONT_FINI
wolfSSL 7:481bce714567 589 #define LOOP_END
/* Per-row multiplier: mu = c[x] * mp (no trailing semicolon). */
wolfSSL 7:481bce714567 590 #define LOOP_START \
wolfSSL 7:481bce714567 591 mu = c[x] * mp
wolfSSL 7:481bce714567 592
/* One inner step using r2/r3 as scratch. Operands: %0 = cy, %1 = _c (the
 * pointer itself, in a register), %3 = mu, %4 = *tmpm++.
 * Intended effect, by analogy with the other ports (confirm against the
 * AVR32 instruction set): r2 = digit at _c plus cy, r3 = carry of that add,
 * r3:r2 += mu * digit, low word stored back, cy = r3.
 * Unlike PROPCARRY below, the clobber list has no "cc" entry. */
wolfSSL 7:481bce714567 593 #define INNERMUL \
wolfSSL 7:481bce714567 594 __asm__( \
wolfSSL 7:481bce714567 595 " ld.w r2,%1 \n\t" \
wolfSSL 7:481bce714567 596 " add r2,%0 \n\t" \
wolfSSL 7:481bce714567 597 " eor r3,r3 \n\t" \
wolfSSL 7:481bce714567 598 " acr r3 \n\t" \
wolfSSL 7:481bce714567 599 " macu.d r2,%3,%4 \n\t" \
wolfSSL 7:481bce714567 600 " st.w %1,r2 \n\t" \
wolfSSL 7:481bce714567 601 " mov %0,r3 \n\t" \
wolfSSL 7:481bce714567 602 :"=r"(cy),"=r"(_c):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c):"r2","r3");
wolfSSL 7:481bce714567 603
/* Carry propagation using r2 as scratch: the digit behind &_c[0] gets cy
 * added and is stored back; cy is then cleared and receives the carry
 * (presumably 0 or 1 - confirm the semantics of acr). */
wolfSSL 7:481bce714567 604 #define PROPCARRY \
wolfSSL 7:481bce714567 605 __asm__( \
wolfSSL 7:481bce714567 606 " ld.w r2,%1 \n\t" \
wolfSSL 7:481bce714567 607 " add r2,%0 \n\t" \
wolfSSL 7:481bce714567 608 " st.w %1,r2 \n\t" \
wolfSSL 7:481bce714567 609 " eor %0,%0 \n\t" \
wolfSSL 7:481bce714567 610 " acr %0 \n\t" \
wolfSSL 7:481bce714567 611 :"=r"(cy),"=r"(&_c[0]):"0"(cy),"1"(&_c[0]):"r2","cc");
wolfSSL 7:481bce714567 612
wolfSSL 7:481bce714567 613 #else
wolfSSL 7:481bce714567 614
wolfSSL 7:481bce714567 615 /* ISO C code */
/* Portable Montgomery-reduction hooks: nothing to set up, finish or close
 * per row. */
#define MONT_START
#define MONT_FINI
#define LOOP_END

/* Per-row multiplier (no trailing semicolon; the caller supplies it). */
#define LOOP_START \
   mu = c[x] * mp

/* _c[0] += mu * (*tmpm++) + cy, computed in the double-width fp_word;
 * the bits above DIGIT_BIT become the new cy. tmpm is advanced once. */
#define INNERMUL                                          \
   do { fp_word prod_;                                    \
        prod_  = ((fp_word)mu) * ((fp_word)*tmpm++);      \
        prod_ += (fp_word)_c[0];                          \
        prod_ += (fp_word)cy;                             \
        _c[0]  = (fp_digit)prod_;                         \
        cy     = (fp_digit)(prod_ >> DIGIT_BIT);          \
   } while (0)

/* _c[0] += cy; cy becomes 1 when the addition wrapped around, else 0. */
#define PROPCARRY \
   do { _c[0] += cy; cy = (_c[0] < cy); } while (0)
wolfSSL 7:481bce714567 632
wolfSSL 7:481bce714567 633 #endif
wolfSSL 7:481bce714567 634 /******************************************************************/
wolfSSL 7:481bce714567 635
wolfSSL 7:481bce714567 636
/* Digit index used by the macros above when they address _c[LO]. */
wolfSSL 7:481bce714567 637 #define LO 0
wolfSSL 7:481bce714567 638 /* end fp_montgomery_reduce.c asm */
wolfSSL 7:481bce714567 639
wolfSSL 7:481bce714567 640
wolfSSL 7:481bce714567 641 /* start fp_sqr_comba.c asm */
wolfSSL 7:481bce714567 642 #if defined(TFM_X86)
wolfSSL 7:481bce714567 643
wolfSSL 7:481bce714567 644 /* x86-32 optimized squaring comba helpers. The accumulator is the caller's three words c0 (low), c1, c2 (high). */
wolfSSL 7:481bce714567 645 /* COMBA_START and COMBA_FINI expand to nothing; CLEAR_CARRY zeroes c0..c2; COMBA_STORE(x) / COMBA_STORE2(x) assign c0 / c1 to x; CARRY_FORWARD shifts the accumulator down one word (c0 = c1, c1 = c2, c2 = 0). The statement macros already end in ';'. */
wolfSSL 7:481bce714567 646 #define COMBA_START
wolfSSL 7:481bce714567 647
wolfSSL 7:481bce714567 648 #define CLEAR_CARRY \
wolfSSL 7:481bce714567 649 c0 = c1 = c2 = 0;
wolfSSL 7:481bce714567 650
wolfSSL 7:481bce714567 651 #define COMBA_STORE(x) \
wolfSSL 7:481bce714567 652 x = c0;
wolfSSL 7:481bce714567 653
wolfSSL 7:481bce714567 654 #define COMBA_STORE2(x) \
wolfSSL 7:481bce714567 655 x = c1;
wolfSSL 7:481bce714567 656
wolfSSL 7:481bce714567 657 #define CARRY_FORWARD \
wolfSSL 7:481bce714567 658 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 7:481bce714567 659
wolfSSL 7:481bce714567 660 #define COMBA_FINI
wolfSSL 7:481bce714567 661 /* SQRADD (x86-32): adds i*i to the accumulator c2:c1:c0. eax is loaded with i, mull squares it into edx:eax, and add/adc fold the product in with the final carry going to c2. j is not used. */
wolfSSL 7:481bce714567 662 #define SQRADD(i, j) \
wolfSSL 7:481bce714567 663 __asm__( \
wolfSSL 7:481bce714567 664 "movl %6,%%eax \n\t" \
wolfSSL 7:481bce714567 665 "mull %%eax \n\t" \
wolfSSL 7:481bce714567 666 "addl %%eax,%0 \n\t" \
wolfSSL 7:481bce714567 667 "adcl %%edx,%1 \n\t" \
wolfSSL 7:481bce714567 668 "adcl $0,%2 \n\t" \
wolfSSL 7:481bce714567 669 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","cc");
wolfSSL 7:481bce714567 670 /* SQRADD2 (x86-32): computes i*j once into edx:eax and adds it to c2:c1:c0 twice, i.e. accumulates 2*i*j with carry propagation after each add. */
wolfSSL 7:481bce714567 671 #define SQRADD2(i, j) \
wolfSSL 7:481bce714567 672 __asm__( \
wolfSSL 7:481bce714567 673 "movl %6,%%eax \n\t" \
wolfSSL 7:481bce714567 674 "mull %7 \n\t" \
wolfSSL 7:481bce714567 675 "addl %%eax,%0 \n\t" \
wolfSSL 7:481bce714567 676 "adcl %%edx,%1 \n\t" \
wolfSSL 7:481bce714567 677 "adcl $0,%2 \n\t" \
wolfSSL 7:481bce714567 678 "addl %%eax,%0 \n\t" \
wolfSSL 7:481bce714567 679 "adcl %%edx,%1 \n\t" \
wolfSSL 7:481bce714567 680 "adcl $0,%2 \n\t" \
wolfSSL 7:481bce714567 681 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx", "cc");
wolfSSL 7:481bce714567 682 /* SQRADDSC (x86-32): starts the secondary accumulator: sc0 = low word of i*j, sc1 = high word, sc2 = 0. sc0..sc2 are outputs only. */
wolfSSL 7:481bce714567 683 #define SQRADDSC(i, j) \
wolfSSL 7:481bce714567 684 __asm__( \
wolfSSL 7:481bce714567 685 "movl %3,%%eax \n\t" \
wolfSSL 7:481bce714567 686 "mull %4 \n\t" \
wolfSSL 7:481bce714567 687 "movl %%eax,%0 \n\t" \
wolfSSL 7:481bce714567 688 "movl %%edx,%1 \n\t" \
wolfSSL 7:481bce714567 689 "xorl %2,%2 \n\t" \
wolfSSL 7:481bce714567 690 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "g"(i), "g"(j) :"%eax","%edx","cc");
wolfSSL 7:481bce714567 691
wolfSSL 7:481bce714567 692 /* TAO: SQRADDSC above no longer lists sc0, sc1, sc2 as inputs (this removed a compiler warning), so i and j are operands %3 and %4 there instead of %6 and %7 */
wolfSSL 7:481bce714567 693 /* SQRADDAC (x86-32): adds i*j into the secondary accumulator sc2:sc1:sc0 with carry propagation. */
wolfSSL 7:481bce714567 694 #define SQRADDAC(i, j) \
wolfSSL 7:481bce714567 695 __asm__( \
wolfSSL 7:481bce714567 696 "movl %6,%%eax \n\t" \
wolfSSL 7:481bce714567 697 "mull %7 \n\t" \
wolfSSL 7:481bce714567 698 "addl %%eax,%0 \n\t" \
wolfSSL 7:481bce714567 699 "adcl %%edx,%1 \n\t" \
wolfSSL 7:481bce714567 700 "adcl $0,%2 \n\t" \
wolfSSL 7:481bce714567 701 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","cc");
wolfSSL 7:481bce714567 702 /* SQRADDDB (x86-32): adds the secondary accumulator sc2:sc1:sc0 into c2:c1:c0 twice (c += 2*sc), each pass being one add/adc/adc chain. */
wolfSSL 7:481bce714567 703 #define SQRADDDB \
wolfSSL 7:481bce714567 704 __asm__( \
wolfSSL 7:481bce714567 705 "addl %6,%0 \n\t" \
wolfSSL 7:481bce714567 706 "adcl %7,%1 \n\t" \
wolfSSL 7:481bce714567 707 "adcl %8,%2 \n\t" \
wolfSSL 7:481bce714567 708 "addl %6,%0 \n\t" \
wolfSSL 7:481bce714567 709 "adcl %7,%1 \n\t" \
wolfSSL 7:481bce714567 710 "adcl %8,%2 \n\t" \
wolfSSL 7:481bce714567 711 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");
wolfSSL 7:481bce714567 712
wolfSSL 7:481bce714567 713 #elif defined(TFM_X86_64)
wolfSSL 7:481bce714567 714 /* x86-64 optimized squaring comba helpers. The accumulator is the caller's three words c0 (low), c1, c2 (high). */
wolfSSL 7:481bce714567 715 /* COMBA_START and COMBA_FINI expand to nothing; CLEAR_CARRY zeroes c0..c2; COMBA_STORE(x) / COMBA_STORE2(x) assign c0 / c1 to x; CARRY_FORWARD shifts the accumulator down one word (c0 = c1, c1 = c2, c2 = 0). */
wolfSSL 7:481bce714567 716 #define COMBA_START
wolfSSL 7:481bce714567 717
wolfSSL 7:481bce714567 718 #define CLEAR_CARRY \
wolfSSL 7:481bce714567 719 c0 = c1 = c2 = 0;
wolfSSL 7:481bce714567 720
wolfSSL 7:481bce714567 721 #define COMBA_STORE(x) \
wolfSSL 7:481bce714567 722 x = c0;
wolfSSL 7:481bce714567 723
wolfSSL 7:481bce714567 724 #define COMBA_STORE2(x) \
wolfSSL 7:481bce714567 725 x = c1;
wolfSSL 7:481bce714567 726
wolfSSL 7:481bce714567 727 #define CARRY_FORWARD \
wolfSSL 7:481bce714567 728 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 7:481bce714567 729
wolfSSL 7:481bce714567 730 #define COMBA_FINI
wolfSSL 7:481bce714567 731 /* SQRADD (x86-64): adds i*i to c2:c1:c0 (rax = i, mulq squares it into rdx:rax, add/adc fold it in). j is not used. NOTE(review): i uses the "x" constraint while the sibling macros use "g" - confirm this is intentional. */
wolfSSL 7:481bce714567 732 #define SQRADD(i, j) \
wolfSSL 7:481bce714567 733 __asm__( \
wolfSSL 7:481bce714567 734 "movq %6,%%rax \n\t" \
wolfSSL 7:481bce714567 735 "mulq %%rax \n\t" \
wolfSSL 7:481bce714567 736 "addq %%rax,%0 \n\t" \
wolfSSL 7:481bce714567 737 "adcq %%rdx,%1 \n\t" \
wolfSSL 7:481bce714567 738 "adcq $0,%2 \n\t" \
wolfSSL 7:481bce714567 739 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "x"(i) :"%rax","%rdx","cc");
wolfSSL 7:481bce714567 740 /* SQRADD2 (x86-64): computes i*j once into rdx:rax and adds it to c2:c1:c0 twice, i.e. accumulates 2*i*j. */
wolfSSL 7:481bce714567 741 #define SQRADD2(i, j) \
wolfSSL 7:481bce714567 742 __asm__( \
wolfSSL 7:481bce714567 743 "movq %6,%%rax \n\t" \
wolfSSL 7:481bce714567 744 "mulq %7 \n\t" \
wolfSSL 7:481bce714567 745 "addq %%rax,%0 \n\t" \
wolfSSL 7:481bce714567 746 "adcq %%rdx,%1 \n\t" \
wolfSSL 7:481bce714567 747 "adcq $0,%2 \n\t" \
wolfSSL 7:481bce714567 748 "addq %%rax,%0 \n\t" \
wolfSSL 7:481bce714567 749 "adcq %%rdx,%1 \n\t" \
wolfSSL 7:481bce714567 750 "adcq $0,%2 \n\t" \
wolfSSL 7:481bce714567 751 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","cc");
wolfSSL 7:481bce714567 752 /* SQRADDSC (x86-64): starts the secondary accumulator: sc0 = low word of i*j, sc1 = high word, sc2 = 0. sc0..sc2 are outputs only. */
wolfSSL 7:481bce714567 753 #define SQRADDSC(i, j) \
wolfSSL 7:481bce714567 754 __asm__( \
wolfSSL 7:481bce714567 755 "movq %3,%%rax \n\t" \
wolfSSL 7:481bce714567 756 "mulq %4 \n\t" \
wolfSSL 7:481bce714567 757 "movq %%rax,%0 \n\t" \
wolfSSL 7:481bce714567 758 "movq %%rdx,%1 \n\t" \
wolfSSL 7:481bce714567 759 "xorq %2,%2 \n\t" \
wolfSSL 7:481bce714567 760 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "g"(i), "g"(j) :"%rax","%rdx","cc");
wolfSSL 7:481bce714567 761
wolfSSL 7:481bce714567 762 /* TAO: SQRADDSC above no longer lists sc0, sc1, sc2 as inputs (this removed a compiler warning), so i and j are operands %3 and %4 there instead of %6 and %7 */
wolfSSL 7:481bce714567 763 /* SQRADDAC (x86-64): adds i*j into the secondary accumulator sc2:sc1:sc0 with carry propagation. */
wolfSSL 7:481bce714567 764 #define SQRADDAC(i, j) \
wolfSSL 7:481bce714567 765 __asm__( \
wolfSSL 7:481bce714567 766 "movq %6,%%rax \n\t" \
wolfSSL 7:481bce714567 767 "mulq %7 \n\t" \
wolfSSL 7:481bce714567 768 "addq %%rax,%0 \n\t" \
wolfSSL 7:481bce714567 769 "adcq %%rdx,%1 \n\t" \
wolfSSL 7:481bce714567 770 "adcq $0,%2 \n\t" \
wolfSSL 7:481bce714567 771 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","cc");
wolfSSL 7:481bce714567 772 /* SQRADDDB (x86-64): adds the secondary accumulator sc2:sc1:sc0 into c2:c1:c0 twice (c += 2*sc). */
wolfSSL 7:481bce714567 773 #define SQRADDDB \
wolfSSL 7:481bce714567 774 __asm__( \
wolfSSL 7:481bce714567 775 "addq %6,%0 \n\t" \
wolfSSL 7:481bce714567 776 "adcq %7,%1 \n\t" \
wolfSSL 7:481bce714567 777 "adcq %8,%2 \n\t" \
wolfSSL 7:481bce714567 778 "addq %6,%0 \n\t" \
wolfSSL 7:481bce714567 779 "adcq %7,%1 \n\t" \
wolfSSL 7:481bce714567 780 "adcq %8,%2 \n\t" \
wolfSSL 7:481bce714567 781 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");
wolfSSL 7:481bce714567 782
wolfSSL 7:481bce714567 783 #elif defined(TFM_SSE2)
wolfSSL 7:481bce714567 784
wolfSSL 7:481bce714567 785 /* SSE2 optimized squaring comba helpers: same c0/c1/c2 accumulator macros as the other targets (clear, store c0 or c1 into x, shift down one word), except that COMBA_FINI is not empty. */
wolfSSL 7:481bce714567 786 #define COMBA_START
wolfSSL 7:481bce714567 787
wolfSSL 7:481bce714567 788 #define CLEAR_CARRY \
wolfSSL 7:481bce714567 789 c0 = c1 = c2 = 0;
wolfSSL 7:481bce714567 790
wolfSSL 7:481bce714567 791 #define COMBA_STORE(x) \
wolfSSL 7:481bce714567 792 x = c0;
wolfSSL 7:481bce714567 793
wolfSSL 7:481bce714567 794 #define COMBA_STORE2(x) \
wolfSSL 7:481bce714567 795 x = c1;
wolfSSL 7:481bce714567 796
wolfSSL 7:481bce714567 797 #define CARRY_FORWARD \
wolfSSL 7:481bce714567 798 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 7:481bce714567 799 /* COMBA_FINI issues emms; the SSE2 multiply macros in this section work in the MMX registers mm0/mm1. */
wolfSSL 7:481bce714567 800 #define COMBA_FINI \
wolfSSL 7:481bce714567 801 __asm__("emms");
wolfSSL 7:481bce714567 802 /* SQRADD (SSE2): squares i with pmuludq in mm0, moves the low then the high 32 bits of the product through eax, and adds them into c2:c1:c0 with carry. j is not used. */
wolfSSL 7:481bce714567 803 #define SQRADD(i, j) \
wolfSSL 7:481bce714567 804 __asm__( \
wolfSSL 7:481bce714567 805 "movd %6,%%mm0 \n\t" \
wolfSSL 7:481bce714567 806 "pmuludq %%mm0,%%mm0\n\t" \
wolfSSL 7:481bce714567 807 "movd %%mm0,%%eax \n\t" \
wolfSSL 7:481bce714567 808 "psrlq $32,%%mm0 \n\t" \
wolfSSL 7:481bce714567 809 "addl %%eax,%0 \n\t" \
wolfSSL 7:481bce714567 810 "movd %%mm0,%%eax \n\t" \
wolfSSL 7:481bce714567 811 "adcl %%eax,%1 \n\t" \
wolfSSL 7:481bce714567 812 "adcl $0,%2 \n\t" \
wolfSSL 7:481bce714567 813 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","cc");
wolfSSL 7:481bce714567 814 /* SQRADD2 (SSE2): multiplies i by j with pmuludq, copies the product halves to eax (low) and edx (high), and adds that pair into c2:c1:c0 twice (accumulates 2*i*j). */
wolfSSL 7:481bce714567 815 #define SQRADD2(i, j) \
wolfSSL 7:481bce714567 816 __asm__( \
wolfSSL 7:481bce714567 817 "movd %6,%%mm0 \n\t" \
wolfSSL 7:481bce714567 818 "movd %7,%%mm1 \n\t" \
wolfSSL 7:481bce714567 819 "pmuludq %%mm1,%%mm0\n\t" \
wolfSSL 7:481bce714567 820 "movd %%mm0,%%eax \n\t" \
wolfSSL 7:481bce714567 821 "psrlq $32,%%mm0 \n\t" \
wolfSSL 7:481bce714567 822 "movd %%mm0,%%edx \n\t" \
wolfSSL 7:481bce714567 823 "addl %%eax,%0 \n\t" \
wolfSSL 7:481bce714567 824 "adcl %%edx,%1 \n\t" \
wolfSSL 7:481bce714567 825 "adcl $0,%2 \n\t" \
wolfSSL 7:481bce714567 826 "addl %%eax,%0 \n\t" \
wolfSSL 7:481bce714567 827 "adcl %%edx,%1 \n\t" \
wolfSSL 7:481bce714567 828 "adcl $0,%2 \n\t" \
wolfSSL 7:481bce714567 829 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc");
wolfSSL 7:481bce714567 830 /* SQRADDSC (SSE2): starts the secondary accumulator: sc0 = low word of i*j, sc1 = high word, sc2 = 0. NOTE(review): this statement has no clobber list although it writes mm0/mm1 and xorl changes the flags - confirm that is acceptable. */
wolfSSL 7:481bce714567 831 #define SQRADDSC(i, j) \
wolfSSL 7:481bce714567 832 __asm__( \
wolfSSL 7:481bce714567 833 "movd %3,%%mm0 \n\t" \
wolfSSL 7:481bce714567 834 "movd %4,%%mm1 \n\t" \
wolfSSL 7:481bce714567 835 "pmuludq %%mm1,%%mm0\n\t" \
wolfSSL 7:481bce714567 836 "movd %%mm0,%0 \n\t" \
wolfSSL 7:481bce714567 837 "psrlq $32,%%mm0 \n\t" \
wolfSSL 7:481bce714567 838 "movd %%mm0,%1 \n\t" \
wolfSSL 7:481bce714567 839 "xorl %2,%2 \n\t" \
wolfSSL 7:481bce714567 840 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "m"(i), "m"(j));
wolfSSL 7:481bce714567 841
wolfSSL 7:481bce714567 842 /* TAO: SQRADDSC above no longer lists sc0, sc1, sc2 as inputs (this removed a compiler warning), so i and j are operands %3 and %4 there instead of %6 and %7 */
wolfSSL 7:481bce714567 843 /* SQRADDAC (SSE2): multiplies i by j with pmuludq and adds the product (eax low, edx high) into the secondary accumulator sc2:sc1:sc0. */
wolfSSL 7:481bce714567 844 #define SQRADDAC(i, j) \
wolfSSL 7:481bce714567 845 __asm__( \
wolfSSL 7:481bce714567 846 "movd %6,%%mm0 \n\t" \
wolfSSL 7:481bce714567 847 "movd %7,%%mm1 \n\t" \
wolfSSL 7:481bce714567 848 "pmuludq %%mm1,%%mm0\n\t" \
wolfSSL 7:481bce714567 849 "movd %%mm0,%%eax \n\t" \
wolfSSL 7:481bce714567 850 "psrlq $32,%%mm0 \n\t" \
wolfSSL 7:481bce714567 851 "movd %%mm0,%%edx \n\t" \
wolfSSL 7:481bce714567 852 "addl %%eax,%0 \n\t" \
wolfSSL 7:481bce714567 853 "adcl %%edx,%1 \n\t" \
wolfSSL 7:481bce714567 854 "adcl $0,%2 \n\t" \
wolfSSL 7:481bce714567 855 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j) :"%eax","%edx","cc");
wolfSSL 7:481bce714567 856 /* SQRADDDB (SSE2): adds the secondary accumulator sc2:sc1:sc0 into c2:c1:c0 twice (c += 2*sc) using plain integer add/adc. */
wolfSSL 7:481bce714567 857 #define SQRADDDB \
wolfSSL 7:481bce714567 858 __asm__( \
wolfSSL 7:481bce714567 859 "addl %6,%0 \n\t" \
wolfSSL 7:481bce714567 860 "adcl %7,%1 \n\t" \
wolfSSL 7:481bce714567 861 "adcl %8,%2 \n\t" \
wolfSSL 7:481bce714567 862 "addl %6,%0 \n\t" \
wolfSSL 7:481bce714567 863 "adcl %7,%1 \n\t" \
wolfSSL 7:481bce714567 864 "adcl %8,%2 \n\t" \
wolfSSL 7:481bce714567 865 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");
wolfSSL 7:481bce714567 866
wolfSSL 7:481bce714567 867 #elif defined(TFM_ARM)
wolfSSL 7:481bce714567 868
wolfSSL 7:481bce714567 869 /* ARM squaring comba helpers. The accumulator is the caller's three words c0 (low), c1, c2 (high). */
wolfSSL 7:481bce714567 870 /* COMBA_START and COMBA_FINI expand to nothing; CLEAR_CARRY zeroes c0..c2; COMBA_STORE(x) / COMBA_STORE2(x) assign c0 / c1 to x; CARRY_FORWARD shifts the accumulator down one word (c0 = c1, c1 = c2, c2 = 0). */
wolfSSL 7:481bce714567 871 #define COMBA_START
wolfSSL 7:481bce714567 872
wolfSSL 7:481bce714567 873 #define CLEAR_CARRY \
wolfSSL 7:481bce714567 874 c0 = c1 = c2 = 0;
wolfSSL 7:481bce714567 875
wolfSSL 7:481bce714567 876 #define COMBA_STORE(x) \
wolfSSL 7:481bce714567 877 x = c0;
wolfSSL 7:481bce714567 878
wolfSSL 7:481bce714567 879 #define COMBA_STORE2(x) \
wolfSSL 7:481bce714567 880 x = c1;
wolfSSL 7:481bce714567 881
wolfSSL 7:481bce714567 882 #define CARRY_FORWARD \
wolfSSL 7:481bce714567 883 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 7:481bce714567 884
wolfSSL 7:481bce714567 885 #define COMBA_FINI
wolfSSL 7:481bce714567 886
wolfSSL 7:481bce714567 887 /* SQRADD (ARM): UMULL squares i into r1:r0 (operand %6 is used for both factors, j is not used), then ADDS/ADCS/ADC add the product into c0, c1 and the carry into c2 */
wolfSSL 7:481bce714567 888 #define SQRADD(i, j) \
wolfSSL 7:481bce714567 889 __asm__( \
wolfSSL 7:481bce714567 890 " UMULL r0,r1,%6,%6 \n\t" \
wolfSSL 7:481bce714567 891 " ADDS %0,%0,r0 \n\t" \
wolfSSL 7:481bce714567 892 " ADCS %1,%1,r1 \n\t" \
wolfSSL 7:481bce714567 893 " ADC %2,%2,#0 \n\t" \
wolfSSL 7:481bce714567 894 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i) : "r0", "r1", "cc");
wolfSSL 7:481bce714567 895
wolfSSL 7:481bce714567 896 /* SQRADD2 (ARM): for squaring some of the terms are doubled - i*j is computed once into r1:r0 and added to c2:c1:c0 twice */
wolfSSL 7:481bce714567 897 #define SQRADD2(i, j) \
wolfSSL 7:481bce714567 898 __asm__( \
wolfSSL 7:481bce714567 899 " UMULL r0,r1,%6,%7 \n\t" \
wolfSSL 7:481bce714567 900 " ADDS %0,%0,r0 \n\t" \
wolfSSL 7:481bce714567 901 " ADCS %1,%1,r1 \n\t" \
wolfSSL 7:481bce714567 902 " ADC %2,%2,#0 \n\t" \
wolfSSL 7:481bce714567 903 " ADDS %0,%0,r0 \n\t" \
wolfSSL 7:481bce714567 904 " ADCS %1,%1,r1 \n\t" \
wolfSSL 7:481bce714567 905 " ADC %2,%2,#0 \n\t" \
wolfSSL 7:481bce714567 906 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "cc");
wolfSSL 7:481bce714567 907 /* SQRADDSC (ARM): starts the secondary accumulator: UMULL writes i*j straight into sc1:sc0 and SUB of the register from itself leaves sc2 = 0. sc0..sc2 are outputs only. */
wolfSSL 7:481bce714567 908 #define SQRADDSC(i, j) \
wolfSSL 7:481bce714567 909 __asm__( \
wolfSSL 7:481bce714567 910 " UMULL %0,%1,%3,%4 \n\t" \
wolfSSL 7:481bce714567 911 " SUB %2,%2,%2 \n\t" \
wolfSSL 7:481bce714567 912 :"=r"(sc0), "=r"(sc1), "=r"(sc2) : "r"(i), "r"(j) : "cc");
wolfSSL 7:481bce714567 913
wolfSSL 7:481bce714567 914 /* TAO: SQRADDSC above no longer lists sc0, sc1, sc2 as inputs (this removed a compiler warning), so i and j are operands %3 and %4 there instead of %6 and %7 */
wolfSSL 7:481bce714567 915 /* SQRADDAC (ARM): adds i*j (computed into r1:r0) into the secondary accumulator sc2:sc1:sc0 with carry propagation. */
wolfSSL 7:481bce714567 916 #define SQRADDAC(i, j) \
wolfSSL 7:481bce714567 917 __asm__( \
wolfSSL 7:481bce714567 918 " UMULL r0,r1,%6,%7 \n\t" \
wolfSSL 7:481bce714567 919 " ADDS %0,%0,r0 \n\t" \
wolfSSL 7:481bce714567 920 " ADCS %1,%1,r1 \n\t" \
wolfSSL 7:481bce714567 921 " ADC %2,%2,#0 \n\t" \
wolfSSL 7:481bce714567 922 :"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "r0", "r1", "cc");
wolfSSL 7:481bce714567 923 /* SQRADDDB (ARM): adds sc2:sc1:sc0 into c2:c1:c0 twice (c += 2*sc). Here the sc inputs are operands %3..%5 and the tied c inputs follow them. */
wolfSSL 7:481bce714567 924 #define SQRADDDB \
wolfSSL 7:481bce714567 925 __asm__( \
wolfSSL 7:481bce714567 926 " ADDS %0,%0,%3 \n\t" \
wolfSSL 7:481bce714567 927 " ADCS %1,%1,%4 \n\t" \
wolfSSL 7:481bce714567 928 " ADC %2,%2,%5 \n\t" \
wolfSSL 7:481bce714567 929 " ADDS %0,%0,%3 \n\t" \
wolfSSL 7:481bce714567 930 " ADCS %1,%1,%4 \n\t" \
wolfSSL 7:481bce714567 931 " ADC %2,%2,%5 \n\t" \
wolfSSL 7:481bce714567 932 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
wolfSSL 7:481bce714567 933
wolfSSL 7:481bce714567 934 #elif defined(TFM_PPC32)
wolfSSL 7:481bce714567 935
wolfSSL 7:481bce714567 936 /* PPC32 squaring comba helpers. The accumulator is the caller's three words c0 (low), c1, c2 (high). */
wolfSSL 7:481bce714567 937 /* COMBA_START and COMBA_FINI expand to nothing; CLEAR_CARRY zeroes c0..c2; COMBA_STORE(x) / COMBA_STORE2(x) assign c0 / c1 to x; CARRY_FORWARD shifts the accumulator down one word (c0 = c1, c1 = c2, c2 = 0). */
wolfSSL 7:481bce714567 938 #define COMBA_START
wolfSSL 7:481bce714567 939
wolfSSL 7:481bce714567 940 #define CLEAR_CARRY \
wolfSSL 7:481bce714567 941 c0 = c1 = c2 = 0;
wolfSSL 7:481bce714567 942
wolfSSL 7:481bce714567 943 #define COMBA_STORE(x) \
wolfSSL 7:481bce714567 944 x = c0;
wolfSSL 7:481bce714567 945
wolfSSL 7:481bce714567 946 #define COMBA_STORE2(x) \
wolfSSL 7:481bce714567 947 x = c1;
wolfSSL 7:481bce714567 948
wolfSSL 7:481bce714567 949 #define CARRY_FORWARD \
wolfSSL 7:481bce714567 950 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 7:481bce714567 951
wolfSSL 7:481bce714567 952 #define COMBA_FINI
wolfSSL 7:481bce714567 953
wolfSSL 7:481bce714567 954 /* SQRADD (PPC32): squares i (operand %6 is both factors, j is not used): mullw gives the low word and mulhwu the high word in scratch register 16; addc/adde/addze add them into c0, c1 and the carry into c2 */
wolfSSL 7:481bce714567 955 #define SQRADD(i, j) \
wolfSSL 7:481bce714567 956 __asm__( \
wolfSSL 7:481bce714567 957 " mullw 16,%6,%6 \n\t" \
wolfSSL 7:481bce714567 958 " addc %0,%0,16 \n\t" \
wolfSSL 7:481bce714567 959 " mulhwu 16,%6,%6 \n\t" \
wolfSSL 7:481bce714567 960 " adde %1,%1,16 \n\t" \
wolfSSL 7:481bce714567 961 " addze %2,%2 \n\t" \
wolfSSL 7:481bce714567 962 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","cc");
wolfSSL 7:481bce714567 963
wolfSSL 7:481bce714567 964 /* SQRADD2 (PPC32): for squaring some of the terms are doubled - the low/high words of i*j are held in registers 16/17 and added to c2:c1:c0 twice */
wolfSSL 7:481bce714567 965 #define SQRADD2(i, j) \
wolfSSL 7:481bce714567 966 __asm__( \
wolfSSL 7:481bce714567 967 " mullw 16,%6,%7 \n\t" \
wolfSSL 7:481bce714567 968 " mulhwu 17,%6,%7 \n\t" \
wolfSSL 7:481bce714567 969 " addc %0,%0,16 \n\t" \
wolfSSL 7:481bce714567 970 " adde %1,%1,17 \n\t" \
wolfSSL 7:481bce714567 971 " addze %2,%2 \n\t" \
wolfSSL 7:481bce714567 972 " addc %0,%0,16 \n\t" \
wolfSSL 7:481bce714567 973 " adde %1,%1,17 \n\t" \
wolfSSL 7:481bce714567 974 " addze %2,%2 \n\t" \
wolfSSL 7:481bce714567 975 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","cc");
wolfSSL 7:481bce714567 976 /* SQRADDSC (PPC32): starts the secondary accumulator: sc0 = low word of i*j, sc1 = high word, sc2 = 0 (xor with itself). Unlike the x86/ARM variants, sc0..sc2 are still listed as tied inputs here, although their incoming values do not affect the result. */
wolfSSL 7:481bce714567 977 #define SQRADDSC(i, j) \
wolfSSL 7:481bce714567 978 __asm__( \
wolfSSL 7:481bce714567 979 " mullw %0,%6,%7 \n\t" \
wolfSSL 7:481bce714567 980 " mulhwu %1,%6,%7 \n\t" \
wolfSSL 7:481bce714567 981 " xor %2,%2,%2 \n\t" \
wolfSSL 7:481bce714567 982 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc");
wolfSSL 7:481bce714567 983 /* SQRADDAC (PPC32): adds i*j into the secondary accumulator sc2:sc1:sc0, using register 16 as scratch for each product half. */
wolfSSL 7:481bce714567 984 #define SQRADDAC(i, j) \
wolfSSL 7:481bce714567 985 __asm__( \
wolfSSL 7:481bce714567 986 " mullw 16,%6,%7 \n\t" \
wolfSSL 7:481bce714567 987 " addc %0,%0,16 \n\t" \
wolfSSL 7:481bce714567 988 " mulhwu 16,%6,%7 \n\t" \
wolfSSL 7:481bce714567 989 " adde %1,%1,16 \n\t" \
wolfSSL 7:481bce714567 990 " addze %2,%2 \n\t" \
wolfSSL 7:481bce714567 991 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "cc");
wolfSSL 7:481bce714567 992 /* SQRADDDB (PPC32): adds sc2:sc1:sc0 into c2:c1:c0 twice (c += 2*sc). The sc inputs are operands %3..%5 and the tied c inputs follow them. */
wolfSSL 7:481bce714567 993 #define SQRADDDB \
wolfSSL 7:481bce714567 994 __asm__( \
wolfSSL 7:481bce714567 995 " addc %0,%0,%3 \n\t" \
wolfSSL 7:481bce714567 996 " adde %1,%1,%4 \n\t" \
wolfSSL 7:481bce714567 997 " adde %2,%2,%5 \n\t" \
wolfSSL 7:481bce714567 998 " addc %0,%0,%3 \n\t" \
wolfSSL 7:481bce714567 999 " adde %1,%1,%4 \n\t" \
wolfSSL 7:481bce714567 1000 " adde %2,%2,%5 \n\t" \
wolfSSL 7:481bce714567 1001 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
wolfSSL 7:481bce714567 1002
wolfSSL 7:481bce714567 1003 #elif defined(TFM_PPC64)
wolfSSL 7:481bce714567 1004 /* PPC64 squaring comba helpers. The accumulator is the caller's three words c0 (low), c1, c2 (high). */
wolfSSL 7:481bce714567 1005 /* COMBA_START and COMBA_FINI expand to nothing; CLEAR_CARRY zeroes c0..c2; COMBA_STORE(x) / COMBA_STORE2(x) assign c0 / c1 to x; CARRY_FORWARD shifts the accumulator down one word (c0 = c1, c1 = c2, c2 = 0). */
wolfSSL 7:481bce714567 1006 #define COMBA_START
wolfSSL 7:481bce714567 1007
wolfSSL 7:481bce714567 1008 #define CLEAR_CARRY \
wolfSSL 7:481bce714567 1009 c0 = c1 = c2 = 0;
wolfSSL 7:481bce714567 1010
wolfSSL 7:481bce714567 1011 #define COMBA_STORE(x) \
wolfSSL 7:481bce714567 1012 x = c0;
wolfSSL 7:481bce714567 1013
wolfSSL 7:481bce714567 1014 #define COMBA_STORE2(x) \
wolfSSL 7:481bce714567 1015 x = c1;
wolfSSL 7:481bce714567 1016
wolfSSL 7:481bce714567 1017 #define CARRY_FORWARD \
wolfSSL 7:481bce714567 1018 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 7:481bce714567 1019
wolfSSL 7:481bce714567 1020 #define COMBA_FINI
wolfSSL 7:481bce714567 1021
wolfSSL 7:481bce714567 1022 /* SQRADD (PPC64): squares i (operand %6 is both factors, j is not used): mulld gives the low word and mulhdu the high word in scratch register 16; addc/adde/addze add them into c0, c1 and the carry into c2 */
wolfSSL 7:481bce714567 1023 #define SQRADD(i, j) \
wolfSSL 7:481bce714567 1024 __asm__( \
wolfSSL 7:481bce714567 1025 " mulld 16,%6,%6 \n\t" \
wolfSSL 7:481bce714567 1026 " addc %0,%0,16 \n\t" \
wolfSSL 7:481bce714567 1027 " mulhdu 16,%6,%6 \n\t" \
wolfSSL 7:481bce714567 1028 " adde %1,%1,16 \n\t" \
wolfSSL 7:481bce714567 1029 " addze %2,%2 \n\t" \
wolfSSL 7:481bce714567 1030 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","cc");
wolfSSL 7:481bce714567 1031
wolfSSL 7:481bce714567 1032 /* SQRADD2 (PPC64): for squaring some of the terms are doubled - the low/high words of i*j are held in registers 16/17 and added to c2:c1:c0 twice */
wolfSSL 7:481bce714567 1033 #define SQRADD2(i, j) \
wolfSSL 7:481bce714567 1034 __asm__( \
wolfSSL 7:481bce714567 1035 " mulld 16,%6,%7 \n\t" \
wolfSSL 7:481bce714567 1036 " mulhdu 17,%6,%7 \n\t" \
wolfSSL 7:481bce714567 1037 " addc %0,%0,16 \n\t" \
wolfSSL 7:481bce714567 1038 " adde %1,%1,17 \n\t" \
wolfSSL 7:481bce714567 1039 " addze %2,%2 \n\t" \
wolfSSL 7:481bce714567 1040 " addc %0,%0,16 \n\t" \
wolfSSL 7:481bce714567 1041 " adde %1,%1,17 \n\t" \
wolfSSL 7:481bce714567 1042 " addze %2,%2 \n\t" \
wolfSSL 7:481bce714567 1043 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","cc");
wolfSSL 7:481bce714567 1044 /* SQRADDSC (PPC64): starts the secondary accumulator: sc0 = low word of i*j, sc1 = high word, sc2 = 0 (xor with itself). sc0..sc2 are still listed as tied inputs here, although their incoming values do not affect the result. */
wolfSSL 7:481bce714567 1045 #define SQRADDSC(i, j) \
wolfSSL 7:481bce714567 1046 __asm__( \
wolfSSL 7:481bce714567 1047 " mulld %0,%6,%7 \n\t" \
wolfSSL 7:481bce714567 1048 " mulhdu %1,%6,%7 \n\t" \
wolfSSL 7:481bce714567 1049 " xor %2,%2,%2 \n\t" \
wolfSSL 7:481bce714567 1050 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc");
wolfSSL 7:481bce714567 1051 /* SQRADDAC (PPC64): adds i*j into the secondary accumulator sc2:sc1:sc0, using register 16 as scratch for each product half. */
wolfSSL 7:481bce714567 1052 #define SQRADDAC(i, j) \
wolfSSL 7:481bce714567 1053 __asm__( \
wolfSSL 7:481bce714567 1054 " mulld 16,%6,%7 \n\t" \
wolfSSL 7:481bce714567 1055 " addc %0,%0,16 \n\t" \
wolfSSL 7:481bce714567 1056 " mulhdu 16,%6,%7 \n\t" \
wolfSSL 7:481bce714567 1057 " adde %1,%1,16 \n\t" \
wolfSSL 7:481bce714567 1058 " addze %2,%2 \n\t" \
wolfSSL 7:481bce714567 1059 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "cc");
wolfSSL 7:481bce714567 1060 /* SQRADDDB (PPC64): adds sc2:sc1:sc0 into c2:c1:c0 twice (c += 2*sc). The sc inputs are operands %3..%5 and the tied c inputs follow them. */
wolfSSL 7:481bce714567 1061 #define SQRADDDB \
wolfSSL 7:481bce714567 1062 __asm__( \
wolfSSL 7:481bce714567 1063 " addc %0,%0,%3 \n\t" \
wolfSSL 7:481bce714567 1064 " adde %1,%1,%4 \n\t" \
wolfSSL 7:481bce714567 1065 " adde %2,%2,%5 \n\t" \
wolfSSL 7:481bce714567 1066 " addc %0,%0,%3 \n\t" \
wolfSSL 7:481bce714567 1067 " adde %1,%1,%4 \n\t" \
wolfSSL 7:481bce714567 1068 " adde %2,%2,%5 \n\t" \
wolfSSL 7:481bce714567 1069 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
wolfSSL 7:481bce714567 1070
wolfSSL 7:481bce714567 1071
wolfSSL 7:481bce714567 1072 #elif defined(TFM_AVR32)
wolfSSL 7:481bce714567 1073
wolfSSL 7:481bce714567 1074 /* AVR32 squaring comba helpers. The accumulator is the caller's three words c0 (low), c1, c2 (high). */
wolfSSL 7:481bce714567 1075 /* COMBA_START and COMBA_FINI expand to nothing; CLEAR_CARRY zeroes c0..c2; COMBA_STORE(x) / COMBA_STORE2(x) assign c0 / c1 to x; CARRY_FORWARD shifts the accumulator down one word (c0 = c1, c1 = c2, c2 = 0). */
wolfSSL 7:481bce714567 1076 #define COMBA_START
wolfSSL 7:481bce714567 1077
wolfSSL 7:481bce714567 1078 #define CLEAR_CARRY \
wolfSSL 7:481bce714567 1079 c0 = c1 = c2 = 0;
wolfSSL 7:481bce714567 1080
wolfSSL 7:481bce714567 1081 #define COMBA_STORE(x) \
wolfSSL 7:481bce714567 1082 x = c0;
wolfSSL 7:481bce714567 1083
wolfSSL 7:481bce714567 1084 #define COMBA_STORE2(x) \
wolfSSL 7:481bce714567 1085 x = c1;
wolfSSL 7:481bce714567 1086
wolfSSL 7:481bce714567 1087 #define CARRY_FORWARD \
wolfSSL 7:481bce714567 1088 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 7:481bce714567 1089
wolfSSL 7:481bce714567 1090 #define COMBA_FINI
wolfSSL 7:481bce714567 1091
wolfSSL 7:481bce714567 1092 /* SQRADD (AVR32): squares i (operand %6 is both factors, j is not used) with mulu.d, whose result is consumed from r2 (low) and r3 (high); add/adc add it into c0/c1 and acr adds the carry into c2. NOTE(review): unlike SQRADDDB in this section, the clobber list has no "cc" entry. */
wolfSSL 7:481bce714567 1093 #define SQRADD(i, j) \
wolfSSL 7:481bce714567 1094 __asm__( \
wolfSSL 7:481bce714567 1095 " mulu.d r2,%6,%6 \n\t" \
wolfSSL 7:481bce714567 1096 " add %0,%0,r2 \n\t" \
wolfSSL 7:481bce714567 1097 " adc %1,%1,r3 \n\t" \
wolfSSL 7:481bce714567 1098 " acr %2 \n\t" \
wolfSSL 7:481bce714567 1099 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r2","r3");
wolfSSL 7:481bce714567 1100 /* SQRADD2 (AVR32): computes i*j once with mulu.d (result consumed from r2 low / r3 high) and adds it to c2:c1:c0 twice, i.e. accumulates 2*i*j. The acr lines take a single register operand, as in SQRADD and SQRADDAC of this section (a stray trailing comma was removed), and "cc" is declared clobbered because add/adc/acr change the flags. */
wolfSSL 7:481bce714567 1101 /* for squaring some of the terms are doubled... */
wolfSSL 7:481bce714567 1102 #define SQRADD2(i, j) \
wolfSSL 7:481bce714567 1103 __asm__( \
wolfSSL 7:481bce714567 1104 " mulu.d r2,%6,%7 \n\t" \
wolfSSL 7:481bce714567 1105 " add %0,%0,r2 \n\t" \
wolfSSL 7:481bce714567 1106 " adc %1,%1,r3 \n\t" \
wolfSSL 7:481bce714567 1107 " acr %2 \n\t" \
wolfSSL 7:481bce714567 1108 " add %0,%0,r2 \n\t" \
wolfSSL 7:481bce714567 1109 " adc %1,%1,r3 \n\t" \
wolfSSL 7:481bce714567 1110 " acr %2 \n\t" \
wolfSSL 7:481bce714567 1111 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2", "r3", "cc");
wolfSSL 7:481bce714567 1112 /* SQRADDSC (AVR32): starts the secondary accumulator: the mulu.d result is copied from r2/r3 into sc0/sc1 and eor of the register with itself leaves sc2 = 0. sc0..sc2 are still listed as tied inputs here, although their incoming values do not affect the result. */
wolfSSL 7:481bce714567 1113 #define SQRADDSC(i, j) \
wolfSSL 7:481bce714567 1114 __asm__( \
wolfSSL 7:481bce714567 1115 " mulu.d r2,%6,%7 \n\t" \
wolfSSL 7:481bce714567 1116 " mov %0,r2 \n\t" \
wolfSSL 7:481bce714567 1117 " mov %1,r3 \n\t" \
wolfSSL 7:481bce714567 1118 " eor %2,%2 \n\t" \
wolfSSL 7:481bce714567 1119 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "r2", "r3");
wolfSSL 7:481bce714567 1120 /* SQRADDAC (AVR32): adds i*j (consumed from r2 low / r3 high) into the secondary accumulator sc2:sc1:sc0; acr adds the final carry into sc2. */
wolfSSL 7:481bce714567 1121 #define SQRADDAC(i, j) \
wolfSSL 7:481bce714567 1122 __asm__( \
wolfSSL 7:481bce714567 1123 " mulu.d r2,%6,%7 \n\t" \
wolfSSL 7:481bce714567 1124 " add %0,%0,r2 \n\t" \
wolfSSL 7:481bce714567 1125 " adc %1,%1,r3 \n\t" \
wolfSSL 7:481bce714567 1126 " acr %2 \n\t" \
wolfSSL 7:481bce714567 1127 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r2", "r3");
wolfSSL 7:481bce714567 1128 /* SQRADDDB (AVR32): adds sc2:sc1:sc0 into c2:c1:c0 twice (c += 2*sc). The sc inputs are operands %3..%5 and the tied c inputs follow them. */
wolfSSL 7:481bce714567 1129 #define SQRADDDB \
wolfSSL 7:481bce714567 1130 __asm__( \
wolfSSL 7:481bce714567 1131 " add %0,%0,%3 \n\t" \
wolfSSL 7:481bce714567 1132 " adc %1,%1,%4 \n\t" \
wolfSSL 7:481bce714567 1133 " adc %2,%2,%5 \n\t" \
wolfSSL 7:481bce714567 1134 " add %0,%0,%3 \n\t" \
wolfSSL 7:481bce714567 1135 " adc %1,%1,%4 \n\t" \
wolfSSL 7:481bce714567 1136 " adc %2,%2,%5 \n\t" \
wolfSSL 7:481bce714567 1137 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
wolfSSL 7:481bce714567 1138
wolfSSL 7:481bce714567 1139
wolfSSL 7:481bce714567 1140 #else
wolfSSL 7:481bce714567 1141
wolfSSL 7:481bce714567 1142 #define TFM_ISO
wolfSSL 7:481bce714567 1143 /* TFM_ISO is defined only on this fallback path, i.e. when none of the assembler targets tested above is selected. */
wolfSSL 7:481bce714567 1144 /* ISO C portable code: COMBA_START and COMBA_FINI expand to nothing; CLEAR_CARRY zeroes c0..c2; COMBA_STORE(x) / COMBA_STORE2(x) assign c0 / c1 to x; CARRY_FORWARD shifts the accumulator down one word. */
wolfSSL 7:481bce714567 1145
wolfSSL 7:481bce714567 1146 #define COMBA_START
wolfSSL 7:481bce714567 1147
wolfSSL 7:481bce714567 1148 #define CLEAR_CARRY \
wolfSSL 7:481bce714567 1149 c0 = c1 = c2 = 0;
wolfSSL 7:481bce714567 1150
wolfSSL 7:481bce714567 1151 #define COMBA_STORE(x) \
wolfSSL 7:481bce714567 1152 x = c0;
wolfSSL 7:481bce714567 1153
wolfSSL 7:481bce714567 1154 #define COMBA_STORE2(x) \
wolfSSL 7:481bce714567 1155 x = c1;
wolfSSL 7:481bce714567 1156
wolfSSL 7:481bce714567 1157 #define CARRY_FORWARD \
wolfSSL 7:481bce714567 1158 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 7:481bce714567 1159
wolfSSL 7:481bce714567 1160 #define COMBA_FINI
wolfSSL 7:481bce714567 1161
wolfSSL 7:481bce714567 1162 /* SQRADD (ISO C): adds the product i*j (computed in fp_word) into c0, carries the part above DIGIT_BIT into c1, and adds any further carry into c2. Unlike the assembler variants, j is used here. */
wolfSSL 7:481bce714567 1163 #define SQRADD(i, j) \
wolfSSL 7:481bce714567 1164 do { fp_word t; \
wolfSSL 7:481bce714567 1165 t = c0 + ((fp_word)i) * ((fp_word)j); c0 = (fp_digit)t; \
wolfSSL 7:481bce714567 1166 t = c1 + (t >> DIGIT_BIT); c1 = (fp_digit)t; \
wolfSSL 7:481bce714567 1167 c2 +=(fp_digit) (t >> DIGIT_BIT); \
wolfSSL 7:481bce714567 1168 } while (0);
wolfSSL 7:481bce714567 1169
wolfSSL 7:481bce714567 1170 /* SQRADD2 (ISO C): computes t = i*j once and adds it into c2:c1:c0 twice. The working variable tt is not declared by this macro, so it must already exist where the macro is expanded (presumably as an fp_word). */
wolfSSL 7:481bce714567 1171 /* for squaring some of the terms are doubled... */
wolfSSL 7:481bce714567 1172 #define SQRADD2(i, j) \
wolfSSL 7:481bce714567 1173 do { fp_word t; \
wolfSSL 7:481bce714567 1174 t = ((fp_word)i) * ((fp_word)j); \
wolfSSL 7:481bce714567 1175 tt = (fp_word)c0 + t; c0 = (fp_digit)tt; \
wolfSSL 7:481bce714567 1176 tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = (fp_digit)tt; \
wolfSSL 7:481bce714567 1177 c2 +=(fp_digit)( tt >> DIGIT_BIT); \
wolfSSL 7:481bce714567 1178 tt = (fp_word)c0 + t; c0 = (fp_digit)tt; \
wolfSSL 7:481bce714567 1179 tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = (fp_digit)tt; \
wolfSSL 7:481bce714567 1180 c2 +=(fp_digit) (tt >> DIGIT_BIT); \
wolfSSL 7:481bce714567 1181 } while (0);
wolfSSL 7:481bce714567 1182 /* SQRADDSC (ISO C): starts the secondary accumulator: sc0 = low digit of i*j, sc1 = the product shifted right by DIGIT_BIT, sc2 = 0. */
wolfSSL 7:481bce714567 1183 #define SQRADDSC(i, j) \
wolfSSL 7:481bce714567 1184 do { fp_word t; \
wolfSSL 7:481bce714567 1185 t = ((fp_word)i) * ((fp_word)j); \
wolfSSL 7:481bce714567 1186 sc0 = (fp_digit)t; sc1 = (t >> DIGIT_BIT); sc2 = 0; \
wolfSSL 7:481bce714567 1187 } while (0);
wolfSSL 7:481bce714567 1188 /* SQRADDAC (ISO C): adds i*j into the secondary accumulator: low digit into sc0, carry into sc1, further carry into sc2. */
wolfSSL 7:481bce714567 1189 #define SQRADDAC(i, j) \
wolfSSL 7:481bce714567 1190 do { fp_word t; \
wolfSSL 7:481bce714567 1191 t = sc0 + ((fp_word)i) * ((fp_word)j); sc0 = (fp_digit)t; \
wolfSSL 7:481bce714567 1192 t = sc1 + (t >> DIGIT_BIT); sc1 = (fp_digit)t; \
wolfSSL 7:481bce714567 1193 sc2 += (fp_digit)(t >> DIGIT_BIT); \
wolfSSL 7:481bce714567 1194 } while (0);
wolfSSL 7:481bce714567 1195 /* SQRADDDB (ISO C): adds twice the secondary accumulator into the main one, digit by digit: c0 += 2*sc0, c1 += 2*sc1 + carry, c2 += 2*sc2 + carry. */
wolfSSL 7:481bce714567 1196 #define SQRADDDB \
wolfSSL 7:481bce714567 1197 do { fp_word t; \
wolfSSL 7:481bce714567 1198 t = ((fp_word)sc0) + ((fp_word)sc0) + c0; c0 = (fp_digit)t; \
wolfSSL 7:481bce714567 1199 t = ((fp_word)sc1) + ((fp_word)sc1) + c1 + (t >> DIGIT_BIT); \
wolfSSL 7:481bce714567 1200 c1 = (fp_digit)t; \
wolfSSL 7:481bce714567 1201 c2 = c2 + (fp_digit)(((fp_word)sc2) + ((fp_word)sc2) + (t >> DIGIT_BIT)); \
wolfSSL 7:481bce714567 1202 } while (0);
wolfSSL 7:481bce714567 1203
wolfSSL 7:481bce714567 1204 #endif
wolfSSL 7:481bce714567 1205
wolfSSL 7:481bce714567 1206 #ifdef TFM_SMALL_SET
wolfSSL 7:481bce714567 1207 #include "fp_sqr_comba_small_set.i"
wolfSSL 7:481bce714567 1208 #endif
wolfSSL 7:481bce714567 1209
wolfSSL 7:481bce714567 1210 #if defined(TFM_SQR3) && FP_SIZE >= 6
wolfSSL 7:481bce714567 1211 #include "fp_sqr_comba_3.i"
wolfSSL 7:481bce714567 1212 #endif
wolfSSL 7:481bce714567 1213 #if defined(TFM_SQR4) && FP_SIZE >= 8
wolfSSL 7:481bce714567 1214 #include "fp_sqr_comba_4.i"
wolfSSL 7:481bce714567 1215 #endif
wolfSSL 7:481bce714567 1216 #if defined(TFM_SQR6) && FP_SIZE >= 12
wolfSSL 7:481bce714567 1217 #include "fp_sqr_comba_6.i"
wolfSSL 7:481bce714567 1218 #endif
wolfSSL 7:481bce714567 1219 #if defined(TFM_SQR7) && FP_SIZE >= 14
wolfSSL 7:481bce714567 1220 #include "fp_sqr_comba_7.i"
wolfSSL 7:481bce714567 1221 #endif
wolfSSL 7:481bce714567 1222 #if defined(TFM_SQR8) && FP_SIZE >= 16
wolfSSL 7:481bce714567 1223 #include "fp_sqr_comba_8.i"
wolfSSL 7:481bce714567 1224 #endif
wolfSSL 7:481bce714567 1225 #if defined(TFM_SQR9) && FP_SIZE >= 18
wolfSSL 7:481bce714567 1226 #include "fp_sqr_comba_9.i"
wolfSSL 7:481bce714567 1227 #endif
wolfSSL 7:481bce714567 1228 #if defined(TFM_SQR12) && FP_SIZE >= 24
wolfSSL 7:481bce714567 1229 #include "fp_sqr_comba_12.i"
wolfSSL 7:481bce714567 1230 #endif
wolfSSL 7:481bce714567 1231 #if defined(TFM_SQR17) && FP_SIZE >= 34
wolfSSL 7:481bce714567 1232 #include "fp_sqr_comba_17.i"
wolfSSL 7:481bce714567 1233 #endif
wolfSSL 7:481bce714567 1234 #if defined(TFM_SQR20) && FP_SIZE >= 40
wolfSSL 7:481bce714567 1235 #include "fp_sqr_comba_20.i"
wolfSSL 7:481bce714567 1236 #endif
wolfSSL 7:481bce714567 1237 #if defined(TFM_SQR24) && FP_SIZE >= 48
wolfSSL 7:481bce714567 1238 #include "fp_sqr_comba_24.i"
wolfSSL 7:481bce714567 1239 #endif
wolfSSL 7:481bce714567 1240 #if defined(TFM_SQR28) && FP_SIZE >= 56
wolfSSL 7:481bce714567 1241 #include "fp_sqr_comba_28.i"
wolfSSL 7:481bce714567 1242 #endif
wolfSSL 7:481bce714567 1243 #if defined(TFM_SQR32) && FP_SIZE >= 64
wolfSSL 7:481bce714567 1244 #include "fp_sqr_comba_32.i"
wolfSSL 7:481bce714567 1245 #endif
wolfSSL 7:481bce714567 1246 #if defined(TFM_SQR48) && FP_SIZE >= 96
wolfSSL 7:481bce714567 1247 #include "fp_sqr_comba_48.i"
wolfSSL 7:481bce714567 1248 #endif
wolfSSL 7:481bce714567 1249 #if defined(TFM_SQR64) && FP_SIZE >= 128
wolfSSL 7:481bce714567 1250 #include "fp_sqr_comba_64.i"
wolfSSL 7:481bce714567 1251 #endif
wolfSSL 7:481bce714567 1252 /* end fp_sqr_comba.c asm */
wolfSSL 7:481bce714567 1253
wolfSSL 7:481bce714567 1254 /* start fp_mul_comba.c asm */
wolfSSL 7:481bce714567 1255 /* these are the combas. Worship them. */
wolfSSL 7:481bce714567 1256 #if defined(TFM_X86)
wolfSSL 7:481bce714567 1257 /* Generic x86 optimized code for the multiply comba; the accumulator is the caller's three words c0 (low), c1, c2 (high) */
wolfSSL 7:481bce714567 1258
wolfSSL 7:481bce714567 1259 /* anything you need at the start (nothing on this target) */
wolfSSL 7:481bce714567 1260 #define COMBA_START
wolfSSL 7:481bce714567 1261
wolfSSL 7:481bce714567 1262 /* clear the chaining variables c0, c1 and c2 */
wolfSSL 7:481bce714567 1263 #define COMBA_CLEAR \
wolfSSL 7:481bce714567 1264 c0 = c1 = c2 = 0;
wolfSSL 7:481bce714567 1265
wolfSSL 7:481bce714567 1266 /* forward the carry to the next digit: c0 = c1, c1 = c2, c2 = 0 */
wolfSSL 7:481bce714567 1267 #define COMBA_FORWARD \
wolfSSL 7:481bce714567 1268 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 7:481bce714567 1269
wolfSSL 7:481bce714567 1270 /* store the first sum (c0) into x */
wolfSSL 7:481bce714567 1271 #define COMBA_STORE(x) \
wolfSSL 7:481bce714567 1272 x = c0;
wolfSSL 7:481bce714567 1273
wolfSSL 7:481bce714567 1274 /* store the second sum [carry] (c1) into x */
wolfSSL 7:481bce714567 1275 #define COMBA_STORE2(x) \
wolfSSL 7:481bce714567 1276 x = c1;
wolfSSL 7:481bce714567 1277
wolfSSL 7:481bce714567 1278 /* anything you need at the end (nothing on this target) */
wolfSSL 7:481bce714567 1279 #define COMBA_FINI
wolfSSL 7:481bce714567 1280
wolfSSL 7:481bce714567 1281 /* MULADD: multiplies i by j (eax = i, mull j leaves the product in edx:eax) and adds the product into c2:c1:c0 with carry propagation */
wolfSSL 7:481bce714567 1282 #define MULADD(i, j) \
wolfSSL 7:481bce714567 1283 __asm__( \
wolfSSL 7:481bce714567 1284 "movl %6,%%eax \n\t" \
wolfSSL 7:481bce714567 1285 "mull %7 \n\t" \
wolfSSL 7:481bce714567 1286 "addl %%eax,%0 \n\t" \
wolfSSL 7:481bce714567 1287 "adcl %%edx,%1 \n\t" \
wolfSSL 7:481bce714567 1288 "adcl $0,%2 \n\t" \
wolfSSL 7:481bce714567 1289 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc");
wolfSSL 7:481bce714567 1290
wolfSSL 7:481bce714567 1291 #elif defined(TFM_X86_64)
wolfSSL 7:481bce714567 1292 /* x86-64 optimized: 64-bit inline-assembly comba multiply helpers.
 * Every macro in this branch reads or writes three variables named c0, c1
 * and c2, which must already be declared where the macro is expanded. */
wolfSSL 7:481bce714567 1293
wolfSSL 7:481bce714567 1294 /* anything you need at the start (nothing here: expands to no tokens) */
wolfSSL 7:481bce714567 1295 #define COMBA_START
wolfSSL 7:481bce714567 1296
wolfSSL 7:481bce714567 1297 /* clear the chaining variables: c0, c1 and c2 are all set to 0 */
wolfSSL 7:481bce714567 1298 #define COMBA_CLEAR \
wolfSSL 7:481bce714567 1299 c0 = c1 = c2 = 0;
wolfSSL 7:481bce714567 1300
wolfSSL 7:481bce714567 1301 /* forward the carry to the next digit: c1 moves into c0, c2 moves into c1
 * and c2 restarts at 0.  The expansion already ends in ';', so a caller's
 * own ';' produces an additional empty statement. */
wolfSSL 7:481bce714567 1302 #define COMBA_FORWARD \
wolfSSL 7:481bce714567 1303 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 7:481bce714567 1304
wolfSSL 7:481bce714567 1305 /* store the first sum: assigns c0 to x */
wolfSSL 7:481bce714567 1306 #define COMBA_STORE(x) \
wolfSSL 7:481bce714567 1307 x = c0;
wolfSSL 7:481bce714567 1308
wolfSSL 7:481bce714567 1309 /* store the second sum [carry]: assigns c1 to x */
wolfSSL 7:481bce714567 1310 #define COMBA_STORE2(x) \
wolfSSL 7:481bce714567 1311 x = c1;
wolfSSL 7:481bce714567 1312
wolfSSL 7:481bce714567 1313 /* anything you need at the end (nothing here: expands to no tokens) */
wolfSSL 7:481bce714567 1314 #define COMBA_FINI
wolfSSL 7:481bce714567 1315
wolfSSL 7:481bce714567 1316 /* this should multiply i and j and add the product into c0/c1/c2:
 *   movq/mulq  - rax = i, then rdx:rax = rax * j
 *   addq       - c0 += rax (low half)
 *   adcq       - c1 += rdx + carry (high half)
 *   adcq $0    - c2 += carry
 * rax, rdx and the condition codes are declared as clobbered.
 * NOTE(review): i and j use the "g" constraint, which also admits an
 * immediate operand that mulq cannot encode; presumably callers only pass
 * variables or array elements - confirm. */
wolfSSL 7:481bce714567 1317 #define MULADD(i, j) \
wolfSSL 7:481bce714567 1318 __asm__ ( \
wolfSSL 7:481bce714567 1319 "movq %6,%%rax \n\t" \
wolfSSL 7:481bce714567 1320 "mulq %7 \n\t" \
wolfSSL 7:481bce714567 1321 "addq %%rax,%0 \n\t" \
wolfSSL 7:481bce714567 1322 "adcq %%rdx,%1 \n\t" \
wolfSSL 7:481bce714567 1323 "adcq $0,%2 \n\t" \
wolfSSL 7:481bce714567 1324 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","cc");
wolfSSL 7:481bce714567 1325
wolfSSL 7:481bce714567 1326
wolfSSL 7:481bce714567 1327 #if defined(HAVE_INTEL_MULX)
/* MULX/ADCX/ADOX based multiply helpers, only defined when HAVE_INTEL_MULX
 * is set.  The macros below are meant to be used together: the CF and OF
 * flags are consumed and produced by consecutive asm statements.
 * NOTE(review): none of the clobber lists contains "cc", so the carries
 * appear to be kept live on purpose between statements - confirm that the
 * compiler cannot schedule flag-modifying code in between. */

/* MULADD_MULX(b0, c0, c1, rdx): loads the rdx argument into %rdx, multiplies
 * it by b0 with mulx (low half to r9, high half to r8), then adds the low
 * half into c0 with adoxq (OF carry chain) and the high half into c1 with
 * adcxq (CF carry chain).  The expansion has no trailing ';'. */
wolfSSL 7:481bce714567 1328 #define MULADD_MULX(b0, c0, c1, rdx)\
wolfSSL 7:481bce714567 1329 __asm__ volatile ( \
wolfSSL 7:481bce714567 1330 "movq %3, %%rdx\n\t" \
wolfSSL 7:481bce714567 1331 "mulx %2,%%r9, %%r8 \n\t" \
wolfSSL 7:481bce714567 1332 "adoxq %%r9,%0 \n\t" \
wolfSSL 7:481bce714567 1333 "adcxq %%r8,%1 \n\t" \
wolfSSL 7:481bce714567 1334 :"+r"(c0),"+r"(c1):"r"(b0), "r"(rdx):"%r8","%r9","%r10","%rdx"\
wolfSSL 7:481bce714567 1335 )
wolfSSL 7:481bce714567 1336
wolfSSL 7:481bce714567 1337
/* MULADD_MULX_ADD_CARRY(c0, c1): zeroes r10 with mov (which leaves the
 * flags alone), then adds the pending OF carry into c0 (adox) and the
 * pending CF carry into c1 (adcx).
 * NOTE(review): c1 is also copied into r8, but r8 is not read again inside
 * this statement. */
wolfSSL 7:481bce714567 1338 #define MULADD_MULX_ADD_CARRY(c0, c1)\
wolfSSL 7:481bce714567 1339 __asm__ volatile(\
wolfSSL 7:481bce714567 1340 "mov $0, %%r10\n\t"\
wolfSSL 7:481bce714567 1341 "movq %1, %%r8\n\t"\
wolfSSL 7:481bce714567 1342 "adox %%r10, %0\n\t"\
wolfSSL 7:481bce714567 1343 "adcx %%r10, %1\n\t"\
wolfSSL 7:481bce714567 1344 :"+r"(c0),"+r"(c1)::"%r8","%r9","%r10","%rdx") ;
wolfSSL 7:481bce714567 1345
/* MULADD_SET_A(a0): "add $0" leaves r8's value unchanged but clears CF and
 * OF (adding zero can neither carry nor overflow) - presumably to reset both
 * carry chains before the MULADD_MULX sequence - then loads a0 into %rdx. */
wolfSSL 7:481bce714567 1346 #define MULADD_SET_A(a0)\
wolfSSL 7:481bce714567 1347 __asm__ volatile("add $0, %%r8\n\t" \
wolfSSL 7:481bce714567 1348 "movq %0,%%rdx\n\t" \
wolfSSL 7:481bce714567 1349 ::"r"(a0):"%r8","%r9","%r10","%rdx") ;
wolfSSL 7:481bce714567 1350
/* MULADD_BODY(a, b, c): multiplies the single digit a->dp[ix] by the four
 * digits b0..b3 and accumulates the result into c->dp[iz] .. c->dp[iz+5].
 * c0 and c1 alternate as the "low" and "high" accumulator from one
 * MULADD_MULX step to the next; each finished digit is written back through
 * cp and the next one is loaded.  Uses ix, iz, cp, c0, c1 and b0..b3 from
 * the expansion site; the b parameter itself is not referenced. */
wolfSSL 7:481bce714567 1351 #define MULADD_BODY(a,b,c)\
wolfSSL 7:481bce714567 1352 { word64 rdx = a->dp[ix] ; \
wolfSSL 7:481bce714567 1353 cp = &(c->dp[iz]) ; \
wolfSSL 7:481bce714567 1354 c0 = cp[0] ; c1 = cp[1]; \
wolfSSL 7:481bce714567 1355 MULADD_SET_A(rdx) ; \
wolfSSL 7:481bce714567 1356 MULADD_MULX(b0, c0, c1, rdx) ;\
wolfSSL 7:481bce714567 1357 cp[0]=c0; c0=cp[2]; \
wolfSSL 7:481bce714567 1358 MULADD_MULX(b1, c1, c0, rdx) ;\
wolfSSL 7:481bce714567 1359 cp[1]=c1; c1=cp[3]; \
wolfSSL 7:481bce714567 1360 MULADD_MULX(b2, c0, c1, rdx) ;\
wolfSSL 7:481bce714567 1361 cp[2]=c0; c0=cp[4]; \
wolfSSL 7:481bce714567 1362 MULADD_MULX(b3, c1, c0, rdx) ;\
wolfSSL 7:481bce714567 1363 cp[3]=c1; c1=cp[5]; \
wolfSSL 7:481bce714567 1364 MULADD_MULX_ADD_CARRY(c0, c1);\
wolfSSL 7:481bce714567 1365 cp[4]=c0; cp[5]=c1; \
wolfSSL 7:481bce714567 1366 }
wolfSSL 7:481bce714567 1367
/* TFM_INTEL_MUL_COMBA(a, b, c): zeroes c->dp[0 .. pa-1], then walks b four
 * digits at a time (iy) and, for every digit of a below a->used (ix), runs
 * MULADD_BODY with the output offset iz = iy + ix.  Relies on ix, iy, iz and
 * pa being declared by the enclosing code.
 * NOTE(review): bp[1..3] are read even when fewer than four digits remain
 * below b->used; presumably the digits above `used` are zero - confirm. */
wolfSSL 7:481bce714567 1368 #define TFM_INTEL_MUL_COMBA(a, b, c)\
wolfSSL 7:481bce714567 1369 for(ix=0; ix<pa; ix++)c->dp[ix]=0 ; \
wolfSSL 7:481bce714567 1370 for(iy=0; (iy<b->used); iy+=4) { \
wolfSSL 7:481bce714567 1371 fp_digit *bp ; \
wolfSSL 7:481bce714567 1372 bp = &(b->dp[iy+0]) ; \
wolfSSL 7:481bce714567 1373 fp_digit b0 = bp[0] , b1= bp[1], \
wolfSSL 7:481bce714567 1374 b2= bp[2], b3= bp[3]; \
wolfSSL 7:481bce714567 1375 ix=0, iz=iy; \
wolfSSL 7:481bce714567 1376 while(ix<a->used) { \
wolfSSL 7:481bce714567 1377 fp_digit c0, c1; \
wolfSSL 7:481bce714567 1378 fp_digit *cp ; \
wolfSSL 7:481bce714567 1379 MULADD_BODY(a,b,c); \
wolfSSL 7:481bce714567 1380 ix++ ; iz++ ; \
wolfSSL 7:481bce714567 1381 } \
wolfSSL 7:481bce714567 1382 };
wolfSSL 7:481bce714567 1383 #endif
wolfSSL 7:481bce714567 1384
wolfSSL 7:481bce714567 1385 #elif defined(TFM_SSE2)
wolfSSL 7:481bce714567 1386 /* use SSE2 optimizations: the 32x32 multiply is done with pmuludq on the
 * MMX registers mm0/mm1.  Every macro in this branch reads or writes three
 * variables named c0, c1 and c2, which must already be declared where the
 * macro is expanded. */
wolfSSL 7:481bce714567 1387
wolfSSL 7:481bce714567 1388 /* anything you need at the start (nothing here: expands to no tokens) */
wolfSSL 7:481bce714567 1389 #define COMBA_START
wolfSSL 7:481bce714567 1390
wolfSSL 7:481bce714567 1391 /* clear the chaining variables: c0, c1 and c2 are all set to 0 */
wolfSSL 7:481bce714567 1392 #define COMBA_CLEAR \
wolfSSL 7:481bce714567 1393 c0 = c1 = c2 = 0;
wolfSSL 7:481bce714567 1394
wolfSSL 7:481bce714567 1395 /* forward the carry to the next digit: c1 moves into c0, c2 moves into c1
 * and c2 restarts at 0.  The expansion already ends in ';'. */
wolfSSL 7:481bce714567 1396 #define COMBA_FORWARD \
wolfSSL 7:481bce714567 1397 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 7:481bce714567 1398
wolfSSL 7:481bce714567 1399 /* store the first sum: assigns c0 to x */
wolfSSL 7:481bce714567 1400 #define COMBA_STORE(x) \
wolfSSL 7:481bce714567 1401 x = c0;
wolfSSL 7:481bce714567 1402
wolfSSL 7:481bce714567 1403 /* store the second sum [carry]: assigns c1 to x */
wolfSSL 7:481bce714567 1404 #define COMBA_STORE2(x) \
wolfSSL 7:481bce714567 1405 x = c1;
wolfSSL 7:481bce714567 1406
wolfSSL 7:481bce714567 1407 /* anything you need at the end: executes emms, which releases the MMX
 * state used by MULADD */
wolfSSL 7:481bce714567 1408 #define COMBA_FINI \
wolfSSL 7:481bce714567 1409 __asm__("emms");
wolfSSL 7:481bce714567 1410
wolfSSL 7:481bce714567 1411 /* this should multiply i and j and add the product into c0/c1/c2:
 *   movd x2    - mm0 = i, mm1 = j (both are "m" operands, i.e. memory)
 *   pmuludq    - mm0 = 64-bit product
 *   movd/addl  - c0 += low 32 bits of the product
 *   psrlq/movd - bring the high 32 bits down into eax
 *   adcl       - c1 += high 32 bits + carry
 *   adcl $0    - c2 += carry
 * The carry produced by addl must survive the psrlq/movd in between, which
 * works because those MMX instructions do not modify EFLAGS.
 * NOTE(review): only eax and the condition codes are listed as clobbered;
 * mm0 and mm1 are written as well - confirm this is safe for all callers. */
wolfSSL 7:481bce714567 1412 #define MULADD(i, j) \
wolfSSL 7:481bce714567 1413 __asm__( \
wolfSSL 7:481bce714567 1414 "movd %6,%%mm0 \n\t" \
wolfSSL 7:481bce714567 1415 "movd %7,%%mm1 \n\t" \
wolfSSL 7:481bce714567 1416 "pmuludq %%mm1,%%mm0\n\t" \
wolfSSL 7:481bce714567 1417 "movd %%mm0,%%eax \n\t" \
wolfSSL 7:481bce714567 1418 "psrlq $32,%%mm0 \n\t" \
wolfSSL 7:481bce714567 1419 "addl %%eax,%0 \n\t" \
wolfSSL 7:481bce714567 1420 "movd %%mm0,%%eax \n\t" \
wolfSSL 7:481bce714567 1421 "adcl %%eax,%1 \n\t" \
wolfSSL 7:481bce714567 1422 "adcl $0,%2 \n\t" \
wolfSSL 7:481bce714567 1423 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","cc");
wolfSSL 7:481bce714567 1424
wolfSSL 7:481bce714567 1425 #elif defined(TFM_ARM)
wolfSSL 7:481bce714567 1426 /* ARM code: inline-assembly comba multiply helpers.  Every macro in this
 * branch reads or writes three variables named c0, c1 and c2, which must
 * already be declared where the macro is expanded. */
wolfSSL 7:481bce714567 1427
/* nothing needed at the start: expands to no tokens */
wolfSSL 7:481bce714567 1428 #define COMBA_START
wolfSSL 7:481bce714567 1429
/* clear the chaining variables: c0, c1 and c2 are all set to 0 */
wolfSSL 7:481bce714567 1430 #define COMBA_CLEAR \
wolfSSL 7:481bce714567 1431 c0 = c1 = c2 = 0;
wolfSSL 7:481bce714567 1432
/* forward the carry: c1 moves into c0, c2 moves into c1 and c2 restarts at
 * 0.  The expansion already ends in ';'. */
wolfSSL 7:481bce714567 1433 #define COMBA_FORWARD \
wolfSSL 7:481bce714567 1434 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 7:481bce714567 1435
/* assigns c0 to x */
wolfSSL 7:481bce714567 1436 #define COMBA_STORE(x) \
wolfSSL 7:481bce714567 1437 x = c0;
wolfSSL 7:481bce714567 1438
/* assigns c1 to x */
wolfSSL 7:481bce714567 1439 #define COMBA_STORE2(x) \
wolfSSL 7:481bce714567 1440 x = c1;
wolfSSL 7:481bce714567 1441
/* nothing needed at the end: expands to no tokens */
wolfSSL 7:481bce714567 1442 #define COMBA_FINI
wolfSSL 7:481bce714567 1443
/* Multiplies i by j and adds the product into c0/c1/c2:
 *   UMULL - unsigned multiply, low word to r0 and high word to r1
 *   ADDS  - c0 += r0, setting the carry
 *   ADCS  - c1 += r1 + carry, setting the carry
 *   ADC   - c2 += carry
 * i and j are passed in registers ("r"); r0, r1 and the condition codes are
 * declared as clobbered. */
wolfSSL 7:481bce714567 1444 #define MULADD(i, j) \
wolfSSL 7:481bce714567 1445 __asm__( \
wolfSSL 7:481bce714567 1446 " UMULL r0,r1,%6,%7 \n\t" \
wolfSSL 7:481bce714567 1447 " ADDS %0,%0,r0 \n\t" \
wolfSSL 7:481bce714567 1448 " ADCS %1,%1,r1 \n\t" \
wolfSSL 7:481bce714567 1449 " ADC %2,%2,#0 \n\t" \
wolfSSL 7:481bce714567 1450 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "cc");
wolfSSL 7:481bce714567 1451
wolfSSL 7:481bce714567 1452 #elif defined(TFM_PPC32)
wolfSSL 7:481bce714567 1453 /* For 32-bit PPC: inline-assembly comba multiply helpers.  Every macro in
 * this branch reads or writes three variables named c0, c1 and c2, which
 * must already be declared where the macro is expanded. */
wolfSSL 7:481bce714567 1454
/* nothing needed at the start: expands to no tokens */
wolfSSL 7:481bce714567 1455 #define COMBA_START
wolfSSL 7:481bce714567 1456
/* clear the chaining variables: c0, c1 and c2 are all set to 0 */
wolfSSL 7:481bce714567 1457 #define COMBA_CLEAR \
wolfSSL 7:481bce714567 1458 c0 = c1 = c2 = 0;
wolfSSL 7:481bce714567 1459
/* forward the carry: c1 moves into c0, c2 moves into c1 and c2 restarts at
 * 0.  The expansion already ends in ';'. */
wolfSSL 7:481bce714567 1460 #define COMBA_FORWARD \
wolfSSL 7:481bce714567 1461 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 7:481bce714567 1462
/* assigns c0 to x */
wolfSSL 7:481bce714567 1463 #define COMBA_STORE(x) \
wolfSSL 7:481bce714567 1464 x = c0;
wolfSSL 7:481bce714567 1465
/* assigns c1 to x */
wolfSSL 7:481bce714567 1466 #define COMBA_STORE2(x) \
wolfSSL 7:481bce714567 1467 x = c1;
wolfSSL 7:481bce714567 1468
/* nothing needed at the end: expands to no tokens */
wolfSSL 7:481bce714567 1469 #define COMBA_FINI
wolfSSL 7:481bce714567 1470
wolfSSL 7:481bce714567 1471 /* untested: will mulhwu change the flags? Docs say no
 *
 * Multiplies i by j and adds the product into c0/c1/c2, using r16 as
 * scratch (it is the only declared clobber):
 *   mullw  - r16 = low word of i * j
 *   addc   - c0 += r16, recording the carry
 *   mulhwu - r16 = high word of the unsigned product; the question above
 *            matters because the carry from addc must survive this step
 *   adde   - c1 += r16 + carry
 *   addze  - c2 += carry
 * i and j are passed in registers ("r"). */
wolfSSL 7:481bce714567 1472 #define MULADD(i, j) \
wolfSSL 7:481bce714567 1473 __asm__( \
wolfSSL 7:481bce714567 1474 " mullw 16,%6,%7 \n\t" \
wolfSSL 7:481bce714567 1475 " addc %0,%0,16 \n\t" \
wolfSSL 7:481bce714567 1476 " mulhwu 16,%6,%7 \n\t" \
wolfSSL 7:481bce714567 1477 " adde %1,%1,16 \n\t" \
wolfSSL 7:481bce714567 1478 " addze %2,%2 \n\t" \
wolfSSL 7:481bce714567 1479 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16");
wolfSSL 7:481bce714567 1480
wolfSSL 7:481bce714567 1481 #elif defined(TFM_PPC64)
/* For 64-bit PPC: inline-assembly comba multiply helpers.  Every macro in
 * this branch reads or writes three variables named c0, c1 and c2, which
 * must already be declared where the macro is expanded. */

/* nothing needed at the start: expands to no tokens */
#define COMBA_START

/* clear the chaining variables: c0, c1 and c2 are all set to 0 */
#define COMBA_CLEAR \
   c0 = c1 = c2 = 0;

/* forward the carry: c1 moves into c0, c2 moves into c1 and c2 restarts at
 * 0.  The expansion already ends in ';'. */
#define COMBA_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* assigns c0 to x */
#define COMBA_STORE(x) \
   x = c0;

/* assigns c1 to x */
#define COMBA_STORE2(x) \
   x = c1;

/* nothing needed at the end: expands to no tokens */
#define COMBA_FINI

/* untested: will mulhdu change the flags? Docs say no
 *
 * Multiplies i by j and adds the product into c0/c1/c2, using r16 as
 * scratch (it is the only declared clobber):
 *   mulld  - r16 = low doubleword of i * j
 *   addc   - c0 += r16, recording the carry
 *   mulhdu - r16 = high doubleword of the unsigned product; the carry from
 *            addc must survive this step
 *   adde   - c1 += r16 + carry
 *   addze  - c2 += carry
 * i and j are passed in registers ("r").
 *
 * The statement keyword is spelled __asm__; a four-underscore spelling is
 * not a keyword and would be parsed as a call to an undeclared function. */
#define MULADD(i, j) \
__asm__( \
" mulld 16,%6,%7 \n\t" \
" addc %0,%0,16 \n\t" \
" mulhdu 16,%6,%7 \n\t" \
" adde %1,%1,16 \n\t" \
" addze %2,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16");
wolfSSL 7:481bce714567 1509
wolfSSL 7:481bce714567 1510 #elif defined(TFM_AVR32)
wolfSSL 7:481bce714567 1511
/* AVR32 code: inline-assembly comba multiply helpers.  Every macro in this
 * branch reads or writes three variables named c0, c1 and c2, which must
 * already be declared where the macro is expanded. */

/* nothing needed at the start: expands to no tokens */
#define COMBA_START

/* clear the chaining variables: c0, c1 and c2 are all set to 0 */
#define COMBA_CLEAR \
   c0 = c1 = c2 = 0;

/* forward the carry: c1 moves into c0, c2 moves into c1 and c2 restarts at
 * 0.  The expansion already ends in ';'. */
#define COMBA_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* assigns c0 to x */
#define COMBA_STORE(x) \
   x = c0;

/* assigns c1 to x */
#define COMBA_STORE2(x) \
   x = c1;

/* nothing needed at the end: expands to no tokens */
#define COMBA_FINI

/* Multiplies i by j and adds the product into c0/c1/c2, using r2 and r3 as
 * scratch (both are declared clobbered):
 *   mulu.d - unsigned multiply of i and j; the code below reads the low
 *            word from r2 and the high word from r3
 *   add    - c0 += r2
 *   adc    - c1 += r3 + carry
 *   acr    - c2 += carry
 * i and j are passed in registers ("r").
 *
 * The statement keyword is spelled __asm__; a four-underscore spelling is
 * not a keyword and would be parsed as a call to an undeclared function. */
#define MULADD(i, j) \
__asm__( \
" mulu.d r2,%6,%7 \n\t"\
" add %0,r2 \n\t"\
" adc %1,%1,r3 \n\t"\
" acr %2 \n\t"\
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2","r3");
wolfSSL 7:481bce714567 1537
wolfSSL 7:481bce714567 1538 #else
/* ISO C code: portable fallback used when no assembly branch is selected.
 * Every macro in this branch reads or writes three variables named c0, c1
 * and c2, which must already be declared where the macro is expanded.
 * MULADD additionally needs the fp_digit and fp_word types and DIGIT_BIT. */

/* nothing needed at the start: expands to no tokens */
#define COMBA_START

/* clear the chaining variables: c0, c1 and c2 are all set to 0 */
#define COMBA_CLEAR \
   c0 = c1 = c2 = 0;

/* forward the carry: c1 moves into c0, c2 moves into c1 and c2 restarts at
 * 0.  The trailing ';' is part of the expansion and is kept so existing
 * call sites expand exactly as before. */
#define COMBA_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* assigns c0 to x */
#define COMBA_STORE(x) \
   (x) = c0;

/* assigns c1 to x */
#define COMBA_STORE2(x) \
   (x) = c1;

/* nothing needed at the end: expands to no tokens */
#define COMBA_FINI

/* Multiplies i by j in fp_word precision and adds the product into the
 * c2:c1:c0 accumulator:
 *   1. c0 + i*j is formed in an fp_word; its low DIGIT_BIT bits become c0.
 *   2. the upper part is added to c1; the low DIGIT_BIT bits become c1.
 *   3. whatever carries out of step 2 is added to c2.
 * i and j are each evaluated exactly once and are parenthesized, so an
 * expression argument is evaluated as written before it is widened.  The
 * scratch variable has a macro-private name so it cannot capture a caller
 * variable that appears in i or j.  The trailing ';' is kept for
 * compatibility with existing call sites. */
#define MULADD(i, j) \
   do { fp_word fp_muladd_acc_; \
   fp_muladd_acc_ = (fp_word)c0 + ((fp_word)(i)) * ((fp_word)(j)); \
   c0 = (fp_digit)fp_muladd_acc_; \
   fp_muladd_acc_ = (fp_word)c1 + (fp_muladd_acc_ >> DIGIT_BIT); \
   c1 = (fp_digit)fp_muladd_acc_; \
   c2 += (fp_digit)(fp_muladd_acc_ >> DIGIT_BIT); \
   } while (0);
wolfSSL 7:481bce714567 1563
wolfSSL 7:481bce714567 1564 #endif
wolfSSL 7:481bce714567 1565
wolfSSL 7:481bce714567 1566
wolfSSL 7:481bce714567 1567 #ifdef TFM_SMALL_SET
wolfSSL 7:481bce714567 1568 #include "fp_mul_comba_small_set.i"
wolfSSL 7:481bce714567 1569 #endif
wolfSSL 7:481bce714567 1570
wolfSSL 7:481bce714567 1571 #if defined(TFM_MUL3) && FP_SIZE >= 6
wolfSSL 7:481bce714567 1572 #include "fp_mul_comba_3.i"
wolfSSL 7:481bce714567 1573 #endif
wolfSSL 7:481bce714567 1574 #if defined(TFM_MUL4) && FP_SIZE >= 8
wolfSSL 7:481bce714567 1575 #include "fp_mul_comba_4.i"
wolfSSL 7:481bce714567 1576 #endif
wolfSSL 7:481bce714567 1577 #if defined(TFM_MUL6) && FP_SIZE >= 12
wolfSSL 7:481bce714567 1578 #include "fp_mul_comba_6.i"
wolfSSL 7:481bce714567 1579 #endif
wolfSSL 7:481bce714567 1580 #if defined(TFM_MUL7) && FP_SIZE >= 14
wolfSSL 7:481bce714567 1581 #include "fp_mul_comba_7.i"
wolfSSL 7:481bce714567 1582 #endif
wolfSSL 7:481bce714567 1583 #if defined(TFM_MUL8) && FP_SIZE >= 16
wolfSSL 7:481bce714567 1584 #include "fp_mul_comba_8.i"
wolfSSL 7:481bce714567 1585 #endif
wolfSSL 7:481bce714567 1586 #if defined(TFM_MUL9) && FP_SIZE >= 18
wolfSSL 7:481bce714567 1587 #include "fp_mul_comba_9.i"
wolfSSL 7:481bce714567 1588 #endif
wolfSSL 7:481bce714567 1589 #if defined(TFM_MUL12) && FP_SIZE >= 24
wolfSSL 7:481bce714567 1590 #include "fp_mul_comba_12.i"
wolfSSL 7:481bce714567 1591 #endif
wolfSSL 7:481bce714567 1592 #if defined(TFM_MUL17) && FP_SIZE >= 34
wolfSSL 7:481bce714567 1593 #include "fp_mul_comba_17.i"
wolfSSL 7:481bce714567 1594 #endif
wolfSSL 7:481bce714567 1595 #if defined(TFM_MUL20) && FP_SIZE >= 40
wolfSSL 7:481bce714567 1596 #include "fp_mul_comba_20.i"
wolfSSL 7:481bce714567 1597 #endif
wolfSSL 7:481bce714567 1598 #if defined(TFM_MUL24) && FP_SIZE >= 48
wolfSSL 7:481bce714567 1599 #include "fp_mul_comba_24.i"
wolfSSL 7:481bce714567 1600 #endif
wolfSSL 7:481bce714567 1601 #if defined(TFM_MUL28) && FP_SIZE >= 56
wolfSSL 7:481bce714567 1602 #include "fp_mul_comba_28.i"
wolfSSL 7:481bce714567 1603 #endif
wolfSSL 7:481bce714567 1604 #if defined(TFM_MUL32) && FP_SIZE >= 64
wolfSSL 7:481bce714567 1605 #include "fp_mul_comba_32.i"
wolfSSL 7:481bce714567 1606 #endif
wolfSSL 7:481bce714567 1607 #if defined(TFM_MUL48) && FP_SIZE >= 96
wolfSSL 7:481bce714567 1608 #include "fp_mul_comba_48.i"
wolfSSL 7:481bce714567 1609 #endif
wolfSSL 7:481bce714567 1610 #if defined(TFM_MUL64) && FP_SIZE >= 128
wolfSSL 7:481bce714567 1611 #include "fp_mul_comba_64.i"
wolfSSL 7:481bce714567 1612 #endif
wolfSSL 7:481bce714567 1613
wolfSSL 7:481bce714567 1614 /* end fp_mul_comba.c asm */
wolfSSL 7:481bce714567 1615
wolfSSL 7:481bce714567 1616