Fork of CyaSSL for my specific settings

Dependents:   CyaSSL_Example

Fork of CyaSSL by wolf SSL

Committer:
wolfSSL
Date:
Sat Jul 12 07:18:23 2014 +0000
Revision:
0:1239e9b70ca2
CyaSSL 3.0.0;

Who changed what in which revision?

User | Revision | Line number | New contents of line
wolfSSL 0:1239e9b70ca2 1 /* asm.c
wolfSSL 0:1239e9b70ca2 2 *
wolfSSL 0:1239e9b70ca2 3 * Copyright (C) 2006-2014 wolfSSL Inc.
wolfSSL 0:1239e9b70ca2 4 *
wolfSSL 0:1239e9b70ca2 5 * This file is part of CyaSSL.
wolfSSL 0:1239e9b70ca2 6 *
wolfSSL 0:1239e9b70ca2 7 * CyaSSL is free software; you can redistribute it and/or modify
wolfSSL 0:1239e9b70ca2 8 * it under the terms of the GNU General Public License as published by
wolfSSL 0:1239e9b70ca2 9 * the Free Software Foundation; either version 2 of the License, or
wolfSSL 0:1239e9b70ca2 10 * (at your option) any later version.
wolfSSL 0:1239e9b70ca2 11 *
wolfSSL 0:1239e9b70ca2 12 * CyaSSL is distributed in the hope that it will be useful,
wolfSSL 0:1239e9b70ca2 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
wolfSSL 0:1239e9b70ca2 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
wolfSSL 0:1239e9b70ca2 15 * GNU General Public License for more details.
wolfSSL 0:1239e9b70ca2 16 *
wolfSSL 0:1239e9b70ca2 17 * You should have received a copy of the GNU General Public License
wolfSSL 0:1239e9b70ca2 18 * along with this program; if not, write to the Free Software
wolfSSL 0:1239e9b70ca2 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
wolfSSL 0:1239e9b70ca2 20 */
wolfSSL 0:1239e9b70ca2 21
wolfSSL 0:1239e9b70ca2 22 #ifdef HAVE_CONFIG_H
wolfSSL 0:1239e9b70ca2 23 #include <config.h>
wolfSSL 0:1239e9b70ca2 24 #endif
wolfSSL 0:1239e9b70ca2 25
wolfSSL 0:1239e9b70ca2 26 #include <cyassl/ctaocrypt/settings.h>
wolfSSL 0:1239e9b70ca2 27
wolfSSL 0:1239e9b70ca2 28 /*
wolfSSL 0:1239e9b70ca2 29 * Based on public domain TomsFastMath 0.10 by Tom St Denis, tomstdenis@iahu.ca,
wolfSSL 0:1239e9b70ca2 30 * http://math.libtomcrypt.com
wolfSSL 0:1239e9b70ca2 31 */
wolfSSL 0:1239e9b70ca2 32
wolfSSL 0:1239e9b70ca2 33
wolfSSL 0:1239e9b70ca2 34 /******************************************************************/
wolfSSL 0:1239e9b70ca2 35 /* fp_montgomery_reduce.c asm or generic */
wolfSSL 0:1239e9b70ca2 36 #if defined(TFM_X86) && !defined(TFM_SSE2)
wolfSSL 0:1239e9b70ca2 37 /* x86-32 code */
wolfSSL 0:1239e9b70ca2 38
wolfSSL 0:1239e9b70ca2 39 #define MONT_START /* empty: no setup step on this target */
wolfSSL 0:1239e9b70ca2 40 #define MONT_FINI /* empty: no teardown step on this target */
wolfSSL 0:1239e9b70ca2 41 #define LOOP_END /* empty: cy is already held in a C variable */
wolfSSL 0:1239e9b70ca2 42 #define LOOP_START /* derive the multiplier mu for digit c[x] */ \
wolfSSL 0:1239e9b70ca2 43 mu = c[x] * mp
wolfSSL 0:1239e9b70ca2 44
wolfSSL 0:1239e9b70ca2 45 #define INNERMUL /* _c[LO] += mu * (*tmpm++) + cy; cy = high word of that sum */ \
wolfSSL 0:1239e9b70ca2 46 __asm__( \
wolfSSL 0:1239e9b70ca2 47 "movl %5,%%eax \n\t" /* eax = *tmpm (operand 5) */ \
wolfSSL 0:1239e9b70ca2 48 "mull %4 \n\t" /* edx:eax = eax * mu */ \
wolfSSL 0:1239e9b70ca2 49 "addl %1,%%eax \n\t" /* low word += incoming cy */ \
wolfSSL 0:1239e9b70ca2 50 "adcl $0,%%edx \n\t" /* fold the carry into the high word */ \
wolfSSL 0:1239e9b70ca2 51 "addl %%eax,%0 \n\t" /* _c[LO] += low word */ \
wolfSSL 0:1239e9b70ca2 52 "adcl $0,%%edx \n\t" /* fold the carry into the high word */ \
wolfSSL 0:1239e9b70ca2 53 "movl %%edx,%1 \n\t" /* cy = high word */ \
wolfSSL 0:1239e9b70ca2 54 :"=g"(_c[LO]), "=r"(cy) \
wolfSSL 0:1239e9b70ca2 55 :"0"(_c[LO]), "1"(cy), "g"(mu), "g"(*tmpm++) \
wolfSSL 0:1239e9b70ca2 56 : "%eax", "%edx", "cc")
wolfSSL 0:1239e9b70ca2 57
wolfSSL 0:1239e9b70ca2 58 #define PROPCARRY /* _c[LO] += cy; cy = carry out of that add (0 or 1) */ \
wolfSSL 0:1239e9b70ca2 59 __asm__( \
wolfSSL 0:1239e9b70ca2 60 "addl %1,%0 \n\t" /* _c[LO] += cy, CF set on wrap-around */ \
wolfSSL 0:1239e9b70ca2 61 "setb %%al \n\t" /* al = CF */ \
wolfSSL 0:1239e9b70ca2 62 "movzbl %%al,%1 \n\t" /* cy = al, zero-extended */ \
wolfSSL 0:1239e9b70ca2 63 :"=g"(_c[LO]), "=r"(cy) \
wolfSSL 0:1239e9b70ca2 64 :"0"(_c[LO]), "1"(cy) \
wolfSSL 0:1239e9b70ca2 65 : "%eax", "cc")
wolfSSL 0:1239e9b70ca2 66
wolfSSL 0:1239e9b70ca2 67 /******************************************************************/
wolfSSL 0:1239e9b70ca2 68 #elif defined(TFM_X86_64)
wolfSSL 0:1239e9b70ca2 69 /* x86-64 code */
wolfSSL 0:1239e9b70ca2 70
wolfSSL 0:1239e9b70ca2 71 #define MONT_START /* empty: no setup step on this target */
wolfSSL 0:1239e9b70ca2 72 #define MONT_FINI /* empty: no teardown step on this target */
wolfSSL 0:1239e9b70ca2 73 #define LOOP_END /* empty: cy is already held in a C variable */
wolfSSL 0:1239e9b70ca2 74 #define LOOP_START /* derive the multiplier mu for digit c[x] */ \
wolfSSL 0:1239e9b70ca2 75 mu = c[x] * mp
wolfSSL 0:1239e9b70ca2 76
wolfSSL 0:1239e9b70ca2 77 #define INNERMUL /* _c[LO] += mu * (*tmpm++) + cy; cy = high word of that sum */ \
wolfSSL 0:1239e9b70ca2 78 __asm__( \
wolfSSL 0:1239e9b70ca2 79 "movq %5,%%rax \n\t" /* rax = *tmpm (operand 5) */ \
wolfSSL 0:1239e9b70ca2 80 "mulq %4 \n\t" /* rdx:rax = rax * mu */ \
wolfSSL 0:1239e9b70ca2 81 "addq %1,%%rax \n\t" /* low word += incoming cy */ \
wolfSSL 0:1239e9b70ca2 82 "adcq $0,%%rdx \n\t" /* fold the carry into the high word */ \
wolfSSL 0:1239e9b70ca2 83 "addq %%rax,%0 \n\t" /* _c[LO] += low word */ \
wolfSSL 0:1239e9b70ca2 84 "adcq $0,%%rdx \n\t" /* fold the carry into the high word */ \
wolfSSL 0:1239e9b70ca2 85 "movq %%rdx,%1 \n\t" /* cy = high word */ \
wolfSSL 0:1239e9b70ca2 86 :"=g"(_c[LO]), "=r"(cy) \
wolfSSL 0:1239e9b70ca2 87 :"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \
wolfSSL 0:1239e9b70ca2 88 : "%rax", "%rdx", "cc")
wolfSSL 0:1239e9b70ca2 89
wolfSSL 0:1239e9b70ca2 90 #define INNERMUL8 /* eight unrolled steps: _c[k] += mu * tmpm[k] + cy for k = 0..7, cy chained between steps; the asm does not advance _c or tmpm */ \
wolfSSL 0:1239e9b70ca2 91 __asm__( \
wolfSSL 0:1239e9b70ca2 92 "movq 0(%5),%%rax \n\t" /* rax = tmpm[0] */ \
wolfSSL 0:1239e9b70ca2 93 "movq 0(%2),%%r10 \n\t" /* r10 = _c[0] */ \
wolfSSL 0:1239e9b70ca2 94 "movq 0x8(%5),%%r11 \n\t" /* r11 = tmpm[1], loaded ahead for the next step */ \
wolfSSL 0:1239e9b70ca2 95 "mulq %4 \n\t" /* rdx:rax = rax * mu */ \
wolfSSL 0:1239e9b70ca2 96 "addq %%r10,%%rax \n\t" /* low word += _c[0] */ \
wolfSSL 0:1239e9b70ca2 97 "adcq $0,%%rdx \n\t" \
wolfSSL 0:1239e9b70ca2 98 "movq 0x8(%2),%%r10 \n\t" /* r10 = _c[1], loaded ahead for the next step */ \
wolfSSL 0:1239e9b70ca2 99 "addq %3,%%rax \n\t" /* low word += cy */ \
wolfSSL 0:1239e9b70ca2 100 "adcq $0,%%rdx \n\t" \
wolfSSL 0:1239e9b70ca2 101 "movq %%rax,0(%0) \n\t" /* _c[0] = low word */ \
wolfSSL 0:1239e9b70ca2 102 "movq %%rdx,%1 \n\t" /* cy = high word */ \
wolfSSL 0:1239e9b70ca2 103 /* step 1: same pattern for _c[1], tmpm[1] */ \
wolfSSL 0:1239e9b70ca2 104 "movq %%r11,%%rax \n\t" \
wolfSSL 0:1239e9b70ca2 105 "movq 0x10(%5),%%r11 \n\t" \
wolfSSL 0:1239e9b70ca2 106 "mulq %4 \n\t" \
wolfSSL 0:1239e9b70ca2 107 "addq %%r10,%%rax \n\t" \
wolfSSL 0:1239e9b70ca2 108 "adcq $0,%%rdx \n\t" \
wolfSSL 0:1239e9b70ca2 109 "movq 0x10(%2),%%r10 \n\t" \
wolfSSL 0:1239e9b70ca2 110 "addq %3,%%rax \n\t" \
wolfSSL 0:1239e9b70ca2 111 "adcq $0,%%rdx \n\t" \
wolfSSL 0:1239e9b70ca2 112 "movq %%rax,0x8(%0) \n\t" \
wolfSSL 0:1239e9b70ca2 113 "movq %%rdx,%1 \n\t" \
wolfSSL 0:1239e9b70ca2 114 /* step 2 */ \
wolfSSL 0:1239e9b70ca2 115 "movq %%r11,%%rax \n\t" \
wolfSSL 0:1239e9b70ca2 116 "movq 0x18(%5),%%r11 \n\t" \
wolfSSL 0:1239e9b70ca2 117 "mulq %4 \n\t" \
wolfSSL 0:1239e9b70ca2 118 "addq %%r10,%%rax \n\t" \
wolfSSL 0:1239e9b70ca2 119 "adcq $0,%%rdx \n\t" \
wolfSSL 0:1239e9b70ca2 120 "movq 0x18(%2),%%r10 \n\t" \
wolfSSL 0:1239e9b70ca2 121 "addq %3,%%rax \n\t" \
wolfSSL 0:1239e9b70ca2 122 "adcq $0,%%rdx \n\t" \
wolfSSL 0:1239e9b70ca2 123 "movq %%rax,0x10(%0) \n\t" \
wolfSSL 0:1239e9b70ca2 124 "movq %%rdx,%1 \n\t" \
wolfSSL 0:1239e9b70ca2 125 /* step 3 */ \
wolfSSL 0:1239e9b70ca2 126 "movq %%r11,%%rax \n\t" \
wolfSSL 0:1239e9b70ca2 127 "movq 0x20(%5),%%r11 \n\t" \
wolfSSL 0:1239e9b70ca2 128 "mulq %4 \n\t" \
wolfSSL 0:1239e9b70ca2 129 "addq %%r10,%%rax \n\t" \
wolfSSL 0:1239e9b70ca2 130 "adcq $0,%%rdx \n\t" \
wolfSSL 0:1239e9b70ca2 131 "movq 0x20(%2),%%r10 \n\t" \
wolfSSL 0:1239e9b70ca2 132 "addq %3,%%rax \n\t" \
wolfSSL 0:1239e9b70ca2 133 "adcq $0,%%rdx \n\t" \
wolfSSL 0:1239e9b70ca2 134 "movq %%rax,0x18(%0) \n\t" \
wolfSSL 0:1239e9b70ca2 135 "movq %%rdx,%1 \n\t" \
wolfSSL 0:1239e9b70ca2 136 /* step 4 */ \
wolfSSL 0:1239e9b70ca2 137 "movq %%r11,%%rax \n\t" \
wolfSSL 0:1239e9b70ca2 138 "movq 0x28(%5),%%r11 \n\t" \
wolfSSL 0:1239e9b70ca2 139 "mulq %4 \n\t" \
wolfSSL 0:1239e9b70ca2 140 "addq %%r10,%%rax \n\t" \
wolfSSL 0:1239e9b70ca2 141 "adcq $0,%%rdx \n\t" \
wolfSSL 0:1239e9b70ca2 142 "movq 0x28(%2),%%r10 \n\t" \
wolfSSL 0:1239e9b70ca2 143 "addq %3,%%rax \n\t" \
wolfSSL 0:1239e9b70ca2 144 "adcq $0,%%rdx \n\t" \
wolfSSL 0:1239e9b70ca2 145 "movq %%rax,0x20(%0) \n\t" \
wolfSSL 0:1239e9b70ca2 146 "movq %%rdx,%1 \n\t" \
wolfSSL 0:1239e9b70ca2 147 /* step 5 */ \
wolfSSL 0:1239e9b70ca2 148 "movq %%r11,%%rax \n\t" \
wolfSSL 0:1239e9b70ca2 149 "movq 0x30(%5),%%r11 \n\t" \
wolfSSL 0:1239e9b70ca2 150 "mulq %4 \n\t" \
wolfSSL 0:1239e9b70ca2 151 "addq %%r10,%%rax \n\t" \
wolfSSL 0:1239e9b70ca2 152 "adcq $0,%%rdx \n\t" \
wolfSSL 0:1239e9b70ca2 153 "movq 0x30(%2),%%r10 \n\t" \
wolfSSL 0:1239e9b70ca2 154 "addq %3,%%rax \n\t" \
wolfSSL 0:1239e9b70ca2 155 "adcq $0,%%rdx \n\t" \
wolfSSL 0:1239e9b70ca2 156 "movq %%rax,0x28(%0) \n\t" \
wolfSSL 0:1239e9b70ca2 157 "movq %%rdx,%1 \n\t" \
wolfSSL 0:1239e9b70ca2 158 /* step 6 */ \
wolfSSL 0:1239e9b70ca2 159 "movq %%r11,%%rax \n\t" \
wolfSSL 0:1239e9b70ca2 160 "movq 0x38(%5),%%r11 \n\t" \
wolfSSL 0:1239e9b70ca2 161 "mulq %4 \n\t" \
wolfSSL 0:1239e9b70ca2 162 "addq %%r10,%%rax \n\t" \
wolfSSL 0:1239e9b70ca2 163 "adcq $0,%%rdx \n\t" \
wolfSSL 0:1239e9b70ca2 164 "movq 0x38(%2),%%r10 \n\t" \
wolfSSL 0:1239e9b70ca2 165 "addq %3,%%rax \n\t" \
wolfSSL 0:1239e9b70ca2 166 "adcq $0,%%rdx \n\t" \
wolfSSL 0:1239e9b70ca2 167 "movq %%rax,0x30(%0) \n\t" \
wolfSSL 0:1239e9b70ca2 168 "movq %%rdx,%1 \n\t" \
wolfSSL 0:1239e9b70ca2 169 /* step 7: last digit, nothing left to load ahead */ \
wolfSSL 0:1239e9b70ca2 170 "movq %%r11,%%rax \n\t" \
wolfSSL 0:1239e9b70ca2 171 "mulq %4 \n\t" \
wolfSSL 0:1239e9b70ca2 172 "addq %%r10,%%rax \n\t" \
wolfSSL 0:1239e9b70ca2 173 "adcq $0,%%rdx \n\t" \
wolfSSL 0:1239e9b70ca2 174 "addq %3,%%rax \n\t" \
wolfSSL 0:1239e9b70ca2 175 "adcq $0,%%rdx \n\t" \
wolfSSL 0:1239e9b70ca2 176 "movq %%rax,0x38(%0) \n\t" \
wolfSSL 0:1239e9b70ca2 177 "movq %%rdx,%1 \n\t" \
wolfSSL 0:1239e9b70ca2 178 /* "memory" clobber: the asm loads and stores through _c and tmpm, which are passed only as register operands */ \
wolfSSL 0:1239e9b70ca2 179 :"=r"(_c), "=r"(cy) \
wolfSSL 0:1239e9b70ca2 180 : "0"(_c), "1"(cy), "g"(mu), "r"(tmpm)\
wolfSSL 0:1239e9b70ca2 181 : "%rax", "%rdx", "%r10", "%r11", "cc", "memory")
wolfSSL 0:1239e9b70ca2 182
wolfSSL 0:1239e9b70ca2 183
wolfSSL 0:1239e9b70ca2 184 #define PROPCARRY /* _c[LO] += cy; cy = carry out of that add (0 or 1) */ \
wolfSSL 0:1239e9b70ca2 185 __asm__( \
wolfSSL 0:1239e9b70ca2 186 "addq %1,%0 \n\t" /* _c[LO] += cy, CF set on wrap-around */ \
wolfSSL 0:1239e9b70ca2 187 "setb %%al \n\t" /* al = CF */ \
wolfSSL 0:1239e9b70ca2 188 "movzbq %%al,%1 \n\t" /* cy = al, zero-extended */ \
wolfSSL 0:1239e9b70ca2 189 :"=g"(_c[LO]), "=r"(cy) \
wolfSSL 0:1239e9b70ca2 190 :"0"(_c[LO]), "1"(cy) \
wolfSSL 0:1239e9b70ca2 191 : "%rax", "cc")
wolfSSL 0:1239e9b70ca2 192
wolfSSL 0:1239e9b70ca2 193 /******************************************************************/
wolfSSL 0:1239e9b70ca2 194 #elif defined(TFM_SSE2)
wolfSSL 0:1239e9b70ca2 195 /* SSE2 code (assumes 32-bit fp_digits) */
wolfSSL 0:1239e9b70ca2 196 /* MMX register assignments (the code below uses %mm0-%mm7, not %xmm):
wolfSSL 0:1239e9b70ca2 197 * mm0 *tmpm++, then Mu * (*tmpm++)
wolfSSL 0:1239e9b70ca2 198 * mm1 c[x], then Mu
wolfSSL 0:1239e9b70ca2 199 * mm2 mp
wolfSSL 0:1239e9b70ca2 200 * mm3 cy
wolfSSL 0:1239e9b70ca2 201 * mm4 _c[LO]
wolfSSL 0:1239e9b70ca2 202 */
wolfSSL 0:1239e9b70ca2 203
wolfSSL 0:1239e9b70ca2 204 #define MONT_START /* load mp into mm2 */ \
wolfSSL 0:1239e9b70ca2 205 __asm__("movd %0,%%mm2"::"g"(mp))
wolfSSL 0:1239e9b70ca2 206
wolfSSL 0:1239e9b70ca2 207 #define MONT_FINI /* emms: empty the MMX state once the MMX work is done */ \
wolfSSL 0:1239e9b70ca2 208 __asm__("emms")
wolfSSL 0:1239e9b70ca2 209
wolfSSL 0:1239e9b70ca2 210 #define LOOP_START /* mm1 = c[x] * mp (the multiplier), mm3 (running carry) = 0 */ \
wolfSSL 0:1239e9b70ca2 211 __asm__( \
wolfSSL 0:1239e9b70ca2 212 "movd %0,%%mm1 \n\t" /* mm1 = c[x] */ \
wolfSSL 0:1239e9b70ca2 213 "pxor %%mm3,%%mm3 \n\t" /* clear the running carry */ \
wolfSSL 0:1239e9b70ca2 214 "pmuludq %%mm2,%%mm1 \n\t" /* mm1 = c[x] * mp */ \
wolfSSL 0:1239e9b70ca2 215 :: "g"(c[x]))
wolfSSL 0:1239e9b70ca2 216
wolfSSL 0:1239e9b70ca2 217 /* pmuludq on mmx registers does a 32x32->64 multiply. */
wolfSSL 0:1239e9b70ca2 218 #define INNERMUL /* _c[LO] += Mu * (*tmpm++) + carry, with Mu in mm1 and the carry kept in mm3 */ \
wolfSSL 0:1239e9b70ca2 219 __asm__( \
wolfSSL 0:1239e9b70ca2 220 "movd %1,%%mm4 \n\t" /* mm4 = _c[LO] */ \
wolfSSL 0:1239e9b70ca2 221 "movd %2,%%mm0 \n\t" /* mm0 = *tmpm */ \
wolfSSL 0:1239e9b70ca2 222 "paddq %%mm4,%%mm3 \n\t" /* carry += _c[LO] */ \
wolfSSL 0:1239e9b70ca2 223 "pmuludq %%mm1,%%mm0 \n\t" /* mm0 = 64-bit product of Mu and the tmpm digit */ \
wolfSSL 0:1239e9b70ca2 224 "paddq %%mm0,%%mm3 \n\t" /* mm3 = full 64-bit sum */ \
wolfSSL 0:1239e9b70ca2 225 "movd %%mm3,%0 \n\t" /* _c[LO] = low 32 bits */ \
wolfSSL 0:1239e9b70ca2 226 "psrlq $32, %%mm3 \n\t" /* carry = high 32 bits */ \
wolfSSL 0:1239e9b70ca2 227 :"=g"(_c[LO]) : "0"(_c[LO]), "g"(*tmpm++) );
wolfSSL 0:1239e9b70ca2 228
wolfSSL 0:1239e9b70ca2 229 #define INNERMUL8 \
wolfSSL 0:1239e9b70ca2 230 __asm__( \
wolfSSL 0:1239e9b70ca2 231 "movd 0(%1),%%mm4 \n\t" \
wolfSSL 0:1239e9b70ca2 232 "movd 0(%2),%%mm0 \n\t" \
wolfSSL 0:1239e9b70ca2 233 "paddq %%mm4,%%mm3 \n\t" \
wolfSSL 0:1239e9b70ca2 234 "pmuludq %%mm1,%%mm0 \n\t" \
wolfSSL 0:1239e9b70ca2 235 "movd 4(%2),%%mm5 \n\t" \
wolfSSL 0:1239e9b70ca2 236 "paddq %%mm0,%%mm3 \n\t" \
wolfSSL 0:1239e9b70ca2 237 "movd 4(%1),%%mm6 \n\t" \
wolfSSL 0:1239e9b70ca2 238 "movd %%mm3,0(%0) \n\t" \
wolfSSL 0:1239e9b70ca2 239 "psrlq $32, %%mm3 \n\t" \
wolfSSL 0:1239e9b70ca2 240 \
wolfSSL 0:1239e9b70ca2 241 "paddq %%mm6,%%mm3 \n\t" \
wolfSSL 0:1239e9b70ca2 242 "pmuludq %%mm1,%%mm5 \n\t" \
wolfSSL 0:1239e9b70ca2 243 "movd 8(%2),%%mm6 \n\t" \
wolfSSL 0:1239e9b70ca2 244 "paddq %%mm5,%%mm3 \n\t" \
wolfSSL 0:1239e9b70ca2 245 "movd 8(%1),%%mm7 \n\t" \
wolfSSL 0:1239e9b70ca2 246 "movd %%mm3,4(%0) \n\t" \
wolfSSL 0:1239e9b70ca2 247 "psrlq $32, %%mm3 \n\t" \
wolfSSL 0:1239e9b70ca2 248 \
wolfSSL 0:1239e9b70ca2 249 "paddq %%mm7,%%mm3 \n\t" \
wolfSSL 0:1239e9b70ca2 250 "pmuludq %%mm1,%%mm6 \n\t" \
wolfSSL 0:1239e9b70ca2 251 "movd 12(%2),%%mm7 \n\t" \
wolfSSL 0:1239e9b70ca2 252 "paddq %%mm6,%%mm3 \n\t" \
wolfSSL 0:1239e9b70ca2 253 "movd 12(%1),%%mm5 \n\t" \
wolfSSL 0:1239e9b70ca2 254 "movd %%mm3,8(%0) \n\t" \
wolfSSL 0:1239e9b70ca2 255 "psrlq $32, %%mm3 \n\t" \
wolfSSL 0:1239e9b70ca2 256 \
wolfSSL 0:1239e9b70ca2 257 "paddq %%mm5,%%mm3 \n\t" \
wolfSSL 0:1239e9b70ca2 258 "pmuludq %%mm1,%%mm7 \n\t" \
wolfSSL 0:1239e9b70ca2 259 "movd 16(%2),%%mm5 \n\t" \
wolfSSL 0:1239e9b70ca2 260 "paddq %%mm7,%%mm3 \n\t" \
wolfSSL 0:1239e9b70ca2 261 "movd 16(%1),%%mm6 \n\t" \
wolfSSL 0:1239e9b70ca2 262 "movd %%mm3,12(%0) \n\t" \
wolfSSL 0:1239e9b70ca2 263 "psrlq $32, %%mm3 \n\t" \
wolfSSL 0:1239e9b70ca2 264 \
wolfSSL 0:1239e9b70ca2 265 "paddq %%mm6,%%mm3 \n\t" \
wolfSSL 0:1239e9b70ca2 266 "pmuludq %%mm1,%%mm5 \n\t" \
wolfSSL 0:1239e9b70ca2 267 "movd 20(%2),%%mm6 \n\t" \
wolfSSL 0:1239e9b70ca2 268 "paddq %%mm5,%%mm3 \n\t" \
wolfSSL 0:1239e9b70ca2 269 "movd 20(%1),%%mm7 \n\t" \
wolfSSL 0:1239e9b70ca2 270 "movd %%mm3,16(%0) \n\t" \
wolfSSL 0:1239e9b70ca2 271 "psrlq $32, %%mm3 \n\t" \
wolfSSL 0:1239e9b70ca2 272 \
wolfSSL 0:1239e9b70ca2 273 "paddq %%mm7,%%mm3 \n\t" \
wolfSSL 0:1239e9b70ca2 274 "pmuludq %%mm1,%%mm6 \n\t" \
wolfSSL 0:1239e9b70ca2 275 "movd 24(%2),%%mm7 \n\t" \
wolfSSL 0:1239e9b70ca2 276 "paddq %%mm6,%%mm3 \n\t" \
wolfSSL 0:1239e9b70ca2 277 "movd 24(%1),%%mm5 \n\t" \
wolfSSL 0:1239e9b70ca2 278 "movd %%mm3,20(%0) \n\t" \
wolfSSL 0:1239e9b70ca2 279 "psrlq $32, %%mm3 \n\t" \
wolfSSL 0:1239e9b70ca2 280 \
wolfSSL 0:1239e9b70ca2 281 "paddq %%mm5,%%mm3 \n\t" \
wolfSSL 0:1239e9b70ca2 282 "pmuludq %%mm1,%%mm7 \n\t" \
wolfSSL 0:1239e9b70ca2 283 "movd 28(%2),%%mm5 \n\t" \
wolfSSL 0:1239e9b70ca2 284 "paddq %%mm7,%%mm3 \n\t" \
wolfSSL 0:1239e9b70ca2 285 "movd 28(%1),%%mm6 \n\t" \
wolfSSL 0:1239e9b70ca2 286 "movd %%mm3,24(%0) \n\t" \
wolfSSL 0:1239e9b70ca2 287 "psrlq $32, %%mm3 \n\t" \
wolfSSL 0:1239e9b70ca2 288 \
wolfSSL 0:1239e9b70ca2 289 "paddq %%mm6,%%mm3 \n\t" \
wolfSSL 0:1239e9b70ca2 290 "pmuludq %%mm1,%%mm5 \n\t" \
wolfSSL 0:1239e9b70ca2 291 "paddq %%mm5,%%mm3 \n\t" \
wolfSSL 0:1239e9b70ca2 292 "movd %%mm3,28(%0) \n\t" \
wolfSSL 0:1239e9b70ca2 293 "psrlq $32, %%mm3 \n\t" \
wolfSSL 0:1239e9b70ca2 294 :"=r"(_c) : "0"(_c), "r"(tmpm) );
wolfSSL 0:1239e9b70ca2 295
wolfSSL 0:1239e9b70ca2 296 /* TAO switched tmpm from "g" to "r" after gcc tried to index the indexed stack
wolfSSL 0:1239e9b70ca2 297 pointer */
wolfSSL 0:1239e9b70ca2 298
wolfSSL 0:1239e9b70ca2 299 #define LOOP_END /* copy the running carry from mm3 into the C variable cy */ \
wolfSSL 0:1239e9b70ca2 300 __asm__( "movd %%mm3,%0 \n" :"=r"(cy))
wolfSSL 0:1239e9b70ca2 301
wolfSSL 0:1239e9b70ca2 302 #define PROPCARRY /* _c[LO] += cy; cy = carry out of that add (0 or 1) */ \
wolfSSL 0:1239e9b70ca2 303 __asm__( \
wolfSSL 0:1239e9b70ca2 304 "addl %1,%0 \n\t" /* _c[LO] += cy, CF set on wrap-around */ \
wolfSSL 0:1239e9b70ca2 305 "setb %%al \n\t" /* al = CF */ \
wolfSSL 0:1239e9b70ca2 306 "movzbl %%al,%1 \n\t" /* cy = al, zero-extended */ \
wolfSSL 0:1239e9b70ca2 307 :"=g"(_c[LO]), "=r"(cy) \
wolfSSL 0:1239e9b70ca2 308 :"0"(_c[LO]), "1"(cy) \
wolfSSL 0:1239e9b70ca2 309 : "%eax", "cc")
wolfSSL 0:1239e9b70ca2 310
wolfSSL 0:1239e9b70ca2 311 /******************************************************************/
wolfSSL 0:1239e9b70ca2 312 #elif defined(TFM_ARM)
wolfSSL 0:1239e9b70ca2 313 /* ARMv4 code */
wolfSSL 0:1239e9b70ca2 314
wolfSSL 0:1239e9b70ca2 315 #define MONT_START /* empty: no setup step on this target */
wolfSSL 0:1239e9b70ca2 316 #define MONT_FINI /* empty: no teardown step on this target */
wolfSSL 0:1239e9b70ca2 317 #define LOOP_END /* empty: cy is already held in a C variable */
wolfSSL 0:1239e9b70ca2 318 #define LOOP_START /* derive the multiplier mu for digit c[x] */ \
wolfSSL 0:1239e9b70ca2 319 mu = c[x] * mp
wolfSSL 0:1239e9b70ca2 320
wolfSSL 0:1239e9b70ca2 321
wolfSSL 0:1239e9b70ca2 322 #ifdef __thumb__
wolfSSL 0:1239e9b70ca2 323
wolfSSL 0:1239e9b70ca2 324 #define INNERMUL /* thumb: _c[0] += mu * (*tmpm++) + cy; cy = high word of that sum */ \
wolfSSL 0:1239e9b70ca2 325 __asm__( \
wolfSSL 0:1239e9b70ca2 326 " LDR r0,%1 \n\t" /* r0 = _c[0] */ \
wolfSSL 0:1239e9b70ca2 327 " ADDS r0,r0,%0 \n\t" /* r0 += cy, carry flag set on wrap-around */ \
wolfSSL 0:1239e9b70ca2 328 " ITE CS \n\t" /* if-then-else block for the two conditional moves below */ \
wolfSSL 0:1239e9b70ca2 329 " MOVCS %0,#1 \n\t" /* cy = 1 when the add carried */ \
wolfSSL 0:1239e9b70ca2 330 " MOVCC %0,#0 \n\t" /* cy = 0 otherwise */ \
wolfSSL 0:1239e9b70ca2 331 " UMLAL r0,%0,%3,%4 \n\t" /* cy:r0 += mu times the tmpm digit (64-bit accumulate) */ \
wolfSSL 0:1239e9b70ca2 332 " STR r0,%1 \n\t" /* _c[0] = low word */ \
wolfSSL 0:1239e9b70ca2 333 :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"m"(_c[0]):"r0","cc");
wolfSSL 0:1239e9b70ca2 334
wolfSSL 0:1239e9b70ca2 335 #define PROPCARRY /* thumb: _c[0] += cy; cy = carry out of that add (0 or 1) */ \
wolfSSL 0:1239e9b70ca2 336 __asm__( \
wolfSSL 0:1239e9b70ca2 337 " LDR r0,%1 \n\t" /* r0 = _c[0] */ \
wolfSSL 0:1239e9b70ca2 338 " ADDS r0,r0,%0 \n\t" /* r0 += cy, carry flag set on wrap-around */ \
wolfSSL 0:1239e9b70ca2 339 " STR r0,%1 \n\t" /* _c[0] = r0 */ \
wolfSSL 0:1239e9b70ca2 340 " ITE CS \n\t" /* if-then-else block for the two conditional moves below */ \
wolfSSL 0:1239e9b70ca2 341 " MOVCS %0,#1 \n\t" /* cy = 1 when the add carried */ \
wolfSSL 0:1239e9b70ca2 342 " MOVCC %0,#0 \n\t" /* cy = 0 otherwise */ \
wolfSSL 0:1239e9b70ca2 343 :"=r"(cy),"=m"(_c[0]):"0"(cy),"m"(_c[0]):"r0","cc");
wolfSSL 0:1239e9b70ca2 344
wolfSSL 0:1239e9b70ca2 345
wolfSSL 0:1239e9b70ca2 346 /* TAO thumb mode uses ite (if then else) to detect carry directly
wolfSSL 0:1239e9b70ca2 347 * fixed unmatched constraint warning by changing 1 to m */
wolfSSL 0:1239e9b70ca2 348
wolfSSL 0:1239e9b70ca2 349 #else /* __thumb__ */
wolfSSL 0:1239e9b70ca2 350
wolfSSL 0:1239e9b70ca2 351 #define INNERMUL /* ARM mode: _c[0] += mu * (*tmpm++) + cy; cy = high word of that sum */ \
wolfSSL 0:1239e9b70ca2 352 __asm__( \
wolfSSL 0:1239e9b70ca2 353 " LDR r0,%1 \n\t" /* r0 = _c[0] */ \
wolfSSL 0:1239e9b70ca2 354 " ADDS r0,r0,%0 \n\t" /* r0 += cy, carry flag set on wrap-around */ \
wolfSSL 0:1239e9b70ca2 355 " MOVCS %0,#1 \n\t" /* cy = 1 when the add carried */ \
wolfSSL 0:1239e9b70ca2 356 " MOVCC %0,#0 \n\t" /* cy = 0 otherwise */ \
wolfSSL 0:1239e9b70ca2 357 " UMLAL r0,%0,%3,%4 \n\t" /* cy:r0 += mu times the tmpm digit (64-bit accumulate) */ \
wolfSSL 0:1239e9b70ca2 358 " STR r0,%1 \n\t" /* _c[0] = low word */ \
wolfSSL 0:1239e9b70ca2 359 :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r0","cc");
wolfSSL 0:1239e9b70ca2 360
wolfSSL 0:1239e9b70ca2 361 #define PROPCARRY /* ARM mode: _c[0] += cy; cy = carry out of that add (0 or 1) */ \
wolfSSL 0:1239e9b70ca2 362 __asm__( \
wolfSSL 0:1239e9b70ca2 363 " LDR r0,%1 \n\t" /* r0 = _c[0] */ \
wolfSSL 0:1239e9b70ca2 364 " ADDS r0,r0,%0 \n\t" /* r0 += cy, carry flag set on wrap-around */ \
wolfSSL 0:1239e9b70ca2 365 " STR r0,%1 \n\t" /* _c[0] = r0 */ \
wolfSSL 0:1239e9b70ca2 366 " MOVCS %0,#1 \n\t" /* cy = 1 when the add carried */ \
wolfSSL 0:1239e9b70ca2 367 " MOVCC %0,#0 \n\t" /* cy = 0 otherwise */ \
wolfSSL 0:1239e9b70ca2 368 :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r0","cc");
wolfSSL 0:1239e9b70ca2 369
wolfSSL 0:1239e9b70ca2 370 #endif /* __thumb__ */
wolfSSL 0:1239e9b70ca2 371
wolfSSL 0:1239e9b70ca2 372 #elif defined(TFM_PPC32)
wolfSSL 0:1239e9b70ca2 373
wolfSSL 0:1239e9b70ca2 374 /* PPC32 */
wolfSSL 0:1239e9b70ca2 375 #define MONT_START /* empty: no setup step on this target */
wolfSSL 0:1239e9b70ca2 376 #define MONT_FINI /* empty: no teardown step on this target */
wolfSSL 0:1239e9b70ca2 377 #define LOOP_END /* empty: cy is already held in a C variable */
wolfSSL 0:1239e9b70ca2 378 #define LOOP_START /* derive the multiplier mu for digit c[x] */ \
wolfSSL 0:1239e9b70ca2 379 mu = c[x] * mp
wolfSSL 0:1239e9b70ca2 380
wolfSSL 0:1239e9b70ca2 381 #define INNERMUL /* _c[0] += mu * tmpm[0] + cy; cy = high word; tmpm is advanced after the asm; uses r16-r18 as scratch */ \
wolfSSL 0:1239e9b70ca2 382 __asm__( \
wolfSSL 0:1239e9b70ca2 383 " mullw 16,%3,%4 \n\t" /* r16 = low word of mu times tmpm[0] */ \
wolfSSL 0:1239e9b70ca2 384 " mulhwu 17,%3,%4 \n\t" /* r17 = high word of the same product */ \
wolfSSL 0:1239e9b70ca2 385 " addc 16,16,%0 \n\t" /* low += cy, recording the carry */ \
wolfSSL 0:1239e9b70ca2 386 " addze 17,17 \n\t" /* high += carry */ \
wolfSSL 0:1239e9b70ca2 387 " lwz 18,%1 \n\t" /* r18 = _c[0] */ \
wolfSSL 0:1239e9b70ca2 388 " addc 16,16,18 \n\t" /* low += _c[0], recording the carry */ \
wolfSSL 0:1239e9b70ca2 389 " addze %0,17 \n\t" /* cy = high + carry */ \
wolfSSL 0:1239e9b70ca2 390 " stw 16,%1 \n\t" /* _c[0] = low */ \
wolfSSL 0:1239e9b70ca2 391 :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "18","cc"); ++tmpm;
wolfSSL 0:1239e9b70ca2 392
wolfSSL 0:1239e9b70ca2 393 #define PROPCARRY /* _c[0] += cy; cy = carry out of that add (0 or 1); uses r16 as scratch */ \
wolfSSL 0:1239e9b70ca2 394 __asm__( \
wolfSSL 0:1239e9b70ca2 395 " lwz 16,%1 \n\t" /* r16 = _c[0] */ \
wolfSSL 0:1239e9b70ca2 396 " addc 16,16,%0 \n\t" /* r16 += cy, recording the carry */ \
wolfSSL 0:1239e9b70ca2 397 " stw 16,%1 \n\t" /* _c[0] = r16 */ \
wolfSSL 0:1239e9b70ca2 398 " xor %0,%0,%0 \n\t" /* cy = 0 */ \
wolfSSL 0:1239e9b70ca2 399 " addze %0,%0 \n\t" /* cy = 0 + carry */ \
wolfSSL 0:1239e9b70ca2 400 :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"16","cc");
wolfSSL 0:1239e9b70ca2 401
wolfSSL 0:1239e9b70ca2 402 #elif defined(TFM_PPC64)
wolfSSL 0:1239e9b70ca2 403
wolfSSL 0:1239e9b70ca2 404 /* PPC64 */
wolfSSL 0:1239e9b70ca2 405 #define MONT_START /* empty: no setup step on this target */
wolfSSL 0:1239e9b70ca2 406 #define MONT_FINI /* empty: no teardown step on this target */
wolfSSL 0:1239e9b70ca2 407 #define LOOP_END /* empty: cy is already held in a C variable */
wolfSSL 0:1239e9b70ca2 408 #define LOOP_START /* derive the multiplier mu for digit c[x] */ \
wolfSSL 0:1239e9b70ca2 409 mu = c[x] * mp
wolfSSL 0:1239e9b70ca2 410
wolfSSL 0:1239e9b70ca2 411 #define INNERMUL /* _c[0] += mu * tmpm[0] + cy; cy = high doubleword; tmpm is advanced after the asm; uses r16-r18 as scratch. The memory operand is accessed with ld/std (same shape as lwz/stw in the PPC32 variant); the former sdx is not a Power ISA mnemonic */ \
wolfSSL 0:1239e9b70ca2 412 __asm__( \
wolfSSL 0:1239e9b70ca2 413 " mulld 16,%3,%4 \n\t" /* r16 = low doubleword of mu times tmpm[0] */ \
wolfSSL 0:1239e9b70ca2 414 " mulhdu 17,%3,%4 \n\t" /* r17 = high doubleword of the same product */ \
wolfSSL 0:1239e9b70ca2 415 " addc 16,16,%0 \n\t" /* low += cy, recording the carry */ \
wolfSSL 0:1239e9b70ca2 416 " addze 17,17 \n\t" /* high += carry */ \
wolfSSL 0:1239e9b70ca2 417 " ld 18,%1 \n\t" /* r18 = _c[0] */ \
wolfSSL 0:1239e9b70ca2 418 " addc 16,16,18 \n\t" /* low += _c[0], recording the carry */ \
wolfSSL 0:1239e9b70ca2 419 " addze %0,17 \n\t" /* cy = high + carry */ \
wolfSSL 0:1239e9b70ca2 420 " std 16,%1 \n\t" /* _c[0] = low */ \
wolfSSL 0:1239e9b70ca2 421 :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "18","cc"); ++tmpm;
wolfSSL 0:1239e9b70ca2 422
wolfSSL 0:1239e9b70ca2 423 #define PROPCARRY /* _c[0] += cy; cy = carry out of that add (0 or 1); uses r16 as scratch. The memory operand is accessed with ld/std (same shape as lwz/stw in the PPC32 variant); the former sdx is not a Power ISA mnemonic */ \
wolfSSL 0:1239e9b70ca2 424 __asm__( \
wolfSSL 0:1239e9b70ca2 425 " ld 16,%1 \n\t" /* r16 = _c[0] */ \
wolfSSL 0:1239e9b70ca2 426 " addc 16,16,%0 \n\t" /* r16 += cy, recording the carry */ \
wolfSSL 0:1239e9b70ca2 427 " std 16,%1 \n\t" /* _c[0] = r16 */ \
wolfSSL 0:1239e9b70ca2 428 " xor %0,%0,%0 \n\t" /* cy = 0 */ \
wolfSSL 0:1239e9b70ca2 429 " addze %0,%0 \n\t" /* cy = 0 + carry */ \
wolfSSL 0:1239e9b70ca2 430 :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"16","cc");
wolfSSL 0:1239e9b70ca2 431
wolfSSL 0:1239e9b70ca2 432 /******************************************************************/
wolfSSL 0:1239e9b70ca2 433
wolfSSL 0:1239e9b70ca2 434 #elif defined(TFM_AVR32)
wolfSSL 0:1239e9b70ca2 435
wolfSSL 0:1239e9b70ca2 436 /* AVR32 */
wolfSSL 0:1239e9b70ca2 437 #define MONT_START /* empty: no setup step on this target */
wolfSSL 0:1239e9b70ca2 438 #define MONT_FINI /* empty: no teardown step on this target */
wolfSSL 0:1239e9b70ca2 439 #define LOOP_END /* empty: cy is already held in a C variable */
wolfSSL 0:1239e9b70ca2 440 #define LOOP_START /* derive the multiplier mu for digit c[x] */ \
wolfSSL 0:1239e9b70ca2 441 mu = c[x] * mp
wolfSSL 0:1239e9b70ca2 442
wolfSSL 0:1239e9b70ca2 443 #define INNERMUL /* multiply-accumulate step on the digit _c points at, using mu, *tmpm++ and cy; r2/r3 are scratch. Clobbers list "cc" (as PROPCARRY does) and "memory" because the digit is accessed through the pointer register only */ \
wolfSSL 0:1239e9b70ca2 444 __asm__( \
wolfSSL 0:1239e9b70ca2 445 " ld.w r2,%1 \n\t" /* r2 = digit addressed by _c */ \
wolfSSL 0:1239e9b70ca2 446 " add r2,%0 \n\t" /* r2 += cy */ \
wolfSSL 0:1239e9b70ca2 447 " eor r3,r3 \n\t" /* r3 = 0 */ \
wolfSSL 0:1239e9b70ca2 448 " acr r3 \n\t" /* r3 += carry from the add */ \
wolfSSL 0:1239e9b70ca2 449 " macu.d r2,%3,%4 \n\t" /* presumably r3:r2 += mu times the tmpm digit - confirm against the AVR32 manual */ \
wolfSSL 0:1239e9b70ca2 450 " st.w %1,r2 \n\t" /* store the low word back */ \
wolfSSL 0:1239e9b70ca2 451 " mov %0,r3 \n\t" /* cy = r3 */ \
wolfSSL 0:1239e9b70ca2 452 :"=r"(cy),"=r"(_c):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c):"r2","r3","cc","memory");
wolfSSL 0:1239e9b70ca2 453
wolfSSL 0:1239e9b70ca2 454 #define PROPCARRY /* add cy into the digit _c points at and leave the carry-out in cy; r2 is scratch. The pointer operand is _c itself (an lvalue, as in INNERMUL) because an asm output cannot be the rvalue &_c[0]; "memory" covers the access through that pointer */ \
wolfSSL 0:1239e9b70ca2 455 __asm__( \
wolfSSL 0:1239e9b70ca2 456 " ld.w r2,%1 \n\t" /* r2 = digit addressed by _c */ \
wolfSSL 0:1239e9b70ca2 457 " add r2,%0 \n\t" /* r2 += cy */ \
wolfSSL 0:1239e9b70ca2 458 " st.w %1,r2 \n\t" /* store the sum back */ \
wolfSSL 0:1239e9b70ca2 459 " eor %0,%0 \n\t" /* cy = 0 */ \
wolfSSL 0:1239e9b70ca2 460 " acr %0 \n\t" /* cy += carry from the add */ \
wolfSSL 0:1239e9b70ca2 461 :"=r"(cy),"=r"(_c):"0"(cy),"1"(_c):"r2","cc","memory");
wolfSSL 0:1239e9b70ca2 462
wolfSSL 0:1239e9b70ca2 463 #else
wolfSSL 0:1239e9b70ca2 464
wolfSSL 0:1239e9b70ca2 465 /* ISO C code */
wolfSSL 0:1239e9b70ca2 466 #define MONT_START /* empty: no setup step in the portable build */
wolfSSL 0:1239e9b70ca2 467 #define MONT_FINI /* empty: no teardown step in the portable build */
wolfSSL 0:1239e9b70ca2 468 #define LOOP_END /* empty: cy is already held in a C variable */
wolfSSL 0:1239e9b70ca2 469 #define LOOP_START /* derive the multiplier mu for digit c[x] */ \
wolfSSL 0:1239e9b70ca2 470 mu = c[x] * mp
wolfSSL 0:1239e9b70ca2 471
wolfSSL 0:1239e9b70ca2 472 #define INNERMUL /* _c[0] += mu * (*tmpm++) + cy; the bits above DIGIT_BIT become the new cy */ \
wolfSSL 0:1239e9b70ca2 473 do { fp_word acc_; \
wolfSSL 0:1239e9b70ca2 474 acc_ = ((fp_word)mu) * ((fp_word)*tmpm++); \
wolfSSL 0:1239e9b70ca2 475 acc_ += (fp_word)_c[0] + (fp_word)cy; \
wolfSSL 0:1239e9b70ca2 476 _c[0] = (fp_digit)acc_; \
wolfSSL 0:1239e9b70ca2 477 cy = (fp_digit)(acc_ >> DIGIT_BIT); \
wolfSSL 0:1239e9b70ca2 478 } while (0)
wolfSSL 0:1239e9b70ca2 479
wolfSSL 0:1239e9b70ca2 480 #define PROPCARRY /* add cy into _c[0]; cy becomes 1 if the digit wrapped, else 0 */ \
wolfSSL 0:1239e9b70ca2 481 do { fp_digit sum_; _c[0] += cy; sum_ = _c[0]; cy = (sum_ < cy); } while (0)
wolfSSL 0:1239e9b70ca2 482
wolfSSL 0:1239e9b70ca2 483 #endif
wolfSSL 0:1239e9b70ca2 484 /******************************************************************/
wolfSSL 0:1239e9b70ca2 485
wolfSSL 0:1239e9b70ca2 486
wolfSSL 0:1239e9b70ca2 487 #define LO 0 /* digit index used as _c[LO] by the x86, x86-64 and SSE2 macros above */
wolfSSL 0:1239e9b70ca2 488 /* end fp_montgomery_reduce.c asm */
wolfSSL 0:1239e9b70ca2 489
wolfSSL 0:1239e9b70ca2 490
wolfSSL 0:1239e9b70ca2 491 /* start fp_sqr_comba.c asm */
wolfSSL 0:1239e9b70ca2 492 #if defined(TFM_X86)
wolfSSL 0:1239e9b70ca2 493
wolfSSL 0:1239e9b70ca2 494 /* x86-32 optimized */
wolfSSL 0:1239e9b70ca2 495
wolfSSL 0:1239e9b70ca2 496 #define COMBA_START /* empty on this target */
wolfSSL 0:1239e9b70ca2 497
wolfSSL 0:1239e9b70ca2 498 #define CLEAR_CARRY /* zero the three-word accumulator c2:c1:c0 */ \
wolfSSL 0:1239e9b70ca2 499 c0 = c1 = c2 = 0;
wolfSSL 0:1239e9b70ca2 500
wolfSSL 0:1239e9b70ca2 501 #define COMBA_STORE(x) /* write the low accumulator word c0 to x */ \
wolfSSL 0:1239e9b70ca2 502 x = c0;
wolfSSL 0:1239e9b70ca2 503
wolfSSL 0:1239e9b70ca2 504 #define COMBA_STORE2(x) /* write the middle accumulator word c1 to x */ \
wolfSSL 0:1239e9b70ca2 505 x = c1;
wolfSSL 0:1239e9b70ca2 506
wolfSSL 0:1239e9b70ca2 507 #define CARRY_FORWARD /* shift the accumulator down one word: c0 = c1, c1 = c2, c2 = 0 */ \
wolfSSL 0:1239e9b70ca2 508 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 0:1239e9b70ca2 509
wolfSSL 0:1239e9b70ca2 510 #define COMBA_FINI /* empty on this target */
wolfSSL 0:1239e9b70ca2 511
wolfSSL 0:1239e9b70ca2 512 #define SQRADD(i, j) /* c2:c1:c0 += i * i (j is not referenced) */ \
wolfSSL 0:1239e9b70ca2 513 __asm__( \
wolfSSL 0:1239e9b70ca2 514 "movl %6,%%eax \n\t" /* eax = i */ \
wolfSSL 0:1239e9b70ca2 515 "mull %%eax \n\t" /* edx:eax = i squared */ \
wolfSSL 0:1239e9b70ca2 516 "addl %%eax,%0 \n\t" /* c0 += low word */ \
wolfSSL 0:1239e9b70ca2 517 "adcl %%edx,%1 \n\t" /* c1 += high word + carry */ \
wolfSSL 0:1239e9b70ca2 518 "adcl $0,%2 \n\t" /* c2 += carry */ \
wolfSSL 0:1239e9b70ca2 519 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","cc");
wolfSSL 0:1239e9b70ca2 520
wolfSSL 0:1239e9b70ca2 521 #define SQRADD2(i, j) /* c2:c1:c0 += 2 * i * j, done by adding the product twice */ \
wolfSSL 0:1239e9b70ca2 522 __asm__( \
wolfSSL 0:1239e9b70ca2 523 "movl %6,%%eax \n\t" /* eax = i */ \
wolfSSL 0:1239e9b70ca2 524 "mull %7 \n\t" /* edx:eax = i times j */ \
wolfSSL 0:1239e9b70ca2 525 "addl %%eax,%0 \n\t" /* first add of the product */ \
wolfSSL 0:1239e9b70ca2 526 "adcl %%edx,%1 \n\t" \
wolfSSL 0:1239e9b70ca2 527 "adcl $0,%2 \n\t" \
wolfSSL 0:1239e9b70ca2 528 "addl %%eax,%0 \n\t" /* second add of the same product */ \
wolfSSL 0:1239e9b70ca2 529 "adcl %%edx,%1 \n\t" \
wolfSSL 0:1239e9b70ca2 530 "adcl $0,%2 \n\t" \
wolfSSL 0:1239e9b70ca2 531 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx", "cc");
wolfSSL 0:1239e9b70ca2 532
wolfSSL 0:1239e9b70ca2 533 #define SQRADDSC(i, j) /* start the secondary accumulator: sc1:sc0 = i * j, sc2 = 0 */ \
wolfSSL 0:1239e9b70ca2 534 __asm__( \
wolfSSL 0:1239e9b70ca2 535 "movl %3,%%eax \n\t" /* eax = i */ \
wolfSSL 0:1239e9b70ca2 536 "mull %4 \n\t" /* edx:eax = i times j */ \
wolfSSL 0:1239e9b70ca2 537 "movl %%eax,%0 \n\t" /* sc0 = low word */ \
wolfSSL 0:1239e9b70ca2 538 "movl %%edx,%1 \n\t" /* sc1 = high word */ \
wolfSSL 0:1239e9b70ca2 539 "xorl %2,%2 \n\t" /* sc2 = 0 */ \
wolfSSL 0:1239e9b70ca2 540 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "g"(i), "g"(j) :"%eax","%edx","cc");
wolfSSL 0:1239e9b70ca2 541
wolfSSL 0:1239e9b70ca2 542 /* TAO removed sc0,1,2 as input to remove warning so %6,%7 become %3,%4 */
wolfSSL 0:1239e9b70ca2 543
wolfSSL 0:1239e9b70ca2 544 #define SQRADDAC(i, j) /* sc2:sc1:sc0 += i * j */ \
wolfSSL 0:1239e9b70ca2 545 __asm__( \
wolfSSL 0:1239e9b70ca2 546 "movl %6,%%eax \n\t" /* eax = i */ \
wolfSSL 0:1239e9b70ca2 547 "mull %7 \n\t" /* edx:eax = i times j */ \
wolfSSL 0:1239e9b70ca2 548 "addl %%eax,%0 \n\t" /* sc0 += low word */ \
wolfSSL 0:1239e9b70ca2 549 "adcl %%edx,%1 \n\t" /* sc1 += high word + carry */ \
wolfSSL 0:1239e9b70ca2 550 "adcl $0,%2 \n\t" /* sc2 += carry */ \
wolfSSL 0:1239e9b70ca2 551 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","cc");
wolfSSL 0:1239e9b70ca2 552
wolfSSL 0:1239e9b70ca2 553 #define SQRADDDB /* c2:c1:c0 += 2 * sc2:sc1:sc0, done by adding the secondary accumulator twice */ \
wolfSSL 0:1239e9b70ca2 554 __asm__( \
wolfSSL 0:1239e9b70ca2 555 "addl %6,%0 \n\t" /* first add: c0 += sc0 */ \
wolfSSL 0:1239e9b70ca2 556 "adcl %7,%1 \n\t" /* c1 += sc1 + carry */ \
wolfSSL 0:1239e9b70ca2 557 "adcl %8,%2 \n\t" /* c2 += sc2 + carry */ \
wolfSSL 0:1239e9b70ca2 558 "addl %6,%0 \n\t" /* second add of the same three words */ \
wolfSSL 0:1239e9b70ca2 559 "adcl %7,%1 \n\t" \
wolfSSL 0:1239e9b70ca2 560 "adcl %8,%2 \n\t" \
wolfSSL 0:1239e9b70ca2 561 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");
wolfSSL 0:1239e9b70ca2 562
wolfSSL 0:1239e9b70ca2 563 #elif defined(TFM_X86_64)
wolfSSL 0:1239e9b70ca2 564 /* x86-64 optimized */
wolfSSL 0:1239e9b70ca2 565
wolfSSL 0:1239e9b70ca2 566 #define COMBA_START /* empty on this target */
wolfSSL 0:1239e9b70ca2 567
wolfSSL 0:1239e9b70ca2 568 #define CLEAR_CARRY /* zero the three-word accumulator c2:c1:c0 */ \
wolfSSL 0:1239e9b70ca2 569 c0 = c1 = c2 = 0;
wolfSSL 0:1239e9b70ca2 570
wolfSSL 0:1239e9b70ca2 571 #define COMBA_STORE(x) /* write the low accumulator word c0 to x */ \
wolfSSL 0:1239e9b70ca2 572 x = c0;
wolfSSL 0:1239e9b70ca2 573
wolfSSL 0:1239e9b70ca2 574 #define COMBA_STORE2(x) /* write the middle accumulator word c1 to x */ \
wolfSSL 0:1239e9b70ca2 575 x = c1;
wolfSSL 0:1239e9b70ca2 576
wolfSSL 0:1239e9b70ca2 577 #define CARRY_FORWARD /* shift the accumulator down one word: c0 = c1, c1 = c2, c2 = 0 */ \
wolfSSL 0:1239e9b70ca2 578 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 0:1239e9b70ca2 579
wolfSSL 0:1239e9b70ca2 580 #define COMBA_FINI /* empty on this target */
wolfSSL 0:1239e9b70ca2 581
wolfSSL 0:1239e9b70ca2 582 #define SQRADD(i, j) /* c2:c1:c0 += i * i (j is not referenced) */ \
wolfSSL 0:1239e9b70ca2 583 __asm__( \
wolfSSL 0:1239e9b70ca2 584 "movq %6,%%rax \n\t" /* rax = i */ \
wolfSSL 0:1239e9b70ca2 585 "mulq %%rax \n\t" /* rdx:rax = i squared */ \
wolfSSL 0:1239e9b70ca2 586 "addq %%rax,%0 \n\t" /* c0 += low word */ \
wolfSSL 0:1239e9b70ca2 587 "adcq %%rdx,%1 \n\t" /* c1 += high word + carry */ \
wolfSSL 0:1239e9b70ca2 588 "adcq $0,%2 \n\t" /* c2 += carry */ \
wolfSSL 0:1239e9b70ca2 589 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i) :"%rax","%rdx","cc");
wolfSSL 0:1239e9b70ca2 590
wolfSSL 0:1239e9b70ca2 591 #define SQRADD2(i, j) /* c2:c1:c0 += 2 * i * j, done by adding the product twice */ \
wolfSSL 0:1239e9b70ca2 592 __asm__( \
wolfSSL 0:1239e9b70ca2 593 "movq %6,%%rax \n\t" /* rax = i */ \
wolfSSL 0:1239e9b70ca2 594 "mulq %7 \n\t" /* rdx:rax = i times j */ \
wolfSSL 0:1239e9b70ca2 595 "addq %%rax,%0 \n\t" /* first add of the product */ \
wolfSSL 0:1239e9b70ca2 596 "adcq %%rdx,%1 \n\t" \
wolfSSL 0:1239e9b70ca2 597 "adcq $0,%2 \n\t" \
wolfSSL 0:1239e9b70ca2 598 "addq %%rax,%0 \n\t" /* second add of the same product */ \
wolfSSL 0:1239e9b70ca2 599 "adcq %%rdx,%1 \n\t" \
wolfSSL 0:1239e9b70ca2 600 "adcq $0,%2 \n\t" \
wolfSSL 0:1239e9b70ca2 601 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","cc");
wolfSSL 0:1239e9b70ca2 602
wolfSSL 0:1239e9b70ca2 603 #define SQRADDSC(i, j) /* start the secondary accumulator: sc1:sc0 = i * j, sc2 = 0 */ \
wolfSSL 0:1239e9b70ca2 604 __asm__( \
wolfSSL 0:1239e9b70ca2 605 "movq %3,%%rax \n\t" /* rax = i */ \
wolfSSL 0:1239e9b70ca2 606 "mulq %4 \n\t" /* rdx:rax = i times j */ \
wolfSSL 0:1239e9b70ca2 607 "movq %%rax,%0 \n\t" /* sc0 = low word */ \
wolfSSL 0:1239e9b70ca2 608 "movq %%rdx,%1 \n\t" /* sc1 = high word */ \
wolfSSL 0:1239e9b70ca2 609 "xorq %2,%2 \n\t" /* sc2 = 0 */ \
wolfSSL 0:1239e9b70ca2 610 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "g"(i), "g"(j) :"%rax","%rdx","cc");
wolfSSL 0:1239e9b70ca2 611
wolfSSL 0:1239e9b70ca2 612 /* TAO removed sc0,1,2 as input to remove warning so %6,%7 become %3,%4 */
wolfSSL 0:1239e9b70ca2 613
wolfSSL 0:1239e9b70ca2 614 #define SQRADDAC(i, j) /* sc2:sc1:sc0 += i * j */ \
wolfSSL 0:1239e9b70ca2 615 __asm__( \
wolfSSL 0:1239e9b70ca2 616 "movq %6,%%rax \n\t" /* rax = i */ \
wolfSSL 0:1239e9b70ca2 617 "mulq %7 \n\t" /* rdx:rax = i times j */ \
wolfSSL 0:1239e9b70ca2 618 "addq %%rax,%0 \n\t" /* sc0 += low word */ \
wolfSSL 0:1239e9b70ca2 619 "adcq %%rdx,%1 \n\t" /* sc1 += high word + carry */ \
wolfSSL 0:1239e9b70ca2 620 "adcq $0,%2 \n\t" /* sc2 += carry */ \
wolfSSL 0:1239e9b70ca2 621 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","cc");
wolfSSL 0:1239e9b70ca2 622
wolfSSL 0:1239e9b70ca2 623 #define SQRADDDB /* c2:c1:c0 += 2 * sc2:sc1:sc0, done by adding the secondary accumulator twice */ \
wolfSSL 0:1239e9b70ca2 624 __asm__( \
wolfSSL 0:1239e9b70ca2 625 "addq %6,%0 \n\t" /* first add: c0 += sc0 */ \
wolfSSL 0:1239e9b70ca2 626 "adcq %7,%1 \n\t" /* c1 += sc1 + carry */ \
wolfSSL 0:1239e9b70ca2 627 "adcq %8,%2 \n\t" /* c2 += sc2 + carry */ \
wolfSSL 0:1239e9b70ca2 628 "addq %6,%0 \n\t" /* second add of the same three words */ \
wolfSSL 0:1239e9b70ca2 629 "adcq %7,%1 \n\t" \
wolfSSL 0:1239e9b70ca2 630 "adcq %8,%2 \n\t" \
wolfSSL 0:1239e9b70ca2 631 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");
wolfSSL 0:1239e9b70ca2 632
wolfSSL 0:1239e9b70ca2 633 #elif defined(TFM_SSE2)
wolfSSL 0:1239e9b70ca2 634
wolfSSL 0:1239e9b70ca2 635 /* SSE2 Optimized */
wolfSSL 0:1239e9b70ca2 636 #define COMBA_START /* empty on this target */
wolfSSL 0:1239e9b70ca2 637
wolfSSL 0:1239e9b70ca2 638 #define CLEAR_CARRY /* zero the three-word accumulator c2:c1:c0 */ \
wolfSSL 0:1239e9b70ca2 639 c0 = c1 = c2 = 0;
wolfSSL 0:1239e9b70ca2 640
wolfSSL 0:1239e9b70ca2 641 #define COMBA_STORE(x) /* write the low accumulator word c0 to x */ \
wolfSSL 0:1239e9b70ca2 642 x = c0;
wolfSSL 0:1239e9b70ca2 643
wolfSSL 0:1239e9b70ca2 644 #define COMBA_STORE2(x) /* write the middle accumulator word c1 to x */ \
wolfSSL 0:1239e9b70ca2 645 x = c1;
wolfSSL 0:1239e9b70ca2 646
wolfSSL 0:1239e9b70ca2 647 #define CARRY_FORWARD /* shift the accumulator down one word: c0 = c1, c1 = c2, c2 = 0 */ \
wolfSSL 0:1239e9b70ca2 648 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 0:1239e9b70ca2 649
wolfSSL 0:1239e9b70ca2 650 #define COMBA_FINI /* emms: empty the MMX state after the mm-register macros below */ \
wolfSSL 0:1239e9b70ca2 651 __asm__("emms");
wolfSSL 0:1239e9b70ca2 652
wolfSSL 0:1239e9b70ca2 653 #define SQRADD(i, j) \
wolfSSL 0:1239e9b70ca2 654 __asm__( \
wolfSSL 0:1239e9b70ca2 655 "movd %6,%%mm0 \n\t" \
wolfSSL 0:1239e9b70ca2 656 "pmuludq %%mm0,%%mm0\n\t" \
wolfSSL 0:1239e9b70ca2 657 "movd %%mm0,%%eax \n\t" \
wolfSSL 0:1239e9b70ca2 658 "psrlq $32,%%mm0 \n\t" \
wolfSSL 0:1239e9b70ca2 659 "addl %%eax,%0 \n\t" \
wolfSSL 0:1239e9b70ca2 660 "movd %%mm0,%%eax \n\t" \
wolfSSL 0:1239e9b70ca2 661 "adcl %%eax,%1 \n\t" \
wolfSSL 0:1239e9b70ca2 662 "adcl $0,%2 \n\t" \
wolfSSL 0:1239e9b70ca2 663 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","cc");
wolfSSL 0:1239e9b70ca2 664
wolfSSL 0:1239e9b70ca2 665 #define SQRADD2(i, j) \
wolfSSL 0:1239e9b70ca2 666 __asm__( \
wolfSSL 0:1239e9b70ca2 667 "movd %6,%%mm0 \n\t" \
wolfSSL 0:1239e9b70ca2 668 "movd %7,%%mm1 \n\t" \
wolfSSL 0:1239e9b70ca2 669 "pmuludq %%mm1,%%mm0\n\t" \
wolfSSL 0:1239e9b70ca2 670 "movd %%mm0,%%eax \n\t" \
wolfSSL 0:1239e9b70ca2 671 "psrlq $32,%%mm0 \n\t" \
wolfSSL 0:1239e9b70ca2 672 "movd %%mm0,%%edx \n\t" \
wolfSSL 0:1239e9b70ca2 673 "addl %%eax,%0 \n\t" \
wolfSSL 0:1239e9b70ca2 674 "adcl %%edx,%1 \n\t" \
wolfSSL 0:1239e9b70ca2 675 "adcl $0,%2 \n\t" \
wolfSSL 0:1239e9b70ca2 676 "addl %%eax,%0 \n\t" \
wolfSSL 0:1239e9b70ca2 677 "adcl %%edx,%1 \n\t" \
wolfSSL 0:1239e9b70ca2 678 "adcl $0,%2 \n\t" \
wolfSSL 0:1239e9b70ca2 679 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc");
wolfSSL 0:1239e9b70ca2 680
wolfSSL 0:1239e9b70ca2 681 #define SQRADDSC(i, j) \
wolfSSL 0:1239e9b70ca2 682 __asm__( \
wolfSSL 0:1239e9b70ca2 683 "movd %3,%%mm0 \n\t" \
wolfSSL 0:1239e9b70ca2 684 "movd %4,%%mm1 \n\t" \
wolfSSL 0:1239e9b70ca2 685 "pmuludq %%mm1,%%mm0\n\t" \
wolfSSL 0:1239e9b70ca2 686 "movd %%mm0,%0 \n\t" \
wolfSSL 0:1239e9b70ca2 687 "psrlq $32,%%mm0 \n\t" \
wolfSSL 0:1239e9b70ca2 688 "movd %%mm0,%1 \n\t" \
wolfSSL 0:1239e9b70ca2 689 "xorl %2,%2 \n\t" \
wolfSSL 0:1239e9b70ca2 690 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "m"(i), "m"(j));
wolfSSL 0:1239e9b70ca2 691
wolfSSL 0:1239e9b70ca2 692 /* TAO removed sc0,1,2 as input to SQRADDSC (above) to remove warning, so %6,%7 became %3,%4 there */
wolfSSL 0:1239e9b70ca2 693
wolfSSL 0:1239e9b70ca2 694 #define SQRADDAC(i, j) \
wolfSSL 0:1239e9b70ca2 695 __asm__( \
wolfSSL 0:1239e9b70ca2 696 "movd %6,%%mm0 \n\t" \
wolfSSL 0:1239e9b70ca2 697 "movd %7,%%mm1 \n\t" \
wolfSSL 0:1239e9b70ca2 698 "pmuludq %%mm1,%%mm0\n\t" \
wolfSSL 0:1239e9b70ca2 699 "movd %%mm0,%%eax \n\t" \
wolfSSL 0:1239e9b70ca2 700 "psrlq $32,%%mm0 \n\t" \
wolfSSL 0:1239e9b70ca2 701 "movd %%mm0,%%edx \n\t" \
wolfSSL 0:1239e9b70ca2 702 "addl %%eax,%0 \n\t" \
wolfSSL 0:1239e9b70ca2 703 "adcl %%edx,%1 \n\t" \
wolfSSL 0:1239e9b70ca2 704 "adcl $0,%2 \n\t" \
wolfSSL 0:1239e9b70ca2 705 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j) :"%eax","%edx","cc");
wolfSSL 0:1239e9b70ca2 706
wolfSSL 0:1239e9b70ca2 707 #define SQRADDDB \
wolfSSL 0:1239e9b70ca2 708 __asm__( \
wolfSSL 0:1239e9b70ca2 709 "addl %6,%0 \n\t" \
wolfSSL 0:1239e9b70ca2 710 "adcl %7,%1 \n\t" \
wolfSSL 0:1239e9b70ca2 711 "adcl %8,%2 \n\t" \
wolfSSL 0:1239e9b70ca2 712 "addl %6,%0 \n\t" \
wolfSSL 0:1239e9b70ca2 713 "adcl %7,%1 \n\t" \
wolfSSL 0:1239e9b70ca2 714 "adcl %8,%2 \n\t" \
wolfSSL 0:1239e9b70ca2 715 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");
wolfSSL 0:1239e9b70ca2 716
wolfSSL 0:1239e9b70ca2 717 #elif defined(TFM_ARM)
wolfSSL 0:1239e9b70ca2 718
wolfSSL 0:1239e9b70ca2 719 /* ARM code */
wolfSSL 0:1239e9b70ca2 720
wolfSSL 0:1239e9b70ca2 721 #define COMBA_START
wolfSSL 0:1239e9b70ca2 722
wolfSSL 0:1239e9b70ca2 723 #define CLEAR_CARRY \
wolfSSL 0:1239e9b70ca2 724 c0 = c1 = c2 = 0;
wolfSSL 0:1239e9b70ca2 725
wolfSSL 0:1239e9b70ca2 726 #define COMBA_STORE(x) \
wolfSSL 0:1239e9b70ca2 727 x = c0;
wolfSSL 0:1239e9b70ca2 728
wolfSSL 0:1239e9b70ca2 729 #define COMBA_STORE2(x) \
wolfSSL 0:1239e9b70ca2 730 x = c1;
wolfSSL 0:1239e9b70ca2 731
wolfSSL 0:1239e9b70ca2 732 #define CARRY_FORWARD \
wolfSSL 0:1239e9b70ca2 733 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 0:1239e9b70ca2 734
wolfSSL 0:1239e9b70ca2 735 #define COMBA_FINI
wolfSSL 0:1239e9b70ca2 736
wolfSSL 0:1239e9b70ca2 737 /* multiplies point i and j, updates carry "c1" and digit c2 */
wolfSSL 0:1239e9b70ca2 738 #define SQRADD(i, j) \
wolfSSL 0:1239e9b70ca2 739 __asm__( \
wolfSSL 0:1239e9b70ca2 740 " UMULL r0,r1,%6,%6 \n\t" \
wolfSSL 0:1239e9b70ca2 741 " ADDS %0,%0,r0 \n\t" \
wolfSSL 0:1239e9b70ca2 742 " ADCS %1,%1,r1 \n\t" \
wolfSSL 0:1239e9b70ca2 743 " ADC %2,%2,#0 \n\t" \
wolfSSL 0:1239e9b70ca2 744 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i) : "r0", "r1", "cc");
wolfSSL 0:1239e9b70ca2 745
wolfSSL 0:1239e9b70ca2 746 /* for squaring some of the terms are doubled... */
wolfSSL 0:1239e9b70ca2 747 #define SQRADD2(i, j) \
wolfSSL 0:1239e9b70ca2 748 __asm__( \
wolfSSL 0:1239e9b70ca2 749 " UMULL r0,r1,%6,%7 \n\t" \
wolfSSL 0:1239e9b70ca2 750 " ADDS %0,%0,r0 \n\t" \
wolfSSL 0:1239e9b70ca2 751 " ADCS %1,%1,r1 \n\t" \
wolfSSL 0:1239e9b70ca2 752 " ADC %2,%2,#0 \n\t" \
wolfSSL 0:1239e9b70ca2 753 " ADDS %0,%0,r0 \n\t" \
wolfSSL 0:1239e9b70ca2 754 " ADCS %1,%1,r1 \n\t" \
wolfSSL 0:1239e9b70ca2 755 " ADC %2,%2,#0 \n\t" \
wolfSSL 0:1239e9b70ca2 756 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "cc");
wolfSSL 0:1239e9b70ca2 757
wolfSSL 0:1239e9b70ca2 758 #define SQRADDSC(i, j) \
wolfSSL 0:1239e9b70ca2 759 __asm__( \
wolfSSL 0:1239e9b70ca2 760 " UMULL %0,%1,%3,%4 \n\t" \
wolfSSL 0:1239e9b70ca2 761 " SUB %2,%2,%2 \n\t" \
wolfSSL 0:1239e9b70ca2 762 :"=r"(sc0), "=r"(sc1), "=r"(sc2) : "r"(i), "r"(j) : "cc");
wolfSSL 0:1239e9b70ca2 763
wolfSSL 0:1239e9b70ca2 764 /* TAO removed sc0,1,2 as input to SQRADDSC (above) to remove warning, so %6,%7 became %3,%4 there */
wolfSSL 0:1239e9b70ca2 765
wolfSSL 0:1239e9b70ca2 766 #define SQRADDAC(i, j) \
wolfSSL 0:1239e9b70ca2 767 __asm__( \
wolfSSL 0:1239e9b70ca2 768 " UMULL r0,r1,%6,%7 \n\t" \
wolfSSL 0:1239e9b70ca2 769 " ADDS %0,%0,r0 \n\t" \
wolfSSL 0:1239e9b70ca2 770 " ADCS %1,%1,r1 \n\t" \
wolfSSL 0:1239e9b70ca2 771 " ADC %2,%2,#0 \n\t" \
wolfSSL 0:1239e9b70ca2 772 :"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "r0", "r1", "cc");
wolfSSL 0:1239e9b70ca2 773
wolfSSL 0:1239e9b70ca2 774 #define SQRADDDB \
wolfSSL 0:1239e9b70ca2 775 __asm__( \
wolfSSL 0:1239e9b70ca2 776 " ADDS %0,%0,%3 \n\t" \
wolfSSL 0:1239e9b70ca2 777 " ADCS %1,%1,%4 \n\t" \
wolfSSL 0:1239e9b70ca2 778 " ADC %2,%2,%5 \n\t" \
wolfSSL 0:1239e9b70ca2 779 " ADDS %0,%0,%3 \n\t" \
wolfSSL 0:1239e9b70ca2 780 " ADCS %1,%1,%4 \n\t" \
wolfSSL 0:1239e9b70ca2 781 " ADC %2,%2,%5 \n\t" \
wolfSSL 0:1239e9b70ca2 782 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
wolfSSL 0:1239e9b70ca2 783
wolfSSL 0:1239e9b70ca2 784 #elif defined(TFM_PPC32)
wolfSSL 0:1239e9b70ca2 785
wolfSSL 0:1239e9b70ca2 786 /* PPC32 */
wolfSSL 0:1239e9b70ca2 787
wolfSSL 0:1239e9b70ca2 788 #define COMBA_START
wolfSSL 0:1239e9b70ca2 789
wolfSSL 0:1239e9b70ca2 790 #define CLEAR_CARRY \
wolfSSL 0:1239e9b70ca2 791 c0 = c1 = c2 = 0;
wolfSSL 0:1239e9b70ca2 792
wolfSSL 0:1239e9b70ca2 793 #define COMBA_STORE(x) \
wolfSSL 0:1239e9b70ca2 794 x = c0;
wolfSSL 0:1239e9b70ca2 795
wolfSSL 0:1239e9b70ca2 796 #define COMBA_STORE2(x) \
wolfSSL 0:1239e9b70ca2 797 x = c1;
wolfSSL 0:1239e9b70ca2 798
wolfSSL 0:1239e9b70ca2 799 #define CARRY_FORWARD \
wolfSSL 0:1239e9b70ca2 800 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 0:1239e9b70ca2 801
wolfSSL 0:1239e9b70ca2 802 #define COMBA_FINI
wolfSSL 0:1239e9b70ca2 803
wolfSSL 0:1239e9b70ca2 804 /* multiplies point i and j, updates carry "c1" and digit c2 */
wolfSSL 0:1239e9b70ca2 805 #define SQRADD(i, j) \
wolfSSL 0:1239e9b70ca2 806 __asm__( \
wolfSSL 0:1239e9b70ca2 807 " mullw 16,%6,%6 \n\t" \
wolfSSL 0:1239e9b70ca2 808 " addc %0,%0,16 \n\t" \
wolfSSL 0:1239e9b70ca2 809 " mulhwu 16,%6,%6 \n\t" \
wolfSSL 0:1239e9b70ca2 810 " adde %1,%1,16 \n\t" \
wolfSSL 0:1239e9b70ca2 811 " addze %2,%2 \n\t" \
wolfSSL 0:1239e9b70ca2 812 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","cc");
wolfSSL 0:1239e9b70ca2 813
wolfSSL 0:1239e9b70ca2 814 /* for squaring some of the terms are doubled... */
wolfSSL 0:1239e9b70ca2 815 #define SQRADD2(i, j) \
wolfSSL 0:1239e9b70ca2 816 __asm__( \
wolfSSL 0:1239e9b70ca2 817 " mullw 16,%6,%7 \n\t" \
wolfSSL 0:1239e9b70ca2 818 " mulhwu 17,%6,%7 \n\t" \
wolfSSL 0:1239e9b70ca2 819 " addc %0,%0,16 \n\t" \
wolfSSL 0:1239e9b70ca2 820 " adde %1,%1,17 \n\t" \
wolfSSL 0:1239e9b70ca2 821 " addze %2,%2 \n\t" \
wolfSSL 0:1239e9b70ca2 822 " addc %0,%0,16 \n\t" \
wolfSSL 0:1239e9b70ca2 823 " adde %1,%1,17 \n\t" \
wolfSSL 0:1239e9b70ca2 824 " addze %2,%2 \n\t" \
wolfSSL 0:1239e9b70ca2 825 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","cc");
wolfSSL 0:1239e9b70ca2 826
wolfSSL 0:1239e9b70ca2 827 #define SQRADDSC(i, j) \
wolfSSL 0:1239e9b70ca2 828 __asm__( \
wolfSSL 0:1239e9b70ca2 829 " mullw %0,%6,%7 \n\t" \
wolfSSL 0:1239e9b70ca2 830 " mulhwu %1,%6,%7 \n\t" \
wolfSSL 0:1239e9b70ca2 831 " xor %2,%2,%2 \n\t" \
wolfSSL 0:1239e9b70ca2 832 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc");
wolfSSL 0:1239e9b70ca2 833
wolfSSL 0:1239e9b70ca2 834 #define SQRADDAC(i, j) \
wolfSSL 0:1239e9b70ca2 835 __asm__( \
wolfSSL 0:1239e9b70ca2 836 " mullw 16,%6,%7 \n\t" \
wolfSSL 0:1239e9b70ca2 837 " addc %0,%0,16 \n\t" \
wolfSSL 0:1239e9b70ca2 838 " mulhwu 16,%6,%7 \n\t" \
wolfSSL 0:1239e9b70ca2 839 " adde %1,%1,16 \n\t" \
wolfSSL 0:1239e9b70ca2 840 " addze %2,%2 \n\t" \
wolfSSL 0:1239e9b70ca2 841 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "cc");
wolfSSL 0:1239e9b70ca2 842
wolfSSL 0:1239e9b70ca2 843 #define SQRADDDB \
wolfSSL 0:1239e9b70ca2 844 __asm__( \
wolfSSL 0:1239e9b70ca2 845 " addc %0,%0,%3 \n\t" \
wolfSSL 0:1239e9b70ca2 846 " adde %1,%1,%4 \n\t" \
wolfSSL 0:1239e9b70ca2 847 " adde %2,%2,%5 \n\t" \
wolfSSL 0:1239e9b70ca2 848 " addc %0,%0,%3 \n\t" \
wolfSSL 0:1239e9b70ca2 849 " adde %1,%1,%4 \n\t" \
wolfSSL 0:1239e9b70ca2 850 " adde %2,%2,%5 \n\t" \
wolfSSL 0:1239e9b70ca2 851 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
wolfSSL 0:1239e9b70ca2 852
wolfSSL 0:1239e9b70ca2 853 #elif defined(TFM_PPC64)
wolfSSL 0:1239e9b70ca2 854 /* PPC64 */
wolfSSL 0:1239e9b70ca2 855
wolfSSL 0:1239e9b70ca2 856 #define COMBA_START
wolfSSL 0:1239e9b70ca2 857
wolfSSL 0:1239e9b70ca2 858 #define CLEAR_CARRY \
wolfSSL 0:1239e9b70ca2 859 c0 = c1 = c2 = 0;
wolfSSL 0:1239e9b70ca2 860
wolfSSL 0:1239e9b70ca2 861 #define COMBA_STORE(x) \
wolfSSL 0:1239e9b70ca2 862 x = c0;
wolfSSL 0:1239e9b70ca2 863
wolfSSL 0:1239e9b70ca2 864 #define COMBA_STORE2(x) \
wolfSSL 0:1239e9b70ca2 865 x = c1;
wolfSSL 0:1239e9b70ca2 866
wolfSSL 0:1239e9b70ca2 867 #define CARRY_FORWARD \
wolfSSL 0:1239e9b70ca2 868 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 0:1239e9b70ca2 869
wolfSSL 0:1239e9b70ca2 870 #define COMBA_FINI
wolfSSL 0:1239e9b70ca2 871
wolfSSL 0:1239e9b70ca2 872 /* multiplies point i and j, updates carry "c1" and digit c2 */
wolfSSL 0:1239e9b70ca2 873 #define SQRADD(i, j) \
wolfSSL 0:1239e9b70ca2 874 __asm__( \
wolfSSL 0:1239e9b70ca2 875 " mulld 16,%6,%6 \n\t" \
wolfSSL 0:1239e9b70ca2 876 " addc %0,%0,16 \n\t" \
wolfSSL 0:1239e9b70ca2 877 " mulhdu 16,%6,%6 \n\t" \
wolfSSL 0:1239e9b70ca2 878 " adde %1,%1,16 \n\t" \
wolfSSL 0:1239e9b70ca2 879 " addze %2,%2 \n\t" \
wolfSSL 0:1239e9b70ca2 880 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","cc");
wolfSSL 0:1239e9b70ca2 881
wolfSSL 0:1239e9b70ca2 882 /* for squaring some of the terms are doubled... */
wolfSSL 0:1239e9b70ca2 883 #define SQRADD2(i, j) \
wolfSSL 0:1239e9b70ca2 884 __asm__( \
wolfSSL 0:1239e9b70ca2 885 " mulld 16,%6,%7 \n\t" \
wolfSSL 0:1239e9b70ca2 886 " mulhdu 17,%6,%7 \n\t" \
wolfSSL 0:1239e9b70ca2 887 " addc %0,%0,16 \n\t" \
wolfSSL 0:1239e9b70ca2 888 " adde %1,%1,17 \n\t" \
wolfSSL 0:1239e9b70ca2 889 " addze %2,%2 \n\t" \
wolfSSL 0:1239e9b70ca2 890 " addc %0,%0,16 \n\t" \
wolfSSL 0:1239e9b70ca2 891 " adde %1,%1,17 \n\t" \
wolfSSL 0:1239e9b70ca2 892 " addze %2,%2 \n\t" \
wolfSSL 0:1239e9b70ca2 893 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","cc");
wolfSSL 0:1239e9b70ca2 894
wolfSSL 0:1239e9b70ca2 895 #define SQRADDSC(i, j) \
wolfSSL 0:1239e9b70ca2 896 __asm__( \
wolfSSL 0:1239e9b70ca2 897 " mulld %0,%6,%7 \n\t" \
wolfSSL 0:1239e9b70ca2 898 " mulhdu %1,%6,%7 \n\t" \
wolfSSL 0:1239e9b70ca2 899 " xor %2,%2,%2 \n\t" \
wolfSSL 0:1239e9b70ca2 900 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc");
wolfSSL 0:1239e9b70ca2 901
wolfSSL 0:1239e9b70ca2 902 #define SQRADDAC(i, j) \
wolfSSL 0:1239e9b70ca2 903 __asm__( \
wolfSSL 0:1239e9b70ca2 904 " mulld 16,%6,%7 \n\t" \
wolfSSL 0:1239e9b70ca2 905 " addc %0,%0,16 \n\t" \
wolfSSL 0:1239e9b70ca2 906 " mulhdu 16,%6,%7 \n\t" \
wolfSSL 0:1239e9b70ca2 907 " adde %1,%1,16 \n\t" \
wolfSSL 0:1239e9b70ca2 908 " addze %2,%2 \n\t" \
wolfSSL 0:1239e9b70ca2 909 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "cc");
wolfSSL 0:1239e9b70ca2 910
wolfSSL 0:1239e9b70ca2 911 #define SQRADDDB \
wolfSSL 0:1239e9b70ca2 912 __asm__( \
wolfSSL 0:1239e9b70ca2 913 " addc %0,%0,%3 \n\t" \
wolfSSL 0:1239e9b70ca2 914 " adde %1,%1,%4 \n\t" \
wolfSSL 0:1239e9b70ca2 915 " adde %2,%2,%5 \n\t" \
wolfSSL 0:1239e9b70ca2 916 " addc %0,%0,%3 \n\t" \
wolfSSL 0:1239e9b70ca2 917 " adde %1,%1,%4 \n\t" \
wolfSSL 0:1239e9b70ca2 918 " adde %2,%2,%5 \n\t" \
wolfSSL 0:1239e9b70ca2 919 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
wolfSSL 0:1239e9b70ca2 920
wolfSSL 0:1239e9b70ca2 921
wolfSSL 0:1239e9b70ca2 922 #elif defined(TFM_AVR32)
wolfSSL 0:1239e9b70ca2 923
wolfSSL 0:1239e9b70ca2 924 /* AVR32 */
wolfSSL 0:1239e9b70ca2 925
wolfSSL 0:1239e9b70ca2 926 #define COMBA_START
wolfSSL 0:1239e9b70ca2 927
wolfSSL 0:1239e9b70ca2 928 #define CLEAR_CARRY \
wolfSSL 0:1239e9b70ca2 929 c0 = c1 = c2 = 0;
wolfSSL 0:1239e9b70ca2 930
wolfSSL 0:1239e9b70ca2 931 #define COMBA_STORE(x) \
wolfSSL 0:1239e9b70ca2 932 x = c0;
wolfSSL 0:1239e9b70ca2 933
wolfSSL 0:1239e9b70ca2 934 #define COMBA_STORE2(x) \
wolfSSL 0:1239e9b70ca2 935 x = c1;
wolfSSL 0:1239e9b70ca2 936
wolfSSL 0:1239e9b70ca2 937 #define CARRY_FORWARD \
wolfSSL 0:1239e9b70ca2 938 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 0:1239e9b70ca2 939
wolfSSL 0:1239e9b70ca2 940 #define COMBA_FINI
wolfSSL 0:1239e9b70ca2 941
wolfSSL 0:1239e9b70ca2 942 /* multiplies point i and j, updates carry "c1" and digit c2 */
wolfSSL 0:1239e9b70ca2 943 #define SQRADD(i, j) \
wolfSSL 0:1239e9b70ca2 944 __asm__( \
wolfSSL 0:1239e9b70ca2 945 " mulu.d r2,%6,%6 \n\t" \
wolfSSL 0:1239e9b70ca2 946 " add %0,%0,r2 \n\t" \
wolfSSL 0:1239e9b70ca2 947 " adc %1,%1,r3 \n\t" \
wolfSSL 0:1239e9b70ca2 948 " acr %2 \n\t" \
wolfSSL 0:1239e9b70ca2 949 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r2","r3");
wolfSSL 0:1239e9b70ca2 950
wolfSSL 0:1239e9b70ca2 951 /* for squaring some of the terms are doubled: c2:c1:c0 += 2 * i * j (the product is added twice) */
wolfSSL 0:1239e9b70ca2 952 #define SQRADD2(i, j) \
wolfSSL 0:1239e9b70ca2 953 __asm__( \
wolfSSL 0:1239e9b70ca2 954 " mulu.d r2,%6,%7 \n\t" /* r3:r2 = i * j */ \
wolfSSL 0:1239e9b70ca2 955 " add %0,%0,r2 \n\t" /* first pass: c0 += low word */ \
wolfSSL 0:1239e9b70ca2 956 " adc %1,%1,r3 \n\t" /* c1 += high word + carry */ \
wolfSSL 0:1239e9b70ca2 957 " acr %2 \n\t" /* c2 += carry; acr takes one register operand (stray trailing comma removed) */ \
wolfSSL 0:1239e9b70ca2 958 " add %0,%0,r2 \n\t" /* second pass: add the same product again */ \
wolfSSL 0:1239e9b70ca2 959 " adc %1,%1,r3 \n\t" \
wolfSSL 0:1239e9b70ca2 960 " acr %2 \n\t" \
wolfSSL 0:1239e9b70ca2 961 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2", "r3");
wolfSSL 0:1239e9b70ca2 962
wolfSSL 0:1239e9b70ca2 963 #define SQRADDSC(i, j) \
wolfSSL 0:1239e9b70ca2 964 __asm__( \
wolfSSL 0:1239e9b70ca2 965 " mulu.d r2,%6,%7 \n\t" \
wolfSSL 0:1239e9b70ca2 966 " mov %0,r2 \n\t" \
wolfSSL 0:1239e9b70ca2 967 " mov %1,r3 \n\t" \
wolfSSL 0:1239e9b70ca2 968 " eor %2,%2 \n\t" \
wolfSSL 0:1239e9b70ca2 969 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "r2", "r3");
wolfSSL 0:1239e9b70ca2 970
wolfSSL 0:1239e9b70ca2 971 #define SQRADDAC(i, j) \
wolfSSL 0:1239e9b70ca2 972 __asm__( \
wolfSSL 0:1239e9b70ca2 973 " mulu.d r2,%6,%7 \n\t" \
wolfSSL 0:1239e9b70ca2 974 " add %0,%0,r2 \n\t" \
wolfSSL 0:1239e9b70ca2 975 " adc %1,%1,r3 \n\t" \
wolfSSL 0:1239e9b70ca2 976 " acr %2 \n\t" \
wolfSSL 0:1239e9b70ca2 977 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r2", "r3");
wolfSSL 0:1239e9b70ca2 978
wolfSSL 0:1239e9b70ca2 979 #define SQRADDDB \
wolfSSL 0:1239e9b70ca2 980 __asm__( \
wolfSSL 0:1239e9b70ca2 981 " add %0,%0,%3 \n\t" \
wolfSSL 0:1239e9b70ca2 982 " adc %1,%1,%4 \n\t" \
wolfSSL 0:1239e9b70ca2 983 " adc %2,%2,%5 \n\t" \
wolfSSL 0:1239e9b70ca2 984 " add %0,%0,%3 \n\t" \
wolfSSL 0:1239e9b70ca2 985 " adc %1,%1,%4 \n\t" \
wolfSSL 0:1239e9b70ca2 986 " adc %2,%2,%5 \n\t" \
wolfSSL 0:1239e9b70ca2 987 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
wolfSSL 0:1239e9b70ca2 988
wolfSSL 0:1239e9b70ca2 989
wolfSSL 0:1239e9b70ca2 990 #else
wolfSSL 0:1239e9b70ca2 991
wolfSSL 0:1239e9b70ca2 992 #define TFM_ISO
wolfSSL 0:1239e9b70ca2 993
wolfSSL 0:1239e9b70ca2 994 /* ISO C portable code */
wolfSSL 0:1239e9b70ca2 995
wolfSSL 0:1239e9b70ca2 996 #define COMBA_START
wolfSSL 0:1239e9b70ca2 997
wolfSSL 0:1239e9b70ca2 998 #define CLEAR_CARRY \
wolfSSL 0:1239e9b70ca2 999 c0 = c1 = c2 = 0;
wolfSSL 0:1239e9b70ca2 1000
wolfSSL 0:1239e9b70ca2 1001 #define COMBA_STORE(x) \
wolfSSL 0:1239e9b70ca2 1002 x = c0;
wolfSSL 0:1239e9b70ca2 1003
wolfSSL 0:1239e9b70ca2 1004 #define COMBA_STORE2(x) \
wolfSSL 0:1239e9b70ca2 1005 x = c1;
wolfSSL 0:1239e9b70ca2 1006
wolfSSL 0:1239e9b70ca2 1007 #define CARRY_FORWARD \
wolfSSL 0:1239e9b70ca2 1008 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 0:1239e9b70ca2 1009
wolfSSL 0:1239e9b70ca2 1010 #define COMBA_FINI
wolfSSL 0:1239e9b70ca2 1011
wolfSSL 0:1239e9b70ca2 1012 /* multiplies i and j and adds the product into the three-digit accumulator c2:c1:c0; i and j are each expanded once */
wolfSSL 0:1239e9b70ca2 1013 #define SQRADD(i, j) \
wolfSSL 0:1239e9b70ca2 1014 do { fp_word t; \
wolfSSL 0:1239e9b70ca2 1015 t = c0 + ((fp_word)i) * ((fp_word)j); c0 = (fp_digit)t; /* low digit of c0 + i*j stays in c0 */ \
wolfSSL 0:1239e9b70ca2 1016 t = c1 + (t >> DIGIT_BIT); c1 = (fp_digit)t; /* bits above DIGIT_BIT carry into c1 */ \
wolfSSL 0:1239e9b70ca2 1017 c2 +=(fp_digit) (t >> DIGIT_BIT); /* carry out of c1 goes into c2 */ \
wolfSSL 0:1239e9b70ca2 1018 } while (0);
wolfSSL 0:1239e9b70ca2 1019
wolfSSL 0:1239e9b70ca2 1020
wolfSSL 0:1239e9b70ca2 1021 /* for squaring some of the terms are doubled: c2:c1:c0 += 2 * i * j, by adding the product t twice */
wolfSSL 0:1239e9b70ca2 1022 #define SQRADD2(i, j) \
wolfSSL 0:1239e9b70ca2 1023 do { fp_word t; /* NOTE(review): tt is not declared here; presumably an fp_word in the enclosing scope - confirm */ \
wolfSSL 0:1239e9b70ca2 1024 t = ((fp_word)i) * ((fp_word)j); /* product computed once, i and j expanded once */ \
wolfSSL 0:1239e9b70ca2 1025 tt = (fp_word)c0 + t; c0 = (fp_digit)tt; /* first add: low digit into c0 */ \
wolfSSL 0:1239e9b70ca2 1026 tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = (fp_digit)tt; /* carry into c1 */ \
wolfSSL 0:1239e9b70ca2 1027 c2 +=(fp_digit)( tt >> DIGIT_BIT); /* carry into c2 */ \
wolfSSL 0:1239e9b70ca2 1028 tt = (fp_word)c0 + t; c0 = (fp_digit)tt; /* second add: same three steps again */ \
wolfSSL 0:1239e9b70ca2 1029 tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = (fp_digit)tt; \
wolfSSL 0:1239e9b70ca2 1030 c2 +=(fp_digit) (tt >> DIGIT_BIT); \
wolfSSL 0:1239e9b70ca2 1031 } while (0);
wolfSSL 0:1239e9b70ca2 1032
wolfSSL 0:1239e9b70ca2 1033 #define SQRADDSC(i, j) /* starts the secondary accumulator: sc2:sc1:sc0 = i * j */ \
wolfSSL 0:1239e9b70ca2 1034 do { fp_word t; \
wolfSSL 0:1239e9b70ca2 1035 t = ((fp_word)i) * ((fp_word)j); \
wolfSSL 0:1239e9b70ca2 1036 sc0 = (fp_digit)t; sc1 = (t >> DIGIT_BIT); sc2 = 0; /* low digit, high digit, top digit cleared */ \
wolfSSL 0:1239e9b70ca2 1037 } while (0);
wolfSSL 0:1239e9b70ca2 1038
wolfSSL 0:1239e9b70ca2 1039 #define SQRADDAC(i, j) /* accumulates into the secondary accumulator: sc2:sc1:sc0 += i * j */ \
wolfSSL 0:1239e9b70ca2 1040 do { fp_word t; \
wolfSSL 0:1239e9b70ca2 1041 t = sc0 + ((fp_word)i) * ((fp_word)j); sc0 = (fp_digit)t; /* low digit stays in sc0 */ \
wolfSSL 0:1239e9b70ca2 1042 t = sc1 + (t >> DIGIT_BIT); sc1 = (fp_digit)t; /* carry into sc1 */ \
wolfSSL 0:1239e9b70ca2 1043 sc2 += (fp_digit)(t >> DIGIT_BIT); /* carry into sc2 */ \
wolfSSL 0:1239e9b70ca2 1044 } while (0);
wolfSSL 0:1239e9b70ca2 1045
wolfSSL 0:1239e9b70ca2 1046 #define SQRADDDB /* folds the doubled secondary accumulator into the main one: c2:c1:c0 += 2 * sc2:sc1:sc0 */ \
wolfSSL 0:1239e9b70ca2 1047 do { fp_word t; \
wolfSSL 0:1239e9b70ca2 1048 t = ((fp_word)sc0) + ((fp_word)sc0) + c0; c0 = (fp_digit)t; /* c0 += 2*sc0 */ \
wolfSSL 0:1239e9b70ca2 1049 t = ((fp_word)sc1) + ((fp_word)sc1) + c1 + (t >> DIGIT_BIT); /* c1 += 2*sc1 + carry from c0 */ \
wolfSSL 0:1239e9b70ca2 1050 c1 = (fp_digit)t; \
wolfSSL 0:1239e9b70ca2 1051 c2 = c2 + (fp_digit)(((fp_word)sc2) + ((fp_word)sc2) + (t >> DIGIT_BIT)); /* c2 += 2*sc2 + carry from c1, truncated to fp_digit */ \
wolfSSL 0:1239e9b70ca2 1052 } while (0);
wolfSSL 0:1239e9b70ca2 1053
wolfSSL 0:1239e9b70ca2 1054 #endif
wolfSSL 0:1239e9b70ca2 1055
wolfSSL 0:1239e9b70ca2 1056 #ifdef TFM_SMALL_SET
wolfSSL 0:1239e9b70ca2 1057 #include "fp_sqr_comba_small_set.i"
wolfSSL 0:1239e9b70ca2 1058 #endif
wolfSSL 0:1239e9b70ca2 1059
wolfSSL 0:1239e9b70ca2 1060 #if defined(TFM_SQR3)
wolfSSL 0:1239e9b70ca2 1061 #include "fp_sqr_comba_3.i"
wolfSSL 0:1239e9b70ca2 1062 #endif
wolfSSL 0:1239e9b70ca2 1063 #if defined(TFM_SQR4)
wolfSSL 0:1239e9b70ca2 1064 #include "fp_sqr_comba_4.i"
wolfSSL 0:1239e9b70ca2 1065 #endif
wolfSSL 0:1239e9b70ca2 1066 #if defined(TFM_SQR6)
wolfSSL 0:1239e9b70ca2 1067 #include "fp_sqr_comba_6.i"
wolfSSL 0:1239e9b70ca2 1068 #endif
wolfSSL 0:1239e9b70ca2 1069 #if defined(TFM_SQR7)
wolfSSL 0:1239e9b70ca2 1070 #include "fp_sqr_comba_7.i"
wolfSSL 0:1239e9b70ca2 1071 #endif
wolfSSL 0:1239e9b70ca2 1072 #if defined(TFM_SQR8)
wolfSSL 0:1239e9b70ca2 1073 #include "fp_sqr_comba_8.i"
wolfSSL 0:1239e9b70ca2 1074 #endif
wolfSSL 0:1239e9b70ca2 1075 #if defined(TFM_SQR9)
wolfSSL 0:1239e9b70ca2 1076 #include "fp_sqr_comba_9.i"
wolfSSL 0:1239e9b70ca2 1077 #endif
wolfSSL 0:1239e9b70ca2 1078 #if defined(TFM_SQR12)
wolfSSL 0:1239e9b70ca2 1079 #include "fp_sqr_comba_12.i"
wolfSSL 0:1239e9b70ca2 1080 #endif
wolfSSL 0:1239e9b70ca2 1081 #if defined(TFM_SQR17)
wolfSSL 0:1239e9b70ca2 1082 #include "fp_sqr_comba_17.i"
wolfSSL 0:1239e9b70ca2 1083 #endif
wolfSSL 0:1239e9b70ca2 1084 #if defined(TFM_SQR20)
wolfSSL 0:1239e9b70ca2 1085 #include "fp_sqr_comba_20.i"
wolfSSL 0:1239e9b70ca2 1086 #endif
wolfSSL 0:1239e9b70ca2 1087 #if defined(TFM_SQR24)
wolfSSL 0:1239e9b70ca2 1088 #include "fp_sqr_comba_24.i"
wolfSSL 0:1239e9b70ca2 1089 #endif
wolfSSL 0:1239e9b70ca2 1090 #if defined(TFM_SQR28)
wolfSSL 0:1239e9b70ca2 1091 #include "fp_sqr_comba_28.i"
wolfSSL 0:1239e9b70ca2 1092 #endif
wolfSSL 0:1239e9b70ca2 1093 #if defined(TFM_SQR32)
wolfSSL 0:1239e9b70ca2 1094 #include "fp_sqr_comba_32.i"
wolfSSL 0:1239e9b70ca2 1095 #endif
wolfSSL 0:1239e9b70ca2 1096 #if defined(TFM_SQR48)
wolfSSL 0:1239e9b70ca2 1097 #include "fp_sqr_comba_48.i"
wolfSSL 0:1239e9b70ca2 1098 #endif
wolfSSL 0:1239e9b70ca2 1099 #if defined(TFM_SQR64)
wolfSSL 0:1239e9b70ca2 1100 #include "fp_sqr_comba_64.i"
wolfSSL 0:1239e9b70ca2 1101 #endif
wolfSSL 0:1239e9b70ca2 1102 /* end fp_sqr_comba.c asm */
wolfSSL 0:1239e9b70ca2 1103
wolfSSL 0:1239e9b70ca2 1104 /* start fp_mul_comba.c asm */
wolfSSL 0:1239e9b70ca2 1105 /* these are the combas. Worship them. */
wolfSSL 0:1239e9b70ca2 1106 #if defined(TFM_X86)
wolfSSL 0:1239e9b70ca2 1107 /* Generic x86 optimized code */
wolfSSL 0:1239e9b70ca2 1108
wolfSSL 0:1239e9b70ca2 1109 /* anything you need at the start */
wolfSSL 0:1239e9b70ca2 1110 #define COMBA_START
wolfSSL 0:1239e9b70ca2 1111
wolfSSL 0:1239e9b70ca2 1112 /* clear the chaining variables */
wolfSSL 0:1239e9b70ca2 1113 #define COMBA_CLEAR \
wolfSSL 0:1239e9b70ca2 1114 c0 = c1 = c2 = 0;
wolfSSL 0:1239e9b70ca2 1115
wolfSSL 0:1239e9b70ca2 1116 /* forward the carry to the next digit */
wolfSSL 0:1239e9b70ca2 1117 #define COMBA_FORWARD \
wolfSSL 0:1239e9b70ca2 1118 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 0:1239e9b70ca2 1119
wolfSSL 0:1239e9b70ca2 1120 /* store the first sum */
wolfSSL 0:1239e9b70ca2 1121 #define COMBA_STORE(x) \
wolfSSL 0:1239e9b70ca2 1122 x = c0;
wolfSSL 0:1239e9b70ca2 1123
wolfSSL 0:1239e9b70ca2 1124 /* store the second sum [carry] */
wolfSSL 0:1239e9b70ca2 1125 #define COMBA_STORE2(x) \
wolfSSL 0:1239e9b70ca2 1126 x = c1;
wolfSSL 0:1239e9b70ca2 1127
wolfSSL 0:1239e9b70ca2 1128 /* anything you need at the end */
wolfSSL 0:1239e9b70ca2 1129 #define COMBA_FINI
wolfSSL 0:1239e9b70ca2 1130
wolfSSL 0:1239e9b70ca2 1131 /* multiplies i and j and adds the 64-bit product into c2:c1:c0 (32-bit x86, AT&T operand order) */
wolfSSL 0:1239e9b70ca2 1132 #define MULADD(i, j) \
wolfSSL 0:1239e9b70ca2 1133 __asm__( \
wolfSSL 0:1239e9b70ca2 1134 "movl %6,%%eax \n\t" /* eax = i */ \
wolfSSL 0:1239e9b70ca2 1135 "mull %7 \n\t" /* edx:eax = eax * j (unsigned) */ \
wolfSSL 0:1239e9b70ca2 1136 "addl %%eax,%0 \n\t" /* c0 += low half */ \
wolfSSL 0:1239e9b70ca2 1137 "adcl %%edx,%1 \n\t" /* c1 += high half + carry */ \
wolfSSL 0:1239e9b70ca2 1138 "adcl $0,%2 \n\t" /* c2 += carry */ \
wolfSSL 0:1239e9b70ca2 1139 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc");
wolfSSL 0:1239e9b70ca2 1140
wolfSSL 0:1239e9b70ca2 1141 #elif defined(TFM_X86_64)
wolfSSL 0:1239e9b70ca2 1142 /* x86-64 optimized */
wolfSSL 0:1239e9b70ca2 1143
wolfSSL 0:1239e9b70ca2 1144 /* anything you need at the start */
wolfSSL 0:1239e9b70ca2 1145 #define COMBA_START
wolfSSL 0:1239e9b70ca2 1146
wolfSSL 0:1239e9b70ca2 1147 /* clear the chaining variables */
wolfSSL 0:1239e9b70ca2 1148 #define COMBA_CLEAR \
wolfSSL 0:1239e9b70ca2 1149 c0 = c1 = c2 = 0;
wolfSSL 0:1239e9b70ca2 1150
wolfSSL 0:1239e9b70ca2 1151 /* forward the carry to the next digit */
wolfSSL 0:1239e9b70ca2 1152 #define COMBA_FORWARD \
wolfSSL 0:1239e9b70ca2 1153 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 0:1239e9b70ca2 1154
wolfSSL 0:1239e9b70ca2 1155 /* store the first sum */
wolfSSL 0:1239e9b70ca2 1156 #define COMBA_STORE(x) \
wolfSSL 0:1239e9b70ca2 1157 x = c0;
wolfSSL 0:1239e9b70ca2 1158
wolfSSL 0:1239e9b70ca2 1159 /* store the second sum [carry] */
wolfSSL 0:1239e9b70ca2 1160 #define COMBA_STORE2(x) \
wolfSSL 0:1239e9b70ca2 1161 x = c1;
wolfSSL 0:1239e9b70ca2 1162
wolfSSL 0:1239e9b70ca2 1163 /* anything you need at the end */
wolfSSL 0:1239e9b70ca2 1164 #define COMBA_FINI
wolfSSL 0:1239e9b70ca2 1165
wolfSSL 0:1239e9b70ca2 1166 /* multiplies i and j and adds the 128-bit product into c2:c1:c0 (x86-64, AT&T operand order) */
wolfSSL 0:1239e9b70ca2 1167 #define MULADD(i, j) \
wolfSSL 0:1239e9b70ca2 1168 __asm__ ( \
wolfSSL 0:1239e9b70ca2 1169 "movq %6,%%rax \n\t" /* rax = i */ \
wolfSSL 0:1239e9b70ca2 1170 "mulq %7 \n\t" /* rdx:rax = rax * j (unsigned) */ \
wolfSSL 0:1239e9b70ca2 1171 "addq %%rax,%0 \n\t" /* c0 += low half */ \
wolfSSL 0:1239e9b70ca2 1172 "adcq %%rdx,%1 \n\t" /* c1 += high half + carry */ \
wolfSSL 0:1239e9b70ca2 1173 "adcq $0,%2 \n\t" /* c2 += carry */ \
wolfSSL 0:1239e9b70ca2 1174 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","cc");
wolfSSL 0:1239e9b70ca2 1175
wolfSSL 0:1239e9b70ca2 1176 #elif defined(TFM_SSE2)
wolfSSL 0:1239e9b70ca2 1177 /* use SSE2 optimizations */
wolfSSL 0:1239e9b70ca2 1178
wolfSSL 0:1239e9b70ca2 1179 /* anything you need at the start */
wolfSSL 0:1239e9b70ca2 1180 #define COMBA_START
wolfSSL 0:1239e9b70ca2 1181
wolfSSL 0:1239e9b70ca2 1182 /* clear the chaining variables */
wolfSSL 0:1239e9b70ca2 1183 #define COMBA_CLEAR \
wolfSSL 0:1239e9b70ca2 1184 c0 = c1 = c2 = 0;
wolfSSL 0:1239e9b70ca2 1185
wolfSSL 0:1239e9b70ca2 1186 /* forward the carry to the next digit */
wolfSSL 0:1239e9b70ca2 1187 #define COMBA_FORWARD \
wolfSSL 0:1239e9b70ca2 1188 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 0:1239e9b70ca2 1189
wolfSSL 0:1239e9b70ca2 1190 /* store the first sum */
wolfSSL 0:1239e9b70ca2 1191 #define COMBA_STORE(x) \
wolfSSL 0:1239e9b70ca2 1192 x = c0;
wolfSSL 0:1239e9b70ca2 1193
wolfSSL 0:1239e9b70ca2 1194 /* store the second sum [carry] */
wolfSSL 0:1239e9b70ca2 1195 #define COMBA_STORE2(x) \
wolfSSL 0:1239e9b70ca2 1196 x = c1;
wolfSSL 0:1239e9b70ca2 1197
wolfSSL 0:1239e9b70ca2 1198 /* anything you need at the end */
wolfSSL 0:1239e9b70ca2 1199 #define COMBA_FINI \
wolfSSL 0:1239e9b70ca2 1200 __asm__("emms");
wolfSSL 0:1239e9b70ca2 1201
wolfSSL 0:1239e9b70ca2 1202 /* this should multiply i and j */
wolfSSL 0:1239e9b70ca2 1203 #define MULADD(i, j) \
wolfSSL 0:1239e9b70ca2 1204 __asm__( \
wolfSSL 0:1239e9b70ca2 1205 "movd %6,%%mm0 \n\t" \
wolfSSL 0:1239e9b70ca2 1206 "movd %7,%%mm1 \n\t" \
wolfSSL 0:1239e9b70ca2 1207 "pmuludq %%mm1,%%mm0\n\t" \
wolfSSL 0:1239e9b70ca2 1208 "movd %%mm0,%%eax \n\t" \
wolfSSL 0:1239e9b70ca2 1209 "psrlq $32,%%mm0 \n\t" \
wolfSSL 0:1239e9b70ca2 1210 "addl %%eax,%0 \n\t" \
wolfSSL 0:1239e9b70ca2 1211 "movd %%mm0,%%eax \n\t" \
wolfSSL 0:1239e9b70ca2 1212 "adcl %%eax,%1 \n\t" \
wolfSSL 0:1239e9b70ca2 1213 "adcl $0,%2 \n\t" \
wolfSSL 0:1239e9b70ca2 1214 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","cc");
wolfSSL 0:1239e9b70ca2 1215
wolfSSL 0:1239e9b70ca2 1216 #elif defined(TFM_ARM)
wolfSSL 0:1239e9b70ca2 1217 /* ARM code */
wolfSSL 0:1239e9b70ca2 1218
wolfSSL 0:1239e9b70ca2 1219 #define COMBA_START
wolfSSL 0:1239e9b70ca2 1220
wolfSSL 0:1239e9b70ca2 1221 #define COMBA_CLEAR \
wolfSSL 0:1239e9b70ca2 1222 c0 = c1 = c2 = 0;
wolfSSL 0:1239e9b70ca2 1223
wolfSSL 0:1239e9b70ca2 1224 #define COMBA_FORWARD \
wolfSSL 0:1239e9b70ca2 1225 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 0:1239e9b70ca2 1226
wolfSSL 0:1239e9b70ca2 1227 #define COMBA_STORE(x) \
wolfSSL 0:1239e9b70ca2 1228 x = c0;
wolfSSL 0:1239e9b70ca2 1229
wolfSSL 0:1239e9b70ca2 1230 #define COMBA_STORE2(x) \
wolfSSL 0:1239e9b70ca2 1231 x = c1;
wolfSSL 0:1239e9b70ca2 1232
wolfSSL 0:1239e9b70ca2 1233 #define COMBA_FINI
wolfSSL 0:1239e9b70ca2 1234
wolfSSL 0:1239e9b70ca2 1235 #define MULADD(i, j) \
wolfSSL 0:1239e9b70ca2 1236 __asm__( \
wolfSSL 0:1239e9b70ca2 1237 " UMULL r0,r1,%6,%7 \n\t" \
wolfSSL 0:1239e9b70ca2 1238 " ADDS %0,%0,r0 \n\t" \
wolfSSL 0:1239e9b70ca2 1239 " ADCS %1,%1,r1 \n\t" \
wolfSSL 0:1239e9b70ca2 1240 " ADC %2,%2,#0 \n\t" \
wolfSSL 0:1239e9b70ca2 1241 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "cc");
wolfSSL 0:1239e9b70ca2 1242
wolfSSL 0:1239e9b70ca2 1243 #elif defined(TFM_PPC32)
wolfSSL 0:1239e9b70ca2 1244 /* For 32-bit PPC */
wolfSSL 0:1239e9b70ca2 1245
wolfSSL 0:1239e9b70ca2 1246 #define COMBA_START
wolfSSL 0:1239e9b70ca2 1247
wolfSSL 0:1239e9b70ca2 1248 #define COMBA_CLEAR \
wolfSSL 0:1239e9b70ca2 1249 c0 = c1 = c2 = 0;
wolfSSL 0:1239e9b70ca2 1250
wolfSSL 0:1239e9b70ca2 1251 #define COMBA_FORWARD \
wolfSSL 0:1239e9b70ca2 1252 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 0:1239e9b70ca2 1253
wolfSSL 0:1239e9b70ca2 1254 #define COMBA_STORE(x) \
wolfSSL 0:1239e9b70ca2 1255 x = c0;
wolfSSL 0:1239e9b70ca2 1256
wolfSSL 0:1239e9b70ca2 1257 #define COMBA_STORE2(x) \
wolfSSL 0:1239e9b70ca2 1258 x = c1;
wolfSSL 0:1239e9b70ca2 1259
wolfSSL 0:1239e9b70ca2 1260 #define COMBA_FINI
wolfSSL 0:1239e9b70ca2 1261
wolfSSL 0:1239e9b70ca2 1262 /* c2:c1:c0 += i * j using r16 as scratch. untested: will mulhwu change the flags? Docs say no */
wolfSSL 0:1239e9b70ca2 1263 #define MULADD(i, j) \
wolfSSL 0:1239e9b70ca2 1264 __asm__( \
wolfSSL 0:1239e9b70ca2 1265 " mullw 16,%6,%7 \n\t" /* r16 = low word of i * j */ \
wolfSSL 0:1239e9b70ca2 1266 " addc %0,%0,16 \n\t" /* c0 += r16, sets carry */ \
wolfSSL 0:1239e9b70ca2 1267 " mulhwu 16,%6,%7 \n\t" /* r16 = high word of i * j; the carry from addc must survive this */ \
wolfSSL 0:1239e9b70ca2 1268 " adde %1,%1,16 \n\t" /* c1 += r16 + carry */ \
wolfSSL 0:1239e9b70ca2 1269 " addze %2,%2 \n\t" /* c2 += carry */ \
wolfSSL 0:1239e9b70ca2 1270 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16");
wolfSSL 0:1239e9b70ca2 1271
wolfSSL 0:1239e9b70ca2 1272 #elif defined(TFM_PPC64)
wolfSSL 0:1239e9b70ca2 1273 /* For 64-bit PPC */
wolfSSL 0:1239e9b70ca2 1274
wolfSSL 0:1239e9b70ca2 1275 #define COMBA_START
wolfSSL 0:1239e9b70ca2 1276
wolfSSL 0:1239e9b70ca2 1277 #define COMBA_CLEAR \
wolfSSL 0:1239e9b70ca2 1278 c0 = c1 = c2 = 0;
wolfSSL 0:1239e9b70ca2 1279
wolfSSL 0:1239e9b70ca2 1280 #define COMBA_FORWARD \
wolfSSL 0:1239e9b70ca2 1281 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
wolfSSL 0:1239e9b70ca2 1282
wolfSSL 0:1239e9b70ca2 1283 #define COMBA_STORE(x) \
wolfSSL 0:1239e9b70ca2 1284 x = c0;
wolfSSL 0:1239e9b70ca2 1285
wolfSSL 0:1239e9b70ca2 1286 #define COMBA_STORE2(x) \
wolfSSL 0:1239e9b70ca2 1287 x = c1;
wolfSSL 0:1239e9b70ca2 1288
wolfSSL 0:1239e9b70ca2 1289 #define COMBA_FINI
wolfSSL 0:1239e9b70ca2 1290
wolfSSL 0:1239e9b70ca2 1291 /* c2:c1:c0 += i * j using r16 as scratch. untested: will mulhdu change the flags? Docs say no */
wolfSSL 0:1239e9b70ca2 1292 #define MULADD(i, j) \
wolfSSL 0:1239e9b70ca2 1293 __asm__( \
wolfSSL 0:1239e9b70ca2 1294 " mulld 16,%6,%7 \n\t" /* r16 = low doubleword of i * j */ \
wolfSSL 0:1239e9b70ca2 1295 " addc %0,%0,16 \n\t" /* c0 += r16, sets carry */ \
wolfSSL 0:1239e9b70ca2 1296 " mulhdu 16,%6,%7 \n\t" /* r16 = high doubleword of i * j; the carry from addc must survive this */ \
wolfSSL 0:1239e9b70ca2 1297 " adde %1,%1,16 \n\t" /* c1 += r16 + carry */ \
wolfSSL 0:1239e9b70ca2 1298 " addze %2,%2 \n\t" /* c2 += carry */ \
wolfSSL 0:1239e9b70ca2 1299 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16");
wolfSSL 0:1239e9b70ca2 1300
wolfSSL 0:1239e9b70ca2 1301 #elif defined(TFM_AVR32)
wolfSSL 0:1239e9b70ca2 1302
/* For AVR32 (GCC inline assembly) */
wolfSSL 0:1239e9b70ca2 1304
/* Accumulator helpers for the AVR32 branch.  They operate on the variables
 * c0, c1 and c2 that the expanding code must have in scope; each non-empty
 * expansion carries its own trailing ';'. */

#define COMBA_START                 /* nothing to set up */

#define COMBA_CLEAR c0 = c1 = c2 = 0;   /* reset all three words */

/* Drop the lowest word and slide the rest down: c0 <- c1, c1 <- c2, c2 <- 0. */
#define COMBA_FORWARD \
   do {               \
      c0 = c1;        \
      c1 = c2;        \
      c2 = 0;         \
   } while (0);

#define COMBA_STORE(x) x = c0;      /* write out lowest word */

#define COMBA_STORE2(x) x = c1;     /* write out second word */

#define COMBA_FINI                  /* nothing to tear down */
wolfSSL 0:1239e9b70ca2 1320
/*
 * MULADD(i, j) -- AVR32.
 *
 * Adds the double-width product i * j into the three-word accumulator
 * c2:c1:c0 (variables that must be in scope at the point of use):
 *
 *   mulu.d r2,i,j    unsigned multiply, result in the register pair r3:r2
 *   add    c0,r2     c0 += low word           (produces carry)
 *   adc    c1,c1,r3  c1 += high word + carry
 *   acr    c2        c2 += carry
 *
 * The keyword is __asm__ (two leading underscores); the previous spelling
 * with four underscores is not a keyword and could not compile.
 *
 * r2/r3 are scratch and listed as clobbers.  "cc" is listed as well because
 * add/adc/acr modify the status flags.
 */
#define MULADD(i, j) \
__asm__( \
" mulu.d r2,%6,%7 \n\t"\
" add %0,r2 \n\t"\
" adc %1,%1,r3 \n\t"\
" acr %2 \n\t"\
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2","r3","cc");
wolfSSL 0:1239e9b70ca2 1328
wolfSSL 0:1239e9b70ca2 1329 #else
wolfSSL 0:1239e9b70ca2 1330 /* ISO C code */
wolfSSL 0:1239e9b70ca2 1331
/* Accumulator helpers for the portable C branch.  c0, c1 and c2 are not
 * declared here; they must exist in the scope where a macro is expanded.
 * Non-empty expansions end with their own ';'. */

/* Empty: the C version needs no setup. */
#define COMBA_START

/* c0, c1 and c2 all become 0. */
#define COMBA_CLEAR c0 = c1 = c2 = 0;

/* Advance to the next output column: c0 <- c1, c1 <- c2, c2 <- 0. */
#define COMBA_FORWARD \
   do {               \
      c0 = c1;        \
      c1 = c2;        \
      c2 = 0;         \
   } while (0);

/* Assign c0 to x. */
#define COMBA_STORE(x) x = c0;

/* Assign c1 to x. */
#define COMBA_STORE2(x) x = c1;

/* Empty: the C version needs no teardown. */
#define COMBA_FINI
wolfSSL 0:1239e9b70ca2 1347
/*
 * MULADD(i, j) -- portable C.
 *
 * Adds the product i * j into the three-digit accumulator c2:c1:c0:
 *   1. t  = c0 + i * j, computed in fp_word; the low digit goes back to c0
 *   2. t  = c1 + (bits of t above DIGIT_BIT); the low digit goes back to c1
 *   3. c2 += bits of the second sum above DIGIT_BIT
 *
 * Assumes fp_word can hold a full digit-by-digit product plus a digit
 * (fp_word, fp_digit and DIGIT_BIT are defined outside this section --
 * confirm against their definitions).  The expansion ends with ';'.
 */
#define MULADD(i, j)                                     \
   do {                                                  \
      fp_word t;                                         \
      t  = (fp_word)c0 + ((fp_word)i) * ((fp_word)j);    \
      c0 = (fp_digit)t;                                  \
      t  = (fp_word)c1 + (t >> DIGIT_BIT);               \
      c1 = (fp_digit)t;                                  \
      c2 += (fp_digit)(t >> DIGIT_BIT);                  \
   } while (0);
wolfSSL 0:1239e9b70ca2 1354
wolfSSL 0:1239e9b70ca2 1355 #endif
wolfSSL 0:1239e9b70ca2 1356
wolfSSL 0:1239e9b70ca2 1357
wolfSSL 0:1239e9b70ca2 1358 #ifdef TFM_SMALL_SET
wolfSSL 0:1239e9b70ca2 1359 #include "fp_mul_comba_small_set.i"
wolfSSL 0:1239e9b70ca2 1360 #endif
wolfSSL 0:1239e9b70ca2 1361
wolfSSL 0:1239e9b70ca2 1362 #if defined(TFM_MUL3)
wolfSSL 0:1239e9b70ca2 1363 #include "fp_mul_comba_3.i"
wolfSSL 0:1239e9b70ca2 1364 #endif
wolfSSL 0:1239e9b70ca2 1365 #if defined(TFM_MUL4)
wolfSSL 0:1239e9b70ca2 1366 #include "fp_mul_comba_4.i"
wolfSSL 0:1239e9b70ca2 1367 #endif
wolfSSL 0:1239e9b70ca2 1368 #if defined(TFM_MUL6)
wolfSSL 0:1239e9b70ca2 1369 #include "fp_mul_comba_6.i"
wolfSSL 0:1239e9b70ca2 1370 #endif
wolfSSL 0:1239e9b70ca2 1371 #if defined(TFM_MUL7)
wolfSSL 0:1239e9b70ca2 1372 #include "fp_mul_comba_7.i"
wolfSSL 0:1239e9b70ca2 1373 #endif
wolfSSL 0:1239e9b70ca2 1374 #if defined(TFM_MUL8)
wolfSSL 0:1239e9b70ca2 1375 #include "fp_mul_comba_8.i"
wolfSSL 0:1239e9b70ca2 1376 #endif
wolfSSL 0:1239e9b70ca2 1377 #if defined(TFM_MUL9)
wolfSSL 0:1239e9b70ca2 1378 #include "fp_mul_comba_9.i"
wolfSSL 0:1239e9b70ca2 1379 #endif
wolfSSL 0:1239e9b70ca2 1380 #if defined(TFM_MUL12)
wolfSSL 0:1239e9b70ca2 1381 #include "fp_mul_comba_12.i"
wolfSSL 0:1239e9b70ca2 1382 #endif
wolfSSL 0:1239e9b70ca2 1383 #if defined(TFM_MUL17)
wolfSSL 0:1239e9b70ca2 1384 #include "fp_mul_comba_17.i"
wolfSSL 0:1239e9b70ca2 1385 #endif
wolfSSL 0:1239e9b70ca2 1386 #if defined(TFM_MUL20)
wolfSSL 0:1239e9b70ca2 1387 #include "fp_mul_comba_20.i"
wolfSSL 0:1239e9b70ca2 1388 #endif
wolfSSL 0:1239e9b70ca2 1389 #if defined(TFM_MUL24)
wolfSSL 0:1239e9b70ca2 1390 #include "fp_mul_comba_24.i"
wolfSSL 0:1239e9b70ca2 1391 #endif
wolfSSL 0:1239e9b70ca2 1392 #if defined(TFM_MUL28)
wolfSSL 0:1239e9b70ca2 1393 #include "fp_mul_comba_28.i"
wolfSSL 0:1239e9b70ca2 1394 #endif
wolfSSL 0:1239e9b70ca2 1395 #if defined(TFM_MUL32)
wolfSSL 0:1239e9b70ca2 1396 #include "fp_mul_comba_32.i"
wolfSSL 0:1239e9b70ca2 1397 #endif
wolfSSL 0:1239e9b70ca2 1398 #if defined(TFM_MUL48)
wolfSSL 0:1239e9b70ca2 1399 #include "fp_mul_comba_48.i"
wolfSSL 0:1239e9b70ca2 1400 #endif
wolfSSL 0:1239e9b70ca2 1401 #if defined(TFM_MUL64)
wolfSSL 0:1239e9b70ca2 1402 #include "fp_mul_comba_64.i"
wolfSSL 0:1239e9b70ca2 1403 #endif
wolfSSL 0:1239e9b70ca2 1404
wolfSSL 0:1239e9b70ca2 1405 /* end fp_mul_comba.c asm */
wolfSSL 0:1239e9b70ca2 1406
wolfSSL 0:1239e9b70ca2 1407