A library for setting up Secure Socket Layer (SSL) connections and verifying remote hosts using certificates. Contains only the source files for mbed platform implementation of the library.

Dependents:   HTTPClient-SSL HTTPClient-SSL HTTPClient-SSL HTTPClient-SSL

Committer:
Mike Fiore
Date:
Mon Mar 23 16:51:07 2015 -0500
Revision:
6:cf58d49e1a86
Parent:
0:b86d15c6ba29
fix whitespace in sha512.c

Who changed what in which revision?

User | Revision | Line number | New contents of line
Vanger 0:b86d15c6ba29 1 /* asm.c
Vanger 0:b86d15c6ba29 2 *
Vanger 0:b86d15c6ba29 3 * Copyright (C) 2006-2014 wolfSSL Inc.
Vanger 0:b86d15c6ba29 4 *
Vanger 0:b86d15c6ba29 5 * This file is part of CyaSSL.
Vanger 0:b86d15c6ba29 6 *
Vanger 0:b86d15c6ba29 7 * CyaSSL is free software; you can redistribute it and/or modify
Vanger 0:b86d15c6ba29 8 * it under the terms of the GNU General Public License as published by
Vanger 0:b86d15c6ba29 9 * the Free Software Foundation; either version 2 of the License, or
Vanger 0:b86d15c6ba29 10 * (at your option) any later version.
Vanger 0:b86d15c6ba29 11 *
Vanger 0:b86d15c6ba29 12 * CyaSSL is distributed in the hope that it will be useful,
Vanger 0:b86d15c6ba29 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Vanger 0:b86d15c6ba29 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Vanger 0:b86d15c6ba29 15 * GNU General Public License for more details.
Vanger 0:b86d15c6ba29 16 *
Vanger 0:b86d15c6ba29 17 * You should have received a copy of the GNU General Public License
Vanger 0:b86d15c6ba29 18 * along with this program; if not, write to the Free Software
Vanger 0:b86d15c6ba29 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
Vanger 0:b86d15c6ba29 20 */
Vanger 0:b86d15c6ba29 21
Vanger 0:b86d15c6ba29 22 #ifdef HAVE_CONFIG_H
Vanger 0:b86d15c6ba29 23 #include <config.h>
Vanger 0:b86d15c6ba29 24 #endif
Vanger 0:b86d15c6ba29 25
Vanger 0:b86d15c6ba29 26 #include <cyassl/ctaocrypt/settings.h>
Vanger 0:b86d15c6ba29 27
Vanger 0:b86d15c6ba29 28 /*
Vanger 0:b86d15c6ba29 29 * Based on public domain TomsFastMath 0.10 by Tom St Denis, tomstdenis@iahu.ca,
Vanger 0:b86d15c6ba29 30 * http://math.libtomcrypt.com
Vanger 0:b86d15c6ba29 31 */
Vanger 0:b86d15c6ba29 32
Vanger 0:b86d15c6ba29 33
/******************************************************************/
/* fp_montgomery_reduce.c asm or generic */
/*
 * Each arch section below defines the inner-loop primitives consumed by
 * the Montgomery reduction routine (caller not visible in this chunk):
 *   MONT_START / MONT_FINI  - per-call setup / teardown
 *   LOOP_START / LOOP_END   - per-digit setup / teardown
 *   INNERMUL                - _c[LO] += mu * (*tmpm++) + cy; cy = high word
 *   PROPCARRY               - _c[LO] += cy; cy = carry out (0 or 1)
 * Caller-scope variables: mu (Montgomery multiplier for this digit),
 * cy (running carry), _c (output digit pointer), tmpm (modulus digits),
 * c/x/mp (result array, digit index, -1/m mod b).
 */
#if defined(TFM_X86) && !defined(TFM_SSE2)
/* x86-32 code */

/* No per-call or per-digit setup is needed on plain x86-32. */
#define MONT_START
#define MONT_FINI
#define LOOP_END
/* mu = c[x] * mp  (mod 2^32, by natural truncation) */
#define LOOP_START \
   mu = c[x] * mp

/* 32x32->64 multiply via mull (result in edx:eax), then fold in the old
 * carry and the current digit; the new carry is the high half in edx. */
#define INNERMUL \
__asm__( \
   "movl %5,%%eax \n\t" \
   "mull %4 \n\t" \
   "addl %1,%%eax \n\t" \
   "adcl $0,%%edx \n\t" \
   "addl %%eax,%0 \n\t" \
   "adcl $0,%%edx \n\t" \
   "movl %%edx,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
:"0"(_c[LO]), "1"(cy), "g"(mu), "g"(*tmpm++) \
: "%eax", "%edx", "cc")

/* Add the carry into the digit; setb captures CF, movzbl widens it so
 * cy ends up as exactly 0 or 1. */
#define PROPCARRY \
__asm__( \
   "addl %1,%0 \n\t" \
   "setb %%al \n\t" \
   "movzbl %%al,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
:"0"(_c[LO]), "1"(cy) \
: "%eax", "cc")

/******************************************************************/
#elif defined(TFM_X86_64)
/* x86-64 code */

/* No per-call or per-digit setup is needed on x86-64. */
#define MONT_START
#define MONT_FINI
#define LOOP_END
/* mu = c[x] * mp  (mod 2^64, by natural truncation) */
#define LOOP_START \
   mu = c[x] * mp

/* 64x64->128 multiply via mulq (result in rdx:rax); fold in the old
 * carry and the current digit; new carry is the high half in rdx. */
#define INNERMUL \
__asm__( \
   "movq %5,%%rax \n\t" \
   "mulq %4 \n\t" \
   "addq %1,%%rax \n\t" \
   "adcq $0,%%rdx \n\t" \
   "addq %%rax,%0 \n\t" \
   "adcq $0,%%rdx \n\t" \
   "movq %%rdx,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
:"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \
: "%rax", "%rdx", "cc")

/* Unrolled INNERMUL x8: processes tmpm[0..7] against _c[0..7] in one asm
 * block.  r10 holds the next _c digit, r11 pre-loads the next tmpm digit
 * so the load overlaps the multiply; %3 (cy) is re-read each step because
 * %1 is written the step before.  Operands: %0/%2 = _c, %1/%3 = cy,
 * %4 = mu, %5 = tmpm. */
#define INNERMUL8 \
 __asm__( \
 "movq 0(%5),%%rax \n\t" \
 "movq 0(%2),%%r10 \n\t" \
 "movq 0x8(%5),%%r11 \n\t" \
 "mulq %4 \n\t" \
 "addq %%r10,%%rax \n\t" \
 "adcq $0,%%rdx \n\t" \
 "movq 0x8(%2),%%r10 \n\t" \
 "addq %3,%%rax \n\t" \
 "adcq $0,%%rdx \n\t" \
 "movq %%rax,0(%0) \n\t" \
 "movq %%rdx,%1 \n\t" \
 \
 "movq %%r11,%%rax \n\t" \
 "movq 0x10(%5),%%r11 \n\t" \
 "mulq %4 \n\t" \
 "addq %%r10,%%rax \n\t" \
 "adcq $0,%%rdx \n\t" \
 "movq 0x10(%2),%%r10 \n\t" \
 "addq %3,%%rax \n\t" \
 "adcq $0,%%rdx \n\t" \
 "movq %%rax,0x8(%0) \n\t" \
 "movq %%rdx,%1 \n\t" \
 \
 "movq %%r11,%%rax \n\t" \
 "movq 0x18(%5),%%r11 \n\t" \
 "mulq %4 \n\t" \
 "addq %%r10,%%rax \n\t" \
 "adcq $0,%%rdx \n\t" \
 "movq 0x18(%2),%%r10 \n\t" \
 "addq %3,%%rax \n\t" \
 "adcq $0,%%rdx \n\t" \
 "movq %%rax,0x10(%0) \n\t" \
 "movq %%rdx,%1 \n\t" \
 \
 "movq %%r11,%%rax \n\t" \
 "movq 0x20(%5),%%r11 \n\t" \
 "mulq %4 \n\t" \
 "addq %%r10,%%rax \n\t" \
 "adcq $0,%%rdx \n\t" \
 "movq 0x20(%2),%%r10 \n\t" \
 "addq %3,%%rax \n\t" \
 "adcq $0,%%rdx \n\t" \
 "movq %%rax,0x18(%0) \n\t" \
 "movq %%rdx,%1 \n\t" \
 \
 "movq %%r11,%%rax \n\t" \
 "movq 0x28(%5),%%r11 \n\t" \
 "mulq %4 \n\t" \
 "addq %%r10,%%rax \n\t" \
 "adcq $0,%%rdx \n\t" \
 "movq 0x28(%2),%%r10 \n\t" \
 "addq %3,%%rax \n\t" \
 "adcq $0,%%rdx \n\t" \
 "movq %%rax,0x20(%0) \n\t" \
 "movq %%rdx,%1 \n\t" \
 \
 "movq %%r11,%%rax \n\t" \
 "movq 0x30(%5),%%r11 \n\t" \
 "mulq %4 \n\t" \
 "addq %%r10,%%rax \n\t" \
 "adcq $0,%%rdx \n\t" \
 "movq 0x30(%2),%%r10 \n\t" \
 "addq %3,%%rax \n\t" \
 "adcq $0,%%rdx \n\t" \
 "movq %%rax,0x28(%0) \n\t" \
 "movq %%rdx,%1 \n\t" \
 \
 "movq %%r11,%%rax \n\t" \
 "movq 0x38(%5),%%r11 \n\t" \
 "mulq %4 \n\t" \
 "addq %%r10,%%rax \n\t" \
 "adcq $0,%%rdx \n\t" \
 "movq 0x38(%2),%%r10 \n\t" \
 "addq %3,%%rax \n\t" \
 "adcq $0,%%rdx \n\t" \
 "movq %%rax,0x30(%0) \n\t" \
 "movq %%rdx,%1 \n\t" \
 \
 "movq %%r11,%%rax \n\t" \
 "mulq %4 \n\t" \
 "addq %%r10,%%rax \n\t" \
 "adcq $0,%%rdx \n\t" \
 "addq %3,%%rax \n\t" \
 "adcq $0,%%rdx \n\t" \
 "movq %%rax,0x38(%0) \n\t" \
 "movq %%rdx,%1 \n\t" \
 \
 :"=r"(_c), "=r"(cy) \
 : "0"(_c), "1"(cy), "g"(mu), "r"(tmpm)\
 : "%rax", "%rdx", "%r10", "%r11", "cc")


/* Add the carry into the digit; setb/movzbq leaves cy as exactly 0 or 1. */
#define PROPCARRY \
__asm__( \
   "addq %1,%0 \n\t" \
   "setb %%al \n\t" \
   "movzbq %%al,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
:"0"(_c[LO]), "1"(cy) \
: "%rax", "cc")

/******************************************************************/
#elif defined(TFM_SSE2)
/* SSE2 code (assumes 32-bit fp_digits) */
/* MMX register assignments (pmuludq is used on the mm registers):
 * mm0  *tmpm++, then Mu * (*tmpm++)
 * mm1  c[x], then Mu
 * mm2  mp
 * mm3  cy
 * mm4  _c[LO]
 */

/* Park mp in mm2 for the whole reduction. */
#define MONT_START \
   __asm__("movd %0,%%mm2"::"g"(mp))

/* emms restores the x87 state after MMX use. */
#define MONT_FINI \
   __asm__("emms")

/* mm1 = mu = c[x] * mp (low 32 bits of pmuludq); mm3 (carry) cleared. */
#define LOOP_START \
__asm__( \
"movd %0,%%mm1 \n\t" \
"pxor %%mm3,%%mm3 \n\t" \
"pmuludq %%mm2,%%mm1 \n\t" \
:: "g"(c[x]))

/* pmuludq on mmx registers does a 32x32->64 multiply. */
/* mm3 accumulates digit + carry + mu*(*tmpm); the low 32 bits go back to
 * _c[LO], the high bits stay in mm3 as the next carry (psrlq $32). */
#define INNERMUL \
__asm__( \
   "movd %1,%%mm4 \n\t" \
   "movd %2,%%mm0 \n\t" \
   "paddq %%mm4,%%mm3 \n\t" \
   "pmuludq %%mm1,%%mm0 \n\t" \
   "paddq %%mm0,%%mm3 \n\t" \
   "movd %%mm3,%0 \n\t" \
   "psrlq $32, %%mm3 \n\t" \
:"=g"(_c[LO]) : "0"(_c[LO]), "g"(*tmpm++) );

/* Unrolled INNERMUL x8: tmpm[0..7] (via %2) against _c[0..7] (via %1/%0).
 * mm5/mm6/mm7 are rotated as prefetch registers so each step's loads
 * overlap the previous multiply; carry stays in mm3 throughout. */
#define INNERMUL8 \
__asm__( \
   "movd 0(%1),%%mm4 \n\t" \
   "movd 0(%2),%%mm0 \n\t" \
   "paddq %%mm4,%%mm3 \n\t" \
   "pmuludq %%mm1,%%mm0 \n\t" \
   "movd 4(%2),%%mm5 \n\t" \
   "paddq %%mm0,%%mm3 \n\t" \
   "movd 4(%1),%%mm6 \n\t" \
   "movd %%mm3,0(%0) \n\t" \
   "psrlq $32, %%mm3 \n\t" \
 \
   "paddq %%mm6,%%mm3 \n\t" \
   "pmuludq %%mm1,%%mm5 \n\t" \
   "movd 8(%2),%%mm6 \n\t" \
   "paddq %%mm5,%%mm3 \n\t" \
   "movd 8(%1),%%mm7 \n\t" \
   "movd %%mm3,4(%0) \n\t" \
   "psrlq $32, %%mm3 \n\t" \
 \
   "paddq %%mm7,%%mm3 \n\t" \
   "pmuludq %%mm1,%%mm6 \n\t" \
   "movd 12(%2),%%mm7 \n\t" \
   "paddq %%mm6,%%mm3 \n\t" \
   "movd 12(%1),%%mm5 \n\t" \
   "movd %%mm3,8(%0) \n\t" \
   "psrlq $32, %%mm3 \n\t" \
 \
   "paddq %%mm5,%%mm3 \n\t" \
   "pmuludq %%mm1,%%mm7 \n\t" \
   "movd 16(%2),%%mm5 \n\t" \
   "paddq %%mm7,%%mm3 \n\t" \
   "movd 16(%1),%%mm6 \n\t" \
   "movd %%mm3,12(%0) \n\t" \
   "psrlq $32, %%mm3 \n\t" \
 \
   "paddq %%mm6,%%mm3 \n\t" \
   "pmuludq %%mm1,%%mm5 \n\t" \
   "movd 20(%2),%%mm6 \n\t" \
   "paddq %%mm5,%%mm3 \n\t" \
   "movd 20(%1),%%mm7 \n\t" \
   "movd %%mm3,16(%0) \n\t" \
   "psrlq $32, %%mm3 \n\t" \
 \
   "paddq %%mm7,%%mm3 \n\t" \
   "pmuludq %%mm1,%%mm6 \n\t" \
   "movd 24(%2),%%mm7 \n\t" \
   "paddq %%mm6,%%mm3 \n\t" \
   "movd 24(%1),%%mm5 \n\t" \
   "movd %%mm3,20(%0) \n\t" \
   "psrlq $32, %%mm3 \n\t" \
 \
   "paddq %%mm5,%%mm3 \n\t" \
   "pmuludq %%mm1,%%mm7 \n\t" \
   "movd 28(%2),%%mm5 \n\t" \
   "paddq %%mm7,%%mm3 \n\t" \
   "movd 28(%1),%%mm6 \n\t" \
   "movd %%mm3,24(%0) \n\t" \
   "psrlq $32, %%mm3 \n\t" \
 \
   "paddq %%mm6,%%mm3 \n\t" \
   "pmuludq %%mm1,%%mm5 \n\t" \
   "paddq %%mm5,%%mm3 \n\t" \
   "movd %%mm3,28(%0) \n\t" \
   "psrlq $32, %%mm3 \n\t" \
:"=r"(_c) : "0"(_c), "r"(tmpm) );

/* TAO switched tmpm from "g" to "r" after gcc tried to index the indexed stack
   pointer */

/* Spill the pending carry from mm3 back to the C variable. */
#define LOOP_END \
__asm__( "movd %%mm3,%0 \n" :"=r"(cy))

/* Carry propagation is plain integer code (addl/setb), same as TFM_X86. */
#define PROPCARRY \
__asm__( \
   "addl %1,%0 \n\t" \
   "setb %%al \n\t" \
   "movzbl %%al,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
:"0"(_c[LO]), "1"(cy) \
: "%eax", "cc")

/******************************************************************/
#elif defined(TFM_ARM)
   /* ARMv4 code */

/* No per-call or per-digit setup is needed on ARM. */
#define MONT_START
#define MONT_FINI
#define LOOP_END
/* mu = c[x] * mp (mod 2^32, by natural truncation) */
#define LOOP_START \
   mu = c[x] * mp


#ifdef __thumb__

/* Thumb-2 variant: ADDS sets C; the IT(E) block turns the carry flag into
 * cy = 0/1, then UMLAL accumulates mu * (*tmpm) + carry into r0:%0. */
#define INNERMUL \
__asm__( \
    " LDR r0,%1 \n\t" \
    " ADDS r0,r0,%0 \n\t" \
    " ITE CS \n\t" \
    " MOVCS %0,#1 \n\t" \
    " MOVCC %0,#0 \n\t" \
    " UMLAL r0,%0,%3,%4 \n\t" \
    " STR r0,%1 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"m"(_c[0]):"r0","cc");

/* _c[0] += cy; cy = carry out, materialized through the IT(E) block. */
#define PROPCARRY \
__asm__( \
    " LDR r0,%1 \n\t" \
    " ADDS r0,r0,%0 \n\t" \
    " STR r0,%1 \n\t" \
    " ITE CS \n\t" \
    " MOVCS %0,#1 \n\t" \
    " MOVCC %0,#0 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"m"(_c[0]):"r0","cc");


/* TAO thumb mode uses ite (if then else) to detect carry directly
 * fixed unmatched constraint warning by changing 1 to m */

#else  /* __thumb__ */

/* Classic ARM variant: MOVCS/MOVCC are conditionally executed directly,
 * no IT block needed. */
#define INNERMUL \
__asm__( \
    " LDR r0,%1 \n\t" \
    " ADDS r0,r0,%0 \n\t" \
    " MOVCS %0,#1 \n\t" \
    " MOVCC %0,#0 \n\t" \
    " UMLAL r0,%0,%3,%4 \n\t" \
    " STR r0,%1 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r0","cc");

/* _c[0] += cy; cy = carry out (0 or 1) via conditional moves. */
#define PROPCARRY \
__asm__( \
    " LDR r0,%1 \n\t" \
    " ADDS r0,r0,%0 \n\t" \
    " STR r0,%1 \n\t" \
    " MOVCS %0,#1 \n\t" \
    " MOVCC %0,#0 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r0","cc");

#endif  /* __thumb__ */

#elif defined(TFM_PPC32)

/* PPC32 */
/* No per-call or per-digit setup is needed on PPC32. */
#define MONT_START
#define MONT_FINI
#define LOOP_END
/* mu = c[x] * mp (mod 2^32, by natural truncation) */
#define LOOP_START \
   mu = c[x] * mp

/* r16:r17 = mu * tmpm[0] (mullw low / mulhwu high); addc/addze fold in
 * cy and the current digit; the new carry lands in %0 via addze. */
#define INNERMUL \
__asm__( \
   " mullw 16,%3,%4 \n\t" \
   " mulhwu 17,%3,%4 \n\t" \
   " addc 16,16,%0 \n\t" \
   " addze 17,17 \n\t" \
   " lwz 18,%1 \n\t" \
   " addc 16,16,18 \n\t" \
   " addze %0,17 \n\t" \
   " stw 16,%1 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "18","cc"); ++tmpm;

/* _c[0] += cy; cy = carry out, recovered by zeroing %0 then addze. */
#define PROPCARRY \
__asm__( \
   " lwz 16,%1 \n\t" \
   " addc 16,16,%0 \n\t" \
   " stw 16,%1 \n\t" \
   " xor %0,%0,%0 \n\t" \
   " addze %0,%0 \n\t" \
:"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"16","cc");

Vanger 0:b86d15c6ba29 402 #elif defined(TFM_PPC64)
Vanger 0:b86d15c6ba29 403
Vanger 0:b86d15c6ba29 404 /* PPC64 */
Vanger 0:b86d15c6ba29 405 #define MONT_START
Vanger 0:b86d15c6ba29 406 #define MONT_FINI
Vanger 0:b86d15c6ba29 407 #define LOOP_END
Vanger 0:b86d15c6ba29 408 #define LOOP_START \
Vanger 0:b86d15c6ba29 409 mu = c[x] * mp
Vanger 0:b86d15c6ba29 410
Vanger 0:b86d15c6ba29 411 #define INNERMUL \
Vanger 0:b86d15c6ba29 412 __asm__( \
Vanger 0:b86d15c6ba29 413 " mulld 16,%3,%4 \n\t" \
Vanger 0:b86d15c6ba29 414 " mulhdu 17,%3,%4 \n\t" \
Vanger 0:b86d15c6ba29 415 " addc 16,16,%0 \n\t" \
Vanger 0:b86d15c6ba29 416 " addze 17,17 \n\t" \
Vanger 0:b86d15c6ba29 417 " ldx 18,0,%1 \n\t" \
Vanger 0:b86d15c6ba29 418 " addc 16,16,18 \n\t" \
Vanger 0:b86d15c6ba29 419 " addze %0,17 \n\t" \
Vanger 0:b86d15c6ba29 420 " sdx 16,0,%1 \n\t" \
Vanger 0:b86d15c6ba29 421 :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "18","cc"); ++tmpm;
Vanger 0:b86d15c6ba29 422
Vanger 0:b86d15c6ba29 423 #define PROPCARRY \
Vanger 0:b86d15c6ba29 424 __asm__( \
Vanger 0:b86d15c6ba29 425 " ldx 16,0,%1 \n\t" \
Vanger 0:b86d15c6ba29 426 " addc 16,16,%0 \n\t" \
Vanger 0:b86d15c6ba29 427 " sdx 16,0,%1 \n\t" \
Vanger 0:b86d15c6ba29 428 " xor %0,%0,%0 \n\t" \
Vanger 0:b86d15c6ba29 429 " addze %0,%0 \n\t" \
Vanger 0:b86d15c6ba29 430 :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"16","cc");
Vanger 0:b86d15c6ba29 431
/******************************************************************/

#elif defined(TFM_AVR32)

/* AVR32 */
/* No per-call or per-digit setup is needed on AVR32. */
#define MONT_START
#define MONT_FINI
#define LOOP_END
/* mu = c[x] * mp (mod 2^32, by natural truncation) */
#define LOOP_START \
   mu = c[x] * mp

/* r2 = digit + cy; r3 captures the carry (eor clears, acr adds C);
 * macu.d r2,%3,%4 accumulates mu * (*tmpm) into r3:r2; low word stored
 * back, high word becomes the new cy. */
#define INNERMUL \
__asm__( \
    " ld.w r2,%1 \n\t" \
    " add r2,%0 \n\t" \
    " eor r3,r3 \n\t" \
    " acr r3 \n\t" \
    " macu.d r2,%3,%4 \n\t" \
    " st.w %1,r2 \n\t" \
    " mov %0,r3 \n\t" \
:"=r"(cy),"=r"(_c):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c):"r2","r3");

/* _c[0] += cy; cy = carry out (eor clears, acr adds the carry flag).
 * NOTE(review): "=r"(&_c[0]) is not an lvalue and GCC rejects non-lvalue
 * asm outputs -- this branch likely does not compile as written; the
 * other ports use "=m"(_c[0]) here. Confirm on an AVR32 toolchain. */
#define PROPCARRY \
__asm__( \
    " ld.w r2,%1 \n\t" \
    " add r2,%0 \n\t" \
    " st.w %1,r2 \n\t" \
    " eor %0,%0 \n\t" \
    " acr %0 \n\t" \
:"=r"(cy),"=r"(&_c[0]):"0"(cy),"1"(&_c[0]):"r2","cc");

#else

/* ISO C code: portable fallback used when no arch-specific path matches. */
#define MONT_START
#define MONT_FINI
#define LOOP_END
/* mu = c[x] * mp (mod 2^DIGIT_BIT, by natural truncation) */
#define LOOP_START \
   mu = c[x] * mp

/* Widen to fp_word so digit + carry + mu*(*tmpm) cannot overflow; the
 * low digit goes back to _c[0], the high part becomes the new carry. */
#define INNERMUL \
   do { fp_word t; \
   t = ((fp_word)_c[0] + (fp_word)cy) + \
   (((fp_word)mu) * ((fp_word)*tmpm++)); \
   _c[0] = (fp_digit)t; \
   cy = (fp_digit)(t >> DIGIT_BIT); \
   } while (0)

/* _c[0] += cy; the wrap-around test (t < cy) recovers the carry out. */
#define PROPCARRY \
   do { fp_digit t = _c[0] += cy; cy = (t < cy); } while (0)

#endif
/******************************************************************/


/* Index of the low digit accessed by the macros above. */
#define LO 0
/* end fp_montgomery_reduce.c asm */

/* start fp_sqr_comba.c asm */
/*
 * Comba squaring primitives. The caller keeps a three-digit accumulator
 * c2:c1:c0 (and a secondary sc2:sc1:sc0) and calls:
 *   SQRADD(i,j)   - accumulate i*i (the j argument is unused here)
 *   SQRADD2(i,j)  - accumulate 2*i*j
 *   SQRADDSC(i,j) - start the secondary accumulator with i*j
 *   SQRADDAC(i,j) - accumulate i*j into the secondary accumulator
 *   SQRADDDB      - fold 2*(sc2:sc1:sc0) into c2:c1:c0
 */
#if defined(TFM_X86)

/* x86-32 optimized */

#define COMBA_START

/* Reset the three-digit accumulator. */
#define CLEAR_CARRY \
   c0 = c1 = c2 = 0;

/* Emit the low accumulator digit. */
#define COMBA_STORE(x) \
   x = c0;

/* Emit the middle accumulator digit. */
#define COMBA_STORE2(x) \
   x = c1;

/* Shift the accumulator down one digit position. */
#define CARRY_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_FINI

/* c2:c1:c0 += i*i (mull %%eax squares; edx:eax holds the product). */
#define SQRADD(i, j) \
__asm__( \
    "movl %6,%%eax \n\t" \
    "mull %%eax \n\t" \
    "addl %%eax,%0 \n\t" \
    "adcl %%edx,%1 \n\t" \
    "adcl $0,%2 \n\t" \
    :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","cc");

/* c2:c1:c2 += 2*i*j, done by adding the product twice. */
#define SQRADD2(i, j) \
__asm__( \
    "movl %6,%%eax \n\t" \
    "mull %7 \n\t" \
    "addl %%eax,%0 \n\t" \
    "adcl %%edx,%1 \n\t" \
    "adcl $0,%2 \n\t" \
    "addl %%eax,%0 \n\t" \
    "adcl %%edx,%1 \n\t" \
    "adcl $0,%2 \n\t" \
    :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx", "cc");

/* sc2:sc1:sc0 = i*j (fresh secondary accumulator, sc2 zeroed). */
#define SQRADDSC(i, j) \
__asm__( \
    "movl %3,%%eax \n\t" \
    "mull %4 \n\t" \
    "movl %%eax,%0 \n\t" \
    "movl %%edx,%1 \n\t" \
    "xorl %2,%2 \n\t" \
    :"=r"(sc0), "=r"(sc1), "=r"(sc2): "g"(i), "g"(j) :"%eax","%edx","cc");

/* TAO removed sc0,1,2 as input to remove warning so %6,%7 become %3,%4 */

/* sc2:sc1:sc0 += i*j. */
#define SQRADDAC(i, j) \
__asm__( \
    "movl %6,%%eax \n\t" \
    "mull %7 \n\t" \
    "addl %%eax,%0 \n\t" \
    "adcl %%edx,%1 \n\t" \
    "adcl $0,%2 \n\t" \
    :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","cc");

/* c2:c1:c0 += 2 * (sc2:sc1:sc0), done by adding the triple twice. */
#define SQRADDDB \
__asm__( \
    "addl %6,%0 \n\t" \
    "adcl %7,%1 \n\t" \
    "adcl %8,%2 \n\t" \
    "addl %6,%0 \n\t" \
    "adcl %7,%1 \n\t" \
    "adcl %8,%2 \n\t" \
    :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");

#elif defined(TFM_X86_64)
/* x86-64 optimized: same comba accumulator protocol as the x86-32
 * section, using 64-bit mulq (rdx:rax product). */

#define COMBA_START

/* Reset the three-digit accumulator. */
#define CLEAR_CARRY \
   c0 = c1 = c2 = 0;

/* Emit the low accumulator digit. */
#define COMBA_STORE(x) \
   x = c0;

/* Emit the middle accumulator digit. */
#define COMBA_STORE2(x) \
   x = c1;

/* Shift the accumulator down one digit position. */
#define CARRY_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

#define COMBA_FINI

/* c2:c1:c0 += i*i. */
#define SQRADD(i, j) \
__asm__( \
    "movq %6,%%rax \n\t" \
    "mulq %%rax \n\t" \
    "addq %%rax,%0 \n\t" \
    "adcq %%rdx,%1 \n\t" \
    "adcq $0,%2 \n\t" \
    :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i) :"%rax","%rdx","cc");

/* c2:c1:c0 += 2*i*j, done by adding the product twice. */
#define SQRADD2(i, j) \
__asm__( \
    "movq %6,%%rax \n\t" \
    "mulq %7 \n\t" \
    "addq %%rax,%0 \n\t" \
    "adcq %%rdx,%1 \n\t" \
    "adcq $0,%2 \n\t" \
    "addq %%rax,%0 \n\t" \
    "adcq %%rdx,%1 \n\t" \
    "adcq $0,%2 \n\t" \
    :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","cc");

/* sc2:sc1:sc0 = i*j (fresh secondary accumulator, sc2 zeroed). */
#define SQRADDSC(i, j) \
__asm__( \
    "movq %3,%%rax \n\t" \
    "mulq %4 \n\t" \
    "movq %%rax,%0 \n\t" \
    "movq %%rdx,%1 \n\t" \
    "xorq %2,%2 \n\t" \
    :"=r"(sc0), "=r"(sc1), "=r"(sc2): "g"(i), "g"(j) :"%rax","%rdx","cc");

/* TAO removed sc0,1,2 as input to remove warning so %6,%7 become %3,%4 */

/* sc2:sc1:sc0 += i*j. */
#define SQRADDAC(i, j) \
__asm__( \
    "movq %6,%%rax \n\t" \
    "mulq %7 \n\t" \
    "addq %%rax,%0 \n\t" \
    "adcq %%rdx,%1 \n\t" \
    "adcq $0,%2 \n\t" \
    :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","cc");

/* c2:c1:c0 += 2 * (sc2:sc1:sc0), done by adding the triple twice. */
#define SQRADDDB \
__asm__( \
    "addq %6,%0 \n\t" \
    "adcq %7,%1 \n\t" \
    "adcq %8,%2 \n\t" \
    "addq %6,%0 \n\t" \
    "adcq %7,%1 \n\t" \
    "adcq %8,%2 \n\t" \
    :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");

#elif defined(TFM_SSE2)

/* SSE2 Optimized: same comba accumulator protocol, with the 32x32->64
 * multiplies done by pmuludq on MMX registers (mm0/mm1 scratch).
 * Note: this section continues past the end of this chunk (SQRADDAC
 * onward is not visible here). */
#define COMBA_START

/* Reset the three-digit accumulator. */
#define CLEAR_CARRY \
   c0 = c1 = c2 = 0;

/* Emit the low accumulator digit. */
#define COMBA_STORE(x) \
   x = c0;

/* Emit the middle accumulator digit. */
#define COMBA_STORE2(x) \
   x = c1;

/* Shift the accumulator down one digit position. */
#define CARRY_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* emms restores the x87 state after MMX use. */
#define COMBA_FINI \
   __asm__("emms");

/* c2:c1:c0 += i*i: pmuludq squares into mm0, low half added via eax,
 * high half recovered with psrlq $32. */
#define SQRADD(i, j) \
__asm__( \
    "movd %6,%%mm0 \n\t" \
    "pmuludq %%mm0,%%mm0\n\t" \
    "movd %%mm0,%%eax \n\t" \
    "psrlq $32,%%mm0 \n\t" \
    "addl %%eax,%0 \n\t" \
    "movd %%mm0,%%eax \n\t" \
    "adcl %%eax,%1 \n\t" \
    "adcl $0,%2 \n\t" \
    :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","cc");

/* c2:c1:c0 += 2*i*j, done by adding the product (eax/edx) twice. */
#define SQRADD2(i, j) \
__asm__( \
    "movd %6,%%mm0 \n\t" \
    "movd %7,%%mm1 \n\t" \
    "pmuludq %%mm1,%%mm0\n\t" \
    "movd %%mm0,%%eax \n\t" \
    "psrlq $32,%%mm0 \n\t" \
    "movd %%mm0,%%edx \n\t" \
    "addl %%eax,%0 \n\t" \
    "adcl %%edx,%1 \n\t" \
    "adcl $0,%2 \n\t" \
    "addl %%eax,%0 \n\t" \
    "adcl %%edx,%1 \n\t" \
    "adcl $0,%2 \n\t" \
    :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc");

/* sc2:sc1:sc0 = i*j (fresh secondary accumulator, sc2 zeroed). */
#define SQRADDSC(i, j) \
__asm__( \
    "movd %3,%%mm0 \n\t" \
    "movd %4,%%mm1 \n\t" \
    "pmuludq %%mm1,%%mm0\n\t" \
    "movd %%mm0,%0 \n\t" \
    "psrlq $32,%%mm0 \n\t" \
    "movd %%mm0,%1 \n\t" \
    "xorl %2,%2 \n\t" \
    :"=r"(sc0), "=r"(sc1), "=r"(sc2): "m"(i), "m"(j));

/* TAO removed sc0,1,2 as input to remove warning so %6,%7 become %3,%4 */

Vanger 0:b86d15c6ba29 694 #define SQRADDAC(i, j) \
Vanger 0:b86d15c6ba29 695 __asm__( \
Vanger 0:b86d15c6ba29 696 "movd %6,%%mm0 \n\t" \
Vanger 0:b86d15c6ba29 697 "movd %7,%%mm1 \n\t" \
Vanger 0:b86d15c6ba29 698 "pmuludq %%mm1,%%mm0\n\t" \
Vanger 0:b86d15c6ba29 699 "movd %%mm0,%%eax \n\t" \
Vanger 0:b86d15c6ba29 700 "psrlq $32,%%mm0 \n\t" \
Vanger 0:b86d15c6ba29 701 "movd %%mm0,%%edx \n\t" \
Vanger 0:b86d15c6ba29 702 "addl %%eax,%0 \n\t" \
Vanger 0:b86d15c6ba29 703 "adcl %%edx,%1 \n\t" \
Vanger 0:b86d15c6ba29 704 "adcl $0,%2 \n\t" \
Vanger 0:b86d15c6ba29 705 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j) :"%eax","%edx","cc");
Vanger 0:b86d15c6ba29 706
Vanger 0:b86d15c6ba29 707 #define SQRADDDB \
Vanger 0:b86d15c6ba29 708 __asm__( \
Vanger 0:b86d15c6ba29 709 "addl %6,%0 \n\t" \
Vanger 0:b86d15c6ba29 710 "adcl %7,%1 \n\t" \
Vanger 0:b86d15c6ba29 711 "adcl %8,%2 \n\t" \
Vanger 0:b86d15c6ba29 712 "addl %6,%0 \n\t" \
Vanger 0:b86d15c6ba29 713 "adcl %7,%1 \n\t" \
Vanger 0:b86d15c6ba29 714 "adcl %8,%2 \n\t" \
Vanger 0:b86d15c6ba29 715 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");
Vanger 0:b86d15c6ba29 716
Vanger 0:b86d15c6ba29 717 #elif defined(TFM_ARM)
Vanger 0:b86d15c6ba29 718
Vanger 0:b86d15c6ba29 719 /* ARM code */
Vanger 0:b86d15c6ba29 720
Vanger 0:b86d15c6ba29 721 #define COMBA_START
Vanger 0:b86d15c6ba29 722
Vanger 0:b86d15c6ba29 723 #define CLEAR_CARRY \
Vanger 0:b86d15c6ba29 724 c0 = c1 = c2 = 0;
Vanger 0:b86d15c6ba29 725
Vanger 0:b86d15c6ba29 726 #define COMBA_STORE(x) \
Vanger 0:b86d15c6ba29 727 x = c0;
Vanger 0:b86d15c6ba29 728
Vanger 0:b86d15c6ba29 729 #define COMBA_STORE2(x) \
Vanger 0:b86d15c6ba29 730 x = c1;
Vanger 0:b86d15c6ba29 731
Vanger 0:b86d15c6ba29 732 #define CARRY_FORWARD \
Vanger 0:b86d15c6ba29 733 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
Vanger 0:b86d15c6ba29 734
Vanger 0:b86d15c6ba29 735 #define COMBA_FINI
Vanger 0:b86d15c6ba29 736
Vanger 0:b86d15c6ba29 737 /* multiplies point i and j, updates carry "c1" and digit c2 */
Vanger 0:b86d15c6ba29 738 #define SQRADD(i, j) \
Vanger 0:b86d15c6ba29 739 __asm__( \
Vanger 0:b86d15c6ba29 740 " UMULL r0,r1,%6,%6 \n\t" \
Vanger 0:b86d15c6ba29 741 " ADDS %0,%0,r0 \n\t" \
Vanger 0:b86d15c6ba29 742 " ADCS %1,%1,r1 \n\t" \
Vanger 0:b86d15c6ba29 743 " ADC %2,%2,#0 \n\t" \
Vanger 0:b86d15c6ba29 744 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i) : "r0", "r1", "cc");
Vanger 0:b86d15c6ba29 745
Vanger 0:b86d15c6ba29 746 /* for squaring some of the terms are doubled... */
Vanger 0:b86d15c6ba29 747 #define SQRADD2(i, j) \
Vanger 0:b86d15c6ba29 748 __asm__( \
Vanger 0:b86d15c6ba29 749 " UMULL r0,r1,%6,%7 \n\t" \
Vanger 0:b86d15c6ba29 750 " ADDS %0,%0,r0 \n\t" \
Vanger 0:b86d15c6ba29 751 " ADCS %1,%1,r1 \n\t" \
Vanger 0:b86d15c6ba29 752 " ADC %2,%2,#0 \n\t" \
Vanger 0:b86d15c6ba29 753 " ADDS %0,%0,r0 \n\t" \
Vanger 0:b86d15c6ba29 754 " ADCS %1,%1,r1 \n\t" \
Vanger 0:b86d15c6ba29 755 " ADC %2,%2,#0 \n\t" \
Vanger 0:b86d15c6ba29 756 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "cc");
Vanger 0:b86d15c6ba29 757
Vanger 0:b86d15c6ba29 758 #define SQRADDSC(i, j) \
Vanger 0:b86d15c6ba29 759 __asm__( \
Vanger 0:b86d15c6ba29 760 " UMULL %0,%1,%3,%4 \n\t" \
Vanger 0:b86d15c6ba29 761 " SUB %2,%2,%2 \n\t" \
Vanger 0:b86d15c6ba29 762 :"=r"(sc0), "=r"(sc1), "=r"(sc2) : "r"(i), "r"(j) : "cc");
Vanger 0:b86d15c6ba29 763
Vanger 0:b86d15c6ba29 764 /* TAO removed sc0,1,2 as input to remove warning so %6,%7 become %3,%4 */
Vanger 0:b86d15c6ba29 765
Vanger 0:b86d15c6ba29 766 #define SQRADDAC(i, j) \
Vanger 0:b86d15c6ba29 767 __asm__( \
Vanger 0:b86d15c6ba29 768 " UMULL r0,r1,%6,%7 \n\t" \
Vanger 0:b86d15c6ba29 769 " ADDS %0,%0,r0 \n\t" \
Vanger 0:b86d15c6ba29 770 " ADCS %1,%1,r1 \n\t" \
Vanger 0:b86d15c6ba29 771 " ADC %2,%2,#0 \n\t" \
Vanger 0:b86d15c6ba29 772 :"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "r0", "r1", "cc");
Vanger 0:b86d15c6ba29 773
Vanger 0:b86d15c6ba29 774 #define SQRADDDB \
Vanger 0:b86d15c6ba29 775 __asm__( \
Vanger 0:b86d15c6ba29 776 " ADDS %0,%0,%3 \n\t" \
Vanger 0:b86d15c6ba29 777 " ADCS %1,%1,%4 \n\t" \
Vanger 0:b86d15c6ba29 778 " ADC %2,%2,%5 \n\t" \
Vanger 0:b86d15c6ba29 779 " ADDS %0,%0,%3 \n\t" \
Vanger 0:b86d15c6ba29 780 " ADCS %1,%1,%4 \n\t" \
Vanger 0:b86d15c6ba29 781 " ADC %2,%2,%5 \n\t" \
Vanger 0:b86d15c6ba29 782 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
Vanger 0:b86d15c6ba29 783
Vanger 0:b86d15c6ba29 784 #elif defined(TFM_PPC32)
Vanger 0:b86d15c6ba29 785
Vanger 0:b86d15c6ba29 786 /* PPC32 */
Vanger 0:b86d15c6ba29 787
Vanger 0:b86d15c6ba29 788 #define COMBA_START
Vanger 0:b86d15c6ba29 789
Vanger 0:b86d15c6ba29 790 #define CLEAR_CARRY \
Vanger 0:b86d15c6ba29 791 c0 = c1 = c2 = 0;
Vanger 0:b86d15c6ba29 792
Vanger 0:b86d15c6ba29 793 #define COMBA_STORE(x) \
Vanger 0:b86d15c6ba29 794 x = c0;
Vanger 0:b86d15c6ba29 795
Vanger 0:b86d15c6ba29 796 #define COMBA_STORE2(x) \
Vanger 0:b86d15c6ba29 797 x = c1;
Vanger 0:b86d15c6ba29 798
Vanger 0:b86d15c6ba29 799 #define CARRY_FORWARD \
Vanger 0:b86d15c6ba29 800 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
Vanger 0:b86d15c6ba29 801
Vanger 0:b86d15c6ba29 802 #define COMBA_FINI
Vanger 0:b86d15c6ba29 803
Vanger 0:b86d15c6ba29 804 /* multiplies point i and j, updates carry "c1" and digit c2 */
Vanger 0:b86d15c6ba29 805 #define SQRADD(i, j) \
Vanger 0:b86d15c6ba29 806 __asm__( \
Vanger 0:b86d15c6ba29 807 " mullw 16,%6,%6 \n\t" \
Vanger 0:b86d15c6ba29 808 " addc %0,%0,16 \n\t" \
Vanger 0:b86d15c6ba29 809 " mulhwu 16,%6,%6 \n\t" \
Vanger 0:b86d15c6ba29 810 " adde %1,%1,16 \n\t" \
Vanger 0:b86d15c6ba29 811 " addze %2,%2 \n\t" \
Vanger 0:b86d15c6ba29 812 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","cc");
Vanger 0:b86d15c6ba29 813
Vanger 0:b86d15c6ba29 814 /* for squaring some of the terms are doubled... */
Vanger 0:b86d15c6ba29 815 #define SQRADD2(i, j) \
Vanger 0:b86d15c6ba29 816 __asm__( \
Vanger 0:b86d15c6ba29 817 " mullw 16,%6,%7 \n\t" \
Vanger 0:b86d15c6ba29 818 " mulhwu 17,%6,%7 \n\t" \
Vanger 0:b86d15c6ba29 819 " addc %0,%0,16 \n\t" \
Vanger 0:b86d15c6ba29 820 " adde %1,%1,17 \n\t" \
Vanger 0:b86d15c6ba29 821 " addze %2,%2 \n\t" \
Vanger 0:b86d15c6ba29 822 " addc %0,%0,16 \n\t" \
Vanger 0:b86d15c6ba29 823 " adde %1,%1,17 \n\t" \
Vanger 0:b86d15c6ba29 824 " addze %2,%2 \n\t" \
Vanger 0:b86d15c6ba29 825 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","cc");
Vanger 0:b86d15c6ba29 826
Vanger 0:b86d15c6ba29 827 #define SQRADDSC(i, j) \
Vanger 0:b86d15c6ba29 828 __asm__( \
Vanger 0:b86d15c6ba29 829 " mullw %0,%6,%7 \n\t" \
Vanger 0:b86d15c6ba29 830 " mulhwu %1,%6,%7 \n\t" \
Vanger 0:b86d15c6ba29 831 " xor %2,%2,%2 \n\t" \
Vanger 0:b86d15c6ba29 832 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc");
Vanger 0:b86d15c6ba29 833
Vanger 0:b86d15c6ba29 834 #define SQRADDAC(i, j) \
Vanger 0:b86d15c6ba29 835 __asm__( \
Vanger 0:b86d15c6ba29 836 " mullw 16,%6,%7 \n\t" \
Vanger 0:b86d15c6ba29 837 " addc %0,%0,16 \n\t" \
Vanger 0:b86d15c6ba29 838 " mulhwu 16,%6,%7 \n\t" \
Vanger 0:b86d15c6ba29 839 " adde %1,%1,16 \n\t" \
Vanger 0:b86d15c6ba29 840 " addze %2,%2 \n\t" \
Vanger 0:b86d15c6ba29 841 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "cc");
Vanger 0:b86d15c6ba29 842
Vanger 0:b86d15c6ba29 843 #define SQRADDDB \
Vanger 0:b86d15c6ba29 844 __asm__( \
Vanger 0:b86d15c6ba29 845 " addc %0,%0,%3 \n\t" \
Vanger 0:b86d15c6ba29 846 " adde %1,%1,%4 \n\t" \
Vanger 0:b86d15c6ba29 847 " adde %2,%2,%5 \n\t" \
Vanger 0:b86d15c6ba29 848 " addc %0,%0,%3 \n\t" \
Vanger 0:b86d15c6ba29 849 " adde %1,%1,%4 \n\t" \
Vanger 0:b86d15c6ba29 850 " adde %2,%2,%5 \n\t" \
Vanger 0:b86d15c6ba29 851 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
Vanger 0:b86d15c6ba29 852
Vanger 0:b86d15c6ba29 853 #elif defined(TFM_PPC64)
Vanger 0:b86d15c6ba29 854 /* PPC64 */
Vanger 0:b86d15c6ba29 855
Vanger 0:b86d15c6ba29 856 #define COMBA_START
Vanger 0:b86d15c6ba29 857
Vanger 0:b86d15c6ba29 858 #define CLEAR_CARRY \
Vanger 0:b86d15c6ba29 859 c0 = c1 = c2 = 0;
Vanger 0:b86d15c6ba29 860
Vanger 0:b86d15c6ba29 861 #define COMBA_STORE(x) \
Vanger 0:b86d15c6ba29 862 x = c0;
Vanger 0:b86d15c6ba29 863
Vanger 0:b86d15c6ba29 864 #define COMBA_STORE2(x) \
Vanger 0:b86d15c6ba29 865 x = c1;
Vanger 0:b86d15c6ba29 866
Vanger 0:b86d15c6ba29 867 #define CARRY_FORWARD \
Vanger 0:b86d15c6ba29 868 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
Vanger 0:b86d15c6ba29 869
Vanger 0:b86d15c6ba29 870 #define COMBA_FINI
Vanger 0:b86d15c6ba29 871
Vanger 0:b86d15c6ba29 872 /* multiplies point i and j, updates carry "c1" and digit c2 */
Vanger 0:b86d15c6ba29 873 #define SQRADD(i, j) \
Vanger 0:b86d15c6ba29 874 __asm__( \
Vanger 0:b86d15c6ba29 875 " mulld 16,%6,%6 \n\t" \
Vanger 0:b86d15c6ba29 876 " addc %0,%0,16 \n\t" \
Vanger 0:b86d15c6ba29 877 " mulhdu 16,%6,%6 \n\t" \
Vanger 0:b86d15c6ba29 878 " adde %1,%1,16 \n\t" \
Vanger 0:b86d15c6ba29 879 " addze %2,%2 \n\t" \
Vanger 0:b86d15c6ba29 880 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","cc");
Vanger 0:b86d15c6ba29 881
Vanger 0:b86d15c6ba29 882 /* for squaring some of the terms are doubled... */
Vanger 0:b86d15c6ba29 883 #define SQRADD2(i, j) \
Vanger 0:b86d15c6ba29 884 __asm__( \
Vanger 0:b86d15c6ba29 885 " mulld 16,%6,%7 \n\t" \
Vanger 0:b86d15c6ba29 886 " mulhdu 17,%6,%7 \n\t" \
Vanger 0:b86d15c6ba29 887 " addc %0,%0,16 \n\t" \
Vanger 0:b86d15c6ba29 888 " adde %1,%1,17 \n\t" \
Vanger 0:b86d15c6ba29 889 " addze %2,%2 \n\t" \
Vanger 0:b86d15c6ba29 890 " addc %0,%0,16 \n\t" \
Vanger 0:b86d15c6ba29 891 " adde %1,%1,17 \n\t" \
Vanger 0:b86d15c6ba29 892 " addze %2,%2 \n\t" \
Vanger 0:b86d15c6ba29 893 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","cc");
Vanger 0:b86d15c6ba29 894
Vanger 0:b86d15c6ba29 895 #define SQRADDSC(i, j) \
Vanger 0:b86d15c6ba29 896 __asm__( \
Vanger 0:b86d15c6ba29 897 " mulld %0,%6,%7 \n\t" \
Vanger 0:b86d15c6ba29 898 " mulhdu %1,%6,%7 \n\t" \
Vanger 0:b86d15c6ba29 899 " xor %2,%2,%2 \n\t" \
Vanger 0:b86d15c6ba29 900 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc");
Vanger 0:b86d15c6ba29 901
Vanger 0:b86d15c6ba29 902 #define SQRADDAC(i, j) \
Vanger 0:b86d15c6ba29 903 __asm__( \
Vanger 0:b86d15c6ba29 904 " mulld 16,%6,%7 \n\t" \
Vanger 0:b86d15c6ba29 905 " addc %0,%0,16 \n\t" \
Vanger 0:b86d15c6ba29 906 " mulhdu 16,%6,%7 \n\t" \
Vanger 0:b86d15c6ba29 907 " adde %1,%1,16 \n\t" \
Vanger 0:b86d15c6ba29 908 " addze %2,%2 \n\t" \
Vanger 0:b86d15c6ba29 909 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "cc");
Vanger 0:b86d15c6ba29 910
Vanger 0:b86d15c6ba29 911 #define SQRADDDB \
Vanger 0:b86d15c6ba29 912 __asm__( \
Vanger 0:b86d15c6ba29 913 " addc %0,%0,%3 \n\t" \
Vanger 0:b86d15c6ba29 914 " adde %1,%1,%4 \n\t" \
Vanger 0:b86d15c6ba29 915 " adde %2,%2,%5 \n\t" \
Vanger 0:b86d15c6ba29 916 " addc %0,%0,%3 \n\t" \
Vanger 0:b86d15c6ba29 917 " adde %1,%1,%4 \n\t" \
Vanger 0:b86d15c6ba29 918 " adde %2,%2,%5 \n\t" \
Vanger 0:b86d15c6ba29 919 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
Vanger 0:b86d15c6ba29 920
Vanger 0:b86d15c6ba29 921
Vanger 0:b86d15c6ba29 922 #elif defined(TFM_AVR32)
Vanger 0:b86d15c6ba29 923
Vanger 0:b86d15c6ba29 924 /* AVR32 */
Vanger 0:b86d15c6ba29 925
Vanger 0:b86d15c6ba29 926 #define COMBA_START
Vanger 0:b86d15c6ba29 927
Vanger 0:b86d15c6ba29 928 #define CLEAR_CARRY \
Vanger 0:b86d15c6ba29 929 c0 = c1 = c2 = 0;
Vanger 0:b86d15c6ba29 930
Vanger 0:b86d15c6ba29 931 #define COMBA_STORE(x) \
Vanger 0:b86d15c6ba29 932 x = c0;
Vanger 0:b86d15c6ba29 933
Vanger 0:b86d15c6ba29 934 #define COMBA_STORE2(x) \
Vanger 0:b86d15c6ba29 935 x = c1;
Vanger 0:b86d15c6ba29 936
Vanger 0:b86d15c6ba29 937 #define CARRY_FORWARD \
Vanger 0:b86d15c6ba29 938 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
Vanger 0:b86d15c6ba29 939
Vanger 0:b86d15c6ba29 940 #define COMBA_FINI
Vanger 0:b86d15c6ba29 941
Vanger 0:b86d15c6ba29 942 /* multiplies point i and j, updates carry "c1" and digit c2 */
Vanger 0:b86d15c6ba29 943 #define SQRADD(i, j) \
Vanger 0:b86d15c6ba29 944 __asm__( \
Vanger 0:b86d15c6ba29 945 " mulu.d r2,%6,%6 \n\t" \
Vanger 0:b86d15c6ba29 946 " add %0,%0,r2 \n\t" \
Vanger 0:b86d15c6ba29 947 " adc %1,%1,r3 \n\t" \
Vanger 0:b86d15c6ba29 948 " acr %2 \n\t" \
Vanger 0:b86d15c6ba29 949 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r2","r3");
Vanger 0:b86d15c6ba29 950
Vanger 0:b86d15c6ba29 951 /* for squaring some of the terms are doubled... */
Vanger 0:b86d15c6ba29 952 #define SQRADD2(i, j) \
Vanger 0:b86d15c6ba29 953 __asm__( \
Vanger 0:b86d15c6ba29 954 " mulu.d r2,%6,%7 \n\t" \
Vanger 0:b86d15c6ba29 955 " add %0,%0,r2 \n\t" \
Vanger 0:b86d15c6ba29 956 " adc %1,%1,r3 \n\t" \
Vanger 0:b86d15c6ba29 957 " acr %2, \n\t" \
Vanger 0:b86d15c6ba29 958 " add %0,%0,r2 \n\t" \
Vanger 0:b86d15c6ba29 959 " adc %1,%1,r3 \n\t" \
Vanger 0:b86d15c6ba29 960 " acr %2, \n\t" \
Vanger 0:b86d15c6ba29 961 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2", "r3");
Vanger 0:b86d15c6ba29 962
Vanger 0:b86d15c6ba29 963 #define SQRADDSC(i, j) \
Vanger 0:b86d15c6ba29 964 __asm__( \
Vanger 0:b86d15c6ba29 965 " mulu.d r2,%6,%7 \n\t" \
Vanger 0:b86d15c6ba29 966 " mov %0,r2 \n\t" \
Vanger 0:b86d15c6ba29 967 " mov %1,r3 \n\t" \
Vanger 0:b86d15c6ba29 968 " eor %2,%2 \n\t" \
Vanger 0:b86d15c6ba29 969 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "r2", "r3");
Vanger 0:b86d15c6ba29 970
Vanger 0:b86d15c6ba29 971 #define SQRADDAC(i, j) \
Vanger 0:b86d15c6ba29 972 __asm__( \
Vanger 0:b86d15c6ba29 973 " mulu.d r2,%6,%7 \n\t" \
Vanger 0:b86d15c6ba29 974 " add %0,%0,r2 \n\t" \
Vanger 0:b86d15c6ba29 975 " adc %1,%1,r3 \n\t" \
Vanger 0:b86d15c6ba29 976 " acr %2 \n\t" \
Vanger 0:b86d15c6ba29 977 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r2", "r3");
Vanger 0:b86d15c6ba29 978
Vanger 0:b86d15c6ba29 979 #define SQRADDDB \
Vanger 0:b86d15c6ba29 980 __asm__( \
Vanger 0:b86d15c6ba29 981 " add %0,%0,%3 \n\t" \
Vanger 0:b86d15c6ba29 982 " adc %1,%1,%4 \n\t" \
Vanger 0:b86d15c6ba29 983 " adc %2,%2,%5 \n\t" \
Vanger 0:b86d15c6ba29 984 " add %0,%0,%3 \n\t" \
Vanger 0:b86d15c6ba29 985 " adc %1,%1,%4 \n\t" \
Vanger 0:b86d15c6ba29 986 " adc %2,%2,%5 \n\t" \
Vanger 0:b86d15c6ba29 987 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
Vanger 0:b86d15c6ba29 988
Vanger 0:b86d15c6ba29 989
Vanger 0:b86d15c6ba29 990 #else
Vanger 0:b86d15c6ba29 991
Vanger 0:b86d15c6ba29 992 #define TFM_ISO
Vanger 0:b86d15c6ba29 993
Vanger 0:b86d15c6ba29 994 /* ISO C portable code */
Vanger 0:b86d15c6ba29 995
Vanger 0:b86d15c6ba29 996 #define COMBA_START
Vanger 0:b86d15c6ba29 997
Vanger 0:b86d15c6ba29 998 #define CLEAR_CARRY \
Vanger 0:b86d15c6ba29 999 c0 = c1 = c2 = 0;
Vanger 0:b86d15c6ba29 1000
Vanger 0:b86d15c6ba29 1001 #define COMBA_STORE(x) \
Vanger 0:b86d15c6ba29 1002 x = c0;
Vanger 0:b86d15c6ba29 1003
Vanger 0:b86d15c6ba29 1004 #define COMBA_STORE2(x) \
Vanger 0:b86d15c6ba29 1005 x = c1;
Vanger 0:b86d15c6ba29 1006
Vanger 0:b86d15c6ba29 1007 #define CARRY_FORWARD \
Vanger 0:b86d15c6ba29 1008 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
Vanger 0:b86d15c6ba29 1009
Vanger 0:b86d15c6ba29 1010 #define COMBA_FINI
Vanger 0:b86d15c6ba29 1011
Vanger 0:b86d15c6ba29 1012 /* multiplies point i and j, updates carry "c1" and digit c2 */
Vanger 0:b86d15c6ba29 1013 #define SQRADD(i, j) \
Vanger 0:b86d15c6ba29 1014 do { fp_word t; \
Vanger 0:b86d15c6ba29 1015 t = c0 + ((fp_word)i) * ((fp_word)j); c0 = (fp_digit)t; \
Vanger 0:b86d15c6ba29 1016 t = c1 + (t >> DIGIT_BIT); c1 = (fp_digit)t; \
Vanger 0:b86d15c6ba29 1017 c2 +=(fp_digit) (t >> DIGIT_BIT); \
Vanger 0:b86d15c6ba29 1018 } while (0);
Vanger 0:b86d15c6ba29 1019
Vanger 0:b86d15c6ba29 1020
Vanger 0:b86d15c6ba29 1021 /* for squaring some of the terms are doubled... */
Vanger 0:b86d15c6ba29 1022 #define SQRADD2(i, j) \
Vanger 0:b86d15c6ba29 1023 do { fp_word t; \
Vanger 0:b86d15c6ba29 1024 t = ((fp_word)i) * ((fp_word)j); \
Vanger 0:b86d15c6ba29 1025 tt = (fp_word)c0 + t; c0 = (fp_digit)tt; \
Vanger 0:b86d15c6ba29 1026 tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = (fp_digit)tt; \
Vanger 0:b86d15c6ba29 1027 c2 +=(fp_digit)( tt >> DIGIT_BIT); \
Vanger 0:b86d15c6ba29 1028 tt = (fp_word)c0 + t; c0 = (fp_digit)tt; \
Vanger 0:b86d15c6ba29 1029 tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = (fp_digit)tt; \
Vanger 0:b86d15c6ba29 1030 c2 +=(fp_digit) (tt >> DIGIT_BIT); \
Vanger 0:b86d15c6ba29 1031 } while (0);
Vanger 0:b86d15c6ba29 1032
Vanger 0:b86d15c6ba29 1033 #define SQRADDSC(i, j) \
Vanger 0:b86d15c6ba29 1034 do { fp_word t; \
Vanger 0:b86d15c6ba29 1035 t = ((fp_word)i) * ((fp_word)j); \
Vanger 0:b86d15c6ba29 1036 sc0 = (fp_digit)t; sc1 = (t >> DIGIT_BIT); sc2 = 0; \
Vanger 0:b86d15c6ba29 1037 } while (0);
Vanger 0:b86d15c6ba29 1038
Vanger 0:b86d15c6ba29 1039 #define SQRADDAC(i, j) \
Vanger 0:b86d15c6ba29 1040 do { fp_word t; \
Vanger 0:b86d15c6ba29 1041 t = sc0 + ((fp_word)i) * ((fp_word)j); sc0 = (fp_digit)t; \
Vanger 0:b86d15c6ba29 1042 t = sc1 + (t >> DIGIT_BIT); sc1 = (fp_digit)t; \
Vanger 0:b86d15c6ba29 1043 sc2 += (fp_digit)(t >> DIGIT_BIT); \
Vanger 0:b86d15c6ba29 1044 } while (0);
Vanger 0:b86d15c6ba29 1045
Vanger 0:b86d15c6ba29 1046 #define SQRADDDB \
Vanger 0:b86d15c6ba29 1047 do { fp_word t; \
Vanger 0:b86d15c6ba29 1048 t = ((fp_word)sc0) + ((fp_word)sc0) + c0; c0 = (fp_digit)t; \
Vanger 0:b86d15c6ba29 1049 t = ((fp_word)sc1) + ((fp_word)sc1) + c1 + (t >> DIGIT_BIT); \
Vanger 0:b86d15c6ba29 1050 c1 = (fp_digit)t; \
Vanger 0:b86d15c6ba29 1051 c2 = c2 + (fp_digit)(((fp_word)sc2) + ((fp_word)sc2) + (t >> DIGIT_BIT)); \
Vanger 0:b86d15c6ba29 1052 } while (0);
Vanger 0:b86d15c6ba29 1053
Vanger 0:b86d15c6ba29 1054 #endif
Vanger 0:b86d15c6ba29 1055
Vanger 0:b86d15c6ba29 1056 #ifdef TFM_SMALL_SET
Vanger 0:b86d15c6ba29 1057 #include "fp_sqr_comba_small_set.i"
Vanger 0:b86d15c6ba29 1058 #endif
Vanger 0:b86d15c6ba29 1059
Vanger 0:b86d15c6ba29 1060 #if defined(TFM_SQR3)
Vanger 0:b86d15c6ba29 1061 #include "fp_sqr_comba_3.i"
Vanger 0:b86d15c6ba29 1062 #endif
Vanger 0:b86d15c6ba29 1063 #if defined(TFM_SQR4)
Vanger 0:b86d15c6ba29 1064 #include "fp_sqr_comba_4.i"
Vanger 0:b86d15c6ba29 1065 #endif
Vanger 0:b86d15c6ba29 1066 #if defined(TFM_SQR6)
Vanger 0:b86d15c6ba29 1067 #include "fp_sqr_comba_6.i"
Vanger 0:b86d15c6ba29 1068 #endif
Vanger 0:b86d15c6ba29 1069 #if defined(TFM_SQR7)
Vanger 0:b86d15c6ba29 1070 #include "fp_sqr_comba_7.i"
Vanger 0:b86d15c6ba29 1071 #endif
Vanger 0:b86d15c6ba29 1072 #if defined(TFM_SQR8)
Vanger 0:b86d15c6ba29 1073 #include "fp_sqr_comba_8.i"
Vanger 0:b86d15c6ba29 1074 #endif
Vanger 0:b86d15c6ba29 1075 #if defined(TFM_SQR9)
Vanger 0:b86d15c6ba29 1076 #include "fp_sqr_comba_9.i"
Vanger 0:b86d15c6ba29 1077 #endif
Vanger 0:b86d15c6ba29 1078 #if defined(TFM_SQR12)
Vanger 0:b86d15c6ba29 1079 #include "fp_sqr_comba_12.i"
Vanger 0:b86d15c6ba29 1080 #endif
Vanger 0:b86d15c6ba29 1081 #if defined(TFM_SQR17)
Vanger 0:b86d15c6ba29 1082 #include "fp_sqr_comba_17.i"
Vanger 0:b86d15c6ba29 1083 #endif
Vanger 0:b86d15c6ba29 1084 #if defined(TFM_SQR20)
Vanger 0:b86d15c6ba29 1085 #include "fp_sqr_comba_20.i"
Vanger 0:b86d15c6ba29 1086 #endif
Vanger 0:b86d15c6ba29 1087 #if defined(TFM_SQR24)
Vanger 0:b86d15c6ba29 1088 #include "fp_sqr_comba_24.i"
Vanger 0:b86d15c6ba29 1089 #endif
Vanger 0:b86d15c6ba29 1090 #if defined(TFM_SQR28)
Vanger 0:b86d15c6ba29 1091 #include "fp_sqr_comba_28.i"
Vanger 0:b86d15c6ba29 1092 #endif
Vanger 0:b86d15c6ba29 1093 #if defined(TFM_SQR32)
Vanger 0:b86d15c6ba29 1094 #include "fp_sqr_comba_32.i"
Vanger 0:b86d15c6ba29 1095 #endif
Vanger 0:b86d15c6ba29 1096 #if defined(TFM_SQR48)
Vanger 0:b86d15c6ba29 1097 #include "fp_sqr_comba_48.i"
Vanger 0:b86d15c6ba29 1098 #endif
Vanger 0:b86d15c6ba29 1099 #if defined(TFM_SQR64)
Vanger 0:b86d15c6ba29 1100 #include "fp_sqr_comba_64.i"
Vanger 0:b86d15c6ba29 1101 #endif
Vanger 0:b86d15c6ba29 1102 /* end fp_sqr_comba.c asm */
Vanger 0:b86d15c6ba29 1103
Vanger 0:b86d15c6ba29 1104 /* start fp_mul_comba.c asm */
Vanger 0:b86d15c6ba29 1105 /* these are the combas. Worship them. */
Vanger 0:b86d15c6ba29 1106 #if defined(TFM_X86)
Vanger 0:b86d15c6ba29 1107 /* Generic x86 optimized code */
Vanger 0:b86d15c6ba29 1108
Vanger 0:b86d15c6ba29 1109 /* anything you need at the start */
Vanger 0:b86d15c6ba29 1110 #define COMBA_START
Vanger 0:b86d15c6ba29 1111
Vanger 0:b86d15c6ba29 1112 /* clear the chaining variables */
Vanger 0:b86d15c6ba29 1113 #define COMBA_CLEAR \
Vanger 0:b86d15c6ba29 1114 c0 = c1 = c2 = 0;
Vanger 0:b86d15c6ba29 1115
Vanger 0:b86d15c6ba29 1116 /* forward the carry to the next digit */
Vanger 0:b86d15c6ba29 1117 #define COMBA_FORWARD \
Vanger 0:b86d15c6ba29 1118 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
Vanger 0:b86d15c6ba29 1119
Vanger 0:b86d15c6ba29 1120 /* store the first sum */
Vanger 0:b86d15c6ba29 1121 #define COMBA_STORE(x) \
Vanger 0:b86d15c6ba29 1122 x = c0;
Vanger 0:b86d15c6ba29 1123
Vanger 0:b86d15c6ba29 1124 /* store the second sum [carry] */
Vanger 0:b86d15c6ba29 1125 #define COMBA_STORE2(x) \
Vanger 0:b86d15c6ba29 1126 x = c1;
Vanger 0:b86d15c6ba29 1127
Vanger 0:b86d15c6ba29 1128 /* anything you need at the end */
Vanger 0:b86d15c6ba29 1129 #define COMBA_FINI
Vanger 0:b86d15c6ba29 1130
Vanger 0:b86d15c6ba29 1131 /* this should multiply i and j */
Vanger 0:b86d15c6ba29 1132 #define MULADD(i, j) \
Vanger 0:b86d15c6ba29 1133 __asm__( \
Vanger 0:b86d15c6ba29 1134 "movl %6,%%eax \n\t" \
Vanger 0:b86d15c6ba29 1135 "mull %7 \n\t" \
Vanger 0:b86d15c6ba29 1136 "addl %%eax,%0 \n\t" \
Vanger 0:b86d15c6ba29 1137 "adcl %%edx,%1 \n\t" \
Vanger 0:b86d15c6ba29 1138 "adcl $0,%2 \n\t" \
Vanger 0:b86d15c6ba29 1139 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc");
Vanger 0:b86d15c6ba29 1140
Vanger 0:b86d15c6ba29 1141 #elif defined(TFM_X86_64)
Vanger 0:b86d15c6ba29 1142 /* x86-64 optimized */
Vanger 0:b86d15c6ba29 1143
Vanger 0:b86d15c6ba29 1144 /* anything you need at the start */
Vanger 0:b86d15c6ba29 1145 #define COMBA_START
Vanger 0:b86d15c6ba29 1146
Vanger 0:b86d15c6ba29 1147 /* clear the chaining variables */
Vanger 0:b86d15c6ba29 1148 #define COMBA_CLEAR \
Vanger 0:b86d15c6ba29 1149 c0 = c1 = c2 = 0;
Vanger 0:b86d15c6ba29 1150
Vanger 0:b86d15c6ba29 1151 /* forward the carry to the next digit */
Vanger 0:b86d15c6ba29 1152 #define COMBA_FORWARD \
Vanger 0:b86d15c6ba29 1153 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
Vanger 0:b86d15c6ba29 1154
Vanger 0:b86d15c6ba29 1155 /* store the first sum */
Vanger 0:b86d15c6ba29 1156 #define COMBA_STORE(x) \
Vanger 0:b86d15c6ba29 1157 x = c0;
Vanger 0:b86d15c6ba29 1158
Vanger 0:b86d15c6ba29 1159 /* store the second sum [carry] */
Vanger 0:b86d15c6ba29 1160 #define COMBA_STORE2(x) \
Vanger 0:b86d15c6ba29 1161 x = c1;
Vanger 0:b86d15c6ba29 1162
Vanger 0:b86d15c6ba29 1163 /* anything you need at the end */
Vanger 0:b86d15c6ba29 1164 #define COMBA_FINI
Vanger 0:b86d15c6ba29 1165
Vanger 0:b86d15c6ba29 1166 /* this should multiply i and j */
Vanger 0:b86d15c6ba29 1167 #define MULADD(i, j) \
Vanger 0:b86d15c6ba29 1168 __asm__ ( \
Vanger 0:b86d15c6ba29 1169 "movq %6,%%rax \n\t" \
Vanger 0:b86d15c6ba29 1170 "mulq %7 \n\t" \
Vanger 0:b86d15c6ba29 1171 "addq %%rax,%0 \n\t" \
Vanger 0:b86d15c6ba29 1172 "adcq %%rdx,%1 \n\t" \
Vanger 0:b86d15c6ba29 1173 "adcq $0,%2 \n\t" \
Vanger 0:b86d15c6ba29 1174 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","cc");
Vanger 0:b86d15c6ba29 1175
Vanger 0:b86d15c6ba29 1176 #elif defined(TFM_SSE2)
Vanger 0:b86d15c6ba29 1177 /* use SSE2 optimizations */
Vanger 0:b86d15c6ba29 1178
Vanger 0:b86d15c6ba29 1179 /* anything you need at the start */
Vanger 0:b86d15c6ba29 1180 #define COMBA_START
Vanger 0:b86d15c6ba29 1181
Vanger 0:b86d15c6ba29 1182 /* clear the chaining variables */
Vanger 0:b86d15c6ba29 1183 #define COMBA_CLEAR \
Vanger 0:b86d15c6ba29 1184 c0 = c1 = c2 = 0;
Vanger 0:b86d15c6ba29 1185
Vanger 0:b86d15c6ba29 1186 /* forward the carry to the next digit */
Vanger 0:b86d15c6ba29 1187 #define COMBA_FORWARD \
Vanger 0:b86d15c6ba29 1188 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
Vanger 0:b86d15c6ba29 1189
Vanger 0:b86d15c6ba29 1190 /* store the first sum */
Vanger 0:b86d15c6ba29 1191 #define COMBA_STORE(x) \
Vanger 0:b86d15c6ba29 1192 x = c0;
Vanger 0:b86d15c6ba29 1193
Vanger 0:b86d15c6ba29 1194 /* store the second sum [carry] */
Vanger 0:b86d15c6ba29 1195 #define COMBA_STORE2(x) \
Vanger 0:b86d15c6ba29 1196 x = c1;
Vanger 0:b86d15c6ba29 1197
Vanger 0:b86d15c6ba29 1198 /* anything you need at the end */
Vanger 0:b86d15c6ba29 1199 #define COMBA_FINI \
Vanger 0:b86d15c6ba29 1200 __asm__("emms");
Vanger 0:b86d15c6ba29 1201
Vanger 0:b86d15c6ba29 1202 /* this should multiply i and j */
Vanger 0:b86d15c6ba29 1203 #define MULADD(i, j) \
Vanger 0:b86d15c6ba29 1204 __asm__( \
Vanger 0:b86d15c6ba29 1205 "movd %6,%%mm0 \n\t" \
Vanger 0:b86d15c6ba29 1206 "movd %7,%%mm1 \n\t" \
Vanger 0:b86d15c6ba29 1207 "pmuludq %%mm1,%%mm0\n\t" \
Vanger 0:b86d15c6ba29 1208 "movd %%mm0,%%eax \n\t" \
Vanger 0:b86d15c6ba29 1209 "psrlq $32,%%mm0 \n\t" \
Vanger 0:b86d15c6ba29 1210 "addl %%eax,%0 \n\t" \
Vanger 0:b86d15c6ba29 1211 "movd %%mm0,%%eax \n\t" \
Vanger 0:b86d15c6ba29 1212 "adcl %%eax,%1 \n\t" \
Vanger 0:b86d15c6ba29 1213 "adcl $0,%2 \n\t" \
Vanger 0:b86d15c6ba29 1214 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","cc");
Vanger 0:b86d15c6ba29 1215
Vanger 0:b86d15c6ba29 1216 #elif defined(TFM_ARM)
Vanger 0:b86d15c6ba29 1217 /* ARM code */
Vanger 0:b86d15c6ba29 1218
Vanger 0:b86d15c6ba29 1219 #define COMBA_START
Vanger 0:b86d15c6ba29 1220
Vanger 0:b86d15c6ba29 1221 #define COMBA_CLEAR \
Vanger 0:b86d15c6ba29 1222 c0 = c1 = c2 = 0;
Vanger 0:b86d15c6ba29 1223
Vanger 0:b86d15c6ba29 1224 #define COMBA_FORWARD \
Vanger 0:b86d15c6ba29 1225 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
Vanger 0:b86d15c6ba29 1226
Vanger 0:b86d15c6ba29 1227 #define COMBA_STORE(x) \
Vanger 0:b86d15c6ba29 1228 x = c0;
Vanger 0:b86d15c6ba29 1229
Vanger 0:b86d15c6ba29 1230 #define COMBA_STORE2(x) \
Vanger 0:b86d15c6ba29 1231 x = c1;
Vanger 0:b86d15c6ba29 1232
Vanger 0:b86d15c6ba29 1233 #define COMBA_FINI
Vanger 0:b86d15c6ba29 1234
Vanger 0:b86d15c6ba29 1235 #define MULADD(i, j) \
Vanger 0:b86d15c6ba29 1236 __asm__( \
Vanger 0:b86d15c6ba29 1237 " UMULL r0,r1,%6,%7 \n\t" \
Vanger 0:b86d15c6ba29 1238 " ADDS %0,%0,r0 \n\t" \
Vanger 0:b86d15c6ba29 1239 " ADCS %1,%1,r1 \n\t" \
Vanger 0:b86d15c6ba29 1240 " ADC %2,%2,#0 \n\t" \
Vanger 0:b86d15c6ba29 1241 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "cc");
Vanger 0:b86d15c6ba29 1242
Vanger 0:b86d15c6ba29 1243 #elif defined(TFM_PPC32)
Vanger 0:b86d15c6ba29 1244 /* For 32-bit PPC */
Vanger 0:b86d15c6ba29 1245
Vanger 0:b86d15c6ba29 1246 #define COMBA_START
Vanger 0:b86d15c6ba29 1247
Vanger 0:b86d15c6ba29 1248 #define COMBA_CLEAR \
Vanger 0:b86d15c6ba29 1249 c0 = c1 = c2 = 0;
Vanger 0:b86d15c6ba29 1250
Vanger 0:b86d15c6ba29 1251 #define COMBA_FORWARD \
Vanger 0:b86d15c6ba29 1252 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
Vanger 0:b86d15c6ba29 1253
Vanger 0:b86d15c6ba29 1254 #define COMBA_STORE(x) \
Vanger 0:b86d15c6ba29 1255 x = c0;
Vanger 0:b86d15c6ba29 1256
Vanger 0:b86d15c6ba29 1257 #define COMBA_STORE2(x) \
Vanger 0:b86d15c6ba29 1258 x = c1;
Vanger 0:b86d15c6ba29 1259
Vanger 0:b86d15c6ba29 1260 #define COMBA_FINI
Vanger 0:b86d15c6ba29 1261
Vanger 0:b86d15c6ba29 1262 /* untested: will mulhwu change the flags? Docs say no */
Vanger 0:b86d15c6ba29 1263 #define MULADD(i, j) \
Vanger 0:b86d15c6ba29 1264 __asm__( \
Vanger 0:b86d15c6ba29 1265 " mullw 16,%6,%7 \n\t" \
Vanger 0:b86d15c6ba29 1266 " addc %0,%0,16 \n\t" \
Vanger 0:b86d15c6ba29 1267 " mulhwu 16,%6,%7 \n\t" \
Vanger 0:b86d15c6ba29 1268 " adde %1,%1,16 \n\t" \
Vanger 0:b86d15c6ba29 1269 " addze %2,%2 \n\t" \
Vanger 0:b86d15c6ba29 1270 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16");
Vanger 0:b86d15c6ba29 1271
Vanger 0:b86d15c6ba29 1272 #elif defined(TFM_PPC64)
Vanger 0:b86d15c6ba29 1273 /* For 64-bit PPC */
Vanger 0:b86d15c6ba29 1274
Vanger 0:b86d15c6ba29 1275 #define COMBA_START
Vanger 0:b86d15c6ba29 1276
Vanger 0:b86d15c6ba29 1277 #define COMBA_CLEAR \
Vanger 0:b86d15c6ba29 1278 c0 = c1 = c2 = 0;
Vanger 0:b86d15c6ba29 1279
Vanger 0:b86d15c6ba29 1280 #define COMBA_FORWARD \
Vanger 0:b86d15c6ba29 1281 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
Vanger 0:b86d15c6ba29 1282
Vanger 0:b86d15c6ba29 1283 #define COMBA_STORE(x) \
Vanger 0:b86d15c6ba29 1284 x = c0;
Vanger 0:b86d15c6ba29 1285
Vanger 0:b86d15c6ba29 1286 #define COMBA_STORE2(x) \
Vanger 0:b86d15c6ba29 1287 x = c1;
Vanger 0:b86d15c6ba29 1288
Vanger 0:b86d15c6ba29 1289 #define COMBA_FINI
Vanger 0:b86d15c6ba29 1290
Vanger 0:b86d15c6ba29 1291 /* untested: will mulhwu change the flags? Docs say no */
Vanger 0:b86d15c6ba29 1292 #define MULADD(i, j) \
Vanger 0:b86d15c6ba29 1293 ____asm__( \
Vanger 0:b86d15c6ba29 1294 " mulld 16,%6,%7 \n\t" \
Vanger 0:b86d15c6ba29 1295 " addc %0,%0,16 \n\t" \
Vanger 0:b86d15c6ba29 1296 " mulhdu 16,%6,%7 \n\t" \
Vanger 0:b86d15c6ba29 1297 " adde %1,%1,16 \n\t" \
Vanger 0:b86d15c6ba29 1298 " addze %2,%2 \n\t" \
Vanger 0:b86d15c6ba29 1299 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16");
Vanger 0:b86d15c6ba29 1300
Vanger 0:b86d15c6ba29 1301 #elif defined(TFM_AVR32)
Vanger 0:b86d15c6ba29 1302
Vanger 0:b86d15c6ba29 1303 /* ISO C code */
Vanger 0:b86d15c6ba29 1304
Vanger 0:b86d15c6ba29 1305 #define COMBA_START
Vanger 0:b86d15c6ba29 1306
Vanger 0:b86d15c6ba29 1307 #define COMBA_CLEAR \
Vanger 0:b86d15c6ba29 1308 c0 = c1 = c2 = 0;
Vanger 0:b86d15c6ba29 1309
Vanger 0:b86d15c6ba29 1310 #define COMBA_FORWARD \
Vanger 0:b86d15c6ba29 1311 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
Vanger 0:b86d15c6ba29 1312
Vanger 0:b86d15c6ba29 1313 #define COMBA_STORE(x) \
Vanger 0:b86d15c6ba29 1314 x = c0;
Vanger 0:b86d15c6ba29 1315
Vanger 0:b86d15c6ba29 1316 #define COMBA_STORE2(x) \
Vanger 0:b86d15c6ba29 1317 x = c1;
Vanger 0:b86d15c6ba29 1318
Vanger 0:b86d15c6ba29 1319 #define COMBA_FINI
Vanger 0:b86d15c6ba29 1320
Vanger 0:b86d15c6ba29 1321 #define MULADD(i, j) \
Vanger 0:b86d15c6ba29 1322 ____asm__( \
Vanger 0:b86d15c6ba29 1323 " mulu.d r2,%6,%7 \n\t"\
Vanger 0:b86d15c6ba29 1324 " add %0,r2 \n\t"\
Vanger 0:b86d15c6ba29 1325 " adc %1,%1,r3 \n\t"\
Vanger 0:b86d15c6ba29 1326 " acr %2 \n\t"\
Vanger 0:b86d15c6ba29 1327 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2","r3");
Vanger 0:b86d15c6ba29 1328
Vanger 0:b86d15c6ba29 1329 #else
/* ISO C code -- portable fallback used when no platform-specific
   assembly variant above was selected. */

#define COMBA_START

/* zero the three-digit accumulator c2:c1:c0 */
#define COMBA_CLEAR \
   c0 = c1 = c2 = 0;

/* finish a column: shift the accumulator down one digit */
#define COMBA_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* emit the lowest accumulator digit */
#define COMBA_STORE(x) \
   x = c0;

/* emit the middle accumulator digit */
#define COMBA_STORE2(x) \
   x = c1;

#define COMBA_FINI

/* c2:c1:c0 += i * j, performed in double-width fp_word arithmetic:
   low digit first, then the carries are folded into c1 and c2 */
#define MULADD(i, j)                                       \
   do { fp_word w;                                         \
        w  = (fp_word)c0 + ((fp_word)i) * ((fp_word)j);    \
        c0 = (fp_digit)w;                                  \
        w  = (fp_word)c1 + (w >> DIGIT_BIT);               \
        c1 = (fp_digit)w;                                  \
        c2 += (fp_digit)(w >> DIGIT_BIT);                  \
   } while (0);
Vanger 0:b86d15c6ba29 1354
Vanger 0:b86d15c6ba29 1355 #endif
Vanger 0:b86d15c6ba29 1356
Vanger 0:b86d15c6ba29 1357
Vanger 0:b86d15c6ba29 1358 #ifdef TFM_SMALL_SET
Vanger 0:b86d15c6ba29 1359 #include "fp_mul_comba_small_set.i"
Vanger 0:b86d15c6ba29 1360 #endif
Vanger 0:b86d15c6ba29 1361
Vanger 0:b86d15c6ba29 1362 #if defined(TFM_MUL3)
Vanger 0:b86d15c6ba29 1363 #include "fp_mul_comba_3.i"
Vanger 0:b86d15c6ba29 1364 #endif
Vanger 0:b86d15c6ba29 1365 #if defined(TFM_MUL4)
Vanger 0:b86d15c6ba29 1366 #include "fp_mul_comba_4.i"
Vanger 0:b86d15c6ba29 1367 #endif
Vanger 0:b86d15c6ba29 1368 #if defined(TFM_MUL6)
Vanger 0:b86d15c6ba29 1369 #include "fp_mul_comba_6.i"
Vanger 0:b86d15c6ba29 1370 #endif
Vanger 0:b86d15c6ba29 1371 #if defined(TFM_MUL7)
Vanger 0:b86d15c6ba29 1372 #include "fp_mul_comba_7.i"
Vanger 0:b86d15c6ba29 1373 #endif
Vanger 0:b86d15c6ba29 1374 #if defined(TFM_MUL8)
Vanger 0:b86d15c6ba29 1375 #include "fp_mul_comba_8.i"
Vanger 0:b86d15c6ba29 1376 #endif
Vanger 0:b86d15c6ba29 1377 #if defined(TFM_MUL9)
Vanger 0:b86d15c6ba29 1378 #include "fp_mul_comba_9.i"
Vanger 0:b86d15c6ba29 1379 #endif
Vanger 0:b86d15c6ba29 1380 #if defined(TFM_MUL12)
Vanger 0:b86d15c6ba29 1381 #include "fp_mul_comba_12.i"
Vanger 0:b86d15c6ba29 1382 #endif
Vanger 0:b86d15c6ba29 1383 #if defined(TFM_MUL17)
Vanger 0:b86d15c6ba29 1384 #include "fp_mul_comba_17.i"
Vanger 0:b86d15c6ba29 1385 #endif
Vanger 0:b86d15c6ba29 1386 #if defined(TFM_MUL20)
Vanger 0:b86d15c6ba29 1387 #include "fp_mul_comba_20.i"
Vanger 0:b86d15c6ba29 1388 #endif
Vanger 0:b86d15c6ba29 1389 #if defined(TFM_MUL24)
Vanger 0:b86d15c6ba29 1390 #include "fp_mul_comba_24.i"
Vanger 0:b86d15c6ba29 1391 #endif
Vanger 0:b86d15c6ba29 1392 #if defined(TFM_MUL28)
Vanger 0:b86d15c6ba29 1393 #include "fp_mul_comba_28.i"
Vanger 0:b86d15c6ba29 1394 #endif
Vanger 0:b86d15c6ba29 1395 #if defined(TFM_MUL32)
Vanger 0:b86d15c6ba29 1396 #include "fp_mul_comba_32.i"
Vanger 0:b86d15c6ba29 1397 #endif
Vanger 0:b86d15c6ba29 1398 #if defined(TFM_MUL48)
Vanger 0:b86d15c6ba29 1399 #include "fp_mul_comba_48.i"
Vanger 0:b86d15c6ba29 1400 #endif
Vanger 0:b86d15c6ba29 1401 #if defined(TFM_MUL64)
Vanger 0:b86d15c6ba29 1402 #include "fp_mul_comba_64.i"
Vanger 0:b86d15c6ba29 1403 #endif
Vanger 0:b86d15c6ba29 1404
Vanger 0:b86d15c6ba29 1405 /* end fp_mul_comba.c asm */
Vanger 0:b86d15c6ba29 1406