fork of cyassl-lib

Dependents:   TLS_cyassl TLS_cyassl

Committer:
ashleymills
Date:
Thu Sep 05 10:33:04 2013 +0000
Revision:
0:714293de3836
Initial commit

Who changed what in which revision?

User | Revision | Line number | New contents of line
ashleymills 0:714293de3836 1 /* asm.c
ashleymills 0:714293de3836 2 *
ashleymills 0:714293de3836 3 * Copyright (C) 2006-2013 wolfSSL Inc.
ashleymills 0:714293de3836 4 *
ashleymills 0:714293de3836 5 * This file is part of CyaSSL.
ashleymills 0:714293de3836 6 *
ashleymills 0:714293de3836 7 * CyaSSL is free software; you can redistribute it and/or modify
ashleymills 0:714293de3836 8 * it under the terms of the GNU General Public License as published by
ashleymills 0:714293de3836 9 * the Free Software Foundation; either version 2 of the License, or
ashleymills 0:714293de3836 10 * (at your option) any later version.
ashleymills 0:714293de3836 11 *
ashleymills 0:714293de3836 12 * CyaSSL is distributed in the hope that it will be useful,
ashleymills 0:714293de3836 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
ashleymills 0:714293de3836 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
ashleymills 0:714293de3836 15 * GNU General Public License for more details.
ashleymills 0:714293de3836 16 *
ashleymills 0:714293de3836 17 * You should have received a copy of the GNU General Public License
ashleymills 0:714293de3836 18 * along with this program; if not, write to the Free Software
ashleymills 0:714293de3836 19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
ashleymills 0:714293de3836 20 */
ashleymills 0:714293de3836 21
ashleymills 0:714293de3836 22 #ifdef HAVE_CONFIG_H
ashleymills 0:714293de3836 23 #include <config.h>
ashleymills 0:714293de3836 24 #endif
ashleymills 0:714293de3836 25
ashleymills 0:714293de3836 26 #include <cyassl/ctaocrypt/settings.h>
ashleymills 0:714293de3836 27
ashleymills 0:714293de3836 28 /*
ashleymills 0:714293de3836 29 * Based on public domain TomsFastMath 0.10 by Tom St Denis, tomstdenis@iahu.ca,
ashleymills 0:714293de3836 30 * http://math.libtomcrypt.com
ashleymills 0:714293de3836 31 */
ashleymills 0:714293de3836 32
ashleymills 0:714293de3836 33
ashleymills 0:714293de3836 34 /******************************************************************/
ashleymills 0:714293de3836 35 /* fp_montgomery_reduce.c asm or generic */
ashleymills 0:714293de3836 36 #if defined(TFM_X86) && !defined(TFM_SSE2)
ashleymills 0:714293de3836 37 /* x86-32 code */
ashleymills 0:714293de3836 38 /* MONT_START, MONT_FINI and LOOP_END expand to nothing here; LOOP_START sets mu = c[x] * mp. */
ashleymills 0:714293de3836 39 #define MONT_START
ashleymills 0:714293de3836 40 #define MONT_FINI
ashleymills 0:714293de3836 41 #define LOOP_END
ashleymills 0:714293de3836 42 #define LOOP_START \
ashleymills 0:714293de3836 43 mu = c[x] * mp
ashleymills 0:714293de3836 44 /* INNERMUL: _c[LO] += mu * (*tmpm++) + cy; the high word of that sum (edx) becomes the new cy. */
ashleymills 0:714293de3836 45 #define INNERMUL \
ashleymills 0:714293de3836 46 __asm__( \
ashleymills 0:714293de3836 47 "movl %5,%%eax \n\t" \
ashleymills 0:714293de3836 48 "mull %4 \n\t" \
ashleymills 0:714293de3836 49 "addl %1,%%eax \n\t" \
ashleymills 0:714293de3836 50 "adcl $0,%%edx \n\t" \
ashleymills 0:714293de3836 51 "addl %%eax,%0 \n\t" \
ashleymills 0:714293de3836 52 "adcl $0,%%edx \n\t" \
ashleymills 0:714293de3836 53 "movl %%edx,%1 \n\t" \
ashleymills 0:714293de3836 54 :"=g"(_c[LO]), "=r"(cy) \
ashleymills 0:714293de3836 55 :"0"(_c[LO]), "1"(cy), "g"(mu), "g"(*tmpm++) \
ashleymills 0:714293de3836 56 : "%eax", "%edx", "cc")
ashleymills 0:714293de3836 57 /* PROPCARRY: _c[LO] += cy; cy is then replaced by the carry flag of that add (0 or 1). */
ashleymills 0:714293de3836 58 #define PROPCARRY \
ashleymills 0:714293de3836 59 __asm__( \
ashleymills 0:714293de3836 60 "addl %1,%0 \n\t" \
ashleymills 0:714293de3836 61 "setb %%al \n\t" \
ashleymills 0:714293de3836 62 "movzbl %%al,%1 \n\t" \
ashleymills 0:714293de3836 63 :"=g"(_c[LO]), "=r"(cy) \
ashleymills 0:714293de3836 64 :"0"(_c[LO]), "1"(cy) \
ashleymills 0:714293de3836 65 : "%eax", "cc")
ashleymills 0:714293de3836 66
ashleymills 0:714293de3836 67 /******************************************************************/
ashleymills 0:714293de3836 68 #elif defined(TFM_X86_64)
ashleymills 0:714293de3836 69 /* x86-64 code */
ashleymills 0:714293de3836 70 /* MONT_START, MONT_FINI and LOOP_END expand to nothing here; LOOP_START sets mu = c[x] * mp. */
ashleymills 0:714293de3836 71 #define MONT_START
ashleymills 0:714293de3836 72 #define MONT_FINI
ashleymills 0:714293de3836 73 #define LOOP_END
ashleymills 0:714293de3836 74 #define LOOP_START \
ashleymills 0:714293de3836 75 mu = c[x] * mp
ashleymills 0:714293de3836 76 /* INNERMUL: _c[LO] += mu * (*tmpm++) + cy using 64-bit registers; the high word (rdx) becomes the new cy. */
ashleymills 0:714293de3836 77 #define INNERMUL \
ashleymills 0:714293de3836 78 __asm__( \
ashleymills 0:714293de3836 79 "movq %5,%%rax \n\t" \
ashleymills 0:714293de3836 80 "mulq %4 \n\t" \
ashleymills 0:714293de3836 81 "addq %1,%%rax \n\t" \
ashleymills 0:714293de3836 82 "adcq $0,%%rdx \n\t" \
ashleymills 0:714293de3836 83 "addq %%rax,%0 \n\t" \
ashleymills 0:714293de3836 84 "adcq $0,%%rdx \n\t" \
ashleymills 0:714293de3836 85 "movq %%rdx,%1 \n\t" \
ashleymills 0:714293de3836 86 :"=g"(_c[LO]), "=r"(cy) \
ashleymills 0:714293de3836 87 :"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \
ashleymills 0:714293de3836 88 : "%rax", "%rdx", "cc")
ashleymills 0:714293de3836 89 /* INNERMUL8: eight unrolled INNERMUL steps over _c[0..7] and tmpm[0..7] (byte offsets 0x0-0x38); the asm does not advance _c or tmpm. */
ashleymills 0:714293de3836 90 #define INNERMUL8 \
ashleymills 0:714293de3836 91 __asm__( \
ashleymills 0:714293de3836 92 "movq 0(%5),%%rax \n\t" \
ashleymills 0:714293de3836 93 "movq 0(%2),%%r10 \n\t" \
ashleymills 0:714293de3836 94 "movq 0x8(%5),%%r11 \n\t" \
ashleymills 0:714293de3836 95 "mulq %4 \n\t" \
ashleymills 0:714293de3836 96 "addq %%r10,%%rax \n\t" \
ashleymills 0:714293de3836 97 "adcq $0,%%rdx \n\t" \
ashleymills 0:714293de3836 98 "movq 0x8(%2),%%r10 \n\t" \
ashleymills 0:714293de3836 99 "addq %3,%%rax \n\t" \
ashleymills 0:714293de3836 100 "adcq $0,%%rdx \n\t" \
ashleymills 0:714293de3836 101 "movq %%rax,0(%0) \n\t" \
ashleymills 0:714293de3836 102 "movq %%rdx,%1 \n\t" \
ashleymills 0:714293de3836 103 \
ashleymills 0:714293de3836 104 "movq %%r11,%%rax \n\t" \
ashleymills 0:714293de3836 105 "movq 0x10(%5),%%r11 \n\t" \
ashleymills 0:714293de3836 106 "mulq %4 \n\t" \
ashleymills 0:714293de3836 107 "addq %%r10,%%rax \n\t" \
ashleymills 0:714293de3836 108 "adcq $0,%%rdx \n\t" \
ashleymills 0:714293de3836 109 "movq 0x10(%2),%%r10 \n\t" \
ashleymills 0:714293de3836 110 "addq %3,%%rax \n\t" \
ashleymills 0:714293de3836 111 "adcq $0,%%rdx \n\t" \
ashleymills 0:714293de3836 112 "movq %%rax,0x8(%0) \n\t" \
ashleymills 0:714293de3836 113 "movq %%rdx,%1 \n\t" \
ashleymills 0:714293de3836 114 \
ashleymills 0:714293de3836 115 "movq %%r11,%%rax \n\t" \
ashleymills 0:714293de3836 116 "movq 0x18(%5),%%r11 \n\t" \
ashleymills 0:714293de3836 117 "mulq %4 \n\t" \
ashleymills 0:714293de3836 118 "addq %%r10,%%rax \n\t" \
ashleymills 0:714293de3836 119 "adcq $0,%%rdx \n\t" \
ashleymills 0:714293de3836 120 "movq 0x18(%2),%%r10 \n\t" \
ashleymills 0:714293de3836 121 "addq %3,%%rax \n\t" \
ashleymills 0:714293de3836 122 "adcq $0,%%rdx \n\t" \
ashleymills 0:714293de3836 123 "movq %%rax,0x10(%0) \n\t" \
ashleymills 0:714293de3836 124 "movq %%rdx,%1 \n\t" \
ashleymills 0:714293de3836 125 \
ashleymills 0:714293de3836 126 "movq %%r11,%%rax \n\t" \
ashleymills 0:714293de3836 127 "movq 0x20(%5),%%r11 \n\t" \
ashleymills 0:714293de3836 128 "mulq %4 \n\t" \
ashleymills 0:714293de3836 129 "addq %%r10,%%rax \n\t" \
ashleymills 0:714293de3836 130 "adcq $0,%%rdx \n\t" \
ashleymills 0:714293de3836 131 "movq 0x20(%2),%%r10 \n\t" \
ashleymills 0:714293de3836 132 "addq %3,%%rax \n\t" \
ashleymills 0:714293de3836 133 "adcq $0,%%rdx \n\t" \
ashleymills 0:714293de3836 134 "movq %%rax,0x18(%0) \n\t" \
ashleymills 0:714293de3836 135 "movq %%rdx,%1 \n\t" \
ashleymills 0:714293de3836 136 \
ashleymills 0:714293de3836 137 "movq %%r11,%%rax \n\t" \
ashleymills 0:714293de3836 138 "movq 0x28(%5),%%r11 \n\t" \
ashleymills 0:714293de3836 139 "mulq %4 \n\t" \
ashleymills 0:714293de3836 140 "addq %%r10,%%rax \n\t" \
ashleymills 0:714293de3836 141 "adcq $0,%%rdx \n\t" \
ashleymills 0:714293de3836 142 "movq 0x28(%2),%%r10 \n\t" \
ashleymills 0:714293de3836 143 "addq %3,%%rax \n\t" \
ashleymills 0:714293de3836 144 "adcq $0,%%rdx \n\t" \
ashleymills 0:714293de3836 145 "movq %%rax,0x20(%0) \n\t" \
ashleymills 0:714293de3836 146 "movq %%rdx,%1 \n\t" \
ashleymills 0:714293de3836 147 \
ashleymills 0:714293de3836 148 "movq %%r11,%%rax \n\t" \
ashleymills 0:714293de3836 149 "movq 0x30(%5),%%r11 \n\t" \
ashleymills 0:714293de3836 150 "mulq %4 \n\t" \
ashleymills 0:714293de3836 151 "addq %%r10,%%rax \n\t" \
ashleymills 0:714293de3836 152 "adcq $0,%%rdx \n\t" \
ashleymills 0:714293de3836 153 "movq 0x30(%2),%%r10 \n\t" \
ashleymills 0:714293de3836 154 "addq %3,%%rax \n\t" \
ashleymills 0:714293de3836 155 "adcq $0,%%rdx \n\t" \
ashleymills 0:714293de3836 156 "movq %%rax,0x28(%0) \n\t" \
ashleymills 0:714293de3836 157 "movq %%rdx,%1 \n\t" \
ashleymills 0:714293de3836 158 \
ashleymills 0:714293de3836 159 "movq %%r11,%%rax \n\t" \
ashleymills 0:714293de3836 160 "movq 0x38(%5),%%r11 \n\t" \
ashleymills 0:714293de3836 161 "mulq %4 \n\t" \
ashleymills 0:714293de3836 162 "addq %%r10,%%rax \n\t" \
ashleymills 0:714293de3836 163 "adcq $0,%%rdx \n\t" \
ashleymills 0:714293de3836 164 "movq 0x38(%2),%%r10 \n\t" \
ashleymills 0:714293de3836 165 "addq %3,%%rax \n\t" \
ashleymills 0:714293de3836 166 "adcq $0,%%rdx \n\t" \
ashleymills 0:714293de3836 167 "movq %%rax,0x30(%0) \n\t" \
ashleymills 0:714293de3836 168 "movq %%rdx,%1 \n\t" \
ashleymills 0:714293de3836 169 \
ashleymills 0:714293de3836 170 "movq %%r11,%%rax \n\t" \
ashleymills 0:714293de3836 171 "mulq %4 \n\t" \
ashleymills 0:714293de3836 172 "addq %%r10,%%rax \n\t" \
ashleymills 0:714293de3836 173 "adcq $0,%%rdx \n\t" \
ashleymills 0:714293de3836 174 "addq %3,%%rax \n\t" \
ashleymills 0:714293de3836 175 "adcq $0,%%rdx \n\t" \
ashleymills 0:714293de3836 176 "movq %%rax,0x38(%0) \n\t" \
ashleymills 0:714293de3836 177 "movq %%rdx,%1 \n\t" \
ashleymills 0:714293de3836 178 \
ashleymills 0:714293de3836 179 :"=r"(_c), "=r"(cy) \
ashleymills 0:714293de3836 180 : "0"(_c), "1"(cy), "g"(mu), "r"(tmpm)\
ashleymills 0:714293de3836 181 : "%rax", "%rdx", "%r10", "%r11", "cc")
ashleymills 0:714293de3836 182 /* NOTE(review): INNERMUL8 stores through _c but lists no "memory" clobber - confirm the compiler cannot reorder accesses around it. */
ashleymills 0:714293de3836 183 /* PROPCARRY: _c[LO] += cy; cy is then replaced by the carry flag of that add (0 or 1). */
ashleymills 0:714293de3836 184 #define PROPCARRY \
ashleymills 0:714293de3836 185 __asm__( \
ashleymills 0:714293de3836 186 "addq %1,%0 \n\t" \
ashleymills 0:714293de3836 187 "setb %%al \n\t" \
ashleymills 0:714293de3836 188 "movzbq %%al,%1 \n\t" \
ashleymills 0:714293de3836 189 :"=g"(_c[LO]), "=r"(cy) \
ashleymills 0:714293de3836 190 :"0"(_c[LO]), "1"(cy) \
ashleymills 0:714293de3836 191 : "%rax", "cc")
ashleymills 0:714293de3836 192
ashleymills 0:714293de3836 193 /******************************************************************/
ashleymills 0:714293de3836 194 #elif defined(TFM_SSE2)
ashleymills 0:714293de3836 195 /* SSE2 code (assumes 32-bit fp_digits) */
ashleymills 0:714293de3836 196 /* MMX register assignments:
ashleymills 0:714293de3836 197 * mm0 *tmpm++, then Mu * (*tmpm++)
ashleymills 0:714293de3836 198 * mm1 c[x], then Mu
ashleymills 0:714293de3836 199 * mm2 mp
ashleymills 0:714293de3836 200 * mm3 cy
ashleymills 0:714293de3836 201 * mm4 _c[LO]
ashleymills 0:714293de3836 202 */
ashleymills 0:714293de3836 203 /* MONT_START loads mp into mm2; MONT_FINI issues emms. */
ashleymills 0:714293de3836 204 #define MONT_START \
ashleymills 0:714293de3836 205 __asm__("movd %0,%%mm2"::"g"(mp))
ashleymills 0:714293de3836 206
ashleymills 0:714293de3836 207 #define MONT_FINI \
ashleymills 0:714293de3836 208 __asm__("emms")
ashleymills 0:714293de3836 209 /* LOOP_START: mm1 = c[x] * mm2 (mp) via pmuludq, and mm3 (the running carry) is cleared. */
ashleymills 0:714293de3836 210 #define LOOP_START \
ashleymills 0:714293de3836 211 __asm__( \
ashleymills 0:714293de3836 212 "movd %0,%%mm1 \n\t" \
ashleymills 0:714293de3836 213 "pxor %%mm3,%%mm3 \n\t" \
ashleymills 0:714293de3836 214 "pmuludq %%mm2,%%mm1 \n\t" \
ashleymills 0:714293de3836 215 :: "g"(c[x]))
ashleymills 0:714293de3836 216 /* INNERMUL: mm3 += _c[LO] + mm1 * (*tmpm++); the low 32 bits go back to _c[LO], then mm3 >>= 32 keeps the carry. */
ashleymills 0:714293de3836 217 /* pmuludq on mmx registers does a 32x32->64 multiply. */
ashleymills 0:714293de3836 218 #define INNERMUL \
ashleymills 0:714293de3836 219 __asm__( \
ashleymills 0:714293de3836 220 "movd %1,%%mm4 \n\t" \
ashleymills 0:714293de3836 221 "movd %2,%%mm0 \n\t" \
ashleymills 0:714293de3836 222 "paddq %%mm4,%%mm3 \n\t" \
ashleymills 0:714293de3836 223 "pmuludq %%mm1,%%mm0 \n\t" \
ashleymills 0:714293de3836 224 "paddq %%mm0,%%mm3 \n\t" \
ashleymills 0:714293de3836 225 "movd %%mm3,%0 \n\t" \
ashleymills 0:714293de3836 226 "psrlq $32, %%mm3 \n\t" \
ashleymills 0:714293de3836 227 :"=g"(_c[LO]) : "0"(_c[LO]), "g"(*tmpm++) );
ashleymills 0:714293de3836 228 /* INNERMUL8: eight unrolled INNERMUL steps over _c[0..7] and tmpm[0..7] (byte offsets 0-28); mm5-mm7 hold words loaded ahead of use. */
ashleymills 0:714293de3836 229 #define INNERMUL8 \
ashleymills 0:714293de3836 230 __asm__( \
ashleymills 0:714293de3836 231 "movd 0(%1),%%mm4 \n\t" \
ashleymills 0:714293de3836 232 "movd 0(%2),%%mm0 \n\t" \
ashleymills 0:714293de3836 233 "paddq %%mm4,%%mm3 \n\t" \
ashleymills 0:714293de3836 234 "pmuludq %%mm1,%%mm0 \n\t" \
ashleymills 0:714293de3836 235 "movd 4(%2),%%mm5 \n\t" \
ashleymills 0:714293de3836 236 "paddq %%mm0,%%mm3 \n\t" \
ashleymills 0:714293de3836 237 "movd 4(%1),%%mm6 \n\t" \
ashleymills 0:714293de3836 238 "movd %%mm3,0(%0) \n\t" \
ashleymills 0:714293de3836 239 "psrlq $32, %%mm3 \n\t" \
ashleymills 0:714293de3836 240 \
ashleymills 0:714293de3836 241 "paddq %%mm6,%%mm3 \n\t" \
ashleymills 0:714293de3836 242 "pmuludq %%mm1,%%mm5 \n\t" \
ashleymills 0:714293de3836 243 "movd 8(%2),%%mm6 \n\t" \
ashleymills 0:714293de3836 244 "paddq %%mm5,%%mm3 \n\t" \
ashleymills 0:714293de3836 245 "movd 8(%1),%%mm7 \n\t" \
ashleymills 0:714293de3836 246 "movd %%mm3,4(%0) \n\t" \
ashleymills 0:714293de3836 247 "psrlq $32, %%mm3 \n\t" \
ashleymills 0:714293de3836 248 \
ashleymills 0:714293de3836 249 "paddq %%mm7,%%mm3 \n\t" \
ashleymills 0:714293de3836 250 "pmuludq %%mm1,%%mm6 \n\t" \
ashleymills 0:714293de3836 251 "movd 12(%2),%%mm7 \n\t" \
ashleymills 0:714293de3836 252 "paddq %%mm6,%%mm3 \n\t" \
ashleymills 0:714293de3836 253 "movd 12(%1),%%mm5 \n\t" \
ashleymills 0:714293de3836 254 "movd %%mm3,8(%0) \n\t" \
ashleymills 0:714293de3836 255 "psrlq $32, %%mm3 \n\t" \
ashleymills 0:714293de3836 256 \
ashleymills 0:714293de3836 257 "paddq %%mm5,%%mm3 \n\t" \
ashleymills 0:714293de3836 258 "pmuludq %%mm1,%%mm7 \n\t" \
ashleymills 0:714293de3836 259 "movd 16(%2),%%mm5 \n\t" \
ashleymills 0:714293de3836 260 "paddq %%mm7,%%mm3 \n\t" \
ashleymills 0:714293de3836 261 "movd 16(%1),%%mm6 \n\t" \
ashleymills 0:714293de3836 262 "movd %%mm3,12(%0) \n\t" \
ashleymills 0:714293de3836 263 "psrlq $32, %%mm3 \n\t" \
ashleymills 0:714293de3836 264 \
ashleymills 0:714293de3836 265 "paddq %%mm6,%%mm3 \n\t" \
ashleymills 0:714293de3836 266 "pmuludq %%mm1,%%mm5 \n\t" \
ashleymills 0:714293de3836 267 "movd 20(%2),%%mm6 \n\t" \
ashleymills 0:714293de3836 268 "paddq %%mm5,%%mm3 \n\t" \
ashleymills 0:714293de3836 269 "movd 20(%1),%%mm7 \n\t" \
ashleymills 0:714293de3836 270 "movd %%mm3,16(%0) \n\t" \
ashleymills 0:714293de3836 271 "psrlq $32, %%mm3 \n\t" \
ashleymills 0:714293de3836 272 \
ashleymills 0:714293de3836 273 "paddq %%mm7,%%mm3 \n\t" \
ashleymills 0:714293de3836 274 "pmuludq %%mm1,%%mm6 \n\t" \
ashleymills 0:714293de3836 275 "movd 24(%2),%%mm7 \n\t" \
ashleymills 0:714293de3836 276 "paddq %%mm6,%%mm3 \n\t" \
ashleymills 0:714293de3836 277 "movd 24(%1),%%mm5 \n\t" \
ashleymills 0:714293de3836 278 "movd %%mm3,20(%0) \n\t" \
ashleymills 0:714293de3836 279 "psrlq $32, %%mm3 \n\t" \
ashleymills 0:714293de3836 280 \
ashleymills 0:714293de3836 281 "paddq %%mm5,%%mm3 \n\t" \
ashleymills 0:714293de3836 282 "pmuludq %%mm1,%%mm7 \n\t" \
ashleymills 0:714293de3836 283 "movd 28(%2),%%mm5 \n\t" \
ashleymills 0:714293de3836 284 "paddq %%mm7,%%mm3 \n\t" \
ashleymills 0:714293de3836 285 "movd 28(%1),%%mm6 \n\t" \
ashleymills 0:714293de3836 286 "movd %%mm3,24(%0) \n\t" \
ashleymills 0:714293de3836 287 "psrlq $32, %%mm3 \n\t" \
ashleymills 0:714293de3836 288 \
ashleymills 0:714293de3836 289 "paddq %%mm6,%%mm3 \n\t" \
ashleymills 0:714293de3836 290 "pmuludq %%mm1,%%mm5 \n\t" \
ashleymills 0:714293de3836 291 "paddq %%mm5,%%mm3 \n\t" \
ashleymills 0:714293de3836 292 "movd %%mm3,28(%0) \n\t" \
ashleymills 0:714293de3836 293 "psrlq $32, %%mm3 \n\t" \
ashleymills 0:714293de3836 294 :"=r"(_c) : "0"(_c), "r"(tmpm) );
ashleymills 0:714293de3836 295
ashleymills 0:714293de3836 296 /* TAO switched tmpm from "g" to "r" after gcc tried to index the indexed stack
ashleymills 0:714293de3836 297 pointer */
ashleymills 0:714293de3836 298 /* LOOP_END: move the carry accumulated in mm3 out into cy. */
ashleymills 0:714293de3836 299 #define LOOP_END \
ashleymills 0:714293de3836 300 __asm__( "movd %%mm3,%0 \n" :"=r"(cy))
ashleymills 0:714293de3836 301 /* PROPCARRY: _c[LO] += cy; cy is then replaced by the carry flag of that add (0 or 1). */
ashleymills 0:714293de3836 302 #define PROPCARRY \
ashleymills 0:714293de3836 303 __asm__( \
ashleymills 0:714293de3836 304 "addl %1,%0 \n\t" \
ashleymills 0:714293de3836 305 "setb %%al \n\t" \
ashleymills 0:714293de3836 306 "movzbl %%al,%1 \n\t" \
ashleymills 0:714293de3836 307 :"=g"(_c[LO]), "=r"(cy) \
ashleymills 0:714293de3836 308 :"0"(_c[LO]), "1"(cy) \
ashleymills 0:714293de3836 309 : "%eax", "cc")
ashleymills 0:714293de3836 310
ashleymills 0:714293de3836 311 /******************************************************************/
ashleymills 0:714293de3836 312 #elif defined(TFM_ARM)
ashleymills 0:714293de3836 313 /* ARMv4 code */
ashleymills 0:714293de3836 314 /* MONT_START, MONT_FINI and LOOP_END expand to nothing here; LOOP_START sets mu = c[x] * mp. */
ashleymills 0:714293de3836 315 #define MONT_START
ashleymills 0:714293de3836 316 #define MONT_FINI
ashleymills 0:714293de3836 317 #define LOOP_END
ashleymills 0:714293de3836 318 #define LOOP_START \
ashleymills 0:714293de3836 319 mu = c[x] * mp
ashleymills 0:714293de3836 320 /* INNERMUL: r0 = _c[0] + cy with the carry kept in cy, then UMLAL adds mu * (*tmpm++) into cy:r0 and r0 is stored back to _c[0]. */
ashleymills 0:714293de3836 321 #define INNERMUL \
ashleymills 0:714293de3836 322 __asm__( \
ashleymills 0:714293de3836 323 " LDR r0,%1 \n\t" \
ashleymills 0:714293de3836 324 " ADDS r0,r0,%0 \n\t" \
ashleymills 0:714293de3836 325 " MOVCS %0,#1 \n\t" \
ashleymills 0:714293de3836 326 " MOVCC %0,#0 \n\t" \
ashleymills 0:714293de3836 327 " UMLAL r0,%0,%3,%4 \n\t" \
ashleymills 0:714293de3836 328 " STR r0,%1 \n\t" \
ashleymills 0:714293de3836 329 :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r0","cc");
ashleymills 0:714293de3836 330 /* PROPCARRY: _c[0] += cy; cy becomes 1 if that add carried, otherwise 0. */
ashleymills 0:714293de3836 331 #define PROPCARRY \
ashleymills 0:714293de3836 332 __asm__( \
ashleymills 0:714293de3836 333 " LDR r0,%1 \n\t" \
ashleymills 0:714293de3836 334 " ADDS r0,r0,%0 \n\t" \
ashleymills 0:714293de3836 335 " STR r0,%1 \n\t" \
ashleymills 0:714293de3836 336 " MOVCS %0,#1 \n\t" \
ashleymills 0:714293de3836 337 " MOVCC %0,#0 \n\t" \
ashleymills 0:714293de3836 338 :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r0","cc");
ashleymills 0:714293de3836 339
ashleymills 0:714293de3836 340 #elif defined(TFM_PPC32)
ashleymills 0:714293de3836 341 /* MONT_START, MONT_FINI and LOOP_END expand to nothing here; LOOP_START sets mu = c[x] * mp. */
ashleymills 0:714293de3836 342 /* PPC32 */
ashleymills 0:714293de3836 343 #define MONT_START
ashleymills 0:714293de3836 344 #define MONT_FINI
ashleymills 0:714293de3836 345 #define LOOP_END
ashleymills 0:714293de3836 346 #define LOOP_START \
ashleymills 0:714293de3836 347 mu = c[x] * mp
ashleymills 0:714293de3836 348 /* INNERMUL: _c[0] += mu * tmpm[0] + cy using scratch registers 16-18; cy receives the high word and tmpm is advanced after the asm. */
ashleymills 0:714293de3836 349 #define INNERMUL \
ashleymills 0:714293de3836 350 __asm__( \
ashleymills 0:714293de3836 351 " mullw 16,%3,%4 \n\t" \
ashleymills 0:714293de3836 352 " mulhwu 17,%3,%4 \n\t" \
ashleymills 0:714293de3836 353 " addc 16,16,%0 \n\t" \
ashleymills 0:714293de3836 354 " addze 17,17 \n\t" \
ashleymills 0:714293de3836 355 " lwz 18,%1 \n\t" \
ashleymills 0:714293de3836 356 " addc 16,16,18 \n\t" \
ashleymills 0:714293de3836 357 " addze %0,17 \n\t" \
ashleymills 0:714293de3836 358 " stw 16,%1 \n\t" \
ashleymills 0:714293de3836 359 :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "18","cc"); ++tmpm;
ashleymills 0:714293de3836 360 /* PROPCARRY: _c[0] += cy; cy is zeroed and then set to the carry of that add via addze. */
ashleymills 0:714293de3836 361 #define PROPCARRY \
ashleymills 0:714293de3836 362 __asm__( \
ashleymills 0:714293de3836 363 " lwz 16,%1 \n\t" \
ashleymills 0:714293de3836 364 " addc 16,16,%0 \n\t" \
ashleymills 0:714293de3836 365 " stw 16,%1 \n\t" \
ashleymills 0:714293de3836 366 " xor %0,%0,%0 \n\t" \
ashleymills 0:714293de3836 367 " addze %0,%0 \n\t" \
ashleymills 0:714293de3836 368 :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"16","cc");
ashleymills 0:714293de3836 369
ashleymills 0:714293de3836 370 #elif defined(TFM_PPC64)
ashleymills 0:714293de3836 371 /* MONT_START, MONT_FINI and LOOP_END expand to nothing here; LOOP_START sets mu = c[x] * mp. */
ashleymills 0:714293de3836 372 /* PPC64. NOTE(review): the "sdx" used by INNERMUL/PROPCARRY below is presumably meant to be "stdx" (store doubleword indexed) - confirm this branch assembles. */
ashleymills 0:714293de3836 373 #define MONT_START
ashleymills 0:714293de3836 374 #define MONT_FINI
ashleymills 0:714293de3836 375 #define LOOP_END
ashleymills 0:714293de3836 376 #define LOOP_START \
ashleymills 0:714293de3836 377 mu = c[x] * mp
ashleymills 0:714293de3836 378 /* INNERMUL: 64-bit form of _c[0] += mu * tmpm[0] + cy using scratch registers 16-18; cy receives the high word and tmpm is advanced after the asm. */
ashleymills 0:714293de3836 379 #define INNERMUL \
ashleymills 0:714293de3836 380 __asm__( \
ashleymills 0:714293de3836 381 " mulld 16,%3,%4 \n\t" \
ashleymills 0:714293de3836 382 " mulhdu 17,%3,%4 \n\t" \
ashleymills 0:714293de3836 383 " addc 16,16,%0 \n\t" \
ashleymills 0:714293de3836 384 " addze 17,17 \n\t" \
ashleymills 0:714293de3836 385 " ldx 18,0,%1 \n\t" \
ashleymills 0:714293de3836 386 " addc 16,16,18 \n\t" \
ashleymills 0:714293de3836 387 " addze %0,17 \n\t" \
ashleymills 0:714293de3836 388 " sdx 16,0,%1 \n\t" \
ashleymills 0:714293de3836 389 :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "18","cc"); ++tmpm;
ashleymills 0:714293de3836 390 /* PROPCARRY: _c[0] += cy; cy is zeroed and then set to the carry of that add via addze. */
ashleymills 0:714293de3836 391 #define PROPCARRY \
ashleymills 0:714293de3836 392 __asm__( \
ashleymills 0:714293de3836 393 " ldx 16,0,%1 \n\t" \
ashleymills 0:714293de3836 394 " addc 16,16,%0 \n\t" \
ashleymills 0:714293de3836 395 " sdx 16,0,%1 \n\t" \
ashleymills 0:714293de3836 396 " xor %0,%0,%0 \n\t" \
ashleymills 0:714293de3836 397 " addze %0,%0 \n\t" \
ashleymills 0:714293de3836 398 :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"16","cc");
ashleymills 0:714293de3836 399
ashleymills 0:714293de3836 400 /******************************************************************/
ashleymills 0:714293de3836 401
ashleymills 0:714293de3836 402 #elif defined(TFM_AVR32)
ashleymills 0:714293de3836 403 /* MONT_START, MONT_FINI and LOOP_END expand to nothing here; LOOP_START sets mu = c[x] * mp. */
ashleymills 0:714293de3836 404 /* AVR32 */
ashleymills 0:714293de3836 405 #define MONT_START
ashleymills 0:714293de3836 406 #define MONT_FINI
ashleymills 0:714293de3836 407 #define LOOP_END
ashleymills 0:714293de3836 408 #define LOOP_START \
ashleymills 0:714293de3836 409 mu = c[x] * mp
ashleymills 0:714293de3836 410 /* INNERMUL: r2 = word loaded via _c, plus cy, with the carry captured in r3; macu.d then accumulates mu * (*tmpm++) (presumably into r3:r2); r2 is stored back and cy = r3. */
ashleymills 0:714293de3836 411 #define INNERMUL \
ashleymills 0:714293de3836 412 __asm__( \
ashleymills 0:714293de3836 413 " ld.w r2,%1 \n\t" \
ashleymills 0:714293de3836 414 " add r2,%0 \n\t" \
ashleymills 0:714293de3836 415 " eor r3,r3 \n\t" \
ashleymills 0:714293de3836 416 " acr r3 \n\t" \
ashleymills 0:714293de3836 417 " macu.d r2,%3,%4 \n\t" \
ashleymills 0:714293de3836 418 " st.w %1,r2 \n\t" \
ashleymills 0:714293de3836 419 " mov %0,r3 \n\t" \
ashleymills 0:714293de3836 420 :"=r"(cy),"=r"(_c):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c):"r2","r3");
ashleymills 0:714293de3836 421 /* PROPCARRY: add cy into the word at _c, then cy = carry of that add. NOTE(review): the output operand &_c[0] is not an lvalue - confirm this compiles. */
ashleymills 0:714293de3836 422 #define PROPCARRY \
ashleymills 0:714293de3836 423 __asm__( \
ashleymills 0:714293de3836 424 " ld.w r2,%1 \n\t" \
ashleymills 0:714293de3836 425 " add r2,%0 \n\t" \
ashleymills 0:714293de3836 426 " st.w %1,r2 \n\t" \
ashleymills 0:714293de3836 427 " eor %0,%0 \n\t" \
ashleymills 0:714293de3836 428 " acr %0 \n\t" \
ashleymills 0:714293de3836 429 :"=r"(cy),"=r"(&_c[0]):"0"(cy),"1"(&_c[0]):"r2","cc");
ashleymills 0:714293de3836 430
ashleymills 0:714293de3836 431 #else
ashleymills 0:714293de3836 432 /* Fallback when no TFM_* asm target is defined: MONT_START, MONT_FINI and LOOP_END are empty; LOOP_START sets mu = c[x] * mp. */
ashleymills 0:714293de3836 433 /* ISO C code */
ashleymills 0:714293de3836 434 #define MONT_START
ashleymills 0:714293de3836 435 #define MONT_FINI
ashleymills 0:714293de3836 436 #define LOOP_END
ashleymills 0:714293de3836 437 #define LOOP_START \
ashleymills 0:714293de3836 438 mu = c[x] * mp
ashleymills 0:714293de3836 439 /* INNERMUL: t = _c[0] + cy + mu * (*tmpm++) computed in fp_word; the low digit goes to _c[0] and t >> DIGIT_BIT becomes cy. */
ashleymills 0:714293de3836 440 #define INNERMUL \
ashleymills 0:714293de3836 441 do { fp_word t; \
ashleymills 0:714293de3836 442 t = ((fp_word)_c[0] + (fp_word)cy) + \
ashleymills 0:714293de3836 443 (((fp_word)mu) * ((fp_word)*tmpm++)); \
ashleymills 0:714293de3836 444 _c[0] = (fp_digit)t; \
ashleymills 0:714293de3836 445 cy = (fp_digit)(t >> DIGIT_BIT); \
ashleymills 0:714293de3836 446 } while (0)
ashleymills 0:714293de3836 447 /* PROPCARRY: _c[0] += cy; cy becomes 1 if the sum wrapped (result < cy), otherwise 0. */
ashleymills 0:714293de3836 448 #define PROPCARRY \
ashleymills 0:714293de3836 449 do { fp_digit t = _c[0] += cy; cy = (t < cy); } while (0)
ashleymills 0:714293de3836 450
ashleymills 0:714293de3836 451 #endif
ashleymills 0:714293de3836 452 /******************************************************************/
ashleymills 0:714293de3836 453
ashleymills 0:714293de3836 454 /* Index used by the asm macros above to address the current digit as _c[LO]. */
ashleymills 0:714293de3836 455 #define LO 0
ashleymills 0:714293de3836 456 /* end fp_montgomery_reduce.c asm */
ashleymills 0:714293de3836 457
ashleymills 0:714293de3836 458
ashleymills 0:714293de3836 459 /* start fp_sqr_comba.c asm */
ashleymills 0:714293de3836 460 #if defined(TFM_X86)
ashleymills 0:714293de3836 461
ashleymills 0:714293de3836 462 /* x86-32 optimized */
ashleymills 0:714293de3836 463 /* The accumulator is three words: c0 (low), c1, c2. COMBA_START and COMBA_FINI expand to nothing here. */
ashleymills 0:714293de3836 464 #define COMBA_START
ashleymills 0:714293de3836 465 /* CLEAR_CARRY: zero the three-word accumulator. */
ashleymills 0:714293de3836 466 #define CLEAR_CARRY \
ashleymills 0:714293de3836 467 c0 = c1 = c2 = 0;
ashleymills 0:714293de3836 468 /* COMBA_STORE: write the low accumulator word (c0) to x. */
ashleymills 0:714293de3836 469 #define COMBA_STORE(x) \
ashleymills 0:714293de3836 470 x = c0;
ashleymills 0:714293de3836 471 /* COMBA_STORE2: write the middle accumulator word (c1) to x. */
ashleymills 0:714293de3836 472 #define COMBA_STORE2(x) \
ashleymills 0:714293de3836 473 x = c1;
ashleymills 0:714293de3836 474 /* CARRY_FORWARD: shift the accumulator down one word (c0 = c1, c1 = c2, c2 = 0). */
ashleymills 0:714293de3836 475 #define CARRY_FORWARD \
ashleymills 0:714293de3836 476 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
ashleymills 0:714293de3836 477
ashleymills 0:714293de3836 478 #define COMBA_FINI
ashleymills 0:714293de3836 479 /* SQRADD: (c2:c1:c0) += i * i; the j argument is not referenced. */
ashleymills 0:714293de3836 480 #define SQRADD(i, j) \
ashleymills 0:714293de3836 481 __asm__( \
ashleymills 0:714293de3836 482 "movl %6,%%eax \n\t" \
ashleymills 0:714293de3836 483 "mull %%eax \n\t" \
ashleymills 0:714293de3836 484 "addl %%eax,%0 \n\t" \
ashleymills 0:714293de3836 485 "adcl %%edx,%1 \n\t" \
ashleymills 0:714293de3836 486 "adcl $0,%2 \n\t" \
ashleymills 0:714293de3836 487 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","cc");
ashleymills 0:714293de3836 488 /* SQRADD2: (c2:c1:c0) += 2 * i * j, done by adding the product edx:eax twice. */
ashleymills 0:714293de3836 489 #define SQRADD2(i, j) \
ashleymills 0:714293de3836 490 __asm__( \
ashleymills 0:714293de3836 491 "movl %6,%%eax \n\t" \
ashleymills 0:714293de3836 492 "mull %7 \n\t" \
ashleymills 0:714293de3836 493 "addl %%eax,%0 \n\t" \
ashleymills 0:714293de3836 494 "adcl %%edx,%1 \n\t" \
ashleymills 0:714293de3836 495 "adcl $0,%2 \n\t" \
ashleymills 0:714293de3836 496 "addl %%eax,%0 \n\t" \
ashleymills 0:714293de3836 497 "adcl %%edx,%1 \n\t" \
ashleymills 0:714293de3836 498 "adcl $0,%2 \n\t" \
ashleymills 0:714293de3836 499 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx", "cc");
ashleymills 0:714293de3836 500 /* SQRADDSC: start a fresh secondary sum: sc1:sc0 = i * j and sc2 = 0. */
ashleymills 0:714293de3836 501 #define SQRADDSC(i, j) \
ashleymills 0:714293de3836 502 __asm__( \
ashleymills 0:714293de3836 503 "movl %3,%%eax \n\t" \
ashleymills 0:714293de3836 504 "mull %4 \n\t" \
ashleymills 0:714293de3836 505 "movl %%eax,%0 \n\t" \
ashleymills 0:714293de3836 506 "movl %%edx,%1 \n\t" \
ashleymills 0:714293de3836 507 "xorl %2,%2 \n\t" \
ashleymills 0:714293de3836 508 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "g"(i), "g"(j) :"%eax","%edx","cc");
ashleymills 0:714293de3836 509
ashleymills 0:714293de3836 510 /* TAO removed sc0,1,2 as input to remove warning so %6,%7 become %3,%4 */
ashleymills 0:714293de3836 511 /* SQRADDAC: (sc2:sc1:sc0) += i * j. */
ashleymills 0:714293de3836 512 #define SQRADDAC(i, j) \
ashleymills 0:714293de3836 513 __asm__( \
ashleymills 0:714293de3836 514 "movl %6,%%eax \n\t" \
ashleymills 0:714293de3836 515 "mull %7 \n\t" \
ashleymills 0:714293de3836 516 "addl %%eax,%0 \n\t" \
ashleymills 0:714293de3836 517 "adcl %%edx,%1 \n\t" \
ashleymills 0:714293de3836 518 "adcl $0,%2 \n\t" \
ashleymills 0:714293de3836 519 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","cc");
ashleymills 0:714293de3836 520 /* SQRADDDB: (c2:c1:c0) += 2 * (sc2:sc1:sc0), done as two add-with-carry passes. */
ashleymills 0:714293de3836 521 #define SQRADDDB \
ashleymills 0:714293de3836 522 __asm__( \
ashleymills 0:714293de3836 523 "addl %6,%0 \n\t" \
ashleymills 0:714293de3836 524 "adcl %7,%1 \n\t" \
ashleymills 0:714293de3836 525 "adcl %8,%2 \n\t" \
ashleymills 0:714293de3836 526 "addl %6,%0 \n\t" \
ashleymills 0:714293de3836 527 "adcl %7,%1 \n\t" \
ashleymills 0:714293de3836 528 "adcl %8,%2 \n\t" \
ashleymills 0:714293de3836 529 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");
ashleymills 0:714293de3836 530
ashleymills 0:714293de3836 531 #elif defined(TFM_X86_64)
ashleymills 0:714293de3836 532 /* x86-64 optimized */
ashleymills 0:714293de3836 533 /* The accumulator is three words: c0 (low), c1, c2. COMBA_START and COMBA_FINI expand to nothing here. */
ashleymills 0:714293de3836 534 #define COMBA_START
ashleymills 0:714293de3836 535 /* CLEAR_CARRY: zero the three-word accumulator. */
ashleymills 0:714293de3836 536 #define CLEAR_CARRY \
ashleymills 0:714293de3836 537 c0 = c1 = c2 = 0;
ashleymills 0:714293de3836 538 /* COMBA_STORE: write the low accumulator word (c0) to x. */
ashleymills 0:714293de3836 539 #define COMBA_STORE(x) \
ashleymills 0:714293de3836 540 x = c0;
ashleymills 0:714293de3836 541 /* COMBA_STORE2: write the middle accumulator word (c1) to x. */
ashleymills 0:714293de3836 542 #define COMBA_STORE2(x) \
ashleymills 0:714293de3836 543 x = c1;
ashleymills 0:714293de3836 544 /* CARRY_FORWARD: shift the accumulator down one word (c0 = c1, c1 = c2, c2 = 0). */
ashleymills 0:714293de3836 545 #define CARRY_FORWARD \
ashleymills 0:714293de3836 546 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
ashleymills 0:714293de3836 547
ashleymills 0:714293de3836 548 #define COMBA_FINI
ashleymills 0:714293de3836 549 /* SQRADD: (c2:c1:c0) += i * i using 64-bit registers; the j argument is not referenced. */
ashleymills 0:714293de3836 550 #define SQRADD(i, j) \
ashleymills 0:714293de3836 551 __asm__( \
ashleymills 0:714293de3836 552 "movq %6,%%rax \n\t" \
ashleymills 0:714293de3836 553 "mulq %%rax \n\t" \
ashleymills 0:714293de3836 554 "addq %%rax,%0 \n\t" \
ashleymills 0:714293de3836 555 "adcq %%rdx,%1 \n\t" \
ashleymills 0:714293de3836 556 "adcq $0,%2 \n\t" \
ashleymills 0:714293de3836 557 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i) :"%rax","%rdx","cc");
ashleymills 0:714293de3836 558 /* SQRADD2: (c2:c1:c0) += 2 * i * j, done by adding the product rdx:rax twice. */
ashleymills 0:714293de3836 559 #define SQRADD2(i, j) \
ashleymills 0:714293de3836 560 __asm__( \
ashleymills 0:714293de3836 561 "movq %6,%%rax \n\t" \
ashleymills 0:714293de3836 562 "mulq %7 \n\t" \
ashleymills 0:714293de3836 563 "addq %%rax,%0 \n\t" \
ashleymills 0:714293de3836 564 "adcq %%rdx,%1 \n\t" \
ashleymills 0:714293de3836 565 "adcq $0,%2 \n\t" \
ashleymills 0:714293de3836 566 "addq %%rax,%0 \n\t" \
ashleymills 0:714293de3836 567 "adcq %%rdx,%1 \n\t" \
ashleymills 0:714293de3836 568 "adcq $0,%2 \n\t" \
ashleymills 0:714293de3836 569 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","cc");
ashleymills 0:714293de3836 570 /* SQRADDSC: start a fresh secondary sum: sc1:sc0 = i * j and sc2 = 0. */
ashleymills 0:714293de3836 571 #define SQRADDSC(i, j) \
ashleymills 0:714293de3836 572 __asm__( \
ashleymills 0:714293de3836 573 "movq %3,%%rax \n\t" \
ashleymills 0:714293de3836 574 "mulq %4 \n\t" \
ashleymills 0:714293de3836 575 "movq %%rax,%0 \n\t" \
ashleymills 0:714293de3836 576 "movq %%rdx,%1 \n\t" \
ashleymills 0:714293de3836 577 "xorq %2,%2 \n\t" \
ashleymills 0:714293de3836 578 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "g"(i), "g"(j) :"%rax","%rdx","cc");
ashleymills 0:714293de3836 579
ashleymills 0:714293de3836 580 /* TAO removed sc0,1,2 as input to remove warning so %6,%7 become %3,%4 */
ashleymills 0:714293de3836 581 /* SQRADDAC: (sc2:sc1:sc0) += i * j. */
ashleymills 0:714293de3836 582 #define SQRADDAC(i, j) \
ashleymills 0:714293de3836 583 __asm__( \
ashleymills 0:714293de3836 584 "movq %6,%%rax \n\t" \
ashleymills 0:714293de3836 585 "mulq %7 \n\t" \
ashleymills 0:714293de3836 586 "addq %%rax,%0 \n\t" \
ashleymills 0:714293de3836 587 "adcq %%rdx,%1 \n\t" \
ashleymills 0:714293de3836 588 "adcq $0,%2 \n\t" \
ashleymills 0:714293de3836 589 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","cc");
ashleymills 0:714293de3836 590 /* SQRADDDB: (c2:c1:c0) += 2 * (sc2:sc1:sc0), done as two add-with-carry passes. */
ashleymills 0:714293de3836 591 #define SQRADDDB \
ashleymills 0:714293de3836 592 __asm__( \
ashleymills 0:714293de3836 593 "addq %6,%0 \n\t" \
ashleymills 0:714293de3836 594 "adcq %7,%1 \n\t" \
ashleymills 0:714293de3836 595 "adcq %8,%2 \n\t" \
ashleymills 0:714293de3836 596 "addq %6,%0 \n\t" \
ashleymills 0:714293de3836 597 "adcq %7,%1 \n\t" \
ashleymills 0:714293de3836 598 "adcq %8,%2 \n\t" \
ashleymills 0:714293de3836 599 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");
ashleymills 0:714293de3836 600
ashleymills 0:714293de3836 601 #elif defined(TFM_SSE2)
ashleymills 0:714293de3836 602
ashleymills 0:714293de3836 603 /* SSE2 Optimized */
/* Column bookkeeping for the SSE2 squaring code.  c0, c1 and c2 are taken
 * from the scope in which a macro is expanded.  Every non-empty macro
 * carries its own trailing ';'. */

/* Expands to nothing. */
#define COMBA_START

/* Zero the three accumulator words. */
#define CLEAR_CARRY c0 = c1 = c2 = 0;

/* Copy the low accumulator word into x. */
#define COMBA_STORE(x) x = c0;

/* Copy the middle accumulator word into x. */
#define COMBA_STORE2(x) x = c1;

/* Move the accumulator down one word: c0 <- c1, c1 <- c2, c2 <- 0. */
#define CARRY_FORWARD do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* Issue emms; the SQRADD* macros of this section work in %mm0/%mm1. */
#define COMBA_FINI __asm__("emms");
ashleymills 0:714293de3836 620
/* c2:c1:c0 += i * i.  Only i is used; j is ignored.
 * pmuludq squares the 32-bit value in %mm0; the low half is added to c0,
 * the high half (after psrlq $32) to c1 with carry, and the final carry
 * goes into c2.  i must be a memory operand ("m"). */
#define SQRADD(i, j) \
    __asm__( \
        "movd %6,%%mm0 \n\t" \
        "pmuludq %%mm0,%%mm0\n\t" \
        "movd %%mm0,%%eax \n\t" \
        "psrlq $32,%%mm0 \n\t" \
        "addl %%eax,%0 \n\t" \
        "movd %%mm0,%%eax \n\t" \
        "adcl %%eax,%1 \n\t" \
        "adcl $0,%2 \n\t" \
        : "=r"(c0), "=r"(c1), "=r"(c2) \
        : "0"(c0), "1"(c1), "2"(c2), "m"(i) \
        : "%eax", "cc");

/* c2:c1:c0 += 2 * i * j.  The product is split into eax (low) and edx
 * (high) once and then added to the accumulator twice. */
#define SQRADD2(i, j) \
    __asm__( \
        "movd %6,%%mm0 \n\t" \
        "movd %7,%%mm1 \n\t" \
        "pmuludq %%mm1,%%mm0\n\t" \
        "movd %%mm0,%%eax \n\t" \
        "psrlq $32,%%mm0 \n\t" \
        "movd %%mm0,%%edx \n\t" \
        "addl %%eax,%0 \n\t" \
        "adcl %%edx,%1 \n\t" \
        "adcl $0,%2 \n\t" \
        "addl %%eax,%0 \n\t" \
        "adcl %%edx,%1 \n\t" \
        "adcl $0,%2 \n\t" \
        : "=r"(c0), "=r"(c1), "=r"(c2) \
        : "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) \
        : "%eax", "%edx", "cc");
ashleymills 0:714293de3836 648
/* Start a fresh doubled-term accumulator: sc2:sc1:sc0 = i * j.
 * sc0..sc2 are outputs only (TAO removed them from the input list to
 * silence a warning), so i and j are operands %3/%4 rather than %6/%7.
 * No clobber list is given for this statement. */
#define SQRADDSC(i, j) \
    __asm__( \
        "movd %3,%%mm0 \n\t" \
        "movd %4,%%mm1 \n\t" \
        "pmuludq %%mm1,%%mm0\n\t" \
        "movd %%mm0,%0 \n\t" \
        "psrlq $32,%%mm0 \n\t" \
        "movd %%mm0,%1 \n\t" \
        "xorl %2,%2 \n\t" \
        : "=r"(sc0), "=r"(sc1), "=r"(sc2) \
        : "m"(i), "m"(j));

/* Accumulate another product: sc2:sc1:sc0 += i * j. */
#define SQRADDAC(i, j) \
    __asm__( \
        "movd %6,%%mm0 \n\t" \
        "movd %7,%%mm1 \n\t" \
        "pmuludq %%mm1,%%mm0\n\t" \
        "movd %%mm0,%%eax \n\t" \
        "psrlq $32,%%mm0 \n\t" \
        "movd %%mm0,%%edx \n\t" \
        "addl %%eax,%0 \n\t" \
        "adcl %%edx,%1 \n\t" \
        "adcl $0,%2 \n\t" \
        : "=r"(sc0), "=r"(sc1), "=r"(sc2) \
        : "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j) \
        : "%eax", "%edx", "cc");

/* c2:c1:c0 += 2 * sc2:sc1:sc0, written as two addl/adcl/adcl chains. */
#define SQRADDDB \
    __asm__( \
        "addl %6,%0 \n\t" \
        "adcl %7,%1 \n\t" \
        "adcl %8,%2 \n\t" \
        "addl %6,%0 \n\t" \
        "adcl %7,%1 \n\t" \
        "adcl %8,%2 \n\t" \
        : "=r"(c0), "=r"(c1), "=r"(c2) \
        : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) \
        : "cc");
ashleymills 0:714293de3836 684
ashleymills 0:714293de3836 685 #elif defined(TFM_ARM)
ashleymills 0:714293de3836 686
ashleymills 0:714293de3836 687 /* ARM code */
ashleymills 0:714293de3836 688
/* Column bookkeeping for the ARM squaring code.  c0, c1 and c2 are taken
 * from the scope in which a macro is expanded.  Every non-empty macro
 * carries its own trailing ';'. */

/* Expands to nothing. */
#define COMBA_START

/* Zero the three accumulator words. */
#define CLEAR_CARRY c0 = c1 = c2 = 0;

/* Copy the low accumulator word into x. */
#define COMBA_STORE(x) x = c0;

/* Copy the middle accumulator word into x. */
#define COMBA_STORE2(x) x = c1;

/* Move the accumulator down one word: c0 <- c1, c1 <- c2, c2 <- 0. */
#define CARRY_FORWARD do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* Expands to nothing. */
#define COMBA_FINI
ashleymills 0:714293de3836 704
/* c2:c1:c0 += i * i.  Only i is used; j is ignored.
 * UMULL puts the product in r1:r0, which is then added with carry. */
#define SQRADD(i, j) \
    __asm__( \
        " UMULL r0,r1,%6,%6 \n\t" \
        " ADDS %0,%0,r0 \n\t" \
        " ADCS %1,%1,r1 \n\t" \
        " ADC %2,%2,#0 \n\t" \
        : "=r"(c0), "=r"(c1), "=r"(c2) \
        : "0"(c0), "1"(c1), "2"(c2), "r"(i) \
        : "r0", "r1", "cc");

/* c2:c1:c0 += 2 * i * j: one UMULL, then the same product is added twice. */
#define SQRADD2(i, j) \
    __asm__( \
        " UMULL r0,r1,%6,%7 \n\t" \
        " ADDS %0,%0,r0 \n\t" \
        " ADCS %1,%1,r1 \n\t" \
        " ADC %2,%2,#0 \n\t" \
        " ADDS %0,%0,r0 \n\t" \
        " ADCS %1,%1,r1 \n\t" \
        " ADC %2,%2,#0 \n\t" \
        : "=r"(c0), "=r"(c1), "=r"(c2) \
        : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) \
        : "r0", "r1", "cc");

/* Start a fresh doubled-term accumulator: sc2:sc1:sc0 = i * j.
 * UMULL writes sc0/sc1 directly; SUB of sc2 from itself zeroes sc2.
 * sc0..sc2 are still listed as tied inputs in this variant. */
#define SQRADDSC(i, j) \
    __asm__( \
        " UMULL %0,%1,%6,%7 \n\t" \
        " SUB %2,%2,%2 \n\t" \
        : "=r"(sc0), "=r"(sc1), "=r"(sc2) \
        : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) \
        : "cc");

/* Accumulate another product: sc2:sc1:sc0 += i * j. */
#define SQRADDAC(i, j) \
    __asm__( \
        " UMULL r0,r1,%6,%7 \n\t" \
        " ADDS %0,%0,r0 \n\t" \
        " ADCS %1,%1,r1 \n\t" \
        " ADC %2,%2,#0 \n\t" \
        : "=r"(sc0), "=r"(sc1), "=r"(sc2) \
        : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) \
        : "r0", "r1", "cc");

/* c2:c1:c0 += 2 * sc2:sc1:sc0, written as two ADDS/ADCS/ADC chains.
 * Here sc0..sc2 are operands %3-%5 and the tied c0..c2 inputs come last. */
#define SQRADDDB \
    __asm__( \
        " ADDS %0,%0,%3 \n\t" \
        " ADCS %1,%1,%4 \n\t" \
        " ADC %2,%2,%5 \n\t" \
        " ADDS %0,%0,%3 \n\t" \
        " ADCS %1,%1,%4 \n\t" \
        " ADC %2,%2,%5 \n\t" \
        : "=r"(c0), "=r"(c1), "=r"(c2) \
        : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) \
        : "cc");
ashleymills 0:714293de3836 749
ashleymills 0:714293de3836 750 #elif defined(TFM_PPC32)
ashleymills 0:714293de3836 751
ashleymills 0:714293de3836 752 /* PPC32 */
ashleymills 0:714293de3836 753
/* Column bookkeeping for the PPC32 squaring code.  c0, c1 and c2 are taken
 * from the scope in which a macro is expanded.  Every non-empty macro
 * carries its own trailing ';'. */

/* Expands to nothing. */
#define COMBA_START

/* Zero the three accumulator words. */
#define CLEAR_CARRY c0 = c1 = c2 = 0;

/* Copy the low accumulator word into x. */
#define COMBA_STORE(x) x = c0;

/* Copy the middle accumulator word into x. */
#define COMBA_STORE2(x) x = c1;

/* Move the accumulator down one word: c0 <- c1, c1 <- c2, c2 <- 0. */
#define CARRY_FORWARD do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* Expands to nothing. */
#define COMBA_FINI
ashleymills 0:714293de3836 769
/* c2:c1:c0 += i * i.  Only i is used; j is ignored.
 * Register 16 is scratch: it holds the low product word (mullw) for addc,
 * then the high word (mulhwu) for adde; addze pushes the carry into c2. */
#define SQRADD(i, j) \
    __asm__( \
        " mullw 16,%6,%6 \n\t" \
        " addc %0,%0,16 \n\t" \
        " mulhwu 16,%6,%6 \n\t" \
        " adde %1,%1,16 \n\t" \
        " addze %2,%2 \n\t" \
        : "=r"(c0), "=r"(c1), "=r"(c2) \
        : "0"(c0), "1"(c1), "2"(c2), "r"(i) \
        : "16", "cc");

/* c2:c1:c0 += 2 * i * j.  Low/high product words are kept in registers
 * 16/17 and added to the accumulator twice. */
#define SQRADD2(i, j) \
    __asm__( \
        " mullw 16,%6,%7 \n\t" \
        " mulhwu 17,%6,%7 \n\t" \
        " addc %0,%0,16 \n\t" \
        " adde %1,%1,17 \n\t" \
        " addze %2,%2 \n\t" \
        " addc %0,%0,16 \n\t" \
        " adde %1,%1,17 \n\t" \
        " addze %2,%2 \n\t" \
        : "=r"(c0), "=r"(c1), "=r"(c2) \
        : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) \
        : "16", "17", "cc");

/* Start a fresh doubled-term accumulator: sc2:sc1:sc0 = i * j.
 * The product words are written straight into sc0/sc1; xor zeroes sc2.
 * sc0..sc2 are still listed as tied inputs in this variant. */
#define SQRADDSC(i, j) \
    __asm__( \
        " mullw %0,%6,%7 \n\t" \
        " mulhwu %1,%6,%7 \n\t" \
        " xor %2,%2,%2 \n\t" \
        : "=r"(sc0), "=r"(sc1), "=r"(sc2) \
        : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) \
        : "cc");

/* Accumulate another product: sc2:sc1:sc0 += i * j. */
#define SQRADDAC(i, j) \
    __asm__( \
        " mullw 16,%6,%7 \n\t" \
        " addc %0,%0,16 \n\t" \
        " mulhwu 16,%6,%7 \n\t" \
        " adde %1,%1,16 \n\t" \
        " addze %2,%2 \n\t" \
        : "=r"(sc0), "=r"(sc1), "=r"(sc2) \
        : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) \
        : "16", "cc");

/* c2:c1:c0 += 2 * sc2:sc1:sc0, written as two addc/adde/adde chains.
 * Here sc0..sc2 are operands %3-%5 and the tied c0..c2 inputs come last. */
#define SQRADDDB \
    __asm__( \
        " addc %0,%0,%3 \n\t" \
        " adde %1,%1,%4 \n\t" \
        " adde %2,%2,%5 \n\t" \
        " addc %0,%0,%3 \n\t" \
        " adde %1,%1,%4 \n\t" \
        " adde %2,%2,%5 \n\t" \
        : "=r"(c0), "=r"(c1), "=r"(c2) \
        : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) \
        : "cc");
ashleymills 0:714293de3836 818
ashleymills 0:714293de3836 819 #elif defined(TFM_PPC64)
ashleymills 0:714293de3836 820 /* PPC64 */
ashleymills 0:714293de3836 821
/* Column bookkeeping for the PPC64 squaring code.  c0, c1 and c2 are taken
 * from the scope in which a macro is expanded.  Every non-empty macro
 * carries its own trailing ';'. */

/* Expands to nothing. */
#define COMBA_START

/* Zero the three accumulator words. */
#define CLEAR_CARRY c0 = c1 = c2 = 0;

/* Copy the low accumulator word into x. */
#define COMBA_STORE(x) x = c0;

/* Copy the middle accumulator word into x. */
#define COMBA_STORE2(x) x = c1;

/* Move the accumulator down one word: c0 <- c1, c1 <- c2, c2 <- 0. */
#define CARRY_FORWARD do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* Expands to nothing. */
#define COMBA_FINI
ashleymills 0:714293de3836 837
/* c2:c1:c0 += i * i.  Only i is used; j is ignored.
 * Register 16 is scratch: it holds the low product word (mulld) for addc,
 * then the high word (mulhdu) for adde; addze pushes the carry into c2. */
#define SQRADD(i, j) \
    __asm__( \
        " mulld 16,%6,%6 \n\t" \
        " addc %0,%0,16 \n\t" \
        " mulhdu 16,%6,%6 \n\t" \
        " adde %1,%1,16 \n\t" \
        " addze %2,%2 \n\t" \
        : "=r"(c0), "=r"(c1), "=r"(c2) \
        : "0"(c0), "1"(c1), "2"(c2), "r"(i) \
        : "16", "cc");

/* c2:c1:c0 += 2 * i * j.  Low/high product words are kept in registers
 * 16/17 and added to the accumulator twice. */
#define SQRADD2(i, j) \
    __asm__( \
        " mulld 16,%6,%7 \n\t" \
        " mulhdu 17,%6,%7 \n\t" \
        " addc %0,%0,16 \n\t" \
        " adde %1,%1,17 \n\t" \
        " addze %2,%2 \n\t" \
        " addc %0,%0,16 \n\t" \
        " adde %1,%1,17 \n\t" \
        " addze %2,%2 \n\t" \
        : "=r"(c0), "=r"(c1), "=r"(c2) \
        : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) \
        : "16", "17", "cc");

/* Start a fresh doubled-term accumulator: sc2:sc1:sc0 = i * j.
 * The product words are written straight into sc0/sc1; xor zeroes sc2.
 * sc0..sc2 are still listed as tied inputs in this variant. */
#define SQRADDSC(i, j) \
    __asm__( \
        " mulld %0,%6,%7 \n\t" \
        " mulhdu %1,%6,%7 \n\t" \
        " xor %2,%2,%2 \n\t" \
        : "=r"(sc0), "=r"(sc1), "=r"(sc2) \
        : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) \
        : "cc");

/* Accumulate another product: sc2:sc1:sc0 += i * j. */
#define SQRADDAC(i, j) \
    __asm__( \
        " mulld 16,%6,%7 \n\t" \
        " addc %0,%0,16 \n\t" \
        " mulhdu 16,%6,%7 \n\t" \
        " adde %1,%1,16 \n\t" \
        " addze %2,%2 \n\t" \
        : "=r"(sc0), "=r"(sc1), "=r"(sc2) \
        : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) \
        : "16", "cc");

/* c2:c1:c0 += 2 * sc2:sc1:sc0, written as two addc/adde/adde chains.
 * Here sc0..sc2 are operands %3-%5 and the tied c0..c2 inputs come last. */
#define SQRADDDB \
    __asm__( \
        " addc %0,%0,%3 \n\t" \
        " adde %1,%1,%4 \n\t" \
        " adde %2,%2,%5 \n\t" \
        " addc %0,%0,%3 \n\t" \
        " adde %1,%1,%4 \n\t" \
        " adde %2,%2,%5 \n\t" \
        : "=r"(c0), "=r"(c1), "=r"(c2) \
        : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) \
        : "cc");
ashleymills 0:714293de3836 886
ashleymills 0:714293de3836 887
ashleymills 0:714293de3836 888 #elif defined(TFM_AVR32)
ashleymills 0:714293de3836 889
ashleymills 0:714293de3836 890 /* AVR32 */
ashleymills 0:714293de3836 891
/* Column bookkeeping for the AVR32 squaring code.  c0, c1 and c2 are taken
 * from the scope in which a macro is expanded.  Every non-empty macro
 * carries its own trailing ';'. */

/* Expands to nothing. */
#define COMBA_START

/* Zero the three accumulator words. */
#define CLEAR_CARRY c0 = c1 = c2 = 0;

/* Copy the low accumulator word into x. */
#define COMBA_STORE(x) x = c0;

/* Copy the middle accumulator word into x. */
#define COMBA_STORE2(x) x = c1;

/* Move the accumulator down one word: c0 <- c1, c1 <- c2, c2 <- 0. */
#define CARRY_FORWARD do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* Expands to nothing. */
#define COMBA_FINI
ashleymills 0:714293de3836 907
/* c2:c1:c0 += i * i.  Only i is used; j is ignored.
 * mulu.d places the product in the r2/r3 pair; add/adc/acr then form a
 * carry chain into c0, c1 and c2.  Because that chain goes through the
 * condition flags, "cc" is declared as clobbered, as SQRADDDB in this
 * section already does. */
#define SQRADD(i, j) \
    __asm__( \
        " mulu.d r2,%6,%6 \n\t" \
        " add %0,%0,r2 \n\t" \
        " adc %1,%1,r3 \n\t" \
        " acr %2 \n\t" \
        : "=r"(c0), "=r"(c1), "=r"(c2) \
        : "0"(c0), "1"(c1), "2"(c2), "r"(i) \
        : "r2", "r3", "cc");
ashleymills 0:714293de3836 916
/* c2:c1:c0 += 2 * i * j.  mulu.d places the product in the r2/r3 pair and
 * the same add/adc/acr carry chain is run twice.
 * Fixes: both "acr" lines previously ended in a stray comma after the
 * single operand, and "cc" is now declared as clobbered because the chain
 * goes through the condition flags. */
#define SQRADD2(i, j) \
    __asm__( \
        " mulu.d r2,%6,%7 \n\t" \
        " add %0,%0,r2 \n\t" \
        " adc %1,%1,r3 \n\t" \
        " acr %2 \n\t" \
        " add %0,%0,r2 \n\t" \
        " adc %1,%1,r3 \n\t" \
        " acr %2 \n\t" \
        : "=r"(c0), "=r"(c1), "=r"(c2) \
        : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) \
        : "r2", "r3", "cc");
ashleymills 0:714293de3836 928
/* Start a fresh doubled-term accumulator: sc2:sc1:sc0 = i * j.
 * The product lands in the r2/r3 pair and is moved to sc0/sc1; eor of sc2
 * with itself zeroes sc2.  "cc" is declared as in the SQRADDSC variants of
 * the other architectures in this file. */
#define SQRADDSC(i, j) \
    __asm__( \
        " mulu.d r2,%6,%7 \n\t" \
        " mov %0,r2 \n\t" \
        " mov %1,r3 \n\t" \
        " eor %2,%2 \n\t" \
        : "=r"(sc0), "=r"(sc1), "=r"(sc2) \
        : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) \
        : "r2", "r3", "cc");

/* Accumulate another product: sc2:sc1:sc0 += i * j.
 * "cc" is declared because add/adc/acr chain through the condition flags. */
#define SQRADDAC(i, j) \
    __asm__( \
        " mulu.d r2,%6,%7 \n\t" \
        " add %0,%0,r2 \n\t" \
        " adc %1,%1,r3 \n\t" \
        " acr %2 \n\t" \
        : "=r"(sc0), "=r"(sc1), "=r"(sc2) \
        : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) \
        : "r2", "r3", "cc");

/* c2:c1:c0 += 2 * sc2:sc1:sc0, written as two add/adc/adc chains.
 * Here sc0..sc2 are operands %3-%5 and the tied c0..c2 inputs come last. */
#define SQRADDDB \
    __asm__( \
        " add %0,%0,%3 \n\t" \
        " adc %1,%1,%4 \n\t" \
        " adc %2,%2,%5 \n\t" \
        " add %0,%0,%3 \n\t" \
        " adc %1,%1,%4 \n\t" \
        " adc %2,%2,%5 \n\t" \
        : "=r"(c0), "=r"(c1), "=r"(c2) \
        : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) \
        : "cc");
ashleymills 0:714293de3836 954
ashleymills 0:714293de3836 955
ashleymills 0:714293de3836 956 #else
ashleymills 0:714293de3836 957
/* Set when none of the assembler variants above was selected and the
 * portable ISO C macros below are used instead. */
#define TFM_ISO

/* Column bookkeeping for the ISO C squaring code.  c0, c1 and c2 are taken
 * from the scope in which a macro is expanded.  Every non-empty macro
 * carries its own trailing ';'. */

/* Expands to nothing. */
#define COMBA_START

/* Zero the three accumulator words. */
#define CLEAR_CARRY c0 = c1 = c2 = 0;

/* Copy the low accumulator word into x. */
#define COMBA_STORE(x) x = c0;

/* Copy the middle accumulator word into x. */
#define COMBA_STORE2(x) x = c1;

/* Move the accumulator down one word: c0 <- c1, c1 <- c2, c2 <- 0. */
#define CARRY_FORWARD do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* Expands to nothing. */
#define COMBA_FINI
ashleymills 0:714293de3836 977
/* Portable versions of the squaring column primitives.
 *
 * All of them work on fp_digit variables named c0/c1/c2 (column sum) and
 * sc0/sc1/sc2 (doubled-term accumulator) found in the expanding scope, and
 * use fp_word for intermediate sums that are split with DIGIT_BIT
 * (presumably fp_word is at least twice as wide as fp_digit - confirm
 * against the type definitions).
 *
 * The arguments are parenthesised before being widened, so an expression
 * argument is evaluated as a whole and only then cast to fp_word.
 * Each macro keeps its trailing ';'. */

/* c2:c1:c0 += i * j */
#define SQRADD(i, j) \
    do { fp_word t; \
    t = c0 + ((fp_word)(i)) * ((fp_word)(j)); c0 = (fp_digit)t; \
    t = c1 + (t >> DIGIT_BIT); c1 = (fp_digit)t; \
    c2 += (fp_digit)(t >> DIGIT_BIT); \
    } while (0);


/* c2:c1:c0 += 2 * i * j, added as the same product twice.
 * Uses tt, which is NOT declared here: the expanding scope must provide an
 * fp_word variable of that name. */
#define SQRADD2(i, j) \
    do { fp_word t; \
    t = ((fp_word)(i)) * ((fp_word)(j)); \
    tt = (fp_word)c0 + t; c0 = (fp_digit)tt; \
    tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = (fp_digit)tt; \
    c2 += (fp_digit)(tt >> DIGIT_BIT); \
    tt = (fp_word)c0 + t; c0 = (fp_digit)tt; \
    tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = (fp_digit)tt; \
    c2 += (fp_digit)(tt >> DIGIT_BIT); \
    } while (0);

/* Start a fresh doubled-term accumulator: sc2:sc1:sc0 = i * j */
#define SQRADDSC(i, j) \
    do { fp_word t; \
    t = ((fp_word)(i)) * ((fp_word)(j)); \
    sc0 = (fp_digit)t; sc1 = (fp_digit)(t >> DIGIT_BIT); sc2 = 0; \
    } while (0);

/* Accumulate another product: sc2:sc1:sc0 += i * j */
#define SQRADDAC(i, j) \
    do { fp_word t; \
    t = sc0 + ((fp_word)(i)) * ((fp_word)(j)); sc0 = (fp_digit)t; \
    t = sc1 + (t >> DIGIT_BIT); sc1 = (fp_digit)t; \
    sc2 += (fp_digit)(t >> DIGIT_BIT); \
    } while (0);

/* c2:c1:c0 += 2 * sc2:sc1:sc0 */
#define SQRADDDB \
    do { fp_word t; \
    t = ((fp_word)sc0) + ((fp_word)sc0) + c0; c0 = (fp_digit)t; \
    t = ((fp_word)sc1) + ((fp_word)sc1) + c1 + (t >> DIGIT_BIT); c1 = (fp_digit)t; \
    c2 = (fp_digit)(c2 + ((fp_word)sc2) + ((fp_word)sc2) + (t >> DIGIT_BIT)); \
    } while (0);
ashleymills 0:714293de3836 1017
ashleymills 0:714293de3836 1018 #endif
ashleymills 0:714293de3836 1019
ashleymills 0:714293de3836 1020 #ifdef TFM_SMALL_SET
ashleymills 0:714293de3836 1021 #include "fp_sqr_comba_small_set.i"
ashleymills 0:714293de3836 1022 #include "fp_sqr_comba_3.i"
ashleymills 0:714293de3836 1023 #include "fp_sqr_comba_4.i"
ashleymills 0:714293de3836 1024 #include "fp_sqr_comba_6.i"
ashleymills 0:714293de3836 1025 #include "fp_sqr_comba_7.i"
ashleymills 0:714293de3836 1026 #include "fp_sqr_comba_8.i"
ashleymills 0:714293de3836 1027 #include "fp_sqr_comba_9.i"
ashleymills 0:714293de3836 1028 #include "fp_sqr_comba_12.i"
ashleymills 0:714293de3836 1029 #include "fp_sqr_comba_17.i"
ashleymills 0:714293de3836 1030 #include "fp_sqr_comba_20.i"
ashleymills 0:714293de3836 1031 #include "fp_sqr_comba_24.i"
ashleymills 0:714293de3836 1032 #include "fp_sqr_comba_28.i"
ashleymills 0:714293de3836 1033 #include "fp_sqr_comba_32.i"
ashleymills 0:714293de3836 1034 #include "fp_sqr_comba_48.i"
ashleymills 0:714293de3836 1035 #include "fp_sqr_comba_64.i"
ashleymills 0:714293de3836 1036 #endif
ashleymills 0:714293de3836 1037 /* end fp_sqr_comba.c asm */
ashleymills 0:714293de3836 1038
ashleymills 0:714293de3836 1039 /* start fp_mul_comba.c asm */
ashleymills 0:714293de3836 1040 /* these are the combas. Worship them. */
ashleymills 0:714293de3836 1041 #if defined(TFM_X86)
ashleymills 0:714293de3836 1042 /* Generic x86 optimized code */
ashleymills 0:714293de3836 1043
/* Column bookkeeping for the generic x86 multiplier.  c0, c1 and c2 are
 * taken from the scope in which a macro is expanded.  Every non-empty
 * macro carries its own trailing ';'. */

/* Hook run before the first column; expands to nothing. */
#define COMBA_START

/* Zero the three chaining words. */
#define COMBA_CLEAR c0 = c1 = c2 = 0;

/* Carry into the next column: c0 <- c1, c1 <- c2, c2 <- 0. */
#define COMBA_FORWARD do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* Copy the first sum word (c0) into x. */
#define COMBA_STORE(x) x = c0;

/* Copy the second sum word, i.e. the carry (c1), into x. */
#define COMBA_STORE2(x) x = c1;

/* Hook run after the last column; expands to nothing. */
#define COMBA_FINI
ashleymills 0:714293de3836 1065
/* c2:c1:c0 += i * j.
 * mull leaves the product in edx:eax, which is added to c0/c1 with the
 * final carry going into c2.  Both i and j must be memory operands ("m"). */
#define MULADD(i, j) \
    __asm__( \
        "movl %6,%%eax \n\t" \
        "mull %7 \n\t" \
        "addl %%eax,%0 \n\t" \
        "adcl %%edx,%1 \n\t" \
        "adcl $0,%2 \n\t" \
        : "=r"(c0), "=r"(c1), "=r"(c2) \
        : "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) \
        : "%eax", "%edx", "cc");
ashleymills 0:714293de3836 1075
ashleymills 0:714293de3836 1076 #elif defined(TFM_X86_64)
ashleymills 0:714293de3836 1077 /* x86-64 optimized */
ashleymills 0:714293de3836 1078
/* Column bookkeeping for the x86-64 multiplier.  c0, c1 and c2 are taken
 * from the scope in which a macro is expanded.  Every non-empty macro
 * carries its own trailing ';'. */

/* Hook run before the first column; expands to nothing. */
#define COMBA_START

/* Zero the three chaining words. */
#define COMBA_CLEAR c0 = c1 = c2 = 0;

/* Carry into the next column: c0 <- c1, c1 <- c2, c2 <- 0. */
#define COMBA_FORWARD do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* Copy the first sum word (c0) into x. */
#define COMBA_STORE(x) x = c0;

/* Copy the second sum word, i.e. the carry (c1), into x. */
#define COMBA_STORE2(x) x = c1;

/* Hook run after the last column; expands to nothing. */
#define COMBA_FINI
ashleymills 0:714293de3836 1100
/* c2:c1:c0 += i * j.
 * mulq leaves the product in rdx:rax, which is added to c0/c1 with the
 * final carry going into c2.
 * j uses "rm" rather than "g": "g" also admits an immediate operand, and
 * mulq has no immediate form. */
#define MULADD(i, j) \
    __asm__ ( \
        "movq %6,%%rax \n\t" \
        "mulq %7 \n\t" \
        "addq %%rax,%0 \n\t" \
        "adcq %%rdx,%1 \n\t" \
        "adcq $0,%2 \n\t" \
        : "=r"(c0), "=r"(c1), "=r"(c2) \
        : "0"(c0), "1"(c1), "2"(c2), "g"(i), "rm"(j) \
        : "%rax", "%rdx", "cc");
ashleymills 0:714293de3836 1110
ashleymills 0:714293de3836 1111 #elif defined(TFM_SSE2)
ashleymills 0:714293de3836 1112 /* use SSE2 optimizations */
ashleymills 0:714293de3836 1113
/* Column bookkeeping for the SSE2 multiplier.  c0, c1 and c2 are taken
 * from the scope in which a macro is expanded.  Every non-empty macro
 * carries its own trailing ';'. */

/* Hook run before the first column; expands to nothing. */
#define COMBA_START

/* Zero the three chaining words. */
#define COMBA_CLEAR c0 = c1 = c2 = 0;

/* Carry into the next column: c0 <- c1, c1 <- c2, c2 <- 0. */
#define COMBA_FORWARD do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* Copy the first sum word (c0) into x. */
#define COMBA_STORE(x) x = c0;

/* Copy the second sum word, i.e. the carry (c1), into x. */
#define COMBA_STORE2(x) x = c1;

/* Hook run after the last column: issues emms, since MULADD in this
 * section works in %mm0/%mm1. */
#define COMBA_FINI __asm__("emms");
ashleymills 0:714293de3836 1136
/* c2:c1:c0 += i * j.
 * pmuludq forms the product in %mm0; its low half goes through eax into
 * c0, the high half (after psrlq $32) through eax into c1 with carry, and
 * the final carry into c2.  Both i and j must be memory operands ("m"). */
#define MULADD(i, j) \
    __asm__( \
        "movd %6,%%mm0 \n\t" \
        "movd %7,%%mm1 \n\t" \
        "pmuludq %%mm1,%%mm0\n\t" \
        "movd %%mm0,%%eax \n\t" \
        "psrlq $32,%%mm0 \n\t" \
        "addl %%eax,%0 \n\t" \
        "movd %%mm0,%%eax \n\t" \
        "adcl %%eax,%1 \n\t" \
        "adcl $0,%2 \n\t" \
        : "=r"(c0), "=r"(c1), "=r"(c2) \
        : "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) \
        : "%eax", "cc");
ashleymills 0:714293de3836 1150
ashleymills 0:714293de3836 1151 #elif defined(TFM_ARM)
ashleymills 0:714293de3836 1152 /* ARM code */
ashleymills 0:714293de3836 1153
/* Column bookkeeping for the ARM multiplier.  c0, c1 and c2 are taken from
 * the scope in which a macro is expanded.  Every non-empty macro carries
 * its own trailing ';'. */

/* Expands to nothing. */
#define COMBA_START

/* Zero the three chaining words. */
#define COMBA_CLEAR c0 = c1 = c2 = 0;

/* Carry into the next column: c0 <- c1, c1 <- c2, c2 <- 0. */
#define COMBA_FORWARD do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* Copy the first sum word (c0) into x. */
#define COMBA_STORE(x) x = c0;

/* Copy the second sum word (c1) into x. */
#define COMBA_STORE2(x) x = c1;

/* Expands to nothing. */
#define COMBA_FINI
ashleymills 0:714293de3836 1169
/* c2:c1:c0 += i * j.
 * UMULL puts the product in r1:r0 (both clobbered); ADDS/ADCS/ADC add it
 * to c0/c1 and push the final carry into c2. */
#define MULADD(i, j) \
    __asm__( \
        " UMULL r0,r1,%6,%7 \n\t" \
        " ADDS %0,%0,r0 \n\t" \
        " ADCS %1,%1,r1 \n\t" \
        " ADC %2,%2,#0 \n\t" \
        : "=r"(c0), "=r"(c1), "=r"(c2) \
        : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) \
        : "r0", "r1", "cc");
ashleymills 0:714293de3836 1177
ashleymills 0:714293de3836 1178 #elif defined(TFM_PPC32)
ashleymills 0:714293de3836 1179 /* For 32-bit PPC */
ashleymills 0:714293de3836 1180
/* Column bookkeeping for the PPC32 multiplier.  c0, c1 and c2 are taken
 * from the scope in which a macro is expanded.  Every non-empty macro
 * carries its own trailing ';'. */

/* Expands to nothing. */
#define COMBA_START

/* Zero the three chaining words. */
#define COMBA_CLEAR c0 = c1 = c2 = 0;

/* Carry into the next column: c0 <- c1, c1 <- c2, c2 <- 0. */
#define COMBA_FORWARD do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* Copy the first sum word (c0) into x. */
#define COMBA_STORE(x) x = c0;

/* Copy the second sum word (c1) into x. */
#define COMBA_STORE2(x) x = c1;

/* Expands to nothing. */
#define COMBA_FINI
ashleymills 0:714293de3836 1196
ashleymills 0:714293de3836 1197 /* untested: will mulhwu change the flags? Docs say no */
ashleymills 0:714293de3836 1198 #define MULADD(i, j) \
ashleymills 0:714293de3836 1199 __asm__( \
ashleymills 0:714293de3836 1200 " mullw 16,%6,%7 \n\t" \
ashleymills 0:714293de3836 1201 " addc %0,%0,16 \n\t" \
ashleymills 0:714293de3836 1202 " mulhwu 16,%6,%7 \n\t" \
ashleymills 0:714293de3836 1203 " adde %1,%1,16 \n\t" \
ashleymills 0:714293de3836 1204 " addze %2,%2 \n\t" \
ashleymills 0:714293de3836 1205 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16");
ashleymills 0:714293de3836 1206
ashleymills 0:714293de3836 1207 #elif defined(TFM_PPC64)
/* For 64-bit PPC */

/* Nothing to set up before a comba pass in this variant. */
#define COMBA_START

/* Zero all three accumulator words (c2:c1:c0). */
#define COMBA_CLEAR \
   c0 = c1 = c2 = 0;

/* Shift the accumulator down one word: c0 takes c1, c1 takes c2, and
 * c2 is cleared. */
#define COMBA_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* Copy the low accumulator word (c0) into x. */
#define COMBA_STORE(x) \
   x = c0;

/* Copy the middle accumulator word (c1) into x. */
#define COMBA_STORE2(x) \
   x = c1;

/* Nothing to tear down after a comba pass in this variant. */
#define COMBA_FINI

/* c2:c1:c0 += i * j
 *
 * mulld/mulhdu produce the low and high halves of the product in scratch
 * register 16 (declared as clobbered); addc/adde/addze add them into the
 * accumulator and carry upwards.
 *
 * Untested: the sequence depends on mulhdu leaving the carry produced by
 * addc untouched.  The docs say it does.
 */
#define MULADD(i, j) \
__asm__( \
   " mulld 16,%6,%7 \n\t" \
   " addc %0,%0,16 \n\t" \
   " mulhdu 16,%6,%7 \n\t" \
   " adde %1,%1,16 \n\t" \
   " addze %2,%2 \n\t" \
   :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16");
ashleymills 0:714293de3836 1235
ashleymills 0:714293de3836 1236 #elif defined(TFM_AVR32)
ashleymills 0:714293de3836 1237
/* AVR32 code */

/* Nothing to set up before a comba pass in this variant. */
#define COMBA_START

/* Zero all three accumulator words (c2:c1:c0). */
#define COMBA_CLEAR \
   c0 = c1 = c2 = 0;

/* Shift the accumulator down one word: c0 takes c1, c1 takes c2, and
 * c2 is cleared. */
#define COMBA_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* Copy the low accumulator word (c0) into x. */
#define COMBA_STORE(x) \
   x = c0;

/* Copy the middle accumulator word (c1) into x. */
#define COMBA_STORE2(x) \
   x = c1;

/* Nothing to tear down after a comba pass in this variant. */
#define COMBA_FINI

/* c2:c1:c0 += i * j
 *
 * mulu.d writes the product of i and j to the scratch registers r2/r3
 * (both declared as clobbered); add/adc add those words into c0 and c1,
 * and acr adds the remaining carry into c2.
 */
#define MULADD(i, j) \
__asm__( \
   " mulu.d r2,%6,%7 \n\t"\
   " add %0,r2 \n\t"\
   " adc %1,%1,r3 \n\t"\
   " acr %2 \n\t"\
   :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2","r3");
ashleymills 0:714293de3836 1263
ashleymills 0:714293de3836 1264 #else
/* ISO C code */

/* Nothing to set up before a comba pass in this variant. */
#define COMBA_START

/* Zero all three accumulator words (c2:c1:c0). */
#define COMBA_CLEAR \
   c0 = c1 = c2 = 0;

/* Shift the accumulator down one word: c0 takes c1, c1 takes c2, and
 * c2 is cleared. */
#define COMBA_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* Copy the low accumulator word (c0) into x. */
#define COMBA_STORE(x) \
   x = c0;

/* Copy the middle accumulator word (c1) into x. */
#define COMBA_STORE2(x) \
   x = c1;

/* Nothing to tear down after a comba pass in this variant. */
#define COMBA_FINI

/* c2:c1:c0 += i * j
 *
 * The product is formed in the double-width type fp_word and added to c0;
 * the part above DIGIT_BIT bits is then added to c1, and whatever spills
 * out of that is added to c2.
 *
 * Each argument is parenthesised before the cast, so an expression
 * argument is evaluated in its own type first and only then widened.
 */
#define MULADD(i, j)                                                      \
   do { fp_word t;                                                        \
   t = (fp_word)c0 + ((fp_word)(i)) * ((fp_word)(j)); c0 = (fp_digit)t;   \
   t = (fp_word)c1 + (t >> DIGIT_BIT);                                    \
   c1 = (fp_digit)t; c2 += (fp_digit)(t >> DIGIT_BIT);                    \
   } while (0);
ashleymills 0:714293de3836 1289
ashleymills 0:714293de3836 1290 #endif
ashleymills 0:714293de3836 1291
ashleymills 0:714293de3836 1292
ashleymills 0:714293de3836 1293 #ifdef TFM_SMALL_SET
ashleymills 0:714293de3836 1294 #include "fp_mul_comba_small_set.i"
ashleymills 0:714293de3836 1295 #include "fp_mul_comba_3.i"
ashleymills 0:714293de3836 1296 #include "fp_mul_comba_4.i"
ashleymills 0:714293de3836 1297 #include "fp_mul_comba_6.i"
ashleymills 0:714293de3836 1298 #include "fp_mul_comba_7.i"
ashleymills 0:714293de3836 1299 #include "fp_mul_comba_8.i"
ashleymills 0:714293de3836 1300 #include "fp_mul_comba_9.i"
ashleymills 0:714293de3836 1301 #include "fp_mul_comba_12.i"
ashleymills 0:714293de3836 1302 #include "fp_mul_comba_17.i"
ashleymills 0:714293de3836 1303 #include "fp_mul_comba_20.i"
ashleymills 0:714293de3836 1304 #include "fp_mul_comba_24.i"
ashleymills 0:714293de3836 1305 #include "fp_mul_comba_28.i"
ashleymills 0:714293de3836 1306 #include "fp_mul_comba_32.i"
ashleymills 0:714293de3836 1307 #include "fp_mul_comba_48.i"
ashleymills 0:714293de3836 1308 #include "fp_mul_comba_64.i"
ashleymills 0:714293de3836 1309 #endif
ashleymills 0:714293de3836 1310
ashleymills 0:714293de3836 1311 /* end fp_mul_comba.c asm */
ashleymills 0:714293de3836 1312