cyassl re-port with cellular comms, PSK test

Dependencies:   VodafoneUSBModem_bleedingedge2 mbed-rtos mbed-src

Committer:
ashleymills
Date:
Fri Apr 26 16:54:58 2013 +0000
Revision:
0:e979170e02e7
Basic operation of SSL with PSK working for cellular.

Who changed what in which revision?

User | Revision | Line number | New contents of line
ashleymills 0:e979170e02e7 1 /* asm.c
ashleymills 0:e979170e02e7 2 *
ashleymills 0:e979170e02e7 3 * Copyright (C) 2006-2012 Sawtooth Consulting Ltd.
ashleymills 0:e979170e02e7 4 *
ashleymills 0:e979170e02e7 5 * This file is part of CyaSSL.
ashleymills 0:e979170e02e7 6 *
ashleymills 0:e979170e02e7 7 * CyaSSL is free software; you can redistribute it and/or modify
ashleymills 0:e979170e02e7 8 * it under the terms of the GNU General Public License as published by
ashleymills 0:e979170e02e7 9 * the Free Software Foundation; either version 2 of the License, or
ashleymills 0:e979170e02e7 10 * (at your option) any later version.
ashleymills 0:e979170e02e7 11 *
ashleymills 0:e979170e02e7 12 * CyaSSL is distributed in the hope that it will be useful,
ashleymills 0:e979170e02e7 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
ashleymills 0:e979170e02e7 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
ashleymills 0:e979170e02e7 15 * GNU General Public License for more details.
ashleymills 0:e979170e02e7 16 *
ashleymills 0:e979170e02e7 17 * You should have received a copy of the GNU General Public License
ashleymills 0:e979170e02e7 18 * along with this program; if not, write to the Free Software
ashleymills 0:e979170e02e7 19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
ashleymills 0:e979170e02e7 20 */
ashleymills 0:e979170e02e7 21
ashleymills 0:e979170e02e7 22 #ifdef HAVE_CONFIG_H
ashleymills 0:e979170e02e7 23 #include <config.h>
ashleymills 0:e979170e02e7 24 #endif
ashleymills 0:e979170e02e7 25
ashleymills 0:e979170e02e7 26 /*
ashleymills 0:e979170e02e7 27 * Based on public domain TomsFastMath 0.10 by Tom St Denis, tomstdenis@iahu.ca,
ashleymills 0:e979170e02e7 28 * http://math.libtomcrypt.com
ashleymills 0:e979170e02e7 29 */
ashleymills 0:e979170e02e7 30
ashleymills 0:e979170e02e7 31
ashleymills 0:e979170e02e7 32 /******************************************************************/
ashleymills 0:e979170e02e7 33 /* fp_montgomery_reduce.c asm or generic */
ashleymills 0:e979170e02e7 34 #if defined(TFM_X86) && !defined(TFM_SSE2)
ashleymills 0:e979170e02e7 35 /* x86-32 code
 *
 * Hook macros for the x86-32 build of the Montgomery-reduce loop (see the
 * section banner above).  MONT_START, MONT_FINI and LOOP_END expand to
 * nothing in this branch; LOOP_START assigns c[x] * mp to mu.  The names
 * c, x, mp and mu are not declared here and must be in scope wherever the
 * macros are expanded.
 */
ashleymills 0:e979170e02e7 36
ashleymills 0:e979170e02e7 37 #define MONT_START
ashleymills 0:e979170e02e7 38 #define MONT_FINI
ashleymills 0:e979170e02e7 39 #define LOOP_END
ashleymills 0:e979170e02e7 40 #define LOOP_START \
ashleymills 0:e979170e02e7 41 mu = c[x] * mp
ashleymills 0:e979170e02e7 42
/* INNERMUL (x86-32): one multiply-accumulate step.
 *
 * Operands: %0 = _c[LO] (read/write), %1 = cy (read/write), %4 = mu,
 * %5 = *tmpm++ (tmpm is post-incremented by the operand expression itself).
 * The asm forms edx:eax = mu * (*tmpm), adds the incoming cy and then adds
 * the low word into _c[LO], folding both carries into edx; edx is finally
 * written back to cy.  Net effect: _c[LO] receives the low 32 bits and cy
 * the high 32 bits of  _c[LO] + cy + mu * (*tmpm).
 * eax, edx and the condition codes are listed as clobbered.
 */
ashleymills 0:e979170e02e7 43 #define INNERMUL \
ashleymills 0:e979170e02e7 44 __asm__( \
ashleymills 0:e979170e02e7 45 "movl %5,%%eax \n\t" \
ashleymills 0:e979170e02e7 46 "mull %4 \n\t" \
ashleymills 0:e979170e02e7 47 "addl %1,%%eax \n\t" \
ashleymills 0:e979170e02e7 48 "adcl $0,%%edx \n\t" \
ashleymills 0:e979170e02e7 49 "addl %%eax,%0 \n\t" \
ashleymills 0:e979170e02e7 50 "adcl $0,%%edx \n\t" \
ashleymills 0:e979170e02e7 51 "movl %%edx,%1 \n\t" \
ashleymills 0:e979170e02e7 52 :"=g"(_c[LO]), "=r"(cy) \
ashleymills 0:e979170e02e7 53 :"0"(_c[LO]), "1"(cy), "g"(mu), "g"(*tmpm++) \
ashleymills 0:e979170e02e7 54 : "%eax", "%edx", "%cc")
ashleymills 0:e979170e02e7 55
/* PROPCARRY (x86-32): add the carry into the current digit.
 *
 * %0 = _c[LO] (read/write), %1 = cy (read/write).  Adds cy to _c[LO], then
 * uses setb/movzbl to replace cy with the carry flag of that addition
 * (0 or 1).  eax (via al) and the condition codes are clobbered.
 */
ashleymills 0:e979170e02e7 56 #define PROPCARRY \
ashleymills 0:e979170e02e7 57 __asm__( \
ashleymills 0:e979170e02e7 58 "addl %1,%0 \n\t" \
ashleymills 0:e979170e02e7 59 "setb %%al \n\t" \
ashleymills 0:e979170e02e7 60 "movzbl %%al,%1 \n\t" \
ashleymills 0:e979170e02e7 61 :"=g"(_c[LO]), "=r"(cy) \
ashleymills 0:e979170e02e7 62 :"0"(_c[LO]), "1"(cy) \
ashleymills 0:e979170e02e7 63 : "%eax", "%cc")
ashleymills 0:e979170e02e7 64
ashleymills 0:e979170e02e7 65 /******************************************************************/
ashleymills 0:e979170e02e7 66 #elif defined(TFM_X86_64)
ashleymills 0:e979170e02e7 67 /* x86-64 code
 *
 * Hook macros for the x86-64 build of the Montgomery-reduce loop.
 * MONT_START, MONT_FINI and LOOP_END expand to nothing in this branch;
 * LOOP_START assigns c[x] * mp to mu.  c, x, mp and mu must be in scope at
 * the point of expansion.
 */
ashleymills 0:e979170e02e7 68
ashleymills 0:e979170e02e7 69 #define MONT_START
ashleymills 0:e979170e02e7 70 #define MONT_FINI
ashleymills 0:e979170e02e7 71 #define LOOP_END
ashleymills 0:e979170e02e7 72 #define LOOP_START \
ashleymills 0:e979170e02e7 73 mu = c[x] * mp
ashleymills 0:e979170e02e7 74
/* INNERMUL (x86-64): one multiply-accumulate step on a 64-bit digit.
 *
 * Operands: %0 = _c[LO] (read/write), %1 = cy (read/write), %4 = mu,
 * %5 = *tmpm++ (tmpm is post-incremented by the operand expression).
 * rdx:rax = mu * (*tmpm); cy and then the old _c[LO] are added with the
 * carries folded into rdx.  _c[LO] receives the low 64 bits and cy the
 * high 64 bits of  _c[LO] + cy + mu * (*tmpm).
 * rax, rdx and the condition codes are listed as clobbered.
 */
ashleymills 0:e979170e02e7 75 #define INNERMUL \
ashleymills 0:e979170e02e7 76 __asm__( \
ashleymills 0:e979170e02e7 77 "movq %5,%%rax \n\t" \
ashleymills 0:e979170e02e7 78 "mulq %4 \n\t" \
ashleymills 0:e979170e02e7 79 "addq %1,%%rax \n\t" \
ashleymills 0:e979170e02e7 80 "adcq $0,%%rdx \n\t" \
ashleymills 0:e979170e02e7 81 "addq %%rax,%0 \n\t" \
ashleymills 0:e979170e02e7 82 "adcq $0,%%rdx \n\t" \
ashleymills 0:e979170e02e7 83 "movq %%rdx,%1 \n\t" \
ashleymills 0:e979170e02e7 84 :"=g"(_c[LO]), "=r"(cy) \
ashleymills 0:e979170e02e7 85 :"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \
ashleymills 0:e979170e02e7 86 : "%rax", "%rdx", "%cc")
ashleymills 0:e979170e02e7 87
/* INNERMUL8 (x86-64): eight unrolled multiply-accumulate steps.
 *
 * Operands: %0 / %2 = _c used as a pointer (the input %2 is tied to the
 * output %0 by the "0" constraint), %1 / %3 = cy (tied the same way),
 * %4 = mu, %5 = tmpm used as a pointer.
 * For each of the eight 64-bit digits at byte offsets 0x00..0x38 the asm
 * computes  digit + cy + mu * tmpm-digit,  stores the low 64 bits back
 * through %0 and keeps the high 64 bits in cy for the next step.
 * r10 holds the next _c digit and r11 the next tmpm digit; both are loaded
 * one step ahead of their use.
 * The asm never changes %0 or %5, so advancing _c and tmpm past the eight
 * digits is left to the code that expands the macro.
 * NOTE(review): the asm stores through %0, but the clobber list has no
 * "memory" entry and the statement is not marked volatile -- confirm the
 * compiler cannot cache or reorder accesses to the _c digits around it.
 */
ashleymills 0:e979170e02e7 88 #define INNERMUL8 \
ashleymills 0:e979170e02e7 89 __asm__( \
ashleymills 0:e979170e02e7 90 "movq 0(%5),%%rax \n\t" \
ashleymills 0:e979170e02e7 91 "movq 0(%2),%%r10 \n\t" \
ashleymills 0:e979170e02e7 92 "movq 0x8(%5),%%r11 \n\t" \
ashleymills 0:e979170e02e7 93 "mulq %4 \n\t" \
ashleymills 0:e979170e02e7 94 "addq %%r10,%%rax \n\t" \
ashleymills 0:e979170e02e7 95 "adcq $0,%%rdx \n\t" \
ashleymills 0:e979170e02e7 96 "movq 0x8(%2),%%r10 \n\t" \
ashleymills 0:e979170e02e7 97 "addq %3,%%rax \n\t" \
ashleymills 0:e979170e02e7 98 "adcq $0,%%rdx \n\t" \
ashleymills 0:e979170e02e7 99 "movq %%rax,0(%0) \n\t" \
ashleymills 0:e979170e02e7 100 "movq %%rdx,%1 \n\t" \
ashleymills 0:e979170e02e7 101 \
ashleymills 0:e979170e02e7 102 "movq %%r11,%%rax \n\t" \
ashleymills 0:e979170e02e7 103 "movq 0x10(%5),%%r11 \n\t" \
ashleymills 0:e979170e02e7 104 "mulq %4 \n\t" \
ashleymills 0:e979170e02e7 105 "addq %%r10,%%rax \n\t" \
ashleymills 0:e979170e02e7 106 "adcq $0,%%rdx \n\t" \
ashleymills 0:e979170e02e7 107 "movq 0x10(%2),%%r10 \n\t" \
ashleymills 0:e979170e02e7 108 "addq %3,%%rax \n\t" \
ashleymills 0:e979170e02e7 109 "adcq $0,%%rdx \n\t" \
ashleymills 0:e979170e02e7 110 "movq %%rax,0x8(%0) \n\t" \
ashleymills 0:e979170e02e7 111 "movq %%rdx,%1 \n\t" \
ashleymills 0:e979170e02e7 112 \
ashleymills 0:e979170e02e7 113 "movq %%r11,%%rax \n\t" \
ashleymills 0:e979170e02e7 114 "movq 0x18(%5),%%r11 \n\t" \
ashleymills 0:e979170e02e7 115 "mulq %4 \n\t" \
ashleymills 0:e979170e02e7 116 "addq %%r10,%%rax \n\t" \
ashleymills 0:e979170e02e7 117 "adcq $0,%%rdx \n\t" \
ashleymills 0:e979170e02e7 118 "movq 0x18(%2),%%r10 \n\t" \
ashleymills 0:e979170e02e7 119 "addq %3,%%rax \n\t" \
ashleymills 0:e979170e02e7 120 "adcq $0,%%rdx \n\t" \
ashleymills 0:e979170e02e7 121 "movq %%rax,0x10(%0) \n\t" \
ashleymills 0:e979170e02e7 122 "movq %%rdx,%1 \n\t" \
ashleymills 0:e979170e02e7 123 \
ashleymills 0:e979170e02e7 124 "movq %%r11,%%rax \n\t" \
ashleymills 0:e979170e02e7 125 "movq 0x20(%5),%%r11 \n\t" \
ashleymills 0:e979170e02e7 126 "mulq %4 \n\t" \
ashleymills 0:e979170e02e7 127 "addq %%r10,%%rax \n\t" \
ashleymills 0:e979170e02e7 128 "adcq $0,%%rdx \n\t" \
ashleymills 0:e979170e02e7 129 "movq 0x20(%2),%%r10 \n\t" \
ashleymills 0:e979170e02e7 130 "addq %3,%%rax \n\t" \
ashleymills 0:e979170e02e7 131 "adcq $0,%%rdx \n\t" \
ashleymills 0:e979170e02e7 132 "movq %%rax,0x18(%0) \n\t" \
ashleymills 0:e979170e02e7 133 "movq %%rdx,%1 \n\t" \
ashleymills 0:e979170e02e7 134 \
ashleymills 0:e979170e02e7 135 "movq %%r11,%%rax \n\t" \
ashleymills 0:e979170e02e7 136 "movq 0x28(%5),%%r11 \n\t" \
ashleymills 0:e979170e02e7 137 "mulq %4 \n\t" \
ashleymills 0:e979170e02e7 138 "addq %%r10,%%rax \n\t" \
ashleymills 0:e979170e02e7 139 "adcq $0,%%rdx \n\t" \
ashleymills 0:e979170e02e7 140 "movq 0x28(%2),%%r10 \n\t" \
ashleymills 0:e979170e02e7 141 "addq %3,%%rax \n\t" \
ashleymills 0:e979170e02e7 142 "adcq $0,%%rdx \n\t" \
ashleymills 0:e979170e02e7 143 "movq %%rax,0x20(%0) \n\t" \
ashleymills 0:e979170e02e7 144 "movq %%rdx,%1 \n\t" \
ashleymills 0:e979170e02e7 145 \
ashleymills 0:e979170e02e7 146 "movq %%r11,%%rax \n\t" \
ashleymills 0:e979170e02e7 147 "movq 0x30(%5),%%r11 \n\t" \
ashleymills 0:e979170e02e7 148 "mulq %4 \n\t" \
ashleymills 0:e979170e02e7 149 "addq %%r10,%%rax \n\t" \
ashleymills 0:e979170e02e7 150 "adcq $0,%%rdx \n\t" \
ashleymills 0:e979170e02e7 151 "movq 0x30(%2),%%r10 \n\t" \
ashleymills 0:e979170e02e7 152 "addq %3,%%rax \n\t" \
ashleymills 0:e979170e02e7 153 "adcq $0,%%rdx \n\t" \
ashleymills 0:e979170e02e7 154 "movq %%rax,0x28(%0) \n\t" \
ashleymills 0:e979170e02e7 155 "movq %%rdx,%1 \n\t" \
ashleymills 0:e979170e02e7 156 \
ashleymills 0:e979170e02e7 157 "movq %%r11,%%rax \n\t" \
ashleymills 0:e979170e02e7 158 "movq 0x38(%5),%%r11 \n\t" \
ashleymills 0:e979170e02e7 159 "mulq %4 \n\t" \
ashleymills 0:e979170e02e7 160 "addq %%r10,%%rax \n\t" \
ashleymills 0:e979170e02e7 161 "adcq $0,%%rdx \n\t" \
ashleymills 0:e979170e02e7 162 "movq 0x38(%2),%%r10 \n\t" \
ashleymills 0:e979170e02e7 163 "addq %3,%%rax \n\t" \
ashleymills 0:e979170e02e7 164 "adcq $0,%%rdx \n\t" \
ashleymills 0:e979170e02e7 165 "movq %%rax,0x30(%0) \n\t" \
ashleymills 0:e979170e02e7 166 "movq %%rdx,%1 \n\t" \
ashleymills 0:e979170e02e7 167 \
ashleymills 0:e979170e02e7 168 "movq %%r11,%%rax \n\t" \
ashleymills 0:e979170e02e7 169 "mulq %4 \n\t" \
ashleymills 0:e979170e02e7 170 "addq %%r10,%%rax \n\t" \
ashleymills 0:e979170e02e7 171 "adcq $0,%%rdx \n\t" \
ashleymills 0:e979170e02e7 172 "addq %3,%%rax \n\t" \
ashleymills 0:e979170e02e7 173 "adcq $0,%%rdx \n\t" \
ashleymills 0:e979170e02e7 174 "movq %%rax,0x38(%0) \n\t" \
ashleymills 0:e979170e02e7 175 "movq %%rdx,%1 \n\t" \
ashleymills 0:e979170e02e7 176 \
ashleymills 0:e979170e02e7 177 :"=r"(_c), "=r"(cy) \
ashleymills 0:e979170e02e7 178 : "0"(_c), "1"(cy), "g"(mu), "r"(tmpm)\
ashleymills 0:e979170e02e7 179 : "%rax", "%rdx", "%r10", "%r11", "%cc")
ashleymills 0:e979170e02e7 180
ashleymills 0:e979170e02e7 181
/* PROPCARRY (x86-64): add the carry into the current digit.
 *
 * %0 = _c[LO] (read/write), %1 = cy (read/write).  Adds cy to _c[LO], then
 * uses setb/movzbq to replace cy with the carry flag of that addition
 * (0 or 1).  rax (via al) and the condition codes are clobbered.
 */
ashleymills 0:e979170e02e7 182 #define PROPCARRY \
ashleymills 0:e979170e02e7 183 __asm__( \
ashleymills 0:e979170e02e7 184 "addq %1,%0 \n\t" \
ashleymills 0:e979170e02e7 185 "setb %%al \n\t" \
ashleymills 0:e979170e02e7 186 "movzbq %%al,%1 \n\t" \
ashleymills 0:e979170e02e7 187 :"=g"(_c[LO]), "=r"(cy) \
ashleymills 0:e979170e02e7 188 :"0"(_c[LO]), "1"(cy) \
ashleymills 0:e979170e02e7 189 : "%rax", "%cc")
ashleymills 0:e979170e02e7 190
ashleymills 0:e979170e02e7 191 /******************************************************************/
ashleymills 0:e979170e02e7 192 #elif defined(TFM_SSE2)
ashleymills 0:e979170e02e7 193 /* SSE2 code (assumes 32-bit fp_digits) */
ashleymills 0:e979170e02e7 194 /* MMX register assignments (the code below uses %mm registers, not %xmm):
ashleymills 0:e979170e02e7 195 * mm0 *tmpm++, then Mu * (*tmpm++)
ashleymills 0:e979170e02e7 196 * mm1 c[x], then Mu
ashleymills 0:e979170e02e7 197 * mm2 mp
ashleymills 0:e979170e02e7 198 * mm3 cy
ashleymills 0:e979170e02e7 199 * mm4 _c[LO]
ashleymills 0:e979170e02e7 200 */
ashleymills 0:e979170e02e7 201
/* MONT_START (SSE2 branch): loads mp into MMX register mm2, where the
 * LOOP_START macro of this branch expects to find it.
 * MONT_FINI (SSE2 branch): issues emms once the MMX registers are no
 * longer needed.
 */
ashleymills 0:e979170e02e7 202 #define MONT_START \
ashleymills 0:e979170e02e7 203 __asm__("movd %0,%%mm2"::"g"(mp))
ashleymills 0:e979170e02e7 204
ashleymills 0:e979170e02e7 205 #define MONT_FINI \
ashleymills 0:e979170e02e7 206 __asm__("emms")
ashleymills 0:e979170e02e7 207
/* LOOP_START (SSE2 branch): loads c[x] into mm1, clears the running carry
 * in mm3, then multiplies mm1 by mm2 (mp, loaded by MONT_START) so that
 * mm1 holds the product used as the multiplier by INNERMUL/INNERMUL8.
 * No outputs or clobbers are declared; the result lives only in the MMX
 * registers.
 */
ashleymills 0:e979170e02e7 208 #define LOOP_START \
ashleymills 0:e979170e02e7 209 __asm__( \
ashleymills 0:e979170e02e7 210 "movd %0,%%mm1 \n\t" \
ashleymills 0:e979170e02e7 211 "pxor %%mm3,%%mm3 \n\t" \
ashleymills 0:e979170e02e7 212 "pmuludq %%mm2,%%mm1 \n\t" \
ashleymills 0:e979170e02e7 213 :: "g"(c[x]))
ashleymills 0:e979170e02e7 214
ashleymills 0:e979170e02e7 215 /* pmuludq on mmx registers does a 32x32->64 multiply.
 *
 * INNERMUL (SSE2 branch): one multiply-accumulate step.
 * Operands: %0 = _c[LO] (output), %1 = _c[LO] (input, tied to %0),
 * %2 = *tmpm++ (tmpm is post-incremented by the operand expression).
 * mm3 (running carry) += _c[LO]; mm0 = (*tmpm) * mm1; mm3 += mm0; the low
 * 32 bits of mm3 are stored to _c[LO] and mm3 is shifted right by 32 so it
 * carries the high part into the next step.
 * No clobber list is given, so the compiler is not told that mm0, mm3 and
 * mm4 change.  The macro text already ends with ';'.
 */
ashleymills 0:e979170e02e7 216 #define INNERMUL \
ashleymills 0:e979170e02e7 217 __asm__( \
ashleymills 0:e979170e02e7 218 "movd %1,%%mm4 \n\t" \
ashleymills 0:e979170e02e7 219 "movd %2,%%mm0 \n\t" \
ashleymills 0:e979170e02e7 220 "paddq %%mm4,%%mm3 \n\t" \
ashleymills 0:e979170e02e7 221 "pmuludq %%mm1,%%mm0 \n\t" \
ashleymills 0:e979170e02e7 222 "paddq %%mm0,%%mm3 \n\t" \
ashleymills 0:e979170e02e7 223 "movd %%mm3,%0 \n\t" \
ashleymills 0:e979170e02e7 224 "psrlq $32, %%mm3 \n\t" \
ashleymills 0:e979170e02e7 225 :"=g"(_c[LO]) : "0"(_c[LO]), "g"(*tmpm++) );
ashleymills 0:e979170e02e7 226
/* INNERMUL8 (SSE2 branch): eight unrolled multiply-accumulate steps.
 *
 * Operands: %0 = _c used as a pointer (output), %1 = _c (input, tied to
 * %0), %2 = tmpm used as a pointer.
 * For each of the eight 32-bit digits at byte offsets 0..28 the asm adds
 * the _c digit and the product (tmpm digit * mm1) into the running carry
 * mm3, stores the low 32 bits of mm3 back through %0 and shifts mm3 right
 * by 32.  mm0 and mm4..mm7 serve as temporaries, with the loads for the
 * next step interleaved into the current one.
 * The asm does not change %0 or %2, so advancing _c and tmpm is left to
 * the code that expands the macro.  No clobber list is given, and the
 * macro text already ends with ';'.
 */
ashleymills 0:e979170e02e7 227 #define INNERMUL8 \
ashleymills 0:e979170e02e7 228 __asm__( \
ashleymills 0:e979170e02e7 229 "movd 0(%1),%%mm4 \n\t" \
ashleymills 0:e979170e02e7 230 "movd 0(%2),%%mm0 \n\t" \
ashleymills 0:e979170e02e7 231 "paddq %%mm4,%%mm3 \n\t" \
ashleymills 0:e979170e02e7 232 "pmuludq %%mm1,%%mm0 \n\t" \
ashleymills 0:e979170e02e7 233 "movd 4(%2),%%mm5 \n\t" \
ashleymills 0:e979170e02e7 234 "paddq %%mm0,%%mm3 \n\t" \
ashleymills 0:e979170e02e7 235 "movd 4(%1),%%mm6 \n\t" \
ashleymills 0:e979170e02e7 236 "movd %%mm3,0(%0) \n\t" \
ashleymills 0:e979170e02e7 237 "psrlq $32, %%mm3 \n\t" \
ashleymills 0:e979170e02e7 238 \
ashleymills 0:e979170e02e7 239 "paddq %%mm6,%%mm3 \n\t" \
ashleymills 0:e979170e02e7 240 "pmuludq %%mm1,%%mm5 \n\t" \
ashleymills 0:e979170e02e7 241 "movd 8(%2),%%mm6 \n\t" \
ashleymills 0:e979170e02e7 242 "paddq %%mm5,%%mm3 \n\t" \
ashleymills 0:e979170e02e7 243 "movd 8(%1),%%mm7 \n\t" \
ashleymills 0:e979170e02e7 244 "movd %%mm3,4(%0) \n\t" \
ashleymills 0:e979170e02e7 245 "psrlq $32, %%mm3 \n\t" \
ashleymills 0:e979170e02e7 246 \
ashleymills 0:e979170e02e7 247 "paddq %%mm7,%%mm3 \n\t" \
ashleymills 0:e979170e02e7 248 "pmuludq %%mm1,%%mm6 \n\t" \
ashleymills 0:e979170e02e7 249 "movd 12(%2),%%mm7 \n\t" \
ashleymills 0:e979170e02e7 250 "paddq %%mm6,%%mm3 \n\t" \
ashleymills 0:e979170e02e7 251 "movd 12(%1),%%mm5 \n\t" \
ashleymills 0:e979170e02e7 252 "movd %%mm3,8(%0) \n\t" \
ashleymills 0:e979170e02e7 253 "psrlq $32, %%mm3 \n\t" \
ashleymills 0:e979170e02e7 254 \
ashleymills 0:e979170e02e7 255 "paddq %%mm5,%%mm3 \n\t" \
ashleymills 0:e979170e02e7 256 "pmuludq %%mm1,%%mm7 \n\t" \
ashleymills 0:e979170e02e7 257 "movd 16(%2),%%mm5 \n\t" \
ashleymills 0:e979170e02e7 258 "paddq %%mm7,%%mm3 \n\t" \
ashleymills 0:e979170e02e7 259 "movd 16(%1),%%mm6 \n\t" \
ashleymills 0:e979170e02e7 260 "movd %%mm3,12(%0) \n\t" \
ashleymills 0:e979170e02e7 261 "psrlq $32, %%mm3 \n\t" \
ashleymills 0:e979170e02e7 262 \
ashleymills 0:e979170e02e7 263 "paddq %%mm6,%%mm3 \n\t" \
ashleymills 0:e979170e02e7 264 "pmuludq %%mm1,%%mm5 \n\t" \
ashleymills 0:e979170e02e7 265 "movd 20(%2),%%mm6 \n\t" \
ashleymills 0:e979170e02e7 266 "paddq %%mm5,%%mm3 \n\t" \
ashleymills 0:e979170e02e7 267 "movd 20(%1),%%mm7 \n\t" \
ashleymills 0:e979170e02e7 268 "movd %%mm3,16(%0) \n\t" \
ashleymills 0:e979170e02e7 269 "psrlq $32, %%mm3 \n\t" \
ashleymills 0:e979170e02e7 270 \
ashleymills 0:e979170e02e7 271 "paddq %%mm7,%%mm3 \n\t" \
ashleymills 0:e979170e02e7 272 "pmuludq %%mm1,%%mm6 \n\t" \
ashleymills 0:e979170e02e7 273 "movd 24(%2),%%mm7 \n\t" \
ashleymills 0:e979170e02e7 274 "paddq %%mm6,%%mm3 \n\t" \
ashleymills 0:e979170e02e7 275 "movd 24(%1),%%mm5 \n\t" \
ashleymills 0:e979170e02e7 276 "movd %%mm3,20(%0) \n\t" \
ashleymills 0:e979170e02e7 277 "psrlq $32, %%mm3 \n\t" \
ashleymills 0:e979170e02e7 278 \
ashleymills 0:e979170e02e7 279 "paddq %%mm5,%%mm3 \n\t" \
ashleymills 0:e979170e02e7 280 "pmuludq %%mm1,%%mm7 \n\t" \
ashleymills 0:e979170e02e7 281 "movd 28(%2),%%mm5 \n\t" \
ashleymills 0:e979170e02e7 282 "paddq %%mm7,%%mm3 \n\t" \
ashleymills 0:e979170e02e7 283 "movd 28(%1),%%mm6 \n\t" \
ashleymills 0:e979170e02e7 284 "movd %%mm3,24(%0) \n\t" \
ashleymills 0:e979170e02e7 285 "psrlq $32, %%mm3 \n\t" \
ashleymills 0:e979170e02e7 286 \
ashleymills 0:e979170e02e7 287 "paddq %%mm6,%%mm3 \n\t" \
ashleymills 0:e979170e02e7 288 "pmuludq %%mm1,%%mm5 \n\t" \
ashleymills 0:e979170e02e7 289 "paddq %%mm5,%%mm3 \n\t" \
ashleymills 0:e979170e02e7 290 "movd %%mm3,28(%0) \n\t" \
ashleymills 0:e979170e02e7 291 "psrlq $32, %%mm3 \n\t" \
ashleymills 0:e979170e02e7 292 :"=r"(_c) : "0"(_c), "r"(tmpm) );
ashleymills 0:e979170e02e7 293
ashleymills 0:e979170e02e7 294 /* TAO switched tmpm from "g" to "r" after gcc tried to index the indexed stack
ashleymills 0:e979170e02e7 295 pointer */
ashleymills 0:e979170e02e7 296
/* LOOP_END (SSE2 branch): copies the low 32 bits of the running carry in
 * mm3 into the C variable cy.
 * NOTE(review): mm3 is not an operand of this asm, so the compiler sees no
 * dependency on the preceding INNERMUL/INNERMUL8 statements -- confirm
 * the statement order is preserved in the generated code.
 */
ashleymills 0:e979170e02e7 297 #define LOOP_END \
ashleymills 0:e979170e02e7 298 __asm__( "movd %%mm3,%0 \n" :"=r"(cy))
ashleymills 0:e979170e02e7 299
/* PROPCARRY (SSE2 branch): same integer-register sequence as the x86-32
 * branch.  %0 = _c[LO] (read/write), %1 = cy (read/write).  Adds cy to
 * _c[LO], then replaces cy with the carry flag of that addition (0 or 1).
 * eax (via al) and the condition codes are clobbered.
 */
ashleymills 0:e979170e02e7 300 #define PROPCARRY \
ashleymills 0:e979170e02e7 301 __asm__( \
ashleymills 0:e979170e02e7 302 "addl %1,%0 \n\t" \
ashleymills 0:e979170e02e7 303 "setb %%al \n\t" \
ashleymills 0:e979170e02e7 304 "movzbl %%al,%1 \n\t" \
ashleymills 0:e979170e02e7 305 :"=g"(_c[LO]), "=r"(cy) \
ashleymills 0:e979170e02e7 306 :"0"(_c[LO]), "1"(cy) \
ashleymills 0:e979170e02e7 307 : "%eax", "%cc")
ashleymills 0:e979170e02e7 308
ashleymills 0:e979170e02e7 309 /******************************************************************/
ashleymills 0:e979170e02e7 310 #elif defined(TFM_ARM)
ashleymills 0:e979170e02e7 311 /* ARMv4 code
 *
 * Hook macros for the ARM build of the Montgomery-reduce loop.
 * MONT_START, MONT_FINI and LOOP_END expand to nothing in this branch;
 * LOOP_START assigns c[x] * mp to mu.  c, x, mp and mu must be in scope at
 * the point of expansion.
 */
ashleymills 0:e979170e02e7 312
ashleymills 0:e979170e02e7 313 #define MONT_START
ashleymills 0:e979170e02e7 314 #define MONT_FINI
ashleymills 0:e979170e02e7 315 #define LOOP_END
ashleymills 0:e979170e02e7 316 #define LOOP_START \
ashleymills 0:e979170e02e7 317 mu = c[x] * mp
ashleymills 0:e979170e02e7 318
/* INNERMUL (ARM): one multiply-accumulate step.
 *
 * Operands: %0 = cy (read/write), %1 = _c[0] as a memory operand
 * (read/write), %3 = mu, %4 = *tmpm++ (tmpm is post-incremented by the
 * operand expression).
 * r0 = _c[0] + cy with the flags updated; cy is then set to 1 or 0 from
 * the carry flag; UMLAL adds the 64-bit product mu * (*tmpm) into the
 * cy:r0 pair; r0 is stored back to _c[0].  Net effect: _c[0] receives the
 * low word and cy the high word of  _c[0] + cy + mu * (*tmpm).
 * r0 and the condition codes are clobbered.  The macro text already ends
 * with ';'.
 */
ashleymills 0:e979170e02e7 319 #define INNERMUL \
ashleymills 0:e979170e02e7 320 __asm__( \
ashleymills 0:e979170e02e7 321 " LDR r0,%1 \n\t" \
ashleymills 0:e979170e02e7 322 " ADDS r0,r0,%0 \n\t" \
ashleymills 0:e979170e02e7 323 " MOVCS %0,#1 \n\t" \
ashleymills 0:e979170e02e7 324 " MOVCC %0,#0 \n\t" \
ashleymills 0:e979170e02e7 325 " UMLAL r0,%0,%3,%4 \n\t" \
ashleymills 0:e979170e02e7 326 " STR r0,%1 \n\t" \
ashleymills 0:e979170e02e7 327 :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r0","%cc");
ashleymills 0:e979170e02e7 328
/* PROPCARRY (ARM): add the carry into the current digit.
 *
 * %0 = cy (read/write), %1 = _c[0] as a memory operand (read/write).
 * r0 = _c[0] + cy with the flags updated; r0 is stored back to _c[0];
 * cy is then set to 1 if the addition carried out and to 0 otherwise.
 * r0 and the condition codes are clobbered.  The macro text already ends
 * with ';'.
 */
ashleymills 0:e979170e02e7 329 #define PROPCARRY \
ashleymills 0:e979170e02e7 330 __asm__( \
ashleymills 0:e979170e02e7 331 " LDR r0,%1 \n\t" \
ashleymills 0:e979170e02e7 332 " ADDS r0,r0,%0 \n\t" \
ashleymills 0:e979170e02e7 333 " STR r0,%1 \n\t" \
ashleymills 0:e979170e02e7 334 " MOVCS %0,#1 \n\t" \
ashleymills 0:e979170e02e7 335 " MOVCC %0,#0 \n\t" \
ashleymills 0:e979170e02e7 336 :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r0","%cc");
ashleymills 0:e979170e02e7 337
ashleymills 0:e979170e02e7 338 #elif defined(TFM_PPC32)
ashleymills 0:e979170e02e7 339
ashleymills 0:e979170e02e7 340 /* PPC32
 *
 * Hook macros for the 32-bit PowerPC build of the Montgomery-reduce loop.
 * MONT_START, MONT_FINI and LOOP_END expand to nothing in this branch;
 * LOOP_START assigns c[x] * mp to mu.  c, x, mp and mu must be in scope at
 * the point of expansion.
 */
ashleymills 0:e979170e02e7 341 #define MONT_START
ashleymills 0:e979170e02e7 342 #define MONT_FINI
ashleymills 0:e979170e02e7 343 #define LOOP_END
ashleymills 0:e979170e02e7 344 #define LOOP_START \
ashleymills 0:e979170e02e7 345 mu = c[x] * mp
ashleymills 0:e979170e02e7 346
/* INNERMUL (PPC32): one multiply-accumulate step.
 *
 * Operands: %0 = cy (read/write), %1 = _c[0] as a memory operand
 * (read/write), %3 = mu, %4 = tmpm[0].  Registers 16, 17 and 18 are used
 * as scratch and listed as clobbered together with the condition codes.
 * 16 = low word and 17 = high word of mu * tmpm[0]; cy is added into 16
 * with the carry folded into 17; _c[0] is loaded into 18 and added into
 * 16; cy becomes 17 plus that carry; 16 is stored back to _c[0].
 * NOTE: the macro expands to two statements -- the asm followed by
 * "++tmpm;" -- so it must not be used as the sole body of an unbraced
 * if/else/loop.
 */
ashleymills 0:e979170e02e7 347 #define INNERMUL \
ashleymills 0:e979170e02e7 348 __asm__( \
ashleymills 0:e979170e02e7 349 " mullw 16,%3,%4 \n\t" \
ashleymills 0:e979170e02e7 350 " mulhwu 17,%3,%4 \n\t" \
ashleymills 0:e979170e02e7 351 " addc 16,16,%0 \n\t" \
ashleymills 0:e979170e02e7 352 " addze 17,17 \n\t" \
ashleymills 0:e979170e02e7 353 " lwz 18,%1 \n\t" \
ashleymills 0:e979170e02e7 354 " addc 16,16,18 \n\t" \
ashleymills 0:e979170e02e7 355 " addze %0,17 \n\t" \
ashleymills 0:e979170e02e7 356 " stw 16,%1 \n\t" \
ashleymills 0:e979170e02e7 357 :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "18","%cc"); ++tmpm;
ashleymills 0:e979170e02e7 358
/* PROPCARRY (PPC32): add the carry into the current digit.
 *
 * %0 = cy (read/write), %1 = _c[0] as a memory operand (read/write).
 * Register 16 = _c[0] + cy (addc records the carry); 16 is stored back to
 * _c[0]; cy is zeroed with xor and then addze adds the recorded carry, so
 * cy ends up 0 or 1.  Register 16 and the condition codes are clobbered.
 * The macro text already ends with ';'.
 */
ashleymills 0:e979170e02e7 359 #define PROPCARRY \
ashleymills 0:e979170e02e7 360 __asm__( \
ashleymills 0:e979170e02e7 361 " lwz 16,%1 \n\t" \
ashleymills 0:e979170e02e7 362 " addc 16,16,%0 \n\t" \
ashleymills 0:e979170e02e7 363 " stw 16,%1 \n\t" \
ashleymills 0:e979170e02e7 364 " xor %0,%0,%0 \n\t" \
ashleymills 0:e979170e02e7 365 " addze %0,%0 \n\t" \
ashleymills 0:e979170e02e7 366 :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"16","%cc");
ashleymills 0:e979170e02e7 367
ashleymills 0:e979170e02e7 368 #elif defined(TFM_PPC64)
ashleymills 0:e979170e02e7 369
ashleymills 0:e979170e02e7 370 /* PPC64
 *
 * Hook macros for the 64-bit PowerPC build of the Montgomery-reduce loop.
 * MONT_START, MONT_FINI and LOOP_END expand to nothing in this branch;
 * LOOP_START assigns c[x] * mp to mu.  c, x, mp and mu must be in scope at
 * the point of expansion.
 */
ashleymills 0:e979170e02e7 371 #define MONT_START
ashleymills 0:e979170e02e7 372 #define MONT_FINI
ashleymills 0:e979170e02e7 373 #define LOOP_END
ashleymills 0:e979170e02e7 374 #define LOOP_START \
ashleymills 0:e979170e02e7 375 mu = c[x] * mp
ashleymills 0:e979170e02e7 376
/* INNERMUL (PPC64): one multiply-accumulate step on a 64-bit digit.
 *
 * Operands: %0 = cy (read/write), %1 = _c[0] (declared "=m", read/write),
 * %3 = mu, %4 = tmpm[0].  Registers 16, 17 and 18 are scratch and listed
 * as clobbered together with the condition codes.
 * Same sequence as the PPC32 branch with doubleword multiplies
 * (mulld/mulhdu): 17:16 = mu * tmpm[0], plus cy, plus the loaded _c[0];
 * the low doubleword goes back to _c[0] and the high one to cy.
 * NOTE: the macro expands to two statements -- the asm followed by
 * "++tmpm;" -- so it must not be used as the sole body of an unbraced
 * if/else/loop.
 * NOTE(review): "sdx" does not look like a standard PowerPC mnemonic (the
 * indexed doubleword store is normally spelled "stdx"), and %1 is declared
 * as a memory operand yet appears where ldx/sdx take a register -- confirm
 * this branch assembles on a PPC64 toolchain.
 */
ashleymills 0:e979170e02e7 377 #define INNERMUL \
ashleymills 0:e979170e02e7 378 __asm__( \
ashleymills 0:e979170e02e7 379 " mulld 16,%3,%4 \n\t" \
ashleymills 0:e979170e02e7 380 " mulhdu 17,%3,%4 \n\t" \
ashleymills 0:e979170e02e7 381 " addc 16,16,%0 \n\t" \
ashleymills 0:e979170e02e7 382 " addze 17,17 \n\t" \
ashleymills 0:e979170e02e7 383 " ldx 18,0,%1 \n\t" \
ashleymills 0:e979170e02e7 384 " addc 16,16,18 \n\t" \
ashleymills 0:e979170e02e7 385 " addze %0,17 \n\t" \
ashleymills 0:e979170e02e7 386 " sdx 16,0,%1 \n\t" \
ashleymills 0:e979170e02e7 387 :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "18","%cc"); ++tmpm;
ashleymills 0:e979170e02e7 388
/* PROPCARRY (PPC64): add the carry into the current digit.
 *
 * %0 = cy (read/write), %1 = _c[0] (declared "=m", read/write).
 * Register 16 = _c[0] + cy (addc records the carry); 16 is stored back;
 * cy is zeroed with xor and addze then adds the recorded carry, so cy
 * ends up 0 or 1.  Register 16 and the condition codes are clobbered.
 * The macro text already ends with ';'.
 * NOTE(review): "sdx" does not look like a standard PowerPC mnemonic
 * ("stdx" is the usual indexed doubleword store), and %1 is a memory
 * operand used in a register slot of ldx/sdx -- confirm this assembles.
 */
ashleymills 0:e979170e02e7 389 #define PROPCARRY \
ashleymills 0:e979170e02e7 390 __asm__( \
ashleymills 0:e979170e02e7 391 " ldx 16,0,%1 \n\t" \
ashleymills 0:e979170e02e7 392 " addc 16,16,%0 \n\t" \
ashleymills 0:e979170e02e7 393 " sdx 16,0,%1 \n\t" \
ashleymills 0:e979170e02e7 394 " xor %0,%0,%0 \n\t" \
ashleymills 0:e979170e02e7 395 " addze %0,%0 \n\t" \
ashleymills 0:e979170e02e7 396 :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"16","%cc");
ashleymills 0:e979170e02e7 397
ashleymills 0:e979170e02e7 398 /******************************************************************/
ashleymills 0:e979170e02e7 399
ashleymills 0:e979170e02e7 400 #elif defined(TFM_AVR32)
ashleymills 0:e979170e02e7 401
ashleymills 0:e979170e02e7 402 /* AVR32
 *
 * Hook macros for the AVR32 build of the Montgomery-reduce loop.
 * MONT_START, MONT_FINI and LOOP_END expand to nothing in this branch;
 * LOOP_START assigns c[x] * mp to mu.  c, x, mp and mu must be in scope at
 * the point of expansion.
 */
ashleymills 0:e979170e02e7 403 #define MONT_START
ashleymills 0:e979170e02e7 404 #define MONT_FINI
ashleymills 0:e979170e02e7 405 #define LOOP_END
ashleymills 0:e979170e02e7 406 #define LOOP_START \
ashleymills 0:e979170e02e7 407 mu = c[x] * mp
ashleymills 0:e979170e02e7 408
/* INNERMUL (AVR32): one multiply-accumulate step.
 *
 * Operands: %0 = cy (read/write), %1 = _c bound with an "r" constraint
 * (read/write), %3 = mu, %4 = *tmpm++ (tmpm is post-incremented by the
 * operand expression).  r2 and r3 are scratch and listed as clobbered; no
 * condition-code or memory clobber is declared.
 * As the mnemonics read: r2 is loaded via %1 and cy is added to it; r3 is
 * cleared and picks up the carry; macu.d accumulates mu * (*tmpm) into the
 * r3:r2 pair; r2 is stored back via %1 and r3 becomes the new cy.
 * NOTE(review): %1 is the pointer _c in a register here, while the
 * PROPCARRY macro of this branch passes &_c[0] -- confirm that ld.w/st.w
 * accept this operand form and that both macros address the same digit.
 * The macro text already ends with ';'.
 */
ashleymills 0:e979170e02e7 409 #define INNERMUL \
ashleymills 0:e979170e02e7 410 __asm__( \
ashleymills 0:e979170e02e7 411 " ld.w r2,%1 \n\t" \
ashleymills 0:e979170e02e7 412 " add r2,%0 \n\t" \
ashleymills 0:e979170e02e7 413 " eor r3,r3 \n\t" \
ashleymills 0:e979170e02e7 414 " acr r3 \n\t" \
ashleymills 0:e979170e02e7 415 " macu.d r2,%3,%4 \n\t" \
ashleymills 0:e979170e02e7 416 " st.w %1,r2 \n\t" \
ashleymills 0:e979170e02e7 417 " mov %0,r3 \n\t" \
ashleymills 0:e979170e02e7 418 :"=r"(cy),"=r"(_c):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c):"r2","r3");
ashleymills 0:e979170e02e7 419
/* PROPCARRY (AVR32): add the carry into the current digit.
 *
 * Operands: %0 = cy (read/write), %1 = &_c[0] bound with an "r"
 * constraint.  As the mnemonics read: r2 is loaded via %1, cy is added to
 * it and r2 is stored back; cy is then cleared and picks up the carry of
 * that addition.  r2 and the condition codes are listed as clobbered.
 * NOTE(review): the second output operand is the expression &_c[0], which
 * is not an lvalue, whereas the INNERMUL macro of this branch passes _c --
 * confirm this is accepted by the AVR32 toolchain.
 * The macro text already ends with ';'.
 */
ashleymills 0:e979170e02e7 420 #define PROPCARRY \
ashleymills 0:e979170e02e7 421 __asm__( \
ashleymills 0:e979170e02e7 422 " ld.w r2,%1 \n\t" \
ashleymills 0:e979170e02e7 423 " add r2,%0 \n\t" \
ashleymills 0:e979170e02e7 424 " st.w %1,r2 \n\t" \
ashleymills 0:e979170e02e7 425 " eor %0,%0 \n\t" \
ashleymills 0:e979170e02e7 426 " acr %0 \n\t" \
ashleymills 0:e979170e02e7 427 :"=r"(cy),"=r"(&_c[0]):"0"(cy),"1"(&_c[0]):"r2","%cc");
ashleymills 0:e979170e02e7 428
ashleymills 0:e979170e02e7 429 #else
ashleymills 0:e979170e02e7 430
ashleymills 0:e979170e02e7 431 /* ISO C code
 *
 * Portable fallback used when none of the architecture macros tested
 * above is defined.  MONT_START, MONT_FINI and LOOP_END expand to nothing;
 * LOOP_START assigns c[x] * mp to mu.  c, x, mp and mu must be in scope at
 * the point of expansion.
 */
ashleymills 0:e979170e02e7 432 #define MONT_START
ashleymills 0:e979170e02e7 433 #define MONT_FINI
ashleymills 0:e979170e02e7 434 #define LOOP_END
ashleymills 0:e979170e02e7 435 #define LOOP_START \
ashleymills 0:e979170e02e7 436 mu = c[x] * mp
ashleymills 0:e979170e02e7 437
/* INNERMUL (ISO C): one multiply-accumulate step.
 *
 * Computes t = _c[0] + cy + mu * (*tmpm) with every term converted to
 * fp_word, post-increments tmpm, assigns t to _c[0] (converted to the type
 * of _c[0]) and sets cy to t >> DIGIT_BIT.  fp_word and DIGIT_BIT are
 * defined outside this section.  Wrapped in do { } while (0) so the macro
 * behaves as a single statement.
 */
ashleymills 0:e979170e02e7 438 #define INNERMUL \
ashleymills 0:e979170e02e7 439 do { fp_word t; \
ashleymills 0:e979170e02e7 440 _c[0] = t = ((fp_word)_c[0] + (fp_word)cy) + \
ashleymills 0:e979170e02e7 441 (((fp_word)mu) * ((fp_word)*tmpm++)); \
ashleymills 0:e979170e02e7 442 cy = (t >> DIGIT_BIT); \
ashleymills 0:e979170e02e7 443 } while (0)
ashleymills 0:e979170e02e7 444
/* PROPCARRY (ISO C): adds cy into _c[0] and sets cy to 1 when the updated
 * digit is smaller than the carry that was added (the addition wrapped),
 * otherwise to 0.
 */
ashleymills 0:e979170e02e7 445 #define PROPCARRY \
ashleymills 0:e979170e02e7 446 do { fp_digit t = _c[0] += cy; cy = (t < cy); } while (0)
ashleymills 0:e979170e02e7 447
ashleymills 0:e979170e02e7 448 #endif
ashleymills 0:e979170e02e7 449 /******************************************************************/
ashleymills 0:e979170e02e7 450
ashleymills 0:e979170e02e7 451
/* Digit index used as _c[LO] by the x86-32, x86-64 and SSE2 macros above. */
ashleymills 0:e979170e02e7 452 #define LO 0
ashleymills 0:e979170e02e7 453 /* end fp_montgomery_reduce.c asm */
ashleymills 0:e979170e02e7 454
ashleymills 0:e979170e02e7 455
ashleymills 0:e979170e02e7 456 /* start fp_sqr_comba.c asm */
ashleymills 0:e979170e02e7 457 #if defined(TFM_X86)
ashleymills 0:e979170e02e7 458
ashleymills 0:e979170e02e7 459 /* x86-32 optimized
 *
 * Bookkeeping macros for the comba squaring code (x86-32 branch).  They
 * operate on the three-word accumulator c0, c1, c2, which must be in scope
 * at the point of expansion.
 *   COMBA_START / COMBA_FINI  expand to nothing in this branch.
 *   CLEAR_CARRY               sets c0, c1 and c2 to 0.
 *   COMBA_STORE(x)            assigns c0 to x.
 *   COMBA_STORE2(x)           assigns c1 to x.
 *   CARRY_FORWARD             shifts the accumulator down one word:
 *                             c0 = c1, c1 = c2, c2 = 0.
 * NOTE: the bodies of CLEAR_CARRY, COMBA_STORE, COMBA_STORE2 and
 * CARRY_FORWARD already end with ';', so writing a further ';' after the
 * macro adds an empty statement.
 * NOTE(review): this branch is selected by defined(TFM_X86) alone, while
 * the Montgomery section's x86-32 branch also requires
 * !defined(TFM_SSE2); if both macros are defined this branch is chosen
 * ahead of the SSE2 one below -- confirm that is intended.
 */
ashleymills 0:e979170e02e7 460
ashleymills 0:e979170e02e7 461 #define COMBA_START
ashleymills 0:e979170e02e7 462
ashleymills 0:e979170e02e7 463 #define CLEAR_CARRY \
ashleymills 0:e979170e02e7 464 c0 = c1 = c2 = 0;
ashleymills 0:e979170e02e7 465
ashleymills 0:e979170e02e7 466 #define COMBA_STORE(x) \
ashleymills 0:e979170e02e7 467 x = c0;
ashleymills 0:e979170e02e7 468
ashleymills 0:e979170e02e7 469 #define COMBA_STORE2(x) \
ashleymills 0:e979170e02e7 470 x = c1;
ashleymills 0:e979170e02e7 471
ashleymills 0:e979170e02e7 472 #define CARRY_FORWARD \
ashleymills 0:e979170e02e7 473 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
ashleymills 0:e979170e02e7 474
ashleymills 0:e979170e02e7 475 #define COMBA_FINI
ashleymills 0:e979170e02e7 476
/* SQRADD(i, j) (x86-32): adds the square of i to the accumulator.
 *
 * %0..%2 = c0, c1, c2 (read/write), %6 = i as a memory operand; j is not
 * used.  edx:eax = i * i; eax is added to c0, edx to c1 with carry, and
 * the final carry to c2.  eax, edx and the condition codes are clobbered.
 * The macro text already ends with ';'.
 */
ashleymills 0:e979170e02e7 477 #define SQRADD(i, j) \
ashleymills 0:e979170e02e7 478 __asm__( \
ashleymills 0:e979170e02e7 479 "movl %6,%%eax \n\t" \
ashleymills 0:e979170e02e7 480 "mull %%eax \n\t" \
ashleymills 0:e979170e02e7 481 "addl %%eax,%0 \n\t" \
ashleymills 0:e979170e02e7 482 "adcl %%edx,%1 \n\t" \
ashleymills 0:e979170e02e7 483 "adcl $0,%2 \n\t" \
ashleymills 0:e979170e02e7 484 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","%cc");
ashleymills 0:e979170e02e7 485
/* SQRADD2(i, j) (x86-32): adds twice the product i * j to the accumulator.
 *
 * %0..%2 = c0, c1, c2 (read/write), %6 = i and %7 = j as memory operands.
 * edx:eax = i * j is computed once and then added into c2:c1:c0 two times
 * in succession, each time propagating the carries.  eax, edx and the
 * condition codes are clobbered.  The macro text already ends with ';'.
 */
ashleymills 0:e979170e02e7 486 #define SQRADD2(i, j) \
ashleymills 0:e979170e02e7 487 __asm__( \
ashleymills 0:e979170e02e7 488 "movl %6,%%eax \n\t" \
ashleymills 0:e979170e02e7 489 "mull %7 \n\t" \
ashleymills 0:e979170e02e7 490 "addl %%eax,%0 \n\t" \
ashleymills 0:e979170e02e7 491 "adcl %%edx,%1 \n\t" \
ashleymills 0:e979170e02e7 492 "adcl $0,%2 \n\t" \
ashleymills 0:e979170e02e7 493 "addl %%eax,%0 \n\t" \
ashleymills 0:e979170e02e7 494 "adcl %%edx,%1 \n\t" \
ashleymills 0:e979170e02e7 495 "adcl $0,%2 \n\t" \
ashleymills 0:e979170e02e7 496 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx", "%cc");
ashleymills 0:e979170e02e7 497
/* SQRADDSC(i, j) (x86-32): starts the secondary accumulator sc2:sc1:sc0.
 *
 * %0..%2 = sc0, sc1, sc2 (outputs only), %3 = i, %4 = j.  edx:eax = i * j;
 * sc0 receives the low word, sc1 the high word and sc2 is zeroed with
 * xorl.  Both inputs are read before any output is written.  eax, edx and
 * the condition codes are clobbered.  The macro text already ends with
 * ';'.
 */
ashleymills 0:e979170e02e7 498 #define SQRADDSC(i, j) \
ashleymills 0:e979170e02e7 499 __asm__( \
ashleymills 0:e979170e02e7 500 "movl %3,%%eax \n\t" \
ashleymills 0:e979170e02e7 501 "mull %4 \n\t" \
ashleymills 0:e979170e02e7 502 "movl %%eax,%0 \n\t" \
ashleymills 0:e979170e02e7 503 "movl %%edx,%1 \n\t" \
ashleymills 0:e979170e02e7 504 "xorl %2,%2 \n\t" \
ashleymills 0:e979170e02e7 505 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "g"(i), "g"(j) :"%eax","%edx","%cc");
ashleymills 0:e979170e02e7 506
ashleymills 0:e979170e02e7 507 /* TAO removed sc0,1,2 as input to remove warning so %6,%7 become %3,%4 */
ashleymills 0:e979170e02e7 508
/* SQRADDAC(i, j) (x86-32): adds the product i * j to the secondary
 * accumulator sc2:sc1:sc0.
 *
 * %0..%2 = sc0, sc1, sc2 (read/write), %6 = i, %7 = j.  edx:eax = i * j;
 * eax is added to sc0, edx to sc1 with carry, and the final carry to sc2.
 * eax, edx and the condition codes are clobbered.  The macro text already
 * ends with ';'.
 */
ashleymills 0:e979170e02e7 509 #define SQRADDAC(i, j) \
ashleymills 0:e979170e02e7 510 __asm__( \
ashleymills 0:e979170e02e7 511 "movl %6,%%eax \n\t" \
ashleymills 0:e979170e02e7 512 "mull %7 \n\t" \
ashleymills 0:e979170e02e7 513 "addl %%eax,%0 \n\t" \
ashleymills 0:e979170e02e7 514 "adcl %%edx,%1 \n\t" \
ashleymills 0:e979170e02e7 515 "adcl $0,%2 \n\t" \
ashleymills 0:e979170e02e7 516 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc");
ashleymills 0:e979170e02e7 517
/* SQRADDDB (x86-32): adds the secondary accumulator sc2:sc1:sc0 to the
 * main accumulator c2:c1:c0 twice, i.e. c += 2 * sc.
 *
 * %0..%2 = c0, c1, c2 (read/write), %6..%8 = sc0, sc1, sc2 (inputs in
 * registers).  Each pass is an add followed by two add-with-carry
 * instructions.  Only the condition codes are clobbered.  The macro text
 * already ends with ';'.
 */
ashleymills 0:e979170e02e7 518 #define SQRADDDB \
ashleymills 0:e979170e02e7 519 __asm__( \
ashleymills 0:e979170e02e7 520 "addl %6,%0 \n\t" \
ashleymills 0:e979170e02e7 521 "adcl %7,%1 \n\t" \
ashleymills 0:e979170e02e7 522 "adcl %8,%2 \n\t" \
ashleymills 0:e979170e02e7 523 "addl %6,%0 \n\t" \
ashleymills 0:e979170e02e7 524 "adcl %7,%1 \n\t" \
ashleymills 0:e979170e02e7 525 "adcl %8,%2 \n\t" \
ashleymills 0:e979170e02e7 526 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "%cc");
ashleymills 0:e979170e02e7 527
ashleymills 0:e979170e02e7 528 #elif defined(TFM_X86_64)
ashleymills 0:e979170e02e7 529 /* x86-64 optimized
 *
 * Bookkeeping macros for the comba squaring code (x86-64 branch).  They
 * operate on the three-word accumulator c0, c1, c2, which must be in scope
 * at the point of expansion.
 *   COMBA_START / COMBA_FINI  expand to nothing in this branch.
 *   CLEAR_CARRY               sets c0, c1 and c2 to 0.
 *   COMBA_STORE(x)            assigns c0 to x.
 *   COMBA_STORE2(x)           assigns c1 to x.
 *   CARRY_FORWARD             shifts the accumulator down one word:
 *                             c0 = c1, c1 = c2, c2 = 0.
 * NOTE: the bodies of CLEAR_CARRY, COMBA_STORE, COMBA_STORE2 and
 * CARRY_FORWARD already end with ';', so writing a further ';' after the
 * macro adds an empty statement.
 */
ashleymills 0:e979170e02e7 530
ashleymills 0:e979170e02e7 531 #define COMBA_START
ashleymills 0:e979170e02e7 532
ashleymills 0:e979170e02e7 533 #define CLEAR_CARRY \
ashleymills 0:e979170e02e7 534 c0 = c1 = c2 = 0;
ashleymills 0:e979170e02e7 535
ashleymills 0:e979170e02e7 536 #define COMBA_STORE(x) \
ashleymills 0:e979170e02e7 537 x = c0;
ashleymills 0:e979170e02e7 538
ashleymills 0:e979170e02e7 539 #define COMBA_STORE2(x) \
ashleymills 0:e979170e02e7 540 x = c1;
ashleymills 0:e979170e02e7 541
ashleymills 0:e979170e02e7 542 #define CARRY_FORWARD \
ashleymills 0:e979170e02e7 543 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
ashleymills 0:e979170e02e7 544
ashleymills 0:e979170e02e7 545 #define COMBA_FINI
ashleymills 0:e979170e02e7 546
/* SQRADD(i, j) (x86-64): adds the square of i to the accumulator.
 *
 * %0..%2 = c0, c1, c2 (read/write), %6 = i; j is not used.
 * rdx:rax = i * i; rax is added to c0, rdx to c1 with carry, and the final
 * carry to c2.  rax, rdx and the condition codes are clobbered.  The macro
 * text already ends with ';'.
 */
ashleymills 0:e979170e02e7 547 #define SQRADD(i, j) \
ashleymills 0:e979170e02e7 548 __asm__( \
ashleymills 0:e979170e02e7 549 "movq %6,%%rax \n\t" \
ashleymills 0:e979170e02e7 550 "mulq %%rax \n\t" \
ashleymills 0:e979170e02e7 551 "addq %%rax,%0 \n\t" \
ashleymills 0:e979170e02e7 552 "adcq %%rdx,%1 \n\t" \
ashleymills 0:e979170e02e7 553 "adcq $0,%2 \n\t" \
ashleymills 0:e979170e02e7 554 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i) :"%rax","%rdx","%cc");
ashleymills 0:e979170e02e7 555
/* SQRADD2(i, j) (x86-64): adds twice the product i * j to the accumulator.
 *
 * %0..%2 = c0, c1, c2 (read/write), %6 = i, %7 = j.  rdx:rax = i * j is
 * computed once and then added into c2:c1:c0 two times in succession, each
 * time propagating the carries.  rax, rdx and the condition codes are
 * clobbered.  The macro text already ends with ';'.
 */
ashleymills 0:e979170e02e7 556 #define SQRADD2(i, j) \
ashleymills 0:e979170e02e7 557 __asm__( \
ashleymills 0:e979170e02e7 558 "movq %6,%%rax \n\t" \
ashleymills 0:e979170e02e7 559 "mulq %7 \n\t" \
ashleymills 0:e979170e02e7 560 "addq %%rax,%0 \n\t" \
ashleymills 0:e979170e02e7 561 "adcq %%rdx,%1 \n\t" \
ashleymills 0:e979170e02e7 562 "adcq $0,%2 \n\t" \
ashleymills 0:e979170e02e7 563 "addq %%rax,%0 \n\t" \
ashleymills 0:e979170e02e7 564 "adcq %%rdx,%1 \n\t" \
ashleymills 0:e979170e02e7 565 "adcq $0,%2 \n\t" \
ashleymills 0:e979170e02e7 566 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","%cc");
ashleymills 0:e979170e02e7 567
/* SQRADDSC(i, j) (x86-64): starts the secondary accumulator sc2:sc1:sc0.
 *
 * %0..%2 = sc0, sc1, sc2 (outputs only), %3 = i, %4 = j.  rdx:rax = i * j;
 * sc0 receives the low word, sc1 the high word and sc2 is zeroed with
 * xorq.  Both inputs are read before any output is written.  rax, rdx and
 * the condition codes are clobbered.  The macro text already ends with
 * ';'.
 */
ashleymills 0:e979170e02e7 568 #define SQRADDSC(i, j) \
ashleymills 0:e979170e02e7 569 __asm__( \
ashleymills 0:e979170e02e7 570 "movq %3,%%rax \n\t" \
ashleymills 0:e979170e02e7 571 "mulq %4 \n\t" \
ashleymills 0:e979170e02e7 572 "movq %%rax,%0 \n\t" \
ashleymills 0:e979170e02e7 573 "movq %%rdx,%1 \n\t" \
ashleymills 0:e979170e02e7 574 "xorq %2,%2 \n\t" \
ashleymills 0:e979170e02e7 575 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "g"(i), "g"(j) :"%rax","%rdx","%cc");
ashleymills 0:e979170e02e7 576
ashleymills 0:e979170e02e7 577 /* TAO removed sc0,1,2 as input to remove warning so %6,%7 become %3,%4 */
ashleymills 0:e979170e02e7 578
/* SQRADDAC(i, j) (x86-64): adds the product i * j to the secondary
 * accumulator sc2:sc1:sc0.
 *
 * %0..%2 = sc0, sc1, sc2 (read/write), %6 = i, %7 = j.  rdx:rax = i * j;
 * rax is added to sc0, rdx to sc1 with carry, and the final carry to sc2.
 * rax, rdx and the condition codes are clobbered.  The macro text already
 * ends with ';'.
 */
ashleymills 0:e979170e02e7 579 #define SQRADDAC(i, j) \
ashleymills 0:e979170e02e7 580 __asm__( \
ashleymills 0:e979170e02e7 581 "movq %6,%%rax \n\t" \
ashleymills 0:e979170e02e7 582 "mulq %7 \n\t" \
ashleymills 0:e979170e02e7 583 "addq %%rax,%0 \n\t" \
ashleymills 0:e979170e02e7 584 "adcq %%rdx,%1 \n\t" \
ashleymills 0:e979170e02e7 585 "adcq $0,%2 \n\t" \
ashleymills 0:e979170e02e7 586 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","%cc");
ashleymills 0:e979170e02e7 587
/* SQRADDDB (x86-64): adds the secondary accumulator sc2:sc1:sc0 to the
 * main accumulator c2:c1:c0 twice, i.e. c += 2 * sc.
 *
 * %0..%2 = c0, c1, c2 (read/write), %6..%8 = sc0, sc1, sc2 (inputs in
 * registers).  Each pass is an add followed by two add-with-carry
 * instructions.  Only the condition codes are clobbered.  The macro text
 * already ends with ';'.
 */
ashleymills 0:e979170e02e7 588 #define SQRADDDB \
ashleymills 0:e979170e02e7 589 __asm__( \
ashleymills 0:e979170e02e7 590 "addq %6,%0 \n\t" \
ashleymills 0:e979170e02e7 591 "adcq %7,%1 \n\t" \
ashleymills 0:e979170e02e7 592 "adcq %8,%2 \n\t" \
ashleymills 0:e979170e02e7 593 "addq %6,%0 \n\t" \
ashleymills 0:e979170e02e7 594 "adcq %7,%1 \n\t" \
ashleymills 0:e979170e02e7 595 "adcq %8,%2 \n\t" \
ashleymills 0:e979170e02e7 596 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "%cc");
ashleymills 0:e979170e02e7 597
ashleymills 0:e979170e02e7 598 #elif defined(TFM_SSE2)
ashleymills 0:e979170e02e7 599
ashleymills 0:e979170e02e7 600 /* SSE2 Optimized */
/* SSE2 squaring: bookkeeping for the c0/c1/c2 column accumulator.
 * c0, c1 and c2 are not declared here; they must be in scope wherever the
 * macros are expanded. */
#define COMBA_START                      /* nothing to set up */
#define CLEAR_CARRY      c0 = c1 = c2 = 0;
#define COMBA_STORE(x)   x = c0;
#define COMBA_STORE2(x)  x = c1;
/* shift the accumulator down one word */
#define CARRY_FORWARD    do { c0 = c1; c1 = c2; c2 = 0; } while (0);
/* issue emms at the end of a routine that used the MMX registers */
#define COMBA_FINI       __asm__("emms");
ashleymills 0:e979170e02e7 617
/* SQRADD(i, j) -- SSE2: add the square of i to c2:c1:c0.
 * Only i is referenced (j is unused): pmuludq multiplies mm0 by itself and
 * the two 32-bit halves of the result are added with carry.
 * Clobbers eax and the flags ("cc"); mm0 is used as scratch. */
#define SQRADD(i, j) \
    __asm__( \
    "movd %6,%%mm0 \n\t" \
    "pmuludq %%mm0,%%mm0\n\t" \
    "movd %%mm0,%%eax \n\t" \
    "psrlq $32,%%mm0 \n\t" \
    "addl %%eax,%0 \n\t" \
    "movd %%mm0,%%eax \n\t" \
    "adcl %%eax,%1 \n\t" \
    "adcl $0,%2 \n\t" \
    : "=r"(c0), "=r"(c1), "=r"(c2) \
    : "0"(c0), "1"(c1), "2"(c2), "m"(i) \
    : "%eax", "cc");
ashleymills 0:e979170e02e7 629
/* SQRADD2(i, j) -- SSE2: add the product i*j to c2:c1:c0 twice.
 * The product is formed once with pmuludq, split into eax (low) and edx
 * (high), and the add-with-carry chain is then run two times.
 * Clobbers eax, edx and the flags ("cc"); mm0/mm1 are used as scratch. */
#define SQRADD2(i, j) \
    __asm__( \
    "movd %6,%%mm0 \n\t" \
    "movd %7,%%mm1 \n\t" \
    "pmuludq %%mm1,%%mm0\n\t" \
    "movd %%mm0,%%eax \n\t" \
    "psrlq $32,%%mm0 \n\t" \
    "movd %%mm0,%%edx \n\t" \
    "addl %%eax,%0 \n\t" \
    "adcl %%edx,%1 \n\t" \
    "adcl $0,%2 \n\t" \
    "addl %%eax,%0 \n\t" \
    "adcl %%edx,%1 \n\t" \
    "adcl $0,%2 \n\t" \
    : "=r"(c0), "=r"(c1), "=r"(c2) \
    : "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) \
    : "%eax", "%edx", "cc");
ashleymills 0:e979170e02e7 645
/* SQRADDSC(i, j) -- SSE2: load the secondary sum with the product i*j.
 * The low 32 bits of the pmuludq result go to sc0, the high 32 bits to sc1,
 * and sc2 is zeroed with xorl (which writes the flags, hence "cc").
 * mm0/mm1 are used as scratch.
 * TAO: sc0/sc1/sc2 were removed from the input list to silence a warning,
 * so i and j are operands %3 and %4 (they used to be %6 and %7). */
#define SQRADDSC(i, j) \
    __asm__( \
    "movd %3,%%mm0 \n\t" \
    "movd %4,%%mm1 \n\t" \
    "pmuludq %%mm1,%%mm0\n\t" \
    "movd %%mm0,%0 \n\t" \
    "psrlq $32,%%mm0 \n\t" \
    "movd %%mm0,%1 \n\t" \
    "xorl %2,%2 \n\t" \
    : "=r"(sc0), "=r"(sc1), "=r"(sc2) \
    : "m"(i), "m"(j) \
    : "cc");
ashleymills 0:e979170e02e7 658
/* SQRADDAC(i, j) -- SSE2: accumulate the product i*j into the secondary sum
 * sc2:sc1:sc0.  Clobbers eax, edx and the flags ("cc"); mm0/mm1 are used as
 * scratch. */
#define SQRADDAC(i, j) \
    __asm__( \
    "movd %6,%%mm0 \n\t" \
    "movd %7,%%mm1 \n\t" \
    "pmuludq %%mm1,%%mm0\n\t" \
    "movd %%mm0,%%eax \n\t" \
    "psrlq $32,%%mm0 \n\t" \
    "movd %%mm0,%%edx \n\t" \
    "addl %%eax,%0 \n\t" \
    "adcl %%edx,%1 \n\t" \
    "adcl $0,%2 \n\t" \
    : "=r"(sc0), "=r"(sc1), "=r"(sc2) \
    : "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j) \
    : "%eax", "%edx", "cc");
ashleymills 0:e979170e02e7 671
/* SQRADDDB -- SSE2 build: add the secondary sum sc2:sc1:sc0 to c2:c1:c0
 * two times over using plain 32-bit add/adc.  Only the flags ("cc") are
 * clobbered. */
#define SQRADDDB \
    __asm__( \
    "addl %6,%0 \n\t" \
    "adcl %7,%1 \n\t" \
    "adcl %8,%2 \n\t" \
    "addl %6,%0 \n\t" \
    "adcl %7,%1 \n\t" \
    "adcl %8,%2 \n\t" \
    : "=r"(c0), "=r"(c1), "=r"(c2) \
    : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) \
    : "cc");
ashleymills 0:e979170e02e7 681
ashleymills 0:e979170e02e7 682 #elif defined(TFM_ARM)
ashleymills 0:e979170e02e7 683
ashleymills 0:e979170e02e7 684 /* ARM code */
ashleymills 0:e979170e02e7 685
/* ARM squaring: bookkeeping for the c0/c1/c2 column accumulator.
 * c0, c1 and c2 must be in scope wherever the macros are expanded. */
#define COMBA_START                      /* nothing to set up */
#define CLEAR_CARRY      c0 = c1 = c2 = 0;
#define COMBA_STORE(x)   x = c0;
#define COMBA_STORE2(x)  x = c1;
/* shift the accumulator down one word */
#define CARRY_FORWARD    do { c0 = c1; c1 = c2; c2 = 0; } while (0);
#define COMBA_FINI                       /* nothing to tear down */
ashleymills 0:e979170e02e7 701
/* SQRADD(i, j) -- ARM: add the square of i to c2:c1:c0.
 * Only i is referenced (UMULL takes operand %6 for both sources); the
 * product lands in r0/r1 and is added with carry into c0, c1, c2.
 * Clobbers r0, r1 and the flags ("cc"). */
#define SQRADD(i, j) \
    __asm__( \
    " UMULL r0,r1,%6,%6 \n\t" \
    " ADDS %0,%0,r0 \n\t" \
    " ADCS %1,%1,r1 \n\t" \
    " ADC %2,%2,#0 \n\t" \
    : "=r"(c0), "=r"(c1), "=r"(c2) \
    : "0"(c0), "1"(c1), "2"(c2), "r"(i) \
    : "r0", "r1", "cc");
ashleymills 0:e979170e02e7 710
/* SQRADD2(i, j) -- ARM: for squaring some of the terms are doubled, so the
 * product i*j (computed once into r0/r1) is added to c2:c1:c0 twice.
 * Clobbers r0, r1 and the flags ("cc"). */
#define SQRADD2(i, j) \
    __asm__( \
    " UMULL r0,r1,%6,%7 \n\t" \
    " ADDS %0,%0,r0 \n\t" \
    " ADCS %1,%1,r1 \n\t" \
    " ADC %2,%2,#0 \n\t" \
    " ADDS %0,%0,r0 \n\t" \
    " ADCS %1,%1,r1 \n\t" \
    " ADC %2,%2,#0 \n\t" \
    : "=r"(c0), "=r"(c1), "=r"(c2) \
    : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) \
    : "r0", "r1", "cc");
ashleymills 0:e979170e02e7 722
/* SQRADDSC(i, j) -- ARM: load the secondary sum with the product i*j.
 * UMULL writes the result straight into sc0/sc1 and SUB zeroes sc2.
 * sc0/sc1/sc2 are still listed as tied inputs although their previous
 * values are overwritten. */
#define SQRADDSC(i, j) \
    __asm__( \
    " UMULL %0,%1,%6,%7 \n\t" \
    " SUB %2,%2,%2 \n\t" \
    : "=r"(sc0), "=r"(sc1), "=r"(sc2) \
    : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) \
    : "cc");
ashleymills 0:e979170e02e7 728
/* SQRADDAC(i, j) -- ARM: accumulate the product i*j (via r0/r1) into the
 * secondary sum sc2:sc1:sc0.  Clobbers r0, r1 and the flags ("cc"). */
#define SQRADDAC(i, j) \
    __asm__( \
    " UMULL r0,r1,%6,%7 \n\t" \
    " ADDS %0,%0,r0 \n\t" \
    " ADCS %1,%1,r1 \n\t" \
    " ADC %2,%2,#0 \n\t" \
    : "=r"(sc0), "=r"(sc1), "=r"(sc2) \
    : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) \
    : "r0", "r1", "cc");
ashleymills 0:e979170e02e7 736
/* SQRADDDB -- ARM: add the secondary sum sc2:sc1:sc0 to c2:c1:c0 two times
 * over.  Here the sc* inputs are operands %3..%5 and the tied c* inputs come
 * last.  Only the flags ("cc") are clobbered. */
#define SQRADDDB \
    __asm__( \
    " ADDS %0,%0,%3 \n\t" \
    " ADCS %1,%1,%4 \n\t" \
    " ADC %2,%2,%5 \n\t" \
    " ADDS %0,%0,%3 \n\t" \
    " ADCS %1,%1,%4 \n\t" \
    " ADC %2,%2,%5 \n\t" \
    : "=r"(c0), "=r"(c1), "=r"(c2) \
    : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) \
    : "cc");
ashleymills 0:e979170e02e7 746
ashleymills 0:e979170e02e7 747 #elif defined(TFM_PPC32)
ashleymills 0:e979170e02e7 748
ashleymills 0:e979170e02e7 749 /* PPC32 */
ashleymills 0:e979170e02e7 750
/* PPC32 squaring: bookkeeping for the c0/c1/c2 column accumulator.
 * c0, c1 and c2 must be in scope wherever the macros are expanded. */
#define COMBA_START                      /* nothing to set up */
#define CLEAR_CARRY      c0 = c1 = c2 = 0;
#define COMBA_STORE(x)   x = c0;
#define COMBA_STORE2(x)  x = c1;
/* shift the accumulator down one word */
#define CARRY_FORWARD    do { c0 = c1; c1 = c2; c2 = 0; } while (0);
#define COMBA_FINI                       /* nothing to tear down */
ashleymills 0:e979170e02e7 766
/* SQRADD(i, j) -- PPC32: add the square of i to c2:c1:c0.
 * Only i is referenced (operand %6 feeds both multiply sources).  Register
 * 16 holds first the low word (mullw) and then the high word (mulhwu) of the
 * product; addc/adde/addze carry the sum through c0, c1, c2.
 * Clobbers register 16 and "cc". */
#define SQRADD(i, j) \
    __asm__( \
    " mullw 16,%6,%6 \n\t" \
    " addc %0,%0,16 \n\t" \
    " mulhwu 16,%6,%6 \n\t" \
    " adde %1,%1,16 \n\t" \
    " addze %2,%2 \n\t" \
    : "=r"(c0), "=r"(c1), "=r"(c2) \
    : "0"(c0), "1"(c1), "2"(c2), "r"(i) \
    : "16", "cc");
ashleymills 0:e979170e02e7 776
/* SQRADD2(i, j) -- PPC32: for squaring some of the terms are doubled, so the
 * product i*j (low word in register 16, high word in register 17) is added
 * to c2:c1:c0 twice.  Clobbers registers 16, 17 and "cc". */
#define SQRADD2(i, j) \
    __asm__( \
    " mullw 16,%6,%7 \n\t" \
    " mulhwu 17,%6,%7 \n\t" \
    " addc %0,%0,16 \n\t" \
    " adde %1,%1,17 \n\t" \
    " addze %2,%2 \n\t" \
    " addc %0,%0,16 \n\t" \
    " adde %1,%1,17 \n\t" \
    " addze %2,%2 \n\t" \
    : "=r"(c0), "=r"(c1), "=r"(c2) \
    : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) \
    : "16", "17", "cc");
ashleymills 0:e979170e02e7 789
/* SQRADDSC(i, j) -- PPC32: load the secondary sum with the product i*j.
 * mullw writes the low word to sc0, mulhwu the high word to sc1, and xor
 * zeroes sc2.  sc0/sc1/sc2 are still listed as tied inputs although their
 * previous values are overwritten. */
#define SQRADDSC(i, j) \
    __asm__( \
    " mullw %0,%6,%7 \n\t" \
    " mulhwu %1,%6,%7 \n\t" \
    " xor %2,%2,%2 \n\t" \
    : "=r"(sc0), "=r"(sc1), "=r"(sc2) \
    : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) \
    : "cc");
ashleymills 0:e979170e02e7 796
/* SQRADDAC(i, j) -- PPC32: accumulate the product i*j into the secondary sum
 * sc2:sc1:sc0, using register 16 for the low and then the high product word.
 * Clobbers register 16 and "cc". */
#define SQRADDAC(i, j) \
    __asm__( \
    " mullw 16,%6,%7 \n\t" \
    " addc %0,%0,16 \n\t" \
    " mulhwu 16,%6,%7 \n\t" \
    " adde %1,%1,16 \n\t" \
    " addze %2,%2 \n\t" \
    : "=r"(sc0), "=r"(sc1), "=r"(sc2) \
    : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) \
    : "16", "cc");
ashleymills 0:e979170e02e7 805
/* SQRADDDB -- PPC32: add the secondary sum sc2:sc1:sc0 to c2:c1:c0 two times
 * over.  The sc* inputs are operands %3..%5; the tied c* inputs come last. */
#define SQRADDDB \
    __asm__( \
    " addc %0,%0,%3 \n\t" \
    " adde %1,%1,%4 \n\t" \
    " adde %2,%2,%5 \n\t" \
    " addc %0,%0,%3 \n\t" \
    " adde %1,%1,%4 \n\t" \
    " adde %2,%2,%5 \n\t" \
    : "=r"(c0), "=r"(c1), "=r"(c2) \
    : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) \
    : "cc");
ashleymills 0:e979170e02e7 815
ashleymills 0:e979170e02e7 816 #elif defined(TFM_PPC64)
ashleymills 0:e979170e02e7 817 /* PPC64 */
ashleymills 0:e979170e02e7 818
/* PPC64 squaring: bookkeeping for the c0/c1/c2 column accumulator.
 * c0, c1 and c2 must be in scope wherever the macros are expanded. */
#define COMBA_START                      /* nothing to set up */
#define CLEAR_CARRY      c0 = c1 = c2 = 0;
#define COMBA_STORE(x)   x = c0;
#define COMBA_STORE2(x)  x = c1;
/* shift the accumulator down one word */
#define CARRY_FORWARD    do { c0 = c1; c1 = c2; c2 = 0; } while (0);
#define COMBA_FINI                       /* nothing to tear down */
ashleymills 0:e979170e02e7 834
/* SQRADD(i, j) -- PPC64: add the square of i to c2:c1:c0.
 * Only i is referenced (operand %6 feeds both multiply sources).  Register
 * 16 holds first the low doubleword (mulld) and then the high doubleword
 * (mulhdu) of the product.  Clobbers register 16 and "cc". */
#define SQRADD(i, j) \
    __asm__( \
    " mulld 16,%6,%6 \n\t" \
    " addc %0,%0,16 \n\t" \
    " mulhdu 16,%6,%6 \n\t" \
    " adde %1,%1,16 \n\t" \
    " addze %2,%2 \n\t" \
    : "=r"(c0), "=r"(c1), "=r"(c2) \
    : "0"(c0), "1"(c1), "2"(c2), "r"(i) \
    : "16", "cc");
ashleymills 0:e979170e02e7 844
/* SQRADD2(i, j) -- PPC64: for squaring some of the terms are doubled, so the
 * product i*j (low half in register 16, high half in register 17) is added
 * to c2:c1:c0 twice.  Clobbers registers 16, 17 and "cc". */
#define SQRADD2(i, j) \
    __asm__( \
    " mulld 16,%6,%7 \n\t" \
    " mulhdu 17,%6,%7 \n\t" \
    " addc %0,%0,16 \n\t" \
    " adde %1,%1,17 \n\t" \
    " addze %2,%2 \n\t" \
    " addc %0,%0,16 \n\t" \
    " adde %1,%1,17 \n\t" \
    " addze %2,%2 \n\t" \
    : "=r"(c0), "=r"(c1), "=r"(c2) \
    : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) \
    : "16", "17", "cc");
ashleymills 0:e979170e02e7 857
/* SQRADDSC(i, j) -- PPC64: load the secondary sum with the product i*j.
 * mulld writes the low half to sc0, mulhdu the high half to sc1, and xor
 * zeroes sc2.  sc0/sc1/sc2 are still listed as tied inputs although their
 * previous values are overwritten. */
#define SQRADDSC(i, j) \
    __asm__( \
    " mulld %0,%6,%7 \n\t" \
    " mulhdu %1,%6,%7 \n\t" \
    " xor %2,%2,%2 \n\t" \
    : "=r"(sc0), "=r"(sc1), "=r"(sc2) \
    : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) \
    : "cc");
ashleymills 0:e979170e02e7 864
/* SQRADDAC(i, j) -- PPC64: accumulate the product i*j into the secondary sum
 * sc2:sc1:sc0, using register 16 for the low and then the high product half.
 * Clobbers register 16 and "cc". */
#define SQRADDAC(i, j) \
    __asm__( \
    " mulld 16,%6,%7 \n\t" \
    " addc %0,%0,16 \n\t" \
    " mulhdu 16,%6,%7 \n\t" \
    " adde %1,%1,16 \n\t" \
    " addze %2,%2 \n\t" \
    : "=r"(sc0), "=r"(sc1), "=r"(sc2) \
    : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) \
    : "16", "cc");
ashleymills 0:e979170e02e7 873
/* SQRADDDB -- PPC64: add the secondary sum sc2:sc1:sc0 to c2:c1:c0 two times
 * over.  The sc* inputs are operands %3..%5; the tied c* inputs come last. */
#define SQRADDDB \
    __asm__( \
    " addc %0,%0,%3 \n\t" \
    " adde %1,%1,%4 \n\t" \
    " adde %2,%2,%5 \n\t" \
    " addc %0,%0,%3 \n\t" \
    " adde %1,%1,%4 \n\t" \
    " adde %2,%2,%5 \n\t" \
    : "=r"(c0), "=r"(c1), "=r"(c2) \
    : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) \
    : "cc");
ashleymills 0:e979170e02e7 883
ashleymills 0:e979170e02e7 884
ashleymills 0:e979170e02e7 885 #elif defined(TFM_AVR32)
ashleymills 0:e979170e02e7 886
ashleymills 0:e979170e02e7 887 /* AVR32 */
ashleymills 0:e979170e02e7 888
/* AVR32 squaring: bookkeeping for the c0/c1/c2 column accumulator.
 * c0, c1 and c2 must be in scope wherever the macros are expanded. */
#define COMBA_START                      /* nothing to set up */
#define CLEAR_CARRY      c0 = c1 = c2 = 0;
#define COMBA_STORE(x)   x = c0;
#define COMBA_STORE2(x)  x = c1;
/* shift the accumulator down one word */
#define CARRY_FORWARD    do { c0 = c1; c1 = c2; c2 = 0; } while (0);
#define COMBA_FINI                       /* nothing to tear down */
ashleymills 0:e979170e02e7 904
/* SQRADD(i, j) -- AVR32: add the square of i to c2:c1:c0.
 * Only i is referenced (operand %6 feeds both mulu.d sources); the product
 * is read back from r2/r3, both of which are declared clobbered. */
#define SQRADD(i, j) \
    __asm__( \
    " mulu.d r2,%6,%6 \n\t" \
    " add %0,%0,r2 \n\t" \
    " adc %1,%1,r3 \n\t" \
    " acr %2 \n\t" \
    : "=r"(c0), "=r"(c1), "=r"(c2) \
    : "0"(c0), "1"(c1), "2"(c2), "r"(i) \
    : "r2", "r3");
ashleymills 0:e979170e02e7 913
/* SQRADD2(i, j) -- AVR32: for squaring some of the terms are doubled, so the
 * product i*j (read back from r2/r3) is added to c2:c1:c0 twice.
 * acr takes a single register operand, so no comma follows %2.
 * Clobbers r2 and r3. */
#define SQRADD2(i, j) \
    __asm__( \
    " mulu.d r2,%6,%7 \n\t" \
    " add %0,%0,r2 \n\t" \
    " adc %1,%1,r3 \n\t" \
    " acr %2 \n\t" \
    " add %0,%0,r2 \n\t" \
    " adc %1,%1,r3 \n\t" \
    " acr %2 \n\t" \
    : "=r"(c0), "=r"(c1), "=r"(c2) \
    : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) \
    : "r2", "r3");
ashleymills 0:e979170e02e7 925
/* SQRADDSC(i, j) -- AVR32: load the secondary sum with the product i*j.
 * The mulu.d result is copied from r2/r3 into sc0/sc1 and eor zeroes sc2.
 * Clobbers r2 and r3. */
#define SQRADDSC(i, j) \
    __asm__( \
    " mulu.d r2,%6,%7 \n\t" \
    " mov %0,r2 \n\t" \
    " mov %1,r3 \n\t" \
    " eor %2,%2 \n\t" \
    : "=r"(sc0), "=r"(sc1), "=r"(sc2) \
    : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) \
    : "r2", "r3");
ashleymills 0:e979170e02e7 933
/* SQRADDAC(i, j) -- AVR32: accumulate the product i*j (read back from r2/r3)
 * into the secondary sum sc2:sc1:sc0.  Clobbers r2 and r3. */
#define SQRADDAC(i, j) \
    __asm__( \
    " mulu.d r2,%6,%7 \n\t" \
    " add %0,%0,r2 \n\t" \
    " adc %1,%1,r3 \n\t" \
    " acr %2 \n\t" \
    : "=r"(sc0), "=r"(sc1), "=r"(sc2) \
    : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) \
    : "r2", "r3");
ashleymills 0:e979170e02e7 941
/* SQRADDDB -- AVR32: add the secondary sum sc2:sc1:sc0 to c2:c1:c0 two times
 * over.  The sc* inputs are operands %3..%5; the tied c* inputs come last. */
#define SQRADDDB \
    __asm__( \
    " add %0,%0,%3 \n\t" \
    " adc %1,%1,%4 \n\t" \
    " adc %2,%2,%5 \n\t" \
    " add %0,%0,%3 \n\t" \
    " adc %1,%1,%4 \n\t" \
    " adc %2,%2,%5 \n\t" \
    : "=r"(c0), "=r"(c1), "=r"(c2) \
    : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) \
    : "cc");
ashleymills 0:e979170e02e7 951
ashleymills 0:e979170e02e7 952
ashleymills 0:e979170e02e7 953 #else
ashleymills 0:e979170e02e7 954
/* No assembler back end was selected above: flag the portable build. */
#define TFM_ISO

/* ISO C portable code: bookkeeping for the c0/c1/c2 column accumulator.
 * c0, c1 and c2 must be in scope wherever the macros are expanded. */
#define COMBA_START                      /* nothing to set up */
#define CLEAR_CARRY      c0 = c1 = c2 = 0;
#define COMBA_STORE(x)   x = c0;
#define COMBA_STORE2(x)  x = c1;
/* shift the accumulator down one word */
#define CARRY_FORWARD    do { c0 = c1; c1 = c2; c2 = 0; } while (0);
#define COMBA_FINI                       /* nothing to tear down */
ashleymills 0:e979170e02e7 974
/* SQRADD(i, j) -- portable C: add the product i*j to c2:c1:c0.
 * The sum is formed in an fp_word temporary; whatever lies above DIGIT_BIT
 * bits is carried from c0 into c1 and from c1 into c2. */
#define SQRADD(i, j) \
    do { \
        fp_word t; \
        t = c0 + ((fp_word)i) * ((fp_word)j); \
        c0 = t; \
        t = c1 + (t >> DIGIT_BIT); \
        c1 = t; \
        c2 += t >> DIGIT_BIT; \
    } while (0);
ashleymills 0:e979170e02e7 981
ashleymills 0:e979170e02e7 982
/* SQRADD2(i, j) -- portable C: for squaring some of the terms are doubled,
 * so the product i*j is added to c2:c1:c0 twice.
 * NOTE: the scratch variable tt is NOT declared here; an fp_word-compatible
 * tt must already be in scope at the expansion site. */
#define SQRADD2(i, j) \
    do { \
        fp_word t; \
        t = ((fp_word)i) * ((fp_word)j); \
        /* first copy of the product */ \
        tt = (fp_word)c0 + t; \
        c0 = tt; \
        tt = (fp_word)c1 + (tt >> DIGIT_BIT); \
        c1 = tt; \
        c2 += tt >> DIGIT_BIT; \
        /* second copy of the product */ \
        tt = (fp_word)c0 + t; \
        c0 = tt; \
        tt = (fp_word)c1 + (tt >> DIGIT_BIT); \
        c1 = tt; \
        c2 += tt >> DIGIT_BIT; \
    } while (0);
ashleymills 0:e979170e02e7 992
/* SQRADDSC(i, j) -- portable C: load the secondary sum with the product i*j.
 * sc0 receives the low digit, sc1 the bits above DIGIT_BIT, sc2 is zeroed. */
#define SQRADDSC(i, j) \
    do { \
        fp_word t; \
        t = ((fp_word)i) * ((fp_word)j); \
        sc0 = (fp_digit)t; \
        sc1 = (t >> DIGIT_BIT); \
        sc2 = 0; \
    } while (0);
ashleymills 0:e979170e02e7 998
/* SQRADDAC(i, j) -- portable C: accumulate the product i*j into the
 * secondary sum sc2:sc1:sc0, carrying through an fp_word temporary. */
#define SQRADDAC(i, j) \
    do { \
        fp_word t; \
        t = sc0 + ((fp_word)i) * ((fp_word)j); \
        sc0 = t; \
        t = sc1 + (t >> DIGIT_BIT); \
        sc1 = t; \
        sc2 += t >> DIGIT_BIT; \
    } while (0);
ashleymills 0:e979170e02e7 1004
/* SQRADDDB -- portable C: add twice the secondary sum sc2:sc1:sc0 to the
 * main accumulator c2:c1:c0, carrying between digits through an fp_word. */
#define SQRADDDB \
    do { \
        fp_word t; \
        t = ((fp_word)sc0) + ((fp_word)sc0) + c0; \
        c0 = t; \
        t = ((fp_word)sc1) + ((fp_word)sc1) + c1 + (t >> DIGIT_BIT); \
        c1 = t; \
        c2 = c2 + ((fp_word)sc2) + ((fp_word)sc2) + (t >> DIGIT_BIT); \
    } while (0);
ashleymills 0:e979170e02e7 1011
ashleymills 0:e979170e02e7 1012 #endif
ashleymills 0:e979170e02e7 1013
ashleymills 0:e979170e02e7 1014 #ifdef TFM_SMALL_SET
ashleymills 0:e979170e02e7 1015 #include "fp_sqr_comba_small_set.i"
ashleymills 0:e979170e02e7 1016 #include "fp_sqr_comba_3.i"
ashleymills 0:e979170e02e7 1017 #include "fp_sqr_comba_4.i"
ashleymills 0:e979170e02e7 1018 #include "fp_sqr_comba_6.i"
ashleymills 0:e979170e02e7 1019 #include "fp_sqr_comba_7.i"
ashleymills 0:e979170e02e7 1020 #include "fp_sqr_comba_8.i"
ashleymills 0:e979170e02e7 1021 #include "fp_sqr_comba_9.i"
ashleymills 0:e979170e02e7 1022 #include "fp_sqr_comba_12.i"
ashleymills 0:e979170e02e7 1023 #include "fp_sqr_comba_17.i"
ashleymills 0:e979170e02e7 1024 #include "fp_sqr_comba_20.i"
ashleymills 0:e979170e02e7 1025 #include "fp_sqr_comba_24.i"
ashleymills 0:e979170e02e7 1026 #include "fp_sqr_comba_28.i"
ashleymills 0:e979170e02e7 1027 #include "fp_sqr_comba_32.i"
ashleymills 0:e979170e02e7 1028 #include "fp_sqr_comba_48.i"
ashleymills 0:e979170e02e7 1029 #include "fp_sqr_comba_64.i"
ashleymills 0:e979170e02e7 1030 #endif
ashleymills 0:e979170e02e7 1031 /* end fp_sqr_comba.c asm */
ashleymills 0:e979170e02e7 1032
ashleymills 0:e979170e02e7 1033 /* start fp_mul_comba.c asm */
ashleymills 0:e979170e02e7 1034 /* these are the combas. Worship them. */
ashleymills 0:e979170e02e7 1035 #if defined(TFM_X86)
ashleymills 0:e979170e02e7 1036 /* Generic x86 optimized code */
ashleymills 0:e979170e02e7 1037
/* x86 multiplication: bookkeeping for the c0/c1/c2 column accumulator.
 * c0, c1 and c2 must be in scope wherever the macros are expanded. */
#define COMBA_START                      /* anything you need at the start */
#define COMBA_CLEAR      c0 = c1 = c2 = 0;   /* clear the chaining variables */
/* forward the carry to the next digit */
#define COMBA_FORWARD    do { c0 = c1; c1 = c2; c2 = 0; } while (0);
#define COMBA_STORE(x)   x = c0;         /* store the first sum */
#define COMBA_STORE2(x)  x = c1;         /* store the second sum [carry] */
#define COMBA_FINI                       /* anything you need at the end */
ashleymills 0:e979170e02e7 1059
/* MULADD(i, j) -- x86: multiply i by j and add the edx:eax product to the
 * accumulator c2:c1:c0 with carry.  i and j are memory operands.
 * Clobbers eax, edx and the flags ("cc"). */
#define MULADD(i, j) \
    __asm__( \
    "movl %6,%%eax \n\t" \
    "mull %7 \n\t" \
    "addl %%eax,%0 \n\t" \
    "adcl %%edx,%1 \n\t" \
    "adcl $0,%2 \n\t" \
    : "=r"(c0), "=r"(c1), "=r"(c2) \
    : "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) \
    : "%eax", "%edx", "cc");
ashleymills 0:e979170e02e7 1069
ashleymills 0:e979170e02e7 1070 #elif defined(TFM_X86_64)
ashleymills 0:e979170e02e7 1071 /* x86-64 optimized */
ashleymills 0:e979170e02e7 1072
/* x86-64 multiplication: bookkeeping for the c0/c1/c2 column accumulator.
 * c0, c1 and c2 must be in scope wherever the macros are expanded. */
#define COMBA_START                      /* anything you need at the start */
#define COMBA_CLEAR      c0 = c1 = c2 = 0;   /* clear the chaining variables */
/* forward the carry to the next digit */
#define COMBA_FORWARD    do { c0 = c1; c1 = c2; c2 = 0; } while (0);
#define COMBA_STORE(x)   x = c0;         /* store the first sum */
#define COMBA_STORE2(x)  x = c1;         /* store the second sum [carry] */
#define COMBA_FINI                       /* anything you need at the end */
ashleymills 0:e979170e02e7 1094
/* MULADD(i, j) -- x86-64: multiply i by j and add the rdx:rax product to the
 * accumulator c2:c1:c0 with carry.
 * Clobbers rax, rdx and the flags ("cc"). */
#define MULADD(i, j) \
    __asm__( \
    "movq %6,%%rax \n\t" \
    "mulq %7 \n\t" \
    "addq %%rax,%0 \n\t" \
    "adcq %%rdx,%1 \n\t" \
    "adcq $0,%2 \n\t" \
    : "=r"(c0), "=r"(c1), "=r"(c2) \
    : "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) \
    : "%rax", "%rdx", "cc");
ashleymills 0:e979170e02e7 1104
ashleymills 0:e979170e02e7 1105 #elif defined(TFM_SSE2)
ashleymills 0:e979170e02e7 1106 /* use SSE2 optimizations */
ashleymills 0:e979170e02e7 1107
/* SSE2 multiplication: bookkeeping for the c0/c1/c2 column accumulator.
 * c0, c1 and c2 must be in scope wherever the macros are expanded. */
#define COMBA_START                      /* anything you need at the start */
#define COMBA_CLEAR      c0 = c1 = c2 = 0;   /* clear the chaining variables */
/* forward the carry to the next digit */
#define COMBA_FORWARD    do { c0 = c1; c1 = c2; c2 = 0; } while (0);
#define COMBA_STORE(x)   x = c0;         /* store the first sum */
#define COMBA_STORE2(x)  x = c1;         /* store the second sum [carry] */
/* anything you need at the end: emms after the MMX-register work */
#define COMBA_FINI       __asm__("emms");
ashleymills 0:e979170e02e7 1130
/* MULADD(i, j) -- SSE2: multiply i by j with pmuludq and add the result to
 * the accumulator c2:c1:c0 with carry.  eax carries first the low and then
 * the high 32 bits of the product.
 * Clobbers eax and the flags ("cc"); mm0/mm1 are used as scratch. */
#define MULADD(i, j) \
    __asm__( \
    "movd %6,%%mm0 \n\t" \
    "movd %7,%%mm1 \n\t" \
    "pmuludq %%mm1,%%mm0\n\t" \
    "movd %%mm0,%%eax \n\t" \
    "psrlq $32,%%mm0 \n\t" \
    "addl %%eax,%0 \n\t" \
    "movd %%mm0,%%eax \n\t" \
    "adcl %%eax,%1 \n\t" \
    "adcl $0,%2 \n\t" \
    : "=r"(c0), "=r"(c1), "=r"(c2) \
    : "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) \
    : "%eax", "cc");
ashleymills 0:e979170e02e7 1144
ashleymills 0:e979170e02e7 1145 #elif defined(TFM_ARM)
ashleymills 0:e979170e02e7 1146 /* ARM code */
ashleymills 0:e979170e02e7 1147
/* ARM multiplication: bookkeeping for the c0/c1/c2 column accumulator.
 * c0, c1 and c2 must be in scope wherever the macros are expanded. */
#define COMBA_START                      /* nothing to set up */
#define COMBA_CLEAR      c0 = c1 = c2 = 0;
/* shift the accumulator down one word */
#define COMBA_FORWARD    do { c0 = c1; c1 = c2; c2 = 0; } while (0);
#define COMBA_STORE(x)   x = c0;
#define COMBA_STORE2(x)  x = c1;
#define COMBA_FINI                       /* nothing to tear down */
ashleymills 0:e979170e02e7 1163
/* MULADD(i, j) -- ARM: multiply i by j (UMULL into r0/r1) and add the
 * product to the accumulator c2:c1:c0 with carry.
 * Clobbers r0, r1 and the flags ("cc"). */
#define MULADD(i, j) \
    __asm__( \
    " UMULL r0,r1,%6,%7 \n\t" \
    " ADDS %0,%0,r0 \n\t" \
    " ADCS %1,%1,r1 \n\t" \
    " ADC %2,%2,#0 \n\t" \
    : "=r"(c0), "=r"(c1), "=r"(c2) \
    : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) \
    : "r0", "r1", "cc");
ashleymills 0:e979170e02e7 1171
ashleymills 0:e979170e02e7 1172 #elif defined(TFM_PPC32)
ashleymills 0:e979170e02e7 1173 /* For 32-bit PPC */
ashleymills 0:e979170e02e7 1174
/* PPC32 multiplication: bookkeeping for the c0/c1/c2 column accumulator.
 * c0, c1 and c2 must be in scope wherever the macros are expanded. */
#define COMBA_START                      /* nothing to set up */
#define COMBA_CLEAR      c0 = c1 = c2 = 0;
/* shift the accumulator down one word */
#define COMBA_FORWARD    do { c0 = c1; c1 = c2; c2 = 0; } while (0);
#define COMBA_STORE(x)   x = c0;
#define COMBA_STORE2(x)  x = c1;
#define COMBA_FINI                       /* nothing to tear down */
ashleymills 0:e979170e02e7 1190
/* MULADD(i, j) -- PPC32: multiply i by j and add the product to c2:c1:c0.
 * Register 16 holds first the low word (mullw) and then the high word
 * (mulhwu); the mulhwu sits between addc and adde.
 * Upstream note: untested -- will mulhwu change the flags? Docs say no.
 * Clobbers register 16. */
#define MULADD(i, j) \
    __asm__( \
    " mullw 16,%6,%7 \n\t" \
    " addc %0,%0,16 \n\t" \
    " mulhwu 16,%6,%7 \n\t" \
    " adde %1,%1,16 \n\t" \
    " addze %2,%2 \n\t" \
    : "=r"(c0), "=r"(c1), "=r"(c2) \
    : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) \
    : "16");
ashleymills 0:e979170e02e7 1200
ashleymills 0:e979170e02e7 1201 #elif defined(TFM_PPC64)
ashleymills 0:e979170e02e7 1202 /* For 64-bit PPC */
ashleymills 0:e979170e02e7 1203
/* Expands to nothing in this variant. */
ashleymills 0:e979170e02e7 1204 #define COMBA_START
ashleymills 0:e979170e02e7 1205
/* Zero the three-word accumulator c2:c1:c0. */
ashleymills 0:e979170e02e7 1206 #define COMBA_CLEAR \
ashleymills 0:e979170e02e7 1207 c0 = c1 = c2 = 0;
ashleymills 0:e979170e02e7 1208
/* Slide the accumulator down by one word: c1 becomes c0, c2 becomes c1,
 * and c2 is reset to 0.  The expansion already ends in ';'. */
ashleymills 0:e979170e02e7 1209 #define COMBA_FORWARD \
ashleymills 0:e979170e02e7 1210 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
ashleymills 0:e979170e02e7 1211
/* Copy the lowest accumulator word (c0) into x. */
ashleymills 0:e979170e02e7 1212 #define COMBA_STORE(x) \
ashleymills 0:e979170e02e7 1213 x = c0;
ashleymills 0:e979170e02e7 1214
/* Copy the middle accumulator word (c1) into x. */
ashleymills 0:e979170e02e7 1215 #define COMBA_STORE2(x) \
ashleymills 0:e979170e02e7 1216 x = c1;
ashleymills 0:e979170e02e7 1217
/* Expands to nothing in this variant. */
ashleymills 0:e979170e02e7 1218 #define COMBA_FINI
ashleymills 0:e979170e02e7 1219
/* MULADD(i, j): add the product i * j into the three-word accumulator
 * c2:c1:c0, using register 16 as scratch (declared as a clobber).
 *
 *   mulld  - low  doubleword of i * j into r16
 *   addc   - c0 += r16, recording the carry
 *   mulhdu - high doubleword of i * j (unsigned) into r16
 *   adde   - c1 += r16 + carry, recording the carry
 *   addze  - c2 += carry
 *
 * c0/c1/c2 are both outputs (%0-%2) and tied inputs (%3-%5); i and j are
 * %6 and %7.  The expansion already ends in ';', like the other variants.
 *
 * untested: will mulhdu change the flags? Docs say no
 *
 * NOTE(review): %0 is written by addc before mulhdu re-reads %6/%7, yet the
 * outputs are not marked early-clobber ("=&r") - confirm the compiler can
 * never place i or j in the register chosen for c0.
 */
#define MULADD(i, j) \
__asm__( \
" mulld 16,%6,%7 \n\t" \
" addc %0,%0,16 \n\t" \
" mulhdu 16,%6,%7 \n\t" \
" adde %1,%1,16 \n\t" \
" addze %2,%2 \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16");
ashleymills 0:e979170e02e7 1229
ashleymills 0:e979170e02e7 1230 #elif defined(TFM_AVR32)
ashleymills 0:e979170e02e7 1231
ashleymills 0:e979170e02e7 1232 /* For AVR32 */
ashleymills 0:e979170e02e7 1233
/* Expands to nothing in this variant. */
ashleymills 0:e979170e02e7 1234 #define COMBA_START
ashleymills 0:e979170e02e7 1235
/* Zero the three-word accumulator c2:c1:c0. */
ashleymills 0:e979170e02e7 1236 #define COMBA_CLEAR \
ashleymills 0:e979170e02e7 1237 c0 = c1 = c2 = 0;
ashleymills 0:e979170e02e7 1238
/* Slide the accumulator down by one word: c1 becomes c0, c2 becomes c1,
 * and c2 is reset to 0.  The expansion already ends in ';'. */
ashleymills 0:e979170e02e7 1239 #define COMBA_FORWARD \
ashleymills 0:e979170e02e7 1240 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
ashleymills 0:e979170e02e7 1241
/* Copy the lowest accumulator word (c0) into x. */
ashleymills 0:e979170e02e7 1242 #define COMBA_STORE(x) \
ashleymills 0:e979170e02e7 1243 x = c0;
ashleymills 0:e979170e02e7 1244
/* Copy the middle accumulator word (c1) into x. */
ashleymills 0:e979170e02e7 1245 #define COMBA_STORE2(x) \
ashleymills 0:e979170e02e7 1246 x = c1;
ashleymills 0:e979170e02e7 1247
/* Expands to nothing in this variant. */
ashleymills 0:e979170e02e7 1248 #define COMBA_FINI
ashleymills 0:e979170e02e7 1249
/* MULADD(i, j): add the product i * j into the three-word accumulator
 * c2:c1:c0, using r2/r3 as scratch (both declared as clobbers).
 *
 *   mulu.d - unsigned multiply, low word in r2, high word in r3
 *   add    - c0 += r2, recording the carry
 *   adc    - c1 += r3 + carry, recording the carry
 *   acr    - c2 += carry
 *
 * c0/c1/c2 are both outputs (%0-%2) and tied inputs (%3-%5); i and j are
 * %6 and %7.  The expansion already ends in ';', like the other variants.
 */
#define MULADD(i, j) \
__asm__( \
" mulu.d r2,%6,%7 \n\t"\
" add %0,r2 \n\t"\
" adc %1,%1,r3 \n\t"\
" acr %2 \n\t"\
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2","r3");
ashleymills 0:e979170e02e7 1257
ashleymills 0:e979170e02e7 1258 #else
ashleymills 0:e979170e02e7 1259 /* ISO C code */
ashleymills 0:e979170e02e7 1260
/* Expands to nothing in this variant. */
ashleymills 0:e979170e02e7 1261 #define COMBA_START
ashleymills 0:e979170e02e7 1262
/* Zero the three-word accumulator c2:c1:c0. */
ashleymills 0:e979170e02e7 1263 #define COMBA_CLEAR \
ashleymills 0:e979170e02e7 1264 c0 = c1 = c2 = 0;
ashleymills 0:e979170e02e7 1265
/* Slide the accumulator down by one word: c1 becomes c0, c2 becomes c1,
 * and c2 is reset to 0.  The expansion already ends in ';'. */
ashleymills 0:e979170e02e7 1266 #define COMBA_FORWARD \
ashleymills 0:e979170e02e7 1267 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
ashleymills 0:e979170e02e7 1268
/* Copy the lowest accumulator word (c0) into x. */
ashleymills 0:e979170e02e7 1269 #define COMBA_STORE(x) \
ashleymills 0:e979170e02e7 1270 x = c0;
ashleymills 0:e979170e02e7 1271
/* Copy the middle accumulator word (c1) into x. */
ashleymills 0:e979170e02e7 1272 #define COMBA_STORE2(x) \
ashleymills 0:e979170e02e7 1273 x = c1;
ashleymills 0:e979170e02e7 1274
/* Expands to nothing in this variant. */
ashleymills 0:e979170e02e7 1275 #define COMBA_FINI
ashleymills 0:e979170e02e7 1276
/* MULADD(i, j): portable C version; adds the product i * j into the
 * three-word accumulator c2:c1:c0.
 *
 * Both operands are widened to fp_word before multiplying so the full
 * double-width product is kept.  The low DIGIT_BIT bits go to c0, the
 * remainder is added to c1, and whatever spills out of c1 is added to c2.
 *
 * i and j are wrapped in parentheses before the cast so that an expression
 * argument (e.g. `a + b` or `c ? x : y`) is evaluated as written and only
 * then widened; without them the cast binds to the first operand only.
 * Each argument is evaluated exactly once.
 *
 * The expansion already ends in ';', like the other MULADD variants, so
 * `MULADD(a, b);` at a call site yields a harmless empty statement.
 */
#define MULADD(i, j)                                            \
   do { fp_word t;                                              \
   t = (fp_word)c0 + ((fp_word)(i)) * ((fp_word)(j)); c0 = t;   \
   t = (fp_word)c1 + (t >> DIGIT_BIT); c1 = t;                  \
   c2 += t >> DIGIT_BIT;                                        \
   } while (0);
ashleymills 0:e979170e02e7 1282
ashleymills 0:e979170e02e7 1283 #endif
ashleymills 0:e979170e02e7 1284
ashleymills 0:e979170e02e7 1285
ashleymills 0:e979170e02e7 1286 #ifdef TFM_SMALL_SET
ashleymills 0:e979170e02e7 1287 #include "fp_mul_comba_small_set.i"
ashleymills 0:e979170e02e7 1288 #include "fp_mul_comba_3.i"
ashleymills 0:e979170e02e7 1289 #include "fp_mul_comba_4.i"
ashleymills 0:e979170e02e7 1290 #include "fp_mul_comba_6.i"
ashleymills 0:e979170e02e7 1291 #include "fp_mul_comba_7.i"
ashleymills 0:e979170e02e7 1292 #include "fp_mul_comba_8.i"
ashleymills 0:e979170e02e7 1293 #include "fp_mul_comba_9.i"
ashleymills 0:e979170e02e7 1294 #include "fp_mul_comba_12.i"
ashleymills 0:e979170e02e7 1295 #include "fp_mul_comba_17.i"
ashleymills 0:e979170e02e7 1296 #include "fp_mul_comba_20.i"
ashleymills 0:e979170e02e7 1297 #include "fp_mul_comba_24.i"
ashleymills 0:e979170e02e7 1298 #include "fp_mul_comba_28.i"
ashleymills 0:e979170e02e7 1299 #include "fp_mul_comba_32.i"
ashleymills 0:e979170e02e7 1300 #include "fp_mul_comba_48.i"
ashleymills 0:e979170e02e7 1301 #include "fp_mul_comba_64.i"
ashleymills 0:e979170e02e7 1302 #endif
ashleymills 0:e979170e02e7 1303
ashleymills 0:e979170e02e7 1304 /* end fp_mul_comba.c asm */
ashleymills 0:e979170e02e7 1305