cyassl re-port with cellular comms, PSK test
Dependencies: VodafoneUSBModem_bleedingedge2 mbed-rtos mbed-src
cyassllib/ctaocrypt/src/asm.c@0:e979170e02e7, 2013-04-26 (annotated)
- Committer:
- ashleymills
- Date:
- Fri Apr 26 16:54:58 2013 +0000
- Revision:
- 0:e979170e02e7
Basic operation of SSL with PSK working for cellular.
Who changed what in which revision?
User | Revision | Line number | New contents of line |
---|---|---|---|
ashleymills | 0:e979170e02e7 | 1 | /* asm.c |
ashleymills | 0:e979170e02e7 | 2 | * |
ashleymills | 0:e979170e02e7 | 3 | * Copyright (C) 2006-2012 Sawtooth Consulting Ltd. |
ashleymills | 0:e979170e02e7 | 4 | * |
ashleymills | 0:e979170e02e7 | 5 | * This file is part of CyaSSL. |
ashleymills | 0:e979170e02e7 | 6 | * |
ashleymills | 0:e979170e02e7 | 7 | * CyaSSL is free software; you can redistribute it and/or modify |
ashleymills | 0:e979170e02e7 | 8 | * it under the terms of the GNU General Public License as published by |
ashleymills | 0:e979170e02e7 | 9 | * the Free Software Foundation; either version 2 of the License, or |
ashleymills | 0:e979170e02e7 | 10 | * (at your option) any later version. |
ashleymills | 0:e979170e02e7 | 11 | * |
ashleymills | 0:e979170e02e7 | 12 | * CyaSSL is distributed in the hope that it will be useful, |
ashleymills | 0:e979170e02e7 | 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
ashleymills | 0:e979170e02e7 | 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
ashleymills | 0:e979170e02e7 | 15 | * GNU General Public License for more details. |
ashleymills | 0:e979170e02e7 | 16 | * |
ashleymills | 0:e979170e02e7 | 17 | * You should have received a copy of the GNU General Public License |
ashleymills | 0:e979170e02e7 | 18 | * along with this program; if not, write to the Free Software |
ashleymills | 0:e979170e02e7 | 19 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA |
ashleymills | 0:e979170e02e7 | 20 | */ |
ashleymills | 0:e979170e02e7 | 21 | |
ashleymills | 0:e979170e02e7 | 22 | #ifdef HAVE_CONFIG_H |
ashleymills | 0:e979170e02e7 | 23 | #include <config.h> |
ashleymills | 0:e979170e02e7 | 24 | #endif |
ashleymills | 0:e979170e02e7 | 25 | |
ashleymills | 0:e979170e02e7 | 26 | /* |
ashleymills | 0:e979170e02e7 | 27 | * Based on public domain TomsFastMath 0.10 by Tom St Denis, tomstdenis@iahu.ca, |
ashleymills | 0:e979170e02e7 | 28 | * http://math.libtomcrypt.com |
ashleymills | 0:e979170e02e7 | 29 | */ |
ashleymills | 0:e979170e02e7 | 30 | |
ashleymills | 0:e979170e02e7 | 31 | |
ashleymills | 0:e979170e02e7 | 32 | /******************************************************************/ |
ashleymills | 0:e979170e02e7 | 33 | /* fp_montgomery_reduce.c asm or generic */ |
ashleymills | 0:e979170e02e7 | 34 | #if defined(TFM_X86) && !defined(TFM_SSE2) |
ashleymills | 0:e979170e02e7 | 35 | /* x86-32 code, used when TFM_X86 is defined and TFM_SSE2 is not. The names mu, c, x, mp, cy, _c and tmpm are not declared here; they must exist in the code that expands these macros. MONT_START, MONT_FINI and LOOP_END expand to nothing in this branch. */ |
ashleymills | 0:e979170e02e7 | 36 | |
ashleymills | 0:e979170e02e7 | 37 | #define MONT_START |
ashleymills | 0:e979170e02e7 | 38 | #define MONT_FINI |
ashleymills | 0:e979170e02e7 | 39 | #define LOOP_END |
ashleymills | 0:e979170e02e7 | 40 | #define LOOP_START \ |
ashleymills | 0:e979170e02e7 | 41 | mu = c[x] * mp |
ashleymills | 0:e979170e02e7 | 42 | /* INNERMUL: _c[LO] += mu * (*tmpm++) + cy; the low 32 bits stay in _c[LO] and the high 32 bits of the sum are left in cy. eax and edx are used as scratch. */ |
ashleymills | 0:e979170e02e7 | 43 | #define INNERMUL \ |
ashleymills | 0:e979170e02e7 | 44 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 45 | "movl %5,%%eax \n\t" \ |
ashleymills | 0:e979170e02e7 | 46 | "mull %4 \n\t" \ |
ashleymills | 0:e979170e02e7 | 47 | "addl %1,%%eax \n\t" \ |
ashleymills | 0:e979170e02e7 | 48 | "adcl $0,%%edx \n\t" \ |
ashleymills | 0:e979170e02e7 | 49 | "addl %%eax,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 50 | "adcl $0,%%edx \n\t" \ |
ashleymills | 0:e979170e02e7 | 51 | "movl %%edx,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 52 | :"=g"(_c[LO]), "=r"(cy) \ |
ashleymills | 0:e979170e02e7 | 53 | :"0"(_c[LO]), "1"(cy), "g"(mu), "g"(*tmpm++) \ |
ashleymills | 0:e979170e02e7 | 54 | : "%eax", "%edx", "%cc") |
ashleymills | 0:e979170e02e7 | 55 | /* PROPCARRY: _c[LO] += cy, then cy becomes the carry out of that addition (0 or 1). al is used as scratch. */ |
ashleymills | 0:e979170e02e7 | 56 | #define PROPCARRY \ |
ashleymills | 0:e979170e02e7 | 57 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 58 | "addl %1,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 59 | "setb %%al \n\t" \ |
ashleymills | 0:e979170e02e7 | 60 | "movzbl %%al,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 61 | :"=g"(_c[LO]), "=r"(cy) \ |
ashleymills | 0:e979170e02e7 | 62 | :"0"(_c[LO]), "1"(cy) \ |
ashleymills | 0:e979170e02e7 | 63 | : "%eax", "%cc") |
ashleymills | 0:e979170e02e7 | 64 | |
ashleymills | 0:e979170e02e7 | 65 | /******************************************************************/ |
ashleymills | 0:e979170e02e7 | 66 | #elif defined(TFM_X86_64) |
ashleymills | 0:e979170e02e7 | 67 | /* x86-64 code, built from 64-bit (q-suffixed) instructions. The names mu, c, x, mp, cy, _c and tmpm are not declared here; they must exist in the code that expands these macros. MONT_START, MONT_FINI and LOOP_END expand to nothing in this branch. */ |
ashleymills | 0:e979170e02e7 | 68 | |
ashleymills | 0:e979170e02e7 | 69 | #define MONT_START |
ashleymills | 0:e979170e02e7 | 70 | #define MONT_FINI |
ashleymills | 0:e979170e02e7 | 71 | #define LOOP_END |
ashleymills | 0:e979170e02e7 | 72 | #define LOOP_START \ |
ashleymills | 0:e979170e02e7 | 73 | mu = c[x] * mp |
ashleymills | 0:e979170e02e7 | 74 | /* INNERMUL: _c[LO] += mu * (*tmpm++) + cy; the low 64 bits stay in _c[LO] and the high 64 bits of the sum are left in cy. rax and rdx are used as scratch. */ |
ashleymills | 0:e979170e02e7 | 75 | #define INNERMUL \ |
ashleymills | 0:e979170e02e7 | 76 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 77 | "movq %5,%%rax \n\t" \ |
ashleymills | 0:e979170e02e7 | 78 | "mulq %4 \n\t" \ |
ashleymills | 0:e979170e02e7 | 79 | "addq %1,%%rax \n\t" \ |
ashleymills | 0:e979170e02e7 | 80 | "adcq $0,%%rdx \n\t" \ |
ashleymills | 0:e979170e02e7 | 81 | "addq %%rax,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 82 | "adcq $0,%%rdx \n\t" \ |
ashleymills | 0:e979170e02e7 | 83 | "movq %%rdx,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 84 | :"=g"(_c[LO]), "=r"(cy) \ |
ashleymills | 0:e979170e02e7 | 85 | :"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \ |
ashleymills | 0:e979170e02e7 | 86 | : "%rax", "%rdx", "%cc") |
ashleymills | 0:e979170e02e7 | 87 | /* INNERMUL8: eight multiply-accumulate steps in a single asm statement. For k = 0..7 (byte offsets 0x0..0x38) the low 64 bits of tmpm[k] * mu + _c[k] + cy are stored to _c[k] and the high 64 bits become the new cy. No instruction here changes the _c or tmpm pointer registers, so any advancing is left to the expanding code. Scratch registers: rax, rdx, r10, r11. */ |
ashleymills | 0:e979170e02e7 | 88 | #define INNERMUL8 \ |
ashleymills | 0:e979170e02e7 | 89 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 90 | "movq 0(%5),%%rax \n\t" \ |
ashleymills | 0:e979170e02e7 | 91 | "movq 0(%2),%%r10 \n\t" \ |
ashleymills | 0:e979170e02e7 | 92 | "movq 0x8(%5),%%r11 \n\t" \ |
ashleymills | 0:e979170e02e7 | 93 | "mulq %4 \n\t" \ |
ashleymills | 0:e979170e02e7 | 94 | "addq %%r10,%%rax \n\t" \ |
ashleymills | 0:e979170e02e7 | 95 | "adcq $0,%%rdx \n\t" \ |
ashleymills | 0:e979170e02e7 | 96 | "movq 0x8(%2),%%r10 \n\t" \ |
ashleymills | 0:e979170e02e7 | 97 | "addq %3,%%rax \n\t" \ |
ashleymills | 0:e979170e02e7 | 98 | "adcq $0,%%rdx \n\t" \ |
ashleymills | 0:e979170e02e7 | 99 | "movq %%rax,0(%0) \n\t" \ |
ashleymills | 0:e979170e02e7 | 100 | "movq %%rdx,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 101 | \ |
ashleymills | 0:e979170e02e7 | 102 | "movq %%r11,%%rax \n\t" \ |
ashleymills | 0:e979170e02e7 | 103 | "movq 0x10(%5),%%r11 \n\t" \ |
ashleymills | 0:e979170e02e7 | 104 | "mulq %4 \n\t" \ |
ashleymills | 0:e979170e02e7 | 105 | "addq %%r10,%%rax \n\t" \ |
ashleymills | 0:e979170e02e7 | 106 | "adcq $0,%%rdx \n\t" \ |
ashleymills | 0:e979170e02e7 | 107 | "movq 0x10(%2),%%r10 \n\t" \ |
ashleymills | 0:e979170e02e7 | 108 | "addq %3,%%rax \n\t" \ |
ashleymills | 0:e979170e02e7 | 109 | "adcq $0,%%rdx \n\t" \ |
ashleymills | 0:e979170e02e7 | 110 | "movq %%rax,0x8(%0) \n\t" \ |
ashleymills | 0:e979170e02e7 | 111 | "movq %%rdx,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 112 | \ |
ashleymills | 0:e979170e02e7 | 113 | "movq %%r11,%%rax \n\t" \ |
ashleymills | 0:e979170e02e7 | 114 | "movq 0x18(%5),%%r11 \n\t" \ |
ashleymills | 0:e979170e02e7 | 115 | "mulq %4 \n\t" \ |
ashleymills | 0:e979170e02e7 | 116 | "addq %%r10,%%rax \n\t" \ |
ashleymills | 0:e979170e02e7 | 117 | "adcq $0,%%rdx \n\t" \ |
ashleymills | 0:e979170e02e7 | 118 | "movq 0x18(%2),%%r10 \n\t" \ |
ashleymills | 0:e979170e02e7 | 119 | "addq %3,%%rax \n\t" \ |
ashleymills | 0:e979170e02e7 | 120 | "adcq $0,%%rdx \n\t" \ |
ashleymills | 0:e979170e02e7 | 121 | "movq %%rax,0x10(%0) \n\t" \ |
ashleymills | 0:e979170e02e7 | 122 | "movq %%rdx,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 123 | \ |
ashleymills | 0:e979170e02e7 | 124 | "movq %%r11,%%rax \n\t" \ |
ashleymills | 0:e979170e02e7 | 125 | "movq 0x20(%5),%%r11 \n\t" \ |
ashleymills | 0:e979170e02e7 | 126 | "mulq %4 \n\t" \ |
ashleymills | 0:e979170e02e7 | 127 | "addq %%r10,%%rax \n\t" \ |
ashleymills | 0:e979170e02e7 | 128 | "adcq $0,%%rdx \n\t" \ |
ashleymills | 0:e979170e02e7 | 129 | "movq 0x20(%2),%%r10 \n\t" \ |
ashleymills | 0:e979170e02e7 | 130 | "addq %3,%%rax \n\t" \ |
ashleymills | 0:e979170e02e7 | 131 | "adcq $0,%%rdx \n\t" \ |
ashleymills | 0:e979170e02e7 | 132 | "movq %%rax,0x18(%0) \n\t" \ |
ashleymills | 0:e979170e02e7 | 133 | "movq %%rdx,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 134 | \ |
ashleymills | 0:e979170e02e7 | 135 | "movq %%r11,%%rax \n\t" \ |
ashleymills | 0:e979170e02e7 | 136 | "movq 0x28(%5),%%r11 \n\t" \ |
ashleymills | 0:e979170e02e7 | 137 | "mulq %4 \n\t" \ |
ashleymills | 0:e979170e02e7 | 138 | "addq %%r10,%%rax \n\t" \ |
ashleymills | 0:e979170e02e7 | 139 | "adcq $0,%%rdx \n\t" \ |
ashleymills | 0:e979170e02e7 | 140 | "movq 0x28(%2),%%r10 \n\t" \ |
ashleymills | 0:e979170e02e7 | 141 | "addq %3,%%rax \n\t" \ |
ashleymills | 0:e979170e02e7 | 142 | "adcq $0,%%rdx \n\t" \ |
ashleymills | 0:e979170e02e7 | 143 | "movq %%rax,0x20(%0) \n\t" \ |
ashleymills | 0:e979170e02e7 | 144 | "movq %%rdx,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 145 | \ |
ashleymills | 0:e979170e02e7 | 146 | "movq %%r11,%%rax \n\t" \ |
ashleymills | 0:e979170e02e7 | 147 | "movq 0x30(%5),%%r11 \n\t" \ |
ashleymills | 0:e979170e02e7 | 148 | "mulq %4 \n\t" \ |
ashleymills | 0:e979170e02e7 | 149 | "addq %%r10,%%rax \n\t" \ |
ashleymills | 0:e979170e02e7 | 150 | "adcq $0,%%rdx \n\t" \ |
ashleymills | 0:e979170e02e7 | 151 | "movq 0x30(%2),%%r10 \n\t" \ |
ashleymills | 0:e979170e02e7 | 152 | "addq %3,%%rax \n\t" \ |
ashleymills | 0:e979170e02e7 | 153 | "adcq $0,%%rdx \n\t" \ |
ashleymills | 0:e979170e02e7 | 154 | "movq %%rax,0x28(%0) \n\t" \ |
ashleymills | 0:e979170e02e7 | 155 | "movq %%rdx,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 156 | \ |
ashleymills | 0:e979170e02e7 | 157 | "movq %%r11,%%rax \n\t" \ |
ashleymills | 0:e979170e02e7 | 158 | "movq 0x38(%5),%%r11 \n\t" \ |
ashleymills | 0:e979170e02e7 | 159 | "mulq %4 \n\t" \ |
ashleymills | 0:e979170e02e7 | 160 | "addq %%r10,%%rax \n\t" \ |
ashleymills | 0:e979170e02e7 | 161 | "adcq $0,%%rdx \n\t" \ |
ashleymills | 0:e979170e02e7 | 162 | "movq 0x38(%2),%%r10 \n\t" \ |
ashleymills | 0:e979170e02e7 | 163 | "addq %3,%%rax \n\t" \ |
ashleymills | 0:e979170e02e7 | 164 | "adcq $0,%%rdx \n\t" \ |
ashleymills | 0:e979170e02e7 | 165 | "movq %%rax,0x30(%0) \n\t" \ |
ashleymills | 0:e979170e02e7 | 166 | "movq %%rdx,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 167 | \ |
ashleymills | 0:e979170e02e7 | 168 | "movq %%r11,%%rax \n\t" \ |
ashleymills | 0:e979170e02e7 | 169 | "mulq %4 \n\t" \ |
ashleymills | 0:e979170e02e7 | 170 | "addq %%r10,%%rax \n\t" \ |
ashleymills | 0:e979170e02e7 | 171 | "adcq $0,%%rdx \n\t" \ |
ashleymills | 0:e979170e02e7 | 172 | "addq %3,%%rax \n\t" \ |
ashleymills | 0:e979170e02e7 | 173 | "adcq $0,%%rdx \n\t" \ |
ashleymills | 0:e979170e02e7 | 174 | "movq %%rax,0x38(%0) \n\t" \ |
ashleymills | 0:e979170e02e7 | 175 | "movq %%rdx,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 176 | \ |
ashleymills | 0:e979170e02e7 | 177 | :"=r"(_c), "=r"(cy) \ |
ashleymills | 0:e979170e02e7 | 178 | : "0"(_c), "1"(cy), "g"(mu), "r"(tmpm)\ |
ashleymills | 0:e979170e02e7 | 179 | : "%rax", "%rdx", "%r10", "%r11", "%cc") |
ashleymills | 0:e979170e02e7 | 180 | |
ashleymills | 0:e979170e02e7 | 181 | /* PROPCARRY: _c[LO] += cy, then cy becomes the carry out of that addition (0 or 1). al is used as scratch. */ |
ashleymills | 0:e979170e02e7 | 182 | #define PROPCARRY \ |
ashleymills | 0:e979170e02e7 | 183 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 184 | "addq %1,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 185 | "setb %%al \n\t" \ |
ashleymills | 0:e979170e02e7 | 186 | "movzbq %%al,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 187 | :"=g"(_c[LO]), "=r"(cy) \ |
ashleymills | 0:e979170e02e7 | 188 | :"0"(_c[LO]), "1"(cy) \ |
ashleymills | 0:e979170e02e7 | 189 | : "%rax", "%cc") |
ashleymills | 0:e979170e02e7 | 190 | |
ashleymills | 0:e979170e02e7 | 191 | /******************************************************************/ |
ashleymills | 0:e979170e02e7 | 192 | #elif defined(TFM_SSE2) |
ashleymills | 0:e979170e02e7 | 193 | /* SSE2 code (assumes 32-bit fp_digits) */ |
ashleymills | 0:e979170e02e7 | 194 | /* Register assignments. The instructions below operate on the MMX registers (mm0-mm7), not on xmm registers, and none of the asm statements in this branch declares them as clobbers: |
ashleymills | 0:e979170e02e7 | 195 | * mm0 *tmpm++, then Mu * (*tmpm++) |
ashleymills | 0:e979170e02e7 | 196 | * mm1 c[x], then Mu |
ashleymills | 0:e979170e02e7 | 197 | * mm2 mp |
ashleymills | 0:e979170e02e7 | 198 | * mm3 cy |
ashleymills | 0:e979170e02e7 | 199 | * mm4 _c[LO] |
ashleymills | 0:e979170e02e7 | 200 | */ |
ashleymills | 0:e979170e02e7 | 201 | /* MONT_START: load mp into mm2. */ |
ashleymills | 0:e979170e02e7 | 202 | #define MONT_START \ |
ashleymills | 0:e979170e02e7 | 203 | __asm__("movd %0,%%mm2"::"g"(mp)) |
ashleymills | 0:e979170e02e7 | 204 | /* MONT_FINI: issue the emms instruction after the MMX registers have been used. */ |
ashleymills | 0:e979170e02e7 | 205 | #define MONT_FINI \ |
ashleymills | 0:e979170e02e7 | 206 | __asm__("emms") |
ashleymills | 0:e979170e02e7 | 207 | /* LOOP_START: mm1 = c[x] multiplied by mm2 (mp), giving Mu; mm3 (the running carry) is cleared. */ |
ashleymills | 0:e979170e02e7 | 208 | #define LOOP_START \ |
ashleymills | 0:e979170e02e7 | 209 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 210 | "movd %0,%%mm1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 211 | "pxor %%mm3,%%mm3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 212 | "pmuludq %%mm2,%%mm1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 213 | :: "g"(c[x])) |
ashleymills | 0:e979170e02e7 | 214 | |
ashleymills | 0:e979170e02e7 | 215 | /* pmuludq on mmx registers does a 32x32->64 multiply. INNERMUL: mm3 += _c[LO] + mm1 * (*tmpm++); the low 32 bits of mm3 are written back to _c[LO] and mm3 is then shifted right by 32 so that it holds the carry for the next digit. Note that this expansion already ends in a semicolon. */ |
ashleymills | 0:e979170e02e7 | 216 | #define INNERMUL \ |
ashleymills | 0:e979170e02e7 | 217 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 218 | "movd %1,%%mm4 \n\t" \ |
ashleymills | 0:e979170e02e7 | 219 | "movd %2,%%mm0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 220 | "paddq %%mm4,%%mm3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 221 | "pmuludq %%mm1,%%mm0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 222 | "paddq %%mm0,%%mm3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 223 | "movd %%mm3,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 224 | "psrlq $32, %%mm3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 225 | :"=g"(_c[LO]) : "0"(_c[LO]), "g"(*tmpm++) ); |
ashleymills | 0:e979170e02e7 | 226 | /* INNERMUL8: eight multiply-accumulate steps in one asm statement, covering the 32-bit digits at byte offsets 0..28 of _c and tmpm. Each step adds the _c digit and mm1 times the tmpm digit into mm3, stores the low 32 bits of mm3 back to the _c digit and shifts mm3 right by 32. Besides mm0, mm3 and mm4 it uses mm5, mm6 and mm7 as temporaries. No instruction here changes the _c or tmpm pointer registers. The expansion already ends in a semicolon. */ |
ashleymills | 0:e979170e02e7 | 227 | #define INNERMUL8 \ |
ashleymills | 0:e979170e02e7 | 228 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 229 | "movd 0(%1),%%mm4 \n\t" \ |
ashleymills | 0:e979170e02e7 | 230 | "movd 0(%2),%%mm0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 231 | "paddq %%mm4,%%mm3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 232 | "pmuludq %%mm1,%%mm0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 233 | "movd 4(%2),%%mm5 \n\t" \ |
ashleymills | 0:e979170e02e7 | 234 | "paddq %%mm0,%%mm3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 235 | "movd 4(%1),%%mm6 \n\t" \ |
ashleymills | 0:e979170e02e7 | 236 | "movd %%mm3,0(%0) \n\t" \ |
ashleymills | 0:e979170e02e7 | 237 | "psrlq $32, %%mm3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 238 | \ |
ashleymills | 0:e979170e02e7 | 239 | "paddq %%mm6,%%mm3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 240 | "pmuludq %%mm1,%%mm5 \n\t" \ |
ashleymills | 0:e979170e02e7 | 241 | "movd 8(%2),%%mm6 \n\t" \ |
ashleymills | 0:e979170e02e7 | 242 | "paddq %%mm5,%%mm3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 243 | "movd 8(%1),%%mm7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 244 | "movd %%mm3,4(%0) \n\t" \ |
ashleymills | 0:e979170e02e7 | 245 | "psrlq $32, %%mm3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 246 | \ |
ashleymills | 0:e979170e02e7 | 247 | "paddq %%mm7,%%mm3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 248 | "pmuludq %%mm1,%%mm6 \n\t" \ |
ashleymills | 0:e979170e02e7 | 249 | "movd 12(%2),%%mm7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 250 | "paddq %%mm6,%%mm3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 251 | "movd 12(%1),%%mm5 \n\t" \ |
ashleymills | 0:e979170e02e7 | 252 | "movd %%mm3,8(%0) \n\t" \ |
ashleymills | 0:e979170e02e7 | 253 | "psrlq $32, %%mm3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 254 | \ |
ashleymills | 0:e979170e02e7 | 255 | "paddq %%mm5,%%mm3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 256 | "pmuludq %%mm1,%%mm7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 257 | "movd 16(%2),%%mm5 \n\t" \ |
ashleymills | 0:e979170e02e7 | 258 | "paddq %%mm7,%%mm3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 259 | "movd 16(%1),%%mm6 \n\t" \ |
ashleymills | 0:e979170e02e7 | 260 | "movd %%mm3,12(%0) \n\t" \ |
ashleymills | 0:e979170e02e7 | 261 | "psrlq $32, %%mm3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 262 | \ |
ashleymills | 0:e979170e02e7 | 263 | "paddq %%mm6,%%mm3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 264 | "pmuludq %%mm1,%%mm5 \n\t" \ |
ashleymills | 0:e979170e02e7 | 265 | "movd 20(%2),%%mm6 \n\t" \ |
ashleymills | 0:e979170e02e7 | 266 | "paddq %%mm5,%%mm3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 267 | "movd 20(%1),%%mm7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 268 | "movd %%mm3,16(%0) \n\t" \ |
ashleymills | 0:e979170e02e7 | 269 | "psrlq $32, %%mm3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 270 | \ |
ashleymills | 0:e979170e02e7 | 271 | "paddq %%mm7,%%mm3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 272 | "pmuludq %%mm1,%%mm6 \n\t" \ |
ashleymills | 0:e979170e02e7 | 273 | "movd 24(%2),%%mm7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 274 | "paddq %%mm6,%%mm3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 275 | "movd 24(%1),%%mm5 \n\t" \ |
ashleymills | 0:e979170e02e7 | 276 | "movd %%mm3,20(%0) \n\t" \ |
ashleymills | 0:e979170e02e7 | 277 | "psrlq $32, %%mm3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 278 | \ |
ashleymills | 0:e979170e02e7 | 279 | "paddq %%mm5,%%mm3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 280 | "pmuludq %%mm1,%%mm7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 281 | "movd 28(%2),%%mm5 \n\t" \ |
ashleymills | 0:e979170e02e7 | 282 | "paddq %%mm7,%%mm3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 283 | "movd 28(%1),%%mm6 \n\t" \ |
ashleymills | 0:e979170e02e7 | 284 | "movd %%mm3,24(%0) \n\t" \ |
ashleymills | 0:e979170e02e7 | 285 | "psrlq $32, %%mm3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 286 | \ |
ashleymills | 0:e979170e02e7 | 287 | "paddq %%mm6,%%mm3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 288 | "pmuludq %%mm1,%%mm5 \n\t" \ |
ashleymills | 0:e979170e02e7 | 289 | "paddq %%mm5,%%mm3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 290 | "movd %%mm3,28(%0) \n\t" \ |
ashleymills | 0:e979170e02e7 | 291 | "psrlq $32, %%mm3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 292 | :"=r"(_c) : "0"(_c), "r"(tmpm) ); |
ashleymills | 0:e979170e02e7 | 293 | |
ashleymills | 0:e979170e02e7 | 294 | /* TAO switched tmpm from "g" to "r" after gcc tried to index the indexed stack |
ashleymills | 0:e979170e02e7 | 295 | pointer */ |
ashleymills | 0:e979170e02e7 | 296 | /* LOOP_END: copy the low 32 bits of the running carry in mm3 into cy. */ |
ashleymills | 0:e979170e02e7 | 297 | #define LOOP_END \ |
ashleymills | 0:e979170e02e7 | 298 | __asm__( "movd %%mm3,%0 \n" :"=r"(cy)) |
ashleymills | 0:e979170e02e7 | 299 | /* PROPCARRY: _c[LO] += cy using ordinary 32-bit integer instructions, then cy becomes the carry out of that addition (0 or 1). al is used as scratch. */ |
ashleymills | 0:e979170e02e7 | 300 | #define PROPCARRY \ |
ashleymills | 0:e979170e02e7 | 301 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 302 | "addl %1,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 303 | "setb %%al \n\t" \ |
ashleymills | 0:e979170e02e7 | 304 | "movzbl %%al,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 305 | :"=g"(_c[LO]), "=r"(cy) \ |
ashleymills | 0:e979170e02e7 | 306 | :"0"(_c[LO]), "1"(cy) \ |
ashleymills | 0:e979170e02e7 | 307 | : "%eax", "%cc") |
ashleymills | 0:e979170e02e7 | 308 | |
ashleymills | 0:e979170e02e7 | 309 | /******************************************************************/ |
ashleymills | 0:e979170e02e7 | 310 | #elif defined(TFM_ARM) |
ashleymills | 0:e979170e02e7 | 311 | /* ARMv4 code. The names mu, c, x, mp, cy, _c and tmpm are not declared here; they must exist in the code that expands these macros. MONT_START, MONT_FINI and LOOP_END expand to nothing in this branch. Both asm macros below use r0 as scratch and already end in a semicolon. */ |
ashleymills | 0:e979170e02e7 | 312 | |
ashleymills | 0:e979170e02e7 | 313 | #define MONT_START |
ashleymills | 0:e979170e02e7 | 314 | #define MONT_FINI |
ashleymills | 0:e979170e02e7 | 315 | #define LOOP_END |
ashleymills | 0:e979170e02e7 | 316 | #define LOOP_START \ |
ashleymills | 0:e979170e02e7 | 317 | mu = c[x] * mp |
ashleymills | 0:e979170e02e7 | 318 | /* INNERMUL: r0 = _c[0] + cy, cy is replaced by the carry of that addition (1 or 0), then UMLAL adds mu * (*tmpm++) into the cy:r0 pair and r0 is stored back to _c[0]. */ |
ashleymills | 0:e979170e02e7 | 319 | #define INNERMUL \ |
ashleymills | 0:e979170e02e7 | 320 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 321 | " LDR r0,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 322 | " ADDS r0,r0,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 323 | " MOVCS %0,#1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 324 | " MOVCC %0,#0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 325 | " UMLAL r0,%0,%3,%4 \n\t" \ |
ashleymills | 0:e979170e02e7 | 326 | " STR r0,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 327 | :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r0","%cc"); |
ashleymills | 0:e979170e02e7 | 328 | /* PROPCARRY: _c[0] += cy, then cy becomes the carry out of that addition (1 or 0). */ |
ashleymills | 0:e979170e02e7 | 329 | #define PROPCARRY \ |
ashleymills | 0:e979170e02e7 | 330 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 331 | " LDR r0,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 332 | " ADDS r0,r0,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 333 | " STR r0,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 334 | " MOVCS %0,#1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 335 | " MOVCC %0,#0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 336 | :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r0","%cc"); |
ashleymills | 0:e979170e02e7 | 337 | |
ashleymills | 0:e979170e02e7 | 338 | #elif defined(TFM_PPC32) |
ashleymills | 0:e979170e02e7 | 339 | |
ashleymills | 0:e979170e02e7 | 340 | /* PPC32. The names mu, c, x, mp, cy, _c and tmpm are not declared here; they must exist in the code that expands these macros. MONT_START, MONT_FINI and LOOP_END expand to nothing in this branch. The asm macros use registers 16, 17 and 18 as scratch (listed as clobbers) and already end in a semicolon. */ |
ashleymills | 0:e979170e02e7 | 341 | #define MONT_START |
ashleymills | 0:e979170e02e7 | 342 | #define MONT_FINI |
ashleymills | 0:e979170e02e7 | 343 | #define LOOP_END |
ashleymills | 0:e979170e02e7 | 344 | #define LOOP_START \ |
ashleymills | 0:e979170e02e7 | 345 | mu = c[x] * mp |
ashleymills | 0:e979170e02e7 | 346 | /* INNERMUL: registers 17:16 hold mu * tmpm[0]; cy and then _c[0] are added in with carry propagation, the low word is stored to _c[0] and the high word becomes the new cy. tmpm is incremented by a separate C statement after the asm. */ |
ashleymills | 0:e979170e02e7 | 347 | #define INNERMUL \ |
ashleymills | 0:e979170e02e7 | 348 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 349 | " mullw 16,%3,%4 \n\t" \ |
ashleymills | 0:e979170e02e7 | 350 | " mulhwu 17,%3,%4 \n\t" \ |
ashleymills | 0:e979170e02e7 | 351 | " addc 16,16,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 352 | " addze 17,17 \n\t" \ |
ashleymills | 0:e979170e02e7 | 353 | " lwz 18,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 354 | " addc 16,16,18 \n\t" \ |
ashleymills | 0:e979170e02e7 | 355 | " addze %0,17 \n\t" \ |
ashleymills | 0:e979170e02e7 | 356 | " stw 16,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 357 | :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "18","%cc"); ++tmpm; |
ashleymills | 0:e979170e02e7 | 358 | /* PROPCARRY: _c[0] += cy, then cy is zeroed and set to the carry out of that addition. */ |
ashleymills | 0:e979170e02e7 | 359 | #define PROPCARRY \ |
ashleymills | 0:e979170e02e7 | 360 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 361 | " lwz 16,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 362 | " addc 16,16,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 363 | " stw 16,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 364 | " xor %0,%0,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 365 | " addze %0,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 366 | :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"16","%cc"); |
ashleymills | 0:e979170e02e7 | 367 | |
ashleymills | 0:e979170e02e7 | 368 | #elif defined(TFM_PPC64) |
ashleymills | 0:e979170e02e7 | 369 | |
ashleymills | 0:e979170e02e7 | 370 | /* PPC64: same macro shapes as the PPC32 branch but with 64-bit multiply and load/store mnemonics. MONT_START, MONT_FINI and LOOP_END expand to nothing. Registers 16, 17 and 18 are scratch, and the asm macros already end in a semicolon. NOTE(review): the store is spelled sdx here, while the usual Power ISA mnemonic for store doubleword indexed is stdx, and ldx/sdx are given the "=m" memory operand where an index register would normally appear - confirm that this branch actually assembles on the target toolchain. */ |
ashleymills | 0:e979170e02e7 | 371 | #define MONT_START |
ashleymills | 0:e979170e02e7 | 372 | #define MONT_FINI |
ashleymills | 0:e979170e02e7 | 373 | #define LOOP_END |
ashleymills | 0:e979170e02e7 | 374 | #define LOOP_START \ |
ashleymills | 0:e979170e02e7 | 375 | mu = c[x] * mp |
ashleymills | 0:e979170e02e7 | 376 | /* INNERMUL: registers 17:16 hold mu * tmpm[0]; cy and then the loaded _c[0] value are added in with carry propagation, the low doubleword is stored back and the high doubleword becomes the new cy. tmpm is incremented by a separate C statement after the asm. */ |
ashleymills | 0:e979170e02e7 | 377 | #define INNERMUL \ |
ashleymills | 0:e979170e02e7 | 378 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 379 | " mulld 16,%3,%4 \n\t" \ |
ashleymills | 0:e979170e02e7 | 380 | " mulhdu 17,%3,%4 \n\t" \ |
ashleymills | 0:e979170e02e7 | 381 | " addc 16,16,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 382 | " addze 17,17 \n\t" \ |
ashleymills | 0:e979170e02e7 | 383 | " ldx 18,0,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 384 | " addc 16,16,18 \n\t" \ |
ashleymills | 0:e979170e02e7 | 385 | " addze %0,17 \n\t" \ |
ashleymills | 0:e979170e02e7 | 386 | " sdx 16,0,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 387 | :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "18","%cc"); ++tmpm; |
ashleymills | 0:e979170e02e7 | 388 | /* PROPCARRY: _c[0] += cy, then cy is zeroed and set to the carry out of that addition. */ |
ashleymills | 0:e979170e02e7 | 389 | #define PROPCARRY \ |
ashleymills | 0:e979170e02e7 | 390 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 391 | " ldx 16,0,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 392 | " addc 16,16,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 393 | " sdx 16,0,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 394 | " xor %0,%0,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 395 | " addze %0,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 396 | :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"16","%cc"); |
ashleymills | 0:e979170e02e7 | 397 | |
ashleymills | 0:e979170e02e7 | 398 | /******************************************************************/ |
ashleymills | 0:e979170e02e7 | 399 | |
ashleymills | 0:e979170e02e7 | 400 | #elif defined(TFM_AVR32) |
ashleymills | 0:e979170e02e7 | 401 | |
ashleymills | 0:e979170e02e7 | 402 | /* AVR32. The names mu, c, x, mp, cy, _c and tmpm are not declared here; they must exist in the code that expands these macros. MONT_START, MONT_FINI and LOOP_END expand to nothing in this branch. Unlike the other branches, the digit is addressed through a pointer held in a register operand rather than through a memory operand. The asm macros already end in a semicolon. */ |
ashleymills | 0:e979170e02e7 | 403 | #define MONT_START |
ashleymills | 0:e979170e02e7 | 404 | #define MONT_FINI |
ashleymills | 0:e979170e02e7 | 405 | #define LOOP_END |
ashleymills | 0:e979170e02e7 | 406 | #define LOOP_START \ |
ashleymills | 0:e979170e02e7 | 407 | mu = c[x] * mp |
ashleymills | 0:e979170e02e7 | 408 | /* INNERMUL: presumably the same digit update as the other branches (digit at _c plus cy plus mu * (*tmpm++), low word stored back, high word into cy), using r2 and r3 as scratch. NOTE(review): the clobber list names only r2 and r3, with no "%cc" entry, although PROPCARRY below does list one - confirm whether that is intentional. */ |
ashleymills | 0:e979170e02e7 | 409 | #define INNERMUL \ |
ashleymills | 0:e979170e02e7 | 410 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 411 | " ld.w r2,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 412 | " add r2,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 413 | " eor r3,r3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 414 | " acr r3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 415 | " macu.d r2,%3,%4 \n\t" \ |
ashleymills | 0:e979170e02e7 | 416 | " st.w %1,r2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 417 | " mov %0,r3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 418 | :"=r"(cy),"=r"(_c):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c):"r2","r3"); |
ashleymills | 0:e979170e02e7 | 419 | /* PROPCARRY: adds cy to the digit addressed by operand 1, stores it back, then clears cy and adds the carry into it. NOTE(review): operand 1 is written as the address expression of _c[0] and used as an output operand, whereas INNERMUL passes _c itself - confirm that the target compiler accepts a non-lvalue output here. */ |
ashleymills | 0:e979170e02e7 | 420 | #define PROPCARRY \ |
ashleymills | 0:e979170e02e7 | 421 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 422 | " ld.w r2,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 423 | " add r2,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 424 | " st.w %1,r2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 425 | " eor %0,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 426 | " acr %0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 427 | :"=r"(cy),"=r"(&_c[0]):"0"(cy),"1"(&_c[0]):"r2","%cc"); |
ashleymills | 0:e979170e02e7 | 428 | |
ashleymills | 0:e979170e02e7 | 429 | #else |
ashleymills | 0:e979170e02e7 | 430 | |
ashleymills | 0:e979170e02e7 | 431 | /* ISO C code: fallback used when none of the TFM_* architecture macros tested above is defined. The names mu, c, x, mp, cy, _c and tmpm, the types fp_word and fp_digit, and DIGIT_BIT are not declared here; they must be provided by the code that expands these macros. MONT_START, MONT_FINI and LOOP_END expand to nothing. */ |
ashleymills | 0:e979170e02e7 | 432 | #define MONT_START |
ashleymills | 0:e979170e02e7 | 433 | #define MONT_FINI |
ashleymills | 0:e979170e02e7 | 434 | #define LOOP_END |
ashleymills | 0:e979170e02e7 | 435 | #define LOOP_START \ |
ashleymills | 0:e979170e02e7 | 436 | mu = c[x] * mp |
ashleymills | 0:e979170e02e7 | 437 | /* INNERMUL: computes t = _c[0] + cy + mu * (*tmpm++) in fp_word arithmetic, assigns t back to _c[0] and sets cy to t shifted right by DIGIT_BIT. */ |
ashleymills | 0:e979170e02e7 | 438 | #define INNERMUL \ |
ashleymills | 0:e979170e02e7 | 439 | do { fp_word t; \ |
ashleymills | 0:e979170e02e7 | 440 | _c[0] = t = ((fp_word)_c[0] + (fp_word)cy) + \ |
ashleymills | 0:e979170e02e7 | 441 | (((fp_word)mu) * ((fp_word)*tmpm++)); \ |
ashleymills | 0:e979170e02e7 | 442 | cy = (t >> DIGIT_BIT); \ |
ashleymills | 0:e979170e02e7 | 443 | } while (0) |
ashleymills | 0:e979170e02e7 | 444 | /* PROPCARRY: _c[0] += cy, then cy becomes 1 if the new digit is smaller than the old cy (the test used here to detect wrap-around), otherwise 0. */ |
ashleymills | 0:e979170e02e7 | 445 | #define PROPCARRY \ |
ashleymills | 0:e979170e02e7 | 446 | do { fp_digit t = _c[0] += cy; cy = (t < cy); } while (0) |
ashleymills | 0:e979170e02e7 | 447 | |
ashleymills | 0:e979170e02e7 | 448 | #endif |
ashleymills | 0:e979170e02e7 | 449 | /******************************************************************/ |
ashleymills | 0:e979170e02e7 | 450 | |
ashleymills | 0:e979170e02e7 | 451 | |
ashleymills | 0:e979170e02e7 | 452 | #define LO 0 |
ashleymills | 0:e979170e02e7 | 453 | /* end fp_montgomery_reduce.c asm; LO is the index used as _c[LO] by the x86 and SSE2 macros above */ |
ashleymills | 0:e979170e02e7 | 454 | |
ashleymills | 0:e979170e02e7 | 455 | |
ashleymills | 0:e979170e02e7 | 456 | /* start fp_sqr_comba.c asm */ |
ashleymills | 0:e979170e02e7 | 457 | #if defined(TFM_X86) |
ashleymills | 0:e979170e02e7 | 458 | |
ashleymills | 0:e979170e02e7 | 459 | /* x86-32 optimized. The accumulator variables c0, c1 and c2 are not declared here; they must exist in the code that expands these macros. COMBA_START and COMBA_FINI expand to nothing. */ |
ashleymills | 0:e979170e02e7 | 460 | |
ashleymills | 0:e979170e02e7 | 461 | #define COMBA_START |
ashleymills | 0:e979170e02e7 | 462 | /* CLEAR_CARRY: zero all three accumulator words. The expansion already ends in a semicolon. */ |
ashleymills | 0:e979170e02e7 | 463 | #define CLEAR_CARRY \ |
ashleymills | 0:e979170e02e7 | 464 | c0 = c1 = c2 = 0; |
ashleymills | 0:e979170e02e7 | 465 | /* COMBA_STORE(x): assign c0 to x. The expansion already ends in a semicolon. */ |
ashleymills | 0:e979170e02e7 | 466 | #define COMBA_STORE(x) \ |
ashleymills | 0:e979170e02e7 | 467 | x = c0; |
ashleymills | 0:e979170e02e7 | 468 | /* COMBA_STORE2(x): assign c1 to x. The expansion already ends in a semicolon. */ |
ashleymills | 0:e979170e02e7 | 469 | #define COMBA_STORE2(x) \ |
ashleymills | 0:e979170e02e7 | 470 | x = c1; |
ashleymills | 0:e979170e02e7 | 471 | /* CARRY_FORWARD: shift the accumulator down one word (c0 = c1, c1 = c2, c2 = 0). The expansion ends in a semicolon after the while (0). */ |
ashleymills | 0:e979170e02e7 | 472 | #define CARRY_FORWARD \ |
ashleymills | 0:e979170e02e7 | 473 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); |
ashleymills | 0:e979170e02e7 | 474 | |
ashleymills | 0:e979170e02e7 | 475 | #define COMBA_FINI |
ashleymills | 0:e979170e02e7 | 476 | /* SQRADD(i, j): add i * i to the three-word accumulator c2:c1:c0. The j argument is not used by the expansion. eax and edx are scratch. */ |
ashleymills | 0:e979170e02e7 | 477 | #define SQRADD(i, j) \ |
ashleymills | 0:e979170e02e7 | 478 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 479 | "movl %6,%%eax \n\t" \ |
ashleymills | 0:e979170e02e7 | 480 | "mull %%eax \n\t" \ |
ashleymills | 0:e979170e02e7 | 481 | "addl %%eax,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 482 | "adcl %%edx,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 483 | "adcl $0,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 484 | :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","%cc"); |
ashleymills | 0:e979170e02e7 | 485 | /* SQRADD2(i, j): add the product i * j to c2:c1:c0 twice, i.e. accumulate 2 * i * j. */ |
ashleymills | 0:e979170e02e7 | 486 | #define SQRADD2(i, j) \ |
ashleymills | 0:e979170e02e7 | 487 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 488 | "movl %6,%%eax \n\t" \ |
ashleymills | 0:e979170e02e7 | 489 | "mull %7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 490 | "addl %%eax,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 491 | "adcl %%edx,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 492 | "adcl $0,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 493 | "addl %%eax,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 494 | "adcl %%edx,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 495 | "adcl $0,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 496 | :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx", "%cc"); |
ashleymills | 0:e979170e02e7 | 497 | /* SQRADDSC(i, j): start a secondary accumulator: sc0 and sc1 receive the low and high words of i * j, and sc2 is zeroed. sc0, sc1 and sc2 must exist in the expanding code. */ |
ashleymills | 0:e979170e02e7 | 498 | #define SQRADDSC(i, j) \ |
ashleymills | 0:e979170e02e7 | 499 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 500 | "movl %3,%%eax \n\t" \ |
ashleymills | 0:e979170e02e7 | 501 | "mull %4 \n\t" \ |
ashleymills | 0:e979170e02e7 | 502 | "movl %%eax,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 503 | "movl %%edx,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 504 | "xorl %2,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 505 | :"=r"(sc0), "=r"(sc1), "=r"(sc2): "g"(i), "g"(j) :"%eax","%edx","%cc"); |
ashleymills | 0:e979170e02e7 | 506 | |
ashleymills | 0:e979170e02e7 | 507 | /* TAO removed sc0,1,2 as input to remove warning so %6,%7 become %3,%4 */ |
ashleymills | 0:e979170e02e7 | 508 | /* SQRADDAC(i, j): add i * j to the secondary accumulator sc2:sc1:sc0. */ |
ashleymills | 0:e979170e02e7 | 509 | #define SQRADDAC(i, j) \ |
ashleymills | 0:e979170e02e7 | 510 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 511 | "movl %6,%%eax \n\t" \ |
ashleymills | 0:e979170e02e7 | 512 | "mull %7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 513 | "addl %%eax,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 514 | "adcl %%edx,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 515 | "adcl $0,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 516 | :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc"); |
ashleymills | 0:e979170e02e7 | 517 | /* SQRADDDB: add the secondary accumulator sc2:sc1:sc0 into c2:c1:c0 twice, i.e. accumulate it doubled. */ |
ashleymills | 0:e979170e02e7 | 518 | #define SQRADDDB \ |
ashleymills | 0:e979170e02e7 | 519 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 520 | "addl %6,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 521 | "adcl %7,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 522 | "adcl %8,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 523 | "addl %6,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 524 | "adcl %7,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 525 | "adcl %8,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 526 | :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "%cc"); |
ashleymills | 0:e979170e02e7 | 527 | |
ashleymills | 0:e979170e02e7 | 528 | #elif defined(TFM_X86_64) |
ashleymills | 0:e979170e02e7 | 529 | /* x86-64 optimized */ |
ashleymills | 0:e979170e02e7 | 530 | /* anything needed at the start: nothing for this back-end */ |
ashleymills | 0:e979170e02e7 | 531 | #define COMBA_START |
ashleymills | 0:e979170e02e7 | 532 | /* clear the chaining variables c0, c1, c2 */ |
ashleymills | 0:e979170e02e7 | 533 | #define CLEAR_CARRY \ |
ashleymills | 0:e979170e02e7 | 534 | c0 = c1 = c2 = 0; |
ashleymills | 0:e979170e02e7 | 535 | /* store c0 into x */ |
ashleymills | 0:e979170e02e7 | 536 | #define COMBA_STORE(x) \ |
ashleymills | 0:e979170e02e7 | 537 | x = c0; |
ashleymills | 0:e979170e02e7 | 538 | /* store c1 into x */ |
ashleymills | 0:e979170e02e7 | 539 | #define COMBA_STORE2(x) \ |
ashleymills | 0:e979170e02e7 | 540 | x = c1; |
ashleymills | 0:e979170e02e7 | 541 | /* shift the chain down one place: c0 <- c1, c1 <- c2, c2 <- 0 */ |
ashleymills | 0:e979170e02e7 | 542 | #define CARRY_FORWARD \ |
ashleymills | 0:e979170e02e7 | 543 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); |
ashleymills | 0:e979170e02e7 | 544 | /* anything needed at the end: nothing for this back-end */ |
ashleymills | 0:e979170e02e7 | 545 | #define COMBA_FINI |
ashleymills | 0:e979170e02e7 | 546 | /* SQRADD: add i * i (rax multiplied by itself) into c0,c1 with the carry going to c2; j is not referenced */ |
ashleymills | 0:e979170e02e7 | 547 | #define SQRADD(i, j) \ |
ashleymills | 0:e979170e02e7 | 548 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 549 | "movq %6,%%rax \n\t" \ |
ashleymills | 0:e979170e02e7 | 550 | "mulq %%rax \n\t" \ |
ashleymills | 0:e979170e02e7 | 551 | "addq %%rax,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 552 | "adcq %%rdx,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 553 | "adcq $0,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 554 | :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i) :"%rax","%rdx","%cc"); |
ashleymills | 0:e979170e02e7 | 555 | /* SQRADD2: multiply i by j once, then add the rdx:rax product into c0,c1,c2 two times */ |
ashleymills | 0:e979170e02e7 | 556 | #define SQRADD2(i, j) \ |
ashleymills | 0:e979170e02e7 | 557 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 558 | "movq %6,%%rax \n\t" \ |
ashleymills | 0:e979170e02e7 | 559 | "mulq %7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 560 | "addq %%rax,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 561 | "adcq %%rdx,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 562 | "adcq $0,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 563 | "addq %%rax,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 564 | "adcq %%rdx,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 565 | "adcq $0,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 566 | :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","%cc"); |
ashleymills | 0:e979170e02e7 | 567 | /* SQRADDSC: start a new sum, sc1:sc0 = i * j (read back from rdx:rax after mulq) and sc2 = 0 */ |
ashleymills | 0:e979170e02e7 | 568 | #define SQRADDSC(i, j) \ |
ashleymills | 0:e979170e02e7 | 569 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 570 | "movq %3,%%rax \n\t" \ |
ashleymills | 0:e979170e02e7 | 571 | "mulq %4 \n\t" \ |
ashleymills | 0:e979170e02e7 | 572 | "movq %%rax,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 573 | "movq %%rdx,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 574 | "xorq %2,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 575 | :"=r"(sc0), "=r"(sc1), "=r"(sc2): "g"(i), "g"(j) :"%rax","%rdx","%cc"); |
ashleymills | 0:e979170e02e7 | 576 | |
ashleymills | 0:e979170e02e7 | 577 | /* TAO removed sc0,1,2 as inputs to SQRADDSC (above) to remove a warning, so its operands %6,%7 became %3,%4 */ |
ashleymills | 0:e979170e02e7 | 578 | /* SQRADDAC: accumulate i * j into sc0,sc1; the carry out of sc1 is added to sc2 */ |
ashleymills | 0:e979170e02e7 | 579 | #define SQRADDAC(i, j) \ |
ashleymills | 0:e979170e02e7 | 580 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 581 | "movq %6,%%rax \n\t" \ |
ashleymills | 0:e979170e02e7 | 582 | "mulq %7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 583 | "addq %%rax,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 584 | "adcq %%rdx,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 585 | "adcq $0,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 586 | :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","%cc"); |
ashleymills | 0:e979170e02e7 | 587 | /* SQRADDDB: add sc0,sc1,sc2 into c0,c1,c2 with carry, twice over (c += 2 * sc) */ |
ashleymills | 0:e979170e02e7 | 588 | #define SQRADDDB \ |
ashleymills | 0:e979170e02e7 | 589 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 590 | "addq %6,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 591 | "adcq %7,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 592 | "adcq %8,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 593 | "addq %6,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 594 | "adcq %7,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 595 | "adcq %8,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 596 | :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "%cc"); |
ashleymills | 0:e979170e02e7 | 597 | |
ashleymills | 0:e979170e02e7 | 598 | #elif defined(TFM_SSE2) |
ashleymills | 0:e979170e02e7 | 599 | |
ashleymills | 0:e979170e02e7 | 600 | /* SSE2 Optimized; nothing is needed at the start */ |
ashleymills | 0:e979170e02e7 | 601 | #define COMBA_START |
ashleymills | 0:e979170e02e7 | 602 | /* clear the chaining variables c0, c1, c2 */ |
ashleymills | 0:e979170e02e7 | 603 | #define CLEAR_CARRY \ |
ashleymills | 0:e979170e02e7 | 604 | c0 = c1 = c2 = 0; |
ashleymills | 0:e979170e02e7 | 605 | /* store c0 into x */ |
ashleymills | 0:e979170e02e7 | 606 | #define COMBA_STORE(x) \ |
ashleymills | 0:e979170e02e7 | 607 | x = c0; |
ashleymills | 0:e979170e02e7 | 608 | /* store c1 into x */ |
ashleymills | 0:e979170e02e7 | 609 | #define COMBA_STORE2(x) \ |
ashleymills | 0:e979170e02e7 | 610 | x = c1; |
ashleymills | 0:e979170e02e7 | 611 | /* shift the chain down one place: c0 <- c1, c1 <- c2, c2 <- 0 */ |
ashleymills | 0:e979170e02e7 | 612 | #define CARRY_FORWARD \ |
ashleymills | 0:e979170e02e7 | 613 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); |
ashleymills | 0:e979170e02e7 | 614 | /* at the end issue emms; the SQR* macros of this back-end use the mm0/mm1 registers */ |
ashleymills | 0:e979170e02e7 | 615 | #define COMBA_FINI \ |
ashleymills | 0:e979170e02e7 | 616 | __asm__("emms"); |
ashleymills | 0:e979170e02e7 | 617 | /* SQRADD: form i * i in mm0 with pmuludq, then add its low and high halves (via eax) into c0,c1 with the carry going to c2; j is not referenced */ |
ashleymills | 0:e979170e02e7 | 618 | #define SQRADD(i, j) \ |
ashleymills | 0:e979170e02e7 | 619 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 620 | "movd %6,%%mm0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 621 | "pmuludq %%mm0,%%mm0\n\t" \ |
ashleymills | 0:e979170e02e7 | 622 | "movd %%mm0,%%eax \n\t" \ |
ashleymills | 0:e979170e02e7 | 623 | "psrlq $32,%%mm0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 624 | "addl %%eax,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 625 | "movd %%mm0,%%eax \n\t" \ |
ashleymills | 0:e979170e02e7 | 626 | "adcl %%eax,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 627 | "adcl $0,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 628 | :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%cc"); |
ashleymills | 0:e979170e02e7 | 629 | /* SQRADD2: form i * j once with pmuludq, split it into eax (low) and edx (high), then add it into c0,c1,c2 two times */ |
ashleymills | 0:e979170e02e7 | 630 | #define SQRADD2(i, j) \ |
ashleymills | 0:e979170e02e7 | 631 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 632 | "movd %6,%%mm0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 633 | "movd %7,%%mm1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 634 | "pmuludq %%mm1,%%mm0\n\t" \ |
ashleymills | 0:e979170e02e7 | 635 | "movd %%mm0,%%eax \n\t" \ |
ashleymills | 0:e979170e02e7 | 636 | "psrlq $32,%%mm0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 637 | "movd %%mm0,%%edx \n\t" \ |
ashleymills | 0:e979170e02e7 | 638 | "addl %%eax,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 639 | "adcl %%edx,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 640 | "adcl $0,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 641 | "addl %%eax,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 642 | "adcl %%edx,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 643 | "adcl $0,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 644 | :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc"); |
ashleymills | 0:e979170e02e7 | 645 | /* SQRADDSC: start a new sum, sc0 = low half and sc1 = high half of i * j, sc2 = 0. NOTE(review): this asm has no clobber list although it writes mm0/mm1 - confirm that is intended */ |
ashleymills | 0:e979170e02e7 | 646 | #define SQRADDSC(i, j) \ |
ashleymills | 0:e979170e02e7 | 647 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 648 | "movd %3,%%mm0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 649 | "movd %4,%%mm1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 650 | "pmuludq %%mm1,%%mm0\n\t" \ |
ashleymills | 0:e979170e02e7 | 651 | "movd %%mm0,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 652 | "psrlq $32,%%mm0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 653 | "movd %%mm0,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 654 | "xorl %2,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 655 | :"=r"(sc0), "=r"(sc1), "=r"(sc2): "m"(i), "m"(j)); |
ashleymills | 0:e979170e02e7 | 656 | |
ashleymills | 0:e979170e02e7 | 657 | /* TAO removed sc0,1,2 as inputs to SQRADDSC (above) to remove a warning, so its operands %6,%7 became %3,%4 */ |
ashleymills | 0:e979170e02e7 | 658 | /* SQRADDAC: form i * j with pmuludq and accumulate it into sc0,sc1; the carry out of sc1 is added to sc2 */ |
ashleymills | 0:e979170e02e7 | 659 | #define SQRADDAC(i, j) \ |
ashleymills | 0:e979170e02e7 | 660 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 661 | "movd %6,%%mm0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 662 | "movd %7,%%mm1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 663 | "pmuludq %%mm1,%%mm0\n\t" \ |
ashleymills | 0:e979170e02e7 | 664 | "movd %%mm0,%%eax \n\t" \ |
ashleymills | 0:e979170e02e7 | 665 | "psrlq $32,%%mm0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 666 | "movd %%mm0,%%edx \n\t" \ |
ashleymills | 0:e979170e02e7 | 667 | "addl %%eax,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 668 | "adcl %%edx,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 669 | "adcl $0,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 670 | :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j) :"%eax","%edx","%cc"); |
ashleymills | 0:e979170e02e7 | 671 | /* SQRADDDB: add sc0,sc1,sc2 into c0,c1,c2 with carry, twice over (c += 2 * sc) */ |
ashleymills | 0:e979170e02e7 | 672 | #define SQRADDDB \ |
ashleymills | 0:e979170e02e7 | 673 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 674 | "addl %6,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 675 | "adcl %7,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 676 | "adcl %8,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 677 | "addl %6,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 678 | "adcl %7,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 679 | "adcl %8,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 680 | :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "%cc"); |
ashleymills | 0:e979170e02e7 | 681 | |
ashleymills | 0:e979170e02e7 | 682 | #elif defined(TFM_ARM) |
ashleymills | 0:e979170e02e7 | 683 | |
ashleymills | 0:e979170e02e7 | 684 | /* ARM code */ |
ashleymills | 0:e979170e02e7 | 685 | /* anything needed at the start: nothing for this back-end */ |
ashleymills | 0:e979170e02e7 | 686 | #define COMBA_START |
ashleymills | 0:e979170e02e7 | 687 | /* clear the chaining variables c0, c1, c2 */ |
ashleymills | 0:e979170e02e7 | 688 | #define CLEAR_CARRY \ |
ashleymills | 0:e979170e02e7 | 689 | c0 = c1 = c2 = 0; |
ashleymills | 0:e979170e02e7 | 690 | /* store c0 into x */ |
ashleymills | 0:e979170e02e7 | 691 | #define COMBA_STORE(x) \ |
ashleymills | 0:e979170e02e7 | 692 | x = c0; |
ashleymills | 0:e979170e02e7 | 693 | /* store c1 into x */ |
ashleymills | 0:e979170e02e7 | 694 | #define COMBA_STORE2(x) \ |
ashleymills | 0:e979170e02e7 | 695 | x = c1; |
ashleymills | 0:e979170e02e7 | 696 | /* shift the chain down one place: c0 <- c1, c1 <- c2, c2 <- 0 */ |
ashleymills | 0:e979170e02e7 | 697 | #define CARRY_FORWARD \ |
ashleymills | 0:e979170e02e7 | 698 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); |
ashleymills | 0:e979170e02e7 | 699 | /* anything needed at the end: nothing for this back-end */ |
ashleymills | 0:e979170e02e7 | 700 | #define COMBA_FINI |
ashleymills | 0:e979170e02e7 | 701 | |
ashleymills | 0:e979170e02e7 | 702 | /* squares i (j is not referenced): UMULL puts the product in r1:r0, which is added into c0,c1 with the carry going to c2 */ |
ashleymills | 0:e979170e02e7 | 703 | #define SQRADD(i, j) \ |
ashleymills | 0:e979170e02e7 | 704 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 705 | " UMULL r0,r1,%6,%6 \n\t" \ |
ashleymills | 0:e979170e02e7 | 706 | " ADDS %0,%0,r0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 707 | " ADCS %1,%1,r1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 708 | " ADC %2,%2,#0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 709 | :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i) : "r0", "r1", "%cc"); |
ashleymills | 0:e979170e02e7 | 710 | |
ashleymills | 0:e979170e02e7 | 711 | /* for squaring some of the terms are doubled: multiplies i by j once (r1:r0) and adds that product into c0,c1,c2 two times */ |
ashleymills | 0:e979170e02e7 | 712 | #define SQRADD2(i, j) \ |
ashleymills | 0:e979170e02e7 | 713 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 714 | " UMULL r0,r1,%6,%7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 715 | " ADDS %0,%0,r0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 716 | " ADCS %1,%1,r1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 717 | " ADC %2,%2,#0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 718 | " ADDS %0,%0,r0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 719 | " ADCS %1,%1,r1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 720 | " ADC %2,%2,#0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 721 | :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "%cc"); |
ashleymills | 0:e979170e02e7 | 722 | /* SQRADDSC: start a new sum, UMULL writes i * j to sc1:sc0 and SUB of sc2 from itself leaves sc2 = 0. NOTE(review): sc0..sc2 are still listed as inputs here, unlike the x86 variants - presumably their incoming values do not matter; confirm */ |
ashleymills | 0:e979170e02e7 | 723 | #define SQRADDSC(i, j) \ |
ashleymills | 0:e979170e02e7 | 724 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 725 | " UMULL %0,%1,%6,%7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 726 | " SUB %2,%2,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 727 | :"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "%cc"); |
ashleymills | 0:e979170e02e7 | 728 | /* SQRADDAC: multiply i by j (r1:r0) and accumulate into sc0,sc1; the carry out of sc1 is added to sc2 */ |
ashleymills | 0:e979170e02e7 | 729 | #define SQRADDAC(i, j) \ |
ashleymills | 0:e979170e02e7 | 730 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 731 | " UMULL r0,r1,%6,%7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 732 | " ADDS %0,%0,r0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 733 | " ADCS %1,%1,r1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 734 | " ADC %2,%2,#0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 735 | :"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "r0", "r1", "%cc"); |
ashleymills | 0:e979170e02e7 | 736 | /* SQRADDDB: add sc0,sc1,sc2 (operands %3,%4,%5 here) into c0,c1,c2 with carry, twice over (c += 2 * sc) */ |
ashleymills | 0:e979170e02e7 | 737 | #define SQRADDDB \ |
ashleymills | 0:e979170e02e7 | 738 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 739 | " ADDS %0,%0,%3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 740 | " ADCS %1,%1,%4 \n\t" \ |
ashleymills | 0:e979170e02e7 | 741 | " ADC %2,%2,%5 \n\t" \ |
ashleymills | 0:e979170e02e7 | 742 | " ADDS %0,%0,%3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 743 | " ADCS %1,%1,%4 \n\t" \ |
ashleymills | 0:e979170e02e7 | 744 | " ADC %2,%2,%5 \n\t" \ |
ashleymills | 0:e979170e02e7 | 745 | :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc"); |
ashleymills | 0:e979170e02e7 | 746 | |
ashleymills | 0:e979170e02e7 | 747 | #elif defined(TFM_PPC32) |
ashleymills | 0:e979170e02e7 | 748 | |
ashleymills | 0:e979170e02e7 | 749 | /* PPC32 */ |
ashleymills | 0:e979170e02e7 | 750 | /* anything needed at the start: nothing for this back-end */ |
ashleymills | 0:e979170e02e7 | 751 | #define COMBA_START |
ashleymills | 0:e979170e02e7 | 752 | /* clear the chaining variables c0, c1, c2 */ |
ashleymills | 0:e979170e02e7 | 753 | #define CLEAR_CARRY \ |
ashleymills | 0:e979170e02e7 | 754 | c0 = c1 = c2 = 0; |
ashleymills | 0:e979170e02e7 | 755 | /* store c0 into x */ |
ashleymills | 0:e979170e02e7 | 756 | #define COMBA_STORE(x) \ |
ashleymills | 0:e979170e02e7 | 757 | x = c0; |
ashleymills | 0:e979170e02e7 | 758 | /* store c1 into x */ |
ashleymills | 0:e979170e02e7 | 759 | #define COMBA_STORE2(x) \ |
ashleymills | 0:e979170e02e7 | 760 | x = c1; |
ashleymills | 0:e979170e02e7 | 761 | /* shift the chain down one place: c0 <- c1, c1 <- c2, c2 <- 0 */ |
ashleymills | 0:e979170e02e7 | 762 | #define CARRY_FORWARD \ |
ashleymills | 0:e979170e02e7 | 763 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); |
ashleymills | 0:e979170e02e7 | 764 | /* anything needed at the end: nothing for this back-end */ |
ashleymills | 0:e979170e02e7 | 765 | #define COMBA_FINI |
ashleymills | 0:e979170e02e7 | 766 | |
ashleymills | 0:e979170e02e7 | 767 | /* squares i (j is not referenced): the mullw result is added to c0, the mulhwu result to c1 with carry, and addze adds the final carry to c2; register 16 is scratch */ |
ashleymills | 0:e979170e02e7 | 768 | #define SQRADD(i, j) \ |
ashleymills | 0:e979170e02e7 | 769 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 770 | " mullw 16,%6,%6 \n\t" \ |
ashleymills | 0:e979170e02e7 | 771 | " addc %0,%0,16 \n\t" \ |
ashleymills | 0:e979170e02e7 | 772 | " mulhwu 16,%6,%6 \n\t" \ |
ashleymills | 0:e979170e02e7 | 773 | " adde %1,%1,16 \n\t" \ |
ashleymills | 0:e979170e02e7 | 774 | " addze %2,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 775 | :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","%cc"); |
ashleymills | 0:e979170e02e7 | 776 | |
ashleymills | 0:e979170e02e7 | 777 | /* for squaring some of the terms are doubled: multiplies i by j once (registers 16 and 17 hold the two halves) and adds that product into c0,c1,c2 two times */ |
ashleymills | 0:e979170e02e7 | 778 | #define SQRADD2(i, j) \ |
ashleymills | 0:e979170e02e7 | 779 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 780 | " mullw 16,%6,%7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 781 | " mulhwu 17,%6,%7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 782 | " addc %0,%0,16 \n\t" \ |
ashleymills | 0:e979170e02e7 | 783 | " adde %1,%1,17 \n\t" \ |
ashleymills | 0:e979170e02e7 | 784 | " addze %2,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 785 | " addc %0,%0,16 \n\t" \ |
ashleymills | 0:e979170e02e7 | 786 | " adde %1,%1,17 \n\t" \ |
ashleymills | 0:e979170e02e7 | 787 | " addze %2,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 788 | :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","%cc"); |
ashleymills | 0:e979170e02e7 | 789 | /* SQRADDSC: start a new sum, sc0 = mullw result and sc1 = mulhwu result of i * j; xor of sc2 with itself leaves sc2 = 0 */ |
ashleymills | 0:e979170e02e7 | 790 | #define SQRADDSC(i, j) \ |
ashleymills | 0:e979170e02e7 | 791 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 792 | " mullw %0,%6,%7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 793 | " mulhwu %1,%6,%7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 794 | " xor %2,%2,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 795 | :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc"); |
ashleymills | 0:e979170e02e7 | 796 | /* SQRADDAC: accumulate i * j into sc0,sc1 and add the final carry to sc2; register 16 is scratch */ |
ashleymills | 0:e979170e02e7 | 797 | #define SQRADDAC(i, j) \ |
ashleymills | 0:e979170e02e7 | 798 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 799 | " mullw 16,%6,%7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 800 | " addc %0,%0,16 \n\t" \ |
ashleymills | 0:e979170e02e7 | 801 | " mulhwu 16,%6,%7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 802 | " adde %1,%1,16 \n\t" \ |
ashleymills | 0:e979170e02e7 | 803 | " addze %2,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 804 | :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "%cc"); |
ashleymills | 0:e979170e02e7 | 805 | /* SQRADDDB: add sc0,sc1,sc2 (operands %3,%4,%5 here) into c0,c1,c2 with carry, twice over (c += 2 * sc) */ |
ashleymills | 0:e979170e02e7 | 806 | #define SQRADDDB \ |
ashleymills | 0:e979170e02e7 | 807 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 808 | " addc %0,%0,%3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 809 | " adde %1,%1,%4 \n\t" \ |
ashleymills | 0:e979170e02e7 | 810 | " adde %2,%2,%5 \n\t" \ |
ashleymills | 0:e979170e02e7 | 811 | " addc %0,%0,%3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 812 | " adde %1,%1,%4 \n\t" \ |
ashleymills | 0:e979170e02e7 | 813 | " adde %2,%2,%5 \n\t" \ |
ashleymills | 0:e979170e02e7 | 814 | :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc"); |
ashleymills | 0:e979170e02e7 | 815 | |
ashleymills | 0:e979170e02e7 | 816 | #elif defined(TFM_PPC64) |
ashleymills | 0:e979170e02e7 | 817 | /* PPC64 */ |
ashleymills | 0:e979170e02e7 | 818 | /* anything needed at the start: nothing for this back-end */ |
ashleymills | 0:e979170e02e7 | 819 | #define COMBA_START |
ashleymills | 0:e979170e02e7 | 820 | /* clear the chaining variables c0, c1, c2 */ |
ashleymills | 0:e979170e02e7 | 821 | #define CLEAR_CARRY \ |
ashleymills | 0:e979170e02e7 | 822 | c0 = c1 = c2 = 0; |
ashleymills | 0:e979170e02e7 | 823 | /* store c0 into x */ |
ashleymills | 0:e979170e02e7 | 824 | #define COMBA_STORE(x) \ |
ashleymills | 0:e979170e02e7 | 825 | x = c0; |
ashleymills | 0:e979170e02e7 | 826 | /* store c1 into x */ |
ashleymills | 0:e979170e02e7 | 827 | #define COMBA_STORE2(x) \ |
ashleymills | 0:e979170e02e7 | 828 | x = c1; |
ashleymills | 0:e979170e02e7 | 829 | /* shift the chain down one place: c0 <- c1, c1 <- c2, c2 <- 0 */ |
ashleymills | 0:e979170e02e7 | 830 | #define CARRY_FORWARD \ |
ashleymills | 0:e979170e02e7 | 831 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); |
ashleymills | 0:e979170e02e7 | 832 | /* anything needed at the end: nothing for this back-end */ |
ashleymills | 0:e979170e02e7 | 833 | #define COMBA_FINI |
ashleymills | 0:e979170e02e7 | 834 | |
ashleymills | 0:e979170e02e7 | 835 | /* squares i (j is not referenced): the mulld result is added to c0, the mulhdu result to c1 with carry, and addze adds the final carry to c2; register 16 is scratch */ |
ashleymills | 0:e979170e02e7 | 836 | #define SQRADD(i, j) \ |
ashleymills | 0:e979170e02e7 | 837 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 838 | " mulld 16,%6,%6 \n\t" \ |
ashleymills | 0:e979170e02e7 | 839 | " addc %0,%0,16 \n\t" \ |
ashleymills | 0:e979170e02e7 | 840 | " mulhdu 16,%6,%6 \n\t" \ |
ashleymills | 0:e979170e02e7 | 841 | " adde %1,%1,16 \n\t" \ |
ashleymills | 0:e979170e02e7 | 842 | " addze %2,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 843 | :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","%cc"); |
ashleymills | 0:e979170e02e7 | 844 | |
ashleymills | 0:e979170e02e7 | 845 | /* for squaring some of the terms are doubled: multiplies i by j once (registers 16 and 17 hold the two halves) and adds that product into c0,c1,c2 two times */ |
ashleymills | 0:e979170e02e7 | 846 | #define SQRADD2(i, j) \ |
ashleymills | 0:e979170e02e7 | 847 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 848 | " mulld 16,%6,%7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 849 | " mulhdu 17,%6,%7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 850 | " addc %0,%0,16 \n\t" \ |
ashleymills | 0:e979170e02e7 | 851 | " adde %1,%1,17 \n\t" \ |
ashleymills | 0:e979170e02e7 | 852 | " addze %2,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 853 | " addc %0,%0,16 \n\t" \ |
ashleymills | 0:e979170e02e7 | 854 | " adde %1,%1,17 \n\t" \ |
ashleymills | 0:e979170e02e7 | 855 | " addze %2,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 856 | :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","%cc"); |
ashleymills | 0:e979170e02e7 | 857 | /* SQRADDSC: start a new sum, sc0 = mulld result and sc1 = mulhdu result of i * j; xor of sc2 with itself leaves sc2 = 0 */ |
ashleymills | 0:e979170e02e7 | 858 | #define SQRADDSC(i, j) \ |
ashleymills | 0:e979170e02e7 | 859 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 860 | " mulld %0,%6,%7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 861 | " mulhdu %1,%6,%7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 862 | " xor %2,%2,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 863 | :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc"); |
ashleymills | 0:e979170e02e7 | 864 | /* SQRADDAC: accumulate i * j into sc0,sc1 and add the final carry to sc2; register 16 is scratch */ |
ashleymills | 0:e979170e02e7 | 865 | #define SQRADDAC(i, j) \ |
ashleymills | 0:e979170e02e7 | 866 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 867 | " mulld 16,%6,%7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 868 | " addc %0,%0,16 \n\t" \ |
ashleymills | 0:e979170e02e7 | 869 | " mulhdu 16,%6,%7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 870 | " adde %1,%1,16 \n\t" \ |
ashleymills | 0:e979170e02e7 | 871 | " addze %2,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 872 | :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "%cc"); |
ashleymills | 0:e979170e02e7 | 873 | /* SQRADDDB: add sc0,sc1,sc2 (operands %3,%4,%5 here) into c0,c1,c2 with carry, twice over (c += 2 * sc) */ |
ashleymills | 0:e979170e02e7 | 874 | #define SQRADDDB \ |
ashleymills | 0:e979170e02e7 | 875 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 876 | " addc %0,%0,%3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 877 | " adde %1,%1,%4 \n\t" \ |
ashleymills | 0:e979170e02e7 | 878 | " adde %2,%2,%5 \n\t" \ |
ashleymills | 0:e979170e02e7 | 879 | " addc %0,%0,%3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 880 | " adde %1,%1,%4 \n\t" \ |
ashleymills | 0:e979170e02e7 | 881 | " adde %2,%2,%5 \n\t" \ |
ashleymills | 0:e979170e02e7 | 882 | :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc"); |
ashleymills | 0:e979170e02e7 | 883 | |
ashleymills | 0:e979170e02e7 | 884 | |
ashleymills | 0:e979170e02e7 | 885 | #elif defined(TFM_AVR32) |
ashleymills | 0:e979170e02e7 | 886 | |
ashleymills | 0:e979170e02e7 | 887 | /* AVR32 */ |
ashleymills | 0:e979170e02e7 | 888 | /* anything needed at the start: nothing for this back-end */ |
ashleymills | 0:e979170e02e7 | 889 | #define COMBA_START |
ashleymills | 0:e979170e02e7 | 890 | /* clear the chaining variables c0, c1, c2 */ |
ashleymills | 0:e979170e02e7 | 891 | #define CLEAR_CARRY \ |
ashleymills | 0:e979170e02e7 | 892 | c0 = c1 = c2 = 0; |
ashleymills | 0:e979170e02e7 | 893 | /* store c0 into x */ |
ashleymills | 0:e979170e02e7 | 894 | #define COMBA_STORE(x) \ |
ashleymills | 0:e979170e02e7 | 895 | x = c0; |
ashleymills | 0:e979170e02e7 | 896 | /* store c1 into x */ |
ashleymills | 0:e979170e02e7 | 897 | #define COMBA_STORE2(x) \ |
ashleymills | 0:e979170e02e7 | 898 | x = c1; |
ashleymills | 0:e979170e02e7 | 899 | /* shift the chain down one place: c0 <- c1, c1 <- c2, c2 <- 0 */ |
ashleymills | 0:e979170e02e7 | 900 | #define CARRY_FORWARD \ |
ashleymills | 0:e979170e02e7 | 901 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); |
ashleymills | 0:e979170e02e7 | 902 | /* anything needed at the end: nothing for this back-end */ |
ashleymills | 0:e979170e02e7 | 903 | #define COMBA_FINI |
ashleymills | 0:e979170e02e7 | 904 | |
ashleymills | 0:e979170e02e7 | 905 | /* squares i (j is not referenced): the mulu.d product is taken from r2 (added to c0) and r3 (added to c1 with carry); acr then adds the carry to c2 */ |
ashleymills | 0:e979170e02e7 | 906 | #define SQRADD(i, j) \ |
ashleymills | 0:e979170e02e7 | 907 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 908 | " mulu.d r2,%6,%6 \n\t" \ |
ashleymills | 0:e979170e02e7 | 909 | " add %0,%0,r2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 910 | " adc %1,%1,r3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 911 | " acr %2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 912 | :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r2","r3"); |
ashleymills | 0:e979170e02e7 | 913 | |
ashleymills | 0:e979170e02e7 | 914 | /* for squaring some of the terms are doubled: multiplies i by j once (r2/r3) and adds that product into c0,c1,c2 two times; acr takes a single register operand, written as in SQRADD and SQRADDAC */ |
ashleymills | 0:e979170e02e7 | 915 | #define SQRADD2(i, j) \ |
ashleymills | 0:e979170e02e7 | 916 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 917 | " mulu.d r2,%6,%7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 918 | " add %0,%0,r2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 919 | " adc %1,%1,r3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 920 | " acr %2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 921 | " add %0,%0,r2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 922 | " adc %1,%1,r3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 923 | " acr %2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 924 | :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2", "r3"); |
ashleymills | 0:e979170e02e7 | 925 | /* SQRADDSC: start a new sum, the mulu.d product of i and j is copied from r2 to sc0 and from r3 to sc1; eor of sc2 with itself leaves sc2 = 0 */ |
ashleymills | 0:e979170e02e7 | 926 | #define SQRADDSC(i, j) \ |
ashleymills | 0:e979170e02e7 | 927 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 928 | " mulu.d r2,%6,%7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 929 | " mov %0,r2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 930 | " mov %1,r3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 931 | " eor %2,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 932 | :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "r2", "r3"); |
ashleymills | 0:e979170e02e7 | 933 | /* SQRADDAC: multiply i by j (r2/r3) and accumulate into sc0,sc1; acr adds the carry to sc2 */ |
ashleymills | 0:e979170e02e7 | 934 | #define SQRADDAC(i, j) \ |
ashleymills | 0:e979170e02e7 | 935 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 936 | " mulu.d r2,%6,%7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 937 | " add %0,%0,r2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 938 | " adc %1,%1,r3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 939 | " acr %2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 940 | :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r2", "r3"); |
ashleymills | 0:e979170e02e7 | 941 | /* SQRADDDB: add sc0,sc1,sc2 (operands %3,%4,%5 here) into c0,c1,c2 with carry, twice over (c += 2 * sc) */ |
ashleymills | 0:e979170e02e7 | 942 | #define SQRADDDB \ |
ashleymills | 0:e979170e02e7 | 943 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 944 | " add %0,%0,%3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 945 | " adc %1,%1,%4 \n\t" \ |
ashleymills | 0:e979170e02e7 | 946 | " adc %2,%2,%5 \n\t" \ |
ashleymills | 0:e979170e02e7 | 947 | " add %0,%0,%3 \n\t" \ |
ashleymills | 0:e979170e02e7 | 948 | " adc %1,%1,%4 \n\t" \ |
ashleymills | 0:e979170e02e7 | 949 | " adc %2,%2,%5 \n\t" \ |
ashleymills | 0:e979170e02e7 | 950 | :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc"); |
ashleymills | 0:e979170e02e7 | 951 | |
ashleymills | 0:e979170e02e7 | 952 | |
ashleymills | 0:e979170e02e7 | 953 | #else |
ashleymills | 0:e979170e02e7 | 954 | /* none of the assembly back-ends above was selected: mark the portable C implementation as in use */ |
ashleymills | 0:e979170e02e7 | 955 | #define TFM_ISO |
ashleymills | 0:e979170e02e7 | 956 | |
ashleymills | 0:e979170e02e7 | 957 | /* ISO C portable code */ |
ashleymills | 0:e979170e02e7 | 958 | /* anything needed at the start: nothing for this back-end */ |
ashleymills | 0:e979170e02e7 | 959 | #define COMBA_START |
ashleymills | 0:e979170e02e7 | 960 | /* clear the chaining variables c0, c1, c2 */ |
ashleymills | 0:e979170e02e7 | 961 | #define CLEAR_CARRY \ |
ashleymills | 0:e979170e02e7 | 962 | c0 = c1 = c2 = 0; |
ashleymills | 0:e979170e02e7 | 963 | /* store c0 into x */ |
ashleymills | 0:e979170e02e7 | 964 | #define COMBA_STORE(x) \ |
ashleymills | 0:e979170e02e7 | 965 | x = c0; |
ashleymills | 0:e979170e02e7 | 966 | /* store c1 into x */ |
ashleymills | 0:e979170e02e7 | 967 | #define COMBA_STORE2(x) \ |
ashleymills | 0:e979170e02e7 | 968 | x = c1; |
ashleymills | 0:e979170e02e7 | 969 | /* shift the chain down one place: c0 <- c1, c1 <- c2, c2 <- 0 */ |
ashleymills | 0:e979170e02e7 | 970 | #define CARRY_FORWARD \ |
ashleymills | 0:e979170e02e7 | 971 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); |
ashleymills | 0:e979170e02e7 | 972 | /* anything needed at the end: nothing for this back-end */ |
ashleymills | 0:e979170e02e7 | 973 | #define COMBA_FINI |
ashleymills | 0:e979170e02e7 | 974 | |
ashleymills | 0:e979170e02e7 | 975 | /* adds the product i * j to the c0,c1,c2 chain (both arguments are used here): c0 takes t, c1 takes c1 plus the part of t above DIGIT_BIT, and what overflows c1 is added to c2; arguments are parenthesized so compound expressions are cast as a whole */ |
ashleymills | 0:e979170e02e7 | 976 | #define SQRADD(i, j) \ |
ashleymills | 0:e979170e02e7 | 977 | do { fp_word t; \ |
ashleymills | 0:e979170e02e7 | 978 | t = c0 + ((fp_word)(i)) * ((fp_word)(j)); c0 = t; \ |
ashleymills | 0:e979170e02e7 | 979 | t = c1 + (t >> DIGIT_BIT); c1 = t; c2 += t >> DIGIT_BIT; \ |
ashleymills | 0:e979170e02e7 | 980 | } while (0); |
ashleymills | 0:e979170e02e7 | 981 | |
ashleymills | 0:e979170e02e7 | 982 | |
ashleymills | 0:e979170e02e7 | 983 | /* for squaring some of the terms are doubled... */ |
ashleymills | 0:e979170e02e7 | 984 | #define SQRADD2(i, j) \ |
ashleymills | 0:e979170e02e7 | 985 | do { fp_word t; \ |
ashleymills | 0:e979170e02e7 | 986 | t = ((fp_word)i) * ((fp_word)j); \ |
ashleymills | 0:e979170e02e7 | 987 | tt = (fp_word)c0 + t; c0 = tt; \ |
ashleymills | 0:e979170e02e7 | 988 | tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = tt; c2 += tt >> DIGIT_BIT; \ |
ashleymills | 0:e979170e02e7 | 989 | tt = (fp_word)c0 + t; c0 = tt; \ |
ashleymills | 0:e979170e02e7 | 990 | tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = tt; c2 += tt >> DIGIT_BIT; \ |
ashleymills | 0:e979170e02e7 | 991 | } while (0); |
ashleymills | 0:e979170e02e7 | 992 | /* SQRADDSC: start a new sum, sc0 = i * j truncated to fp_digit, sc1 = the part above DIGIT_BIT, sc2 = 0; arguments are parenthesized so compound expressions are cast as a whole */ |
ashleymills | 0:e979170e02e7 | 993 | #define SQRADDSC(i, j) \ |
ashleymills | 0:e979170e02e7 | 994 | do { fp_word t; \ |
ashleymills | 0:e979170e02e7 | 995 | t = ((fp_word)(i)) * ((fp_word)(j)); \ |
ashleymills | 0:e979170e02e7 | 996 | sc0 = (fp_digit)t; sc1 = (t >> DIGIT_BIT); sc2 = 0; \ |
ashleymills | 0:e979170e02e7 | 997 | } while (0); |
ashleymills | 0:e979170e02e7 | 998 | /* SQRADDAC: accumulate i * j into sc0,sc1; what overflows sc1 is added to sc2; arguments are parenthesized so compound expressions are cast as a whole */ |
ashleymills | 0:e979170e02e7 | 999 | #define SQRADDAC(i, j) \ |
ashleymills | 0:e979170e02e7 | 1000 | do { fp_word t; \ |
ashleymills | 0:e979170e02e7 | 1001 | t = sc0 + ((fp_word)(i)) * ((fp_word)(j)); sc0 = t; \ |
ashleymills | 0:e979170e02e7 | 1002 | t = sc1 + (t >> DIGIT_BIT); sc1 = t; sc2 += t >> DIGIT_BIT; \ |
ashleymills | 0:e979170e02e7 | 1003 | } while (0); |
ashleymills | 0:e979170e02e7 | 1004 | /* SQRADDDB: add sc0,sc1,sc2 twice into c0,c1,c2 (c += 2 * sc), passing the part of each sum above DIGIT_BIT on to the next word */ |
ashleymills | 0:e979170e02e7 | 1005 | #define SQRADDDB \ |
ashleymills | 0:e979170e02e7 | 1006 | do { fp_word t; \ |
ashleymills | 0:e979170e02e7 | 1007 | t = ((fp_word)sc0) + ((fp_word)sc0) + c0; c0 = t; \ |
ashleymills | 0:e979170e02e7 | 1008 | t = ((fp_word)sc1) + ((fp_word)sc1) + c1 + (t >> DIGIT_BIT); c1 = t; \ |
ashleymills | 0:e979170e02e7 | 1009 | c2 = c2 + ((fp_word)sc2) + ((fp_word)sc2) + (t >> DIGIT_BIT); \ |
ashleymills | 0:e979170e02e7 | 1010 | } while (0); |
ashleymills | 0:e979170e02e7 | 1011 | |
ashleymills | 0:e979170e02e7 | 1012 | #endif |
ashleymills | 0:e979170e02e7 | 1013 | |
ashleymills | 0:e979170e02e7 | 1014 | #ifdef TFM_SMALL_SET |
ashleymills | 0:e979170e02e7 | 1015 | #include "fp_sqr_comba_small_set.i" |
ashleymills | 0:e979170e02e7 | 1016 | #include "fp_sqr_comba_3.i" |
ashleymills | 0:e979170e02e7 | 1017 | #include "fp_sqr_comba_4.i" |
ashleymills | 0:e979170e02e7 | 1018 | #include "fp_sqr_comba_6.i" |
ashleymills | 0:e979170e02e7 | 1019 | #include "fp_sqr_comba_7.i" |
ashleymills | 0:e979170e02e7 | 1020 | #include "fp_sqr_comba_8.i" |
ashleymills | 0:e979170e02e7 | 1021 | #include "fp_sqr_comba_9.i" |
ashleymills | 0:e979170e02e7 | 1022 | #include "fp_sqr_comba_12.i" |
ashleymills | 0:e979170e02e7 | 1023 | #include "fp_sqr_comba_17.i" |
ashleymills | 0:e979170e02e7 | 1024 | #include "fp_sqr_comba_20.i" |
ashleymills | 0:e979170e02e7 | 1025 | #include "fp_sqr_comba_24.i" |
ashleymills | 0:e979170e02e7 | 1026 | #include "fp_sqr_comba_28.i" |
ashleymills | 0:e979170e02e7 | 1027 | #include "fp_sqr_comba_32.i" |
ashleymills | 0:e979170e02e7 | 1028 | #include "fp_sqr_comba_48.i" |
ashleymills | 0:e979170e02e7 | 1029 | #include "fp_sqr_comba_64.i" |
ashleymills | 0:e979170e02e7 | 1030 | #endif |
ashleymills | 0:e979170e02e7 | 1031 | /* end fp_sqr_comba.c asm */ |
ashleymills | 0:e979170e02e7 | 1032 | |
ashleymills | 0:e979170e02e7 | 1033 | /* start fp_mul_comba.c asm */ |
ashleymills | 0:e979170e02e7 | 1034 | /* these are the combas. Worship them. */ |
ashleymills | 0:e979170e02e7 | 1035 | #if defined(TFM_X86) |
ashleymills | 0:e979170e02e7 | 1036 | /* Generic x86 optimized code */ |
ashleymills | 0:e979170e02e7 | 1037 | |
ashleymills | 0:e979170e02e7 | 1038 | /* COMBA_START: hook for anything needed at the start; expands to nothing on x86 */ |
ashleymills | 0:e979170e02e7 | 1039 | #define COMBA_START |
ashleymills | 0:e979170e02e7 | 1040 | |
ashleymills | 0:e979170e02e7 | 1041 | /* COMBA_CLEAR: zero the three chaining variables c0, c1 and c2 */ |
ashleymills | 0:e979170e02e7 | 1042 | #define COMBA_CLEAR \ |
ashleymills | 0:e979170e02e7 | 1043 | c0 = c1 = c2 = 0; |
ashleymills | 0:e979170e02e7 | 1044 | |
ashleymills | 0:e979170e02e7 | 1045 | /* COMBA_FORWARD: forward the carry to the next digit (c0 takes c1, c1 takes c2, c2 is cleared) */ |
ashleymills | 0:e979170e02e7 | 1046 | #define COMBA_FORWARD \ |
ashleymills | 0:e979170e02e7 | 1047 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); |
ashleymills | 0:e979170e02e7 | 1048 | |
ashleymills | 0:e979170e02e7 | 1049 | /* COMBA_STORE(x): store the first sum, c0, into x */ |
ashleymills | 0:e979170e02e7 | 1050 | #define COMBA_STORE(x) \ |
ashleymills | 0:e979170e02e7 | 1051 | x = c0; |
ashleymills | 0:e979170e02e7 | 1052 | |
ashleymills | 0:e979170e02e7 | 1053 | /* COMBA_STORE2(x): store the second sum [carry], c1, into x */ |
ashleymills | 0:e979170e02e7 | 1054 | #define COMBA_STORE2(x) \ |
ashleymills | 0:e979170e02e7 | 1055 | x = c1; |
ashleymills | 0:e979170e02e7 | 1056 | |
ashleymills | 0:e979170e02e7 | 1057 | /* COMBA_FINI: hook for anything needed at the end; expands to nothing on x86 */ |
ashleymills | 0:e979170e02e7 | 1058 | #define COMBA_FINI |
ashleymills | 0:e979170e02e7 | 1059 | |
ashleymills | 0:e979170e02e7 | 1060 | /* MULADD(i, j): multiply i by j (mull leaves the product in edx:eax) and add it into c0/c1, with the final carry added to c2. i and j are taken as memory operands; the flags are declared clobbered with the documented "cc" name. */ |
ashleymills | 0:e979170e02e7 | 1061 | #define MULADD(i, j) \ |
ashleymills | 0:e979170e02e7 | 1062 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 1063 | "movl %6,%%eax \n\t" \ |
ashleymills | 0:e979170e02e7 | 1064 | "mull %7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 1065 | "addl %%eax,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 1066 | "adcl %%edx,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 1067 | "adcl $0,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 1068 | :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc"); |
ashleymills | 0:e979170e02e7 | 1069 | |
ashleymills | 0:e979170e02e7 | 1070 | #elif defined(TFM_X86_64) |
ashleymills | 0:e979170e02e7 | 1071 | /* x86-64 optimized */ |
ashleymills | 0:e979170e02e7 | 1072 | |
ashleymills | 0:e979170e02e7 | 1073 | /* COMBA_START: hook for anything needed at the start; expands to nothing on x86-64 */ |
ashleymills | 0:e979170e02e7 | 1074 | #define COMBA_START |
ashleymills | 0:e979170e02e7 | 1075 | |
ashleymills | 0:e979170e02e7 | 1076 | /* COMBA_CLEAR: zero the three chaining variables c0, c1 and c2 */ |
ashleymills | 0:e979170e02e7 | 1077 | #define COMBA_CLEAR \ |
ashleymills | 0:e979170e02e7 | 1078 | c0 = c1 = c2 = 0; |
ashleymills | 0:e979170e02e7 | 1079 | |
ashleymills | 0:e979170e02e7 | 1080 | /* COMBA_FORWARD: forward the carry to the next digit (c0 takes c1, c1 takes c2, c2 is cleared) */ |
ashleymills | 0:e979170e02e7 | 1081 | #define COMBA_FORWARD \ |
ashleymills | 0:e979170e02e7 | 1082 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); |
ashleymills | 0:e979170e02e7 | 1083 | |
ashleymills | 0:e979170e02e7 | 1084 | /* COMBA_STORE(x): store the first sum, c0, into x */ |
ashleymills | 0:e979170e02e7 | 1085 | #define COMBA_STORE(x) \ |
ashleymills | 0:e979170e02e7 | 1086 | x = c0; |
ashleymills | 0:e979170e02e7 | 1087 | |
ashleymills | 0:e979170e02e7 | 1088 | /* COMBA_STORE2(x): store the second sum [carry], c1, into x */ |
ashleymills | 0:e979170e02e7 | 1089 | #define COMBA_STORE2(x) \ |
ashleymills | 0:e979170e02e7 | 1090 | x = c1; |
ashleymills | 0:e979170e02e7 | 1091 | |
ashleymills | 0:e979170e02e7 | 1092 | /* COMBA_FINI: hook for anything needed at the end; expands to nothing on x86-64 */ |
ashleymills | 0:e979170e02e7 | 1093 | #define COMBA_FINI |
ashleymills | 0:e979170e02e7 | 1094 | |
ashleymills | 0:e979170e02e7 | 1095 | /* MULADD(i, j): multiply i by j (mulq leaves the product in rdx:rax) and add it into c0/c1, with the final carry added to c2. i and j use the general "g" constraint; the flags are declared clobbered with the documented "cc" name. */ |
ashleymills | 0:e979170e02e7 | 1096 | #define MULADD(i, j) \ |
ashleymills | 0:e979170e02e7 | 1097 | __asm__ ( \ |
ashleymills | 0:e979170e02e7 | 1098 | "movq %6,%%rax \n\t" \ |
ashleymills | 0:e979170e02e7 | 1099 | "mulq %7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 1100 | "addq %%rax,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 1101 | "adcq %%rdx,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 1102 | "adcq $0,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 1103 | :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","cc"); |
ashleymills | 0:e979170e02e7 | 1104 | |
ashleymills | 0:e979170e02e7 | 1105 | #elif defined(TFM_SSE2) |
ashleymills | 0:e979170e02e7 | 1106 | /* use SSE2 optimizations */ |
ashleymills | 0:e979170e02e7 | 1107 | |
ashleymills | 0:e979170e02e7 | 1108 | /* COMBA_START: hook for anything needed at the start; expands to nothing for SSE2 */ |
ashleymills | 0:e979170e02e7 | 1109 | #define COMBA_START |
ashleymills | 0:e979170e02e7 | 1110 | |
ashleymills | 0:e979170e02e7 | 1111 | /* COMBA_CLEAR: zero the three chaining variables c0, c1 and c2 */ |
ashleymills | 0:e979170e02e7 | 1112 | #define COMBA_CLEAR \ |
ashleymills | 0:e979170e02e7 | 1113 | c0 = c1 = c2 = 0; |
ashleymills | 0:e979170e02e7 | 1114 | |
ashleymills | 0:e979170e02e7 | 1115 | /* COMBA_FORWARD: forward the carry to the next digit (c0 takes c1, c1 takes c2, c2 is cleared) */ |
ashleymills | 0:e979170e02e7 | 1116 | #define COMBA_FORWARD \ |
ashleymills | 0:e979170e02e7 | 1117 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); |
ashleymills | 0:e979170e02e7 | 1118 | |
ashleymills | 0:e979170e02e7 | 1119 | /* COMBA_STORE(x): store the first sum, c0, into x */ |
ashleymills | 0:e979170e02e7 | 1120 | #define COMBA_STORE(x) \ |
ashleymills | 0:e979170e02e7 | 1121 | x = c0; |
ashleymills | 0:e979170e02e7 | 1122 | |
ashleymills | 0:e979170e02e7 | 1123 | /* COMBA_STORE2(x): store the second sum [carry], c1, into x */ |
ashleymills | 0:e979170e02e7 | 1124 | #define COMBA_STORE2(x) \ |
ashleymills | 0:e979170e02e7 | 1125 | x = c1; |
ashleymills | 0:e979170e02e7 | 1126 | |
ashleymills | 0:e979170e02e7 | 1127 | /* COMBA_FINI: end hook; issues emms, presumably to reset the MMX state after MULADD has used mm0/mm1 */ |
ashleymills | 0:e979170e02e7 | 1128 | #define COMBA_FINI \ |
ashleymills | 0:e979170e02e7 | 1129 | __asm__("emms"); |
ashleymills | 0:e979170e02e7 | 1130 | |
ashleymills | 0:e979170e02e7 | 1131 | /* MULADD(i, j): load i and j into mm0/mm1, multiply them with pmuludq, then add the low 32 bits of the product to c0 and the high 32 bits (after psrlq $32) to c1, with the final carry added to c2. Flags are declared clobbered with the documented "cc" name. NOTE(review): mm0 and mm1 are written but not listed as clobbers - confirm this is safe for the callers. */ |
ashleymills | 0:e979170e02e7 | 1132 | #define MULADD(i, j) \ |
ashleymills | 0:e979170e02e7 | 1133 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 1134 | "movd %6,%%mm0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 1135 | "movd %7,%%mm1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 1136 | "pmuludq %%mm1,%%mm0\n\t" \ |
ashleymills | 0:e979170e02e7 | 1137 | "movd %%mm0,%%eax \n\t" \ |
ashleymills | 0:e979170e02e7 | 1138 | "psrlq $32,%%mm0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 1139 | "addl %%eax,%0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 1140 | "movd %%mm0,%%eax \n\t" \ |
ashleymills | 0:e979170e02e7 | 1141 | "adcl %%eax,%1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 1142 | "adcl $0,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 1143 | :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","cc"); |
ashleymills | 0:e979170e02e7 | 1144 | |
ashleymills | 0:e979170e02e7 | 1145 | #elif defined(TFM_ARM) |
ashleymills | 0:e979170e02e7 | 1146 | /* ARM code */ |
ashleymills | 0:e979170e02e7 | 1147 | /* COMBA_START: start hook; expands to nothing on ARM */ |
ashleymills | 0:e979170e02e7 | 1148 | #define COMBA_START |
ashleymills | 0:e979170e02e7 | 1149 | /* COMBA_CLEAR: zero the three chaining variables c0, c1 and c2 */ |
ashleymills | 0:e979170e02e7 | 1150 | #define COMBA_CLEAR \ |
ashleymills | 0:e979170e02e7 | 1151 | c0 = c1 = c2 = 0; |
ashleymills | 0:e979170e02e7 | 1152 | /* COMBA_FORWARD: move the carry along one digit (c0 takes c1, c1 takes c2, c2 is cleared) */ |
ashleymills | 0:e979170e02e7 | 1153 | #define COMBA_FORWARD \ |
ashleymills | 0:e979170e02e7 | 1154 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); |
ashleymills | 0:e979170e02e7 | 1155 | /* COMBA_STORE(x): store c0 into x */ |
ashleymills | 0:e979170e02e7 | 1156 | #define COMBA_STORE(x) \ |
ashleymills | 0:e979170e02e7 | 1157 | x = c0; |
ashleymills | 0:e979170e02e7 | 1158 | /* COMBA_STORE2(x): store c1 into x */ |
ashleymills | 0:e979170e02e7 | 1159 | #define COMBA_STORE2(x) \ |
ashleymills | 0:e979170e02e7 | 1160 | x = c1; |
ashleymills | 0:e979170e02e7 | 1161 | /* COMBA_FINI: end hook; expands to nothing on ARM */ |
ashleymills | 0:e979170e02e7 | 1162 | #define COMBA_FINI |
ashleymills | 0:e979170e02e7 | 1163 | /* MULADD(i, j): UMULL puts the product of i and j in r1:r0; ADDS/ADCS add it into c0/c1 and ADC adds the final carry to c2. r0, r1 and the flags are declared clobbered (flags via the documented "cc" name). */ |
ashleymills | 0:e979170e02e7 | 1164 | #define MULADD(i, j) \ |
ashleymills | 0:e979170e02e7 | 1165 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 1166 | " UMULL r0,r1,%6,%7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 1167 | " ADDS %0,%0,r0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 1168 | " ADCS %1,%1,r1 \n\t" \ |
ashleymills | 0:e979170e02e7 | 1169 | " ADC %2,%2,#0 \n\t" \ |
ashleymills | 0:e979170e02e7 | 1170 | :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "cc"); |
ashleymills | 0:e979170e02e7 | 1171 | |
ashleymills | 0:e979170e02e7 | 1172 | #elif defined(TFM_PPC32) |
ashleymills | 0:e979170e02e7 | 1173 | /* For 32-bit PPC */ |
ashleymills | 0:e979170e02e7 | 1174 | /* COMBA_START: start hook; expands to nothing on 32-bit PPC */ |
ashleymills | 0:e979170e02e7 | 1175 | #define COMBA_START |
ashleymills | 0:e979170e02e7 | 1176 | /* COMBA_CLEAR: zero the three chaining variables c0, c1 and c2 */ |
ashleymills | 0:e979170e02e7 | 1177 | #define COMBA_CLEAR \ |
ashleymills | 0:e979170e02e7 | 1178 | c0 = c1 = c2 = 0; |
ashleymills | 0:e979170e02e7 | 1179 | /* COMBA_FORWARD: move the carry along one digit (c0 takes c1, c1 takes c2, c2 is cleared) */ |
ashleymills | 0:e979170e02e7 | 1180 | #define COMBA_FORWARD \ |
ashleymills | 0:e979170e02e7 | 1181 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); |
ashleymills | 0:e979170e02e7 | 1182 | /* COMBA_STORE(x): store c0 into x */ |
ashleymills | 0:e979170e02e7 | 1183 | #define COMBA_STORE(x) \ |
ashleymills | 0:e979170e02e7 | 1184 | x = c0; |
ashleymills | 0:e979170e02e7 | 1185 | /* COMBA_STORE2(x): store c1 into x */ |
ashleymills | 0:e979170e02e7 | 1186 | #define COMBA_STORE2(x) \ |
ashleymills | 0:e979170e02e7 | 1187 | x = c1; |
ashleymills | 0:e979170e02e7 | 1188 | /* COMBA_FINI: end hook; expands to nothing on 32-bit PPC */ |
ashleymills | 0:e979170e02e7 | 1189 | #define COMBA_FINI |
ashleymills | 0:e979170e02e7 | 1190 | |
ashleymills | 0:e979170e02e7 | 1191 | /* MULADD(i, j): mullw/mulhwu compute the low and high words of the product of i and j in register 16; addc adds the low word to c0, adde adds the high word plus carry to c1, addze adds the remaining carry to c2. Register 16 is the only declared clobber. Untested: relies on mulhwu not changing the carry between addc and adde (the docs say it does not). */ |
ashleymills | 0:e979170e02e7 | 1192 | #define MULADD(i, j) \ |
ashleymills | 0:e979170e02e7 | 1193 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 1194 | " mullw 16,%6,%7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 1195 | " addc %0,%0,16 \n\t" \ |
ashleymills | 0:e979170e02e7 | 1196 | " mulhwu 16,%6,%7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 1197 | " adde %1,%1,16 \n\t" \ |
ashleymills | 0:e979170e02e7 | 1198 | " addze %2,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 1199 | :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16"); |
ashleymills | 0:e979170e02e7 | 1200 | |
ashleymills | 0:e979170e02e7 | 1201 | #elif defined(TFM_PPC64) |
ashleymills | 0:e979170e02e7 | 1202 | /* For 64-bit PPC */ |
ashleymills | 0:e979170e02e7 | 1203 | /* COMBA_START: start hook; expands to nothing on 64-bit PPC */ |
ashleymills | 0:e979170e02e7 | 1204 | #define COMBA_START |
ashleymills | 0:e979170e02e7 | 1205 | /* COMBA_CLEAR: zero the three chaining variables c0, c1 and c2 */ |
ashleymills | 0:e979170e02e7 | 1206 | #define COMBA_CLEAR \ |
ashleymills | 0:e979170e02e7 | 1207 | c0 = c1 = c2 = 0; |
ashleymills | 0:e979170e02e7 | 1208 | /* COMBA_FORWARD: move the carry along one digit (c0 takes c1, c1 takes c2, c2 is cleared) */ |
ashleymills | 0:e979170e02e7 | 1209 | #define COMBA_FORWARD \ |
ashleymills | 0:e979170e02e7 | 1210 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); |
ashleymills | 0:e979170e02e7 | 1211 | /* COMBA_STORE(x): store c0 into x */ |
ashleymills | 0:e979170e02e7 | 1212 | #define COMBA_STORE(x) \ |
ashleymills | 0:e979170e02e7 | 1213 | x = c0; |
ashleymills | 0:e979170e02e7 | 1214 | /* COMBA_STORE2(x): store c1 into x */ |
ashleymills | 0:e979170e02e7 | 1215 | #define COMBA_STORE2(x) \ |
ashleymills | 0:e979170e02e7 | 1216 | x = c1; |
ashleymills | 0:e979170e02e7 | 1217 | /* COMBA_FINI: end hook; expands to nothing on 64-bit PPC */ |
ashleymills | 0:e979170e02e7 | 1218 | #define COMBA_FINI |
ashleymills | 0:e979170e02e7 | 1219 | |
ashleymills | 0:e979170e02e7 | 1220 | /* MULADD(i, j): mulld/mulhdu compute the low and high doublewords of the product of i and j in register 16; addc adds the low part to c0, adde adds the high part plus carry to c1, addze adds the remaining carry to c2. Register 16 is the only declared clobber. Untested: relies on mulhdu not changing the carry between addc and adde (the docs say it does not). */ |
ashleymills | 0:e979170e02e7 | 1221 | #define MULADD(i, j) \ |
ashleymills | 0:e979170e02e7 | 1222 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 1223 | " mulld 16,%6,%7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 1224 | " addc %0,%0,16 \n\t" \ |
ashleymills | 0:e979170e02e7 | 1225 | " mulhdu 16,%6,%7 \n\t" \ |
ashleymills | 0:e979170e02e7 | 1226 | " adde %1,%1,16 \n\t" \ |
ashleymills | 0:e979170e02e7 | 1227 | " addze %2,%2 \n\t" \ |
ashleymills | 0:e979170e02e7 | 1228 | :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16"); |
ashleymills | 0:e979170e02e7 | 1229 | |
ashleymills | 0:e979170e02e7 | 1230 | #elif defined(TFM_AVR32) |
ashleymills | 0:e979170e02e7 | 1231 | |
ashleymills | 0:e979170e02e7 | 1232 | /* AVR32 code */ |
ashleymills | 0:e979170e02e7 | 1233 | /* COMBA_START: start hook; expands to nothing on AVR32 */ |
ashleymills | 0:e979170e02e7 | 1234 | #define COMBA_START |
ashleymills | 0:e979170e02e7 | 1235 | /* COMBA_CLEAR: zero the three chaining variables c0, c1 and c2 */ |
ashleymills | 0:e979170e02e7 | 1236 | #define COMBA_CLEAR \ |
ashleymills | 0:e979170e02e7 | 1237 | c0 = c1 = c2 = 0; |
ashleymills | 0:e979170e02e7 | 1238 | /* COMBA_FORWARD: move the carry along one digit (c0 takes c1, c1 takes c2, c2 is cleared) */ |
ashleymills | 0:e979170e02e7 | 1239 | #define COMBA_FORWARD \ |
ashleymills | 0:e979170e02e7 | 1240 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); |
ashleymills | 0:e979170e02e7 | 1241 | /* COMBA_STORE(x): store c0 into x */ |
ashleymills | 0:e979170e02e7 | 1242 | #define COMBA_STORE(x) \ |
ashleymills | 0:e979170e02e7 | 1243 | x = c0; |
ashleymills | 0:e979170e02e7 | 1244 | /* COMBA_STORE2(x): store c1 into x */ |
ashleymills | 0:e979170e02e7 | 1245 | #define COMBA_STORE2(x) \ |
ashleymills | 0:e979170e02e7 | 1246 | x = c1; |
ashleymills | 0:e979170e02e7 | 1247 | /* COMBA_FINI: end hook; expands to nothing on AVR32 */ |
ashleymills | 0:e979170e02e7 | 1248 | #define COMBA_FINI |
ashleymills | 0:e979170e02e7 | 1249 | /* MULADD(i, j): mulu.d multiplies i by j into the r2/r3 pair; add/adc add r2 to c0 and r3 (with carry) to c1, and acr adds the remaining carry to c2. r2 and r3 are the declared clobbers. */ |
ashleymills | 0:e979170e02e7 | 1250 | #define MULADD(i, j) \ |
ashleymills | 0:e979170e02e7 | 1251 | __asm__( \ |
ashleymills | 0:e979170e02e7 | 1252 | " mulu.d r2,%6,%7 \n\t"\ |
ashleymills | 0:e979170e02e7 | 1253 | " add %0,r2 \n\t"\ |
ashleymills | 0:e979170e02e7 | 1254 | " adc %1,%1,r3 \n\t"\ |
ashleymills | 0:e979170e02e7 | 1255 | " acr %2 \n\t"\ |
ashleymills | 0:e979170e02e7 | 1256 | :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2","r3"); |
ashleymills | 0:e979170e02e7 | 1257 | |
ashleymills | 0:e979170e02e7 | 1258 | #else |
ashleymills | 0:e979170e02e7 | 1259 | /* ISO C code */ |
ashleymills | 0:e979170e02e7 | 1260 | /* COMBA_START: start hook; expands to nothing in the ISO C version */ |
ashleymills | 0:e979170e02e7 | 1261 | #define COMBA_START |
ashleymills | 0:e979170e02e7 | 1262 | /* COMBA_CLEAR: zero the three chaining variables c0, c1 and c2 */ |
ashleymills | 0:e979170e02e7 | 1263 | #define COMBA_CLEAR \ |
ashleymills | 0:e979170e02e7 | 1264 | c0 = c1 = c2 = 0; |
ashleymills | 0:e979170e02e7 | 1265 | /* COMBA_FORWARD: move the carry along one digit (c0 takes c1, c1 takes c2, c2 is cleared) */ |
ashleymills | 0:e979170e02e7 | 1266 | #define COMBA_FORWARD \ |
ashleymills | 0:e979170e02e7 | 1267 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); |
ashleymills | 0:e979170e02e7 | 1268 | /* COMBA_STORE(x): store c0 into x */ |
ashleymills | 0:e979170e02e7 | 1269 | #define COMBA_STORE(x) \ |
ashleymills | 0:e979170e02e7 | 1270 | x = c0; |
ashleymills | 0:e979170e02e7 | 1271 | /* COMBA_STORE2(x): store c1 into x */ |
ashleymills | 0:e979170e02e7 | 1272 | #define COMBA_STORE2(x) \ |
ashleymills | 0:e979170e02e7 | 1273 | x = c1; |
ashleymills | 0:e979170e02e7 | 1274 | /* COMBA_FINI: end hook; expands to nothing in the ISO C version */ |
ashleymills | 0:e979170e02e7 | 1275 | #define COMBA_FINI |
ashleymills | 0:e979170e02e7 | 1276 | /* MULADD(i, j): add the product of i and j into c0, carry (t >> DIGIT_BIT) into c1, and carry again into c2. i and j are each evaluated once and are parenthesized so that expression arguments are cast as a whole. Assumes fp_word can hold a digit product plus a digit - TODO confirm against the fp_word typedef. The expansion already ends in ';'. */ |
ashleymills | 0:e979170e02e7 | 1277 | #define MULADD(i, j) \ |
ashleymills | 0:e979170e02e7 | 1278 | do { fp_word t; \ |
ashleymills | 0:e979170e02e7 | 1279 | t = (fp_word)c0 + ((fp_word)(i)) * ((fp_word)(j)); c0 = t; \ |
ashleymills | 0:e979170e02e7 | 1280 | t = (fp_word)c1 + (t >> DIGIT_BIT); c1 = t; c2 += t >> DIGIT_BIT; \ |
ashleymills | 0:e979170e02e7 | 1281 | } while (0); |
ashleymills | 0:e979170e02e7 | 1282 | |
ashleymills | 0:e979170e02e7 | 1283 | #endif |
ashleymills | 0:e979170e02e7 | 1284 | |
ashleymills | 0:e979170e02e7 | 1285 | |
ashleymills | 0:e979170e02e7 | 1286 | #ifdef TFM_SMALL_SET |
ashleymills | 0:e979170e02e7 | 1287 | #include "fp_mul_comba_small_set.i" |
ashleymills | 0:e979170e02e7 | 1288 | #include "fp_mul_comba_3.i" |
ashleymills | 0:e979170e02e7 | 1289 | #include "fp_mul_comba_4.i" |
ashleymills | 0:e979170e02e7 | 1290 | #include "fp_mul_comba_6.i" |
ashleymills | 0:e979170e02e7 | 1291 | #include "fp_mul_comba_7.i" |
ashleymills | 0:e979170e02e7 | 1292 | #include "fp_mul_comba_8.i" |
ashleymills | 0:e979170e02e7 | 1293 | #include "fp_mul_comba_9.i" |
ashleymills | 0:e979170e02e7 | 1294 | #include "fp_mul_comba_12.i" |
ashleymills | 0:e979170e02e7 | 1295 | #include "fp_mul_comba_17.i" |
ashleymills | 0:e979170e02e7 | 1296 | #include "fp_mul_comba_20.i" |
ashleymills | 0:e979170e02e7 | 1297 | #include "fp_mul_comba_24.i" |
ashleymills | 0:e979170e02e7 | 1298 | #include "fp_mul_comba_28.i" |
ashleymills | 0:e979170e02e7 | 1299 | #include "fp_mul_comba_32.i" |
ashleymills | 0:e979170e02e7 | 1300 | #include "fp_mul_comba_48.i" |
ashleymills | 0:e979170e02e7 | 1301 | #include "fp_mul_comba_64.i" |
ashleymills | 0:e979170e02e7 | 1302 | #endif |
ashleymills | 0:e979170e02e7 | 1303 | |
ashleymills | 0:e979170e02e7 | 1304 | /* end fp_mul_comba.c asm */ |
ashleymills | 0:e979170e02e7 | 1305 |