A super trimmed down TLS stack, GPL licensed

Dependents:   MiniTLS-HTTPS-Example

MiniTLS - A super trimmed down TLS/SSL library for embedded devices. Author: Donatien Garnier. Copyright (C) 2013-2014 AppNearMe Ltd.

This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

Committer:
MiniTLS
Date:
Tue Jun 10 14:23:09 2014 +0000
Revision:
4:cbaf466d717d
Parent:
0:35aa5be3b78d
Fixes for mbed

Who changed what in which revision?

User | Revision | Line number | New contents of line
MiniTLS 0:35aa5be3b78d 1 /* TomsFastMath, a fast ISO C bignum library.
MiniTLS 0:35aa5be3b78d 2 *
MiniTLS 0:35aa5be3b78d 3 * This project is meant to fill in where LibTomMath
MiniTLS 0:35aa5be3b78d 4 * falls short. That is speed ;-)
MiniTLS 0:35aa5be3b78d 5 *
MiniTLS 0:35aa5be3b78d 6 * This project is public domain and free for all purposes.
MiniTLS 0:35aa5be3b78d 7 *
MiniTLS 0:35aa5be3b78d 8 * Tom St Denis, tomstdenis@gmail.com
MiniTLS 0:35aa5be3b78d 9 */
MiniTLS 0:35aa5be3b78d 10 #include <tfm.h>
MiniTLS 0:35aa5be3b78d 11
MiniTLS 0:35aa5be3b78d 12 /******************************************************************/
MiniTLS 0:35aa5be3b78d 13 #if defined(TFM_X86) && !defined(TFM_SSE2)
MiniTLS 0:35aa5be3b78d 14 /* x86-32 code: mull leaves the double-width product in edx:eax */
MiniTLS 0:35aa5be3b78d 15 /* MONT_START, MONT_FINI and LOOP_END expand to nothing here; LOOP_START derives mu from c[x] and mp */
MiniTLS 0:35aa5be3b78d 16 #define MONT_START
MiniTLS 0:35aa5be3b78d 17 #define MONT_FINI
MiniTLS 0:35aa5be3b78d 18 #define LOOP_END
MiniTLS 0:35aa5be3b78d 19 #define LOOP_START \
MiniTLS 0:35aa5be3b78d 20 mu = c[x] * mp
MiniTLS 0:35aa5be3b78d 21 /* INNERMUL: _c[LO] += mu * (*tmpm++) + cy; the high word of that sum (edx) becomes the new cy */
MiniTLS 0:35aa5be3b78d 22 #define INNERMUL \
MiniTLS 0:35aa5be3b78d 23 asm( \
MiniTLS 0:35aa5be3b78d 24 "movl %5,%%eax \n\t" \
MiniTLS 0:35aa5be3b78d 25 "mull %4 \n\t" \
MiniTLS 0:35aa5be3b78d 26 "addl %1,%%eax \n\t" \
MiniTLS 0:35aa5be3b78d 27 "adcl $0,%%edx \n\t" \
MiniTLS 0:35aa5be3b78d 28 "addl %%eax,%0 \n\t" \
MiniTLS 0:35aa5be3b78d 29 "adcl $0,%%edx \n\t" \
MiniTLS 0:35aa5be3b78d 30 "movl %%edx,%1 \n\t" \
MiniTLS 0:35aa5be3b78d 31 :"=g"(_c[LO]), "=r"(cy) \
MiniTLS 0:35aa5be3b78d 32 :"0"(_c[LO]), "1"(cy), "g"(mu), "g"(*tmpm++) \
MiniTLS 0:35aa5be3b78d 33 : "%eax", "%edx", "%cc")
MiniTLS 0:35aa5be3b78d 34 /* PROPCARRY: _c[LO] += cy; cy is then set to the carry out of that addition (0 or 1) via setb/movzbl */
MiniTLS 0:35aa5be3b78d 35 #define PROPCARRY \
MiniTLS 0:35aa5be3b78d 36 asm( \
MiniTLS 0:35aa5be3b78d 37 "addl %1,%0 \n\t" \
MiniTLS 0:35aa5be3b78d 38 "setb %%al \n\t" \
MiniTLS 0:35aa5be3b78d 39 "movzbl %%al,%1 \n\t" \
MiniTLS 0:35aa5be3b78d 40 :"=g"(_c[LO]), "=r"(cy) \
MiniTLS 0:35aa5be3b78d 41 :"0"(_c[LO]), "1"(cy) \
MiniTLS 0:35aa5be3b78d 42 : "%eax", "%cc")
MiniTLS 0:35aa5be3b78d 43
MiniTLS 0:35aa5be3b78d 44 /******************************************************************/
MiniTLS 0:35aa5be3b78d 45 #elif defined(TFM_X86_64)
MiniTLS 0:35aa5be3b78d 46 /* x86-64 code: mulq leaves the double-width product in rdx:rax */
MiniTLS 0:35aa5be3b78d 47 /* MONT_START, MONT_FINI and LOOP_END expand to nothing here; LOOP_START derives mu from c[x] and mp */
MiniTLS 0:35aa5be3b78d 48 #define MONT_START
MiniTLS 0:35aa5be3b78d 49 #define MONT_FINI
MiniTLS 0:35aa5be3b78d 50 #define LOOP_END
MiniTLS 0:35aa5be3b78d 51 #define LOOP_START \
MiniTLS 0:35aa5be3b78d 52 mu = c[x] * mp
MiniTLS 0:35aa5be3b78d 53 /* INNERMUL: _c[LO] += mu * (*tmpm++) + cy; the high word of that sum (rdx) becomes the new cy */
MiniTLS 0:35aa5be3b78d 54 #define INNERMUL \
MiniTLS 0:35aa5be3b78d 55 asm( \
MiniTLS 0:35aa5be3b78d 56 "movq %5,%%rax \n\t" \
MiniTLS 0:35aa5be3b78d 57 "mulq %4 \n\t" \
MiniTLS 0:35aa5be3b78d 58 "addq %1,%%rax \n\t" \
MiniTLS 0:35aa5be3b78d 59 "adcq $0,%%rdx \n\t" \
MiniTLS 0:35aa5be3b78d 60 "addq %%rax,%0 \n\t" \
MiniTLS 0:35aa5be3b78d 61 "adcq $0,%%rdx \n\t" \
MiniTLS 0:35aa5be3b78d 62 "movq %%rdx,%1 \n\t" \
MiniTLS 0:35aa5be3b78d 63 :"=g"(_c[LO]), "=r"(cy) \
MiniTLS 0:35aa5be3b78d 64 :"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \
MiniTLS 0:35aa5be3b78d 65 : "%rax", "%rdx", "%cc")
MiniTLS 0:35aa5be3b78d 66 /* INNERMUL8: eight unrolled INNERMUL steps over the digits at byte offsets 0x00..0x38 from _c and tmpm; r10/r11 preload the next operands; neither pointer is advanced here (fp_montgomery_reduce adds 8 to both) */
MiniTLS 0:35aa5be3b78d 67 #define INNERMUL8 \
MiniTLS 0:35aa5be3b78d 68 asm( \
MiniTLS 0:35aa5be3b78d 69 "movq 0(%5),%%rax \n\t" \
MiniTLS 0:35aa5be3b78d 70 "movq 0(%2),%%r10 \n\t" \
MiniTLS 0:35aa5be3b78d 71 "movq 0x8(%5),%%r11 \n\t" \
MiniTLS 0:35aa5be3b78d 72 "mulq %4 \n\t" \
MiniTLS 0:35aa5be3b78d 73 "addq %%r10,%%rax \n\t" \
MiniTLS 0:35aa5be3b78d 74 "adcq $0,%%rdx \n\t" \
MiniTLS 0:35aa5be3b78d 75 "movq 0x8(%2),%%r10 \n\t" \
MiniTLS 0:35aa5be3b78d 76 "addq %3,%%rax \n\t" \
MiniTLS 0:35aa5be3b78d 77 "adcq $0,%%rdx \n\t" \
MiniTLS 0:35aa5be3b78d 78 "movq %%rax,0(%0) \n\t" \
MiniTLS 0:35aa5be3b78d 79 "movq %%rdx,%1 \n\t" \
MiniTLS 0:35aa5be3b78d 80 \
MiniTLS 0:35aa5be3b78d 81 "movq %%r11,%%rax \n\t" \
MiniTLS 0:35aa5be3b78d 82 "movq 0x10(%5),%%r11 \n\t" \
MiniTLS 0:35aa5be3b78d 83 "mulq %4 \n\t" \
MiniTLS 0:35aa5be3b78d 84 "addq %%r10,%%rax \n\t" \
MiniTLS 0:35aa5be3b78d 85 "adcq $0,%%rdx \n\t" \
MiniTLS 0:35aa5be3b78d 86 "movq 0x10(%2),%%r10 \n\t" \
MiniTLS 0:35aa5be3b78d 87 "addq %3,%%rax \n\t" \
MiniTLS 0:35aa5be3b78d 88 "adcq $0,%%rdx \n\t" \
MiniTLS 0:35aa5be3b78d 89 "movq %%rax,0x8(%0) \n\t" \
MiniTLS 0:35aa5be3b78d 90 "movq %%rdx,%1 \n\t" \
MiniTLS 0:35aa5be3b78d 91 \
MiniTLS 0:35aa5be3b78d 92 "movq %%r11,%%rax \n\t" \
MiniTLS 0:35aa5be3b78d 93 "movq 0x18(%5),%%r11 \n\t" \
MiniTLS 0:35aa5be3b78d 94 "mulq %4 \n\t" \
MiniTLS 0:35aa5be3b78d 95 "addq %%r10,%%rax \n\t" \
MiniTLS 0:35aa5be3b78d 96 "adcq $0,%%rdx \n\t" \
MiniTLS 0:35aa5be3b78d 97 "movq 0x18(%2),%%r10 \n\t" \
MiniTLS 0:35aa5be3b78d 98 "addq %3,%%rax \n\t" \
MiniTLS 0:35aa5be3b78d 99 "adcq $0,%%rdx \n\t" \
MiniTLS 0:35aa5be3b78d 100 "movq %%rax,0x10(%0) \n\t" \
MiniTLS 0:35aa5be3b78d 101 "movq %%rdx,%1 \n\t" \
MiniTLS 0:35aa5be3b78d 102 \
MiniTLS 0:35aa5be3b78d 103 "movq %%r11,%%rax \n\t" \
MiniTLS 0:35aa5be3b78d 104 "movq 0x20(%5),%%r11 \n\t" \
MiniTLS 0:35aa5be3b78d 105 "mulq %4 \n\t" \
MiniTLS 0:35aa5be3b78d 106 "addq %%r10,%%rax \n\t" \
MiniTLS 0:35aa5be3b78d 107 "adcq $0,%%rdx \n\t" \
MiniTLS 0:35aa5be3b78d 108 "movq 0x20(%2),%%r10 \n\t" \
MiniTLS 0:35aa5be3b78d 109 "addq %3,%%rax \n\t" \
MiniTLS 0:35aa5be3b78d 110 "adcq $0,%%rdx \n\t" \
MiniTLS 0:35aa5be3b78d 111 "movq %%rax,0x18(%0) \n\t" \
MiniTLS 0:35aa5be3b78d 112 "movq %%rdx,%1 \n\t" \
MiniTLS 0:35aa5be3b78d 113 \
MiniTLS 0:35aa5be3b78d 114 "movq %%r11,%%rax \n\t" \
MiniTLS 0:35aa5be3b78d 115 "movq 0x28(%5),%%r11 \n\t" \
MiniTLS 0:35aa5be3b78d 116 "mulq %4 \n\t" \
MiniTLS 0:35aa5be3b78d 117 "addq %%r10,%%rax \n\t" \
MiniTLS 0:35aa5be3b78d 118 "adcq $0,%%rdx \n\t" \
MiniTLS 0:35aa5be3b78d 119 "movq 0x28(%2),%%r10 \n\t" \
MiniTLS 0:35aa5be3b78d 120 "addq %3,%%rax \n\t" \
MiniTLS 0:35aa5be3b78d 121 "adcq $0,%%rdx \n\t" \
MiniTLS 0:35aa5be3b78d 122 "movq %%rax,0x20(%0) \n\t" \
MiniTLS 0:35aa5be3b78d 123 "movq %%rdx,%1 \n\t" \
MiniTLS 0:35aa5be3b78d 124 \
MiniTLS 0:35aa5be3b78d 125 "movq %%r11,%%rax \n\t" \
MiniTLS 0:35aa5be3b78d 126 "movq 0x30(%5),%%r11 \n\t" \
MiniTLS 0:35aa5be3b78d 127 "mulq %4 \n\t" \
MiniTLS 0:35aa5be3b78d 128 "addq %%r10,%%rax \n\t" \
MiniTLS 0:35aa5be3b78d 129 "adcq $0,%%rdx \n\t" \
MiniTLS 0:35aa5be3b78d 130 "movq 0x30(%2),%%r10 \n\t" \
MiniTLS 0:35aa5be3b78d 131 "addq %3,%%rax \n\t" \
MiniTLS 0:35aa5be3b78d 132 "adcq $0,%%rdx \n\t" \
MiniTLS 0:35aa5be3b78d 133 "movq %%rax,0x28(%0) \n\t" \
MiniTLS 0:35aa5be3b78d 134 "movq %%rdx,%1 \n\t" \
MiniTLS 0:35aa5be3b78d 135 \
MiniTLS 0:35aa5be3b78d 136 "movq %%r11,%%rax \n\t" \
MiniTLS 0:35aa5be3b78d 137 "movq 0x38(%5),%%r11 \n\t" \
MiniTLS 0:35aa5be3b78d 138 "mulq %4 \n\t" \
MiniTLS 0:35aa5be3b78d 139 "addq %%r10,%%rax \n\t" \
MiniTLS 0:35aa5be3b78d 140 "adcq $0,%%rdx \n\t" \
MiniTLS 0:35aa5be3b78d 141 "movq 0x38(%2),%%r10 \n\t" \
MiniTLS 0:35aa5be3b78d 142 "addq %3,%%rax \n\t" \
MiniTLS 0:35aa5be3b78d 143 "adcq $0,%%rdx \n\t" \
MiniTLS 0:35aa5be3b78d 144 "movq %%rax,0x30(%0) \n\t" \
MiniTLS 0:35aa5be3b78d 145 "movq %%rdx,%1 \n\t" \
MiniTLS 0:35aa5be3b78d 146 \
MiniTLS 0:35aa5be3b78d 147 "movq %%r11,%%rax \n\t" \
MiniTLS 0:35aa5be3b78d 148 "mulq %4 \n\t" \
MiniTLS 0:35aa5be3b78d 149 "addq %%r10,%%rax \n\t" \
MiniTLS 0:35aa5be3b78d 150 "adcq $0,%%rdx \n\t" \
MiniTLS 0:35aa5be3b78d 151 "addq %3,%%rax \n\t" \
MiniTLS 0:35aa5be3b78d 152 "adcq $0,%%rdx \n\t" \
MiniTLS 0:35aa5be3b78d 153 "movq %%rax,0x38(%0) \n\t" \
MiniTLS 0:35aa5be3b78d 154 "movq %%rdx,%1 \n\t" \
MiniTLS 0:35aa5be3b78d 155 \
MiniTLS 0:35aa5be3b78d 156 :"=r"(_c), "=r"(cy) \
MiniTLS 0:35aa5be3b78d 157 : "0"(_c), "1"(cy), "g"(mu), "r"(tmpm)\
MiniTLS 0:35aa5be3b78d 158 : "%rax", "%rdx", "%r10", "%r11", "%cc")
MiniTLS 0:35aa5be3b78d 159 /* NOTE(review): INNERMUL8 stores through the _c pointer but lists neither a memory operand nor a "memory" clobber - confirm the compiler cannot keep c[] values cached across it */
MiniTLS 0:35aa5be3b78d 160 /* PROPCARRY: _c[LO] += cy; cy is then set to the carry out of that addition (0 or 1) via setb/movzbq */
MiniTLS 0:35aa5be3b78d 161 #define PROPCARRY \
MiniTLS 0:35aa5be3b78d 162 asm( \
MiniTLS 0:35aa5be3b78d 163 "addq %1,%0 \n\t" \
MiniTLS 0:35aa5be3b78d 164 "setb %%al \n\t" \
MiniTLS 0:35aa5be3b78d 165 "movzbq %%al,%1 \n\t" \
MiniTLS 0:35aa5be3b78d 166 :"=g"(_c[LO]), "=r"(cy) \
MiniTLS 0:35aa5be3b78d 167 :"0"(_c[LO]), "1"(cy) \
MiniTLS 0:35aa5be3b78d 168 : "%rax", "%cc")
MiniTLS 0:35aa5be3b78d 169
MiniTLS 0:35aa5be3b78d 170 /******************************************************************/
MiniTLS 0:35aa5be3b78d 171 #elif defined(TFM_SSE2)
MiniTLS 0:35aa5be3b78d 172 /* SSE2 code (assumes 32-bit fp_digits); the arithmetic runs in the MMX registers using the SSE2 pmuludq/paddq forms */
MiniTLS 0:35aa5be3b78d 173 /* MMX register assignments:
MiniTLS 0:35aa5be3b78d 174 * mm0 *tmpm++, then Mu * (*tmpm++)
MiniTLS 0:35aa5be3b78d 175 * mm1 c[x], then Mu
MiniTLS 0:35aa5be3b78d 176 * mm2 mp
MiniTLS 0:35aa5be3b78d 177 * mm3 cy
MiniTLS 0:35aa5be3b78d 178 * mm4 _c[LO] (INNERMUL8 additionally uses mm5-mm7 for preloaded operands)
MiniTLS 0:35aa5be3b78d 179 */
MiniTLS 0:35aa5be3b78d 180 /* MONT_START: load mp into mm2 once, before the outer loop */
MiniTLS 0:35aa5be3b78d 181 #define MONT_START \
MiniTLS 0:35aa5be3b78d 182 asm("movd %0,%%mm2"::"g"(mp))
MiniTLS 0:35aa5be3b78d 183 /* MONT_FINI: emms, issued once the MMX work is finished */
MiniTLS 0:35aa5be3b78d 184 #define MONT_FINI \
MiniTLS 0:35aa5be3b78d 185 asm("emms")
MiniTLS 0:35aa5be3b78d 186 /* LOOP_START: mm1 = c[x] * mp (Mu for this round) and the carry accumulator mm3 is cleared; the C variable mu is not used on this path */
MiniTLS 0:35aa5be3b78d 187 #define LOOP_START \
MiniTLS 0:35aa5be3b78d 188 asm( \
MiniTLS 0:35aa5be3b78d 189 "movd %0,%%mm1 \n\t" \
MiniTLS 0:35aa5be3b78d 190 "pxor %%mm3,%%mm3 \n\t" \
MiniTLS 0:35aa5be3b78d 191 "pmuludq %%mm2,%%mm1 \n\t" \
MiniTLS 0:35aa5be3b78d 192 :: "g"(c[x]))
MiniTLS 0:35aa5be3b78d 193 /* INNERMUL: mm3 += _c[LO] + Mu * (*tmpm++); the low 32 bits are stored to _c[LO] and mm3 is shifted right by 32 to keep the carry */
MiniTLS 0:35aa5be3b78d 194 /* pmuludq on mmx registers does a 32x32->64 multiply. */
MiniTLS 0:35aa5be3b78d 195 #define INNERMUL \
MiniTLS 0:35aa5be3b78d 196 asm( \
MiniTLS 0:35aa5be3b78d 197 "movd %1,%%mm4 \n\t" \
MiniTLS 0:35aa5be3b78d 198 "movd %2,%%mm0 \n\t" \
MiniTLS 0:35aa5be3b78d 199 "paddq %%mm4,%%mm3 \n\t" \
MiniTLS 0:35aa5be3b78d 200 "pmuludq %%mm1,%%mm0 \n\t" \
MiniTLS 0:35aa5be3b78d 201 "paddq %%mm0,%%mm3 \n\t" \
MiniTLS 0:35aa5be3b78d 202 "movd %%mm3,%0 \n\t" \
MiniTLS 0:35aa5be3b78d 203 "psrlq $32, %%mm3 \n\t" \
MiniTLS 0:35aa5be3b78d 204 :"=g"(_c[LO]) : "0"(_c[LO]), "g"(*tmpm++) );
MiniTLS 0:35aa5be3b78d 205 /* INNERMUL8: eight unrolled INNERMUL steps over the digits at byte offsets 0..28 from _c and tmpm; neither pointer is advanced here (fp_montgomery_reduce adds 8 to both) */
MiniTLS 0:35aa5be3b78d 206 #define INNERMUL8 \
MiniTLS 0:35aa5be3b78d 207 asm( \
MiniTLS 0:35aa5be3b78d 208 "movd 0(%1),%%mm4 \n\t" \
MiniTLS 0:35aa5be3b78d 209 "movd 0(%2),%%mm0 \n\t" \
MiniTLS 0:35aa5be3b78d 210 "paddq %%mm4,%%mm3 \n\t" \
MiniTLS 0:35aa5be3b78d 211 "pmuludq %%mm1,%%mm0 \n\t" \
MiniTLS 0:35aa5be3b78d 212 "movd 4(%2),%%mm5 \n\t" \
MiniTLS 0:35aa5be3b78d 213 "paddq %%mm0,%%mm3 \n\t" \
MiniTLS 0:35aa5be3b78d 214 "movd 4(%1),%%mm6 \n\t" \
MiniTLS 0:35aa5be3b78d 215 "movd %%mm3,0(%0) \n\t" \
MiniTLS 0:35aa5be3b78d 216 "psrlq $32, %%mm3 \n\t" \
MiniTLS 0:35aa5be3b78d 217 \
MiniTLS 0:35aa5be3b78d 218 "paddq %%mm6,%%mm3 \n\t" \
MiniTLS 0:35aa5be3b78d 219 "pmuludq %%mm1,%%mm5 \n\t" \
MiniTLS 0:35aa5be3b78d 220 "movd 8(%2),%%mm6 \n\t" \
MiniTLS 0:35aa5be3b78d 221 "paddq %%mm5,%%mm3 \n\t" \
MiniTLS 0:35aa5be3b78d 222 "movd 8(%1),%%mm7 \n\t" \
MiniTLS 0:35aa5be3b78d 223 "movd %%mm3,4(%0) \n\t" \
MiniTLS 0:35aa5be3b78d 224 "psrlq $32, %%mm3 \n\t" \
MiniTLS 0:35aa5be3b78d 225 \
MiniTLS 0:35aa5be3b78d 226 "paddq %%mm7,%%mm3 \n\t" \
MiniTLS 0:35aa5be3b78d 227 "pmuludq %%mm1,%%mm6 \n\t" \
MiniTLS 0:35aa5be3b78d 228 "movd 12(%2),%%mm7 \n\t" \
MiniTLS 0:35aa5be3b78d 229 "paddq %%mm6,%%mm3 \n\t" \
MiniTLS 0:35aa5be3b78d 230 "movd 12(%1),%%mm5 \n\t" \
MiniTLS 0:35aa5be3b78d 231 "movd %%mm3,8(%0) \n\t" \
MiniTLS 0:35aa5be3b78d 232 "psrlq $32, %%mm3 \n\t" \
MiniTLS 0:35aa5be3b78d 233 \
MiniTLS 0:35aa5be3b78d 234 "paddq %%mm5,%%mm3 \n\t" \
MiniTLS 0:35aa5be3b78d 235 "pmuludq %%mm1,%%mm7 \n\t" \
MiniTLS 0:35aa5be3b78d 236 "movd 16(%2),%%mm5 \n\t" \
MiniTLS 0:35aa5be3b78d 237 "paddq %%mm7,%%mm3 \n\t" \
MiniTLS 0:35aa5be3b78d 238 "movd 16(%1),%%mm6 \n\t" \
MiniTLS 0:35aa5be3b78d 239 "movd %%mm3,12(%0) \n\t" \
MiniTLS 0:35aa5be3b78d 240 "psrlq $32, %%mm3 \n\t" \
MiniTLS 0:35aa5be3b78d 241 \
MiniTLS 0:35aa5be3b78d 242 "paddq %%mm6,%%mm3 \n\t" \
MiniTLS 0:35aa5be3b78d 243 "pmuludq %%mm1,%%mm5 \n\t" \
MiniTLS 0:35aa5be3b78d 244 "movd 20(%2),%%mm6 \n\t" \
MiniTLS 0:35aa5be3b78d 245 "paddq %%mm5,%%mm3 \n\t" \
MiniTLS 0:35aa5be3b78d 246 "movd 20(%1),%%mm7 \n\t" \
MiniTLS 0:35aa5be3b78d 247 "movd %%mm3,16(%0) \n\t" \
MiniTLS 0:35aa5be3b78d 248 "psrlq $32, %%mm3 \n\t" \
MiniTLS 0:35aa5be3b78d 249 \
MiniTLS 0:35aa5be3b78d 250 "paddq %%mm7,%%mm3 \n\t" \
MiniTLS 0:35aa5be3b78d 251 "pmuludq %%mm1,%%mm6 \n\t" \
MiniTLS 0:35aa5be3b78d 252 "movd 24(%2),%%mm7 \n\t" \
MiniTLS 0:35aa5be3b78d 253 "paddq %%mm6,%%mm3 \n\t" \
MiniTLS 0:35aa5be3b78d 254 "movd 24(%1),%%mm5 \n\t" \
MiniTLS 0:35aa5be3b78d 255 "movd %%mm3,20(%0) \n\t" \
MiniTLS 0:35aa5be3b78d 256 "psrlq $32, %%mm3 \n\t" \
MiniTLS 0:35aa5be3b78d 257 \
MiniTLS 0:35aa5be3b78d 258 "paddq %%mm5,%%mm3 \n\t" \
MiniTLS 0:35aa5be3b78d 259 "pmuludq %%mm1,%%mm7 \n\t" \
MiniTLS 0:35aa5be3b78d 260 "movd 28(%2),%%mm5 \n\t" \
MiniTLS 0:35aa5be3b78d 261 "paddq %%mm7,%%mm3 \n\t" \
MiniTLS 0:35aa5be3b78d 262 "movd 28(%1),%%mm6 \n\t" \
MiniTLS 0:35aa5be3b78d 263 "movd %%mm3,24(%0) \n\t" \
MiniTLS 0:35aa5be3b78d 264 "psrlq $32, %%mm3 \n\t" \
MiniTLS 0:35aa5be3b78d 265 \
MiniTLS 0:35aa5be3b78d 266 "paddq %%mm6,%%mm3 \n\t" \
MiniTLS 0:35aa5be3b78d 267 "pmuludq %%mm1,%%mm5 \n\t" \
MiniTLS 0:35aa5be3b78d 268 "paddq %%mm5,%%mm3 \n\t" \
MiniTLS 0:35aa5be3b78d 269 "movd %%mm3,28(%0) \n\t" \
MiniTLS 0:35aa5be3b78d 270 "psrlq $32, %%mm3 \n\t" \
MiniTLS 0:35aa5be3b78d 271 :"=r"(_c) : "0"(_c), "g"(tmpm) );
MiniTLS 0:35aa5be3b78d 272 /* LOOP_END: move the carry left in mm3 into the C variable cy so the generic carry loop can finish the round */
MiniTLS 0:35aa5be3b78d 273 #define LOOP_END \
MiniTLS 0:35aa5be3b78d 274 asm( "movd %%mm3,%0 \n" :"=r"(cy))
MiniTLS 0:35aa5be3b78d 275 /* PROPCARRY: _c[LO] += cy; cy is then set to the carry out of that addition (0 or 1); plain integer code, no MMX */
MiniTLS 0:35aa5be3b78d 276 #define PROPCARRY \
MiniTLS 0:35aa5be3b78d 277 asm( \
MiniTLS 0:35aa5be3b78d 278 "addl %1,%0 \n\t" \
MiniTLS 0:35aa5be3b78d 279 "setb %%al \n\t" \
MiniTLS 0:35aa5be3b78d 280 "movzbl %%al,%1 \n\t" \
MiniTLS 0:35aa5be3b78d 281 :"=g"(_c[LO]), "=r"(cy) \
MiniTLS 0:35aa5be3b78d 282 :"0"(_c[LO]), "1"(cy) \
MiniTLS 0:35aa5be3b78d 283 : "%eax", "%cc")
MiniTLS 0:35aa5be3b78d 284
MiniTLS 0:35aa5be3b78d 285 /******************************************************************/
MiniTLS 0:35aa5be3b78d 286 #elif defined(TFM_ARM)
MiniTLS 0:35aa5be3b78d 287 /* ARMv4 code; r0 is the scratch register in both macros */
MiniTLS 0:35aa5be3b78d 288 /* MONT_START, MONT_FINI and LOOP_END expand to nothing here; LOOP_START derives mu from c[x] and mp */
MiniTLS 0:35aa5be3b78d 289 #define MONT_START
MiniTLS 0:35aa5be3b78d 290 #define MONT_FINI
MiniTLS 0:35aa5be3b78d 291 #define LOOP_END
MiniTLS 0:35aa5be3b78d 292 #define LOOP_START \
MiniTLS 0:35aa5be3b78d 293 mu = c[x] * mp
MiniTLS 0:35aa5be3b78d 294 /* INNERMUL: r0 = _c[0] + cy with the carry flag captured into cy, then UMLAL accumulates mu * (*tmpm++) into cy:r0; r0 is stored back to _c[0] */
MiniTLS 0:35aa5be3b78d 295 #define INNERMUL \
MiniTLS 0:35aa5be3b78d 296 asm( \
MiniTLS 0:35aa5be3b78d 297 " LDR r0,%1 \n\t" \
MiniTLS 0:35aa5be3b78d 298 " ADDS r0,r0,%0 \n\t" \
MiniTLS 0:35aa5be3b78d 299 " MOVCS %0,#1 \n\t" \
MiniTLS 0:35aa5be3b78d 300 " MOVCC %0,#0 \n\t" \
MiniTLS 0:35aa5be3b78d 301 " UMLAL r0,%0,%3,%4 \n\t" \
MiniTLS 0:35aa5be3b78d 302 " STR r0,%1 \n\t" \
MiniTLS 0:35aa5be3b78d 303 :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r0","%cc");
MiniTLS 0:35aa5be3b78d 304 /* PROPCARRY: _c[0] += cy; cy becomes 1 if that addition carried (MOVCS) and 0 otherwise (MOVCC) */
MiniTLS 0:35aa5be3b78d 305 #define PROPCARRY \
MiniTLS 0:35aa5be3b78d 306 asm( \
MiniTLS 0:35aa5be3b78d 307 " LDR r0,%1 \n\t" \
MiniTLS 0:35aa5be3b78d 308 " ADDS r0,r0,%0 \n\t" \
MiniTLS 0:35aa5be3b78d 309 " STR r0,%1 \n\t" \
MiniTLS 0:35aa5be3b78d 310 " MOVCS %0,#1 \n\t" \
MiniTLS 0:35aa5be3b78d 311 " MOVCC %0,#0 \n\t" \
MiniTLS 0:35aa5be3b78d 312 :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r0","%cc");
MiniTLS 0:35aa5be3b78d 313
MiniTLS 0:35aa5be3b78d 314 /******************************************************************/
MiniTLS 0:35aa5be3b78d 315 #elif defined(TFM_PPC32)
MiniTLS 0:35aa5be3b78d 316 /* MONT_START, MONT_FINI and LOOP_END expand to nothing here; LOOP_START derives mu from c[x] and mp */
MiniTLS 0:35aa5be3b78d 317 /* PPC32; registers 16, 17 and 18 are used as scratch and listed as clobbers */
MiniTLS 0:35aa5be3b78d 318 #define MONT_START
MiniTLS 0:35aa5be3b78d 319 #define MONT_FINI
MiniTLS 0:35aa5be3b78d 320 #define LOOP_END
MiniTLS 0:35aa5be3b78d 321 #define LOOP_START \
MiniTLS 0:35aa5be3b78d 322 mu = c[x] * mp
MiniTLS 0:35aa5be3b78d 323 /* INNERMUL: 17:16 = mu * tmpm[0] (mulhwu/mullw), plus cy, plus _c[0]; the low word is stored to _c[0], the high word becomes cy, and tmpm is advanced after the asm statement */
MiniTLS 0:35aa5be3b78d 324 #define INNERMUL \
MiniTLS 0:35aa5be3b78d 325 asm( \
MiniTLS 0:35aa5be3b78d 326 " mullw 16,%3,%4 \n\t" \
MiniTLS 0:35aa5be3b78d 327 " mulhwu 17,%3,%4 \n\t" \
MiniTLS 0:35aa5be3b78d 328 " addc 16,16,%0 \n\t" \
MiniTLS 0:35aa5be3b78d 329 " addze 17,17 \n\t" \
MiniTLS 0:35aa5be3b78d 330 " lwz 18,%1 \n\t" \
MiniTLS 0:35aa5be3b78d 331 " addc 16,16,18 \n\t" \
MiniTLS 0:35aa5be3b78d 332 " addze %0,17 \n\t" \
MiniTLS 0:35aa5be3b78d 333 " stw 16,%1 \n\t" \
MiniTLS 0:35aa5be3b78d 334 :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "18","%cc"); ++tmpm;
MiniTLS 0:35aa5be3b78d 335 /* PROPCARRY: _c[0] += cy; cy is zeroed with xor and then picks up the carry of that addition through addze */
MiniTLS 0:35aa5be3b78d 336 #define PROPCARRY \
MiniTLS 0:35aa5be3b78d 337 asm( \
MiniTLS 0:35aa5be3b78d 338 " lwz 16,%1 \n\t" \
MiniTLS 0:35aa5be3b78d 339 " addc 16,16,%0 \n\t" \
MiniTLS 0:35aa5be3b78d 340 " stw 16,%1 \n\t" \
MiniTLS 0:35aa5be3b78d 341 " xor %0,%0,%0 \n\t" \
MiniTLS 0:35aa5be3b78d 342 " addze %0,%0 \n\t" \
MiniTLS 0:35aa5be3b78d 343 :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"16","%cc");
MiniTLS 0:35aa5be3b78d 344
MiniTLS 0:35aa5be3b78d 345 /******************************************************************/
MiniTLS 0:35aa5be3b78d 346 #elif defined(TFM_PPC64)
MiniTLS 0:35aa5be3b78d 347 /* MONT_START, MONT_FINI and LOOP_END expand to nothing here; LOOP_START derives mu from c[x] and mp */
MiniTLS 0:35aa5be3b78d 348 /* PPC64; same sequence as the PPC32 variant with 64-bit multiplies. NOTE(review): the asm mixes "16" and "r16" register spellings and uses the mnemonic "sdx" with an "=m" operand in an indexed form - confirm this assembles on the target toolchain */
MiniTLS 0:35aa5be3b78d 349 #define MONT_START
MiniTLS 0:35aa5be3b78d 350 #define MONT_FINI
MiniTLS 0:35aa5be3b78d 351 #define LOOP_END
MiniTLS 0:35aa5be3b78d 352 #define LOOP_START \
MiniTLS 0:35aa5be3b78d 353 mu = c[x] * mp
MiniTLS 0:35aa5be3b78d 354 /* INNERMUL: r17:r16 = mu * tmpm[0] (mulhdu/mulld), plus cy, plus _c[0]; the low word is written back to _c[0], the high word becomes cy, and tmpm is advanced after the asm statement */
MiniTLS 0:35aa5be3b78d 355 #define INNERMUL \
MiniTLS 0:35aa5be3b78d 356 asm( \
MiniTLS 0:35aa5be3b78d 357 " mulld r16,%3,%4 \n\t" \
MiniTLS 0:35aa5be3b78d 358 " mulhdu r17,%3,%4 \n\t" \
MiniTLS 0:35aa5be3b78d 359 " addc r16,16,%0 \n\t" \
MiniTLS 0:35aa5be3b78d 360 " addze r17,r17 \n\t" \
MiniTLS 0:35aa5be3b78d 361 " ldx r18,0,%1 \n\t" \
MiniTLS 0:35aa5be3b78d 362 " addc r16,r16,r18 \n\t" \
MiniTLS 0:35aa5be3b78d 363 " addze %0,r17 \n\t" \
MiniTLS 0:35aa5be3b78d 364 " sdx r16,0,%1 \n\t" \
MiniTLS 0:35aa5be3b78d 365 :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"r16", "r17", "r18","%cc"); ++tmpm;
MiniTLS 0:35aa5be3b78d 366 /* PROPCARRY: _c[0] += cy; cy is zeroed with xor and then picks up the carry of that addition through addze */
MiniTLS 0:35aa5be3b78d 367 #define PROPCARRY \
MiniTLS 0:35aa5be3b78d 368 asm( \
MiniTLS 0:35aa5be3b78d 369 " ldx r16,0,%1 \n\t" \
MiniTLS 0:35aa5be3b78d 370 " addc r16,r16,%0 \n\t" \
MiniTLS 0:35aa5be3b78d 371 " sdx r16,0,%1 \n\t" \
MiniTLS 0:35aa5be3b78d 372 " xor %0,%0,%0 \n\t" \
MiniTLS 0:35aa5be3b78d 373 " addze %0,%0 \n\t" \
MiniTLS 0:35aa5be3b78d 374 :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r16","%cc");
MiniTLS 0:35aa5be3b78d 375
MiniTLS 0:35aa5be3b78d 376 /******************************************************************/
MiniTLS 0:35aa5be3b78d 377 #elif defined(TFM_AVR32)
MiniTLS 0:35aa5be3b78d 378 /* MONT_START, MONT_FINI and LOOP_END expand to nothing here; LOOP_START derives mu from c[x] and mp */
MiniTLS 0:35aa5be3b78d 379 /* AVR32; r2 and r3 are used as scratch. Unlike the other variants, the digit pointer itself is passed as a register operand */
MiniTLS 0:35aa5be3b78d 380 #define MONT_START
MiniTLS 0:35aa5be3b78d 381 #define MONT_FINI
MiniTLS 0:35aa5be3b78d 382 #define LOOP_END
MiniTLS 0:35aa5be3b78d 383 #define LOOP_START \
MiniTLS 0:35aa5be3b78d 384 mu = c[x] * mp
MiniTLS 0:35aa5be3b78d 385 /* INNERMUL: presumably r3:r2 = *_c + cy, then macu.d accumulates mu * (*tmpm++) into that pair; r2 is stored back through _c and r3 becomes cy - TODO confirm against the AVR32 instruction set */
MiniTLS 0:35aa5be3b78d 386 #define INNERMUL \
MiniTLS 0:35aa5be3b78d 387 asm( \
MiniTLS 0:35aa5be3b78d 388 " ld.w r2,%1 \n\t" \
MiniTLS 0:35aa5be3b78d 389 " add r2,%0 \n\t" \
MiniTLS 0:35aa5be3b78d 390 " eor r3,r3 \n\t" \
MiniTLS 0:35aa5be3b78d 391 " acr r3 \n\t" \
MiniTLS 0:35aa5be3b78d 392 " macu.d r2,%3,%4 \n\t" \
MiniTLS 0:35aa5be3b78d 393 " st.w %1,r2 \n\t" \
MiniTLS 0:35aa5be3b78d 394 " mov %0,r3 \n\t" \
MiniTLS 0:35aa5be3b78d 395 :"=r"(cy),"=r"(_c):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c):"r2","r3");
MiniTLS 0:35aa5be3b78d 396 /* PROPCARRY: adds cy into the current digit and leaves the carry of that addition in cy. NOTE(review): the output operand is &_c[0], which is not an lvalue, whereas INNERMUL passes _c - confirm this builds */
MiniTLS 0:35aa5be3b78d 397 #define PROPCARRY \
MiniTLS 0:35aa5be3b78d 398 asm( \
MiniTLS 0:35aa5be3b78d 399 " ld.w r2,%1 \n\t" \
MiniTLS 0:35aa5be3b78d 400 " add r2,%0 \n\t" \
MiniTLS 0:35aa5be3b78d 401 " st.w %1,r2 \n\t" \
MiniTLS 0:35aa5be3b78d 402 " eor %0,%0 \n\t" \
MiniTLS 0:35aa5be3b78d 403 " acr %0 \n\t" \
MiniTLS 0:35aa5be3b78d 404 :"=r"(cy),"=r"(&_c[0]):"0"(cy),"1"(&_c[0]):"r2","%cc");
MiniTLS 0:35aa5be3b78d 405
MiniTLS 0:35aa5be3b78d 406 /******************************************************************/
MiniTLS 0:35aa5be3b78d 407 #elif defined(TFM_MIPS)
MiniTLS 0:35aa5be3b78d 408 /* MONT_START, MONT_FINI and LOOP_END expand to nothing here; LOOP_START derives mu from c[x] and mp */
MiniTLS 0:35aa5be3b78d 409 /* MIPS; there is no carry flag, so each carry is recovered with sltu (sum < addend). $10, $12 and $13 are scratch */
MiniTLS 0:35aa5be3b78d 410 #define MONT_START
MiniTLS 0:35aa5be3b78d 411 #define MONT_FINI
MiniTLS 0:35aa5be3b78d 412 #define LOOP_END
MiniTLS 0:35aa5be3b78d 413 #define LOOP_START \
MiniTLS 0:35aa5be3b78d 414 mu = c[x] * mp
MiniTLS 0:35aa5be3b78d 415 /* INNERMUL: $13:$12 = mu * tmpm[0] (mfhi/mflo), plus cy, plus _c[0]; the low word is stored to _c[0], the high word becomes cy, and tmpm is advanced after the asm statement */
MiniTLS 0:35aa5be3b78d 416 #define INNERMUL \
MiniTLS 0:35aa5be3b78d 417 asm( \
MiniTLS 0:35aa5be3b78d 418 " multu %3,%4 \n\t" \
MiniTLS 0:35aa5be3b78d 419 " mflo $12 \n\t" \
MiniTLS 0:35aa5be3b78d 420 " mfhi $13 \n\t" \
MiniTLS 0:35aa5be3b78d 421 " addu $12,$12,%0 \n\t" \
MiniTLS 0:35aa5be3b78d 422 " sltu $10,$12,%0 \n\t" \
MiniTLS 0:35aa5be3b78d 423 " addu $13,$13,$10 \n\t" \
MiniTLS 0:35aa5be3b78d 424 " lw $10,%1 \n\t" \
MiniTLS 0:35aa5be3b78d 425 " addu $12,$12,$10 \n\t" \
MiniTLS 0:35aa5be3b78d 426 " sltu $10,$12,$10 \n\t" \
MiniTLS 0:35aa5be3b78d 427 " addu %0,$13,$10 \n\t" \
MiniTLS 0:35aa5be3b78d 428 " sw $12,%1 \n\t" \
MiniTLS 0:35aa5be3b78d 429 :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"$10","$12","$13"); ++tmpm;
MiniTLS 0:35aa5be3b78d 430 /* PROPCARRY: _c[0] += cy; cy becomes 1 when the stored sum is smaller than the old cy (i.e. the addition wrapped), else 0 */
MiniTLS 0:35aa5be3b78d 431 #define PROPCARRY \
MiniTLS 0:35aa5be3b78d 432 asm( \
MiniTLS 0:35aa5be3b78d 433 " lw $10,%1 \n\t" \
MiniTLS 0:35aa5be3b78d 434 " addu $10,$10,%0 \n\t" \
MiniTLS 0:35aa5be3b78d 435 " sw $10,%1 \n\t" \
MiniTLS 0:35aa5be3b78d 436 " sltu %0,$10,%0 \n\t" \
MiniTLS 0:35aa5be3b78d 437 :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"$10");
MiniTLS 0:35aa5be3b78d 438
MiniTLS 0:35aa5be3b78d 439 /******************************************************************/
MiniTLS 0:35aa5be3b78d 440 #else
MiniTLS 0:35aa5be3b78d 441 /* portable fallback used when no assembly variant is selected; fp_word is presumably wide enough to hold a full digit product - confirm in tfm.h */
MiniTLS 0:35aa5be3b78d 442 /* ISO C code */
MiniTLS 0:35aa5be3b78d 443 #define MONT_START
MiniTLS 0:35aa5be3b78d 444 #define MONT_FINI
MiniTLS 0:35aa5be3b78d 445 #define LOOP_END
MiniTLS 0:35aa5be3b78d 446 #define LOOP_START \
MiniTLS 0:35aa5be3b78d 447 mu = c[x] * mp
MiniTLS 0:35aa5be3b78d 448 /* INNERMUL: accumulate mu * (*tmpm++) + _c[0] + cy in an fp_word; the low digit goes back to _c[0], the bits above DIGIT_BIT become cy */
MiniTLS 0:35aa5be3b78d 449 #define INNERMUL \
MiniTLS 0:35aa5be3b78d 450 do { fp_word acc = ((fp_word)mu) * ((fp_word)*tmpm++); \
MiniTLS 0:35aa5be3b78d 451 acc += (fp_word)_c[0]; \
MiniTLS 0:35aa5be3b78d 452 acc += (fp_word)cy; \
MiniTLS 0:35aa5be3b78d 453 _c[0] = (fp_digit)acc; cy = (fp_digit)(acc >> DIGIT_BIT); \
MiniTLS 0:35aa5be3b78d 454 } while (0)
MiniTLS 0:35aa5be3b78d 455 /* PROPCARRY: _c[0] += cy; cy becomes 1 when the sum wrapped (sum < old cy), else 0 */
MiniTLS 0:35aa5be3b78d 456 #define PROPCARRY \
MiniTLS 0:35aa5be3b78d 457 do { _c[0] += cy; cy = (_c[0] < cy) ? 1 : 0; } while (0)
MiniTLS 0:35aa5be3b78d 458
MiniTLS 0:35aa5be3b78d 459 #endif
MiniTLS 0:35aa5be3b78d 460 /******************************************************************/
MiniTLS 0:35aa5be3b78d 461
MiniTLS 0:35aa5be3b78d 462
MiniTLS 0:35aa5be3b78d 463 #define LO 0 /* digit index used as _c[LO] by the x86-32, x86-64 and SSE2 macros */
MiniTLS 0:35aa5be3b78d 464 /* presumably provides fp_montgomery_reduce_small, which fp_montgomery_reduce calls for moduli of at most 16 digits - confirm in fp_mont_small.i */
MiniTLS 0:35aa5be3b78d 465 #ifdef TFM_SMALL_MONT_SET
MiniTLS 0:35aa5be3b78d 466 #include "fp_mont_small.i"
MiniTLS 0:35aa5be3b78d 467 #endif
MiniTLS 0:35aa5be3b78d 468
MiniTLS 0:35aa5be3b78d 469 /* computes x/R == x (mod N) via Montgomery Reduction */
MiniTLS 0:35aa5be3b78d 470 void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
MiniTLS 0:35aa5be3b78d 471 {
MiniTLS 0:35aa5be3b78d 472 fp_digit c[FP_SIZE], *_c, *tmpm, mu;
MiniTLS 0:35aa5be3b78d 473 int oldused, x, y, pa;
MiniTLS 0:35aa5be3b78d 474
MiniTLS 0:35aa5be3b78d 475 /* bail if too large */
MiniTLS 0:35aa5be3b78d 476 if (m->used > (FP_SIZE/2)) {
MiniTLS 0:35aa5be3b78d 477 return;
MiniTLS 0:35aa5be3b78d 478 }
MiniTLS 0:35aa5be3b78d 479
MiniTLS 0:35aa5be3b78d 480 #ifdef TFM_SMALL_MONT_SET
MiniTLS 0:35aa5be3b78d 481 if (m->used <= 16) {
MiniTLS 0:35aa5be3b78d 482 fp_montgomery_reduce_small(a, m, mp);
MiniTLS 0:35aa5be3b78d 483 return;
MiniTLS 0:35aa5be3b78d 484 }
MiniTLS 0:35aa5be3b78d 485 #endif
MiniTLS 0:35aa5be3b78d 486
MiniTLS 0:35aa5be3b78d 487 #if defined(USE_MEMSET)
MiniTLS 0:35aa5be3b78d 488 /* now zero the buff */
MiniTLS 0:35aa5be3b78d 489 memset(c, 0, sizeof c);
MiniTLS 0:35aa5be3b78d 490 #endif
MiniTLS 0:35aa5be3b78d 491 pa = m->used;
MiniTLS 0:35aa5be3b78d 492
MiniTLS 0:35aa5be3b78d 493 /* copy the input */
MiniTLS 0:35aa5be3b78d 494 oldused = a->used;
MiniTLS 0:35aa5be3b78d 495 for (x = 0; x < oldused; x++) {
MiniTLS 0:35aa5be3b78d 496 c[x] = a->dp[x];
MiniTLS 0:35aa5be3b78d 497 }
MiniTLS 0:35aa5be3b78d 498 #if !defined(USE_MEMSET)
MiniTLS 0:35aa5be3b78d 499 for (; x < 2*pa+1; x++) {
MiniTLS 0:35aa5be3b78d 500 c[x] = 0;
MiniTLS 0:35aa5be3b78d 501 }
MiniTLS 0:35aa5be3b78d 502 #endif
MiniTLS 0:35aa5be3b78d 503 MONT_START;
MiniTLS 0:35aa5be3b78d 504
MiniTLS 0:35aa5be3b78d 505 for (x = 0; x < pa; x++) {
MiniTLS 0:35aa5be3b78d 506 fp_digit cy = 0;
MiniTLS 0:35aa5be3b78d 507 /* get Mu for this round */
MiniTLS 0:35aa5be3b78d 508 LOOP_START;
MiniTLS 0:35aa5be3b78d 509 _c = c + x;
MiniTLS 0:35aa5be3b78d 510 tmpm = m->dp;
MiniTLS 0:35aa5be3b78d 511 y = 0;
MiniTLS 0:35aa5be3b78d 512 #if (defined(TFM_SSE2) || defined(TFM_X86_64))
MiniTLS 0:35aa5be3b78d 513 for (; y < (pa & ~7); y += 8) {
MiniTLS 0:35aa5be3b78d 514 INNERMUL8;
MiniTLS 0:35aa5be3b78d 515 _c += 8;
MiniTLS 0:35aa5be3b78d 516 tmpm += 8;
MiniTLS 0:35aa5be3b78d 517 }
MiniTLS 0:35aa5be3b78d 518 #endif
MiniTLS 0:35aa5be3b78d 519
MiniTLS 0:35aa5be3b78d 520 for (; y < pa; y++) {
MiniTLS 0:35aa5be3b78d 521 INNERMUL;
MiniTLS 0:35aa5be3b78d 522 ++_c;
MiniTLS 0:35aa5be3b78d 523 }
MiniTLS 0:35aa5be3b78d 524 LOOP_END;
MiniTLS 0:35aa5be3b78d 525 while (cy) {
MiniTLS 0:35aa5be3b78d 526 PROPCARRY;
MiniTLS 0:35aa5be3b78d 527 ++_c;
MiniTLS 0:35aa5be3b78d 528 }
MiniTLS 0:35aa5be3b78d 529 }
MiniTLS 0:35aa5be3b78d 530
MiniTLS 0:35aa5be3b78d 531 /* now copy out */
MiniTLS 0:35aa5be3b78d 532 _c = c + pa;
MiniTLS 0:35aa5be3b78d 533 tmpm = a->dp;
MiniTLS 0:35aa5be3b78d 534 for (x = 0; x < pa+1; x++) {
MiniTLS 0:35aa5be3b78d 535 *tmpm++ = *_c++;
MiniTLS 0:35aa5be3b78d 536 }
MiniTLS 0:35aa5be3b78d 537
MiniTLS 0:35aa5be3b78d 538 for (; x < oldused; x++) {
MiniTLS 0:35aa5be3b78d 539 *tmpm++ = 0;
MiniTLS 0:35aa5be3b78d 540 }
MiniTLS 0:35aa5be3b78d 541
MiniTLS 0:35aa5be3b78d 542 MONT_FINI;
MiniTLS 0:35aa5be3b78d 543
MiniTLS 0:35aa5be3b78d 544 a->used = pa+1;
MiniTLS 0:35aa5be3b78d 545 fp_clamp(a);
MiniTLS 0:35aa5be3b78d 546
MiniTLS 0:35aa5be3b78d 547 /* if A >= m then A = A - m */
MiniTLS 0:35aa5be3b78d 548 if (fp_cmp_mag (a, m) != FP_LT) {
MiniTLS 0:35aa5be3b78d 549 s_fp_sub (a, m, a);
MiniTLS 0:35aa5be3b78d 550 }
MiniTLS 0:35aa5be3b78d 551 }
MiniTLS 0:35aa5be3b78d 552
MiniTLS 0:35aa5be3b78d 553
MiniTLS 0:35aa5be3b78d 554 /* $Source: /cvs/libtom/tomsfastmath/src/mont/fp_montgomery_reduce.c,v $ */
MiniTLS 0:35aa5be3b78d 555 /* $Revision: 1.2 $ */
MiniTLS 0:35aa5be3b78d 556 /* $Date: 2007/03/14 23:47:42 $ */