change some parameters in the library to meet the needs of the website httpbin.org
Fork of MiniTLS-GPL by
math/mont/fp_montgomery_reduce.c@5:95f70ebfe61f, 2015-02-06 (annotated)
- Committer:
- shiyilei
- Date:
- Fri Feb 06 06:17:33 2015 +0000
- Revision:
- 5:95f70ebfe61f
- Parent:
- 0:35aa5be3b78d
change some parameters in the library to meet the needs of httpbin.org
Who changed what in which revision?
User | Revision | Line number | New contents of line |
---|---|---|---|
MiniTLS | 0:35aa5be3b78d | 1 | /* TomsFastMath, a fast ISO C bignum library. |
MiniTLS | 0:35aa5be3b78d | 2 | * |
MiniTLS | 0:35aa5be3b78d | 3 | * This project is meant to fill in where LibTomMath |
MiniTLS | 0:35aa5be3b78d | 4 | * falls short. That is speed ;-) |
MiniTLS | 0:35aa5be3b78d | 5 | * |
MiniTLS | 0:35aa5be3b78d | 6 | * This project is public domain and free for all purposes. |
MiniTLS | 0:35aa5be3b78d | 7 | * |
MiniTLS | 0:35aa5be3b78d | 8 | * Tom St Denis, tomstdenis@gmail.com |
MiniTLS | 0:35aa5be3b78d | 9 | */ |
MiniTLS | 0:35aa5be3b78d | 10 | #include <tfm.h> |
MiniTLS | 0:35aa5be3b78d | 11 | |
MiniTLS | 0:35aa5be3b78d | 12 | /******************************************************************/ |
MiniTLS | 0:35aa5be3b78d | 13 | #if defined(TFM_X86) && !defined(TFM_SSE2) |
MiniTLS | 0:35aa5be3b78d | 14 | /* x86-32 code: MONT_START, MONT_FINI and LOOP_END expand to nothing; LOOP_START computes mu = c[x] * mp in plain C */ |
MiniTLS | 0:35aa5be3b78d | 15 | |
MiniTLS | 0:35aa5be3b78d | 16 | #define MONT_START |
MiniTLS | 0:35aa5be3b78d | 17 | #define MONT_FINI |
MiniTLS | 0:35aa5be3b78d | 18 | #define LOOP_END |
MiniTLS | 0:35aa5be3b78d | 19 | #define LOOP_START \ |
MiniTLS | 0:35aa5be3b78d | 20 | mu = c[x] * mp |
MiniTLS | 0:35aa5be3b78d | 21 | /* INNERMUL: edx:eax = mu * (*tmpm); cy is added to eax, eax is added to _c[LO], both carries are folded into edx, and edx becomes the new cy; tmpm is post-incremented by the operand expression */ |
MiniTLS | 0:35aa5be3b78d | 22 | #define INNERMUL \ |
MiniTLS | 0:35aa5be3b78d | 23 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 24 | "movl %5,%%eax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 25 | "mull %4 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 26 | "addl %1,%%eax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 27 | "adcl $0,%%edx \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 28 | "addl %%eax,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 29 | "adcl $0,%%edx \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 30 | "movl %%edx,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 31 | :"=g"(_c[LO]), "=r"(cy) \ |
MiniTLS | 0:35aa5be3b78d | 32 | :"0"(_c[LO]), "1"(cy), "g"(mu), "g"(*tmpm++) \ |
MiniTLS | 0:35aa5be3b78d | 33 | : "%eax", "%edx", "%cc") |
MiniTLS | 0:35aa5be3b78d | 34 | /* PROPCARRY: adds cy into _c[LO], then replaces cy with the carry flag of that addition (setb into al, zero-extended into cy) */ |
MiniTLS | 0:35aa5be3b78d | 35 | #define PROPCARRY \ |
MiniTLS | 0:35aa5be3b78d | 36 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 37 | "addl %1,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 38 | "setb %%al \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 39 | "movzbl %%al,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 40 | :"=g"(_c[LO]), "=r"(cy) \ |
MiniTLS | 0:35aa5be3b78d | 41 | :"0"(_c[LO]), "1"(cy) \ |
MiniTLS | 0:35aa5be3b78d | 42 | : "%eax", "%cc") |
MiniTLS | 0:35aa5be3b78d | 43 | |
MiniTLS | 0:35aa5be3b78d | 44 | /******************************************************************/ |
MiniTLS | 0:35aa5be3b78d | 45 | #elif defined(TFM_X86_64) |
MiniTLS | 0:35aa5be3b78d | 46 | /* x86-64 code: MONT_START, MONT_FINI and LOOP_END expand to nothing; LOOP_START computes mu = c[x] * mp in plain C */ |
MiniTLS | 0:35aa5be3b78d | 47 | |
MiniTLS | 0:35aa5be3b78d | 48 | #define MONT_START |
MiniTLS | 0:35aa5be3b78d | 49 | #define MONT_FINI |
MiniTLS | 0:35aa5be3b78d | 50 | #define LOOP_END |
MiniTLS | 0:35aa5be3b78d | 51 | #define LOOP_START \ |
MiniTLS | 0:35aa5be3b78d | 52 | mu = c[x] * mp |
MiniTLS | 0:35aa5be3b78d | 53 | /* INNERMUL: rdx:rax = mu * (*tmpm); cy is added to rax, rax is added to _c[LO], both carries are folded into rdx, and rdx becomes the new cy; tmpm is post-incremented by the operand expression */ |
MiniTLS | 0:35aa5be3b78d | 54 | #define INNERMUL \ |
MiniTLS | 0:35aa5be3b78d | 55 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 56 | "movq %5,%%rax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 57 | "mulq %4 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 58 | "addq %1,%%rax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 59 | "adcq $0,%%rdx \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 60 | "addq %%rax,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 61 | "adcq $0,%%rdx \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 62 | "movq %%rdx,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 63 | :"=g"(_c[LO]), "=r"(cy) \ |
MiniTLS | 0:35aa5be3b78d | 64 | :"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \ |
MiniTLS | 0:35aa5be3b78d | 65 | : "%rax", "%rdx", "%cc") |
MiniTLS | 0:35aa5be3b78d | 66 | /* INNERMUL8: eight unrolled INNERMUL steps; reads modulus digits at byte offsets 0x0..0x38 from tmpm (%5) and c digits at the same offsets from _c (%2), stores each result through %0 and carries via cy; it does not advance _c or tmpm, the calling loop adds 8 to each */ |
MiniTLS | 0:35aa5be3b78d | 67 | #define INNERMUL8 \ |
MiniTLS | 0:35aa5be3b78d | 68 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 69 | "movq 0(%5),%%rax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 70 | "movq 0(%2),%%r10 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 71 | "movq 0x8(%5),%%r11 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 72 | "mulq %4 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 73 | "addq %%r10,%%rax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 74 | "adcq $0,%%rdx \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 75 | "movq 0x8(%2),%%r10 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 76 | "addq %3,%%rax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 77 | "adcq $0,%%rdx \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 78 | "movq %%rax,0(%0) \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 79 | "movq %%rdx,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 80 | \ |
MiniTLS | 0:35aa5be3b78d | 81 | "movq %%r11,%%rax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 82 | "movq 0x10(%5),%%r11 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 83 | "mulq %4 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 84 | "addq %%r10,%%rax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 85 | "adcq $0,%%rdx \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 86 | "movq 0x10(%2),%%r10 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 87 | "addq %3,%%rax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 88 | "adcq $0,%%rdx \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 89 | "movq %%rax,0x8(%0) \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 90 | "movq %%rdx,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 91 | \ |
MiniTLS | 0:35aa5be3b78d | 92 | "movq %%r11,%%rax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 93 | "movq 0x18(%5),%%r11 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 94 | "mulq %4 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 95 | "addq %%r10,%%rax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 96 | "adcq $0,%%rdx \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 97 | "movq 0x18(%2),%%r10 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 98 | "addq %3,%%rax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 99 | "adcq $0,%%rdx \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 100 | "movq %%rax,0x10(%0) \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 101 | "movq %%rdx,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 102 | \ |
MiniTLS | 0:35aa5be3b78d | 103 | "movq %%r11,%%rax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 104 | "movq 0x20(%5),%%r11 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 105 | "mulq %4 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 106 | "addq %%r10,%%rax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 107 | "adcq $0,%%rdx \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 108 | "movq 0x20(%2),%%r10 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 109 | "addq %3,%%rax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 110 | "adcq $0,%%rdx \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 111 | "movq %%rax,0x18(%0) \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 112 | "movq %%rdx,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 113 | \ |
MiniTLS | 0:35aa5be3b78d | 114 | "movq %%r11,%%rax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 115 | "movq 0x28(%5),%%r11 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 116 | "mulq %4 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 117 | "addq %%r10,%%rax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 118 | "adcq $0,%%rdx \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 119 | "movq 0x28(%2),%%r10 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 120 | "addq %3,%%rax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 121 | "adcq $0,%%rdx \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 122 | "movq %%rax,0x20(%0) \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 123 | "movq %%rdx,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 124 | \ |
MiniTLS | 0:35aa5be3b78d | 125 | "movq %%r11,%%rax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 126 | "movq 0x30(%5),%%r11 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 127 | "mulq %4 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 128 | "addq %%r10,%%rax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 129 | "adcq $0,%%rdx \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 130 | "movq 0x30(%2),%%r10 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 131 | "addq %3,%%rax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 132 | "adcq $0,%%rdx \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 133 | "movq %%rax,0x28(%0) \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 134 | "movq %%rdx,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 135 | \ |
MiniTLS | 0:35aa5be3b78d | 136 | "movq %%r11,%%rax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 137 | "movq 0x38(%5),%%r11 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 138 | "mulq %4 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 139 | "addq %%r10,%%rax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 140 | "adcq $0,%%rdx \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 141 | "movq 0x38(%2),%%r10 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 142 | "addq %3,%%rax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 143 | "adcq $0,%%rdx \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 144 | "movq %%rax,0x30(%0) \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 145 | "movq %%rdx,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 146 | \ |
MiniTLS | 0:35aa5be3b78d | 147 | "movq %%r11,%%rax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 148 | "mulq %4 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 149 | "addq %%r10,%%rax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 150 | "adcq $0,%%rdx \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 151 | "addq %3,%%rax \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 152 | "adcq $0,%%rdx \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 153 | "movq %%rax,0x38(%0) \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 154 | "movq %%rdx,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 155 | \ |
MiniTLS | 0:35aa5be3b78d | 156 | :"=r"(_c), "=r"(cy) \ |
MiniTLS | 0:35aa5be3b78d | 157 | : "0"(_c), "1"(cy), "g"(mu), "r"(tmpm)\ |
MiniTLS | 0:35aa5be3b78d | 158 | : "%rax", "%rdx", "%r10", "%r11", "%cc") |
MiniTLS | 0:35aa5be3b78d | 159 | /* NOTE(review): INNERMUL8 stores through the pointer in %0 but declares neither a memory clobber nor a memory output operand - confirm the compiler cannot keep stale copies of c[] across this asm */ |
MiniTLS | 0:35aa5be3b78d | 160 | /* PROPCARRY: adds cy into _c[LO], then replaces cy with the carry flag of that addition (setb into al, zero-extended into cy) */ |
MiniTLS | 0:35aa5be3b78d | 161 | #define PROPCARRY \ |
MiniTLS | 0:35aa5be3b78d | 162 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 163 | "addq %1,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 164 | "setb %%al \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 165 | "movzbq %%al,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 166 | :"=g"(_c[LO]), "=r"(cy) \ |
MiniTLS | 0:35aa5be3b78d | 167 | :"0"(_c[LO]), "1"(cy) \ |
MiniTLS | 0:35aa5be3b78d | 168 | : "%rax", "%cc") |
MiniTLS | 0:35aa5be3b78d | 169 | |
MiniTLS | 0:35aa5be3b78d | 170 | /******************************************************************/ |
MiniTLS | 0:35aa5be3b78d | 171 | #elif defined(TFM_SSE2) |
MiniTLS | 0:35aa5be3b78d | 172 | /* SSE2 code (assumes 32-bit fp_digits) */ |
MiniTLS | 0:35aa5be3b78d | 173 | /* MMX register assignments (the asm below names %mm0-%mm7, not %xmm registers): |
MiniTLS | 0:35aa5be3b78d | 174 | * mm0 *tmpm++, then Mu * (*tmpm++) |
MiniTLS | 0:35aa5be3b78d | 175 | * mm1 c[x], then Mu |
MiniTLS | 0:35aa5be3b78d | 176 | * mm2 mp |
MiniTLS | 0:35aa5be3b78d | 177 | * mm3 cy |
MiniTLS | 0:35aa5be3b78d | 178 | * mm4 _c[LO] |
MiniTLS | 0:35aa5be3b78d | 179 | */ |
MiniTLS | 0:35aa5be3b78d | 180 | /* MONT_START: loads mp into mm2; NOTE(review): none of the asm statements in this branch list the mm registers as clobbers - confirm that is acceptable for the target compiler */ |
MiniTLS | 0:35aa5be3b78d | 181 | #define MONT_START \ |
MiniTLS | 0:35aa5be3b78d | 182 | asm("movd %0,%%mm2"::"g"(mp)) |
MiniTLS | 0:35aa5be3b78d | 183 | /* MONT_FINI: issues emms once the reduction loop is finished */ |
MiniTLS | 0:35aa5be3b78d | 184 | #define MONT_FINI \ |
MiniTLS | 0:35aa5be3b78d | 185 | asm("emms") |
MiniTLS | 0:35aa5be3b78d | 186 | /* LOOP_START: loads c[x] into mm1, clears the carry accumulator mm3, and multiplies mm1 by mm2 (mp) so mm1 holds Mu; the C variable mu is not written in this branch */ |
MiniTLS | 0:35aa5be3b78d | 187 | #define LOOP_START \ |
MiniTLS | 0:35aa5be3b78d | 188 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 189 | "movd %0,%%mm1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 190 | "pxor %%mm3,%%mm3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 191 | "pmuludq %%mm2,%%mm1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 192 | :: "g"(c[x])) |
MiniTLS | 0:35aa5be3b78d | 193 | |
MiniTLS | 0:35aa5be3b78d | 194 | /* pmuludq on mmx registers does a 32x32->64 multiply. INNERMUL: mm3 += _c[LO] + Mu * (*tmpm++); the low 32 bits of mm3 are stored to _c[LO] and mm3 is shifted right by 32 to leave the carry */ |
MiniTLS | 0:35aa5be3b78d | 195 | #define INNERMUL \ |
MiniTLS | 0:35aa5be3b78d | 196 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 197 | "movd %1,%%mm4 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 198 | "movd %2,%%mm0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 199 | "paddq %%mm4,%%mm3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 200 | "pmuludq %%mm1,%%mm0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 201 | "paddq %%mm0,%%mm3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 202 | "movd %%mm3,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 203 | "psrlq $32, %%mm3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 204 | :"=g"(_c[LO]) : "0"(_c[LO]), "g"(*tmpm++) ); |
MiniTLS | 0:35aa5be3b78d | 205 | /* INNERMUL8: eight unrolled INNERMUL steps over byte offsets 0..28 of _c (%1, stored back through %0) and tmpm (%2), interleaving loads into mm5-mm7; it does not advance _c or tmpm. NOTE(review): tmpm uses the g constraint yet is dereferenced as a base register, and no memory clobber is declared - confirm */ |
MiniTLS | 0:35aa5be3b78d | 206 | #define INNERMUL8 \ |
MiniTLS | 0:35aa5be3b78d | 207 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 208 | "movd 0(%1),%%mm4 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 209 | "movd 0(%2),%%mm0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 210 | "paddq %%mm4,%%mm3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 211 | "pmuludq %%mm1,%%mm0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 212 | "movd 4(%2),%%mm5 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 213 | "paddq %%mm0,%%mm3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 214 | "movd 4(%1),%%mm6 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 215 | "movd %%mm3,0(%0) \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 216 | "psrlq $32, %%mm3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 217 | \ |
MiniTLS | 0:35aa5be3b78d | 218 | "paddq %%mm6,%%mm3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 219 | "pmuludq %%mm1,%%mm5 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 220 | "movd 8(%2),%%mm6 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 221 | "paddq %%mm5,%%mm3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 222 | "movd 8(%1),%%mm7 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 223 | "movd %%mm3,4(%0) \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 224 | "psrlq $32, %%mm3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 225 | \ |
MiniTLS | 0:35aa5be3b78d | 226 | "paddq %%mm7,%%mm3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 227 | "pmuludq %%mm1,%%mm6 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 228 | "movd 12(%2),%%mm7 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 229 | "paddq %%mm6,%%mm3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 230 | "movd 12(%1),%%mm5 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 231 | "movd %%mm3,8(%0) \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 232 | "psrlq $32, %%mm3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 233 | \ |
MiniTLS | 0:35aa5be3b78d | 234 | "paddq %%mm5,%%mm3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 235 | "pmuludq %%mm1,%%mm7 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 236 | "movd 16(%2),%%mm5 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 237 | "paddq %%mm7,%%mm3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 238 | "movd 16(%1),%%mm6 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 239 | "movd %%mm3,12(%0) \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 240 | "psrlq $32, %%mm3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 241 | \ |
MiniTLS | 0:35aa5be3b78d | 242 | "paddq %%mm6,%%mm3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 243 | "pmuludq %%mm1,%%mm5 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 244 | "movd 20(%2),%%mm6 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 245 | "paddq %%mm5,%%mm3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 246 | "movd 20(%1),%%mm7 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 247 | "movd %%mm3,16(%0) \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 248 | "psrlq $32, %%mm3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 249 | \ |
MiniTLS | 0:35aa5be3b78d | 250 | "paddq %%mm7,%%mm3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 251 | "pmuludq %%mm1,%%mm6 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 252 | "movd 24(%2),%%mm7 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 253 | "paddq %%mm6,%%mm3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 254 | "movd 24(%1),%%mm5 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 255 | "movd %%mm3,20(%0) \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 256 | "psrlq $32, %%mm3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 257 | \ |
MiniTLS | 0:35aa5be3b78d | 258 | "paddq %%mm5,%%mm3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 259 | "pmuludq %%mm1,%%mm7 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 260 | "movd 28(%2),%%mm5 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 261 | "paddq %%mm7,%%mm3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 262 | "movd 28(%1),%%mm6 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 263 | "movd %%mm3,24(%0) \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 264 | "psrlq $32, %%mm3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 265 | \ |
MiniTLS | 0:35aa5be3b78d | 266 | "paddq %%mm6,%%mm3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 267 | "pmuludq %%mm1,%%mm5 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 268 | "paddq %%mm5,%%mm3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 269 | "movd %%mm3,28(%0) \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 270 | "psrlq $32, %%mm3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 271 | :"=r"(_c) : "0"(_c), "g"(tmpm) ); |
MiniTLS | 0:35aa5be3b78d | 272 | /* LOOP_END: copies the carry left in mm3 into the C variable cy so PROPCARRY can continue in integer registers */ |
MiniTLS | 0:35aa5be3b78d | 273 | #define LOOP_END \ |
MiniTLS | 0:35aa5be3b78d | 274 | asm( "movd %%mm3,%0 \n" :"=r"(cy)) |
MiniTLS | 0:35aa5be3b78d | 275 | /* PROPCARRY: adds cy into _c[LO], then replaces cy with the carry flag of that addition (setb into al, zero-extended into cy) */ |
MiniTLS | 0:35aa5be3b78d | 276 | #define PROPCARRY \ |
MiniTLS | 0:35aa5be3b78d | 277 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 278 | "addl %1,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 279 | "setb %%al \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 280 | "movzbl %%al,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 281 | :"=g"(_c[LO]), "=r"(cy) \ |
MiniTLS | 0:35aa5be3b78d | 282 | :"0"(_c[LO]), "1"(cy) \ |
MiniTLS | 0:35aa5be3b78d | 283 | : "%eax", "%cc") |
MiniTLS | 0:35aa5be3b78d | 284 | |
MiniTLS | 0:35aa5be3b78d | 285 | /******************************************************************/ |
MiniTLS | 0:35aa5be3b78d | 286 | #elif defined(TFM_ARM) |
MiniTLS | 0:35aa5be3b78d | 287 | /* ARMv4 code: MONT_START, MONT_FINI and LOOP_END expand to nothing; LOOP_START computes mu = c[x] * mp in plain C */ |
MiniTLS | 0:35aa5be3b78d | 288 | |
MiniTLS | 0:35aa5be3b78d | 289 | #define MONT_START |
MiniTLS | 0:35aa5be3b78d | 290 | #define MONT_FINI |
MiniTLS | 0:35aa5be3b78d | 291 | #define LOOP_END |
MiniTLS | 0:35aa5be3b78d | 292 | #define LOOP_START \ |
MiniTLS | 0:35aa5be3b78d | 293 | mu = c[x] * mp |
MiniTLS | 0:35aa5be3b78d | 294 | /* INNERMUL: r0 = _c[0] + cy with cy reset to the carry out (1 or 0), then UMLAL accumulates mu * (*tmpm++) into the cy:r0 pair and r0 is stored back to _c[0]; note the macro text already ends with a semicolon */ |
MiniTLS | 0:35aa5be3b78d | 295 | #define INNERMUL \ |
MiniTLS | 0:35aa5be3b78d | 296 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 297 | " LDR r0,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 298 | " ADDS r0,r0,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 299 | " MOVCS %0,#1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 300 | " MOVCC %0,#0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 301 | " UMLAL r0,%0,%3,%4 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 302 | " STR r0,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 303 | :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c[0]):"r0","%cc"); |
MiniTLS | 0:35aa5be3b78d | 304 | /* PROPCARRY: _c[0] += cy, then cy is set to 1 or 0 from the carry flag of that addition */ |
MiniTLS | 0:35aa5be3b78d | 305 | #define PROPCARRY \ |
MiniTLS | 0:35aa5be3b78d | 306 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 307 | " LDR r0,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 308 | " ADDS r0,r0,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 309 | " STR r0,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 310 | " MOVCS %0,#1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 311 | " MOVCC %0,#0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 312 | :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r0","%cc"); |
MiniTLS | 0:35aa5be3b78d | 313 | |
MiniTLS | 0:35aa5be3b78d | 314 | /******************************************************************/ |
MiniTLS | 0:35aa5be3b78d | 315 | #elif defined(TFM_PPC32) |
MiniTLS | 0:35aa5be3b78d | 316 | |
MiniTLS | 0:35aa5be3b78d | 317 | /* PPC32: MONT_START, MONT_FINI and LOOP_END expand to nothing; LOOP_START computes mu = c[x] * mp in plain C */ |
MiniTLS | 0:35aa5be3b78d | 318 | #define MONT_START |
MiniTLS | 0:35aa5be3b78d | 319 | #define MONT_FINI |
MiniTLS | 0:35aa5be3b78d | 320 | #define LOOP_END |
MiniTLS | 0:35aa5be3b78d | 321 | #define LOOP_START \ |
MiniTLS | 0:35aa5be3b78d | 322 | mu = c[x] * mp |
MiniTLS | 0:35aa5be3b78d | 323 | /* INNERMUL: registers 17:16 = mu * tmpm[0] (mulhwu/mullw); cy and then _c[0] are added to the low word with carries folded into the high word, the low word is stored to _c[0] and the high word becomes cy; the macro text itself then executes ++tmpm */ |
MiniTLS | 0:35aa5be3b78d | 324 | #define INNERMUL \ |
MiniTLS | 0:35aa5be3b78d | 325 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 326 | " mullw 16,%3,%4 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 327 | " mulhwu 17,%3,%4 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 328 | " addc 16,16,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 329 | " addze 17,17 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 330 | " lwz 18,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 331 | " addc 16,16,18 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 332 | " addze %0,17 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 333 | " stw 16,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 334 | :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"16", "17", "18","%cc"); ++tmpm; |
MiniTLS | 0:35aa5be3b78d | 335 | /* PROPCARRY: _c[0] += cy, then cy is zeroed and set to the carry of that addition via addze */ |
MiniTLS | 0:35aa5be3b78d | 336 | #define PROPCARRY \ |
MiniTLS | 0:35aa5be3b78d | 337 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 338 | " lwz 16,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 339 | " addc 16,16,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 340 | " stw 16,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 341 | " xor %0,%0,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 342 | " addze %0,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 343 | :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"16","%cc"); |
MiniTLS | 0:35aa5be3b78d | 344 | |
MiniTLS | 0:35aa5be3b78d | 345 | /******************************************************************/ |
MiniTLS | 0:35aa5be3b78d | 346 | #elif defined(TFM_PPC64) |
MiniTLS | 0:35aa5be3b78d | 347 | /* NOTE(review): this branch mixes r16 and bare 16 register spellings (addc line), uses the mnemonic sdx (presumably stdx was intended), and passes the memory operand %1 to the indexed forms ldx/sdx where the PPC32 branch uses lwz/stw - confirm against the PowerPC ISA and an actual PPC64 build */ |
MiniTLS | 0:35aa5be3b78d | 348 | /* PPC64: MONT_START, MONT_FINI and LOOP_END expand to nothing; LOOP_START computes mu = c[x] * mp in plain C */ |
MiniTLS | 0:35aa5be3b78d | 349 | #define MONT_START |
MiniTLS | 0:35aa5be3b78d | 350 | #define MONT_FINI |
MiniTLS | 0:35aa5be3b78d | 351 | #define LOOP_END |
MiniTLS | 0:35aa5be3b78d | 352 | #define LOOP_START \ |
MiniTLS | 0:35aa5be3b78d | 353 | mu = c[x] * mp |
MiniTLS | 0:35aa5be3b78d | 354 | /* INNERMUL: r17:r16 = mu * tmpm[0] (mulhdu/mulld); cy and then _c[0] are added to the low doubleword with carries folded into the high one, the low part is stored to _c[0] and the high part becomes cy; the macro text itself then executes ++tmpm */ |
MiniTLS | 0:35aa5be3b78d | 355 | #define INNERMUL \ |
MiniTLS | 0:35aa5be3b78d | 356 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 357 | " mulld r16,%3,%4 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 358 | " mulhdu r17,%3,%4 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 359 | " addc r16,16,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 360 | " addze r17,r17 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 361 | " ldx r18,0,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 362 | " addc r16,r16,r18 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 363 | " addze %0,r17 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 364 | " sdx r16,0,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 365 | :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"r16", "r17", "r18","%cc"); ++tmpm; |
MiniTLS | 0:35aa5be3b78d | 366 | /* PROPCARRY: _c[0] += cy, then cy is zeroed and set to the carry of that addition via addze */ |
MiniTLS | 0:35aa5be3b78d | 367 | #define PROPCARRY \ |
MiniTLS | 0:35aa5be3b78d | 368 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 369 | " ldx r16,0,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 370 | " addc r16,r16,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 371 | " sdx r16,0,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 372 | " xor %0,%0,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 373 | " addze %0,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 374 | :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"r16","%cc"); |
MiniTLS | 0:35aa5be3b78d | 375 | |
MiniTLS | 0:35aa5be3b78d | 376 | /******************************************************************/ |
MiniTLS | 0:35aa5be3b78d | 377 | #elif defined(TFM_AVR32) |
MiniTLS | 0:35aa5be3b78d | 378 | /* NOTE(review): INNERMUL binds the pointer _c itself as a register operand and omits the cc clobber, while PROPCARRY names &_c[0] as an output operand, which is not an lvalue - confirm this branch builds with the intended toolchain */ |
MiniTLS | 0:35aa5be3b78d | 379 | /* AVR32: MONT_START, MONT_FINI and LOOP_END expand to nothing; LOOP_START computes mu = c[x] * mp in plain C */ |
MiniTLS | 0:35aa5be3b78d | 380 | #define MONT_START |
MiniTLS | 0:35aa5be3b78d | 381 | #define MONT_FINI |
MiniTLS | 0:35aa5be3b78d | 382 | #define LOOP_END |
MiniTLS | 0:35aa5be3b78d | 383 | #define LOOP_START \ |
MiniTLS | 0:35aa5be3b78d | 384 | mu = c[x] * mp |
MiniTLS | 0:35aa5be3b78d | 385 | /* INNERMUL: r2 = value loaded via %1 plus cy, r3 is cleared and then (presumably) receives the carry via acr; macu.d accumulates mu * (*tmpm++) into the r3:r2 pair, r2 is stored back via %1 and r3 becomes cy */ |
MiniTLS | 0:35aa5be3b78d | 386 | #define INNERMUL \ |
MiniTLS | 0:35aa5be3b78d | 387 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 388 | " ld.w r2,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 389 | " add r2,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 390 | " eor r3,r3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 391 | " acr r3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 392 | " macu.d r2,%3,%4 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 393 | " st.w %1,r2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 394 | " mov %0,r3 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 395 | :"=r"(cy),"=r"(_c):"0"(cy),"r"(mu),"r"(*tmpm++),"1"(_c):"r2","r3"); |
MiniTLS | 0:35aa5be3b78d | 396 | /* PROPCARRY: adds cy to the digit loaded via %1 and stores it back, then clears cy and (presumably) sets it to the carry via acr */ |
MiniTLS | 0:35aa5be3b78d | 397 | #define PROPCARRY \ |
MiniTLS | 0:35aa5be3b78d | 398 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 399 | " ld.w r2,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 400 | " add r2,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 401 | " st.w %1,r2 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 402 | " eor %0,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 403 | " acr %0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 404 | :"=r"(cy),"=r"(&_c[0]):"0"(cy),"1"(&_c[0]):"r2","%cc"); |
MiniTLS | 0:35aa5be3b78d | 405 | |
MiniTLS | 0:35aa5be3b78d | 406 | /******************************************************************/ |
MiniTLS | 0:35aa5be3b78d | 407 | #elif defined(TFM_MIPS) |
MiniTLS | 0:35aa5be3b78d | 408 | |
MiniTLS | 0:35aa5be3b78d | 409 | /* MIPS: MONT_START, MONT_FINI and LOOP_END expand to nothing; LOOP_START computes mu = c[x] * mp in plain C */ |
MiniTLS | 0:35aa5be3b78d | 410 | #define MONT_START |
MiniTLS | 0:35aa5be3b78d | 411 | #define MONT_FINI |
MiniTLS | 0:35aa5be3b78d | 412 | #define LOOP_END |
MiniTLS | 0:35aa5be3b78d | 413 | #define LOOP_START \ |
MiniTLS | 0:35aa5be3b78d | 414 | mu = c[x] * mp |
MiniTLS | 0:35aa5be3b78d | 415 | /* INNERMUL: $13:$12 = mu * tmpm[0]; cy and then _c[0] are added to $12, each carry being recovered with sltu (sum < addend) in $10 and folded into $13; $12 is stored to _c[0] and $13 plus the last carry becomes cy; the macro text itself then executes ++tmpm */ |
MiniTLS | 0:35aa5be3b78d | 416 | #define INNERMUL \ |
MiniTLS | 0:35aa5be3b78d | 417 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 418 | " multu %3,%4 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 419 | " mflo $12 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 420 | " mfhi $13 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 421 | " addu $12,$12,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 422 | " sltu $10,$12,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 423 | " addu $13,$13,$10 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 424 | " lw $10,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 425 | " addu $12,$12,$10 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 426 | " sltu $10,$12,$10 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 427 | " addu %0,$13,$10 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 428 | " sw $12,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 429 | :"=r"(cy),"=m"(_c[0]):"0"(cy),"r"(mu),"r"(tmpm[0]),"1"(_c[0]):"$10","$12","$13"); ++tmpm; |
MiniTLS | 0:35aa5be3b78d | 430 | /* PROPCARRY: _c[0] += cy, then cy = (new _c[0] < old cy), i.e. the carry out of that addition */ |
MiniTLS | 0:35aa5be3b78d | 431 | #define PROPCARRY \ |
MiniTLS | 0:35aa5be3b78d | 432 | asm( \ |
MiniTLS | 0:35aa5be3b78d | 433 | " lw $10,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 434 | " addu $10,$10,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 435 | " sw $10,%1 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 436 | " sltu %0,$10,%0 \n\t" \ |
MiniTLS | 0:35aa5be3b78d | 437 | :"=r"(cy),"=m"(_c[0]):"0"(cy),"1"(_c[0]):"$10"); |
MiniTLS | 0:35aa5be3b78d | 438 | |
MiniTLS | 0:35aa5be3b78d | 439 | /******************************************************************/ |
MiniTLS | 0:35aa5be3b78d | 440 | #else |
MiniTLS | 0:35aa5be3b78d | 441 | |
MiniTLS | 0:35aa5be3b78d | 442 | /* Portable ISO C fallback: no setup, teardown or end-of-round work is needed */ |
MiniTLS | 0:35aa5be3b78d | 443 | #define MONT_START |
MiniTLS | 0:35aa5be3b78d | 444 | #define MONT_FINI |
MiniTLS | 0:35aa5be3b78d | 445 | #define LOOP_END |
MiniTLS | 0:35aa5be3b78d | 446 | #define LOOP_START \ |
MiniTLS | 0:35aa5be3b78d | 447 | mu = c[x] * mp |
MiniTLS | 0:35aa5be3b78d | 448 | /* INNERMUL: double-width sum of _c[0], cy and mu * (*tmpm++); low digit goes back to _c[0], the part above DIGIT_BIT becomes cy */ |
MiniTLS | 0:35aa5be3b78d | 449 | #define INNERMUL \ |
MiniTLS | 0:35aa5be3b78d | 450 | do { fp_word t; \ |
MiniTLS | 0:35aa5be3b78d | 451 | t = (fp_word)_c[0] + (fp_word)cy; \ |
MiniTLS | 0:35aa5be3b78d | 452 | t += ((fp_word)mu) * ((fp_word)*tmpm++); \ |
MiniTLS | 0:35aa5be3b78d | 453 | _c[0] = (fp_digit)t; cy = (t >> DIGIT_BIT); \ |
MiniTLS | 0:35aa5be3b78d | 454 | } while (0) |
MiniTLS | 0:35aa5be3b78d | 455 | /* PROPCARRY: _c[0] += cy; a wrapped sum is smaller than cy, which yields the next carry */ |
MiniTLS | 0:35aa5be3b78d | 456 | #define PROPCARRY \ |
MiniTLS | 0:35aa5be3b78d | 457 | do { _c[0] += cy; cy = (_c[0] < cy); } while (0) |
MiniTLS | 0:35aa5be3b78d | 458 | |
MiniTLS | 0:35aa5be3b78d | 459 | #endif |
MiniTLS | 0:35aa5be3b78d | 460 | /******************************************************************/ |
MiniTLS | 0:35aa5be3b78d | 461 | |
MiniTLS | 0:35aa5be3b78d | 462 | |
MiniTLS | 0:35aa5be3b78d | 463 | #define LO 0 /* index used by the x86 and SSE2 asm macros when they name the current digit as _c[LO] */ |
MiniTLS | 0:35aa5be3b78d | 464 | |
MiniTLS | 0:35aa5be3b78d | 465 | #ifdef TFM_SMALL_MONT_SET |
MiniTLS | 0:35aa5be3b78d | 466 | #include "fp_mont_small.i" |
MiniTLS | 0:35aa5be3b78d | 467 | #endif |
MiniTLS | 0:35aa5be3b78d | 468 | |
MiniTLS | 0:35aa5be3b78d | 469 | /* computes x/R == x (mod N) via Montgomery Reduction */ |
MiniTLS | 0:35aa5be3b78d | 470 | void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp) |
MiniTLS | 0:35aa5be3b78d | 471 | { |
MiniTLS | 0:35aa5be3b78d | 472 | fp_digit c[FP_SIZE], *_c, *tmpm, mu; |
MiniTLS | 0:35aa5be3b78d | 473 | int oldused, x, y, pa; |
MiniTLS | 0:35aa5be3b78d | 474 | |
MiniTLS | 0:35aa5be3b78d | 475 | /* bail if too large */ |
MiniTLS | 0:35aa5be3b78d | 476 | if (m->used > (FP_SIZE/2)) { |
MiniTLS | 0:35aa5be3b78d | 477 | return; |
MiniTLS | 0:35aa5be3b78d | 478 | } |
MiniTLS | 0:35aa5be3b78d | 479 | |
MiniTLS | 0:35aa5be3b78d | 480 | #ifdef TFM_SMALL_MONT_SET |
MiniTLS | 0:35aa5be3b78d | 481 | if (m->used <= 16) { |
MiniTLS | 0:35aa5be3b78d | 482 | fp_montgomery_reduce_small(a, m, mp); |
MiniTLS | 0:35aa5be3b78d | 483 | return; |
MiniTLS | 0:35aa5be3b78d | 484 | } |
MiniTLS | 0:35aa5be3b78d | 485 | #endif |
MiniTLS | 0:35aa5be3b78d | 486 | |
MiniTLS | 0:35aa5be3b78d | 487 | #if defined(USE_MEMSET) |
MiniTLS | 0:35aa5be3b78d | 488 | /* now zero the buff */ |
MiniTLS | 0:35aa5be3b78d | 489 | memset(c, 0, sizeof c); |
MiniTLS | 0:35aa5be3b78d | 490 | #endif |
MiniTLS | 0:35aa5be3b78d | 491 | pa = m->used; |
MiniTLS | 0:35aa5be3b78d | 492 | |
MiniTLS | 0:35aa5be3b78d | 493 | /* copy the input */ |
MiniTLS | 0:35aa5be3b78d | 494 | oldused = a->used; |
MiniTLS | 0:35aa5be3b78d | 495 | for (x = 0; x < oldused; x++) { |
MiniTLS | 0:35aa5be3b78d | 496 | c[x] = a->dp[x]; |
MiniTLS | 0:35aa5be3b78d | 497 | } |
MiniTLS | 0:35aa5be3b78d | 498 | #if !defined(USE_MEMSET) |
MiniTLS | 0:35aa5be3b78d | 499 | for (; x < 2*pa+1; x++) { |
MiniTLS | 0:35aa5be3b78d | 500 | c[x] = 0; |
MiniTLS | 0:35aa5be3b78d | 501 | } |
MiniTLS | 0:35aa5be3b78d | 502 | #endif |
MiniTLS | 0:35aa5be3b78d | 503 | MONT_START; |
MiniTLS | 0:35aa5be3b78d | 504 | |
MiniTLS | 0:35aa5be3b78d | 505 | for (x = 0; x < pa; x++) { |
MiniTLS | 0:35aa5be3b78d | 506 | fp_digit cy = 0; |
MiniTLS | 0:35aa5be3b78d | 507 | /* get Mu for this round */ |
MiniTLS | 0:35aa5be3b78d | 508 | LOOP_START; |
MiniTLS | 0:35aa5be3b78d | 509 | _c = c + x; |
MiniTLS | 0:35aa5be3b78d | 510 | tmpm = m->dp; |
MiniTLS | 0:35aa5be3b78d | 511 | y = 0; |
MiniTLS | 0:35aa5be3b78d | 512 | #if (defined(TFM_SSE2) || defined(TFM_X86_64)) |
MiniTLS | 0:35aa5be3b78d | 513 | for (; y < (pa & ~7); y += 8) { |
MiniTLS | 0:35aa5be3b78d | 514 | INNERMUL8; |
MiniTLS | 0:35aa5be3b78d | 515 | _c += 8; |
MiniTLS | 0:35aa5be3b78d | 516 | tmpm += 8; |
MiniTLS | 0:35aa5be3b78d | 517 | } |
MiniTLS | 0:35aa5be3b78d | 518 | #endif |
MiniTLS | 0:35aa5be3b78d | 519 | |
MiniTLS | 0:35aa5be3b78d | 520 | for (; y < pa; y++) { |
MiniTLS | 0:35aa5be3b78d | 521 | INNERMUL; |
MiniTLS | 0:35aa5be3b78d | 522 | ++_c; |
MiniTLS | 0:35aa5be3b78d | 523 | } |
MiniTLS | 0:35aa5be3b78d | 524 | LOOP_END; |
MiniTLS | 0:35aa5be3b78d | 525 | while (cy) { |
MiniTLS | 0:35aa5be3b78d | 526 | PROPCARRY; |
MiniTLS | 0:35aa5be3b78d | 527 | ++_c; |
MiniTLS | 0:35aa5be3b78d | 528 | } |
MiniTLS | 0:35aa5be3b78d | 529 | } |
MiniTLS | 0:35aa5be3b78d | 530 | |
MiniTLS | 0:35aa5be3b78d | 531 | /* now copy out */ |
MiniTLS | 0:35aa5be3b78d | 532 | _c = c + pa; |
MiniTLS | 0:35aa5be3b78d | 533 | tmpm = a->dp; |
MiniTLS | 0:35aa5be3b78d | 534 | for (x = 0; x < pa+1; x++) { |
MiniTLS | 0:35aa5be3b78d | 535 | *tmpm++ = *_c++; |
MiniTLS | 0:35aa5be3b78d | 536 | } |
MiniTLS | 0:35aa5be3b78d | 537 | |
MiniTLS | 0:35aa5be3b78d | 538 | for (; x < oldused; x++) { |
MiniTLS | 0:35aa5be3b78d | 539 | *tmpm++ = 0; |
MiniTLS | 0:35aa5be3b78d | 540 | } |
MiniTLS | 0:35aa5be3b78d | 541 | |
MiniTLS | 0:35aa5be3b78d | 542 | MONT_FINI; |
MiniTLS | 0:35aa5be3b78d | 543 | |
MiniTLS | 0:35aa5be3b78d | 544 | a->used = pa+1; |
MiniTLS | 0:35aa5be3b78d | 545 | fp_clamp(a); |
MiniTLS | 0:35aa5be3b78d | 546 | |
MiniTLS | 0:35aa5be3b78d | 547 | /* if A >= m then A = A - m */ |
MiniTLS | 0:35aa5be3b78d | 548 | if (fp_cmp_mag (a, m) != FP_LT) { |
MiniTLS | 0:35aa5be3b78d | 549 | s_fp_sub (a, m, a); |
MiniTLS | 0:35aa5be3b78d | 550 | } |
MiniTLS | 0:35aa5be3b78d | 551 | } |
MiniTLS | 0:35aa5be3b78d | 552 | |
MiniTLS | 0:35aa5be3b78d | 553 | |
MiniTLS | 0:35aa5be3b78d | 554 | /* $Source: /cvs/libtom/tomsfastmath/src/mont/fp_montgomery_reduce.c,v $ */ |
MiniTLS | 0:35aa5be3b78d | 555 | /* $Revision: 1.2 $ */ |
MiniTLS | 0:35aa5be3b78d | 556 | /* $Date: 2007/03/14 23:47:42 $ */ |