wolfSSL SSL/TLS library, supports up to TLS 1.3

Dependents:   CyaSSL-Twitter-OAuth4Tw Example-client-tls-cert TwitterReader TweetTest ... more

Committer:
wolfSSL
Date:
Tue May 30 01:44:10 2017 +0000
Revision:
11:cee25a834751
wolfSSL 3.11.0

Who changed what in which revision?

User | Revision | Line number | New contents of line
wolfSSL 11:cee25a834751 1 /* fe_operations.c
wolfSSL 11:cee25a834751 2 *
wolfSSL 11:cee25a834751 3 * Copyright (C) 2006-2016 wolfSSL Inc.
wolfSSL 11:cee25a834751 4 *
wolfSSL 11:cee25a834751 5 * This file is part of wolfSSL.
wolfSSL 11:cee25a834751 6 *
wolfSSL 11:cee25a834751 7 * wolfSSL is free software; you can redistribute it and/or modify
wolfSSL 11:cee25a834751 8 * it under the terms of the GNU General Public License as published by
wolfSSL 11:cee25a834751 9 * the Free Software Foundation; either version 2 of the License, or
wolfSSL 11:cee25a834751 10 * (at your option) any later version.
wolfSSL 11:cee25a834751 11 *
wolfSSL 11:cee25a834751 12 * wolfSSL is distributed in the hope that it will be useful,
wolfSSL 11:cee25a834751 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
wolfSSL 11:cee25a834751 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
wolfSSL 11:cee25a834751 15 * GNU General Public License for more details.
wolfSSL 11:cee25a834751 16 *
wolfSSL 11:cee25a834751 17 * You should have received a copy of the GNU General Public License
wolfSSL 11:cee25a834751 18 * along with this program; if not, write to the Free Software
wolfSSL 11:cee25a834751 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
wolfSSL 11:cee25a834751 20 */
wolfSSL 11:cee25a834751 21
wolfSSL 11:cee25a834751 22
wolfSSL 11:cee25a834751 23 /* Based On Daniel J Bernstein's curve25519 Public Domain ref10 work. */
wolfSSL 11:cee25a834751 24
wolfSSL 11:cee25a834751 25 #ifdef HAVE_CONFIG_H
wolfSSL 11:cee25a834751 26 #include <config.h>
wolfSSL 11:cee25a834751 27 #endif
wolfSSL 11:cee25a834751 28
wolfSSL 11:cee25a834751 29 #include <wolfssl/wolfcrypt/settings.h>
wolfSSL 11:cee25a834751 30
wolfSSL 11:cee25a834751 31 #ifndef CURVED25519_SMALL /* run when not defined to use small memory math */
wolfSSL 11:cee25a834751 32 #if defined(HAVE_ED25519) || defined(HAVE_CURVE25519)
wolfSSL 11:cee25a834751 33
wolfSSL 11:cee25a834751 34 #include <wolfssl/wolfcrypt/fe_operations.h>
wolfSSL 11:cee25a834751 35 #include <stdint.h>
wolfSSL 11:cee25a834751 36
wolfSSL 11:cee25a834751 37 #ifdef NO_INLINE
wolfSSL 11:cee25a834751 38 #include <wolfssl/wolfcrypt/misc.h>
wolfSSL 11:cee25a834751 39 #else
wolfSSL 11:cee25a834751 40 #define WOLFSSL_MISC_INCLUDED
wolfSSL 11:cee25a834751 41 #include <wolfcrypt/src/misc.c>
wolfSSL 11:cee25a834751 42 #endif
wolfSSL 11:cee25a834751 43
wolfSSL 11:cee25a834751 44 #ifdef HAVE___UINT128_T
wolfSSL 11:cee25a834751 45 #include "fe_x25519_128.i"
wolfSSL 11:cee25a834751 46 #else
wolfSSL 11:cee25a834751 47 /*
wolfSSL 11:cee25a834751 48 fe means field element.
wolfSSL 11:cee25a834751 49 Here the field is \Z/(2^255-19).
wolfSSL 11:cee25a834751 50 An element t, entries t[0]...t[9], represents the integer
wolfSSL 11:cee25a834751 51 t[0]+2^26 t[1]+2^51 t[2]+2^77 t[3]+2^102 t[4]+...+2^230 t[9].
wolfSSL 11:cee25a834751 52 Bounds on each t[i] vary depending on context.
wolfSSL 11:cee25a834751 53 */
wolfSSL 11:cee25a834751 54
/*
 * Read the three bytes in[0..2] as a little-endian integer.
 * in[0] is the least significant byte; the result fits in 24 bits.
 */
uint64_t load_3(const unsigned char *in)
{
    return  ((uint64_t) in[0])
         | (((uint64_t) in[1]) << 8)
         | (((uint64_t) in[2]) << 16);
}
wolfSSL 11:cee25a834751 63
wolfSSL 11:cee25a834751 64
/*
 * Read the four bytes in[0..3] as a little-endian integer.
 * in[0] is the least significant byte; the result fits in 32 bits.
 */
uint64_t load_4(const unsigned char *in)
{
    uint64_t value = 0;
    int i;

    /* Fold in the bytes from most significant (in[3]) down to in[0]. */
    for (i = 3; i >= 0; --i) {
        value = (value << 8) | (uint64_t) in[i];
    }
    return value;
}
wolfSSL 11:cee25a834751 74
wolfSSL 11:cee25a834751 75
wolfSSL 11:cee25a834751 76 /*
wolfSSL 11:cee25a834751 77 h = 1
wolfSSL 11:cee25a834751 78 */
wolfSSL 11:cee25a834751 79
wolfSSL 11:cee25a834751 80 void fe_1(fe h)
wolfSSL 11:cee25a834751 81 {
wolfSSL 11:cee25a834751 82 h[0] = 1;
wolfSSL 11:cee25a834751 83 h[1] = 0;
wolfSSL 11:cee25a834751 84 h[2] = 0;
wolfSSL 11:cee25a834751 85 h[3] = 0;
wolfSSL 11:cee25a834751 86 h[4] = 0;
wolfSSL 11:cee25a834751 87 h[5] = 0;
wolfSSL 11:cee25a834751 88 h[6] = 0;
wolfSSL 11:cee25a834751 89 h[7] = 0;
wolfSSL 11:cee25a834751 90 h[8] = 0;
wolfSSL 11:cee25a834751 91 h[9] = 0;
wolfSSL 11:cee25a834751 92 }
wolfSSL 11:cee25a834751 93
wolfSSL 11:cee25a834751 94
wolfSSL 11:cee25a834751 95 /*
wolfSSL 11:cee25a834751 96 h = 0
wolfSSL 11:cee25a834751 97 */
wolfSSL 11:cee25a834751 98
wolfSSL 11:cee25a834751 99 void fe_0(fe h)
wolfSSL 11:cee25a834751 100 {
wolfSSL 11:cee25a834751 101 h[0] = 0;
wolfSSL 11:cee25a834751 102 h[1] = 0;
wolfSSL 11:cee25a834751 103 h[2] = 0;
wolfSSL 11:cee25a834751 104 h[3] = 0;
wolfSSL 11:cee25a834751 105 h[4] = 0;
wolfSSL 11:cee25a834751 106 h[5] = 0;
wolfSSL 11:cee25a834751 107 h[6] = 0;
wolfSSL 11:cee25a834751 108 h[7] = 0;
wolfSSL 11:cee25a834751 109 h[8] = 0;
wolfSSL 11:cee25a834751 110 h[9] = 0;
wolfSSL 11:cee25a834751 111 }
wolfSSL 11:cee25a834751 112
wolfSSL 11:cee25a834751 113 #ifndef FREESCALE_LTC_ECC
wolfSSL 11:cee25a834751 114 int curve25519(byte* q, byte* n, byte* p)
wolfSSL 11:cee25a834751 115 {
wolfSSL 11:cee25a834751 116 #if 0
wolfSSL 11:cee25a834751 117 unsigned char e[32];
wolfSSL 11:cee25a834751 118 #endif
wolfSSL 11:cee25a834751 119 fe x1;
wolfSSL 11:cee25a834751 120 fe x2;
wolfSSL 11:cee25a834751 121 fe z2;
wolfSSL 11:cee25a834751 122 fe x3;
wolfSSL 11:cee25a834751 123 fe z3;
wolfSSL 11:cee25a834751 124 fe tmp0;
wolfSSL 11:cee25a834751 125 fe tmp1;
wolfSSL 11:cee25a834751 126 int pos;
wolfSSL 11:cee25a834751 127 unsigned int swap;
wolfSSL 11:cee25a834751 128 unsigned int b;
wolfSSL 11:cee25a834751 129
wolfSSL 11:cee25a834751 130 /* Clamp already done during key generation and import */
wolfSSL 11:cee25a834751 131 #if 0
wolfSSL 11:cee25a834751 132 {
wolfSSL 11:cee25a834751 133 unsigned int i;
wolfSSL 11:cee25a834751 134 for (i = 0;i < 32;++i) e[i] = n[i];
wolfSSL 11:cee25a834751 135 e[0] &= 248;
wolfSSL 11:cee25a834751 136 e[31] &= 127;
wolfSSL 11:cee25a834751 137 e[31] |= 64;
wolfSSL 11:cee25a834751 138 }
wolfSSL 11:cee25a834751 139 #endif
wolfSSL 11:cee25a834751 140
wolfSSL 11:cee25a834751 141 fe_frombytes(x1,p);
wolfSSL 11:cee25a834751 142 fe_1(x2);
wolfSSL 11:cee25a834751 143 fe_0(z2);
wolfSSL 11:cee25a834751 144 fe_copy(x3,x1);
wolfSSL 11:cee25a834751 145 fe_1(z3);
wolfSSL 11:cee25a834751 146
wolfSSL 11:cee25a834751 147 swap = 0;
wolfSSL 11:cee25a834751 148 for (pos = 254;pos >= 0;--pos) {
wolfSSL 11:cee25a834751 149 #if 0
wolfSSL 11:cee25a834751 150 b = e[pos / 8] >> (pos & 7);
wolfSSL 11:cee25a834751 151 #else
wolfSSL 11:cee25a834751 152 b = n[pos / 8] >> (pos & 7);
wolfSSL 11:cee25a834751 153 #endif
wolfSSL 11:cee25a834751 154 b &= 1;
wolfSSL 11:cee25a834751 155 swap ^= b;
wolfSSL 11:cee25a834751 156 fe_cswap(x2,x3,swap);
wolfSSL 11:cee25a834751 157 fe_cswap(z2,z3,swap);
wolfSSL 11:cee25a834751 158 swap = b;
wolfSSL 11:cee25a834751 159
wolfSSL 11:cee25a834751 160 /* montgomery */
wolfSSL 11:cee25a834751 161 fe_sub(tmp0,x3,z3);
wolfSSL 11:cee25a834751 162 fe_sub(tmp1,x2,z2);
wolfSSL 11:cee25a834751 163 fe_add(x2,x2,z2);
wolfSSL 11:cee25a834751 164 fe_add(z2,x3,z3);
wolfSSL 11:cee25a834751 165 fe_mul(z3,tmp0,x2);
wolfSSL 11:cee25a834751 166 fe_mul(z2,z2,tmp1);
wolfSSL 11:cee25a834751 167 fe_sq(tmp0,tmp1);
wolfSSL 11:cee25a834751 168 fe_sq(tmp1,x2);
wolfSSL 11:cee25a834751 169 fe_add(x3,z3,z2);
wolfSSL 11:cee25a834751 170 fe_sub(z2,z3,z2);
wolfSSL 11:cee25a834751 171 fe_mul(x2,tmp1,tmp0);
wolfSSL 11:cee25a834751 172 fe_sub(tmp1,tmp1,tmp0);
wolfSSL 11:cee25a834751 173 fe_sq(z2,z2);
wolfSSL 11:cee25a834751 174 fe_mul121666(z3,tmp1);
wolfSSL 11:cee25a834751 175 fe_sq(x3,x3);
wolfSSL 11:cee25a834751 176 fe_add(tmp0,tmp0,z3);
wolfSSL 11:cee25a834751 177 fe_mul(z3,x1,z2);
wolfSSL 11:cee25a834751 178 fe_mul(z2,tmp1,tmp0);
wolfSSL 11:cee25a834751 179 }
wolfSSL 11:cee25a834751 180 fe_cswap(x2,x3,swap);
wolfSSL 11:cee25a834751 181 fe_cswap(z2,z3,swap);
wolfSSL 11:cee25a834751 182
wolfSSL 11:cee25a834751 183 fe_invert(z2,z2);
wolfSSL 11:cee25a834751 184 fe_mul(x2,x2,z2);
wolfSSL 11:cee25a834751 185 fe_tobytes(q,x2);
wolfSSL 11:cee25a834751 186
wolfSSL 11:cee25a834751 187 return 0;
wolfSSL 11:cee25a834751 188 }
wolfSSL 11:cee25a834751 189 #endif /* !FREESCALE_LTC_ECC */
wolfSSL 11:cee25a834751 190
wolfSSL 11:cee25a834751 191 /*
wolfSSL 11:cee25a834751 192 h = f * f
wolfSSL 11:cee25a834751 193 Can overlap h with f.
wolfSSL 11:cee25a834751 194
wolfSSL 11:cee25a834751 195 Preconditions:
wolfSSL 11:cee25a834751 196 |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
wolfSSL 11:cee25a834751 197
wolfSSL 11:cee25a834751 198 Postconditions:
wolfSSL 11:cee25a834751 199 |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
wolfSSL 11:cee25a834751 200 */
wolfSSL 11:cee25a834751 201
wolfSSL 11:cee25a834751 202 /*
wolfSSL 11:cee25a834751 203 See fe_mul.c for discussion of implementation strategy.
wolfSSL 11:cee25a834751 204 */
wolfSSL 11:cee25a834751 205
wolfSSL 11:cee25a834751 206 void fe_sq(fe h,const fe f)
wolfSSL 11:cee25a834751 207 {
wolfSSL 11:cee25a834751 208 int32_t f0 = f[0];
wolfSSL 11:cee25a834751 209 int32_t f1 = f[1];
wolfSSL 11:cee25a834751 210 int32_t f2 = f[2];
wolfSSL 11:cee25a834751 211 int32_t f3 = f[3];
wolfSSL 11:cee25a834751 212 int32_t f4 = f[4];
wolfSSL 11:cee25a834751 213 int32_t f5 = f[5];
wolfSSL 11:cee25a834751 214 int32_t f6 = f[6];
wolfSSL 11:cee25a834751 215 int32_t f7 = f[7];
wolfSSL 11:cee25a834751 216 int32_t f8 = f[8];
wolfSSL 11:cee25a834751 217 int32_t f9 = f[9];
wolfSSL 11:cee25a834751 218 int32_t f0_2 = 2 * f0;
wolfSSL 11:cee25a834751 219 int32_t f1_2 = 2 * f1;
wolfSSL 11:cee25a834751 220 int32_t f2_2 = 2 * f2;
wolfSSL 11:cee25a834751 221 int32_t f3_2 = 2 * f3;
wolfSSL 11:cee25a834751 222 int32_t f4_2 = 2 * f4;
wolfSSL 11:cee25a834751 223 int32_t f5_2 = 2 * f5;
wolfSSL 11:cee25a834751 224 int32_t f6_2 = 2 * f6;
wolfSSL 11:cee25a834751 225 int32_t f7_2 = 2 * f7;
wolfSSL 11:cee25a834751 226 int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
wolfSSL 11:cee25a834751 227 int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
wolfSSL 11:cee25a834751 228 int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
wolfSSL 11:cee25a834751 229 int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
wolfSSL 11:cee25a834751 230 int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
wolfSSL 11:cee25a834751 231 int64_t f0f0 = f0 * (int64_t) f0;
wolfSSL 11:cee25a834751 232 int64_t f0f1_2 = f0_2 * (int64_t) f1;
wolfSSL 11:cee25a834751 233 int64_t f0f2_2 = f0_2 * (int64_t) f2;
wolfSSL 11:cee25a834751 234 int64_t f0f3_2 = f0_2 * (int64_t) f3;
wolfSSL 11:cee25a834751 235 int64_t f0f4_2 = f0_2 * (int64_t) f4;
wolfSSL 11:cee25a834751 236 int64_t f0f5_2 = f0_2 * (int64_t) f5;
wolfSSL 11:cee25a834751 237 int64_t f0f6_2 = f0_2 * (int64_t) f6;
wolfSSL 11:cee25a834751 238 int64_t f0f7_2 = f0_2 * (int64_t) f7;
wolfSSL 11:cee25a834751 239 int64_t f0f8_2 = f0_2 * (int64_t) f8;
wolfSSL 11:cee25a834751 240 int64_t f0f9_2 = f0_2 * (int64_t) f9;
wolfSSL 11:cee25a834751 241 int64_t f1f1_2 = f1_2 * (int64_t) f1;
wolfSSL 11:cee25a834751 242 int64_t f1f2_2 = f1_2 * (int64_t) f2;
wolfSSL 11:cee25a834751 243 int64_t f1f3_4 = f1_2 * (int64_t) f3_2;
wolfSSL 11:cee25a834751 244 int64_t f1f4_2 = f1_2 * (int64_t) f4;
wolfSSL 11:cee25a834751 245 int64_t f1f5_4 = f1_2 * (int64_t) f5_2;
wolfSSL 11:cee25a834751 246 int64_t f1f6_2 = f1_2 * (int64_t) f6;
wolfSSL 11:cee25a834751 247 int64_t f1f7_4 = f1_2 * (int64_t) f7_2;
wolfSSL 11:cee25a834751 248 int64_t f1f8_2 = f1_2 * (int64_t) f8;
wolfSSL 11:cee25a834751 249 int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
wolfSSL 11:cee25a834751 250 int64_t f2f2 = f2 * (int64_t) f2;
wolfSSL 11:cee25a834751 251 int64_t f2f3_2 = f2_2 * (int64_t) f3;
wolfSSL 11:cee25a834751 252 int64_t f2f4_2 = f2_2 * (int64_t) f4;
wolfSSL 11:cee25a834751 253 int64_t f2f5_2 = f2_2 * (int64_t) f5;
wolfSSL 11:cee25a834751 254 int64_t f2f6_2 = f2_2 * (int64_t) f6;
wolfSSL 11:cee25a834751 255 int64_t f2f7_2 = f2_2 * (int64_t) f7;
wolfSSL 11:cee25a834751 256 int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
wolfSSL 11:cee25a834751 257 int64_t f2f9_38 = f2 * (int64_t) f9_38;
wolfSSL 11:cee25a834751 258 int64_t f3f3_2 = f3_2 * (int64_t) f3;
wolfSSL 11:cee25a834751 259 int64_t f3f4_2 = f3_2 * (int64_t) f4;
wolfSSL 11:cee25a834751 260 int64_t f3f5_4 = f3_2 * (int64_t) f5_2;
wolfSSL 11:cee25a834751 261 int64_t f3f6_2 = f3_2 * (int64_t) f6;
wolfSSL 11:cee25a834751 262 int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
wolfSSL 11:cee25a834751 263 int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
wolfSSL 11:cee25a834751 264 int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
wolfSSL 11:cee25a834751 265 int64_t f4f4 = f4 * (int64_t) f4;
wolfSSL 11:cee25a834751 266 int64_t f4f5_2 = f4_2 * (int64_t) f5;
wolfSSL 11:cee25a834751 267 int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
wolfSSL 11:cee25a834751 268 int64_t f4f7_38 = f4 * (int64_t) f7_38;
wolfSSL 11:cee25a834751 269 int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
wolfSSL 11:cee25a834751 270 int64_t f4f9_38 = f4 * (int64_t) f9_38;
wolfSSL 11:cee25a834751 271 int64_t f5f5_38 = f5 * (int64_t) f5_38;
wolfSSL 11:cee25a834751 272 int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
wolfSSL 11:cee25a834751 273 int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
wolfSSL 11:cee25a834751 274 int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
wolfSSL 11:cee25a834751 275 int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
wolfSSL 11:cee25a834751 276 int64_t f6f6_19 = f6 * (int64_t) f6_19;
wolfSSL 11:cee25a834751 277 int64_t f6f7_38 = f6 * (int64_t) f7_38;
wolfSSL 11:cee25a834751 278 int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
wolfSSL 11:cee25a834751 279 int64_t f6f9_38 = f6 * (int64_t) f9_38;
wolfSSL 11:cee25a834751 280 int64_t f7f7_38 = f7 * (int64_t) f7_38;
wolfSSL 11:cee25a834751 281 int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
wolfSSL 11:cee25a834751 282 int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
wolfSSL 11:cee25a834751 283 int64_t f8f8_19 = f8 * (int64_t) f8_19;
wolfSSL 11:cee25a834751 284 int64_t f8f9_38 = f8 * (int64_t) f9_38;
wolfSSL 11:cee25a834751 285 int64_t f9f9_38 = f9 * (int64_t) f9_38;
wolfSSL 11:cee25a834751 286 int64_t h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
wolfSSL 11:cee25a834751 287 int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
wolfSSL 11:cee25a834751 288 int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
wolfSSL 11:cee25a834751 289 int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
wolfSSL 11:cee25a834751 290 int64_t h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38;
wolfSSL 11:cee25a834751 291 int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
wolfSSL 11:cee25a834751 292 int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
wolfSSL 11:cee25a834751 293 int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
wolfSSL 11:cee25a834751 294 int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38;
wolfSSL 11:cee25a834751 295 int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
wolfSSL 11:cee25a834751 296 int64_t carry0;
wolfSSL 11:cee25a834751 297 int64_t carry1;
wolfSSL 11:cee25a834751 298 int64_t carry2;
wolfSSL 11:cee25a834751 299 int64_t carry3;
wolfSSL 11:cee25a834751 300 int64_t carry4;
wolfSSL 11:cee25a834751 301 int64_t carry5;
wolfSSL 11:cee25a834751 302 int64_t carry6;
wolfSSL 11:cee25a834751 303 int64_t carry7;
wolfSSL 11:cee25a834751 304 int64_t carry8;
wolfSSL 11:cee25a834751 305 int64_t carry9;
wolfSSL 11:cee25a834751 306
wolfSSL 11:cee25a834751 307 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
wolfSSL 11:cee25a834751 308 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
wolfSSL 11:cee25a834751 309
wolfSSL 11:cee25a834751 310 carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
wolfSSL 11:cee25a834751 311 carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
wolfSSL 11:cee25a834751 312
wolfSSL 11:cee25a834751 313 carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
wolfSSL 11:cee25a834751 314 carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
wolfSSL 11:cee25a834751 315
wolfSSL 11:cee25a834751 316 carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
wolfSSL 11:cee25a834751 317 carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
wolfSSL 11:cee25a834751 318
wolfSSL 11:cee25a834751 319 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
wolfSSL 11:cee25a834751 320 carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
wolfSSL 11:cee25a834751 321
wolfSSL 11:cee25a834751 322 carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
wolfSSL 11:cee25a834751 323
wolfSSL 11:cee25a834751 324 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
wolfSSL 11:cee25a834751 325
wolfSSL 11:cee25a834751 326 h[0] = (int32_t)h0;
wolfSSL 11:cee25a834751 327 h[1] = (int32_t)h1;
wolfSSL 11:cee25a834751 328 h[2] = (int32_t)h2;
wolfSSL 11:cee25a834751 329 h[3] = (int32_t)h3;
wolfSSL 11:cee25a834751 330 h[4] = (int32_t)h4;
wolfSSL 11:cee25a834751 331 h[5] = (int32_t)h5;
wolfSSL 11:cee25a834751 332 h[6] = (int32_t)h6;
wolfSSL 11:cee25a834751 333 h[7] = (int32_t)h7;
wolfSSL 11:cee25a834751 334 h[8] = (int32_t)h8;
wolfSSL 11:cee25a834751 335 h[9] = (int32_t)h9;
wolfSSL 11:cee25a834751 336 }
wolfSSL 11:cee25a834751 337
wolfSSL 11:cee25a834751 338
wolfSSL 11:cee25a834751 339 /*
wolfSSL 11:cee25a834751 340 h = f + g
wolfSSL 11:cee25a834751 341 Can overlap h with f or g.
wolfSSL 11:cee25a834751 342
wolfSSL 11:cee25a834751 343 Preconditions:
wolfSSL 11:cee25a834751 344 |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
wolfSSL 11:cee25a834751 345 |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
wolfSSL 11:cee25a834751 346
wolfSSL 11:cee25a834751 347 Postconditions:
wolfSSL 11:cee25a834751 348 |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
wolfSSL 11:cee25a834751 349 */
wolfSSL 11:cee25a834751 350
wolfSSL 11:cee25a834751 351 void fe_add(fe h,const fe f,const fe g)
wolfSSL 11:cee25a834751 352 {
wolfSSL 11:cee25a834751 353 int32_t f0 = f[0];
wolfSSL 11:cee25a834751 354 int32_t f1 = f[1];
wolfSSL 11:cee25a834751 355 int32_t f2 = f[2];
wolfSSL 11:cee25a834751 356 int32_t f3 = f[3];
wolfSSL 11:cee25a834751 357 int32_t f4 = f[4];
wolfSSL 11:cee25a834751 358 int32_t f5 = f[5];
wolfSSL 11:cee25a834751 359 int32_t f6 = f[6];
wolfSSL 11:cee25a834751 360 int32_t f7 = f[7];
wolfSSL 11:cee25a834751 361 int32_t f8 = f[8];
wolfSSL 11:cee25a834751 362 int32_t f9 = f[9];
wolfSSL 11:cee25a834751 363 int32_t g0 = g[0];
wolfSSL 11:cee25a834751 364 int32_t g1 = g[1];
wolfSSL 11:cee25a834751 365 int32_t g2 = g[2];
wolfSSL 11:cee25a834751 366 int32_t g3 = g[3];
wolfSSL 11:cee25a834751 367 int32_t g4 = g[4];
wolfSSL 11:cee25a834751 368 int32_t g5 = g[5];
wolfSSL 11:cee25a834751 369 int32_t g6 = g[6];
wolfSSL 11:cee25a834751 370 int32_t g7 = g[7];
wolfSSL 11:cee25a834751 371 int32_t g8 = g[8];
wolfSSL 11:cee25a834751 372 int32_t g9 = g[9];
wolfSSL 11:cee25a834751 373 int32_t h0 = f0 + g0;
wolfSSL 11:cee25a834751 374 int32_t h1 = f1 + g1;
wolfSSL 11:cee25a834751 375 int32_t h2 = f2 + g2;
wolfSSL 11:cee25a834751 376 int32_t h3 = f3 + g3;
wolfSSL 11:cee25a834751 377 int32_t h4 = f4 + g4;
wolfSSL 11:cee25a834751 378 int32_t h5 = f5 + g5;
wolfSSL 11:cee25a834751 379 int32_t h6 = f6 + g6;
wolfSSL 11:cee25a834751 380 int32_t h7 = f7 + g7;
wolfSSL 11:cee25a834751 381 int32_t h8 = f8 + g8;
wolfSSL 11:cee25a834751 382 int32_t h9 = f9 + g9;
wolfSSL 11:cee25a834751 383 h[0] = h0;
wolfSSL 11:cee25a834751 384 h[1] = h1;
wolfSSL 11:cee25a834751 385 h[2] = h2;
wolfSSL 11:cee25a834751 386 h[3] = h3;
wolfSSL 11:cee25a834751 387 h[4] = h4;
wolfSSL 11:cee25a834751 388 h[5] = h5;
wolfSSL 11:cee25a834751 389 h[6] = h6;
wolfSSL 11:cee25a834751 390 h[7] = h7;
wolfSSL 11:cee25a834751 391 h[8] = h8;
wolfSSL 11:cee25a834751 392 h[9] = h9;
wolfSSL 11:cee25a834751 393 }
wolfSSL 11:cee25a834751 394
wolfSSL 11:cee25a834751 395
wolfSSL 11:cee25a834751 396 /*
wolfSSL 11:cee25a834751 397 Preconditions:
wolfSSL 11:cee25a834751 398 |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
wolfSSL 11:cee25a834751 399
wolfSSL 11:cee25a834751 400 Write p=2^255-19; q=floor(h/p).
wolfSSL 11:cee25a834751 401 Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
wolfSSL 11:cee25a834751 402
wolfSSL 11:cee25a834751 403 Proof:
wolfSSL 11:cee25a834751 404 Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
wolfSSL 11:cee25a834751 405 Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4.
wolfSSL 11:cee25a834751 406
wolfSSL 11:cee25a834751 407 Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
wolfSSL 11:cee25a834751 408 Then 0<y<1.
wolfSSL 11:cee25a834751 409
wolfSSL 11:cee25a834751 410 Write r=h-pq.
wolfSSL 11:cee25a834751 411 Have 0<=r<=p-1=2^255-20.
wolfSSL 11:cee25a834751 412 Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
wolfSSL 11:cee25a834751 413
wolfSSL 11:cee25a834751 414 Write x=r+19(2^-255)r+y.
wolfSSL 11:cee25a834751 415 Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
wolfSSL 11:cee25a834751 416
wolfSSL 11:cee25a834751 417 Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
wolfSSL 11:cee25a834751 418 so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
wolfSSL 11:cee25a834751 419 */
wolfSSL 11:cee25a834751 420
wolfSSL 11:cee25a834751 421 void fe_tobytes(unsigned char *s,const fe h)
wolfSSL 11:cee25a834751 422 {
wolfSSL 11:cee25a834751 423 int32_t h0 = h[0];
wolfSSL 11:cee25a834751 424 int32_t h1 = h[1];
wolfSSL 11:cee25a834751 425 int32_t h2 = h[2];
wolfSSL 11:cee25a834751 426 int32_t h3 = h[3];
wolfSSL 11:cee25a834751 427 int32_t h4 = h[4];
wolfSSL 11:cee25a834751 428 int32_t h5 = h[5];
wolfSSL 11:cee25a834751 429 int32_t h6 = h[6];
wolfSSL 11:cee25a834751 430 int32_t h7 = h[7];
wolfSSL 11:cee25a834751 431 int32_t h8 = h[8];
wolfSSL 11:cee25a834751 432 int32_t h9 = h[9];
wolfSSL 11:cee25a834751 433 int32_t q;
wolfSSL 11:cee25a834751 434 int32_t carry0;
wolfSSL 11:cee25a834751 435 int32_t carry1;
wolfSSL 11:cee25a834751 436 int32_t carry2;
wolfSSL 11:cee25a834751 437 int32_t carry3;
wolfSSL 11:cee25a834751 438 int32_t carry4;
wolfSSL 11:cee25a834751 439 int32_t carry5;
wolfSSL 11:cee25a834751 440 int32_t carry6;
wolfSSL 11:cee25a834751 441 int32_t carry7;
wolfSSL 11:cee25a834751 442 int32_t carry8;
wolfSSL 11:cee25a834751 443 int32_t carry9;
wolfSSL 11:cee25a834751 444
wolfSSL 11:cee25a834751 445 q = (19 * h9 + (((int32_t) 1) << 24)) >> 25;
wolfSSL 11:cee25a834751 446 q = (h0 + q) >> 26;
wolfSSL 11:cee25a834751 447 q = (h1 + q) >> 25;
wolfSSL 11:cee25a834751 448 q = (h2 + q) >> 26;
wolfSSL 11:cee25a834751 449 q = (h3 + q) >> 25;
wolfSSL 11:cee25a834751 450 q = (h4 + q) >> 26;
wolfSSL 11:cee25a834751 451 q = (h5 + q) >> 25;
wolfSSL 11:cee25a834751 452 q = (h6 + q) >> 26;
wolfSSL 11:cee25a834751 453 q = (h7 + q) >> 25;
wolfSSL 11:cee25a834751 454 q = (h8 + q) >> 26;
wolfSSL 11:cee25a834751 455 q = (h9 + q) >> 25;
wolfSSL 11:cee25a834751 456
wolfSSL 11:cee25a834751 457 /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
wolfSSL 11:cee25a834751 458 h0 += 19 * q;
wolfSSL 11:cee25a834751 459 /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
wolfSSL 11:cee25a834751 460
wolfSSL 11:cee25a834751 461 carry0 = h0 >> 26; h1 += carry0; h0 -= carry0 << 26;
wolfSSL 11:cee25a834751 462 carry1 = h1 >> 25; h2 += carry1; h1 -= carry1 << 25;
wolfSSL 11:cee25a834751 463 carry2 = h2 >> 26; h3 += carry2; h2 -= carry2 << 26;
wolfSSL 11:cee25a834751 464 carry3 = h3 >> 25; h4 += carry3; h3 -= carry3 << 25;
wolfSSL 11:cee25a834751 465 carry4 = h4 >> 26; h5 += carry4; h4 -= carry4 << 26;
wolfSSL 11:cee25a834751 466 carry5 = h5 >> 25; h6 += carry5; h5 -= carry5 << 25;
wolfSSL 11:cee25a834751 467 carry6 = h6 >> 26; h7 += carry6; h6 -= carry6 << 26;
wolfSSL 11:cee25a834751 468 carry7 = h7 >> 25; h8 += carry7; h7 -= carry7 << 25;
wolfSSL 11:cee25a834751 469 carry8 = h8 >> 26; h9 += carry8; h8 -= carry8 << 26;
wolfSSL 11:cee25a834751 470 carry9 = h9 >> 25; h9 -= carry9 << 25;
wolfSSL 11:cee25a834751 471 /* h10 = carry9 */
wolfSSL 11:cee25a834751 472
wolfSSL 11:cee25a834751 473 /*
wolfSSL 11:cee25a834751 474 Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
wolfSSL 11:cee25a834751 475 Have h0+...+2^230 h9 between 0 and 2^255-1;
wolfSSL 11:cee25a834751 476 evidently 2^255 h10-2^255 q = 0.
wolfSSL 11:cee25a834751 477 Goal: Output h0+...+2^230 h9.
wolfSSL 11:cee25a834751 478 */
wolfSSL 11:cee25a834751 479
wolfSSL 11:cee25a834751 480 s[0] = h0 >> 0;
wolfSSL 11:cee25a834751 481 s[1] = h0 >> 8;
wolfSSL 11:cee25a834751 482 s[2] = h0 >> 16;
wolfSSL 11:cee25a834751 483 s[3] = (h0 >> 24) | (h1 << 2);
wolfSSL 11:cee25a834751 484 s[4] = h1 >> 6;
wolfSSL 11:cee25a834751 485 s[5] = h1 >> 14;
wolfSSL 11:cee25a834751 486 s[6] = (h1 >> 22) | (h2 << 3);
wolfSSL 11:cee25a834751 487 s[7] = h2 >> 5;
wolfSSL 11:cee25a834751 488 s[8] = h2 >> 13;
wolfSSL 11:cee25a834751 489 s[9] = (h2 >> 21) | (h3 << 5);
wolfSSL 11:cee25a834751 490 s[10] = h3 >> 3;
wolfSSL 11:cee25a834751 491 s[11] = h3 >> 11;
wolfSSL 11:cee25a834751 492 s[12] = (h3 >> 19) | (h4 << 6);
wolfSSL 11:cee25a834751 493 s[13] = h4 >> 2;
wolfSSL 11:cee25a834751 494 s[14] = h4 >> 10;
wolfSSL 11:cee25a834751 495 s[15] = h4 >> 18;
wolfSSL 11:cee25a834751 496 s[16] = h5 >> 0;
wolfSSL 11:cee25a834751 497 s[17] = h5 >> 8;
wolfSSL 11:cee25a834751 498 s[18] = h5 >> 16;
wolfSSL 11:cee25a834751 499 s[19] = (h5 >> 24) | (h6 << 1);
wolfSSL 11:cee25a834751 500 s[20] = h6 >> 7;
wolfSSL 11:cee25a834751 501 s[21] = h6 >> 15;
wolfSSL 11:cee25a834751 502 s[22] = (h6 >> 23) | (h7 << 3);
wolfSSL 11:cee25a834751 503 s[23] = h7 >> 5;
wolfSSL 11:cee25a834751 504 s[24] = h7 >> 13;
wolfSSL 11:cee25a834751 505 s[25] = (h7 >> 21) | (h8 << 4);
wolfSSL 11:cee25a834751 506 s[26] = h8 >> 4;
wolfSSL 11:cee25a834751 507 s[27] = h8 >> 12;
wolfSSL 11:cee25a834751 508 s[28] = (h8 >> 20) | (h9 << 6);
wolfSSL 11:cee25a834751 509 s[29] = h9 >> 2;
wolfSSL 11:cee25a834751 510 s[30] = h9 >> 10;
wolfSSL 11:cee25a834751 511 s[31] = h9 >> 18;
wolfSSL 11:cee25a834751 512 }
wolfSSL 11:cee25a834751 513
wolfSSL 11:cee25a834751 514
wolfSSL 11:cee25a834751 515 /*
wolfSSL 11:cee25a834751 516 h = f - g
wolfSSL 11:cee25a834751 517 Can overlap h with f or g.
wolfSSL 11:cee25a834751 518
wolfSSL 11:cee25a834751 519 Preconditions:
wolfSSL 11:cee25a834751 520 |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
wolfSSL 11:cee25a834751 521 |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
wolfSSL 11:cee25a834751 522
wolfSSL 11:cee25a834751 523 Postconditions:
wolfSSL 11:cee25a834751 524 |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
wolfSSL 11:cee25a834751 525 */
wolfSSL 11:cee25a834751 526
wolfSSL 11:cee25a834751 527 void fe_sub(fe h,const fe f,const fe g)
wolfSSL 11:cee25a834751 528 {
wolfSSL 11:cee25a834751 529 int32_t f0 = f[0];
wolfSSL 11:cee25a834751 530 int32_t f1 = f[1];
wolfSSL 11:cee25a834751 531 int32_t f2 = f[2];
wolfSSL 11:cee25a834751 532 int32_t f3 = f[3];
wolfSSL 11:cee25a834751 533 int32_t f4 = f[4];
wolfSSL 11:cee25a834751 534 int32_t f5 = f[5];
wolfSSL 11:cee25a834751 535 int32_t f6 = f[6];
wolfSSL 11:cee25a834751 536 int32_t f7 = f[7];
wolfSSL 11:cee25a834751 537 int32_t f8 = f[8];
wolfSSL 11:cee25a834751 538 int32_t f9 = f[9];
wolfSSL 11:cee25a834751 539 int32_t g0 = g[0];
wolfSSL 11:cee25a834751 540 int32_t g1 = g[1];
wolfSSL 11:cee25a834751 541 int32_t g2 = g[2];
wolfSSL 11:cee25a834751 542 int32_t g3 = g[3];
wolfSSL 11:cee25a834751 543 int32_t g4 = g[4];
wolfSSL 11:cee25a834751 544 int32_t g5 = g[5];
wolfSSL 11:cee25a834751 545 int32_t g6 = g[6];
wolfSSL 11:cee25a834751 546 int32_t g7 = g[7];
wolfSSL 11:cee25a834751 547 int32_t g8 = g[8];
wolfSSL 11:cee25a834751 548 int32_t g9 = g[9];
wolfSSL 11:cee25a834751 549 int32_t h0 = f0 - g0;
wolfSSL 11:cee25a834751 550 int32_t h1 = f1 - g1;
wolfSSL 11:cee25a834751 551 int32_t h2 = f2 - g2;
wolfSSL 11:cee25a834751 552 int32_t h3 = f3 - g3;
wolfSSL 11:cee25a834751 553 int32_t h4 = f4 - g4;
wolfSSL 11:cee25a834751 554 int32_t h5 = f5 - g5;
wolfSSL 11:cee25a834751 555 int32_t h6 = f6 - g6;
wolfSSL 11:cee25a834751 556 int32_t h7 = f7 - g7;
wolfSSL 11:cee25a834751 557 int32_t h8 = f8 - g8;
wolfSSL 11:cee25a834751 558 int32_t h9 = f9 - g9;
wolfSSL 11:cee25a834751 559 h[0] = h0;
wolfSSL 11:cee25a834751 560 h[1] = h1;
wolfSSL 11:cee25a834751 561 h[2] = h2;
wolfSSL 11:cee25a834751 562 h[3] = h3;
wolfSSL 11:cee25a834751 563 h[4] = h4;
wolfSSL 11:cee25a834751 564 h[5] = h5;
wolfSSL 11:cee25a834751 565 h[6] = h6;
wolfSSL 11:cee25a834751 566 h[7] = h7;
wolfSSL 11:cee25a834751 567 h[8] = h8;
wolfSSL 11:cee25a834751 568 h[9] = h9;
wolfSSL 11:cee25a834751 569 }
wolfSSL 11:cee25a834751 570
wolfSSL 11:cee25a834751 571
wolfSSL 11:cee25a834751 572 /*
wolfSSL 11:cee25a834751 573 Ignores top bit of h.
wolfSSL 11:cee25a834751 574 */
wolfSSL 11:cee25a834751 575
wolfSSL 11:cee25a834751 576 void fe_frombytes(fe h,const unsigned char *s)
wolfSSL 11:cee25a834751 577 {
wolfSSL 11:cee25a834751 578 int64_t h0 = load_4(s);
wolfSSL 11:cee25a834751 579 int64_t h1 = load_3(s + 4) << 6;
wolfSSL 11:cee25a834751 580 int64_t h2 = load_3(s + 7) << 5;
wolfSSL 11:cee25a834751 581 int64_t h3 = load_3(s + 10) << 3;
wolfSSL 11:cee25a834751 582 int64_t h4 = load_3(s + 13) << 2;
wolfSSL 11:cee25a834751 583 int64_t h5 = load_4(s + 16);
wolfSSL 11:cee25a834751 584 int64_t h6 = load_3(s + 20) << 7;
wolfSSL 11:cee25a834751 585 int64_t h7 = load_3(s + 23) << 5;
wolfSSL 11:cee25a834751 586 int64_t h8 = load_3(s + 26) << 4;
wolfSSL 11:cee25a834751 587 int64_t h9 = (load_3(s + 29) & 8388607) << 2;
wolfSSL 11:cee25a834751 588 int64_t carry0;
wolfSSL 11:cee25a834751 589 int64_t carry1;
wolfSSL 11:cee25a834751 590 int64_t carry2;
wolfSSL 11:cee25a834751 591 int64_t carry3;
wolfSSL 11:cee25a834751 592 int64_t carry4;
wolfSSL 11:cee25a834751 593 int64_t carry5;
wolfSSL 11:cee25a834751 594 int64_t carry6;
wolfSSL 11:cee25a834751 595 int64_t carry7;
wolfSSL 11:cee25a834751 596 int64_t carry8;
wolfSSL 11:cee25a834751 597 int64_t carry9;
wolfSSL 11:cee25a834751 598
wolfSSL 11:cee25a834751 599 carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
wolfSSL 11:cee25a834751 600 carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
wolfSSL 11:cee25a834751 601 carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
wolfSSL 11:cee25a834751 602 carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
wolfSSL 11:cee25a834751 603 carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
wolfSSL 11:cee25a834751 604
wolfSSL 11:cee25a834751 605 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
wolfSSL 11:cee25a834751 606 carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
wolfSSL 11:cee25a834751 607 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
wolfSSL 11:cee25a834751 608 carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
wolfSSL 11:cee25a834751 609 carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
wolfSSL 11:cee25a834751 610
wolfSSL 11:cee25a834751 611 h[0] = (int32_t)h0;
wolfSSL 11:cee25a834751 612 h[1] = (int32_t)h1;
wolfSSL 11:cee25a834751 613 h[2] = (int32_t)h2;
wolfSSL 11:cee25a834751 614 h[3] = (int32_t)h3;
wolfSSL 11:cee25a834751 615 h[4] = (int32_t)h4;
wolfSSL 11:cee25a834751 616 h[5] = (int32_t)h5;
wolfSSL 11:cee25a834751 617 h[6] = (int32_t)h6;
wolfSSL 11:cee25a834751 618 h[7] = (int32_t)h7;
wolfSSL 11:cee25a834751 619 h[8] = (int32_t)h8;
wolfSSL 11:cee25a834751 620 h[9] = (int32_t)h9;
wolfSSL 11:cee25a834751 621 }
wolfSSL 11:cee25a834751 622
wolfSSL 11:cee25a834751 623
wolfSSL 11:cee25a834751 624 void fe_invert(fe out,const fe z)
wolfSSL 11:cee25a834751 625 {
wolfSSL 11:cee25a834751 626 fe t0;
wolfSSL 11:cee25a834751 627 fe t1;
wolfSSL 11:cee25a834751 628 fe t2;
wolfSSL 11:cee25a834751 629 fe t3;
wolfSSL 11:cee25a834751 630 int i;
wolfSSL 11:cee25a834751 631
wolfSSL 11:cee25a834751 632 /* pow225521 */
wolfSSL 11:cee25a834751 633 fe_sq(t0,z); for (i = 1;i < 1;++i) fe_sq(t0,t0);
wolfSSL 11:cee25a834751 634 fe_sq(t1,t0); for (i = 1;i < 2;++i) fe_sq(t1,t1);
wolfSSL 11:cee25a834751 635 fe_mul(t1,z,t1);
wolfSSL 11:cee25a834751 636 fe_mul(t0,t0,t1);
wolfSSL 11:cee25a834751 637 fe_sq(t2,t0); for (i = 1;i < 1;++i) fe_sq(t2,t2);
wolfSSL 11:cee25a834751 638 fe_mul(t1,t1,t2);
wolfSSL 11:cee25a834751 639 fe_sq(t2,t1); for (i = 1;i < 5;++i) fe_sq(t2,t2);
wolfSSL 11:cee25a834751 640 fe_mul(t1,t2,t1);
wolfSSL 11:cee25a834751 641 fe_sq(t2,t1); for (i = 1;i < 10;++i) fe_sq(t2,t2);
wolfSSL 11:cee25a834751 642 fe_mul(t2,t2,t1);
wolfSSL 11:cee25a834751 643 fe_sq(t3,t2); for (i = 1;i < 20;++i) fe_sq(t3,t3);
wolfSSL 11:cee25a834751 644 fe_mul(t2,t3,t2);
wolfSSL 11:cee25a834751 645 fe_sq(t2,t2); for (i = 1;i < 10;++i) fe_sq(t2,t2);
wolfSSL 11:cee25a834751 646 fe_mul(t1,t2,t1);
wolfSSL 11:cee25a834751 647 fe_sq(t2,t1); for (i = 1;i < 50;++i) fe_sq(t2,t2);
wolfSSL 11:cee25a834751 648 fe_mul(t2,t2,t1);
wolfSSL 11:cee25a834751 649 fe_sq(t3,t2); for (i = 1;i < 100;++i) fe_sq(t3,t3);
wolfSSL 11:cee25a834751 650 fe_mul(t2,t3,t2);
wolfSSL 11:cee25a834751 651 fe_sq(t2,t2); for (i = 1;i < 50;++i) fe_sq(t2,t2);
wolfSSL 11:cee25a834751 652 fe_mul(t1,t2,t1);
wolfSSL 11:cee25a834751 653 fe_sq(t1,t1); for (i = 1;i < 5;++i) fe_sq(t1,t1);
wolfSSL 11:cee25a834751 654 fe_mul(out,t1,t0);
wolfSSL 11:cee25a834751 655
wolfSSL 11:cee25a834751 656 return;
wolfSSL 11:cee25a834751 657 }
wolfSSL 11:cee25a834751 658
wolfSSL 11:cee25a834751 659
wolfSSL 11:cee25a834751 660 /*
wolfSSL 11:cee25a834751 661 h = f
wolfSSL 11:cee25a834751 662 */
wolfSSL 11:cee25a834751 663
wolfSSL 11:cee25a834751 664 void fe_copy(fe h,const fe f)
wolfSSL 11:cee25a834751 665 {
wolfSSL 11:cee25a834751 666 int32_t f0 = f[0];
wolfSSL 11:cee25a834751 667 int32_t f1 = f[1];
wolfSSL 11:cee25a834751 668 int32_t f2 = f[2];
wolfSSL 11:cee25a834751 669 int32_t f3 = f[3];
wolfSSL 11:cee25a834751 670 int32_t f4 = f[4];
wolfSSL 11:cee25a834751 671 int32_t f5 = f[5];
wolfSSL 11:cee25a834751 672 int32_t f6 = f[6];
wolfSSL 11:cee25a834751 673 int32_t f7 = f[7];
wolfSSL 11:cee25a834751 674 int32_t f8 = f[8];
wolfSSL 11:cee25a834751 675 int32_t f9 = f[9];
wolfSSL 11:cee25a834751 676 h[0] = f0;
wolfSSL 11:cee25a834751 677 h[1] = f1;
wolfSSL 11:cee25a834751 678 h[2] = f2;
wolfSSL 11:cee25a834751 679 h[3] = f3;
wolfSSL 11:cee25a834751 680 h[4] = f4;
wolfSSL 11:cee25a834751 681 h[5] = f5;
wolfSSL 11:cee25a834751 682 h[6] = f6;
wolfSSL 11:cee25a834751 683 h[7] = f7;
wolfSSL 11:cee25a834751 684 h[8] = f8;
wolfSSL 11:cee25a834751 685 h[9] = f9;
wolfSSL 11:cee25a834751 686 }
wolfSSL 11:cee25a834751 687
wolfSSL 11:cee25a834751 688
wolfSSL 11:cee25a834751 689 /*
wolfSSL 11:cee25a834751 690 h = f * g
wolfSSL 11:cee25a834751 691 Can overlap h with f or g.
wolfSSL 11:cee25a834751 692
wolfSSL 11:cee25a834751 693 Preconditions:
wolfSSL 11:cee25a834751 694 |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
wolfSSL 11:cee25a834751 695 |g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
wolfSSL 11:cee25a834751 696
wolfSSL 11:cee25a834751 697 Postconditions:
wolfSSL 11:cee25a834751 698 |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
wolfSSL 11:cee25a834751 699 */
wolfSSL 11:cee25a834751 700
wolfSSL 11:cee25a834751 701 /*
wolfSSL 11:cee25a834751 702 Notes on implementation strategy:
wolfSSL 11:cee25a834751 703
wolfSSL 11:cee25a834751 704 Using schoolbook multiplication.
wolfSSL 11:cee25a834751 705 Karatsuba would save a little in some cost models.
wolfSSL 11:cee25a834751 706
wolfSSL 11:cee25a834751 707 Most multiplications by 2 and 19 are 32-bit precomputations;
wolfSSL 11:cee25a834751 708 cheaper than 64-bit postcomputations.
wolfSSL 11:cee25a834751 709
wolfSSL 11:cee25a834751 710 There is one remaining multiplication by 19 in the carry chain;
wolfSSL 11:cee25a834751 711 one *19 precomputation can be merged into this,
wolfSSL 11:cee25a834751 712 but the resulting data flow is considerably less clean.
wolfSSL 11:cee25a834751 713
wolfSSL 11:cee25a834751 714 There are 12 carries below.
wolfSSL 11:cee25a834751 715 10 of them are 2-way parallelizable and vectorizable.
wolfSSL 11:cee25a834751 716 Can get away with 11 carries, but then data flow is much deeper.
wolfSSL 11:cee25a834751 717
wolfSSL 11:cee25a834751 718 With tighter constraints on inputs can squeeze carries into int32.
wolfSSL 11:cee25a834751 719 */
wolfSSL 11:cee25a834751 720
wolfSSL 11:cee25a834751 721 void fe_mul(fe h,const fe f,const fe g)
wolfSSL 11:cee25a834751 722 {
wolfSSL 11:cee25a834751 723 int32_t f0 = f[0];
wolfSSL 11:cee25a834751 724 int32_t f1 = f[1];
wolfSSL 11:cee25a834751 725 int32_t f2 = f[2];
wolfSSL 11:cee25a834751 726 int32_t f3 = f[3];
wolfSSL 11:cee25a834751 727 int32_t f4 = f[4];
wolfSSL 11:cee25a834751 728 int32_t f5 = f[5];
wolfSSL 11:cee25a834751 729 int32_t f6 = f[6];
wolfSSL 11:cee25a834751 730 int32_t f7 = f[7];
wolfSSL 11:cee25a834751 731 int32_t f8 = f[8];
wolfSSL 11:cee25a834751 732 int32_t f9 = f[9];
wolfSSL 11:cee25a834751 733 int32_t g0 = g[0];
wolfSSL 11:cee25a834751 734 int32_t g1 = g[1];
wolfSSL 11:cee25a834751 735 int32_t g2 = g[2];
wolfSSL 11:cee25a834751 736 int32_t g3 = g[3];
wolfSSL 11:cee25a834751 737 int32_t g4 = g[4];
wolfSSL 11:cee25a834751 738 int32_t g5 = g[5];
wolfSSL 11:cee25a834751 739 int32_t g6 = g[6];
wolfSSL 11:cee25a834751 740 int32_t g7 = g[7];
wolfSSL 11:cee25a834751 741 int32_t g8 = g[8];
wolfSSL 11:cee25a834751 742 int32_t g9 = g[9];
wolfSSL 11:cee25a834751 743 int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */
wolfSSL 11:cee25a834751 744 int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */
wolfSSL 11:cee25a834751 745 int32_t g3_19 = 19 * g3;
wolfSSL 11:cee25a834751 746 int32_t g4_19 = 19 * g4;
wolfSSL 11:cee25a834751 747 int32_t g5_19 = 19 * g5;
wolfSSL 11:cee25a834751 748 int32_t g6_19 = 19 * g6;
wolfSSL 11:cee25a834751 749 int32_t g7_19 = 19 * g7;
wolfSSL 11:cee25a834751 750 int32_t g8_19 = 19 * g8;
wolfSSL 11:cee25a834751 751 int32_t g9_19 = 19 * g9;
wolfSSL 11:cee25a834751 752 int32_t f1_2 = 2 * f1;
wolfSSL 11:cee25a834751 753 int32_t f3_2 = 2 * f3;
wolfSSL 11:cee25a834751 754 int32_t f5_2 = 2 * f5;
wolfSSL 11:cee25a834751 755 int32_t f7_2 = 2 * f7;
wolfSSL 11:cee25a834751 756 int32_t f9_2 = 2 * f9;
wolfSSL 11:cee25a834751 757 int64_t f0g0 = f0 * (int64_t) g0;
wolfSSL 11:cee25a834751 758 int64_t f0g1 = f0 * (int64_t) g1;
wolfSSL 11:cee25a834751 759 int64_t f0g2 = f0 * (int64_t) g2;
wolfSSL 11:cee25a834751 760 int64_t f0g3 = f0 * (int64_t) g3;
wolfSSL 11:cee25a834751 761 int64_t f0g4 = f0 * (int64_t) g4;
wolfSSL 11:cee25a834751 762 int64_t f0g5 = f0 * (int64_t) g5;
wolfSSL 11:cee25a834751 763 int64_t f0g6 = f0 * (int64_t) g6;
wolfSSL 11:cee25a834751 764 int64_t f0g7 = f0 * (int64_t) g7;
wolfSSL 11:cee25a834751 765 int64_t f0g8 = f0 * (int64_t) g8;
wolfSSL 11:cee25a834751 766 int64_t f0g9 = f0 * (int64_t) g9;
wolfSSL 11:cee25a834751 767 int64_t f1g0 = f1 * (int64_t) g0;
wolfSSL 11:cee25a834751 768 int64_t f1g1_2 = f1_2 * (int64_t) g1;
wolfSSL 11:cee25a834751 769 int64_t f1g2 = f1 * (int64_t) g2;
wolfSSL 11:cee25a834751 770 int64_t f1g3_2 = f1_2 * (int64_t) g3;
wolfSSL 11:cee25a834751 771 int64_t f1g4 = f1 * (int64_t) g4;
wolfSSL 11:cee25a834751 772 int64_t f1g5_2 = f1_2 * (int64_t) g5;
wolfSSL 11:cee25a834751 773 int64_t f1g6 = f1 * (int64_t) g6;
wolfSSL 11:cee25a834751 774 int64_t f1g7_2 = f1_2 * (int64_t) g7;
wolfSSL 11:cee25a834751 775 int64_t f1g8 = f1 * (int64_t) g8;
wolfSSL 11:cee25a834751 776 int64_t f1g9_38 = f1_2 * (int64_t) g9_19;
wolfSSL 11:cee25a834751 777 int64_t f2g0 = f2 * (int64_t) g0;
wolfSSL 11:cee25a834751 778 int64_t f2g1 = f2 * (int64_t) g1;
wolfSSL 11:cee25a834751 779 int64_t f2g2 = f2 * (int64_t) g2;
wolfSSL 11:cee25a834751 780 int64_t f2g3 = f2 * (int64_t) g3;
wolfSSL 11:cee25a834751 781 int64_t f2g4 = f2 * (int64_t) g4;
wolfSSL 11:cee25a834751 782 int64_t f2g5 = f2 * (int64_t) g5;
wolfSSL 11:cee25a834751 783 int64_t f2g6 = f2 * (int64_t) g6;
wolfSSL 11:cee25a834751 784 int64_t f2g7 = f2 * (int64_t) g7;
wolfSSL 11:cee25a834751 785 int64_t f2g8_19 = f2 * (int64_t) g8_19;
wolfSSL 11:cee25a834751 786 int64_t f2g9_19 = f2 * (int64_t) g9_19;
wolfSSL 11:cee25a834751 787 int64_t f3g0 = f3 * (int64_t) g0;
wolfSSL 11:cee25a834751 788 int64_t f3g1_2 = f3_2 * (int64_t) g1;
wolfSSL 11:cee25a834751 789 int64_t f3g2 = f3 * (int64_t) g2;
wolfSSL 11:cee25a834751 790 int64_t f3g3_2 = f3_2 * (int64_t) g3;
wolfSSL 11:cee25a834751 791 int64_t f3g4 = f3 * (int64_t) g4;
wolfSSL 11:cee25a834751 792 int64_t f3g5_2 = f3_2 * (int64_t) g5;
wolfSSL 11:cee25a834751 793 int64_t f3g6 = f3 * (int64_t) g6;
wolfSSL 11:cee25a834751 794 int64_t f3g7_38 = f3_2 * (int64_t) g7_19;
wolfSSL 11:cee25a834751 795 int64_t f3g8_19 = f3 * (int64_t) g8_19;
wolfSSL 11:cee25a834751 796 int64_t f3g9_38 = f3_2 * (int64_t) g9_19;
wolfSSL 11:cee25a834751 797 int64_t f4g0 = f4 * (int64_t) g0;
wolfSSL 11:cee25a834751 798 int64_t f4g1 = f4 * (int64_t) g1;
wolfSSL 11:cee25a834751 799 int64_t f4g2 = f4 * (int64_t) g2;
wolfSSL 11:cee25a834751 800 int64_t f4g3 = f4 * (int64_t) g3;
wolfSSL 11:cee25a834751 801 int64_t f4g4 = f4 * (int64_t) g4;
wolfSSL 11:cee25a834751 802 int64_t f4g5 = f4 * (int64_t) g5;
wolfSSL 11:cee25a834751 803 int64_t f4g6_19 = f4 * (int64_t) g6_19;
wolfSSL 11:cee25a834751 804 int64_t f4g7_19 = f4 * (int64_t) g7_19;
wolfSSL 11:cee25a834751 805 int64_t f4g8_19 = f4 * (int64_t) g8_19;
wolfSSL 11:cee25a834751 806 int64_t f4g9_19 = f4 * (int64_t) g9_19;
wolfSSL 11:cee25a834751 807 int64_t f5g0 = f5 * (int64_t) g0;
wolfSSL 11:cee25a834751 808 int64_t f5g1_2 = f5_2 * (int64_t) g1;
wolfSSL 11:cee25a834751 809 int64_t f5g2 = f5 * (int64_t) g2;
wolfSSL 11:cee25a834751 810 int64_t f5g3_2 = f5_2 * (int64_t) g3;
wolfSSL 11:cee25a834751 811 int64_t f5g4 = f5 * (int64_t) g4;
wolfSSL 11:cee25a834751 812 int64_t f5g5_38 = f5_2 * (int64_t) g5_19;
wolfSSL 11:cee25a834751 813 int64_t f5g6_19 = f5 * (int64_t) g6_19;
wolfSSL 11:cee25a834751 814 int64_t f5g7_38 = f5_2 * (int64_t) g7_19;
wolfSSL 11:cee25a834751 815 int64_t f5g8_19 = f5 * (int64_t) g8_19;
wolfSSL 11:cee25a834751 816 int64_t f5g9_38 = f5_2 * (int64_t) g9_19;
wolfSSL 11:cee25a834751 817 int64_t f6g0 = f6 * (int64_t) g0;
wolfSSL 11:cee25a834751 818 int64_t f6g1 = f6 * (int64_t) g1;
wolfSSL 11:cee25a834751 819 int64_t f6g2 = f6 * (int64_t) g2;
wolfSSL 11:cee25a834751 820 int64_t f6g3 = f6 * (int64_t) g3;
wolfSSL 11:cee25a834751 821 int64_t f6g4_19 = f6 * (int64_t) g4_19;
wolfSSL 11:cee25a834751 822 int64_t f6g5_19 = f6 * (int64_t) g5_19;
wolfSSL 11:cee25a834751 823 int64_t f6g6_19 = f6 * (int64_t) g6_19;
wolfSSL 11:cee25a834751 824 int64_t f6g7_19 = f6 * (int64_t) g7_19;
wolfSSL 11:cee25a834751 825 int64_t f6g8_19 = f6 * (int64_t) g8_19;
wolfSSL 11:cee25a834751 826 int64_t f6g9_19 = f6 * (int64_t) g9_19;
wolfSSL 11:cee25a834751 827 int64_t f7g0 = f7 * (int64_t) g0;
wolfSSL 11:cee25a834751 828 int64_t f7g1_2 = f7_2 * (int64_t) g1;
wolfSSL 11:cee25a834751 829 int64_t f7g2 = f7 * (int64_t) g2;
wolfSSL 11:cee25a834751 830 int64_t f7g3_38 = f7_2 * (int64_t) g3_19;
wolfSSL 11:cee25a834751 831 int64_t f7g4_19 = f7 * (int64_t) g4_19;
wolfSSL 11:cee25a834751 832 int64_t f7g5_38 = f7_2 * (int64_t) g5_19;
wolfSSL 11:cee25a834751 833 int64_t f7g6_19 = f7 * (int64_t) g6_19;
wolfSSL 11:cee25a834751 834 int64_t f7g7_38 = f7_2 * (int64_t) g7_19;
wolfSSL 11:cee25a834751 835 int64_t f7g8_19 = f7 * (int64_t) g8_19;
wolfSSL 11:cee25a834751 836 int64_t f7g9_38 = f7_2 * (int64_t) g9_19;
wolfSSL 11:cee25a834751 837 int64_t f8g0 = f8 * (int64_t) g0;
wolfSSL 11:cee25a834751 838 int64_t f8g1 = f8 * (int64_t) g1;
wolfSSL 11:cee25a834751 839 int64_t f8g2_19 = f8 * (int64_t) g2_19;
wolfSSL 11:cee25a834751 840 int64_t f8g3_19 = f8 * (int64_t) g3_19;
wolfSSL 11:cee25a834751 841 int64_t f8g4_19 = f8 * (int64_t) g4_19;
wolfSSL 11:cee25a834751 842 int64_t f8g5_19 = f8 * (int64_t) g5_19;
wolfSSL 11:cee25a834751 843 int64_t f8g6_19 = f8 * (int64_t) g6_19;
wolfSSL 11:cee25a834751 844 int64_t f8g7_19 = f8 * (int64_t) g7_19;
wolfSSL 11:cee25a834751 845 int64_t f8g8_19 = f8 * (int64_t) g8_19;
wolfSSL 11:cee25a834751 846 int64_t f8g9_19 = f8 * (int64_t) g9_19;
wolfSSL 11:cee25a834751 847 int64_t f9g0 = f9 * (int64_t) g0;
wolfSSL 11:cee25a834751 848 int64_t f9g1_38 = f9_2 * (int64_t) g1_19;
wolfSSL 11:cee25a834751 849 int64_t f9g2_19 = f9 * (int64_t) g2_19;
wolfSSL 11:cee25a834751 850 int64_t f9g3_38 = f9_2 * (int64_t) g3_19;
wolfSSL 11:cee25a834751 851 int64_t f9g4_19 = f9 * (int64_t) g4_19;
wolfSSL 11:cee25a834751 852 int64_t f9g5_38 = f9_2 * (int64_t) g5_19;
wolfSSL 11:cee25a834751 853 int64_t f9g6_19 = f9 * (int64_t) g6_19;
wolfSSL 11:cee25a834751 854 int64_t f9g7_38 = f9_2 * (int64_t) g7_19;
wolfSSL 11:cee25a834751 855 int64_t f9g8_19 = f9 * (int64_t) g8_19;
wolfSSL 11:cee25a834751 856 int64_t f9g9_38 = f9_2 * (int64_t) g9_19;
wolfSSL 11:cee25a834751 857 int64_t h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38;
wolfSSL 11:cee25a834751 858 int64_t h1 = f0g1+f1g0 +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19;
wolfSSL 11:cee25a834751 859 int64_t h2 = f0g2+f1g1_2 +f2g0 +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38;
wolfSSL 11:cee25a834751 860 int64_t h3 = f0g3+f1g2 +f2g1 +f3g0 +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19;
wolfSSL 11:cee25a834751 861 int64_t h4 = f0g4+f1g3_2 +f2g2 +f3g1_2 +f4g0 +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38;
wolfSSL 11:cee25a834751 862 int64_t h5 = f0g5+f1g4 +f2g3 +f3g2 +f4g1 +f5g0 +f6g9_19+f7g8_19+f8g7_19+f9g6_19;
wolfSSL 11:cee25a834751 863 int64_t h6 = f0g6+f1g5_2 +f2g4 +f3g3_2 +f4g2 +f5g1_2 +f6g0 +f7g9_38+f8g8_19+f9g7_38;
wolfSSL 11:cee25a834751 864 int64_t h7 = f0g7+f1g6 +f2g5 +f3g4 +f4g3 +f5g2 +f6g1 +f7g0 +f8g9_19+f9g8_19;
wolfSSL 11:cee25a834751 865 int64_t h8 = f0g8+f1g7_2 +f2g6 +f3g5_2 +f4g4 +f5g3_2 +f6g2 +f7g1_2 +f8g0 +f9g9_38;
wolfSSL 11:cee25a834751 866 int64_t h9 = f0g9+f1g8 +f2g7 +f3g6 +f4g5 +f5g4 +f6g3 +f7g2 +f8g1 +f9g0 ;
wolfSSL 11:cee25a834751 867 int64_t carry0;
wolfSSL 11:cee25a834751 868 int64_t carry1;
wolfSSL 11:cee25a834751 869 int64_t carry2;
wolfSSL 11:cee25a834751 870 int64_t carry3;
wolfSSL 11:cee25a834751 871 int64_t carry4;
wolfSSL 11:cee25a834751 872 int64_t carry5;
wolfSSL 11:cee25a834751 873 int64_t carry6;
wolfSSL 11:cee25a834751 874 int64_t carry7;
wolfSSL 11:cee25a834751 875 int64_t carry8;
wolfSSL 11:cee25a834751 876 int64_t carry9;
wolfSSL 11:cee25a834751 877
wolfSSL 11:cee25a834751 878 /*
wolfSSL 11:cee25a834751 879 |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38))
wolfSSL 11:cee25a834751 880 i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8
wolfSSL 11:cee25a834751 881 |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19))
wolfSSL 11:cee25a834751 882 i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9
wolfSSL 11:cee25a834751 883 */
wolfSSL 11:cee25a834751 884
wolfSSL 11:cee25a834751 885 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
wolfSSL 11:cee25a834751 886 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
wolfSSL 11:cee25a834751 887 /* |h0| <= 2^25 */
wolfSSL 11:cee25a834751 888 /* |h4| <= 2^25 */
wolfSSL 11:cee25a834751 889 /* |h1| <= 1.71*2^59 */
wolfSSL 11:cee25a834751 890 /* |h5| <= 1.71*2^59 */
wolfSSL 11:cee25a834751 891
wolfSSL 11:cee25a834751 892 carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
wolfSSL 11:cee25a834751 893 carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
wolfSSL 11:cee25a834751 894 /* |h1| <= 2^24; from now on fits into int32 */
wolfSSL 11:cee25a834751 895 /* |h5| <= 2^24; from now on fits into int32 */
wolfSSL 11:cee25a834751 896 /* |h2| <= 1.41*2^60 */
wolfSSL 11:cee25a834751 897 /* |h6| <= 1.41*2^60 */
wolfSSL 11:cee25a834751 898
wolfSSL 11:cee25a834751 899 carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
wolfSSL 11:cee25a834751 900 carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
wolfSSL 11:cee25a834751 901 /* |h2| <= 2^25; from now on fits into int32 unchanged */
wolfSSL 11:cee25a834751 902 /* |h6| <= 2^25; from now on fits into int32 unchanged */
wolfSSL 11:cee25a834751 903 /* |h3| <= 1.71*2^59 */
wolfSSL 11:cee25a834751 904 /* |h7| <= 1.71*2^59 */
wolfSSL 11:cee25a834751 905
wolfSSL 11:cee25a834751 906 carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
wolfSSL 11:cee25a834751 907 carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
wolfSSL 11:cee25a834751 908 /* |h3| <= 2^24; from now on fits into int32 unchanged */
wolfSSL 11:cee25a834751 909 /* |h7| <= 2^24; from now on fits into int32 unchanged */
wolfSSL 11:cee25a834751 910 /* |h4| <= 1.72*2^34 */
wolfSSL 11:cee25a834751 911 /* |h8| <= 1.41*2^60 */
wolfSSL 11:cee25a834751 912
wolfSSL 11:cee25a834751 913 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
wolfSSL 11:cee25a834751 914 carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
wolfSSL 11:cee25a834751 915 /* |h4| <= 2^25; from now on fits into int32 unchanged */
wolfSSL 11:cee25a834751 916 /* |h8| <= 2^25; from now on fits into int32 unchanged */
wolfSSL 11:cee25a834751 917 /* |h5| <= 1.01*2^24 */
wolfSSL 11:cee25a834751 918 /* |h9| <= 1.71*2^59 */
wolfSSL 11:cee25a834751 919
wolfSSL 11:cee25a834751 920 carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
wolfSSL 11:cee25a834751 921 /* |h9| <= 2^24; from now on fits into int32 unchanged */
wolfSSL 11:cee25a834751 922 /* |h0| <= 1.1*2^39 */
wolfSSL 11:cee25a834751 923
wolfSSL 11:cee25a834751 924 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
wolfSSL 11:cee25a834751 925 /* |h0| <= 2^25; from now on fits into int32 unchanged */
wolfSSL 11:cee25a834751 926 /* |h1| <= 1.01*2^24 */
wolfSSL 11:cee25a834751 927
wolfSSL 11:cee25a834751 928 h[0] = (int32_t)h0;
wolfSSL 11:cee25a834751 929 h[1] = (int32_t)h1;
wolfSSL 11:cee25a834751 930 h[2] = (int32_t)h2;
wolfSSL 11:cee25a834751 931 h[3] = (int32_t)h3;
wolfSSL 11:cee25a834751 932 h[4] = (int32_t)h4;
wolfSSL 11:cee25a834751 933 h[5] = (int32_t)h5;
wolfSSL 11:cee25a834751 934 h[6] = (int32_t)h6;
wolfSSL 11:cee25a834751 935 h[7] = (int32_t)h7;
wolfSSL 11:cee25a834751 936 h[8] = (int32_t)h8;
wolfSSL 11:cee25a834751 937 h[9] = (int32_t)h9;
wolfSSL 11:cee25a834751 938 }
wolfSSL 11:cee25a834751 939
wolfSSL 11:cee25a834751 940
wolfSSL 11:cee25a834751 941 /*
wolfSSL 11:cee25a834751 942 Replace (f,g) with (g,f) if b == 1;
wolfSSL 11:cee25a834751 943 replace (f,g) with (f,g) if b == 0.
wolfSSL 11:cee25a834751 944
wolfSSL 11:cee25a834751 945 Preconditions: b in {0,1}.
wolfSSL 11:cee25a834751 946 */
wolfSSL 11:cee25a834751 947
wolfSSL 11:cee25a834751 948 void fe_cswap(fe f, fe g, int b)
wolfSSL 11:cee25a834751 949 {
wolfSSL 11:cee25a834751 950 int32_t f0 = f[0];
wolfSSL 11:cee25a834751 951 int32_t f1 = f[1];
wolfSSL 11:cee25a834751 952 int32_t f2 = f[2];
wolfSSL 11:cee25a834751 953 int32_t f3 = f[3];
wolfSSL 11:cee25a834751 954 int32_t f4 = f[4];
wolfSSL 11:cee25a834751 955 int32_t f5 = f[5];
wolfSSL 11:cee25a834751 956 int32_t f6 = f[6];
wolfSSL 11:cee25a834751 957 int32_t f7 = f[7];
wolfSSL 11:cee25a834751 958 int32_t f8 = f[8];
wolfSSL 11:cee25a834751 959 int32_t f9 = f[9];
wolfSSL 11:cee25a834751 960 int32_t g0 = g[0];
wolfSSL 11:cee25a834751 961 int32_t g1 = g[1];
wolfSSL 11:cee25a834751 962 int32_t g2 = g[2];
wolfSSL 11:cee25a834751 963 int32_t g3 = g[3];
wolfSSL 11:cee25a834751 964 int32_t g4 = g[4];
wolfSSL 11:cee25a834751 965 int32_t g5 = g[5];
wolfSSL 11:cee25a834751 966 int32_t g6 = g[6];
wolfSSL 11:cee25a834751 967 int32_t g7 = g[7];
wolfSSL 11:cee25a834751 968 int32_t g8 = g[8];
wolfSSL 11:cee25a834751 969 int32_t g9 = g[9];
wolfSSL 11:cee25a834751 970 int32_t x0 = f0 ^ g0;
wolfSSL 11:cee25a834751 971 int32_t x1 = f1 ^ g1;
wolfSSL 11:cee25a834751 972 int32_t x2 = f2 ^ g2;
wolfSSL 11:cee25a834751 973 int32_t x3 = f3 ^ g3;
wolfSSL 11:cee25a834751 974 int32_t x4 = f4 ^ g4;
wolfSSL 11:cee25a834751 975 int32_t x5 = f5 ^ g5;
wolfSSL 11:cee25a834751 976 int32_t x6 = f6 ^ g6;
wolfSSL 11:cee25a834751 977 int32_t x7 = f7 ^ g7;
wolfSSL 11:cee25a834751 978 int32_t x8 = f8 ^ g8;
wolfSSL 11:cee25a834751 979 int32_t x9 = f9 ^ g9;
wolfSSL 11:cee25a834751 980 b = -b;
wolfSSL 11:cee25a834751 981 x0 &= b;
wolfSSL 11:cee25a834751 982 x1 &= b;
wolfSSL 11:cee25a834751 983 x2 &= b;
wolfSSL 11:cee25a834751 984 x3 &= b;
wolfSSL 11:cee25a834751 985 x4 &= b;
wolfSSL 11:cee25a834751 986 x5 &= b;
wolfSSL 11:cee25a834751 987 x6 &= b;
wolfSSL 11:cee25a834751 988 x7 &= b;
wolfSSL 11:cee25a834751 989 x8 &= b;
wolfSSL 11:cee25a834751 990 x9 &= b;
wolfSSL 11:cee25a834751 991 f[0] = f0 ^ x0;
wolfSSL 11:cee25a834751 992 f[1] = f1 ^ x1;
wolfSSL 11:cee25a834751 993 f[2] = f2 ^ x2;
wolfSSL 11:cee25a834751 994 f[3] = f3 ^ x3;
wolfSSL 11:cee25a834751 995 f[4] = f4 ^ x4;
wolfSSL 11:cee25a834751 996 f[5] = f5 ^ x5;
wolfSSL 11:cee25a834751 997 f[6] = f6 ^ x6;
wolfSSL 11:cee25a834751 998 f[7] = f7 ^ x7;
wolfSSL 11:cee25a834751 999 f[8] = f8 ^ x8;
wolfSSL 11:cee25a834751 1000 f[9] = f9 ^ x9;
wolfSSL 11:cee25a834751 1001 g[0] = g0 ^ x0;
wolfSSL 11:cee25a834751 1002 g[1] = g1 ^ x1;
wolfSSL 11:cee25a834751 1003 g[2] = g2 ^ x2;
wolfSSL 11:cee25a834751 1004 g[3] = g3 ^ x3;
wolfSSL 11:cee25a834751 1005 g[4] = g4 ^ x4;
wolfSSL 11:cee25a834751 1006 g[5] = g5 ^ x5;
wolfSSL 11:cee25a834751 1007 g[6] = g6 ^ x6;
wolfSSL 11:cee25a834751 1008 g[7] = g7 ^ x7;
wolfSSL 11:cee25a834751 1009 g[8] = g8 ^ x8;
wolfSSL 11:cee25a834751 1010 g[9] = g9 ^ x9;
wolfSSL 11:cee25a834751 1011 }
wolfSSL 11:cee25a834751 1012
wolfSSL 11:cee25a834751 1013
wolfSSL 11:cee25a834751 1014 /*
wolfSSL 11:cee25a834751 1015 h = f * 121666
wolfSSL 11:cee25a834751 1016 Can overlap h with f.
wolfSSL 11:cee25a834751 1017
wolfSSL 11:cee25a834751 1018 Preconditions:
wolfSSL 11:cee25a834751 1019 |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
wolfSSL 11:cee25a834751 1020
wolfSSL 11:cee25a834751 1021 Postconditions:
wolfSSL 11:cee25a834751 1022 |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
wolfSSL 11:cee25a834751 1023 */
wolfSSL 11:cee25a834751 1024
wolfSSL 11:cee25a834751 1025 void fe_mul121666(fe h,fe f)
wolfSSL 11:cee25a834751 1026 {
wolfSSL 11:cee25a834751 1027 int32_t f0 = f[0];
wolfSSL 11:cee25a834751 1028 int32_t f1 = f[1];
wolfSSL 11:cee25a834751 1029 int32_t f2 = f[2];
wolfSSL 11:cee25a834751 1030 int32_t f3 = f[3];
wolfSSL 11:cee25a834751 1031 int32_t f4 = f[4];
wolfSSL 11:cee25a834751 1032 int32_t f5 = f[5];
wolfSSL 11:cee25a834751 1033 int32_t f6 = f[6];
wolfSSL 11:cee25a834751 1034 int32_t f7 = f[7];
wolfSSL 11:cee25a834751 1035 int32_t f8 = f[8];
wolfSSL 11:cee25a834751 1036 int32_t f9 = f[9];
wolfSSL 11:cee25a834751 1037 int64_t h0 = f0 * (int64_t) 121666;
wolfSSL 11:cee25a834751 1038 int64_t h1 = f1 * (int64_t) 121666;
wolfSSL 11:cee25a834751 1039 int64_t h2 = f2 * (int64_t) 121666;
wolfSSL 11:cee25a834751 1040 int64_t h3 = f3 * (int64_t) 121666;
wolfSSL 11:cee25a834751 1041 int64_t h4 = f4 * (int64_t) 121666;
wolfSSL 11:cee25a834751 1042 int64_t h5 = f5 * (int64_t) 121666;
wolfSSL 11:cee25a834751 1043 int64_t h6 = f6 * (int64_t) 121666;
wolfSSL 11:cee25a834751 1044 int64_t h7 = f7 * (int64_t) 121666;
wolfSSL 11:cee25a834751 1045 int64_t h8 = f8 * (int64_t) 121666;
wolfSSL 11:cee25a834751 1046 int64_t h9 = f9 * (int64_t) 121666;
wolfSSL 11:cee25a834751 1047 int64_t carry0;
wolfSSL 11:cee25a834751 1048 int64_t carry1;
wolfSSL 11:cee25a834751 1049 int64_t carry2;
wolfSSL 11:cee25a834751 1050 int64_t carry3;
wolfSSL 11:cee25a834751 1051 int64_t carry4;
wolfSSL 11:cee25a834751 1052 int64_t carry5;
wolfSSL 11:cee25a834751 1053 int64_t carry6;
wolfSSL 11:cee25a834751 1054 int64_t carry7;
wolfSSL 11:cee25a834751 1055 int64_t carry8;
wolfSSL 11:cee25a834751 1056 int64_t carry9;
wolfSSL 11:cee25a834751 1057
wolfSSL 11:cee25a834751 1058 carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
wolfSSL 11:cee25a834751 1059 carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
wolfSSL 11:cee25a834751 1060 carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
wolfSSL 11:cee25a834751 1061 carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
wolfSSL 11:cee25a834751 1062 carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
wolfSSL 11:cee25a834751 1063
wolfSSL 11:cee25a834751 1064 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
wolfSSL 11:cee25a834751 1065 carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
wolfSSL 11:cee25a834751 1066 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
wolfSSL 11:cee25a834751 1067 carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
wolfSSL 11:cee25a834751 1068 carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
wolfSSL 11:cee25a834751 1069
wolfSSL 11:cee25a834751 1070 h[0] = (int32_t)h0;
wolfSSL 11:cee25a834751 1071 h[1] = (int32_t)h1;
wolfSSL 11:cee25a834751 1072 h[2] = (int32_t)h2;
wolfSSL 11:cee25a834751 1073 h[3] = (int32_t)h3;
wolfSSL 11:cee25a834751 1074 h[4] = (int32_t)h4;
wolfSSL 11:cee25a834751 1075 h[5] = (int32_t)h5;
wolfSSL 11:cee25a834751 1076 h[6] = (int32_t)h6;
wolfSSL 11:cee25a834751 1077 h[7] = (int32_t)h7;
wolfSSL 11:cee25a834751 1078 h[8] = (int32_t)h8;
wolfSSL 11:cee25a834751 1079 h[9] = (int32_t)h9;
wolfSSL 11:cee25a834751 1080 }
wolfSSL 11:cee25a834751 1081
wolfSSL 11:cee25a834751 1082
wolfSSL 11:cee25a834751 1083 /*
wolfSSL 11:cee25a834751 1084 h = 2 * f * f
wolfSSL 11:cee25a834751 1085 Can overlap h with f.
wolfSSL 11:cee25a834751 1086
wolfSSL 11:cee25a834751 1087 Preconditions:
wolfSSL 11:cee25a834751 1088 |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
wolfSSL 11:cee25a834751 1089
wolfSSL 11:cee25a834751 1090 Postconditions:
wolfSSL 11:cee25a834751 1091 |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
wolfSSL 11:cee25a834751 1092 */
wolfSSL 11:cee25a834751 1093
wolfSSL 11:cee25a834751 1094 /*
wolfSSL 11:cee25a834751 1095 See the notes above fe_mul() in this file for discussion of implementation strategy.
wolfSSL 11:cee25a834751 1096 */
wolfSSL 11:cee25a834751 1097
wolfSSL 11:cee25a834751 1098 void fe_sq2(fe h,const fe f)
wolfSSL 11:cee25a834751 1099 {
wolfSSL 11:cee25a834751 1100 int32_t f0 = f[0];
wolfSSL 11:cee25a834751 1101 int32_t f1 = f[1];
wolfSSL 11:cee25a834751 1102 int32_t f2 = f[2];
wolfSSL 11:cee25a834751 1103 int32_t f3 = f[3];
wolfSSL 11:cee25a834751 1104 int32_t f4 = f[4];
wolfSSL 11:cee25a834751 1105 int32_t f5 = f[5];
wolfSSL 11:cee25a834751 1106 int32_t f6 = f[6];
wolfSSL 11:cee25a834751 1107 int32_t f7 = f[7];
wolfSSL 11:cee25a834751 1108 int32_t f8 = f[8];
wolfSSL 11:cee25a834751 1109 int32_t f9 = f[9];
wolfSSL 11:cee25a834751 1110 int32_t f0_2 = 2 * f0;
wolfSSL 11:cee25a834751 1111 int32_t f1_2 = 2 * f1;
wolfSSL 11:cee25a834751 1112 int32_t f2_2 = 2 * f2;
wolfSSL 11:cee25a834751 1113 int32_t f3_2 = 2 * f3;
wolfSSL 11:cee25a834751 1114 int32_t f4_2 = 2 * f4;
wolfSSL 11:cee25a834751 1115 int32_t f5_2 = 2 * f5;
wolfSSL 11:cee25a834751 1116 int32_t f6_2 = 2 * f6;
wolfSSL 11:cee25a834751 1117 int32_t f7_2 = 2 * f7;
wolfSSL 11:cee25a834751 1118 int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
wolfSSL 11:cee25a834751 1119 int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
wolfSSL 11:cee25a834751 1120 int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
wolfSSL 11:cee25a834751 1121 int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
wolfSSL 11:cee25a834751 1122 int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
wolfSSL 11:cee25a834751 1123 int64_t f0f0 = f0 * (int64_t) f0;
wolfSSL 11:cee25a834751 1124 int64_t f0f1_2 = f0_2 * (int64_t) f1;
wolfSSL 11:cee25a834751 1125 int64_t f0f2_2 = f0_2 * (int64_t) f2;
wolfSSL 11:cee25a834751 1126 int64_t f0f3_2 = f0_2 * (int64_t) f3;
wolfSSL 11:cee25a834751 1127 int64_t f0f4_2 = f0_2 * (int64_t) f4;
wolfSSL 11:cee25a834751 1128 int64_t f0f5_2 = f0_2 * (int64_t) f5;
wolfSSL 11:cee25a834751 1129 int64_t f0f6_2 = f0_2 * (int64_t) f6;
wolfSSL 11:cee25a834751 1130 int64_t f0f7_2 = f0_2 * (int64_t) f7;
wolfSSL 11:cee25a834751 1131 int64_t f0f8_2 = f0_2 * (int64_t) f8;
wolfSSL 11:cee25a834751 1132 int64_t f0f9_2 = f0_2 * (int64_t) f9;
wolfSSL 11:cee25a834751 1133 int64_t f1f1_2 = f1_2 * (int64_t) f1;
wolfSSL 11:cee25a834751 1134 int64_t f1f2_2 = f1_2 * (int64_t) f2;
wolfSSL 11:cee25a834751 1135 int64_t f1f3_4 = f1_2 * (int64_t) f3_2;
wolfSSL 11:cee25a834751 1136 int64_t f1f4_2 = f1_2 * (int64_t) f4;
wolfSSL 11:cee25a834751 1137 int64_t f1f5_4 = f1_2 * (int64_t) f5_2;
wolfSSL 11:cee25a834751 1138 int64_t f1f6_2 = f1_2 * (int64_t) f6;
wolfSSL 11:cee25a834751 1139 int64_t f1f7_4 = f1_2 * (int64_t) f7_2;
wolfSSL 11:cee25a834751 1140 int64_t f1f8_2 = f1_2 * (int64_t) f8;
wolfSSL 11:cee25a834751 1141 int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
wolfSSL 11:cee25a834751 1142 int64_t f2f2 = f2 * (int64_t) f2;
wolfSSL 11:cee25a834751 1143 int64_t f2f3_2 = f2_2 * (int64_t) f3;
wolfSSL 11:cee25a834751 1144 int64_t f2f4_2 = f2_2 * (int64_t) f4;
wolfSSL 11:cee25a834751 1145 int64_t f2f5_2 = f2_2 * (int64_t) f5;
wolfSSL 11:cee25a834751 1146 int64_t f2f6_2 = f2_2 * (int64_t) f6;
wolfSSL 11:cee25a834751 1147 int64_t f2f7_2 = f2_2 * (int64_t) f7;
wolfSSL 11:cee25a834751 1148 int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
wolfSSL 11:cee25a834751 1149 int64_t f2f9_38 = f2 * (int64_t) f9_38;
wolfSSL 11:cee25a834751 1150 int64_t f3f3_2 = f3_2 * (int64_t) f3;
wolfSSL 11:cee25a834751 1151 int64_t f3f4_2 = f3_2 * (int64_t) f4;
wolfSSL 11:cee25a834751 1152 int64_t f3f5_4 = f3_2 * (int64_t) f5_2;
wolfSSL 11:cee25a834751 1153 int64_t f3f6_2 = f3_2 * (int64_t) f6;
wolfSSL 11:cee25a834751 1154 int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
wolfSSL 11:cee25a834751 1155 int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
wolfSSL 11:cee25a834751 1156 int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
wolfSSL 11:cee25a834751 1157 int64_t f4f4 = f4 * (int64_t) f4;
wolfSSL 11:cee25a834751 1158 int64_t f4f5_2 = f4_2 * (int64_t) f5;
wolfSSL 11:cee25a834751 1159 int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
wolfSSL 11:cee25a834751 1160 int64_t f4f7_38 = f4 * (int64_t) f7_38;
wolfSSL 11:cee25a834751 1161 int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
wolfSSL 11:cee25a834751 1162 int64_t f4f9_38 = f4 * (int64_t) f9_38;
wolfSSL 11:cee25a834751 1163 int64_t f5f5_38 = f5 * (int64_t) f5_38;
wolfSSL 11:cee25a834751 1164 int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
wolfSSL 11:cee25a834751 1165 int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
wolfSSL 11:cee25a834751 1166 int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
wolfSSL 11:cee25a834751 1167 int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
wolfSSL 11:cee25a834751 1168 int64_t f6f6_19 = f6 * (int64_t) f6_19;
wolfSSL 11:cee25a834751 1169 int64_t f6f7_38 = f6 * (int64_t) f7_38;
wolfSSL 11:cee25a834751 1170 int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
wolfSSL 11:cee25a834751 1171 int64_t f6f9_38 = f6 * (int64_t) f9_38;
wolfSSL 11:cee25a834751 1172 int64_t f7f7_38 = f7 * (int64_t) f7_38;
wolfSSL 11:cee25a834751 1173 int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
wolfSSL 11:cee25a834751 1174 int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
wolfSSL 11:cee25a834751 1175 int64_t f8f8_19 = f8 * (int64_t) f8_19;
wolfSSL 11:cee25a834751 1176 int64_t f8f9_38 = f8 * (int64_t) f9_38;
wolfSSL 11:cee25a834751 1177 int64_t f9f9_38 = f9 * (int64_t) f9_38;
wolfSSL 11:cee25a834751 1178 int64_t h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
wolfSSL 11:cee25a834751 1179 int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
wolfSSL 11:cee25a834751 1180 int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
wolfSSL 11:cee25a834751 1181 int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
wolfSSL 11:cee25a834751 1182 int64_t h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38;
wolfSSL 11:cee25a834751 1183 int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
wolfSSL 11:cee25a834751 1184 int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
wolfSSL 11:cee25a834751 1185 int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
wolfSSL 11:cee25a834751 1186 int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38;
wolfSSL 11:cee25a834751 1187 int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
wolfSSL 11:cee25a834751 1188 int64_t carry0;
wolfSSL 11:cee25a834751 1189 int64_t carry1;
wolfSSL 11:cee25a834751 1190 int64_t carry2;
wolfSSL 11:cee25a834751 1191 int64_t carry3;
wolfSSL 11:cee25a834751 1192 int64_t carry4;
wolfSSL 11:cee25a834751 1193 int64_t carry5;
wolfSSL 11:cee25a834751 1194 int64_t carry6;
wolfSSL 11:cee25a834751 1195 int64_t carry7;
wolfSSL 11:cee25a834751 1196 int64_t carry8;
wolfSSL 11:cee25a834751 1197 int64_t carry9;
wolfSSL 11:cee25a834751 1198
wolfSSL 11:cee25a834751 1199 h0 += h0;
wolfSSL 11:cee25a834751 1200 h1 += h1;
wolfSSL 11:cee25a834751 1201 h2 += h2;
wolfSSL 11:cee25a834751 1202 h3 += h3;
wolfSSL 11:cee25a834751 1203 h4 += h4;
wolfSSL 11:cee25a834751 1204 h5 += h5;
wolfSSL 11:cee25a834751 1205 h6 += h6;
wolfSSL 11:cee25a834751 1206 h7 += h7;
wolfSSL 11:cee25a834751 1207 h8 += h8;
wolfSSL 11:cee25a834751 1208 h9 += h9;
wolfSSL 11:cee25a834751 1209
wolfSSL 11:cee25a834751 1210 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
wolfSSL 11:cee25a834751 1211 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
wolfSSL 11:cee25a834751 1212
wolfSSL 11:cee25a834751 1213 carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
wolfSSL 11:cee25a834751 1214 carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
wolfSSL 11:cee25a834751 1215
wolfSSL 11:cee25a834751 1216 carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
wolfSSL 11:cee25a834751 1217 carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
wolfSSL 11:cee25a834751 1218
wolfSSL 11:cee25a834751 1219 carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
wolfSSL 11:cee25a834751 1220 carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
wolfSSL 11:cee25a834751 1221
wolfSSL 11:cee25a834751 1222 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
wolfSSL 11:cee25a834751 1223 carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
wolfSSL 11:cee25a834751 1224
wolfSSL 11:cee25a834751 1225 carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
wolfSSL 11:cee25a834751 1226
wolfSSL 11:cee25a834751 1227 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
wolfSSL 11:cee25a834751 1228
wolfSSL 11:cee25a834751 1229 h[0] = (int32_t)h0;
wolfSSL 11:cee25a834751 1230 h[1] = (int32_t)h1;
wolfSSL 11:cee25a834751 1231 h[2] = (int32_t)h2;
wolfSSL 11:cee25a834751 1232 h[3] = (int32_t)h3;
wolfSSL 11:cee25a834751 1233 h[4] = (int32_t)h4;
wolfSSL 11:cee25a834751 1234 h[5] = (int32_t)h5;
wolfSSL 11:cee25a834751 1235 h[6] = (int32_t)h6;
wolfSSL 11:cee25a834751 1236 h[7] = (int32_t)h7;
wolfSSL 11:cee25a834751 1237 h[8] = (int32_t)h8;
wolfSSL 11:cee25a834751 1238 h[9] = (int32_t)h9;
wolfSSL 11:cee25a834751 1239 }
wolfSSL 11:cee25a834751 1240
wolfSSL 11:cee25a834751 1241
wolfSSL 11:cee25a834751 1242 void fe_pow22523(fe out,const fe z)
wolfSSL 11:cee25a834751 1243 {
wolfSSL 11:cee25a834751 1244 fe t0;
wolfSSL 11:cee25a834751 1245 fe t1;
wolfSSL 11:cee25a834751 1246 fe t2;
wolfSSL 11:cee25a834751 1247 int i;
wolfSSL 11:cee25a834751 1248
wolfSSL 11:cee25a834751 1249 fe_sq(t0,z); for (i = 1;i < 1;++i) fe_sq(t0,t0);
wolfSSL 11:cee25a834751 1250 fe_sq(t1,t0); for (i = 1;i < 2;++i) fe_sq(t1,t1);
wolfSSL 11:cee25a834751 1251 fe_mul(t1,z,t1);
wolfSSL 11:cee25a834751 1252 fe_mul(t0,t0,t1);
wolfSSL 11:cee25a834751 1253 fe_sq(t0,t0); for (i = 1;i < 1;++i) fe_sq(t0,t0);
wolfSSL 11:cee25a834751 1254 fe_mul(t0,t1,t0);
wolfSSL 11:cee25a834751 1255 fe_sq(t1,t0); for (i = 1;i < 5;++i) fe_sq(t1,t1);
wolfSSL 11:cee25a834751 1256 fe_mul(t0,t1,t0);
wolfSSL 11:cee25a834751 1257 fe_sq(t1,t0); for (i = 1;i < 10;++i) fe_sq(t1,t1);
wolfSSL 11:cee25a834751 1258 fe_mul(t1,t1,t0);
wolfSSL 11:cee25a834751 1259 fe_sq(t2,t1); for (i = 1;i < 20;++i) fe_sq(t2,t2);
wolfSSL 11:cee25a834751 1260 fe_mul(t1,t2,t1);
wolfSSL 11:cee25a834751 1261 fe_sq(t1,t1); for (i = 1;i < 10;++i) fe_sq(t1,t1);
wolfSSL 11:cee25a834751 1262 fe_mul(t0,t1,t0);
wolfSSL 11:cee25a834751 1263 fe_sq(t1,t0); for (i = 1;i < 50;++i) fe_sq(t1,t1);
wolfSSL 11:cee25a834751 1264 fe_mul(t1,t1,t0);
wolfSSL 11:cee25a834751 1265 fe_sq(t2,t1); for (i = 1;i < 100;++i) fe_sq(t2,t2);
wolfSSL 11:cee25a834751 1266 fe_mul(t1,t2,t1);
wolfSSL 11:cee25a834751 1267 fe_sq(t1,t1); for (i = 1;i < 50;++i) fe_sq(t1,t1);
wolfSSL 11:cee25a834751 1268 fe_mul(t0,t1,t0);
wolfSSL 11:cee25a834751 1269 fe_sq(t0,t0); for (i = 1;i < 2;++i) fe_sq(t0,t0);
wolfSSL 11:cee25a834751 1270 fe_mul(out,t0,z);
wolfSSL 11:cee25a834751 1271
wolfSSL 11:cee25a834751 1272 return;
wolfSSL 11:cee25a834751 1273 }
wolfSSL 11:cee25a834751 1274
wolfSSL 11:cee25a834751 1275
wolfSSL 11:cee25a834751 1276 /*
wolfSSL 11:cee25a834751 1277 h = -f
wolfSSL 11:cee25a834751 1278
wolfSSL 11:cee25a834751 1279 Preconditions:
wolfSSL 11:cee25a834751 1280 |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
wolfSSL 11:cee25a834751 1281
wolfSSL 11:cee25a834751 1282 Postconditions:
wolfSSL 11:cee25a834751 1283 |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
wolfSSL 11:cee25a834751 1284 */
wolfSSL 11:cee25a834751 1285
wolfSSL 11:cee25a834751 1286 void fe_neg(fe h,const fe f)
wolfSSL 11:cee25a834751 1287 {
wolfSSL 11:cee25a834751 1288 int32_t f0 = f[0];
wolfSSL 11:cee25a834751 1289 int32_t f1 = f[1];
wolfSSL 11:cee25a834751 1290 int32_t f2 = f[2];
wolfSSL 11:cee25a834751 1291 int32_t f3 = f[3];
wolfSSL 11:cee25a834751 1292 int32_t f4 = f[4];
wolfSSL 11:cee25a834751 1293 int32_t f5 = f[5];
wolfSSL 11:cee25a834751 1294 int32_t f6 = f[6];
wolfSSL 11:cee25a834751 1295 int32_t f7 = f[7];
wolfSSL 11:cee25a834751 1296 int32_t f8 = f[8];
wolfSSL 11:cee25a834751 1297 int32_t f9 = f[9];
wolfSSL 11:cee25a834751 1298 int32_t h0 = -f0;
wolfSSL 11:cee25a834751 1299 int32_t h1 = -f1;
wolfSSL 11:cee25a834751 1300 int32_t h2 = -f2;
wolfSSL 11:cee25a834751 1301 int32_t h3 = -f3;
wolfSSL 11:cee25a834751 1302 int32_t h4 = -f4;
wolfSSL 11:cee25a834751 1303 int32_t h5 = -f5;
wolfSSL 11:cee25a834751 1304 int32_t h6 = -f6;
wolfSSL 11:cee25a834751 1305 int32_t h7 = -f7;
wolfSSL 11:cee25a834751 1306 int32_t h8 = -f8;
wolfSSL 11:cee25a834751 1307 int32_t h9 = -f9;
wolfSSL 11:cee25a834751 1308 h[0] = h0;
wolfSSL 11:cee25a834751 1309 h[1] = h1;
wolfSSL 11:cee25a834751 1310 h[2] = h2;
wolfSSL 11:cee25a834751 1311 h[3] = h3;
wolfSSL 11:cee25a834751 1312 h[4] = h4;
wolfSSL 11:cee25a834751 1313 h[5] = h5;
wolfSSL 11:cee25a834751 1314 h[6] = h6;
wolfSSL 11:cee25a834751 1315 h[7] = h7;
wolfSSL 11:cee25a834751 1316 h[8] = h8;
wolfSSL 11:cee25a834751 1317 h[9] = h9;
wolfSSL 11:cee25a834751 1318 }
wolfSSL 11:cee25a834751 1319
wolfSSL 11:cee25a834751 1320
wolfSSL 11:cee25a834751 1321 /*
wolfSSL 11:cee25a834751 1322 Preconditions:
wolfSSL 11:cee25a834751 1323 |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
wolfSSL 11:cee25a834751 1324 */
wolfSSL 11:cee25a834751 1325
wolfSSL 11:cee25a834751 1326 static const unsigned char zero[32] = {0};
wolfSSL 11:cee25a834751 1327
wolfSSL 11:cee25a834751 1328 int fe_isnonzero(const fe f)
wolfSSL 11:cee25a834751 1329 {
wolfSSL 11:cee25a834751 1330 unsigned char s[32];
wolfSSL 11:cee25a834751 1331 fe_tobytes(s,f);
wolfSSL 11:cee25a834751 1332 return ConstantCompare(s,zero,32);
wolfSSL 11:cee25a834751 1333 }
wolfSSL 11:cee25a834751 1334
wolfSSL 11:cee25a834751 1335
wolfSSL 11:cee25a834751 1336 /*
wolfSSL 11:cee25a834751 1337 return 1 if f is in {1,3,5,...,q-2}
wolfSSL 11:cee25a834751 1338 return 0 if f is in {0,2,4,...,q-1}
wolfSSL 11:cee25a834751 1339
wolfSSL 11:cee25a834751 1340 Preconditions:
wolfSSL 11:cee25a834751 1341 |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
wolfSSL 11:cee25a834751 1342 */
wolfSSL 11:cee25a834751 1343
wolfSSL 11:cee25a834751 1344 int fe_isnegative(const fe f)
wolfSSL 11:cee25a834751 1345 {
wolfSSL 11:cee25a834751 1346 unsigned char s[32];
wolfSSL 11:cee25a834751 1347 fe_tobytes(s,f);
wolfSSL 11:cee25a834751 1348 return s[0] & 1;
wolfSSL 11:cee25a834751 1349 }
wolfSSL 11:cee25a834751 1350
wolfSSL 11:cee25a834751 1351
wolfSSL 11:cee25a834751 1352 /*
wolfSSL 11:cee25a834751 1353 Replace (f,g) with (g,g) if b == 1;
wolfSSL 11:cee25a834751 1354 replace (f,g) with (f,g) if b == 0.
wolfSSL 11:cee25a834751 1355
wolfSSL 11:cee25a834751 1356 Preconditions: b in {0,1}.
wolfSSL 11:cee25a834751 1357 */
wolfSSL 11:cee25a834751 1358
wolfSSL 11:cee25a834751 1359 void fe_cmov(fe f, const fe g, int b)
wolfSSL 11:cee25a834751 1360 {
wolfSSL 11:cee25a834751 1361 int32_t f0 = f[0];
wolfSSL 11:cee25a834751 1362 int32_t f1 = f[1];
wolfSSL 11:cee25a834751 1363 int32_t f2 = f[2];
wolfSSL 11:cee25a834751 1364 int32_t f3 = f[3];
wolfSSL 11:cee25a834751 1365 int32_t f4 = f[4];
wolfSSL 11:cee25a834751 1366 int32_t f5 = f[5];
wolfSSL 11:cee25a834751 1367 int32_t f6 = f[6];
wolfSSL 11:cee25a834751 1368 int32_t f7 = f[7];
wolfSSL 11:cee25a834751 1369 int32_t f8 = f[8];
wolfSSL 11:cee25a834751 1370 int32_t f9 = f[9];
wolfSSL 11:cee25a834751 1371 int32_t g0 = g[0];
wolfSSL 11:cee25a834751 1372 int32_t g1 = g[1];
wolfSSL 11:cee25a834751 1373 int32_t g2 = g[2];
wolfSSL 11:cee25a834751 1374 int32_t g3 = g[3];
wolfSSL 11:cee25a834751 1375 int32_t g4 = g[4];
wolfSSL 11:cee25a834751 1376 int32_t g5 = g[5];
wolfSSL 11:cee25a834751 1377 int32_t g6 = g[6];
wolfSSL 11:cee25a834751 1378 int32_t g7 = g[7];
wolfSSL 11:cee25a834751 1379 int32_t g8 = g[8];
wolfSSL 11:cee25a834751 1380 int32_t g9 = g[9];
wolfSSL 11:cee25a834751 1381 int32_t x0 = f0 ^ g0;
wolfSSL 11:cee25a834751 1382 int32_t x1 = f1 ^ g1;
wolfSSL 11:cee25a834751 1383 int32_t x2 = f2 ^ g2;
wolfSSL 11:cee25a834751 1384 int32_t x3 = f3 ^ g3;
wolfSSL 11:cee25a834751 1385 int32_t x4 = f4 ^ g4;
wolfSSL 11:cee25a834751 1386 int32_t x5 = f5 ^ g5;
wolfSSL 11:cee25a834751 1387 int32_t x6 = f6 ^ g6;
wolfSSL 11:cee25a834751 1388 int32_t x7 = f7 ^ g7;
wolfSSL 11:cee25a834751 1389 int32_t x8 = f8 ^ g8;
wolfSSL 11:cee25a834751 1390 int32_t x9 = f9 ^ g9;
wolfSSL 11:cee25a834751 1391 b = -b;
wolfSSL 11:cee25a834751 1392 x0 &= b;
wolfSSL 11:cee25a834751 1393 x1 &= b;
wolfSSL 11:cee25a834751 1394 x2 &= b;
wolfSSL 11:cee25a834751 1395 x3 &= b;
wolfSSL 11:cee25a834751 1396 x4 &= b;
wolfSSL 11:cee25a834751 1397 x5 &= b;
wolfSSL 11:cee25a834751 1398 x6 &= b;
wolfSSL 11:cee25a834751 1399 x7 &= b;
wolfSSL 11:cee25a834751 1400 x8 &= b;
wolfSSL 11:cee25a834751 1401 x9 &= b;
wolfSSL 11:cee25a834751 1402 f[0] = f0 ^ x0;
wolfSSL 11:cee25a834751 1403 f[1] = f1 ^ x1;
wolfSSL 11:cee25a834751 1404 f[2] = f2 ^ x2;
wolfSSL 11:cee25a834751 1405 f[3] = f3 ^ x3;
wolfSSL 11:cee25a834751 1406 f[4] = f4 ^ x4;
wolfSSL 11:cee25a834751 1407 f[5] = f5 ^ x5;
wolfSSL 11:cee25a834751 1408 f[6] = f6 ^ x6;
wolfSSL 11:cee25a834751 1409 f[7] = f7 ^ x7;
wolfSSL 11:cee25a834751 1410 f[8] = f8 ^ x8;
wolfSSL 11:cee25a834751 1411 f[9] = f9 ^ x9;
wolfSSL 11:cee25a834751 1412 }
wolfSSL 11:cee25a834751 1413 #endif
wolfSSL 11:cee25a834751 1414 #endif /* HAVE ED25519 or CURVE25519 */
wolfSSL 11:cee25a834751 1415 #endif /* not defined CURVED25519_SMALL */
wolfSSL 11:cee25a834751 1416
wolfSSL 11:cee25a834751 1417