Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository Zip archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of wolfSSL by
fe_operations.c
00001 /* fe_operations.c 00002 * 00003 * Copyright (C) 2006-2016 wolfSSL Inc. 00004 * 00005 * This file is part of wolfSSL. 00006 * 00007 * wolfSSL is free software; you can redistribute it and/or modify 00008 * it under the terms of the GNU General Public License as published by 00009 * the Free Software Foundation; either version 2 of the License, or 00010 * (at your option) any later version. 00011 * 00012 * wolfSSL is distributed in the hope that it will be useful, 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 * GNU General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU General Public License 00018 * along with this program; if not, write to the Free Software 00019 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA 00020 */ 00021 00022 00023 /* Based On Daniel J Bernstein's curve25519 Public Domain ref10 work. */ 00024 00025 #ifdef HAVE_CONFIG_H 00026 #include <config.h> 00027 #endif 00028 00029 #include <wolfssl/wolfcrypt/settings.h> 00030 00031 #ifndef CURVED25519_SMALL /* run when not defined to use small memory math */ 00032 #if defined(HAVE_ED25519) || defined(HAVE_CURVE25519) 00033 00034 #include <wolfssl/wolfcrypt/fe_operations.h> 00035 #include <stdint.h> 00036 00037 #ifdef NO_INLINE 00038 #include <wolfssl/wolfcrypt/misc.h> 00039 #else 00040 #define WOLFSSL_MISC_INCLUDED 00041 #include <wolfcrypt/src/misc.c> 00042 #endif 00043 00044 #ifdef HAVE___UINT128_T 00045 #include "fe_x25519_128.i" 00046 #else 00047 /* 00048 fe means field element. 00049 Here the field is \Z/(2^255-19). 00050 An element t, entries t[0]...t[9], represents the integer 00051 t[0]+2^26 t[1]+2^51 t[2]+2^77 t[3]+2^102 t[4]+...+2^230 t[9]. 00052 Bounds on each t[i] vary depending on context. 
00053 */ 00054 00055 uint64_t load_3(const unsigned char *in) 00056 { 00057 uint64_t result; 00058 result = (uint64_t) in[0]; 00059 result |= ((uint64_t) in[1]) << 8; 00060 result |= ((uint64_t) in[2]) << 16; 00061 return result; 00062 } 00063 00064 00065 uint64_t load_4(const unsigned char *in) 00066 { 00067 uint64_t result; 00068 result = (uint64_t) in[0]; 00069 result |= ((uint64_t) in[1]) << 8; 00070 result |= ((uint64_t) in[2]) << 16; 00071 result |= ((uint64_t) in[3]) << 24; 00072 return result; 00073 } 00074 00075 00076 /* 00077 h = 1 00078 */ 00079 00080 void fe_1(fe h) 00081 { 00082 h[0] = 1; 00083 h[1] = 0; 00084 h[2] = 0; 00085 h[3] = 0; 00086 h[4] = 0; 00087 h[5] = 0; 00088 h[6] = 0; 00089 h[7] = 0; 00090 h[8] = 0; 00091 h[9] = 0; 00092 } 00093 00094 00095 /* 00096 h = 0 00097 */ 00098 00099 void fe_0(fe h) 00100 { 00101 h[0] = 0; 00102 h[1] = 0; 00103 h[2] = 0; 00104 h[3] = 0; 00105 h[4] = 0; 00106 h[5] = 0; 00107 h[6] = 0; 00108 h[7] = 0; 00109 h[8] = 0; 00110 h[9] = 0; 00111 } 00112 00113 #ifndef FREESCALE_LTC_ECC 00114 int curve25519(byte* q, byte* n, byte* p) 00115 { 00116 #if 0 00117 unsigned char e[32]; 00118 #endif 00119 fe x1; 00120 fe x2; 00121 fe z2; 00122 fe x3; 00123 fe z3; 00124 fe tmp0; 00125 fe tmp1; 00126 int pos; 00127 unsigned int swap; 00128 unsigned int b; 00129 00130 /* Clamp already done during key generation and import */ 00131 #if 0 00132 { 00133 unsigned int i; 00134 for (i = 0;i < 32;++i) e[i] = n[i]; 00135 e[0] &= 248; 00136 e[31] &= 127; 00137 e[31] |= 64; 00138 } 00139 #endif 00140 00141 fe_frombytes(x1,p); 00142 fe_1(x2); 00143 fe_0(z2); 00144 fe_copy(x3,x1); 00145 fe_1(z3); 00146 00147 swap = 0; 00148 for (pos = 254;pos >= 0;--pos) { 00149 #if 0 00150 b = e[pos / 8] >> (pos & 7); 00151 #else 00152 b = n[pos / 8] >> (pos & 7); 00153 #endif 00154 b &= 1; 00155 swap ^= b; 00156 fe_cswap(x2,x3,swap); 00157 fe_cswap(z2,z3,swap); 00158 swap = b; 00159 00160 /* montgomery */ 00161 fe_sub(tmp0,x3,z3); 00162 fe_sub(tmp1,x2,z2); 
00163 fe_add(x2,x2,z2); 00164 fe_add(z2,x3,z3); 00165 fe_mul(z3,tmp0,x2); 00166 fe_mul(z2,z2,tmp1); 00167 fe_sq(tmp0,tmp1); 00168 fe_sq(tmp1,x2); 00169 fe_add(x3,z3,z2); 00170 fe_sub(z2,z3,z2); 00171 fe_mul(x2,tmp1,tmp0); 00172 fe_sub(tmp1,tmp1,tmp0); 00173 fe_sq(z2,z2); 00174 fe_mul121666(z3,tmp1); 00175 fe_sq(x3,x3); 00176 fe_add(tmp0,tmp0,z3); 00177 fe_mul(z3,x1,z2); 00178 fe_mul(z2,tmp1,tmp0); 00179 } 00180 fe_cswap(x2,x3,swap); 00181 fe_cswap(z2,z3,swap); 00182 00183 fe_invert(z2,z2); 00184 fe_mul(x2,x2,z2); 00185 fe_tobytes(q,x2); 00186 00187 return 0; 00188 } 00189 #endif /* !FREESCALE_LTC_ECC */ 00190 00191 /* 00192 h = f * f 00193 Can overlap h with f. 00194 00195 Preconditions: 00196 |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. 00197 00198 Postconditions: 00199 |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. 00200 */ 00201 00202 /* 00203 See fe_mul.c for discussion of implementation strategy. 00204 */ 00205 00206 void fe_sq(fe h,const fe f) 00207 { 00208 int32_t f0 = f[0]; 00209 int32_t f1 = f[1]; 00210 int32_t f2 = f[2]; 00211 int32_t f3 = f[3]; 00212 int32_t f4 = f[4]; 00213 int32_t f5 = f[5]; 00214 int32_t f6 = f[6]; 00215 int32_t f7 = f[7]; 00216 int32_t f8 = f[8]; 00217 int32_t f9 = f[9]; 00218 int32_t f0_2 = 2 * f0; 00219 int32_t f1_2 = 2 * f1; 00220 int32_t f2_2 = 2 * f2; 00221 int32_t f3_2 = 2 * f3; 00222 int32_t f4_2 = 2 * f4; 00223 int32_t f5_2 = 2 * f5; 00224 int32_t f6_2 = 2 * f6; 00225 int32_t f7_2 = 2 * f7; 00226 int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */ 00227 int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */ 00228 int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */ 00229 int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */ 00230 int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */ 00231 int64_t f0f0 = f0 * (int64_t) f0; 00232 int64_t f0f1_2 = f0_2 * (int64_t) f1; 00233 int64_t f0f2_2 = f0_2 * (int64_t) f2; 00234 int64_t f0f3_2 = f0_2 * (int64_t) f3; 00235 int64_t f0f4_2 = f0_2 * (int64_t) f4; 00236 int64_t f0f5_2 = f0_2 * (int64_t) f5; 
00237 int64_t f0f6_2 = f0_2 * (int64_t) f6; 00238 int64_t f0f7_2 = f0_2 * (int64_t) f7; 00239 int64_t f0f8_2 = f0_2 * (int64_t) f8; 00240 int64_t f0f9_2 = f0_2 * (int64_t) f9; 00241 int64_t f1f1_2 = f1_2 * (int64_t) f1; 00242 int64_t f1f2_2 = f1_2 * (int64_t) f2; 00243 int64_t f1f3_4 = f1_2 * (int64_t) f3_2; 00244 int64_t f1f4_2 = f1_2 * (int64_t) f4; 00245 int64_t f1f5_4 = f1_2 * (int64_t) f5_2; 00246 int64_t f1f6_2 = f1_2 * (int64_t) f6; 00247 int64_t f1f7_4 = f1_2 * (int64_t) f7_2; 00248 int64_t f1f8_2 = f1_2 * (int64_t) f8; 00249 int64_t f1f9_76 = f1_2 * (int64_t) f9_38; 00250 int64_t f2f2 = f2 * (int64_t) f2; 00251 int64_t f2f3_2 = f2_2 * (int64_t) f3; 00252 int64_t f2f4_2 = f2_2 * (int64_t) f4; 00253 int64_t f2f5_2 = f2_2 * (int64_t) f5; 00254 int64_t f2f6_2 = f2_2 * (int64_t) f6; 00255 int64_t f2f7_2 = f2_2 * (int64_t) f7; 00256 int64_t f2f8_38 = f2_2 * (int64_t) f8_19; 00257 int64_t f2f9_38 = f2 * (int64_t) f9_38; 00258 int64_t f3f3_2 = f3_2 * (int64_t) f3; 00259 int64_t f3f4_2 = f3_2 * (int64_t) f4; 00260 int64_t f3f5_4 = f3_2 * (int64_t) f5_2; 00261 int64_t f3f6_2 = f3_2 * (int64_t) f6; 00262 int64_t f3f7_76 = f3_2 * (int64_t) f7_38; 00263 int64_t f3f8_38 = f3_2 * (int64_t) f8_19; 00264 int64_t f3f9_76 = f3_2 * (int64_t) f9_38; 00265 int64_t f4f4 = f4 * (int64_t) f4; 00266 int64_t f4f5_2 = f4_2 * (int64_t) f5; 00267 int64_t f4f6_38 = f4_2 * (int64_t) f6_19; 00268 int64_t f4f7_38 = f4 * (int64_t) f7_38; 00269 int64_t f4f8_38 = f4_2 * (int64_t) f8_19; 00270 int64_t f4f9_38 = f4 * (int64_t) f9_38; 00271 int64_t f5f5_38 = f5 * (int64_t) f5_38; 00272 int64_t f5f6_38 = f5_2 * (int64_t) f6_19; 00273 int64_t f5f7_76 = f5_2 * (int64_t) f7_38; 00274 int64_t f5f8_38 = f5_2 * (int64_t) f8_19; 00275 int64_t f5f9_76 = f5_2 * (int64_t) f9_38; 00276 int64_t f6f6_19 = f6 * (int64_t) f6_19; 00277 int64_t f6f7_38 = f6 * (int64_t) f7_38; 00278 int64_t f6f8_38 = f6_2 * (int64_t) f8_19; 00279 int64_t f6f9_38 = f6 * (int64_t) f9_38; 00280 int64_t f7f7_38 = f7 * (int64_t) f7_38; 
00281 int64_t f7f8_38 = f7_2 * (int64_t) f8_19; 00282 int64_t f7f9_76 = f7_2 * (int64_t) f9_38; 00283 int64_t f8f8_19 = f8 * (int64_t) f8_19; 00284 int64_t f8f9_38 = f8 * (int64_t) f9_38; 00285 int64_t f9f9_38 = f9 * (int64_t) f9_38; 00286 int64_t h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38; 00287 int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38; 00288 int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19; 00289 int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38; 00290 int64_t h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38; 00291 int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38; 00292 int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19; 00293 int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38; 00294 int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38; 00295 int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2; 00296 int64_t carry0; 00297 int64_t carry1; 00298 int64_t carry2; 00299 int64_t carry3; 00300 int64_t carry4; 00301 int64_t carry5; 00302 int64_t carry6; 00303 int64_t carry7; 00304 int64_t carry8; 00305 int64_t carry9; 00306 00307 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; 00308 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 00309 00310 carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25; 00311 carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25; 00312 00313 carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26; 00314 carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26; 00315 00316 carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25; 00317 carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25; 00318 00319 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 00320 carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26; 00321 00322 carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= 
carry9 << 25; 00323 00324 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; 00325 00326 h[0] = (int32_t)h0; 00327 h[1] = (int32_t)h1; 00328 h[2] = (int32_t)h2; 00329 h[3] = (int32_t)h3; 00330 h[4] = (int32_t)h4; 00331 h[5] = (int32_t)h5; 00332 h[6] = (int32_t)h6; 00333 h[7] = (int32_t)h7; 00334 h[8] = (int32_t)h8; 00335 h[9] = (int32_t)h9; 00336 } 00337 00338 00339 /* 00340 h = f + g 00341 Can overlap h with f or g. 00342 00343 Preconditions: 00344 |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 00345 |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 00346 00347 Postconditions: 00348 |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 00349 */ 00350 00351 void fe_add(fe h,const fe f,const fe g) 00352 { 00353 int32_t f0 = f[0]; 00354 int32_t f1 = f[1]; 00355 int32_t f2 = f[2]; 00356 int32_t f3 = f[3]; 00357 int32_t f4 = f[4]; 00358 int32_t f5 = f[5]; 00359 int32_t f6 = f[6]; 00360 int32_t f7 = f[7]; 00361 int32_t f8 = f[8]; 00362 int32_t f9 = f[9]; 00363 int32_t g0 = g[0]; 00364 int32_t g1 = g[1]; 00365 int32_t g2 = g[2]; 00366 int32_t g3 = g[3]; 00367 int32_t g4 = g[4]; 00368 int32_t g5 = g[5]; 00369 int32_t g6 = g[6]; 00370 int32_t g7 = g[7]; 00371 int32_t g8 = g[8]; 00372 int32_t g9 = g[9]; 00373 int32_t h0 = f0 + g0; 00374 int32_t h1 = f1 + g1; 00375 int32_t h2 = f2 + g2; 00376 int32_t h3 = f3 + g3; 00377 int32_t h4 = f4 + g4; 00378 int32_t h5 = f5 + g5; 00379 int32_t h6 = f6 + g6; 00380 int32_t h7 = f7 + g7; 00381 int32_t h8 = f8 + g8; 00382 int32_t h9 = f9 + g9; 00383 h[0] = h0; 00384 h[1] = h1; 00385 h[2] = h2; 00386 h[3] = h3; 00387 h[4] = h4; 00388 h[5] = h5; 00389 h[6] = h6; 00390 h[7] = h7; 00391 h[8] = h8; 00392 h[9] = h9; 00393 } 00394 00395 00396 /* 00397 Preconditions: 00398 |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 00399 00400 Write p=2^255-19; q=floor(h/p). 00401 Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))). 
00402 00403 Proof: 00404 Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4. 00405 Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4. 00406 00407 Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9). 00408 Then 0<y<1. 00409 00410 Write r=h-pq. 00411 Have 0<=r<=p-1=2^255-20. 00412 Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1. 00413 00414 Write x=r+19(2^-255)r+y. 00415 Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q. 00416 00417 Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1)) 00418 so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q. 00419 */ 00420 00421 void fe_tobytes(unsigned char *s,const fe h) 00422 { 00423 int32_t h0 = h[0]; 00424 int32_t h1 = h[1]; 00425 int32_t h2 = h[2]; 00426 int32_t h3 = h[3]; 00427 int32_t h4 = h[4]; 00428 int32_t h5 = h[5]; 00429 int32_t h6 = h[6]; 00430 int32_t h7 = h[7]; 00431 int32_t h8 = h[8]; 00432 int32_t h9 = h[9]; 00433 int32_t q; 00434 int32_t carry0; 00435 int32_t carry1; 00436 int32_t carry2; 00437 int32_t carry3; 00438 int32_t carry4; 00439 int32_t carry5; 00440 int32_t carry6; 00441 int32_t carry7; 00442 int32_t carry8; 00443 int32_t carry9; 00444 00445 q = (19 * h9 + (((int32_t) 1) << 24)) >> 25; 00446 q = (h0 + q) >> 26; 00447 q = (h1 + q) >> 25; 00448 q = (h2 + q) >> 26; 00449 q = (h3 + q) >> 25; 00450 q = (h4 + q) >> 26; 00451 q = (h5 + q) >> 25; 00452 q = (h6 + q) >> 26; 00453 q = (h7 + q) >> 25; 00454 q = (h8 + q) >> 26; 00455 q = (h9 + q) >> 25; 00456 00457 /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */ 00458 h0 += 19 * q; 00459 /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. 
*/ 00460 00461 carry0 = h0 >> 26; h1 += carry0; h0 -= carry0 << 26; 00462 carry1 = h1 >> 25; h2 += carry1; h1 -= carry1 << 25; 00463 carry2 = h2 >> 26; h3 += carry2; h2 -= carry2 << 26; 00464 carry3 = h3 >> 25; h4 += carry3; h3 -= carry3 << 25; 00465 carry4 = h4 >> 26; h5 += carry4; h4 -= carry4 << 26; 00466 carry5 = h5 >> 25; h6 += carry5; h5 -= carry5 << 25; 00467 carry6 = h6 >> 26; h7 += carry6; h6 -= carry6 << 26; 00468 carry7 = h7 >> 25; h8 += carry7; h7 -= carry7 << 25; 00469 carry8 = h8 >> 26; h9 += carry8; h8 -= carry8 << 26; 00470 carry9 = h9 >> 25; h9 -= carry9 << 25; 00471 /* h10 = carry9 */ 00472 00473 /* 00474 Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20. 00475 Have h0+...+2^230 h9 between 0 and 2^255-1; 00476 evidently 2^255 h10-2^255 q = 0. 00477 Goal: Output h0+...+2^230 h9. 00478 */ 00479 00480 s[0] = h0 >> 0; 00481 s[1] = h0 >> 8; 00482 s[2] = h0 >> 16; 00483 s[3] = (h0 >> 24) | (h1 << 2); 00484 s[4] = h1 >> 6; 00485 s[5] = h1 >> 14; 00486 s[6] = (h1 >> 22) | (h2 << 3); 00487 s[7] = h2 >> 5; 00488 s[8] = h2 >> 13; 00489 s[9] = (h2 >> 21) | (h3 << 5); 00490 s[10] = h3 >> 3; 00491 s[11] = h3 >> 11; 00492 s[12] = (h3 >> 19) | (h4 << 6); 00493 s[13] = h4 >> 2; 00494 s[14] = h4 >> 10; 00495 s[15] = h4 >> 18; 00496 s[16] = h5 >> 0; 00497 s[17] = h5 >> 8; 00498 s[18] = h5 >> 16; 00499 s[19] = (h5 >> 24) | (h6 << 1); 00500 s[20] = h6 >> 7; 00501 s[21] = h6 >> 15; 00502 s[22] = (h6 >> 23) | (h7 << 3); 00503 s[23] = h7 >> 5; 00504 s[24] = h7 >> 13; 00505 s[25] = (h7 >> 21) | (h8 << 4); 00506 s[26] = h8 >> 4; 00507 s[27] = h8 >> 12; 00508 s[28] = (h8 >> 20) | (h9 << 6); 00509 s[29] = h9 >> 2; 00510 s[30] = h9 >> 10; 00511 s[31] = h9 >> 18; 00512 } 00513 00514 00515 /* 00516 h = f - g 00517 Can overlap h with f or g. 00518 00519 Preconditions: 00520 |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 00521 |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 
00522 00523 Postconditions: 00524 |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 00525 */ 00526 00527 void fe_sub(fe h,const fe f,const fe g) 00528 { 00529 int32_t f0 = f[0]; 00530 int32_t f1 = f[1]; 00531 int32_t f2 = f[2]; 00532 int32_t f3 = f[3]; 00533 int32_t f4 = f[4]; 00534 int32_t f5 = f[5]; 00535 int32_t f6 = f[6]; 00536 int32_t f7 = f[7]; 00537 int32_t f8 = f[8]; 00538 int32_t f9 = f[9]; 00539 int32_t g0 = g[0]; 00540 int32_t g1 = g[1]; 00541 int32_t g2 = g[2]; 00542 int32_t g3 = g[3]; 00543 int32_t g4 = g[4]; 00544 int32_t g5 = g[5]; 00545 int32_t g6 = g[6]; 00546 int32_t g7 = g[7]; 00547 int32_t g8 = g[8]; 00548 int32_t g9 = g[9]; 00549 int32_t h0 = f0 - g0; 00550 int32_t h1 = f1 - g1; 00551 int32_t h2 = f2 - g2; 00552 int32_t h3 = f3 - g3; 00553 int32_t h4 = f4 - g4; 00554 int32_t h5 = f5 - g5; 00555 int32_t h6 = f6 - g6; 00556 int32_t h7 = f7 - g7; 00557 int32_t h8 = f8 - g8; 00558 int32_t h9 = f9 - g9; 00559 h[0] = h0; 00560 h[1] = h1; 00561 h[2] = h2; 00562 h[3] = h3; 00563 h[4] = h4; 00564 h[5] = h5; 00565 h[6] = h6; 00566 h[7] = h7; 00567 h[8] = h8; 00568 h[9] = h9; 00569 } 00570 00571 00572 /* 00573 Ignores top bit of h. 
00574 */ 00575 00576 void fe_frombytes(fe h,const unsigned char *s) 00577 { 00578 int64_t h0 = load_4(s); 00579 int64_t h1 = load_3(s + 4) << 6; 00580 int64_t h2 = load_3(s + 7) << 5; 00581 int64_t h3 = load_3(s + 10) << 3; 00582 int64_t h4 = load_3(s + 13) << 2; 00583 int64_t h5 = load_4(s + 16); 00584 int64_t h6 = load_3(s + 20) << 7; 00585 int64_t h7 = load_3(s + 23) << 5; 00586 int64_t h8 = load_3(s + 26) << 4; 00587 int64_t h9 = (load_3(s + 29) & 8388607) << 2; 00588 int64_t carry0; 00589 int64_t carry1; 00590 int64_t carry2; 00591 int64_t carry3; 00592 int64_t carry4; 00593 int64_t carry5; 00594 int64_t carry6; 00595 int64_t carry7; 00596 int64_t carry8; 00597 int64_t carry9; 00598 00599 carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; 00600 carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25; 00601 carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25; 00602 carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25; 00603 carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25; 00604 00605 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; 00606 carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26; 00607 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 00608 carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26; 00609 carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26; 00610 00611 h[0] = (int32_t)h0; 00612 h[1] = (int32_t)h1; 00613 h[2] = (int32_t)h2; 00614 h[3] = (int32_t)h3; 00615 h[4] = (int32_t)h4; 00616 h[5] = (int32_t)h5; 00617 h[6] = (int32_t)h6; 00618 h[7] = (int32_t)h7; 00619 h[8] = (int32_t)h8; 00620 h[9] = (int32_t)h9; 00621 } 00622 00623 00624 void fe_invert(fe out,const fe z) 00625 { 00626 fe t0; 00627 fe t1; 00628 fe t2; 00629 fe t3; 00630 int i; 00631 00632 /* pow225521 */ 00633 fe_sq(t0,z); for (i = 1;i < 1;++i) fe_sq(t0,t0); 
00634 fe_sq(t1,t0); for (i = 1;i < 2;++i) fe_sq(t1,t1); 00635 fe_mul(t1,z,t1); 00636 fe_mul(t0,t0,t1); 00637 fe_sq(t2,t0); for (i = 1;i < 1;++i) fe_sq(t2,t2); 00638 fe_mul(t1,t1,t2); 00639 fe_sq(t2,t1); for (i = 1;i < 5;++i) fe_sq(t2,t2); 00640 fe_mul(t1,t2,t1); 00641 fe_sq(t2,t1); for (i = 1;i < 10;++i) fe_sq(t2,t2); 00642 fe_mul(t2,t2,t1); 00643 fe_sq(t3,t2); for (i = 1;i < 20;++i) fe_sq(t3,t3); 00644 fe_mul(t2,t3,t2); 00645 fe_sq(t2,t2); for (i = 1;i < 10;++i) fe_sq(t2,t2); 00646 fe_mul(t1,t2,t1); 00647 fe_sq(t2,t1); for (i = 1;i < 50;++i) fe_sq(t2,t2); 00648 fe_mul(t2,t2,t1); 00649 fe_sq(t3,t2); for (i = 1;i < 100;++i) fe_sq(t3,t3); 00650 fe_mul(t2,t3,t2); 00651 fe_sq(t2,t2); for (i = 1;i < 50;++i) fe_sq(t2,t2); 00652 fe_mul(t1,t2,t1); 00653 fe_sq(t1,t1); for (i = 1;i < 5;++i) fe_sq(t1,t1); 00654 fe_mul(out,t1,t0); 00655 00656 return; 00657 } 00658 00659 00660 /* 00661 h = f 00662 */ 00663 00664 void fe_copy(fe h,const fe f) 00665 { 00666 int32_t f0 = f[0]; 00667 int32_t f1 = f[1]; 00668 int32_t f2 = f[2]; 00669 int32_t f3 = f[3]; 00670 int32_t f4 = f[4]; 00671 int32_t f5 = f[5]; 00672 int32_t f6 = f[6]; 00673 int32_t f7 = f[7]; 00674 int32_t f8 = f[8]; 00675 int32_t f9 = f[9]; 00676 h[0] = f0; 00677 h[1] = f1; 00678 h[2] = f2; 00679 h[3] = f3; 00680 h[4] = f4; 00681 h[5] = f5; 00682 h[6] = f6; 00683 h[7] = f7; 00684 h[8] = f8; 00685 h[9] = f9; 00686 } 00687 00688 00689 /* 00690 h = f * g 00691 Can overlap h with f or g. 00692 00693 Preconditions: 00694 |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. 00695 |g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. 00696 00697 Postconditions: 00698 |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. 00699 */ 00700 00701 /* 00702 Notes on implementation strategy: 00703 00704 Using schoolbook multiplication. 00705 Karatsuba would save a little in some cost models. 00706 00707 Most multiplications by 2 and 19 are 32-bit precomputations; 00708 cheaper than 64-bit postcomputations. 
00709 00710 There is one remaining multiplication by 19 in the carry chain; 00711 one *19 precomputation can be merged into this, 00712 but the resulting data flow is considerably less clean. 00713 00714 There are 12 carries below. 00715 10 of them are 2-way parallelizable and vectorizable. 00716 Can get away with 11 carries, but then data flow is much deeper. 00717 00718 With tighter constraints on inputs can squeeze carries into int32. 00719 */ 00720 00721 void fe_mul(fe h,const fe f,const fe g) 00722 { 00723 int32_t f0 = f[0]; 00724 int32_t f1 = f[1]; 00725 int32_t f2 = f[2]; 00726 int32_t f3 = f[3]; 00727 int32_t f4 = f[4]; 00728 int32_t f5 = f[5]; 00729 int32_t f6 = f[6]; 00730 int32_t f7 = f[7]; 00731 int32_t f8 = f[8]; 00732 int32_t f9 = f[9]; 00733 int32_t g0 = g[0]; 00734 int32_t g1 = g[1]; 00735 int32_t g2 = g[2]; 00736 int32_t g3 = g[3]; 00737 int32_t g4 = g[4]; 00738 int32_t g5 = g[5]; 00739 int32_t g6 = g[6]; 00740 int32_t g7 = g[7]; 00741 int32_t g8 = g[8]; 00742 int32_t g9 = g[9]; 00743 int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */ 00744 int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */ 00745 int32_t g3_19 = 19 * g3; 00746 int32_t g4_19 = 19 * g4; 00747 int32_t g5_19 = 19 * g5; 00748 int32_t g6_19 = 19 * g6; 00749 int32_t g7_19 = 19 * g7; 00750 int32_t g8_19 = 19 * g8; 00751 int32_t g9_19 = 19 * g9; 00752 int32_t f1_2 = 2 * f1; 00753 int32_t f3_2 = 2 * f3; 00754 int32_t f5_2 = 2 * f5; 00755 int32_t f7_2 = 2 * f7; 00756 int32_t f9_2 = 2 * f9; 00757 int64_t f0g0 = f0 * (int64_t) g0; 00758 int64_t f0g1 = f0 * (int64_t) g1; 00759 int64_t f0g2 = f0 * (int64_t) g2; 00760 int64_t f0g3 = f0 * (int64_t) g3; 00761 int64_t f0g4 = f0 * (int64_t) g4; 00762 int64_t f0g5 = f0 * (int64_t) g5; 00763 int64_t f0g6 = f0 * (int64_t) g6; 00764 int64_t f0g7 = f0 * (int64_t) g7; 00765 int64_t f0g8 = f0 * (int64_t) g8; 00766 int64_t f0g9 = f0 * (int64_t) g9; 00767 int64_t f1g0 = f1 * (int64_t) g0; 00768 int64_t f1g1_2 = f1_2 * (int64_t) g1; 00769 int64_t f1g2 = f1 * 
(int64_t) g2; 00770 int64_t f1g3_2 = f1_2 * (int64_t) g3; 00771 int64_t f1g4 = f1 * (int64_t) g4; 00772 int64_t f1g5_2 = f1_2 * (int64_t) g5; 00773 int64_t f1g6 = f1 * (int64_t) g6; 00774 int64_t f1g7_2 = f1_2 * (int64_t) g7; 00775 int64_t f1g8 = f1 * (int64_t) g8; 00776 int64_t f1g9_38 = f1_2 * (int64_t) g9_19; 00777 int64_t f2g0 = f2 * (int64_t) g0; 00778 int64_t f2g1 = f2 * (int64_t) g1; 00779 int64_t f2g2 = f2 * (int64_t) g2; 00780 int64_t f2g3 = f2 * (int64_t) g3; 00781 int64_t f2g4 = f2 * (int64_t) g4; 00782 int64_t f2g5 = f2 * (int64_t) g5; 00783 int64_t f2g6 = f2 * (int64_t) g6; 00784 int64_t f2g7 = f2 * (int64_t) g7; 00785 int64_t f2g8_19 = f2 * (int64_t) g8_19; 00786 int64_t f2g9_19 = f2 * (int64_t) g9_19; 00787 int64_t f3g0 = f3 * (int64_t) g0; 00788 int64_t f3g1_2 = f3_2 * (int64_t) g1; 00789 int64_t f3g2 = f3 * (int64_t) g2; 00790 int64_t f3g3_2 = f3_2 * (int64_t) g3; 00791 int64_t f3g4 = f3 * (int64_t) g4; 00792 int64_t f3g5_2 = f3_2 * (int64_t) g5; 00793 int64_t f3g6 = f3 * (int64_t) g6; 00794 int64_t f3g7_38 = f3_2 * (int64_t) g7_19; 00795 int64_t f3g8_19 = f3 * (int64_t) g8_19; 00796 int64_t f3g9_38 = f3_2 * (int64_t) g9_19; 00797 int64_t f4g0 = f4 * (int64_t) g0; 00798 int64_t f4g1 = f4 * (int64_t) g1; 00799 int64_t f4g2 = f4 * (int64_t) g2; 00800 int64_t f4g3 = f4 * (int64_t) g3; 00801 int64_t f4g4 = f4 * (int64_t) g4; 00802 int64_t f4g5 = f4 * (int64_t) g5; 00803 int64_t f4g6_19 = f4 * (int64_t) g6_19; 00804 int64_t f4g7_19 = f4 * (int64_t) g7_19; 00805 int64_t f4g8_19 = f4 * (int64_t) g8_19; 00806 int64_t f4g9_19 = f4 * (int64_t) g9_19; 00807 int64_t f5g0 = f5 * (int64_t) g0; 00808 int64_t f5g1_2 = f5_2 * (int64_t) g1; 00809 int64_t f5g2 = f5 * (int64_t) g2; 00810 int64_t f5g3_2 = f5_2 * (int64_t) g3; 00811 int64_t f5g4 = f5 * (int64_t) g4; 00812 int64_t f5g5_38 = f5_2 * (int64_t) g5_19; 00813 int64_t f5g6_19 = f5 * (int64_t) g6_19; 00814 int64_t f5g7_38 = f5_2 * (int64_t) g7_19; 00815 int64_t f5g8_19 = f5 * (int64_t) g8_19; 00816 int64_t 
f5g9_38 = f5_2 * (int64_t) g9_19; 00817 int64_t f6g0 = f6 * (int64_t) g0; 00818 int64_t f6g1 = f6 * (int64_t) g1; 00819 int64_t f6g2 = f6 * (int64_t) g2; 00820 int64_t f6g3 = f6 * (int64_t) g3; 00821 int64_t f6g4_19 = f6 * (int64_t) g4_19; 00822 int64_t f6g5_19 = f6 * (int64_t) g5_19; 00823 int64_t f6g6_19 = f6 * (int64_t) g6_19; 00824 int64_t f6g7_19 = f6 * (int64_t) g7_19; 00825 int64_t f6g8_19 = f6 * (int64_t) g8_19; 00826 int64_t f6g9_19 = f6 * (int64_t) g9_19; 00827 int64_t f7g0 = f7 * (int64_t) g0; 00828 int64_t f7g1_2 = f7_2 * (int64_t) g1; 00829 int64_t f7g2 = f7 * (int64_t) g2; 00830 int64_t f7g3_38 = f7_2 * (int64_t) g3_19; 00831 int64_t f7g4_19 = f7 * (int64_t) g4_19; 00832 int64_t f7g5_38 = f7_2 * (int64_t) g5_19; 00833 int64_t f7g6_19 = f7 * (int64_t) g6_19; 00834 int64_t f7g7_38 = f7_2 * (int64_t) g7_19; 00835 int64_t f7g8_19 = f7 * (int64_t) g8_19; 00836 int64_t f7g9_38 = f7_2 * (int64_t) g9_19; 00837 int64_t f8g0 = f8 * (int64_t) g0; 00838 int64_t f8g1 = f8 * (int64_t) g1; 00839 int64_t f8g2_19 = f8 * (int64_t) g2_19; 00840 int64_t f8g3_19 = f8 * (int64_t) g3_19; 00841 int64_t f8g4_19 = f8 * (int64_t) g4_19; 00842 int64_t f8g5_19 = f8 * (int64_t) g5_19; 00843 int64_t f8g6_19 = f8 * (int64_t) g6_19; 00844 int64_t f8g7_19 = f8 * (int64_t) g7_19; 00845 int64_t f8g8_19 = f8 * (int64_t) g8_19; 00846 int64_t f8g9_19 = f8 * (int64_t) g9_19; 00847 int64_t f9g0 = f9 * (int64_t) g0; 00848 int64_t f9g1_38 = f9_2 * (int64_t) g1_19; 00849 int64_t f9g2_19 = f9 * (int64_t) g2_19; 00850 int64_t f9g3_38 = f9_2 * (int64_t) g3_19; 00851 int64_t f9g4_19 = f9 * (int64_t) g4_19; 00852 int64_t f9g5_38 = f9_2 * (int64_t) g5_19; 00853 int64_t f9g6_19 = f9 * (int64_t) g6_19; 00854 int64_t f9g7_38 = f9_2 * (int64_t) g7_19; 00855 int64_t f9g8_19 = f9 * (int64_t) g8_19; 00856 int64_t f9g9_38 = f9_2 * (int64_t) g9_19; 00857 int64_t h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38; 00858 int64_t h1 = f0g1+f1g0 
+f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19; 00859 int64_t h2 = f0g2+f1g1_2 +f2g0 +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38; 00860 int64_t h3 = f0g3+f1g2 +f2g1 +f3g0 +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19; 00861 int64_t h4 = f0g4+f1g3_2 +f2g2 +f3g1_2 +f4g0 +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38; 00862 int64_t h5 = f0g5+f1g4 +f2g3 +f3g2 +f4g1 +f5g0 +f6g9_19+f7g8_19+f8g7_19+f9g6_19; 00863 int64_t h6 = f0g6+f1g5_2 +f2g4 +f3g3_2 +f4g2 +f5g1_2 +f6g0 +f7g9_38+f8g8_19+f9g7_38; 00864 int64_t h7 = f0g7+f1g6 +f2g5 +f3g4 +f4g3 +f5g2 +f6g1 +f7g0 +f8g9_19+f9g8_19; 00865 int64_t h8 = f0g8+f1g7_2 +f2g6 +f3g5_2 +f4g4 +f5g3_2 +f6g2 +f7g1_2 +f8g0 +f9g9_38; 00866 int64_t h9 = f0g9+f1g8 +f2g7 +f3g6 +f4g5 +f5g4 +f6g3 +f7g2 +f8g1 +f9g0 ; 00867 int64_t carry0; 00868 int64_t carry1; 00869 int64_t carry2; 00870 int64_t carry3; 00871 int64_t carry4; 00872 int64_t carry5; 00873 int64_t carry6; 00874 int64_t carry7; 00875 int64_t carry8; 00876 int64_t carry9; 00877 00878 /* 00879 |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38)) 00880 i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8 00881 |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19)) 00882 i.e. 
|h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9 00883 */ 00884 00885 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; 00886 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 00887 /* |h0| <= 2^25 */ 00888 /* |h4| <= 2^25 */ 00889 /* |h1| <= 1.71*2^59 */ 00890 /* |h5| <= 1.71*2^59 */ 00891 00892 carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25; 00893 carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25; 00894 /* |h1| <= 2^24; from now on fits into int32 */ 00895 /* |h5| <= 2^24; from now on fits into int32 */ 00896 /* |h2| <= 1.41*2^60 */ 00897 /* |h6| <= 1.41*2^60 */ 00898 00899 carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26; 00900 carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26; 00901 /* |h2| <= 2^25; from now on fits into int32 unchanged */ 00902 /* |h6| <= 2^25; from now on fits into int32 unchanged */ 00903 /* |h3| <= 1.71*2^59 */ 00904 /* |h7| <= 1.71*2^59 */ 00905 00906 carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25; 00907 carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25; 00908 /* |h3| <= 2^24; from now on fits into int32 unchanged */ 00909 /* |h7| <= 2^24; from now on fits into int32 unchanged */ 00910 /* |h4| <= 1.72*2^34 */ 00911 /* |h8| <= 1.41*2^60 */ 00912 00913 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 00914 carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26; 00915 /* |h4| <= 2^25; from now on fits into int32 unchanged */ 00916 /* |h8| <= 2^25; from now on fits into int32 unchanged */ 00917 /* |h5| <= 1.01*2^24 */ 00918 /* |h9| <= 1.71*2^59 */ 00919 00920 carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; 00921 /* |h9| <= 2^24; from now on fits into int32 unchanged */ 00922 /* |h0| <= 1.1*2^39 */ 00923 00924 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 
-= carry0 << 26; 00925 /* |h0| <= 2^25; from now on fits into int32 unchanged */ 00926 /* |h1| <= 1.01*2^24 */ 00927 00928 h[0] = (int32_t)h0; 00929 h[1] = (int32_t)h1; 00930 h[2] = (int32_t)h2; 00931 h[3] = (int32_t)h3; 00932 h[4] = (int32_t)h4; 00933 h[5] = (int32_t)h5; 00934 h[6] = (int32_t)h6; 00935 h[7] = (int32_t)h7; 00936 h[8] = (int32_t)h8; 00937 h[9] = (int32_t)h9; 00938 } 00939 00940 00941 /* 00942 Replace (f,g) with (g,f) if b == 1; 00943 replace (f,g) with (f,g) if b == 0. 00944 00945 Preconditions: b in {0,1}. 00946 */ 00947 00948 void fe_cswap(fe f, fe g, int b) 00949 { 00950 int32_t f0 = f[0]; 00951 int32_t f1 = f[1]; 00952 int32_t f2 = f[2]; 00953 int32_t f3 = f[3]; 00954 int32_t f4 = f[4]; 00955 int32_t f5 = f[5]; 00956 int32_t f6 = f[6]; 00957 int32_t f7 = f[7]; 00958 int32_t f8 = f[8]; 00959 int32_t f9 = f[9]; 00960 int32_t g0 = g[0]; 00961 int32_t g1 = g[1]; 00962 int32_t g2 = g[2]; 00963 int32_t g3 = g[3]; 00964 int32_t g4 = g[4]; 00965 int32_t g5 = g[5]; 00966 int32_t g6 = g[6]; 00967 int32_t g7 = g[7]; 00968 int32_t g8 = g[8]; 00969 int32_t g9 = g[9]; 00970 int32_t x0 = f0 ^ g0; 00971 int32_t x1 = f1 ^ g1; 00972 int32_t x2 = f2 ^ g2; 00973 int32_t x3 = f3 ^ g3; 00974 int32_t x4 = f4 ^ g4; 00975 int32_t x5 = f5 ^ g5; 00976 int32_t x6 = f6 ^ g6; 00977 int32_t x7 = f7 ^ g7; 00978 int32_t x8 = f8 ^ g8; 00979 int32_t x9 = f9 ^ g9; 00980 b = -b; 00981 x0 &= b; 00982 x1 &= b; 00983 x2 &= b; 00984 x3 &= b; 00985 x4 &= b; 00986 x5 &= b; 00987 x6 &= b; 00988 x7 &= b; 00989 x8 &= b; 00990 x9 &= b; 00991 f[0] = f0 ^ x0; 00992 f[1] = f1 ^ x1; 00993 f[2] = f2 ^ x2; 00994 f[3] = f3 ^ x3; 00995 f[4] = f4 ^ x4; 00996 f[5] = f5 ^ x5; 00997 f[6] = f6 ^ x6; 00998 f[7] = f7 ^ x7; 00999 f[8] = f8 ^ x8; 01000 f[9] = f9 ^ x9; 01001 g[0] = g0 ^ x0; 01002 g[1] = g1 ^ x1; 01003 g[2] = g2 ^ x2; 01004 g[3] = g3 ^ x3; 01005 g[4] = g4 ^ x4; 01006 g[5] = g5 ^ x5; 01007 g[6] = g6 ^ x6; 01008 g[7] = g7 ^ x7; 01009 g[8] = g8 ^ x8; 01010 g[9] = g9 ^ x9; 01011 } 01012 01013 
01014 /* 01015 h = f * 121666 01016 Can overlap h with f. 01017 01018 Preconditions: 01019 |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 01020 01021 Postconditions: 01022 |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 01023 */ 01024 01025 void fe_mul121666(fe h,fe f) 01026 { 01027 int32_t f0 = f[0]; 01028 int32_t f1 = f[1]; 01029 int32_t f2 = f[2]; 01030 int32_t f3 = f[3]; 01031 int32_t f4 = f[4]; 01032 int32_t f5 = f[5]; 01033 int32_t f6 = f[6]; 01034 int32_t f7 = f[7]; 01035 int32_t f8 = f[8]; 01036 int32_t f9 = f[9]; 01037 int64_t h0 = f0 * (int64_t) 121666; 01038 int64_t h1 = f1 * (int64_t) 121666; 01039 int64_t h2 = f2 * (int64_t) 121666; 01040 int64_t h3 = f3 * (int64_t) 121666; 01041 int64_t h4 = f4 * (int64_t) 121666; 01042 int64_t h5 = f5 * (int64_t) 121666; 01043 int64_t h6 = f6 * (int64_t) 121666; 01044 int64_t h7 = f7 * (int64_t) 121666; 01045 int64_t h8 = f8 * (int64_t) 121666; 01046 int64_t h9 = f9 * (int64_t) 121666; 01047 int64_t carry0; 01048 int64_t carry1; 01049 int64_t carry2; 01050 int64_t carry3; 01051 int64_t carry4; 01052 int64_t carry5; 01053 int64_t carry6; 01054 int64_t carry7; 01055 int64_t carry8; 01056 int64_t carry9; 01057 01058 carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; 01059 carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25; 01060 carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25; 01061 carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25; 01062 carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25; 01063 01064 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; 01065 carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26; 01066 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 01067 carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26; 01068 carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= 
carry8 << 26; 01069 01070 h[0] = (int32_t)h0; 01071 h[1] = (int32_t)h1; 01072 h[2] = (int32_t)h2; 01073 h[3] = (int32_t)h3; 01074 h[4] = (int32_t)h4; 01075 h[5] = (int32_t)h5; 01076 h[6] = (int32_t)h6; 01077 h[7] = (int32_t)h7; 01078 h[8] = (int32_t)h8; 01079 h[9] = (int32_t)h9; 01080 } 01081 01082 01083 /* 01084 h = 2 * f * f 01085 Can overlap h with f. 01086 01087 Preconditions: 01088 |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. 01089 01090 Postconditions: 01091 |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. 01092 */ 01093 01094 /* 01095 See fe_mul.c for discussion of implementation strategy. 01096 */ 01097 01098 void fe_sq2(fe h,const fe f) 01099 { 01100 int32_t f0 = f[0]; 01101 int32_t f1 = f[1]; 01102 int32_t f2 = f[2]; 01103 int32_t f3 = f[3]; 01104 int32_t f4 = f[4]; 01105 int32_t f5 = f[5]; 01106 int32_t f6 = f[6]; 01107 int32_t f7 = f[7]; 01108 int32_t f8 = f[8]; 01109 int32_t f9 = f[9]; 01110 int32_t f0_2 = 2 * f0; 01111 int32_t f1_2 = 2 * f1; 01112 int32_t f2_2 = 2 * f2; 01113 int32_t f3_2 = 2 * f3; 01114 int32_t f4_2 = 2 * f4; 01115 int32_t f5_2 = 2 * f5; 01116 int32_t f6_2 = 2 * f6; 01117 int32_t f7_2 = 2 * f7; 01118 int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */ 01119 int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */ 01120 int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */ 01121 int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */ 01122 int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */ 01123 int64_t f0f0 = f0 * (int64_t) f0; 01124 int64_t f0f1_2 = f0_2 * (int64_t) f1; 01125 int64_t f0f2_2 = f0_2 * (int64_t) f2; 01126 int64_t f0f3_2 = f0_2 * (int64_t) f3; 01127 int64_t f0f4_2 = f0_2 * (int64_t) f4; 01128 int64_t f0f5_2 = f0_2 * (int64_t) f5; 01129 int64_t f0f6_2 = f0_2 * (int64_t) f6; 01130 int64_t f0f7_2 = f0_2 * (int64_t) f7; 01131 int64_t f0f8_2 = f0_2 * (int64_t) f8; 01132 int64_t f0f9_2 = f0_2 * (int64_t) f9; 01133 int64_t f1f1_2 = f1_2 * (int64_t) f1; 01134 int64_t f1f2_2 = f1_2 * (int64_t) f2; 01135 int64_t f1f3_4 = f1_2 * (int64_t) f3_2; 
01136 int64_t f1f4_2 = f1_2 * (int64_t) f4; 01137 int64_t f1f5_4 = f1_2 * (int64_t) f5_2; 01138 int64_t f1f6_2 = f1_2 * (int64_t) f6; 01139 int64_t f1f7_4 = f1_2 * (int64_t) f7_2; 01140 int64_t f1f8_2 = f1_2 * (int64_t) f8; 01141 int64_t f1f9_76 = f1_2 * (int64_t) f9_38; 01142 int64_t f2f2 = f2 * (int64_t) f2; 01143 int64_t f2f3_2 = f2_2 * (int64_t) f3; 01144 int64_t f2f4_2 = f2_2 * (int64_t) f4; 01145 int64_t f2f5_2 = f2_2 * (int64_t) f5; 01146 int64_t f2f6_2 = f2_2 * (int64_t) f6; 01147 int64_t f2f7_2 = f2_2 * (int64_t) f7; 01148 int64_t f2f8_38 = f2_2 * (int64_t) f8_19; 01149 int64_t f2f9_38 = f2 * (int64_t) f9_38; 01150 int64_t f3f3_2 = f3_2 * (int64_t) f3; 01151 int64_t f3f4_2 = f3_2 * (int64_t) f4; 01152 int64_t f3f5_4 = f3_2 * (int64_t) f5_2; 01153 int64_t f3f6_2 = f3_2 * (int64_t) f6; 01154 int64_t f3f7_76 = f3_2 * (int64_t) f7_38; 01155 int64_t f3f8_38 = f3_2 * (int64_t) f8_19; 01156 int64_t f3f9_76 = f3_2 * (int64_t) f9_38; 01157 int64_t f4f4 = f4 * (int64_t) f4; 01158 int64_t f4f5_2 = f4_2 * (int64_t) f5; 01159 int64_t f4f6_38 = f4_2 * (int64_t) f6_19; 01160 int64_t f4f7_38 = f4 * (int64_t) f7_38; 01161 int64_t f4f8_38 = f4_2 * (int64_t) f8_19; 01162 int64_t f4f9_38 = f4 * (int64_t) f9_38; 01163 int64_t f5f5_38 = f5 * (int64_t) f5_38; 01164 int64_t f5f6_38 = f5_2 * (int64_t) f6_19; 01165 int64_t f5f7_76 = f5_2 * (int64_t) f7_38; 01166 int64_t f5f8_38 = f5_2 * (int64_t) f8_19; 01167 int64_t f5f9_76 = f5_2 * (int64_t) f9_38; 01168 int64_t f6f6_19 = f6 * (int64_t) f6_19; 01169 int64_t f6f7_38 = f6 * (int64_t) f7_38; 01170 int64_t f6f8_38 = f6_2 * (int64_t) f8_19; 01171 int64_t f6f9_38 = f6 * (int64_t) f9_38; 01172 int64_t f7f7_38 = f7 * (int64_t) f7_38; 01173 int64_t f7f8_38 = f7_2 * (int64_t) f8_19; 01174 int64_t f7f9_76 = f7_2 * (int64_t) f9_38; 01175 int64_t f8f8_19 = f8 * (int64_t) f8_19; 01176 int64_t f8f9_38 = f8 * (int64_t) f9_38; 01177 int64_t f9f9_38 = f9 * (int64_t) f9_38; 01178 int64_t h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38; 01179 
int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38; 01180 int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19; 01181 int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38; 01182 int64_t h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38; 01183 int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38; 01184 int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19; 01185 int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38; 01186 int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38; 01187 int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2; 01188 int64_t carry0; 01189 int64_t carry1; 01190 int64_t carry2; 01191 int64_t carry3; 01192 int64_t carry4; 01193 int64_t carry5; 01194 int64_t carry6; 01195 int64_t carry7; 01196 int64_t carry8; 01197 int64_t carry9; 01198 01199 h0 += h0; 01200 h1 += h1; 01201 h2 += h2; 01202 h3 += h3; 01203 h4 += h4; 01204 h5 += h5; 01205 h6 += h6; 01206 h7 += h7; 01207 h8 += h8; 01208 h9 += h9; 01209 01210 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; 01211 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 01212 01213 carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25; 01214 carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25; 01215 01216 carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26; 01217 carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26; 01218 01219 carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25; 01220 carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25; 01221 01222 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 01223 carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26; 01224 01225 carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; 01226 01227 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; 01228 01229 h[0] = (int32_t)h0; 01230 h[1] 
= (int32_t)h1; 01231 h[2] = (int32_t)h2; 01232 h[3] = (int32_t)h3; 01233 h[4] = (int32_t)h4; 01234 h[5] = (int32_t)h5; 01235 h[6] = (int32_t)h6; 01236 h[7] = (int32_t)h7; 01237 h[8] = (int32_t)h8; 01238 h[9] = (int32_t)h9; 01239 } 01240 01241 01242 void fe_pow22523(fe out,const fe z) 01243 { 01244 fe t0; 01245 fe t1; 01246 fe t2; 01247 int i; 01248 01249 fe_sq(t0,z); for (i = 1;i < 1;++i) fe_sq(t0,t0); 01250 fe_sq(t1,t0); for (i = 1;i < 2;++i) fe_sq(t1,t1); 01251 fe_mul(t1,z,t1); 01252 fe_mul(t0,t0,t1); 01253 fe_sq(t0,t0); for (i = 1;i < 1;++i) fe_sq(t0,t0); 01254 fe_mul(t0,t1,t0); 01255 fe_sq(t1,t0); for (i = 1;i < 5;++i) fe_sq(t1,t1); 01256 fe_mul(t0,t1,t0); 01257 fe_sq(t1,t0); for (i = 1;i < 10;++i) fe_sq(t1,t1); 01258 fe_mul(t1,t1,t0); 01259 fe_sq(t2,t1); for (i = 1;i < 20;++i) fe_sq(t2,t2); 01260 fe_mul(t1,t2,t1); 01261 fe_sq(t1,t1); for (i = 1;i < 10;++i) fe_sq(t1,t1); 01262 fe_mul(t0,t1,t0); 01263 fe_sq(t1,t0); for (i = 1;i < 50;++i) fe_sq(t1,t1); 01264 fe_mul(t1,t1,t0); 01265 fe_sq(t2,t1); for (i = 1;i < 100;++i) fe_sq(t2,t2); 01266 fe_mul(t1,t2,t1); 01267 fe_sq(t1,t1); for (i = 1;i < 50;++i) fe_sq(t1,t1); 01268 fe_mul(t0,t1,t0); 01269 fe_sq(t0,t0); for (i = 1;i < 2;++i) fe_sq(t0,t0); 01270 fe_mul(out,t0,z); 01271 01272 return; 01273 } 01274 01275 01276 /* 01277 h = -f 01278 01279 Preconditions: 01280 |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 01281 01282 Postconditions: 01283 |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 
01284 */ 01285 01286 void fe_neg(fe h,const fe f) 01287 { 01288 int32_t f0 = f[0]; 01289 int32_t f1 = f[1]; 01290 int32_t f2 = f[2]; 01291 int32_t f3 = f[3]; 01292 int32_t f4 = f[4]; 01293 int32_t f5 = f[5]; 01294 int32_t f6 = f[6]; 01295 int32_t f7 = f[7]; 01296 int32_t f8 = f[8]; 01297 int32_t f9 = f[9]; 01298 int32_t h0 = -f0; 01299 int32_t h1 = -f1; 01300 int32_t h2 = -f2; 01301 int32_t h3 = -f3; 01302 int32_t h4 = -f4; 01303 int32_t h5 = -f5; 01304 int32_t h6 = -f6; 01305 int32_t h7 = -f7; 01306 int32_t h8 = -f8; 01307 int32_t h9 = -f9; 01308 h[0] = h0; 01309 h[1] = h1; 01310 h[2] = h2; 01311 h[3] = h3; 01312 h[4] = h4; 01313 h[5] = h5; 01314 h[6] = h6; 01315 h[7] = h7; 01316 h[8] = h8; 01317 h[9] = h9; 01318 } 01319 01320 01321 /* 01322 Preconditions: 01323 |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 01324 */ 01325 01326 static const unsigned char zero[32] = {0}; 01327 01328 int fe_isnonzero(const fe f) 01329 { 01330 unsigned char s[32]; 01331 fe_tobytes(s,f); 01332 return ConstantCompare(s,zero,32); 01333 } 01334 01335 01336 /* 01337 return 1 if f is in {1,3,5,...,q-2} 01338 return 0 if f is in {0,2,4,...,q-1} 01339 01340 Preconditions: 01341 |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 01342 */ 01343 01344 int fe_isnegative(const fe f) 01345 { 01346 unsigned char s[32]; 01347 fe_tobytes(s,f); 01348 return s[0] & 1; 01349 } 01350 01351 01352 /* 01353 Replace (f,g) with (g,g) if b == 1; 01354 replace (f,g) with (f,g) if b == 0. 01355 01356 Preconditions: b in {0,1}. 
01357 */ 01358 01359 void fe_cmov(fe f, const fe g, int b) 01360 { 01361 int32_t f0 = f[0]; 01362 int32_t f1 = f[1]; 01363 int32_t f2 = f[2]; 01364 int32_t f3 = f[3]; 01365 int32_t f4 = f[4]; 01366 int32_t f5 = f[5]; 01367 int32_t f6 = f[6]; 01368 int32_t f7 = f[7]; 01369 int32_t f8 = f[8]; 01370 int32_t f9 = f[9]; 01371 int32_t g0 = g[0]; 01372 int32_t g1 = g[1]; 01373 int32_t g2 = g[2]; 01374 int32_t g3 = g[3]; 01375 int32_t g4 = g[4]; 01376 int32_t g5 = g[5]; 01377 int32_t g6 = g[6]; 01378 int32_t g7 = g[7]; 01379 int32_t g8 = g[8]; 01380 int32_t g9 = g[9]; 01381 int32_t x0 = f0 ^ g0; 01382 int32_t x1 = f1 ^ g1; 01383 int32_t x2 = f2 ^ g2; 01384 int32_t x3 = f3 ^ g3; 01385 int32_t x4 = f4 ^ g4; 01386 int32_t x5 = f5 ^ g5; 01387 int32_t x6 = f6 ^ g6; 01388 int32_t x7 = f7 ^ g7; 01389 int32_t x8 = f8 ^ g8; 01390 int32_t x9 = f9 ^ g9; 01391 b = -b; 01392 x0 &= b; 01393 x1 &= b; 01394 x2 &= b; 01395 x3 &= b; 01396 x4 &= b; 01397 x5 &= b; 01398 x6 &= b; 01399 x7 &= b; 01400 x8 &= b; 01401 x9 &= b; 01402 f[0] = f0 ^ x0; 01403 f[1] = f1 ^ x1; 01404 f[2] = f2 ^ x2; 01405 f[3] = f3 ^ x3; 01406 f[4] = f4 ^ x4; 01407 f[5] = f5 ^ x5; 01408 f[6] = f6 ^ x6; 01409 f[7] = f7 ^ x7; 01410 f[8] = f8 ^ x8; 01411 f[9] = f9 ^ x9; 01412 } 01413 #endif 01414 #endif /* HAVE ED25519 or CURVE25519 */ 01415 #endif /* not defined CURVED25519_SMALL */ 01416 01417
Generated on Tue Jul 12 2022 23:30:55 by
1.7.2
