Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
fe_operations.c
00001 /* fe_operations.c 00002 * 00003 * Copyright (C) 2006-2016 wolfSSL Inc. 00004 * 00005 * This file is part of wolfSSL. 00006 * 00007 * wolfSSL is free software; you can redistribute it and/or modify 00008 * it under the terms of the GNU General Public License as published by 00009 * the Free Software Foundation; either version 2 of the License, or 00010 * (at your option) any later version. 00011 * 00012 * wolfSSL is distributed in the hope that it will be useful, 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 * GNU General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU General Public License 00018 * along with this program; if not, write to the Free Software 00019 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA 00020 */ 00021 00022 00023 /* Based On Daniel J Bernstein's curve25519 Public Domain ref10 work. */ 00024 00025 #ifdef HAVE_CONFIG_H 00026 #include <config.h> 00027 #endif 00028 00029 #include <wolfssl/wolfcrypt/settings.h> 00030 00031 #ifndef CURVED25519_SMALL /* run when not defined to use small memory math */ 00032 #if defined(HAVE_ED25519) || defined(HAVE_CURVE25519) 00033 00034 #include <wolfssl/wolfcrypt/fe_operations.h> 00035 #include <stdint.h> 00036 00037 #ifdef NO_INLINE 00038 #include <wolfssl/wolfcrypt/misc.h> 00039 #else 00040 #include <wolfcrypt/src/misc.c> 00041 #endif 00042 00043 /* 00044 fe means field element. 00045 Here the field is \Z/(2^255-19). 00046 An element t, entries t[0]...t[9], represents the integer 00047 t[0]+2^26 t[1]+2^51 t[2]+2^77 t[3]+2^102 t[4]+...+2^230 t[9]. 00048 Bounds on each t[i] vary depending on context. 00049 */ 00050 00051 uint64_t load_3(const unsigned char *in) 00052 { 00053 uint64_t result; 00054 result = (uint64_t) in[0]; 00055 result |= ((uint64_t) in[1]) << 8; 00056 result |= ((uint64_t) in[2]) << 16; 00057 return result; 00058 } 00059 00060 00061 uint64_t load_4(const unsigned char *in) 00062 { 00063 uint64_t result; 00064 result = (uint64_t) in[0]; 00065 result |= ((uint64_t) in[1]) << 8; 00066 result |= ((uint64_t) in[2]) << 16; 00067 result |= ((uint64_t) in[3]) << 24; 00068 return result; 00069 } 00070 00071 00072 /* 00073 h = 1 00074 */ 00075 00076 void fe_1(fe h) 00077 { 00078 h[0] = 1; 00079 h[1] = 0; 00080 h[2] = 0; 00081 h[3] = 0; 00082 h[4] = 0; 00083 h[5] = 0; 00084 h[6] = 0; 00085 h[7] = 0; 00086 h[8] = 0; 00087 h[9] = 0; 00088 } 00089 00090 00091 /* 00092 h = 0 00093 */ 00094 00095 void fe_0(fe h) 00096 { 00097 h[0] = 0; 00098 h[1] = 0; 00099 h[2] = 0; 00100 h[3] = 0; 00101 h[4] = 0; 00102 h[5] = 0; 00103 h[6] = 0; 00104 h[7] = 0; 00105 h[8] = 0; 00106 h[9] = 0; 00107 } 00108 00109 00110 int curve25519(byte* q, byte* n, byte* p) 00111 { 00112 #if 0 00113 unsigned char e[32]; 00114 #endif 00115 fe x1; 00116 fe x2; 00117 fe z2; 00118 fe x3; 00119 fe z3; 00120 fe tmp0; 00121 fe tmp1; 00122 int pos; 00123 unsigned int swap; 00124 unsigned int b; 00125 00126 /* Clamp already done during key generation and import */ 00127 #if 0 00128 { 00129 unsigned int i; 00130 for (i = 0;i < 32;++i) e[i] = n[i]; 00131 e[0] &= 248; 00132 e[31] &= 127; 00133 e[31] |= 64; 00134 } 00135 #endif 00136 00137 fe_frombytes(x1,p); 00138 fe_1(x2); 00139 fe_0(z2); 00140 fe_copy(x3,x1); 00141 fe_1(z3); 00142 00143 swap = 0; 00144 for (pos = 254;pos >= 0;--pos) { 00145 #if 0 00146 b = e[pos / 8] >> (pos & 7); 00147 #else 00148 b = n[pos / 8] >> (pos & 7); 00149 #endif 00150 b &= 1; 00151 swap ^= b; 00152 fe_cswap(x2,x3,swap); 00153 fe_cswap(z2,z3,swap); 00154 swap = b; 00155 00156 /* montgomery */ 00157 fe_sub(tmp0,x3,z3); 00158 fe_sub(tmp1,x2,z2); 00159 fe_add(x2,x2,z2); 00160 fe_add(z2,x3,z3); 00161 fe_mul(z3,tmp0,x2); 00162 fe_mul(z2,z2,tmp1); 00163 fe_sq(tmp0,tmp1); 00164 fe_sq(tmp1,x2); 00165 fe_add(x3,z3,z2); 00166 fe_sub(z2,z3,z2); 00167 fe_mul(x2,tmp1,tmp0); 00168 fe_sub(tmp1,tmp1,tmp0); 00169 fe_sq(z2,z2); 00170 fe_mul121666(z3,tmp1); 00171 fe_sq(x3,x3); 00172 fe_add(tmp0,tmp0,z3); 00173 fe_mul(z3,x1,z2); 00174 fe_mul(z2,tmp1,tmp0); 00175 } 00176 fe_cswap(x2,x3,swap); 00177 fe_cswap(z2,z3,swap); 00178 00179 fe_invert(z2,z2); 00180 fe_mul(x2,x2,z2); 00181 fe_tobytes(q,x2); 00182 00183 return 0; 00184 } 00185 00186 00187 /* 00188 h = f * f 00189 Can overlap h with f. 00190 00191 Preconditions: 00192 |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. 00193 00194 Postconditions: 00195 |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. 00196 */ 00197 00198 /* 00199 See fe_mul.c for discussion of implementation strategy. 00200 */ 00201 00202 void fe_sq(fe h,const fe f) 00203 { 00204 int32_t f0 = f[0]; 00205 int32_t f1 = f[1]; 00206 int32_t f2 = f[2]; 00207 int32_t f3 = f[3]; 00208 int32_t f4 = f[4]; 00209 int32_t f5 = f[5]; 00210 int32_t f6 = f[6]; 00211 int32_t f7 = f[7]; 00212 int32_t f8 = f[8]; 00213 int32_t f9 = f[9]; 00214 int32_t f0_2 = 2 * f0; 00215 int32_t f1_2 = 2 * f1; 00216 int32_t f2_2 = 2 * f2; 00217 int32_t f3_2 = 2 * f3; 00218 int32_t f4_2 = 2 * f4; 00219 int32_t f5_2 = 2 * f5; 00220 int32_t f6_2 = 2 * f6; 00221 int32_t f7_2 = 2 * f7; 00222 int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */ 00223 int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */ 00224 int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */ 00225 int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */ 00226 int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */ 00227 int64_t f0f0 = f0 * (int64_t) f0; 00228 int64_t f0f1_2 = f0_2 * (int64_t) f1; 00229 int64_t f0f2_2 = f0_2 * (int64_t) f2; 00230 int64_t f0f3_2 = f0_2 * (int64_t) f3; 00231 int64_t f0f4_2 = f0_2 * (int64_t) f4; 00232 int64_t f0f5_2 = f0_2 * (int64_t) f5; 00233 int64_t f0f6_2 = f0_2 * (int64_t) f6; 00234 int64_t f0f7_2 = f0_2 * (int64_t) f7; 00235 int64_t f0f8_2 = f0_2 * (int64_t) f8; 00236 int64_t f0f9_2 = f0_2 * (int64_t) f9; 00237 int64_t f1f1_2 = f1_2 * (int64_t) f1; 00238 int64_t f1f2_2 = f1_2 * (int64_t) f2; 00239 int64_t f1f3_4 = f1_2 * (int64_t) f3_2; 00240 int64_t f1f4_2 = f1_2 * (int64_t) f4; 00241 int64_t f1f5_4 = f1_2 * (int64_t) f5_2; 00242 int64_t f1f6_2 = f1_2 * (int64_t) f6; 00243 int64_t f1f7_4 = f1_2 * (int64_t) f7_2; 00244 int64_t f1f8_2 = f1_2 * (int64_t) f8; 00245 int64_t f1f9_76 = f1_2 * (int64_t) f9_38; 00246 int64_t f2f2 = f2 * (int64_t) f2; 00247 int64_t f2f3_2 = f2_2 * (int64_t) f3; 00248 int64_t f2f4_2 = f2_2 * (int64_t) f4; 00249 int64_t f2f5_2 = f2_2 * (int64_t) f5; 00250 int64_t f2f6_2 = f2_2 * (int64_t) f6; 00251 int64_t f2f7_2 = f2_2 * (int64_t) f7; 00252 int64_t f2f8_38 = f2_2 * (int64_t) f8_19; 00253 int64_t f2f9_38 = f2 * (int64_t) f9_38; 00254 int64_t f3f3_2 = f3_2 * (int64_t) f3; 00255 int64_t f3f4_2 = f3_2 * (int64_t) f4; 00256 int64_t f3f5_4 = f3_2 * (int64_t) f5_2; 00257 int64_t f3f6_2 = f3_2 * (int64_t) f6; 00258 int64_t f3f7_76 = f3_2 * (int64_t) f7_38; 00259 int64_t f3f8_38 = f3_2 * (int64_t) f8_19; 00260 int64_t f3f9_76 = f3_2 * (int64_t) f9_38; 00261 int64_t f4f4 = f4 * (int64_t) f4; 00262 int64_t f4f5_2 = f4_2 * (int64_t) f5; 00263 int64_t f4f6_38 = f4_2 * (int64_t) f6_19; 00264 int64_t f4f7_38 = f4 * (int64_t) f7_38; 00265 int64_t f4f8_38 = f4_2 * (int64_t) f8_19; 00266 int64_t f4f9_38 = f4 * (int64_t) f9_38; 00267 int64_t f5f5_38 = f5 * (int64_t) f5_38; 00268 int64_t f5f6_38 = f5_2 * (int64_t) f6_19; 00269 int64_t f5f7_76 = f5_2 * (int64_t) f7_38; 00270 int64_t f5f8_38 = f5_2 * (int64_t) f8_19; 00271 int64_t f5f9_76 = f5_2 * (int64_t) f9_38; 00272 int64_t f6f6_19 = f6 * (int64_t) f6_19; 00273 int64_t f6f7_38 = f6 * (int64_t) f7_38; 00274 int64_t f6f8_38 = f6_2 * (int64_t) f8_19; 00275 int64_t f6f9_38 = f6 * (int64_t) f9_38; 00276 int64_t f7f7_38 = f7 * (int64_t) f7_38; 00277 int64_t f7f8_38 = f7_2 * (int64_t) f8_19; 00278 int64_t f7f9_76 = f7_2 * (int64_t) f9_38; 00279 int64_t f8f8_19 = f8 * (int64_t) f8_19; 00280 int64_t f8f9_38 = f8 * (int64_t) f9_38; 00281 int64_t f9f9_38 = f9 * (int64_t) f9_38; 00282 int64_t h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38; 00283 int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38; 00284 int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19; 00285 int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38; 00286 int64_t h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38; 00287 int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38; 00288 int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19; 00289 int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38; 00290 int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38; 00291 int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2; 00292 int64_t carry0; 00293 int64_t carry1; 00294 int64_t carry2; 00295 int64_t carry3; 00296 int64_t carry4; 00297 int64_t carry5; 00298 int64_t carry6; 00299 int64_t carry7; 00300 int64_t carry8; 00301 int64_t carry9; 00302 00303 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; 00304 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 00305 00306 carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25; 00307 carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25; 00308 00309 carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26; 00310 carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26; 00311 00312 carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25; 00313 carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25; 00314 00315 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 00316 carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26; 00317 00318 carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; 00319 00320 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; 00321 00322 h[0] = (int32_t)h0; 00323 h[1] = (int32_t)h1; 00324 h[2] = (int32_t)h2; 00325 h[3] = (int32_t)h3; 00326 h[4] = (int32_t)h4; 00327 h[5] = (int32_t)h5; 00328 h[6] = (int32_t)h6; 00329 h[7] = (int32_t)h7; 00330 h[8] = (int32_t)h8; 00331 h[9] = (int32_t)h9; 00332 } 00333 00334 00335 /* 00336 h = f + g 00337 Can overlap h with f or g. 00338 00339 Preconditions: 00340 |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 00341 |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 00342 00343 Postconditions: 00344 |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 00345 */ 00346 00347 void fe_add(fe h,const fe f,const fe g) 00348 { 00349 int32_t f0 = f[0]; 00350 int32_t f1 = f[1]; 00351 int32_t f2 = f[2]; 00352 int32_t f3 = f[3]; 00353 int32_t f4 = f[4]; 00354 int32_t f5 = f[5]; 00355 int32_t f6 = f[6]; 00356 int32_t f7 = f[7]; 00357 int32_t f8 = f[8]; 00358 int32_t f9 = f[9]; 00359 int32_t g0 = g[0]; 00360 int32_t g1 = g[1]; 00361 int32_t g2 = g[2]; 00362 int32_t g3 = g[3]; 00363 int32_t g4 = g[4]; 00364 int32_t g5 = g[5]; 00365 int32_t g6 = g[6]; 00366 int32_t g7 = g[7]; 00367 int32_t g8 = g[8]; 00368 int32_t g9 = g[9]; 00369 int32_t h0 = f0 + g0; 00370 int32_t h1 = f1 + g1; 00371 int32_t h2 = f2 + g2; 00372 int32_t h3 = f3 + g3; 00373 int32_t h4 = f4 + g4; 00374 int32_t h5 = f5 + g5; 00375 int32_t h6 = f6 + g6; 00376 int32_t h7 = f7 + g7; 00377 int32_t h8 = f8 + g8; 00378 int32_t h9 = f9 + g9; 00379 h[0] = h0; 00380 h[1] = h1; 00381 h[2] = h2; 00382 h[3] = h3; 00383 h[4] = h4; 00384 h[5] = h5; 00385 h[6] = h6; 00386 h[7] = h7; 00387 h[8] = h8; 00388 h[9] = h9; 00389 } 00390 00391 00392 /* 00393 Preconditions: 00394 |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 00395 00396 Write p=2^255-19; q=floor(h/p). 00397 Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))). 00398 00399 Proof: 00400 Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4. 00401 Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4. 00402 00403 Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9). 00404 Then 0<y<1. 00405 00406 Write r=h-pq. 00407 Have 0<=r<=p-1=2^255-20. 00408 Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1. 00409 00410 Write x=r+19(2^-255)r+y. 00411 Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q. 00412 00413 Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1)) 00414 so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q. 00415 */ 00416 00417 void fe_tobytes(unsigned char *s,const fe h) 00418 { 00419 int32_t h0 = h[0]; 00420 int32_t h1 = h[1]; 00421 int32_t h2 = h[2]; 00422 int32_t h3 = h[3]; 00423 int32_t h4 = h[4]; 00424 int32_t h5 = h[5]; 00425 int32_t h6 = h[6]; 00426 int32_t h7 = h[7]; 00427 int32_t h8 = h[8]; 00428 int32_t h9 = h[9]; 00429 int32_t q; 00430 int32_t carry0; 00431 int32_t carry1; 00432 int32_t carry2; 00433 int32_t carry3; 00434 int32_t carry4; 00435 int32_t carry5; 00436 int32_t carry6; 00437 int32_t carry7; 00438 int32_t carry8; 00439 int32_t carry9; 00440 00441 q = (19 * h9 + (((int32_t) 1) << 24)) >> 25; 00442 q = (h0 + q) >> 26; 00443 q = (h1 + q) >> 25; 00444 q = (h2 + q) >> 26; 00445 q = (h3 + q) >> 25; 00446 q = (h4 + q) >> 26; 00447 q = (h5 + q) >> 25; 00448 q = (h6 + q) >> 26; 00449 q = (h7 + q) >> 25; 00450 q = (h8 + q) >> 26; 00451 q = (h9 + q) >> 25; 00452 00453 /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */ 00454 h0 += 19 * q; 00455 /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */ 00456 00457 carry0 = h0 >> 26; h1 += carry0; h0 -= carry0 << 26; 00458 carry1 = h1 >> 25; h2 += carry1; h1 -= carry1 << 25; 00459 carry2 = h2 >> 26; h3 += carry2; h2 -= carry2 << 26; 00460 carry3 = h3 >> 25; h4 += carry3; h3 -= carry3 << 25; 00461 carry4 = h4 >> 26; h5 += carry4; h4 -= carry4 << 26; 00462 carry5 = h5 >> 25; h6 += carry5; h5 -= carry5 << 25; 00463 carry6 = h6 >> 26; h7 += carry6; h6 -= carry6 << 26; 00464 carry7 = h7 >> 25; h8 += carry7; h7 -= carry7 << 25; 00465 carry8 = h8 >> 26; h9 += carry8; h8 -= carry8 << 26; 00466 carry9 = h9 >> 25; h9 -= carry9 << 25; 00467 /* h10 = carry9 */ 00468 00469 /* 00470 Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20. 00471 Have h0+...+2^230 h9 between 0 and 2^255-1; 00472 evidently 2^255 h10-2^255 q = 0. 00473 Goal: Output h0+...+2^230 h9. 00474 */ 00475 00476 s[0] = h0 >> 0; 00477 s[1] = h0 >> 8; 00478 s[2] = h0 >> 16; 00479 s[3] = (h0 >> 24) | (h1 << 2); 00480 s[4] = h1 >> 6; 00481 s[5] = h1 >> 14; 00482 s[6] = (h1 >> 22) | (h2 << 3); 00483 s[7] = h2 >> 5; 00484 s[8] = h2 >> 13; 00485 s[9] = (h2 >> 21) | (h3 << 5); 00486 s[10] = h3 >> 3; 00487 s[11] = h3 >> 11; 00488 s[12] = (h3 >> 19) | (h4 << 6); 00489 s[13] = h4 >> 2; 00490 s[14] = h4 >> 10; 00491 s[15] = h4 >> 18; 00492 s[16] = h5 >> 0; 00493 s[17] = h5 >> 8; 00494 s[18] = h5 >> 16; 00495 s[19] = (h5 >> 24) | (h6 << 1); 00496 s[20] = h6 >> 7; 00497 s[21] = h6 >> 15; 00498 s[22] = (h6 >> 23) | (h7 << 3); 00499 s[23] = h7 >> 5; 00500 s[24] = h7 >> 13; 00501 s[25] = (h7 >> 21) | (h8 << 4); 00502 s[26] = h8 >> 4; 00503 s[27] = h8 >> 12; 00504 s[28] = (h8 >> 20) | (h9 << 6); 00505 s[29] = h9 >> 2; 00506 s[30] = h9 >> 10; 00507 s[31] = h9 >> 18; 00508 } 00509 00510 00511 /* 00512 h = f - g 00513 Can overlap h with f or g. 00514 00515 Preconditions: 00516 |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 00517 |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 00518 00519 Postconditions: 00520 |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 00521 */ 00522 00523 void fe_sub(fe h,const fe f,const fe g) 00524 { 00525 int32_t f0 = f[0]; 00526 int32_t f1 = f[1]; 00527 int32_t f2 = f[2]; 00528 int32_t f3 = f[3]; 00529 int32_t f4 = f[4]; 00530 int32_t f5 = f[5]; 00531 int32_t f6 = f[6]; 00532 int32_t f7 = f[7]; 00533 int32_t f8 = f[8]; 00534 int32_t f9 = f[9]; 00535 int32_t g0 = g[0]; 00536 int32_t g1 = g[1]; 00537 int32_t g2 = g[2]; 00538 int32_t g3 = g[3]; 00539 int32_t g4 = g[4]; 00540 int32_t g5 = g[5]; 00541 int32_t g6 = g[6]; 00542 int32_t g7 = g[7]; 00543 int32_t g8 = g[8]; 00544 int32_t g9 = g[9]; 00545 int32_t h0 = f0 - g0; 00546 int32_t h1 = f1 - g1; 00547 int32_t h2 = f2 - g2; 00548 int32_t h3 = f3 - g3; 00549 int32_t h4 = f4 - g4; 00550 int32_t h5 = f5 - g5; 00551 int32_t h6 = f6 - g6; 00552 int32_t h7 = f7 - g7; 00553 int32_t h8 = f8 - g8; 00554 int32_t h9 = f9 - g9; 00555 h[0] = h0; 00556 h[1] = h1; 00557 h[2] = h2; 00558 h[3] = h3; 00559 h[4] = h4; 00560 h[5] = h5; 00561 h[6] = h6; 00562 h[7] = h7; 00563 h[8] = h8; 00564 h[9] = h9; 00565 } 00566 00567 00568 /* 00569 Ignores top bit of h. 00570 */ 00571 00572 void fe_frombytes(fe h,const unsigned char *s) 00573 { 00574 int64_t h0 = load_4(s); 00575 int64_t h1 = load_3(s + 4) << 6; 00576 int64_t h2 = load_3(s + 7) << 5; 00577 int64_t h3 = load_3(s + 10) << 3; 00578 int64_t h4 = load_3(s + 13) << 2; 00579 int64_t h5 = load_4(s + 16); 00580 int64_t h6 = load_3(s + 20) << 7; 00581 int64_t h7 = load_3(s + 23) << 5; 00582 int64_t h8 = load_3(s + 26) << 4; 00583 int64_t h9 = (load_3(s + 29) & 8388607) << 2; 00584 int64_t carry0; 00585 int64_t carry1; 00586 int64_t carry2; 00587 int64_t carry3; 00588 int64_t carry4; 00589 int64_t carry5; 00590 int64_t carry6; 00591 int64_t carry7; 00592 int64_t carry8; 00593 int64_t carry9; 00594 00595 carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; 00596 carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25; 00597 carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25; 00598 carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25; 00599 carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25; 00600 00601 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; 00602 carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26; 00603 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 00604 carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26; 00605 carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26; 00606 00607 h[0] = (int32_t)h0; 00608 h[1] = (int32_t)h1; 00609 h[2] = (int32_t)h2; 00610 h[3] = (int32_t)h3; 00611 h[4] = (int32_t)h4; 00612 h[5] = (int32_t)h5; 00613 h[6] = (int32_t)h6; 00614 h[7] = (int32_t)h7; 00615 h[8] = (int32_t)h8; 00616 h[9] = (int32_t)h9; 00617 } 00618 00619 00620 void fe_invert(fe out,const fe z) 00621 { 00622 fe t0; 00623 fe t1; 00624 fe t2; 00625 fe t3; 00626 int i; 00627 00628 /* pow225521 */ 00629 fe_sq(t0,z); for (i = 1;i < 1;++i) fe_sq(t0,t0); 00630 fe_sq(t1,t0); for (i = 1;i < 2;++i) fe_sq(t1,t1); 00631 fe_mul(t1,z,t1); 00632 fe_mul(t0,t0,t1); 00633 fe_sq(t2,t0); for (i = 1;i < 1;++i) fe_sq(t2,t2); 00634 fe_mul(t1,t1,t2); 00635 fe_sq(t2,t1); for (i = 1;i < 5;++i) fe_sq(t2,t2); 00636 fe_mul(t1,t2,t1); 00637 fe_sq(t2,t1); for (i = 1;i < 10;++i) fe_sq(t2,t2); 00638 fe_mul(t2,t2,t1); 00639 fe_sq(t3,t2); for (i = 1;i < 20;++i) fe_sq(t3,t3); 00640 fe_mul(t2,t3,t2); 00641 fe_sq(t2,t2); for (i = 1;i < 10;++i) fe_sq(t2,t2); 00642 fe_mul(t1,t2,t1); 00643 fe_sq(t2,t1); for (i = 1;i < 50;++i) fe_sq(t2,t2); 00644 fe_mul(t2,t2,t1); 00645 fe_sq(t3,t2); for (i = 1;i < 100;++i) fe_sq(t3,t3); 00646 fe_mul(t2,t3,t2); 00647 fe_sq(t2,t2); for (i = 1;i < 50;++i) fe_sq(t2,t2); 00648 fe_mul(t1,t2,t1); 00649 fe_sq(t1,t1); for (i = 1;i < 5;++i) fe_sq(t1,t1); 00650 fe_mul(out,t1,t0); 00651 00652 return; 00653 } 00654 00655 00656 /* 00657 h = f 00658 */ 00659 00660 void fe_copy(fe h,const fe f) 00661 { 00662 int32_t f0 = f[0]; 00663 int32_t f1 = f[1]; 00664 int32_t f2 = f[2]; 00665 int32_t f3 = f[3]; 00666 int32_t f4 = f[4]; 00667 int32_t f5 = f[5]; 00668 int32_t f6 = f[6]; 00669 int32_t f7 = f[7]; 00670 int32_t f8 = f[8]; 00671 int32_t f9 = f[9]; 00672 h[0] = f0; 00673 h[1] = f1; 00674 h[2] = f2; 00675 h[3] = f3; 00676 h[4] = f4; 00677 h[5] = f5; 00678 h[6] = f6; 00679 h[7] = f7; 00680 h[8] = f8; 00681 h[9] = f9; 00682 } 00683 00684 00685 /* 00686 h = f * g 00687 Can overlap h with f or g. 00688 00689 Preconditions: 00690 |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. 00691 |g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. 00692 00693 Postconditions: 00694 |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. 00695 */ 00696 00697 /* 00698 Notes on implementation strategy: 00699 00700 Using schoolbook multiplication. 00701 Karatsuba would save a little in some cost models. 00702 00703 Most multiplications by 2 and 19 are 32-bit precomputations; 00704 cheaper than 64-bit postcomputations. 00705 00706 There is one remaining multiplication by 19 in the carry chain; 00707 one *19 precomputation can be merged into this, 00708 but the resulting data flow is considerably less clean. 00709 00710 There are 12 carries below. 00711 10 of them are 2-way parallelizable and vectorizable. 00712 Can get away with 11 carries, but then data flow is much deeper. 00713 00714 With tighter constraints on inputs can squeeze carries into int32. 00715 */ 00716 00717 void fe_mul(fe h,const fe f,const fe g) 00718 { 00719 int32_t f0 = f[0]; 00720 int32_t f1 = f[1]; 00721 int32_t f2 = f[2]; 00722 int32_t f3 = f[3]; 00723 int32_t f4 = f[4]; 00724 int32_t f5 = f[5]; 00725 int32_t f6 = f[6]; 00726 int32_t f7 = f[7]; 00727 int32_t f8 = f[8]; 00728 int32_t f9 = f[9]; 00729 int32_t g0 = g[0]; 00730 int32_t g1 = g[1]; 00731 int32_t g2 = g[2]; 00732 int32_t g3 = g[3]; 00733 int32_t g4 = g[4]; 00734 int32_t g5 = g[5]; 00735 int32_t g6 = g[6]; 00736 int32_t g7 = g[7]; 00737 int32_t g8 = g[8]; 00738 int32_t g9 = g[9]; 00739 int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */ 00740 int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */ 00741 int32_t g3_19 = 19 * g3; 00742 int32_t g4_19 = 19 * g4; 00743 int32_t g5_19 = 19 * g5; 00744 int32_t g6_19 = 19 * g6; 00745 int32_t g7_19 = 19 * g7; 00746 int32_t g8_19 = 19 * g8; 00747 int32_t g9_19 = 19 * g9; 00748 int32_t f1_2 = 2 * f1; 00749 int32_t f3_2 = 2 * f3; 00750 int32_t f5_2 = 2 * f5; 00751 int32_t f7_2 = 2 * f7; 00752 int32_t f9_2 = 2 * f9; 00753 int64_t f0g0 = f0 * (int64_t) g0; 00754 int64_t f0g1 = f0 * (int64_t) g1; 00755 int64_t f0g2 = f0 * (int64_t) g2; 00756 int64_t f0g3 = f0 * (int64_t) g3; 00757 int64_t f0g4 = f0 * (int64_t) g4; 00758 int64_t f0g5 = f0 * (int64_t) g5; 00759 int64_t f0g6 = f0 * (int64_t) g6; 00760 int64_t f0g7 = f0 * (int64_t) g7; 00761 int64_t f0g8 = f0 * (int64_t) g8; 00762 int64_t f0g9 = f0 * (int64_t) g9; 00763 int64_t f1g0 = f1 * (int64_t) g0; 00764 int64_t f1g1_2 = f1_2 * (int64_t) g1; 00765 int64_t f1g2 = f1 * (int64_t) g2; 00766 int64_t f1g3_2 = f1_2 * (int64_t) g3; 00767 int64_t f1g4 = f1 * (int64_t) g4; 00768 int64_t f1g5_2 = f1_2 * (int64_t) g5; 00769 int64_t f1g6 = f1 * (int64_t) g6; 00770 int64_t f1g7_2 = f1_2 * (int64_t) g7; 00771 int64_t f1g8 = f1 * (int64_t) g8; 00772 int64_t f1g9_38 = f1_2 * (int64_t) g9_19; 00773 int64_t f2g0 = f2 * (int64_t) g0; 00774 int64_t f2g1 = f2 * (int64_t) g1; 00775 int64_t f2g2 = f2 * (int64_t) g2; 00776 int64_t f2g3 = f2 * (int64_t) g3; 00777 int64_t f2g4 = f2 * (int64_t) g4; 00778 int64_t f2g5 = f2 * (int64_t) g5; 00779 int64_t f2g6 = f2 * (int64_t) g6; 00780 int64_t f2g7 = f2 * (int64_t) g7; 00781 int64_t f2g8_19 = f2 * (int64_t) g8_19; 00782 int64_t f2g9_19 = f2 * (int64_t) g9_19; 00783 int64_t f3g0 = f3 * (int64_t) g0; 00784 int64_t f3g1_2 = f3_2 * (int64_t) g1; 00785 int64_t f3g2 = f3 * (int64_t) g2; 00786 int64_t f3g3_2 = f3_2 * (int64_t) g3; 00787 int64_t f3g4 = f3 * (int64_t) g4; 00788 int64_t f3g5_2 = f3_2 * (int64_t) g5; 00789 int64_t f3g6 = f3 * (int64_t) g6; 00790 int64_t f3g7_38 = f3_2 * (int64_t) g7_19; 00791 int64_t f3g8_19 = f3 * (int64_t) g8_19; 00792 int64_t f3g9_38 = f3_2 * (int64_t) g9_19; 00793 int64_t f4g0 = f4 * (int64_t) g0; 00794 int64_t f4g1 = f4 * (int64_t) g1; 00795 int64_t f4g2 = f4 * (int64_t) g2; 00796 int64_t f4g3 = f4 * (int64_t) g3; 00797 int64_t f4g4 = f4 * (int64_t) g4; 00798 int64_t f4g5 = f4 * (int64_t) g5; 00799 int64_t f4g6_19 = f4 * (int64_t) g6_19; 00800 int64_t f4g7_19 = f4 * (int64_t) g7_19; 00801 int64_t f4g8_19 = f4 * (int64_t) g8_19; 00802 int64_t f4g9_19 = f4 * (int64_t) g9_19; 00803 int64_t f5g0 = f5 * (int64_t) g0; 00804 int64_t f5g1_2 = f5_2 * (int64_t) g1; 00805 int64_t f5g2 = f5 * (int64_t) g2; 00806 int64_t f5g3_2 = f5_2 * (int64_t) g3; 00807 int64_t f5g4 = f5 * (int64_t) g4; 00808 int64_t f5g5_38 = f5_2 * (int64_t) g5_19; 00809 int64_t f5g6_19 = f5 * (int64_t) g6_19; 00810 int64_t f5g7_38 = f5_2 * (int64_t) g7_19; 00811 int64_t f5g8_19 = f5 * (int64_t) g8_19; 00812 int64_t f5g9_38 = f5_2 * (int64_t) g9_19; 00813 int64_t f6g0 = f6 * (int64_t) g0; 00814 int64_t f6g1 = f6 * (int64_t) g1; 00815 int64_t f6g2 = f6 * (int64_t) g2; 00816 int64_t f6g3 = f6 * (int64_t) g3; 00817 int64_t f6g4_19 = f6 * (int64_t) g4_19; 00818 int64_t f6g5_19 = f6 * (int64_t) g5_19; 00819 int64_t f6g6_19 = f6 * (int64_t) g6_19; 00820 int64_t f6g7_19 = f6 * (int64_t) g7_19; 00821 int64_t f6g8_19 = f6 * (int64_t) g8_19; 00822 int64_t f6g9_19 = f6 * (int64_t) g9_19; 00823 int64_t f7g0 = f7 * (int64_t) g0; 00824 int64_t f7g1_2 = f7_2 * (int64_t) g1; 00825 int64_t f7g2 = f7 * (int64_t) g2; 00826 int64_t f7g3_38 = f7_2 * (int64_t) g3_19; 00827 int64_t f7g4_19 = f7 * (int64_t) g4_19; 00828 int64_t f7g5_38 = f7_2 * (int64_t) g5_19; 00829 int64_t f7g6_19 = f7 * (int64_t) g6_19; 00830 int64_t f7g7_38 = f7_2 * (int64_t) g7_19; 00831 int64_t f7g8_19 = f7 * (int64_t) g8_19; 00832 int64_t f7g9_38 = f7_2 * (int64_t) g9_19; 00833 int64_t f8g0 = f8 * (int64_t) g0; 00834 int64_t f8g1 = f8 * (int64_t) g1; 00835 int64_t f8g2_19 = f8 * (int64_t) g2_19; 00836 int64_t f8g3_19 = f8 * (int64_t) g3_19; 00837 int64_t f8g4_19 = f8 * (int64_t) g4_19; 00838 int64_t f8g5_19 = f8 * (int64_t) g5_19; 00839 int64_t f8g6_19 = f8 * (int64_t) g6_19; 00840 int64_t f8g7_19 = f8 * (int64_t) g7_19; 00841 int64_t f8g8_19 = f8 * (int64_t) g8_19; 00842 int64_t f8g9_19 = f8 * (int64_t) g9_19; 00843 int64_t f9g0 = f9 * (int64_t) g0; 00844 int64_t f9g1_38 = f9_2 * (int64_t) g1_19; 00845 int64_t f9g2_19 = f9 * (int64_t) g2_19; 00846 int64_t f9g3_38 = f9_2 * (int64_t) g3_19; 00847 int64_t f9g4_19 = f9 * (int64_t) g4_19; 00848 int64_t f9g5_38 = f9_2 * (int64_t) g5_19; 00849 int64_t f9g6_19 = f9 * (int64_t) g6_19; 00850 int64_t f9g7_38 = f9_2 * (int64_t) g7_19; 00851 int64_t f9g8_19 = f9 * (int64_t) g8_19; 00852 int64_t f9g9_38 = f9_2 * (int64_t) g9_19; 00853 int64_t h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38; 00854 int64_t h1 = f0g1+f1g0 +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19; 00855 int64_t h2 = f0g2+f1g1_2 +f2g0 +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38; 00856 int64_t h3 = f0g3+f1g2 +f2g1 +f3g0 +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19; 00857 int64_t h4 = f0g4+f1g3_2 +f2g2 +f3g1_2 +f4g0 +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38; 00858 int64_t h5 = f0g5+f1g4 +f2g3 +f3g2 +f4g1 +f5g0 +f6g9_19+f7g8_19+f8g7_19+f9g6_19; 00859 int64_t h6 = f0g6+f1g5_2 +f2g4 +f3g3_2 +f4g2 +f5g1_2 +f6g0 +f7g9_38+f8g8_19+f9g7_38; 00860 int64_t h7 = f0g7+f1g6 +f2g5 +f3g4 +f4g3 +f5g2 +f6g1 +f7g0 +f8g9_19+f9g8_19; 00861 int64_t h8 = f0g8+f1g7_2 +f2g6 +f3g5_2 +f4g4 +f5g3_2 +f6g2 +f7g1_2 +f8g0 +f9g9_38; 00862 int64_t h9 = f0g9+f1g8 +f2g7 +f3g6 +f4g5 +f5g4 +f6g3 +f7g2 +f8g1 +f9g0 ; 00863 int64_t carry0; 00864 int64_t carry1; 00865 int64_t carry2; 00866 int64_t carry3; 00867 int64_t carry4; 00868 int64_t carry5; 00869 int64_t carry6; 00870 int64_t carry7; 00871 int64_t carry8; 00872 int64_t carry9; 00873 00874 /* 00875 |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38)) 00876 i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8 00877 |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19)) 00878 i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9 00879 */ 00880 00881 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; 00882 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 00883 /* |h0| <= 2^25 */ 00884 /* |h4| <= 2^25 */ 00885 /* |h1| <= 1.71*2^59 */ 00886 /* |h5| <= 1.71*2^59 */ 00887 00888 carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25; 00889 carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25; 00890 /* |h1| <= 2^24; from now on fits into int32 */ 00891 /* |h5| <= 2^24; from now on fits into int32 */ 00892 /* |h2| <= 1.41*2^60 */ 00893 /* |h6| <= 1.41*2^60 */ 00894 00895 carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26; 00896 carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26; 00897 /* |h2| <= 2^25; from now on fits into int32 unchanged */ 00898 /* |h6| <= 2^25; from now on fits into int32 unchanged */ 00899 /* |h3| <= 1.71*2^59 */ 00900 /* |h7| <= 1.71*2^59 */ 00901 00902 carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25; 00903 carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25; 00904 /* |h3| <= 2^24; from now on fits into int32 unchanged */ 00905 /* |h7| <= 2^24; from now on fits into int32 unchanged */ 00906 /* |h4| <= 1.72*2^34 */ 00907 /* |h8| <= 1.41*2^60 */ 00908 00909 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 00910 carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26; 00911 /* |h4| <= 2^25; from now on fits into int32 unchanged */ 00912 /* |h8| <= 2^25; from now on fits into int32 unchanged */ 00913 /* |h5| <= 1.01*2^24 */ 00914 /* |h9| <= 1.71*2^59 */ 00915 00916 carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; 00917 /* |h9| <= 2^24; from now on fits into int32 unchanged */ 00918 /* |h0| <= 1.1*2^39 */ 00919 00920 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; 00921 /* |h0| <= 2^25; from now on fits into int32 unchanged */ 00922 /* |h1| <= 1.01*2^24 */ 00923 00924 h[0] = (int32_t)h0; 00925 h[1] = (int32_t)h1; 00926 h[2] = (int32_t)h2; 00927 h[3] = (int32_t)h3; 00928 h[4] = (int32_t)h4; 00929 h[5] = (int32_t)h5; 00930 h[6] = (int32_t)h6; 00931 h[7] = (int32_t)h7; 00932 h[8] = (int32_t)h8; 00933 h[9] = (int32_t)h9; 00934 } 00935 00936 00937 /* 00938 Replace (f,g) with (g,f) if b == 1; 00939 replace (f,g) with (f,g) if b == 0. 00940 00941 Preconditions: b in {0,1}. 00942 */ 00943 00944 void fe_cswap(fe f,fe g,unsigned int b) 00945 { 00946 int32_t f0 = f[0]; 00947 int32_t f1 = f[1]; 00948 int32_t f2 = f[2]; 00949 int32_t f3 = f[3]; 00950 int32_t f4 = f[4]; 00951 int32_t f5 = f[5]; 00952 int32_t f6 = f[6]; 00953 int32_t f7 = f[7]; 00954 int32_t f8 = f[8]; 00955 int32_t f9 = f[9]; 00956 int32_t g0 = g[0]; 00957 int32_t g1 = g[1]; 00958 int32_t g2 = g[2]; 00959 int32_t g3 = g[3]; 00960 int32_t g4 = g[4]; 00961 int32_t g5 = g[5]; 00962 int32_t g6 = g[6]; 00963 int32_t g7 = g[7]; 00964 int32_t g8 = g[8]; 00965 int32_t g9 = g[9]; 00966 int32_t x0 = f0 ^ g0; 00967 int32_t x1 = f1 ^ g1; 00968 int32_t x2 = f2 ^ g2; 00969 int32_t x3 = f3 ^ g3; 00970 int32_t x4 = f4 ^ g4; 00971 int32_t x5 = f5 ^ g5; 00972 int32_t x6 = f6 ^ g6; 00973 int32_t x7 = f7 ^ g7; 00974 int32_t x8 = f8 ^ g8; 00975 int32_t x9 = f9 ^ g9; 00976 b = -b; 00977 x0 &= b; 00978 x1 &= b; 00979 x2 &= b; 00980 x3 &= b; 00981 x4 &= b; 00982 x5 &= b; 00983 x6 &= b; 00984 x7 &= b; 00985 x8 &= b; 00986 x9 &= b; 00987 f[0] = f0 ^ x0; 00988 f[1] = f1 ^ x1; 00989 f[2] = f2 ^ x2; 00990 f[3] = f3 ^ x3; 00991 f[4] = f4 ^ x4; 00992 f[5] = f5 ^ x5; 00993 f[6] = f6 ^ x6; 00994 f[7] = f7 ^ x7; 00995 f[8] = f8 ^ x8; 00996 f[9] = f9 ^ x9; 00997 g[0] = g0 ^ x0; 00998 g[1] = g1 ^ x1; 00999 g[2] = g2 ^ x2; 01000 g[3] = g3 ^ x3; 01001 g[4] = g4 ^ x4; 01002 g[5] = g5 ^ x5; 01003 g[6] = g6 ^ x6; 01004 g[7] = g7 ^ x7; 01005 g[8] = g8 ^ x8; 01006 g[9] = g9 ^ x9; 01007 } 01008 01009 01010 /* 01011 h = f * 121666 01012 Can overlap h with f. 01013 01014 Preconditions: 01015 |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 01016 01017 Postconditions: 01018 |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 01019 */ 01020 01021 void fe_mul121666(fe h,fe f) 01022 { 01023 int32_t f0 = f[0]; 01024 int32_t f1 = f[1]; 01025 int32_t f2 = f[2]; 01026 int32_t f3 = f[3]; 01027 int32_t f4 = f[4]; 01028 int32_t f5 = f[5]; 01029 int32_t f6 = f[6]; 01030 int32_t f7 = f[7]; 01031 int32_t f8 = f[8]; 01032 int32_t f9 = f[9]; 01033 int64_t h0 = f0 * (int64_t) 121666; 01034 int64_t h1 = f1 * (int64_t) 121666; 01035 int64_t h2 = f2 * (int64_t) 121666; 01036 int64_t h3 = f3 * (int64_t) 121666; 01037 int64_t h4 = f4 * (int64_t) 121666; 01038 int64_t h5 = f5 * (int64_t) 121666; 01039 int64_t h6 = f6 * (int64_t) 121666; 01040 int64_t h7 = f7 * (int64_t) 121666; 01041 int64_t h8 = f8 * (int64_t) 121666; 01042 int64_t h9 = f9 * (int64_t) 121666; 01043 int64_t carry0; 01044 int64_t carry1; 01045 int64_t carry2; 01046 int64_t carry3; 01047 int64_t carry4; 01048 int64_t carry5; 01049 int64_t carry6; 01050 int64_t carry7; 01051 int64_t carry8; 01052 int64_t carry9; 01053 01054 carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; 01055 carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25; 01056 carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25; 01057 carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25; 01058 carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25; 01059 01060 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; 01061 carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26; 01062 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 01063 carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26; 01064 carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26; 01065 01066 h[0] = (int32_t)h0; 01067 h[1] = (int32_t)h1; 01068 h[2] = (int32_t)h2; 01069 h[3] = (int32_t)h3; 01070 h[4] = (int32_t)h4; 01071 h[5] = (int32_t)h5; 01072 h[6] = (int32_t)h6; 01073 h[7] = (int32_t)h7; 01074 h[8] = (int32_t)h8; 01075 h[9] = (int32_t)h9; 01076 } 01077 01078 01079 /* 01080 h = 2 * f * f 01081 Can overlap h with f. 01082 01083 Preconditions: 01084 |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. 01085 01086 Postconditions: 01087 |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. 01088 */ 01089 01090 /* 01091 See fe_mul.c for discussion of implementation strategy. 01092 */ 01093 01094 void fe_sq2(fe h,const fe f) 01095 { 01096 int32_t f0 = f[0]; 01097 int32_t f1 = f[1]; 01098 int32_t f2 = f[2]; 01099 int32_t f3 = f[3]; 01100 int32_t f4 = f[4]; 01101 int32_t f5 = f[5]; 01102 int32_t f6 = f[6]; 01103 int32_t f7 = f[7]; 01104 int32_t f8 = f[8]; 01105 int32_t f9 = f[9]; 01106 int32_t f0_2 = 2 * f0; 01107 int32_t f1_2 = 2 * f1; 01108 int32_t f2_2 = 2 * f2; 01109 int32_t f3_2 = 2 * f3; 01110 int32_t f4_2 = 2 * f4; 01111 int32_t f5_2 = 2 * f5; 01112 int32_t f6_2 = 2 * f6; 01113 int32_t f7_2 = 2 * f7; 01114 int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */ 01115 int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */ 01116 int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */ 01117 int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */ 01118 int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */ 01119 int64_t f0f0 = f0 * (int64_t) f0; 01120 int64_t f0f1_2 = f0_2 * (int64_t) f1; 01121 int64_t f0f2_2 = f0_2 * (int64_t) f2; 01122 int64_t f0f3_2 = f0_2 * (int64_t) f3; 01123 int64_t f0f4_2 = f0_2 * (int64_t) f4; 01124 int64_t f0f5_2 = f0_2 * (int64_t) f5; 01125 int64_t f0f6_2 = f0_2 * (int64_t) f6; 01126 int64_t f0f7_2 = f0_2 * (int64_t) f7; 01127 int64_t f0f8_2 = f0_2 * (int64_t) f8; 01128 int64_t f0f9_2 = f0_2 * (int64_t) f9; 01129 int64_t f1f1_2 = f1_2 * (int64_t) f1; 01130 int64_t f1f2_2 = f1_2 * (int64_t) f2; 01131 int64_t f1f3_4 = f1_2 * (int64_t) f3_2; 01132 int64_t f1f4_2 = f1_2 * (int64_t) f4; 01133 int64_t f1f5_4 = f1_2 * (int64_t) f5_2; 01134 int64_t f1f6_2 = f1_2 * (int64_t) f6; 01135 int64_t f1f7_4 = f1_2 * (int64_t) f7_2; 01136 int64_t f1f8_2 = f1_2 * (int64_t) f8; 01137 int64_t f1f9_76 = f1_2 * (int64_t) f9_38; 01138 int64_t f2f2 = f2 * (int64_t) f2; 01139 int64_t f2f3_2 = f2_2 * (int64_t) f3; 01140 int64_t f2f4_2 = f2_2 * (int64_t) f4; 01141 int64_t f2f5_2 = f2_2 * (int64_t) f5; 01142 int64_t f2f6_2 = f2_2 * (int64_t) f6; 01143 int64_t f2f7_2 = f2_2 * (int64_t) f7; 01144 int64_t f2f8_38 = f2_2 * (int64_t) f8_19; 01145 int64_t f2f9_38 = f2 * (int64_t) f9_38; 01146 int64_t f3f3_2 = f3_2 * (int64_t) f3; 01147 int64_t f3f4_2 = f3_2 * (int64_t) f4; 01148 int64_t f3f5_4 = f3_2 * (int64_t) f5_2; 01149 int64_t f3f6_2 = f3_2 * (int64_t) f6; 01150 int64_t f3f7_76 = f3_2 * (int64_t) f7_38; 01151 int64_t f3f8_38 = f3_2 * (int64_t) f8_19; 01152 int64_t f3f9_76 = f3_2 * (int64_t) f9_38; 01153 int64_t f4f4 = f4 * (int64_t) f4; 01154 int64_t f4f5_2 = f4_2 * (int64_t) f5; 01155 int64_t f4f6_38 = f4_2 * (int64_t) f6_19; 01156 int64_t f4f7_38 = f4 * (int64_t) f7_38; 01157 int64_t f4f8_38 = f4_2 * (int64_t) f8_19; 01158 int64_t f4f9_38 = f4 * (int64_t) f9_38; 01159 int64_t f5f5_38 = f5 * (int64_t) f5_38; 01160 int64_t f5f6_38 = f5_2 * (int64_t) f6_19; 01161 int64_t f5f7_76 = f5_2 * (int64_t) f7_38; 01162 int64_t f5f8_38 = f5_2 * (int64_t) f8_19; 01163 int64_t f5f9_76 = f5_2 * (int64_t) f9_38; 01164 int64_t f6f6_19 = f6 * (int64_t) f6_19; 01165 int64_t f6f7_38 = f6 * (int64_t) f7_38; 01166 int64_t f6f8_38 = f6_2 * (int64_t) f8_19; 01167 int64_t f6f9_38 = f6 * (int64_t) f9_38; 01168 int64_t f7f7_38 = f7 * (int64_t) f7_38; 01169 int64_t f7f8_38 = f7_2 * (int64_t) f8_19; 01170 int64_t f7f9_76 = f7_2 * (int64_t) f9_38; 01171 int64_t f8f8_19 = f8 * (int64_t) f8_19; 01172 int64_t f8f9_38 = f8 * (int64_t) f9_38; 01173 int64_t f9f9_38 = f9 * (int64_t) f9_38; 01174 int64_t h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38; 01175 int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38; 01176 int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19; 01177 int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38; 01178 int64_t h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38; 01179 int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38; 01180 int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19; 01181 int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38; 01182 int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38; 01183 int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2; 01184 int64_t carry0; 01185 int64_t carry1; 01186 int64_t carry2; 01187 int64_t carry3; 01188 int64_t carry4; 01189 int64_t carry5; 01190 int64_t carry6; 01191 int64_t carry7; 01192 int64_t carry8; 01193 int64_t carry9; 01194 01195 h0 += h0; 01196 h1 += h1; 01197 h2 += h2; 01198 h3 += h3; 01199 h4 += h4; 01200 h5 += h5; 01201 h6 += h6; 01202 h7 += h7; 01203 h8 += h8; 01204 h9 += h9; 01205 01206 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; 01207 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 01208 01209 carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25; 01210 carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25; 01211 01212 carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26; 01213 carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26; 01214 01215 carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25; 01216 carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25; 01217 01218 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 01219 carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26; 01220 01221 carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; 01222 01223 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; 01224 01225 h[0] = (int32_t)h0; 01226 h[1] = (int32_t)h1; 01227 h[2] = (int32_t)h2; 01228 h[3] = (int32_t)h3; 01229 h[4] = (int32_t)h4; 01230 h[5] = (int32_t)h5; 01231 h[6] = (int32_t)h6; 01232 h[7] = (int32_t)h7; 01233 h[8] = (int32_t)h8; 01234 h[9] = (int32_t)h9; 01235 } 01236 01237 01238 void fe_pow22523(fe out,const fe z) 01239 { 01240 fe t0; 01241 fe t1; 01242 fe t2; 01243 int i; 01244 01245 fe_sq(t0,z); for (i = 1;i < 1;++i) fe_sq(t0,t0); 01246 fe_sq(t1,t0); for (i = 1;i < 2;++i) fe_sq(t1,t1); 01247 fe_mul(t1,z,t1); 01248 fe_mul(t0,t0,t1); 01249 fe_sq(t0,t0); for (i = 1;i < 1;++i) fe_sq(t0,t0); 01250 fe_mul(t0,t1,t0); 01251 fe_sq(t1,t0); for (i = 1;i < 5;++i) fe_sq(t1,t1); 01252 fe_mul(t0,t1,t0); 01253 fe_sq(t1,t0); for (i = 1;i < 10;++i) fe_sq(t1,t1); 01254 fe_mul(t1,t1,t0); 01255 fe_sq(t2,t1); for (i = 1;i < 20;++i) fe_sq(t2,t2); 01256 fe_mul(t1,t2,t1); 01257 fe_sq(t1,t1); for (i = 1;i < 10;++i) fe_sq(t1,t1); 01258 fe_mul(t0,t1,t0); 01259 fe_sq(t1,t0); for (i = 1;i < 50;++i) fe_sq(t1,t1); 01260 fe_mul(t1,t1,t0); 01261 fe_sq(t2,t1); for (i = 1;i < 100;++i) fe_sq(t2,t2); 01262 fe_mul(t1,t2,t1); 01263 fe_sq(t1,t1); for (i = 1;i < 50;++i) fe_sq(t1,t1); 01264 fe_mul(t0,t1,t0); 01265 fe_sq(t0,t0); for (i = 1;i < 2;++i) fe_sq(t0,t0); 01266 fe_mul(out,t0,z); 01267 01268 return; 01269 } 01270 01271 01272 /* 01273 h = -f 01274 01275 Preconditions: 01276 |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 01277 01278 Postconditions: 01279 |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 01280 */ 01281 01282 void fe_neg(fe h,const fe f) 01283 { 01284 int32_t f0 = f[0]; 01285 int32_t f1 = f[1]; 01286 int32_t f2 = f[2]; 01287 int32_t f3 = f[3]; 01288 int32_t f4 = f[4]; 01289 int32_t f5 = f[5]; 01290 int32_t f6 = f[6]; 01291 int32_t f7 = f[7]; 01292 int32_t f8 = f[8]; 01293 int32_t f9 = f[9]; 01294 int32_t h0 = -f0; 01295 int32_t h1 = -f1; 01296 int32_t h2 = -f2; 01297 int32_t h3 = -f3; 01298 int32_t h4 = -f4; 01299 int32_t h5 = -f5; 01300 int32_t h6 = -f6; 01301 int32_t h7 = -f7; 01302 int32_t h8 = -f8; 01303 int32_t h9 = -f9; 01304 h[0] = h0; 01305 h[1] = h1; 01306 h[2] = h2; 01307 h[3] = h3; 01308 h[4] = h4; 01309 h[5] = h5; 01310 h[6] = h6; 01311 h[7] = h7; 01312 h[8] = h8; 01313 h[9] = h9; 01314 } 01315 01316 01317 /* 01318 Preconditions: 01319 |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 01320 */ 01321 01322 static const unsigned char zero[32] = {0}; 01323 01324 int fe_isnonzero(const fe f) 01325 { 01326 unsigned char s[32]; 01327 fe_tobytes(s,f); 01328 return ConstantCompare(s,zero,32); 01329 } 01330 01331 01332 /* 01333 return 1 if f is in {1,3,5,...,q-2} 01334 return 0 if f is in {0,2,4,...,q-1} 01335 01336 Preconditions: 01337 |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 01338 */ 01339 01340 int fe_isnegative(const fe f) 01341 { 01342 unsigned char s[32]; 01343 fe_tobytes(s,f); 01344 return s[0] & 1; 01345 } 01346 01347 01348 /* 01349 Replace (f,g) with (g,g) if b == 1; 01350 replace (f,g) with (f,g) if b == 0. 01351 01352 Preconditions: b in {0,1}. 01353 */ 01354 01355 void fe_cmov(fe f,const fe g,unsigned int b) 01356 { 01357 int32_t f0 = f[0]; 01358 int32_t f1 = f[1]; 01359 int32_t f2 = f[2]; 01360 int32_t f3 = f[3]; 01361 int32_t f4 = f[4]; 01362 int32_t f5 = f[5]; 01363 int32_t f6 = f[6]; 01364 int32_t f7 = f[7]; 01365 int32_t f8 = f[8]; 01366 int32_t f9 = f[9]; 01367 int32_t g0 = g[0]; 01368 int32_t g1 = g[1]; 01369 int32_t g2 = g[2]; 01370 int32_t g3 = g[3]; 01371 int32_t g4 = g[4]; 01372 int32_t g5 = g[5]; 01373 int32_t g6 = g[6]; 01374 int32_t g7 = g[7]; 01375 int32_t g8 = g[8]; 01376 int32_t g9 = g[9]; 01377 int32_t x0 = f0 ^ g0; 01378 int32_t x1 = f1 ^ g1; 01379 int32_t x2 = f2 ^ g2; 01380 int32_t x3 = f3 ^ g3; 01381 int32_t x4 = f4 ^ g4; 01382 int32_t x5 = f5 ^ g5; 01383 int32_t x6 = f6 ^ g6; 01384 int32_t x7 = f7 ^ g7; 01385 int32_t x8 = f8 ^ g8; 01386 int32_t x9 = f9 ^ g9; 01387 b = -b; 01388 x0 &= b; 01389 x1 &= b; 01390 x2 &= b; 01391 x3 &= b; 01392 x4 &= b; 01393 x5 &= b; 01394 x6 &= b; 01395 x7 &= b; 01396 x8 &= b; 01397 x9 &= b; 01398 f[0] = f0 ^ x0; 01399 f[1] = f1 ^ x1; 01400 f[2] = f2 ^ x2; 01401 f[3] = f3 ^ x3; 01402 f[4] = f4 ^ x4; 01403 f[5] = f5 ^ x5; 01404 f[6] = f6 ^ x6; 01405 f[7] = f7 ^ x7; 01406 f[8] = f8 ^ x8; 01407 f[9] = f9 ^ x9; 01408 } 01409 #endif /* HAVE ED25519 or CURVE25519 */ 01410 #endif /* not defined CURVED25519_SMALL */ 01411 01412
Generated on Tue Jul 12 2022 15:55:18 by
1.7.2