Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository Zip archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
fe_operations.c
00001 /* fe_operations.c 00002 * 00003 * Copyright (C) 2006-2017 wolfSSL Inc. 00004 * 00005 * This file is part of wolfSSL. 00006 * 00007 * wolfSSL is free software; you can redistribute it and/or modify 00008 * it under the terms of the GNU General Public License as published by 00009 * the Free Software Foundation; either version 2 of the License, or 00010 * (at your option) any later version. 00011 * 00012 * wolfSSL is distributed in the hope that it will be useful, 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 * GNU General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU General Public License 00018 * along with this program; if not, write to the Free Software 00019 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA 00020 */ 00021 00022 00023 /* Based On Daniel J Bernstein's curve25519 Public Domain ref10 work. */ 00024 00025 #ifdef HAVE_CONFIG_H 00026 #include <config.h> 00027 #endif 00028 00029 #include <wolfcrypt/settings.h> 00030 00031 #if defined(HAVE_CURVE25519) || defined(HAVE_ED25519) 00032 #if !defined(CURVE25519_SMALL) || !defined(ED25519_SMALL) /* run when not defined to use small memory math */ 00033 00034 #include <wolfcrypt/fe_operations.h> 00035 #include <stdint.h> 00036 00037 #ifdef NO_INLINE 00038 #include <wolfcrypt/misc.h> 00039 #else 00040 #define WOLFSSL_MISC_INCLUDED 00041 #include <wolfcrypt/src/misc.c> 00042 #endif 00043 00044 #ifdef CURVED25519_X64 00045 #include "fe_x25519_x64.i" 00046 #elif defined(CURVED25519_128BIT) 00047 #include "fe_x25519_128.i" 00048 #else 00049 00050 #if defined(HAVE_CURVE25519) || \ 00051 (defined(HAVE_ED25519) && !defined(ED25519_SMALL)) 00052 /* 00053 fe means field element. 00054 Here the field is \Z/(2^255-19). 
00055 An element t, entries t[0]...t[9], represents the integer 00056 t[0]+2^26 t[1]+2^51 t[2]+2^77 t[3]+2^102 t[4]+...+2^230 t[9]. 00057 Bounds on each t[i] vary depending on context. 00058 */ 00059 00060 uint64_t load_3(const unsigned char *in) 00061 { 00062 uint64_t result; 00063 result = (uint64_t) in[0]; 00064 result |= ((uint64_t) in[1]) << 8; 00065 result |= ((uint64_t) in[2]) << 16; 00066 return result; 00067 } 00068 00069 00070 uint64_t load_4(const unsigned char *in) 00071 { 00072 uint64_t result; 00073 result = (uint64_t) in[0]; 00074 result |= ((uint64_t) in[1]) << 8; 00075 result |= ((uint64_t) in[2]) << 16; 00076 result |= ((uint64_t) in[3]) << 24; 00077 return result; 00078 } 00079 #endif 00080 00081 /* 00082 h = 1 00083 */ 00084 00085 void fe_1(fe h) 00086 { 00087 h[0] = 1; 00088 h[1] = 0; 00089 h[2] = 0; 00090 h[3] = 0; 00091 h[4] = 0; 00092 h[5] = 0; 00093 h[6] = 0; 00094 h[7] = 0; 00095 h[8] = 0; 00096 h[9] = 0; 00097 } 00098 00099 00100 /* 00101 h = 0 00102 */ 00103 00104 void fe_0(fe h) 00105 { 00106 h[0] = 0; 00107 h[1] = 0; 00108 h[2] = 0; 00109 h[3] = 0; 00110 h[4] = 0; 00111 h[5] = 0; 00112 h[6] = 0; 00113 h[7] = 0; 00114 h[8] = 0; 00115 h[9] = 0; 00116 } 00117 00118 00119 #if ((defined(HAVE_CURVE25519) && !defined(CURVE25519_SMALL)) || \ 00120 (defined(HAVE_ED25519) && !defined(ED25519_SMALL))) && \ 00121 !defined(FREESCALE_LTC_ECC) 00122 /* to be Complementary to fe_low_mem.c */ 00123 void fe_init() 00124 { 00125 } 00126 #endif 00127 00128 #if defined(HAVE_CURVE25519) && !defined(CURVE25519_SMALL) && \ 00129 !defined(FREESCALE_LTC_ECC) 00130 int curve25519(byte* q, byte* n, byte* p) 00131 { 00132 #if 0 00133 unsigned char e[32]; 00134 #endif 00135 fe x1; 00136 fe x2; 00137 fe z2; 00138 fe x3; 00139 fe z3; 00140 fe tmp0; 00141 fe tmp1; 00142 int pos; 00143 unsigned int swap; 00144 unsigned int b; 00145 00146 /* Clamp already done during key generation and import */ 00147 #if 0 00148 { 00149 unsigned int i; 00150 for (i = 0;i < 32;++i) e[i] 
= n[i]; 00151 e[0] &= 248; 00152 e[31] &= 127; 00153 e[31] |= 64; 00154 } 00155 #endif 00156 00157 fe_frombytes(x1,p); 00158 fe_1(x2); 00159 fe_0(z2); 00160 fe_copy(x3,x1); 00161 fe_1(z3); 00162 00163 swap = 0; 00164 for (pos = 254;pos >= 0;--pos) { 00165 #if 0 00166 b = e[pos / 8] >> (pos & 7); 00167 #else 00168 b = n[pos / 8] >> (pos & 7); 00169 #endif 00170 b &= 1; 00171 swap ^= b; 00172 fe_cswap(x2,x3,swap); 00173 fe_cswap(z2,z3,swap); 00174 swap = b; 00175 00176 /* montgomery */ 00177 fe_sub(tmp0,x3,z3); 00178 fe_sub(tmp1,x2,z2); 00179 fe_add(x2,x2,z2); 00180 fe_add(z2,x3,z3); 00181 fe_mul(z3,tmp0,x2); 00182 fe_mul(z2,z2,tmp1); 00183 fe_sq(tmp0,tmp1); 00184 fe_sq(tmp1,x2); 00185 fe_add(x3,z3,z2); 00186 fe_sub(z2,z3,z2); 00187 fe_mul(x2,tmp1,tmp0); 00188 fe_sub(tmp1,tmp1,tmp0); 00189 fe_sq(z2,z2); 00190 fe_mul121666(z3,tmp1); 00191 fe_sq(x3,x3); 00192 fe_add(tmp0,tmp0,z3); 00193 fe_mul(z3,x1,z2); 00194 fe_mul(z2,tmp1,tmp0); 00195 } 00196 fe_cswap(x2,x3,swap); 00197 fe_cswap(z2,z3,swap); 00198 00199 fe_invert(z2,z2); 00200 fe_mul(x2,x2,z2); 00201 fe_tobytes(q,x2); 00202 00203 return 0; 00204 } 00205 #endif /* HAVE_CURVE25519 && !CURVE25519_SMALL && !FREESCALE_LTC_ECC */ 00206 00207 00208 /* 00209 h = f * f 00210 Can overlap h with f. 00211 00212 Preconditions: 00213 |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. 00214 00215 Postconditions: 00216 |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. 00217 */ 00218 00219 /* 00220 See fe_mul.c for discussion of implementation strategy. 
00221 */ 00222 00223 void fe_sq(fe h,const fe f) 00224 { 00225 int32_t f0 = f[0]; 00226 int32_t f1 = f[1]; 00227 int32_t f2 = f[2]; 00228 int32_t f3 = f[3]; 00229 int32_t f4 = f[4]; 00230 int32_t f5 = f[5]; 00231 int32_t f6 = f[6]; 00232 int32_t f7 = f[7]; 00233 int32_t f8 = f[8]; 00234 int32_t f9 = f[9]; 00235 int32_t f0_2 = 2 * f0; 00236 int32_t f1_2 = 2 * f1; 00237 int32_t f2_2 = 2 * f2; 00238 int32_t f3_2 = 2 * f3; 00239 int32_t f4_2 = 2 * f4; 00240 int32_t f5_2 = 2 * f5; 00241 int32_t f6_2 = 2 * f6; 00242 int32_t f7_2 = 2 * f7; 00243 int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */ 00244 int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */ 00245 int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */ 00246 int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */ 00247 int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */ 00248 int64_t f0f0 = f0 * (int64_t) f0; 00249 int64_t f0f1_2 = f0_2 * (int64_t) f1; 00250 int64_t f0f2_2 = f0_2 * (int64_t) f2; 00251 int64_t f0f3_2 = f0_2 * (int64_t) f3; 00252 int64_t f0f4_2 = f0_2 * (int64_t) f4; 00253 int64_t f0f5_2 = f0_2 * (int64_t) f5; 00254 int64_t f0f6_2 = f0_2 * (int64_t) f6; 00255 int64_t f0f7_2 = f0_2 * (int64_t) f7; 00256 int64_t f0f8_2 = f0_2 * (int64_t) f8; 00257 int64_t f0f9_2 = f0_2 * (int64_t) f9; 00258 int64_t f1f1_2 = f1_2 * (int64_t) f1; 00259 int64_t f1f2_2 = f1_2 * (int64_t) f2; 00260 int64_t f1f3_4 = f1_2 * (int64_t) f3_2; 00261 int64_t f1f4_2 = f1_2 * (int64_t) f4; 00262 int64_t f1f5_4 = f1_2 * (int64_t) f5_2; 00263 int64_t f1f6_2 = f1_2 * (int64_t) f6; 00264 int64_t f1f7_4 = f1_2 * (int64_t) f7_2; 00265 int64_t f1f8_2 = f1_2 * (int64_t) f8; 00266 int64_t f1f9_76 = f1_2 * (int64_t) f9_38; 00267 int64_t f2f2 = f2 * (int64_t) f2; 00268 int64_t f2f3_2 = f2_2 * (int64_t) f3; 00269 int64_t f2f4_2 = f2_2 * (int64_t) f4; 00270 int64_t f2f5_2 = f2_2 * (int64_t) f5; 00271 int64_t f2f6_2 = f2_2 * (int64_t) f6; 00272 int64_t f2f7_2 = f2_2 * (int64_t) f7; 00273 int64_t f2f8_38 = f2_2 * (int64_t) f8_19; 00274 int64_t f2f9_38 = f2 * (int64_t) f9_38; 00275 
int64_t f3f3_2 = f3_2 * (int64_t) f3; 00276 int64_t f3f4_2 = f3_2 * (int64_t) f4; 00277 int64_t f3f5_4 = f3_2 * (int64_t) f5_2; 00278 int64_t f3f6_2 = f3_2 * (int64_t) f6; 00279 int64_t f3f7_76 = f3_2 * (int64_t) f7_38; 00280 int64_t f3f8_38 = f3_2 * (int64_t) f8_19; 00281 int64_t f3f9_76 = f3_2 * (int64_t) f9_38; 00282 int64_t f4f4 = f4 * (int64_t) f4; 00283 int64_t f4f5_2 = f4_2 * (int64_t) f5; 00284 int64_t f4f6_38 = f4_2 * (int64_t) f6_19; 00285 int64_t f4f7_38 = f4 * (int64_t) f7_38; 00286 int64_t f4f8_38 = f4_2 * (int64_t) f8_19; 00287 int64_t f4f9_38 = f4 * (int64_t) f9_38; 00288 int64_t f5f5_38 = f5 * (int64_t) f5_38; 00289 int64_t f5f6_38 = f5_2 * (int64_t) f6_19; 00290 int64_t f5f7_76 = f5_2 * (int64_t) f7_38; 00291 int64_t f5f8_38 = f5_2 * (int64_t) f8_19; 00292 int64_t f5f9_76 = f5_2 * (int64_t) f9_38; 00293 int64_t f6f6_19 = f6 * (int64_t) f6_19; 00294 int64_t f6f7_38 = f6 * (int64_t) f7_38; 00295 int64_t f6f8_38 = f6_2 * (int64_t) f8_19; 00296 int64_t f6f9_38 = f6 * (int64_t) f9_38; 00297 int64_t f7f7_38 = f7 * (int64_t) f7_38; 00298 int64_t f7f8_38 = f7_2 * (int64_t) f8_19; 00299 int64_t f7f9_76 = f7_2 * (int64_t) f9_38; 00300 int64_t f8f8_19 = f8 * (int64_t) f8_19; 00301 int64_t f8f9_38 = f8 * (int64_t) f9_38; 00302 int64_t f9f9_38 = f9 * (int64_t) f9_38; 00303 int64_t h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38; 00304 int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38; 00305 int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19; 00306 int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38; 00307 int64_t h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38; 00308 int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38; 00309 int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19; 00310 int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38; 00311 int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38; 00312 int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2; 00313 int64_t carry0; 00314 int64_t carry1; 00315 int64_t carry2; 00316 int64_t 
carry3; 00317 int64_t carry4; 00318 int64_t carry5; 00319 int64_t carry6; 00320 int64_t carry7; 00321 int64_t carry8; 00322 int64_t carry9; 00323 00324 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; 00325 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 00326 00327 carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25; 00328 carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25; 00329 00330 carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26; 00331 carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26; 00332 00333 carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25; 00334 carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25; 00335 00336 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 00337 carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26; 00338 00339 carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; 00340 00341 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; 00342 00343 h[0] = (int32_t)h0; 00344 h[1] = (int32_t)h1; 00345 h[2] = (int32_t)h2; 00346 h[3] = (int32_t)h3; 00347 h[4] = (int32_t)h4; 00348 h[5] = (int32_t)h5; 00349 h[6] = (int32_t)h6; 00350 h[7] = (int32_t)h7; 00351 h[8] = (int32_t)h8; 00352 h[9] = (int32_t)h9; 00353 } 00354 00355 00356 /* 00357 h = f + g 00358 Can overlap h with f or g. 00359 00360 Preconditions: 00361 |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 00362 |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 00363 00364 Postconditions: 00365 |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 
00366 */ 00367 00368 void fe_add(fe h,const fe f,const fe g) 00369 { 00370 int32_t f0 = f[0]; 00371 int32_t f1 = f[1]; 00372 int32_t f2 = f[2]; 00373 int32_t f3 = f[3]; 00374 int32_t f4 = f[4]; 00375 int32_t f5 = f[5]; 00376 int32_t f6 = f[6]; 00377 int32_t f7 = f[7]; 00378 int32_t f8 = f[8]; 00379 int32_t f9 = f[9]; 00380 int32_t g0 = g[0]; 00381 int32_t g1 = g[1]; 00382 int32_t g2 = g[2]; 00383 int32_t g3 = g[3]; 00384 int32_t g4 = g[4]; 00385 int32_t g5 = g[5]; 00386 int32_t g6 = g[6]; 00387 int32_t g7 = g[7]; 00388 int32_t g8 = g[8]; 00389 int32_t g9 = g[9]; 00390 int32_t h0 = f0 + g0; 00391 int32_t h1 = f1 + g1; 00392 int32_t h2 = f2 + g2; 00393 int32_t h3 = f3 + g3; 00394 int32_t h4 = f4 + g4; 00395 int32_t h5 = f5 + g5; 00396 int32_t h6 = f6 + g6; 00397 int32_t h7 = f7 + g7; 00398 int32_t h8 = f8 + g8; 00399 int32_t h9 = f9 + g9; 00400 h[0] = h0; 00401 h[1] = h1; 00402 h[2] = h2; 00403 h[3] = h3; 00404 h[4] = h4; 00405 h[5] = h5; 00406 h[6] = h6; 00407 h[7] = h7; 00408 h[8] = h8; 00409 h[9] = h9; 00410 } 00411 00412 00413 /* 00414 Preconditions: 00415 |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 00416 00417 Write p=2^255-19; q=floor(h/p). 00418 Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))). 00419 00420 Proof: 00421 Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4. 00422 Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4. 00423 00424 Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9). 00425 Then 0<y<1. 00426 00427 Write r=h-pq. 00428 Have 0<=r<=p-1=2^255-20. 00429 Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1. 00430 00431 Write x=r+19(2^-255)r+y. 00432 Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q. 00433 00434 Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1)) 00435 so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q. 
00436 */ 00437 00438 void fe_tobytes(unsigned char *s,const fe h) 00439 { 00440 int32_t h0 = h[0]; 00441 int32_t h1 = h[1]; 00442 int32_t h2 = h[2]; 00443 int32_t h3 = h[3]; 00444 int32_t h4 = h[4]; 00445 int32_t h5 = h[5]; 00446 int32_t h6 = h[6]; 00447 int32_t h7 = h[7]; 00448 int32_t h8 = h[8]; 00449 int32_t h9 = h[9]; 00450 int32_t q; 00451 int32_t carry0; 00452 int32_t carry1; 00453 int32_t carry2; 00454 int32_t carry3; 00455 int32_t carry4; 00456 int32_t carry5; 00457 int32_t carry6; 00458 int32_t carry7; 00459 int32_t carry8; 00460 int32_t carry9; 00461 00462 q = (19 * h9 + (((int32_t) 1) << 24)) >> 25; 00463 q = (h0 + q) >> 26; 00464 q = (h1 + q) >> 25; 00465 q = (h2 + q) >> 26; 00466 q = (h3 + q) >> 25; 00467 q = (h4 + q) >> 26; 00468 q = (h5 + q) >> 25; 00469 q = (h6 + q) >> 26; 00470 q = (h7 + q) >> 25; 00471 q = (h8 + q) >> 26; 00472 q = (h9 + q) >> 25; 00473 00474 /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */ 00475 h0 += 19 * q; 00476 /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */ 00477 00478 carry0 = h0 >> 26; h1 += carry0; h0 -= carry0 << 26; 00479 carry1 = h1 >> 25; h2 += carry1; h1 -= carry1 << 25; 00480 carry2 = h2 >> 26; h3 += carry2; h2 -= carry2 << 26; 00481 carry3 = h3 >> 25; h4 += carry3; h3 -= carry3 << 25; 00482 carry4 = h4 >> 26; h5 += carry4; h4 -= carry4 << 26; 00483 carry5 = h5 >> 25; h6 += carry5; h5 -= carry5 << 25; 00484 carry6 = h6 >> 26; h7 += carry6; h6 -= carry6 << 26; 00485 carry7 = h7 >> 25; h8 += carry7; h7 -= carry7 << 25; 00486 carry8 = h8 >> 26; h9 += carry8; h8 -= carry8 << 26; 00487 carry9 = h9 >> 25; h9 -= carry9 << 25; 00488 /* h10 = carry9 */ 00489 00490 /* 00491 Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20. 00492 Have h0+...+2^230 h9 between 0 and 2^255-1; 00493 evidently 2^255 h10-2^255 q = 0. 00494 Goal: Output h0+...+2^230 h9. 
00495 */ 00496 00497 s[0] = (byte)(h0 >> 0); 00498 s[1] = (byte)(h0 >> 8); 00499 s[2] = (byte)(h0 >> 16); 00500 s[3] = (byte)((h0 >> 24) | (h1 << 2)); 00501 s[4] = (byte)(h1 >> 6); 00502 s[5] = (byte)(h1 >> 14); 00503 s[6] = (byte)((h1 >> 22) | (h2 << 3)); 00504 s[7] = (byte)(h2 >> 5); 00505 s[8] = (byte)(h2 >> 13); 00506 s[9] = (byte)((h2 >> 21) | (h3 << 5)); 00507 s[10] = (byte)(h3 >> 3); 00508 s[11] = (byte)(h3 >> 11); 00509 s[12] = (byte)((h3 >> 19) | (h4 << 6)); 00510 s[13] = (byte)(h4 >> 2); 00511 s[14] = (byte)(h4 >> 10); 00512 s[15] = (byte)(h4 >> 18); 00513 s[16] = (byte)(h5 >> 0); 00514 s[17] = (byte)(h5 >> 8); 00515 s[18] = (byte)(h5 >> 16); 00516 s[19] = (byte)((h5 >> 24) | (h6 << 1)); 00517 s[20] = (byte)(h6 >> 7); 00518 s[21] = (byte)(h6 >> 15); 00519 s[22] = (byte)((h6 >> 23) | (h7 << 3)); 00520 s[23] = (byte)(h7 >> 5); 00521 s[24] = (byte)(h7 >> 13); 00522 s[25] = (byte)((h7 >> 21) | (h8 << 4)); 00523 s[26] = (byte)(h8 >> 4); 00524 s[27] = (byte)(h8 >> 12); 00525 s[28] = (byte)((h8 >> 20) | (h9 << 6)); 00526 s[29] = (byte)(h9 >> 2); 00527 s[30] = (byte)(h9 >> 10); 00528 s[31] = (byte)(h9 >> 18); 00529 } 00530 00531 00532 /* 00533 h = f - g 00534 Can overlap h with f or g. 00535 00536 Preconditions: 00537 |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 00538 |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 00539 00540 Postconditions: 00541 |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 
00542 */ 00543 00544 void fe_sub(fe h,const fe f,const fe g) 00545 { 00546 int32_t f0 = f[0]; 00547 int32_t f1 = f[1]; 00548 int32_t f2 = f[2]; 00549 int32_t f3 = f[3]; 00550 int32_t f4 = f[4]; 00551 int32_t f5 = f[5]; 00552 int32_t f6 = f[6]; 00553 int32_t f7 = f[7]; 00554 int32_t f8 = f[8]; 00555 int32_t f9 = f[9]; 00556 int32_t g0 = g[0]; 00557 int32_t g1 = g[1]; 00558 int32_t g2 = g[2]; 00559 int32_t g3 = g[3]; 00560 int32_t g4 = g[4]; 00561 int32_t g5 = g[5]; 00562 int32_t g6 = g[6]; 00563 int32_t g7 = g[7]; 00564 int32_t g8 = g[8]; 00565 int32_t g9 = g[9]; 00566 int32_t h0 = f0 - g0; 00567 int32_t h1 = f1 - g1; 00568 int32_t h2 = f2 - g2; 00569 int32_t h3 = f3 - g3; 00570 int32_t h4 = f4 - g4; 00571 int32_t h5 = f5 - g5; 00572 int32_t h6 = f6 - g6; 00573 int32_t h7 = f7 - g7; 00574 int32_t h8 = f8 - g8; 00575 int32_t h9 = f9 - g9; 00576 h[0] = h0; 00577 h[1] = h1; 00578 h[2] = h2; 00579 h[3] = h3; 00580 h[4] = h4; 00581 h[5] = h5; 00582 h[6] = h6; 00583 h[7] = h7; 00584 h[8] = h8; 00585 h[9] = h9; 00586 } 00587 00588 00589 #if defined(HAVE_CURVE25519) || \ 00590 (defined(HAVE_ED25519) && !defined(ED25519_SMALL)) 00591 /* 00592 Ignores top bit of h. 
00593 */ 00594 00595 void fe_frombytes(fe h,const unsigned char *s) 00596 { 00597 int64_t h0 = load_4(s); 00598 int64_t h1 = load_3(s + 4) << 6; 00599 int64_t h2 = load_3(s + 7) << 5; 00600 int64_t h3 = load_3(s + 10) << 3; 00601 int64_t h4 = load_3(s + 13) << 2; 00602 int64_t h5 = load_4(s + 16); 00603 int64_t h6 = load_3(s + 20) << 7; 00604 int64_t h7 = load_3(s + 23) << 5; 00605 int64_t h8 = load_3(s + 26) << 4; 00606 int64_t h9 = (load_3(s + 29) & 8388607) << 2; 00607 int64_t carry0; 00608 int64_t carry1; 00609 int64_t carry2; 00610 int64_t carry3; 00611 int64_t carry4; 00612 int64_t carry5; 00613 int64_t carry6; 00614 int64_t carry7; 00615 int64_t carry8; 00616 int64_t carry9; 00617 00618 carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; 00619 carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25; 00620 carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25; 00621 carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25; 00622 carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25; 00623 00624 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; 00625 carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26; 00626 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 00627 carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26; 00628 carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26; 00629 00630 h[0] = (int32_t)h0; 00631 h[1] = (int32_t)h1; 00632 h[2] = (int32_t)h2; 00633 h[3] = (int32_t)h3; 00634 h[4] = (int32_t)h4; 00635 h[5] = (int32_t)h5; 00636 h[6] = (int32_t)h6; 00637 h[7] = (int32_t)h7; 00638 h[8] = (int32_t)h8; 00639 h[9] = (int32_t)h9; 00640 } 00641 #endif 00642 00643 00644 void fe_invert(fe out,const fe z) 00645 { 00646 fe t0; 00647 fe t1; 00648 fe t2; 00649 fe t3; 00650 int i; 00651 00652 /* pow225521 */ 00653 fe_sq(t0,z); for (i = 1;i < 1;++i) 
fe_sq(t0,t0); 00654 fe_sq(t1,t0); for (i = 1;i < 2;++i) fe_sq(t1,t1); 00655 fe_mul(t1,z,t1); 00656 fe_mul(t0,t0,t1); 00657 fe_sq(t2,t0); for (i = 1;i < 1;++i) fe_sq(t2,t2); 00658 fe_mul(t1,t1,t2); 00659 fe_sq(t2,t1); for (i = 1;i < 5;++i) fe_sq(t2,t2); 00660 fe_mul(t1,t2,t1); 00661 fe_sq(t2,t1); for (i = 1;i < 10;++i) fe_sq(t2,t2); 00662 fe_mul(t2,t2,t1); 00663 fe_sq(t3,t2); for (i = 1;i < 20;++i) fe_sq(t3,t3); 00664 fe_mul(t2,t3,t2); 00665 fe_sq(t2,t2); for (i = 1;i < 10;++i) fe_sq(t2,t2); 00666 fe_mul(t1,t2,t1); 00667 fe_sq(t2,t1); for (i = 1;i < 50;++i) fe_sq(t2,t2); 00668 fe_mul(t2,t2,t1); 00669 fe_sq(t3,t2); for (i = 1;i < 100;++i) fe_sq(t3,t3); 00670 fe_mul(t2,t3,t2); 00671 fe_sq(t2,t2); for (i = 1;i < 50;++i) fe_sq(t2,t2); 00672 fe_mul(t1,t2,t1); 00673 fe_sq(t1,t1); for (i = 1;i < 5;++i) fe_sq(t1,t1); 00674 fe_mul(out,t1,t0); 00675 00676 return; 00677 } 00678 00679 00680 /* 00681 h = f 00682 */ 00683 00684 void fe_copy(fe h,const fe f) 00685 { 00686 int32_t f0 = f[0]; 00687 int32_t f1 = f[1]; 00688 int32_t f2 = f[2]; 00689 int32_t f3 = f[3]; 00690 int32_t f4 = f[4]; 00691 int32_t f5 = f[5]; 00692 int32_t f6 = f[6]; 00693 int32_t f7 = f[7]; 00694 int32_t f8 = f[8]; 00695 int32_t f9 = f[9]; 00696 h[0] = f0; 00697 h[1] = f1; 00698 h[2] = f2; 00699 h[3] = f3; 00700 h[4] = f4; 00701 h[5] = f5; 00702 h[6] = f6; 00703 h[7] = f7; 00704 h[8] = f8; 00705 h[9] = f9; 00706 } 00707 00708 00709 /* 00710 h = f * g 00711 Can overlap h with f or g. 00712 00713 Preconditions: 00714 |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. 00715 |g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. 00716 00717 Postconditions: 00718 |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. 00719 */ 00720 00721 /* 00722 Notes on implementation strategy: 00723 00724 Using schoolbook multiplication. 00725 Karatsuba would save a little in some cost models. 00726 00727 Most multiplications by 2 and 19 are 32-bit precomputations; 00728 cheaper than 64-bit postcomputations. 
00729 00730 There is one remaining multiplication by 19 in the carry chain; 00731 one *19 precomputation can be merged into this, 00732 but the resulting data flow is considerably less clean. 00733 00734 There are 12 carries below. 00735 10 of them are 2-way parallelizable and vectorizable. 00736 Can get away with 11 carries, but then data flow is much deeper. 00737 00738 With tighter constraints on inputs can squeeze carries into int32. 00739 */ 00740 00741 void fe_mul(fe h,const fe f,const fe g) 00742 { 00743 int32_t f0 = f[0]; 00744 int32_t f1 = f[1]; 00745 int32_t f2 = f[2]; 00746 int32_t f3 = f[3]; 00747 int32_t f4 = f[4]; 00748 int32_t f5 = f[5]; 00749 int32_t f6 = f[6]; 00750 int32_t f7 = f[7]; 00751 int32_t f8 = f[8]; 00752 int32_t f9 = f[9]; 00753 int32_t g0 = g[0]; 00754 int32_t g1 = g[1]; 00755 int32_t g2 = g[2]; 00756 int32_t g3 = g[3]; 00757 int32_t g4 = g[4]; 00758 int32_t g5 = g[5]; 00759 int32_t g6 = g[6]; 00760 int32_t g7 = g[7]; 00761 int32_t g8 = g[8]; 00762 int32_t g9 = g[9]; 00763 int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */ 00764 int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */ 00765 int32_t g3_19 = 19 * g3; 00766 int32_t g4_19 = 19 * g4; 00767 int32_t g5_19 = 19 * g5; 00768 int32_t g6_19 = 19 * g6; 00769 int32_t g7_19 = 19 * g7; 00770 int32_t g8_19 = 19 * g8; 00771 int32_t g9_19 = 19 * g9; 00772 int32_t f1_2 = 2 * f1; 00773 int32_t f3_2 = 2 * f3; 00774 int32_t f5_2 = 2 * f5; 00775 int32_t f7_2 = 2 * f7; 00776 int32_t f9_2 = 2 * f9; 00777 int64_t f0g0 = f0 * (int64_t) g0; 00778 int64_t f0g1 = f0 * (int64_t) g1; 00779 int64_t f0g2 = f0 * (int64_t) g2; 00780 int64_t f0g3 = f0 * (int64_t) g3; 00781 int64_t f0g4 = f0 * (int64_t) g4; 00782 int64_t f0g5 = f0 * (int64_t) g5; 00783 int64_t f0g6 = f0 * (int64_t) g6; 00784 int64_t f0g7 = f0 * (int64_t) g7; 00785 int64_t f0g8 = f0 * (int64_t) g8; 00786 int64_t f0g9 = f0 * (int64_t) g9; 00787 int64_t f1g0 = f1 * (int64_t) g0; 00788 int64_t f1g1_2 = f1_2 * (int64_t) g1; 00789 int64_t f1g2 = f1 * 
(int64_t) g2; 00790 int64_t f1g3_2 = f1_2 * (int64_t) g3; 00791 int64_t f1g4 = f1 * (int64_t) g4; 00792 int64_t f1g5_2 = f1_2 * (int64_t) g5; 00793 int64_t f1g6 = f1 * (int64_t) g6; 00794 int64_t f1g7_2 = f1_2 * (int64_t) g7; 00795 int64_t f1g8 = f1 * (int64_t) g8; 00796 int64_t f1g9_38 = f1_2 * (int64_t) g9_19; 00797 int64_t f2g0 = f2 * (int64_t) g0; 00798 int64_t f2g1 = f2 * (int64_t) g1; 00799 int64_t f2g2 = f2 * (int64_t) g2; 00800 int64_t f2g3 = f2 * (int64_t) g3; 00801 int64_t f2g4 = f2 * (int64_t) g4; 00802 int64_t f2g5 = f2 * (int64_t) g5; 00803 int64_t f2g6 = f2 * (int64_t) g6; 00804 int64_t f2g7 = f2 * (int64_t) g7; 00805 int64_t f2g8_19 = f2 * (int64_t) g8_19; 00806 int64_t f2g9_19 = f2 * (int64_t) g9_19; 00807 int64_t f3g0 = f3 * (int64_t) g0; 00808 int64_t f3g1_2 = f3_2 * (int64_t) g1; 00809 int64_t f3g2 = f3 * (int64_t) g2; 00810 int64_t f3g3_2 = f3_2 * (int64_t) g3; 00811 int64_t f3g4 = f3 * (int64_t) g4; 00812 int64_t f3g5_2 = f3_2 * (int64_t) g5; 00813 int64_t f3g6 = f3 * (int64_t) g6; 00814 int64_t f3g7_38 = f3_2 * (int64_t) g7_19; 00815 int64_t f3g8_19 = f3 * (int64_t) g8_19; 00816 int64_t f3g9_38 = f3_2 * (int64_t) g9_19; 00817 int64_t f4g0 = f4 * (int64_t) g0; 00818 int64_t f4g1 = f4 * (int64_t) g1; 00819 int64_t f4g2 = f4 * (int64_t) g2; 00820 int64_t f4g3 = f4 * (int64_t) g3; 00821 int64_t f4g4 = f4 * (int64_t) g4; 00822 int64_t f4g5 = f4 * (int64_t) g5; 00823 int64_t f4g6_19 = f4 * (int64_t) g6_19; 00824 int64_t f4g7_19 = f4 * (int64_t) g7_19; 00825 int64_t f4g8_19 = f4 * (int64_t) g8_19; 00826 int64_t f4g9_19 = f4 * (int64_t) g9_19; 00827 int64_t f5g0 = f5 * (int64_t) g0; 00828 int64_t f5g1_2 = f5_2 * (int64_t) g1; 00829 int64_t f5g2 = f5 * (int64_t) g2; 00830 int64_t f5g3_2 = f5_2 * (int64_t) g3; 00831 int64_t f5g4 = f5 * (int64_t) g4; 00832 int64_t f5g5_38 = f5_2 * (int64_t) g5_19; 00833 int64_t f5g6_19 = f5 * (int64_t) g6_19; 00834 int64_t f5g7_38 = f5_2 * (int64_t) g7_19; 00835 int64_t f5g8_19 = f5 * (int64_t) g8_19; 00836 int64_t 
f5g9_38 = f5_2 * (int64_t) g9_19; 00837 int64_t f6g0 = f6 * (int64_t) g0; 00838 int64_t f6g1 = f6 * (int64_t) g1; 00839 int64_t f6g2 = f6 * (int64_t) g2; 00840 int64_t f6g3 = f6 * (int64_t) g3; 00841 int64_t f6g4_19 = f6 * (int64_t) g4_19; 00842 int64_t f6g5_19 = f6 * (int64_t) g5_19; 00843 int64_t f6g6_19 = f6 * (int64_t) g6_19; 00844 int64_t f6g7_19 = f6 * (int64_t) g7_19; 00845 int64_t f6g8_19 = f6 * (int64_t) g8_19; 00846 int64_t f6g9_19 = f6 * (int64_t) g9_19; 00847 int64_t f7g0 = f7 * (int64_t) g0; 00848 int64_t f7g1_2 = f7_2 * (int64_t) g1; 00849 int64_t f7g2 = f7 * (int64_t) g2; 00850 int64_t f7g3_38 = f7_2 * (int64_t) g3_19; 00851 int64_t f7g4_19 = f7 * (int64_t) g4_19; 00852 int64_t f7g5_38 = f7_2 * (int64_t) g5_19; 00853 int64_t f7g6_19 = f7 * (int64_t) g6_19; 00854 int64_t f7g7_38 = f7_2 * (int64_t) g7_19; 00855 int64_t f7g8_19 = f7 * (int64_t) g8_19; 00856 int64_t f7g9_38 = f7_2 * (int64_t) g9_19; 00857 int64_t f8g0 = f8 * (int64_t) g0; 00858 int64_t f8g1 = f8 * (int64_t) g1; 00859 int64_t f8g2_19 = f8 * (int64_t) g2_19; 00860 int64_t f8g3_19 = f8 * (int64_t) g3_19; 00861 int64_t f8g4_19 = f8 * (int64_t) g4_19; 00862 int64_t f8g5_19 = f8 * (int64_t) g5_19; 00863 int64_t f8g6_19 = f8 * (int64_t) g6_19; 00864 int64_t f8g7_19 = f8 * (int64_t) g7_19; 00865 int64_t f8g8_19 = f8 * (int64_t) g8_19; 00866 int64_t f8g9_19 = f8 * (int64_t) g9_19; 00867 int64_t f9g0 = f9 * (int64_t) g0; 00868 int64_t f9g1_38 = f9_2 * (int64_t) g1_19; 00869 int64_t f9g2_19 = f9 * (int64_t) g2_19; 00870 int64_t f9g3_38 = f9_2 * (int64_t) g3_19; 00871 int64_t f9g4_19 = f9 * (int64_t) g4_19; 00872 int64_t f9g5_38 = f9_2 * (int64_t) g5_19; 00873 int64_t f9g6_19 = f9 * (int64_t) g6_19; 00874 int64_t f9g7_38 = f9_2 * (int64_t) g7_19; 00875 int64_t f9g8_19 = f9 * (int64_t) g8_19; 00876 int64_t f9g9_38 = f9_2 * (int64_t) g9_19; 00877 int64_t h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38; 00878 int64_t h1 = f0g1+f1g0 
+f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19; 00879 int64_t h2 = f0g2+f1g1_2 +f2g0 +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38; 00880 int64_t h3 = f0g3+f1g2 +f2g1 +f3g0 +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19; 00881 int64_t h4 = f0g4+f1g3_2 +f2g2 +f3g1_2 +f4g0 +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38; 00882 int64_t h5 = f0g5+f1g4 +f2g3 +f3g2 +f4g1 +f5g0 +f6g9_19+f7g8_19+f8g7_19+f9g6_19; 00883 int64_t h6 = f0g6+f1g5_2 +f2g4 +f3g3_2 +f4g2 +f5g1_2 +f6g0 +f7g9_38+f8g8_19+f9g7_38; 00884 int64_t h7 = f0g7+f1g6 +f2g5 +f3g4 +f4g3 +f5g2 +f6g1 +f7g0 +f8g9_19+f9g8_19; 00885 int64_t h8 = f0g8+f1g7_2 +f2g6 +f3g5_2 +f4g4 +f5g3_2 +f6g2 +f7g1_2 +f8g0 +f9g9_38; 00886 int64_t h9 = f0g9+f1g8 +f2g7 +f3g6 +f4g5 +f5g4 +f6g3 +f7g2 +f8g1 +f9g0 ; 00887 int64_t carry0; 00888 int64_t carry1; 00889 int64_t carry2; 00890 int64_t carry3; 00891 int64_t carry4; 00892 int64_t carry5; 00893 int64_t carry6; 00894 int64_t carry7; 00895 int64_t carry8; 00896 int64_t carry9; 00897 00898 /* 00899 |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38)) 00900 i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8 00901 |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19)) 00902 i.e. 
|h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9 00903 */ 00904 00905 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; 00906 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 00907 /* |h0| <= 2^25 */ 00908 /* |h4| <= 2^25 */ 00909 /* |h1| <= 1.71*2^59 */ 00910 /* |h5| <= 1.71*2^59 */ 00911 00912 carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25; 00913 carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25; 00914 /* |h1| <= 2^24; from now on fits into int32 */ 00915 /* |h5| <= 2^24; from now on fits into int32 */ 00916 /* |h2| <= 1.41*2^60 */ 00917 /* |h6| <= 1.41*2^60 */ 00918 00919 carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26; 00920 carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26; 00921 /* |h2| <= 2^25; from now on fits into int32 unchanged */ 00922 /* |h6| <= 2^25; from now on fits into int32 unchanged */ 00923 /* |h3| <= 1.71*2^59 */ 00924 /* |h7| <= 1.71*2^59 */ 00925 00926 carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25; 00927 carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25; 00928 /* |h3| <= 2^24; from now on fits into int32 unchanged */ 00929 /* |h7| <= 2^24; from now on fits into int32 unchanged */ 00930 /* |h4| <= 1.72*2^34 */ 00931 /* |h8| <= 1.41*2^60 */ 00932 00933 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 00934 carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26; 00935 /* |h4| <= 2^25; from now on fits into int32 unchanged */ 00936 /* |h8| <= 2^25; from now on fits into int32 unchanged */ 00937 /* |h5| <= 1.01*2^24 */ 00938 /* |h9| <= 1.71*2^59 */ 00939 00940 carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; 00941 /* |h9| <= 2^24; from now on fits into int32 unchanged */ 00942 /* |h0| <= 1.1*2^39 */ 00943 00944 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 
-= carry0 << 26; 00945 /* |h0| <= 2^25; from now on fits into int32 unchanged */ 00946 /* |h1| <= 1.01*2^24 */ 00947 00948 h[0] = (int32_t)h0; 00949 h[1] = (int32_t)h1; 00950 h[2] = (int32_t)h2; 00951 h[3] = (int32_t)h3; 00952 h[4] = (int32_t)h4; 00953 h[5] = (int32_t)h5; 00954 h[6] = (int32_t)h6; 00955 h[7] = (int32_t)h7; 00956 h[8] = (int32_t)h8; 00957 h[9] = (int32_t)h9; 00958 } 00959 00960 00961 /* 00962 Replace (f,g) with (g,f) if b == 1; 00963 replace (f,g) with (f,g) if b == 0. 00964 00965 Preconditions: b in {0,1}. 00966 */ 00967 00968 void fe_cswap(fe f, fe g, int b) 00969 { 00970 int32_t f0 = f[0]; 00971 int32_t f1 = f[1]; 00972 int32_t f2 = f[2]; 00973 int32_t f3 = f[3]; 00974 int32_t f4 = f[4]; 00975 int32_t f5 = f[5]; 00976 int32_t f6 = f[6]; 00977 int32_t f7 = f[7]; 00978 int32_t f8 = f[8]; 00979 int32_t f9 = f[9]; 00980 int32_t g0 = g[0]; 00981 int32_t g1 = g[1]; 00982 int32_t g2 = g[2]; 00983 int32_t g3 = g[3]; 00984 int32_t g4 = g[4]; 00985 int32_t g5 = g[5]; 00986 int32_t g6 = g[6]; 00987 int32_t g7 = g[7]; 00988 int32_t g8 = g[8]; 00989 int32_t g9 = g[9]; 00990 int32_t x0 = f0 ^ g0; 00991 int32_t x1 = f1 ^ g1; 00992 int32_t x2 = f2 ^ g2; 00993 int32_t x3 = f3 ^ g3; 00994 int32_t x4 = f4 ^ g4; 00995 int32_t x5 = f5 ^ g5; 00996 int32_t x6 = f6 ^ g6; 00997 int32_t x7 = f7 ^ g7; 00998 int32_t x8 = f8 ^ g8; 00999 int32_t x9 = f9 ^ g9; 01000 b = -b; 01001 x0 &= b; 01002 x1 &= b; 01003 x2 &= b; 01004 x3 &= b; 01005 x4 &= b; 01006 x5 &= b; 01007 x6 &= b; 01008 x7 &= b; 01009 x8 &= b; 01010 x9 &= b; 01011 f[0] = f0 ^ x0; 01012 f[1] = f1 ^ x1; 01013 f[2] = f2 ^ x2; 01014 f[3] = f3 ^ x3; 01015 f[4] = f4 ^ x4; 01016 f[5] = f5 ^ x5; 01017 f[6] = f6 ^ x6; 01018 f[7] = f7 ^ x7; 01019 f[8] = f8 ^ x8; 01020 f[9] = f9 ^ x9; 01021 g[0] = g0 ^ x0; 01022 g[1] = g1 ^ x1; 01023 g[2] = g2 ^ x2; 01024 g[3] = g3 ^ x3; 01025 g[4] = g4 ^ x4; 01026 g[5] = g5 ^ x5; 01027 g[6] = g6 ^ x6; 01028 g[7] = g7 ^ x7; 01029 g[8] = g8 ^ x8; 01030 g[9] = g9 ^ x9; 01031 } 01032 01033 
01034 /* 01035 h = f * 121666 01036 Can overlap h with f. 01037 01038 Preconditions: 01039 |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 01040 01041 Postconditions: 01042 |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 01043 */ 01044 01045 void fe_mul121666(fe h,fe f) 01046 { 01047 int32_t f0 = f[0]; 01048 int32_t f1 = f[1]; 01049 int32_t f2 = f[2]; 01050 int32_t f3 = f[3]; 01051 int32_t f4 = f[4]; 01052 int32_t f5 = f[5]; 01053 int32_t f6 = f[6]; 01054 int32_t f7 = f[7]; 01055 int32_t f8 = f[8]; 01056 int32_t f9 = f[9]; 01057 int64_t h0 = f0 * (int64_t) 121666; 01058 int64_t h1 = f1 * (int64_t) 121666; 01059 int64_t h2 = f2 * (int64_t) 121666; 01060 int64_t h3 = f3 * (int64_t) 121666; 01061 int64_t h4 = f4 * (int64_t) 121666; 01062 int64_t h5 = f5 * (int64_t) 121666; 01063 int64_t h6 = f6 * (int64_t) 121666; 01064 int64_t h7 = f7 * (int64_t) 121666; 01065 int64_t h8 = f8 * (int64_t) 121666; 01066 int64_t h9 = f9 * (int64_t) 121666; 01067 int64_t carry0; 01068 int64_t carry1; 01069 int64_t carry2; 01070 int64_t carry3; 01071 int64_t carry4; 01072 int64_t carry5; 01073 int64_t carry6; 01074 int64_t carry7; 01075 int64_t carry8; 01076 int64_t carry9; 01077 01078 carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; 01079 carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25; 01080 carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25; 01081 carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25; 01082 carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25; 01083 01084 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; 01085 carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26; 01086 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 01087 carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26; 01088 carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= 
carry8 << 26; 01089 01090 h[0] = (int32_t)h0; 01091 h[1] = (int32_t)h1; 01092 h[2] = (int32_t)h2; 01093 h[3] = (int32_t)h3; 01094 h[4] = (int32_t)h4; 01095 h[5] = (int32_t)h5; 01096 h[6] = (int32_t)h6; 01097 h[7] = (int32_t)h7; 01098 h[8] = (int32_t)h8; 01099 h[9] = (int32_t)h9; 01100 } 01101 01102 01103 /* 01104 h = 2 * f * f 01105 Can overlap h with f. 01106 01107 Preconditions: 01108 |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. 01109 01110 Postconditions: 01111 |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. 01112 */ 01113 01114 /* 01115 See fe_mul.c for discussion of implementation strategy. 01116 */ 01117 01118 void fe_sq2(fe h,const fe f) 01119 { 01120 int32_t f0 = f[0]; 01121 int32_t f1 = f[1]; 01122 int32_t f2 = f[2]; 01123 int32_t f3 = f[3]; 01124 int32_t f4 = f[4]; 01125 int32_t f5 = f[5]; 01126 int32_t f6 = f[6]; 01127 int32_t f7 = f[7]; 01128 int32_t f8 = f[8]; 01129 int32_t f9 = f[9]; 01130 int32_t f0_2 = 2 * f0; 01131 int32_t f1_2 = 2 * f1; 01132 int32_t f2_2 = 2 * f2; 01133 int32_t f3_2 = 2 * f3; 01134 int32_t f4_2 = 2 * f4; 01135 int32_t f5_2 = 2 * f5; 01136 int32_t f6_2 = 2 * f6; 01137 int32_t f7_2 = 2 * f7; 01138 int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */ 01139 int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */ 01140 int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */ 01141 int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */ 01142 int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */ 01143 int64_t f0f0 = f0 * (int64_t) f0; 01144 int64_t f0f1_2 = f0_2 * (int64_t) f1; 01145 int64_t f0f2_2 = f0_2 * (int64_t) f2; 01146 int64_t f0f3_2 = f0_2 * (int64_t) f3; 01147 int64_t f0f4_2 = f0_2 * (int64_t) f4; 01148 int64_t f0f5_2 = f0_2 * (int64_t) f5; 01149 int64_t f0f6_2 = f0_2 * (int64_t) f6; 01150 int64_t f0f7_2 = f0_2 * (int64_t) f7; 01151 int64_t f0f8_2 = f0_2 * (int64_t) f8; 01152 int64_t f0f9_2 = f0_2 * (int64_t) f9; 01153 int64_t f1f1_2 = f1_2 * (int64_t) f1; 01154 int64_t f1f2_2 = f1_2 * (int64_t) f2; 01155 int64_t f1f3_4 = f1_2 * (int64_t) f3_2; 
01156 int64_t f1f4_2 = f1_2 * (int64_t) f4; 01157 int64_t f1f5_4 = f1_2 * (int64_t) f5_2; 01158 int64_t f1f6_2 = f1_2 * (int64_t) f6; 01159 int64_t f1f7_4 = f1_2 * (int64_t) f7_2; 01160 int64_t f1f8_2 = f1_2 * (int64_t) f8; 01161 int64_t f1f9_76 = f1_2 * (int64_t) f9_38; 01162 int64_t f2f2 = f2 * (int64_t) f2; 01163 int64_t f2f3_2 = f2_2 * (int64_t) f3; 01164 int64_t f2f4_2 = f2_2 * (int64_t) f4; 01165 int64_t f2f5_2 = f2_2 * (int64_t) f5; 01166 int64_t f2f6_2 = f2_2 * (int64_t) f6; 01167 int64_t f2f7_2 = f2_2 * (int64_t) f7; 01168 int64_t f2f8_38 = f2_2 * (int64_t) f8_19; 01169 int64_t f2f9_38 = f2 * (int64_t) f9_38; 01170 int64_t f3f3_2 = f3_2 * (int64_t) f3; 01171 int64_t f3f4_2 = f3_2 * (int64_t) f4; 01172 int64_t f3f5_4 = f3_2 * (int64_t) f5_2; 01173 int64_t f3f6_2 = f3_2 * (int64_t) f6; 01174 int64_t f3f7_76 = f3_2 * (int64_t) f7_38; 01175 int64_t f3f8_38 = f3_2 * (int64_t) f8_19; 01176 int64_t f3f9_76 = f3_2 * (int64_t) f9_38; 01177 int64_t f4f4 = f4 * (int64_t) f4; 01178 int64_t f4f5_2 = f4_2 * (int64_t) f5; 01179 int64_t f4f6_38 = f4_2 * (int64_t) f6_19; 01180 int64_t f4f7_38 = f4 * (int64_t) f7_38; 01181 int64_t f4f8_38 = f4_2 * (int64_t) f8_19; 01182 int64_t f4f9_38 = f4 * (int64_t) f9_38; 01183 int64_t f5f5_38 = f5 * (int64_t) f5_38; 01184 int64_t f5f6_38 = f5_2 * (int64_t) f6_19; 01185 int64_t f5f7_76 = f5_2 * (int64_t) f7_38; 01186 int64_t f5f8_38 = f5_2 * (int64_t) f8_19; 01187 int64_t f5f9_76 = f5_2 * (int64_t) f9_38; 01188 int64_t f6f6_19 = f6 * (int64_t) f6_19; 01189 int64_t f6f7_38 = f6 * (int64_t) f7_38; 01190 int64_t f6f8_38 = f6_2 * (int64_t) f8_19; 01191 int64_t f6f9_38 = f6 * (int64_t) f9_38; 01192 int64_t f7f7_38 = f7 * (int64_t) f7_38; 01193 int64_t f7f8_38 = f7_2 * (int64_t) f8_19; 01194 int64_t f7f9_76 = f7_2 * (int64_t) f9_38; 01195 int64_t f8f8_19 = f8 * (int64_t) f8_19; 01196 int64_t f8f9_38 = f8 * (int64_t) f9_38; 01197 int64_t f9f9_38 = f9 * (int64_t) f9_38; 01198 int64_t h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38; 01199 
int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38; 01200 int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19; 01201 int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38; 01202 int64_t h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38; 01203 int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38; 01204 int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19; 01205 int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38; 01206 int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38; 01207 int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2; 01208 int64_t carry0; 01209 int64_t carry1; 01210 int64_t carry2; 01211 int64_t carry3; 01212 int64_t carry4; 01213 int64_t carry5; 01214 int64_t carry6; 01215 int64_t carry7; 01216 int64_t carry8; 01217 int64_t carry9; 01218 01219 h0 += h0; 01220 h1 += h1; 01221 h2 += h2; 01222 h3 += h3; 01223 h4 += h4; 01224 h5 += h5; 01225 h6 += h6; 01226 h7 += h7; 01227 h8 += h8; 01228 h9 += h9; 01229 01230 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; 01231 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 01232 01233 carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25; 01234 carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25; 01235 01236 carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26; 01237 carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26; 01238 01239 carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25; 01240 carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25; 01241 01242 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 01243 carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26; 01244 01245 carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; 01246 01247 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; 01248 01249 h[0] = (int32_t)h0; 01250 h[1] 
= (int32_t)h1; 01251 h[2] = (int32_t)h2; 01252 h[3] = (int32_t)h3; 01253 h[4] = (int32_t)h4; 01254 h[5] = (int32_t)h5; 01255 h[6] = (int32_t)h6; 01256 h[7] = (int32_t)h7; 01257 h[8] = (int32_t)h8; 01258 h[9] = (int32_t)h9; 01259 } 01260 01261 01262 void fe_pow22523(fe out,const fe z) 01263 { 01264 fe t0; 01265 fe t1; 01266 fe t2; 01267 int i; 01268 01269 fe_sq(t0,z); for (i = 1;i < 1;++i) fe_sq(t0,t0); 01270 fe_sq(t1,t0); for (i = 1;i < 2;++i) fe_sq(t1,t1); 01271 fe_mul(t1,z,t1); 01272 fe_mul(t0,t0,t1); 01273 fe_sq(t0,t0); for (i = 1;i < 1;++i) fe_sq(t0,t0); 01274 fe_mul(t0,t1,t0); 01275 fe_sq(t1,t0); for (i = 1;i < 5;++i) fe_sq(t1,t1); 01276 fe_mul(t0,t1,t0); 01277 fe_sq(t1,t0); for (i = 1;i < 10;++i) fe_sq(t1,t1); 01278 fe_mul(t1,t1,t0); 01279 fe_sq(t2,t1); for (i = 1;i < 20;++i) fe_sq(t2,t2); 01280 fe_mul(t1,t2,t1); 01281 fe_sq(t1,t1); for (i = 1;i < 10;++i) fe_sq(t1,t1); 01282 fe_mul(t0,t1,t0); 01283 fe_sq(t1,t0); for (i = 1;i < 50;++i) fe_sq(t1,t1); 01284 fe_mul(t1,t1,t0); 01285 fe_sq(t2,t1); for (i = 1;i < 100;++i) fe_sq(t2,t2); 01286 fe_mul(t1,t2,t1); 01287 fe_sq(t1,t1); for (i = 1;i < 50;++i) fe_sq(t1,t1); 01288 fe_mul(t0,t1,t0); 01289 fe_sq(t0,t0); for (i = 1;i < 2;++i) fe_sq(t0,t0); 01290 fe_mul(out,t0,z); 01291 01292 return; 01293 } 01294 01295 01296 /* 01297 h = -f 01298 01299 Preconditions: 01300 |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 01301 01302 Postconditions: 01303 |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 
01304 */ 01305 01306 void fe_neg(fe h,const fe f) 01307 { 01308 int32_t f0 = f[0]; 01309 int32_t f1 = f[1]; 01310 int32_t f2 = f[2]; 01311 int32_t f3 = f[3]; 01312 int32_t f4 = f[4]; 01313 int32_t f5 = f[5]; 01314 int32_t f6 = f[6]; 01315 int32_t f7 = f[7]; 01316 int32_t f8 = f[8]; 01317 int32_t f9 = f[9]; 01318 int32_t h0 = -f0; 01319 int32_t h1 = -f1; 01320 int32_t h2 = -f2; 01321 int32_t h3 = -f3; 01322 int32_t h4 = -f4; 01323 int32_t h5 = -f5; 01324 int32_t h6 = -f6; 01325 int32_t h7 = -f7; 01326 int32_t h8 = -f8; 01327 int32_t h9 = -f9; 01328 h[0] = h0; 01329 h[1] = h1; 01330 h[2] = h2; 01331 h[3] = h3; 01332 h[4] = h4; 01333 h[5] = h5; 01334 h[6] = h6; 01335 h[7] = h7; 01336 h[8] = h8; 01337 h[9] = h9; 01338 } 01339 01340 01341 /* 01342 Preconditions: 01343 |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 01344 */ 01345 01346 static const unsigned char zero[32] = {0}; 01347 01348 int fe_isnonzero(const fe f) 01349 { 01350 unsigned char s[32]; 01351 fe_tobytes(s,f); 01352 return ConstantCompare(s,zero,32); 01353 } 01354 01355 01356 /* 01357 return 1 if f is in {1,3,5,...,q-2} 01358 return 0 if f is in {0,2,4,...,q-1} 01359 01360 Preconditions: 01361 |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 01362 */ 01363 01364 int fe_isnegative(const fe f) 01365 { 01366 unsigned char s[32]; 01367 fe_tobytes(s,f); 01368 return s[0] & 1; 01369 } 01370 01371 01372 /* 01373 Replace (f,g) with (g,g) if b == 1; 01374 replace (f,g) with (f,g) if b == 0. 01375 01376 Preconditions: b in {0,1}. 
01377 */ 01378 01379 void fe_cmov(fe f, const fe g, int b) 01380 { 01381 int32_t f0 = f[0]; 01382 int32_t f1 = f[1]; 01383 int32_t f2 = f[2]; 01384 int32_t f3 = f[3]; 01385 int32_t f4 = f[4]; 01386 int32_t f5 = f[5]; 01387 int32_t f6 = f[6]; 01388 int32_t f7 = f[7]; 01389 int32_t f8 = f[8]; 01390 int32_t f9 = f[9]; 01391 int32_t g0 = g[0]; 01392 int32_t g1 = g[1]; 01393 int32_t g2 = g[2]; 01394 int32_t g3 = g[3]; 01395 int32_t g4 = g[4]; 01396 int32_t g5 = g[5]; 01397 int32_t g6 = g[6]; 01398 int32_t g7 = g[7]; 01399 int32_t g8 = g[8]; 01400 int32_t g9 = g[9]; 01401 int32_t x0 = f0 ^ g0; 01402 int32_t x1 = f1 ^ g1; 01403 int32_t x2 = f2 ^ g2; 01404 int32_t x3 = f3 ^ g3; 01405 int32_t x4 = f4 ^ g4; 01406 int32_t x5 = f5 ^ g5; 01407 int32_t x6 = f6 ^ g6; 01408 int32_t x7 = f7 ^ g7; 01409 int32_t x8 = f8 ^ g8; 01410 int32_t x9 = f9 ^ g9; 01411 b = -b; 01412 x0 &= b; 01413 x1 &= b; 01414 x2 &= b; 01415 x3 &= b; 01416 x4 &= b; 01417 x5 &= b; 01418 x6 &= b; 01419 x7 &= b; 01420 x8 &= b; 01421 x9 &= b; 01422 f[0] = f0 ^ x0; 01423 f[1] = f1 ^ x1; 01424 f[2] = f2 ^ x2; 01425 f[3] = f3 ^ x3; 01426 f[4] = f4 ^ x4; 01427 f[5] = f5 ^ x5; 01428 f[6] = f6 ^ x6; 01429 f[7] = f7 ^ x7; 01430 f[8] = f8 ^ x8; 01431 f[9] = f9 ^ x9; 01432 } 01433 #endif 01434 01435 #endif /* !CURVE25519_SMALL || !ED25519_SMALL */ 01436 #endif /* HAVE_CURVE25519 || HAVE_ED25519 */ 01437
Generated on Tue Jul 12 2022 16:58:06 by
1.7.2