Xuyi Wang / wolfcrypt

Dependents:   OS

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers fe_operations.c Source File

fe_operations.c

00001 /* fe_operations.c
00002  *
00003  * Copyright (C) 2006-2017 wolfSSL Inc.
00004  *
00005  * This file is part of wolfSSL.
00006  *
00007  * wolfSSL is free software; you can redistribute it and/or modify
00008  * it under the terms of the GNU General Public License as published by
00009  * the Free Software Foundation; either version 2 of the License, or
00010  * (at your option) any later version.
00011  *
00012  * wolfSSL is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
00020  */
00021 
00022 
00023  /* Based On Daniel J Bernstein's curve25519 Public Domain ref10 work. */
00024 
00025 #ifdef HAVE_CONFIG_H
00026     #include <config.h>
00027 #endif
00028 
00029 #include <wolfcrypt/settings.h>
00030 
00031 #if defined(HAVE_CURVE25519) || defined(HAVE_ED25519)
00032 #if !defined(CURVE25519_SMALL) || !defined(ED25519_SMALL) /* run when not defined to use small memory math */
00033 
00034 #include <wolfcrypt/fe_operations.h>
00035 #include <stdint.h>
00036 
00037 #ifdef NO_INLINE
00038     #include <wolfcrypt/misc.h>
00039 #else
00040     #define WOLFSSL_MISC_INCLUDED
00041     #include <wolfcrypt/src/misc.c>
00042 #endif
00043 
00044 #ifdef CURVED25519_X64
00045 #include "fe_x25519_x64.i"
00046 #elif defined(CURVED25519_128BIT)
00047 #include "fe_x25519_128.i"
00048 #else
00049 
00050 #if defined(HAVE_CURVE25519) || \
00051     (defined(HAVE_ED25519) && !defined(ED25519_SMALL))
00052 /*
00053 fe means field element.
00054 Here the field is \Z/(2^255-19).
00055 An element t, entries t[0]...t[9], represents the integer
00056 t[0]+2^26 t[1]+2^51 t[2]+2^77 t[3]+2^102 t[4]+...+2^230 t[9].
00057 Bounds on each t[i] vary depending on context.
00058 */
00059 
/*
 * Read the three bytes in[0..2] as a little-endian unsigned integer
 * (in[0] is the least significant byte) and return it as a 64-bit value.
 */
uint64_t load_3(const unsigned char *in)
{
  return  (uint64_t) in[0]
       | ((uint64_t) in[1] << 8)
       | ((uint64_t) in[2] << 16);
}
00068 
00069 
/*
 * Read the four bytes in[0..3] as a little-endian unsigned integer
 * (in[0] is the least significant byte) and return it as a 64-bit value.
 * Each byte is widened before shifting, so a set top bit in in[3] is not
 * sign-extended.
 */
uint64_t load_4(const unsigned char *in)
{
  return  (uint64_t) in[0]
       | ((uint64_t) in[1] << 8)
       | ((uint64_t) in[2] << 16)
       | ((uint64_t) in[3] << 24);
}
00079 #endif
00080 
00081 /*
00082 h = 1
00083 */
00084 
00085 void fe_1(fe h)
00086 {
00087   h[0] = 1;
00088   h[1] = 0;
00089   h[2] = 0;
00090   h[3] = 0;
00091   h[4] = 0;
00092   h[5] = 0;
00093   h[6] = 0;
00094   h[7] = 0;
00095   h[8] = 0;
00096   h[9] = 0;
00097 }
00098 
00099 
00100 /*
00101 h = 0
00102 */
00103 
00104 void fe_0(fe h)
00105 {
00106   h[0] = 0;
00107   h[1] = 0;
00108   h[2] = 0;
00109   h[3] = 0;
00110   h[4] = 0;
00111   h[5] = 0;
00112   h[6] = 0;
00113   h[7] = 0;
00114   h[8] = 0;
00115   h[9] = 0;
00116 }
00117 
00118 
00119 #if ((defined(HAVE_CURVE25519) && !defined(CURVE25519_SMALL)) || \
00120      (defined(HAVE_ED25519) && !defined(ED25519_SMALL))) && \
00121     !defined(FREESCALE_LTC_ECC)
00122 /* to be Complementary to fe_low_mem.c */
/*
 * Initialisation hook with nothing to do in this implementation; the body
 * is intentionally empty.
 *
 * Declared with an explicit (void) parameter list so that the definition is
 * a real prototype: before C23 an empty "()" leaves the parameters
 * unspecified and disables argument checking at call sites.
 */
void fe_init(void)
{
}
00126 #endif
00127 
00128 #if defined(HAVE_CURVE25519) && !defined(CURVE25519_SMALL) && \
00129     !defined(FREESCALE_LTC_ECC)
00130 int curve25519(byte* q, byte* n, byte* p)
00131 {
00132 #if 0
00133   unsigned char e[32];
00134 #endif
00135   fe x1;
00136   fe x2;
00137   fe z2;
00138   fe x3;
00139   fe z3;
00140   fe tmp0;
00141   fe tmp1;
00142   int pos;
00143   unsigned int swap;
00144   unsigned int b;
00145 
00146   /* Clamp already done during key generation and import */
00147 #if 0
00148   {
00149     unsigned int i;
00150     for (i = 0;i < 32;++i) e[i] = n[i];
00151     e[0] &= 248;
00152     e[31] &= 127;
00153     e[31] |= 64;
00154   }
00155 #endif
00156 
00157   fe_frombytes(x1,p);
00158   fe_1(x2);
00159   fe_0(z2);
00160   fe_copy(x3,x1);
00161   fe_1(z3);
00162 
00163   swap = 0;
00164   for (pos = 254;pos >= 0;--pos) {
00165 #if 0
00166     b = e[pos / 8] >> (pos & 7);
00167 #else
00168     b = n[pos / 8] >> (pos & 7);
00169 #endif
00170     b &= 1;
00171     swap ^= b;
00172     fe_cswap(x2,x3,swap);
00173     fe_cswap(z2,z3,swap);
00174     swap = b;
00175 
00176     /* montgomery */
00177     fe_sub(tmp0,x3,z3);
00178     fe_sub(tmp1,x2,z2);
00179     fe_add(x2,x2,z2);
00180     fe_add(z2,x3,z3);
00181     fe_mul(z3,tmp0,x2);
00182     fe_mul(z2,z2,tmp1);
00183     fe_sq(tmp0,tmp1);
00184     fe_sq(tmp1,x2);
00185     fe_add(x3,z3,z2);
00186     fe_sub(z2,z3,z2);
00187     fe_mul(x2,tmp1,tmp0);
00188     fe_sub(tmp1,tmp1,tmp0);
00189     fe_sq(z2,z2);
00190     fe_mul121666(z3,tmp1);
00191     fe_sq(x3,x3);
00192     fe_add(tmp0,tmp0,z3);
00193     fe_mul(z3,x1,z2);
00194     fe_mul(z2,tmp1,tmp0);
00195   }
00196   fe_cswap(x2,x3,swap);
00197   fe_cswap(z2,z3,swap);
00198 
00199   fe_invert(z2,z2);
00200   fe_mul(x2,x2,z2);
00201   fe_tobytes(q,x2);
00202 
00203   return 0;
00204 }
00205 #endif /* HAVE_CURVE25519 && !CURVE25519_SMALL && !FREESCALE_LTC_ECC */
00206 
00207 
00208 /*
00209 h = f * f
00210 Can overlap h with f.
00211 
00212 Preconditions:
00213    |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
00214 
00215 Postconditions:
00216    |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
00217 */
00218 
00219 /*
00220 See fe_mul.c for discussion of implementation strategy.
00221 */
00222 
00223 void fe_sq(fe h,const fe f)
00224 {
00225   int32_t f0 = f[0];
00226   int32_t f1 = f[1];
00227   int32_t f2 = f[2];
00228   int32_t f3 = f[3];
00229   int32_t f4 = f[4];
00230   int32_t f5 = f[5];
00231   int32_t f6 = f[6];
00232   int32_t f7 = f[7];
00233   int32_t f8 = f[8];
00234   int32_t f9 = f[9];
00235   int32_t f0_2 = 2 * f0;
00236   int32_t f1_2 = 2 * f1;
00237   int32_t f2_2 = 2 * f2;
00238   int32_t f3_2 = 2 * f3;
00239   int32_t f4_2 = 2 * f4;
00240   int32_t f5_2 = 2 * f5;
00241   int32_t f6_2 = 2 * f6;
00242   int32_t f7_2 = 2 * f7;
00243   int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
00244   int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
00245   int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
00246   int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
00247   int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
00248   int64_t f0f0    = f0   * (int64_t) f0;
00249   int64_t f0f1_2  = f0_2 * (int64_t) f1;
00250   int64_t f0f2_2  = f0_2 * (int64_t) f2;
00251   int64_t f0f3_2  = f0_2 * (int64_t) f3;
00252   int64_t f0f4_2  = f0_2 * (int64_t) f4;
00253   int64_t f0f5_2  = f0_2 * (int64_t) f5;
00254   int64_t f0f6_2  = f0_2 * (int64_t) f6;
00255   int64_t f0f7_2  = f0_2 * (int64_t) f7;
00256   int64_t f0f8_2  = f0_2 * (int64_t) f8;
00257   int64_t f0f9_2  = f0_2 * (int64_t) f9;
00258   int64_t f1f1_2  = f1_2 * (int64_t) f1;
00259   int64_t f1f2_2  = f1_2 * (int64_t) f2;
00260   int64_t f1f3_4  = f1_2 * (int64_t) f3_2;
00261   int64_t f1f4_2  = f1_2 * (int64_t) f4;
00262   int64_t f1f5_4  = f1_2 * (int64_t) f5_2;
00263   int64_t f1f6_2  = f1_2 * (int64_t) f6;
00264   int64_t f1f7_4  = f1_2 * (int64_t) f7_2;
00265   int64_t f1f8_2  = f1_2 * (int64_t) f8;
00266   int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
00267   int64_t f2f2    = f2   * (int64_t) f2;
00268   int64_t f2f3_2  = f2_2 * (int64_t) f3;
00269   int64_t f2f4_2  = f2_2 * (int64_t) f4;
00270   int64_t f2f5_2  = f2_2 * (int64_t) f5;
00271   int64_t f2f6_2  = f2_2 * (int64_t) f6;
00272   int64_t f2f7_2  = f2_2 * (int64_t) f7;
00273   int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
00274   int64_t f2f9_38 = f2   * (int64_t) f9_38;
00275   int64_t f3f3_2  = f3_2 * (int64_t) f3;
00276   int64_t f3f4_2  = f3_2 * (int64_t) f4;
00277   int64_t f3f5_4  = f3_2 * (int64_t) f5_2;
00278   int64_t f3f6_2  = f3_2 * (int64_t) f6;
00279   int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
00280   int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
00281   int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
00282   int64_t f4f4    = f4   * (int64_t) f4;
00283   int64_t f4f5_2  = f4_2 * (int64_t) f5;
00284   int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
00285   int64_t f4f7_38 = f4   * (int64_t) f7_38;
00286   int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
00287   int64_t f4f9_38 = f4   * (int64_t) f9_38;
00288   int64_t f5f5_38 = f5   * (int64_t) f5_38;
00289   int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
00290   int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
00291   int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
00292   int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
00293   int64_t f6f6_19 = f6   * (int64_t) f6_19;
00294   int64_t f6f7_38 = f6   * (int64_t) f7_38;
00295   int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
00296   int64_t f6f9_38 = f6   * (int64_t) f9_38;
00297   int64_t f7f7_38 = f7   * (int64_t) f7_38;
00298   int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
00299   int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
00300   int64_t f8f8_19 = f8   * (int64_t) f8_19;
00301   int64_t f8f9_38 = f8   * (int64_t) f9_38;
00302   int64_t f9f9_38 = f9   * (int64_t) f9_38;
00303   int64_t h0 = f0f0  +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
00304   int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
00305   int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
00306   int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
00307   int64_t h4 = f0f4_2+f1f3_4 +f2f2   +f5f9_76+f6f8_38+f7f7_38;
00308   int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
00309   int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
00310   int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
00311   int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4   +f9f9_38;
00312   int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
00313   int64_t carry0;
00314   int64_t carry1;
00315   int64_t carry2;
00316   int64_t carry3;
00317   int64_t carry4;
00318   int64_t carry5;
00319   int64_t carry6;
00320   int64_t carry7;
00321   int64_t carry8;
00322   int64_t carry9;
00323 
00324   carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
00325   carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
00326 
00327   carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
00328   carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
00329 
00330   carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
00331   carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
00332 
00333   carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
00334   carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
00335 
00336   carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
00337   carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
00338 
00339   carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
00340 
00341   carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
00342 
00343   h[0] = (int32_t)h0;
00344   h[1] = (int32_t)h1;
00345   h[2] = (int32_t)h2;
00346   h[3] = (int32_t)h3;
00347   h[4] = (int32_t)h4;
00348   h[5] = (int32_t)h5;
00349   h[6] = (int32_t)h6;
00350   h[7] = (int32_t)h7;
00351   h[8] = (int32_t)h8;
00352   h[9] = (int32_t)h9;
00353 }
00354 
00355 
00356 /*
00357 h = f + g
00358 Can overlap h with f or g.
00359 
00360 Preconditions:
00361    |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
00362    |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
00363 
00364 Postconditions:
00365    |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
00366 */
00367 
00368 void fe_add(fe h,const fe f,const fe g)
00369 {
00370   int32_t f0 = f[0];
00371   int32_t f1 = f[1];
00372   int32_t f2 = f[2];
00373   int32_t f3 = f[3];
00374   int32_t f4 = f[4];
00375   int32_t f5 = f[5];
00376   int32_t f6 = f[6];
00377   int32_t f7 = f[7];
00378   int32_t f8 = f[8];
00379   int32_t f9 = f[9];
00380   int32_t g0 = g[0];
00381   int32_t g1 = g[1];
00382   int32_t g2 = g[2];
00383   int32_t g3 = g[3];
00384   int32_t g4 = g[4];
00385   int32_t g5 = g[5];
00386   int32_t g6 = g[6];
00387   int32_t g7 = g[7];
00388   int32_t g8 = g[8];
00389   int32_t g9 = g[9];
00390   int32_t h0 = f0 + g0;
00391   int32_t h1 = f1 + g1;
00392   int32_t h2 = f2 + g2;
00393   int32_t h3 = f3 + g3;
00394   int32_t h4 = f4 + g4;
00395   int32_t h5 = f5 + g5;
00396   int32_t h6 = f6 + g6;
00397   int32_t h7 = f7 + g7;
00398   int32_t h8 = f8 + g8;
00399   int32_t h9 = f9 + g9;
00400   h[0] = h0;
00401   h[1] = h1;
00402   h[2] = h2;
00403   h[3] = h3;
00404   h[4] = h4;
00405   h[5] = h5;
00406   h[6] = h6;
00407   h[7] = h7;
00408   h[8] = h8;
00409   h[9] = h9;
00410 }
00411 
00412 
00413 /*
00414 Preconditions:
00415   |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
00416 
00417 Write p=2^255-19; q=floor(h/p).
00418 Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
00419 
00420 Proof:
00421   Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
00422   Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4.
00423 
00424   Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
00425   Then 0<y<1.
00426 
00427   Write r=h-pq.
00428   Have 0<=r<=p-1=2^255-20.
00429   Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
00430 
00431   Write x=r+19(2^-255)r+y.
00432   Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
00433 
00434   Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
00435   so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
00436 */
00437 
00438 void fe_tobytes(unsigned char *s,const fe h)
00439 {
00440   int32_t h0 = h[0];
00441   int32_t h1 = h[1];
00442   int32_t h2 = h[2];
00443   int32_t h3 = h[3];
00444   int32_t h4 = h[4];
00445   int32_t h5 = h[5];
00446   int32_t h6 = h[6];
00447   int32_t h7 = h[7];
00448   int32_t h8 = h[8];
00449   int32_t h9 = h[9];
00450   int32_t q;
00451   int32_t carry0;
00452   int32_t carry1;
00453   int32_t carry2;
00454   int32_t carry3;
00455   int32_t carry4;
00456   int32_t carry5;
00457   int32_t carry6;
00458   int32_t carry7;
00459   int32_t carry8;
00460   int32_t carry9;
00461 
00462   q = (19 * h9 + (((int32_t) 1) << 24)) >> 25;
00463   q = (h0 + q) >> 26;
00464   q = (h1 + q) >> 25;
00465   q = (h2 + q) >> 26;
00466   q = (h3 + q) >> 25;
00467   q = (h4 + q) >> 26;
00468   q = (h5 + q) >> 25;
00469   q = (h6 + q) >> 26;
00470   q = (h7 + q) >> 25;
00471   q = (h8 + q) >> 26;
00472   q = (h9 + q) >> 25;
00473 
00474   /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
00475   h0 += 19 * q;
00476   /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
00477 
00478   carry0 = h0 >> 26; h1 += carry0; h0 -= carry0 << 26;
00479   carry1 = h1 >> 25; h2 += carry1; h1 -= carry1 << 25;
00480   carry2 = h2 >> 26; h3 += carry2; h2 -= carry2 << 26;
00481   carry3 = h3 >> 25; h4 += carry3; h3 -= carry3 << 25;
00482   carry4 = h4 >> 26; h5 += carry4; h4 -= carry4 << 26;
00483   carry5 = h5 >> 25; h6 += carry5; h5 -= carry5 << 25;
00484   carry6 = h6 >> 26; h7 += carry6; h6 -= carry6 << 26;
00485   carry7 = h7 >> 25; h8 += carry7; h7 -= carry7 << 25;
00486   carry8 = h8 >> 26; h9 += carry8; h8 -= carry8 << 26;
00487   carry9 = h9 >> 25;               h9 -= carry9 << 25;
00488                   /* h10 = carry9 */
00489 
00490   /*
00491   Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
00492   Have h0+...+2^230 h9 between 0 and 2^255-1;
00493   evidently 2^255 h10-2^255 q = 0.
00494   Goal: Output h0+...+2^230 h9.
00495   */
00496 
00497   s[0] = (byte)(h0 >> 0);
00498   s[1] = (byte)(h0 >> 8);
00499   s[2] = (byte)(h0 >> 16);
00500   s[3] = (byte)((h0 >> 24) | (h1 << 2));
00501   s[4] = (byte)(h1 >> 6);
00502   s[5] = (byte)(h1 >> 14);
00503   s[6] = (byte)((h1 >> 22) | (h2 << 3));
00504   s[7] = (byte)(h2 >> 5);
00505   s[8] = (byte)(h2 >> 13);
00506   s[9] = (byte)((h2 >> 21) | (h3 << 5));
00507   s[10] = (byte)(h3 >> 3);
00508   s[11] = (byte)(h3 >> 11);
00509   s[12] = (byte)((h3 >> 19) | (h4 << 6));
00510   s[13] = (byte)(h4 >> 2);
00511   s[14] = (byte)(h4 >> 10);
00512   s[15] = (byte)(h4 >> 18);
00513   s[16] = (byte)(h5 >> 0);
00514   s[17] = (byte)(h5 >> 8);
00515   s[18] = (byte)(h5 >> 16);
00516   s[19] = (byte)((h5 >> 24) | (h6 << 1));
00517   s[20] = (byte)(h6 >> 7);
00518   s[21] = (byte)(h6 >> 15);
00519   s[22] = (byte)((h6 >> 23) | (h7 << 3));
00520   s[23] = (byte)(h7 >> 5);
00521   s[24] = (byte)(h7 >> 13);
00522   s[25] = (byte)((h7 >> 21) | (h8 << 4));
00523   s[26] = (byte)(h8 >> 4);
00524   s[27] = (byte)(h8 >> 12);
00525   s[28] = (byte)((h8 >> 20) | (h9 << 6));
00526   s[29] = (byte)(h9 >> 2);
00527   s[30] = (byte)(h9 >> 10);
00528   s[31] = (byte)(h9 >> 18);
00529 }
00530 
00531 
00532 /*
00533 h = f - g
00534 Can overlap h with f or g.
00535 
00536 Preconditions:
00537    |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
00538    |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
00539 
00540 Postconditions:
00541    |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
00542 */
00543 
00544 void fe_sub(fe h,const fe f,const fe g)
00545 {
00546   int32_t f0 = f[0];
00547   int32_t f1 = f[1];
00548   int32_t f2 = f[2];
00549   int32_t f3 = f[3];
00550   int32_t f4 = f[4];
00551   int32_t f5 = f[5];
00552   int32_t f6 = f[6];
00553   int32_t f7 = f[7];
00554   int32_t f8 = f[8];
00555   int32_t f9 = f[9];
00556   int32_t g0 = g[0];
00557   int32_t g1 = g[1];
00558   int32_t g2 = g[2];
00559   int32_t g3 = g[3];
00560   int32_t g4 = g[4];
00561   int32_t g5 = g[5];
00562   int32_t g6 = g[6];
00563   int32_t g7 = g[7];
00564   int32_t g8 = g[8];
00565   int32_t g9 = g[9];
00566   int32_t h0 = f0 - g0;
00567   int32_t h1 = f1 - g1;
00568   int32_t h2 = f2 - g2;
00569   int32_t h3 = f3 - g3;
00570   int32_t h4 = f4 - g4;
00571   int32_t h5 = f5 - g5;
00572   int32_t h6 = f6 - g6;
00573   int32_t h7 = f7 - g7;
00574   int32_t h8 = f8 - g8;
00575   int32_t h9 = f9 - g9;
00576   h[0] = h0;
00577   h[1] = h1;
00578   h[2] = h2;
00579   h[3] = h3;
00580   h[4] = h4;
00581   h[5] = h5;
00582   h[6] = h6;
00583   h[7] = h7;
00584   h[8] = h8;
00585   h[9] = h9;
00586 }
00587 
00588 
00589 #if defined(HAVE_CURVE25519) || \
00590     (defined(HAVE_ED25519) && !defined(ED25519_SMALL))
00591 /*
00592 Ignores top bit of h.
00593 */
00594 
00595 void fe_frombytes(fe h,const unsigned char *s)
00596 {
00597   int64_t h0 = load_4(s);
00598   int64_t h1 = load_3(s + 4) << 6;
00599   int64_t h2 = load_3(s + 7) << 5;
00600   int64_t h3 = load_3(s + 10) << 3;
00601   int64_t h4 = load_3(s + 13) << 2;
00602   int64_t h5 = load_4(s + 16);
00603   int64_t h6 = load_3(s + 20) << 7;
00604   int64_t h7 = load_3(s + 23) << 5;
00605   int64_t h8 = load_3(s + 26) << 4;
00606   int64_t h9 = (load_3(s + 29) & 8388607) << 2;
00607   int64_t carry0;
00608   int64_t carry1;
00609   int64_t carry2;
00610   int64_t carry3;
00611   int64_t carry4;
00612   int64_t carry5;
00613   int64_t carry6;
00614   int64_t carry7;
00615   int64_t carry8;
00616   int64_t carry9;
00617 
00618   carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
00619   carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
00620   carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
00621   carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
00622   carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
00623 
00624   carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
00625   carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
00626   carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
00627   carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
00628   carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
00629 
00630   h[0] = (int32_t)h0;
00631   h[1] = (int32_t)h1;
00632   h[2] = (int32_t)h2;
00633   h[3] = (int32_t)h3;
00634   h[4] = (int32_t)h4;
00635   h[5] = (int32_t)h5;
00636   h[6] = (int32_t)h6;
00637   h[7] = (int32_t)h7;
00638   h[8] = (int32_t)h8;
00639   h[9] = (int32_t)h9;
00640 }
00641 #endif
00642 
00643 
00644 void fe_invert(fe out,const fe z)
00645 {
00646   fe t0;
00647   fe t1;
00648   fe t2;
00649   fe t3;
00650   int i;
00651 
00652   /* pow225521 */
00653   fe_sq(t0,z); for (i = 1;i < 1;++i) fe_sq(t0,t0);
00654   fe_sq(t1,t0); for (i = 1;i < 2;++i) fe_sq(t1,t1);
00655   fe_mul(t1,z,t1);
00656   fe_mul(t0,t0,t1);
00657   fe_sq(t2,t0); for (i = 1;i < 1;++i) fe_sq(t2,t2);
00658   fe_mul(t1,t1,t2);
00659   fe_sq(t2,t1); for (i = 1;i < 5;++i) fe_sq(t2,t2);
00660   fe_mul(t1,t2,t1);
00661   fe_sq(t2,t1); for (i = 1;i < 10;++i) fe_sq(t2,t2);
00662   fe_mul(t2,t2,t1);
00663   fe_sq(t3,t2); for (i = 1;i < 20;++i) fe_sq(t3,t3);
00664   fe_mul(t2,t3,t2);
00665   fe_sq(t2,t2); for (i = 1;i < 10;++i) fe_sq(t2,t2);
00666   fe_mul(t1,t2,t1);
00667   fe_sq(t2,t1); for (i = 1;i < 50;++i) fe_sq(t2,t2);
00668   fe_mul(t2,t2,t1);
00669   fe_sq(t3,t2); for (i = 1;i < 100;++i) fe_sq(t3,t3);
00670   fe_mul(t2,t3,t2);
00671   fe_sq(t2,t2); for (i = 1;i < 50;++i) fe_sq(t2,t2);
00672   fe_mul(t1,t2,t1);
00673   fe_sq(t1,t1); for (i = 1;i < 5;++i) fe_sq(t1,t1);
00674   fe_mul(out,t1,t0);
00675 
00676   return;
00677 }
00678 
00679 
00680 /*
00681 h = f
00682 */
00683 
00684 void fe_copy(fe h,const fe f)
00685 {
00686   int32_t f0 = f[0];
00687   int32_t f1 = f[1];
00688   int32_t f2 = f[2];
00689   int32_t f3 = f[3];
00690   int32_t f4 = f[4];
00691   int32_t f5 = f[5];
00692   int32_t f6 = f[6];
00693   int32_t f7 = f[7];
00694   int32_t f8 = f[8];
00695   int32_t f9 = f[9];
00696   h[0] = f0;
00697   h[1] = f1;
00698   h[2] = f2;
00699   h[3] = f3;
00700   h[4] = f4;
00701   h[5] = f5;
00702   h[6] = f6;
00703   h[7] = f7;
00704   h[8] = f8;
00705   h[9] = f9;
00706 }
00707 
00708 
00709 /*
00710 h = f * g
00711 Can overlap h with f or g.
00712 
00713 Preconditions:
00714    |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
00715    |g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
00716 
00717 Postconditions:
00718    |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
00719 */
00720 
00721 /*
00722 Notes on implementation strategy:
00723 
00724 Using schoolbook multiplication.
00725 Karatsuba would save a little in some cost models.
00726 
00727 Most multiplications by 2 and 19 are 32-bit precomputations;
00728 cheaper than 64-bit postcomputations.
00729 
00730 There is one remaining multiplication by 19 in the carry chain;
00731 one *19 precomputation can be merged into this,
00732 but the resulting data flow is considerably less clean.
00733 
00734 There are 12 carries below.
00735 10 of them are 2-way parallelizable and vectorizable.
00736 Can get away with 11 carries, but then data flow is much deeper.
00737 
00738 With tighter constraints on inputs can squeeze carries into int32.
00739 */
00740 
00741 void fe_mul(fe h,const fe f,const fe g)
00742 {
00743   int32_t f0 = f[0];
00744   int32_t f1 = f[1];
00745   int32_t f2 = f[2];
00746   int32_t f3 = f[3];
00747   int32_t f4 = f[4];
00748   int32_t f5 = f[5];
00749   int32_t f6 = f[6];
00750   int32_t f7 = f[7];
00751   int32_t f8 = f[8];
00752   int32_t f9 = f[9];
00753   int32_t g0 = g[0];
00754   int32_t g1 = g[1];
00755   int32_t g2 = g[2];
00756   int32_t g3 = g[3];
00757   int32_t g4 = g[4];
00758   int32_t g5 = g[5];
00759   int32_t g6 = g[6];
00760   int32_t g7 = g[7];
00761   int32_t g8 = g[8];
00762   int32_t g9 = g[9];
00763   int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */
00764   int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */
00765   int32_t g3_19 = 19 * g3;
00766   int32_t g4_19 = 19 * g4;
00767   int32_t g5_19 = 19 * g5;
00768   int32_t g6_19 = 19 * g6;
00769   int32_t g7_19 = 19 * g7;
00770   int32_t g8_19 = 19 * g8;
00771   int32_t g9_19 = 19 * g9;
00772   int32_t f1_2 = 2 * f1;
00773   int32_t f3_2 = 2 * f3;
00774   int32_t f5_2 = 2 * f5;
00775   int32_t f7_2 = 2 * f7;
00776   int32_t f9_2 = 2 * f9;
00777   int64_t f0g0    = f0   * (int64_t) g0;
00778   int64_t f0g1    = f0   * (int64_t) g1;
00779   int64_t f0g2    = f0   * (int64_t) g2;
00780   int64_t f0g3    = f0   * (int64_t) g3;
00781   int64_t f0g4    = f0   * (int64_t) g4;
00782   int64_t f0g5    = f0   * (int64_t) g5;
00783   int64_t f0g6    = f0   * (int64_t) g6;
00784   int64_t f0g7    = f0   * (int64_t) g7;
00785   int64_t f0g8    = f0   * (int64_t) g8;
00786   int64_t f0g9    = f0   * (int64_t) g9;
00787   int64_t f1g0    = f1   * (int64_t) g0;
00788   int64_t f1g1_2  = f1_2 * (int64_t) g1;
00789   int64_t f1g2    = f1   * (int64_t) g2;
00790   int64_t f1g3_2  = f1_2 * (int64_t) g3;
00791   int64_t f1g4    = f1   * (int64_t) g4;
00792   int64_t f1g5_2  = f1_2 * (int64_t) g5;
00793   int64_t f1g6    = f1   * (int64_t) g6;
00794   int64_t f1g7_2  = f1_2 * (int64_t) g7;
00795   int64_t f1g8    = f1   * (int64_t) g8;
00796   int64_t f1g9_38 = f1_2 * (int64_t) g9_19;
00797   int64_t f2g0    = f2   * (int64_t) g0;
00798   int64_t f2g1    = f2   * (int64_t) g1;
00799   int64_t f2g2    = f2   * (int64_t) g2;
00800   int64_t f2g3    = f2   * (int64_t) g3;
00801   int64_t f2g4    = f2   * (int64_t) g4;
00802   int64_t f2g5    = f2   * (int64_t) g5;
00803   int64_t f2g6    = f2   * (int64_t) g6;
00804   int64_t f2g7    = f2   * (int64_t) g7;
00805   int64_t f2g8_19 = f2   * (int64_t) g8_19;
00806   int64_t f2g9_19 = f2   * (int64_t) g9_19;
00807   int64_t f3g0    = f3   * (int64_t) g0;
00808   int64_t f3g1_2  = f3_2 * (int64_t) g1;
00809   int64_t f3g2    = f3   * (int64_t) g2;
00810   int64_t f3g3_2  = f3_2 * (int64_t) g3;
00811   int64_t f3g4    = f3   * (int64_t) g4;
00812   int64_t f3g5_2  = f3_2 * (int64_t) g5;
00813   int64_t f3g6    = f3   * (int64_t) g6;
00814   int64_t f3g7_38 = f3_2 * (int64_t) g7_19;
00815   int64_t f3g8_19 = f3   * (int64_t) g8_19;
00816   int64_t f3g9_38 = f3_2 * (int64_t) g9_19;
00817   int64_t f4g0    = f4   * (int64_t) g0;
00818   int64_t f4g1    = f4   * (int64_t) g1;
00819   int64_t f4g2    = f4   * (int64_t) g2;
00820   int64_t f4g3    = f4   * (int64_t) g3;
00821   int64_t f4g4    = f4   * (int64_t) g4;
00822   int64_t f4g5    = f4   * (int64_t) g5;
00823   int64_t f4g6_19 = f4   * (int64_t) g6_19;
00824   int64_t f4g7_19 = f4   * (int64_t) g7_19;
00825   int64_t f4g8_19 = f4   * (int64_t) g8_19;
00826   int64_t f4g9_19 = f4   * (int64_t) g9_19;
00827   int64_t f5g0    = f5   * (int64_t) g0;
00828   int64_t f5g1_2  = f5_2 * (int64_t) g1;
00829   int64_t f5g2    = f5   * (int64_t) g2;
00830   int64_t f5g3_2  = f5_2 * (int64_t) g3;
00831   int64_t f5g4    = f5   * (int64_t) g4;
00832   int64_t f5g5_38 = f5_2 * (int64_t) g5_19;
00833   int64_t f5g6_19 = f5   * (int64_t) g6_19;
00834   int64_t f5g7_38 = f5_2 * (int64_t) g7_19;
00835   int64_t f5g8_19 = f5   * (int64_t) g8_19;
00836   int64_t f5g9_38 = f5_2 * (int64_t) g9_19;
00837   int64_t f6g0    = f6   * (int64_t) g0;
00838   int64_t f6g1    = f6   * (int64_t) g1;
00839   int64_t f6g2    = f6   * (int64_t) g2;
00840   int64_t f6g3    = f6   * (int64_t) g3;
00841   int64_t f6g4_19 = f6   * (int64_t) g4_19;
00842   int64_t f6g5_19 = f6   * (int64_t) g5_19;
00843   int64_t f6g6_19 = f6   * (int64_t) g6_19;
00844   int64_t f6g7_19 = f6   * (int64_t) g7_19;
00845   int64_t f6g8_19 = f6   * (int64_t) g8_19;
00846   int64_t f6g9_19 = f6   * (int64_t) g9_19;
00847   int64_t f7g0    = f7   * (int64_t) g0;
00848   int64_t f7g1_2  = f7_2 * (int64_t) g1;
00849   int64_t f7g2    = f7   * (int64_t) g2;
00850   int64_t f7g3_38 = f7_2 * (int64_t) g3_19;
00851   int64_t f7g4_19 = f7   * (int64_t) g4_19;
00852   int64_t f7g5_38 = f7_2 * (int64_t) g5_19;
00853   int64_t f7g6_19 = f7   * (int64_t) g6_19;
00854   int64_t f7g7_38 = f7_2 * (int64_t) g7_19;
00855   int64_t f7g8_19 = f7   * (int64_t) g8_19;
00856   int64_t f7g9_38 = f7_2 * (int64_t) g9_19;
00857   int64_t f8g0    = f8   * (int64_t) g0;
00858   int64_t f8g1    = f8   * (int64_t) g1;
00859   int64_t f8g2_19 = f8   * (int64_t) g2_19;
00860   int64_t f8g3_19 = f8   * (int64_t) g3_19;
00861   int64_t f8g4_19 = f8   * (int64_t) g4_19;
00862   int64_t f8g5_19 = f8   * (int64_t) g5_19;
00863   int64_t f8g6_19 = f8   * (int64_t) g6_19;
00864   int64_t f8g7_19 = f8   * (int64_t) g7_19;
00865   int64_t f8g8_19 = f8   * (int64_t) g8_19;
00866   int64_t f8g9_19 = f8   * (int64_t) g9_19;
00867   int64_t f9g0    = f9   * (int64_t) g0;
00868   int64_t f9g1_38 = f9_2 * (int64_t) g1_19;
00869   int64_t f9g2_19 = f9   * (int64_t) g2_19;
00870   int64_t f9g3_38 = f9_2 * (int64_t) g3_19;
00871   int64_t f9g4_19 = f9   * (int64_t) g4_19;
00872   int64_t f9g5_38 = f9_2 * (int64_t) g5_19;
00873   int64_t f9g6_19 = f9   * (int64_t) g6_19;
00874   int64_t f9g7_38 = f9_2 * (int64_t) g7_19;
00875   int64_t f9g8_19 = f9   * (int64_t) g8_19;
00876   int64_t f9g9_38 = f9_2 * (int64_t) g9_19;
00877   int64_t h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38;
00878   int64_t h1 = f0g1+f1g0   +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19;
00879   int64_t h2 = f0g2+f1g1_2 +f2g0   +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38;
00880   int64_t h3 = f0g3+f1g2   +f2g1   +f3g0   +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19;
00881   int64_t h4 = f0g4+f1g3_2 +f2g2   +f3g1_2 +f4g0   +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38;
00882   int64_t h5 = f0g5+f1g4   +f2g3   +f3g2   +f4g1   +f5g0   +f6g9_19+f7g8_19+f8g7_19+f9g6_19;
00883   int64_t h6 = f0g6+f1g5_2 +f2g4   +f3g3_2 +f4g2   +f5g1_2 +f6g0   +f7g9_38+f8g8_19+f9g7_38;
00884   int64_t h7 = f0g7+f1g6   +f2g5   +f3g4   +f4g3   +f5g2   +f6g1   +f7g0   +f8g9_19+f9g8_19;
00885   int64_t h8 = f0g8+f1g7_2 +f2g6   +f3g5_2 +f4g4   +f5g3_2 +f6g2   +f7g1_2 +f8g0   +f9g9_38;
00886   int64_t h9 = f0g9+f1g8   +f2g7   +f3g6   +f4g5   +f5g4   +f6g3   +f7g2   +f8g1   +f9g0   ;
00887   int64_t carry0;
00888   int64_t carry1;
00889   int64_t carry2;
00890   int64_t carry3;
00891   int64_t carry4;
00892   int64_t carry5;
00893   int64_t carry6;
00894   int64_t carry7;
00895   int64_t carry8;
00896   int64_t carry9;
00897 
00898   /*
00899   |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38))
00900     i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8
00901   |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19))
00902     i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9
00903   */
00904 
00905   carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
00906   carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
00907   /* |h0| <= 2^25 */
00908   /* |h4| <= 2^25 */
00909   /* |h1| <= 1.71*2^59 */
00910   /* |h5| <= 1.71*2^59 */
00911 
00912   carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
00913   carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
00914   /* |h1| <= 2^24; from now on fits into int32 */
00915   /* |h5| <= 2^24; from now on fits into int32 */
00916   /* |h2| <= 1.41*2^60 */
00917   /* |h6| <= 1.41*2^60 */
00918 
00919   carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
00920   carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
00921   /* |h2| <= 2^25; from now on fits into int32 unchanged */
00922   /* |h6| <= 2^25; from now on fits into int32 unchanged */
00923   /* |h3| <= 1.71*2^59 */
00924   /* |h7| <= 1.71*2^59 */
00925 
00926   carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
00927   carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
00928   /* |h3| <= 2^24; from now on fits into int32 unchanged */
00929   /* |h7| <= 2^24; from now on fits into int32 unchanged */
00930   /* |h4| <= 1.72*2^34 */
00931   /* |h8| <= 1.41*2^60 */
00932 
00933   carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
00934   carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
00935   /* |h4| <= 2^25; from now on fits into int32 unchanged */
00936   /* |h8| <= 2^25; from now on fits into int32 unchanged */
00937   /* |h5| <= 1.01*2^24 */
00938   /* |h9| <= 1.71*2^59 */
00939 
00940   carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
00941   /* |h9| <= 2^24; from now on fits into int32 unchanged */
00942   /* |h0| <= 1.1*2^39 */
00943 
00944   carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
00945   /* |h0| <= 2^25; from now on fits into int32 unchanged */
00946   /* |h1| <= 1.01*2^24 */
00947 
00948   h[0] = (int32_t)h0;
00949   h[1] = (int32_t)h1;
00950   h[2] = (int32_t)h2;
00951   h[3] = (int32_t)h3;
00952   h[4] = (int32_t)h4;
00953   h[5] = (int32_t)h5;
00954   h[6] = (int32_t)h6;
00955   h[7] = (int32_t)h7;
00956   h[8] = (int32_t)h8;
00957   h[9] = (int32_t)h9;
00958 }
00959 
00960 
00961 /*
00962 Replace (f,g) with (g,f) if b == 1;
00963 replace (f,g) with (f,g) if b == 0.
00964 
00965 Preconditions: b in {0,1}.
00966 */
00967 
00968 void fe_cswap(fe f, fe g, int b)
00969 {
00970   int32_t f0 = f[0];
00971   int32_t f1 = f[1];
00972   int32_t f2 = f[2];
00973   int32_t f3 = f[3];
00974   int32_t f4 = f[4];
00975   int32_t f5 = f[5];
00976   int32_t f6 = f[6];
00977   int32_t f7 = f[7];
00978   int32_t f8 = f[8];
00979   int32_t f9 = f[9];
00980   int32_t g0 = g[0];
00981   int32_t g1 = g[1];
00982   int32_t g2 = g[2];
00983   int32_t g3 = g[3];
00984   int32_t g4 = g[4];
00985   int32_t g5 = g[5];
00986   int32_t g6 = g[6];
00987   int32_t g7 = g[7];
00988   int32_t g8 = g[8];
00989   int32_t g9 = g[9];
00990   int32_t x0 = f0 ^ g0;
00991   int32_t x1 = f1 ^ g1;
00992   int32_t x2 = f2 ^ g2;
00993   int32_t x3 = f3 ^ g3;
00994   int32_t x4 = f4 ^ g4;
00995   int32_t x5 = f5 ^ g5;
00996   int32_t x6 = f6 ^ g6;
00997   int32_t x7 = f7 ^ g7;
00998   int32_t x8 = f8 ^ g8;
00999   int32_t x9 = f9 ^ g9;
01000   b = -b;
01001   x0 &= b;
01002   x1 &= b;
01003   x2 &= b;
01004   x3 &= b;
01005   x4 &= b;
01006   x5 &= b;
01007   x6 &= b;
01008   x7 &= b;
01009   x8 &= b;
01010   x9 &= b;
01011   f[0] = f0 ^ x0;
01012   f[1] = f1 ^ x1;
01013   f[2] = f2 ^ x2;
01014   f[3] = f3 ^ x3;
01015   f[4] = f4 ^ x4;
01016   f[5] = f5 ^ x5;
01017   f[6] = f6 ^ x6;
01018   f[7] = f7 ^ x7;
01019   f[8] = f8 ^ x8;
01020   f[9] = f9 ^ x9;
01021   g[0] = g0 ^ x0;
01022   g[1] = g1 ^ x1;
01023   g[2] = g2 ^ x2;
01024   g[3] = g3 ^ x3;
01025   g[4] = g4 ^ x4;
01026   g[5] = g5 ^ x5;
01027   g[6] = g6 ^ x6;
01028   g[7] = g7 ^ x7;
01029   g[8] = g8 ^ x8;
01030   g[9] = g9 ^ x9;
01031 }
01032 
01033 
01034 /*
01035 h = f * 121666
01036 Can overlap h with f.
01037 
01038 Preconditions:
01039    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
01040 
01041 Postconditions:
01042    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
01043 */
01044 
01045 void fe_mul121666(fe h,fe f)
01046 {
01047   int32_t f0 = f[0];
01048   int32_t f1 = f[1];
01049   int32_t f2 = f[2];
01050   int32_t f3 = f[3];
01051   int32_t f4 = f[4];
01052   int32_t f5 = f[5];
01053   int32_t f6 = f[6];
01054   int32_t f7 = f[7];
01055   int32_t f8 = f[8];
01056   int32_t f9 = f[9];
01057   int64_t h0 = f0 * (int64_t) 121666;
01058   int64_t h1 = f1 * (int64_t) 121666;
01059   int64_t h2 = f2 * (int64_t) 121666;
01060   int64_t h3 = f3 * (int64_t) 121666;
01061   int64_t h4 = f4 * (int64_t) 121666;
01062   int64_t h5 = f5 * (int64_t) 121666;
01063   int64_t h6 = f6 * (int64_t) 121666;
01064   int64_t h7 = f7 * (int64_t) 121666;
01065   int64_t h8 = f8 * (int64_t) 121666;
01066   int64_t h9 = f9 * (int64_t) 121666;
01067   int64_t carry0;
01068   int64_t carry1;
01069   int64_t carry2;
01070   int64_t carry3;
01071   int64_t carry4;
01072   int64_t carry5;
01073   int64_t carry6;
01074   int64_t carry7;
01075   int64_t carry8;
01076   int64_t carry9;
01077 
01078   carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
01079   carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
01080   carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
01081   carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
01082   carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
01083 
01084   carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
01085   carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
01086   carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
01087   carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
01088   carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
01089 
01090   h[0] = (int32_t)h0;
01091   h[1] = (int32_t)h1;
01092   h[2] = (int32_t)h2;
01093   h[3] = (int32_t)h3;
01094   h[4] = (int32_t)h4;
01095   h[5] = (int32_t)h5;
01096   h[6] = (int32_t)h6;
01097   h[7] = (int32_t)h7;
01098   h[8] = (int32_t)h8;
01099   h[9] = (int32_t)h9;
01100 }
01101 
01102 
01103 /*
01104 h = 2 * f * f
01105 Can overlap h with f.
01106 
01107 Preconditions:
01108    |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
01109 
01110 Postconditions:
01111    |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
01112 */
01113 
01114 /*
01115 See fe_mul.c for discussion of implementation strategy.
01116 */
01117 
01118 void fe_sq2(fe h,const fe f)
01119 {
01120   int32_t f0 = f[0];
01121   int32_t f1 = f[1];
01122   int32_t f2 = f[2];
01123   int32_t f3 = f[3];
01124   int32_t f4 = f[4];
01125   int32_t f5 = f[5];
01126   int32_t f6 = f[6];
01127   int32_t f7 = f[7];
01128   int32_t f8 = f[8];
01129   int32_t f9 = f[9];
01130   int32_t f0_2 = 2 * f0;
01131   int32_t f1_2 = 2 * f1;
01132   int32_t f2_2 = 2 * f2;
01133   int32_t f3_2 = 2 * f3;
01134   int32_t f4_2 = 2 * f4;
01135   int32_t f5_2 = 2 * f5;
01136   int32_t f6_2 = 2 * f6;
01137   int32_t f7_2 = 2 * f7;
01138   int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
01139   int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
01140   int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
01141   int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
01142   int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
01143   int64_t f0f0    = f0   * (int64_t) f0;
01144   int64_t f0f1_2  = f0_2 * (int64_t) f1;
01145   int64_t f0f2_2  = f0_2 * (int64_t) f2;
01146   int64_t f0f3_2  = f0_2 * (int64_t) f3;
01147   int64_t f0f4_2  = f0_2 * (int64_t) f4;
01148   int64_t f0f5_2  = f0_2 * (int64_t) f5;
01149   int64_t f0f6_2  = f0_2 * (int64_t) f6;
01150   int64_t f0f7_2  = f0_2 * (int64_t) f7;
01151   int64_t f0f8_2  = f0_2 * (int64_t) f8;
01152   int64_t f0f9_2  = f0_2 * (int64_t) f9;
01153   int64_t f1f1_2  = f1_2 * (int64_t) f1;
01154   int64_t f1f2_2  = f1_2 * (int64_t) f2;
01155   int64_t f1f3_4  = f1_2 * (int64_t) f3_2;
01156   int64_t f1f4_2  = f1_2 * (int64_t) f4;
01157   int64_t f1f5_4  = f1_2 * (int64_t) f5_2;
01158   int64_t f1f6_2  = f1_2 * (int64_t) f6;
01159   int64_t f1f7_4  = f1_2 * (int64_t) f7_2;
01160   int64_t f1f8_2  = f1_2 * (int64_t) f8;
01161   int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
01162   int64_t f2f2    = f2   * (int64_t) f2;
01163   int64_t f2f3_2  = f2_2 * (int64_t) f3;
01164   int64_t f2f4_2  = f2_2 * (int64_t) f4;
01165   int64_t f2f5_2  = f2_2 * (int64_t) f5;
01166   int64_t f2f6_2  = f2_2 * (int64_t) f6;
01167   int64_t f2f7_2  = f2_2 * (int64_t) f7;
01168   int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
01169   int64_t f2f9_38 = f2   * (int64_t) f9_38;
01170   int64_t f3f3_2  = f3_2 * (int64_t) f3;
01171   int64_t f3f4_2  = f3_2 * (int64_t) f4;
01172   int64_t f3f5_4  = f3_2 * (int64_t) f5_2;
01173   int64_t f3f6_2  = f3_2 * (int64_t) f6;
01174   int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
01175   int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
01176   int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
01177   int64_t f4f4    = f4   * (int64_t) f4;
01178   int64_t f4f5_2  = f4_2 * (int64_t) f5;
01179   int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
01180   int64_t f4f7_38 = f4   * (int64_t) f7_38;
01181   int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
01182   int64_t f4f9_38 = f4   * (int64_t) f9_38;
01183   int64_t f5f5_38 = f5   * (int64_t) f5_38;
01184   int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
01185   int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
01186   int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
01187   int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
01188   int64_t f6f6_19 = f6   * (int64_t) f6_19;
01189   int64_t f6f7_38 = f6   * (int64_t) f7_38;
01190   int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
01191   int64_t f6f9_38 = f6   * (int64_t) f9_38;
01192   int64_t f7f7_38 = f7   * (int64_t) f7_38;
01193   int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
01194   int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
01195   int64_t f8f8_19 = f8   * (int64_t) f8_19;
01196   int64_t f8f9_38 = f8   * (int64_t) f9_38;
01197   int64_t f9f9_38 = f9   * (int64_t) f9_38;
01198   int64_t h0 = f0f0  +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
01199   int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
01200   int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
01201   int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
01202   int64_t h4 = f0f4_2+f1f3_4 +f2f2   +f5f9_76+f6f8_38+f7f7_38;
01203   int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
01204   int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
01205   int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
01206   int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4   +f9f9_38;
01207   int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
01208   int64_t carry0;
01209   int64_t carry1;
01210   int64_t carry2;
01211   int64_t carry3;
01212   int64_t carry4;
01213   int64_t carry5;
01214   int64_t carry6;
01215   int64_t carry7;
01216   int64_t carry8;
01217   int64_t carry9;
01218 
01219   h0 += h0;
01220   h1 += h1;
01221   h2 += h2;
01222   h3 += h3;
01223   h4 += h4;
01224   h5 += h5;
01225   h6 += h6;
01226   h7 += h7;
01227   h8 += h8;
01228   h9 += h9;
01229 
01230   carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
01231   carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
01232 
01233   carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
01234   carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
01235 
01236   carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
01237   carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
01238 
01239   carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
01240   carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
01241 
01242   carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
01243   carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
01244 
01245   carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
01246 
01247   carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
01248 
01249   h[0] = (int32_t)h0;
01250   h[1] = (int32_t)h1;
01251   h[2] = (int32_t)h2;
01252   h[3] = (int32_t)h3;
01253   h[4] = (int32_t)h4;
01254   h[5] = (int32_t)h5;
01255   h[6] = (int32_t)h6;
01256   h[7] = (int32_t)h7;
01257   h[8] = (int32_t)h8;
01258   h[9] = (int32_t)h9;
01259 }
01260 
01261 
01262 void fe_pow22523(fe out,const fe z)
01263 {
01264   fe t0;
01265   fe t1;
01266   fe t2;
01267   int i;
01268 
01269   fe_sq(t0,z); for (i = 1;i < 1;++i) fe_sq(t0,t0);
01270   fe_sq(t1,t0); for (i = 1;i < 2;++i) fe_sq(t1,t1);
01271   fe_mul(t1,z,t1);
01272   fe_mul(t0,t0,t1);
01273   fe_sq(t0,t0); for (i = 1;i < 1;++i) fe_sq(t0,t0);
01274   fe_mul(t0,t1,t0);
01275   fe_sq(t1,t0); for (i = 1;i < 5;++i) fe_sq(t1,t1);
01276   fe_mul(t0,t1,t0);
01277   fe_sq(t1,t0); for (i = 1;i < 10;++i) fe_sq(t1,t1);
01278   fe_mul(t1,t1,t0);
01279   fe_sq(t2,t1); for (i = 1;i < 20;++i) fe_sq(t2,t2);
01280   fe_mul(t1,t2,t1);
01281   fe_sq(t1,t1); for (i = 1;i < 10;++i) fe_sq(t1,t1);
01282   fe_mul(t0,t1,t0);
01283   fe_sq(t1,t0); for (i = 1;i < 50;++i) fe_sq(t1,t1);
01284   fe_mul(t1,t1,t0);
01285   fe_sq(t2,t1); for (i = 1;i < 100;++i) fe_sq(t2,t2);
01286   fe_mul(t1,t2,t1);
01287   fe_sq(t1,t1); for (i = 1;i < 50;++i) fe_sq(t1,t1);
01288   fe_mul(t0,t1,t0);
01289   fe_sq(t0,t0); for (i = 1;i < 2;++i) fe_sq(t0,t0);
01290   fe_mul(out,t0,z);
01291 
01292   return;
01293 }
01294 
01295 
01296 /*
01297 h = -f
01298 
01299 Preconditions:
01300    |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
01301 
01302 Postconditions:
01303    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
01304 */
01305 
01306 void fe_neg(fe h,const fe f)
01307 {
01308   int32_t f0 = f[0];
01309   int32_t f1 = f[1];
01310   int32_t f2 = f[2];
01311   int32_t f3 = f[3];
01312   int32_t f4 = f[4];
01313   int32_t f5 = f[5];
01314   int32_t f6 = f[6];
01315   int32_t f7 = f[7];
01316   int32_t f8 = f[8];
01317   int32_t f9 = f[9];
01318   int32_t h0 = -f0;
01319   int32_t h1 = -f1;
01320   int32_t h2 = -f2;
01321   int32_t h3 = -f3;
01322   int32_t h4 = -f4;
01323   int32_t h5 = -f5;
01324   int32_t h6 = -f6;
01325   int32_t h7 = -f7;
01326   int32_t h8 = -f8;
01327   int32_t h9 = -f9;
01328   h[0] = h0;
01329   h[1] = h1;
01330   h[2] = h2;
01331   h[3] = h3;
01332   h[4] = h4;
01333   h[5] = h5;
01334   h[6] = h6;
01335   h[7] = h7;
01336   h[8] = h8;
01337   h[9] = h9;
01338 }
01339 
01340 
01341 /*
01342 Preconditions:
01343    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
01344 */
01345 
01346 static const unsigned char zero[32] = {0};
01347 
01348 int fe_isnonzero(const fe f)
01349 {
01350   unsigned char s[32];
01351   fe_tobytes(s,f);
01352   return ConstantCompare(s,zero,32);
01353 }
01354 
01355 
01356 /*
01357 return 1 if f is in {1,3,5,...,q-2}
01358 return 0 if f is in {0,2,4,...,q-1}
01359 
01360 Preconditions:
01361    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
01362 */
01363 
01364 int fe_isnegative(const fe f)
01365 {
01366   unsigned char s[32];
01367   fe_tobytes(s,f);
01368   return s[0] & 1;
01369 }
01370 
01371 
01372 /*
01373 Replace (f,g) with (g,g) if b == 1;
01374 replace (f,g) with (f,g) if b == 0.
01375 
01376 Preconditions: b in {0,1}.
01377 */
01378 
01379 void fe_cmov(fe f, const fe g, int b)
01380 {
01381   int32_t f0 = f[0];
01382   int32_t f1 = f[1];
01383   int32_t f2 = f[2];
01384   int32_t f3 = f[3];
01385   int32_t f4 = f[4];
01386   int32_t f5 = f[5];
01387   int32_t f6 = f[6];
01388   int32_t f7 = f[7];
01389   int32_t f8 = f[8];
01390   int32_t f9 = f[9];
01391   int32_t g0 = g[0];
01392   int32_t g1 = g[1];
01393   int32_t g2 = g[2];
01394   int32_t g3 = g[3];
01395   int32_t g4 = g[4];
01396   int32_t g5 = g[5];
01397   int32_t g6 = g[6];
01398   int32_t g7 = g[7];
01399   int32_t g8 = g[8];
01400   int32_t g9 = g[9];
01401   int32_t x0 = f0 ^ g0;
01402   int32_t x1 = f1 ^ g1;
01403   int32_t x2 = f2 ^ g2;
01404   int32_t x3 = f3 ^ g3;
01405   int32_t x4 = f4 ^ g4;
01406   int32_t x5 = f5 ^ g5;
01407   int32_t x6 = f6 ^ g6;
01408   int32_t x7 = f7 ^ g7;
01409   int32_t x8 = f8 ^ g8;
01410   int32_t x9 = f9 ^ g9;
01411   b = -b;
01412   x0 &= b;
01413   x1 &= b;
01414   x2 &= b;
01415   x3 &= b;
01416   x4 &= b;
01417   x5 &= b;
01418   x6 &= b;
01419   x7 &= b;
01420   x8 &= b;
01421   x9 &= b;
01422   f[0] = f0 ^ x0;
01423   f[1] = f1 ^ x1;
01424   f[2] = f2 ^ x2;
01425   f[3] = f3 ^ x3;
01426   f[4] = f4 ^ x4;
01427   f[5] = f5 ^ x5;
01428   f[6] = f6 ^ x6;
01429   f[7] = f7 ^ x7;
01430   f[8] = f8 ^ x8;
01431   f[9] = f9 ^ x9;
01432 }
01433 #endif
01434 
01435 #endif /* !CURVE25519_SMALL || !ED25519_SMALL */
01436 #endif /* HAVE_CURVE25519 || HAVE_ED25519 */
01437