Renesas / SecureDweet
Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers fe_operations.c Source File

fe_operations.c

00001 /* fe_operations.c
00002  *
00003  * Copyright (C) 2006-2016 wolfSSL Inc.
00004  *
00005  * This file is part of wolfSSL.
00006  *
00007  * wolfSSL is free software; you can redistribute it and/or modify
00008  * it under the terms of the GNU General Public License as published by
00009  * the Free Software Foundation; either version 2 of the License, or
00010  * (at your option) any later version.
00011  *
00012  * wolfSSL is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
00020  */
00021 
00022 
00023  /* Based On Daniel J Bernstein's curve25519 Public Domain ref10 work. */
00024 
00025 #ifdef HAVE_CONFIG_H
00026     #include <config.h>
00027 #endif
00028 
00029 #include <wolfssl/wolfcrypt/settings.h>
00030 
00031 #ifndef CURVED25519_SMALL /* run when not defined to use small memory math */
00032 #if defined(HAVE_ED25519) || defined(HAVE_CURVE25519)
00033 
00034 #include <wolfssl/wolfcrypt/fe_operations.h>
00035 #include <stdint.h>
00036 
00037 #ifdef NO_INLINE
00038     #include <wolfssl/wolfcrypt/misc.h>
00039 #else
00040     #include <wolfcrypt/src/misc.c>
00041 #endif
00042 
00043 /*
00044 fe means field element.
00045 Here the field is \Z/(2^255-19).
00046 An element t, entries t[0]...t[9], represents the integer
00047 t[0]+2^26 t[1]+2^51 t[2]+2^77 t[3]+2^102 t[4]+...+2^230 t[9].
00048 Bounds on each t[i] vary depending on context.
00049 */
00050 
00051 uint64_t load_3(const unsigned char *in)
00052 {
00053   uint64_t result;
00054   result = (uint64_t) in[0];
00055   result |= ((uint64_t) in[1]) << 8;
00056   result |= ((uint64_t) in[2]) << 16;
00057   return result;
00058 }
00059 
00060 
00061 uint64_t load_4(const unsigned char *in)
00062 {
00063   uint64_t result;
00064   result = (uint64_t) in[0];
00065   result |= ((uint64_t) in[1]) << 8;
00066   result |= ((uint64_t) in[2]) << 16;
00067   result |= ((uint64_t) in[3]) << 24;
00068   return result;
00069 }
00070 
00071 
00072 /*
00073 h = 1
00074 */
00075 
00076 void fe_1(fe h)
00077 {
00078   h[0] = 1;
00079   h[1] = 0;
00080   h[2] = 0;
00081   h[3] = 0;
00082   h[4] = 0;
00083   h[5] = 0;
00084   h[6] = 0;
00085   h[7] = 0;
00086   h[8] = 0;
00087   h[9] = 0;
00088 }
00089 
00090 
00091 /*
00092 h = 0
00093 */
00094 
00095 void fe_0(fe h)
00096 {
00097   h[0] = 0;
00098   h[1] = 0;
00099   h[2] = 0;
00100   h[3] = 0;
00101   h[4] = 0;
00102   h[5] = 0;
00103   h[6] = 0;
00104   h[7] = 0;
00105   h[8] = 0;
00106   h[9] = 0;
00107 }
00108 
00109 
00110 int curve25519(byte* q, byte* n, byte* p)
00111 {
00112 #if 0
00113   unsigned char e[32];
00114 #endif
00115   fe x1;
00116   fe x2;
00117   fe z2;
00118   fe x3;
00119   fe z3;
00120   fe tmp0;
00121   fe tmp1;
00122   int pos;
00123   unsigned int swap;
00124   unsigned int b;
00125 
00126   /* Clamp already done during key generation and import */
00127 #if 0
00128   {
00129     unsigned int i;
00130     for (i = 0;i < 32;++i) e[i] = n[i];
00131     e[0] &= 248;
00132     e[31] &= 127;
00133     e[31] |= 64;
00134   }
00135 #endif
00136 
00137   fe_frombytes(x1,p);
00138   fe_1(x2);
00139   fe_0(z2);
00140   fe_copy(x3,x1);
00141   fe_1(z3);
00142 
00143   swap = 0;
00144   for (pos = 254;pos >= 0;--pos) {
00145 #if 0
00146     b = e[pos / 8] >> (pos & 7);
00147 #else
00148     b = n[pos / 8] >> (pos & 7);
00149 #endif
00150     b &= 1;
00151     swap ^= b;
00152     fe_cswap(x2,x3,swap);
00153     fe_cswap(z2,z3,swap);
00154     swap = b;
00155 
00156     /* montgomery */
00157     fe_sub(tmp0,x3,z3);
00158     fe_sub(tmp1,x2,z2);
00159     fe_add(x2,x2,z2);
00160     fe_add(z2,x3,z3);
00161     fe_mul(z3,tmp0,x2);
00162     fe_mul(z2,z2,tmp1);
00163     fe_sq(tmp0,tmp1);
00164     fe_sq(tmp1,x2);
00165     fe_add(x3,z3,z2);
00166     fe_sub(z2,z3,z2);
00167     fe_mul(x2,tmp1,tmp0);
00168     fe_sub(tmp1,tmp1,tmp0);
00169     fe_sq(z2,z2);
00170     fe_mul121666(z3,tmp1);
00171     fe_sq(x3,x3);
00172     fe_add(tmp0,tmp0,z3);
00173     fe_mul(z3,x1,z2);
00174     fe_mul(z2,tmp1,tmp0);
00175   }
00176   fe_cswap(x2,x3,swap);
00177   fe_cswap(z2,z3,swap);
00178 
00179   fe_invert(z2,z2);
00180   fe_mul(x2,x2,z2);
00181   fe_tobytes(q,x2);
00182 
00183   return 0;
00184 }
00185 
00186 
00187 /*
00188 h = f * f
00189 Can overlap h with f.
00190 
00191 Preconditions:
00192    |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
00193 
00194 Postconditions:
00195    |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
00196 */
00197 
00198 /*
00199 See fe_mul.c for discussion of implementation strategy.
00200 */
00201 
00202 void fe_sq(fe h,const fe f)
00203 {
00204   int32_t f0 = f[0];
00205   int32_t f1 = f[1];
00206   int32_t f2 = f[2];
00207   int32_t f3 = f[3];
00208   int32_t f4 = f[4];
00209   int32_t f5 = f[5];
00210   int32_t f6 = f[6];
00211   int32_t f7 = f[7];
00212   int32_t f8 = f[8];
00213   int32_t f9 = f[9];
00214   int32_t f0_2 = 2 * f0;
00215   int32_t f1_2 = 2 * f1;
00216   int32_t f2_2 = 2 * f2;
00217   int32_t f3_2 = 2 * f3;
00218   int32_t f4_2 = 2 * f4;
00219   int32_t f5_2 = 2 * f5;
00220   int32_t f6_2 = 2 * f6;
00221   int32_t f7_2 = 2 * f7;
00222   int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
00223   int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
00224   int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
00225   int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
00226   int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
00227   int64_t f0f0    = f0   * (int64_t) f0;
00228   int64_t f0f1_2  = f0_2 * (int64_t) f1;
00229   int64_t f0f2_2  = f0_2 * (int64_t) f2;
00230   int64_t f0f3_2  = f0_2 * (int64_t) f3;
00231   int64_t f0f4_2  = f0_2 * (int64_t) f4;
00232   int64_t f0f5_2  = f0_2 * (int64_t) f5;
00233   int64_t f0f6_2  = f0_2 * (int64_t) f6;
00234   int64_t f0f7_2  = f0_2 * (int64_t) f7;
00235   int64_t f0f8_2  = f0_2 * (int64_t) f8;
00236   int64_t f0f9_2  = f0_2 * (int64_t) f9;
00237   int64_t f1f1_2  = f1_2 * (int64_t) f1;
00238   int64_t f1f2_2  = f1_2 * (int64_t) f2;
00239   int64_t f1f3_4  = f1_2 * (int64_t) f3_2;
00240   int64_t f1f4_2  = f1_2 * (int64_t) f4;
00241   int64_t f1f5_4  = f1_2 * (int64_t) f5_2;
00242   int64_t f1f6_2  = f1_2 * (int64_t) f6;
00243   int64_t f1f7_4  = f1_2 * (int64_t) f7_2;
00244   int64_t f1f8_2  = f1_2 * (int64_t) f8;
00245   int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
00246   int64_t f2f2    = f2   * (int64_t) f2;
00247   int64_t f2f3_2  = f2_2 * (int64_t) f3;
00248   int64_t f2f4_2  = f2_2 * (int64_t) f4;
00249   int64_t f2f5_2  = f2_2 * (int64_t) f5;
00250   int64_t f2f6_2  = f2_2 * (int64_t) f6;
00251   int64_t f2f7_2  = f2_2 * (int64_t) f7;
00252   int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
00253   int64_t f2f9_38 = f2   * (int64_t) f9_38;
00254   int64_t f3f3_2  = f3_2 * (int64_t) f3;
00255   int64_t f3f4_2  = f3_2 * (int64_t) f4;
00256   int64_t f3f5_4  = f3_2 * (int64_t) f5_2;
00257   int64_t f3f6_2  = f3_2 * (int64_t) f6;
00258   int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
00259   int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
00260   int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
00261   int64_t f4f4    = f4   * (int64_t) f4;
00262   int64_t f4f5_2  = f4_2 * (int64_t) f5;
00263   int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
00264   int64_t f4f7_38 = f4   * (int64_t) f7_38;
00265   int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
00266   int64_t f4f9_38 = f4   * (int64_t) f9_38;
00267   int64_t f5f5_38 = f5   * (int64_t) f5_38;
00268   int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
00269   int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
00270   int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
00271   int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
00272   int64_t f6f6_19 = f6   * (int64_t) f6_19;
00273   int64_t f6f7_38 = f6   * (int64_t) f7_38;
00274   int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
00275   int64_t f6f9_38 = f6   * (int64_t) f9_38;
00276   int64_t f7f7_38 = f7   * (int64_t) f7_38;
00277   int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
00278   int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
00279   int64_t f8f8_19 = f8   * (int64_t) f8_19;
00280   int64_t f8f9_38 = f8   * (int64_t) f9_38;
00281   int64_t f9f9_38 = f9   * (int64_t) f9_38;
00282   int64_t h0 = f0f0  +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
00283   int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
00284   int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
00285   int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
00286   int64_t h4 = f0f4_2+f1f3_4 +f2f2   +f5f9_76+f6f8_38+f7f7_38;
00287   int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
00288   int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
00289   int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
00290   int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4   +f9f9_38;
00291   int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
00292   int64_t carry0;
00293   int64_t carry1;
00294   int64_t carry2;
00295   int64_t carry3;
00296   int64_t carry4;
00297   int64_t carry5;
00298   int64_t carry6;
00299   int64_t carry7;
00300   int64_t carry8;
00301   int64_t carry9;
00302 
00303   carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
00304   carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
00305 
00306   carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
00307   carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
00308 
00309   carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
00310   carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
00311 
00312   carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
00313   carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
00314 
00315   carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
00316   carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
00317 
00318   carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
00319 
00320   carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
00321 
00322   h[0] = (int32_t)h0;
00323   h[1] = (int32_t)h1;
00324   h[2] = (int32_t)h2;
00325   h[3] = (int32_t)h3;
00326   h[4] = (int32_t)h4;
00327   h[5] = (int32_t)h5;
00328   h[6] = (int32_t)h6;
00329   h[7] = (int32_t)h7;
00330   h[8] = (int32_t)h8;
00331   h[9] = (int32_t)h9;
00332 }
00333 
00334 
00335 /*
00336 h = f + g
00337 Can overlap h with f or g.
00338 
00339 Preconditions:
00340    |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
00341    |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
00342 
00343 Postconditions:
00344    |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
00345 */
00346 
00347 void fe_add(fe h,const fe f,const fe g)
00348 {
00349   int32_t f0 = f[0];
00350   int32_t f1 = f[1];
00351   int32_t f2 = f[2];
00352   int32_t f3 = f[3];
00353   int32_t f4 = f[4];
00354   int32_t f5 = f[5];
00355   int32_t f6 = f[6];
00356   int32_t f7 = f[7];
00357   int32_t f8 = f[8];
00358   int32_t f9 = f[9];
00359   int32_t g0 = g[0];
00360   int32_t g1 = g[1];
00361   int32_t g2 = g[2];
00362   int32_t g3 = g[3];
00363   int32_t g4 = g[4];
00364   int32_t g5 = g[5];
00365   int32_t g6 = g[6];
00366   int32_t g7 = g[7];
00367   int32_t g8 = g[8];
00368   int32_t g9 = g[9];
00369   int32_t h0 = f0 + g0;
00370   int32_t h1 = f1 + g1;
00371   int32_t h2 = f2 + g2;
00372   int32_t h3 = f3 + g3;
00373   int32_t h4 = f4 + g4;
00374   int32_t h5 = f5 + g5;
00375   int32_t h6 = f6 + g6;
00376   int32_t h7 = f7 + g7;
00377   int32_t h8 = f8 + g8;
00378   int32_t h9 = f9 + g9;
00379   h[0] = h0;
00380   h[1] = h1;
00381   h[2] = h2;
00382   h[3] = h3;
00383   h[4] = h4;
00384   h[5] = h5;
00385   h[6] = h6;
00386   h[7] = h7;
00387   h[8] = h8;
00388   h[9] = h9;
00389 }
00390 
00391 
00392 /*
00393 Preconditions:
00394   |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
00395 
00396 Write p=2^255-19; q=floor(h/p).
00397 Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
00398 
00399 Proof:
00400   Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
00401   Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4.
00402 
00403   Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
00404   Then 0<y<1.
00405 
00406   Write r=h-pq.
00407   Have 0<=r<=p-1=2^255-20.
00408   Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
00409 
00410   Write x=r+19(2^-255)r+y.
00411   Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
00412 
00413   Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
00414   so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
00415 */
00416 
00417 void fe_tobytes(unsigned char *s,const fe h)
00418 {
00419   int32_t h0 = h[0];
00420   int32_t h1 = h[1];
00421   int32_t h2 = h[2];
00422   int32_t h3 = h[3];
00423   int32_t h4 = h[4];
00424   int32_t h5 = h[5];
00425   int32_t h6 = h[6];
00426   int32_t h7 = h[7];
00427   int32_t h8 = h[8];
00428   int32_t h9 = h[9];
00429   int32_t q;
00430   int32_t carry0;
00431   int32_t carry1;
00432   int32_t carry2;
00433   int32_t carry3;
00434   int32_t carry4;
00435   int32_t carry5;
00436   int32_t carry6;
00437   int32_t carry7;
00438   int32_t carry8;
00439   int32_t carry9;
00440 
00441   q = (19 * h9 + (((int32_t) 1) << 24)) >> 25;
00442   q = (h0 + q) >> 26;
00443   q = (h1 + q) >> 25;
00444   q = (h2 + q) >> 26;
00445   q = (h3 + q) >> 25;
00446   q = (h4 + q) >> 26;
00447   q = (h5 + q) >> 25;
00448   q = (h6 + q) >> 26;
00449   q = (h7 + q) >> 25;
00450   q = (h8 + q) >> 26;
00451   q = (h9 + q) >> 25;
00452 
00453   /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
00454   h0 += 19 * q;
00455   /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
00456 
00457   carry0 = h0 >> 26; h1 += carry0; h0 -= carry0 << 26;
00458   carry1 = h1 >> 25; h2 += carry1; h1 -= carry1 << 25;
00459   carry2 = h2 >> 26; h3 += carry2; h2 -= carry2 << 26;
00460   carry3 = h3 >> 25; h4 += carry3; h3 -= carry3 << 25;
00461   carry4 = h4 >> 26; h5 += carry4; h4 -= carry4 << 26;
00462   carry5 = h5 >> 25; h6 += carry5; h5 -= carry5 << 25;
00463   carry6 = h6 >> 26; h7 += carry6; h6 -= carry6 << 26;
00464   carry7 = h7 >> 25; h8 += carry7; h7 -= carry7 << 25;
00465   carry8 = h8 >> 26; h9 += carry8; h8 -= carry8 << 26;
00466   carry9 = h9 >> 25;               h9 -= carry9 << 25;
00467                   /* h10 = carry9 */
00468 
00469   /*
00470   Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
00471   Have h0+...+2^230 h9 between 0 and 2^255-1;
00472   evidently 2^255 h10-2^255 q = 0.
00473   Goal: Output h0+...+2^230 h9.
00474   */
00475 
00476   s[0] = h0 >> 0;
00477   s[1] = h0 >> 8;
00478   s[2] = h0 >> 16;
00479   s[3] = (h0 >> 24) | (h1 << 2);
00480   s[4] = h1 >> 6;
00481   s[5] = h1 >> 14;
00482   s[6] = (h1 >> 22) | (h2 << 3);
00483   s[7] = h2 >> 5;
00484   s[8] = h2 >> 13;
00485   s[9] = (h2 >> 21) | (h3 << 5);
00486   s[10] = h3 >> 3;
00487   s[11] = h3 >> 11;
00488   s[12] = (h3 >> 19) | (h4 << 6);
00489   s[13] = h4 >> 2;
00490   s[14] = h4 >> 10;
00491   s[15] = h4 >> 18;
00492   s[16] = h5 >> 0;
00493   s[17] = h5 >> 8;
00494   s[18] = h5 >> 16;
00495   s[19] = (h5 >> 24) | (h6 << 1);
00496   s[20] = h6 >> 7;
00497   s[21] = h6 >> 15;
00498   s[22] = (h6 >> 23) | (h7 << 3);
00499   s[23] = h7 >> 5;
00500   s[24] = h7 >> 13;
00501   s[25] = (h7 >> 21) | (h8 << 4);
00502   s[26] = h8 >> 4;
00503   s[27] = h8 >> 12;
00504   s[28] = (h8 >> 20) | (h9 << 6);
00505   s[29] = h9 >> 2;
00506   s[30] = h9 >> 10;
00507   s[31] = h9 >> 18;
00508 }
00509 
00510 
00511 /*
00512 h = f - g
00513 Can overlap h with f or g.
00514 
00515 Preconditions:
00516    |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
00517    |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
00518 
00519 Postconditions:
00520    |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
00521 */
00522 
00523 void fe_sub(fe h,const fe f,const fe g)
00524 {
00525   int32_t f0 = f[0];
00526   int32_t f1 = f[1];
00527   int32_t f2 = f[2];
00528   int32_t f3 = f[3];
00529   int32_t f4 = f[4];
00530   int32_t f5 = f[5];
00531   int32_t f6 = f[6];
00532   int32_t f7 = f[7];
00533   int32_t f8 = f[8];
00534   int32_t f9 = f[9];
00535   int32_t g0 = g[0];
00536   int32_t g1 = g[1];
00537   int32_t g2 = g[2];
00538   int32_t g3 = g[3];
00539   int32_t g4 = g[4];
00540   int32_t g5 = g[5];
00541   int32_t g6 = g[6];
00542   int32_t g7 = g[7];
00543   int32_t g8 = g[8];
00544   int32_t g9 = g[9];
00545   int32_t h0 = f0 - g0;
00546   int32_t h1 = f1 - g1;
00547   int32_t h2 = f2 - g2;
00548   int32_t h3 = f3 - g3;
00549   int32_t h4 = f4 - g4;
00550   int32_t h5 = f5 - g5;
00551   int32_t h6 = f6 - g6;
00552   int32_t h7 = f7 - g7;
00553   int32_t h8 = f8 - g8;
00554   int32_t h9 = f9 - g9;
00555   h[0] = h0;
00556   h[1] = h1;
00557   h[2] = h2;
00558   h[3] = h3;
00559   h[4] = h4;
00560   h[5] = h5;
00561   h[6] = h6;
00562   h[7] = h7;
00563   h[8] = h8;
00564   h[9] = h9;
00565 }
00566 
00567 
00568 /*
00569 Ignores top bit of h.
00570 */
00571 
00572 void fe_frombytes(fe h,const unsigned char *s)
00573 {
00574   int64_t h0 = load_4(s);
00575   int64_t h1 = load_3(s + 4) << 6;
00576   int64_t h2 = load_3(s + 7) << 5;
00577   int64_t h3 = load_3(s + 10) << 3;
00578   int64_t h4 = load_3(s + 13) << 2;
00579   int64_t h5 = load_4(s + 16);
00580   int64_t h6 = load_3(s + 20) << 7;
00581   int64_t h7 = load_3(s + 23) << 5;
00582   int64_t h8 = load_3(s + 26) << 4;
00583   int64_t h9 = (load_3(s + 29) & 8388607) << 2;
00584   int64_t carry0;
00585   int64_t carry1;
00586   int64_t carry2;
00587   int64_t carry3;
00588   int64_t carry4;
00589   int64_t carry5;
00590   int64_t carry6;
00591   int64_t carry7;
00592   int64_t carry8;
00593   int64_t carry9;
00594 
00595   carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
00596   carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
00597   carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
00598   carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
00599   carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
00600 
00601   carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
00602   carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
00603   carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
00604   carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
00605   carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
00606 
00607   h[0] = (int32_t)h0;
00608   h[1] = (int32_t)h1;
00609   h[2] = (int32_t)h2;
00610   h[3] = (int32_t)h3;
00611   h[4] = (int32_t)h4;
00612   h[5] = (int32_t)h5;
00613   h[6] = (int32_t)h6;
00614   h[7] = (int32_t)h7;
00615   h[8] = (int32_t)h8;
00616   h[9] = (int32_t)h9;
00617 }
00618 
00619 
00620 void fe_invert(fe out,const fe z)
00621 {
00622   fe t0;
00623   fe t1;
00624   fe t2;
00625   fe t3;
00626   int i;
00627 
00628   /* pow225521 */
00629   fe_sq(t0,z); for (i = 1;i < 1;++i) fe_sq(t0,t0);
00630   fe_sq(t1,t0); for (i = 1;i < 2;++i) fe_sq(t1,t1);
00631   fe_mul(t1,z,t1);
00632   fe_mul(t0,t0,t1);
00633   fe_sq(t2,t0); for (i = 1;i < 1;++i) fe_sq(t2,t2);
00634   fe_mul(t1,t1,t2);
00635   fe_sq(t2,t1); for (i = 1;i < 5;++i) fe_sq(t2,t2);
00636   fe_mul(t1,t2,t1);
00637   fe_sq(t2,t1); for (i = 1;i < 10;++i) fe_sq(t2,t2);
00638   fe_mul(t2,t2,t1);
00639   fe_sq(t3,t2); for (i = 1;i < 20;++i) fe_sq(t3,t3);
00640   fe_mul(t2,t3,t2);
00641   fe_sq(t2,t2); for (i = 1;i < 10;++i) fe_sq(t2,t2);
00642   fe_mul(t1,t2,t1);
00643   fe_sq(t2,t1); for (i = 1;i < 50;++i) fe_sq(t2,t2);
00644   fe_mul(t2,t2,t1);
00645   fe_sq(t3,t2); for (i = 1;i < 100;++i) fe_sq(t3,t3);
00646   fe_mul(t2,t3,t2);
00647   fe_sq(t2,t2); for (i = 1;i < 50;++i) fe_sq(t2,t2);
00648   fe_mul(t1,t2,t1);
00649   fe_sq(t1,t1); for (i = 1;i < 5;++i) fe_sq(t1,t1);
00650   fe_mul(out,t1,t0);
00651 
00652   return;
00653 }
00654 
00655 
00656 /*
00657 h = f
00658 */
00659 
00660 void fe_copy(fe h,const fe f)
00661 {
00662   int32_t f0 = f[0];
00663   int32_t f1 = f[1];
00664   int32_t f2 = f[2];
00665   int32_t f3 = f[3];
00666   int32_t f4 = f[4];
00667   int32_t f5 = f[5];
00668   int32_t f6 = f[6];
00669   int32_t f7 = f[7];
00670   int32_t f8 = f[8];
00671   int32_t f9 = f[9];
00672   h[0] = f0;
00673   h[1] = f1;
00674   h[2] = f2;
00675   h[3] = f3;
00676   h[4] = f4;
00677   h[5] = f5;
00678   h[6] = f6;
00679   h[7] = f7;
00680   h[8] = f8;
00681   h[9] = f9;
00682 }
00683 
00684 
00685 /*
00686 h = f * g
00687 Can overlap h with f or g.
00688 
00689 Preconditions:
00690    |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
00691    |g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
00692 
00693 Postconditions:
00694    |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
00695 */
00696 
00697 /*
00698 Notes on implementation strategy:
00699 
00700 Using schoolbook multiplication.
00701 Karatsuba would save a little in some cost models.
00702 
00703 Most multiplications by 2 and 19 are 32-bit precomputations;
00704 cheaper than 64-bit postcomputations.
00705 
00706 There is one remaining multiplication by 19 in the carry chain;
00707 one *19 precomputation can be merged into this,
00708 but the resulting data flow is considerably less clean.
00709 
00710 There are 12 carries below.
00711 10 of them are 2-way parallelizable and vectorizable.
00712 Can get away with 11 carries, but then data flow is much deeper.
00713 
00714 With tighter constraints on inputs can squeeze carries into int32.
00715 */
00716 
00717 void fe_mul(fe h,const fe f,const fe g)
00718 {
00719   int32_t f0 = f[0];
00720   int32_t f1 = f[1];
00721   int32_t f2 = f[2];
00722   int32_t f3 = f[3];
00723   int32_t f4 = f[4];
00724   int32_t f5 = f[5];
00725   int32_t f6 = f[6];
00726   int32_t f7 = f[7];
00727   int32_t f8 = f[8];
00728   int32_t f9 = f[9];
00729   int32_t g0 = g[0];
00730   int32_t g1 = g[1];
00731   int32_t g2 = g[2];
00732   int32_t g3 = g[3];
00733   int32_t g4 = g[4];
00734   int32_t g5 = g[5];
00735   int32_t g6 = g[6];
00736   int32_t g7 = g[7];
00737   int32_t g8 = g[8];
00738   int32_t g9 = g[9];
00739   int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */
00740   int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */
00741   int32_t g3_19 = 19 * g3;
00742   int32_t g4_19 = 19 * g4;
00743   int32_t g5_19 = 19 * g5;
00744   int32_t g6_19 = 19 * g6;
00745   int32_t g7_19 = 19 * g7;
00746   int32_t g8_19 = 19 * g8;
00747   int32_t g9_19 = 19 * g9;
00748   int32_t f1_2 = 2 * f1;
00749   int32_t f3_2 = 2 * f3;
00750   int32_t f5_2 = 2 * f5;
00751   int32_t f7_2 = 2 * f7;
00752   int32_t f9_2 = 2 * f9;
00753   int64_t f0g0    = f0   * (int64_t) g0;
00754   int64_t f0g1    = f0   * (int64_t) g1;
00755   int64_t f0g2    = f0   * (int64_t) g2;
00756   int64_t f0g3    = f0   * (int64_t) g3;
00757   int64_t f0g4    = f0   * (int64_t) g4;
00758   int64_t f0g5    = f0   * (int64_t) g5;
00759   int64_t f0g6    = f0   * (int64_t) g6;
00760   int64_t f0g7    = f0   * (int64_t) g7;
00761   int64_t f0g8    = f0   * (int64_t) g8;
00762   int64_t f0g9    = f0   * (int64_t) g9;
00763   int64_t f1g0    = f1   * (int64_t) g0;
00764   int64_t f1g1_2  = f1_2 * (int64_t) g1;
00765   int64_t f1g2    = f1   * (int64_t) g2;
00766   int64_t f1g3_2  = f1_2 * (int64_t) g3;
00767   int64_t f1g4    = f1   * (int64_t) g4;
00768   int64_t f1g5_2  = f1_2 * (int64_t) g5;
00769   int64_t f1g6    = f1   * (int64_t) g6;
00770   int64_t f1g7_2  = f1_2 * (int64_t) g7;
00771   int64_t f1g8    = f1   * (int64_t) g8;
00772   int64_t f1g9_38 = f1_2 * (int64_t) g9_19;
00773   int64_t f2g0    = f2   * (int64_t) g0;
00774   int64_t f2g1    = f2   * (int64_t) g1;
00775   int64_t f2g2    = f2   * (int64_t) g2;
00776   int64_t f2g3    = f2   * (int64_t) g3;
00777   int64_t f2g4    = f2   * (int64_t) g4;
00778   int64_t f2g5    = f2   * (int64_t) g5;
00779   int64_t f2g6    = f2   * (int64_t) g6;
00780   int64_t f2g7    = f2   * (int64_t) g7;
00781   int64_t f2g8_19 = f2   * (int64_t) g8_19;
00782   int64_t f2g9_19 = f2   * (int64_t) g9_19;
00783   int64_t f3g0    = f3   * (int64_t) g0;
00784   int64_t f3g1_2  = f3_2 * (int64_t) g1;
00785   int64_t f3g2    = f3   * (int64_t) g2;
00786   int64_t f3g3_2  = f3_2 * (int64_t) g3;
00787   int64_t f3g4    = f3   * (int64_t) g4;
00788   int64_t f3g5_2  = f3_2 * (int64_t) g5;
00789   int64_t f3g6    = f3   * (int64_t) g6;
00790   int64_t f3g7_38 = f3_2 * (int64_t) g7_19;
00791   int64_t f3g8_19 = f3   * (int64_t) g8_19;
00792   int64_t f3g9_38 = f3_2 * (int64_t) g9_19;
00793   int64_t f4g0    = f4   * (int64_t) g0;
00794   int64_t f4g1    = f4   * (int64_t) g1;
00795   int64_t f4g2    = f4   * (int64_t) g2;
00796   int64_t f4g3    = f4   * (int64_t) g3;
00797   int64_t f4g4    = f4   * (int64_t) g4;
00798   int64_t f4g5    = f4   * (int64_t) g5;
00799   int64_t f4g6_19 = f4   * (int64_t) g6_19;
00800   int64_t f4g7_19 = f4   * (int64_t) g7_19;
00801   int64_t f4g8_19 = f4   * (int64_t) g8_19;
00802   int64_t f4g9_19 = f4   * (int64_t) g9_19;
00803   int64_t f5g0    = f5   * (int64_t) g0;
00804   int64_t f5g1_2  = f5_2 * (int64_t) g1;
00805   int64_t f5g2    = f5   * (int64_t) g2;
00806   int64_t f5g3_2  = f5_2 * (int64_t) g3;
00807   int64_t f5g4    = f5   * (int64_t) g4;
00808   int64_t f5g5_38 = f5_2 * (int64_t) g5_19;
00809   int64_t f5g6_19 = f5   * (int64_t) g6_19;
00810   int64_t f5g7_38 = f5_2 * (int64_t) g7_19;
00811   int64_t f5g8_19 = f5   * (int64_t) g8_19;
00812   int64_t f5g9_38 = f5_2 * (int64_t) g9_19;
00813   int64_t f6g0    = f6   * (int64_t) g0;
00814   int64_t f6g1    = f6   * (int64_t) g1;
00815   int64_t f6g2    = f6   * (int64_t) g2;
00816   int64_t f6g3    = f6   * (int64_t) g3;
00817   int64_t f6g4_19 = f6   * (int64_t) g4_19;
00818   int64_t f6g5_19 = f6   * (int64_t) g5_19;
00819   int64_t f6g6_19 = f6   * (int64_t) g6_19;
00820   int64_t f6g7_19 = f6   * (int64_t) g7_19;
00821   int64_t f6g8_19 = f6   * (int64_t) g8_19;
00822   int64_t f6g9_19 = f6   * (int64_t) g9_19;
00823   int64_t f7g0    = f7   * (int64_t) g0;
00824   int64_t f7g1_2  = f7_2 * (int64_t) g1;
00825   int64_t f7g2    = f7   * (int64_t) g2;
00826   int64_t f7g3_38 = f7_2 * (int64_t) g3_19;
00827   int64_t f7g4_19 = f7   * (int64_t) g4_19;
00828   int64_t f7g5_38 = f7_2 * (int64_t) g5_19;
00829   int64_t f7g6_19 = f7   * (int64_t) g6_19;
00830   int64_t f7g7_38 = f7_2 * (int64_t) g7_19;
00831   int64_t f7g8_19 = f7   * (int64_t) g8_19;
00832   int64_t f7g9_38 = f7_2 * (int64_t) g9_19;
00833   int64_t f8g0    = f8   * (int64_t) g0;
00834   int64_t f8g1    = f8   * (int64_t) g1;
00835   int64_t f8g2_19 = f8   * (int64_t) g2_19;
00836   int64_t f8g3_19 = f8   * (int64_t) g3_19;
00837   int64_t f8g4_19 = f8   * (int64_t) g4_19;
00838   int64_t f8g5_19 = f8   * (int64_t) g5_19;
00839   int64_t f8g6_19 = f8   * (int64_t) g6_19;
00840   int64_t f8g7_19 = f8   * (int64_t) g7_19;
00841   int64_t f8g8_19 = f8   * (int64_t) g8_19;
00842   int64_t f8g9_19 = f8   * (int64_t) g9_19;
00843   int64_t f9g0    = f9   * (int64_t) g0;
00844   int64_t f9g1_38 = f9_2 * (int64_t) g1_19;
00845   int64_t f9g2_19 = f9   * (int64_t) g2_19;
00846   int64_t f9g3_38 = f9_2 * (int64_t) g3_19;
00847   int64_t f9g4_19 = f9   * (int64_t) g4_19;
00848   int64_t f9g5_38 = f9_2 * (int64_t) g5_19;
00849   int64_t f9g6_19 = f9   * (int64_t) g6_19;
00850   int64_t f9g7_38 = f9_2 * (int64_t) g7_19;
00851   int64_t f9g8_19 = f9   * (int64_t) g8_19;
00852   int64_t f9g9_38 = f9_2 * (int64_t) g9_19;
00853   int64_t h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38;
00854   int64_t h1 = f0g1+f1g0   +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19;
00855   int64_t h2 = f0g2+f1g1_2 +f2g0   +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38;
00856   int64_t h3 = f0g3+f1g2   +f2g1   +f3g0   +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19;
00857   int64_t h4 = f0g4+f1g3_2 +f2g2   +f3g1_2 +f4g0   +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38;
00858   int64_t h5 = f0g5+f1g4   +f2g3   +f3g2   +f4g1   +f5g0   +f6g9_19+f7g8_19+f8g7_19+f9g6_19;
00859   int64_t h6 = f0g6+f1g5_2 +f2g4   +f3g3_2 +f4g2   +f5g1_2 +f6g0   +f7g9_38+f8g8_19+f9g7_38;
00860   int64_t h7 = f0g7+f1g6   +f2g5   +f3g4   +f4g3   +f5g2   +f6g1   +f7g0   +f8g9_19+f9g8_19;
00861   int64_t h8 = f0g8+f1g7_2 +f2g6   +f3g5_2 +f4g4   +f5g3_2 +f6g2   +f7g1_2 +f8g0   +f9g9_38;
00862   int64_t h9 = f0g9+f1g8   +f2g7   +f3g6   +f4g5   +f5g4   +f6g3   +f7g2   +f8g1   +f9g0   ;
00863   int64_t carry0;
00864   int64_t carry1;
00865   int64_t carry2;
00866   int64_t carry3;
00867   int64_t carry4;
00868   int64_t carry5;
00869   int64_t carry6;
00870   int64_t carry7;
00871   int64_t carry8;
00872   int64_t carry9;
00873 
00874   /*
00875   |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38))
00876     i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8
00877   |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19))
00878     i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9
00879   */
00880 
00881   carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
00882   carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
00883   /* |h0| <= 2^25 */
00884   /* |h4| <= 2^25 */
00885   /* |h1| <= 1.71*2^59 */
00886   /* |h5| <= 1.71*2^59 */
00887 
00888   carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
00889   carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
00890   /* |h1| <= 2^24; from now on fits into int32 */
00891   /* |h5| <= 2^24; from now on fits into int32 */
00892   /* |h2| <= 1.41*2^60 */
00893   /* |h6| <= 1.41*2^60 */
00894 
00895   carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
00896   carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
00897   /* |h2| <= 2^25; from now on fits into int32 unchanged */
00898   /* |h6| <= 2^25; from now on fits into int32 unchanged */
00899   /* |h3| <= 1.71*2^59 */
00900   /* |h7| <= 1.71*2^59 */
00901 
00902   carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
00903   carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
00904   /* |h3| <= 2^24; from now on fits into int32 unchanged */
00905   /* |h7| <= 2^24; from now on fits into int32 unchanged */
00906   /* |h4| <= 1.72*2^34 */
00907   /* |h8| <= 1.41*2^60 */
00908 
00909   carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
00910   carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
00911   /* |h4| <= 2^25; from now on fits into int32 unchanged */
00912   /* |h8| <= 2^25; from now on fits into int32 unchanged */
00913   /* |h5| <= 1.01*2^24 */
00914   /* |h9| <= 1.71*2^59 */
00915 
00916   carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
00917   /* |h9| <= 2^24; from now on fits into int32 unchanged */
00918   /* |h0| <= 1.1*2^39 */
00919 
00920   carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
00921   /* |h0| <= 2^25; from now on fits into int32 unchanged */
00922   /* |h1| <= 1.01*2^24 */
00923 
00924   h[0] = (int32_t)h0;
00925   h[1] = (int32_t)h1;
00926   h[2] = (int32_t)h2;
00927   h[3] = (int32_t)h3;
00928   h[4] = (int32_t)h4;
00929   h[5] = (int32_t)h5;
00930   h[6] = (int32_t)h6;
00931   h[7] = (int32_t)h7;
00932   h[8] = (int32_t)h8;
00933   h[9] = (int32_t)h9;
00934 }
00935 
00936 
00937 /*
00938 Replace (f,g) with (g,f) if b == 1;
00939 replace (f,g) with (f,g) if b == 0.
00940 
00941 Preconditions: b in {0,1}.
00942 */
00943 
00944 void fe_cswap(fe f,fe g,unsigned int b)
00945 {
00946   int32_t f0 = f[0];
00947   int32_t f1 = f[1];
00948   int32_t f2 = f[2];
00949   int32_t f3 = f[3];
00950   int32_t f4 = f[4];
00951   int32_t f5 = f[5];
00952   int32_t f6 = f[6];
00953   int32_t f7 = f[7];
00954   int32_t f8 = f[8];
00955   int32_t f9 = f[9];
00956   int32_t g0 = g[0];
00957   int32_t g1 = g[1];
00958   int32_t g2 = g[2];
00959   int32_t g3 = g[3];
00960   int32_t g4 = g[4];
00961   int32_t g5 = g[5];
00962   int32_t g6 = g[6];
00963   int32_t g7 = g[7];
00964   int32_t g8 = g[8];
00965   int32_t g9 = g[9];
00966   int32_t x0 = f0 ^ g0;
00967   int32_t x1 = f1 ^ g1;
00968   int32_t x2 = f2 ^ g2;
00969   int32_t x3 = f3 ^ g3;
00970   int32_t x4 = f4 ^ g4;
00971   int32_t x5 = f5 ^ g5;
00972   int32_t x6 = f6 ^ g6;
00973   int32_t x7 = f7 ^ g7;
00974   int32_t x8 = f8 ^ g8;
00975   int32_t x9 = f9 ^ g9;
00976   b = -b;
00977   x0 &= b;
00978   x1 &= b;
00979   x2 &= b;
00980   x3 &= b;
00981   x4 &= b;
00982   x5 &= b;
00983   x6 &= b;
00984   x7 &= b;
00985   x8 &= b;
00986   x9 &= b;
00987   f[0] = f0 ^ x0;
00988   f[1] = f1 ^ x1;
00989   f[2] = f2 ^ x2;
00990   f[3] = f3 ^ x3;
00991   f[4] = f4 ^ x4;
00992   f[5] = f5 ^ x5;
00993   f[6] = f6 ^ x6;
00994   f[7] = f7 ^ x7;
00995   f[8] = f8 ^ x8;
00996   f[9] = f9 ^ x9;
00997   g[0] = g0 ^ x0;
00998   g[1] = g1 ^ x1;
00999   g[2] = g2 ^ x2;
01000   g[3] = g3 ^ x3;
01001   g[4] = g4 ^ x4;
01002   g[5] = g5 ^ x5;
01003   g[6] = g6 ^ x6;
01004   g[7] = g7 ^ x7;
01005   g[8] = g8 ^ x8;
01006   g[9] = g9 ^ x9;
01007 }
01008 
01009 
01010 /*
01011 h = f * 121666
01012 Can overlap h with f.
01013 
01014 Preconditions:
01015    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
01016 
01017 Postconditions:
01018    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
01019 */
01020 
01021 void fe_mul121666(fe h,fe f)
01022 {
01023   int32_t f0 = f[0];
01024   int32_t f1 = f[1];
01025   int32_t f2 = f[2];
01026   int32_t f3 = f[3];
01027   int32_t f4 = f[4];
01028   int32_t f5 = f[5];
01029   int32_t f6 = f[6];
01030   int32_t f7 = f[7];
01031   int32_t f8 = f[8];
01032   int32_t f9 = f[9];
01033   int64_t h0 = f0 * (int64_t) 121666;
01034   int64_t h1 = f1 * (int64_t) 121666;
01035   int64_t h2 = f2 * (int64_t) 121666;
01036   int64_t h3 = f3 * (int64_t) 121666;
01037   int64_t h4 = f4 * (int64_t) 121666;
01038   int64_t h5 = f5 * (int64_t) 121666;
01039   int64_t h6 = f6 * (int64_t) 121666;
01040   int64_t h7 = f7 * (int64_t) 121666;
01041   int64_t h8 = f8 * (int64_t) 121666;
01042   int64_t h9 = f9 * (int64_t) 121666;
01043   int64_t carry0;
01044   int64_t carry1;
01045   int64_t carry2;
01046   int64_t carry3;
01047   int64_t carry4;
01048   int64_t carry5;
01049   int64_t carry6;
01050   int64_t carry7;
01051   int64_t carry8;
01052   int64_t carry9;
01053 
01054   carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
01055   carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
01056   carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
01057   carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
01058   carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
01059 
01060   carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
01061   carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
01062   carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
01063   carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
01064   carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
01065 
01066   h[0] = (int32_t)h0;
01067   h[1] = (int32_t)h1;
01068   h[2] = (int32_t)h2;
01069   h[3] = (int32_t)h3;
01070   h[4] = (int32_t)h4;
01071   h[5] = (int32_t)h5;
01072   h[6] = (int32_t)h6;
01073   h[7] = (int32_t)h7;
01074   h[8] = (int32_t)h8;
01075   h[9] = (int32_t)h9;
01076 }
01077 
01078 
01079 /*
01080 h = 2 * f * f
01081 Can overlap h with f.
01082 
01083 Preconditions:
01084    |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
01085 
01086 Postconditions:
01087    |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
01088 */
01089 
01090 /*
01091 See fe_mul.c for discussion of implementation strategy.
01092 */
01093 
01094 void fe_sq2(fe h,const fe f)
01095 {
01096   int32_t f0 = f[0];
01097   int32_t f1 = f[1];
01098   int32_t f2 = f[2];
01099   int32_t f3 = f[3];
01100   int32_t f4 = f[4];
01101   int32_t f5 = f[5];
01102   int32_t f6 = f[6];
01103   int32_t f7 = f[7];
01104   int32_t f8 = f[8];
01105   int32_t f9 = f[9];
01106   int32_t f0_2 = 2 * f0;
01107   int32_t f1_2 = 2 * f1;
01108   int32_t f2_2 = 2 * f2;
01109   int32_t f3_2 = 2 * f3;
01110   int32_t f4_2 = 2 * f4;
01111   int32_t f5_2 = 2 * f5;
01112   int32_t f6_2 = 2 * f6;
01113   int32_t f7_2 = 2 * f7;
01114   int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
01115   int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
01116   int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
01117   int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
01118   int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
01119   int64_t f0f0    = f0   * (int64_t) f0;
01120   int64_t f0f1_2  = f0_2 * (int64_t) f1;
01121   int64_t f0f2_2  = f0_2 * (int64_t) f2;
01122   int64_t f0f3_2  = f0_2 * (int64_t) f3;
01123   int64_t f0f4_2  = f0_2 * (int64_t) f4;
01124   int64_t f0f5_2  = f0_2 * (int64_t) f5;
01125   int64_t f0f6_2  = f0_2 * (int64_t) f6;
01126   int64_t f0f7_2  = f0_2 * (int64_t) f7;
01127   int64_t f0f8_2  = f0_2 * (int64_t) f8;
01128   int64_t f0f9_2  = f0_2 * (int64_t) f9;
01129   int64_t f1f1_2  = f1_2 * (int64_t) f1;
01130   int64_t f1f2_2  = f1_2 * (int64_t) f2;
01131   int64_t f1f3_4  = f1_2 * (int64_t) f3_2;
01132   int64_t f1f4_2  = f1_2 * (int64_t) f4;
01133   int64_t f1f5_4  = f1_2 * (int64_t) f5_2;
01134   int64_t f1f6_2  = f1_2 * (int64_t) f6;
01135   int64_t f1f7_4  = f1_2 * (int64_t) f7_2;
01136   int64_t f1f8_2  = f1_2 * (int64_t) f8;
01137   int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
01138   int64_t f2f2    = f2   * (int64_t) f2;
01139   int64_t f2f3_2  = f2_2 * (int64_t) f3;
01140   int64_t f2f4_2  = f2_2 * (int64_t) f4;
01141   int64_t f2f5_2  = f2_2 * (int64_t) f5;
01142   int64_t f2f6_2  = f2_2 * (int64_t) f6;
01143   int64_t f2f7_2  = f2_2 * (int64_t) f7;
01144   int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
01145   int64_t f2f9_38 = f2   * (int64_t) f9_38;
01146   int64_t f3f3_2  = f3_2 * (int64_t) f3;
01147   int64_t f3f4_2  = f3_2 * (int64_t) f4;
01148   int64_t f3f5_4  = f3_2 * (int64_t) f5_2;
01149   int64_t f3f6_2  = f3_2 * (int64_t) f6;
01150   int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
01151   int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
01152   int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
01153   int64_t f4f4    = f4   * (int64_t) f4;
01154   int64_t f4f5_2  = f4_2 * (int64_t) f5;
01155   int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
01156   int64_t f4f7_38 = f4   * (int64_t) f7_38;
01157   int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
01158   int64_t f4f9_38 = f4   * (int64_t) f9_38;
01159   int64_t f5f5_38 = f5   * (int64_t) f5_38;
01160   int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
01161   int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
01162   int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
01163   int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
01164   int64_t f6f6_19 = f6   * (int64_t) f6_19;
01165   int64_t f6f7_38 = f6   * (int64_t) f7_38;
01166   int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
01167   int64_t f6f9_38 = f6   * (int64_t) f9_38;
01168   int64_t f7f7_38 = f7   * (int64_t) f7_38;
01169   int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
01170   int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
01171   int64_t f8f8_19 = f8   * (int64_t) f8_19;
01172   int64_t f8f9_38 = f8   * (int64_t) f9_38;
01173   int64_t f9f9_38 = f9   * (int64_t) f9_38;
01174   int64_t h0 = f0f0  +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
01175   int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
01176   int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
01177   int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
01178   int64_t h4 = f0f4_2+f1f3_4 +f2f2   +f5f9_76+f6f8_38+f7f7_38;
01179   int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
01180   int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
01181   int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
01182   int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4   +f9f9_38;
01183   int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
01184   int64_t carry0;
01185   int64_t carry1;
01186   int64_t carry2;
01187   int64_t carry3;
01188   int64_t carry4;
01189   int64_t carry5;
01190   int64_t carry6;
01191   int64_t carry7;
01192   int64_t carry8;
01193   int64_t carry9;
01194 
01195   h0 += h0;
01196   h1 += h1;
01197   h2 += h2;
01198   h3 += h3;
01199   h4 += h4;
01200   h5 += h5;
01201   h6 += h6;
01202   h7 += h7;
01203   h8 += h8;
01204   h9 += h9;
01205 
01206   carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
01207   carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
01208 
01209   carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
01210   carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
01211 
01212   carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
01213   carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
01214 
01215   carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
01216   carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
01217 
01218   carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
01219   carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
01220 
01221   carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
01222 
01223   carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
01224 
01225   h[0] = (int32_t)h0;
01226   h[1] = (int32_t)h1;
01227   h[2] = (int32_t)h2;
01228   h[3] = (int32_t)h3;
01229   h[4] = (int32_t)h4;
01230   h[5] = (int32_t)h5;
01231   h[6] = (int32_t)h6;
01232   h[7] = (int32_t)h7;
01233   h[8] = (int32_t)h8;
01234   h[9] = (int32_t)h9;
01235 }
01236 
01237 
01238 void fe_pow22523(fe out,const fe z)
01239 {
01240   fe t0;
01241   fe t1;
01242   fe t2;
01243   int i;
01244 
01245   fe_sq(t0,z); for (i = 1;i < 1;++i) fe_sq(t0,t0);
01246   fe_sq(t1,t0); for (i = 1;i < 2;++i) fe_sq(t1,t1);
01247   fe_mul(t1,z,t1);
01248   fe_mul(t0,t0,t1);
01249   fe_sq(t0,t0); for (i = 1;i < 1;++i) fe_sq(t0,t0);
01250   fe_mul(t0,t1,t0);
01251   fe_sq(t1,t0); for (i = 1;i < 5;++i) fe_sq(t1,t1);
01252   fe_mul(t0,t1,t0);
01253   fe_sq(t1,t0); for (i = 1;i < 10;++i) fe_sq(t1,t1);
01254   fe_mul(t1,t1,t0);
01255   fe_sq(t2,t1); for (i = 1;i < 20;++i) fe_sq(t2,t2);
01256   fe_mul(t1,t2,t1);
01257   fe_sq(t1,t1); for (i = 1;i < 10;++i) fe_sq(t1,t1);
01258   fe_mul(t0,t1,t0);
01259   fe_sq(t1,t0); for (i = 1;i < 50;++i) fe_sq(t1,t1);
01260   fe_mul(t1,t1,t0);
01261   fe_sq(t2,t1); for (i = 1;i < 100;++i) fe_sq(t2,t2);
01262   fe_mul(t1,t2,t1);
01263   fe_sq(t1,t1); for (i = 1;i < 50;++i) fe_sq(t1,t1);
01264   fe_mul(t0,t1,t0);
01265   fe_sq(t0,t0); for (i = 1;i < 2;++i) fe_sq(t0,t0);
01266   fe_mul(out,t0,z);
01267 
01268   return;
01269 }
01270 
01271 
01272 /*
01273 h = -f
01274 
01275 Preconditions:
01276    |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
01277 
01278 Postconditions:
01279    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
01280 */
01281 
01282 void fe_neg(fe h,const fe f)
01283 {
01284   int32_t f0 = f[0];
01285   int32_t f1 = f[1];
01286   int32_t f2 = f[2];
01287   int32_t f3 = f[3];
01288   int32_t f4 = f[4];
01289   int32_t f5 = f[5];
01290   int32_t f6 = f[6];
01291   int32_t f7 = f[7];
01292   int32_t f8 = f[8];
01293   int32_t f9 = f[9];
01294   int32_t h0 = -f0;
01295   int32_t h1 = -f1;
01296   int32_t h2 = -f2;
01297   int32_t h3 = -f3;
01298   int32_t h4 = -f4;
01299   int32_t h5 = -f5;
01300   int32_t h6 = -f6;
01301   int32_t h7 = -f7;
01302   int32_t h8 = -f8;
01303   int32_t h9 = -f9;
01304   h[0] = h0;
01305   h[1] = h1;
01306   h[2] = h2;
01307   h[3] = h3;
01308   h[4] = h4;
01309   h[5] = h5;
01310   h[6] = h6;
01311   h[7] = h7;
01312   h[8] = h8;
01313   h[9] = h9;
01314 }
01315 
01316 
01317 /*
01318 Preconditions:
01319    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
01320 */
01321 
01322 static const unsigned char zero[32] = {0};
01323 
01324 int fe_isnonzero(const fe f)
01325 {
01326   unsigned char s[32];
01327   fe_tobytes(s,f);
01328   return ConstantCompare(s,zero,32);
01329 }
01330 
01331 
01332 /*
01333 return 1 if f is in {1,3,5,...,q-2}
01334 return 0 if f is in {0,2,4,...,q-1}
01335 
01336 Preconditions:
01337    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
01338 */
01339 
01340 int fe_isnegative(const fe f)
01341 {
01342   unsigned char s[32];
01343   fe_tobytes(s,f);
01344   return s[0] & 1;
01345 }
01346 
01347 
01348 /*
01349 Replace (f,g) with (g,g) if b == 1;
01350 replace (f,g) with (f,g) if b == 0.
01351 
01352 Preconditions: b in {0,1}.
01353 */
01354 
01355 void fe_cmov(fe f,const fe g,unsigned int b)
01356 {
01357   int32_t f0 = f[0];
01358   int32_t f1 = f[1];
01359   int32_t f2 = f[2];
01360   int32_t f3 = f[3];
01361   int32_t f4 = f[4];
01362   int32_t f5 = f[5];
01363   int32_t f6 = f[6];
01364   int32_t f7 = f[7];
01365   int32_t f8 = f[8];
01366   int32_t f9 = f[9];
01367   int32_t g0 = g[0];
01368   int32_t g1 = g[1];
01369   int32_t g2 = g[2];
01370   int32_t g3 = g[3];
01371   int32_t g4 = g[4];
01372   int32_t g5 = g[5];
01373   int32_t g6 = g[6];
01374   int32_t g7 = g[7];
01375   int32_t g8 = g[8];
01376   int32_t g9 = g[9];
01377   int32_t x0 = f0 ^ g0;
01378   int32_t x1 = f1 ^ g1;
01379   int32_t x2 = f2 ^ g2;
01380   int32_t x3 = f3 ^ g3;
01381   int32_t x4 = f4 ^ g4;
01382   int32_t x5 = f5 ^ g5;
01383   int32_t x6 = f6 ^ g6;
01384   int32_t x7 = f7 ^ g7;
01385   int32_t x8 = f8 ^ g8;
01386   int32_t x9 = f9 ^ g9;
01387   b = -b;
01388   x0 &= b;
01389   x1 &= b;
01390   x2 &= b;
01391   x3 &= b;
01392   x4 &= b;
01393   x5 &= b;
01394   x6 &= b;
01395   x7 &= b;
01396   x8 &= b;
01397   x9 &= b;
01398   f[0] = f0 ^ x0;
01399   f[1] = f1 ^ x1;
01400   f[2] = f2 ^ x2;
01401   f[3] = f3 ^ x3;
01402   f[4] = f4 ^ x4;
01403   f[5] = f5 ^ x5;
01404   f[6] = f6 ^ x6;
01405   f[7] = f7 ^ x7;
01406   f[8] = f8 ^ x8;
01407   f[9] = f9 ^ x9;
01408 }
01409 #endif /* HAVE ED25519 or CURVE25519 */
01410 #endif /* not defined CURVED25519_SMALL */
01411 
01412