wolf SSL / wolfSSL-TLS13-Beta

Fork of wolfSSL by wolf SSL

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers fe_operations.c Source File

fe_operations.c

00001 /* fe_operations.c
00002  *
00003  * Copyright (C) 2006-2016 wolfSSL Inc.
00004  *
00005  * This file is part of wolfSSL.
00006  *
00007  * wolfSSL is free software; you can redistribute it and/or modify
00008  * it under the terms of the GNU General Public License as published by
00009  * the Free Software Foundation; either version 2 of the License, or
00010  * (at your option) any later version.
00011  *
00012  * wolfSSL is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
00020  */
00021 
00022 
00023  /* Based On Daniel J Bernstein's curve25519 Public Domain ref10 work. */
00024 
00025 #ifdef HAVE_CONFIG_H
00026     #include <config.h>
00027 #endif
00028 
00029 #include <wolfssl/wolfcrypt/settings.h>
00030 
00031 #ifndef CURVED25519_SMALL /* run when not defined to use small memory math */
00032 #if defined(HAVE_ED25519) || defined(HAVE_CURVE25519)
00033 
00034 #include <wolfssl/wolfcrypt/fe_operations.h>
00035 #include <stdint.h>
00036 
00037 #ifdef NO_INLINE
00038     #include <wolfssl/wolfcrypt/misc.h>
00039 #else
00040     #define WOLFSSL_MISC_INCLUDED
00041     #include <wolfcrypt/src/misc.c>
00042 #endif
00043 
00044 #ifdef HAVE___UINT128_T
00045 #include "fe_x25519_128.i"
00046 #else
00047 /*
00048 fe means field element.
00049 Here the field is \Z/(2^255-19).
00050 An element t, entries t[0]...t[9], represents the integer
00051 t[0]+2^26 t[1]+2^51 t[2]+2^77 t[3]+2^102 t[4]+...+2^230 t[9].
00052 Bounds on each t[i] vary depending on context.
00053 */
00054 
/* Assemble the three bytes in[0..2] into a little-endian unsigned value
 * (in[0] is the least significant byte).  The result is below 2^24. */
uint64_t load_3(const unsigned char *in)
{
  return  ((uint64_t) in[0])
       | (((uint64_t) in[1]) << 8)
       | (((uint64_t) in[2]) << 16);
}
00063 
00064 
/* Assemble the four bytes in[0..3] into a little-endian unsigned value
 * (in[0] is the least significant byte).  Each byte is widened to 64 bits
 * before shifting, so the top byte is never sign-extended. */
uint64_t load_4(const unsigned char *in)
{
  return  ((uint64_t) in[0])
       | (((uint64_t) in[1]) << 8)
       | (((uint64_t) in[2]) << 16)
       | (((uint64_t) in[3]) << 24);
}
00074 
00075 
00076 /*
00077 h = 1
00078 */
00079 
00080 void fe_1(fe h)
00081 {
00082   h[0] = 1;
00083   h[1] = 0;
00084   h[2] = 0;
00085   h[3] = 0;
00086   h[4] = 0;
00087   h[5] = 0;
00088   h[6] = 0;
00089   h[7] = 0;
00090   h[8] = 0;
00091   h[9] = 0;
00092 }
00093 
00094 
00095 /*
00096 h = 0
00097 */
00098 
00099 void fe_0(fe h)
00100 {
00101   h[0] = 0;
00102   h[1] = 0;
00103   h[2] = 0;
00104   h[3] = 0;
00105   h[4] = 0;
00106   h[5] = 0;
00107   h[6] = 0;
00108   h[7] = 0;
00109   h[8] = 0;
00110   h[9] = 0;
00111 }
00112 
00113 #ifndef FREESCALE_LTC_ECC
00114 int curve25519(byte* q, byte* n, byte* p)
00115 {
00116 #if 0
00117   unsigned char e[32];
00118 #endif
00119   fe x1;
00120   fe x2;
00121   fe z2;
00122   fe x3;
00123   fe z3;
00124   fe tmp0;
00125   fe tmp1;
00126   int pos;
00127   unsigned int swap;
00128   unsigned int b;
00129 
00130   /* Clamp already done during key generation and import */
00131 #if 0
00132   {
00133     unsigned int i;
00134     for (i = 0;i < 32;++i) e[i] = n[i];
00135     e[0] &= 248;
00136     e[31] &= 127;
00137     e[31] |= 64;
00138   }
00139 #endif
00140 
00141   fe_frombytes(x1,p);
00142   fe_1(x2);
00143   fe_0(z2);
00144   fe_copy(x3,x1);
00145   fe_1(z3);
00146 
00147   swap = 0;
00148   for (pos = 254;pos >= 0;--pos) {
00149 #if 0
00150     b = e[pos / 8] >> (pos & 7);
00151 #else
00152     b = n[pos / 8] >> (pos & 7);
00153 #endif
00154     b &= 1;
00155     swap ^= b;
00156     fe_cswap(x2,x3,swap);
00157     fe_cswap(z2,z3,swap);
00158     swap = b;
00159 
00160     /* montgomery */
00161     fe_sub(tmp0,x3,z3);
00162     fe_sub(tmp1,x2,z2);
00163     fe_add(x2,x2,z2);
00164     fe_add(z2,x3,z3);
00165     fe_mul(z3,tmp0,x2);
00166     fe_mul(z2,z2,tmp1);
00167     fe_sq(tmp0,tmp1);
00168     fe_sq(tmp1,x2);
00169     fe_add(x3,z3,z2);
00170     fe_sub(z2,z3,z2);
00171     fe_mul(x2,tmp1,tmp0);
00172     fe_sub(tmp1,tmp1,tmp0);
00173     fe_sq(z2,z2);
00174     fe_mul121666(z3,tmp1);
00175     fe_sq(x3,x3);
00176     fe_add(tmp0,tmp0,z3);
00177     fe_mul(z3,x1,z2);
00178     fe_mul(z2,tmp1,tmp0);
00179   }
00180   fe_cswap(x2,x3,swap);
00181   fe_cswap(z2,z3,swap);
00182 
00183   fe_invert(z2,z2);
00184   fe_mul(x2,x2,z2);
00185   fe_tobytes(q,x2);
00186 
00187   return 0;
00188 }
00189 #endif /* !FREESCALE_LTC_ECC */
00190 
00191 /*
00192 h = f * f
00193 Can overlap h with f.
00194 
00195 Preconditions:
00196    |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
00197 
00198 Postconditions:
00199    |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
00200 */
00201 
00202 /*
00203 See fe_mul.c for discussion of implementation strategy.
00204 */
00205 
00206 void fe_sq(fe h,const fe f)
00207 {
00208   int32_t f0 = f[0];
00209   int32_t f1 = f[1];
00210   int32_t f2 = f[2];
00211   int32_t f3 = f[3];
00212   int32_t f4 = f[4];
00213   int32_t f5 = f[5];
00214   int32_t f6 = f[6];
00215   int32_t f7 = f[7];
00216   int32_t f8 = f[8];
00217   int32_t f9 = f[9];
00218   int32_t f0_2 = 2 * f0;
00219   int32_t f1_2 = 2 * f1;
00220   int32_t f2_2 = 2 * f2;
00221   int32_t f3_2 = 2 * f3;
00222   int32_t f4_2 = 2 * f4;
00223   int32_t f5_2 = 2 * f5;
00224   int32_t f6_2 = 2 * f6;
00225   int32_t f7_2 = 2 * f7;
00226   int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
00227   int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
00228   int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
00229   int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
00230   int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
00231   int64_t f0f0    = f0   * (int64_t) f0;
00232   int64_t f0f1_2  = f0_2 * (int64_t) f1;
00233   int64_t f0f2_2  = f0_2 * (int64_t) f2;
00234   int64_t f0f3_2  = f0_2 * (int64_t) f3;
00235   int64_t f0f4_2  = f0_2 * (int64_t) f4;
00236   int64_t f0f5_2  = f0_2 * (int64_t) f5;
00237   int64_t f0f6_2  = f0_2 * (int64_t) f6;
00238   int64_t f0f7_2  = f0_2 * (int64_t) f7;
00239   int64_t f0f8_2  = f0_2 * (int64_t) f8;
00240   int64_t f0f9_2  = f0_2 * (int64_t) f9;
00241   int64_t f1f1_2  = f1_2 * (int64_t) f1;
00242   int64_t f1f2_2  = f1_2 * (int64_t) f2;
00243   int64_t f1f3_4  = f1_2 * (int64_t) f3_2;
00244   int64_t f1f4_2  = f1_2 * (int64_t) f4;
00245   int64_t f1f5_4  = f1_2 * (int64_t) f5_2;
00246   int64_t f1f6_2  = f1_2 * (int64_t) f6;
00247   int64_t f1f7_4  = f1_2 * (int64_t) f7_2;
00248   int64_t f1f8_2  = f1_2 * (int64_t) f8;
00249   int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
00250   int64_t f2f2    = f2   * (int64_t) f2;
00251   int64_t f2f3_2  = f2_2 * (int64_t) f3;
00252   int64_t f2f4_2  = f2_2 * (int64_t) f4;
00253   int64_t f2f5_2  = f2_2 * (int64_t) f5;
00254   int64_t f2f6_2  = f2_2 * (int64_t) f6;
00255   int64_t f2f7_2  = f2_2 * (int64_t) f7;
00256   int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
00257   int64_t f2f9_38 = f2   * (int64_t) f9_38;
00258   int64_t f3f3_2  = f3_2 * (int64_t) f3;
00259   int64_t f3f4_2  = f3_2 * (int64_t) f4;
00260   int64_t f3f5_4  = f3_2 * (int64_t) f5_2;
00261   int64_t f3f6_2  = f3_2 * (int64_t) f6;
00262   int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
00263   int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
00264   int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
00265   int64_t f4f4    = f4   * (int64_t) f4;
00266   int64_t f4f5_2  = f4_2 * (int64_t) f5;
00267   int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
00268   int64_t f4f7_38 = f4   * (int64_t) f7_38;
00269   int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
00270   int64_t f4f9_38 = f4   * (int64_t) f9_38;
00271   int64_t f5f5_38 = f5   * (int64_t) f5_38;
00272   int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
00273   int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
00274   int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
00275   int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
00276   int64_t f6f6_19 = f6   * (int64_t) f6_19;
00277   int64_t f6f7_38 = f6   * (int64_t) f7_38;
00278   int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
00279   int64_t f6f9_38 = f6   * (int64_t) f9_38;
00280   int64_t f7f7_38 = f7   * (int64_t) f7_38;
00281   int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
00282   int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
00283   int64_t f8f8_19 = f8   * (int64_t) f8_19;
00284   int64_t f8f9_38 = f8   * (int64_t) f9_38;
00285   int64_t f9f9_38 = f9   * (int64_t) f9_38;
00286   int64_t h0 = f0f0  +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
00287   int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
00288   int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
00289   int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
00290   int64_t h4 = f0f4_2+f1f3_4 +f2f2   +f5f9_76+f6f8_38+f7f7_38;
00291   int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
00292   int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
00293   int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
00294   int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4   +f9f9_38;
00295   int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
00296   int64_t carry0;
00297   int64_t carry1;
00298   int64_t carry2;
00299   int64_t carry3;
00300   int64_t carry4;
00301   int64_t carry5;
00302   int64_t carry6;
00303   int64_t carry7;
00304   int64_t carry8;
00305   int64_t carry9;
00306 
00307   carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
00308   carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
00309 
00310   carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
00311   carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
00312 
00313   carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
00314   carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
00315 
00316   carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
00317   carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
00318 
00319   carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
00320   carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
00321 
00322   carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
00323 
00324   carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
00325 
00326   h[0] = (int32_t)h0;
00327   h[1] = (int32_t)h1;
00328   h[2] = (int32_t)h2;
00329   h[3] = (int32_t)h3;
00330   h[4] = (int32_t)h4;
00331   h[5] = (int32_t)h5;
00332   h[6] = (int32_t)h6;
00333   h[7] = (int32_t)h7;
00334   h[8] = (int32_t)h8;
00335   h[9] = (int32_t)h9;
00336 }
00337 
00338 
00339 /*
00340 h = f + g
00341 Can overlap h with f or g.
00342 
00343 Preconditions:
00344    |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
00345    |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
00346 
00347 Postconditions:
00348    |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
00349 */
00350 
00351 void fe_add(fe h,const fe f,const fe g)
00352 {
00353   int32_t f0 = f[0];
00354   int32_t f1 = f[1];
00355   int32_t f2 = f[2];
00356   int32_t f3 = f[3];
00357   int32_t f4 = f[4];
00358   int32_t f5 = f[5];
00359   int32_t f6 = f[6];
00360   int32_t f7 = f[7];
00361   int32_t f8 = f[8];
00362   int32_t f9 = f[9];
00363   int32_t g0 = g[0];
00364   int32_t g1 = g[1];
00365   int32_t g2 = g[2];
00366   int32_t g3 = g[3];
00367   int32_t g4 = g[4];
00368   int32_t g5 = g[5];
00369   int32_t g6 = g[6];
00370   int32_t g7 = g[7];
00371   int32_t g8 = g[8];
00372   int32_t g9 = g[9];
00373   int32_t h0 = f0 + g0;
00374   int32_t h1 = f1 + g1;
00375   int32_t h2 = f2 + g2;
00376   int32_t h3 = f3 + g3;
00377   int32_t h4 = f4 + g4;
00378   int32_t h5 = f5 + g5;
00379   int32_t h6 = f6 + g6;
00380   int32_t h7 = f7 + g7;
00381   int32_t h8 = f8 + g8;
00382   int32_t h9 = f9 + g9;
00383   h[0] = h0;
00384   h[1] = h1;
00385   h[2] = h2;
00386   h[3] = h3;
00387   h[4] = h4;
00388   h[5] = h5;
00389   h[6] = h6;
00390   h[7] = h7;
00391   h[8] = h8;
00392   h[9] = h9;
00393 }
00394 
00395 
00396 /*
00397 Preconditions:
00398   |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
00399 
00400 Write p=2^255-19; q=floor(h/p).
00401 Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
00402 
00403 Proof:
00404   Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
00405   Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4.
00406 
00407   Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
00408   Then 0<y<1.
00409 
00410   Write r=h-pq.
00411   Have 0<=r<=p-1=2^255-20.
00412   Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
00413 
00414   Write x=r+19(2^-255)r+y.
00415   Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
00416 
00417   Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
00418   so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
00419 */
00420 
00421 void fe_tobytes(unsigned char *s,const fe h)
00422 {
00423   int32_t h0 = h[0];
00424   int32_t h1 = h[1];
00425   int32_t h2 = h[2];
00426   int32_t h3 = h[3];
00427   int32_t h4 = h[4];
00428   int32_t h5 = h[5];
00429   int32_t h6 = h[6];
00430   int32_t h7 = h[7];
00431   int32_t h8 = h[8];
00432   int32_t h9 = h[9];
00433   int32_t q;
00434   int32_t carry0;
00435   int32_t carry1;
00436   int32_t carry2;
00437   int32_t carry3;
00438   int32_t carry4;
00439   int32_t carry5;
00440   int32_t carry6;
00441   int32_t carry7;
00442   int32_t carry8;
00443   int32_t carry9;
00444 
00445   q = (19 * h9 + (((int32_t) 1) << 24)) >> 25;
00446   q = (h0 + q) >> 26;
00447   q = (h1 + q) >> 25;
00448   q = (h2 + q) >> 26;
00449   q = (h3 + q) >> 25;
00450   q = (h4 + q) >> 26;
00451   q = (h5 + q) >> 25;
00452   q = (h6 + q) >> 26;
00453   q = (h7 + q) >> 25;
00454   q = (h8 + q) >> 26;
00455   q = (h9 + q) >> 25;
00456 
00457   /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
00458   h0 += 19 * q;
00459   /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
00460 
00461   carry0 = h0 >> 26; h1 += carry0; h0 -= carry0 << 26;
00462   carry1 = h1 >> 25; h2 += carry1; h1 -= carry1 << 25;
00463   carry2 = h2 >> 26; h3 += carry2; h2 -= carry2 << 26;
00464   carry3 = h3 >> 25; h4 += carry3; h3 -= carry3 << 25;
00465   carry4 = h4 >> 26; h5 += carry4; h4 -= carry4 << 26;
00466   carry5 = h5 >> 25; h6 += carry5; h5 -= carry5 << 25;
00467   carry6 = h6 >> 26; h7 += carry6; h6 -= carry6 << 26;
00468   carry7 = h7 >> 25; h8 += carry7; h7 -= carry7 << 25;
00469   carry8 = h8 >> 26; h9 += carry8; h8 -= carry8 << 26;
00470   carry9 = h9 >> 25;               h9 -= carry9 << 25;
00471                   /* h10 = carry9 */
00472 
00473   /*
00474   Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
00475   Have h0+...+2^230 h9 between 0 and 2^255-1;
00476   evidently 2^255 h10-2^255 q = 0.
00477   Goal: Output h0+...+2^230 h9.
00478   */
00479 
00480   s[0] = h0 >> 0;
00481   s[1] = h0 >> 8;
00482   s[2] = h0 >> 16;
00483   s[3] = (h0 >> 24) | (h1 << 2);
00484   s[4] = h1 >> 6;
00485   s[5] = h1 >> 14;
00486   s[6] = (h1 >> 22) | (h2 << 3);
00487   s[7] = h2 >> 5;
00488   s[8] = h2 >> 13;
00489   s[9] = (h2 >> 21) | (h3 << 5);
00490   s[10] = h3 >> 3;
00491   s[11] = h3 >> 11;
00492   s[12] = (h3 >> 19) | (h4 << 6);
00493   s[13] = h4 >> 2;
00494   s[14] = h4 >> 10;
00495   s[15] = h4 >> 18;
00496   s[16] = h5 >> 0;
00497   s[17] = h5 >> 8;
00498   s[18] = h5 >> 16;
00499   s[19] = (h5 >> 24) | (h6 << 1);
00500   s[20] = h6 >> 7;
00501   s[21] = h6 >> 15;
00502   s[22] = (h6 >> 23) | (h7 << 3);
00503   s[23] = h7 >> 5;
00504   s[24] = h7 >> 13;
00505   s[25] = (h7 >> 21) | (h8 << 4);
00506   s[26] = h8 >> 4;
00507   s[27] = h8 >> 12;
00508   s[28] = (h8 >> 20) | (h9 << 6);
00509   s[29] = h9 >> 2;
00510   s[30] = h9 >> 10;
00511   s[31] = h9 >> 18;
00512 }
00513 
00514 
00515 /*
00516 h = f - g
00517 Can overlap h with f or g.
00518 
00519 Preconditions:
00520    |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
00521    |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
00522 
00523 Postconditions:
00524    |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
00525 */
00526 
00527 void fe_sub(fe h,const fe f,const fe g)
00528 {
00529   int32_t f0 = f[0];
00530   int32_t f1 = f[1];
00531   int32_t f2 = f[2];
00532   int32_t f3 = f[3];
00533   int32_t f4 = f[4];
00534   int32_t f5 = f[5];
00535   int32_t f6 = f[6];
00536   int32_t f7 = f[7];
00537   int32_t f8 = f[8];
00538   int32_t f9 = f[9];
00539   int32_t g0 = g[0];
00540   int32_t g1 = g[1];
00541   int32_t g2 = g[2];
00542   int32_t g3 = g[3];
00543   int32_t g4 = g[4];
00544   int32_t g5 = g[5];
00545   int32_t g6 = g[6];
00546   int32_t g7 = g[7];
00547   int32_t g8 = g[8];
00548   int32_t g9 = g[9];
00549   int32_t h0 = f0 - g0;
00550   int32_t h1 = f1 - g1;
00551   int32_t h2 = f2 - g2;
00552   int32_t h3 = f3 - g3;
00553   int32_t h4 = f4 - g4;
00554   int32_t h5 = f5 - g5;
00555   int32_t h6 = f6 - g6;
00556   int32_t h7 = f7 - g7;
00557   int32_t h8 = f8 - g8;
00558   int32_t h9 = f9 - g9;
00559   h[0] = h0;
00560   h[1] = h1;
00561   h[2] = h2;
00562   h[3] = h3;
00563   h[4] = h4;
00564   h[5] = h5;
00565   h[6] = h6;
00566   h[7] = h7;
00567   h[8] = h8;
00568   h[9] = h9;
00569 }
00570 
00571 
00572 /*
00573 Ignores top bit of h.
00574 */
00575 
00576 void fe_frombytes(fe h,const unsigned char *s)
00577 {
00578   int64_t h0 = load_4(s);
00579   int64_t h1 = load_3(s + 4) << 6;
00580   int64_t h2 = load_3(s + 7) << 5;
00581   int64_t h3 = load_3(s + 10) << 3;
00582   int64_t h4 = load_3(s + 13) << 2;
00583   int64_t h5 = load_4(s + 16);
00584   int64_t h6 = load_3(s + 20) << 7;
00585   int64_t h7 = load_3(s + 23) << 5;
00586   int64_t h8 = load_3(s + 26) << 4;
00587   int64_t h9 = (load_3(s + 29) & 8388607) << 2;
00588   int64_t carry0;
00589   int64_t carry1;
00590   int64_t carry2;
00591   int64_t carry3;
00592   int64_t carry4;
00593   int64_t carry5;
00594   int64_t carry6;
00595   int64_t carry7;
00596   int64_t carry8;
00597   int64_t carry9;
00598 
00599   carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
00600   carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
00601   carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
00602   carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
00603   carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
00604 
00605   carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
00606   carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
00607   carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
00608   carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
00609   carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
00610 
00611   h[0] = (int32_t)h0;
00612   h[1] = (int32_t)h1;
00613   h[2] = (int32_t)h2;
00614   h[3] = (int32_t)h3;
00615   h[4] = (int32_t)h4;
00616   h[5] = (int32_t)h5;
00617   h[6] = (int32_t)h6;
00618   h[7] = (int32_t)h7;
00619   h[8] = (int32_t)h8;
00620   h[9] = (int32_t)h9;
00621 }
00622 
00623 
00624 void fe_invert(fe out,const fe z)
00625 {
00626   fe t0;
00627   fe t1;
00628   fe t2;
00629   fe t3;
00630   int i;
00631 
00632   /* pow225521 */
00633   fe_sq(t0,z); for (i = 1;i < 1;++i) fe_sq(t0,t0);
00634   fe_sq(t1,t0); for (i = 1;i < 2;++i) fe_sq(t1,t1);
00635   fe_mul(t1,z,t1);
00636   fe_mul(t0,t0,t1);
00637   fe_sq(t2,t0); for (i = 1;i < 1;++i) fe_sq(t2,t2);
00638   fe_mul(t1,t1,t2);
00639   fe_sq(t2,t1); for (i = 1;i < 5;++i) fe_sq(t2,t2);
00640   fe_mul(t1,t2,t1);
00641   fe_sq(t2,t1); for (i = 1;i < 10;++i) fe_sq(t2,t2);
00642   fe_mul(t2,t2,t1);
00643   fe_sq(t3,t2); for (i = 1;i < 20;++i) fe_sq(t3,t3);
00644   fe_mul(t2,t3,t2);
00645   fe_sq(t2,t2); for (i = 1;i < 10;++i) fe_sq(t2,t2);
00646   fe_mul(t1,t2,t1);
00647   fe_sq(t2,t1); for (i = 1;i < 50;++i) fe_sq(t2,t2);
00648   fe_mul(t2,t2,t1);
00649   fe_sq(t3,t2); for (i = 1;i < 100;++i) fe_sq(t3,t3);
00650   fe_mul(t2,t3,t2);
00651   fe_sq(t2,t2); for (i = 1;i < 50;++i) fe_sq(t2,t2);
00652   fe_mul(t1,t2,t1);
00653   fe_sq(t1,t1); for (i = 1;i < 5;++i) fe_sq(t1,t1);
00654   fe_mul(out,t1,t0);
00655 
00656   return;
00657 }
00658 
00659 
00660 /*
00661 h = f
00662 */
00663 
00664 void fe_copy(fe h,const fe f)
00665 {
00666   int32_t f0 = f[0];
00667   int32_t f1 = f[1];
00668   int32_t f2 = f[2];
00669   int32_t f3 = f[3];
00670   int32_t f4 = f[4];
00671   int32_t f5 = f[5];
00672   int32_t f6 = f[6];
00673   int32_t f7 = f[7];
00674   int32_t f8 = f[8];
00675   int32_t f9 = f[9];
00676   h[0] = f0;
00677   h[1] = f1;
00678   h[2] = f2;
00679   h[3] = f3;
00680   h[4] = f4;
00681   h[5] = f5;
00682   h[6] = f6;
00683   h[7] = f7;
00684   h[8] = f8;
00685   h[9] = f9;
00686 }
00687 
00688 
00689 /*
00690 h = f * g
00691 Can overlap h with f or g.
00692 
00693 Preconditions:
00694    |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
00695    |g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
00696 
00697 Postconditions:
00698    |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
00699 */
00700 
00701 /*
00702 Notes on implementation strategy:
00703 
00704 Using schoolbook multiplication.
00705 Karatsuba would save a little in some cost models.
00706 
00707 Most multiplications by 2 and 19 are 32-bit precomputations;
00708 cheaper than 64-bit postcomputations.
00709 
00710 There is one remaining multiplication by 19 in the carry chain;
00711 one *19 precomputation can be merged into this,
00712 but the resulting data flow is considerably less clean.
00713 
00714 There are 12 carries below.
00715 10 of them are 2-way parallelizable and vectorizable.
00716 Can get away with 11 carries, but then data flow is much deeper.
00717 
00718 With tighter constraints on inputs can squeeze carries into int32.
00719 */
00720 
00721 void fe_mul(fe h,const fe f,const fe g)
00722 {
00723   int32_t f0 = f[0];
00724   int32_t f1 = f[1];
00725   int32_t f2 = f[2];
00726   int32_t f3 = f[3];
00727   int32_t f4 = f[4];
00728   int32_t f5 = f[5];
00729   int32_t f6 = f[6];
00730   int32_t f7 = f[7];
00731   int32_t f8 = f[8];
00732   int32_t f9 = f[9];
00733   int32_t g0 = g[0];
00734   int32_t g1 = g[1];
00735   int32_t g2 = g[2];
00736   int32_t g3 = g[3];
00737   int32_t g4 = g[4];
00738   int32_t g5 = g[5];
00739   int32_t g6 = g[6];
00740   int32_t g7 = g[7];
00741   int32_t g8 = g[8];
00742   int32_t g9 = g[9];
00743   int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */
00744   int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */
00745   int32_t g3_19 = 19 * g3;
00746   int32_t g4_19 = 19 * g4;
00747   int32_t g5_19 = 19 * g5;
00748   int32_t g6_19 = 19 * g6;
00749   int32_t g7_19 = 19 * g7;
00750   int32_t g8_19 = 19 * g8;
00751   int32_t g9_19 = 19 * g9;
00752   int32_t f1_2 = 2 * f1;
00753   int32_t f3_2 = 2 * f3;
00754   int32_t f5_2 = 2 * f5;
00755   int32_t f7_2 = 2 * f7;
00756   int32_t f9_2 = 2 * f9;
00757   int64_t f0g0    = f0   * (int64_t) g0;
00758   int64_t f0g1    = f0   * (int64_t) g1;
00759   int64_t f0g2    = f0   * (int64_t) g2;
00760   int64_t f0g3    = f0   * (int64_t) g3;
00761   int64_t f0g4    = f0   * (int64_t) g4;
00762   int64_t f0g5    = f0   * (int64_t) g5;
00763   int64_t f0g6    = f0   * (int64_t) g6;
00764   int64_t f0g7    = f0   * (int64_t) g7;
00765   int64_t f0g8    = f0   * (int64_t) g8;
00766   int64_t f0g9    = f0   * (int64_t) g9;
00767   int64_t f1g0    = f1   * (int64_t) g0;
00768   int64_t f1g1_2  = f1_2 * (int64_t) g1;
00769   int64_t f1g2    = f1   * (int64_t) g2;
00770   int64_t f1g3_2  = f1_2 * (int64_t) g3;
00771   int64_t f1g4    = f1   * (int64_t) g4;
00772   int64_t f1g5_2  = f1_2 * (int64_t) g5;
00773   int64_t f1g6    = f1   * (int64_t) g6;
00774   int64_t f1g7_2  = f1_2 * (int64_t) g7;
00775   int64_t f1g8    = f1   * (int64_t) g8;
00776   int64_t f1g9_38 = f1_2 * (int64_t) g9_19;
00777   int64_t f2g0    = f2   * (int64_t) g0;
00778   int64_t f2g1    = f2   * (int64_t) g1;
00779   int64_t f2g2    = f2   * (int64_t) g2;
00780   int64_t f2g3    = f2   * (int64_t) g3;
00781   int64_t f2g4    = f2   * (int64_t) g4;
00782   int64_t f2g5    = f2   * (int64_t) g5;
00783   int64_t f2g6    = f2   * (int64_t) g6;
00784   int64_t f2g7    = f2   * (int64_t) g7;
00785   int64_t f2g8_19 = f2   * (int64_t) g8_19;
00786   int64_t f2g9_19 = f2   * (int64_t) g9_19;
00787   int64_t f3g0    = f3   * (int64_t) g0;
00788   int64_t f3g1_2  = f3_2 * (int64_t) g1;
00789   int64_t f3g2    = f3   * (int64_t) g2;
00790   int64_t f3g3_2  = f3_2 * (int64_t) g3;
00791   int64_t f3g4    = f3   * (int64_t) g4;
00792   int64_t f3g5_2  = f3_2 * (int64_t) g5;
00793   int64_t f3g6    = f3   * (int64_t) g6;
00794   int64_t f3g7_38 = f3_2 * (int64_t) g7_19;
00795   int64_t f3g8_19 = f3   * (int64_t) g8_19;
00796   int64_t f3g9_38 = f3_2 * (int64_t) g9_19;
00797   int64_t f4g0    = f4   * (int64_t) g0;
00798   int64_t f4g1    = f4   * (int64_t) g1;
00799   int64_t f4g2    = f4   * (int64_t) g2;
00800   int64_t f4g3    = f4   * (int64_t) g3;
00801   int64_t f4g4    = f4   * (int64_t) g4;
00802   int64_t f4g5    = f4   * (int64_t) g5;
00803   int64_t f4g6_19 = f4   * (int64_t) g6_19;
00804   int64_t f4g7_19 = f4   * (int64_t) g7_19;
00805   int64_t f4g8_19 = f4   * (int64_t) g8_19;
00806   int64_t f4g9_19 = f4   * (int64_t) g9_19;
00807   int64_t f5g0    = f5   * (int64_t) g0;
00808   int64_t f5g1_2  = f5_2 * (int64_t) g1;
00809   int64_t f5g2    = f5   * (int64_t) g2;
00810   int64_t f5g3_2  = f5_2 * (int64_t) g3;
00811   int64_t f5g4    = f5   * (int64_t) g4;
00812   int64_t f5g5_38 = f5_2 * (int64_t) g5_19;
00813   int64_t f5g6_19 = f5   * (int64_t) g6_19;
00814   int64_t f5g7_38 = f5_2 * (int64_t) g7_19;
00815   int64_t f5g8_19 = f5   * (int64_t) g8_19;
00816   int64_t f5g9_38 = f5_2 * (int64_t) g9_19;
00817   int64_t f6g0    = f6   * (int64_t) g0;
00818   int64_t f6g1    = f6   * (int64_t) g1;
00819   int64_t f6g2    = f6   * (int64_t) g2;
00820   int64_t f6g3    = f6   * (int64_t) g3;
00821   int64_t f6g4_19 = f6   * (int64_t) g4_19;
00822   int64_t f6g5_19 = f6   * (int64_t) g5_19;
00823   int64_t f6g6_19 = f6   * (int64_t) g6_19;
00824   int64_t f6g7_19 = f6   * (int64_t) g7_19;
00825   int64_t f6g8_19 = f6   * (int64_t) g8_19;
00826   int64_t f6g9_19 = f6   * (int64_t) g9_19;
00827   int64_t f7g0    = f7   * (int64_t) g0;
00828   int64_t f7g1_2  = f7_2 * (int64_t) g1;
00829   int64_t f7g2    = f7   * (int64_t) g2;
00830   int64_t f7g3_38 = f7_2 * (int64_t) g3_19;
00831   int64_t f7g4_19 = f7   * (int64_t) g4_19;
00832   int64_t f7g5_38 = f7_2 * (int64_t) g5_19;
00833   int64_t f7g6_19 = f7   * (int64_t) g6_19;
00834   int64_t f7g7_38 = f7_2 * (int64_t) g7_19;
00835   int64_t f7g8_19 = f7   * (int64_t) g8_19;
00836   int64_t f7g9_38 = f7_2 * (int64_t) g9_19;
00837   int64_t f8g0    = f8   * (int64_t) g0;
00838   int64_t f8g1    = f8   * (int64_t) g1;
00839   int64_t f8g2_19 = f8   * (int64_t) g2_19;
00840   int64_t f8g3_19 = f8   * (int64_t) g3_19;
00841   int64_t f8g4_19 = f8   * (int64_t) g4_19;
00842   int64_t f8g5_19 = f8   * (int64_t) g5_19;
00843   int64_t f8g6_19 = f8   * (int64_t) g6_19;
00844   int64_t f8g7_19 = f8   * (int64_t) g7_19;
00845   int64_t f8g8_19 = f8   * (int64_t) g8_19;
00846   int64_t f8g9_19 = f8   * (int64_t) g9_19;
00847   int64_t f9g0    = f9   * (int64_t) g0;
00848   int64_t f9g1_38 = f9_2 * (int64_t) g1_19;
00849   int64_t f9g2_19 = f9   * (int64_t) g2_19;
00850   int64_t f9g3_38 = f9_2 * (int64_t) g3_19;
00851   int64_t f9g4_19 = f9   * (int64_t) g4_19;
00852   int64_t f9g5_38 = f9_2 * (int64_t) g5_19;
00853   int64_t f9g6_19 = f9   * (int64_t) g6_19;
00854   int64_t f9g7_38 = f9_2 * (int64_t) g7_19;
00855   int64_t f9g8_19 = f9   * (int64_t) g8_19;
00856   int64_t f9g9_38 = f9_2 * (int64_t) g9_19;
00857   int64_t h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38;
00858   int64_t h1 = f0g1+f1g0   +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19;
00859   int64_t h2 = f0g2+f1g1_2 +f2g0   +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38;
00860   int64_t h3 = f0g3+f1g2   +f2g1   +f3g0   +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19;
00861   int64_t h4 = f0g4+f1g3_2 +f2g2   +f3g1_2 +f4g0   +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38;
00862   int64_t h5 = f0g5+f1g4   +f2g3   +f3g2   +f4g1   +f5g0   +f6g9_19+f7g8_19+f8g7_19+f9g6_19;
00863   int64_t h6 = f0g6+f1g5_2 +f2g4   +f3g3_2 +f4g2   +f5g1_2 +f6g0   +f7g9_38+f8g8_19+f9g7_38;
00864   int64_t h7 = f0g7+f1g6   +f2g5   +f3g4   +f4g3   +f5g2   +f6g1   +f7g0   +f8g9_19+f9g8_19;
00865   int64_t h8 = f0g8+f1g7_2 +f2g6   +f3g5_2 +f4g4   +f5g3_2 +f6g2   +f7g1_2 +f8g0   +f9g9_38;
00866   int64_t h9 = f0g9+f1g8   +f2g7   +f3g6   +f4g5   +f5g4   +f6g3   +f7g2   +f8g1   +f9g0   ;
00867   int64_t carry0;
00868   int64_t carry1;
00869   int64_t carry2;
00870   int64_t carry3;
00871   int64_t carry4;
00872   int64_t carry5;
00873   int64_t carry6;
00874   int64_t carry7;
00875   int64_t carry8;
00876   int64_t carry9;
00877 
00878   /*
00879   |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38))
00880     i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8
00881   |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19))
00882     i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9
00883   */
00884 
00885   carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
00886   carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
00887   /* |h0| <= 2^25 */
00888   /* |h4| <= 2^25 */
00889   /* |h1| <= 1.71*2^59 */
00890   /* |h5| <= 1.71*2^59 */
00891 
00892   carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
00893   carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
00894   /* |h1| <= 2^24; from now on fits into int32 */
00895   /* |h5| <= 2^24; from now on fits into int32 */
00896   /* |h2| <= 1.41*2^60 */
00897   /* |h6| <= 1.41*2^60 */
00898 
00899   carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
00900   carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
00901   /* |h2| <= 2^25; from now on fits into int32 unchanged */
00902   /* |h6| <= 2^25; from now on fits into int32 unchanged */
00903   /* |h3| <= 1.71*2^59 */
00904   /* |h7| <= 1.71*2^59 */
00905 
00906   carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
00907   carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
00908   /* |h3| <= 2^24; from now on fits into int32 unchanged */
00909   /* |h7| <= 2^24; from now on fits into int32 unchanged */
00910   /* |h4| <= 1.72*2^34 */
00911   /* |h8| <= 1.41*2^60 */
00912 
00913   carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
00914   carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
00915   /* |h4| <= 2^25; from now on fits into int32 unchanged */
00916   /* |h8| <= 2^25; from now on fits into int32 unchanged */
00917   /* |h5| <= 1.01*2^24 */
00918   /* |h9| <= 1.71*2^59 */
00919 
00920   carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
00921   /* |h9| <= 2^24; from now on fits into int32 unchanged */
00922   /* |h0| <= 1.1*2^39 */
00923 
00924   carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
00925   /* |h0| <= 2^25; from now on fits into int32 unchanged */
00926   /* |h1| <= 1.01*2^24 */
00927 
00928   h[0] = (int32_t)h0;
00929   h[1] = (int32_t)h1;
00930   h[2] = (int32_t)h2;
00931   h[3] = (int32_t)h3;
00932   h[4] = (int32_t)h4;
00933   h[5] = (int32_t)h5;
00934   h[6] = (int32_t)h6;
00935   h[7] = (int32_t)h7;
00936   h[8] = (int32_t)h8;
00937   h[9] = (int32_t)h9;
00938 }
00939 
00940 
00941 /*
00942 Replace (f,g) with (g,f) if b == 1;
00943 replace (f,g) with (f,g) if b == 0.
00944 
00945 Preconditions: b in {0,1}.
00946 */
00947 
00948 void fe_cswap(fe f, fe g, int b)
00949 {
00950   int32_t f0 = f[0];
00951   int32_t f1 = f[1];
00952   int32_t f2 = f[2];
00953   int32_t f3 = f[3];
00954   int32_t f4 = f[4];
00955   int32_t f5 = f[5];
00956   int32_t f6 = f[6];
00957   int32_t f7 = f[7];
00958   int32_t f8 = f[8];
00959   int32_t f9 = f[9];
00960   int32_t g0 = g[0];
00961   int32_t g1 = g[1];
00962   int32_t g2 = g[2];
00963   int32_t g3 = g[3];
00964   int32_t g4 = g[4];
00965   int32_t g5 = g[5];
00966   int32_t g6 = g[6];
00967   int32_t g7 = g[7];
00968   int32_t g8 = g[8];
00969   int32_t g9 = g[9];
00970   int32_t x0 = f0 ^ g0;
00971   int32_t x1 = f1 ^ g1;
00972   int32_t x2 = f2 ^ g2;
00973   int32_t x3 = f3 ^ g3;
00974   int32_t x4 = f4 ^ g4;
00975   int32_t x5 = f5 ^ g5;
00976   int32_t x6 = f6 ^ g6;
00977   int32_t x7 = f7 ^ g7;
00978   int32_t x8 = f8 ^ g8;
00979   int32_t x9 = f9 ^ g9;
00980   b = -b;
00981   x0 &= b;
00982   x1 &= b;
00983   x2 &= b;
00984   x3 &= b;
00985   x4 &= b;
00986   x5 &= b;
00987   x6 &= b;
00988   x7 &= b;
00989   x8 &= b;
00990   x9 &= b;
00991   f[0] = f0 ^ x0;
00992   f[1] = f1 ^ x1;
00993   f[2] = f2 ^ x2;
00994   f[3] = f3 ^ x3;
00995   f[4] = f4 ^ x4;
00996   f[5] = f5 ^ x5;
00997   f[6] = f6 ^ x6;
00998   f[7] = f7 ^ x7;
00999   f[8] = f8 ^ x8;
01000   f[9] = f9 ^ x9;
01001   g[0] = g0 ^ x0;
01002   g[1] = g1 ^ x1;
01003   g[2] = g2 ^ x2;
01004   g[3] = g3 ^ x3;
01005   g[4] = g4 ^ x4;
01006   g[5] = g5 ^ x5;
01007   g[6] = g6 ^ x6;
01008   g[7] = g7 ^ x7;
01009   g[8] = g8 ^ x8;
01010   g[9] = g9 ^ x9;
01011 }
01012 
01013 
01014 /*
01015 h = f * 121666
01016 Can overlap h with f.
01017 
01018 Preconditions:
01019    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
01020 
01021 Postconditions:
01022    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
01023 */
01024 
01025 void fe_mul121666(fe h,fe f)
01026 {
01027   int32_t f0 = f[0];
01028   int32_t f1 = f[1];
01029   int32_t f2 = f[2];
01030   int32_t f3 = f[3];
01031   int32_t f4 = f[4];
01032   int32_t f5 = f[5];
01033   int32_t f6 = f[6];
01034   int32_t f7 = f[7];
01035   int32_t f8 = f[8];
01036   int32_t f9 = f[9];
01037   int64_t h0 = f0 * (int64_t) 121666;
01038   int64_t h1 = f1 * (int64_t) 121666;
01039   int64_t h2 = f2 * (int64_t) 121666;
01040   int64_t h3 = f3 * (int64_t) 121666;
01041   int64_t h4 = f4 * (int64_t) 121666;
01042   int64_t h5 = f5 * (int64_t) 121666;
01043   int64_t h6 = f6 * (int64_t) 121666;
01044   int64_t h7 = f7 * (int64_t) 121666;
01045   int64_t h8 = f8 * (int64_t) 121666;
01046   int64_t h9 = f9 * (int64_t) 121666;
01047   int64_t carry0;
01048   int64_t carry1;
01049   int64_t carry2;
01050   int64_t carry3;
01051   int64_t carry4;
01052   int64_t carry5;
01053   int64_t carry6;
01054   int64_t carry7;
01055   int64_t carry8;
01056   int64_t carry9;
01057 
01058   carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
01059   carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
01060   carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
01061   carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
01062   carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
01063 
01064   carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
01065   carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
01066   carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
01067   carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
01068   carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
01069 
01070   h[0] = (int32_t)h0;
01071   h[1] = (int32_t)h1;
01072   h[2] = (int32_t)h2;
01073   h[3] = (int32_t)h3;
01074   h[4] = (int32_t)h4;
01075   h[5] = (int32_t)h5;
01076   h[6] = (int32_t)h6;
01077   h[7] = (int32_t)h7;
01078   h[8] = (int32_t)h8;
01079   h[9] = (int32_t)h9;
01080 }
01081 
01082 
01083 /*
01084 h = 2 * f * f
01085 Can overlap h with f.
01086 
01087 Preconditions:
01088    |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
01089 
01090 Postconditions:
01091    |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
01092 */
01093 
01094 /*
01095 See fe_mul.c for discussion of implementation strategy.
01096 */
01097 
01098 void fe_sq2(fe h,const fe f)
01099 {
01100   int32_t f0 = f[0];
01101   int32_t f1 = f[1];
01102   int32_t f2 = f[2];
01103   int32_t f3 = f[3];
01104   int32_t f4 = f[4];
01105   int32_t f5 = f[5];
01106   int32_t f6 = f[6];
01107   int32_t f7 = f[7];
01108   int32_t f8 = f[8];
01109   int32_t f9 = f[9];
01110   int32_t f0_2 = 2 * f0;
01111   int32_t f1_2 = 2 * f1;
01112   int32_t f2_2 = 2 * f2;
01113   int32_t f3_2 = 2 * f3;
01114   int32_t f4_2 = 2 * f4;
01115   int32_t f5_2 = 2 * f5;
01116   int32_t f6_2 = 2 * f6;
01117   int32_t f7_2 = 2 * f7;
01118   int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
01119   int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
01120   int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
01121   int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
01122   int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
01123   int64_t f0f0    = f0   * (int64_t) f0;
01124   int64_t f0f1_2  = f0_2 * (int64_t) f1;
01125   int64_t f0f2_2  = f0_2 * (int64_t) f2;
01126   int64_t f0f3_2  = f0_2 * (int64_t) f3;
01127   int64_t f0f4_2  = f0_2 * (int64_t) f4;
01128   int64_t f0f5_2  = f0_2 * (int64_t) f5;
01129   int64_t f0f6_2  = f0_2 * (int64_t) f6;
01130   int64_t f0f7_2  = f0_2 * (int64_t) f7;
01131   int64_t f0f8_2  = f0_2 * (int64_t) f8;
01132   int64_t f0f9_2  = f0_2 * (int64_t) f9;
01133   int64_t f1f1_2  = f1_2 * (int64_t) f1;
01134   int64_t f1f2_2  = f1_2 * (int64_t) f2;
01135   int64_t f1f3_4  = f1_2 * (int64_t) f3_2;
01136   int64_t f1f4_2  = f1_2 * (int64_t) f4;
01137   int64_t f1f5_4  = f1_2 * (int64_t) f5_2;
01138   int64_t f1f6_2  = f1_2 * (int64_t) f6;
01139   int64_t f1f7_4  = f1_2 * (int64_t) f7_2;
01140   int64_t f1f8_2  = f1_2 * (int64_t) f8;
01141   int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
01142   int64_t f2f2    = f2   * (int64_t) f2;
01143   int64_t f2f3_2  = f2_2 * (int64_t) f3;
01144   int64_t f2f4_2  = f2_2 * (int64_t) f4;
01145   int64_t f2f5_2  = f2_2 * (int64_t) f5;
01146   int64_t f2f6_2  = f2_2 * (int64_t) f6;
01147   int64_t f2f7_2  = f2_2 * (int64_t) f7;
01148   int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
01149   int64_t f2f9_38 = f2   * (int64_t) f9_38;
01150   int64_t f3f3_2  = f3_2 * (int64_t) f3;
01151   int64_t f3f4_2  = f3_2 * (int64_t) f4;
01152   int64_t f3f5_4  = f3_2 * (int64_t) f5_2;
01153   int64_t f3f6_2  = f3_2 * (int64_t) f6;
01154   int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
01155   int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
01156   int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
01157   int64_t f4f4    = f4   * (int64_t) f4;
01158   int64_t f4f5_2  = f4_2 * (int64_t) f5;
01159   int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
01160   int64_t f4f7_38 = f4   * (int64_t) f7_38;
01161   int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
01162   int64_t f4f9_38 = f4   * (int64_t) f9_38;
01163   int64_t f5f5_38 = f5   * (int64_t) f5_38;
01164   int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
01165   int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
01166   int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
01167   int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
01168   int64_t f6f6_19 = f6   * (int64_t) f6_19;
01169   int64_t f6f7_38 = f6   * (int64_t) f7_38;
01170   int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
01171   int64_t f6f9_38 = f6   * (int64_t) f9_38;
01172   int64_t f7f7_38 = f7   * (int64_t) f7_38;
01173   int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
01174   int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
01175   int64_t f8f8_19 = f8   * (int64_t) f8_19;
01176   int64_t f8f9_38 = f8   * (int64_t) f9_38;
01177   int64_t f9f9_38 = f9   * (int64_t) f9_38;
01178   int64_t h0 = f0f0  +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
01179   int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
01180   int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
01181   int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
01182   int64_t h4 = f0f4_2+f1f3_4 +f2f2   +f5f9_76+f6f8_38+f7f7_38;
01183   int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
01184   int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
01185   int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
01186   int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4   +f9f9_38;
01187   int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
01188   int64_t carry0;
01189   int64_t carry1;
01190   int64_t carry2;
01191   int64_t carry3;
01192   int64_t carry4;
01193   int64_t carry5;
01194   int64_t carry6;
01195   int64_t carry7;
01196   int64_t carry8;
01197   int64_t carry9;
01198 
01199   h0 += h0;
01200   h1 += h1;
01201   h2 += h2;
01202   h3 += h3;
01203   h4 += h4;
01204   h5 += h5;
01205   h6 += h6;
01206   h7 += h7;
01207   h8 += h8;
01208   h9 += h9;
01209 
01210   carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
01211   carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
01212 
01213   carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
01214   carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
01215 
01216   carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
01217   carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
01218 
01219   carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
01220   carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
01221 
01222   carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
01223   carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
01224 
01225   carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
01226 
01227   carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
01228 
01229   h[0] = (int32_t)h0;
01230   h[1] = (int32_t)h1;
01231   h[2] = (int32_t)h2;
01232   h[3] = (int32_t)h3;
01233   h[4] = (int32_t)h4;
01234   h[5] = (int32_t)h5;
01235   h[6] = (int32_t)h6;
01236   h[7] = (int32_t)h7;
01237   h[8] = (int32_t)h8;
01238   h[9] = (int32_t)h9;
01239 }
01240 
01241 
01242 void fe_pow22523(fe out,const fe z)
01243 {
01244   fe t0;
01245   fe t1;
01246   fe t2;
01247   int i;
01248 
01249   fe_sq(t0,z); for (i = 1;i < 1;++i) fe_sq(t0,t0);
01250   fe_sq(t1,t0); for (i = 1;i < 2;++i) fe_sq(t1,t1);
01251   fe_mul(t1,z,t1);
01252   fe_mul(t0,t0,t1);
01253   fe_sq(t0,t0); for (i = 1;i < 1;++i) fe_sq(t0,t0);
01254   fe_mul(t0,t1,t0);
01255   fe_sq(t1,t0); for (i = 1;i < 5;++i) fe_sq(t1,t1);
01256   fe_mul(t0,t1,t0);
01257   fe_sq(t1,t0); for (i = 1;i < 10;++i) fe_sq(t1,t1);
01258   fe_mul(t1,t1,t0);
01259   fe_sq(t2,t1); for (i = 1;i < 20;++i) fe_sq(t2,t2);
01260   fe_mul(t1,t2,t1);
01261   fe_sq(t1,t1); for (i = 1;i < 10;++i) fe_sq(t1,t1);
01262   fe_mul(t0,t1,t0);
01263   fe_sq(t1,t0); for (i = 1;i < 50;++i) fe_sq(t1,t1);
01264   fe_mul(t1,t1,t0);
01265   fe_sq(t2,t1); for (i = 1;i < 100;++i) fe_sq(t2,t2);
01266   fe_mul(t1,t2,t1);
01267   fe_sq(t1,t1); for (i = 1;i < 50;++i) fe_sq(t1,t1);
01268   fe_mul(t0,t1,t0);
01269   fe_sq(t0,t0); for (i = 1;i < 2;++i) fe_sq(t0,t0);
01270   fe_mul(out,t0,z);
01271 
01272   return;
01273 }
01274 
01275 
01276 /*
01277 h = -f
01278 
01279 Preconditions:
01280    |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
01281 
01282 Postconditions:
01283    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
01284 */
01285 
01286 void fe_neg(fe h,const fe f)
01287 {
01288   int32_t f0 = f[0];
01289   int32_t f1 = f[1];
01290   int32_t f2 = f[2];
01291   int32_t f3 = f[3];
01292   int32_t f4 = f[4];
01293   int32_t f5 = f[5];
01294   int32_t f6 = f[6];
01295   int32_t f7 = f[7];
01296   int32_t f8 = f[8];
01297   int32_t f9 = f[9];
01298   int32_t h0 = -f0;
01299   int32_t h1 = -f1;
01300   int32_t h2 = -f2;
01301   int32_t h3 = -f3;
01302   int32_t h4 = -f4;
01303   int32_t h5 = -f5;
01304   int32_t h6 = -f6;
01305   int32_t h7 = -f7;
01306   int32_t h8 = -f8;
01307   int32_t h9 = -f9;
01308   h[0] = h0;
01309   h[1] = h1;
01310   h[2] = h2;
01311   h[3] = h3;
01312   h[4] = h4;
01313   h[5] = h5;
01314   h[6] = h6;
01315   h[7] = h7;
01316   h[8] = h8;
01317   h[9] = h9;
01318 }
01319 
01320 
01321 /*
01322 Preconditions:
01323    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
01324 */
01325 
01326 static const unsigned char zero[32] = {0};
01327 
01328 int fe_isnonzero(const fe f)
01329 {
01330   unsigned char s[32];
01331   fe_tobytes(s,f);
01332   return ConstantCompare(s,zero,32);
01333 }
01334 
01335 
01336 /*
01337 return 1 if f is in {1,3,5,...,q-2}
01338 return 0 if f is in {0,2,4,...,q-1}
01339 
01340 Preconditions:
01341    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
01342 */
01343 
01344 int fe_isnegative(const fe f)
01345 {
01346   unsigned char s[32];
01347   fe_tobytes(s,f);
01348   return s[0] & 1;
01349 }
01350 
01351 
01352 /*
01353 Replace (f,g) with (g,g) if b == 1;
01354 replace (f,g) with (f,g) if b == 0.
01355 
01356 Preconditions: b in {0,1}.
01357 */
01358 
01359 void fe_cmov(fe f, const fe g, int b)
01360 {
01361   int32_t f0 = f[0];
01362   int32_t f1 = f[1];
01363   int32_t f2 = f[2];
01364   int32_t f3 = f[3];
01365   int32_t f4 = f[4];
01366   int32_t f5 = f[5];
01367   int32_t f6 = f[6];
01368   int32_t f7 = f[7];
01369   int32_t f8 = f[8];
01370   int32_t f9 = f[9];
01371   int32_t g0 = g[0];
01372   int32_t g1 = g[1];
01373   int32_t g2 = g[2];
01374   int32_t g3 = g[3];
01375   int32_t g4 = g[4];
01376   int32_t g5 = g[5];
01377   int32_t g6 = g[6];
01378   int32_t g7 = g[7];
01379   int32_t g8 = g[8];
01380   int32_t g9 = g[9];
01381   int32_t x0 = f0 ^ g0;
01382   int32_t x1 = f1 ^ g1;
01383   int32_t x2 = f2 ^ g2;
01384   int32_t x3 = f3 ^ g3;
01385   int32_t x4 = f4 ^ g4;
01386   int32_t x5 = f5 ^ g5;
01387   int32_t x6 = f6 ^ g6;
01388   int32_t x7 = f7 ^ g7;
01389   int32_t x8 = f8 ^ g8;
01390   int32_t x9 = f9 ^ g9;
01391   b = -b;
01392   x0 &= b;
01393   x1 &= b;
01394   x2 &= b;
01395   x3 &= b;
01396   x4 &= b;
01397   x5 &= b;
01398   x6 &= b;
01399   x7 &= b;
01400   x8 &= b;
01401   x9 &= b;
01402   f[0] = f0 ^ x0;
01403   f[1] = f1 ^ x1;
01404   f[2] = f2 ^ x2;
01405   f[3] = f3 ^ x3;
01406   f[4] = f4 ^ x4;
01407   f[5] = f5 ^ x5;
01408   f[6] = f6 ^ x6;
01409   f[7] = f7 ^ x7;
01410   f[8] = f8 ^ x8;
01411   f[9] = f9 ^ x9;
01412 }
01413 #endif
01414 #endif /* HAVE ED25519 or CURVE25519 */
01415 #endif /* not defined CURVED25519_SMALL */
01416 
01417