Renesas / SecureDweet
Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers fe_low_mem.c Source File

fe_low_mem.c

00001 /* fe_low_mem.c
00002  *
00003  * Copyright (C) 2006-2016 wolfSSL Inc.
00004  *
00005  * This file is part of wolfSSL.
00006  *
00007  * wolfSSL is free software; you can redistribute it and/or modify
00008  * it under the terms of the GNU General Public License as published by
00009  * the Free Software Foundation; either version 2 of the License, or
00010  * (at your option) any later version.
00011  *
00012  * wolfSSL is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
00020  */
00021 
00022 
00023 /* Based from Daniel Beer's public domain word. */
00024 
00025 #ifdef HAVE_CONFIG_H
00026     #include <config.h>
00027 #endif
00028 
00029 #include <wolfssl/wolfcrypt/settings.h>
00030 
00031 #if defined(CURVED25519_SMALL) /* use slower code that takes less memory */
00032 #if defined(HAVE_ED25519) || defined(HAVE_CURVE25519)
00033 
00034 #include <wolfssl/wolfcrypt/fe_operations.h>
00035 
00036 #ifdef NO_INLINE
00037     #include <wolfssl/wolfcrypt/misc.h>
00038 #else
00039     #include <wolfcrypt/src/misc.c>
00040 #endif
00041 
00042 
00043 void fprime_copy(byte *x, const byte *a)
00044 {
00045     int i;
00046     for (i = 0; i < F25519_SIZE; i++)
00047         x[i] = a[i];
00048 }
00049 
00050 
00051 void fe_copy(fe x, const fe a)
00052 {
00053     int i;
00054     for (i = 0; i < F25519_SIZE; i++)
00055         x[i] = a[i];
00056 }
00057 
00058 
00059 /* Double an X-coordinate */
00060 static void xc_double(byte *x3, byte *z3,
00061               const byte *x1, const byte *z1)
00062 {
00063     /* Explicit formulas database: dbl-1987-m
00064      *
00065      * source 1987 Montgomery "Speeding the Pollard and elliptic
00066      *   curve methods of factorization", page 261, fourth display
00067      * compute X3 = (X1^2-Z1^2)^2
00068      * compute Z3 = 4 X1 Z1 (X1^2 + a X1 Z1 + Z1^2)
00069      */
00070     byte x1sq[F25519_SIZE];
00071     byte z1sq[F25519_SIZE];
00072     byte x1z1[F25519_SIZE];
00073     byte a[F25519_SIZE];
00074 
00075     fe_mul__distinct(x1sq, x1, x1);
00076     fe_mul__distinct(z1sq, z1, z1);
00077     fe_mul__distinct(x1z1, x1, z1);
00078 
00079     fe_sub(a, x1sq, z1sq);
00080     fe_mul__distinct(x3, a, a);
00081 
00082     fe_mul_c(a, x1z1, 486662);
00083     fe_add(a, x1sq, a);
00084     fe_add(a, z1sq, a);
00085     fe_mul__distinct(x1sq, x1z1, a);
00086     fe_mul_c(z3, x1sq, 4);
00087 }
00088 
00089 
00090 /* Differential addition */
00091 static void xc_diffadd(byte *x5, byte *z5,
00092                const byte *x1, const byte *z1,
00093                const byte *x2, const byte *z2,
00094                const byte *x3, const byte *z3)
00095 {
00096     /* Explicit formulas database: dbl-1987-m3
00097      *
00098      * source 1987 Montgomery "Speeding the Pollard and elliptic curve
00099      *   methods of factorization", page 261, fifth display, plus
00100      *   common-subexpression elimination
00101      * compute A = X2+Z2
00102      * compute B = X2-Z2
00103      * compute C = X3+Z3
00104      * compute D = X3-Z3
00105      * compute DA = D A
00106      * compute CB = C B
00107      * compute X5 = Z1(DA+CB)^2
00108      * compute Z5 = X1(DA-CB)^2
00109      */
00110     byte da[F25519_SIZE];
00111     byte cb[F25519_SIZE];
00112     byte a[F25519_SIZE];
00113     byte b[F25519_SIZE];
00114 
00115     fe_add(a, x2, z2);
00116     fe_sub(b, x3, z3); /* D */
00117     fe_mul__distinct(da, a, b);
00118 
00119     fe_sub(b, x2, z2);
00120     fe_add(a, x3, z3); /* C */
00121     fe_mul__distinct(cb, a, b);
00122 
00123     fe_add(a, da, cb);
00124     fe_mul__distinct(b, a, a);
00125     fe_mul__distinct(x5, z1, b);
00126 
00127     fe_sub(a, da, cb);
00128     fe_mul__distinct(b, a, a);
00129     fe_mul__distinct(z5, x1, b);
00130 }
00131 
00132 
00133 int curve25519(byte *result, byte *e, byte *q)
00134 {
00135     /* Current point: P_m */
00136     byte xm[F25519_SIZE];
00137     byte zm[F25519_SIZE] = {1};
00138 
00139     /* Predecessor: P_(m-1) */
00140     byte xm1[F25519_SIZE] = {1};
00141     byte zm1[F25519_SIZE] = {0};
00142 
00143     int i;
00144 
00145     /* Note: bit 254 is assumed to be 1 */
00146     fe_copy(xm, q);
00147 
00148     for (i = 253; i >= 0; i--) {
00149         const int bit = (e[i >> 3] >> (i & 7)) & 1;
00150         byte xms[F25519_SIZE];
00151         byte zms[F25519_SIZE];
00152 
00153         /* From P_m and P_(m-1), compute P_(2m) and P_(2m-1) */
00154         xc_diffadd(xm1, zm1, q, f25519_one, xm, zm, xm1, zm1);
00155         xc_double(xm, zm, xm, zm);
00156 
00157         /* Compute P_(2m+1) */
00158         xc_diffadd(xms, zms, xm1, zm1, xm, zm, q, f25519_one);
00159 
00160         /* Select:
00161          *   bit = 1 --> (P_(2m+1), P_(2m))
00162          *   bit = 0 --> (P_(2m), P_(2m-1))
00163          */
00164         fe_select(xm1, xm1, xm, bit);
00165         fe_select(zm1, zm1, zm, bit);
00166         fe_select(xm, xm, xms, bit);
00167         fe_select(zm, zm, zms, bit);
00168     }
00169 
00170     /* Freeze out of projective coordinates */
00171     fe_inv__distinct(zm1, zm);
00172     fe_mul__distinct(result, zm1, xm);
00173     fe_normalize(result);
00174     return 0;
00175 }
00176 
00177 
00178 static void raw_add(byte *x, const byte *p)
00179 {
00180     word16 c = 0;
00181     int i;
00182 
00183     for (i = 0; i < F25519_SIZE; i++) {
00184         c += ((word16)x[i]) + ((word16)p[i]);
00185         x[i] = c;
00186         c >>= 8;
00187     }
00188 }
00189 
00190 
00191 static void raw_try_sub(byte *x, const byte *p)
00192 {
00193     byte minusp[F25519_SIZE];
00194     word16 c = 0;
00195     int i;
00196 
00197     for (i = 0; i < F25519_SIZE; i++) {
00198         c = ((word16)x[i]) - ((word16)p[i]) - c;
00199         minusp[i] = c;
00200         c = (c >> 8) & 1;
00201     }
00202 
00203     fprime_select(x, minusp, x, c);
00204 }
00205 
00206 
00207 static int prime_msb(const byte *p)
00208 {
00209     int i;
00210     byte x;
00211     int shift = 1;
00212     int z     = F25519_SIZE - 1;
00213 
00214    /*
00215        Test for any hot bits.
00216        As soon as one instance is encountered set shift to 0.
00217     */
00218     for (i = F25519_SIZE - 1; i >= 0; i--) {
00219         shift &= ((shift ^ ((-p[i] | p[i]) >> 7)) & 1);
00220         z -= shift;
00221     }
00222     x = p[z];
00223     z <<= 3;
00224     shift = 1;
00225     for (i = 0; i < 8; i++) {
00226         shift &= ((-(x >> i) | (x >> i)) >> (7 - i) & 1);
00227         z += shift;
00228     }
00229 
00230     return z - 1;
00231 }
00232 
00233 
00234 void fprime_select(byte *dst, const byte *zero, const byte *one, byte condition)
00235 {
00236     const byte mask = -condition;
00237     int i;
00238 
00239     for (i = 0; i < F25519_SIZE; i++)
00240         dst[i] = zero[i] ^ (mask & (one[i] ^ zero[i]));
00241 }
00242 
00243 
00244 void fprime_add(byte *r, const byte *a, const byte *modulus)
00245 {
00246     raw_add(r, a);
00247     raw_try_sub(r, modulus);
00248 }
00249 
00250 
00251 void fprime_sub(byte *r, const byte *a, const byte *modulus)
00252 {
00253     raw_add(r, modulus);
00254     raw_try_sub(r, a);
00255     raw_try_sub(r, modulus);
00256 }
00257 
00258 
00259 void fprime_mul(byte *r, const byte *a, const byte *b,
00260         const byte *modulus)
00261 {
00262     word16 c = 0;
00263     int i,j;
00264 
00265     XMEMSET(r, 0, F25519_SIZE);
00266 
00267     for (i = prime_msb(modulus); i >= 0; i--) {
00268         const byte bit = (b[i >> 3] >> (i & 7)) & 1;
00269         byte plusa[F25519_SIZE];
00270 
00271         for (j = 0; j < F25519_SIZE; j++) {
00272             c |= ((word16)r[j]) << 1;
00273             r[j] = c;
00274             c >>= 8;
00275         }
00276         raw_try_sub(r, modulus);
00277 
00278         fprime_copy(plusa, r);
00279         fprime_add(plusa, a, modulus);
00280 
00281         fprime_select(r, r, plusa, bit);
00282     }
00283 }
00284 
00285 
00286 void fe_load(byte *x, word32 c)
00287 {
00288     word32 i;
00289 
00290     for (i = 0; i < sizeof(c); i++) {
00291         x[i] = c;
00292         c >>= 8;
00293     }
00294 
00295     for (; i < F25519_SIZE; i++)
00296         x[i] = 0;
00297 }
00298 
00299 
00300 void fe_normalize(byte *x)
00301 {
00302     byte minusp[F25519_SIZE];
00303     word16 c;
00304     int i;
00305 
00306     /* Reduce using 2^255 = 19 mod p */
00307     c = (x[31] >> 7) * 19;
00308     x[31] &= 127;
00309 
00310     for (i = 0; i < F25519_SIZE; i++) {
00311         c += x[i];
00312         x[i] = c;
00313         c >>= 8;
00314     }
00315 
00316     /* The number is now less than 2^255 + 18, and therefore less than
00317      * 2p. Try subtracting p, and conditionally load the subtracted
00318      * value if underflow did not occur.
00319      */
00320     c = 19;
00321 
00322     for (i = 0; i + 1 < F25519_SIZE; i++) {
00323         c += x[i];
00324         minusp[i] = c;
00325         c >>= 8;
00326     }
00327 
00328     c += ((word16)x[i]) - 128;
00329     minusp[31] = c;
00330 
00331     /* Load x-p if no underflow */
00332     fe_select(x, minusp, x, (c >> 15) & 1);
00333 }
00334 
00335 
00336 void fe_select(byte *dst,
00337            const byte *zero, const byte *one,
00338            byte condition)
00339 {
00340     const byte mask = -condition;
00341     int i;
00342 
00343     for (i = 0; i < F25519_SIZE; i++)
00344         dst[i] = zero[i] ^ (mask & (one[i] ^ zero[i]));
00345 }
00346 
00347 
00348 void fe_add(fe r, const fe a, const fe b)
00349 {
00350     word16 c = 0;
00351     int i;
00352 
00353     /* Add */
00354     for (i = 0; i < F25519_SIZE; i++) {
00355         c >>= 8;
00356         c += ((word16)a[i]) + ((word16)b[i]);
00357         r[i] = c;
00358     }
00359 
00360     /* Reduce with 2^255 = 19 mod p */
00361     r[31] &= 127;
00362     c = (c >> 7) * 19;
00363 
00364     for (i = 0; i < F25519_SIZE; i++) {
00365         c += r[i];
00366         r[i] = c;
00367         c >>= 8;
00368     }
00369 }
00370 
00371 
00372 void fe_sub(fe r, const fe a, const fe b)
00373 {
00374     word32 c = 0;
00375     int i;
00376 
00377     /* Calculate a + 2p - b, to avoid underflow */
00378     c = 218;
00379     for (i = 0; i + 1 < F25519_SIZE; i++) {
00380         c += 65280 + ((word32)a[i]) - ((word32)b[i]);
00381         r[i] = c;
00382         c >>= 8;
00383     }
00384 
00385     c += ((word32)a[31]) - ((word32)b[31]);
00386     r[31] = c & 127;
00387     c = (c >> 7) * 19;
00388 
00389     for (i = 0; i < F25519_SIZE; i++) {
00390         c += r[i];
00391         r[i] = c;
00392         c >>= 8;
00393     }
00394 }
00395 
00396 
00397 void fe_neg(fe r, const fe a)
00398 {
00399     word32 c = 0;
00400     int i;
00401 
00402     /* Calculate 2p - a, to avoid underflow */
00403     c = 218;
00404     for (i = 0; i + 1 < F25519_SIZE; i++) {
00405         c += 65280 - ((word32)a[i]);
00406         r[i] = c;
00407         c >>= 8;
00408     }
00409 
00410     c -= ((word32)a[31]);
00411     r[31] = c & 127;
00412     c = (c >> 7) * 19;
00413 
00414     for (i = 0; i < F25519_SIZE; i++) {
00415         c += r[i];
00416         r[i] = c;
00417         c >>= 8;
00418     }
00419 }
00420 
00421 
00422 void fe_mul__distinct(byte *r, const byte *a, const byte *b)
00423 {
00424     word32 c = 0;
00425     int i;
00426 
00427     for (i = 0; i < F25519_SIZE; i++) {
00428         int j;
00429 
00430         c >>= 8;
00431         for (j = 0; j <= i; j++)
00432             c += ((word32)a[j]) * ((word32)b[i - j]);
00433 
00434         for (; j < F25519_SIZE; j++)
00435             c += ((word32)a[j]) *
00436                  ((word32)b[i + F25519_SIZE - j]) * 38;
00437 
00438         r[i] = c;
00439     }
00440 
00441     r[31] &= 127;
00442     c = (c >> 7) * 19;
00443 
00444     for (i = 0; i < F25519_SIZE; i++) {
00445         c += r[i];
00446         r[i] = c;
00447         c >>= 8;
00448     }
00449 }
00450 
00451 
00452 void fe_mul(fe r, const fe a, const fe b)
00453 {
00454     byte tmp[F25519_SIZE];
00455 
00456     fe_mul__distinct(tmp, a, b);
00457     fe_copy(r, tmp);
00458 }
00459 
00460 
00461 void fe_mul_c(byte *r, const byte *a, word32 b)
00462 {
00463     word32 c = 0;
00464     int i;
00465 
00466     for (i = 0; i < F25519_SIZE; i++) {
00467         c >>= 8;
00468         c += b * ((word32)a[i]);
00469         r[i] = c;
00470     }
00471 
00472     r[31] &= 127;
00473     c >>= 7;
00474     c *= 19;
00475 
00476     for (i = 0; i < F25519_SIZE; i++) {
00477         c += r[i];
00478         r[i] = c;
00479         c >>= 8;
00480     }
00481 }
00482 
00483 
00484 void fe_inv__distinct(byte *r, const byte *x)
00485 {
00486     byte s[F25519_SIZE];
00487     int i;
00488 
00489     /* This is a prime field, so by Fermat's little theorem:
00490      *
00491      *     x^(p-1) = 1 mod p
00492      *
00493      * Therefore, raise to (p-2) = 2^255-21 to get a multiplicative
00494      * inverse.
00495      *
00496      * This is a 255-bit binary number with the digits:
00497      *
00498      *     11111111... 01011
00499      *
00500      * We compute the result by the usual binary chain, but
00501      * alternate between keeping the accumulator in r and s, so as
00502      * to avoid copying temporaries.
00503      */
00504 
00505     /* 1 1 */
00506     fe_mul__distinct(s, x, x);
00507     fe_mul__distinct(r, s, x);
00508 
00509     /* 1 x 248 */
00510     for (i = 0; i < 248; i++) {
00511         fe_mul__distinct(s, r, r);
00512         fe_mul__distinct(r, s, x);
00513     }
00514 
00515     /* 0 */
00516     fe_mul__distinct(s, r, r);
00517 
00518     /* 1 */
00519     fe_mul__distinct(r, s, s);
00520     fe_mul__distinct(s, r, x);
00521 
00522     /* 0 */
00523     fe_mul__distinct(r, s, s);
00524 
00525     /* 1 */
00526     fe_mul__distinct(s, r, r);
00527     fe_mul__distinct(r, s, x);
00528 
00529     /* 1 */
00530     fe_mul__distinct(s, r, r);
00531     fe_mul__distinct(r, s, x);
00532 }
00533 
00534 
00535 void fe_invert(fe r, const fe x)
00536 {
00537     byte tmp[F25519_SIZE];
00538 
00539     fe_inv__distinct(tmp, x);
00540     fe_copy(r, tmp);
00541 }
00542 
00543 
00544 /* Raise x to the power of (p-5)/8 = 2^252-3, using s for temporary
00545  * storage.
00546  */
00547 static void exp2523(byte *r, const byte *x, byte *s)
00548 {
00549     int i;
00550 
00551     /* This number is a 252-bit number with the binary expansion:
00552      *
00553      *     111111... 01
00554      */
00555 
00556     /* 1 1 */
00557     fe_mul__distinct(r, x, x);
00558     fe_mul__distinct(s, r, x);
00559 
00560     /* 1 x 248 */
00561     for (i = 0; i < 248; i++) {
00562         fe_mul__distinct(r, s, s);
00563         fe_mul__distinct(s, r, x);
00564     }
00565 
00566     /* 0 */
00567     fe_mul__distinct(r, s, s);
00568 
00569     /* 1 */
00570     fe_mul__distinct(s, r, r);
00571     fe_mul__distinct(r, s, x);
00572 }
00573 
00574 
00575 void fe_sqrt(byte *r, const byte *a)
00576 {
00577     byte v[F25519_SIZE];
00578     byte i[F25519_SIZE];
00579     byte x[F25519_SIZE];
00580     byte y[F25519_SIZE];
00581 
00582     /* v = (2a)^((p-5)/8) [x = 2a] */
00583     fe_mul_c(x, a, 2);
00584     exp2523(v, x, y);
00585 
00586     /* i = 2av^2 - 1 */
00587     fe_mul__distinct(y, v, v);
00588     fe_mul__distinct(i, x, y);
00589     fe_load(y, 1);
00590     fe_sub(i, i, y);
00591 
00592     /* r = avi */
00593     fe_mul__distinct(x, v, a);
00594     fe_mul__distinct(r, x, i);
00595 }
00596 
00597 #endif /* HAVE_CURVE25519 or HAVE_ED25519 */
00598 #endif /* CURVED25519_SMALL */
00599