wolf SSL / wolfSSL

Dependents:   CyaSSL-Twitter-OAuth4Tw Example-client-tls-cert TwitterReader TweetTest ... more

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers fe_low_mem.c Source File

fe_low_mem.c

00001 /* fe_low_mem.c
00002  *
00003  * Copyright (C) 2006-2020 wolfSSL Inc.
00004  *
00005  * This file is part of wolfSSL.
00006  *
00007  * wolfSSL is free software; you can redistribute it and/or modify
00008  * it under the terms of the GNU General Public License as published by
00009  * the Free Software Foundation; either version 2 of the License, or
00010  * (at your option) any later version.
00011  *
00012  * wolfSSL is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
00020  */
00021 
00022 
00023 /* Based from Daniel Beer's public domain work. */
00024 
00025 #ifdef HAVE_CONFIG_H
00026     #include <config.h>
00027 #endif
00028 
00029 #include <wolfssl/wolfcrypt/settings.h>
00030 
00031 #if defined(HAVE_CURVE25519) || defined(HAVE_ED25519)
00032 #if defined(CURVE25519_SMALL) || defined(ED25519_SMALL) /* use slower code that takes less memory */
00033 
00034 #include <wolfssl/wolfcrypt/fe_operations.h>
00035 
00036 #ifdef NO_INLINE
00037     #include <wolfssl/wolfcrypt/misc.h>
00038 #else
00039     #define WOLFSSL_MISC_INCLUDED
00040     #include <wolfcrypt/src/misc.c>
00041 #endif
00042 
00043 void fprime_copy(byte *x, const byte *a)
00044 {
00045     int i;
00046     for (i = 0; i < F25519_SIZE; i++)
00047         x[i] = a[i];
00048 }
00049 
00050 
00051 void lm_copy(byte* x, const byte* a)
00052 {
00053     int i;
00054     for (i = 0; i < F25519_SIZE; i++)
00055         x[i] = a[i];
00056 }
00057 
00058 #if ((defined(HAVE_CURVE25519) && !defined(CURVE25519_SMALL)) || \
00059     (defined(HAVE_ED25519) && !defined(ED25519_SMALL))) &&      \
00060     !defined(FREESCALE_LTC_ECC)
00061     /* to be Complementary to fe_low_mem.c */
00062 #else
00063 void fe_init(void)
00064 {
00065 }
00066 #endif
00067 
00068 #ifdef CURVE25519_SMALL
00069 
00070 /* Double an X-coordinate */
00071 static void xc_double(byte *x3, byte *z3,
00072               const byte *x1, const byte *z1)
00073 {
00074     /* Explicit formulas database: dbl-1987-m
00075      *
00076      * source 1987 Montgomery "Speeding the Pollard and elliptic
00077      *   curve methods of factorization", page 261, fourth display
00078      * compute X3 = (X1^2-Z1^2)^2
00079      * compute Z3 = 4 X1 Z1 (X1^2 + a X1 Z1 + Z1^2)
00080      */
00081     byte x1sq[F25519_SIZE];
00082     byte z1sq[F25519_SIZE];
00083     byte x1z1[F25519_SIZE];
00084     byte a[F25519_SIZE];
00085 
00086     fe_mul__distinct(x1sq, x1, x1);
00087     fe_mul__distinct(z1sq, z1, z1);
00088     fe_mul__distinct(x1z1, x1, z1);
00089 
00090     lm_sub(a, x1sq, z1sq);
00091     fe_mul__distinct(x3, a, a);
00092 
00093     fe_mul_c(a, x1z1, 486662);
00094     lm_add(a, x1sq, a);
00095     lm_add(a, z1sq, a);
00096     fe_mul__distinct(x1sq, x1z1, a);
00097     fe_mul_c(z3, x1sq, 4);
00098 }
00099 
00100 
00101 /* Differential addition */
00102 static void xc_diffadd(byte *x5, byte *z5,
00103                const byte *x1, const byte *z1,
00104                const byte *x2, const byte *z2,
00105                const byte *x3, const byte *z3)
00106 {
00107     /* Explicit formulas database: dbl-1987-m3
00108      *
00109      * source 1987 Montgomery "Speeding the Pollard and elliptic curve
00110      *   methods of factorization", page 261, fifth display, plus
00111      *   common-subexpression elimination
00112      * compute A = X2+Z2
00113      * compute B = X2-Z2
00114      * compute C = X3+Z3
00115      * compute D = X3-Z3
00116      * compute DA = D A
00117      * compute CB = C B
00118      * compute X5 = Z1(DA+CB)^2
00119      * compute Z5 = X1(DA-CB)^2
00120      */
00121     byte da[F25519_SIZE];
00122     byte cb[F25519_SIZE];
00123     byte a[F25519_SIZE];
00124     byte b[F25519_SIZE];
00125 
00126     lm_add(a, x2, z2);
00127     lm_sub(b, x3, z3); /* D */
00128     fe_mul__distinct(da, a, b);
00129 
00130     lm_sub(b, x2, z2);
00131     lm_add(a, x3, z3); /* C */
00132     fe_mul__distinct(cb, a, b);
00133 
00134     lm_add(a, da, cb);
00135     fe_mul__distinct(b, a, a);
00136     fe_mul__distinct(x5, z1, b);
00137 
00138     lm_sub(a, da, cb);
00139     fe_mul__distinct(b, a, a);
00140     fe_mul__distinct(z5, x1, b);
00141 }
00142 
00143 #ifndef FREESCALE_LTC_ECC
00144 int curve25519(byte *result, byte *e, byte *q)
00145 {
00146     /* Current point: P_m */
00147     byte xm[F25519_SIZE];
00148     byte zm[F25519_SIZE] = {1};
00149 
00150     /* Predecessor: P_(m-1) */
00151     byte xm1[F25519_SIZE] = {1};
00152     byte zm1[F25519_SIZE] = {0};
00153 
00154     int i;
00155 
00156     /* Note: bit 254 is assumed to be 1 */
00157     lm_copy(xm, q);
00158 
00159     for (i = 253; i >= 0; i--) {
00160         const int bit = (e[i >> 3] >> (i & 7)) & 1;
00161         byte xms[F25519_SIZE];
00162         byte zms[F25519_SIZE];
00163 
00164         /* From P_m and P_(m-1), compute P_(2m) and P_(2m-1) */
00165         xc_diffadd(xm1, zm1, q, f25519_one, xm, zm, xm1, zm1);
00166         xc_double(xm, zm, xm, zm);
00167 
00168         /* Compute P_(2m+1) */
00169         xc_diffadd(xms, zms, xm1, zm1, xm, zm, q, f25519_one);
00170 
00171         /* Select:
00172          *   bit = 1 --> (P_(2m+1), P_(2m))
00173          *   bit = 0 --> (P_(2m), P_(2m-1))
00174          */
00175         fe_select(xm1, xm1, xm, bit);
00176         fe_select(zm1, zm1, zm, bit);
00177         fe_select(xm, xm, xms, bit);
00178         fe_select(zm, zm, zms, bit);
00179     }
00180 
00181     /* Freeze out of projective coordinates */
00182     fe_inv__distinct(zm1, zm);
00183     fe_mul__distinct(result, zm1, xm);
00184     fe_normalize(result);
00185     return 0;
00186 }
00187 #endif /* !FREESCALE_LTC_ECC */
00188 #endif /* CURVE25519_SMALL */
00189 
00190 
00191 static void raw_add(byte *x, const byte *p)
00192 {
00193     word16 c = 0;
00194     int i;
00195 
00196     for (i = 0; i < F25519_SIZE; i++) {
00197         c += ((word16)x[i]) + ((word16)p[i]);
00198         x[i] = (byte)c;
00199         c >>= 8;
00200     }
00201 }
00202 
00203 
00204 static void raw_try_sub(byte *x, const byte *p)
00205 {
00206     byte minusp[F25519_SIZE];
00207     word16 c = 0;
00208     int i;
00209 
00210     for (i = 0; i < F25519_SIZE; i++) {
00211         c = ((word16)x[i]) - ((word16)p[i]) - c;
00212         minusp[i] = (byte)c;
00213         c = (c >> 8) & 1;
00214     }
00215 
00216     fprime_select(x, minusp, x, (byte)c);
00217 }
00218 
00219 
00220 static int prime_msb(const byte *p)
00221 {
00222     int i;
00223     byte x;
00224     int shift = 1;
00225     int z     = F25519_SIZE - 1;
00226 
00227    /*
00228        Test for any hot bits.
00229        As soon as one instance is encountered set shift to 0.
00230     */
00231     for (i = F25519_SIZE - 1; i >= 0; i--) {
00232         shift &= ((shift ^ ((-p[i] | p[i]) >> 7)) & 1);
00233         z -= shift;
00234     }
00235     x = p[z];
00236     z <<= 3;
00237     shift = 1;
00238     for (i = 0; i < 8; i++) {
00239         shift &= ((-(x >> i) | (x >> i)) >> (7 - i) & 1);
00240         z += shift;
00241     }
00242 
00243     return z - 1;
00244 }
00245 
00246 
00247 void fprime_select(byte *dst, const byte *zero, const byte *one, byte condition)
00248 {
00249     const byte mask = -condition;
00250     int i;
00251 
00252     for (i = 0; i < F25519_SIZE; i++)
00253         dst[i] = zero[i] ^ (mask & (one[i] ^ zero[i]));
00254 }
00255 
00256 
00257 void fprime_add(byte *r, const byte *a, const byte *modulus)
00258 {
00259     raw_add(r, a);
00260     raw_try_sub(r, modulus);
00261 }
00262 
00263 
00264 void fprime_sub(byte *r, const byte *a, const byte *modulus)
00265 {
00266     raw_add(r, modulus);
00267     raw_try_sub(r, a);
00268     raw_try_sub(r, modulus);
00269 }
00270 
00271 
00272 void fprime_mul(byte *r, const byte *a, const byte *b,
00273         const byte *modulus)
00274 {
00275     word16 c = 0;
00276     int i,j;
00277 
00278     XMEMSET(r, 0, F25519_SIZE);
00279 
00280     for (i = prime_msb(modulus); i >= 0; i--) {
00281         const byte bit = (b[i >> 3] >> (i & 7)) & 1;
00282         byte plusa[F25519_SIZE];
00283 
00284         for (j = 0; j < F25519_SIZE; j++) {
00285             c |= ((word16)r[j]) << 1;
00286             r[j] = (byte)c;
00287             c >>= 8;
00288         }
00289         raw_try_sub(r, modulus);
00290 
00291         fprime_copy(plusa, r);
00292         fprime_add(plusa, a, modulus);
00293 
00294         fprime_select(r, r, plusa, bit);
00295     }
00296 }
00297 
00298 
00299 void fe_load(byte *x, word32 c)
00300 {
00301     word32 i;
00302 
00303     for (i = 0; i < sizeof(c); i++) {
00304         x[i] = c;
00305         c >>= 8;
00306     }
00307 
00308     for (; i < F25519_SIZE; i++)
00309         x[i] = 0;
00310 }
00311 
00312 
00313 void fe_normalize(byte *x)
00314 {
00315     byte minusp[F25519_SIZE];
00316     word16 c;
00317     int i;
00318 
00319     /* Reduce using 2^255 = 19 mod p */
00320     c = (x[31] >> 7) * 19;
00321     x[31] &= 127;
00322 
00323     for (i = 0; i < F25519_SIZE; i++) {
00324         c += x[i];
00325         x[i] = (byte)c;
00326         c >>= 8;
00327     }
00328 
00329     /* The number is now less than 2^255 + 18, and therefore less than
00330      * 2p. Try subtracting p, and conditionally load the subtracted
00331      * value if underflow did not occur.
00332      */
00333     c = 19;
00334 
00335     for (i = 0; i + 1 < F25519_SIZE; i++) {
00336         c += x[i];
00337         minusp[i] = (byte)c;
00338         c >>= 8;
00339     }
00340 
00341     c += ((word16)x[i]) - 128;
00342     minusp[31] = (byte)c;
00343 
00344     /* Load x-p if no underflow */
00345     fe_select(x, minusp, x, (c >> 15) & 1);
00346 }
00347 
00348 
00349 void fe_select(byte *dst,
00350            const byte *zero, const byte *one,
00351            byte condition)
00352 {
00353     const byte mask = -condition;
00354     int i;
00355 
00356     for (i = 0; i < F25519_SIZE; i++)
00357         dst[i] = zero[i] ^ (mask & (one[i] ^ zero[i]));
00358 }
00359 
00360 
00361 void lm_add(byte* r, const byte* a, const byte* b)
00362 {
00363     word16 c = 0;
00364     int i;
00365 
00366     /* Add */
00367     for (i = 0; i < F25519_SIZE; i++) {
00368         c >>= 8;
00369         c += ((word16)a[i]) + ((word16)b[i]);
00370         r[i] = (byte)c;
00371     }
00372 
00373     /* Reduce with 2^255 = 19 mod p */
00374     r[31] &= 127;
00375     c = (c >> 7) * 19;
00376 
00377     for (i = 0; i < F25519_SIZE; i++) {
00378         c += r[i];
00379         r[i] = (byte)c;
00380         c >>= 8;
00381     }
00382 }
00383 
00384 
00385 void lm_sub(byte* r, const byte* a, const byte* b)
00386 {
00387     word32 c = 0;
00388     int i;
00389 
00390     /* Calculate a + 2p - b, to avoid underflow */
00391     c = 218;
00392     for (i = 0; i + 1 < F25519_SIZE; i++) {
00393         c += 65280 + ((word32)a[i]) - ((word32)b[i]);
00394         r[i] = c;
00395         c >>= 8;
00396     }
00397 
00398     c += ((word32)a[31]) - ((word32)b[31]);
00399     r[31] = c & 127;
00400     c = (c >> 7) * 19;
00401 
00402     for (i = 0; i < F25519_SIZE; i++) {
00403         c += r[i];
00404         r[i] = c;
00405         c >>= 8;
00406     }
00407 }
00408 
00409 
00410 void lm_neg(byte* r, const byte* a)
00411 {
00412     word32 c = 0;
00413     int i;
00414 
00415     /* Calculate 2p - a, to avoid underflow */
00416     c = 218;
00417     for (i = 0; i + 1 < F25519_SIZE; i++) {
00418         c += 65280 - ((word32)a[i]);
00419         r[i] = c;
00420         c >>= 8;
00421     }
00422 
00423     c -= ((word32)a[31]);
00424     r[31] = c & 127;
00425     c = (c >> 7) * 19;
00426 
00427     for (i = 0; i < F25519_SIZE; i++) {
00428         c += r[i];
00429         r[i] = c;
00430         c >>= 8;
00431     }
00432 }
00433 
00434 
00435 void fe_mul__distinct(byte *r, const byte *a, const byte *b)
00436 {
00437     word32 c = 0;
00438     int i;
00439 
00440     for (i = 0; i < F25519_SIZE; i++) {
00441         int j;
00442 
00443         c >>= 8;
00444         for (j = 0; j <= i; j++)
00445             c += ((word32)a[j]) * ((word32)b[i - j]);
00446 
00447         for (; j < F25519_SIZE; j++)
00448             c += ((word32)a[j]) *
00449                  ((word32)b[i + F25519_SIZE - j]) * 38;
00450 
00451         r[i] = c;
00452     }
00453 
00454     r[31] &= 127;
00455     c = (c >> 7) * 19;
00456 
00457     for (i = 0; i < F25519_SIZE; i++) {
00458         c += r[i];
00459         r[i] = c;
00460         c >>= 8;
00461     }
00462 }
00463 
00464 
00465 void lm_mul(byte *r, const byte* a, const byte *b)
00466 {
00467     byte tmp[F25519_SIZE];
00468 
00469     fe_mul__distinct(tmp, a, b);
00470     lm_copy(r, tmp);
00471 }
00472 
00473 
00474 void fe_mul_c(byte *r, const byte *a, word32 b)
00475 {
00476     word32 c = 0;
00477     int i;
00478 
00479     for (i = 0; i < F25519_SIZE; i++) {
00480         c >>= 8;
00481         c += b * ((word32)a[i]);
00482         r[i] = c;
00483     }
00484 
00485     r[31] &= 127;
00486     c >>= 7;
00487     c *= 19;
00488 
00489     for (i = 0; i < F25519_SIZE; i++) {
00490         c += r[i];
00491         r[i] = c;
00492         c >>= 8;
00493     }
00494 }
00495 
00496 
00497 void fe_inv__distinct(byte *r, const byte *x)
00498 {
00499     byte s[F25519_SIZE];
00500     int i;
00501 
00502     /* This is a prime field, so by Fermat's little theorem:
00503      *
00504      *     x^(p-1) = 1 mod p
00505      *
00506      * Therefore, raise to (p-2) = 2^255-21 to get a multiplicative
00507      * inverse.
00508      *
00509      * This is a 255-bit binary number with the digits:
00510      *
00511      *     11111111... 01011
00512      *
00513      * We compute the result by the usual binary chain, but
00514      * alternate between keeping the accumulator in r and s, so as
00515      * to avoid copying temporaries.
00516      */
00517 
00518     /* 1 1 */
00519     fe_mul__distinct(s, x, x);
00520     fe_mul__distinct(r, s, x);
00521 
00522     /* 1 x 248 */
00523     for (i = 0; i < 248; i++) {
00524         fe_mul__distinct(s, r, r);
00525         fe_mul__distinct(r, s, x);
00526     }
00527 
00528     /* 0 */
00529     fe_mul__distinct(s, r, r);
00530 
00531     /* 1 */
00532     fe_mul__distinct(r, s, s);
00533     fe_mul__distinct(s, r, x);
00534 
00535     /* 0 */
00536     fe_mul__distinct(r, s, s);
00537 
00538     /* 1 */
00539     fe_mul__distinct(s, r, r);
00540     fe_mul__distinct(r, s, x);
00541 
00542     /* 1 */
00543     fe_mul__distinct(s, r, r);
00544     fe_mul__distinct(r, s, x);
00545 }
00546 
00547 
00548 void lm_invert(byte *r, const byte *x)
00549 {
00550     byte tmp[F25519_SIZE];
00551 
00552     fe_inv__distinct(tmp, x);
00553     lm_copy(r, tmp);
00554 }
00555 
00556 
00557 /* Raise x to the power of (p-5)/8 = 2^252-3, using s for temporary
00558  * storage.
00559  */
00560 static void exp2523(byte *r, const byte *x, byte *s)
00561 {
00562     int i;
00563 
00564     /* This number is a 252-bit number with the binary expansion:
00565      *
00566      *     111111... 01
00567      */
00568 
00569     /* 1 1 */
00570     fe_mul__distinct(r, x, x);
00571     fe_mul__distinct(s, r, x);
00572 
00573     /* 1 x 248 */
00574     for (i = 0; i < 248; i++) {
00575         fe_mul__distinct(r, s, s);
00576         fe_mul__distinct(s, r, x);
00577     }
00578 
00579     /* 0 */
00580     fe_mul__distinct(r, s, s);
00581 
00582     /* 1 */
00583     fe_mul__distinct(s, r, r);
00584     fe_mul__distinct(r, s, x);
00585 }
00586 
00587 
00588 void fe_sqrt(byte *r, const byte *a)
00589 {
00590     byte v[F25519_SIZE];
00591     byte i[F25519_SIZE];
00592     byte x[F25519_SIZE];
00593     byte y[F25519_SIZE];
00594 
00595     /* v = (2a)^((p-5)/8) [x = 2a] */
00596     fe_mul_c(x, a, 2);
00597     exp2523(v, x, y);
00598 
00599     /* i = 2av^2 - 1 */
00600     fe_mul__distinct(y, v, v);
00601     fe_mul__distinct(i, x, y);
00602     fe_load(y, 1);
00603     lm_sub(i, i, y);
00604 
00605     /* r = avi */
00606     fe_mul__distinct(x, v, a);
00607     fe_mul__distinct(r, x, i);
00608 }
00609 
00610 #endif /* CURVE25519_SMALL || ED25519_SMALL */
00611 #endif /* HAVE_CURVE25519 || HAVE_ED25519 */
00612