wolf SSL / wolfSSL-TLS13-Beta

Fork of wolfSSL by wolf SSL

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers integer.c Source File

integer.c

00001 /* integer.c
00002  *
00003  * Copyright (C) 2006-2016 wolfSSL Inc.
00004  *
00005  * This file is part of wolfSSL.
00006  *
00007  * wolfSSL is free software; you can redistribute it and/or modify
00008  * it under the terms of the GNU General Public License as published by
00009  * the Free Software Foundation; either version 2 of the License, or
00010  * (at your option) any later version.
00011  *
00012  * wolfSSL is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
00020  */
00021 
00022 
00023 
00024 /*
00025  * Based on public domain LibTomMath 0.38 by Tom St Denis, tomstdenis@iahu.ca,
00026  * http://math.libtomcrypt.com
00027  */
00028 
00029 
00030 #ifdef HAVE_CONFIG_H
00031     #include <config.h>
00032 #endif
00033 
00034 /* in case user set USE_FAST_MATH there */
00035 #include <wolfssl/wolfcrypt/settings.h>
00036 
00037 #ifdef NO_INLINE
00038     #include <wolfssl/wolfcrypt/misc.h>
00039 #else
00040     #define WOLFSSL_MISC_INCLUDED
00041     #include <wolfcrypt/src/misc.c>
00042 #endif
00043 
00044 #ifndef NO_BIG_INT
00045 
00046 #ifndef USE_FAST_MATH
00047 
00048 #include <wolfssl/wolfcrypt/integer.h>
00049 
00050 #if defined(FREESCALE_LTC_TFM)
00051     #include <wolfssl/wolfcrypt/port/nxp/ksdk_port.h>
00052 #endif
00053 #ifdef WOLFSSL_DEBUG_MATH
00054     #include <stdio.h>
00055 #endif
00056 
00057 #ifndef NO_WOLFSSL_SMALL_STACK
00058     #ifndef WOLFSSL_SMALL_STACK
00059         #define WOLFSSL_SMALL_STACK
00060     #endif
00061 #endif
00062 
00063 #ifdef SHOW_GEN
00064     #if defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX)
00065         #if MQX_USE_IO_OLD
00066             #include <fio.h>
00067         #else
00068             #include <nio.h>
00069         #endif
00070     #else
00071         #include <stdio.h>
00072     #endif
00073 #endif
00074 
00075 /* reverse an array, used for radix code */
00076 static void
00077 bn_reverse (unsigned char *s, int len)
00078 {
00079     int     ix, iy;
00080     unsigned char t;
00081 
00082     ix = 0;
00083     iy = len - 1;
00084     while (ix < iy) {
00085         t     = s[ix];
00086         s[ix] = s[iy];
00087         s[iy] = t;
00088         ++ix;
00089         --iy;
00090     }
00091 }
00092 
00093 /* math settings check */
00094 word32 CheckRunTimeSettings(void)
00095 {
00096     return CTC_SETTINGS;
00097 }
00098 
00099 
00100 /* handle up to 6 inits */
00101 int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d, mp_int* e,
00102                   mp_int* f)
00103 {
00104     int res = MP_OKAY;
00105 
00106     if (a) XMEMSET(a, 0, sizeof(mp_int));
00107     if (b) XMEMSET(b, 0, sizeof(mp_int));
00108     if (c) XMEMSET(c, 0, sizeof(mp_int));
00109     if (d) XMEMSET(d, 0, sizeof(mp_int));
00110     if (e) XMEMSET(e, 0, sizeof(mp_int));
00111     if (f) XMEMSET(f, 0, sizeof(mp_int));
00112 
00113     if (a && ((res = mp_init(a)) != MP_OKAY))
00114         return res;
00115 
00116     if (b && ((res = mp_init(b)) != MP_OKAY)) {
00117         mp_clear(a);
00118         return res;
00119     }
00120 
00121     if (c && ((res = mp_init(c)) != MP_OKAY)) {
00122         mp_clear(a); mp_clear(b);
00123         return res;
00124     }
00125 
00126     if (d && ((res = mp_init(d)) != MP_OKAY)) {
00127         mp_clear(a); mp_clear(b); mp_clear(c);
00128         return res;
00129     }
00130 
00131     if (e && ((res = mp_init(e)) != MP_OKAY)) {
00132         mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d);
00133         return res;
00134     }
00135 
00136     if (f && ((res = mp_init(f)) != MP_OKAY)) {
00137         mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d); mp_clear(e);
00138         return res;
00139     }
00140 
00141     return res;
00142 }
00143 
00144 
00145 /* init a new mp_int */
00146 int mp_init (mp_int * a)
00147 {
00148   /* Safeguard against passing in a null pointer */
00149   if (a == NULL)
00150     return MP_VAL;
00151 
00152   /* defer allocation until mp_grow */
00153   a->dp = NULL;
00154 
00155   /* set the used to zero, allocated digits to the default precision
00156    * and sign to positive */
00157   a->used  = 0;
00158   a->alloc = 0;
00159   a->sign  = MP_ZPOS;
00160 #ifdef HAVE_WOLF_BIGINT
00161   wc_bigint_init(&a->raw);
00162 #endif
00163 
00164   return MP_OKAY;
00165 }
00166 
00167 
00168 /* clear one (frees)  */
00169 void mp_clear (mp_int * a)
00170 {
00171   int i;
00172 
00173   if (a == NULL)
00174       return;
00175 
00176   /* only do anything if a hasn't been freed previously */
00177   if (a->dp != NULL) {
00178     /* first zero the digits */
00179     for (i = 0; i < a->used; i++) {
00180         a->dp[i] = 0;
00181     }
00182 
00183     /* free ram */
00184     mp_free(a);
00185 
00186     /* reset members to make debugging easier */
00187     a->alloc = a->used = 0;
00188     a->sign  = MP_ZPOS;
00189   }
00190 }
00191 
00192 void mp_free (mp_int * a)
00193 {
00194   /* only do anything if a hasn't been freed previously */
00195   if (a->dp != NULL) {
00196     /* free ram */
00197     XFREE(a->dp, 0, DYNAMIC_TYPE_BIGINT);
00198     a->dp    = NULL;
00199   }
00200 
00201 #ifdef HAVE_WOLF_BIGINT
00202   wc_bigint_free(&a->raw);
00203 #endif
00204 }
00205 
00206 void mp_forcezero(mp_int * a)
00207 {
00208     if (a == NULL)
00209         return;
00210 
00211     /* only do anything if a hasn't been freed previously */
00212     if (a->dp != NULL) {
00213       /* force zero the used digits */
00214       ForceZero(a->dp, a->used * sizeof(mp_digit));
00215 
00216       /* free ram */
00217       mp_free(a);
00218 
00219       /* reset members to make debugging easier */
00220       a->alloc = a->used = 0;
00221       a->sign  = MP_ZPOS;
00222     }
00223 
00224     a->sign = MP_ZPOS;
00225     a->used = 0;
00226 }
00227 
00228 
00229 /* get the size for an unsigned equivalent */
00230 int mp_unsigned_bin_size (mp_int * a)
00231 {
00232   int     size = mp_count_bits (a);
00233   return (size / 8 + ((size & 7) != 0 ? 1 : 0));
00234 }
00235 
00236 
00237 /* returns the number of bits in an int */
00238 int mp_count_bits (mp_int * a)
00239 {
00240   int     r;
00241   mp_digit q;
00242 
00243   /* shortcut */
00244   if (a->used == 0) {
00245     return 0;
00246   }
00247 
00248   /* get number of digits and add that */
00249   r = (a->used - 1) * DIGIT_BIT;
00250 
00251   /* take the last digit and count the bits in it */
00252   q = a->dp[a->used - 1];
00253   while (q > ((mp_digit) 0)) {
00254     ++r;
00255     q >>= ((mp_digit) 1);
00256   }
00257   return r;
00258 }
00259 
00260 
00261 int mp_leading_bit (mp_int * a)
00262 {
00263     int bit = 0;
00264     mp_int t;
00265 
00266     if (mp_init_copy(&t, a) != MP_OKAY)
00267         return 0;
00268 
00269     while (mp_iszero(&t) == MP_NO) {
00270 #ifndef MP_8BIT
00271         bit = (t.dp[0] & 0x80) != 0;
00272 #else
00273         bit = (t.dp[0] | ((t.dp[1] & 0x01) << 7)) & 0x80 != 0;
00274 #endif
00275         if (mp_div_2d (&t, 8, &t, NULL) != MP_OKAY)
00276             break;
00277     }
00278     mp_clear(&t);
00279     return bit;
00280 }
00281 
00282 int mp_to_unsigned_bin_at_pos(int x, mp_int *t, unsigned char *b)
00283 {
00284   int res = 0;
00285   while (mp_iszero(t) == MP_NO) {
00286 #ifndef MP_8BIT
00287       b[x++] = (unsigned char) (t->dp[0] & 255);
00288 #else
00289       b[x++] = (unsigned char) (t->dp[0] | ((t->dp[1] & 0x01) << 7));
00290 #endif
00291     if ((res = mp_div_2d (t, 8, t, NULL)) != MP_OKAY) {
00292       return res;
00293     }
00294     res = x;
00295   }
00296   return res;
00297 }
00298 
00299 /* store in unsigned [big endian] format */
00300 int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
00301 {
00302   int     x, res;
00303   mp_int  t;
00304 
00305   if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
00306     return res;
00307   }
00308 
00309   x = mp_to_unsigned_bin_at_pos(0, &t, b);
00310   if (x < 0) {
00311     mp_clear(&t);
00312     return x;
00313   }
00314 
00315   bn_reverse (b, x);
00316   mp_clear (&t);
00317   return res;
00318 }
00319 
00320 
00321 /* creates "a" then copies b into it */
00322 int mp_init_copy (mp_int * a, mp_int * b)
00323 {
00324   int     res;
00325 
00326   if ((res = mp_init (a)) != MP_OKAY) {
00327     return res;
00328   }
00329   return mp_copy (b, a);
00330 }
00331 
00332 
00333 /* copy, b = a */
00334 int mp_copy (mp_int * a, mp_int * b)
00335 {
00336   int     res, n;
00337 
00338   /* Safeguard against passing in a null pointer */
00339   if (a == NULL || b == NULL)
00340     return MP_VAL;
00341 
00342   /* if dst == src do nothing */
00343   if (a == b) {
00344     return MP_OKAY;
00345   }
00346 
00347   /* grow dest */
00348   if (b->alloc < a->used || b->alloc == 0) {
00349      if ((res = mp_grow (b, a->used)) != MP_OKAY) {
00350         return res;
00351      }
00352   }
00353 
00354   /* zero b and copy the parameters over */
00355   {
00356     mp_digit *tmpa, *tmpb;
00357 
00358     /* pointer aliases */
00359 
00360     /* source */
00361     tmpa = a->dp;
00362 
00363     /* destination */
00364     tmpb = b->dp;
00365 
00366     /* copy all the digits */
00367     for (n = 0; n < a->used; n++) {
00368       *tmpb++ = *tmpa++;
00369     }
00370 
00371     /* clear high digits */
00372     for (; n < b->used && b->dp; n++) {
00373       *tmpb++ = 0;
00374     }
00375   }
00376 
00377   /* copy used count and sign */
00378   b->used = a->used;
00379   b->sign = a->sign;
00380   return MP_OKAY;
00381 }
00382 
00383 
00384 /* grow as required */
00385 int mp_grow (mp_int * a, int size)
00386 {
00387   int     i;
00388   mp_digit *tmp;
00389 
00390   /* if the alloc size is smaller alloc more ram */
00391   if (a->alloc < size || size == 0) {
00392     /* ensure there are always at least MP_PREC digits extra on top */
00393     size += (MP_PREC * 2) - (size % MP_PREC);
00394 
00395     /* reallocate the array a->dp
00396      *
00397      * We store the return in a temporary variable
00398      * in case the operation failed we don't want
00399      * to overwrite the dp member of a.
00400      */
00401     tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size, NULL,
00402                                                            DYNAMIC_TYPE_BIGINT);
00403     if (tmp == NULL) {
00404       /* reallocation failed but "a" is still valid [can be freed] */
00405       return MP_MEM;
00406     }
00407 
00408     /* reallocation succeeded so set a->dp */
00409     a->dp = tmp;
00410 
00411     /* zero excess digits */
00412     i        = a->alloc;
00413     a->alloc = size;
00414     for (; i < a->alloc; i++) {
00415       a->dp[i] = 0;
00416     }
00417   }
00418   return MP_OKAY;
00419 }
00420 
00421 
00422 /* shift right by a certain bit count (store quotient in c, optional
00423    remainder in d) */
00424 int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
00425 {
00426   int     D, res;
00427   mp_int  t;
00428 
00429 
00430   /* if the shift count is <= 0 then we do no work */
00431   if (b <= 0) {
00432     res = mp_copy (a, c);
00433     if (d != NULL) {
00434       mp_zero (d);
00435     }
00436     return res;
00437   }
00438 
00439   if ((res = mp_init (&t)) != MP_OKAY) {
00440     return res;
00441   }
00442 
00443   /* get the remainder */
00444   if (d != NULL) {
00445     if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
00446       mp_clear (&t);
00447       return res;
00448     }
00449   }
00450 
00451   /* copy */
00452   if ((res = mp_copy (a, c)) != MP_OKAY) {
00453     mp_clear (&t);
00454     return res;
00455   }
00456 
00457   /* shift by as many digits in the bit count */
00458   if (b >= (int)DIGIT_BIT) {
00459     mp_rshd (c, b / DIGIT_BIT);
00460   }
00461 
00462   /* shift any bit count < DIGIT_BIT */
00463   D = (b % DIGIT_BIT);
00464   if (D != 0) {
00465     mp_rshb(c, D);
00466   }
00467   mp_clamp (c);
00468   if (d != NULL) {
00469     mp_exch (&t, d);
00470   }
00471   mp_clear (&t);
00472   return MP_OKAY;
00473 }
00474 
00475 
00476 /* set to zero */
00477 void mp_zero (mp_int * a)
00478 {
00479   int       n;
00480   mp_digit *tmp;
00481 
00482   if (a == NULL)
00483       return;
00484 
00485   a->sign = MP_ZPOS;
00486   a->used = 0;
00487 #ifdef HAVE_WOLF_BIGINT
00488   wc_bigint_zero(&a->raw);
00489 #endif
00490 
00491   tmp = a->dp;
00492   for (n = 0; n < a->alloc; n++) {
00493      *tmp++ = 0;
00494   }
00495 }
00496 
00497 
00498 /* trim unused digits
00499  *
00500  * This is used to ensure that leading zero digits are
00501  * trimmed and the leading "used" digit will be non-zero
00502  * Typically very fast.  Also fixes the sign if there
00503  * are no more leading digits
00504  */
00505 void mp_clamp (mp_int * a)
00506 {
00507   /* decrease used while the most significant digit is
00508    * zero.
00509    */
00510   while (a->used > 0 && a->dp[a->used - 1] == 0) {
00511     --(a->used);
00512   }
00513 
00514   /* reset the sign flag if used == 0 */
00515   if (a->used == 0) {
00516     a->sign = MP_ZPOS;
00517   }
00518 }
00519 
00520 
00521 /* swap the elements of two integers, for cases where you can't simply swap the
00522  * mp_int pointers around
00523  */
00524 void mp_exch (mp_int * a, mp_int * b)
00525 {
00526   mp_int  t;
00527 
00528   t  = *a;
00529   *a = *b;
00530   *b = t;
00531 }
00532 
00533 
00534 /* shift right a certain number of bits */
00535 void mp_rshb (mp_int *c, int x)
00536 {
00537     mp_digit *tmpc, mask, shift;
00538     mp_digit r, rr;
00539     mp_digit D = x;
00540 
00541     /* mask */
00542     mask = (((mp_digit)1) << D) - 1;
00543 
00544     /* shift for lsb */
00545     shift = DIGIT_BIT - D;
00546 
00547     /* alias */
00548     tmpc = c->dp + (c->used - 1);
00549 
00550     /* carry */
00551     r = 0;
00552     for (x = c->used - 1; x >= 0; x--) {
00553       /* get the lower  bits of this word in a temp */
00554       rr = *tmpc & mask;
00555 
00556       /* shift the current word and mix in the carry bits from previous word */
00557       *tmpc = (*tmpc >> D) | (r << shift);
00558       --tmpc;
00559 
00560       /* set the carry to the carry bits of the current word found above */
00561       r = rr;
00562     }
00563     mp_clamp(c);
00564 }
00565 
00566 
00567 /* shift right a certain amount of digits */
00568 void mp_rshd (mp_int * a, int b)
00569 {
00570   int     x;
00571 
00572   /* if b <= 0 then ignore it */
00573   if (b <= 0) {
00574     return;
00575   }
00576 
00577   /* if b > used then simply zero it and return */
00578   if (a->used <= b) {
00579     mp_zero (a);
00580     return;
00581   }
00582 
00583   {
00584     mp_digit *bottom, *top;
00585 
00586     /* shift the digits down */
00587 
00588     /* bottom */
00589     bottom = a->dp;
00590 
00591     /* top [offset into digits] */
00592     top = a->dp + b;
00593 
00594     /* this is implemented as a sliding window where
00595      * the window is b-digits long and digits from
00596      * the top of the window are copied to the bottom
00597      *
00598      * e.g.
00599 
00600      b-2 | b-1 | b0 | b1 | b2 | ... | bb |   ---->
00601                  /\                   |      ---->
00602                   \-------------------/      ---->
00603      */
00604     for (x = 0; x < (a->used - b); x++) {
00605       *bottom++ = *top++;
00606     }
00607 
00608     /* zero the top digits */
00609     for (; x < a->used; x++) {
00610       *bottom++ = 0;
00611     }
00612   }
00613 
00614   /* remove excess digits */
00615   a->used -= b;
00616 }
00617 
00618 
00619 /* calc a value mod 2**b */
00620 int mp_mod_2d (mp_int * a, int b, mp_int * c)
00621 {
00622   int     x, res;
00623 
00624   /* if b is <= 0 then zero the int */
00625   if (b <= 0) {
00626     mp_zero (c);
00627     return MP_OKAY;
00628   }
00629 
00630   /* if the modulus is larger than the value than return */
00631   if (b >= (int) (a->used * DIGIT_BIT)) {
00632     res = mp_copy (a, c);
00633     return res;
00634   }
00635 
00636   /* copy */
00637   if ((res = mp_copy (a, c)) != MP_OKAY) {
00638     return res;
00639   }
00640 
00641   /* zero digits above the last digit of the modulus */
00642   for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
00643     c->dp[x] = 0;
00644   }
00645   /* clear the digit that is not completely outside/inside the modulus */
00646   c->dp[b / DIGIT_BIT] &= (mp_digit) ((((mp_digit) 1) <<
00647               (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1));
00648   mp_clamp (c);
00649   return MP_OKAY;
00650 }
00651 
00652 
00653 /* reads a unsigned char array, assumes the msb is stored first [big endian] */
00654 int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
00655 {
00656   int     res;
00657 
00658   /* make sure there are at least two digits */
00659   if (a->alloc < 2) {
00660      if ((res = mp_grow(a, 2)) != MP_OKAY) {
00661         return res;
00662      }
00663   }
00664 
00665   /* zero the int */
00666   mp_zero (a);
00667 
00668   /* read the bytes in */
00669   while (c-- > 0) {
00670     if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
00671       return res;
00672     }
00673 
00674 #ifndef MP_8BIT
00675       a->dp[0] |= *b++;
00676       a->used += 1;
00677 #else
00678       a->dp[0] = (*b & MP_MASK);
00679       a->dp[1] |= ((*b++ >> 7U) & 1);
00680       a->used += 2;
00681 #endif
00682   }
00683   mp_clamp (a);
00684   return MP_OKAY;
00685 }
00686 
00687 
00688 /* shift left by a certain bit count */
00689 int mp_mul_2d (mp_int * a, int b, mp_int * c)
00690 {
00691   mp_digit d;
00692   int      res;
00693 
00694   /* copy */
00695   if (a != c) {
00696      if ((res = mp_copy (a, c)) != MP_OKAY) {
00697        return res;
00698      }
00699   }
00700 
00701   if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) {
00702      if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
00703        return res;
00704      }
00705   }
00706 
00707   /* shift by as many digits in the bit count */
00708   if (b >= (int)DIGIT_BIT) {
00709     if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
00710       return res;
00711     }
00712   }
00713 
00714   /* shift any bit count < DIGIT_BIT */
00715   d = (mp_digit) (b % DIGIT_BIT);
00716   if (d != 0) {
00717     mp_digit *tmpc, shift, mask, r, rr;
00718     int x;
00719 
00720     /* bitmask for carries */
00721     mask = (((mp_digit)1) << d) - 1;
00722 
00723     /* shift for msbs */
00724     shift = DIGIT_BIT - d;
00725 
00726     /* alias */
00727     tmpc = c->dp;
00728 
00729     /* carry */
00730     r    = 0;
00731     for (x = 0; x < c->used; x++) {
00732       /* get the higher bits of the current word */
00733       rr = (*tmpc >> shift) & mask;
00734 
00735       /* shift the current word and OR in the carry */
00736       *tmpc = (mp_digit)(((*tmpc << d) | r) & MP_MASK);
00737       ++tmpc;
00738 
00739       /* set the carry to the carry bits of the current word */
00740       r = rr;
00741     }
00742 
00743     /* set final carry */
00744     if (r != 0) {
00745        c->dp[(c->used)++] = r;
00746     }
00747   }
00748   mp_clamp (c);
00749   return MP_OKAY;
00750 }
00751 
00752 
00753 /* shift left a certain amount of digits */
00754 int mp_lshd (mp_int * a, int b)
00755 {
00756   int     x, res;
00757 
00758   /* if its less than zero return */
00759   if (b <= 0) {
00760     return MP_OKAY;
00761   }
00762 
00763   /* grow to fit the new digits */
00764   if (a->alloc < a->used + b) {
00765      if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
00766        return res;
00767      }
00768   }
00769 
00770   {
00771     mp_digit *top, *bottom;
00772 
00773     /* increment the used by the shift amount then copy upwards */
00774     a->used += b;
00775 
00776     /* top */
00777     top = a->dp + a->used - 1;
00778 
00779     /* base */
00780     bottom = a->dp + a->used - 1 - b;
00781 
00782     /* much like mp_rshd this is implemented using a sliding window
00783      * except the window goes the other way around.  Copying from
00784      * the bottom to the top.  see bn_mp_rshd.c for more info.
00785      */
00786     for (x = a->used - 1; x >= b; x--) {
00787       *top-- = *bottom--;
00788     }
00789 
00790     /* zero the lower digits */
00791     top = a->dp;
00792     for (x = 0; x < b; x++) {
00793       *top++ = 0;
00794     }
00795   }
00796   return MP_OKAY;
00797 }
00798 
00799 
00800 /* this is a shell function that calls either the normal or Montgomery
00801  * exptmod functions.  Originally the call to the montgomery code was
00802  * embedded in the normal function but that wasted a lot of stack space
00803  * for nothing (since 99% of the time the Montgomery code would be called)
00804  */
00805 #if defined(FREESCALE_LTC_TFM)
00806 int wolfcrypt_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
00807 #else
00808 int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
00809 #endif
00810 {
00811   int dr;
00812 
00813   /* modulus P must be positive */
00814   if (P->sign == MP_NEG) {
00815      return MP_VAL;
00816   }
00817 
00818   /* if exponent X is negative we have to recurse */
00819   if (X->sign == MP_NEG) {
00820 #ifdef BN_MP_INVMOD_C
00821      mp_int tmpG, tmpX;
00822      int err;
00823 
00824      /* first compute 1/G mod P */
00825      if ((err = mp_init(&tmpG)) != MP_OKAY) {
00826         return err;
00827      }
00828      if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
00829         mp_clear(&tmpG);
00830         return err;
00831      }
00832 
00833      /* now get |X| */
00834      if ((err = mp_init(&tmpX)) != MP_OKAY) {
00835         mp_clear(&tmpG);
00836         return err;
00837      }
00838      if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
00839         mp_clear(&tmpG);
00840         mp_clear(&tmpX);
00841         return err;
00842      }
00843 
00844      /* and now compute (1/G)**|X| instead of G**X [X < 0] */
00845      err = mp_exptmod(&tmpG, &tmpX, P, Y);
00846      mp_clear(&tmpG);
00847      mp_clear(&tmpX);
00848      return err;
00849 #else
00850      /* no invmod */
00851      return MP_VAL;
00852 #endif
00853   }
00854 
00855 /* modified diminished radix reduction */
00856 #if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && \
00857   defined(BN_S_MP_EXPTMOD_C)
00858   if (mp_reduce_is_2k_l(P) == MP_YES) {
00859      return s_mp_exptmod(G, X, P, Y, 1);
00860   }
00861 #endif
00862 
00863 #ifdef BN_MP_DR_IS_MODULUS_C
00864   /* is it a DR modulus? */
00865   dr = mp_dr_is_modulus(P);
00866 #else
00867   /* default to no */
00868   dr = 0;
00869 #endif
00870 
00871 #ifdef BN_MP_REDUCE_IS_2K_C
00872   /* if not, is it a unrestricted DR modulus? */
00873   if (dr == 0) {
00874      dr = mp_reduce_is_2k(P) << 1;
00875   }
00876 #endif
00877 
00878   /* if the modulus is odd or dr != 0 use the montgomery method */
00879 #ifdef BN_MP_EXPTMOD_FAST_C
00880   if (mp_isodd (P) == MP_YES || dr !=  0) {
00881     return mp_exptmod_fast (G, X, P, Y, dr);
00882   } else {
00883 #endif
00884 #ifdef BN_S_MP_EXPTMOD_C
00885     /* otherwise use the generic Barrett reduction technique */
00886     return s_mp_exptmod (G, X, P, Y, 0);
00887 #else
00888     /* no exptmod for evens */
00889     return MP_VAL;
00890 #endif
00891 #ifdef BN_MP_EXPTMOD_FAST_C
00892   }
00893 #endif
00894 }
00895 
00896 
00897 /* b = |a|
00898  *
00899  * Simple function copies the input and fixes the sign to positive
00900  */
00901 int mp_abs (mp_int * a, mp_int * b)
00902 {
00903   int     res;
00904 
00905   /* copy a to b */
00906   if (a != b) {
00907      if ((res = mp_copy (a, b)) != MP_OKAY) {
00908        return res;
00909      }
00910   }
00911 
00912   /* force the sign of b to positive */
00913   b->sign = MP_ZPOS;
00914 
00915   return MP_OKAY;
00916 }
00917 
00918 
00919 /* hac 14.61, pp608 */
00920 #if defined(FREESCALE_LTC_TFM)
00921 int wolfcrypt_mp_invmod(mp_int * a, mp_int * b, mp_int * c)
00922 #else
00923 int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
00924 #endif
00925 {
00926   /* b cannot be negative */
00927   if (b->sign == MP_NEG || mp_iszero(b) == MP_YES) {
00928     return MP_VAL;
00929   }
00930 
00931 #ifdef BN_FAST_MP_INVMOD_C
00932   /* if the modulus is odd we can use a faster routine instead */
00933   if (mp_isodd (b) == MP_YES) {
00934     return fast_mp_invmod (a, b, c);
00935   }
00936 #endif
00937 
00938 #ifdef BN_MP_INVMOD_SLOW_C
00939   return mp_invmod_slow(a, b, c);
00940 #endif
00941 }
00942 
00943 
00944 /* computes the modular inverse via binary extended euclidean algorithm,
00945  * that is c = 1/a mod b
00946  *
00947  * Based on slow invmod except this is optimized for the case where b is
00948  * odd as per HAC Note 14.64 on pp. 610
00949  */
00950 int fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
00951 {
00952   mp_int  x, y, u, v, B, D;
00953   int     res, neg, loop_check = 0;
00954 
00955   /* 2. [modified] b must be odd   */
00956   if (mp_iseven (b) == MP_YES) {
00957     return MP_VAL;
00958   }
00959 
00960   /* init all our temps */
00961   if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D)) != MP_OKAY) {
00962      return res;
00963   }
00964 
00965   /* x == modulus, y == value to invert */
00966   if ((res = mp_copy (b, &x)) != MP_OKAY) {
00967     goto LBL_ERR;
00968   }
00969 
00970   /* we need y = |a| */
00971   if ((res = mp_mod (a, b, &y)) != MP_OKAY) {
00972     goto LBL_ERR;
00973   }
00974 
00975   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
00976   if ((res = mp_copy (&x, &u)) != MP_OKAY) {
00977     goto LBL_ERR;
00978   }
00979   if ((res = mp_copy (&y, &v)) != MP_OKAY) {
00980     goto LBL_ERR;
00981   }
00982   if ((res = mp_set (&D, 1)) != MP_OKAY) {
00983     goto LBL_ERR;
00984   }
00985 
00986 top:
00987   /* 4.  while u is even do */
00988   while (mp_iseven (&u) == MP_YES) {
00989     /* 4.1 u = u/2 */
00990     if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
00991       goto LBL_ERR;
00992     }
00993     /* 4.2 if B is odd then */
00994     if (mp_isodd (&B) == MP_YES) {
00995       if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
00996         goto LBL_ERR;
00997       }
00998     }
00999     /* B = B/2 */
01000     if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
01001       goto LBL_ERR;
01002     }
01003   }
01004 
01005   /* 5.  while v is even do */
01006   while (mp_iseven (&v) == MP_YES) {
01007     /* 5.1 v = v/2 */
01008     if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
01009       goto LBL_ERR;
01010     }
01011     /* 5.2 if D is odd then */
01012     if (mp_isodd (&D) == MP_YES) {
01013       /* D = (D-x)/2 */
01014       if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
01015         goto LBL_ERR;
01016       }
01017     }
01018     /* D = D/2 */
01019     if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
01020       goto LBL_ERR;
01021     }
01022   }
01023 
01024   /* 6.  if u >= v then */
01025   if (mp_cmp (&u, &v) != MP_LT) {
01026     /* u = u - v, B = B - D */
01027     if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
01028       goto LBL_ERR;
01029     }
01030 
01031     if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
01032       goto LBL_ERR;
01033     }
01034   } else {
01035     /* v - v - u, D = D - B */
01036     if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
01037       goto LBL_ERR;
01038     }
01039 
01040     if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
01041       goto LBL_ERR;
01042     }
01043   }
01044 
01045   /* if not zero goto step 4 */
01046   if (mp_iszero (&u) == MP_NO) {
01047     if (++loop_check > 4096) {
01048         res = MP_VAL;
01049         goto LBL_ERR;
01050     }
01051     goto top;
01052   }
01053 
01054   /* now a = C, b = D, gcd == g*v */
01055 
01056   /* if v != 1 then there is no inverse */
01057   if (mp_cmp_d (&v, 1) != MP_EQ) {
01058     res = MP_VAL;
01059     goto LBL_ERR;
01060   }
01061 
01062   /* b is now the inverse */
01063   neg = a->sign;
01064   while (D.sign == MP_NEG) {
01065     if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
01066       goto LBL_ERR;
01067     }
01068   }
01069   /* too big */
01070   while (mp_cmp_mag(&D, b) != MP_LT) {
01071       if ((res = mp_sub(&D, b, &D)) != MP_OKAY) {
01072          goto LBL_ERR;
01073       }
01074   }
01075   mp_exch (&D, c);
01076   c->sign = neg;
01077   res = MP_OKAY;
01078 
01079 LBL_ERR:mp_clear(&x);
01080         mp_clear(&y);
01081         mp_clear(&u);
01082         mp_clear(&v);
01083         mp_clear(&B);
01084         mp_clear(&D);
01085   return res;
01086 }
01087 
01088 
01089 /* hac 14.61, pp608 */
01090 int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
01091 {
01092   mp_int  x, y, u, v, A, B, C, D;
01093   int     res;
01094 
01095   /* b cannot be negative */
01096   if (b->sign == MP_NEG || mp_iszero(b) == MP_YES) {
01097     return MP_VAL;
01098   }
01099 
01100   /* init temps */
01101   if ((res = mp_init_multi(&x, &y, &u, &v,
01102                            &A, &B)) != MP_OKAY) {
01103      return res;
01104   }
01105 
01106   /* init rest of tmps temps */
01107   if ((res = mp_init_multi(&C, &D, 0, 0, 0, 0)) != MP_OKAY) {
01108      mp_clear(&x);
01109      mp_clear(&y);
01110      mp_clear(&u);
01111      mp_clear(&v);
01112      mp_clear(&A);
01113      mp_clear(&B);
01114      return res;
01115   }
01116 
01117   /* x = a, y = b */
01118   if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
01119       goto LBL_ERR;
01120   }
01121   if ((res = mp_copy (b, &y)) != MP_OKAY) {
01122     goto LBL_ERR;
01123   }
01124 
01125   /* 2. [modified] if x,y are both even then return an error! */
01126   if (mp_iseven (&x) == MP_YES && mp_iseven (&y) == MP_YES) {
01127     res = MP_VAL;
01128     goto LBL_ERR;
01129   }
01130 
01131   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
01132   if ((res = mp_copy (&x, &u)) != MP_OKAY) {
01133     goto LBL_ERR;
01134   }
01135   if ((res = mp_copy (&y, &v)) != MP_OKAY) {
01136     goto LBL_ERR;
01137   }
01138   if ((res = mp_set (&A, 1)) != MP_OKAY) {
01139     goto LBL_ERR;
01140   }
01141   if ((res = mp_set (&D, 1)) != MP_OKAY) {
01142     goto LBL_ERR;
01143   }
01144 
01145 top:
01146   /* 4.  while u is even do */
01147   while (mp_iseven (&u) == MP_YES) {
01148     /* 4.1 u = u/2 */
01149     if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
01150       goto LBL_ERR;
01151     }
01152     /* 4.2 if A or B is odd then */
01153     if (mp_isodd (&A) == MP_YES || mp_isodd (&B) == MP_YES) {
01154       /* A = (A+y)/2, B = (B-x)/2 */
01155       if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
01156          goto LBL_ERR;
01157       }
01158       if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
01159          goto LBL_ERR;
01160       }
01161     }
01162     /* A = A/2, B = B/2 */
01163     if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
01164       goto LBL_ERR;
01165     }
01166     if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
01167       goto LBL_ERR;
01168     }
01169   }
01170 
01171   /* 5.  while v is even do */
01172   while (mp_iseven (&v) == MP_YES) {
01173     /* 5.1 v = v/2 */
01174     if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
01175       goto LBL_ERR;
01176     }
01177     /* 5.2 if C or D is odd then */
01178     if (mp_isodd (&C) == MP_YES || mp_isodd (&D) == MP_YES) {
01179       /* C = (C+y)/2, D = (D-x)/2 */
01180       if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
01181          goto LBL_ERR;
01182       }
01183       if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
01184          goto LBL_ERR;
01185       }
01186     }
01187     /* C = C/2, D = D/2 */
01188     if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
01189       goto LBL_ERR;
01190     }
01191     if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
01192       goto LBL_ERR;
01193     }
01194   }
01195 
01196   /* 6.  if u >= v then */
01197   if (mp_cmp (&u, &v) != MP_LT) {
01198     /* u = u - v, A = A - C, B = B - D */
01199     if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
01200       goto LBL_ERR;
01201     }
01202 
01203     if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
01204       goto LBL_ERR;
01205     }
01206 
01207     if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
01208       goto LBL_ERR;
01209     }
01210   } else {
01211     /* v - v - u, C = C - A, D = D - B */
01212     if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
01213       goto LBL_ERR;
01214     }
01215 
01216     if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
01217       goto LBL_ERR;
01218     }
01219 
01220     if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
01221       goto LBL_ERR;
01222     }
01223   }
01224 
01225   /* if not zero goto step 4 */
01226   if (mp_iszero (&u) == MP_NO)
01227     goto top;
01228 
01229   /* now a = C, b = D, gcd == g*v */
01230 
01231   /* if v != 1 then there is no inverse */
01232   if (mp_cmp_d (&v, 1) != MP_EQ) {
01233     res = MP_VAL;
01234     goto LBL_ERR;
01235   }
01236 
01237   /* if its too low */
01238   while (mp_cmp_d(&C, 0) == MP_LT) {
01239       if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
01240          goto LBL_ERR;
01241       }
01242   }
01243 
01244   /* too big */
01245   while (mp_cmp_mag(&C, b) != MP_LT) {
01246       if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
01247          goto LBL_ERR;
01248       }
01249   }
01250 
01251   /* C is now the inverse */
01252   mp_exch (&C, c);
01253   res = MP_OKAY;
01254 LBL_ERR:mp_clear(&x);
01255         mp_clear(&y);
01256         mp_clear(&u);
01257         mp_clear(&v);
01258         mp_clear(&A);
01259         mp_clear(&B);
01260         mp_clear(&C);
01261         mp_clear(&D);
01262   return res;
01263 }
01264 
01265 
01266 /* compare magnitude of two ints (unsigned) */
01267 int mp_cmp_mag (mp_int * a, mp_int * b)
01268 {
01269   int     n;
01270   mp_digit *tmpa, *tmpb;
01271 
01272   /* compare based on # of non-zero digits */
01273   if (a->used > b->used) {
01274     return MP_GT;
01275   }
01276 
01277   if (a->used < b->used) {
01278     return MP_LT;
01279   }
01280 
01281   /* alias for a */
01282   tmpa = a->dp + (a->used - 1);
01283 
01284   /* alias for b */
01285   tmpb = b->dp + (a->used - 1);
01286 
01287   /* compare based on digits  */
01288   for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
01289     if (*tmpa > *tmpb) {
01290       return MP_GT;
01291     }
01292 
01293     if (*tmpa < *tmpb) {
01294       return MP_LT;
01295     }
01296   }
01297   return MP_EQ;
01298 }
01299 
01300 
01301 /* compare two ints (signed)*/
01302 int mp_cmp (mp_int * a, mp_int * b)
01303 {
01304   /* compare based on sign */
01305   if (a->sign != b->sign) {
01306      if (a->sign == MP_NEG) {
01307         return MP_LT;
01308      } else {
01309         return MP_GT;
01310      }
01311   }
01312 
01313   /* compare digits */
01314   if (a->sign == MP_NEG) {
01315      /* if negative compare opposite direction */
01316      return mp_cmp_mag(b, a);
01317   } else {
01318      return mp_cmp_mag(a, b);
01319   }
01320 }
01321 
01322 
01323 /* compare a digit */
01324 int mp_cmp_d(mp_int * a, mp_digit b)
01325 {
01326   /* special case for zero*/
01327   if (a->used == 0 && b == 0)
01328     return MP_EQ;
01329 
01330   /* compare based on sign */
01331   if ((b && a->used == 0) || a->sign == MP_NEG) {
01332     return MP_LT;
01333   }
01334 
01335   /* compare based on magnitude */
01336   if (a->used > 1) {
01337     return MP_GT;
01338   }
01339 
01340   /* compare the only digit of a to b */
01341   if (a->dp[0] > b) {
01342     return MP_GT;
01343   } else if (a->dp[0] < b) {
01344     return MP_LT;
01345   } else {
01346     return MP_EQ;
01347   }
01348 }
01349 
01350 
01351 /* set to a digit */
01352 int mp_set (mp_int * a, mp_digit b)
01353 {
01354   int res;
01355   mp_zero (a);
01356   res = mp_grow (a, 1);
01357   if (res == MP_OKAY) {
01358     a->dp[0] = (mp_digit)(b & MP_MASK);
01359     a->used  = (a->dp[0] != 0) ? 1 : 0;
01360   }
01361   return res;
01362 }
01363 
01364 /* chek if a bit is set */
01365 int mp_is_bit_set (mp_int *a, mp_digit b)
01366 {
01367     if ((mp_digit)a->used < b/DIGIT_BIT)
01368         return 0;
01369 
01370     return (int)((a->dp[b/DIGIT_BIT] >> b%DIGIT_BIT) & (mp_digit)1);
01371 }
01372 
01373 /* c = a mod b, 0 <= c < b */
01374 #if defined(FREESCALE_LTC_TFM)
01375 int wolfcrypt_mp_mod(mp_int * a, mp_int * b, mp_int * c)
01376 #else
01377 int mp_mod (mp_int * a, mp_int * b, mp_int * c)
01378 #endif
01379 {
01380   mp_int  t;
01381   int     res;
01382 
01383   if ((res = mp_init (&t)) != MP_OKAY) {
01384     return res;
01385   }
01386 
01387   if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
01388     mp_clear (&t);
01389     return res;
01390   }
01391 
01392   if (t.sign != b->sign) {
01393     res = mp_add (b, &t, c);
01394   } else {
01395     res = MP_OKAY;
01396     mp_exch (&t, c);
01397   }
01398 
01399   mp_clear (&t);
01400   return res;
01401 }
01402 
01403 
01404 /* slower bit-bang division... also smaller */
01405 int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
01406 {
01407    mp_int ta, tb, tq, q;
01408    int    res, n, n2;
01409 
01410   /* is divisor zero ? */
01411   if (mp_iszero (b) == MP_YES) {
01412     return MP_VAL;
01413   }
01414 
01415   /* if a < b then q=0, r = a */
01416   if (mp_cmp_mag (a, b) == MP_LT) {
01417     if (d != NULL) {
01418       res = mp_copy (a, d);
01419     } else {
01420       res = MP_OKAY;
01421     }
01422     if (c != NULL) {
01423       mp_zero (c);
01424     }
01425     return res;
01426   }
01427 
01428   /* init our temps */
01429   if ((res = mp_init_multi(&ta, &tb, &tq, &q, 0, 0)) != MP_OKAY) {
01430      return res;
01431   }
01432 
01433   if ((res = mp_set(&tq, 1)) != MP_OKAY) {
01434      return res;
01435   }
01436   n = mp_count_bits(a) - mp_count_bits(b);
01437   if (((res = mp_abs(a, &ta)) != MP_OKAY) ||
01438       ((res = mp_abs(b, &tb)) != MP_OKAY) ||
01439       ((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) ||
01440       ((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
01441       goto LBL_ERR;
01442   }
01443 
01444   while (n-- >= 0) {
01445      if (mp_cmp(&tb, &ta) != MP_GT) {
01446         if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) ||
01447             ((res = mp_add(&q, &tq, &q)) != MP_OKAY)) {
01448            goto LBL_ERR;
01449         }
01450      }
01451      if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) ||
01452          ((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) {
01453            goto LBL_ERR;
01454      }
01455   }
01456 
01457   /* now q == quotient and ta == remainder */
01458   n  = a->sign;
01459   n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG);
01460   if (c != NULL) {
01461      mp_exch(c, &q);
01462      c->sign  = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2;
01463   }
01464   if (d != NULL) {
01465      mp_exch(d, &ta);
01466      d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n;
01467   }
01468 LBL_ERR:
01469    mp_clear(&ta);
01470    mp_clear(&tb);
01471    mp_clear(&tq);
01472    mp_clear(&q);
01473    return res;
01474 }
01475 
01476 
01477 /* b = a/2 */
01478 int mp_div_2(mp_int * a, mp_int * b)
01479 {
01480   int     x, res, oldused;
01481 
01482   /* copy */
01483   if (b->alloc < a->used) {
01484     if ((res = mp_grow (b, a->used)) != MP_OKAY) {
01485       return res;
01486     }
01487   }
01488 
01489   oldused = b->used;
01490   b->used = a->used;
01491   {
01492     mp_digit r, rr, *tmpa, *tmpb;
01493 
01494     /* source alias */
01495     tmpa = a->dp + b->used - 1;
01496 
01497     /* dest alias */
01498     tmpb = b->dp + b->used - 1;
01499 
01500     /* carry */
01501     r = 0;
01502     for (x = b->used - 1; x >= 0; x--) {
01503       /* get the carry for the next iteration */
01504       rr = *tmpa & 1;
01505 
01506       /* shift the current digit, add in carry and store */
01507       *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
01508 
01509       /* forward carry to next iteration */
01510       r = rr;
01511     }
01512 
01513     /* zero excess digits */
01514     tmpb = b->dp + b->used;
01515     for (x = b->used; x < oldused; x++) {
01516       *tmpb++ = 0;
01517     }
01518   }
01519   b->sign = a->sign;
01520   mp_clamp (b);
01521   return MP_OKAY;
01522 }
01523 
01524 
01525 /* high level addition (handles signs) */
01526 int mp_add (mp_int * a, mp_int * b, mp_int * c)
01527 {
01528   int sa, sb, res;
01529 
01530   /* get sign of both inputs */
01531   sa = a->sign;
01532   sb = b->sign;
01533 
01534   /* handle two cases, not four */
01535   if (sa == sb) {
01536     /* both positive or both negative */
01537     /* add their magnitudes, copy the sign */
01538     c->sign = sa;
01539     res = s_mp_add (a, b, c);
01540   } else {
01541     /* one positive, the other negative */
01542     /* subtract the one with the greater magnitude from */
01543     /* the one of the lesser magnitude.  The result gets */
01544     /* the sign of the one with the greater magnitude. */
01545     if (mp_cmp_mag (a, b) == MP_LT) {
01546       c->sign = sb;
01547       res = s_mp_sub (b, a, c);
01548     } else {
01549       c->sign = sa;
01550       res = s_mp_sub (a, b, c);
01551     }
01552   }
01553   return res;
01554 }
01555 
01556 
01557 /* low level addition, based on HAC pp.594, Algorithm 14.7 */
01558 int s_mp_add (mp_int * a, mp_int * b, mp_int * c)
01559 {
01560   mp_int *x;
01561   int     olduse, res, min_ab, max_ab;
01562 
01563   /* find sizes, we let |a| <= |b| which means we have to sort
01564    * them.  "x" will point to the input with the most digits
01565    */
01566   if (a->used > b->used) {
01567     min_ab = b->used;
01568     max_ab = a->used;
01569     x = a;
01570   } else {
01571     min_ab = a->used;
01572     max_ab = b->used;
01573     x = b;
01574   }
01575 
01576   /* init result */
01577   if (c->alloc < max_ab + 1) {
01578     if ((res = mp_grow (c, max_ab + 1)) != MP_OKAY) {
01579       return res;
01580     }
01581   }
01582 
01583   /* get old used digit count and set new one */
01584   olduse = c->used;
01585   c->used = max_ab + 1;
01586 
01587   {
01588     mp_digit u, *tmpa, *tmpb, *tmpc;
01589     int i;
01590 
01591     /* alias for digit pointers */
01592 
01593     /* first input */
01594     tmpa = a->dp;
01595 
01596     /* second input */
01597     tmpb = b->dp;
01598 
01599     /* destination */
01600     tmpc = c->dp;
01601 
01602     /* zero the carry */
01603     u = 0;
01604     for (i = 0; i < min_ab; i++) {
01605       /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
01606       *tmpc = *tmpa++ + *tmpb++ + u;
01607 
01608       /* U = carry bit of T[i] */
01609       u = *tmpc >> ((mp_digit)DIGIT_BIT);
01610 
01611       /* take away carry bit from T[i] */
01612       *tmpc++ &= MP_MASK;
01613     }
01614 
01615     /* now copy higher words if any, that is in A+B
01616      * if A or B has more digits add those in
01617      */
01618     if (min_ab != max_ab) {
01619       for (; i < max_ab; i++) {
01620         /* T[i] = X[i] + U */
01621         *tmpc = x->dp[i] + u;
01622 
01623         /* U = carry bit of T[i] */
01624         u = *tmpc >> ((mp_digit)DIGIT_BIT);
01625 
01626         /* take away carry bit from T[i] */
01627         *tmpc++ &= MP_MASK;
01628       }
01629     }
01630 
01631     /* add carry */
01632     *tmpc++ = u;
01633 
01634     /* clear digits above olduse */
01635     for (i = c->used; i < olduse; i++) {
01636       *tmpc++ = 0;
01637     }
01638   }
01639 
01640   mp_clamp (c);
01641   return MP_OKAY;
01642 }
01643 
01644 
01645 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
01646 int s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
01647 {
01648   int     olduse, res, min_b, max_a;
01649 
01650   /* find sizes */
01651   min_b = b->used;
01652   max_a = a->used;
01653 
01654   /* init result */
01655   if (c->alloc < max_a) {
01656     if ((res = mp_grow (c, max_a)) != MP_OKAY) {
01657       return res;
01658     }
01659   }
01660 
01661   /* sanity check on destination */
01662   if (c->dp == NULL)
01663      return MP_VAL;
01664 
01665   olduse = c->used;
01666   c->used = max_a;
01667 
01668   {
01669     mp_digit u, *tmpa, *tmpb, *tmpc;
01670     int i;
01671 
01672     /* alias for digit pointers */
01673     tmpa = a->dp;
01674     tmpb = b->dp;
01675     tmpc = c->dp;
01676 
01677     /* set carry to zero */
01678     u = 0;
01679     for (i = 0; i < min_b; i++) {
01680       /* T[i] = A[i] - B[i] - U */
01681       *tmpc = *tmpa++ - *tmpb++ - u;
01682 
01683       /* U = carry bit of T[i]
01684        * Note this saves performing an AND operation since
01685        * if a carry does occur it will propagate all the way to the
01686        * MSB.  As a result a single shift is enough to get the carry
01687        */
01688       u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
01689 
01690       /* Clear carry from T[i] */
01691       *tmpc++ &= MP_MASK;
01692     }
01693 
01694     /* now copy higher words if any, e.g. if A has more digits than B  */
01695     for (; i < max_a; i++) {
01696       /* T[i] = A[i] - U */
01697       *tmpc = *tmpa++ - u;
01698 
01699       /* U = carry bit of T[i] */
01700       u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
01701 
01702       /* Clear carry from T[i] */
01703       *tmpc++ &= MP_MASK;
01704     }
01705 
01706     /* clear digits above used (since we may not have grown result above) */
01707     for (i = c->used; i < olduse; i++) {
01708       *tmpc++ = 0;
01709     }
01710   }
01711 
01712   mp_clamp (c);
01713   return MP_OKAY;
01714 }
01715 
01716 
01717 /* high level subtraction (handles signs) */
01718 int mp_sub (mp_int * a, mp_int * b, mp_int * c)
01719 {
01720   int     sa, sb, res;
01721 
01722   sa = a->sign;
01723   sb = b->sign;
01724 
01725   if (sa != sb) {
01726     /* subtract a negative from a positive, OR */
01727     /* subtract a positive from a negative. */
01728     /* In either case, ADD their magnitudes, */
01729     /* and use the sign of the first number. */
01730     c->sign = sa;
01731     res = s_mp_add (a, b, c);
01732   } else {
01733     /* subtract a positive from a positive, OR */
01734     /* subtract a negative from a negative. */
01735     /* First, take the difference between their */
01736     /* magnitudes, then... */
01737     if (mp_cmp_mag (a, b) != MP_LT) {
01738       /* Copy the sign from the first */
01739       c->sign = sa;
01740       /* The first has a larger or equal magnitude */
01741       res = s_mp_sub (a, b, c);
01742     } else {
01743       /* The result has the *opposite* sign from */
01744       /* the first number. */
01745       c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
01746       /* The second has a larger magnitude */
01747       res = s_mp_sub (b, a, c);
01748     }
01749   }
01750   return res;
01751 }
01752 
01753 
01754 /* determines if reduce_2k_l can be used */
01755 int mp_reduce_is_2k_l(mp_int *a)
01756 {
01757    int ix, iy;
01758 
01759    if (a->used == 0) {
01760       return MP_NO;
01761    } else if (a->used == 1) {
01762       return MP_YES;
01763    } else if (a->used > 1) {
01764       /* if more than half of the digits are -1 we're sold */
01765       for (iy = ix = 0; ix < a->used; ix++) {
01766           if (a->dp[ix] == MP_MASK) {
01767               ++iy;
01768           }
01769       }
01770       return (iy >= (a->used/2)) ? MP_YES : MP_NO;
01771 
01772    }
01773    return MP_NO;
01774 }
01775 
01776 
01777 /* determines if mp_reduce_2k can be used */
01778 int mp_reduce_is_2k(mp_int *a)
01779 {
01780    int ix, iy, iw;
01781    mp_digit iz;
01782 
01783    if (a->used == 0) {
01784       return MP_NO;
01785    } else if (a->used == 1) {
01786       return MP_YES;
01787    } else if (a->used > 1) {
01788       iy = mp_count_bits(a);
01789       iz = 1;
01790       iw = 1;
01791 
01792       /* Test every bit from the second digit up, must be 1 */
01793       for (ix = DIGIT_BIT; ix < iy; ix++) {
01794           if ((a->dp[iw] & iz) == 0) {
01795              return MP_NO;
01796           }
01797           iz <<= 1;
01798           if (iz > (mp_digit)MP_MASK) {
01799              ++iw;
01800              iz = 1;
01801           }
01802       }
01803    }
01804    return MP_YES;
01805 }
01806 
01807 
01808 /* determines if a number is a valid DR modulus */
01809 int mp_dr_is_modulus(mp_int *a)
01810 {
01811    int ix;
01812 
01813    /* must be at least two digits */
01814    if (a->used < 2) {
01815       return 0;
01816    }
01817 
01818    /* must be of the form b**k - a [a <= b] so all
01819     * but the first digit must be equal to -1 (mod b).
01820     */
01821    for (ix = 1; ix < a->used; ix++) {
01822        if (a->dp[ix] != MP_MASK) {
01823           return 0;
01824        }
01825    }
01826    return 1;
01827 }
01828 
01829 
01830 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
01831  *
01832  * Uses a left-to-right k-ary sliding window to compute the modular
01833  * exponentiation.
01834  * The value of k changes based on the size of the exponent.
01835  *
01836  * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
01837  */
01838 
01839 #ifdef MP_LOW_MEM
01840    #define TAB_SIZE 32
01841 #else
01842    #define TAB_SIZE 256
01843 #endif
01844 
01845 int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
01846                      int redmode)
01847 {
01848   mp_int res;
01849   mp_digit buf, mp;
01850   int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
01851 #ifdef WOLFSSL_SMALL_STACK
01852   mp_int* M = NULL;
01853 #else
01854   mp_int M[TAB_SIZE];
01855 #endif
01856   /* use a pointer to the reduction algorithm.  This allows us to use
01857    * one of many reduction algorithms without modding the guts of
01858    * the code with if statements everywhere.
01859    */
01860   int     (*redux)(mp_int*,mp_int*,mp_digit);
01861 
01862 #ifdef WOLFSSL_SMALL_STACK
01863   M = (mp_int*) XMALLOC(sizeof(mp_int) * TAB_SIZE, NULL,
01864                                                        DYNAMIC_TYPE_TMP_BUFFER);
01865   if (M == NULL)
01866     return MP_MEM;
01867 #endif
01868 
01869   /* find window size */
01870   x = mp_count_bits (X);
01871   if (x <= 7) {
01872     winsize = 2;
01873   } else if (x <= 36) {
01874     winsize = 3;
01875   } else if (x <= 140) {
01876     winsize = 4;
01877   } else if (x <= 450) {
01878     winsize = 5;
01879   } else if (x <= 1303) {
01880     winsize = 6;
01881   } else if (x <= 3529) {
01882     winsize = 7;
01883   } else {
01884     winsize = 8;
01885   }
01886 
01887 #ifdef MP_LOW_MEM
01888   if (winsize > 5) {
01889      winsize = 5;
01890   }
01891 #endif
01892 
01893   /* init M array */
01894   /* init first cell */
01895   if ((err = mp_init(&M[1])) != MP_OKAY) {
01896 #ifdef WOLFSSL_SMALL_STACK
01897      XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
01898 #endif
01899 
01900      return err;
01901   }
01902 
01903   /* now init the second half of the array */
01904   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
01905     if ((err = mp_init(&M[x])) != MP_OKAY) {
01906       for (y = 1<<(winsize-1); y < x; y++) {
01907         mp_clear (&M[y]);
01908       }
01909       mp_clear(&M[1]);
01910 
01911 #ifdef WOLFSSL_SMALL_STACK
01912       XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
01913 #endif
01914 
01915       return err;
01916     }
01917   }
01918 
01919   /* determine and setup reduction code */
01920   if (redmode == 0) {
01921 #ifdef BN_MP_MONTGOMERY_SETUP_C
01922      /* now setup montgomery  */
01923      if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
01924         goto LBL_M;
01925      }
01926 #else
01927      err = MP_VAL;
01928      goto LBL_M;
01929 #endif
01930 
01931      /* automatically pick the comba one if available (saves quite a few
01932         calls/ifs) */
01933 #ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
01934      if (((P->used * 2 + 1) < MP_WARRAY) &&
01935           P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
01936         redux = fast_mp_montgomery_reduce;
01937      } else
01938 #endif
01939      {
01940 #ifdef BN_MP_MONTGOMERY_REDUCE_C
01941         /* use slower baseline Montgomery method */
01942         redux = mp_montgomery_reduce;
01943 #else
01944         err = MP_VAL;
01945         goto LBL_M;
01946 #endif
01947      }
01948   } else if (redmode == 1) {
01949 #if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C)
01950      /* setup DR reduction for moduli of the form B**k - b */
01951      mp_dr_setup(P, &mp);
01952      redux = mp_dr_reduce;
01953 #else
01954      err = MP_VAL;
01955      goto LBL_M;
01956 #endif
01957   } else {
01958 #if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
01959      /* setup DR reduction for moduli of the form 2**k - b */
01960      if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
01961         goto LBL_M;
01962      }
01963      redux = mp_reduce_2k;
01964 #else
01965      err = MP_VAL;
01966      goto LBL_M;
01967 #endif
01968   }
01969 
01970   /* setup result */
01971   if ((err = mp_init (&res)) != MP_OKAY) {
01972     goto LBL_M;
01973   }
01974 
01975   /* create M table
01976    *
01977 
01978    *
01979    * The first half of the table is not computed though accept for M[0] and M[1]
01980    */
01981 
01982   if (redmode == 0) {
01983 #ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
01984      /* now we need R mod m */
01985      if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
01986        goto LBL_RES;
01987      }
01988 #else
01989      err = MP_VAL;
01990      goto LBL_RES;
01991 #endif
01992 
01993      /* now set M[1] to G * R mod m */
01994      if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
01995        goto LBL_RES;
01996      }
01997   } else {
01998      if ((err = mp_set(&res, 1)) != MP_OKAY) {
01999         goto LBL_RES;
02000      }
02001      if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
02002         goto LBL_RES;
02003      }
02004   }
02005 
02006   /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times*/
02007   if ((err = mp_copy (&M[1], &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
02008     goto LBL_RES;
02009   }
02010 
02011   for (x = 0; x < (winsize - 1); x++) {
02012     if ((err = mp_sqr (&M[(mp_digit)(1 << (winsize - 1))],
02013                        &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
02014       goto LBL_RES;
02015     }
02016     if ((err = redux (&M[(mp_digit)(1 << (winsize - 1))], P, mp)) != MP_OKAY) {
02017       goto LBL_RES;
02018     }
02019   }
02020 
02021   /* create upper table */
02022   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
02023     if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
02024       goto LBL_RES;
02025     }
02026     if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
02027       goto LBL_RES;
02028     }
02029   }
02030 
02031   /* set initial mode and bit cnt */
02032   mode   = 0;
02033   bitcnt = 1;
02034   buf    = 0;
02035   digidx = X->used - 1;
02036   bitcpy = 0;
02037   bitbuf = 0;
02038 
02039   for (;;) {
02040     /* grab next digit as required */
02041     if (--bitcnt == 0) {
02042       /* if digidx == -1 we are out of digits so break */
02043       if (digidx == -1) {
02044         break;
02045       }
02046       /* read next digit and reset bitcnt */
02047       buf    = X->dp[digidx--];
02048       bitcnt = (int)DIGIT_BIT;
02049     }
02050 
02051     /* grab the next msb from the exponent */
02052     y     = (int)(buf >> (DIGIT_BIT - 1)) & 1;
02053     buf <<= (mp_digit)1;
02054 
02055     /* if the bit is zero and mode == 0 then we ignore it
02056      * These represent the leading zero bits before the first 1 bit
02057      * in the exponent.  Technically this opt is not required but it
02058      * does lower the # of trivial squaring/reductions used
02059      */
02060     if (mode == 0 && y == 0) {
02061       continue;
02062     }
02063 
02064     /* if the bit is zero and mode == 1 then we square */
02065     if (mode == 1 && y == 0) {
02066       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
02067         goto LBL_RES;
02068       }
02069       if ((err = redux (&res, P, mp)) != MP_OKAY) {
02070         goto LBL_RES;
02071       }
02072       continue;
02073     }
02074 
02075     /* else we add it to the window */
02076     bitbuf |= (y << (winsize - ++bitcpy));
02077     mode    = 2;
02078 
02079     if (bitcpy == winsize) {
02080       /* ok window is filled so square as required and multiply  */
02081       /* square first */
02082       for (x = 0; x < winsize; x++) {
02083         if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
02084           goto LBL_RES;
02085         }
02086         if ((err = redux (&res, P, mp)) != MP_OKAY) {
02087           goto LBL_RES;
02088         }
02089       }
02090 
02091       /* then multiply */
02092       if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
02093         goto LBL_RES;
02094       }
02095       if ((err = redux (&res, P, mp)) != MP_OKAY) {
02096         goto LBL_RES;
02097       }
02098 
02099       /* empty window and reset */
02100       bitcpy = 0;
02101       bitbuf = 0;
02102       mode   = 1;
02103     }
02104   }
02105 
02106   /* if bits remain then square/multiply */
02107   if (mode == 2 && bitcpy > 0) {
02108     /* square then multiply if the bit is set */
02109     for (x = 0; x < bitcpy; x++) {
02110       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
02111         goto LBL_RES;
02112       }
02113       if ((err = redux (&res, P, mp)) != MP_OKAY) {
02114         goto LBL_RES;
02115       }
02116 
02117       /* get next bit of the window */
02118       bitbuf <<= 1;
02119       if ((bitbuf & (1 << winsize)) != 0) {
02120         /* then multiply */
02121         if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
02122           goto LBL_RES;
02123         }
02124         if ((err = redux (&res, P, mp)) != MP_OKAY) {
02125           goto LBL_RES;
02126         }
02127       }
02128     }
02129   }
02130 
02131   if (redmode == 0) {
02132      /* fixup result if Montgomery reduction is used
02133       * recall that any value in a Montgomery system is
02134       * actually multiplied by R mod n.  So we have
02135       * to reduce one more time to cancel out the factor
02136       * of R.
02137       */
02138      if ((err = redux(&res, P, mp)) != MP_OKAY) {
02139        goto LBL_RES;
02140      }
02141   }
02142 
02143   /* swap res with Y */
02144   mp_exch (&res, Y);
02145   err = MP_OKAY;
02146 LBL_RES:mp_clear (&res);
02147 LBL_M:
02148   mp_clear(&M[1]);
02149   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
02150     mp_clear (&M[x]);
02151   }
02152 
02153 #ifdef WOLFSSL_SMALL_STACK
02154   XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
02155 #endif
02156 
02157   return err;
02158 }
02159 
02160 
02161 /* setups the montgomery reduction stuff */
02162 int mp_montgomery_setup (mp_int * n, mp_digit * rho)
02163 {
02164   mp_digit x, b;
02165 
02166 /* fast inversion mod 2**k
02167  *
02168  * Based on the fact that
02169  *
02170  * XA = 1 (mod 2**n)  =>  (X(2-XA)) A = 1 (mod 2**2n)
02171  *                    =>  2*X*A - X*X*A*A = 1
02172  *                    =>  2*(1) - (1)     = 1
02173  */
02174   b = n->dp[0];
02175 
02176   if ((b & 1) == 0) {
02177     return MP_VAL;
02178   }
02179 
02180   x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
02181   x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
02182 #if !defined(MP_8BIT)
02183   x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
02184 #endif
02185 #if defined(MP_64BIT) || !(defined(MP_8BIT) || defined(MP_16BIT))
02186   x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
02187 #endif
02188 #ifdef MP_64BIT
02189   x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
02190 #endif
02191 
02192   /* rho = -1/m mod b */
02193   /* TAO, switched mp_word casts to mp_digit to shut up compiler */
02194   *rho = (mp_digit)((((mp_digit)1 << ((mp_digit) DIGIT_BIT)) - x) & MP_MASK);
02195 
02196   return MP_OKAY;
02197 }
02198 
02199 
02200 /* computes xR**-1 == x (mod N) via Montgomery Reduction
02201  *
02202  * This is an optimized implementation of montgomery_reduce
02203  * which uses the comba method to quickly calculate the columns of the
02204  * reduction.
02205  *
02206  * Based on Algorithm 14.32 on pp.601 of HAC.
02207 */
02208 int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
02209 {
02210   int     ix, res, olduse;
02211 #ifdef WOLFSSL_SMALL_STACK
02212   mp_word* W;    /* uses dynamic memory and slower */
02213 #else
02214   mp_word W[MP_WARRAY];
02215 #endif
02216 
02217   /* get old used count */
02218   olduse = x->used;
02219 
02220   /* grow a as required */
02221   if (x->alloc < n->used + 1) {
02222     if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
02223       return res;
02224     }
02225   }
02226 
02227 #ifdef WOLFSSL_SMALL_STACK
02228   W = (mp_word*)XMALLOC(sizeof(mp_word) * MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
02229   if (W == NULL)
02230     return MP_MEM;
02231 #endif
02232 
02233   /* first we have to get the digits of the input into
02234    * an array of double precision words W[...]
02235    */
02236   {
02237     mp_word *_W;
02238     mp_digit *tmpx;
02239 
02240     /* alias for the W[] array */
02241     _W   = W;
02242 
02243     /* alias for the digits of  x*/
02244     tmpx = x->dp;
02245 
02246     /* copy the digits of a into W[0..a->used-1] */
02247     for (ix = 0; ix < x->used; ix++) {
02248       *_W++ = *tmpx++;
02249     }
02250 
02251     /* zero the high words of W[a->used..m->used*2] */
02252     for (; ix < n->used * 2 + 1; ix++) {
02253       *_W++ = 0;
02254     }
02255   }
02256 
02257   /* now we proceed to zero successive digits
02258    * from the least significant upwards
02259    */
02260   for (ix = 0; ix < n->used; ix++) {
02261     /* mu = ai * m' mod b
02262      *
02263      * We avoid a double precision multiplication (which isn't required)
02264      * by casting the value down to a mp_digit.  Note this requires
02265      * that W[ix-1] have  the carry cleared (see after the inner loop)
02266      */
02267     mp_digit mu;
02268     mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
02269 
02270     /* a = a + mu * m * b**i
02271      *
02272      * This is computed in place and on the fly.  The multiplication
02273      * by b**i is handled by offseting which columns the results
02274      * are added to.
02275      *
02276      * Note the comba method normally doesn't handle carries in the
02277      * inner loop In this case we fix the carry from the previous
02278      * column since the Montgomery reduction requires digits of the
02279      * result (so far) [see above] to work.  This is
02280      * handled by fixing up one carry after the inner loop.  The
02281      * carry fixups are done in order so after these loops the
02282      * first m->used words of W[] have the carries fixed
02283      */
02284     {
02285       int iy;
02286       mp_digit *tmpn;
02287       mp_word *_W;
02288 
02289       /* alias for the digits of the modulus */
02290       tmpn = n->dp;
02291 
02292       /* Alias for the columns set by an offset of ix */
02293       _W = W + ix;
02294 
02295       /* inner loop */
02296       for (iy = 0; iy < n->used; iy++) {
02297           *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++);
02298       }
02299     }
02300 
02301     /* now fix carry for next digit, W[ix+1] */
02302     W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
02303   }
02304 
02305   /* now we have to propagate the carries and
02306    * shift the words downward [all those least
02307    * significant digits we zeroed].
02308    */
02309   {
02310     mp_digit *tmpx;
02311     mp_word *_W, *_W1;
02312 
02313     /* nox fix rest of carries */
02314 
02315     /* alias for current word */
02316     _W1 = W + ix;
02317 
02318     /* alias for next word, where the carry goes */
02319     _W = W + ++ix;
02320 
02321     for (; ix <= n->used * 2 + 1; ix++) {
02322       *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
02323     }
02324 
02325     /* copy out, A = A/b**n
02326      *
02327      * The result is A/b**n but instead of converting from an
02328      * array of mp_word to mp_digit than calling mp_rshd
02329      * we just copy them in the right order
02330      */
02331 
02332     /* alias for destination word */
02333     tmpx = x->dp;
02334 
02335     /* alias for shifted double precision result */
02336     _W = W + n->used;
02337 
02338     for (ix = 0; ix < n->used + 1; ix++) {
02339       *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
02340     }
02341 
02342     /* zero olduse digits, if the input a was larger than
02343      * m->used+1 we'll have to clear the digits
02344      */
02345     for (; ix < olduse; ix++) {
02346       *tmpx++ = 0;
02347     }
02348   }
02349 
02350   /* set the max used and clamp */
02351   x->used = n->used + 1;
02352   mp_clamp (x);
02353 
02354 #ifdef WOLFSSL_SMALL_STACK
02355   XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
02356 #endif
02357 
02358   /* if A >= m then A = A - m */
02359   if (mp_cmp_mag (x, n) != MP_LT) {
02360     return s_mp_sub (x, n, x);
02361   }
02362   return MP_OKAY;
02363 }
02364 
02365 
02366 /* computes xR**-1 == x (mod N) via Montgomery Reduction */
02367 int mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
02368 {
02369   int     ix, res, digs;
02370   mp_digit mu;
02371 
02372   /* can the fast reduction [comba] method be used?
02373    *
02374    * Note that unlike in mul you're safely allowed *less*
02375    * than the available columns [255 per default] since carries
02376    * are fixed up in the inner loop.
02377    */
02378   digs = n->used * 2 + 1;
02379   if ((digs < MP_WARRAY) &&
02380       n->used <
02381       (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
02382     return fast_mp_montgomery_reduce (x, n, rho);
02383   }
02384 
02385   /* grow the input as required */
02386   if (x->alloc < digs) {
02387     if ((res = mp_grow (x, digs)) != MP_OKAY) {
02388       return res;
02389     }
02390   }
02391   x->used = digs;
02392 
02393   for (ix = 0; ix < n->used; ix++) {
02394     /* mu = ai * rho mod b
02395      *
02396      * The value of rho must be precalculated via
02397      * montgomery_setup() such that
02398      * it equals -1/n0 mod b this allows the
02399      * following inner loop to reduce the
02400      * input one digit at a time
02401      */
02402     mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK);
02403 
02404     /* a = a + mu * m * b**i */
02405     {
02406       int iy;
02407       mp_digit *tmpn, *tmpx, u;
02408       mp_word r;
02409 
02410       /* alias for digits of the modulus */
02411       tmpn = n->dp;
02412 
02413       /* alias for the digits of x [the input] */
02414       tmpx = x->dp + ix;
02415 
02416       /* set the carry to zero */
02417       u = 0;
02418 
02419       /* Multiply and add in place */
02420       for (iy = 0; iy < n->used; iy++) {
02421         /* compute product and sum */
02422         r       = ((mp_word)mu) * ((mp_word)*tmpn++) +
02423                   ((mp_word) u) + ((mp_word) * tmpx);
02424 
02425         /* get carry */
02426         u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
02427 
02428         /* fix digit */
02429         *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK));
02430       }
02431       /* At this point the ix'th digit of x should be zero */
02432 
02433 
02434       /* propagate carries upwards as required*/
02435       while (u) {
02436         *tmpx   += u;
02437         u        = *tmpx >> DIGIT_BIT;
02438         *tmpx++ &= MP_MASK;
02439       }
02440     }
02441   }
02442 
02443   /* at this point the n.used'th least
02444    * significant digits of x are all zero
02445    * which means we can shift x to the
02446    * right by n.used digits and the
02447    * residue is unchanged.
02448    */
02449 
02450   /* x = x/b**n.used */
02451   mp_clamp(x);
02452   mp_rshd (x, n->used);
02453 
02454   /* if x >= n then x = x - n */
02455   if (mp_cmp_mag (x, n) != MP_LT) {
02456     return s_mp_sub (x, n, x);
02457   }
02458 
02459   return MP_OKAY;
02460 }
02461 
02462 
02463 /* determines the setup value */
02464 void mp_dr_setup(mp_int *a, mp_digit *d)
02465 {
02466    /* the casts are required if DIGIT_BIT is one less than
02467     * the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
02468     */
02469    *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) -
02470         ((mp_word)a->dp[0]));
02471 }
02472 
02473 
02474 /* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
02475  *
02476  * Based on algorithm from the paper
02477  *
02478  * "Generating Efficient Primes for Discrete Log Cryptosystems"
02479  *                 Chae Hoon Lim, Pil Joong Lee,
02480  *          POSTECH Information Research Laboratories
02481  *
02482  * The modulus must be of a special format [see manual]
02483  *
02484  * Has been modified to use algorithm 7.10 from the LTM book instead
02485  *
02486  * Input x must be in the range 0 <= x <= (n-1)**2
02487  */
02488 int mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k)
02489 {
02490   int      err, i, m;
02491   mp_word  r;
02492   mp_digit mu, *tmpx1, *tmpx2;
02493 
02494   /* m = digits in modulus */
02495   m = n->used;
02496 
02497   /* ensure that "x" has at least 2m digits */
02498   if (x->alloc < m + m) {
02499     if ((err = mp_grow (x, m + m)) != MP_OKAY) {
02500       return err;
02501     }
02502   }
02503 
02504 /* top of loop, this is where the code resumes if
02505  * another reduction pass is required.
02506  */
02507 top:
02508   /* aliases for digits */
02509   /* alias for lower half of x */
02510   tmpx1 = x->dp;
02511 
02512   /* alias for upper half of x, or x/B**m */
02513   tmpx2 = x->dp + m;
02514 
02515   /* set carry to zero */
02516   mu = 0;
02517 
02518   /* compute (x mod B**m) + k * [x/B**m] inline and inplace */
02519   for (i = 0; i < m; i++) {
02520       r         = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu;
02521       *tmpx1++  = (mp_digit)(r & MP_MASK);
02522       mu        = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
02523   }
02524 
02525   /* set final carry */
02526   *tmpx1++ = mu;
02527 
02528   /* zero words above m */
02529   for (i = m + 1; i < x->used; i++) {
02530       *tmpx1++ = 0;
02531   }
02532 
02533   /* clamp, sub and return */
02534   mp_clamp (x);
02535 
02536   /* if x >= n then subtract and reduce again
02537    * Each successive "recursion" makes the input smaller and smaller.
02538    */
02539   if (mp_cmp_mag (x, n) != MP_LT) {
02540     s_mp_sub(x, n, x);
02541     goto top;
02542   }
02543   return MP_OKAY;
02544 }
02545 
02546 
02547 /* reduces a modulo n where n is of the form 2**p - d */
02548 int mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d)
02549 {
02550    mp_int q;
02551    int    p, res;
02552 
02553    if ((res = mp_init(&q)) != MP_OKAY) {
02554       return res;
02555    }
02556 
02557    p = mp_count_bits(n);
02558 top:
02559    /* q = a/2**p, a = a mod 2**p */
02560    if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
02561       goto ERR;
02562    }
02563 
02564    if (d != 1) {
02565       /* q = q * d */
02566       if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) {
02567          goto ERR;
02568       }
02569    }
02570 
02571    /* a = a + q */
02572    if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
02573       goto ERR;
02574    }
02575 
02576    if (mp_cmp_mag(a, n) != MP_LT) {
02577       s_mp_sub(a, n, a);
02578       goto top;
02579    }
02580 
02581 ERR:
02582    mp_clear(&q);
02583    return res;
02584 }
02585 
02586 
02587 /* determines the setup value */
02588 int mp_reduce_2k_setup(mp_int *a, mp_digit *d)
02589 {
02590    int res, p;
02591    mp_int tmp;
02592 
02593    if ((res = mp_init(&tmp)) != MP_OKAY) {
02594       return res;
02595    }
02596 
02597    p = mp_count_bits(a);
02598    if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
02599       mp_clear(&tmp);
02600       return res;
02601    }
02602 
02603    if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
02604       mp_clear(&tmp);
02605       return res;
02606    }
02607 
02608    *d = tmp.dp[0];
02609    mp_clear(&tmp);
02610    return MP_OKAY;
02611 }
02612 
02613 
02614 /* set the b bit of a */
02615 int mp_set_bit (mp_int * a, int b)
02616 {
02617     int i = b / DIGIT_BIT, res;
02618 
02619     if (a->used < (int)(i + 1)) {
02620         /* grow a to accommodate the single bit */
02621         if ((res = mp_grow (a, i + 1)) != MP_OKAY) {
02622             return res;
02623         }
02624 
02625         /* set the used count of where the bit will go */
02626         a->used = (int)(i + 1);
02627     }
02628 
02629     /* put the single bit in its place */
02630     a->dp[i] |= ((mp_digit)1) << (b % DIGIT_BIT);
02631 
02632     return MP_OKAY;
02633 }
02634 
02635 /* computes a = 2**b
02636  *
02637  * Simple algorithm which zeros the int, set the required bit
02638  */
02639 int mp_2expt (mp_int * a, int b)
02640 {
02641     /* zero a as per default */
02642     mp_zero (a);
02643 
02644     return mp_set_bit(a, b);
02645 }
02646 
02647 /* multiply by a digit */
02648 int mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
02649 {
02650   mp_digit u, *tmpa, *tmpc;
02651   mp_word  r;
02652   int      ix, res, olduse;
02653 
02654   /* make sure c is big enough to hold a*b */
02655   if (c->alloc < a->used + 1) {
02656     if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
02657       return res;
02658     }
02659   }
02660 
02661   /* get the original destinations used count */
02662   olduse = c->used;
02663 
02664   /* set the sign */
02665   c->sign = a->sign;
02666 
02667   /* alias for a->dp [source] */
02668   tmpa = a->dp;
02669 
02670   /* alias for c->dp [dest] */
02671   tmpc = c->dp;
02672 
02673   /* zero carry */
02674   u = 0;
02675 
02676   /* compute columns */
02677   for (ix = 0; ix < a->used; ix++) {
02678     /* compute product and carry sum for this term */
02679     r       = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b);
02680 
02681     /* mask off higher bits to get a single digit */
02682     *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
02683 
02684     /* send carry into next iteration */
02685     u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
02686   }
02687 
02688   /* store final carry [if any] and increment ix offset  */
02689   *tmpc++ = u;
02690   ++ix;
02691 
02692   /* now zero digits above the top */
02693   while (ix++ < olduse) {
02694      *tmpc++ = 0;
02695   }
02696 
02697   /* set used count */
02698   c->used = a->used + 1;
02699   mp_clamp(c);
02700 
02701   return MP_OKAY;
02702 }
02703 
02704 
02705 /* d = a * b (mod c) */
02706 #if defined(FREESCALE_LTC_TFM)
02707 int wolfcrypt_mp_mulmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d)
02708 #else
02709 int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
02710 #endif
02711 {
02712   int     res;
02713   mp_int  t;
02714 
02715   if ((res = mp_init (&t)) != MP_OKAY) {
02716     return res;
02717   }
02718 
02719   res = mp_mul (a, b, &t);
02720   if (res == MP_OKAY) {
02721       res = mp_mod (&t, c, d);
02722   }
02723 
02724   mp_clear (&t);
02725   return res;
02726 }
02727 
02728 
02729 /* d = a - b (mod c) */
02730 int mp_submod(mp_int* a, mp_int* b, mp_int* c, mp_int* d)
02731 {
02732   int     res;
02733   mp_int  t;
02734 
02735   if ((res = mp_init (&t)) != MP_OKAY) {
02736     return res;
02737   }
02738 
02739   res = mp_sub (a, b, &t);
02740   if (res == MP_OKAY) {
02741       res = mp_mod (&t, c, d);
02742   }
02743 
02744   mp_clear (&t);
02745 
02746   return res;
02747 }
02748 
02749 /* d = a + b (mod c) */
02750 int mp_addmod(mp_int* a, mp_int* b, mp_int* c, mp_int* d)
02751 {
02752    int     res;
02753    mp_int  t;
02754 
02755    if ((res = mp_init (&t)) != MP_OKAY) {
02756      return res;
02757    }
02758 
02759    res = mp_add (a, b, &t);
02760    if (res == MP_OKAY) {
02761        res = mp_mod (&t, c, d);
02762    }
02763 
02764    mp_clear (&t);
02765 
02766    return res;
02767 }
02768 
02769 /* computes b = a*a */
02770 int mp_sqr (mp_int * a, mp_int * b)
02771 {
02772   int     res;
02773 
02774   {
02775 #ifdef BN_FAST_S_MP_SQR_C
02776     /* can we use the fast comba multiplier? */
02777     if ((a->used * 2 + 1) < MP_WARRAY &&
02778          a->used <
02779          (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
02780       res = fast_s_mp_sqr (a, b);
02781     } else
02782 #endif
02783 #ifdef BN_S_MP_SQR_C
02784       res = s_mp_sqr (a, b);
02785 #else
02786       res = MP_VAL;
02787 #endif
02788   }
02789   b->sign = MP_ZPOS;
02790   return res;
02791 }
02792 
02793 
02794 /* high level multiplication (handles sign) */
02795 #if defined(FREESCALE_LTC_TFM)
02796 int wolfcrypt_mp_mul(mp_int *a, mp_int *b, mp_int *c)
02797 #else
02798 int mp_mul (mp_int * a, mp_int * b, mp_int * c)
02799 #endif
02800 {
02801   int     res, neg;
02802   neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
02803 
02804   {
02805     /* can we use the fast multiplier?
02806      *
02807      * The fast multiplier can be used if the output will
02808      * have less than MP_WARRAY digits and the number of
02809      * digits won't affect carry propagation
02810      */
02811     int     digs = a->used + b->used + 1;
02812 
02813 #ifdef BN_FAST_S_MP_MUL_DIGS_C
02814     if ((digs < MP_WARRAY) &&
02815         MIN(a->used, b->used) <=
02816         (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
02817       res = fast_s_mp_mul_digs (a, b, c, digs);
02818     } else
02819 #endif
02820 #ifdef BN_S_MP_MUL_DIGS_C
02821       res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
02822 #else
02823       res = MP_VAL;
02824 #endif
02825 
02826   }
02827   c->sign = (c->used > 0) ? neg : MP_ZPOS;
02828   return res;
02829 }
02830 
02831 
02832 /* b = a*2 */
02833 int mp_mul_2(mp_int * a, mp_int * b)
02834 {
02835   int     x, res, oldused;
02836 
02837   /* grow to accommodate result */
02838   if (b->alloc < a->used + 1) {
02839     if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
02840       return res;
02841     }
02842   }
02843 
02844   oldused = b->used;
02845   b->used = a->used;
02846 
02847   {
02848     mp_digit r, rr, *tmpa, *tmpb;
02849 
02850     /* alias for source */
02851     tmpa = a->dp;
02852 
02853     /* alias for dest */
02854     tmpb = b->dp;
02855 
02856     /* carry */
02857     r = 0;
02858     for (x = 0; x < a->used; x++) {
02859 
02860       /* get what will be the *next* carry bit from the
02861        * MSB of the current digit
02862        */
02863       rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
02864 
02865       /* now shift up this digit, add in the carry [from the previous] */
02866       *tmpb++ = (mp_digit)(((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK);
02867 
02868       /* copy the carry that would be from the source
02869        * digit into the next iteration
02870        */
02871       r = rr;
02872     }
02873 
02874     /* new leading digit? */
02875     if (r != 0) {
02876       /* add a MSB which is always 1 at this point */
02877       *tmpb = 1;
02878       ++(b->used);
02879     }
02880 
02881     /* now zero any excess digits on the destination
02882      * that we didn't write to
02883      */
02884     tmpb = b->dp + b->used;
02885     for (x = b->used; x < oldused; x++) {
02886       *tmpb++ = 0;
02887     }
02888   }
02889   b->sign = a->sign;
02890   return MP_OKAY;
02891 }
02892 
02893 
02894 /* divide by three (based on routine from MPI and the GMP manual) */
02895 int mp_div_3 (mp_int * a, mp_int *c, mp_digit * d)
02896 {
02897   mp_int   q;
02898   mp_word  w, t;
02899   mp_digit b;
02900   int      res, ix;
02901 
02902   /* b = 2**DIGIT_BIT / 3 */
02903   b = (mp_digit) ( (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3) );
02904 
02905   if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
02906      return res;
02907   }
02908 
02909   q.used = a->used;
02910   q.sign = a->sign;
02911   w = 0;
02912   for (ix = a->used - 1; ix >= 0; ix--) {
02913      w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
02914 
02915      if (w >= 3) {
02916         /* multiply w by [1/3] */
02917         t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT);
02918 
02919         /* now subtract 3 * [w/3] from w, to get the remainder */
02920         w -= t+t+t;
02921 
02922         /* fixup the remainder as required since
02923          * the optimization is not exact.
02924          */
02925         while (w >= 3) {
02926            t += 1;
02927            w -= 3;
02928         }
02929       } else {
02930         t = 0;
02931       }
02932       q.dp[ix] = (mp_digit)t;
02933   }
02934 
02935   /* [optional] store the remainder */
02936   if (d != NULL) {
02937      *d = (mp_digit)w;
02938   }
02939 
02940   /* [optional] store the quotient */
02941   if (c != NULL) {
02942      mp_clamp(&q);
02943      mp_exch(&q, c);
02944   }
02945   mp_clear(&q);
02946 
02947   return res;
02948 }
02949 
02950 
02951 /* init an mp_init for a given size */
02952 int mp_init_size (mp_int * a, int size)
02953 {
02954   int x;
02955 
02956   /* pad size so there are always extra digits */
02957   size += (MP_PREC * 2) - (size % MP_PREC);
02958 
02959   /* alloc mem */
02960   a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size, NULL,
02961                                       DYNAMIC_TYPE_BIGINT);
02962   if (a->dp == NULL) {
02963     return MP_MEM;
02964   }
02965 
02966   /* set the members */
02967   a->used  = 0;
02968   a->alloc = size;
02969   a->sign  = MP_ZPOS;
02970 #ifdef HAVE_WOLF_BIGINT
02971   wc_bigint_init(&a->raw);
02972 #endif
02973 
02974   /* zero the digits */
02975   for (x = 0; x < size; x++) {
02976       a->dp[x] = 0;
02977   }
02978 
02979   return MP_OKAY;
02980 }
02981 
02982 
02983 /* the jist of squaring...
02984  * you do like mult except the offset of the tmpx [one that
02985  * starts closer to zero] can't equal the offset of tmpy.
02986  * So basically you set up iy like before then you min it with
02987  * (ty-tx) so that it never happens.  You double all those
02988  * you add in the inner loop
02989 
02990 After that loop you do the squares and add them in.
02991 */
02992 
02993 int fast_s_mp_sqr (mp_int * a, mp_int * b)
02994 {
02995   int       olduse, res, pa, ix, iz;
02996 #ifdef WOLFSSL_SMALL_STACK
02997   mp_digit* W;    /* uses dynamic memory and slower */
02998 #else
02999   mp_digit W[MP_WARRAY];
03000 #endif
03001   mp_digit  *tmpx;
03002   mp_word   W1;
03003 
03004   /* grow the destination as required */
03005   pa = a->used + a->used;
03006   if (b->alloc < pa) {
03007     if ((res = mp_grow (b, pa)) != MP_OKAY) {
03008       return res;
03009     }
03010   }
03011 
03012   if (pa > MP_WARRAY)
03013     return MP_RANGE;  /* TAO range check */
03014 
03015 #ifdef WOLFSSL_SMALL_STACK
03016   W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
03017   if (W == NULL)
03018     return MP_MEM;
03019 #endif
03020 
03021   /* number of output digits to produce */
03022   W1 = 0;
03023   for (ix = 0; ix < pa; ix++) {
03024       int      tx, ty, iy;
03025       mp_word  _W;
03026       mp_digit *tmpy;
03027 
03028       /* clear counter */
03029       _W = 0;
03030 
03031       /* get offsets into the two bignums */
03032       ty = MIN(a->used-1, ix);
03033       tx = ix - ty;
03034 
03035       /* setup temp aliases */
03036       tmpx = a->dp + tx;
03037       tmpy = a->dp + ty;
03038 
03039       /* this is the number of times the loop will iterate, essentially
03040          while (tx++ < a->used && ty-- >= 0) { ... }
03041        */
03042       iy = MIN(a->used-tx, ty+1);
03043 
03044       /* now for squaring tx can never equal ty
03045        * we halve the distance since they approach at a rate of 2x
03046        * and we have to round because odd cases need to be executed
03047        */
03048       iy = MIN(iy, (ty-tx+1)>>1);
03049 
03050       /* execute loop */
03051       for (iz = 0; iz < iy; iz++) {
03052          _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
03053       }
03054 
03055       /* double the inner product and add carry */
03056       _W = _W + _W + W1;
03057 
03058       /* even columns have the square term in them */
03059       if ((ix&1) == 0) {
03060          _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
03061       }
03062 
03063       /* store it */
03064       W[ix] = (mp_digit)(_W & MP_MASK);
03065 
03066       /* make next carry */
03067       W1 = _W >> ((mp_word)DIGIT_BIT);
03068   }
03069 
03070   /* setup dest */
03071   olduse  = b->used;
03072   b->used = a->used+a->used;
03073 
03074   {
03075     mp_digit *tmpb;
03076     tmpb = b->dp;
03077     for (ix = 0; ix < pa; ix++) {
03078       *tmpb++ = (mp_digit)(W[ix] & MP_MASK);
03079     }
03080 
03081     /* clear unused digits [that existed in the old copy of c] */
03082     for (; ix < olduse; ix++) {
03083       *tmpb++ = 0;
03084     }
03085   }
03086   mp_clamp (b);
03087 
03088 #ifdef WOLFSSL_SMALL_STACK
03089   XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
03090 #endif
03091 
03092   return MP_OKAY;
03093 }
03094 
03095 
03096 /* Fast (comba) multiplier
03097  *
03098  * This is the fast column-array [comba] multiplier.  It is
03099  * designed to compute the columns of the product first
03100  * then handle the carries afterwards.  This has the effect
03101  * of making the nested loops that compute the columns very
03102  * simple and schedulable on super-scalar processors.
03103  *
03104  * This has been modified to produce a variable number of
03105  * digits of output so if say only a half-product is required
03106  * you don't have to compute the upper half (a feature
03107  * required for fast Barrett reduction).
03108  *
03109  * Based on Algorithm 14.12 on pp.595 of HAC.
03110  *
03111  */
03112 int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
03113 {
03114   int     olduse, res, pa, ix, iz;
03115 #ifdef WOLFSSL_SMALL_STACK
03116   mp_digit* W;    /* uses dynamic memory and slower */
03117 #else
03118   mp_digit W[MP_WARRAY];
03119 #endif
03120   mp_word  _W;
03121 
03122   /* grow the destination as required */
03123   if (c->alloc < digs) {
03124     if ((res = mp_grow (c, digs)) != MP_OKAY) {
03125       return res;
03126     }
03127   }
03128 
03129   /* number of output digits to produce */
03130   pa = MIN(digs, a->used + b->used);
03131   if (pa > MP_WARRAY)
03132     return MP_RANGE;  /* TAO range check */
03133 
03134 #ifdef WOLFSSL_SMALL_STACK
03135   W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
03136   if (W == NULL)
03137     return MP_MEM;
03138 #endif
03139 
03140   /* clear the carry */
03141   _W = 0;
03142   for (ix = 0; ix < pa; ix++) {
03143       int      tx, ty;
03144       int      iy;
03145       mp_digit *tmpx, *tmpy;
03146 
03147       /* get offsets into the two bignums */
03148       ty = MIN(b->used-1, ix);
03149       tx = ix - ty;
03150 
03151       /* setup temp aliases */
03152       tmpx = a->dp + tx;
03153       tmpy = b->dp + ty;
03154 
03155       /* this is the number of times the loop will iterate, essentially
03156          while (tx++ < a->used && ty-- >= 0) { ... }
03157        */
03158       iy = MIN(a->used-tx, ty+1);
03159 
03160       /* execute loop */
03161       for (iz = 0; iz < iy; ++iz) {
03162          _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
03163 
03164       }
03165 
03166       /* store term */
03167       W[ix] = (mp_digit)(((mp_digit)_W) & MP_MASK);
03168 
03169       /* make next carry */
03170       _W = _W >> ((mp_word)DIGIT_BIT);
03171  }
03172 
03173   /* setup dest */
03174   olduse  = c->used;
03175   c->used = pa;
03176 
03177   {
03178     mp_digit *tmpc;
03179     tmpc = c->dp;
03180     for (ix = 0; ix < pa+1; ix++) {
03181       /* now extract the previous digit [below the carry] */
03182       *tmpc++ = W[ix];
03183     }
03184 
03185     /* clear unused digits [that existed in the old copy of c] */
03186     for (; ix < olduse; ix++) {
03187       *tmpc++ = 0;
03188     }
03189   }
03190   mp_clamp (c);
03191 
03192 #ifdef WOLFSSL_SMALL_STACK
03193   XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
03194 #endif
03195 
03196   return MP_OKAY;
03197 }
03198 
03199 
03200 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
03201 int s_mp_sqr (mp_int * a, mp_int * b)
03202 {
03203   mp_int  t;
03204   int     res, ix, iy, pa;
03205   mp_word r;
03206   mp_digit u, tmpx, *tmpt;
03207 
03208   pa = a->used;
03209   if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
03210     return res;
03211   }
03212 
03213   /* default used is maximum possible size */
03214   t.used = 2*pa + 1;
03215 
03216   for (ix = 0; ix < pa; ix++) {
03217     /* first calculate the digit at 2*ix */
03218     /* calculate double precision result */
03219     r = ((mp_word) t.dp[2*ix]) +
03220         ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
03221 
03222     /* store lower part in result */
03223     t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
03224 
03225     /* get the carry */
03226     u           = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
03227 
03228     /* left hand side of A[ix] * A[iy] */
03229     tmpx        = a->dp[ix];
03230 
03231     /* alias for where to store the results */
03232     tmpt        = t.dp + (2*ix + 1);
03233 
03234     for (iy = ix + 1; iy < pa; iy++) {
03235       /* first calculate the product */
03236       r       = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
03237 
03238       /* now calculate the double precision result, note we use
03239        * addition instead of *2 since it's easier to optimize
03240        */
03241       r       = ((mp_word) *tmpt) + r + r + ((mp_word) u);
03242 
03243       /* store lower part */
03244       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
03245 
03246       /* get carry */
03247       u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
03248     }
03249     /* propagate upwards */
03250     while (u != ((mp_digit) 0)) {
03251       r       = ((mp_word) *tmpt) + ((mp_word) u);
03252       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
03253       u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
03254     }
03255   }
03256 
03257   mp_clamp (&t);
03258   mp_exch (&t, b);
03259   mp_clear (&t);
03260   return MP_OKAY;
03261 }
03262 
03263 
03264 /* multiplies |a| * |b| and only computes up to digs digits of result
03265  * HAC pp. 595, Algorithm 14.12  Modified so you can control how
03266  * many digits of output are created.
03267  */
03268 int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
03269 {
03270   mp_int  t;
03271   int     res, pa, pb, ix, iy;
03272   mp_digit u;
03273   mp_word r;
03274   mp_digit tmpx, *tmpt, *tmpy;
03275 
03276   /* can we use the fast multiplier? */
03277   if (((digs) < MP_WARRAY) &&
03278       MIN (a->used, b->used) <
03279           (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
03280     return fast_s_mp_mul_digs (a, b, c, digs);
03281   }
03282 
03283   if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
03284     return res;
03285   }
03286   t.used = digs;
03287 
03288   /* compute the digits of the product directly */
03289   pa = a->used;
03290   for (ix = 0; ix < pa; ix++) {
03291     /* set the carry to zero */
03292     u = 0;
03293 
03294     /* limit ourselves to making digs digits of output */
03295     pb = MIN (b->used, digs - ix);
03296 
03297     /* setup some aliases */
03298     /* copy of the digit from a used within the nested loop */
03299     tmpx = a->dp[ix];
03300 
03301     /* an alias for the destination shifted ix places */
03302     tmpt = t.dp + ix;
03303 
03304     /* an alias for the digits of b */
03305     tmpy = b->dp;
03306 
03307     /* compute the columns of the output and propagate the carry */
03308     for (iy = 0; iy < pb; iy++) {
03309       /* compute the column as a mp_word */
03310       r       = ((mp_word)*tmpt) +
03311                 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
03312                 ((mp_word) u);
03313 
03314       /* the new column is the lower part of the result */
03315       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
03316 
03317       /* get the carry word from the result */
03318       u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
03319     }
03320     /* set carry if it is placed below digs */
03321     if (ix + iy < digs) {
03322       *tmpt = u;
03323     }
03324   }
03325 
03326   mp_clamp (&t);
03327   mp_exch (&t, c);
03328 
03329   mp_clear (&t);
03330   return MP_OKAY;
03331 }
03332 
03333 
03334 /*
03335  * shifts with subtractions when the result is greater than b.
03336  *
03337  * The method is slightly modified to shift B unconditionally up to just under
03338  * the leading bit of b.  This saves a lot of multiple precision shifting.
03339  */
03340 int mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
03341 {
03342   int     x, bits, res;
03343 
03344   /* how many bits of last digit does b use */
03345   bits = mp_count_bits (b) % DIGIT_BIT;
03346 
03347   if (b->used > 1) {
03348      if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1))
03349          != MP_OKAY) {
03350         return res;
03351      }
03352   } else {
03353      if ((res = mp_set(a, 1)) != MP_OKAY) {
03354         return res;
03355      }
03356      bits = 1;
03357   }
03358 
03359   /* now compute C = A * B mod b */
03360   for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
03361     if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
03362       return res;
03363     }
03364     if (mp_cmp_mag (a, b) != MP_LT) {
03365       if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
03366         return res;
03367       }
03368     }
03369   }
03370 
03371   return MP_OKAY;
03372 }
03373 
03374 
03375 #ifdef MP_LOW_MEM
03376    #define TAB_SIZE 32
03377 #else
03378    #define TAB_SIZE 256
03379 #endif
03380 
03381 int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
03382 {
03383   mp_int  M[TAB_SIZE], res, mu;
03384   mp_digit buf;
03385   int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
03386   int (*redux)(mp_int*,mp_int*,mp_int*);
03387 
03388   /* find window size */
03389   x = mp_count_bits (X);
03390   if (x <= 7) {
03391     winsize = 2;
03392   } else if (x <= 36) {
03393     winsize = 3;
03394   } else if (x <= 140) {
03395     winsize = 4;
03396   } else if (x <= 450) {
03397     winsize = 5;
03398   } else if (x <= 1303) {
03399     winsize = 6;
03400   } else if (x <= 3529) {
03401     winsize = 7;
03402   } else {
03403     winsize = 8;
03404   }
03405 
03406 #ifdef MP_LOW_MEM
03407     if (winsize > 5) {
03408        winsize = 5;
03409     }
03410 #endif
03411 
03412   /* init M array */
03413   /* init first cell */
03414   if ((err = mp_init(&M[1])) != MP_OKAY) {
03415      return err;
03416   }
03417 
03418   /* now init the second half of the array */
03419   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
03420     if ((err = mp_init(&M[x])) != MP_OKAY) {
03421       for (y = 1<<(winsize-1); y < x; y++) {
03422         mp_clear (&M[y]);
03423       }
03424       mp_clear(&M[1]);
03425       return err;
03426     }
03427   }
03428 
03429   /* create mu, used for Barrett reduction */
03430   if ((err = mp_init (&mu)) != MP_OKAY) {
03431     goto LBL_M;
03432   }
03433 
03434   if (redmode == 0) {
03435      if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
03436         goto LBL_MU;
03437      }
03438      redux = mp_reduce;
03439   } else {
03440      if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) {
03441         goto LBL_MU;
03442      }
03443      redux = mp_reduce_2k_l;
03444   }
03445 
03446   /* create M table
03447    *
03448    * The M table contains powers of the base,
03449    * e.g. M[x] = G**x mod P
03450    *
03451    * The first half of the table is not
03452    * computed though accept for M[0] and M[1]
03453    */
03454   if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
03455     goto LBL_MU;
03456   }
03457 
03458   /* compute the value at M[1<<(winsize-1)] by squaring
03459    * M[1] (winsize-1) times
03460    */
03461   if ((err = mp_copy (&M[1], &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
03462     goto LBL_MU;
03463   }
03464 
03465   for (x = 0; x < (winsize - 1); x++) {
03466     /* square it */
03467     if ((err = mp_sqr (&M[(mp_digit)(1 << (winsize - 1))],
03468                        &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
03469       goto LBL_MU;
03470     }
03471 
03472     /* reduce modulo P */
03473     if ((err = redux (&M[(mp_digit)(1 << (winsize - 1))], P, &mu)) != MP_OKAY) {
03474       goto LBL_MU;
03475     }
03476   }
03477 
03478   /* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
03479    * for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
03480    */
03481   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
03482     if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
03483       goto LBL_MU;
03484     }
03485     if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
03486       goto LBL_MU;
03487     }
03488   }
03489 
03490   /* setup result */
03491   if ((err = mp_init (&res)) != MP_OKAY) {
03492     goto LBL_MU;
03493   }
03494   if ((err = mp_set (&res, 1)) != MP_OKAY) {
03495     goto LBL_MU;
03496   }
03497 
03498   /* set initial mode and bit cnt */
03499   mode   = 0;
03500   bitcnt = 1;
03501   buf    = 0;
03502   digidx = X->used - 1;
03503   bitcpy = 0;
03504   bitbuf = 0;
03505 
03506   for (;;) {
03507     /* grab next digit as required */
03508     if (--bitcnt == 0) {
03509       /* if digidx == -1 we are out of digits */
03510       if (digidx == -1) {
03511         break;
03512       }
03513       /* read next digit and reset the bitcnt */
03514       buf    = X->dp[digidx--];
03515       bitcnt = (int) DIGIT_BIT;
03516     }
03517 
03518     /* grab the next msb from the exponent */
03519     y     = (int)(buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
03520     buf <<= (mp_digit)1;
03521 
03522     /* if the bit is zero and mode == 0 then we ignore it
03523      * These represent the leading zero bits before the first 1 bit
03524      * in the exponent.  Technically this opt is not required but it
03525      * does lower the # of trivial squaring/reductions used
03526      */
03527     if (mode == 0 && y == 0) {
03528       continue;
03529     }
03530 
03531     /* if the bit is zero and mode == 1 then we square */
03532     if (mode == 1 && y == 0) {
03533       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
03534         goto LBL_RES;
03535       }
03536       if ((err = redux (&res, P, &mu)) != MP_OKAY) {
03537         goto LBL_RES;
03538       }
03539       continue;
03540     }
03541 
03542     /* else we add it to the window */
03543     bitbuf |= (y << (winsize - ++bitcpy));
03544     mode    = 2;
03545 
03546     if (bitcpy == winsize) {
03547       /* ok window is filled so square as required and multiply  */
03548       /* square first */
03549       for (x = 0; x < winsize; x++) {
03550         if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
03551           goto LBL_RES;
03552         }
03553         if ((err = redux (&res, P, &mu)) != MP_OKAY) {
03554           goto LBL_RES;
03555         }
03556       }
03557 
03558       /* then multiply */
03559       if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
03560         goto LBL_RES;
03561       }
03562       if ((err = redux (&res, P, &mu)) != MP_OKAY) {
03563         goto LBL_RES;
03564       }
03565 
03566       /* empty window and reset */
03567       bitcpy = 0;
03568       bitbuf = 0;
03569       mode   = 1;
03570     }
03571   }
03572 
03573   /* if bits remain then square/multiply */
03574   if (mode == 2 && bitcpy > 0) {
03575     /* square then multiply if the bit is set */
03576     for (x = 0; x < bitcpy; x++) {
03577       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
03578         goto LBL_RES;
03579       }
03580       if ((err = redux (&res, P, &mu)) != MP_OKAY) {
03581         goto LBL_RES;
03582       }
03583 
03584       bitbuf <<= 1;
03585       if ((bitbuf & (1 << winsize)) != 0) {
03586         /* then multiply */
03587         if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
03588           goto LBL_RES;
03589         }
03590         if ((err = redux (&res, P, &mu)) != MP_OKAY) {
03591           goto LBL_RES;
03592         }
03593       }
03594     }
03595   }
03596 
03597   mp_exch (&res, Y);
03598   err = MP_OKAY;
03599 LBL_RES:mp_clear (&res);
03600 LBL_MU:mp_clear (&mu);
03601 LBL_M:
03602   mp_clear(&M[1]);
03603   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
03604     mp_clear (&M[x]);
03605   }
03606   return err;
03607 }
03608 
03609 
03610 /* pre-calculate the value required for Barrett reduction
03611  * For a given modulus "b" it calculates the value required in "a"
03612  */
03613 int mp_reduce_setup (mp_int * a, mp_int * b)
03614 {
03615   int     res;
03616 
03617   if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
03618     return res;
03619   }
03620   return mp_div (a, b, a, NULL);
03621 }
03622 
03623 
03624 /* reduces x mod m, assumes 0 < x < m**2, mu is
03625  * precomputed via mp_reduce_setup.
03626  * From HAC pp.604 Algorithm 14.42
03627  */
03628 int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
03629 {
03630   mp_int  q;
03631   int     res, um = m->used;
03632 
03633   /* q = x */
03634   if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
03635     return res;
03636   }
03637 
03638   /* q1 = x / b**(k-1)  */
03639   mp_rshd (&q, um - 1);
03640 
03641   /* according to HAC this optimization is ok */
03642   if (((mp_word) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
03643     if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
03644       goto CLEANUP;
03645     }
03646   } else {
03647 #ifdef BN_S_MP_MUL_HIGH_DIGS_C
03648     if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
03649       goto CLEANUP;
03650     }
03651 #elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C)
03652     if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
03653       goto CLEANUP;
03654     }
03655 #else
03656     {
03657       res = MP_VAL;
03658       goto CLEANUP;
03659     }
03660 #endif
03661   }
03662 
03663   /* q3 = q2 / b**(k+1) */
03664   mp_rshd (&q, um + 1);
03665 
03666   /* x = x mod b**(k+1), quick (no division) */
03667   if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
03668     goto CLEANUP;
03669   }
03670 
03671   /* q = q * m mod b**(k+1), quick (no division) */
03672   if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
03673     goto CLEANUP;
03674   }
03675 
03676   /* x = x - q */
03677   if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
03678     goto CLEANUP;
03679   }
03680 
03681   /* If x < 0, add b**(k+1) to it */
03682   if (mp_cmp_d (x, 0) == MP_LT) {
03683     if ((res = mp_set (&q, 1)) != MP_OKAY)
03684         goto CLEANUP;
03685     if ((res = mp_lshd (&q, um + 1)) != MP_OKAY)
03686       goto CLEANUP;
03687     if ((res = mp_add (x, &q, x)) != MP_OKAY)
03688       goto CLEANUP;
03689   }
03690 
03691   /* Back off if it's too big */
03692   while (mp_cmp (x, m) != MP_LT) {
03693     if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
03694       goto CLEANUP;
03695     }
03696   }
03697 
03698 CLEANUP:
03699   mp_clear (&q);
03700 
03701   return res;
03702 }
03703 
03704 
03705 /* reduces a modulo n where n is of the form 2**p - d
03706    This differs from reduce_2k since "d" can be larger
03707    than a single digit.
03708 */
03709 int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d)
03710 {
03711    mp_int q;
03712    int    p, res;
03713 
03714    if ((res = mp_init(&q)) != MP_OKAY) {
03715       return res;
03716    }
03717 
03718    p = mp_count_bits(n);
03719 top:
03720    /* q = a/2**p, a = a mod 2**p */
03721    if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
03722       goto ERR;
03723    }
03724 
03725    /* q = q * d */
03726    if ((res = mp_mul(&q, d, &q)) != MP_OKAY) {
03727       goto ERR;
03728    }
03729 
03730    /* a = a + q */
03731    if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
03732       goto ERR;
03733    }
03734 
03735    if (mp_cmp_mag(a, n) != MP_LT) {
03736       s_mp_sub(a, n, a);
03737       goto top;
03738    }
03739 
03740 ERR:
03741    mp_clear(&q);
03742    return res;
03743 }
03744 
03745 
03746 /* determines the setup value */
03747 int mp_reduce_2k_setup_l(mp_int *a, mp_int *d)
03748 {
03749    int    res;
03750    mp_int tmp;
03751 
03752    if ((res = mp_init(&tmp)) != MP_OKAY) {
03753       return res;
03754    }
03755 
03756    if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
03757       goto ERR;
03758    }
03759 
03760    if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
03761       goto ERR;
03762    }
03763 
03764 ERR:
03765    mp_clear(&tmp);
03766    return res;
03767 }
03768 
03769 
03770 /* multiplies |a| * |b| and does not compute the lower digs digits
03771  * [meant to get the higher part of the product]
03772  */
03773 int s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
03774 {
03775   mp_int  t;
03776   int     res, pa, pb, ix, iy;
03777   mp_digit u;
03778   mp_word r;
03779   mp_digit tmpx, *tmpt, *tmpy;
03780 
03781   /* can we use the fast multiplier? */
03782 #ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
03783   if (((a->used + b->used + 1) < MP_WARRAY)
03784       && MIN (a->used, b->used) <
03785       (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
03786     return fast_s_mp_mul_high_digs (a, b, c, digs);
03787   }
03788 #endif
03789 
03790   if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
03791     return res;
03792   }
03793   t.used = a->used + b->used + 1;
03794 
03795   pa = a->used;
03796   pb = b->used;
03797   for (ix = 0; ix < pa && a->dp; ix++) {
03798     /* clear the carry */
03799     u = 0;
03800 
03801     /* left hand side of A[ix] * B[iy] */
03802     tmpx = a->dp[ix];
03803 
03804     /* alias to the address of where the digits will be stored */
03805     tmpt = &(t.dp[digs]);
03806 
03807     /* alias for where to read the right hand side from */
03808     tmpy = b->dp + (digs - ix);
03809 
03810     for (iy = digs - ix; iy < pb; iy++) {
03811       /* calculate the double precision result */
03812       r       = ((mp_word)*tmpt) +
03813                 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
03814                 ((mp_word) u);
03815 
03816       /* get the lower part */
03817       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
03818 
03819       /* carry the carry */
03820       u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
03821     }
03822     *tmpt = u;
03823   }
03824   mp_clamp (&t);
03825   mp_exch (&t, c);
03826   mp_clear (&t);
03827   return MP_OKAY;
03828 }
03829 
03830 
03831 /* this is a modified version of fast_s_mul_digs that only produces
03832  * output digits *above* digs.  See the comments for fast_s_mul_digs
03833  * to see how it works.
03834  *
03835  * This is used in the Barrett reduction since for one of the multiplications
03836  * only the higher digits were needed.  This essentially halves the work.
03837  *
03838  * Based on Algorithm 14.12 on pp.595 of HAC.
03839  */
03840 int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
03841 {
03842   int     olduse, res, pa, ix, iz;
03843 #ifdef WOLFSSL_SMALL_STACK
03844   mp_digit* W;    /* uses dynamic memory and slower */
03845 #else
03846   mp_digit W[MP_WARRAY];
03847 #endif
03848   mp_word  _W;
03849 
03850   /* grow the destination as required */
03851   pa = a->used + b->used;
03852   if (c->alloc < pa) {
03853     if ((res = mp_grow (c, pa)) != MP_OKAY) {
03854       return res;
03855     }
03856   }
03857 
03858   if (pa > MP_WARRAY)
03859     return MP_RANGE;  /* TAO range check */
03860 
03861 #ifdef WOLFSSL_SMALL_STACK
03862   W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
03863   if (W == NULL)
03864     return MP_MEM;
03865 #endif
03866 
03867   /* number of output digits to produce */
03868   pa = a->used + b->used;
03869   _W = 0;
03870   for (ix = digs; ix < pa && a->dp; ix++) {
03871       int      tx, ty, iy;
03872       mp_digit *tmpx, *tmpy;
03873 
03874       /* get offsets into the two bignums */
03875       ty = MIN(b->used-1, ix);
03876       tx = ix - ty;
03877 
03878       /* setup temp aliases */
03879       tmpx = a->dp + tx;
03880       tmpy = b->dp + ty;
03881 
03882       /* this is the number of times the loop will iterate, essentially its
03883          while (tx++ < a->used && ty-- >= 0) { ... }
03884        */
03885       iy = MIN(a->used-tx, ty+1);
03886 
03887       /* execute loop */
03888       for (iz = 0; iz < iy; iz++) {
03889          _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
03890       }
03891 
03892       /* store term */
03893       W[ix] = (mp_digit)(((mp_digit)_W) & MP_MASK);
03894 
03895       /* make next carry */
03896       _W = _W >> ((mp_word)DIGIT_BIT);
03897   }
03898 
03899   /* setup dest */
03900   olduse  = c->used;
03901   c->used = pa;
03902 
03903   {
03904     mp_digit *tmpc;
03905 
03906     tmpc = c->dp + digs;
03907     for (ix = digs; ix < pa; ix++) {   /* TAO, <= could potentially overwrite */
03908       /* now extract the previous digit [below the carry] */
03909       *tmpc++ = W[ix];
03910     }
03911 
03912     /* clear unused digits [that existed in the old copy of c] */
03913     for (; ix < olduse; ix++) {
03914       *tmpc++ = 0;
03915     }
03916   }
03917   mp_clamp (c);
03918 
03919 #ifdef WOLFSSL_SMALL_STACK
03920   XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
03921 #endif
03922 
03923   return MP_OKAY;
03924 }
03925 
03926 
03927 #ifndef MP_SET_CHUNK_BITS
03928     #define MP_SET_CHUNK_BITS 4
03929 #endif
03930 int mp_set_int (mp_int * a, unsigned long b)
03931 {
03932   int x, res;
03933 
03934   /* use direct mp_set if b is less than mp_digit max */
03935   if (b < MP_DIGIT_MAX) {
03936     return mp_set (a, (mp_digit)b);
03937   }
03938 
03939   mp_zero (a);
03940 
03941   /* set chunk bits at a time */
03942   for (x = 0; x < (int)(sizeof(b) * 8) / MP_SET_CHUNK_BITS; x++) {
03943     /* shift the number up chunk bits */
03944     if ((res = mp_mul_2d (a, MP_SET_CHUNK_BITS, a)) != MP_OKAY) {
03945       return res;
03946     }
03947 
03948     /* OR in the top bits of the source */
03949     a->dp[0] |= (b >> ((sizeof(b) * 8) - MP_SET_CHUNK_BITS)) &
03950                                   ((1 << MP_SET_CHUNK_BITS) - 1);
03951 
03952     /* shift the source up to the next chunk bits */
03953     b <<= MP_SET_CHUNK_BITS;
03954 
03955     /* ensure that digits are not clamped off */
03956     a->used += 1;
03957   }
03958   mp_clamp (a);
03959   return MP_OKAY;
03960 }
03961 
03962 
03963 #if defined(WOLFSSL_KEY_GEN) || defined(HAVE_ECC)
03964 
03965 /* c = a * a (mod b) */
03966 int mp_sqrmod (mp_int * a, mp_int * b, mp_int * c)
03967 {
03968   int     res;
03969   mp_int  t;
03970 
03971   if ((res = mp_init (&t)) != MP_OKAY) {
03972     return res;
03973   }
03974 
03975   if ((res = mp_sqr (a, &t)) != MP_OKAY) {
03976     mp_clear (&t);
03977     return res;
03978   }
03979   res = mp_mod (&t, b, c);
03980   mp_clear (&t);
03981   return res;
03982 }
03983 
03984 #endif
03985 
03986 
03987 #if defined(HAVE_ECC) || !defined(NO_PWDBASED) || defined(WOLFSSL_SNIFFER) || \
03988     defined(WOLFSSL_HAVE_WOLFSCEP) || defined(WOLFSSL_KEY_GEN) || \
03989     defined(OPENSSL_EXTRA) || defined(WC_RSA_BLINDING)
03990 
03991 /* single digit addition */
03992 int mp_add_d (mp_int* a, mp_digit b, mp_int* c)
03993 {
03994   int     res, ix, oldused;
03995   mp_digit *tmpa, *tmpc, mu;
03996 
03997   /* grow c as required */
03998   if (c->alloc < a->used + 1) {
03999      if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
04000         return res;
04001      }
04002   }
04003 
04004   /* if a is negative and |a| >= b, call c = |a| - b */
04005   if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) {
04006      /* temporarily fix sign of a */
04007      a->sign = MP_ZPOS;
04008 
04009      /* c = |a| - b */
04010      res = mp_sub_d(a, b, c);
04011 
04012      /* fix sign  */
04013      a->sign = c->sign = MP_NEG;
04014 
04015      /* clamp */
04016      mp_clamp(c);
04017 
04018      return res;
04019   }
04020 
04021   /* old number of used digits in c */
04022   oldused = c->used;
04023 
04024   /* sign always positive */
04025   c->sign = MP_ZPOS;
04026 
04027   /* source alias */
04028   tmpa    = a->dp;
04029 
04030   /* destination alias */
04031   tmpc    = c->dp;
04032 
04033   /* if a is positive */
04034   if (a->sign == MP_ZPOS) {
04035      /* add digit, after this we're propagating
04036       * the carry.
04037       */
04038      *tmpc   = *tmpa++ + b;
04039      mu      = *tmpc >> DIGIT_BIT;
04040      *tmpc++ &= MP_MASK;
04041 
04042      /* now handle rest of the digits */
04043      for (ix = 1; ix < a->used; ix++) {
04044         *tmpc   = *tmpa++ + mu;
04045         mu      = *tmpc >> DIGIT_BIT;
04046         *tmpc++ &= MP_MASK;
04047      }
04048      /* set final carry */
04049      if (ix < c->alloc) {
04050         ix++;
04051         *tmpc++  = mu;
04052      }
04053 
04054      /* setup size */
04055      c->used = a->used + 1;
04056   } else {
04057      /* a was negative and |a| < b */
04058      c->used  = 1;
04059 
04060      /* the result is a single digit */
04061      if (a->used == 1) {
04062         *tmpc++  =  b - a->dp[0];
04063      } else {
04064         *tmpc++  =  b;
04065      }
04066 
04067      /* setup count so the clearing of oldused
04068       * can fall through correctly
04069       */
04070      ix       = 1;
04071   }
04072 
04073   /* now zero to oldused */
04074   while (ix++ < oldused) {
04075      *tmpc++ = 0;
04076   }
04077   mp_clamp(c);
04078 
04079   return MP_OKAY;
04080 }
04081 
04082 
04083 /* single digit subtraction */
04084 int mp_sub_d (mp_int * a, mp_digit b, mp_int * c)
04085 {
04086   mp_digit *tmpa, *tmpc, mu;
04087   int       res, ix, oldused;
04088 
04089   /* grow c as required */
04090   if (c->alloc < a->used + 1) {
04091      if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
04092         return res;
04093      }
04094   }
04095 
04096   /* if a is negative just do an unsigned
04097    * addition [with fudged signs]
04098    */
04099   if (a->sign == MP_NEG) {
04100      a->sign = MP_ZPOS;
04101      res     = mp_add_d(a, b, c);
04102      a->sign = c->sign = MP_NEG;
04103 
04104      /* clamp */
04105      mp_clamp(c);
04106 
04107      return res;
04108   }
04109 
04110   /* setup regs */
04111   oldused = c->used;
04112   tmpa    = a->dp;
04113   tmpc    = c->dp;
04114 
04115   /* if a <= b simply fix the single digit */
04116   if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) {
04117      if (a->used == 1) {
04118         *tmpc++ = b - *tmpa;
04119      } else {
04120         *tmpc++ = b;
04121      }
04122      ix      = 1;
04123 
04124      /* negative/1digit */
04125      c->sign = MP_NEG;
04126      c->used = 1;
04127   } else {
04128      /* positive/size */
04129      c->sign = MP_ZPOS;
04130      c->used = a->used;
04131 
04132      /* subtract first digit */
04133      *tmpc    = *tmpa++ - b;
04134      mu       = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
04135      *tmpc++ &= MP_MASK;
04136 
04137      /* handle rest of the digits */
04138      for (ix = 1; ix < a->used; ix++) {
04139         *tmpc    = *tmpa++ - mu;
04140         mu       = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
04141         *tmpc++ &= MP_MASK;
04142      }
04143   }
04144 
04145   /* zero excess digits */
04146   while (ix++ < oldused) {
04147      *tmpc++ = 0;
04148   }
04149   mp_clamp(c);
04150   return MP_OKAY;
04151 }
04152 
04153 #endif /* defined(HAVE_ECC) || !defined(NO_PWDBASED) */
04154 
04155 
04156 #if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || defined(HAVE_ECC)
04157 
04158 static const int lnz[16] = {
04159    4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
04160 };
04161 
04162 /* Counts the number of lsbs which are zero before the first zero bit */
04163 int mp_cnt_lsb(mp_int *a)
04164 {
04165     int x;
04166     mp_digit q = 0, qq;
04167 
04168     /* easy out */
04169     if (mp_iszero(a) == MP_YES) {
04170         return 0;
04171     }
04172 
04173     /* scan lower digits until non-zero */
04174     for (x = 0; x < a->used && a->dp[x] == 0; x++) {}
04175     if (a->dp)
04176         q = a->dp[x];
04177     x *= DIGIT_BIT;
04178 
04179     /* now scan this digit until a 1 is found */
04180     if ((q & 1) == 0) {
04181         do {
04182             qq  = q & 15;
04183             x  += lnz[qq];
04184             q >>= 4;
04185         } while (qq == 0);
04186     }
04187     return x;
04188 }
04189 
04190 
04191 
04192 
04193 static int s_is_power_of_two(mp_digit b, int *p)
04194 {
04195    int x;
04196 
04197    /* fast return if no power of two */
04198    if ((b==0) || (b & (b-1))) {
04199       return 0;
04200    }
04201 
04202    for (x = 0; x < DIGIT_BIT; x++) {
04203       if (b == (((mp_digit)1)<<x)) {
04204          *p = x;
04205          return 1;
04206       }
04207    }
04208    return 0;
04209 }
04210 
04211 /* single digit division (based on routine from MPI) */
04212 static int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
04213 {
04214   mp_int  q;
04215   mp_word w;
04216   mp_digit t;
04217   int     res = MP_OKAY, ix;
04218 
04219   /* cannot divide by zero */
04220   if (b == 0) {
04221      return MP_VAL;
04222   }
04223 
04224   /* quick outs */
04225   if (b == 1 || mp_iszero(a) == MP_YES) {
04226      if (d != NULL) {
04227         *d = 0;
04228      }
04229      if (c != NULL) {
04230         return mp_copy(a, c);
04231      }
04232      return MP_OKAY;
04233   }
04234 
04235   /* power of two ? */
04236   if (s_is_power_of_two(b, &ix) == 1) {
04237      if (d != NULL) {
04238         *d = a->dp[0] & ((((mp_digit)1)<<ix) - 1);
04239      }
04240      if (c != NULL) {
04241         return mp_div_2d(a, ix, c, NULL);
04242      }
04243      return MP_OKAY;
04244   }
04245 
04246 #ifdef BN_MP_DIV_3_C
04247   /* three? */
04248   if (b == 3) {
04249      return mp_div_3(a, c, d);
04250   }
04251 #endif
04252 
04253   /* no easy answer [c'est la vie].  Just division */
04254   if (c != NULL) {
04255       if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
04256         return res;
04257       }
04258 
04259       q.used = a->used;
04260       q.sign = a->sign;
04261   }
04262   else {
04263       mp_init(&q); /* initialize to help static analysis */
04264   }
04265 
04266 
04267   w = 0;
04268   for (ix = a->used - 1; ix >= 0; ix--) {
04269      w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
04270 
04271      if (w >= b) {
04272         t = (mp_digit)(w / b);
04273         w -= ((mp_word)t) * ((mp_word)b);
04274       } else {
04275         t = 0;
04276       }
04277       if (c != NULL)
04278         q.dp[ix] = (mp_digit)t;
04279   }
04280 
04281   if (d != NULL) {
04282      *d = (mp_digit)w;
04283   }
04284 
04285   if (c != NULL) {
04286      mp_clamp(&q);
04287      mp_exch(&q, c);
04288   }
04289   mp_clear(&q);
04290 
04291   return res;
04292 }
04293 
04294 
04295 int mp_mod_d (mp_int * a, mp_digit b, mp_digit * c)
04296 {
04297   return mp_div_d(a, b, NULL, c);
04298 }
04299 
04300 #endif /* defined(WOLFSSL_KEY_GEN)||defined(HAVE_COMP_KEY)||defined(HAVE_ECC) */
04301 
04302 #ifdef WOLFSSL_KEY_GEN
04303 
04304 const mp_digit ltm_prime_tab[PRIME_SIZE] = {
04305   0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
04306   0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
04307   0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
04308   0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F,
04309 #ifndef MP_8BIT
04310   0x0083,
04311   0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
04312   0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
04313   0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
04314   0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
04315 
04316   0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
04317   0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
04318   0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
04319   0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
04320   0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
04321   0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
04322   0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
04323   0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
04324 
04325   0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
04326   0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
04327   0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
04328   0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
04329   0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
04330   0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
04331   0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
04332   0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
04333 
04334   0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
04335   0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
04336   0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
04337   0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
04338   0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
04339   0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
04340   0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
04341   0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
04342 #endif
04343 };
04344 
04345 
04346 /* Miller-Rabin test of "a" to the base of "b" as described in
04347  * HAC pp. 139 Algorithm 4.24
04348  *
04349  * Sets result to 0 if definitely composite or 1 if probably prime.
04350  * Randomly the chance of error is no more than 1/4 and often
04351  * very much lower.
04352  */
04353 static int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result)
04354 {
04355   mp_int  n1, y, r;
04356   int     s, j, err;
04357 
04358   /* default */
04359   *result = MP_NO;
04360 
04361   /* ensure b > 1 */
04362   if (mp_cmp_d(b, 1) != MP_GT) {
04363      return MP_VAL;
04364   }
04365 
04366   /* get n1 = a - 1 */
04367   if ((err = mp_init_copy (&n1, a)) != MP_OKAY) {
04368     return err;
04369   }
04370   if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) {
04371     goto LBL_N1;
04372   }
04373 
04374   /* set 2**s * r = n1 */
04375   if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) {
04376     goto LBL_N1;
04377   }
04378 
04379   /* count the number of least significant bits
04380    * which are zero
04381    */
04382   s = mp_cnt_lsb(&r);
04383 
04384   /* now divide n - 1 by 2**s */
04385   if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) {
04386     goto LBL_R;
04387   }
04388 
04389   /* compute y = b**r mod a */
04390   if ((err = mp_init (&y)) != MP_OKAY) {
04391     goto LBL_R;
04392   }
04393   if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) {
04394     goto LBL_Y;
04395   }
04396 
04397   /* if y != 1 and y != n1 do */
04398   if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) {
04399     j = 1;
04400     /* while j <= s-1 and y != n1 */
04401     while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) {
04402       if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) {
04403          goto LBL_Y;
04404       }
04405 
04406       /* if y == 1 then composite */
04407       if (mp_cmp_d (&y, 1) == MP_EQ) {
04408          goto LBL_Y;
04409       }
04410 
04411       ++j;
04412     }
04413 
04414     /* if y != n1 then composite */
04415     if (mp_cmp (&y, &n1) != MP_EQ) {
04416       goto LBL_Y;
04417     }
04418   }
04419 
04420   /* probably prime now */
04421   *result = MP_YES;
04422 LBL_Y:mp_clear (&y);
04423 LBL_R:mp_clear (&r);
04424 LBL_N1:mp_clear (&n1);
04425   return err;
04426 }
04427 
04428 
04429 /* determines if an integers is divisible by one
04430  * of the first PRIME_SIZE primes or not
04431  *
04432  * sets result to 0 if not, 1 if yes
04433  */
04434 static int mp_prime_is_divisible (mp_int * a, int *result)
04435 {
04436   int     err, ix;
04437   mp_digit res;
04438 
04439   /* default to not */
04440   *result = MP_NO;
04441 
04442   for (ix = 0; ix < PRIME_SIZE; ix++) {
04443     /* what is a mod LBL_prime_tab[ix] */
04444     if ((err = mp_mod_d (a, ltm_prime_tab[ix], &res)) != MP_OKAY) {
04445       return err;
04446     }
04447 
04448     /* is the residue zero? */
04449     if (res == 0) {
04450       *result = MP_YES;
04451       return MP_OKAY;
04452     }
04453   }
04454 
04455   return MP_OKAY;
04456 }
04457 
04458 static const int USE_BBS = 1;
04459 
04460 int mp_rand_prime(mp_int* N, int len, WC_RNG* rng, void* heap)
04461 {
04462     int   err, res, type;
04463     byte* buf;
04464 
04465     if (N == NULL || rng == NULL)
04466         return MP_VAL;
04467 
04468     /* get type */
04469     if (len < 0) {
04470         type = USE_BBS;
04471         len = -len;
04472     } else {
04473         type = 0;
04474     }
04475 
04476     /* allow sizes between 2 and 512 bytes for a prime size */
04477     if (len < 2 || len > 512) {
04478         return MP_VAL;
04479     }
04480 
04481     /* allocate buffer to work with */
04482     buf = (byte*)XMALLOC(len, heap, DYNAMIC_TYPE_RSA);
04483     if (buf == NULL) {
04484         return MP_MEM;
04485     }
04486     XMEMSET(buf, 0, len);
04487 
04488     do {
04489 #ifdef SHOW_GEN
04490         printf(".");
04491         fflush(stdout);
04492 #endif
04493         /* generate value */
04494         err = wc_RNG_GenerateBlock(rng, buf, len);
04495         if (err != 0) {
04496             XFREE(buf, heap, DYNAMIC_TYPE_RSA);
04497             return err;
04498         }
04499 
04500         /* munge bits */
04501         buf[0]     |= 0x80 | 0x40;
04502         buf[len-1] |= 0x01 | ((type & USE_BBS) ? 0x02 : 0x00);
04503 
04504         /* load value */
04505         if ((err = mp_read_unsigned_bin(N, buf, len)) != MP_OKAY) {
04506             XFREE(buf, heap, DYNAMIC_TYPE_RSA);
04507             return err;
04508         }
04509 
04510         /* test */
04511         if ((err = mp_prime_is_prime(N, 8, &res)) != MP_OKAY) {
04512             XFREE(buf, heap, DYNAMIC_TYPE_RSA);
04513             return err;
04514         }
04515     } while (res == MP_NO);
04516 
04517     XMEMSET(buf, 0, len);
04518     XFREE(buf, heap, DYNAMIC_TYPE_RSA);
04519 
04520     return MP_OKAY;
04521 }
04522 
04523 /*
04524  * Sets result to 1 if probably prime, 0 otherwise
04525  */
04526 int mp_prime_is_prime (mp_int * a, int t, int *result)
04527 {
04528   mp_int  b;
04529   int     ix, err, res;
04530 
04531   /* default to no */
04532   *result = MP_NO;
04533 
04534   /* valid value of t? */
04535   if (t <= 0 || t > PRIME_SIZE) {
04536     return MP_VAL;
04537   }
04538 
04539   /* is the input equal to one of the primes in the table? */
04540   for (ix = 0; ix < PRIME_SIZE; ix++) {
04541       if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
04542          *result = 1;
04543          return MP_OKAY;
04544       }
04545   }
04546 
04547   /* first perform trial division */
04548   if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
04549     return err;
04550   }
04551 
04552   /* return if it was trivially divisible */
04553   if (res == MP_YES) {
04554     return MP_OKAY;
04555   }
04556 
04557   /* now perform the miller-rabin rounds */
04558   if ((err = mp_init (&b)) != MP_OKAY) {
04559     return err;
04560   }
04561 
04562   for (ix = 0; ix < t; ix++) {
04563     /* set the prime */
04564     if ((err = mp_set (&b, ltm_prime_tab[ix])) != MP_OKAY) {
04565         goto LBL_B;
04566     }
04567 
04568     if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
04569       goto LBL_B;
04570     }
04571 
04572     if (res == MP_NO) {
04573       goto LBL_B;
04574     }
04575   }
04576 
04577   /* passed the test */
04578   *result = MP_YES;
04579 LBL_B:mp_clear (&b);
04580   return err;
04581 }
04582 
04583 
04584 /* computes least common multiple as |a*b|/(a, b) */
04585 int mp_lcm (mp_int * a, mp_int * b, mp_int * c)
04586 {
04587   int     res;
04588   mp_int  t1, t2;
04589 
04590 
04591   if ((res = mp_init_multi (&t1, &t2, NULL, NULL, NULL, NULL)) != MP_OKAY) {
04592     return res;
04593   }
04594 
04595   /* t1 = get the GCD of the two inputs */
04596   if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) {
04597     goto LBL_T;
04598   }
04599 
04600   /* divide the smallest by the GCD */
04601   if (mp_cmp_mag(a, b) == MP_LT) {
04602      /* store quotient in t2 such that t2 * b is the LCM */
04603      if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) {
04604         goto LBL_T;
04605      }
04606      res = mp_mul(b, &t2, c);
04607   } else {
04608      /* store quotient in t2 such that t2 * a is the LCM */
04609      if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) {
04610         goto LBL_T;
04611      }
04612      res = mp_mul(a, &t2, c);
04613   }
04614 
04615   /* fix the sign to positive */
04616   c->sign = MP_ZPOS;
04617 
04618 LBL_T:
04619   mp_clear(&t1);
04620   mp_clear(&t2);
04621   return res;
04622 }
04623 
04624 
04625 
04626 /* Greatest Common Divisor using the binary method */
04627 int mp_gcd (mp_int * a, mp_int * b, mp_int * c)
04628 {
04629     mp_int  u, v;
04630     int     k, u_lsb, v_lsb, res;
04631 
04632     /* either zero than gcd is the largest */
04633     if (mp_iszero (a) == MP_YES) {
04634         return mp_abs (b, c);
04635     }
04636     if (mp_iszero (b) == MP_YES) {
04637         return mp_abs (a, c);
04638     }
04639 
04640     /* get copies of a and b we can modify */
04641     if ((res = mp_init_copy (&u, a)) != MP_OKAY) {
04642         return res;
04643     }
04644 
04645     if ((res = mp_init_copy (&v, b)) != MP_OKAY) {
04646         goto LBL_U;
04647     }
04648 
04649     /* must be positive for the remainder of the algorithm */
04650     u.sign = v.sign = MP_ZPOS;
04651 
04652     /* B1.  Find the common power of two for u and v */
04653     u_lsb = mp_cnt_lsb(&u);
04654     v_lsb = mp_cnt_lsb(&v);
04655     k     = MIN(u_lsb, v_lsb);
04656 
04657     if (k > 0) {
04658         /* divide the power of two out */
04659         if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) {
04660             goto LBL_V;
04661         }
04662 
04663         if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) {
04664             goto LBL_V;
04665         }
04666     }
04667 
04668     /* divide any remaining factors of two out */
04669     if (u_lsb != k) {
04670         if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {
04671             goto LBL_V;
04672         }
04673     }
04674 
04675     if (v_lsb != k) {
04676         if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {
04677             goto LBL_V;
04678         }
04679     }
04680 
04681     while (mp_iszero(&v) == MP_NO) {
04682         /* make sure v is the largest */
04683         if (mp_cmp_mag(&u, &v) == MP_GT) {
04684             /* swap u and v to make sure v is >= u */
04685             mp_exch(&u, &v);
04686         }
04687 
04688         /* subtract smallest from largest */
04689         if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) {
04690             goto LBL_V;
04691         }
04692 
04693         /* Divide out all factors of two */
04694         if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
04695             goto LBL_V;
04696         }
04697     }
04698 
04699     /* multiply by 2**k which we divided out at the beginning */
04700     if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) {
04701         goto LBL_V;
04702     }
04703     c->sign = MP_ZPOS;
04704     res = MP_OKAY;
04705 LBL_V:mp_clear (&u);
04706 LBL_U:mp_clear (&v);
04707     return res;
04708 }
04709 
04710 #endif /* WOLFSSL_KEY_GEN */
04711 
04712 
04713 #if defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY)
04714 
04715 /* chars used in radix conversions */
04716 const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ\
04717                          abcdefghijklmnopqrstuvwxyz+/";
04718 #endif
04719 
04720 #ifdef HAVE_ECC
04721 /* read a string [ASCII] in a given radix */
04722 int mp_read_radix (mp_int * a, const char *str, int radix)
04723 {
04724   int     y, res, neg;
04725   char    ch;
04726 
04727   /* zero the digit bignum */
04728   mp_zero(a);
04729 
04730   /* make sure the radix is ok */
04731   if (radix < 2 || radix > 64) {
04732     return MP_VAL;
04733   }
04734 
04735   /* if the leading digit is a
04736    * minus set the sign to negative.
04737    */
04738   if (*str == '-') {
04739     ++str;
04740     neg = MP_NEG;
04741   } else {
04742     neg = MP_ZPOS;
04743   }
04744 
04745   /* set the integer to the default of zero */
04746   mp_zero (a);
04747 
04748   /* process each digit of the string */
04749   while (*str) {
04750     /* if the radix <= 36 the conversion is case insensitive
04751      * this allows numbers like 1AB and 1ab to represent the same  value
04752      * [e.g. in hex]
04753      */
04754     ch = (radix <= 36) ? (char)XTOUPPER((unsigned char)*str) : *str;
04755     for (y = 0; y < 64; y++) {
04756       if (ch == mp_s_rmap[y]) {
04757          break;
04758       }
04759     }
04760 
04761     /* if the char was found in the map
04762      * and is less than the given radix add it
04763      * to the number, otherwise exit the loop.
04764      */
04765     if (y < radix) {
04766       if ((res = mp_mul_d (a, (mp_digit) radix, a)) != MP_OKAY) {
04767          return res;
04768       }
04769       if ((res = mp_add_d (a, (mp_digit) y, a)) != MP_OKAY) {
04770          return res;
04771       }
04772     } else {
04773       break;
04774     }
04775     ++str;
04776   }
04777 
04778   /* set the sign only if a != 0 */
04779   if (mp_iszero(a) != MP_YES) {
04780      a->sign = neg;
04781   }
04782   return MP_OKAY;
04783 }
04784 #endif /* HAVE_ECC */
04785 
04786 #if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
04787     defined(WOLFSSL_DEBUG_MATH)
04788 
04789 /* returns size of ASCII representation */
04790 int mp_radix_size (mp_int *a, int radix, int *size)
04791 {
04792     int     res, digs;
04793     mp_int  t;
04794     mp_digit d;
04795 
04796     *size = 0;
04797 
04798     /* special case for binary */
04799     if (radix == 2) {
04800         *size = mp_count_bits (a) + (a->sign == MP_NEG ? 1 : 0) + 1;
04801         return MP_OKAY;
04802     }
04803 
04804     /* make sure the radix is in range */
04805     if (radix < 2 || radix > 64) {
04806         return MP_VAL;
04807     }
04808 
04809     if (mp_iszero(a) == MP_YES) {
04810         *size = 2;
04811         return MP_OKAY;
04812     }
04813 
04814     /* digs is the digit count */
04815     digs = 0;
04816 
04817     /* if it's negative add one for the sign */
04818     if (a->sign == MP_NEG) {
04819         ++digs;
04820     }
04821 
04822     /* init a copy of the input */
04823     if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
04824         return res;
04825     }
04826 
04827     /* force temp to positive */
04828     t.sign = MP_ZPOS;
04829 
04830     /* fetch out all of the digits */
04831     while (mp_iszero (&t) == MP_NO) {
04832         if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
04833             mp_clear (&t);
04834             return res;
04835         }
04836         ++digs;
04837     }
04838     mp_clear (&t);
04839 
04840     /* return digs + 1, the 1 is for the NULL byte that would be required. */
04841     *size = digs + 1;
04842     return MP_OKAY;
04843 }
04844 
04845 /* stores a bignum as a ASCII string in a given radix (2..64) */
04846 int mp_toradix (mp_int *a, char *str, int radix)
04847 {
04848     int     res, digs;
04849     mp_int  t;
04850     mp_digit d;
04851     char   *_s = str;
04852 
04853     /* check range of the radix */
04854     if (radix < 2 || radix > 64) {
04855         return MP_VAL;
04856     }
04857 
04858     /* quick out if its zero */
04859     if (mp_iszero(a) == MP_YES) {
04860         *str++ = '0';
04861         *str = '\0';
04862         return MP_OKAY;
04863     }
04864 
04865     if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
04866         return res;
04867     }
04868 
04869     /* if it is negative output a - */
04870     if (t.sign == MP_NEG) {
04871         ++_s;
04872         *str++ = '-';
04873         t.sign = MP_ZPOS;
04874     }
04875 
04876     digs = 0;
04877     while (mp_iszero (&t) == MP_NO) {
04878         if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
04879             mp_clear (&t);
04880             return res;
04881         }
04882         *str++ = mp_s_rmap[d];
04883         ++digs;
04884     }
04885 
04886     /* reverse the digits of the string.  In this case _s points
04887      * to the first digit [excluding the sign] of the number]
04888      */
04889     bn_reverse ((unsigned char *)_s, digs);
04890 
04891     /* append a NULL so the string is properly terminated */
04892     *str = '\0';
04893 
04894     mp_clear (&t);
04895     return MP_OKAY;
04896 }
04897 
04898 #ifdef WOLFSSL_DEBUG_MATH
04899 void mp_dump(const char* desc, mp_int* a, byte verbose)
04900 {
04901   char *buffer;
04902   int size = a->alloc;
04903 
04904   buffer = (char*)XMALLOC(size * sizeof(mp_digit) * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
04905   if (buffer == NULL) {
04906     return;
04907   }
04908 
04909   printf("%s: ptr=%p, used=%d, sign=%d, size=%d, mpd=%d\n",
04910     desc, a, a->used, a->sign, size, (int)sizeof(mp_digit));
04911 
04912   mp_toradix(a, buffer, 16);
04913   printf("  %s\n  ", buffer);
04914 
04915   if (verbose) {
04916     int i;
04917     for(i=0; i<a->alloc * (int)sizeof(mp_digit); i++) {
04918       printf("%02x ", *(((byte*)a->dp) + i));
04919     }
04920     printf("\n");
04921   }
04922 
04923   XFREE(buffer, NULL, DYNAMIC_TYPE_TMP_BUFFER);
04924 }
04925 #endif /* WOLFSSL_DEBUG_MATH */
04926 
04927 #endif /* defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || defined(WOLFSSL_DEBUG_MATH) */
04928 
04929 #endif /* USE_FAST_MATH */
04930 
04931 #endif /* NO_BIG_INT */
04932 
04933