Renesas / SecureDweet
Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers integer.c Source File

integer.c

00001 /* integer.c
00002  *
00003  * Copyright (C) 2006-2016 wolfSSL Inc.
00004  *
00005  * This file is part of wolfSSL.
00006  *
00007  * wolfSSL is free software; you can redistribute it and/or modify
00008  * it under the terms of the GNU General Public License as published by
00009  * the Free Software Foundation; either version 2 of the License, or
00010  * (at your option) any later version.
00011  *
00012  * wolfSSL is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
00020  */
00021 
00022 
00023 
00024 /*
00025  * Based on public domain LibTomMath 0.38 by Tom St Denis, tomstdenis@iahu.ca,
00026  * http://math.libtomcrypt.com
00027  */
00028 
00029 
00030 #ifdef HAVE_CONFIG_H
00031     #include <config.h>
00032 #endif
00033 
00034 /* in case user set USE_FAST_MATH there */
00035 #include <wolfssl/wolfcrypt/settings.h>
00036 
00037 #ifndef NO_BIG_INT
00038 
00039 #ifndef USE_FAST_MATH
00040 
00041 #include <wolfssl/wolfcrypt/integer.h>
00042 
00043 #ifndef NO_WOLFSSL_SMALL_STACK
00044     #ifndef WOLFSSL_SMALL_STACK
00045         #define WOLFSSL_SMALL_STACK
00046     #endif
00047 #endif
00048 
00049 #ifdef SHOW_GEN
00050     #if defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX)
00051         #if MQX_USE_IO_OLD
00052             #include <fio.h>
00053         #else
00054             #include <nio.h>
00055         #endif
00056     #else
00057         #include <stdio.h>
00058     #endif
00059 #endif
00060 
00061 /* reverse an array, used for radix code */
00062 static void
00063 bn_reverse (unsigned char *s, int len)
00064 {
00065     int     ix, iy;
00066     unsigned char t;
00067 
00068     ix = 0;
00069     iy = len - 1;
00070     while (ix < iy) {
00071         t     = s[ix];
00072         s[ix] = s[iy];
00073         s[iy] = t;
00074         ++ix;
00075         --iy;
00076     }
00077 }
00078 
00079 /* math settings check */
00080 word32 CheckRunTimeSettings(void)
00081 {
00082     return CTC_SETTINGS;
00083 }
00084 
00085 
00086 /* handle up to 6 inits */
00087 int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d, mp_int* e,
00088                   mp_int* f)
00089 {
00090     int res = MP_OKAY;
00091 
00092     if (a && ((res = mp_init(a)) != MP_OKAY))
00093         return res;
00094 
00095     if (b && ((res = mp_init(b)) != MP_OKAY)) {
00096         mp_clear(a);
00097         return res;
00098     }
00099 
00100     if (c && ((res = mp_init(c)) != MP_OKAY)) {
00101         mp_clear(a); mp_clear(b);
00102         return res;
00103     }
00104 
00105     if (d && ((res = mp_init(d)) != MP_OKAY)) {
00106         mp_clear(a); mp_clear(b); mp_clear(c);
00107         return res;
00108     }
00109 
00110     if (e && ((res = mp_init(e)) != MP_OKAY)) {
00111         mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d);
00112         return res;
00113     }
00114 
00115     if (f && ((res = mp_init(f)) != MP_OKAY)) {
00116         mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d); mp_clear(e);
00117         return res;
00118     }
00119 
00120     return res;
00121 }
00122 
00123 
00124 /* init a new mp_int */
00125 int mp_init (mp_int * a)
00126 {
00127   int i;
00128 
00129   /* Safeguard against passing in a null pointer */
00130   if (a == NULL)
00131     return MP_VAL;
00132 
00133   /* allocate memory required and clear it */
00134   a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * MP_PREC, 0,
00135                                       DYNAMIC_TYPE_BIGINT);
00136   if (a->dp == NULL) {
00137     return MP_MEM;
00138   }
00139 
00140   /* set the digits to zero */
00141   for (i = 0; i < MP_PREC; i++) {
00142       a->dp[i] = 0;
00143   }
00144 
00145   /* set the used to zero, allocated digits to the default precision
00146    * and sign to positive */
00147   a->used  = 0;
00148   a->alloc = MP_PREC;
00149   a->sign  = MP_ZPOS;
00150 
00151   return MP_OKAY;
00152 }
00153 
00154 
00155 /* clear one (frees)  */
00156 void
00157 mp_clear (mp_int * a)
00158 {
00159   int i;
00160 
00161   if (a == NULL)
00162       return;
00163 
00164   /* only do anything if a hasn't been freed previously */
00165   if (a->dp != NULL) {
00166     /* first zero the digits */
00167     for (i = 0; i < a->used; i++) {
00168         a->dp[i] = 0;
00169     }
00170 
00171     /* free ram */
00172     XFREE(a->dp, 0, DYNAMIC_TYPE_BIGINT);
00173 
00174     /* reset members to make debugging easier */
00175     a->dp    = NULL;
00176     a->alloc = a->used = 0;
00177     a->sign  = MP_ZPOS;
00178   }
00179 }
00180 
00181 
00182 /* get the size for an unsigned equivalent */
00183 int mp_unsigned_bin_size (mp_int * a)
00184 {
00185   int     size = mp_count_bits (a);
00186   return (size / 8 + ((size & 7) != 0 ? 1 : 0));
00187 }
00188 
00189 
00190 /* returns the number of bits in an int */
00191 int
00192 mp_count_bits (mp_int * a)
00193 {
00194   int     r;
00195   mp_digit q;
00196 
00197   /* shortcut */
00198   if (a->used == 0) {
00199     return 0;
00200   }
00201 
00202   /* get number of digits and add that */
00203   r = (a->used - 1) * DIGIT_BIT;
00204 
00205   /* take the last digit and count the bits in it */
00206   q = a->dp[a->used - 1];
00207   while (q > ((mp_digit) 0)) {
00208     ++r;
00209     q >>= ((mp_digit) 1);
00210   }
00211   return r;
00212 }
00213 
00214 
00215 int mp_leading_bit (mp_int * a)
00216 {
00217     int bit = 0;
00218     mp_int t;
00219 
00220     if (mp_init_copy(&t, a) != MP_OKAY)
00221         return 0;
00222 
00223     while (mp_iszero(&t) == 0) {
00224 #ifndef MP_8BIT
00225         bit = (t.dp[0] & 0x80) != 0;
00226 #else
00227         bit = (t.dp[0] | ((t.dp[1] & 0x01) << 7)) & 0x80 != 0;
00228 #endif
00229         if (mp_div_2d (&t, 8, &t, NULL) != MP_OKAY)
00230             break;
00231     }
00232     mp_clear(&t);
00233     return bit;
00234 }
00235 
00236 
00237 /* store in unsigned [big endian] format */
00238 int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
00239 {
00240   int     x, res;
00241   mp_int  t;
00242 
00243   if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
00244     return res;
00245   }
00246 
00247   x = 0;
00248   while (mp_iszero (&t) == 0) {
00249 #ifndef MP_8BIT
00250       b[x++] = (unsigned char) (t.dp[0] & 255);
00251 #else
00252       b[x++] = (unsigned char) (t.dp[0] | ((t.dp[1] & 0x01) << 7));
00253 #endif
00254     if ((res = mp_div_2d (&t, 8, &t, NULL)) != MP_OKAY) {
00255       mp_clear (&t);
00256       return res;
00257     }
00258   }
00259   bn_reverse (b, x);
00260   mp_clear (&t);
00261   return MP_OKAY;
00262 }
00263 
00264 
00265 /* creates "a" then copies b into it */
00266 int mp_init_copy (mp_int * a, mp_int * b)
00267 {
00268   int     res;
00269 
00270   if ((res = mp_init (a)) != MP_OKAY) {
00271     return res;
00272   }
00273   return mp_copy (b, a);
00274 }
00275 
00276 
00277 /* copy, b = a */
00278 int
00279 mp_copy (mp_int * a, mp_int * b)
00280 {
00281   int     res, n;
00282 
00283   /* Safeguard against passing in a null pointer */
00284   if (a == NULL || b == NULL)
00285     return MP_VAL;
00286 
00287   /* if dst == src do nothing */
00288   if (a == b) {
00289     return MP_OKAY;
00290   }
00291 
00292   /* grow dest */
00293   if (b->alloc < a->used) {
00294      if ((res = mp_grow (b, a->used)) != MP_OKAY) {
00295         return res;
00296      }
00297   }
00298 
00299   /* zero b and copy the parameters over */
00300   {
00301     register mp_digit *tmpa, *tmpb;
00302 
00303     /* pointer aliases */
00304 
00305     /* source */
00306     tmpa = a->dp;
00307 
00308     /* destination */
00309     tmpb = b->dp;
00310 
00311     /* copy all the digits */
00312     for (n = 0; n < a->used; n++) {
00313       *tmpb++ = *tmpa++;
00314     }
00315 
00316     /* clear high digits */
00317     for (; n < b->used; n++) {
00318       *tmpb++ = 0;
00319     }
00320   }
00321 
00322   /* copy used count and sign */
00323   b->used = a->used;
00324   b->sign = a->sign;
00325   return MP_OKAY;
00326 }
00327 
00328 
00329 /* grow as required */
00330 int mp_grow (mp_int * a, int size)
00331 {
00332   int     i;
00333   mp_digit *tmp;
00334 
00335   /* if the alloc size is smaller alloc more ram */
00336   if (a->alloc < size) {
00337     /* ensure there are always at least MP_PREC digits extra on top */
00338     size += (MP_PREC * 2) - (size % MP_PREC);
00339 
00340     /* reallocate the array a->dp
00341      *
00342      * We store the return in a temporary variable
00343      * in case the operation failed we don't want
00344      * to overwrite the dp member of a.
00345      */
00346     tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size, 0,
00347                                        DYNAMIC_TYPE_BIGINT);
00348     if (tmp == NULL) {
00349       /* reallocation failed but "a" is still valid [can be freed] */
00350       return MP_MEM;
00351     }
00352 
00353     /* reallocation succeeded so set a->dp */
00354     a->dp = tmp;
00355 
00356     /* zero excess digits */
00357     i        = a->alloc;
00358     a->alloc = size;
00359     for (; i < a->alloc; i++) {
00360       a->dp[i] = 0;
00361     }
00362   }
00363   return MP_OKAY;
00364 }
00365 
00366 
00367 /* shift right by a certain bit count (store quotient in c, optional
00368    remainder in d) */
00369 int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
00370 {
00371   int     D, res;
00372   mp_int  t;
00373 
00374 
00375   /* if the shift count is <= 0 then we do no work */
00376   if (b <= 0) {
00377     res = mp_copy (a, c);
00378     if (d != NULL) {
00379       mp_zero (d);
00380     }
00381     return res;
00382   }
00383 
00384   if ((res = mp_init (&t)) != MP_OKAY) {
00385     return res;
00386   }
00387 
00388   /* get the remainder */
00389   if (d != NULL) {
00390     if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
00391       mp_clear (&t);
00392       return res;
00393     }
00394   }
00395 
00396   /* copy */
00397   if ((res = mp_copy (a, c)) != MP_OKAY) {
00398     mp_clear (&t);
00399     return res;
00400   }
00401 
00402   /* shift by as many digits in the bit count */
00403   if (b >= (int)DIGIT_BIT) {
00404     mp_rshd (c, b / DIGIT_BIT);
00405   }
00406 
00407   /* shift any bit count < DIGIT_BIT */
00408   D = (b % DIGIT_BIT);
00409   if (D != 0) {
00410     mp_rshb(c, D);
00411   }
00412   mp_clamp (c);
00413   if (d != NULL) {
00414     mp_exch (&t, d);
00415   }
00416   mp_clear (&t);
00417   return MP_OKAY;
00418 }
00419 
00420 
00421 /* set to zero */
00422 void mp_zero (mp_int * a)
00423 {
00424   int       n;
00425   mp_digit *tmp;
00426 
00427   a->sign = MP_ZPOS;
00428   a->used = 0;
00429 
00430   tmp = a->dp;
00431   for (n = 0; n < a->alloc; n++) {
00432      *tmp++ = 0;
00433   }
00434 }
00435 
00436 
00437 /* trim unused digits
00438  *
00439  * This is used to ensure that leading zero digits are
00440  * trimmed and the leading "used" digit will be non-zero
00441  * Typically very fast.  Also fixes the sign if there
00442  * are no more leading digits
00443  */
00444 void
00445 mp_clamp (mp_int * a)
00446 {
00447   /* decrease used while the most significant digit is
00448    * zero.
00449    */
00450   while (a->used > 0 && a->dp[a->used - 1] == 0) {
00451     --(a->used);
00452   }
00453 
00454   /* reset the sign flag if used == 0 */
00455   if (a->used == 0) {
00456     a->sign = MP_ZPOS;
00457   }
00458 }
00459 
00460 
00461 /* swap the elements of two integers, for cases where you can't simply swap the
00462  * mp_int pointers around
00463  */
00464 void
00465 mp_exch (mp_int * a, mp_int * b)
00466 {
00467   mp_int  t;
00468 
00469   t  = *a;
00470   *a = *b;
00471   *b = t;
00472 }
00473 
00474 
00475 /* shift right a certain number of bits */
00476 void mp_rshb (mp_int *c, int x)
00477 {
00478     register mp_digit *tmpc, mask, shift;
00479     mp_digit r, rr;
00480     mp_digit D = x;
00481 
00482     /* mask */
00483     mask = (((mp_digit)1) << D) - 1;
00484 
00485     /* shift for lsb */
00486     shift = DIGIT_BIT - D;
00487 
00488     /* alias */
00489     tmpc = c->dp + (c->used - 1);
00490 
00491     /* carry */
00492     r = 0;
00493     for (x = c->used - 1; x >= 0; x--) {
00494       /* get the lower  bits of this word in a temp */
00495       rr = *tmpc & mask;
00496 
00497       /* shift the current word and mix in the carry bits from previous word */
00498       *tmpc = (*tmpc >> D) | (r << shift);
00499       --tmpc;
00500 
00501       /* set the carry to the carry bits of the current word found above */
00502       r = rr;
00503     }
00504 }
00505 
00506 
00507 /* shift right a certain amount of digits */
00508 void mp_rshd (mp_int * a, int b)
00509 {
00510   int     x;
00511 
00512   /* if b <= 0 then ignore it */
00513   if (b <= 0) {
00514     return;
00515   }
00516 
00517   /* if b > used then simply zero it and return */
00518   if (a->used <= b) {
00519     mp_zero (a);
00520     return;
00521   }
00522 
00523   {
00524     register mp_digit *bottom, *top;
00525 
00526     /* shift the digits down */
00527 
00528     /* bottom */
00529     bottom = a->dp;
00530 
00531     /* top [offset into digits] */
00532     top = a->dp + b;
00533 
00534     /* this is implemented as a sliding window where
00535      * the window is b-digits long and digits from
00536      * the top of the window are copied to the bottom
00537      *
00538      * e.g.
00539 
00540      b-2 | b-1 | b0 | b1 | b2 | ... | bb |   ---->
00541                  /\                   |      ---->
00542                   \-------------------/      ---->
00543      */
00544     for (x = 0; x < (a->used - b); x++) {
00545       *bottom++ = *top++;
00546     }
00547 
00548     /* zero the top digits */
00549     for (; x < a->used; x++) {
00550       *bottom++ = 0;
00551     }
00552   }
00553 
00554   /* remove excess digits */
00555   a->used -= b;
00556 }
00557 
00558 
00559 /* calc a value mod 2**b */
00560 int
00561 mp_mod_2d (mp_int * a, int b, mp_int * c)
00562 {
00563   int     x, res;
00564 
00565   /* if b is <= 0 then zero the int */
00566   if (b <= 0) {
00567     mp_zero (c);
00568     return MP_OKAY;
00569   }
00570 
00571   /* if the modulus is larger than the value than return */
00572   if (b >= (int) (a->used * DIGIT_BIT)) {
00573     res = mp_copy (a, c);
00574     return res;
00575   }
00576 
00577   /* copy */
00578   if ((res = mp_copy (a, c)) != MP_OKAY) {
00579     return res;
00580   }
00581 
00582   /* zero digits above the last digit of the modulus */
00583   for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
00584     c->dp[x] = 0;
00585   }
00586   /* clear the digit that is not completely outside/inside the modulus */
00587   c->dp[b / DIGIT_BIT] &= (mp_digit) ((((mp_digit) 1) <<
00588               (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1));
00589   mp_clamp (c);
00590   return MP_OKAY;
00591 }
00592 
00593 
00594 /* reads a unsigned char array, assumes the msb is stored first [big endian] */
00595 int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
00596 {
00597   int     res;
00598 
00599   /* make sure there are at least two digits */
00600   if (a->alloc < 2) {
00601      if ((res = mp_grow(a, 2)) != MP_OKAY) {
00602         return res;
00603      }
00604   }
00605 
00606   /* zero the int */
00607   mp_zero (a);
00608 
00609   /* read the bytes in */
00610   while (c-- > 0) {
00611     if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
00612       return res;
00613     }
00614 
00615 #ifndef MP_8BIT
00616       a->dp[0] |= *b++;
00617       a->used += 1;
00618 #else
00619       a->dp[0] = (*b & MP_MASK);
00620       a->dp[1] |= ((*b++ >> 7U) & 1);
00621       a->used += 2;
00622 #endif
00623   }
00624   mp_clamp (a);
00625   return MP_OKAY;
00626 }
00627 
00628 
00629 /* shift left by a certain bit count */
00630 int mp_mul_2d (mp_int * a, int b, mp_int * c)
00631 {
00632   mp_digit d;
00633   int      res;
00634 
00635   /* copy */
00636   if (a != c) {
00637      if ((res = mp_copy (a, c)) != MP_OKAY) {
00638        return res;
00639      }
00640   }
00641 
00642   if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) {
00643      if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
00644        return res;
00645      }
00646   }
00647 
00648   /* shift by as many digits in the bit count */
00649   if (b >= (int)DIGIT_BIT) {
00650     if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
00651       return res;
00652     }
00653   }
00654 
00655   /* shift any bit count < DIGIT_BIT */
00656   d = (mp_digit) (b % DIGIT_BIT);
00657   if (d != 0) {
00658     register mp_digit *tmpc, shift, mask, r, rr;
00659     register int x;
00660 
00661     /* bitmask for carries */
00662     mask = (((mp_digit)1) << d) - 1;
00663 
00664     /* shift for msbs */
00665     shift = DIGIT_BIT - d;
00666 
00667     /* alias */
00668     tmpc = c->dp;
00669 
00670     /* carry */
00671     r    = 0;
00672     for (x = 0; x < c->used; x++) {
00673       /* get the higher bits of the current word */
00674       rr = (*tmpc >> shift) & mask;
00675 
00676       /* shift the current word and OR in the carry */
00677       *tmpc = (mp_digit)(((*tmpc << d) | r) & MP_MASK);
00678       ++tmpc;
00679 
00680       /* set the carry to the carry bits of the current word */
00681       r = rr;
00682     }
00683 
00684     /* set final carry */
00685     if (r != 0) {
00686        c->dp[(c->used)++] = r;
00687     }
00688   }
00689   mp_clamp (c);
00690   return MP_OKAY;
00691 }
00692 
00693 
00694 /* shift left a certain amount of digits */
00695 int mp_lshd (mp_int * a, int b)
00696 {
00697   int     x, res;
00698 
00699   /* if its less than zero return */
00700   if (b <= 0) {
00701     return MP_OKAY;
00702   }
00703 
00704   /* grow to fit the new digits */
00705   if (a->alloc < a->used + b) {
00706      if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
00707        return res;
00708      }
00709   }
00710 
00711   {
00712     register mp_digit *top, *bottom;
00713 
00714     /* increment the used by the shift amount then copy upwards */
00715     a->used += b;
00716 
00717     /* top */
00718     top = a->dp + a->used - 1;
00719 
00720     /* base */
00721     bottom = a->dp + a->used - 1 - b;
00722 
00723     /* much like mp_rshd this is implemented using a sliding window
00724      * except the window goes the other way around.  Copying from
00725      * the bottom to the top.  see bn_mp_rshd.c for more info.
00726      */
00727     for (x = a->used - 1; x >= b; x--) {
00728       *top-- = *bottom--;
00729     }
00730 
00731     /* zero the lower digits */
00732     top = a->dp;
00733     for (x = 0; x < b; x++) {
00734       *top++ = 0;
00735     }
00736   }
00737   return MP_OKAY;
00738 }
00739 
00740 
00741 /* this is a shell function that calls either the normal or Montgomery
00742  * exptmod functions.  Originally the call to the montgomery code was
00743  * embedded in the normal function but that wasted a lot of stack space
00744  * for nothing (since 99% of the time the Montgomery code would be called)
00745  */
00746 int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
00747 {
00748   int dr;
00749 
00750   /* modulus P must be positive */
00751   if (P->sign == MP_NEG) {
00752      return MP_VAL;
00753   }
00754 
00755   /* if exponent X is negative we have to recurse */
00756   if (X->sign == MP_NEG) {
00757 #ifdef BN_MP_INVMOD_C
00758      mp_int tmpG, tmpX;
00759      int err;
00760 
00761      /* first compute 1/G mod P */
00762      if ((err = mp_init(&tmpG)) != MP_OKAY) {
00763         return err;
00764      }
00765      if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
00766         mp_clear(&tmpG);
00767         return err;
00768      }
00769 
00770      /* now get |X| */
00771      if ((err = mp_init(&tmpX)) != MP_OKAY) {
00772         mp_clear(&tmpG);
00773         return err;
00774      }
00775      if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
00776         mp_clear(&tmpG);
00777         mp_clear(&tmpX);
00778         return err;
00779      }
00780 
00781      /* and now compute (1/G)**|X| instead of G**X [X < 0] */
00782      err = mp_exptmod(&tmpG, &tmpX, P, Y);
00783      mp_clear(&tmpG);
00784      mp_clear(&tmpX);
00785      return err;
00786 #else
00787      /* no invmod */
00788      return MP_VAL;
00789 #endif
00790   }
00791 
00792 /* modified diminished radix reduction */
00793 #if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && \
00794   defined(BN_S_MP_EXPTMOD_C)
00795   if (mp_reduce_is_2k_l(P) == MP_YES) {
00796      return s_mp_exptmod(G, X, P, Y, 1);
00797   }
00798 #endif
00799 
00800 #ifdef BN_MP_DR_IS_MODULUS_C
00801   /* is it a DR modulus? */
00802   dr = mp_dr_is_modulus(P);
00803 #else
00804   /* default to no */
00805   dr = 0;
00806 #endif
00807 
00808 #ifdef BN_MP_REDUCE_IS_2K_C
00809   /* if not, is it a unrestricted DR modulus? */
00810   if (dr == 0) {
00811      dr = mp_reduce_is_2k(P) << 1;
00812   }
00813 #endif
00814 
00815   /* if the modulus is odd or dr != 0 use the montgomery method */
00816 #ifdef BN_MP_EXPTMOD_FAST_C
00817   if (mp_isodd (P) == 1 || dr !=  0) {
00818     return mp_exptmod_fast (G, X, P, Y, dr);
00819   } else {
00820 #endif
00821 #ifdef BN_S_MP_EXPTMOD_C
00822     /* otherwise use the generic Barrett reduction technique */
00823     return s_mp_exptmod (G, X, P, Y, 0);
00824 #else
00825     /* no exptmod for evens */
00826     return MP_VAL;
00827 #endif
00828 #ifdef BN_MP_EXPTMOD_FAST_C
00829   }
00830 #endif
00831 }
00832 
00833 
00834 /* b = |a|
00835  *
00836  * Simple function copies the input and fixes the sign to positive
00837  */
00838 int
00839 mp_abs (mp_int * a, mp_int * b)
00840 {
00841   int     res;
00842 
00843   /* copy a to b */
00844   if (a != b) {
00845      if ((res = mp_copy (a, b)) != MP_OKAY) {
00846        return res;
00847      }
00848   }
00849 
00850   /* force the sign of b to positive */
00851   b->sign = MP_ZPOS;
00852 
00853   return MP_OKAY;
00854 }
00855 
00856 
00857 /* hac 14.61, pp608 */
00858 int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
00859 {
00860   /* b cannot be negative */
00861   if (b->sign == MP_NEG || mp_iszero(b) == 1) {
00862     return MP_VAL;
00863   }
00864 
00865 #ifdef BN_FAST_MP_INVMOD_C
00866   /* if the modulus is odd we can use a faster routine instead */
00867   if (mp_isodd (b) == 1) {
00868     return fast_mp_invmod (a, b, c);
00869   }
00870 #endif
00871 
00872 #ifdef BN_MP_INVMOD_SLOW_C
00873   return mp_invmod_slow(a, b, c);
00874 #endif
00875 }
00876 
00877 
00878 /* computes the modular inverse via binary extended euclidean algorithm,
00879  * that is c = 1/a mod b
00880  *
00881  * Based on slow invmod except this is optimized for the case where b is
00882  * odd as per HAC Note 14.64 on pp. 610
00883  */
00884 int fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
00885 {
00886   mp_int  x, y, u, v, B, D;
00887   int     res, neg, loop_check = 0;
00888 
00889   /* 2. [modified] b must be odd   */
00890   if (mp_iseven (b) == 1) {
00891     return MP_VAL;
00892   }
00893 
00894   /* init all our temps */
00895   if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D)) != MP_OKAY) {
00896      return res;
00897   }
00898 
00899   /* x == modulus, y == value to invert */
00900   if ((res = mp_copy (b, &x)) != MP_OKAY) {
00901     goto LBL_ERR;
00902   }
00903 
00904   /* we need y = |a| */
00905   if ((res = mp_mod (a, b, &y)) != MP_OKAY) {
00906     goto LBL_ERR;
00907   }
00908 
00909   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
00910   if ((res = mp_copy (&x, &u)) != MP_OKAY) {
00911     goto LBL_ERR;
00912   }
00913   if ((res = mp_copy (&y, &v)) != MP_OKAY) {
00914     goto LBL_ERR;
00915   }
00916   mp_set (&D, 1);
00917 
00918 top:
00919   /* 4.  while u is even do */
00920   while (mp_iseven (&u) == 1) {
00921     /* 4.1 u = u/2 */
00922     if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
00923       goto LBL_ERR;
00924     }
00925     /* 4.2 if B is odd then */
00926     if (mp_isodd (&B) == 1) {
00927       if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
00928         goto LBL_ERR;
00929       }
00930     }
00931     /* B = B/2 */
00932     if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
00933       goto LBL_ERR;
00934     }
00935   }
00936 
00937   /* 5.  while v is even do */
00938   while (mp_iseven (&v) == 1) {
00939     /* 5.1 v = v/2 */
00940     if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
00941       goto LBL_ERR;
00942     }
00943     /* 5.2 if D is odd then */
00944     if (mp_isodd (&D) == 1) {
00945       /* D = (D-x)/2 */
00946       if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
00947         goto LBL_ERR;
00948       }
00949     }
00950     /* D = D/2 */
00951     if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
00952       goto LBL_ERR;
00953     }
00954   }
00955 
00956   /* 6.  if u >= v then */
00957   if (mp_cmp (&u, &v) != MP_LT) {
00958     /* u = u - v, B = B - D */
00959     if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
00960       goto LBL_ERR;
00961     }
00962 
00963     if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
00964       goto LBL_ERR;
00965     }
00966   } else {
00967     /* v - v - u, D = D - B */
00968     if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
00969       goto LBL_ERR;
00970     }
00971 
00972     if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
00973       goto LBL_ERR;
00974     }
00975   }
00976 
00977   /* if not zero goto step 4 */
00978   if (mp_iszero (&u) == 0) {
00979     if (++loop_check > 4096) {
00980         res = MP_VAL;
00981         goto LBL_ERR;
00982     }
00983     goto top;
00984   }
00985 
00986   /* now a = C, b = D, gcd == g*v */
00987 
00988   /* if v != 1 then there is no inverse */
00989   if (mp_cmp_d (&v, 1) != MP_EQ) {
00990     res = MP_VAL;
00991     goto LBL_ERR;
00992   }
00993 
00994   /* b is now the inverse */
00995   neg = a->sign;
00996   while (D.sign == MP_NEG) {
00997     if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
00998       goto LBL_ERR;
00999     }
01000   }
01001   /* too big */
01002   while (mp_cmp_mag(&D, b) != MP_LT) {
01003       if ((res = mp_sub(&D, b, &D)) != MP_OKAY) {
01004          goto LBL_ERR;
01005       }
01006   }
01007   mp_exch (&D, c);
01008   c->sign = neg;
01009   res = MP_OKAY;
01010 
01011 LBL_ERR:mp_clear(&x);
01012         mp_clear(&y);
01013         mp_clear(&u);
01014         mp_clear(&v);
01015         mp_clear(&B);
01016         mp_clear(&D);
01017   return res;
01018 }
01019 
01020 
01021 /* hac 14.61, pp608 */
01022 int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
01023 {
01024   mp_int  x, y, u, v, A, B, C, D;
01025   int     res;
01026 
01027   /* b cannot be negative */
01028   if (b->sign == MP_NEG || mp_iszero(b) == 1) {
01029     return MP_VAL;
01030   }
01031 
01032   /* init temps */
01033   if ((res = mp_init_multi(&x, &y, &u, &v,
01034                            &A, &B)) != MP_OKAY) {
01035      return res;
01036   }
01037 
01038   /* init rest of tmps temps */
01039   if ((res = mp_init_multi(&C, &D, 0, 0, 0, 0)) != MP_OKAY) {
01040      return res;
01041   }
01042 
01043   /* x = a, y = b */
01044   if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
01045       goto LBL_ERR;
01046   }
01047   if ((res = mp_copy (b, &y)) != MP_OKAY) {
01048     goto LBL_ERR;
01049   }
01050 
01051   /* 2. [modified] if x,y are both even then return an error! */
01052   if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
01053     res = MP_VAL;
01054     goto LBL_ERR;
01055   }
01056 
01057   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
01058   if ((res = mp_copy (&x, &u)) != MP_OKAY) {
01059     goto LBL_ERR;
01060   }
01061   if ((res = mp_copy (&y, &v)) != MP_OKAY) {
01062     goto LBL_ERR;
01063   }
01064   mp_set (&A, 1);
01065   mp_set (&D, 1);
01066 
01067 top:
01068   /* 4.  while u is even do */
01069   while (mp_iseven (&u) == 1) {
01070     /* 4.1 u = u/2 */
01071     if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
01072       goto LBL_ERR;
01073     }
01074     /* 4.2 if A or B is odd then */
01075     if (mp_isodd (&A) == 1 || mp_isodd (&B) == 1) {
01076       /* A = (A+y)/2, B = (B-x)/2 */
01077       if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
01078          goto LBL_ERR;
01079       }
01080       if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
01081          goto LBL_ERR;
01082       }
01083     }
01084     /* A = A/2, B = B/2 */
01085     if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
01086       goto LBL_ERR;
01087     }
01088     if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
01089       goto LBL_ERR;
01090     }
01091   }
01092 
01093   /* 5.  while v is even do */
01094   while (mp_iseven (&v) == 1) {
01095     /* 5.1 v = v/2 */
01096     if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
01097       goto LBL_ERR;
01098     }
01099     /* 5.2 if C or D is odd then */
01100     if (mp_isodd (&C) == 1 || mp_isodd (&D) == 1) {
01101       /* C = (C+y)/2, D = (D-x)/2 */
01102       if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
01103          goto LBL_ERR;
01104       }
01105       if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
01106          goto LBL_ERR;
01107       }
01108     }
01109     /* C = C/2, D = D/2 */
01110     if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
01111       goto LBL_ERR;
01112     }
01113     if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
01114       goto LBL_ERR;
01115     }
01116   }
01117 
01118   /* 6.  if u >= v then */
01119   if (mp_cmp (&u, &v) != MP_LT) {
01120     /* u = u - v, A = A - C, B = B - D */
01121     if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
01122       goto LBL_ERR;
01123     }
01124 
01125     if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
01126       goto LBL_ERR;
01127     }
01128 
01129     if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
01130       goto LBL_ERR;
01131     }
01132   } else {
01133     /* v - v - u, C = C - A, D = D - B */
01134     if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
01135       goto LBL_ERR;
01136     }
01137 
01138     if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
01139       goto LBL_ERR;
01140     }
01141 
01142     if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
01143       goto LBL_ERR;
01144     }
01145   }
01146 
01147   /* if not zero goto step 4 */
01148   if (mp_iszero (&u) == 0)
01149     goto top;
01150 
01151   /* now a = C, b = D, gcd == g*v */
01152 
01153   /* if v != 1 then there is no inverse */
01154   if (mp_cmp_d (&v, 1) != MP_EQ) {
01155     res = MP_VAL;
01156     goto LBL_ERR;
01157   }
01158 
01159   /* if its too low */
01160   while (mp_cmp_d(&C, 0) == MP_LT) {
01161       if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
01162          goto LBL_ERR;
01163       }
01164   }
01165 
01166   /* too big */
01167   while (mp_cmp_mag(&C, b) != MP_LT) {
01168       if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
01169          goto LBL_ERR;
01170       }
01171   }
01172 
01173   /* C is now the inverse */
01174   mp_exch (&C, c);
01175   res = MP_OKAY;
01176 LBL_ERR:mp_clear(&x);
01177         mp_clear(&y);
01178         mp_clear(&u);
01179         mp_clear(&v);
01180         mp_clear(&A);
01181         mp_clear(&B);
01182         mp_clear(&C);
01183         mp_clear(&D);
01184   return res;
01185 }
01186 
01187 
01188 /* compare magnitude of two ints (unsigned) */
01189 int mp_cmp_mag (mp_int * a, mp_int * b)
01190 {
01191   int     n;
01192   mp_digit *tmpa, *tmpb;
01193 
01194   /* compare based on # of non-zero digits */
01195   if (a->used > b->used) {
01196     return MP_GT;
01197   }
01198 
01199   if (a->used < b->used) {
01200     return MP_LT;
01201   }
01202 
01203   /* alias for a */
01204   tmpa = a->dp + (a->used - 1);
01205 
01206   /* alias for b */
01207   tmpb = b->dp + (a->used - 1);
01208 
01209   /* compare based on digits  */
01210   for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
01211     if (*tmpa > *tmpb) {
01212       return MP_GT;
01213     }
01214 
01215     if (*tmpa < *tmpb) {
01216       return MP_LT;
01217     }
01218   }
01219   return MP_EQ;
01220 }
01221 
01222 
01223 /* compare two ints (signed)*/
01224 int
01225 mp_cmp (mp_int * a, mp_int * b)
01226 {
01227   /* compare based on sign */
01228   if (a->sign != b->sign) {
01229      if (a->sign == MP_NEG) {
01230         return MP_LT;
01231      } else {
01232         return MP_GT;
01233      }
01234   }
01235 
01236   /* compare digits */
01237   if (a->sign == MP_NEG) {
01238      /* if negative compare opposite direction */
01239      return mp_cmp_mag(b, a);
01240   } else {
01241      return mp_cmp_mag(a, b);
01242   }
01243 }
01244 
01245 
01246 /* compare a digit */
01247 int mp_cmp_d(mp_int * a, mp_digit b)
01248 {
01249   /* compare based on sign */
01250   if (a->sign == MP_NEG) {
01251     return MP_LT;
01252   }
01253 
01254   /* compare based on magnitude */
01255   if (a->used > 1) {
01256     return MP_GT;
01257   }
01258 
01259   /* compare the only digit of a to b */
01260   if (a->dp[0] > b) {
01261     return MP_GT;
01262   } else if (a->dp[0] < b) {
01263     return MP_LT;
01264   } else {
01265     return MP_EQ;
01266   }
01267 }
01268 
01269 
01270 /* set to a digit */
01271 void mp_set (mp_int * a, mp_digit b)
01272 {
01273   mp_zero (a);
01274   a->dp[0] = (mp_digit)(b & MP_MASK);
01275   a->used  = (a->dp[0] != 0) ? 1 : 0;
01276 }
01277 
01278 /* chek if a bit is set */
01279 int mp_is_bit_set (mp_int *a, mp_digit b)
01280 {
01281     if ((mp_digit)a->used < b/DIGIT_BIT)
01282         return 0;
01283 
01284     return (int)((a->dp[b/DIGIT_BIT] >> b%DIGIT_BIT) & (mp_digit)1);
01285 }
01286 
01287 /* c = a mod b, 0 <= c < b */
01288 int
01289 mp_mod (mp_int * a, mp_int * b, mp_int * c)
01290 {
01291   mp_int  t;
01292   int     res;
01293 
01294   if ((res = mp_init (&t)) != MP_OKAY) {
01295     return res;
01296   }
01297 
01298   if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
01299     mp_clear (&t);
01300     return res;
01301   }
01302 
01303   if (t.sign != b->sign) {
01304     res = mp_add (b, &t, c);
01305   } else {
01306     res = MP_OKAY;
01307     mp_exch (&t, c);
01308   }
01309 
01310   mp_clear (&t);
01311   return res;
01312 }
01313 
01314 
01315 /* slower bit-bang division... also smaller */
01316 int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
01317 {
01318    mp_int ta, tb, tq, q;
01319    int    res, n, n2;
01320 
01321   /* is divisor zero ? */
01322   if (mp_iszero (b) == 1) {
01323     return MP_VAL;
01324   }
01325 
01326   /* if a < b then q=0, r = a */
01327   if (mp_cmp_mag (a, b) == MP_LT) {
01328     if (d != NULL) {
01329       res = mp_copy (a, d);
01330     } else {
01331       res = MP_OKAY;
01332     }
01333     if (c != NULL) {
01334       mp_zero (c);
01335     }
01336     return res;
01337   }
01338 
01339   /* init our temps */
01340   if ((res = mp_init_multi(&ta, &tb, &tq, &q, 0, 0)) != MP_OKAY) {
01341      return res;
01342   }
01343 
01344 
01345   mp_set(&tq, 1);
01346   n = mp_count_bits(a) - mp_count_bits(b);
01347   if (((res = mp_abs(a, &ta)) != MP_OKAY) ||
01348       ((res = mp_abs(b, &tb)) != MP_OKAY) ||
01349       ((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) ||
01350       ((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
01351       goto LBL_ERR;
01352   }
01353 
01354   while (n-- >= 0) {
01355      if (mp_cmp(&tb, &ta) != MP_GT) {
01356         if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) ||
01357             ((res = mp_add(&q, &tq, &q)) != MP_OKAY)) {
01358            goto LBL_ERR;
01359         }
01360      }
01361      if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) ||
01362          ((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) {
01363            goto LBL_ERR;
01364      }
01365   }
01366 
01367   /* now q == quotient and ta == remainder */
01368   n  = a->sign;
01369   n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG);
01370   if (c != NULL) {
01371      mp_exch(c, &q);
01372      c->sign  = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2;
01373   }
01374   if (d != NULL) {
01375      mp_exch(d, &ta);
01376      d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n;
01377   }
01378 LBL_ERR:
01379    mp_clear(&ta);
01380    mp_clear(&tb);
01381    mp_clear(&tq);
01382    mp_clear(&q);
01383    return res;
01384 }
01385 
01386 
01387 /* b = a/2 */
01388 int mp_div_2(mp_int * a, mp_int * b)
01389 {
01390   int     x, res, oldused;
01391 
01392   /* copy */
01393   if (b->alloc < a->used) {
01394     if ((res = mp_grow (b, a->used)) != MP_OKAY) {
01395       return res;
01396     }
01397   }
01398 
01399   oldused = b->used;
01400   b->used = a->used;
01401   {
01402     register mp_digit r, rr, *tmpa, *tmpb;
01403 
01404     /* source alias */
01405     tmpa = a->dp + b->used - 1;
01406 
01407     /* dest alias */
01408     tmpb = b->dp + b->used - 1;
01409 
01410     /* carry */
01411     r = 0;
01412     for (x = b->used - 1; x >= 0; x--) {
01413       /* get the carry for the next iteration */
01414       rr = *tmpa & 1;
01415 
01416       /* shift the current digit, add in carry and store */
01417       *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
01418 
01419       /* forward carry to next iteration */
01420       r = rr;
01421     }
01422 
01423     /* zero excess digits */
01424     tmpb = b->dp + b->used;
01425     for (x = b->used; x < oldused; x++) {
01426       *tmpb++ = 0;
01427     }
01428   }
01429   b->sign = a->sign;
01430   mp_clamp (b);
01431   return MP_OKAY;
01432 }
01433 
01434 
01435 /* high level addition (handles signs) */
01436 int mp_add (mp_int * a, mp_int * b, mp_int * c)
01437 {
01438   int     sa, sb, res;
01439 
01440   /* get sign of both inputs */
01441   sa = a->sign;
01442   sb = b->sign;
01443 
01444   /* handle two cases, not four */
01445   if (sa == sb) {
01446     /* both positive or both negative */
01447     /* add their magnitudes, copy the sign */
01448     c->sign = sa;
01449     res = s_mp_add (a, b, c);
01450   } else {
01451     /* one positive, the other negative */
01452     /* subtract the one with the greater magnitude from */
01453     /* the one of the lesser magnitude.  The result gets */
01454     /* the sign of the one with the greater magnitude. */
01455     if (mp_cmp_mag (a, b) == MP_LT) {
01456       c->sign = sb;
01457       res = s_mp_sub (b, a, c);
01458     } else {
01459       c->sign = sa;
01460       res = s_mp_sub (a, b, c);
01461     }
01462   }
01463   return res;
01464 }
01465 
01466 
01467 /* low level addition, based on HAC pp.594, Algorithm 14.7 */
01468 int
01469 s_mp_add (mp_int * a, mp_int * b, mp_int * c)
01470 {
01471   mp_int *x;
01472   int     olduse, res, min, max;
01473 
01474   /* find sizes, we let |a| <= |b| which means we have to sort
01475    * them.  "x" will point to the input with the most digits
01476    */
01477   if (a->used > b->used) {
01478     min = b->used;
01479     max = a->used;
01480     x = a;
01481   } else {
01482     min = a->used;
01483     max = b->used;
01484     x = b;
01485   }
01486 
01487   /* init result */
01488   if (c->alloc < max + 1) {
01489     if ((res = mp_grow (c, max + 1)) != MP_OKAY) {
01490       return res;
01491     }
01492   }
01493 
01494   /* get old used digit count and set new one */
01495   olduse = c->used;
01496   c->used = max + 1;
01497 
01498   {
01499     register mp_digit u, *tmpa, *tmpb, *tmpc;
01500     register int i;
01501 
01502     /* alias for digit pointers */
01503 
01504     /* first input */
01505     tmpa = a->dp;
01506 
01507     /* second input */
01508     tmpb = b->dp;
01509 
01510     /* destination */
01511     tmpc = c->dp;
01512 
01513     /* zero the carry */
01514     u = 0;
01515     for (i = 0; i < min; i++) {
01516       /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
01517       *tmpc = *tmpa++ + *tmpb++ + u;
01518 
01519       /* U = carry bit of T[i] */
01520       u = *tmpc >> ((mp_digit)DIGIT_BIT);
01521 
01522       /* take away carry bit from T[i] */
01523       *tmpc++ &= MP_MASK;
01524     }
01525 
01526     /* now copy higher words if any, that is in A+B
01527      * if A or B has more digits add those in
01528      */
01529     if (min != max) {
01530       for (; i < max; i++) {
01531         /* T[i] = X[i] + U */
01532         *tmpc = x->dp[i] + u;
01533 
01534         /* U = carry bit of T[i] */
01535         u = *tmpc >> ((mp_digit)DIGIT_BIT);
01536 
01537         /* take away carry bit from T[i] */
01538         *tmpc++ &= MP_MASK;
01539       }
01540     }
01541 
01542     /* add carry */
01543     *tmpc++ = u;
01544 
01545     /* clear digits above olduse */
01546     for (i = c->used; i < olduse; i++) {
01547       *tmpc++ = 0;
01548     }
01549   }
01550 
01551   mp_clamp (c);
01552   return MP_OKAY;
01553 }
01554 
01555 
01556 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
01557 int
01558 s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
01559 {
01560   int     olduse, res, min, max;
01561 
01562   /* find sizes */
01563   min = b->used;
01564   max = a->used;
01565 
01566   /* init result */
01567   if (c->alloc < max) {
01568     if ((res = mp_grow (c, max)) != MP_OKAY) {
01569       return res;
01570     }
01571   }
01572   olduse = c->used;
01573   c->used = max;
01574 
01575   {
01576     register mp_digit u, *tmpa, *tmpb, *tmpc;
01577     register int i;
01578 
01579     /* alias for digit pointers */
01580     tmpa = a->dp;
01581     tmpb = b->dp;
01582     tmpc = c->dp;
01583 
01584     /* set carry to zero */
01585     u = 0;
01586     for (i = 0; i < min; i++) {
01587       /* T[i] = A[i] - B[i] - U */
01588       *tmpc = *tmpa++ - *tmpb++ - u;
01589 
01590       /* U = carry bit of T[i]
01591        * Note this saves performing an AND operation since
01592        * if a carry does occur it will propagate all the way to the
01593        * MSB.  As a result a single shift is enough to get the carry
01594        */
01595       u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
01596 
01597       /* Clear carry from T[i] */
01598       *tmpc++ &= MP_MASK;
01599     }
01600 
01601     /* now copy higher words if any, e.g. if A has more digits than B  */
01602     for (; i < max; i++) {
01603       /* T[i] = A[i] - U */
01604       *tmpc = *tmpa++ - u;
01605 
01606       /* U = carry bit of T[i] */
01607       u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
01608 
01609       /* Clear carry from T[i] */
01610       *tmpc++ &= MP_MASK;
01611     }
01612 
01613     /* clear digits above used (since we may not have grown result above) */
01614     for (i = c->used; i < olduse; i++) {
01615       *tmpc++ = 0;
01616     }
01617   }
01618 
01619   mp_clamp (c);
01620   return MP_OKAY;
01621 }
01622 
01623 
01624 /* high level subtraction (handles signs) */
01625 int
01626 mp_sub (mp_int * a, mp_int * b, mp_int * c)
01627 {
01628   int     sa, sb, res;
01629 
01630   sa = a->sign;
01631   sb = b->sign;
01632 
01633   if (sa != sb) {
01634     /* subtract a negative from a positive, OR */
01635     /* subtract a positive from a negative. */
01636     /* In either case, ADD their magnitudes, */
01637     /* and use the sign of the first number. */
01638     c->sign = sa;
01639     res = s_mp_add (a, b, c);
01640   } else {
01641     /* subtract a positive from a positive, OR */
01642     /* subtract a negative from a negative. */
01643     /* First, take the difference between their */
01644     /* magnitudes, then... */
01645     if (mp_cmp_mag (a, b) != MP_LT) {
01646       /* Copy the sign from the first */
01647       c->sign = sa;
01648       /* The first has a larger or equal magnitude */
01649       res = s_mp_sub (a, b, c);
01650     } else {
01651       /* The result has the *opposite* sign from */
01652       /* the first number. */
01653       c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
01654       /* The second has a larger magnitude */
01655       res = s_mp_sub (b, a, c);
01656     }
01657   }
01658   return res;
01659 }
01660 
01661 
01662 /* determines if reduce_2k_l can be used */
01663 int mp_reduce_is_2k_l(mp_int *a)
01664 {
01665    int ix, iy;
01666 
01667    if (a->used == 0) {
01668       return MP_NO;
01669    } else if (a->used == 1) {
01670       return MP_YES;
01671    } else if (a->used > 1) {
01672       /* if more than half of the digits are -1 we're sold */
01673       for (iy = ix = 0; ix < a->used; ix++) {
01674           if (a->dp[ix] == MP_MASK) {
01675               ++iy;
01676           }
01677       }
01678       return (iy >= (a->used/2)) ? MP_YES : MP_NO;
01679 
01680    }
01681    return MP_NO;
01682 }
01683 
01684 
01685 /* determines if mp_reduce_2k can be used */
01686 int mp_reduce_is_2k(mp_int *a)
01687 {
01688    int ix, iy, iw;
01689    mp_digit iz;
01690 
01691    if (a->used == 0) {
01692       return MP_NO;
01693    } else if (a->used == 1) {
01694       return MP_YES;
01695    } else if (a->used > 1) {
01696       iy = mp_count_bits(a);
01697       iz = 1;
01698       iw = 1;
01699 
01700       /* Test every bit from the second digit up, must be 1 */
01701       for (ix = DIGIT_BIT; ix < iy; ix++) {
01702           if ((a->dp[iw] & iz) == 0) {
01703              return MP_NO;
01704           }
01705           iz <<= 1;
01706           if (iz > (mp_digit)MP_MASK) {
01707              ++iw;
01708              iz = 1;
01709           }
01710       }
01711    }
01712    return MP_YES;
01713 }
01714 
01715 
01716 /* determines if a number is a valid DR modulus */
01717 int mp_dr_is_modulus(mp_int *a)
01718 {
01719    int ix;
01720 
01721    /* must be at least two digits */
01722    if (a->used < 2) {
01723       return 0;
01724    }
01725 
01726    /* must be of the form b**k - a [a <= b] so all
01727     * but the first digit must be equal to -1 (mod b).
01728     */
01729    for (ix = 1; ix < a->used; ix++) {
01730        if (a->dp[ix] != MP_MASK) {
01731           return 0;
01732        }
01733    }
01734    return 1;
01735 }
01736 
01737 
01738 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
01739  *
01740  * Uses a left-to-right k-ary sliding window to compute the modular
01741  * exponentiation.
01742  * The value of k changes based on the size of the exponent.
01743  *
01744  * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
01745  */
01746 
01747 #ifdef MP_LOW_MEM
01748    #define TAB_SIZE 32
01749 #else
01750    #define TAB_SIZE 256
01751 #endif
01752 
01753 int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
01754                      int redmode)
01755 {
01756   mp_int res;
01757   mp_digit buf, mp;
01758   int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
01759 #ifdef WOLFSSL_SMALL_STACK
01760   mp_int* M = NULL;
01761 #else
01762   mp_int M[TAB_SIZE];
01763 #endif
01764   /* use a pointer to the reduction algorithm.  This allows us to use
01765    * one of many reduction algorithms without modding the guts of
01766    * the code with if statements everywhere.
01767    */
01768   int     (*redux)(mp_int*,mp_int*,mp_digit);
01769 
01770 #ifdef WOLFSSL_SMALL_STACK
01771   M = (mp_int*) XMALLOC(sizeof(mp_int) * TAB_SIZE, NULL,
01772                                                        DYNAMIC_TYPE_TMP_BUFFER);
01773   if (M == NULL)
01774     return MP_MEM;
01775 #endif
01776 
01777   /* find window size */
01778   x = mp_count_bits (X);
01779   if (x <= 7) {
01780     winsize = 2;
01781   } else if (x <= 36) {
01782     winsize = 3;
01783   } else if (x <= 140) {
01784     winsize = 4;
01785   } else if (x <= 450) {
01786     winsize = 5;
01787   } else if (x <= 1303) {
01788     winsize = 6;
01789   } else if (x <= 3529) {
01790     winsize = 7;
01791   } else {
01792     winsize = 8;
01793   }
01794 
01795 #ifdef MP_LOW_MEM
01796   if (winsize > 5) {
01797      winsize = 5;
01798   }
01799 #endif
01800 
01801   /* init M array */
01802   /* init first cell */
01803   if ((err = mp_init(&M[1])) != MP_OKAY) {
01804 #ifdef WOLFSSL_SMALL_STACK
01805   XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
01806 #endif
01807 
01808      return err;
01809   }
01810 
01811   /* now init the second half of the array */
01812   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
01813     if ((err = mp_init(&M[x])) != MP_OKAY) {
01814       for (y = 1<<(winsize-1); y < x; y++) {
01815         mp_clear (&M[y]);
01816       }
01817       mp_clear(&M[1]);
01818 
01819 #ifdef WOLFSSL_SMALL_STACK
01820       XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
01821 #endif
01822 
01823       return err;
01824     }
01825   }
01826 
01827   /* determine and setup reduction code */
01828   if (redmode == 0) {
01829 #ifdef BN_MP_MONTGOMERY_SETUP_C
01830      /* now setup montgomery  */
01831      if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
01832         goto LBL_M;
01833      }
01834 #else
01835      err = MP_VAL;
01836      goto LBL_M;
01837 #endif
01838 
01839      /* automatically pick the comba one if available (saves quite a few
01840         calls/ifs) */
01841 #ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
01842      if (((P->used * 2 + 1) < MP_WARRAY) &&
01843           P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
01844         redux = fast_mp_montgomery_reduce;
01845      } else
01846 #endif
01847      {
01848 #ifdef BN_MP_MONTGOMERY_REDUCE_C
01849         /* use slower baseline Montgomery method */
01850         redux = mp_montgomery_reduce;
01851 #else
01852         err = MP_VAL;
01853         goto LBL_M;
01854 #endif
01855      }
01856   } else if (redmode == 1) {
01857 #if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C)
01858      /* setup DR reduction for moduli of the form B**k - b */
01859      mp_dr_setup(P, &mp);
01860      redux = mp_dr_reduce;
01861 #else
01862      err = MP_VAL;
01863      goto LBL_M;
01864 #endif
01865   } else {
01866 #if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
01867      /* setup DR reduction for moduli of the form 2**k - b */
01868      if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
01869         goto LBL_M;
01870      }
01871      redux = mp_reduce_2k;
01872 #else
01873      err = MP_VAL;
01874      goto LBL_M;
01875 #endif
01876   }
01877 
01878   /* setup result */
01879   if ((err = mp_init (&res)) != MP_OKAY) {
01880     goto LBL_M;
01881   }
01882 
01883   /* create M table
01884    *
01885 
01886    *
01887    * The first half of the table is not computed though accept for M[0] and M[1]
01888    */
01889 
01890   if (redmode == 0) {
01891 #ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
01892      /* now we need R mod m */
01893      if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
01894        goto LBL_RES;
01895      }
01896 #else
01897      err = MP_VAL;
01898      goto LBL_RES;
01899 #endif
01900 
01901      /* now set M[1] to G * R mod m */
01902      if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
01903        goto LBL_RES;
01904      }
01905   } else {
01906      mp_set(&res, 1);
01907      if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
01908         goto LBL_RES;
01909      }
01910   }
01911 
01912   /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times*/
01913   if ((err = mp_copy (&M[1], &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
01914     goto LBL_RES;
01915   }
01916 
01917   for (x = 0; x < (winsize - 1); x++) {
01918     if ((err = mp_sqr (&M[(mp_digit)(1 << (winsize - 1))],
01919                        &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
01920       goto LBL_RES;
01921     }
01922     if ((err = redux (&M[(mp_digit)(1 << (winsize - 1))], P, mp)) != MP_OKAY) {
01923       goto LBL_RES;
01924     }
01925   }
01926 
01927   /* create upper table */
01928   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
01929     if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
01930       goto LBL_RES;
01931     }
01932     if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
01933       goto LBL_RES;
01934     }
01935   }
01936 
01937   /* set initial mode and bit cnt */
01938   mode   = 0;
01939   bitcnt = 1;
01940   buf    = 0;
01941   digidx = X->used - 1;
01942   bitcpy = 0;
01943   bitbuf = 0;
01944 
01945   for (;;) {
01946     /* grab next digit as required */
01947     if (--bitcnt == 0) {
01948       /* if digidx == -1 we are out of digits so break */
01949       if (digidx == -1) {
01950         break;
01951       }
01952       /* read next digit and reset bitcnt */
01953       buf    = X->dp[digidx--];
01954       bitcnt = (int)DIGIT_BIT;
01955     }
01956 
01957     /* grab the next msb from the exponent */
01958     y     = (int)(buf >> (DIGIT_BIT - 1)) & 1;
01959     buf <<= (mp_digit)1;
01960 
01961     /* if the bit is zero and mode == 0 then we ignore it
01962      * These represent the leading zero bits before the first 1 bit
01963      * in the exponent.  Technically this opt is not required but it
01964      * does lower the # of trivial squaring/reductions used
01965      */
01966     if (mode == 0 && y == 0) {
01967       continue;
01968     }
01969 
01970     /* if the bit is zero and mode == 1 then we square */
01971     if (mode == 1 && y == 0) {
01972       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
01973         goto LBL_RES;
01974       }
01975       if ((err = redux (&res, P, mp)) != MP_OKAY) {
01976         goto LBL_RES;
01977       }
01978       continue;
01979     }
01980 
01981     /* else we add it to the window */
01982     bitbuf |= (y << (winsize - ++bitcpy));
01983     mode    = 2;
01984 
01985     if (bitcpy == winsize) {
01986       /* ok window is filled so square as required and multiply  */
01987       /* square first */
01988       for (x = 0; x < winsize; x++) {
01989         if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
01990           goto LBL_RES;
01991         }
01992         if ((err = redux (&res, P, mp)) != MP_OKAY) {
01993           goto LBL_RES;
01994         }
01995       }
01996 
01997       /* then multiply */
01998       if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
01999         goto LBL_RES;
02000       }
02001       if ((err = redux (&res, P, mp)) != MP_OKAY) {
02002         goto LBL_RES;
02003       }
02004 
02005       /* empty window and reset */
02006       bitcpy = 0;
02007       bitbuf = 0;
02008       mode   = 1;
02009     }
02010   }
02011 
02012   /* if bits remain then square/multiply */
02013   if (mode == 2 && bitcpy > 0) {
02014     /* square then multiply if the bit is set */
02015     for (x = 0; x < bitcpy; x++) {
02016       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
02017         goto LBL_RES;
02018       }
02019       if ((err = redux (&res, P, mp)) != MP_OKAY) {
02020         goto LBL_RES;
02021       }
02022 
02023       /* get next bit of the window */
02024       bitbuf <<= 1;
02025       if ((bitbuf & (1 << winsize)) != 0) {
02026         /* then multiply */
02027         if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
02028           goto LBL_RES;
02029         }
02030         if ((err = redux (&res, P, mp)) != MP_OKAY) {
02031           goto LBL_RES;
02032         }
02033       }
02034     }
02035   }
02036 
02037   if (redmode == 0) {
02038      /* fixup result if Montgomery reduction is used
02039       * recall that any value in a Montgomery system is
02040       * actually multiplied by R mod n.  So we have
02041       * to reduce one more time to cancel out the factor
02042       * of R.
02043       */
02044      if ((err = redux(&res, P, mp)) != MP_OKAY) {
02045        goto LBL_RES;
02046      }
02047   }
02048 
02049   /* swap res with Y */
02050   mp_exch (&res, Y);
02051   err = MP_OKAY;
02052 LBL_RES:mp_clear (&res);
02053 LBL_M:
02054   mp_clear(&M[1]);
02055   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
02056     mp_clear (&M[x]);
02057   }
02058 
02059 #ifdef WOLFSSL_SMALL_STACK
02060   XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
02061 #endif
02062 
02063   return err;
02064 }
02065 
02066 
02067 /* setups the montgomery reduction stuff */
02068 int
02069 mp_montgomery_setup (mp_int * n, mp_digit * rho)
02070 {
02071   mp_digit x, b;
02072 
02073 /* fast inversion mod 2**k
02074  *
02075  * Based on the fact that
02076  *
02077  * XA = 1 (mod 2**n)  =>  (X(2-XA)) A = 1 (mod 2**2n)
02078  *                    =>  2*X*A - X*X*A*A = 1
02079  *                    =>  2*(1) - (1)     = 1
02080  */
02081   b = n->dp[0];
02082 
02083   if ((b & 1) == 0) {
02084     return MP_VAL;
02085   }
02086 
02087   x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
02088   x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
02089 #if !defined(MP_8BIT)
02090   x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
02091 #endif
02092 #if defined(MP_64BIT) || !(defined(MP_8BIT) || defined(MP_16BIT))
02093   x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
02094 #endif
02095 #ifdef MP_64BIT
02096   x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
02097 #endif
02098 
02099   /* rho = -1/m mod b */
02100   /* TAO, switched mp_word casts to mp_digit to shut up compiler */
02101   *rho = (mp_digit)((((mp_digit)1 << ((mp_digit) DIGIT_BIT)) - x) & MP_MASK);
02102 
02103   return MP_OKAY;
02104 }
02105 
02106 
02107 /* computes xR**-1 == x (mod N) via Montgomery Reduction
02108  *
02109  * This is an optimized implementation of montgomery_reduce
02110  * which uses the comba method to quickly calculate the columns of the
02111  * reduction.
02112  *
02113  * Based on Algorithm 14.32 on pp.601 of HAC.
02114 */
02115 int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
02116 {
02117   int     ix, res, olduse;
02118 #ifdef WOLFSSL_SMALL_STACK
02119   mp_word* W;    /* uses dynamic memory and slower */
02120 #else
02121   mp_word W[MP_WARRAY];
02122 #endif
02123 
02124   /* get old used count */
02125   olduse = x->used;
02126 
02127   /* grow a as required */
02128   if (x->alloc < n->used + 1) {
02129     if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
02130       return res;
02131     }
02132   }
02133 
02134 #ifdef WOLFSSL_SMALL_STACK
02135   W = (mp_word*)XMALLOC(sizeof(mp_word) * MP_WARRAY, 0, DYNAMIC_TYPE_BIGINT);
02136   if (W == NULL)
02137     return MP_MEM;
02138 #endif
02139 
02140   /* first we have to get the digits of the input into
02141    * an array of double precision words W[...]
02142    */
02143   {
02144     register mp_word *_W;
02145     register mp_digit *tmpx;
02146 
02147     /* alias for the W[] array */
02148     _W   = W;
02149 
02150     /* alias for the digits of  x*/
02151     tmpx = x->dp;
02152 
02153     /* copy the digits of a into W[0..a->used-1] */
02154     for (ix = 0; ix < x->used; ix++) {
02155       *_W++ = *tmpx++;
02156     }
02157 
02158     /* zero the high words of W[a->used..m->used*2] */
02159     for (; ix < n->used * 2 + 1; ix++) {
02160       *_W++ = 0;
02161     }
02162   }
02163 
02164   /* now we proceed to zero successive digits
02165    * from the least significant upwards
02166    */
02167   for (ix = 0; ix < n->used; ix++) {
02168     /* mu = ai * m' mod b
02169      *
02170      * We avoid a double precision multiplication (which isn't required)
02171      * by casting the value down to a mp_digit.  Note this requires
02172      * that W[ix-1] have  the carry cleared (see after the inner loop)
02173      */
02174     register mp_digit mu;
02175     mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
02176 
02177     /* a = a + mu * m * b**i
02178      *
02179      * This is computed in place and on the fly.  The multiplication
02180      * by b**i is handled by offseting which columns the results
02181      * are added to.
02182      *
02183      * Note the comba method normally doesn't handle carries in the
02184      * inner loop In this case we fix the carry from the previous
02185      * column since the Montgomery reduction requires digits of the
02186      * result (so far) [see above] to work.  This is
02187      * handled by fixing up one carry after the inner loop.  The
02188      * carry fixups are done in order so after these loops the
02189      * first m->used words of W[] have the carries fixed
02190      */
02191     {
02192       register int iy;
02193       register mp_digit *tmpn;
02194       register mp_word *_W;
02195 
02196       /* alias for the digits of the modulus */
02197       tmpn = n->dp;
02198 
02199       /* Alias for the columns set by an offset of ix */
02200       _W = W + ix;
02201 
02202       /* inner loop */
02203       for (iy = 0; iy < n->used; iy++) {
02204           *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++);
02205       }
02206     }
02207 
02208     /* now fix carry for next digit, W[ix+1] */
02209     W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
02210   }
02211 
02212   /* now we have to propagate the carries and
02213    * shift the words downward [all those least
02214    * significant digits we zeroed].
02215    */
02216   {
02217     register mp_digit *tmpx;
02218     register mp_word *_W, *_W1;
02219 
02220     /* nox fix rest of carries */
02221 
02222     /* alias for current word */
02223     _W1 = W + ix;
02224 
02225     /* alias for next word, where the carry goes */
02226     _W = W + ++ix;
02227 
02228     for (; ix <= n->used * 2 + 1; ix++) {
02229       *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
02230     }
02231 
02232     /* copy out, A = A/b**n
02233      *
02234      * The result is A/b**n but instead of converting from an
02235      * array of mp_word to mp_digit than calling mp_rshd
02236      * we just copy them in the right order
02237      */
02238 
02239     /* alias for destination word */
02240     tmpx = x->dp;
02241 
02242     /* alias for shifted double precision result */
02243     _W = W + n->used;
02244 
02245     for (ix = 0; ix < n->used + 1; ix++) {
02246       *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
02247     }
02248 
02249     /* zero olduse digits, if the input a was larger than
02250      * m->used+1 we'll have to clear the digits
02251      */
02252     for (; ix < olduse; ix++) {
02253       *tmpx++ = 0;
02254     }
02255   }
02256 
02257   /* set the max used and clamp */
02258   x->used = n->used + 1;
02259   mp_clamp (x);
02260 
02261 #ifdef WOLFSSL_SMALL_STACK
02262   XFREE(W, 0, DYNAMIC_TYPE_BIGINT);
02263 #endif
02264 
02265   /* if A >= m then A = A - m */
02266   if (mp_cmp_mag (x, n) != MP_LT) {
02267     return s_mp_sub (x, n, x);
02268   }
02269   return MP_OKAY;
02270 }
02271 
02272 
02273 /* computes xR**-1 == x (mod N) via Montgomery Reduction */
02274 int
02275 mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
02276 {
02277   int     ix, res, digs;
02278   mp_digit mu;
02279 
02280   /* can the fast reduction [comba] method be used?
02281    *
02282    * Note that unlike in mul you're safely allowed *less*
02283    * than the available columns [255 per default] since carries
02284    * are fixed up in the inner loop.
02285    */
02286   digs = n->used * 2 + 1;
02287   if ((digs < MP_WARRAY) &&
02288       n->used <
02289       (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
02290     return fast_mp_montgomery_reduce (x, n, rho);
02291   }
02292 
02293   /* grow the input as required */
02294   if (x->alloc < digs) {
02295     if ((res = mp_grow (x, digs)) != MP_OKAY) {
02296       return res;
02297     }
02298   }
02299   x->used = digs;
02300 
02301   for (ix = 0; ix < n->used; ix++) {
02302     /* mu = ai * rho mod b
02303      *
02304      * The value of rho must be precalculated via
02305      * montgomery_setup() such that
02306      * it equals -1/n0 mod b this allows the
02307      * following inner loop to reduce the
02308      * input one digit at a time
02309      */
02310     mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK);
02311 
02312     /* a = a + mu * m * b**i */
02313     {
02314       register int iy;
02315       register mp_digit *tmpn, *tmpx, u;
02316       register mp_word r;
02317 
02318       /* alias for digits of the modulus */
02319       tmpn = n->dp;
02320 
02321       /* alias for the digits of x [the input] */
02322       tmpx = x->dp + ix;
02323 
02324       /* set the carry to zero */
02325       u = 0;
02326 
02327       /* Multiply and add in place */
02328       for (iy = 0; iy < n->used; iy++) {
02329         /* compute product and sum */
02330         r       = ((mp_word)mu) * ((mp_word)*tmpn++) +
02331                   ((mp_word) u) + ((mp_word) * tmpx);
02332 
02333         /* get carry */
02334         u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
02335 
02336         /* fix digit */
02337         *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK));
02338       }
02339       /* At this point the ix'th digit of x should be zero */
02340 
02341 
02342       /* propagate carries upwards as required*/
02343       while (u) {
02344         *tmpx   += u;
02345         u        = *tmpx >> DIGIT_BIT;
02346         *tmpx++ &= MP_MASK;
02347       }
02348     }
02349   }
02350 
02351   /* at this point the n.used'th least
02352    * significant digits of x are all zero
02353    * which means we can shift x to the
02354    * right by n.used digits and the
02355    * residue is unchanged.
02356    */
02357 
02358   /* x = x/b**n.used */
02359   mp_clamp(x);
02360   mp_rshd (x, n->used);
02361 
02362   /* if x >= n then x = x - n */
02363   if (mp_cmp_mag (x, n) != MP_LT) {
02364     return s_mp_sub (x, n, x);
02365   }
02366 
02367   return MP_OKAY;
02368 }
02369 
02370 
02371 /* determines the setup value */
02372 void mp_dr_setup(mp_int *a, mp_digit *d)
02373 {
02374    /* the casts are required if DIGIT_BIT is one less than
02375     * the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
02376     */
02377    *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) -
02378         ((mp_word)a->dp[0]));
02379 }
02380 
02381 
02382 /* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
02383  *
02384  * Based on algorithm from the paper
02385  *
02386  * "Generating Efficient Primes for Discrete Log Cryptosystems"
02387  *                 Chae Hoon Lim, Pil Joong Lee,
02388  *          POSTECH Information Research Laboratories
02389  *
02390  * The modulus must be of a special format [see manual]
02391  *
02392  * Has been modified to use algorithm 7.10 from the LTM book instead
02393  *
02394  * Input x must be in the range 0 <= x <= (n-1)**2
02395  */
02396 int
02397 mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k)
02398 {
02399   int      err, i, m;
02400   mp_word  r;
02401   mp_digit mu, *tmpx1, *tmpx2;
02402 
02403   /* m = digits in modulus */
02404   m = n->used;
02405 
02406   /* ensure that "x" has at least 2m digits */
02407   if (x->alloc < m + m) {
02408     if ((err = mp_grow (x, m + m)) != MP_OKAY) {
02409       return err;
02410     }
02411   }
02412 
02413 /* top of loop, this is where the code resumes if
02414  * another reduction pass is required.
02415  */
02416 top:
02417   /* aliases for digits */
02418   /* alias for lower half of x */
02419   tmpx1 = x->dp;
02420 
02421   /* alias for upper half of x, or x/B**m */
02422   tmpx2 = x->dp + m;
02423 
02424   /* set carry to zero */
02425   mu = 0;
02426 
02427   /* compute (x mod B**m) + k * [x/B**m] inline and inplace */
02428   for (i = 0; i < m; i++) {
02429       r         = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu;
02430       *tmpx1++  = (mp_digit)(r & MP_MASK);
02431       mu        = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
02432   }
02433 
02434   /* set final carry */
02435   *tmpx1++ = mu;
02436 
02437   /* zero words above m */
02438   for (i = m + 1; i < x->used; i++) {
02439       *tmpx1++ = 0;
02440   }
02441 
02442   /* clamp, sub and return */
02443   mp_clamp (x);
02444 
02445   /* if x >= n then subtract and reduce again
02446    * Each successive "recursion" makes the input smaller and smaller.
02447    */
02448   if (mp_cmp_mag (x, n) != MP_LT) {
02449     s_mp_sub(x, n, x);
02450     goto top;
02451   }
02452   return MP_OKAY;
02453 }
02454 
02455 
02456 /* reduces a modulo n where n is of the form 2**p - d */
02457 int mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d)
02458 {
02459    mp_int q;
02460    int    p, res;
02461 
02462    if ((res = mp_init(&q)) != MP_OKAY) {
02463       return res;
02464    }
02465 
02466    p = mp_count_bits(n);
02467 top:
02468    /* q = a/2**p, a = a mod 2**p */
02469    if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
02470       goto ERR;
02471    }
02472 
02473    if (d != 1) {
02474       /* q = q * d */
02475       if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) {
02476          goto ERR;
02477       }
02478    }
02479 
02480    /* a = a + q */
02481    if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
02482       goto ERR;
02483    }
02484 
02485    if (mp_cmp_mag(a, n) != MP_LT) {
02486       s_mp_sub(a, n, a);
02487       goto top;
02488    }
02489 
02490 ERR:
02491    mp_clear(&q);
02492    return res;
02493 }
02494 
02495 
02496 /* determines the setup value */
02497 int mp_reduce_2k_setup(mp_int *a, mp_digit *d)
02498 {
02499    int res, p;
02500    mp_int tmp;
02501 
02502    if ((res = mp_init(&tmp)) != MP_OKAY) {
02503       return res;
02504    }
02505 
02506    p = mp_count_bits(a);
02507    if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
02508       mp_clear(&tmp);
02509       return res;
02510    }
02511 
02512    if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
02513       mp_clear(&tmp);
02514       return res;
02515    }
02516 
02517    *d = tmp.dp[0];
02518    mp_clear(&tmp);
02519    return MP_OKAY;
02520 }
02521 
02522 
02523 /* set the b bit of a */
02524 int
02525 mp_set_bit (mp_int * a, int b)
02526 {
02527     int i = b / DIGIT_BIT, res;
02528 
02529     if (a->used < (int)(i + 1)) {
02530         /* grow a to accommodate the single bit */
02531         if ((res = mp_grow (a, i + 1)) != MP_OKAY) {
02532             return res;
02533         }
02534 
02535         /* set the used count of where the bit will go */
02536         a->used = (int)(i + 1);
02537     }
02538 
02539     /* put the single bit in its place */
02540     a->dp[i] |= ((mp_digit)1) << (b % DIGIT_BIT);
02541 
02542     return MP_OKAY;
02543 }
02544 
02545 /* computes a = 2**b
02546  *
02547  * Simple algorithm which zeros the int, set the required bit
02548  */
02549 int
02550 mp_2expt (mp_int * a, int b)
02551 {
02552     /* zero a as per default */
02553     mp_zero (a);
02554 
02555     return mp_set_bit(a, b);
02556 }
02557 
02558 /* multiply by a digit */
02559 int
02560 mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
02561 {
02562   mp_digit u, *tmpa, *tmpc;
02563   mp_word  r;
02564   int      ix, res, olduse;
02565 
02566   /* make sure c is big enough to hold a*b */
02567   if (c->alloc < a->used + 1) {
02568     if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
02569       return res;
02570     }
02571   }
02572 
02573   /* get the original destinations used count */
02574   olduse = c->used;
02575 
02576   /* set the sign */
02577   c->sign = a->sign;
02578 
02579   /* alias for a->dp [source] */
02580   tmpa = a->dp;
02581 
02582   /* alias for c->dp [dest] */
02583   tmpc = c->dp;
02584 
02585   /* zero carry */
02586   u = 0;
02587 
02588   /* compute columns */
02589   for (ix = 0; ix < a->used; ix++) {
02590     /* compute product and carry sum for this term */
02591     r       = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b);
02592 
02593     /* mask off higher bits to get a single digit */
02594     *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
02595 
02596     /* send carry into next iteration */
02597     u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
02598   }
02599 
02600   /* store final carry [if any] and increment ix offset  */
02601   *tmpc++ = u;
02602   ++ix;
02603 
02604   /* now zero digits above the top */
02605   while (ix++ < olduse) {
02606      *tmpc++ = 0;
02607   }
02608 
02609   /* set used count */
02610   c->used = a->used + 1;
02611   mp_clamp(c);
02612 
02613   return MP_OKAY;
02614 }
02615 
02616 
02617 /* d = a * b (mod c) */
02618 int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
02619 {
02620   int     res;
02621   mp_int  t;
02622 
02623   if ((res = mp_init (&t)) != MP_OKAY) {
02624     return res;
02625   }
02626 
02627   if ((res = mp_mul (a, b, &t)) != MP_OKAY) {
02628     mp_clear (&t);
02629     return res;
02630   }
02631   res = mp_mod (&t, c, d);
02632   mp_clear (&t);
02633   return res;
02634 }
02635 
02636 
02637 /* computes b = a*a */
02638 int
02639 mp_sqr (mp_int * a, mp_int * b)
02640 {
02641   int     res;
02642 
02643   {
02644 #ifdef BN_FAST_S_MP_SQR_C
02645     /* can we use the fast comba multiplier? */
02646     if ((a->used * 2 + 1) < MP_WARRAY &&
02647          a->used <
02648          (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
02649       res = fast_s_mp_sqr (a, b);
02650     } else
02651 #endif
02652 #ifdef BN_S_MP_SQR_C
02653       res = s_mp_sqr (a, b);
02654 #else
02655       res = MP_VAL;
02656 #endif
02657   }
02658   b->sign = MP_ZPOS;
02659   return res;
02660 }
02661 
02662 
02663 /* high level multiplication (handles sign) */
02664 int mp_mul (mp_int * a, mp_int * b, mp_int * c)
02665 {
02666   int     res, neg;
02667   neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
02668 
02669   {
02670     /* can we use the fast multiplier?
02671      *
02672      * The fast multiplier can be used if the output will
02673      * have less than MP_WARRAY digits and the number of
02674      * digits won't affect carry propagation
02675      */
02676     int     digs = a->used + b->used + 1;
02677 
02678 #ifdef BN_FAST_S_MP_MUL_DIGS_C
02679     if ((digs < MP_WARRAY) &&
02680         MIN(a->used, b->used) <=
02681         (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
02682       res = fast_s_mp_mul_digs (a, b, c, digs);
02683     } else
02684 #endif
02685 #ifdef BN_S_MP_MUL_DIGS_C
02686       res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
02687 #else
02688       res = MP_VAL;
02689 #endif
02690 
02691   }
02692   c->sign = (c->used > 0) ? neg : MP_ZPOS;
02693   return res;
02694 }
02695 
02696 
02697 /* b = a*2 */
02698 int mp_mul_2(mp_int * a, mp_int * b)
02699 {
02700   int     x, res, oldused;
02701 
02702   /* grow to accommodate result */
02703   if (b->alloc < a->used + 1) {
02704     if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
02705       return res;
02706     }
02707   }
02708 
02709   oldused = b->used;
02710   b->used = a->used;
02711 
02712   {
02713     register mp_digit r, rr, *tmpa, *tmpb;
02714 
02715     /* alias for source */
02716     tmpa = a->dp;
02717 
02718     /* alias for dest */
02719     tmpb = b->dp;
02720 
02721     /* carry */
02722     r = 0;
02723     for (x = 0; x < a->used; x++) {
02724 
02725       /* get what will be the *next* carry bit from the
02726        * MSB of the current digit
02727        */
02728       rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
02729 
02730       /* now shift up this digit, add in the carry [from the previous] */
02731       *tmpb++ = (mp_digit)(((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK);
02732 
02733       /* copy the carry that would be from the source
02734        * digit into the next iteration
02735        */
02736       r = rr;
02737     }
02738 
02739     /* new leading digit? */
02740     if (r != 0) {
02741       /* add a MSB which is always 1 at this point */
02742       *tmpb = 1;
02743       ++(b->used);
02744     }
02745 
02746     /* now zero any excess digits on the destination
02747      * that we didn't write to
02748      */
02749     tmpb = b->dp + b->used;
02750     for (x = b->used; x < oldused; x++) {
02751       *tmpb++ = 0;
02752     }
02753   }
02754   b->sign = a->sign;
02755   return MP_OKAY;
02756 }
02757 
02758 
02759 /* divide by three (based on routine from MPI and the GMP manual) */
02760 int
02761 mp_div_3 (mp_int * a, mp_int *c, mp_digit * d)
02762 {
02763   mp_int   q;
02764   mp_word  w, t;
02765   mp_digit b;
02766   int      res, ix;
02767 
02768   /* b = 2**DIGIT_BIT / 3 */
02769   b = (mp_digit) ( (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3) );
02770 
02771   if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
02772      return res;
02773   }
02774 
02775   q.used = a->used;
02776   q.sign = a->sign;
02777   w = 0;
02778   for (ix = a->used - 1; ix >= 0; ix--) {
02779      w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
02780 
02781      if (w >= 3) {
02782         /* multiply w by [1/3] */
02783         t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT);
02784 
02785         /* now subtract 3 * [w/3] from w, to get the remainder */
02786         w -= t+t+t;
02787 
02788         /* fixup the remainder as required since
02789          * the optimization is not exact.
02790          */
02791         while (w >= 3) {
02792            t += 1;
02793            w -= 3;
02794         }
02795       } else {
02796         t = 0;
02797       }
02798       q.dp[ix] = (mp_digit)t;
02799   }
02800 
02801   /* [optional] store the remainder */
02802   if (d != NULL) {
02803      *d = (mp_digit)w;
02804   }
02805 
02806   /* [optional] store the quotient */
02807   if (c != NULL) {
02808      mp_clamp(&q);
02809      mp_exch(&q, c);
02810   }
02811   mp_clear(&q);
02812 
02813   return res;
02814 }
02815 
02816 
02817 /* init an mp_init for a given size */
02818 int mp_init_size (mp_int * a, int size)
02819 {
02820   int x;
02821 
02822   /* pad size so there are always extra digits */
02823   size += (MP_PREC * 2) - (size % MP_PREC);
02824 
02825   /* alloc mem */
02826   a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size, 0,
02827                                       DYNAMIC_TYPE_BIGINT);
02828   if (a->dp == NULL) {
02829     return MP_MEM;
02830   }
02831 
02832   /* set the members */
02833   a->used  = 0;
02834   a->alloc = size;
02835   a->sign  = MP_ZPOS;
02836 
02837   /* zero the digits */
02838   for (x = 0; x < size; x++) {
02839       a->dp[x] = 0;
02840   }
02841 
02842   return MP_OKAY;
02843 }
02844 
02845 
02846 /* the jist of squaring...
02847  * you do like mult except the offset of the tmpx [one that
02848  * starts closer to zero] can't equal the offset of tmpy.
02849  * So basically you set up iy like before then you min it with
02850  * (ty-tx) so that it never happens.  You double all those
02851  * you add in the inner loop
02852 
02853 After that loop you do the squares and add them in.
02854 */
02855 
02856 int fast_s_mp_sqr (mp_int * a, mp_int * b)
02857 {
02858   int       olduse, res, pa, ix, iz;
02859 #ifdef WOLFSSL_SMALL_STACK
02860   mp_digit* W;    /* uses dynamic memory and slower */
02861 #else
02862   mp_digit W[MP_WARRAY];
02863 #endif
02864   mp_digit  *tmpx;
02865   mp_word   W1;
02866 
02867   /* grow the destination as required */
02868   pa = a->used + a->used;
02869   if (b->alloc < pa) {
02870     if ((res = mp_grow (b, pa)) != MP_OKAY) {
02871       return res;
02872     }
02873   }
02874 
02875   if (pa > MP_WARRAY)
02876     return MP_RANGE;  /* TAO range check */
02877 
02878 #ifdef WOLFSSL_SMALL_STACK
02879   W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, 0, DYNAMIC_TYPE_BIGINT);
02880   if (W == NULL)
02881     return MP_MEM;
02882 #endif
02883 
02884   /* number of output digits to produce */
02885   W1 = 0;
02886   for (ix = 0; ix < pa; ix++) {
02887       int      tx, ty, iy;
02888       mp_word  _W;
02889       mp_digit *tmpy;
02890 
02891       /* clear counter */
02892       _W = 0;
02893 
02894       /* get offsets into the two bignums */
02895       ty = MIN(a->used-1, ix);
02896       tx = ix - ty;
02897 
02898       /* setup temp aliases */
02899       tmpx = a->dp + tx;
02900       tmpy = a->dp + ty;
02901 
02902       /* this is the number of times the loop will iterate, essentially
02903          while (tx++ < a->used && ty-- >= 0) { ... }
02904        */
02905       iy = MIN(a->used-tx, ty+1);
02906 
02907       /* now for squaring tx can never equal ty
02908        * we halve the distance since they approach at a rate of 2x
02909        * and we have to round because odd cases need to be executed
02910        */
02911       iy = MIN(iy, (ty-tx+1)>>1);
02912 
02913       /* execute loop */
02914       for (iz = 0; iz < iy; iz++) {
02915          _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
02916       }
02917 
02918       /* double the inner product and add carry */
02919       _W = _W + _W + W1;
02920 
02921       /* even columns have the square term in them */
02922       if ((ix&1) == 0) {
02923          _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
02924       }
02925 
02926       /* store it */
02927       W[ix] = (mp_digit)(_W & MP_MASK);
02928 
02929       /* make next carry */
02930       W1 = _W >> ((mp_word)DIGIT_BIT);
02931   }
02932 
02933   /* setup dest */
02934   olduse  = b->used;
02935   b->used = a->used+a->used;
02936 
02937   {
02938     mp_digit *tmpb;
02939     tmpb = b->dp;
02940     for (ix = 0; ix < pa; ix++) {
02941       *tmpb++ = (mp_digit)(W[ix] & MP_MASK);
02942     }
02943 
02944     /* clear unused digits [that existed in the old copy of c] */
02945     for (; ix < olduse; ix++) {
02946       *tmpb++ = 0;
02947     }
02948   }
02949   mp_clamp (b);
02950 
02951 #ifdef WOLFSSL_SMALL_STACK
02952   XFREE(W, 0, DYNAMIC_TYPE_BIGINT);
02953 #endif
02954 
02955   return MP_OKAY;
02956 }
02957 
02958 
02959 /* Fast (comba) multiplier
02960  *
02961  * This is the fast column-array [comba] multiplier.  It is
02962  * designed to compute the columns of the product first
02963  * then handle the carries afterwards.  This has the effect
02964  * of making the nested loops that compute the columns very
02965  * simple and schedulable on super-scalar processors.
02966  *
02967  * This has been modified to produce a variable number of
02968  * digits of output so if say only a half-product is required
02969  * you don't have to compute the upper half (a feature
02970  * required for fast Barrett reduction).
02971  *
02972  * Based on Algorithm 14.12 on pp.595 of HAC.
02973  *
02974  */
02975 int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
02976 {
02977   int     olduse, res, pa, ix, iz;
02978 #ifdef WOLFSSL_SMALL_STACK
02979   mp_digit* W;    /* uses dynamic memory and slower */
02980 #else
02981   mp_digit W[MP_WARRAY];
02982 #endif
02983   register mp_word  _W;
02984 
02985   /* grow the destination as required */
02986   if (c->alloc < digs) {
02987     if ((res = mp_grow (c, digs)) != MP_OKAY) {
02988       return res;
02989     }
02990   }
02991 
02992   /* number of output digits to produce */
02993   pa = MIN(digs, a->used + b->used);
02994   if (pa > MP_WARRAY)
02995     return MP_RANGE;  /* TAO range check */
02996 
02997 #ifdef WOLFSSL_SMALL_STACK
02998   W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, 0, DYNAMIC_TYPE_BIGINT);
02999   if (W == NULL)
03000     return MP_MEM;
03001 #endif
03002 
03003   /* clear the carry */
03004   _W = 0;
03005   for (ix = 0; ix < pa; ix++) {
03006       int      tx, ty;
03007       int      iy;
03008       mp_digit *tmpx, *tmpy;
03009 
03010       /* get offsets into the two bignums */
03011       ty = MIN(b->used-1, ix);
03012       tx = ix - ty;
03013 
03014       /* setup temp aliases */
03015       tmpx = a->dp + tx;
03016       tmpy = b->dp + ty;
03017 
03018       /* this is the number of times the loop will iterate, essentially
03019          while (tx++ < a->used && ty-- >= 0) { ... }
03020        */
03021       iy = MIN(a->used-tx, ty+1);
03022 
03023       /* execute loop */
03024       for (iz = 0; iz < iy; ++iz) {
03025          _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
03026 
03027       }
03028 
03029       /* store term */
03030       W[ix] = (mp_digit)(((mp_digit)_W) & MP_MASK);
03031 
03032       /* make next carry */
03033       _W = _W >> ((mp_word)DIGIT_BIT);
03034  }
03035 
03036   /* setup dest */
03037   olduse  = c->used;
03038   c->used = pa;
03039 
03040   {
03041     register mp_digit *tmpc;
03042     tmpc = c->dp;
03043     for (ix = 0; ix < pa+1; ix++) {
03044       /* now extract the previous digit [below the carry] */
03045       *tmpc++ = W[ix];
03046     }
03047 
03048     /* clear unused digits [that existed in the old copy of c] */
03049     for (; ix < olduse; ix++) {
03050       *tmpc++ = 0;
03051     }
03052   }
03053   mp_clamp (c);
03054 
03055 #ifdef WOLFSSL_SMALL_STACK
03056   XFREE(W, 0, DYNAMIC_TYPE_BIGINT);
03057 #endif
03058 
03059   return MP_OKAY;
03060 }
03061 
03062 
03063 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
03064 int s_mp_sqr (mp_int * a, mp_int * b)
03065 {
03066   mp_int  t;
03067   int     res, ix, iy, pa;
03068   mp_word r;
03069   mp_digit u, tmpx, *tmpt;
03070 
03071   pa = a->used;
03072   if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
03073     return res;
03074   }
03075 
03076   /* default used is maximum possible size */
03077   t.used = 2*pa + 1;
03078 
03079   for (ix = 0; ix < pa; ix++) {
03080     /* first calculate the digit at 2*ix */
03081     /* calculate double precision result */
03082     r = ((mp_word) t.dp[2*ix]) +
03083         ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
03084 
03085     /* store lower part in result */
03086     t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
03087 
03088     /* get the carry */
03089     u           = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
03090 
03091     /* left hand side of A[ix] * A[iy] */
03092     tmpx        = a->dp[ix];
03093 
03094     /* alias for where to store the results */
03095     tmpt        = t.dp + (2*ix + 1);
03096 
03097     for (iy = ix + 1; iy < pa; iy++) {
03098       /* first calculate the product */
03099       r       = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
03100 
03101       /* now calculate the double precision result, note we use
03102        * addition instead of *2 since it's easier to optimize
03103        */
03104       r       = ((mp_word) *tmpt) + r + r + ((mp_word) u);
03105 
03106       /* store lower part */
03107       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
03108 
03109       /* get carry */
03110       u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
03111     }
03112     /* propagate upwards */
03113     while (u != ((mp_digit) 0)) {
03114       r       = ((mp_word) *tmpt) + ((mp_word) u);
03115       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
03116       u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
03117     }
03118   }
03119 
03120   mp_clamp (&t);
03121   mp_exch (&t, b);
03122   mp_clear (&t);
03123   return MP_OKAY;
03124 }
03125 
03126 
03127 /* multiplies |a| * |b| and only computes up to digs digits of result
03128  * HAC pp. 595, Algorithm 14.12  Modified so you can control how
03129  * many digits of output are created.
03130  */
03131 int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
03132 {
03133   mp_int  t;
03134   int     res, pa, pb, ix, iy;
03135   mp_digit u;
03136   mp_word r;
03137   mp_digit tmpx, *tmpt, *tmpy;
03138 
03139   /* can we use the fast multiplier? */
03140   if (((digs) < MP_WARRAY) &&
03141       MIN (a->used, b->used) <
03142           (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
03143     return fast_s_mp_mul_digs (a, b, c, digs);
03144   }
03145 
03146   if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
03147     return res;
03148   }
03149   t.used = digs;
03150 
03151   /* compute the digits of the product directly */
03152   pa = a->used;
03153   for (ix = 0; ix < pa; ix++) {
03154     /* set the carry to zero */
03155     u = 0;
03156 
03157     /* limit ourselves to making digs digits of output */
03158     pb = MIN (b->used, digs - ix);
03159 
03160     /* setup some aliases */
03161     /* copy of the digit from a used within the nested loop */
03162     tmpx = a->dp[ix];
03163 
03164     /* an alias for the destination shifted ix places */
03165     tmpt = t.dp + ix;
03166 
03167     /* an alias for the digits of b */
03168     tmpy = b->dp;
03169 
03170     /* compute the columns of the output and propagate the carry */
03171     for (iy = 0; iy < pb; iy++) {
03172       /* compute the column as a mp_word */
03173       r       = ((mp_word)*tmpt) +
03174                 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
03175                 ((mp_word) u);
03176 
03177       /* the new column is the lower part of the result */
03178       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
03179 
03180       /* get the carry word from the result */
03181       u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
03182     }
03183     /* set carry if it is placed below digs */
03184     if (ix + iy < digs) {
03185       *tmpt = u;
03186     }
03187   }
03188 
03189   mp_clamp (&t);
03190   mp_exch (&t, c);
03191 
03192   mp_clear (&t);
03193   return MP_OKAY;
03194 }
03195 
03196 
03197 /*
03198  * shifts with subtractions when the result is greater than b.
03199  *
03200  * The method is slightly modified to shift B unconditionally up to just under
03201  * the leading bit of b.  This saves a lot of multiple precision shifting.
03202  */
03203 int mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
03204 {
03205   int     x, bits, res;
03206 
03207   /* how many bits of last digit does b use */
03208   bits = mp_count_bits (b) % DIGIT_BIT;
03209 
03210   if (b->used > 1) {
03211      if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1))
03212          != MP_OKAY) {
03213         return res;
03214      }
03215   } else {
03216      mp_set(a, 1);
03217      bits = 1;
03218   }
03219 
03220 
03221   /* now compute C = A * B mod b */
03222   for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
03223     if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
03224       return res;
03225     }
03226     if (mp_cmp_mag (a, b) != MP_LT) {
03227       if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
03228         return res;
03229       }
03230     }
03231   }
03232 
03233   return MP_OKAY;
03234 }
03235 
03236 
03237 #ifdef MP_LOW_MEM
03238    #define TAB_SIZE 32
03239 #else
03240    #define TAB_SIZE 256
03241 #endif
03242 
03243 int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
03244 {
03245   mp_int  M[TAB_SIZE], res, mu;
03246   mp_digit buf;
03247   int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
03248   int (*redux)(mp_int*,mp_int*,mp_int*);
03249 
03250   /* find window size */
03251   x = mp_count_bits (X);
03252   if (x <= 7) {
03253     winsize = 2;
03254   } else if (x <= 36) {
03255     winsize = 3;
03256   } else if (x <= 140) {
03257     winsize = 4;
03258   } else if (x <= 450) {
03259     winsize = 5;
03260   } else if (x <= 1303) {
03261     winsize = 6;
03262   } else if (x <= 3529) {
03263     winsize = 7;
03264   } else {
03265     winsize = 8;
03266   }
03267 
03268 #ifdef MP_LOW_MEM
03269     if (winsize > 5) {
03270        winsize = 5;
03271     }
03272 #endif
03273 
03274   /* init M array */
03275   /* init first cell */
03276   if ((err = mp_init(&M[1])) != MP_OKAY) {
03277      return err;
03278   }
03279 
03280   /* now init the second half of the array */
03281   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
03282     if ((err = mp_init(&M[x])) != MP_OKAY) {
03283       for (y = 1<<(winsize-1); y < x; y++) {
03284         mp_clear (&M[y]);
03285       }
03286       mp_clear(&M[1]);
03287       return err;
03288     }
03289   }
03290 
03291   /* create mu, used for Barrett reduction */
03292   if ((err = mp_init (&mu)) != MP_OKAY) {
03293     goto LBL_M;
03294   }
03295 
03296   if (redmode == 0) {
03297      if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
03298         goto LBL_MU;
03299      }
03300      redux = mp_reduce;
03301   } else {
03302      if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) {
03303         goto LBL_MU;
03304      }
03305      redux = mp_reduce_2k_l;
03306   }
03307 
03308   /* create M table
03309    *
03310    * The M table contains powers of the base,
03311    * e.g. M[x] = G**x mod P
03312    *
03313    * The first half of the table is not
03314    * computed though accept for M[0] and M[1]
03315    */
03316   if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
03317     goto LBL_MU;
03318   }
03319 
03320   /* compute the value at M[1<<(winsize-1)] by squaring
03321    * M[1] (winsize-1) times
03322    */
03323   if ((err = mp_copy (&M[1], &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
03324     goto LBL_MU;
03325   }
03326 
03327   for (x = 0; x < (winsize - 1); x++) {
03328     /* square it */
03329     if ((err = mp_sqr (&M[(mp_digit)(1 << (winsize - 1))],
03330                        &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
03331       goto LBL_MU;
03332     }
03333 
03334     /* reduce modulo P */
03335     if ((err = redux (&M[(mp_digit)(1 << (winsize - 1))], P, &mu)) != MP_OKAY) {
03336       goto LBL_MU;
03337     }
03338   }
03339 
03340   /* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
03341    * for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
03342    */
03343   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
03344     if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
03345       goto LBL_MU;
03346     }
03347     if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
03348       goto LBL_MU;
03349     }
03350   }
03351 
03352   /* setup result */
03353   if ((err = mp_init (&res)) != MP_OKAY) {
03354     goto LBL_MU;
03355   }
03356   mp_set (&res, 1);
03357 
03358   /* set initial mode and bit cnt */
03359   mode   = 0;
03360   bitcnt = 1;
03361   buf    = 0;
03362   digidx = X->used - 1;
03363   bitcpy = 0;
03364   bitbuf = 0;
03365 
03366   for (;;) {
03367     /* grab next digit as required */
03368     if (--bitcnt == 0) {
03369       /* if digidx == -1 we are out of digits */
03370       if (digidx == -1) {
03371         break;
03372       }
03373       /* read next digit and reset the bitcnt */
03374       buf    = X->dp[digidx--];
03375       bitcnt = (int) DIGIT_BIT;
03376     }
03377 
03378     /* grab the next msb from the exponent */
03379     y     = (int)(buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
03380     buf <<= (mp_digit)1;
03381 
03382     /* if the bit is zero and mode == 0 then we ignore it
03383      * These represent the leading zero bits before the first 1 bit
03384      * in the exponent.  Technically this opt is not required but it
03385      * does lower the # of trivial squaring/reductions used
03386      */
03387     if (mode == 0 && y == 0) {
03388       continue;
03389     }
03390 
03391     /* if the bit is zero and mode == 1 then we square */
03392     if (mode == 1 && y == 0) {
03393       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
03394         goto LBL_RES;
03395       }
03396       if ((err = redux (&res, P, &mu)) != MP_OKAY) {
03397         goto LBL_RES;
03398       }
03399       continue;
03400     }
03401 
03402     /* else we add it to the window */
03403     bitbuf |= (y << (winsize - ++bitcpy));
03404     mode    = 2;
03405 
03406     if (bitcpy == winsize) {
03407       /* ok window is filled so square as required and multiply  */
03408       /* square first */
03409       for (x = 0; x < winsize; x++) {
03410         if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
03411           goto LBL_RES;
03412         }
03413         if ((err = redux (&res, P, &mu)) != MP_OKAY) {
03414           goto LBL_RES;
03415         }
03416       }
03417 
03418       /* then multiply */
03419       if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
03420         goto LBL_RES;
03421       }
03422       if ((err = redux (&res, P, &mu)) != MP_OKAY) {
03423         goto LBL_RES;
03424       }
03425 
03426       /* empty window and reset */
03427       bitcpy = 0;
03428       bitbuf = 0;
03429       mode   = 1;
03430     }
03431   }
03432 
03433   /* if bits remain then square/multiply */
03434   if (mode == 2 && bitcpy > 0) {
03435     /* square then multiply if the bit is set */
03436     for (x = 0; x < bitcpy; x++) {
03437       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
03438         goto LBL_RES;
03439       }
03440       if ((err = redux (&res, P, &mu)) != MP_OKAY) {
03441         goto LBL_RES;
03442       }
03443 
03444       bitbuf <<= 1;
03445       if ((bitbuf & (1 << winsize)) != 0) {
03446         /* then multiply */
03447         if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
03448           goto LBL_RES;
03449         }
03450         if ((err = redux (&res, P, &mu)) != MP_OKAY) {
03451           goto LBL_RES;
03452         }
03453       }
03454     }
03455   }
03456 
03457   mp_exch (&res, Y);
03458   err = MP_OKAY;
03459 LBL_RES:mp_clear (&res);
03460 LBL_MU:mp_clear (&mu);
03461 LBL_M:
03462   mp_clear(&M[1]);
03463   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
03464     mp_clear (&M[x]);
03465   }
03466   return err;
03467 }
03468 
03469 
03470 /* pre-calculate the value required for Barrett reduction
03471  * For a given modulus "b" it calculates the value required in "a"
03472  */
03473 int mp_reduce_setup (mp_int * a, mp_int * b)
03474 {
03475   int     res;
03476 
03477   if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
03478     return res;
03479   }
03480   return mp_div (a, b, a, NULL);
03481 }
03482 
03483 
03484 /* reduces x mod m, assumes 0 < x < m**2, mu is
03485  * precomputed via mp_reduce_setup.
03486  * From HAC pp.604 Algorithm 14.42
03487  */
03488 int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
03489 {
03490   mp_int  q;
03491   int     res, um = m->used;
03492 
03493   /* q = x */
03494   if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
03495     return res;
03496   }
03497 
03498   /* q1 = x / b**(k-1)  */
03499   mp_rshd (&q, um - 1);
03500 
03501   /* according to HAC this optimization is ok */
03502   if (((mp_word) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
03503     if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
03504       goto CLEANUP;
03505     }
03506   } else {
03507 #ifdef BN_S_MP_MUL_HIGH_DIGS_C
03508     if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
03509       goto CLEANUP;
03510     }
03511 #elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C)
03512     if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
03513       goto CLEANUP;
03514     }
03515 #else
03516     {
03517       res = MP_VAL;
03518       goto CLEANUP;
03519     }
03520 #endif
03521   }
03522 
03523   /* q3 = q2 / b**(k+1) */
03524   mp_rshd (&q, um + 1);
03525 
03526   /* x = x mod b**(k+1), quick (no division) */
03527   if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
03528     goto CLEANUP;
03529   }
03530 
03531   /* q = q * m mod b**(k+1), quick (no division) */
03532   if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
03533     goto CLEANUP;
03534   }
03535 
03536   /* x = x - q */
03537   if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
03538     goto CLEANUP;
03539   }
03540 
03541   /* If x < 0, add b**(k+1) to it */
03542   if (mp_cmp_d (x, 0) == MP_LT) {
03543     mp_set (&q, 1);
03544     if ((res = mp_lshd (&q, um + 1)) != MP_OKAY)
03545       goto CLEANUP;
03546     if ((res = mp_add (x, &q, x)) != MP_OKAY)
03547       goto CLEANUP;
03548   }
03549 
03550   /* Back off if it's too big */
03551   while (mp_cmp (x, m) != MP_LT) {
03552     if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
03553       goto CLEANUP;
03554     }
03555   }
03556 
03557 CLEANUP:
03558   mp_clear (&q);
03559 
03560   return res;
03561 }
03562 
03563 
03564 /* reduces a modulo n where n is of the form 2**p - d
03565    This differs from reduce_2k since "d" can be larger
03566    than a single digit.
03567 */
03568 int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d)
03569 {
03570    mp_int q;
03571    int    p, res;
03572 
03573    if ((res = mp_init(&q)) != MP_OKAY) {
03574       return res;
03575    }
03576 
03577    p = mp_count_bits(n);
03578 top:
03579    /* q = a/2**p, a = a mod 2**p */
03580    if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
03581       goto ERR;
03582    }
03583 
03584    /* q = q * d */
03585    if ((res = mp_mul(&q, d, &q)) != MP_OKAY) {
03586       goto ERR;
03587    }
03588 
03589    /* a = a + q */
03590    if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
03591       goto ERR;
03592    }
03593 
03594    if (mp_cmp_mag(a, n) != MP_LT) {
03595       s_mp_sub(a, n, a);
03596       goto top;
03597    }
03598 
03599 ERR:
03600    mp_clear(&q);
03601    return res;
03602 }
03603 
03604 
03605 /* determines the setup value */
03606 int mp_reduce_2k_setup_l(mp_int *a, mp_int *d)
03607 {
03608    int    res;
03609    mp_int tmp;
03610 
03611    if ((res = mp_init(&tmp)) != MP_OKAY) {
03612       return res;
03613    }
03614 
03615    if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
03616       goto ERR;
03617    }
03618 
03619    if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
03620       goto ERR;
03621    }
03622 
03623 ERR:
03624    mp_clear(&tmp);
03625    return res;
03626 }
03627 
03628 
03629 /* multiplies |a| * |b| and does not compute the lower digs digits
03630  * [meant to get the higher part of the product]
03631  */
03632 int
03633 s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
03634 {
03635   mp_int  t;
03636   int     res, pa, pb, ix, iy;
03637   mp_digit u;
03638   mp_word r;
03639   mp_digit tmpx, *tmpt, *tmpy;
03640 
03641   /* can we use the fast multiplier? */
03642 #ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
03643   if (((a->used + b->used + 1) < MP_WARRAY)
03644       && MIN (a->used, b->used) <
03645       (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
03646     return fast_s_mp_mul_high_digs (a, b, c, digs);
03647   }
03648 #endif
03649 
03650   if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
03651     return res;
03652   }
03653   t.used = a->used + b->used + 1;
03654 
03655   pa = a->used;
03656   pb = b->used;
03657   for (ix = 0; ix < pa; ix++) {
03658     /* clear the carry */
03659     u = 0;
03660 
03661     /* left hand side of A[ix] * B[iy] */
03662     tmpx = a->dp[ix];
03663 
03664     /* alias to the address of where the digits will be stored */
03665     tmpt = &(t.dp[digs]);
03666 
03667     /* alias for where to read the right hand side from */
03668     tmpy = b->dp + (digs - ix);
03669 
03670     for (iy = digs - ix; iy < pb; iy++) {
03671       /* calculate the double precision result */
03672       r       = ((mp_word)*tmpt) +
03673                 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
03674                 ((mp_word) u);
03675 
03676       /* get the lower part */
03677       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
03678 
03679       /* carry the carry */
03680       u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
03681     }
03682     *tmpt = u;
03683   }
03684   mp_clamp (&t);
03685   mp_exch (&t, c);
03686   mp_clear (&t);
03687   return MP_OKAY;
03688 }
03689 
03690 
03691 /* this is a modified version of fast_s_mul_digs that only produces
03692  * output digits *above* digs.  See the comments for fast_s_mul_digs
03693  * to see how it works.
03694  *
03695  * This is used in the Barrett reduction since for one of the multiplications
03696  * only the higher digits were needed.  This essentially halves the work.
03697  *
03698  * Based on Algorithm 14.12 on pp.595 of HAC.
03699  */
03700 int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
03701 {
03702   int     olduse, res, pa, ix, iz;
03703 #ifdef WOLFSSL_SMALL_STACK
03704   mp_digit* W;    /* uses dynamic memory and slower */
03705 #else
03706   mp_digit W[MP_WARRAY];
03707 #endif
03708   mp_word  _W;
03709 
03710   /* grow the destination as required */
03711   pa = a->used + b->used;
03712   if (c->alloc < pa) {
03713     if ((res = mp_grow (c, pa)) != MP_OKAY) {
03714       return res;
03715     }
03716   }
03717 
03718   if (pa > MP_WARRAY)
03719     return MP_RANGE;  /* TAO range check */
03720 
03721 #ifdef WOLFSSL_SMALL_STACK
03722   W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, 0, DYNAMIC_TYPE_BIGINT);
03723   if (W == NULL)
03724     return MP_MEM;
03725 #endif
03726 
03727   /* number of output digits to produce */
03728   pa = a->used + b->used;
03729   _W = 0;
03730   for (ix = digs; ix < pa; ix++) {
03731       int      tx, ty, iy;
03732       mp_digit *tmpx, *tmpy;
03733 
03734       /* get offsets into the two bignums */
03735       ty = MIN(b->used-1, ix);
03736       tx = ix - ty;
03737 
03738       /* setup temp aliases */
03739       tmpx = a->dp + tx;
03740       tmpy = b->dp + ty;
03741 
03742       /* this is the number of times the loop will iterate, essentially its
03743          while (tx++ < a->used && ty-- >= 0) { ... }
03744        */
03745       iy = MIN(a->used-tx, ty+1);
03746 
03747       /* execute loop */
03748       for (iz = 0; iz < iy; iz++) {
03749          _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
03750       }
03751 
03752       /* store term */
03753       W[ix] = (mp_digit)(((mp_digit)_W) & MP_MASK);
03754 
03755       /* make next carry */
03756       _W = _W >> ((mp_word)DIGIT_BIT);
03757   }
03758 
03759   /* setup dest */
03760   olduse  = c->used;
03761   c->used = pa;
03762 
03763   {
03764     register mp_digit *tmpc;
03765 
03766     tmpc = c->dp + digs;
03767     for (ix = digs; ix < pa; ix++) {   /* TAO, <= could potentially overwrite */
03768       /* now extract the previous digit [below the carry] */
03769       *tmpc++ = W[ix];
03770     }
03771 
03772     /* clear unused digits [that existed in the old copy of c] */
03773     for (; ix < olduse; ix++) {
03774       *tmpc++ = 0;
03775     }
03776   }
03777   mp_clamp (c);
03778 
03779 #ifdef WOLFSSL_SMALL_STACK
03780   XFREE(W, 0, DYNAMIC_TYPE_BIGINT);
03781 #endif
03782 
03783   return MP_OKAY;
03784 }
03785 
03786 
03787 /* set a 32-bit const */
03788 int mp_set_int (mp_int * a, unsigned long b)
03789 {
03790   int     x, res;
03791 
03792   mp_zero (a);
03793 
03794   /* set four bits at a time */
03795   for (x = 0; x < 8; x++) {
03796     /* shift the number up four bits */
03797     if ((res = mp_mul_2d (a, 4, a)) != MP_OKAY) {
03798       return res;
03799     }
03800 
03801     /* OR in the top four bits of the source */
03802     a->dp[0] |= (b >> 28) & 15;
03803 
03804     /* shift the source up to the next four bits */
03805     b <<= 4;
03806 
03807     /* ensure that digits are not clamped off */
03808     a->used += 1;
03809   }
03810   mp_clamp (a);
03811   return MP_OKAY;
03812 }
03813 
03814 
03815 #if defined(WOLFSSL_KEY_GEN) || defined(HAVE_ECC)
03816 
03817 /* c = a * a (mod b) */
03818 int mp_sqrmod (mp_int * a, mp_int * b, mp_int * c)
03819 {
03820   int     res;
03821   mp_int  t;
03822 
03823   if ((res = mp_init (&t)) != MP_OKAY) {
03824     return res;
03825   }
03826 
03827   if ((res = mp_sqr (a, &t)) != MP_OKAY) {
03828     mp_clear (&t);
03829     return res;
03830   }
03831   res = mp_mod (&t, b, c);
03832   mp_clear (&t);
03833   return res;
03834 }
03835 
03836 #endif
03837 
03838 
03839 #if defined(HAVE_ECC) || !defined(NO_PWDBASED) || defined(WOLFSSL_SNIFFER) || \
03840     defined(WOLFSSL_HAVE_WOLFSCEP) || defined(WOLFSSL_KEY_GEN) || \
03841     defined(OPENSSL_EXTRA)
03842 
03843 /* single digit addition */
03844 int mp_add_d (mp_int* a, mp_digit b, mp_int* c)
03845 {
03846   int     res, ix, oldused;
03847   mp_digit *tmpa, *tmpc, mu;
03848 
03849   /* grow c as required */
03850   if (c->alloc < a->used + 1) {
03851      if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
03852         return res;
03853      }
03854   }
03855 
03856   /* if a is negative and |a| >= b, call c = |a| - b */
03857   if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) {
03858      /* temporarily fix sign of a */
03859      a->sign = MP_ZPOS;
03860 
03861      /* c = |a| - b */
03862      res = mp_sub_d(a, b, c);
03863 
03864      /* fix sign  */
03865      a->sign = c->sign = MP_NEG;
03866 
03867      /* clamp */
03868      mp_clamp(c);
03869 
03870      return res;
03871   }
03872 
03873   /* old number of used digits in c */
03874   oldused = c->used;
03875 
03876   /* sign always positive */
03877   c->sign = MP_ZPOS;
03878 
03879   /* source alias */
03880   tmpa    = a->dp;
03881 
03882   /* destination alias */
03883   tmpc    = c->dp;
03884 
03885   /* if a is positive */
03886   if (a->sign == MP_ZPOS) {
03887      /* add digit, after this we're propagating
03888       * the carry.
03889       */
03890      *tmpc   = *tmpa++ + b;
03891      mu      = *tmpc >> DIGIT_BIT;
03892      *tmpc++ &= MP_MASK;
03893 
03894      /* now handle rest of the digits */
03895      for (ix = 1; ix < a->used; ix++) {
03896         *tmpc   = *tmpa++ + mu;
03897         mu      = *tmpc >> DIGIT_BIT;
03898         *tmpc++ &= MP_MASK;
03899      }
03900      /* set final carry */
03901      if (ix < c->alloc) {
03902         ix++;
03903         *tmpc++  = mu;
03904      }
03905 
03906      /* setup size */
03907      c->used = a->used + 1;
03908   } else {
03909      /* a was negative and |a| < b */
03910      c->used  = 1;
03911 
03912      /* the result is a single digit */
03913      if (a->used == 1) {
03914         *tmpc++  =  b - a->dp[0];
03915      } else {
03916         *tmpc++  =  b;
03917      }
03918 
03919      /* setup count so the clearing of oldused
03920       * can fall through correctly
03921       */
03922      ix       = 1;
03923   }
03924 
03925   /* now zero to oldused */
03926   while (ix++ < oldused) {
03927      *tmpc++ = 0;
03928   }
03929   mp_clamp(c);
03930 
03931   return MP_OKAY;
03932 }
03933 
03934 
03935 /* single digit subtraction */
03936 int mp_sub_d (mp_int * a, mp_digit b, mp_int * c)
03937 {
03938   mp_digit *tmpa, *tmpc, mu;
03939   int       res, ix, oldused;
03940 
03941   /* grow c as required */
03942   if (c->alloc < a->used + 1) {
03943      if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
03944         return res;
03945      }
03946   }
03947 
03948   /* if a is negative just do an unsigned
03949    * addition [with fudged signs]
03950    */
03951   if (a->sign == MP_NEG) {
03952      a->sign = MP_ZPOS;
03953      res     = mp_add_d(a, b, c);
03954      a->sign = c->sign = MP_NEG;
03955 
03956      /* clamp */
03957      mp_clamp(c);
03958 
03959      return res;
03960   }
03961 
03962   /* setup regs */
03963   oldused = c->used;
03964   tmpa    = a->dp;
03965   tmpc    = c->dp;
03966 
03967   /* if a <= b simply fix the single digit */
03968   if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) {
03969      if (a->used == 1) {
03970         *tmpc++ = b - *tmpa;
03971      } else {
03972         *tmpc++ = b;
03973      }
03974      ix      = 1;
03975 
03976      /* negative/1digit */
03977      c->sign = MP_NEG;
03978      c->used = 1;
03979   } else {
03980      /* positive/size */
03981      c->sign = MP_ZPOS;
03982      c->used = a->used;
03983 
03984      /* subtract first digit */
03985      *tmpc    = *tmpa++ - b;
03986      mu       = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
03987      *tmpc++ &= MP_MASK;
03988 
03989      /* handle rest of the digits */
03990      for (ix = 1; ix < a->used; ix++) {
03991         *tmpc    = *tmpa++ - mu;
03992         mu       = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
03993         *tmpc++ &= MP_MASK;
03994      }
03995   }
03996 
03997   /* zero excess digits */
03998   while (ix++ < oldused) {
03999      *tmpc++ = 0;
04000   }
04001   mp_clamp(c);
04002   return MP_OKAY;
04003 }
04004 
04005 #endif /* defined(HAVE_ECC) || !defined(NO_PWDBASED) */
04006 
04007 
04008 #if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || defined(HAVE_ECC)
04009 
04010 static const int lnz[16] = {
04011    4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
04012 };
04013 
04014 /* Counts the number of lsbs which are zero before the first zero bit */
04015 int mp_cnt_lsb(mp_int *a)
04016 {
04017     int x;
04018     mp_digit q, qq;
04019 
04020     /* easy out */
04021     if (mp_iszero(a) == 1) {
04022         return 0;
04023     }
04024 
04025     /* scan lower digits until non-zero */
04026     for (x = 0; x < a->used && a->dp[x] == 0; x++);
04027     q = a->dp[x];
04028     x *= DIGIT_BIT;
04029 
04030     /* now scan this digit until a 1 is found */
04031     if ((q & 1) == 0) {
04032         do {
04033             qq  = q & 15;
04034             x  += lnz[qq];
04035             q >>= 4;
04036         } while (qq == 0);
04037     }
04038     return x;
04039 }
04040 
04041 
04042 
04043 
04044 static int s_is_power_of_two(mp_digit b, int *p)
04045 {
04046    int x;
04047 
04048    /* fast return if no power of two */
04049    if ((b==0) || (b & (b-1))) {
04050       return 0;
04051    }
04052 
04053    for (x = 0; x < DIGIT_BIT; x++) {
04054       if (b == (((mp_digit)1)<<x)) {
04055          *p = x;
04056          return 1;
04057       }
04058    }
04059    return 0;
04060 }
04061 
04062 /* single digit division (based on routine from MPI) */
04063 static int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
04064 {
04065   mp_int  q;
04066   mp_word w;
04067   mp_digit t;
04068   int     res = MP_OKAY, ix;
04069 
04070   /* cannot divide by zero */
04071   if (b == 0) {
04072      return MP_VAL;
04073   }
04074 
04075   /* quick outs */
04076   if (b == 1 || mp_iszero(a) == 1) {
04077      if (d != NULL) {
04078         *d = 0;
04079      }
04080      if (c != NULL) {
04081         return mp_copy(a, c);
04082      }
04083      return MP_OKAY;
04084   }
04085 
04086   /* power of two ? */
04087   if (s_is_power_of_two(b, &ix) == 1) {
04088      if (d != NULL) {
04089         *d = a->dp[0] & ((((mp_digit)1)<<ix) - 1);
04090      }
04091      if (c != NULL) {
04092         return mp_div_2d(a, ix, c, NULL);
04093      }
04094      return MP_OKAY;
04095   }
04096 
04097 #ifdef BN_MP_DIV_3_C
04098   /* three? */
04099   if (b == 3) {
04100      return mp_div_3(a, c, d);
04101   }
04102 #endif
04103 
04104   /* no easy answer [c'est la vie].  Just division */
04105   if (c != NULL) {
04106       if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
04107         return res;
04108       }
04109 
04110       q.used = a->used;
04111       q.sign = a->sign;
04112   }
04113 
04114   w = 0;
04115   for (ix = a->used - 1; ix >= 0; ix--) {
04116      w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
04117 
04118      if (w >= b) {
04119         t = (mp_digit)(w / b);
04120         w -= ((mp_word)t) * ((mp_word)b);
04121       } else {
04122         t = 0;
04123       }
04124       if (c != NULL)
04125         q.dp[ix] = (mp_digit)t;
04126   }
04127 
04128   if (d != NULL) {
04129      *d = (mp_digit)w;
04130   }
04131 
04132   if (c != NULL) {
04133      mp_clamp(&q);
04134      mp_exch(&q, c);
04135      mp_clear(&q);
04136   }
04137 
04138   return res;
04139 }
04140 
04141 
04142 int mp_mod_d (mp_int * a, mp_digit b, mp_digit * c)
04143 {
04144   return mp_div_d(a, b, NULL, c);
04145 }
04146 
04147 #endif /* defined(WOLFSSL_KEY_GEN)||defined(HAVE_COMP_KEY)||defined(HAVE_ECC) */
04148 
04149 #ifdef WOLFSSL_KEY_GEN
04150 
04151 const mp_digit ltm_prime_tab[] = {
04152   0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
04153   0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
04154   0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
04155   0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F,
04156 #ifndef MP_8BIT
04157   0x0083,
04158   0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
04159   0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
04160   0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
04161   0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
04162 
04163   0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
04164   0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
04165   0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
04166   0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
04167   0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
04168   0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
04169   0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
04170   0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
04171 
04172   0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
04173   0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
04174   0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
04175   0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
04176   0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
04177   0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
04178   0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
04179   0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
04180 
04181   0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
04182   0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
04183   0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
04184   0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
04185   0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
04186   0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
04187   0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
04188   0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
04189 #endif
04190 };
04191 
04192 
04193 /* Miller-Rabin test of "a" to the base of "b" as described in
04194  * HAC pp. 139 Algorithm 4.24
04195  *
04196  * Sets result to 0 if definitely composite or 1 if probably prime.
04197  * Randomly the chance of error is no more than 1/4 and often
04198  * very much lower.
04199  */
04200 static int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result)
04201 {
04202   mp_int  n1, y, r;
04203   int     s, j, err;
04204 
04205   /* default */
04206   *result = MP_NO;
04207 
04208   /* ensure b > 1 */
04209   if (mp_cmp_d(b, 1) != MP_GT) {
04210      return MP_VAL;
04211   }
04212 
04213   /* get n1 = a - 1 */
04214   if ((err = mp_init_copy (&n1, a)) != MP_OKAY) {
04215     return err;
04216   }
04217   if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) {
04218     goto LBL_N1;
04219   }
04220 
04221   /* set 2**s * r = n1 */
04222   if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) {
04223     goto LBL_N1;
04224   }
04225 
04226   /* count the number of least significant bits
04227    * which are zero
04228    */
04229   s = mp_cnt_lsb(&r);
04230 
04231   /* now divide n - 1 by 2**s */
04232   if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) {
04233     goto LBL_R;
04234   }
04235 
04236   /* compute y = b**r mod a */
04237   if ((err = mp_init (&y)) != MP_OKAY) {
04238     goto LBL_R;
04239   }
04240   if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) {
04241     goto LBL_Y;
04242   }
04243 
04244   /* if y != 1 and y != n1 do */
04245   if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) {
04246     j = 1;
04247     /* while j <= s-1 and y != n1 */
04248     while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) {
04249       if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) {
04250          goto LBL_Y;
04251       }
04252 
04253       /* if y == 1 then composite */
04254       if (mp_cmp_d (&y, 1) == MP_EQ) {
04255          goto LBL_Y;
04256       }
04257 
04258       ++j;
04259     }
04260 
04261     /* if y != n1 then composite */
04262     if (mp_cmp (&y, &n1) != MP_EQ) {
04263       goto LBL_Y;
04264     }
04265   }
04266 
04267   /* probably prime now */
04268   *result = MP_YES;
04269 LBL_Y:mp_clear (&y);
04270 LBL_R:mp_clear (&r);
04271 LBL_N1:mp_clear (&n1);
04272   return err;
04273 }
04274 
04275 
04276 /* determines if an integers is divisible by one
04277  * of the first PRIME_SIZE primes or not
04278  *
04279  * sets result to 0 if not, 1 if yes
04280  */
04281 static int mp_prime_is_divisible (mp_int * a, int *result)
04282 {
04283   int     err, ix;
04284   mp_digit res;
04285 
04286   /* default to not */
04287   *result = MP_NO;
04288 
04289   for (ix = 0; ix < PRIME_SIZE; ix++) {
04290     /* what is a mod LBL_prime_tab[ix] */
04291     if ((err = mp_mod_d (a, ltm_prime_tab[ix], &res)) != MP_OKAY) {
04292       return err;
04293     }
04294 
04295     /* is the residue zero? */
04296     if (res == 0) {
04297       *result = MP_YES;
04298       return MP_OKAY;
04299     }
04300   }
04301 
04302   return MP_OKAY;
04303 }
04304 
04305 static const int USE_BBS = 1;
04306 
04307 int mp_rand_prime(mp_int* N, int len, WC_RNG* rng, void* heap)
04308 {
04309     int   err, res, type;
04310     byte* buf;
04311 
04312     if (N == NULL || rng == NULL)
04313         return MP_VAL;
04314 
04315     /* get type */
04316     if (len < 0) {
04317         type = USE_BBS;
04318         len = -len;
04319     } else {
04320         type = 0;
04321     }
04322 
04323     /* allow sizes between 2 and 512 bytes for a prime size */
04324     if (len < 2 || len > 512) {
04325         return MP_VAL;
04326     }
04327 
04328     /* allocate buffer to work with */
04329     buf = (byte*)XMALLOC(len, heap, DYNAMIC_TYPE_RSA);
04330     if (buf == NULL) {
04331         return MP_MEM;
04332     }
04333     XMEMSET(buf, 0, len);
04334 
04335     do {
04336 #ifdef SHOW_GEN
04337         printf(".");
04338         fflush(stdout);
04339 #endif
04340         /* generate value */
04341         err = wc_RNG_GenerateBlock(rng, buf, len);
04342         if (err != 0) {
04343             XFREE(buf, heap, DYNAMIC_TYPE_RSA);
04344             return err;
04345         }
04346 
04347         /* munge bits */
04348         buf[0]     |= 0x80 | 0x40;
04349         buf[len-1] |= 0x01 | ((type & USE_BBS) ? 0x02 : 0x00);
04350 
04351         /* load value */
04352         if ((err = mp_read_unsigned_bin(N, buf, len)) != MP_OKAY) {
04353             XFREE(buf, heap, DYNAMIC_TYPE_RSA);
04354             return err;
04355         }
04356 
04357         /* test */
04358         if ((err = mp_prime_is_prime(N, 8, &res)) != MP_OKAY) {
04359             XFREE(buf, heap, DYNAMIC_TYPE_RSA);
04360             return err;
04361         }
04362     } while (res == MP_NO);
04363 
04364     XMEMSET(buf, 0, len);
04365     XFREE(buf, heap, DYNAMIC_TYPE_RSA);
04366     
04367     return MP_OKAY;
04368 }
04369 
04370 /*
04371  * Sets result to 1 if probably prime, 0 otherwise
04372  */
04373 int mp_prime_is_prime (mp_int * a, int t, int *result)
04374 {
04375   mp_int  b;
04376   int     ix, err, res;
04377 
04378   /* default to no */
04379   *result = MP_NO;
04380 
04381   /* valid value of t? */
04382   if (t <= 0 || t > PRIME_SIZE) {
04383     return MP_VAL;
04384   }
04385 
04386   /* is the input equal to one of the primes in the table? */
04387   for (ix = 0; ix < PRIME_SIZE; ix++) {
04388       if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
04389          *result = 1;
04390          return MP_OKAY;
04391       }
04392   }
04393 
04394   /* first perform trial division */
04395   if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
04396     return err;
04397   }
04398 
04399   /* return if it was trivially divisible */
04400   if (res == MP_YES) {
04401     return MP_OKAY;
04402   }
04403 
04404   /* now perform the miller-rabin rounds */
04405   if ((err = mp_init (&b)) != MP_OKAY) {
04406     return err;
04407   }
04408 
04409   for (ix = 0; ix < t; ix++) {
04410     /* set the prime */
04411     mp_set (&b, ltm_prime_tab[ix]);
04412 
04413     if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
04414       goto LBL_B;
04415     }
04416 
04417     if (res == MP_NO) {
04418       goto LBL_B;
04419     }
04420   }
04421 
04422   /* passed the test */
04423   *result = MP_YES;
04424 LBL_B:mp_clear (&b);
04425   return err;
04426 }
04427 
04428 
04429 /* computes least common multiple as |a*b|/(a, b) */
04430 int mp_lcm (mp_int * a, mp_int * b, mp_int * c)
04431 {
04432   int     res;
04433   mp_int  t1, t2;
04434 
04435 
04436   if ((res = mp_init_multi (&t1, &t2, NULL, NULL, NULL, NULL)) != MP_OKAY) {
04437     return res;
04438   }
04439 
04440   /* t1 = get the GCD of the two inputs */
04441   if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) {
04442     goto LBL_T;
04443   }
04444 
04445   /* divide the smallest by the GCD */
04446   if (mp_cmp_mag(a, b) == MP_LT) {
04447      /* store quotient in t2 such that t2 * b is the LCM */
04448      if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) {
04449         goto LBL_T;
04450      }
04451      res = mp_mul(b, &t2, c);
04452   } else {
04453      /* store quotient in t2 such that t2 * a is the LCM */
04454      if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) {
04455         goto LBL_T;
04456      }
04457      res = mp_mul(a, &t2, c);
04458   }
04459 
04460   /* fix the sign to positive */
04461   c->sign = MP_ZPOS;
04462 
04463 LBL_T:
04464   mp_clear(&t1);
04465   mp_clear(&t2);
04466   return res;
04467 }
04468 
04469 
04470 
04471 /* Greatest Common Divisor using the binary method */
04472 int mp_gcd (mp_int * a, mp_int * b, mp_int * c)
04473 {
04474     mp_int  u, v;
04475     int     k, u_lsb, v_lsb, res;
04476 
04477     /* either zero than gcd is the largest */
04478     if (mp_iszero (a) == MP_YES) {
04479         return mp_abs (b, c);
04480     }
04481     if (mp_iszero (b) == MP_YES) {
04482         return mp_abs (a, c);
04483     }
04484 
04485     /* get copies of a and b we can modify */
04486     if ((res = mp_init_copy (&u, a)) != MP_OKAY) {
04487         return res;
04488     }
04489 
04490     if ((res = mp_init_copy (&v, b)) != MP_OKAY) {
04491         goto LBL_U;
04492     }
04493 
04494     /* must be positive for the remainder of the algorithm */
04495     u.sign = v.sign = MP_ZPOS;
04496 
04497     /* B1.  Find the common power of two for u and v */
04498     u_lsb = mp_cnt_lsb(&u);
04499     v_lsb = mp_cnt_lsb(&v);
04500     k     = MIN(u_lsb, v_lsb);
04501 
04502     if (k > 0) {
04503         /* divide the power of two out */
04504         if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) {
04505             goto LBL_V;
04506         }
04507 
04508         if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) {
04509             goto LBL_V;
04510         }
04511     }
04512 
04513     /* divide any remaining factors of two out */
04514     if (u_lsb != k) {
04515         if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {
04516             goto LBL_V;
04517         }
04518     }
04519 
04520     if (v_lsb != k) {
04521         if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {
04522             goto LBL_V;
04523         }
04524     }
04525 
04526     while (mp_iszero(&v) == 0) {
04527         /* make sure v is the largest */
04528         if (mp_cmp_mag(&u, &v) == MP_GT) {
04529             /* swap u and v to make sure v is >= u */
04530             mp_exch(&u, &v);
04531         }
04532 
04533         /* subtract smallest from largest */
04534         if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) {
04535             goto LBL_V;
04536         }
04537 
04538         /* Divide out all factors of two */
04539         if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
04540             goto LBL_V;
04541         }
04542     }
04543 
04544     /* multiply by 2**k which we divided out at the beginning */
04545     if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) {
04546         goto LBL_V;
04547     }
04548     c->sign = MP_ZPOS;
04549     res = MP_OKAY;
04550 LBL_V:mp_clear (&u);
04551 LBL_U:mp_clear (&v);
04552     return res;
04553 }
04554 
04555 #endif /* WOLFSSL_KEY_GEN */
04556 
04557 
04558 #if defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY)
04559 
04560 /* chars used in radix conversions */
04561 const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ\
04562                          abcdefghijklmnopqrstuvwxyz+/";
04563 #endif
04564 
04565 #ifdef HAVE_ECC
04566 /* read a string [ASCII] in a given radix */
04567 int mp_read_radix (mp_int * a, const char *str, int radix)
04568 {
04569   int     y, res, neg;
04570   char    ch;
04571 
04572   /* zero the digit bignum */
04573   mp_zero(a);
04574 
04575   /* make sure the radix is ok */
04576   if (radix < 2 || radix > 64) {
04577     return MP_VAL;
04578   }
04579 
04580   /* if the leading digit is a
04581    * minus set the sign to negative.
04582    */
04583   if (*str == '-') {
04584     ++str;
04585     neg = MP_NEG;
04586   } else {
04587     neg = MP_ZPOS;
04588   }
04589 
04590   /* set the integer to the default of zero */
04591   mp_zero (a);
04592 
04593   /* process each digit of the string */
04594   while (*str) {
04595     /* if the radix < 36 the conversion is case insensitive
04596      * this allows numbers like 1AB and 1ab to represent the same  value
04597      * [e.g. in hex]
04598      */
04599     ch = (char) ((radix < 36) ? XTOUPPER((unsigned char)*str) : *str);
04600     for (y = 0; y < 64; y++) {
04601       if (ch == mp_s_rmap[y]) {
04602          break;
04603       }
04604     }
04605 
04606     /* if the char was found in the map
04607      * and is less than the given radix add it
04608      * to the number, otherwise exit the loop.
04609      */
04610     if (y < radix) {
04611       if ((res = mp_mul_d (a, (mp_digit) radix, a)) != MP_OKAY) {
04612          return res;
04613       }
04614       if ((res = mp_add_d (a, (mp_digit) y, a)) != MP_OKAY) {
04615          return res;
04616       }
04617     } else {
04618       break;
04619     }
04620     ++str;
04621   }
04622 
04623   /* set the sign only if a != 0 */
04624   if (mp_iszero(a) != 1) {
04625      a->sign = neg;
04626   }
04627   return MP_OKAY;
04628 }
04629 #endif /* HAVE_ECC */
04630 
04631 #if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY)
04632 
04633 /* returns size of ASCII representation */
04634 int mp_radix_size (mp_int *a, int radix, int *size)
04635 {
04636     int     res, digs;
04637     mp_int  t;
04638     mp_digit d;
04639 
04640     *size = 0;
04641 
04642     /* special case for binary */
04643     if (radix == 2) {
04644         *size = mp_count_bits (a) + (a->sign == MP_NEG ? 1 : 0) + 1;
04645         return MP_OKAY;
04646     }
04647 
04648     /* make sure the radix is in range */
04649     if (radix < 2 || radix > 64) {
04650         return MP_VAL;
04651     }
04652 
04653     if (mp_iszero(a) == MP_YES) {
04654         *size = 2;
04655         return MP_OKAY;
04656     }
04657 
04658     /* digs is the digit count */
04659     digs = 0;
04660 
04661     /* if it's negative add one for the sign */
04662     if (a->sign == MP_NEG) {
04663         ++digs;
04664     }
04665 
04666     /* init a copy of the input */
04667     if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
04668         return res;
04669     }
04670 
04671     /* force temp to positive */
04672     t.sign = MP_ZPOS;
04673 
04674     /* fetch out all of the digits */
04675     while (mp_iszero (&t) == MP_NO) {
04676         if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
04677             mp_clear (&t);
04678             return res;
04679         }
04680         ++digs;
04681     }
04682     mp_clear (&t);
04683 
04684     /* return digs + 1, the 1 is for the NULL byte that would be required. */
04685     *size = digs + 1;
04686     return MP_OKAY;
04687 }
04688 
04689 /* stores a bignum as a ASCII string in a given radix (2..64) */
04690 int mp_toradix (mp_int *a, char *str, int radix)
04691 {
04692     int     res, digs;
04693     mp_int  t;
04694     mp_digit d;
04695     char   *_s = str;
04696 
04697     /* check range of the radix */
04698     if (radix < 2 || radix > 64) {
04699         return MP_VAL;
04700     }
04701 
04702     /* quick out if its zero */
04703     if (mp_iszero(a) == 1) {
04704         *str++ = '0';
04705         *str = '\0';
04706         return MP_OKAY;
04707     }
04708 
04709     if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
04710         return res;
04711     }
04712 
04713     /* if it is negative output a - */
04714     if (t.sign == MP_NEG) {
04715         ++_s;
04716         *str++ = '-';
04717         t.sign = MP_ZPOS;
04718     }
04719 
04720     digs = 0;
04721     while (mp_iszero (&t) == 0) {
04722         if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
04723             mp_clear (&t);
04724             return res;
04725         }
04726         *str++ = mp_s_rmap[d];
04727         ++digs;
04728     }
04729 
04730     /* reverse the digits of the string.  In this case _s points
04731      * to the first digit [excluding the sign] of the number]
04732      */
04733     bn_reverse ((unsigned char *)_s, digs);
04734 
04735     /* append a NULL so the string is properly terminated */
04736     *str = '\0';
04737 
04738     mp_clear (&t);
04739     return MP_OKAY;
04740 }
04741 
04742 #endif /* defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) */
04743 
04744 #endif /* USE_FAST_MATH */
04745 
04746 #endif /* NO_BIG_INT */
04747 
04748