This is a port of cyaSSL 2.7.0.

Dependents:   CyaSSL_DTLS_Cellular CyaSSL_DTLS_Ethernet

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers integer.c Source File

integer.c

00001 /* integer.c
00002  *
00003  * Copyright (C) 2006-2013 wolfSSL Inc.
00004  *
00005  * This file is part of CyaSSL.
00006  *
00007  * CyaSSL is free software; you can redistribute it and/or modify
00008  * it under the terms of the GNU General Public License as published by
00009  * the Free Software Foundation; either version 2 of the License, or
00010  * (at your option) any later version.
00011  *
00012  * CyaSSL is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
00020  */
00021 
00022 
00023 /*
00024  * Based on public domain LibTomMath 0.38 by Tom St Denis, tomstdenis@iahu.ca,
00025  * http://math.libtomcrypt.com
00026  */
00027 
00028 
00029 #ifdef HAVE_CONFIG_H
00030     #include <config.h>
00031 #endif
00032 
00033 /* in case user set USE_FAST_MATH there */
00034 #include <cyassl/ctaocrypt/settings.h>
00035 
00036 #ifndef NO_BIG_INT 
00037 
00038 #ifndef USE_FAST_MATH
00039 
00040 #include <cyassl/ctaocrypt/integer.h>
00041 
00042 #ifndef NO_CYASSL_SMALL_STACK
00043     #ifndef CYASSL_SMALL_STACK
00044         #define CYASSL_SMALL_STACK
00045     #endif
00046 #endif
00047 
00048 static void bn_reverse (unsigned char *s, int len);
00049 
00050 /* math settings check */
00051 word32 CheckRunTimeSettings(void)
00052 {
00053     return CTC_SETTINGS;
00054 }
00055 
00056 
00057 /* handle up to 6 inits */
00058 int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d, mp_int* e,
00059                   mp_int* f)
00060 {
00061     int res = MP_OKAY;
00062 
00063     if (a && ((res = mp_init(a)) != MP_OKAY))
00064         return res;
00065 
00066     if (b && ((res = mp_init(b)) != MP_OKAY)) {
00067         mp_clear(a);
00068         return res;
00069     }
00070 
00071     if (c && ((res = mp_init(c)) != MP_OKAY)) {
00072         mp_clear(a); mp_clear(b);
00073         return res;
00074     }
00075 
00076     if (d && ((res = mp_init(d)) != MP_OKAY)) {
00077         mp_clear(a); mp_clear(b); mp_clear(c);
00078         return res;
00079     }
00080 
00081     if (e && ((res = mp_init(e)) != MP_OKAY)) {
00082         mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d);
00083         return res;
00084     }
00085 
00086     if (f && ((res = mp_init(f)) != MP_OKAY)) {
00087         mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d); mp_clear(e);
00088         return res;
00089     }
00090 
00091     return res;
00092 }
00093 
00094 
00095 /* init a new mp_int */
00096 int mp_init (mp_int * a)
00097 {
00098   int i;
00099 
00100   /* allocate memory required and clear it */
00101   a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * MP_PREC, 0,
00102                                       DYNAMIC_TYPE_BIGINT);
00103   if (a->dp == NULL) {
00104     return MP_MEM;
00105   }
00106 
00107   /* set the digits to zero */
00108   for (i = 0; i < MP_PREC; i++) {
00109       a->dp[i] = 0;
00110   }
00111 
00112   /* set the used to zero, allocated digits to the default precision
00113    * and sign to positive */
00114   a->used  = 0;
00115   a->alloc = MP_PREC;
00116   a->sign  = MP_ZPOS;
00117 
00118   return MP_OKAY;
00119 }
00120 
00121 
00122 /* clear one (frees)  */
00123 void
00124 mp_clear (mp_int * a)
00125 {
00126   int i;
00127 
00128   if (a == NULL)
00129       return;
00130 
00131   /* only do anything if a hasn't been freed previously */
00132   if (a->dp != NULL) {
00133     /* first zero the digits */
00134     for (i = 0; i < a->used; i++) {
00135         a->dp[i] = 0;
00136     }
00137 
00138     /* free ram */
00139     XFREE(a->dp, 0, DYNAMIC_TYPE_BIGINT);
00140 
00141     /* reset members to make debugging easier */
00142     a->dp    = NULL;
00143     a->alloc = a->used = 0;
00144     a->sign  = MP_ZPOS;
00145   }
00146 }
00147 
00148 
00149 /* get the size for an unsigned equivalent */
00150 int mp_unsigned_bin_size (mp_int * a)
00151 {
00152   int     size = mp_count_bits (a);
00153   return (size / 8 + ((size & 7) != 0 ? 1 : 0));
00154 }
00155 
00156 
00157 /* returns the number of bits in an int */
00158 int
00159 mp_count_bits (mp_int * a)
00160 {
00161   int     r;
00162   mp_digit q;
00163 
00164   /* shortcut */
00165   if (a->used == 0) {
00166     return 0;
00167   }
00168 
00169   /* get number of digits and add that */
00170   r = (a->used - 1) * DIGIT_BIT;
00171   
00172   /* take the last digit and count the bits in it */
00173   q = a->dp[a->used - 1];
00174   while (q > ((mp_digit) 0)) {
00175     ++r;
00176     q >>= ((mp_digit) 1);
00177   }
00178   return r;
00179 }
00180 
00181 
00182 /* store in unsigned [big endian] format */
00183 int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
00184 {
00185   int     x, res;
00186   mp_int  t;
00187 
00188   if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
00189     return res;
00190   }
00191 
00192   x = 0;
00193   while (mp_iszero (&t) == 0) {
00194 #ifndef MP_8BIT
00195       b[x++] = (unsigned char) (t.dp[0] & 255);
00196 #else
00197       b[x++] = (unsigned char) (t.dp[0] | ((t.dp[1] & 0x01) << 7));
00198 #endif
00199     if ((res = mp_div_2d (&t, 8, &t, NULL)) != MP_OKAY) {
00200       mp_clear (&t);
00201       return res;
00202     }
00203   }
00204   bn_reverse (b, x);
00205   mp_clear (&t);
00206   return MP_OKAY;
00207 }
00208 
00209 
00210 /* creates "a" then copies b into it */
00211 int mp_init_copy (mp_int * a, mp_int * b)
00212 {
00213   int     res;
00214 
00215   if ((res = mp_init (a)) != MP_OKAY) {
00216     return res;
00217   }
00218   return mp_copy (b, a);
00219 }
00220 
00221 
00222 /* copy, b = a */
00223 int
00224 mp_copy (mp_int * a, mp_int * b)
00225 {
00226   int     res, n;
00227 
00228   /* if dst == src do nothing */
00229   if (a == b) {
00230     return MP_OKAY;
00231   }
00232 
00233   /* grow dest */
00234   if (b->alloc < a->used) {
00235      if ((res = mp_grow (b, a->used)) != MP_OKAY) {
00236         return res;
00237      }
00238   }
00239 
00240   /* zero b and copy the parameters over */
00241   {
00242     register mp_digit *tmpa, *tmpb;
00243 
00244     /* pointer aliases */
00245 
00246     /* source */
00247     tmpa = a->dp;
00248 
00249     /* destination */
00250     tmpb = b->dp;
00251 
00252     /* copy all the digits */
00253     for (n = 0; n < a->used; n++) {
00254       *tmpb++ = *tmpa++;
00255     }
00256 
00257     /* clear high digits */
00258     for (; n < b->used; n++) {
00259       *tmpb++ = 0;
00260     }
00261   }
00262 
00263   /* copy used count and sign */
00264   b->used = a->used;
00265   b->sign = a->sign;
00266   return MP_OKAY;
00267 }
00268 
00269 
00270 /* grow as required */
00271 int mp_grow (mp_int * a, int size)
00272 {
00273   int     i;
00274   mp_digit *tmp;
00275 
00276   /* if the alloc size is smaller alloc more ram */
00277   if (a->alloc < size) {
00278     /* ensure there are always at least MP_PREC digits extra on top */
00279     size += (MP_PREC * 2) - (size % MP_PREC);
00280 
00281     /* reallocate the array a->dp
00282      *
00283      * We store the return in a temporary variable
00284      * in case the operation failed we don't want
00285      * to overwrite the dp member of a.
00286      */
00287     tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size, 0,
00288                                        DYNAMIC_TYPE_BIGINT);
00289     if (tmp == NULL) {
00290       /* reallocation failed but "a" is still valid [can be freed] */
00291       return MP_MEM;
00292     }
00293 
00294     /* reallocation succeeded so set a->dp */
00295     a->dp = tmp;
00296 
00297     /* zero excess digits */
00298     i        = a->alloc;
00299     a->alloc = size;
00300     for (; i < a->alloc; i++) {
00301       a->dp[i] = 0;
00302     }
00303   }
00304   return MP_OKAY;
00305 }
00306 
00307 
00308 /* reverse an array, used for radix code */
00309 void
00310 bn_reverse (unsigned char *s, int len)
00311 {
00312   int     ix, iy;
00313   unsigned char t;
00314 
00315   ix = 0;
00316   iy = len - 1;
00317   while (ix < iy) {
00318     t     = s[ix];
00319     s[ix] = s[iy];
00320     s[iy] = t;
00321     ++ix;
00322     --iy;
00323   }
00324 }
00325 
00326 
00327 /* shift right by a certain bit count (store quotient in c, optional
00328    remainder in d) */
00329 int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
00330 {
00331   mp_digit D, r, rr;
00332   int     x, res;
00333   mp_int  t;
00334 
00335 
00336   /* if the shift count is <= 0 then we do no work */
00337   if (b <= 0) {
00338     res = mp_copy (a, c);
00339     if (d != NULL) {
00340       mp_zero (d);
00341     }
00342     return res;
00343   }
00344 
00345   if ((res = mp_init (&t)) != MP_OKAY) {
00346     return res;
00347   }
00348 
00349   /* get the remainder */
00350   if (d != NULL) {
00351     if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
00352       mp_clear (&t);
00353       return res;
00354     }
00355   }
00356 
00357   /* copy */
00358   if ((res = mp_copy (a, c)) != MP_OKAY) {
00359     mp_clear (&t);
00360     return res;
00361   }
00362 
00363   /* shift by as many digits in the bit count */
00364   if (b >= (int)DIGIT_BIT) {
00365     mp_rshd (c, b / DIGIT_BIT);
00366   }
00367 
00368   /* shift any bit count < DIGIT_BIT */
00369   D = (mp_digit) (b % DIGIT_BIT);
00370   if (D != 0) {
00371     register mp_digit *tmpc, mask, shift;
00372 
00373     /* mask */
00374     mask = (((mp_digit)1) << D) - 1;
00375 
00376     /* shift for lsb */
00377     shift = DIGIT_BIT - D;
00378 
00379     /* alias */
00380     tmpc = c->dp + (c->used - 1);
00381 
00382     /* carry */
00383     r = 0;
00384     for (x = c->used - 1; x >= 0; x--) {
00385       /* get the lower  bits of this word in a temp */
00386       rr = *tmpc & mask;
00387 
00388       /* shift the current word and mix in the carry bits from the previous
00389          word */
00390       *tmpc = (*tmpc >> D) | (r << shift);
00391       --tmpc;
00392 
00393       /* set the carry to the carry bits of the current word found above */
00394       r = rr;
00395     }
00396   }
00397   mp_clamp (c);
00398   if (d != NULL) {
00399     mp_exch (&t, d);
00400   }
00401   mp_clear (&t);
00402   return MP_OKAY;
00403 }
00404 
00405 
00406 /* set to zero */
00407 void mp_zero (mp_int * a)
00408 {
00409   int       n;
00410   mp_digit *tmp;
00411 
00412   a->sign = MP_ZPOS;
00413   a->used = 0;
00414 
00415   tmp = a->dp;
00416   for (n = 0; n < a->alloc; n++) {
00417      *tmp++ = 0;
00418   }
00419 }
00420 
00421 
00422 /* trim unused digits 
00423  *
00424  * This is used to ensure that leading zero digits are
00425  * trimed and the leading "used" digit will be non-zero
00426  * Typically very fast.  Also fixes the sign if there
00427  * are no more leading digits
00428  */
00429 void
00430 mp_clamp (mp_int * a)
00431 {
00432   /* decrease used while the most significant digit is
00433    * zero.
00434    */
00435   while (a->used > 0 && a->dp[a->used - 1] == 0) {
00436     --(a->used);
00437   }
00438 
00439   /* reset the sign flag if used == 0 */
00440   if (a->used == 0) {
00441     a->sign = MP_ZPOS;
00442   }
00443 }
00444 
00445 
00446 /* swap the elements of two integers, for cases where you can't simply swap the 
00447  * mp_int pointers around
00448  */
00449 void
00450 mp_exch (mp_int * a, mp_int * b)
00451 {
00452   mp_int  t;
00453 
00454   t  = *a;
00455   *a = *b;
00456   *b = t;
00457 }
00458 
00459 
00460 /* shift right a certain amount of digits */
00461 void mp_rshd (mp_int * a, int b)
00462 {
00463   int     x;
00464 
00465   /* if b <= 0 then ignore it */
00466   if (b <= 0) {
00467     return;
00468   }
00469 
00470   /* if b > used then simply zero it and return */
00471   if (a->used <= b) {
00472     mp_zero (a);
00473     return;
00474   }
00475 
00476   {
00477     register mp_digit *bottom, *top;
00478 
00479     /* shift the digits down */
00480 
00481     /* bottom */
00482     bottom = a->dp;
00483 
00484     /* top [offset into digits] */
00485     top = a->dp + b;
00486 
00487     /* this is implemented as a sliding window where 
00488      * the window is b-digits long and digits from 
00489      * the top of the window are copied to the bottom
00490      *
00491      * e.g.
00492 
00493      b-2 | b-1 | b0 | b1 | b2 | ... | bb |   ---->
00494                  /\                   |      ---->
00495                   \-------------------/      ---->
00496      */
00497     for (x = 0; x < (a->used - b); x++) {
00498       *bottom++ = *top++;
00499     }
00500 
00501     /* zero the top digits */
00502     for (; x < a->used; x++) {
00503       *bottom++ = 0;
00504     }
00505   }
00506   
00507   /* remove excess digits */
00508   a->used -= b;
00509 }
00510 
00511 
00512 /* calc a value mod 2**b */
00513 int
00514 mp_mod_2d (mp_int * a, int b, mp_int * c)
00515 {
00516   int     x, res;
00517 
00518   /* if b is <= 0 then zero the int */
00519   if (b <= 0) {
00520     mp_zero (c);
00521     return MP_OKAY;
00522   }
00523 
00524   /* if the modulus is larger than the value than return */
00525   if (b >= (int) (a->used * DIGIT_BIT)) {
00526     res = mp_copy (a, c);
00527     return res;
00528   }
00529 
00530   /* copy */
00531   if ((res = mp_copy (a, c)) != MP_OKAY) {
00532     return res;
00533   }
00534 
00535   /* zero digits above the last digit of the modulus */
00536   for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
00537     c->dp[x] = 0;
00538   }
00539   /* clear the digit that is not completely outside/inside the modulus */
00540   c->dp[b / DIGIT_BIT] &= (mp_digit) ((((mp_digit) 1) <<
00541               (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1));
00542   mp_clamp (c);
00543   return MP_OKAY;
00544 }
00545 
00546 
00547 /* reads a unsigned char array, assumes the msb is stored first [big endian] */
00548 int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
00549 {
00550   int     res;
00551 
00552   /* make sure there are at least two digits */
00553   if (a->alloc < 2) {
00554      if ((res = mp_grow(a, 2)) != MP_OKAY) {
00555         return res;
00556      }
00557   }
00558 
00559   /* zero the int */
00560   mp_zero (a);
00561 
00562   /* read the bytes in */
00563   while (c-- > 0) {
00564     if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
00565       return res;
00566     }
00567 
00568 #ifndef MP_8BIT
00569       a->dp[0] |= *b++;
00570       a->used += 1;
00571 #else
00572       a->dp[0] = (*b & MP_MASK);
00573       a->dp[1] |= ((*b++ >> 7U) & 1);
00574       a->used += 2;
00575 #endif
00576   }
00577   mp_clamp (a);
00578   return MP_OKAY;
00579 }
00580 
00581 
00582 /* shift left by a certain bit count */
00583 int mp_mul_2d (mp_int * a, int b, mp_int * c)
00584 {
00585   mp_digit d;
00586   int      res;
00587 
00588   /* copy */
00589   if (a != c) {
00590      if ((res = mp_copy (a, c)) != MP_OKAY) {
00591        return res;
00592      }
00593   }
00594 
00595   if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) {
00596      if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
00597        return res;
00598      }
00599   }
00600 
00601   /* shift by as many digits in the bit count */
00602   if (b >= (int)DIGIT_BIT) {
00603     if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
00604       return res;
00605     }
00606   }
00607 
00608   /* shift any bit count < DIGIT_BIT */
00609   d = (mp_digit) (b % DIGIT_BIT);
00610   if (d != 0) {
00611     register mp_digit *tmpc, shift, mask, r, rr;
00612     register int x;
00613 
00614     /* bitmask for carries */
00615     mask = (((mp_digit)1) << d) - 1;
00616 
00617     /* shift for msbs */
00618     shift = DIGIT_BIT - d;
00619 
00620     /* alias */
00621     tmpc = c->dp;
00622 
00623     /* carry */
00624     r    = 0;
00625     for (x = 0; x < c->used; x++) {
00626       /* get the higher bits of the current word */
00627       rr = (*tmpc >> shift) & mask;
00628 
00629       /* shift the current word and OR in the carry */
00630       *tmpc = ((*tmpc << d) | r) & MP_MASK;
00631       ++tmpc;
00632 
00633       /* set the carry to the carry bits of the current word */
00634       r = rr;
00635     }
00636     
00637     /* set final carry */
00638     if (r != 0) {
00639        c->dp[(c->used)++] = r;
00640     }
00641   }
00642   mp_clamp (c);
00643   return MP_OKAY;
00644 }
00645 
00646 
00647 /* shift left a certain amount of digits */
00648 int mp_lshd (mp_int * a, int b)
00649 {
00650   int     x, res;
00651 
00652   /* if its less than zero return */
00653   if (b <= 0) {
00654     return MP_OKAY;
00655   }
00656 
00657   /* grow to fit the new digits */
00658   if (a->alloc < a->used + b) {
00659      if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
00660        return res;
00661      }
00662   }
00663 
00664   {
00665     register mp_digit *top, *bottom;
00666 
00667     /* increment the used by the shift amount then copy upwards */
00668     a->used += b;
00669 
00670     /* top */
00671     top = a->dp + a->used - 1;
00672 
00673     /* base */
00674     bottom = a->dp + a->used - 1 - b;
00675 
00676     /* much like mp_rshd this is implemented using a sliding window
00677      * except the window goes the otherway around.  Copying from
00678      * the bottom to the top.  see bn_mp_rshd.c for more info.
00679      */
00680     for (x = a->used - 1; x >= b; x--) {
00681       *top-- = *bottom--;
00682     }
00683 
00684     /* zero the lower digits */
00685     top = a->dp;
00686     for (x = 0; x < b; x++) {
00687       *top++ = 0;
00688     }
00689   }
00690   return MP_OKAY;
00691 }
00692 
00693 
00694 /* this is a shell function that calls either the normal or Montgomery
00695  * exptmod functions.  Originally the call to the montgomery code was
00696  * embedded in the normal function but that wasted alot of stack space
00697  * for nothing (since 99% of the time the Montgomery code would be called)
00698  */
00699 int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
00700 {
00701   int dr;
00702 
00703   /* modulus P must be positive */
00704   if (P->sign == MP_NEG) {
00705      return MP_VAL;
00706   }
00707 
00708   /* if exponent X is negative we have to recurse */
00709   if (X->sign == MP_NEG) {
00710 #ifdef BN_MP_INVMOD_C
00711      mp_int tmpG, tmpX;
00712      int err;
00713 
00714      /* first compute 1/G mod P */
00715      if ((err = mp_init(&tmpG)) != MP_OKAY) {
00716         return err;
00717      }
00718      if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
00719         mp_clear(&tmpG);
00720         return err;
00721      }
00722 
00723      /* now get |X| */
00724      if ((err = mp_init(&tmpX)) != MP_OKAY) {
00725         mp_clear(&tmpG);
00726         return err;
00727      }
00728      if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
00729         mp_clear(&tmpG);
00730         mp_clear(&tmpX);
00731         return err;
00732      }
00733 
00734      /* and now compute (1/G)**|X| instead of G**X [X < 0] */
00735      err = mp_exptmod(&tmpG, &tmpX, P, Y);
00736      mp_clear(&tmpG);
00737      mp_clear(&tmpX);
00738      return err;
00739 #else 
00740      /* no invmod */
00741      return MP_VAL;
00742 #endif
00743   }
00744 
00745 /* modified diminished radix reduction */
00746 #if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && \
00747   defined(BN_S_MP_EXPTMOD_C)
00748   if (mp_reduce_is_2k_l(P) == MP_YES) {
00749      return s_mp_exptmod(G, X, P, Y, 1);
00750   }
00751 #endif
00752 
00753 #ifdef BN_MP_DR_IS_MODULUS_C
00754   /* is it a DR modulus? */
00755   dr = mp_dr_is_modulus(P);
00756 #else
00757   /* default to no */
00758   dr = 0;
00759 #endif
00760 
00761 #ifdef BN_MP_REDUCE_IS_2K_C
00762   /* if not, is it a unrestricted DR modulus? */
00763   if (dr == 0) {
00764      dr = mp_reduce_is_2k(P) << 1;
00765   }
00766 #endif
00767     
00768   /* if the modulus is odd or dr != 0 use the montgomery method */
00769 #ifdef BN_MP_EXPTMOD_FAST_C
00770   if (mp_isodd (P) == 1 || dr !=  0) {
00771     return mp_exptmod_fast (G, X, P, Y, dr);
00772   } else {
00773 #endif
00774 #ifdef BN_S_MP_EXPTMOD_C
00775     /* otherwise use the generic Barrett reduction technique */
00776     return s_mp_exptmod (G, X, P, Y, 0);
00777 #else
00778     /* no exptmod for evens */
00779     return MP_VAL;
00780 #endif
00781 #ifdef BN_MP_EXPTMOD_FAST_C
00782   }
00783 #endif
00784 }
00785 
00786 
00787 /* b = |a| 
00788  *
00789  * Simple function copies the input and fixes the sign to positive
00790  */
00791 int
00792 mp_abs (mp_int * a, mp_int * b)
00793 {
00794   int     res;
00795 
00796   /* copy a to b */
00797   if (a != b) {
00798      if ((res = mp_copy (a, b)) != MP_OKAY) {
00799        return res;
00800      }
00801   }
00802 
00803   /* force the sign of b to positive */
00804   b->sign = MP_ZPOS;
00805 
00806   return MP_OKAY;
00807 }
00808 
00809 
00810 /* hac 14.61, pp608 */
00811 int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
00812 {
00813   /* b cannot be negative */
00814   if (b->sign == MP_NEG || mp_iszero(b) == 1) {
00815     return MP_VAL;
00816   }
00817 
00818 #ifdef BN_FAST_MP_INVMOD_C
00819   /* if the modulus is odd we can use a faster routine instead */
00820   if (mp_isodd (b) == 1) {
00821     return fast_mp_invmod (a, b, c);
00822   }
00823 #endif
00824 
00825 #ifdef BN_MP_INVMOD_SLOW_C
00826   return mp_invmod_slow(a, b, c);
00827 #endif
00828 }
00829 
00830 
00831 /* computes the modular inverse via binary extended euclidean algorithm, 
00832  * that is c = 1/a mod b 
00833  *
00834  * Based on slow invmod except this is optimized for the case where b is 
00835  * odd as per HAC Note 14.64 on pp. 610
00836  */
00837 int fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
00838 {
00839   mp_int  x, y, u, v, B, D;
00840   int     res, neg;
00841 
00842   /* 2. [modified] b must be odd   */
00843   if (mp_iseven (b) == 1) {
00844     return MP_VAL;
00845   }
00846 
00847   /* init all our temps */
00848   if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D)) != MP_OKAY) {
00849      return res;
00850   }
00851 
00852   /* x == modulus, y == value to invert */
00853   if ((res = mp_copy (b, &x)) != MP_OKAY) {
00854     goto LBL_ERR;
00855   }
00856 
00857   /* we need y = |a| */
00858   if ((res = mp_mod (a, b, &y)) != MP_OKAY) {
00859     goto LBL_ERR;
00860   }
00861 
00862   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
00863   if ((res = mp_copy (&x, &u)) != MP_OKAY) {
00864     goto LBL_ERR;
00865   }
00866   if ((res = mp_copy (&y, &v)) != MP_OKAY) {
00867     goto LBL_ERR;
00868   }
00869   mp_set (&D, 1);
00870 
00871 top:
00872   /* 4.  while u is even do */
00873   while (mp_iseven (&u) == 1) {
00874     /* 4.1 u = u/2 */
00875     if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
00876       goto LBL_ERR;
00877     }
00878     /* 4.2 if B is odd then */
00879     if (mp_isodd (&B) == 1) {
00880       if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
00881         goto LBL_ERR;
00882       }
00883     }
00884     /* B = B/2 */
00885     if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
00886       goto LBL_ERR;
00887     }
00888   }
00889 
00890   /* 5.  while v is even do */
00891   while (mp_iseven (&v) == 1) {
00892     /* 5.1 v = v/2 */
00893     if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
00894       goto LBL_ERR;
00895     }
00896     /* 5.2 if D is odd then */
00897     if (mp_isodd (&D) == 1) {
00898       /* D = (D-x)/2 */
00899       if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
00900         goto LBL_ERR;
00901       }
00902     }
00903     /* D = D/2 */
00904     if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
00905       goto LBL_ERR;
00906     }
00907   }
00908 
00909   /* 6.  if u >= v then */
00910   if (mp_cmp (&u, &v) != MP_LT) {
00911     /* u = u - v, B = B - D */
00912     if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
00913       goto LBL_ERR;
00914     }
00915 
00916     if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
00917       goto LBL_ERR;
00918     }
00919   } else {
00920     /* v - v - u, D = D - B */
00921     if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
00922       goto LBL_ERR;
00923     }
00924 
00925     if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
00926       goto LBL_ERR;
00927     }
00928   }
00929 
00930   /* if not zero goto step 4 */
00931   if (mp_iszero (&u) == 0) {
00932     goto top;
00933   }
00934 
00935   /* now a = C, b = D, gcd == g*v */
00936 
00937   /* if v != 1 then there is no inverse */
00938   if (mp_cmp_d (&v, 1) != MP_EQ) {
00939     res = MP_VAL;
00940     goto LBL_ERR;
00941   }
00942 
00943   /* b is now the inverse */
00944   neg = a->sign;
00945   while (D.sign == MP_NEG) {
00946     if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
00947       goto LBL_ERR;
00948     }
00949   }
00950   mp_exch (&D, c);
00951   c->sign = neg;
00952   res = MP_OKAY;
00953 
00954 LBL_ERR:mp_clear(&x);
00955         mp_clear(&y);
00956         mp_clear(&u);
00957         mp_clear(&v);
00958         mp_clear(&B);
00959         mp_clear(&D);
00960   return res;
00961 }
00962 
00963 
00964 /* hac 14.61, pp608 */
00965 int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
00966 {
00967   mp_int  x, y, u, v, A, B, C, D;
00968   int     res;
00969 
00970   /* b cannot be negative */
00971   if (b->sign == MP_NEG || mp_iszero(b) == 1) {
00972     return MP_VAL;
00973   }
00974 
00975   /* init temps */
00976   if ((res = mp_init_multi(&x, &y, &u, &v, 
00977                            &A, &B)) != MP_OKAY) {
00978      return res;
00979   }
00980 
00981   /* init rest of tmps temps */
00982   if ((res = mp_init_multi(&C, &D, 0, 0, 0, 0)) != MP_OKAY) {
00983      return res;
00984   }
00985 
00986   /* x = a, y = b */
00987   if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
00988       goto LBL_ERR;
00989   }
00990   if ((res = mp_copy (b, &y)) != MP_OKAY) {
00991     goto LBL_ERR;
00992   }
00993 
00994   /* 2. [modified] if x,y are both even then return an error! */
00995   if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
00996     res = MP_VAL;
00997     goto LBL_ERR;
00998   }
00999 
01000   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
01001   if ((res = mp_copy (&x, &u)) != MP_OKAY) {
01002     goto LBL_ERR;
01003   }
01004   if ((res = mp_copy (&y, &v)) != MP_OKAY) {
01005     goto LBL_ERR;
01006   }
01007   mp_set (&A, 1);
01008   mp_set (&D, 1);
01009 
01010 top:
01011   /* 4.  while u is even do */
01012   while (mp_iseven (&u) == 1) {
01013     /* 4.1 u = u/2 */
01014     if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
01015       goto LBL_ERR;
01016     }
01017     /* 4.2 if A or B is odd then */
01018     if (mp_isodd (&A) == 1 || mp_isodd (&B) == 1) {
01019       /* A = (A+y)/2, B = (B-x)/2 */
01020       if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
01021          goto LBL_ERR;
01022       }
01023       if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
01024          goto LBL_ERR;
01025       }
01026     }
01027     /* A = A/2, B = B/2 */
01028     if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
01029       goto LBL_ERR;
01030     }
01031     if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
01032       goto LBL_ERR;
01033     }
01034   }
01035 
01036   /* 5.  while v is even do */
01037   while (mp_iseven (&v) == 1) {
01038     /* 5.1 v = v/2 */
01039     if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
01040       goto LBL_ERR;
01041     }
01042     /* 5.2 if C or D is odd then */
01043     if (mp_isodd (&C) == 1 || mp_isodd (&D) == 1) {
01044       /* C = (C+y)/2, D = (D-x)/2 */
01045       if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
01046          goto LBL_ERR;
01047       }
01048       if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
01049          goto LBL_ERR;
01050       }
01051     }
01052     /* C = C/2, D = D/2 */
01053     if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
01054       goto LBL_ERR;
01055     }
01056     if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
01057       goto LBL_ERR;
01058     }
01059   }
01060 
01061   /* 6.  if u >= v then */
01062   if (mp_cmp (&u, &v) != MP_LT) {
01063     /* u = u - v, A = A - C, B = B - D */
01064     if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
01065       goto LBL_ERR;
01066     }
01067 
01068     if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
01069       goto LBL_ERR;
01070     }
01071 
01072     if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
01073       goto LBL_ERR;
01074     }
01075   } else {
01076     /* v - v - u, C = C - A, D = D - B */
01077     if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
01078       goto LBL_ERR;
01079     }
01080 
01081     if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
01082       goto LBL_ERR;
01083     }
01084 
01085     if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
01086       goto LBL_ERR;
01087     }
01088   }
01089 
01090   /* if not zero goto step 4 */
01091   if (mp_iszero (&u) == 0)
01092     goto top;
01093 
01094   /* now a = C, b = D, gcd == g*v */
01095 
01096   /* if v != 1 then there is no inverse */
01097   if (mp_cmp_d (&v, 1) != MP_EQ) {
01098     res = MP_VAL;
01099     goto LBL_ERR;
01100   }
01101 
01102   /* if its too low */
01103   while (mp_cmp_d(&C, 0) == MP_LT) {
01104       if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
01105          goto LBL_ERR;
01106       }
01107   }
01108   
01109   /* too big */
01110   while (mp_cmp_mag(&C, b) != MP_LT) {
01111       if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
01112          goto LBL_ERR;
01113       }
01114   }
01115   
01116   /* C is now the inverse */
01117   mp_exch (&C, c);
01118   res = MP_OKAY;
01119 LBL_ERR:mp_clear(&x);
01120         mp_clear(&y);
01121         mp_clear(&u);
01122         mp_clear(&v);
01123         mp_clear(&A);
01124         mp_clear(&B);
01125         mp_clear(&C);
01126         mp_clear(&D);
01127   return res;
01128 }
01129 
01130 
01131 /* compare maginitude of two ints (unsigned) */
01132 int mp_cmp_mag (mp_int * a, mp_int * b)
01133 {
01134   int     n;
01135   mp_digit *tmpa, *tmpb;
01136 
01137   /* compare based on # of non-zero digits */
01138   if (a->used > b->used) {
01139     return MP_GT;
01140   }
01141   
01142   if (a->used < b->used) {
01143     return MP_LT;
01144   }
01145 
01146   /* alias for a */
01147   tmpa = a->dp + (a->used - 1);
01148 
01149   /* alias for b */
01150   tmpb = b->dp + (a->used - 1);
01151 
01152   /* compare based on digits  */
01153   for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
01154     if (*tmpa > *tmpb) {
01155       return MP_GT;
01156     }
01157 
01158     if (*tmpa < *tmpb) {
01159       return MP_LT;
01160     }
01161   }
01162   return MP_EQ;
01163 }
01164 
01165 
01166 /* compare two ints (signed)*/
01167 int
01168 mp_cmp (mp_int * a, mp_int * b)
01169 {
01170   /* compare based on sign */
01171   if (a->sign != b->sign) {
01172      if (a->sign == MP_NEG) {
01173         return MP_LT;
01174      } else {
01175         return MP_GT;
01176      }
01177   }
01178   
01179   /* compare digits */
01180   if (a->sign == MP_NEG) {
01181      /* if negative compare opposite direction */
01182      return mp_cmp_mag(b, a);
01183   } else {
01184      return mp_cmp_mag(a, b);
01185   }
01186 }
01187 
01188 
01189 /* compare a digit */
01190 int mp_cmp_d(mp_int * a, mp_digit b)
01191 {
01192   /* compare based on sign */
01193   if (a->sign == MP_NEG) {
01194     return MP_LT;
01195   }
01196 
01197   /* compare based on magnitude */
01198   if (a->used > 1) {
01199     return MP_GT;
01200   }
01201 
01202   /* compare the only digit of a to b */
01203   if (a->dp[0] > b) {
01204     return MP_GT;
01205   } else if (a->dp[0] < b) {
01206     return MP_LT;
01207   } else {
01208     return MP_EQ;
01209   }
01210 }
01211 
01212 
01213 /* set to a digit */
01214 void mp_set (mp_int * a, mp_digit b)
01215 {
01216   mp_zero (a);
01217   a->dp[0] = b & MP_MASK;
01218   a->used  = (a->dp[0] != 0) ? 1 : 0;
01219 }
01220 
01221 
01222 /* c = a mod b, 0 <= c < b */
01223 int
01224 mp_mod (mp_int * a, mp_int * b, mp_int * c)
01225 {
01226   mp_int  t;
01227   int     res;
01228 
01229   if ((res = mp_init (&t)) != MP_OKAY) {
01230     return res;
01231   }
01232 
01233   if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
01234     mp_clear (&t);
01235     return res;
01236   }
01237 
01238   if (t.sign != b->sign) {
01239     res = mp_add (b, &t, c);
01240   } else {
01241     res = MP_OKAY;
01242     mp_exch (&t, c);
01243   }
01244 
01245   mp_clear (&t);
01246   return res;
01247 }
01248 
01249 
01250 /* slower bit-bang division... also smaller */
01251 int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
01252 {
01253    mp_int ta, tb, tq, q;
01254    int    res, n, n2;
01255 
01256   /* is divisor zero ? */
01257   if (mp_iszero (b) == 1) {
01258     return MP_VAL;
01259   }
01260 
01261   /* if a < b then q=0, r = a */
01262   if (mp_cmp_mag (a, b) == MP_LT) {
01263     if (d != NULL) {
01264       res = mp_copy (a, d);
01265     } else {
01266       res = MP_OKAY;
01267     }
01268     if (c != NULL) {
01269       mp_zero (c);
01270     }
01271     return res;
01272   }
01273     
01274   /* init our temps */
01275   if ((res = mp_init_multi(&ta, &tb, &tq, &q, 0, 0)) != MP_OKAY) {
01276      return res;
01277   }
01278 
01279 
01280   mp_set(&tq, 1);
01281   n = mp_count_bits(a) - mp_count_bits(b);
01282   if (((res = mp_abs(a, &ta)) != MP_OKAY) ||
01283       ((res = mp_abs(b, &tb)) != MP_OKAY) || 
01284       ((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) ||
01285       ((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
01286       goto LBL_ERR;
01287   }
01288 
01289   while (n-- >= 0) {
01290      if (mp_cmp(&tb, &ta) != MP_GT) {
01291         if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) ||
01292             ((res = mp_add(&q, &tq, &q)) != MP_OKAY)) {
01293            goto LBL_ERR;
01294         }
01295      }
01296      if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) ||
01297          ((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) {
01298            goto LBL_ERR;
01299      }
01300   }
01301 
01302   /* now q == quotient and ta == remainder */
01303   n  = a->sign;
01304   n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG);
01305   if (c != NULL) {
01306      mp_exch(c, &q);
01307      c->sign  = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2;
01308   }
01309   if (d != NULL) {
01310      mp_exch(d, &ta);
01311      d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n;
01312   }
01313 LBL_ERR:
01314    mp_clear(&ta);
01315    mp_clear(&tb);
01316    mp_clear(&tq);
01317    mp_clear(&q);
01318    return res;
01319 }
01320 
01321 
01322 /* b = a/2 */
01323 int mp_div_2(mp_int * a, mp_int * b)
01324 {
01325   int     x, res, oldused;
01326 
01327   /* copy */
01328   if (b->alloc < a->used) {
01329     if ((res = mp_grow (b, a->used)) != MP_OKAY) {
01330       return res;
01331     }
01332   }
01333 
01334   oldused = b->used;
01335   b->used = a->used;
01336   {
01337     register mp_digit r, rr, *tmpa, *tmpb;
01338 
01339     /* source alias */
01340     tmpa = a->dp + b->used - 1;
01341 
01342     /* dest alias */
01343     tmpb = b->dp + b->used - 1;
01344 
01345     /* carry */
01346     r = 0;
01347     for (x = b->used - 1; x >= 0; x--) {
01348       /* get the carry for the next iteration */
01349       rr = *tmpa & 1;
01350 
01351       /* shift the current digit, add in carry and store */
01352       *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
01353 
01354       /* forward carry to next iteration */
01355       r = rr;
01356     }
01357 
01358     /* zero excess digits */
01359     tmpb = b->dp + b->used;
01360     for (x = b->used; x < oldused; x++) {
01361       *tmpb++ = 0;
01362     }
01363   }
01364   b->sign = a->sign;
01365   mp_clamp (b);
01366   return MP_OKAY;
01367 }
01368 
01369 
01370 /* high level addition (handles signs) */
01371 int mp_add (mp_int * a, mp_int * b, mp_int * c)
01372 {
01373   int     sa, sb, res;
01374 
01375   /* get sign of both inputs */
01376   sa = a->sign;
01377   sb = b->sign;
01378 
01379   /* handle two cases, not four */
01380   if (sa == sb) {
01381     /* both positive or both negative */
01382     /* add their magnitudes, copy the sign */
01383     c->sign = sa;
01384     res = s_mp_add (a, b, c);
01385   } else {
01386     /* one positive, the other negative */
01387     /* subtract the one with the greater magnitude from */
01388     /* the one of the lesser magnitude.  The result gets */
01389     /* the sign of the one with the greater magnitude. */
01390     if (mp_cmp_mag (a, b) == MP_LT) {
01391       c->sign = sb;
01392       res = s_mp_sub (b, a, c);
01393     } else {
01394       c->sign = sa;
01395       res = s_mp_sub (a, b, c);
01396     }
01397   }
01398   return res;
01399 }
01400 
01401 
01402 /* low level addition, based on HAC pp.594, Algorithm 14.7 */
01403 int
01404 s_mp_add (mp_int * a, mp_int * b, mp_int * c)
01405 {
01406   mp_int *x;
01407   int     olduse, res, min, max;
01408 
01409   /* find sizes, we let |a| <= |b| which means we have to sort
01410    * them.  "x" will point to the input with the most digits
01411    */
01412   if (a->used > b->used) {
01413     min = b->used;
01414     max = a->used;
01415     x = a;
01416   } else {
01417     min = a->used;
01418     max = b->used;
01419     x = b;
01420   }
01421 
01422   /* init result */
01423   if (c->alloc < max + 1) {
01424     if ((res = mp_grow (c, max + 1)) != MP_OKAY) {
01425       return res;
01426     }
01427   }
01428 
01429   /* get old used digit count and set new one */
01430   olduse = c->used;
01431   c->used = max + 1;
01432 
01433   {
01434     register mp_digit u, *tmpa, *tmpb, *tmpc;
01435     register int i;
01436 
01437     /* alias for digit pointers */
01438 
01439     /* first input */
01440     tmpa = a->dp;
01441 
01442     /* second input */
01443     tmpb = b->dp;
01444 
01445     /* destination */
01446     tmpc = c->dp;
01447 
01448     /* zero the carry */
01449     u = 0;
01450     for (i = 0; i < min; i++) {
01451       /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
01452       *tmpc = *tmpa++ + *tmpb++ + u;
01453 
01454       /* U = carry bit of T[i] */
01455       u = *tmpc >> ((mp_digit)DIGIT_BIT);
01456 
01457       /* take away carry bit from T[i] */
01458       *tmpc++ &= MP_MASK;
01459     }
01460 
01461     /* now copy higher words if any, that is in A+B 
01462      * if A or B has more digits add those in 
01463      */
01464     if (min != max) {
01465       for (; i < max; i++) {
01466         /* T[i] = X[i] + U */
01467         *tmpc = x->dp[i] + u;
01468 
01469         /* U = carry bit of T[i] */
01470         u = *tmpc >> ((mp_digit)DIGIT_BIT);
01471 
01472         /* take away carry bit from T[i] */
01473         *tmpc++ &= MP_MASK;
01474       }
01475     }
01476 
01477     /* add carry */
01478     *tmpc++ = u;
01479 
01480     /* clear digits above oldused */
01481     for (i = c->used; i < olduse; i++) {
01482       *tmpc++ = 0;
01483     }
01484   }
01485 
01486   mp_clamp (c);
01487   return MP_OKAY;
01488 }
01489 
01490 
01491 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
01492 int
01493 s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
01494 {
01495   int     olduse, res, min, max;
01496 
01497   /* find sizes */
01498   min = b->used;
01499   max = a->used;
01500 
01501   /* init result */
01502   if (c->alloc < max) {
01503     if ((res = mp_grow (c, max)) != MP_OKAY) {
01504       return res;
01505     }
01506   }
01507   olduse = c->used;
01508   c->used = max;
01509 
01510   {
01511     register mp_digit u, *tmpa, *tmpb, *tmpc;
01512     register int i;
01513 
01514     /* alias for digit pointers */
01515     tmpa = a->dp;
01516     tmpb = b->dp;
01517     tmpc = c->dp;
01518 
01519     /* set carry to zero */
01520     u = 0;
01521     for (i = 0; i < min; i++) {
01522       /* T[i] = A[i] - B[i] - U */
01523       *tmpc = *tmpa++ - *tmpb++ - u;
01524 
01525       /* U = carry bit of T[i]
01526        * Note this saves performing an AND operation since
01527        * if a carry does occur it will propagate all the way to the
01528        * MSB.  As a result a single shift is enough to get the carry
01529        */
01530       u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
01531 
01532       /* Clear carry from T[i] */
01533       *tmpc++ &= MP_MASK;
01534     }
01535 
01536     /* now copy higher words if any, e.g. if A has more digits than B  */
01537     for (; i < max; i++) {
01538       /* T[i] = A[i] - U */
01539       *tmpc = *tmpa++ - u;
01540 
01541       /* U = carry bit of T[i] */
01542       u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
01543 
01544       /* Clear carry from T[i] */
01545       *tmpc++ &= MP_MASK;
01546     }
01547 
01548     /* clear digits above used (since we may not have grown result above) */
01549     for (i = c->used; i < olduse; i++) {
01550       *tmpc++ = 0;
01551     }
01552   }
01553 
01554   mp_clamp (c);
01555   return MP_OKAY;
01556 }
01557 
01558 
01559 /* high level subtraction (handles signs) */
01560 int
01561 mp_sub (mp_int * a, mp_int * b, mp_int * c)
01562 {
01563   int     sa, sb, res;
01564 
01565   sa = a->sign;
01566   sb = b->sign;
01567 
01568   if (sa != sb) {
01569     /* subtract a negative from a positive, OR */
01570     /* subtract a positive from a negative. */
01571     /* In either case, ADD their magnitudes, */
01572     /* and use the sign of the first number. */
01573     c->sign = sa;
01574     res = s_mp_add (a, b, c);
01575   } else {
01576     /* subtract a positive from a positive, OR */
01577     /* subtract a negative from a negative. */
01578     /* First, take the difference between their */
01579     /* magnitudes, then... */
01580     if (mp_cmp_mag (a, b) != MP_LT) {
01581       /* Copy the sign from the first */
01582       c->sign = sa;
01583       /* The first has a larger or equal magnitude */
01584       res = s_mp_sub (a, b, c);
01585     } else {
01586       /* The result has the *opposite* sign from */
01587       /* the first number. */
01588       c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
01589       /* The second has a larger magnitude */
01590       res = s_mp_sub (b, a, c);
01591     }
01592   }
01593   return res;
01594 }
01595 
01596 
01597 /* determines if reduce_2k_l can be used */
01598 int mp_reduce_is_2k_l(mp_int *a)
01599 {
01600    int ix, iy;
01601    
01602    if (a->used == 0) {
01603       return MP_NO;
01604    } else if (a->used == 1) {
01605       return MP_YES;
01606    } else if (a->used > 1) {
01607       /* if more than half of the digits are -1 we're sold */
01608       for (iy = ix = 0; ix < a->used; ix++) {
01609           if (a->dp[ix] == MP_MASK) {
01610               ++iy;
01611           }
01612       }
01613       return (iy >= (a->used/2)) ? MP_YES : MP_NO;
01614       
01615    }
01616    return MP_NO;
01617 }
01618 
01619 
01620 /* determines if mp_reduce_2k can be used */
01621 int mp_reduce_is_2k(mp_int *a)
01622 {
01623    int ix, iy, iw;
01624    mp_digit iz;
01625    
01626    if (a->used == 0) {
01627       return MP_NO;
01628    } else if (a->used == 1) {
01629       return MP_YES;
01630    } else if (a->used > 1) {
01631       iy = mp_count_bits(a);
01632       iz = 1;
01633       iw = 1;
01634     
01635       /* Test every bit from the second digit up, must be 1 */
01636       for (ix = DIGIT_BIT; ix < iy; ix++) {
01637           if ((a->dp[iw] & iz) == 0) {
01638              return MP_NO;
01639           }
01640           iz <<= 1;
01641           if (iz > (mp_digit)MP_MASK) {
01642              ++iw;
01643              iz = 1;
01644           }
01645       }
01646    }
01647    return MP_YES;
01648 }
01649 
01650 
01651 /* determines if a number is a valid DR modulus */
01652 int mp_dr_is_modulus(mp_int *a)
01653 {
01654    int ix;
01655 
01656    /* must be at least two digits */
01657    if (a->used < 2) {
01658       return 0;
01659    }
01660 
01661    /* must be of the form b**k - a [a <= b] so all
01662     * but the first digit must be equal to -1 (mod b).
01663     */
01664    for (ix = 1; ix < a->used; ix++) {
01665        if (a->dp[ix] != MP_MASK) {
01666           return 0;
01667        }
01668    }
01669    return 1;
01670 }
01671 
01672 
01673 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
01674  *
01675  * Uses a left-to-right k-ary sliding window to compute the modular
01676  * exponentiation.
01677  * The value of k changes based on the size of the exponent.
01678  *
01679  * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
01680  */
01681 
01682 #ifdef MP_LOW_MEM
01683    #define TAB_SIZE 32
01684 #else
01685    #define TAB_SIZE 256
01686 #endif
01687 
01688 int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
01689                      int redmode)
01690 {
01691   mp_int  M[TAB_SIZE], res;
01692   mp_digit buf, mp;
01693   int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
01694 
01695   /* use a pointer to the reduction algorithm.  This allows us to use
01696    * one of many reduction algorithms without modding the guts of
01697    * the code with if statements everywhere.
01698    */
01699   int     (*redux)(mp_int*,mp_int*,mp_digit);
01700 
01701   /* find window size */
01702   x = mp_count_bits (X);
01703   if (x <= 7) {
01704     winsize = 2;
01705   } else if (x <= 36) {
01706     winsize = 3;
01707   } else if (x <= 140) {
01708     winsize = 4;
01709   } else if (x <= 450) {
01710     winsize = 5;
01711   } else if (x <= 1303) {
01712     winsize = 6;
01713   } else if (x <= 3529) {
01714     winsize = 7;
01715   } else {
01716     winsize = 8;
01717   }
01718 
01719 #ifdef MP_LOW_MEM
01720   if (winsize > 5) {
01721      winsize = 5;
01722   }
01723 #endif
01724 
01725   /* init M array */
01726   /* init first cell */
01727   if ((err = mp_init(&M[1])) != MP_OKAY) {
01728      return err;
01729   }
01730 
01731   /* now init the second half of the array */
01732   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
01733     if ((err = mp_init(&M[x])) != MP_OKAY) {
01734       for (y = 1<<(winsize-1); y < x; y++) {
01735         mp_clear (&M[y]);
01736       }
01737       mp_clear(&M[1]);
01738       return err;
01739     }
01740   }
01741 
01742   /* determine and setup reduction code */
01743   if (redmode == 0) {
01744 #ifdef BN_MP_MONTGOMERY_SETUP_C     
01745      /* now setup montgomery  */
01746      if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
01747         goto LBL_M;
01748      }
01749 #else
01750      err = MP_VAL;
01751      goto LBL_M;
01752 #endif
01753 
01754      /* automatically pick the comba one if available (saves quite a few
01755         calls/ifs) */
01756 #ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
01757      if (((P->used * 2 + 1) < MP_WARRAY) &&
01758           P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
01759         redux = fast_mp_montgomery_reduce;
01760      } else 
01761 #endif
01762      {
01763 #ifdef BN_MP_MONTGOMERY_REDUCE_C
01764         /* use slower baseline Montgomery method */
01765         redux = mp_montgomery_reduce;
01766 #else
01767         err = MP_VAL;
01768         goto LBL_M;
01769 #endif
01770      }
01771   } else if (redmode == 1) {
01772 #if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C)
01773      /* setup DR reduction for moduli of the form B**k - b */
01774      mp_dr_setup(P, &mp);
01775      redux = mp_dr_reduce;
01776 #else
01777      err = MP_VAL;
01778      goto LBL_M;
01779 #endif
01780   } else {
01781 #if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
01782      /* setup DR reduction for moduli of the form 2**k - b */
01783      if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
01784         goto LBL_M;
01785      }
01786      redux = mp_reduce_2k;
01787 #else
01788      err = MP_VAL;
01789      goto LBL_M;
01790 #endif
01791   }
01792 
01793   /* setup result */
01794   if ((err = mp_init (&res)) != MP_OKAY) {
01795     goto LBL_M;
01796   }
01797 
01798   /* create M table
01799    *
01800 
01801    *
01802    * The first half of the table is not computed though accept for M[0] and M[1]
01803    */
01804 
01805   if (redmode == 0) {
01806 #ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
01807      /* now we need R mod m */
01808      if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
01809        goto LBL_RES;
01810      }
01811 #else 
01812      err = MP_VAL;
01813      goto LBL_RES;
01814 #endif
01815 
01816      /* now set M[1] to G * R mod m */
01817      if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
01818        goto LBL_RES;
01819      }
01820   } else {
01821      mp_set(&res, 1);
01822      if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
01823         goto LBL_RES;
01824      }
01825   }
01826 
01827   /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times*/
01828   if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
01829     goto LBL_RES;
01830   }
01831 
01832   for (x = 0; x < (winsize - 1); x++) {
01833     if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
01834       goto LBL_RES;
01835     }
01836     if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
01837       goto LBL_RES;
01838     }
01839   }
01840 
01841   /* create upper table */
01842   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
01843     if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
01844       goto LBL_RES;
01845     }
01846     if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
01847       goto LBL_RES;
01848     }
01849   }
01850 
01851   /* set initial mode and bit cnt */
01852   mode   = 0;
01853   bitcnt = 1;
01854   buf    = 0;
01855   digidx = X->used - 1;
01856   bitcpy = 0;
01857   bitbuf = 0;
01858 
01859   for (;;) {
01860     /* grab next digit as required */
01861     if (--bitcnt == 0) {
01862       /* if digidx == -1 we are out of digits so break */
01863       if (digidx == -1) {
01864         break;
01865       }
01866       /* read next digit and reset bitcnt */
01867       buf    = X->dp[digidx--];
01868       bitcnt = (int)DIGIT_BIT;
01869     }
01870 
01871     /* grab the next msb from the exponent */
01872     y     = (int)(buf >> (DIGIT_BIT - 1)) & 1;
01873     buf <<= (mp_digit)1;
01874 
01875     /* if the bit is zero and mode == 0 then we ignore it
01876      * These represent the leading zero bits before the first 1 bit
01877      * in the exponent.  Technically this opt is not required but it
01878      * does lower the # of trivial squaring/reductions used
01879      */
01880     if (mode == 0 && y == 0) {
01881       continue;
01882     }
01883 
01884     /* if the bit is zero and mode == 1 then we square */
01885     if (mode == 1 && y == 0) {
01886       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
01887         goto LBL_RES;
01888       }
01889       if ((err = redux (&res, P, mp)) != MP_OKAY) {
01890         goto LBL_RES;
01891       }
01892       continue;
01893     }
01894 
01895     /* else we add it to the window */
01896     bitbuf |= (y << (winsize - ++bitcpy));
01897     mode    = 2;
01898 
01899     if (bitcpy == winsize) {
01900       /* ok window is filled so square as required and multiply  */
01901       /* square first */
01902       for (x = 0; x < winsize; x++) {
01903         if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
01904           goto LBL_RES;
01905         }
01906         if ((err = redux (&res, P, mp)) != MP_OKAY) {
01907           goto LBL_RES;
01908         }
01909       }
01910 
01911       /* then multiply */
01912       if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
01913         goto LBL_RES;
01914       }
01915       if ((err = redux (&res, P, mp)) != MP_OKAY) {
01916         goto LBL_RES;
01917       }
01918 
01919       /* empty window and reset */
01920       bitcpy = 0;
01921       bitbuf = 0;
01922       mode   = 1;
01923     }
01924   }
01925 
01926   /* if bits remain then square/multiply */
01927   if (mode == 2 && bitcpy > 0) {
01928     /* square then multiply if the bit is set */
01929     for (x = 0; x < bitcpy; x++) {
01930       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
01931         goto LBL_RES;
01932       }
01933       if ((err = redux (&res, P, mp)) != MP_OKAY) {
01934         goto LBL_RES;
01935       }
01936 
01937       /* get next bit of the window */
01938       bitbuf <<= 1;
01939       if ((bitbuf & (1 << winsize)) != 0) {
01940         /* then multiply */
01941         if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
01942           goto LBL_RES;
01943         }
01944         if ((err = redux (&res, P, mp)) != MP_OKAY) {
01945           goto LBL_RES;
01946         }
01947       }
01948     }
01949   }
01950 
01951   if (redmode == 0) {
01952      /* fixup result if Montgomery reduction is used
01953       * recall that any value in a Montgomery system is
01954       * actually multiplied by R mod n.  So we have
01955       * to reduce one more time to cancel out the factor
01956       * of R.
01957       */
01958      if ((err = redux(&res, P, mp)) != MP_OKAY) {
01959        goto LBL_RES;
01960      }
01961   }
01962 
01963   /* swap res with Y */
01964   mp_exch (&res, Y);
01965   err = MP_OKAY;
01966 LBL_RES:mp_clear (&res);
01967 LBL_M:
01968   mp_clear(&M[1]);
01969   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
01970     mp_clear (&M[x]);
01971   }
01972   return err;
01973 }
01974 
01975 
01976 /* setups the montgomery reduction stuff */
01977 int
01978 mp_montgomery_setup (mp_int * n, mp_digit * rho)
01979 {
01980   mp_digit x, b;
01981 
01982 /* fast inversion mod 2**k
01983  *
01984  * Based on the fact that
01985  *
01986  * XA = 1 (mod 2**n)  =>  (X(2-XA)) A = 1 (mod 2**2n)
01987  *                    =>  2*X*A - X*X*A*A = 1
01988  *                    =>  2*(1) - (1)     = 1
01989  */
01990   b = n->dp[0];
01991 
01992   if ((b & 1) == 0) {
01993     return MP_VAL;
01994   }
01995 
01996   x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
01997   x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
01998 #if !defined(MP_8BIT)
01999   x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
02000 #endif
02001 #if defined(MP_64BIT) || !(defined(MP_8BIT) || defined(MP_16BIT))
02002   x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
02003 #endif
02004 #ifdef MP_64BIT
02005   x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
02006 #endif
02007 
02008   /* rho = -1/m mod b */
02009   /* TAO, switched mp_word casts to mp_digit to shut up compiler */
02010   *rho = (((mp_digit)1 << ((mp_digit) DIGIT_BIT)) - x) & MP_MASK;
02011 
02012   return MP_OKAY;
02013 }
02014 
02015 
02016 /* computes xR**-1 == x (mod N) via Montgomery Reduction
02017  *
02018  * This is an optimized implementation of montgomery_reduce
02019  * which uses the comba method to quickly calculate the columns of the
02020  * reduction.
02021  *
02022  * Based on Algorithm 14.32 on pp.601 of HAC.
02023 */
02024 int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
02025 {
02026   int     ix, res, olduse;
02027 #ifdef CYASSL_SMALL_STACK
02028   mp_word* W;    /* uses dynamic memory and slower */
02029 #else
02030   mp_word W[MP_WARRAY];
02031 #endif
02032 
02033   /* get old used count */
02034   olduse = x->used;
02035 
02036   /* grow a as required */
02037   if (x->alloc < n->used + 1) {
02038     if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
02039       return res;
02040     }
02041   }
02042 
02043 #ifdef CYASSL_SMALL_STACK
02044   W = (mp_word*)XMALLOC(sizeof(mp_word) * MP_WARRAY, 0, DYNAMIC_TYPE_BIGINT);
02045   if (W == NULL)    
02046     return MP_MEM;
02047 #endif
02048 
02049   /* first we have to get the digits of the input into
02050    * an array of double precision words W[...]
02051    */
02052   {
02053     register mp_word *_W;
02054     register mp_digit *tmpx;
02055 
02056     /* alias for the W[] array */
02057     _W   = W;
02058 
02059     /* alias for the digits of  x*/
02060     tmpx = x->dp;
02061 
02062     /* copy the digits of a into W[0..a->used-1] */
02063     for (ix = 0; ix < x->used; ix++) {
02064       *_W++ = *tmpx++;
02065     }
02066 
02067     /* zero the high words of W[a->used..m->used*2] */
02068     for (; ix < n->used * 2 + 1; ix++) {
02069       *_W++ = 0;
02070     }
02071   }
02072 
02073   /* now we proceed to zero successive digits
02074    * from the least significant upwards
02075    */
02076   for (ix = 0; ix < n->used; ix++) {
02077     /* mu = ai * m' mod b
02078      *
02079      * We avoid a double precision multiplication (which isn't required)
02080      * by casting the value down to a mp_digit.  Note this requires
02081      * that W[ix-1] have  the carry cleared (see after the inner loop)
02082      */
02083     register mp_digit mu;
02084     mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
02085 
02086     /* a = a + mu * m * b**i
02087      *
02088      * This is computed in place and on the fly.  The multiplication
02089      * by b**i is handled by offseting which columns the results
02090      * are added to.
02091      *
02092      * Note the comba method normally doesn't handle carries in the
02093      * inner loop In this case we fix the carry from the previous
02094      * column since the Montgomery reduction requires digits of the
02095      * result (so far) [see above] to work.  This is
02096      * handled by fixing up one carry after the inner loop.  The
02097      * carry fixups are done in order so after these loops the
02098      * first m->used words of W[] have the carries fixed
02099      */
02100     {
02101       register int iy;
02102       register mp_digit *tmpn;
02103       register mp_word *_W;
02104 
02105       /* alias for the digits of the modulus */
02106       tmpn = n->dp;
02107 
02108       /* Alias for the columns set by an offset of ix */
02109       _W = W + ix;
02110 
02111       /* inner loop */
02112       for (iy = 0; iy < n->used; iy++) {
02113           *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++);
02114       }
02115     }
02116 
02117     /* now fix carry for next digit, W[ix+1] */
02118     W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
02119   }
02120 
02121   /* now we have to propagate the carries and
02122    * shift the words downward [all those least
02123    * significant digits we zeroed].
02124    */
02125   {
02126     register mp_digit *tmpx;
02127     register mp_word *_W, *_W1;
02128 
02129     /* nox fix rest of carries */
02130 
02131     /* alias for current word */
02132     _W1 = W + ix;
02133 
02134     /* alias for next word, where the carry goes */
02135     _W = W + ++ix;
02136 
02137     for (; ix <= n->used * 2 + 1; ix++) {
02138       *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
02139     }
02140 
02141     /* copy out, A = A/b**n
02142      *
02143      * The result is A/b**n but instead of converting from an
02144      * array of mp_word to mp_digit than calling mp_rshd
02145      * we just copy them in the right order
02146      */
02147 
02148     /* alias for destination word */
02149     tmpx = x->dp;
02150 
02151     /* alias for shifted double precision result */
02152     _W = W + n->used;
02153 
02154     for (ix = 0; ix < n->used + 1; ix++) {
02155       *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
02156     }
02157 
02158     /* zero oldused digits, if the input a was larger than
02159      * m->used+1 we'll have to clear the digits
02160      */
02161     for (; ix < olduse; ix++) {
02162       *tmpx++ = 0;
02163     }
02164   }
02165 
02166   /* set the max used and clamp */
02167   x->used = n->used + 1;
02168   mp_clamp (x);
02169 
02170 #ifdef CYASSL_SMALL_STACK
02171   XFREE(W, 0, DYNAMIC_TYPE_BIGINT);
02172 #endif
02173 
02174   /* if A >= m then A = A - m */
02175   if (mp_cmp_mag (x, n) != MP_LT) {
02176     return s_mp_sub (x, n, x);
02177   }
02178   return MP_OKAY;
02179 }
02180 
02181 
02182 /* computes xR**-1 == x (mod N) via Montgomery Reduction */
02183 int
02184 mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
02185 {
02186   int     ix, res, digs;
02187   mp_digit mu;
02188 
02189   /* can the fast reduction [comba] method be used?
02190    *
02191    * Note that unlike in mul you're safely allowed *less*
02192    * than the available columns [255 per default] since carries
02193    * are fixed up in the inner loop.
02194    */
02195   digs = n->used * 2 + 1;
02196   if ((digs < MP_WARRAY) &&
02197       n->used <
02198       (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
02199     return fast_mp_montgomery_reduce (x, n, rho);
02200   }
02201 
02202   /* grow the input as required */
02203   if (x->alloc < digs) {
02204     if ((res = mp_grow (x, digs)) != MP_OKAY) {
02205       return res;
02206     }
02207   }
02208   x->used = digs;
02209 
02210   for (ix = 0; ix < n->used; ix++) {
02211     /* mu = ai * rho mod b
02212      *
02213      * The value of rho must be precalculated via
02214      * montgomery_setup() such that
02215      * it equals -1/n0 mod b this allows the
02216      * following inner loop to reduce the
02217      * input one digit at a time
02218      */
02219     mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK);
02220 
02221     /* a = a + mu * m * b**i */
02222     {
02223       register int iy;
02224       register mp_digit *tmpn, *tmpx, u;
02225       register mp_word r;
02226 
02227       /* alias for digits of the modulus */
02228       tmpn = n->dp;
02229 
02230       /* alias for the digits of x [the input] */
02231       tmpx = x->dp + ix;
02232 
02233       /* set the carry to zero */
02234       u = 0;
02235 
02236       /* Multiply and add in place */
02237       for (iy = 0; iy < n->used; iy++) {
02238         /* compute product and sum */
02239         r       = ((mp_word)mu) * ((mp_word)*tmpn++) +
02240                   ((mp_word) u) + ((mp_word) * tmpx);
02241 
02242         /* get carry */
02243         u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
02244 
02245         /* fix digit */
02246         *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK));
02247       }
02248       /* At this point the ix'th digit of x should be zero */
02249 
02250 
02251       /* propagate carries upwards as required*/
02252       while (u) {
02253         *tmpx   += u;
02254         u        = *tmpx >> DIGIT_BIT;
02255         *tmpx++ &= MP_MASK;
02256       }
02257     }
02258   }
02259 
02260   /* at this point the n.used'th least
02261    * significant digits of x are all zero
02262    * which means we can shift x to the
02263    * right by n.used digits and the
02264    * residue is unchanged.
02265    */
02266 
02267   /* x = x/b**n.used */
02268   mp_clamp(x);
02269   mp_rshd (x, n->used);
02270 
02271   /* if x >= n then x = x - n */
02272   if (mp_cmp_mag (x, n) != MP_LT) {
02273     return s_mp_sub (x, n, x);
02274   }
02275 
02276   return MP_OKAY;
02277 }
02278 
02279 
02280 /* determines the setup value */
02281 void mp_dr_setup(mp_int *a, mp_digit *d)
02282 {
02283    /* the casts are required if DIGIT_BIT is one less than
02284     * the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
02285     */
02286    *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) - 
02287         ((mp_word)a->dp[0]));
02288 }
02289 
02290 
02291 /* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
02292  *
02293  * Based on algorithm from the paper
02294  *
02295  * "Generating Efficient Primes for Discrete Log Cryptosystems"
02296  *                 Chae Hoon Lim, Pil Joong Lee,
02297  *          POSTECH Information Research Laboratories
02298  *
02299  * The modulus must be of a special format [see manual]
02300  *
02301  * Has been modified to use algorithm 7.10 from the LTM book instead
02302  *
02303  * Input x must be in the range 0 <= x <= (n-1)**2
02304  */
02305 int
02306 mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k)
02307 {
02308   int      err, i, m;
02309   mp_word  r;
02310   mp_digit mu, *tmpx1, *tmpx2;
02311 
02312   /* m = digits in modulus */
02313   m = n->used;
02314 
02315   /* ensure that "x" has at least 2m digits */
02316   if (x->alloc < m + m) {
02317     if ((err = mp_grow (x, m + m)) != MP_OKAY) {
02318       return err;
02319     }
02320   }
02321 
02322 /* top of loop, this is where the code resumes if
02323  * another reduction pass is required.
02324  */
02325 top:
02326   /* aliases for digits */
02327   /* alias for lower half of x */
02328   tmpx1 = x->dp;
02329 
02330   /* alias for upper half of x, or x/B**m */
02331   tmpx2 = x->dp + m;
02332 
02333   /* set carry to zero */
02334   mu = 0;
02335 
02336   /* compute (x mod B**m) + k * [x/B**m] inline and inplace */
02337   for (i = 0; i < m; i++) {
02338       r         = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu;
02339       *tmpx1++  = (mp_digit)(r & MP_MASK);
02340       mu        = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
02341   }
02342 
02343   /* set final carry */
02344   *tmpx1++ = mu;
02345 
02346   /* zero words above m */
02347   for (i = m + 1; i < x->used; i++) {
02348       *tmpx1++ = 0;
02349   }
02350 
02351   /* clamp, sub and return */
02352   mp_clamp (x);
02353 
02354   /* if x >= n then subtract and reduce again
02355    * Each successive "recursion" makes the input smaller and smaller.
02356    */
02357   if (mp_cmp_mag (x, n) != MP_LT) {
02358     s_mp_sub(x, n, x);
02359     goto top;
02360   }
02361   return MP_OKAY;
02362 }
02363 
02364 
02365 /* reduces a modulo n where n is of the form 2**p - d */
02366 int mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d)
02367 {
02368    mp_int q;
02369    int    p, res;
02370    
02371    if ((res = mp_init(&q)) != MP_OKAY) {
02372       return res;
02373    }
02374    
02375    p = mp_count_bits(n);    
02376 top:
02377    /* q = a/2**p, a = a mod 2**p */
02378    if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
02379       goto ERR;
02380    }
02381    
02382    if (d != 1) {
02383       /* q = q * d */
02384       if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) { 
02385          goto ERR;
02386       }
02387    }
02388    
02389    /* a = a + q */
02390    if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
02391       goto ERR;
02392    }
02393    
02394    if (mp_cmp_mag(a, n) != MP_LT) {
02395       s_mp_sub(a, n, a);
02396       goto top;
02397    }
02398    
02399 ERR:
02400    mp_clear(&q);
02401    return res;
02402 }
02403 
02404 
02405 /* determines the setup value */
02406 int mp_reduce_2k_setup(mp_int *a, mp_digit *d)
02407 {
02408    int res, p;
02409    mp_int tmp;
02410    
02411    if ((res = mp_init(&tmp)) != MP_OKAY) {
02412       return res;
02413    }
02414    
02415    p = mp_count_bits(a);
02416    if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
02417       mp_clear(&tmp);
02418       return res;
02419    }
02420    
02421    if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
02422       mp_clear(&tmp);
02423       return res;
02424    }
02425    
02426    *d = tmp.dp[0];
02427    mp_clear(&tmp);
02428    return MP_OKAY;
02429 }
02430 
02431 
02432 /* computes a = 2**b 
02433  *
02434  * Simple algorithm which zeroes the int, grows it then just sets one bit
02435  * as required.
02436  */
02437 int
02438 mp_2expt (mp_int * a, int b)
02439 {
02440   int     res;
02441 
02442   /* zero a as per default */
02443   mp_zero (a);
02444 
02445   /* grow a to accomodate the single bit */
02446   if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) {
02447     return res;
02448   }
02449 
02450   /* set the used count of where the bit will go */
02451   a->used = b / DIGIT_BIT + 1;
02452 
02453   /* put the single bit in its place */
02454   a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
02455 
02456   return MP_OKAY;
02457 }
02458 
02459 
02460 /* multiply by a digit */
02461 int
02462 mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
02463 {
02464   mp_digit u, *tmpa, *tmpc;
02465   mp_word  r;
02466   int      ix, res, olduse;
02467 
02468   /* make sure c is big enough to hold a*b */
02469   if (c->alloc < a->used + 1) {
02470     if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
02471       return res;
02472     }
02473   }
02474 
02475   /* get the original destinations used count */
02476   olduse = c->used;
02477 
02478   /* set the sign */
02479   c->sign = a->sign;
02480 
02481   /* alias for a->dp [source] */
02482   tmpa = a->dp;
02483 
02484   /* alias for c->dp [dest] */
02485   tmpc = c->dp;
02486 
02487   /* zero carry */
02488   u = 0;
02489 
02490   /* compute columns */
02491   for (ix = 0; ix < a->used; ix++) {
02492     /* compute product and carry sum for this term */
02493     r       = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b);
02494 
02495     /* mask off higher bits to get a single digit */
02496     *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
02497 
02498     /* send carry into next iteration */
02499     u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
02500   }
02501 
02502   /* store final carry [if any] and increment ix offset  */
02503   *tmpc++ = u;
02504   ++ix;
02505 
02506   /* now zero digits above the top */
02507   while (ix++ < olduse) {
02508      *tmpc++ = 0;
02509   }
02510 
02511   /* set used count */
02512   c->used = a->used + 1;
02513   mp_clamp(c);
02514 
02515   return MP_OKAY;
02516 }
02517 
02518 
02519 /* d = a * b (mod c) */
02520 int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
02521 {
02522   int     res;
02523   mp_int  t;
02524 
02525   if ((res = mp_init (&t)) != MP_OKAY) {
02526     return res;
02527   }
02528 
02529   if ((res = mp_mul (a, b, &t)) != MP_OKAY) {
02530     mp_clear (&t);
02531     return res;
02532   }
02533   res = mp_mod (&t, c, d);
02534   mp_clear (&t);
02535   return res;
02536 }
02537 
02538 
02539 /* computes b = a*a */
02540 int
02541 mp_sqr (mp_int * a, mp_int * b)
02542 {
02543   int     res;
02544 
02545   {
02546 #ifdef BN_FAST_S_MP_SQR_C
02547     /* can we use the fast comba multiplier? */
02548     if ((a->used * 2 + 1) < MP_WARRAY && 
02549          a->used < 
02550          (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
02551       res = fast_s_mp_sqr (a, b);
02552     } else
02553 #endif
02554 #ifdef BN_S_MP_SQR_C
02555       res = s_mp_sqr (a, b);
02556 #else
02557       res = MP_VAL;
02558 #endif
02559   }
02560   b->sign = MP_ZPOS;
02561   return res;
02562 }
02563 
02564 
02565 /* high level multiplication (handles sign) */
02566 int mp_mul (mp_int * a, mp_int * b, mp_int * c)
02567 {
02568   int     res, neg;
02569   neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
02570 
02571   {
02572     /* can we use the fast multiplier?
02573      *
02574      * The fast multiplier can be used if the output will 
02575      * have less than MP_WARRAY digits and the number of 
02576      * digits won't affect carry propagation
02577      */
02578     int     digs = a->used + b->used + 1;
02579 
02580 #ifdef BN_FAST_S_MP_MUL_DIGS_C
02581     if ((digs < MP_WARRAY) &&
02582         MIN(a->used, b->used) <= 
02583         (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
02584       res = fast_s_mp_mul_digs (a, b, c, digs);
02585     } else 
02586 #endif
02587 #ifdef BN_S_MP_MUL_DIGS_C
02588       res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
02589 #else
02590       res = MP_VAL;
02591 #endif
02592 
02593   }
02594   c->sign = (c->used > 0) ? neg : MP_ZPOS;
02595   return res;
02596 }
02597 
02598 
02599 /* b = a*2 */
02600 int mp_mul_2(mp_int * a, mp_int * b)
02601 {
02602   int     x, res, oldused;
02603 
02604   /* grow to accomodate result */
02605   if (b->alloc < a->used + 1) {
02606     if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
02607       return res;
02608     }
02609   }
02610 
02611   oldused = b->used;
02612   b->used = a->used;
02613 
02614   {
02615     register mp_digit r, rr, *tmpa, *tmpb;
02616 
02617     /* alias for source */
02618     tmpa = a->dp;
02619     
02620     /* alias for dest */
02621     tmpb = b->dp;
02622 
02623     /* carry */
02624     r = 0;
02625     for (x = 0; x < a->used; x++) {
02626     
02627       /* get what will be the *next* carry bit from the 
02628        * MSB of the current digit 
02629        */
02630       rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
02631       
02632       /* now shift up this digit, add in the carry [from the previous] */
02633       *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK;
02634       
02635       /* copy the carry that would be from the source 
02636        * digit into the next iteration 
02637        */
02638       r = rr;
02639     }
02640 
02641     /* new leading digit? */
02642     if (r != 0) {
02643       /* add a MSB which is always 1 at this point */
02644       *tmpb = 1;
02645       ++(b->used);
02646     }
02647 
02648     /* now zero any excess digits on the destination 
02649      * that we didn't write to 
02650      */
02651     tmpb = b->dp + b->used;
02652     for (x = b->used; x < oldused; x++) {
02653       *tmpb++ = 0;
02654     }
02655   }
02656   b->sign = a->sign;
02657   return MP_OKAY;
02658 }
02659 
02660 
02661 /* divide by three (based on routine from MPI and the GMP manual) */
02662 int
02663 mp_div_3 (mp_int * a, mp_int *c, mp_digit * d)
02664 {
02665   mp_int   q;
02666   mp_word  w, t;
02667   mp_digit b;
02668   int      res, ix;
02669   
02670   /* b = 2**DIGIT_BIT / 3 */
02671   b = (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3);
02672 
02673   if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
02674      return res;
02675   }
02676   
02677   q.used = a->used;
02678   q.sign = a->sign;
02679   w = 0;
02680   for (ix = a->used - 1; ix >= 0; ix--) {
02681      w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
02682 
02683      if (w >= 3) {
02684         /* multiply w by [1/3] */
02685         t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT);
02686 
02687         /* now subtract 3 * [w/3] from w, to get the remainder */
02688         w -= t+t+t;
02689 
02690         /* fixup the remainder as required since
02691          * the optimization is not exact.
02692          */
02693         while (w >= 3) {
02694            t += 1;
02695            w -= 3;
02696         }
02697       } else {
02698         t = 0;
02699       }
02700       q.dp[ix] = (mp_digit)t;
02701   }
02702 
02703   /* [optional] store the remainder */
02704   if (d != NULL) {
02705      *d = (mp_digit)w;
02706   }
02707 
02708   /* [optional] store the quotient */
02709   if (c != NULL) {
02710      mp_clamp(&q);
02711      mp_exch(&q, c);
02712   }
02713   mp_clear(&q);
02714   
02715   return res;
02716 }
02717 
02718 
02719 /* init an mp_init for a given size */
02720 int mp_init_size (mp_int * a, int size)
02721 {
02722   int x;
02723 
02724   /* pad size so there are always extra digits */
02725   size += (MP_PREC * 2) - (size % MP_PREC); 
02726   
02727   /* alloc mem */
02728   a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size, 0,
02729                                       DYNAMIC_TYPE_BIGINT);
02730   if (a->dp == NULL) {
02731     return MP_MEM;
02732   }
02733 
02734   /* set the members */
02735   a->used  = 0;
02736   a->alloc = size;
02737   a->sign  = MP_ZPOS;
02738 
02739   /* zero the digits */
02740   for (x = 0; x < size; x++) {
02741       a->dp[x] = 0;
02742   }
02743 
02744   return MP_OKAY;
02745 }
02746 
02747 
02748 /* the jist of squaring...
02749  * you do like mult except the offset of the tmpx [one that 
02750  * starts closer to zero] can't equal the offset of tmpy.  
02751  * So basically you set up iy like before then you min it with
02752  * (ty-tx) so that it never happens.  You double all those 
02753  * you add in the inner loop
02754 
02755 After that loop you do the squares and add them in.
02756 */
02757 
02758 int fast_s_mp_sqr (mp_int * a, mp_int * b)
02759 {
02760   int       olduse, res, pa, ix, iz;
02761 #ifdef CYASSL_SMALL_STACK
02762   mp_digit* W;    /* uses dynamic memory and slower */
02763 #else
02764   mp_digit W[MP_WARRAY];
02765 #endif
02766   mp_digit  *tmpx;
02767   mp_word   W1;
02768 
02769   /* grow the destination as required */
02770   pa = a->used + a->used;
02771   if (b->alloc < pa) {
02772     if ((res = mp_grow (b, pa)) != MP_OKAY) {
02773       return res;
02774     }
02775   }
02776 
02777   if (pa > MP_WARRAY)
02778     return MP_RANGE;  /* TAO range check */
02779 
02780 #ifdef CYASSL_SMALL_STACK
02781   W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, 0, DYNAMIC_TYPE_BIGINT);
02782   if (W == NULL)    
02783     return MP_MEM;
02784 #endif
02785 
02786   /* number of output digits to produce */
02787   W1 = 0;
02788   for (ix = 0; ix < pa; ix++) { 
02789       int      tx, ty, iy;
02790       mp_word  _W;
02791       mp_digit *tmpy;
02792 
02793       /* clear counter */
02794       _W = 0;
02795 
02796       /* get offsets into the two bignums */
02797       ty = MIN(a->used-1, ix);
02798       tx = ix - ty;
02799 
02800       /* setup temp aliases */
02801       tmpx = a->dp + tx;
02802       tmpy = a->dp + ty;
02803 
02804       /* this is the number of times the loop will iterrate, essentially
02805          while (tx++ < a->used && ty-- >= 0) { ... }
02806        */
02807       iy = MIN(a->used-tx, ty+1);
02808 
02809       /* now for squaring tx can never equal ty 
02810        * we halve the distance since they approach at a rate of 2x
02811        * and we have to round because odd cases need to be executed
02812        */
02813       iy = MIN(iy, (ty-tx+1)>>1);
02814 
02815       /* execute loop */
02816       for (iz = 0; iz < iy; iz++) {
02817          _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
02818       }
02819 
02820       /* double the inner product and add carry */
02821       _W = _W + _W + W1;
02822 
02823       /* even columns have the square term in them */
02824       if ((ix&1) == 0) {
02825          _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
02826       }
02827 
02828       /* store it */
02829       W[ix] = (mp_digit)(_W & MP_MASK);
02830 
02831       /* make next carry */
02832       W1 = _W >> ((mp_word)DIGIT_BIT);
02833   }
02834 
02835   /* setup dest */
02836   olduse  = b->used;
02837   b->used = a->used+a->used;
02838 
02839   {
02840     mp_digit *tmpb;
02841     tmpb = b->dp;
02842     for (ix = 0; ix < pa; ix++) {
02843       *tmpb++ = W[ix] & MP_MASK;
02844     }
02845 
02846     /* clear unused digits [that existed in the old copy of c] */
02847     for (; ix < olduse; ix++) {
02848       *tmpb++ = 0;
02849     }
02850   }
02851   mp_clamp (b);
02852 
02853 #ifdef CYASSL_SMALL_STACK
02854   XFREE(W, 0, DYNAMIC_TYPE_BIGINT);
02855 #endif
02856 
02857   return MP_OKAY;
02858 }
02859 
02860 
02861 /* Fast (comba) multiplier
02862  *
02863  * This is the fast column-array [comba] multiplier.  It is 
02864  * designed to compute the columns of the product first 
02865  * then handle the carries afterwards.  This has the effect 
02866  * of making the nested loops that compute the columns very
02867  * simple and schedulable on super-scalar processors.
02868  *
02869  * This has been modified to produce a variable number of 
02870  * digits of output so if say only a half-product is required 
02871  * you don't have to compute the upper half (a feature 
02872  * required for fast Barrett reduction).
02873  *
02874  * Based on Algorithm 14.12 on pp.595 of HAC.
02875  *
02876  */
02877 int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
02878 {
02879   int     olduse, res, pa, ix, iz;
02880 #ifdef CYASSL_SMALL_STACK
02881   mp_digit* W;    /* uses dynamic memory and slower */
02882 #else
02883   mp_digit W[MP_WARRAY];
02884 #endif
02885   register mp_word  _W;
02886 
02887   /* grow the destination as required */
02888   if (c->alloc < digs) {
02889     if ((res = mp_grow (c, digs)) != MP_OKAY) {
02890       return res;
02891     }
02892   }
02893 
02894   /* number of output digits to produce */
02895   pa = MIN(digs, a->used + b->used);
02896   if (pa > MP_WARRAY)
02897     return MP_RANGE;  /* TAO range check */
02898 
02899 #ifdef CYASSL_SMALL_STACK
02900   W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, 0, DYNAMIC_TYPE_BIGINT);
02901   if (W == NULL)    
02902     return MP_MEM;
02903 #endif
02904 
02905   /* clear the carry */
02906   _W = 0;
02907   for (ix = 0; ix < pa; ix++) { 
02908       int      tx, ty;
02909       int      iy;
02910       mp_digit *tmpx, *tmpy;
02911 
02912       /* get offsets into the two bignums */
02913       ty = MIN(b->used-1, ix);
02914       tx = ix - ty;
02915 
02916       /* setup temp aliases */
02917       tmpx = a->dp + tx;
02918       tmpy = b->dp + ty;
02919 
02920       /* this is the number of times the loop will iterrate, essentially 
02921          while (tx++ < a->used && ty-- >= 0) { ... }
02922        */
02923       iy = MIN(a->used-tx, ty+1);
02924 
02925       /* execute loop */
02926       for (iz = 0; iz < iy; ++iz) {
02927          _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
02928 
02929       }
02930 
02931       /* store term */
02932       W[ix] = ((mp_digit)_W) & MP_MASK;
02933 
02934       /* make next carry */
02935       _W = _W >> ((mp_word)DIGIT_BIT);
02936  }
02937 
02938   /* setup dest */
02939   olduse  = c->used;
02940   c->used = pa;
02941 
02942   {
02943     register mp_digit *tmpc;
02944     tmpc = c->dp;
02945     for (ix = 0; ix < pa+1; ix++) {
02946       /* now extract the previous digit [below the carry] */
02947       *tmpc++ = W[ix];
02948     }
02949 
02950     /* clear unused digits [that existed in the old copy of c] */
02951     for (; ix < olduse; ix++) {
02952       *tmpc++ = 0;
02953     }
02954   }
02955   mp_clamp (c);
02956 
02957 #ifdef CYASSL_SMALL_STACK
02958   XFREE(W, 0, DYNAMIC_TYPE_BIGINT);
02959 #endif
02960 
02961   return MP_OKAY;
02962 }
02963 
02964 
02965 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
02966 int s_mp_sqr (mp_int * a, mp_int * b)
02967 {
02968   mp_int  t;
02969   int     res, ix, iy, pa;
02970   mp_word r;
02971   mp_digit u, tmpx, *tmpt;
02972 
02973   pa = a->used;
02974   if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
02975     return res;
02976   }
02977 
02978   /* default used is maximum possible size */
02979   t.used = 2*pa + 1;
02980 
02981   for (ix = 0; ix < pa; ix++) {
02982     /* first calculate the digit at 2*ix */
02983     /* calculate double precision result */
02984     r = ((mp_word) t.dp[2*ix]) +
02985         ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
02986 
02987     /* store lower part in result */
02988     t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
02989 
02990     /* get the carry */
02991     u           = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
02992 
02993     /* left hand side of A[ix] * A[iy] */
02994     tmpx        = a->dp[ix];
02995 
02996     /* alias for where to store the results */
02997     tmpt        = t.dp + (2*ix + 1);
02998     
02999     for (iy = ix + 1; iy < pa; iy++) {
03000       /* first calculate the product */
03001       r       = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
03002 
03003       /* now calculate the double precision result, note we use
03004        * addition instead of *2 since it's easier to optimize
03005        */
03006       r       = ((mp_word) *tmpt) + r + r + ((mp_word) u);
03007 
03008       /* store lower part */
03009       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
03010 
03011       /* get carry */
03012       u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
03013     }
03014     /* propagate upwards */
03015     while (u != ((mp_digit) 0)) {
03016       r       = ((mp_word) *tmpt) + ((mp_word) u);
03017       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
03018       u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
03019     }
03020   }
03021 
03022   mp_clamp (&t);
03023   mp_exch (&t, b);
03024   mp_clear (&t);
03025   return MP_OKAY;
03026 }
03027 
03028 
03029 /* multiplies |a| * |b| and only computes upto digs digits of result
03030  * HAC pp. 595, Algorithm 14.12  Modified so you can control how 
03031  * many digits of output are created.
03032  */
03033 int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
03034 {
03035   mp_int  t;
03036   int     res, pa, pb, ix, iy;
03037   mp_digit u;
03038   mp_word r;
03039   mp_digit tmpx, *tmpt, *tmpy;
03040 
03041   /* can we use the fast multiplier? */
03042   if (((digs) < MP_WARRAY) &&
03043       MIN (a->used, b->used) < 
03044           (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
03045     return fast_s_mp_mul_digs (a, b, c, digs);
03046   }
03047 
03048   if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
03049     return res;
03050   }
03051   t.used = digs;
03052 
03053   /* compute the digits of the product directly */
03054   pa = a->used;
03055   for (ix = 0; ix < pa; ix++) {
03056     /* set the carry to zero */
03057     u = 0;
03058 
03059     /* limit ourselves to making digs digits of output */
03060     pb = MIN (b->used, digs - ix);
03061 
03062     /* setup some aliases */
03063     /* copy of the digit from a used within the nested loop */
03064     tmpx = a->dp[ix];
03065     
03066     /* an alias for the destination shifted ix places */
03067     tmpt = t.dp + ix;
03068     
03069     /* an alias for the digits of b */
03070     tmpy = b->dp;
03071 
03072     /* compute the columns of the output and propagate the carry */
03073     for (iy = 0; iy < pb; iy++) {
03074       /* compute the column as a mp_word */
03075       r       = ((mp_word)*tmpt) +
03076                 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
03077                 ((mp_word) u);
03078 
03079       /* the new column is the lower part of the result */
03080       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
03081 
03082       /* get the carry word from the result */
03083       u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
03084     }
03085     /* set carry if it is placed below digs */
03086     if (ix + iy < digs) {
03087       *tmpt = u;
03088     }
03089   }
03090 
03091   mp_clamp (&t);
03092   mp_exch (&t, c);
03093 
03094   mp_clear (&t);
03095   return MP_OKAY;
03096 }
03097 
03098 
03099 /*
03100  * shifts with subtractions when the result is greater than b.
03101  *
03102  * The method is slightly modified to shift B unconditionally upto just under
03103  * the leading bit of b.  This saves alot of multiple precision shifting.
03104  */
03105 int mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
03106 {
03107   int     x, bits, res;
03108 
03109   /* how many bits of last digit does b use */
03110   bits = mp_count_bits (b) % DIGIT_BIT;
03111 
03112   if (b->used > 1) {
03113      if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) {
03114         return res;
03115      }
03116   } else {
03117      mp_set(a, 1);
03118      bits = 1;
03119   }
03120 
03121 
03122   /* now compute C = A * B mod b */
03123   for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
03124     if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
03125       return res;
03126     }
03127     if (mp_cmp_mag (a, b) != MP_LT) {
03128       if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
03129         return res;
03130       }
03131     }
03132   }
03133 
03134   return MP_OKAY;
03135 }
03136 
03137 
03138 #ifdef MP_LOW_MEM
03139    #define TAB_SIZE 32
03140 #else
03141    #define TAB_SIZE 256
03142 #endif
03143 
03144 int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
03145 {
03146   mp_int  M[TAB_SIZE], res, mu;
03147   mp_digit buf;
03148   int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
03149   int (*redux)(mp_int*,mp_int*,mp_int*);
03150 
03151   /* find window size */
03152   x = mp_count_bits (X);
03153   if (x <= 7) {
03154     winsize = 2;
03155   } else if (x <= 36) {
03156     winsize = 3;
03157   } else if (x <= 140) {
03158     winsize = 4;
03159   } else if (x <= 450) {
03160     winsize = 5;
03161   } else if (x <= 1303) {
03162     winsize = 6;
03163   } else if (x <= 3529) {
03164     winsize = 7;
03165   } else {
03166     winsize = 8;
03167   }
03168 
03169 #ifdef MP_LOW_MEM
03170     if (winsize > 5) {
03171        winsize = 5;
03172     }
03173 #endif
03174 
03175   /* init M array */
03176   /* init first cell */
03177   if ((err = mp_init(&M[1])) != MP_OKAY) {
03178      return err; 
03179   }
03180 
03181   /* now init the second half of the array */
03182   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
03183     if ((err = mp_init(&M[x])) != MP_OKAY) {
03184       for (y = 1<<(winsize-1); y < x; y++) {
03185         mp_clear (&M[y]);
03186       }
03187       mp_clear(&M[1]);
03188       return err;
03189     }
03190   }
03191 
03192   /* create mu, used for Barrett reduction */
03193   if ((err = mp_init (&mu)) != MP_OKAY) {
03194     goto LBL_M;
03195   }
03196   
03197   if (redmode == 0) {
03198      if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
03199         goto LBL_MU;
03200      }
03201      redux = mp_reduce;
03202   } else {
03203      if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) {
03204         goto LBL_MU;
03205      }
03206      redux = mp_reduce_2k_l;
03207   }    
03208 
03209   /* create M table
03210    *
03211    * The M table contains powers of the base, 
03212    * e.g. M[x] = G**x mod P
03213    *
03214    * The first half of the table is not 
03215    * computed though accept for M[0] and M[1]
03216    */
03217   if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
03218     goto LBL_MU;
03219   }
03220 
03221   /* compute the value at M[1<<(winsize-1)] by squaring 
03222    * M[1] (winsize-1) times 
03223    */
03224   if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
03225     goto LBL_MU;
03226   }
03227 
03228   for (x = 0; x < (winsize - 1); x++) {
03229     /* square it */
03230     if ((err = mp_sqr (&M[1 << (winsize - 1)], 
03231                        &M[1 << (winsize - 1)])) != MP_OKAY) {
03232       goto LBL_MU;
03233     }
03234 
03235     /* reduce modulo P */
03236     if ((err = redux (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
03237       goto LBL_MU;
03238     }
03239   }
03240 
03241   /* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
03242    * for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
03243    */
03244   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
03245     if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
03246       goto LBL_MU;
03247     }
03248     if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
03249       goto LBL_MU;
03250     }
03251   }
03252 
03253   /* setup result */
03254   if ((err = mp_init (&res)) != MP_OKAY) {
03255     goto LBL_MU;
03256   }
03257   mp_set (&res, 1);
03258 
03259   /* set initial mode and bit cnt */
03260   mode   = 0;
03261   bitcnt = 1;
03262   buf    = 0;
03263   digidx = X->used - 1;
03264   bitcpy = 0;
03265   bitbuf = 0;
03266 
03267   for (;;) {
03268     /* grab next digit as required */
03269     if (--bitcnt == 0) {
03270       /* if digidx == -1 we are out of digits */
03271       if (digidx == -1) {
03272         break;
03273       }
03274       /* read next digit and reset the bitcnt */
03275       buf    = X->dp[digidx--];
03276       bitcnt = (int) DIGIT_BIT;
03277     }
03278 
03279     /* grab the next msb from the exponent */
03280     y     = (int)(buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
03281     buf <<= (mp_digit)1;
03282 
03283     /* if the bit is zero and mode == 0 then we ignore it
03284      * These represent the leading zero bits before the first 1 bit
03285      * in the exponent.  Technically this opt is not required but it
03286      * does lower the # of trivial squaring/reductions used
03287      */
03288     if (mode == 0 && y == 0) {
03289       continue;
03290     }
03291 
03292     /* if the bit is zero and mode == 1 then we square */
03293     if (mode == 1 && y == 0) {
03294       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
03295         goto LBL_RES;
03296       }
03297       if ((err = redux (&res, P, &mu)) != MP_OKAY) {
03298         goto LBL_RES;
03299       }
03300       continue;
03301     }
03302 
03303     /* else we add it to the window */
03304     bitbuf |= (y << (winsize - ++bitcpy));
03305     mode    = 2;
03306 
03307     if (bitcpy == winsize) {
03308       /* ok window is filled so square as required and multiply  */
03309       /* square first */
03310       for (x = 0; x < winsize; x++) {
03311         if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
03312           goto LBL_RES;
03313         }
03314         if ((err = redux (&res, P, &mu)) != MP_OKAY) {
03315           goto LBL_RES;
03316         }
03317       }
03318 
03319       /* then multiply */
03320       if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
03321         goto LBL_RES;
03322       }
03323       if ((err = redux (&res, P, &mu)) != MP_OKAY) {
03324         goto LBL_RES;
03325       }
03326 
03327       /* empty window and reset */
03328       bitcpy = 0;
03329       bitbuf = 0;
03330       mode   = 1;
03331     }
03332   }
03333 
03334   /* if bits remain then square/multiply */
03335   if (mode == 2 && bitcpy > 0) {
03336     /* square then multiply if the bit is set */
03337     for (x = 0; x < bitcpy; x++) {
03338       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
03339         goto LBL_RES;
03340       }
03341       if ((err = redux (&res, P, &mu)) != MP_OKAY) {
03342         goto LBL_RES;
03343       }
03344 
03345       bitbuf <<= 1;
03346       if ((bitbuf & (1 << winsize)) != 0) {
03347         /* then multiply */
03348         if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
03349           goto LBL_RES;
03350         }
03351         if ((err = redux (&res, P, &mu)) != MP_OKAY) {
03352           goto LBL_RES;
03353         }
03354       }
03355     }
03356   }
03357 
03358   mp_exch (&res, Y);
03359   err = MP_OKAY;
03360 LBL_RES:mp_clear (&res);
03361 LBL_MU:mp_clear (&mu);
03362 LBL_M:
03363   mp_clear(&M[1]);
03364   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
03365     mp_clear (&M[x]);
03366   }
03367   return err;
03368 }
03369 
03370 
03371 /* pre-calculate the value required for Barrett reduction
03372  * For a given modulus "b" it calulates the value required in "a"
03373  */
03374 int mp_reduce_setup (mp_int * a, mp_int * b)
03375 {
03376   int     res;
03377   
03378   if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
03379     return res;
03380   }
03381   return mp_div (a, b, a, NULL);
03382 }
03383 
03384 
03385 /* reduces x mod m, assumes 0 < x < m**2, mu is 
03386  * precomputed via mp_reduce_setup.
03387  * From HAC pp.604 Algorithm 14.42
03388  */
03389 int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
03390 {
03391   mp_int  q;
03392   int     res, um = m->used;
03393 
03394   /* q = x */
03395   if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
03396     return res;
03397   }
03398 
03399   /* q1 = x / b**(k-1)  */
03400   mp_rshd (&q, um - 1);         
03401 
03402   /* according to HAC this optimization is ok */
03403   if (((mp_word) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
03404     if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
03405       goto CLEANUP;
03406     }
03407   } else {
03408 #ifdef BN_S_MP_MUL_HIGH_DIGS_C
03409     if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
03410       goto CLEANUP;
03411     }
03412 #elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C)
03413     if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
03414       goto CLEANUP;
03415     }
03416 #else 
03417     { 
03418       res = MP_VAL;
03419       goto CLEANUP;
03420     }
03421 #endif
03422   }
03423 
03424   /* q3 = q2 / b**(k+1) */
03425   mp_rshd (&q, um + 1);         
03426 
03427   /* x = x mod b**(k+1), quick (no division) */
03428   if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
03429     goto CLEANUP;
03430   }
03431 
03432   /* q = q * m mod b**(k+1), quick (no division) */
03433   if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
03434     goto CLEANUP;
03435   }
03436 
03437   /* x = x - q */
03438   if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
03439     goto CLEANUP;
03440   }
03441 
03442   /* If x < 0, add b**(k+1) to it */
03443   if (mp_cmp_d (x, 0) == MP_LT) {
03444     mp_set (&q, 1);
03445     if ((res = mp_lshd (&q, um + 1)) != MP_OKAY)
03446       goto CLEANUP;
03447     if ((res = mp_add (x, &q, x)) != MP_OKAY)
03448       goto CLEANUP;
03449   }
03450 
03451   /* Back off if it's too big */
03452   while (mp_cmp (x, m) != MP_LT) {
03453     if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
03454       goto CLEANUP;
03455     }
03456   }
03457   
03458 CLEANUP:
03459   mp_clear (&q);
03460 
03461   return res;
03462 }
03463 
03464 
03465 /* reduces a modulo n where n is of the form 2**p - d 
03466    This differs from reduce_2k since "d" can be larger
03467    than a single digit.
03468 */
03469 int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d)
03470 {
03471    mp_int q;
03472    int    p, res;
03473    
03474    if ((res = mp_init(&q)) != MP_OKAY) {
03475       return res;
03476    }
03477    
03478    p = mp_count_bits(n);    
03479 top:
03480    /* q = a/2**p, a = a mod 2**p */
03481    if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
03482       goto ERR;
03483    }
03484    
03485    /* q = q * d */
03486    if ((res = mp_mul(&q, d, &q)) != MP_OKAY) { 
03487       goto ERR;
03488    }
03489    
03490    /* a = a + q */
03491    if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
03492       goto ERR;
03493    }
03494    
03495    if (mp_cmp_mag(a, n) != MP_LT) {
03496       s_mp_sub(a, n, a);
03497       goto top;
03498    }
03499    
03500 ERR:
03501    mp_clear(&q);
03502    return res;
03503 }
03504 
03505 
03506 /* determines the setup value */
03507 int mp_reduce_2k_setup_l(mp_int *a, mp_int *d)
03508 {
03509    int    res;
03510    mp_int tmp;
03511    
03512    if ((res = mp_init(&tmp)) != MP_OKAY) {
03513       return res;
03514    }
03515    
03516    if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
03517       goto ERR;
03518    }
03519    
03520    if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
03521       goto ERR;
03522    }
03523    
03524 ERR:
03525    mp_clear(&tmp);
03526    return res;
03527 }
03528 
03529 
03530 /* multiplies |a| * |b| and does not compute the lower digs digits
03531  * [meant to get the higher part of the product]
03532  */
03533 int
03534 s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
03535 {
03536   mp_int  t;
03537   int     res, pa, pb, ix, iy;
03538   mp_digit u;
03539   mp_word r;
03540   mp_digit tmpx, *tmpt, *tmpy;
03541 
03542   /* can we use the fast multiplier? */
03543 #ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
03544   if (((a->used + b->used + 1) < MP_WARRAY)
03545       && MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
03546     return fast_s_mp_mul_high_digs (a, b, c, digs);
03547   }
03548 #endif
03549 
03550   if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
03551     return res;
03552   }
03553   t.used = a->used + b->used + 1;
03554 
03555   pa = a->used;
03556   pb = b->used;
03557   for (ix = 0; ix < pa; ix++) {
03558     /* clear the carry */
03559     u = 0;
03560 
03561     /* left hand side of A[ix] * B[iy] */
03562     tmpx = a->dp[ix];
03563 
03564     /* alias to the address of where the digits will be stored */
03565     tmpt = &(t.dp[digs]);
03566 
03567     /* alias for where to read the right hand side from */
03568     tmpy = b->dp + (digs - ix);
03569 
03570     for (iy = digs - ix; iy < pb; iy++) {
03571       /* calculate the double precision result */
03572       r       = ((mp_word)*tmpt) +
03573                 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
03574                 ((mp_word) u);
03575 
03576       /* get the lower part */
03577       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
03578 
03579       /* carry the carry */
03580       u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
03581     }
03582     *tmpt = u;
03583   }
03584   mp_clamp (&t);
03585   mp_exch (&t, c);
03586   mp_clear (&t);
03587   return MP_OKAY;
03588 }
03589 
03590 
03591 /* this is a modified version of fast_s_mul_digs that only produces
03592  * output digits *above* digs.  See the comments for fast_s_mul_digs
03593  * to see how it works.
03594  *
03595  * This is used in the Barrett reduction since for one of the multiplications
03596  * only the higher digits were needed.  This essentially halves the work.
03597  *
03598  * Based on Algorithm 14.12 on pp.595 of HAC.
03599  */
03600 int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
03601 {
03602   int     olduse, res, pa, ix, iz;
03603 #ifdef CYASSL_SMALL_STACK
03604   mp_digit* W;    /* uses dynamic memory and slower */
03605 #else
03606   mp_digit W[MP_WARRAY];
03607 #endif
03608   mp_word  _W;
03609 
03610   /* grow the destination as required */
03611   pa = a->used + b->used;
03612   if (c->alloc < pa) {
03613     if ((res = mp_grow (c, pa)) != MP_OKAY) {
03614       return res;
03615     }
03616   }
03617 
03618   if (pa > MP_WARRAY)
03619     return MP_RANGE;  /* TAO range check */
03620   
03621 #ifdef CYASSL_SMALL_STACK
03622   W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, 0, DYNAMIC_TYPE_BIGINT);
03623   if (W == NULL)    
03624     return MP_MEM;
03625 #endif
03626 
03627   /* number of output digits to produce */
03628   pa = a->used + b->used;
03629   _W = 0;
03630   for (ix = digs; ix < pa; ix++) { 
03631       int      tx, ty, iy;
03632       mp_digit *tmpx, *tmpy;
03633 
03634       /* get offsets into the two bignums */
03635       ty = MIN(b->used-1, ix);
03636       tx = ix - ty;
03637 
03638       /* setup temp aliases */
03639       tmpx = a->dp + tx;
03640       tmpy = b->dp + ty;
03641 
03642       /* this is the number of times the loop will iterrate, essentially its 
03643          while (tx++ < a->used && ty-- >= 0) { ... }
03644        */
03645       iy = MIN(a->used-tx, ty+1);
03646 
03647       /* execute loop */
03648       for (iz = 0; iz < iy; iz++) {
03649          _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
03650       }
03651 
03652       /* store term */
03653       W[ix] = ((mp_digit)_W) & MP_MASK;
03654 
03655       /* make next carry */
03656       _W = _W >> ((mp_word)DIGIT_BIT);
03657   }
03658   
03659   /* setup dest */
03660   olduse  = c->used;
03661   c->used = pa;
03662 
03663   {
03664     register mp_digit *tmpc;
03665 
03666     tmpc = c->dp + digs;
03667     for (ix = digs; ix <= pa; ix++) {
03668       /* now extract the previous digit [below the carry] */
03669       *tmpc++ = W[ix];
03670     }
03671 
03672     /* clear unused digits [that existed in the old copy of c] */
03673     for (; ix < olduse; ix++) {
03674       *tmpc++ = 0;
03675     }
03676   }
03677   mp_clamp (c);
03678 
03679 #ifdef CYASSL_SMALL_STACK
03680   XFREE(W, 0, DYNAMIC_TYPE_BIGINT);
03681 #endif
03682 
03683   return MP_OKAY;
03684 }
03685 
03686 
03687 /* set a 32-bit const */
03688 int mp_set_int (mp_int * a, unsigned long b)
03689 {
03690   int     x, res;
03691 
03692   mp_zero (a);
03693   
03694   /* set four bits at a time */
03695   for (x = 0; x < 8; x++) {
03696     /* shift the number up four bits */
03697     if ((res = mp_mul_2d (a, 4, a)) != MP_OKAY) {
03698       return res;
03699     }
03700 
03701     /* OR in the top four bits of the source */
03702     a->dp[0] |= (b >> 28) & 15;
03703 
03704     /* shift the source up to the next four bits */
03705     b <<= 4;
03706 
03707     /* ensure that digits are not clamped off */
03708     a->used += 1;
03709   }
03710   mp_clamp (a);
03711   return MP_OKAY;
03712 }
03713 
03714 
03715 #if defined(CYASSL_KEY_GEN) || defined(HAVE_ECC)
03716 
03717 /* c = a * a (mod b) */
03718 int mp_sqrmod (mp_int * a, mp_int * b, mp_int * c)
03719 {
03720   int     res;
03721   mp_int  t;
03722 
03723   if ((res = mp_init (&t)) != MP_OKAY) {
03724     return res;
03725   }
03726 
03727   if ((res = mp_sqr (a, &t)) != MP_OKAY) {
03728     mp_clear (&t);
03729     return res;
03730   }
03731   res = mp_mod (&t, b, c);
03732   mp_clear (&t);
03733   return res;
03734 }
03735 
03736 #endif
03737 
03738 
03739 #if defined(HAVE_ECC) || !defined(NO_PWDBASED)
03740 
03741 /* single digit addition */
03742 int mp_add_d (mp_int* a, mp_digit b, mp_int* c)
03743 {
03744   int     res, ix, oldused;
03745   mp_digit *tmpa, *tmpc, mu;
03746 
03747   /* grow c as required */
03748   if (c->alloc < a->used + 1) {
03749      if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
03750         return res;
03751      }
03752   }
03753 
03754   /* if a is negative and |a| >= b, call c = |a| - b */
03755   if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) {
03756      /* temporarily fix sign of a */
03757      a->sign = MP_ZPOS;
03758 
03759      /* c = |a| - b */
03760      res = mp_sub_d(a, b, c);
03761 
03762      /* fix sign  */
03763      a->sign = c->sign = MP_NEG;
03764 
03765      /* clamp */
03766      mp_clamp(c);
03767 
03768      return res;
03769   }
03770 
03771   /* old number of used digits in c */
03772   oldused = c->used;
03773 
03774   /* sign always positive */
03775   c->sign = MP_ZPOS;
03776 
03777   /* source alias */
03778   tmpa    = a->dp;
03779 
03780   /* destination alias */
03781   tmpc    = c->dp;
03782 
03783   /* if a is positive */
03784   if (a->sign == MP_ZPOS) {
03785      /* add digit, after this we're propagating
03786       * the carry.
03787       */
03788      *tmpc   = *tmpa++ + b;
03789      mu      = *tmpc >> DIGIT_BIT;
03790      *tmpc++ &= MP_MASK;
03791 
03792      /* now handle rest of the digits */
03793      for (ix = 1; ix < a->used; ix++) {
03794         *tmpc   = *tmpa++ + mu;
03795         mu      = *tmpc >> DIGIT_BIT;
03796         *tmpc++ &= MP_MASK;
03797      }
03798      /* set final carry */
03799      if (mu != 0 && ix < c->alloc) {
03800         ix++;
03801         *tmpc++  = mu;
03802      }
03803 
03804      /* setup size */
03805      c->used = a->used + 1;
03806   } else {
03807      /* a was negative and |a| < b */
03808      c->used  = 1;
03809 
03810      /* the result is a single digit */
03811      if (a->used == 1) {
03812         *tmpc++  =  b - a->dp[0];
03813      } else {
03814         *tmpc++  =  b;
03815      }
03816 
03817      /* setup count so the clearing of oldused
03818       * can fall through correctly
03819       */
03820      ix       = 1;
03821   }
03822 
03823   /* now zero to oldused */
03824   while (ix++ < oldused) {
03825      *tmpc++ = 0;
03826   }
03827   mp_clamp(c);
03828 
03829   return MP_OKAY;
03830 }
03831 
03832 
03833 /* single digit subtraction */
03834 int mp_sub_d (mp_int * a, mp_digit b, mp_int * c)
03835 {
03836   mp_digit *tmpa, *tmpc, mu;
03837   int       res, ix, oldused;
03838 
03839   /* grow c as required */
03840   if (c->alloc < a->used + 1) {
03841      if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
03842         return res;
03843      }
03844   }
03845 
03846   /* if a is negative just do an unsigned
03847    * addition [with fudged signs]
03848    */
03849   if (a->sign == MP_NEG) {
03850      a->sign = MP_ZPOS;
03851      res     = mp_add_d(a, b, c);
03852      a->sign = c->sign = MP_NEG;
03853 
03854      /* clamp */
03855      mp_clamp(c);
03856 
03857      return res;
03858   }
03859 
03860   /* setup regs */
03861   oldused = c->used;
03862   tmpa    = a->dp;
03863   tmpc    = c->dp;
03864 
03865   /* if a <= b simply fix the single digit */
03866   if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) {
03867      if (a->used == 1) {
03868         *tmpc++ = b - *tmpa;
03869      } else {
03870         *tmpc++ = b;
03871      }
03872      ix      = 1;
03873 
03874      /* negative/1digit */
03875      c->sign = MP_NEG;
03876      c->used = 1;
03877   } else {
03878      /* positive/size */
03879      c->sign = MP_ZPOS;
03880      c->used = a->used;
03881 
03882      /* subtract first digit */
03883      *tmpc    = *tmpa++ - b;
03884      mu       = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
03885      *tmpc++ &= MP_MASK;
03886 
03887      /* handle rest of the digits */
03888      for (ix = 1; ix < a->used; ix++) {
03889         *tmpc    = *tmpa++ - mu;
03890         mu       = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
03891         *tmpc++ &= MP_MASK;
03892      }
03893   }
03894 
03895   /* zero excess digits */
03896   while (ix++ < oldused) {
03897      *tmpc++ = 0;
03898   }
03899   mp_clamp(c);
03900   return MP_OKAY;
03901 }
03902 
03903 #endif /* defined(HAVE_ECC) || !defined(NO_PWDBASED) */
03904 
03905 
03906 #ifdef CYASSL_KEY_GEN
03907 
03908 int mp_cnt_lsb(mp_int *a);
03909 
03910 static int s_is_power_of_two(mp_digit b, int *p)
03911 {
03912    int x;
03913 
03914    /* fast return if no power of two */
03915    if ((b==0) || (b & (b-1))) {
03916       return 0;
03917    }
03918 
03919    for (x = 0; x < DIGIT_BIT; x++) {
03920       if (b == (((mp_digit)1)<<x)) {
03921          *p = x;
03922          return 1;
03923       }
03924    }
03925    return 0;
03926 }
03927 
03928 /* single digit division (based on routine from MPI) */
03929 static int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
03930 {
03931   mp_int  q;
03932   mp_word w;
03933   mp_digit t;
03934   int     res, ix;
03935 
03936   /* cannot divide by zero */
03937   if (b == 0) {
03938      return MP_VAL;
03939   }
03940 
03941   /* quick outs */
03942   if (b == 1 || mp_iszero(a) == 1) {
03943      if (d != NULL) {
03944         *d = 0;
03945      }
03946      if (c != NULL) {
03947         return mp_copy(a, c);
03948      }
03949      return MP_OKAY;
03950   }
03951 
03952   /* power of two ? */
03953   if (s_is_power_of_two(b, &ix) == 1) {
03954      if (d != NULL) {
03955         *d = a->dp[0] & ((((mp_digit)1)<<ix) - 1);
03956      }
03957      if (c != NULL) {
03958         return mp_div_2d(a, ix, c, NULL);
03959      }
03960      return MP_OKAY;
03961   }
03962 
03963 #ifdef BN_MP_DIV_3_C
03964   /* three? */
03965   if (b == 3) {
03966      return mp_div_3(a, c, d);
03967   }
03968 #endif
03969 
03970   /* no easy answer [c'est la vie].  Just division */
03971   if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
03972      return res;
03973   }
03974   
03975   q.used = a->used;
03976   q.sign = a->sign;
03977   w = 0;
03978   for (ix = a->used - 1; ix >= 0; ix--) {
03979      w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
03980      
03981      if (w >= b) {
03982         t = (mp_digit)(w / b);
03983         w -= ((mp_word)t) * ((mp_word)b);
03984       } else {
03985         t = 0;
03986       }
03987       q.dp[ix] = (mp_digit)t;
03988   }
03989   
03990   if (d != NULL) {
03991      *d = (mp_digit)w;
03992   }
03993   
03994   if (c != NULL) {
03995      mp_clamp(&q);
03996      mp_exch(&q, c);
03997   }
03998   mp_clear(&q);
03999   
04000   return res;
04001 }
04002 
04003 
04004 static int mp_mod_d (mp_int * a, mp_digit b, mp_digit * c)
04005 {
04006   return mp_div_d(a, b, NULL, c);
04007 }
04008 
04009 
04010 const mp_digit ltm_prime_tab[] = {
04011   0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
04012   0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
04013   0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
04014   0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F,
04015 #ifndef MP_8BIT
04016   0x0083,
04017   0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
04018   0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
04019   0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
04020   0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
04021 
04022   0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
04023   0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
04024   0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
04025   0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
04026   0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
04027   0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
04028   0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
04029   0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
04030 
04031   0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
04032   0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
04033   0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
04034   0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
04035   0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
04036   0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
04037   0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
04038   0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
04039 
04040   0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
04041   0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
04042   0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
04043   0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
04044   0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
04045   0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
04046   0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
04047   0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
04048 #endif
04049 };
04050 
04051 
04052 /* Miller-Rabin test of "a" to the base of "b" as described in 
04053  * HAC pp. 139 Algorithm 4.24
04054  *
04055  * Sets result to 0 if definitely composite or 1 if probably prime.
04056  * Randomly the chance of error is no more than 1/4 and often 
04057  * very much lower.
04058  */
04059 static int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result)
04060 {
04061   mp_int  n1, y, r;
04062   int     s, j, err;
04063 
04064   /* default */
04065   *result = MP_NO;
04066 
04067   /* ensure b > 1 */
04068   if (mp_cmp_d(b, 1) != MP_GT) {
04069      return MP_VAL;
04070   }     
04071 
04072   /* get n1 = a - 1 */
04073   if ((err = mp_init_copy (&n1, a)) != MP_OKAY) {
04074     return err;
04075   }
04076   if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) {
04077     goto LBL_N1;
04078   }
04079 
04080   /* set 2**s * r = n1 */
04081   if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) {
04082     goto LBL_N1;
04083   }
04084 
04085   /* count the number of least significant bits
04086    * which are zero
04087    */
04088   s = mp_cnt_lsb(&r);
04089 
04090   /* now divide n - 1 by 2**s */
04091   if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) {
04092     goto LBL_R;
04093   }
04094 
04095   /* compute y = b**r mod a */
04096   if ((err = mp_init (&y)) != MP_OKAY) {
04097     goto LBL_R;
04098   }
04099   if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) {
04100     goto LBL_Y;
04101   }
04102 
04103   /* if y != 1 and y != n1 do */
04104   if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) {
04105     j = 1;
04106     /* while j <= s-1 and y != n1 */
04107     while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) {
04108       if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) {
04109          goto LBL_Y;
04110       }
04111 
04112       /* if y == 1 then composite */
04113       if (mp_cmp_d (&y, 1) == MP_EQ) {
04114          goto LBL_Y;
04115       }
04116 
04117       ++j;
04118     }
04119 
04120     /* if y != n1 then composite */
04121     if (mp_cmp (&y, &n1) != MP_EQ) {
04122       goto LBL_Y;
04123     }
04124   }
04125 
04126   /* probably prime now */
04127   *result = MP_YES;
04128 LBL_Y:mp_clear (&y);
04129 LBL_R:mp_clear (&r);
04130 LBL_N1:mp_clear (&n1);
04131   return err;
04132 }
04133 
04134 
04135 /* determines if an integers is divisible by one 
04136  * of the first PRIME_SIZE primes or not
04137  *
04138  * sets result to 0 if not, 1 if yes
04139  */
04140 static int mp_prime_is_divisible (mp_int * a, int *result)
04141 {
04142   int     err, ix;
04143   mp_digit res;
04144 
04145   /* default to not */
04146   *result = MP_NO;
04147 
04148   for (ix = 0; ix < PRIME_SIZE; ix++) {
04149     /* what is a mod LBL_prime_tab[ix] */
04150     if ((err = mp_mod_d (a, ltm_prime_tab[ix], &res)) != MP_OKAY) {
04151       return err;
04152     }
04153 
04154     /* is the residue zero? */
04155     if (res == 0) {
04156       *result = MP_YES;
04157       return MP_OKAY;
04158     }
04159   }
04160 
04161   return MP_OKAY;
04162 }
04163 
04164 
04165 /*
04166  * Sets result to 1 if probably prime, 0 otherwise
04167  */
04168 int mp_prime_is_prime (mp_int * a, int t, int *result)
04169 {
04170   mp_int  b;
04171   int     ix, err, res;
04172 
04173   /* default to no */
04174   *result = MP_NO;
04175 
04176   /* valid value of t? */
04177   if (t <= 0 || t > PRIME_SIZE) {
04178     return MP_VAL;
04179   }
04180 
04181   /* is the input equal to one of the primes in the table? */
04182   for (ix = 0; ix < PRIME_SIZE; ix++) {
04183       if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
04184          *result = 1;
04185          return MP_OKAY;
04186       }
04187   }
04188 
04189   /* first perform trial division */
04190   if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
04191     return err;
04192   }
04193 
04194   /* return if it was trivially divisible */
04195   if (res == MP_YES) {
04196     return MP_OKAY;
04197   }
04198 
04199   /* now perform the miller-rabin rounds */
04200   if ((err = mp_init (&b)) != MP_OKAY) {
04201     return err;
04202   }
04203 
04204   for (ix = 0; ix < t; ix++) {
04205     /* set the prime */
04206     mp_set (&b, ltm_prime_tab[ix]);
04207 
04208     if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
04209       goto LBL_B;
04210     }
04211 
04212     if (res == MP_NO) {
04213       goto LBL_B;
04214     }
04215   }
04216 
04217   /* passed the test */
04218   *result = MP_YES;
04219 LBL_B:mp_clear (&b);
04220   return err;
04221 }
04222 
04223 
04224 /* computes least common multiple as |a*b|/(a, b) */
04225 int mp_lcm (mp_int * a, mp_int * b, mp_int * c)
04226 {
04227   int     res;
04228   mp_int  t1, t2;
04229 
04230 
04231   if ((res = mp_init_multi (&t1, &t2, NULL, NULL, NULL, NULL)) != MP_OKAY) {
04232     return res;
04233   }
04234 
04235   /* t1 = get the GCD of the two inputs */
04236   if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) {
04237     goto LBL_T;
04238   }
04239 
04240   /* divide the smallest by the GCD */
04241   if (mp_cmp_mag(a, b) == MP_LT) {
04242      /* store quotient in t2 such that t2 * b is the LCM */
04243      if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) {
04244         goto LBL_T;
04245      }
04246      res = mp_mul(b, &t2, c);
04247   } else {
04248      /* store quotient in t2 such that t2 * a is the LCM */
04249      if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) {
04250         goto LBL_T;
04251      }
04252      res = mp_mul(a, &t2, c);
04253   }
04254 
04255   /* fix the sign to positive */
04256   c->sign = MP_ZPOS;
04257 
04258 LBL_T:
04259   mp_clear(&t1);
04260   mp_clear(&t2);
04261   return res;
04262 }
04263 
04264 
04265 static const int lnz[16] = { 
04266    4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
04267 };
04268 
04269 /* Counts the number of lsbs which are zero before the first zero bit */
04270 int mp_cnt_lsb(mp_int *a)
04271 {
04272     int x;
04273     mp_digit q, qq;
04274 
04275     /* easy out */
04276     if (mp_iszero(a) == 1) {
04277         return 0;
04278     }
04279 
04280     /* scan lower digits until non-zero */
04281     for (x = 0; x < a->used && a->dp[x] == 0; x++);
04282     q = a->dp[x];
04283     x *= DIGIT_BIT;
04284 
04285     /* now scan this digit until a 1 is found */
04286     if ((q & 1) == 0) {
04287         do {
04288             qq  = q & 15;
04289             x  += lnz[qq];
04290             q >>= 4;
04291         } while (qq == 0);
04292     }
04293     return x;
04294 }
04295 
04296 
04297 /* Greatest Common Divisor using the binary method */
04298 int mp_gcd (mp_int * a, mp_int * b, mp_int * c)
04299 {
04300     mp_int  u, v;
04301     int     k, u_lsb, v_lsb, res;
04302 
04303     /* either zero than gcd is the largest */
04304     if (mp_iszero (a) == MP_YES) {
04305         return mp_abs (b, c);
04306     }
04307     if (mp_iszero (b) == MP_YES) {
04308         return mp_abs (a, c);
04309     }
04310 
04311     /* get copies of a and b we can modify */
04312     if ((res = mp_init_copy (&u, a)) != MP_OKAY) {
04313         return res;
04314     }
04315 
04316     if ((res = mp_init_copy (&v, b)) != MP_OKAY) {
04317         goto LBL_U;
04318     }
04319 
04320     /* must be positive for the remainder of the algorithm */
04321     u.sign = v.sign = MP_ZPOS;
04322 
04323     /* B1.  Find the common power of two for u and v */
04324     u_lsb = mp_cnt_lsb(&u);
04325     v_lsb = mp_cnt_lsb(&v);
04326     k     = MIN(u_lsb, v_lsb);
04327 
04328     if (k > 0) {
04329         /* divide the power of two out */
04330         if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) {
04331             goto LBL_V;
04332         }
04333 
04334         if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) {
04335             goto LBL_V;
04336         }
04337     }
04338 
04339     /* divide any remaining factors of two out */
04340     if (u_lsb != k) {
04341         if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {
04342             goto LBL_V;
04343         }
04344     }
04345 
04346     if (v_lsb != k) {
04347         if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {
04348             goto LBL_V;
04349         }
04350     }
04351 
04352     while (mp_iszero(&v) == 0) {
04353         /* make sure v is the largest */
04354         if (mp_cmp_mag(&u, &v) == MP_GT) {
04355             /* swap u and v to make sure v is >= u */
04356             mp_exch(&u, &v);
04357         }
04358      
04359         /* subtract smallest from largest */
04360         if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) {
04361             goto LBL_V;
04362         }
04363      
04364         /* Divide out all factors of two */
04365         if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
04366             goto LBL_V;
04367         } 
04368     } 
04369 
04370     /* multiply by 2**k which we divided out at the beginning */
04371     if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) {
04372         goto LBL_V;
04373     }
04374     c->sign = MP_ZPOS;
04375     res = MP_OKAY;
04376 LBL_V:mp_clear (&u);
04377 LBL_U:mp_clear (&v);
04378     return res;
04379 }
04380 
04381 
04382 
04383 #endif /* CYASSL_KEY_GEN */
04384 
04385 
04386 #ifdef HAVE_ECC
04387 
04388 /* chars used in radix conversions */
04389 const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
04390 
04391 /* read a string [ASCII] in a given radix */
04392 int mp_read_radix (mp_int * a, const char *str, int radix)
04393 {
04394   int     y, res, neg;
04395   char    ch;
04396 
04397   /* zero the digit bignum */
04398   mp_zero(a);
04399 
04400   /* make sure the radix is ok */
04401   if (radix < 2 || radix > 64) {
04402     return MP_VAL;
04403   }
04404 
04405   /* if the leading digit is a 
04406    * minus set the sign to negative. 
04407    */
04408   if (*str == '-') {
04409     ++str;
04410     neg = MP_NEG;
04411   } else {
04412     neg = MP_ZPOS;
04413   }
04414 
04415   /* set the integer to the default of zero */
04416   mp_zero (a);
04417   
04418   /* process each digit of the string */
04419   while (*str) {
04420     /* if the radix < 36 the conversion is case insensitive
04421      * this allows numbers like 1AB and 1ab to represent the same  value
04422      * [e.g. in hex]
04423      */
04424     ch = (char) ((radix < 36) ? XTOUPPER(*str) : *str);
04425     for (y = 0; y < 64; y++) {
04426       if (ch == mp_s_rmap[y]) {
04427          break;
04428       }
04429     }
04430 
04431     /* if the char was found in the map 
04432      * and is less than the given radix add it
04433      * to the number, otherwise exit the loop. 
04434      */
04435     if (y < radix) {
04436       if ((res = mp_mul_d (a, (mp_digit) radix, a)) != MP_OKAY) {
04437          return res;
04438       }
04439       if ((res = mp_add_d (a, (mp_digit) y, a)) != MP_OKAY) {
04440          return res;
04441       }
04442     } else {
04443       break;
04444     }
04445     ++str;
04446   }
04447   
04448   /* set the sign only if a != 0 */
04449   if (mp_iszero(a) != 1) {
04450      a->sign = neg;
04451   }
04452   return MP_OKAY;
04453 }
04454 
04455 #endif /* HAVE_ECC */
04456 
04457 #endif /* USE_FAST_MATH */
04458 
04459 #endif /* NO_BIG_INT */