wolf SSL / CyaSSL-2.9.4

Dependents:  

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers integer.c Source File

integer.c

00001 /* integer.c
00002  *
00003  * Copyright (C) 2006-2013 wolfSSL Inc.
00004  *
00005  * This file is part of CyaSSL.
00006  *
00007  * CyaSSL is free software; you can redistribute it and/or modify
00008  * it under the terms of the GNU General Public License as published by
00009  * the Free Software Foundation; either version 2 of the License, or
00010  * (at your option) any later version.
00011  *
00012  * CyaSSL is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
00020  */
00021 
00022 
00023 /*
00024  * Based on public domain LibTomMath 0.38 by Tom St Denis, tomstdenis@iahu.ca,
00025  * http://math.libtomcrypt.com
00026  */
00027 
00028 
00029 #ifdef HAVE_CONFIG_H
00030     #include <config.h>
00031 #endif
00032 
00033 /* in case user set USE_FAST_MATH there */
00034 #include <cyassl/ctaocrypt/settings.h>
00035 
00036 #ifndef NO_BIG_INT 
00037 
00038 #ifndef USE_FAST_MATH
00039 
00040 #include <cyassl/ctaocrypt/integer.h>
00041 
00042 #ifndef NO_CYASSL_SMALL_STACK
00043     #ifndef CYASSL_SMALL_STACK
00044         #define CYASSL_SMALL_STACK
00045     #endif
00046 #endif
00047 
00048 static void bn_reverse (unsigned char *s, int len);
00049 
00050 /* math settings check */
00051 word32 CheckRunTimeSettings(void)
00052 {
00053     return CTC_SETTINGS;
00054 }
00055 
00056 
00057 /* handle up to 6 inits */
00058 int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d, mp_int* e,
00059                   mp_int* f)
00060 {
00061     int res = MP_OKAY;
00062 
00063     if (a && ((res = mp_init(a)) != MP_OKAY))
00064         return res;
00065 
00066     if (b && ((res = mp_init(b)) != MP_OKAY)) {
00067         mp_clear(a);
00068         return res;
00069     }
00070 
00071     if (c && ((res = mp_init(c)) != MP_OKAY)) {
00072         mp_clear(a); mp_clear(b);
00073         return res;
00074     }
00075 
00076     if (d && ((res = mp_init(d)) != MP_OKAY)) {
00077         mp_clear(a); mp_clear(b); mp_clear(c);
00078         return res;
00079     }
00080 
00081     if (e && ((res = mp_init(e)) != MP_OKAY)) {
00082         mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d);
00083         return res;
00084     }
00085 
00086     if (f && ((res = mp_init(f)) != MP_OKAY)) {
00087         mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d); mp_clear(e);
00088         return res;
00089     }
00090 
00091     return res;
00092 }
00093 
00094 
00095 /* init a new mp_int */
00096 int mp_init (mp_int * a)
00097 {
00098   int i;
00099 
00100   /* allocate memory required and clear it */
00101   a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * MP_PREC, 0,
00102                                       DYNAMIC_TYPE_BIGINT);
00103   if (a->dp == NULL) {
00104     return MP_MEM;
00105   }
00106 
00107   /* set the digits to zero */
00108   for (i = 0; i < MP_PREC; i++) {
00109       a->dp[i] = 0;
00110   }
00111 
00112   /* set the used to zero, allocated digits to the default precision
00113    * and sign to positive */
00114   a->used  = 0;
00115   a->alloc = MP_PREC;
00116   a->sign  = MP_ZPOS;
00117 
00118   return MP_OKAY;
00119 }
00120 
00121 
00122 /* clear one (frees)  */
00123 void
00124 mp_clear (mp_int * a)
00125 {
00126   int i;
00127 
00128   if (a == NULL)
00129       return;
00130 
00131   /* only do anything if a hasn't been freed previously */
00132   if (a->dp != NULL) {
00133     /* first zero the digits */
00134     for (i = 0; i < a->used; i++) {
00135         a->dp[i] = 0;
00136     }
00137 
00138     /* free ram */
00139     XFREE(a->dp, 0, DYNAMIC_TYPE_BIGINT);
00140 
00141     /* reset members to make debugging easier */
00142     a->dp    = NULL;
00143     a->alloc = a->used = 0;
00144     a->sign  = MP_ZPOS;
00145   }
00146 }
00147 
00148 
00149 /* get the size for an unsigned equivalent */
00150 int mp_unsigned_bin_size (mp_int * a)
00151 {
00152   int     size = mp_count_bits (a);
00153   return (size / 8 + ((size & 7) != 0 ? 1 : 0));
00154 }
00155 
00156 
00157 /* returns the number of bits in an int */
00158 int
00159 mp_count_bits (mp_int * a)
00160 {
00161   int     r;
00162   mp_digit q;
00163 
00164   /* shortcut */
00165   if (a->used == 0) {
00166     return 0;
00167   }
00168 
00169   /* get number of digits and add that */
00170   r = (a->used - 1) * DIGIT_BIT;
00171   
00172   /* take the last digit and count the bits in it */
00173   q = a->dp[a->used - 1];
00174   while (q > ((mp_digit) 0)) {
00175     ++r;
00176     q >>= ((mp_digit) 1);
00177   }
00178   return r;
00179 }
00180 
00181 
00182 int mp_leading_bit (mp_int * a)
00183 {
00184     int bit = 0;
00185     mp_int t;
00186 
00187     if (mp_init_copy(&t, a) != MP_OKAY)
00188         return 0;
00189 
00190     while (mp_iszero(&t) == 0) {
00191 #ifndef MP_8BIT
00192         bit = (t.dp[0] & 0x80) != 0;
00193 #else
00194         bit = (t.dp[0] | ((t.dp[1] & 0x01) << 7)) & 0x80 != 0;
00195 #endif
00196         if (mp_div_2d (&t, 8, &t, NULL) != MP_OKAY)
00197             break;
00198     }
00199     mp_clear(&t);
00200     return bit;
00201 }
00202 
00203 
00204 /* store in unsigned [big endian] format */
00205 int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
00206 {
00207   int     x, res;
00208   mp_int  t;
00209 
00210   if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
00211     return res;
00212   }
00213 
00214   x = 0;
00215   while (mp_iszero (&t) == 0) {
00216 #ifndef MP_8BIT
00217       b[x++] = (unsigned char) (t.dp[0] & 255);
00218 #else
00219       b[x++] = (unsigned char) (t.dp[0] | ((t.dp[1] & 0x01) << 7));
00220 #endif
00221     if ((res = mp_div_2d (&t, 8, &t, NULL)) != MP_OKAY) {
00222       mp_clear (&t);
00223       return res;
00224     }
00225   }
00226   bn_reverse (b, x);
00227   mp_clear (&t);
00228   return MP_OKAY;
00229 }
00230 
00231 
00232 /* creates "a" then copies b into it */
00233 int mp_init_copy (mp_int * a, mp_int * b)
00234 {
00235   int     res;
00236 
00237   if ((res = mp_init (a)) != MP_OKAY) {
00238     return res;
00239   }
00240   return mp_copy (b, a);
00241 }
00242 
00243 
00244 /* copy, b = a */
00245 int
00246 mp_copy (mp_int * a, mp_int * b)
00247 {
00248   int     res, n;
00249 
00250   /* if dst == src do nothing */
00251   if (a == b) {
00252     return MP_OKAY;
00253   }
00254 
00255   /* grow dest */
00256   if (b->alloc < a->used) {
00257      if ((res = mp_grow (b, a->used)) != MP_OKAY) {
00258         return res;
00259      }
00260   }
00261 
00262   /* zero b and copy the parameters over */
00263   {
00264     register mp_digit *tmpa, *tmpb;
00265 
00266     /* pointer aliases */
00267 
00268     /* source */
00269     tmpa = a->dp;
00270 
00271     /* destination */
00272     tmpb = b->dp;
00273 
00274     /* copy all the digits */
00275     for (n = 0; n < a->used; n++) {
00276       *tmpb++ = *tmpa++;
00277     }
00278 
00279     /* clear high digits */
00280     for (; n < b->used; n++) {
00281       *tmpb++ = 0;
00282     }
00283   }
00284 
00285   /* copy used count and sign */
00286   b->used = a->used;
00287   b->sign = a->sign;
00288   return MP_OKAY;
00289 }
00290 
00291 
00292 /* grow as required */
00293 int mp_grow (mp_int * a, int size)
00294 {
00295   int     i;
00296   mp_digit *tmp;
00297 
00298   /* if the alloc size is smaller alloc more ram */
00299   if (a->alloc < size) {
00300     /* ensure there are always at least MP_PREC digits extra on top */
00301     size += (MP_PREC * 2) - (size % MP_PREC);
00302 
00303     /* reallocate the array a->dp
00304      *
00305      * We store the return in a temporary variable
00306      * in case the operation failed we don't want
00307      * to overwrite the dp member of a.
00308      */
00309     tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size, 0,
00310                                        DYNAMIC_TYPE_BIGINT);
00311     if (tmp == NULL) {
00312       /* reallocation failed but "a" is still valid [can be freed] */
00313       return MP_MEM;
00314     }
00315 
00316     /* reallocation succeeded so set a->dp */
00317     a->dp = tmp;
00318 
00319     /* zero excess digits */
00320     i        = a->alloc;
00321     a->alloc = size;
00322     for (; i < a->alloc; i++) {
00323       a->dp[i] = 0;
00324     }
00325   }
00326   return MP_OKAY;
00327 }
00328 
00329 
00330 /* reverse an array, used for radix code */
00331 void
00332 bn_reverse (unsigned char *s, int len)
00333 {
00334   int     ix, iy;
00335   unsigned char t;
00336 
00337   ix = 0;
00338   iy = len - 1;
00339   while (ix < iy) {
00340     t     = s[ix];
00341     s[ix] = s[iy];
00342     s[iy] = t;
00343     ++ix;
00344     --iy;
00345   }
00346 }
00347 
00348 
00349 /* shift right by a certain bit count (store quotient in c, optional
00350    remainder in d) */
00351 int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
00352 {
00353   int     D, res;
00354   mp_int  t;
00355 
00356 
00357   /* if the shift count is <= 0 then we do no work */
00358   if (b <= 0) {
00359     res = mp_copy (a, c);
00360     if (d != NULL) {
00361       mp_zero (d);
00362     }
00363     return res;
00364   }
00365 
00366   if ((res = mp_init (&t)) != MP_OKAY) {
00367     return res;
00368   }
00369 
00370   /* get the remainder */
00371   if (d != NULL) {
00372     if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
00373       mp_clear (&t);
00374       return res;
00375     }
00376   }
00377 
00378   /* copy */
00379   if ((res = mp_copy (a, c)) != MP_OKAY) {
00380     mp_clear (&t);
00381     return res;
00382   }
00383 
00384   /* shift by as many digits in the bit count */
00385   if (b >= (int)DIGIT_BIT) {
00386     mp_rshd (c, b / DIGIT_BIT);
00387   }
00388 
00389   /* shift any bit count < DIGIT_BIT */
00390   D = (b % DIGIT_BIT);
00391   if (D != 0) {
00392     mp_rshb(c, D);
00393   }
00394   mp_clamp (c);
00395   if (d != NULL) {
00396     mp_exch (&t, d);
00397   }
00398   mp_clear (&t);
00399   return MP_OKAY;
00400 }
00401 
00402 
00403 /* set to zero */
00404 void mp_zero (mp_int * a)
00405 {
00406   int       n;
00407   mp_digit *tmp;
00408 
00409   a->sign = MP_ZPOS;
00410   a->used = 0;
00411 
00412   tmp = a->dp;
00413   for (n = 0; n < a->alloc; n++) {
00414      *tmp++ = 0;
00415   }
00416 }
00417 
00418 
00419 /* trim unused digits 
00420  *
00421  * This is used to ensure that leading zero digits are
00422  * trimed and the leading "used" digit will be non-zero
00423  * Typically very fast.  Also fixes the sign if there
00424  * are no more leading digits
00425  */
00426 void
00427 mp_clamp (mp_int * a)
00428 {
00429   /* decrease used while the most significant digit is
00430    * zero.
00431    */
00432   while (a->used > 0 && a->dp[a->used - 1] == 0) {
00433     --(a->used);
00434   }
00435 
00436   /* reset the sign flag if used == 0 */
00437   if (a->used == 0) {
00438     a->sign = MP_ZPOS;
00439   }
00440 }
00441 
00442 
00443 /* swap the elements of two integers, for cases where you can't simply swap the 
00444  * mp_int pointers around
00445  */
00446 void
00447 mp_exch (mp_int * a, mp_int * b)
00448 {
00449   mp_int  t;
00450 
00451   t  = *a;
00452   *a = *b;
00453   *b = t;
00454 }
00455 
00456 
00457 /* shift right a certain number of bits */
00458 void mp_rshb (mp_int *c, int x)
00459 {
00460     register mp_digit *tmpc, mask, shift;
00461     mp_digit r, rr;
00462     mp_digit D = x;
00463 
00464     /* mask */
00465     mask = (((mp_digit)1) << D) - 1;
00466 
00467     /* shift for lsb */
00468     shift = DIGIT_BIT - D;
00469 
00470     /* alias */
00471     tmpc = c->dp + (c->used - 1);
00472 
00473     /* carry */
00474     r = 0;
00475     for (x = c->used - 1; x >= 0; x--) {
00476       /* get the lower  bits of this word in a temp */
00477       rr = *tmpc & mask;
00478 
00479       /* shift the current word and mix in the carry bits from previous word */
00480       *tmpc = (*tmpc >> D) | (r << shift);
00481       --tmpc;
00482 
00483       /* set the carry to the carry bits of the current word found above */
00484       r = rr;
00485     }
00486 }
00487 
00488 
00489 /* shift right a certain amount of digits */
00490 void mp_rshd (mp_int * a, int b)
00491 {
00492   int     x;
00493 
00494   /* if b <= 0 then ignore it */
00495   if (b <= 0) {
00496     return;
00497   }
00498 
00499   /* if b > used then simply zero it and return */
00500   if (a->used <= b) {
00501     mp_zero (a);
00502     return;
00503   }
00504 
00505   {
00506     register mp_digit *bottom, *top;
00507 
00508     /* shift the digits down */
00509 
00510     /* bottom */
00511     bottom = a->dp;
00512 
00513     /* top [offset into digits] */
00514     top = a->dp + b;
00515 
00516     /* this is implemented as a sliding window where 
00517      * the window is b-digits long and digits from 
00518      * the top of the window are copied to the bottom
00519      *
00520      * e.g.
00521 
00522      b-2 | b-1 | b0 | b1 | b2 | ... | bb |   ---->
00523                  /\                   |      ---->
00524                   \-------------------/      ---->
00525      */
00526     for (x = 0; x < (a->used - b); x++) {
00527       *bottom++ = *top++;
00528     }
00529 
00530     /* zero the top digits */
00531     for (; x < a->used; x++) {
00532       *bottom++ = 0;
00533     }
00534   }
00535   
00536   /* remove excess digits */
00537   a->used -= b;
00538 }
00539 
00540 
00541 /* calc a value mod 2**b */
00542 int
00543 mp_mod_2d (mp_int * a, int b, mp_int * c)
00544 {
00545   int     x, res;
00546 
00547   /* if b is <= 0 then zero the int */
00548   if (b <= 0) {
00549     mp_zero (c);
00550     return MP_OKAY;
00551   }
00552 
00553   /* if the modulus is larger than the value than return */
00554   if (b >= (int) (a->used * DIGIT_BIT)) {
00555     res = mp_copy (a, c);
00556     return res;
00557   }
00558 
00559   /* copy */
00560   if ((res = mp_copy (a, c)) != MP_OKAY) {
00561     return res;
00562   }
00563 
00564   /* zero digits above the last digit of the modulus */
00565   for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
00566     c->dp[x] = 0;
00567   }
00568   /* clear the digit that is not completely outside/inside the modulus */
00569   c->dp[b / DIGIT_BIT] &= (mp_digit) ((((mp_digit) 1) <<
00570               (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1));
00571   mp_clamp (c);
00572   return MP_OKAY;
00573 }
00574 
00575 
00576 /* reads a unsigned char array, assumes the msb is stored first [big endian] */
00577 int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
00578 {
00579   int     res;
00580 
00581   /* make sure there are at least two digits */
00582   if (a->alloc < 2) {
00583      if ((res = mp_grow(a, 2)) != MP_OKAY) {
00584         return res;
00585      }
00586   }
00587 
00588   /* zero the int */
00589   mp_zero (a);
00590 
00591   /* read the bytes in */
00592   while (c-- > 0) {
00593     if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
00594       return res;
00595     }
00596 
00597 #ifndef MP_8BIT
00598       a->dp[0] |= *b++;
00599       a->used += 1;
00600 #else
00601       a->dp[0] = (*b & MP_MASK);
00602       a->dp[1] |= ((*b++ >> 7U) & 1);
00603       a->used += 2;
00604 #endif
00605   }
00606   mp_clamp (a);
00607   return MP_OKAY;
00608 }
00609 
00610 
00611 /* shift left by a certain bit count */
00612 int mp_mul_2d (mp_int * a, int b, mp_int * c)
00613 {
00614   mp_digit d;
00615   int      res;
00616 
00617   /* copy */
00618   if (a != c) {
00619      if ((res = mp_copy (a, c)) != MP_OKAY) {
00620        return res;
00621      }
00622   }
00623 
00624   if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) {
00625      if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
00626        return res;
00627      }
00628   }
00629 
00630   /* shift by as many digits in the bit count */
00631   if (b >= (int)DIGIT_BIT) {
00632     if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
00633       return res;
00634     }
00635   }
00636 
00637   /* shift any bit count < DIGIT_BIT */
00638   d = (mp_digit) (b % DIGIT_BIT);
00639   if (d != 0) {
00640     register mp_digit *tmpc, shift, mask, r, rr;
00641     register int x;
00642 
00643     /* bitmask for carries */
00644     mask = (((mp_digit)1) << d) - 1;
00645 
00646     /* shift for msbs */
00647     shift = DIGIT_BIT - d;
00648 
00649     /* alias */
00650     tmpc = c->dp;
00651 
00652     /* carry */
00653     r    = 0;
00654     for (x = 0; x < c->used; x++) {
00655       /* get the higher bits of the current word */
00656       rr = (*tmpc >> shift) & mask;
00657 
00658       /* shift the current word and OR in the carry */
00659       *tmpc = ((*tmpc << d) | r) & MP_MASK;
00660       ++tmpc;
00661 
00662       /* set the carry to the carry bits of the current word */
00663       r = rr;
00664     }
00665     
00666     /* set final carry */
00667     if (r != 0) {
00668        c->dp[(c->used)++] = r;
00669     }
00670   }
00671   mp_clamp (c);
00672   return MP_OKAY;
00673 }
00674 
00675 
00676 /* shift left a certain amount of digits */
00677 int mp_lshd (mp_int * a, int b)
00678 {
00679   int     x, res;
00680 
00681   /* if its less than zero return */
00682   if (b <= 0) {
00683     return MP_OKAY;
00684   }
00685 
00686   /* grow to fit the new digits */
00687   if (a->alloc < a->used + b) {
00688      if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
00689        return res;
00690      }
00691   }
00692 
00693   {
00694     register mp_digit *top, *bottom;
00695 
00696     /* increment the used by the shift amount then copy upwards */
00697     a->used += b;
00698 
00699     /* top */
00700     top = a->dp + a->used - 1;
00701 
00702     /* base */
00703     bottom = a->dp + a->used - 1 - b;
00704 
00705     /* much like mp_rshd this is implemented using a sliding window
00706      * except the window goes the otherway around.  Copying from
00707      * the bottom to the top.  see bn_mp_rshd.c for more info.
00708      */
00709     for (x = a->used - 1; x >= b; x--) {
00710       *top-- = *bottom--;
00711     }
00712 
00713     /* zero the lower digits */
00714     top = a->dp;
00715     for (x = 0; x < b; x++) {
00716       *top++ = 0;
00717     }
00718   }
00719   return MP_OKAY;
00720 }
00721 
00722 
00723 /* this is a shell function that calls either the normal or Montgomery
00724  * exptmod functions.  Originally the call to the montgomery code was
00725  * embedded in the normal function but that wasted alot of stack space
00726  * for nothing (since 99% of the time the Montgomery code would be called)
00727  */
00728 int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
00729 {
00730   int dr;
00731 
00732   /* modulus P must be positive */
00733   if (P->sign == MP_NEG) {
00734      return MP_VAL;
00735   }
00736 
00737   /* if exponent X is negative we have to recurse */
00738   if (X->sign == MP_NEG) {
00739 #ifdef BN_MP_INVMOD_C
00740      mp_int tmpG, tmpX;
00741      int err;
00742 
00743      /* first compute 1/G mod P */
00744      if ((err = mp_init(&tmpG)) != MP_OKAY) {
00745         return err;
00746      }
00747      if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
00748         mp_clear(&tmpG);
00749         return err;
00750      }
00751 
00752      /* now get |X| */
00753      if ((err = mp_init(&tmpX)) != MP_OKAY) {
00754         mp_clear(&tmpG);
00755         return err;
00756      }
00757      if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
00758         mp_clear(&tmpG);
00759         mp_clear(&tmpX);
00760         return err;
00761      }
00762 
00763      /* and now compute (1/G)**|X| instead of G**X [X < 0] */
00764      err = mp_exptmod(&tmpG, &tmpX, P, Y);
00765      mp_clear(&tmpG);
00766      mp_clear(&tmpX);
00767      return err;
00768 #else 
00769      /* no invmod */
00770      return MP_VAL;
00771 #endif
00772   }
00773 
00774 /* modified diminished radix reduction */
00775 #if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && \
00776   defined(BN_S_MP_EXPTMOD_C)
00777   if (mp_reduce_is_2k_l(P) == MP_YES) {
00778      return s_mp_exptmod(G, X, P, Y, 1);
00779   }
00780 #endif
00781 
00782 #ifdef BN_MP_DR_IS_MODULUS_C
00783   /* is it a DR modulus? */
00784   dr = mp_dr_is_modulus(P);
00785 #else
00786   /* default to no */
00787   dr = 0;
00788 #endif
00789 
00790 #ifdef BN_MP_REDUCE_IS_2K_C
00791   /* if not, is it a unrestricted DR modulus? */
00792   if (dr == 0) {
00793      dr = mp_reduce_is_2k(P) << 1;
00794   }
00795 #endif
00796     
00797   /* if the modulus is odd or dr != 0 use the montgomery method */
00798 #ifdef BN_MP_EXPTMOD_FAST_C
00799   if (mp_isodd (P) == 1 || dr !=  0) {
00800     return mp_exptmod_fast (G, X, P, Y, dr);
00801   } else {
00802 #endif
00803 #ifdef BN_S_MP_EXPTMOD_C
00804     /* otherwise use the generic Barrett reduction technique */
00805     return s_mp_exptmod (G, X, P, Y, 0);
00806 #else
00807     /* no exptmod for evens */
00808     return MP_VAL;
00809 #endif
00810 #ifdef BN_MP_EXPTMOD_FAST_C
00811   }
00812 #endif
00813 }
00814 
00815 
00816 /* b = |a| 
00817  *
00818  * Simple function copies the input and fixes the sign to positive
00819  */
00820 int
00821 mp_abs (mp_int * a, mp_int * b)
00822 {
00823   int     res;
00824 
00825   /* copy a to b */
00826   if (a != b) {
00827      if ((res = mp_copy (a, b)) != MP_OKAY) {
00828        return res;
00829      }
00830   }
00831 
00832   /* force the sign of b to positive */
00833   b->sign = MP_ZPOS;
00834 
00835   return MP_OKAY;
00836 }
00837 
00838 
00839 /* hac 14.61, pp608 */
00840 int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
00841 {
00842   /* b cannot be negative */
00843   if (b->sign == MP_NEG || mp_iszero(b) == 1) {
00844     return MP_VAL;
00845   }
00846 
00847 #ifdef BN_FAST_MP_INVMOD_C
00848   /* if the modulus is odd we can use a faster routine instead */
00849   if (mp_isodd (b) == 1) {
00850     return fast_mp_invmod (a, b, c);
00851   }
00852 #endif
00853 
00854 #ifdef BN_MP_INVMOD_SLOW_C
00855   return mp_invmod_slow(a, b, c);
00856 #endif
00857 }
00858 
00859 
00860 /* computes the modular inverse via binary extended euclidean algorithm, 
00861  * that is c = 1/a mod b 
00862  *
00863  * Based on slow invmod except this is optimized for the case where b is 
00864  * odd as per HAC Note 14.64 on pp. 610
00865  */
00866 int fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
00867 {
00868   mp_int  x, y, u, v, B, D;
00869   int     res, neg;
00870 
00871   /* 2. [modified] b must be odd   */
00872   if (mp_iseven (b) == 1) {
00873     return MP_VAL;
00874   }
00875 
00876   /* init all our temps */
00877   if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D)) != MP_OKAY) {
00878      return res;
00879   }
00880 
00881   /* x == modulus, y == value to invert */
00882   if ((res = mp_copy (b, &x)) != MP_OKAY) {
00883     goto LBL_ERR;
00884   }
00885 
00886   /* we need y = |a| */
00887   if ((res = mp_mod (a, b, &y)) != MP_OKAY) {
00888     goto LBL_ERR;
00889   }
00890 
00891   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
00892   if ((res = mp_copy (&x, &u)) != MP_OKAY) {
00893     goto LBL_ERR;
00894   }
00895   if ((res = mp_copy (&y, &v)) != MP_OKAY) {
00896     goto LBL_ERR;
00897   }
00898   mp_set (&D, 1);
00899 
00900 top:
00901   /* 4.  while u is even do */
00902   while (mp_iseven (&u) == 1) {
00903     /* 4.1 u = u/2 */
00904     if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
00905       goto LBL_ERR;
00906     }
00907     /* 4.2 if B is odd then */
00908     if (mp_isodd (&B) == 1) {
00909       if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
00910         goto LBL_ERR;
00911       }
00912     }
00913     /* B = B/2 */
00914     if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
00915       goto LBL_ERR;
00916     }
00917   }
00918 
00919   /* 5.  while v is even do */
00920   while (mp_iseven (&v) == 1) {
00921     /* 5.1 v = v/2 */
00922     if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
00923       goto LBL_ERR;
00924     }
00925     /* 5.2 if D is odd then */
00926     if (mp_isodd (&D) == 1) {
00927       /* D = (D-x)/2 */
00928       if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
00929         goto LBL_ERR;
00930       }
00931     }
00932     /* D = D/2 */
00933     if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
00934       goto LBL_ERR;
00935     }
00936   }
00937 
00938   /* 6.  if u >= v then */
00939   if (mp_cmp (&u, &v) != MP_LT) {
00940     /* u = u - v, B = B - D */
00941     if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
00942       goto LBL_ERR;
00943     }
00944 
00945     if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
00946       goto LBL_ERR;
00947     }
00948   } else {
00949     /* v - v - u, D = D - B */
00950     if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
00951       goto LBL_ERR;
00952     }
00953 
00954     if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
00955       goto LBL_ERR;
00956     }
00957   }
00958 
00959   /* if not zero goto step 4 */
00960   if (mp_iszero (&u) == 0) {
00961     goto top;
00962   }
00963 
00964   /* now a = C, b = D, gcd == g*v */
00965 
00966   /* if v != 1 then there is no inverse */
00967   if (mp_cmp_d (&v, 1) != MP_EQ) {
00968     res = MP_VAL;
00969     goto LBL_ERR;
00970   }
00971 
00972   /* b is now the inverse */
00973   neg = a->sign;
00974   while (D.sign == MP_NEG) {
00975     if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
00976       goto LBL_ERR;
00977     }
00978   }
00979   mp_exch (&D, c);
00980   c->sign = neg;
00981   res = MP_OKAY;
00982 
00983 LBL_ERR:mp_clear(&x);
00984         mp_clear(&y);
00985         mp_clear(&u);
00986         mp_clear(&v);
00987         mp_clear(&B);
00988         mp_clear(&D);
00989   return res;
00990 }
00991 
00992 
00993 /* hac 14.61, pp608 */
00994 int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
00995 {
00996   mp_int  x, y, u, v, A, B, C, D;
00997   int     res;
00998 
00999   /* b cannot be negative */
01000   if (b->sign == MP_NEG || mp_iszero(b) == 1) {
01001     return MP_VAL;
01002   }
01003 
01004   /* init temps */
01005   if ((res = mp_init_multi(&x, &y, &u, &v, 
01006                            &A, &B)) != MP_OKAY) {
01007      return res;
01008   }
01009 
01010   /* init rest of tmps temps */
01011   if ((res = mp_init_multi(&C, &D, 0, 0, 0, 0)) != MP_OKAY) {
01012      return res;
01013   }
01014 
01015   /* x = a, y = b */
01016   if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
01017       goto LBL_ERR;
01018   }
01019   if ((res = mp_copy (b, &y)) != MP_OKAY) {
01020     goto LBL_ERR;
01021   }
01022 
01023   /* 2. [modified] if x,y are both even then return an error! */
01024   if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
01025     res = MP_VAL;
01026     goto LBL_ERR;
01027   }
01028 
01029   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
01030   if ((res = mp_copy (&x, &u)) != MP_OKAY) {
01031     goto LBL_ERR;
01032   }
01033   if ((res = mp_copy (&y, &v)) != MP_OKAY) {
01034     goto LBL_ERR;
01035   }
01036   mp_set (&A, 1);
01037   mp_set (&D, 1);
01038 
01039 top:
01040   /* 4.  while u is even do */
01041   while (mp_iseven (&u) == 1) {
01042     /* 4.1 u = u/2 */
01043     if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
01044       goto LBL_ERR;
01045     }
01046     /* 4.2 if A or B is odd then */
01047     if (mp_isodd (&A) == 1 || mp_isodd (&B) == 1) {
01048       /* A = (A+y)/2, B = (B-x)/2 */
01049       if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
01050          goto LBL_ERR;
01051       }
01052       if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
01053          goto LBL_ERR;
01054       }
01055     }
01056     /* A = A/2, B = B/2 */
01057     if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
01058       goto LBL_ERR;
01059     }
01060     if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
01061       goto LBL_ERR;
01062     }
01063   }
01064 
01065   /* 5.  while v is even do */
01066   while (mp_iseven (&v) == 1) {
01067     /* 5.1 v = v/2 */
01068     if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
01069       goto LBL_ERR;
01070     }
01071     /* 5.2 if C or D is odd then */
01072     if (mp_isodd (&C) == 1 || mp_isodd (&D) == 1) {
01073       /* C = (C+y)/2, D = (D-x)/2 */
01074       if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
01075          goto LBL_ERR;
01076       }
01077       if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
01078          goto LBL_ERR;
01079       }
01080     }
01081     /* C = C/2, D = D/2 */
01082     if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
01083       goto LBL_ERR;
01084     }
01085     if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
01086       goto LBL_ERR;
01087     }
01088   }
01089 
01090   /* 6.  if u >= v then */
01091   if (mp_cmp (&u, &v) != MP_LT) {
01092     /* u = u - v, A = A - C, B = B - D */
01093     if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
01094       goto LBL_ERR;
01095     }
01096 
01097     if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
01098       goto LBL_ERR;
01099     }
01100 
01101     if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
01102       goto LBL_ERR;
01103     }
01104   } else {
01105     /* v - v - u, C = C - A, D = D - B */
01106     if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
01107       goto LBL_ERR;
01108     }
01109 
01110     if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
01111       goto LBL_ERR;
01112     }
01113 
01114     if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
01115       goto LBL_ERR;
01116     }
01117   }
01118 
01119   /* if not zero goto step 4 */
01120   if (mp_iszero (&u) == 0)
01121     goto top;
01122 
01123   /* now a = C, b = D, gcd == g*v */
01124 
01125   /* if v != 1 then there is no inverse */
01126   if (mp_cmp_d (&v, 1) != MP_EQ) {
01127     res = MP_VAL;
01128     goto LBL_ERR;
01129   }
01130 
01131   /* if its too low */
01132   while (mp_cmp_d(&C, 0) == MP_LT) {
01133       if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
01134          goto LBL_ERR;
01135       }
01136   }
01137   
01138   /* too big */
01139   while (mp_cmp_mag(&C, b) != MP_LT) {
01140       if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
01141          goto LBL_ERR;
01142       }
01143   }
01144   
01145   /* C is now the inverse */
01146   mp_exch (&C, c);
01147   res = MP_OKAY;
01148 LBL_ERR:mp_clear(&x);
01149         mp_clear(&y);
01150         mp_clear(&u);
01151         mp_clear(&v);
01152         mp_clear(&A);
01153         mp_clear(&B);
01154         mp_clear(&C);
01155         mp_clear(&D);
01156   return res;
01157 }
01158 
01159 
01160 /* compare maginitude of two ints (unsigned) */
01161 int mp_cmp_mag (mp_int * a, mp_int * b)
01162 {
01163   int     n;
01164   mp_digit *tmpa, *tmpb;
01165 
01166   /* compare based on # of non-zero digits */
01167   if (a->used > b->used) {
01168     return MP_GT;
01169   }
01170   
01171   if (a->used < b->used) {
01172     return MP_LT;
01173   }
01174 
01175   /* alias for a */
01176   tmpa = a->dp + (a->used - 1);
01177 
01178   /* alias for b */
01179   tmpb = b->dp + (a->used - 1);
01180 
01181   /* compare based on digits  */
01182   for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
01183     if (*tmpa > *tmpb) {
01184       return MP_GT;
01185     }
01186 
01187     if (*tmpa < *tmpb) {
01188       return MP_LT;
01189     }
01190   }
01191   return MP_EQ;
01192 }
01193 
01194 
01195 /* compare two ints (signed)*/
01196 int
01197 mp_cmp (mp_int * a, mp_int * b)
01198 {
01199   /* compare based on sign */
01200   if (a->sign != b->sign) {
01201      if (a->sign == MP_NEG) {
01202         return MP_LT;
01203      } else {
01204         return MP_GT;
01205      }
01206   }
01207   
01208   /* compare digits */
01209   if (a->sign == MP_NEG) {
01210      /* if negative compare opposite direction */
01211      return mp_cmp_mag(b, a);
01212   } else {
01213      return mp_cmp_mag(a, b);
01214   }
01215 }
01216 
01217 
01218 /* compare a digit */
01219 int mp_cmp_d(mp_int * a, mp_digit b)
01220 {
01221   /* compare based on sign */
01222   if (a->sign == MP_NEG) {
01223     return MP_LT;
01224   }
01225 
01226   /* compare based on magnitude */
01227   if (a->used > 1) {
01228     return MP_GT;
01229   }
01230 
01231   /* compare the only digit of a to b */
01232   if (a->dp[0] > b) {
01233     return MP_GT;
01234   } else if (a->dp[0] < b) {
01235     return MP_LT;
01236   } else {
01237     return MP_EQ;
01238   }
01239 }
01240 
01241 
01242 /* set to a digit */
01243 void mp_set (mp_int * a, mp_digit b)
01244 {
01245   mp_zero (a);
01246   a->dp[0] = b & MP_MASK;
01247   a->used  = (a->dp[0] != 0) ? 1 : 0;
01248 }
01249 
01250 
01251 /* c = a mod b, 0 <= c < b */
01252 int
01253 mp_mod (mp_int * a, mp_int * b, mp_int * c)
01254 {
01255   mp_int  t;
01256   int     res;
01257 
01258   if ((res = mp_init (&t)) != MP_OKAY) {
01259     return res;
01260   }
01261 
01262   if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
01263     mp_clear (&t);
01264     return res;
01265   }
01266 
01267   if (t.sign != b->sign) {
01268     res = mp_add (b, &t, c);
01269   } else {
01270     res = MP_OKAY;
01271     mp_exch (&t, c);
01272   }
01273 
01274   mp_clear (&t);
01275   return res;
01276 }
01277 
01278 
01279 /* slower bit-bang division... also smaller */
01280 int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
01281 {
01282    mp_int ta, tb, tq, q;
01283    int    res, n, n2;
01284 
01285   /* is divisor zero ? */
01286   if (mp_iszero (b) == 1) {
01287     return MP_VAL;
01288   }
01289 
01290   /* if a < b then q=0, r = a */
01291   if (mp_cmp_mag (a, b) == MP_LT) {
01292     if (d != NULL) {
01293       res = mp_copy (a, d);
01294     } else {
01295       res = MP_OKAY;
01296     }
01297     if (c != NULL) {
01298       mp_zero (c);
01299     }
01300     return res;
01301   }
01302     
01303   /* init our temps */
01304   if ((res = mp_init_multi(&ta, &tb, &tq, &q, 0, 0)) != MP_OKAY) {
01305      return res;
01306   }
01307 
01308 
01309   mp_set(&tq, 1);
01310   n = mp_count_bits(a) - mp_count_bits(b);
01311   if (((res = mp_abs(a, &ta)) != MP_OKAY) ||
01312       ((res = mp_abs(b, &tb)) != MP_OKAY) || 
01313       ((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) ||
01314       ((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
01315       goto LBL_ERR;
01316   }
01317 
01318   while (n-- >= 0) {
01319      if (mp_cmp(&tb, &ta) != MP_GT) {
01320         if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) ||
01321             ((res = mp_add(&q, &tq, &q)) != MP_OKAY)) {
01322            goto LBL_ERR;
01323         }
01324      }
01325      if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) ||
01326          ((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) {
01327            goto LBL_ERR;
01328      }
01329   }
01330 
01331   /* now q == quotient and ta == remainder */
01332   n  = a->sign;
01333   n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG);
01334   if (c != NULL) {
01335      mp_exch(c, &q);
01336      c->sign  = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2;
01337   }
01338   if (d != NULL) {
01339      mp_exch(d, &ta);
01340      d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n;
01341   }
01342 LBL_ERR:
01343    mp_clear(&ta);
01344    mp_clear(&tb);
01345    mp_clear(&tq);
01346    mp_clear(&q);
01347    return res;
01348 }
01349 
01350 
01351 /* b = a/2 */
01352 int mp_div_2(mp_int * a, mp_int * b)
01353 {
01354   int     x, res, oldused;
01355 
01356   /* copy */
01357   if (b->alloc < a->used) {
01358     if ((res = mp_grow (b, a->used)) != MP_OKAY) {
01359       return res;
01360     }
01361   }
01362 
01363   oldused = b->used;
01364   b->used = a->used;
01365   {
01366     register mp_digit r, rr, *tmpa, *tmpb;
01367 
01368     /* source alias */
01369     tmpa = a->dp + b->used - 1;
01370 
01371     /* dest alias */
01372     tmpb = b->dp + b->used - 1;
01373 
01374     /* carry */
01375     r = 0;
01376     for (x = b->used - 1; x >= 0; x--) {
01377       /* get the carry for the next iteration */
01378       rr = *tmpa & 1;
01379 
01380       /* shift the current digit, add in carry and store */
01381       *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
01382 
01383       /* forward carry to next iteration */
01384       r = rr;
01385     }
01386 
01387     /* zero excess digits */
01388     tmpb = b->dp + b->used;
01389     for (x = b->used; x < oldused; x++) {
01390       *tmpb++ = 0;
01391     }
01392   }
01393   b->sign = a->sign;
01394   mp_clamp (b);
01395   return MP_OKAY;
01396 }
01397 
01398 
01399 /* high level addition (handles signs) */
01400 int mp_add (mp_int * a, mp_int * b, mp_int * c)
01401 {
01402   int     sa, sb, res;
01403 
01404   /* get sign of both inputs */
01405   sa = a->sign;
01406   sb = b->sign;
01407 
01408   /* handle two cases, not four */
01409   if (sa == sb) {
01410     /* both positive or both negative */
01411     /* add their magnitudes, copy the sign */
01412     c->sign = sa;
01413     res = s_mp_add (a, b, c);
01414   } else {
01415     /* one positive, the other negative */
01416     /* subtract the one with the greater magnitude from */
01417     /* the one of the lesser magnitude.  The result gets */
01418     /* the sign of the one with the greater magnitude. */
01419     if (mp_cmp_mag (a, b) == MP_LT) {
01420       c->sign = sb;
01421       res = s_mp_sub (b, a, c);
01422     } else {
01423       c->sign = sa;
01424       res = s_mp_sub (a, b, c);
01425     }
01426   }
01427   return res;
01428 }
01429 
01430 
01431 /* low level addition, based on HAC pp.594, Algorithm 14.7 */
01432 int
01433 s_mp_add (mp_int * a, mp_int * b, mp_int * c)
01434 {
01435   mp_int *x;
01436   int     olduse, res, min, max;
01437 
01438   /* find sizes, we let |a| <= |b| which means we have to sort
01439    * them.  "x" will point to the input with the most digits
01440    */
01441   if (a->used > b->used) {
01442     min = b->used;
01443     max = a->used;
01444     x = a;
01445   } else {
01446     min = a->used;
01447     max = b->used;
01448     x = b;
01449   }
01450 
01451   /* init result */
01452   if (c->alloc < max + 1) {
01453     if ((res = mp_grow (c, max + 1)) != MP_OKAY) {
01454       return res;
01455     }
01456   }
01457 
01458   /* get old used digit count and set new one */
01459   olduse = c->used;
01460   c->used = max + 1;
01461 
01462   {
01463     register mp_digit u, *tmpa, *tmpb, *tmpc;
01464     register int i;
01465 
01466     /* alias for digit pointers */
01467 
01468     /* first input */
01469     tmpa = a->dp;
01470 
01471     /* second input */
01472     tmpb = b->dp;
01473 
01474     /* destination */
01475     tmpc = c->dp;
01476 
01477     /* zero the carry */
01478     u = 0;
01479     for (i = 0; i < min; i++) {
01480       /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
01481       *tmpc = *tmpa++ + *tmpb++ + u;
01482 
01483       /* U = carry bit of T[i] */
01484       u = *tmpc >> ((mp_digit)DIGIT_BIT);
01485 
01486       /* take away carry bit from T[i] */
01487       *tmpc++ &= MP_MASK;
01488     }
01489 
01490     /* now copy higher words if any, that is in A+B 
01491      * if A or B has more digits add those in 
01492      */
01493     if (min != max) {
01494       for (; i < max; i++) {
01495         /* T[i] = X[i] + U */
01496         *tmpc = x->dp[i] + u;
01497 
01498         /* U = carry bit of T[i] */
01499         u = *tmpc >> ((mp_digit)DIGIT_BIT);
01500 
01501         /* take away carry bit from T[i] */
01502         *tmpc++ &= MP_MASK;
01503       }
01504     }
01505 
01506     /* add carry */
01507     *tmpc++ = u;
01508 
01509     /* clear digits above oldused */
01510     for (i = c->used; i < olduse; i++) {
01511       *tmpc++ = 0;
01512     }
01513   }
01514 
01515   mp_clamp (c);
01516   return MP_OKAY;
01517 }
01518 
01519 
01520 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
01521 int
01522 s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
01523 {
01524   int     olduse, res, min, max;
01525 
01526   /* find sizes */
01527   min = b->used;
01528   max = a->used;
01529 
01530   /* init result */
01531   if (c->alloc < max) {
01532     if ((res = mp_grow (c, max)) != MP_OKAY) {
01533       return res;
01534     }
01535   }
01536   olduse = c->used;
01537   c->used = max;
01538 
01539   {
01540     register mp_digit u, *tmpa, *tmpb, *tmpc;
01541     register int i;
01542 
01543     /* alias for digit pointers */
01544     tmpa = a->dp;
01545     tmpb = b->dp;
01546     tmpc = c->dp;
01547 
01548     /* set carry to zero */
01549     u = 0;
01550     for (i = 0; i < min; i++) {
01551       /* T[i] = A[i] - B[i] - U */
01552       *tmpc = *tmpa++ - *tmpb++ - u;
01553 
01554       /* U = carry bit of T[i]
01555        * Note this saves performing an AND operation since
01556        * if a carry does occur it will propagate all the way to the
01557        * MSB.  As a result a single shift is enough to get the carry
01558        */
01559       u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
01560 
01561       /* Clear carry from T[i] */
01562       *tmpc++ &= MP_MASK;
01563     }
01564 
01565     /* now copy higher words if any, e.g. if A has more digits than B  */
01566     for (; i < max; i++) {
01567       /* T[i] = A[i] - U */
01568       *tmpc = *tmpa++ - u;
01569 
01570       /* U = carry bit of T[i] */
01571       u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
01572 
01573       /* Clear carry from T[i] */
01574       *tmpc++ &= MP_MASK;
01575     }
01576 
01577     /* clear digits above used (since we may not have grown result above) */
01578     for (i = c->used; i < olduse; i++) {
01579       *tmpc++ = 0;
01580     }
01581   }
01582 
01583   mp_clamp (c);
01584   return MP_OKAY;
01585 }
01586 
01587 
01588 /* high level subtraction (handles signs) */
01589 int
01590 mp_sub (mp_int * a, mp_int * b, mp_int * c)
01591 {
01592   int     sa, sb, res;
01593 
01594   sa = a->sign;
01595   sb = b->sign;
01596 
01597   if (sa != sb) {
01598     /* subtract a negative from a positive, OR */
01599     /* subtract a positive from a negative. */
01600     /* In either case, ADD their magnitudes, */
01601     /* and use the sign of the first number. */
01602     c->sign = sa;
01603     res = s_mp_add (a, b, c);
01604   } else {
01605     /* subtract a positive from a positive, OR */
01606     /* subtract a negative from a negative. */
01607     /* First, take the difference between their */
01608     /* magnitudes, then... */
01609     if (mp_cmp_mag (a, b) != MP_LT) {
01610       /* Copy the sign from the first */
01611       c->sign = sa;
01612       /* The first has a larger or equal magnitude */
01613       res = s_mp_sub (a, b, c);
01614     } else {
01615       /* The result has the *opposite* sign from */
01616       /* the first number. */
01617       c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
01618       /* The second has a larger magnitude */
01619       res = s_mp_sub (b, a, c);
01620     }
01621   }
01622   return res;
01623 }
01624 
01625 
01626 /* determines if reduce_2k_l can be used */
01627 int mp_reduce_is_2k_l(mp_int *a)
01628 {
01629    int ix, iy;
01630    
01631    if (a->used == 0) {
01632       return MP_NO;
01633    } else if (a->used == 1) {
01634       return MP_YES;
01635    } else if (a->used > 1) {
01636       /* if more than half of the digits are -1 we're sold */
01637       for (iy = ix = 0; ix < a->used; ix++) {
01638           if (a->dp[ix] == MP_MASK) {
01639               ++iy;
01640           }
01641       }
01642       return (iy >= (a->used/2)) ? MP_YES : MP_NO;
01643       
01644    }
01645    return MP_NO;
01646 }
01647 
01648 
01649 /* determines if mp_reduce_2k can be used */
01650 int mp_reduce_is_2k(mp_int *a)
01651 {
01652    int ix, iy, iw;
01653    mp_digit iz;
01654    
01655    if (a->used == 0) {
01656       return MP_NO;
01657    } else if (a->used == 1) {
01658       return MP_YES;
01659    } else if (a->used > 1) {
01660       iy = mp_count_bits(a);
01661       iz = 1;
01662       iw = 1;
01663     
01664       /* Test every bit from the second digit up, must be 1 */
01665       for (ix = DIGIT_BIT; ix < iy; ix++) {
01666           if ((a->dp[iw] & iz) == 0) {
01667              return MP_NO;
01668           }
01669           iz <<= 1;
01670           if (iz > (mp_digit)MP_MASK) {
01671              ++iw;
01672              iz = 1;
01673           }
01674       }
01675    }
01676    return MP_YES;
01677 }
01678 
01679 
01680 /* determines if a number is a valid DR modulus */
01681 int mp_dr_is_modulus(mp_int *a)
01682 {
01683    int ix;
01684 
01685    /* must be at least two digits */
01686    if (a->used < 2) {
01687       return 0;
01688    }
01689 
01690    /* must be of the form b**k - a [a <= b] so all
01691     * but the first digit must be equal to -1 (mod b).
01692     */
01693    for (ix = 1; ix < a->used; ix++) {
01694        if (a->dp[ix] != MP_MASK) {
01695           return 0;
01696        }
01697    }
01698    return 1;
01699 }
01700 
01701 
01702 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
01703  *
01704  * Uses a left-to-right k-ary sliding window to compute the modular
01705  * exponentiation.
01706  * The value of k changes based on the size of the exponent.
01707  *
01708  * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
01709  */
01710 
01711 #ifdef MP_LOW_MEM
01712    #define TAB_SIZE 32
01713 #else
01714    #define TAB_SIZE 256
01715 #endif
01716 
01717 int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
01718                      int redmode)
01719 {
01720   mp_int  M[TAB_SIZE], res;
01721   mp_digit buf, mp;
01722   int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
01723 
01724   /* use a pointer to the reduction algorithm.  This allows us to use
01725    * one of many reduction algorithms without modding the guts of
01726    * the code with if statements everywhere.
01727    */
01728   int     (*redux)(mp_int*,mp_int*,mp_digit);
01729 
01730   /* find window size */
01731   x = mp_count_bits (X);
01732   if (x <= 7) {
01733     winsize = 2;
01734   } else if (x <= 36) {
01735     winsize = 3;
01736   } else if (x <= 140) {
01737     winsize = 4;
01738   } else if (x <= 450) {
01739     winsize = 5;
01740   } else if (x <= 1303) {
01741     winsize = 6;
01742   } else if (x <= 3529) {
01743     winsize = 7;
01744   } else {
01745     winsize = 8;
01746   }
01747 
01748 #ifdef MP_LOW_MEM
01749   if (winsize > 5) {
01750      winsize = 5;
01751   }
01752 #endif
01753 
01754   /* init M array */
01755   /* init first cell */
01756   if ((err = mp_init(&M[1])) != MP_OKAY) {
01757      return err;
01758   }
01759 
01760   /* now init the second half of the array */
01761   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
01762     if ((err = mp_init(&M[x])) != MP_OKAY) {
01763       for (y = 1<<(winsize-1); y < x; y++) {
01764         mp_clear (&M[y]);
01765       }
01766       mp_clear(&M[1]);
01767       return err;
01768     }
01769   }
01770 
01771   /* determine and setup reduction code */
01772   if (redmode == 0) {
01773 #ifdef BN_MP_MONTGOMERY_SETUP_C     
01774      /* now setup montgomery  */
01775      if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
01776         goto LBL_M;
01777      }
01778 #else
01779      err = MP_VAL;
01780      goto LBL_M;
01781 #endif
01782 
01783      /* automatically pick the comba one if available (saves quite a few
01784         calls/ifs) */
01785 #ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
01786      if (((P->used * 2 + 1) < MP_WARRAY) &&
01787           P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
01788         redux = fast_mp_montgomery_reduce;
01789      } else 
01790 #endif
01791      {
01792 #ifdef BN_MP_MONTGOMERY_REDUCE_C
01793         /* use slower baseline Montgomery method */
01794         redux = mp_montgomery_reduce;
01795 #else
01796         err = MP_VAL;
01797         goto LBL_M;
01798 #endif
01799      }
01800   } else if (redmode == 1) {
01801 #if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C)
01802      /* setup DR reduction for moduli of the form B**k - b */
01803      mp_dr_setup(P, &mp);
01804      redux = mp_dr_reduce;
01805 #else
01806      err = MP_VAL;
01807      goto LBL_M;
01808 #endif
01809   } else {
01810 #if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
01811      /* setup DR reduction for moduli of the form 2**k - b */
01812      if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
01813         goto LBL_M;
01814      }
01815      redux = mp_reduce_2k;
01816 #else
01817      err = MP_VAL;
01818      goto LBL_M;
01819 #endif
01820   }
01821 
01822   /* setup result */
01823   if ((err = mp_init (&res)) != MP_OKAY) {
01824     goto LBL_M;
01825   }
01826 
01827   /* create M table
01828    *
01829 
01830    *
01831    * The first half of the table is not computed though accept for M[0] and M[1]
01832    */
01833 
01834   if (redmode == 0) {
01835 #ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
01836      /* now we need R mod m */
01837      if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
01838        goto LBL_RES;
01839      }
01840 #else 
01841      err = MP_VAL;
01842      goto LBL_RES;
01843 #endif
01844 
01845      /* now set M[1] to G * R mod m */
01846      if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
01847        goto LBL_RES;
01848      }
01849   } else {
01850      mp_set(&res, 1);
01851      if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
01852         goto LBL_RES;
01853      }
01854   }
01855 
01856   /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times*/
01857   if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
01858     goto LBL_RES;
01859   }
01860 
01861   for (x = 0; x < (winsize - 1); x++) {
01862     if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
01863       goto LBL_RES;
01864     }
01865     if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
01866       goto LBL_RES;
01867     }
01868   }
01869 
01870   /* create upper table */
01871   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
01872     if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
01873       goto LBL_RES;
01874     }
01875     if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
01876       goto LBL_RES;
01877     }
01878   }
01879 
01880   /* set initial mode and bit cnt */
01881   mode   = 0;
01882   bitcnt = 1;
01883   buf    = 0;
01884   digidx = X->used - 1;
01885   bitcpy = 0;
01886   bitbuf = 0;
01887 
01888   for (;;) {
01889     /* grab next digit as required */
01890     if (--bitcnt == 0) {
01891       /* if digidx == -1 we are out of digits so break */
01892       if (digidx == -1) {
01893         break;
01894       }
01895       /* read next digit and reset bitcnt */
01896       buf    = X->dp[digidx--];
01897       bitcnt = (int)DIGIT_BIT;
01898     }
01899 
01900     /* grab the next msb from the exponent */
01901     y     = (int)(buf >> (DIGIT_BIT - 1)) & 1;
01902     buf <<= (mp_digit)1;
01903 
01904     /* if the bit is zero and mode == 0 then we ignore it
01905      * These represent the leading zero bits before the first 1 bit
01906      * in the exponent.  Technically this opt is not required but it
01907      * does lower the # of trivial squaring/reductions used
01908      */
01909     if (mode == 0 && y == 0) {
01910       continue;
01911     }
01912 
01913     /* if the bit is zero and mode == 1 then we square */
01914     if (mode == 1 && y == 0) {
01915       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
01916         goto LBL_RES;
01917       }
01918       if ((err = redux (&res, P, mp)) != MP_OKAY) {
01919         goto LBL_RES;
01920       }
01921       continue;
01922     }
01923 
01924     /* else we add it to the window */
01925     bitbuf |= (y << (winsize - ++bitcpy));
01926     mode    = 2;
01927 
01928     if (bitcpy == winsize) {
01929       /* ok window is filled so square as required and multiply  */
01930       /* square first */
01931       for (x = 0; x < winsize; x++) {
01932         if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
01933           goto LBL_RES;
01934         }
01935         if ((err = redux (&res, P, mp)) != MP_OKAY) {
01936           goto LBL_RES;
01937         }
01938       }
01939 
01940       /* then multiply */
01941       if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
01942         goto LBL_RES;
01943       }
01944       if ((err = redux (&res, P, mp)) != MP_OKAY) {
01945         goto LBL_RES;
01946       }
01947 
01948       /* empty window and reset */
01949       bitcpy = 0;
01950       bitbuf = 0;
01951       mode   = 1;
01952     }
01953   }
01954 
01955   /* if bits remain then square/multiply */
01956   if (mode == 2 && bitcpy > 0) {
01957     /* square then multiply if the bit is set */
01958     for (x = 0; x < bitcpy; x++) {
01959       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
01960         goto LBL_RES;
01961       }
01962       if ((err = redux (&res, P, mp)) != MP_OKAY) {
01963         goto LBL_RES;
01964       }
01965 
01966       /* get next bit of the window */
01967       bitbuf <<= 1;
01968       if ((bitbuf & (1 << winsize)) != 0) {
01969         /* then multiply */
01970         if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
01971           goto LBL_RES;
01972         }
01973         if ((err = redux (&res, P, mp)) != MP_OKAY) {
01974           goto LBL_RES;
01975         }
01976       }
01977     }
01978   }
01979 
01980   if (redmode == 0) {
01981      /* fixup result if Montgomery reduction is used
01982       * recall that any value in a Montgomery system is
01983       * actually multiplied by R mod n.  So we have
01984       * to reduce one more time to cancel out the factor
01985       * of R.
01986       */
01987      if ((err = redux(&res, P, mp)) != MP_OKAY) {
01988        goto LBL_RES;
01989      }
01990   }
01991 
01992   /* swap res with Y */
01993   mp_exch (&res, Y);
01994   err = MP_OKAY;
01995 LBL_RES:mp_clear (&res);
01996 LBL_M:
01997   mp_clear(&M[1]);
01998   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
01999     mp_clear (&M[x]);
02000   }
02001   return err;
02002 }
02003 
02004 
02005 /* setups the montgomery reduction stuff */
02006 int
02007 mp_montgomery_setup (mp_int * n, mp_digit * rho)
02008 {
02009   mp_digit x, b;
02010 
02011 /* fast inversion mod 2**k
02012  *
02013  * Based on the fact that
02014  *
02015  * XA = 1 (mod 2**n)  =>  (X(2-XA)) A = 1 (mod 2**2n)
02016  *                    =>  2*X*A - X*X*A*A = 1
02017  *                    =>  2*(1) - (1)     = 1
02018  */
02019   b = n->dp[0];
02020 
02021   if ((b & 1) == 0) {
02022     return MP_VAL;
02023   }
02024 
02025   x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
02026   x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
02027 #if !defined(MP_8BIT)
02028   x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
02029 #endif
02030 #if defined(MP_64BIT) || !(defined(MP_8BIT) || defined(MP_16BIT))
02031   x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
02032 #endif
02033 #ifdef MP_64BIT
02034   x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
02035 #endif
02036 
02037   /* rho = -1/m mod b */
02038   /* TAO, switched mp_word casts to mp_digit to shut up compiler */
02039   *rho = (((mp_digit)1 << ((mp_digit) DIGIT_BIT)) - x) & MP_MASK;
02040 
02041   return MP_OKAY;
02042 }
02043 
02044 
02045 /* computes xR**-1 == x (mod N) via Montgomery Reduction
02046  *
02047  * This is an optimized implementation of montgomery_reduce
02048  * which uses the comba method to quickly calculate the columns of the
02049  * reduction.
02050  *
02051  * Based on Algorithm 14.32 on pp.601 of HAC.
02052 */
02053 int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
02054 {
02055   int     ix, res, olduse;
02056 #ifdef CYASSL_SMALL_STACK
02057   mp_word* W;    /* uses dynamic memory and slower */
02058 #else
02059   mp_word W[MP_WARRAY];
02060 #endif
02061 
02062   /* get old used count */
02063   olduse = x->used;
02064 
02065   /* grow a as required */
02066   if (x->alloc < n->used + 1) {
02067     if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
02068       return res;
02069     }
02070   }
02071 
02072 #ifdef CYASSL_SMALL_STACK
02073   W = (mp_word*)XMALLOC(sizeof(mp_word) * MP_WARRAY, 0, DYNAMIC_TYPE_BIGINT);
02074   if (W == NULL)    
02075     return MP_MEM;
02076 #endif
02077 
02078   /* first we have to get the digits of the input into
02079    * an array of double precision words W[...]
02080    */
02081   {
02082     register mp_word *_W;
02083     register mp_digit *tmpx;
02084 
02085     /* alias for the W[] array */
02086     _W   = W;
02087 
02088     /* alias for the digits of  x*/
02089     tmpx = x->dp;
02090 
02091     /* copy the digits of a into W[0..a->used-1] */
02092     for (ix = 0; ix < x->used; ix++) {
02093       *_W++ = *tmpx++;
02094     }
02095 
02096     /* zero the high words of W[a->used..m->used*2] */
02097     for (; ix < n->used * 2 + 1; ix++) {
02098       *_W++ = 0;
02099     }
02100   }
02101 
02102   /* now we proceed to zero successive digits
02103    * from the least significant upwards
02104    */
02105   for (ix = 0; ix < n->used; ix++) {
02106     /* mu = ai * m' mod b
02107      *
02108      * We avoid a double precision multiplication (which isn't required)
02109      * by casting the value down to a mp_digit.  Note this requires
02110      * that W[ix-1] have  the carry cleared (see after the inner loop)
02111      */
02112     register mp_digit mu;
02113     mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
02114 
02115     /* a = a + mu * m * b**i
02116      *
02117      * This is computed in place and on the fly.  The multiplication
02118      * by b**i is handled by offseting which columns the results
02119      * are added to.
02120      *
02121      * Note the comba method normally doesn't handle carries in the
02122      * inner loop In this case we fix the carry from the previous
02123      * column since the Montgomery reduction requires digits of the
02124      * result (so far) [see above] to work.  This is
02125      * handled by fixing up one carry after the inner loop.  The
02126      * carry fixups are done in order so after these loops the
02127      * first m->used words of W[] have the carries fixed
02128      */
02129     {
02130       register int iy;
02131       register mp_digit *tmpn;
02132       register mp_word *_W;
02133 
02134       /* alias for the digits of the modulus */
02135       tmpn = n->dp;
02136 
02137       /* Alias for the columns set by an offset of ix */
02138       _W = W + ix;
02139 
02140       /* inner loop */
02141       for (iy = 0; iy < n->used; iy++) {
02142           *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++);
02143       }
02144     }
02145 
02146     /* now fix carry for next digit, W[ix+1] */
02147     W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
02148   }
02149 
02150   /* now we have to propagate the carries and
02151    * shift the words downward [all those least
02152    * significant digits we zeroed].
02153    */
02154   {
02155     register mp_digit *tmpx;
02156     register mp_word *_W, *_W1;
02157 
02158     /* nox fix rest of carries */
02159 
02160     /* alias for current word */
02161     _W1 = W + ix;
02162 
02163     /* alias for next word, where the carry goes */
02164     _W = W + ++ix;
02165 
02166     for (; ix <= n->used * 2 + 1; ix++) {
02167       *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
02168     }
02169 
02170     /* copy out, A = A/b**n
02171      *
02172      * The result is A/b**n but instead of converting from an
02173      * array of mp_word to mp_digit than calling mp_rshd
02174      * we just copy them in the right order
02175      */
02176 
02177     /* alias for destination word */
02178     tmpx = x->dp;
02179 
02180     /* alias for shifted double precision result */
02181     _W = W + n->used;
02182 
02183     for (ix = 0; ix < n->used + 1; ix++) {
02184       *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
02185     }
02186 
02187     /* zero oldused digits, if the input a was larger than
02188      * m->used+1 we'll have to clear the digits
02189      */
02190     for (; ix < olduse; ix++) {
02191       *tmpx++ = 0;
02192     }
02193   }
02194 
02195   /* set the max used and clamp */
02196   x->used = n->used + 1;
02197   mp_clamp (x);
02198 
02199 #ifdef CYASSL_SMALL_STACK
02200   XFREE(W, 0, DYNAMIC_TYPE_BIGINT);
02201 #endif
02202 
02203   /* if A >= m then A = A - m */
02204   if (mp_cmp_mag (x, n) != MP_LT) {
02205     return s_mp_sub (x, n, x);
02206   }
02207   return MP_OKAY;
02208 }
02209 
02210 
02211 /* computes xR**-1 == x (mod N) via Montgomery Reduction */
02212 int
02213 mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
02214 {
02215   int     ix, res, digs;
02216   mp_digit mu;
02217 
02218   /* can the fast reduction [comba] method be used?
02219    *
02220    * Note that unlike in mul you're safely allowed *less*
02221    * than the available columns [255 per default] since carries
02222    * are fixed up in the inner loop.
02223    */
02224   digs = n->used * 2 + 1;
02225   if ((digs < MP_WARRAY) &&
02226       n->used <
02227       (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
02228     return fast_mp_montgomery_reduce (x, n, rho);
02229   }
02230 
02231   /* grow the input as required */
02232   if (x->alloc < digs) {
02233     if ((res = mp_grow (x, digs)) != MP_OKAY) {
02234       return res;
02235     }
02236   }
02237   x->used = digs;
02238 
02239   for (ix = 0; ix < n->used; ix++) {
02240     /* mu = ai * rho mod b
02241      *
02242      * The value of rho must be precalculated via
02243      * montgomery_setup() such that
02244      * it equals -1/n0 mod b this allows the
02245      * following inner loop to reduce the
02246      * input one digit at a time
02247      */
02248     mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK);
02249 
02250     /* a = a + mu * m * b**i */
02251     {
02252       register int iy;
02253       register mp_digit *tmpn, *tmpx, u;
02254       register mp_word r;
02255 
02256       /* alias for digits of the modulus */
02257       tmpn = n->dp;
02258 
02259       /* alias for the digits of x [the input] */
02260       tmpx = x->dp + ix;
02261 
02262       /* set the carry to zero */
02263       u = 0;
02264 
02265       /* Multiply and add in place */
02266       for (iy = 0; iy < n->used; iy++) {
02267         /* compute product and sum */
02268         r       = ((mp_word)mu) * ((mp_word)*tmpn++) +
02269                   ((mp_word) u) + ((mp_word) * tmpx);
02270 
02271         /* get carry */
02272         u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
02273 
02274         /* fix digit */
02275         *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK));
02276       }
02277       /* At this point the ix'th digit of x should be zero */
02278 
02279 
02280       /* propagate carries upwards as required*/
02281       while (u) {
02282         *tmpx   += u;
02283         u        = *tmpx >> DIGIT_BIT;
02284         *tmpx++ &= MP_MASK;
02285       }
02286     }
02287   }
02288 
02289   /* at this point the n.used'th least
02290    * significant digits of x are all zero
02291    * which means we can shift x to the
02292    * right by n.used digits and the
02293    * residue is unchanged.
02294    */
02295 
02296   /* x = x/b**n.used */
02297   mp_clamp(x);
02298   mp_rshd (x, n->used);
02299 
02300   /* if x >= n then x = x - n */
02301   if (mp_cmp_mag (x, n) != MP_LT) {
02302     return s_mp_sub (x, n, x);
02303   }
02304 
02305   return MP_OKAY;
02306 }
02307 
02308 
02309 /* determines the setup value */
02310 void mp_dr_setup(mp_int *a, mp_digit *d)
02311 {
02312    /* the casts are required if DIGIT_BIT is one less than
02313     * the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
02314     */
02315    *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) - 
02316         ((mp_word)a->dp[0]));
02317 }
02318 
02319 
02320 /* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
02321  *
02322  * Based on algorithm from the paper
02323  *
02324  * "Generating Efficient Primes for Discrete Log Cryptosystems"
02325  *                 Chae Hoon Lim, Pil Joong Lee,
02326  *          POSTECH Information Research Laboratories
02327  *
02328  * The modulus must be of a special format [see manual]
02329  *
02330  * Has been modified to use algorithm 7.10 from the LTM book instead
02331  *
02332  * Input x must be in the range 0 <= x <= (n-1)**2
02333  */
02334 int
02335 mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k)
02336 {
02337   int      err, i, m;
02338   mp_word  r;
02339   mp_digit mu, *tmpx1, *tmpx2;
02340 
02341   /* m = digits in modulus */
02342   m = n->used;
02343 
02344   /* ensure that "x" has at least 2m digits */
02345   if (x->alloc < m + m) {
02346     if ((err = mp_grow (x, m + m)) != MP_OKAY) {
02347       return err;
02348     }
02349   }
02350 
02351 /* top of loop, this is where the code resumes if
02352  * another reduction pass is required.
02353  */
02354 top:
02355   /* aliases for digits */
02356   /* alias for lower half of x */
02357   tmpx1 = x->dp;
02358 
02359   /* alias for upper half of x, or x/B**m */
02360   tmpx2 = x->dp + m;
02361 
02362   /* set carry to zero */
02363   mu = 0;
02364 
02365   /* compute (x mod B**m) + k * [x/B**m] inline and inplace */
02366   for (i = 0; i < m; i++) {
02367       r         = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu;
02368       *tmpx1++  = (mp_digit)(r & MP_MASK);
02369       mu        = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
02370   }
02371 
02372   /* set final carry */
02373   *tmpx1++ = mu;
02374 
02375   /* zero words above m */
02376   for (i = m + 1; i < x->used; i++) {
02377       *tmpx1++ = 0;
02378   }
02379 
02380   /* clamp, sub and return */
02381   mp_clamp (x);
02382 
02383   /* if x >= n then subtract and reduce again
02384    * Each successive "recursion" makes the input smaller and smaller.
02385    */
02386   if (mp_cmp_mag (x, n) != MP_LT) {
02387     s_mp_sub(x, n, x);
02388     goto top;
02389   }
02390   return MP_OKAY;
02391 }
02392 
02393 
02394 /* reduces a modulo n where n is of the form 2**p - d */
02395 int mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d)
02396 {
02397    mp_int q;
02398    int    p, res;
02399    
02400    if ((res = mp_init(&q)) != MP_OKAY) {
02401       return res;
02402    }
02403    
02404    p = mp_count_bits(n);    
02405 top:
02406    /* q = a/2**p, a = a mod 2**p */
02407    if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
02408       goto ERR;
02409    }
02410    
02411    if (d != 1) {
02412       /* q = q * d */
02413       if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) { 
02414          goto ERR;
02415       }
02416    }
02417    
02418    /* a = a + q */
02419    if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
02420       goto ERR;
02421    }
02422    
02423    if (mp_cmp_mag(a, n) != MP_LT) {
02424       s_mp_sub(a, n, a);
02425       goto top;
02426    }
02427    
02428 ERR:
02429    mp_clear(&q);
02430    return res;
02431 }
02432 
02433 
02434 /* determines the setup value */
02435 int mp_reduce_2k_setup(mp_int *a, mp_digit *d)
02436 {
02437    int res, p;
02438    mp_int tmp;
02439    
02440    if ((res = mp_init(&tmp)) != MP_OKAY) {
02441       return res;
02442    }
02443    
02444    p = mp_count_bits(a);
02445    if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
02446       mp_clear(&tmp);
02447       return res;
02448    }
02449    
02450    if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
02451       mp_clear(&tmp);
02452       return res;
02453    }
02454    
02455    *d = tmp.dp[0];
02456    mp_clear(&tmp);
02457    return MP_OKAY;
02458 }
02459 
02460 
02461 /* computes a = 2**b 
02462  *
02463  * Simple algorithm which zeroes the int, grows it then just sets one bit
02464  * as required.
02465  */
02466 int
02467 mp_2expt (mp_int * a, int b)
02468 {
02469   int     res;
02470 
02471   /* zero a as per default */
02472   mp_zero (a);
02473 
02474   /* grow a to accomodate the single bit */
02475   if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) {
02476     return res;
02477   }
02478 
02479   /* set the used count of where the bit will go */
02480   a->used = b / DIGIT_BIT + 1;
02481 
02482   /* put the single bit in its place */
02483   a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
02484 
02485   return MP_OKAY;
02486 }
02487 
02488 
02489 /* multiply by a digit */
02490 int
02491 mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
02492 {
02493   mp_digit u, *tmpa, *tmpc;
02494   mp_word  r;
02495   int      ix, res, olduse;
02496 
02497   /* make sure c is big enough to hold a*b */
02498   if (c->alloc < a->used + 1) {
02499     if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
02500       return res;
02501     }
02502   }
02503 
02504   /* get the original destinations used count */
02505   olduse = c->used;
02506 
02507   /* set the sign */
02508   c->sign = a->sign;
02509 
02510   /* alias for a->dp [source] */
02511   tmpa = a->dp;
02512 
02513   /* alias for c->dp [dest] */
02514   tmpc = c->dp;
02515 
02516   /* zero carry */
02517   u = 0;
02518 
02519   /* compute columns */
02520   for (ix = 0; ix < a->used; ix++) {
02521     /* compute product and carry sum for this term */
02522     r       = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b);
02523 
02524     /* mask off higher bits to get a single digit */
02525     *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
02526 
02527     /* send carry into next iteration */
02528     u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
02529   }
02530 
02531   /* store final carry [if any] and increment ix offset  */
02532   *tmpc++ = u;
02533   ++ix;
02534 
02535   /* now zero digits above the top */
02536   while (ix++ < olduse) {
02537      *tmpc++ = 0;
02538   }
02539 
02540   /* set used count */
02541   c->used = a->used + 1;
02542   mp_clamp(c);
02543 
02544   return MP_OKAY;
02545 }
02546 
02547 
02548 /* d = a * b (mod c) */
02549 int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
02550 {
02551   int     res;
02552   mp_int  t;
02553 
02554   if ((res = mp_init (&t)) != MP_OKAY) {
02555     return res;
02556   }
02557 
02558   if ((res = mp_mul (a, b, &t)) != MP_OKAY) {
02559     mp_clear (&t);
02560     return res;
02561   }
02562   res = mp_mod (&t, c, d);
02563   mp_clear (&t);
02564   return res;
02565 }
02566 
02567 
02568 /* computes b = a*a */
02569 int
02570 mp_sqr (mp_int * a, mp_int * b)
02571 {
02572   int     res;
02573 
02574   {
02575 #ifdef BN_FAST_S_MP_SQR_C
02576     /* can we use the fast comba multiplier? */
02577     if ((a->used * 2 + 1) < MP_WARRAY && 
02578          a->used < 
02579          (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
02580       res = fast_s_mp_sqr (a, b);
02581     } else
02582 #endif
02583 #ifdef BN_S_MP_SQR_C
02584       res = s_mp_sqr (a, b);
02585 #else
02586       res = MP_VAL;
02587 #endif
02588   }
02589   b->sign = MP_ZPOS;
02590   return res;
02591 }
02592 
02593 
02594 /* high level multiplication (handles sign) */
02595 int mp_mul (mp_int * a, mp_int * b, mp_int * c)
02596 {
02597   int     res, neg;
02598   neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
02599 
02600   {
02601     /* can we use the fast multiplier?
02602      *
02603      * The fast multiplier can be used if the output will 
02604      * have less than MP_WARRAY digits and the number of 
02605      * digits won't affect carry propagation
02606      */
02607     int     digs = a->used + b->used + 1;
02608 
02609 #ifdef BN_FAST_S_MP_MUL_DIGS_C
02610     if ((digs < MP_WARRAY) &&
02611         MIN(a->used, b->used) <= 
02612         (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
02613       res = fast_s_mp_mul_digs (a, b, c, digs);
02614     } else 
02615 #endif
02616 #ifdef BN_S_MP_MUL_DIGS_C
02617       res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
02618 #else
02619       res = MP_VAL;
02620 #endif
02621 
02622   }
02623   c->sign = (c->used > 0) ? neg : MP_ZPOS;
02624   return res;
02625 }
02626 
02627 
02628 /* b = a*2 */
02629 int mp_mul_2(mp_int * a, mp_int * b)
02630 {
02631   int     x, res, oldused;
02632 
02633   /* grow to accomodate result */
02634   if (b->alloc < a->used + 1) {
02635     if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
02636       return res;
02637     }
02638   }
02639 
02640   oldused = b->used;
02641   b->used = a->used;
02642 
02643   {
02644     register mp_digit r, rr, *tmpa, *tmpb;
02645 
02646     /* alias for source */
02647     tmpa = a->dp;
02648     
02649     /* alias for dest */
02650     tmpb = b->dp;
02651 
02652     /* carry */
02653     r = 0;
02654     for (x = 0; x < a->used; x++) {
02655     
02656       /* get what will be the *next* carry bit from the 
02657        * MSB of the current digit 
02658        */
02659       rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
02660       
02661       /* now shift up this digit, add in the carry [from the previous] */
02662       *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK;
02663       
02664       /* copy the carry that would be from the source 
02665        * digit into the next iteration 
02666        */
02667       r = rr;
02668     }
02669 
02670     /* new leading digit? */
02671     if (r != 0) {
02672       /* add a MSB which is always 1 at this point */
02673       *tmpb = 1;
02674       ++(b->used);
02675     }
02676 
02677     /* now zero any excess digits on the destination 
02678      * that we didn't write to 
02679      */
02680     tmpb = b->dp + b->used;
02681     for (x = b->used; x < oldused; x++) {
02682       *tmpb++ = 0;
02683     }
02684   }
02685   b->sign = a->sign;
02686   return MP_OKAY;
02687 }
02688 
02689 
02690 /* divide by three (based on routine from MPI and the GMP manual) */
02691 int
02692 mp_div_3 (mp_int * a, mp_int *c, mp_digit * d)
02693 {
02694   mp_int   q;
02695   mp_word  w, t;
02696   mp_digit b;
02697   int      res, ix;
02698   
02699   /* b = 2**DIGIT_BIT / 3 */
02700   b = (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3);
02701 
02702   if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
02703      return res;
02704   }
02705   
02706   q.used = a->used;
02707   q.sign = a->sign;
02708   w = 0;
02709   for (ix = a->used - 1; ix >= 0; ix--) {
02710      w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
02711 
02712      if (w >= 3) {
02713         /* multiply w by [1/3] */
02714         t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT);
02715 
02716         /* now subtract 3 * [w/3] from w, to get the remainder */
02717         w -= t+t+t;
02718 
02719         /* fixup the remainder as required since
02720          * the optimization is not exact.
02721          */
02722         while (w >= 3) {
02723            t += 1;
02724            w -= 3;
02725         }
02726       } else {
02727         t = 0;
02728       }
02729       q.dp[ix] = (mp_digit)t;
02730   }
02731 
02732   /* [optional] store the remainder */
02733   if (d != NULL) {
02734      *d = (mp_digit)w;
02735   }
02736 
02737   /* [optional] store the quotient */
02738   if (c != NULL) {
02739      mp_clamp(&q);
02740      mp_exch(&q, c);
02741   }
02742   mp_clear(&q);
02743   
02744   return res;
02745 }
02746 
02747 
02748 /* init an mp_init for a given size */
02749 int mp_init_size (mp_int * a, int size)
02750 {
02751   int x;
02752 
02753   /* pad size so there are always extra digits */
02754   size += (MP_PREC * 2) - (size % MP_PREC); 
02755   
02756   /* alloc mem */
02757   a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size, 0,
02758                                       DYNAMIC_TYPE_BIGINT);
02759   if (a->dp == NULL) {
02760     return MP_MEM;
02761   }
02762 
02763   /* set the members */
02764   a->used  = 0;
02765   a->alloc = size;
02766   a->sign  = MP_ZPOS;
02767 
02768   /* zero the digits */
02769   for (x = 0; x < size; x++) {
02770       a->dp[x] = 0;
02771   }
02772 
02773   return MP_OKAY;
02774 }
02775 
02776 
02777 /* the jist of squaring...
02778  * you do like mult except the offset of the tmpx [one that 
02779  * starts closer to zero] can't equal the offset of tmpy.  
02780  * So basically you set up iy like before then you min it with
02781  * (ty-tx) so that it never happens.  You double all those 
02782  * you add in the inner loop
02783 
02784 After that loop you do the squares and add them in.
02785 */
02786 
02787 int fast_s_mp_sqr (mp_int * a, mp_int * b)
02788 {
02789   int       olduse, res, pa, ix, iz;
02790 #ifdef CYASSL_SMALL_STACK
02791   mp_digit* W;    /* uses dynamic memory and slower */
02792 #else
02793   mp_digit W[MP_WARRAY];
02794 #endif
02795   mp_digit  *tmpx;
02796   mp_word   W1;
02797 
02798   /* grow the destination as required */
02799   pa = a->used + a->used;
02800   if (b->alloc < pa) {
02801     if ((res = mp_grow (b, pa)) != MP_OKAY) {
02802       return res;
02803     }
02804   }
02805 
02806   if (pa > MP_WARRAY)
02807     return MP_RANGE;  /* TAO range check */
02808 
02809 #ifdef CYASSL_SMALL_STACK
02810   W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, 0, DYNAMIC_TYPE_BIGINT);
02811   if (W == NULL)    
02812     return MP_MEM;
02813 #endif
02814 
02815   /* number of output digits to produce */
02816   W1 = 0;
02817   for (ix = 0; ix < pa; ix++) { 
02818       int      tx, ty, iy;
02819       mp_word  _W;
02820       mp_digit *tmpy;
02821 
02822       /* clear counter */
02823       _W = 0;
02824 
02825       /* get offsets into the two bignums */
02826       ty = MIN(a->used-1, ix);
02827       tx = ix - ty;
02828 
02829       /* setup temp aliases */
02830       tmpx = a->dp + tx;
02831       tmpy = a->dp + ty;
02832 
02833       /* this is the number of times the loop will iterrate, essentially
02834          while (tx++ < a->used && ty-- >= 0) { ... }
02835        */
02836       iy = MIN(a->used-tx, ty+1);
02837 
02838       /* now for squaring tx can never equal ty 
02839        * we halve the distance since they approach at a rate of 2x
02840        * and we have to round because odd cases need to be executed
02841        */
02842       iy = MIN(iy, (ty-tx+1)>>1);
02843 
02844       /* execute loop */
02845       for (iz = 0; iz < iy; iz++) {
02846          _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
02847       }
02848 
02849       /* double the inner product and add carry */
02850       _W = _W + _W + W1;
02851 
02852       /* even columns have the square term in them */
02853       if ((ix&1) == 0) {
02854          _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
02855       }
02856 
02857       /* store it */
02858       W[ix] = (mp_digit)(_W & MP_MASK);
02859 
02860       /* make next carry */
02861       W1 = _W >> ((mp_word)DIGIT_BIT);
02862   }
02863 
02864   /* setup dest */
02865   olduse  = b->used;
02866   b->used = a->used+a->used;
02867 
02868   {
02869     mp_digit *tmpb;
02870     tmpb = b->dp;
02871     for (ix = 0; ix < pa; ix++) {
02872       *tmpb++ = W[ix] & MP_MASK;
02873     }
02874 
02875     /* clear unused digits [that existed in the old copy of c] */
02876     for (; ix < olduse; ix++) {
02877       *tmpb++ = 0;
02878     }
02879   }
02880   mp_clamp (b);
02881 
02882 #ifdef CYASSL_SMALL_STACK
02883   XFREE(W, 0, DYNAMIC_TYPE_BIGINT);
02884 #endif
02885 
02886   return MP_OKAY;
02887 }
02888 
02889 
02890 /* Fast (comba) multiplier
02891  *
02892  * This is the fast column-array [comba] multiplier.  It is 
02893  * designed to compute the columns of the product first 
02894  * then handle the carries afterwards.  This has the effect 
02895  * of making the nested loops that compute the columns very
02896  * simple and schedulable on super-scalar processors.
02897  *
02898  * This has been modified to produce a variable number of 
02899  * digits of output so if say only a half-product is required 
02900  * you don't have to compute the upper half (a feature 
02901  * required for fast Barrett reduction).
02902  *
02903  * Based on Algorithm 14.12 on pp.595 of HAC.
02904  *
02905  */
02906 int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
02907 {
02908   int     olduse, res, pa, ix, iz;
02909 #ifdef CYASSL_SMALL_STACK
02910   mp_digit* W;    /* uses dynamic memory and slower */
02911 #else
02912   mp_digit W[MP_WARRAY];
02913 #endif
02914   register mp_word  _W;
02915 
02916   /* grow the destination as required */
02917   if (c->alloc < digs) {
02918     if ((res = mp_grow (c, digs)) != MP_OKAY) {
02919       return res;
02920     }
02921   }
02922 
02923   /* number of output digits to produce */
02924   pa = MIN(digs, a->used + b->used);
02925   if (pa > MP_WARRAY)
02926     return MP_RANGE;  /* TAO range check */
02927 
02928 #ifdef CYASSL_SMALL_STACK
02929   W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, 0, DYNAMIC_TYPE_BIGINT);
02930   if (W == NULL)    
02931     return MP_MEM;
02932 #endif
02933 
02934   /* clear the carry */
02935   _W = 0;
02936   for (ix = 0; ix < pa; ix++) { 
02937       int      tx, ty;
02938       int      iy;
02939       mp_digit *tmpx, *tmpy;
02940 
02941       /* get offsets into the two bignums */
02942       ty = MIN(b->used-1, ix);
02943       tx = ix - ty;
02944 
02945       /* setup temp aliases */
02946       tmpx = a->dp + tx;
02947       tmpy = b->dp + ty;
02948 
02949       /* this is the number of times the loop will iterrate, essentially 
02950          while (tx++ < a->used && ty-- >= 0) { ... }
02951        */
02952       iy = MIN(a->used-tx, ty+1);
02953 
02954       /* execute loop */
02955       for (iz = 0; iz < iy; ++iz) {
02956          _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
02957 
02958       }
02959 
02960       /* store term */
02961       W[ix] = ((mp_digit)_W) & MP_MASK;
02962 
02963       /* make next carry */
02964       _W = _W >> ((mp_word)DIGIT_BIT);
02965  }
02966 
02967   /* setup dest */
02968   olduse  = c->used;
02969   c->used = pa;
02970 
02971   {
02972     register mp_digit *tmpc;
02973     tmpc = c->dp;
02974     for (ix = 0; ix < pa+1; ix++) {
02975       /* now extract the previous digit [below the carry] */
02976       *tmpc++ = W[ix];
02977     }
02978 
02979     /* clear unused digits [that existed in the old copy of c] */
02980     for (; ix < olduse; ix++) {
02981       *tmpc++ = 0;
02982     }
02983   }
02984   mp_clamp (c);
02985 
02986 #ifdef CYASSL_SMALL_STACK
02987   XFREE(W, 0, DYNAMIC_TYPE_BIGINT);
02988 #endif
02989 
02990   return MP_OKAY;
02991 }
02992 
02993 
02994 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
02995 int s_mp_sqr (mp_int * a, mp_int * b)
02996 {
02997   mp_int  t;
02998   int     res, ix, iy, pa;
02999   mp_word r;
03000   mp_digit u, tmpx, *tmpt;
03001 
03002   pa = a->used;
03003   if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
03004     return res;
03005   }
03006 
03007   /* default used is maximum possible size */
03008   t.used = 2*pa + 1;
03009 
03010   for (ix = 0; ix < pa; ix++) {
03011     /* first calculate the digit at 2*ix */
03012     /* calculate double precision result */
03013     r = ((mp_word) t.dp[2*ix]) +
03014         ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
03015 
03016     /* store lower part in result */
03017     t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
03018 
03019     /* get the carry */
03020     u           = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
03021 
03022     /* left hand side of A[ix] * A[iy] */
03023     tmpx        = a->dp[ix];
03024 
03025     /* alias for where to store the results */
03026     tmpt        = t.dp + (2*ix + 1);
03027     
03028     for (iy = ix + 1; iy < pa; iy++) {
03029       /* first calculate the product */
03030       r       = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
03031 
03032       /* now calculate the double precision result, note we use
03033        * addition instead of *2 since it's easier to optimize
03034        */
03035       r       = ((mp_word) *tmpt) + r + r + ((mp_word) u);
03036 
03037       /* store lower part */
03038       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
03039 
03040       /* get carry */
03041       u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
03042     }
03043     /* propagate upwards */
03044     while (u != ((mp_digit) 0)) {
03045       r       = ((mp_word) *tmpt) + ((mp_word) u);
03046       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
03047       u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
03048     }
03049   }
03050 
03051   mp_clamp (&t);
03052   mp_exch (&t, b);
03053   mp_clear (&t);
03054   return MP_OKAY;
03055 }
03056 
03057 
03058 /* multiplies |a| * |b| and only computes upto digs digits of result
03059  * HAC pp. 595, Algorithm 14.12  Modified so you can control how 
03060  * many digits of output are created.
03061  */
03062 int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
03063 {
03064   mp_int  t;
03065   int     res, pa, pb, ix, iy;
03066   mp_digit u;
03067   mp_word r;
03068   mp_digit tmpx, *tmpt, *tmpy;
03069 
03070   /* can we use the fast multiplier? */
03071   if (((digs) < MP_WARRAY) &&
03072       MIN (a->used, b->used) < 
03073           (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
03074     return fast_s_mp_mul_digs (a, b, c, digs);
03075   }
03076 
03077   if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
03078     return res;
03079   }
03080   t.used = digs;
03081 
03082   /* compute the digits of the product directly */
03083   pa = a->used;
03084   for (ix = 0; ix < pa; ix++) {
03085     /* set the carry to zero */
03086     u = 0;
03087 
03088     /* limit ourselves to making digs digits of output */
03089     pb = MIN (b->used, digs - ix);
03090 
03091     /* setup some aliases */
03092     /* copy of the digit from a used within the nested loop */
03093     tmpx = a->dp[ix];
03094     
03095     /* an alias for the destination shifted ix places */
03096     tmpt = t.dp + ix;
03097     
03098     /* an alias for the digits of b */
03099     tmpy = b->dp;
03100 
03101     /* compute the columns of the output and propagate the carry */
03102     for (iy = 0; iy < pb; iy++) {
03103       /* compute the column as a mp_word */
03104       r       = ((mp_word)*tmpt) +
03105                 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
03106                 ((mp_word) u);
03107 
03108       /* the new column is the lower part of the result */
03109       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
03110 
03111       /* get the carry word from the result */
03112       u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
03113     }
03114     /* set carry if it is placed below digs */
03115     if (ix + iy < digs) {
03116       *tmpt = u;
03117     }
03118   }
03119 
03120   mp_clamp (&t);
03121   mp_exch (&t, c);
03122 
03123   mp_clear (&t);
03124   return MP_OKAY;
03125 }
03126 
03127 
03128 /*
03129  * shifts with subtractions when the result is greater than b.
03130  *
03131  * The method is slightly modified to shift B unconditionally upto just under
03132  * the leading bit of b.  This saves alot of multiple precision shifting.
03133  */
03134 int mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
03135 {
03136   int     x, bits, res;
03137 
03138   /* how many bits of last digit does b use */
03139   bits = mp_count_bits (b) % DIGIT_BIT;
03140 
03141   if (b->used > 1) {
03142      if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) {
03143         return res;
03144      }
03145   } else {
03146      mp_set(a, 1);
03147      bits = 1;
03148   }
03149 
03150 
03151   /* now compute C = A * B mod b */
03152   for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
03153     if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
03154       return res;
03155     }
03156     if (mp_cmp_mag (a, b) != MP_LT) {
03157       if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
03158         return res;
03159       }
03160     }
03161   }
03162 
03163   return MP_OKAY;
03164 }
03165 
03166 
03167 #ifdef MP_LOW_MEM
03168    #define TAB_SIZE 32
03169 #else
03170    #define TAB_SIZE 256
03171 #endif
03172 
03173 int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
03174 {
03175   mp_int  M[TAB_SIZE], res, mu;
03176   mp_digit buf;
03177   int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
03178   int (*redux)(mp_int*,mp_int*,mp_int*);
03179 
03180   /* find window size */
03181   x = mp_count_bits (X);
03182   if (x <= 7) {
03183     winsize = 2;
03184   } else if (x <= 36) {
03185     winsize = 3;
03186   } else if (x <= 140) {
03187     winsize = 4;
03188   } else if (x <= 450) {
03189     winsize = 5;
03190   } else if (x <= 1303) {
03191     winsize = 6;
03192   } else if (x <= 3529) {
03193     winsize = 7;
03194   } else {
03195     winsize = 8;
03196   }
03197 
03198 #ifdef MP_LOW_MEM
03199     if (winsize > 5) {
03200        winsize = 5;
03201     }
03202 #endif
03203 
03204   /* init M array */
03205   /* init first cell */
03206   if ((err = mp_init(&M[1])) != MP_OKAY) {
03207      return err; 
03208   }
03209 
03210   /* now init the second half of the array */
03211   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
03212     if ((err = mp_init(&M[x])) != MP_OKAY) {
03213       for (y = 1<<(winsize-1); y < x; y++) {
03214         mp_clear (&M[y]);
03215       }
03216       mp_clear(&M[1]);
03217       return err;
03218     }
03219   }
03220 
03221   /* create mu, used for Barrett reduction */
03222   if ((err = mp_init (&mu)) != MP_OKAY) {
03223     goto LBL_M;
03224   }
03225   
03226   if (redmode == 0) {
03227      if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
03228         goto LBL_MU;
03229      }
03230      redux = mp_reduce;
03231   } else {
03232      if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) {
03233         goto LBL_MU;
03234      }
03235      redux = mp_reduce_2k_l;
03236   }    
03237 
03238   /* create M table
03239    *
03240    * The M table contains powers of the base, 
03241    * e.g. M[x] = G**x mod P
03242    *
03243    * The first half of the table is not 
03244    * computed though accept for M[0] and M[1]
03245    */
03246   if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
03247     goto LBL_MU;
03248   }
03249 
03250   /* compute the value at M[1<<(winsize-1)] by squaring 
03251    * M[1] (winsize-1) times 
03252    */
03253   if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
03254     goto LBL_MU;
03255   }
03256 
03257   for (x = 0; x < (winsize - 1); x++) {
03258     /* square it */
03259     if ((err = mp_sqr (&M[1 << (winsize - 1)], 
03260                        &M[1 << (winsize - 1)])) != MP_OKAY) {
03261       goto LBL_MU;
03262     }
03263 
03264     /* reduce modulo P */
03265     if ((err = redux (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
03266       goto LBL_MU;
03267     }
03268   }
03269 
03270   /* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
03271    * for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
03272    */
03273   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
03274     if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
03275       goto LBL_MU;
03276     }
03277     if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
03278       goto LBL_MU;
03279     }
03280   }
03281 
03282   /* setup result */
03283   if ((err = mp_init (&res)) != MP_OKAY) {
03284     goto LBL_MU;
03285   }
03286   mp_set (&res, 1);
03287 
03288   /* set initial mode and bit cnt */
03289   mode   = 0;
03290   bitcnt = 1;
03291   buf    = 0;
03292   digidx = X->used - 1;
03293   bitcpy = 0;
03294   bitbuf = 0;
03295 
03296   for (;;) {
03297     /* grab next digit as required */
03298     if (--bitcnt == 0) {
03299       /* if digidx == -1 we are out of digits */
03300       if (digidx == -1) {
03301         break;
03302       }
03303       /* read next digit and reset the bitcnt */
03304       buf    = X->dp[digidx--];
03305       bitcnt = (int) DIGIT_BIT;
03306     }
03307 
03308     /* grab the next msb from the exponent */
03309     y     = (int)(buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
03310     buf <<= (mp_digit)1;
03311 
03312     /* if the bit is zero and mode == 0 then we ignore it
03313      * These represent the leading zero bits before the first 1 bit
03314      * in the exponent.  Technically this opt is not required but it
03315      * does lower the # of trivial squaring/reductions used
03316      */
03317     if (mode == 0 && y == 0) {
03318       continue;
03319     }
03320 
03321     /* if the bit is zero and mode == 1 then we square */
03322     if (mode == 1 && y == 0) {
03323       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
03324         goto LBL_RES;
03325       }
03326       if ((err = redux (&res, P, &mu)) != MP_OKAY) {
03327         goto LBL_RES;
03328       }
03329       continue;
03330     }
03331 
03332     /* else we add it to the window */
03333     bitbuf |= (y << (winsize - ++bitcpy));
03334     mode    = 2;
03335 
03336     if (bitcpy == winsize) {
03337       /* ok window is filled so square as required and multiply  */
03338       /* square first */
03339       for (x = 0; x < winsize; x++) {
03340         if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
03341           goto LBL_RES;
03342         }
03343         if ((err = redux (&res, P, &mu)) != MP_OKAY) {
03344           goto LBL_RES;
03345         }
03346       }
03347 
03348       /* then multiply */
03349       if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
03350         goto LBL_RES;
03351       }
03352       if ((err = redux (&res, P, &mu)) != MP_OKAY) {
03353         goto LBL_RES;
03354       }
03355 
03356       /* empty window and reset */
03357       bitcpy = 0;
03358       bitbuf = 0;
03359       mode   = 1;
03360     }
03361   }
03362 
03363   /* if bits remain then square/multiply */
03364   if (mode == 2 && bitcpy > 0) {
03365     /* square then multiply if the bit is set */
03366     for (x = 0; x < bitcpy; x++) {
03367       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
03368         goto LBL_RES;
03369       }
03370       if ((err = redux (&res, P, &mu)) != MP_OKAY) {
03371         goto LBL_RES;
03372       }
03373 
03374       bitbuf <<= 1;
03375       if ((bitbuf & (1 << winsize)) != 0) {
03376         /* then multiply */
03377         if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
03378           goto LBL_RES;
03379         }
03380         if ((err = redux (&res, P, &mu)) != MP_OKAY) {
03381           goto LBL_RES;
03382         }
03383       }
03384     }
03385   }
03386 
03387   mp_exch (&res, Y);
03388   err = MP_OKAY;
03389 LBL_RES:mp_clear (&res);
03390 LBL_MU:mp_clear (&mu);
03391 LBL_M:
03392   mp_clear(&M[1]);
03393   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
03394     mp_clear (&M[x]);
03395   }
03396   return err;
03397 }
03398 
03399 
03400 /* pre-calculate the value required for Barrett reduction
03401  * For a given modulus "b" it calulates the value required in "a"
03402  */
03403 int mp_reduce_setup (mp_int * a, mp_int * b)
03404 {
03405   int     res;
03406   
03407   if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
03408     return res;
03409   }
03410   return mp_div (a, b, a, NULL);
03411 }
03412 
03413 
03414 /* reduces x mod m, assumes 0 < x < m**2, mu is 
03415  * precomputed via mp_reduce_setup.
03416  * From HAC pp.604 Algorithm 14.42
03417  */
03418 int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
03419 {
03420   mp_int  q;
03421   int     res, um = m->used;
03422 
03423   /* q = x */
03424   if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
03425     return res;
03426   }
03427 
03428   /* q1 = x / b**(k-1)  */
03429   mp_rshd (&q, um - 1);         
03430 
03431   /* according to HAC this optimization is ok */
03432   if (((mp_word) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
03433     if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
03434       goto CLEANUP;
03435     }
03436   } else {
03437 #ifdef BN_S_MP_MUL_HIGH_DIGS_C
03438     if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
03439       goto CLEANUP;
03440     }
03441 #elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C)
03442     if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
03443       goto CLEANUP;
03444     }
03445 #else 
03446     { 
03447       res = MP_VAL;
03448       goto CLEANUP;
03449     }
03450 #endif
03451   }
03452 
03453   /* q3 = q2 / b**(k+1) */
03454   mp_rshd (&q, um + 1);         
03455 
03456   /* x = x mod b**(k+1), quick (no division) */
03457   if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
03458     goto CLEANUP;
03459   }
03460 
03461   /* q = q * m mod b**(k+1), quick (no division) */
03462   if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
03463     goto CLEANUP;
03464   }
03465 
03466   /* x = x - q */
03467   if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
03468     goto CLEANUP;
03469   }
03470 
03471   /* If x < 0, add b**(k+1) to it */
03472   if (mp_cmp_d (x, 0) == MP_LT) {
03473     mp_set (&q, 1);
03474     if ((res = mp_lshd (&q, um + 1)) != MP_OKAY)
03475       goto CLEANUP;
03476     if ((res = mp_add (x, &q, x)) != MP_OKAY)
03477       goto CLEANUP;
03478   }
03479 
03480   /* Back off if it's too big */
03481   while (mp_cmp (x, m) != MP_LT) {
03482     if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
03483       goto CLEANUP;
03484     }
03485   }
03486   
03487 CLEANUP:
03488   mp_clear (&q);
03489 
03490   return res;
03491 }
03492 
03493 
03494 /* reduces a modulo n where n is of the form 2**p - d 
03495    This differs from reduce_2k since "d" can be larger
03496    than a single digit.
03497 */
03498 int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d)
03499 {
03500    mp_int q;
03501    int    p, res;
03502    
03503    if ((res = mp_init(&q)) != MP_OKAY) {
03504       return res;
03505    }
03506    
03507    p = mp_count_bits(n);    
03508 top:
03509    /* q = a/2**p, a = a mod 2**p */
03510    if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
03511       goto ERR;
03512    }
03513    
03514    /* q = q * d */
03515    if ((res = mp_mul(&q, d, &q)) != MP_OKAY) { 
03516       goto ERR;
03517    }
03518    
03519    /* a = a + q */
03520    if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
03521       goto ERR;
03522    }
03523    
03524    if (mp_cmp_mag(a, n) != MP_LT) {
03525       s_mp_sub(a, n, a);
03526       goto top;
03527    }
03528    
03529 ERR:
03530    mp_clear(&q);
03531    return res;
03532 }
03533 
03534 
03535 /* determines the setup value */
03536 int mp_reduce_2k_setup_l(mp_int *a, mp_int *d)
03537 {
03538    int    res;
03539    mp_int tmp;
03540    
03541    if ((res = mp_init(&tmp)) != MP_OKAY) {
03542       return res;
03543    }
03544    
03545    if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
03546       goto ERR;
03547    }
03548    
03549    if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
03550       goto ERR;
03551    }
03552    
03553 ERR:
03554    mp_clear(&tmp);
03555    return res;
03556 }
03557 
03558 
03559 /* multiplies |a| * |b| and does not compute the lower digs digits
03560  * [meant to get the higher part of the product]
03561  */
03562 int
03563 s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
03564 {
03565   mp_int  t;
03566   int     res, pa, pb, ix, iy;
03567   mp_digit u;
03568   mp_word r;
03569   mp_digit tmpx, *tmpt, *tmpy;
03570 
03571   /* can we use the fast multiplier? */
03572 #ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
03573   if (((a->used + b->used + 1) < MP_WARRAY)
03574       && MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
03575     return fast_s_mp_mul_high_digs (a, b, c, digs);
03576   }
03577 #endif
03578 
03579   if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
03580     return res;
03581   }
03582   t.used = a->used + b->used + 1;
03583 
03584   pa = a->used;
03585   pb = b->used;
03586   for (ix = 0; ix < pa; ix++) {
03587     /* clear the carry */
03588     u = 0;
03589 
03590     /* left hand side of A[ix] * B[iy] */
03591     tmpx = a->dp[ix];
03592 
03593     /* alias to the address of where the digits will be stored */
03594     tmpt = &(t.dp[digs]);
03595 
03596     /* alias for where to read the right hand side from */
03597     tmpy = b->dp + (digs - ix);
03598 
03599     for (iy = digs - ix; iy < pb; iy++) {
03600       /* calculate the double precision result */
03601       r       = ((mp_word)*tmpt) +
03602                 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
03603                 ((mp_word) u);
03604 
03605       /* get the lower part */
03606       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
03607 
03608       /* carry the carry */
03609       u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
03610     }
03611     *tmpt = u;
03612   }
03613   mp_clamp (&t);
03614   mp_exch (&t, c);
03615   mp_clear (&t);
03616   return MP_OKAY;
03617 }
03618 
03619 
03620 /* this is a modified version of fast_s_mul_digs that only produces
03621  * output digits *above* digs.  See the comments for fast_s_mul_digs
03622  * to see how it works.
03623  *
03624  * This is used in the Barrett reduction since for one of the multiplications
03625  * only the higher digits were needed.  This essentially halves the work.
03626  *
03627  * Based on Algorithm 14.12 on pp.595 of HAC.
03628  */
03629 int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
03630 {
03631   int     olduse, res, pa, ix, iz;
03632 #ifdef CYASSL_SMALL_STACK
03633   mp_digit* W;    /* uses dynamic memory and slower */
03634 #else
03635   mp_digit W[MP_WARRAY];
03636 #endif
03637   mp_word  _W;
03638 
03639   /* grow the destination as required */
03640   pa = a->used + b->used;
03641   if (c->alloc < pa) {
03642     if ((res = mp_grow (c, pa)) != MP_OKAY) {
03643       return res;
03644     }
03645   }
03646 
03647   if (pa > MP_WARRAY)
03648     return MP_RANGE;  /* TAO range check */
03649   
03650 #ifdef CYASSL_SMALL_STACK
03651   W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, 0, DYNAMIC_TYPE_BIGINT);
03652   if (W == NULL)    
03653     return MP_MEM;
03654 #endif
03655 
03656   /* number of output digits to produce */
03657   pa = a->used + b->used;
03658   _W = 0;
03659   for (ix = digs; ix < pa; ix++) { 
03660       int      tx, ty, iy;
03661       mp_digit *tmpx, *tmpy;
03662 
03663       /* get offsets into the two bignums */
03664       ty = MIN(b->used-1, ix);
03665       tx = ix - ty;
03666 
03667       /* setup temp aliases */
03668       tmpx = a->dp + tx;
03669       tmpy = b->dp + ty;
03670 
03671       /* this is the number of times the loop will iterrate, essentially its 
03672          while (tx++ < a->used && ty-- >= 0) { ... }
03673        */
03674       iy = MIN(a->used-tx, ty+1);
03675 
03676       /* execute loop */
03677       for (iz = 0; iz < iy; iz++) {
03678          _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
03679       }
03680 
03681       /* store term */
03682       W[ix] = ((mp_digit)_W) & MP_MASK;
03683 
03684       /* make next carry */
03685       _W = _W >> ((mp_word)DIGIT_BIT);
03686   }
03687   
03688   /* setup dest */
03689   olduse  = c->used;
03690   c->used = pa;
03691 
03692   {
03693     register mp_digit *tmpc;
03694 
03695     tmpc = c->dp + digs;
03696     for (ix = digs; ix <= pa; ix++) {
03697       /* now extract the previous digit [below the carry] */
03698       *tmpc++ = W[ix];
03699     }
03700 
03701     /* clear unused digits [that existed in the old copy of c] */
03702     for (; ix < olduse; ix++) {
03703       *tmpc++ = 0;
03704     }
03705   }
03706   mp_clamp (c);
03707 
03708 #ifdef CYASSL_SMALL_STACK
03709   XFREE(W, 0, DYNAMIC_TYPE_BIGINT);
03710 #endif
03711 
03712   return MP_OKAY;
03713 }
03714 
03715 
03716 /* set a 32-bit const */
03717 int mp_set_int (mp_int * a, unsigned long b)
03718 {
03719   int     x, res;
03720 
03721   mp_zero (a);
03722   
03723   /* set four bits at a time */
03724   for (x = 0; x < 8; x++) {
03725     /* shift the number up four bits */
03726     if ((res = mp_mul_2d (a, 4, a)) != MP_OKAY) {
03727       return res;
03728     }
03729 
03730     /* OR in the top four bits of the source */
03731     a->dp[0] |= (b >> 28) & 15;
03732 
03733     /* shift the source up to the next four bits */
03734     b <<= 4;
03735 
03736     /* ensure that digits are not clamped off */
03737     a->used += 1;
03738   }
03739   mp_clamp (a);
03740   return MP_OKAY;
03741 }
03742 
03743 
03744 #if defined(CYASSL_KEY_GEN) || defined(HAVE_ECC)
03745 
03746 /* c = a * a (mod b) */
03747 int mp_sqrmod (mp_int * a, mp_int * b, mp_int * c)
03748 {
03749   int     res;
03750   mp_int  t;
03751 
03752   if ((res = mp_init (&t)) != MP_OKAY) {
03753     return res;
03754   }
03755 
03756   if ((res = mp_sqr (a, &t)) != MP_OKAY) {
03757     mp_clear (&t);
03758     return res;
03759   }
03760   res = mp_mod (&t, b, c);
03761   mp_clear (&t);
03762   return res;
03763 }
03764 
03765 #endif
03766 
03767 
03768 #if defined(HAVE_ECC) || !defined(NO_PWDBASED) || defined(CYASSL_SNIFFER) || defined(CYASSL_HAVE_WOLFSCEP)
03769 
03770 /* single digit addition */
03771 int mp_add_d (mp_int* a, mp_digit b, mp_int* c)
03772 {
03773   int     res, ix, oldused;
03774   mp_digit *tmpa, *tmpc, mu;
03775 
03776   /* grow c as required */
03777   if (c->alloc < a->used + 1) {
03778      if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
03779         return res;
03780      }
03781   }
03782 
03783   /* if a is negative and |a| >= b, call c = |a| - b */
03784   if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) {
03785      /* temporarily fix sign of a */
03786      a->sign = MP_ZPOS;
03787 
03788      /* c = |a| - b */
03789      res = mp_sub_d(a, b, c);
03790 
03791      /* fix sign  */
03792      a->sign = c->sign = MP_NEG;
03793 
03794      /* clamp */
03795      mp_clamp(c);
03796 
03797      return res;
03798   }
03799 
03800   /* old number of used digits in c */
03801   oldused = c->used;
03802 
03803   /* sign always positive */
03804   c->sign = MP_ZPOS;
03805 
03806   /* source alias */
03807   tmpa    = a->dp;
03808 
03809   /* destination alias */
03810   tmpc    = c->dp;
03811 
03812   /* if a is positive */
03813   if (a->sign == MP_ZPOS) {
03814      /* add digit, after this we're propagating
03815       * the carry.
03816       */
03817      *tmpc   = *tmpa++ + b;
03818      mu      = *tmpc >> DIGIT_BIT;
03819      *tmpc++ &= MP_MASK;
03820 
03821      /* now handle rest of the digits */
03822      for (ix = 1; ix < a->used; ix++) {
03823         *tmpc   = *tmpa++ + mu;
03824         mu      = *tmpc >> DIGIT_BIT;
03825         *tmpc++ &= MP_MASK;
03826      }
03827      /* set final carry */
03828      if (mu != 0 && ix < c->alloc) {
03829         ix++;
03830         *tmpc++  = mu;
03831      }
03832 
03833      /* setup size */
03834      c->used = a->used + 1;
03835   } else {
03836      /* a was negative and |a| < b */
03837      c->used  = 1;
03838 
03839      /* the result is a single digit */
03840      if (a->used == 1) {
03841         *tmpc++  =  b - a->dp[0];
03842      } else {
03843         *tmpc++  =  b;
03844      }
03845 
03846      /* setup count so the clearing of oldused
03847       * can fall through correctly
03848       */
03849      ix       = 1;
03850   }
03851 
03852   /* now zero to oldused */
03853   while (ix++ < oldused) {
03854      *tmpc++ = 0;
03855   }
03856   mp_clamp(c);
03857 
03858   return MP_OKAY;
03859 }
03860 
03861 
03862 /* single digit subtraction */
03863 int mp_sub_d (mp_int * a, mp_digit b, mp_int * c)
03864 {
03865   mp_digit *tmpa, *tmpc, mu;
03866   int       res, ix, oldused;
03867 
03868   /* grow c as required */
03869   if (c->alloc < a->used + 1) {
03870      if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
03871         return res;
03872      }
03873   }
03874 
03875   /* if a is negative just do an unsigned
03876    * addition [with fudged signs]
03877    */
03878   if (a->sign == MP_NEG) {
03879      a->sign = MP_ZPOS;
03880      res     = mp_add_d(a, b, c);
03881      a->sign = c->sign = MP_NEG;
03882 
03883      /* clamp */
03884      mp_clamp(c);
03885 
03886      return res;
03887   }
03888 
03889   /* setup regs */
03890   oldused = c->used;
03891   tmpa    = a->dp;
03892   tmpc    = c->dp;
03893 
03894   /* if a <= b simply fix the single digit */
03895   if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) {
03896      if (a->used == 1) {
03897         *tmpc++ = b - *tmpa;
03898      } else {
03899         *tmpc++ = b;
03900      }
03901      ix      = 1;
03902 
03903      /* negative/1digit */
03904      c->sign = MP_NEG;
03905      c->used = 1;
03906   } else {
03907      /* positive/size */
03908      c->sign = MP_ZPOS;
03909      c->used = a->used;
03910 
03911      /* subtract first digit */
03912      *tmpc    = *tmpa++ - b;
03913      mu       = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
03914      *tmpc++ &= MP_MASK;
03915 
03916      /* handle rest of the digits */
03917      for (ix = 1; ix < a->used; ix++) {
03918         *tmpc    = *tmpa++ - mu;
03919         mu       = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
03920         *tmpc++ &= MP_MASK;
03921      }
03922   }
03923 
03924   /* zero excess digits */
03925   while (ix++ < oldused) {
03926      *tmpc++ = 0;
03927   }
03928   mp_clamp(c);
03929   return MP_OKAY;
03930 }
03931 
03932 #endif /* defined(HAVE_ECC) || !defined(NO_PWDBASED) */
03933 
03934 
03935 #ifdef CYASSL_KEY_GEN
03936 
03937 int mp_cnt_lsb(mp_int *a);
03938 
03939 static int s_is_power_of_two(mp_digit b, int *p)
03940 {
03941    int x;
03942 
03943    /* fast return if no power of two */
03944    if ((b==0) || (b & (b-1))) {
03945       return 0;
03946    }
03947 
03948    for (x = 0; x < DIGIT_BIT; x++) {
03949       if (b == (((mp_digit)1)<<x)) {
03950          *p = x;
03951          return 1;
03952       }
03953    }
03954    return 0;
03955 }
03956 
03957 /* single digit division (based on routine from MPI) */
03958 static int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
03959 {
03960   mp_int  q;
03961   mp_word w;
03962   mp_digit t;
03963   int     res, ix;
03964 
03965   /* cannot divide by zero */
03966   if (b == 0) {
03967      return MP_VAL;
03968   }
03969 
03970   /* quick outs */
03971   if (b == 1 || mp_iszero(a) == 1) {
03972      if (d != NULL) {
03973         *d = 0;
03974      }
03975      if (c != NULL) {
03976         return mp_copy(a, c);
03977      }
03978      return MP_OKAY;
03979   }
03980 
03981   /* power of two ? */
03982   if (s_is_power_of_two(b, &ix) == 1) {
03983      if (d != NULL) {
03984         *d = a->dp[0] & ((((mp_digit)1)<<ix) - 1);
03985      }
03986      if (c != NULL) {
03987         return mp_div_2d(a, ix, c, NULL);
03988      }
03989      return MP_OKAY;
03990   }
03991 
03992 #ifdef BN_MP_DIV_3_C
03993   /* three? */
03994   if (b == 3) {
03995      return mp_div_3(a, c, d);
03996   }
03997 #endif
03998 
03999   /* no easy answer [c'est la vie].  Just division */
04000   if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
04001      return res;
04002   }
04003   
04004   q.used = a->used;
04005   q.sign = a->sign;
04006   w = 0;
04007   for (ix = a->used - 1; ix >= 0; ix--) {
04008      w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
04009      
04010      if (w >= b) {
04011         t = (mp_digit)(w / b);
04012         w -= ((mp_word)t) * ((mp_word)b);
04013       } else {
04014         t = 0;
04015       }
04016       q.dp[ix] = (mp_digit)t;
04017   }
04018   
04019   if (d != NULL) {
04020      *d = (mp_digit)w;
04021   }
04022   
04023   if (c != NULL) {
04024      mp_clamp(&q);
04025      mp_exch(&q, c);
04026   }
04027   mp_clear(&q);
04028   
04029   return res;
04030 }
04031 
04032 
04033 static int mp_mod_d (mp_int * a, mp_digit b, mp_digit * c)
04034 {
04035   return mp_div_d(a, b, NULL, c);
04036 }
04037 
04038 
04039 const mp_digit ltm_prime_tab[] = {
04040   0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
04041   0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
04042   0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
04043   0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F,
04044 #ifndef MP_8BIT
04045   0x0083,
04046   0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
04047   0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
04048   0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
04049   0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
04050 
04051   0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
04052   0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
04053   0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
04054   0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
04055   0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
04056   0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
04057   0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
04058   0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
04059 
04060   0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
04061   0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
04062   0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
04063   0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
04064   0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
04065   0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
04066   0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
04067   0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
04068 
04069   0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
04070   0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
04071   0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
04072   0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
04073   0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
04074   0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
04075   0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
04076   0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
04077 #endif
04078 };
04079 
04080 
04081 /* Miller-Rabin test of "a" to the base of "b" as described in 
04082  * HAC pp. 139 Algorithm 4.24
04083  *
04084  * Sets result to 0 if definitely composite or 1 if probably prime.
04085  * Randomly the chance of error is no more than 1/4 and often 
04086  * very much lower.
04087  */
04088 static int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result)
04089 {
04090   mp_int  n1, y, r;
04091   int     s, j, err;
04092 
04093   /* default */
04094   *result = MP_NO;
04095 
04096   /* ensure b > 1 */
04097   if (mp_cmp_d(b, 1) != MP_GT) {
04098      return MP_VAL;
04099   }     
04100 
04101   /* get n1 = a - 1 */
04102   if ((err = mp_init_copy (&n1, a)) != MP_OKAY) {
04103     return err;
04104   }
04105   if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) {
04106     goto LBL_N1;
04107   }
04108 
04109   /* set 2**s * r = n1 */
04110   if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) {
04111     goto LBL_N1;
04112   }
04113 
04114   /* count the number of least significant bits
04115    * which are zero
04116    */
04117   s = mp_cnt_lsb(&r);
04118 
04119   /* now divide n - 1 by 2**s */
04120   if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) {
04121     goto LBL_R;
04122   }
04123 
04124   /* compute y = b**r mod a */
04125   if ((err = mp_init (&y)) != MP_OKAY) {
04126     goto LBL_R;
04127   }
04128   if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) {
04129     goto LBL_Y;
04130   }
04131 
04132   /* if y != 1 and y != n1 do */
04133   if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) {
04134     j = 1;
04135     /* while j <= s-1 and y != n1 */
04136     while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) {
04137       if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) {
04138          goto LBL_Y;
04139       }
04140 
04141       /* if y == 1 then composite */
04142       if (mp_cmp_d (&y, 1) == MP_EQ) {
04143          goto LBL_Y;
04144       }
04145 
04146       ++j;
04147     }
04148 
04149     /* if y != n1 then composite */
04150     if (mp_cmp (&y, &n1) != MP_EQ) {
04151       goto LBL_Y;
04152     }
04153   }
04154 
04155   /* probably prime now */
04156   *result = MP_YES;
04157 LBL_Y:mp_clear (&y);
04158 LBL_R:mp_clear (&r);
04159 LBL_N1:mp_clear (&n1);
04160   return err;
04161 }
04162 
04163 
04164 /* determines if an integers is divisible by one 
04165  * of the first PRIME_SIZE primes or not
04166  *
04167  * sets result to 0 if not, 1 if yes
04168  */
04169 static int mp_prime_is_divisible (mp_int * a, int *result)
04170 {
04171   int     err, ix;
04172   mp_digit res;
04173 
04174   /* default to not */
04175   *result = MP_NO;
04176 
04177   for (ix = 0; ix < PRIME_SIZE; ix++) {
04178     /* what is a mod LBL_prime_tab[ix] */
04179     if ((err = mp_mod_d (a, ltm_prime_tab[ix], &res)) != MP_OKAY) {
04180       return err;
04181     }
04182 
04183     /* is the residue zero? */
04184     if (res == 0) {
04185       *result = MP_YES;
04186       return MP_OKAY;
04187     }
04188   }
04189 
04190   return MP_OKAY;
04191 }
04192 
04193 
04194 /*
04195  * Sets result to 1 if probably prime, 0 otherwise
04196  */
04197 int mp_prime_is_prime (mp_int * a, int t, int *result)
04198 {
04199   mp_int  b;
04200   int     ix, err, res;
04201 
04202   /* default to no */
04203   *result = MP_NO;
04204 
04205   /* valid value of t? */
04206   if (t <= 0 || t > PRIME_SIZE) {
04207     return MP_VAL;
04208   }
04209 
04210   /* is the input equal to one of the primes in the table? */
04211   for (ix = 0; ix < PRIME_SIZE; ix++) {
04212       if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
04213          *result = 1;
04214          return MP_OKAY;
04215       }
04216   }
04217 
04218   /* first perform trial division */
04219   if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
04220     return err;
04221   }
04222 
04223   /* return if it was trivially divisible */
04224   if (res == MP_YES) {
04225     return MP_OKAY;
04226   }
04227 
04228   /* now perform the miller-rabin rounds */
04229   if ((err = mp_init (&b)) != MP_OKAY) {
04230     return err;
04231   }
04232 
04233   for (ix = 0; ix < t; ix++) {
04234     /* set the prime */
04235     mp_set (&b, ltm_prime_tab[ix]);
04236 
04237     if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
04238       goto LBL_B;
04239     }
04240 
04241     if (res == MP_NO) {
04242       goto LBL_B;
04243     }
04244   }
04245 
04246   /* passed the test */
04247   *result = MP_YES;
04248 LBL_B:mp_clear (&b);
04249   return err;
04250 }
04251 
04252 
04253 /* computes least common multiple as |a*b|/(a, b) */
04254 int mp_lcm (mp_int * a, mp_int * b, mp_int * c)
04255 {
04256   int     res;
04257   mp_int  t1, t2;
04258 
04259 
04260   if ((res = mp_init_multi (&t1, &t2, NULL, NULL, NULL, NULL)) != MP_OKAY) {
04261     return res;
04262   }
04263 
04264   /* t1 = get the GCD of the two inputs */
04265   if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) {
04266     goto LBL_T;
04267   }
04268 
04269   /* divide the smallest by the GCD */
04270   if (mp_cmp_mag(a, b) == MP_LT) {
04271      /* store quotient in t2 such that t2 * b is the LCM */
04272      if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) {
04273         goto LBL_T;
04274      }
04275      res = mp_mul(b, &t2, c);
04276   } else {
04277      /* store quotient in t2 such that t2 * a is the LCM */
04278      if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) {
04279         goto LBL_T;
04280      }
04281      res = mp_mul(a, &t2, c);
04282   }
04283 
04284   /* fix the sign to positive */
04285   c->sign = MP_ZPOS;
04286 
04287 LBL_T:
04288   mp_clear(&t1);
04289   mp_clear(&t2);
04290   return res;
04291 }
04292 
04293 
04294 static const int lnz[16] = { 
04295    4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
04296 };
04297 
04298 /* Counts the number of lsbs which are zero before the first zero bit */
04299 int mp_cnt_lsb(mp_int *a)
04300 {
04301     int x;
04302     mp_digit q, qq;
04303 
04304     /* easy out */
04305     if (mp_iszero(a) == 1) {
04306         return 0;
04307     }
04308 
04309     /* scan lower digits until non-zero */
04310     for (x = 0; x < a->used && a->dp[x] == 0; x++);
04311     q = a->dp[x];
04312     x *= DIGIT_BIT;
04313 
04314     /* now scan this digit until a 1 is found */
04315     if ((q & 1) == 0) {
04316         do {
04317             qq  = q & 15;
04318             x  += lnz[qq];
04319             q >>= 4;
04320         } while (qq == 0);
04321     }
04322     return x;
04323 }
04324 
04325 
04326 /* Greatest Common Divisor using the binary method */
04327 int mp_gcd (mp_int * a, mp_int * b, mp_int * c)
04328 {
04329     mp_int  u, v;
04330     int     k, u_lsb, v_lsb, res;
04331 
04332     /* either zero than gcd is the largest */
04333     if (mp_iszero (a) == MP_YES) {
04334         return mp_abs (b, c);
04335     }
04336     if (mp_iszero (b) == MP_YES) {
04337         return mp_abs (a, c);
04338     }
04339 
04340     /* get copies of a and b we can modify */
04341     if ((res = mp_init_copy (&u, a)) != MP_OKAY) {
04342         return res;
04343     }
04344 
04345     if ((res = mp_init_copy (&v, b)) != MP_OKAY) {
04346         goto LBL_U;
04347     }
04348 
04349     /* must be positive for the remainder of the algorithm */
04350     u.sign = v.sign = MP_ZPOS;
04351 
04352     /* B1.  Find the common power of two for u and v */
04353     u_lsb = mp_cnt_lsb(&u);
04354     v_lsb = mp_cnt_lsb(&v);
04355     k     = MIN(u_lsb, v_lsb);
04356 
04357     if (k > 0) {
04358         /* divide the power of two out */
04359         if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) {
04360             goto LBL_V;
04361         }
04362 
04363         if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) {
04364             goto LBL_V;
04365         }
04366     }
04367 
04368     /* divide any remaining factors of two out */
04369     if (u_lsb != k) {
04370         if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {
04371             goto LBL_V;
04372         }
04373     }
04374 
04375     if (v_lsb != k) {
04376         if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {
04377             goto LBL_V;
04378         }
04379     }
04380 
04381     while (mp_iszero(&v) == 0) {
04382         /* make sure v is the largest */
04383         if (mp_cmp_mag(&u, &v) == MP_GT) {
04384             /* swap u and v to make sure v is >= u */
04385             mp_exch(&u, &v);
04386         }
04387      
04388         /* subtract smallest from largest */
04389         if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) {
04390             goto LBL_V;
04391         }
04392      
04393         /* Divide out all factors of two */
04394         if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
04395             goto LBL_V;
04396         } 
04397     } 
04398 
04399     /* multiply by 2**k which we divided out at the beginning */
04400     if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) {
04401         goto LBL_V;
04402     }
04403     c->sign = MP_ZPOS;
04404     res = MP_OKAY;
04405 LBL_V:mp_clear (&u);
04406 LBL_U:mp_clear (&v);
04407     return res;
04408 }
04409 
04410 
04411 
04412 #endif /* CYASSL_KEY_GEN */
04413 
04414 
04415 #ifdef HAVE_ECC
04416 
04417 /* chars used in radix conversions */
04418 const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
04419 
04420 /* read a string [ASCII] in a given radix */
04421 int mp_read_radix (mp_int * a, const char *str, int radix)
04422 {
04423   int     y, res, neg;
04424   char    ch;
04425 
04426   /* zero the digit bignum */
04427   mp_zero(a);
04428 
04429   /* make sure the radix is ok */
04430   if (radix < 2 || radix > 64) {
04431     return MP_VAL;
04432   }
04433 
04434   /* if the leading digit is a 
04435    * minus set the sign to negative. 
04436    */
04437   if (*str == '-') {
04438     ++str;
04439     neg = MP_NEG;
04440   } else {
04441     neg = MP_ZPOS;
04442   }
04443 
04444   /* set the integer to the default of zero */
04445   mp_zero (a);
04446   
04447   /* process each digit of the string */
04448   while (*str) {
04449     /* if the radix < 36 the conversion is case insensitive
04450      * this allows numbers like 1AB and 1ab to represent the same  value
04451      * [e.g. in hex]
04452      */
04453     ch = (char) ((radix < 36) ? XTOUPPER(*str) : *str);
04454     for (y = 0; y < 64; y++) {
04455       if (ch == mp_s_rmap[y]) {
04456          break;
04457       }
04458     }
04459 
04460     /* if the char was found in the map 
04461      * and is less than the given radix add it
04462      * to the number, otherwise exit the loop. 
04463      */
04464     if (y < radix) {
04465       if ((res = mp_mul_d (a, (mp_digit) radix, a)) != MP_OKAY) {
04466          return res;
04467       }
04468       if ((res = mp_add_d (a, (mp_digit) y, a)) != MP_OKAY) {
04469          return res;
04470       }
04471     } else {
04472       break;
04473     }
04474     ++str;
04475   }
04476   
04477   /* set the sign only if a != 0 */
04478   if (mp_iszero(a) != 1) {
04479      a->sign = neg;
04480   }
04481   return MP_OKAY;
04482 }
04483 
04484 #endif /* HAVE_ECC */
04485 
04486 #endif /* USE_FAST_MATH */
04487 
04488 #endif /* NO_BIG_INT */