CyaSSL is an SSL library for devices like mbed.

Dependents:   cyassl-client Sync

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers integer.c Source File

integer.c

00001 /* integer.c
00002  *
00003  * Copyright (C) 2006-2009 Sawtooth Consulting Ltd.
00004  *
00005  * This file is part of CyaSSL.
00006  *
00007  * CyaSSL is free software; you can redistribute it and/or modify
00008  * it under the terms of the GNU General Public License as published by
00009  * the Free Software Foundation; either version 2 of the License, or
00010  * (at your option) any later version.
00011  *
00012  * CyaSSL is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
00020  */
00021 
00022 /*
00023  * Based on public domain LibTomMath 0.38 by Tom St Denis, tomstdenis@iahu.ca,
00024  * http://math.libtomcrypt.com
00025  */
00026 
00027 
00028 #ifndef USE_FAST_MATH
00029 
00030 #include "integer.h"
00031 
00032 
00033 /* handle up to 6 inits */
00034 int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d, mp_int* e,
00035                   mp_int* f)
00036 {
00037     int res = MP_OKAY;
00038 
00039     if (a && ((res = mp_init(a)) != MP_OKAY))
00040         return res;
00041 
00042     if (b && ((res = mp_init(b)) != MP_OKAY)) {
00043         mp_clear(a);
00044         return res;
00045     }
00046 
00047     if (c && ((res = mp_init(c)) != MP_OKAY)) {
00048         mp_clear(a); mp_clear(b);
00049         return res;
00050     }
00051 
00052     if (d && ((res = mp_init(d)) != MP_OKAY)) {
00053         mp_clear(a); mp_clear(b); mp_clear(c);
00054         return res;
00055     }
00056 
00057     if (e && ((res = mp_init(e)) != MP_OKAY)) {
00058         mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d);
00059         return res;
00060     }
00061 
00062     if (f && ((res = mp_init(f)) != MP_OKAY)) {
00063         mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d); mp_clear(e);
00064         return res;
00065     }
00066 
00067     return res;
00068 }
00069 
00070 
00071 /* init a new mp_int */
00072 int mp_init (mp_int * a)
00073 {
00074   int i;
00075 
00076   /* allocate memory required and clear it */
00077   a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * MP_PREC, 0,
00078                                       DYNAMIC_TYPE_BIGINT);
00079   if (a->dp == NULL) {
00080     return MP_MEM;
00081   }
00082 
00083   /* set the digits to zero */
00084   for (i = 0; i < MP_PREC; i++) {
00085       a->dp[i] = 0;
00086   }
00087 
00088   /* set the used to zero, allocated digits to the default precision
00089    * and sign to positive */
00090   a->used  = 0;
00091   a->alloc = MP_PREC;
00092   a->sign  = MP_ZPOS;
00093 
00094   return MP_OKAY;
00095 }
00096 
00097 
00098 /* clear one (frees)  */
00099 void
00100 mp_clear (mp_int * a)
00101 {
00102   int i;
00103 
00104   /* only do anything if a hasn't been freed previously */
00105   if (a->dp != NULL) {
00106     /* first zero the digits */
00107     for (i = 0; i < a->used; i++) {
00108         a->dp[i] = 0;
00109     }
00110 
00111     /* free ram */
00112     XFREE(a->dp, 0, DYNAMIC_TYPE_BIGINT);
00113 
00114     /* reset members to make debugging easier */
00115     a->dp    = NULL;
00116     a->alloc = a->used = 0;
00117     a->sign  = MP_ZPOS;
00118   }
00119 }
00120 
00121 
00122 /* get the size for an unsigned equivalent */
00123 int mp_unsigned_bin_size (mp_int * a)
00124 {
00125   int     size = mp_count_bits (a);
00126   return (size / 8 + ((size & 7) != 0 ? 1 : 0));
00127 }
00128 
00129 
00130 /* returns the number of bits in an int */
00131 int
00132 mp_count_bits (mp_int * a)
00133 {
00134   int     r;
00135   mp_digit q;
00136 
00137   /* shortcut */
00138   if (a->used == 0) {
00139     return 0;
00140   }
00141 
00142   /* get number of digits and add that */
00143   r = (a->used - 1) * DIGIT_BIT;
00144   
00145   /* take the last digit and count the bits in it */
00146   q = a->dp[a->used - 1];
00147   while (q > ((mp_digit) 0)) {
00148     ++r;
00149     q >>= ((mp_digit) 1);
00150   }
00151   return r;
00152 }
00153 
00154 
00155 /* store in unsigned [big endian] format */
00156 int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
00157 {
00158   int     x, res;
00159   mp_int  t;
00160 
00161   if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
00162     return res;
00163   }
00164 
00165   x = 0;
00166   while (mp_iszero (&t) == 0) {
00167 #ifndef MP_8BIT
00168       b[x++] = (unsigned char) (t.dp[0] & 255);
00169 #else
00170       b[x++] = (unsigned char) (t.dp[0] | ((t.dp[1] & 0x01) << 7));
00171 #endif
00172     if ((res = mp_div_2d (&t, 8, &t, NULL)) != MP_OKAY) {
00173       mp_clear (&t);
00174       return res;
00175     }
00176   }
00177   bn_reverse (b, x);
00178   mp_clear (&t);
00179   return MP_OKAY;
00180 }
00181 
00182 
00183 /* creates "a" then copies b into it */
00184 int mp_init_copy (mp_int * a, mp_int * b)
00185 {
00186   int     res;
00187 
00188   if ((res = mp_init (a)) != MP_OKAY) {
00189     return res;
00190   }
00191   return mp_copy (b, a);
00192 }
00193 
00194 
00195 /* copy, b = a */
00196 int
00197 mp_copy (mp_int * a, mp_int * b)
00198 {
00199   int     res, n;
00200 
00201   /* if dst == src do nothing */
00202   if (a == b) {
00203     return MP_OKAY;
00204   }
00205 
00206   /* grow dest */
00207   if (b->alloc < a->used) {
00208      if ((res = mp_grow (b, a->used)) != MP_OKAY) {
00209         return res;
00210      }
00211   }
00212 
00213   /* zero b and copy the parameters over */
00214   {
00215     register mp_digit *tmpa, *tmpb;
00216 
00217     /* pointer aliases */
00218 
00219     /* source */
00220     tmpa = a->dp;
00221 
00222     /* destination */
00223     tmpb = b->dp;
00224 
00225     /* copy all the digits */
00226     for (n = 0; n < a->used; n++) {
00227       *tmpb++ = *tmpa++;
00228     }
00229 
00230     /* clear high digits */
00231     for (; n < b->used; n++) {
00232       *tmpb++ = 0;
00233     }
00234   }
00235 
00236   /* copy used count and sign */
00237   b->used = a->used;
00238   b->sign = a->sign;
00239   return MP_OKAY;
00240 }
00241 
00242 
00243 /* grow as required */
00244 int mp_grow (mp_int * a, int size)
00245 {
00246   int     i;
00247   mp_digit *tmp;
00248 
00249   /* if the alloc size is smaller alloc more ram */
00250   if (a->alloc < size) {
00251     /* ensure there are always at least MP_PREC digits extra on top */
00252     size += (MP_PREC * 2) - (size % MP_PREC);
00253 
00254     /* reallocate the array a->dp
00255      *
00256      * We store the return in a temporary variable
00257      * in case the operation failed we don't want
00258      * to overwrite the dp member of a.
00259      */
00260     tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size, 0,
00261                                        DYNAMIC_TYPE_BIGINT);
00262     if (tmp == NULL) {
00263       /* reallocation failed but "a" is still valid [can be freed] */
00264       return MP_MEM;
00265     }
00266 
00267     /* reallocation succeeded so set a->dp */
00268     a->dp = tmp;
00269 
00270     /* zero excess digits */
00271     i        = a->alloc;
00272     a->alloc = size;
00273     for (; i < a->alloc; i++) {
00274       a->dp[i] = 0;
00275     }
00276   }
00277   return MP_OKAY;
00278 }
00279 
00280 
00281 /* reverse an array, used for radix code */
00282 void
00283 bn_reverse (unsigned char *s, int len)
00284 {
00285   int     ix, iy;
00286   unsigned char t;
00287 
00288   ix = 0;
00289   iy = len - 1;
00290   while (ix < iy) {
00291     t     = s[ix];
00292     s[ix] = s[iy];
00293     s[iy] = t;
00294     ++ix;
00295     --iy;
00296   }
00297 }
00298 
00299 
00300 /* shift right by a certain bit count (store quotient in c, optional
00301    remainder in d) */
00302 int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
00303 {
00304   mp_digit D, r, rr;
00305   int     x, res;
00306   mp_int  t;
00307 
00308 
00309   /* if the shift count is <= 0 then we do no work */
00310   if (b <= 0) {
00311     res = mp_copy (a, c);
00312     if (d != NULL) {
00313       mp_zero (d);
00314     }
00315     return res;
00316   }
00317 
00318   if ((res = mp_init (&t)) != MP_OKAY) {
00319     return res;
00320   }
00321 
00322   /* get the remainder */
00323   if (d != NULL) {
00324     if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
00325       mp_clear (&t);
00326       return res;
00327     }
00328   }
00329 
00330   /* copy */
00331   if ((res = mp_copy (a, c)) != MP_OKAY) {
00332     mp_clear (&t);
00333     return res;
00334   }
00335 
00336   /* shift by as many digits in the bit count */
00337   if (b >= (int)DIGIT_BIT) {
00338     mp_rshd (c, b / DIGIT_BIT);
00339   }
00340 
00341   /* shift any bit count < DIGIT_BIT */
00342   D = (mp_digit) (b % DIGIT_BIT);
00343   if (D != 0) {
00344     register mp_digit *tmpc, mask, shift;
00345 
00346     /* mask */
00347     mask = (((mp_digit)1) << D) - 1;
00348 
00349     /* shift for lsb */
00350     shift = DIGIT_BIT - D;
00351 
00352     /* alias */
00353     tmpc = c->dp + (c->used - 1);
00354 
00355     /* carry */
00356     r = 0;
00357     for (x = c->used - 1; x >= 0; x--) {
00358       /* get the lower  bits of this word in a temp */
00359       rr = *tmpc & mask;
00360 
00361       /* shift the current word and mix in the carry bits from the previous
00362          word */
00363       *tmpc = (*tmpc >> D) | (r << shift);
00364       --tmpc;
00365 
00366       /* set the carry to the carry bits of the current word found above */
00367       r = rr;
00368     }
00369   }
00370   mp_clamp (c);
00371   if (d != NULL) {
00372     mp_exch (&t, d);
00373   }
00374   mp_clear (&t);
00375   return MP_OKAY;
00376 }
00377 
00378 
00379 /* set to zero */
00380 void mp_zero (mp_int * a)
00381 {
00382   int       n;
00383   mp_digit *tmp;
00384 
00385   a->sign = MP_ZPOS;
00386   a->used = 0;
00387 
00388   tmp = a->dp;
00389   for (n = 0; n < a->alloc; n++) {
00390      *tmp++ = 0;
00391   }
00392 }
00393 
00394 
00395 /* trim unused digits 
00396  *
00397  * This is used to ensure that leading zero digits are
00398  * trimed and the leading "used" digit will be non-zero
00399  * Typically very fast.  Also fixes the sign if there
00400  * are no more leading digits
00401  */
00402 void
00403 mp_clamp (mp_int * a)
00404 {
00405   /* decrease used while the most significant digit is
00406    * zero.
00407    */
00408   while (a->used > 0 && a->dp[a->used - 1] == 0) {
00409     --(a->used);
00410   }
00411 
00412   /* reset the sign flag if used == 0 */
00413   if (a->used == 0) {
00414     a->sign = MP_ZPOS;
00415   }
00416 }
00417 
00418 
00419 /* swap the elements of two integers, for cases where you can't simply swap the 
00420  * mp_int pointers around
00421  */
00422 void
00423 mp_exch (mp_int * a, mp_int * b)
00424 {
00425   mp_int  t;
00426 
00427   t  = *a;
00428   *a = *b;
00429   *b = t;
00430 }
00431 
00432 
00433 /* shift right a certain amount of digits */
00434 void mp_rshd (mp_int * a, int b)
00435 {
00436   int     x;
00437 
00438   /* if b <= 0 then ignore it */
00439   if (b <= 0) {
00440     return;
00441   }
00442 
00443   /* if b > used then simply zero it and return */
00444   if (a->used <= b) {
00445     mp_zero (a);
00446     return;
00447   }
00448 
00449   {
00450     register mp_digit *bottom, *top;
00451 
00452     /* shift the digits down */
00453 
00454     /* bottom */
00455     bottom = a->dp;
00456 
00457     /* top [offset into digits] */
00458     top = a->dp + b;
00459 
00460     /* this is implemented as a sliding window where 
00461      * the window is b-digits long and digits from 
00462      * the top of the window are copied to the bottom
00463      *
00464      * e.g.
00465 
00466      b-2 | b-1 | b0 | b1 | b2 | ... | bb |   ---->
00467                  /\                   |      ---->
00468                   \-------------------/      ---->
00469      */
00470     for (x = 0; x < (a->used - b); x++) {
00471       *bottom++ = *top++;
00472     }
00473 
00474     /* zero the top digits */
00475     for (; x < a->used; x++) {
00476       *bottom++ = 0;
00477     }
00478   }
00479   
00480   /* remove excess digits */
00481   a->used -= b;
00482 }
00483 
00484 
00485 /* calc a value mod 2**b */
00486 int
00487 mp_mod_2d (mp_int * a, int b, mp_int * c)
00488 {
00489   int     x, res;
00490 
00491   /* if b is <= 0 then zero the int */
00492   if (b <= 0) {
00493     mp_zero (c);
00494     return MP_OKAY;
00495   }
00496 
00497   /* if the modulus is larger than the value than return */
00498   if (b >= (int) (a->used * DIGIT_BIT)) {
00499     res = mp_copy (a, c);
00500     return res;
00501   }
00502 
00503   /* copy */
00504   if ((res = mp_copy (a, c)) != MP_OKAY) {
00505     return res;
00506   }
00507 
00508   /* zero digits above the last digit of the modulus */
00509   for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
00510     c->dp[x] = 0;
00511   }
00512   /* clear the digit that is not completely outside/inside the modulus */
00513   c->dp[b / DIGIT_BIT] &= (mp_digit) ((((mp_digit) 1) <<
00514               (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1));
00515   mp_clamp (c);
00516   return MP_OKAY;
00517 }
00518 
00519 
00520 /* reads a unsigned char array, assumes the msb is stored first [big endian] */
00521 int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
00522 {
00523   int     res;
00524 
00525   /* make sure there are at least two digits */
00526   if (a->alloc < 2) {
00527      if ((res = mp_grow(a, 2)) != MP_OKAY) {
00528         return res;
00529      }
00530   }
00531 
00532   /* zero the int */
00533   mp_zero (a);
00534 
00535   /* read the bytes in */
00536   while (c-- > 0) {
00537     if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
00538       return res;
00539     }
00540 
00541 #ifndef MP_8BIT
00542       a->dp[0] |= *b++;
00543       a->used += 1;
00544 #else
00545       a->dp[0] = (*b & MP_MASK);
00546       a->dp[1] |= ((*b++ >> 7U) & 1);
00547       a->used += 2;
00548 #endif
00549   }
00550   mp_clamp (a);
00551   return MP_OKAY;
00552 }
00553 
00554 
00555 /* shift left by a certain bit count */
00556 int mp_mul_2d (mp_int * a, int b, mp_int * c)
00557 {
00558   mp_digit d;
00559   int      res;
00560 
00561   /* copy */
00562   if (a != c) {
00563      if ((res = mp_copy (a, c)) != MP_OKAY) {
00564        return res;
00565      }
00566   }
00567 
00568   if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) {
00569      if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
00570        return res;
00571      }
00572   }
00573 
00574   /* shift by as many digits in the bit count */
00575   if (b >= (int)DIGIT_BIT) {
00576     if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
00577       return res;
00578     }
00579   }
00580 
00581   /* shift any bit count < DIGIT_BIT */
00582   d = (mp_digit) (b % DIGIT_BIT);
00583   if (d != 0) {
00584     register mp_digit *tmpc, shift, mask, r, rr;
00585     register int x;
00586 
00587     /* bitmask for carries */
00588     mask = (((mp_digit)1) << d) - 1;
00589 
00590     /* shift for msbs */
00591     shift = DIGIT_BIT - d;
00592 
00593     /* alias */
00594     tmpc = c->dp;
00595 
00596     /* carry */
00597     r    = 0;
00598     for (x = 0; x < c->used; x++) {
00599       /* get the higher bits of the current word */
00600       rr = (*tmpc >> shift) & mask;
00601 
00602       /* shift the current word and OR in the carry */
00603       *tmpc = ((*tmpc << d) | r) & MP_MASK;
00604       ++tmpc;
00605 
00606       /* set the carry to the carry bits of the current word */
00607       r = rr;
00608     }
00609     
00610     /* set final carry */
00611     if (r != 0) {
00612        c->dp[(c->used)++] = r;
00613     }
00614   }
00615   mp_clamp (c);
00616   return MP_OKAY;
00617 }
00618 
00619 
00620 /* shift left a certain amount of digits */
00621 int mp_lshd (mp_int * a, int b)
00622 {
00623   int     x, res;
00624 
00625   /* if its less than zero return */
00626   if (b <= 0) {
00627     return MP_OKAY;
00628   }
00629 
00630   /* grow to fit the new digits */
00631   if (a->alloc < a->used + b) {
00632      if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
00633        return res;
00634      }
00635   }
00636 
00637   {
00638     register mp_digit *top, *bottom;
00639 
00640     /* increment the used by the shift amount then copy upwards */
00641     a->used += b;
00642 
00643     /* top */
00644     top = a->dp + a->used - 1;
00645 
00646     /* base */
00647     bottom = a->dp + a->used - 1 - b;
00648 
00649     /* much like mp_rshd this is implemented using a sliding window
00650      * except the window goes the otherway around.  Copying from
00651      * the bottom to the top.  see bn_mp_rshd.c for more info.
00652      */
00653     for (x = a->used - 1; x >= b; x--) {
00654       *top-- = *bottom--;
00655     }
00656 
00657     /* zero the lower digits */
00658     top = a->dp;
00659     for (x = 0; x < b; x++) {
00660       *top++ = 0;
00661     }
00662   }
00663   return MP_OKAY;
00664 }
00665 
00666 
00667 /* this is a shell function that calls either the normal or Montgomery
00668  * exptmod functions.  Originally the call to the montgomery code was
00669  * embedded in the normal function but that wasted alot of stack space
00670  * for nothing (since 99% of the time the Montgomery code would be called)
00671  */
00672 int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
00673 {
00674   int dr;
00675 
00676   /* modulus P must be positive */
00677   if (P->sign == MP_NEG) {
00678      return MP_VAL;
00679   }
00680 
00681   /* if exponent X is negative we have to recurse */
00682   if (X->sign == MP_NEG) {
00683 #ifdef BN_MP_INVMOD_C
00684      mp_int tmpG, tmpX;
00685      int err;
00686 
00687      /* first compute 1/G mod P */
00688      if ((err = mp_init(&tmpG)) != MP_OKAY) {
00689         return err;
00690      }
00691      if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
00692         mp_clear(&tmpG);
00693         return err;
00694      }
00695 
00696      /* now get |X| */
00697      if ((err = mp_init(&tmpX)) != MP_OKAY) {
00698         mp_clear(&tmpG);
00699         return err;
00700      }
00701      if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
00702         mp_clear(&tmpG);
00703         mp_clear(&tmpX);
00704         return err;
00705      }
00706 
00707      /* and now compute (1/G)**|X| instead of G**X [X < 0] */
00708      err = mp_exptmod(&tmpG, &tmpX, P, Y);
00709      mp_clear(&tmpG);
00710      mp_clear(&tmpX);
00711      return err;
00712 #else 
00713      /* no invmod */
00714      return MP_VAL;
00715 #endif
00716   }
00717 
00718 /* modified diminished radix reduction */
00719 #if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && \
00720   defined(BN_S_MP_EXPTMOD_C)
00721   if (mp_reduce_is_2k_l(P) == MP_YES) {
00722      return s_mp_exptmod(G, X, P, Y, 1);
00723   }
00724 #endif
00725 
00726 #ifdef BN_MP_DR_IS_MODULUS_C
00727   /* is it a DR modulus? */
00728   dr = mp_dr_is_modulus(P);
00729 #else
00730   /* default to no */
00731   dr = 0;
00732 #endif
00733 
00734 #ifdef BN_MP_REDUCE_IS_2K_C
00735   /* if not, is it a unrestricted DR modulus? */
00736   if (dr == 0) {
00737      dr = mp_reduce_is_2k(P) << 1;
00738   }
00739 #endif
00740     
00741   /* if the modulus is odd or dr != 0 use the montgomery method */
00742 #ifdef BN_MP_EXPTMOD_FAST_C
00743   if (mp_isodd (P) == 1 || dr !=  0) {
00744     return mp_exptmod_fast (G, X, P, Y, dr);
00745   } else {
00746 #endif
00747 #ifdef BN_S_MP_EXPTMOD_C
00748     /* otherwise use the generic Barrett reduction technique */
00749     return s_mp_exptmod (G, X, P, Y, 0);
00750 #else
00751     /* no exptmod for evens */
00752     return MP_VAL;
00753 #endif
00754 #ifdef BN_MP_EXPTMOD_FAST_C
00755   }
00756 #endif
00757 }
00758 
00759 
00760 /* b = |a| 
00761  *
00762  * Simple function copies the input and fixes the sign to positive
00763  */
00764 int
00765 mp_abs (mp_int * a, mp_int * b)
00766 {
00767   int     res;
00768 
00769   /* copy a to b */
00770   if (a != b) {
00771      if ((res = mp_copy (a, b)) != MP_OKAY) {
00772        return res;
00773      }
00774   }
00775 
00776   /* force the sign of b to positive */
00777   b->sign = MP_ZPOS;
00778 
00779   return MP_OKAY;
00780 }
00781 
00782 
00783 /* hac 14.61, pp608 */
00784 int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
00785 {
00786   /* b cannot be negative */
00787   if (b->sign == MP_NEG || mp_iszero(b) == 1) {
00788     return MP_VAL;
00789   }
00790 
00791 #ifdef BN_FAST_MP_INVMOD_C
00792   /* if the modulus is odd we can use a faster routine instead */
00793   if (mp_isodd (b) == 1) {
00794     return fast_mp_invmod (a, b, c);
00795   }
00796 #endif
00797 
00798 #ifdef BN_MP_INVMOD_SLOW_C
00799   return mp_invmod_slow(a, b, c);
00800 #endif
00801 }
00802 
00803 
00804 /* computes the modular inverse via binary extended euclidean algorithm, 
00805  * that is c = 1/a mod b 
00806  *
00807  * Based on slow invmod except this is optimized for the case where b is 
00808  * odd as per HAC Note 14.64 on pp. 610
00809  */
00810 int fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
00811 {
00812   mp_int  x, y, u, v, B, D;
00813   int     res, neg;
00814 
00815   /* 2. [modified] b must be odd   */
00816   if (mp_iseven (b) == 1) {
00817     return MP_VAL;
00818   }
00819 
00820   /* init all our temps */
00821   if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D)) != MP_OKAY) {
00822      return res;
00823   }
00824 
00825   /* x == modulus, y == value to invert */
00826   if ((res = mp_copy (b, &x)) != MP_OKAY) {
00827     goto LBL_ERR;
00828   }
00829 
00830   /* we need y = |a| */
00831   if ((res = mp_mod (a, b, &y)) != MP_OKAY) {
00832     goto LBL_ERR;
00833   }
00834 
00835   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
00836   if ((res = mp_copy (&x, &u)) != MP_OKAY) {
00837     goto LBL_ERR;
00838   }
00839   if ((res = mp_copy (&y, &v)) != MP_OKAY) {
00840     goto LBL_ERR;
00841   }
00842   mp_set (&D, 1);
00843 
00844 top:
00845   /* 4.  while u is even do */
00846   while (mp_iseven (&u) == 1) {
00847     /* 4.1 u = u/2 */
00848     if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
00849       goto LBL_ERR;
00850     }
00851     /* 4.2 if B is odd then */
00852     if (mp_isodd (&B) == 1) {
00853       if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
00854         goto LBL_ERR;
00855       }
00856     }
00857     /* B = B/2 */
00858     if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
00859       goto LBL_ERR;
00860     }
00861   }
00862 
00863   /* 5.  while v is even do */
00864   while (mp_iseven (&v) == 1) {
00865     /* 5.1 v = v/2 */
00866     if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
00867       goto LBL_ERR;
00868     }
00869     /* 5.2 if D is odd then */
00870     if (mp_isodd (&D) == 1) {
00871       /* D = (D-x)/2 */
00872       if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
00873         goto LBL_ERR;
00874       }
00875     }
00876     /* D = D/2 */
00877     if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
00878       goto LBL_ERR;
00879     }
00880   }
00881 
00882   /* 6.  if u >= v then */
00883   if (mp_cmp (&u, &v) != MP_LT) {
00884     /* u = u - v, B = B - D */
00885     if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
00886       goto LBL_ERR;
00887     }
00888 
00889     if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
00890       goto LBL_ERR;
00891     }
00892   } else {
00893     /* v - v - u, D = D - B */
00894     if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
00895       goto LBL_ERR;
00896     }
00897 
00898     if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
00899       goto LBL_ERR;
00900     }
00901   }
00902 
00903   /* if not zero goto step 4 */
00904   if (mp_iszero (&u) == 0) {
00905     goto top;
00906   }
00907 
00908   /* now a = C, b = D, gcd == g*v */
00909 
00910   /* if v != 1 then there is no inverse */
00911   if (mp_cmp_d (&v, 1) != MP_EQ) {
00912     res = MP_VAL;
00913     goto LBL_ERR;
00914   }
00915 
00916   /* b is now the inverse */
00917   neg = a->sign;
00918   while (D.sign == MP_NEG) {
00919     if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
00920       goto LBL_ERR;
00921     }
00922   }
00923   mp_exch (&D, c);
00924   c->sign = neg;
00925   res = MP_OKAY;
00926 
00927 LBL_ERR:mp_clear(&x);
00928         mp_clear(&y);
00929         mp_clear(&u);
00930         mp_clear(&v);
00931         mp_clear(&B);
00932         mp_clear(&D);
00933   return res;
00934 }
00935 
00936 
00937 /* hac 14.61, pp608 */
00938 int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
00939 {
00940   mp_int  x, y, u, v, A, B, C, D;
00941   int     res;
00942 
00943   /* b cannot be negative */
00944   if (b->sign == MP_NEG || mp_iszero(b) == 1) {
00945     return MP_VAL;
00946   }
00947 
00948   /* init temps */
00949   if ((res = mp_init_multi(&x, &y, &u, &v, 
00950                            &A, &B)) != MP_OKAY) {
00951      return res;
00952   }
00953 
00954   /* init rest of tmps temps */
00955   if ((res = mp_init_multi(&C, &D, 0, 0, 0, 0)) != MP_OKAY) {
00956      return res;
00957   }
00958 
00959   /* x = a, y = b */
00960   if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
00961       goto LBL_ERR;
00962   }
00963   if ((res = mp_copy (b, &y)) != MP_OKAY) {
00964     goto LBL_ERR;
00965   }
00966 
00967   /* 2. [modified] if x,y are both even then return an error! */
00968   if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
00969     res = MP_VAL;
00970     goto LBL_ERR;
00971   }
00972 
00973   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
00974   if ((res = mp_copy (&x, &u)) != MP_OKAY) {
00975     goto LBL_ERR;
00976   }
00977   if ((res = mp_copy (&y, &v)) != MP_OKAY) {
00978     goto LBL_ERR;
00979   }
00980   mp_set (&A, 1);
00981   mp_set (&D, 1);
00982 
00983 top:
00984   /* 4.  while u is even do */
00985   while (mp_iseven (&u) == 1) {
00986     /* 4.1 u = u/2 */
00987     if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
00988       goto LBL_ERR;
00989     }
00990     /* 4.2 if A or B is odd then */
00991     if (mp_isodd (&A) == 1 || mp_isodd (&B) == 1) {
00992       /* A = (A+y)/2, B = (B-x)/2 */
00993       if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
00994          goto LBL_ERR;
00995       }
00996       if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
00997          goto LBL_ERR;
00998       }
00999     }
01000     /* A = A/2, B = B/2 */
01001     if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
01002       goto LBL_ERR;
01003     }
01004     if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
01005       goto LBL_ERR;
01006     }
01007   }
01008 
01009   /* 5.  while v is even do */
01010   while (mp_iseven (&v) == 1) {
01011     /* 5.1 v = v/2 */
01012     if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
01013       goto LBL_ERR;
01014     }
01015     /* 5.2 if C or D is odd then */
01016     if (mp_isodd (&C) == 1 || mp_isodd (&D) == 1) {
01017       /* C = (C+y)/2, D = (D-x)/2 */
01018       if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
01019          goto LBL_ERR;
01020       }
01021       if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
01022          goto LBL_ERR;
01023       }
01024     }
01025     /* C = C/2, D = D/2 */
01026     if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
01027       goto LBL_ERR;
01028     }
01029     if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
01030       goto LBL_ERR;
01031     }
01032   }
01033 
01034   /* 6.  if u >= v then */
01035   if (mp_cmp (&u, &v) != MP_LT) {
01036     /* u = u - v, A = A - C, B = B - D */
01037     if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
01038       goto LBL_ERR;
01039     }
01040 
01041     if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
01042       goto LBL_ERR;
01043     }
01044 
01045     if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
01046       goto LBL_ERR;
01047     }
01048   } else {
01049     /* v - v - u, C = C - A, D = D - B */
01050     if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
01051       goto LBL_ERR;
01052     }
01053 
01054     if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
01055       goto LBL_ERR;
01056     }
01057 
01058     if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
01059       goto LBL_ERR;
01060     }
01061   }
01062 
01063   /* if not zero goto step 4 */
01064   if (mp_iszero (&u) == 0)
01065     goto top;
01066 
01067   /* now a = C, b = D, gcd == g*v */
01068 
01069   /* if v != 1 then there is no inverse */
01070   if (mp_cmp_d (&v, 1) != MP_EQ) {
01071     res = MP_VAL;
01072     goto LBL_ERR;
01073   }
01074 
01075   /* if its too low */
01076   while (mp_cmp_d(&C, 0) == MP_LT) {
01077       if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
01078          goto LBL_ERR;
01079       }
01080   }
01081   
01082   /* too big */
01083   while (mp_cmp_mag(&C, b) != MP_LT) {
01084       if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
01085          goto LBL_ERR;
01086       }
01087   }
01088   
01089   /* C is now the inverse */
01090   mp_exch (&C, c);
01091   res = MP_OKAY;
01092 LBL_ERR:mp_clear(&x);
01093         mp_clear(&y);
01094         mp_clear(&u);
01095         mp_clear(&v);
01096         mp_clear(&A);
01097         mp_clear(&B);
01098         mp_clear(&C);
01099         mp_clear(&D);
01100   return res;
01101 }
01102 
01103 
01104 /* compare maginitude of two ints (unsigned) */
01105 int mp_cmp_mag (mp_int * a, mp_int * b)
01106 {
01107   int     n;
01108   mp_digit *tmpa, *tmpb;
01109 
01110   /* compare based on # of non-zero digits */
01111   if (a->used > b->used) {
01112     return MP_GT;
01113   }
01114   
01115   if (a->used < b->used) {
01116     return MP_LT;
01117   }
01118 
01119   /* alias for a */
01120   tmpa = a->dp + (a->used - 1);
01121 
01122   /* alias for b */
01123   tmpb = b->dp + (a->used - 1);
01124 
01125   /* compare based on digits  */
01126   for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
01127     if (*tmpa > *tmpb) {
01128       return MP_GT;
01129     }
01130 
01131     if (*tmpa < *tmpb) {
01132       return MP_LT;
01133     }
01134   }
01135   return MP_EQ;
01136 }
01137 
01138 
01139 /* compare two ints (signed)*/
01140 int
01141 mp_cmp (mp_int * a, mp_int * b)
01142 {
01143   /* compare based on sign */
01144   if (a->sign != b->sign) {
01145      if (a->sign == MP_NEG) {
01146         return MP_LT;
01147      } else {
01148         return MP_GT;
01149      }
01150   }
01151   
01152   /* compare digits */
01153   if (a->sign == MP_NEG) {
01154      /* if negative compare opposite direction */
01155      return mp_cmp_mag(b, a);
01156   } else {
01157      return mp_cmp_mag(a, b);
01158   }
01159 }
01160 
01161 
01162 /* compare a digit */
01163 int mp_cmp_d(mp_int * a, mp_digit b)
01164 {
01165   /* compare based on sign */
01166   if (a->sign == MP_NEG) {
01167     return MP_LT;
01168   }
01169 
01170   /* compare based on magnitude */
01171   if (a->used > 1) {
01172     return MP_GT;
01173   }
01174 
01175   /* compare the only digit of a to b */
01176   if (a->dp[0] > b) {
01177     return MP_GT;
01178   } else if (a->dp[0] < b) {
01179     return MP_LT;
01180   } else {
01181     return MP_EQ;
01182   }
01183 }
01184 
01185 
01186 /* set to a digit */
01187 void mp_set (mp_int * a, mp_digit b)
01188 {
01189   mp_zero (a);
01190   a->dp[0] = b & MP_MASK;
01191   a->used  = (a->dp[0] != 0) ? 1 : 0;
01192 }
01193 
01194 
01195 /* c = a mod b, 0 <= c < b */
01196 int
01197 mp_mod (mp_int * a, mp_int * b, mp_int * c)
01198 {
01199   mp_int  t;
01200   int     res;
01201 
01202   if ((res = mp_init (&t)) != MP_OKAY) {
01203     return res;
01204   }
01205 
01206   if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
01207     mp_clear (&t);
01208     return res;
01209   }
01210 
01211   if (t.sign != b->sign) {
01212     res = mp_add (b, &t, c);
01213   } else {
01214     res = MP_OKAY;
01215     mp_exch (&t, c);
01216   }
01217 
01218   mp_clear (&t);
01219   return res;
01220 }
01221 
01222 
01223 /* slower bit-bang division... also smaller */
01224 int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
01225 {
01226    mp_int ta, tb, tq, q;
01227    int    res, n, n2;
01228 
01229   /* is divisor zero ? */
01230   if (mp_iszero (b) == 1) {
01231     return MP_VAL;
01232   }
01233 
01234   /* if a < b then q=0, r = a */
01235   if (mp_cmp_mag (a, b) == MP_LT) {
01236     if (d != NULL) {
01237       res = mp_copy (a, d);
01238     } else {
01239       res = MP_OKAY;
01240     }
01241     if (c != NULL) {
01242       mp_zero (c);
01243     }
01244     return res;
01245   }
01246     
01247   /* init our temps */
01248   if ((res = mp_init_multi(&ta, &tb, &tq, &q, 0, 0) != MP_OKAY)) {
01249      return res;
01250   }
01251 
01252 
01253   mp_set(&tq, 1);
01254   n = mp_count_bits(a) - mp_count_bits(b);
01255   if (((res = mp_abs(a, &ta)) != MP_OKAY) ||
01256       ((res = mp_abs(b, &tb)) != MP_OKAY) || 
01257       ((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) ||
01258       ((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
01259       goto LBL_ERR;
01260   }
01261 
01262   while (n-- >= 0) {
01263      if (mp_cmp(&tb, &ta) != MP_GT) {
01264         if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) ||
01265             ((res = mp_add(&q, &tq, &q)) != MP_OKAY)) {
01266            goto LBL_ERR;
01267         }
01268      }
01269      if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) ||
01270          ((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) {
01271            goto LBL_ERR;
01272      }
01273   }
01274 
01275   /* now q == quotient and ta == remainder */
01276   n  = a->sign;
01277   n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG);
01278   if (c != NULL) {
01279      mp_exch(c, &q);
01280      c->sign  = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2;
01281   }
01282   if (d != NULL) {
01283      mp_exch(d, &ta);
01284      d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n;
01285   }
01286 LBL_ERR:
01287    mp_clear(&ta);
01288    mp_clear(&tb);
01289    mp_clear(&tq);
01290    mp_clear(&q);
01291    return res;
01292 }
01293 
01294 
01295 /* b = a/2 */
01296 int mp_div_2(mp_int * a, mp_int * b)
01297 {
01298   int     x, res, oldused;
01299 
01300   /* copy */
01301   if (b->alloc < a->used) {
01302     if ((res = mp_grow (b, a->used)) != MP_OKAY) {
01303       return res;
01304     }
01305   }
01306 
01307   oldused = b->used;
01308   b->used = a->used;
01309   {
01310     register mp_digit r, rr, *tmpa, *tmpb;
01311 
01312     /* source alias */
01313     tmpa = a->dp + b->used - 1;
01314 
01315     /* dest alias */
01316     tmpb = b->dp + b->used - 1;
01317 
01318     /* carry */
01319     r = 0;
01320     for (x = b->used - 1; x >= 0; x--) {
01321       /* get the carry for the next iteration */
01322       rr = *tmpa & 1;
01323 
01324       /* shift the current digit, add in carry and store */
01325       *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
01326 
01327       /* forward carry to next iteration */
01328       r = rr;
01329     }
01330 
01331     /* zero excess digits */
01332     tmpb = b->dp + b->used;
01333     for (x = b->used; x < oldused; x++) {
01334       *tmpb++ = 0;
01335     }
01336   }
01337   b->sign = a->sign;
01338   mp_clamp (b);
01339   return MP_OKAY;
01340 }
01341 
01342 
01343 /* high level addition (handles signs) */
01344 int mp_add (mp_int * a, mp_int * b, mp_int * c)
01345 {
01346   int     sa, sb, res;
01347 
01348   /* get sign of both inputs */
01349   sa = a->sign;
01350   sb = b->sign;
01351 
01352   /* handle two cases, not four */
01353   if (sa == sb) {
01354     /* both positive or both negative */
01355     /* add their magnitudes, copy the sign */
01356     c->sign = sa;
01357     res = s_mp_add (a, b, c);
01358   } else {
01359     /* one positive, the other negative */
01360     /* subtract the one with the greater magnitude from */
01361     /* the one of the lesser magnitude.  The result gets */
01362     /* the sign of the one with the greater magnitude. */
01363     if (mp_cmp_mag (a, b) == MP_LT) {
01364       c->sign = sb;
01365       res = s_mp_sub (b, a, c);
01366     } else {
01367       c->sign = sa;
01368       res = s_mp_sub (a, b, c);
01369     }
01370   }
01371   return res;
01372 }
01373 
01374 
01375 /* low level addition, based on HAC pp.594, Algorithm 14.7 */
01376 int
01377 s_mp_add (mp_int * a, mp_int * b, mp_int * c)
01378 {
01379   mp_int *x;
01380   int     olduse, res, min, max;
01381 
01382   /* find sizes, we let |a| <= |b| which means we have to sort
01383    * them.  "x" will point to the input with the most digits
01384    */
01385   if (a->used > b->used) {
01386     min = b->used;
01387     max = a->used;
01388     x = a;
01389   } else {
01390     min = a->used;
01391     max = b->used;
01392     x = b;
01393   }
01394 
01395   /* init result */
01396   if (c->alloc < max + 1) {
01397     if ((res = mp_grow (c, max + 1)) != MP_OKAY) {
01398       return res;
01399     }
01400   }
01401 
01402   /* get old used digit count and set new one */
01403   olduse = c->used;
01404   c->used = max + 1;
01405 
01406   {
01407     register mp_digit u, *tmpa, *tmpb, *tmpc;
01408     register int i;
01409 
01410     /* alias for digit pointers */
01411 
01412     /* first input */
01413     tmpa = a->dp;
01414 
01415     /* second input */
01416     tmpb = b->dp;
01417 
01418     /* destination */
01419     tmpc = c->dp;
01420 
01421     /* zero the carry */
01422     u = 0;
01423     for (i = 0; i < min; i++) {
01424       /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
01425       *tmpc = *tmpa++ + *tmpb++ + u;
01426 
01427       /* U = carry bit of T[i] */
01428       u = *tmpc >> ((mp_digit)DIGIT_BIT);
01429 
01430       /* take away carry bit from T[i] */
01431       *tmpc++ &= MP_MASK;
01432     }
01433 
01434     /* now copy higher words if any, that is in A+B 
01435      * if A or B has more digits add those in 
01436      */
01437     if (min != max) {
01438       for (; i < max; i++) {
01439         /* T[i] = X[i] + U */
01440         *tmpc = x->dp[i] + u;
01441 
01442         /* U = carry bit of T[i] */
01443         u = *tmpc >> ((mp_digit)DIGIT_BIT);
01444 
01445         /* take away carry bit from T[i] */
01446         *tmpc++ &= MP_MASK;
01447       }
01448     }
01449 
01450     /* add carry */
01451     *tmpc++ = u;
01452 
01453     /* clear digits above oldused */
01454     for (i = c->used; i < olduse; i++) {
01455       *tmpc++ = 0;
01456     }
01457   }
01458 
01459   mp_clamp (c);
01460   return MP_OKAY;
01461 }
01462 
01463 
01464 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
01465 int
01466 s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
01467 {
01468   int     olduse, res, min, max;
01469 
01470   /* find sizes */
01471   min = b->used;
01472   max = a->used;
01473 
01474   /* init result */
01475   if (c->alloc < max) {
01476     if ((res = mp_grow (c, max)) != MP_OKAY) {
01477       return res;
01478     }
01479   }
01480   olduse = c->used;
01481   c->used = max;
01482 
01483   {
01484     register mp_digit u, *tmpa, *tmpb, *tmpc;
01485     register int i;
01486 
01487     /* alias for digit pointers */
01488     tmpa = a->dp;
01489     tmpb = b->dp;
01490     tmpc = c->dp;
01491 
01492     /* set carry to zero */
01493     u = 0;
01494     for (i = 0; i < min; i++) {
01495       /* T[i] = A[i] - B[i] - U */
01496       *tmpc = *tmpa++ - *tmpb++ - u;
01497 
01498       /* U = carry bit of T[i]
01499        * Note this saves performing an AND operation since
01500        * if a carry does occur it will propagate all the way to the
01501        * MSB.  As a result a single shift is enough to get the carry
01502        */
01503       u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
01504 
01505       /* Clear carry from T[i] */
01506       *tmpc++ &= MP_MASK;
01507     }
01508 
01509     /* now copy higher words if any, e.g. if A has more digits than B  */
01510     for (; i < max; i++) {
01511       /* T[i] = A[i] - U */
01512       *tmpc = *tmpa++ - u;
01513 
01514       /* U = carry bit of T[i] */
01515       u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
01516 
01517       /* Clear carry from T[i] */
01518       *tmpc++ &= MP_MASK;
01519     }
01520 
01521     /* clear digits above used (since we may not have grown result above) */
01522     for (i = c->used; i < olduse; i++) {
01523       *tmpc++ = 0;
01524     }
01525   }
01526 
01527   mp_clamp (c);
01528   return MP_OKAY;
01529 }
01530 
01531 
01532 /* high level subtraction (handles signs) */
01533 int
01534 mp_sub (mp_int * a, mp_int * b, mp_int * c)
01535 {
01536   int     sa, sb, res;
01537 
01538   sa = a->sign;
01539   sb = b->sign;
01540 
01541   if (sa != sb) {
01542     /* subtract a negative from a positive, OR */
01543     /* subtract a positive from a negative. */
01544     /* In either case, ADD their magnitudes, */
01545     /* and use the sign of the first number. */
01546     c->sign = sa;
01547     res = s_mp_add (a, b, c);
01548   } else {
01549     /* subtract a positive from a positive, OR */
01550     /* subtract a negative from a negative. */
01551     /* First, take the difference between their */
01552     /* magnitudes, then... */
01553     if (mp_cmp_mag (a, b) != MP_LT) {
01554       /* Copy the sign from the first */
01555       c->sign = sa;
01556       /* The first has a larger or equal magnitude */
01557       res = s_mp_sub (a, b, c);
01558     } else {
01559       /* The result has the *opposite* sign from */
01560       /* the first number. */
01561       c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
01562       /* The second has a larger magnitude */
01563       res = s_mp_sub (b, a, c);
01564     }
01565   }
01566   return res;
01567 }
01568 
01569 
01570 /* determines if reduce_2k_l can be used */
01571 int mp_reduce_is_2k_l(mp_int *a)
01572 {
01573    int ix, iy;
01574    
01575    if (a->used == 0) {
01576       return MP_NO;
01577    } else if (a->used == 1) {
01578       return MP_YES;
01579    } else if (a->used > 1) {
01580       /* if more than half of the digits are -1 we're sold */
01581       for (iy = ix = 0; ix < a->used; ix++) {
01582           if (a->dp[ix] == MP_MASK) {
01583               ++iy;
01584           }
01585       }
01586       return (iy >= (a->used/2)) ? MP_YES : MP_NO;
01587       
01588    }
01589    return MP_NO;
01590 }
01591 
01592 
01593 /* determines if mp_reduce_2k can be used */
01594 int mp_reduce_is_2k(mp_int *a)
01595 {
01596    int ix, iy, iw;
01597    mp_digit iz;
01598    
01599    if (a->used == 0) {
01600       return MP_NO;
01601    } else if (a->used == 1) {
01602       return MP_YES;
01603    } else if (a->used > 1) {
01604       iy = mp_count_bits(a);
01605       iz = 1;
01606       iw = 1;
01607     
01608       /* Test every bit from the second digit up, must be 1 */
01609       for (ix = DIGIT_BIT; ix < iy; ix++) {
01610           if ((a->dp[iw] & iz) == 0) {
01611              return MP_NO;
01612           }
01613           iz <<= 1;
01614           if (iz > (mp_digit)MP_MASK) {
01615              ++iw;
01616              iz = 1;
01617           }
01618       }
01619    }
01620    return MP_YES;
01621 }
01622 
01623 
01624 /* determines if a number is a valid DR modulus */
01625 int mp_dr_is_modulus(mp_int *a)
01626 {
01627    int ix;
01628 
01629    /* must be at least two digits */
01630    if (a->used < 2) {
01631       return 0;
01632    }
01633 
01634    /* must be of the form b**k - a [a <= b] so all
01635     * but the first digit must be equal to -1 (mod b).
01636     */
01637    for (ix = 1; ix < a->used; ix++) {
01638        if (a->dp[ix] != MP_MASK) {
01639           return 0;
01640        }
01641    }
01642    return 1;
01643 }
01644 
01645 
01646 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
01647  *
01648  * Uses a left-to-right k-ary sliding window to compute the modular
01649  * exponentiation.
01650  * The value of k changes based on the size of the exponent.
01651  *
01652  * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
01653  */
01654 
01655 #ifdef MP_LOW_MEM
01656    #define TAB_SIZE 32
01657 #else
01658    #define TAB_SIZE 256
01659 #endif
01660 
01661 int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
01662                      int redmode)
01663 {
01664   mp_int  M[TAB_SIZE], res;
01665   mp_digit buf, mp;
01666   int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
01667 
01668   /* use a pointer to the reduction algorithm.  This allows us to use
01669    * one of many reduction algorithms without modding the guts of
01670    * the code with if statements everywhere.
01671    */
01672   int     (*redux)(mp_int*,mp_int*,mp_digit);
01673 
01674   /* find window size */
01675   x = mp_count_bits (X);
01676   if (x <= 7) {
01677     winsize = 2;
01678   } else if (x <= 36) {
01679     winsize = 3;
01680   } else if (x <= 140) {
01681     winsize = 4;
01682   } else if (x <= 450) {
01683     winsize = 5;
01684   } else if (x <= 1303) {
01685     winsize = 6;
01686   } else if (x <= 3529) {
01687     winsize = 7;
01688   } else {
01689     winsize = 8;
01690   }
01691 
01692 #ifdef MP_LOW_MEM
01693   if (winsize > 5) {
01694      winsize = 5;
01695   }
01696 #endif
01697 
01698   /* init M array */
01699   /* init first cell */
01700   if ((err = mp_init(&M[1])) != MP_OKAY) {
01701      return err;
01702   }
01703 
01704   /* now init the second half of the array */
01705   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
01706     if ((err = mp_init(&M[x])) != MP_OKAY) {
01707       for (y = 1<<(winsize-1); y < x; y++) {
01708         mp_clear (&M[y]);
01709       }
01710       mp_clear(&M[1]);
01711       return err;
01712     }
01713   }
01714 
01715   /* determine and setup reduction code */
01716   if (redmode == 0) {
01717 #ifdef BN_MP_MONTGOMERY_SETUP_C     
01718      /* now setup montgomery  */
01719      if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
01720         goto LBL_M;
01721      }
01722 #else
01723      err = MP_VAL;
01724      goto LBL_M;
01725 #endif
01726 
01727      /* automatically pick the comba one if available (saves quite a few
01728         calls/ifs) */
01729 #ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
01730      if (((P->used * 2 + 1) < MP_WARRAY) &&
01731           P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
01732         redux = fast_mp_montgomery_reduce;
01733      } else 
01734 #endif
01735      {
01736 #ifdef BN_MP_MONTGOMERY_REDUCE_C
01737         /* use slower baseline Montgomery method */
01738         redux = mp_montgomery_reduce;
01739 #else
01740         err = MP_VAL;
01741         goto LBL_M;
01742 #endif
01743      }
01744   } else if (redmode == 1) {
01745 #if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C)
01746      /* setup DR reduction for moduli of the form B**k - b */
01747      mp_dr_setup(P, &mp);
01748      redux = mp_dr_reduce;
01749 #else
01750      err = MP_VAL;
01751      goto LBL_M;
01752 #endif
01753   } else {
01754 #if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
01755      /* setup DR reduction for moduli of the form 2**k - b */
01756      if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
01757         goto LBL_M;
01758      }
01759      redux = mp_reduce_2k;
01760 #else
01761      err = MP_VAL;
01762      goto LBL_M;
01763 #endif
01764   }
01765 
01766   /* setup result */
01767   if ((err = mp_init (&res)) != MP_OKAY) {
01768     goto LBL_M;
01769   }
01770 
01771   /* create M table
01772    *
01773 
01774    *
01775    * The first half of the table is not computed though accept for M[0] and M[1]
01776    */
01777 
01778   if (redmode == 0) {
01779 #ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
01780      /* now we need R mod m */
01781      if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
01782        goto LBL_RES;
01783      }
01784 #else 
01785      err = MP_VAL;
01786      goto LBL_RES;
01787 #endif
01788 
01789      /* now set M[1] to G * R mod m */
01790      if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
01791        goto LBL_RES;
01792      }
01793   } else {
01794      mp_set(&res, 1);
01795      if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
01796         goto LBL_RES;
01797      }
01798   }
01799 
01800   /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times*/
01801   if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
01802     goto LBL_RES;
01803   }
01804 
01805   for (x = 0; x < (winsize - 1); x++) {
01806     if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
01807       goto LBL_RES;
01808     }
01809     if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
01810       goto LBL_RES;
01811     }
01812   }
01813 
01814   /* create upper table */
01815   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
01816     if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
01817       goto LBL_RES;
01818     }
01819     if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
01820       goto LBL_RES;
01821     }
01822   }
01823 
01824   /* set initial mode and bit cnt */
01825   mode   = 0;
01826   bitcnt = 1;
01827   buf    = 0;
01828   digidx = X->used - 1;
01829   bitcpy = 0;
01830   bitbuf = 0;
01831 
01832   for (;;) {
01833     /* grab next digit as required */
01834     if (--bitcnt == 0) {
01835       /* if digidx == -1 we are out of digits so break */
01836       if (digidx == -1) {
01837         break;
01838       }
01839       /* read next digit and reset bitcnt */
01840       buf    = X->dp[digidx--];
01841       bitcnt = (int)DIGIT_BIT;
01842     }
01843 
01844     /* grab the next msb from the exponent */
01845     y     = (mp_digit)(buf >> (DIGIT_BIT - 1)) & 1;
01846     buf <<= (mp_digit)1;
01847 
01848     /* if the bit is zero and mode == 0 then we ignore it
01849      * These represent the leading zero bits before the first 1 bit
01850      * in the exponent.  Technically this opt is not required but it
01851      * does lower the # of trivial squaring/reductions used
01852      */
01853     if (mode == 0 && y == 0) {
01854       continue;
01855     }
01856 
01857     /* if the bit is zero and mode == 1 then we square */
01858     if (mode == 1 && y == 0) {
01859       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
01860         goto LBL_RES;
01861       }
01862       if ((err = redux (&res, P, mp)) != MP_OKAY) {
01863         goto LBL_RES;
01864       }
01865       continue;
01866     }
01867 
01868     /* else we add it to the window */
01869     bitbuf |= (y << (winsize - ++bitcpy));
01870     mode    = 2;
01871 
01872     if (bitcpy == winsize) {
01873       /* ok window is filled so square as required and multiply  */
01874       /* square first */
01875       for (x = 0; x < winsize; x++) {
01876         if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
01877           goto LBL_RES;
01878         }
01879         if ((err = redux (&res, P, mp)) != MP_OKAY) {
01880           goto LBL_RES;
01881         }
01882       }
01883 
01884       /* then multiply */
01885       if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
01886         goto LBL_RES;
01887       }
01888       if ((err = redux (&res, P, mp)) != MP_OKAY) {
01889         goto LBL_RES;
01890       }
01891 
01892       /* empty window and reset */
01893       bitcpy = 0;
01894       bitbuf = 0;
01895       mode   = 1;
01896     }
01897   }
01898 
01899   /* if bits remain then square/multiply */
01900   if (mode == 2 && bitcpy > 0) {
01901     /* square then multiply if the bit is set */
01902     for (x = 0; x < bitcpy; x++) {
01903       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
01904         goto LBL_RES;
01905       }
01906       if ((err = redux (&res, P, mp)) != MP_OKAY) {
01907         goto LBL_RES;
01908       }
01909 
01910       /* get next bit of the window */
01911       bitbuf <<= 1;
01912       if ((bitbuf & (1 << winsize)) != 0) {
01913         /* then multiply */
01914         if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
01915           goto LBL_RES;
01916         }
01917         if ((err = redux (&res, P, mp)) != MP_OKAY) {
01918           goto LBL_RES;
01919         }
01920       }
01921     }
01922   }
01923 
01924   if (redmode == 0) {
01925      /* fixup result if Montgomery reduction is used
01926       * recall that any value in a Montgomery system is
01927       * actually multiplied by R mod n.  So we have
01928       * to reduce one more time to cancel out the factor
01929       * of R.
01930       */
01931      if ((err = redux(&res, P, mp)) != MP_OKAY) {
01932        goto LBL_RES;
01933      }
01934   }
01935 
01936   /* swap res with Y */
01937   mp_exch (&res, Y);
01938   err = MP_OKAY;
01939 LBL_RES:mp_clear (&res);
01940 LBL_M:
01941   mp_clear(&M[1]);
01942   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
01943     mp_clear (&M[x]);
01944   }
01945   return err;
01946 }
01947 
01948 
01949 /* setups the montgomery reduction stuff */
01950 int
01951 mp_montgomery_setup (mp_int * n, mp_digit * rho)
01952 {
01953   mp_digit x, b;
01954 
01955 /* fast inversion mod 2**k
01956  *
01957  * Based on the fact that
01958  *
01959  * XA = 1 (mod 2**n)  =>  (X(2-XA)) A = 1 (mod 2**2n)
01960  *                    =>  2*X*A - X*X*A*A = 1
01961  *                    =>  2*(1) - (1)     = 1
01962  */
01963   b = n->dp[0];
01964 
01965   if ((b & 1) == 0) {
01966     return MP_VAL;
01967   }
01968 
01969   x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
01970   x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
01971 #if !defined(MP_8BIT)
01972   x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
01973 #endif
01974 #if defined(MP_64BIT) || !(defined(MP_8BIT) || defined(MP_16BIT))
01975   x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
01976 #endif
01977 #ifdef MP_64BIT
01978   x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
01979 #endif
01980 
01981   /* rho = -1/m mod b */
01982   /* TAO, switched mp_word casts to mp_digit to shut up compiler */
01983   *rho = (((mp_digit)1 << ((mp_digit) DIGIT_BIT)) - x) & MP_MASK;
01984 
01985   return MP_OKAY;
01986 }
01987 
01988 
01989 /* computes xR**-1 == x (mod N) via Montgomery Reduction
01990  *
01991  * This is an optimized implementation of montgomery_reduce
01992  * which uses the comba method to quickly calculate the columns of the
01993  * reduction.
01994  *
01995  * Based on Algorithm 14.32 on pp.601 of HAC.
01996 */
01997 int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
01998 {
01999   int     ix, res, olduse;
02000   mp_word W[MP_WARRAY];
02001 
02002   /* get old used count */
02003   olduse = x->used;
02004 
02005   /* grow a as required */
02006   if (x->alloc < n->used + 1) {
02007     if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
02008       return res;
02009     }
02010   }
02011 
02012   /* first we have to get the digits of the input into
02013    * an array of double precision words W[...]
02014    */
02015   {
02016     register mp_word *_W;
02017     register mp_digit *tmpx;
02018 
02019     /* alias for the W[] array */
02020     _W   = W;
02021 
02022     /* alias for the digits of  x*/
02023     tmpx = x->dp;
02024 
02025     /* copy the digits of a into W[0..a->used-1] */
02026     for (ix = 0; ix < x->used; ix++) {
02027       *_W++ = *tmpx++;
02028     }
02029 
02030     /* zero the high words of W[a->used..m->used*2] */
02031     for (; ix < n->used * 2 + 1; ix++) {
02032       *_W++ = 0;
02033     }
02034   }
02035 
02036   /* now we proceed to zero successive digits
02037    * from the least significant upwards
02038    */
02039   for (ix = 0; ix < n->used; ix++) {
02040     /* mu = ai * m' mod b
02041      *
02042      * We avoid a double precision multiplication (which isn't required)
02043      * by casting the value down to a mp_digit.  Note this requires
02044      * that W[ix-1] have  the carry cleared (see after the inner loop)
02045      */
02046     register mp_digit mu;
02047     mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
02048 
02049     /* a = a + mu * m * b**i
02050      *
02051      * This is computed in place and on the fly.  The multiplication
02052      * by b**i is handled by offseting which columns the results
02053      * are added to.
02054      *
02055      * Note the comba method normally doesn't handle carries in the
02056      * inner loop In this case we fix the carry from the previous
02057      * column since the Montgomery reduction requires digits of the
02058      * result (so far) [see above] to work.  This is
02059      * handled by fixing up one carry after the inner loop.  The
02060      * carry fixups are done in order so after these loops the
02061      * first m->used words of W[] have the carries fixed
02062      */
02063     {
02064       register int iy;
02065       register mp_digit *tmpn;
02066       register mp_word *_W;
02067 
02068       /* alias for the digits of the modulus */
02069       tmpn = n->dp;
02070 
02071       /* Alias for the columns set by an offset of ix */
02072       _W = W + ix;
02073 
02074       /* inner loop */
02075       for (iy = 0; iy < n->used; iy++) {
02076           *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++);
02077       }
02078     }
02079 
02080     /* now fix carry for next digit, W[ix+1] */
02081     W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
02082   }
02083 
02084   /* now we have to propagate the carries and
02085    * shift the words downward [all those least
02086    * significant digits we zeroed].
02087    */
02088   {
02089     register mp_digit *tmpx;
02090     register mp_word *_W, *_W1;
02091 
02092     /* nox fix rest of carries */
02093 
02094     /* alias for current word */
02095     _W1 = W + ix;
02096 
02097     /* alias for next word, where the carry goes */
02098     _W = W + ++ix;
02099 
02100     for (; ix <= n->used * 2 + 1; ix++) {
02101       *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
02102     }
02103 
02104     /* copy out, A = A/b**n
02105      *
02106      * The result is A/b**n but instead of converting from an
02107      * array of mp_word to mp_digit than calling mp_rshd
02108      * we just copy them in the right order
02109      */
02110 
02111     /* alias for destination word */
02112     tmpx = x->dp;
02113 
02114     /* alias for shifted double precision result */
02115     _W = W + n->used;
02116 
02117     for (ix = 0; ix < n->used + 1; ix++) {
02118       *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
02119     }
02120 
02121     /* zero oldused digits, if the input a was larger than
02122      * m->used+1 we'll have to clear the digits
02123      */
02124     for (; ix < olduse; ix++) {
02125       *tmpx++ = 0;
02126     }
02127   }
02128 
02129   /* set the max used and clamp */
02130   x->used = n->used + 1;
02131   mp_clamp (x);
02132 
02133   /* if A >= m then A = A - m */
02134   if (mp_cmp_mag (x, n) != MP_LT) {
02135     return s_mp_sub (x, n, x);
02136   }
02137   return MP_OKAY;
02138 }
02139 
02140 
02141 /* computes xR**-1 == x (mod N) via Montgomery Reduction */
02142 int
02143 mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
02144 {
02145   int     ix, res, digs;
02146   mp_digit mu;
02147 
02148   /* can the fast reduction [comba] method be used?
02149    *
02150    * Note that unlike in mul you're safely allowed *less*
02151    * than the available columns [255 per default] since carries
02152    * are fixed up in the inner loop.
02153    */
02154   digs = n->used * 2 + 1;
02155   if ((digs < MP_WARRAY) &&
02156       n->used <
02157       (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
02158     return fast_mp_montgomery_reduce (x, n, rho);
02159   }
02160 
02161   /* grow the input as required */
02162   if (x->alloc < digs) {
02163     if ((res = mp_grow (x, digs)) != MP_OKAY) {
02164       return res;
02165     }
02166   }
02167   x->used = digs;
02168 
02169   for (ix = 0; ix < n->used; ix++) {
02170     /* mu = ai * rho mod b
02171      *
02172      * The value of rho must be precalculated via
02173      * montgomery_setup() such that
02174      * it equals -1/n0 mod b this allows the
02175      * following inner loop to reduce the
02176      * input one digit at a time
02177      */
02178     mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK);
02179 
02180     /* a = a + mu * m * b**i */
02181     {
02182       register int iy;
02183       register mp_digit *tmpn, *tmpx, u;
02184       register mp_word r;
02185 
02186       /* alias for digits of the modulus */
02187       tmpn = n->dp;
02188 
02189       /* alias for the digits of x [the input] */
02190       tmpx = x->dp + ix;
02191 
02192       /* set the carry to zero */
02193       u = 0;
02194 
02195       /* Multiply and add in place */
02196       for (iy = 0; iy < n->used; iy++) {
02197         /* compute product and sum */
02198         r       = ((mp_word)mu) * ((mp_word)*tmpn++) +
02199                   ((mp_word) u) + ((mp_word) * tmpx);
02200 
02201         /* get carry */
02202         u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
02203 
02204         /* fix digit */
02205         *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK));
02206       }
02207       /* At this point the ix'th digit of x should be zero */
02208 
02209 
02210       /* propagate carries upwards as required*/
02211       while (u) {
02212         *tmpx   += u;
02213         u        = *tmpx >> DIGIT_BIT;
02214         *tmpx++ &= MP_MASK;
02215       }
02216     }
02217   }
02218 
02219   /* at this point the n.used'th least
02220    * significant digits of x are all zero
02221    * which means we can shift x to the
02222    * right by n.used digits and the
02223    * residue is unchanged.
02224    */
02225 
02226   /* x = x/b**n.used */
02227   mp_clamp(x);
02228   mp_rshd (x, n->used);
02229 
02230   /* if x >= n then x = x - n */
02231   if (mp_cmp_mag (x, n) != MP_LT) {
02232     return s_mp_sub (x, n, x);
02233   }
02234 
02235   return MP_OKAY;
02236 }
02237 
02238 
02239 /* determines the setup value */
02240 void mp_dr_setup(mp_int *a, mp_digit *d)
02241 {
02242    /* the casts are required if DIGIT_BIT is one less than
02243     * the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
02244     */
02245    *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) - 
02246         ((mp_word)a->dp[0]));
02247 }
02248 
02249 
02250 /* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
02251  *
02252  * Based on algorithm from the paper
02253  *
02254  * "Generating Efficient Primes for Discrete Log Cryptosystems"
02255  *                 Chae Hoon Lim, Pil Joong Lee,
02256  *          POSTECH Information Research Laboratories
02257  *
02258  * The modulus must be of a special format [see manual]
02259  *
02260  * Has been modified to use algorithm 7.10 from the LTM book instead
02261  *
02262  * Input x must be in the range 0 <= x <= (n-1)**2
02263  */
02264 int
02265 mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k)
02266 {
02267   int      err, i, m;
02268   mp_word  r;
02269   mp_digit mu, *tmpx1, *tmpx2;
02270 
02271   /* m = digits in modulus */
02272   m = n->used;
02273 
02274   /* ensure that "x" has at least 2m digits */
02275   if (x->alloc < m + m) {
02276     if ((err = mp_grow (x, m + m)) != MP_OKAY) {
02277       return err;
02278     }
02279   }
02280 
02281 /* top of loop, this is where the code resumes if
02282  * another reduction pass is required.
02283  */
02284 top:
02285   /* aliases for digits */
02286   /* alias for lower half of x */
02287   tmpx1 = x->dp;
02288 
02289   /* alias for upper half of x, or x/B**m */
02290   tmpx2 = x->dp + m;
02291 
02292   /* set carry to zero */
02293   mu = 0;
02294 
02295   /* compute (x mod B**m) + k * [x/B**m] inline and inplace */
02296   for (i = 0; i < m; i++) {
02297       r         = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu;
02298       *tmpx1++  = (mp_digit)(r & MP_MASK);
02299       mu        = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
02300   }
02301 
02302   /* set final carry */
02303   *tmpx1++ = mu;
02304 
02305   /* zero words above m */
02306   for (i = m + 1; i < x->used; i++) {
02307       *tmpx1++ = 0;
02308   }
02309 
02310   /* clamp, sub and return */
02311   mp_clamp (x);
02312 
02313   /* if x >= n then subtract and reduce again
02314    * Each successive "recursion" makes the input smaller and smaller.
02315    */
02316   if (mp_cmp_mag (x, n) != MP_LT) {
02317     s_mp_sub(x, n, x);
02318     goto top;
02319   }
02320   return MP_OKAY;
02321 }
02322 
02323 
02324 /* reduces a modulo n where n is of the form 2**p - d */
02325 int mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d)
02326 {
02327    mp_int q;
02328    int    p, res;
02329    
02330    if ((res = mp_init(&q)) != MP_OKAY) {
02331       return res;
02332    }
02333    
02334    p = mp_count_bits(n);    
02335 top:
02336    /* q = a/2**p, a = a mod 2**p */
02337    if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
02338       goto ERR;
02339    }
02340    
02341    if (d != 1) {
02342       /* q = q * d */
02343       if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) { 
02344          goto ERR;
02345       }
02346    }
02347    
02348    /* a = a + q */
02349    if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
02350       goto ERR;
02351    }
02352    
02353    if (mp_cmp_mag(a, n) != MP_LT) {
02354       s_mp_sub(a, n, a);
02355       goto top;
02356    }
02357    
02358 ERR:
02359    mp_clear(&q);
02360    return res;
02361 }
02362 
02363 
02364 /* determines the setup value */
02365 int mp_reduce_2k_setup(mp_int *a, mp_digit *d)
02366 {
02367    int res, p;
02368    mp_int tmp;
02369    
02370    if ((res = mp_init(&tmp)) != MP_OKAY) {
02371       return res;
02372    }
02373    
02374    p = mp_count_bits(a);
02375    if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
02376       mp_clear(&tmp);
02377       return res;
02378    }
02379    
02380    if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
02381       mp_clear(&tmp);
02382       return res;
02383    }
02384    
02385    *d = tmp.dp[0];
02386    mp_clear(&tmp);
02387    return MP_OKAY;
02388 }
02389 
02390 
02391 /* computes a = 2**b 
02392  *
02393  * Simple algorithm which zeroes the int, grows it then just sets one bit
02394  * as required.
02395  */
02396 int
02397 mp_2expt (mp_int * a, int b)
02398 {
02399   int     res;
02400 
02401   /* zero a as per default */
02402   mp_zero (a);
02403 
02404   /* grow a to accomodate the single bit */
02405   if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) {
02406     return res;
02407   }
02408 
02409   /* set the used count of where the bit will go */
02410   a->used = b / DIGIT_BIT + 1;
02411 
02412   /* put the single bit in its place */
02413   a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
02414 
02415   return MP_OKAY;
02416 }
02417 
02418 
02419 /* multiply by a digit */
02420 int
02421 mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
02422 {
02423   mp_digit u, *tmpa, *tmpc;
02424   mp_word  r;
02425   int      ix, res, olduse;
02426 
02427   /* make sure c is big enough to hold a*b */
02428   if (c->alloc < a->used + 1) {
02429     if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
02430       return res;
02431     }
02432   }
02433 
02434   /* get the original destinations used count */
02435   olduse = c->used;
02436 
02437   /* set the sign */
02438   c->sign = a->sign;
02439 
02440   /* alias for a->dp [source] */
02441   tmpa = a->dp;
02442 
02443   /* alias for c->dp [dest] */
02444   tmpc = c->dp;
02445 
02446   /* zero carry */
02447   u = 0;
02448 
02449   /* compute columns */
02450   for (ix = 0; ix < a->used; ix++) {
02451     /* compute product and carry sum for this term */
02452     r       = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b);
02453 
02454     /* mask off higher bits to get a single digit */
02455     *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
02456 
02457     /* send carry into next iteration */
02458     u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
02459   }
02460 
02461   /* store final carry [if any] and increment ix offset  */
02462   *tmpc++ = u;
02463   ++ix;
02464 
02465   /* now zero digits above the top */
02466   while (ix++ < olduse) {
02467      *tmpc++ = 0;
02468   }
02469 
02470   /* set used count */
02471   c->used = a->used + 1;
02472   mp_clamp(c);
02473 
02474   return MP_OKAY;
02475 }
02476 
02477 
02478 /* d = a * b (mod c) */
02479 int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
02480 {
02481   int     res;
02482   mp_int  t;
02483 
02484   if ((res = mp_init (&t)) != MP_OKAY) {
02485     return res;
02486   }
02487 
02488   if ((res = mp_mul (a, b, &t)) != MP_OKAY) {
02489     mp_clear (&t);
02490     return res;
02491   }
02492   res = mp_mod (&t, c, d);
02493   mp_clear (&t);
02494   return res;
02495 }
02496 
02497 
02498 /* computes b = a*a */
02499 int
02500 mp_sqr (mp_int * a, mp_int * b)
02501 {
02502   int     res;
02503 
02504   {
02505 #ifdef BN_FAST_S_MP_SQR_C
02506     /* can we use the fast comba multiplier? */
02507     if ((a->used * 2 + 1) < MP_WARRAY && 
02508          a->used < 
02509          (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
02510       res = fast_s_mp_sqr (a, b);
02511     } else
02512 #endif
02513 #ifdef BN_S_MP_SQR_C
02514       res = s_mp_sqr (a, b);
02515 #else
02516       res = MP_VAL;
02517 #endif
02518   }
02519   b->sign = MP_ZPOS;
02520   return res;
02521 }
02522 
02523 
02524 /* high level multiplication (handles sign) */
02525 int mp_mul (mp_int * a, mp_int * b, mp_int * c)
02526 {
02527   int     res, neg;
02528   neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
02529 
02530   {
02531     /* can we use the fast multiplier?
02532      *
02533      * The fast multiplier can be used if the output will 
02534      * have less than MP_WARRAY digits and the number of 
02535      * digits won't affect carry propagation
02536      */
02537     int     digs = a->used + b->used + 1;
02538 
02539 #ifdef BN_FAST_S_MP_MUL_DIGS_C
02540     if ((digs < MP_WARRAY) &&
02541         MIN(a->used, b->used) <= 
02542         (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
02543       res = fast_s_mp_mul_digs (a, b, c, digs);
02544     } else 
02545 #endif
02546 #ifdef BN_S_MP_MUL_DIGS_C
02547       res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
02548 #else
02549       res = MP_VAL;
02550 #endif
02551 
02552   }
02553   c->sign = (c->used > 0) ? neg : MP_ZPOS;
02554   return res;
02555 }
02556 
02557 
02558 /* b = a*2 */
02559 int mp_mul_2(mp_int * a, mp_int * b)
02560 {
02561   int     x, res, oldused;
02562 
02563   /* grow to accomodate result */
02564   if (b->alloc < a->used + 1) {
02565     if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
02566       return res;
02567     }
02568   }
02569 
02570   oldused = b->used;
02571   b->used = a->used;
02572 
02573   {
02574     register mp_digit r, rr, *tmpa, *tmpb;
02575 
02576     /* alias for source */
02577     tmpa = a->dp;
02578     
02579     /* alias for dest */
02580     tmpb = b->dp;
02581 
02582     /* carry */
02583     r = 0;
02584     for (x = 0; x < a->used; x++) {
02585     
02586       /* get what will be the *next* carry bit from the 
02587        * MSB of the current digit 
02588        */
02589       rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
02590       
02591       /* now shift up this digit, add in the carry [from the previous] */
02592       *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK;
02593       
02594       /* copy the carry that would be from the source 
02595        * digit into the next iteration 
02596        */
02597       r = rr;
02598     }
02599 
02600     /* new leading digit? */
02601     if (r != 0) {
02602       /* add a MSB which is always 1 at this point */
02603       *tmpb = 1;
02604       ++(b->used);
02605     }
02606 
02607     /* now zero any excess digits on the destination 
02608      * that we didn't write to 
02609      */
02610     tmpb = b->dp + b->used;
02611     for (x = b->used; x < oldused; x++) {
02612       *tmpb++ = 0;
02613     }
02614   }
02615   b->sign = a->sign;
02616   return MP_OKAY;
02617 }
02618 
02619 
02620 /* divide by three (based on routine from MPI and the GMP manual) */
02621 int
02622 mp_div_3 (mp_int * a, mp_int *c, mp_digit * d)
02623 {
02624   mp_int   q;
02625   mp_word  w, t;
02626   mp_digit b;
02627   int      res, ix;
02628   
02629   /* b = 2**DIGIT_BIT / 3 */
02630   b = (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3);
02631 
02632   if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
02633      return res;
02634   }
02635   
02636   q.used = a->used;
02637   q.sign = a->sign;
02638   w = 0;
02639   for (ix = a->used - 1; ix >= 0; ix--) {
02640      w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
02641 
02642      if (w >= 3) {
02643         /* multiply w by [1/3] */
02644         t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT);
02645 
02646         /* now subtract 3 * [w/3] from w, to get the remainder */
02647         w -= t+t+t;
02648 
02649         /* fixup the remainder as required since
02650          * the optimization is not exact.
02651          */
02652         while (w >= 3) {
02653            t += 1;
02654            w -= 3;
02655         }
02656       } else {
02657         t = 0;
02658       }
02659       q.dp[ix] = (mp_digit)t;
02660   }
02661 
02662   /* [optional] store the remainder */
02663   if (d != NULL) {
02664      *d = (mp_digit)w;
02665   }
02666 
02667   /* [optional] store the quotient */
02668   if (c != NULL) {
02669      mp_clamp(&q);
02670      mp_exch(&q, c);
02671   }
02672   mp_clear(&q);
02673   
02674   return res;
02675 }
02676 
02677 
02678 /* init an mp_init for a given size */
02679 int mp_init_size (mp_int * a, int size)
02680 {
02681   int x;
02682 
02683   /* pad size so there are always extra digits */
02684   size += (MP_PREC * 2) - (size % MP_PREC); 
02685   
02686   /* alloc mem */
02687   a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size, 0,
02688                                       DYNAMIC_TYPE_BIGINT);
02689   if (a->dp == NULL) {
02690     return MP_MEM;
02691   }
02692 
02693   /* set the members */
02694   a->used  = 0;
02695   a->alloc = size;
02696   a->sign  = MP_ZPOS;
02697 
02698   /* zero the digits */
02699   for (x = 0; x < size; x++) {
02700       a->dp[x] = 0;
02701   }
02702 
02703   return MP_OKAY;
02704 }
02705 
02706 
02707 /* the jist of squaring...
02708  * you do like mult except the offset of the tmpx [one that 
02709  * starts closer to zero] can't equal the offset of tmpy.  
02710  * So basically you set up iy like before then you min it with
02711  * (ty-tx) so that it never happens.  You double all those 
02712  * you add in the inner loop
02713 
02714 After that loop you do the squares and add them in.
02715 */
02716 
02717 int fast_s_mp_sqr (mp_int * a, mp_int * b)
02718 {
02719   int       olduse, res, pa, ix, iz;
02720   mp_digit   W[MP_WARRAY], *tmpx;
02721   mp_word   W1;
02722 
02723   /* grow the destination as required */
02724   pa = a->used + a->used;
02725   if (b->alloc < pa) {
02726     if ((res = mp_grow (b, pa)) != MP_OKAY) {
02727       return res;
02728     }
02729   }
02730 
02731   /* number of output digits to produce */
02732   W1 = 0;
02733   for (ix = 0; ix < pa; ix++) { 
02734       int      tx, ty, iy;
02735       mp_word  _W;
02736       mp_digit *tmpy;
02737 
02738       /* clear counter */
02739       _W = 0;
02740 
02741       /* get offsets into the two bignums */
02742       ty = MIN(a->used-1, ix);
02743       tx = ix - ty;
02744 
02745       /* setup temp aliases */
02746       tmpx = a->dp + tx;
02747       tmpy = a->dp + ty;
02748 
02749       /* this is the number of times the loop will iterrate, essentially
02750          while (tx++ < a->used && ty-- >= 0) { ... }
02751        */
02752       iy = MIN(a->used-tx, ty+1);
02753 
02754       /* now for squaring tx can never equal ty 
02755        * we halve the distance since they approach at a rate of 2x
02756        * and we have to round because odd cases need to be executed
02757        */
02758       iy = MIN(iy, (ty-tx+1)>>1);
02759 
02760       /* execute loop */
02761       for (iz = 0; iz < iy; iz++) {
02762          _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
02763       }
02764 
02765       /* double the inner product and add carry */
02766       _W = _W + _W + W1;
02767 
02768       /* even columns have the square term in them */
02769       if ((ix&1) == 0) {
02770          _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
02771       }
02772 
02773       /* store it */
02774       W[ix] = (mp_digit)(_W & MP_MASK);
02775 
02776       /* make next carry */
02777       W1 = _W >> ((mp_word)DIGIT_BIT);
02778   }
02779 
02780   /* setup dest */
02781   olduse  = b->used;
02782   b->used = a->used+a->used;
02783 
02784   {
02785     mp_digit *tmpb;
02786     tmpb = b->dp;
02787     for (ix = 0; ix < pa; ix++) {
02788       *tmpb++ = W[ix] & MP_MASK;
02789     }
02790 
02791     /* clear unused digits [that existed in the old copy of c] */
02792     for (; ix < olduse; ix++) {
02793       *tmpb++ = 0;
02794     }
02795   }
02796   mp_clamp (b);
02797   return MP_OKAY;
02798 }
02799 
02800 
02801 /* Fast (comba) multiplier
02802  *
02803  * This is the fast column-array [comba] multiplier.  It is 
02804  * designed to compute the columns of the product first 
02805  * then handle the carries afterwards.  This has the effect 
02806  * of making the nested loops that compute the columns very
02807  * simple and schedulable on super-scalar processors.
02808  *
02809  * This has been modified to produce a variable number of 
02810  * digits of output so if say only a half-product is required 
02811  * you don't have to compute the upper half (a feature 
02812  * required for fast Barrett reduction).
02813  *
02814  * Based on Algorithm 14.12 on pp.595 of HAC.
02815  *
02816  */
02817 int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
02818 {
02819   int     olduse, res, pa, ix, iz;
02820   mp_digit W[MP_WARRAY];
02821   register mp_word  _W;
02822 
02823   /* grow the destination as required */
02824   if (c->alloc < digs) {
02825     if ((res = mp_grow (c, digs)) != MP_OKAY) {
02826       return res;
02827     }
02828   }
02829 
02830   /* number of output digits to produce */
02831   pa = MIN(digs, a->used + b->used);
02832 
02833   /* clear the carry */
02834   _W = 0;
02835   for (ix = 0; ix < pa; ix++) { 
02836       int      tx, ty;
02837       int      iy;
02838       mp_digit *tmpx, *tmpy;
02839 
02840       /* get offsets into the two bignums */
02841       ty = MIN(b->used-1, ix);
02842       tx = ix - ty;
02843 
02844       /* setup temp aliases */
02845       tmpx = a->dp + tx;
02846       tmpy = b->dp + ty;
02847 
02848       /* this is the number of times the loop will iterrate, essentially 
02849          while (tx++ < a->used && ty-- >= 0) { ... }
02850        */
02851       iy = MIN(a->used-tx, ty+1);
02852 
02853       /* execute loop */
02854       for (iz = 0; iz < iy; ++iz) {
02855          _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
02856 
02857       }
02858 
02859       /* store term */
02860       W[ix] = ((mp_digit)_W) & MP_MASK;
02861 
02862       /* make next carry */
02863       _W = _W >> ((mp_word)DIGIT_BIT);
02864  }
02865 
02866   /* setup dest */
02867   olduse  = c->used;
02868   c->used = pa;
02869 
02870   {
02871     register mp_digit *tmpc;
02872     tmpc = c->dp;
02873     for (ix = 0; ix < pa+1; ix++) {
02874       /* now extract the previous digit [below the carry] */
02875       *tmpc++ = W[ix];
02876     }
02877 
02878     /* clear unused digits [that existed in the old copy of c] */
02879     for (; ix < olduse; ix++) {
02880       *tmpc++ = 0;
02881     }
02882   }
02883   mp_clamp (c);
02884   return MP_OKAY;
02885 }
02886 
02887 
02888 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
02889 int s_mp_sqr (mp_int * a, mp_int * b)
02890 {
02891   mp_int  t;
02892   int     res, ix, iy, pa;
02893   mp_word r;
02894   mp_digit u, tmpx, *tmpt;
02895 
02896   pa = a->used;
02897   if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
02898     return res;
02899   }
02900 
02901   /* default used is maximum possible size */
02902   t.used = 2*pa + 1;
02903 
02904   for (ix = 0; ix < pa; ix++) {
02905     /* first calculate the digit at 2*ix */
02906     /* calculate double precision result */
02907     r = ((mp_word) t.dp[2*ix]) +
02908         ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
02909 
02910     /* store lower part in result */
02911     t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
02912 
02913     /* get the carry */
02914     u           = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
02915 
02916     /* left hand side of A[ix] * A[iy] */
02917     tmpx        = a->dp[ix];
02918 
02919     /* alias for where to store the results */
02920     tmpt        = t.dp + (2*ix + 1);
02921     
02922     for (iy = ix + 1; iy < pa; iy++) {
02923       /* first calculate the product */
02924       r       = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
02925 
02926       /* now calculate the double precision result, note we use
02927        * addition instead of *2 since it's easier to optimize
02928        */
02929       r       = ((mp_word) *tmpt) + r + r + ((mp_word) u);
02930 
02931       /* store lower part */
02932       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
02933 
02934       /* get carry */
02935       u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
02936     }
02937     /* propagate upwards */
02938     while (u != ((mp_digit) 0)) {
02939       r       = ((mp_word) *tmpt) + ((mp_word) u);
02940       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
02941       u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
02942     }
02943   }
02944 
02945   mp_clamp (&t);
02946   mp_exch (&t, b);
02947   mp_clear (&t);
02948   return MP_OKAY;
02949 }
02950 
02951 
02952 /* multiplies |a| * |b| and only computes upto digs digits of result
02953  * HAC pp. 595, Algorithm 14.12  Modified so you can control how 
02954  * many digits of output are created.
02955  */
02956 int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
02957 {
02958   mp_int  t;
02959   int     res, pa, pb, ix, iy;
02960   mp_digit u;
02961   mp_word r;
02962   mp_digit tmpx, *tmpt, *tmpy;
02963 
02964   /* can we use the fast multiplier? */
02965   if (((digs) < MP_WARRAY) &&
02966       MIN (a->used, b->used) < 
02967           (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
02968     return fast_s_mp_mul_digs (a, b, c, digs);
02969   }
02970 
02971   if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
02972     return res;
02973   }
02974   t.used = digs;
02975 
02976   /* compute the digits of the product directly */
02977   pa = a->used;
02978   for (ix = 0; ix < pa; ix++) {
02979     /* set the carry to zero */
02980     u = 0;
02981 
02982     /* limit ourselves to making digs digits of output */
02983     pb = MIN (b->used, digs - ix);
02984 
02985     /* setup some aliases */
02986     /* copy of the digit from a used within the nested loop */
02987     tmpx = a->dp[ix];
02988     
02989     /* an alias for the destination shifted ix places */
02990     tmpt = t.dp + ix;
02991     
02992     /* an alias for the digits of b */
02993     tmpy = b->dp;
02994 
02995     /* compute the columns of the output and propagate the carry */
02996     for (iy = 0; iy < pb; iy++) {
02997       /* compute the column as a mp_word */
02998       r       = ((mp_word)*tmpt) +
02999                 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
03000                 ((mp_word) u);
03001 
03002       /* the new column is the lower part of the result */
03003       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
03004 
03005       /* get the carry word from the result */
03006       u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
03007     }
03008     /* set carry if it is placed below digs */
03009     if (ix + iy < digs) {
03010       *tmpt = u;
03011     }
03012   }
03013 
03014   mp_clamp (&t);
03015   mp_exch (&t, c);
03016 
03017   mp_clear (&t);
03018   return MP_OKAY;
03019 }
03020 
03021 
03022 /*
03023  * shifts with subtractions when the result is greater than b.
03024  *
03025  * The method is slightly modified to shift B unconditionally upto just under
03026  * the leading bit of b.  This saves alot of multiple precision shifting.
03027  */
03028 int mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
03029 {
03030   int     x, bits, res;
03031 
03032   /* how many bits of last digit does b use */
03033   bits = mp_count_bits (b) % DIGIT_BIT;
03034 
03035   if (b->used > 1) {
03036      if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) {
03037         return res;
03038      }
03039   } else {
03040      mp_set(a, 1);
03041      bits = 1;
03042   }
03043 
03044 
03045   /* now compute C = A * B mod b */
03046   for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
03047     if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
03048       return res;
03049     }
03050     if (mp_cmp_mag (a, b) != MP_LT) {
03051       if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
03052         return res;
03053       }
03054     }
03055   }
03056 
03057   return MP_OKAY;
03058 }
03059 
03060 
03061 #ifdef MP_LOW_MEM
03062    #define TAB_SIZE 32
03063 #else
03064    #define TAB_SIZE 256
03065 #endif
03066 
03067 int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
03068 {
03069   mp_int  M[TAB_SIZE], res, mu;
03070   mp_digit buf;
03071   int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
03072   int (*redux)(mp_int*,mp_int*,mp_int*);
03073 
03074   /* find window size */
03075   x = mp_count_bits (X);
03076   if (x <= 7) {
03077     winsize = 2;
03078   } else if (x <= 36) {
03079     winsize = 3;
03080   } else if (x <= 140) {
03081     winsize = 4;
03082   } else if (x <= 450) {
03083     winsize = 5;
03084   } else if (x <= 1303) {
03085     winsize = 6;
03086   } else if (x <= 3529) {
03087     winsize = 7;
03088   } else {
03089     winsize = 8;
03090   }
03091 
03092 #ifdef MP_LOW_MEM
03093     if (winsize > 5) {
03094        winsize = 5;
03095     }
03096 #endif
03097 
03098   /* init M array */
03099   /* init first cell */
03100   if ((err = mp_init(&M[1])) != MP_OKAY) {
03101      return err; 
03102   }
03103 
03104   /* now init the second half of the array */
03105   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
03106     if ((err = mp_init(&M[x])) != MP_OKAY) {
03107       for (y = 1<<(winsize-1); y < x; y++) {
03108         mp_clear (&M[y]);
03109       }
03110       mp_clear(&M[1]);
03111       return err;
03112     }
03113   }
03114 
03115   /* create mu, used for Barrett reduction */
03116   if ((err = mp_init (&mu)) != MP_OKAY) {
03117     goto LBL_M;
03118   }
03119   
03120   if (redmode == 0) {
03121      if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
03122         goto LBL_MU;
03123      }
03124      redux = mp_reduce;
03125   } else {
03126      if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) {
03127         goto LBL_MU;
03128      }
03129      redux = mp_reduce_2k_l;
03130   }    
03131 
03132   /* create M table
03133    *
03134    * The M table contains powers of the base, 
03135    * e.g. M[x] = G**x mod P
03136    *
03137    * The first half of the table is not 
03138    * computed though accept for M[0] and M[1]
03139    */
03140   if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
03141     goto LBL_MU;
03142   }
03143 
03144   /* compute the value at M[1<<(winsize-1)] by squaring 
03145    * M[1] (winsize-1) times 
03146    */
03147   if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
03148     goto LBL_MU;
03149   }
03150 
03151   for (x = 0; x < (winsize - 1); x++) {
03152     /* square it */
03153     if ((err = mp_sqr (&M[1 << (winsize - 1)], 
03154                        &M[1 << (winsize - 1)])) != MP_OKAY) {
03155       goto LBL_MU;
03156     }
03157 
03158     /* reduce modulo P */
03159     if ((err = redux (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
03160       goto LBL_MU;
03161     }
03162   }
03163 
03164   /* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
03165    * for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
03166    */
03167   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
03168     if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
03169       goto LBL_MU;
03170     }
03171     if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
03172       goto LBL_MU;
03173     }
03174   }
03175 
03176   /* setup result */
03177   if ((err = mp_init (&res)) != MP_OKAY) {
03178     goto LBL_MU;
03179   }
03180   mp_set (&res, 1);
03181 
03182   /* set initial mode and bit cnt */
03183   mode   = 0;
03184   bitcnt = 1;
03185   buf    = 0;
03186   digidx = X->used - 1;
03187   bitcpy = 0;
03188   bitbuf = 0;
03189 
03190   for (;;) {
03191     /* grab next digit as required */
03192     if (--bitcnt == 0) {
03193       /* if digidx == -1 we are out of digits */
03194       if (digidx == -1) {
03195         break;
03196       }
03197       /* read next digit and reset the bitcnt */
03198       buf    = X->dp[digidx--];
03199       bitcnt = (int) DIGIT_BIT;
03200     }
03201 
03202     /* grab the next msb from the exponent */
03203     y     = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
03204     buf <<= (mp_digit)1;
03205 
03206     /* if the bit is zero and mode == 0 then we ignore it
03207      * These represent the leading zero bits before the first 1 bit
03208      * in the exponent.  Technically this opt is not required but it
03209      * does lower the # of trivial squaring/reductions used
03210      */
03211     if (mode == 0 && y == 0) {
03212       continue;
03213     }
03214 
03215     /* if the bit is zero and mode == 1 then we square */
03216     if (mode == 1 && y == 0) {
03217       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
03218         goto LBL_RES;
03219       }
03220       if ((err = redux (&res, P, &mu)) != MP_OKAY) {
03221         goto LBL_RES;
03222       }
03223       continue;
03224     }
03225 
03226     /* else we add it to the window */
03227     bitbuf |= (y << (winsize - ++bitcpy));
03228     mode    = 2;
03229 
03230     if (bitcpy == winsize) {
03231       /* ok window is filled so square as required and multiply  */
03232       /* square first */
03233       for (x = 0; x < winsize; x++) {
03234         if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
03235           goto LBL_RES;
03236         }
03237         if ((err = redux (&res, P, &mu)) != MP_OKAY) {
03238           goto LBL_RES;
03239         }
03240       }
03241 
03242       /* then multiply */
03243       if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
03244         goto LBL_RES;
03245       }
03246       if ((err = redux (&res, P, &mu)) != MP_OKAY) {
03247         goto LBL_RES;
03248       }
03249 
03250       /* empty window and reset */
03251       bitcpy = 0;
03252       bitbuf = 0;
03253       mode   = 1;
03254     }
03255   }
03256 
03257   /* if bits remain then square/multiply */
03258   if (mode == 2 && bitcpy > 0) {
03259     /* square then multiply if the bit is set */
03260     for (x = 0; x < bitcpy; x++) {
03261       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
03262         goto LBL_RES;
03263       }
03264       if ((err = redux (&res, P, &mu)) != MP_OKAY) {
03265         goto LBL_RES;
03266       }
03267 
03268       bitbuf <<= 1;
03269       if ((bitbuf & (1 << winsize)) != 0) {
03270         /* then multiply */
03271         if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
03272           goto LBL_RES;
03273         }
03274         if ((err = redux (&res, P, &mu)) != MP_OKAY) {
03275           goto LBL_RES;
03276         }
03277       }
03278     }
03279   }
03280 
03281   mp_exch (&res, Y);
03282   err = MP_OKAY;
03283 LBL_RES:mp_clear (&res);
03284 LBL_MU:mp_clear (&mu);
03285 LBL_M:
03286   mp_clear(&M[1]);
03287   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
03288     mp_clear (&M[x]);
03289   }
03290   return err;
03291 }
03292 
03293 
03294 /* pre-calculate the value required for Barrett reduction
03295  * For a given modulus "b" it calulates the value required in "a"
03296  */
03297 int mp_reduce_setup (mp_int * a, mp_int * b)
03298 {
03299   int     res;
03300   
03301   if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
03302     return res;
03303   }
03304   return mp_div (a, b, a, NULL);
03305 }
03306 
03307 
03308 /* reduces x mod m, assumes 0 < x < m**2, mu is 
03309  * precomputed via mp_reduce_setup.
03310  * From HAC pp.604 Algorithm 14.42
03311  */
03312 int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
03313 {
03314   mp_int  q;
03315   int     res, um = m->used;
03316 
03317   /* q = x */
03318   if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
03319     return res;
03320   }
03321 
03322   /* q1 = x / b**(k-1)  */
03323   mp_rshd (&q, um - 1);         
03324 
03325   /* according to HAC this optimization is ok */
03326   if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
03327     if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
03328       goto CLEANUP;
03329     }
03330   } else {
03331 #ifdef BN_S_MP_MUL_HIGH_DIGS_C
03332     if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
03333       goto CLEANUP;
03334     }
03335 #elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C)
03336     if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
03337       goto CLEANUP;
03338     }
03339 #else 
03340     { 
03341       res = MP_VAL;
03342       goto CLEANUP;
03343     }
03344 #endif
03345   }
03346 
03347   /* q3 = q2 / b**(k+1) */
03348   mp_rshd (&q, um + 1);         
03349 
03350   /* x = x mod b**(k+1), quick (no division) */
03351   if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
03352     goto CLEANUP;
03353   }
03354 
03355   /* q = q * m mod b**(k+1), quick (no division) */
03356   if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
03357     goto CLEANUP;
03358   }
03359 
03360   /* x = x - q */
03361   if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
03362     goto CLEANUP;
03363   }
03364 
03365   /* If x < 0, add b**(k+1) to it */
03366   if (mp_cmp_d (x, 0) == MP_LT) {
03367     mp_set (&q, 1);
03368     if ((res = mp_lshd (&q, um + 1)) != MP_OKAY)
03369       goto CLEANUP;
03370     if ((res = mp_add (x, &q, x)) != MP_OKAY)
03371       goto CLEANUP;
03372   }
03373 
03374   /* Back off if it's too big */
03375   while (mp_cmp (x, m) != MP_LT) {
03376     if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
03377       goto CLEANUP;
03378     }
03379   }
03380   
03381 CLEANUP:
03382   mp_clear (&q);
03383 
03384   return res;
03385 }
03386 
03387 
03388 /* reduces a modulo n where n is of the form 2**p - d 
03389    This differs from reduce_2k since "d" can be larger
03390    than a single digit.
03391 */
03392 int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d)
03393 {
03394    mp_int q;
03395    int    p, res;
03396    
03397    if ((res = mp_init(&q)) != MP_OKAY) {
03398       return res;
03399    }
03400    
03401    p = mp_count_bits(n);    
03402 top:
03403    /* q = a/2**p, a = a mod 2**p */
03404    if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
03405       goto ERR;
03406    }
03407    
03408    /* q = q * d */
03409    if ((res = mp_mul(&q, d, &q)) != MP_OKAY) { 
03410       goto ERR;
03411    }
03412    
03413    /* a = a + q */
03414    if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
03415       goto ERR;
03416    }
03417    
03418    if (mp_cmp_mag(a, n) != MP_LT) {
03419       s_mp_sub(a, n, a);
03420       goto top;
03421    }
03422    
03423 ERR:
03424    mp_clear(&q);
03425    return res;
03426 }
03427 
03428 
03429 /* determines the setup value */
03430 int mp_reduce_2k_setup_l(mp_int *a, mp_int *d)
03431 {
03432    int    res;
03433    mp_int tmp;
03434    
03435    if ((res = mp_init(&tmp)) != MP_OKAY) {
03436       return res;
03437    }
03438    
03439    if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
03440       goto ERR;
03441    }
03442    
03443    if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
03444       goto ERR;
03445    }
03446    
03447 ERR:
03448    mp_clear(&tmp);
03449    return res;
03450 }
03451 
03452 
03453 /* multiplies |a| * |b| and does not compute the lower digs digits
03454  * [meant to get the higher part of the product]
03455  */
03456 int
03457 s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
03458 {
03459   mp_int  t;
03460   int     res, pa, pb, ix, iy;
03461   mp_digit u;
03462   mp_word r;
03463   mp_digit tmpx, *tmpt, *tmpy;
03464 
03465   /* can we use the fast multiplier? */
03466 #ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
03467   if (((a->used + b->used + 1) < MP_WARRAY)
03468       && MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
03469     return fast_s_mp_mul_high_digs (a, b, c, digs);
03470   }
03471 #endif
03472 
03473   if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
03474     return res;
03475   }
03476   t.used = a->used + b->used + 1;
03477 
03478   pa = a->used;
03479   pb = b->used;
03480   for (ix = 0; ix < pa; ix++) {
03481     /* clear the carry */
03482     u = 0;
03483 
03484     /* left hand side of A[ix] * B[iy] */
03485     tmpx = a->dp[ix];
03486 
03487     /* alias to the address of where the digits will be stored */
03488     tmpt = &(t.dp[digs]);
03489 
03490     /* alias for where to read the right hand side from */
03491     tmpy = b->dp + (digs - ix);
03492 
03493     for (iy = digs - ix; iy < pb; iy++) {
03494       /* calculate the double precision result */
03495       r       = ((mp_word)*tmpt) +
03496                 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
03497                 ((mp_word) u);
03498 
03499       /* get the lower part */
03500       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
03501 
03502       /* carry the carry */
03503       u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
03504     }
03505     *tmpt = u;
03506   }
03507   mp_clamp (&t);
03508   mp_exch (&t, c);
03509   mp_clear (&t);
03510   return MP_OKAY;
03511 }
03512 
03513 
03514 /* this is a modified version of fast_s_mul_digs that only produces
03515  * output digits *above* digs.  See the comments for fast_s_mul_digs
03516  * to see how it works.
03517  *
03518  * This is used in the Barrett reduction since for one of the multiplications
03519  * only the higher digits were needed.  This essentially halves the work.
03520  *
03521  * Based on Algorithm 14.12 on pp.595 of HAC.
03522  */
03523 int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
03524 {
03525   int     olduse, res, pa, ix, iz;
03526   mp_digit W[MP_WARRAY];
03527   mp_word  _W;
03528 
03529   /* grow the destination as required */
03530   pa = a->used + b->used;
03531   if (c->alloc < pa) {
03532     if ((res = mp_grow (c, pa)) != MP_OKAY) {
03533       return res;
03534     }
03535   }
03536 
03537   /* number of output digits to produce */
03538   pa = a->used + b->used;
03539   _W = 0;
03540   for (ix = digs; ix < pa; ix++) { 
03541       int      tx, ty, iy;
03542       mp_digit *tmpx, *tmpy;
03543 
03544       /* get offsets into the two bignums */
03545       ty = MIN(b->used-1, ix);
03546       tx = ix - ty;
03547 
03548       /* setup temp aliases */
03549       tmpx = a->dp + tx;
03550       tmpy = b->dp + ty;
03551 
03552       /* this is the number of times the loop will iterrate, essentially its 
03553          while (tx++ < a->used && ty-- >= 0) { ... }
03554        */
03555       iy = MIN(a->used-tx, ty+1);
03556 
03557       /* execute loop */
03558       for (iz = 0; iz < iy; iz++) {
03559          _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
03560       }
03561 
03562       /* store term */
03563       W[ix] = ((mp_digit)_W) & MP_MASK;
03564 
03565       /* make next carry */
03566       _W = _W >> ((mp_word)DIGIT_BIT);
03567   }
03568   
03569   /* setup dest */
03570   olduse  = c->used;
03571   c->used = pa;
03572 
03573   {
03574     register mp_digit *tmpc;
03575 
03576     tmpc = c->dp + digs;
03577     for (ix = digs; ix <= pa; ix++) {
03578       /* now extract the previous digit [below the carry] */
03579       *tmpc++ = W[ix];
03580     }
03581 
03582     /* clear unused digits [that existed in the old copy of c] */
03583     for (; ix < olduse; ix++) {
03584       *tmpc++ = 0;
03585     }
03586   }
03587   mp_clamp (c);
03588   return MP_OKAY;
03589 }
03590 
03591 
03592 #ifdef CYASSL_KEY_GEN
03593 
03594 int mp_cnt_lsb(mp_int *a);
03595 
03596 /* c = a * a (mod b) */
03597 int mp_sqrmod (mp_int * a, mp_int * b, mp_int * c)
03598 {
03599   int     res;
03600   mp_int  t;
03601 
03602   if ((res = mp_init (&t)) != MP_OKAY) {
03603     return res;
03604   }
03605 
03606   if ((res = mp_sqr (a, &t)) != MP_OKAY) {
03607     mp_clear (&t);
03608     return res;
03609   }
03610   res = mp_mod (&t, b, c);
03611   mp_clear (&t);
03612   return res;
03613 }
03614 
03615 
03616 /* single digit addition */
03617 int mp_add_d (mp_int * a, mp_digit b, mp_int * c)
03618 {
03619   int     res, ix, oldused;
03620   mp_digit *tmpa, *tmpc, mu;
03621 
03622   /* grow c as required */
03623   if (c->alloc < a->used + 1) {
03624      if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
03625         return res;
03626      }
03627   }
03628 
03629   /* if a is negative and |a| >= b, call c = |a| - b */
03630   if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) {
03631      /* temporarily fix sign of a */
03632      a->sign = MP_ZPOS;
03633 
03634      /* c = |a| - b */
03635      res = mp_sub_d(a, b, c);
03636 
03637      /* fix sign  */
03638      a->sign = c->sign = MP_NEG;
03639 
03640      /* clamp */
03641      mp_clamp(c);
03642 
03643      return res;
03644   }
03645 
03646   /* old number of used digits in c */
03647   oldused = c->used;
03648 
03649   /* sign always positive */
03650   c->sign = MP_ZPOS;
03651 
03652   /* source alias */
03653   tmpa    = a->dp;
03654 
03655   /* destination alias */
03656   tmpc    = c->dp;
03657 
03658   /* if a is positive */
03659   if (a->sign == MP_ZPOS) {
03660      /* add digit, after this we're propagating
03661       * the carry.
03662       */
03663      *tmpc   = *tmpa++ + b;
03664      mu      = *tmpc >> DIGIT_BIT;
03665      *tmpc++ &= MP_MASK;
03666 
03667      /* now handle rest of the digits */
03668      for (ix = 1; ix < a->used; ix++) {
03669         *tmpc   = *tmpa++ + mu;
03670         mu      = *tmpc >> DIGIT_BIT;
03671         *tmpc++ &= MP_MASK;
03672      }
03673      /* set final carry */
03674      ix++;
03675      *tmpc++  = mu;
03676 
03677      /* setup size */
03678      c->used = a->used + 1;
03679   } else {
03680      /* a was negative and |a| < b */
03681      c->used  = 1;
03682 
03683      /* the result is a single digit */
03684      if (a->used == 1) {
03685         *tmpc++  =  b - a->dp[0];
03686      } else {
03687         *tmpc++  =  b;
03688      }
03689 
03690      /* setup count so the clearing of oldused
03691       * can fall through correctly
03692       */
03693      ix       = 1;
03694   }
03695 
03696   /* now zero to oldused */
03697   while (ix++ < oldused) {
03698      *tmpc++ = 0;
03699   }
03700   mp_clamp(c);
03701 
03702   return MP_OKAY;
03703 }
03704 
03705 
03706 /* single digit subtraction */
03707 int mp_sub_d (mp_int * a, mp_digit b, mp_int * c)
03708 {
03709   mp_digit *tmpa, *tmpc, mu;
03710   int       res, ix, oldused;
03711 
03712   /* grow c as required */
03713   if (c->alloc < a->used + 1) {
03714      if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
03715         return res;
03716      }
03717   }
03718 
03719   /* if a is negative just do an unsigned
03720    * addition [with fudged signs]
03721    */
03722   if (a->sign == MP_NEG) {
03723      a->sign = MP_ZPOS;
03724      res     = mp_add_d(a, b, c);
03725      a->sign = c->sign = MP_NEG;
03726 
03727      /* clamp */
03728      mp_clamp(c);
03729 
03730      return res;
03731   }
03732 
03733   /* setup regs */
03734   oldused = c->used;
03735   tmpa    = a->dp;
03736   tmpc    = c->dp;
03737 
03738   /* if a <= b simply fix the single digit */
03739   if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) {
03740      if (a->used == 1) {
03741         *tmpc++ = b - *tmpa;
03742      } else {
03743         *tmpc++ = b;
03744      }
03745      ix      = 1;
03746 
03747      /* negative/1digit */
03748      c->sign = MP_NEG;
03749      c->used = 1;
03750   } else {
03751      /* positive/size */
03752      c->sign = MP_ZPOS;
03753      c->used = a->used;
03754 
03755      /* subtract first digit */
03756      *tmpc    = *tmpa++ - b;
03757      mu       = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
03758      *tmpc++ &= MP_MASK;
03759 
03760      /* handle rest of the digits */
03761      for (ix = 1; ix < a->used; ix++) {
03762         *tmpc    = *tmpa++ - mu;
03763         mu       = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
03764         *tmpc++ &= MP_MASK;
03765      }
03766   }
03767 
03768   /* zero excess digits */
03769   while (ix++ < oldused) {
03770      *tmpc++ = 0;
03771   }
03772   mp_clamp(c);
03773   return MP_OKAY;
03774 }
03775 
03776 
03777 static int s_is_power_of_two(mp_digit b, int *p)
03778 {
03779    int x;
03780 
03781    /* fast return if no power of two */
03782    if ((b==0) || (b & (b-1))) {
03783       return 0;
03784    }
03785 
03786    for (x = 0; x < DIGIT_BIT; x++) {
03787       if (b == (((mp_digit)1)<<x)) {
03788          *p = x;
03789          return 1;
03790       }
03791    }
03792    return 0;
03793 }
03794 
03795 /* single digit division (based on routine from MPI) */
03796 int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
03797 {
03798   mp_int  q;
03799   mp_word w;
03800   mp_digit t;
03801   int     res, ix;
03802 
03803   /* cannot divide by zero */
03804   if (b == 0) {
03805      return MP_VAL;
03806   }
03807 
03808   /* quick outs */
03809   if (b == 1 || mp_iszero(a) == 1) {
03810      if (d != NULL) {
03811         *d = 0;
03812      }
03813      if (c != NULL) {
03814         return mp_copy(a, c);
03815      }
03816      return MP_OKAY;
03817   }
03818 
03819   /* power of two ? */
03820   if (s_is_power_of_two(b, &ix) == 1) {
03821      if (d != NULL) {
03822         *d = a->dp[0] & ((((mp_digit)1)<<ix) - 1);
03823      }
03824      if (c != NULL) {
03825         return mp_div_2d(a, ix, c, NULL);
03826      }
03827      return MP_OKAY;
03828   }
03829 
03830 #ifdef BN_MP_DIV_3_C
03831   /* three? */
03832   if (b == 3) {
03833      return mp_div_3(a, c, d);
03834   }
03835 #endif
03836 
03837   /* no easy answer [c'est la vie].  Just division */
03838   if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
03839      return res;
03840   }
03841   
03842   q.used = a->used;
03843   q.sign = a->sign;
03844   w = 0;
03845   for (ix = a->used - 1; ix >= 0; ix--) {
03846      w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
03847      
03848      if (w >= b) {
03849         t = (mp_digit)(w / b);
03850         w -= ((mp_word)t) * ((mp_word)b);
03851       } else {
03852         t = 0;
03853       }
03854       q.dp[ix] = (mp_digit)t;
03855   }
03856   
03857   if (d != NULL) {
03858      *d = (mp_digit)w;
03859   }
03860   
03861   if (c != NULL) {
03862      mp_clamp(&q);
03863      mp_exch(&q, c);
03864   }
03865   mp_clear(&q);
03866   
03867   return res;
03868 }
03869 
03870 
03871 int mp_mod_d (mp_int * a, mp_digit b, mp_digit * c)
03872 {
03873   return mp_div_d(a, b, NULL, c);
03874 }
03875 
03876 
03877 const mp_digit ltm_prime_tab[] = {
03878   0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
03879   0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
03880   0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
03881   0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F,
03882 #ifndef MP_8BIT
03883   0x0083,
03884   0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
03885   0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
03886   0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
03887   0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
03888 
03889   0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
03890   0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
03891   0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
03892   0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
03893   0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
03894   0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
03895   0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
03896   0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
03897 
03898   0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
03899   0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
03900   0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
03901   0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
03902   0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
03903   0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
03904   0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
03905   0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
03906 
03907   0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
03908   0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
03909   0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
03910   0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
03911   0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
03912   0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
03913   0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
03914   0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
03915 #endif
03916 };
03917 
03918 
03919 /* Miller-Rabin test of "a" to the base of "b" as described in 
03920  * HAC pp. 139 Algorithm 4.24
03921  *
03922  * Sets result to 0 if definitely composite or 1 if probably prime.
03923  * Randomly the chance of error is no more than 1/4 and often 
03924  * very much lower.
03925  */
03926 int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result)
03927 {
03928   mp_int  n1, y, r;
03929   int     s, j, err;
03930 
03931   /* default */
03932   *result = MP_NO;
03933 
03934   /* ensure b > 1 */
03935   if (mp_cmp_d(b, 1) != MP_GT) {
03936      return MP_VAL;
03937   }     
03938 
03939   /* get n1 = a - 1 */
03940   if ((err = mp_init_copy (&n1, a)) != MP_OKAY) {
03941     return err;
03942   }
03943   if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) {
03944     goto LBL_N1;
03945   }
03946 
03947   /* set 2**s * r = n1 */
03948   if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) {
03949     goto LBL_N1;
03950   }
03951 
03952   /* count the number of least significant bits
03953    * which are zero
03954    */
03955   s = mp_cnt_lsb(&r);
03956 
03957   /* now divide n - 1 by 2**s */
03958   if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) {
03959     goto LBL_R;
03960   }
03961 
03962   /* compute y = b**r mod a */
03963   if ((err = mp_init (&y)) != MP_OKAY) {
03964     goto LBL_R;
03965   }
03966   if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) {
03967     goto LBL_Y;
03968   }
03969 
03970   /* if y != 1 and y != n1 do */
03971   if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) {
03972     j = 1;
03973     /* while j <= s-1 and y != n1 */
03974     while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) {
03975       if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) {
03976          goto LBL_Y;
03977       }
03978 
03979       /* if y == 1 then composite */
03980       if (mp_cmp_d (&y, 1) == MP_EQ) {
03981          goto LBL_Y;
03982       }
03983 
03984       ++j;
03985     }
03986 
03987     /* if y != n1 then composite */
03988     if (mp_cmp (&y, &n1) != MP_EQ) {
03989       goto LBL_Y;
03990     }
03991   }
03992 
03993   /* probably prime now */
03994   *result = MP_YES;
03995 LBL_Y:mp_clear (&y);
03996 LBL_R:mp_clear (&r);
03997 LBL_N1:mp_clear (&n1);
03998   return err;
03999 }
04000 
04001 
04002 /* determines if an integers is divisible by one 
04003  * of the first PRIME_SIZE primes or not
04004  *
04005  * sets result to 0 if not, 1 if yes
04006  */
04007 int mp_prime_is_divisible (mp_int * a, int *result)
04008 {
04009   int     err, ix;
04010   mp_digit res;
04011 
04012   /* default to not */
04013   *result = MP_NO;
04014 
04015   for (ix = 0; ix < PRIME_SIZE; ix++) {
04016     /* what is a mod LBL_prime_tab[ix] */
04017     if ((err = mp_mod_d (a, ltm_prime_tab[ix], &res)) != MP_OKAY) {
04018       return err;
04019     }
04020 
04021     /* is the residue zero? */
04022     if (res == 0) {
04023       *result = MP_YES;
04024       return MP_OKAY;
04025     }
04026   }
04027 
04028   return MP_OKAY;
04029 }
04030 
04031 
04032 /*
04033  * Sets result to 1 if probably prime, 0 otherwise
04034  */
04035 int mp_prime_is_prime (mp_int * a, int t, int *result)
04036 {
04037   mp_int  b;
04038   int     ix, err, res;
04039 
04040   /* default to no */
04041   *result = MP_NO;
04042 
04043   /* valid value of t? */
04044   if (t <= 0 || t > PRIME_SIZE) {
04045     return MP_VAL;
04046   }
04047 
04048   /* is the input equal to one of the primes in the table? */
04049   for (ix = 0; ix < PRIME_SIZE; ix++) {
04050       if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
04051          *result = 1;
04052          return MP_OKAY;
04053       }
04054   }
04055 
04056   /* first perform trial division */
04057   if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
04058     return err;
04059   }
04060 
04061   /* return if it was trivially divisible */
04062   if (res == MP_YES) {
04063     return MP_OKAY;
04064   }
04065 
04066   /* now perform the miller-rabin rounds */
04067   if ((err = mp_init (&b)) != MP_OKAY) {
04068     return err;
04069   }
04070 
04071   for (ix = 0; ix < t; ix++) {
04072     /* set the prime */
04073     mp_set (&b, ltm_prime_tab[ix]);
04074 
04075     if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
04076       goto LBL_B;
04077     }
04078 
04079     if (res == MP_NO) {
04080       goto LBL_B;
04081     }
04082   }
04083 
04084   /* passed the test */
04085   *result = MP_YES;
04086 LBL_B:mp_clear (&b);
04087   return err;
04088 }
04089 
04090 
04091 /* computes least common multiple as |a*b|/(a, b) */
04092 int mp_lcm (mp_int * a, mp_int * b, mp_int * c)
04093 {
04094   int     res;
04095   mp_int  t1, t2;
04096 
04097 
04098   if ((res = mp_init_multi (&t1, &t2, NULL, NULL, NULL, NULL)) != MP_OKAY) {
04099     return res;
04100   }
04101 
04102   /* t1 = get the GCD of the two inputs */
04103   if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) {
04104     goto LBL_T;
04105   }
04106 
04107   /* divide the smallest by the GCD */
04108   if (mp_cmp_mag(a, b) == MP_LT) {
04109      /* store quotient in t2 such that t2 * b is the LCM */
04110      if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) {
04111         goto LBL_T;
04112      }
04113      res = mp_mul(b, &t2, c);
04114   } else {
04115      /* store quotient in t2 such that t2 * a is the LCM */
04116      if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) {
04117         goto LBL_T;
04118      }
04119      res = mp_mul(a, &t2, c);
04120   }
04121 
04122   /* fix the sign to positive */
04123   c->sign = MP_ZPOS;
04124 
04125 LBL_T:
04126   mp_clear(&t1);
04127   mp_clear(&t2);
04128   return res;
04129 }
04130 
04131 
04132 static const int lnz[16] = { 
04133    4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
04134 };
04135 
04136 /* Counts the number of lsbs which are zero before the first zero bit */
04137 int mp_cnt_lsb(mp_int *a)
04138 {
04139     int x;
04140     mp_digit q, qq;
04141 
04142     /* easy out */
04143     if (mp_iszero(a) == 1) {
04144         return 0;
04145     }
04146 
04147     /* scan lower digits until non-zero */
04148     for (x = 0; x < a->used && a->dp[x] == 0; x++);
04149     q = a->dp[x];
04150     x *= DIGIT_BIT;
04151 
04152     /* now scan this digit until a 1 is found */
04153     if ((q & 1) == 0) {
04154         do {
04155             qq  = q & 15;
04156             x  += lnz[qq];
04157             q >>= 4;
04158         } while (qq == 0);
04159     }
04160     return x;
04161 }
04162 
04163 
04164 /* Greatest Common Divisor using the binary method */
04165 int mp_gcd (mp_int * a, mp_int * b, mp_int * c)
04166 {
04167     mp_int  u, v;
04168     int     k, u_lsb, v_lsb, res;
04169 
04170     /* either zero than gcd is the largest */
04171     if (mp_iszero (a) == MP_YES) {
04172         return mp_abs (b, c);
04173     }
04174     if (mp_iszero (b) == MP_YES) {
04175         return mp_abs (a, c);
04176     }
04177 
04178     /* get copies of a and b we can modify */
04179     if ((res = mp_init_copy (&u, a)) != MP_OKAY) {
04180         return res;
04181     }
04182 
04183     if ((res = mp_init_copy (&v, b)) != MP_OKAY) {
04184         goto LBL_U;
04185     }
04186 
04187     /* must be positive for the remainder of the algorithm */
04188     u.sign = v.sign = MP_ZPOS;
04189 
04190     /* B1.  Find the common power of two for u and v */
04191     u_lsb = mp_cnt_lsb(&u);
04192     v_lsb = mp_cnt_lsb(&v);
04193     k     = MIN(u_lsb, v_lsb);
04194 
04195     if (k > 0) {
04196         /* divide the power of two out */
04197         if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) {
04198             goto LBL_V;
04199         }
04200 
04201         if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) {
04202             goto LBL_V;
04203         }
04204     }
04205 
04206     /* divide any remaining factors of two out */
04207     if (u_lsb != k) {
04208         if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {
04209             goto LBL_V;
04210         }
04211     }
04212 
04213     if (v_lsb != k) {
04214         if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {
04215             goto LBL_V;
04216         }
04217     }
04218 
04219     while (mp_iszero(&v) == 0) {
04220         /* make sure v is the largest */
04221         if (mp_cmp_mag(&u, &v) == MP_GT) {
04222             /* swap u and v to make sure v is >= u */
04223             mp_exch(&u, &v);
04224         }
04225      
04226         /* subtract smallest from largest */
04227         if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) {
04228             goto LBL_V;
04229         }
04230      
04231         /* Divide out all factors of two */
04232         if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
04233             goto LBL_V;
04234         } 
04235     } 
04236 
04237     /* multiply by 2**k which we divided out at the beginning */
04238     if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) {
04239         goto LBL_V;
04240     }
04241     c->sign = MP_ZPOS;
04242     res = MP_OKAY;
04243 LBL_V:mp_clear (&u);
04244 LBL_U:mp_clear (&v);
04245     return res;
04246 }
04247 
04248 
04249 /* set a 32-bit const */
04250 int mp_set_int (mp_int * a, unsigned long b)
04251 {
04252   int     x, res;
04253 
04254   mp_zero (a);
04255   
04256   /* set four bits at a time */
04257   for (x = 0; x < 8; x++) {
04258     /* shift the number up four bits */
04259     if ((res = mp_mul_2d (a, 4, a)) != MP_OKAY) {
04260       return res;
04261     }
04262 
04263     /* OR in the top four bits of the source */
04264     a->dp[0] |= (b >> 28) & 15;
04265 
04266     /* shift the source up to the next four bits */
04267     b <<= 4;
04268 
04269     /* ensure that digits are not clamped off */
04270     a->used += 1;
04271   }
04272   mp_clamp (a);
04273   return MP_OKAY;
04274 }
04275 
04276 
04277 #endif /* CYASSL_KEY_GEN */
04278 
04279 #endif /* USE_FAST_MATH */
04280