ssh lib

Dependents:   OS

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers integer.c Source File

integer.c

00001 /* integer.c
00002  *
00003  * Copyright (C) 2006-2017 wolfSSL Inc.
00004  *
00005  * This file is part of wolfSSL.
00006  *
00007  * wolfSSL is free software; you can redistribute it and/or modify
00008  * it under the terms of the GNU General Public License as published by
00009  * the Free Software Foundation; either version 2 of the License, or
00010  * (at your option) any later version.
00011  *
00012  * wolfSSL is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
00020  */
00021 
00022 
00023 
00024 /*
00025  * Based on public domain LibTomMath 0.38 by Tom St Denis, tomstdenis@iahu.ca,
00026  * http://math.libtomcrypt.com
00027  */
00028 
00029 
00030 #ifdef HAVE_CONFIG_H
00031     #include <config.h>
00032 #endif
00033 
00034 /* in case user set USE_FAST_MATH there */
00035 #include <wolfcrypt/settings.h>
00036 
00037 #ifdef NO_INLINE
00038     #include <wolfcrypt/misc.h>
00039 #else
00040     #define WOLFSSL_MISC_INCLUDED
00041     #include <wolfcrypt/src/misc.c>
00042 #endif
00043 
00044 #ifndef NO_BIG_INT
00045 
00046 #ifndef USE_FAST_MATH
00047 
00048 #ifndef WOLFSSL_SP_MATH
00049 
00050 #include <wolfcrypt/integer.h>
00051 
00052 #if defined(FREESCALE_LTC_TFM)
00053     #include <wolfcrypt/port/nxp/ksdk_port.h>
00054 #endif
00055 #ifdef WOLFSSL_DEBUG_MATH
00056     #include <stdio.h>
00057 #endif
00058 
00059 #ifndef NO_WOLFSSL_SMALL_STACK
00060     #ifndef WOLFSSL_SMALL_STACK
00061         #define WOLFSSL_SMALL_STACK
00062     #endif
00063 #endif
00064 
00065 #ifdef SHOW_GEN
00066     #if defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX)
00067         #if MQX_USE_IO_OLD
00068             #include <fio.h>
00069         #else
00070             #include <nio.h>
00071         #endif
00072     #else
00073         #include <stdio.h>
00074     #endif
00075 #endif
00076 
00077 /* reverse an array, used for radix code */
00078 static void
00079 bn_reverse (unsigned char *s, int len)
00080 {
00081     int     ix, iy;
00082     unsigned char t;
00083 
00084     ix = 0;
00085     iy = len - 1;
00086     while (ix < iy) {
00087         t     = s[ix];
00088         s[ix] = s[iy];
00089         s[iy] = t;
00090         ++ix;
00091         --iy;
00092     }
00093 }
00094 
00095 /* math settings check */
00096 word32 CheckRunTimeSettings(void)
00097 {
00098     return CTC_SETTINGS;
00099 }
00100 
00101 
00102 /* handle up to 6 inits */
00103 int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d, mp_int* e,
00104                   mp_int* f)
00105 {
00106     int res = MP_OKAY;
00107 
00108     if (a) XMEMSET(a, 0, sizeof(mp_int));
00109     if (b) XMEMSET(b, 0, sizeof(mp_int));
00110     if (c) XMEMSET(c, 0, sizeof(mp_int));
00111     if (d) XMEMSET(d, 0, sizeof(mp_int));
00112     if (e) XMEMSET(e, 0, sizeof(mp_int));
00113     if (f) XMEMSET(f, 0, sizeof(mp_int));
00114 
00115     if (a && ((res = mp_init(a)) != MP_OKAY))
00116         return res;
00117 
00118     if (b && ((res = mp_init(b)) != MP_OKAY)) {
00119         mp_clear(a);
00120         return res;
00121     }
00122 
00123     if (c && ((res = mp_init(c)) != MP_OKAY)) {
00124         mp_clear(a); mp_clear(b);
00125         return res;
00126     }
00127 
00128     if (d && ((res = mp_init(d)) != MP_OKAY)) {
00129         mp_clear(a); mp_clear(b); mp_clear(c);
00130         return res;
00131     }
00132 
00133     if (e && ((res = mp_init(e)) != MP_OKAY)) {
00134         mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d);
00135         return res;
00136     }
00137 
00138     if (f && ((res = mp_init(f)) != MP_OKAY)) {
00139         mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d); mp_clear(e);
00140         return res;
00141     }
00142 
00143     return res;
00144 }
00145 
00146 
00147 /* init a new mp_int */
00148 int mp_init (mp_int * a)
00149 {
00150   /* Safeguard against passing in a null pointer */
00151   if (a == NULL)
00152     return MP_VAL;
00153 
00154   /* defer allocation until mp_grow */
00155   a->dp = NULL;
00156 
00157   /* set the used to zero, allocated digits to the default precision
00158    * and sign to positive */
00159   a->used  = 0;
00160   a->alloc = 0;
00161   a->sign  = MP_ZPOS;
00162 #ifdef HAVE_WOLF_BIGINT
00163   wc_bigint_init(&a->raw);
00164 #endif
00165 
00166   return MP_OKAY;
00167 }
00168 
00169 
00170 /* clear one (frees)  */
00171 void mp_clear (mp_int * a)
00172 {
00173   int i;
00174 
00175   if (a == NULL)
00176       return;
00177 
00178   /* only do anything if a hasn't been freed previously */
00179   if (a->dp != NULL) {
00180     /* first zero the digits */
00181     for (i = 0; i < a->used; i++) {
00182         a->dp[i] = 0;
00183     }
00184 
00185     /* free ram */
00186     mp_free(a);
00187 
00188     /* reset members to make debugging easier */
00189     a->alloc = a->used = 0;
00190     a->sign  = MP_ZPOS;
00191   }
00192 }
00193 
00194 void mp_free (mp_int * a)
00195 {
00196   /* only do anything if a hasn't been freed previously */
00197   if (a->dp != NULL) {
00198     /* free ram */
00199     XFREE(a->dp, 0, DYNAMIC_TYPE_BIGINT);
00200     a->dp    = NULL;
00201   }
00202 
00203 #ifdef HAVE_WOLF_BIGINT
00204   wc_bigint_free(&a->raw);
00205 #endif
00206 }
00207 
00208 void mp_forcezero(mp_int * a)
00209 {
00210     if (a == NULL)
00211         return;
00212 
00213     /* only do anything if a hasn't been freed previously */
00214     if (a->dp != NULL) {
00215       /* force zero the used digits */
00216       ForceZero(a->dp, a->used * sizeof(mp_digit));
00217 #ifdef HAVE_WOLF_BIGINT
00218       wc_bigint_zero(&a->raw);
00219 #endif
00220       /* free ram */
00221       mp_free(a);
00222 
00223       /* reset members to make debugging easier */
00224       a->alloc = a->used = 0;
00225       a->sign  = MP_ZPOS;
00226     }
00227 
00228     a->sign = MP_ZPOS;
00229     a->used = 0;
00230 }
00231 
00232 
00233 /* get the size for an unsigned equivalent */
00234 int mp_unsigned_bin_size (mp_int * a)
00235 {
00236   int     size = mp_count_bits (a);
00237   return (size / 8 + ((size & 7) != 0 ? 1 : 0));
00238 }
00239 
00240 
00241 /* returns the number of bits in an int */
00242 int mp_count_bits (mp_int * a)
00243 {
00244   int     r;
00245   mp_digit q;
00246 
00247   /* shortcut */
00248   if (a->used == 0) {
00249     return 0;
00250   }
00251 
00252   /* get number of digits and add that */
00253   r = (a->used - 1) * DIGIT_BIT;
00254 
00255   /* take the last digit and count the bits in it */
00256   q = a->dp[a->used - 1];
00257   while (q > ((mp_digit) 0)) {
00258     ++r;
00259     q >>= ((mp_digit) 1);
00260   }
00261   return r;
00262 }
00263 
00264 
00265 int mp_leading_bit (mp_int * a)
00266 {
00267     int bit = 0;
00268     mp_int t;
00269 
00270     if (mp_init_copy(&t, a) != MP_OKAY)
00271         return 0;
00272 
00273     while (mp_iszero(&t) == MP_NO) {
00274 #ifndef MP_8BIT
00275         bit = (t.dp[0] & 0x80) != 0;
00276 #else
00277         bit = (t.dp[0] | ((t.dp[1] & 0x01) << 7)) & 0x80 != 0;
00278 #endif
00279         if (mp_div_2d (&t, 8, &t, NULL) != MP_OKAY)
00280             break;
00281     }
00282     mp_clear(&t);
00283     return bit;
00284 }
00285 
00286 int mp_to_unsigned_bin_at_pos(int x, mp_int *t, unsigned char *b)
00287 {
00288   int res = 0;
00289   while (mp_iszero(t) == MP_NO) {
00290 #ifndef MP_8BIT
00291       b[x++] = (unsigned char) (t->dp[0] & 255);
00292 #else
00293       b[x++] = (unsigned char) (t->dp[0] | ((t->dp[1] & 0x01) << 7));
00294 #endif
00295     if ((res = mp_div_2d (t, 8, t, NULL)) != MP_OKAY) {
00296       return res;
00297     }
00298     res = x;
00299   }
00300   return res;
00301 }
00302 
00303 /* store in unsigned [big endian] format */
00304 int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
00305 {
00306   int     x, res;
00307   mp_int  t;
00308 
00309   if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
00310     return res;
00311   }
00312 
00313   x = mp_to_unsigned_bin_at_pos(0, &t, b);
00314   if (x < 0) {
00315     mp_clear(&t);
00316     return x;
00317   }
00318 
00319   bn_reverse (b, x);
00320   mp_clear (&t);
00321   return res;
00322 }
00323 
00324 
00325 /* creates "a" then copies b into it */
00326 int mp_init_copy (mp_int * a, mp_int * b)
00327 {
00328   int     res;
00329 
00330   if ((res = mp_init_size (a, b->used)) != MP_OKAY) {
00331     return res;
00332   }
00333 
00334   if((res = mp_copy (b, a)) != MP_OKAY) {
00335     mp_clear(a);
00336   }
00337 
00338   return res;
00339 }
00340 
00341 
00342 /* copy, b = a */
00343 int mp_copy (mp_int * a, mp_int * b)
00344 {
00345   int     res, n;
00346 
00347   /* Safeguard against passing in a null pointer */
00348   if (a == NULL || b == NULL)
00349     return MP_VAL;
00350 
00351   /* if dst == src do nothing */
00352   if (a == b) {
00353     return MP_OKAY;
00354   }
00355 
00356   /* grow dest */
00357   if (b->alloc < a->used || b->alloc == 0) {
00358      if ((res = mp_grow (b, a->used)) != MP_OKAY) {
00359         return res;
00360      }
00361   }
00362 
00363   /* zero b and copy the parameters over */
00364   {
00365     mp_digit *tmpa, *tmpb;
00366 
00367     /* pointer aliases */
00368 
00369     /* source */
00370     tmpa = a->dp;
00371 
00372     /* destination */
00373     tmpb = b->dp;
00374 
00375     /* copy all the digits */
00376     for (n = 0; n < a->used; n++) {
00377       *tmpb++ = *tmpa++;
00378     }
00379 
00380     /* clear high digits */
00381     for (; n < b->used && b->dp; n++) {
00382       *tmpb++ = 0;
00383     }
00384   }
00385 
00386   /* copy used count and sign */
00387   b->used = a->used;
00388   b->sign = a->sign;
00389   return MP_OKAY;
00390 }
00391 
00392 
00393 /* grow as required */
00394 int mp_grow (mp_int * a, int size)
00395 {
00396   int     i;
00397   mp_digit *tmp;
00398 
00399   /* if the alloc size is smaller alloc more ram */
00400   if (a->alloc < size || size == 0) {
00401     /* ensure there are always at least MP_PREC digits extra on top */
00402     size += (MP_PREC * 2) - (size % MP_PREC);
00403 
00404     /* reallocate the array a->dp
00405      *
00406      * We store the return in a temporary variable
00407      * in case the operation failed we don't want
00408      * to overwrite the dp member of a.
00409      */
00410     tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size, NULL,
00411                                                            DYNAMIC_TYPE_BIGINT);
00412     if (tmp == NULL) {
00413       /* reallocation failed but "a" is still valid [can be freed] */
00414       return MP_MEM;
00415     }
00416 
00417     /* reallocation succeeded so set a->dp */
00418     a->dp = tmp;
00419 
00420     /* zero excess digits */
00421     i        = a->alloc;
00422     a->alloc = size;
00423     for (; i < a->alloc; i++) {
00424       a->dp[i] = 0;
00425     }
00426   }
00427   return MP_OKAY;
00428 }
00429 
00430 
00431 /* shift right by a certain bit count (store quotient in c, optional
00432    remainder in d) */
00433 int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
00434 {
00435   int     D, res;
00436   mp_int  t;
00437 
00438 
00439   /* if the shift count is <= 0 then we do no work */
00440   if (b <= 0) {
00441     res = mp_copy (a, c);
00442     if (d != NULL) {
00443       mp_zero (d);
00444     }
00445     return res;
00446   }
00447 
00448   if ((res = mp_init (&t)) != MP_OKAY) {
00449     return res;
00450   }
00451 
00452   /* get the remainder */
00453   if (d != NULL) {
00454     if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
00455       mp_clear (&t);
00456       return res;
00457     }
00458   }
00459 
00460   /* copy */
00461   if ((res = mp_copy (a, c)) != MP_OKAY) {
00462     mp_clear (&t);
00463     return res;
00464   }
00465 
00466   /* shift by as many digits in the bit count */
00467   if (b >= (int)DIGIT_BIT) {
00468     mp_rshd (c, b / DIGIT_BIT);
00469   }
00470 
00471   /* shift any bit count < DIGIT_BIT */
00472   D = (b % DIGIT_BIT);
00473   if (D != 0) {
00474     mp_rshb(c, D);
00475   }
00476   mp_clamp (c);
00477   if (d != NULL) {
00478     mp_exch (&t, d);
00479   }
00480   mp_clear (&t);
00481   return MP_OKAY;
00482 }
00483 
00484 
00485 /* set to zero */
00486 void mp_zero (mp_int * a)
00487 {
00488   int       n;
00489   mp_digit *tmp;
00490 
00491   if (a == NULL)
00492       return;
00493 
00494   a->sign = MP_ZPOS;
00495   a->used = 0;
00496 
00497   tmp = a->dp;
00498   for (n = 0; n < a->alloc; n++) {
00499      *tmp++ = 0;
00500   }
00501 }
00502 
00503 
00504 /* trim unused digits
00505  *
00506  * This is used to ensure that leading zero digits are
00507  * trimmed and the leading "used" digit will be non-zero
00508  * Typically very fast.  Also fixes the sign if there
00509  * are no more leading digits
00510  */
00511 void mp_clamp (mp_int * a)
00512 {
00513   /* decrease used while the most significant digit is
00514    * zero.
00515    */
00516   while (a->used > 0 && a->dp[a->used - 1] == 0) {
00517     --(a->used);
00518   }
00519 
00520   /* reset the sign flag if used == 0 */
00521   if (a->used == 0) {
00522     a->sign = MP_ZPOS;
00523   }
00524 }
00525 
00526 
00527 /* swap the elements of two integers, for cases where you can't simply swap the
00528  * mp_int pointers around
00529  */
00530 void mp_exch (mp_int * a, mp_int * b)
00531 {
00532   mp_int  t;
00533 
00534   t  = *a;
00535   *a = *b;
00536   *b = t;
00537 }
00538 
00539 
00540 /* shift right a certain number of bits */
00541 void mp_rshb (mp_int *c, int x)
00542 {
00543     mp_digit *tmpc, mask, shift;
00544     mp_digit r, rr;
00545     mp_digit D = x;
00546 
00547     /* mask */
00548     mask = (((mp_digit)1) << D) - 1;
00549 
00550     /* shift for lsb */
00551     shift = DIGIT_BIT - D;
00552 
00553     /* alias */
00554     tmpc = c->dp + (c->used - 1);
00555 
00556     /* carry */
00557     r = 0;
00558     for (x = c->used - 1; x >= 0; x--) {
00559       /* get the lower  bits of this word in a temp */
00560       rr = *tmpc & mask;
00561 
00562       /* shift the current word and mix in the carry bits from previous word */
00563       *tmpc = (*tmpc >> D) | (r << shift);
00564       --tmpc;
00565 
00566       /* set the carry to the carry bits of the current word found above */
00567       r = rr;
00568     }
00569     mp_clamp(c);
00570 }
00571 
00572 
00573 /* shift right a certain amount of digits */
00574 void mp_rshd (mp_int * a, int b)
00575 {
00576   int     x;
00577 
00578   /* if b <= 0 then ignore it */
00579   if (b <= 0) {
00580     return;
00581   }
00582 
00583   /* if b > used then simply zero it and return */
00584   if (a->used <= b) {
00585     mp_zero (a);
00586     return;
00587   }
00588 
00589   {
00590     mp_digit *bottom, *top;
00591 
00592     /* shift the digits down */
00593 
00594     /* bottom */
00595     bottom = a->dp;
00596 
00597     /* top [offset into digits] */
00598     top = a->dp + b;
00599 
00600     /* this is implemented as a sliding window where
00601      * the window is b-digits long and digits from
00602      * the top of the window are copied to the bottom
00603      *
00604      * e.g.
00605 
00606      b-2 | b-1 | b0 | b1 | b2 | ... | bb |   ---->
00607                  /\                   |      ---->
00608                   \-------------------/      ---->
00609      */
00610     for (x = 0; x < (a->used - b); x++) {
00611       *bottom++ = *top++;
00612     }
00613 
00614     /* zero the top digits */
00615     for (; x < a->used; x++) {
00616       *bottom++ = 0;
00617     }
00618   }
00619 
00620   /* remove excess digits */
00621   a->used -= b;
00622 }
00623 
00624 
00625 /* calc a value mod 2**b */
00626 int mp_mod_2d (mp_int * a, int b, mp_int * c)
00627 {
00628   int     x, res;
00629 
00630   /* if b is <= 0 then zero the int */
00631   if (b <= 0) {
00632     mp_zero (c);
00633     return MP_OKAY;
00634   }
00635 
00636   /* if the modulus is larger than the value than return */
00637   if (b >= (int) (a->used * DIGIT_BIT)) {
00638     res = mp_copy (a, c);
00639     return res;
00640   }
00641 
00642   /* copy */
00643   if ((res = mp_copy (a, c)) != MP_OKAY) {
00644     return res;
00645   }
00646 
00647   /* zero digits above the last digit of the modulus */
00648   for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
00649     c->dp[x] = 0;
00650   }
00651   /* clear the digit that is not completely outside/inside the modulus */
00652   c->dp[b / DIGIT_BIT] &= (mp_digit) ((((mp_digit) 1) <<
00653               (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1));
00654   mp_clamp (c);
00655   return MP_OKAY;
00656 }
00657 
00658 
00659 /* reads a unsigned char array, assumes the msb is stored first [big endian] */
00660 int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
00661 {
00662   int     res;
00663 
00664   /* make sure there are at least two digits */
00665   if (a->alloc < 2) {
00666      if ((res = mp_grow(a, 2)) != MP_OKAY) {
00667         return res;
00668      }
00669   }
00670 
00671   /* zero the int */
00672   mp_zero (a);
00673 
00674   /* read the bytes in */
00675   while (c-- > 0) {
00676     if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
00677       return res;
00678     }
00679 
00680 #ifndef MP_8BIT
00681       a->dp[0] |= *b++;
00682       a->used += 1;
00683 #else
00684       a->dp[0] = (*b & MP_MASK);
00685       a->dp[1] |= ((*b++ >> 7U) & 1);
00686       a->used += 2;
00687 #endif
00688   }
00689   mp_clamp (a);
00690   return MP_OKAY;
00691 }
00692 
00693 
00694 /* shift left by a certain bit count */
00695 int mp_mul_2d (mp_int * a, int b, mp_int * c)
00696 {
00697   mp_digit d;
00698   int      res;
00699 
00700   /* copy */
00701   if (a != c) {
00702      if ((res = mp_copy (a, c)) != MP_OKAY) {
00703        return res;
00704      }
00705   }
00706 
00707   if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) {
00708      if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
00709        return res;
00710      }
00711   }
00712 
00713   /* shift by as many digits in the bit count */
00714   if (b >= (int)DIGIT_BIT) {
00715     if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
00716       return res;
00717     }
00718   }
00719 
00720   /* shift any bit count < DIGIT_BIT */
00721   d = (mp_digit) (b % DIGIT_BIT);
00722   if (d != 0) {
00723     mp_digit *tmpc, shift, mask, r, rr;
00724     int x;
00725 
00726     /* bitmask for carries */
00727     mask = (((mp_digit)1) << d) - 1;
00728 
00729     /* shift for msbs */
00730     shift = DIGIT_BIT - d;
00731 
00732     /* alias */
00733     tmpc = c->dp;
00734 
00735     /* carry */
00736     r    = 0;
00737     for (x = 0; x < c->used; x++) {
00738       /* get the higher bits of the current word */
00739       rr = (*tmpc >> shift) & mask;
00740 
00741       /* shift the current word and OR in the carry */
00742       *tmpc = (mp_digit)(((*tmpc << d) | r) & MP_MASK);
00743       ++tmpc;
00744 
00745       /* set the carry to the carry bits of the current word */
00746       r = rr;
00747     }
00748 
00749     /* set final carry */
00750     if (r != 0) {
00751        c->dp[(c->used)++] = r;
00752     }
00753   }
00754   mp_clamp (c);
00755   return MP_OKAY;
00756 }
00757 
00758 
00759 /* shift left a certain amount of digits */
00760 int mp_lshd (mp_int * a, int b)
00761 {
00762   int     x, res;
00763 
00764   /* if its less than zero return */
00765   if (b <= 0) {
00766     return MP_OKAY;
00767   }
00768 
00769   /* grow to fit the new digits */
00770   if (a->alloc < a->used + b) {
00771      if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
00772        return res;
00773      }
00774   }
00775 
00776   {
00777     mp_digit *top, *bottom;
00778 
00779     /* increment the used by the shift amount then copy upwards */
00780     a->used += b;
00781 
00782     /* top */
00783     top = a->dp + a->used - 1;
00784 
00785     /* base */
00786     bottom = a->dp + a->used - 1 - b;
00787 
00788     /* much like mp_rshd this is implemented using a sliding window
00789      * except the window goes the other way around.  Copying from
00790      * the bottom to the top.  see bn_mp_rshd.c for more info.
00791      */
00792     for (x = a->used - 1; x >= b; x--) {
00793       *top-- = *bottom--;
00794     }
00795 
00796     /* zero the lower digits */
00797     top = a->dp;
00798     for (x = 0; x < b; x++) {
00799       *top++ = 0;
00800     }
00801   }
00802   return MP_OKAY;
00803 }
00804 
00805 
00806 /* this is a shell function that calls either the normal or Montgomery
00807  * exptmod functions.  Originally the call to the montgomery code was
00808  * embedded in the normal function but that wasted a lot of stack space
00809  * for nothing (since 99% of the time the Montgomery code would be called)
00810  */
00811 #if defined(FREESCALE_LTC_TFM)
00812 int wolfcrypt_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
00813 #else
00814 int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
00815 #endif
00816 {
00817   int dr;
00818 
00819   /* modulus P must be positive */
00820   if (P->sign == MP_NEG) {
00821      return MP_VAL;
00822   }
00823 
00824   /* if exponent X is negative we have to recurse */
00825   if (X->sign == MP_NEG) {
00826 #ifdef BN_MP_INVMOD_C
00827      mp_int tmpG, tmpX;
00828      int err;
00829 
00830      /* first compute 1/G mod P */
00831      if ((err = mp_init(&tmpG)) != MP_OKAY) {
00832         return err;
00833      }
00834      if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
00835         mp_clear(&tmpG);
00836         return err;
00837      }
00838 
00839      /* now get |X| */
00840      if ((err = mp_init(&tmpX)) != MP_OKAY) {
00841         mp_clear(&tmpG);
00842         return err;
00843      }
00844      if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
00845         mp_clear(&tmpG);
00846         mp_clear(&tmpX);
00847         return err;
00848      }
00849 
00850      /* and now compute (1/G)**|X| instead of G**X [X < 0] */
00851      err = mp_exptmod(&tmpG, &tmpX, P, Y);
00852      mp_clear(&tmpG);
00853      mp_clear(&tmpX);
00854      return err;
00855 #else
00856      /* no invmod */
00857      return MP_VAL;
00858 #endif
00859   }
00860 
00861 /* modified diminished radix reduction */
00862 #if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && \
00863   defined(BN_S_MP_EXPTMOD_C)
00864   if (mp_reduce_is_2k_l(P) == MP_YES) {
00865      return s_mp_exptmod(G, X, P, Y, 1);
00866   }
00867 #endif
00868 
00869 #ifdef BN_MP_DR_IS_MODULUS_C
00870   /* is it a DR modulus? */
00871   dr = mp_dr_is_modulus(P);
00872 #else
00873   /* default to no */
00874   dr = 0;
00875 #endif
00876 
00877 #ifdef BN_MP_REDUCE_IS_2K_C
00878   /* if not, is it a unrestricted DR modulus? */
00879   if (dr == 0) {
00880      dr = mp_reduce_is_2k(P) << 1;
00881   }
00882 #endif
00883 
00884   /* if the modulus is odd or dr != 0 use the montgomery method */
00885 #ifdef BN_MP_EXPTMOD_FAST_C
00886   if (mp_isodd (P) == MP_YES || dr !=  0) {
00887     return mp_exptmod_fast (G, X, P, Y, dr);
00888   } else {
00889 #endif
00890 #ifdef BN_S_MP_EXPTMOD_C
00891     /* otherwise use the generic Barrett reduction technique */
00892     return s_mp_exptmod (G, X, P, Y, 0);
00893 #else
00894     /* no exptmod for evens */
00895     return MP_VAL;
00896 #endif
00897 #ifdef BN_MP_EXPTMOD_FAST_C
00898   }
00899 #endif
00900 }
00901 
00902 
00903 /* b = |a|
00904  *
00905  * Simple function copies the input and fixes the sign to positive
00906  */
00907 int mp_abs (mp_int * a, mp_int * b)
00908 {
00909   int     res;
00910 
00911   /* copy a to b */
00912   if (a != b) {
00913      if ((res = mp_copy (a, b)) != MP_OKAY) {
00914        return res;
00915      }
00916   }
00917 
00918   /* force the sign of b to positive */
00919   b->sign = MP_ZPOS;
00920 
00921   return MP_OKAY;
00922 }
00923 
00924 
00925 /* hac 14.61, pp608 */
00926 #if defined(FREESCALE_LTC_TFM)
00927 int wolfcrypt_mp_invmod(mp_int * a, mp_int * b, mp_int * c)
00928 #else
00929 int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
00930 #endif
00931 {
00932   /* b cannot be negative */
00933   if (b->sign == MP_NEG || mp_iszero(b) == MP_YES) {
00934     return MP_VAL;
00935   }
00936 
00937 #ifdef BN_FAST_MP_INVMOD_C
00938   /* if the modulus is odd we can use a faster routine instead */
00939   if ((mp_isodd(b) == MP_YES) && (mp_cmp_d(b, 1) != MP_EQ)) {
00940     return fast_mp_invmod (a, b, c);
00941   }
00942 #endif
00943 
00944 #ifdef BN_MP_INVMOD_SLOW_C
00945   return mp_invmod_slow(a, b, c);
00946 #else
00947   return MP_VAL;
00948 #endif
00949 }
00950 
00951 
00952 /* computes the modular inverse via binary extended euclidean algorithm,
00953  * that is c = 1/a mod b
00954  *
00955  * Based on slow invmod except this is optimized for the case where b is
00956  * odd as per HAC Note 14.64 on pp. 610
00957  */
00958 int fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
00959 {
00960   mp_int  x, y, u, v, B, D;
00961   int     res, neg, loop_check = 0;
00962 
00963   /* 2. [modified] b must be odd   */
00964   if (mp_iseven (b) == MP_YES) {
00965     return MP_VAL;
00966   }
00967 
00968   /* init all our temps */
00969   if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D)) != MP_OKAY) {
00970      return res;
00971   }
00972 
00973   /* x == modulus, y == value to invert */
00974   if ((res = mp_copy (b, &x)) != MP_OKAY) {
00975     goto LBL_ERR;
00976   }
00977 
00978   /* we need y = |a| */
00979   if ((res = mp_mod (a, b, &y)) != MP_OKAY) {
00980     goto LBL_ERR;
00981   }
00982 
00983   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
00984   if ((res = mp_copy (&x, &u)) != MP_OKAY) {
00985     goto LBL_ERR;
00986   }
00987   if ((res = mp_copy (&y, &v)) != MP_OKAY) {
00988     goto LBL_ERR;
00989   }
00990   if ((res = mp_set (&D, 1)) != MP_OKAY) {
00991     goto LBL_ERR;
00992   }
00993 
00994 top:
00995   /* 4.  while u is even do */
00996   while (mp_iseven (&u) == MP_YES) {
00997     /* 4.1 u = u/2 */
00998     if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
00999       goto LBL_ERR;
01000     }
01001     /* 4.2 if B is odd then */
01002     if (mp_isodd (&B) == MP_YES) {
01003       if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
01004         goto LBL_ERR;
01005       }
01006     }
01007     /* B = B/2 */
01008     if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
01009       goto LBL_ERR;
01010     }
01011   }
01012 
01013   /* 5.  while v is even do */
01014   while (mp_iseven (&v) == MP_YES) {
01015     /* 5.1 v = v/2 */
01016     if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
01017       goto LBL_ERR;
01018     }
01019     /* 5.2 if D is odd then */
01020     if (mp_isodd (&D) == MP_YES) {
01021       /* D = (D-x)/2 */
01022       if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
01023         goto LBL_ERR;
01024       }
01025     }
01026     /* D = D/2 */
01027     if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
01028       goto LBL_ERR;
01029     }
01030   }
01031 
01032   /* 6.  if u >= v then */
01033   if (mp_cmp (&u, &v) != MP_LT) {
01034     /* u = u - v, B = B - D */
01035     if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
01036       goto LBL_ERR;
01037     }
01038 
01039     if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
01040       goto LBL_ERR;
01041     }
01042   } else {
01043     /* v - v - u, D = D - B */
01044     if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
01045       goto LBL_ERR;
01046     }
01047 
01048     if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
01049       goto LBL_ERR;
01050     }
01051   }
01052 
01053   /* if not zero goto step 4 */
01054   if (mp_iszero (&u) == MP_NO) {
01055     if (++loop_check > MAX_INVMOD_SZ) {
01056         res = MP_VAL;
01057         goto LBL_ERR;
01058     }
01059     goto top;
01060   }
01061 
01062   /* now a = C, b = D, gcd == g*v */
01063 
01064   /* if v != 1 then there is no inverse */
01065   if (mp_cmp_d (&v, 1) != MP_EQ) {
01066     res = MP_VAL;
01067     goto LBL_ERR;
01068   }
01069 
01070   /* b is now the inverse */
01071   neg = a->sign;
01072   while (D.sign == MP_NEG) {
01073     if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
01074       goto LBL_ERR;
01075     }
01076   }
01077   /* too big */
01078   while (mp_cmp_mag(&D, b) != MP_LT) {
01079       if ((res = mp_sub(&D, b, &D)) != MP_OKAY) {
01080          goto LBL_ERR;
01081       }
01082   }
01083   mp_exch (&D, c);
01084   c->sign = neg;
01085   res = MP_OKAY;
01086 
01087 LBL_ERR:mp_clear(&x);
01088         mp_clear(&y);
01089         mp_clear(&u);
01090         mp_clear(&v);
01091         mp_clear(&B);
01092         mp_clear(&D);
01093   return res;
01094 }
01095 
01096 
01097 /* hac 14.61, pp608 */
01098 int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
01099 {
01100   mp_int  x, y, u, v, A, B, C, D;
01101   int     res;
01102 
01103   /* b cannot be negative */
01104   if (b->sign == MP_NEG || mp_iszero(b) == MP_YES) {
01105     return MP_VAL;
01106   }
01107 
01108   /* init temps */
01109   if ((res = mp_init_multi(&x, &y, &u, &v,
01110                            &A, &B)) != MP_OKAY) {
01111      return res;
01112   }
01113 
01114   /* init rest of tmps temps */
01115   if ((res = mp_init_multi(&C, &D, 0, 0, 0, 0)) != MP_OKAY) {
01116      mp_clear(&x);
01117      mp_clear(&y);
01118      mp_clear(&u);
01119      mp_clear(&v);
01120      mp_clear(&A);
01121      mp_clear(&B);
01122      return res;
01123   }
01124 
01125   /* x = a, y = b */
01126   if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
01127       goto LBL_ERR;
01128   }
01129   if ((res = mp_copy (b, &y)) != MP_OKAY) {
01130     goto LBL_ERR;
01131   }
01132 
01133   /* 2. [modified] if x,y are both even then return an error! */
01134   if (mp_iseven (&x) == MP_YES && mp_iseven (&y) == MP_YES) {
01135     res = MP_VAL;
01136     goto LBL_ERR;
01137   }
01138 
01139   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
01140   if ((res = mp_copy (&x, &u)) != MP_OKAY) {
01141     goto LBL_ERR;
01142   }
01143   if ((res = mp_copy (&y, &v)) != MP_OKAY) {
01144     goto LBL_ERR;
01145   }
01146   if ((res = mp_set (&A, 1)) != MP_OKAY) {
01147     goto LBL_ERR;
01148   }
01149   if ((res = mp_set (&D, 1)) != MP_OKAY) {
01150     goto LBL_ERR;
01151   }
01152 
01153 top:
01154   /* 4.  while u is even do */
01155   while (mp_iseven (&u) == MP_YES) {
01156     /* 4.1 u = u/2 */
01157     if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
01158       goto LBL_ERR;
01159     }
01160     /* 4.2 if A or B is odd then */
01161     if (mp_isodd (&A) == MP_YES || mp_isodd (&B) == MP_YES) {
01162       /* A = (A+y)/2, B = (B-x)/2 */
01163       if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
01164          goto LBL_ERR;
01165       }
01166       if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
01167          goto LBL_ERR;
01168       }
01169     }
01170     /* A = A/2, B = B/2 */
01171     if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
01172       goto LBL_ERR;
01173     }
01174     if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
01175       goto LBL_ERR;
01176     }
01177   }
01178 
01179   /* 5.  while v is even do */
01180   while (mp_iseven (&v) == MP_YES) {
01181     /* 5.1 v = v/2 */
01182     if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
01183       goto LBL_ERR;
01184     }
01185     /* 5.2 if C or D is odd then */
01186     if (mp_isodd (&C) == MP_YES || mp_isodd (&D) == MP_YES) {
01187       /* C = (C+y)/2, D = (D-x)/2 */
01188       if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
01189          goto LBL_ERR;
01190       }
01191       if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
01192          goto LBL_ERR;
01193       }
01194     }
01195     /* C = C/2, D = D/2 */
01196     if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
01197       goto LBL_ERR;
01198     }
01199     if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
01200       goto LBL_ERR;
01201     }
01202   }
01203 
01204   /* 6.  if u >= v then */
01205   if (mp_cmp (&u, &v) != MP_LT) {
01206     /* u = u - v, A = A - C, B = B - D */
01207     if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
01208       goto LBL_ERR;
01209     }
01210 
01211     if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
01212       goto LBL_ERR;
01213     }
01214 
01215     if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
01216       goto LBL_ERR;
01217     }
01218   } else {
01219     /* v - v - u, C = C - A, D = D - B */
01220     if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
01221       goto LBL_ERR;
01222     }
01223 
01224     if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
01225       goto LBL_ERR;
01226     }
01227 
01228     if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
01229       goto LBL_ERR;
01230     }
01231   }
01232 
01233   /* if not zero goto step 4 */
01234   if (mp_iszero (&u) == MP_NO)
01235     goto top;
01236 
01237   /* now a = C, b = D, gcd == g*v */
01238 
01239   /* if v != 1 then there is no inverse */
01240   if (mp_cmp_d (&v, 1) != MP_EQ) {
01241     res = MP_VAL;
01242     goto LBL_ERR;
01243   }
01244 
01245   /* if its too low */
01246   while (mp_cmp_d(&C, 0) == MP_LT) {
01247       if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
01248          goto LBL_ERR;
01249       }
01250   }
01251 
01252   /* too big */
01253   while (mp_cmp_mag(&C, b) != MP_LT) {
01254       if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
01255          goto LBL_ERR;
01256       }
01257   }
01258 
01259   /* C is now the inverse */
01260   mp_exch (&C, c);
01261   res = MP_OKAY;
01262 LBL_ERR:mp_clear(&x);
01263         mp_clear(&y);
01264         mp_clear(&u);
01265         mp_clear(&v);
01266         mp_clear(&A);
01267         mp_clear(&B);
01268         mp_clear(&C);
01269         mp_clear(&D);
01270   return res;
01271 }
01272 
01273 
01274 /* compare magnitude of two ints (unsigned) */
01275 int mp_cmp_mag (mp_int * a, mp_int * b)
01276 {
01277   int     n;
01278   mp_digit *tmpa, *tmpb;
01279 
01280   /* compare based on # of non-zero digits */
01281   if (a->used > b->used) {
01282     return MP_GT;
01283   }
01284 
01285   if (a->used < b->used) {
01286     return MP_LT;
01287   }
01288 
01289   /* alias for a */
01290   tmpa = a->dp + (a->used - 1);
01291 
01292   /* alias for b */
01293   tmpb = b->dp + (a->used - 1);
01294 
01295   /* compare based on digits  */
01296   for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
01297     if (*tmpa > *tmpb) {
01298       return MP_GT;
01299     }
01300 
01301     if (*tmpa < *tmpb) {
01302       return MP_LT;
01303     }
01304   }
01305   return MP_EQ;
01306 }
01307 
01308 
01309 /* compare two ints (signed)*/
01310 int mp_cmp (mp_int * a, mp_int * b)
01311 {
01312   /* compare based on sign */
01313   if (a->sign != b->sign) {
01314      if (a->sign == MP_NEG) {
01315         return MP_LT;
01316      } else {
01317         return MP_GT;
01318      }
01319   }
01320 
01321   /* compare digits */
01322   if (a->sign == MP_NEG) {
01323      /* if negative compare opposite direction */
01324      return mp_cmp_mag(b, a);
01325   } else {
01326      return mp_cmp_mag(a, b);
01327   }
01328 }
01329 
01330 
01331 /* compare a digit */
01332 int mp_cmp_d(mp_int * a, mp_digit b)
01333 {
01334   /* special case for zero*/
01335   if (a->used == 0 && b == 0)
01336     return MP_EQ;
01337 
01338   /* compare based on sign */
01339   if ((b && a->used == 0) || a->sign == MP_NEG) {
01340     return MP_LT;
01341   }
01342 
01343   /* compare based on magnitude */
01344   if (a->used > 1) {
01345     return MP_GT;
01346   }
01347 
01348   /* compare the only digit of a to b */
01349   if (a->dp[0] > b) {
01350     return MP_GT;
01351   } else if (a->dp[0] < b) {
01352     return MP_LT;
01353   } else {
01354     return MP_EQ;
01355   }
01356 }
01357 
01358 
01359 /* set to a digit */
01360 int mp_set (mp_int * a, mp_digit b)
01361 {
01362   int res;
01363   mp_zero (a);
01364   res = mp_grow (a, 1);
01365   if (res == MP_OKAY) {
01366     a->dp[0] = (mp_digit)(b & MP_MASK);
01367     a->used  = (a->dp[0] != 0) ? 1 : 0;
01368   }
01369   return res;
01370 }
01371 
01372 /* chek if a bit is set */
01373 int mp_is_bit_set (mp_int *a, mp_digit b)
01374 {
01375     if ((mp_digit)a->used < b/DIGIT_BIT)
01376         return 0;
01377 
01378     return (int)((a->dp[b/DIGIT_BIT] >> b%DIGIT_BIT) & (mp_digit)1);
01379 }
01380 
01381 /* c = a mod b, 0 <= c < b */
01382 #if defined(FREESCALE_LTC_TFM)
01383 int wolfcrypt_mp_mod(mp_int * a, mp_int * b, mp_int * c)
01384 #else
01385 int mp_mod (mp_int * a, mp_int * b, mp_int * c)
01386 #endif
01387 {
01388   mp_int  t;
01389   int     res;
01390 
01391   if ((res = mp_init_size (&t, b->used)) != MP_OKAY) {
01392     return res;
01393   }
01394 
01395   if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
01396     mp_clear (&t);
01397     return res;
01398   }
01399 
01400   if ((mp_iszero(&t) != MP_NO) || (t.sign == b->sign)) {
01401     res = MP_OKAY;
01402     mp_exch (&t, c);
01403   } else {
01404     res = mp_add (b, &t, c);
01405   }
01406 
01407   mp_clear (&t);
01408   return res;
01409 }
01410 
01411 
01412 /* slower bit-bang division... also smaller */
01413 int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
01414 {
01415    mp_int ta, tb, tq, q;
01416    int    res, n, n2;
01417 
01418   /* is divisor zero ? */
01419   if (mp_iszero (b) == MP_YES) {
01420     return MP_VAL;
01421   }
01422 
01423   /* if a < b then q=0, r = a */
01424   if (mp_cmp_mag (a, b) == MP_LT) {
01425     if (d != NULL) {
01426       res = mp_copy (a, d);
01427     } else {
01428       res = MP_OKAY;
01429     }
01430     if (c != NULL) {
01431       mp_zero (c);
01432     }
01433     return res;
01434   }
01435 
01436   /* init our temps */
01437   if ((res = mp_init_multi(&ta, &tb, &tq, &q, 0, 0)) != MP_OKAY) {
01438      return res;
01439   }
01440 
01441   if ((res = mp_set(&tq, 1)) != MP_OKAY) {
01442      return res;
01443   }
01444   n = mp_count_bits(a) - mp_count_bits(b);
01445   if (((res = mp_abs(a, &ta)) != MP_OKAY) ||
01446       ((res = mp_abs(b, &tb)) != MP_OKAY) ||
01447       ((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) ||
01448       ((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
01449       goto LBL_ERR;
01450   }
01451 
01452   while (n-- >= 0) {
01453      if (mp_cmp(&tb, &ta) != MP_GT) {
01454         if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) ||
01455             ((res = mp_add(&q, &tq, &q)) != MP_OKAY)) {
01456            goto LBL_ERR;
01457         }
01458      }
01459      if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) ||
01460          ((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) {
01461            goto LBL_ERR;
01462      }
01463   }
01464 
01465   /* now q == quotient and ta == remainder */
01466   n  = a->sign;
01467   n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG);
01468   if (c != NULL) {
01469      mp_exch(c, &q);
01470      c->sign  = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2;
01471   }
01472   if (d != NULL) {
01473      mp_exch(d, &ta);
01474      d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n;
01475   }
01476 LBL_ERR:
01477    mp_clear(&ta);
01478    mp_clear(&tb);
01479    mp_clear(&tq);
01480    mp_clear(&q);
01481    return res;
01482 }
01483 
01484 
01485 /* b = a/2 */
01486 int mp_div_2(mp_int * a, mp_int * b)
01487 {
01488   int     x, res, oldused;
01489 
01490   /* copy */
01491   if (b->alloc < a->used) {
01492     if ((res = mp_grow (b, a->used)) != MP_OKAY) {
01493       return res;
01494     }
01495   }
01496 
01497   oldused = b->used;
01498   b->used = a->used;
01499   {
01500     mp_digit r, rr, *tmpa, *tmpb;
01501 
01502     /* source alias */
01503     tmpa = a->dp + b->used - 1;
01504 
01505     /* dest alias */
01506     tmpb = b->dp + b->used - 1;
01507 
01508     /* carry */
01509     r = 0;
01510     for (x = b->used - 1; x >= 0; x--) {
01511       /* get the carry for the next iteration */
01512       rr = *tmpa & 1;
01513 
01514       /* shift the current digit, add in carry and store */
01515       *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
01516 
01517       /* forward carry to next iteration */
01518       r = rr;
01519     }
01520 
01521     /* zero excess digits */
01522     tmpb = b->dp + b->used;
01523     for (x = b->used; x < oldused; x++) {
01524       *tmpb++ = 0;
01525     }
01526   }
01527   b->sign = a->sign;
01528   mp_clamp (b);
01529   return MP_OKAY;
01530 }
01531 
01532 
01533 /* high level addition (handles signs) */
01534 int mp_add (mp_int * a, mp_int * b, mp_int * c)
01535 {
01536   int sa, sb, res;
01537 
01538   /* get sign of both inputs */
01539   sa = a->sign;
01540   sb = b->sign;
01541 
01542   /* handle two cases, not four */
01543   if (sa == sb) {
01544     /* both positive or both negative */
01545     /* add their magnitudes, copy the sign */
01546     c->sign = sa;
01547     res = s_mp_add (a, b, c);
01548   } else {
01549     /* one positive, the other negative */
01550     /* subtract the one with the greater magnitude from */
01551     /* the one of the lesser magnitude.  The result gets */
01552     /* the sign of the one with the greater magnitude. */
01553     if (mp_cmp_mag (a, b) == MP_LT) {
01554       c->sign = sb;
01555       res = s_mp_sub (b, a, c);
01556     } else {
01557       c->sign = sa;
01558       res = s_mp_sub (a, b, c);
01559     }
01560   }
01561   return res;
01562 }
01563 
01564 
01565 /* low level addition, based on HAC pp.594, Algorithm 14.7 */
01566 int s_mp_add (mp_int * a, mp_int * b, mp_int * c)
01567 {
01568   mp_int *x;
01569   int     olduse, res, min_ab, max_ab;
01570 
01571   /* find sizes, we let |a| <= |b| which means we have to sort
01572    * them.  "x" will point to the input with the most digits
01573    */
01574   if (a->used > b->used) {
01575     min_ab = b->used;
01576     max_ab = a->used;
01577     x = a;
01578   } else {
01579     min_ab = a->used;
01580     max_ab = b->used;
01581     x = b;
01582   }
01583 
01584   /* init result */
01585   if (c->alloc < max_ab + 1) {
01586     if ((res = mp_grow (c, max_ab + 1)) != MP_OKAY) {
01587       return res;
01588     }
01589   }
01590 
01591   /* get old used digit count and set new one */
01592   olduse = c->used;
01593   c->used = max_ab + 1;
01594 
01595   {
01596     mp_digit u, *tmpa, *tmpb, *tmpc;
01597     int i;
01598 
01599     /* alias for digit pointers */
01600 
01601     /* first input */
01602     tmpa = a->dp;
01603 
01604     /* second input */
01605     tmpb = b->dp;
01606 
01607     /* destination */
01608     tmpc = c->dp;
01609 
01610     /* zero the carry */
01611     u = 0;
01612     for (i = 0; i < min_ab; i++) {
01613       /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
01614       *tmpc = *tmpa++ + *tmpb++ + u;
01615 
01616       /* U = carry bit of T[i] */
01617       u = *tmpc >> ((mp_digit)DIGIT_BIT);
01618 
01619       /* take away carry bit from T[i] */
01620       *tmpc++ &= MP_MASK;
01621     }
01622 
01623     /* now copy higher words if any, that is in A+B
01624      * if A or B has more digits add those in
01625      */
01626     if (min_ab != max_ab) {
01627       for (; i < max_ab; i++) {
01628         /* T[i] = X[i] + U */
01629         *tmpc = x->dp[i] + u;
01630 
01631         /* U = carry bit of T[i] */
01632         u = *tmpc >> ((mp_digit)DIGIT_BIT);
01633 
01634         /* take away carry bit from T[i] */
01635         *tmpc++ &= MP_MASK;
01636       }
01637     }
01638 
01639     /* add carry */
01640     *tmpc++ = u;
01641 
01642     /* clear digits above olduse */
01643     for (i = c->used; i < olduse; i++) {
01644       *tmpc++ = 0;
01645     }
01646   }
01647 
01648   mp_clamp (c);
01649   return MP_OKAY;
01650 }
01651 
01652 
01653 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
01654 int s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
01655 {
01656   int     olduse, res, min_b, max_a;
01657 
01658   /* find sizes */
01659   min_b = b->used;
01660   max_a = a->used;
01661 
01662   /* init result */
01663   if (c->alloc < max_a) {
01664     if ((res = mp_grow (c, max_a)) != MP_OKAY) {
01665       return res;
01666     }
01667   }
01668 
01669   /* sanity check on destination */
01670   if (c->dp == NULL)
01671      return MP_VAL;
01672 
01673   olduse = c->used;
01674   c->used = max_a;
01675 
01676   {
01677     mp_digit u, *tmpa, *tmpb, *tmpc;
01678     int i;
01679 
01680     /* alias for digit pointers */
01681     tmpa = a->dp;
01682     tmpb = b->dp;
01683     tmpc = c->dp;
01684 
01685     /* set carry to zero */
01686     u = 0;
01687     for (i = 0; i < min_b; i++) {
01688       /* T[i] = A[i] - B[i] - U */
01689       *tmpc = *tmpa++ - *tmpb++ - u;
01690 
01691       /* U = carry bit of T[i]
01692        * Note this saves performing an AND operation since
01693        * if a carry does occur it will propagate all the way to the
01694        * MSB.  As a result a single shift is enough to get the carry
01695        */
01696       u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
01697 
01698       /* Clear carry from T[i] */
01699       *tmpc++ &= MP_MASK;
01700     }
01701 
01702     /* now copy higher words if any, e.g. if A has more digits than B  */
01703     for (; i < max_a; i++) {
01704       /* T[i] = A[i] - U */
01705       *tmpc = *tmpa++ - u;
01706 
01707       /* U = carry bit of T[i] */
01708       u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
01709 
01710       /* Clear carry from T[i] */
01711       *tmpc++ &= MP_MASK;
01712     }
01713 
01714     /* clear digits above used (since we may not have grown result above) */
01715     for (i = c->used; i < olduse; i++) {
01716       *tmpc++ = 0;
01717     }
01718   }
01719 
01720   mp_clamp (c);
01721   return MP_OKAY;
01722 }
01723 
01724 
01725 /* high level subtraction (handles signs) */
01726 int mp_sub (mp_int * a, mp_int * b, mp_int * c)
01727 {
01728   int     sa, sb, res;
01729 
01730   sa = a->sign;
01731   sb = b->sign;
01732 
01733   if (sa != sb) {
01734     /* subtract a negative from a positive, OR */
01735     /* subtract a positive from a negative. */
01736     /* In either case, ADD their magnitudes, */
01737     /* and use the sign of the first number. */
01738     c->sign = sa;
01739     res = s_mp_add (a, b, c);
01740   } else {
01741     /* subtract a positive from a positive, OR */
01742     /* subtract a negative from a negative. */
01743     /* First, take the difference between their */
01744     /* magnitudes, then... */
01745     if (mp_cmp_mag (a, b) != MP_LT) {
01746       /* Copy the sign from the first */
01747       c->sign = sa;
01748       /* The first has a larger or equal magnitude */
01749       res = s_mp_sub (a, b, c);
01750     } else {
01751       /* The result has the *opposite* sign from */
01752       /* the first number. */
01753       c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
01754       /* The second has a larger magnitude */
01755       res = s_mp_sub (b, a, c);
01756     }
01757   }
01758   return res;
01759 }
01760 
01761 
01762 /* determines if reduce_2k_l can be used */
01763 int mp_reduce_is_2k_l(mp_int *a)
01764 {
01765    int ix, iy;
01766 
01767    if (a->used == 0) {
01768       return MP_NO;
01769    } else if (a->used == 1) {
01770       return MP_YES;
01771    } else if (a->used > 1) {
01772       /* if more than half of the digits are -1 we're sold */
01773       for (iy = ix = 0; ix < a->used; ix++) {
01774           if (a->dp[ix] == MP_MASK) {
01775               ++iy;
01776           }
01777       }
01778       return (iy >= (a->used/2)) ? MP_YES : MP_NO;
01779 
01780    }
01781    return MP_NO;
01782 }
01783 
01784 
01785 /* determines if mp_reduce_2k can be used */
01786 int mp_reduce_is_2k(mp_int *a)
01787 {
01788    int ix, iy, iw;
01789    mp_digit iz;
01790 
01791    if (a->used == 0) {
01792       return MP_NO;
01793    } else if (a->used == 1) {
01794       return MP_YES;
01795    } else if (a->used > 1) {
01796       iy = mp_count_bits(a);
01797       iz = 1;
01798       iw = 1;
01799 
01800       /* Test every bit from the second digit up, must be 1 */
01801       for (ix = DIGIT_BIT; ix < iy; ix++) {
01802           if ((a->dp[iw] & iz) == 0) {
01803              return MP_NO;
01804           }
01805           iz <<= 1;
01806           if (iz > (mp_digit)MP_MASK) {
01807              ++iw;
01808              iz = 1;
01809           }
01810       }
01811    }
01812    return MP_YES;
01813 }
01814 
01815 
01816 /* determines if a number is a valid DR modulus */
01817 int mp_dr_is_modulus(mp_int *a)
01818 {
01819    int ix;
01820 
01821    /* must be at least two digits */
01822    if (a->used < 2) {
01823       return 0;
01824    }
01825 
01826    /* must be of the form b**k - a [a <= b] so all
01827     * but the first digit must be equal to -1 (mod b).
01828     */
01829    for (ix = 1; ix < a->used; ix++) {
01830        if (a->dp[ix] != MP_MASK) {
01831           return 0;
01832        }
01833    }
01834    return 1;
01835 }
01836 
01837 
01838 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
01839  *
01840  * Uses a left-to-right k-ary sliding window to compute the modular
01841  * exponentiation.
01842  * The value of k changes based on the size of the exponent.
01843  *
01844  * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
01845  */
01846 
01847 #ifdef MP_LOW_MEM
01848    #define TAB_SIZE 32
01849 #else
01850    #define TAB_SIZE 256
01851 #endif
01852 
01853 int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
01854                      int redmode)
01855 {
01856   mp_int res;
01857   mp_digit buf, mp;
01858   int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
01859 #ifdef WOLFSSL_SMALL_STACK
01860   mp_int* M = NULL;
01861 #else
01862   mp_int M[TAB_SIZE];
01863 #endif
01864   /* use a pointer to the reduction algorithm.  This allows us to use
01865    * one of many reduction algorithms without modding the guts of
01866    * the code with if statements everywhere.
01867    */
01868   int     (*redux)(mp_int*,mp_int*,mp_digit);
01869 
01870 #ifdef WOLFSSL_SMALL_STACK
01871   M = (mp_int*) XMALLOC(sizeof(mp_int) * TAB_SIZE, NULL,
01872                                                        DYNAMIC_TYPE_TMP_BUFFER);
01873   if (M == NULL)
01874     return MP_MEM;
01875 #endif
01876 
01877   /* find window size */
01878   x = mp_count_bits (X);
01879   if (x <= 7) {
01880     winsize = 2;
01881   } else if (x <= 36) {
01882     winsize = 3;
01883   } else if (x <= 140) {
01884     winsize = 4;
01885   } else if (x <= 450) {
01886     winsize = 5;
01887   } else if (x <= 1303) {
01888     winsize = 6;
01889   } else if (x <= 3529) {
01890     winsize = 7;
01891   } else {
01892     winsize = 8;
01893   }
01894 
01895 #ifdef MP_LOW_MEM
01896   if (winsize > 5) {
01897      winsize = 5;
01898   }
01899 #endif
01900 
01901   /* init M array */
01902   /* init first cell */
01903   if ((err = mp_init_size(&M[1], P->alloc)) != MP_OKAY) {
01904 #ifdef WOLFSSL_SMALL_STACK
01905      XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
01906 #endif
01907 
01908      return err;
01909   }
01910 
01911   /* now init the second half of the array */
01912   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
01913     if ((err = mp_init_size(&M[x], P->alloc)) != MP_OKAY) {
01914       for (y = 1<<(winsize-1); y < x; y++) {
01915         mp_clear (&M[y]);
01916       }
01917       mp_clear(&M[1]);
01918 
01919 #ifdef WOLFSSL_SMALL_STACK
01920       XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
01921 #endif
01922 
01923       return err;
01924     }
01925   }
01926 
01927   /* determine and setup reduction code */
01928   if (redmode == 0) {
01929 #ifdef BN_MP_MONTGOMERY_SETUP_C
01930      /* now setup montgomery  */
01931      if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
01932         goto LBL_M;
01933      }
01934 #else
01935      err = MP_VAL;
01936      goto LBL_M;
01937 #endif
01938 
01939      /* automatically pick the comba one if available (saves quite a few
01940         calls/ifs) */
01941 #ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
01942      if (((P->used * 2 + 1) < MP_WARRAY) &&
01943           P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
01944         redux = fast_mp_montgomery_reduce;
01945      } else
01946 #endif
01947      {
01948 #ifdef BN_MP_MONTGOMERY_REDUCE_C
01949         /* use slower baseline Montgomery method */
01950         redux = mp_montgomery_reduce;
01951 #else
01952         err = MP_VAL;
01953         goto LBL_M;
01954 #endif
01955      }
01956   } else if (redmode == 1) {
01957 #if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C)
01958      /* setup DR reduction for moduli of the form B**k - b */
01959      mp_dr_setup(P, &mp);
01960      redux = mp_dr_reduce;
01961 #else
01962      err = MP_VAL;
01963      goto LBL_M;
01964 #endif
01965   } else {
01966 #if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
01967      /* setup DR reduction for moduli of the form 2**k - b */
01968      if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
01969         goto LBL_M;
01970      }
01971      redux = mp_reduce_2k;
01972 #else
01973      err = MP_VAL;
01974      goto LBL_M;
01975 #endif
01976   }
01977 
01978   /* setup result */
01979   if ((err = mp_init_size (&res, P->alloc)) != MP_OKAY) {
01980     goto LBL_M;
01981   }
01982 
01983   /* create M table
01984    *
01985 
01986    *
01987    * The first half of the table is not computed though accept for M[0] and M[1]
01988    */
01989 
01990   if (redmode == 0) {
01991 #ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
01992      /* now we need R mod m */
01993      if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
01994        goto LBL_RES;
01995      }
01996 
01997      /* now set M[1] to G * R mod m */
01998      if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
01999        goto LBL_RES;
02000      }
02001 #else
02002      err = MP_VAL;
02003      goto LBL_RES;
02004 #endif
02005   } else {
02006      if ((err = mp_set(&res, 1)) != MP_OKAY) {
02007         goto LBL_RES;
02008      }
02009      if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
02010         goto LBL_RES;
02011      }
02012   }
02013 
02014   /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times*/
02015   if ((err = mp_copy (&M[1], &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
02016     goto LBL_RES;
02017   }
02018 
02019   for (x = 0; x < (winsize - 1); x++) {
02020     if ((err = mp_sqr (&M[(mp_digit)(1 << (winsize - 1))],
02021                        &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
02022       goto LBL_RES;
02023     }
02024     if ((err = redux (&M[(mp_digit)(1 << (winsize - 1))], P, mp)) != MP_OKAY) {
02025       goto LBL_RES;
02026     }
02027   }
02028 
02029   /* create upper table */
02030   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
02031     if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
02032       goto LBL_RES;
02033     }
02034     if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
02035       goto LBL_RES;
02036     }
02037   }
02038 
02039   /* set initial mode and bit cnt */
02040   mode   = 0;
02041   bitcnt = 1;
02042   buf    = 0;
02043   digidx = X->used - 1;
02044   bitcpy = 0;
02045   bitbuf = 0;
02046 
02047   for (;;) {
02048     /* grab next digit as required */
02049     if (--bitcnt == 0) {
02050       /* if digidx == -1 we are out of digits so break */
02051       if (digidx == -1) {
02052         break;
02053       }
02054       /* read next digit and reset bitcnt */
02055       buf    = X->dp[digidx--];
02056       bitcnt = (int)DIGIT_BIT;
02057     }
02058 
02059     /* grab the next msb from the exponent */
02060     y     = (int)(buf >> (DIGIT_BIT - 1)) & 1;
02061     buf <<= (mp_digit)1;
02062 
02063     /* if the bit is zero and mode == 0 then we ignore it
02064      * These represent the leading zero bits before the first 1 bit
02065      * in the exponent.  Technically this opt is not required but it
02066      * does lower the # of trivial squaring/reductions used
02067      */
02068     if (mode == 0 && y == 0) {
02069       continue;
02070     }
02071 
02072     /* if the bit is zero and mode == 1 then we square */
02073     if (mode == 1 && y == 0) {
02074       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
02075         goto LBL_RES;
02076       }
02077       if ((err = redux (&res, P, mp)) != MP_OKAY) {
02078         goto LBL_RES;
02079       }
02080       continue;
02081     }
02082 
02083     /* else we add it to the window */
02084     bitbuf |= (y << (winsize - ++bitcpy));
02085     mode    = 2;
02086 
02087     if (bitcpy == winsize) {
02088       /* ok window is filled so square as required and multiply  */
02089       /* square first */
02090       for (x = 0; x < winsize; x++) {
02091         if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
02092           goto LBL_RES;
02093         }
02094         if ((err = redux (&res, P, mp)) != MP_OKAY) {
02095           goto LBL_RES;
02096         }
02097       }
02098 
02099       /* then multiply */
02100       if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
02101         goto LBL_RES;
02102       }
02103       if ((err = redux (&res, P, mp)) != MP_OKAY) {
02104         goto LBL_RES;
02105       }
02106 
02107       /* empty window and reset */
02108       bitcpy = 0;
02109       bitbuf = 0;
02110       mode   = 1;
02111     }
02112   }
02113 
02114   /* if bits remain then square/multiply */
02115   if (mode == 2 && bitcpy > 0) {
02116     /* square then multiply if the bit is set */
02117     for (x = 0; x < bitcpy; x++) {
02118       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
02119         goto LBL_RES;
02120       }
02121       if ((err = redux (&res, P, mp)) != MP_OKAY) {
02122         goto LBL_RES;
02123       }
02124 
02125       /* get next bit of the window */
02126       bitbuf <<= 1;
02127       if ((bitbuf & (1 << winsize)) != 0) {
02128         /* then multiply */
02129         if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
02130           goto LBL_RES;
02131         }
02132         if ((err = redux (&res, P, mp)) != MP_OKAY) {
02133           goto LBL_RES;
02134         }
02135       }
02136     }
02137   }
02138 
02139   if (redmode == 0) {
02140      /* fixup result if Montgomery reduction is used
02141       * recall that any value in a Montgomery system is
02142       * actually multiplied by R mod n.  So we have
02143       * to reduce one more time to cancel out the factor
02144       * of R.
02145       */
02146      if ((err = redux(&res, P, mp)) != MP_OKAY) {
02147        goto LBL_RES;
02148      }
02149   }
02150 
02151   /* swap res with Y */
02152   mp_exch (&res, Y);
02153   err = MP_OKAY;
02154 LBL_RES:mp_clear (&res);
02155 LBL_M:
02156   mp_clear(&M[1]);
02157   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
02158     mp_clear (&M[x]);
02159   }
02160 
02161 #ifdef WOLFSSL_SMALL_STACK
02162   XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
02163 #endif
02164 
02165   return err;
02166 }
02167 
02168 
02169 /* setups the montgomery reduction stuff */
02170 int mp_montgomery_setup (mp_int * n, mp_digit * rho)
02171 {
02172   mp_digit x, b;
02173 
02174 /* fast inversion mod 2**k
02175  *
02176  * Based on the fact that
02177  *
02178  * XA = 1 (mod 2**n)  =>  (X(2-XA)) A = 1 (mod 2**2n)
02179  *                    =>  2*X*A - X*X*A*A = 1
02180  *                    =>  2*(1) - (1)     = 1
02181  */
02182   b = n->dp[0];
02183 
02184   if ((b & 1) == 0) {
02185     return MP_VAL;
02186   }
02187 
02188   x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
02189   x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
02190 #if !defined(MP_8BIT)
02191   x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
02192 #endif
02193 #if defined(MP_64BIT) || !(defined(MP_8BIT) || defined(MP_16BIT))
02194   x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
02195 #endif
02196 #ifdef MP_64BIT
02197   x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
02198 #endif
02199 
02200   /* rho = -1/m mod b */
02201   /* TAO, switched mp_word casts to mp_digit to shut up compiler */
02202   *rho = (mp_digit)((((mp_digit)1 << ((mp_digit) DIGIT_BIT)) - x) & MP_MASK);
02203 
02204   return MP_OKAY;
02205 }
02206 
02207 
02208 /* computes xR**-1 == x (mod N) via Montgomery Reduction
02209  *
02210  * This is an optimized implementation of montgomery_reduce
02211  * which uses the comba method to quickly calculate the columns of the
02212  * reduction.
02213  *
02214  * Based on Algorithm 14.32 on pp.601 of HAC.
02215 */
02216 int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
02217 {
02218   int     ix, res, olduse;
02219 #ifdef WOLFSSL_SMALL_STACK
02220   mp_word* W;    /* uses dynamic memory and slower */
02221 #else
02222   mp_word W[MP_WARRAY];
02223 #endif
02224 
02225   /* get old used count */
02226   olduse = x->used;
02227 
02228   /* grow a as required */
02229   if (x->alloc < n->used + 1) {
02230     if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
02231       return res;
02232     }
02233   }
02234 
02235 #ifdef WOLFSSL_SMALL_STACK
02236   W = (mp_word*)XMALLOC(sizeof(mp_word) * MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
02237   if (W == NULL)
02238     return MP_MEM;
02239 #endif
02240 
02241   /* first we have to get the digits of the input into
02242    * an array of double precision words W[...]
02243    */
02244   {
02245     mp_word *_W;
02246     mp_digit *tmpx;
02247 
02248     /* alias for the W[] array */
02249     _W   = W;
02250 
02251     /* alias for the digits of  x*/
02252     tmpx = x->dp;
02253 
02254     /* copy the digits of a into W[0..a->used-1] */
02255     for (ix = 0; ix < x->used; ix++) {
02256       *_W++ = *tmpx++;
02257     }
02258 
02259     /* zero the high words of W[a->used..m->used*2] */
02260     for (; ix < n->used * 2 + 1; ix++) {
02261       *_W++ = 0;
02262     }
02263   }
02264 
02265   /* now we proceed to zero successive digits
02266    * from the least significant upwards
02267    */
02268   for (ix = 0; ix < n->used; ix++) {
02269     /* mu = ai * m' mod b
02270      *
02271      * We avoid a double precision multiplication (which isn't required)
02272      * by casting the value down to a mp_digit.  Note this requires
02273      * that W[ix-1] have  the carry cleared (see after the inner loop)
02274      */
02275     mp_digit mu;
02276     mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
02277 
02278     /* a = a + mu * m * b**i
02279      *
02280      * This is computed in place and on the fly.  The multiplication
02281      * by b**i is handled by offseting which columns the results
02282      * are added to.
02283      *
02284      * Note the comba method normally doesn't handle carries in the
02285      * inner loop In this case we fix the carry from the previous
02286      * column since the Montgomery reduction requires digits of the
02287      * result (so far) [see above] to work.  This is
02288      * handled by fixing up one carry after the inner loop.  The
02289      * carry fixups are done in order so after these loops the
02290      * first m->used words of W[] have the carries fixed
02291      */
02292     {
02293       int iy;
02294       mp_digit *tmpn;
02295       mp_word *_W;
02296 
02297       /* alias for the digits of the modulus */
02298       tmpn = n->dp;
02299 
02300       /* Alias for the columns set by an offset of ix */
02301       _W = W + ix;
02302 
02303       /* inner loop */
02304       for (iy = 0; iy < n->used; iy++) {
02305           *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++);
02306       }
02307     }
02308 
02309     /* now fix carry for next digit, W[ix+1] */
02310     W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
02311   }
02312 
02313   /* now we have to propagate the carries and
02314    * shift the words downward [all those least
02315    * significant digits we zeroed].
02316    */
02317   {
02318     mp_digit *tmpx;
02319     mp_word *_W, *_W1;
02320 
02321     /* nox fix rest of carries */
02322 
02323     /* alias for current word */
02324     _W1 = W + ix;
02325 
02326     /* alias for next word, where the carry goes */
02327     _W = W + ++ix;
02328 
02329     for (; ix <= n->used * 2 + 1; ix++) {
02330       *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
02331     }
02332 
02333     /* copy out, A = A/b**n
02334      *
02335      * The result is A/b**n but instead of converting from an
02336      * array of mp_word to mp_digit than calling mp_rshd
02337      * we just copy them in the right order
02338      */
02339 
02340     /* alias for destination word */
02341     tmpx = x->dp;
02342 
02343     /* alias for shifted double precision result */
02344     _W = W + n->used;
02345 
02346     for (ix = 0; ix < n->used + 1; ix++) {
02347       *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
02348     }
02349 
02350     /* zero olduse digits, if the input a was larger than
02351      * m->used+1 we'll have to clear the digits
02352      */
02353     for (; ix < olduse; ix++) {
02354       *tmpx++ = 0;
02355     }
02356   }
02357 
02358   /* set the max used and clamp */
02359   x->used = n->used + 1;
02360   mp_clamp (x);
02361 
02362 #ifdef WOLFSSL_SMALL_STACK
02363   XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
02364 #endif
02365 
02366   /* if A >= m then A = A - m */
02367   if (mp_cmp_mag (x, n) != MP_LT) {
02368     return s_mp_sub (x, n, x);
02369   }
02370   return MP_OKAY;
02371 }
02372 
02373 
02374 /* computes xR**-1 == x (mod N) via Montgomery Reduction */
02375 int mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
02376 {
02377   int     ix, res, digs;
02378   mp_digit mu;
02379 
02380   /* can the fast reduction [comba] method be used?
02381    *
02382    * Note that unlike in mul you're safely allowed *less*
02383    * than the available columns [255 per default] since carries
02384    * are fixed up in the inner loop.
02385    */
02386   digs = n->used * 2 + 1;
02387   if ((digs < MP_WARRAY) &&
02388       n->used <
02389       (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
02390     return fast_mp_montgomery_reduce (x, n, rho);
02391   }
02392 
02393   /* grow the input as required */
02394   if (x->alloc < digs) {
02395     if ((res = mp_grow (x, digs)) != MP_OKAY) {
02396       return res;
02397     }
02398   }
02399   x->used = digs;
02400 
02401   for (ix = 0; ix < n->used; ix++) {
02402     /* mu = ai * rho mod b
02403      *
02404      * The value of rho must be precalculated via
02405      * montgomery_setup() such that
02406      * it equals -1/n0 mod b this allows the
02407      * following inner loop to reduce the
02408      * input one digit at a time
02409      */
02410     mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK);
02411 
02412     /* a = a + mu * m * b**i */
02413     {
02414       int iy;
02415       mp_digit *tmpn, *tmpx, u;
02416       mp_word r;
02417 
02418       /* alias for digits of the modulus */
02419       tmpn = n->dp;
02420 
02421       /* alias for the digits of x [the input] */
02422       tmpx = x->dp + ix;
02423 
02424       /* set the carry to zero */
02425       u = 0;
02426 
02427       /* Multiply and add in place */
02428       for (iy = 0; iy < n->used; iy++) {
02429         /* compute product and sum */
02430         r       = ((mp_word)mu) * ((mp_word)*tmpn++) +
02431                   ((mp_word) u) + ((mp_word) * tmpx);
02432 
02433         /* get carry */
02434         u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
02435 
02436         /* fix digit */
02437         *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK));
02438       }
02439       /* At this point the ix'th digit of x should be zero */
02440 
02441 
02442       /* propagate carries upwards as required*/
02443       while (u) {
02444         *tmpx   += u;
02445         u        = *tmpx >> DIGIT_BIT;
02446         *tmpx++ &= MP_MASK;
02447       }
02448     }
02449   }
02450 
02451   /* at this point the n.used'th least
02452    * significant digits of x are all zero
02453    * which means we can shift x to the
02454    * right by n.used digits and the
02455    * residue is unchanged.
02456    */
02457 
02458   /* x = x/b**n.used */
02459   mp_clamp(x);
02460   mp_rshd (x, n->used);
02461 
02462   /* if x >= n then x = x - n */
02463   if (mp_cmp_mag (x, n) != MP_LT) {
02464     return s_mp_sub (x, n, x);
02465   }
02466 
02467   return MP_OKAY;
02468 }
02469 
02470 
02471 /* determines the setup value */
02472 void mp_dr_setup(mp_int *a, mp_digit *d)
02473 {
02474    /* the casts are required if DIGIT_BIT is one less than
02475     * the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
02476     */
02477    *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) -
02478         ((mp_word)a->dp[0]));
02479 }
02480 
02481 
02482 /* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
02483  *
02484  * Based on algorithm from the paper
02485  *
02486  * "Generating Efficient Primes for Discrete Log Cryptosystems"
02487  *                 Chae Hoon Lim, Pil Joong Lee,
02488  *          POSTECH Information Research Laboratories
02489  *
02490  * The modulus must be of a special format [see manual]
02491  *
02492  * Has been modified to use algorithm 7.10 from the LTM book instead
02493  *
02494  * Input x must be in the range 0 <= x <= (n-1)**2
02495  */
02496 int mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k)
02497 {
02498   int      err, i, m;
02499   mp_word  r;
02500   mp_digit mu, *tmpx1, *tmpx2;
02501 
02502   /* m = digits in modulus */
02503   m = n->used;
02504 
02505   /* ensure that "x" has at least 2m digits */
02506   if (x->alloc < m + m) {
02507     if ((err = mp_grow (x, m + m)) != MP_OKAY) {
02508       return err;
02509     }
02510   }
02511 
02512 /* top of loop, this is where the code resumes if
02513  * another reduction pass is required.
02514  */
02515 top:
02516   /* aliases for digits */
02517   /* alias for lower half of x */
02518   tmpx1 = x->dp;
02519 
02520   /* alias for upper half of x, or x/B**m */
02521   tmpx2 = x->dp + m;
02522 
02523   /* set carry to zero */
02524   mu = 0;
02525 
02526   /* compute (x mod B**m) + k * [x/B**m] inline and inplace */
02527   for (i = 0; i < m; i++) {
02528       r         = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu;
02529       *tmpx1++  = (mp_digit)(r & MP_MASK);
02530       mu        = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
02531   }
02532 
02533   /* set final carry */
02534   *tmpx1++ = mu;
02535 
02536   /* zero words above m */
02537   for (i = m + 1; i < x->used; i++) {
02538       *tmpx1++ = 0;
02539   }
02540 
02541   /* clamp, sub and return */
02542   mp_clamp (x);
02543 
02544   /* if x >= n then subtract and reduce again
02545    * Each successive "recursion" makes the input smaller and smaller.
02546    */
02547   if (mp_cmp_mag (x, n) != MP_LT) {
02548     if ((err = s_mp_sub(x, n, x)) != MP_OKAY) {
02549         return err;
02550     }
02551     goto top;
02552   }
02553   return MP_OKAY;
02554 }
02555 
02556 
02557 /* reduces a modulo n where n is of the form 2**p - d */
02558 int mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d)
02559 {
02560    mp_int q;
02561    int    p, res;
02562 
02563    if ((res = mp_init(&q)) != MP_OKAY) {
02564       return res;
02565    }
02566 
02567    p = mp_count_bits(n);
02568 top:
02569    /* q = a/2**p, a = a mod 2**p */
02570    if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
02571       goto ERR;
02572    }
02573 
02574    if (d != 1) {
02575       /* q = q * d */
02576       if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) {
02577          goto ERR;
02578       }
02579    }
02580 
02581    /* a = a + q */
02582    if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
02583       goto ERR;
02584    }
02585 
02586    if (mp_cmp_mag(a, n) != MP_LT) {
02587       if ((res = s_mp_sub(a, n, a)) != MP_OKAY) {
02588          goto ERR;
02589       }
02590       goto top;
02591    }
02592 
02593 ERR:
02594    mp_clear(&q);
02595    return res;
02596 }
02597 
02598 
02599 /* determines the setup value */
02600 int mp_reduce_2k_setup(mp_int *a, mp_digit *d)
02601 {
02602    int res, p;
02603    mp_int tmp;
02604 
02605    if ((res = mp_init(&tmp)) != MP_OKAY) {
02606       return res;
02607    }
02608 
02609    p = mp_count_bits(a);
02610    if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
02611       mp_clear(&tmp);
02612       return res;
02613    }
02614 
02615    if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
02616       mp_clear(&tmp);
02617       return res;
02618    }
02619 
02620    *d = tmp.dp[0];
02621    mp_clear(&tmp);
02622    return MP_OKAY;
02623 }
02624 
02625 
02626 /* set the b bit of a */
02627 int mp_set_bit (mp_int * a, int b)
02628 {
02629     int i = b / DIGIT_BIT, res;
02630 
02631     if (a->used < (int)(i + 1)) {
02632         /* grow a to accommodate the single bit */
02633         if ((res = mp_grow (a, i + 1)) != MP_OKAY) {
02634             return res;
02635         }
02636 
02637         /* set the used count of where the bit will go */
02638         a->used = (int)(i + 1);
02639     }
02640 
02641     /* put the single bit in its place */
02642     a->dp[i] |= ((mp_digit)1) << (b % DIGIT_BIT);
02643 
02644     return MP_OKAY;
02645 }
02646 
02647 /* computes a = 2**b
02648  *
02649  * Simple algorithm which zeros the int, set the required bit
02650  */
02651 int mp_2expt (mp_int * a, int b)
02652 {
02653     /* zero a as per default */
02654     mp_zero (a);
02655 
02656     return mp_set_bit(a, b);
02657 }
02658 
02659 /* multiply by a digit */
02660 int mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
02661 {
02662   mp_digit u, *tmpa, *tmpc;
02663   mp_word  r;
02664   int      ix, res, olduse;
02665 
02666   /* make sure c is big enough to hold a*b */
02667   if (c->alloc < a->used + 1) {
02668     if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
02669       return res;
02670     }
02671   }
02672 
02673   /* get the original destinations used count */
02674   olduse = c->used;
02675 
02676   /* set the sign */
02677   c->sign = a->sign;
02678 
02679   /* alias for a->dp [source] */
02680   tmpa = a->dp;
02681 
02682   /* alias for c->dp [dest] */
02683   tmpc = c->dp;
02684 
02685   /* zero carry */
02686   u = 0;
02687 
02688   /* compute columns */
02689   for (ix = 0; ix < a->used; ix++) {
02690     /* compute product and carry sum for this term */
02691     r       = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b);
02692 
02693     /* mask off higher bits to get a single digit */
02694     *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
02695 
02696     /* send carry into next iteration */
02697     u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
02698   }
02699 
02700   /* store final carry [if any] and increment ix offset  */
02701   *tmpc++ = u;
02702   ++ix;
02703 
02704   /* now zero digits above the top */
02705   while (ix++ < olduse) {
02706      *tmpc++ = 0;
02707   }
02708 
02709   /* set used count */
02710   c->used = a->used + 1;
02711   mp_clamp(c);
02712 
02713   return MP_OKAY;
02714 }
02715 
02716 
02717 /* d = a * b (mod c) */
02718 #if defined(FREESCALE_LTC_TFM)
02719 int wolfcrypt_mp_mulmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d)
02720 #else
02721 int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
02722 #endif
02723 {
02724   int     res;
02725   mp_int  t;
02726 
02727   if ((res = mp_init_size (&t, c->used)) != MP_OKAY) {
02728     return res;
02729   }
02730 
02731   res = mp_mul (a, b, &t);
02732   if (res == MP_OKAY) {
02733       res = mp_mod (&t, c, d);
02734   }
02735 
02736   mp_clear (&t);
02737   return res;
02738 }
02739 
02740 
02741 /* d = a - b (mod c) */
02742 int mp_submod(mp_int* a, mp_int* b, mp_int* c, mp_int* d)
02743 {
02744   int     res;
02745   mp_int  t;
02746 
02747   if ((res = mp_init (&t)) != MP_OKAY) {
02748     return res;
02749   }
02750 
02751   res = mp_sub (a, b, &t);
02752   if (res == MP_OKAY) {
02753       res = mp_mod (&t, c, d);
02754   }
02755 
02756   mp_clear (&t);
02757 
02758   return res;
02759 }
02760 
02761 /* d = a + b (mod c) */
02762 int mp_addmod(mp_int* a, mp_int* b, mp_int* c, mp_int* d)
02763 {
02764    int     res;
02765    mp_int  t;
02766 
02767    if ((res = mp_init (&t)) != MP_OKAY) {
02768      return res;
02769    }
02770 
02771    res = mp_add (a, b, &t);
02772    if (res == MP_OKAY) {
02773        res = mp_mod (&t, c, d);
02774    }
02775 
02776    mp_clear (&t);
02777 
02778    return res;
02779 }
02780 
02781 /* computes b = a*a */
02782 int mp_sqr (mp_int * a, mp_int * b)
02783 {
02784   int     res;
02785 
02786   {
02787 #ifdef BN_FAST_S_MP_SQR_C
02788     /* can we use the fast comba multiplier? */
02789     if ((a->used * 2 + 1) < MP_WARRAY &&
02790          a->used <
02791          (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
02792       res = fast_s_mp_sqr (a, b);
02793     } else
02794 #endif
02795 #ifdef BN_S_MP_SQR_C
02796       res = s_mp_sqr (a, b);
02797 #else
02798       res = MP_VAL;
02799 #endif
02800   }
02801   b->sign = MP_ZPOS;
02802   return res;
02803 }
02804 
02805 
02806 /* high level multiplication (handles sign) */
02807 #if defined(FREESCALE_LTC_TFM)
02808 int wolfcrypt_mp_mul(mp_int *a, mp_int *b, mp_int *c)
02809 #else
02810 int mp_mul (mp_int * a, mp_int * b, mp_int * c)
02811 #endif
02812 {
02813   int     res, neg;
02814   neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
02815 
02816   {
02817     /* can we use the fast multiplier?
02818      *
02819      * The fast multiplier can be used if the output will
02820      * have less than MP_WARRAY digits and the number of
02821      * digits won't affect carry propagation
02822      */
02823     int     digs = a->used + b->used + 1;
02824 
02825 #ifdef BN_FAST_S_MP_MUL_DIGS_C
02826     if ((digs < MP_WARRAY) &&
02827         MIN(a->used, b->used) <=
02828         (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
02829       res = fast_s_mp_mul_digs (a, b, c, digs);
02830     } else
02831 #endif
02832 #ifdef BN_S_MP_MUL_DIGS_C
02833       res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
02834 #else
02835       res = MP_VAL;
02836 #endif
02837 
02838   }
02839   c->sign = (c->used > 0) ? neg : MP_ZPOS;
02840   return res;
02841 }
02842 
02843 
02844 /* b = a*2 */
02845 int mp_mul_2(mp_int * a, mp_int * b)
02846 {
02847   int     x, res, oldused;
02848 
02849   /* grow to accommodate result */
02850   if (b->alloc < a->used + 1) {
02851     if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
02852       return res;
02853     }
02854   }
02855 
02856   oldused = b->used;
02857   b->used = a->used;
02858 
02859   {
02860     mp_digit r, rr, *tmpa, *tmpb;
02861 
02862     /* alias for source */
02863     tmpa = a->dp;
02864 
02865     /* alias for dest */
02866     tmpb = b->dp;
02867 
02868     /* carry */
02869     r = 0;
02870     for (x = 0; x < a->used; x++) {
02871 
02872       /* get what will be the *next* carry bit from the
02873        * MSB of the current digit
02874        */
02875       rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
02876 
02877       /* now shift up this digit, add in the carry [from the previous] */
02878       *tmpb++ = (mp_digit)(((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK);
02879 
02880       /* copy the carry that would be from the source
02881        * digit into the next iteration
02882        */
02883       r = rr;
02884     }
02885 
02886     /* new leading digit? */
02887     if (r != 0) {
02888       /* add a MSB which is always 1 at this point */
02889       *tmpb = 1;
02890       ++(b->used);
02891     }
02892 
02893     /* now zero any excess digits on the destination
02894      * that we didn't write to
02895      */
02896     tmpb = b->dp + b->used;
02897     for (x = b->used; x < oldused; x++) {
02898       *tmpb++ = 0;
02899     }
02900   }
02901   b->sign = a->sign;
02902   return MP_OKAY;
02903 }
02904 
02905 
02906 /* divide by three (based on routine from MPI and the GMP manual) */
02907 int mp_div_3 (mp_int * a, mp_int *c, mp_digit * d)
02908 {
02909   mp_int   q;
02910   mp_word  w, t;
02911   mp_digit b;
02912   int      res, ix;
02913 
02914   /* b = 2**DIGIT_BIT / 3 */
02915   b = (mp_digit) ( (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3) );
02916 
02917   if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
02918      return res;
02919   }
02920 
02921   q.used = a->used;
02922   q.sign = a->sign;
02923   w = 0;
02924   for (ix = a->used - 1; ix >= 0; ix--) {
02925      w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
02926 
02927      if (w >= 3) {
02928         /* multiply w by [1/3] */
02929         t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT);
02930 
02931         /* now subtract 3 * [w/3] from w, to get the remainder */
02932         w -= t+t+t;
02933 
02934         /* fixup the remainder as required since
02935          * the optimization is not exact.
02936          */
02937         while (w >= 3) {
02938            t += 1;
02939            w -= 3;
02940         }
02941       } else {
02942         t = 0;
02943       }
02944       q.dp[ix] = (mp_digit)t;
02945   }
02946 
02947   /* [optional] store the remainder */
02948   if (d != NULL) {
02949      *d = (mp_digit)w;
02950   }
02951 
02952   /* [optional] store the quotient */
02953   if (c != NULL) {
02954      mp_clamp(&q);
02955      mp_exch(&q, c);
02956   }
02957   mp_clear(&q);
02958 
02959   return res;
02960 }
02961 
02962 
02963 /* init an mp_init for a given size */
02964 int mp_init_size (mp_int * a, int size)
02965 {
02966   int x;
02967 
02968   /* pad size so there are always extra digits */
02969   size += (MP_PREC * 2) - (size % MP_PREC);
02970 
02971   /* alloc mem */
02972   a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size, NULL,
02973                                       DYNAMIC_TYPE_BIGINT);
02974   if (a->dp == NULL) {
02975     return MP_MEM;
02976   }
02977 
02978   /* set the members */
02979   a->used  = 0;
02980   a->alloc = size;
02981   a->sign  = MP_ZPOS;
02982 #ifdef HAVE_WOLF_BIGINT
02983   wc_bigint_init(&a->raw);
02984 #endif
02985 
02986   /* zero the digits */
02987   for (x = 0; x < size; x++) {
02988       a->dp[x] = 0;
02989   }
02990 
02991   return MP_OKAY;
02992 }
02993 
02994 
02995 /* the jist of squaring...
02996  * you do like mult except the offset of the tmpx [one that
02997  * starts closer to zero] can't equal the offset of tmpy.
02998  * So basically you set up iy like before then you min it with
02999  * (ty-tx) so that it never happens.  You double all those
03000  * you add in the inner loop
03001 
03002 After that loop you do the squares and add them in.
03003 */
03004 
03005 int fast_s_mp_sqr (mp_int * a, mp_int * b)
03006 {
03007   int       olduse, res, pa, ix, iz;
03008 #ifdef WOLFSSL_SMALL_STACK
03009   mp_digit* W;    /* uses dynamic memory and slower */
03010 #else
03011   mp_digit W[MP_WARRAY];
03012 #endif
03013   mp_digit  *tmpx;
03014   mp_word   W1;
03015 
03016   /* grow the destination as required */
03017   pa = a->used + a->used;
03018   if (b->alloc < pa) {
03019     if ((res = mp_grow (b, pa)) != MP_OKAY) {
03020       return res;
03021     }
03022   }
03023 
03024   if (pa > MP_WARRAY)
03025     return MP_RANGE;  /* TAO range check */
03026 
03027 #ifdef WOLFSSL_SMALL_STACK
03028   W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
03029   if (W == NULL)
03030     return MP_MEM;
03031 #endif
03032 
03033   /* number of output digits to produce */
03034   W1 = 0;
03035   for (ix = 0; ix < pa; ix++) {
03036       int      tx, ty, iy;
03037       mp_word  _W;
03038       mp_digit *tmpy;
03039 
03040       /* clear counter */
03041       _W = 0;
03042 
03043       /* get offsets into the two bignums */
03044       ty = MIN(a->used-1, ix);
03045       tx = ix - ty;
03046 
03047       /* setup temp aliases */
03048       tmpx = a->dp + tx;
03049       tmpy = a->dp + ty;
03050 
03051       /* this is the number of times the loop will iterate, essentially
03052          while (tx++ < a->used && ty-- >= 0) { ... }
03053        */
03054       iy = MIN(a->used-tx, ty+1);
03055 
03056       /* now for squaring tx can never equal ty
03057        * we halve the distance since they approach at a rate of 2x
03058        * and we have to round because odd cases need to be executed
03059        */
03060       iy = MIN(iy, (ty-tx+1)>>1);
03061 
03062       /* execute loop */
03063       for (iz = 0; iz < iy; iz++) {
03064          _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
03065       }
03066 
03067       /* double the inner product and add carry */
03068       _W = _W + _W + W1;
03069 
03070       /* even columns have the square term in them */
03071       if ((ix&1) == 0) {
03072          _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
03073       }
03074 
03075       /* store it */
03076       W[ix] = (mp_digit)(_W & MP_MASK);
03077 
03078       /* make next carry */
03079       W1 = _W >> ((mp_word)DIGIT_BIT);
03080   }
03081 
03082   /* setup dest */
03083   olduse  = b->used;
03084   b->used = a->used+a->used;
03085 
03086   {
03087     mp_digit *tmpb;
03088     tmpb = b->dp;
03089     for (ix = 0; ix < pa; ix++) {
03090       *tmpb++ = (mp_digit)(W[ix] & MP_MASK);
03091     }
03092 
03093     /* clear unused digits [that existed in the old copy of c] */
03094     for (; ix < olduse; ix++) {
03095       *tmpb++ = 0;
03096     }
03097   }
03098   mp_clamp (b);
03099 
03100 #ifdef WOLFSSL_SMALL_STACK
03101   XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
03102 #endif
03103 
03104   return MP_OKAY;
03105 }
03106 
03107 
03108 /* Fast (comba) multiplier
03109  *
03110  * This is the fast column-array [comba] multiplier.  It is
03111  * designed to compute the columns of the product first
03112  * then handle the carries afterwards.  This has the effect
03113  * of making the nested loops that compute the columns very
03114  * simple and schedulable on super-scalar processors.
03115  *
03116  * This has been modified to produce a variable number of
03117  * digits of output so if say only a half-product is required
03118  * you don't have to compute the upper half (a feature
03119  * required for fast Barrett reduction).
03120  *
03121  * Based on Algorithm 14.12 on pp.595 of HAC.
03122  *
03123  */
03124 int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
03125 {
03126   int     olduse, res, pa, ix, iz;
03127 #ifdef WOLFSSL_SMALL_STACK
03128   mp_digit* W;    /* uses dynamic memory and slower */
03129 #else
03130   mp_digit W[MP_WARRAY];
03131 #endif
03132   mp_word  _W;
03133 
03134   /* grow the destination as required */
03135   if (c->alloc < digs) {
03136     if ((res = mp_grow (c, digs)) != MP_OKAY) {
03137       return res;
03138     }
03139   }
03140 
03141   /* number of output digits to produce */
03142   pa = MIN(digs, a->used + b->used);
03143   if (pa > MP_WARRAY)
03144     return MP_RANGE;  /* TAO range check */
03145 
03146 #ifdef WOLFSSL_SMALL_STACK
03147   W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
03148   if (W == NULL)
03149     return MP_MEM;
03150 #endif
03151 
03152   /* clear the carry */
03153   _W = 0;
03154   for (ix = 0; ix < pa; ix++) {
03155       int      tx, ty;
03156       int      iy;
03157       mp_digit *tmpx, *tmpy;
03158 
03159       /* get offsets into the two bignums */
03160       ty = MIN(b->used-1, ix);
03161       tx = ix - ty;
03162 
03163       /* setup temp aliases */
03164       tmpx = a->dp + tx;
03165       tmpy = b->dp + ty;
03166 
03167       /* this is the number of times the loop will iterate, essentially
03168          while (tx++ < a->used && ty-- >= 0) { ... }
03169        */
03170       iy = MIN(a->used-tx, ty+1);
03171 
03172       /* execute loop */
03173       for (iz = 0; iz < iy; ++iz) {
03174          _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
03175 
03176       }
03177 
03178       /* store term */
03179       W[ix] = (mp_digit)(((mp_digit)_W) & MP_MASK);
03180 
03181       /* make next carry */
03182       _W = _W >> ((mp_word)DIGIT_BIT);
03183  }
03184 
03185   /* setup dest */
03186   olduse  = c->used;
03187   c->used = pa;
03188 
03189   {
03190     mp_digit *tmpc;
03191     tmpc = c->dp;
03192     for (ix = 0; ix < pa; ix++) { /* JRB, +1 could read uninitialized data */
03193       /* now extract the previous digit [below the carry] */
03194       *tmpc++ = W[ix];
03195     }
03196 
03197     /* clear unused digits [that existed in the old copy of c] */
03198     for (; ix < olduse; ix++) {
03199       *tmpc++ = 0;
03200     }
03201   }
03202   mp_clamp (c);
03203 
03204 #ifdef WOLFSSL_SMALL_STACK
03205   XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
03206 #endif
03207 
03208   return MP_OKAY;
03209 }
03210 
03211 
03212 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
03213 int s_mp_sqr (mp_int * a, mp_int * b)
03214 {
03215   mp_int  t;
03216   int     res, ix, iy, pa;
03217   mp_word r;
03218   mp_digit u, tmpx, *tmpt;
03219 
03220   pa = a->used;
03221   if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
03222     return res;
03223   }
03224 
03225   /* default used is maximum possible size */
03226   t.used = 2*pa + 1;
03227 
03228   for (ix = 0; ix < pa; ix++) {
03229     /* first calculate the digit at 2*ix */
03230     /* calculate double precision result */
03231     r = ((mp_word) t.dp[2*ix]) +
03232         ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
03233 
03234     /* store lower part in result */
03235     t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
03236 
03237     /* get the carry */
03238     u           = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
03239 
03240     /* left hand side of A[ix] * A[iy] */
03241     tmpx        = a->dp[ix];
03242 
03243     /* alias for where to store the results */
03244     tmpt        = t.dp + (2*ix + 1);
03245 
03246     for (iy = ix + 1; iy < pa; iy++) {
03247       /* first calculate the product */
03248       r       = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
03249 
03250       /* now calculate the double precision result, note we use
03251        * addition instead of *2 since it's easier to optimize
03252        */
03253       r       = ((mp_word) *tmpt) + r + r + ((mp_word) u);
03254 
03255       /* store lower part */
03256       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
03257 
03258       /* get carry */
03259       u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
03260     }
03261     /* propagate upwards */
03262     while (u != ((mp_digit) 0)) {
03263       r       = ((mp_word) *tmpt) + ((mp_word) u);
03264       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
03265       u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
03266     }
03267   }
03268 
03269   mp_clamp (&t);
03270   mp_exch (&t, b);
03271   mp_clear (&t);
03272   return MP_OKAY;
03273 }
03274 
03275 
03276 /* multiplies |a| * |b| and only computes up to digs digits of result
03277  * HAC pp. 595, Algorithm 14.12  Modified so you can control how
03278  * many digits of output are created.
03279  */
03280 int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
03281 {
03282   mp_int  t;
03283   int     res, pa, pb, ix, iy;
03284   mp_digit u;
03285   mp_word r;
03286   mp_digit tmpx, *tmpt, *tmpy;
03287 
03288   /* can we use the fast multiplier? */
03289   if (((digs) < MP_WARRAY) &&
03290       MIN (a->used, b->used) <
03291           (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
03292     return fast_s_mp_mul_digs (a, b, c, digs);
03293   }
03294 
03295   if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
03296     return res;
03297   }
03298   t.used = digs;
03299 
03300   /* compute the digits of the product directly */
03301   pa = a->used;
03302   for (ix = 0; ix < pa; ix++) {
03303     /* set the carry to zero */
03304     u = 0;
03305 
03306     /* limit ourselves to making digs digits of output */
03307     pb = MIN (b->used, digs - ix);
03308 
03309     /* setup some aliases */
03310     /* copy of the digit from a used within the nested loop */
03311     tmpx = a->dp[ix];
03312 
03313     /* an alias for the destination shifted ix places */
03314     tmpt = t.dp + ix;
03315 
03316     /* an alias for the digits of b */
03317     tmpy = b->dp;
03318 
03319     /* compute the columns of the output and propagate the carry */
03320     for (iy = 0; iy < pb; iy++) {
03321       /* compute the column as a mp_word */
03322       r       = ((mp_word)*tmpt) +
03323                 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
03324                 ((mp_word) u);
03325 
03326       /* the new column is the lower part of the result */
03327       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
03328 
03329       /* get the carry word from the result */
03330       u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
03331     }
03332     /* set carry if it is placed below digs */
03333     if (ix + iy < digs) {
03334       *tmpt = u;
03335     }
03336   }
03337 
03338   mp_clamp (&t);
03339   mp_exch (&t, c);
03340 
03341   mp_clear (&t);
03342   return MP_OKAY;
03343 }
03344 
03345 
03346 /*
03347  * shifts with subtractions when the result is greater than b.
03348  *
03349  * The method is slightly modified to shift B unconditionally up to just under
03350  * the leading bit of b.  This saves a lot of multiple precision shifting.
03351  */
03352 int mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
03353 {
03354   int     x, bits, res;
03355 
03356   /* how many bits of last digit does b use */
03357   bits = mp_count_bits (b) % DIGIT_BIT;
03358 
03359   if (b->used > 1) {
03360      if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1))
03361          != MP_OKAY) {
03362         return res;
03363      }
03364   } else {
03365      if ((res = mp_set(a, 1)) != MP_OKAY) {
03366         return res;
03367      }
03368      bits = 1;
03369   }
03370 
03371   /* now compute C = A * B mod b */
03372   for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
03373     if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
03374       return res;
03375     }
03376     if (mp_cmp_mag (a, b) != MP_LT) {
03377       if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
03378         return res;
03379       }
03380     }
03381   }
03382 
03383   return MP_OKAY;
03384 }
03385 
03386 
03387 #ifdef MP_LOW_MEM
03388    #define TAB_SIZE 32
03389 #else
03390    #define TAB_SIZE 256
03391 #endif
03392 
03393 int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
03394 {
03395   mp_int  M[TAB_SIZE], res, mu;
03396   mp_digit buf;
03397   int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
03398   int (*redux)(mp_int*,mp_int*,mp_int*);
03399 
03400   /* find window size */
03401   x = mp_count_bits (X);
03402   if (x <= 7) {
03403     winsize = 2;
03404   } else if (x <= 36) {
03405     winsize = 3;
03406   } else if (x <= 140) {
03407     winsize = 4;
03408   } else if (x <= 450) {
03409     winsize = 5;
03410   } else if (x <= 1303) {
03411     winsize = 6;
03412   } else if (x <= 3529) {
03413     winsize = 7;
03414   } else {
03415     winsize = 8;
03416   }
03417 
03418 #ifdef MP_LOW_MEM
03419     if (winsize > 5) {
03420        winsize = 5;
03421     }
03422 #endif
03423 
03424   /* init M array */
03425   /* init first cell */
03426   if ((err = mp_init(&M[1])) != MP_OKAY) {
03427      return err;
03428   }
03429 
03430   /* now init the second half of the array */
03431   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
03432     if ((err = mp_init(&M[x])) != MP_OKAY) {
03433       for (y = 1<<(winsize-1); y < x; y++) {
03434         mp_clear (&M[y]);
03435       }
03436       mp_clear(&M[1]);
03437       return err;
03438     }
03439   }
03440 
03441   /* create mu, used for Barrett reduction */
03442   if ((err = mp_init (&mu)) != MP_OKAY) {
03443     goto LBL_M;
03444   }
03445 
03446   if (redmode == 0) {
03447      if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
03448         goto LBL_MU;
03449      }
03450      redux = mp_reduce;
03451   } else {
03452      if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) {
03453         goto LBL_MU;
03454      }
03455      redux = mp_reduce_2k_l;
03456   }
03457 
03458   /* create M table
03459    *
03460    * The M table contains powers of the base,
03461    * e.g. M[x] = G**x mod P
03462    *
03463    * The first half of the table is not
03464    * computed though accept for M[0] and M[1]
03465    */
03466   if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
03467     goto LBL_MU;
03468   }
03469 
03470   /* compute the value at M[1<<(winsize-1)] by squaring
03471    * M[1] (winsize-1) times
03472    */
03473   if ((err = mp_copy (&M[1], &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
03474     goto LBL_MU;
03475   }
03476 
03477   for (x = 0; x < (winsize - 1); x++) {
03478     /* square it */
03479     if ((err = mp_sqr (&M[(mp_digit)(1 << (winsize - 1))],
03480                        &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
03481       goto LBL_MU;
03482     }
03483 
03484     /* reduce modulo P */
03485     if ((err = redux (&M[(mp_digit)(1 << (winsize - 1))], P, &mu)) != MP_OKAY) {
03486       goto LBL_MU;
03487     }
03488   }
03489 
03490   /* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
03491    * for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
03492    */
03493   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
03494     if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
03495       goto LBL_MU;
03496     }
03497     if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
03498       goto LBL_MU;
03499     }
03500   }
03501 
03502   /* setup result */
03503   if ((err = mp_init (&res)) != MP_OKAY) {
03504     goto LBL_MU;
03505   }
03506   if ((err = mp_set (&res, 1)) != MP_OKAY) {
03507     goto LBL_MU;
03508   }
03509 
03510   /* set initial mode and bit cnt */
03511   mode   = 0;
03512   bitcnt = 1;
03513   buf    = 0;
03514   digidx = X->used - 1;
03515   bitcpy = 0;
03516   bitbuf = 0;
03517 
03518   for (;;) {
03519     /* grab next digit as required */
03520     if (--bitcnt == 0) {
03521       /* if digidx == -1 we are out of digits */
03522       if (digidx == -1) {
03523         break;
03524       }
03525       /* read next digit and reset the bitcnt */
03526       buf    = X->dp[digidx--];
03527       bitcnt = (int) DIGIT_BIT;
03528     }
03529 
03530     /* grab the next msb from the exponent */
03531     y     = (int)(buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
03532     buf <<= (mp_digit)1;
03533 
03534     /* if the bit is zero and mode == 0 then we ignore it
03535      * These represent the leading zero bits before the first 1 bit
03536      * in the exponent.  Technically this opt is not required but it
03537      * does lower the # of trivial squaring/reductions used
03538      */
03539     if (mode == 0 && y == 0) {
03540       continue;
03541     }
03542 
03543     /* if the bit is zero and mode == 1 then we square */
03544     if (mode == 1 && y == 0) {
03545       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
03546         goto LBL_RES;
03547       }
03548       if ((err = redux (&res, P, &mu)) != MP_OKAY) {
03549         goto LBL_RES;
03550       }
03551       continue;
03552     }
03553 
03554     /* else we add it to the window */
03555     bitbuf |= (y << (winsize - ++bitcpy));
03556     mode    = 2;
03557 
03558     if (bitcpy == winsize) {
03559       /* ok window is filled so square as required and multiply  */
03560       /* square first */
03561       for (x = 0; x < winsize; x++) {
03562         if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
03563           goto LBL_RES;
03564         }
03565         if ((err = redux (&res, P, &mu)) != MP_OKAY) {
03566           goto LBL_RES;
03567         }
03568       }
03569 
03570       /* then multiply */
03571       if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
03572         goto LBL_RES;
03573       }
03574       if ((err = redux (&res, P, &mu)) != MP_OKAY) {
03575         goto LBL_RES;
03576       }
03577 
03578       /* empty window and reset */
03579       bitcpy = 0;
03580       bitbuf = 0;
03581       mode   = 1;
03582     }
03583   }
03584 
03585   /* if bits remain then square/multiply */
03586   if (mode == 2 && bitcpy > 0) {
03587     /* square then multiply if the bit is set */
03588     for (x = 0; x < bitcpy; x++) {
03589       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
03590         goto LBL_RES;
03591       }
03592       if ((err = redux (&res, P, &mu)) != MP_OKAY) {
03593         goto LBL_RES;
03594       }
03595 
03596       bitbuf <<= 1;
03597       if ((bitbuf & (1 << winsize)) != 0) {
03598         /* then multiply */
03599         if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
03600           goto LBL_RES;
03601         }
03602         if ((err = redux (&res, P, &mu)) != MP_OKAY) {
03603           goto LBL_RES;
03604         }
03605       }
03606     }
03607   }
03608 
03609   mp_exch (&res, Y);
03610   err = MP_OKAY;
03611 LBL_RES:mp_clear (&res);
03612 LBL_MU:mp_clear (&mu);
03613 LBL_M:
03614   mp_clear(&M[1]);
03615   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
03616     mp_clear (&M[x]);
03617   }
03618   return err;
03619 }
03620 
03621 
03622 /* pre-calculate the value required for Barrett reduction
03623  * For a given modulus "b" it calculates the value required in "a"
03624  */
03625 int mp_reduce_setup (mp_int * a, mp_int * b)
03626 {
03627   int     res;
03628 
03629   if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
03630     return res;
03631   }
03632   return mp_div (a, b, a, NULL);
03633 }
03634 
03635 
03636 /* reduces x mod m, assumes 0 < x < m**2, mu is
03637  * precomputed via mp_reduce_setup.
03638  * From HAC pp.604 Algorithm 14.42
03639  */
03640 int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
03641 {
03642   mp_int  q;
03643   int     res, um = m->used;
03644 
03645   /* q = x */
03646   if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
03647     return res;
03648   }
03649 
03650   /* q1 = x / b**(k-1)  */
03651   mp_rshd (&q, um - 1);
03652 
03653   /* according to HAC this optimization is ok */
03654   if (((mp_word) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
03655     if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
03656       goto CLEANUP;
03657     }
03658   } else {
03659 #ifdef BN_S_MP_MUL_HIGH_DIGS_C
03660     if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
03661       goto CLEANUP;
03662     }
03663 #elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C)
03664     if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
03665       goto CLEANUP;
03666     }
03667 #else
03668     {
03669       res = MP_VAL;
03670       goto CLEANUP;
03671     }
03672 #endif
03673   }
03674 
03675   /* q3 = q2 / b**(k+1) */
03676   mp_rshd (&q, um + 1);
03677 
03678   /* x = x mod b**(k+1), quick (no division) */
03679   if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
03680     goto CLEANUP;
03681   }
03682 
03683   /* q = q * m mod b**(k+1), quick (no division) */
03684   if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
03685     goto CLEANUP;
03686   }
03687 
03688   /* x = x - q */
03689   if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
03690     goto CLEANUP;
03691   }
03692 
03693   /* If x < 0, add b**(k+1) to it */
03694   if (mp_cmp_d (x, 0) == MP_LT) {
03695     if ((res = mp_set (&q, 1)) != MP_OKAY)
03696         goto CLEANUP;
03697     if ((res = mp_lshd (&q, um + 1)) != MP_OKAY)
03698       goto CLEANUP;
03699     if ((res = mp_add (x, &q, x)) != MP_OKAY)
03700       goto CLEANUP;
03701   }
03702 
03703   /* Back off if it's too big */
03704   while (mp_cmp (x, m) != MP_LT) {
03705     if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
03706       goto CLEANUP;
03707     }
03708   }
03709 
03710 CLEANUP:
03711   mp_clear (&q);
03712 
03713   return res;
03714 }
03715 
03716 
03717 /* reduces a modulo n where n is of the form 2**p - d
03718    This differs from reduce_2k since "d" can be larger
03719    than a single digit.
03720 */
03721 int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d)
03722 {
03723    mp_int q;
03724    int    p, res;
03725 
03726    if ((res = mp_init(&q)) != MP_OKAY) {
03727       return res;
03728    }
03729 
03730    p = mp_count_bits(n);
03731 top:
03732    /* q = a/2**p, a = a mod 2**p */
03733    if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
03734       goto ERR;
03735    }
03736 
03737    /* q = q * d */
03738    if ((res = mp_mul(&q, d, &q)) != MP_OKAY) {
03739       goto ERR;
03740    }
03741 
03742    /* a = a + q */
03743    if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
03744       goto ERR;
03745    }
03746 
03747    if (mp_cmp_mag(a, n) != MP_LT) {
03748       if ((res = s_mp_sub(a, n, a)) != MP_OKAY) {
03749          goto ERR;
03750       }
03751       goto top;
03752    }
03753 
03754 ERR:
03755    mp_clear(&q);
03756    return res;
03757 }
03758 
03759 
03760 /* determines the setup value */
03761 int mp_reduce_2k_setup_l(mp_int *a, mp_int *d)
03762 {
03763    int    res;
03764    mp_int tmp;
03765 
03766    if ((res = mp_init(&tmp)) != MP_OKAY) {
03767       return res;
03768    }
03769 
03770    if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
03771       goto ERR;
03772    }
03773 
03774    if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
03775       goto ERR;
03776    }
03777 
03778 ERR:
03779    mp_clear(&tmp);
03780    return res;
03781 }
03782 
03783 
03784 /* multiplies |a| * |b| and does not compute the lower digs digits
03785  * [meant to get the higher part of the product]
03786  */
03787 int s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
03788 {
03789   mp_int  t;
03790   int     res, pa, pb, ix, iy;
03791   mp_digit u;
03792   mp_word r;
03793   mp_digit tmpx, *tmpt, *tmpy;
03794 
03795   /* can we use the fast multiplier? */
03796 #ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
03797   if (((a->used + b->used + 1) < MP_WARRAY)
03798       && MIN (a->used, b->used) <
03799       (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
03800     return fast_s_mp_mul_high_digs (a, b, c, digs);
03801   }
03802 #endif
03803 
03804   if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
03805     return res;
03806   }
03807   t.used = a->used + b->used + 1;
03808 
03809   pa = a->used;
03810   pb = b->used;
03811   for (ix = 0; ix < pa && a->dp; ix++) {
03812     /* clear the carry */
03813     u = 0;
03814 
03815     /* left hand side of A[ix] * B[iy] */
03816     tmpx = a->dp[ix];
03817 
03818     /* alias to the address of where the digits will be stored */
03819     tmpt = &(t.dp[digs]);
03820 
03821     /* alias for where to read the right hand side from */
03822     tmpy = b->dp + (digs - ix);
03823 
03824     for (iy = digs - ix; iy < pb; iy++) {
03825       /* calculate the double precision result */
03826       r       = ((mp_word)*tmpt) +
03827                 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
03828                 ((mp_word) u);
03829 
03830       /* get the lower part */
03831       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
03832 
03833       /* carry the carry */
03834       u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
03835     }
03836     *tmpt = u;
03837   }
03838   mp_clamp (&t);
03839   mp_exch (&t, c);
03840   mp_clear (&t);
03841   return MP_OKAY;
03842 }
03843 
03844 
03845 /* this is a modified version of fast_s_mul_digs that only produces
03846  * output digits *above* digs.  See the comments for fast_s_mul_digs
03847  * to see how it works.
03848  *
03849  * This is used in the Barrett reduction since for one of the multiplications
03850  * only the higher digits were needed.  This essentially halves the work.
03851  *
03852  * Based on Algorithm 14.12 on pp.595 of HAC.
03853  */
03854 int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
03855 {
03856   int     olduse, res, pa, ix, iz;
03857 #ifdef WOLFSSL_SMALL_STACK
03858   mp_digit* W;    /* uses dynamic memory and slower */
03859 #else
03860   mp_digit W[MP_WARRAY];
03861 #endif
03862   mp_word  _W;
03863 
03864   if (a->dp == NULL) { /* JRB, avoid reading uninitialized values */
03865       return MP_VAL;
03866   }
03867 
03868   /* grow the destination as required */
03869   pa = a->used + b->used;
03870   if (c->alloc < pa) {
03871     if ((res = mp_grow (c, pa)) != MP_OKAY) {
03872       return res;
03873     }
03874   }
03875 
03876   if (pa > MP_WARRAY)
03877     return MP_RANGE;  /* TAO range check */
03878 
03879 #ifdef WOLFSSL_SMALL_STACK
03880   W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
03881   if (W == NULL)
03882     return MP_MEM;
03883 #endif
03884 
03885   /* number of output digits to produce */
03886   pa = a->used + b->used;
03887   _W = 0;
03888   for (ix = digs; ix < pa; ix++) { /* JRB, have a->dp check at top of function*/
03889       int      tx, ty, iy;
03890       mp_digit *tmpx, *tmpy;
03891 
03892       /* get offsets into the two bignums */
03893       ty = MIN(b->used-1, ix);
03894       tx = ix - ty;
03895 
03896       /* setup temp aliases */
03897       tmpx = a->dp + tx;
03898       tmpy = b->dp + ty;
03899 
03900       /* this is the number of times the loop will iterate, essentially its
03901          while (tx++ < a->used && ty-- >= 0) { ... }
03902        */
03903       iy = MIN(a->used-tx, ty+1);
03904 
03905       /* execute loop */
03906       for (iz = 0; iz < iy; iz++) {
03907          _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
03908       }
03909 
03910       /* store term */
03911       W[ix] = (mp_digit)(((mp_digit)_W) & MP_MASK);
03912 
03913       /* make next carry */
03914       _W = _W >> ((mp_word)DIGIT_BIT);
03915   }
03916 
03917   /* setup dest */
03918   olduse  = c->used;
03919   c->used = pa;
03920 
03921   {
03922     mp_digit *tmpc;
03923 
03924     tmpc = c->dp + digs;
03925     for (ix = digs; ix < pa; ix++) {   /* TAO, <= could potentially overwrite */
03926       /* now extract the previous digit [below the carry] */
03927       *tmpc++ = W[ix];
03928     }
03929 
03930     /* clear unused digits [that existed in the old copy of c] */
03931     for (; ix < olduse; ix++) {
03932       *tmpc++ = 0;
03933     }
03934   }
03935   mp_clamp (c);
03936 
03937 #ifdef WOLFSSL_SMALL_STACK
03938   XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
03939 #endif
03940 
03941   return MP_OKAY;
03942 }
03943 
03944 
03945 #ifndef MP_SET_CHUNK_BITS
03946     #define MP_SET_CHUNK_BITS 4
03947 #endif
03948 int mp_set_int (mp_int * a, unsigned long b)
03949 {
03950   int x, res;
03951 
03952   /* use direct mp_set if b is less than mp_digit max */
03953   if (b < MP_DIGIT_MAX) {
03954     return mp_set (a, (mp_digit)b);
03955   }
03956 
03957   mp_zero (a);
03958 
03959   /* set chunk bits at a time */
03960   for (x = 0; x < (int)(sizeof(b) * 8) / MP_SET_CHUNK_BITS; x++) {
03961     /* shift the number up chunk bits */
03962     if ((res = mp_mul_2d (a, MP_SET_CHUNK_BITS, a)) != MP_OKAY) {
03963       return res;
03964     }
03965 
03966     /* OR in the top bits of the source */
03967     a->dp[0] |= (b >> ((sizeof(b) * 8) - MP_SET_CHUNK_BITS)) &
03968                                   ((1 << MP_SET_CHUNK_BITS) - 1);
03969 
03970     /* shift the source up to the next chunk bits */
03971     b <<= MP_SET_CHUNK_BITS;
03972 
03973     /* ensure that digits are not clamped off */
03974     a->used += 1;
03975   }
03976   mp_clamp (a);
03977   return MP_OKAY;
03978 }
03979 
03980 
03981 #if defined(WOLFSSL_KEY_GEN) || defined(HAVE_ECC)
03982 
03983 /* c = a * a (mod b) */
03984 int mp_sqrmod (mp_int * a, mp_int * b, mp_int * c)
03985 {
03986   int     res;
03987   mp_int  t;
03988 
03989   if ((res = mp_init (&t)) != MP_OKAY) {
03990     return res;
03991   }
03992 
03993   if ((res = mp_sqr (a, &t)) != MP_OKAY) {
03994     mp_clear (&t);
03995     return res;
03996   }
03997   res = mp_mod (&t, b, c);
03998   mp_clear (&t);
03999   return res;
04000 }
04001 
04002 #endif
04003 
04004 
04005 #if defined(HAVE_ECC) || !defined(NO_PWDBASED) || defined(WOLFSSL_SNIFFER) || \
04006     defined(WOLFSSL_HAVE_WOLFSCEP) || defined(WOLFSSL_KEY_GEN) || \
04007     defined(OPENSSL_EXTRA) || defined(WC_RSA_BLINDING)
04008 
04009 /* single digit addition */
04010 int mp_add_d (mp_int* a, mp_digit b, mp_int* c)
04011 {
04012   int     res, ix, oldused;
04013   mp_digit *tmpa, *tmpc, mu;
04014 
04015   /* grow c as required */
04016   if (c->alloc < a->used + 1) {
04017      if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
04018         return res;
04019      }
04020   }
04021 
04022   /* if a is negative and |a| >= b, call c = |a| - b */
04023   if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) {
04024      /* temporarily fix sign of a */
04025      a->sign = MP_ZPOS;
04026 
04027      /* c = |a| - b */
04028      res = mp_sub_d(a, b, c);
04029 
04030      /* fix sign  */
04031      a->sign = c->sign = MP_NEG;
04032 
04033      /* clamp */
04034      mp_clamp(c);
04035 
04036      return res;
04037   }
04038 
04039   /* old number of used digits in c */
04040   oldused = c->used;
04041 
04042   /* sign always positive */
04043   c->sign = MP_ZPOS;
04044 
04045   /* source alias */
04046   tmpa    = a->dp;
04047 
04048   /* destination alias */
04049   tmpc    = c->dp;
04050 
04051   /* if a is positive */
04052   if (a->sign == MP_ZPOS) {
04053      /* add digit, after this we're propagating
04054       * the carry.
04055       */
04056      *tmpc   = *tmpa++ + b;
04057      mu      = *tmpc >> DIGIT_BIT;
04058      *tmpc++ &= MP_MASK;
04059 
04060      /* now handle rest of the digits */
04061      for (ix = 1; ix < a->used; ix++) {
04062         *tmpc   = *tmpa++ + mu;
04063         mu      = *tmpc >> DIGIT_BIT;
04064         *tmpc++ &= MP_MASK;
04065      }
04066      /* set final carry */
04067      if (ix < c->alloc) {
04068         ix++;
04069         *tmpc++  = mu;
04070      }
04071 
04072      /* setup size */
04073      c->used = a->used + 1;
04074   } else {
04075      /* a was negative and |a| < b */
04076      c->used  = 1;
04077 
04078      /* the result is a single digit */
04079      if (a->used == 1) {
04080         *tmpc++  =  b - a->dp[0];
04081      } else {
04082         *tmpc++  =  b;
04083      }
04084 
04085      /* setup count so the clearing of oldused
04086       * can fall through correctly
04087       */
04088      ix       = 1;
04089   }
04090 
04091   /* now zero to oldused */
04092   while (ix++ < oldused) {
04093      *tmpc++ = 0;
04094   }
04095   mp_clamp(c);
04096 
04097   return MP_OKAY;
04098 }
04099 
04100 
04101 /* single digit subtraction */
04102 int mp_sub_d (mp_int * a, mp_digit b, mp_int * c)
04103 {
04104   mp_digit *tmpa, *tmpc, mu;
04105   int       res, ix, oldused;
04106 
04107   /* grow c as required */
04108   if (c->alloc < a->used + 1) {
04109      if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
04110         return res;
04111      }
04112   }
04113 
04114   /* if a is negative just do an unsigned
04115    * addition [with fudged signs]
04116    */
04117   if (a->sign == MP_NEG) {
04118      a->sign = MP_ZPOS;
04119      res     = mp_add_d(a, b, c);
04120      a->sign = c->sign = MP_NEG;
04121 
04122      /* clamp */
04123      mp_clamp(c);
04124 
04125      return res;
04126   }
04127 
04128   /* setup regs */
04129   oldused = c->used;
04130   tmpa    = a->dp;
04131   tmpc    = c->dp;
04132 
04133   /* if a <= b simply fix the single digit */
04134   if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) {
04135      if (a->used == 1) {
04136         *tmpc++ = b - *tmpa;
04137      } else {
04138         *tmpc++ = b;
04139      }
04140      ix      = 1;
04141 
04142      /* negative/1digit */
04143      c->sign = MP_NEG;
04144      c->used = 1;
04145   } else {
04146      /* positive/size */
04147      c->sign = MP_ZPOS;
04148      c->used = a->used;
04149 
04150      /* subtract first digit */
04151      *tmpc    = *tmpa++ - b;
04152      mu       = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
04153      *tmpc++ &= MP_MASK;
04154 
04155      /* handle rest of the digits */
04156      for (ix = 1; ix < a->used; ix++) {
04157         *tmpc    = *tmpa++ - mu;
04158         mu       = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
04159         *tmpc++ &= MP_MASK;
04160      }
04161   }
04162 
04163   /* zero excess digits */
04164   while (ix++ < oldused) {
04165      *tmpc++ = 0;
04166   }
04167   mp_clamp(c);
04168   return MP_OKAY;
04169 }
04170 
04171 #endif /* defined(HAVE_ECC) || !defined(NO_PWDBASED) */
04172 
04173 
04174 #if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || defined(HAVE_ECC) || \
04175     defined(DEBUG_WOLFSSL)
04176 
04177 static const int lnz[16] = {
04178    4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
04179 };
04180 
04181 /* Counts the number of lsbs which are zero before the first zero bit */
04182 int mp_cnt_lsb(mp_int *a)
04183 {
04184     int x;
04185     mp_digit q = 0, qq;
04186 
04187     /* easy out */
04188     if (mp_iszero(a) == MP_YES) {
04189         return 0;
04190     }
04191 
04192     /* scan lower digits until non-zero */
04193     for (x = 0; x < a->used && a->dp[x] == 0; x++) {}
04194     if (a->dp)
04195         q = a->dp[x];
04196     x *= DIGIT_BIT;
04197 
04198     /* now scan this digit until a 1 is found */
04199     if ((q & 1) == 0) {
04200         do {
04201             qq  = q & 15;
04202             x  += lnz[qq];
04203             q >>= 4;
04204         } while (qq == 0);
04205     }
04206     return x;
04207 }
04208 
04209 
04210 
04211 
04212 static int s_is_power_of_two(mp_digit b, int *p)
04213 {
04214    int x;
04215 
04216    /* fast return if no power of two */
04217    if ((b==0) || (b & (b-1))) {
04218       return 0;
04219    }
04220 
04221    for (x = 0; x < DIGIT_BIT; x++) {
04222       if (b == (((mp_digit)1)<<x)) {
04223          *p = x;
04224          return 1;
04225       }
04226    }
04227    return 0;
04228 }
04229 
04230 /* single digit division (based on routine from MPI) */
04231 static int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
04232 {
04233   mp_int  q;
04234   mp_word w;
04235   mp_digit t;
04236   int     res = MP_OKAY, ix;
04237 
04238   /* cannot divide by zero */
04239   if (b == 0) {
04240      return MP_VAL;
04241   }
04242 
04243   /* quick outs */
04244   if (b == 1 || mp_iszero(a) == MP_YES) {
04245      if (d != NULL) {
04246         *d = 0;
04247      }
04248      if (c != NULL) {
04249         return mp_copy(a, c);
04250      }
04251      return MP_OKAY;
04252   }
04253 
04254   /* power of two ? */
04255   if (s_is_power_of_two(b, &ix) == 1) {
04256      if (d != NULL) {
04257         *d = a->dp[0] & ((((mp_digit)1)<<ix) - 1);
04258      }
04259      if (c != NULL) {
04260         return mp_div_2d(a, ix, c, NULL);
04261      }
04262      return MP_OKAY;
04263   }
04264 
04265 #ifdef BN_MP_DIV_3_C
04266   /* three? */
04267   if (b == 3) {
04268      return mp_div_3(a, c, d);
04269   }
04270 #endif
04271 
04272   /* no easy answer [c'est la vie].  Just division */
04273   if (c != NULL) {
04274       if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
04275          return res;
04276       }
04277 
04278       q.used = a->used;
04279       q.sign = a->sign;
04280   }
04281   else {
04282       if ((res = mp_init(&q)) != MP_OKAY) {
04283          return res;
04284       }
04285   }
04286 
04287 
04288   w = 0;
04289   for (ix = a->used - 1; ix >= 0; ix--) {
04290      w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
04291 
04292      if (w >= b) {
04293         t = (mp_digit)(w / b);
04294         w -= ((mp_word)t) * ((mp_word)b);
04295       } else {
04296         t = 0;
04297       }
04298       if (c != NULL)
04299         q.dp[ix] = (mp_digit)t;
04300   }
04301 
04302   if (d != NULL) {
04303      *d = (mp_digit)w;
04304   }
04305 
04306   if (c != NULL) {
04307      mp_clamp(&q);
04308      mp_exch(&q, c);
04309   }
04310   mp_clear(&q);
04311 
04312   return res;
04313 }
04314 
04315 
04316 int mp_mod_d (mp_int * a, mp_digit b, mp_digit * c)
04317 {
04318   return mp_div_d(a, b, NULL, c);
04319 }
04320 
04321 #endif /* WOLFSSL_KEY_GEN || HAVE_COMP_KEY || HAVE_ECC || DEBUG_WOLFSSL */
04322 
04323 #ifdef WOLFSSL_KEY_GEN
04324 
04325 const mp_digit ltm_prime_tab[PRIME_SIZE] = {
04326   0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
04327   0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
04328   0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
04329   0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F,
04330 #ifndef MP_8BIT
04331   0x0083,
04332   0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
04333   0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
04334   0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
04335   0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
04336 
04337   0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
04338   0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
04339   0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
04340   0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
04341   0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
04342   0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
04343   0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
04344   0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
04345 
04346   0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
04347   0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
04348   0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
04349   0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
04350   0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
04351   0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
04352   0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
04353   0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
04354 
04355   0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
04356   0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
04357   0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
04358   0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
04359   0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
04360   0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
04361   0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
04362   0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
04363 #endif
04364 };
04365 
04366 
04367 /* Miller-Rabin test of "a" to the base of "b" as described in
04368  * HAC pp. 139 Algorithm 4.24
04369  *
04370  * Sets result to 0 if definitely composite or 1 if probably prime.
04371  * Randomly the chance of error is no more than 1/4 and often
04372  * very much lower.
04373  */
04374 static int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result)
04375 {
04376   mp_int  n1, y, r;
04377   int     s, j, err;
04378 
04379   /* default */
04380   *result = MP_NO;
04381 
04382   /* ensure b > 1 */
04383   if (mp_cmp_d(b, 1) != MP_GT) {
04384      return MP_VAL;
04385   }
04386 
04387   /* get n1 = a - 1 */
04388   if ((err = mp_init_copy (&n1, a)) != MP_OKAY) {
04389     return err;
04390   }
04391   if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) {
04392     goto LBL_N1;
04393   }
04394 
04395   /* set 2**s * r = n1 */
04396   if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) {
04397     goto LBL_N1;
04398   }
04399 
04400   /* count the number of least significant bits
04401    * which are zero
04402    */
04403   s = mp_cnt_lsb(&r);
04404 
04405   /* now divide n - 1 by 2**s */
04406   if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) {
04407     goto LBL_R;
04408   }
04409 
04410   /* compute y = b**r mod a */
04411   if ((err = mp_init (&y)) != MP_OKAY) {
04412     goto LBL_R;
04413   }
04414   if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) {
04415     goto LBL_Y;
04416   }
04417 
04418   /* if y != 1 and y != n1 do */
04419   if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) {
04420     j = 1;
04421     /* while j <= s-1 and y != n1 */
04422     while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) {
04423       if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) {
04424          goto LBL_Y;
04425       }
04426 
04427       /* if y == 1 then composite */
04428       if (mp_cmp_d (&y, 1) == MP_EQ) {
04429          goto LBL_Y;
04430       }
04431 
04432       ++j;
04433     }
04434 
04435     /* if y != n1 then composite */
04436     if (mp_cmp (&y, &n1) != MP_EQ) {
04437       goto LBL_Y;
04438     }
04439   }
04440 
04441   /* probably prime now */
04442   *result = MP_YES;
04443 LBL_Y:mp_clear (&y);
04444 LBL_R:mp_clear (&r);
04445 LBL_N1:mp_clear (&n1);
04446   return err;
04447 }
04448 
04449 
04450 /* determines if an integers is divisible by one
04451  * of the first PRIME_SIZE primes or not
04452  *
04453  * sets result to 0 if not, 1 if yes
04454  */
04455 static int mp_prime_is_divisible (mp_int * a, int *result)
04456 {
04457   int     err, ix;
04458   mp_digit res;
04459 
04460   /* default to not */
04461   *result = MP_NO;
04462 
04463   for (ix = 0; ix < PRIME_SIZE; ix++) {
04464     /* what is a mod LBL_prime_tab[ix] */
04465     if ((err = mp_mod_d (a, ltm_prime_tab[ix], &res)) != MP_OKAY) {
04466       return err;
04467     }
04468 
04469     /* is the residue zero? */
04470     if (res == 0) {
04471       *result = MP_YES;
04472       return MP_OKAY;
04473     }
04474   }
04475 
04476   return MP_OKAY;
04477 }
04478 
04479 static const int USE_BBS = 1;
04480 
04481 int mp_rand_prime(mp_int* N, int len, WC_RNG* rng, void* heap)
04482 {
04483     int   err, res, type;
04484     byte* buf;
04485 
04486     if (N == NULL || rng == NULL)
04487         return MP_VAL;
04488 
04489     /* get type */
04490     if (len < 0) {
04491         type = USE_BBS;
04492         len = -len;
04493     } else {
04494         type = 0;
04495     }
04496 
04497     /* allow sizes between 2 and 512 bytes for a prime size */
04498     if (len < 2 || len > 512) {
04499         return MP_VAL;
04500     }
04501 
04502     /* allocate buffer to work with */
04503     buf = (byte*)XMALLOC(len, heap, DYNAMIC_TYPE_RSA);
04504     if (buf == NULL) {
04505         return MP_MEM;
04506     }
04507     XMEMSET(buf, 0, len);
04508 
04509     do {
04510 #ifdef SHOW_GEN
04511         printf(".");
04512         fflush(stdout);
04513 #endif
04514         /* generate value */
04515         err = wc_RNG_GenerateBlock(rng, buf, len);
04516         if (err != 0) {
04517             XFREE(buf, heap, DYNAMIC_TYPE_RSA);
04518             return err;
04519         }
04520 
04521         /* munge bits */
04522         buf[0]     |= 0x80 | 0x40;
04523         buf[len-1] |= 0x01 | ((type & USE_BBS) ? 0x02 : 0x00);
04524 
04525         /* load value */
04526         if ((err = mp_read_unsigned_bin(N, buf, len)) != MP_OKAY) {
04527             XFREE(buf, heap, DYNAMIC_TYPE_RSA);
04528             return err;
04529         }
04530 
04531         /* test */
04532         if ((err = mp_prime_is_prime(N, 8, &res)) != MP_OKAY) {
04533             XFREE(buf, heap, DYNAMIC_TYPE_RSA);
04534             return err;
04535         }
04536     } while (res == MP_NO);
04537 
04538     XMEMSET(buf, 0, len);
04539     XFREE(buf, heap, DYNAMIC_TYPE_RSA);
04540 
04541     return MP_OKAY;
04542 }
04543 
04544 /*
04545  * Sets result to 1 if probably prime, 0 otherwise
04546  */
04547 int mp_prime_is_prime (mp_int * a, int t, int *result)
04548 {
04549   mp_int  b;
04550   int     ix, err, res;
04551 
04552   /* default to no */
04553   *result = MP_NO;
04554 
04555   /* valid value of t? */
04556   if (t <= 0 || t > PRIME_SIZE) {
04557     return MP_VAL;
04558   }
04559 
04560   /* is the input equal to one of the primes in the table? */
04561   for (ix = 0; ix < PRIME_SIZE; ix++) {
04562       if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
04563          *result = 1;
04564          return MP_OKAY;
04565       }
04566   }
04567 
04568   /* first perform trial division */
04569   if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
04570     return err;
04571   }
04572 
04573   /* return if it was trivially divisible */
04574   if (res == MP_YES) {
04575     return MP_OKAY;
04576   }
04577 
04578   /* now perform the miller-rabin rounds */
04579   if ((err = mp_init (&b)) != MP_OKAY) {
04580     return err;
04581   }
04582 
04583   for (ix = 0; ix < t; ix++) {
04584     /* set the prime */
04585     if ((err = mp_set (&b, ltm_prime_tab[ix])) != MP_OKAY) {
04586         goto LBL_B;
04587     }
04588 
04589     if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
04590       goto LBL_B;
04591     }
04592 
04593     if (res == MP_NO) {
04594       goto LBL_B;
04595     }
04596   }
04597 
04598   /* passed the test */
04599   *result = MP_YES;
04600 LBL_B:mp_clear (&b);
04601   return err;
04602 }
04603 
04604 
04605 /* computes least common multiple as |a*b|/(a, b) */
04606 int mp_lcm (mp_int * a, mp_int * b, mp_int * c)
04607 {
04608   int     res;
04609   mp_int  t1, t2;
04610 
04611 
04612   if ((res = mp_init_multi (&t1, &t2, NULL, NULL, NULL, NULL)) != MP_OKAY) {
04613     return res;
04614   }
04615 
04616   /* t1 = get the GCD of the two inputs */
04617   if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) {
04618     goto LBL_T;
04619   }
04620 
04621   /* divide the smallest by the GCD */
04622   if (mp_cmp_mag(a, b) == MP_LT) {
04623      /* store quotient in t2 such that t2 * b is the LCM */
04624      if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) {
04625         goto LBL_T;
04626      }
04627      res = mp_mul(b, &t2, c);
04628   } else {
04629      /* store quotient in t2 such that t2 * a is the LCM */
04630      if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) {
04631         goto LBL_T;
04632      }
04633      res = mp_mul(a, &t2, c);
04634   }
04635 
04636   /* fix the sign to positive */
04637   c->sign = MP_ZPOS;
04638 
04639 LBL_T:
04640   mp_clear(&t1);
04641   mp_clear(&t2);
04642   return res;
04643 }
04644 
04645 
04646 
04647 /* Greatest Common Divisor using the binary method */
04648 int mp_gcd (mp_int * a, mp_int * b, mp_int * c)
04649 {
04650     mp_int  u, v;
04651     int     k, u_lsb, v_lsb, res;
04652 
04653     /* either zero than gcd is the largest */
04654     if (mp_iszero (a) == MP_YES) {
04655         return mp_abs (b, c);
04656     }
04657     if (mp_iszero (b) == MP_YES) {
04658         return mp_abs (a, c);
04659     }
04660 
04661     /* get copies of a and b we can modify */
04662     if ((res = mp_init_copy (&u, a)) != MP_OKAY) {
04663         return res;
04664     }
04665 
04666     if ((res = mp_init_copy (&v, b)) != MP_OKAY) {
04667         goto LBL_U;
04668     }
04669 
04670     /* must be positive for the remainder of the algorithm */
04671     u.sign = v.sign = MP_ZPOS;
04672 
04673     /* B1.  Find the common power of two for u and v */
04674     u_lsb = mp_cnt_lsb(&u);
04675     v_lsb = mp_cnt_lsb(&v);
04676     k     = MIN(u_lsb, v_lsb);
04677 
04678     if (k > 0) {
04679         /* divide the power of two out */
04680         if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) {
04681             goto LBL_V;
04682         }
04683 
04684         if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) {
04685             goto LBL_V;
04686         }
04687     }
04688 
04689     /* divide any remaining factors of two out */
04690     if (u_lsb != k) {
04691         if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {
04692             goto LBL_V;
04693         }
04694     }
04695 
04696     if (v_lsb != k) {
04697         if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {
04698             goto LBL_V;
04699         }
04700     }
04701 
04702     while (mp_iszero(&v) == MP_NO) {
04703         /* make sure v is the largest */
04704         if (mp_cmp_mag(&u, &v) == MP_GT) {
04705             /* swap u and v to make sure v is >= u */
04706             mp_exch(&u, &v);
04707         }
04708 
04709         /* subtract smallest from largest */
04710         if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) {
04711             goto LBL_V;
04712         }
04713 
04714         /* Divide out all factors of two */
04715         if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
04716             goto LBL_V;
04717         }
04718     }
04719 
04720     /* multiply by 2**k which we divided out at the beginning */
04721     if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) {
04722         goto LBL_V;
04723     }
04724     c->sign = MP_ZPOS;
04725     res = MP_OKAY;
04726 LBL_V:mp_clear (&u);
04727 LBL_U:mp_clear (&v);
04728     return res;
04729 }
04730 
04731 #endif /* WOLFSSL_KEY_GEN */
04732 
04733 
04734 #if !defined(NO_DSA) || defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN) || \
04735     defined(HAVE_COMP_KEY) || defined(WOLFSSL_DEBUG_MATH) || \
04736     defined(DEBUG_WOLFSSL)
04737 
04738 /* chars used in radix conversions */
04739 const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ\
04740                          abcdefghijklmnopqrstuvwxyz+/";
04741 #endif
04742 
04743 #if !defined(NO_DSA) || defined(HAVE_ECC)
04744 /* read a string [ASCII] in a given radix */
04745 int mp_read_radix (mp_int * a, const char *str, int radix)
04746 {
04747   int     y, res, neg;
04748   char    ch;
04749 
04750   /* zero the digit bignum */
04751   mp_zero(a);
04752 
04753   /* make sure the radix is ok */
04754   if (radix < MP_RADIX_BIN || radix > MP_RADIX_MAX) {
04755     return MP_VAL;
04756   }
04757 
04758   /* if the leading digit is a
04759    * minus set the sign to negative.
04760    */
04761   if (*str == '-') {
04762     ++str;
04763     neg = MP_NEG;
04764   } else {
04765     neg = MP_ZPOS;
04766   }
04767 
04768   /* set the integer to the default of zero */
04769   mp_zero (a);
04770 
04771   /* process each digit of the string */
04772   while (*str != '\0') {
04773     /* if the radix <= 36 the conversion is case insensitive
04774      * this allows numbers like 1AB and 1ab to represent the same  value
04775      * [e.g. in hex]
04776      */
04777     ch = (radix <= 36) ? (char)XTOUPPER((unsigned char)*str) : *str;
04778     for (y = 0; y < 64; y++) {
04779       if (ch == mp_s_rmap[y]) {
04780          break;
04781       }
04782     }
04783 
04784     /* if the char was found in the map
04785      * and is less than the given radix add it
04786      * to the number, otherwise exit the loop.
04787      */
04788     if (y < radix) {
04789       if ((res = mp_mul_d (a, (mp_digit) radix, a)) != MP_OKAY) {
04790          return res;
04791       }
04792       if ((res = mp_add_d (a, (mp_digit) y, a)) != MP_OKAY) {
04793          return res;
04794       }
04795     } else {
04796       break;
04797     }
04798     ++str;
04799   }
04800 
04801   /* if digit in isn't null term, then invalid character was found */
04802   if (*str != '\0') {
04803      mp_zero (a);
04804      return MP_VAL;
04805   }
04806 
04807   /* set the sign only if a != 0 */
04808   if (mp_iszero(a) != MP_YES) {
04809      a->sign = neg;
04810   }
04811   return MP_OKAY;
04812 }
04813 #endif /* !defined(NO_DSA) || defined(HAVE_ECC) */
04814 
04815 #if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
04816     defined(WOLFSSL_DEBUG_MATH) || defined(DEBUG_WOLFSSL) || \
04817     defined(WOLFSSL_PUBLIC_MP)
04818 
04819 /* returns size of ASCII representation */
04820 int mp_radix_size (mp_int *a, int radix, int *size)
04821 {
04822     int     res, digs;
04823     mp_int  t;
04824     mp_digit d;
04825 
04826     *size = 0;
04827 
04828     /* special case for binary */
04829     if (radix == MP_RADIX_BIN) {
04830         *size = mp_count_bits (a) + (a->sign == MP_NEG ? 1 : 0) + 1;
04831         return MP_OKAY;
04832     }
04833 
04834     /* make sure the radix is in range */
04835     if (radix < MP_RADIX_BIN || radix > MP_RADIX_MAX) {
04836         return MP_VAL;
04837     }
04838 
04839     if (mp_iszero(a) == MP_YES) {
04840         *size = 2;
04841         return MP_OKAY;
04842     }
04843 
04844     /* digs is the digit count */
04845     digs = 0;
04846 
04847     /* if it's negative add one for the sign */
04848     if (a->sign == MP_NEG) {
04849         ++digs;
04850     }
04851 
04852     /* init a copy of the input */
04853     if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
04854         return res;
04855     }
04856 
04857     /* force temp to positive */
04858     t.sign = MP_ZPOS;
04859 
04860     /* fetch out all of the digits */
04861     while (mp_iszero (&t) == MP_NO) {
04862         if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
04863             mp_clear (&t);
04864             return res;
04865         }
04866         ++digs;
04867     }
04868     mp_clear (&t);
04869 
04870     /* return digs + 1, the 1 is for the NULL byte that would be required. */
04871     *size = digs + 1;
04872     return MP_OKAY;
04873 }
04874 
04875 /* stores a bignum as a ASCII string in a given radix (2..64) */
04876 int mp_toradix (mp_int *a, char *str, int radix)
04877 {
04878     int     res, digs;
04879     mp_int  t;
04880     mp_digit d;
04881     char   *_s = str;
04882 
04883     /* check range of the radix */
04884     if (radix < MP_RADIX_BIN || radix > MP_RADIX_MAX) {
04885         return MP_VAL;
04886     }
04887 
04888     /* quick out if its zero */
04889     if (mp_iszero(a) == MP_YES) {
04890         *str++ = '0';
04891         *str = '\0';
04892         return MP_OKAY;
04893     }
04894 
04895     if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
04896         return res;
04897     }
04898 
04899     /* if it is negative output a - */
04900     if (t.sign == MP_NEG) {
04901         ++_s;
04902         *str++ = '-';
04903         t.sign = MP_ZPOS;
04904     }
04905 
04906     digs = 0;
04907     while (mp_iszero (&t) == MP_NO) {
04908         if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
04909             mp_clear (&t);
04910             return res;
04911         }
04912         *str++ = mp_s_rmap[d];
04913         ++digs;
04914     }
04915 
04916     /* reverse the digits of the string.  In this case _s points
04917      * to the first digit [excluding the sign] of the number]
04918      */
04919     bn_reverse ((unsigned char *)_s, digs);
04920 
04921     /* append a NULL so the string is properly terminated */
04922     *str = '\0';
04923 
04924     mp_clear (&t);
04925     return MP_OKAY;
04926 }
04927 
04928 #ifdef WOLFSSL_DEBUG_MATH
04929 void mp_dump(const char* desc, mp_int* a, byte verbose)
04930 {
04931   char *buffer;
04932   int size = a->alloc;
04933 
04934   buffer = (char*)XMALLOC(size * sizeof(mp_digit) * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
04935   if (buffer == NULL) {
04936     return;
04937   }
04938 
04939   printf("%s: ptr=%p, used=%d, sign=%d, size=%d, mpd=%d\n",
04940     desc, a, a->used, a->sign, size, (int)sizeof(mp_digit));
04941 
04942   mp_tohex(a, buffer);
04943   printf("  %s\n  ", buffer);
04944 
04945   if (verbose) {
04946     int i;
04947     for(i=0; i<a->alloc * (int)sizeof(mp_digit); i++) {
04948       printf("%02x ", *(((byte*)a->dp) + i));
04949     }
04950     printf("\n");
04951   }
04952 
04953   XFREE(buffer, NULL, DYNAMIC_TYPE_TMP_BUFFER);
04954 }
04955 #endif /* WOLFSSL_DEBUG_MATH */
04956 
04957 #endif /* defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || defined(WOLFSSL_DEBUG_MATH) */
04958 
04959 #endif /* WOLFSSL_SP_MATH */
04960 
04961 #endif /* USE_FAST_MATH */
04962 
04963 #endif /* NO_BIG_INT */
04964 
04965