Renesas / SecureDweet
Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers tfm.c Source File

tfm.c

00001 /* tfm.c
00002  *
00003  * Copyright (C) 2006-2016 wolfSSL Inc.
00004  *
00005  * This file is part of wolfSSL.
00006  *
00007  * wolfSSL is free software; you can redistribute it and/or modify
00008  * it under the terms of the GNU General Public License as published by
00009  * the Free Software Foundation; either version 2 of the License, or
00010  * (at your option) any later version.
00011  *
00012  * wolfSSL is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
00020  */
00021 
00022 
00023 
00024 /*
00025  * Based on public domain TomsFastMath 0.10 by Tom St Denis, tomstdenis@iahu.ca,
00026  * http://math.libtomcrypt.com
00027  */
00028 
00029 /**
00030  *  Edited by Moises Guimaraes (moisesguimaraesm@gmail.com)
00031  *  to fit CyaSSL's needs.
00032  */
00033 
00034 #ifdef HAVE_CONFIG_H
00035     #include <config.h>
00036 #endif
00037 
00038 /* in case user set USE_FAST_MATH there */
00039 #include <wolfssl/wolfcrypt/settings.h>
00040 #ifdef NO_INLINE
00041     #include <wolfssl/wolfcrypt/misc.h>
00042 #else
00043     #include <wolfcrypt/src/misc.c>
00044 #endif
00045 
00046 #ifdef USE_FAST_MATH
00047 
00048 #include <wolfssl/wolfcrypt/random.h>
00049 #include <wolfssl/wolfcrypt/tfm.h>
00050 #include <wolfcrypt/src/asm.c>  /* will define asm MACROS or C ones */
00051 
00052 
00053 /* math settings check */
00054 word32 CheckRunTimeSettings(void)
00055 {
00056     return CTC_SETTINGS;
00057 }
00058 
00059 
00060 /* math settings size check */
00061 word32 CheckRunTimeFastMath(void)
00062 {
00063     return FP_SIZE;
00064 }
00065 
00066 
00067 /* Functions */
00068 
00069 void fp_add(fp_int *a, fp_int *b, fp_int *c)
00070 {
00071   int     sa, sb;
00072 
00073   /* get sign of both inputs */
00074   sa = a->sign;
00075   sb = b->sign;
00076 
00077   /* handle two cases, not four */
00078   if (sa == sb) {
00079     /* both positive or both negative */
00080     /* add their magnitudes, copy the sign */
00081     c->sign = sa;
00082     s_fp_add (a, b, c);
00083   } else {
00084     /* one positive, the other negative */
00085     /* subtract the one with the greater magnitude from */
00086     /* the one of the lesser magnitude.  The result gets */
00087     /* the sign of the one with the greater magnitude. */
00088     if (fp_cmp_mag (a, b) == FP_LT) {
00089       c->sign = sb;
00090       s_fp_sub (b, a, c);
00091     } else {
00092       c->sign = sa;
00093       s_fp_sub (a, b, c);
00094     }
00095   }
00096 }
00097 
00098 /* unsigned addition */
00099 void s_fp_add(fp_int *a, fp_int *b, fp_int *c)
00100 {
00101   int      x, y, oldused;
00102   register fp_word  t;
00103 
00104   y       = MAX(a->used, b->used);
00105   oldused = MIN(c->used, FP_SIZE);   /* help static analysis w/ largest size */
00106   c->used = y;
00107 
00108   t = 0;
00109   for (x = 0; x < y; x++) {
00110       t         += ((fp_word)a->dp[x]) + ((fp_word)b->dp[x]);
00111       c->dp[x]   = (fp_digit)t;
00112       t        >>= DIGIT_BIT;
00113   }
00114   if (t != 0 && x < FP_SIZE) {
00115      c->dp[c->used++] = (fp_digit)t;
00116      ++x;
00117   }
00118 
00119   c->used = x;
00120   for (; x < oldused; x++) {
00121      c->dp[x] = 0;
00122   }
00123   fp_clamp(c);
00124 }
00125 
00126 /* c = a - b */
00127 void fp_sub(fp_int *a, fp_int *b, fp_int *c)
00128 {
00129   int     sa, sb;
00130 
00131   sa = a->sign;
00132   sb = b->sign;
00133 
00134   if (sa != sb) {
00135     /* subtract a negative from a positive, OR */
00136     /* subtract a positive from a negative. */
00137     /* In either case, ADD their magnitudes, */
00138     /* and use the sign of the first number. */
00139     c->sign = sa;
00140     s_fp_add (a, b, c);
00141   } else {
00142     /* subtract a positive from a positive, OR */
00143     /* subtract a negative from a negative. */
00144     /* First, take the difference between their */
00145     /* magnitudes, then... */
00146     if (fp_cmp_mag (a, b) != FP_LT) {
00147       /* Copy the sign from the first */
00148       c->sign = sa;
00149       /* The first has a larger or equal magnitude */
00150       s_fp_sub (a, b, c);
00151     } else {
00152       /* The result has the *opposite* sign from */
00153       /* the first number. */
00154       c->sign = (sa == FP_ZPOS) ? FP_NEG : FP_ZPOS;
00155       /* The second has a larger magnitude */
00156       s_fp_sub (b, a, c);
00157     }
00158   }
00159 }
00160 
00161 /* unsigned subtraction ||a|| >= ||b|| ALWAYS! */
00162 void s_fp_sub(fp_int *a, fp_int *b, fp_int *c)
00163 {
00164   int      x, oldbused, oldused;
00165   fp_word  t;
00166 
00167   oldused  = c->used;
00168   oldbused = b->used;
00169   c->used  = a->used;
00170   t       = 0;
00171   for (x = 0; x < oldbused; x++) {
00172      t         = ((fp_word)a->dp[x]) - (((fp_word)b->dp[x]) + t);
00173      c->dp[x]  = (fp_digit)t;
00174      t         = (t >> DIGIT_BIT)&1;
00175   }
00176   for (; x < a->used; x++) {
00177      t         = ((fp_word)a->dp[x]) - t;
00178      c->dp[x]  = (fp_digit)t;
00179      t         = (t >> DIGIT_BIT)&1;
00180    }
00181   for (; x < oldused; x++) {
00182      c->dp[x] = 0;
00183   }
00184   fp_clamp(c);
00185 }
00186 
00187 /* c = a * b */
00188 void fp_mul(fp_int *A, fp_int *B, fp_int *C)
00189 {
00190     int   y, yy;
00191 
00192     y  = MAX(A->used, B->used);
00193     yy = MIN(A->used, B->used);
00194 
00195     /* call generic if we're out of range */
00196     if (y + yy > FP_SIZE) {
00197        fp_mul_comba(A, B, C);
00198        return ;
00199     }
00200 
00201     /* pick a comba (unrolled 4/8/16/32 x or rolled) based on the size
00202        of the largest input.  We also want to avoid doing excess mults if the
00203        inputs are not close to the next power of two.  That is, for example,
00204        if say y=17 then we would do (32-17)^2 = 225 unneeded multiplications
00205     */
00206 
00207 #ifdef TFM_MUL3
00208         if (y <= 3) {
00209            fp_mul_comba3(A,B,C);
00210            return;
00211         }
00212 #endif
00213 #ifdef TFM_MUL4
00214         if (y == 4) {
00215            fp_mul_comba4(A,B,C);
00216            return;
00217         }
00218 #endif
00219 #ifdef TFM_MUL6
00220         if (y <= 6) {
00221            fp_mul_comba6(A,B,C);
00222            return;
00223         }
00224 #endif
00225 #ifdef TFM_MUL7
00226         if (y == 7) {
00227            fp_mul_comba7(A,B,C);
00228            return;
00229         }
00230 #endif
00231 #ifdef TFM_MUL8
00232         if (y == 8) {
00233            fp_mul_comba8(A,B,C);
00234            return;
00235         }
00236 #endif
00237 #ifdef TFM_MUL9
00238         if (y == 9) {
00239            fp_mul_comba9(A,B,C);
00240            return;
00241         }
00242 #endif
00243 #ifdef TFM_MUL12
00244         if (y <= 12) {
00245            fp_mul_comba12(A,B,C);
00246            return;
00247         }
00248 #endif
00249 #ifdef TFM_MUL17
00250         if (y <= 17) {
00251            fp_mul_comba17(A,B,C);
00252            return;
00253         }
00254 #endif
00255 
00256 #ifdef TFM_SMALL_SET
00257         if (y <= 16) {
00258            fp_mul_comba_small(A,B,C);
00259            return;
00260         }
00261 #endif
00262 #if defined(TFM_MUL20)
00263         if (y <= 20) {
00264            fp_mul_comba20(A,B,C);
00265            return;
00266         }
00267 #endif
00268 #if defined(TFM_MUL24)
00269         if (yy >= 16 && y <= 24) {
00270            fp_mul_comba24(A,B,C);
00271            return;
00272         }
00273 #endif
00274 #if defined(TFM_MUL28)
00275         if (yy >= 20 && y <= 28) {
00276            fp_mul_comba28(A,B,C);
00277            return;
00278         }
00279 #endif
00280 #if defined(TFM_MUL32)
00281         if (yy >= 24 && y <= 32) {
00282            fp_mul_comba32(A,B,C);
00283            return;
00284         }
00285 #endif
00286 #if defined(TFM_MUL48)
00287         if (yy >= 40 && y <= 48) {
00288            fp_mul_comba48(A,B,C);
00289            return;
00290         }
00291 #endif
00292 #if defined(TFM_MUL64)
00293         if (yy >= 56 && y <= 64) {
00294            fp_mul_comba64(A,B,C);
00295            return;
00296         }
00297 #endif
00298         fp_mul_comba(A,B,C);
00299 }
00300 
00301 void fp_mul_2(fp_int * a, fp_int * b)
00302 {
00303   int     x, oldused;
00304 
00305   oldused = b->used;
00306   b->used = a->used;
00307 
00308   {
00309     register fp_digit r, rr, *tmpa, *tmpb;
00310 
00311     /* alias for source */
00312     tmpa = a->dp;
00313 
00314     /* alias for dest */
00315     tmpb = b->dp;
00316 
00317     /* carry */
00318     r = 0;
00319     for (x = 0; x < a->used; x++) {
00320 
00321       /* get what will be the *next* carry bit from the
00322        * MSB of the current digit
00323        */
00324       rr = *tmpa >> ((fp_digit)(DIGIT_BIT - 1));
00325 
00326       /* now shift up this digit, add in the carry [from the previous] */
00327       *tmpb++ = ((*tmpa++ << ((fp_digit)1)) | r);
00328 
00329       /* copy the carry that would be from the source
00330        * digit into the next iteration
00331        */
00332       r = rr;
00333     }
00334 
00335     /* new leading digit? */
00336     if (r != 0 && b->used != (FP_SIZE-1)) {
00337       /* add a MSB which is always 1 at this point */
00338       *tmpb = 1;
00339       ++(b->used);
00340     }
00341 
00342     /* now zero any excess digits on the destination
00343      * that we didn't write to
00344      */
00345     tmpb = b->dp + b->used;
00346     for (x = b->used; x < oldused; x++) {
00347       *tmpb++ = 0;
00348     }
00349   }
00350   b->sign = a->sign;
00351 }
00352 
00353 /* c = a * b */
00354 void fp_mul_d(fp_int *a, fp_digit b, fp_int *c)
00355 {
00356    fp_word  w;
00357    int      x, oldused;
00358 
00359    oldused = c->used;
00360    c->used = a->used;
00361    c->sign = a->sign;
00362    w       = 0;
00363    for (x = 0; x < a->used; x++) {
00364        w         = ((fp_word)a->dp[x]) * ((fp_word)b) + w;
00365        c->dp[x]  = (fp_digit)w;
00366        w         = w >> DIGIT_BIT;
00367    }
00368    if (w != 0 && (a->used != FP_SIZE)) {
00369       c->dp[c->used++] = (fp_digit) w;
00370       ++x;
00371    }
00372    for (; x < oldused; x++) {
00373       c->dp[x] = 0;
00374    }
00375    fp_clamp(c);
00376 }
00377 
00378 /* c = a * 2**d */
00379 void fp_mul_2d(fp_int *a, int b, fp_int *c)
00380 {
00381    fp_digit carry, carrytmp, shift;
00382    int x;
00383 
00384    /* copy it */
00385    fp_copy(a, c);
00386 
00387    /* handle whole digits */
00388    if (b >= DIGIT_BIT) {
00389       fp_lshd(c, b/DIGIT_BIT);
00390    }
00391    b %= DIGIT_BIT;
00392 
00393    /* shift the digits */
00394    if (b != 0) {
00395       carry = 0;
00396       shift = DIGIT_BIT - b;
00397       for (x = 0; x < c->used; x++) {
00398           carrytmp = c->dp[x] >> shift;
00399           c->dp[x] = (c->dp[x] << b) + carry;
00400           carry = carrytmp;
00401       }
00402       /* store last carry if room */
00403       if (carry && x < FP_SIZE) {
00404          c->dp[c->used++] = carry;
00405       }
00406    }
00407    fp_clamp(c);
00408 }
00409 
00410 /* generic PxQ multiplier */
00411 #if defined(HAVE_INTEL_MULX)
00412 
00413 INLINE static void fp_mul_comba_mulx(fp_int *A, fp_int *B, fp_int *C)
00414 
00415 {
00416    int       ix, iy, iz, pa;
00417    fp_int    tmp, *dst;
00418 
00419    /* get size of output and trim */
00420    pa = A->used + B->used;
00421    if (pa >= FP_SIZE) {
00422       pa = FP_SIZE-1;
00423    }
00424 
00425    if (A == C || B == C) {
00426       fp_init(&tmp);
00427       dst = &tmp;
00428    } else {
00429       fp_zero(C);
00430       dst = C;
00431    }
00432 
00433    TFM_INTEL_MUL_COMBA(A, B, dst) ;
00434 
00435   dst->used = pa;
00436   dst->sign = A->sign ^ B->sign;
00437   fp_clamp(dst);
00438   fp_copy(dst, C);
00439 }
00440 #endif
00441 
00442 void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
00443 {
00444    int       ix, iy, iz, tx, ty, pa;
00445    fp_digit  c0, c1, c2, *tmpx, *tmpy;
00446    fp_int    tmp, *dst;
00447 
00448    IF_HAVE_INTEL_MULX(fp_mul_comba_mulx(A, B, C), return) ;
00449 
00450    COMBA_START;
00451    COMBA_CLEAR;
00452 
00453    /* get size of output and trim */
00454    pa = A->used + B->used;
00455    if (pa >= FP_SIZE) {
00456       pa = FP_SIZE-1;
00457    }
00458 
00459    if (A == C || B == C) {
00460       fp_init(&tmp);
00461       dst = &tmp;
00462    } else {
00463       fp_zero(C);
00464       dst = C;
00465    }
00466 
00467    for (ix = 0; ix < pa; ix++) {
00468       /* get offsets into the two bignums */
00469       ty = MIN(ix, B->used-1);
00470       tx = ix - ty;
00471 
00472       /* setup temp aliases */
00473       tmpx = A->dp + tx;
00474       tmpy = B->dp + ty;
00475 
00476       /* this is the number of times the loop will iterate, essentially its
00477          while (tx++ < a->used && ty-- >= 0) { ... }
00478        */
00479       iy = MIN(A->used-tx, ty+1);
00480 
00481       /* execute loop */
00482       COMBA_FORWARD;
00483       for (iz = 0; iz < iy; ++iz) {
00484           /* TAO change COMBA_ADD back to MULADD */
00485           MULADD(*tmpx++, *tmpy--);
00486       }
00487 
00488       /* store term */
00489       COMBA_STORE(dst->dp[ix]);
00490   }
00491   COMBA_FINI;
00492 
00493   dst->used = pa;
00494   dst->sign = A->sign ^ B->sign;
00495   fp_clamp(dst);
00496   fp_copy(dst, C);
00497 }
00498 
00499 /* a/b => cb + d == a */
00500 int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
00501 {
00502   fp_int  q, x, y, t1, t2;
00503   int     n, t, i, norm, neg;
00504 
00505   /* is divisor zero ? */
00506   if (fp_iszero (b) == 1) {
00507     return FP_VAL;
00508   }
00509 
00510   /* if a < b then q=0, r = a */
00511   if (fp_cmp_mag (a, b) == FP_LT) {
00512     if (d != NULL) {
00513       fp_copy (a, d);
00514     }
00515     if (c != NULL) {
00516       fp_zero (c);
00517     }
00518     return FP_OKAY;
00519   }
00520 
00521   fp_init(&q);
00522   q.used = a->used + 2;
00523 
00524   fp_init(&t1);
00525   fp_init(&t2);
00526   fp_init_copy(&x, a);
00527   fp_init_copy(&y, b);
00528 
00529   /* fix the sign */
00530   neg = (a->sign == b->sign) ? FP_ZPOS : FP_NEG;
00531   x.sign = y.sign = FP_ZPOS;
00532 
00533   /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
00534   norm = fp_count_bits(&y) % DIGIT_BIT;
00535   if (norm < (int)(DIGIT_BIT-1)) {
00536      norm = (DIGIT_BIT-1) - norm;
00537      fp_mul_2d (&x, norm, &x);
00538      fp_mul_2d (&y, norm, &y);
00539   } else {
00540      norm = 0;
00541   }
00542 
00543   /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
00544   n = x.used - 1;
00545   t = y.used - 1;
00546 
00547   /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
00548   fp_lshd (&y, n - t); /* y = y*b**{n-t} */
00549 
00550   while (fp_cmp (&x, &y) != FP_LT) {
00551     ++(q.dp[n - t]);
00552     fp_sub (&x, &y, &x);
00553   }
00554 
00555   /* reset y by shifting it back down */
00556   fp_rshd (&y, n - t);
00557 
00558   /* step 3. for i from n down to (t + 1) */
00559   for (i = n; i >= (t + 1); i--) {
00560     if (i > x.used) {
00561       continue;
00562     }
00563 
00564     /* step 3.1 if xi == yt then set q{i-t-1} to b-1,
00565      * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
00566     if (x.dp[i] == y.dp[t]) {
00567       q.dp[i - t - 1] = (fp_digit) ((((fp_word)1) << DIGIT_BIT) - 1);
00568     } else {
00569       fp_word tmp;
00570       tmp = ((fp_word) x.dp[i]) << ((fp_word) DIGIT_BIT);
00571       tmp |= ((fp_word) x.dp[i - 1]);
00572       tmp /= ((fp_word)y.dp[t]);
00573       q.dp[i - t - 1] = (fp_digit) (tmp);
00574     }
00575 
00576     /* while (q{i-t-1} * (yt * b + y{t-1})) >
00577              xi * b**2 + xi-1 * b + xi-2
00578 
00579        do q{i-t-1} -= 1;
00580     */
00581     q.dp[i - t - 1] = (q.dp[i - t - 1] + 1);
00582     do {
00583       q.dp[i - t - 1] = (q.dp[i - t - 1] - 1);
00584 
00585       /* find left hand */
00586       fp_zero (&t1);
00587       t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
00588       t1.dp[1] = y.dp[t];
00589       t1.used = 2;
00590       fp_mul_d (&t1, q.dp[i - t - 1], &t1);
00591 
00592       /* find right hand */
00593       t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
00594       t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
00595       t2.dp[2] = x.dp[i];
00596       t2.used = 3;
00597     } while (fp_cmp_mag(&t1, &t2) == FP_GT);
00598 
00599     /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
00600     fp_mul_d (&y, q.dp[i - t - 1], &t1);
00601     fp_lshd  (&t1, i - t - 1);
00602     fp_sub   (&x, &t1, &x);
00603 
00604     /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
00605     if (x.sign == FP_NEG) {
00606       fp_copy (&y, &t1);
00607       fp_lshd (&t1, i - t - 1);
00608       fp_add (&x, &t1, &x);
00609       q.dp[i - t - 1] = q.dp[i - t - 1] - 1;
00610     }
00611   }
00612 
00613   /* now q is the quotient and x is the remainder
00614    * [which we have to normalize]
00615    */
00616 
00617   /* get sign before writing to c */
00618   x.sign = x.used == 0 ? FP_ZPOS : a->sign;
00619 
00620   if (c != NULL) {
00621     fp_clamp (&q);
00622     fp_copy (&q, c);
00623     c->sign = neg;
00624   }
00625 
00626   if (d != NULL) {
00627     fp_div_2d (&x, norm, &x, NULL);
00628 
00629 /* the following is a kludge, essentially we were seeing the right remainder but
00630    with excess digits that should have been zero
00631  */
00632     for (i = b->used; i < x.used; i++) {
00633         x.dp[i] = 0;
00634     }
00635     fp_clamp(&x);
00636     fp_copy (&x, d);
00637   }
00638 
00639   return FP_OKAY;
00640 }
00641 
00642 /* b = a/2 */
00643 void fp_div_2(fp_int * a, fp_int * b)
00644 {
00645   int     x, oldused;
00646 
00647   oldused = b->used;
00648   b->used = a->used;
00649   {
00650     register fp_digit r, rr, *tmpa, *tmpb;
00651 
00652     /* source alias */
00653     tmpa = a->dp + b->used - 1;
00654 
00655     /* dest alias */
00656     tmpb = b->dp + b->used - 1;
00657 
00658     /* carry */
00659     r = 0;
00660     for (x = b->used - 1; x >= 0; x--) {
00661       /* get the carry for the next iteration */
00662       rr = *tmpa & 1;
00663 
00664       /* shift the current digit, add in carry and store */
00665       *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
00666 
00667       /* forward carry to next iteration */
00668       r = rr;
00669     }
00670 
00671     /* zero excess digits */
00672     tmpb = b->dp + b->used;
00673     for (x = b->used; x < oldused; x++) {
00674       *tmpb++ = 0;
00675     }
00676   }
00677   b->sign = a->sign;
00678   fp_clamp (b);
00679 }
00680 
00681 /* c = a / 2**b */
00682 void fp_div_2d(fp_int *a, int b, fp_int *c, fp_int *d)
00683 {
00684   int      D;
00685   fp_int   t;
00686 
00687   /* if the shift count is <= 0 then we do no work */
00688   if (b <= 0) {
00689     fp_copy (a, c);
00690     if (d != NULL) {
00691       fp_zero (d);
00692     }
00693     return;
00694   }
00695 
00696   fp_init(&t);
00697 
00698   /* get the remainder */
00699   if (d != NULL) {
00700     fp_mod_2d (a, b, &t);
00701   }
00702 
00703   /* copy */
00704   fp_copy(a, c);
00705 
00706   /* shift by as many digits in the bit count */
00707   if (b >= (int)DIGIT_BIT) {
00708     fp_rshd (c, b / DIGIT_BIT);
00709   }
00710 
00711   /* shift any bit count < DIGIT_BIT */
00712   D = (b % DIGIT_BIT);
00713   if (D != 0) {
00714     fp_rshb(c, D);
00715   }
00716   fp_clamp (c);
00717   if (d != NULL) {
00718     fp_copy (&t, d);
00719   }
00720 }
00721 
00722 /* c = a mod b, 0 <= c < b  */
00723 int fp_mod(fp_int *a, fp_int *b, fp_int *c)
00724 {
00725    fp_int t;
00726    int    err;
00727 
00728    fp_init(&t);
00729    if ((err = fp_div(a, b, NULL, &t)) != FP_OKAY) {
00730       return err;
00731    }
00732    if (t.sign != b->sign) {
00733       fp_add(&t, b, c);
00734    } else {
00735       fp_copy(&t, c);
00736   }
00737   return FP_OKAY;
00738 }
00739 
00740 /* c = a mod 2**d */
00741 void fp_mod_2d(fp_int *a, int b, fp_int *c)
00742 {
00743    int x;
00744 
00745    /* zero if count less than or equal to zero */
00746    if (b <= 0) {
00747       fp_zero(c);
00748       return;
00749    }
00750 
00751    /* get copy of input */
00752    fp_copy(a, c);
00753 
00754    /* if 2**d is larger than we just return */
00755    if (b >= (DIGIT_BIT * a->used)) {
00756       return;
00757    }
00758 
00759   /* zero digits above the last digit of the modulus */
00760   for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
00761     c->dp[x] = 0;
00762   }
00763   /* clear the digit that is not completely outside/inside the modulus */
00764   c->dp[b / DIGIT_BIT] &= ~((fp_digit)0) >> (DIGIT_BIT - b);
00765   fp_clamp (c);
00766 }
00767 
00768 static int fp_invmod_slow (fp_int * a, fp_int * b, fp_int * c)
00769 {
00770   fp_int  x, y, u, v, A, B, C, D;
00771   int     res;
00772 
00773   /* b cannot be negative */
00774   if (b->sign == FP_NEG || fp_iszero(b) == 1) {
00775     return FP_VAL;
00776   }
00777 
00778   /* init temps */
00779   fp_init(&x);    fp_init(&y);
00780   fp_init(&u);    fp_init(&v);
00781   fp_init(&A);    fp_init(&B);
00782   fp_init(&C);    fp_init(&D);
00783 
00784   /* x = a, y = b */
00785   if ((res = fp_mod(a, b, &x)) != FP_OKAY) {
00786       return res;
00787   }
00788   fp_copy(b, &y);
00789 
00790   /* 2. [modified] if x,y are both even then return an error! */
00791   if (fp_iseven (&x) == 1 && fp_iseven (&y) == 1) {
00792     return FP_VAL;
00793   }
00794 
00795   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
00796   fp_copy (&x, &u);
00797   fp_copy (&y, &v);
00798   fp_set (&A, 1);
00799   fp_set (&D, 1);
00800 
00801 top:
00802   /* 4.  while u is even do */
00803   while (fp_iseven (&u) == 1) {
00804     /* 4.1 u = u/2 */
00805     fp_div_2 (&u, &u);
00806 
00807     /* 4.2 if A or B is odd then */
00808     if (fp_isodd (&A) == 1 || fp_isodd (&B) == 1) {
00809       /* A = (A+y)/2, B = (B-x)/2 */
00810       fp_add (&A, &y, &A);
00811       fp_sub (&B, &x, &B);
00812     }
00813     /* A = A/2, B = B/2 */
00814     fp_div_2 (&A, &A);
00815     fp_div_2 (&B, &B);
00816   }
00817 
00818   /* 5.  while v is even do */
00819   while (fp_iseven (&v) == 1) {
00820     /* 5.1 v = v/2 */
00821     fp_div_2 (&v, &v);
00822 
00823     /* 5.2 if C or D is odd then */
00824     if (fp_isodd (&C) == 1 || fp_isodd (&D) == 1) {
00825       /* C = (C+y)/2, D = (D-x)/2 */
00826       fp_add (&C, &y, &C);
00827       fp_sub (&D, &x, &D);
00828     }
00829     /* C = C/2, D = D/2 */
00830     fp_div_2 (&C, &C);
00831     fp_div_2 (&D, &D);
00832   }
00833 
00834   /* 6.  if u >= v then */
00835   if (fp_cmp (&u, &v) != FP_LT) {
00836     /* u = u - v, A = A - C, B = B - D */
00837     fp_sub (&u, &v, &u);
00838     fp_sub (&A, &C, &A);
00839     fp_sub (&B, &D, &B);
00840   } else {
00841     /* v - v - u, C = C - A, D = D - B */
00842     fp_sub (&v, &u, &v);
00843     fp_sub (&C, &A, &C);
00844     fp_sub (&D, &B, &D);
00845   }
00846 
00847   /* if not zero goto step 4 */
00848   if (fp_iszero (&u) == 0)
00849     goto top;
00850 
00851   /* now a = C, b = D, gcd == g*v */
00852 
00853   /* if v != 1 then there is no inverse */
00854   if (fp_cmp_d (&v, 1) != FP_EQ) {
00855     return FP_VAL;
00856   }
00857 
00858   /* if its too low */
00859   while (fp_cmp_d(&C, 0) == FP_LT) {
00860       fp_add(&C, b, &C);
00861   }
00862 
00863   /* too big */
00864   while (fp_cmp_mag(&C, b) != FP_LT) {
00865       fp_sub(&C, b, &C);
00866   }
00867 
00868   /* C is now the inverse */
00869   fp_copy(&C, c);
00870   return FP_OKAY;
00871 }
00872 
00873 
00874 /* c = 1/a (mod b) for odd b only */
00875 int fp_invmod(fp_int *a, fp_int *b, fp_int *c)
00876 {
00877   fp_int  x, y, u, v, B, D;
00878   int     neg, loop_check = 0;
00879 
00880   /* 2. [modified] b must be odd   */
00881   if (fp_iseven (b) == FP_YES) {
00882     return fp_invmod_slow(a,b,c);
00883   }
00884 
00885   /* init all our temps */
00886   fp_init(&x);  fp_init(&y);
00887   fp_init(&u);  fp_init(&v);
00888   fp_init(&B);  fp_init(&D);
00889 
00890   /* x == modulus, y == value to invert */
00891   fp_copy(b, &x);
00892 
00893   /* we need y = |a| */
00894   fp_abs(a, &y);
00895 
00896   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
00897   fp_copy(&x, &u);
00898   fp_copy(&y, &v);
00899   fp_set (&D, 1);
00900 
00901 top:
00902   /* 4.  while u is even do */
00903   while (fp_iseven (&u) == FP_YES) {
00904     /* 4.1 u = u/2 */
00905     fp_div_2 (&u, &u);
00906 
00907     /* 4.2 if B is odd then */
00908     if (fp_isodd (&B) == FP_YES) {
00909       fp_sub (&B, &x, &B);
00910     }
00911     /* B = B/2 */
00912     fp_div_2 (&B, &B);
00913   }
00914 
00915   /* 5.  while v is even do */
00916   while (fp_iseven (&v) == FP_YES) {
00917     /* 5.1 v = v/2 */
00918     fp_div_2 (&v, &v);
00919 
00920     /* 5.2 if D is odd then */
00921     if (fp_isodd (&D) == FP_YES) {
00922       /* D = (D-x)/2 */
00923       fp_sub (&D, &x, &D);
00924     }
00925     /* D = D/2 */
00926     fp_div_2 (&D, &D);
00927   }
00928 
00929   /* 6.  if u >= v then */
00930   if (fp_cmp (&u, &v) != FP_LT) {
00931     /* u = u - v, B = B - D */
00932     fp_sub (&u, &v, &u);
00933     fp_sub (&B, &D, &B);
00934   } else {
00935     /* v - v - u, D = D - B */
00936     fp_sub (&v, &u, &v);
00937     fp_sub (&D, &B, &D);
00938   }
00939 
00940   /* if not zero goto step 4 */
00941   if (fp_iszero (&u) == FP_NO) {
00942     if (++loop_check > 4096) /* bad input */
00943       return FP_VAL;
00944     goto top;
00945   }
00946 
00947   /* now a = C, b = D, gcd == g*v */
00948 
00949   /* if v != 1 then there is no inverse */
00950   if (fp_cmp_d (&v, 1) != FP_EQ) {
00951     return FP_VAL;
00952   }
00953 
00954   /* b is now the inverse */
00955   neg = a->sign;
00956   while (D.sign == FP_NEG) {
00957     fp_add (&D, b, &D);
00958   }
00959   /* too big */
00960   while (fp_cmp_mag(&D, b) != FP_LT) {
00961     fp_sub(&D, b, &D);
00962   }
00963   fp_copy (&D, c);
00964   c->sign = neg;
00965   return FP_OKAY;
00966 }
00967 
00968 /* d = a * b (mod c) */
00969 int fp_mulmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
00970 {
00971   fp_int tmp;
00972   fp_init(&tmp);
00973   fp_mul(a, b, &tmp);
00974   return fp_mod(&tmp, c, d);
00975 }
00976 
00977 #ifdef TFM_TIMING_RESISTANT
00978 
00979 /* timing resistant montgomery ladder based exptmod
00980    Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder",
00981    Cryptographic Hardware and Embedded Systems, CHES 2002
00982 */
00983 static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
00984 {
00985   fp_int   R[2];
00986   fp_digit buf, mp;
00987   int      err, bitcnt, digidx, y;
00988 
00989   /* now setup montgomery  */
00990   if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) {
00991      return err;
00992   }
00993 
00994   fp_init(&R[0]);
00995   fp_init(&R[1]);
00996 
00997   /* now we need R mod m */
00998   fp_montgomery_calc_normalization (&R[0], P);
00999 
01000   /* now set R[0][1] to G * R mod m */
01001   if (fp_cmp_mag(P, G) != FP_GT) {
01002      /* G > P so we reduce it first */
01003      fp_mod(G, P, &R[1]);
01004   } else {
01005      fp_copy(G, &R[1]);
01006   }
01007   fp_mulmod (&R[1], &R[0], P, &R[1]);
01008 
01009   /* for j = t-1 downto 0 do
01010         r_!k = R0*R1; r_k = r_k^2
01011   */
01012 
01013   /* set initial mode and bit cnt */
01014   bitcnt = 1;
01015   buf    = 0;
01016   digidx = X->used - 1;
01017 
01018   for (;;) {
01019     /* grab next digit as required */
01020     if (--bitcnt == 0) {
01021       /* if digidx == -1 we are out of digits so break */
01022       if (digidx == -1) {
01023         break;
01024       }
01025       /* read next digit and reset bitcnt */
01026       buf    = X->dp[digidx--];
01027       bitcnt = (int)DIGIT_BIT;
01028     }
01029 
01030     /* grab the next msb from the exponent */
01031     y     = (int)(buf >> (DIGIT_BIT - 1)) & 1;
01032     buf <<= (fp_digit)1;
01033 
01034     /* do ops */
01035     fp_mul(&R[0], &R[1], &R[y^1]); fp_montgomery_reduce(&R[y^1], P, mp);
01036     fp_sqr(&R[y], &R[y]);          fp_montgomery_reduce(&R[y], P, mp);
01037   }
01038 
01039    fp_montgomery_reduce(&R[0], P, mp);
01040    fp_copy(&R[0], Y);
01041    return FP_OKAY;
01042 }
01043 
01044 #else
01045 
01046 /* y = g**x (mod b)
01047  * Some restrictions... x must be positive and < b
01048  */
01049 static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
01050 {
01051   fp_int   M[64], res;
01052   fp_digit buf, mp;
01053   int      err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
01054 
01055   /* find window size */
01056   x = fp_count_bits (X);
01057   if (x <= 21) {
01058     winsize = 1;
01059   } else if (x <= 36) {
01060     winsize = 3;
01061   } else if (x <= 140) {
01062     winsize = 4;
01063   } else if (x <= 450) {
01064     winsize = 5;
01065   } else {
01066     winsize = 6;
01067   }
01068 
01069   /* init M array */
01070   for(x = 0; x < (int)(sizeof(M)/sizeof(fp_int)); x++)
01071     fp_init(&M[x]);
01072 
01073   /* now setup montgomery  */
01074   if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) {
01075      return err;
01076   }
01077 
01078   /* setup result */
01079   fp_init(&res);
01080 
01081   /* create M table
01082    *
01083    * The M table contains powers of the input base, e.g. M[x] = G^x mod P
01084    *
01085    * The first half of the table is not computed though accept for M[0] and M[1]
01086    */
01087 
01088    /* now we need R mod m */
01089    fp_montgomery_calc_normalization (&res, P);
01090 
01091    /* now set M[1] to G * R mod m */
01092    if (fp_cmp_mag(P, G) != FP_GT) {
01093       /* G > P so we reduce it first */
01094       fp_mod(G, P, &M[1]);
01095    } else {
01096       fp_copy(G, &M[1]);
01097    }
01098    fp_mulmod (&M[1], &res, P, &M[1]);
01099 
01100   /* compute the value at M[1<<(winsize-1)] by
01101    * squaring M[1] (winsize-1) times */
01102   fp_copy (&M[1], &M[1 << (winsize - 1)]);
01103   for (x = 0; x < (winsize - 1); x++) {
01104     fp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)]);
01105     fp_montgomery_reduce (&M[1 << (winsize - 1)], P, mp);
01106   }
01107 
01108   /* create upper table */
01109   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
01110     fp_mul(&M[x - 1], &M[1], &M[x]);
01111     fp_montgomery_reduce(&M[x], P, mp);
01112   }
01113 
01114   /* set initial mode and bit cnt */
01115   mode   = 0;
01116   bitcnt = 1;
01117   buf    = 0;
01118   digidx = X->used - 1;
01119   bitcpy = 0;
01120   bitbuf = 0;
01121 
01122   for (;;) {
01123     /* grab next digit as required */
01124     if (--bitcnt == 0) {
01125       /* if digidx == -1 we are out of digits so break */
01126       if (digidx == -1) {
01127         break;
01128       }
01129       /* read next digit and reset bitcnt */
01130       buf    = X->dp[digidx--];
01131       bitcnt = (int)DIGIT_BIT;
01132     }
01133 
01134     /* grab the next msb from the exponent */
01135     y     = (int)(buf >> (DIGIT_BIT - 1)) & 1;
01136     buf <<= (fp_digit)1;
01137 
01138     /* if the bit is zero and mode == 0 then we ignore it
01139      * These represent the leading zero bits before the first 1 bit
01140      * in the exponent.  Technically this opt is not required but it
01141      * does lower the # of trivial squaring/reductions used
01142      */
01143     if (mode == 0 && y == 0) {
01144       continue;
01145     }
01146 
01147     /* if the bit is zero and mode == 1 then we square */
01148     if (mode == 1 && y == 0) {
01149       fp_sqr(&res, &res);
01150       fp_montgomery_reduce(&res, P, mp);
01151       continue;
01152     }
01153 
01154     /* else we add it to the window */
01155     bitbuf |= (y << (winsize - ++bitcpy));
01156     mode    = 2;
01157 
01158     if (bitcpy == winsize) {
01159       /* ok window is filled so square as required and multiply  */
01160       /* square first */
01161       for (x = 0; x < winsize; x++) {
01162         fp_sqr(&res, &res);
01163         fp_montgomery_reduce(&res, P, mp);
01164       }
01165 
01166       /* then multiply */
01167       fp_mul(&res, &M[bitbuf], &res);
01168       fp_montgomery_reduce(&res, P, mp);
01169 
01170       /* empty window and reset */
01171       bitcpy = 0;
01172       bitbuf = 0;
01173       mode   = 1;
01174     }
01175   }
01176 
01177   /* if bits remain then square/multiply */
01178   if (mode == 2 && bitcpy > 0) {
01179     /* square then multiply if the bit is set */
01180     for (x = 0; x < bitcpy; x++) {
01181       fp_sqr(&res, &res);
01182       fp_montgomery_reduce(&res, P, mp);
01183 
01184       /* get next bit of the window */
01185       bitbuf <<= 1;
01186       if ((bitbuf & (1 << winsize)) != 0) {
01187         /* then multiply */
01188         fp_mul(&res, &M[1], &res);
01189         fp_montgomery_reduce(&res, P, mp);
01190       }
01191     }
01192   }
01193 
01194   /* fixup result if Montgomery reduction is used
01195    * recall that any value in a Montgomery system is
01196    * actually multiplied by R mod n.  So we have
01197    * to reduce one more time to cancel out the factor
01198    * of R.
01199    */
01200   fp_montgomery_reduce(&res, P, mp);
01201 
01202   /* swap res with Y */
01203   fp_copy (&res, Y);
01204   return FP_OKAY;
01205 }
01206 
01207 #endif
01208 
01209 int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
01210 {
01211    /* prevent overflows */
01212    if (P->used > (FP_SIZE/2)) {
01213       return FP_VAL;
01214    }
01215 
01216    if (X->sign == FP_NEG) {
01217 #ifndef POSITIVE_EXP_ONLY  /* reduce stack if assume no negatives */
01218       int    err;
01219       fp_int tmp;
01220 
01221       /* yes, copy G and invmod it */
01222       fp_init_copy(&tmp, G);
01223       if ((err = fp_invmod(&tmp, P, &tmp)) != FP_OKAY) {
01224          return err;
01225       }
01226       X->sign = FP_ZPOS;
01227       err =  _fp_exptmod(&tmp, X, P, Y);
01228       if (X != Y) {
01229          X->sign = FP_NEG;
01230       }
01231       return err;
01232 #else
01233       return FP_VAL;
01234 #endif
01235    }
01236    else {
01237       /* Positive exponent so just exptmod */
01238       return _fp_exptmod(G, X, P, Y);
01239    }
01240 }
01241 
01242 /* computes a = 2**b */
01243 void fp_2expt(fp_int *a, int b)
01244 {
01245    int     z;
01246 
01247    /* zero a as per default */
01248    fp_zero (a);
01249 
01250    if (b < 0) {
01251       return;
01252    }
01253 
01254    z = b / DIGIT_BIT;
01255    if (z >= FP_SIZE) {
01256       return;
01257    }
01258 
01259   /* set the used count of where the bit will go */
01260   a->used = z + 1;
01261 
01262   /* put the single bit in its place */
01263   a->dp[z] = ((fp_digit)1) << (b % DIGIT_BIT);
01264 }
01265 
01266 /* b = a*a  */
01267 void fp_sqr(fp_int *A, fp_int *B)
01268 {
01269     int y = A->used;
01270 
01271     /* call generic if we're out of range */
01272     if (y + y > FP_SIZE) {
01273        fp_sqr_comba(A, B);
01274        return ;
01275     }
01276 
01277 #if defined(TFM_SQR3)
01278         if (y <= 3) {
01279            fp_sqr_comba3(A,B);
01280            return;
01281         }
01282 #endif
01283 #if defined(TFM_SQR4)
01284         if (y == 4) {
01285            fp_sqr_comba4(A,B);
01286            return;
01287         }
01288 #endif
01289 #if defined(TFM_SQR6)
01290         if (y <= 6) {
01291            fp_sqr_comba6(A,B);
01292            return;
01293         }
01294 #endif
01295 #if defined(TFM_SQR7)
01296         if (y == 7) {
01297            fp_sqr_comba7(A,B);
01298            return;
01299         }
01300 #endif
01301 #if defined(TFM_SQR8)
01302         if (y == 8) {
01303            fp_sqr_comba8(A,B);
01304            return;
01305         }
01306 #endif
01307 #if defined(TFM_SQR9)
01308         if (y == 9) {
01309            fp_sqr_comba9(A,B);
01310            return;
01311         }
01312 #endif
01313 #if defined(TFM_SQR12)
01314         if (y <= 12) {
01315            fp_sqr_comba12(A,B);
01316            return;
01317         }
01318 #endif
01319 #if defined(TFM_SQR17)
01320         if (y <= 17) {
01321            fp_sqr_comba17(A,B);
01322            return;
01323         }
01324 #endif
01325 #if defined(TFM_SMALL_SET)
01326         if (y <= 16) {
01327            fp_sqr_comba_small(A,B);
01328            return;
01329         }
01330 #endif
01331 #if defined(TFM_SQR20)
01332         if (y <= 20) {
01333            fp_sqr_comba20(A,B);
01334            return;
01335         }
01336 #endif
01337 #if defined(TFM_SQR24)
01338         if (y <= 24) {
01339            fp_sqr_comba24(A,B);
01340            return;
01341         }
01342 #endif
01343 #if defined(TFM_SQR28)
01344         if (y <= 28) {
01345            fp_sqr_comba28(A,B);
01346            return;
01347         }
01348 #endif
01349 #if defined(TFM_SQR32)
01350         if (y <= 32) {
01351            fp_sqr_comba32(A,B);
01352            return;
01353         }
01354 #endif
01355 #if defined(TFM_SQR48)
01356         if (y <= 48) {
01357            fp_sqr_comba48(A,B);
01358            return;
01359         }
01360 #endif
01361 #if defined(TFM_SQR64)
01362         if (y <= 64) {
01363            fp_sqr_comba64(A,B);
01364            return;
01365         }
01366 #endif
01367        fp_sqr_comba(A, B);
01368 }
01369 
01370 /* generic comba squarer */
01371 void fp_sqr_comba(fp_int *A, fp_int *B)
01372 {
01373   int       pa, ix, iz;
01374   fp_digit  c0, c1, c2;
01375   fp_int    tmp, *dst;
01376 #ifdef TFM_ISO
01377   fp_word   tt;
01378 #endif
01379 
01380   /* get size of output and trim */
01381   pa = A->used + A->used;
01382   if (pa >= FP_SIZE) {
01383      pa = FP_SIZE-1;
01384   }
01385 
01386   /* number of output digits to produce */
01387   COMBA_START;
01388   COMBA_CLEAR;
01389 
01390   if (A == B) {
01391      fp_init(&tmp);
01392      dst = &tmp;
01393   } else {
01394      fp_zero(B);
01395      dst = B;
01396   }
01397 
01398   for (ix = 0; ix < pa; ix++) {
01399       int      tx, ty, iy;
01400       fp_digit *tmpy, *tmpx;
01401 
01402       /* get offsets into the two bignums */
01403       ty = MIN(A->used-1, ix);
01404       tx = ix - ty;
01405 
01406       /* setup temp aliases */
01407       tmpx = A->dp + tx;
01408       tmpy = A->dp + ty;
01409 
01410       /* this is the number of times the loop will iterate,
01411          while (tx++ < a->used && ty-- >= 0) { ... }
01412        */
01413       iy = MIN(A->used-tx, ty+1);
01414 
01415       /* now for squaring tx can never equal ty
01416        * we halve the distance since they approach
01417        * at a rate of 2x and we have to round because
01418        * odd cases need to be executed
01419        */
01420       iy = MIN(iy, (ty-tx+1)>>1);
01421 
01422       /* forward carries */
01423       COMBA_FORWARD;
01424 
01425       /* execute loop */
01426       for (iz = 0; iz < iy; iz++) {
01427           SQRADD2(*tmpx++, *tmpy--);
01428       }
01429 
01430       /* even columns have the square term in them */
01431       if ((ix&1) == 0) {
01432           /* TAO change COMBA_ADD back to SQRADD */
01433           SQRADD(A->dp[ix>>1], A->dp[ix>>1]);
01434       }
01435 
01436       /* store it */
01437       COMBA_STORE(dst->dp[ix]);
01438   }
01439 
01440   COMBA_FINI;
01441 
01442   /* setup dest */
01443   dst->used = pa;
01444   fp_clamp (dst);
01445   if (dst != B) {
01446      fp_copy(dst, B);
01447   }
01448 }
01449 
01450 int fp_cmp(fp_int *a, fp_int *b)
01451 {
01452    if (a->sign == FP_NEG && b->sign == FP_ZPOS) {
01453       return FP_LT;
01454    } else if (a->sign == FP_ZPOS && b->sign == FP_NEG) {
01455       return FP_GT;
01456    } else {
01457       /* compare digits */
01458       if (a->sign == FP_NEG) {
01459          /* if negative compare opposite direction */
01460          return fp_cmp_mag(b, a);
01461       } else {
01462          return fp_cmp_mag(a, b);
01463       }
01464    }
01465 }
01466 
01467 /* compare against a single digit */
01468 int fp_cmp_d(fp_int *a, fp_digit b)
01469 {
01470   /* special case for zero*/
01471   if (a->used == 0 && b == 0)
01472     return FP_EQ;
01473 
01474   /* compare based on sign */
01475   if ((b && a->used == 0) || a->sign == FP_NEG) {
01476     return FP_LT;
01477   }
01478 
01479   /* compare based on magnitude */
01480   if (a->used > 1) {
01481     return FP_GT;
01482   }
01483 
01484   /* compare the only digit of a to b */
01485   if (a->dp[0] > b) {
01486     return FP_GT;
01487   } else if (a->dp[0] < b) {
01488     return FP_LT;
01489   } else {
01490     return FP_EQ;
01491   }
01492 
01493 }
01494 
01495 int fp_cmp_mag(fp_int *a, fp_int *b)
01496 {
01497    int x;
01498 
01499    if (a->used > b->used) {
01500       return FP_GT;
01501    } else if (a->used < b->used) {
01502       return FP_LT;
01503    } else {
01504       for (x = a->used - 1; x >= 0; x--) {
01505           if (a->dp[x] > b->dp[x]) {
01506              return FP_GT;
01507           } else if (a->dp[x] < b->dp[x]) {
01508              return FP_LT;
01509           }
01510       }
01511    }
01512    return FP_EQ;
01513 }
01514 
01515 /* setups the montgomery reduction */
01516 int fp_montgomery_setup(fp_int *a, fp_digit *rho)
01517 {
01518   fp_digit x, b;
01519 
01520 /* fast inversion mod 2**k
01521  *
01522  * Based on the fact that
01523  *
01524  * XA = 1 (mod 2**n)  =>  (X(2-XA)) A = 1 (mod 2**2n)
01525  *                    =>  2*X*A - X*X*A*A = 1
01526  *                    =>  2*(1) - (1)     = 1
01527  */
01528   b = a->dp[0];
01529 
01530   if ((b & 1) == 0) {
01531     return FP_VAL;
01532   }
01533 
01534   x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
01535   x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
01536   x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
01537   x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
01538 #ifdef FP_64BIT
01539   x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
01540 #endif
01541 
01542   /* rho = -1/m mod b */
01543   *rho = (fp_digit) (((fp_word) 1 << ((fp_word) DIGIT_BIT)) - ((fp_word)x));
01544 
01545   return FP_OKAY;
01546 }
01547 
01548 /* computes a = B**n mod b without division or multiplication useful for
01549  * normalizing numbers in a Montgomery system.
01550  */
01551 void fp_montgomery_calc_normalization(fp_int *a, fp_int *b)
01552 {
01553   int     x, bits;
01554 
01555   /* how many bits of last digit does b use */
01556   bits = fp_count_bits (b) % DIGIT_BIT;
01557   if (!bits) bits = DIGIT_BIT;
01558 
01559   /* compute A = B^(n-1) * 2^(bits-1) */
01560   if (b->used > 1) {
01561      fp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1);
01562   } else {
01563      fp_set(a, 1);
01564      bits = 1;
01565   }
01566 
01567   /* now compute C = A * B mod b */
01568   for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
01569     fp_mul_2 (a, a);
01570     if (fp_cmp_mag (a, b) != FP_LT) {
01571       s_fp_sub (a, b, a);
01572     }
01573   }
01574 }
01575 
01576 
01577 #ifdef TFM_SMALL_MONT_SET
01578     #include "fp_mont_small.i"
01579 #endif
01580 
01581 #ifdef HAVE_INTEL_MULX
01582 static INLINE void innermul8_mulx(fp_digit *c_mulx, fp_digit *cy_mulx, fp_digit *tmpm, fp_digit mu)
01583 {
01584     fp_digit _c0, _c1, _c2, _c3, _c4, _c5, _c6, _c7, cy ;
01585 
01586     cy = *cy_mulx ;
01587     _c0=c_mulx[0]; _c1=c_mulx[1]; _c2=c_mulx[2]; _c3=c_mulx[3]; _c4=c_mulx[4]; _c5=c_mulx[5]; _c6=c_mulx[6]; _c7=c_mulx[7];
01588     INNERMUL8_MULX ;
01589     c_mulx[0]=_c0; c_mulx[1]=_c1; c_mulx[2]=_c2; c_mulx[3]=_c3; c_mulx[4]=_c4; c_mulx[5]=_c5; c_mulx[6]=_c6; c_mulx[7]=_c7;
01590     *cy_mulx = cy ;
01591 }
01592 
01593 /* computes x/R == x (mod N) via Montgomery Reduction */
01594 static void fp_montgomery_reduce_mulx(fp_int *a, fp_int *m, fp_digit mp)
01595 {
01596    fp_digit c[FP_SIZE+1], *_c, *tmpm, mu = 0;
01597    int      oldused, x, y, pa;
01598 
01599    /* bail if too large */
01600    if (m->used > (FP_SIZE/2)) {
01601       (void)mu;                     /* shut up compiler */
01602       return;
01603    }
01604 
01605 #ifdef TFM_SMALL_MONT_SET
01606    if (m->used <= 16) {
01607       fp_montgomery_reduce_small(a, m, mp);
01608       return;
01609    }
01610 #endif
01611 
01612 
01613    /* now zero the buff */
01614    XMEMSET(c, 0, sizeof c);
01615    pa = m->used;
01616 
01617    /* copy the input */
01618    oldused = a->used;
01619    for (x = 0; x < oldused; x++) {
01620        c[x] = a->dp[x];
01621    }
01622    MONT_START;
01623 
01624    for (x = 0; x < pa; x++) {
01625        fp_digit cy = 0;
01626        /* get Mu for this round */
01627        LOOP_START;
01628        _c   = c + x;
01629        tmpm = m->dp;
01630        y = 0;
01631         for (; y < (pa & ~7); y += 8) {
01632               innermul8_mulx(_c, &cy, tmpm, mu) ;
01633               _c   += 8;
01634               tmpm += 8;
01635            }
01636        for (; y < pa; y++) {
01637           INNERMUL;
01638           ++_c;
01639        }
01640        LOOP_END;
01641        while (cy) {
01642            PROPCARRY;
01643            ++_c;
01644        }
01645   }
01646 
01647   /* now copy out */
01648   _c   = c + pa;
01649   tmpm = a->dp;
01650   for (x = 0; x < pa+1; x++) {
01651      *tmpm++ = *_c++;
01652   }
01653 
01654   for (; x < oldused; x++)   {
01655      *tmpm++ = 0;
01656   }
01657 
01658   MONT_FINI;
01659 
01660   a->used = pa+1;
01661   fp_clamp(a);
01662 
01663   /* if A >= m then A = A - m */
01664   if (fp_cmp_mag (a, m) != FP_LT) {
01665     s_fp_sub (a, m, a);
01666   }
01667 }
01668 #endif
01669 
01670 /* computes x/R == x (mod N) via Montgomery Reduction */
01671 void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
01672 {
01673    fp_digit c[FP_SIZE+1], *_c, *tmpm, mu = 0;
01674    int      oldused, x, y, pa;
01675 
01676    IF_HAVE_INTEL_MULX(fp_montgomery_reduce_mulx(a, m, mp), return) ;
01677 
01678    /* bail if too large */
01679    if (m->used > (FP_SIZE/2)) {
01680       (void)mu;                     /* shut up compiler */
01681       return;
01682    }
01683 
01684 #ifdef TFM_SMALL_MONT_SET
01685    if (m->used <= 16) {
01686       fp_montgomery_reduce_small(a, m, mp);
01687       return;
01688    }
01689 #endif
01690 
01691 
01692    /* now zero the buff */
01693    XMEMSET(c, 0, sizeof c);
01694    pa = m->used;
01695 
01696    /* copy the input */
01697    oldused = a->used;
01698    for (x = 0; x < oldused; x++) {
01699        c[x] = a->dp[x];
01700    }
01701    MONT_START;
01702 
01703    for (x = 0; x < pa; x++) {
01704        fp_digit cy = 0;
01705        /* get Mu for this round */
01706        LOOP_START;
01707        _c   = c + x;
01708        tmpm = m->dp;
01709        y = 0;
01710        #if (defined(TFM_SSE2) || defined(TFM_X86_64))
01711         for (; y < (pa & ~7); y += 8) {
01712               INNERMUL8 ;
01713               _c   += 8;
01714               tmpm += 8;
01715            }
01716        #endif
01717        for (; y < pa; y++) {
01718           INNERMUL;
01719           ++_c;
01720        }
01721        LOOP_END;
01722        while (cy) {
01723            PROPCARRY;
01724            ++_c;
01725        }
01726   }
01727 
01728   /* now copy out */
01729   _c   = c + pa;
01730   tmpm = a->dp;
01731   for (x = 0; x < pa+1; x++) {
01732      *tmpm++ = *_c++;
01733   }
01734 
01735   for (; x < oldused; x++)   {
01736      *tmpm++ = 0;
01737   }
01738 
01739   MONT_FINI;
01740 
01741   a->used = pa+1;
01742   fp_clamp(a);
01743 
01744   /* if A >= m then A = A - m */
01745   if (fp_cmp_mag (a, m) != FP_LT) {
01746     s_fp_sub (a, m, a);
01747   }
01748 }
01749 
01750 void fp_read_unsigned_bin(fp_int *a, unsigned char *b, int c)
01751 {
01752   /* zero the int */
01753   fp_zero (a);
01754 
01755   /* If we know the endianness of this architecture, and we're using
01756      32-bit fp_digits, we can optimize this */
01757 #if (defined(LITTLE_ENDIAN_ORDER) || defined(BIG_ENDIAN_ORDER)) && \
01758     defined(FP_32BIT)
01759   /* But not for both simultaneously */
01760 #if defined(LITTLE_ENDIAN_ORDER) && defined(BIG_ENDIAN_ORDER)
01761 #error Both LITTLE_ENDIAN_ORDER and BIG_ENDIAN_ORDER defined.
01762 #endif
01763   {
01764      unsigned char *pd = (unsigned char *)a->dp;
01765 
01766      if ((unsigned)c > (FP_SIZE * sizeof(fp_digit))) {
01767         int excess = c - (FP_SIZE * sizeof(fp_digit));
01768         c -= excess;
01769         b += excess;
01770      }
01771      a->used = (c + sizeof(fp_digit) - 1)/sizeof(fp_digit);
01772      /* read the bytes in */
01773 #ifdef BIG_ENDIAN_ORDER
01774      {
01775        /* Use Duff's device to unroll the loop. */
01776        int idx = (c - 1) & ~3;
01777        switch (c % 4) {
01778        case 0:    do { pd[idx+0] = *b++;
01779        case 3:         pd[idx+1] = *b++;
01780        case 2:         pd[idx+2] = *b++;
01781        case 1:         pd[idx+3] = *b++;
01782                      idx -= 4;
01783                  } while ((c -= 4) > 0);
01784        }
01785      }
01786 #else
01787      for (c -= 1; c >= 0; c -= 1) {
01788        pd[c] = *b++;
01789      }
01790 #endif
01791   }
01792 #else
01793   /* read the bytes in */
01794   for (; c > 0; c--) {
01795      fp_mul_2d (a, 8, a);
01796      a->dp[0] |= *b++;
01797      a->used += 1;
01798   }
01799 #endif
01800   fp_clamp (a);
01801 }
01802 
01803 void fp_to_unsigned_bin(fp_int *a, unsigned char *b)
01804 {
01805   int     x;
01806   fp_int  t;
01807 
01808   fp_init_copy(&t, a);
01809 
01810   x = 0;
01811   while (fp_iszero (&t) == FP_NO) {
01812       b[x++] = (unsigned char) (t.dp[0] & 255);
01813       fp_div_2d (&t, 8, &t, NULL);
01814   }
01815   fp_reverse (b, x);
01816 }
01817 
01818 int fp_unsigned_bin_size(fp_int *a)
01819 {
01820   int     size = fp_count_bits (a);
01821   return (size / 8 + ((size & 7) != 0 ? 1 : 0));
01822 }
01823 
01824 void fp_set(fp_int *a, fp_digit b)
01825 {
01826    fp_zero(a);
01827    a->dp[0] = b;
01828    a->used  = a->dp[0] ? 1 : 0;
01829 }
01830 
01831 /* chek if a bit is set */
01832 int fp_is_bit_set (fp_int *a, fp_digit b)
01833 {
01834     fp_digit i;
01835 
01836     if (b > FP_MAX_BITS)
01837         return 0;
01838     else
01839         i = b/DIGIT_BIT;
01840 
01841     if ((fp_digit)a->used < i)
01842         return 0;
01843 
01844     return (int)((a->dp[i] >> b%DIGIT_BIT) & (fp_digit)1);
01845 }
01846 
01847 /* set the b bit of a */
01848 int fp_set_bit (fp_int * a, fp_digit b)
01849 {
01850     fp_digit i;
01851 
01852     if (b > FP_MAX_BITS)
01853         return 0;
01854     else
01855         i = b/DIGIT_BIT;
01856 
01857     /* set the used count of where the bit will go if required */
01858     if (a->used < (int)(i+1))
01859         a->used = (int)(i+1);
01860 
01861     /* put the single bit in its place */
01862     a->dp[i] |= ((fp_digit)1) << (b % DIGIT_BIT);
01863 
01864     return MP_OKAY;
01865 }
01866 
01867 int fp_count_bits (fp_int * a)
01868 {
01869   int     r;
01870   fp_digit q;
01871 
01872   /* shortcut */
01873   if (a->used == 0) {
01874     return 0;
01875   }
01876 
01877   /* get number of digits and add that */
01878   r = (a->used - 1) * DIGIT_BIT;
01879 
01880   /* take the last digit and count the bits in it */
01881   q = a->dp[a->used - 1];
01882   while (q > ((fp_digit) 0)) {
01883     ++r;
01884     q >>= ((fp_digit) 1);
01885   }
01886 
01887   return r;
01888 }
01889 
01890 int fp_leading_bit(fp_int *a)
01891 {
01892     int bit = 0;
01893 
01894     if (a->used != 0) {
01895         fp_digit q = a->dp[a->used - 1];
01896         int qSz = sizeof(fp_digit);
01897 
01898         while (qSz > 0) {
01899             if ((unsigned char)q != 0)
01900                 bit = (q & 0x80) != 0;
01901             q >>= 8;
01902             qSz--;
01903         }
01904     }
01905 
01906     return bit;
01907 }
01908 
01909 void fp_lshd(fp_int *a, int x)
01910 {
01911    int y;
01912 
01913    /* move up and truncate as required */
01914    y = MIN(a->used + x - 1, (int)(FP_SIZE-1));
01915 
01916    /* store new size */
01917    a->used = y + 1;
01918 
01919    /* move digits */
01920    for (; y >= x; y--) {
01921        a->dp[y] = a->dp[y-x];
01922    }
01923 
01924    /* zero lower digits */
01925    for (; y >= 0; y--) {
01926        a->dp[y] = 0;
01927    }
01928 
01929    /* clamp digits */
01930    fp_clamp(a);
01931 }
01932 
01933 
01934 /* right shift by bit count */
01935 void fp_rshb(fp_int *c, int x)
01936 {
01937     register fp_digit *tmpc, mask, shift;
01938     fp_digit r, rr;
01939     fp_digit D = x;
01940 
01941     /* mask */
01942     mask = (((fp_digit)1) << D) - 1;
01943 
01944     /* shift for lsb */
01945     shift = DIGIT_BIT - D;
01946 
01947     /* alias */
01948     tmpc = c->dp + (c->used - 1);
01949 
01950     /* carry */
01951     r = 0;
01952     for (x = c->used - 1; x >= 0; x--) {
01953       /* get the lower  bits of this word in a temp */
01954       rr = *tmpc & mask;
01955 
01956       /* shift the current word and mix in the carry bits from previous word */
01957       *tmpc = (*tmpc >> D) | (r << shift);
01958       --tmpc;
01959 
01960       /* set the carry to the carry bits of the current word found above */
01961       r = rr;
01962     }
01963 }
01964 
01965 
01966 void fp_rshd(fp_int *a, int x)
01967 {
01968   int y;
01969 
01970   /* too many digits just zero and return */
01971   if (x >= a->used) {
01972      fp_zero(a);
01973      return;
01974   }
01975 
01976    /* shift */
01977    for (y = 0; y < a->used - x; y++) {
01978       a->dp[y] = a->dp[y+x];
01979    }
01980 
01981    /* zero rest */
01982    for (; y < a->used; y++) {
01983       a->dp[y] = 0;
01984    }
01985 
01986    /* decrement count */
01987    a->used -= x;
01988    fp_clamp(a);
01989 }
01990 
01991 /* reverse an array, used for radix code */
01992 void fp_reverse (unsigned char *s, int len)
01993 {
01994   int     ix, iy;
01995   unsigned char t;
01996 
01997   ix = 0;
01998   iy = len - 1;
01999   while (ix < iy) {
02000     t     = s[ix];
02001     s[ix] = s[iy];
02002     s[iy] = t;
02003     ++ix;
02004     --iy;
02005   }
02006 }
02007 
02008 
02009 /* c = a - b */
02010 void fp_sub_d(fp_int *a, fp_digit b, fp_int *c)
02011 {
02012    fp_int tmp;
02013    fp_init(&tmp);
02014    fp_set(&tmp, b);
02015    fp_sub(a, &tmp, c);
02016 }
02017 
02018 
02019 /* wolfSSL callers from normal lib */
02020 
02021 /* init a new mp_int */
02022 int mp_init (mp_int * a)
02023 {
02024   if (a)
02025     fp_init(a);
02026   return MP_OKAY;
02027 }
02028 
02029 #ifdef ALT_ECC_SIZE
02030 void fp_init(fp_int *a)
02031 {
02032     a->size = FP_SIZE;
02033     fp_zero(a);
02034 }
02035 
02036 void fp_zero(fp_int *a)
02037 {
02038     a->used = 0;
02039     a->sign = FP_ZPOS;
02040     XMEMSET(a->dp, 0, a->size * sizeof(fp_digit));
02041 }
02042 
02043 void fp_clear(fp_int *a)
02044 {
02045     a->used = 0;
02046     a->sign = FP_ZPOS;
02047     ForceZero(a->dp, a->size * sizeof(fp_digit));
02048 }
02049 #endif
02050 
02051 
02052 /* clear one (frees)  */
02053 void mp_clear (mp_int * a)
02054 {
02055     fp_clear(a);
02056 }
02057 
02058 /* handle up to 6 inits */
02059 int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d,
02060                   mp_int* e, mp_int* f)
02061 {
02062     if (a)
02063         fp_init(a);
02064     if (b)
02065         fp_init(b);
02066     if (c)
02067         fp_init(c);
02068     if (d)
02069         fp_init(d);
02070     if (e)
02071         fp_init(e);
02072     if (f)
02073         fp_init(f);
02074 
02075     return MP_OKAY;
02076 }
02077 
02078 /* high level addition (handles signs) */
02079 int mp_add (mp_int * a, mp_int * b, mp_int * c)
02080 {
02081   fp_add(a, b, c);
02082   return MP_OKAY;
02083 }
02084 
02085 /* high level subtraction (handles signs) */
02086 int mp_sub (mp_int * a, mp_int * b, mp_int * c)
02087 {
02088   fp_sub(a, b, c);
02089   return MP_OKAY;
02090 }
02091 
02092 /* high level multiplication (handles sign) */
02093 int mp_mul (mp_int * a, mp_int * b, mp_int * c)
02094 {
02095   fp_mul(a, b, c);
02096   return MP_OKAY;
02097 }
02098 
02099 /* d = a * b (mod c) */
02100 int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
02101 {
02102   return fp_mulmod(a, b, c, d);
02103 }
02104 
02105 /* c = a mod b, 0 <= c < b */
02106 int mp_mod (mp_int * a, mp_int * b, mp_int * c)
02107 {
02108   return fp_mod (a, b, c);
02109 }
02110 
02111 /* hac 14.61, pp608 */
02112 int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
02113 {
02114   return fp_invmod(a, b, c);
02115 }
02116 
02117 /* this is a shell function that calls either the normal or Montgomery
02118  * exptmod functions.  Originally the call to the montgomery code was
02119  * embedded in the normal function but that wasted alot of stack space
02120  * for nothing (since 99% of the time the Montgomery code would be called)
02121  */
02122 int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
02123 {
02124   return fp_exptmod(G, X, P, Y);
02125 }
02126 
02127 /* compare two ints (signed)*/
02128 int mp_cmp (mp_int * a, mp_int * b)
02129 {
02130   return fp_cmp(a, b);
02131 }
02132 
02133 /* compare a digit */
02134 int mp_cmp_d(mp_int * a, mp_digit b)
02135 {
02136   return fp_cmp_d(a, b);
02137 }
02138 
02139 /* get the size for an unsigned equivalent */
02140 int mp_unsigned_bin_size (mp_int * a)
02141 {
02142   return fp_unsigned_bin_size(a);
02143 }
02144 
02145 /* store in unsigned [big endian] format */
02146 int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
02147 {
02148   fp_to_unsigned_bin(a,b);
02149   return MP_OKAY;
02150 }
02151 
02152 /* reads a unsigned char array, assumes the msb is stored first [big endian] */
02153 int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
02154 {
02155   fp_read_unsigned_bin(a, (unsigned char *)b, c);
02156   return MP_OKAY;
02157 }
02158 
02159 
02160 int mp_sub_d(fp_int *a, fp_digit b, fp_int *c)
02161 {
02162     fp_sub_d(a, b, c);
02163     return MP_OKAY;
02164 }
02165 
02166 int mp_mul_2d(fp_int *a, int b, fp_int *c)
02167 {
02168     fp_mul_2d(a, b, c);
02169     return MP_OKAY;
02170 }
02171 
02172 int mp_div_2d(fp_int* a, int b, fp_int* c, fp_int* d)
02173 {
02174     fp_div_2d(a, b, c, d);
02175     return MP_OKAY;
02176 }
02177 
02178 #ifdef ALT_ECC_SIZE
02179 void fp_copy(fp_int *a, fp_int* b)
02180 {
02181     if (a != b && b->size >= a->used) {
02182         b->used = a->used;
02183         b->sign = a->sign;
02184 
02185         XMEMCPY(b->dp, a->dp, a->used * sizeof(fp_digit));
02186     }
02187 }
02188 
02189 void fp_init_copy(fp_int *a, fp_int* b)
02190 {
02191     if (a != b) {
02192         fp_init(a);
02193         fp_copy(b, a);
02194     }
02195 }
02196 #endif
02197 
02198 /* fast math conversion */
02199 int mp_copy(fp_int* a, fp_int* b)
02200 {
02201     fp_copy(a, b);
02202     return MP_OKAY;
02203 }
02204 
02205 
02206 /* fast math conversion */
02207 int mp_isodd(mp_int* a)
02208 {
02209     return fp_isodd(a);
02210 }
02211 
02212 
02213 /* fast math conversion */
02214 int mp_iszero(mp_int* a)
02215 {
02216     return fp_iszero(a);
02217 }
02218 
02219 
02220 /* fast math conversion */
02221 int mp_count_bits (mp_int* a)
02222 {
02223     return fp_count_bits(a);
02224 }
02225 
02226 
02227 int mp_leading_bit (mp_int* a)
02228 {
02229     return fp_leading_bit(a);
02230 }
02231 
02232 
02233 /* fast math conversion */
02234 void mp_rshb (mp_int* a, int x)
02235 {
02236     fp_rshb(a, x);
02237 }
02238 
02239 
02240 /* fast math wrappers */
02241 int mp_set_int(mp_int *a, mp_digit b)
02242 {
02243     fp_set(a, b);
02244     return MP_OKAY;
02245 }
02246 
02247 int mp_is_bit_set (mp_int *a, mp_digit b)
02248 {
02249     return fp_is_bit_set(a, b);
02250 }
02251 
02252 int mp_set_bit(mp_int *a, mp_digit b)
02253 {
02254     return fp_set_bit(a, b);
02255 }
02256 
02257 #if defined(WOLFSSL_KEY_GEN) || defined (HAVE_ECC)
02258 
02259 /* c = a * a (mod b) */
02260 int fp_sqrmod(fp_int *a, fp_int *b, fp_int *c)
02261 {
02262   fp_int tmp;
02263   fp_init(&tmp);
02264   fp_sqr(a, &tmp);
02265   return fp_mod(&tmp, b, c);
02266 }
02267 
02268 /* fast math conversion */
02269 int mp_sqrmod(mp_int *a, mp_int *b, mp_int *c)
02270 {
02271     return fp_sqrmod(a, b, c);
02272 }
02273 
02274 /* fast math conversion */
02275 int mp_montgomery_calc_normalization(mp_int *a, mp_int *b)
02276 {
02277     fp_montgomery_calc_normalization(a, b);
02278     return MP_OKAY;
02279 }
02280 
02281 #endif /* WOLFSSL_KEYGEN || HAVE_ECC */
02282 
02283 
02284 #if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY)
02285 
02286 static const int lnz[16] = {
02287    4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
02288 };
02289 
02290 #ifdef WOLFSSL_KEY_GEN
02291 /* swap the elements of two integers, for cases where you can't simply swap the
02292  * mp_int pointers around
02293  */
02294 static void fp_exch (fp_int * a, fp_int * b)
02295 {
02296     fp_int  t;
02297 
02298     t  = *a;
02299     *a = *b;
02300     *b = t;
02301 }
02302 #endif
02303 
02304 /* Counts the number of lsbs which are zero before the first zero bit */
02305 int fp_cnt_lsb(fp_int *a)
02306 {
02307    int x;
02308    fp_digit q, qq;
02309 
02310    /* easy out */
02311    if (fp_iszero(a) == 1) {
02312       return 0;
02313    }
02314 
02315    /* scan lower digits until non-zero */
02316    for (x = 0; x < a->used && a->dp[x] == 0; x++);
02317    q = a->dp[x];
02318    x *= DIGIT_BIT;
02319 
02320    /* now scan this digit until a 1 is found */
02321    if ((q & 1) == 0) {
02322       do {
02323          qq  = q & 15;
02324          x  += lnz[qq];
02325          q >>= 4;
02326       } while (qq == 0);
02327    }
02328    return x;
02329 }
02330 
02331 
02332 static int s_is_power_of_two(fp_digit b, int *p)
02333 {
02334    int x;
02335 
02336    /* fast return if no power of two */
02337    if ((b==0) || (b & (b-1))) {
02338       return 0;
02339    }
02340 
02341    for (x = 0; x < DIGIT_BIT; x++) {
02342       if (b == (((fp_digit)1)<<x)) {
02343          *p = x;
02344          return 1;
02345       }
02346    }
02347    return 0;
02348 }
02349 
02350 /* a/b => cb + d == a */
02351 static int fp_div_d(fp_int *a, fp_digit b, fp_int *c, fp_digit *d)
02352 {
02353   fp_int   q;
02354   fp_word  w;
02355   fp_digit t;
02356   int      ix;
02357 
02358   /* cannot divide by zero */
02359   if (b == 0) {
02360      return FP_VAL;
02361   }
02362 
02363   /* quick outs */
02364   if (b == 1 || fp_iszero(a) == 1) {
02365      if (d != NULL) {
02366         *d = 0;
02367      }
02368      if (c != NULL) {
02369         fp_copy(a, c);
02370      }
02371      return FP_OKAY;
02372   }
02373 
02374   /* power of two ? */
02375   if (s_is_power_of_two(b, &ix) == 1) {
02376      if (d != NULL) {
02377         *d = a->dp[0] & ((((fp_digit)1)<<ix) - 1);
02378      }
02379      if (c != NULL) {
02380         fp_div_2d(a, ix, c, NULL);
02381      }
02382      return FP_OKAY;
02383   }
02384 
02385   if (c != NULL) {
02386     /* no easy answer [c'est la vie].  Just division */
02387     fp_init(&q);
02388 
02389     q.used = a->used;
02390     q.sign = a->sign;
02391   }
02392 
02393   w = 0;
02394   for (ix = a->used - 1; ix >= 0; ix--) {
02395      w = (w << ((fp_word)DIGIT_BIT)) | ((fp_word)a->dp[ix]);
02396 
02397      if (w >= b) {
02398         t = (fp_digit)(w / b);
02399         w -= ((fp_word)t) * ((fp_word)b);
02400       } else {
02401         t = 0;
02402       }
02403       if (c != NULL)
02404         q.dp[ix] = (fp_digit)t;
02405   }
02406 
02407   if (d != NULL) {
02408      *d = (fp_digit)w;
02409   }
02410 
02411   if (c != NULL) {
02412      fp_clamp(&q);
02413      fp_copy(&q, c);
02414   }
02415 
02416   return FP_OKAY;
02417 }
02418 
02419 
02420 /* c = a mod b, 0 <= c < b  */
02421 static int fp_mod_d(fp_int *a, fp_digit b, fp_digit *c)
02422 {
02423    return fp_div_d(a, b, NULL, c);
02424 }
02425 
02426 int mp_mod_d(fp_int *a, fp_digit b, fp_digit *c)
02427 {
02428    return fp_mod_d(a, b, c);
02429 }
02430 
02431 #endif /* defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) */
02432 
02433 #ifdef WOLFSSL_KEY_GEN
02434 
02435 void fp_gcd(fp_int *a, fp_int *b, fp_int *c);
02436 void fp_lcm(fp_int *a, fp_int *b, fp_int *c);
02437 int  fp_isprime(fp_int *a);
02438 int  fp_randprime(fp_int* N, int len, WC_RNG* rng, void* heap);
02439 
02440 int mp_gcd(fp_int *a, fp_int *b, fp_int *c)
02441 {
02442     fp_gcd(a, b, c);
02443     return MP_OKAY;
02444 }
02445 
02446 
02447 int mp_lcm(fp_int *a, fp_int *b, fp_int *c)
02448 {
02449     fp_lcm(a, b, c);
02450     return MP_OKAY;
02451 }
02452 
02453 
02454 int mp_prime_is_prime(mp_int* a, int t, int* result)
02455 {
02456     (void)t;
02457     *result = fp_isprime(a);
02458     return MP_OKAY;
02459 }
02460 
02461 int mp_rand_prime(mp_int* N, int len, WC_RNG* rng, void* heap)
02462 {
02463     int err;
02464 
02465     err = fp_randprime(N, len, rng, heap);
02466     switch(err) {
02467         case FP_VAL:
02468             return MP_VAL;
02469             break;
02470         case FP_MEM:
02471             return MP_MEM;
02472             break;
02473         default:
02474             break;
02475     }
02476 
02477     return MP_OKAY;
02478 }
02479 
02480 int mp_exch (mp_int * a, mp_int * b)
02481 {
02482     fp_exch(a, b);
02483     return MP_OKAY;
02484 }
02485 
02486 /* Miller-Rabin test of "a" to the base of "b" as described in
02487  * HAC pp. 139 Algorithm 4.24
02488  *
02489  * Sets result to 0 if definitely composite or 1 if probably prime.
02490  * Randomly the chance of error is no more than 1/4 and often
02491  * very much lower.
02492  */
02493 static void fp_prime_miller_rabin (fp_int * a, fp_int * b, int *result)
02494 {
02495   fp_int  n1, y, r;
02496   int     s, j;
02497 
02498   /* default */
02499   *result = FP_NO;
02500 
02501   /* ensure b > 1 */
02502   if (fp_cmp_d(b, 1) != FP_GT) {
02503      return;
02504   }
02505 
02506   /* get n1 = a - 1 */
02507   fp_init_copy(&n1, a);
02508   fp_sub_d(&n1, 1, &n1);
02509 
02510   /* set 2**s * r = n1 */
02511   fp_init_copy(&r, &n1);
02512 
02513   /* count the number of least significant bits
02514    * which are zero
02515    */
02516   s = fp_cnt_lsb(&r);
02517 
02518   /* now divide n - 1 by 2**s */
02519   fp_div_2d (&r, s, &r, NULL);
02520 
02521   /* compute y = b**r mod a */
02522   fp_init(&y);
02523   fp_exptmod(b, &r, a, &y);
02524 
02525   /* if y != 1 and y != n1 do */
02526   if (fp_cmp_d (&y, 1) != FP_EQ && fp_cmp (&y, &n1) != FP_EQ) {
02527     j = 1;
02528     /* while j <= s-1 and y != n1 */
02529     while ((j <= (s - 1)) && fp_cmp (&y, &n1) != FP_EQ) {
02530       fp_sqrmod (&y, a, &y);
02531 
02532       /* if y == 1 then composite */
02533       if (fp_cmp_d (&y, 1) == FP_EQ) {
02534          return;
02535       }
02536       ++j;
02537     }
02538 
02539     /* if y != n1 then composite */
02540     if (fp_cmp (&y, &n1) != FP_EQ) {
02541        return;
02542     }
02543   }
02544 
02545   /* probably prime now */
02546   *result = FP_YES;
02547 }
02548 
02549 
02550 /* a few primes */
02551 static const fp_digit primes[256] = {
02552   0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
02553   0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
02554   0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
02555   0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
02556   0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
02557   0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
02558   0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
02559   0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
02560 
02561   0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
02562   0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
02563   0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
02564   0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
02565   0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
02566   0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
02567   0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
02568   0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
02569 
02570   0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
02571   0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
02572   0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
02573   0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
02574   0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
02575   0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
02576   0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
02577   0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
02578 
02579   0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
02580   0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
02581   0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
02582   0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
02583   0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
02584   0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
02585   0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
02586   0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
02587 };
02588 
02589 int fp_isprime(fp_int *a)
02590 {
02591    fp_int   b;
02592    fp_digit d = 0;
02593    int      r, res;
02594 
02595    /* do trial division */
02596    for (r = 0; r < 256; r++) {
02597        fp_mod_d(a, primes[r], &d);
02598        if (d == 0) {
02599           return FP_NO;
02600        }
02601    }
02602 
02603    /* now do 8 miller rabins */
02604    fp_init(&b);
02605    for (r = 0; r < 8; r++) {
02606        fp_set(&b, primes[r]);
02607        fp_prime_miller_rabin(a, &b, &res);
02608        if (res == FP_NO) {
02609           return FP_NO;
02610        }
02611    }
02612    return FP_YES;
02613 }
02614 
02615 int fp_randprime(fp_int* N, int len, WC_RNG* rng, void* heap)
02616 {
02617     static const int USE_BBS = 1;
02618     int   err, type;
02619     byte* buf;
02620 
02621     /* get type */
02622     if (len < 0) {
02623         type = USE_BBS;
02624         len = -len;
02625     } else {
02626         type = 0;
02627     }
02628 
02629     /* allow sizes between 2 and 512 bytes for a prime size */
02630     if (len < 2 || len > 512) {
02631         return FP_VAL;
02632     }
02633 
02634     /* allocate buffer to work with */
02635     buf = (byte*)XMALLOC(len, heap, DYNAMIC_TYPE_TMP_BUFFER);
02636     if (buf == NULL) {
02637         return FP_MEM;
02638     }
02639     XMEMSET(buf, 0, len);
02640 
02641     do {
02642 #ifdef SHOW_GEN
02643         printf(".");
02644         fflush(stdout);
02645 #endif
02646         /* generate value */
02647         err = wc_RNG_GenerateBlock(rng, buf, len);
02648         if (err != 0) {
02649             XFREE(buf, heap, DYNAMIC_TYPE_TMP_BUFFER);
02650             return FP_VAL;
02651         }
02652 
02653         /* munge bits */
02654         buf[0]     |= 0x80 | 0x40;
02655         buf[len-1] |= 0x01 | ((type & USE_BBS) ? 0x02 : 0x00);
02656 
02657         /* load value */
02658         fp_read_unsigned_bin(N, buf, len);
02659 
02660         /* test */
02661     } while (fp_isprime(N) == FP_NO);
02662 
02663     XMEMSET(buf, 0, len);
02664     XFREE(buf, heap, DYNAMIC_TYPE_TMP_BUFFER);
02665     
02666     return FP_OKAY;
02667 }
02668 
02669 /* c = [a, b] */
02670 void fp_lcm(fp_int *a, fp_int *b, fp_int *c)
02671 {
02672    fp_int  t1, t2;
02673 
02674    fp_init(&t1);
02675    fp_init(&t2);
02676    fp_gcd(a, b, &t1);
02677    if (fp_cmp_mag(a, b) == FP_GT) {
02678       fp_div(a, &t1, &t2, NULL);
02679       fp_mul(b, &t2, c);
02680    } else {
02681       fp_div(b, &t1, &t2, NULL);
02682       fp_mul(a, &t2, c);
02683    }
02684 }
02685 
02686 
02687 
02688 /* c = (a, b) */
02689 void fp_gcd(fp_int *a, fp_int *b, fp_int *c)
02690 {
02691    fp_int u, v, r;
02692 
02693    /* either zero than gcd is the largest */
02694    if (fp_iszero (a) == 1 && fp_iszero (b) == 0) {
02695      fp_abs (b, c);
02696      return;
02697    }
02698    if (fp_iszero (a) == 0 && fp_iszero (b) == 1) {
02699      fp_abs (a, c);
02700      return;
02701    }
02702 
02703    /* optimized.  At this point if a == 0 then
02704     * b must equal zero too
02705     */
02706    if (fp_iszero (a) == 1) {
02707      fp_zero(c);
02708      return;
02709    }
02710 
02711    /* sort inputs */
02712    if (fp_cmp_mag(a, b) != FP_LT) {
02713       fp_init_copy(&u, a);
02714       fp_init_copy(&v, b);
02715    } else {
02716       fp_init_copy(&u, b);
02717       fp_init_copy(&v, a);
02718    }
02719 
02720    fp_init(&r);
02721    while (fp_iszero(&v) == FP_NO) {
02722       fp_mod(&u, &v, &r);
02723       fp_copy(&v, &u);
02724       fp_copy(&r, &v);
02725    }
02726    fp_copy(&u, c);
02727 }
02728 
02729 #endif /* WOLFSSL_KEY_GEN */
02730 
02731 
02732 #if defined(HAVE_ECC) || !defined(NO_PWDBASED) || defined(OPENSSL_EXTRA)
02733 /* c = a + b */
02734 void fp_add_d(fp_int *a, fp_digit b, fp_int *c)
02735 {
02736    fp_int tmp;
02737    fp_init(&tmp);
02738    fp_set(&tmp, b);
02739    fp_add(a,&tmp,c);
02740 }
02741 
02742 /* external compatibility */
02743 int mp_add_d(fp_int *a, fp_digit b, fp_int *c)
02744 {
02745     fp_add_d(a, b, c);
02746     return MP_OKAY;
02747 }
02748 
02749 #endif  /* HAVE_ECC || !NO_PWDBASED */
02750 
02751 
02752 #if defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY)
02753 
02754 /* chars used in radix conversions */
02755 static const char *fp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ\
02756                                 abcdefghijklmnopqrstuvwxyz+/";
02757 #endif
02758 
02759 #ifdef HAVE_ECC
02760 static int fp_read_radix(fp_int *a, const char *str, int radix)
02761 {
02762   int     y, neg;
02763   char    ch;
02764 
02765   /* make sure the radix is ok */
02766   if (radix < 2 || radix > 64) {
02767     return FP_VAL;
02768   }
02769 
02770   /* if the leading digit is a
02771    * minus set the sign to negative.
02772    */
02773   if (*str == '-') {
02774     ++str;
02775     neg = FP_NEG;
02776   } else {
02777     neg = FP_ZPOS;
02778   }
02779 
02780   /* set the integer to the default of zero */
02781   fp_zero (a);
02782 
02783   /* process each digit of the string */
02784   while (*str) {
02785     /* if the radix < 36 the conversion is case insensitive
02786      * this allows numbers like 1AB and 1ab to represent the same  value
02787      * [e.g. in hex]
02788      */
02789     ch = (char) ((radix < 36) ? XTOUPPER((unsigned char)*str) : *str);
02790     for (y = 0; y < 64; y++) {
02791       if (ch == fp_s_rmap[y]) {
02792          break;
02793       }
02794     }
02795 
02796     /* if the char was found in the map
02797      * and is less than the given radix add it
02798      * to the number, otherwise exit the loop.
02799      */
02800     if (y < radix) {
02801       fp_mul_d (a, (fp_digit) radix, a);
02802       fp_add_d (a, (fp_digit) y, a);
02803     } else {
02804       break;
02805     }
02806     ++str;
02807   }
02808 
02809   /* set the sign only if a != 0 */
02810   if (fp_iszero(a) != FP_YES) {
02811      a->sign = neg;
02812   }
02813   return FP_OKAY;
02814 }
02815 
02816 /* fast math conversion */
02817 int mp_read_radix(mp_int *a, const char *str, int radix)
02818 {
02819     return fp_read_radix(a, str, radix);
02820 }
02821 
02822 /* fast math conversion */
02823 void mp_set(fp_int *a, fp_digit b)
02824 {
02825     fp_set(a,b);
02826 }
02827 
02828 /* fast math conversion */
02829 int mp_sqr(fp_int *A, fp_int *B)
02830 {
02831     fp_sqr(A, B);
02832     return MP_OKAY;
02833 }
02834 
02835 /* fast math conversion */
02836 int mp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
02837 {
02838     fp_montgomery_reduce(a, m, mp);
02839     return MP_OKAY;
02840 }
02841 
02842 
02843 /* fast math conversion */
02844 int mp_montgomery_setup(fp_int *a, fp_digit *rho)
02845 {
02846     return fp_montgomery_setup(a, rho);
02847 }
02848 
02849 int mp_div_2(fp_int * a, fp_int * b)
02850 {
02851     fp_div_2(a, b);
02852     return MP_OKAY;
02853 }
02854 
02855 
02856 int mp_init_copy(fp_int * a, fp_int * b)
02857 {
02858     fp_init_copy(a, b);
02859     return MP_OKAY;
02860 }
02861 
02862 #ifdef HAVE_COMP_KEY
02863 
02864 int mp_cnt_lsb(fp_int* a)
02865 {
02866     fp_cnt_lsb(a);
02867     return MP_OKAY;
02868 }
02869 
02870 #endif /* HAVE_COMP_KEY */
02871 
02872 #endif /* HAVE_ECC */
02873 
02874 #if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY)
02875 
02876 /* returns size of ASCII representation */
02877 int mp_radix_size (mp_int *a, int radix, int *size)
02878 {
02879     int     res, digs;
02880     fp_int  t;
02881     fp_digit d;
02882 
02883     *size = 0;
02884 
02885     /* special case for binary */
02886     if (radix == 2) {
02887         *size = fp_count_bits (a) + (a->sign == FP_NEG ? 1 : 0) + 1;
02888         return FP_YES;
02889     }
02890 
02891     /* make sure the radix is in range */
02892     if (radix < 2 || radix > 64) {
02893         return FP_VAL;
02894     }
02895 
02896     if (fp_iszero(a) == MP_YES) {
02897         *size = 2;
02898         return FP_OKAY;
02899     }
02900 
02901     /* digs is the digit count */
02902     digs = 0;
02903 
02904     /* if it's negative add one for the sign */
02905     if (a->sign == FP_NEG) {
02906         ++digs;
02907     }
02908 
02909     /* init a copy of the input */
02910     fp_init_copy (&t, a);
02911 
02912     /* force temp to positive */
02913     t.sign = FP_ZPOS;
02914 
02915     /* fetch out all of the digits */
02916     while (fp_iszero (&t) == FP_NO) {
02917         if ((res = fp_div_d (&t, (mp_digit) radix, &t, &d)) != FP_OKAY) {
02918             fp_zero (&t);
02919             return res;
02920         }
02921         ++digs;
02922     }
02923     fp_zero (&t);
02924 
02925     /* return digs + 1, the 1 is for the NULL byte that would be required. */
02926     *size = digs + 1;
02927     return FP_OKAY;
02928 }
02929 
02930 /* stores a bignum as a ASCII string in a given radix (2..64) */
02931 int mp_toradix (mp_int *a, char *str, int radix)
02932 {
02933     int     res, digs;
02934     fp_int  t;
02935     fp_digit d;
02936     char   *_s = str;
02937 
02938     /* check range of the radix */
02939     if (radix < 2 || radix > 64) {
02940         return FP_VAL;
02941     }
02942 
02943     /* quick out if its zero */
02944     if (fp_iszero(a) == 1) {
02945         *str++ = '0';
02946         *str = '\0';
02947         return FP_YES;
02948     }
02949 
02950     /* init a copy of the input */
02951     fp_init_copy (&t, a);
02952 
02953     /* if it is negative output a - */
02954     if (t.sign == FP_NEG) {
02955         ++_s;
02956         *str++ = '-';
02957         t.sign = FP_ZPOS;
02958     }
02959 
02960     digs = 0;
02961     while (fp_iszero (&t) == 0) {
02962         if ((res = fp_div_d (&t, (fp_digit) radix, &t, &d)) != FP_OKAY) {
02963             fp_zero (&t);
02964             return res;
02965         }
02966         *str++ = fp_s_rmap[d];
02967         ++digs;
02968     }
02969 
02970     /* reverse the digits of the string.  In this case _s points
02971      * to the first digit [excluding the sign] of the number]
02972      */
02973     fp_reverse ((unsigned char *)_s, digs);
02974 
02975     /* append a NULL so the string is properly terminated */
02976     *str = '\0';
02977 
02978     fp_zero (&t);
02979     return FP_OKAY;
02980 }
02981 
02982 #endif /* defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) */
02983 
02984 #endif /* USE_FAST_MATH */
02985 
02986