This is a port of cyaSSL 2.7.0.

Dependents:   CyaSSL_DTLS_Cellular CyaSSL_DTLS_Ethernet

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers tfm.c Source File

tfm.c

00001 /* tfm.c
00002  *
00003  * Copyright (C) 2006-2013 wolfSSL Inc.
00004  *
00005  * This file is part of CyaSSL.
00006  *
00007  * CyaSSL is free software; you can redistribute it and/or modify
00008  * it under the terms of the GNU General Public License as published by
00009  * the Free Software Foundation; either version 2 of the License, or
00010  * (at your option) any later version.
00011  *
00012  * CyaSSL is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
00020  */
00021 
00022 
00023 /*
00024  * Based on public domain TomsFastMath 0.10 by Tom St Denis, tomstdenis@iahu.ca,
00025  * http://math.libtomcrypt.com
00026  */
00027 
00028 /**
00029  *  Edited by Moisés Guimarães (moisesguimaraesm@gmail.com)
00030  *  to fit CyaSSL's needs.
00031  */
00032 
00033 #ifdef HAVE_CONFIG_H
00034     #include <config.h>
00035 #endif
00036 
00037 /* in case user set USE_FAST_MATH there */
00038 #include <cyassl/ctaocrypt/settings.h>
00039 
00040 #ifdef USE_FAST_MATH
00041 
00042 #include <cyassl/ctaocrypt/tfm.h>
00043 #include <ctaocrypt/src/asm.c>  /* will define asm MACROS or C ones */
00044 
00045 
00046 /* math settings check */
00047 word32 CheckRunTimeSettings(void)
00048 {
00049     return CTC_SETTINGS;
00050 }
00051 
00052 
00053 /* math settings size check */
00054 word32 CheckRunTimeFastMath(void)
00055 {
00056     return FP_SIZE;
00057 }
00058 
00059 
00060 /* Functions */
00061 
00062 void fp_add(fp_int *a, fp_int *b, fp_int *c)
00063 {
00064   int     sa, sb;
00065 
00066   /* get sign of both inputs */
00067   sa = a->sign;
00068   sb = b->sign;
00069 
00070   /* handle two cases, not four */
00071   if (sa == sb) {
00072     /* both positive or both negative */
00073     /* add their magnitudes, copy the sign */
00074     c->sign = sa;
00075     s_fp_add (a, b, c);
00076   } else {
00077     /* one positive, the other negative */
00078     /* subtract the one with the greater magnitude from */
00079     /* the one of the lesser magnitude.  The result gets */
00080     /* the sign of the one with the greater magnitude. */
00081     if (fp_cmp_mag (a, b) == FP_LT) {
00082       c->sign = sb;
00083       s_fp_sub (b, a, c);
00084     } else {
00085       c->sign = sa;
00086       s_fp_sub (a, b, c);
00087     }
00088   }
00089 }
00090 
00091 /* unsigned addition */
00092 void s_fp_add(fp_int *a, fp_int *b, fp_int *c)
00093 {
00094   int      x, y, oldused;
00095   register fp_word  t;
00096 
00097   y       = MAX(a->used, b->used);
00098   oldused = c->used;
00099   c->used = y;
00100  
00101   t = 0;
00102   for (x = 0; x < y; x++) {
00103       t         += ((fp_word)a->dp[x]) + ((fp_word)b->dp[x]);
00104       c->dp[x]   = (fp_digit)t;
00105       t        >>= DIGIT_BIT;
00106   }
00107   if (t != 0 && x < FP_SIZE) {
00108      c->dp[c->used++] = (fp_digit)t;
00109      ++x;
00110   }
00111 
00112   c->used = x;
00113   for (; x < oldused; x++) {
00114      c->dp[x] = 0;
00115   }
00116   fp_clamp(c);
00117 }
00118 
00119 /* c = a - b */
00120 void fp_sub(fp_int *a, fp_int *b, fp_int *c)
00121 {
00122   int     sa, sb;
00123 
00124   sa = a->sign;
00125   sb = b->sign;
00126 
00127   if (sa != sb) {
00128     /* subtract a negative from a positive, OR */
00129     /* subtract a positive from a negative. */
00130     /* In either case, ADD their magnitudes, */
00131     /* and use the sign of the first number. */
00132     c->sign = sa;
00133     s_fp_add (a, b, c);
00134   } else {
00135     /* subtract a positive from a positive, OR */
00136     /* subtract a negative from a negative. */
00137     /* First, take the difference between their */
00138     /* magnitudes, then... */
00139     if (fp_cmp_mag (a, b) != FP_LT) {
00140       /* Copy the sign from the first */
00141       c->sign = sa;
00142       /* The first has a larger or equal magnitude */
00143       s_fp_sub (a, b, c);
00144     } else {
00145       /* The result has the *opposite* sign from */
00146       /* the first number. */
00147       c->sign = (sa == FP_ZPOS) ? FP_NEG : FP_ZPOS;
00148       /* The second has a larger magnitude */
00149       s_fp_sub (b, a, c);
00150     }
00151   }
00152 }
00153 
00154 /* unsigned subtraction ||a|| >= ||b|| ALWAYS! */
00155 void s_fp_sub(fp_int *a, fp_int *b, fp_int *c)
00156 {
00157   int      x, oldbused, oldused;
00158   fp_word  t;
00159 
00160   oldused  = c->used;
00161   oldbused = b->used;
00162   c->used  = a->used;
00163   t       = 0;
00164   for (x = 0; x < oldbused; x++) {
00165      t         = ((fp_word)a->dp[x]) - (((fp_word)b->dp[x]) + t);
00166      c->dp[x]  = (fp_digit)t;
00167      t         = (t >> DIGIT_BIT)&1;
00168   }
00169   for (; x < a->used; x++) {
00170      t         = ((fp_word)a->dp[x]) - t;
00171      c->dp[x]  = (fp_digit)t;
00172      t         = (t >> DIGIT_BIT)&1;
00173    }
00174   for (; x < oldused; x++) {
00175      c->dp[x] = 0;
00176   }
00177   fp_clamp(c);
00178 }
00179 
00180 /* c = a * b */
00181 void fp_mul(fp_int *A, fp_int *B, fp_int *C)
00182 {
00183     int   y, yy;
00184 
00185     y  = MAX(A->used, B->used);
00186     yy = MIN(A->used, B->used);
00187 
00188     /* call generic if we're out of range */
00189     if (y + yy > FP_SIZE) {
00190        fp_mul_comba(A, B, C);
00191        return ;
00192     }
00193 
00194     /* pick a comba (unrolled 4/8/16/32 x or rolled) based on the size
00195        of the largest input.  We also want to avoid doing excess mults if the 
00196        inputs are not close to the next power of two.  That is, for example,
00197        if say y=17 then we would do (32-17)^2 = 225 unneeded multiplications 
00198     */
00199 
00200 #ifdef TFM_MUL3
00201         if (y <= 3) {
00202            fp_mul_comba3(A,B,C);
00203            return;
00204         }
00205 #endif
00206 #ifdef TFM_MUL4
00207         if (y == 4) {
00208            fp_mul_comba4(A,B,C);
00209            return;
00210         }
00211 #endif
00212 #ifdef TFM_MUL6
00213         if (y <= 6) {
00214            fp_mul_comba6(A,B,C);
00215            return;
00216         }
00217 #endif
00218 #ifdef TFM_MUL7
00219         if (y == 7) {
00220            fp_mul_comba7(A,B,C);
00221            return;
00222         }
00223 #endif
00224 #ifdef TFM_MUL8
00225         if (y == 8) {
00226            fp_mul_comba8(A,B,C);
00227            return;
00228         }
00229 #endif
00230 #ifdef TFM_MUL9
00231         if (y == 9) {
00232            fp_mul_comba9(A,B,C);
00233            return;
00234         }
00235 #endif
00236 #ifdef TFM_MUL12
00237         if (y <= 12) {
00238            fp_mul_comba12(A,B,C);
00239            return;
00240         }
00241 #endif
00242 #ifdef TFM_MUL17
00243         if (y <= 17) {
00244            fp_mul_comba17(A,B,C);
00245            return;
00246         }
00247 #endif
00248 
00249 #ifdef TFM_SMALL_SET
00250         if (y <= 16) {
00251            fp_mul_comba_small(A,B,C);
00252            return;
00253         }
00254 #endif        
00255 #if defined(TFM_MUL20)
00256         if (y <= 20) {
00257            fp_mul_comba20(A,B,C);
00258            return;
00259         }
00260 #endif
00261 #if defined(TFM_MUL24)
00262         if (yy >= 16 && y <= 24) {
00263            fp_mul_comba24(A,B,C);
00264            return;
00265         }
00266 #endif
00267 #if defined(TFM_MUL28)
00268         if (yy >= 20 && y <= 28) {
00269            fp_mul_comba28(A,B,C);
00270            return;
00271         }
00272 #endif
00273 #if defined(TFM_MUL32)
00274         if (yy >= 24 && y <= 32) {
00275            fp_mul_comba32(A,B,C);
00276            return;
00277         }
00278 #endif
00279 #if defined(TFM_MUL48)
00280         if (yy >= 40 && y <= 48) {
00281            fp_mul_comba48(A,B,C);
00282            return;
00283         }
00284 #endif        
00285 #if defined(TFM_MUL64)
00286         if (yy >= 56 && y <= 64) {
00287            fp_mul_comba64(A,B,C);
00288            return;
00289         }
00290 #endif
00291         fp_mul_comba(A,B,C);
00292 }
00293 
00294 void fp_mul_2(fp_int * a, fp_int * b)
00295 {
00296   int     x, oldused;
00297    
00298   oldused = b->used;
00299   b->used = a->used;
00300 
00301   {
00302     register fp_digit r, rr, *tmpa, *tmpb;
00303 
00304     /* alias for source */
00305     tmpa = a->dp;
00306     
00307     /* alias for dest */
00308     tmpb = b->dp;
00309 
00310     /* carry */
00311     r = 0;
00312     for (x = 0; x < a->used; x++) {
00313     
00314       /* get what will be the *next* carry bit from the 
00315        * MSB of the current digit 
00316        */
00317       rr = *tmpa >> ((fp_digit)(DIGIT_BIT - 1));
00318       
00319       /* now shift up this digit, add in the carry [from the previous] */
00320       *tmpb++ = ((*tmpa++ << ((fp_digit)1)) | r);
00321       
00322       /* copy the carry that would be from the source 
00323        * digit into the next iteration 
00324        */
00325       r = rr;
00326     }
00327 
00328     /* new leading digit? */
00329     if (r != 0 && b->used != (FP_SIZE-1)) {
00330       /* add a MSB which is always 1 at this point */
00331       *tmpb = 1;
00332       ++(b->used);
00333     }
00334 
00335     /* now zero any excess digits on the destination 
00336      * that we didn't write to 
00337      */
00338     tmpb = b->dp + b->used;
00339     for (x = b->used; x < oldused; x++) {
00340       *tmpb++ = 0;
00341     }
00342   }
00343   b->sign = a->sign;
00344 }
00345 
00346 /* c = a * b */
00347 void fp_mul_d(fp_int *a, fp_digit b, fp_int *c)
00348 {
00349    fp_word  w;
00350    int      x, oldused;
00351 
00352    oldused = c->used;
00353    c->used = a->used;
00354    c->sign = a->sign;
00355    w       = 0;
00356    for (x = 0; x < a->used; x++) {
00357        w         = ((fp_word)a->dp[x]) * ((fp_word)b) + w;
00358        c->dp[x]  = (fp_digit)w;
00359        w         = w >> DIGIT_BIT;
00360    }
00361    if (w != 0 && (a->used != FP_SIZE)) {
00362       c->dp[c->used++] = (fp_digit) w;
00363       ++x;
00364    }
00365    for (; x < oldused; x++) {
00366       c->dp[x] = 0;
00367    }
00368    fp_clamp(c);
00369 }
00370 
00371 /* c = a * 2**d */
00372 void fp_mul_2d(fp_int *a, int b, fp_int *c)
00373 {
00374    fp_digit carry, carrytmp, shift;
00375    int x;
00376 
00377    /* copy it */
00378    fp_copy(a, c);
00379 
00380    /* handle whole digits */
00381    if (b >= DIGIT_BIT) {
00382       fp_lshd(c, b/DIGIT_BIT);
00383    }
00384    b %= DIGIT_BIT;
00385 
00386    /* shift the digits */
00387    if (b != 0) {
00388       carry = 0;   
00389       shift = DIGIT_BIT - b;
00390       for (x = 0; x < c->used; x++) {
00391           carrytmp = c->dp[x] >> shift;
00392           c->dp[x] = (c->dp[x] << b) + carry;
00393           carry = carrytmp;
00394       }
00395       /* store last carry if room */
00396       if (carry && x < FP_SIZE) {
00397          c->dp[c->used++] = carry;
00398       }
00399    }
00400    fp_clamp(c);
00401 }
00402 
00403 /* generic PxQ multiplier */
00404 void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
00405 {
00406    int       ix, iy, iz, tx, ty, pa;
00407    fp_digit  c0, c1, c2, *tmpx, *tmpy;
00408    fp_int    tmp, *dst;
00409 
00410    COMBA_START;
00411    COMBA_CLEAR;
00412    
00413    /* get size of output and trim */
00414    pa = A->used + B->used;
00415    if (pa >= FP_SIZE) {
00416       pa = FP_SIZE-1;
00417    }
00418 
00419    if (A == C || B == C) {
00420       fp_zero(&tmp);
00421       dst = &tmp;
00422    } else {
00423       fp_zero(C);
00424       dst = C;
00425    }
00426 
00427    for (ix = 0; ix < pa; ix++) {
00428       /* get offsets into the two bignums */
00429       ty = MIN(ix, B->used-1);
00430       tx = ix - ty;
00431 
00432       /* setup temp aliases */
00433       tmpx = A->dp + tx;
00434       tmpy = B->dp + ty;
00435 
00436       /* this is the number of times the loop will iterrate, essentially its 
00437          while (tx++ < a->used && ty-- >= 0) { ... }
00438        */
00439       iy = MIN(A->used-tx, ty+1);
00440 
00441       /* execute loop */
00442       COMBA_FORWARD;
00443       for (iz = 0; iz < iy; ++iz) {
00444           /* TAO change COMBA_ADD back to MULADD */
00445           MULADD(*tmpx++, *tmpy--);
00446       }
00447 
00448       /* store term */
00449       COMBA_STORE(dst->dp[ix]);
00450   }
00451   COMBA_FINI;
00452 
00453   dst->used = pa;
00454   dst->sign = A->sign ^ B->sign;
00455   fp_clamp(dst);
00456   fp_copy(dst, C);
00457 }
00458 
00459 /* a/b => cb + d == a */
00460 int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
00461 {
00462   fp_int  q, x, y, t1, t2;
00463   int     n, t, i, norm, neg;
00464 
00465   /* is divisor zero ? */
00466   if (fp_iszero (b) == 1) {
00467     return FP_VAL;
00468   }
00469 
00470   /* if a < b then q=0, r = a */
00471   if (fp_cmp_mag (a, b) == FP_LT) {
00472     if (d != NULL) {
00473       fp_copy (a, d);
00474     } 
00475     if (c != NULL) {
00476       fp_zero (c);
00477     }
00478     return FP_OKAY;
00479   }
00480 
00481   fp_init(&q);
00482   q.used = a->used + 2;
00483 
00484   fp_init(&t1);
00485   fp_init(&t2);
00486   fp_init_copy(&x, a);
00487   fp_init_copy(&y, b);
00488 
00489   /* fix the sign */
00490   neg = (a->sign == b->sign) ? FP_ZPOS : FP_NEG;
00491   x.sign = y.sign = FP_ZPOS;
00492 
00493   /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
00494   norm = fp_count_bits(&y) % DIGIT_BIT;
00495   if (norm < (int)(DIGIT_BIT-1)) {
00496      norm = (DIGIT_BIT-1) - norm;
00497      fp_mul_2d (&x, norm, &x);
00498      fp_mul_2d (&y, norm, &y);
00499   } else {
00500      norm = 0;
00501   }
00502 
00503   /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
00504   n = x.used - 1;
00505   t = y.used - 1;
00506 
00507   /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
00508   fp_lshd (&y, n - t);                                             /* y = y*b**{n-t} */
00509 
00510   while (fp_cmp (&x, &y) != FP_LT) {
00511     ++(q.dp[n - t]);
00512     fp_sub (&x, &y, &x);
00513   }
00514 
00515   /* reset y by shifting it back down */
00516   fp_rshd (&y, n - t);
00517 
00518   /* step 3. for i from n down to (t + 1) */
00519   for (i = n; i >= (t + 1); i--) {
00520     if (i > x.used) {
00521       continue;
00522     }
00523 
00524     /* step 3.1 if xi == yt then set q{i-t-1} to b-1, 
00525      * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
00526     if (x.dp[i] == y.dp[t]) {
00527       q.dp[i - t - 1] = (fp_digit) ((((fp_word)1) << DIGIT_BIT) - 1);
00528     } else {
00529       fp_word tmp;
00530       tmp = ((fp_word) x.dp[i]) << ((fp_word) DIGIT_BIT);
00531       tmp |= ((fp_word) x.dp[i - 1]);
00532       tmp /= ((fp_word)y.dp[t]);
00533       q.dp[i - t - 1] = (fp_digit) (tmp);
00534     }
00535 
00536     /* while (q{i-t-1} * (yt * b + y{t-1})) > 
00537              xi * b**2 + xi-1 * b + xi-2 
00538      
00539        do q{i-t-1} -= 1; 
00540     */
00541     q.dp[i - t - 1] = (q.dp[i - t - 1] + 1);
00542     do {
00543       q.dp[i - t - 1] = (q.dp[i - t - 1] - 1);
00544 
00545       /* find left hand */
00546       fp_zero (&t1);
00547       t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
00548       t1.dp[1] = y.dp[t];
00549       t1.used = 2;
00550       fp_mul_d (&t1, q.dp[i - t - 1], &t1);
00551 
00552       /* find right hand */
00553       t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
00554       t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
00555       t2.dp[2] = x.dp[i];
00556       t2.used = 3;
00557     } while (fp_cmp_mag(&t1, &t2) == FP_GT);
00558 
00559     /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
00560     fp_mul_d (&y, q.dp[i - t - 1], &t1);
00561     fp_lshd  (&t1, i - t - 1);
00562     fp_sub   (&x, &t1, &x);
00563 
00564     /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
00565     if (x.sign == FP_NEG) {
00566       fp_copy (&y, &t1);
00567       fp_lshd (&t1, i - t - 1);
00568       fp_add (&x, &t1, &x);
00569       q.dp[i - t - 1] = q.dp[i - t - 1] - 1;
00570     }
00571   }
00572 
00573   /* now q is the quotient and x is the remainder 
00574    * [which we have to normalize] 
00575    */
00576   
00577   /* get sign before writing to c */
00578   x.sign = x.used == 0 ? FP_ZPOS : a->sign;
00579 
00580   if (c != NULL) {
00581     fp_clamp (&q);
00582     fp_copy (&q, c);
00583     c->sign = neg;
00584   }
00585 
00586   if (d != NULL) {
00587     fp_div_2d (&x, norm, &x, NULL);
00588 
00589 /* the following is a kludge, essentially we were seeing the right remainder but 
00590    with excess digits that should have been zero
00591  */
00592     for (i = b->used; i < x.used; i++) {
00593         x.dp[i] = 0;
00594     }
00595     fp_clamp(&x);
00596     fp_copy (&x, d);
00597   }
00598 
00599   return FP_OKAY;
00600 }
00601 
00602 /* b = a/2 */
00603 void fp_div_2(fp_int * a, fp_int * b)
00604 {
00605   int     x, oldused;
00606 
00607   oldused = b->used;
00608   b->used = a->used;
00609   {
00610     register fp_digit r, rr, *tmpa, *tmpb;
00611 
00612     /* source alias */
00613     tmpa = a->dp + b->used - 1;
00614 
00615     /* dest alias */
00616     tmpb = b->dp + b->used - 1;
00617 
00618     /* carry */
00619     r = 0;
00620     for (x = b->used - 1; x >= 0; x--) {
00621       /* get the carry for the next iteration */
00622       rr = *tmpa & 1;
00623 
00624       /* shift the current digit, add in carry and store */
00625       *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
00626 
00627       /* forward carry to next iteration */
00628       r = rr;
00629     }
00630 
00631     /* zero excess digits */
00632     tmpb = b->dp + b->used;
00633     for (x = b->used; x < oldused; x++) {
00634       *tmpb++ = 0;
00635     }
00636   }
00637   b->sign = a->sign;
00638   fp_clamp (b);
00639 }
00640 
00641 /* c = a / 2**b */
00642 void fp_div_2d(fp_int *a, int b, fp_int *c, fp_int *d)
00643 {
00644   fp_digit D, r, rr;
00645   int      x;
00646   fp_int   t;
00647 
00648   /* if the shift count is <= 0 then we do no work */
00649   if (b <= 0) {
00650     fp_copy (a, c);
00651     if (d != NULL) {
00652       fp_zero (d);
00653     }
00654     return;
00655   }
00656 
00657   fp_init(&t);
00658 
00659   /* get the remainder */
00660   if (d != NULL) {
00661     fp_mod_2d (a, b, &t);
00662   }
00663 
00664   /* copy */
00665   fp_copy(a, c);
00666 
00667   /* shift by as many digits in the bit count */
00668   if (b >= (int)DIGIT_BIT) {
00669     fp_rshd (c, b / DIGIT_BIT);
00670   }
00671 
00672   /* shift any bit count < DIGIT_BIT */
00673   D = (fp_digit) (b % DIGIT_BIT);
00674   if (D != 0) {
00675     register fp_digit *tmpc, mask, shift;
00676 
00677     /* mask */
00678     mask = (((fp_digit)1) << D) - 1;
00679 
00680     /* shift for lsb */
00681     shift = DIGIT_BIT - D;
00682 
00683     /* alias */
00684     tmpc = c->dp + (c->used - 1);
00685 
00686     /* carry */
00687     r = 0;
00688     for (x = c->used - 1; x >= 0; x--) {
00689       /* get the lower  bits of this word in a temp */
00690       rr = *tmpc & mask;
00691 
00692       /* shift the current word and mix in the carry bits from the previous word */
00693       *tmpc = (*tmpc >> D) | (r << shift);
00694       --tmpc;
00695 
00696       /* set the carry to the carry bits of the current word found above */
00697       r = rr;
00698     }
00699   }
00700   fp_clamp (c);
00701   if (d != NULL) {
00702     fp_copy (&t, d);
00703   }
00704 }
00705 
00706 /* c = a mod b, 0 <= c < b  */
00707 int fp_mod(fp_int *a, fp_int *b, fp_int *c)
00708 {
00709    fp_int t;
00710    int    err;
00711 
00712    fp_zero(&t);
00713    if ((err = fp_div(a, b, NULL, &t)) != FP_OKAY) {
00714       return err;
00715    }
00716    if (t.sign != b->sign) {
00717       fp_add(&t, b, c);
00718    } else {
00719       fp_copy(&t, c);
00720   }
00721   return FP_OKAY;
00722 }
00723 
00724 /* c = a mod 2**d */
00725 void fp_mod_2d(fp_int *a, int b, fp_int *c)
00726 {
00727    int x;
00728 
00729    /* zero if count less than or equal to zero */
00730    if (b <= 0) {
00731       fp_zero(c);
00732       return;
00733    }
00734 
00735    /* get copy of input */
00736    fp_copy(a, c);
00737  
00738    /* if 2**d is larger than we just return */
00739    if (b >= (DIGIT_BIT * a->used)) {
00740       return;
00741    }
00742 
00743   /* zero digits above the last digit of the modulus */
00744   for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
00745     c->dp[x] = 0;
00746   }
00747   /* clear the digit that is not completely outside/inside the modulus */
00748   c->dp[b / DIGIT_BIT] &= ~((fp_digit)0) >> (DIGIT_BIT - b);
00749   fp_clamp (c);
00750 }
00751 
00752 static int fp_invmod_slow (fp_int * a, fp_int * b, fp_int * c)
00753 {
00754   fp_int  x, y, u, v, A, B, C, D;
00755   int     res;
00756 
00757   /* b cannot be negative */
00758   if (b->sign == FP_NEG || fp_iszero(b) == 1) {
00759     return FP_VAL;
00760   }
00761 
00762   /* init temps */
00763   fp_init(&x);    fp_init(&y);
00764   fp_init(&u);    fp_init(&v);
00765   fp_init(&A);    fp_init(&B);
00766   fp_init(&C);    fp_init(&D);
00767 
00768   /* x = a, y = b */
00769   if ((res = fp_mod(a, b, &x)) != FP_OKAY) {
00770       return res;
00771   }
00772   fp_copy(b, &y);
00773 
00774   /* 2. [modified] if x,y are both even then return an error! */
00775   if (fp_iseven (&x) == 1 && fp_iseven (&y) == 1) {
00776     return FP_VAL;
00777   }
00778 
00779   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
00780   fp_copy (&x, &u);
00781   fp_copy (&y, &v);
00782   fp_set (&A, 1);
00783   fp_set (&D, 1);
00784 
00785 top:
00786   /* 4.  while u is even do */
00787   while (fp_iseven (&u) == 1) {
00788     /* 4.1 u = u/2 */
00789     fp_div_2 (&u, &u);
00790 
00791     /* 4.2 if A or B is odd then */
00792     if (fp_isodd (&A) == 1 || fp_isodd (&B) == 1) {
00793       /* A = (A+y)/2, B = (B-x)/2 */
00794       fp_add (&A, &y, &A);
00795       fp_sub (&B, &x, &B);
00796     }
00797     /* A = A/2, B = B/2 */
00798     fp_div_2 (&A, &A);
00799     fp_div_2 (&B, &B);
00800   }
00801 
00802   /* 5.  while v is even do */
00803   while (fp_iseven (&v) == 1) {
00804     /* 5.1 v = v/2 */
00805     fp_div_2 (&v, &v);
00806 
00807     /* 5.2 if C or D is odd then */
00808     if (fp_isodd (&C) == 1 || fp_isodd (&D) == 1) {
00809       /* C = (C+y)/2, D = (D-x)/2 */
00810       fp_add (&C, &y, &C);
00811       fp_sub (&D, &x, &D);
00812     }
00813     /* C = C/2, D = D/2 */
00814     fp_div_2 (&C, &C);
00815     fp_div_2 (&D, &D);
00816   }
00817 
00818   /* 6.  if u >= v then */
00819   if (fp_cmp (&u, &v) != FP_LT) {
00820     /* u = u - v, A = A - C, B = B - D */
00821     fp_sub (&u, &v, &u);
00822     fp_sub (&A, &C, &A);
00823     fp_sub (&B, &D, &B);
00824   } else {
00825     /* v - v - u, C = C - A, D = D - B */
00826     fp_sub (&v, &u, &v);
00827     fp_sub (&C, &A, &C);
00828     fp_sub (&D, &B, &D);
00829   }
00830 
00831   /* if not zero goto step 4 */
00832   if (fp_iszero (&u) == 0)
00833     goto top;
00834 
00835   /* now a = C, b = D, gcd == g*v */
00836 
00837   /* if v != 1 then there is no inverse */
00838   if (fp_cmp_d (&v, 1) != FP_EQ) {
00839     return FP_VAL;
00840   }
00841 
00842   /* if its too low */
00843   while (fp_cmp_d(&C, 0) == FP_LT) {
00844       fp_add(&C, b, &C);
00845   }
00846   
00847   /* too big */
00848   while (fp_cmp_mag(&C, b) != FP_LT) {
00849       fp_sub(&C, b, &C);
00850   }
00851   
00852   /* C is now the inverse */
00853   fp_copy(&C, c);
00854   return FP_OKAY;
00855 }
00856 
00857 /* c = 1/a (mod b) for odd b only */
00858 int fp_invmod(fp_int *a, fp_int *b, fp_int *c)
00859 {
00860   fp_int  x, y, u, v, B, D;
00861   int     neg;
00862 
00863   /* 2. [modified] b must be odd   */
00864   if (fp_iseven (b) == FP_YES) {
00865     return fp_invmod_slow(a,b,c);
00866   }
00867 
00868   /* init all our temps */
00869   fp_init(&x);  fp_init(&y);
00870   fp_init(&u);  fp_init(&v);
00871   fp_init(&B);  fp_init(&D);
00872 
00873   /* x == modulus, y == value to invert */
00874   fp_copy(b, &x);
00875 
00876   /* we need y = |a| */
00877   fp_abs(a, &y);
00878 
00879   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
00880   fp_copy(&x, &u);
00881   fp_copy(&y, &v);
00882   fp_set (&D, 1);
00883 
00884 top:
00885   /* 4.  while u is even do */
00886   while (fp_iseven (&u) == FP_YES) {
00887     /* 4.1 u = u/2 */
00888     fp_div_2 (&u, &u);
00889 
00890     /* 4.2 if B is odd then */
00891     if (fp_isodd (&B) == FP_YES) {
00892       fp_sub (&B, &x, &B);
00893     }
00894     /* B = B/2 */
00895     fp_div_2 (&B, &B);
00896   }
00897 
00898   /* 5.  while v is even do */
00899   while (fp_iseven (&v) == FP_YES) {
00900     /* 5.1 v = v/2 */
00901     fp_div_2 (&v, &v);
00902 
00903     /* 5.2 if D is odd then */
00904     if (fp_isodd (&D) == FP_YES) {
00905       /* D = (D-x)/2 */
00906       fp_sub (&D, &x, &D);
00907     }
00908     /* D = D/2 */
00909     fp_div_2 (&D, &D);
00910   }
00911 
00912   /* 6.  if u >= v then */
00913   if (fp_cmp (&u, &v) != FP_LT) {
00914     /* u = u - v, B = B - D */
00915     fp_sub (&u, &v, &u);
00916     fp_sub (&B, &D, &B);
00917   } else {
00918     /* v - v - u, D = D - B */
00919     fp_sub (&v, &u, &v);
00920     fp_sub (&D, &B, &D);
00921   }
00922 
00923   /* if not zero goto step 4 */
00924   if (fp_iszero (&u) == FP_NO) {
00925     goto top;
00926   }
00927 
00928   /* now a = C, b = D, gcd == g*v */
00929 
00930   /* if v != 1 then there is no inverse */
00931   if (fp_cmp_d (&v, 1) != FP_EQ) {
00932     return FP_VAL;
00933   }
00934 
00935   /* b is now the inverse */
00936   neg = a->sign;
00937   while (D.sign == FP_NEG) {
00938     fp_add (&D, b, &D);
00939   }
00940   fp_copy (&D, c);
00941   c->sign = neg;
00942   return FP_OKAY;
00943 }
00944 
00945 /* d = a * b (mod c) */
00946 int fp_mulmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
00947 {
00948   fp_int tmp;
00949   fp_zero(&tmp);
00950   fp_mul(a, b, &tmp);
00951   return fp_mod(&tmp, c, d);
00952 }
00953 
00954 #ifdef TFM_TIMING_RESISTANT
00955 
00956 /* timing resistant montgomery ladder based exptmod 
00957 
00958    Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder", Cryptographic Hardware and Embedded Systems, CHES 2002
00959 */
00960 static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
00961 {
00962   fp_int   R[2];
00963   fp_digit buf, mp;
00964   int      err, bitcnt, digidx, y;
00965 
00966   /* now setup montgomery  */
00967   if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) {
00968      return err;
00969   }
00970 
00971   fp_init(&R[0]);   
00972   fp_init(&R[1]);   
00973    
00974   /* now we need R mod m */
00975   fp_montgomery_calc_normalization (&R[0], P);
00976 
00977   /* now set R[0][1] to G * R mod m */
00978   if (fp_cmp_mag(P, G) != FP_GT) {
00979      /* G > P so we reduce it first */
00980      fp_mod(G, P, &R[1]);
00981   } else {
00982      fp_copy(G, &R[1]);
00983   }
00984   fp_mulmod (&R[1], &R[0], P, &R[1]);
00985 
00986   /* for j = t-1 downto 0 do
00987         r_!k = R0*R1; r_k = r_k^2
00988   */
00989   
00990   /* set initial mode and bit cnt */
00991   bitcnt = 1;
00992   buf    = 0;
00993   digidx = X->used - 1;
00994 
00995   for (;;) {
00996     /* grab next digit as required */
00997     if (--bitcnt == 0) {
00998       /* if digidx == -1 we are out of digits so break */
00999       if (digidx == -1) {
01000         break;
01001       }
01002       /* read next digit and reset bitcnt */
01003       buf    = X->dp[digidx--];
01004       bitcnt = (int)DIGIT_BIT;
01005     }
01006 
01007     /* grab the next msb from the exponent */
01008     y     = (int)(buf >> (DIGIT_BIT - 1)) & 1;
01009     buf <<= (fp_digit)1;
01010 
01011     /* do ops */
01012     fp_mul(&R[0], &R[1], &R[y^1]); fp_montgomery_reduce(&R[y^1], P, mp);
01013     fp_sqr(&R[y], &R[y]);          fp_montgomery_reduce(&R[y], P, mp);
01014   }
01015 
01016    fp_montgomery_reduce(&R[0], P, mp);
01017    fp_copy(&R[0], Y);
01018    return FP_OKAY;
01019 }   
01020 
01021 #else
01022 
01023 /* y = g**x (mod b) 
01024  * Some restrictions... x must be positive and < b
01025  */
01026 static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
01027 {
01028   fp_int   M[64], res;
01029   fp_digit buf, mp;
01030   int      err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
01031 
01032   /* find window size */
01033   x = fp_count_bits (X);
01034   if (x <= 21) {
01035     winsize = 1;
01036   } else if (x <= 36) {
01037     winsize = 3;
01038   } else if (x <= 140) {
01039     winsize = 4;
01040   } else if (x <= 450) {
01041     winsize = 5;
01042   } else {
01043     winsize = 6;
01044   } 
01045 
01046   /* init M array */
01047   XMEMSET(M, 0, sizeof(M)); 
01048 
01049   /* now setup montgomery  */
01050   if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) {
01051      return err;
01052   }
01053 
01054   /* setup result */
01055   fp_init(&res);
01056 
01057   /* create M table
01058    *
01059    * The M table contains powers of the input base, e.g. M[x] = G^x mod P
01060    *
01061    * The first half of the table is not computed though accept for M[0] and M[1]
01062    */
01063 
01064    /* now we need R mod m */
01065    fp_montgomery_calc_normalization (&res, P);
01066 
01067    /* now set M[1] to G * R mod m */
01068    if (fp_cmp_mag(P, G) != FP_GT) {
01069       /* G > P so we reduce it first */
01070       fp_mod(G, P, &M[1]);
01071    } else {
01072       fp_copy(G, &M[1]);
01073    }
01074    fp_mulmod (&M[1], &res, P, &M[1]);
01075 
01076   /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
01077   fp_copy (&M[1], &M[1 << (winsize - 1)]);
01078   for (x = 0; x < (winsize - 1); x++) {
01079     fp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)]);
01080     fp_montgomery_reduce (&M[1 << (winsize - 1)], P, mp);
01081   }
01082 
01083   /* create upper table */
01084   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
01085     fp_mul(&M[x - 1], &M[1], &M[x]);
01086     fp_montgomery_reduce(&M[x], P, mp);
01087   }
01088 
01089   /* set initial mode and bit cnt */
01090   mode   = 0;
01091   bitcnt = 1;
01092   buf    = 0;
01093   digidx = X->used - 1;
01094   bitcpy = 0;
01095   bitbuf = 0;
01096 
01097   for (;;) {
01098     /* grab next digit as required */
01099     if (--bitcnt == 0) {
01100       /* if digidx == -1 we are out of digits so break */
01101       if (digidx == -1) {
01102         break;
01103       }
01104       /* read next digit and reset bitcnt */
01105       buf    = X->dp[digidx--];
01106       bitcnt = (int)DIGIT_BIT;
01107     }
01108 
01109     /* grab the next msb from the exponent */
01110     y     = (int)(buf >> (DIGIT_BIT - 1)) & 1;
01111     buf <<= (fp_digit)1;
01112 
01113     /* if the bit is zero and mode == 0 then we ignore it
01114      * These represent the leading zero bits before the first 1 bit
01115      * in the exponent.  Technically this opt is not required but it
01116      * does lower the # of trivial squaring/reductions used
01117      */
01118     if (mode == 0 && y == 0) {
01119       continue;
01120     }
01121 
01122     /* if the bit is zero and mode == 1 then we square */
01123     if (mode == 1 && y == 0) {
01124       fp_sqr(&res, &res);
01125       fp_montgomery_reduce(&res, P, mp);
01126       continue;
01127     }
01128 
01129     /* else we add it to the window */
01130     bitbuf |= (y << (winsize - ++bitcpy));
01131     mode    = 2;
01132 
01133     if (bitcpy == winsize) {
01134       /* ok window is filled so square as required and multiply  */
01135       /* square first */
01136       for (x = 0; x < winsize; x++) {
01137         fp_sqr(&res, &res);
01138         fp_montgomery_reduce(&res, P, mp);
01139       }
01140 
01141       /* then multiply */
01142       fp_mul(&res, &M[bitbuf], &res);
01143       fp_montgomery_reduce(&res, P, mp);
01144 
01145       /* empty window and reset */
01146       bitcpy = 0;
01147       bitbuf = 0;
01148       mode   = 1;
01149     }
01150   }
01151 
01152   /* if bits remain then square/multiply */
01153   if (mode == 2 && bitcpy > 0) {
01154     /* square then multiply if the bit is set */
01155     for (x = 0; x < bitcpy; x++) {
01156       fp_sqr(&res, &res);
01157       fp_montgomery_reduce(&res, P, mp);
01158 
01159       /* get next bit of the window */
01160       bitbuf <<= 1;
01161       if ((bitbuf & (1 << winsize)) != 0) {
01162         /* then multiply */
01163         fp_mul(&res, &M[1], &res);
01164         fp_montgomery_reduce(&res, P, mp);
01165       }
01166     }
01167   }
01168 
01169   /* fixup result if Montgomery reduction is used
01170    * recall that any value in a Montgomery system is
01171    * actually multiplied by R mod n.  So we have
01172    * to reduce one more time to cancel out the factor
01173    * of R.
01174    */
01175   fp_montgomery_reduce(&res, P, mp);
01176 
01177   /* swap res with Y */
01178   fp_copy (&res, Y);
01179   return FP_OKAY;
01180 }
01181 
01182 #endif
01183 
01184 int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
01185 {
01186    /* prevent overflows */
01187    if (P->used > (FP_SIZE/2)) {
01188       return FP_VAL;
01189    }
01190 
01191    if (X->sign == FP_NEG) {
01192 #ifndef POSITIVE_EXP_ONLY  /* reduce stack if assume no negatives */
01193       int    err;
01194       fp_int tmp;
01195 
01196       /* yes, copy G and invmod it */
01197       fp_copy(G, &tmp);
01198       if ((err = fp_invmod(&tmp, P, &tmp)) != FP_OKAY) {
01199          return err;
01200       }
01201       X->sign = FP_ZPOS;
01202       err =  _fp_exptmod(&tmp, X, P, Y);
01203       if (X != Y) {
01204          X->sign = FP_NEG;
01205       }
01206       return err;
01207 #else
01208       return FP_VAL;
01209 #endif 
01210    }
01211    else {
01212       /* Positive exponent so just exptmod */
01213       return _fp_exptmod(G, X, P, Y);
01214    }
01215 }
01216 
01217 /* computes a = 2**b */
01218 void fp_2expt(fp_int *a, int b)
01219 {
01220    int     z;
01221 
01222    /* zero a as per default */
01223    fp_zero (a);
01224 
01225    if (b < 0) { 
01226       return;
01227    }
01228 
01229    z = b / DIGIT_BIT;
01230    if (z >= FP_SIZE) {
01231       return; 
01232    }
01233 
01234   /* set the used count of where the bit will go */
01235   a->used = z + 1;
01236 
01237   /* put the single bit in its place */
01238   a->dp[z] = ((fp_digit)1) << (b % DIGIT_BIT);
01239 }
01240 
01241 /* b = a*a  */
01242 void fp_sqr(fp_int *A, fp_int *B)
01243 {
01244     int y = A->used;
01245 
01246     /* call generic if we're out of range */
01247     if (y + y > FP_SIZE) {
01248        fp_sqr_comba(A, B);
01249        return ;
01250     }
01251 
01252 #if defined(TFM_SQR3)
01253         if (y <= 3) {
01254            fp_sqr_comba3(A,B);
01255            return;
01256         }
01257 #endif
01258 #if defined(TFM_SQR4)
01259         if (y == 4) {
01260            fp_sqr_comba4(A,B);
01261            return;
01262         }
01263 #endif
01264 #if defined(TFM_SQR6)
01265         if (y <= 6) {
01266            fp_sqr_comba6(A,B);
01267            return;
01268         }
01269 #endif
01270 #if defined(TFM_SQR7)
01271         if (y == 7) {
01272            fp_sqr_comba7(A,B);
01273            return;
01274         }
01275 #endif
01276 #if defined(TFM_SQR8)
01277         if (y == 8) {
01278            fp_sqr_comba8(A,B);
01279            return;
01280         }
01281 #endif
01282 #if defined(TFM_SQR9)
01283         if (y == 9) {
01284            fp_sqr_comba9(A,B);
01285            return;
01286         }
01287 #endif
01288 #if defined(TFM_SQR12)
01289         if (y <= 12) {
01290            fp_sqr_comba12(A,B);
01291            return;
01292         }
01293 #endif
01294 #if defined(TFM_SQR17)
01295         if (y <= 17) {
01296            fp_sqr_comba17(A,B);
01297            return;
01298         }
01299 #endif
01300 #if defined(TFM_SMALL_SET)
01301         if (y <= 16) {
01302            fp_sqr_comba_small(A,B);
01303            return;
01304         }
01305 #endif
01306 #if defined(TFM_SQR20)
01307         if (y <= 20) {
01308            fp_sqr_comba20(A,B);
01309            return;
01310         }
01311 #endif
01312 #if defined(TFM_SQR24)
01313         if (y <= 24) {
01314            fp_sqr_comba24(A,B);
01315            return;
01316         }
01317 #endif
01318 #if defined(TFM_SQR28)
01319         if (y <= 28) {
01320            fp_sqr_comba28(A,B);
01321            return;
01322         }
01323 #endif
01324 #if defined(TFM_SQR32)
01325         if (y <= 32) {
01326            fp_sqr_comba32(A,B);
01327            return;
01328         }
01329 #endif
01330 #if defined(TFM_SQR48)
01331         if (y <= 48) {
01332            fp_sqr_comba48(A,B);
01333            return;
01334         }
01335 #endif
01336 #if defined(TFM_SQR64)
01337         if (y <= 64) {
01338            fp_sqr_comba64(A,B);
01339            return;
01340         }
01341 #endif
01342        fp_sqr_comba(A, B);
01343 }
01344 
01345 /* generic comba squarer */
01346 void fp_sqr_comba(fp_int *A, fp_int *B)
01347 {
01348   int       pa, ix, iz;
01349   fp_digit  c0, c1, c2;
01350   fp_int    tmp, *dst;
01351 #ifdef TFM_ISO
01352   fp_word   tt;
01353 #endif    
01354 
01355   /* get size of output and trim */
01356   pa = A->used + A->used;
01357   if (pa >= FP_SIZE) {
01358      pa = FP_SIZE-1;
01359   }
01360 
01361   /* number of output digits to produce */
01362   COMBA_START;
01363   COMBA_CLEAR;
01364 
01365   if (A == B) {
01366      fp_zero(&tmp);
01367      dst = &tmp;
01368   } else {
01369      fp_zero(B);
01370      dst = B;
01371   }
01372 
01373   for (ix = 0; ix < pa; ix++) { 
01374       int      tx, ty, iy;
01375       fp_digit *tmpy, *tmpx;
01376 
01377       /* get offsets into the two bignums */
01378       ty = MIN(A->used-1, ix);
01379       tx = ix - ty;
01380 
01381       /* setup temp aliases */
01382       tmpx = A->dp + tx;
01383       tmpy = A->dp + ty;
01384 
01385       /* this is the number of times the loop will iterrate,
01386          while (tx++ < a->used && ty-- >= 0) { ... }
01387        */
01388       iy = MIN(A->used-tx, ty+1);
01389 
01390       /* now for squaring tx can never equal ty 
01391        * we halve the distance since they approach 
01392        * at a rate of 2x and we have to round because 
01393        * odd cases need to be executed
01394        */
01395       iy = MIN(iy, (ty-tx+1)>>1);
01396 
01397       /* forward carries */
01398       COMBA_FORWARD;
01399 
01400       /* execute loop */
01401       for (iz = 0; iz < iy; iz++) {
01402           SQRADD2(*tmpx++, *tmpy--);
01403       }
01404 
01405       /* even columns have the square term in them */
01406       if ((ix&1) == 0) {
01407           /* TAO change COMBA_ADD back to SQRADD */
01408           SQRADD(A->dp[ix>>1], A->dp[ix>>1]);
01409       }
01410 
01411       /* store it */
01412       COMBA_STORE(dst->dp[ix]);
01413   }
01414 
01415   COMBA_FINI;
01416 
01417   /* setup dest */
01418   dst->used = pa;
01419   fp_clamp (dst);
01420   if (dst != B) {
01421      fp_copy(dst, B);
01422   }
01423 }
01424 
01425 int fp_cmp(fp_int *a, fp_int *b)
01426 {
01427    if (a->sign == FP_NEG && b->sign == FP_ZPOS) {
01428       return FP_LT;
01429    } else if (a->sign == FP_ZPOS && b->sign == FP_NEG) {
01430       return FP_GT;
01431    } else {
01432       /* compare digits */
01433       if (a->sign == FP_NEG) {
01434          /* if negative compare opposite direction */
01435          return fp_cmp_mag(b, a);
01436       } else {
01437          return fp_cmp_mag(a, b);
01438       }
01439    }
01440 }
01441 
01442 /* compare against a single digit */
01443 int fp_cmp_d(fp_int *a, fp_digit b)
01444 {
01445   /* compare based on sign */
01446   if ((b && a->used == 0) || a->sign == FP_NEG) {
01447     return FP_LT;
01448   }
01449 
01450   /* compare based on magnitude */
01451   if (a->used > 1) {
01452     return FP_GT;
01453   }
01454 
01455   /* compare the only digit of a to b */
01456   if (a->dp[0] > b) {
01457     return FP_GT;
01458   } else if (a->dp[0] < b) {
01459     return FP_LT;
01460   } else {
01461     return FP_EQ;
01462   }
01463 
01464 }
01465 
01466 int fp_cmp_mag(fp_int *a, fp_int *b)
01467 {
01468    int x;
01469 
01470    if (a->used > b->used) {
01471       return FP_GT;
01472    } else if (a->used < b->used) {
01473       return FP_LT;
01474    } else {
01475       for (x = a->used - 1; x >= 0; x--) {
01476           if (a->dp[x] > b->dp[x]) {
01477              return FP_GT;
01478           } else if (a->dp[x] < b->dp[x]) {
01479              return FP_LT;
01480           }
01481       }
01482    }
01483    return FP_EQ;
01484 }
01485 
01486 /* setups the montgomery reduction */
01487 int fp_montgomery_setup(fp_int *a, fp_digit *rho)
01488 {
01489   fp_digit x, b;
01490 
01491 /* fast inversion mod 2**k
01492  *
01493  * Based on the fact that
01494  *
01495  * XA = 1 (mod 2**n)  =>  (X(2-XA)) A = 1 (mod 2**2n)
01496  *                    =>  2*X*A - X*X*A*A = 1
01497  *                    =>  2*(1) - (1)     = 1
01498  */
01499   b = a->dp[0];
01500 
01501   if ((b & 1) == 0) {
01502     return FP_VAL;
01503   }
01504 
01505   x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
01506   x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
01507   x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
01508   x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
01509 #ifdef FP_64BIT
01510   x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
01511 #endif
01512 
01513   /* rho = -1/m mod b */
01514   *rho = (fp_digit) (((fp_word) 1 << ((fp_word) DIGIT_BIT)) - ((fp_word)x));
01515 
01516   return FP_OKAY;
01517 }
01518 
01519 /* computes a = B**n mod b without division or multiplication useful for
01520  * normalizing numbers in a Montgomery system.
01521  */
01522 void fp_montgomery_calc_normalization(fp_int *a, fp_int *b)
01523 {
01524   int     x, bits;
01525 
01526   /* how many bits of last digit does b use */
01527   bits = fp_count_bits (b) % DIGIT_BIT;
01528   if (!bits) bits = DIGIT_BIT;
01529 
01530   /* compute A = B^(n-1) * 2^(bits-1) */
01531   if (b->used > 1) {
01532      fp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1);
01533   } else {
01534      fp_set(a, 1);
01535      bits = 1;
01536   }
01537 
01538   /* now compute C = A * B mod b */
01539   for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
01540     fp_mul_2 (a, a);
01541     if (fp_cmp_mag (a, b) != FP_LT) {
01542       s_fp_sub (a, b, a);
01543     }
01544   }
01545 }
01546 
01547 
01548 #ifdef TFM_SMALL_MONT_SET
01549     #include "fp_mont_small.i"
01550 #endif
01551 
01552 /* computes x/R == x (mod N) via Montgomery Reduction */
01553 void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
01554 {
01555    fp_digit c[FP_SIZE], *_c, *tmpm, mu = 0;
01556    int      oldused, x, y, pa;
01557 
01558    /* bail if too large */
01559    if (m->used > (FP_SIZE/2)) {
01560       (void)mu;                     /* shut up compiler */
01561       return;
01562    }
01563 
01564 #ifdef TFM_SMALL_MONT_SET
01565    if (m->used <= 16) {
01566       fp_montgomery_reduce_small(a, m, mp);
01567       return;
01568    }
01569 #endif
01570 
01571 
01572    /* now zero the buff */
01573    XMEMSET(c, 0, sizeof c);
01574    pa = m->used;
01575 
01576    /* copy the input */
01577    oldused = a->used;
01578    for (x = 0; x < oldused; x++) {
01579        c[x] = a->dp[x];
01580    }
01581    MONT_START;
01582 
01583    for (x = 0; x < pa; x++) {
01584        fp_digit cy = 0;
01585        /* get Mu for this round */
01586        LOOP_START;
01587        _c   = c + x;
01588        tmpm = m->dp;
01589        y = 0;
01590        #if (defined(TFM_SSE2) || defined(TFM_X86_64))
01591         for (; y < (pa & ~7); y += 8) {
01592               INNERMUL8;
01593               _c   += 8;
01594               tmpm += 8;
01595            }
01596        #endif
01597 
01598        for (; y < pa; y++) {
01599           INNERMUL;
01600           ++_c;
01601        }
01602        LOOP_END;
01603        while (cy) {
01604            PROPCARRY;
01605            ++_c;
01606        }
01607   }         
01608 
01609   /* now copy out */
01610   _c   = c + pa;
01611   tmpm = a->dp;
01612   for (x = 0; x < pa+1; x++) {
01613      *tmpm++ = *_c++;
01614   }
01615 
01616   for (; x < oldused; x++)   {
01617      *tmpm++ = 0;
01618   }
01619 
01620   MONT_FINI;
01621 
01622   a->used = pa+1;
01623   fp_clamp(a);
01624   
01625   /* if A >= m then A = A - m */
01626   if (fp_cmp_mag (a, m) != FP_LT) {
01627     s_fp_sub (a, m, a);
01628   }
01629 }
01630 
01631 void fp_read_unsigned_bin(fp_int *a, unsigned char *b, int c)
01632 {
01633   /* zero the int */
01634   fp_zero (a);
01635 
01636   /* If we know the endianness of this architecture, and we're using
01637      32-bit fp_digits, we can optimize this */
01638 #if (defined(LITTLE_ENDIAN_ORDER) || defined(BIG_ENDIAN_ORDER)) && !defined(FP_64BIT)
01639   /* But not for both simultaneously */
01640 #if defined(LITTLE_ENDIAN_ORDER) && defined(BIG_ENDIAN_ORDER)
01641 #error Both LITTLE_ENDIAN_ORDER and BIG_ENDIAN_ORDER defined.
01642 #endif
01643   {
01644      unsigned char *pd = (unsigned char *)a->dp;
01645 
01646      if ((unsigned)c > (FP_SIZE * sizeof(fp_digit))) {
01647         int excess = c - (FP_SIZE * sizeof(fp_digit));
01648         c -= excess;
01649         b += excess;
01650      }
01651      a->used = (c + sizeof(fp_digit) - 1)/sizeof(fp_digit);
01652      /* read the bytes in */
01653 #ifdef BIG_ENDIAN_ORDER
01654      {
01655        /* Use Duff's device to unroll the loop. */
01656        int idx = (c - 1) & ~3;
01657        switch (c % 4) {
01658        case 0:  do { pd[idx+0] = *b++;
01659        case 3:       pd[idx+1] = *b++;
01660        case 2:       pd[idx+2] = *b++;
01661        case 1:       pd[idx+3] = *b++;
01662                      idx -= 4;
01663                 } while ((c -= 4) > 0);
01664        }
01665      }
01666 #else
01667      for (c -= 1; c >= 0; c -= 1) {
01668        pd[c] = *b++;
01669      }
01670 #endif
01671   }
01672 #else
01673   /* read the bytes in */
01674   for (; c > 0; c--) {
01675      fp_mul_2d (a, 8, a);
01676      a->dp[0] |= *b++;
01677      a->used += 1;
01678   }
01679 #endif
01680   fp_clamp (a);
01681 }
01682 
01683 void fp_to_unsigned_bin(fp_int *a, unsigned char *b)
01684 {
01685   int     x;
01686   fp_int  t;
01687 
01688   fp_init_copy(&t, a);
01689 
01690   x = 0;
01691   while (fp_iszero (&t) == FP_NO) {
01692       b[x++] = (unsigned char) (t.dp[0] & 255);
01693       fp_div_2d (&t, 8, &t, NULL);
01694   }
01695   fp_reverse (b, x);
01696 }
01697 
01698 int fp_unsigned_bin_size(fp_int *a)
01699 {
01700   int     size = fp_count_bits (a);
01701   return (size / 8 + ((size & 7) != 0 ? 1 : 0));
01702 }
01703 
01704 void fp_set(fp_int *a, fp_digit b)
01705 {
01706    fp_zero(a);
01707    a->dp[0] = b;
01708    a->used  = a->dp[0] ? 1 : 0;
01709 }
01710 
01711 int fp_count_bits (fp_int * a)
01712 {
01713   int     r;
01714   fp_digit q;
01715 
01716   /* shortcut */
01717   if (a->used == 0) {
01718     return 0;
01719   }
01720 
01721   /* get number of digits and add that */
01722   r = (a->used - 1) * DIGIT_BIT;
01723 
01724   /* take the last digit and count the bits in it */
01725   q = a->dp[a->used - 1];
01726   while (q > ((fp_digit) 0)) {
01727     ++r;
01728     q >>= ((fp_digit) 1);
01729   }
01730   return r;
01731 }
01732 
01733 void fp_lshd(fp_int *a, int x)
01734 {
01735    int y;
01736 
01737    /* move up and truncate as required */
01738    y = MIN(a->used + x - 1, (int)(FP_SIZE-1));
01739 
01740    /* store new size */
01741    a->used = y + 1;
01742 
01743    /* move digits */
01744    for (; y >= x; y--) {
01745        a->dp[y] = a->dp[y-x];
01746    }
01747  
01748    /* zero lower digits */
01749    for (; y >= 0; y--) {
01750        a->dp[y] = 0;
01751    }
01752 
01753    /* clamp digits */
01754    fp_clamp(a);
01755 }
01756 
01757 void fp_rshd(fp_int *a, int x)
01758 {
01759   int y;
01760 
01761   /* too many digits just zero and return */
01762   if (x >= a->used) {
01763      fp_zero(a);
01764      return;
01765   }
01766 
01767    /* shift */
01768    for (y = 0; y < a->used - x; y++) {
01769       a->dp[y] = a->dp[y+x];
01770    }
01771 
01772    /* zero rest */
01773    for (; y < a->used; y++) {
01774       a->dp[y] = 0;
01775    }
01776    
01777    /* decrement count */
01778    a->used -= x;
01779    fp_clamp(a);
01780 }
01781 
01782 /* reverse an array, used for radix code */
01783 void fp_reverse (unsigned char *s, int len)
01784 {
01785   int     ix, iy;
01786   unsigned char t;
01787 
01788   ix = 0;
01789   iy = len - 1;
01790   while (ix < iy) {
01791     t     = s[ix];
01792     s[ix] = s[iy];
01793     s[iy] = t;
01794     ++ix;
01795     --iy;
01796   }
01797 }
01798 
01799 
01800 /* c = a - b */
01801 void fp_sub_d(fp_int *a, fp_digit b, fp_int *c)
01802 {
01803    fp_int tmp;
01804    fp_set(&tmp, b);
01805    fp_sub(a, &tmp, c);
01806 }
01807 
01808 
01809 /* CyaSSL callers from normal lib */
01810 
01811 /* init a new mp_int */
01812 int mp_init (mp_int * a)
01813 {
01814   if (a)
01815     fp_init(a);
01816   return MP_OKAY;
01817 }
01818 
01819 /* clear one (frees)  */
01820 void mp_clear (mp_int * a)
01821 {
01822   fp_zero(a);
01823 }
01824 
01825 /* handle up to 6 inits */
01826 int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d, mp_int* e, mp_int* f)
01827 {
01828     if (a)
01829         fp_init(a);
01830     if (b)
01831         fp_init(b);
01832     if (c)
01833         fp_init(c);
01834     if (d)
01835         fp_init(d);
01836     if (e)
01837         fp_init(e);
01838     if (f)
01839         fp_init(f);
01840 
01841     return MP_OKAY;
01842 }
01843 
01844 /* high level addition (handles signs) */
01845 int mp_add (mp_int * a, mp_int * b, mp_int * c)
01846 {
01847   fp_add(a, b, c);
01848   return MP_OKAY;
01849 }
01850 
01851 /* high level subtraction (handles signs) */
01852 int mp_sub (mp_int * a, mp_int * b, mp_int * c)
01853 {
01854   fp_sub(a, b, c);
01855   return MP_OKAY;
01856 }
01857 
01858 /* high level multiplication (handles sign) */
01859 int mp_mul (mp_int * a, mp_int * b, mp_int * c)
01860 {
01861   fp_mul(a, b, c);
01862   return MP_OKAY;
01863 }
01864 
01865 /* d = a * b (mod c) */
01866 int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
01867 {
01868   return fp_mulmod(a, b, c, d);
01869 }
01870 
01871 /* c = a mod b, 0 <= c < b */
01872 int mp_mod (mp_int * a, mp_int * b, mp_int * c)
01873 {
01874   return fp_mod (a, b, c);
01875 }
01876 
01877 /* hac 14.61, pp608 */
01878 int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
01879 {
01880   return fp_invmod(a, b, c);
01881 }
01882 
01883 /* this is a shell function that calls either the normal or Montgomery
01884  * exptmod functions.  Originally the call to the montgomery code was
01885  * embedded in the normal function but that wasted alot of stack space
01886  * for nothing (since 99% of the time the Montgomery code would be called)
01887  */
01888 int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
01889 {
01890   return fp_exptmod(G, X, P, Y);
01891 }
01892 
01893 /* compare two ints (signed)*/
01894 int mp_cmp (mp_int * a, mp_int * b)
01895 {
01896   return fp_cmp(a, b);
01897 }
01898 
01899 /* compare a digit */
01900 int mp_cmp_d(mp_int * a, mp_digit b)
01901 {
01902   return fp_cmp_d(a, b);
01903 }
01904 
01905 /* get the size for an unsigned equivalent */
01906 int mp_unsigned_bin_size (mp_int * a)
01907 {
01908   return fp_unsigned_bin_size(a);
01909 }
01910 
01911 /* store in unsigned [big endian] format */
01912 int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
01913 {
01914   fp_to_unsigned_bin(a,b);
01915   return MP_OKAY;
01916 }
01917 
01918 /* reads a unsigned char array, assumes the msb is stored first [big endian] */
01919 int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
01920 {
01921   fp_read_unsigned_bin(a, (unsigned char *)b, c);
01922   return MP_OKAY;
01923 }
01924 
01925 
01926 int mp_sub_d(fp_int *a, fp_digit b, fp_int *c)
01927 {
01928     fp_sub_d(a, b, c);
01929     return MP_OKAY;
01930 }
01931 
01932 
01933 /* fast math conversion */
01934 int mp_copy(fp_int* a, fp_int* b)
01935 {
01936     fp_copy(a, b);
01937     return MP_OKAY;
01938 }
01939 
01940 
01941 /* fast math conversion */
01942 int mp_isodd(mp_int* a)
01943 {
01944     return fp_isodd(a);
01945 }
01946 
01947 
01948 /* fast math conversion */
01949 int mp_iszero(mp_int* a)
01950 {
01951     return fp_iszero(a);
01952 }
01953 
01954 
01955 /* fast math conversion */
01956 int mp_count_bits (mp_int* a)
01957 {
01958     return fp_count_bits(a);
01959 }
01960 
01961 
01962 /* fast math wrappers */
01963 int mp_set_int(fp_int *a, fp_digit b)
01964 {
01965     fp_set(a, b);
01966     return MP_OKAY;
01967 }
01968 
01969 
01970 #if defined(CYASSL_KEY_GEN) || defined (HAVE_ECC)
01971 
01972 /* c = a * a (mod b) */
01973 int fp_sqrmod(fp_int *a, fp_int *b, fp_int *c)
01974 {
01975   fp_int tmp;
01976   fp_zero(&tmp);
01977   fp_sqr(a, &tmp);
01978   return fp_mod(&tmp, b, c);
01979 }
01980 
01981 /* fast math conversion */
01982 int mp_sqrmod(mp_int *a, mp_int *b, mp_int *c)
01983 {
01984     return fp_sqrmod(a, b, c);
01985 }
01986 
01987 /* fast math conversion */
01988 int mp_montgomery_calc_normalization(mp_int *a, mp_int *b)
01989 {
01990     fp_montgomery_calc_normalization(a, b);
01991     return MP_OKAY;
01992 }
01993 
01994 #endif /* CYASSL_KEYGEN || HAVE_ECC */
01995 
01996 
01997 #ifdef CYASSL_KEY_GEN
01998 
01999 void fp_gcd(fp_int *a, fp_int *b, fp_int *c);
02000 void fp_lcm(fp_int *a, fp_int *b, fp_int *c);
02001 int  fp_isprime(fp_int *a);
02002 int  fp_cnt_lsb(fp_int *a);
02003 
02004 int mp_gcd(fp_int *a, fp_int *b, fp_int *c)
02005 {
02006     fp_gcd(a, b, c);
02007     return MP_OKAY;
02008 }
02009 
02010 
02011 int mp_lcm(fp_int *a, fp_int *b, fp_int *c)
02012 {
02013     fp_lcm(a, b, c);
02014     return MP_OKAY;
02015 }
02016 
02017 
02018 int mp_prime_is_prime(mp_int* a, int t, int* result)
02019 {
02020     (void)t;
02021     *result = fp_isprime(a);
02022     return MP_OKAY;
02023 }
02024 
02025 
02026 
02027 static int s_is_power_of_two(fp_digit b, int *p)
02028 {
02029    int x;
02030 
02031    /* fast return if no power of two */
02032    if ((b==0) || (b & (b-1))) {
02033       return 0;
02034    }
02035 
02036    for (x = 0; x < DIGIT_BIT; x++) {
02037       if (b == (((fp_digit)1)<<x)) {
02038          *p = x;
02039          return 1;
02040       }
02041    }
02042    return 0;
02043 }
02044 
02045 /* a/b => cb + d == a */
02046 static int fp_div_d(fp_int *a, fp_digit b, fp_int *c, fp_digit *d)
02047 {
02048   fp_int   q;
02049   fp_word  w;
02050   fp_digit t;
02051   int      ix;
02052 
02053   /* cannot divide by zero */
02054   if (b == 0) {
02055      return FP_VAL;
02056   }
02057 
02058   /* quick outs */
02059   if (b == 1 || fp_iszero(a) == 1) {
02060      if (d != NULL) {
02061         *d = 0;
02062      }
02063      if (c != NULL) {
02064         fp_copy(a, c);
02065      }
02066      return FP_OKAY;
02067   }
02068 
02069   /* power of two ? */
02070   if (s_is_power_of_two(b, &ix) == 1) {
02071      if (d != NULL) {
02072         *d = a->dp[0] & ((((fp_digit)1)<<ix) - 1);
02073      }
02074      if (c != NULL) {
02075         fp_div_2d(a, ix, c, NULL);
02076      }
02077      return FP_OKAY;
02078   }
02079 
02080   /* no easy answer [c'est la vie].  Just division */
02081   fp_init(&q);
02082   
02083   q.used = a->used;
02084   q.sign = a->sign;
02085   w = 0;
02086   for (ix = a->used - 1; ix >= 0; ix--) {
02087      w = (w << ((fp_word)DIGIT_BIT)) | ((fp_word)a->dp[ix]);
02088      
02089      if (w >= b) {
02090         t = (fp_digit)(w / b);
02091         w -= ((fp_word)t) * ((fp_word)b);
02092       } else {
02093         t = 0;
02094       }
02095       q.dp[ix] = (fp_digit)t;
02096   }
02097   
02098   if (d != NULL) {
02099      *d = (fp_digit)w;
02100   }
02101   
02102   if (c != NULL) {
02103      fp_clamp(&q);
02104      fp_copy(&q, c);
02105   }
02106  
02107   return FP_OKAY;
02108 }
02109 
02110 
02111 /* c = a mod b, 0 <= c < b  */
02112 static int fp_mod_d(fp_int *a, fp_digit b, fp_digit *c)
02113 {
02114    return fp_div_d(a, b, NULL, c);
02115 }
02116 
02117 
02118 /* Miller-Rabin test of "a" to the base of "b" as described in 
02119  * HAC pp. 139 Algorithm 4.24
02120  *
02121  * Sets result to 0 if definitely composite or 1 if probably prime.
02122  * Randomly the chance of error is no more than 1/4 and often 
02123  * very much lower.
02124  */
02125 static void fp_prime_miller_rabin (fp_int * a, fp_int * b, int *result)
02126 {
02127   fp_int  n1, y, r;
02128   int     s, j;
02129 
02130   /* default */
02131   *result = FP_NO;
02132 
02133   /* ensure b > 1 */
02134   if (fp_cmp_d(b, 1) != FP_GT) {
02135      return;
02136   }     
02137 
02138   /* get n1 = a - 1 */
02139   fp_init_copy(&n1, a);
02140   fp_sub_d(&n1, 1, &n1);
02141 
02142   /* set 2**s * r = n1 */
02143   fp_init_copy(&r, &n1);
02144 
02145   /* count the number of least significant bits
02146    * which are zero
02147    */
02148   s = fp_cnt_lsb(&r);
02149 
02150   /* now divide n - 1 by 2**s */
02151   fp_div_2d (&r, s, &r, NULL);
02152 
02153   /* compute y = b**r mod a */
02154   fp_init(&y);
02155   fp_exptmod(b, &r, a, &y);
02156 
02157   /* if y != 1 and y != n1 do */
02158   if (fp_cmp_d (&y, 1) != FP_EQ && fp_cmp (&y, &n1) != FP_EQ) {
02159     j = 1;
02160     /* while j <= s-1 and y != n1 */
02161     while ((j <= (s - 1)) && fp_cmp (&y, &n1) != FP_EQ) {
02162       fp_sqrmod (&y, a, &y);
02163 
02164       /* if y == 1 then composite */
02165       if (fp_cmp_d (&y, 1) == FP_EQ) {
02166          return;
02167       }
02168       ++j;
02169     }
02170 
02171     /* if y != n1 then composite */
02172     if (fp_cmp (&y, &n1) != FP_EQ) {
02173        return;
02174     }
02175   }
02176 
02177   /* probably prime now */
02178   *result = FP_YES;
02179 }
02180 
02181 
02182 /* a few primes */
02183 static const fp_digit primes[256] = {
02184   0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
02185   0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
02186   0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
02187   0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
02188   0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
02189   0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
02190   0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
02191   0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
02192 
02193   0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
02194   0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
02195   0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
02196   0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
02197   0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
02198   0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
02199   0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
02200   0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
02201 
02202   0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
02203   0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
02204   0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
02205   0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
02206   0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
02207   0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
02208   0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
02209   0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
02210 
02211   0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
02212   0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
02213   0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
02214   0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
02215   0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
02216   0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
02217   0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
02218   0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
02219 };
02220 
02221 int fp_isprime(fp_int *a)
02222 {
02223    fp_int   b;
02224    fp_digit d = 0;
02225    int      r, res;
02226 
02227    /* do trial division */
02228    for (r = 0; r < 256; r++) {
02229        fp_mod_d(a, primes[r], &d);
02230        if (d == 0) {
02231           return FP_NO;
02232        }
02233    }
02234 
02235    /* now do 8 miller rabins */
02236    fp_init(&b);
02237    for (r = 0; r < 8; r++) {
02238        fp_set(&b, primes[r]);
02239        fp_prime_miller_rabin(a, &b, &res);
02240        if (res == FP_NO) {
02241           return FP_NO;
02242        }
02243    }
02244    return FP_YES;
02245 }
02246 
02247 
02248 /* c = [a, b] */
02249 void fp_lcm(fp_int *a, fp_int *b, fp_int *c)
02250 {
02251    fp_int  t1, t2;
02252 
02253    fp_init(&t1);
02254    fp_init(&t2);
02255    fp_gcd(a, b, &t1);
02256    if (fp_cmp_mag(a, b) == FP_GT) {
02257       fp_div(a, &t1, &t2, NULL);
02258       fp_mul(b, &t2, c);
02259    } else {
02260       fp_div(b, &t1, &t2, NULL);
02261       fp_mul(a, &t2, c);
02262    }   
02263 }
02264 
02265 
02266 static const int lnz[16] = {
02267    4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
02268 };
02269 
02270 /* Counts the number of lsbs which are zero before the first zero bit */
02271 int fp_cnt_lsb(fp_int *a)
02272 {
02273    int x;
02274    fp_digit q, qq;
02275 
02276    /* easy out */
02277    if (fp_iszero(a) == 1) {
02278       return 0;
02279    }
02280 
02281    /* scan lower digits until non-zero */
02282    for (x = 0; x < a->used && a->dp[x] == 0; x++);
02283    q = a->dp[x];
02284    x *= DIGIT_BIT;
02285 
02286    /* now scan this digit until a 1 is found */
02287    if ((q & 1) == 0) {
02288       do {
02289          qq  = q & 15;
02290          x  += lnz[qq];
02291          q >>= 4;
02292       } while (qq == 0);
02293    }
02294    return x;
02295 }
02296 
02297 
02298 /* c = (a, b) */
02299 void fp_gcd(fp_int *a, fp_int *b, fp_int *c)
02300 {
02301    fp_int u, v, r;
02302 
02303    /* either zero than gcd is the largest */
02304    if (fp_iszero (a) == 1 && fp_iszero (b) == 0) {
02305      fp_abs (b, c);
02306      return;
02307    }
02308    if (fp_iszero (a) == 0 && fp_iszero (b) == 1) {
02309      fp_abs (a, c);
02310      return;
02311    }
02312 
02313    /* optimized.  At this point if a == 0 then
02314     * b must equal zero too
02315     */
02316    if (fp_iszero (a) == 1) {
02317      fp_zero(c);
02318      return;
02319    }
02320 
02321    /* sort inputs */
02322    if (fp_cmp_mag(a, b) != FP_LT) {
02323       fp_init_copy(&u, a);
02324       fp_init_copy(&v, b);
02325    } else {
02326       fp_init_copy(&u, b);
02327       fp_init_copy(&v, a);
02328    }
02329  
02330    fp_zero(&r);
02331    while (fp_iszero(&v) == FP_NO) {
02332       fp_mod(&u, &v, &r);
02333       fp_copy(&v, &u);
02334       fp_copy(&r, &v);
02335    }
02336    fp_copy(&u, c);
02337 }
02338 
02339 #endif /* CYASSL_KEY_GEN */
02340 
02341 
02342 #if defined(HAVE_ECC) || !defined(NO_PWDBASED)
02343 /* c = a + b */
02344 void fp_add_d(fp_int *a, fp_digit b, fp_int *c)
02345 {
02346    fp_int tmp;
02347    fp_set(&tmp, b);
02348    fp_add(a,&tmp,c);
02349 }
02350 
02351 /* external compatibility */
02352 int mp_add_d(fp_int *a, fp_digit b, fp_int *c)
02353 {
02354     fp_add_d(a, b, c);
02355     return MP_OKAY;
02356 }
02357 
02358 #endif  /* HAVE_ECC || !NO_PWDBASED */
02359 
02360 
02361 #ifdef HAVE_ECC
02362 
02363 /* chars used in radix conversions */
02364 const char *fp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
02365 
02366 static int fp_read_radix(fp_int *a, const char *str, int radix)
02367 {
02368   int     y, neg;
02369   char    ch;
02370 
02371   /* make sure the radix is ok */
02372   if (radix < 2 || radix > 64) {
02373     return FP_VAL;
02374   }
02375 
02376   /* if the leading digit is a
02377    * minus set the sign to negative.
02378    */
02379   if (*str == '-') {
02380     ++str;
02381     neg = FP_NEG;
02382   } else {
02383     neg = FP_ZPOS;
02384   }
02385 
02386   /* set the integer to the default of zero */
02387   fp_zero (a);
02388 
02389   /* process each digit of the string */
02390   while (*str) {
02391     /* if the radix < 36 the conversion is case insensitive
02392      * this allows numbers like 1AB and 1ab to represent the same  value
02393      * [e.g. in hex]
02394      */
02395     ch = (char) ((radix < 36) ? XTOUPPER(*str) : *str);
02396     for (y = 0; y < 64; y++) {
02397       if (ch == fp_s_rmap[y]) {
02398          break;
02399       }
02400     }
02401 
02402     /* if the char was found in the map
02403      * and is less than the given radix add it
02404      * to the number, otherwise exit the loop.
02405      */
02406     if (y < radix) {
02407       fp_mul_d (a, (fp_digit) radix, a);
02408       fp_add_d (a, (fp_digit) y, a);
02409     } else {
02410       break;
02411     }
02412     ++str;
02413   }
02414 
02415   /* set the sign only if a != 0 */
02416   if (fp_iszero(a) != FP_YES) {
02417      a->sign = neg;
02418   }
02419   return FP_OKAY;
02420 }
02421 
02422 /* fast math conversion */
02423 int mp_read_radix(mp_int *a, const char *str, int radix)
02424 {
02425     return fp_read_radix(a, str, radix);
02426 }
02427 
02428 /* fast math conversion */
02429 int mp_set(fp_int *a, fp_digit b)
02430 {
02431     fp_set(a,b);
02432     return MP_OKAY;
02433 }
02434 
02435 /* fast math conversion */
02436 int mp_sqr(fp_int *A, fp_int *B)
02437 {
02438     fp_sqr(A, B);
02439     return MP_OKAY;
02440 }
02441   
02442 /* fast math conversion */
02443 int mp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
02444 {
02445     fp_montgomery_reduce(a, m, mp);
02446     return MP_OKAY;
02447 }
02448 
02449 
02450 /* fast math conversion */
02451 int mp_montgomery_setup(fp_int *a, fp_digit *rho)
02452 {
02453     return fp_montgomery_setup(a, rho);
02454 }
02455 
02456 int mp_div_2(fp_int * a, fp_int * b)
02457 {
02458     fp_div_2(a, b);
02459     return MP_OKAY;
02460 }
02461 
02462 
02463 int mp_init_copy(fp_int * a, fp_int * b)
02464 {
02465     fp_init_copy(a, b);
02466     return MP_OKAY;
02467 }
02468 
02469 
02470 
02471 #endif /* HAVE_ECC */
02472 
02473 #endif /* USE_FAST_MATH */