wolf SSL / CyaSSL-2.9.4

Dependents:  

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers tfm.c Source File

tfm.c

00001 /* tfm.c
00002  *
00003  * Copyright (C) 2006-2013 wolfSSL Inc.
00004  *
00005  * This file is part of CyaSSL.
00006  *
00007  * CyaSSL is free software; you can redistribute it and/or modify
00008  * it under the terms of the GNU General Public License as published by
00009  * the Free Software Foundation; either version 2 of the License, or
00010  * (at your option) any later version.
00011  *
00012  * CyaSSL is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
00020  */
00021 
00022 
00023 /*
00024  * Based on public domain TomsFastMath 0.10 by Tom St Denis, tomstdenis@iahu.ca,
00025  * http://math.libtomcrypt.com
00026  */
00027 
00028 /**
00029  *  Edited by Moisés Guimarães (moisesguimaraesm@gmail.com)
00030  *  to fit CyaSSL's needs.
00031  */
00032 
00033 #ifdef HAVE_CONFIG_H
00034     #include <config.h>
00035 #endif
00036 
00037 /* in case user set USE_FAST_MATH there */
00038 #include <cyassl/ctaocrypt/settings.h>
00039 
00040 #ifdef USE_FAST_MATH
00041 
00042 #include <cyassl/ctaocrypt/tfm.h>
00043 #include <ctaocrypt/src/asm.c>  /* will define asm MACROS or C ones */
00044 
00045 
00046 /* math settings check */
00047 word32 CheckRunTimeSettings(void)
00048 {
00049     return CTC_SETTINGS;
00050 }
00051 
00052 
00053 /* math settings size check */
00054 word32 CheckRunTimeFastMath(void)
00055 {
00056     return FP_SIZE;
00057 }
00058 
00059 
00060 /* Functions */
00061 
00062 void fp_add(fp_int *a, fp_int *b, fp_int *c)
00063 {
00064   int     sa, sb;
00065 
00066   /* get sign of both inputs */
00067   sa = a->sign;
00068   sb = b->sign;
00069 
00070   /* handle two cases, not four */
00071   if (sa == sb) {
00072     /* both positive or both negative */
00073     /* add their magnitudes, copy the sign */
00074     c->sign = sa;
00075     s_fp_add (a, b, c);
00076   } else {
00077     /* one positive, the other negative */
00078     /* subtract the one with the greater magnitude from */
00079     /* the one of the lesser magnitude.  The result gets */
00080     /* the sign of the one with the greater magnitude. */
00081     if (fp_cmp_mag (a, b) == FP_LT) {
00082       c->sign = sb;
00083       s_fp_sub (b, a, c);
00084     } else {
00085       c->sign = sa;
00086       s_fp_sub (a, b, c);
00087     }
00088   }
00089 }
00090 
00091 /* unsigned addition */
00092 void s_fp_add(fp_int *a, fp_int *b, fp_int *c)
00093 {
00094   int      x, y, oldused;
00095   register fp_word  t;
00096 
00097   y       = MAX(a->used, b->used);
00098   oldused = c->used;
00099   c->used = y;
00100  
00101   t = 0;
00102   for (x = 0; x < y; x++) {
00103       t         += ((fp_word)a->dp[x]) + ((fp_word)b->dp[x]);
00104       c->dp[x]   = (fp_digit)t;
00105       t        >>= DIGIT_BIT;
00106   }
00107   if (t != 0 && x < FP_SIZE) {
00108      c->dp[c->used++] = (fp_digit)t;
00109      ++x;
00110   }
00111 
00112   c->used = x;
00113   for (; x < oldused; x++) {
00114      c->dp[x] = 0;
00115   }
00116   fp_clamp(c);
00117 }
00118 
00119 /* c = a - b */
00120 void fp_sub(fp_int *a, fp_int *b, fp_int *c)
00121 {
00122   int     sa, sb;
00123 
00124   sa = a->sign;
00125   sb = b->sign;
00126 
00127   if (sa != sb) {
00128     /* subtract a negative from a positive, OR */
00129     /* subtract a positive from a negative. */
00130     /* In either case, ADD their magnitudes, */
00131     /* and use the sign of the first number. */
00132     c->sign = sa;
00133     s_fp_add (a, b, c);
00134   } else {
00135     /* subtract a positive from a positive, OR */
00136     /* subtract a negative from a negative. */
00137     /* First, take the difference between their */
00138     /* magnitudes, then... */
00139     if (fp_cmp_mag (a, b) != FP_LT) {
00140       /* Copy the sign from the first */
00141       c->sign = sa;
00142       /* The first has a larger or equal magnitude */
00143       s_fp_sub (a, b, c);
00144     } else {
00145       /* The result has the *opposite* sign from */
00146       /* the first number. */
00147       c->sign = (sa == FP_ZPOS) ? FP_NEG : FP_ZPOS;
00148       /* The second has a larger magnitude */
00149       s_fp_sub (b, a, c);
00150     }
00151   }
00152 }
00153 
00154 /* unsigned subtraction ||a|| >= ||b|| ALWAYS! */
00155 void s_fp_sub(fp_int *a, fp_int *b, fp_int *c)
00156 {
00157   int      x, oldbused, oldused;
00158   fp_word  t;
00159 
00160   oldused  = c->used;
00161   oldbused = b->used;
00162   c->used  = a->used;
00163   t       = 0;
00164   for (x = 0; x < oldbused; x++) {
00165      t         = ((fp_word)a->dp[x]) - (((fp_word)b->dp[x]) + t);
00166      c->dp[x]  = (fp_digit)t;
00167      t         = (t >> DIGIT_BIT)&1;
00168   }
00169   for (; x < a->used; x++) {
00170      t         = ((fp_word)a->dp[x]) - t;
00171      c->dp[x]  = (fp_digit)t;
00172      t         = (t >> DIGIT_BIT)&1;
00173    }
00174   for (; x < oldused; x++) {
00175      c->dp[x] = 0;
00176   }
00177   fp_clamp(c);
00178 }
00179 
00180 /* c = a * b */
00181 void fp_mul(fp_int *A, fp_int *B, fp_int *C)
00182 {
00183     int   y, yy;
00184 
00185     y  = MAX(A->used, B->used);
00186     yy = MIN(A->used, B->used);
00187 
00188     /* call generic if we're out of range */
00189     if (y + yy > FP_SIZE) {
00190        fp_mul_comba(A, B, C);
00191        return ;
00192     }
00193 
00194     /* pick a comba (unrolled 4/8/16/32 x or rolled) based on the size
00195        of the largest input.  We also want to avoid doing excess mults if the 
00196        inputs are not close to the next power of two.  That is, for example,
00197        if say y=17 then we would do (32-17)^2 = 225 unneeded multiplications 
00198     */
00199 
00200 #ifdef TFM_MUL3
00201         if (y <= 3) {
00202            fp_mul_comba3(A,B,C);
00203            return;
00204         }
00205 #endif
00206 #ifdef TFM_MUL4
00207         if (y == 4) {
00208            fp_mul_comba4(A,B,C);
00209            return;
00210         }
00211 #endif
00212 #ifdef TFM_MUL6
00213         if (y <= 6) {
00214            fp_mul_comba6(A,B,C);
00215            return;
00216         }
00217 #endif
00218 #ifdef TFM_MUL7
00219         if (y == 7) {
00220            fp_mul_comba7(A,B,C);
00221            return;
00222         }
00223 #endif
00224 #ifdef TFM_MUL8
00225         if (y == 8) {
00226            fp_mul_comba8(A,B,C);
00227            return;
00228         }
00229 #endif
00230 #ifdef TFM_MUL9
00231         if (y == 9) {
00232            fp_mul_comba9(A,B,C);
00233            return;
00234         }
00235 #endif
00236 #ifdef TFM_MUL12
00237         if (y <= 12) {
00238            fp_mul_comba12(A,B,C);
00239            return;
00240         }
00241 #endif
00242 #ifdef TFM_MUL17
00243         if (y <= 17) {
00244            fp_mul_comba17(A,B,C);
00245            return;
00246         }
00247 #endif
00248 
00249 #ifdef TFM_SMALL_SET
00250         if (y <= 16) {
00251            fp_mul_comba_small(A,B,C);
00252            return;
00253         }
00254 #endif        
00255 #if defined(TFM_MUL20)
00256         if (y <= 20) {
00257            fp_mul_comba20(A,B,C);
00258            return;
00259         }
00260 #endif
00261 #if defined(TFM_MUL24)
00262         if (yy >= 16 && y <= 24) {
00263            fp_mul_comba24(A,B,C);
00264            return;
00265         }
00266 #endif
00267 #if defined(TFM_MUL28)
00268         if (yy >= 20 && y <= 28) {
00269            fp_mul_comba28(A,B,C);
00270            return;
00271         }
00272 #endif
00273 #if defined(TFM_MUL32)
00274         if (yy >= 24 && y <= 32) {
00275            fp_mul_comba32(A,B,C);
00276            return;
00277         }
00278 #endif
00279 #if defined(TFM_MUL48)
00280         if (yy >= 40 && y <= 48) {
00281            fp_mul_comba48(A,B,C);
00282            return;
00283         }
00284 #endif        
00285 #if defined(TFM_MUL64)
00286         if (yy >= 56 && y <= 64) {
00287            fp_mul_comba64(A,B,C);
00288            return;
00289         }
00290 #endif
00291         fp_mul_comba(A,B,C);
00292 }
00293 
00294 void fp_mul_2(fp_int * a, fp_int * b)
00295 {
00296   int     x, oldused;
00297    
00298   oldused = b->used;
00299   b->used = a->used;
00300 
00301   {
00302     register fp_digit r, rr, *tmpa, *tmpb;
00303 
00304     /* alias for source */
00305     tmpa = a->dp;
00306     
00307     /* alias for dest */
00308     tmpb = b->dp;
00309 
00310     /* carry */
00311     r = 0;
00312     for (x = 0; x < a->used; x++) {
00313     
00314       /* get what will be the *next* carry bit from the 
00315        * MSB of the current digit 
00316        */
00317       rr = *tmpa >> ((fp_digit)(DIGIT_BIT - 1));
00318       
00319       /* now shift up this digit, add in the carry [from the previous] */
00320       *tmpb++ = ((*tmpa++ << ((fp_digit)1)) | r);
00321       
00322       /* copy the carry that would be from the source 
00323        * digit into the next iteration 
00324        */
00325       r = rr;
00326     }
00327 
00328     /* new leading digit? */
00329     if (r != 0 && b->used != (FP_SIZE-1)) {
00330       /* add a MSB which is always 1 at this point */
00331       *tmpb = 1;
00332       ++(b->used);
00333     }
00334 
00335     /* now zero any excess digits on the destination 
00336      * that we didn't write to 
00337      */
00338     tmpb = b->dp + b->used;
00339     for (x = b->used; x < oldused; x++) {
00340       *tmpb++ = 0;
00341     }
00342   }
00343   b->sign = a->sign;
00344 }
00345 
00346 /* c = a * b */
00347 void fp_mul_d(fp_int *a, fp_digit b, fp_int *c)
00348 {
00349    fp_word  w;
00350    int      x, oldused;
00351 
00352    oldused = c->used;
00353    c->used = a->used;
00354    c->sign = a->sign;
00355    w       = 0;
00356    for (x = 0; x < a->used; x++) {
00357        w         = ((fp_word)a->dp[x]) * ((fp_word)b) + w;
00358        c->dp[x]  = (fp_digit)w;
00359        w         = w >> DIGIT_BIT;
00360    }
00361    if (w != 0 && (a->used != FP_SIZE)) {
00362       c->dp[c->used++] = (fp_digit) w;
00363       ++x;
00364    }
00365    for (; x < oldused; x++) {
00366       c->dp[x] = 0;
00367    }
00368    fp_clamp(c);
00369 }
00370 
00371 /* c = a * 2**d */
00372 void fp_mul_2d(fp_int *a, int b, fp_int *c)
00373 {
00374    fp_digit carry, carrytmp, shift;
00375    int x;
00376 
00377    /* copy it */
00378    fp_copy(a, c);
00379 
00380    /* handle whole digits */
00381    if (b >= DIGIT_BIT) {
00382       fp_lshd(c, b/DIGIT_BIT);
00383    }
00384    b %= DIGIT_BIT;
00385 
00386    /* shift the digits */
00387    if (b != 0) {
00388       carry = 0;   
00389       shift = DIGIT_BIT - b;
00390       for (x = 0; x < c->used; x++) {
00391           carrytmp = c->dp[x] >> shift;
00392           c->dp[x] = (c->dp[x] << b) + carry;
00393           carry = carrytmp;
00394       }
00395       /* store last carry if room */
00396       if (carry && x < FP_SIZE) {
00397          c->dp[c->used++] = carry;
00398       }
00399    }
00400    fp_clamp(c);
00401 }
00402 
00403 /* generic PxQ multiplier */
00404 void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
00405 {
00406    int       ix, iy, iz, tx, ty, pa;
00407    fp_digit  c0, c1, c2, *tmpx, *tmpy;
00408    fp_int    tmp, *dst;
00409 
00410    COMBA_START;
00411    COMBA_CLEAR;
00412    
00413    /* get size of output and trim */
00414    pa = A->used + B->used;
00415    if (pa >= FP_SIZE) {
00416       pa = FP_SIZE-1;
00417    }
00418 
00419    if (A == C || B == C) {
00420       fp_zero(&tmp);
00421       dst = &tmp;
00422    } else {
00423       fp_zero(C);
00424       dst = C;
00425    }
00426 
00427    for (ix = 0; ix < pa; ix++) {
00428       /* get offsets into the two bignums */
00429       ty = MIN(ix, B->used-1);
00430       tx = ix - ty;
00431 
00432       /* setup temp aliases */
00433       tmpx = A->dp + tx;
00434       tmpy = B->dp + ty;
00435 
00436       /* this is the number of times the loop will iterrate, essentially its 
00437          while (tx++ < a->used && ty-- >= 0) { ... }
00438        */
00439       iy = MIN(A->used-tx, ty+1);
00440 
00441       /* execute loop */
00442       COMBA_FORWARD;
00443       for (iz = 0; iz < iy; ++iz) {
00444           /* TAO change COMBA_ADD back to MULADD */
00445           MULADD(*tmpx++, *tmpy--);
00446       }
00447 
00448       /* store term */
00449       COMBA_STORE(dst->dp[ix]);
00450   }
00451   COMBA_FINI;
00452 
00453   dst->used = pa;
00454   dst->sign = A->sign ^ B->sign;
00455   fp_clamp(dst);
00456   fp_copy(dst, C);
00457 }
00458 
00459 /* a/b => cb + d == a */
00460 int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
00461 {
00462   fp_int  q, x, y, t1, t2;
00463   int     n, t, i, norm, neg;
00464 
00465   /* is divisor zero ? */
00466   if (fp_iszero (b) == 1) {
00467     return FP_VAL;
00468   }
00469 
00470   /* if a < b then q=0, r = a */
00471   if (fp_cmp_mag (a, b) == FP_LT) {
00472     if (d != NULL) {
00473       fp_copy (a, d);
00474     } 
00475     if (c != NULL) {
00476       fp_zero (c);
00477     }
00478     return FP_OKAY;
00479   }
00480 
00481   fp_init(&q);
00482   q.used = a->used + 2;
00483 
00484   fp_init(&t1);
00485   fp_init(&t2);
00486   fp_init_copy(&x, a);
00487   fp_init_copy(&y, b);
00488 
00489   /* fix the sign */
00490   neg = (a->sign == b->sign) ? FP_ZPOS : FP_NEG;
00491   x.sign = y.sign = FP_ZPOS;
00492 
00493   /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
00494   norm = fp_count_bits(&y) % DIGIT_BIT;
00495   if (norm < (int)(DIGIT_BIT-1)) {
00496      norm = (DIGIT_BIT-1) - norm;
00497      fp_mul_2d (&x, norm, &x);
00498      fp_mul_2d (&y, norm, &y);
00499   } else {
00500      norm = 0;
00501   }
00502 
00503   /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
00504   n = x.used - 1;
00505   t = y.used - 1;
00506 
00507   /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
00508   fp_lshd (&y, n - t);                                             /* y = y*b**{n-t} */
00509 
00510   while (fp_cmp (&x, &y) != FP_LT) {
00511     ++(q.dp[n - t]);
00512     fp_sub (&x, &y, &x);
00513   }
00514 
00515   /* reset y by shifting it back down */
00516   fp_rshd (&y, n - t);
00517 
00518   /* step 3. for i from n down to (t + 1) */
00519   for (i = n; i >= (t + 1); i--) {
00520     if (i > x.used) {
00521       continue;
00522     }
00523 
00524     /* step 3.1 if xi == yt then set q{i-t-1} to b-1, 
00525      * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
00526     if (x.dp[i] == y.dp[t]) {
00527       q.dp[i - t - 1] = (fp_digit) ((((fp_word)1) << DIGIT_BIT) - 1);
00528     } else {
00529       fp_word tmp;
00530       tmp = ((fp_word) x.dp[i]) << ((fp_word) DIGIT_BIT);
00531       tmp |= ((fp_word) x.dp[i - 1]);
00532       tmp /= ((fp_word)y.dp[t]);
00533       q.dp[i - t - 1] = (fp_digit) (tmp);
00534     }
00535 
00536     /* while (q{i-t-1} * (yt * b + y{t-1})) > 
00537              xi * b**2 + xi-1 * b + xi-2 
00538      
00539        do q{i-t-1} -= 1; 
00540     */
00541     q.dp[i - t - 1] = (q.dp[i - t - 1] + 1);
00542     do {
00543       q.dp[i - t - 1] = (q.dp[i - t - 1] - 1);
00544 
00545       /* find left hand */
00546       fp_zero (&t1);
00547       t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
00548       t1.dp[1] = y.dp[t];
00549       t1.used = 2;
00550       fp_mul_d (&t1, q.dp[i - t - 1], &t1);
00551 
00552       /* find right hand */
00553       t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
00554       t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
00555       t2.dp[2] = x.dp[i];
00556       t2.used = 3;
00557     } while (fp_cmp_mag(&t1, &t2) == FP_GT);
00558 
00559     /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
00560     fp_mul_d (&y, q.dp[i - t - 1], &t1);
00561     fp_lshd  (&t1, i - t - 1);
00562     fp_sub   (&x, &t1, &x);
00563 
00564     /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
00565     if (x.sign == FP_NEG) {
00566       fp_copy (&y, &t1);
00567       fp_lshd (&t1, i - t - 1);
00568       fp_add (&x, &t1, &x);
00569       q.dp[i - t - 1] = q.dp[i - t - 1] - 1;
00570     }
00571   }
00572 
00573   /* now q is the quotient and x is the remainder 
00574    * [which we have to normalize] 
00575    */
00576   
00577   /* get sign before writing to c */
00578   x.sign = x.used == 0 ? FP_ZPOS : a->sign;
00579 
00580   if (c != NULL) {
00581     fp_clamp (&q);
00582     fp_copy (&q, c);
00583     c->sign = neg;
00584   }
00585 
00586   if (d != NULL) {
00587     fp_div_2d (&x, norm, &x, NULL);
00588 
00589 /* the following is a kludge, essentially we were seeing the right remainder but 
00590    with excess digits that should have been zero
00591  */
00592     for (i = b->used; i < x.used; i++) {
00593         x.dp[i] = 0;
00594     }
00595     fp_clamp(&x);
00596     fp_copy (&x, d);
00597   }
00598 
00599   return FP_OKAY;
00600 }
00601 
00602 /* b = a/2 */
00603 void fp_div_2(fp_int * a, fp_int * b)
00604 {
00605   int     x, oldused;
00606 
00607   oldused = b->used;
00608   b->used = a->used;
00609   {
00610     register fp_digit r, rr, *tmpa, *tmpb;
00611 
00612     /* source alias */
00613     tmpa = a->dp + b->used - 1;
00614 
00615     /* dest alias */
00616     tmpb = b->dp + b->used - 1;
00617 
00618     /* carry */
00619     r = 0;
00620     for (x = b->used - 1; x >= 0; x--) {
00621       /* get the carry for the next iteration */
00622       rr = *tmpa & 1;
00623 
00624       /* shift the current digit, add in carry and store */
00625       *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
00626 
00627       /* forward carry to next iteration */
00628       r = rr;
00629     }
00630 
00631     /* zero excess digits */
00632     tmpb = b->dp + b->used;
00633     for (x = b->used; x < oldused; x++) {
00634       *tmpb++ = 0;
00635     }
00636   }
00637   b->sign = a->sign;
00638   fp_clamp (b);
00639 }
00640 
00641 /* c = a / 2**b */
00642 void fp_div_2d(fp_int *a, int b, fp_int *c, fp_int *d)
00643 {
00644   int      D;
00645   fp_int   t;
00646 
00647   /* if the shift count is <= 0 then we do no work */
00648   if (b <= 0) {
00649     fp_copy (a, c);
00650     if (d != NULL) {
00651       fp_zero (d);
00652     }
00653     return;
00654   }
00655 
00656   fp_init(&t);
00657 
00658   /* get the remainder */
00659   if (d != NULL) {
00660     fp_mod_2d (a, b, &t);
00661   }
00662 
00663   /* copy */
00664   fp_copy(a, c);
00665 
00666   /* shift by as many digits in the bit count */
00667   if (b >= (int)DIGIT_BIT) {
00668     fp_rshd (c, b / DIGIT_BIT);
00669   }
00670 
00671   /* shift any bit count < DIGIT_BIT */
00672   D = (b % DIGIT_BIT);
00673   if (D != 0) {
00674     fp_rshb(c, D);
00675   }
00676   fp_clamp (c);
00677   if (d != NULL) {
00678     fp_copy (&t, d);
00679   }
00680 }
00681 
00682 /* c = a mod b, 0 <= c < b  */
00683 int fp_mod(fp_int *a, fp_int *b, fp_int *c)
00684 {
00685    fp_int t;
00686    int    err;
00687 
00688    fp_zero(&t);
00689    if ((err = fp_div(a, b, NULL, &t)) != FP_OKAY) {
00690       return err;
00691    }
00692    if (t.sign != b->sign) {
00693       fp_add(&t, b, c);
00694    } else {
00695       fp_copy(&t, c);
00696   }
00697   return FP_OKAY;
00698 }
00699 
00700 /* c = a mod 2**d */
00701 void fp_mod_2d(fp_int *a, int b, fp_int *c)
00702 {
00703    int x;
00704 
00705    /* zero if count less than or equal to zero */
00706    if (b <= 0) {
00707       fp_zero(c);
00708       return;
00709    }
00710 
00711    /* get copy of input */
00712    fp_copy(a, c);
00713  
00714    /* if 2**d is larger than we just return */
00715    if (b >= (DIGIT_BIT * a->used)) {
00716       return;
00717    }
00718 
00719   /* zero digits above the last digit of the modulus */
00720   for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
00721     c->dp[x] = 0;
00722   }
00723   /* clear the digit that is not completely outside/inside the modulus */
00724   c->dp[b / DIGIT_BIT] &= ~((fp_digit)0) >> (DIGIT_BIT - b);
00725   fp_clamp (c);
00726 }
00727 
00728 static int fp_invmod_slow (fp_int * a, fp_int * b, fp_int * c)
00729 {
00730   fp_int  x, y, u, v, A, B, C, D;
00731   int     res;
00732 
00733   /* b cannot be negative */
00734   if (b->sign == FP_NEG || fp_iszero(b) == 1) {
00735     return FP_VAL;
00736   }
00737 
00738   /* init temps */
00739   fp_init(&x);    fp_init(&y);
00740   fp_init(&u);    fp_init(&v);
00741   fp_init(&A);    fp_init(&B);
00742   fp_init(&C);    fp_init(&D);
00743 
00744   /* x = a, y = b */
00745   if ((res = fp_mod(a, b, &x)) != FP_OKAY) {
00746       return res;
00747   }
00748   fp_copy(b, &y);
00749 
00750   /* 2. [modified] if x,y are both even then return an error! */
00751   if (fp_iseven (&x) == 1 && fp_iseven (&y) == 1) {
00752     return FP_VAL;
00753   }
00754 
00755   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
00756   fp_copy (&x, &u);
00757   fp_copy (&y, &v);
00758   fp_set (&A, 1);
00759   fp_set (&D, 1);
00760 
00761 top:
00762   /* 4.  while u is even do */
00763   while (fp_iseven (&u) == 1) {
00764     /* 4.1 u = u/2 */
00765     fp_div_2 (&u, &u);
00766 
00767     /* 4.2 if A or B is odd then */
00768     if (fp_isodd (&A) == 1 || fp_isodd (&B) == 1) {
00769       /* A = (A+y)/2, B = (B-x)/2 */
00770       fp_add (&A, &y, &A);
00771       fp_sub (&B, &x, &B);
00772     }
00773     /* A = A/2, B = B/2 */
00774     fp_div_2 (&A, &A);
00775     fp_div_2 (&B, &B);
00776   }
00777 
00778   /* 5.  while v is even do */
00779   while (fp_iseven (&v) == 1) {
00780     /* 5.1 v = v/2 */
00781     fp_div_2 (&v, &v);
00782 
00783     /* 5.2 if C or D is odd then */
00784     if (fp_isodd (&C) == 1 || fp_isodd (&D) == 1) {
00785       /* C = (C+y)/2, D = (D-x)/2 */
00786       fp_add (&C, &y, &C);
00787       fp_sub (&D, &x, &D);
00788     }
00789     /* C = C/2, D = D/2 */
00790     fp_div_2 (&C, &C);
00791     fp_div_2 (&D, &D);
00792   }
00793 
00794   /* 6.  if u >= v then */
00795   if (fp_cmp (&u, &v) != FP_LT) {
00796     /* u = u - v, A = A - C, B = B - D */
00797     fp_sub (&u, &v, &u);
00798     fp_sub (&A, &C, &A);
00799     fp_sub (&B, &D, &B);
00800   } else {
00801     /* v - v - u, C = C - A, D = D - B */
00802     fp_sub (&v, &u, &v);
00803     fp_sub (&C, &A, &C);
00804     fp_sub (&D, &B, &D);
00805   }
00806 
00807   /* if not zero goto step 4 */
00808   if (fp_iszero (&u) == 0)
00809     goto top;
00810 
00811   /* now a = C, b = D, gcd == g*v */
00812 
00813   /* if v != 1 then there is no inverse */
00814   if (fp_cmp_d (&v, 1) != FP_EQ) {
00815     return FP_VAL;
00816   }
00817 
00818   /* if its too low */
00819   while (fp_cmp_d(&C, 0) == FP_LT) {
00820       fp_add(&C, b, &C);
00821   }
00822   
00823   /* too big */
00824   while (fp_cmp_mag(&C, b) != FP_LT) {
00825       fp_sub(&C, b, &C);
00826   }
00827   
00828   /* C is now the inverse */
00829   fp_copy(&C, c);
00830   return FP_OKAY;
00831 }
00832 
00833 /* c = 1/a (mod b) for odd b only */
00834 int fp_invmod(fp_int *a, fp_int *b, fp_int *c)
00835 {
00836   fp_int  x, y, u, v, B, D;
00837   int     neg;
00838 
00839   /* 2. [modified] b must be odd   */
00840   if (fp_iseven (b) == FP_YES) {
00841     return fp_invmod_slow(a,b,c);
00842   }
00843 
00844   /* init all our temps */
00845   fp_init(&x);  fp_init(&y);
00846   fp_init(&u);  fp_init(&v);
00847   fp_init(&B);  fp_init(&D);
00848 
00849   /* x == modulus, y == value to invert */
00850   fp_copy(b, &x);
00851 
00852   /* we need y = |a| */
00853   fp_abs(a, &y);
00854 
00855   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
00856   fp_copy(&x, &u);
00857   fp_copy(&y, &v);
00858   fp_set (&D, 1);
00859 
00860 top:
00861   /* 4.  while u is even do */
00862   while (fp_iseven (&u) == FP_YES) {
00863     /* 4.1 u = u/2 */
00864     fp_div_2 (&u, &u);
00865 
00866     /* 4.2 if B is odd then */
00867     if (fp_isodd (&B) == FP_YES) {
00868       fp_sub (&B, &x, &B);
00869     }
00870     /* B = B/2 */
00871     fp_div_2 (&B, &B);
00872   }
00873 
00874   /* 5.  while v is even do */
00875   while (fp_iseven (&v) == FP_YES) {
00876     /* 5.1 v = v/2 */
00877     fp_div_2 (&v, &v);
00878 
00879     /* 5.2 if D is odd then */
00880     if (fp_isodd (&D) == FP_YES) {
00881       /* D = (D-x)/2 */
00882       fp_sub (&D, &x, &D);
00883     }
00884     /* D = D/2 */
00885     fp_div_2 (&D, &D);
00886   }
00887 
00888   /* 6.  if u >= v then */
00889   if (fp_cmp (&u, &v) != FP_LT) {
00890     /* u = u - v, B = B - D */
00891     fp_sub (&u, &v, &u);
00892     fp_sub (&B, &D, &B);
00893   } else {
00894     /* v - v - u, D = D - B */
00895     fp_sub (&v, &u, &v);
00896     fp_sub (&D, &B, &D);
00897   }
00898 
00899   /* if not zero goto step 4 */
00900   if (fp_iszero (&u) == FP_NO) {
00901     goto top;
00902   }
00903 
00904   /* now a = C, b = D, gcd == g*v */
00905 
00906   /* if v != 1 then there is no inverse */
00907   if (fp_cmp_d (&v, 1) != FP_EQ) {
00908     return FP_VAL;
00909   }
00910 
00911   /* b is now the inverse */
00912   neg = a->sign;
00913   while (D.sign == FP_NEG) {
00914     fp_add (&D, b, &D);
00915   }
00916   fp_copy (&D, c);
00917   c->sign = neg;
00918   return FP_OKAY;
00919 }
00920 
00921 /* d = a * b (mod c) */
00922 int fp_mulmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
00923 {
00924   fp_int tmp;
00925   fp_zero(&tmp);
00926   fp_mul(a, b, &tmp);
00927   return fp_mod(&tmp, c, d);
00928 }
00929 
00930 #ifdef TFM_TIMING_RESISTANT
00931 
00932 /* timing resistant montgomery ladder based exptmod 
00933 
00934    Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder", Cryptographic Hardware and Embedded Systems, CHES 2002
00935 */
00936 static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
00937 {
00938   fp_int   R[2];
00939   fp_digit buf, mp;
00940   int      err, bitcnt, digidx, y;
00941 
00942   /* now setup montgomery  */
00943   if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) {
00944      return err;
00945   }
00946 
00947   fp_init(&R[0]);   
00948   fp_init(&R[1]);   
00949    
00950   /* now we need R mod m */
00951   fp_montgomery_calc_normalization (&R[0], P);
00952 
00953   /* now set R[0][1] to G * R mod m */
00954   if (fp_cmp_mag(P, G) != FP_GT) {
00955      /* G > P so we reduce it first */
00956      fp_mod(G, P, &R[1]);
00957   } else {
00958      fp_copy(G, &R[1]);
00959   }
00960   fp_mulmod (&R[1], &R[0], P, &R[1]);
00961 
00962   /* for j = t-1 downto 0 do
00963         r_!k = R0*R1; r_k = r_k^2
00964   */
00965   
00966   /* set initial mode and bit cnt */
00967   bitcnt = 1;
00968   buf    = 0;
00969   digidx = X->used - 1;
00970 
00971   for (;;) {
00972     /* grab next digit as required */
00973     if (--bitcnt == 0) {
00974       /* if digidx == -1 we are out of digits so break */
00975       if (digidx == -1) {
00976         break;
00977       }
00978       /* read next digit and reset bitcnt */
00979       buf    = X->dp[digidx--];
00980       bitcnt = (int)DIGIT_BIT;
00981     }
00982 
00983     /* grab the next msb from the exponent */
00984     y     = (int)(buf >> (DIGIT_BIT - 1)) & 1;
00985     buf <<= (fp_digit)1;
00986 
00987     /* do ops */
00988     fp_mul(&R[0], &R[1], &R[y^1]); fp_montgomery_reduce(&R[y^1], P, mp);
00989     fp_sqr(&R[y], &R[y]);          fp_montgomery_reduce(&R[y], P, mp);
00990   }
00991 
00992    fp_montgomery_reduce(&R[0], P, mp);
00993    fp_copy(&R[0], Y);
00994    return FP_OKAY;
00995 }   
00996 
00997 #else
00998 
00999 /* y = g**x (mod b) 
01000  * Some restrictions... x must be positive and < b
01001  */
01002 static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
01003 {
01004   fp_int   M[64], res;
01005   fp_digit buf, mp;
01006   int      err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
01007 
01008   /* find window size */
01009   x = fp_count_bits (X);
01010   if (x <= 21) {
01011     winsize = 1;
01012   } else if (x <= 36) {
01013     winsize = 3;
01014   } else if (x <= 140) {
01015     winsize = 4;
01016   } else if (x <= 450) {
01017     winsize = 5;
01018   } else {
01019     winsize = 6;
01020   } 
01021 
01022   /* init M array */
01023   XMEMSET(M, 0, sizeof(M)); 
01024 
01025   /* now setup montgomery  */
01026   if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) {
01027      return err;
01028   }
01029 
01030   /* setup result */
01031   fp_init(&res);
01032 
01033   /* create M table
01034    *
01035    * The M table contains powers of the input base, e.g. M[x] = G^x mod P
01036    *
01037    * The first half of the table is not computed though accept for M[0] and M[1]
01038    */
01039 
01040    /* now we need R mod m */
01041    fp_montgomery_calc_normalization (&res, P);
01042 
01043    /* now set M[1] to G * R mod m */
01044    if (fp_cmp_mag(P, G) != FP_GT) {
01045       /* G > P so we reduce it first */
01046       fp_mod(G, P, &M[1]);
01047    } else {
01048       fp_copy(G, &M[1]);
01049    }
01050    fp_mulmod (&M[1], &res, P, &M[1]);
01051 
01052   /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
01053   fp_copy (&M[1], &M[1 << (winsize - 1)]);
01054   for (x = 0; x < (winsize - 1); x++) {
01055     fp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)]);
01056     fp_montgomery_reduce (&M[1 << (winsize - 1)], P, mp);
01057   }
01058 
01059   /* create upper table */
01060   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
01061     fp_mul(&M[x - 1], &M[1], &M[x]);
01062     fp_montgomery_reduce(&M[x], P, mp);
01063   }
01064 
01065   /* set initial mode and bit cnt */
01066   mode   = 0;
01067   bitcnt = 1;
01068   buf    = 0;
01069   digidx = X->used - 1;
01070   bitcpy = 0;
01071   bitbuf = 0;
01072 
01073   for (;;) {
01074     /* grab next digit as required */
01075     if (--bitcnt == 0) {
01076       /* if digidx == -1 we are out of digits so break */
01077       if (digidx == -1) {
01078         break;
01079       }
01080       /* read next digit and reset bitcnt */
01081       buf    = X->dp[digidx--];
01082       bitcnt = (int)DIGIT_BIT;
01083     }
01084 
01085     /* grab the next msb from the exponent */
01086     y     = (int)(buf >> (DIGIT_BIT - 1)) & 1;
01087     buf <<= (fp_digit)1;
01088 
01089     /* if the bit is zero and mode == 0 then we ignore it
01090      * These represent the leading zero bits before the first 1 bit
01091      * in the exponent.  Technically this opt is not required but it
01092      * does lower the # of trivial squaring/reductions used
01093      */
01094     if (mode == 0 && y == 0) {
01095       continue;
01096     }
01097 
01098     /* if the bit is zero and mode == 1 then we square */
01099     if (mode == 1 && y == 0) {
01100       fp_sqr(&res, &res);
01101       fp_montgomery_reduce(&res, P, mp);
01102       continue;
01103     }
01104 
01105     /* else we add it to the window */
01106     bitbuf |= (y << (winsize - ++bitcpy));
01107     mode    = 2;
01108 
01109     if (bitcpy == winsize) {
01110       /* ok window is filled so square as required and multiply  */
01111       /* square first */
01112       for (x = 0; x < winsize; x++) {
01113         fp_sqr(&res, &res);
01114         fp_montgomery_reduce(&res, P, mp);
01115       }
01116 
01117       /* then multiply */
01118       fp_mul(&res, &M[bitbuf], &res);
01119       fp_montgomery_reduce(&res, P, mp);
01120 
01121       /* empty window and reset */
01122       bitcpy = 0;
01123       bitbuf = 0;
01124       mode   = 1;
01125     }
01126   }
01127 
01128   /* if bits remain then square/multiply */
01129   if (mode == 2 && bitcpy > 0) {
01130     /* square then multiply if the bit is set */
01131     for (x = 0; x < bitcpy; x++) {
01132       fp_sqr(&res, &res);
01133       fp_montgomery_reduce(&res, P, mp);
01134 
01135       /* get next bit of the window */
01136       bitbuf <<= 1;
01137       if ((bitbuf & (1 << winsize)) != 0) {
01138         /* then multiply */
01139         fp_mul(&res, &M[1], &res);
01140         fp_montgomery_reduce(&res, P, mp);
01141       }
01142     }
01143   }
01144 
01145   /* fixup result if Montgomery reduction is used
01146    * recall that any value in a Montgomery system is
01147    * actually multiplied by R mod n.  So we have
01148    * to reduce one more time to cancel out the factor
01149    * of R.
01150    */
01151   fp_montgomery_reduce(&res, P, mp);
01152 
01153   /* swap res with Y */
01154   fp_copy (&res, Y);
01155   return FP_OKAY;
01156 }
01157 
01158 #endif
01159 
01160 int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
01161 {
01162    /* prevent overflows */
01163    if (P->used > (FP_SIZE/2)) {
01164       return FP_VAL;
01165    }
01166 
01167    if (X->sign == FP_NEG) {
01168 #ifndef POSITIVE_EXP_ONLY  /* reduce stack if assume no negatives */
01169       int    err;
01170       fp_int tmp;
01171 
01172       /* yes, copy G and invmod it */
01173       fp_copy(G, &tmp);
01174       if ((err = fp_invmod(&tmp, P, &tmp)) != FP_OKAY) {
01175          return err;
01176       }
01177       X->sign = FP_ZPOS;
01178       err =  _fp_exptmod(&tmp, X, P, Y);
01179       if (X != Y) {
01180          X->sign = FP_NEG;
01181       }
01182       return err;
01183 #else
01184       return FP_VAL;
01185 #endif 
01186    }
01187    else {
01188       /* Positive exponent so just exptmod */
01189       return _fp_exptmod(G, X, P, Y);
01190    }
01191 }
01192 
01193 /* computes a = 2**b */
01194 void fp_2expt(fp_int *a, int b)
01195 {
01196    int     z;
01197 
01198    /* zero a as per default */
01199    fp_zero (a);
01200 
01201    if (b < 0) { 
01202       return;
01203    }
01204 
01205    z = b / DIGIT_BIT;
01206    if (z >= FP_SIZE) {
01207       return; 
01208    }
01209 
01210   /* set the used count of where the bit will go */
01211   a->used = z + 1;
01212 
01213   /* put the single bit in its place */
01214   a->dp[z] = ((fp_digit)1) << (b % DIGIT_BIT);
01215 }
01216 
01217 /* b = a*a  */
01218 void fp_sqr(fp_int *A, fp_int *B)
01219 {
01220     int y = A->used;
01221 
01222     /* call generic if we're out of range */
01223     if (y + y > FP_SIZE) {
01224        fp_sqr_comba(A, B);
01225        return ;
01226     }
01227 
01228 #if defined(TFM_SQR3)
01229         if (y <= 3) {
01230            fp_sqr_comba3(A,B);
01231            return;
01232         }
01233 #endif
01234 #if defined(TFM_SQR4)
01235         if (y == 4) {
01236            fp_sqr_comba4(A,B);
01237            return;
01238         }
01239 #endif
01240 #if defined(TFM_SQR6)
01241         if (y <= 6) {
01242            fp_sqr_comba6(A,B);
01243            return;
01244         }
01245 #endif
01246 #if defined(TFM_SQR7)
01247         if (y == 7) {
01248            fp_sqr_comba7(A,B);
01249            return;
01250         }
01251 #endif
01252 #if defined(TFM_SQR8)
01253         if (y == 8) {
01254            fp_sqr_comba8(A,B);
01255            return;
01256         }
01257 #endif
01258 #if defined(TFM_SQR9)
01259         if (y == 9) {
01260            fp_sqr_comba9(A,B);
01261            return;
01262         }
01263 #endif
01264 #if defined(TFM_SQR12)
01265         if (y <= 12) {
01266            fp_sqr_comba12(A,B);
01267            return;
01268         }
01269 #endif
01270 #if defined(TFM_SQR17)
01271         if (y <= 17) {
01272            fp_sqr_comba17(A,B);
01273            return;
01274         }
01275 #endif
01276 #if defined(TFM_SMALL_SET)
01277         if (y <= 16) {
01278            fp_sqr_comba_small(A,B);
01279            return;
01280         }
01281 #endif
01282 #if defined(TFM_SQR20)
01283         if (y <= 20) {
01284            fp_sqr_comba20(A,B);
01285            return;
01286         }
01287 #endif
01288 #if defined(TFM_SQR24)
01289         if (y <= 24) {
01290            fp_sqr_comba24(A,B);
01291            return;
01292         }
01293 #endif
01294 #if defined(TFM_SQR28)
01295         if (y <= 28) {
01296            fp_sqr_comba28(A,B);
01297            return;
01298         }
01299 #endif
01300 #if defined(TFM_SQR32)
01301         if (y <= 32) {
01302            fp_sqr_comba32(A,B);
01303            return;
01304         }
01305 #endif
01306 #if defined(TFM_SQR48)
01307         if (y <= 48) {
01308            fp_sqr_comba48(A,B);
01309            return;
01310         }
01311 #endif
01312 #if defined(TFM_SQR64)
01313         if (y <= 64) {
01314            fp_sqr_comba64(A,B);
01315            return;
01316         }
01317 #endif
01318        fp_sqr_comba(A, B);
01319 }
01320 
01321 /* generic comba squarer */
01322 void fp_sqr_comba(fp_int *A, fp_int *B)
01323 {
01324   int       pa, ix, iz;
01325   fp_digit  c0, c1, c2;
01326   fp_int    tmp, *dst;
01327 #ifdef TFM_ISO
01328   fp_word   tt;
01329 #endif    
01330 
01331   /* get size of output and trim */
01332   pa = A->used + A->used;
01333   if (pa >= FP_SIZE) {
01334      pa = FP_SIZE-1;
01335   }
01336 
01337   /* number of output digits to produce */
01338   COMBA_START;
01339   COMBA_CLEAR;
01340 
01341   if (A == B) {
01342      fp_zero(&tmp);
01343      dst = &tmp;
01344   } else {
01345      fp_zero(B);
01346      dst = B;
01347   }
01348 
01349   for (ix = 0; ix < pa; ix++) { 
01350       int      tx, ty, iy;
01351       fp_digit *tmpy, *tmpx;
01352 
01353       /* get offsets into the two bignums */
01354       ty = MIN(A->used-1, ix);
01355       tx = ix - ty;
01356 
01357       /* setup temp aliases */
01358       tmpx = A->dp + tx;
01359       tmpy = A->dp + ty;
01360 
01361       /* this is the number of times the loop will iterrate,
01362          while (tx++ < a->used && ty-- >= 0) { ... }
01363        */
01364       iy = MIN(A->used-tx, ty+1);
01365 
01366       /* now for squaring tx can never equal ty 
01367        * we halve the distance since they approach 
01368        * at a rate of 2x and we have to round because 
01369        * odd cases need to be executed
01370        */
01371       iy = MIN(iy, (ty-tx+1)>>1);
01372 
01373       /* forward carries */
01374       COMBA_FORWARD;
01375 
01376       /* execute loop */
01377       for (iz = 0; iz < iy; iz++) {
01378           SQRADD2(*tmpx++, *tmpy--);
01379       }
01380 
01381       /* even columns have the square term in them */
01382       if ((ix&1) == 0) {
01383           /* TAO change COMBA_ADD back to SQRADD */
01384           SQRADD(A->dp[ix>>1], A->dp[ix>>1]);
01385       }
01386 
01387       /* store it */
01388       COMBA_STORE(dst->dp[ix]);
01389   }
01390 
01391   COMBA_FINI;
01392 
01393   /* setup dest */
01394   dst->used = pa;
01395   fp_clamp (dst);
01396   if (dst != B) {
01397      fp_copy(dst, B);
01398   }
01399 }
01400 
01401 int fp_cmp(fp_int *a, fp_int *b)
01402 {
01403    if (a->sign == FP_NEG && b->sign == FP_ZPOS) {
01404       return FP_LT;
01405    } else if (a->sign == FP_ZPOS && b->sign == FP_NEG) {
01406       return FP_GT;
01407    } else {
01408       /* compare digits */
01409       if (a->sign == FP_NEG) {
01410          /* if negative compare opposite direction */
01411          return fp_cmp_mag(b, a);
01412       } else {
01413          return fp_cmp_mag(a, b);
01414       }
01415    }
01416 }
01417 
01418 /* compare against a single digit */
01419 int fp_cmp_d(fp_int *a, fp_digit b)
01420 {
01421   /* compare based on sign */
01422   if ((b && a->used == 0) || a->sign == FP_NEG) {
01423     return FP_LT;
01424   }
01425 
01426   /* compare based on magnitude */
01427   if (a->used > 1) {
01428     return FP_GT;
01429   }
01430 
01431   /* compare the only digit of a to b */
01432   if (a->dp[0] > b) {
01433     return FP_GT;
01434   } else if (a->dp[0] < b) {
01435     return FP_LT;
01436   } else {
01437     return FP_EQ;
01438   }
01439 
01440 }
01441 
01442 int fp_cmp_mag(fp_int *a, fp_int *b)
01443 {
01444    int x;
01445 
01446    if (a->used > b->used) {
01447       return FP_GT;
01448    } else if (a->used < b->used) {
01449       return FP_LT;
01450    } else {
01451       for (x = a->used - 1; x >= 0; x--) {
01452           if (a->dp[x] > b->dp[x]) {
01453              return FP_GT;
01454           } else if (a->dp[x] < b->dp[x]) {
01455              return FP_LT;
01456           }
01457       }
01458    }
01459    return FP_EQ;
01460 }
01461 
01462 /* setups the montgomery reduction */
01463 int fp_montgomery_setup(fp_int *a, fp_digit *rho)
01464 {
01465   fp_digit x, b;
01466 
01467 /* fast inversion mod 2**k
01468  *
01469  * Based on the fact that
01470  *
01471  * XA = 1 (mod 2**n)  =>  (X(2-XA)) A = 1 (mod 2**2n)
01472  *                    =>  2*X*A - X*X*A*A = 1
01473  *                    =>  2*(1) - (1)     = 1
01474  */
01475   b = a->dp[0];
01476 
01477   if ((b & 1) == 0) {
01478     return FP_VAL;
01479   }
01480 
01481   x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
01482   x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
01483   x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
01484   x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
01485 #ifdef FP_64BIT
01486   x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
01487 #endif
01488 
01489   /* rho = -1/m mod b */
01490   *rho = (fp_digit) (((fp_word) 1 << ((fp_word) DIGIT_BIT)) - ((fp_word)x));
01491 
01492   return FP_OKAY;
01493 }
01494 
01495 /* computes a = B**n mod b without division or multiplication useful for
01496  * normalizing numbers in a Montgomery system.
01497  */
01498 void fp_montgomery_calc_normalization(fp_int *a, fp_int *b)
01499 {
01500   int     x, bits;
01501 
01502   /* how many bits of last digit does b use */
01503   bits = fp_count_bits (b) % DIGIT_BIT;
01504   if (!bits) bits = DIGIT_BIT;
01505 
01506   /* compute A = B^(n-1) * 2^(bits-1) */
01507   if (b->used > 1) {
01508      fp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1);
01509   } else {
01510      fp_set(a, 1);
01511      bits = 1;
01512   }
01513 
01514   /* now compute C = A * B mod b */
01515   for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
01516     fp_mul_2 (a, a);
01517     if (fp_cmp_mag (a, b) != FP_LT) {
01518       s_fp_sub (a, b, a);
01519     }
01520   }
01521 }
01522 
01523 
01524 #ifdef TFM_SMALL_MONT_SET
01525     #include "fp_mont_small.i"
01526 #endif
01527 
01528 /* computes x/R == x (mod N) via Montgomery Reduction */
01529 void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
01530 {
01531    fp_digit c[FP_SIZE], *_c, *tmpm, mu = 0;
01532    int      oldused, x, y, pa;
01533 
01534    /* bail if too large */
01535    if (m->used > (FP_SIZE/2)) {
01536       (void)mu;                     /* shut up compiler */
01537       return;
01538    }
01539 
01540 #ifdef TFM_SMALL_MONT_SET
01541    if (m->used <= 16) {
01542       fp_montgomery_reduce_small(a, m, mp);
01543       return;
01544    }
01545 #endif
01546 
01547 
01548    /* now zero the buff */
01549    XMEMSET(c, 0, sizeof c);
01550    pa = m->used;
01551 
01552    /* copy the input */
01553    oldused = a->used;
01554    for (x = 0; x < oldused; x++) {
01555        c[x] = a->dp[x];
01556    }
01557    MONT_START;
01558 
01559    for (x = 0; x < pa; x++) {
01560        fp_digit cy = 0;
01561        /* get Mu for this round */
01562        LOOP_START;
01563        _c   = c + x;
01564        tmpm = m->dp;
01565        y = 0;
01566        #if (defined(TFM_SSE2) || defined(TFM_X86_64))
01567         for (; y < (pa & ~7); y += 8) {
01568               INNERMUL8;
01569               _c   += 8;
01570               tmpm += 8;
01571            }
01572        #endif
01573 
01574        for (; y < pa; y++) {
01575           INNERMUL;
01576           ++_c;
01577        }
01578        LOOP_END;
01579        while (cy) {
01580            PROPCARRY;
01581            ++_c;
01582        }
01583   }         
01584 
01585   /* now copy out */
01586   _c   = c + pa;
01587   tmpm = a->dp;
01588   for (x = 0; x < pa+1; x++) {
01589      *tmpm++ = *_c++;
01590   }
01591 
01592   for (; x < oldused; x++)   {
01593      *tmpm++ = 0;
01594   }
01595 
01596   MONT_FINI;
01597 
01598   a->used = pa+1;
01599   fp_clamp(a);
01600   
01601   /* if A >= m then A = A - m */
01602   if (fp_cmp_mag (a, m) != FP_LT) {
01603     s_fp_sub (a, m, a);
01604   }
01605 }
01606 
01607 void fp_read_unsigned_bin(fp_int *a, unsigned char *b, int c)
01608 {
01609   /* zero the int */
01610   fp_zero (a);
01611 
01612   /* If we know the endianness of this architecture, and we're using
01613      32-bit fp_digits, we can optimize this */
01614 #if (defined(LITTLE_ENDIAN_ORDER) || defined(BIG_ENDIAN_ORDER)) && !defined(FP_64BIT)
01615   /* But not for both simultaneously */
01616 #if defined(LITTLE_ENDIAN_ORDER) && defined(BIG_ENDIAN_ORDER)
01617 #error Both LITTLE_ENDIAN_ORDER and BIG_ENDIAN_ORDER defined.
01618 #endif
01619   {
01620      unsigned char *pd = (unsigned char *)a->dp;
01621 
01622      if ((unsigned)c > (FP_SIZE * sizeof(fp_digit))) {
01623         int excess = c - (FP_SIZE * sizeof(fp_digit));
01624         c -= excess;
01625         b += excess;
01626      }
01627      a->used = (c + sizeof(fp_digit) - 1)/sizeof(fp_digit);
01628      /* read the bytes in */
01629 #ifdef BIG_ENDIAN_ORDER
01630      {
01631        /* Use Duff's device to unroll the loop. */
01632        int idx = (c - 1) & ~3;
01633        switch (c % 4) {
01634        case 0:  do { pd[idx+0] = *b++;
01635        case 3:       pd[idx+1] = *b++;
01636        case 2:       pd[idx+2] = *b++;
01637        case 1:       pd[idx+3] = *b++;
01638                      idx -= 4;
01639                 } while ((c -= 4) > 0);
01640        }
01641      }
01642 #else
01643      for (c -= 1; c >= 0; c -= 1) {
01644        pd[c] = *b++;
01645      }
01646 #endif
01647   }
01648 #else
01649   /* read the bytes in */
01650   for (; c > 0; c--) {
01651      fp_mul_2d (a, 8, a);
01652      a->dp[0] |= *b++;
01653      a->used += 1;
01654   }
01655 #endif
01656   fp_clamp (a);
01657 }
01658 
01659 void fp_to_unsigned_bin(fp_int *a, unsigned char *b)
01660 {
01661   int     x;
01662   fp_int  t;
01663 
01664   fp_init_copy(&t, a);
01665 
01666   x = 0;
01667   while (fp_iszero (&t) == FP_NO) {
01668       b[x++] = (unsigned char) (t.dp[0] & 255);
01669       fp_div_2d (&t, 8, &t, NULL);
01670   }
01671   fp_reverse (b, x);
01672 }
01673 
01674 int fp_unsigned_bin_size(fp_int *a)
01675 {
01676   int     size = fp_count_bits (a);
01677   return (size / 8 + ((size & 7) != 0 ? 1 : 0));
01678 }
01679 
01680 void fp_set(fp_int *a, fp_digit b)
01681 {
01682    fp_zero(a);
01683    a->dp[0] = b;
01684    a->used  = a->dp[0] ? 1 : 0;
01685 }
01686 
01687 int fp_count_bits (fp_int * a)
01688 {
01689   int     r;
01690   fp_digit q;
01691 
01692   /* shortcut */
01693   if (a->used == 0) {
01694     return 0;
01695   }
01696 
01697   /* get number of digits and add that */
01698   r = (a->used - 1) * DIGIT_BIT;
01699 
01700   /* take the last digit and count the bits in it */
01701   q = a->dp[a->used - 1];
01702   while (q > ((fp_digit) 0)) {
01703     ++r;
01704     q >>= ((fp_digit) 1);
01705   }
01706   return r;
01707 }
01708 
01709 int fp_leading_bit(fp_int *a)
01710 {
01711     int bit = 0;
01712 
01713     if (a->used != 0) {
01714         fp_digit q = a->dp[a->used - 1];
01715         int qSz = sizeof(fp_digit);
01716 
01717         while (qSz > 0) {
01718             if ((unsigned char)q != 0)
01719                 bit = (q & 0x80) != 0;
01720             q >>= 8;
01721             qSz--;
01722         }
01723     }
01724 
01725     return bit;
01726 }
01727 
01728 void fp_lshd(fp_int *a, int x)
01729 {
01730    int y;
01731 
01732    /* move up and truncate as required */
01733    y = MIN(a->used + x - 1, (int)(FP_SIZE-1));
01734 
01735    /* store new size */
01736    a->used = y + 1;
01737 
01738    /* move digits */
01739    for (; y >= x; y--) {
01740        a->dp[y] = a->dp[y-x];
01741    }
01742  
01743    /* zero lower digits */
01744    for (; y >= 0; y--) {
01745        a->dp[y] = 0;
01746    }
01747 
01748    /* clamp digits */
01749    fp_clamp(a);
01750 }
01751 
01752 
01753 /* right shift by bit count */
01754 void fp_rshb(fp_int *c, int x)
01755 {
01756     register fp_digit *tmpc, mask, shift;
01757     fp_digit r, rr;
01758     fp_digit D = x;
01759 
01760     /* mask */
01761     mask = (((fp_digit)1) << D) - 1;
01762 
01763     /* shift for lsb */
01764     shift = DIGIT_BIT - D;
01765 
01766     /* alias */
01767     tmpc = c->dp + (c->used - 1);
01768 
01769     /* carry */
01770     r = 0;
01771     for (x = c->used - 1; x >= 0; x--) {
01772       /* get the lower  bits of this word in a temp */
01773       rr = *tmpc & mask;
01774 
01775       /* shift the current word and mix in the carry bits from previous word */
01776       *tmpc = (*tmpc >> D) | (r << shift);
01777       --tmpc;
01778 
01779       /* set the carry to the carry bits of the current word found above */
01780       r = rr;
01781     }
01782 }
01783 
01784 
01785 void fp_rshd(fp_int *a, int x)
01786 {
01787   int y;
01788 
01789   /* too many digits just zero and return */
01790   if (x >= a->used) {
01791      fp_zero(a);
01792      return;
01793   }
01794 
01795    /* shift */
01796    for (y = 0; y < a->used - x; y++) {
01797       a->dp[y] = a->dp[y+x];
01798    }
01799 
01800    /* zero rest */
01801    for (; y < a->used; y++) {
01802       a->dp[y] = 0;
01803    }
01804    
01805    /* decrement count */
01806    a->used -= x;
01807    fp_clamp(a);
01808 }
01809 
01810 /* reverse an array, used for radix code */
01811 void fp_reverse (unsigned char *s, int len)
01812 {
01813   int     ix, iy;
01814   unsigned char t;
01815 
01816   ix = 0;
01817   iy = len - 1;
01818   while (ix < iy) {
01819     t     = s[ix];
01820     s[ix] = s[iy];
01821     s[iy] = t;
01822     ++ix;
01823     --iy;
01824   }
01825 }
01826 
01827 
01828 /* c = a - b */
01829 void fp_sub_d(fp_int *a, fp_digit b, fp_int *c)
01830 {
01831    fp_int tmp;
01832    fp_set(&tmp, b);
01833    fp_sub(a, &tmp, c);
01834 }
01835 
01836 
01837 /* CyaSSL callers from normal lib */
01838 
01839 /* init a new mp_int */
01840 int mp_init (mp_int * a)
01841 {
01842   if (a)
01843     fp_init(a);
01844   return MP_OKAY;
01845 }
01846 
01847 /* clear one (frees)  */
01848 void mp_clear (mp_int * a)
01849 {
01850   fp_zero(a);
01851 }
01852 
01853 /* handle up to 6 inits */
01854 int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d, mp_int* e, mp_int* f)
01855 {
01856     if (a)
01857         fp_init(a);
01858     if (b)
01859         fp_init(b);
01860     if (c)
01861         fp_init(c);
01862     if (d)
01863         fp_init(d);
01864     if (e)
01865         fp_init(e);
01866     if (f)
01867         fp_init(f);
01868 
01869     return MP_OKAY;
01870 }
01871 
01872 /* high level addition (handles signs) */
01873 int mp_add (mp_int * a, mp_int * b, mp_int * c)
01874 {
01875   fp_add(a, b, c);
01876   return MP_OKAY;
01877 }
01878 
01879 /* high level subtraction (handles signs) */
01880 int mp_sub (mp_int * a, mp_int * b, mp_int * c)
01881 {
01882   fp_sub(a, b, c);
01883   return MP_OKAY;
01884 }
01885 
01886 /* high level multiplication (handles sign) */
01887 int mp_mul (mp_int * a, mp_int * b, mp_int * c)
01888 {
01889   fp_mul(a, b, c);
01890   return MP_OKAY;
01891 }
01892 
01893 /* d = a * b (mod c) */
01894 int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
01895 {
01896   return fp_mulmod(a, b, c, d);
01897 }
01898 
01899 /* c = a mod b, 0 <= c < b */
01900 int mp_mod (mp_int * a, mp_int * b, mp_int * c)
01901 {
01902   return fp_mod (a, b, c);
01903 }
01904 
01905 /* hac 14.61, pp608 */
01906 int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
01907 {
01908   return fp_invmod(a, b, c);
01909 }
01910 
01911 /* this is a shell function that calls either the normal or Montgomery
01912  * exptmod functions.  Originally the call to the montgomery code was
01913  * embedded in the normal function but that wasted alot of stack space
01914  * for nothing (since 99% of the time the Montgomery code would be called)
01915  */
01916 int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
01917 {
01918   return fp_exptmod(G, X, P, Y);
01919 }
01920 
01921 /* compare two ints (signed)*/
01922 int mp_cmp (mp_int * a, mp_int * b)
01923 {
01924   return fp_cmp(a, b);
01925 }
01926 
01927 /* compare a digit */
01928 int mp_cmp_d(mp_int * a, mp_digit b)
01929 {
01930   return fp_cmp_d(a, b);
01931 }
01932 
01933 /* get the size for an unsigned equivalent */
01934 int mp_unsigned_bin_size (mp_int * a)
01935 {
01936   return fp_unsigned_bin_size(a);
01937 }
01938 
01939 /* store in unsigned [big endian] format */
01940 int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
01941 {
01942   fp_to_unsigned_bin(a,b);
01943   return MP_OKAY;
01944 }
01945 
01946 /* reads a unsigned char array, assumes the msb is stored first [big endian] */
01947 int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
01948 {
01949   fp_read_unsigned_bin(a, (unsigned char *)b, c);
01950   return MP_OKAY;
01951 }
01952 
01953 
01954 int mp_sub_d(fp_int *a, fp_digit b, fp_int *c)
01955 {
01956     fp_sub_d(a, b, c);
01957     return MP_OKAY;
01958 }
01959 
01960 
01961 /* fast math conversion */
01962 int mp_copy(fp_int* a, fp_int* b)
01963 {
01964     fp_copy(a, b);
01965     return MP_OKAY;
01966 }
01967 
01968 
01969 /* fast math conversion */
01970 int mp_isodd(mp_int* a)
01971 {
01972     return fp_isodd(a);
01973 }
01974 
01975 
01976 /* fast math conversion */
01977 int mp_iszero(mp_int* a)
01978 {
01979     return fp_iszero(a);
01980 }
01981 
01982 
01983 /* fast math conversion */
01984 int mp_count_bits (mp_int* a)
01985 {
01986     return fp_count_bits(a);
01987 }
01988 
01989 
01990 int mp_leading_bit (mp_int* a)
01991 {
01992     return fp_leading_bit(a);
01993 }
01994 
01995 
01996 /* fast math conversion */
01997 void mp_rshb (mp_int* a, int x)
01998 {
01999     fp_rshb(a, x);
02000 }
02001 
02002 
02003 /* fast math wrappers */
02004 int mp_set_int(fp_int *a, fp_digit b)
02005 {
02006     fp_set(a, b);
02007     return MP_OKAY;
02008 }
02009 
02010 
02011 #if defined(CYASSL_KEY_GEN) || defined (HAVE_ECC)
02012 
02013 /* c = a * a (mod b) */
02014 int fp_sqrmod(fp_int *a, fp_int *b, fp_int *c)
02015 {
02016   fp_int tmp;
02017   fp_zero(&tmp);
02018   fp_sqr(a, &tmp);
02019   return fp_mod(&tmp, b, c);
02020 }
02021 
02022 /* fast math conversion */
02023 int mp_sqrmod(mp_int *a, mp_int *b, mp_int *c)
02024 {
02025     return fp_sqrmod(a, b, c);
02026 }
02027 
02028 /* fast math conversion */
02029 int mp_montgomery_calc_normalization(mp_int *a, mp_int *b)
02030 {
02031     fp_montgomery_calc_normalization(a, b);
02032     return MP_OKAY;
02033 }
02034 
02035 #endif /* CYASSL_KEYGEN || HAVE_ECC */
02036 
02037 
02038 #ifdef CYASSL_KEY_GEN
02039 
02040 void fp_gcd(fp_int *a, fp_int *b, fp_int *c);
02041 void fp_lcm(fp_int *a, fp_int *b, fp_int *c);
02042 int  fp_isprime(fp_int *a);
02043 int  fp_cnt_lsb(fp_int *a);
02044 
02045 int mp_gcd(fp_int *a, fp_int *b, fp_int *c)
02046 {
02047     fp_gcd(a, b, c);
02048     return MP_OKAY;
02049 }
02050 
02051 
02052 int mp_lcm(fp_int *a, fp_int *b, fp_int *c)
02053 {
02054     fp_lcm(a, b, c);
02055     return MP_OKAY;
02056 }
02057 
02058 
02059 int mp_prime_is_prime(mp_int* a, int t, int* result)
02060 {
02061     (void)t;
02062     *result = fp_isprime(a);
02063     return MP_OKAY;
02064 }
02065 
02066 
02067 
02068 static int s_is_power_of_two(fp_digit b, int *p)
02069 {
02070    int x;
02071 
02072    /* fast return if no power of two */
02073    if ((b==0) || (b & (b-1))) {
02074       return 0;
02075    }
02076 
02077    for (x = 0; x < DIGIT_BIT; x++) {
02078       if (b == (((fp_digit)1)<<x)) {
02079          *p = x;
02080          return 1;
02081       }
02082    }
02083    return 0;
02084 }
02085 
02086 /* a/b => cb + d == a */
02087 static int fp_div_d(fp_int *a, fp_digit b, fp_int *c, fp_digit *d)
02088 {
02089   fp_int   q;
02090   fp_word  w;
02091   fp_digit t;
02092   int      ix;
02093 
02094   /* cannot divide by zero */
02095   if (b == 0) {
02096      return FP_VAL;
02097   }
02098 
02099   /* quick outs */
02100   if (b == 1 || fp_iszero(a) == 1) {
02101      if (d != NULL) {
02102         *d = 0;
02103      }
02104      if (c != NULL) {
02105         fp_copy(a, c);
02106      }
02107      return FP_OKAY;
02108   }
02109 
02110   /* power of two ? */
02111   if (s_is_power_of_two(b, &ix) == 1) {
02112      if (d != NULL) {
02113         *d = a->dp[0] & ((((fp_digit)1)<<ix) - 1);
02114      }
02115      if (c != NULL) {
02116         fp_div_2d(a, ix, c, NULL);
02117      }
02118      return FP_OKAY;
02119   }
02120 
02121   /* no easy answer [c'est la vie].  Just division */
02122   fp_init(&q);
02123   
02124   q.used = a->used;
02125   q.sign = a->sign;
02126   w = 0;
02127   for (ix = a->used - 1; ix >= 0; ix--) {
02128      w = (w << ((fp_word)DIGIT_BIT)) | ((fp_word)a->dp[ix]);
02129      
02130      if (w >= b) {
02131         t = (fp_digit)(w / b);
02132         w -= ((fp_word)t) * ((fp_word)b);
02133       } else {
02134         t = 0;
02135       }
02136       q.dp[ix] = (fp_digit)t;
02137   }
02138   
02139   if (d != NULL) {
02140      *d = (fp_digit)w;
02141   }
02142   
02143   if (c != NULL) {
02144      fp_clamp(&q);
02145      fp_copy(&q, c);
02146   }
02147  
02148   return FP_OKAY;
02149 }
02150 
02151 
02152 /* c = a mod b, 0 <= c < b  */
02153 static int fp_mod_d(fp_int *a, fp_digit b, fp_digit *c)
02154 {
02155    return fp_div_d(a, b, NULL, c);
02156 }
02157 
02158 
02159 /* Miller-Rabin test of "a" to the base of "b" as described in 
02160  * HAC pp. 139 Algorithm 4.24
02161  *
02162  * Sets result to 0 if definitely composite or 1 if probably prime.
02163  * Randomly the chance of error is no more than 1/4 and often 
02164  * very much lower.
02165  */
02166 static void fp_prime_miller_rabin (fp_int * a, fp_int * b, int *result)
02167 {
02168   fp_int  n1, y, r;
02169   int     s, j;
02170 
02171   /* default */
02172   *result = FP_NO;
02173 
02174   /* ensure b > 1 */
02175   if (fp_cmp_d(b, 1) != FP_GT) {
02176      return;
02177   }     
02178 
02179   /* get n1 = a - 1 */
02180   fp_init_copy(&n1, a);
02181   fp_sub_d(&n1, 1, &n1);
02182 
02183   /* set 2**s * r = n1 */
02184   fp_init_copy(&r, &n1);
02185 
02186   /* count the number of least significant bits
02187    * which are zero
02188    */
02189   s = fp_cnt_lsb(&r);
02190 
02191   /* now divide n - 1 by 2**s */
02192   fp_div_2d (&r, s, &r, NULL);
02193 
02194   /* compute y = b**r mod a */
02195   fp_init(&y);
02196   fp_exptmod(b, &r, a, &y);
02197 
02198   /* if y != 1 and y != n1 do */
02199   if (fp_cmp_d (&y, 1) != FP_EQ && fp_cmp (&y, &n1) != FP_EQ) {
02200     j = 1;
02201     /* while j <= s-1 and y != n1 */
02202     while ((j <= (s - 1)) && fp_cmp (&y, &n1) != FP_EQ) {
02203       fp_sqrmod (&y, a, &y);
02204 
02205       /* if y == 1 then composite */
02206       if (fp_cmp_d (&y, 1) == FP_EQ) {
02207          return;
02208       }
02209       ++j;
02210     }
02211 
02212     /* if y != n1 then composite */
02213     if (fp_cmp (&y, &n1) != FP_EQ) {
02214        return;
02215     }
02216   }
02217 
02218   /* probably prime now */
02219   *result = FP_YES;
02220 }
02221 
02222 
02223 /* a few primes */
02224 static const fp_digit primes[256] = {
02225   0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
02226   0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
02227   0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
02228   0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
02229   0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
02230   0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
02231   0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
02232   0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
02233 
02234   0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
02235   0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
02236   0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
02237   0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
02238   0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
02239   0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
02240   0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
02241   0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
02242 
02243   0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
02244   0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
02245   0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
02246   0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
02247   0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
02248   0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
02249   0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
02250   0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
02251 
02252   0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
02253   0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
02254   0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
02255   0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
02256   0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
02257   0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
02258   0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
02259   0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
02260 };
02261 
02262 int fp_isprime(fp_int *a)
02263 {
02264    fp_int   b;
02265    fp_digit d = 0;
02266    int      r, res;
02267 
02268    /* do trial division */
02269    for (r = 0; r < 256; r++) {
02270        fp_mod_d(a, primes[r], &d);
02271        if (d == 0) {
02272           return FP_NO;
02273        }
02274    }
02275 
02276    /* now do 8 miller rabins */
02277    fp_init(&b);
02278    for (r = 0; r < 8; r++) {
02279        fp_set(&b, primes[r]);
02280        fp_prime_miller_rabin(a, &b, &res);
02281        if (res == FP_NO) {
02282           return FP_NO;
02283        }
02284    }
02285    return FP_YES;
02286 }
02287 
02288 
02289 /* c = [a, b] */
02290 void fp_lcm(fp_int *a, fp_int *b, fp_int *c)
02291 {
02292    fp_int  t1, t2;
02293 
02294    fp_init(&t1);
02295    fp_init(&t2);
02296    fp_gcd(a, b, &t1);
02297    if (fp_cmp_mag(a, b) == FP_GT) {
02298       fp_div(a, &t1, &t2, NULL);
02299       fp_mul(b, &t2, c);
02300    } else {
02301       fp_div(b, &t1, &t2, NULL);
02302       fp_mul(a, &t2, c);
02303    }   
02304 }
02305 
02306 
02307 static const int lnz[16] = {
02308    4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
02309 };
02310 
02311 /* Counts the number of lsbs which are zero before the first zero bit */
02312 int fp_cnt_lsb(fp_int *a)
02313 {
02314    int x;
02315    fp_digit q, qq;
02316 
02317    /* easy out */
02318    if (fp_iszero(a) == 1) {
02319       return 0;
02320    }
02321 
02322    /* scan lower digits until non-zero */
02323    for (x = 0; x < a->used && a->dp[x] == 0; x++);
02324    q = a->dp[x];
02325    x *= DIGIT_BIT;
02326 
02327    /* now scan this digit until a 1 is found */
02328    if ((q & 1) == 0) {
02329       do {
02330          qq  = q & 15;
02331          x  += lnz[qq];
02332          q >>= 4;
02333       } while (qq == 0);
02334    }
02335    return x;
02336 }
02337 
02338 
02339 /* c = (a, b) */
02340 void fp_gcd(fp_int *a, fp_int *b, fp_int *c)
02341 {
02342    fp_int u, v, r;
02343 
02344    /* either zero than gcd is the largest */
02345    if (fp_iszero (a) == 1 && fp_iszero (b) == 0) {
02346      fp_abs (b, c);
02347      return;
02348    }
02349    if (fp_iszero (a) == 0 && fp_iszero (b) == 1) {
02350      fp_abs (a, c);
02351      return;
02352    }
02353 
02354    /* optimized.  At this point if a == 0 then
02355     * b must equal zero too
02356     */
02357    if (fp_iszero (a) == 1) {
02358      fp_zero(c);
02359      return;
02360    }
02361 
02362    /* sort inputs */
02363    if (fp_cmp_mag(a, b) != FP_LT) {
02364       fp_init_copy(&u, a);
02365       fp_init_copy(&v, b);
02366    } else {
02367       fp_init_copy(&u, b);
02368       fp_init_copy(&v, a);
02369    }
02370  
02371    fp_zero(&r);
02372    while (fp_iszero(&v) == FP_NO) {
02373       fp_mod(&u, &v, &r);
02374       fp_copy(&v, &u);
02375       fp_copy(&r, &v);
02376    }
02377    fp_copy(&u, c);
02378 }
02379 
02380 #endif /* CYASSL_KEY_GEN */
02381 
02382 
02383 #if defined(HAVE_ECC) || !defined(NO_PWDBASED)
02384 /* c = a + b */
02385 void fp_add_d(fp_int *a, fp_digit b, fp_int *c)
02386 {
02387    fp_int tmp;
02388    fp_set(&tmp, b);
02389    fp_add(a,&tmp,c);
02390 }
02391 
02392 /* external compatibility */
02393 int mp_add_d(fp_int *a, fp_digit b, fp_int *c)
02394 {
02395     fp_add_d(a, b, c);
02396     return MP_OKAY;
02397 }
02398 
02399 #endif  /* HAVE_ECC || !NO_PWDBASED */
02400 
02401 
02402 #ifdef HAVE_ECC
02403 
02404 /* chars used in radix conversions */
02405 static const char *fp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
02406 
02407 static int fp_read_radix(fp_int *a, const char *str, int radix)
02408 {
02409   int     y, neg;
02410   char    ch;
02411 
02412   /* make sure the radix is ok */
02413   if (radix < 2 || radix > 64) {
02414     return FP_VAL;
02415   }
02416 
02417   /* if the leading digit is a
02418    * minus set the sign to negative.
02419    */
02420   if (*str == '-') {
02421     ++str;
02422     neg = FP_NEG;
02423   } else {
02424     neg = FP_ZPOS;
02425   }
02426 
02427   /* set the integer to the default of zero */
02428   fp_zero (a);
02429 
02430   /* process each digit of the string */
02431   while (*str) {
02432     /* if the radix < 36 the conversion is case insensitive
02433      * this allows numbers like 1AB and 1ab to represent the same  value
02434      * [e.g. in hex]
02435      */
02436     ch = (char) ((radix < 36) ? XTOUPPER(*str) : *str);
02437     for (y = 0; y < 64; y++) {
02438       if (ch == fp_s_rmap[y]) {
02439          break;
02440       }
02441     }
02442 
02443     /* if the char was found in the map
02444      * and is less than the given radix add it
02445      * to the number, otherwise exit the loop.
02446      */
02447     if (y < radix) {
02448       fp_mul_d (a, (fp_digit) radix, a);
02449       fp_add_d (a, (fp_digit) y, a);
02450     } else {
02451       break;
02452     }
02453     ++str;
02454   }
02455 
02456   /* set the sign only if a != 0 */
02457   if (fp_iszero(a) != FP_YES) {
02458      a->sign = neg;
02459   }
02460   return FP_OKAY;
02461 }
02462 
02463 /* fast math conversion */
02464 int mp_read_radix(mp_int *a, const char *str, int radix)
02465 {
02466     return fp_read_radix(a, str, radix);
02467 }
02468 
02469 /* fast math conversion */
02470 int mp_set(fp_int *a, fp_digit b)
02471 {
02472     fp_set(a,b);
02473     return MP_OKAY;
02474 }
02475 
02476 /* fast math conversion */
02477 int mp_sqr(fp_int *A, fp_int *B)
02478 {
02479     fp_sqr(A, B);
02480     return MP_OKAY;
02481 }
02482   
02483 /* fast math conversion */
02484 int mp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
02485 {
02486     fp_montgomery_reduce(a, m, mp);
02487     return MP_OKAY;
02488 }
02489 
02490 
02491 /* fast math conversion */
02492 int mp_montgomery_setup(fp_int *a, fp_digit *rho)
02493 {
02494     return fp_montgomery_setup(a, rho);
02495 }
02496 
02497 int mp_div_2(fp_int * a, fp_int * b)
02498 {
02499     fp_div_2(a, b);
02500     return MP_OKAY;
02501 }
02502 
02503 
02504 int mp_init_copy(fp_int * a, fp_int * b)
02505 {
02506     fp_init_copy(a, b);
02507     return MP_OKAY;
02508 }
02509 
02510 
02511 
02512 #endif /* HAVE_ECC */
02513 
02514 #endif /* USE_FAST_MATH */