cyassl re-port with cellular comms, PSK test

Dependencies:   VodafoneUSBModem_bleedingedge2 mbed-rtos mbed-src

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers tfm.c Source File

tfm.c

00001 /* tfm.c
00002  *
00003  * Copyright (C) 2006-2012 Sawtooth Consulting Ltd.
00004  *
00005  * This file is part of CyaSSL.
00006  *
00007  * CyaSSL is free software; you can redistribute it and/or modify
00008  * it under the terms of the GNU General Public License as published by
00009  * the Free Software Foundation; either version 2 of the License, or
00010  * (at your option) any later version.
00011  *
00012  * CyaSSL is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
00020  */
00021 
00022 
00023 /*
00024  * Based on public domain TomsFastMath 0.10 by Tom St Denis, tomstdenis@iahu.ca,
00025  * http://math.libtomcrypt.com
00026  */
00027 
00028 /**
00029  *  Edited by Moisés Guimarães (moises.guimaraes@phoebus.com.br)
00030  *  to fit CyaSSL's needs.
00031  */
00032 
00033 #ifdef HAVE_CONFIG_H
00034     #include <config.h>
00035 #endif
00036 
00037 /* in case user set USE_FAST_MATH there */
00038 #include <cyassl/ctaocrypt/settings.h>
00039 
00040 #ifdef USE_FAST_MATH
00041 
00042 #include <cyassl/ctaocrypt/tfm.h>
00043 #include <ctaocrypt/src/asm.c>  /* will define asm MACROS or C ones */
00044 
00045 
00046 /* math settings check */
00047 word32 CheckRunTimeSettings(void)
00048 {
00049     return CTC_SETTINGS;
00050 }
00051 
00052 
00053 /* math settings size check */
00054 word32 CheckRunTimeFastMath(void)
00055 {
00056     return FP_SIZE;
00057 }
00058 
00059 
00060 /* Functions */
00061 
00062 void fp_add(fp_int *a, fp_int *b, fp_int *c)
00063 {
00064   int     sa, sb;
00065 
00066   /* get sign of both inputs */
00067   sa = a->sign;
00068   sb = b->sign;
00069 
00070   /* handle two cases, not four */
00071   if (sa == sb) {
00072     /* both positive or both negative */
00073     /* add their magnitudes, copy the sign */
00074     c->sign = sa;
00075     s_fp_add (a, b, c);
00076   } else {
00077     /* one positive, the other negative */
00078     /* subtract the one with the greater magnitude from */
00079     /* the one of the lesser magnitude.  The result gets */
00080     /* the sign of the one with the greater magnitude. */
00081     if (fp_cmp_mag (a, b) == FP_LT) {
00082       c->sign = sb;
00083       s_fp_sub (b, a, c);
00084     } else {
00085       c->sign = sa;
00086       s_fp_sub (a, b, c);
00087     }
00088   }
00089 }
00090 
00091 /* unsigned addition */
00092 void s_fp_add(fp_int *a, fp_int *b, fp_int *c)
00093 {
00094   int      x, y, oldused;
00095   register fp_word  t;
00096 
00097   y       = MAX(a->used, b->used);
00098   oldused = c->used;
00099   c->used = y;
00100  
00101   t = 0;
00102   for (x = 0; x < y; x++) {
00103       t         += ((fp_word)a->dp[x]) + ((fp_word)b->dp[x]);
00104       c->dp[x]   = (fp_digit)t;
00105       t        >>= DIGIT_BIT;
00106   }
00107   if (t != 0 && x < FP_SIZE) {
00108      c->dp[c->used++] = (fp_digit)t;
00109      ++x;
00110   }
00111 
00112   c->used = x;
00113   for (; x < oldused; x++) {
00114      c->dp[x] = 0;
00115   }
00116   fp_clamp(c);
00117 }
00118 
00119 /* c = a - b */
00120 void fp_sub(fp_int *a, fp_int *b, fp_int *c)
00121 {
00122   int     sa, sb;
00123 
00124   sa = a->sign;
00125   sb = b->sign;
00126 
00127   if (sa != sb) {
00128     /* subtract a negative from a positive, OR */
00129     /* subtract a positive from a negative. */
00130     /* In either case, ADD their magnitudes, */
00131     /* and use the sign of the first number. */
00132     c->sign = sa;
00133     s_fp_add (a, b, c);
00134   } else {
00135     /* subtract a positive from a positive, OR */
00136     /* subtract a negative from a negative. */
00137     /* First, take the difference between their */
00138     /* magnitudes, then... */
00139     if (fp_cmp_mag (a, b) != FP_LT) {
00140       /* Copy the sign from the first */
00141       c->sign = sa;
00142       /* The first has a larger or equal magnitude */
00143       s_fp_sub (a, b, c);
00144     } else {
00145       /* The result has the *opposite* sign from */
00146       /* the first number. */
00147       c->sign = (sa == FP_ZPOS) ? FP_NEG : FP_ZPOS;
00148       /* The second has a larger magnitude */
00149       s_fp_sub (b, a, c);
00150     }
00151   }
00152 }
00153 
00154 /* unsigned subtraction ||a|| >= ||b|| ALWAYS! */
00155 void s_fp_sub(fp_int *a, fp_int *b, fp_int *c)
00156 {
00157   int      x, oldbused, oldused;
00158   fp_word  t;
00159 
00160   oldused  = c->used;
00161   oldbused = b->used;
00162   c->used  = a->used;
00163   t       = 0;
00164   for (x = 0; x < oldbused; x++) {
00165      t         = ((fp_word)a->dp[x]) - (((fp_word)b->dp[x]) + t);
00166      c->dp[x]  = (fp_digit)t;
00167      t         = (t >> DIGIT_BIT)&1;
00168   }
00169   for (; x < a->used; x++) {
00170      t         = ((fp_word)a->dp[x]) - t;
00171      c->dp[x]  = (fp_digit)t;
00172      t         = (t >> DIGIT_BIT);
00173    }
00174   for (; x < oldused; x++) {
00175      c->dp[x] = 0;
00176   }
00177   fp_clamp(c);
00178 }
00179 
00180 /* c = a * b */
00181 void fp_mul(fp_int *A, fp_int *B, fp_int *C)
00182 {
00183     int   y, yy;
00184 
00185     y  = MAX(A->used, B->used);
00186     yy = MIN(A->used, B->used);
00187 
00188     /* call generic if we're out of range */
00189     if (y + yy > FP_SIZE) {
00190        fp_mul_comba(A, B, C);
00191        return ;
00192     }
00193 
00194     /* pick a comba (unrolled 4/8/16/32 x or rolled) based on the size
00195        of the largest input.  We also want to avoid doing excess mults if the 
00196        inputs are not close to the next power of two.  That is, for example,
00197        if say y=17 then we would do (32-17)^2 = 225 unneeded multiplications 
00198     */
00199 
00200 #ifdef TFM_MUL3
00201         if (y <= 3) {
00202            fp_mul_comba3(A,B,C);
00203            return;
00204         }
00205 #endif
00206 #ifdef TFM_MUL4
00207         if (y == 4) {
00208            fp_mul_comba4(A,B,C);
00209            return;
00210         }
00211 #endif
00212 #ifdef TFM_MUL6
00213         if (y <= 6) {
00214            fp_mul_comba6(A,B,C);
00215            return;
00216         }
00217 #endif
00218 #ifdef TFM_MUL7
00219         if (y == 7) {
00220            fp_mul_comba7(A,B,C);
00221            return;
00222         }
00223 #endif
00224 #ifdef TFM_MUL8
00225         if (y == 8) {
00226            fp_mul_comba8(A,B,C);
00227            return;
00228         }
00229 #endif
00230 #ifdef TFM_MUL9
00231         if (y == 9) {
00232            fp_mul_comba9(A,B,C);
00233            return;
00234         }
00235 #endif
00236 #ifdef TFM_MUL12
00237         if (y <= 12) {
00238            fp_mul_comba12(A,B,C);
00239            return;
00240         }
00241 #endif
00242 #ifdef TFM_MUL17
00243         if (y <= 17) {
00244            fp_mul_comba17(A,B,C);
00245            return;
00246         }
00247 #endif
00248 
00249 #ifdef TFM_SMALL_SET
00250         if (y <= 16) {
00251            fp_mul_comba_small(A,B,C);
00252            return;
00253         }
00254 #endif        
00255 #if defined(TFM_MUL20)
00256         if (y <= 20) {
00257            fp_mul_comba20(A,B,C);
00258            return;
00259         }
00260 #endif
00261 #if defined(TFM_MUL24)
00262         if (yy >= 16 && y <= 24) {
00263            fp_mul_comba24(A,B,C);
00264            return;
00265         }
00266 #endif
00267 #if defined(TFM_MUL28)
00268         if (yy >= 20 && y <= 28) {
00269            fp_mul_comba28(A,B,C);
00270            return;
00271         }
00272 #endif
00273 #if defined(TFM_MUL32)
00274         if (yy >= 24 && y <= 32) {
00275            fp_mul_comba32(A,B,C);
00276            return;
00277         }
00278 #endif
00279 #if defined(TFM_MUL48)
00280         if (yy >= 40 && y <= 48) {
00281            fp_mul_comba48(A,B,C);
00282            return;
00283         }
00284 #endif        
00285 #if defined(TFM_MUL64)
00286         if (yy >= 56 && y <= 64) {
00287            fp_mul_comba64(A,B,C);
00288            return;
00289         }
00290 #endif
00291         fp_mul_comba(A,B,C);
00292 }
00293 
00294 void fp_mul_2(fp_int * a, fp_int * b)
00295 {
00296   int     x, oldused;
00297    
00298   oldused = b->used;
00299   b->used = a->used;
00300 
00301   {
00302     register fp_digit r, rr, *tmpa, *tmpb;
00303 
00304     /* alias for source */
00305     tmpa = a->dp;
00306     
00307     /* alias for dest */
00308     tmpb = b->dp;
00309 
00310     /* carry */
00311     r = 0;
00312     for (x = 0; x < a->used; x++) {
00313     
00314       /* get what will be the *next* carry bit from the 
00315        * MSB of the current digit 
00316        */
00317       rr = *tmpa >> ((fp_digit)(DIGIT_BIT - 1));
00318       
00319       /* now shift up this digit, add in the carry [from the previous] */
00320       *tmpb++ = ((*tmpa++ << ((fp_digit)1)) | r);
00321       
00322       /* copy the carry that would be from the source 
00323        * digit into the next iteration 
00324        */
00325       r = rr;
00326     }
00327 
00328     /* new leading digit? */
00329     if (r != 0 && b->used != (FP_SIZE-1)) {
00330       /* add a MSB which is always 1 at this point */
00331       *tmpb = 1;
00332       ++(b->used);
00333     }
00334 
00335     /* now zero any excess digits on the destination 
00336      * that we didn't write to 
00337      */
00338     tmpb = b->dp + b->used;
00339     for (x = b->used; x < oldused; x++) {
00340       *tmpb++ = 0;
00341     }
00342   }
00343   b->sign = a->sign;
00344 }
00345 
00346 /* c = a * b */
00347 void fp_mul_d(fp_int *a, fp_digit b, fp_int *c)
00348 {
00349    fp_word  w;
00350    int      x, oldused;
00351 
00352    oldused = c->used;
00353    c->used = a->used;
00354    c->sign = a->sign;
00355    w       = 0;
00356    for (x = 0; x < a->used; x++) {
00357        w         = ((fp_word)a->dp[x]) * ((fp_word)b) + w;
00358        c->dp[x]  = (fp_digit)w;
00359        w         = w >> DIGIT_BIT;
00360    }
00361    if (w != 0 && (a->used != FP_SIZE)) {
00362       c->dp[c->used++] = (fp_digit) w;
00363       ++x;
00364    }
00365    for (; x < oldused; x++) {
00366       c->dp[x] = 0;
00367    }
00368    fp_clamp(c);
00369 }
00370 
00371 /* c = a * 2**d */
00372 void fp_mul_2d(fp_int *a, int b, fp_int *c)
00373 {
00374    fp_digit carry, carrytmp, shift;
00375    int x;
00376 
00377    /* copy it */
00378    fp_copy(a, c);
00379 
00380    /* handle whole digits */
00381    if (b >= DIGIT_BIT) {
00382       fp_lshd(c, b/DIGIT_BIT);
00383    }
00384    b %= DIGIT_BIT;
00385 
00386    /* shift the digits */
00387    if (b != 0) {
00388       carry = 0;   
00389       shift = DIGIT_BIT - b;
00390       for (x = 0; x < c->used; x++) {
00391           carrytmp = c->dp[x] >> shift;
00392           c->dp[x] = (c->dp[x] << b) + carry;
00393           carry = carrytmp;
00394       }
00395       /* store last carry if room */
00396       if (carry && x < FP_SIZE) {
00397          c->dp[c->used++] = carry;
00398       }
00399    }
00400    fp_clamp(c);
00401 }
00402 
00403 /* generic PxQ multiplier */
00404 void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
00405 {
00406    int       ix, iy, iz, tx, ty, pa;
00407    fp_digit  c0, c1, c2, *tmpx, *tmpy;
00408    fp_int    tmp, *dst;
00409 
00410    COMBA_START;
00411    COMBA_CLEAR;
00412    
00413    /* get size of output and trim */
00414    pa = A->used + B->used;
00415    if (pa >= FP_SIZE) {
00416       pa = FP_SIZE-1;
00417    }
00418 
00419    if (A == C || B == C) {
00420       fp_zero(&tmp);
00421       dst = &tmp;
00422    } else {
00423       fp_zero(C);
00424       dst = C;
00425    }
00426 
00427    for (ix = 0; ix < pa; ix++) {
00428       /* get offsets into the two bignums */
00429       ty = MIN(ix, B->used-1);
00430       tx = ix - ty;
00431 
00432       /* setup temp aliases */
00433       tmpx = A->dp + tx;
00434       tmpy = B->dp + ty;
00435 
00436       /* this is the number of times the loop will iterrate, essentially its 
00437          while (tx++ < a->used && ty-- >= 0) { ... }
00438        */
00439       iy = MIN(A->used-tx, ty+1);
00440 
00441       /* execute loop */
00442       COMBA_FORWARD;
00443       for (iz = 0; iz < iy; ++iz) {
00444           /* TAO change COMBA_ADD back to MULADD */
00445           MULADD(*tmpx++, *tmpy--);
00446       }
00447 
00448       /* store term */
00449       COMBA_STORE(dst->dp[ix]);
00450   }
00451   COMBA_FINI;
00452 
00453   dst->used = pa;
00454   dst->sign = A->sign ^ B->sign;
00455   fp_clamp(dst);
00456   fp_copy(dst, C);
00457 }
00458 
00459 /* a/b => cb + d == a */
00460 int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
00461 {
00462   fp_int  q, x, y, t1, t2;
00463   int     n, t, i, norm, neg;
00464 
00465   /* is divisor zero ? */
00466   if (fp_iszero (b) == 1) {
00467     return FP_VAL;
00468   }
00469 
00470   /* if a < b then q=0, r = a */
00471   if (fp_cmp_mag (a, b) == FP_LT) {
00472     if (d != NULL) {
00473       fp_copy (a, d);
00474     } 
00475     if (c != NULL) {
00476       fp_zero (c);
00477     }
00478     return FP_OKAY;
00479   }
00480 
00481   fp_init(&q);
00482   q.used = a->used + 2;
00483 
00484   fp_init(&t1);
00485   fp_init(&t2);
00486   fp_init_copy(&x, a);
00487   fp_init_copy(&y, b);
00488 
00489   /* fix the sign */
00490   neg = (a->sign == b->sign) ? FP_ZPOS : FP_NEG;
00491   x.sign = y.sign = FP_ZPOS;
00492 
00493   /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
00494   norm = fp_count_bits(&y) % DIGIT_BIT;
00495   if (norm < (int)(DIGIT_BIT-1)) {
00496      norm = (DIGIT_BIT-1) - norm;
00497      fp_mul_2d (&x, norm, &x);
00498      fp_mul_2d (&y, norm, &y);
00499   } else {
00500      norm = 0;
00501   }
00502 
00503   /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
00504   n = x.used - 1;
00505   t = y.used - 1;
00506 
00507   /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
00508   fp_lshd (&y, n - t);                                             /* y = y*b**{n-t} */
00509 
00510   while (fp_cmp (&x, &y) != FP_LT) {
00511     ++(q.dp[n - t]);
00512     fp_sub (&x, &y, &x);
00513   }
00514 
00515   /* reset y by shifting it back down */
00516   fp_rshd (&y, n - t);
00517 
00518   /* step 3. for i from n down to (t + 1) */
00519   for (i = n; i >= (t + 1); i--) {
00520     if (i > x.used) {
00521       continue;
00522     }
00523 
00524     /* step 3.1 if xi == yt then set q{i-t-1} to b-1, 
00525      * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
00526     if (x.dp[i] == y.dp[t]) {
00527       q.dp[i - t - 1] = ((((fp_word)1) << DIGIT_BIT) - 1);
00528     } else {
00529       fp_word tmp;
00530       tmp = ((fp_word) x.dp[i]) << ((fp_word) DIGIT_BIT);
00531       tmp |= ((fp_word) x.dp[i - 1]);
00532       tmp /= ((fp_word)y.dp[t]);
00533       q.dp[i - t - 1] = (fp_digit) (tmp);
00534     }
00535 
00536     /* while (q{i-t-1} * (yt * b + y{t-1})) > 
00537              xi * b**2 + xi-1 * b + xi-2 
00538      
00539        do q{i-t-1} -= 1; 
00540     */
00541     q.dp[i - t - 1] = (q.dp[i - t - 1] + 1);
00542     do {
00543       q.dp[i - t - 1] = (q.dp[i - t - 1] - 1);
00544 
00545       /* find left hand */
00546       fp_zero (&t1);
00547       t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
00548       t1.dp[1] = y.dp[t];
00549       t1.used = 2;
00550       fp_mul_d (&t1, q.dp[i - t - 1], &t1);
00551 
00552       /* find right hand */
00553       t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
00554       t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
00555       t2.dp[2] = x.dp[i];
00556       t2.used = 3;
00557     } while (fp_cmp_mag(&t1, &t2) == FP_GT);
00558 
00559     /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
00560     fp_mul_d (&y, q.dp[i - t - 1], &t1);
00561     fp_lshd  (&t1, i - t - 1);
00562     fp_sub   (&x, &t1, &x);
00563 
00564     /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
00565     if (x.sign == FP_NEG) {
00566       fp_copy (&y, &t1);
00567       fp_lshd (&t1, i - t - 1);
00568       fp_add (&x, &t1, &x);
00569       q.dp[i - t - 1] = q.dp[i - t - 1] - 1;
00570     }
00571   }
00572 
00573   /* now q is the quotient and x is the remainder 
00574    * [which we have to normalize] 
00575    */
00576   
00577   /* get sign before writing to c */
00578   x.sign = x.used == 0 ? FP_ZPOS : a->sign;
00579 
00580   if (c != NULL) {
00581     fp_clamp (&q);
00582     fp_copy (&q, c);
00583     c->sign = neg;
00584   }
00585 
00586   if (d != NULL) {
00587     fp_div_2d (&x, norm, &x, NULL);
00588 
00589 /* the following is a kludge, essentially we were seeing the right remainder but 
00590    with excess digits that should have been zero
00591  */
00592     for (i = b->used; i < x.used; i++) {
00593         x.dp[i] = 0;
00594     }
00595     fp_clamp(&x);
00596     fp_copy (&x, d);
00597   }
00598 
00599   return FP_OKAY;
00600 }
00601 
00602 /* b = a/2 */
00603 void fp_div_2(fp_int * a, fp_int * b)
00604 {
00605   int     x, oldused;
00606 
00607   oldused = b->used;
00608   b->used = a->used;
00609   {
00610     register fp_digit r, rr, *tmpa, *tmpb;
00611 
00612     /* source alias */
00613     tmpa = a->dp + b->used - 1;
00614 
00615     /* dest alias */
00616     tmpb = b->dp + b->used - 1;
00617 
00618     /* carry */
00619     r = 0;
00620     for (x = b->used - 1; x >= 0; x--) {
00621       /* get the carry for the next iteration */
00622       rr = *tmpa & 1;
00623 
00624       /* shift the current digit, add in carry and store */
00625       *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
00626 
00627       /* forward carry to next iteration */
00628       r = rr;
00629     }
00630 
00631     /* zero excess digits */
00632     tmpb = b->dp + b->used;
00633     for (x = b->used; x < oldused; x++) {
00634       *tmpb++ = 0;
00635     }
00636   }
00637   b->sign = a->sign;
00638   fp_clamp (b);
00639 }
00640 
00641 /* c = a / 2**b */
00642 void fp_div_2d(fp_int *a, int b, fp_int *c, fp_int *d)
00643 {
00644   fp_digit D, r, rr;
00645   int      x;
00646   fp_int   t;
00647 
00648   /* if the shift count is <= 0 then we do no work */
00649   if (b <= 0) {
00650     fp_copy (a, c);
00651     if (d != NULL) {
00652       fp_zero (d);
00653     }
00654     return;
00655   }
00656 
00657   fp_init(&t);
00658 
00659   /* get the remainder */
00660   if (d != NULL) {
00661     fp_mod_2d (a, b, &t);
00662   }
00663 
00664   /* copy */
00665   fp_copy(a, c);
00666 
00667   /* shift by as many digits in the bit count */
00668   if (b >= (int)DIGIT_BIT) {
00669     fp_rshd (c, b / DIGIT_BIT);
00670   }
00671 
00672   /* shift any bit count < DIGIT_BIT */
00673   D = (fp_digit) (b % DIGIT_BIT);
00674   if (D != 0) {
00675     register fp_digit *tmpc, mask, shift;
00676 
00677     /* mask */
00678     mask = (((fp_digit)1) << D) - 1;
00679 
00680     /* shift for lsb */
00681     shift = DIGIT_BIT - D;
00682 
00683     /* alias */
00684     tmpc = c->dp + (c->used - 1);
00685 
00686     /* carry */
00687     r = 0;
00688     for (x = c->used - 1; x >= 0; x--) {
00689       /* get the lower  bits of this word in a temp */
00690       rr = *tmpc & mask;
00691 
00692       /* shift the current word and mix in the carry bits from the previous word */
00693       *tmpc = (*tmpc >> D) | (r << shift);
00694       --tmpc;
00695 
00696       /* set the carry to the carry bits of the current word found above */
00697       r = rr;
00698     }
00699   }
00700   fp_clamp (c);
00701   if (d != NULL) {
00702     fp_copy (&t, d);
00703   }
00704 }
00705 
00706 /* c = a mod b, 0 <= c < b  */
00707 int fp_mod(fp_int *a, fp_int *b, fp_int *c)
00708 {
00709    fp_int t;
00710    int    err;
00711 
00712    fp_zero(&t);
00713    if ((err = fp_div(a, b, NULL, &t)) != FP_OKAY) {
00714       return err;
00715    }
00716    if (t.sign != b->sign) {
00717       fp_add(&t, b, c);
00718    } else {
00719       fp_copy(&t, c);
00720   }
00721   return FP_OKAY;
00722 }
00723 
00724 /* c = a mod 2**d */
00725 void fp_mod_2d(fp_int *a, int b, fp_int *c)
00726 {
00727    int x;
00728 
00729    /* zero if count less than or equal to zero */
00730    if (b <= 0) {
00731       fp_zero(c);
00732       return;
00733    }
00734 
00735    /* get copy of input */
00736    fp_copy(a, c);
00737  
00738    /* if 2**d is larger than we just return */
00739    if (b >= (DIGIT_BIT * a->used)) {
00740       return;
00741    }
00742 
00743   /* zero digits above the last digit of the modulus */
00744   for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
00745     c->dp[x] = 0;
00746   }
00747   /* clear the digit that is not completely outside/inside the modulus */
00748   c->dp[b / DIGIT_BIT] &= ~((fp_digit)0) >> (DIGIT_BIT - b);
00749   fp_clamp (c);
00750 }
00751 
00752 static int fp_invmod_slow (fp_int * a, fp_int * b, fp_int * c)
00753 {
00754   fp_int  x, y, u, v, A, B, C, D;
00755   int     res;
00756 
00757   /* b cannot be negative */
00758   if (b->sign == FP_NEG || fp_iszero(b) == 1) {
00759     return FP_VAL;
00760   }
00761 
00762   /* init temps */
00763   fp_init(&x);    fp_init(&y);
00764   fp_init(&u);    fp_init(&v);
00765   fp_init(&A);    fp_init(&B);
00766   fp_init(&C);    fp_init(&D);
00767 
00768   /* x = a, y = b */
00769   if ((res = fp_mod(a, b, &x)) != FP_OKAY) {
00770       return res;
00771   }
00772   fp_copy(b, &y);
00773 
00774   /* 2. [modified] if x,y are both even then return an error! */
00775   if (fp_iseven (&x) == 1 && fp_iseven (&y) == 1) {
00776     return FP_VAL;
00777   }
00778 
00779   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
00780   fp_copy (&x, &u);
00781   fp_copy (&y, &v);
00782   fp_set (&A, 1);
00783   fp_set (&D, 1);
00784 
00785 top:
00786   /* 4.  while u is even do */
00787   while (fp_iseven (&u) == 1) {
00788     /* 4.1 u = u/2 */
00789     fp_div_2 (&u, &u);
00790 
00791     /* 4.2 if A or B is odd then */
00792     if (fp_isodd (&A) == 1 || fp_isodd (&B) == 1) {
00793       /* A = (A+y)/2, B = (B-x)/2 */
00794       fp_add (&A, &y, &A);
00795       fp_sub (&B, &x, &B);
00796     }
00797     /* A = A/2, B = B/2 */
00798     fp_div_2 (&A, &A);
00799     fp_div_2 (&B, &B);
00800   }
00801 
00802   /* 5.  while v is even do */
00803   while (fp_iseven (&v) == 1) {
00804     /* 5.1 v = v/2 */
00805     fp_div_2 (&v, &v);
00806 
00807     /* 5.2 if C or D is odd then */
00808     if (fp_isodd (&C) == 1 || fp_isodd (&D) == 1) {
00809       /* C = (C+y)/2, D = (D-x)/2 */
00810       fp_add (&C, &y, &C);
00811       fp_sub (&D, &x, &D);
00812     }
00813     /* C = C/2, D = D/2 */
00814     fp_div_2 (&C, &C);
00815     fp_div_2 (&D, &D);
00816   }
00817 
00818   /* 6.  if u >= v then */
00819   if (fp_cmp (&u, &v) != FP_LT) {
00820     /* u = u - v, A = A - C, B = B - D */
00821     fp_sub (&u, &v, &u);
00822     fp_sub (&A, &C, &A);
00823     fp_sub (&B, &D, &B);
00824   } else {
00825     /* v - v - u, C = C - A, D = D - B */
00826     fp_sub (&v, &u, &v);
00827     fp_sub (&C, &A, &C);
00828     fp_sub (&D, &B, &D);
00829   }
00830 
00831   /* if not zero goto step 4 */
00832   if (fp_iszero (&u) == 0)
00833     goto top;
00834 
00835   /* now a = C, b = D, gcd == g*v */
00836 
00837   /* if v != 1 then there is no inverse */
00838   if (fp_cmp_d (&v, 1) != FP_EQ) {
00839     return FP_VAL;
00840   }
00841 
00842   /* if its too low */
00843   while (fp_cmp_d(&C, 0) == FP_LT) {
00844       fp_add(&C, b, &C);
00845   }
00846   
00847   /* too big */
00848   while (fp_cmp_mag(&C, b) != FP_LT) {
00849       fp_sub(&C, b, &C);
00850   }
00851   
00852   /* C is now the inverse */
00853   fp_copy(&C, c);
00854   return FP_OKAY;
00855 }
00856 
00857 /* c = 1/a (mod b) for odd b only */
00858 int fp_invmod(fp_int *a, fp_int *b, fp_int *c)
00859 {
00860   fp_int  x, y, u, v, B, D;
00861   int     neg;
00862 
00863   /* 2. [modified] b must be odd   */
00864   if (fp_iseven (b) == FP_YES) {
00865     return fp_invmod_slow(a,b,c);
00866   }
00867 
00868   /* init all our temps */
00869   fp_init(&x);  fp_init(&y);
00870   fp_init(&u);  fp_init(&v);
00871   fp_init(&B);  fp_init(&D);
00872 
00873   /* x == modulus, y == value to invert */
00874   fp_copy(b, &x);
00875 
00876   /* we need y = |a| */
00877   fp_abs(a, &y);
00878 
00879   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
00880   fp_copy(&x, &u);
00881   fp_copy(&y, &v);
00882   fp_set (&D, 1);
00883 
00884 top:
00885   /* 4.  while u is even do */
00886   while (fp_iseven (&u) == FP_YES) {
00887     /* 4.1 u = u/2 */
00888     fp_div_2 (&u, &u);
00889 
00890     /* 4.2 if B is odd then */
00891     if (fp_isodd (&B) == FP_YES) {
00892       fp_sub (&B, &x, &B);
00893     }
00894     /* B = B/2 */
00895     fp_div_2 (&B, &B);
00896   }
00897 
00898   /* 5.  while v is even do */
00899   while (fp_iseven (&v) == FP_YES) {
00900     /* 5.1 v = v/2 */
00901     fp_div_2 (&v, &v);
00902 
00903     /* 5.2 if D is odd then */
00904     if (fp_isodd (&D) == FP_YES) {
00905       /* D = (D-x)/2 */
00906       fp_sub (&D, &x, &D);
00907     }
00908     /* D = D/2 */
00909     fp_div_2 (&D, &D);
00910   }
00911 
00912   /* 6.  if u >= v then */
00913   if (fp_cmp (&u, &v) != FP_LT) {
00914     /* u = u - v, B = B - D */
00915     fp_sub (&u, &v, &u);
00916     fp_sub (&B, &D, &B);
00917   } else {
00918     /* v - v - u, D = D - B */
00919     fp_sub (&v, &u, &v);
00920     fp_sub (&D, &B, &D);
00921   }
00922 
00923   /* if not zero goto step 4 */
00924   if (fp_iszero (&u) == FP_NO) {
00925     goto top;
00926   }
00927 
00928   /* now a = C, b = D, gcd == g*v */
00929 
00930   /* if v != 1 then there is no inverse */
00931   if (fp_cmp_d (&v, 1) != FP_EQ) {
00932     return FP_VAL;
00933   }
00934 
00935   /* b is now the inverse */
00936   neg = a->sign;
00937   while (D.sign == FP_NEG) {
00938     fp_add (&D, b, &D);
00939   }
00940   fp_copy (&D, c);
00941   c->sign = neg;
00942   return FP_OKAY;
00943 }
00944 
00945 /* d = a * b (mod c) */
00946 int fp_mulmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
00947 {
00948   fp_int tmp;
00949   fp_zero(&tmp);
00950   fp_mul(a, b, &tmp);
00951   return fp_mod(&tmp, c, d);
00952 }
00953 
00954 #ifdef TFM_TIMING_RESISTANT
00955 
00956 /* timing resistant montgomery ladder based exptmod 
00957 
00958    Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder", Cryptographic Hardware and Embedded Systems, CHES 2002
00959 */
00960 static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
00961 {
00962   fp_int   R[2];
00963   fp_digit buf, mp;
00964   int      err, bitcnt, digidx, y;
00965 
00966   /* now setup montgomery  */
00967   if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) {
00968      return err;
00969   }
00970 
00971   fp_init(&R[0]);   
00972   fp_init(&R[1]);   
00973    
00974   /* now we need R mod m */
00975   fp_montgomery_calc_normalization (&R[0], P);
00976 
00977   /* now set R[0][1] to G * R mod m */
00978   if (fp_cmp_mag(P, G) != FP_GT) {
00979      /* G > P so we reduce it first */
00980      fp_mod(G, P, &R[1]);
00981   } else {
00982      fp_copy(G, &R[1]);
00983   }
00984   fp_mulmod (&R[1], &R[0], P, &R[1]);
00985 
00986   /* for j = t-1 downto 0 do
00987         r_!k = R0*R1; r_k = r_k^2
00988   */
00989   
00990   /* set initial mode and bit cnt */
00991   bitcnt = 1;
00992   buf    = 0;
00993   digidx = X->used - 1;
00994 
00995   for (;;) {
00996     /* grab next digit as required */
00997     if (--bitcnt == 0) {
00998       /* if digidx == -1 we are out of digits so break */
00999       if (digidx == -1) {
01000         break;
01001       }
01002       /* read next digit and reset bitcnt */
01003       buf    = X->dp[digidx--];
01004       bitcnt = (int)DIGIT_BIT;
01005     }
01006 
01007     /* grab the next msb from the exponent */
01008     y     = (int)(buf >> (DIGIT_BIT - 1)) & 1;
01009     buf <<= (fp_digit)1;
01010 
01011     /* do ops */
01012     fp_mul(&R[0], &R[1], &R[y^1]); fp_montgomery_reduce(&R[y^1], P, mp);
01013     fp_sqr(&R[y], &R[y]);          fp_montgomery_reduce(&R[y], P, mp);
01014   }
01015 
01016    fp_montgomery_reduce(&R[0], P, mp);
01017    fp_copy(&R[0], Y);
01018    return FP_OKAY;
01019 }   
01020 
01021 #else
01022 
01023 /* y = g**x (mod b) 
01024  * Some restrictions... x must be positive and < b
01025  */
01026 static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
01027 {
01028   fp_int   M[64], res;
01029   fp_digit buf, mp;
01030   int      err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
01031 
01032   /* find window size */
01033   x = fp_count_bits (X);
01034   if (x <= 21) {
01035     winsize = 1;
01036   } else if (x <= 36) {
01037     winsize = 3;
01038   } else if (x <= 140) {
01039     winsize = 4;
01040   } else if (x <= 450) {
01041     winsize = 5;
01042   } else {
01043     winsize = 6;
01044   } 
01045 
01046   /* init M array */
01047   XMEMSET(M, 0, sizeof(M)); 
01048 
01049   /* now setup montgomery  */
01050   if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) {
01051      return err;
01052   }
01053 
01054   /* setup result */
01055   fp_init(&res);
01056 
01057   /* create M table
01058    *
01059    * The M table contains powers of the input base, e.g. M[x] = G^x mod P
01060    *
01061    * The first half of the table is not computed though accept for M[0] and M[1]
01062    */
01063 
01064    /* now we need R mod m */
01065    fp_montgomery_calc_normalization (&res, P);
01066 
01067    /* now set M[1] to G * R mod m */
01068    if (fp_cmp_mag(P, G) != FP_GT) {
01069       /* G > P so we reduce it first */
01070       fp_mod(G, P, &M[1]);
01071    } else {
01072       fp_copy(G, &M[1]);
01073    }
01074    fp_mulmod (&M[1], &res, P, &M[1]);
01075 
01076   /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
01077   fp_copy (&M[1], &M[1 << (winsize - 1)]);
01078   for (x = 0; x < (winsize - 1); x++) {
01079     fp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)]);
01080     fp_montgomery_reduce (&M[1 << (winsize - 1)], P, mp);
01081   }
01082 
01083   /* create upper table */
01084   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
01085     fp_mul(&M[x - 1], &M[1], &M[x]);
01086     fp_montgomery_reduce(&M[x], P, mp);
01087   }
01088 
01089   /* set initial mode and bit cnt */
01090   mode   = 0;
01091   bitcnt = 1;
01092   buf    = 0;
01093   digidx = X->used - 1;
01094   bitcpy = 0;
01095   bitbuf = 0;
01096 
01097   for (;;) {
01098     /* grab next digit as required */
01099     if (--bitcnt == 0) {
01100       /* if digidx == -1 we are out of digits so break */
01101       if (digidx == -1) {
01102         break;
01103       }
01104       /* read next digit and reset bitcnt */
01105       buf    = X->dp[digidx--];
01106       bitcnt = (int)DIGIT_BIT;
01107     }
01108 
01109     /* grab the next msb from the exponent */
01110     y     = (int)(buf >> (DIGIT_BIT - 1)) & 1;
01111     buf <<= (fp_digit)1;
01112 
01113     /* if the bit is zero and mode == 0 then we ignore it
01114      * These represent the leading zero bits before the first 1 bit
01115      * in the exponent.  Technically this opt is not required but it
01116      * does lower the # of trivial squaring/reductions used
01117      */
01118     if (mode == 0 && y == 0) {
01119       continue;
01120     }
01121 
01122     /* if the bit is zero and mode == 1 then we square */
01123     if (mode == 1 && y == 0) {
01124       fp_sqr(&res, &res);
01125       fp_montgomery_reduce(&res, P, mp);
01126       continue;
01127     }
01128 
01129     /* else we add it to the window */
01130     bitbuf |= (y << (winsize - ++bitcpy));
01131     mode    = 2;
01132 
01133     if (bitcpy == winsize) {
01134       /* ok window is filled so square as required and multiply  */
01135       /* square first */
01136       for (x = 0; x < winsize; x++) {
01137         fp_sqr(&res, &res);
01138         fp_montgomery_reduce(&res, P, mp);
01139       }
01140 
01141       /* then multiply */
01142       fp_mul(&res, &M[bitbuf], &res);
01143       fp_montgomery_reduce(&res, P, mp);
01144 
01145       /* empty window and reset */
01146       bitcpy = 0;
01147       bitbuf = 0;
01148       mode   = 1;
01149     }
01150   }
01151 
01152   /* if bits remain then square/multiply */
01153   if (mode == 2 && bitcpy > 0) {
01154     /* square then multiply if the bit is set */
01155     for (x = 0; x < bitcpy; x++) {
01156       fp_sqr(&res, &res);
01157       fp_montgomery_reduce(&res, P, mp);
01158 
01159       /* get next bit of the window */
01160       bitbuf <<= 1;
01161       if ((bitbuf & (1 << winsize)) != 0) {
01162         /* then multiply */
01163         fp_mul(&res, &M[1], &res);
01164         fp_montgomery_reduce(&res, P, mp);
01165       }
01166     }
01167   }
01168 
01169   /* fixup result if Montgomery reduction is used
01170    * recall that any value in a Montgomery system is
01171    * actually multiplied by R mod n.  So we have
01172    * to reduce one more time to cancel out the factor
01173    * of R.
01174    */
01175   fp_montgomery_reduce(&res, P, mp);
01176 
01177   /* swap res with Y */
01178   fp_copy (&res, Y);
01179   return FP_OKAY;
01180 }
01181 
01182 #endif
01183 
01184 int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
01185 {
01186    fp_int tmp;
01187    int    err;
01188   
01189    /* prevent overflows */
01190    if (P->used > (FP_SIZE/2)) {
01191       return FP_VAL;
01192    }
01193 
01194    /* is X negative?  */
01195    if (X->sign == FP_NEG) {
01196       /* yes, copy G and invmod it */
01197       fp_copy(G, &tmp);
01198       if ((err = fp_invmod(&tmp, P, &tmp)) != FP_OKAY) {
01199          return err;
01200       }
01201       X->sign = FP_ZPOS;
01202       err =  _fp_exptmod(&tmp, X, P, Y);
01203       if (X != Y) {
01204          X->sign = FP_NEG;
01205       }
01206       return err;
01207    } else {
01208       /* Positive exponent so just exptmod */
01209       return _fp_exptmod(G, X, P, Y);
01210    }
01211 }
01212 
01213 /* computes a = 2**b */
01214 void fp_2expt(fp_int *a, int b)
01215 {
01216    int     z;
01217 
01218    /* zero a as per default */
01219    fp_zero (a);
01220 
01221    if (b < 0) { 
01222       return;
01223    }
01224 
01225    z = b / DIGIT_BIT;
01226    if (z >= FP_SIZE) {
01227       return; 
01228    }
01229 
01230   /* set the used count of where the bit will go */
01231   a->used = z + 1;
01232 
01233   /* put the single bit in its place */
01234   a->dp[z] = ((fp_digit)1) << (b % DIGIT_BIT);
01235 }
01236 
01237 /* b = a*a  */
01238 void fp_sqr(fp_int *A, fp_int *B)
01239 {
01240     int y = A->used;
01241 
01242     /* call generic if we're out of range */
01243     if (y + y > FP_SIZE) {
01244        fp_sqr_comba(A, B);
01245        return ;
01246     }
01247 
01248 #if defined(TFM_SQR3)
01249         if (y <= 3) {
01250            fp_sqr_comba3(A,B);
01251            return;
01252         }
01253 #endif
01254 #if defined(TFM_SQR4)
01255         if (y == 4) {
01256            fp_sqr_comba4(A,B);
01257            return;
01258         }
01259 #endif
01260 #if defined(TFM_SQR6)
01261         if (y <= 6) {
01262            fp_sqr_comba6(A,B);
01263            return;
01264         }
01265 #endif
01266 #if defined(TFM_SQR7)
01267         if (y == 7) {
01268            fp_sqr_comba7(A,B);
01269            return;
01270         }
01271 #endif
01272 #if defined(TFM_SQR8)
01273         if (y == 8) {
01274            fp_sqr_comba8(A,B);
01275            return;
01276         }
01277 #endif
01278 #if defined(TFM_SQR9)
01279         if (y == 9) {
01280            fp_sqr_comba9(A,B);
01281            return;
01282         }
01283 #endif
01284 #if defined(TFM_SQR12)
01285         if (y <= 12) {
01286            fp_sqr_comba12(A,B);
01287            return;
01288         }
01289 #endif
01290 #if defined(TFM_SQR17)
01291         if (y <= 17) {
01292            fp_sqr_comba17(A,B);
01293            return;
01294         }
01295 #endif
01296 #if defined(TFM_SMALL_SET)
01297         if (y <= 16) {
01298            fp_sqr_comba_small(A,B);
01299            return;
01300         }
01301 #endif
01302 #if defined(TFM_SQR20)
01303         if (y <= 20) {
01304            fp_sqr_comba20(A,B);
01305            return;
01306         }
01307 #endif
01308 #if defined(TFM_SQR24)
01309         if (y <= 24) {
01310            fp_sqr_comba24(A,B);
01311            return;
01312         }
01313 #endif
01314 #if defined(TFM_SQR28)
01315         if (y <= 28) {
01316            fp_sqr_comba28(A,B);
01317            return;
01318         }
01319 #endif
01320 #if defined(TFM_SQR32)
01321         if (y <= 32) {
01322            fp_sqr_comba32(A,B);
01323            return;
01324         }
01325 #endif
01326 #if defined(TFM_SQR48)
01327         if (y <= 48) {
01328            fp_sqr_comba48(A,B);
01329            return;
01330         }
01331 #endif
01332 #if defined(TFM_SQR64)
01333         if (y <= 64) {
01334            fp_sqr_comba64(A,B);
01335            return;
01336         }
01337 #endif
01338        fp_sqr_comba(A, B);
01339 }
01340 
01341 /* generic comba squarer */
01342 void fp_sqr_comba(fp_int *A, fp_int *B)
01343 {
01344   int       pa, ix, iz;
01345   fp_digit  c0, c1, c2;
01346   fp_int    tmp, *dst;
01347 #ifdef TFM_ISO
01348   fp_word   tt;
01349 #endif    
01350 
01351   /* get size of output and trim */
01352   pa = A->used + A->used;
01353   if (pa >= FP_SIZE) {
01354      pa = FP_SIZE-1;
01355   }
01356 
01357   /* number of output digits to produce */
01358   COMBA_START;
01359   COMBA_CLEAR;
01360 
01361   if (A == B) {
01362      fp_zero(&tmp);
01363      dst = &tmp;
01364   } else {
01365      fp_zero(B);
01366      dst = B;
01367   }
01368 
01369   for (ix = 0; ix < pa; ix++) { 
01370       int      tx, ty, iy;
01371       fp_digit *tmpy, *tmpx;
01372 
01373       /* get offsets into the two bignums */
01374       ty = MIN(A->used-1, ix);
01375       tx = ix - ty;
01376 
01377       /* setup temp aliases */
01378       tmpx = A->dp + tx;
01379       tmpy = A->dp + ty;
01380 
01381       /* this is the number of times the loop will iterrate,
01382          while (tx++ < a->used && ty-- >= 0) { ... }
01383        */
01384       iy = MIN(A->used-tx, ty+1);
01385 
01386       /* now for squaring tx can never equal ty 
01387        * we halve the distance since they approach 
01388        * at a rate of 2x and we have to round because 
01389        * odd cases need to be executed
01390        */
01391       iy = MIN(iy, (ty-tx+1)>>1);
01392 
01393       /* forward carries */
01394       COMBA_FORWARD;
01395 
01396       /* execute loop */
01397       for (iz = 0; iz < iy; iz++) {
01398           SQRADD2(*tmpx++, *tmpy--);
01399       }
01400 
01401       /* even columns have the square term in them */
01402       if ((ix&1) == 0) {
01403           /* TAO change COMBA_ADD back to SQRADD */
01404           SQRADD(A->dp[ix>>1], A->dp[ix>>1]);
01405       }
01406 
01407       /* store it */
01408       COMBA_STORE(dst->dp[ix]);
01409   }
01410 
01411   COMBA_FINI;
01412 
01413   /* setup dest */
01414   dst->used = pa;
01415   fp_clamp (dst);
01416   if (dst != B) {
01417      fp_copy(dst, B);
01418   }
01419 }
01420 
01421 int fp_cmp(fp_int *a, fp_int *b)
01422 {
01423    if (a->sign == FP_NEG && b->sign == FP_ZPOS) {
01424       return FP_LT;
01425    } else if (a->sign == FP_ZPOS && b->sign == FP_NEG) {
01426       return FP_GT;
01427    } else {
01428       /* compare digits */
01429       if (a->sign == FP_NEG) {
01430          /* if negative compare opposite direction */
01431          return fp_cmp_mag(b, a);
01432       } else {
01433          return fp_cmp_mag(a, b);
01434       }
01435    }
01436 }
01437 
01438 /* compare against a single digit */
01439 int fp_cmp_d(fp_int *a, fp_digit b)
01440 {
01441   /* compare based on sign */
01442   if ((b && a->used == 0) || a->sign == FP_NEG) {
01443     return FP_LT;
01444   }
01445 
01446   /* compare based on magnitude */
01447   if (a->used > 1) {
01448     return FP_GT;
01449   }
01450 
01451   /* compare the only digit of a to b */
01452   if (a->dp[0] > b) {
01453     return FP_GT;
01454   } else if (a->dp[0] < b) {
01455     return FP_LT;
01456   } else {
01457     return FP_EQ;
01458   }
01459 
01460 }
01461 
01462 int fp_cmp_mag(fp_int *a, fp_int *b)
01463 {
01464    int x;
01465 
01466    if (a->used > b->used) {
01467       return FP_GT;
01468    } else if (a->used < b->used) {
01469       return FP_LT;
01470    } else {
01471       for (x = a->used - 1; x >= 0; x--) {
01472           if (a->dp[x] > b->dp[x]) {
01473              return FP_GT;
01474           } else if (a->dp[x] < b->dp[x]) {
01475              return FP_LT;
01476           }
01477       }
01478    }
01479    return FP_EQ;
01480 }
01481 
01482 /* setups the montgomery reduction */
01483 int fp_montgomery_setup(fp_int *a, fp_digit *rho)
01484 {
01485   fp_digit x, b;
01486 
01487 /* fast inversion mod 2**k
01488  *
01489  * Based on the fact that
01490  *
01491  * XA = 1 (mod 2**n)  =>  (X(2-XA)) A = 1 (mod 2**2n)
01492  *                    =>  2*X*A - X*X*A*A = 1
01493  *                    =>  2*(1) - (1)     = 1
01494  */
01495   b = a->dp[0];
01496 
01497   if ((b & 1) == 0) {
01498     return FP_VAL;
01499   }
01500 
01501   x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
01502   x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
01503   x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
01504   x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
01505 #ifdef FP_64BIT
01506   x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
01507 #endif
01508 
01509   /* rho = -1/m mod b */
01510   *rho = (fp_digit) (((fp_word) 1 << ((fp_word) DIGIT_BIT)) - ((fp_word)x));
01511 
01512   return FP_OKAY;
01513 }
01514 
01515 /* computes a = B**n mod b without division or multiplication useful for
01516  * normalizing numbers in a Montgomery system.
01517  */
01518 void fp_montgomery_calc_normalization(fp_int *a, fp_int *b)
01519 {
01520   int     x, bits;
01521 
01522   /* how many bits of last digit does b use */
01523   bits = fp_count_bits (b) % DIGIT_BIT;
01524   if (!bits) bits = DIGIT_BIT;
01525 
01526   /* compute A = B^(n-1) * 2^(bits-1) */
01527   if (b->used > 1) {
01528      fp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1);
01529   } else {
01530      fp_set(a, 1);
01531      bits = 1;
01532   }
01533 
01534   /* now compute C = A * B mod b */
01535   for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
01536     fp_mul_2 (a, a);
01537     if (fp_cmp_mag (a, b) != FP_LT) {
01538       s_fp_sub (a, b, a);
01539     }
01540   }
01541 }
01542 
01543 
01544 #ifdef TFM_SMALL_MONT_SET
01545     #include "fp_mont_small.i"
01546 #endif
01547 
01548 /* computes x/R == x (mod N) via Montgomery Reduction */
01549 void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
01550 {
01551    fp_digit c[FP_SIZE], *_c, *tmpm, mu = 0;
01552    int      oldused, x, y, pa;
01553 
01554    /* bail if too large */
01555    if (m->used > (FP_SIZE/2)) {
01556       (void)mu;                     /* shut up compiler */
01557       return;
01558    }
01559 
01560 #ifdef TFM_SMALL_MONT_SET
01561    if (m->used <= 16) {
01562       fp_montgomery_reduce_small(a, m, mp);
01563       return;
01564    }
01565 #endif
01566 
01567 
01568 #if defined(USE_MEMSET)
01569    /* now zero the buff */
01570    XMEMSET(c, 0, sizeof c);
01571 #endif
01572    pa = m->used;
01573 
01574    /* copy the input */
01575    oldused = a->used;
01576    for (x = 0; x < oldused; x++) {
01577        c[x] = a->dp[x];
01578    }
01579 #if !defined(USE_MEMSET)
01580    for (; x < 2*pa+1; x++) {
01581        c[x] = 0;
01582    }
01583 #endif
01584    MONT_START;
01585 
01586    for (x = 0; x < pa; x++) {
01587        fp_digit cy = 0;
01588        /* get Mu for this round */
01589        LOOP_START;
01590        _c   = c + x;
01591        tmpm = m->dp;
01592        y = 0;
01593        #if (defined(TFM_SSE2) || defined(TFM_X86_64))
01594         for (; y < (pa & ~7); y += 8) {
01595               INNERMUL8;
01596               _c   += 8;
01597               tmpm += 8;
01598            }
01599        #endif
01600 
01601        for (; y < pa; y++) {
01602           INNERMUL;
01603           ++_c;
01604        }
01605        LOOP_END;
01606        while (cy) {
01607            PROPCARRY;
01608            ++_c;
01609        }
01610   }         
01611 
01612   /* now copy out */
01613   _c   = c + pa;
01614   tmpm = a->dp;
01615   for (x = 0; x < pa+1; x++) {
01616      *tmpm++ = *_c++;
01617   }
01618 
01619   for (; x < oldused; x++)   {
01620      *tmpm++ = 0;
01621   }
01622 
01623   MONT_FINI;
01624 
01625   a->used = pa+1;
01626   fp_clamp(a);
01627   
01628   /* if A >= m then A = A - m */
01629   if (fp_cmp_mag (a, m) != FP_LT) {
01630     s_fp_sub (a, m, a);
01631   }
01632 }
01633 
01634 void fp_read_unsigned_bin(fp_int *a, unsigned char *b, int c)
01635 {
01636   /* zero the int */
01637   fp_zero (a);
01638 
01639   /* If we know the endianness of this architecture, and we're using
01640      32-bit fp_digits, we can optimize this */
01641 #if (defined(ENDIAN_LITTLE) || defined(ENDIAN_BIG)) && !defined(FP_64BIT)
01642   /* But not for both simultaneously */
01643 #if defined(ENDIAN_LITTLE) && defined(ENDIAN_BIG)
01644 #error Both ENDIAN_LITTLE and ENDIAN_BIG defined.
01645 #endif
01646   {
01647      unsigned char *pd = (unsigned char *)a->dp;
01648 
01649      if ((unsigned)c > (FP_SIZE * sizeof(fp_digit))) {
01650         int excess = c - (FP_SIZE * sizeof(fp_digit));
01651         c -= excess;
01652         b += excess;
01653      }
01654      a->used = (c + sizeof(fp_digit) - 1)/sizeof(fp_digit);
01655      /* read the bytes in */
01656 #ifdef ENDIAN_BIG
01657      {
01658        /* Use Duff's device to unroll the loop. */
01659        int idx = (c - 1) & ~3;
01660        switch (c % 4) {
01661        case 0:    do { pd[idx+0] = *b++;
01662        case 3:         pd[idx+1] = *b++;
01663        case 2:         pd[idx+2] = *b++;
01664        case 1:         pd[idx+3] = *b++;
01665                      idx -= 4;
01666                  } while ((c -= 4) > 0);
01667        }
01668      }
01669 #else
01670      for (c -= 1; c >= 0; c -= 1) {
01671        pd[c] = *b++;
01672      }
01673 #endif
01674   }
01675 #else
01676   /* read the bytes in */
01677   for (; c > 0; c--) {
01678      fp_mul_2d (a, 8, a);
01679      a->dp[0] |= *b++;
01680      a->used += 1;
01681   }
01682 #endif
01683   fp_clamp (a);
01684 }
01685 
01686 void fp_to_unsigned_bin(fp_int *a, unsigned char *b)
01687 {
01688   int     x;
01689   fp_int  t;
01690 
01691   fp_init_copy(&t, a);
01692 
01693   x = 0;
01694   while (fp_iszero (&t) == FP_NO) {
01695       b[x++] = (unsigned char) (t.dp[0] & 255);
01696       fp_div_2d (&t, 8, &t, NULL);
01697   }
01698   fp_reverse (b, x);
01699 }
01700 
01701 int fp_unsigned_bin_size(fp_int *a)
01702 {
01703   int     size = fp_count_bits (a);
01704   return (size / 8 + ((size & 7) != 0 ? 1 : 0));
01705 }
01706 
01707 void fp_set(fp_int *a, fp_digit b)
01708 {
01709    fp_zero(a);
01710    a->dp[0] = b;
01711    a->used  = a->dp[0] ? 1 : 0;
01712 }
01713 
01714 int fp_count_bits (fp_int * a)
01715 {
01716   int     r;
01717   fp_digit q;
01718 
01719   /* shortcut */
01720   if (a->used == 0) {
01721     return 0;
01722   }
01723 
01724   /* get number of digits and add that */
01725   r = (a->used - 1) * DIGIT_BIT;
01726 
01727   /* take the last digit and count the bits in it */
01728   q = a->dp[a->used - 1];
01729   while (q > ((fp_digit) 0)) {
01730     ++r;
01731     q >>= ((fp_digit) 1);
01732   }
01733   return r;
01734 }
01735 
01736 void fp_lshd(fp_int *a, int x)
01737 {
01738    int y;
01739 
01740    /* move up and truncate as required */
01741    y = MIN(a->used + x - 1, (int)(FP_SIZE-1));
01742 
01743    /* store new size */
01744    a->used = y + 1;
01745 
01746    /* move digits */
01747    for (; y >= x; y--) {
01748        a->dp[y] = a->dp[y-x];
01749    }
01750  
01751    /* zero lower digits */
01752    for (; y >= 0; y--) {
01753        a->dp[y] = 0;
01754    }
01755 
01756    /* clamp digits */
01757    fp_clamp(a);
01758 }
01759 
01760 void fp_rshd(fp_int *a, int x)
01761 {
01762   int y;
01763 
01764   /* too many digits just zero and return */
01765   if (x >= a->used) {
01766      fp_zero(a);
01767      return;
01768   }
01769 
01770    /* shift */
01771    for (y = 0; y < a->used - x; y++) {
01772       a->dp[y] = a->dp[y+x];
01773    }
01774 
01775    /* zero rest */
01776    for (; y < a->used; y++) {
01777       a->dp[y] = 0;
01778    }
01779    
01780    /* decrement count */
01781    a->used -= x;
01782    fp_clamp(a);
01783 }
01784 
01785 /* reverse an array, used for radix code */
01786 void fp_reverse (unsigned char *s, int len)
01787 {
01788   int     ix, iy;
01789   unsigned char t;
01790 
01791   ix = 0;
01792   iy = len - 1;
01793   while (ix < iy) {
01794     t     = s[ix];
01795     s[ix] = s[iy];
01796     s[iy] = t;
01797     ++ix;
01798     --iy;
01799   }
01800 }
01801 
01802 
01803 /* c = a - b */
01804 void fp_sub_d(fp_int *a, fp_digit b, fp_int *c)
01805 {
01806    fp_int tmp;
01807    fp_set(&tmp, b);
01808    fp_sub(a, &tmp, c);
01809 }
01810 
01811 
01812 /* CyaSSL callers from normal lib */
01813 
01814 /* init a new mp_int */
01815 int mp_init (mp_int * a)
01816 {
01817   if (a)
01818     fp_init(a);
01819   return MP_OKAY;
01820 }
01821 
01822 /* clear one (frees)  */
01823 void mp_clear (mp_int * a)
01824 {
01825   fp_zero(a);
01826 }
01827 
01828 /* handle up to 6 inits */
01829 int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d, mp_int* e, mp_int* f)
01830 {
01831     if (a)
01832         fp_init(a);
01833     if (b)
01834         fp_init(b);
01835     if (c)
01836         fp_init(c);
01837     if (d)
01838         fp_init(d);
01839     if (e)
01840         fp_init(e);
01841     if (f)
01842         fp_init(f);
01843 
01844     return MP_OKAY;
01845 }
01846 
01847 /* high level addition (handles signs) */
01848 int mp_add (mp_int * a, mp_int * b, mp_int * c)
01849 {
01850   fp_add(a, b, c);
01851   return MP_OKAY;
01852 }
01853 
01854 /* high level subtraction (handles signs) */
01855 int mp_sub (mp_int * a, mp_int * b, mp_int * c)
01856 {
01857   fp_sub(a, b, c);
01858   return MP_OKAY;
01859 }
01860 
01861 /* high level multiplication (handles sign) */
01862 int mp_mul (mp_int * a, mp_int * b, mp_int * c)
01863 {
01864   fp_mul(a, b, c);
01865   return MP_OKAY;
01866 }
01867 
01868 /* d = a * b (mod c) */
01869 int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
01870 {
01871   return fp_mulmod(a, b, c, d);
01872 }
01873 
01874 /* c = a mod b, 0 <= c < b */
01875 int mp_mod (mp_int * a, mp_int * b, mp_int * c)
01876 {
01877   return fp_mod (a, b, c);
01878 }
01879 
01880 /* hac 14.61, pp608 */
01881 int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
01882 {
01883   return fp_invmod(a, b, c);
01884 }
01885 
01886 /* this is a shell function that calls either the normal or Montgomery
01887  * exptmod functions.  Originally the call to the montgomery code was
01888  * embedded in the normal function but that wasted alot of stack space
01889  * for nothing (since 99% of the time the Montgomery code would be called)
01890  */
01891 int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
01892 {
01893   return fp_exptmod(G, X, P, Y);
01894 }
01895 
01896 /* compare two ints (signed)*/
01897 int mp_cmp (mp_int * a, mp_int * b)
01898 {
01899   return fp_cmp(a, b);
01900 }
01901 
01902 /* compare a digit */
01903 int mp_cmp_d(mp_int * a, mp_digit b)
01904 {
01905   return fp_cmp_d(a, b);
01906 }
01907 
01908 /* get the size for an unsigned equivalent */
01909 int mp_unsigned_bin_size (mp_int * a)
01910 {
01911   return fp_unsigned_bin_size(a);
01912 }
01913 
01914 /* store in unsigned [big endian] format */
01915 int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
01916 {
01917   fp_to_unsigned_bin(a,b);
01918   return MP_OKAY;
01919 }
01920 
01921 /* reads a unsigned char array, assumes the msb is stored first [big endian] */
01922 int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
01923 {
01924   fp_read_unsigned_bin(a, (unsigned char *)b, c);
01925   return MP_OKAY;
01926 }
01927 
01928 
01929 int mp_sub_d(fp_int *a, fp_digit b, fp_int *c)
01930 {
01931     fp_sub_d(a, b, c);
01932     return MP_OKAY;
01933 }
01934 
01935 
01936 /* fast math conversion */
01937 int mp_copy(fp_int* a, fp_int* b)
01938 {
01939     fp_copy(a, b);
01940     return MP_OKAY;
01941 }
01942 
01943 
01944 /* fast math conversion */
01945 int mp_isodd(mp_int* a)
01946 {
01947     return fp_isodd(a);
01948 }
01949 
01950 
01951 /* fast math conversion */
01952 int mp_iszero(mp_int* a)
01953 {
01954     return fp_iszero(a);
01955 }
01956 
01957 
01958 /* fast math conversion */
01959 int mp_count_bits (mp_int* a)
01960 {
01961     return fp_count_bits(a);
01962 }
01963 
01964 
01965 /* fast math wrappers */
01966 int mp_set_int(fp_int *a, fp_digit b)
01967 {
01968     fp_set(a, b);
01969     return MP_OKAY;
01970 }
01971 
01972 
01973 #if defined(CYASSL_KEY_GEN) || defined (HAVE_ECC)
01974 
01975 /* c = a * a (mod b) */
01976 int fp_sqrmod(fp_int *a, fp_int *b, fp_int *c)
01977 {
01978   fp_int tmp;
01979   fp_zero(&tmp);
01980   fp_sqr(a, &tmp);
01981   return fp_mod(&tmp, b, c);
01982 }
01983 
01984 /* fast math conversion */
01985 int mp_sqrmod(mp_int *a, mp_int *b, mp_int *c)
01986 {
01987     return fp_sqrmod(a, b, c);
01988 }
01989 
01990 /* fast math conversion */
01991 int mp_montgomery_calc_normalization(mp_int *a, mp_int *b)
01992 {
01993     fp_montgomery_calc_normalization(a, b);
01994     return MP_OKAY;
01995 }
01996 
01997 #endif /* CYASSL_KEYGEN || HAVE_ECC */
01998 
01999 
02000 #ifdef CYASSL_KEY_GEN
02001 
02002 void fp_gcd(fp_int *a, fp_int *b, fp_int *c);
02003 void fp_lcm(fp_int *a, fp_int *b, fp_int *c);
02004 int  fp_isprime(fp_int *a);
02005 int  fp_cnt_lsb(fp_int *a);
02006 
02007 int mp_gcd(fp_int *a, fp_int *b, fp_int *c)
02008 {
02009     fp_gcd(a, b, c);
02010     return MP_OKAY;
02011 }
02012 
02013 
02014 int mp_lcm(fp_int *a, fp_int *b, fp_int *c)
02015 {
02016     fp_lcm(a, b, c);
02017     return MP_OKAY;
02018 }
02019 
02020 
02021 int mp_prime_is_prime(mp_int* a, int t, int* result)
02022 {
02023     (void)t;
02024     *result = fp_isprime(a);
02025     return MP_OKAY;
02026 }
02027 
02028 
02029 
02030 static int s_is_power_of_two(fp_digit b, int *p)
02031 {
02032    int x;
02033 
02034    /* fast return if no power of two */
02035    if ((b==0) || (b & (b-1))) {
02036       return 0;
02037    }
02038 
02039    for (x = 0; x < DIGIT_BIT; x++) {
02040       if (b == (((fp_digit)1)<<x)) {
02041          *p = x;
02042          return 1;
02043       }
02044    }
02045    return 0;
02046 }
02047 
02048 /* a/b => cb + d == a */
02049 static int fp_div_d(fp_int *a, fp_digit b, fp_int *c, fp_digit *d)
02050 {
02051   fp_int   q;
02052   fp_word  w;
02053   fp_digit t;
02054   int      ix;
02055 
02056   /* cannot divide by zero */
02057   if (b == 0) {
02058      return FP_VAL;
02059   }
02060 
02061   /* quick outs */
02062   if (b == 1 || fp_iszero(a) == 1) {
02063      if (d != NULL) {
02064         *d = 0;
02065      }
02066      if (c != NULL) {
02067         fp_copy(a, c);
02068      }
02069      return FP_OKAY;
02070   }
02071 
02072   /* power of two ? */
02073   if (s_is_power_of_two(b, &ix) == 1) {
02074      if (d != NULL) {
02075         *d = a->dp[0] & ((((fp_digit)1)<<ix) - 1);
02076      }
02077      if (c != NULL) {
02078         fp_div_2d(a, ix, c, NULL);
02079      }
02080      return FP_OKAY;
02081   }
02082 
02083   /* no easy answer [c'est la vie].  Just division */
02084   fp_init(&q);
02085   
02086   q.used = a->used;
02087   q.sign = a->sign;
02088   w = 0;
02089   for (ix = a->used - 1; ix >= 0; ix--) {
02090      w = (w << ((fp_word)DIGIT_BIT)) | ((fp_word)a->dp[ix]);
02091      
02092      if (w >= b) {
02093         t = (fp_digit)(w / b);
02094         w -= ((fp_word)t) * ((fp_word)b);
02095       } else {
02096         t = 0;
02097       }
02098       q.dp[ix] = (fp_digit)t;
02099   }
02100   
02101   if (d != NULL) {
02102      *d = (fp_digit)w;
02103   }
02104   
02105   if (c != NULL) {
02106      fp_clamp(&q);
02107      fp_copy(&q, c);
02108   }
02109  
02110   return FP_OKAY;
02111 }
02112 
02113 
02114 /* c = a mod b, 0 <= c < b  */
02115 static int fp_mod_d(fp_int *a, fp_digit b, fp_digit *c)
02116 {
02117    return fp_div_d(a, b, NULL, c);
02118 }
02119 
02120 
02121 /* Miller-Rabin test of "a" to the base of "b" as described in 
02122  * HAC pp. 139 Algorithm 4.24
02123  *
02124  * Sets result to 0 if definitely composite or 1 if probably prime.
02125  * Randomly the chance of error is no more than 1/4 and often 
02126  * very much lower.
02127  */
02128 static void fp_prime_miller_rabin (fp_int * a, fp_int * b, int *result)
02129 {
02130   fp_int  n1, y, r;
02131   int     s, j;
02132 
02133   /* default */
02134   *result = FP_NO;
02135 
02136   /* ensure b > 1 */
02137   if (fp_cmp_d(b, 1) != FP_GT) {
02138      return;
02139   }     
02140 
02141   /* get n1 = a - 1 */
02142   fp_init_copy(&n1, a);
02143   fp_sub_d(&n1, 1, &n1);
02144 
02145   /* set 2**s * r = n1 */
02146   fp_init_copy(&r, &n1);
02147 
02148   /* count the number of least significant bits
02149    * which are zero
02150    */
02151   s = fp_cnt_lsb(&r);
02152 
02153   /* now divide n - 1 by 2**s */
02154   fp_div_2d (&r, s, &r, NULL);
02155 
02156   /* compute y = b**r mod a */
02157   fp_init(&y);
02158   fp_exptmod(b, &r, a, &y);
02159 
02160   /* if y != 1 and y != n1 do */
02161   if (fp_cmp_d (&y, 1) != FP_EQ && fp_cmp (&y, &n1) != FP_EQ) {
02162     j = 1;
02163     /* while j <= s-1 and y != n1 */
02164     while ((j <= (s - 1)) && fp_cmp (&y, &n1) != FP_EQ) {
02165       fp_sqrmod (&y, a, &y);
02166 
02167       /* if y == 1 then composite */
02168       if (fp_cmp_d (&y, 1) == FP_EQ) {
02169          return;
02170       }
02171       ++j;
02172     }
02173 
02174     /* if y != n1 then composite */
02175     if (fp_cmp (&y, &n1) != FP_EQ) {
02176        return;
02177     }
02178   }
02179 
02180   /* probably prime now */
02181   *result = FP_YES;
02182 }
02183 
02184 
02185 /* a few primes */
02186 static const fp_digit primes[256] = {
02187   0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
02188   0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
02189   0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
02190   0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
02191   0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
02192   0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
02193   0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
02194   0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
02195 
02196   0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
02197   0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
02198   0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
02199   0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
02200   0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
02201   0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
02202   0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
02203   0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
02204 
02205   0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
02206   0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
02207   0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
02208   0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
02209   0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
02210   0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
02211   0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
02212   0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
02213 
02214   0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
02215   0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
02216   0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
02217   0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
02218   0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
02219   0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
02220   0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
02221   0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
02222 };
02223 
02224 int fp_isprime(fp_int *a)
02225 {
02226    fp_int   b;
02227    fp_digit d = 0;
02228    int      r, res;
02229 
02230    /* do trial division */
02231    for (r = 0; r < 256; r++) {
02232        fp_mod_d(a, primes[r], &d);
02233        if (d == 0) {
02234           return FP_NO;
02235        }
02236    }
02237 
02238    /* now do 8 miller rabins */
02239    fp_init(&b);
02240    for (r = 0; r < 8; r++) {
02241        fp_set(&b, primes[r]);
02242        fp_prime_miller_rabin(a, &b, &res);
02243        if (res == FP_NO) {
02244           return FP_NO;
02245        }
02246    }
02247    return FP_YES;
02248 }
02249 
02250 
02251 /* c = [a, b] */
02252 void fp_lcm(fp_int *a, fp_int *b, fp_int *c)
02253 {
02254    fp_int  t1, t2;
02255 
02256    fp_init(&t1);
02257    fp_init(&t2);
02258    fp_gcd(a, b, &t1);
02259    if (fp_cmp_mag(a, b) == FP_GT) {
02260       fp_div(a, &t1, &t2, NULL);
02261       fp_mul(b, &t2, c);
02262    } else {
02263       fp_div(b, &t1, &t2, NULL);
02264       fp_mul(a, &t2, c);
02265    }   
02266 }
02267 
02268 
02269 static const int lnz[16] = {
02270    4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
02271 };
02272 
02273 /* Counts the number of lsbs which are zero before the first zero bit */
02274 int fp_cnt_lsb(fp_int *a)
02275 {
02276    int x;
02277    fp_digit q, qq;
02278 
02279    /* easy out */
02280    if (fp_iszero(a) == 1) {
02281       return 0;
02282    }
02283 
02284    /* scan lower digits until non-zero */
02285    for (x = 0; x < a->used && a->dp[x] == 0; x++);
02286    q = a->dp[x];
02287    x *= DIGIT_BIT;
02288 
02289    /* now scan this digit until a 1 is found */
02290    if ((q & 1) == 0) {
02291       do {
02292          qq  = q & 15;
02293          x  += lnz[qq];
02294          q >>= 4;
02295       } while (qq == 0);
02296    }
02297    return x;
02298 }
02299 
02300 
02301 /* c = (a, b) */
02302 void fp_gcd(fp_int *a, fp_int *b, fp_int *c)
02303 {
02304    fp_int u, v, r;
02305 
02306    /* either zero than gcd is the largest */
02307    if (fp_iszero (a) == 1 && fp_iszero (b) == 0) {
02308      fp_abs (b, c);
02309      return;
02310    }
02311    if (fp_iszero (a) == 0 && fp_iszero (b) == 1) {
02312      fp_abs (a, c);
02313      return;
02314    }
02315 
02316    /* optimized.  At this point if a == 0 then
02317     * b must equal zero too
02318     */
02319    if (fp_iszero (a) == 1) {
02320      fp_zero(c);
02321      return;
02322    }
02323 
02324    /* sort inputs */
02325    if (fp_cmp_mag(a, b) != FP_LT) {
02326       fp_init_copy(&u, a);
02327       fp_init_copy(&v, b);
02328    } else {
02329       fp_init_copy(&u, b);
02330       fp_init_copy(&v, a);
02331    }
02332  
02333    fp_zero(&r);
02334    while (fp_iszero(&v) == FP_NO) {
02335       fp_mod(&u, &v, &r);
02336       fp_copy(&v, &u);
02337       fp_copy(&r, &v);
02338    }
02339    fp_copy(&u, c);
02340 }
02341 
02342 #endif /* CYASSL_KEY_GEN */
02343 
02344 
02345 #if defined(HAVE_ECC) || !defined(NO_PWDBASED)
02346 /* c = a + b */
02347 void fp_add_d(fp_int *a, fp_digit b, fp_int *c)
02348 {
02349    fp_int tmp;
02350    fp_set(&tmp, b);
02351    fp_add(a,&tmp,c);
02352 }
02353 
02354 /* external compatibility */
02355 int mp_add_d(fp_int *a, fp_digit b, fp_int *c)
02356 {
02357     fp_add_d(a, b, c);
02358     return MP_OKAY;
02359 }
02360 
02361 #endif  /* HAVE_ECC || !NO_PWDBASED */
02362 
02363 
02364 #ifdef HAVE_ECC
02365 
02366 /* chars used in radix conversions */
02367 const char *fp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
02368 
02369 static int fp_read_radix(fp_int *a, const char *str, int radix)
02370 {
02371   int     y, neg;
02372   char    ch;
02373 
02374   /* make sure the radix is ok */
02375   if (radix < 2 || radix > 64) {
02376     return FP_VAL;
02377   }
02378 
02379   /* if the leading digit is a
02380    * minus set the sign to negative.
02381    */
02382   if (*str == '-') {
02383     ++str;
02384     neg = FP_NEG;
02385   } else {
02386     neg = FP_ZPOS;
02387   }
02388 
02389   /* set the integer to the default of zero */
02390   fp_zero (a);
02391 
02392   /* process each digit of the string */
02393   while (*str) {
02394     /* if the radix < 36 the conversion is case insensitive
02395      * this allows numbers like 1AB and 1ab to represent the same  value
02396      * [e.g. in hex]
02397      */
02398     ch = (char) ((radix < 36) ? XTOUPPER(*str) : *str);
02399     for (y = 0; y < 64; y++) {
02400       if (ch == fp_s_rmap[y]) {
02401          break;
02402       }
02403     }
02404 
02405     /* if the char was found in the map
02406      * and is less than the given radix add it
02407      * to the number, otherwise exit the loop.
02408      */
02409     if (y < radix) {
02410       fp_mul_d (a, (fp_digit) radix, a);
02411       fp_add_d (a, (fp_digit) y, a);
02412     } else {
02413       break;
02414     }
02415     ++str;
02416   }
02417 
02418   /* set the sign only if a != 0 */
02419   if (fp_iszero(a) != FP_YES) {
02420      a->sign = neg;
02421   }
02422   return FP_OKAY;
02423 }
02424 
02425 /* fast math conversion */
02426 int mp_read_radix(mp_int *a, const char *str, int radix)
02427 {
02428     return fp_read_radix(a, str, radix);
02429 }
02430 
02431 /* fast math conversion */
02432 int mp_set(fp_int *a, fp_digit b)
02433 {
02434     fp_set(a,b);
02435     return MP_OKAY;
02436 }
02437 
02438 /* fast math conversion */
02439 int mp_sqr(fp_int *A, fp_int *B)
02440 {
02441     fp_sqr(A, B);
02442     return MP_OKAY;
02443 }
02444   
02445 /* fast math conversion */
02446 int mp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
02447 {
02448     fp_montgomery_reduce(a, m, mp);
02449     return MP_OKAY;
02450 }
02451 
02452 
02453 /* fast math conversion */
02454 int mp_montgomery_setup(fp_int *a, fp_digit *rho)
02455 {
02456     return fp_montgomery_setup(a, rho);
02457 }
02458 
02459 int mp_div_2(fp_int * a, fp_int * b)
02460 {
02461     fp_div_2(a, b);
02462     return MP_OKAY;
02463 }
02464 
02465 
02466 int mp_init_copy(fp_int * a, fp_int * b)
02467 {
02468     fp_init_copy(a, b);
02469     return MP_OKAY;
02470 }
02471 
02472 
02473 
02474 #endif /* HAVE_ECC */
02475 
02476 #endif /* USE_FAST_MATH */