wolf SSL / wolfSSL-TLS13-Beta

Fork of wolfSSL by wolf SSL

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers tfm.c Source File

tfm.c

00001 /* tfm.c
00002  *
00003  * Copyright (C) 2006-2016 wolfSSL Inc.
00004  *
00005  * This file is part of wolfSSL.
00006  *
00007  * wolfSSL is free software; you can redistribute it and/or modify
00008  * it under the terms of the GNU General Public License as published by
00009  * the Free Software Foundation; either version 2 of the License, or
00010  * (at your option) any later version.
00011  *
00012  * wolfSSL is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
00020  */
00021 
00022 
00023 
00024 /*
00025  * Based on public domain TomsFastMath 0.10 by Tom St Denis, tomstdenis@iahu.ca,
00026  * http://math.libtomcrypt.com
00027  */
00028 
00029 /**
00030  *  Edited by Moises Guimaraes (moisesguimaraesm@gmail.com)
00031  *  to fit CyaSSL's needs.
00032  */
00033 
00034 #ifdef HAVE_CONFIG_H
00035     #include <config.h>
00036 #endif
00037 
00038 /* in case user set USE_FAST_MATH there */
00039 #include <wolfssl/wolfcrypt/settings.h>
00040 #ifdef NO_INLINE
00041     #include <wolfssl/wolfcrypt/misc.h>
00042 #else
00043     #define WOLFSSL_MISC_INCLUDED
00044     #include <wolfcrypt/src/misc.c>
00045 #endif
00046 
00047 #ifdef USE_FAST_MATH
00048 
00049 #include <wolfssl/wolfcrypt/random.h>
00050 #include <wolfssl/wolfcrypt/tfm.h>
00051 #include <wolfcrypt/src/asm.c>  /* will define asm MACROS or C ones */
00052 #include <wolfssl/wolfcrypt/wolfmath.h> /* common functions */
00053 
00054 #if defined(FREESCALE_LTC_TFM)
00055     #include <wolfssl/wolfcrypt/port/nxp/ksdk_port.h>
00056 #endif
00057 #ifdef WOLFSSL_DEBUG_MATH
00058     #include <stdio.h>
00059 #endif
00060 
00061 
00062 /* math settings check */
00063 word32 CheckRunTimeSettings(void)
00064 {
00065     return CTC_SETTINGS;
00066 }
00067 
00068 
00069 /* math settings size check */
00070 word32 CheckRunTimeFastMath(void)
00071 {
00072     return FP_SIZE;
00073 }
00074 
00075 
00076 /* Functions */
00077 
00078 void fp_add(fp_int *a, fp_int *b, fp_int *c)
00079 {
00080   int     sa, sb;
00081 
00082   /* get sign of both inputs */
00083   sa = a->sign;
00084   sb = b->sign;
00085 
00086   /* handle two cases, not four */
00087   if (sa == sb) {
00088     /* both positive or both negative */
00089     /* add their magnitudes, copy the sign */
00090     c->sign = sa;
00091     s_fp_add (a, b, c);
00092   } else {
00093     /* one positive, the other negative */
00094     /* subtract the one with the greater magnitude from */
00095     /* the one of the lesser magnitude.  The result gets */
00096     /* the sign of the one with the greater magnitude. */
00097     if (fp_cmp_mag (a, b) == FP_LT) {
00098       c->sign = sb;
00099       s_fp_sub (b, a, c);
00100     } else {
00101       c->sign = sa;
00102       s_fp_sub (a, b, c);
00103     }
00104   }
00105 }
00106 
00107 /* unsigned addition */
00108 void s_fp_add(fp_int *a, fp_int *b, fp_int *c)
00109 {
00110   int      x, y, oldused;
00111   fp_word  t;
00112 
00113   y       = MAX(a->used, b->used);
00114   oldused = MIN(c->used, FP_SIZE);   /* help static analysis w/ largest size */
00115   c->used = y;
00116 
00117   t = 0;
00118   for (x = 0; x < y; x++) {
00119       t         += ((fp_word)a->dp[x]) + ((fp_word)b->dp[x]);
00120       c->dp[x]   = (fp_digit)t;
00121       t        >>= DIGIT_BIT;
00122   }
00123   if (t != 0 && x < FP_SIZE) {
00124      c->dp[c->used++] = (fp_digit)t;
00125      ++x;
00126   }
00127 
00128   c->used = x;
00129 
00130   /* zero any excess digits on the destination that we didn't write to */
00131   for (; x < oldused; x++) {
00132      c->dp[x] = 0;
00133   }
00134   fp_clamp(c);
00135 }
00136 
00137 /* c = a - b */
00138 void fp_sub(fp_int *a, fp_int *b, fp_int *c)
00139 {
00140   int     sa, sb;
00141 
00142   sa = a->sign;
00143   sb = b->sign;
00144 
00145   if (sa != sb) {
00146     /* subtract a negative from a positive, OR */
00147     /* subtract a positive from a negative. */
00148     /* In either case, ADD their magnitudes, */
00149     /* and use the sign of the first number. */
00150     c->sign = sa;
00151     s_fp_add (a, b, c);
00152   } else {
00153     /* subtract a positive from a positive, OR */
00154     /* subtract a negative from a negative. */
00155     /* First, take the difference between their */
00156     /* magnitudes, then... */
00157     if (fp_cmp_mag (a, b) != FP_LT) {
00158       /* Copy the sign from the first */
00159       c->sign = sa;
00160       /* The first has a larger or equal magnitude */
00161       s_fp_sub (a, b, c);
00162     } else {
00163       /* The result has the *opposite* sign from */
00164       /* the first number. */
00165       c->sign = (sa == FP_ZPOS) ? FP_NEG : FP_ZPOS;
00166       /* The second has a larger magnitude */
00167       s_fp_sub (b, a, c);
00168     }
00169   }
00170 }
00171 
00172 /* unsigned subtraction ||a|| >= ||b|| ALWAYS! */
00173 void s_fp_sub(fp_int *a, fp_int *b, fp_int *c)
00174 {
00175   int      x, oldbused, oldused;
00176   fp_word  t;
00177 
00178   oldused  = c->used;
00179   oldbused = b->used;
00180   c->used  = a->used;
00181   t       = 0;
00182   for (x = 0; x < oldbused; x++) {
00183      t         = ((fp_word)a->dp[x]) - (((fp_word)b->dp[x]) + t);
00184      c->dp[x]  = (fp_digit)t;
00185      t         = (t >> DIGIT_BIT)&1;
00186   }
00187   for (; x < a->used; x++) {
00188      t         = ((fp_word)a->dp[x]) - t;
00189      c->dp[x]  = (fp_digit)t;
00190      t         = (t >> DIGIT_BIT)&1;
00191    }
00192 
00193   /* zero any excess digits on the destination that we didn't write to */
00194   for (; x < oldused; x++) {
00195      c->dp[x] = 0;
00196   }
00197   fp_clamp(c);
00198 }
00199 
00200 /* c = a * b */
00201 void fp_mul(fp_int *A, fp_int *B, fp_int *C)
00202 {
00203     int   y, yy, oldused;
00204 
00205     oldused = C->used;
00206 
00207     y  = MAX(A->used, B->used);
00208     yy = MIN(A->used, B->used);
00209 
00210     /* call generic if we're out of range */
00211     if (y + yy > FP_SIZE) {
00212        fp_mul_comba(A, B, C);
00213        goto clean;
00214     }
00215 
00216     /* pick a comba (unrolled 4/8/16/32 x or rolled) based on the size
00217        of the largest input.  We also want to avoid doing excess mults if the
00218        inputs are not close to the next power of two.  That is, for example,
00219        if say y=17 then we would do (32-17)^2 = 225 unneeded multiplications
00220     */
00221 
00222 #if defined(TFM_MUL3) && FP_SIZE >= 6
00223         if (y <= 3) {
00224            fp_mul_comba3(A,B,C);
00225            goto clean;
00226         }
00227 #endif
00228 #if defined(TFM_MUL4) && FP_SIZE >= 8
00229         if (y == 4) {
00230            fp_mul_comba4(A,B,C);
00231            goto clean;
00232         }
00233 #endif
00234 #if defined(TFM_MUL6) && FP_SIZE >= 12
00235         if (y <= 6) {
00236            fp_mul_comba6(A,B,C);
00237            goto clean;
00238         }
00239 #endif
00240 #if defined(TFM_MUL7) && FP_SIZE >= 14
00241         if (y == 7) {
00242            fp_mul_comba7(A,B,C);
00243            goto clean;
00244         }
00245 #endif
00246 #if defined(TFM_MUL8) && FP_SIZE >= 16
00247         if (y == 8) {
00248            fp_mul_comba8(A,B,C);
00249            goto clean;
00250         }
00251 #endif
00252 #if defined(TFM_MUL9) && FP_SIZE >= 18
00253         if (y == 9) {
00254            fp_mul_comba9(A,B,C);
00255            goto clean;
00256         }
00257 #endif
00258 #if defined(TFM_MUL12) && FP_SIZE >= 24
00259         if (y <= 12) {
00260            fp_mul_comba12(A,B,C);
00261            goto clean;
00262         }
00263 #endif
00264 #if defined(TFM_MUL17) && FP_SIZE >= 34
00265         if (y <= 17) {
00266            fp_mul_comba17(A,B,C);
00267            goto clean;
00268         }
00269 #endif
00270 
00271 #if defined(TFM_SMALL_SET) && FP_SIZE >= 32
00272         if (y <= 16) {
00273            fp_mul_comba_small(A,B,C);
00274            goto clean;
00275         }
00276 #endif
00277 #if defined(TFM_MUL20) && FP_SIZE >= 40
00278         if (y <= 20) {
00279            fp_mul_comba20(A,B,C);
00280            goto clean;
00281         }
00282 #endif
00283 #if defined(TFM_MUL24) && FP_SIZE >= 48
00284         if (yy >= 16 && y <= 24) {
00285            fp_mul_comba24(A,B,C);
00286            goto clean;
00287         }
00288 #endif
00289 #if defined(TFM_MUL28) && FP_SIZE >= 56
00290         if (yy >= 20 && y <= 28) {
00291            fp_mul_comba28(A,B,C);
00292            goto clean;
00293         }
00294 #endif
00295 #if defined(TFM_MUL32) && FP_SIZE >= 64
00296         if (yy >= 24 && y <= 32) {
00297            fp_mul_comba32(A,B,C);
00298            goto clean;
00299         }
00300 #endif
00301 #if defined(TFM_MUL48) && FP_SIZE >= 96
00302         if (yy >= 40 && y <= 48) {
00303           fp_mul_comba48(A,B,C);
00304           goto clean;
00305         }
00306 #endif
00307 #if defined(TFM_MUL64) && FP_SIZE >= 128
00308         if (yy >= 56 && y <= 64) {
00309            fp_mul_comba64(A,B,C);
00310            goto clean;
00311         }
00312 #endif
00313         fp_mul_comba(A,B,C);
00314 
00315 clean:
00316     /* zero any excess digits on the destination that we didn't write to */
00317     for (y = C->used; y < oldused; y++) {
00318         C->dp[y] = 0;
00319     }
00320 }
00321 
00322 void fp_mul_2(fp_int * a, fp_int * b)
00323 {
00324   int     x, oldused;
00325 
00326   oldused = b->used;
00327   b->used = a->used;
00328 
00329   {
00330     fp_digit r, rr, *tmpa, *tmpb;
00331 
00332     /* alias for source */
00333     tmpa = a->dp;
00334 
00335     /* alias for dest */
00336     tmpb = b->dp;
00337 
00338     /* carry */
00339     r = 0;
00340     for (x = 0; x < a->used; x++) {
00341 
00342       /* get what will be the *next* carry bit from the
00343        * MSB of the current digit
00344        */
00345       rr = *tmpa >> ((fp_digit)(DIGIT_BIT - 1));
00346 
00347       /* now shift up this digit, add in the carry [from the previous] */
00348       *tmpb++ = ((*tmpa++ << ((fp_digit)1)) | r);
00349 
00350       /* copy the carry that would be from the source
00351        * digit into the next iteration
00352        */
00353       r = rr;
00354     }
00355 
00356     /* new leading digit? */
00357     if (r != 0 && b->used != (FP_SIZE-1)) {
00358       /* add a MSB which is always 1 at this point */
00359       *tmpb = 1;
00360       ++(b->used);
00361     }
00362 
00363     /* zero any excess digits on the destination that we didn't write to */
00364     tmpb = b->dp + b->used;
00365     for (x = b->used; x < oldused; x++) {
00366       *tmpb++ = 0;
00367     }
00368   }
00369   b->sign = a->sign;
00370 }
00371 
00372 /* c = a * b */
00373 void fp_mul_d(fp_int *a, fp_digit b, fp_int *c)
00374 {
00375    fp_word  w;
00376    int      x, oldused;
00377 
00378    oldused = c->used;
00379    c->used = a->used;
00380    c->sign = a->sign;
00381    w       = 0;
00382    for (x = 0; x < a->used; x++) {
00383        w         = ((fp_word)a->dp[x]) * ((fp_word)b) + w;
00384        c->dp[x]  = (fp_digit)w;
00385        w         = w >> DIGIT_BIT;
00386    }
00387    if (w != 0 && (a->used != FP_SIZE)) {
00388       c->dp[c->used++] = (fp_digit) w;
00389       ++x;
00390    }
00391 
00392    /* zero any excess digits on the destination that we didn't write to */
00393    for (; x < oldused; x++) {
00394       c->dp[x] = 0;
00395    }
00396    fp_clamp(c);
00397 }
00398 
00399 /* c = a * 2**d */
00400 void fp_mul_2d(fp_int *a, int b, fp_int *c)
00401 {
00402    fp_digit carry, carrytmp, shift;
00403    int x;
00404 
00405    /* copy it */
00406    fp_copy(a, c);
00407 
00408    /* handle whole digits */
00409    if (b >= DIGIT_BIT) {
00410       fp_lshd(c, b/DIGIT_BIT);
00411    }
00412    b %= DIGIT_BIT;
00413 
00414    /* shift the digits */
00415    if (b != 0) {
00416       carry = 0;
00417       shift = DIGIT_BIT - b;
00418       for (x = 0; x < c->used; x++) {
00419           carrytmp = c->dp[x] >> shift;
00420           c->dp[x] = (c->dp[x] << b) + carry;
00421           carry = carrytmp;
00422       }
00423       /* store last carry if room */
00424       if (carry && x < FP_SIZE) {
00425          c->dp[c->used++] = carry;
00426       }
00427    }
00428    fp_clamp(c);
00429 }
00430 
00431 /* generic PxQ multiplier */
00432 #if defined(HAVE_INTEL_MULX)
00433 
00434 INLINE static void fp_mul_comba_mulx(fp_int *A, fp_int *B, fp_int *C)
00435 
00436 {
00437    int       ix, iy, iz, pa;
00438    fp_int    tmp, *dst;
00439 
00440    /* get size of output and trim */
00441    pa = A->used + B->used;
00442    if (pa >= FP_SIZE) {
00443       pa = FP_SIZE-1;
00444    }
00445 
00446    /* Always take branch to use tmp variable. This avoids a cache attack for
00447     * determining if C equals A */
00448    if (1) {
00449       fp_init(&tmp);
00450       dst = &tmp;
00451    }
00452 
00453    TFM_INTEL_MUL_COMBA(A, B, dst) ;
00454 
00455   dst->used = pa;
00456   dst->sign = A->sign ^ B->sign;
00457   fp_clamp(dst);
00458   fp_copy(dst, C);
00459 }
00460 #endif
00461 
00462 void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
00463 {
00464    int       ix, iy, iz, tx, ty, pa;
00465    fp_digit  c0, c1, c2, *tmpx, *tmpy;
00466    fp_int    tmp, *dst;
00467 
00468    IF_HAVE_INTEL_MULX(fp_mul_comba_mulx(A, B, C), return) ;
00469 
00470    COMBA_START;
00471    COMBA_CLEAR;
00472 
00473    /* get size of output and trim */
00474    pa = A->used + B->used;
00475    if (pa >= FP_SIZE) {
00476       pa = FP_SIZE-1;
00477    }
00478 
00479    /* Always take branch to use tmp variable. This avoids a cache attack for
00480     * determining if C equals A */
00481    if (1) {
00482       fp_init(&tmp);
00483       dst = &tmp;
00484    }
00485 
00486    for (ix = 0; ix < pa; ix++) {
00487       /* get offsets into the two bignums */
00488       ty = MIN(ix, B->used-1);
00489       tx = ix - ty;
00490 
00491       /* setup temp aliases */
00492       tmpx = A->dp + tx;
00493       tmpy = B->dp + ty;
00494 
00495       /* this is the number of times the loop will iterate, essentially its
00496          while (tx++ < a->used && ty-- >= 0) { ... }
00497        */
00498       iy = MIN(A->used-tx, ty+1);
00499 
00500       /* execute loop */
00501       COMBA_FORWARD;
00502       for (iz = 0; iz < iy; ++iz) {
00503           fp_digit _tmpx = *tmpx++;
00504           fp_digit _tmpy = *tmpy--;
00505           MULADD(_tmpx, _tmpy);
00506       }
00507 
00508       /* store term */
00509       COMBA_STORE(dst->dp[ix]);
00510   }
00511   COMBA_FINI;
00512 
00513   dst->used = pa;
00514   dst->sign = A->sign ^ B->sign;
00515   fp_clamp(dst);
00516   fp_copy(dst, C);
00517 }
00518 
00519 /* a/b => cb + d == a */
00520 int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
00521 {
00522   fp_int  q, x, y, t1, t2;
00523   int     n, t, i, norm, neg;
00524 
00525   /* is divisor zero ? */
00526   if (fp_iszero (b) == FP_YES) {
00527     return FP_VAL;
00528   }
00529 
00530   /* if a < b then q=0, r = a */
00531   if (fp_cmp_mag (a, b) == FP_LT) {
00532     if (d != NULL) {
00533       fp_copy (a, d);
00534     }
00535     if (c != NULL) {
00536       fp_zero (c);
00537     }
00538     return FP_OKAY;
00539   }
00540 
00541   fp_init(&q);
00542   q.used = a->used + 2;
00543 
00544   fp_init(&t1);
00545   fp_init(&t2);
00546   fp_init_copy(&x, a);
00547   fp_init_copy(&y, b);
00548 
00549   /* fix the sign */
00550   neg = (a->sign == b->sign) ? FP_ZPOS : FP_NEG;
00551   x.sign = y.sign = FP_ZPOS;
00552 
00553   /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
00554   norm = fp_count_bits(&y) % DIGIT_BIT;
00555   if (norm < (int)(DIGIT_BIT-1)) {
00556      norm = (DIGIT_BIT-1) - norm;
00557      fp_mul_2d (&x, norm, &x);
00558      fp_mul_2d (&y, norm, &y);
00559   } else {
00560      norm = 0;
00561   }
00562 
00563   /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
00564   n = x.used - 1;
00565   t = y.used - 1;
00566 
00567   /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
00568   fp_lshd (&y, n - t); /* y = y*b**{n-t} */
00569 
00570   while (fp_cmp (&x, &y) != FP_LT) {
00571     ++(q.dp[n - t]);
00572     fp_sub (&x, &y, &x);
00573   }
00574 
00575   /* reset y by shifting it back down */
00576   fp_rshd (&y, n - t);
00577 
00578   /* step 3. for i from n down to (t + 1) */
00579   for (i = n; i >= (t + 1); i--) {
00580     if (i > x.used) {
00581       continue;
00582     }
00583 
00584     /* step 3.1 if xi == yt then set q{i-t-1} to b-1,
00585      * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
00586     if (x.dp[i] == y.dp[t]) {
00587       q.dp[i - t - 1] = (fp_digit) ((((fp_word)1) << DIGIT_BIT) - 1);
00588     } else {
00589       fp_word tmp;
00590       tmp = ((fp_word) x.dp[i]) << ((fp_word) DIGIT_BIT);
00591       tmp |= ((fp_word) x.dp[i - 1]);
00592       tmp /= ((fp_word)y.dp[t]);
00593       q.dp[i - t - 1] = (fp_digit) (tmp);
00594     }
00595 
00596     /* while (q{i-t-1} * (yt * b + y{t-1})) >
00597              xi * b**2 + xi-1 * b + xi-2
00598 
00599        do q{i-t-1} -= 1;
00600     */
00601     q.dp[i - t - 1] = (q.dp[i - t - 1] + 1);
00602     do {
00603       q.dp[i - t - 1] = (q.dp[i - t - 1] - 1);
00604 
00605       /* find left hand */
00606       fp_zero (&t1);
00607       t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
00608       t1.dp[1] = y.dp[t];
00609       t1.used = 2;
00610       fp_mul_d (&t1, q.dp[i - t - 1], &t1);
00611 
00612       /* find right hand */
00613       t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
00614       t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
00615       t2.dp[2] = x.dp[i];
00616       t2.used = 3;
00617     } while (fp_cmp_mag(&t1, &t2) == FP_GT);
00618 
00619     /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
00620     fp_mul_d (&y, q.dp[i - t - 1], &t1);
00621     fp_lshd  (&t1, i - t - 1);
00622     fp_sub   (&x, &t1, &x);
00623 
00624     /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
00625     if (x.sign == FP_NEG) {
00626       fp_copy (&y, &t1);
00627       fp_lshd (&t1, i - t - 1);
00628       fp_add (&x, &t1, &x);
00629       q.dp[i - t - 1] = q.dp[i - t - 1] - 1;
00630     }
00631   }
00632 
00633   /* now q is the quotient and x is the remainder
00634    * [which we have to normalize]
00635    */
00636 
00637   /* get sign before writing to c */
00638   x.sign = x.used == 0 ? FP_ZPOS : a->sign;
00639 
00640   if (c != NULL) {
00641     fp_clamp (&q);
00642     fp_copy (&q, c);
00643     c->sign = neg;
00644   }
00645 
00646   if (d != NULL) {
00647     fp_div_2d (&x, norm, &x, NULL);
00648 
00649     /* zero any excess digits on the destination that we didn't write to */
00650     for (i = b->used; i < x.used; i++) {
00651         x.dp[i] = 0;
00652     }
00653     fp_clamp(&x);
00654     fp_copy (&x, d);
00655   }
00656 
00657   return FP_OKAY;
00658 }
00659 
00660 /* b = a/2 */
00661 void fp_div_2(fp_int * a, fp_int * b)
00662 {
00663   int     x, oldused;
00664 
00665   oldused = b->used;
00666   b->used = a->used;
00667   {
00668     fp_digit r, rr, *tmpa, *tmpb;
00669 
00670     /* source alias */
00671     tmpa = a->dp + b->used - 1;
00672 
00673     /* dest alias */
00674     tmpb = b->dp + b->used - 1;
00675 
00676     /* carry */
00677     r = 0;
00678     for (x = b->used - 1; x >= 0; x--) {
00679       /* get the carry for the next iteration */
00680       rr = *tmpa & 1;
00681 
00682       /* shift the current digit, add in carry and store */
00683       *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
00684 
00685       /* forward carry to next iteration */
00686       r = rr;
00687     }
00688 
00689     /* zero any excess digits on the destination that we didn't write to */
00690     tmpb = b->dp + b->used;
00691     for (x = b->used; x < oldused; x++) {
00692       *tmpb++ = 0;
00693     }
00694   }
00695   b->sign = a->sign;
00696   fp_clamp (b);
00697 }
00698 
00699 /* c = a / 2**b */
00700 void fp_div_2d(fp_int *a, int b, fp_int *c, fp_int *d)
00701 {
00702   int      D;
00703   fp_int   t;
00704 
00705   /* if the shift count is <= 0 then we do no work */
00706   if (b <= 0) {
00707     fp_copy (a, c);
00708     if (d != NULL) {
00709       fp_zero (d);
00710     }
00711     return;
00712   }
00713 
00714   fp_init(&t);
00715 
00716   /* get the remainder */
00717   if (d != NULL) {
00718     fp_mod_2d (a, b, &t);
00719   }
00720 
00721   /* copy */
00722   fp_copy(a, c);
00723 
00724   /* shift by as many digits in the bit count */
00725   if (b >= (int)DIGIT_BIT) {
00726     fp_rshd (c, b / DIGIT_BIT);
00727   }
00728 
00729   /* shift any bit count < DIGIT_BIT */
00730   D = (b % DIGIT_BIT);
00731   if (D != 0) {
00732     fp_rshb(c, D);
00733   }
00734   fp_clamp (c);
00735   if (d != NULL) {
00736     fp_copy (&t, d);
00737   }
00738 }
00739 
00740 /* c = a mod b, 0 <= c < b  */
00741 int fp_mod(fp_int *a, fp_int *b, fp_int *c)
00742 {
00743    fp_int t;
00744    int    err;
00745 
00746    fp_init(&t);
00747    if ((err = fp_div(a, b, NULL, &t)) != FP_OKAY) {
00748       return err;
00749    }
00750    if (t.sign != b->sign) {
00751       fp_add(&t, b, c);
00752    } else {
00753       fp_copy(&t, c);
00754   }
00755   return FP_OKAY;
00756 }
00757 
00758 /* c = a mod 2**d */
00759 void fp_mod_2d(fp_int *a, int b, fp_int *c)
00760 {
00761    int x;
00762 
00763    /* zero if count less than or equal to zero */
00764    if (b <= 0) {
00765       fp_zero(c);
00766       return;
00767    }
00768 
00769    /* get copy of input */
00770    fp_copy(a, c);
00771 
00772    /* if 2**d is larger than we just return */
00773    if (b >= (DIGIT_BIT * a->used)) {
00774       return;
00775    }
00776 
00777   /* zero digits above the last digit of the modulus */
00778   for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
00779     c->dp[x] = 0;
00780   }
00781   /* clear the digit that is not completely outside/inside the modulus */
00782   c->dp[b / DIGIT_BIT] &= ~((fp_digit)0) >> (DIGIT_BIT - b);
00783   fp_clamp (c);
00784 }
00785 
00786 static int fp_invmod_slow (fp_int * a, fp_int * b, fp_int * c)
00787 {
00788   fp_int  x, y, u, v, A, B, C, D;
00789   int     res;
00790 
00791   /* b cannot be negative */
00792   if (b->sign == FP_NEG || fp_iszero(b) == FP_YES) {
00793     return FP_VAL;
00794   }
00795 
00796   /* init temps */
00797   fp_init(&x);    fp_init(&y);
00798   fp_init(&u);    fp_init(&v);
00799   fp_init(&A);    fp_init(&B);
00800   fp_init(&C);    fp_init(&D);
00801 
00802   /* x = a, y = b */
00803   if ((res = fp_mod(a, b, &x)) != FP_OKAY) {
00804       return res;
00805   }
00806   fp_copy(b, &y);
00807 
00808   /* 2. [modified] if x,y are both even then return an error! */
00809   if (fp_iseven (&x) == FP_YES && fp_iseven (&y) == FP_YES) {
00810     return FP_VAL;
00811   }
00812 
00813   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
00814   fp_copy (&x, &u);
00815   fp_copy (&y, &v);
00816   fp_set (&A, 1);
00817   fp_set (&D, 1);
00818 
00819 top:
00820   /* 4.  while u is even do */
00821   while (fp_iseven (&u) == FP_YES) {
00822     /* 4.1 u = u/2 */
00823     fp_div_2 (&u, &u);
00824 
00825     /* 4.2 if A or B is odd then */
00826     if (fp_isodd (&A) == FP_YES || fp_isodd (&B) == FP_YES) {
00827       /* A = (A+y)/2, B = (B-x)/2 */
00828       fp_add (&A, &y, &A);
00829       fp_sub (&B, &x, &B);
00830     }
00831     /* A = A/2, B = B/2 */
00832     fp_div_2 (&A, &A);
00833     fp_div_2 (&B, &B);
00834   }
00835 
00836   /* 5.  while v is even do */
00837   while (fp_iseven (&v) == FP_YES) {
00838     /* 5.1 v = v/2 */
00839     fp_div_2 (&v, &v);
00840 
00841     /* 5.2 if C or D is odd then */
00842     if (fp_isodd (&C) == FP_YES || fp_isodd (&D) == FP_YES) {
00843       /* C = (C+y)/2, D = (D-x)/2 */
00844       fp_add (&C, &y, &C);
00845       fp_sub (&D, &x, &D);
00846     }
00847     /* C = C/2, D = D/2 */
00848     fp_div_2 (&C, &C);
00849     fp_div_2 (&D, &D);
00850   }
00851 
00852   /* 6.  if u >= v then */
00853   if (fp_cmp (&u, &v) != FP_LT) {
00854     /* u = u - v, A = A - C, B = B - D */
00855     fp_sub (&u, &v, &u);
00856     fp_sub (&A, &C, &A);
00857     fp_sub (&B, &D, &B);
00858   } else {
00859     /* v - v - u, C = C - A, D = D - B */
00860     fp_sub (&v, &u, &v);
00861     fp_sub (&C, &A, &C);
00862     fp_sub (&D, &B, &D);
00863   }
00864 
00865   /* if not zero goto step 4 */
00866   if (fp_iszero (&u) == FP_NO)
00867     goto top;
00868 
00869   /* now a = C, b = D, gcd == g*v */
00870 
00871   /* if v != 1 then there is no inverse */
00872   if (fp_cmp_d (&v, 1) != FP_EQ) {
00873     return FP_VAL;
00874   }
00875 
00876   /* if its too low */
00877   while (fp_cmp_d(&C, 0) == FP_LT) {
00878       fp_add(&C, b, &C);
00879   }
00880 
00881   /* too big */
00882   while (fp_cmp_mag(&C, b) != FP_LT) {
00883       fp_sub(&C, b, &C);
00884   }
00885 
00886   /* C is now the inverse */
00887   fp_copy(&C, c);
00888   return FP_OKAY;
00889 }
00890 
00891 /* c = 1/a (mod b) for odd b only */
00892 int fp_invmod(fp_int *a, fp_int *b, fp_int *c)
00893 {
00894   fp_int  x, y, u, v, B, D;
00895   int     neg;
00896 
00897   /* 2. [modified] b must be odd   */
00898   if (fp_iseven (b) == FP_YES) {
00899     return fp_invmod_slow(a,b,c);
00900   }
00901 
00902   /* init all our temps */
00903   fp_init(&x);  fp_init(&y);
00904   fp_init(&u);  fp_init(&v);
00905   fp_init(&B);  fp_init(&D);
00906 
00907   /* x == modulus, y == value to invert */
00908   fp_copy(b, &x);
00909 
00910   /* we need y = |a| */
00911   fp_abs(a, &y);
00912 
00913   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
00914   fp_copy(&x, &u);
00915   fp_copy(&y, &v);
00916   fp_set (&D, 1);
00917 
00918 top:
00919   /* 4.  while u is even do */
00920   while (fp_iseven (&u) == FP_YES) {
00921     /* 4.1 u = u/2 */
00922     fp_div_2 (&u, &u);
00923 
00924     /* 4.2 if B is odd then */
00925     if (fp_isodd (&B) == FP_YES) {
00926       fp_sub (&B, &x, &B);
00927     }
00928     /* B = B/2 */
00929     fp_div_2 (&B, &B);
00930   }
00931 
00932   /* 5.  while v is even do */
00933   while (fp_iseven (&v) == FP_YES) {
00934     /* 5.1 v = v/2 */
00935     fp_div_2 (&v, &v);
00936 
00937     /* 5.2 if D is odd then */
00938     if (fp_isodd (&D) == FP_YES) {
00939       /* D = (D-x)/2 */
00940       fp_sub (&D, &x, &D);
00941     }
00942     /* D = D/2 */
00943     fp_div_2 (&D, &D);
00944   }
00945 
00946   /* 6.  if u >= v then */
00947   if (fp_cmp (&u, &v) != FP_LT) {
00948     /* u = u - v, B = B - D */
00949     fp_sub (&u, &v, &u);
00950     fp_sub (&B, &D, &B);
00951   } else {
00952     /* v - v - u, D = D - B */
00953     fp_sub (&v, &u, &v);
00954     fp_sub (&D, &B, &D);
00955   }
00956 
00957   /* if not zero goto step 4 */
00958   if (fp_iszero (&u) == FP_NO) {
00959     goto top;
00960   }
00961 
00962   /* now a = C, b = D, gcd == g*v */
00963 
00964   /* if v != 1 then there is no inverse */
00965   if (fp_cmp_d (&v, 1) != FP_EQ) {
00966     return FP_VAL;
00967   }
00968 
00969   /* b is now the inverse */
00970   neg = a->sign;
00971   while (D.sign == FP_NEG) {
00972     fp_add (&D, b, &D);
00973   }
00974   /* too big */
00975   while (fp_cmp_mag(&D, b) != FP_LT) {
00976     fp_sub(&D, b, &D);
00977   }
00978   fp_copy (&D, c);
00979   c->sign = neg;
00980   return FP_OKAY;
00981 }
00982 
00983 /* d = a * b (mod c) */
00984 int fp_mulmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
00985 {
00986   int err;
00987   fp_int t;
00988 
00989   fp_init(&t);
00990   fp_mul(a, b, &t);
00991 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
00992   if (d->size < FP_SIZE) {
00993     err = fp_mod(&t, c, &t);
00994     fp_copy(&t, d);
00995   } else
00996 #endif
00997   {
00998     err = fp_mod(&t, c, d);
00999   }
01000 
01001   return err;
01002 }
01003 
01004 /* d = a - b (mod c) */
01005 int fp_submod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
01006 {
01007   int err;
01008   fp_int t;
01009 
01010   fp_init(&t);
01011   fp_sub(a, b, &t);
01012 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
01013   if (d->size < FP_SIZE) {
01014     err = fp_mod(&t, c, &t);
01015     fp_copy(&t, d);
01016   } else
01017 #endif
01018   {
01019     err = fp_mod(&t, c, d);
01020   }
01021 
01022   return err;
01023 }
01024 
01025 /* d = a + b (mod c) */
01026 int fp_addmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
01027 {
01028   int err;
01029   fp_int t;
01030 
01031   fp_init(&t);
01032   fp_add(a, b, &t);
01033 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
01034   if (d->size < FP_SIZE) {
01035     err = fp_mod(&t, c, &t);
01036     fp_copy(&t, d);
01037   } else
01038 #endif
01039   {
01040     err = fp_mod(&t, c, d);
01041   }
01042 
01043   return err;
01044 }
01045 
01046 #ifdef TFM_TIMING_RESISTANT
01047 
01048 #ifndef WC_NO_CACHE_RESISTANT
01049 /* all off / all on pointer addresses for constant calculations */
01050 /* ecc.c uses same table */
01051 const wolfssl_word wc_off_on_addr[2] =
01052 {
01053 #if defined(WC_64BIT_CPU)
01054     W64LIT(0x0000000000000000),
01055     W64LIT(0xffffffffffffffff)
01056 #elif defined(WC_16BIT_CPU)
01057     0x0000U,
01058     0xffffU
01059 #else
01060     /* 32 bit */
01061     0x00000000U,
01062     0xffffffffU
01063 #endif
01064 };
01065 
01066 #endif /* WC_NO_CACHE_RESISTANT */
01067 
01068 /* timing resistant montgomery ladder based exptmod
01069    Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder",
01070    Cryptographic Hardware and Embedded Systems, CHES 2002
01071 */
01072 static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
01073 {
01074 #ifdef WC_NO_CACHE_RESISTANT
01075   fp_int   R[2];
01076 #else
01077   fp_int   R[3];   /* need a temp for cache resistance */
01078 #endif
01079   fp_digit buf, mp;
01080   int      err, bitcnt, digidx, y;
01081 
01082   /* now setup montgomery  */
01083   if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) {
01084      return err;
01085   }
01086 
01087   fp_init(&R[0]);
01088   fp_init(&R[1]);
01089 #ifndef WC_NO_CACHE_RESISTANT
01090   fp_init(&R[2]);
01091 #endif
01092 
01093   /* now we need R mod m */
01094   fp_montgomery_calc_normalization (&R[0], P);
01095 
01096   /* now set R[0][1] to G * R mod m */
01097   if (fp_cmp_mag(P, G) != FP_GT) {
01098      /* G > P so we reduce it first */
01099      fp_mod(G, P, &R[1]);
01100   } else {
01101      fp_copy(G, &R[1]);
01102   }
01103   fp_mulmod (&R[1], &R[0], P, &R[1]);
01104 
01105   /* for j = t-1 downto 0 do
01106         r_!k = R0*R1; r_k = r_k^2
01107   */
01108 
01109   /* set initial mode and bit cnt */
01110   bitcnt = 1;
01111   buf    = 0;
01112   digidx = X->used - 1;
01113 
01114   for (;;) {
01115     /* grab next digit as required */
01116     if (--bitcnt == 0) {
01117       /* if digidx == -1 we are out of digits so break */
01118       if (digidx == -1) {
01119         break;
01120       }
01121       /* read next digit and reset bitcnt */
01122       buf    = X->dp[digidx--];
01123       bitcnt = (int)DIGIT_BIT;
01124     }
01125 
01126     /* grab the next msb from the exponent */
01127     y     = (int)(buf >> (DIGIT_BIT - 1)) & 1;
01128     buf <<= (fp_digit)1;
01129 
01130     /* do ops */
01131     fp_mul(&R[0], &R[1], &R[y^1]); fp_montgomery_reduce(&R[y^1], P, mp);
01132 
01133 #ifdef WC_NO_CACHE_RESISTANT
01134     fp_sqr(&R[y], &R[y]);          fp_montgomery_reduce(&R[y], P, mp);
01135 #else
01136     /* instead of using R[y] for sqr, which leaks key bit to cache monitor,
01137      * use R[2] as temp, make sure address calc is constant, keep
01138      * &R[0] and &R[1] in cache */
01139     fp_copy((fp_int*) ( ((wolfssl_word)&R[0] & wc_off_on_addr[y^1]) +
01140                         ((wolfssl_word)&R[1] & wc_off_on_addr[y]) ),
01141             &R[2]);
01142     fp_sqr(&R[2], &R[2]);          fp_montgomery_reduce(&R[2], P, mp);
01143     fp_copy(&R[2],
01144             (fp_int*) ( ((wolfssl_word)&R[0] & wc_off_on_addr[y^1]) +
01145                         ((wolfssl_word)&R[1] & wc_off_on_addr[y]) ) );
01146 #endif /* WC_NO_CACHE_RESISTANT */
01147   }
01148 
01149    fp_montgomery_reduce(&R[0], P, mp);
01150    fp_copy(&R[0], Y);
01151    return FP_OKAY;
01152 }
01153 
01154 #else /* TFM_TIMING_RESISTANT */
01155 
01156 /* y = g**x (mod b)
01157  * Some restrictions... x must be positive and < b
01158  */
01159 static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
01160 {
01161   fp_int   res;
01162   fp_digit buf, mp;
01163   int      err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
01164 #ifdef WOLFSSL_SMALL_STACK
01165   fp_int  *M;
01166 #else
01167   fp_int   M[64];
01168 #endif
01169 
01170   /* find window size */
01171   x = fp_count_bits (X);
01172   if (x <= 21) {
01173     winsize = 1;
01174   } else if (x <= 36) {
01175     winsize = 3;
01176   } else if (x <= 140) {
01177     winsize = 4;
01178   } else if (x <= 450) {
01179     winsize = 5;
01180   } else {
01181     winsize = 6;
01182   }
01183 
01184   /* now setup montgomery  */
01185   if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) {
01186      return err;
01187   }
01188 
01189 #ifdef WOLFSSL_SMALL_STACK
01190   /* only allocate space for what's needed */
01191   M = (fp_int*)XMALLOC(sizeof(fp_int)*(1 << winsize), NULL, DYNAMIC_TYPE_TMP_BUFFER);
01192   if (M == NULL) {
01193      return FP_MEM;
01194   }
01195 #endif
01196 
01197   /* init M array */
01198   for(x = 0; x < (1 << winsize); x++)
01199     fp_init(&M[x]);
01200 
01201   /* setup result */
01202   fp_init(&res);
01203 
01204   /* create M table
01205    *
01206    * The M table contains powers of the input base, e.g. M[x] = G^x mod P
01207    *
01208    * The first half of the table is not computed though except for M[0] and M[1]
01209    */
01210 
01211    /* now we need R mod m */
01212    fp_montgomery_calc_normalization (&res, P);
01213 
01214    /* now set M[1] to G * R mod m */
01215    if (fp_cmp_mag(P, G) != FP_GT) {
01216       /* G > P so we reduce it first */
01217       fp_mod(G, P, &M[1]);
01218    } else {
01219       fp_copy(G, &M[1]);
01220    }
01221    fp_mulmod (&M[1], &res, P, &M[1]);
01222 
01223   /* compute the value at M[1<<(winsize-1)] by
01224    * squaring M[1] (winsize-1) times */
01225   fp_copy (&M[1], &M[1 << (winsize - 1)]);
01226   for (x = 0; x < (winsize - 1); x++) {
01227     fp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)]);
01228     fp_montgomery_reduce (&M[1 << (winsize - 1)], P, mp);
01229   }
01230 
01231   /* create upper table */
01232   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
01233     fp_mul(&M[x - 1], &M[1], &M[x]);
01234     fp_montgomery_reduce(&M[x], P, mp);
01235   }
01236 
01237   /* set initial mode and bit cnt */
01238   mode   = 0;
01239   bitcnt = 1;
01240   buf    = 0;
01241   digidx = X->used - 1;
01242   bitcpy = 0;
01243   bitbuf = 0;
01244 
01245   for (;;) {
01246     /* grab next digit as required */
01247     if (--bitcnt == 0) {
01248       /* if digidx == -1 we are out of digits so break */
01249       if (digidx == -1) {
01250         break;
01251       }
01252       /* read next digit and reset bitcnt */
01253       buf    = X->dp[digidx--];
01254       bitcnt = (int)DIGIT_BIT;
01255     }
01256 
01257     /* grab the next msb from the exponent */
01258     y     = (int)(buf >> (DIGIT_BIT - 1)) & 1;
01259     buf <<= (fp_digit)1;
01260 
01261     /* if the bit is zero and mode == 0 then we ignore it
01262      * These represent the leading zero bits before the first 1 bit
01263      * in the exponent.  Technically this opt is not required but it
01264      * does lower the # of trivial squaring/reductions used
01265      */
01266     if (mode == 0 && y == 0) {
01267       continue;
01268     }
01269 
01270     /* if the bit is zero and mode == 1 then we square */
01271     if (mode == 1 && y == 0) {
01272       fp_sqr(&res, &res);
01273       fp_montgomery_reduce(&res, P, mp);
01274       continue;
01275     }
01276 
01277     /* else we add it to the window */
01278     bitbuf |= (y << (winsize - ++bitcpy));
01279     mode    = 2;
01280 
01281     if (bitcpy == winsize) {
01282       /* ok window is filled so square as required and multiply  */
01283       /* square first */
01284       for (x = 0; x < winsize; x++) {
01285         fp_sqr(&res, &res);
01286         fp_montgomery_reduce(&res, P, mp);
01287       }
01288 
01289       /* then multiply */
01290       fp_mul(&res, &M[bitbuf], &res);
01291       fp_montgomery_reduce(&res, P, mp);
01292 
01293       /* empty window and reset */
01294       bitcpy = 0;
01295       bitbuf = 0;
01296       mode   = 1;
01297     }
01298   }
01299 
01300   /* if bits remain then square/multiply */
01301   if (mode == 2 && bitcpy > 0) {
01302     /* square then multiply if the bit is set */
01303     for (x = 0; x < bitcpy; x++) {
01304       fp_sqr(&res, &res);
01305       fp_montgomery_reduce(&res, P, mp);
01306 
01307       /* get next bit of the window */
01308       bitbuf <<= 1;
01309       if ((bitbuf & (1 << winsize)) != 0) {
01310         /* then multiply */
01311         fp_mul(&res, &M[1], &res);
01312         fp_montgomery_reduce(&res, P, mp);
01313       }
01314     }
01315   }
01316 
01317   /* fixup result if Montgomery reduction is used
01318    * recall that any value in a Montgomery system is
01319    * actually multiplied by R mod n.  So we have
01320    * to reduce one more time to cancel out the factor
01321    * of R.
01322    */
01323   fp_montgomery_reduce(&res, P, mp);
01324 
01325   /* swap res with Y */
01326   fp_copy (&res, Y);
01327 
01328 #ifdef WOLFSSL_SMALL_STACK
01329   XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
01330 #endif
01331 
01332   return FP_OKAY;
01333 }
01334 
01335 #endif /* TFM_TIMING_RESISTANT */
01336 
01337 int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
01338 {
01339    /* prevent overflows */
01340    if (P->used > (FP_SIZE/2)) {
01341       return FP_VAL;
01342    }
01343 
01344    if (X->sign == FP_NEG) {
01345 #ifndef POSITIVE_EXP_ONLY  /* reduce stack if assume no negatives */
01346       int    err;
01347       fp_int tmp;
01348 
01349       /* yes, copy G and invmod it */
01350       fp_init_copy(&tmp, G);
01351       if ((err = fp_invmod(&tmp, P, &tmp)) != FP_OKAY) {
01352          return err;
01353       }
01354       X->sign = FP_ZPOS;
01355       err =  _fp_exptmod(&tmp, X, P, Y);
01356       if (X != Y) {
01357          X->sign = FP_NEG;
01358       }
01359       return err;
01360 #else
01361       return FP_VAL;
01362 #endif
01363    }
01364    else {
01365       /* Positive exponent so just exptmod */
01366       return _fp_exptmod(G, X, P, Y);
01367    }
01368 }
01369 
01370 /* computes a = 2**b */
01371 void fp_2expt(fp_int *a, int b)
01372 {
01373    int     z;
01374 
01375    /* zero a as per default */
01376    fp_zero (a);
01377 
01378    if (b < 0) {
01379       return;
01380    }
01381 
01382    z = b / DIGIT_BIT;
01383    if (z >= FP_SIZE) {
01384       return;
01385    }
01386 
01387   /* set the used count of where the bit will go */
01388   a->used = z + 1;
01389 
01390   /* put the single bit in its place */
01391   a->dp[z] = ((fp_digit)1) << (b % DIGIT_BIT);
01392 }
01393 
01394 /* b = a*a  */
01395 void fp_sqr(fp_int *A, fp_int *B)
01396 {
01397     int y, oldused;
01398 
01399     oldused = B->used;
01400     y = A->used;
01401 
01402     /* call generic if we're out of range */
01403     if (y + y > FP_SIZE) {
01404        fp_sqr_comba(A, B);
01405        goto clean;
01406     }
01407 
01408 #if defined(TFM_SQR3) && FP_SIZE >= 6
01409         if (y <= 3) {
01410            fp_sqr_comba3(A,B);
01411            goto clean;
01412         }
01413 #endif
01414 #if defined(TFM_SQR4) && FP_SIZE >= 8
01415         if (y == 4) {
01416            fp_sqr_comba4(A,B);
01417            goto clean;
01418         }
01419 #endif
01420 #if defined(TFM_SQR6) && FP_SIZE >= 12
01421         if (y <= 6) {
01422            fp_sqr_comba6(A,B);
01423            goto clean;
01424         }
01425 #endif
01426 #if defined(TFM_SQR7) && FP_SIZE >= 14
01427         if (y == 7) {
01428            fp_sqr_comba7(A,B);
01429            goto clean;
01430         }
01431 #endif
01432 #if defined(TFM_SQR8) && FP_SIZE >= 16
01433         if (y == 8) {
01434            fp_sqr_comba8(A,B);
01435            goto clean;
01436         }
01437 #endif
01438 #if defined(TFM_SQR9) && FP_SIZE >= 18
01439         if (y == 9) {
01440            fp_sqr_comba9(A,B);
01441            goto clean;
01442         }
01443 #endif
01444 #if defined(TFM_SQR12) && FP_SIZE >= 24
01445         if (y <= 12) {
01446            fp_sqr_comba12(A,B);
01447            goto clean;
01448         }
01449 #endif
01450 #if defined(TFM_SQR17) && FP_SIZE >= 34
01451         if (y <= 17) {
01452            fp_sqr_comba17(A,B);
01453            goto clean;
01454         }
01455 #endif
01456 #if defined(TFM_SMALL_SET)
01457         if (y <= 16) {
01458            fp_sqr_comba_small(A,B);
01459            goto clean;
01460         }
01461 #endif
01462 #if defined(TFM_SQR20) && FP_SIZE >= 40
01463         if (y <= 20) {
01464            fp_sqr_comba20(A,B);
01465            goto clean;
01466         }
01467 #endif
01468 #if defined(TFM_SQR24) && FP_SIZE >= 48
01469         if (y <= 24) {
01470            fp_sqr_comba24(A,B);
01471            goto clean;
01472         }
01473 #endif
01474 #if defined(TFM_SQR28) && FP_SIZE >= 56
01475         if (y <= 28) {
01476            fp_sqr_comba28(A,B);
01477            goto clean;
01478         }
01479 #endif
01480 #if defined(TFM_SQR32) && FP_SIZE >= 64
01481         if (y <= 32) {
01482            fp_sqr_comba32(A,B);
01483            goto clean;
01484         }
01485 #endif
01486 #if defined(TFM_SQR48) && FP_SIZE >= 96
01487         if (y <= 48) {
01488            fp_sqr_comba48(A,B);
01489            goto clean;
01490         }
01491 #endif
01492 #if defined(TFM_SQR64) && FP_SIZE >= 128
01493         if (y <= 64) {
01494            fp_sqr_comba64(A,B);
01495            goto clean;
01496         }
01497 #endif
01498        fp_sqr_comba(A, B);
01499 
01500 clean:
01501   /* zero any excess digits on the destination that we didn't write to */
01502   for (y = B->used; y < oldused; y++) {
01503     B->dp[y] = 0;
01504   }
01505 }
01506 
01507 /* generic comba squarer */
01508 void fp_sqr_comba(fp_int *A, fp_int *B)
01509 {
01510   int       pa, ix, iz;
01511   fp_digit  c0, c1, c2;
01512   fp_int    tmp, *dst;
01513 #ifdef TFM_ISO
01514   fp_word   tt;
01515 #endif
01516 
01517   /* get size of output and trim */
01518   pa = A->used + A->used;
01519   if (pa >= FP_SIZE) {
01520      pa = FP_SIZE-1;
01521   }
01522 
01523   /* number of output digits to produce */
01524   COMBA_START;
01525   COMBA_CLEAR;
01526 
01527   if (A == B) {
01528      fp_init(&tmp);
01529      dst = &tmp;
01530   } else {
01531      fp_zero(B);
01532      dst = B;
01533   }
01534 
01535   for (ix = 0; ix < pa; ix++) {
01536       int      tx, ty, iy;
01537       fp_digit *tmpy, *tmpx;
01538 
01539       /* get offsets into the two bignums */
01540       ty = MIN(A->used-1, ix);
01541       tx = ix - ty;
01542 
01543       /* setup temp aliases */
01544       tmpx = A->dp + tx;
01545       tmpy = A->dp + ty;
01546 
01547       /* this is the number of times the loop will iterate,
01548          while (tx++ < a->used && ty-- >= 0) { ... }
01549        */
01550       iy = MIN(A->used-tx, ty+1);
01551 
01552       /* now for squaring tx can never equal ty
01553        * we halve the distance since they approach
01554        * at a rate of 2x and we have to round because
01555        * odd cases need to be executed
01556        */
01557       iy = MIN(iy, (ty-tx+1)>>1);
01558 
01559       /* forward carries */
01560       COMBA_FORWARD;
01561 
01562       /* execute loop */
01563       for (iz = 0; iz < iy; iz++) {
01564           SQRADD2(*tmpx++, *tmpy--);
01565       }
01566 
01567       /* even columns have the square term in them */
01568       if ((ix&1) == 0) {
01569           /* TAO change COMBA_ADD back to SQRADD */
01570           SQRADD(A->dp[ix>>1], A->dp[ix>>1]);
01571       }
01572 
01573       /* store it */
01574       COMBA_STORE(dst->dp[ix]);
01575   }
01576 
01577   COMBA_FINI;
01578 
01579   /* setup dest */
01580   dst->used = pa;
01581   fp_clamp (dst);
01582   if (dst != B) {
01583      fp_copy(dst, B);
01584   }
01585 }
01586 
01587 int fp_cmp(fp_int *a, fp_int *b)
01588 {
01589    if (a->sign == FP_NEG && b->sign == FP_ZPOS) {
01590       return FP_LT;
01591    } else if (a->sign == FP_ZPOS && b->sign == FP_NEG) {
01592       return FP_GT;
01593    } else {
01594       /* compare digits */
01595       if (a->sign == FP_NEG) {
01596          /* if negative compare opposite direction */
01597          return fp_cmp_mag(b, a);
01598       } else {
01599          return fp_cmp_mag(a, b);
01600       }
01601    }
01602 }
01603 
01604 /* compare against a single digit */
01605 int fp_cmp_d(fp_int *a, fp_digit b)
01606 {
01607   /* special case for zero*/
01608   if (a->used == 0 && b == 0)
01609     return FP_EQ;
01610 
01611   /* compare based on sign */
01612   if ((b && a->used == 0) || a->sign == FP_NEG) {
01613     return FP_LT;
01614   }
01615 
01616   /* compare based on magnitude */
01617   if (a->used > 1) {
01618     return FP_GT;
01619   }
01620 
01621   /* compare the only digit of a to b */
01622   if (a->dp[0] > b) {
01623     return FP_GT;
01624   } else if (a->dp[0] < b) {
01625     return FP_LT;
01626   } else {
01627     return FP_EQ;
01628   }
01629 
01630 }
01631 
01632 int fp_cmp_mag(fp_int *a, fp_int *b)
01633 {
01634    int x;
01635 
01636    if (a->used > b->used) {
01637       return FP_GT;
01638    } else if (a->used < b->used) {
01639       return FP_LT;
01640    } else {
01641       for (x = a->used - 1; x >= 0; x--) {
01642           if (a->dp[x] > b->dp[x]) {
01643              return FP_GT;
01644           } else if (a->dp[x] < b->dp[x]) {
01645              return FP_LT;
01646           }
01647       }
01648    }
01649    return FP_EQ;
01650 }
01651 
01652 /* sets up the montgomery reduction */
01653 int fp_montgomery_setup(fp_int *a, fp_digit *rho)
01654 {
01655   fp_digit x, b;
01656 
01657 /* fast inversion mod 2**k
01658  *
01659  * Based on the fact that
01660  *
01661  * XA = 1 (mod 2**n)  =>  (X(2-XA)) A = 1 (mod 2**2n)
01662  *                    =>  2*X*A - X*X*A*A = 1
01663  *                    =>  2*(1) - (1)     = 1
01664  */
01665   b = a->dp[0];
01666 
01667   if ((b & 1) == 0) {
01668     return FP_VAL;
01669   }
01670 
01671   x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
01672   x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
01673   x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
01674   x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
01675 #ifdef FP_64BIT
01676   x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
01677 #endif
01678 
01679   /* rho = -1/m mod b */
01680   *rho = (fp_digit) (((fp_word) 1 << ((fp_word) DIGIT_BIT)) - ((fp_word)x));
01681 
01682   return FP_OKAY;
01683 }
01684 
01685 /* computes a = B**n mod b without division or multiplication useful for
01686  * normalizing numbers in a Montgomery system.
01687  */
01688 void fp_montgomery_calc_normalization(fp_int *a, fp_int *b)
01689 {
01690   int     x, bits;
01691 
01692   /* how many bits of last digit does b use */
01693   bits = fp_count_bits (b) % DIGIT_BIT;
01694   if (!bits) bits = DIGIT_BIT;
01695 
01696   /* compute A = B^(n-1) * 2^(bits-1) */
01697   if (b->used > 1) {
01698      fp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1);
01699   } else {
01700      fp_set(a, 1);
01701      bits = 1;
01702   }
01703 
01704   /* now compute C = A * B mod b */
01705   for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
01706     fp_mul_2 (a, a);
01707     if (fp_cmp_mag (a, b) != FP_LT) {
01708       s_fp_sub (a, b, a);
01709     }
01710   }
01711 }
01712 
01713 
01714 #ifdef TFM_SMALL_MONT_SET
01715     #include "fp_mont_small.i"
01716 #endif
01717 
01718 #ifdef HAVE_INTEL_MULX
01719 static INLINE void innermul8_mulx(fp_digit *c_mulx, fp_digit *cy_mulx, fp_digit *tmpm, fp_digit mu)
01720 {
01721     fp_digit _c0, _c1, _c2, _c3, _c4, _c5, _c6, _c7, cy ;
01722 
01723     cy = *cy_mulx ;
01724     _c0=c_mulx[0]; _c1=c_mulx[1]; _c2=c_mulx[2]; _c3=c_mulx[3]; _c4=c_mulx[4]; _c5=c_mulx[5]; _c6=c_mulx[6]; _c7=c_mulx[7];
01725     INNERMUL8_MULX ;
01726     c_mulx[0]=_c0; c_mulx[1]=_c1; c_mulx[2]=_c2; c_mulx[3]=_c3; c_mulx[4]=_c4; c_mulx[5]=_c5; c_mulx[6]=_c6; c_mulx[7]=_c7;
01727     *cy_mulx = cy ;
01728 }
01729 
01730 /* computes x/R == x (mod N) via Montgomery Reduction */
01731 static void fp_montgomery_reduce_mulx(fp_int *a, fp_int *m, fp_digit mp)
01732 {
01733    fp_digit c[FP_SIZE+1], *_c, *tmpm, mu = 0;
01734    int      oldused, x, y, pa;
01735 
01736    /* bail if too large */
01737    if (m->used > (FP_SIZE/2)) {
01738       (void)mu;                     /* shut up compiler */
01739       return;
01740    }
01741 
01742 #ifdef TFM_SMALL_MONT_SET
01743    if (m->used <= 16) {
01744       fp_montgomery_reduce_small(a, m, mp);
01745       return;
01746    }
01747 #endif
01748 
01749 
01750    /* now zero the buff */
01751    XMEMSET(c, 0, sizeof(c));
01752    pa = m->used;
01753 
01754    /* copy the input */
01755    oldused = a->used;
01756    for (x = 0; x < oldused; x++) {
01757        c[x] = a->dp[x];
01758    }
01759    MONT_START;
01760 
01761    for (x = 0; x < pa; x++) {
01762        fp_digit cy = 0;
01763        /* get Mu for this round */
01764        LOOP_START;
01765        _c   = c + x;
01766        tmpm = m->dp;
01767        y = 0;
01768         for (; y < (pa & ~7); y += 8) {
01769               innermul8_mulx(_c, &cy, tmpm, mu) ;
01770               _c   += 8;
01771               tmpm += 8;
01772            }
01773        for (; y < pa; y++) {
01774           INNERMUL;
01775           ++_c;
01776        }
01777        LOOP_END;
01778        while (cy) {
01779            PROPCARRY;
01780            ++_c;
01781        }
01782   }
01783 
01784   /* now copy out */
01785   _c   = c + pa;
01786   tmpm = a->dp;
01787   for (x = 0; x < pa+1; x++) {
01788      *tmpm++ = *_c++;
01789   }
01790 
01791   /* zero any excess digits on the destination that we didn't write to */
01792   for (; x < oldused; x++) {
01793      *tmpm++ = 0;
01794   }
01795 
01796   MONT_FINI;
01797 
01798   a->used = pa+1;
01799   fp_clamp(a);
01800 
01801   /* if A >= m then A = A - m */
01802   if (fp_cmp_mag (a, m) != FP_LT) {
01803     s_fp_sub (a, m, a);
01804   }
01805 }
01806 #endif
01807 
01808 /* computes x/R == x (mod N) via Montgomery Reduction */
01809 void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
01810 {
01811    fp_digit c[FP_SIZE+1], *_c, *tmpm, mu = 0;
01812    int      oldused, x, y, pa;
01813 
01814    IF_HAVE_INTEL_MULX(fp_montgomery_reduce_mulx(a, m, mp), return) ;
01815 
01816    /* bail if too large */
01817    if (m->used > (FP_SIZE/2)) {
01818       (void)mu;                     /* shut up compiler */
01819       return;
01820    }
01821 
01822 #ifdef TFM_SMALL_MONT_SET
01823    if (m->used <= 16) {
01824       fp_montgomery_reduce_small(a, m, mp);
01825       return;
01826    }
01827 #endif
01828 
01829 
01830    /* now zero the buff */
01831    XMEMSET(c, 0, sizeof(c));
01832    pa = m->used;
01833 
01834    /* copy the input */
01835    oldused = a->used;
01836    for (x = 0; x < oldused; x++) {
01837        c[x] = a->dp[x];
01838    }
01839    MONT_START;
01840 
01841    for (x = 0; x < pa; x++) {
01842        fp_digit cy = 0;
01843        /* get Mu for this round */
01844        LOOP_START;
01845        _c   = c + x;
01846        tmpm = m->dp;
01847        y = 0;
01848 #if defined(INNERMUL8)
01849         for (; y < (pa & ~7); y += 8) {
01850               INNERMUL8 ;
01851               _c   += 8;
01852               tmpm += 8;
01853            }
01854 #endif
01855        for (; y < pa; y++) {
01856           INNERMUL;
01857           ++_c;
01858        }
01859        LOOP_END;
01860        while (cy) {
01861            PROPCARRY;
01862            ++_c;
01863        }
01864   }
01865 
01866   /* now copy out */
01867   _c   = c + pa;
01868   tmpm = a->dp;
01869   for (x = 0; x < pa+1; x++) {
01870      *tmpm++ = *_c++;
01871   }
01872 
01873   /* zero any excess digits on the destination that we didn't write to */
01874   for (; x < oldused; x++) {
01875      *tmpm++ = 0;
01876   }
01877 
01878   MONT_FINI;
01879 
01880   a->used = pa+1;
01881   fp_clamp(a);
01882 
01883   /* if A >= m then A = A - m */
01884   if (fp_cmp_mag (a, m) != FP_LT) {
01885     s_fp_sub (a, m, a);
01886   }
01887 }
01888 
01889 void fp_read_unsigned_bin(fp_int *a, const unsigned char *b, int c)
01890 {
01891   /* zero the int */
01892   fp_zero (a);
01893 
01894   /* If we know the endianness of this architecture, and we're using
01895      32-bit fp_digits, we can optimize this */
01896 #if (defined(LITTLE_ENDIAN_ORDER) || defined(BIG_ENDIAN_ORDER)) && \
01897     defined(FP_32BIT)
01898   /* But not for both simultaneously */
01899 #if defined(LITTLE_ENDIAN_ORDER) && defined(BIG_ENDIAN_ORDER)
01900 #error Both LITTLE_ENDIAN_ORDER and BIG_ENDIAN_ORDER defined.
01901 #endif
01902   {
01903      unsigned char *pd = (unsigned char *)a->dp;
01904 
01905      if ((unsigned)c > (FP_SIZE * sizeof(fp_digit))) {
01906         int excess = c - (FP_SIZE * sizeof(fp_digit));
01907         c -= excess;
01908         b += excess;
01909      }
01910      a->used = (c + sizeof(fp_digit) - 1)/sizeof(fp_digit);
01911      /* read the bytes in */
01912 #ifdef BIG_ENDIAN_ORDER
01913      {
01914        /* Use Duff's device to unroll the loop. */
01915        int idx = (c - 1) & ~3;
01916        switch (c % 4) {
01917        case 0:    do { pd[idx+0] = *b++;
01918        case 3:         pd[idx+1] = *b++;
01919        case 2:         pd[idx+2] = *b++;
01920        case 1:         pd[idx+3] = *b++;
01921                      idx -= 4;
01922                  } while ((c -= 4) > 0);
01923        }
01924      }
01925 #else
01926      for (c -= 1; c >= 0; c -= 1) {
01927        pd[c] = *b++;
01928      }
01929 #endif
01930   }
01931 #else
01932   /* read the bytes in */
01933   for (; c > 0; c--) {
01934      fp_mul_2d (a, 8, a);
01935      a->dp[0] |= *b++;
01936      a->used += 1;
01937   }
01938 #endif
01939   fp_clamp (a);
01940 }
01941 
01942 int fp_to_unsigned_bin_at_pos(int x, fp_int *t, unsigned char *b)
01943 {
01944    while (fp_iszero (t) == FP_NO) {
01945       b[x++] = (unsigned char) (t->dp[0] & 255);
01946       fp_div_2d (t, 8, t, NULL);
01947   }
01948   return x;
01949 }
01950 
01951 void fp_to_unsigned_bin(fp_int *a, unsigned char *b)
01952 {
01953   int     x;
01954   fp_int  t;
01955 
01956   fp_init_copy(&t, a);
01957 
01958   x = fp_to_unsigned_bin_at_pos(0, &t, b);
01959   fp_reverse (b, x);
01960 }
01961 
01962 int fp_unsigned_bin_size(fp_int *a)
01963 {
01964   int     size = fp_count_bits (a);
01965   return (size / 8 + ((size & 7) != 0 ? 1 : 0));
01966 }
01967 
01968 void fp_set(fp_int *a, fp_digit b)
01969 {
01970    fp_zero(a);
01971    a->dp[0] = b;
01972    a->used  = a->dp[0] ? 1 : 0;
01973 }
01974 
01975 
01976 #ifndef MP_SET_CHUNK_BITS
01977     #define MP_SET_CHUNK_BITS 4
01978 #endif
01979 void fp_set_int(fp_int *a, unsigned long b)
01980 {
01981   int x;
01982 
01983   /* use direct fp_set if b is less than fp_digit max */
01984   if (b < FP_DIGIT_MAX) {
01985     fp_set (a, b);
01986     return;
01987   }
01988 
01989   fp_zero (a);
01990 
01991   /* set chunk bits at a time */
01992   for (x = 0; x < (int)(sizeof(b) * 8) / MP_SET_CHUNK_BITS; x++) {
01993     fp_mul_2d (a, MP_SET_CHUNK_BITS, a);
01994 
01995     /* OR in the top bits of the source */
01996     a->dp[0] |= (b >> ((sizeof(b) * 8) - MP_SET_CHUNK_BITS)) &
01997                                   ((1 << MP_SET_CHUNK_BITS) - 1);
01998 
01999     /* shift the source up to the next chunk bits */
02000     b <<= MP_SET_CHUNK_BITS;
02001 
02002     /* ensure that digits are not clamped off */
02003     a->used += 1;
02004   }
02005 
02006   /* clamp digits */
02007   fp_clamp(a);
02008 }
02009 
02010 /* check if a bit is set */
02011 int fp_is_bit_set (fp_int *a, fp_digit b)
02012 {
02013     fp_digit i;
02014 
02015     if (b > FP_MAX_BITS)
02016         return 0;
02017     else
02018         i = b/DIGIT_BIT;
02019 
02020     if ((fp_digit)a->used < i)
02021         return 0;
02022 
02023     return (int)((a->dp[i] >> b%DIGIT_BIT) & (fp_digit)1);
02024 }
02025 
02026 /* set the b bit of a */
02027 int fp_set_bit (fp_int * a, fp_digit b)
02028 {
02029     fp_digit i;
02030 
02031     if (b > FP_MAX_BITS)
02032         return 0;
02033     else
02034         i = b/DIGIT_BIT;
02035 
02036     /* set the used count of where the bit will go if required */
02037     if (a->used < (int)(i+1))
02038         a->used = (int)(i+1);
02039 
02040     /* put the single bit in its place */
02041     a->dp[i] |= ((fp_digit)1) << (b % DIGIT_BIT);
02042 
02043     return MP_OKAY;
02044 }
02045 
02046 int fp_count_bits (fp_int * a)
02047 {
02048   int     r;
02049   fp_digit q;
02050 
02051   /* shortcut */
02052   if (a->used == 0) {
02053     return 0;
02054   }
02055 
02056   /* get number of digits and add that */
02057   r = (a->used - 1) * DIGIT_BIT;
02058 
02059   /* take the last digit and count the bits in it */
02060   q = a->dp[a->used - 1];
02061   while (q > ((fp_digit) 0)) {
02062     ++r;
02063     q >>= ((fp_digit) 1);
02064   }
02065 
02066   return r;
02067 }
02068 
02069 int fp_leading_bit(fp_int *a)
02070 {
02071     int bit = 0;
02072 
02073     if (a->used != 0) {
02074         fp_digit q = a->dp[a->used - 1];
02075         int qSz = sizeof(fp_digit);
02076 
02077         while (qSz > 0) {
02078             if ((unsigned char)q != 0)
02079                 bit = (q & 0x80) != 0;
02080             q >>= 8;
02081             qSz--;
02082         }
02083     }
02084 
02085     return bit;
02086 }
02087 
02088 void fp_lshd(fp_int *a, int x)
02089 {
02090     int y;
02091 
02092     /* move up and truncate as required */
02093     y = MIN(a->used + x - 1, (int)(FP_SIZE-1));
02094 
02095     /* store new size */
02096     a->used = y + 1;
02097 
02098     /* move digits */
02099     for (; y >= x; y--) {
02100         a->dp[y] = a->dp[y-x];
02101     }
02102 
02103     /* zero lower digits */
02104     for (; y >= 0; y--) {
02105         a->dp[y] = 0;
02106     }
02107 
02108     /* clamp digits */
02109     fp_clamp(a);
02110 }
02111 
02112 
02113 /* right shift by bit count */
02114 void fp_rshb(fp_int *c, int x)
02115 {
02116     fp_digit *tmpc, mask, shift;
02117     fp_digit r, rr;
02118     fp_digit D = x;
02119 
02120     /* mask */
02121     mask = (((fp_digit)1) << D) - 1;
02122 
02123     /* shift for lsb */
02124     shift = DIGIT_BIT - D;
02125 
02126     /* alias */
02127     tmpc = c->dp + (c->used - 1);
02128 
02129     /* carry */
02130     r = 0;
02131     for (x = c->used - 1; x >= 0; x--) {
02132       /* get the lower  bits of this word in a temp */
02133       rr = *tmpc & mask;
02134 
02135       /* shift the current word and mix in the carry bits from previous word */
02136       *tmpc = (*tmpc >> D) | (r << shift);
02137       --tmpc;
02138 
02139       /* set the carry to the carry bits of the current word found above */
02140       r = rr;
02141     }
02142 
02143     /* clamp digits */
02144     fp_clamp(c);
02145 }
02146 
02147 
02148 void fp_rshd(fp_int *a, int x)
02149 {
02150   int y;
02151 
02152   /* too many digits just zero and return */
02153   if (x >= a->used) {
02154      fp_zero(a);
02155      return;
02156   }
02157 
02158    /* shift */
02159    for (y = 0; y < a->used - x; y++) {
02160       a->dp[y] = a->dp[y+x];
02161    }
02162 
02163    /* zero rest */
02164    for (; y < a->used; y++) {
02165       a->dp[y] = 0;
02166    }
02167 
02168    /* decrement count */
02169    a->used -= x;
02170    fp_clamp(a);
02171 }
02172 
02173 /* reverse an array, used for radix code */
02174 void fp_reverse (unsigned char *s, int len)
02175 {
02176   int     ix, iy;
02177   unsigned char t;
02178 
02179   ix = 0;
02180   iy = len - 1;
02181   while (ix < iy) {
02182     t     = s[ix];
02183     s[ix] = s[iy];
02184     s[iy] = t;
02185     ++ix;
02186     --iy;
02187   }
02188 }
02189 
02190 
02191 /* c = a - b */
02192 void fp_sub_d(fp_int *a, fp_digit b, fp_int *c)
02193 {
02194    fp_int tmp;
02195    fp_init(&tmp);
02196    fp_set(&tmp, b);
02197 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
02198    if (c->size < FP_SIZE) {
02199      fp_sub(a, &tmp, &tmp);
02200      fp_copy(&tmp, c);
02201    } else
02202 #endif
02203    {
02204      fp_sub(a, &tmp, c);
02205    }
02206 }
02207 
02208 
02209 /* wolfSSL callers from normal lib */
02210 
02211 /* init a new mp_int */
02212 int mp_init (mp_int * a)
02213 {
02214   if (a)
02215     fp_init(a);
02216   return MP_OKAY;
02217 }
02218 
02219 void fp_init(fp_int *a)
02220 {
02221 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
02222     a->size = FP_SIZE;
02223 #endif
02224 #ifdef HAVE_WOLF_BIGINT
02225     wc_bigint_init(&a->raw);
02226 #endif
02227     fp_zero(a);
02228 }
02229 
02230 void fp_zero(fp_int *a)
02231 {
02232     int size = FP_SIZE;
02233     a->used = 0;
02234     a->sign = FP_ZPOS;
02235 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
02236     size = a->size;
02237 #endif
02238     XMEMSET(a->dp, 0, size * sizeof(fp_digit));
02239 }
02240 
02241 void fp_clear(fp_int *a)
02242 {
02243     int size = FP_SIZE;
02244     a->used = 0;
02245     a->sign = FP_ZPOS;
02246 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
02247     size = a->size;
02248 #endif
02249     XMEMSET(a->dp, 0, size * sizeof(fp_digit));
02250     fp_free(a);
02251 }
02252 
02253 void fp_forcezero (mp_int * a)
02254 {
02255     int size = FP_SIZE;
02256     a->used = 0;
02257     a->sign = FP_ZPOS;
02258 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
02259     size = a->size;
02260 #endif
02261     ForceZero(a->dp, size * sizeof(fp_digit));
02262 #ifdef HAVE_WOLF_BIGINT
02263     wc_bigint_zero(&a->raw);
02264 #endif
02265     fp_free(a);
02266 }
02267 
02268 void mp_forcezero (mp_int * a)
02269 {
02270     fp_forcezero(a);
02271 }
02272 
02273 void fp_free(fp_int* a)
02274 {
02275 #ifdef HAVE_WOLF_BIGINT
02276     wc_bigint_free(&a->raw);
02277 #else
02278     (void)a;
02279 #endif
02280 }
02281 
02282 
02283 /* clear one (frees)  */
02284 void mp_clear (mp_int * a)
02285 {
02286     if (a == NULL)
02287         return;
02288     fp_clear(a);
02289 }
02290 
02291 void mp_free(mp_int* a)
02292 {
02293     fp_free(a);
02294 }
02295 
02296 /* handle up to 6 inits */
02297 int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d,
02298                   mp_int* e, mp_int* f)
02299 {
02300     if (a)
02301         fp_init(a);
02302     if (b)
02303         fp_init(b);
02304     if (c)
02305         fp_init(c);
02306     if (d)
02307         fp_init(d);
02308     if (e)
02309         fp_init(e);
02310     if (f)
02311         fp_init(f);
02312 
02313     return MP_OKAY;
02314 }
02315 
02316 /* high level addition (handles signs) */
02317 int mp_add (mp_int * a, mp_int * b, mp_int * c)
02318 {
02319   fp_add(a, b, c);
02320   return MP_OKAY;
02321 }
02322 
02323 /* high level subtraction (handles signs) */
02324 int mp_sub (mp_int * a, mp_int * b, mp_int * c)
02325 {
02326   fp_sub(a, b, c);
02327   return MP_OKAY;
02328 }
02329 
02330 /* high level multiplication (handles sign) */
02331 #if defined(FREESCALE_LTC_TFM)
02332 int wolfcrypt_mp_mul(mp_int * a, mp_int * b, mp_int * c)
02333 #else
02334 int mp_mul (mp_int * a, mp_int * b, mp_int * c)
02335 #endif
02336 {
02337   fp_mul(a, b, c);
02338   return MP_OKAY;
02339 }
02340 
02341 int mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
02342 {
02343   fp_mul_d(a, b, c);
02344   return MP_OKAY;
02345 }
02346 
02347 /* d = a * b (mod c) */
02348 #if defined(FREESCALE_LTC_TFM)
02349 int wolfcrypt_mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
02350 #else
02351 int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
02352 #endif
02353 {
02354   return fp_mulmod(a, b, c, d);
02355 }
02356 
02357 /* d = a - b (mod c) */
02358 int mp_submod(mp_int *a, mp_int *b, mp_int *c, mp_int *d)
02359 {
02360   return fp_submod(a, b, c, d);
02361 }
02362 
02363 /* d = a + b (mod c) */
02364 int mp_addmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d)
02365 {
02366   return fp_addmod(a, b, c, d);
02367 }
02368 
02369 /* c = a mod b, 0 <= c < b */
02370 #if defined(FREESCALE_LTC_TFM)
02371 int wolfcrypt_mp_mod (mp_int * a, mp_int * b, mp_int * c)
02372 #else
02373 int mp_mod (mp_int * a, mp_int * b, mp_int * c)
02374 #endif
02375 {
02376   return fp_mod (a, b, c);
02377 }
02378 
02379 /* hac 14.61, pp608 */
02380 #if defined(FREESCALE_LTC_TFM)
02381 int wolfcrypt_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
02382 #else
02383 int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
02384 #endif
02385 {
02386   return fp_invmod(a, b, c);
02387 }
02388 
02389 /* this is a shell function that calls either the normal or Montgomery
02390  * exptmod functions.  Originally the call to the montgomery code was
02391  * embedded in the normal function but that wasted alot of stack space
02392  * for nothing (since 99% of the time the Montgomery code would be called)
02393  */
02394 #if defined(FREESCALE_LTC_TFM)
02395 int wolfcrypt_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
02396 #else
02397 int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
02398 #endif
02399 {
02400   return fp_exptmod(G, X, P, Y);
02401 }
02402 
02403 /* compare two ints (signed)*/
02404 int mp_cmp (mp_int * a, mp_int * b)
02405 {
02406   return fp_cmp(a, b);
02407 }
02408 
02409 /* compare a digit */
02410 int mp_cmp_d(mp_int * a, mp_digit b)
02411 {
02412   return fp_cmp_d(a, b);
02413 }
02414 
02415 /* get the size for an unsigned equivalent */
02416 int mp_unsigned_bin_size (mp_int * a)
02417 {
02418   return fp_unsigned_bin_size(a);
02419 }
02420 
02421 int mp_to_unsigned_bin_at_pos(int x, fp_int *t, unsigned char *b)
02422 {
02423     return fp_to_unsigned_bin_at_pos(x, t, b);
02424 }
02425 
02426 /* store in unsigned [big endian] format */
02427 int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
02428 {
02429   fp_to_unsigned_bin(a,b);
02430   return MP_OKAY;
02431 }
02432 
02433 /* reads a unsigned char array, assumes the msb is stored first [big endian] */
02434 int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
02435 {
02436   fp_read_unsigned_bin(a, b, c);
02437   return MP_OKAY;
02438 }
02439 
02440 
02441 int mp_sub_d(fp_int *a, fp_digit b, fp_int *c)
02442 {
02443     fp_sub_d(a, b, c);
02444     return MP_OKAY;
02445 }
02446 
02447 int mp_mul_2d(fp_int *a, int b, fp_int *c)
02448 {
02449     fp_mul_2d(a, b, c);
02450     return MP_OKAY;
02451 }
02452 
02453 int mp_div(fp_int * a, fp_int * b, fp_int * c, fp_int * d)
02454 {
02455     return fp_div(a, b, c, d);
02456 }
02457 
02458 int mp_div_2d(fp_int* a, int b, fp_int* c, fp_int* d)
02459 {
02460     fp_div_2d(a, b, c, d);
02461     return MP_OKAY;
02462 }
02463 
02464 void fp_copy(fp_int *a, fp_int *b)
02465 {
02466     /* if source and destination are different */
02467     if (a != b) {
02468 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
02469         /* verify a will fit in b */
02470         if (b->size >= a->used) {
02471             int x, oldused;
02472             oldused = b->used;
02473             b->used = a->used;
02474             b->sign = a->sign;
02475 
02476             XMEMCPY(b->dp, a->dp, a->used * sizeof(fp_digit));
02477 
02478             /* zero any excess digits on the destination that we didn't write to */
02479             for (x = b->used; x < oldused; x++) {
02480                 b->dp[x] = 0;
02481             }
02482         }
02483         else {
02484             /* TODO: Handle error case */
02485         }
02486 #else
02487         /* all dp's are same size, so do straight copy */
02488         b->used = a->used;
02489         b->sign = a->sign;
02490         XMEMCPY(b->dp, a->dp, FP_SIZE * sizeof(fp_digit));
02491 #endif
02492     }
02493 }
02494 
02495 void fp_init_copy(fp_int *a, fp_int* b)
02496 {
02497     if (a != b) {
02498         fp_init(a);
02499         fp_copy(b, a);
02500     }
02501 }
02502 
02503 /* fast math wrappers */
02504 int mp_copy(fp_int* a, fp_int* b)
02505 {
02506     fp_copy(a, b);
02507     return MP_OKAY;
02508 }
02509 
02510 int mp_isodd(mp_int* a)
02511 {
02512     return fp_isodd(a);
02513 }
02514 
02515 int mp_iszero(mp_int* a)
02516 {
02517     return fp_iszero(a);
02518 }
02519 
02520 int mp_count_bits (mp_int* a)
02521 {
02522     return fp_count_bits(a);
02523 }
02524 
02525 int mp_leading_bit (mp_int* a)
02526 {
02527     return fp_leading_bit(a);
02528 }
02529 
02530 void mp_rshb (mp_int* a, int x)
02531 {
02532     fp_rshb(a, x);
02533 }
02534 
02535 void mp_rshd (mp_int* a, int x)
02536 {
02537     fp_rshd(a, x);
02538 }
02539 
02540 int mp_set_int(mp_int *a, unsigned long b)
02541 {
02542     fp_set_int(a, b);
02543     return MP_OKAY;
02544 }
02545 
02546 int mp_is_bit_set (mp_int *a, mp_digit b)
02547 {
02548     return fp_is_bit_set(a, b);
02549 }
02550 
02551 int mp_set_bit(mp_int *a, mp_digit b)
02552 {
02553     return fp_set_bit(a, b);
02554 }
02555 
02556 #if defined(WOLFSSL_KEY_GEN) || defined (HAVE_ECC)
02557 
02558 /* c = a * a (mod b) */
02559 int fp_sqrmod(fp_int *a, fp_int *b, fp_int *c)
02560 {
02561   int err;
02562   fp_int t;
02563 
02564   fp_init(&t);
02565   fp_sqr(a, &t);
02566 
02567 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
02568   if (c->size < FP_SIZE) {
02569     err = fp_mod(&t, b, &t);
02570     fp_copy(&t, c);
02571   }
02572   else
02573 #endif
02574   {
02575     err = fp_mod(&t, b, c);
02576   }
02577 
02578   return err;
02579 }
02580 
02581 /* fast math conversion */
02582 int mp_sqrmod(mp_int *a, mp_int *b, mp_int *c)
02583 {
02584     return fp_sqrmod(a, b, c);
02585 }
02586 
02587 /* fast math conversion */
02588 int mp_montgomery_calc_normalization(mp_int *a, mp_int *b)
02589 {
02590     fp_montgomery_calc_normalization(a, b);
02591     return MP_OKAY;
02592 }
02593 
02594 #endif /* WOLFSSL_KEYGEN || HAVE_ECC */
02595 
02596 
02597 #if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
02598     defined(WOLFSSL_DEBUG_MATH)
02599 
02600 #ifdef WOLFSSL_KEY_GEN
02601 /* swap the elements of two integers, for cases where you can't simply swap the
02602  * mp_int pointers around
02603  */
02604 static void fp_exch (fp_int * a, fp_int * b)
02605 {
02606     fp_int  t;
02607 
02608     t  = *a;
02609     *a = *b;
02610     *b = t;
02611 }
02612 #endif
02613 
02614 static const int lnz[16] = {
02615    4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
02616 };
02617 
02618 /* Counts the number of lsbs which are zero before the first zero bit */
02619 int fp_cnt_lsb(fp_int *a)
02620 {
02621    int x;
02622    fp_digit q, qq;
02623 
02624    /* easy out */
02625    if (fp_iszero(a) == FP_YES) {
02626       return 0;
02627    }
02628 
02629    /* scan lower digits until non-zero */
02630    for (x = 0; x < a->used && a->dp[x] == 0; x++) {}
02631    q = a->dp[x];
02632    x *= DIGIT_BIT;
02633 
02634    /* now scan this digit until a 1 is found */
02635    if ((q & 1) == 0) {
02636       do {
02637          qq  = q & 15;
02638          x  += lnz[qq];
02639          q >>= 4;
02640       } while (qq == 0);
02641    }
02642    return x;
02643 }
02644 
02645 
02646 static int s_is_power_of_two(fp_digit b, int *p)
02647 {
02648    int x;
02649 
02650    /* fast return if no power of two */
02651    if ((b==0) || (b & (b-1))) {
02652       return FP_NO;
02653    }
02654 
02655    for (x = 0; x < DIGIT_BIT; x++) {
02656       if (b == (((fp_digit)1)<<x)) {
02657          *p = x;
02658          return FP_YES;
02659       }
02660    }
02661    return FP_NO;
02662 }
02663 
02664 /* a/b => cb + d == a */
02665 static int fp_div_d(fp_int *a, fp_digit b, fp_int *c, fp_digit *d)
02666 {
02667   fp_int   q;
02668   fp_word  w;
02669   fp_digit t;
02670   int      ix;
02671 
02672   /* cannot divide by zero */
02673   if (b == 0) {
02674      return FP_VAL;
02675   }
02676 
02677   /* quick outs */
02678   if (b == 1 || fp_iszero(a) == FP_YES) {
02679      if (d != NULL) {
02680         *d = 0;
02681      }
02682      if (c != NULL) {
02683         fp_copy(a, c);
02684      }
02685      return FP_OKAY;
02686   }
02687 
02688   /* power of two ? */
02689   if (s_is_power_of_two(b, &ix) == FP_YES) {
02690      if (d != NULL) {
02691         *d = a->dp[0] & ((((fp_digit)1)<<ix) - 1);
02692      }
02693      if (c != NULL) {
02694         fp_div_2d(a, ix, c, NULL);
02695      }
02696      return FP_OKAY;
02697   }
02698 
02699   if (c != NULL) {
02700     /* no easy answer [c'est la vie].  Just division */
02701     fp_init(&q);
02702 
02703     q.used = a->used;
02704     q.sign = a->sign;
02705   }
02706 
02707   w = 0;
02708   for (ix = a->used - 1; ix >= 0; ix--) {
02709      w = (w << ((fp_word)DIGIT_BIT)) | ((fp_word)a->dp[ix]);
02710 
02711      if (w >= b) {
02712         t = (fp_digit)(w / b);
02713         w -= ((fp_word)t) * ((fp_word)b);
02714       } else {
02715         t = 0;
02716       }
02717       if (c != NULL)
02718         q.dp[ix] = (fp_digit)t;
02719   }
02720 
02721   if (d != NULL) {
02722      *d = (fp_digit)w;
02723   }
02724 
02725   if (c != NULL) {
02726      fp_clamp(&q);
02727      fp_copy(&q, c);
02728   }
02729 
02730   return FP_OKAY;
02731 }
02732 
02733 
02734 /* c = a mod b, 0 <= c < b  */
02735 static int fp_mod_d(fp_int *a, fp_digit b, fp_digit *c)
02736 {
02737    return fp_div_d(a, b, NULL, c);
02738 }
02739 
02740 int mp_mod_d(fp_int *a, fp_digit b, fp_digit *c)
02741 {
02742    return fp_mod_d(a, b, c);
02743 }
02744 
02745 #endif /* defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || defined(WOLFSSL_DEBUG_MATH) */
02746 
02747 #ifdef WOLFSSL_KEY_GEN
02748 
02749 static void fp_gcd(fp_int *a, fp_int *b, fp_int *c);
02750 static void fp_lcm(fp_int *a, fp_int *b, fp_int *c);
02751 static int  fp_isprime_ex(fp_int *a, int t);
02752 static int  fp_isprime(fp_int *a);
02753 static int  fp_randprime(fp_int* N, int len, WC_RNG* rng, void* heap);
02754 
02755 int mp_gcd(fp_int *a, fp_int *b, fp_int *c)
02756 {
02757     fp_gcd(a, b, c);
02758     return MP_OKAY;
02759 }
02760 
02761 
02762 int mp_lcm(fp_int *a, fp_int *b, fp_int *c)
02763 {
02764     fp_lcm(a, b, c);
02765     return MP_OKAY;
02766 }
02767 
02768 
02769 int mp_prime_is_prime(mp_int* a, int t, int* result)
02770 {
02771     (void)t;
02772     *result = fp_isprime(a);
02773     return MP_OKAY;
02774 }
02775 
02776 int mp_rand_prime(mp_int* N, int len, WC_RNG* rng, void* heap)
02777 {
02778     int err;
02779 
02780     err = fp_randprime(N, len, rng, heap);
02781     switch(err) {
02782         case FP_VAL:
02783             return MP_VAL;
02784         case FP_MEM:
02785             return MP_MEM;
02786         default:
02787             break;
02788     }
02789 
02790     return MP_OKAY;
02791 }
02792 
02793 int mp_exch (mp_int * a, mp_int * b)
02794 {
02795     fp_exch(a, b);
02796     return MP_OKAY;
02797 }
02798 
02799 /* Miller-Rabin test of "a" to the base of "b" as described in
02800  * HAC pp. 139 Algorithm 4.24
02801  *
02802  * Sets result to 0 if definitely composite or 1 if probably prime.
02803  * Randomly the chance of error is no more than 1/4 and often
02804  * very much lower.
02805  */
02806 static void fp_prime_miller_rabin (fp_int * a, fp_int * b, int *result)
02807 {
02808   fp_int  n1, y, r;
02809   int     s, j;
02810 
02811   /* default */
02812   *result = FP_NO;
02813 
02814   /* ensure b > 1 */
02815   if (fp_cmp_d(b, 1) != FP_GT) {
02816      return;
02817   }
02818 
02819   /* get n1 = a - 1 */
02820   fp_init_copy(&n1, a);
02821   fp_sub_d(&n1, 1, &n1);
02822 
02823   /* set 2**s * r = n1 */
02824   fp_init_copy(&r, &n1);
02825 
02826   /* count the number of least significant bits
02827    * which are zero
02828    */
02829   s = fp_cnt_lsb(&r);
02830 
02831   /* now divide n - 1 by 2**s */
02832   fp_div_2d (&r, s, &r, NULL);
02833 
02834   /* compute y = b**r mod a */
02835   fp_init(&y);
02836   fp_exptmod(b, &r, a, &y);
02837 
02838   /* if y != 1 and y != n1 do */
02839   if (fp_cmp_d (&y, 1) != FP_EQ && fp_cmp (&y, &n1) != FP_EQ) {
02840     j = 1;
02841     /* while j <= s-1 and y != n1 */
02842     while ((j <= (s - 1)) && fp_cmp (&y, &n1) != FP_EQ) {
02843       fp_sqrmod (&y, a, &y);
02844 
02845       /* if y == 1 then composite */
02846       if (fp_cmp_d (&y, 1) == FP_EQ) {
02847          return;
02848       }
02849       ++j;
02850     }
02851 
02852     /* if y != n1 then composite */
02853     if (fp_cmp (&y, &n1) != FP_EQ) {
02854        return;
02855     }
02856   }
02857 
02858   /* probably prime now */
02859   *result = FP_YES;
02860 }
02861 
02862 
02863 /* a few primes */
02864 static const fp_digit primes[FP_PRIME_SIZE] = {
02865   0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
02866   0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
02867   0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
02868   0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
02869   0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
02870   0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
02871   0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
02872   0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
02873 
02874   0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
02875   0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
02876   0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
02877   0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
02878   0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
02879   0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
02880   0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
02881   0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
02882 
02883   0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
02884   0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
02885   0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
02886   0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
02887   0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
02888   0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
02889   0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
02890   0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
02891 
02892   0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
02893   0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
02894   0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
02895   0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
02896   0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
02897   0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
02898   0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
02899   0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
02900 };
02901 
02902 int fp_isprime_ex(fp_int *a, int t)
02903 {
02904    fp_int   b;
02905    fp_digit d;
02906    int      r, res;
02907 
02908    if (t <= 0 || t > FP_PRIME_SIZE) {
02909      return FP_NO;
02910    }
02911 
02912    /* do trial division */
02913    for (r = 0; r < FP_PRIME_SIZE; r++) {
02914        fp_mod_d(a, primes[r], &d);
02915        if (d == 0) {
02916           return FP_NO;
02917        }
02918    }
02919 
02920    /* now do 't' miller rabins */
02921    fp_init(&b);
02922    for (r = 0; r < t; r++) {
02923        fp_set(&b, primes[r]);
02924        fp_prime_miller_rabin(a, &b, &res);
02925        if (res == FP_NO) {
02926           return FP_NO;
02927        }
02928    }
02929    return FP_YES;
02930 }
02931 
02932 int fp_isprime(fp_int *a)
02933 {
02934   return fp_isprime_ex(a, 8);
02935 }
02936 
02937 int fp_randprime(fp_int* N, int len, WC_RNG* rng, void* heap)
02938 {
02939     static const int USE_BBS = 1;
02940     int   err, type;
02941     byte* buf;
02942 
02943     /* get type */
02944     if (len < 0) {
02945         type = USE_BBS;
02946         len = -len;
02947     } else {
02948         type = 0;
02949     }
02950 
02951     /* allow sizes between 2 and 512 bytes for a prime size */
02952     if (len < 2 || len > 512) {
02953         return FP_VAL;
02954     }
02955 
02956     /* allocate buffer to work with */
02957     buf = (byte*)XMALLOC(len, heap, DYNAMIC_TYPE_TMP_BUFFER);
02958     if (buf == NULL) {
02959         return FP_MEM;
02960     }
02961     XMEMSET(buf, 0, len);
02962 
02963     do {
02964 #ifdef SHOW_GEN
02965         printf(".");
02966         fflush(stdout);
02967 #endif
02968         /* generate value */
02969         err = wc_RNG_GenerateBlock(rng, buf, len);
02970         if (err != 0) {
02971             XFREE(buf, heap, DYNAMIC_TYPE_TMP_BUFFER);
02972             return FP_VAL;
02973         }
02974 
02975         /* munge bits */
02976         buf[0]     |= 0x80 | 0x40;
02977         buf[len-1] |= 0x01 | ((type & USE_BBS) ? 0x02 : 0x00);
02978 
02979         /* load value */
02980         fp_read_unsigned_bin(N, buf, len);
02981 
02982         /* test */
02983     } while (fp_isprime(N) == FP_NO);
02984 
02985     XMEMSET(buf, 0, len);
02986     XFREE(buf, heap, DYNAMIC_TYPE_TMP_BUFFER);
02987 
02988     return FP_OKAY;
02989 }
02990 
02991 /* c = [a, b] */
02992 void fp_lcm(fp_int *a, fp_int *b, fp_int *c)
02993 {
02994    fp_int  t1, t2;
02995 
02996    fp_init(&t1);
02997    fp_init(&t2);
02998    fp_gcd(a, b, &t1);
02999    if (fp_cmp_mag(a, b) == FP_GT) {
03000       fp_div(a, &t1, &t2, NULL);
03001       fp_mul(b, &t2, c);
03002    } else {
03003       fp_div(b, &t1, &t2, NULL);
03004       fp_mul(a, &t2, c);
03005    }
03006 }
03007 
03008 
03009 
03010 /* c = (a, b) */
03011 void fp_gcd(fp_int *a, fp_int *b, fp_int *c)
03012 {
03013    fp_int u, v, r;
03014 
03015    /* either zero than gcd is the largest */
03016    if (fp_iszero (a) == FP_YES && fp_iszero (b) == FP_NO) {
03017      fp_abs (b, c);
03018      return;
03019    }
03020    if (fp_iszero (a) == FP_NO && fp_iszero (b) == FP_YES) {
03021      fp_abs (a, c);
03022      return;
03023    }
03024 
03025    /* optimized.  At this point if a == 0 then
03026     * b must equal zero too
03027     */
03028    if (fp_iszero (a) == FP_YES) {
03029      fp_zero(c);
03030      return;
03031    }
03032 
03033    /* sort inputs */
03034    if (fp_cmp_mag(a, b) != FP_LT) {
03035       fp_init_copy(&u, a);
03036       fp_init_copy(&v, b);
03037    } else {
03038       fp_init_copy(&u, b);
03039       fp_init_copy(&v, a);
03040    }
03041 
03042    fp_init(&r);
03043    while (fp_iszero(&v) == FP_NO) {
03044       fp_mod(&u, &v, &r);
03045       fp_copy(&v, &u);
03046       fp_copy(&r, &v);
03047    }
03048    fp_copy(&u, c);
03049 }
03050 
03051 #endif /* WOLFSSL_KEY_GEN */
03052 
03053 
03054 #if defined(HAVE_ECC) || !defined(NO_PWDBASED) || defined(OPENSSL_EXTRA) || \
03055     defined(WC_RSA_BLINDING)
03056 /* c = a + b */
03057 void fp_add_d(fp_int *a, fp_digit b, fp_int *c)
03058 {
03059    fp_int tmp;
03060    fp_init(&tmp);
03061    fp_set(&tmp, b);
03062    fp_add(a, &tmp, c);
03063 }
03064 
03065 /* external compatibility */
03066 int mp_add_d(fp_int *a, fp_digit b, fp_int *c)
03067 {
03068     fp_add_d(a, b, c);
03069     return MP_OKAY;
03070 }
03071 
03072 #endif  /* HAVE_ECC || !NO_PWDBASED */
03073 
03074 
03075 #if defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY)
03076 
03077 /* chars used in radix conversions */
03078 static const char *fp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ\
03079                                 abcdefghijklmnopqrstuvwxyz+/";
03080 #endif
03081 
03082 #ifdef HAVE_ECC
03083 static int fp_read_radix(fp_int *a, const char *str, int radix)
03084 {
03085   int     y, neg;
03086   char    ch;
03087 
03088   /* set the integer to the default of zero */
03089   fp_zero (a);
03090 
03091   /* make sure the radix is ok */
03092   if (radix < 2 || radix > 64) {
03093     return FP_VAL;
03094   }
03095 
03096   /* if the leading digit is a
03097    * minus set the sign to negative.
03098    */
03099   if (*str == '-') {
03100     ++str;
03101     neg = FP_NEG;
03102   } else {
03103     neg = FP_ZPOS;
03104   }
03105 
03106   /* process each digit of the string */
03107   while (*str) {
03108     /* if the radix <= 36 the conversion is case insensitive
03109      * this allows numbers like 1AB and 1ab to represent the same  value
03110      * [e.g. in hex]
03111      */
03112     ch = (char)((radix <= 36) ? XTOUPPER((unsigned char)*str) : *str);
03113     for (y = 0; y < 64; y++) {
03114       if (ch == fp_s_rmap[y]) {
03115          break;
03116       }
03117     }
03118 
03119     /* if the char was found in the map
03120      * and is less than the given radix add it
03121      * to the number, otherwise exit the loop.
03122      */
03123     if (y < radix) {
03124       fp_mul_d (a, (fp_digit) radix, a);
03125       fp_add_d (a, (fp_digit) y, a);
03126     } else {
03127       break;
03128     }
03129     ++str;
03130   }
03131 
03132   /* set the sign only if a != 0 */
03133   if (fp_iszero(a) != FP_YES) {
03134      a->sign = neg;
03135   }
03136   return FP_OKAY;
03137 }
03138 
03139 /* fast math conversion */
03140 int mp_read_radix(mp_int *a, const char *str, int radix)
03141 {
03142     return fp_read_radix(a, str, radix);
03143 }
03144 
03145 /* fast math conversion */
03146 int mp_sqr(fp_int *A, fp_int *B)
03147 {
03148     fp_sqr(A, B);
03149     return MP_OKAY;
03150 }
03151 
03152 /* fast math conversion */
03153 int mp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
03154 {
03155     fp_montgomery_reduce(a, m, mp);
03156     return MP_OKAY;
03157 }
03158 
03159 
03160 /* fast math conversion */
03161 int mp_montgomery_setup(fp_int *a, fp_digit *rho)
03162 {
03163     return fp_montgomery_setup(a, rho);
03164 }
03165 
03166 int mp_div_2(fp_int * a, fp_int * b)
03167 {
03168     fp_div_2(a, b);
03169     return MP_OKAY;
03170 }
03171 
03172 
03173 int mp_init_copy(fp_int * a, fp_int * b)
03174 {
03175     fp_init_copy(a, b);
03176     return MP_OKAY;
03177 }
03178 
03179 #ifdef HAVE_COMP_KEY
03180 
03181 int mp_cnt_lsb(fp_int* a)
03182 {
03183     return fp_cnt_lsb(a);
03184 }
03185 
03186 #endif /* HAVE_COMP_KEY */
03187 
03188 #endif /* HAVE_ECC */
03189 
03190 #if defined(HAVE_ECC) || !defined(NO_RSA) || !defined(NO_DSA)
03191 /* fast math conversion */
03192 int mp_set(fp_int *a, fp_digit b)
03193 {
03194     fp_set(a,b);
03195     return MP_OKAY;
03196 }
03197 #endif
03198 
03199 #if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
03200     defined(WOLFSSL_DEBUG_MATH)
03201 
03202 /* returns size of ASCII representation */
03203 int mp_radix_size (mp_int *a, int radix, int *size)
03204 {
03205     int     res, digs;
03206     fp_int  t;
03207     fp_digit d;
03208 
03209     *size = 0;
03210 
03211     /* special case for binary */
03212     if (radix == 2) {
03213         *size = fp_count_bits (a) + (a->sign == FP_NEG ? 1 : 0) + 1;
03214         return FP_YES;
03215     }
03216 
03217     /* make sure the radix is in range */
03218     if (radix < 2 || radix > 64) {
03219         return FP_VAL;
03220     }
03221 
03222     if (fp_iszero(a) == MP_YES) {
03223         *size = 2;
03224         return FP_OKAY;
03225     }
03226 
03227     /* digs is the digit count */
03228     digs = 0;
03229 
03230     /* if it's negative add one for the sign */
03231     if (a->sign == FP_NEG) {
03232         ++digs;
03233     }
03234 
03235     /* init a copy of the input */
03236     fp_init_copy (&t, a);
03237 
03238     /* force temp to positive */
03239     t.sign = FP_ZPOS;
03240 
03241     /* fetch out all of the digits */
03242     while (fp_iszero (&t) == FP_NO) {
03243         if ((res = fp_div_d (&t, (mp_digit) radix, &t, &d)) != FP_OKAY) {
03244             fp_zero (&t);
03245             return res;
03246         }
03247         ++digs;
03248     }
03249     fp_zero (&t);
03250 
03251     /* return digs + 1, the 1 is for the NULL byte that would be required. */
03252     *size = digs + 1;
03253     return FP_OKAY;
03254 }
03255 
03256 /* stores a bignum as a ASCII string in a given radix (2..64) */
03257 int mp_toradix (mp_int *a, char *str, int radix)
03258 {
03259     int     res, digs;
03260     fp_int  t;
03261     fp_digit d;
03262     char   *_s = str;
03263 
03264     /* check range of the radix */
03265     if (radix < 2 || radix > 64) {
03266         return FP_VAL;
03267     }
03268 
03269     /* quick out if its zero */
03270     if (fp_iszero(a) == FP_YES) {
03271         *str++ = '0';
03272         *str = '\0';
03273         return FP_YES;
03274     }
03275 
03276     /* init a copy of the input */
03277     fp_init_copy (&t, a);
03278 
03279     /* if it is negative output a - */
03280     if (t.sign == FP_NEG) {
03281         ++_s;
03282         *str++ = '-';
03283         t.sign = FP_ZPOS;
03284     }
03285 
03286     digs = 0;
03287     while (fp_iszero (&t) == FP_NO) {
03288         if ((res = fp_div_d (&t, (fp_digit) radix, &t, &d)) != FP_OKAY) {
03289             fp_zero (&t);
03290             return res;
03291         }
03292         *str++ = fp_s_rmap[d];
03293         ++digs;
03294     }
03295 
03296     /* reverse the digits of the string.  In this case _s points
03297      * to the first digit [excluding the sign] of the number]
03298      */
03299     fp_reverse ((unsigned char *)_s, digs);
03300 
03301     /* append a NULL so the string is properly terminated */
03302     *str = '\0';
03303 
03304     fp_zero (&t);
03305     return FP_OKAY;
03306 }
03307 
03308 #ifdef WOLFSSL_DEBUG_MATH
03309 void mp_dump(const char* desc, mp_int* a, byte verbose)
03310 {
03311   char buffer[FP_SIZE * sizeof(fp_digit) * 2];
03312   int size = FP_SIZE;
03313 
03314 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
03315   size = a->size;
03316 #endif
03317 
03318   printf("%s: ptr=%p, used=%d, sign=%d, size=%d, fpd=%d\n",
03319     desc, a, a->used, a->sign, size, (int)sizeof(fp_digit));
03320 
03321   mp_toradix(a, buffer, 16);
03322   printf("  %s\n  ", buffer);
03323 
03324   if (verbose) {
03325     int i;
03326     for(i=0; i<size * (int)sizeof(fp_digit); i++) {
03327       printf("%x ", *(((byte*)a->dp) + i));
03328     }
03329     printf("\n");
03330   }
03331 }
03332 #endif /* WOLFSSL_DEBUG_MATH */
03333 
03334 #endif /* defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || defined(WOLFSSL_DEBUG_MATH) */
03335 
03336 
03337 int mp_lshd (mp_int * a, int b)
03338 {
03339     fp_lshd(a, b);
03340     return FP_OKAY;
03341 }
03342 
03343 #endif /* USE_FAST_MATH */
03344