Xuyi Wang / wolfcrypt

Dependents:   OS

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers tfm.c Source File

tfm.c

00001 /* tfm.c
00002  *
00003  * Copyright (C) 2006-2017 wolfSSL Inc.
00004  *
00005  * This file is part of wolfSSL.
00006  *
00007  * wolfSSL is free software; you can redistribute it and/or modify
00008  * it under the terms of the GNU General Public License as published by
00009  * the Free Software Foundation; either version 2 of the License, or
00010  * (at your option) any later version.
00011  *
00012  * wolfSSL is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
00020  */
00021 
00022 
00023 
00024 /*
00025  * Based on public domain TomsFastMath 0.10 by Tom St Denis, tomstdenis@iahu.ca,
00026  * http://math.libtomcrypt.com
00027  */
00028 
00029 /**
00030  *  Edited by Moises Guimaraes (moises@wolfssl.com)
00031  *  to fit wolfSSL's needs.
00032  */
00033 
00034 #ifdef HAVE_CONFIG_H
00035     #include <config.h>
00036 #endif
00037 
00038 /* in case user set USE_FAST_MATH there */
00039 #include <wolfcrypt/settings.h>
00040 #ifdef NO_INLINE
00041     #include <wolfcrypt/misc.h>
00042 #else
00043     #define WOLFSSL_MISC_INCLUDED
00044     #include <wolfcrypt/src/misc.c>
00045 #endif
00046 
00047 #ifdef USE_FAST_MATH
00048 
00049 #include <wolfcrypt/random.h>
00050 #include <wolfcrypt/tfm.h>
00051 #include <wolfcrypt/src/asm.c>  /* will define asm MACROS or C ones */
00052 #include <wolfcrypt/wolfmath.h> /* common functions */
00053 
00054 #if defined(FREESCALE_LTC_TFM)
00055     #include <wolfcrypt/port/nxp/ksdk_port.h>
00056 #endif
00057 #ifdef WOLFSSL_DEBUG_MATH
00058     #include <stdio.h>
00059 #endif
00060 
00061 
00062 /* math settings check */
00063 word32 CheckRunTimeSettings(void)
00064 {
00065     return CTC_SETTINGS;
00066 }
00067 
00068 
00069 /* math settings size check */
00070 word32 CheckRunTimeFastMath(void)
00071 {
00072     return FP_SIZE;
00073 }
00074 
00075 
00076 /* Functions */
00077 
00078 void fp_add(fp_int *a, fp_int *b, fp_int *c)
00079 {
00080   int     sa, sb;
00081 
00082   /* get sign of both inputs */
00083   sa = a->sign;
00084   sb = b->sign;
00085 
00086   /* handle two cases, not four */
00087   if (sa == sb) {
00088     /* both positive or both negative */
00089     /* add their magnitudes, copy the sign */
00090     c->sign = sa;
00091     s_fp_add (a, b, c);
00092   } else {
00093     /* one positive, the other negative */
00094     /* subtract the one with the greater magnitude from */
00095     /* the one of the lesser magnitude.  The result gets */
00096     /* the sign of the one with the greater magnitude. */
00097     if (fp_cmp_mag (a, b) == FP_LT) {
00098       c->sign = sb;
00099       s_fp_sub (b, a, c);
00100     } else {
00101       c->sign = sa;
00102       s_fp_sub (a, b, c);
00103     }
00104   }
00105 }
00106 
00107 /* unsigned addition */
00108 void s_fp_add(fp_int *a, fp_int *b, fp_int *c)
00109 {
00110   int      x, y, oldused;
00111   fp_word  t;
00112 
00113   y       = MAX(a->used, b->used);
00114   oldused = MIN(c->used, FP_SIZE);   /* help static analysis w/ largest size */
00115   c->used = y;
00116 
00117   t = 0;
00118   for (x = 0; x < y; x++) {
00119       t         += ((fp_word)a->dp[x]) + ((fp_word)b->dp[x]);
00120       c->dp[x]   = (fp_digit)t;
00121       t        >>= DIGIT_BIT;
00122   }
00123   if (t != 0 && x < FP_SIZE) {
00124      c->dp[c->used++] = (fp_digit)t;
00125      ++x;
00126   }
00127 
00128   c->used = x;
00129 
00130   /* zero any excess digits on the destination that we didn't write to */
00131   for (; x < oldused; x++) {
00132      c->dp[x] = 0;
00133   }
00134   fp_clamp(c);
00135 }
00136 
00137 /* c = a - b */
00138 void fp_sub(fp_int *a, fp_int *b, fp_int *c)
00139 {
00140   int     sa, sb;
00141 
00142   sa = a->sign;
00143   sb = b->sign;
00144 
00145   if (sa != sb) {
00146     /* subtract a negative from a positive, OR */
00147     /* subtract a positive from a negative. */
00148     /* In either case, ADD their magnitudes, */
00149     /* and use the sign of the first number. */
00150     c->sign = sa;
00151     s_fp_add (a, b, c);
00152   } else {
00153     /* subtract a positive from a positive, OR */
00154     /* subtract a negative from a negative. */
00155     /* First, take the difference between their */
00156     /* magnitudes, then... */
00157     if (fp_cmp_mag (a, b) != FP_LT) {
00158       /* Copy the sign from the first */
00159       c->sign = sa;
00160       /* The first has a larger or equal magnitude */
00161       s_fp_sub (a, b, c);
00162     } else {
00163       /* The result has the *opposite* sign from */
00164       /* the first number. */
00165       c->sign = (sa == FP_ZPOS) ? FP_NEG : FP_ZPOS;
00166       /* The second has a larger magnitude */
00167       s_fp_sub (b, a, c);
00168     }
00169   }
00170 }
00171 
00172 /* unsigned subtraction ||a|| >= ||b|| ALWAYS! */
00173 void s_fp_sub(fp_int *a, fp_int *b, fp_int *c)
00174 {
00175   int      x, oldbused, oldused;
00176   fp_word  t;
00177 
00178   oldused  = c->used;
00179   oldbused = b->used;
00180   c->used  = a->used;
00181   t       = 0;
00182   for (x = 0; x < oldbused; x++) {
00183      t         = ((fp_word)a->dp[x]) - (((fp_word)b->dp[x]) + t);
00184      c->dp[x]  = (fp_digit)t;
00185      t         = (t >> DIGIT_BIT)&1;
00186   }
00187   for (; x < a->used; x++) {
00188      t         = ((fp_word)a->dp[x]) - t;
00189      c->dp[x]  = (fp_digit)t;
00190      t         = (t >> DIGIT_BIT)&1;
00191    }
00192 
00193   /* zero any excess digits on the destination that we didn't write to */
00194   for (; x < oldused; x++) {
00195      c->dp[x] = 0;
00196   }
00197   fp_clamp(c);
00198 }
00199 
00200 /* c = a * b */
00201 void fp_mul(fp_int *A, fp_int *B, fp_int *C)
00202 {
00203     int   y, yy, oldused;
00204 
00205     oldused = C->used;
00206 
00207     y  = MAX(A->used, B->used);
00208     yy = MIN(A->used, B->used);
00209 
00210     /* call generic if we're out of range */
00211     if (y + yy > FP_SIZE) {
00212        fp_mul_comba(A, B, C);
00213        goto clean;
00214     }
00215 
00216     /* pick a comba (unrolled 4/8/16/32 x or rolled) based on the size
00217        of the largest input.  We also want to avoid doing excess mults if the
00218        inputs are not close to the next power of two.  That is, for example,
00219        if say y=17 then we would do (32-17)^2 = 225 unneeded multiplications
00220     */
00221 
00222 #if defined(TFM_MUL3) && FP_SIZE >= 6
00223         if (y <= 3) {
00224            fp_mul_comba3(A,B,C);
00225            goto clean;
00226         }
00227 #endif
00228 #if defined(TFM_MUL4) && FP_SIZE >= 8
00229         if (y == 4) {
00230            fp_mul_comba4(A,B,C);
00231            goto clean;
00232         }
00233 #endif
00234 #if defined(TFM_MUL6) && FP_SIZE >= 12
00235         if (y <= 6) {
00236            fp_mul_comba6(A,B,C);
00237            goto clean;
00238         }
00239 #endif
00240 #if defined(TFM_MUL7) && FP_SIZE >= 14
00241         if (y == 7) {
00242            fp_mul_comba7(A,B,C);
00243            goto clean;
00244         }
00245 #endif
00246 #if defined(TFM_MUL8) && FP_SIZE >= 16
00247         if (y == 8) {
00248            fp_mul_comba8(A,B,C);
00249            goto clean;
00250         }
00251 #endif
00252 #if defined(TFM_MUL9) && FP_SIZE >= 18
00253         if (y == 9) {
00254            fp_mul_comba9(A,B,C);
00255            goto clean;
00256         }
00257 #endif
00258 #if defined(TFM_MUL12) && FP_SIZE >= 24
00259         if (y <= 12) {
00260            fp_mul_comba12(A,B,C);
00261            goto clean;
00262         }
00263 #endif
00264 #if defined(TFM_MUL17) && FP_SIZE >= 34
00265         if (y <= 17) {
00266            fp_mul_comba17(A,B,C);
00267            goto clean;
00268         }
00269 #endif
00270 
00271 #if defined(TFM_SMALL_SET) && FP_SIZE >= 32
00272         if (y <= 16) {
00273            fp_mul_comba_small(A,B,C);
00274            goto clean;
00275         }
00276 #endif
00277 #if defined(TFM_MUL20) && FP_SIZE >= 40
00278         if (y <= 20) {
00279            fp_mul_comba20(A,B,C);
00280            goto clean;
00281         }
00282 #endif
00283 #if defined(TFM_MUL24) && FP_SIZE >= 48
00284         if (yy >= 16 && y <= 24) {
00285            fp_mul_comba24(A,B,C);
00286            goto clean;
00287         }
00288 #endif
00289 #if defined(TFM_MUL28) && FP_SIZE >= 56
00290         if (yy >= 20 && y <= 28) {
00291            fp_mul_comba28(A,B,C);
00292            goto clean;
00293         }
00294 #endif
00295 #if defined(TFM_MUL32) && FP_SIZE >= 64
00296         if (yy >= 24 && y <= 32) {
00297            fp_mul_comba32(A,B,C);
00298            goto clean;
00299         }
00300 #endif
00301 #if defined(TFM_MUL48) && FP_SIZE >= 96
00302         if (yy >= 40 && y <= 48) {
00303           fp_mul_comba48(A,B,C);
00304           goto clean;
00305         }
00306 #endif
00307 #if defined(TFM_MUL64) && FP_SIZE >= 128
00308         if (yy >= 56 && y <= 64) {
00309            fp_mul_comba64(A,B,C);
00310            goto clean;
00311         }
00312 #endif
00313         fp_mul_comba(A,B,C);
00314 
00315 clean:
00316     /* zero any excess digits on the destination that we didn't write to */
00317     for (y = C->used; y >= 0 && y < oldused; y++) {
00318         C->dp[y] = 0;
00319     }
00320 }
00321 
00322 void fp_mul_2(fp_int * a, fp_int * b)
00323 {
00324   int     x, oldused;
00325 
00326   oldused = b->used;
00327   b->used = a->used;
00328 
00329   {
00330     fp_digit r, rr, *tmpa, *tmpb;
00331 
00332     /* alias for source */
00333     tmpa = a->dp;
00334 
00335     /* alias for dest */
00336     tmpb = b->dp;
00337 
00338     /* carry */
00339     r = 0;
00340     for (x = 0; x < a->used; x++) {
00341 
00342       /* get what will be the *next* carry bit from the
00343        * MSB of the current digit
00344        */
00345       rr = *tmpa >> ((fp_digit)(DIGIT_BIT - 1));
00346 
00347       /* now shift up this digit, add in the carry [from the previous] */
00348       *tmpb++ = ((*tmpa++ << ((fp_digit)1)) | r);
00349 
00350       /* copy the carry that would be from the source
00351        * digit into the next iteration
00352        */
00353       r = rr;
00354     }
00355 
00356     /* new leading digit? */
00357     if (r != 0 && b->used != (FP_SIZE-1)) {
00358       /* add a MSB which is always 1 at this point */
00359       *tmpb = 1;
00360       ++(b->used);
00361     }
00362 
00363     /* zero any excess digits on the destination that we didn't write to */
00364     tmpb = b->dp + b->used;
00365     for (x = b->used; x < oldused; x++) {
00366       *tmpb++ = 0;
00367     }
00368   }
00369   b->sign = a->sign;
00370 }
00371 
00372 /* c = a * b */
00373 void fp_mul_d(fp_int *a, fp_digit b, fp_int *c)
00374 {
00375    fp_word  w;
00376    int      x, oldused;
00377 
00378    oldused = c->used;
00379    c->used = a->used;
00380    c->sign = a->sign;
00381    w       = 0;
00382    for (x = 0; x < a->used; x++) {
00383        w         = ((fp_word)a->dp[x]) * ((fp_word)b) + w;
00384        c->dp[x]  = (fp_digit)w;
00385        w         = w >> DIGIT_BIT;
00386    }
00387    if (w != 0 && (a->used != FP_SIZE)) {
00388       c->dp[c->used++] = (fp_digit) w;
00389       ++x;
00390    }
00391 
00392    /* zero any excess digits on the destination that we didn't write to */
00393    for (; x < oldused; x++) {
00394       c->dp[x] = 0;
00395    }
00396    fp_clamp(c);
00397 }
00398 
00399 /* c = a * 2**d */
00400 void fp_mul_2d(fp_int *a, int b, fp_int *c)
00401 {
00402    fp_digit carry, carrytmp, shift;
00403    int x;
00404 
00405    /* copy it */
00406    fp_copy(a, c);
00407 
00408    /* handle whole digits */
00409    if (b >= DIGIT_BIT) {
00410       fp_lshd(c, b/DIGIT_BIT);
00411    }
00412    b %= DIGIT_BIT;
00413 
00414    /* shift the digits */
00415    if (b != 0) {
00416       carry = 0;
00417       shift = DIGIT_BIT - b;
00418       for (x = 0; x < c->used; x++) {
00419           carrytmp = c->dp[x] >> shift;
00420           c->dp[x] = (c->dp[x] << b) + carry;
00421           carry = carrytmp;
00422       }
00423       /* store last carry if room */
00424       if (carry && x < FP_SIZE) {
00425          c->dp[c->used++] = carry;
00426       }
00427    }
00428    fp_clamp(c);
00429 }
00430 
00431 /* generic PxQ multiplier */
00432 #if defined(HAVE_INTEL_MULX)
00433 
00434 WC_INLINE static void fp_mul_comba_mulx(fp_int *A, fp_int *B, fp_int *C)
00435 
00436 {
00437    int       ix, iy, iz, pa;
00438    fp_int    tmp, *dst;
00439 
00440    /* get size of output and trim */
00441    pa = A->used + B->used;
00442    if (pa >= FP_SIZE) {
00443       pa = FP_SIZE-1;
00444    }
00445 
00446    /* Always take branch to use tmp variable. This avoids a cache attack for
00447     * determining if C equals A */
00448    if (1) {
00449       fp_init(&tmp);
00450       dst = &tmp;
00451    }
00452 
00453    TFM_INTEL_MUL_COMBA(A, B, dst) ;
00454 
00455   dst->used = pa;
00456   dst->sign = A->sign ^ B->sign;
00457   fp_clamp(dst);
00458   fp_copy(dst, C);
00459 }
00460 #endif
00461 
00462 void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
00463 {
00464    int       ix, iy, iz, tx, ty, pa;
00465    fp_digit  c0, c1, c2, *tmpx, *tmpy;
00466    fp_int    tmp, *dst;
00467 
00468    IF_HAVE_INTEL_MULX(fp_mul_comba_mulx(A, B, C), return) ;
00469 
00470    COMBA_START;
00471    COMBA_CLEAR;
00472 
00473    /* get size of output and trim */
00474    pa = A->used + B->used;
00475    if (pa >= FP_SIZE) {
00476       pa = FP_SIZE-1;
00477    }
00478 
00479    /* Always take branch to use tmp variable. This avoids a cache attack for
00480     * determining if C equals A */
00481    if (1) {
00482       fp_init(&tmp);
00483       dst = &tmp;
00484    }
00485 
00486    for (ix = 0; ix < pa; ix++) {
00487       /* get offsets into the two bignums */
00488       ty = MIN(ix, (B->used > 0 ? B->used - 1 : 0));
00489       tx = ix - ty;
00490 
00491       /* setup temp aliases */
00492       tmpx = A->dp + tx;
00493       tmpy = B->dp + ty;
00494 
00495       /* this is the number of times the loop will iterate, essentially its
00496          while (tx++ < a->used && ty-- >= 0) { ... }
00497        */
00498       iy = MIN(A->used-tx, ty+1);
00499 
00500       /* execute loop */
00501       COMBA_FORWARD;
00502       for (iz = 0; iz < iy; ++iz) {
00503           fp_digit _tmpx = *tmpx++;
00504           fp_digit _tmpy = *tmpy--;
00505           MULADD(_tmpx, _tmpy);
00506       }
00507 
00508       /* store term */
00509       COMBA_STORE(dst->dp[ix]);
00510   }
00511   COMBA_FINI;
00512 
00513   dst->used = pa;
00514   dst->sign = A->sign ^ B->sign;
00515   fp_clamp(dst);
00516   fp_copy(dst, C);
00517 }
00518 
00519 /* a/b => cb + d == a */
00520 int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
00521 {
00522   fp_int  q, x, y, t1, t2;
00523   int     n, t, i, norm, neg;
00524 
00525   /* is divisor zero ? */
00526   if (fp_iszero (b) == FP_YES) {
00527     return FP_VAL;
00528   }
00529 
00530   /* if a < b then q=0, r = a */
00531   if (fp_cmp_mag (a, b) == FP_LT) {
00532     if (d != NULL) {
00533       fp_copy (a, d);
00534     }
00535     if (c != NULL) {
00536       fp_zero (c);
00537     }
00538     return FP_OKAY;
00539   }
00540 
00541   fp_init(&q);
00542   q.used = a->used + 2;
00543 
00544   fp_init(&t1);
00545   fp_init(&t2);
00546   fp_init_copy(&x, a);
00547   fp_init_copy(&y, b);
00548 
00549   /* fix the sign */
00550   neg = (a->sign == b->sign) ? FP_ZPOS : FP_NEG;
00551   x.sign = y.sign = FP_ZPOS;
00552 
00553   /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
00554   norm = fp_count_bits(&y) % DIGIT_BIT;
00555   if (norm < (int)(DIGIT_BIT-1)) {
00556      norm = (DIGIT_BIT-1) - norm;
00557      fp_mul_2d (&x, norm, &x);
00558      fp_mul_2d (&y, norm, &y);
00559   } else {
00560      norm = 0;
00561   }
00562 
00563   /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
00564   n = x.used - 1;
00565   t = y.used - 1;
00566 
00567   /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
00568   fp_lshd (&y, n - t); /* y = y*b**{n-t} */
00569 
00570   while (fp_cmp (&x, &y) != FP_LT) {
00571     ++(q.dp[n - t]);
00572     fp_sub (&x, &y, &x);
00573   }
00574 
00575   /* reset y by shifting it back down */
00576   fp_rshd (&y, n - t);
00577 
00578   /* step 3. for i from n down to (t + 1) */
00579   for (i = n; i >= (t + 1); i--) {
00580     if (i > x.used) {
00581       continue;
00582     }
00583 
00584     /* step 3.1 if xi == yt then set q{i-t-1} to b-1,
00585      * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
00586     if (x.dp[i] == y.dp[t]) {
00587       q.dp[i - t - 1] = (fp_digit) ((((fp_word)1) << DIGIT_BIT) - 1);
00588     } else {
00589       fp_word tmp;
00590       tmp = ((fp_word) x.dp[i]) << ((fp_word) DIGIT_BIT);
00591       tmp |= ((fp_word) x.dp[i - 1]);
00592       tmp /= ((fp_word)y.dp[t]);
00593       q.dp[i - t - 1] = (fp_digit) (tmp);
00594     }
00595 
00596     /* while (q{i-t-1} * (yt * b + y{t-1})) >
00597              xi * b**2 + xi-1 * b + xi-2
00598 
00599        do q{i-t-1} -= 1;
00600     */
00601     q.dp[i - t - 1] = (q.dp[i - t - 1] + 1);
00602     do {
00603       q.dp[i - t - 1] = (q.dp[i - t - 1] - 1);
00604 
00605       /* find left hand */
00606       fp_zero (&t1);
00607       t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
00608       t1.dp[1] = y.dp[t];
00609       t1.used = 2;
00610       fp_mul_d (&t1, q.dp[i - t - 1], &t1);
00611 
00612       /* find right hand */
00613       t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
00614       t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
00615       t2.dp[2] = x.dp[i];
00616       t2.used = 3;
00617     } while (fp_cmp_mag(&t1, &t2) == FP_GT);
00618 
00619     /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
00620     fp_mul_d (&y, q.dp[i - t - 1], &t1);
00621     fp_lshd  (&t1, i - t - 1);
00622     fp_sub   (&x, &t1, &x);
00623 
00624     /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
00625     if (x.sign == FP_NEG) {
00626       fp_copy (&y, &t1);
00627       fp_lshd (&t1, i - t - 1);
00628       fp_add (&x, &t1, &x);
00629       q.dp[i - t - 1] = q.dp[i - t - 1] - 1;
00630     }
00631   }
00632 
00633   /* now q is the quotient and x is the remainder
00634    * [which we have to normalize]
00635    */
00636 
00637   /* get sign before writing to c */
00638   x.sign = x.used == 0 ? FP_ZPOS : a->sign;
00639 
00640   if (c != NULL) {
00641     fp_clamp (&q);
00642     fp_copy (&q, c);
00643     c->sign = neg;
00644   }
00645 
00646   if (d != NULL) {
00647     fp_div_2d (&x, norm, &x, NULL);
00648 
00649     /* zero any excess digits on the destination that we didn't write to */
00650     for (i = b->used; i < x.used; i++) {
00651         x.dp[i] = 0;
00652     }
00653     fp_clamp(&x);
00654     fp_copy (&x, d);
00655   }
00656 
00657   return FP_OKAY;
00658 }
00659 
00660 /* b = a/2 */
00661 void fp_div_2(fp_int * a, fp_int * b)
00662 {
00663   int     x, oldused;
00664 
00665   oldused = b->used;
00666   b->used = a->used;
00667   {
00668     fp_digit r, rr, *tmpa, *tmpb;
00669 
00670     /* source alias */
00671     tmpa = a->dp + b->used - 1;
00672 
00673     /* dest alias */
00674     tmpb = b->dp + b->used - 1;
00675 
00676     /* carry */
00677     r = 0;
00678     for (x = b->used - 1; x >= 0; x--) {
00679       /* get the carry for the next iteration */
00680       rr = *tmpa & 1;
00681 
00682       /* shift the current digit, add in carry and store */
00683       *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
00684 
00685       /* forward carry to next iteration */
00686       r = rr;
00687     }
00688 
00689     /* zero any excess digits on the destination that we didn't write to */
00690     tmpb = b->dp + b->used;
00691     for (x = b->used; x < oldused; x++) {
00692       *tmpb++ = 0;
00693     }
00694   }
00695   b->sign = a->sign;
00696   fp_clamp (b);
00697 }
00698 
00699 /* c = a / 2**b */
00700 void fp_div_2d(fp_int *a, int b, fp_int *c, fp_int *d)
00701 {
00702   int      D;
00703   fp_int   t;
00704 
00705   /* if the shift count is <= 0 then we do no work */
00706   if (b <= 0) {
00707     fp_copy (a, c);
00708     if (d != NULL) {
00709       fp_zero (d);
00710     }
00711     return;
00712   }
00713 
00714   fp_init(&t);
00715 
00716   /* get the remainder */
00717   if (d != NULL) {
00718     fp_mod_2d (a, b, &t);
00719   }
00720 
00721   /* copy */
00722   fp_copy(a, c);
00723 
00724   /* shift by as many digits in the bit count */
00725   if (b >= (int)DIGIT_BIT) {
00726     fp_rshd (c, b / DIGIT_BIT);
00727   }
00728 
00729   /* shift any bit count < DIGIT_BIT */
00730   D = (b % DIGIT_BIT);
00731   if (D != 0) {
00732     fp_rshb(c, D);
00733   }
00734   fp_clamp (c);
00735   if (d != NULL) {
00736     fp_copy (&t, d);
00737   }
00738 }
00739 
00740 /* c = a mod b, 0 <= c < b  */
00741 int fp_mod(fp_int *a, fp_int *b, fp_int *c)
00742 {
00743    fp_int t;
00744    int    err;
00745 
00746    fp_init(&t);
00747    if ((err = fp_div(a, b, NULL, &t)) != FP_OKAY) {
00748       return err;
00749    }
00750    if (t.sign != b->sign) {
00751       fp_add(&t, b, c);
00752    } else {
00753       fp_copy(&t, c);
00754   }
00755   return FP_OKAY;
00756 }
00757 
00758 /* c = a mod 2**d */
00759 void fp_mod_2d(fp_int *a, int b, fp_int *c)
00760 {
00761    int x;
00762 
00763    /* zero if count less than or equal to zero */
00764    if (b <= 0) {
00765       fp_zero(c);
00766       return;
00767    }
00768 
00769    /* get copy of input */
00770    fp_copy(a, c);
00771 
00772    /* if 2**d is larger than we just return */
00773    if (b >= (DIGIT_BIT * a->used)) {
00774       return;
00775    }
00776 
00777   /* zero digits above the last digit of the modulus */
00778   for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
00779     c->dp[x] = 0;
00780   }
00781   /* clear the digit that is not completely outside/inside the modulus */
00782   c->dp[b / DIGIT_BIT] &= ~((fp_digit)0) >> (DIGIT_BIT - b);
00783   fp_clamp (c);
00784 }
00785 
00786 static int fp_invmod_slow (fp_int * a, fp_int * b, fp_int * c)
00787 {
00788   fp_int  x, y, u, v, A, B, C, D;
00789   int     res;
00790 
00791   /* b cannot be negative */
00792   if (b->sign == FP_NEG || fp_iszero(b) == FP_YES) {
00793     return FP_VAL;
00794   }
00795 
00796   /* init temps */
00797   fp_init(&x);    fp_init(&y);
00798   fp_init(&u);    fp_init(&v);
00799   fp_init(&A);    fp_init(&B);
00800   fp_init(&C);    fp_init(&D);
00801 
00802   /* x = a, y = b */
00803   if ((res = fp_mod(a, b, &x)) != FP_OKAY) {
00804       return res;
00805   }
00806   fp_copy(b, &y);
00807 
00808   /* 2. [modified] if x,y are both even then return an error! */
00809   if (fp_iseven (&x) == FP_YES && fp_iseven (&y) == FP_YES) {
00810     return FP_VAL;
00811   }
00812 
00813   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
00814   fp_copy (&x, &u);
00815   fp_copy (&y, &v);
00816   fp_set (&A, 1);
00817   fp_set (&D, 1);
00818 
00819 top:
00820   /* 4.  while u is even do */
00821   while (fp_iseven (&u) == FP_YES) {
00822     /* 4.1 u = u/2 */
00823     fp_div_2 (&u, &u);
00824 
00825     /* 4.2 if A or B is odd then */
00826     if (fp_isodd (&A) == FP_YES || fp_isodd (&B) == FP_YES) {
00827       /* A = (A+y)/2, B = (B-x)/2 */
00828       fp_add (&A, &y, &A);
00829       fp_sub (&B, &x, &B);
00830     }
00831     /* A = A/2, B = B/2 */
00832     fp_div_2 (&A, &A);
00833     fp_div_2 (&B, &B);
00834   }
00835 
00836   /* 5.  while v is even do */
00837   while (fp_iseven (&v) == FP_YES) {
00838     /* 5.1 v = v/2 */
00839     fp_div_2 (&v, &v);
00840 
00841     /* 5.2 if C or D is odd then */
00842     if (fp_isodd (&C) == FP_YES || fp_isodd (&D) == FP_YES) {
00843       /* C = (C+y)/2, D = (D-x)/2 */
00844       fp_add (&C, &y, &C);
00845       fp_sub (&D, &x, &D);
00846     }
00847     /* C = C/2, D = D/2 */
00848     fp_div_2 (&C, &C);
00849     fp_div_2 (&D, &D);
00850   }
00851 
00852   /* 6.  if u >= v then */
00853   if (fp_cmp (&u, &v) != FP_LT) {
00854     /* u = u - v, A = A - C, B = B - D */
00855     fp_sub (&u, &v, &u);
00856     fp_sub (&A, &C, &A);
00857     fp_sub (&B, &D, &B);
00858   } else {
00859     /* v - v - u, C = C - A, D = D - B */
00860     fp_sub (&v, &u, &v);
00861     fp_sub (&C, &A, &C);
00862     fp_sub (&D, &B, &D);
00863   }
00864 
00865   /* if not zero goto step 4 */
00866   if (fp_iszero (&u) == FP_NO)
00867     goto top;
00868 
00869   /* now a = C, b = D, gcd == g*v */
00870 
00871   /* if v != 1 then there is no inverse */
00872   if (fp_cmp_d (&v, 1) != FP_EQ) {
00873     return FP_VAL;
00874   }
00875 
00876   /* if its too low */
00877   while (fp_cmp_d(&C, 0) == FP_LT) {
00878       fp_add(&C, b, &C);
00879   }
00880 
00881   /* too big */
00882   while (fp_cmp_mag(&C, b) != FP_LT) {
00883       fp_sub(&C, b, &C);
00884   }
00885 
00886   /* C is now the inverse */
00887   fp_copy(&C, c);
00888   return FP_OKAY;
00889 }
00890 
00891 /* c = 1/a (mod b) for odd b only */
00892 int fp_invmod(fp_int *a, fp_int *b, fp_int *c)
00893 {
00894   fp_int  x, y, u, v, B, D;
00895   int     neg;
00896 
00897   /* 2. [modified] b must be odd   */
00898   if (fp_iseven (b) == FP_YES) {
00899     return fp_invmod_slow(a,b,c);
00900   }
00901 
00902   /* init all our temps */
00903   fp_init(&x);  fp_init(&y);
00904   fp_init(&u);  fp_init(&v);
00905   fp_init(&B);  fp_init(&D);
00906 
00907   /* x == modulus, y == value to invert */
00908   fp_copy(b, &x);
00909 
00910   /* we need y = |a| */
00911   fp_abs(a, &y);
00912 
00913   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
00914   fp_copy(&x, &u);
00915   fp_copy(&y, &v);
00916   fp_set (&D, 1);
00917 
00918 top:
00919   /* 4.  while u is even do */
00920   while (fp_iseven (&u) == FP_YES) {
00921     /* 4.1 u = u/2 */
00922     fp_div_2 (&u, &u);
00923 
00924     /* 4.2 if B is odd then */
00925     if (fp_isodd (&B) == FP_YES) {
00926       fp_sub (&B, &x, &B);
00927     }
00928     /* B = B/2 */
00929     fp_div_2 (&B, &B);
00930   }
00931 
00932   /* 5.  while v is even do */
00933   while (fp_iseven (&v) == FP_YES) {
00934     /* 5.1 v = v/2 */
00935     fp_div_2 (&v, &v);
00936 
00937     /* 5.2 if D is odd then */
00938     if (fp_isodd (&D) == FP_YES) {
00939       /* D = (D-x)/2 */
00940       fp_sub (&D, &x, &D);
00941     }
00942     /* D = D/2 */
00943     fp_div_2 (&D, &D);
00944   }
00945 
00946   /* 6.  if u >= v then */
00947   if (fp_cmp (&u, &v) != FP_LT) {
00948     /* u = u - v, B = B - D */
00949     fp_sub (&u, &v, &u);
00950     fp_sub (&B, &D, &B);
00951   } else {
00952     /* v - v - u, D = D - B */
00953     fp_sub (&v, &u, &v);
00954     fp_sub (&D, &B, &D);
00955   }
00956 
00957   /* if not zero goto step 4 */
00958   if (fp_iszero (&u) == FP_NO) {
00959     goto top;
00960   }
00961 
00962   /* now a = C, b = D, gcd == g*v */
00963 
00964   /* if v != 1 then there is no inverse */
00965   if (fp_cmp_d (&v, 1) != FP_EQ) {
00966     return FP_VAL;
00967   }
00968 
00969   /* b is now the inverse */
00970   neg = a->sign;
00971   while (D.sign == FP_NEG) {
00972     fp_add (&D, b, &D);
00973   }
00974   /* too big */
00975   while (fp_cmp_mag(&D, b) != FP_LT) {
00976     fp_sub(&D, b, &D);
00977   }
00978   fp_copy (&D, c);
00979   c->sign = neg;
00980   return FP_OKAY;
00981 }
00982 
00983 /* d = a * b (mod c) */
00984 int fp_mulmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
00985 {
00986   int err;
00987   fp_int t;
00988 
00989   fp_init(&t);
00990   fp_mul(a, b, &t);
00991 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
00992   if (d->size < FP_SIZE) {
00993     err = fp_mod(&t, c, &t);
00994     fp_copy(&t, d);
00995   } else
00996 #endif
00997   {
00998     err = fp_mod(&t, c, d);
00999   }
01000 
01001   return err;
01002 }
01003 
01004 /* d = a - b (mod c) */
01005 int fp_submod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
01006 {
01007   int err;
01008   fp_int t;
01009 
01010   fp_init(&t);
01011   fp_sub(a, b, &t);
01012 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
01013   if (d->size < FP_SIZE) {
01014     err = fp_mod(&t, c, &t);
01015     fp_copy(&t, d);
01016   } else
01017 #endif
01018   {
01019     err = fp_mod(&t, c, d);
01020   }
01021 
01022   return err;
01023 }
01024 
01025 /* d = a + b (mod c) */
01026 int fp_addmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
01027 {
01028   int err;
01029   fp_int t;
01030 
01031   fp_init(&t);
01032   fp_add(a, b, &t);
01033 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
01034   if (d->size < FP_SIZE) {
01035     err = fp_mod(&t, c, &t);
01036     fp_copy(&t, d);
01037   } else
01038 #endif
01039   {
01040     err = fp_mod(&t, c, d);
01041   }
01042 
01043   return err;
01044 }
01045 
01046 #ifdef TFM_TIMING_RESISTANT
01047 
01048 /* timing resistant montgomery ladder based exptmod
01049    Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder",
01050    Cryptographic Hardware and Embedded Systems, CHES 2002
01051 */
01052 static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
01053 {
01054 #ifdef WC_NO_CACHE_RESISTANT
01055   fp_int   R[2];
01056 #else
01057   fp_int   R[3];   /* need a temp for cache resistance */
01058 #endif
01059   fp_digit buf, mp;
01060   int      err, bitcnt, digidx, y;
01061 
01062   /* now setup montgomery  */
01063   if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) {
01064      return err;
01065   }
01066 
01067   fp_init(&R[0]);
01068   fp_init(&R[1]);
01069 #ifndef WC_NO_CACHE_RESISTANT
01070   fp_init(&R[2]);
01071 #endif
01072 
01073   /* now we need R mod m */
01074   fp_montgomery_calc_normalization (&R[0], P);
01075 
01076   /* now set R[0][1] to G * R mod m */
01077   if (fp_cmp_mag(P, G) != FP_GT) {
01078      /* G > P so we reduce it first */
01079      fp_mod(G, P, &R[1]);
01080   } else {
01081      fp_copy(G, &R[1]);
01082   }
01083   fp_mulmod (&R[1], &R[0], P, &R[1]);
01084 
01085   /* for j = t-1 downto 0 do
01086         r_!k = R0*R1; r_k = r_k^2
01087   */
01088 
01089   /* set initial mode and bit cnt */
01090   bitcnt = 1;
01091   buf    = 0;
01092   digidx = X->used - 1;
01093 
01094   for (;;) {
01095     /* grab next digit as required */
01096     if (--bitcnt == 0) {
01097       /* if digidx == -1 we are out of digits so break */
01098       if (digidx == -1) {
01099         break;
01100       }
01101       /* read next digit and reset bitcnt */
01102       buf    = X->dp[digidx--];
01103       bitcnt = (int)DIGIT_BIT;
01104     }
01105 
01106     /* grab the next msb from the exponent */
01107     y     = (int)(buf >> (DIGIT_BIT - 1)) & 1;
01108     buf <<= (fp_digit)1;
01109 
01110     /* do ops */
01111     fp_mul(&R[0], &R[1], &R[y^1]); fp_montgomery_reduce(&R[y^1], P, mp);
01112 
01113 #ifdef WC_NO_CACHE_RESISTANT
01114     fp_sqr(&R[y], &R[y]);          fp_montgomery_reduce(&R[y], P, mp);
01115 #else
01116     /* instead of using R[y] for sqr, which leaks key bit to cache monitor,
01117      * use R[2] as temp, make sure address calc is constant, keep
01118      * &R[0] and &R[1] in cache */
01119     fp_copy((fp_int*) ( ((wolfssl_word)&R[0] & wc_off_on_addr[y^1]) +
01120                         ((wolfssl_word)&R[1] & wc_off_on_addr[y]) ),
01121             &R[2]);
01122     fp_sqr(&R[2], &R[2]);          fp_montgomery_reduce(&R[2], P, mp);
01123     fp_copy(&R[2],
01124             (fp_int*) ( ((wolfssl_word)&R[0] & wc_off_on_addr[y^1]) +
01125                         ((wolfssl_word)&R[1] & wc_off_on_addr[y]) ) );
01126 #endif /* WC_NO_CACHE_RESISTANT */
01127   }
01128 
01129    fp_montgomery_reduce(&R[0], P, mp);
01130    fp_copy(&R[0], Y);
01131    return FP_OKAY;
01132 }
01133 
01134 #else /* TFM_TIMING_RESISTANT */
01135 
01136 /* y = g**x (mod b)
01137  * Some restrictions... x must be positive and < b
01138  */
01139 static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
01140 {
01141   fp_int   res;
01142   fp_digit buf, mp;
01143   int      err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
01144 #ifdef WOLFSSL_SMALL_STACK
01145   fp_int  *M;
01146 #else
01147   fp_int   M[64];
01148 #endif
01149 
01150   /* find window size */
01151   x = fp_count_bits (X);
01152   if (x <= 21) {
01153     winsize = 1;
01154   } else if (x <= 36) {
01155     winsize = 3;
01156   } else if (x <= 140) {
01157     winsize = 4;
01158   } else if (x <= 450) {
01159     winsize = 5;
01160   } else {
01161     winsize = 6;
01162   }
01163 
01164   /* now setup montgomery  */
01165   if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) {
01166      return err;
01167   }
01168 
01169 #ifdef WOLFSSL_SMALL_STACK
01170   /* only allocate space for what's needed */
01171   M = (fp_int*)XMALLOC(sizeof(fp_int)*(1 << winsize), NULL, DYNAMIC_TYPE_TMP_BUFFER);
01172   if (M == NULL) {
01173      return FP_MEM;
01174   }
01175 #endif
01176 
01177   /* init M array */
01178   for(x = 0; x < (1 << winsize); x++)
01179     fp_init(&M[x]);
01180 
01181   /* setup result */
01182   fp_init(&res);
01183 
01184   /* create M table
01185    *
01186    * The M table contains powers of the input base, e.g. M[x] = G^x mod P
01187    *
01188    * The first half of the table is not computed though except for M[0] and M[1]
01189    */
01190 
01191    /* now we need R mod m */
01192    fp_montgomery_calc_normalization (&res, P);
01193 
01194    /* now set M[1] to G * R mod m */
01195    if (fp_cmp_mag(P, G) != FP_GT) {
01196       /* G > P so we reduce it first */
01197       fp_mod(G, P, &M[1]);
01198    } else {
01199       fp_copy(G, &M[1]);
01200    }
01201    fp_mulmod (&M[1], &res, P, &M[1]);
01202 
01203   /* compute the value at M[1<<(winsize-1)] by
01204    * squaring M[1] (winsize-1) times */
01205   fp_copy (&M[1], &M[1 << (winsize - 1)]);
01206   for (x = 0; x < (winsize - 1); x++) {
01207     fp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)]);
01208     fp_montgomery_reduce (&M[1 << (winsize - 1)], P, mp);
01209   }
01210 
01211   /* create upper table */
01212   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
01213     fp_mul(&M[x - 1], &M[1], &M[x]);
01214     fp_montgomery_reduce(&M[x], P, mp);
01215   }
01216 
01217   /* set initial mode and bit cnt */
01218   mode   = 0;
01219   bitcnt = 1;
01220   buf    = 0;
01221   digidx = X->used - 1;
01222   bitcpy = 0;
01223   bitbuf = 0;
01224 
01225   for (;;) {
01226     /* grab next digit as required */
01227     if (--bitcnt == 0) {
01228       /* if digidx == -1 we are out of digits so break */
01229       if (digidx == -1) {
01230         break;
01231       }
01232       /* read next digit and reset bitcnt */
01233       buf    = X->dp[digidx--];
01234       bitcnt = (int)DIGIT_BIT;
01235     }
01236 
01237     /* grab the next msb from the exponent */
01238     y     = (int)(buf >> (DIGIT_BIT - 1)) & 1;
01239     buf <<= (fp_digit)1;
01240 
01241     /* if the bit is zero and mode == 0 then we ignore it
01242      * These represent the leading zero bits before the first 1 bit
01243      * in the exponent.  Technically this opt is not required but it
01244      * does lower the # of trivial squaring/reductions used
01245      */
01246     if (mode == 0 && y == 0) {
01247       continue;
01248     }
01249 
01250     /* if the bit is zero and mode == 1 then we square */
01251     if (mode == 1 && y == 0) {
01252       fp_sqr(&res, &res);
01253       fp_montgomery_reduce(&res, P, mp);
01254       continue;
01255     }
01256 
01257     /* else we add it to the window */
01258     bitbuf |= (y << (winsize - ++bitcpy));
01259     mode    = 2;
01260 
01261     if (bitcpy == winsize) {
01262       /* ok window is filled so square as required and multiply  */
01263       /* square first */
01264       for (x = 0; x < winsize; x++) {
01265         fp_sqr(&res, &res);
01266         fp_montgomery_reduce(&res, P, mp);
01267       }
01268 
01269       /* then multiply */
01270       fp_mul(&res, &M[bitbuf], &res);
01271       fp_montgomery_reduce(&res, P, mp);
01272 
01273       /* empty window and reset */
01274       bitcpy = 0;
01275       bitbuf = 0;
01276       mode   = 1;
01277     }
01278   }
01279 
01280   /* if bits remain then square/multiply */
01281   if (mode == 2 && bitcpy > 0) {
01282     /* square then multiply if the bit is set */
01283     for (x = 0; x < bitcpy; x++) {
01284       fp_sqr(&res, &res);
01285       fp_montgomery_reduce(&res, P, mp);
01286 
01287       /* get next bit of the window */
01288       bitbuf <<= 1;
01289       if ((bitbuf & (1 << winsize)) != 0) {
01290         /* then multiply */
01291         fp_mul(&res, &M[1], &res);
01292         fp_montgomery_reduce(&res, P, mp);
01293       }
01294     }
01295   }
01296 
01297   /* fixup result if Montgomery reduction is used
01298    * recall that any value in a Montgomery system is
01299    * actually multiplied by R mod n.  So we have
01300    * to reduce one more time to cancel out the factor
01301    * of R.
01302    */
01303   fp_montgomery_reduce(&res, P, mp);
01304 
01305   /* swap res with Y */
01306   fp_copy (&res, Y);
01307 
01308 #ifdef WOLFSSL_SMALL_STACK
01309   XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
01310 #endif
01311 
01312   return FP_OKAY;
01313 }
01314 
01315 #endif /* TFM_TIMING_RESISTANT */
01316 
01317 int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
01318 {
01319    /* prevent overflows */
01320    if (P->used > (FP_SIZE/2)) {
01321       return FP_VAL;
01322    }
01323 
01324    if (X->sign == FP_NEG) {
01325 #ifndef POSITIVE_EXP_ONLY  /* reduce stack if assume no negatives */
01326       int    err;
01327       fp_int tmp;
01328 
01329       /* yes, copy G and invmod it */
01330       fp_init_copy(&tmp, G);
01331       if ((err = fp_invmod(&tmp, P, &tmp)) != FP_OKAY) {
01332          return err;
01333       }
01334       X->sign = FP_ZPOS;
01335       err =  _fp_exptmod(&tmp, X, P, Y);
01336       if (X != Y) {
01337          X->sign = FP_NEG;
01338       }
01339       return err;
01340 #else
01341       return FP_VAL;
01342 #endif
01343    }
01344    else {
01345       /* Positive exponent so just exptmod */
01346       return _fp_exptmod(G, X, P, Y);
01347    }
01348 }
01349 
01350 /* computes a = 2**b */
01351 void fp_2expt(fp_int *a, int b)
01352 {
01353    int     z;
01354 
01355    /* zero a as per default */
01356    fp_zero (a);
01357 
01358    if (b < 0) {
01359       return;
01360    }
01361 
01362    z = b / DIGIT_BIT;
01363    if (z >= FP_SIZE) {
01364       return;
01365    }
01366 
01367   /* set the used count of where the bit will go */
01368   a->used = z + 1;
01369 
01370   /* put the single bit in its place */
01371   a->dp[z] = ((fp_digit)1) << (b % DIGIT_BIT);
01372 }
01373 
01374 /* b = a*a  */
01375 void fp_sqr(fp_int *A, fp_int *B)
01376 {
01377     int y, oldused;
01378 
01379     oldused = B->used;
01380     y = A->used;
01381 
01382     /* call generic if we're out of range */
01383     if (y + y > FP_SIZE) {
01384        fp_sqr_comba(A, B);
01385        goto clean;
01386     }
01387 
01388 #if defined(TFM_SQR3) && FP_SIZE >= 6
01389         if (y <= 3) {
01390            fp_sqr_comba3(A,B);
01391            goto clean;
01392         }
01393 #endif
01394 #if defined(TFM_SQR4) && FP_SIZE >= 8
01395         if (y == 4) {
01396            fp_sqr_comba4(A,B);
01397            goto clean;
01398         }
01399 #endif
01400 #if defined(TFM_SQR6) && FP_SIZE >= 12
01401         if (y <= 6) {
01402            fp_sqr_comba6(A,B);
01403            goto clean;
01404         }
01405 #endif
01406 #if defined(TFM_SQR7) && FP_SIZE >= 14
01407         if (y == 7) {
01408            fp_sqr_comba7(A,B);
01409            goto clean;
01410         }
01411 #endif
01412 #if defined(TFM_SQR8) && FP_SIZE >= 16
01413         if (y == 8) {
01414            fp_sqr_comba8(A,B);
01415            goto clean;
01416         }
01417 #endif
01418 #if defined(TFM_SQR9) && FP_SIZE >= 18
01419         if (y == 9) {
01420            fp_sqr_comba9(A,B);
01421            goto clean;
01422         }
01423 #endif
01424 #if defined(TFM_SQR12) && FP_SIZE >= 24
01425         if (y <= 12) {
01426            fp_sqr_comba12(A,B);
01427            goto clean;
01428         }
01429 #endif
01430 #if defined(TFM_SQR17) && FP_SIZE >= 34
01431         if (y <= 17) {
01432            fp_sqr_comba17(A,B);
01433            goto clean;
01434         }
01435 #endif
01436 #if defined(TFM_SMALL_SET)
01437         if (y <= 16) {
01438            fp_sqr_comba_small(A,B);
01439            goto clean;
01440         }
01441 #endif
01442 #if defined(TFM_SQR20) && FP_SIZE >= 40
01443         if (y <= 20) {
01444            fp_sqr_comba20(A,B);
01445            goto clean;
01446         }
01447 #endif
01448 #if defined(TFM_SQR24) && FP_SIZE >= 48
01449         if (y <= 24) {
01450            fp_sqr_comba24(A,B);
01451            goto clean;
01452         }
01453 #endif
01454 #if defined(TFM_SQR28) && FP_SIZE >= 56
01455         if (y <= 28) {
01456            fp_sqr_comba28(A,B);
01457            goto clean;
01458         }
01459 #endif
01460 #if defined(TFM_SQR32) && FP_SIZE >= 64
01461         if (y <= 32) {
01462            fp_sqr_comba32(A,B);
01463            goto clean;
01464         }
01465 #endif
01466 #if defined(TFM_SQR48) && FP_SIZE >= 96
01467         if (y <= 48) {
01468            fp_sqr_comba48(A,B);
01469            goto clean;
01470         }
01471 #endif
01472 #if defined(TFM_SQR64) && FP_SIZE >= 128
01473         if (y <= 64) {
01474            fp_sqr_comba64(A,B);
01475            goto clean;
01476         }
01477 #endif
01478        fp_sqr_comba(A, B);
01479 
01480 clean:
01481   /* zero any excess digits on the destination that we didn't write to */
01482   for (y = B->used; y >= 0 && y < oldused; y++) {
01483     B->dp[y] = 0;
01484   }
01485 }
01486 
01487 /* generic comba squarer */
01488 void fp_sqr_comba(fp_int *A, fp_int *B)
01489 {
01490   int       pa, ix, iz;
01491   fp_digit  c0, c1, c2;
01492   fp_int    tmp, *dst;
01493 #ifdef TFM_ISO
01494   fp_word   tt;
01495 #endif
01496 
01497   /* get size of output and trim */
01498   pa = A->used + A->used;
01499   if (pa >= FP_SIZE) {
01500      pa = FP_SIZE-1;
01501   }
01502 
01503   /* number of output digits to produce */
01504   COMBA_START;
01505   COMBA_CLEAR;
01506 
01507   if (A == B) {
01508      fp_init(&tmp);
01509      dst = &tmp;
01510   } else {
01511      fp_zero(B);
01512      dst = B;
01513   }
01514 
01515   for (ix = 0; ix < pa; ix++) {
01516       int      tx, ty, iy;
01517       fp_digit *tmpy, *tmpx;
01518 
01519       /* get offsets into the two bignums */
01520       ty = MIN(A->used-1, ix);
01521       tx = ix - ty;
01522 
01523       /* setup temp aliases */
01524       tmpx = A->dp + tx;
01525       tmpy = A->dp + ty;
01526 
01527       /* this is the number of times the loop will iterate,
01528          while (tx++ < a->used && ty-- >= 0) { ... }
01529        */
01530       iy = MIN(A->used-tx, ty+1);
01531 
01532       /* now for squaring tx can never equal ty
01533        * we halve the distance since they approach
01534        * at a rate of 2x and we have to round because
01535        * odd cases need to be executed
01536        */
01537       iy = MIN(iy, (ty-tx+1)>>1);
01538 
01539       /* forward carries */
01540       COMBA_FORWARD;
01541 
01542       /* execute loop */
01543       for (iz = 0; iz < iy; iz++) {
01544           SQRADD2(*tmpx++, *tmpy--);
01545       }
01546 
01547       /* even columns have the square term in them */
01548       if ((ix&1) == 0) {
01549           /* TAO change COMBA_ADD back to SQRADD */
01550           SQRADD(A->dp[ix>>1], A->dp[ix>>1]);
01551       }
01552 
01553       /* store it */
01554       COMBA_STORE(dst->dp[ix]);
01555   }
01556 
01557   COMBA_FINI;
01558 
01559   /* setup dest */
01560   dst->used = pa;
01561   fp_clamp (dst);
01562   if (dst != B) {
01563      fp_copy(dst, B);
01564   }
01565 }
01566 
01567 int fp_cmp(fp_int *a, fp_int *b)
01568 {
01569    if (a->sign == FP_NEG && b->sign == FP_ZPOS) {
01570       return FP_LT;
01571    } else if (a->sign == FP_ZPOS && b->sign == FP_NEG) {
01572       return FP_GT;
01573    } else {
01574       /* compare digits */
01575       if (a->sign == FP_NEG) {
01576          /* if negative compare opposite direction */
01577          return fp_cmp_mag(b, a);
01578       } else {
01579          return fp_cmp_mag(a, b);
01580       }
01581    }
01582 }
01583 
01584 /* compare against a single digit */
01585 int fp_cmp_d(fp_int *a, fp_digit b)
01586 {
01587   /* special case for zero*/
01588   if (a->used == 0 && b == 0)
01589     return FP_EQ;
01590 
01591   /* compare based on sign */
01592   if ((b && a->used == 0) || a->sign == FP_NEG) {
01593     return FP_LT;
01594   }
01595 
01596   /* compare based on magnitude */
01597   if (a->used > 1) {
01598     return FP_GT;
01599   }
01600 
01601   /* compare the only digit of a to b */
01602   if (a->dp[0] > b) {
01603     return FP_GT;
01604   } else if (a->dp[0] < b) {
01605     return FP_LT;
01606   } else {
01607     return FP_EQ;
01608   }
01609 
01610 }
01611 
01612 int fp_cmp_mag(fp_int *a, fp_int *b)
01613 {
01614    int x;
01615 
01616    if (a->used > b->used) {
01617       return FP_GT;
01618    } else if (a->used < b->used) {
01619       return FP_LT;
01620    } else {
01621       for (x = a->used - 1; x >= 0; x--) {
01622           if (a->dp[x] > b->dp[x]) {
01623              return FP_GT;
01624           } else if (a->dp[x] < b->dp[x]) {
01625              return FP_LT;
01626           }
01627       }
01628    }
01629    return FP_EQ;
01630 }
01631 
01632 /* sets up the montgomery reduction */
01633 int fp_montgomery_setup(fp_int *a, fp_digit *rho)
01634 {
01635   fp_digit x, b;
01636 
01637 /* fast inversion mod 2**k
01638  *
01639  * Based on the fact that
01640  *
01641  * XA = 1 (mod 2**n)  =>  (X(2-XA)) A = 1 (mod 2**2n)
01642  *                    =>  2*X*A - X*X*A*A = 1
01643  *                    =>  2*(1) - (1)     = 1
01644  */
01645   b = a->dp[0];
01646 
01647   if ((b & 1) == 0) {
01648     return FP_VAL;
01649   }
01650 
01651   x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
01652   x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
01653   x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
01654   x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
01655 #ifdef FP_64BIT
01656   x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
01657 #endif
01658 
01659   /* rho = -1/m mod b */
01660   *rho = (fp_digit) (((fp_word) 1 << ((fp_word) DIGIT_BIT)) - ((fp_word)x));
01661 
01662   return FP_OKAY;
01663 }
01664 
01665 /* computes a = B**n mod b without division or multiplication useful for
01666  * normalizing numbers in a Montgomery system.
01667  */
01668 void fp_montgomery_calc_normalization(fp_int *a, fp_int *b)
01669 {
01670   int     x, bits;
01671 
01672   /* how many bits of last digit does b use */
01673   bits = fp_count_bits (b) % DIGIT_BIT;
01674   if (!bits) bits = DIGIT_BIT;
01675 
01676   /* compute A = B^(n-1) * 2^(bits-1) */
01677   if (b->used > 1) {
01678      fp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1);
01679   } else {
01680      fp_set(a, 1);
01681      bits = 1;
01682   }
01683 
01684   /* now compute C = A * B mod b */
01685   for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
01686     fp_mul_2 (a, a);
01687     if (fp_cmp_mag (a, b) != FP_LT) {
01688       s_fp_sub (a, b, a);
01689     }
01690   }
01691 }
01692 
01693 
01694 #ifdef TFM_SMALL_MONT_SET
01695     #include "fp_mont_small.i"
01696 #endif
01697 
01698 #ifdef HAVE_INTEL_MULX
01699 static WC_INLINE void innermul8_mulx(fp_digit *c_mulx, fp_digit *cy_mulx, fp_digit *tmpm, fp_digit mu)
01700 {
01701     fp_digit cy = *cy_mulx ;
01702     INNERMUL8_MULX ;
01703     *cy_mulx = cy ;
01704 }
01705 
01706 /* computes x/R == x (mod N) via Montgomery Reduction */
01707 static void fp_montgomery_reduce_mulx(fp_int *a, fp_int *m, fp_digit mp)
01708 {
01709    fp_digit c[FP_SIZE+1], *_c, *tmpm, mu = 0;
01710    int      oldused, x, y, pa;
01711 
01712    /* bail if too large */
01713    if (m->used > (FP_SIZE/2)) {
01714       (void)mu;                     /* shut up compiler */
01715       return;
01716    }
01717 
01718 #ifdef TFM_SMALL_MONT_SET
01719    if (m->used <= 16) {
01720       fp_montgomery_reduce_small(a, m, mp);
01721       return;
01722    }
01723 #endif
01724 
01725 
01726    /* now zero the buff */
01727    XMEMSET(c, 0, sizeof(c));
01728    pa = m->used;
01729 
01730    /* copy the input */
01731    oldused = a->used;
01732    for (x = 0; x < oldused; x++) {
01733        c[x] = a->dp[x];
01734    }
01735    MONT_START;
01736 
01737    for (x = 0; x < pa; x++) {
01738        fp_digit cy = 0;
01739        /* get Mu for this round */
01740        LOOP_START;
01741        _c   = c + x;
01742        tmpm = m->dp;
01743        y = 0;
01744         for (; y < (pa & ~7); y += 8) {
01745               innermul8_mulx(_c, &cy, tmpm, mu) ;
01746               _c   += 8;
01747               tmpm += 8;
01748            }
01749        for (; y < pa; y++) {
01750           INNERMUL;
01751           ++_c;
01752        }
01753        LOOP_END;
01754        while (cy) {
01755            PROPCARRY;
01756            ++_c;
01757        }
01758   }
01759 
01760   /* now copy out */
01761   _c   = c + pa;
01762   tmpm = a->dp;
01763   for (x = 0; x < pa+1; x++) {
01764      *tmpm++ = *_c++;
01765   }
01766 
01767   /* zero any excess digits on the destination that we didn't write to */
01768   for (; x < oldused; x++) {
01769      *tmpm++ = 0;
01770   }
01771 
01772   MONT_FINI;
01773 
01774   a->used = pa+1;
01775   fp_clamp(a);
01776 
01777   /* if A >= m then A = A - m */
01778   if (fp_cmp_mag (a, m) != FP_LT) {
01779     s_fp_sub (a, m, a);
01780   }
01781 }
01782 #endif
01783 
01784 /* computes x/R == x (mod N) via Montgomery Reduction */
01785 void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
01786 {
01787    fp_digit c[FP_SIZE+1], *_c, *tmpm, mu = 0;
01788    int      oldused, x, y, pa;
01789 
01790    IF_HAVE_INTEL_MULX(fp_montgomery_reduce_mulx(a, m, mp), return) ;
01791 
01792    /* bail if too large */
01793    if (m->used > (FP_SIZE/2)) {
01794       (void)mu;                     /* shut up compiler */
01795       return;
01796    }
01797 
01798 #ifdef TFM_SMALL_MONT_SET
01799    if (m->used <= 16) {
01800       fp_montgomery_reduce_small(a, m, mp);
01801       return;
01802    }
01803 #endif
01804 
01805 
01806    /* now zero the buff */
01807    XMEMSET(c, 0, sizeof(c));
01808    pa = m->used;
01809 
01810    /* copy the input */
01811    oldused = a->used;
01812    for (x = 0; x < oldused; x++) {
01813        c[x] = a->dp[x];
01814    }
01815    MONT_START;
01816 
01817    for (x = 0; x < pa; x++) {
01818        fp_digit cy = 0;
01819        /* get Mu for this round */
01820        LOOP_START;
01821        _c   = c + x;
01822        tmpm = m->dp;
01823        y = 0;
01824 #if defined(INNERMUL8)
01825         for (; y < (pa & ~7); y += 8) {
01826               INNERMUL8 ;
01827               _c   += 8;
01828               tmpm += 8;
01829            }
01830 #endif
01831        for (; y < pa; y++) {
01832           INNERMUL;
01833           ++_c;
01834        }
01835        LOOP_END;
01836        while (cy) {
01837            PROPCARRY;
01838            ++_c;
01839        }
01840   }
01841 
01842   /* now copy out */
01843   _c   = c + pa;
01844   tmpm = a->dp;
01845   for (x = 0; x < pa+1; x++) {
01846      *tmpm++ = *_c++;
01847   }
01848 
01849   /* zero any excess digits on the destination that we didn't write to */
01850   for (; x < oldused; x++) {
01851      *tmpm++ = 0;
01852   }
01853 
01854   MONT_FINI;
01855 
01856   a->used = pa+1;
01857   fp_clamp(a);
01858 
01859   /* if A >= m then A = A - m */
01860   if (fp_cmp_mag (a, m) != FP_LT) {
01861     s_fp_sub (a, m, a);
01862   }
01863 }
01864 
01865 void fp_read_unsigned_bin(fp_int *a, const unsigned char *b, int c)
01866 {
01867 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
01868   const word32 maxC = (a->size * sizeof(fp_digit));
01869 #else
01870   const word32 maxC = (FP_SIZE * sizeof(fp_digit));
01871 #endif
01872 
01873   /* zero the int */
01874   fp_zero (a);
01875 
01876   /* if input b excess max, then truncate */
01877   if (c > 0 && (word32)c > maxC) {
01878      int excess = (c - maxC);
01879      c -= excess;
01880      b += excess;
01881   }
01882 
01883   /* If we know the endianness of this architecture, and we're using
01884      32-bit fp_digits, we can optimize this */
01885 #if (defined(LITTLE_ENDIAN_ORDER) || defined(BIG_ENDIAN_ORDER)) && \
01886     defined(FP_32BIT)
01887   /* But not for both simultaneously */
01888 #if defined(LITTLE_ENDIAN_ORDER) && defined(BIG_ENDIAN_ORDER)
01889 #error Both LITTLE_ENDIAN_ORDER and BIG_ENDIAN_ORDER defined.
01890 #endif
01891   {
01892      unsigned char *pd = (unsigned char *)a->dp;
01893 
01894      a->used = (c + sizeof(fp_digit) - 1)/sizeof(fp_digit);
01895      /* read the bytes in */
01896 #ifdef BIG_ENDIAN_ORDER
01897      {
01898        /* Use Duff's device to unroll the loop. */
01899        int idx = (c - 1) & ~3;
01900        switch (c % 4) {
01901        case 0:    do { pd[idx+0] = *b++;
01902        case 3:         pd[idx+1] = *b++;
01903        case 2:         pd[idx+2] = *b++;
01904        case 1:         pd[idx+3] = *b++;
01905                      idx -= 4;
01906                  } while ((c -= 4) > 0);
01907        }
01908      }
01909 #else
01910      for (c -= 1; c >= 0; c -= 1) {
01911        pd[c] = *b++;
01912      }
01913 #endif
01914   }
01915 #else
01916   /* read the bytes in */
01917   for (; c > 0; c--) {
01918      fp_mul_2d (a, 8, a);
01919      a->dp[0] |= *b++;
01920 
01921      if (a->used == 0) {
01922          a->used = 1;
01923      }
01924   }
01925 #endif
01926   fp_clamp (a);
01927 }
01928 
01929 int fp_to_unsigned_bin_at_pos(int x, fp_int *t, unsigned char *b)
01930 {
01931 #if DIGIT_BIT == 64 || DIGIT_BIT == 32
01932    int i, j;
01933    fp_digit n;
01934 
01935    for (j=0,i=0; i<t->used-1; ) {
01936        b[x++] = (unsigned char)(t->dp[i] >> j);
01937        j += 8;
01938        i += j == DIGIT_BIT;
01939        j &= DIGIT_BIT - 1;
01940    }
01941    n = t->dp[i];
01942    while (n != 0) {
01943        b[x++] = (unsigned char)n;
01944        n >>= 8;
01945    }
01946    return x;
01947 #else
01948    while (fp_iszero (t) == FP_NO) {
01949       b[x++] = (unsigned char) (t->dp[0] & 255);
01950       fp_div_2d (t, 8, t, NULL);
01951   }
01952   return x;
01953 #endif
01954 }
01955 
01956 void fp_to_unsigned_bin(fp_int *a, unsigned char *b)
01957 {
01958   int     x;
01959   fp_int  t;
01960 
01961   fp_init_copy(&t, a);
01962 
01963   x = fp_to_unsigned_bin_at_pos(0, &t, b);
01964   fp_reverse (b, x);
01965 }
01966 
01967 int fp_unsigned_bin_size(fp_int *a)
01968 {
01969   int     size = fp_count_bits (a);
01970   return (size / 8 + ((size & 7) != 0 ? 1 : 0));
01971 }
01972 
01973 void fp_set(fp_int *a, fp_digit b)
01974 {
01975    fp_zero(a);
01976    a->dp[0] = b;
01977    a->used  = a->dp[0] ? 1 : 0;
01978 }
01979 
01980 
01981 #ifndef MP_SET_CHUNK_BITS
01982     #define MP_SET_CHUNK_BITS 4
01983 #endif
01984 void fp_set_int(fp_int *a, unsigned long b)
01985 {
01986   int x;
01987 
01988   /* use direct fp_set if b is less than fp_digit max */
01989   if (b < FP_DIGIT_MAX) {
01990     fp_set (a, (fp_digit)b);
01991     return;
01992   }
01993 
01994   fp_zero (a);
01995 
01996   /* set chunk bits at a time */
01997   for (x = 0; x < (int)(sizeof(b) * 8) / MP_SET_CHUNK_BITS; x++) {
01998     fp_mul_2d (a, MP_SET_CHUNK_BITS, a);
01999 
02000     /* OR in the top bits of the source */
02001     a->dp[0] |= (b >> ((sizeof(b) * 8) - MP_SET_CHUNK_BITS)) &
02002                                   ((1 << MP_SET_CHUNK_BITS) - 1);
02003 
02004     /* shift the source up to the next chunk bits */
02005     b <<= MP_SET_CHUNK_BITS;
02006 
02007     /* ensure that digits are not clamped off */
02008     a->used += 1;
02009   }
02010 
02011   /* clamp digits */
02012   fp_clamp(a);
02013 }
02014 
02015 /* check if a bit is set */
02016 int fp_is_bit_set (fp_int *a, fp_digit b)
02017 {
02018     fp_digit i;
02019 
02020     if (b > FP_MAX_BITS)
02021         return 0;
02022     else
02023         i = b/DIGIT_BIT;
02024 
02025     if ((fp_digit)a->used < i)
02026         return 0;
02027 
02028     return (int)((a->dp[i] >> b%DIGIT_BIT) & (fp_digit)1);
02029 }
02030 
02031 /* set the b bit of a */
02032 int fp_set_bit (fp_int * a, fp_digit b)
02033 {
02034     fp_digit i;
02035 
02036     if (b > FP_MAX_BITS)
02037         return 0;
02038     else
02039         i = b/DIGIT_BIT;
02040 
02041     /* set the used count of where the bit will go if required */
02042     if (a->used < (int)(i+1))
02043         a->used = (int)(i+1);
02044 
02045     /* put the single bit in its place */
02046     a->dp[i] |= ((fp_digit)1) << (b % DIGIT_BIT);
02047 
02048     return MP_OKAY;
02049 }
02050 
02051 int fp_count_bits (fp_int * a)
02052 {
02053   int     r;
02054   fp_digit q;
02055 
02056   /* shortcut */
02057   if (a->used == 0) {
02058     return 0;
02059   }
02060 
02061   /* get number of digits and add that */
02062   r = (a->used - 1) * DIGIT_BIT;
02063 
02064   /* take the last digit and count the bits in it */
02065   q = a->dp[a->used - 1];
02066   while (q > ((fp_digit) 0)) {
02067     ++r;
02068     q >>= ((fp_digit) 1);
02069   }
02070 
02071   return r;
02072 }
02073 
02074 int fp_leading_bit(fp_int *a)
02075 {
02076     int bit = 0;
02077 
02078     if (a->used != 0) {
02079         fp_digit q = a->dp[a->used - 1];
02080         int qSz = sizeof(fp_digit);
02081 
02082         while (qSz > 0) {
02083             if ((unsigned char)q != 0)
02084                 bit = (q & 0x80) != 0;
02085             q >>= 8;
02086             qSz--;
02087         }
02088     }
02089 
02090     return bit;
02091 }
02092 
02093 void fp_lshd(fp_int *a, int x)
02094 {
02095     int y;
02096 
02097     /* move up and truncate as required */
02098     y = MIN(a->used + x - 1, (int)(FP_SIZE-1));
02099 
02100     /* store new size */
02101     a->used = y + 1;
02102 
02103     /* move digits */
02104     for (; y >= x; y--) {
02105         a->dp[y] = a->dp[y-x];
02106     }
02107 
02108     /* zero lower digits */
02109     for (; y >= 0; y--) {
02110         a->dp[y] = 0;
02111     }
02112 
02113     /* clamp digits */
02114     fp_clamp(a);
02115 }
02116 
02117 
02118 /* right shift by bit count */
02119 void fp_rshb(fp_int *c, int x)
02120 {
02121     fp_digit *tmpc, mask, shift;
02122     fp_digit r, rr;
02123     fp_digit D = x;
02124 
02125     /* mask */
02126     mask = (((fp_digit)1) << D) - 1;
02127 
02128     /* shift for lsb */
02129     shift = DIGIT_BIT - D;
02130 
02131     /* alias */
02132     tmpc = c->dp + (c->used - 1);
02133 
02134     /* carry */
02135     r = 0;
02136     for (x = c->used - 1; x >= 0; x--) {
02137       /* get the lower  bits of this word in a temp */
02138       rr = *tmpc & mask;
02139 
02140       /* shift the current word and mix in the carry bits from previous word */
02141       *tmpc = (*tmpc >> D) | (r << shift);
02142       --tmpc;
02143 
02144       /* set the carry to the carry bits of the current word found above */
02145       r = rr;
02146     }
02147 
02148     /* clamp digits */
02149     fp_clamp(c);
02150 }
02151 
02152 
02153 void fp_rshd(fp_int *a, int x)
02154 {
02155   int y;
02156 
02157   /* too many digits just zero and return */
02158   if (x >= a->used) {
02159      fp_zero(a);
02160      return;
02161   }
02162 
02163    /* shift */
02164    for (y = 0; y < a->used - x; y++) {
02165       a->dp[y] = a->dp[y+x];
02166    }
02167 
02168    /* zero rest */
02169    for (; y < a->used; y++) {
02170       a->dp[y] = 0;
02171    }
02172 
02173    /* decrement count */
02174    a->used -= x;
02175    fp_clamp(a);
02176 }
02177 
02178 /* reverse an array, used for radix code */
02179 void fp_reverse (unsigned char *s, int len)
02180 {
02181   int     ix, iy;
02182   unsigned char t;
02183 
02184   ix = 0;
02185   iy = len - 1;
02186   while (ix < iy) {
02187     t     = s[ix];
02188     s[ix] = s[iy];
02189     s[iy] = t;
02190     ++ix;
02191     --iy;
02192   }
02193 }
02194 
02195 
02196 /* c = a - b */
02197 void fp_sub_d(fp_int *a, fp_digit b, fp_int *c)
02198 {
02199    fp_int tmp;
02200    fp_init(&tmp);
02201    fp_set(&tmp, b);
02202 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
02203    if (c->size < FP_SIZE) {
02204      fp_sub(a, &tmp, &tmp);
02205      fp_copy(&tmp, c);
02206    } else
02207 #endif
02208    {
02209      fp_sub(a, &tmp, c);
02210    }
02211 }
02212 
02213 
02214 /* wolfSSL callers from normal lib */
02215 
02216 /* init a new mp_int */
02217 int mp_init (mp_int * a)
02218 {
02219   if (a)
02220     fp_init(a);
02221   return MP_OKAY;
02222 }
02223 
02224 void fp_init(fp_int *a)
02225 {
02226 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
02227     a->size = FP_SIZE;
02228 #endif
02229 #ifdef HAVE_WOLF_BIGINT
02230     wc_bigint_init(&a->raw);
02231 #endif
02232     fp_zero(a);
02233 }
02234 
02235 void fp_zero(fp_int *a)
02236 {
02237     int size = FP_SIZE;
02238     a->used = 0;
02239     a->sign = FP_ZPOS;
02240 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
02241     size = a->size;
02242 #endif
02243     XMEMSET(a->dp, 0, size * sizeof(fp_digit));
02244 }
02245 
02246 void fp_clear(fp_int *a)
02247 {
02248     int size = FP_SIZE;
02249     a->used = 0;
02250     a->sign = FP_ZPOS;
02251 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
02252     size = a->size;
02253 #endif
02254     XMEMSET(a->dp, 0, size * sizeof(fp_digit));
02255     fp_free(a);
02256 }
02257 
02258 void fp_forcezero (mp_int * a)
02259 {
02260     int size = FP_SIZE;
02261     a->used = 0;
02262     a->sign = FP_ZPOS;
02263 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
02264     size = a->size;
02265 #endif
02266     ForceZero(a->dp, size * sizeof(fp_digit));
02267 #ifdef HAVE_WOLF_BIGINT
02268     wc_bigint_zero(&a->raw);
02269 #endif
02270     fp_free(a);
02271 }
02272 
02273 void mp_forcezero (mp_int * a)
02274 {
02275     fp_forcezero(a);
02276 }
02277 
02278 void fp_free(fp_int* a)
02279 {
02280 #ifdef HAVE_WOLF_BIGINT
02281     wc_bigint_free(&a->raw);
02282 #else
02283     (void)a;
02284 #endif
02285 }
02286 
02287 
02288 /* clear one (frees)  */
02289 void mp_clear (mp_int * a)
02290 {
02291     if (a == NULL)
02292         return;
02293     fp_clear(a);
02294 }
02295 
02296 void mp_free(mp_int* a)
02297 {
02298     fp_free(a);
02299 }
02300 
02301 /* handle up to 6 inits */
02302 int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d,
02303                   mp_int* e, mp_int* f)
02304 {
02305     if (a)
02306         fp_init(a);
02307     if (b)
02308         fp_init(b);
02309     if (c)
02310         fp_init(c);
02311     if (d)
02312         fp_init(d);
02313     if (e)
02314         fp_init(e);
02315     if (f)
02316         fp_init(f);
02317 
02318     return MP_OKAY;
02319 }
02320 
02321 /* high level addition (handles signs) */
02322 int mp_add (mp_int * a, mp_int * b, mp_int * c)
02323 {
02324   fp_add(a, b, c);
02325   return MP_OKAY;
02326 }
02327 
02328 /* high level subtraction (handles signs) */
02329 int mp_sub (mp_int * a, mp_int * b, mp_int * c)
02330 {
02331   fp_sub(a, b, c);
02332   return MP_OKAY;
02333 }
02334 
02335 /* high level multiplication (handles sign) */
02336 #if defined(FREESCALE_LTC_TFM)
02337 int wolfcrypt_mp_mul(mp_int * a, mp_int * b, mp_int * c)
02338 #else
02339 int mp_mul (mp_int * a, mp_int * b, mp_int * c)
02340 #endif
02341 {
02342   fp_mul(a, b, c);
02343   return MP_OKAY;
02344 }
02345 
02346 int mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
02347 {
02348   fp_mul_d(a, b, c);
02349   return MP_OKAY;
02350 }
02351 
02352 /* d = a * b (mod c) */
02353 #if defined(FREESCALE_LTC_TFM)
02354 int wolfcrypt_mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
02355 #else
02356 int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
02357 #endif
02358 {
02359   return fp_mulmod(a, b, c, d);
02360 }
02361 
02362 /* d = a - b (mod c) */
02363 int mp_submod(mp_int *a, mp_int *b, mp_int *c, mp_int *d)
02364 {
02365   return fp_submod(a, b, c, d);
02366 }
02367 
02368 /* d = a + b (mod c) */
02369 int mp_addmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d)
02370 {
02371   return fp_addmod(a, b, c, d);
02372 }
02373 
02374 /* c = a mod b, 0 <= c < b */
02375 #if defined(FREESCALE_LTC_TFM)
02376 int wolfcrypt_mp_mod (mp_int * a, mp_int * b, mp_int * c)
02377 #else
02378 int mp_mod (mp_int * a, mp_int * b, mp_int * c)
02379 #endif
02380 {
02381   return fp_mod (a, b, c);
02382 }
02383 
02384 /* hac 14.61, pp608 */
02385 #if defined(FREESCALE_LTC_TFM)
02386 int wolfcrypt_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
02387 #else
02388 int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
02389 #endif
02390 {
02391   return fp_invmod(a, b, c);
02392 }
02393 
02394 /* this is a shell function that calls either the normal or Montgomery
02395  * exptmod functions.  Originally the call to the montgomery code was
02396  * embedded in the normal function but that wasted alot of stack space
02397  * for nothing (since 99% of the time the Montgomery code would be called)
02398  */
02399 #if defined(FREESCALE_LTC_TFM)
02400 int wolfcrypt_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
02401 #else
02402 int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
02403 #endif
02404 {
02405   return fp_exptmod(G, X, P, Y);
02406 }
02407 
02408 /* compare two ints (signed)*/
02409 int mp_cmp (mp_int * a, mp_int * b)
02410 {
02411   return fp_cmp(a, b);
02412 }
02413 
02414 /* compare a digit */
02415 int mp_cmp_d(mp_int * a, mp_digit b)
02416 {
02417   return fp_cmp_d(a, b);
02418 }
02419 
02420 /* get the size for an unsigned equivalent */
02421 int mp_unsigned_bin_size (mp_int * a)
02422 {
02423   return fp_unsigned_bin_size(a);
02424 }
02425 
02426 int mp_to_unsigned_bin_at_pos(int x, fp_int *t, unsigned char *b)
02427 {
02428     return fp_to_unsigned_bin_at_pos(x, t, b);
02429 }
02430 
02431 /* store in unsigned [big endian] format */
02432 int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
02433 {
02434   fp_to_unsigned_bin(a,b);
02435   return MP_OKAY;
02436 }
02437 
02438 /* reads a unsigned char array, assumes the msb is stored first [big endian] */
02439 int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
02440 {
02441   fp_read_unsigned_bin(a, b, c);
02442   return MP_OKAY;
02443 }
02444 
02445 
02446 int mp_sub_d(fp_int *a, fp_digit b, fp_int *c)
02447 {
02448     fp_sub_d(a, b, c);
02449     return MP_OKAY;
02450 }
02451 
02452 int mp_mul_2d(fp_int *a, int b, fp_int *c)
02453 {
02454     fp_mul_2d(a, b, c);
02455     return MP_OKAY;
02456 }
02457 
02458 int mp_2expt(fp_int* a, int b)
02459 {
02460     fp_2expt(a, b);
02461     return MP_OKAY;
02462 }
02463 
02464 int mp_div(fp_int * a, fp_int * b, fp_int * c, fp_int * d)
02465 {
02466     return fp_div(a, b, c, d);
02467 }
02468 
02469 int mp_div_2d(fp_int* a, int b, fp_int* c, fp_int* d)
02470 {
02471     fp_div_2d(a, b, c, d);
02472     return MP_OKAY;
02473 }
02474 
02475 void fp_copy(fp_int *a, fp_int *b)
02476 {
02477     /* if source and destination are different */
02478     if (a != b) {
02479 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
02480         /* verify a will fit in b */
02481         if (b->size >= a->used) {
02482             int x, oldused;
02483             oldused = b->used;
02484             b->used = a->used;
02485             b->sign = a->sign;
02486 
02487             XMEMCPY(b->dp, a->dp, a->used * sizeof(fp_digit));
02488 
02489             /* zero any excess digits on the destination that we didn't write to */
02490             for (x = b->used; x >= 0 && x < oldused; x++) {
02491                 b->dp[x] = 0;
02492             }
02493         }
02494         else {
02495             /* TODO: Handle error case */
02496         }
02497 #else
02498         /* all dp's are same size, so do straight copy */
02499         b->used = a->used;
02500         b->sign = a->sign;
02501         XMEMCPY(b->dp, a->dp, FP_SIZE * sizeof(fp_digit));
02502 #endif
02503     }
02504 }
02505 
02506 void fp_init_copy(fp_int *a, fp_int* b)
02507 {
02508     if (a != b) {
02509         fp_init(a);
02510         fp_copy(b, a);
02511     }
02512 }
02513 
02514 /* fast math wrappers */
02515 int mp_copy(fp_int* a, fp_int* b)
02516 {
02517     fp_copy(a, b);
02518     return MP_OKAY;
02519 }
02520 
02521 int mp_isodd(mp_int* a)
02522 {
02523     return fp_isodd(a);
02524 }
02525 
02526 int mp_iszero(mp_int* a)
02527 {
02528     return fp_iszero(a);
02529 }
02530 
02531 int mp_count_bits (mp_int* a)
02532 {
02533     return fp_count_bits(a);
02534 }
02535 
02536 int mp_leading_bit (mp_int* a)
02537 {
02538     return fp_leading_bit(a);
02539 }
02540 
02541 void mp_rshb (mp_int* a, int x)
02542 {
02543     fp_rshb(a, x);
02544 }
02545 
02546 void mp_rshd (mp_int* a, int x)
02547 {
02548     fp_rshd(a, x);
02549 }
02550 
02551 int mp_set_int(mp_int *a, unsigned long b)
02552 {
02553     fp_set_int(a, b);
02554     return MP_OKAY;
02555 }
02556 
02557 int mp_is_bit_set (mp_int *a, mp_digit b)
02558 {
02559     return fp_is_bit_set(a, b);
02560 }
02561 
02562 int mp_set_bit(mp_int *a, mp_digit b)
02563 {
02564     return fp_set_bit(a, b);
02565 }
02566 
02567 #if defined(WOLFSSL_KEY_GEN) || defined (HAVE_ECC)
02568 
02569 /* c = a * a (mod b) */
02570 int fp_sqrmod(fp_int *a, fp_int *b, fp_int *c)
02571 {
02572   int err;
02573   fp_int t;
02574 
02575   fp_init(&t);
02576   fp_sqr(a, &t);
02577 
02578 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
02579   if (c->size < FP_SIZE) {
02580     err = fp_mod(&t, b, &t);
02581     fp_copy(&t, c);
02582   }
02583   else
02584 #endif
02585   {
02586     err = fp_mod(&t, b, c);
02587   }
02588 
02589   return err;
02590 }
02591 
02592 /* fast math conversion */
02593 int mp_sqrmod(mp_int *a, mp_int *b, mp_int *c)
02594 {
02595     return fp_sqrmod(a, b, c);
02596 }
02597 
02598 /* fast math conversion */
02599 int mp_montgomery_calc_normalization(mp_int *a, mp_int *b)
02600 {
02601     fp_montgomery_calc_normalization(a, b);
02602     return MP_OKAY;
02603 }
02604 
02605 #endif /* WOLFSSL_KEYGEN || HAVE_ECC */
02606 
02607 
02608 #if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
02609     defined(WOLFSSL_DEBUG_MATH) || defined(DEBUG_WOLFSSL) || \
02610     defined(WOLFSSL_PUBLIC_MP)
02611 
02612 #ifdef WOLFSSL_KEY_GEN
02613 /* swap the elements of two integers, for cases where you can't simply swap the
02614  * mp_int pointers around
02615  */
02616 static void fp_exch (fp_int * a, fp_int * b)
02617 {
02618     fp_int  t;
02619 
02620     t  = *a;
02621     *a = *b;
02622     *b = t;
02623 }
02624 #endif
02625 
02626 static const int lnz[16] = {
02627    4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
02628 };
02629 
02630 /* Counts the number of lsbs which are zero before the first zero bit */
02631 int fp_cnt_lsb(fp_int *a)
02632 {
02633    int x;
02634    fp_digit q, qq;
02635 
02636    /* easy out */
02637    if (fp_iszero(a) == FP_YES) {
02638       return 0;
02639    }
02640 
02641    /* scan lower digits until non-zero */
02642    for (x = 0; x < a->used && a->dp[x] == 0; x++) {}
02643    q = a->dp[x];
02644    x *= DIGIT_BIT;
02645 
02646    /* now scan this digit until a 1 is found */
02647    if ((q & 1) == 0) {
02648       do {
02649          qq  = q & 15;
02650          x  += lnz[qq];
02651          q >>= 4;
02652       } while (qq == 0);
02653    }
02654    return x;
02655 }
02656 
02657 
02658 static int s_is_power_of_two(fp_digit b, int *p)
02659 {
02660    int x;
02661 
02662    /* fast return if no power of two */
02663    if ((b==0) || (b & (b-1))) {
02664       return FP_NO;
02665    }
02666 
02667    for (x = 0; x < DIGIT_BIT; x++) {
02668       if (b == (((fp_digit)1)<<x)) {
02669          *p = x;
02670          return FP_YES;
02671       }
02672    }
02673    return FP_NO;
02674 }
02675 
02676 /* a/b => cb + d == a */
02677 static int fp_div_d(fp_int *a, fp_digit b, fp_int *c, fp_digit *d)
02678 {
02679   fp_int   q;
02680   fp_word  w;
02681   fp_digit t;
02682   int      ix;
02683 
02684   fp_init(&q);
02685 
02686   /* cannot divide by zero */
02687   if (b == 0) {
02688      return FP_VAL;
02689   }
02690 
02691   /* quick outs */
02692   if (b == 1 || fp_iszero(a) == FP_YES) {
02693      if (d != NULL) {
02694         *d = 0;
02695      }
02696      if (c != NULL) {
02697         fp_copy(a, c);
02698      }
02699      return FP_OKAY;
02700   }
02701 
02702   /* power of two ? */
02703   if (s_is_power_of_two(b, &ix) == FP_YES) {
02704      if (d != NULL) {
02705         *d = a->dp[0] & ((((fp_digit)1)<<ix) - 1);
02706      }
02707      if (c != NULL) {
02708         fp_div_2d(a, ix, c, NULL);
02709      }
02710      return FP_OKAY;
02711   }
02712 
02713   if (c != NULL) {
02714     q.used = a->used;
02715     q.sign = a->sign;
02716   }
02717 
02718   w = 0;
02719   for (ix = a->used - 1; ix >= 0; ix--) {
02720      w = (w << ((fp_word)DIGIT_BIT)) | ((fp_word)a->dp[ix]);
02721 
02722      if (w >= b) {
02723         t = (fp_digit)(w / b);
02724         w -= ((fp_word)t) * ((fp_word)b);
02725       } else {
02726         t = 0;
02727       }
02728       if (c != NULL)
02729         q.dp[ix] = (fp_digit)t;
02730   }
02731 
02732   if (d != NULL) {
02733      *d = (fp_digit)w;
02734   }
02735 
02736   if (c != NULL) {
02737      fp_clamp(&q);
02738      fp_copy(&q, c);
02739   }
02740 
02741   return FP_OKAY;
02742 }
02743 
02744 
02745 /* c = a mod b, 0 <= c < b  */
02746 static int fp_mod_d(fp_int *a, fp_digit b, fp_digit *c)
02747 {
02748    return fp_div_d(a, b, NULL, c);
02749 }
02750 
02751 int mp_mod_d(fp_int *a, fp_digit b, fp_digit *c)
02752 {
02753    return fp_mod_d(a, b, c);
02754 }
02755 
02756 #endif /* defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || defined(WOLFSSL_DEBUG_MATH) */
02757 
02758 #ifdef WOLFSSL_KEY_GEN
02759 
02760 static void fp_gcd(fp_int *a, fp_int *b, fp_int *c);
02761 static void fp_lcm(fp_int *a, fp_int *b, fp_int *c);
02762 static int  fp_isprime_ex(fp_int *a, int t);
02763 static int  fp_isprime(fp_int *a);
02764 static int  fp_randprime(fp_int* N, int len, WC_RNG* rng, void* heap);
02765 
02766 int mp_gcd(fp_int *a, fp_int *b, fp_int *c)
02767 {
02768     fp_gcd(a, b, c);
02769     return MP_OKAY;
02770 }
02771 
02772 
02773 int mp_lcm(fp_int *a, fp_int *b, fp_int *c)
02774 {
02775     fp_lcm(a, b, c);
02776     return MP_OKAY;
02777 }
02778 
02779 
02780 int mp_prime_is_prime(mp_int* a, int t, int* result)
02781 {
02782     (void)t;
02783     *result = fp_isprime(a);
02784     return MP_OKAY;
02785 }
02786 
02787 int mp_rand_prime(mp_int* N, int len, WC_RNG* rng, void* heap)
02788 {
02789     int err;
02790 
02791     err = fp_randprime(N, len, rng, heap);
02792     switch(err) {
02793         case FP_VAL:
02794             return MP_VAL;
02795         case FP_MEM:
02796             return MP_MEM;
02797         default:
02798             break;
02799     }
02800 
02801     return MP_OKAY;
02802 }
02803 
02804 int mp_exch (mp_int * a, mp_int * b)
02805 {
02806     fp_exch(a, b);
02807     return MP_OKAY;
02808 }
02809 
02810 /* Miller-Rabin test of "a" to the base of "b" as described in
02811  * HAC pp. 139 Algorithm 4.24
02812  *
02813  * Sets result to 0 if definitely composite or 1 if probably prime.
02814  * Randomly the chance of error is no more than 1/4 and often
02815  * very much lower.
02816  */
02817 static void fp_prime_miller_rabin (fp_int * a, fp_int * b, int *result)
02818 {
02819   fp_int  n1, y, r;
02820   int     s, j;
02821 
02822   /* default */
02823   *result = FP_NO;
02824 
02825   /* ensure b > 1 */
02826   if (fp_cmp_d(b, 1) != FP_GT) {
02827      return;
02828   }
02829 
02830   /* get n1 = a - 1 */
02831   fp_init_copy(&n1, a);
02832   fp_sub_d(&n1, 1, &n1);
02833 
02834   /* set 2**s * r = n1 */
02835   fp_init_copy(&r, &n1);
02836 
02837   /* count the number of least significant bits
02838    * which are zero
02839    */
02840   s = fp_cnt_lsb(&r);
02841 
02842   /* now divide n - 1 by 2**s */
02843   fp_div_2d (&r, s, &r, NULL);
02844 
02845   /* compute y = b**r mod a */
02846   fp_init(&y);
02847   fp_exptmod(b, &r, a, &y);
02848 
02849   /* if y != 1 and y != n1 do */
02850   if (fp_cmp_d (&y, 1) != FP_EQ && fp_cmp (&y, &n1) != FP_EQ) {
02851     j = 1;
02852     /* while j <= s-1 and y != n1 */
02853     while ((j <= (s - 1)) && fp_cmp (&y, &n1) != FP_EQ) {
02854       fp_sqrmod (&y, a, &y);
02855 
02856       /* if y == 1 then composite */
02857       if (fp_cmp_d (&y, 1) == FP_EQ) {
02858          return;
02859       }
02860       ++j;
02861     }
02862 
02863     /* if y != n1 then composite */
02864     if (fp_cmp (&y, &n1) != FP_EQ) {
02865        return;
02866     }
02867   }
02868 
02869   /* probably prime now */
02870   *result = FP_YES;
02871 }
02872 
02873 
02874 /* a few primes */
02875 static const fp_digit primes[FP_PRIME_SIZE] = {
02876   0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
02877   0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
02878   0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
02879   0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
02880   0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
02881   0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
02882   0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
02883   0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
02884 
02885   0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
02886   0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
02887   0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
02888   0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
02889   0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
02890   0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
02891   0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
02892   0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
02893 
02894   0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
02895   0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
02896   0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
02897   0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
02898   0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
02899   0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
02900   0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
02901   0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
02902 
02903   0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
02904   0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
02905   0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
02906   0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
02907   0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
02908   0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
02909   0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
02910   0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
02911 };
02912 
02913 int fp_isprime_ex(fp_int *a, int t)
02914 {
02915    fp_int   b;
02916    fp_digit d;
02917    int      r, res;
02918 
02919    if (t <= 0 || t > FP_PRIME_SIZE) {
02920      return FP_NO;
02921    }
02922 
02923    /* do trial division */
02924    for (r = 0; r < FP_PRIME_SIZE; r++) {
02925        res = fp_mod_d(a, primes[r], &d);
02926        if (res != MP_OKAY || d == 0) {
02927            return FP_NO;
02928        }
02929    }
02930 
02931    /* now do 't' miller rabins */
02932    fp_init(&b);
02933    for (r = 0; r < t; r++) {
02934        fp_set(&b, primes[r]);
02935        fp_prime_miller_rabin(a, &b, &res);
02936        if (res == FP_NO) {
02937           return FP_NO;
02938        }
02939    }
02940    return FP_YES;
02941 }
02942 
02943 int fp_isprime(fp_int *a)
02944 {
02945   return fp_isprime_ex(a, 8);
02946 }
02947 
02948 int fp_randprime(fp_int* N, int len, WC_RNG* rng, void* heap)
02949 {
02950     static const int USE_BBS = 1;
02951     int   err, type;
02952     byte* buf;
02953 
02954     /* get type */
02955     if (len < 0) {
02956         type = USE_BBS;
02957         len = -len;
02958     } else {
02959         type = 0;
02960     }
02961 
02962     /* allow sizes between 2 and 512 bytes for a prime size */
02963     if (len < 2 || len > 512) {
02964         return FP_VAL;
02965     }
02966 
02967     /* allocate buffer to work with */
02968     buf = (byte*)XMALLOC(len, heap, DYNAMIC_TYPE_TMP_BUFFER);
02969     if (buf == NULL) {
02970         return FP_MEM;
02971     }
02972     XMEMSET(buf, 0, len);
02973 
02974     do {
02975 #ifdef SHOW_GEN
02976         printf(".");
02977         fflush(stdout);
02978 #endif
02979         /* generate value */
02980         err = wc_RNG_GenerateBlock(rng, buf, len);
02981         if (err != 0) {
02982             XFREE(buf, heap, DYNAMIC_TYPE_TMP_BUFFER);
02983             return FP_VAL;
02984         }
02985 
02986         /* munge bits */
02987         buf[0]     |= 0x80 | 0x40;
02988         buf[len-1] |= 0x01 | ((type & USE_BBS) ? 0x02 : 0x00);
02989 
02990         /* load value */
02991         fp_read_unsigned_bin(N, buf, len);
02992 
02993         /* test */
02994     } while (fp_isprime(N) == FP_NO);
02995 
02996     XMEMSET(buf, 0, len);
02997     XFREE(buf, heap, DYNAMIC_TYPE_TMP_BUFFER);
02998 
02999     return FP_OKAY;
03000 }
03001 
03002 /* c = [a, b] */
03003 void fp_lcm(fp_int *a, fp_int *b, fp_int *c)
03004 {
03005    fp_int  t1, t2;
03006 
03007    fp_init(&t1);
03008    fp_init(&t2);
03009    fp_gcd(a, b, &t1);
03010    if (fp_cmp_mag(a, b) == FP_GT) {
03011       fp_div(a, &t1, &t2, NULL);
03012       fp_mul(b, &t2, c);
03013    } else {
03014       fp_div(b, &t1, &t2, NULL);
03015       fp_mul(a, &t2, c);
03016    }
03017 }
03018 
03019 
03020 
03021 /* c = (a, b) */
03022 void fp_gcd(fp_int *a, fp_int *b, fp_int *c)
03023 {
03024    fp_int u, v, r;
03025 
03026    /* either zero than gcd is the largest */
03027    if (fp_iszero (a) == FP_YES && fp_iszero (b) == FP_NO) {
03028      fp_abs (b, c);
03029      return;
03030    }
03031    if (fp_iszero (a) == FP_NO && fp_iszero (b) == FP_YES) {
03032      fp_abs (a, c);
03033      return;
03034    }
03035 
03036    /* optimized.  At this point if a == 0 then
03037     * b must equal zero too
03038     */
03039    if (fp_iszero (a) == FP_YES) {
03040      fp_zero(c);
03041      return;
03042    }
03043 
03044    /* sort inputs */
03045    if (fp_cmp_mag(a, b) != FP_LT) {
03046       fp_init_copy(&u, a);
03047       fp_init_copy(&v, b);
03048    } else {
03049       fp_init_copy(&u, b);
03050       fp_init_copy(&v, a);
03051    }
03052 
03053    fp_init(&r);
03054    while (fp_iszero(&v) == FP_NO) {
03055       fp_mod(&u, &v, &r);
03056       fp_copy(&v, &u);
03057       fp_copy(&r, &v);
03058    }
03059    fp_copy(&u, c);
03060 }
03061 
03062 #endif /* WOLFSSL_KEY_GEN */
03063 
03064 
03065 #if defined(HAVE_ECC) || !defined(NO_PWDBASED) || defined(OPENSSL_EXTRA) || \
03066     defined(WC_RSA_BLINDING)
03067 /* c = a + b */
03068 void fp_add_d(fp_int *a, fp_digit b, fp_int *c)
03069 {
03070    fp_int tmp;
03071    fp_init(&tmp);
03072    fp_set(&tmp, b);
03073    fp_add(a, &tmp, c);
03074 }
03075 
03076 /* external compatibility */
03077 int mp_add_d(fp_int *a, fp_digit b, fp_int *c)
03078 {
03079     fp_add_d(a, b, c);
03080     return MP_OKAY;
03081 }
03082 
03083 #endif  /* HAVE_ECC || !NO_PWDBASED */
03084 
03085 
03086 #if !defined(NO_DSA) || defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN) || \
03087     defined(HAVE_COMP_KEY) || defined(WOLFSSL_DEBUG_MATH) || \
03088     defined(DEBUG_WOLFSSL)
03089 
03090 /* chars used in radix conversions */
03091 static const char* const fp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
03092                                      "abcdefghijklmnopqrstuvwxyz+/";
03093 #endif
03094 
03095 #if !defined(NO_DSA) || defined(HAVE_ECC)
03096 #if DIGIT_BIT == 64 || DIGIT_BIT == 32
03097 static int fp_read_radix_16(fp_int *a, const char *str)
03098 {
03099   int     i, j, k, neg;
03100   char    ch;
03101 
03102   /* if the leading digit is a
03103    * minus set the sign to negative.
03104    */
03105   if (*str == '-') {
03106     ++str;
03107     neg = FP_NEG;
03108   } else {
03109     neg = FP_ZPOS;
03110   }
03111 
03112   j = 0;
03113   k = 0;
03114   for (i = (int)(XSTRLEN(str) - 1); i >= 0; i--) {
03115       ch = str[i];
03116       if (ch >= '0' && ch <= '9')
03117           ch -= '0';
03118       else if (ch >= 'A' && ch <= 'F')
03119           ch -= 'A' - 10;
03120       else if (ch >= 'a' && ch <= 'f')
03121           ch -= 'a' - 10;
03122       else
03123           return FP_VAL;
03124 
03125       a->dp[k] |= ((fp_digit)ch) << j;
03126       j += 4;
03127       k += j == DIGIT_BIT;
03128       j &= DIGIT_BIT - 1;
03129   }
03130 
03131   a->used = k + 1;
03132   fp_clamp(a);
03133   /* set the sign only if a != 0 */
03134   if (fp_iszero(a) != FP_YES) {
03135      a->sign = neg;
03136   }
03137   return FP_OKAY;
03138 }
03139 #endif
03140 
03141 static int fp_read_radix(fp_int *a, const char *str, int radix)
03142 {
03143   int     y, neg;
03144   char    ch;
03145 
03146   /* set the integer to the default of zero */
03147   fp_zero (a);
03148 
03149 #if DIGIT_BIT == 64 || DIGIT_BIT == 32
03150   if (radix == 16)
03151       return fp_read_radix_16(a, str);
03152 #endif
03153 
03154   /* make sure the radix is ok */
03155   if (radix < 2 || radix > 64) {
03156     return FP_VAL;
03157   }
03158 
03159   /* if the leading digit is a
03160    * minus set the sign to negative.
03161    */
03162   if (*str == '-') {
03163     ++str;
03164     neg = FP_NEG;
03165   } else {
03166     neg = FP_ZPOS;
03167   }
03168 
03169   /* process each digit of the string */
03170   while (*str) {
03171     /* if the radix <= 36 the conversion is case insensitive
03172      * this allows numbers like 1AB and 1ab to represent the same  value
03173      * [e.g. in hex]
03174      */
03175     ch = (char)((radix <= 36) ? XTOUPPER((unsigned char)*str) : *str);
03176     for (y = 0; y < 64; y++) {
03177       if (ch == fp_s_rmap[y]) {
03178          break;
03179       }
03180     }
03181 
03182     /* if the char was found in the map
03183      * and is less than the given radix add it
03184      * to the number, otherwise exit the loop.
03185      */
03186     if (y < radix) {
03187       fp_mul_d (a, (fp_digit) radix, a);
03188       fp_add_d (a, (fp_digit) y, a);
03189     } else {
03190       break;
03191     }
03192     ++str;
03193   }
03194 
03195   /* set the sign only if a != 0 */
03196   if (fp_iszero(a) != FP_YES) {
03197      a->sign = neg;
03198   }
03199   return FP_OKAY;
03200 }
03201 
03202 /* fast math conversion */
03203 int mp_read_radix(mp_int *a, const char *str, int radix)
03204 {
03205     return fp_read_radix(a, str, radix);
03206 }
03207 
03208 #endif /* !defined(NO_DSA) || defined(HAVE_ECC) */
03209 
03210 #ifdef HAVE_ECC
03211 
03212 /* fast math conversion */
03213 int mp_sqr(fp_int *A, fp_int *B)
03214 {
03215     fp_sqr(A, B);
03216     return MP_OKAY;
03217 }
03218 
03219 /* fast math conversion */
03220 int mp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
03221 {
03222     fp_montgomery_reduce(a, m, mp);
03223     return MP_OKAY;
03224 }
03225 
03226 
03227 /* fast math conversion */
03228 int mp_montgomery_setup(fp_int *a, fp_digit *rho)
03229 {
03230     return fp_montgomery_setup(a, rho);
03231 }
03232 
03233 int mp_div_2(fp_int * a, fp_int * b)
03234 {
03235     fp_div_2(a, b);
03236     return MP_OKAY;
03237 }
03238 
03239 
03240 int mp_init_copy(fp_int * a, fp_int * b)
03241 {
03242     fp_init_copy(a, b);
03243     return MP_OKAY;
03244 }
03245 
03246 #ifdef HAVE_COMP_KEY
03247 
03248 int mp_cnt_lsb(fp_int* a)
03249 {
03250     return fp_cnt_lsb(a);
03251 }
03252 
03253 #endif /* HAVE_COMP_KEY */
03254 
03255 #endif /* HAVE_ECC */
03256 
03257 #if defined(HAVE_ECC) || !defined(NO_RSA) || !defined(NO_DSA)
03258 /* fast math conversion */
03259 int mp_set(fp_int *a, fp_digit b)
03260 {
03261     fp_set(a,b);
03262     return MP_OKAY;
03263 }
03264 #endif
03265 
03266 #if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
03267     defined(WOLFSSL_DEBUG_MATH) || defined(DEBUG_WOLFSSL) || \
03268     defined(WOLFSSL_PUBLIC_MP)
03269 
03270 /* returns size of ASCII representation */
03271 int mp_radix_size (mp_int *a, int radix, int *size)
03272 {
03273     int     res, digs;
03274     fp_int  t;
03275     fp_digit d;
03276 
03277     *size = 0;
03278 
03279     /* special case for binary */
03280     if (radix == 2) {
03281         *size = fp_count_bits (a) + (a->sign == FP_NEG ? 1 : 0) + 1;
03282         return FP_YES;
03283     }
03284 
03285     /* make sure the radix is in range */
03286     if (radix < 2 || radix > 64) {
03287         return FP_VAL;
03288     }
03289 
03290     if (fp_iszero(a) == MP_YES) {
03291         *size = 2;
03292         return FP_OKAY;
03293     }
03294 
03295     /* digs is the digit count */
03296     digs = 0;
03297 
03298     /* if it's negative add one for the sign */
03299     if (a->sign == FP_NEG) {
03300         ++digs;
03301     }
03302 
03303     /* init a copy of the input */
03304     fp_init_copy (&t, a);
03305 
03306     /* force temp to positive */
03307     t.sign = FP_ZPOS;
03308 
03309     /* fetch out all of the digits */
03310     while (fp_iszero (&t) == FP_NO) {
03311         if ((res = fp_div_d (&t, (mp_digit) radix, &t, &d)) != FP_OKAY) {
03312             fp_zero (&t);
03313             return res;
03314         }
03315         ++digs;
03316     }
03317     fp_zero (&t);
03318 
03319     /* return digs + 1, the 1 is for the NULL byte that would be required. */
03320     *size = digs + 1;
03321     return FP_OKAY;
03322 }
03323 
03324 /* stores a bignum as a ASCII string in a given radix (2..64) */
03325 int mp_toradix (mp_int *a, char *str, int radix)
03326 {
03327     int     res, digs;
03328     fp_int  t;
03329     fp_digit d;
03330     char   *_s = str;
03331 
03332     /* check range of the radix */
03333     if (radix < 2 || radix > 64) {
03334         return FP_VAL;
03335     }
03336 
03337     /* quick out if its zero */
03338     if (fp_iszero(a) == FP_YES) {
03339         *str++ = '0';
03340         *str = '\0';
03341         return FP_YES;
03342     }
03343 
03344     /* init a copy of the input */
03345     fp_init_copy (&t, a);
03346 
03347     /* if it is negative output a - */
03348     if (t.sign == FP_NEG) {
03349         ++_s;
03350         *str++ = '-';
03351         t.sign = FP_ZPOS;
03352     }
03353 
03354     digs = 0;
03355     while (fp_iszero (&t) == FP_NO) {
03356         if ((res = fp_div_d (&t, (fp_digit) radix, &t, &d)) != FP_OKAY) {
03357             fp_zero (&t);
03358             return res;
03359         }
03360         *str++ = fp_s_rmap[d];
03361         ++digs;
03362     }
03363 
03364     /* reverse the digits of the string.  In this case _s points
03365      * to the first digit [excluding the sign] of the number]
03366      */
03367     fp_reverse ((unsigned char *)_s, digs);
03368 
03369     /* append a NULL so the string is properly terminated */
03370     *str = '\0';
03371 
03372     fp_zero (&t);
03373     return FP_OKAY;
03374 }
03375 
03376 #ifdef WOLFSSL_DEBUG_MATH
03377 void mp_dump(const char* desc, mp_int* a, byte verbose)
03378 {
03379   char buffer[FP_SIZE * sizeof(fp_digit) * 2];
03380   int size = FP_SIZE;
03381 
03382 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
03383   size = a->size;
03384 #endif
03385 
03386   printf("%s: ptr=%p, used=%d, sign=%d, size=%d, fpd=%d\n",
03387     desc, a, a->used, a->sign, size, (int)sizeof(fp_digit));
03388 
03389   mp_tohex(a, buffer);
03390   printf("  %s\n  ", buffer);
03391 
03392   if (verbose) {
03393     int i;
03394     for(i=0; i<size * (int)sizeof(fp_digit); i++) {
03395       printf("%x ", *(((byte*)a->dp) + i));
03396     }
03397     printf("\n");
03398   }
03399 }
03400 #endif /* WOLFSSL_DEBUG_MATH */
03401 
03402 #endif /* defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || defined(WOLFSSL_DEBUG_MATH) */
03403 
03404 
03405 int mp_abs(mp_int* a, mp_int* b)
03406 {
03407     fp_abs(a, b);
03408     return FP_OKAY;
03409 }
03410 
03411 
03412 int mp_lshd (mp_int * a, int b)
03413 {
03414     fp_lshd(a, b);
03415     return FP_OKAY;
03416 }
03417 
03418 #endif /* USE_FAST_MATH */
03419