Xuyi Wang / wolfcrypt

Dependents:   OS

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers sp_c64.c Source File

sp_c64.c

00001 /* sp.c
00002  *
00003  * Copyright (C) 2006-2018 wolfSSL Inc.
00004  *
00005  * This file is part of wolfSSL.
00006  *
00007  * wolfSSL is free software; you can redistribute it and/or modify
00008  * it under the terms of the GNU General Public License as published by
00009  * the Free Software Foundation; either version 2 of the License, or
00010  * (at your option) any later version.
00011  *
00012  * wolfSSL is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
00020  */
00021 
00022 /* Implementation by Sean Parkinson. */
00023 
00024 #ifdef HAVE_CONFIG_H
00025     #include <config.h>
00026 #endif
00027 
00028 #include <wolfcrypt/settings.h>
00029 #include <wolfcrypt/error-crypt.h>
00030 #include <wolfcrypt/cpuid.h>
00031 #ifdef NO_INLINE
00032     #include <wolfcrypt/misc.h>
00033 #else
00034     #define WOLFSSL_MISC_INCLUDED
00035     #include <wolfcrypt/src/misc.c>
00036 #endif
00037 
00038 #if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) || \
00039                                     defined(WOLFSSL_HAVE_SP_ECC)
00040 
00041 #ifdef RSA_LOW_MEM
00042 #define SP_RSA_PRIVATE_EXP_D
00043 
00044 #ifndef WOLFSSL_SP_SMALL
00045 #define WOLFSSL_SP_SMALL
00046 #endif
00047 #endif
00048 
00049 #include <wolfcrypt/sp.h>
00050 
00051 #ifndef WOLFSSL_SP_ASM
00052 #if SP_WORD_SIZE == 64
00053 #if defined(WOLFSSL_SP_CACHE_RESISTANT) || defined(WOLFSSL_SP_SMALL)
00054 /* Masks used to obfuscate which of the two addresses will be used. */
00055 static const size_t addr_mask[2] = { 0, (size_t)-1 };
00056 #endif
00057 
00058 #if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
00059 #ifndef WOLFSSL_SP_NO_2048
00060 /* Read big endian unsigned byte aray into r.
00061  *
00062  * r  A single precision integer.
00063  * a  Byte array.
00064  * n  Number of bytes in array to read.
00065  */
00066 static void sp_2048_from_bin(sp_digit* r, int max, const byte* a, int n)
00067 {
00068     int i, j = 0, s = 0;
00069 
00070     r[0] = 0;
00071     for (i = n-1; i >= 0; i--) {
00072         r[j] |= ((sp_digit)a[i]) << s;
00073         if (s >= 49) {
00074             r[j] &= 0x1ffffffffffffffl;
00075             s = 57 - s;
00076             if (j + 1 >= max)
00077                 break;
00078             r[++j] = a[i] >> s;
00079             s = 8 - s;
00080         }
00081         else
00082             s += 8;
00083     }
00084 
00085     for (j++; j < max; j++)
00086         r[j] = 0;
00087 }
00088 
00089 /* Convert an mp_int to an array of sp_digit.
00090  *
00091  * r  A single precision integer.
00092  * a  A multi-precision integer.
00093  */
00094 static void sp_2048_from_mp(sp_digit* r, int max, mp_int* a)
00095 {
00096 #if DIGIT_BIT == 57
00097     int j;
00098 
00099     XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
00100 
00101     for (j = a->used; j < max; j++)
00102         r[j] = 0;
00103 #elif DIGIT_BIT > 57
00104     int i, j = 0, s = 0;
00105 
00106     r[0] = 0;
00107     for (i = 0; i < a->used && j < max; i++) {
00108         r[j] |= a->dp[i] << s;
00109         r[j] &= 0x1ffffffffffffffl;
00110         s = 57 - s;
00111         if (j + 1 >= max)
00112             break;
00113         r[++j] = a->dp[i] >> s;
00114         while (s + 57 <= DIGIT_BIT) {
00115             s += 57;
00116             r[j] &= 0x1ffffffffffffffl;
00117             if (j + 1 >= max)
00118                 break;
00119             if (s < DIGIT_BIT)
00120                 r[++j] = a->dp[i] >> s;
00121             else
00122                 r[++j] = 0;
00123         }
00124         s = DIGIT_BIT - s;
00125     }
00126 
00127     for (j++; j < max; j++)
00128         r[j] = 0;
00129 #else
00130     int i, j = 0, s = 0;
00131 
00132     r[0] = 0;
00133     for (i = 0; i < a->used && j < max; i++) {
00134         r[j] |= ((sp_digit)a->dp[i]) << s;
00135         if (s + DIGIT_BIT >= 57) {
00136             r[j] &= 0x1ffffffffffffffl;
00137             if (j + 1 >= max)
00138                 break;
00139             s = 57 - s;
00140             if (s == DIGIT_BIT) {
00141                 r[++j] = 0;
00142                 s = 0;
00143             }
00144             else {
00145                 r[++j] = a->dp[i] >> s;
00146                 s = DIGIT_BIT - s;
00147             }
00148         }
00149         else
00150             s += DIGIT_BIT;
00151     }
00152 
00153     for (j++; j < max; j++)
00154         r[j] = 0;
00155 #endif
00156 }
00157 
00158 /* Write r as big endian to byte aray.
00159  * Fixed length number of bytes written: 256
00160  *
00161  * r  A single precision integer.
00162  * a  Byte array.
00163  */
00164 static void sp_2048_to_bin(sp_digit* r, byte* a)
00165 {
00166     int i, j, s = 0, b;
00167 
00168     for (i=0; i<35; i++) {
00169         r[i+1] += r[i] >> 57;
00170         r[i] &= 0x1ffffffffffffffl;
00171     }
00172     j = 2048 / 8 - 1;
00173     a[j] = 0;
00174     for (i=0; i<36 && j>=0; i++) {
00175         b = 0;
00176         a[j--] |= r[i] << s; b += 8 - s;
00177         if (j < 0)
00178             break;
00179         while (b < 57) {
00180             a[j--] = r[i] >> b; b += 8;
00181             if (j < 0)
00182                 break;
00183         }
00184         s = 8 - (b - 57);
00185         if (j >= 0)
00186             a[j] = 0;
00187         if (s != 0)
00188             j++;
00189     }
00190 }
00191 
00192 #ifndef WOLFSSL_SP_SMALL
00193 /* Multiply a and b into r. (r = a * b)
00194  *
00195  * r  A single precision integer.
00196  * a  A single precision integer.
00197  * b  A single precision integer.
00198  */
00199 SP_NOINLINE static void sp_2048_mul_9(sp_digit* r, const sp_digit* a,
00200     const sp_digit* b)
00201 {
00202     int128_t t0   = ((int128_t)a[ 0]) * b[ 0];
00203     int128_t t1   = ((int128_t)a[ 0]) * b[ 1]
00204                  + ((int128_t)a[ 1]) * b[ 0];
00205     int128_t t2   = ((int128_t)a[ 0]) * b[ 2]
00206                  + ((int128_t)a[ 1]) * b[ 1]
00207                  + ((int128_t)a[ 2]) * b[ 0];
00208     int128_t t3   = ((int128_t)a[ 0]) * b[ 3]
00209                  + ((int128_t)a[ 1]) * b[ 2]
00210                  + ((int128_t)a[ 2]) * b[ 1]
00211                  + ((int128_t)a[ 3]) * b[ 0];
00212     int128_t t4   = ((int128_t)a[ 0]) * b[ 4]
00213                  + ((int128_t)a[ 1]) * b[ 3]
00214                  + ((int128_t)a[ 2]) * b[ 2]
00215                  + ((int128_t)a[ 3]) * b[ 1]
00216                  + ((int128_t)a[ 4]) * b[ 0];
00217     int128_t t5   = ((int128_t)a[ 0]) * b[ 5]
00218                  + ((int128_t)a[ 1]) * b[ 4]
00219                  + ((int128_t)a[ 2]) * b[ 3]
00220                  + ((int128_t)a[ 3]) * b[ 2]
00221                  + ((int128_t)a[ 4]) * b[ 1]
00222                  + ((int128_t)a[ 5]) * b[ 0];
00223     int128_t t6   = ((int128_t)a[ 0]) * b[ 6]
00224                  + ((int128_t)a[ 1]) * b[ 5]
00225                  + ((int128_t)a[ 2]) * b[ 4]
00226                  + ((int128_t)a[ 3]) * b[ 3]
00227                  + ((int128_t)a[ 4]) * b[ 2]
00228                  + ((int128_t)a[ 5]) * b[ 1]
00229                  + ((int128_t)a[ 6]) * b[ 0];
00230     int128_t t7   = ((int128_t)a[ 0]) * b[ 7]
00231                  + ((int128_t)a[ 1]) * b[ 6]
00232                  + ((int128_t)a[ 2]) * b[ 5]
00233                  + ((int128_t)a[ 3]) * b[ 4]
00234                  + ((int128_t)a[ 4]) * b[ 3]
00235                  + ((int128_t)a[ 5]) * b[ 2]
00236                  + ((int128_t)a[ 6]) * b[ 1]
00237                  + ((int128_t)a[ 7]) * b[ 0];
00238     int128_t t8   = ((int128_t)a[ 0]) * b[ 8]
00239                  + ((int128_t)a[ 1]) * b[ 7]
00240                  + ((int128_t)a[ 2]) * b[ 6]
00241                  + ((int128_t)a[ 3]) * b[ 5]
00242                  + ((int128_t)a[ 4]) * b[ 4]
00243                  + ((int128_t)a[ 5]) * b[ 3]
00244                  + ((int128_t)a[ 6]) * b[ 2]
00245                  + ((int128_t)a[ 7]) * b[ 1]
00246                  + ((int128_t)a[ 8]) * b[ 0];
00247     int128_t t9   = ((int128_t)a[ 1]) * b[ 8]
00248                  + ((int128_t)a[ 2]) * b[ 7]
00249                  + ((int128_t)a[ 3]) * b[ 6]
00250                  + ((int128_t)a[ 4]) * b[ 5]
00251                  + ((int128_t)a[ 5]) * b[ 4]
00252                  + ((int128_t)a[ 6]) * b[ 3]
00253                  + ((int128_t)a[ 7]) * b[ 2]
00254                  + ((int128_t)a[ 8]) * b[ 1];
00255     int128_t t10  = ((int128_t)a[ 2]) * b[ 8]
00256                  + ((int128_t)a[ 3]) * b[ 7]
00257                  + ((int128_t)a[ 4]) * b[ 6]
00258                  + ((int128_t)a[ 5]) * b[ 5]
00259                  + ((int128_t)a[ 6]) * b[ 4]
00260                  + ((int128_t)a[ 7]) * b[ 3]
00261                  + ((int128_t)a[ 8]) * b[ 2];
00262     int128_t t11  = ((int128_t)a[ 3]) * b[ 8]
00263                  + ((int128_t)a[ 4]) * b[ 7]
00264                  + ((int128_t)a[ 5]) * b[ 6]
00265                  + ((int128_t)a[ 6]) * b[ 5]
00266                  + ((int128_t)a[ 7]) * b[ 4]
00267                  + ((int128_t)a[ 8]) * b[ 3];
00268     int128_t t12  = ((int128_t)a[ 4]) * b[ 8]
00269                  + ((int128_t)a[ 5]) * b[ 7]
00270                  + ((int128_t)a[ 6]) * b[ 6]
00271                  + ((int128_t)a[ 7]) * b[ 5]
00272                  + ((int128_t)a[ 8]) * b[ 4];
00273     int128_t t13  = ((int128_t)a[ 5]) * b[ 8]
00274                  + ((int128_t)a[ 6]) * b[ 7]
00275                  + ((int128_t)a[ 7]) * b[ 6]
00276                  + ((int128_t)a[ 8]) * b[ 5];
00277     int128_t t14  = ((int128_t)a[ 6]) * b[ 8]
00278                  + ((int128_t)a[ 7]) * b[ 7]
00279                  + ((int128_t)a[ 8]) * b[ 6];
00280     int128_t t15  = ((int128_t)a[ 7]) * b[ 8]
00281                  + ((int128_t)a[ 8]) * b[ 7];
00282     int128_t t16  = ((int128_t)a[ 8]) * b[ 8];
00283 
00284     t1   += t0  >> 57; r[ 0] = t0  & 0x1ffffffffffffffl;
00285     t2   += t1  >> 57; r[ 1] = t1  & 0x1ffffffffffffffl;
00286     t3   += t2  >> 57; r[ 2] = t2  & 0x1ffffffffffffffl;
00287     t4   += t3  >> 57; r[ 3] = t3  & 0x1ffffffffffffffl;
00288     t5   += t4  >> 57; r[ 4] = t4  & 0x1ffffffffffffffl;
00289     t6   += t5  >> 57; r[ 5] = t5  & 0x1ffffffffffffffl;
00290     t7   += t6  >> 57; r[ 6] = t6  & 0x1ffffffffffffffl;
00291     t8   += t7  >> 57; r[ 7] = t7  & 0x1ffffffffffffffl;
00292     t9   += t8  >> 57; r[ 8] = t8  & 0x1ffffffffffffffl;
00293     t10  += t9  >> 57; r[ 9] = t9  & 0x1ffffffffffffffl;
00294     t11  += t10 >> 57; r[10] = t10 & 0x1ffffffffffffffl;
00295     t12  += t11 >> 57; r[11] = t11 & 0x1ffffffffffffffl;
00296     t13  += t12 >> 57; r[12] = t12 & 0x1ffffffffffffffl;
00297     t14  += t13 >> 57; r[13] = t13 & 0x1ffffffffffffffl;
00298     t15  += t14 >> 57; r[14] = t14 & 0x1ffffffffffffffl;
00299     t16  += t15 >> 57; r[15] = t15 & 0x1ffffffffffffffl;
00300     r[17] = (sp_digit)(t16 >> 57);
00301                        r[16] = t16 & 0x1ffffffffffffffl;
00302 }
00303 
00304 /* Square a and put result in r. (r = a * a)
00305  *
00306  * r  A single precision integer.
00307  * a  A single precision integer.
00308  */
00309 SP_NOINLINE static void sp_2048_sqr_9(sp_digit* r, const sp_digit* a)
00310 {
00311     int128_t t0   =  ((int128_t)a[ 0]) * a[ 0];
00312     int128_t t1   = (((int128_t)a[ 0]) * a[ 1]) * 2;
00313     int128_t t2   = (((int128_t)a[ 0]) * a[ 2]) * 2
00314                  +  ((int128_t)a[ 1]) * a[ 1];
00315     int128_t t3   = (((int128_t)a[ 0]) * a[ 3]
00316                  +  ((int128_t)a[ 1]) * a[ 2]) * 2;
00317     int128_t t4   = (((int128_t)a[ 0]) * a[ 4]
00318                  +  ((int128_t)a[ 1]) * a[ 3]) * 2
00319                  +  ((int128_t)a[ 2]) * a[ 2];
00320     int128_t t5   = (((int128_t)a[ 0]) * a[ 5]
00321                  +  ((int128_t)a[ 1]) * a[ 4]
00322                  +  ((int128_t)a[ 2]) * a[ 3]) * 2;
00323     int128_t t6   = (((int128_t)a[ 0]) * a[ 6]
00324                  +  ((int128_t)a[ 1]) * a[ 5]
00325                  +  ((int128_t)a[ 2]) * a[ 4]) * 2
00326                  +  ((int128_t)a[ 3]) * a[ 3];
00327     int128_t t7   = (((int128_t)a[ 0]) * a[ 7]
00328                  +  ((int128_t)a[ 1]) * a[ 6]
00329                  +  ((int128_t)a[ 2]) * a[ 5]
00330                  +  ((int128_t)a[ 3]) * a[ 4]) * 2;
00331     int128_t t8   = (((int128_t)a[ 0]) * a[ 8]
00332                  +  ((int128_t)a[ 1]) * a[ 7]
00333                  +  ((int128_t)a[ 2]) * a[ 6]
00334                  +  ((int128_t)a[ 3]) * a[ 5]) * 2
00335                  +  ((int128_t)a[ 4]) * a[ 4];
00336     int128_t t9   = (((int128_t)a[ 1]) * a[ 8]
00337                  +  ((int128_t)a[ 2]) * a[ 7]
00338                  +  ((int128_t)a[ 3]) * a[ 6]
00339                  +  ((int128_t)a[ 4]) * a[ 5]) * 2;
00340     int128_t t10  = (((int128_t)a[ 2]) * a[ 8]
00341                  +  ((int128_t)a[ 3]) * a[ 7]
00342                  +  ((int128_t)a[ 4]) * a[ 6]) * 2
00343                  +  ((int128_t)a[ 5]) * a[ 5];
00344     int128_t t11  = (((int128_t)a[ 3]) * a[ 8]
00345                  +  ((int128_t)a[ 4]) * a[ 7]
00346                  +  ((int128_t)a[ 5]) * a[ 6]) * 2;
00347     int128_t t12  = (((int128_t)a[ 4]) * a[ 8]
00348                  +  ((int128_t)a[ 5]) * a[ 7]) * 2
00349                  +  ((int128_t)a[ 6]) * a[ 6];
00350     int128_t t13  = (((int128_t)a[ 5]) * a[ 8]
00351                  +  ((int128_t)a[ 6]) * a[ 7]) * 2;
00352     int128_t t14  = (((int128_t)a[ 6]) * a[ 8]) * 2
00353                  +  ((int128_t)a[ 7]) * a[ 7];
00354     int128_t t15  = (((int128_t)a[ 7]) * a[ 8]) * 2;
00355     int128_t t16  =  ((int128_t)a[ 8]) * a[ 8];
00356 
00357     t1   += t0  >> 57; r[ 0] = t0  & 0x1ffffffffffffffl;
00358     t2   += t1  >> 57; r[ 1] = t1  & 0x1ffffffffffffffl;
00359     t3   += t2  >> 57; r[ 2] = t2  & 0x1ffffffffffffffl;
00360     t4   += t3  >> 57; r[ 3] = t3  & 0x1ffffffffffffffl;
00361     t5   += t4  >> 57; r[ 4] = t4  & 0x1ffffffffffffffl;
00362     t6   += t5  >> 57; r[ 5] = t5  & 0x1ffffffffffffffl;
00363     t7   += t6  >> 57; r[ 6] = t6  & 0x1ffffffffffffffl;
00364     t8   += t7  >> 57; r[ 7] = t7  & 0x1ffffffffffffffl;
00365     t9   += t8  >> 57; r[ 8] = t8  & 0x1ffffffffffffffl;
00366     t10  += t9  >> 57; r[ 9] = t9  & 0x1ffffffffffffffl;
00367     t11  += t10 >> 57; r[10] = t10 & 0x1ffffffffffffffl;
00368     t12  += t11 >> 57; r[11] = t11 & 0x1ffffffffffffffl;
00369     t13  += t12 >> 57; r[12] = t12 & 0x1ffffffffffffffl;
00370     t14  += t13 >> 57; r[13] = t13 & 0x1ffffffffffffffl;
00371     t15  += t14 >> 57; r[14] = t14 & 0x1ffffffffffffffl;
00372     t16  += t15 >> 57; r[15] = t15 & 0x1ffffffffffffffl;
00373     r[17] = (sp_digit)(t16 >> 57);
00374                        r[16] = t16 & 0x1ffffffffffffffl;
00375 }
00376 
00377 /* Add b to a into r. (r = a + b)
00378  *
00379  * r  A single precision integer.
00380  * a  A single precision integer.
00381  * b  A single precision integer.
00382  */
00383 SP_NOINLINE static int sp_2048_add_9(sp_digit* r, const sp_digit* a,
00384         const sp_digit* b)
00385 {
00386     r[ 0] = a[ 0] + b[ 0];
00387     r[ 1] = a[ 1] + b[ 1];
00388     r[ 2] = a[ 2] + b[ 2];
00389     r[ 3] = a[ 3] + b[ 3];
00390     r[ 4] = a[ 4] + b[ 4];
00391     r[ 5] = a[ 5] + b[ 5];
00392     r[ 6] = a[ 6] + b[ 6];
00393     r[ 7] = a[ 7] + b[ 7];
00394     r[ 8] = a[ 8] + b[ 8];
00395 
00396     return 0;
00397 }
00398 
00399 /* Add b to a into r. (r = a + b)
00400  *
00401  * r  A single precision integer.
00402  * a  A single precision integer.
00403  * b  A single precision integer.
00404  */
00405 SP_NOINLINE static int sp_2048_add_18(sp_digit* r, const sp_digit* a,
00406         const sp_digit* b)
00407 {
00408     int i;
00409 
00410     for (i = 0; i < 16; i += 8) {
00411         r[i + 0] = a[i + 0] + b[i + 0];
00412         r[i + 1] = a[i + 1] + b[i + 1];
00413         r[i + 2] = a[i + 2] + b[i + 2];
00414         r[i + 3] = a[i + 3] + b[i + 3];
00415         r[i + 4] = a[i + 4] + b[i + 4];
00416         r[i + 5] = a[i + 5] + b[i + 5];
00417         r[i + 6] = a[i + 6] + b[i + 6];
00418         r[i + 7] = a[i + 7] + b[i + 7];
00419     }
00420     r[16] = a[16] + b[16];
00421     r[17] = a[17] + b[17];
00422 
00423     return 0;
00424 }
00425 
00426 /* Sub b from a into r. (r = a - b)
00427  *
00428  * r  A single precision integer.
00429  * a  A single precision integer.
00430  * b  A single precision integer.
00431  */
00432 SP_NOINLINE static int sp_2048_sub_18(sp_digit* r, const sp_digit* a,
00433         const sp_digit* b)
00434 {
00435     int i;
00436 
00437     for (i = 0; i < 16; i += 8) {
00438         r[i + 0] = a[i + 0] - b[i + 0];
00439         r[i + 1] = a[i + 1] - b[i + 1];
00440         r[i + 2] = a[i + 2] - b[i + 2];
00441         r[i + 3] = a[i + 3] - b[i + 3];
00442         r[i + 4] = a[i + 4] - b[i + 4];
00443         r[i + 5] = a[i + 5] - b[i + 5];
00444         r[i + 6] = a[i + 6] - b[i + 6];
00445         r[i + 7] = a[i + 7] - b[i + 7];
00446     }
00447     r[16] = a[16] - b[16];
00448     r[17] = a[17] - b[17];
00449 
00450     return 0;
00451 }
00452 
00453 /* Multiply a and b into r. (r = a * b)
00454  *
00455  * r  A single precision integer.
00456  * a  A single precision integer.
00457  * b  A single precision integer.
00458  */
00459 SP_NOINLINE static void sp_2048_mul_18(sp_digit* r, const sp_digit* a,
00460     const sp_digit* b)
00461 {
00462     sp_digit* z0 = r;
00463     sp_digit z1[18];
00464     sp_digit* a1 = z1;
00465     sp_digit b1[9];
00466     sp_digit* z2 = r + 18;
00467     sp_2048_add_9(a1, a, &a[9]);
00468     sp_2048_add_9(b1, b, &b[9]);
00469     sp_2048_mul_9(z2, &a[9], &b[9]);
00470     sp_2048_mul_9(z0, a, b);
00471     sp_2048_mul_9(z1, a1, b1);
00472     sp_2048_sub_18(z1, z1, z2);
00473     sp_2048_sub_18(z1, z1, z0);
00474     sp_2048_add_18(r + 9, r + 9, z1);
00475 }
00476 
00477 /* Square a and put result in r. (r = a * a)
00478  *
00479  * r  A single precision integer.
00480  * a  A single precision integer.
00481  */
00482 SP_NOINLINE static void sp_2048_sqr_18(sp_digit* r, const sp_digit* a)
00483 {
00484     sp_digit* z0 = r;
00485     sp_digit z1[18];
00486     sp_digit* a1 = z1;
00487     sp_digit* z2 = r + 18;
00488     sp_2048_add_9(a1, a, &a[9]);
00489     sp_2048_sqr_9(z2, &a[9]);
00490     sp_2048_sqr_9(z0, a);
00491     sp_2048_sqr_9(z1, a1);
00492     sp_2048_sub_18(z1, z1, z2);
00493     sp_2048_sub_18(z1, z1, z0);
00494     sp_2048_add_18(r + 9, r + 9, z1);
00495 }
00496 
00497 /* Add b to a into r. (r = a + b)
00498  *
00499  * r  A single precision integer.
00500  * a  A single precision integer.
00501  * b  A single precision integer.
00502  */
00503 SP_NOINLINE static int sp_2048_add_36(sp_digit* r, const sp_digit* a,
00504         const sp_digit* b)
00505 {
00506     int i;
00507 
00508     for (i = 0; i < 32; i += 8) {
00509         r[i + 0] = a[i + 0] + b[i + 0];
00510         r[i + 1] = a[i + 1] + b[i + 1];
00511         r[i + 2] = a[i + 2] + b[i + 2];
00512         r[i + 3] = a[i + 3] + b[i + 3];
00513         r[i + 4] = a[i + 4] + b[i + 4];
00514         r[i + 5] = a[i + 5] + b[i + 5];
00515         r[i + 6] = a[i + 6] + b[i + 6];
00516         r[i + 7] = a[i + 7] + b[i + 7];
00517     }
00518     r[32] = a[32] + b[32];
00519     r[33] = a[33] + b[33];
00520     r[34] = a[34] + b[34];
00521     r[35] = a[35] + b[35];
00522 
00523     return 0;
00524 }
00525 
00526 /* Sub b from a into r. (r = a - b)
00527  *
00528  * r  A single precision integer.
00529  * a  A single precision integer.
00530  * b  A single precision integer.
00531  */
00532 SP_NOINLINE static int sp_2048_sub_36(sp_digit* r, const sp_digit* a,
00533         const sp_digit* b)
00534 {
00535     int i;
00536 
00537     for (i = 0; i < 32; i += 8) {
00538         r[i + 0] = a[i + 0] - b[i + 0];
00539         r[i + 1] = a[i + 1] - b[i + 1];
00540         r[i + 2] = a[i + 2] - b[i + 2];
00541         r[i + 3] = a[i + 3] - b[i + 3];
00542         r[i + 4] = a[i + 4] - b[i + 4];
00543         r[i + 5] = a[i + 5] - b[i + 5];
00544         r[i + 6] = a[i + 6] - b[i + 6];
00545         r[i + 7] = a[i + 7] - b[i + 7];
00546     }
00547     r[32] = a[32] - b[32];
00548     r[33] = a[33] - b[33];
00549     r[34] = a[34] - b[34];
00550     r[35] = a[35] - b[35];
00551 
00552     return 0;
00553 }
00554 
00555 /* Multiply a and b into r. (r = a * b)
00556  *
00557  * r  A single precision integer.
00558  * a  A single precision integer.
00559  * b  A single precision integer.
00560  */
00561 SP_NOINLINE static void sp_2048_mul_36(sp_digit* r, const sp_digit* a,
00562     const sp_digit* b)
00563 {
00564     sp_digit* z0 = r;
00565     sp_digit z1[36];
00566     sp_digit* a1 = z1;
00567     sp_digit b1[18];
00568     sp_digit* z2 = r + 36;
00569     sp_2048_add_18(a1, a, &a[18]);
00570     sp_2048_add_18(b1, b, &b[18]);
00571     sp_2048_mul_18(z2, &a[18], &b[18]);
00572     sp_2048_mul_18(z0, a, b);
00573     sp_2048_mul_18(z1, a1, b1);
00574     sp_2048_sub_36(z1, z1, z2);
00575     sp_2048_sub_36(z1, z1, z0);
00576     sp_2048_add_36(r + 18, r + 18, z1);
00577 }
00578 
00579 /* Square a and put result in r. (r = a * a)
00580  *
00581  * r  A single precision integer.
00582  * a  A single precision integer.
00583  */
00584 SP_NOINLINE static void sp_2048_sqr_36(sp_digit* r, const sp_digit* a)
00585 {
00586     sp_digit* z0 = r;
00587     sp_digit z1[36];
00588     sp_digit* a1 = z1;
00589     sp_digit* z2 = r + 36;
00590     sp_2048_add_18(a1, a, &a[18]);
00591     sp_2048_sqr_18(z2, &a[18]);
00592     sp_2048_sqr_18(z0, a);
00593     sp_2048_sqr_18(z1, a1);
00594     sp_2048_sub_36(z1, z1, z2);
00595     sp_2048_sub_36(z1, z1, z0);
00596     sp_2048_add_36(r + 18, r + 18, z1);
00597 }
00598 
00599 #endif /* WOLFSSL_SP_SMALL */
00600 #ifdef WOLFSSL_SP_SMALL
00601 /* Add b to a into r. (r = a + b)
00602  *
00603  * r  A single precision integer.
00604  * a  A single precision integer.
00605  * b  A single precision integer.
00606  */
00607 SP_NOINLINE static int sp_2048_add_36(sp_digit* r, const sp_digit* a,
00608         const sp_digit* b)
00609 {
00610     int i;
00611 
00612     for (i = 0; i < 36; i++)
00613         r[i] = a[i] + b[i];
00614 
00615     return 0;
00616 }
00617 #endif /* WOLFSSL_SP_SMALL */
00618 #ifdef WOLFSSL_SP_SMALL
00619 /* Sub b from a into r. (r = a - b)
00620  *
00621  * r  A single precision integer.
00622  * a  A single precision integer.
00623  * b  A single precision integer.
00624  */
00625 SP_NOINLINE static int sp_2048_sub_36(sp_digit* r, const sp_digit* a,
00626         const sp_digit* b)
00627 {
00628     int i;
00629 
00630     for (i = 0; i < 36; i++)
00631         r[i] = a[i] - b[i];
00632 
00633     return 0;
00634 }
00635 
00636 #endif /* WOLFSSL_SP_SMALL */
00637 #ifdef WOLFSSL_SP_SMALL
00638 /* Multiply a and b into r. (r = a * b)
00639  *
00640  * r  A single precision integer.
00641  * a  A single precision integer.
00642  * b  A single precision integer.
00643  */
00644 SP_NOINLINE static void sp_2048_mul_36(sp_digit* r, const sp_digit* a,
00645     const sp_digit* b)
00646 {
00647     int i, j, k;
00648     int128_t c;
00649 
00650     c = ((int128_t)a[35]) * b[35];
00651     r[71] = (sp_digit)(c >> 57);
00652     c = (c & 0x1ffffffffffffffl) << 57;
00653     for (k = 69; k >= 0; k--) {
00654         for (i = 35; i >= 0; i--) {
00655             j = k - i;
00656             if (j >= 36)
00657                 break;
00658             if (j < 0)
00659                 continue;
00660 
00661             c += ((int128_t)a[i]) * b[j];
00662         }
00663         r[k + 2] += c >> 114;
00664         r[k + 1] = (c >> 57) & 0x1ffffffffffffffl;
00665         c = (c & 0x1ffffffffffffffl) << 57;
00666     }
00667     r[0] = (sp_digit)(c >> 57);
00668 }
00669 
00670 /* Square a and put result in r. (r = a * a)
00671  *
00672  * r  A single precision integer.
00673  * a  A single precision integer.
00674  */
00675 SP_NOINLINE static void sp_2048_sqr_36(sp_digit* r, const sp_digit* a)
00676 {
00677     int i, j, k;
00678     int128_t c;
00679 
00680     c = ((int128_t)a[35]) * a[35];
00681     r[71] = (sp_digit)(c >> 57);
00682     c = (c & 0x1ffffffffffffffl) << 57;
00683     for (k = 69; k >= 0; k--) {
00684         for (i = 35; i >= 0; i--) {
00685             j = k - i;
00686             if (j >= 36 || i <= j)
00687                 break;
00688             if (j < 0)
00689                 continue;
00690 
00691             c += ((int128_t)a[i]) * a[j] * 2;
00692         }
00693         if (i == j)
00694            c += ((int128_t)a[i]) * a[i];
00695 
00696         r[k + 2] += c >> 114;
00697         r[k + 1] = (c >> 57) & 0x1ffffffffffffffl;
00698         c = (c & 0x1ffffffffffffffl) << 57;
00699     }
00700     r[0] = (sp_digit)(c >> 57);
00701 }
00702 
00703 #endif /* WOLFSSL_SP_SMALL */
00704 #if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
00705 #ifdef WOLFSSL_SP_SMALL
00706 /* Add b to a into r. (r = a + b)
00707  *
00708  * r  A single precision integer.
00709  * a  A single precision integer.
00710  * b  A single precision integer.
00711  */
00712 SP_NOINLINE static int sp_2048_add_18(sp_digit* r, const sp_digit* a,
00713         const sp_digit* b)
00714 {
00715     int i;
00716 
00717     for (i = 0; i < 18; i++)
00718         r[i] = a[i] + b[i];
00719 
00720     return 0;
00721 }
00722 #endif /* WOLFSSL_SP_SMALL */
00723 #ifdef WOLFSSL_SP_SMALL
00724 /* Sub b from a into r. (r = a - b)
00725  *
00726  * r  A single precision integer.
00727  * a  A single precision integer.
00728  * b  A single precision integer.
00729  */
00730 SP_NOINLINE static int sp_2048_sub_18(sp_digit* r, const sp_digit* a,
00731         const sp_digit* b)
00732 {
00733     int i;
00734 
00735     for (i = 0; i < 18; i++)
00736         r[i] = a[i] - b[i];
00737 
00738     return 0;
00739 }
00740 
00741 #endif /* WOLFSSL_SP_SMALL */
00742 #ifdef WOLFSSL_SP_SMALL
00743 /* Multiply a and b into r. (r = a * b)
00744  *
00745  * r  A single precision integer.
00746  * a  A single precision integer.
00747  * b  A single precision integer.
00748  */
00749 SP_NOINLINE static void sp_2048_mul_18(sp_digit* r, const sp_digit* a,
00750     const sp_digit* b)
00751 {
00752     int i, j, k;
00753     int128_t c;
00754 
00755     c = ((int128_t)a[17]) * b[17];
00756     r[35] = (sp_digit)(c >> 57);
00757     c = (c & 0x1ffffffffffffffl) << 57;
00758     for (k = 33; k >= 0; k--) {
00759         for (i = 17; i >= 0; i--) {
00760             j = k - i;
00761             if (j >= 18)
00762                 break;
00763             if (j < 0)
00764                 continue;
00765 
00766             c += ((int128_t)a[i]) * b[j];
00767         }
00768         r[k + 2] += c >> 114;
00769         r[k + 1] = (c >> 57) & 0x1ffffffffffffffl;
00770         c = (c & 0x1ffffffffffffffl) << 57;
00771     }
00772     r[0] = (sp_digit)(c >> 57);
00773 }
00774 
00775 /* Square a and put result in r. (r = a * a)
00776  *
00777  * r  A single precision integer.
00778  * a  A single precision integer.
00779  */
00780 SP_NOINLINE static void sp_2048_sqr_18(sp_digit* r, const sp_digit* a)
00781 {
00782     int i, j, k;
00783     int128_t c;
00784 
00785     c = ((int128_t)a[17]) * a[17];
00786     r[35] = (sp_digit)(c >> 57);
00787     c = (c & 0x1ffffffffffffffl) << 57;
00788     for (k = 33; k >= 0; k--) {
00789         for (i = 17; i >= 0; i--) {
00790             j = k - i;
00791             if (j >= 18 || i <= j)
00792                 break;
00793             if (j < 0)
00794                 continue;
00795 
00796             c += ((int128_t)a[i]) * a[j] * 2;
00797         }
00798         if (i == j)
00799            c += ((int128_t)a[i]) * a[i];
00800 
00801         r[k + 2] += c >> 114;
00802         r[k + 1] = (c >> 57) & 0x1ffffffffffffffl;
00803         c = (c & 0x1ffffffffffffffl) << 57;
00804     }
00805     r[0] = (sp_digit)(c >> 57);
00806 }
00807 
00808 #endif /* WOLFSSL_SP_SMALL */
00809 #endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
00810 
00811 /* Caclulate the bottom digit of -1/a mod 2^n.
00812  *
00813  * a    A single precision number.
00814  * rho  Bottom word of inverse.
00815  */
00816 static void sp_2048_mont_setup(sp_digit* a, sp_digit* rho)
00817 {
00818     sp_digit x, b;
00819 
00820     b = a[0];
00821     x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
00822     x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
00823     x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
00824     x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
00825     x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
00826     x &= 0x1ffffffffffffffl;
00827 
00828     /* rho = -1/m mod b */
00829     *rho = (1L << 57) - x;
00830 }
00831 
00832 #if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
00833 /* r = 2^n mod m where n is the number of bits to reduce by.
00834  * Given m must be 2048 bits, just need to subtract.
00835  *
00836  * r  A single precision number.
00837  * m  A signle precision number.
00838  */
00839 static void sp_2048_mont_norm_18(sp_digit* r, sp_digit* m)
00840 {
00841     /* Set r = 2^n - 1. */
00842 #ifdef WOLFSSL_SP_SMALL
00843     int i;
00844 
00845     for (i=0; i<17; i++)
00846         r[i] = 0x1ffffffffffffffl;
00847 #else
00848     int i;
00849 
00850     for (i = 0; i < 16; i += 8) {
00851         r[i + 0] = 0x1ffffffffffffffl;
00852         r[i + 1] = 0x1ffffffffffffffl;
00853         r[i + 2] = 0x1ffffffffffffffl;
00854         r[i + 3] = 0x1ffffffffffffffl;
00855         r[i + 4] = 0x1ffffffffffffffl;
00856         r[i + 5] = 0x1ffffffffffffffl;
00857         r[i + 6] = 0x1ffffffffffffffl;
00858         r[i + 7] = 0x1ffffffffffffffl;
00859     }
00860     r[16] = 0x1ffffffffffffffl;
00861 #endif
00862     r[17] = 0x7fffffffffffffl;
00863 
00864     /* r = (2^n - 1) mod n */
00865     sp_2048_sub_18(r, r, m);
00866 
00867     /* Add one so r = 2^n mod m */
00868     r[0] += 1;
00869 }
00870 
00871 /* Compare a with b in constant time.
00872  *
00873  * a  A single precision integer.
00874  * b  A single precision integer.
00875  * return -ve, 0 or +ve if a is less than, equal to or greater than b
00876  * respectively.
00877  */
00878 static sp_digit sp_2048_cmp_18(const sp_digit* a, const sp_digit* b)
00879 {
00880     sp_digit r = 0;
00881 #ifdef WOLFSSL_SP_SMALL
00882     int i;
00883 
00884     for (i=17; i>=0; i--)
00885         r |= (a[i] - b[i]) & (0 - !r);
00886 #else
00887     int i;
00888 
00889     r |= (a[17] - b[17]) & (0 - !r);
00890     r |= (a[16] - b[16]) & (0 - !r);
00891     for (i = 8; i >= 0; i -= 8) {
00892         r |= (a[i + 7] - b[i + 7]) & (0 - !r);
00893         r |= (a[i + 6] - b[i + 6]) & (0 - !r);
00894         r |= (a[i + 5] - b[i + 5]) & (0 - !r);
00895         r |= (a[i + 4] - b[i + 4]) & (0 - !r);
00896         r |= (a[i + 3] - b[i + 3]) & (0 - !r);
00897         r |= (a[i + 2] - b[i + 2]) & (0 - !r);
00898         r |= (a[i + 1] - b[i + 1]) & (0 - !r);
00899         r |= (a[i + 0] - b[i + 0]) & (0 - !r);
00900     }
00901 #endif /* WOLFSSL_SP_SMALL */
00902 
00903     return r;
00904 }
00905 
00906 /* Conditionally subtract b from a using the mask m.
00907  * m is -1 to subtract and 0 when not.
00908  *
00909  * r  A single precision number representing condition subtract result.
00910  * a  A single precision number to subtract from.
00911  * b  A single precision number to subtract.
00912  * m  Mask value to apply.
00913  */
00914 static void sp_2048_cond_sub_18(sp_digit* r, const sp_digit* a,
00915         const sp_digit* b, const sp_digit m)
00916 {
00917 #ifdef WOLFSSL_SP_SMALL
00918     int i;
00919 
00920     for (i = 0; i < 18; i++)
00921         r[i] = a[i] - (b[i] & m);
00922 #else
00923     int i;
00924 
00925     for (i = 0; i < 16; i += 8) {
00926         r[i + 0] = a[i + 0] - (b[i + 0] & m);
00927         r[i + 1] = a[i + 1] - (b[i + 1] & m);
00928         r[i + 2] = a[i + 2] - (b[i + 2] & m);
00929         r[i + 3] = a[i + 3] - (b[i + 3] & m);
00930         r[i + 4] = a[i + 4] - (b[i + 4] & m);
00931         r[i + 5] = a[i + 5] - (b[i + 5] & m);
00932         r[i + 6] = a[i + 6] - (b[i + 6] & m);
00933         r[i + 7] = a[i + 7] - (b[i + 7] & m);
00934     }
00935     r[16] = a[16] - (b[16] & m);
00936     r[17] = a[17] - (b[17] & m);
00937 #endif /* WOLFSSL_SP_SMALL */
00938 }
00939 
00940 /* Mul a by scalar b and add into r. (r += a * b)
00941  *
00942  * r  A single precision integer.
00943  * a  A single precision integer.
00944  * b  A scalar.
00945  */
00946 SP_NOINLINE static void sp_2048_mul_add_18(sp_digit* r, const sp_digit* a,
00947         const sp_digit b)
00948 {
00949 #ifdef WOLFSSL_SP_SMALL
00950     int128_t tb = b;
00951     int128_t t = 0;
00952     int i;
00953 
00954     for (i = 0; i < 18; i++) {
00955         t += (tb * a[i]) + r[i];
00956         r[i] = t & 0x1ffffffffffffffl;
00957         t >>= 57;
00958     }
00959     r[18] += t;
00960 #else
00961     int128_t tb = b;
00962     int128_t t[8];
00963     int i;
00964 
00965     t[0] = tb * a[0]; r[0] += t[0] & 0x1ffffffffffffffl;
00966     for (i = 0; i < 16; i += 8) {
00967         t[1] = tb * a[i+1];
00968         r[i+1] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffl);
00969         t[2] = tb * a[i+2];
00970         r[i+2] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffl);
00971         t[3] = tb * a[i+3];
00972         r[i+3] += (t[2] >> 57) + (t[3] & 0x1ffffffffffffffl);
00973         t[4] = tb * a[i+4];
00974         r[i+4] += (t[3] >> 57) + (t[4] & 0x1ffffffffffffffl);
00975         t[5] = tb * a[i+5];
00976         r[i+5] += (t[4] >> 57) + (t[5] & 0x1ffffffffffffffl);
00977         t[6] = tb * a[i+6];
00978         r[i+6] += (t[5] >> 57) + (t[6] & 0x1ffffffffffffffl);
00979         t[7] = tb * a[i+7];
00980         r[i+7] += (t[6] >> 57) + (t[7] & 0x1ffffffffffffffl);
00981         t[0] = tb * a[i+8];
00982         r[i+8] += (t[7] >> 57) + (t[0] & 0x1ffffffffffffffl);
00983     }
00984     t[1] = tb * a[17]; r[17] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffl);
00985     r[18] +=  t[1] >> 57;
00986 #endif /* WOLFSSL_SP_SMALL */
00987 }
00988 
00989 /* Normalize the values in each word to 57.
00990  *
00991  * a  Array of sp_digit to normalize.
00992  */
00993 static void sp_2048_norm_18(sp_digit* a)
00994 {
00995 #ifdef WOLFSSL_SP_SMALL
00996     int i;
00997     for (i = 0; i < 17; i++) {
00998         a[i+1] += a[i] >> 57;
00999         a[i] &= 0x1ffffffffffffffl;
01000     }
01001 #else
01002     int i;
01003     for (i = 0; i < 16; i += 8) {
01004         a[i+1] += a[i+0] >> 57; a[i+0] &= 0x1ffffffffffffffl;
01005         a[i+2] += a[i+1] >> 57; a[i+1] &= 0x1ffffffffffffffl;
01006         a[i+3] += a[i+2] >> 57; a[i+2] &= 0x1ffffffffffffffl;
01007         a[i+4] += a[i+3] >> 57; a[i+3] &= 0x1ffffffffffffffl;
01008         a[i+5] += a[i+4] >> 57; a[i+4] &= 0x1ffffffffffffffl;
01009         a[i+6] += a[i+5] >> 57; a[i+5] &= 0x1ffffffffffffffl;
01010         a[i+7] += a[i+6] >> 57; a[i+6] &= 0x1ffffffffffffffl;
01011         a[i+8] += a[i+7] >> 57; a[i+7] &= 0x1ffffffffffffffl;
01012         a[i+9] += a[i+8] >> 57; a[i+8] &= 0x1ffffffffffffffl;
01013     }
01014     a[16+1] += a[16] >> 57;
01015     a[16] &= 0x1ffffffffffffffl;
01016 #endif
01017 }
01018 
01019 /* Shift the result in the high 1024 bits down to the bottom.
01020  *
01021  * r  A single precision number.
01022  * a  A single precision number.
01023  */
01024 static void sp_2048_mont_shift_18(sp_digit* r, const sp_digit* a)
01025 {
01026 #ifdef WOLFSSL_SP_SMALL
01027     int i;
01028     word64 n;
01029 
01030     n = a[17] >> 55;
01031     for (i = 0; i < 17; i++) {
01032         n += a[18 + i] << 2;
01033         r[i] = n & 0x1ffffffffffffffl;
01034         n >>= 57;
01035     }
01036     n += a[35] << 2;
01037     r[17] = n;
01038 #else
01039     word64 n;
01040     int i;
01041 
01042     n  = a[17] >> 55;
01043     for (i = 0; i < 16; i += 8) {
01044         n += a[i+18] << 2; r[i+0] = n & 0x1ffffffffffffffl; n >>= 57;
01045         n += a[i+19] << 2; r[i+1] = n & 0x1ffffffffffffffl; n >>= 57;
01046         n += a[i+20] << 2; r[i+2] = n & 0x1ffffffffffffffl; n >>= 57;
01047         n += a[i+21] << 2; r[i+3] = n & 0x1ffffffffffffffl; n >>= 57;
01048         n += a[i+22] << 2; r[i+4] = n & 0x1ffffffffffffffl; n >>= 57;
01049         n += a[i+23] << 2; r[i+5] = n & 0x1ffffffffffffffl; n >>= 57;
01050         n += a[i+24] << 2; r[i+6] = n & 0x1ffffffffffffffl; n >>= 57;
01051         n += a[i+25] << 2; r[i+7] = n & 0x1ffffffffffffffl; n >>= 57;
01052     }
01053     n += a[34] << 2; r[16] = n & 0x1ffffffffffffffl; n >>= 57;
01054     n += a[35] << 2; r[17] = n;
01055 #endif /* WOLFSSL_SP_SMALL */
01056     XMEMSET(&r[18], 0, sizeof(*r) * 18);
01057 }
01058 
01059 /* Reduce the number back to 2048 bits using Montgomery reduction.
01060  *
01061  * a   A single precision number to reduce in place.
01062  * m   The single precision number representing the modulus.
01063  * mp  The digit representing the negative inverse of m mod 2^n.
01064  */
01065 static void sp_2048_mont_reduce_18(sp_digit* a, sp_digit* m, sp_digit mp)
01066 {
01067     int i;
01068     sp_digit mu;
01069 
01070     for (i=0; i<17; i++) {
01071         mu = (a[i] * mp) & 0x1ffffffffffffffl;
01072         sp_2048_mul_add_18(a+i, m, mu);
01073         a[i+1] += a[i] >> 57;
01074     }
01075     mu = (a[i] * mp) & 0x7fffffffffffffl;
01076     sp_2048_mul_add_18(a+i, m, mu);
01077     a[i+1] += a[i] >> 57;
01078     a[i] &= 0x1ffffffffffffffl;
01079 
01080     sp_2048_mont_shift_18(a, a);
01081     sp_2048_cond_sub_18(a, a, m, 0 - ((a[17] >> 55) > 0));
01082     sp_2048_norm_18(a);
01083 }
01084 
01085 /* Multiply two Montogmery form numbers mod the modulus (prime).
01086  * (r = a * b mod m)
01087  *
01088  * r   Result of multiplication.
01089  * a   First number to multiply in Montogmery form.
01090  * b   Second number to multiply in Montogmery form.
01091  * m   Modulus (prime).
01092  * mp  Montogmery mulitplier.
01093  */
01094 static void sp_2048_mont_mul_18(sp_digit* r, sp_digit* a, sp_digit* b,
01095         sp_digit* m, sp_digit mp)
01096 {
01097     sp_2048_mul_18(r, a, b);
01098     sp_2048_mont_reduce_18(r, m, mp);
01099 }
01100 
01101 /* Square the Montgomery form number. (r = a * a mod m)
01102  *
01103  * r   Result of squaring.
01104  * a   Number to square in Montogmery form.
01105  * m   Modulus (prime).
01106  * mp  Montogmery mulitplier.
01107  */
01108 static void sp_2048_mont_sqr_18(sp_digit* r, sp_digit* a, sp_digit* m,
01109         sp_digit mp)
01110 {
01111     sp_2048_sqr_18(r, a);
01112     sp_2048_mont_reduce_18(r, m, mp);
01113 }
01114 
01115 /* Multiply a by scalar b into r. (r = a * b)
01116  *
01117  * r  A single precision integer.
01118  * a  A single precision integer.
01119  * b  A scalar.
01120  */
01121 SP_NOINLINE static void sp_2048_mul_d_18(sp_digit* r, const sp_digit* a,
01122     const sp_digit b)
01123 {
01124 #ifdef WOLFSSL_SP_SMALL
01125     int128_t tb = b;
01126     int128_t t = 0;
01127     int i;
01128 
01129     for (i = 0; i < 18; i++) {
01130         t += tb * a[i];
01131         r[i] = t & 0x1ffffffffffffffl;
01132         t >>= 57;
01133     }
01134     r[18] = (sp_digit)t;
01135 #else
01136     int128_t tb = b;
01137     int128_t t[8];
01138     int i;
01139 
01140     t[0] = tb * a[0]; r[0] = t[0] & 0x1ffffffffffffffl;
01141     for (i = 0; i < 16; i += 8) {
01142         t[1] = tb * a[i+1];
01143         r[i+1] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffl);
01144         t[2] = tb * a[i+2];
01145         r[i+2] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffl);
01146         t[3] = tb * a[i+3];
01147         r[i+3] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffl);
01148         t[4] = tb * a[i+4];
01149         r[i+4] = (sp_digit)(t[3] >> 57) + (t[4] & 0x1ffffffffffffffl);
01150         t[5] = tb * a[i+5];
01151         r[i+5] = (sp_digit)(t[4] >> 57) + (t[5] & 0x1ffffffffffffffl);
01152         t[6] = tb * a[i+6];
01153         r[i+6] = (sp_digit)(t[5] >> 57) + (t[6] & 0x1ffffffffffffffl);
01154         t[7] = tb * a[i+7];
01155         r[i+7] = (sp_digit)(t[6] >> 57) + (t[7] & 0x1ffffffffffffffl);
01156         t[0] = tb * a[i+8];
01157         r[i+8] = (sp_digit)(t[7] >> 57) + (t[0] & 0x1ffffffffffffffl);
01158     }
01159     t[1] = tb * a[17];
01160     r[17] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffl);
01161     r[18] =  (sp_digit)(t[1] >> 57);
01162 #endif /* WOLFSSL_SP_SMALL */
01163 }
01164 
01165 /* Conditionally add a and b using the mask m.
01166  * m is -1 to add and 0 when not.
01167  *
01168  * r  A single precision number representing conditional add result.
01169  * a  A single precision number to add with.
01170  * b  A single precision number to add.
01171  * m  Mask value to apply.
01172  */
01173 static void sp_2048_cond_add_18(sp_digit* r, const sp_digit* a,
01174         const sp_digit* b, const sp_digit m)
01175 {
01176 #ifdef WOLFSSL_SP_SMALL
01177     int i;
01178 
01179     for (i = 0; i < 18; i++)
01180         r[i] = a[i] + (b[i] & m);
01181 #else
01182     int i;
01183 
01184     for (i = 0; i < 16; i += 8) {
01185         r[i + 0] = a[i + 0] + (b[i + 0] & m);
01186         r[i + 1] = a[i + 1] + (b[i + 1] & m);
01187         r[i + 2] = a[i + 2] + (b[i + 2] & m);
01188         r[i + 3] = a[i + 3] + (b[i + 3] & m);
01189         r[i + 4] = a[i + 4] + (b[i + 4] & m);
01190         r[i + 5] = a[i + 5] + (b[i + 5] & m);
01191         r[i + 6] = a[i + 6] + (b[i + 6] & m);
01192         r[i + 7] = a[i + 7] + (b[i + 7] & m);
01193     }
01194     r[16] = a[16] + (b[16] & m);
01195     r[17] = a[17] + (b[17] & m);
01196 #endif /* WOLFSSL_SP_SMALL */
01197 }
01198 
01199 #ifdef WOLFSSL_SMALL
01200 /* Sub b from a into r. (r = a - b)
01201  *
01202  * r  A single precision integer.
01203  * a  A single precision integer.
01204  * b  A single precision integer.
01205  */
01206 SP_NOINLINE static int sp_2048_sub_18(sp_digit* r, const sp_digit* a,
01207         const sp_digit* b)
01208 {
01209     int i;
01210 
01211     for (i = 0; i < 18; i++)
01212         r[i] = a[i] - b[i];
01213 
01214     return 0;
01215 }
01216 
01217 #endif
01218 #ifdef WOLFSSL_SMALL
01219 /* Add b to a into r. (r = a + b)
01220  *
01221  * r  A single precision integer.
01222  * a  A single precision integer.
01223  * b  A single precision integer.
01224  */
01225 SP_NOINLINE static int sp_2048_add_18(sp_digit* r, const sp_digit* a,
01226         const sp_digit* b)
01227 {
01228     int i;
01229 
01230     for (i = 0; i < 18; i++)
01231         r[i] = a[i] + b[i];
01232 
01233     return 0;
01234 }
01235 #endif
01236 /* Divide d in a and put remainder into r (m*d + r = a)
01237  * m is not calculated as it is not needed at this time.
01238  *
01239  * a  Nmber to be divided.
01240  * d  Number to divide with.
01241  * m  Multiplier result.
01242  * r  Remainder from the division.
01243  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
01244  */
01245 static int sp_2048_div_18(sp_digit* a, sp_digit* d, sp_digit* m,
01246         sp_digit* r)
01247 {
01248     int i;
01249     int128_t d1;
01250     sp_digit div, r1;
01251 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
01252     sp_digit* td;
01253 #else
01254     sp_digit t1d[36], t2d[18 + 1];
01255 #endif
01256     sp_digit* t1;
01257     sp_digit* t2;
01258     int err = MP_OKAY;
01259 
01260 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
01261     td = XMALLOC(sizeof(sp_digit) * (3 * 18 + 1), NULL, DYNAMIC_TYPE_TMP_BUFFER);
01262     if (td != NULL) {
01263         t1 = td;
01264         t2 = td + 2 * 18;
01265     }
01266     else
01267         err = MEMORY_E;
01268 #else
01269     t1 = t1d;
01270     t2 = t2d;
01271 #endif
01272 
01273     (void)m;
01274 
01275     if (err == MP_OKAY) {
01276         div = d[17];
01277         XMEMCPY(t1, a, sizeof(*t1) * 2 * 18);
01278         for (i=17; i>=0; i--) {
01279             t1[18 + i] += t1[18 + i - 1] >> 57;
01280             t1[18 + i - 1] &= 0x1ffffffffffffffl;
01281             d1 = t1[18 + i];
01282             d1 <<= 57;
01283             d1 += t1[18 + i - 1];
01284             r1 = (sp_digit)(d1 / div);
01285 
01286             sp_2048_mul_d_18(t2, d, r1);
01287             sp_2048_sub_18(&t1[i], &t1[i], t2);
01288             t1[18 + i] -= t2[18];
01289             t1[18 + i] += t1[18 + i - 1] >> 57;
01290             t1[18 + i - 1] &= 0x1ffffffffffffffl;
01291             r1 = (((-t1[18 + i]) << 57) - t1[18 + i - 1]) / div;
01292             r1++;
01293             sp_2048_mul_d_18(t2, d, r1);
01294             sp_2048_add_18(&t1[i], &t1[i], t2);
01295             t1[18 + i] += t1[18 + i - 1] >> 57;
01296             t1[18 + i - 1] &= 0x1ffffffffffffffl;
01297         }
01298         t1[18 - 1] += t1[18 - 2] >> 57;
01299         t1[18 - 2] &= 0x1ffffffffffffffl;
01300         d1 = t1[18 - 1];
01301         r1 = (sp_digit)(d1 / div);
01302 
01303         sp_2048_mul_d_18(t2, d, r1);
01304         sp_2048_sub_18(t1, t1, t2);
01305         XMEMCPY(r, t1, sizeof(*r) * 2 * 18);
01306         for (i=0; i<16; i++) {
01307             r[i+1] += r[i] >> 57;
01308             r[i] &= 0x1ffffffffffffffl;
01309         }
01310         sp_2048_cond_add_18(r, r, d, 0 - (r[17] < 0));
01311     }
01312 
01313 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
01314     if (td != NULL)
01315         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
01316 #endif
01317 
01318     return err;
01319 }
01320 
01321 /* Reduce a modulo m into r. (r = a mod m)
01322  *
01323  * r  A single precision number that is the reduced result.
01324  * a  A single precision number that is to be reduced.
01325  * m  A single precision number that is the modulus to reduce with.
01326  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
01327  */
01328 static int sp_2048_mod_18(sp_digit* r, sp_digit* a, sp_digit* m)
01329 {
01330     return sp_2048_div_18(a, m, NULL, r);
01331 }
01332 
01333 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
01334  *
01335  * r     A single precision number that is the result of the operation.
01336  * a     A single precision number being exponentiated.
01337  * e     A single precision number that is the exponent.
01338  * bits  The number of bits in the exponent.
01339  * m     A single precision number that is the modulus.
01340  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
01341  */
01342 static int sp_2048_mod_exp_18(sp_digit* r, sp_digit* a, sp_digit* e, int bits,
01343     sp_digit* m, int reduceA)
01344 {
01345 #ifdef WOLFSSL_SP_SMALL
01346     sp_digit* td;
01347     sp_digit* t[3];
01348     sp_digit* norm;
01349     sp_digit mp = 1;
01350     sp_digit n;
01351     int i;
01352     int c, y;
01353     int err = MP_OKAY;
01354 
01355     td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 18 * 2, NULL,
01356                             DYNAMIC_TYPE_TMP_BUFFER);
01357     if (td == NULL)
01358         err = MEMORY_E;
01359 
01360     if (err == MP_OKAY) {
01361         XMEMSET(td, 0, sizeof(*td) * 3 * 18 * 2);
01362 
01363         norm = t[0] = td;
01364         t[1] = &td[18 * 2];
01365         t[2] = &td[2 * 18 * 2];
01366 
01367         sp_2048_mont_setup(m, &mp);
01368         sp_2048_mont_norm_18(norm, m);
01369 
01370         if (reduceA)
01371             err = sp_2048_mod_18(t[1], a, m);
01372         else
01373             XMEMCPY(t[1], a, sizeof(sp_digit) * 18);
01374     }
01375     if (err == MP_OKAY) {
01376         sp_2048_mul_18(t[1], t[1], norm);
01377         err = sp_2048_mod_18(t[1], t[1], m);
01378     }
01379 
01380     if (err == MP_OKAY) {
01381         i = bits / 57;
01382         c = bits % 57;
01383         n = e[i--] << (57 - c);
01384         for (; ; c--) {
01385             if (c == 0) {
01386                 if (i == -1)
01387                     break;
01388 
01389                 n = e[i--];
01390                 c = 57;
01391             }
01392 
01393             y = (n >> 56) & 1;
01394             n <<= 1;
01395 
01396             sp_2048_mont_mul_18(t[y^1], t[0], t[1], m, mp);
01397 
01398             XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
01399                                   ((size_t)t[1] & addr_mask[y])),
01400                     sizeof(*t[2]) * 18 * 2);
01401             sp_2048_mont_sqr_18(t[2], t[2], m, mp);
01402             XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
01403                             ((size_t)t[1] & addr_mask[y])), t[2],
01404                     sizeof(*t[2]) * 18 * 2);
01405         }
01406 
01407         sp_2048_mont_reduce_18(t[0], m, mp);
01408         n = sp_2048_cmp_18(t[0], m);
01409         sp_2048_cond_sub_18(t[0], t[0], m, (n < 0) - 1);
01410         XMEMCPY(r, t[0], sizeof(*r) * 18 * 2);
01411 
01412     }
01413 
01414     if (td != NULL)
01415         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
01416 
01417     return err;
01418 #elif defined(WOLFSSL_SP_CACHE_RESISTANT)
01419 #ifndef WOLFSSL_SMALL_STACK
01420     sp_digit t[3][36];
01421 #else
01422     sp_digit* td;
01423     sp_digit* t[3];
01424 #endif
01425     sp_digit* norm;
01426     sp_digit mp = 1;
01427     sp_digit n;
01428     int i;
01429     int c, y;
01430     int err = MP_OKAY;
01431 
01432 #ifdef WOLFSSL_SMALL_STACK
01433     td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 18 * 2, NULL,
01434                             DYNAMIC_TYPE_TMP_BUFFER);
01435     if (td == NULL)
01436         err = MEMORY_E;
01437 
01438     if (err == MP_OKAY) {
01439         t[0] = td;
01440         t[1] = &td[18 * 2];
01441         t[2] = &td[2 * 18 * 2];
01442         norm = t[0];
01443     }
01444 #else
01445     norm = t[0];
01446 #endif
01447 
01448     if (err == MP_OKAY) {
01449         sp_2048_mont_setup(m, &mp);
01450         sp_2048_mont_norm_18(norm, m);
01451 
01452         if (reduceA) {
01453             err = sp_2048_mod_18(t[1], a, m);
01454             if (err == MP_OKAY) {
01455                 sp_2048_mul_18(t[1], t[1], norm);
01456                 err = sp_2048_mod_18(t[1], t[1], m);
01457             }
01458         }
01459         else {
01460             sp_2048_mul_18(t[1], a, norm);
01461             err = sp_2048_mod_18(t[1], t[1], m);
01462         }
01463     }
01464 
01465     if (err == MP_OKAY) {
01466         i = bits / 57;
01467         c = bits % 57;
01468         n = e[i--] << (57 - c);
01469         for (; ; c--) {
01470             if (c == 0) {
01471                 if (i == -1)
01472                     break;
01473 
01474                 n = e[i--];
01475                 c = 57;
01476             }
01477 
01478             y = (n >> 56) & 1;
01479             n <<= 1;
01480 
01481             sp_2048_mont_mul_18(t[y^1], t[0], t[1], m, mp);
01482 
01483             XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
01484                                  ((size_t)t[1] & addr_mask[y])), sizeof(t[2]));
01485             sp_2048_mont_sqr_18(t[2], t[2], m, mp);
01486             XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
01487                            ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2]));
01488         }
01489 
01490         sp_2048_mont_reduce_18(t[0], m, mp);
01491         n = sp_2048_cmp_18(t[0], m);
01492         sp_2048_cond_sub_18(t[0], t[0], m, (n < 0) - 1);
01493         XMEMCPY(r, t[0], sizeof(t[0]));
01494     }
01495 
01496 #ifdef WOLFSSL_SMALL_STACK
01497     if (td != NULL)
01498         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
01499 #endif
01500 
01501     return err;
01502 #else
01503 #ifndef WOLFSSL_SMALL_STACK
01504     sp_digit t[32][36];
01505 #else
01506     sp_digit* t[32];
01507     sp_digit* td;
01508 #endif
01509     sp_digit* norm;
01510     sp_digit rt[36];
01511     sp_digit mp = 1;
01512     sp_digit n;
01513     int i;
01514     int c, y;
01515     int err = MP_OKAY;
01516 
01517 #ifdef WOLFSSL_SMALL_STACK
01518     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 36, NULL,
01519                             DYNAMIC_TYPE_TMP_BUFFER);
01520     if (td == NULL)
01521         err = MEMORY_E;
01522 
01523     if (err == MP_OKAY) {
01524         for (i=0; i<32; i++)
01525             t[i] = td + i * 36;
01526         norm = t[0];
01527     }
01528 #else
01529     norm = t[0];
01530 #endif
01531 
01532     if (err == MP_OKAY) {
01533         sp_2048_mont_setup(m, &mp);
01534         sp_2048_mont_norm_18(norm, m);
01535 
01536         if (reduceA) {
01537             err = sp_2048_mod_18(t[1], a, m);
01538             if (err == MP_OKAY) {
01539                 sp_2048_mul_18(t[1], t[1], norm);
01540                 err = sp_2048_mod_18(t[1], t[1], m);
01541             }
01542         }
01543         else {
01544             sp_2048_mul_18(t[1], a, norm);
01545             err = sp_2048_mod_18(t[1], t[1], m);
01546         }
01547     }
01548 
01549     if (err == MP_OKAY) {
01550         sp_2048_mont_sqr_18(t[ 2], t[ 1], m, mp);
01551         sp_2048_mont_mul_18(t[ 3], t[ 2], t[ 1], m, mp);
01552         sp_2048_mont_sqr_18(t[ 4], t[ 2], m, mp);
01553         sp_2048_mont_mul_18(t[ 5], t[ 3], t[ 2], m, mp);
01554         sp_2048_mont_sqr_18(t[ 6], t[ 3], m, mp);
01555         sp_2048_mont_mul_18(t[ 7], t[ 4], t[ 3], m, mp);
01556         sp_2048_mont_sqr_18(t[ 8], t[ 4], m, mp);
01557         sp_2048_mont_mul_18(t[ 9], t[ 5], t[ 4], m, mp);
01558         sp_2048_mont_sqr_18(t[10], t[ 5], m, mp);
01559         sp_2048_mont_mul_18(t[11], t[ 6], t[ 5], m, mp);
01560         sp_2048_mont_sqr_18(t[12], t[ 6], m, mp);
01561         sp_2048_mont_mul_18(t[13], t[ 7], t[ 6], m, mp);
01562         sp_2048_mont_sqr_18(t[14], t[ 7], m, mp);
01563         sp_2048_mont_mul_18(t[15], t[ 8], t[ 7], m, mp);
01564         sp_2048_mont_sqr_18(t[16], t[ 8], m, mp);
01565         sp_2048_mont_mul_18(t[17], t[ 9], t[ 8], m, mp);
01566         sp_2048_mont_sqr_18(t[18], t[ 9], m, mp);
01567         sp_2048_mont_mul_18(t[19], t[10], t[ 9], m, mp);
01568         sp_2048_mont_sqr_18(t[20], t[10], m, mp);
01569         sp_2048_mont_mul_18(t[21], t[11], t[10], m, mp);
01570         sp_2048_mont_sqr_18(t[22], t[11], m, mp);
01571         sp_2048_mont_mul_18(t[23], t[12], t[11], m, mp);
01572         sp_2048_mont_sqr_18(t[24], t[12], m, mp);
01573         sp_2048_mont_mul_18(t[25], t[13], t[12], m, mp);
01574         sp_2048_mont_sqr_18(t[26], t[13], m, mp);
01575         sp_2048_mont_mul_18(t[27], t[14], t[13], m, mp);
01576         sp_2048_mont_sqr_18(t[28], t[14], m, mp);
01577         sp_2048_mont_mul_18(t[29], t[15], t[14], m, mp);
01578         sp_2048_mont_sqr_18(t[30], t[15], m, mp);
01579         sp_2048_mont_mul_18(t[31], t[16], t[15], m, mp);
01580 
01581         bits = ((bits + 4) / 5) * 5;
01582         i = ((bits + 56) / 57) - 1;
01583         c = bits % 57;
01584         if (c == 0)
01585             c = 57;
01586         if (i < 18)
01587             n = e[i--] << (64 - c);
01588         else {
01589             n = 0;
01590             i--;
01591         }
01592         if (c < 5) {
01593             n |= e[i--] << (7 - c);
01594             c += 57;
01595         }
01596         y = n >> 59;
01597         n <<= 5;
01598         c -= 5;
01599         XMEMCPY(rt, t[y], sizeof(rt));
01600         for (; i>=0 || c>=5; ) {
01601             if (c < 5) {
01602                 n |= e[i--] << (7 - c);
01603                 c += 57;
01604             }
01605             y = (n >> 59) & 0x1f;
01606             n <<= 5;
01607             c -= 5;
01608 
01609             sp_2048_mont_sqr_18(rt, rt, m, mp);
01610             sp_2048_mont_sqr_18(rt, rt, m, mp);
01611             sp_2048_mont_sqr_18(rt, rt, m, mp);
01612             sp_2048_mont_sqr_18(rt, rt, m, mp);
01613             sp_2048_mont_sqr_18(rt, rt, m, mp);
01614 
01615             sp_2048_mont_mul_18(rt, rt, t[y], m, mp);
01616         }
01617 
01618         sp_2048_mont_reduce_18(rt, m, mp);
01619         n = sp_2048_cmp_18(rt, m);
01620         sp_2048_cond_sub_18(rt, rt, m, (n < 0) - 1);
01621         XMEMCPY(r, rt, sizeof(rt));
01622     }
01623 
01624 #ifdef WOLFSSL_SMALL_STACK
01625     if (td != NULL)
01626         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
01627 #endif
01628 
01629     return err;
01630 #endif
01631 }
01632 
01633 #endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
01634 
01635 /* r = 2^n mod m where n is the number of bits to reduce by.
01636  * Given m must be 2048 bits, just need to subtract.
01637  *
01638  * r  A single precision number.
01639  * m  A signle precision number.
01640  */
01641 static void sp_2048_mont_norm_36(sp_digit* r, sp_digit* m)
01642 {
01643     /* Set r = 2^n - 1. */
01644 #ifdef WOLFSSL_SP_SMALL
01645     int i;
01646 
01647     for (i=0; i<35; i++)
01648         r[i] = 0x1ffffffffffffffl;
01649 #else
01650     int i;
01651 
01652     for (i = 0; i < 32; i += 8) {
01653         r[i + 0] = 0x1ffffffffffffffl;
01654         r[i + 1] = 0x1ffffffffffffffl;
01655         r[i + 2] = 0x1ffffffffffffffl;
01656         r[i + 3] = 0x1ffffffffffffffl;
01657         r[i + 4] = 0x1ffffffffffffffl;
01658         r[i + 5] = 0x1ffffffffffffffl;
01659         r[i + 6] = 0x1ffffffffffffffl;
01660         r[i + 7] = 0x1ffffffffffffffl;
01661     }
01662     r[32] = 0x1ffffffffffffffl;
01663     r[33] = 0x1ffffffffffffffl;
01664     r[34] = 0x1ffffffffffffffl;
01665 #endif
01666     r[35] = 0x1fffffffffffffl;
01667 
01668     /* r = (2^n - 1) mod n */
01669     sp_2048_sub_36(r, r, m);
01670 
01671     /* Add one so r = 2^n mod m */
01672     r[0] += 1;
01673 }
01674 
01675 /* Compare a with b in constant time.
01676  *
01677  * a  A single precision integer.
01678  * b  A single precision integer.
01679  * return -ve, 0 or +ve if a is less than, equal to or greater than b
01680  * respectively.
01681  */
01682 static sp_digit sp_2048_cmp_36(const sp_digit* a, const sp_digit* b)
01683 {
01684     sp_digit r = 0;
01685 #ifdef WOLFSSL_SP_SMALL
01686     int i;
01687 
01688     for (i=35; i>=0; i--)
01689         r |= (a[i] - b[i]) & (0 - !r);
01690 #else
01691     int i;
01692 
01693     r |= (a[35] - b[35]) & (0 - !r);
01694     r |= (a[34] - b[34]) & (0 - !r);
01695     r |= (a[33] - b[33]) & (0 - !r);
01696     r |= (a[32] - b[32]) & (0 - !r);
01697     for (i = 24; i >= 0; i -= 8) {
01698         r |= (a[i + 7] - b[i + 7]) & (0 - !r);
01699         r |= (a[i + 6] - b[i + 6]) & (0 - !r);
01700         r |= (a[i + 5] - b[i + 5]) & (0 - !r);
01701         r |= (a[i + 4] - b[i + 4]) & (0 - !r);
01702         r |= (a[i + 3] - b[i + 3]) & (0 - !r);
01703         r |= (a[i + 2] - b[i + 2]) & (0 - !r);
01704         r |= (a[i + 1] - b[i + 1]) & (0 - !r);
01705         r |= (a[i + 0] - b[i + 0]) & (0 - !r);
01706     }
01707 #endif /* WOLFSSL_SP_SMALL */
01708 
01709     return r;
01710 }
01711 
01712 /* Conditionally subtract b from a using the mask m.
01713  * m is -1 to subtract and 0 when not.
01714  *
01715  * r  A single precision number representing condition subtract result.
01716  * a  A single precision number to subtract from.
01717  * b  A single precision number to subtract.
01718  * m  Mask value to apply.
01719  */
01720 static void sp_2048_cond_sub_36(sp_digit* r, const sp_digit* a,
01721         const sp_digit* b, const sp_digit m)
01722 {
01723 #ifdef WOLFSSL_SP_SMALL
01724     int i;
01725 
01726     for (i = 0; i < 36; i++)
01727         r[i] = a[i] - (b[i] & m);
01728 #else
01729     int i;
01730 
01731     for (i = 0; i < 32; i += 8) {
01732         r[i + 0] = a[i + 0] - (b[i + 0] & m);
01733         r[i + 1] = a[i + 1] - (b[i + 1] & m);
01734         r[i + 2] = a[i + 2] - (b[i + 2] & m);
01735         r[i + 3] = a[i + 3] - (b[i + 3] & m);
01736         r[i + 4] = a[i + 4] - (b[i + 4] & m);
01737         r[i + 5] = a[i + 5] - (b[i + 5] & m);
01738         r[i + 6] = a[i + 6] - (b[i + 6] & m);
01739         r[i + 7] = a[i + 7] - (b[i + 7] & m);
01740     }
01741     r[32] = a[32] - (b[32] & m);
01742     r[33] = a[33] - (b[33] & m);
01743     r[34] = a[34] - (b[34] & m);
01744     r[35] = a[35] - (b[35] & m);
01745 #endif /* WOLFSSL_SP_SMALL */
01746 }
01747 
01748 /* Mul a by scalar b and add into r. (r += a * b)
01749  *
01750  * r  A single precision integer.
01751  * a  A single precision integer.
01752  * b  A scalar.
01753  */
01754 SP_NOINLINE static void sp_2048_mul_add_36(sp_digit* r, const sp_digit* a,
01755         const sp_digit b)
01756 {
01757 #ifdef WOLFSSL_SP_SMALL
01758     int128_t tb = b;
01759     int128_t t = 0;
01760     int i;
01761 
01762     for (i = 0; i < 36; i++) {
01763         t += (tb * a[i]) + r[i];
01764         r[i] = t & 0x1ffffffffffffffl;
01765         t >>= 57;
01766     }
01767     r[36] += t;
01768 #else
01769     int128_t tb = b;
01770     int128_t t[8];
01771     int i;
01772 
01773     t[0] = tb * a[0]; r[0] += t[0] & 0x1ffffffffffffffl;
01774     for (i = 0; i < 32; i += 8) {
01775         t[1] = tb * a[i+1];
01776         r[i+1] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffl);
01777         t[2] = tb * a[i+2];
01778         r[i+2] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffl);
01779         t[3] = tb * a[i+3];
01780         r[i+3] += (t[2] >> 57) + (t[3] & 0x1ffffffffffffffl);
01781         t[4] = tb * a[i+4];
01782         r[i+4] += (t[3] >> 57) + (t[4] & 0x1ffffffffffffffl);
01783         t[5] = tb * a[i+5];
01784         r[i+5] += (t[4] >> 57) + (t[5] & 0x1ffffffffffffffl);
01785         t[6] = tb * a[i+6];
01786         r[i+6] += (t[5] >> 57) + (t[6] & 0x1ffffffffffffffl);
01787         t[7] = tb * a[i+7];
01788         r[i+7] += (t[6] >> 57) + (t[7] & 0x1ffffffffffffffl);
01789         t[0] = tb * a[i+8];
01790         r[i+8] += (t[7] >> 57) + (t[0] & 0x1ffffffffffffffl);
01791     }
01792     t[1] = tb * a[33]; r[33] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffl);
01793     t[2] = tb * a[34]; r[34] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffl);
01794     t[3] = tb * a[35]; r[35] += (t[2] >> 57) + (t[3] & 0x1ffffffffffffffl);
01795     r[36] +=  t[3] >> 57;
01796 #endif /* WOLFSSL_SP_SMALL */
01797 }
01798 
01799 /* Normalize the values in each word to 57.
01800  *
01801  * a  Array of sp_digit to normalize.
01802  */
01803 static void sp_2048_norm_36(sp_digit* a)
01804 {
01805 #ifdef WOLFSSL_SP_SMALL
01806     int i;
01807     for (i = 0; i < 35; i++) {
01808         a[i+1] += a[i] >> 57;
01809         a[i] &= 0x1ffffffffffffffl;
01810     }
01811 #else
01812     int i;
01813     for (i = 0; i < 32; i += 8) {
01814         a[i+1] += a[i+0] >> 57; a[i+0] &= 0x1ffffffffffffffl;
01815         a[i+2] += a[i+1] >> 57; a[i+1] &= 0x1ffffffffffffffl;
01816         a[i+3] += a[i+2] >> 57; a[i+2] &= 0x1ffffffffffffffl;
01817         a[i+4] += a[i+3] >> 57; a[i+3] &= 0x1ffffffffffffffl;
01818         a[i+5] += a[i+4] >> 57; a[i+4] &= 0x1ffffffffffffffl;
01819         a[i+6] += a[i+5] >> 57; a[i+5] &= 0x1ffffffffffffffl;
01820         a[i+7] += a[i+6] >> 57; a[i+6] &= 0x1ffffffffffffffl;
01821         a[i+8] += a[i+7] >> 57; a[i+7] &= 0x1ffffffffffffffl;
01822         a[i+9] += a[i+8] >> 57; a[i+8] &= 0x1ffffffffffffffl;
01823     }
01824     a[32+1] += a[32] >> 57;
01825     a[32] &= 0x1ffffffffffffffl;
01826     a[33+1] += a[33] >> 57;
01827     a[33] &= 0x1ffffffffffffffl;
01828     a[34+1] += a[34] >> 57;
01829     a[34] &= 0x1ffffffffffffffl;
01830 #endif
01831 }
01832 
01833 /* Shift the result in the high 2048 bits down to the bottom.
01834  *
01835  * r  A single precision number.
01836  * a  A single precision number.
01837  */
01838 static void sp_2048_mont_shift_36(sp_digit* r, const sp_digit* a)
01839 {
01840 #ifdef WOLFSSL_SP_SMALL
01841     int i;
01842     sp_digit n, s;
01843 
01844     s = a[36];
01845     n = a[35] >> 53;
01846     for (i = 0; i < 35; i++) {
01847         n += (s & 0x1ffffffffffffffl) << 4;
01848         r[i] = n & 0x1ffffffffffffffl;
01849         n >>= 57;
01850         s = a[37 + i] + (s >> 57);
01851     }
01852     n += s << 4;
01853     r[35] = n;
01854 #else
01855     sp_digit n, s;
01856     int i;
01857 
01858     s = a[36]; n = a[35] >> 53;
01859     for (i = 0; i < 32; i += 8) {
01860         n += (s & 0x1ffffffffffffffl) << 4; r[i+0] = n & 0x1ffffffffffffffl;
01861         n >>= 57; s = a[i+37] + (s >> 57);
01862         n += (s & 0x1ffffffffffffffl) << 4; r[i+1] = n & 0x1ffffffffffffffl;
01863         n >>= 57; s = a[i+38] + (s >> 57);
01864         n += (s & 0x1ffffffffffffffl) << 4; r[i+2] = n & 0x1ffffffffffffffl;
01865         n >>= 57; s = a[i+39] + (s >> 57);
01866         n += (s & 0x1ffffffffffffffl) << 4; r[i+3] = n & 0x1ffffffffffffffl;
01867         n >>= 57; s = a[i+40] + (s >> 57);
01868         n += (s & 0x1ffffffffffffffl) << 4; r[i+4] = n & 0x1ffffffffffffffl;
01869         n >>= 57; s = a[i+41] + (s >> 57);
01870         n += (s & 0x1ffffffffffffffl) << 4; r[i+5] = n & 0x1ffffffffffffffl;
01871         n >>= 57; s = a[i+42] + (s >> 57);
01872         n += (s & 0x1ffffffffffffffl) << 4; r[i+6] = n & 0x1ffffffffffffffl;
01873         n >>= 57; s = a[i+43] + (s >> 57);
01874         n += (s & 0x1ffffffffffffffl) << 4; r[i+7] = n & 0x1ffffffffffffffl;
01875         n >>= 57; s = a[i+44] + (s >> 57);
01876     }
01877     n += (s & 0x1ffffffffffffffl) << 4; r[32] = n & 0x1ffffffffffffffl;
01878     n >>= 57; s = a[69] + (s >> 57);
01879     n += (s & 0x1ffffffffffffffl) << 4; r[33] = n & 0x1ffffffffffffffl;
01880     n >>= 57; s = a[70] + (s >> 57);
01881     n += (s & 0x1ffffffffffffffl) << 4; r[34] = n & 0x1ffffffffffffffl;
01882     n >>= 57; s = a[71] + (s >> 57);
01883     n += s << 4;              r[35] = n;
01884 #endif /* WOLFSSL_SP_SMALL */
01885     XMEMSET(&r[36], 0, sizeof(*r) * 36);
01886 }
01887 
01888 /* Reduce the number back to 2048 bits using Montgomery reduction.
01889  *
01890  * a   A single precision number to reduce in place.
01891  * m   The single precision number representing the modulus.
01892  * mp  The digit representing the negative inverse of m mod 2^n.
01893  */
01894 static void sp_2048_mont_reduce_36(sp_digit* a, sp_digit* m, sp_digit mp)
01895 {
01896     int i;
01897     sp_digit mu;
01898 
01899     if (mp != 1) {
01900         for (i=0; i<35; i++) {
01901             mu = (a[i] * mp) & 0x1ffffffffffffffl;
01902             sp_2048_mul_add_36(a+i, m, mu);
01903             a[i+1] += a[i] >> 57;
01904         }
01905         mu = (a[i] * mp) & 0x1fffffffffffffl;
01906         sp_2048_mul_add_36(a+i, m, mu);
01907         a[i+1] += a[i] >> 57;
01908         a[i] &= 0x1ffffffffffffffl;
01909     }
01910     else {
01911         for (i=0; i<35; i++) {
01912             mu = a[i] & 0x1ffffffffffffffl;
01913             sp_2048_mul_add_36(a+i, m, mu);
01914             a[i+1] += a[i] >> 57;
01915         }
01916         mu = a[i] & 0x1fffffffffffffl;
01917         sp_2048_mul_add_36(a+i, m, mu);
01918         a[i+1] += a[i] >> 57;
01919         a[i] &= 0x1ffffffffffffffl;
01920     }
01921 
01922     sp_2048_mont_shift_36(a, a);
01923     sp_2048_cond_sub_36(a, a, m, 0 - ((a[35] >> 53) > 0));
01924     sp_2048_norm_36(a);
01925 }
01926 
01927 /* Multiply two Montogmery form numbers mod the modulus (prime).
01928  * (r = a * b mod m)
01929  *
01930  * r   Result of multiplication.
01931  * a   First number to multiply in Montogmery form.
01932  * b   Second number to multiply in Montogmery form.
01933  * m   Modulus (prime).
01934  * mp  Montogmery mulitplier.
01935  */
01936 static void sp_2048_mont_mul_36(sp_digit* r, sp_digit* a, sp_digit* b,
01937         sp_digit* m, sp_digit mp)
01938 {
01939     sp_2048_mul_36(r, a, b);
01940     sp_2048_mont_reduce_36(r, m, mp);
01941 }
01942 
01943 /* Square the Montgomery form number. (r = a * a mod m)
01944  *
01945  * r   Result of squaring.
01946  * a   Number to square in Montogmery form.
01947  * m   Modulus (prime).
01948  * mp  Montogmery mulitplier.
01949  */
01950 static void sp_2048_mont_sqr_36(sp_digit* r, sp_digit* a, sp_digit* m,
01951         sp_digit mp)
01952 {
01953     sp_2048_sqr_36(r, a);
01954     sp_2048_mont_reduce_36(r, m, mp);
01955 }
01956 
01957 /* Multiply a by scalar b into r. (r = a * b)
01958  *
01959  * r  A single precision integer.
01960  * a  A single precision integer.
01961  * b  A scalar.
01962  */
01963 SP_NOINLINE static void sp_2048_mul_d_36(sp_digit* r, const sp_digit* a,
01964     const sp_digit b)
01965 {
01966 #ifdef WOLFSSL_SP_SMALL
01967     int128_t tb = b;
01968     int128_t t = 0;
01969     int i;
01970 
01971     for (i = 0; i < 36; i++) {
01972         t += tb * a[i];
01973         r[i] = t & 0x1ffffffffffffffl;
01974         t >>= 57;
01975     }
01976     r[36] = (sp_digit)t;
01977 #else
01978     int128_t tb = b;
01979     int128_t t[8];
01980     int i;
01981 
01982     t[0] = tb * a[0]; r[0] = t[0] & 0x1ffffffffffffffl;
01983     for (i = 0; i < 32; i += 8) {
01984         t[1] = tb * a[i+1];
01985         r[i+1] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffl);
01986         t[2] = tb * a[i+2];
01987         r[i+2] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffl);
01988         t[3] = tb * a[i+3];
01989         r[i+3] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffl);
01990         t[4] = tb * a[i+4];
01991         r[i+4] = (sp_digit)(t[3] >> 57) + (t[4] & 0x1ffffffffffffffl);
01992         t[5] = tb * a[i+5];
01993         r[i+5] = (sp_digit)(t[4] >> 57) + (t[5] & 0x1ffffffffffffffl);
01994         t[6] = tb * a[i+6];
01995         r[i+6] = (sp_digit)(t[5] >> 57) + (t[6] & 0x1ffffffffffffffl);
01996         t[7] = tb * a[i+7];
01997         r[i+7] = (sp_digit)(t[6] >> 57) + (t[7] & 0x1ffffffffffffffl);
01998         t[0] = tb * a[i+8];
01999         r[i+8] = (sp_digit)(t[7] >> 57) + (t[0] & 0x1ffffffffffffffl);
02000     }
02001     t[1] = tb * a[33];
02002     r[33] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffl);
02003     t[2] = tb * a[34];
02004     r[34] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffl);
02005     t[3] = tb * a[35];
02006     r[35] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffl);
02007     r[36] =  (sp_digit)(t[3] >> 57);
02008 #endif /* WOLFSSL_SP_SMALL */
02009 }
02010 
02011 /* Conditionally add a and b using the mask m.
02012  * m is -1 to add and 0 when not.
02013  *
02014  * r  A single precision number representing conditional add result.
02015  * a  A single precision number to add with.
02016  * b  A single precision number to add.
02017  * m  Mask value to apply.
02018  */
02019 static void sp_2048_cond_add_36(sp_digit* r, const sp_digit* a,
02020         const sp_digit* b, const sp_digit m)
02021 {
02022 #ifdef WOLFSSL_SP_SMALL
02023     int i;
02024 
02025     for (i = 0; i < 36; i++)
02026         r[i] = a[i] + (b[i] & m);
02027 #else
02028     int i;
02029 
02030     for (i = 0; i < 32; i += 8) {
02031         r[i + 0] = a[i + 0] + (b[i + 0] & m);
02032         r[i + 1] = a[i + 1] + (b[i + 1] & m);
02033         r[i + 2] = a[i + 2] + (b[i + 2] & m);
02034         r[i + 3] = a[i + 3] + (b[i + 3] & m);
02035         r[i + 4] = a[i + 4] + (b[i + 4] & m);
02036         r[i + 5] = a[i + 5] + (b[i + 5] & m);
02037         r[i + 6] = a[i + 6] + (b[i + 6] & m);
02038         r[i + 7] = a[i + 7] + (b[i + 7] & m);
02039     }
02040     r[32] = a[32] + (b[32] & m);
02041     r[33] = a[33] + (b[33] & m);
02042     r[34] = a[34] + (b[34] & m);
02043     r[35] = a[35] + (b[35] & m);
02044 #endif /* WOLFSSL_SP_SMALL */
02045 }
02046 
02047 #ifdef WOLFSSL_SMALL
02048 /* Sub b from a into r. (r = a - b)
02049  *
02050  * r  A single precision integer.
02051  * a  A single precision integer.
02052  * b  A single precision integer.
02053  */
02054 SP_NOINLINE static int sp_2048_sub_36(sp_digit* r, const sp_digit* a,
02055         const sp_digit* b)
02056 {
02057     int i;
02058 
02059     for (i = 0; i < 36; i++)
02060         r[i] = a[i] - b[i];
02061 
02062     return 0;
02063 }
02064 
02065 #endif
02066 #ifdef WOLFSSL_SMALL
02067 /* Add b to a into r. (r = a + b)
02068  *
02069  * r  A single precision integer.
02070  * a  A single precision integer.
02071  * b  A single precision integer.
02072  */
02073 SP_NOINLINE static int sp_2048_add_36(sp_digit* r, const sp_digit* a,
02074         const sp_digit* b)
02075 {
02076     int i;
02077 
02078     for (i = 0; i < 36; i++)
02079         r[i] = a[i] + b[i];
02080 
02081     return 0;
02082 }
02083 #endif
02084 /* Divide d in a and put remainder into r (m*d + r = a)
02085  * m is not calculated as it is not needed at this time.
02086  *
02087  * a  Nmber to be divided.
02088  * d  Number to divide with.
02089  * m  Multiplier result.
02090  * r  Remainder from the division.
02091  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
02092  */
02093 static int sp_2048_div_36(sp_digit* a, sp_digit* d, sp_digit* m,
02094         sp_digit* r)
02095 {
02096     int i;
02097     int128_t d1;
02098     sp_digit div, r1;
02099 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
02100     sp_digit* td;
02101 #else
02102     sp_digit t1d[72], t2d[36 + 1];
02103 #endif
02104     sp_digit* t1;
02105     sp_digit* t2;
02106     int err = MP_OKAY;
02107 
02108 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
02109     td = XMALLOC(sizeof(sp_digit) * (3 * 36 + 1), NULL, DYNAMIC_TYPE_TMP_BUFFER);
02110     if (td != NULL) {
02111         t1 = td;
02112         t2 = td + 2 * 36;
02113     }
02114     else
02115         err = MEMORY_E;
02116 #else
02117     t1 = t1d;
02118     t2 = t2d;
02119 #endif
02120 
02121     (void)m;
02122 
02123     if (err == MP_OKAY) {
02124         div = d[35];
02125         XMEMCPY(t1, a, sizeof(*t1) * 2 * 36);
02126         for (i=35; i>=0; i--) {
02127             t1[36 + i] += t1[36 + i - 1] >> 57;
02128             t1[36 + i - 1] &= 0x1ffffffffffffffl;
02129             d1 = t1[36 + i];
02130             d1 <<= 57;
02131             d1 += t1[36 + i - 1];
02132             r1 = (sp_digit)(d1 / div);
02133 
02134             sp_2048_mul_d_36(t2, d, r1);
02135             sp_2048_sub_36(&t1[i], &t1[i], t2);
02136             t1[36 + i] -= t2[36];
02137             t1[36 + i] += t1[36 + i - 1] >> 57;
02138             t1[36 + i - 1] &= 0x1ffffffffffffffl;
02139             r1 = (((-t1[36 + i]) << 57) - t1[36 + i - 1]) / div;
02140             r1++;
02141             sp_2048_mul_d_36(t2, d, r1);
02142             sp_2048_add_36(&t1[i], &t1[i], t2);
02143             t1[36 + i] += t1[36 + i - 1] >> 57;
02144             t1[36 + i - 1] &= 0x1ffffffffffffffl;
02145         }
02146         t1[36 - 1] += t1[36 - 2] >> 57;
02147         t1[36 - 2] &= 0x1ffffffffffffffl;
02148         d1 = t1[36 - 1];
02149         r1 = (sp_digit)(d1 / div);
02150 
02151         sp_2048_mul_d_36(t2, d, r1);
02152         sp_2048_sub_36(t1, t1, t2);
02153         XMEMCPY(r, t1, sizeof(*r) * 2 * 36);
02154         for (i=0; i<34; i++) {
02155             r[i+1] += r[i] >> 57;
02156             r[i] &= 0x1ffffffffffffffl;
02157         }
02158         sp_2048_cond_add_36(r, r, d, 0 - (r[35] < 0));
02159     }
02160 
02161 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
02162     if (td != NULL)
02163         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
02164 #endif
02165 
02166     return err;
02167 }
02168 
02169 /* Reduce a modulo m into r. (r = a mod m)
02170  *
02171  * r  A single precision number that is the reduced result.
02172  * a  A single precision number that is to be reduced.
02173  * m  A single precision number that is the modulus to reduce with.
02174  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
02175  */
02176 static int sp_2048_mod_36(sp_digit* r, sp_digit* a, sp_digit* m)
02177 {
02178     return sp_2048_div_36(a, m, NULL, r);
02179 }
02180 
02181 #if defined(SP_RSA_PRIVATE_EXP_D) || defined(WOLFSSL_HAVE_SP_DH)
02182 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
02183  *
02184  * r     A single precision number that is the result of the operation.
02185  * a     A single precision number being exponentiated.
02186  * e     A single precision number that is the exponent.
02187  * bits  The number of bits in the exponent.
02188  * m     A single precision number that is the modulus.
02189  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
02190  */
02191 static int sp_2048_mod_exp_36(sp_digit* r, sp_digit* a, sp_digit* e, int bits,
02192     sp_digit* m, int reduceA)
02193 {
02194 #ifdef WOLFSSL_SP_SMALL
02195     sp_digit* td;
02196     sp_digit* t[3];
02197     sp_digit* norm;
02198     sp_digit mp = 1;
02199     sp_digit n;
02200     int i;
02201     int c, y;
02202     int err = MP_OKAY;
02203 
02204     td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 36 * 2, NULL,
02205                             DYNAMIC_TYPE_TMP_BUFFER);
02206     if (td == NULL)
02207         err = MEMORY_E;
02208 
02209     if (err == MP_OKAY) {
02210         XMEMSET(td, 0, sizeof(*td) * 3 * 36 * 2);
02211 
02212         norm = t[0] = td;
02213         t[1] = &td[36 * 2];
02214         t[2] = &td[2 * 36 * 2];
02215 
02216         sp_2048_mont_setup(m, &mp);
02217         sp_2048_mont_norm_36(norm, m);
02218 
02219         if (reduceA)
02220             err = sp_2048_mod_36(t[1], a, m);
02221         else
02222             XMEMCPY(t[1], a, sizeof(sp_digit) * 36);
02223     }
02224     if (err == MP_OKAY) {
02225         sp_2048_mul_36(t[1], t[1], norm);
02226         err = sp_2048_mod_36(t[1], t[1], m);
02227     }
02228 
02229     if (err == MP_OKAY) {
02230         i = bits / 57;
02231         c = bits % 57;
02232         n = e[i--] << (57 - c);
02233         for (; ; c--) {
02234             if (c == 0) {
02235                 if (i == -1)
02236                     break;
02237 
02238                 n = e[i--];
02239                 c = 57;
02240             }
02241 
02242             y = (n >> 56) & 1;
02243             n <<= 1;
02244 
02245             sp_2048_mont_mul_36(t[y^1], t[0], t[1], m, mp);
02246 
02247             XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
02248                                   ((size_t)t[1] & addr_mask[y])),
02249                     sizeof(*t[2]) * 36 * 2);
02250             sp_2048_mont_sqr_36(t[2], t[2], m, mp);
02251             XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
02252                             ((size_t)t[1] & addr_mask[y])), t[2],
02253                     sizeof(*t[2]) * 36 * 2);
02254         }
02255 
02256         sp_2048_mont_reduce_36(t[0], m, mp);
02257         n = sp_2048_cmp_36(t[0], m);
02258         sp_2048_cond_sub_36(t[0], t[0], m, (n < 0) - 1);
02259         XMEMCPY(r, t[0], sizeof(*r) * 36 * 2);
02260 
02261     }
02262 
02263     if (td != NULL)
02264         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
02265 
02266     return err;
02267 #elif defined(WOLFSSL_SP_CACHE_RESISTANT)
02268 #ifndef WOLFSSL_SMALL_STACK
02269     sp_digit t[3][72];
02270 #else
02271     sp_digit* td;
02272     sp_digit* t[3];
02273 #endif
02274     sp_digit* norm;
02275     sp_digit mp = 1;
02276     sp_digit n;
02277     int i;
02278     int c, y;
02279     int err = MP_OKAY;
02280 
02281 #ifdef WOLFSSL_SMALL_STACK
02282     td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 36 * 2, NULL,
02283                             DYNAMIC_TYPE_TMP_BUFFER);
02284     if (td == NULL)
02285         err = MEMORY_E;
02286 
02287     if (err == MP_OKAY) {
02288         t[0] = td;
02289         t[1] = &td[36 * 2];
02290         t[2] = &td[2 * 36 * 2];
02291         norm = t[0];
02292     }
02293 #else
02294     norm = t[0];
02295 #endif
02296 
02297     if (err == MP_OKAY) {
02298         sp_2048_mont_setup(m, &mp);
02299         sp_2048_mont_norm_36(norm, m);
02300 
02301         if (reduceA) {
02302             err = sp_2048_mod_36(t[1], a, m);
02303             if (err == MP_OKAY) {
02304                 sp_2048_mul_36(t[1], t[1], norm);
02305                 err = sp_2048_mod_36(t[1], t[1], m);
02306             }
02307         }
02308         else {
02309             sp_2048_mul_36(t[1], a, norm);
02310             err = sp_2048_mod_36(t[1], t[1], m);
02311         }
02312     }
02313 
02314     if (err == MP_OKAY) {
02315         i = bits / 57;
02316         c = bits % 57;
02317         n = e[i--] << (57 - c);
02318         for (; ; c--) {
02319             if (c == 0) {
02320                 if (i == -1)
02321                     break;
02322 
02323                 n = e[i--];
02324                 c = 57;
02325             }
02326 
02327             y = (n >> 56) & 1;
02328             n <<= 1;
02329 
02330             sp_2048_mont_mul_36(t[y^1], t[0], t[1], m, mp);
02331 
02332             XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
02333                                  ((size_t)t[1] & addr_mask[y])), sizeof(t[2]));
02334             sp_2048_mont_sqr_36(t[2], t[2], m, mp);
02335             XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
02336                            ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2]));
02337         }
02338 
02339         sp_2048_mont_reduce_36(t[0], m, mp);
02340         n = sp_2048_cmp_36(t[0], m);
02341         sp_2048_cond_sub_36(t[0], t[0], m, (n < 0) - 1);
02342         XMEMCPY(r, t[0], sizeof(t[0]));
02343     }
02344 
02345 #ifdef WOLFSSL_SMALL_STACK
02346     if (td != NULL)
02347         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
02348 #endif
02349 
02350     return err;
02351 #else
02352 #ifndef WOLFSSL_SMALL_STACK
02353     sp_digit t[32][72];
02354 #else
02355     sp_digit* t[32];
02356     sp_digit* td;
02357 #endif
02358     sp_digit* norm;
02359     sp_digit rt[72];
02360     sp_digit mp = 1;
02361     sp_digit n;
02362     int i;
02363     int c, y;
02364     int err = MP_OKAY;
02365 
02366 #ifdef WOLFSSL_SMALL_STACK
02367     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 72, NULL,
02368                             DYNAMIC_TYPE_TMP_BUFFER);
02369     if (td == NULL)
02370         err = MEMORY_E;
02371 
02372     if (err == MP_OKAY) {
02373         for (i=0; i<32; i++)
02374             t[i] = td + i * 72;
02375         norm = t[0];
02376     }
02377 #else
02378     norm = t[0];
02379 #endif
02380 
02381     if (err == MP_OKAY) {
02382         sp_2048_mont_setup(m, &mp);
02383         sp_2048_mont_norm_36(norm, m);
02384 
02385         if (reduceA) {
02386             err = sp_2048_mod_36(t[1], a, m);
02387             if (err == MP_OKAY) {
02388                 sp_2048_mul_36(t[1], t[1], norm);
02389                 err = sp_2048_mod_36(t[1], t[1], m);
02390             }
02391         }
02392         else {
02393             sp_2048_mul_36(t[1], a, norm);
02394             err = sp_2048_mod_36(t[1], t[1], m);
02395         }
02396     }
02397 
02398     if (err == MP_OKAY) {
02399         sp_2048_mont_sqr_36(t[ 2], t[ 1], m, mp);
02400         sp_2048_mont_mul_36(t[ 3], t[ 2], t[ 1], m, mp);
02401         sp_2048_mont_sqr_36(t[ 4], t[ 2], m, mp);
02402         sp_2048_mont_mul_36(t[ 5], t[ 3], t[ 2], m, mp);
02403         sp_2048_mont_sqr_36(t[ 6], t[ 3], m, mp);
02404         sp_2048_mont_mul_36(t[ 7], t[ 4], t[ 3], m, mp);
02405         sp_2048_mont_sqr_36(t[ 8], t[ 4], m, mp);
02406         sp_2048_mont_mul_36(t[ 9], t[ 5], t[ 4], m, mp);
02407         sp_2048_mont_sqr_36(t[10], t[ 5], m, mp);
02408         sp_2048_mont_mul_36(t[11], t[ 6], t[ 5], m, mp);
02409         sp_2048_mont_sqr_36(t[12], t[ 6], m, mp);
02410         sp_2048_mont_mul_36(t[13], t[ 7], t[ 6], m, mp);
02411         sp_2048_mont_sqr_36(t[14], t[ 7], m, mp);
02412         sp_2048_mont_mul_36(t[15], t[ 8], t[ 7], m, mp);
02413         sp_2048_mont_sqr_36(t[16], t[ 8], m, mp);
02414         sp_2048_mont_mul_36(t[17], t[ 9], t[ 8], m, mp);
02415         sp_2048_mont_sqr_36(t[18], t[ 9], m, mp);
02416         sp_2048_mont_mul_36(t[19], t[10], t[ 9], m, mp);
02417         sp_2048_mont_sqr_36(t[20], t[10], m, mp);
02418         sp_2048_mont_mul_36(t[21], t[11], t[10], m, mp);
02419         sp_2048_mont_sqr_36(t[22], t[11], m, mp);
02420         sp_2048_mont_mul_36(t[23], t[12], t[11], m, mp);
02421         sp_2048_mont_sqr_36(t[24], t[12], m, mp);
02422         sp_2048_mont_mul_36(t[25], t[13], t[12], m, mp);
02423         sp_2048_mont_sqr_36(t[26], t[13], m, mp);
02424         sp_2048_mont_mul_36(t[27], t[14], t[13], m, mp);
02425         sp_2048_mont_sqr_36(t[28], t[14], m, mp);
02426         sp_2048_mont_mul_36(t[29], t[15], t[14], m, mp);
02427         sp_2048_mont_sqr_36(t[30], t[15], m, mp);
02428         sp_2048_mont_mul_36(t[31], t[16], t[15], m, mp);
02429 
02430         bits = ((bits + 4) / 5) * 5;
02431         i = ((bits + 56) / 57) - 1;
02432         c = bits % 57;
02433         if (c == 0)
02434             c = 57;
02435         if (i < 36)
02436             n = e[i--] << (64 - c);
02437         else {
02438             n = 0;
02439             i--;
02440         }
02441         if (c < 5) {
02442             n |= e[i--] << (7 - c);
02443             c += 57;
02444         }
02445         y = n >> 59;
02446         n <<= 5;
02447         c -= 5;
02448         XMEMCPY(rt, t[y], sizeof(rt));
02449         for (; i>=0 || c>=5; ) {
02450             if (c < 5) {
02451                 n |= e[i--] << (7 - c);
02452                 c += 57;
02453             }
02454             y = (n >> 59) & 0x1f;
02455             n <<= 5;
02456             c -= 5;
02457 
02458             sp_2048_mont_sqr_36(rt, rt, m, mp);
02459             sp_2048_mont_sqr_36(rt, rt, m, mp);
02460             sp_2048_mont_sqr_36(rt, rt, m, mp);
02461             sp_2048_mont_sqr_36(rt, rt, m, mp);
02462             sp_2048_mont_sqr_36(rt, rt, m, mp);
02463 
02464             sp_2048_mont_mul_36(rt, rt, t[y], m, mp);
02465         }
02466 
02467         sp_2048_mont_reduce_36(rt, m, mp);
02468         n = sp_2048_cmp_36(rt, m);
02469         sp_2048_cond_sub_36(rt, rt, m, (n < 0) - 1);
02470         XMEMCPY(r, rt, sizeof(rt));
02471     }
02472 
02473 #ifdef WOLFSSL_SMALL_STACK
02474     if (td != NULL)
02475         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
02476 #endif
02477 
02478     return err;
02479 #endif
02480 }
02481 #endif /* SP_RSA_PRIVATE_EXP_D || WOLFSSL_HAVE_SP_DH */
02482 
02483 #if defined(WOLFSSL_HAVE_SP_RSA) && !defined(SP_RSA_PRIVATE_EXP_D) && \
02484                                     !defined(RSA_LOW_MEM)
02485 /* AND m into each word of a and store in r.
02486  *
02487  * r  A single precision integer.
02488  * a  A single precision integer.
02489  * m  Mask to AND against each digit.
02490  */
02491 static void sp_2048_mask_18(sp_digit* r, sp_digit* a, sp_digit m)
02492 {
02493 #ifdef WOLFSSL_SP_SMALL
02494     int i;
02495 
02496     for (i=0; i<18; i++)
02497         r[i] = a[i] & m;
02498 #else
02499     int i;
02500 
02501     for (i = 0; i < 16; i += 8) {
02502         r[i+0] = a[i+0] & m;
02503         r[i+1] = a[i+1] & m;
02504         r[i+2] = a[i+2] & m;
02505         r[i+3] = a[i+3] & m;
02506         r[i+4] = a[i+4] & m;
02507         r[i+5] = a[i+5] & m;
02508         r[i+6] = a[i+6] & m;
02509         r[i+7] = a[i+7] & m;
02510     }
02511     r[16] = a[16] & m;
02512     r[17] = a[17] & m;
02513 #endif
02514 }
02515 
02516 #endif
02517 #ifdef WOLFSSL_HAVE_SP_RSA
02518 /* RSA public key operation.
02519  *
02520  * in      Array of bytes representing the number to exponentiate, base.
02521  * inLen   Number of bytes in base.
02522  * em      Public exponent.
02523  * mm      Modulus.
02524  * out     Buffer to hold big-endian bytes of exponentiation result.
02525  *         Must be at least 256 bytes long.
02526  * outLen  Number of bytes in result.
02527  * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
02528  * an array is too long and MEMORY_E when dynamic memory allocation fails.
02529  */
02530 int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
02531     byte* out, word32* outLen)
02532 {
02533 #ifdef WOLFSSL_SP_SMALL
02534     sp_digit* d = NULL;
02535     sp_digit* a;
02536     sp_digit* m;
02537     sp_digit* r;
02538     sp_digit* norm;
02539     sp_digit e[1];
02540     sp_digit mp;
02541     int i;
02542     int err = MP_OKAY;
02543 
02544     if (*outLen < 256)
02545         err = MP_TO_E;
02546     if (err == MP_OKAY && (mp_count_bits(em) > 57 || inLen > 256 ||
02547                                                      mp_count_bits(mm) != 2048))
02548         err = MP_READ_E;
02549 
02550     if (err == MP_OKAY) {
02551         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 36 * 5, NULL,
02552                                DYNAMIC_TYPE_TMP_BUFFER);
02553         if (d == NULL)
02554             err = MEMORY_E;
02555     }
02556 
02557     if (err == MP_OKAY) {
02558         a = d;
02559         r = a + 36 * 2;
02560         m = r + 36 * 2;
02561         norm = r;
02562 
02563         sp_2048_from_bin(a, 36, in, inLen);
02564 #if DIGIT_BIT >= 57
02565         e[0] = em->dp[0];
02566 #else
02567         e[0] = em->dp[0];
02568         if (em->used > 1)
02569             e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
02570 #endif
02571         if (e[0] == 0)
02572             err = MP_EXPTMOD_E;
02573     }
02574 
02575     if (err == MP_OKAY) {
02576         sp_2048_from_mp(m, 36, mm);
02577 
02578         sp_2048_mont_setup(m, &mp);
02579         sp_2048_mont_norm_36(norm, m);
02580     }
02581     if (err == MP_OKAY) {
02582         sp_2048_mul_36(a, a, norm);
02583         err = sp_2048_mod_36(a, a, m);
02584     }
02585     if (err == MP_OKAY) {
02586         for (i=56; i>=0; i--)
02587             if (e[0] >> i)
02588                 break;
02589 
02590         XMEMCPY(r, a, sizeof(sp_digit) * 36 * 2);
02591         for (i--; i>=0; i--) {
02592             sp_2048_mont_sqr_36(r, r, m, mp);
02593 
02594             if (((e[0] >> i) & 1) == 1)
02595                 sp_2048_mont_mul_36(r, r, a, m, mp);
02596         }
02597         sp_2048_mont_reduce_36(r, m, mp);
02598         mp = sp_2048_cmp_36(r, m);
02599         sp_2048_cond_sub_36(r, r, m, (mp < 0) - 1);
02600 
02601         sp_2048_to_bin(r, out);
02602         *outLen = 256;
02603     }
02604 
02605     if (d != NULL)
02606         XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
02607 
02608     return err;
02609 #else
02610 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
02611     sp_digit ad[72], md[36], rd[72];
02612 #else
02613     sp_digit* d = NULL;
02614 #endif
02615     sp_digit* a;
02616     sp_digit* m;
02617     sp_digit* r;
02618     sp_digit e[1];
02619     int err = MP_OKAY;
02620 
02621     if (*outLen < 256)
02622         err = MP_TO_E;
02623     if (err == MP_OKAY && (mp_count_bits(em) > 57 || inLen > 256 ||
02624                                                      mp_count_bits(mm) != 2048))
02625         err = MP_READ_E;
02626 
02627 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
02628     if (err == MP_OKAY) {
02629         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 36 * 5, NULL,
02630                                DYNAMIC_TYPE_TMP_BUFFER);
02631         if (d == NULL)
02632             err = MEMORY_E;
02633     }
02634 
02635     if (err == MP_OKAY) {
02636         a = d;
02637         r = a + 36 * 2;
02638         m = r + 36 * 2;
02639     }
02640 #else
02641     a = ad;
02642     m = md;
02643     r = rd;
02644 #endif
02645 
02646     if (err == MP_OKAY) {
02647         sp_2048_from_bin(a, 36, in, inLen);
02648 #if DIGIT_BIT >= 57
02649         e[0] = em->dp[0];
02650 #else
02651         e[0] = em->dp[0];
02652         if (em->used > 1)
02653             e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
02654 #endif
02655         if (e[0] == 0)
02656             err = MP_EXPTMOD_E;
02657     }
02658     if (err == MP_OKAY) {
02659         sp_2048_from_mp(m, 36, mm);
02660 
02661         if (e[0] == 0x3) {
02662             if (err == MP_OKAY) {
02663                 sp_2048_sqr_36(r, a);
02664                 err = sp_2048_mod_36(r, r, m);
02665             }
02666             if (err == MP_OKAY) {
02667                 sp_2048_mul_36(r, a, r);
02668                 err = sp_2048_mod_36(r, r, m);
02669             }
02670         }
02671         else {
02672             sp_digit* norm = r;
02673             int i;
02674             sp_digit mp;
02675 
02676             sp_2048_mont_setup(m, &mp);
02677             sp_2048_mont_norm_36(norm, m);
02678 
02679             if (err == MP_OKAY) {
02680                 sp_2048_mul_36(a, a, norm);
02681                 err = sp_2048_mod_36(a, a, m);
02682             }
02683 
02684             if (err == MP_OKAY) {
02685                 for (i=56; i>=0; i--)
02686                     if (e[0] >> i)
02687                         break;
02688 
02689                 XMEMCPY(r, a, sizeof(sp_digit) * 72);
02690                 for (i--; i>=0; i--) {
02691                     sp_2048_mont_sqr_36(r, r, m, mp);
02692 
02693                     if (((e[0] >> i) & 1) == 1)
02694                         sp_2048_mont_mul_36(r, r, a, m, mp);
02695                 }
02696                 sp_2048_mont_reduce_36(r, m, mp);
02697                 mp = sp_2048_cmp_36(r, m);
02698                 sp_2048_cond_sub_36(r, r, m, (mp < 0) - 1);
02699             }
02700         }
02701     }
02702 
02703     if (err == MP_OKAY) {
02704         sp_2048_to_bin(r, out);
02705         *outLen = 256;
02706     }
02707 
02708 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
02709     if (d != NULL)
02710         XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
02711 #endif
02712 
02713     return err;
02714 #endif /* WOLFSSL_SP_SMALL */
02715 }
02716 
02717 /* RSA private key operation.
02718  *
02719  * in      Array of bytes representing the number to exponentiate, base.
02720  * inLen   Number of bytes in base.
02721  * dm      Private exponent.
02722  * pm      First prime.
02723  * qm      Second prime.
02724  * dpm     First prime's CRT exponent.
02725  * dqm     Second prime's CRT exponent.
02726  * qim     Inverse of second prime mod p.
02727  * mm      Modulus.
02728  * out     Buffer to hold big-endian bytes of exponentiation result.
02729  *         Must be at least 256 bytes long.
02730  * outLen  Number of bytes in result.
02731  * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
02732  * an array is too long and MEMORY_E when dynamic memory allocation fails.
02733  */
02734 int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm,
02735     mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
02736     byte* out, word32* outLen)
02737 {
02738 #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
02739 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
02740     sp_digit* a;
02741     sp_digit* d = NULL;
02742     sp_digit* m;
02743     sp_digit* r;
02744     int err = MP_OKAY;
02745 
02746     (void)pm;
02747     (void)qm;
02748     (void)dpm;
02749     (void)dqm;
02750     (void)qim;
02751 
02752     if (*outLen < 256)
02753         err = MP_TO_E;
02754     if (err == MP_OKAY && (mp_count_bits(dm) > 2048 || inLen > 256 ||
02755                                                      mp_count_bits(mm) != 2048))
02756         err = MP_READ_E;
02757 
02758     if (err == MP_OKAY) {
02759         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 36 * 4, NULL,
02760                                DYNAMIC_TYPE_TMP_BUFFER);
02761         if (d == NULL)
02762             err = MEMORY_E;
02763     }
02764     if (err == MP_OKAY) {
02765         a = d + 36;
02766         m = a + 36;
02767         r = a;
02768 
02769         sp_2048_from_bin(a, 36, in, inLen);
02770         sp_2048_from_mp(d, 36, dm);
02771         sp_2048_from_mp(m, 36, mm);
02772         err = sp_2048_mod_exp_36(r, a, d, 2048, m, 0);
02773     }
02774     if (err == MP_OKAY) {
02775         sp_2048_to_bin(r, out);
02776         *outLen = 256;
02777     }
02778 
02779     if (d != NULL) {
02780         XMEMSET(d, 0, sizeof(sp_digit) * 36);
02781         XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
02782     }
02783 
02784     return err;
02785 #else
02786     sp_digit a[72], d[36], m[36];
02787     sp_digit* r = a;
02788     int err = MP_OKAY;
02789 
02790     (void)pm;
02791     (void)qm;
02792     (void)dpm;
02793     (void)dqm;
02794     (void)qim;
02795 
02796     if (*outLen < 256)
02797         err = MP_TO_E;
02798     if (err == MP_OKAY && (mp_count_bits(dm) > 2048 || inLen > 256 ||
02799                                                      mp_count_bits(mm) != 2048))
02800         err = MP_READ_E;
02801 
02802     if (err == MP_OKAY) {
02803         sp_2048_from_bin(a, 36, in, inLen);
02804         sp_2048_from_mp(d, 36, dm);
02805         sp_2048_from_mp(m, 36, mm);
02806         err = sp_2048_mod_exp_36(r, a, d, 2048, m, 0);
02807     }
02808 
02809     if (err == MP_OKAY) {
02810         sp_2048_to_bin(r, out);
02811         *outLen = 256;
02812     }
02813 
02814     XMEMSET(d, 0, sizeof(sp_digit) * 36);
02815 
02816     return err;
02817 #endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */
02818 #else
02819 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
02820     sp_digit* t = NULL;
02821     sp_digit* a;
02822     sp_digit* p;
02823     sp_digit* q;
02824     sp_digit* dp;
02825     sp_digit* dq;
02826     sp_digit* qi;
02827     sp_digit* tmp;
02828     sp_digit* tmpa;
02829     sp_digit* tmpb;
02830     sp_digit* r;
02831     int err = MP_OKAY;
02832 
02833     (void)dm;
02834     (void)mm;
02835 
02836     if (*outLen < 256)
02837         err = MP_TO_E;
02838     if (err == MP_OKAY && (inLen > 256 || mp_count_bits(mm) != 2048))
02839         err = MP_READ_E;
02840 
02841     if (err == MP_OKAY) {
02842         t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 18 * 11, NULL,
02843                                DYNAMIC_TYPE_TMP_BUFFER);
02844         if (t == NULL)
02845             err = MEMORY_E;
02846     }
02847     if (err == MP_OKAY) {
02848         a = t;
02849         p = a + 36 * 2;
02850         q = p + 18;
02851         qi = dq = dp = q + 18;
02852         tmpa = qi + 18;
02853         tmpb = tmpa + 36;
02854 
02855         tmp = t;
02856         r = tmp + 36;
02857 
02858         sp_2048_from_bin(a, 36, in, inLen);
02859         sp_2048_from_mp(p, 18, pm);
02860         sp_2048_from_mp(q, 18, qm);
02861         sp_2048_from_mp(dp, 18, dpm);
02862         err = sp_2048_mod_exp_18(tmpa, a, dp, 1024, p, 1);
02863     }
02864     if (err == MP_OKAY) {
02865         sp_2048_from_mp(dq, 18, dqm);
02866         err = sp_2048_mod_exp_18(tmpb, a, dq, 1024, q, 1);
02867     }
02868     if (err == MP_OKAY) {
02869         sp_2048_sub_18(tmpa, tmpa, tmpb);
02870         sp_2048_mask_18(tmp, p, tmpa[17] >> 63);
02871         sp_2048_add_18(tmpa, tmpa, tmp);
02872 
02873         sp_2048_from_mp(qi, 18, qim);
02874         sp_2048_mul_18(tmpa, tmpa, qi);
02875         err = sp_2048_mod_18(tmpa, tmpa, p);
02876     }
02877 
02878     if (err == MP_OKAY) {
02879         sp_2048_mul_18(tmpa, q, tmpa);
02880         sp_2048_add_36(r, tmpb, tmpa);
02881         sp_2048_norm_36(r);
02882 
02883         sp_2048_to_bin(r, out);
02884         *outLen = 256;
02885     }
02886 
02887     if (t != NULL) {
02888         XMEMSET(t, 0, sizeof(sp_digit) * 18 * 11);
02889         XFREE(t, NULL, DYNAMIC_TYPE_TMP_BUFFER);
02890     }
02891 
02892     return err;
02893 #else
02894     sp_digit a[36 * 2];
02895     sp_digit p[18], q[18], dp[18], dq[18], qi[18];
02896     sp_digit tmp[36], tmpa[36], tmpb[36];
02897     sp_digit* r = a;
02898     int err = MP_OKAY;
02899 
02900     (void)dm;
02901     (void)mm;
02902 
02903     if (*outLen < 256)
02904         err = MP_TO_E;
02905     if (err == MP_OKAY && (inLen > 256 || mp_count_bits(mm) != 2048))
02906         err = MP_READ_E;
02907 
02908     if (err == MP_OKAY) {
02909         sp_2048_from_bin(a, 36, in, inLen);
02910         sp_2048_from_mp(p, 18, pm);
02911         sp_2048_from_mp(q, 18, qm);
02912         sp_2048_from_mp(dp, 18, dpm);
02913         sp_2048_from_mp(dq, 18, dqm);
02914         sp_2048_from_mp(qi, 18, qim);
02915 
02916         err = sp_2048_mod_exp_18(tmpa, a, dp, 1024, p, 1);
02917     }
02918     if (err == MP_OKAY)
02919         err = sp_2048_mod_exp_18(tmpb, a, dq, 1024, q, 1);
02920 
02921     if (err == MP_OKAY) {
02922         sp_2048_sub_18(tmpa, tmpa, tmpb);
02923         sp_2048_mask_18(tmp, p, tmpa[17] >> 63);
02924         sp_2048_add_18(tmpa, tmpa, tmp);
02925         sp_2048_mul_18(tmpa, tmpa, qi);
02926         err = sp_2048_mod_18(tmpa, tmpa, p);
02927     }
02928 
02929     if (err == MP_OKAY) {
02930         sp_2048_mul_18(tmpa, tmpa, q);
02931         sp_2048_add_36(r, tmpb, tmpa);
02932         sp_2048_norm_36(r);
02933 
02934         sp_2048_to_bin(r, out);
02935         *outLen = 256;
02936     }
02937 
02938     XMEMSET(tmpa, 0, sizeof(tmpa));
02939     XMEMSET(tmpb, 0, sizeof(tmpb));
02940     XMEMSET(p, 0, sizeof(p));
02941     XMEMSET(q, 0, sizeof(q));
02942     XMEMSET(dp, 0, sizeof(dp));
02943     XMEMSET(dq, 0, sizeof(dq));
02944     XMEMSET(qi, 0, sizeof(qi));
02945 
02946     return err;
02947 #endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */
02948 #endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
02949 }
02950 
02951 #endif /* WOLFSSL_HAVE_SP_RSA */
02952 #ifdef WOLFSSL_HAVE_SP_DH
02953 /* Convert an array of sp_digit to an mp_int.
02954  *
02955  * a  A single precision integer.
02956  * r  A multi-precision integer.
02957  */
02958 static int sp_2048_to_mp(sp_digit* a, mp_int* r)
02959 {
02960     int err;
02961 
02962     err = mp_grow(r, (2048 + DIGIT_BIT - 1) / DIGIT_BIT);
02963     if (err == MP_OKAY) {
02964 #if DIGIT_BIT == 57
02965         XMEMCPY(r->dp, a, sizeof(sp_digit) * 36);
02966         r->used = 36;
02967         mp_clamp(r);
02968 #elif DIGIT_BIT < 57
02969         int i, j = 0, s = 0;
02970 
02971         r->dp[0] = 0;
02972         for (i = 0; i < 36; i++) {
02973             r->dp[j] |= a[i] << s;
02974             r->dp[j] &= (1l << DIGIT_BIT) - 1;
02975             s = DIGIT_BIT - s;
02976             r->dp[++j] = a[i] >> s;
02977             while (s + DIGIT_BIT <= 57) {
02978                 s += DIGIT_BIT;
02979                 r->dp[j] &= (1l << DIGIT_BIT) - 1;
02980                 r->dp[++j] = a[i] >> s;
02981             }
02982             s = 57 - s;
02983         }
02984         r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
02985         mp_clamp(r);
02986 #else
02987         int i, j = 0, s = 0;
02988 
02989         r->dp[0] = 0;
02990         for (i = 0; i < 36; i++) {
02991             r->dp[j] |= ((mp_digit)a[i]) << s;
02992             if (s + 57 >= DIGIT_BIT) {
02993     #if DIGIT_BIT < 64
02994                 r->dp[j] &= (1l << DIGIT_BIT) - 1;
02995     #endif
02996                 s = DIGIT_BIT - s;
02997                 r->dp[++j] = a[i] >> s;
02998                 s = 57 - s;
02999             }
03000             else
03001                 s += 57;
03002         }
03003         r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
03004         mp_clamp(r);
03005 #endif
03006     }
03007 
03008     return err;
03009 }
03010 
03011 /* Perform the modular exponentiation for Diffie-Hellman.
03012  *
03013  * base  Base. MP integer.
03014  * exp   Exponent. MP integer.
03015  * mod   Modulus. MP integer.
03016  * res   Result. MP integer.
03017  * returs 0 on success, MP_READ_E if there are too many bytes in an array
03018  * and MEMORY_E if memory allocation fails.
03019  */
03020 int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
03021 {
03022 #ifdef WOLFSSL_SP_SMALL
03023     int err = MP_OKAY;
03024     sp_digit* d = NULL;
03025     sp_digit* b;
03026     sp_digit* e;
03027     sp_digit* m;
03028     sp_digit* r;
03029     int expBits = mp_count_bits(exp);
03030 
03031     if (mp_count_bits(base) > 2048 || expBits > 2048 ||
03032                                                    mp_count_bits(mod) != 2048) {
03033         err = MP_READ_E;
03034     }
03035 
03036     if (err == MP_OKAY) {
03037         d = (sp_digit*)XMALLOC(sizeof(*d) * 36 * 4, NULL,
03038                                DYNAMIC_TYPE_TMP_BUFFER);
03039         if (d == NULL)
03040             err = MEMORY_E;
03041     }
03042 
03043     if (err == MP_OKAY) {
03044         b = d;
03045         e = b + 36 * 2;
03046         m = e + 36;
03047         r = b;
03048 
03049         sp_2048_from_mp(b, 36, base);
03050         sp_2048_from_mp(e, 36, exp);
03051         sp_2048_from_mp(m, 36, mod);
03052 
03053         err = sp_2048_mod_exp_36(r, b, e, mp_count_bits(exp), m, 0);
03054     }
03055 
03056     if (err == MP_OKAY) {
03057         err = sp_2048_to_mp(r, res);
03058     }
03059 
03060     if (d != NULL) {
03061         XMEMSET(e, 0, sizeof(sp_digit) * 36);
03062         XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
03063     }
03064     return err;
03065 #else
03066 #ifndef WOLFSSL_SMALL_STACK
03067     sp_digit bd[72], ed[36], md[36];
03068 #else
03069     sp_digit* d = NULL;
03070 #endif
03071     sp_digit* b;
03072     sp_digit* e;
03073     sp_digit* m;
03074     sp_digit* r;
03075     int err = MP_OKAY;
03076     int expBits = mp_count_bits(exp);
03077 
03078     if (mp_count_bits(base) > 2048 || expBits > 2048 ||
03079                                                    mp_count_bits(mod) != 2048) {
03080         err = MP_READ_E;
03081     }
03082 
03083 #ifdef WOLFSSL_SMALL_STACK
03084     if (err == MP_OKAY) {
03085         d = (sp_digit*)XMALLOC(sizeof(*d) * 36 * 4, NULL,
03086                                DYNAMIC_TYPE_TMP_BUFFER);
03087         if (d == NULL)
03088             err = MEMORY_E;
03089     }
03090 
03091     if (err == MP_OKAY) {
03092         b = d;
03093         e = b + 36 * 2;
03094         m = e + 36;
03095         r = b;
03096     }
03097 #else
03098     r = b = bd;
03099     e = ed;
03100     m = md;
03101 #endif
03102 
03103     if (err == MP_OKAY) {
03104         sp_2048_from_mp(b, 36, base);
03105         sp_2048_from_mp(e, 36, exp);
03106         sp_2048_from_mp(m, 36, mod);
03107 
03108         err = sp_2048_mod_exp_36(r, b, e, expBits, m, 0);
03109     }
03110 
03111     if (err == MP_OKAY) {
03112         err = sp_2048_to_mp(r, res);
03113     }
03114 
03115     XMEMSET(e, 0, sizeof(sp_digit) * 36);
03116 
03117 #ifdef WOLFSSL_SMALL_STACK
03118     if (d != NULL)
03119         XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
03120 #endif
03121 
03122     return err;
03123 #endif
03124 }
03125 
03126 /* Perform the modular exponentiation for Diffie-Hellman.
03127  *
03128  * base     Base.
03129  * exp      Array of bytes that is the exponent.
03130  * expLen   Length of data, in bytes, in exponent.
03131  * mod      Modulus.
03132  * out      Buffer to hold big-endian bytes of exponentiation result.
03133  *          Must be at least 256 bytes long.
03134  * outLen   Length, in bytes, of exponentiation result.
03135  * returs 0 on success, MP_READ_E if there are too many bytes in an array
03136  * and MEMORY_E if memory allocation fails.
03137  */
03138 int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen,
03139     mp_int* mod, byte* out, word32* outLen)
03140 {
03141 #ifdef WOLFSSL_SP_SMALL
03142     int err = MP_OKAY;
03143     sp_digit* d = NULL;
03144     sp_digit* b;
03145     sp_digit* e;
03146     sp_digit* m;
03147     sp_digit* r;
03148     word32 i;
03149 
03150     if (mp_count_bits(base) > 2048 || expLen > 256 ||
03151                                                    mp_count_bits(mod) != 2048) {
03152         err = MP_READ_E;
03153     }
03154 
03155     if (err == MP_OKAY) {
03156         d = (sp_digit*)XMALLOC(sizeof(*d) * 36 * 4, NULL,
03157                                DYNAMIC_TYPE_TMP_BUFFER);
03158         if (d == NULL)
03159             err = MEMORY_E;
03160     }
03161 
03162     if (err == MP_OKAY) {
03163         b = d;
03164         e = b + 36 * 2;
03165         m = e + 36;
03166         r = b;
03167 
03168         sp_2048_from_mp(b, 36, base);
03169         sp_2048_from_bin(e, 36, exp, expLen);
03170         sp_2048_from_mp(m, 36, mod);
03171 
03172         err = sp_2048_mod_exp_36(r, b, e, expLen * 8, m, 0);
03173     }
03174 
03175     if (err == MP_OKAY) {
03176         sp_2048_to_bin(r, out);
03177         *outLen = 256;
03178         for (i=0; i<256 && out[i] == 0; i++) {
03179         }
03180         *outLen -= i;
03181         XMEMMOVE(out, out + i, *outLen);
03182     }
03183 
03184     if (d != NULL) {
03185         XMEMSET(e, 0, sizeof(sp_digit) * 36);
03186         XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
03187     }
03188     return err;
03189 #else
03190 #ifndef WOLFSSL_SMALL_STACK
03191     sp_digit bd[72], ed[36], md[36];
03192 #else
03193     sp_digit* d = NULL;
03194 #endif
03195     sp_digit* b;
03196     sp_digit* e;
03197     sp_digit* m;
03198     sp_digit* r;
03199     word32 i;
03200     int err = MP_OKAY;
03201 
03202     if (mp_count_bits(base) > 2048 || expLen > 256 ||
03203                                                    mp_count_bits(mod) != 2048) {
03204         err = MP_READ_E;
03205     }
03206 
03207 #ifdef WOLFSSL_SMALL_STACK
03208     if (err == MP_OKAY) {
03209         d = (sp_digit*)XMALLOC(sizeof(*d) * 36 * 4, NULL,
03210                                DYNAMIC_TYPE_TMP_BUFFER);
03211         if (d == NULL)
03212             err = MEMORY_E;
03213     }
03214 
03215     if (err == MP_OKAY) {
03216         b = d;
03217         e = b + 36 * 2;
03218         m = e + 36;
03219         r = b;
03220     }
03221 #else
03222     r = b = bd;
03223     e = ed;
03224     m = md;
03225 #endif
03226 
03227     if (err == MP_OKAY) {
03228         sp_2048_from_mp(b, 36, base);
03229         sp_2048_from_bin(e, 36, exp, expLen);
03230         sp_2048_from_mp(m, 36, mod);
03231 
03232         err = sp_2048_mod_exp_36(r, b, e, expLen * 8, m, 0);
03233     }
03234 
03235     if (err == MP_OKAY) {
03236         sp_2048_to_bin(r, out);
03237         *outLen = 256;
03238         for (i=0; i<256 && out[i] == 0; i++) {
03239         }
03240         *outLen -= i;
03241         XMEMMOVE(out, out + i, *outLen);
03242     }
03243 
03244     XMEMSET(e, 0, sizeof(sp_digit) * 36);
03245 
03246 #ifdef WOLFSSL_SMALL_STACK
03247     if (d != NULL)
03248         XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
03249 #endif
03250 
03251     return err;
03252 #endif
03253 }
03254 
03255 #endif /* WOLFSSL_HAVE_SP_DH */
03256 
03257 #endif /* WOLFSSL_SP_NO_2048 */
03258 
03259 #ifndef WOLFSSL_SP_NO_3072
03260 /* Read big endian unsigned byte aray into r.
03261  *
03262  * r  A single precision integer.
03263  * a  Byte array.
03264  * n  Number of bytes in array to read.
03265  */
03266 static void sp_3072_from_bin(sp_digit* r, int max, const byte* a, int n)
03267 {
03268     int i, j = 0, s = 0;
03269 
03270     r[0] = 0;
03271     for (i = n-1; i >= 0; i--) {
03272         r[j] |= ((sp_digit)a[i]) << s;
03273         if (s >= 49) {
03274             r[j] &= 0x1ffffffffffffffl;
03275             s = 57 - s;
03276             if (j + 1 >= max)
03277                 break;
03278             r[++j] = a[i] >> s;
03279             s = 8 - s;
03280         }
03281         else
03282             s += 8;
03283     }
03284 
03285     for (j++; j < max; j++)
03286         r[j] = 0;
03287 }
03288 
03289 /* Convert an mp_int to an array of sp_digit.
03290  *
03291  * r  A single precision integer.
03292  * a  A multi-precision integer.
03293  */
03294 static void sp_3072_from_mp(sp_digit* r, int max, mp_int* a)
03295 {
03296 #if DIGIT_BIT == 57
03297     int j;
03298 
03299     XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
03300 
03301     for (j = a->used; j < max; j++)
03302         r[j] = 0;
03303 #elif DIGIT_BIT > 57
03304     int i, j = 0, s = 0;
03305 
03306     r[0] = 0;
03307     for (i = 0; i < a->used && j < max; i++) {
03308         r[j] |= a->dp[i] << s;
03309         r[j] &= 0x1ffffffffffffffl;
03310         s = 57 - s;
03311         if (j + 1 >= max)
03312             break;
03313         r[++j] = a->dp[i] >> s;
03314         while (s + 57 <= DIGIT_BIT) {
03315             s += 57;
03316             r[j] &= 0x1ffffffffffffffl;
03317             if (j + 1 >= max)
03318                 break;
03319             if (s < DIGIT_BIT)
03320                 r[++j] = a->dp[i] >> s;
03321             else
03322                 r[++j] = 0;
03323         }
03324         s = DIGIT_BIT - s;
03325     }
03326 
03327     for (j++; j < max; j++)
03328         r[j] = 0;
03329 #else
03330     int i, j = 0, s = 0;
03331 
03332     r[0] = 0;
03333     for (i = 0; i < a->used && j < max; i++) {
03334         r[j] |= ((sp_digit)a->dp[i]) << s;
03335         if (s + DIGIT_BIT >= 57) {
03336             r[j] &= 0x1ffffffffffffffl;
03337             if (j + 1 >= max)
03338                 break;
03339             s = 57 - s;
03340             if (s == DIGIT_BIT) {
03341                 r[++j] = 0;
03342                 s = 0;
03343             }
03344             else {
03345                 r[++j] = a->dp[i] >> s;
03346                 s = DIGIT_BIT - s;
03347             }
03348         }
03349         else
03350             s += DIGIT_BIT;
03351     }
03352 
03353     for (j++; j < max; j++)
03354         r[j] = 0;
03355 #endif
03356 }
03357 
03358 /* Write r as big endian to byte aray.
03359  * Fixed length number of bytes written: 384
03360  *
03361  * r  A single precision integer.
03362  * a  Byte array.
03363  */
03364 static void sp_3072_to_bin(sp_digit* r, byte* a)
03365 {
03366     int i, j, s = 0, b;
03367 
03368     for (i=0; i<53; i++) {
03369         r[i+1] += r[i] >> 57;
03370         r[i] &= 0x1ffffffffffffffl;
03371     }
03372     j = 3072 / 8 - 1;
03373     a[j] = 0;
03374     for (i=0; i<54 && j>=0; i++) {
03375         b = 0;
03376         a[j--] |= r[i] << s; b += 8 - s;
03377         if (j < 0)
03378             break;
03379         while (b < 57) {
03380             a[j--] = r[i] >> b; b += 8;
03381             if (j < 0)
03382                 break;
03383         }
03384         s = 8 - (b - 57);
03385         if (j >= 0)
03386             a[j] = 0;
03387         if (s != 0)
03388             j++;
03389     }
03390 }
03391 
03392 #ifndef WOLFSSL_SP_SMALL
03393 /* Multiply a and b into r. (r = a * b)
03394  *
03395  * r  A single precision integer.
03396  * a  A single precision integer.
03397  * b  A single precision integer.
03398  */
03399 SP_NOINLINE static void sp_3072_mul_9(sp_digit* r, const sp_digit* a,
03400     const sp_digit* b)
03401 {
03402     int128_t t0   = ((int128_t)a[ 0]) * b[ 0];
03403     int128_t t1   = ((int128_t)a[ 0]) * b[ 1]
03404                  + ((int128_t)a[ 1]) * b[ 0];
03405     int128_t t2   = ((int128_t)a[ 0]) * b[ 2]
03406                  + ((int128_t)a[ 1]) * b[ 1]
03407                  + ((int128_t)a[ 2]) * b[ 0];
03408     int128_t t3   = ((int128_t)a[ 0]) * b[ 3]
03409                  + ((int128_t)a[ 1]) * b[ 2]
03410                  + ((int128_t)a[ 2]) * b[ 1]
03411                  + ((int128_t)a[ 3]) * b[ 0];
03412     int128_t t4   = ((int128_t)a[ 0]) * b[ 4]
03413                  + ((int128_t)a[ 1]) * b[ 3]
03414                  + ((int128_t)a[ 2]) * b[ 2]
03415                  + ((int128_t)a[ 3]) * b[ 1]
03416                  + ((int128_t)a[ 4]) * b[ 0];
03417     int128_t t5   = ((int128_t)a[ 0]) * b[ 5]
03418                  + ((int128_t)a[ 1]) * b[ 4]
03419                  + ((int128_t)a[ 2]) * b[ 3]
03420                  + ((int128_t)a[ 3]) * b[ 2]
03421                  + ((int128_t)a[ 4]) * b[ 1]
03422                  + ((int128_t)a[ 5]) * b[ 0];
03423     int128_t t6   = ((int128_t)a[ 0]) * b[ 6]
03424                  + ((int128_t)a[ 1]) * b[ 5]
03425                  + ((int128_t)a[ 2]) * b[ 4]
03426                  + ((int128_t)a[ 3]) * b[ 3]
03427                  + ((int128_t)a[ 4]) * b[ 2]
03428                  + ((int128_t)a[ 5]) * b[ 1]
03429                  + ((int128_t)a[ 6]) * b[ 0];
03430     int128_t t7   = ((int128_t)a[ 0]) * b[ 7]
03431                  + ((int128_t)a[ 1]) * b[ 6]
03432                  + ((int128_t)a[ 2]) * b[ 5]
03433                  + ((int128_t)a[ 3]) * b[ 4]
03434                  + ((int128_t)a[ 4]) * b[ 3]
03435                  + ((int128_t)a[ 5]) * b[ 2]
03436                  + ((int128_t)a[ 6]) * b[ 1]
03437                  + ((int128_t)a[ 7]) * b[ 0];
03438     int128_t t8   = ((int128_t)a[ 0]) * b[ 8]
03439                  + ((int128_t)a[ 1]) * b[ 7]
03440                  + ((int128_t)a[ 2]) * b[ 6]
03441                  + ((int128_t)a[ 3]) * b[ 5]
03442                  + ((int128_t)a[ 4]) * b[ 4]
03443                  + ((int128_t)a[ 5]) * b[ 3]
03444                  + ((int128_t)a[ 6]) * b[ 2]
03445                  + ((int128_t)a[ 7]) * b[ 1]
03446                  + ((int128_t)a[ 8]) * b[ 0];
03447     int128_t t9   = ((int128_t)a[ 1]) * b[ 8]
03448                  + ((int128_t)a[ 2]) * b[ 7]
03449                  + ((int128_t)a[ 3]) * b[ 6]
03450                  + ((int128_t)a[ 4]) * b[ 5]
03451                  + ((int128_t)a[ 5]) * b[ 4]
03452                  + ((int128_t)a[ 6]) * b[ 3]
03453                  + ((int128_t)a[ 7]) * b[ 2]
03454                  + ((int128_t)a[ 8]) * b[ 1];
03455     int128_t t10  = ((int128_t)a[ 2]) * b[ 8]
03456                  + ((int128_t)a[ 3]) * b[ 7]
03457                  + ((int128_t)a[ 4]) * b[ 6]
03458                  + ((int128_t)a[ 5]) * b[ 5]
03459                  + ((int128_t)a[ 6]) * b[ 4]
03460                  + ((int128_t)a[ 7]) * b[ 3]
03461                  + ((int128_t)a[ 8]) * b[ 2];
03462     int128_t t11  = ((int128_t)a[ 3]) * b[ 8]
03463                  + ((int128_t)a[ 4]) * b[ 7]
03464                  + ((int128_t)a[ 5]) * b[ 6]
03465                  + ((int128_t)a[ 6]) * b[ 5]
03466                  + ((int128_t)a[ 7]) * b[ 4]
03467                  + ((int128_t)a[ 8]) * b[ 3];
03468     int128_t t12  = ((int128_t)a[ 4]) * b[ 8]
03469                  + ((int128_t)a[ 5]) * b[ 7]
03470                  + ((int128_t)a[ 6]) * b[ 6]
03471                  + ((int128_t)a[ 7]) * b[ 5]
03472                  + ((int128_t)a[ 8]) * b[ 4];
03473     int128_t t13  = ((int128_t)a[ 5]) * b[ 8]
03474                  + ((int128_t)a[ 6]) * b[ 7]
03475                  + ((int128_t)a[ 7]) * b[ 6]
03476                  + ((int128_t)a[ 8]) * b[ 5];
03477     int128_t t14  = ((int128_t)a[ 6]) * b[ 8]
03478                  + ((int128_t)a[ 7]) * b[ 7]
03479                  + ((int128_t)a[ 8]) * b[ 6];
03480     int128_t t15  = ((int128_t)a[ 7]) * b[ 8]
03481                  + ((int128_t)a[ 8]) * b[ 7];
03482     int128_t t16  = ((int128_t)a[ 8]) * b[ 8];
03483 
03484     t1   += t0  >> 57; r[ 0] = t0  & 0x1ffffffffffffffl;
03485     t2   += t1  >> 57; r[ 1] = t1  & 0x1ffffffffffffffl;
03486     t3   += t2  >> 57; r[ 2] = t2  & 0x1ffffffffffffffl;
03487     t4   += t3  >> 57; r[ 3] = t3  & 0x1ffffffffffffffl;
03488     t5   += t4  >> 57; r[ 4] = t4  & 0x1ffffffffffffffl;
03489     t6   += t5  >> 57; r[ 5] = t5  & 0x1ffffffffffffffl;
03490     t7   += t6  >> 57; r[ 6] = t6  & 0x1ffffffffffffffl;
03491     t8   += t7  >> 57; r[ 7] = t7  & 0x1ffffffffffffffl;
03492     t9   += t8  >> 57; r[ 8] = t8  & 0x1ffffffffffffffl;
03493     t10  += t9  >> 57; r[ 9] = t9  & 0x1ffffffffffffffl;
03494     t11  += t10 >> 57; r[10] = t10 & 0x1ffffffffffffffl;
03495     t12  += t11 >> 57; r[11] = t11 & 0x1ffffffffffffffl;
03496     t13  += t12 >> 57; r[12] = t12 & 0x1ffffffffffffffl;
03497     t14  += t13 >> 57; r[13] = t13 & 0x1ffffffffffffffl;
03498     t15  += t14 >> 57; r[14] = t14 & 0x1ffffffffffffffl;
03499     t16  += t15 >> 57; r[15] = t15 & 0x1ffffffffffffffl;
03500     r[17] = (sp_digit)(t16 >> 57);
03501                        r[16] = t16 & 0x1ffffffffffffffl;
03502 }
03503 
03504 /* Square a and put result in r. (r = a * a)
03505  *
03506  * r  A single precision integer.
03507  * a  A single precision integer.
03508  */
03509 SP_NOINLINE static void sp_3072_sqr_9(sp_digit* r, const sp_digit* a)
03510 {
03511     int128_t t0   =  ((int128_t)a[ 0]) * a[ 0];
03512     int128_t t1   = (((int128_t)a[ 0]) * a[ 1]) * 2;
03513     int128_t t2   = (((int128_t)a[ 0]) * a[ 2]) * 2
03514                  +  ((int128_t)a[ 1]) * a[ 1];
03515     int128_t t3   = (((int128_t)a[ 0]) * a[ 3]
03516                  +  ((int128_t)a[ 1]) * a[ 2]) * 2;
03517     int128_t t4   = (((int128_t)a[ 0]) * a[ 4]
03518                  +  ((int128_t)a[ 1]) * a[ 3]) * 2
03519                  +  ((int128_t)a[ 2]) * a[ 2];
03520     int128_t t5   = (((int128_t)a[ 0]) * a[ 5]
03521                  +  ((int128_t)a[ 1]) * a[ 4]
03522                  +  ((int128_t)a[ 2]) * a[ 3]) * 2;
03523     int128_t t6   = (((int128_t)a[ 0]) * a[ 6]
03524                  +  ((int128_t)a[ 1]) * a[ 5]
03525                  +  ((int128_t)a[ 2]) * a[ 4]) * 2
03526                  +  ((int128_t)a[ 3]) * a[ 3];
03527     int128_t t7   = (((int128_t)a[ 0]) * a[ 7]
03528                  +  ((int128_t)a[ 1]) * a[ 6]
03529                  +  ((int128_t)a[ 2]) * a[ 5]
03530                  +  ((int128_t)a[ 3]) * a[ 4]) * 2;
03531     int128_t t8   = (((int128_t)a[ 0]) * a[ 8]
03532                  +  ((int128_t)a[ 1]) * a[ 7]
03533                  +  ((int128_t)a[ 2]) * a[ 6]
03534                  +  ((int128_t)a[ 3]) * a[ 5]) * 2
03535                  +  ((int128_t)a[ 4]) * a[ 4];
03536     int128_t t9   = (((int128_t)a[ 1]) * a[ 8]
03537                  +  ((int128_t)a[ 2]) * a[ 7]
03538                  +  ((int128_t)a[ 3]) * a[ 6]
03539                  +  ((int128_t)a[ 4]) * a[ 5]) * 2;
03540     int128_t t10  = (((int128_t)a[ 2]) * a[ 8]
03541                  +  ((int128_t)a[ 3]) * a[ 7]
03542                  +  ((int128_t)a[ 4]) * a[ 6]) * 2
03543                  +  ((int128_t)a[ 5]) * a[ 5];
03544     int128_t t11  = (((int128_t)a[ 3]) * a[ 8]
03545                  +  ((int128_t)a[ 4]) * a[ 7]
03546                  +  ((int128_t)a[ 5]) * a[ 6]) * 2;
03547     int128_t t12  = (((int128_t)a[ 4]) * a[ 8]
03548                  +  ((int128_t)a[ 5]) * a[ 7]) * 2
03549                  +  ((int128_t)a[ 6]) * a[ 6];
03550     int128_t t13  = (((int128_t)a[ 5]) * a[ 8]
03551                  +  ((int128_t)a[ 6]) * a[ 7]) * 2;
03552     int128_t t14  = (((int128_t)a[ 6]) * a[ 8]) * 2
03553                  +  ((int128_t)a[ 7]) * a[ 7];
03554     int128_t t15  = (((int128_t)a[ 7]) * a[ 8]) * 2;
03555     int128_t t16  =  ((int128_t)a[ 8]) * a[ 8];
03556 
03557     t1   += t0  >> 57; r[ 0] = t0  & 0x1ffffffffffffffl;
03558     t2   += t1  >> 57; r[ 1] = t1  & 0x1ffffffffffffffl;
03559     t3   += t2  >> 57; r[ 2] = t2  & 0x1ffffffffffffffl;
03560     t4   += t3  >> 57; r[ 3] = t3  & 0x1ffffffffffffffl;
03561     t5   += t4  >> 57; r[ 4] = t4  & 0x1ffffffffffffffl;
03562     t6   += t5  >> 57; r[ 5] = t5  & 0x1ffffffffffffffl;
03563     t7   += t6  >> 57; r[ 6] = t6  & 0x1ffffffffffffffl;
03564     t8   += t7  >> 57; r[ 7] = t7  & 0x1ffffffffffffffl;
03565     t9   += t8  >> 57; r[ 8] = t8  & 0x1ffffffffffffffl;
03566     t10  += t9  >> 57; r[ 9] = t9  & 0x1ffffffffffffffl;
03567     t11  += t10 >> 57; r[10] = t10 & 0x1ffffffffffffffl;
03568     t12  += t11 >> 57; r[11] = t11 & 0x1ffffffffffffffl;
03569     t13  += t12 >> 57; r[12] = t12 & 0x1ffffffffffffffl;
03570     t14  += t13 >> 57; r[13] = t13 & 0x1ffffffffffffffl;
03571     t15  += t14 >> 57; r[14] = t14 & 0x1ffffffffffffffl;
03572     t16  += t15 >> 57; r[15] = t15 & 0x1ffffffffffffffl;
03573     r[17] = (sp_digit)(t16 >> 57);
03574                        r[16] = t16 & 0x1ffffffffffffffl;
03575 }
03576 
03577 /* Add b to a into r. (r = a + b)
03578  *
03579  * r  A single precision integer.
03580  * a  A single precision integer.
03581  * b  A single precision integer.
03582  */
03583 SP_NOINLINE static int sp_3072_add_9(sp_digit* r, const sp_digit* a,
03584         const sp_digit* b)
03585 {
03586     r[ 0] = a[ 0] + b[ 0];
03587     r[ 1] = a[ 1] + b[ 1];
03588     r[ 2] = a[ 2] + b[ 2];
03589     r[ 3] = a[ 3] + b[ 3];
03590     r[ 4] = a[ 4] + b[ 4];
03591     r[ 5] = a[ 5] + b[ 5];
03592     r[ 6] = a[ 6] + b[ 6];
03593     r[ 7] = a[ 7] + b[ 7];
03594     r[ 8] = a[ 8] + b[ 8];
03595 
03596     return 0;
03597 }
03598 
03599 /* Add b to a into r. (r = a + b)
03600  *
03601  * r  A single precision integer.
03602  * a  A single precision integer.
03603  * b  A single precision integer.
03604  */
03605 SP_NOINLINE static int sp_3072_add_18(sp_digit* r, const sp_digit* a,
03606         const sp_digit* b)
03607 {
03608     int i;
03609 
03610     for (i = 0; i < 16; i += 8) {
03611         r[i + 0] = a[i + 0] + b[i + 0];
03612         r[i + 1] = a[i + 1] + b[i + 1];
03613         r[i + 2] = a[i + 2] + b[i + 2];
03614         r[i + 3] = a[i + 3] + b[i + 3];
03615         r[i + 4] = a[i + 4] + b[i + 4];
03616         r[i + 5] = a[i + 5] + b[i + 5];
03617         r[i + 6] = a[i + 6] + b[i + 6];
03618         r[i + 7] = a[i + 7] + b[i + 7];
03619     }
03620     r[16] = a[16] + b[16];
03621     r[17] = a[17] + b[17];
03622 
03623     return 0;
03624 }
03625 
03626 /* Sub b from a into r. (r = a - b)
03627  *
03628  * r  A single precision integer.
03629  * a  A single precision integer.
03630  * b  A single precision integer.
03631  */
03632 SP_NOINLINE static int sp_3072_sub_18(sp_digit* r, const sp_digit* a,
03633         const sp_digit* b)
03634 {
03635     int i;
03636 
03637     for (i = 0; i < 16; i += 8) {
03638         r[i + 0] = a[i + 0] - b[i + 0];
03639         r[i + 1] = a[i + 1] - b[i + 1];
03640         r[i + 2] = a[i + 2] - b[i + 2];
03641         r[i + 3] = a[i + 3] - b[i + 3];
03642         r[i + 4] = a[i + 4] - b[i + 4];
03643         r[i + 5] = a[i + 5] - b[i + 5];
03644         r[i + 6] = a[i + 6] - b[i + 6];
03645         r[i + 7] = a[i + 7] - b[i + 7];
03646     }
03647     r[16] = a[16] - b[16];
03648     r[17] = a[17] - b[17];
03649 
03650     return 0;
03651 }
03652 
03653 /* Multiply a and b into r. (r = a * b)
03654  *
03655  * r  A single precision integer.
03656  * a  A single precision integer.
03657  * b  A single precision integer.
03658  */
03659 SP_NOINLINE static void sp_3072_mul_18(sp_digit* r, const sp_digit* a,
03660     const sp_digit* b)
03661 {
03662     sp_digit* z0 = r;
03663     sp_digit z1[18];
03664     sp_digit* a1 = z1;
03665     sp_digit b1[9];
03666     sp_digit* z2 = r + 18;
03667     sp_3072_add_9(a1, a, &a[9]);
03668     sp_3072_add_9(b1, b, &b[9]);
03669     sp_3072_mul_9(z2, &a[9], &b[9]);
03670     sp_3072_mul_9(z0, a, b);
03671     sp_3072_mul_9(z1, a1, b1);
03672     sp_3072_sub_18(z1, z1, z2);
03673     sp_3072_sub_18(z1, z1, z0);
03674     sp_3072_add_18(r + 9, r + 9, z1);
03675 }
03676 
03677 /* Square a and put result in r. (r = a * a)
03678  *
03679  * r  A single precision integer.
03680  * a  A single precision integer.
03681  */
03682 SP_NOINLINE static void sp_3072_sqr_18(sp_digit* r, const sp_digit* a)
03683 {
03684     sp_digit* z0 = r;
03685     sp_digit z1[18];
03686     sp_digit* a1 = z1;
03687     sp_digit* z2 = r + 18;
03688     sp_3072_add_9(a1, a, &a[9]);
03689     sp_3072_sqr_9(z2, &a[9]);
03690     sp_3072_sqr_9(z0, a);
03691     sp_3072_sqr_9(z1, a1);
03692     sp_3072_sub_18(z1, z1, z2);
03693     sp_3072_sub_18(z1, z1, z0);
03694     sp_3072_add_18(r + 9, r + 9, z1);
03695 }
03696 
03697 /* Sub b from a into r. (r = a - b)
03698  *
03699  * r  A single precision integer.
03700  * a  A single precision integer.
03701  * b  A single precision integer.
03702  */
03703 SP_NOINLINE static int sp_3072_sub_36(sp_digit* r, const sp_digit* a,
03704         const sp_digit* b)
03705 {
03706     int i;
03707 
03708     for (i = 0; i < 32; i += 8) {
03709         r[i + 0] = a[i + 0] - b[i + 0];
03710         r[i + 1] = a[i + 1] - b[i + 1];
03711         r[i + 2] = a[i + 2] - b[i + 2];
03712         r[i + 3] = a[i + 3] - b[i + 3];
03713         r[i + 4] = a[i + 4] - b[i + 4];
03714         r[i + 5] = a[i + 5] - b[i + 5];
03715         r[i + 6] = a[i + 6] - b[i + 6];
03716         r[i + 7] = a[i + 7] - b[i + 7];
03717     }
03718     r[32] = a[32] - b[32];
03719     r[33] = a[33] - b[33];
03720     r[34] = a[34] - b[34];
03721     r[35] = a[35] - b[35];
03722 
03723     return 0;
03724 }
03725 
03726 /* Add b to a into r. (r = a + b)
03727  *
03728  * r  A single precision integer.
03729  * a  A single precision integer.
03730  * b  A single precision integer.
03731  */
03732 SP_NOINLINE static int sp_3072_add_36(sp_digit* r, const sp_digit* a,
03733         const sp_digit* b)
03734 {
03735     int i;
03736 
03737     for (i = 0; i < 32; i += 8) {
03738         r[i + 0] = a[i + 0] + b[i + 0];
03739         r[i + 1] = a[i + 1] + b[i + 1];
03740         r[i + 2] = a[i + 2] + b[i + 2];
03741         r[i + 3] = a[i + 3] + b[i + 3];
03742         r[i + 4] = a[i + 4] + b[i + 4];
03743         r[i + 5] = a[i + 5] + b[i + 5];
03744         r[i + 6] = a[i + 6] + b[i + 6];
03745         r[i + 7] = a[i + 7] + b[i + 7];
03746     }
03747     r[32] = a[32] + b[32];
03748     r[33] = a[33] + b[33];
03749     r[34] = a[34] + b[34];
03750     r[35] = a[35] + b[35];
03751 
03752     return 0;
03753 }
03754 
03755 /* Multiply a and b into r. (r = a * b)
03756  *
03757  * r  A single precision integer.
03758  * a  A single precision integer.
03759  * b  A single precision integer.
03760  */
03761 SP_NOINLINE static void sp_3072_mul_54(sp_digit* r, const sp_digit* a,
03762     const sp_digit* b)
03763 {
03764     sp_digit p0[36];
03765     sp_digit p1[36];
03766     sp_digit p2[36];
03767     sp_digit p3[36];
03768     sp_digit p4[36];
03769     sp_digit p5[36];
03770     sp_digit t0[36];
03771     sp_digit t1[36];
03772     sp_digit t2[36];
03773     sp_digit a0[18];
03774     sp_digit a1[18];
03775     sp_digit a2[18];
03776     sp_digit b0[18];
03777     sp_digit b1[18];
03778     sp_digit b2[18];
03779     sp_3072_add_18(a0, a, &a[18]);
03780     sp_3072_add_18(b0, b, &b[18]);
03781     sp_3072_add_18(a1, &a[18], &a[36]);
03782     sp_3072_add_18(b1, &b[18], &b[36]);
03783     sp_3072_add_18(a2, a0, &a[36]);
03784     sp_3072_add_18(b2, b0, &b[36]);
03785     sp_3072_mul_18(p0, a, b);
03786     sp_3072_mul_18(p2, &a[18], &b[18]);
03787     sp_3072_mul_18(p4, &a[36], &b[36]);
03788     sp_3072_mul_18(p1, a0, b0);
03789     sp_3072_mul_18(p3, a1, b1);
03790     sp_3072_mul_18(p5, a2, b2);
03791     XMEMSET(r, 0, sizeof(*r)*2*54);
03792     sp_3072_sub_36(t0, p3, p2);
03793     sp_3072_sub_36(t1, p1, p2);
03794     sp_3072_sub_36(t2, p5, t0);
03795     sp_3072_sub_36(t2, t2, t1);
03796     sp_3072_sub_36(t0, t0, p4);
03797     sp_3072_sub_36(t1, t1, p0);
03798     sp_3072_add_36(r, r, p0);
03799     sp_3072_add_36(&r[18], &r[18], t1);
03800     sp_3072_add_36(&r[36], &r[36], t2);
03801     sp_3072_add_36(&r[54], &r[54], t0);
03802     sp_3072_add_36(&r[72], &r[72], p4);
03803 }
03804 
03805 /* Square a into r. (r = a * a)
03806  *
03807  * r  A single precision integer.
03808  * a  A single precision integer.
03809  */
03810 SP_NOINLINE static void sp_3072_sqr_54(sp_digit* r, const sp_digit* a)
03811 {
03812     sp_digit p0[36];
03813     sp_digit p1[36];
03814     sp_digit p2[36];
03815     sp_digit p3[36];
03816     sp_digit p4[36];
03817     sp_digit p5[36];
03818     sp_digit t0[36];
03819     sp_digit t1[36];
03820     sp_digit t2[36];
03821     sp_digit a0[18];
03822     sp_digit a1[18];
03823     sp_digit a2[18];
03824     sp_3072_add_18(a0, a, &a[18]);
03825     sp_3072_add_18(a1, &a[18], &a[36]);
03826     sp_3072_add_18(a2, a0, &a[36]);
03827     sp_3072_sqr_18(p0, a);
03828     sp_3072_sqr_18(p2, &a[18]);
03829     sp_3072_sqr_18(p4, &a[36]);
03830     sp_3072_sqr_18(p1, a0);
03831     sp_3072_sqr_18(p3, a1);
03832     sp_3072_sqr_18(p5, a2);
03833     XMEMSET(r, 0, sizeof(*r)*2*54);
03834     sp_3072_sub_36(t0, p3, p2);
03835     sp_3072_sub_36(t1, p1, p2);
03836     sp_3072_sub_36(t2, p5, t0);
03837     sp_3072_sub_36(t2, t2, t1);
03838     sp_3072_sub_36(t0, t0, p4);
03839     sp_3072_sub_36(t1, t1, p0);
03840     sp_3072_add_36(r, r, p0);
03841     sp_3072_add_36(&r[18], &r[18], t1);
03842     sp_3072_add_36(&r[36], &r[36], t2);
03843     sp_3072_add_36(&r[54], &r[54], t0);
03844     sp_3072_add_36(&r[72], &r[72], p4);
03845 }
03846 
03847 #endif /* WOLFSSL_SP_SMALL */
03848 #ifdef WOLFSSL_SP_SMALL
03849 /* Add b to a into r. (r = a + b)
03850  *
03851  * r  A single precision integer.
03852  * a  A single precision integer.
03853  * b  A single precision integer.
03854  */
03855 SP_NOINLINE static int sp_3072_add_54(sp_digit* r, const sp_digit* a,
03856         const sp_digit* b)
03857 {
03858     int i;
03859 
03860     for (i = 0; i < 54; i++)
03861         r[i] = a[i] + b[i];
03862 
03863     return 0;
03864 }
03865 #else
03866 /* Add b to a into r. (r = a + b)
03867  *
03868  * r  A single precision integer.
03869  * a  A single precision integer.
03870  * b  A single precision integer.
03871  */
03872 SP_NOINLINE static int sp_3072_add_54(sp_digit* r, const sp_digit* a,
03873         const sp_digit* b)
03874 {
03875     int i;
03876 
03877     for (i = 0; i < 48; i += 8) {
03878         r[i + 0] = a[i + 0] + b[i + 0];
03879         r[i + 1] = a[i + 1] + b[i + 1];
03880         r[i + 2] = a[i + 2] + b[i + 2];
03881         r[i + 3] = a[i + 3] + b[i + 3];
03882         r[i + 4] = a[i + 4] + b[i + 4];
03883         r[i + 5] = a[i + 5] + b[i + 5];
03884         r[i + 6] = a[i + 6] + b[i + 6];
03885         r[i + 7] = a[i + 7] + b[i + 7];
03886     }
03887     r[48] = a[48] + b[48];
03888     r[49] = a[49] + b[49];
03889     r[50] = a[50] + b[50];
03890     r[51] = a[51] + b[51];
03891     r[52] = a[52] + b[52];
03892     r[53] = a[53] + b[53];
03893 
03894     return 0;
03895 }
03896 
03897 #endif /* WOLFSSL_SP_SMALL */
03898 #ifdef WOLFSSL_SP_SMALL
03899 /* Sub b from a into r. (r = a - b)
03900  *
03901  * r  A single precision integer.
03902  * a  A single precision integer.
03903  * b  A single precision integer.
03904  */
03905 SP_NOINLINE static int sp_3072_sub_54(sp_digit* r, const sp_digit* a,
03906         const sp_digit* b)
03907 {
03908     int i;
03909 
03910     for (i = 0; i < 54; i++)
03911         r[i] = a[i] - b[i];
03912 
03913     return 0;
03914 }
03915 
03916 #else
03917 /* Sub b from a into r. (r = a - b)
03918  *
03919  * r  A single precision integer.
03920  * a  A single precision integer.
03921  * b  A single precision integer.
03922  */
03923 SP_NOINLINE static int sp_3072_sub_54(sp_digit* r, const sp_digit* a,
03924         const sp_digit* b)
03925 {
03926     int i;
03927 
03928     for (i = 0; i < 48; i += 8) {
03929         r[i + 0] = a[i + 0] - b[i + 0];
03930         r[i + 1] = a[i + 1] - b[i + 1];
03931         r[i + 2] = a[i + 2] - b[i + 2];
03932         r[i + 3] = a[i + 3] - b[i + 3];
03933         r[i + 4] = a[i + 4] - b[i + 4];
03934         r[i + 5] = a[i + 5] - b[i + 5];
03935         r[i + 6] = a[i + 6] - b[i + 6];
03936         r[i + 7] = a[i + 7] - b[i + 7];
03937     }
03938     r[48] = a[48] - b[48];
03939     r[49] = a[49] - b[49];
03940     r[50] = a[50] - b[50];
03941     r[51] = a[51] - b[51];
03942     r[52] = a[52] - b[52];
03943     r[53] = a[53] - b[53];
03944 
03945     return 0;
03946 }
03947 
03948 #endif /* WOLFSSL_SP_SMALL */
03949 #ifdef WOLFSSL_SP_SMALL
03950 /* Multiply a and b into r. (r = a * b)
03951  *
03952  * r  A single precision integer.
03953  * a  A single precision integer.
03954  * b  A single precision integer.
03955  */
03956 SP_NOINLINE static void sp_3072_mul_54(sp_digit* r, const sp_digit* a,
03957     const sp_digit* b)
03958 {
03959     int i, j, k;
03960     int128_t c;
03961 
03962     c = ((int128_t)a[53]) * b[53];
03963     r[107] = (sp_digit)(c >> 57);
03964     c = (c & 0x1ffffffffffffffl) << 57;
03965     for (k = 105; k >= 0; k--) {
03966         for (i = 53; i >= 0; i--) {
03967             j = k - i;
03968             if (j >= 54)
03969                 break;
03970             if (j < 0)
03971                 continue;
03972 
03973             c += ((int128_t)a[i]) * b[j];
03974         }
03975         r[k + 2] += c >> 114;
03976         r[k + 1] = (c >> 57) & 0x1ffffffffffffffl;
03977         c = (c & 0x1ffffffffffffffl) << 57;
03978     }
03979     r[0] = (sp_digit)(c >> 57);
03980 }
03981 
03982 /* Square a and put result in r. (r = a * a)
03983  *
03984  * r  A single precision integer.
03985  * a  A single precision integer.
03986  */
03987 SP_NOINLINE static void sp_3072_sqr_54(sp_digit* r, const sp_digit* a)
03988 {
03989     int i, j, k;
03990     int128_t c;
03991 
03992     c = ((int128_t)a[53]) * a[53];
03993     r[107] = (sp_digit)(c >> 57);
03994     c = (c & 0x1ffffffffffffffl) << 57;
03995     for (k = 105; k >= 0; k--) {
03996         for (i = 53; i >= 0; i--) {
03997             j = k - i;
03998             if (j >= 54 || i <= j)
03999                 break;
04000             if (j < 0)
04001                 continue;
04002 
04003             c += ((int128_t)a[i]) * a[j] * 2;
04004         }
04005         if (i == j)
04006            c += ((int128_t)a[i]) * a[i];
04007 
04008         r[k + 2] += c >> 114;
04009         r[k + 1] = (c >> 57) & 0x1ffffffffffffffl;
04010         c = (c & 0x1ffffffffffffffl) << 57;
04011     }
04012     r[0] = (sp_digit)(c >> 57);
04013 }
04014 
04015 #endif /* WOLFSSL_SP_SMALL */
04016 #if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
04017 #ifdef WOLFSSL_SP_SMALL
04018 /* Add b to a into r. (r = a + b)
04019  *
04020  * r  A single precision integer.
04021  * a  A single precision integer.
04022  * b  A single precision integer.
04023  */
04024 SP_NOINLINE static int sp_3072_add_27(sp_digit* r, const sp_digit* a,
04025         const sp_digit* b)
04026 {
04027     int i;
04028 
04029     for (i = 0; i < 27; i++)
04030         r[i] = a[i] + b[i];
04031 
04032     return 0;
04033 }
04034 #else
04035 /* Add b to a into r. (r = a + b)
04036  *
04037  * r  A single precision integer.
04038  * a  A single precision integer.
04039  * b  A single precision integer.
04040  */
04041 SP_NOINLINE static int sp_3072_add_27(sp_digit* r, const sp_digit* a,
04042         const sp_digit* b)
04043 {
04044     int i;
04045 
04046     for (i = 0; i < 24; i += 8) {
04047         r[i + 0] = a[i + 0] + b[i + 0];
04048         r[i + 1] = a[i + 1] + b[i + 1];
04049         r[i + 2] = a[i + 2] + b[i + 2];
04050         r[i + 3] = a[i + 3] + b[i + 3];
04051         r[i + 4] = a[i + 4] + b[i + 4];
04052         r[i + 5] = a[i + 5] + b[i + 5];
04053         r[i + 6] = a[i + 6] + b[i + 6];
04054         r[i + 7] = a[i + 7] + b[i + 7];
04055     }
04056     r[24] = a[24] + b[24];
04057     r[25] = a[25] + b[25];
04058     r[26] = a[26] + b[26];
04059 
04060     return 0;
04061 }
04062 
04063 #endif /* WOLFSSL_SP_SMALL */
04064 #ifdef WOLFSSL_SP_SMALL
04065 /* Sub b from a into r. (r = a - b)
04066  *
04067  * r  A single precision integer.
04068  * a  A single precision integer.
04069  * b  A single precision integer.
04070  */
04071 SP_NOINLINE static int sp_3072_sub_27(sp_digit* r, const sp_digit* a,
04072         const sp_digit* b)
04073 {
04074     int i;
04075 
04076     for (i = 0; i < 27; i++)
04077         r[i] = a[i] - b[i];
04078 
04079     return 0;
04080 }
04081 
04082 #else
04083 /* Sub b from a into r. (r = a - b)
04084  *
04085  * r  A single precision integer.
04086  * a  A single precision integer.
04087  * b  A single precision integer.
04088  */
04089 SP_NOINLINE static int sp_3072_sub_27(sp_digit* r, const sp_digit* a,
04090         const sp_digit* b)
04091 {
04092     int i;
04093 
04094     for (i = 0; i < 24; i += 8) {
04095         r[i + 0] = a[i + 0] - b[i + 0];
04096         r[i + 1] = a[i + 1] - b[i + 1];
04097         r[i + 2] = a[i + 2] - b[i + 2];
04098         r[i + 3] = a[i + 3] - b[i + 3];
04099         r[i + 4] = a[i + 4] - b[i + 4];
04100         r[i + 5] = a[i + 5] - b[i + 5];
04101         r[i + 6] = a[i + 6] - b[i + 6];
04102         r[i + 7] = a[i + 7] - b[i + 7];
04103     }
04104     r[24] = a[24] - b[24];
04105     r[25] = a[25] - b[25];
04106     r[26] = a[26] - b[26];
04107 
04108     return 0;
04109 }
04110 
04111 #endif /* WOLFSSL_SP_SMALL */
04112 #ifdef WOLFSSL_SP_SMALL
04113 /* Multiply a and b into r. (r = a * b)
04114  *
04115  * r  A single precision integer.
04116  * a  A single precision integer.
04117  * b  A single precision integer.
04118  */
04119 SP_NOINLINE static void sp_3072_mul_27(sp_digit* r, const sp_digit* a,
04120     const sp_digit* b)
04121 {
04122     int i, j, k;
04123     int128_t c;
04124 
04125     c = ((int128_t)a[26]) * b[26];
04126     r[53] = (sp_digit)(c >> 57);
04127     c = (c & 0x1ffffffffffffffl) << 57;
04128     for (k = 51; k >= 0; k--) {
04129         for (i = 26; i >= 0; i--) {
04130             j = k - i;
04131             if (j >= 27)
04132                 break;
04133             if (j < 0)
04134                 continue;
04135 
04136             c += ((int128_t)a[i]) * b[j];
04137         }
04138         r[k + 2] += c >> 114;
04139         r[k + 1] = (c >> 57) & 0x1ffffffffffffffl;
04140         c = (c & 0x1ffffffffffffffl) << 57;
04141     }
04142     r[0] = (sp_digit)(c >> 57);
04143 }
04144 
04145 #else
04146 /* Multiply a and b into r. (r = a * b)
04147  *
04148  * r  A single precision integer.
04149  * a  A single precision integer.
04150  * b  A single precision integer.
04151  */
04152 SP_NOINLINE static void sp_3072_mul_27(sp_digit* r, const sp_digit* a,
04153     const sp_digit* b)
04154 {
04155     int i, j;
04156     int128_t t[54];
04157 
04158     XMEMSET(t, 0, sizeof(t));
04159     for (i=0; i<27; i++) {
04160         for (j=0; j<27; j++)
04161             t[i+j] += ((int128_t)a[i]) * b[j];
04162     }
04163     for (i=0; i<53; i++) {
04164         r[i] = t[i] & 0x1ffffffffffffffl;
04165         t[i+1] += t[i] >> 57;
04166     }
04167     r[53] = (sp_digit)t[53];
04168 }
04169 
04170 #endif /* WOLFSSL_SP_SMALL */
04171 #ifdef WOLFSSL_SP_SMALL
04172 /* Square a and put result in r. (r = a * a)
04173  *
04174  * r  A single precision integer.
04175  * a  A single precision integer.
04176  */
04177 SP_NOINLINE static void sp_3072_sqr_27(sp_digit* r, const sp_digit* a)
04178 {
04179     int i, j, k;
04180     int128_t c;
04181 
04182     c = ((int128_t)a[26]) * a[26];
04183     r[53] = (sp_digit)(c >> 57);
04184     c = (c & 0x1ffffffffffffffl) << 57;
04185     for (k = 51; k >= 0; k--) {
04186         for (i = 26; i >= 0; i--) {
04187             j = k - i;
04188             if (j >= 27 || i <= j)
04189                 break;
04190             if (j < 0)
04191                 continue;
04192 
04193             c += ((int128_t)a[i]) * a[j] * 2;
04194         }
04195         if (i == j)
04196            c += ((int128_t)a[i]) * a[i];
04197 
04198         r[k + 2] += c >> 114;
04199         r[k + 1] = (c >> 57) & 0x1ffffffffffffffl;
04200         c = (c & 0x1ffffffffffffffl) << 57;
04201     }
04202     r[0] = (sp_digit)(c >> 57);
04203 }
04204 
04205 #else
04206 /* Square a and put result in r. (r = a * a)
04207  *
04208  * r  A single precision integer.
04209  * a  A single precision integer.
04210  */
04211 SP_NOINLINE static void sp_3072_sqr_27(sp_digit* r, const sp_digit* a)
04212 {
04213     int i, j;
04214     int128_t t[54];
04215 
04216     XMEMSET(t, 0, sizeof(t));
04217     for (i=0; i<27; i++) {
04218         for (j=0; j<i; j++)
04219             t[i+j] += (((int128_t)a[i]) * a[j]) * 2;
04220         t[i+i] += ((int128_t)a[i]) * a[i];
04221     }
04222     for (i=0; i<53; i++) {
04223         r[i] = t[i] & 0x1ffffffffffffffl;
04224         t[i+1] += t[i] >> 57;
04225     }
04226     r[53] = (sp_digit)t[53];
04227 }
04228 
04229 #endif /* WOLFSSL_SP_SMALL */
04230 #endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
04231 
04232 /* Caclulate the bottom digit of -1/a mod 2^n.
04233  *
04234  * a    A single precision number.
04235  * rho  Bottom word of inverse.
04236  */
04237 static void sp_3072_mont_setup(sp_digit* a, sp_digit* rho)
04238 {
04239     sp_digit x, b;
04240 
04241     b = a[0];
04242     x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
04243     x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
04244     x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
04245     x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
04246     x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
04247     x &= 0x1ffffffffffffffl;
04248 
04249     /* rho = -1/m mod b */
04250     *rho = (1L << 57) - x;
04251 }
04252 
04253 #if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
04254 /* r = 2^n mod m where n is the number of bits to reduce by.
04255  * Given m must be 3072 bits, just need to subtract.
04256  *
04257  * r  A single precision number.
04258  * m  A signle precision number.
04259  */
04260 static void sp_3072_mont_norm_27(sp_digit* r, sp_digit* m)
04261 {
04262     /* Set r = 2^n - 1. */
04263 #ifdef WOLFSSL_SP_SMALL
04264     int i;
04265 
04266     for (i=0; i<26; i++)
04267         r[i] = 0x1ffffffffffffffl;
04268 #else
04269     int i;
04270 
04271     for (i = 0; i < 24; i += 8) {
04272         r[i + 0] = 0x1ffffffffffffffl;
04273         r[i + 1] = 0x1ffffffffffffffl;
04274         r[i + 2] = 0x1ffffffffffffffl;
04275         r[i + 3] = 0x1ffffffffffffffl;
04276         r[i + 4] = 0x1ffffffffffffffl;
04277         r[i + 5] = 0x1ffffffffffffffl;
04278         r[i + 6] = 0x1ffffffffffffffl;
04279         r[i + 7] = 0x1ffffffffffffffl;
04280     }
04281     r[24] = 0x1ffffffffffffffl;
04282     r[25] = 0x1ffffffffffffffl;
04283 #endif
04284     r[26] = 0x3fffffffffffffl;
04285 
04286     /* r = (2^n - 1) mod n */
04287     sp_3072_sub_27(r, r, m);
04288 
04289     /* Add one so r = 2^n mod m */
04290     r[0] += 1;
04291 }
04292 
04293 /* Compare a with b in constant time.
04294  *
04295  * a  A single precision integer.
04296  * b  A single precision integer.
04297  * return -ve, 0 or +ve if a is less than, equal to or greater than b
04298  * respectively.
04299  */
04300 static sp_digit sp_3072_cmp_27(const sp_digit* a, const sp_digit* b)
04301 {
04302     sp_digit r = 0;
04303 #ifdef WOLFSSL_SP_SMALL
04304     int i;
04305 
04306     for (i=26; i>=0; i--)
04307         r |= (a[i] - b[i]) & (0 - !r);
04308 #else
04309     int i;
04310 
04311     r |= (a[26] - b[26]) & (0 - !r);
04312     r |= (a[25] - b[25]) & (0 - !r);
04313     r |= (a[24] - b[24]) & (0 - !r);
04314     for (i = 16; i >= 0; i -= 8) {
04315         r |= (a[i + 7] - b[i + 7]) & (0 - !r);
04316         r |= (a[i + 6] - b[i + 6]) & (0 - !r);
04317         r |= (a[i + 5] - b[i + 5]) & (0 - !r);
04318         r |= (a[i + 4] - b[i + 4]) & (0 - !r);
04319         r |= (a[i + 3] - b[i + 3]) & (0 - !r);
04320         r |= (a[i + 2] - b[i + 2]) & (0 - !r);
04321         r |= (a[i + 1] - b[i + 1]) & (0 - !r);
04322         r |= (a[i + 0] - b[i + 0]) & (0 - !r);
04323     }
04324 #endif /* WOLFSSL_SP_SMALL */
04325 
04326     return r;
04327 }
04328 
04329 /* Conditionally subtract b from a using the mask m.
04330  * m is -1 to subtract and 0 when not.
04331  *
04332  * r  A single precision number representing condition subtract result.
04333  * a  A single precision number to subtract from.
04334  * b  A single precision number to subtract.
04335  * m  Mask value to apply.
04336  */
04337 static void sp_3072_cond_sub_27(sp_digit* r, const sp_digit* a,
04338         const sp_digit* b, const sp_digit m)
04339 {
04340 #ifdef WOLFSSL_SP_SMALL
04341     int i;
04342 
04343     for (i = 0; i < 27; i++)
04344         r[i] = a[i] - (b[i] & m);
04345 #else
04346     int i;
04347 
04348     for (i = 0; i < 24; i += 8) {
04349         r[i + 0] = a[i + 0] - (b[i + 0] & m);
04350         r[i + 1] = a[i + 1] - (b[i + 1] & m);
04351         r[i + 2] = a[i + 2] - (b[i + 2] & m);
04352         r[i + 3] = a[i + 3] - (b[i + 3] & m);
04353         r[i + 4] = a[i + 4] - (b[i + 4] & m);
04354         r[i + 5] = a[i + 5] - (b[i + 5] & m);
04355         r[i + 6] = a[i + 6] - (b[i + 6] & m);
04356         r[i + 7] = a[i + 7] - (b[i + 7] & m);
04357     }
04358     r[24] = a[24] - (b[24] & m);
04359     r[25] = a[25] - (b[25] & m);
04360     r[26] = a[26] - (b[26] & m);
04361 #endif /* WOLFSSL_SP_SMALL */
04362 }
04363 
04364 /* Mul a by scalar b and add into r. (r += a * b)
04365  *
04366  * r  A single precision integer.
04367  * a  A single precision integer.
04368  * b  A scalar.
04369  */
04370 SP_NOINLINE static void sp_3072_mul_add_27(sp_digit* r, const sp_digit* a,
04371         const sp_digit b)
04372 {
04373 #ifdef WOLFSSL_SP_SMALL
04374     int128_t tb = b;
04375     int128_t t = 0;
04376     int i;
04377 
04378     for (i = 0; i < 27; i++) {
04379         t += (tb * a[i]) + r[i];
04380         r[i] = t & 0x1ffffffffffffffl;
04381         t >>= 57;
04382     }
04383     r[27] += t;
04384 #else
04385     int128_t tb = b;
04386     int128_t t[8];
04387     int i;
04388 
04389     t[0] = tb * a[0]; r[0] += t[0] & 0x1ffffffffffffffl;
04390     for (i = 0; i < 24; i += 8) {
04391         t[1] = tb * a[i+1];
04392         r[i+1] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffl);
04393         t[2] = tb * a[i+2];
04394         r[i+2] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffl);
04395         t[3] = tb * a[i+3];
04396         r[i+3] += (t[2] >> 57) + (t[3] & 0x1ffffffffffffffl);
04397         t[4] = tb * a[i+4];
04398         r[i+4] += (t[3] >> 57) + (t[4] & 0x1ffffffffffffffl);
04399         t[5] = tb * a[i+5];
04400         r[i+5] += (t[4] >> 57) + (t[5] & 0x1ffffffffffffffl);
04401         t[6] = tb * a[i+6];
04402         r[i+6] += (t[5] >> 57) + (t[6] & 0x1ffffffffffffffl);
04403         t[7] = tb * a[i+7];
04404         r[i+7] += (t[6] >> 57) + (t[7] & 0x1ffffffffffffffl);
04405         t[0] = tb * a[i+8];
04406         r[i+8] += (t[7] >> 57) + (t[0] & 0x1ffffffffffffffl);
04407     }
04408     t[1] = tb * a[25]; r[25] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffl);
04409     t[2] = tb * a[26]; r[26] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffl);
04410     r[27] +=  t[2] >> 57;
04411 #endif /* WOLFSSL_SP_SMALL */
04412 }
04413 
04414 /* Normalize the values in each word to 57.
04415  *
04416  * a  Array of sp_digit to normalize.
04417  */
04418 static void sp_3072_norm_27(sp_digit* a)
04419 {
04420 #ifdef WOLFSSL_SP_SMALL
04421     int i;
04422     for (i = 0; i < 26; i++) {
04423         a[i+1] += a[i] >> 57;
04424         a[i] &= 0x1ffffffffffffffl;
04425     }
04426 #else
04427     int i;
04428     for (i = 0; i < 24; i += 8) {
04429         a[i+1] += a[i+0] >> 57; a[i+0] &= 0x1ffffffffffffffl;
04430         a[i+2] += a[i+1] >> 57; a[i+1] &= 0x1ffffffffffffffl;
04431         a[i+3] += a[i+2] >> 57; a[i+2] &= 0x1ffffffffffffffl;
04432         a[i+4] += a[i+3] >> 57; a[i+3] &= 0x1ffffffffffffffl;
04433         a[i+5] += a[i+4] >> 57; a[i+4] &= 0x1ffffffffffffffl;
04434         a[i+6] += a[i+5] >> 57; a[i+5] &= 0x1ffffffffffffffl;
04435         a[i+7] += a[i+6] >> 57; a[i+6] &= 0x1ffffffffffffffl;
04436         a[i+8] += a[i+7] >> 57; a[i+7] &= 0x1ffffffffffffffl;
04437         a[i+9] += a[i+8] >> 57; a[i+8] &= 0x1ffffffffffffffl;
04438     }
04439     a[24+1] += a[24] >> 57;
04440     a[24] &= 0x1ffffffffffffffl;
04441     a[25+1] += a[25] >> 57;
04442     a[25] &= 0x1ffffffffffffffl;
04443 #endif
04444 }
04445 
04446 /* Shift the result in the high 1536 bits down to the bottom.
04447  *
04448  * r  A single precision number.
04449  * a  A single precision number.
04450  */
04451 static void sp_3072_mont_shift_27(sp_digit* r, const sp_digit* a)
04452 {
04453 #ifdef WOLFSSL_SP_SMALL
04454     int i;
04455     sp_digit n, s;
04456 
04457     s = a[27];
04458     n = a[26] >> 54;
04459     for (i = 0; i < 26; i++) {
04460         n += (s & 0x1ffffffffffffffl) << 3;
04461         r[i] = n & 0x1ffffffffffffffl;
04462         n >>= 57;
04463         s = a[28 + i] + (s >> 57);
04464     }
04465     n += s << 3;
04466     r[26] = n;
04467 #else
04468     sp_digit n, s;
04469     int i;
04470 
04471     s = a[27]; n = a[26] >> 54;
04472     for (i = 0; i < 24; i += 8) {
04473         n += (s & 0x1ffffffffffffffl) << 3; r[i+0] = n & 0x1ffffffffffffffl;
04474         n >>= 57; s = a[i+28] + (s >> 57);
04475         n += (s & 0x1ffffffffffffffl) << 3; r[i+1] = n & 0x1ffffffffffffffl;
04476         n >>= 57; s = a[i+29] + (s >> 57);
04477         n += (s & 0x1ffffffffffffffl) << 3; r[i+2] = n & 0x1ffffffffffffffl;
04478         n >>= 57; s = a[i+30] + (s >> 57);
04479         n += (s & 0x1ffffffffffffffl) << 3; r[i+3] = n & 0x1ffffffffffffffl;
04480         n >>= 57; s = a[i+31] + (s >> 57);
04481         n += (s & 0x1ffffffffffffffl) << 3; r[i+4] = n & 0x1ffffffffffffffl;
04482         n >>= 57; s = a[i+32] + (s >> 57);
04483         n += (s & 0x1ffffffffffffffl) << 3; r[i+5] = n & 0x1ffffffffffffffl;
04484         n >>= 57; s = a[i+33] + (s >> 57);
04485         n += (s & 0x1ffffffffffffffl) << 3; r[i+6] = n & 0x1ffffffffffffffl;
04486         n >>= 57; s = a[i+34] + (s >> 57);
04487         n += (s & 0x1ffffffffffffffl) << 3; r[i+7] = n & 0x1ffffffffffffffl;
04488         n >>= 57; s = a[i+35] + (s >> 57);
04489     }
04490     n += (s & 0x1ffffffffffffffl) << 3; r[24] = n & 0x1ffffffffffffffl;
04491     n >>= 57; s = a[52] + (s >> 57);
04492     n += (s & 0x1ffffffffffffffl) << 3; r[25] = n & 0x1ffffffffffffffl;
04493     n >>= 57; s = a[53] + (s >> 57);
04494     n += s << 3;              r[26] = n;
04495 #endif /* WOLFSSL_SP_SMALL */
04496     XMEMSET(&r[27], 0, sizeof(*r) * 27);
04497 }
04498 
04499 /* Reduce the number back to 3072 bits using Montgomery reduction.
04500  *
04501  * a   A single precision number to reduce in place.
04502  * m   The single precision number representing the modulus.
04503  * mp  The digit representing the negative inverse of m mod 2^n.
04504  */
04505 static void sp_3072_mont_reduce_27(sp_digit* a, sp_digit* m, sp_digit mp)
04506 {
04507     int i;
04508     sp_digit mu;
04509 
04510     for (i=0; i<26; i++) {
04511         mu = (a[i] * mp) & 0x1ffffffffffffffl;
04512         sp_3072_mul_add_27(a+i, m, mu);
04513         a[i+1] += a[i] >> 57;
04514     }
04515     mu = (a[i] * mp) & 0x3fffffffffffffl;
04516     sp_3072_mul_add_27(a+i, m, mu);
04517     a[i+1] += a[i] >> 57;
04518     a[i] &= 0x1ffffffffffffffl;
04519 
04520     sp_3072_mont_shift_27(a, a);
04521     sp_3072_cond_sub_27(a, a, m, 0 - ((a[26] >> 54) > 0));
04522     sp_3072_norm_27(a);
04523 }
04524 
04525 /* Multiply two Montogmery form numbers mod the modulus (prime).
04526  * (r = a * b mod m)
04527  *
04528  * r   Result of multiplication.
04529  * a   First number to multiply in Montogmery form.
04530  * b   Second number to multiply in Montogmery form.
04531  * m   Modulus (prime).
04532  * mp  Montogmery mulitplier.
04533  */
04534 static void sp_3072_mont_mul_27(sp_digit* r, sp_digit* a, sp_digit* b,
04535         sp_digit* m, sp_digit mp)
04536 {
04537     sp_3072_mul_27(r, a, b);
04538     sp_3072_mont_reduce_27(r, m, mp);
04539 }
04540 
04541 /* Square the Montgomery form number. (r = a * a mod m)
04542  *
04543  * r   Result of squaring.
04544  * a   Number to square in Montogmery form.
04545  * m   Modulus (prime).
04546  * mp  Montogmery mulitplier.
04547  */
04548 static void sp_3072_mont_sqr_27(sp_digit* r, sp_digit* a, sp_digit* m,
04549         sp_digit mp)
04550 {
04551     sp_3072_sqr_27(r, a);
04552     sp_3072_mont_reduce_27(r, m, mp);
04553 }
04554 
04555 /* Multiply a by scalar b into r. (r = a * b)
04556  *
04557  * r  A single precision integer.
04558  * a  A single precision integer.
04559  * b  A scalar.
04560  */
04561 SP_NOINLINE static void sp_3072_mul_d_27(sp_digit* r, const sp_digit* a,
04562     const sp_digit b)
04563 {
04564 #ifdef WOLFSSL_SP_SMALL
04565     int128_t tb = b;
04566     int128_t t = 0;
04567     int i;
04568 
04569     for (i = 0; i < 27; i++) {
04570         t += tb * a[i];
04571         r[i] = t & 0x1ffffffffffffffl;
04572         t >>= 57;
04573     }
04574     r[27] = (sp_digit)t;
04575 #else
04576     int128_t tb = b;
04577     int128_t t[8];
04578     int i;
04579 
04580     t[0] = tb * a[0]; r[0] = t[0] & 0x1ffffffffffffffl;
04581     for (i = 0; i < 24; i += 8) {
04582         t[1] = tb * a[i+1];
04583         r[i+1] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffl);
04584         t[2] = tb * a[i+2];
04585         r[i+2] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffl);
04586         t[3] = tb * a[i+3];
04587         r[i+3] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffl);
04588         t[4] = tb * a[i+4];
04589         r[i+4] = (sp_digit)(t[3] >> 57) + (t[4] & 0x1ffffffffffffffl);
04590         t[5] = tb * a[i+5];
04591         r[i+5] = (sp_digit)(t[4] >> 57) + (t[5] & 0x1ffffffffffffffl);
04592         t[6] = tb * a[i+6];
04593         r[i+6] = (sp_digit)(t[5] >> 57) + (t[6] & 0x1ffffffffffffffl);
04594         t[7] = tb * a[i+7];
04595         r[i+7] = (sp_digit)(t[6] >> 57) + (t[7] & 0x1ffffffffffffffl);
04596         t[0] = tb * a[i+8];
04597         r[i+8] = (sp_digit)(t[7] >> 57) + (t[0] & 0x1ffffffffffffffl);
04598     }
04599     t[1] = tb * a[25];
04600     r[25] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffl);
04601     t[2] = tb * a[26];
04602     r[26] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffl);
04603     r[27] =  (sp_digit)(t[2] >> 57);
04604 #endif /* WOLFSSL_SP_SMALL */
04605 }
04606 
04607 /* Conditionally add a and b using the mask m.
04608  * m is -1 to add and 0 when not.
04609  *
04610  * r  A single precision number representing conditional add result.
04611  * a  A single precision number to add with.
04612  * b  A single precision number to add.
04613  * m  Mask value to apply.
04614  */
04615 static void sp_3072_cond_add_27(sp_digit* r, const sp_digit* a,
04616         const sp_digit* b, const sp_digit m)
04617 {
04618 #ifdef WOLFSSL_SP_SMALL
04619     int i;
04620 
04621     for (i = 0; i < 27; i++)
04622         r[i] = a[i] + (b[i] & m);
04623 #else
04624     int i;
04625 
04626     for (i = 0; i < 24; i += 8) {
04627         r[i + 0] = a[i + 0] + (b[i + 0] & m);
04628         r[i + 1] = a[i + 1] + (b[i + 1] & m);
04629         r[i + 2] = a[i + 2] + (b[i + 2] & m);
04630         r[i + 3] = a[i + 3] + (b[i + 3] & m);
04631         r[i + 4] = a[i + 4] + (b[i + 4] & m);
04632         r[i + 5] = a[i + 5] + (b[i + 5] & m);
04633         r[i + 6] = a[i + 6] + (b[i + 6] & m);
04634         r[i + 7] = a[i + 7] + (b[i + 7] & m);
04635     }
04636     r[24] = a[24] + (b[24] & m);
04637     r[25] = a[25] + (b[25] & m);
04638     r[26] = a[26] + (b[26] & m);
04639 #endif /* WOLFSSL_SP_SMALL */
04640 }
04641 
04642 /* Divide d in a and put remainder into r (m*d + r = a)
04643  * m is not calculated as it is not needed at this time.
04644  *
04645  * a  Nmber to be divided.
04646  * d  Number to divide with.
04647  * m  Multiplier result.
04648  * r  Remainder from the division.
04649  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
04650  */
04651 static int sp_3072_div_27(sp_digit* a, sp_digit* d, sp_digit* m,
04652         sp_digit* r)
04653 {
04654     int i;
04655     int128_t d1;
04656     sp_digit div, r1;
04657 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
04658     sp_digit* td;
04659 #else
04660     sp_digit t1d[54], t2d[27 + 1];
04661 #endif
04662     sp_digit* t1;
04663     sp_digit* t2;
04664     int err = MP_OKAY;
04665 
04666 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
04667     td = XMALLOC(sizeof(sp_digit) * (3 * 27 + 1), NULL, DYNAMIC_TYPE_TMP_BUFFER);
04668     if (td != NULL) {
04669         t1 = td;
04670         t2 = td + 2 * 27;
04671     }
04672     else
04673         err = MEMORY_E;
04674 #else
04675     t1 = t1d;
04676     t2 = t2d;
04677 #endif
04678 
04679     (void)m;
04680 
04681     if (err == MP_OKAY) {
04682         div = d[26];
04683         XMEMCPY(t1, a, sizeof(*t1) * 2 * 27);
04684         for (i=26; i>=0; i--) {
04685             t1[27 + i] += t1[27 + i - 1] >> 57;
04686             t1[27 + i - 1] &= 0x1ffffffffffffffl;
04687             d1 = t1[27 + i];
04688             d1 <<= 57;
04689             d1 += t1[27 + i - 1];
04690             r1 = (sp_digit)(d1 / div);
04691 
04692             sp_3072_mul_d_27(t2, d, r1);
04693             sp_3072_sub_27(&t1[i], &t1[i], t2);
04694             t1[27 + i] -= t2[27];
04695             t1[27 + i] += t1[27 + i - 1] >> 57;
04696             t1[27 + i - 1] &= 0x1ffffffffffffffl;
04697             r1 = (((-t1[27 + i]) << 57) - t1[27 + i - 1]) / div;
04698             r1++;
04699             sp_3072_mul_d_27(t2, d, r1);
04700             sp_3072_add_27(&t1[i], &t1[i], t2);
04701             t1[27 + i] += t1[27 + i - 1] >> 57;
04702             t1[27 + i - 1] &= 0x1ffffffffffffffl;
04703         }
04704         t1[27 - 1] += t1[27 - 2] >> 57;
04705         t1[27 - 2] &= 0x1ffffffffffffffl;
04706         d1 = t1[27 - 1];
04707         r1 = (sp_digit)(d1 / div);
04708 
04709         sp_3072_mul_d_27(t2, d, r1);
04710         sp_3072_sub_27(t1, t1, t2);
04711         XMEMCPY(r, t1, sizeof(*r) * 2 * 27);
04712         for (i=0; i<25; i++) {
04713             r[i+1] += r[i] >> 57;
04714             r[i] &= 0x1ffffffffffffffl;
04715         }
04716         sp_3072_cond_add_27(r, r, d, 0 - (r[26] < 0));
04717     }
04718 
04719 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
04720     if (td != NULL)
04721         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
04722 #endif
04723 
04724     return err;
04725 }
04726 
04727 /* Reduce a modulo m into r. (r = a mod m)
04728  *
04729  * r  A single precision number that is the reduced result.
04730  * a  A single precision number that is to be reduced.
04731  * m  A single precision number that is the modulus to reduce with.
04732  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
04733  */
04734 static int sp_3072_mod_27(sp_digit* r, sp_digit* a, sp_digit* m)
04735 {
04736     return sp_3072_div_27(a, m, NULL, r);
04737 }
04738 
04739 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
04740  *
04741  * r     A single precision number that is the result of the operation.
04742  * a     A single precision number being exponentiated.
04743  * e     A single precision number that is the exponent.
04744  * bits  The number of bits in the exponent.
04745  * m     A single precision number that is the modulus.
04746  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
04747  */
04748 static int sp_3072_mod_exp_27(sp_digit* r, sp_digit* a, sp_digit* e, int bits,
04749     sp_digit* m, int reduceA)
04750 {
04751 #ifdef WOLFSSL_SP_SMALL
04752     sp_digit* td;
04753     sp_digit* t[3];
04754     sp_digit* norm;
04755     sp_digit mp = 1;
04756     sp_digit n;
04757     int i;
04758     int c, y;
04759     int err = MP_OKAY;
04760 
04761     td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 27 * 2, NULL,
04762                             DYNAMIC_TYPE_TMP_BUFFER);
04763     if (td == NULL)
04764         err = MEMORY_E;
04765 
04766     if (err == MP_OKAY) {
04767         XMEMSET(td, 0, sizeof(*td) * 3 * 27 * 2);
04768 
04769         norm = t[0] = td;
04770         t[1] = &td[27 * 2];
04771         t[2] = &td[2 * 27 * 2];
04772 
04773         sp_3072_mont_setup(m, &mp);
04774         sp_3072_mont_norm_27(norm, m);
04775 
04776         if (reduceA)
04777             err = sp_3072_mod_27(t[1], a, m);
04778         else
04779             XMEMCPY(t[1], a, sizeof(sp_digit) * 27);
04780     }
04781     if (err == MP_OKAY) {
04782         sp_3072_mul_27(t[1], t[1], norm);
04783         err = sp_3072_mod_27(t[1], t[1], m);
04784     }
04785 
04786     if (err == MP_OKAY) {
04787         i = bits / 57;
04788         c = bits % 57;
04789         n = e[i--] << (57 - c);
04790         for (; ; c--) {
04791             if (c == 0) {
04792                 if (i == -1)
04793                     break;
04794 
04795                 n = e[i--];
04796                 c = 57;
04797             }
04798 
04799             y = (n >> 56) & 1;
04800             n <<= 1;
04801 
04802             sp_3072_mont_mul_27(t[y^1], t[0], t[1], m, mp);
04803 
04804             XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
04805                                   ((size_t)t[1] & addr_mask[y])),
04806                     sizeof(*t[2]) * 27 * 2);
04807             sp_3072_mont_sqr_27(t[2], t[2], m, mp);
04808             XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
04809                             ((size_t)t[1] & addr_mask[y])), t[2],
04810                     sizeof(*t[2]) * 27 * 2);
04811         }
04812 
04813         sp_3072_mont_reduce_27(t[0], m, mp);
04814         n = sp_3072_cmp_27(t[0], m);
04815         sp_3072_cond_sub_27(t[0], t[0], m, (n < 0) - 1);
04816         XMEMCPY(r, t[0], sizeof(*r) * 27 * 2);
04817 
04818     }
04819 
04820     if (td != NULL)
04821         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
04822 
04823     return err;
04824 #elif defined(WOLFSSL_SP_CACHE_RESISTANT)
04825 #ifndef WOLFSSL_SMALL_STACK
04826     sp_digit t[3][54];
04827 #else
04828     sp_digit* td;
04829     sp_digit* t[3];
04830 #endif
04831     sp_digit* norm;
04832     sp_digit mp = 1;
04833     sp_digit n;
04834     int i;
04835     int c, y;
04836     int err = MP_OKAY;
04837 
04838 #ifdef WOLFSSL_SMALL_STACK
04839     td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 27 * 2, NULL,
04840                             DYNAMIC_TYPE_TMP_BUFFER);
04841     if (td == NULL)
04842         err = MEMORY_E;
04843 
04844     if (err == MP_OKAY) {
04845         t[0] = td;
04846         t[1] = &td[27 * 2];
04847         t[2] = &td[2 * 27 * 2];
04848         norm = t[0];
04849     }
04850 #else
04851     norm = t[0];
04852 #endif
04853 
04854     if (err == MP_OKAY) {
04855         sp_3072_mont_setup(m, &mp);
04856         sp_3072_mont_norm_27(norm, m);
04857 
04858         if (reduceA) {
04859             err = sp_3072_mod_27(t[1], a, m);
04860             if (err == MP_OKAY) {
04861                 sp_3072_mul_27(t[1], t[1], norm);
04862                 err = sp_3072_mod_27(t[1], t[1], m);
04863             }
04864         }
04865         else {
04866             sp_3072_mul_27(t[1], a, norm);
04867             err = sp_3072_mod_27(t[1], t[1], m);
04868         }
04869     }
04870 
04871     if (err == MP_OKAY) {
04872         i = bits / 57;
04873         c = bits % 57;
04874         n = e[i--] << (57 - c);
04875         for (; ; c--) {
04876             if (c == 0) {
04877                 if (i == -1)
04878                     break;
04879 
04880                 n = e[i--];
04881                 c = 57;
04882             }
04883 
04884             y = (n >> 56) & 1;
04885             n <<= 1;
04886 
04887             sp_3072_mont_mul_27(t[y^1], t[0], t[1], m, mp);
04888 
04889             XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
04890                                  ((size_t)t[1] & addr_mask[y])), sizeof(t[2]));
04891             sp_3072_mont_sqr_27(t[2], t[2], m, mp);
04892             XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
04893                            ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2]));
04894         }
04895 
04896         sp_3072_mont_reduce_27(t[0], m, mp);
04897         n = sp_3072_cmp_27(t[0], m);
04898         sp_3072_cond_sub_27(t[0], t[0], m, (n < 0) - 1);
04899         XMEMCPY(r, t[0], sizeof(t[0]));
04900     }
04901 
04902 #ifdef WOLFSSL_SMALL_STACK
04903     if (td != NULL)
04904         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
04905 #endif
04906 
04907     return err;
04908 #else
04909 #ifndef WOLFSSL_SMALL_STACK
04910     sp_digit t[32][54];
04911 #else
04912     sp_digit* t[32];
04913     sp_digit* td;
04914 #endif
04915     sp_digit* norm;
04916     sp_digit rt[54];
04917     sp_digit mp = 1;
04918     sp_digit n;
04919     int i;
04920     int c, y;
04921     int err = MP_OKAY;
04922 
04923 #ifdef WOLFSSL_SMALL_STACK
04924     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 54, NULL,
04925                             DYNAMIC_TYPE_TMP_BUFFER);
04926     if (td == NULL)
04927         err = MEMORY_E;
04928 
04929     if (err == MP_OKAY) {
04930         for (i=0; i<32; i++)
04931             t[i] = td + i * 54;
04932         norm = t[0];
04933     }
04934 #else
04935     norm = t[0];
04936 #endif
04937 
04938     if (err == MP_OKAY) {
04939         sp_3072_mont_setup(m, &mp);
04940         sp_3072_mont_norm_27(norm, m);
04941 
04942         if (reduceA) {
04943             err = sp_3072_mod_27(t[1], a, m);
04944             if (err == MP_OKAY) {
04945                 sp_3072_mul_27(t[1], t[1], norm);
04946                 err = sp_3072_mod_27(t[1], t[1], m);
04947             }
04948         }
04949         else {
04950             sp_3072_mul_27(t[1], a, norm);
04951             err = sp_3072_mod_27(t[1], t[1], m);
04952         }
04953     }
04954 
04955     if (err == MP_OKAY) {
04956         sp_3072_mont_sqr_27(t[ 2], t[ 1], m, mp);
04957         sp_3072_mont_mul_27(t[ 3], t[ 2], t[ 1], m, mp);
04958         sp_3072_mont_sqr_27(t[ 4], t[ 2], m, mp);
04959         sp_3072_mont_mul_27(t[ 5], t[ 3], t[ 2], m, mp);
04960         sp_3072_mont_sqr_27(t[ 6], t[ 3], m, mp);
04961         sp_3072_mont_mul_27(t[ 7], t[ 4], t[ 3], m, mp);
04962         sp_3072_mont_sqr_27(t[ 8], t[ 4], m, mp);
04963         sp_3072_mont_mul_27(t[ 9], t[ 5], t[ 4], m, mp);
04964         sp_3072_mont_sqr_27(t[10], t[ 5], m, mp);
04965         sp_3072_mont_mul_27(t[11], t[ 6], t[ 5], m, mp);
04966         sp_3072_mont_sqr_27(t[12], t[ 6], m, mp);
04967         sp_3072_mont_mul_27(t[13], t[ 7], t[ 6], m, mp);
04968         sp_3072_mont_sqr_27(t[14], t[ 7], m, mp);
04969         sp_3072_mont_mul_27(t[15], t[ 8], t[ 7], m, mp);
04970         sp_3072_mont_sqr_27(t[16], t[ 8], m, mp);
04971         sp_3072_mont_mul_27(t[17], t[ 9], t[ 8], m, mp);
04972         sp_3072_mont_sqr_27(t[18], t[ 9], m, mp);
04973         sp_3072_mont_mul_27(t[19], t[10], t[ 9], m, mp);
04974         sp_3072_mont_sqr_27(t[20], t[10], m, mp);
04975         sp_3072_mont_mul_27(t[21], t[11], t[10], m, mp);
04976         sp_3072_mont_sqr_27(t[22], t[11], m, mp);
04977         sp_3072_mont_mul_27(t[23], t[12], t[11], m, mp);
04978         sp_3072_mont_sqr_27(t[24], t[12], m, mp);
04979         sp_3072_mont_mul_27(t[25], t[13], t[12], m, mp);
04980         sp_3072_mont_sqr_27(t[26], t[13], m, mp);
04981         sp_3072_mont_mul_27(t[27], t[14], t[13], m, mp);
04982         sp_3072_mont_sqr_27(t[28], t[14], m, mp);
04983         sp_3072_mont_mul_27(t[29], t[15], t[14], m, mp);
04984         sp_3072_mont_sqr_27(t[30], t[15], m, mp);
04985         sp_3072_mont_mul_27(t[31], t[16], t[15], m, mp);
04986 
04987         bits = ((bits + 4) / 5) * 5;
04988         i = ((bits + 56) / 57) - 1;
04989         c = bits % 57;
04990         if (c == 0)
04991             c = 57;
04992         if (i < 27)
04993             n = e[i--] << (64 - c);
04994         else {
04995             n = 0;
04996             i--;
04997         }
04998         if (c < 5) {
04999             n |= e[i--] << (7 - c);
05000             c += 57;
05001         }
05002         y = n >> 59;
05003         n <<= 5;
05004         c -= 5;
05005         XMEMCPY(rt, t[y], sizeof(rt));
05006         for (; i>=0 || c>=5; ) {
05007             if (c < 5) {
05008                 n |= e[i--] << (7 - c);
05009                 c += 57;
05010             }
05011             y = (n >> 59) & 0x1f;
05012             n <<= 5;
05013             c -= 5;
05014 
05015             sp_3072_mont_sqr_27(rt, rt, m, mp);
05016             sp_3072_mont_sqr_27(rt, rt, m, mp);
05017             sp_3072_mont_sqr_27(rt, rt, m, mp);
05018             sp_3072_mont_sqr_27(rt, rt, m, mp);
05019             sp_3072_mont_sqr_27(rt, rt, m, mp);
05020 
05021             sp_3072_mont_mul_27(rt, rt, t[y], m, mp);
05022         }
05023 
05024         sp_3072_mont_reduce_27(rt, m, mp);
05025         n = sp_3072_cmp_27(rt, m);
05026         sp_3072_cond_sub_27(rt, rt, m, (n < 0) - 1);
05027         XMEMCPY(r, rt, sizeof(rt));
05028     }
05029 
05030 #ifdef WOLFSSL_SMALL_STACK
05031     if (td != NULL)
05032         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
05033 #endif
05034 
05035     return err;
05036 #endif
05037 }
05038 
05039 #endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
05040 
05041 /* r = 2^n mod m where n is the number of bits to reduce by.
05042  * Given m must be 3072 bits, just need to subtract.
05043  *
05044  * r  A single precision number.
05045  * m  A signle precision number.
05046  */
05047 static void sp_3072_mont_norm_54(sp_digit* r, sp_digit* m)
05048 {
05049     /* Set r = 2^n - 1. */
05050 #ifdef WOLFSSL_SP_SMALL
05051     int i;
05052 
05053     for (i=0; i<53; i++)
05054         r[i] = 0x1ffffffffffffffl;
05055 #else
05056     int i;
05057 
05058     for (i = 0; i < 48; i += 8) {
05059         r[i + 0] = 0x1ffffffffffffffl;
05060         r[i + 1] = 0x1ffffffffffffffl;
05061         r[i + 2] = 0x1ffffffffffffffl;
05062         r[i + 3] = 0x1ffffffffffffffl;
05063         r[i + 4] = 0x1ffffffffffffffl;
05064         r[i + 5] = 0x1ffffffffffffffl;
05065         r[i + 6] = 0x1ffffffffffffffl;
05066         r[i + 7] = 0x1ffffffffffffffl;
05067     }
05068     r[48] = 0x1ffffffffffffffl;
05069     r[49] = 0x1ffffffffffffffl;
05070     r[50] = 0x1ffffffffffffffl;
05071     r[51] = 0x1ffffffffffffffl;
05072     r[52] = 0x1ffffffffffffffl;
05073 #endif
05074     r[53] = 0x7ffffffffffffl;
05075 
05076     /* r = (2^n - 1) mod n */
05077     sp_3072_sub_54(r, r, m);
05078 
05079     /* Add one so r = 2^n mod m */
05080     r[0] += 1;
05081 }
05082 
05083 /* Compare a with b in constant time.
05084  *
05085  * a  A single precision integer.
05086  * b  A single precision integer.
05087  * return -ve, 0 or +ve if a is less than, equal to or greater than b
05088  * respectively.
05089  */
05090 static sp_digit sp_3072_cmp_54(const sp_digit* a, const sp_digit* b)
05091 {
05092     sp_digit r = 0;
05093 #ifdef WOLFSSL_SP_SMALL
05094     int i;
05095 
05096     for (i=53; i>=0; i--)
05097         r |= (a[i] - b[i]) & (0 - !r);
05098 #else
05099     int i;
05100 
05101     r |= (a[53] - b[53]) & (0 - !r);
05102     r |= (a[52] - b[52]) & (0 - !r);
05103     r |= (a[51] - b[51]) & (0 - !r);
05104     r |= (a[50] - b[50]) & (0 - !r);
05105     r |= (a[49] - b[49]) & (0 - !r);
05106     r |= (a[48] - b[48]) & (0 - !r);
05107     for (i = 40; i >= 0; i -= 8) {
05108         r |= (a[i + 7] - b[i + 7]) & (0 - !r);
05109         r |= (a[i + 6] - b[i + 6]) & (0 - !r);
05110         r |= (a[i + 5] - b[i + 5]) & (0 - !r);
05111         r |= (a[i + 4] - b[i + 4]) & (0 - !r);
05112         r |= (a[i + 3] - b[i + 3]) & (0 - !r);
05113         r |= (a[i + 2] - b[i + 2]) & (0 - !r);
05114         r |= (a[i + 1] - b[i + 1]) & (0 - !r);
05115         r |= (a[i + 0] - b[i + 0]) & (0 - !r);
05116     }
05117 #endif /* WOLFSSL_SP_SMALL */
05118 
05119     return r;
05120 }
05121 
05122 /* Conditionally subtract b from a using the mask m.
05123  * m is -1 to subtract and 0 when not.
05124  *
05125  * r  A single precision number representing condition subtract result.
05126  * a  A single precision number to subtract from.
05127  * b  A single precision number to subtract.
05128  * m  Mask value to apply.
05129  */
05130 static void sp_3072_cond_sub_54(sp_digit* r, const sp_digit* a,
05131         const sp_digit* b, const sp_digit m)
05132 {
05133 #ifdef WOLFSSL_SP_SMALL
05134     int i;
05135 
05136     for (i = 0; i < 54; i++)
05137         r[i] = a[i] - (b[i] & m);
05138 #else
05139     int i;
05140 
05141     for (i = 0; i < 48; i += 8) {
05142         r[i + 0] = a[i + 0] - (b[i + 0] & m);
05143         r[i + 1] = a[i + 1] - (b[i + 1] & m);
05144         r[i + 2] = a[i + 2] - (b[i + 2] & m);
05145         r[i + 3] = a[i + 3] - (b[i + 3] & m);
05146         r[i + 4] = a[i + 4] - (b[i + 4] & m);
05147         r[i + 5] = a[i + 5] - (b[i + 5] & m);
05148         r[i + 6] = a[i + 6] - (b[i + 6] & m);
05149         r[i + 7] = a[i + 7] - (b[i + 7] & m);
05150     }
05151     r[48] = a[48] - (b[48] & m);
05152     r[49] = a[49] - (b[49] & m);
05153     r[50] = a[50] - (b[50] & m);
05154     r[51] = a[51] - (b[51] & m);
05155     r[52] = a[52] - (b[52] & m);
05156     r[53] = a[53] - (b[53] & m);
05157 #endif /* WOLFSSL_SP_SMALL */
05158 }
05159 
05160 /* Mul a by scalar b and add into r. (r += a * b)
05161  *
05162  * r  A single precision integer.
05163  * a  A single precision integer.
05164  * b  A scalar.
05165  */
05166 SP_NOINLINE static void sp_3072_mul_add_54(sp_digit* r, const sp_digit* a,
05167         const sp_digit b)
05168 {
05169 #ifdef WOLFSSL_SP_SMALL
05170     int128_t tb = b;
05171     int128_t t = 0;
05172     int i;
05173 
05174     for (i = 0; i < 54; i++) {
05175         t += (tb * a[i]) + r[i];
05176         r[i] = t & 0x1ffffffffffffffl;
05177         t >>= 57;
05178     }
05179     r[54] += t;
05180 #else
05181     int128_t tb = b;
05182     int128_t t[8];
05183     int i;
05184 
05185     t[0] = tb * a[0]; r[0] += t[0] & 0x1ffffffffffffffl;
05186     for (i = 0; i < 48; i += 8) {
05187         t[1] = tb * a[i+1];
05188         r[i+1] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffl);
05189         t[2] = tb * a[i+2];
05190         r[i+2] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffl);
05191         t[3] = tb * a[i+3];
05192         r[i+3] += (t[2] >> 57) + (t[3] & 0x1ffffffffffffffl);
05193         t[4] = tb * a[i+4];
05194         r[i+4] += (t[3] >> 57) + (t[4] & 0x1ffffffffffffffl);
05195         t[5] = tb * a[i+5];
05196         r[i+5] += (t[4] >> 57) + (t[5] & 0x1ffffffffffffffl);
05197         t[6] = tb * a[i+6];
05198         r[i+6] += (t[5] >> 57) + (t[6] & 0x1ffffffffffffffl);
05199         t[7] = tb * a[i+7];
05200         r[i+7] += (t[6] >> 57) + (t[7] & 0x1ffffffffffffffl);
05201         t[0] = tb * a[i+8];
05202         r[i+8] += (t[7] >> 57) + (t[0] & 0x1ffffffffffffffl);
05203     }
05204     t[1] = tb * a[49]; r[49] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffl);
05205     t[2] = tb * a[50]; r[50] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffl);
05206     t[3] = tb * a[51]; r[51] += (t[2] >> 57) + (t[3] & 0x1ffffffffffffffl);
05207     t[4] = tb * a[52]; r[52] += (t[3] >> 57) + (t[4] & 0x1ffffffffffffffl);
05208     t[5] = tb * a[53]; r[53] += (t[4] >> 57) + (t[5] & 0x1ffffffffffffffl);
05209     r[54] +=  t[5] >> 57;
05210 #endif /* WOLFSSL_SP_SMALL */
05211 }
05212 
05213 /* Normalize the values in each word to 57.
05214  *
05215  * a  Array of sp_digit to normalize.
05216  */
05217 static void sp_3072_norm_54(sp_digit* a)
05218 {
05219 #ifdef WOLFSSL_SP_SMALL
05220     int i;
05221     for (i = 0; i < 53; i++) {
05222         a[i+1] += a[i] >> 57;
05223         a[i] &= 0x1ffffffffffffffl;
05224     }
05225 #else
05226     int i;
05227     for (i = 0; i < 48; i += 8) {
05228         a[i+1] += a[i+0] >> 57; a[i+0] &= 0x1ffffffffffffffl;
05229         a[i+2] += a[i+1] >> 57; a[i+1] &= 0x1ffffffffffffffl;
05230         a[i+3] += a[i+2] >> 57; a[i+2] &= 0x1ffffffffffffffl;
05231         a[i+4] += a[i+3] >> 57; a[i+3] &= 0x1ffffffffffffffl;
05232         a[i+5] += a[i+4] >> 57; a[i+4] &= 0x1ffffffffffffffl;
05233         a[i+6] += a[i+5] >> 57; a[i+5] &= 0x1ffffffffffffffl;
05234         a[i+7] += a[i+6] >> 57; a[i+6] &= 0x1ffffffffffffffl;
05235         a[i+8] += a[i+7] >> 57; a[i+7] &= 0x1ffffffffffffffl;
05236         a[i+9] += a[i+8] >> 57; a[i+8] &= 0x1ffffffffffffffl;
05237     }
05238     a[48+1] += a[48] >> 57;
05239     a[48] &= 0x1ffffffffffffffl;
05240     a[49+1] += a[49] >> 57;
05241     a[49] &= 0x1ffffffffffffffl;
05242     a[50+1] += a[50] >> 57;
05243     a[50] &= 0x1ffffffffffffffl;
05244     a[51+1] += a[51] >> 57;
05245     a[51] &= 0x1ffffffffffffffl;
05246     a[52+1] += a[52] >> 57;
05247     a[52] &= 0x1ffffffffffffffl;
05248 #endif
05249 }
05250 
05251 /* Shift the result in the high 3072 bits down to the bottom.
05252  *
05253  * r  A single precision number.
05254  * a  A single precision number.
05255  */
05256 static void sp_3072_mont_shift_54(sp_digit* r, const sp_digit* a)
05257 {
05258 #ifdef WOLFSSL_SP_SMALL
05259     int i;
05260     int128_t n = a[53] >> 51;
05261     n += ((int128_t)a[54]) << 6;
05262 
05263     for (i = 0; i < 53; i++) {
05264         r[i] = n & 0x1ffffffffffffffl;
05265         n >>= 57;
05266         n += ((int128_t)a[55 + i]) << 6;
05267     }
05268     r[53] = (sp_digit)n;
05269 #else
05270     int i;
05271     int128_t n = a[53] >> 51;
05272     n += ((int128_t)a[54]) << 6;
05273     for (i = 0; i < 48; i += 8) {
05274         r[i + 0] = n & 0x1ffffffffffffffl;
05275         n >>= 57; n += ((int128_t)a[i + 55]) << 6;
05276         r[i + 1] = n & 0x1ffffffffffffffl;
05277         n >>= 57; n += ((int128_t)a[i + 56]) << 6;
05278         r[i + 2] = n & 0x1ffffffffffffffl;
05279         n >>= 57; n += ((int128_t)a[i + 57]) << 6;
05280         r[i + 3] = n & 0x1ffffffffffffffl;
05281         n >>= 57; n += ((int128_t)a[i + 58]) << 6;
05282         r[i + 4] = n & 0x1ffffffffffffffl;
05283         n >>= 57; n += ((int128_t)a[i + 59]) << 6;
05284         r[i + 5] = n & 0x1ffffffffffffffl;
05285         n >>= 57; n += ((int128_t)a[i + 60]) << 6;
05286         r[i + 6] = n & 0x1ffffffffffffffl;
05287         n >>= 57; n += ((int128_t)a[i + 61]) << 6;
05288         r[i + 7] = n & 0x1ffffffffffffffl;
05289         n >>= 57; n += ((int128_t)a[i + 62]) << 6;
05290     }
05291     r[48] = n & 0x1ffffffffffffffl; n >>= 57; n += ((int128_t)a[103]) << 6;
05292     r[49] = n & 0x1ffffffffffffffl; n >>= 57; n += ((int128_t)a[104]) << 6;
05293     r[50] = n & 0x1ffffffffffffffl; n >>= 57; n += ((int128_t)a[105]) << 6;
05294     r[51] = n & 0x1ffffffffffffffl; n >>= 57; n += ((int128_t)a[106]) << 6;
05295     r[52] = n & 0x1ffffffffffffffl; n >>= 57; n += ((int128_t)a[107]) << 6;
05296     r[53] = (sp_digit)n;
05297 #endif /* WOLFSSL_SP_SMALL */
05298     XMEMSET(&r[54], 0, sizeof(*r) * 54);
05299 }
05300 
05301 /* Reduce the number back to 3072 bits using Montgomery reduction.
05302  *
05303  * a   A single precision number to reduce in place.
05304  * m   The single precision number representing the modulus.
05305  * mp  The digit representing the negative inverse of m mod 2^n.
05306  */
05307 static void sp_3072_mont_reduce_54(sp_digit* a, sp_digit* m, sp_digit mp)
05308 {
05309     int i;
05310     sp_digit mu;
05311 
05312     if (mp != 1) {
05313         for (i=0; i<53; i++) {
05314             mu = (a[i] * mp) & 0x1ffffffffffffffl;
05315             sp_3072_mul_add_54(a+i, m, mu);
05316             a[i+1] += a[i] >> 57;
05317         }
05318         mu = (a[i] * mp) & 0x7ffffffffffffl;
05319         sp_3072_mul_add_54(a+i, m, mu);
05320         a[i+1] += a[i] >> 57;
05321         a[i] &= 0x1ffffffffffffffl;
05322     }
05323     else {
05324         for (i=0; i<53; i++) {
05325             mu = a[i] & 0x1ffffffffffffffl;
05326             sp_3072_mul_add_54(a+i, m, mu);
05327             a[i+1] += a[i] >> 57;
05328         }
05329         mu = a[i] & 0x7ffffffffffffl;
05330         sp_3072_mul_add_54(a+i, m, mu);
05331         a[i+1] += a[i] >> 57;
05332         a[i] &= 0x1ffffffffffffffl;
05333     }
05334 
05335     sp_3072_mont_shift_54(a, a);
05336     sp_3072_cond_sub_54(a, a, m, 0 - ((a[53] >> 51) > 0));
05337     sp_3072_norm_54(a);
05338 }
05339 
05340 /* Multiply two Montogmery form numbers mod the modulus (prime).
05341  * (r = a * b mod m)
05342  *
05343  * r   Result of multiplication.
05344  * a   First number to multiply in Montogmery form.
05345  * b   Second number to multiply in Montogmery form.
05346  * m   Modulus (prime).
05347  * mp  Montogmery mulitplier.
05348  */
05349 static void sp_3072_mont_mul_54(sp_digit* r, sp_digit* a, sp_digit* b,
05350         sp_digit* m, sp_digit mp)
05351 {
05352     sp_3072_mul_54(r, a, b);
05353     sp_3072_mont_reduce_54(r, m, mp);
05354 }
05355 
05356 /* Square the Montgomery form number. (r = a * a mod m)
05357  *
05358  * r   Result of squaring.
05359  * a   Number to square in Montogmery form.
05360  * m   Modulus (prime).
05361  * mp  Montogmery mulitplier.
05362  */
05363 static void sp_3072_mont_sqr_54(sp_digit* r, sp_digit* a, sp_digit* m,
05364         sp_digit mp)
05365 {
05366     sp_3072_sqr_54(r, a);
05367     sp_3072_mont_reduce_54(r, m, mp);
05368 }
05369 
05370 /* Multiply a by scalar b into r. (r = a * b)
05371  *
05372  * r  A single precision integer.
05373  * a  A single precision integer.
05374  * b  A scalar.
05375  */
05376 SP_NOINLINE static void sp_3072_mul_d_54(sp_digit* r, const sp_digit* a,
05377     const sp_digit b)
05378 {
05379 #ifdef WOLFSSL_SP_SMALL
05380     int128_t tb = b;
05381     int128_t t = 0;
05382     int i;
05383 
05384     for (i = 0; i < 54; i++) {
05385         t += tb * a[i];
05386         r[i] = t & 0x1ffffffffffffffl;
05387         t >>= 57;
05388     }
05389     r[54] = (sp_digit)t;
05390 #else
05391     int128_t tb = b;
05392     int128_t t[8];
05393     int i;
05394 
05395     t[0] = tb * a[0]; r[0] = t[0] & 0x1ffffffffffffffl;
05396     for (i = 0; i < 48; i += 8) {
05397         t[1] = tb * a[i+1];
05398         r[i+1] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffl);
05399         t[2] = tb * a[i+2];
05400         r[i+2] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffl);
05401         t[3] = tb * a[i+3];
05402         r[i+3] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffl);
05403         t[4] = tb * a[i+4];
05404         r[i+4] = (sp_digit)(t[3] >> 57) + (t[4] & 0x1ffffffffffffffl);
05405         t[5] = tb * a[i+5];
05406         r[i+5] = (sp_digit)(t[4] >> 57) + (t[5] & 0x1ffffffffffffffl);
05407         t[6] = tb * a[i+6];
05408         r[i+6] = (sp_digit)(t[5] >> 57) + (t[6] & 0x1ffffffffffffffl);
05409         t[7] = tb * a[i+7];
05410         r[i+7] = (sp_digit)(t[6] >> 57) + (t[7] & 0x1ffffffffffffffl);
05411         t[0] = tb * a[i+8];
05412         r[i+8] = (sp_digit)(t[7] >> 57) + (t[0] & 0x1ffffffffffffffl);
05413     }
05414     t[1] = tb * a[49];
05415     r[49] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffl);
05416     t[2] = tb * a[50];
05417     r[50] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffl);
05418     t[3] = tb * a[51];
05419     r[51] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffl);
05420     t[4] = tb * a[52];
05421     r[52] = (sp_digit)(t[3] >> 57) + (t[4] & 0x1ffffffffffffffl);
05422     t[5] = tb * a[53];
05423     r[53] = (sp_digit)(t[4] >> 57) + (t[5] & 0x1ffffffffffffffl);
05424     r[54] =  (sp_digit)(t[5] >> 57);
05425 #endif /* WOLFSSL_SP_SMALL */
05426 }
05427 
05428 /* Conditionally add a and b using the mask m.
05429  * m is -1 to add and 0 when not.
05430  *
05431  * r  A single precision number representing conditional add result.
05432  * a  A single precision number to add with.
05433  * b  A single precision number to add.
05434  * m  Mask value to apply.
05435  */
05436 static void sp_3072_cond_add_54(sp_digit* r, const sp_digit* a,
05437         const sp_digit* b, const sp_digit m)
05438 {
05439 #ifdef WOLFSSL_SP_SMALL
05440     int i;
05441 
05442     for (i = 0; i < 54; i++)
05443         r[i] = a[i] + (b[i] & m);
05444 #else
05445     int i;
05446 
05447     for (i = 0; i < 48; i += 8) {
05448         r[i + 0] = a[i + 0] + (b[i + 0] & m);
05449         r[i + 1] = a[i + 1] + (b[i + 1] & m);
05450         r[i + 2] = a[i + 2] + (b[i + 2] & m);
05451         r[i + 3] = a[i + 3] + (b[i + 3] & m);
05452         r[i + 4] = a[i + 4] + (b[i + 4] & m);
05453         r[i + 5] = a[i + 5] + (b[i + 5] & m);
05454         r[i + 6] = a[i + 6] + (b[i + 6] & m);
05455         r[i + 7] = a[i + 7] + (b[i + 7] & m);
05456     }
05457     r[48] = a[48] + (b[48] & m);
05458     r[49] = a[49] + (b[49] & m);
05459     r[50] = a[50] + (b[50] & m);
05460     r[51] = a[51] + (b[51] & m);
05461     r[52] = a[52] + (b[52] & m);
05462     r[53] = a[53] + (b[53] & m);
05463 #endif /* WOLFSSL_SP_SMALL */
05464 }
05465 
05466 /* Divide d in a and put remainder into r (m*d + r = a)
05467  * m is not calculated as it is not needed at this time.
05468  *
05469  * a  Nmber to be divided.
05470  * d  Number to divide with.
05471  * m  Multiplier result.
05472  * r  Remainder from the division.
05473  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
05474  */
05475 static int sp_3072_div_54(sp_digit* a, sp_digit* d, sp_digit* m,
05476         sp_digit* r)
05477 {
05478     int i;
05479     int128_t d1;
05480     sp_digit div, r1;
05481 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
05482     sp_digit* td;
05483 #else
05484     sp_digit t1d[108], t2d[54 + 1];
05485 #endif
05486     sp_digit* t1;
05487     sp_digit* t2;
05488     int err = MP_OKAY;
05489 
05490 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
05491     td = XMALLOC(sizeof(sp_digit) * (3 * 54 + 1), NULL, DYNAMIC_TYPE_TMP_BUFFER);
05492     if (td != NULL) {
05493         t1 = td;
05494         t2 = td + 2 * 54;
05495     }
05496     else
05497         err = MEMORY_E;
05498 #else
05499     t1 = t1d;
05500     t2 = t2d;
05501 #endif
05502 
05503     (void)m;
05504 
05505     if (err == MP_OKAY) {
05506         div = d[53];
05507         XMEMCPY(t1, a, sizeof(*t1) * 2 * 54);
05508         for (i=53; i>=0; i--) {
05509             t1[54 + i] += t1[54 + i - 1] >> 57;
05510             t1[54 + i - 1] &= 0x1ffffffffffffffl;
05511             d1 = t1[54 + i];
05512             d1 <<= 57;
05513             d1 += t1[54 + i - 1];
05514             r1 = (sp_digit)(d1 / div);
05515 
05516             sp_3072_mul_d_54(t2, d, r1);
05517             sp_3072_sub_54(&t1[i], &t1[i], t2);
05518             t1[54 + i] -= t2[54];
05519             t1[54 + i] += t1[54 + i - 1] >> 57;
05520             t1[54 + i - 1] &= 0x1ffffffffffffffl;
05521             r1 = (((-t1[54 + i]) << 57) - t1[54 + i - 1]) / div;
05522             r1++;
05523             sp_3072_mul_d_54(t2, d, r1);
05524             sp_3072_add_54(&t1[i], &t1[i], t2);
05525             t1[54 + i] += t1[54 + i - 1] >> 57;
05526             t1[54 + i - 1] &= 0x1ffffffffffffffl;
05527         }
05528         t1[54 - 1] += t1[54 - 2] >> 57;
05529         t1[54 - 2] &= 0x1ffffffffffffffl;
05530         d1 = t1[54 - 1];
05531         r1 = (sp_digit)(d1 / div);
05532 
05533         sp_3072_mul_d_54(t2, d, r1);
05534         sp_3072_sub_54(t1, t1, t2);
05535         XMEMCPY(r, t1, sizeof(*r) * 2 * 54);
05536         for (i=0; i<52; i++) {
05537             r[i+1] += r[i] >> 57;
05538             r[i] &= 0x1ffffffffffffffl;
05539         }
05540         sp_3072_cond_add_54(r, r, d, 0 - (r[53] < 0));
05541     }
05542 
05543 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
05544     if (td != NULL)
05545         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
05546 #endif
05547 
05548     return err;
05549 }
05550 
05551 /* Reduce a modulo m into r. (r = a mod m)
05552  *
05553  * r  A single precision number that is the reduced result.
05554  * a  A single precision number that is to be reduced.
05555  * m  A single precision number that is the modulus to reduce with.
05556  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
05557  */
05558 static int sp_3072_mod_54(sp_digit* r, sp_digit* a, sp_digit* m)
05559 {
05560     return sp_3072_div_54(a, m, NULL, r);
05561 }
05562 
#if defined(SP_RSA_PRIVATE_EXP_D) || defined(WOLFSSL_HAVE_SP_DH)
/* Modular exponentiate a to the e mod m. (r = a^e mod m)
 *
 * One of three implementations is compiled in:
 *   - WOLFSSL_SP_SMALL: bit-at-a-time ladder using a heap buffer.
 *   - WOLFSSL_SP_CACHE_RESISTANT: the same ladder, with the buffers on the
 *     stack unless WOLFSSL_SMALL_STACK is defined.
 *   - otherwise: fixed 5-bit window using a table of 32 powers of a.
 *
 * r        A single precision number that is the result of the operation.
 *          54 * 2 digits are written to it.
 * a        A single precision number being exponentiated.
 * e        A single precision number that is the exponent.
 * bits     The number of bits in the exponent.
 * m        A single precision number that is the modulus.
 * reduceA  When non-zero, a is reduced modulo m before it is used.
 * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
 */
static int sp_3072_mod_exp_54(sp_digit* r, sp_digit* a, sp_digit* e, int bits,
    sp_digit* m, int reduceA)
{
#ifdef WOLFSSL_SP_SMALL
    sp_digit* td;
    sp_digit* t[3];
    sp_digit* norm;
    sp_digit mp = 1;
    sp_digit n;
    int i;
    int c, y;
    int err = MP_OKAY;

    td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 54 * 2, NULL,
                            DYNAMIC_TYPE_TMP_BUFFER);
    if (td == NULL)
        err = MEMORY_E;

    if (err == MP_OKAY) {
        XMEMSET(td, 0, sizeof(*td) * 3 * 54 * 2);

        /* Three working values of 54 * 2 digits each; norm shares t[0]. */
        norm = t[0] = td;
        t[1] = &td[54 * 2];
        t[2] = &td[2 * 54 * 2];

        sp_3072_mont_setup(m, &mp);
        sp_3072_mont_norm_54(norm, m);

        if (reduceA)
            err = sp_3072_mod_54(t[1], a, m);
        else
            XMEMCPY(t[1], a, sizeof(sp_digit) * 54);
    }
    if (err == MP_OKAY) {
        /* t[1] = a * norm mod m */
        sp_3072_mul_54(t[1], t[1], norm);
        err = sp_3072_mod_54(t[1], t[1], m);
    }

    if (err == MP_OKAY) {
        /* Start at the top digit of the exponent; digits hold 57 bits. */
        i = bits / 57;
        c = bits % 57;
        n = e[i--] << (57 - c);
        for (; ; c--) {
            if (c == 0) {
                if (i == -1)
                    break;

                n = e[i--];
                c = 57;
            }

            /* y is the next exponent bit, taken from bit 56 of n. */
            y = (n >> 56) & 1;
            n <<= 1;

            sp_3072_mont_mul_54(t[y^1], t[0], t[1], m, mp);

            /* Square t[y] by way of t[2]; addr_mask selects the address
             * so that t is not indexed directly by the exponent bit. */
            XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
                                  ((size_t)t[1] & addr_mask[y])),
                    sizeof(*t[2]) * 54 * 2);
            sp_3072_mont_sqr_54(t[2], t[2], m, mp);
            XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
                            ((size_t)t[1] & addr_mask[y])), t[2],
                    sizeof(*t[2]) * 54 * 2);
        }

        sp_3072_mont_reduce_54(t[0], m, mp);
        n = sp_3072_cmp_54(t[0], m);
        /* Subtract m only when t[0] >= m (mask is all ones in that case). */
        sp_3072_cond_sub_54(t[0], t[0], m, (n < 0) - 1);
        XMEMCPY(r, t[0], sizeof(*r) * 54 * 2);

    }

    if (td != NULL)
        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);

    return err;
#elif defined(WOLFSSL_SP_CACHE_RESISTANT)
#ifndef WOLFSSL_SMALL_STACK
    sp_digit t[3][108];
#else
    sp_digit* td;
    sp_digit* t[3];
#endif
    sp_digit* norm;
    sp_digit mp = 1;
    sp_digit n;
    int i;
    int c, y;
    int err = MP_OKAY;

#ifdef WOLFSSL_SMALL_STACK
    td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 54 * 2, NULL,
                            DYNAMIC_TYPE_TMP_BUFFER);
    if (td == NULL)
        err = MEMORY_E;

    if (err == MP_OKAY) {
        t[0] = td;
        t[1] = &td[54 * 2];
        t[2] = &td[2 * 54 * 2];
        norm = t[0];
    }
#else
    norm = t[0];
#endif

    if (err == MP_OKAY) {
        sp_3072_mont_setup(m, &mp);
        sp_3072_mont_norm_54(norm, m);

        /* t[1] = a * norm mod m, reducing a first when requested. */
        if (reduceA) {
            err = sp_3072_mod_54(t[1], a, m);
            if (err == MP_OKAY) {
                sp_3072_mul_54(t[1], t[1], norm);
                err = sp_3072_mod_54(t[1], t[1], m);
            }
        }
        else {
            sp_3072_mul_54(t[1], a, norm);
            err = sp_3072_mod_54(t[1], t[1], m);
        }
    }

    if (err == MP_OKAY) {
        i = bits / 57;
        c = bits % 57;
        n = e[i--] << (57 - c);
        for (; ; c--) {
            if (c == 0) {
                if (i == -1)
                    break;

                n = e[i--];
                c = 57;
            }

            y = (n >> 56) & 1;
            n <<= 1;

            sp_3072_mont_mul_54(t[y^1], t[0], t[1], m, mp);

            /* The copy length is spelled out in digits: with
             * WOLFSSL_SMALL_STACK t[] holds pointers, so sizeof(t[2])
             * would only be the size of a pointer. */
            XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
                                  ((size_t)t[1] & addr_mask[y])),
                    sizeof(sp_digit) * 54 * 2);
            sp_3072_mont_sqr_54(t[2], t[2], m, mp);
            XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
                            ((size_t)t[1] & addr_mask[y])), t[2],
                    sizeof(sp_digit) * 54 * 2);
        }

        sp_3072_mont_reduce_54(t[0], m, mp);
        n = sp_3072_cmp_54(t[0], m);
        sp_3072_cond_sub_54(t[0], t[0], m, (n < 0) - 1);
        XMEMCPY(r, t[0], sizeof(sp_digit) * 54 * 2);
    }

#ifdef WOLFSSL_SMALL_STACK
    if (td != NULL)
        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif

    return err;
#else
#ifndef WOLFSSL_SMALL_STACK
    sp_digit t[32][108];
#else
    sp_digit* t[32];
    sp_digit* td;
#endif
    sp_digit* norm;
    sp_digit rt[108];
    sp_digit mp = 1;
    sp_digit n;
    int i;
    int c, y;
    int err = MP_OKAY;

#ifdef WOLFSSL_SMALL_STACK
    td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 108, NULL,
                            DYNAMIC_TYPE_TMP_BUFFER);
    if (td == NULL)
        err = MEMORY_E;

    if (err == MP_OKAY) {
        for (i=0; i<32; i++)
            t[i] = td + i * 108;
        norm = t[0];
    }
#else
    norm = t[0];
#endif

    if (err == MP_OKAY) {
        sp_3072_mont_setup(m, &mp);
        sp_3072_mont_norm_54(norm, m);

        /* t[1] = a * norm mod m, reducing a first when requested. */
        if (reduceA) {
            err = sp_3072_mod_54(t[1], a, m);
            if (err == MP_OKAY) {
                sp_3072_mul_54(t[1], t[1], norm);
                err = sp_3072_mod_54(t[1], t[1], m);
            }
        }
        else {
            sp_3072_mul_54(t[1], a, norm);
            err = sp_3072_mod_54(t[1], t[1], m);
        }
    }

    if (err == MP_OKAY) {
        /* Build the window table: t[k] is the k-th power of t[1] for
         * k = 2..31 (t[0] is norm). */
        sp_3072_mont_sqr_54(t[ 2], t[ 1], m, mp);
        sp_3072_mont_mul_54(t[ 3], t[ 2], t[ 1], m, mp);
        sp_3072_mont_sqr_54(t[ 4], t[ 2], m, mp);
        sp_3072_mont_mul_54(t[ 5], t[ 3], t[ 2], m, mp);
        sp_3072_mont_sqr_54(t[ 6], t[ 3], m, mp);
        sp_3072_mont_mul_54(t[ 7], t[ 4], t[ 3], m, mp);
        sp_3072_mont_sqr_54(t[ 8], t[ 4], m, mp);
        sp_3072_mont_mul_54(t[ 9], t[ 5], t[ 4], m, mp);
        sp_3072_mont_sqr_54(t[10], t[ 5], m, mp);
        sp_3072_mont_mul_54(t[11], t[ 6], t[ 5], m, mp);
        sp_3072_mont_sqr_54(t[12], t[ 6], m, mp);
        sp_3072_mont_mul_54(t[13], t[ 7], t[ 6], m, mp);
        sp_3072_mont_sqr_54(t[14], t[ 7], m, mp);
        sp_3072_mont_mul_54(t[15], t[ 8], t[ 7], m, mp);
        sp_3072_mont_sqr_54(t[16], t[ 8], m, mp);
        sp_3072_mont_mul_54(t[17], t[ 9], t[ 8], m, mp);
        sp_3072_mont_sqr_54(t[18], t[ 9], m, mp);
        sp_3072_mont_mul_54(t[19], t[10], t[ 9], m, mp);
        sp_3072_mont_sqr_54(t[20], t[10], m, mp);
        sp_3072_mont_mul_54(t[21], t[11], t[10], m, mp);
        sp_3072_mont_sqr_54(t[22], t[11], m, mp);
        sp_3072_mont_mul_54(t[23], t[12], t[11], m, mp);
        sp_3072_mont_sqr_54(t[24], t[12], m, mp);
        sp_3072_mont_mul_54(t[25], t[13], t[12], m, mp);
        sp_3072_mont_sqr_54(t[26], t[13], m, mp);
        sp_3072_mont_mul_54(t[27], t[14], t[13], m, mp);
        sp_3072_mont_sqr_54(t[28], t[14], m, mp);
        sp_3072_mont_mul_54(t[29], t[15], t[14], m, mp);
        sp_3072_mont_sqr_54(t[30], t[15], m, mp);
        sp_3072_mont_mul_54(t[31], t[16], t[15], m, mp);

        /* Round the bit count up to a whole number of 5-bit windows. */
        bits = ((bits + 4) / 5) * 5;
        i = ((bits + 56) / 57) - 1;
        c = bits % 57;
        if (c == 0)
            c = 57;
        if (i < 54)
            n = e[i--] << (64 - c);
        else {
            n = 0;
            i--;
        }
        if (c < 5) {
            n |= e[i--] << (7 - c);
            c += 57;
        }
        /* Mask the first window as well: n is signed, so when its top bit
         * is set the shift alone could give a negative table index. */
        y = (n >> 59) & 0x1f;
        n <<= 5;
        c -= 5;
        XMEMCPY(rt, t[y], sizeof(rt));
        for (; i>=0 || c>=5; ) {
            if (c < 5) {
                /* Refill n from the next lower exponent digit. */
                n |= e[i--] << (7 - c);
                c += 57;
            }
            y = (n >> 59) & 0x1f;
            n <<= 5;
            c -= 5;

            /* Five squarings, then multiply by the window's table entry. */
            sp_3072_mont_sqr_54(rt, rt, m, mp);
            sp_3072_mont_sqr_54(rt, rt, m, mp);
            sp_3072_mont_sqr_54(rt, rt, m, mp);
            sp_3072_mont_sqr_54(rt, rt, m, mp);
            sp_3072_mont_sqr_54(rt, rt, m, mp);

            sp_3072_mont_mul_54(rt, rt, t[y], m, mp);
        }

        sp_3072_mont_reduce_54(rt, m, mp);
        n = sp_3072_cmp_54(rt, m);
        sp_3072_cond_sub_54(rt, rt, m, (n < 0) - 1);
        XMEMCPY(r, rt, sizeof(rt));
    }

#ifdef WOLFSSL_SMALL_STACK
    if (td != NULL)
        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif

    return err;
#endif
}
#endif /* SP_RSA_PRIVATE_EXP_D || WOLFSSL_HAVE_SP_DH */
05864 
#if defined(WOLFSSL_HAVE_SP_RSA) && !defined(SP_RSA_PRIVATE_EXP_D) && \
                                    !defined(RSA_LOW_MEM)
/* Bitwise AND every one of the 27 digits of a with m, storing into r.
 *
 * r  A single precision integer that receives the masked digits.
 * a  A single precision integer that is the source of the digits.
 * m  Mask to AND against each digit.
 */
static void sp_3072_mask_27(sp_digit* r, sp_digit* a, sp_digit m)
{
#ifdef WOLFSSL_SP_SMALL
    int idx = 0;

    while (idx < 27) {
        r[idx] = a[idx] & m;
        idx++;
    }
#else
    int base;

    /* Three unrolled groups of eight digits cover indices 0..23. */
    for (base = 0; base < 24; base += 8) {
        sp_digit* dst = r + base;
        sp_digit* src = a + base;

        dst[0] = src[0] & m;
        dst[1] = src[1] & m;
        dst[2] = src[2] & m;
        dst[3] = src[3] & m;
        dst[4] = src[4] & m;
        dst[5] = src[5] & m;
        dst[6] = src[6] & m;
        dst[7] = src[7] & m;
    }
    /* Remaining three digits. */
    r[24] = a[24] & m;
    r[25] = a[25] & m;
    r[26] = a[26] & m;
#endif
}

#endif
05900 #ifdef WOLFSSL_HAVE_SP_RSA
05901 /* RSA public key operation.
05902  *
05903  * in      Array of bytes representing the number to exponentiate, base.
05904  * inLen   Number of bytes in base.
05905  * em      Public exponent.
05906  * mm      Modulus.
05907  * out     Buffer to hold big-endian bytes of exponentiation result.
05908  *         Must be at least 384 bytes long.
05909  * outLen  Number of bytes in result.
05910  * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
05911  * an array is too long and MEMORY_E when dynamic memory allocation fails.
05912  */
05913 int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
05914     byte* out, word32* outLen)
05915 {
05916 #ifdef WOLFSSL_SP_SMALL
05917     sp_digit* d = NULL;
05918     sp_digit* a;
05919     sp_digit* m;
05920     sp_digit* r;
05921     sp_digit* norm;
05922     sp_digit e[1];
05923     sp_digit mp;
05924     int i;
05925     int err = MP_OKAY;
05926 
05927     if (*outLen < 384)
05928         err = MP_TO_E;
05929     if (err == MP_OKAY && (mp_count_bits(em) > 57 || inLen > 384 ||
05930                                                      mp_count_bits(mm) != 3072))
05931         err = MP_READ_E;
05932 
05933     if (err == MP_OKAY) {
05934         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 54 * 5, NULL,
05935                                DYNAMIC_TYPE_TMP_BUFFER);
05936         if (d == NULL)
05937             err = MEMORY_E;
05938     }
05939 
05940     if (err == MP_OKAY) {
05941         a = d;
05942         r = a + 54 * 2;
05943         m = r + 54 * 2;
05944         norm = r;
05945 
05946         sp_3072_from_bin(a, 54, in, inLen);
05947 #if DIGIT_BIT >= 57
05948         e[0] = em->dp[0];
05949 #else
05950         e[0] = em->dp[0];
05951         if (em->used > 1)
05952             e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
05953 #endif
05954         if (e[0] == 0)
05955             err = MP_EXPTMOD_E;
05956     }
05957 
05958     if (err == MP_OKAY) {
05959         sp_3072_from_mp(m, 54, mm);
05960 
05961         sp_3072_mont_setup(m, &mp);
05962         sp_3072_mont_norm_54(norm, m);
05963     }
05964     if (err == MP_OKAY) {
05965         sp_3072_mul_54(a, a, norm);
05966         err = sp_3072_mod_54(a, a, m);
05967     }
05968     if (err == MP_OKAY) {
05969         for (i=56; i>=0; i--)
05970             if (e[0] >> i)
05971                 break;
05972 
05973         XMEMCPY(r, a, sizeof(sp_digit) * 54 * 2);
05974         for (i--; i>=0; i--) {
05975             sp_3072_mont_sqr_54(r, r, m, mp);
05976 
05977             if (((e[0] >> i) & 1) == 1)
05978                 sp_3072_mont_mul_54(r, r, a, m, mp);
05979         }
05980         sp_3072_mont_reduce_54(r, m, mp);
05981         mp = sp_3072_cmp_54(r, m);
05982         sp_3072_cond_sub_54(r, r, m, (mp < 0) - 1);
05983 
05984         sp_3072_to_bin(r, out);
05985         *outLen = 384;
05986     }
05987 
05988     if (d != NULL)
05989         XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
05990 
05991     return err;
05992 #else
05993 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
05994     sp_digit ad[108], md[54], rd[108];
05995 #else
05996     sp_digit* d = NULL;
05997 #endif
05998     sp_digit* a;
05999     sp_digit* m;
06000     sp_digit* r;
06001     sp_digit e[1];
06002     int err = MP_OKAY;
06003 
06004     if (*outLen < 384)
06005         err = MP_TO_E;
06006     if (err == MP_OKAY && (mp_count_bits(em) > 57 || inLen > 384 ||
06007                                                      mp_count_bits(mm) != 3072))
06008         err = MP_READ_E;
06009 
06010 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
06011     if (err == MP_OKAY) {
06012         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 54 * 5, NULL,
06013                                DYNAMIC_TYPE_TMP_BUFFER);
06014         if (d == NULL)
06015             err = MEMORY_E;
06016     }
06017 
06018     if (err == MP_OKAY) {
06019         a = d;
06020         r = a + 54 * 2;
06021         m = r + 54 * 2;
06022     }
06023 #else
06024     a = ad;
06025     m = md;
06026     r = rd;
06027 #endif
06028 
06029     if (err == MP_OKAY) {
06030         sp_3072_from_bin(a, 54, in, inLen);
06031 #if DIGIT_BIT >= 57
06032         e[0] = em->dp[0];
06033 #else
06034         e[0] = em->dp[0];
06035         if (em->used > 1)
06036             e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
06037 #endif
06038         if (e[0] == 0)
06039             err = MP_EXPTMOD_E;
06040     }
06041     if (err == MP_OKAY) {
06042         sp_3072_from_mp(m, 54, mm);
06043 
06044         if (e[0] == 0x3) {
06045             if (err == MP_OKAY) {
06046                 sp_3072_sqr_54(r, a);
06047                 err = sp_3072_mod_54(r, r, m);
06048             }
06049             if (err == MP_OKAY) {
06050                 sp_3072_mul_54(r, a, r);
06051                 err = sp_3072_mod_54(r, r, m);
06052             }
06053         }
06054         else {
06055             sp_digit* norm = r;
06056             int i;
06057             sp_digit mp;
06058 
06059             sp_3072_mont_setup(m, &mp);
06060             sp_3072_mont_norm_54(norm, m);
06061 
06062             if (err == MP_OKAY) {
06063                 sp_3072_mul_54(a, a, norm);
06064                 err = sp_3072_mod_54(a, a, m);
06065             }
06066 
06067             if (err == MP_OKAY) {
06068                 for (i=56; i>=0; i--)
06069                     if (e[0] >> i)
06070                         break;
06071 
06072                 XMEMCPY(r, a, sizeof(sp_digit) * 108);
06073                 for (i--; i>=0; i--) {
06074                     sp_3072_mont_sqr_54(r, r, m, mp);
06075 
06076                     if (((e[0] >> i) & 1) == 1)
06077                         sp_3072_mont_mul_54(r, r, a, m, mp);
06078                 }
06079                 sp_3072_mont_reduce_54(r, m, mp);
06080                 mp = sp_3072_cmp_54(r, m);
06081                 sp_3072_cond_sub_54(r, r, m, (mp < 0) - 1);
06082             }
06083         }
06084     }
06085 
06086     if (err == MP_OKAY) {
06087         sp_3072_to_bin(r, out);
06088         *outLen = 384;
06089     }
06090 
06091 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
06092     if (d != NULL)
06093         XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
06094 #endif
06095 
06096     return err;
06097 #endif /* WOLFSSL_SP_SMALL */
06098 }
06099 
06100 /* RSA private key operation.
06101  *
06102  * in      Array of bytes representing the number to exponentiate, base.
06103  * inLen   Number of bytes in base.
06104  * dm      Private exponent.
06105  * pm      First prime.
06106  * qm      Second prime.
06107  * dpm     First prime's CRT exponent.
06108  * dqm     Second prime's CRT exponent.
06109  * qim     Inverse of second prime mod p.
06110  * mm      Modulus.
06111  * out     Buffer to hold big-endian bytes of exponentiation result.
06112  *         Must be at least 384 bytes long.
06113  * outLen  Number of bytes in result.
06114  * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
06115  * an array is too long and MEMORY_E when dynamic memory allocation fails.
06116  */
06117 int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm,
06118     mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
06119     byte* out, word32* outLen)
06120 {
06121 #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
06122 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
06123     sp_digit* a;
06124     sp_digit* d = NULL;
06125     sp_digit* m;
06126     sp_digit* r;
06127     int err = MP_OKAY;
06128 
06129     (void)pm;
06130     (void)qm;
06131     (void)dpm;
06132     (void)dqm;
06133     (void)qim;
06134 
06135     if (*outLen < 384)
06136         err = MP_TO_E;
06137     if (err == MP_OKAY && (mp_count_bits(dm) > 3072 || inLen > 384 ||
06138                                                      mp_count_bits(mm) != 3072))
06139         err = MP_READ_E;
06140 
06141     if (err == MP_OKAY) {
06142         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 54 * 4, NULL,
06143                                DYNAMIC_TYPE_TMP_BUFFER);
06144         if (d == NULL)
06145             err = MEMORY_E;
06146     }
06147     if (err == MP_OKAY) {
06148         a = d + 54;
06149         m = a + 54;
06150         r = a;
06151 
06152         sp_3072_from_bin(a, 54, in, inLen);
06153         sp_3072_from_mp(d, 54, dm);
06154         sp_3072_from_mp(m, 54, mm);
06155         err = sp_3072_mod_exp_54(r, a, d, 3072, m, 0);
06156     }
06157     if (err == MP_OKAY) {
06158         sp_3072_to_bin(r, out);
06159         *outLen = 384;
06160     }
06161 
06162     if (d != NULL) {
06163         XMEMSET(d, 0, sizeof(sp_digit) * 54);
06164         XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
06165     }
06166 
06167     return err;
06168 #else
06169     sp_digit a[108], d[54], m[54];
06170     sp_digit* r = a;
06171     int err = MP_OKAY;
06172 
06173     (void)pm;
06174     (void)qm;
06175     (void)dpm;
06176     (void)dqm;
06177     (void)qim;
06178 
06179     if (*outLen < 384)
06180         err = MP_TO_E;
06181     if (err == MP_OKAY && (mp_count_bits(dm) > 3072 || inLen > 384 ||
06182                                                      mp_count_bits(mm) != 3072))
06183         err = MP_READ_E;
06184 
06185     if (err == MP_OKAY) {
06186         sp_3072_from_bin(a, 54, in, inLen);
06187         sp_3072_from_mp(d, 54, dm);
06188         sp_3072_from_mp(m, 54, mm);
06189         err = sp_3072_mod_exp_54(r, a, d, 3072, m, 0);
06190     }
06191 
06192     if (err == MP_OKAY) {
06193         sp_3072_to_bin(r, out);
06194         *outLen = 384;
06195     }
06196 
06197     XMEMSET(d, 0, sizeof(sp_digit) * 54);
06198 
06199     return err;
06200 #endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */
06201 #else
06202 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
06203     sp_digit* t = NULL;
06204     sp_digit* a;
06205     sp_digit* p;
06206     sp_digit* q;
06207     sp_digit* dp;
06208     sp_digit* dq;
06209     sp_digit* qi;
06210     sp_digit* tmp;
06211     sp_digit* tmpa;
06212     sp_digit* tmpb;
06213     sp_digit* r;
06214     int err = MP_OKAY;
06215 
06216     (void)dm;
06217     (void)mm;
06218 
06219     if (*outLen < 384)
06220         err = MP_TO_E;
06221     if (err == MP_OKAY && (inLen > 384 || mp_count_bits(mm) != 3072))
06222         err = MP_READ_E;
06223 
06224     if (err == MP_OKAY) {
06225         t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 27 * 11, NULL,
06226                                DYNAMIC_TYPE_TMP_BUFFER);
06227         if (t == NULL)
06228             err = MEMORY_E;
06229     }
06230     if (err == MP_OKAY) {
06231         a = t;
06232         p = a + 54 * 2;
06233         q = p + 27;
06234         qi = dq = dp = q + 27;
06235         tmpa = qi + 27;
06236         tmpb = tmpa + 54;
06237 
06238         tmp = t;
06239         r = tmp + 54;
06240 
06241         sp_3072_from_bin(a, 54, in, inLen);
06242         sp_3072_from_mp(p, 27, pm);
06243         sp_3072_from_mp(q, 27, qm);
06244         sp_3072_from_mp(dp, 27, dpm);
06245         err = sp_3072_mod_exp_27(tmpa, a, dp, 1536, p, 1);
06246     }
06247     if (err == MP_OKAY) {
06248         sp_3072_from_mp(dq, 27, dqm);
06249         err = sp_3072_mod_exp_27(tmpb, a, dq, 1536, q, 1);
06250     }
06251     if (err == MP_OKAY) {
06252         sp_3072_sub_27(tmpa, tmpa, tmpb);
06253         sp_3072_mask_27(tmp, p, tmpa[26] >> 63);
06254         sp_3072_add_27(tmpa, tmpa, tmp);
06255 
06256         sp_3072_from_mp(qi, 27, qim);
06257         sp_3072_mul_27(tmpa, tmpa, qi);
06258         err = sp_3072_mod_27(tmpa, tmpa, p);
06259     }
06260 
06261     if (err == MP_OKAY) {
06262         sp_3072_mul_27(tmpa, q, tmpa);
06263         sp_3072_add_54(r, tmpb, tmpa);
06264         sp_3072_norm_54(r);
06265 
06266         sp_3072_to_bin(r, out);
06267         *outLen = 384;
06268     }
06269 
06270     if (t != NULL) {
06271         XMEMSET(t, 0, sizeof(sp_digit) * 27 * 11);
06272         XFREE(t, NULL, DYNAMIC_TYPE_TMP_BUFFER);
06273     }
06274 
06275     return err;
06276 #else
06277     sp_digit a[54 * 2];
06278     sp_digit p[27], q[27], dp[27], dq[27], qi[27];
06279     sp_digit tmp[54], tmpa[54], tmpb[54];
06280     sp_digit* r = a;
06281     int err = MP_OKAY;
06282 
06283     (void)dm;
06284     (void)mm;
06285 
06286     if (*outLen < 384)
06287         err = MP_TO_E;
06288     if (err == MP_OKAY && (inLen > 384 || mp_count_bits(mm) != 3072))
06289         err = MP_READ_E;
06290 
06291     if (err == MP_OKAY) {
06292         sp_3072_from_bin(a, 54, in, inLen);
06293         sp_3072_from_mp(p, 27, pm);
06294         sp_3072_from_mp(q, 27, qm);
06295         sp_3072_from_mp(dp, 27, dpm);
06296         sp_3072_from_mp(dq, 27, dqm);
06297         sp_3072_from_mp(qi, 27, qim);
06298 
06299         err = sp_3072_mod_exp_27(tmpa, a, dp, 1536, p, 1);
06300     }
06301     if (err == MP_OKAY)
06302         err = sp_3072_mod_exp_27(tmpb, a, dq, 1536, q, 1);
06303 
06304     if (err == MP_OKAY) {
06305         sp_3072_sub_27(tmpa, tmpa, tmpb);
06306         sp_3072_mask_27(tmp, p, tmpa[26] >> 63);
06307         sp_3072_add_27(tmpa, tmpa, tmp);
06308         sp_3072_mul_27(tmpa, tmpa, qi);
06309         err = sp_3072_mod_27(tmpa, tmpa, p);
06310     }
06311 
06312     if (err == MP_OKAY) {
06313         sp_3072_mul_27(tmpa, tmpa, q);
06314         sp_3072_add_54(r, tmpb, tmpa);
06315         sp_3072_norm_54(r);
06316 
06317         sp_3072_to_bin(r, out);
06318         *outLen = 384;
06319     }
06320 
06321     XMEMSET(tmpa, 0, sizeof(tmpa));
06322     XMEMSET(tmpb, 0, sizeof(tmpb));
06323     XMEMSET(p, 0, sizeof(p));
06324     XMEMSET(q, 0, sizeof(q));
06325     XMEMSET(dp, 0, sizeof(dp));
06326     XMEMSET(dq, 0, sizeof(dq));
06327     XMEMSET(qi, 0, sizeof(qi));
06328 
06329     return err;
06330 #endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */
06331 #endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
06332 }
06333 
06334 #endif /* WOLFSSL_HAVE_SP_RSA */
06335 #ifdef WOLFSSL_HAVE_SP_DH
06336 /* Convert an array of sp_digit to an mp_int.
06337  *
06338  * a  A single precision integer.
06339  * r  A multi-precision integer.
06340  */
06341 static int sp_3072_to_mp(sp_digit* a, mp_int* r)
06342 {
06343     int err;
06344 
06345     err = mp_grow(r, (3072 + DIGIT_BIT - 1) / DIGIT_BIT);
06346     if (err == MP_OKAY) {
06347 #if DIGIT_BIT == 57
06348         XMEMCPY(r->dp, a, sizeof(sp_digit) * 54);
06349         r->used = 54;
06350         mp_clamp(r);
06351 #elif DIGIT_BIT < 57
06352         int i, j = 0, s = 0;
06353 
06354         r->dp[0] = 0;
06355         for (i = 0; i < 54; i++) {
06356             r->dp[j] |= a[i] << s;
06357             r->dp[j] &= (1l << DIGIT_BIT) - 1;
06358             s = DIGIT_BIT - s;
06359             r->dp[++j] = a[i] >> s;
06360             while (s + DIGIT_BIT <= 57) {
06361                 s += DIGIT_BIT;
06362                 r->dp[j] &= (1l << DIGIT_BIT) - 1;
06363                 r->dp[++j] = a[i] >> s;
06364             }
06365             s = 57 - s;
06366         }
06367         r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
06368         mp_clamp(r);
06369 #else
06370         int i, j = 0, s = 0;
06371 
06372         r->dp[0] = 0;
06373         for (i = 0; i < 54; i++) {
06374             r->dp[j] |= ((mp_digit)a[i]) << s;
06375             if (s + 57 >= DIGIT_BIT) {
06376     #if DIGIT_BIT < 64
06377                 r->dp[j] &= (1l << DIGIT_BIT) - 1;
06378     #endif
06379                 s = DIGIT_BIT - s;
06380                 r->dp[++j] = a[i] >> s;
06381                 s = 57 - s;
06382             }
06383             else
06384                 s += 57;
06385         }
06386         r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
06387         mp_clamp(r);
06388 #endif
06389     }
06390 
06391     return err;
06392 }
06393 
06394 /* Perform the modular exponentiation for Diffie-Hellman.
06395  *
06396  * base  Base. MP integer.
06397  * exp   Exponent. MP integer.
06398  * mod   Modulus. MP integer.
06399  * res   Result. MP integer.
06400  * returs 0 on success, MP_READ_E if there are too many bytes in an array
06401  * and MEMORY_E if memory allocation fails.
06402  */
06403 int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
06404 {
06405 #ifdef WOLFSSL_SP_SMALL
06406     int err = MP_OKAY;
06407     sp_digit* d = NULL;
06408     sp_digit* b;
06409     sp_digit* e;
06410     sp_digit* m;
06411     sp_digit* r;
06412     int expBits = mp_count_bits(exp);
06413 
06414     if (mp_count_bits(base) > 3072 || expBits > 3072 ||
06415                                                    mp_count_bits(mod) != 3072) {
06416         err = MP_READ_E;
06417     }
06418 
06419     if (err == MP_OKAY) {
06420         d = (sp_digit*)XMALLOC(sizeof(*d) * 54 * 4, NULL,
06421                                DYNAMIC_TYPE_TMP_BUFFER);
06422         if (d == NULL)
06423             err = MEMORY_E;
06424     }
06425 
06426     if (err == MP_OKAY) {
06427         b = d;
06428         e = b + 54 * 2;
06429         m = e + 54;
06430         r = b;
06431 
06432         sp_3072_from_mp(b, 54, base);
06433         sp_3072_from_mp(e, 54, exp);
06434         sp_3072_from_mp(m, 54, mod);
06435 
06436         err = sp_3072_mod_exp_54(r, b, e, mp_count_bits(exp), m, 0);
06437     }
06438 
06439     if (err == MP_OKAY) {
06440         err = sp_3072_to_mp(r, res);
06441     }
06442 
06443     if (d != NULL) {
06444         XMEMSET(e, 0, sizeof(sp_digit) * 54);
06445         XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
06446     }
06447     return err;
06448 #else
06449 #ifndef WOLFSSL_SMALL_STACK
06450     sp_digit bd[108], ed[54], md[54];
06451 #else
06452     sp_digit* d = NULL;
06453 #endif
06454     sp_digit* b;
06455     sp_digit* e;
06456     sp_digit* m;
06457     sp_digit* r;
06458     int err = MP_OKAY;
06459     int expBits = mp_count_bits(exp);
06460 
06461     if (mp_count_bits(base) > 3072 || expBits > 3072 ||
06462                                                    mp_count_bits(mod) != 3072) {
06463         err = MP_READ_E;
06464     }
06465 
06466 #ifdef WOLFSSL_SMALL_STACK
06467     if (err == MP_OKAY) {
06468         d = (sp_digit*)XMALLOC(sizeof(*d) * 54 * 4, NULL,
06469                                DYNAMIC_TYPE_TMP_BUFFER);
06470         if (d == NULL)
06471             err = MEMORY_E;
06472     }
06473 
06474     if (err == MP_OKAY) {
06475         b = d;
06476         e = b + 54 * 2;
06477         m = e + 54;
06478         r = b;
06479     }
06480 #else
06481     r = b = bd;
06482     e = ed;
06483     m = md;
06484 #endif
06485 
06486     if (err == MP_OKAY) {
06487         sp_3072_from_mp(b, 54, base);
06488         sp_3072_from_mp(e, 54, exp);
06489         sp_3072_from_mp(m, 54, mod);
06490 
06491         err = sp_3072_mod_exp_54(r, b, e, expBits, m, 0);
06492     }
06493 
06494     if (err == MP_OKAY) {
06495         err = sp_3072_to_mp(r, res);
06496     }
06497 
06498     XMEMSET(e, 0, sizeof(sp_digit) * 54);
06499 
06500 #ifdef WOLFSSL_SMALL_STACK
06501     if (d != NULL)
06502         XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
06503 #endif
06504 
06505     return err;
06506 #endif
06507 }
06508 
06509 /* Perform the modular exponentiation for Diffie-Hellman.
06510  *
06511  * base     Base.
06512  * exp      Array of bytes that is the exponent.
06513  * expLen   Length of data, in bytes, in exponent.
06514  * mod      Modulus.
06515  * out      Buffer to hold big-endian bytes of exponentiation result.
06516  *          Must be at least 384 bytes long.
06517  * outLen   Length, in bytes, of exponentiation result.
06518  * returns 0 on success, MP_READ_E if there are too many bytes in an array
06519  * and MEMORY_E if memory allocation fails.
06520  */
06521 int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen,
06522     mp_int* mod, byte* out, word32* outLen)
06523 {
06524 #ifdef WOLFSSL_SP_SMALL
06525     int err = MP_OKAY;
06526     sp_digit* d = NULL;
06527     sp_digit* b;
06528     sp_digit* e;
06529     sp_digit* m;
06530     sp_digit* r;
06531     word32 i;
06532 
06533     if (mp_count_bits(base) > 3072 || expLen > 384 ||
06534                                                    mp_count_bits(mod) != 3072) {
06535         err = MP_READ_E;
06536     }
06537 
06538     if (err == MP_OKAY) {
06539         d = (sp_digit*)XMALLOC(sizeof(*d) * 54 * 4, NULL,
06540                                DYNAMIC_TYPE_TMP_BUFFER);
06541         if (d == NULL)
06542             err = MEMORY_E;
06543     }
06544 
06545     if (err == MP_OKAY) {
06546         b = d;
06547         e = b + 54 * 2;
06548         m = e + 54;
06549         r = b;
06550 
06551         sp_3072_from_mp(b, 54, base);
06552         sp_3072_from_bin(e, 54, exp, expLen);
06553         sp_3072_from_mp(m, 54, mod);
06554 
06555         err = sp_3072_mod_exp_54(r, b, e, expLen * 8, m, 0);
06556     }
06557 
06558     if (err == MP_OKAY) {
06559         sp_3072_to_bin(r, out);
06560         *outLen = 384;
06561         for (i=0; i<384 && out[i] == 0; i++) {
06562         }
06563         *outLen -= i;
06564         XMEMMOVE(out, out + i, *outLen);
06565     }
06566 
06567     if (d != NULL) {
06568         XMEMSET(e, 0, sizeof(sp_digit) * 54);
06569         XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
06570     }
06571     return err;
06572 #else
06573 #ifndef WOLFSSL_SMALL_STACK
06574     sp_digit bd[108], ed[54], md[54];
06575 #else
06576     sp_digit* d = NULL;
06577 #endif
06578     sp_digit* b;
06579     sp_digit* e;
06580     sp_digit* m;
06581     sp_digit* r;
06582     word32 i;
06583     int err = MP_OKAY;
06584 
06585     if (mp_count_bits(base) > 3072 || expLen > 384 ||
06586                                                    mp_count_bits(mod) != 3072) {
06587         err = MP_READ_E;
06588     }
06589 
06590 #ifdef WOLFSSL_SMALL_STACK
06591     if (err == MP_OKAY) {
06592         d = (sp_digit*)XMALLOC(sizeof(*d) * 54 * 4, NULL,
06593                                DYNAMIC_TYPE_TMP_BUFFER);
06594         if (d == NULL)
06595             err = MEMORY_E;
06596     }
06597 
06598     if (err == MP_OKAY) {
06599         b = d;
06600         e = b + 54 * 2;
06601         m = e + 54;
06602         r = b;
06603     }
06604 #else
06605     r = b = bd;
06606     e = ed;
06607     m = md;
06608 #endif
06609 
06610     if (err == MP_OKAY) {
06611         sp_3072_from_mp(b, 54, base);
06612         sp_3072_from_bin(e, 54, exp, expLen);
06613         sp_3072_from_mp(m, 54, mod);
06614 
06615         err = sp_3072_mod_exp_54(r, b, e, expLen * 8, m, 0);
06616     }
06617 
06618     if (err == MP_OKAY) {
06619         sp_3072_to_bin(r, out);
06620         *outLen = 384;
06621         for (i=0; i<384 && out[i] == 0; i++) {
06622         }
06623         *outLen -= i;
06624         XMEMMOVE(out, out + i, *outLen);
06625     }
06626 
06627     XMEMSET(e, 0, sizeof(sp_digit) * 54);
06628 
06629 #ifdef WOLFSSL_SMALL_STACK
06630     if (d != NULL)
06631         XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
06632 #endif
06633 
06634     return err;
06635 #endif
06636 }
06637 
06638 #endif /* WOLFSSL_HAVE_SP_DH */
06639 
06640 #endif /* WOLFSSL_SP_NO_3072 */
06641 
06642 #endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
06643 #ifdef WOLFSSL_HAVE_SP_ECC
06644 #ifndef WOLFSSL_SP_NO_256
06645 
06646 /* Point structure to use. */
06647 typedef struct sp_point {
06648     sp_digit x[2 * 5];
06649     sp_digit y[2 * 5];
06650     sp_digit z[2 * 5];
06651     int infinity;
06652 } sp_point;
06653 
06654 /* The modulus (prime) of the curve P256. */
06655 static sp_digit p256_mod[5] = {
06656     0xfffffffffffffl,0x00fffffffffffl,0x0000000000000l,0x0001000000000l,
06657     0x0ffffffff0000l
06658 };
06659 #ifndef WOLFSSL_SP_SMALL
06660 /* The Montogmery normalizer for modulus of the curve P256. */
06661 static sp_digit p256_norm_mod[5] = {
06662     0x0000000000001l,0xff00000000000l,0xfffffffffffffl,0xfffefffffffffl,
06663     0x000000000ffffl
06664 };
06665 #endif /* WOLFSSL_SP_SMALL */
06666 /* The Montogmery multiplier for modulus of the curve P256. */
06667 static sp_digit p256_mp_mod = 0x0000000000001;
06668 #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
06669                                             defined(HAVE_ECC_VERIFY)
06670 /* The order of the curve P256. */
06671 static sp_digit p256_order[5] = {
06672     0x9cac2fc632551l,0xada7179e84f3bl,0xfffffffbce6fal,0x0000fffffffffl,
06673     0x0ffffffff0000l
06674 };
06675 #endif
06676 /* The order of the curve P256 minus 2. */
06677 static sp_digit p256_order2[5] = {
06678     0x9cac2fc63254fl,0xada7179e84f3bl,0xfffffffbce6fal,0x0000fffffffffl,
06679     0x0ffffffff0000l
06680 };
06681 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
06682 /* The Montogmery normalizer for order of the curve P256. */
06683 static sp_digit p256_norm_order[5] = {
06684     0x6353d039cdaafl,0x5258e8617b0c4l,0x0000000431905l,0xffff000000000l,
06685     0x000000000ffffl
06686 };
06687 #endif
06688 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
06689 /* The Montogmery multiplier for order of the curve P256. */
06690 static sp_digit p256_mp_order = 0x1c8aaee00bc4fl;
06691 #endif
06692 /* The base point of curve P256. */
06693 static sp_point p256_base = {
06694     /* X ordinate */
06695     {
06696         0x13945d898c296l,0x812deb33a0f4al,0x3a440f277037dl,0x4247f8bce6e56l,
06697         0x06b17d1f2e12cl
06698     },
06699     /* Y ordinate */
06700     {
06701         0x6406837bf51f5l,0x576b315ececbbl,0xc0f9e162bce33l,0x7f9b8ee7eb4a7l,
06702         0x04fe342e2fe1al
06703     },
06704     /* Z ordinate */
06705     {
06706         0x0000000000001l,0x0000000000000l,0x0000000000000l,0x0000000000000l,
06707         0x0000000000000l
06708     },
06709     /* infinity */
06710     0
06711 };
#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
/* Curve parameter b of P256 (the value matches the standard's b coefficient),
 * as five 52-bit digits, least significant digit first. */
static sp_digit p256_b[5] = {
    0xe3c3e27d2604bL,
    0xb0cc53b0f63bcL,
    0x69886bc651d06L,
    0x93e7b3ebbd557L,
    0x05ac635d8aa3aL
};
#endif
06718 
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
/* Allocate memory for point and return error.
 *
 * heap  Heap hint passed to XMALLOC.
 * sp    Unused in this variant.
 * p     Pointer variable that receives the allocated point.
 * evaluates to MEMORY_E when allocation fails and MP_OKAY otherwise.
 *
 * The whole expansion is parenthesized so the result can be used safely
 * inside a larger expression.
 */
#define sp_ecc_point_new(heap, sp, p)                                        \
    ((((p) = XMALLOC(sizeof(sp_point), (heap), DYNAMIC_TYPE_ECC)) == NULL) ? \
        MEMORY_E : MP_OKAY)
#else
/* Set pointer to data and return no error.
 *
 * heap  Unused in this variant.
 * sp    Point object to reference.
 * p     Pointer variable that is set to the address of sp.
 * evaluates to MP_OKAY (MEMORY_E only if the address were NULL).
 *
 * The whole expansion is parenthesized so the result can be used safely
 * inside a larger expression.
 */
#define sp_ecc_point_new(heap, sp, p)   \
    ((((p) = &(sp)) == NULL) ? MEMORY_E : MP_OKAY)
#endif
06728 
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
/* If valid pointer then clear point data if requested and free data.
 *
 * p      Pointer to the point; nothing is done when it is NULL.
 * clear  Non-zero to zeroize the point before it is freed.
 * heap   Heap hint passed to XFREE.
 *
 * Arguments are parenthesized so that expression arguments expand correctly.
 * p is evaluated more than once, so it must not have side effects.
 */
#define sp_ecc_point_free(p, clear, heap)         \
    do {                                          \
        if ((p) != NULL) {                        \
            if (clear)                            \
                XMEMSET((p), 0, sizeof(*(p)));    \
            XFREE((p), (heap), DYNAMIC_TYPE_ECC); \
        }                                         \
    }                                             \
    while (0)
#else
/* Clear point data if requested.
 *
 * p      Pointer to the point.
 * clear  Non-zero to zeroize the point.
 * heap   Unused in this variant.
 *
 * Arguments are parenthesized so that expression arguments expand correctly.
 * p is evaluated more than once, so it must not have side effects.
 */
#define sp_ecc_point_free(p, clear, heap)  \
    do {                                   \
        if (clear)                         \
            XMEMSET((p), 0, sizeof(*(p))); \
    }                                      \
    while (0)
#endif
06749 
06750 /* Multiply a number by Montogmery normalizer mod modulus (prime).
06751  *
06752  * r  The resulting Montgomery form number.
06753  * a  The number to convert.
06754  * m  The modulus (prime).
06755  * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
06756  */
06757 static int sp_256_mod_mul_norm_5(sp_digit* r, sp_digit* a, sp_digit* m)
06758 {
06759 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
06760     int64_t* td;
06761 #else
06762     int64_t td[8];
06763     int64_t a32d[8];
06764 #endif
06765     int64_t* t;
06766     int64_t* a32;
06767     int64_t o;
06768     int err = MP_OKAY;
06769 
06770     (void)m;
06771 
06772 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
06773     td = XMALLOC(sizeof(int64_t) * 2 * 8, NULL, DYNAMIC_TYPE_ECC);
06774     if (td != NULL) {
06775         t = td;
06776         a32 = td + 8;
06777     }
06778     else
06779         err = MEMORY_E;
06780 #else
06781     t = td;
06782     a32 = a32d;
06783 #endif
06784 
06785     if (err == MP_OKAY) {
06786         a32[0] = (sp_digit)(a[0]) & 0xffffffff;
06787         a32[1] = (sp_digit)(a[0] >> 32);
06788         a32[1] |= a[1] << 20;
06789         a32[1] &= 0xffffffff;
06790         a32[2] = (sp_digit)(a[1] >> 12) & 0xffffffff;
06791         a32[3] = (sp_digit)(a[1] >> 44);
06792         a32[3] |= a[2] << 8;
06793         a32[3] &= 0xffffffff;
06794         a32[4] = (sp_digit)(a[2] >> 24);
06795         a32[4] |= a[3] << 28;
06796         a32[4] &= 0xffffffff;
06797         a32[5] = (sp_digit)(a[3] >> 4) & 0xffffffff;
06798         a32[6] = (sp_digit)(a[3] >> 36);
06799         a32[6] |= a[4] << 16;
06800         a32[6] &= 0xffffffff;
06801         a32[7] = (sp_digit)(a[4] >> 16) & 0xffffffff;
06802 
06803         /*  1  1  0 -1 -1 -1 -1  0 */
06804         t[0] = 0 + a32[0] + a32[1] - a32[3] - a32[4] - a32[5] - a32[6];
06805         /*  0  1  1  0 -1 -1 -1 -1 */
06806         t[1] = 0 + a32[1] + a32[2] - a32[4] - a32[5] - a32[6] - a32[7];
06807         /*  0  0  1  1  0 -1 -1 -1 */
06808         t[2] = 0 + a32[2] + a32[3] - a32[5] - a32[6] - a32[7];
06809         /* -1 -1  0  2  2  1  0 -1 */
06810         t[3] = 0 - a32[0] - a32[1] + 2 * a32[3] + 2 * a32[4] + a32[5] - a32[7];
06811         /*  0 -1 -1  0  2  2  1  0 */
06812         t[4] = 0 - a32[1] - a32[2] + 2 * a32[4] + 2 * a32[5] + a32[6];
06813         /*  0  0 -1 -1  0  2  2  1 */
06814         t[5] = 0 - a32[2] - a32[3] + 2 * a32[5] + 2 * a32[6] + a32[7];
06815         /* -1 -1  0  0  0  1  3  2 */
06816         t[6] = 0 - a32[0] - a32[1] + a32[5] + 3 * a32[6] + 2 * a32[7];
06817         /*  1  0 -1 -1 -1 -1  0  3 */
06818         t[7] = 0 + a32[0] - a32[2] - a32[3] - a32[4] - a32[5] + 3 * a32[7];
06819 
06820         t[1] += t[0] >> 32; t[0] &= 0xffffffff;
06821         t[2] += t[1] >> 32; t[1] &= 0xffffffff;
06822         t[3] += t[2] >> 32; t[2] &= 0xffffffff;
06823         t[4] += t[3] >> 32; t[3] &= 0xffffffff;
06824         t[5] += t[4] >> 32; t[4] &= 0xffffffff;
06825         t[6] += t[5] >> 32; t[5] &= 0xffffffff;
06826         t[7] += t[6] >> 32; t[6] &= 0xffffffff;
06827         o     = t[7] >> 32; t[7] &= 0xffffffff;
06828         t[0] += o;
06829         t[3] -= o;
06830         t[6] -= o;
06831         t[7] += o;
06832         t[1] += t[0] >> 32; t[0] &= 0xffffffff;
06833         t[2] += t[1] >> 32; t[1] &= 0xffffffff;
06834         t[3] += t[2] >> 32; t[2] &= 0xffffffff;
06835         t[4] += t[3] >> 32; t[3] &= 0xffffffff;
06836         t[5] += t[4] >> 32; t[4] &= 0xffffffff;
06837         t[6] += t[5] >> 32; t[5] &= 0xffffffff;
06838         t[7] += t[6] >> 32; t[6] &= 0xffffffff;
06839 
06840         r[0] = t[0];
06841         r[0] |= t[1] << 32;
06842         r[0] &= 0xfffffffffffffl;
06843         r[1] = (sp_digit)(t[1] >> 20);
06844         r[1] |= t[2] << 12;
06845         r[1] |= t[3] << 44;
06846         r[1] &= 0xfffffffffffffl;
06847         r[2] = (sp_digit)(t[3] >> 8);
06848         r[2] |= t[4] << 24;
06849         r[2] &= 0xfffffffffffffl;
06850         r[3] = (sp_digit)(t[4] >> 28);
06851         r[3] |= t[5] << 4;
06852         r[3] |= t[6] << 36;
06853         r[3] &= 0xfffffffffffffl;
06854         r[4] = (sp_digit)(t[6] >> 16);
06855         r[4] |= t[7] << 16;
06856     }
06857 
06858 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
06859     if (td != NULL)
06860         XFREE(td, NULL, DYNAMIC_TYPE_ECC);
06861 #endif
06862 
06863     return err;
06864 }
06865 
06866 /* Convert an mp_int to an array of sp_digit.
06867  *
06868  * r  A single precision integer.
06869  * a  A multi-precision integer.
06870  */
06871 static void sp_256_from_mp(sp_digit* r, int max, mp_int* a)
06872 {
06873 #if DIGIT_BIT == 52
06874     int j;
06875 
06876     XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
06877 
06878     for (j = a->used; j < max; j++)
06879         r[j] = 0;
06880 #elif DIGIT_BIT > 52
06881     int i, j = 0, s = 0;
06882 
06883     r[0] = 0;
06884     for (i = 0; i < a->used && j < max; i++) {
06885         r[j] |= a->dp[i] << s;
06886         r[j] &= 0xfffffffffffffl;
06887         s = 52 - s;
06888         if (j + 1 >= max)
06889             break;
06890         r[++j] = a->dp[i] >> s;
06891         while (s + 52 <= DIGIT_BIT) {
06892             s += 52;
06893             r[j] &= 0xfffffffffffffl;
06894             if (j + 1 >= max)
06895                 break;
06896             if (s < DIGIT_BIT)
06897                 r[++j] = a->dp[i] >> s;
06898             else
06899                 r[++j] = 0;
06900         }
06901         s = DIGIT_BIT - s;
06902     }
06903 
06904     for (j++; j < max; j++)
06905         r[j] = 0;
06906 #else
06907     int i, j = 0, s = 0;
06908 
06909     r[0] = 0;
06910     for (i = 0; i < a->used && j < max; i++) {
06911         r[j] |= ((sp_digit)a->dp[i]) << s;
06912         if (s + DIGIT_BIT >= 52) {
06913             r[j] &= 0xfffffffffffffl;
06914             if (j + 1 >= max)
06915                 break;
06916             s = 52 - s;
06917             if (s == DIGIT_BIT) {
06918                 r[++j] = 0;
06919                 s = 0;
06920             }
06921             else {
06922                 r[++j] = a->dp[i] >> s;
06923                 s = DIGIT_BIT - s;
06924             }
06925         }
06926         else
06927             s += DIGIT_BIT;
06928     }
06929 
06930     for (j++; j < max; j++)
06931         r[j] = 0;
06932 #endif
06933 }
06934 
06935 /* Convert a point of type ecc_point to type sp_point.
06936  *
06937  * p   Point of type sp_point (result).
06938  * pm  Point of type ecc_point.
06939  */
06940 static void sp_256_point_from_ecc_point_5(sp_point* p, ecc_point* pm)
06941 {
06942     XMEMSET(p->x, 0, sizeof(p->x));
06943     XMEMSET(p->y, 0, sizeof(p->y));
06944     XMEMSET(p->z, 0, sizeof(p->z));
06945     sp_256_from_mp(p->x, 5, pm->x);
06946     sp_256_from_mp(p->y, 5, pm->y);
06947     sp_256_from_mp(p->z, 5, pm->z);
06948     p->infinity = 0;
06949 }
06950 
06951 /* Convert an array of sp_digit to an mp_int.
06952  *
06953  * a  A single precision integer.
06954  * r  A multi-precision integer.
06955  */
06956 static int sp_256_to_mp(sp_digit* a, mp_int* r)
06957 {
06958     int err;
06959 
06960     err = mp_grow(r, (256 + DIGIT_BIT - 1) / DIGIT_BIT);
06961     if (err == MP_OKAY) {
06962 #if DIGIT_BIT == 52
06963         XMEMCPY(r->dp, a, sizeof(sp_digit) * 5);
06964         r->used = 5;
06965         mp_clamp(r);
06966 #elif DIGIT_BIT < 52
06967         int i, j = 0, s = 0;
06968 
06969         r->dp[0] = 0;
06970         for (i = 0; i < 5; i++) {
06971             r->dp[j] |= a[i] << s;
06972             r->dp[j] &= (1l << DIGIT_BIT) - 1;
06973             s = DIGIT_BIT - s;
06974             r->dp[++j] = a[i] >> s;
06975             while (s + DIGIT_BIT <= 52) {
06976                 s += DIGIT_BIT;
06977                 r->dp[j] &= (1l << DIGIT_BIT) - 1;
06978                 r->dp[++j] = a[i] >> s;
06979             }
06980             s = 52 - s;
06981         }
06982         r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
06983         mp_clamp(r);
06984 #else
06985         int i, j = 0, s = 0;
06986 
06987         r->dp[0] = 0;
06988         for (i = 0; i < 5; i++) {
06989             r->dp[j] |= ((mp_digit)a[i]) << s;
06990             if (s + 52 >= DIGIT_BIT) {
06991     #if DIGIT_BIT < 64
06992                 r->dp[j] &= (1l << DIGIT_BIT) - 1;
06993     #endif
06994                 s = DIGIT_BIT - s;
06995                 r->dp[++j] = a[i] >> s;
06996                 s = 52 - s;
06997             }
06998             else
06999                 s += 52;
07000         }
07001         r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
07002         mp_clamp(r);
07003 #endif
07004     }
07005 
07006     return err;
07007 }
07008 
07009 /* Convert a point of type sp_point to type ecc_point.
07010  *
07011  * p   Point of type sp_point.
07012  * pm  Point of type ecc_point (result).
07013  * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
07014  * MP_OKAY.
07015  */
07016 static int sp_256_point_to_ecc_point_5(sp_point* p, ecc_point* pm)
07017 {
07018     int err;
07019 
07020     err = sp_256_to_mp(p->x, pm->x);
07021     if (err == MP_OKAY)
07022         err = sp_256_to_mp(p->y, pm->y);
07023     if (err == MP_OKAY)
07024         err = sp_256_to_mp(p->z, pm->z);
07025 
07026     return err;
07027 }
07028 
07029 /* Compare a with b in constant time.
07030  *
07031  * a  A single precision integer.
07032  * b  A single precision integer.
07033  * return -ve, 0 or +ve if a is less than, equal to or greater than b
07034  * respectively.
07035  */
07036 static sp_digit sp_256_cmp_5(const sp_digit* a, const sp_digit* b)
07037 {
07038     sp_digit r = 0;
07039 #ifdef WOLFSSL_SP_SMALL
07040     int i;
07041 
07042     for (i=4; i>=0; i--)
07043         r |= (a[i] - b[i]) & (0 - !r);
07044 #else
07045     r |= (a[ 4] - b[ 4]) & (0 - !r);
07046     r |= (a[ 3] - b[ 3]) & (0 - !r);
07047     r |= (a[ 2] - b[ 2]) & (0 - !r);
07048     r |= (a[ 1] - b[ 1]) & (0 - !r);
07049     r |= (a[ 0] - b[ 0]) & (0 - !r);
07050 #endif /* WOLFSSL_SP_SMALL */
07051 
07052     return r;
07053 }
07054 
07055 /* Normalize the values in each word to 52.
07056  *
07057  * a  Array of sp_digit to normalize.
07058  */
07059 static void sp_256_norm_5(sp_digit* a)
07060 {
07061 #ifdef WOLFSSL_SP_SMALL
07062     int i;
07063     for (i = 0; i < 4; i++) {
07064         a[i+1] += a[i] >> 52;
07065         a[i] &= 0xfffffffffffffl;
07066     }
07067 #else
07068     a[1] += a[0] >> 52; a[0] &= 0xfffffffffffffl;
07069     a[2] += a[1] >> 52; a[1] &= 0xfffffffffffffl;
07070     a[3] += a[2] >> 52; a[2] &= 0xfffffffffffffl;
07071     a[4] += a[3] >> 52; a[3] &= 0xfffffffffffffl;
07072 #endif
07073 }
07074 
07075 /* Conditionally subtract b from a using the mask m.
07076  * m is -1 to subtract and 0 when not.
07077  *
07078  * r  A single precision number representing condition subtract result.
07079  * a  A single precision number to subtract from.
07080  * b  A single precision number to subtract.
07081  * m  Mask value to apply.
07082  */
07083 static void sp_256_cond_sub_5(sp_digit* r, const sp_digit* a,
07084         const sp_digit* b, const sp_digit m)
07085 {
07086 #ifdef WOLFSSL_SP_SMALL
07087     int i;
07088 
07089     for (i = 0; i < 5; i++)
07090         r[i] = a[i] - (b[i] & m);
07091 #else
07092     r[ 0] = a[ 0] - (b[ 0] & m);
07093     r[ 1] = a[ 1] - (b[ 1] & m);
07094     r[ 2] = a[ 2] - (b[ 2] & m);
07095     r[ 3] = a[ 3] - (b[ 3] & m);
07096     r[ 4] = a[ 4] - (b[ 4] & m);
07097 #endif /* WOLFSSL_SP_SMALL */
07098 }
07099 
07100 /* Mul a by scalar b and add into r. (r += a * b)
07101  *
07102  * r  A single precision integer.
07103  * a  A single precision integer.
07104  * b  A scalar.
07105  */
07106 SP_NOINLINE static void sp_256_mul_add_5(sp_digit* r, const sp_digit* a,
07107         const sp_digit b)
07108 {
07109 #ifdef WOLFSSL_SP_SMALL
07110     int128_t tb = b;
07111     int128_t t = 0;
07112     int i;
07113 
07114     for (i = 0; i < 5; i++) {
07115         t += (tb * a[i]) + r[i];
07116         r[i] = t & 0xfffffffffffffl;
07117         t >>= 52;
07118     }
07119     r[5] += t;
07120 #else
07121     int128_t tb = b;
07122     int128_t t[5];
07123 
07124     t[ 0] = tb * a[ 0];
07125     t[ 1] = tb * a[ 1];
07126     t[ 2] = tb * a[ 2];
07127     t[ 3] = tb * a[ 3];
07128     t[ 4] = tb * a[ 4];
07129     r[ 0] +=                 (t[ 0] & 0xfffffffffffffl);
07130     r[ 1] += (t[ 0] >> 52) + (t[ 1] & 0xfffffffffffffl);
07131     r[ 2] += (t[ 1] >> 52) + (t[ 2] & 0xfffffffffffffl);
07132     r[ 3] += (t[ 2] >> 52) + (t[ 3] & 0xfffffffffffffl);
07133     r[ 4] += (t[ 3] >> 52) + (t[ 4] & 0xfffffffffffffl);
07134     r[ 5] +=  t[ 4] >> 52;
07135 #endif /* WOLFSSL_SP_SMALL */
07136 }
07137 
07138 /* Shift the result in the high 256 bits down to the bottom.
07139  *
07140  * r  A single precision number.
07141  * a  A single precision number.
07142  */
07143 static void sp_256_mont_shift_5(sp_digit* r, const sp_digit* a)
07144 {
07145 #ifdef WOLFSSL_SP_SMALL
07146     int i;
07147     word64 n;
07148 
07149     n = a[4] >> 48;
07150     for (i = 0; i < 4; i++) {
07151         n += a[5 + i] << 4;
07152         r[i] = n & 0xfffffffffffffl;
07153         n >>= 52;
07154     }
07155     n += a[9] << 4;
07156     r[4] = n;
07157 #else
07158     word64 n;
07159 
07160     n  = a[4] >> 48;
07161     n += a[ 5] << 4; r[ 0] = n & 0xfffffffffffffl; n >>= 52;
07162     n += a[ 6] << 4; r[ 1] = n & 0xfffffffffffffl; n >>= 52;
07163     n += a[ 7] << 4; r[ 2] = n & 0xfffffffffffffl; n >>= 52;
07164     n += a[ 8] << 4; r[ 3] = n & 0xfffffffffffffl; n >>= 52;
07165     n += a[ 9] << 4; r[ 4] = n;
07166 #endif /* WOLFSSL_SP_SMALL */
07167     XMEMSET(&r[5], 0, sizeof(*r) * 5);
07168 }
07169 
07170 /* Reduce the number back to 256 bits using Montgomery reduction.
07171  *
07172  * a   A single precision number to reduce in place.
07173  * m   The single precision number representing the modulus.
07174  * mp  The digit representing the negative inverse of m mod 2^n.
07175  */
07176 static void sp_256_mont_reduce_5(sp_digit* a, sp_digit* m, sp_digit mp)
07177 {
07178     int i;
07179     sp_digit mu;
07180 
07181     if (mp != 1) {
07182         for (i=0; i<4; i++) {
07183             mu = (a[i] * mp) & 0xfffffffffffffl;
07184             sp_256_mul_add_5(a+i, m, mu);
07185             a[i+1] += a[i] >> 52;
07186         }
07187         mu = (a[i] * mp) & 0xffffffffffffl;
07188         sp_256_mul_add_5(a+i, m, mu);
07189         a[i+1] += a[i] >> 52;
07190         a[i] &= 0xfffffffffffffl;
07191     }
07192     else {
07193         for (i=0; i<4; i++) {
07194             mu = a[i] & 0xfffffffffffffl;
07195             sp_256_mul_add_5(a+i, p256_mod, mu);
07196             a[i+1] += a[i] >> 52;
07197         }
07198         mu = a[i] & 0xffffffffffffl;
07199         sp_256_mul_add_5(a+i, p256_mod, mu);
07200         a[i+1] += a[i] >> 52;
07201         a[i] &= 0xfffffffffffffl;
07202     }
07203 
07204     sp_256_mont_shift_5(a, a);
07205     sp_256_cond_sub_5(a, a, m, 0 - ((a[4] >> 48) > 0));
07206     sp_256_norm_5(a);
07207 }
07208 
07209 #ifdef WOLFSSL_SP_SMALL
07210 /* Multiply a and b into r. (r = a * b)
07211  *
07212  * r  A single precision integer.
07213  * a  A single precision integer.
07214  * b  A single precision integer.
07215  */
07216 SP_NOINLINE static void sp_256_mul_5(sp_digit* r, const sp_digit* a,
07217     const sp_digit* b)
07218 {
07219     int i, j, k;
07220     int128_t c;
07221 
07222     c = ((int128_t)a[4]) * b[4];
07223     r[9] = (sp_digit)(c >> 52);
07224     c = (c & 0xfffffffffffffl) << 52;
07225     for (k = 7; k >= 0; k--) {
07226         for (i = 4; i >= 0; i--) {
07227             j = k - i;
07228             if (j >= 5)
07229                 break;
07230             if (j < 0)
07231                 continue;
07232 
07233             c += ((int128_t)a[i]) * b[j];
07234         }
07235         r[k + 2] += c >> 104;
07236         r[k + 1] = (c >> 52) & 0xfffffffffffffl;
07237         c = (c & 0xfffffffffffffl) << 52;
07238     }
07239     r[0] = (sp_digit)(c >> 52);
07240 }
07241 
07242 #else
07243 /* Multiply a and b into r. (r = a * b)
07244  *
07245  * r  A single precision integer.
07246  * a  A single precision integer.
07247  * b  A single precision integer.
07248  */
07249 SP_NOINLINE static void sp_256_mul_5(sp_digit* r, const sp_digit* a,
07250     const sp_digit* b)
07251 {
07252     int128_t t0   = ((int128_t)a[ 0]) * b[ 0];
07253     int128_t t1   = ((int128_t)a[ 0]) * b[ 1]
07254                  + ((int128_t)a[ 1]) * b[ 0];
07255     int128_t t2   = ((int128_t)a[ 0]) * b[ 2]
07256                  + ((int128_t)a[ 1]) * b[ 1]
07257                  + ((int128_t)a[ 2]) * b[ 0];
07258     int128_t t3   = ((int128_t)a[ 0]) * b[ 3]
07259                  + ((int128_t)a[ 1]) * b[ 2]
07260                  + ((int128_t)a[ 2]) * b[ 1]
07261                  + ((int128_t)a[ 3]) * b[ 0];
07262     int128_t t4   = ((int128_t)a[ 0]) * b[ 4]
07263                  + ((int128_t)a[ 1]) * b[ 3]
07264                  + ((int128_t)a[ 2]) * b[ 2]
07265                  + ((int128_t)a[ 3]) * b[ 1]
07266                  + ((int128_t)a[ 4]) * b[ 0];
07267     int128_t t5   = ((int128_t)a[ 1]) * b[ 4]
07268                  + ((int128_t)a[ 2]) * b[ 3]
07269                  + ((int128_t)a[ 3]) * b[ 2]
07270                  + ((int128_t)a[ 4]) * b[ 1];
07271     int128_t t6   = ((int128_t)a[ 2]) * b[ 4]
07272                  + ((int128_t)a[ 3]) * b[ 3]
07273                  + ((int128_t)a[ 4]) * b[ 2];
07274     int128_t t7   = ((int128_t)a[ 3]) * b[ 4]
07275                  + ((int128_t)a[ 4]) * b[ 3];
07276     int128_t t8   = ((int128_t)a[ 4]) * b[ 4];
07277 
07278     t1   += t0  >> 52; r[ 0] = t0  & 0xfffffffffffffl;
07279     t2   += t1  >> 52; r[ 1] = t1  & 0xfffffffffffffl;
07280     t3   += t2  >> 52; r[ 2] = t2  & 0xfffffffffffffl;
07281     t4   += t3  >> 52; r[ 3] = t3  & 0xfffffffffffffl;
07282     t5   += t4  >> 52; r[ 4] = t4  & 0xfffffffffffffl;
07283     t6   += t5  >> 52; r[ 5] = t5  & 0xfffffffffffffl;
07284     t7   += t6  >> 52; r[ 6] = t6  & 0xfffffffffffffl;
07285     t8   += t7  >> 52; r[ 7] = t7  & 0xfffffffffffffl;
07286     r[9] = (sp_digit)(t8 >> 52);
07287                        r[8] = t8 & 0xfffffffffffffl;
07288 }
07289 
07290 #endif /* WOLFSSL_SP_SMALL */
07291 /* Multiply two Montogmery form numbers mod the modulus (prime).
07292  * (r = a * b mod m)
07293  *
07294  * r   Result of multiplication.
07295  * a   First number to multiply in Montogmery form.
07296  * b   Second number to multiply in Montogmery form.
07297  * m   Modulus (prime).
07298  * mp  Montogmery mulitplier.
07299  */
07300 static void sp_256_mont_mul_5(sp_digit* r, sp_digit* a, sp_digit* b,
07301         sp_digit* m, sp_digit mp)
07302 {
07303     sp_256_mul_5(r, a, b);
07304     sp_256_mont_reduce_5(r, m, mp);
07305 }
07306 
07307 #ifdef WOLFSSL_SP_SMALL
07308 /* Square a and put result in r. (r = a * a)
07309  *
07310  * r  A single precision integer.
07311  * a  A single precision integer.
07312  */
07313 SP_NOINLINE static void sp_256_sqr_5(sp_digit* r, const sp_digit* a)
07314 {
07315     int i, j, k;
07316     int128_t c;
07317 
07318     c = ((int128_t)a[4]) * a[4];
07319     r[9] = (sp_digit)(c >> 52);
07320     c = (c & 0xfffffffffffffl) << 52;
07321     for (k = 7; k >= 0; k--) {
07322         for (i = 4; i >= 0; i--) {
07323             j = k - i;
07324             if (j >= 5 || i <= j)
07325                 break;
07326             if (j < 0)
07327                 continue;
07328 
07329             c += ((int128_t)a[i]) * a[j] * 2;
07330         }
07331         if (i == j)
07332            c += ((int128_t)a[i]) * a[i];
07333 
07334         r[k + 2] += c >> 104;
07335         r[k + 1] = (c >> 52) & 0xfffffffffffffl;
07336         c = (c & 0xfffffffffffffl) << 52;
07337     }
07338     r[0] = (sp_digit)(c >> 52);
07339 }
07340 
07341 #else
07342 /* Square a and put result in r. (r = a * a)
07343  *
07344  * r  A single precision integer.
07345  * a  A single precision integer.
07346  */
07347 SP_NOINLINE static void sp_256_sqr_5(sp_digit* r, const sp_digit* a)
07348 {
07349     int128_t t0   =  ((int128_t)a[ 0]) * a[ 0];
07350     int128_t t1   = (((int128_t)a[ 0]) * a[ 1]) * 2;
07351     int128_t t2   = (((int128_t)a[ 0]) * a[ 2]) * 2
07352                  +  ((int128_t)a[ 1]) * a[ 1];
07353     int128_t t3   = (((int128_t)a[ 0]) * a[ 3]
07354                  +  ((int128_t)a[ 1]) * a[ 2]) * 2;
07355     int128_t t4   = (((int128_t)a[ 0]) * a[ 4]
07356                  +  ((int128_t)a[ 1]) * a[ 3]) * 2
07357                  +  ((int128_t)a[ 2]) * a[ 2];
07358     int128_t t5   = (((int128_t)a[ 1]) * a[ 4]
07359                  +  ((int128_t)a[ 2]) * a[ 3]) * 2;
07360     int128_t t6   = (((int128_t)a[ 2]) * a[ 4]) * 2
07361                  +  ((int128_t)a[ 3]) * a[ 3];
07362     int128_t t7   = (((int128_t)a[ 3]) * a[ 4]) * 2;
07363     int128_t t8   =  ((int128_t)a[ 4]) * a[ 4];
07364 
07365     t1   += t0  >> 52; r[ 0] = t0  & 0xfffffffffffffl;
07366     t2   += t1  >> 52; r[ 1] = t1  & 0xfffffffffffffl;
07367     t3   += t2  >> 52; r[ 2] = t2  & 0xfffffffffffffl;
07368     t4   += t3  >> 52; r[ 3] = t3  & 0xfffffffffffffl;
07369     t5   += t4  >> 52; r[ 4] = t4  & 0xfffffffffffffl;
07370     t6   += t5  >> 52; r[ 5] = t5  & 0xfffffffffffffl;
07371     t7   += t6  >> 52; r[ 6] = t6  & 0xfffffffffffffl;
07372     t8   += t7  >> 52; r[ 7] = t7  & 0xfffffffffffffl;
07373     r[9] = (sp_digit)(t8 >> 52);
07374                        r[8] = t8 & 0xfffffffffffffl;
07375 }
07376 
07377 #endif /* WOLFSSL_SP_SMALL */
07378 /* Square the Montgomery form number. (r = a * a mod m)
07379  *
07380  * r   Result of squaring.
07381  * a   Number to square in Montogmery form.
07382  * m   Modulus (prime).
07383  * mp  Montogmery mulitplier.
07384  */
07385 static void sp_256_mont_sqr_5(sp_digit* r, sp_digit* a, sp_digit* m,
07386         sp_digit mp)
07387 {
07388     sp_256_sqr_5(r, a);
07389     sp_256_mont_reduce_5(r, m, mp);
07390 }
07391 
07392 #ifndef WOLFSSL_SP_SMALL
07393 /* Square the Montgomery form number a number of times. (r = a ^ n mod m)
07394  *
07395  * r   Result of squaring.
07396  * a   Number to square in Montogmery form.
07397  * n   Number of times to square.
07398  * m   Modulus (prime).
07399  * mp  Montogmery mulitplier.
07400  */
07401 static void sp_256_mont_sqr_n_5(sp_digit* r, sp_digit* a, int n,
07402         sp_digit* m, sp_digit mp)
07403 {
07404     sp_256_mont_sqr_5(r, a, m, mp);
07405     for (; n > 1; n--)
07406         sp_256_mont_sqr_5(r, r, m, mp);
07407 }
07408 
07409 #else
07410 /* Mod-2 for the P256 curve. */
07411 static const uint64_t p256_mod_2[4] = {
07412     0xfffffffffffffffd,0x00000000ffffffff,0x0000000000000000,
07413     0xffffffff00000001
07414 };
07415 #endif /* !WOLFSSL_SP_SMALL */
07416 
07417 /* Invert the number, in Montgomery form, modulo the modulus (prime) of the
07418  * P256 curve. (r = 1 / a mod m)
07419  *
07420  * r   Inverse result.
07421  * a   Number to invert.
07422  * td  Temporary data.
07423  */
07424 static void sp_256_mont_inv_5(sp_digit* r, sp_digit* a, sp_digit* td)
07425 {
07426 #ifdef WOLFSSL_SP_SMALL
07427     sp_digit* t = td;
07428     int i;
07429 
07430     XMEMCPY(t, a, sizeof(sp_digit) * 5);
07431     for (i=254; i>=0; i--) {
07432         sp_256_mont_sqr_5(t, t, p256_mod, p256_mp_mod);
07433         if (p256_mod_2[i / 64] & ((sp_digit)1 << (i % 64)))
07434             sp_256_mont_mul_5(t, t, a, p256_mod, p256_mp_mod);
07435     }
07436     XMEMCPY(r, t, sizeof(sp_digit) * 5);
07437 #else
07438     sp_digit* t = td;
07439     sp_digit* t2 = td + 2 * 5;
07440     sp_digit* t3 = td + 4 * 5;
07441 
07442     /* t = a^2 */
07443     sp_256_mont_sqr_5(t, a, p256_mod, p256_mp_mod);
07444     /* t = a^3 = t * a */
07445     sp_256_mont_mul_5(t, t, a, p256_mod, p256_mp_mod);
07446     /* t2= a^c = t ^ 2 ^ 2 */
07447     sp_256_mont_sqr_n_5(t2, t, 2, p256_mod, p256_mp_mod);
07448     /* t3= a^d = t2 * a */
07449     sp_256_mont_mul_5(t3, t2, a, p256_mod, p256_mp_mod);
07450     /* t = a^f = t2 * t */
07451     sp_256_mont_mul_5(t, t2, t, p256_mod, p256_mp_mod);
07452     /* t2= a^f0 = t ^ 2 ^ 4 */
07453     sp_256_mont_sqr_n_5(t2, t, 4, p256_mod, p256_mp_mod);
07454     /* t3= a^fd = t2 * t3 */
07455     sp_256_mont_mul_5(t3, t2, t3, p256_mod, p256_mp_mod);
07456     /* t = a^ff = t2 * t */
07457     sp_256_mont_mul_5(t, t2, t, p256_mod, p256_mp_mod);
07458     /* t2= a^ff00 = t ^ 2 ^ 8 */
07459     sp_256_mont_sqr_n_5(t2, t, 8, p256_mod, p256_mp_mod);
07460     /* t3= a^fffd = t2 * t3 */
07461     sp_256_mont_mul_5(t3, t2, t3, p256_mod, p256_mp_mod);
07462     /* t = a^ffff = t2 * t */
07463     sp_256_mont_mul_5(t, t2, t, p256_mod, p256_mp_mod);
07464     /* t2= a^ffff0000 = t ^ 2 ^ 16 */
07465     sp_256_mont_sqr_n_5(t2, t, 16, p256_mod, p256_mp_mod);
07466     /* t3= a^fffffffd = t2 * t3 */
07467     sp_256_mont_mul_5(t3, t2, t3, p256_mod, p256_mp_mod);
07468     /* t = a^ffffffff = t2 * t */
07469     sp_256_mont_mul_5(t, t2, t, p256_mod, p256_mp_mod);
07470     /* t = a^ffffffff00000000 = t ^ 2 ^ 32  */
07471     sp_256_mont_sqr_n_5(t2, t, 32, p256_mod, p256_mp_mod);
07472     /* t2= a^ffffffffffffffff = t2 * t */
07473     sp_256_mont_mul_5(t, t2, t, p256_mod, p256_mp_mod);
07474     /* t2= a^ffffffff00000001 = t2 * a */
07475     sp_256_mont_mul_5(t2, t2, a, p256_mod, p256_mp_mod);
07476     /* t2= a^ffffffff000000010000000000000000000000000000000000000000
07477      *   = t2 ^ 2 ^ 160 */
07478     sp_256_mont_sqr_n_5(t2, t2, 160, p256_mod, p256_mp_mod);
07479     /* t2= a^ffffffff00000001000000000000000000000000ffffffffffffffff
07480      *   = t2 * t */
07481     sp_256_mont_mul_5(t2, t2, t, p256_mod, p256_mp_mod);
07482     /* t2= a^ffffffff00000001000000000000000000000000ffffffffffffffff00000000
07483      *   = t2 ^ 2 ^ 32 */
07484     sp_256_mont_sqr_n_5(t2, t2, 32, p256_mod, p256_mp_mod);
07485     /* r = a^ffffffff00000001000000000000000000000000fffffffffffffffffffffffd
07486      *   = t2 * t3 */
07487     sp_256_mont_mul_5(r, t2, t3, p256_mod, p256_mp_mod);
07488 #endif /* WOLFSSL_SP_SMALL */
07489 }
07490 
07491 /* Map the Montgomery form projective co-ordinate point to an affine point.
07492  *
07493  * r  Resulting affine co-ordinate point.
07494  * p  Montgomery form projective co-ordinate point.
07495  * t  Temporary ordinate data.
07496  */
07497 static void sp_256_map_5(sp_point* r, sp_point* p, sp_digit* t)
07498 {
07499     sp_digit* t1 = t;
07500     sp_digit* t2 = t + 2*5;
07501     int64_t n;
07502 
07503     sp_256_mont_inv_5(t1, p->z, t + 2*5);
07504 
07505     sp_256_mont_sqr_5(t2, t1, p256_mod, p256_mp_mod);
07506     sp_256_mont_mul_5(t1, t2, t1, p256_mod, p256_mp_mod);
07507 
07508     /* x /= z^2 */
07509     sp_256_mont_mul_5(r->x, p->x, t2, p256_mod, p256_mp_mod);
07510     XMEMSET(r->x + 5, 0, sizeof(r->x) / 2);
07511     sp_256_mont_reduce_5(r->x, p256_mod, p256_mp_mod);
07512     /* Reduce x to less than modulus */
07513     n = sp_256_cmp_5(r->x, p256_mod);
07514     sp_256_cond_sub_5(r->x, r->x, p256_mod, 0 - (n >= 0));
07515     sp_256_norm_5(r->x);
07516 
07517     /* y /= z^3 */
07518     sp_256_mont_mul_5(r->y, p->y, t1, p256_mod, p256_mp_mod);
07519     XMEMSET(r->y + 5, 0, sizeof(r->y) / 2);
07520     sp_256_mont_reduce_5(r->y, p256_mod, p256_mp_mod);
07521     /* Reduce y to less than modulus */
07522     n = sp_256_cmp_5(r->y, p256_mod);
07523     sp_256_cond_sub_5(r->y, r->y, p256_mod, 0 - (n >= 0));
07524     sp_256_norm_5(r->y);
07525 
07526     XMEMSET(r->z, 0, sizeof(r->z));
07527     r->z[0] = 1;
07528 
07529 }
07530 
07531 #ifdef WOLFSSL_SP_SMALL
07532 /* Add b to a into r. (r = a + b)
07533  *
07534  * r  A single precision integer.
07535  * a  A single precision integer.
07536  * b  A single precision integer.
07537  */
07538 SP_NOINLINE static int sp_256_add_5(sp_digit* r, const sp_digit* a,
07539         const sp_digit* b)
07540 {
07541     int i;
07542 
07543     for (i = 0; i < 5; i++)
07544         r[i] = a[i] + b[i];
07545 
07546     return 0;
07547 }
07548 #else
07549 /* Add b to a into r. (r = a + b)
07550  *
07551  * r  A single precision integer.
07552  * a  A single precision integer.
07553  * b  A single precision integer.
07554  */
07555 SP_NOINLINE static int sp_256_add_5(sp_digit* r, const sp_digit* a,
07556         const sp_digit* b)
07557 {
07558     r[ 0] = a[ 0] + b[ 0];
07559     r[ 1] = a[ 1] + b[ 1];
07560     r[ 2] = a[ 2] + b[ 2];
07561     r[ 3] = a[ 3] + b[ 3];
07562     r[ 4] = a[ 4] + b[ 4];
07563 
07564     return 0;
07565 }
07566 
07567 #endif /* WOLFSSL_SP_SMALL */
07568 /* Add two Montgomery form numbers (r = a + b % m).
07569  *
07570  * r   Result of addition.
07571  * a   First number to add in Montogmery form.
07572  * b   Second number to add in Montogmery form.
07573  * m   Modulus (prime).
07574  */
07575 static void sp_256_mont_add_5(sp_digit* r, sp_digit* a, sp_digit* b,
07576         sp_digit* m)
07577 {
07578     sp_256_add_5(r, a, b);
07579     sp_256_norm_5(r);
07580     sp_256_cond_sub_5(r, r, m, 0 - ((r[4] >> 48) > 0));
07581     sp_256_norm_5(r);
07582 }
07583 
07584 /* Double a Montgomery form number (r = a + a % m).
07585  *
07586  * r   Result of doubling.
07587  * a   Number to double in Montogmery form.
07588  * m   Modulus (prime).
07589  */
07590 static void sp_256_mont_dbl_5(sp_digit* r, sp_digit* a, sp_digit* m)
07591 {
07592     sp_256_add_5(r, a, a);
07593     sp_256_norm_5(r);
07594     sp_256_cond_sub_5(r, r, m, 0 - ((r[4] >> 48) > 0));
07595     sp_256_norm_5(r);
07596 }
07597 
07598 /* Triple a Montgomery form number (r = a + a + a % m).
07599  *
07600  * r   Result of Tripling.
07601  * a   Number to triple in Montogmery form.
07602  * m   Modulus (prime).
07603  */
07604 static void sp_256_mont_tpl_5(sp_digit* r, sp_digit* a, sp_digit* m)
07605 {
07606     sp_256_add_5(r, a, a);
07607     sp_256_norm_5(r);
07608     sp_256_cond_sub_5(r, r, m, 0 - ((r[4] >> 48) > 0));
07609     sp_256_norm_5(r);
07610     sp_256_add_5(r, r, a);
07611     sp_256_norm_5(r);
07612     sp_256_cond_sub_5(r, r, m, 0 - ((r[4] >> 48) > 0));
07613     sp_256_norm_5(r);
07614 }
07615 
07616 #ifdef WOLFSSL_SP_SMALL
07617 /* Sub b from a into r. (r = a - b)
07618  *
07619  * r  A single precision integer.
07620  * a  A single precision integer.
07621  * b  A single precision integer.
07622  */
07623 SP_NOINLINE static int sp_256_sub_5(sp_digit* r, const sp_digit* a,
07624         const sp_digit* b)
07625 {
07626     int i;
07627 
07628     for (i = 0; i < 5; i++)
07629         r[i] = a[i] - b[i];
07630 
07631     return 0;
07632 }
07633 
07634 #else
07635 /* Sub b from a into r. (r = a - b)
07636  *
07637  * r  A single precision integer.
07638  * a  A single precision integer.
07639  * b  A single precision integer.
07640  */
07641 SP_NOINLINE static int sp_256_sub_5(sp_digit* r, const sp_digit* a,
07642         const sp_digit* b)
07643 {
07644     r[ 0] = a[ 0] - b[ 0];
07645     r[ 1] = a[ 1] - b[ 1];
07646     r[ 2] = a[ 2] - b[ 2];
07647     r[ 3] = a[ 3] - b[ 3];
07648     r[ 4] = a[ 4] - b[ 4];
07649 
07650     return 0;
07651 }
07652 
07653 #endif /* WOLFSSL_SP_SMALL */
07654 /* Conditionally add a and b using the mask m.
07655  * m is -1 to add and 0 when not.
07656  *
07657  * r  A single precision number representing conditional add result.
07658  * a  A single precision number to add with.
07659  * b  A single precision number to add.
07660  * m  Mask value to apply.
07661  */
07662 static void sp_256_cond_add_5(sp_digit* r, const sp_digit* a,
07663         const sp_digit* b, const sp_digit m)
07664 {
07665 #ifdef WOLFSSL_SP_SMALL
07666     int i;
07667 
07668     for (i = 0; i < 5; i++)
07669         r[i] = a[i] + (b[i] & m);
07670 #else
07671     r[ 0] = a[ 0] + (b[ 0] & m);
07672     r[ 1] = a[ 1] + (b[ 1] & m);
07673     r[ 2] = a[ 2] + (b[ 2] & m);
07674     r[ 3] = a[ 3] + (b[ 3] & m);
07675     r[ 4] = a[ 4] + (b[ 4] & m);
07676 #endif /* WOLFSSL_SP_SMALL */
07677 }
07678 
07679 /* Subtract two Montgomery form numbers (r = a - b % m).
07680  *
07681  * r   Result of subtration.
07682  * a   Number to subtract from in Montogmery form.
07683  * b   Number to subtract with in Montogmery form.
07684  * m   Modulus (prime).
07685  */
07686 static void sp_256_mont_sub_5(sp_digit* r, sp_digit* a, sp_digit* b,
07687         sp_digit* m)
07688 {
07689     sp_256_sub_5(r, a, b);
07690     sp_256_cond_add_5(r, r, m, r[4] >> 48);
07691     sp_256_norm_5(r);
07692 }
07693 
07694 /* Shift number left one bit.
07695  * Bottom bit is lost.
07696  *
07697  * r  Result of shift.
07698  * a  Number to shift.
07699  */
07700 SP_NOINLINE static void sp_256_rshift1_5(sp_digit* r, sp_digit* a)
07701 {
07702 #ifdef WOLFSSL_SP_SMALL
07703     int i;
07704 
07705     for (i=0; i<4; i++)
07706         r[i] = ((a[i] >> 1) | (a[i + 1] << 51)) & 0xfffffffffffffl;
07707 #else
07708     r[0] = ((a[0] >> 1) | (a[1] << 51)) & 0xfffffffffffffl;
07709     r[1] = ((a[1] >> 1) | (a[2] << 51)) & 0xfffffffffffffl;
07710     r[2] = ((a[2] >> 1) | (a[3] << 51)) & 0xfffffffffffffl;
07711     r[3] = ((a[3] >> 1) | (a[4] << 51)) & 0xfffffffffffffl;
07712 #endif
07713     r[4] = a[4] >> 1;
07714 }
07715 
07716 /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
07717  *
07718  * r  Result of division by 2.
07719  * a  Number to divide.
07720  * m  Modulus (prime).
07721  */
07722 static void sp_256_div2_5(sp_digit* r, sp_digit* a, sp_digit* m)
07723 {
07724     sp_256_cond_add_5(r, a, m, 0 - (a[0] & 1));
07725     sp_256_norm_5(r);
07726     sp_256_rshift1_5(r, r);
07727 }
07728 
07729 /* Double the Montgomery form projective point p.
07730  *
07731  * r  Result of doubling point.
07732  * p  Point to double.
07733  * t  Temporary ordinate data.
07734  */
07735 static void sp_256_proj_point_dbl_5(sp_point* r, sp_point* p, sp_digit* t)
07736 {
07737     sp_point *rp[2];
07738     sp_point tp;
07739     sp_digit* t1 = t;
07740     sp_digit* t2 = t + 2*5;
07741     sp_digit* x;
07742     sp_digit* y;
07743     sp_digit* z;
07744     int i;
07745 
07746     /* When infinity don't double point passed in - constant time. */
07747     rp[0] = r;
07748     rp[1] = &tp;
07749     x = rp[p->infinity]->x;
07750     y = rp[p->infinity]->y;
07751     z = rp[p->infinity]->z;
07752     /* Put point to double into result - good for infinty. */
07753     if (r != p) {
07754         for (i=0; i<5; i++)
07755             r->x[i] = p->x[i];
07756         for (i=0; i<5; i++)
07757             r->y[i] = p->y[i];
07758         for (i=0; i<5; i++)
07759             r->z[i] = p->z[i];
07760         r->infinity = p->infinity;
07761     }
07762 
07763     /* T1 = Z * Z */
07764     sp_256_mont_sqr_5(t1, z, p256_mod, p256_mp_mod);
07765     /* Z = Y * Z */
07766     sp_256_mont_mul_5(z, y, z, p256_mod, p256_mp_mod);
07767     /* Z = 2Z */
07768     sp_256_mont_dbl_5(z, z, p256_mod);
07769     /* T2 = X - T1 */
07770     sp_256_mont_sub_5(t2, x, t1, p256_mod);
07771     /* T1 = X + T1 */
07772     sp_256_mont_add_5(t1, x, t1, p256_mod);
07773     /* T2 = T1 * T2 */
07774     sp_256_mont_mul_5(t2, t1, t2, p256_mod, p256_mp_mod);
07775     /* T1 = 3T2 */
07776     sp_256_mont_tpl_5(t1, t2, p256_mod);
07777     /* Y = 2Y */
07778     sp_256_mont_dbl_5(y, y, p256_mod);
07779     /* Y = Y * Y */
07780     sp_256_mont_sqr_5(y, y, p256_mod, p256_mp_mod);
07781     /* T2 = Y * Y */
07782     sp_256_mont_sqr_5(t2, y, p256_mod, p256_mp_mod);
07783     /* T2 = T2/2 */
07784     sp_256_div2_5(t2, t2, p256_mod);
07785     /* Y = Y * X */
07786     sp_256_mont_mul_5(y, y, x, p256_mod, p256_mp_mod);
07787     /* X = T1 * T1 */
07788     sp_256_mont_mul_5(x, t1, t1, p256_mod, p256_mp_mod);
07789     /* X = X - Y */
07790     sp_256_mont_sub_5(x, x, y, p256_mod);
07791     /* X = X - Y */
07792     sp_256_mont_sub_5(x, x, y, p256_mod);
07793     /* Y = Y - X */
07794     sp_256_mont_sub_5(y, y, x, p256_mod);
07795     /* Y = Y * T1 */
07796     sp_256_mont_mul_5(y, y, t1, p256_mod, p256_mp_mod);
07797     /* Y = Y - T2 */
07798     sp_256_mont_sub_5(y, y, t2, p256_mod);
07799 
07800 }
07801 
07802 /* Compare two numbers to determine if they are equal.
07803  * Constant time implementation.
07804  *
07805  * a  First number to compare.
07806  * b  Second number to compare.
07807  * returns 1 when equal and 0 otherwise.
07808  */
07809 static int sp_256_cmp_equal_5(const sp_digit* a, const sp_digit* b)
07810 {
07811     return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) |
07812             (a[4] ^ b[4])) == 0;
07813 }
07814 
07815 /* Add two Montgomery form projective points.
07816  *
07817  * r  Result of addition.
07818  * p  Frist point to add.
07819  * q  Second point to add.
07820  * t  Temporary ordinate data.
07821  */
07822 static void sp_256_proj_point_add_5(sp_point* r, sp_point* p, sp_point* q,
07823         sp_digit* t)
07824 {
07825     sp_point *ap[2];
07826     sp_point *rp[2];
07827     sp_point tp;
07828     sp_digit* t1 = t;
07829     sp_digit* t2 = t + 2*5;
07830     sp_digit* t3 = t + 4*5;
07831     sp_digit* t4 = t + 6*5;
07832     sp_digit* t5 = t + 8*5;
07833     sp_digit* x;
07834     sp_digit* y;
07835     sp_digit* z;
07836     int i;
07837 
07838     /* Ensure only the first point is the same as the result. */
07839     if (q == r) {
07840         sp_point* a = p;
07841         p = q;
07842         q = a;
07843     }
07844 
07845     /* Check double */
07846     sp_256_sub_5(t1, p256_mod, q->y);
07847     sp_256_norm_5(t1);
07848     if (sp_256_cmp_equal_5(p->x, q->x) & sp_256_cmp_equal_5(p->z, q->z) &
07849         (sp_256_cmp_equal_5(p->y, q->y) | sp_256_cmp_equal_5(p->y, t1))) {
07850         sp_256_proj_point_dbl_5(r, p, t);
07851     }
07852     else {
07853         rp[0] = r;
07854         rp[1] = &tp;
07855         XMEMSET(&tp, 0, sizeof(tp));
07856         x = rp[p->infinity | q->infinity]->x;
07857         y = rp[p->infinity | q->infinity]->y;
07858         z = rp[p->infinity | q->infinity]->z;
07859 
07860         ap[0] = p;
07861         ap[1] = q;
07862         for (i=0; i<5; i++)
07863             r->x[i] = ap[p->infinity]->x[i];
07864         for (i=0; i<5; i++)
07865             r->y[i] = ap[p->infinity]->y[i];
07866         for (i=0; i<5; i++)
07867             r->z[i] = ap[p->infinity]->z[i];
07868         r->infinity = ap[p->infinity]->infinity;
07869 
07870         /* U1 = X1*Z2^2 */
07871         sp_256_mont_sqr_5(t1, q->z, p256_mod, p256_mp_mod);
07872         sp_256_mont_mul_5(t3, t1, q->z, p256_mod, p256_mp_mod);
07873         sp_256_mont_mul_5(t1, t1, x, p256_mod, p256_mp_mod);
07874         /* U2 = X2*Z1^2 */
07875         sp_256_mont_sqr_5(t2, z, p256_mod, p256_mp_mod);
07876         sp_256_mont_mul_5(t4, t2, z, p256_mod, p256_mp_mod);
07877         sp_256_mont_mul_5(t2, t2, q->x, p256_mod, p256_mp_mod);
07878         /* S1 = Y1*Z2^3 */
07879         sp_256_mont_mul_5(t3, t3, y, p256_mod, p256_mp_mod);
07880         /* S2 = Y2*Z1^3 */
07881         sp_256_mont_mul_5(t4, t4, q->y, p256_mod, p256_mp_mod);
07882         /* H = U2 - U1 */
07883         sp_256_mont_sub_5(t2, t2, t1, p256_mod);
07884         /* R = S2 - S1 */
07885         sp_256_mont_sub_5(t4, t4, t3, p256_mod);
07886         /* Z3 = H*Z1*Z2 */
07887         sp_256_mont_mul_5(z, z, q->z, p256_mod, p256_mp_mod);
07888         sp_256_mont_mul_5(z, z, t2, p256_mod, p256_mp_mod);
07889         /* X3 = R^2 - H^3 - 2*U1*H^2 */
07890         sp_256_mont_sqr_5(x, t4, p256_mod, p256_mp_mod);
07891         sp_256_mont_sqr_5(t5, t2, p256_mod, p256_mp_mod);
07892         sp_256_mont_mul_5(y, t1, t5, p256_mod, p256_mp_mod);
07893         sp_256_mont_mul_5(t5, t5, t2, p256_mod, p256_mp_mod);
07894         sp_256_mont_sub_5(x, x, t5, p256_mod);
07895         sp_256_mont_dbl_5(t1, y, p256_mod);
07896         sp_256_mont_sub_5(x, x, t1, p256_mod);
07897         /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
07898         sp_256_mont_sub_5(y, y, x, p256_mod);
07899         sp_256_mont_mul_5(y, y, t4, p256_mod, p256_mp_mod);
07900         sp_256_mont_mul_5(t5, t5, t3, p256_mod, p256_mp_mod);
07901         sp_256_mont_sub_5(y, y, t5, p256_mod);
07902     }
07903 }
07904 
07905 #ifdef WOLFSSL_SP_SMALL
07906 /* Multiply the point by the scalar and return the result.
07907  * If map is true then convert result to affine co-ordinates.
07908  *
07909  * r     Resulting point.
07910  * g     Point to multiply.
07911  * k     Scalar to multiply by.
07912  * map   Indicates whether to convert result to affine.
07913  * heap  Heap to use for allocation.
07914  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
07915  */
07916 static int sp_256_ecc_mulmod_5(sp_point* r, sp_point* g, sp_digit* k,
07917         int map, void* heap)
07918 {
07919     sp_point* td;
07920     sp_point* t[3];
07921     sp_digit* tmp;
07922     sp_digit n;
07923     int i;
07924     int c, y;
07925     int err = MP_OKAY;
07926 
07927     (void)heap;
07928 
07929     td = (sp_point*)XMALLOC(sizeof(sp_point) * 3, heap, DYNAMIC_TYPE_ECC);
07930     if (td == NULL)
07931         err = MEMORY_E;
07932     tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 5 * 5, heap,
07933                              DYNAMIC_TYPE_ECC);
07934     if (tmp == NULL)
07935         err = MEMORY_E;
07936 
07937     if (err == MP_OKAY) {
07938         XMEMSET(td, 0, sizeof(*td) * 3);
07939 
07940         t[0] = &td[0];
07941         t[1] = &td[1];
07942         t[2] = &td[2];
07943 
07944         /* t[0] = {0, 0, 1} * norm */
07945         t[0]->infinity = 1;
07946         /* t[1] = {g->x, g->y, g->z} * norm */
07947         err = sp_256_mod_mul_norm_5(t[1]->x, g->x, p256_mod);
07948     }
07949     if (err == MP_OKAY)
07950         err = sp_256_mod_mul_norm_5(t[1]->y, g->y, p256_mod);
07951     if (err == MP_OKAY)
07952         err = sp_256_mod_mul_norm_5(t[1]->z, g->z, p256_mod);
07953 
07954     if (err == MP_OKAY) {
07955         i = 4;
07956         c = 48;
07957         n = k[i--] << (52 - c);
07958         for (; ; c--) {
07959             if (c == 0) {
07960                 if (i == -1)
07961                     break;
07962 
07963                 n = k[i--];
07964                 c = 52;
07965             }
07966 
07967             y = (n >> 51) & 1;
07968             n <<= 1;
07969 
07970             sp_256_proj_point_add_5(t[y^1], t[0], t[1], tmp);
07971 
07972             XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
07973                                   ((size_t)t[1] & addr_mask[y])),
07974                     sizeof(sp_point));
07975             sp_256_proj_point_dbl_5(t[2], t[2], tmp);
07976             XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
07977                             ((size_t)t[1] & addr_mask[y])), t[2],
07978                     sizeof(sp_point));
07979         }
07980 
07981         if (map)
07982             sp_256_map_5(r, t[0], tmp);
07983         else
07984             XMEMCPY(r, t[0], sizeof(sp_point));
07985     }
07986 
07987     if (tmp != NULL) {
07988         XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 5 * 5);
07989         XFREE(tmp, NULL, DYNAMIC_TYPE_TMP_BUFFER);
07990     }
07991     if (td != NULL) {
07992         XMEMSET(td, 0, sizeof(sp_point) * 3);
07993         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
07994     }
07995 
07996     return err;
07997 }
07998 
07999 #elif defined(WOLFSSL_SP_CACHE_RESISTANT)
08000 /* Multiply the point by the scalar and return the result.
08001  * If map is true then convert result to affine co-ordinates.
08002  *
08003  * r     Resulting point.
08004  * g     Point to multiply.
08005  * k     Scalar to multiply by.
08006  * map   Indicates whether to convert result to affine.
08007  * heap  Heap to use for allocation.
08008  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
08009  */
08010 static int sp_256_ecc_mulmod_5(sp_point* r, sp_point* g, sp_digit* k,
08011         int map, void* heap)
08012 {
08013 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
08014     sp_point td[3];
08015     sp_digit tmpd[2 * 5 * 5];
08016 #endif
08017     sp_point* t;
08018     sp_digit* tmp;
08019     sp_digit n;
08020     int i;
08021     int c, y;
08022     int err = MP_OKAY;
08023 
08024     (void)heap;
08025 
08026 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
08027     sp_point td[3];
08028     t = (sp_point*)XMALLOC(sizeof(*td) * 3, heap, DYNAMIC_TYPE_ECC);
08029     if (t == NULL)
08030         err = MEMORY_E;
08031     tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 5 * 5, heap,
08032                              DYNAMIC_TYPE_ECC);
08033     if (tmp == NULL)
08034         err = MEMORY_E;
08035 #else
08036     t = td;
08037     tmp = tmpd;
08038 #endif
08039 
08040     if (err == MP_OKAY) {
08041         t[0] = &td[0];
08042         t[1] = &td[1];
08043         t[2] = &td[2];
08044 
08045         /* t[0] = {0, 0, 1} * norm */
08046         XMEMSET(&t[0], 0, sizeof(t[0]));
08047         t[0].infinity = 1;
08048         /* t[1] = {g->x, g->y, g->z} * norm */
08049         err = sp_256_mod_mul_norm_5(t[1].x, g->x, p256_mod);
08050     }
08051     if (err == MP_OKAY)
08052         err = sp_256_mod_mul_norm_5(t[1].y, g->y, p256_mod);
08053     if (err == MP_OKAY)
08054         err = sp_256_mod_mul_norm_5(t[1].z, g->z, p256_mod);
08055 
08056     if (err == MP_OKAY) {
08057         i = 4;
08058         c = 48;
08059         n = k[i--] << (52 - c);
08060         for (; ; c--) {
08061             if (c == 0) {
08062                 if (i == -1)
08063                     break;
08064 
08065                 n = k[i--];
08066                 c = 52;
08067             }
08068 
08069             y = (n >> 51) & 1;
08070             n <<= 1;
08071 
08072             sp_256_proj_point_add_5(&t[y^1], &t[0], &t[1], tmp);
08073 
08074             XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) +
08075                                  ((size_t)&t[1] & addr_mask[y])), sizeof(t[2]));
08076             sp_256_proj_point_dbl_5(&t[2], &t[2], tmp);
08077             XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) +
08078                            ((size_t)&t[1] & addr_mask[y])), &t[2], sizeof(t[2]));
08079         }
08080 
08081         if (map)
08082             sp_256_map_5(r, &t[0], tmp);
08083         else
08084             XMEMCPY(r, &t[0], sizeof(sp_point));
08085     }
08086 
08087 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
08088     if (tmp != NULL) {
08089         XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 5 * 5);
08090         XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
08091     }
08092     if (t != NULL) {
08093         XMEMSET(t, 0, sizeof(sp_point) * 3);
08094         XFREE(t, heap, DYNAMIC_TYPE_ECC);
08095     }
08096 #else
08097     ForceZero(tmpd, sizeof(tmpd));
08098     ForceZero(td, sizeof(td));
08099 #endif
08100 
08101     return err;
08102 }
08103 
08104 #else
08105 /* A table entry for pre-computed points. */
08106 typedef struct sp_table_entry {
08107     sp_digit x[5];
08108     sp_digit y[5];
08109     byte infinity;
08110 } sp_table_entry;
08111 
08112 /* Multiply the point by the scalar and return the result.
08113  * If map is true then convert result to affine co-ordinates.
08114  *
08115  * r     Resulting point.
08116  * g     Point to multiply.
08117  * k     Scalar to multiply by.
08118  * map   Indicates whether to convert result to affine.
08119  * heap  Heap to use for allocation.
08120  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
08121  */
08122 static int sp_256_ecc_mulmod_fast_5(sp_point* r, sp_point* g, sp_digit* k,
08123         int map, void* heap)
08124 {
08125 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
08126     sp_point td[16];
08127     sp_point rtd;
08128     sp_digit tmpd[2 * 5 * 5];
08129 #endif
08130     sp_point* t;
08131     sp_point* rt;
08132     sp_digit* tmp;
08133     sp_digit n;
08134     int i;
08135     int c, y;
08136     int err;
08137 
08138     (void)heap;
08139 
08140     err = sp_ecc_point_new(heap, rtd, rt);
08141 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
08142     t = (sp_point*)XMALLOC(sizeof(sp_point) * 16, heap, DYNAMIC_TYPE_ECC);
08143     if (t == NULL)
08144         err = MEMORY_E;
08145     tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 5 * 5, heap,
08146                              DYNAMIC_TYPE_ECC);
08147     if (tmp == NULL)
08148         err = MEMORY_E;
08149 #else
08150     t = td;
08151     tmp = tmpd;
08152 #endif
08153 
08154     if (err == MP_OKAY) {
08155         /* t[0] = {0, 0, 1} * norm */
08156         XMEMSET(&t[0], 0, sizeof(t[0]));
08157         t[0].infinity = 1;
08158         /* t[1] = {g->x, g->y, g->z} * norm */
08159         sp_256_mod_mul_norm_5(t[1].x, g->x, p256_mod);
08160         sp_256_mod_mul_norm_5(t[1].y, g->y, p256_mod);
08161         sp_256_mod_mul_norm_5(t[1].z, g->z, p256_mod);
08162         t[1].infinity = 0;
08163         sp_256_proj_point_dbl_5(&t[ 2], &t[ 1], tmp);
08164         t[ 2].infinity = 0;
08165         sp_256_proj_point_add_5(&t[ 3], &t[ 2], &t[ 1], tmp);
08166         t[ 3].infinity = 0;
08167         sp_256_proj_point_dbl_5(&t[ 4], &t[ 2], tmp);
08168         t[ 4].infinity = 0;
08169         sp_256_proj_point_add_5(&t[ 5], &t[ 3], &t[ 2], tmp);
08170         t[ 5].infinity = 0;
08171         sp_256_proj_point_dbl_5(&t[ 6], &t[ 3], tmp);
08172         t[ 6].infinity = 0;
08173         sp_256_proj_point_add_5(&t[ 7], &t[ 4], &t[ 3], tmp);
08174         t[ 7].infinity = 0;
08175         sp_256_proj_point_dbl_5(&t[ 8], &t[ 4], tmp);
08176         t[ 8].infinity = 0;
08177         sp_256_proj_point_add_5(&t[ 9], &t[ 5], &t[ 4], tmp);
08178         t[ 9].infinity = 0;
08179         sp_256_proj_point_dbl_5(&t[10], &t[ 5], tmp);
08180         t[10].infinity = 0;
08181         sp_256_proj_point_add_5(&t[11], &t[ 6], &t[ 5], tmp);
08182         t[11].infinity = 0;
08183         sp_256_proj_point_dbl_5(&t[12], &t[ 6], tmp);
08184         t[12].infinity = 0;
08185         sp_256_proj_point_add_5(&t[13], &t[ 7], &t[ 6], tmp);
08186         t[13].infinity = 0;
08187         sp_256_proj_point_dbl_5(&t[14], &t[ 7], tmp);
08188         t[14].infinity = 0;
08189         sp_256_proj_point_add_5(&t[15], &t[ 8], &t[ 7], tmp);
08190         t[15].infinity = 0;
08191 
08192         i = 3;
08193         n = k[i+1] << 12;
08194         c = 44;
08195         y = n >> 56;
08196         XMEMCPY(rt, &t[y], sizeof(sp_point));
08197         n <<= 8;
08198         for (; i>=0 || c>=4; ) {
08199             if (c < 4) {
08200                 n |= k[i--] << (12 - c);
08201                 c += 52;
08202             }
08203             y = (n >> 60) & 0xf;
08204             n <<= 4;
08205             c -= 4;
08206 
08207             sp_256_proj_point_dbl_5(rt, rt, tmp);
08208             sp_256_proj_point_dbl_5(rt, rt, tmp);
08209             sp_256_proj_point_dbl_5(rt, rt, tmp);
08210             sp_256_proj_point_dbl_5(rt, rt, tmp);
08211 
08212             sp_256_proj_point_add_5(rt, rt, &t[y], tmp);
08213         }
08214 
08215         if (map)
08216             sp_256_map_5(r, rt, tmp);
08217         else
08218             XMEMCPY(r, rt, sizeof(sp_point));
08219     }
08220 
08221 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
08222     if (tmp != NULL) {
08223         XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 5 * 5);
08224         XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
08225     }
08226     if (t != NULL) {
08227         XMEMSET(t, 0, sizeof(sp_point) * 16);
08228         XFREE(t, heap, DYNAMIC_TYPE_ECC);
08229     }
08230 #else
08231     ForceZero(tmpd, sizeof(tmpd));
08232     ForceZero(td, sizeof(td));
08233 #endif
08234     sp_ecc_point_free(rt, 1, heap);
08235 
08236     return err;
08237 }
08238 
#ifdef FP_ECC
/* Double the Montgomery form projective point p a number of times.
 *
 * p is first copied into r (unless they are the same object). The arithmetic
 * then works in place on r, or - when p is the point at infinity - on a local
 * scratch point so that r keeps the copied value.
 * Y is doubled once before the loop and halved once after it; W (Z^4) is
 * carried from one round to the next instead of being recomputed.
 *
 * r  Result of repeated doubling of point.
 * p  Point to double.
 * n  Number of times to double
 * t  Temporary ordinate data. Five working values are used, at offsets
 *    0, 2*5, 4*5, 6*5 and 8*5.
 */
static void sp_256_proj_point_dbl_n_5(sp_point* r, sp_point* p, int n,
        sp_digit* t)
{
    sp_point *rp[2];
    sp_point tp;
    sp_digit* w = t;
    sp_digit* a = t + 2*5;
    sp_digit* b = t + 4*5;
    sp_digit* t1 = t + 6*5;
    sp_digit* t2 = t + 8*5;
    sp_digit* x;
    sp_digit* y;
    sp_digit* z;
    int i;

    rp[0] = r;
    rp[1] = &tp;
    /* The scratch point is read by the arithmetic below when p is infinity,
     * so give it defined contents (as sp_256_proj_point_add_5 does). */
    XMEMSET(&tp, 0, sizeof(tp));
    x = rp[p->infinity]->x;
    y = rp[p->infinity]->y;
    z = rp[p->infinity]->z;
    if (r != p) {
        for (i=0; i<5; i++)
            r->x[i] = p->x[i];
        for (i=0; i<5; i++)
            r->y[i] = p->y[i];
        for (i=0; i<5; i++)
            r->z[i] = p->z[i];
        r->infinity = p->infinity;
    }

    /* Y = 2*Y */
    sp_256_mont_dbl_5(y, y, p256_mod);
    /* W = Z^4 */
    sp_256_mont_sqr_5(w, z, p256_mod, p256_mp_mod);
    sp_256_mont_sqr_5(w, w, p256_mod, p256_mp_mod);
    while (n--) {
        /* A = 3*(X^2 - W) */
        sp_256_mont_sqr_5(t1, x, p256_mod, p256_mp_mod);
        sp_256_mont_sub_5(t1, t1, w, p256_mod);
        sp_256_mont_tpl_5(a, t1, p256_mod);
        /* B = X*Y^2 */
        sp_256_mont_sqr_5(t2, y, p256_mod, p256_mp_mod);
        sp_256_mont_mul_5(b, t2, x, p256_mod, p256_mp_mod);
        /* X = A^2 - 2B */
        sp_256_mont_sqr_5(x, a, p256_mod, p256_mp_mod);
        sp_256_mont_dbl_5(t1, b, p256_mod);
        sp_256_mont_sub_5(x, x, t1, p256_mod);
        /* Z = Z*Y */
        sp_256_mont_mul_5(z, z, y, p256_mod, p256_mp_mod);
        /* t2 = Y^4 */
        sp_256_mont_sqr_5(t2, t2, p256_mod, p256_mp_mod);
        /* W is only needed by a following round, so skip it on the last */
        if (n) {
            /* W = W*Y^4 */
            sp_256_mont_mul_5(w, w, t2, p256_mod, p256_mp_mod);
        }
        /* y = 2*A*(B - X) - Y^4 */
        sp_256_mont_sub_5(y, b, x, p256_mod);
        sp_256_mont_mul_5(y, y, a, p256_mod, p256_mp_mod);
        sp_256_mont_dbl_5(y, y, p256_mod);
        sp_256_mont_sub_5(y, y, t2, p256_mod);
    }
    /* Y = Y/2 */
    sp_256_div2_5(y, y, p256_mod);
}

#endif /* FP_ECC */
08313 /* Add two Montgomery form projective points. The second point has a q value of
08314  * one.
08315  * Only the first point can be the same pointer as the result point.
08316  *
08317  * r  Result of addition.
08318  * p  Frist point to add.
08319  * q  Second point to add.
08320  * t  Temporary ordinate data.
08321  */
08322 static void sp_256_proj_point_add_qz1_5(sp_point* r, sp_point* p,
08323         sp_point* q, sp_digit* t)
08324 {
08325     sp_point *ap[2];
08326     sp_point *rp[2];
08327     sp_point tp;
08328     sp_digit* t1 = t;
08329     sp_digit* t2 = t + 2*5;
08330     sp_digit* t3 = t + 4*5;
08331     sp_digit* t4 = t + 6*5;
08332     sp_digit* t5 = t + 8*5;
08333     sp_digit* x;
08334     sp_digit* y;
08335     sp_digit* z;
08336     int i;
08337 
08338     /* Check double */
08339     sp_256_sub_5(t1, p256_mod, q->y);
08340     sp_256_norm_5(t1);
08341     if (sp_256_cmp_equal_5(p->x, q->x) & sp_256_cmp_equal_5(p->z, q->z) &
08342         (sp_256_cmp_equal_5(p->y, q->y) | sp_256_cmp_equal_5(p->y, t1))) {
08343         sp_256_proj_point_dbl_5(r, p, t);
08344     }
08345     else {
08346         rp[0] = r;
08347         rp[1] = &tp;
08348         XMEMSET(&tp, 0, sizeof(tp));
08349         x = rp[p->infinity | q->infinity]->x;
08350         y = rp[p->infinity | q->infinity]->y;
08351         z = rp[p->infinity | q->infinity]->z;
08352 
08353         ap[0] = p;
08354         ap[1] = q;
08355         for (i=0; i<5; i++)
08356             r->x[i] = ap[p->infinity]->x[i];
08357         for (i=0; i<5; i++)
08358             r->y[i] = ap[p->infinity]->y[i];
08359         for (i=0; i<5; i++)
08360             r->z[i] = ap[p->infinity]->z[i];
08361         r->infinity = ap[p->infinity]->infinity;
08362 
08363         /* U2 = X2*Z1^2 */
08364         sp_256_mont_sqr_5(t2, z, p256_mod, p256_mp_mod);
08365         sp_256_mont_mul_5(t4, t2, z, p256_mod, p256_mp_mod);
08366         sp_256_mont_mul_5(t2, t2, q->x, p256_mod, p256_mp_mod);
08367         /* S2 = Y2*Z1^3 */
08368         sp_256_mont_mul_5(t4, t4, q->y, p256_mod, p256_mp_mod);
08369         /* H = U2 - X1 */
08370         sp_256_mont_sub_5(t2, t2, x, p256_mod);
08371         /* R = S2 - Y1 */
08372         sp_256_mont_sub_5(t4, t4, y, p256_mod);
08373         /* Z3 = H*Z1 */
08374         sp_256_mont_mul_5(z, z, t2, p256_mod, p256_mp_mod);
08375         /* X3 = R^2 - H^3 - 2*X1*H^2 */
08376         sp_256_mont_sqr_5(t1, t4, p256_mod, p256_mp_mod);
08377         sp_256_mont_sqr_5(t5, t2, p256_mod, p256_mp_mod);
08378         sp_256_mont_mul_5(t3, x, t5, p256_mod, p256_mp_mod);
08379         sp_256_mont_mul_5(t5, t5, t2, p256_mod, p256_mp_mod);
08380         sp_256_mont_sub_5(x, t1, t5, p256_mod);
08381         sp_256_mont_dbl_5(t1, t3, p256_mod);
08382         sp_256_mont_sub_5(x, x, t1, p256_mod);
08383         /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
08384         sp_256_mont_sub_5(t3, t3, x, p256_mod);
08385         sp_256_mont_mul_5(t3, t3, t4, p256_mod, p256_mp_mod);
08386         sp_256_mont_mul_5(t5, t5, y, p256_mod, p256_mp_mod);
08387         sp_256_mont_sub_5(y, t3, t5, p256_mod);
08388     }
08389 }
08390 
08391 #ifdef FP_ECC
08392 /* Convert the projective point to affine.
08393  * Ordinates are in Montgomery form.
08394  *
08395  * a  Point to convert.
08396  * t  Temprorary data.
08397  */
08398 static void sp_256_proj_to_affine_5(sp_point* a, sp_digit* t)
08399 {
08400     sp_digit* t1 = t;
08401     sp_digit* t2 = t + 2 * 5;
08402     sp_digit* tmp = t + 4 * 5;
08403 
08404     sp_256_mont_inv_5(t1, a->z, tmp);
08405 
08406     sp_256_mont_sqr_5(t2, t1, p256_mod, p256_mp_mod);
08407     sp_256_mont_mul_5(t1, t2, t1, p256_mod, p256_mp_mod);
08408 
08409     sp_256_mont_mul_5(a->x, a->x, t2, p256_mod, p256_mp_mod);
08410     sp_256_mont_mul_5(a->y, a->y, t1, p256_mod, p256_mp_mod);
08411     XMEMCPY(a->z, p256_norm_mod, sizeof(p256_norm_mod));
08412 }
08413 
08414 /* Generate the pre-computed table of points for the base point.
08415  *
08416  * a      The base point.
08417  * table  Place to store generated point data.
08418  * tmp    Temprorary data.
08419  * heap  Heap to use for allocation.
08420  */
08421 static int sp_256_gen_stripe_table_5(sp_point* a,
08422         sp_table_entry* table, sp_digit* tmp, void* heap)
08423 {
08424 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
08425     sp_point td, s1d, s2d;
08426 #endif
08427     sp_point* t;
08428     sp_point* s1 = NULL;
08429     sp_point* s2 = NULL;
08430     int i, j;
08431     int err;
08432 
08433     (void)heap;
08434 
08435     err = sp_ecc_point_new(heap, td, t);
08436     if (err == MP_OKAY)
08437         err = sp_ecc_point_new(heap, s1d, s1);
08438     if (err == MP_OKAY)
08439         err = sp_ecc_point_new(heap, s2d, s2);
08440 
08441     if (err == MP_OKAY)
08442         err = sp_256_mod_mul_norm_5(t->x, a->x, p256_mod);
08443     if (err == MP_OKAY)
08444         err = sp_256_mod_mul_norm_5(t->y, a->y, p256_mod);
08445     if (err == MP_OKAY)
08446         err = sp_256_mod_mul_norm_5(t->z, a->z, p256_mod);
08447     if (err == MP_OKAY) {
08448         t->infinity = 0;
08449         sp_256_proj_to_affine_5(t, tmp);
08450 
08451         XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
08452         s1->infinity = 0;
08453         XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
08454         s2->infinity = 0;
08455 
08456         /* table[0] = {0, 0, infinity} */
08457         XMEMSET(&table[0], 0, sizeof(sp_table_entry));
08458         table[0].infinity = 1;
08459         /* table[1] = Affine version of 'a' in Montgomery form */
08460         XMEMCPY(table[1].x, t->x, sizeof(table->x));
08461         XMEMCPY(table[1].y, t->y, sizeof(table->y));
08462         table[1].infinity = 0;
08463 
08464         for (i=1; i<8; i++) {
08465             sp_256_proj_point_dbl_n_5(t, t, 32, tmp);
08466             sp_256_proj_to_affine_5(t, tmp);
08467             XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
08468             XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
08469             table[1<<i].infinity = 0;
08470         }
08471 
08472         for (i=1; i<8; i++) {
08473             XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
08474             XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
08475             for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
08476                 XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
08477                 XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
08478                 sp_256_proj_point_add_qz1_5(t, s1, s2, tmp);
08479                 sp_256_proj_to_affine_5(t, tmp);
08480                 XMEMCPY(table[j].x, t->x, sizeof(table->x));
08481                 XMEMCPY(table[j].y, t->y, sizeof(table->y));
08482                 table[j].infinity = 0;
08483             }
08484         }
08485     }
08486 
08487     sp_ecc_point_free(s2, 0, heap);
08488     sp_ecc_point_free(s1, 0, heap);
08489     sp_ecc_point_free( t, 0, heap);
08490 
08491     return err;
08492 }
08493 
08494 #endif /* FP_ECC */
08495 /* Multiply the point by the scalar and return the result.
08496  * If map is true then convert result to affine co-ordinates.
08497  *
08498  * r     Resulting point.
08499  * k     Scalar to multiply by.
08500  * map   Indicates whether to convert result to affine.
08501  * heap  Heap to use for allocation.
08502  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
08503  */
08504 static int sp_256_ecc_mulmod_stripe_5(sp_point* r, sp_point* g,
08505         sp_table_entry* table, sp_digit* k, int map, void* heap)
08506 {
08507 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
08508     sp_point rtd;
08509     sp_point pd;
08510     sp_digit td[2 * 5 * 5];
08511 #endif
08512     sp_point* rt;
08513     sp_point* p = NULL;
08514     sp_digit* t;
08515     int i, j;
08516     int y, x;
08517     int err;
08518 
08519     (void)g;
08520     (void)heap;
08521 
08522     err = sp_ecc_point_new(heap, rtd, rt);
08523     if (err == MP_OKAY)
08524         err = sp_ecc_point_new(heap, pd, p);
08525 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
08526     t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 5 * 5, heap,
08527                            DYNAMIC_TYPE_ECC);
08528     if (t == NULL)
08529         err = MEMORY_E;
08530 #else
08531     t = td;
08532 #endif
08533 
08534     if (err == MP_OKAY) {
08535         XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
08536         XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
08537 
08538         y = 0;
08539         for (j=0,x=31; j<8; j++,x+=32)
08540             y |= ((k[x / 52] >> (x % 52)) & 1) << j;
08541         XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
08542         XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
08543         rt->infinity = table[y].infinity;
08544         for (i=30; i>=0; i--) {
08545             y = 0;
08546             for (j=0,x=i; j<8; j++,x+=32)
08547                 y |= ((k[x / 52] >> (x % 52)) & 1) << j;
08548 
08549             sp_256_proj_point_dbl_5(rt, rt, t);
08550             XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
08551             XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
08552             p->infinity = table[y].infinity;
08553             sp_256_proj_point_add_qz1_5(rt, rt, p, t);
08554         }
08555 
08556         if (map)
08557             sp_256_map_5(r, rt, t);
08558         else
08559             XMEMCPY(r, rt, sizeof(sp_point));
08560     }
08561 
08562 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
08563     if (t != NULL)
08564         XFREE(t, heap, DYNAMIC_TYPE_ECC);
08565 #endif
08566     sp_ecc_point_free(p, 0, heap);
08567     sp_ecc_point_free(rt, 0, heap);
08568 
08569     return err;
08570 }
08571 
08572 #ifdef FP_ECC
08573 #ifndef FP_ENTRIES
08574     #define FP_ENTRIES 16
08575 #endif
08576 
08577 typedef struct sp_cache_t {
08578     sp_digit x[5];
08579     sp_digit y[5];
08580     sp_table_entry table[256];
08581     uint32_t cnt;
08582     int set;
08583 } sp_cache_t;
08584 
08585 static THREAD_LS_T sp_cache_t sp_cache[FP_ENTRIES];
08586 static THREAD_LS_T int sp_cache_last = -1;
08587 static THREAD_LS_T int sp_cache_inited = 0;
08588 
08589 #ifndef HAVE_THREAD_LS
08590     static volatile int initCacheMutex = 0;
08591     static wolfSSL_Mutex sp_cache_lock;
08592 #endif
08593 
08594 static void sp_ecc_get_cache(sp_point* g, sp_cache_t** cache)
08595 {
08596     int i, j;
08597     uint32_t least;
08598 
08599     if (sp_cache_inited == 0) {
08600         for (i=0; i<FP_ENTRIES; i++) {
08601             sp_cache[i].set = 0;
08602         }
08603         sp_cache_inited = 1;
08604     }
08605 
08606     /* Compare point with those in cache. */
08607     for (i=0; i<FP_ENTRIES; i++) {
08608         if (!sp_cache[i].set)
08609             continue;
08610 
08611         if (sp_256_cmp_equal_5(g->x, sp_cache[i].x) & 
08612                            sp_256_cmp_equal_5(g->y, sp_cache[i].y)) {
08613             sp_cache[i].cnt++;
08614             break;
08615         }
08616     }
08617 
08618     /* No match. */
08619     if (i == FP_ENTRIES) {
08620         /* Find empty entry. */
08621         i = (sp_cache_last + 1) % FP_ENTRIES;
08622         for (; i != sp_cache_last; i=(i+1)%FP_ENTRIES) {
08623             if (!sp_cache[i].set) {
08624                 break;
08625             }
08626         }
08627 
08628         /* Evict least used. */
08629         if (i == sp_cache_last) {
08630             least = sp_cache[0].cnt;
08631             for (j=1; j<FP_ENTRIES; j++) {
08632                 if (sp_cache[j].cnt < least) {
08633                     i = j;
08634                     least = sp_cache[i].cnt;
08635                 }
08636             }
08637         }
08638 
08639         XMEMCPY(sp_cache[i].x, g->x, sizeof(sp_cache[i].x));
08640         XMEMCPY(sp_cache[i].y, g->y, sizeof(sp_cache[i].y));
08641         sp_cache[i].set = 1;
08642         sp_cache[i].cnt = 1;
08643     }
08644 
08645     *cache = &sp_cache[i];
08646     sp_cache_last = i;
08647 }
08648 #endif /* FP_ECC */
08649 
08650 /* Multiply the base point of P256 by the scalar and return the result.
08651  * If map is true then convert result to affine co-ordinates.
08652  *
08653  * r     Resulting point.
08654  * g     Point to multiply.
08655  * k     Scalar to multiply by.
08656  * map   Indicates whether to convert result to affine.
08657  * heap  Heap to use for allocation.
08658  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
08659  */
08660 static int sp_256_ecc_mulmod_5(sp_point* r, sp_point* g, sp_digit* k,
08661         int map, void* heap)
08662 {
08663 #ifndef FP_ECC
08664     return sp_256_ecc_mulmod_fast_5(r, g, k, map, heap);
08665 #else
08666     sp_digit tmp[2 * 5 * 5];
08667     sp_cache_t* cache;
08668     int err = MP_OKAY;
08669 
08670 #ifndef HAVE_THREAD_LS
08671     if (initCacheMutex == 0) {
08672          wc_InitMutex(&sp_cache_lock);
08673          initCacheMutex = 1;
08674     }
08675     if (wc_LockMutex(&sp_cache_lock) != 0)
08676        err = BAD_MUTEX_E;
08677 #endif /* HAVE_THREAD_LS */
08678 
08679     if (err == MP_OKAY) {
08680         sp_ecc_get_cache(g, &cache);
08681         if (cache->cnt == 2)
08682             sp_256_gen_stripe_table_5(g, cache->table, tmp, heap);
08683 
08684 #ifndef HAVE_THREAD_LS
08685         wc_UnLockMutex(&sp_cache_lock);
08686 #endif /* HAVE_THREAD_LS */
08687 
08688         if (cache->cnt < 2) {
08689             err = sp_256_ecc_mulmod_fast_5(r, g, k, map, heap);
08690         }
08691         else {
08692             err = sp_256_ecc_mulmod_stripe_5(r, g, cache->table, k,
08693                     map, heap);
08694         }
08695     }
08696 
08697     return err;
08698 #endif
08699 }
08700 
08701 #endif
08702 /* Multiply the point by the scalar and return the result.
08703  * If map is true then convert result to affine co-ordinates.
08704  *
08705  * km    Scalar to multiply by.
08706  * p     Point to multiply.
08707  * r     Resulting point.
08708  * map   Indicates whether to convert result to affine.
08709  * heap  Heap to use for allocation.
08710  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
08711  */
08712 int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map,
08713         void* heap)
08714 {
08715 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
08716     sp_point p;
08717     sp_digit kd[5];
08718 #endif
08719     sp_point* point;
08720     sp_digit* k = NULL;
08721     int err = MP_OKAY;
08722 #ifdef HAVE_INTEL_AVX2
08723     word32 cpuid_flags = cpuid_get_flags();
08724 #endif
08725 
08726     err = sp_ecc_point_new(heap, p, point);
08727 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
08728     if (err == MP_OKAY) {
08729         k = XMALLOC(sizeof(sp_digit) * 5, heap, DYNAMIC_TYPE_ECC);
08730         if (k == NULL)
08731             err = MEMORY_E;
08732     }
08733 #else
08734     k = kd;
08735 #endif
08736     if (err == MP_OKAY) {
08737         sp_256_from_mp(k, 5, km);
08738         sp_256_point_from_ecc_point_5(point, gm);
08739 
08740 #ifdef HAVE_INTEL_AVX2
08741         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
08742             err = sp_256_ecc_mulmod_avx2_5(point, point, k, map, heap);
08743         else
08744 #endif
08745             err = sp_256_ecc_mulmod_5(point, point, k, map, heap);
08746     }
08747     if (err == MP_OKAY)
08748         err = sp_256_point_to_ecc_point_5(point, r);
08749 
08750 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
08751     if (k != NULL)
08752         XFREE(k, heap, DYNAMIC_TYPE_ECC);
08753 #endif
08754     sp_ecc_point_free(point, 0, heap);
08755 
08756     return err;
08757 }
08758 
08759 #ifdef WOLFSSL_SP_SMALL
08760 /* Multiply the base point of P256 by the scalar and return the result.
08761  * If map is true then convert result to affine co-ordinates.
08762  *
08763  * r     Resulting point.
08764  * k     Scalar to multiply by.
08765  * map   Indicates whether to convert result to affine.
08766  * heap  Heap to use for allocation.
08767  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
08768  */
08769 static int sp_256_ecc_mulmod_base_5(sp_point* r, sp_digit* k,
08770         int map, void* heap)
08771 {
08772     /* No pre-computed values. */
08773     return sp_256_ecc_mulmod_5(r, &p256_base, k, map, heap);
08774 }
08775 
08776 #else
08777 static sp_table_entry p256_table[256] = {
08778     /* 0 */
08779     { { 0x00, 0x00, 0x00, 0x00, 0x00 },
08780       { 0x00, 0x00, 0x00, 0x00, 0x00 },
08781       1 },
08782     /* 1 */
08783     { { 0x730d418a9143cl,0xfc5fedb60179el,0x762251075ba95l,0x55c679fb732b7l,
08784         0x018905f76a537l },
08785       { 0x25357ce95560al,0xe4ba19e45cddfl,0xd21f3258b4ab8l,0x5d85d2e88688dl,
08786         0x08571ff182588l },
08787       0 },
08788     /* 2 */
08789     { { 0x886024147519al,0xac26b372f0202l,0x785ebc8d0981el,0x58e9a9d4a7caal,
08790         0x0d953c50ddbdfl },
08791       { 0x361ccfd590f8fl,0x6b44e6c9179d6l,0x2eb64cf72e962l,0x88f37fd961102l,
08792         0x0863ebb7e9eb2l },
08793       0 },
08794     /* 3 */
08795     { { 0x6b6235cdb6485l,0xa22f0a2f97785l,0xf7e300b808f0el,0x80a03e68d9544l,
08796         0x000076055b5ffl },
08797       { 0x4eb9b838d2010l,0xbb3243708a763l,0x42a660654014fl,0x3ee0e0e47d398l,
08798         0x0830877613437l },
08799       0 },
08800     /* 4 */
08801     { { 0x22fc516a0d2bbl,0x6c1a6234994f9l,0x7c62c8b0d5cc1l,0x667f9241cf3a5l,
08802         0x02f5e6961fd1bl },
08803       { 0x5c70bf5a01797l,0x4d609561925c1l,0x71fdb523d20b4l,0x0f7b04911b370l,
08804         0x0f648f9168d6fl },
08805       0 },
08806     /* 5 */
08807     { { 0x66847e137bbbcl,0x9e8a6a0bec9e5l,0x9d73463e43446l,0x0015b1c427617l,
08808         0x05abe0285133dl },
08809       { 0xa837cc04c7dabl,0x4c43260c0792al,0x8e6cc37573d9fl,0x73830c9315627l,
08810         0x094bb725b6b6fl },
08811       0 },
08812     /* 6 */
08813     { { 0x9b48f720f141cl,0xcd2df5bc74bbfl,0x11045c46199b3l,0xc4efdc3f61294l,
08814         0x0cdd6bbcb2f7dl },
08815       { 0x6700beaf436fdl,0x6db99326beccal,0x14f25226f647fl,0xe5f60c0fa7920l,
08816         0x0a361bebd4bdal },
08817       0 },
08818     /* 7 */
08819     { { 0xa2558597c13c7l,0x5f50b7c3e128al,0x3c09d1dc38d63l,0x292c07039aecfl,
08820         0x0ba12ca09c4b5l },
08821       { 0x08fa459f91dfdl,0x66ceea07fb9e4l,0xd780b293af43bl,0xef4b1eceb0899l,
08822         0x053ebb99d701fl },
08823       0 },
08824     /* 8 */
08825     { { 0x7ee31b0e63d34l,0x72a9e54fab4fel,0x5e7b5a4f46005l,0x4831c0493334dl,
08826         0x08589fb9206d5l },
08827       { 0x0f5cc6583553al,0x4ae25649e5aa7l,0x0044652087909l,0x1c4fcc9045071l,
08828         0x0ebb0696d0254l },
08829       0 },
08830     /* 9 */
08831     { { 0x6ca15ac1647c5l,0x47c4cf5799461l,0x64dfbacb8127dl,0x7da3dc666aa37l,
08832         0x0eb2820cbd1b2l },
08833       { 0x6f8d86a87e008l,0x9d922378f3940l,0x0ccecb2d87dfal,0xda1d56ed2e428l,
08834         0x01f28289b55a7l },
08835       0 },
08836     /* 10 */
08837     { { 0xaa0c03b89da99l,0x9eb8284022abbl,0x81c05e8a6f2d7l,0x4d6327847862bl,
08838         0x0337a4b5905e5l },
08839       { 0x7500d21f7794al,0xb77d6d7f613c6l,0x4cfd6e8207005l,0xfbd60a5a37810l,
08840         0x00d65e0d5f4c2l },
08841       0 },
08842     /* 11 */
08843     { { 0x09bbeb5275d38l,0x450be0a358d9dl,0x73eb2654268a7l,0xa232f0762ff49l,
08844         0x0c23da24252f4l },
08845       { 0x1b84f0b94520cl,0x63b05bd78e5dal,0x4d29ea1096667l,0xcff13a4dcb869l,
08846         0x019de3b8cc790l },
08847       0 },
08848     /* 12 */
08849     { { 0xa716c26c5fe04l,0x0b3bba1bdb183l,0x4cb712c3b28del,0xcbfd7432c586al,
08850         0x0e34dcbd491fcl },
08851       { 0x8d46baaa58403l,0x8682e97a53b40l,0x6aaa8af9a6974l,0x0f7f9e3901273l,
08852         0x0e7641f447b4el },
08853       0 },
08854     /* 13 */
08855     { { 0x53941df64ba59l,0xec0b0242fc7d7l,0x1581859d33f10l,0x57bf4f06dfc6al,
08856         0x04a12df57052al },
08857       { 0x6338f9439dbd0l,0xd4bde53e1fbfal,0x1f1b314d3c24bl,0xea46fd5e4ffa2l,
08858         0x06af5aa93bb5bl },
08859       0 },
08860     /* 14 */
08861     { { 0x0b69910c91999l,0x402a580491da1l,0x8cc20900a24b4l,0x40133e0094b4bl,
08862         0x05fe3475a66a4l },
08863       { 0x8cabdf93e7b4bl,0x1a7c23f91ab0fl,0xd1e6263292b50l,0xa91642e889aecl,
08864         0x0b544e308ecfel },
08865       0 },
08866     /* 15 */
08867     { { 0x8c6e916ddfdcel,0x66f89179e6647l,0xd4e67e12c3291l,0xc20b4e8d6e764l,
08868         0x0e0b6b2bda6b0l },
08869       { 0x12df2bb7efb57l,0xde790c40070d3l,0x79bc9441aac0dl,0x3774f90336ad6l,
08870         0x071c023de25a6l },
08871       0 },
08872     /* 16 */
08873     { { 0x8c244bfe20925l,0xc38fdce86762al,0xd38706391c19al,0x24f65a96a5d5dl,
08874         0x061d587d421d3l },
08875       { 0x673a2a37173eal,0x0853778b65e87l,0x5bab43e238480l,0xefbe10f8441e0l,
08876         0x0fa11fe124621l },
08877       0 },
08878     /* 17 */
08879     { { 0x91f2b2cb19ffdl,0x5bb1923c231c8l,0xac5ca8e01ba8dl,0xbedcb6d03d678l,
08880         0x0586eb04c1f13l },
08881       { 0x5c6e527e8ed09l,0x3c1819ede20c3l,0x6c652fa1e81a3l,0x4f11278fd6c05l,
08882         0x019d5ac087086l },
08883       0 },
08884     /* 18 */
08885     { { 0x9f581309a4e1fl,0x1be92700741e9l,0xfd28d20ab7de7l,0x563f26a5ef0bel,
08886         0x0e7c0073f7f9cl },
08887       { 0xd663a0ef59f76l,0x5420fcb0501f6l,0xa6602d4669b3bl,0x3c0ac08c1f7a7l,
08888         0x0e08504fec65bl },
08889       0 },
08890     /* 19 */
08891     { { 0x8f68da031b3cal,0x9ee6da6d66f09l,0x4f246e86d1cabl,0x96b45bfd81fa9l,
08892         0x078f018825b09l },
08893       { 0xefde43a25787fl,0x0d1dccac9bb7el,0x35bfc368016f8l,0x747a0cea4877bl,
08894         0x043a773b87e94l },
08895       0 },
08896     /* 20 */
08897     { { 0x77734d2b533d5l,0xf6a1bdddc0625l,0x79ec293673b8al,0x66b1577e7c9aal,
08898         0x0bb6de651c3b2l },
08899       { 0x9303ab65259b3l,0xd3d03a7480e7el,0xb3cfc27d6a0afl,0xb99bc5ac83d19l,
08900         0x060b4619a5d18l },
08901       0 },
08902     /* 21 */
08903     { { 0xa38e11ae5aa1cl,0x2b49e73658bd6l,0xe5f87edb8b765l,0xffcd0b130014el,
08904         0x09d0f27b2aeebl },
08905       { 0x246317a730a55l,0x2fddbbc83aca9l,0xc019a719c955bl,0xc48d07c1dfe0al,
08906         0x0244a566d356el },
08907       0 },
08908     /* 22 */
08909     { { 0x0394aeacf1f96l,0xa9024c271c6dbl,0x2cbd3b99f2122l,0xef692626ac1b8l,
08910         0x045e58c873581l },
08911       { 0xf479da38f9dbcl,0x46e888a040d3fl,0x6e0bed7a8aaf1l,0xb7a4945adfb24l,
08912         0x0c040e21cc1e4l },
08913       0 },
08914     /* 23 */
08915     { { 0xaf0006f8117b6l,0xff73a35433847l,0xd9475eb651969l,0x6ec7482b35761l,
08916         0x01cdf5c97682cl },
08917       { 0x775b411f04839l,0xf448de16987dbl,0x70b32197dbeacl,0xff3db2921dd1bl,
08918         0x0046755f8a92dl },
08919       0 },
08920     /* 24 */
08921     { { 0xac5d2bce8ffcdl,0x8b2fe61a82cc8l,0x202d6c70d53c4l,0xa5f3f6f161727l,
08922         0x0046e5e113b83l },
08923       { 0x8ff64d8007f01l,0x125af43183e7bl,0x5e1a03c7fb1efl,0x005b045c5ea63l,
08924         0x06e0106c3303dl },
08925       0 },
08926     /* 25 */
08927     { { 0x7358488dd73b1l,0x8f995ed0d948cl,0x56a2ab7767070l,0xcf1f38385ea8cl,
08928         0x0442594ede901l },
08929       { 0xaa2c912d4b65bl,0x3b96c90c37f8fl,0xe978d1f94c234l,0xe68ed326e4a15l,
08930         0x0a796fa514c2el },
08931       0 },
08932     /* 26 */
08933     { { 0xfb604823addd7l,0x83e56693b3359l,0xcbf3c809e2a61l,0x66e9f885b78e3l,
08934         0x0e4ad2da9c697l },
08935       { 0xf7f428e048a61l,0x8cc092d9a0357l,0x03ed8ef082d19l,0x5143fc3a1af4cl,
08936         0x0c5e94046c37bl },
08937       0 },
08938     /* 27 */
08939     { { 0xa538c2be75f9el,0xe8cb123a78476l,0x109c04b6fd1a9l,0x4747d85e4df0bl,
08940         0x063283dafdb46l },
08941       { 0x28cf7baf2df15l,0x550ad9a7f4ce7l,0x834bcc3e592c4l,0xa938fab226adel,
08942         0x068bd19ab1981l },
08943       0 },
08944     /* 28 */
08945     { { 0xead511887d659l,0xf4b359305ac08l,0xfe74fe33374d5l,0xdfd696986981cl,
08946         0x0495292f53c6fl },
08947       { 0x78c9e1acec896l,0x10ec5b44844a8l,0x64d60a7d964b2l,0x68376696f7e26l,
08948         0x00ec7530d2603l },
08949       0 },
08950     /* 29 */
08951     { { 0x13a05ad2687bbl,0x6af32e21fa2dal,0xdd4607ba1f83bl,0x3f0b390f5ef51l,
08952         0x00f6207a66486l },
08953       { 0x7e3bb0f138233l,0x6c272aa718bd6l,0x6ec88aedd66b9l,0x6dcf8ed004072l,
08954         0x0ff0db07208edl },
08955       0 },
08956     /* 30 */
08957     { { 0xfa1014c95d553l,0xfd5d680a8a749l,0xf3b566fa44052l,0x0ea3183b4317fl,
08958         0x0313b513c8874l },
08959       { 0x2e2ac08d11549l,0x0bb4dee21cb40l,0x7f2320e071ee1l,0x9f8126b987dd4l,
08960         0x02d3abcf986f1l },
08961       0 },
08962     /* 31 */
08963     { { 0x88501815581a2l,0x56632211af4c2l,0xcab2e999a0a6dl,0x8cdf19ba7a0f0l,
08964         0x0c036fa10ded9l },
08965       { 0xe08bac1fbd009l,0x9006d1581629al,0xb9e0d8f0b68b1l,0x0194c2eb32779l,
08966         0x0a6b2a2c4b6d4l },
08967       0 },
08968     /* 32 */
08969     { { 0x3e50f6d3549cfl,0x6ffacd665ed43l,0xe11fcb46f3369l,0x9860695bfdaccl,
08970         0x0810ee252af7cl },
08971       { 0x50fe17159bb2cl,0xbe758b357b654l,0x69fea72f7dfbel,0x17452b057e74dl,
08972         0x0d485717a9273l },
08973       0 },
08974     /* 33 */
08975     { { 0x41a8af0cb5a98l,0x931f3110bf117l,0xb382adfd3da8fl,0x604e1994e2cbal,
08976         0x06a6045a72f9al },
08977       { 0xc0d3fa2b2411dl,0x3e510e96e0170l,0x865b3ccbe0eb8l,0x57903bcc9f738l,
08978         0x0d3e45cfaf9e1l },
08979       0 },
08980     /* 34 */
08981     { { 0xf69bbe83f7669l,0x8272877d6bce1l,0x244278d09f8ael,0xc19c9548ae543l,
08982         0x0207755dee3c2l },
08983       { 0xd61d96fef1945l,0xefb12d28c387bl,0x2df64aa18813cl,0xb00d9fbcd1d67l,
08984         0x048dc5ee57154l },
08985       0 },
08986     /* 35 */
08987     { { 0x790bff7e5a199l,0xcf989ccbb7123l,0xa519c79e0efb8l,0xf445c27a2bfe0l,
08988         0x0f2fb0aeddff6l },
08989       { 0x09575f0b5025fl,0xd740fa9f2241cl,0x80bfbd0550543l,0xd5258fa3c8ad3l,
08990         0x0a13e9015db28l },
08991       0 },
08992     /* 36 */
08993     { { 0x7a350a2b65cbcl,0x722a464226f9fl,0x23f07a10b04b9l,0x526f265ce241el,
08994         0x02bf0d6b01497l },
08995       { 0x4dd3f4b216fb7l,0x67fbdda26ad3dl,0x708505cf7d7b8l,0xe89faeb7b83f6l,
08996         0x042a94a5a162fl },
08997       0 },
08998     /* 37 */
08999     { { 0x6ad0beaadf191l,0x9025a268d7584l,0x94dc1f60f8a48l,0xde3de86030504l,
09000         0x02c2dd969c65el },
09001       { 0x2171d93849c17l,0xba1da250dd6d0l,0xc3a5485460488l,0x6dbc4810c7063l,
09002         0x0f437fa1f42c5l },
09003       0 },
09004     /* 38 */
09005     { { 0x0d7144a0f7dabl,0x931776e9ac6aal,0x5f397860f0497l,0x7aa852c0a050fl,
09006         0x0aaf45b335470l },
09007       { 0x37c33c18d364al,0x063e49716585el,0x5ec5444d40b9bl,0x72bcf41716811l,
09008         0x0cdf6310df4f2l },
09009       0 },
09010     /* 39 */
09011     { { 0x3c6238ea8b7efl,0x1885bc2287747l,0xbda8e3408e935l,0x2ff2419567722l,
09012         0x0f0d008bada9el },
09013       { 0x2671d2414d3b1l,0x85b019ea76291l,0x53bcbdbb37549l,0x7b8b5c61b96d4l,
09014         0x05bd5c2f5ca88l },
09015       0 },
09016     /* 40 */
09017     { { 0xf469ef49a3154l,0x956e2b2e9aef0l,0xa924a9c3e85a5l,0x471945aaec1eal,
09018         0x0aa12dfc8a09el },
09019       { 0x272274df69f1dl,0x2ca2ff5e7326fl,0x7a9dd44e0e4c8l,0xa901b9d8ce73bl,
09020         0x06c036e73e48cl },
09021       0 },
09022     /* 41 */
09023     { { 0xae12a0f6e3138l,0x0025ad345a5cfl,0x5672bc56966efl,0xbe248993c64b4l,
09024         0x0292ff65896afl },
09025       { 0x50d445e213402l,0x274392c9fed52l,0xa1c72e8f6580el,0x7276097b397fdl,
09026         0x0644e0c90311bl },
09027       0 },
09028     /* 42 */
09029     { { 0x421e1a47153f0l,0x79920418c9e1el,0x05d7672b86c3bl,0x9a7793bdce877l,
09030         0x0f25ae793cab7l },
09031       { 0x194a36d869d0cl,0x824986c2641f3l,0x96e945e9d55c8l,0x0a3e49fb5ea30l,
09032         0x039b8e65313dbl },
09033       0 },
09034     /* 43 */
09035     { { 0x54200b6fd2e59l,0x669255c98f377l,0xe2a573935e2c0l,0xdb06d9dab21a0l,
09036         0x039122f2f0f19l },
09037       { 0xce1e003cad53cl,0x0fe65c17e3cfbl,0xaa13877225b2cl,0xff8d72baf1d29l,
09038         0x08de80af8ce80l },
09039       0 },
09040     /* 44 */
09041     { { 0xea8d9207bbb76l,0x7c21782758afbl,0xc0436b1921c7el,0x8c04dfa2b74b1l,
09042         0x0871949062e36l },
09043       { 0x928bba3993df5l,0xb5f3b3d26ab5fl,0x5b55050639d75l,0xfde1011aa78a8l,
09044         0x0fc315e6a5b74l },
09045       0 },
09046     /* 45 */
09047     { { 0xfd41ae8d6ecfal,0xf61aec7f86561l,0x924741d5f8c44l,0x908898452a7b4l,
09048         0x0e6d4a7adee38l },
09049       { 0x52ed14593c75dl,0xa4dd271162605l,0xba2c7db70a70dl,0xae57d2aede937l,
09050         0x035dfaf9a9be2l },
09051       0 },
09052     /* 46 */
09053     { { 0x56fcdaa736636l,0x97ae2cab7e6b9l,0xf34996609f51dl,0x0d2bfb10bf410l,
09054         0x01da5c7d71c83l },
09055       { 0x1e4833cce6825l,0x8ff9573c3b5c4l,0x23036b815ad11l,0xb9d6a28552c7fl,
09056         0x07077c0fddbf4l },
09057       0 },
09058     /* 47 */
09059     { { 0x3ff8d46b9661cl,0x6b0d2cfd71bf6l,0x847f8f7a1dfd3l,0xfe440373e140al,
09060         0x053a8632ee50el },
09061       { 0x6ff68696d8051l,0x95c74f468a097l,0xe4e26bddaec0cl,0xfcc162994dc35l,
09062         0x0028ca76d34e1l },
09063       0 },
09064     /* 48 */
09065     { { 0xd47dcfc9877eel,0x10801d0002d11l,0x4c260b6c8b362l,0xf046d002c1175l,
09066         0x004c17cd86962l },
09067       { 0xbd094b0daddf5l,0x7524ce55c06d9l,0x2da03b5bea235l,0x7474663356e67l,
09068         0x0f7ba4de9fed9l },
09069       0 },
09070     /* 49 */
09071     { { 0xbfa34ebe1263fl,0x3571ae7ce6d0dl,0x2a6f523557637l,0x1c41d24405538l,
09072         0x0e31f96005213l },
09073       { 0xb9216ea6b6ec6l,0x2e73c2fc44d1bl,0x9d0a29437a1d1l,0xd47bc10e7eac8l,
09074         0x0aa3a6259ce34l },
09075       0 },
09076     /* 50 */
09077     { { 0xf9df536f3dcd3l,0x50d2bf7360fbcl,0xf504f5b6cededl,0xdaee491710fadl,
09078         0x02398dd627e79l },
09079       { 0x705a36d09569el,0xbb5149f769cf4l,0x5f6034cea0619l,0x6210ff9c03773l,
09080         0x05717f5b21c04l },
09081       0 },
09082     /* 51 */
09083     { { 0x229c921dd895el,0x0040c284519fel,0xd637ecd8e5185l,0x28defa13d2391l,
09084         0x0660a2c560e3cl },
09085       { 0xa88aed67fcbd0l,0x780ea9f0969ccl,0x2e92b4dc84724l,0x245332b2f4817l,
09086         0x0624ee54c4f52l },
09087       0 },
09088     /* 52 */
09089     { { 0x49ce4d897ecccl,0xd93f9880aa095l,0x43a7c204d49d1l,0xfbc0723c24230l,
09090         0x04f392afb92bdl },
09091       { 0x9f8fa7de44fd9l,0xe457b32156696l,0x68ebc3cb66cfbl,0x399cdb2fa8033l,
09092         0x08a3e7977ccdbl },
09093       0 },
09094     /* 53 */
09095     { { 0x1881f06c4b125l,0x00f6e3ca8cddel,0xc7a13e9ae34e3l,0x4404ef6999de5l,
09096         0x03888d02370c2l },
09097       { 0x8035644f91081l,0x615f015504762l,0x32cd36e3d9fcfl,0x23361827edc86l,
09098         0x0a5e62e471810l },
09099       0 },
09100     /* 54 */
09101     { { 0x25ee32facd6c8l,0x5454bcbc661a8l,0x8df9931699c63l,0x5adc0ce3edf79l,
09102         0x02c4768e6466al },
09103       { 0x6ff8c90a64bc9l,0x20e4779f5cb34l,0xc05e884630a60l,0x52a0d949d064bl,
09104         0x07b5e6441f9e6l },
09105       0 },
09106     /* 55 */
09107     { { 0x9422c1d28444al,0xd8be136a39216l,0xb0c7fcee996c5l,0x744a2387afe5fl,
09108         0x0b8af73cb0c8dl },
09109       { 0xe83aa338b86fdl,0x58a58a5cff5fdl,0x0ac9433fee3f1l,0x0895c9ee8f6f2l,
09110         0x0a036395f7f3fl },
09111       0 },
09112     /* 56 */
09113     { { 0x3c6bba10f7770l,0x81a12a0e248c7l,0x1bc2b9fa6f16dl,0xb533100df6825l,
09114         0x04be36b01875fl },
09115       { 0x6086e9fb56dbbl,0x8b07e7a4f8922l,0x6d52f20306fefl,0x00c0eeaccc056l,
09116         0x08cbc9a871bdcl },
09117       0 },
09118     /* 57 */
09119     { { 0x1895cc0dac4abl,0x40712ff112e13l,0xa1cee57a874a4l,0x35f86332ae7c6l,
09120         0x044e7553e0c08l },
09121       { 0x03fff7734002dl,0x8b0b34425c6d5l,0xe8738b59d35cbl,0xfc1895f702760l,
09122         0x0470a683a5eb8l },
09123       0 },
09124     /* 58 */
09125     { { 0x761dc90513482l,0x2a01e9276a81bl,0xce73083028720l,0xc6efcda441ee0l,
09126         0x016410690c63dl },
09127       { 0x34a066d06a2edl,0x45189b100bf50l,0xb8218c9dd4d77l,0xbb4fd914ae72al,
09128         0x0d73479fd7abcl },
09129       0 },
09130     /* 59 */
09131     { { 0xefb165ad4c6e5l,0x8f5b06d04d7edl,0x575cb14262cf0l,0x666b12ed5bb18l,
09132         0x0816469e30771l },
09133       { 0xb9d79561e291el,0x22c1de1661d7al,0x35e0513eb9dafl,0x3f9cf49827eb1l,
09134         0x00a36dd23f0ddl },
09135       0 },
09136     /* 60 */
09137     { { 0xd32c741d5533cl,0x9e8684628f098l,0x349bd117c5f5al,0xb11839a228adel,
09138         0x0e331dfd6fdbal },
09139       { 0x0ab686bcc6ed8l,0xbdef7a260e510l,0xce850d77160c3l,0x33899063d9a7bl,
09140         0x0d3b4782a492el },
09141       0 },
09142     /* 61 */
09143     { { 0x9b6e8f3821f90l,0xed66eb7aada14l,0xa01311692edd9l,0xa5bd0bb669531l,
09144         0x07281275a4c86l },
09145       { 0x858f7d3ff47e5l,0xbc61016441503l,0xdfd9bb15e1616l,0x505962b0f11a7l,
09146         0x02c062e7ece14l },
09147       0 },
09148     /* 62 */
09149     { { 0xf996f0159ac2el,0x36cbdb2713a76l,0x8e46047281e77l,0x7ef12ad6d2880l,
09150         0x0282a35f92c4el },
09151       { 0x54b1ec0ce5cd2l,0xc91379c2299c3l,0xe82c11ecf99efl,0x2abd992caf383l,
09152         0x0c71cd513554dl },
09153       0 },
09154     /* 63 */
09155     { { 0x5de9c09b578f4l,0x58e3affa7a488l,0x9182f1f1884e2l,0xf3a38f76b1b75l,
09156         0x0c50f6740cf47l },
09157       { 0x4adf3374b68eal,0x2369965fe2a9cl,0x5a53050a406f3l,0x58dc2f86a2228l,
09158         0x0b9ecb3a72129l },
09159       0 },
09160     /* 64 */
09161     { { 0x8410ef4f8b16al,0xfec47b266a56fl,0xd9c87c197241al,0xab1b0a406b8e6l,
09162         0x0803f3e02cd42l },
09163       { 0x309a804dbec69l,0xf73bbad05f7f0l,0xd8e197fa83b85l,0xadc1c6097273al,
09164         0x0c097440e5067l },
09165       0 },
09166     /* 65 */
09167     { { 0xa56f2c379ab34l,0x8b841df8d1846l,0x76c68efa8ee06l,0x1f30203144591l,
09168         0x0f1af32d5915fl },
09169       { 0x375315d75bd50l,0xbaf72f67bc99cl,0x8d7723f837cffl,0x1c8b0613a4184l,
09170         0x023d0f130e2d4l },
09171       0 },
09172     /* 66 */
09173     { { 0xab6edf41500d9l,0xe5fcbeada8857l,0x97259510d890al,0xfadd52fe86488l,
09174         0x0b0288dd6c0a3l },
09175       { 0x20f30650bcb08l,0x13695d6e16853l,0x989aa7671af63l,0xc8d231f520a7bl,
09176         0x0ffd3724ff408l },
09177       0 },
09178     /* 67 */
09179     { { 0x68e64b458e6cbl,0x20317a5d28539l,0xaa75f56992dadl,0x26df3814ae0b7l,
09180         0x0f5590f4ad78cl },
09181       { 0x24bd3cf0ba55al,0x4a0c778bae0fcl,0x83b674a0fc472l,0x4a201ce9864f6l,
09182         0x018d6da54f6f7l },
09183       0 },
09184     /* 68 */
09185     { { 0x3e225d5be5a2bl,0x835934f3c6ed9l,0x2626ffc6fe799l,0x216a431409262l,
09186         0x050bbb4d97990l },
09187       { 0x191c6e57ec63el,0x40181dcdb2378l,0x236e0f665422cl,0x49c341a8099b0l,
09188         0x02b10011801fel },
09189       0 },
09190     /* 69 */
09191     { { 0x8b5c59b391593l,0xa2598270fcfc6l,0x19adcbbc385f5l,0xae0c7144f3aadl,
09192         0x0dd55899983fbl },
09193       { 0x88b8e74b82ff4l,0x4071e734c993bl,0x3c0322ad2e03cl,0x60419a7a9eaf4l,
09194         0x0e6e4c551149dl },
09195       0 },
09196     /* 70 */
09197     { { 0x655bb1e9af288l,0x64f7ada93155fl,0xb2820e5647e1al,0x56ff43697e4bcl,
09198         0x051e00db107edl },
09199       { 0x169b8771c327el,0x0b4a96c2ad43dl,0xdeb477929cdb2l,0x9177c07d51f53l,
09200         0x0e22f42414982l },
09201       0 },
09202     /* 71 */
09203     { { 0x5e8f4635f1abbl,0xb568538874cd4l,0x5a8034d7edc0cl,0x48c9c9472c1fbl,
09204         0x0f709373d52dcl },
09205       { 0x966bba8af30d6l,0x4af137b69c401l,0x361c47e95bf5fl,0x5b113966162a9l,
09206         0x0bd52d288e727l },
09207       0 },
09208     /* 72 */
09209     { { 0x55c7a9c5fa877l,0x727d3a3d48ab1l,0x3d189d817dad6l,0x77a643f43f9e7l,
09210         0x0a0d0f8e4c8aal },
09211       { 0xeafd8cc94f92dl,0xbe0c4ddb3a0bbl,0x82eba14d818c8l,0x6a0022cc65f8bl,
09212         0x0a56c78c7946dl },
09213       0 },
09214     /* 73 */
09215     { { 0x2391b0dd09529l,0xa63daddfcf296l,0xb5bf481803e0el,0x367a2c77351f5l,
09216         0x0d8befdf8731al },
09217       { 0x19d42fc0157f4l,0xd7fec8e650ab9l,0x2d48b0af51cael,0x6478cdf9cb400l,
09218         0x0854a68a5ce9fl },
09219       0 },
09220     /* 74 */
09221     { { 0x5f67b63506ea5l,0x89a4fe0d66dc3l,0xe95cd4d9286c4l,0x6a953f101d3bfl,
09222         0x05cacea0b9884l },
09223       { 0xdf60c9ceac44dl,0xf4354d1c3aa90l,0xd5dbabe3db29al,0xefa908dd3de8al,
09224         0x0e4982d1235e4l },
09225       0 },
09226     /* 75 */
09227     { { 0x04a22c34cd55el,0xb32680d132231l,0xfa1d94358695bl,0x0499fb345afa1l,
09228         0x08046b7f616b2l },
09229       { 0x3581e38e7d098l,0x8df46f0b70b53l,0x4cb78c4d7f61el,0xaf5530dea9ea4l,
09230         0x0eb17ca7b9082l },
09231       0 },
09232     /* 76 */
09233     { { 0x1b59876a145b9l,0x0fc1bc71ec175l,0x92715bba5cf6bl,0xe131d3e035653l,
09234         0x0097b00bafab5l },
09235       { 0x6c8e9565f69e1l,0x5ab5be5199aa6l,0xa4fd98477e8f7l,0xcc9e6033ba11dl,
09236         0x0f95c747bafdbl },
09237       0 },
09238     /* 77 */
09239     { { 0xf01d3bebae45el,0xf0c4bc6955558l,0xbc64fc6a8ebe9l,0xd837aeb705b1dl,
09240         0x03512601e566el },
09241       { 0x6f1e1fa1161cdl,0xd54c65ef87933l,0x24f21e5328ab8l,0xab6b4757eee27l,
09242         0x00ef971236068l },
09243       0 },
09244     /* 78 */
09245     { { 0x98cf754ca4226l,0x38f8642c8e025l,0x68e17905eede1l,0xbc9548963f744l,
09246         0x0fc16d9333b4fl },
09247       { 0x6fb31e7c800cal,0x312678adaabe9l,0xff3e8b5138063l,0x7a173d6244976l,
09248         0x014ca4af1b95dl },
09249       0 },
09250     /* 79 */
09251     { { 0x771babd2f81d5l,0x6901f7d1967a4l,0xad9c9071a5f9dl,0x231dd898bef7cl,
09252         0x04057b063f59cl },
09253       { 0xd82fe89c05c0al,0x6f1dc0df85bffl,0x35a16dbe4911cl,0x0b133befccaeal,
09254         0x01c3b5d64f133l },
09255       0 },
09256     /* 80 */
09257     { { 0x14bfe80ec21fel,0x6ac255be825fel,0xf4a5d67f6ce11l,0x63af98bc5a072l,
09258         0x0fad27148db7el },
09259       { 0x0b6ac29ab05b3l,0x3c4e251ae690cl,0x2aade7d37a9a8l,0x1a840a7dc875cl,
09260         0x077387de39f0el },
09261       0 },
09262     /* 81 */
09263     { { 0xecc49a56c0dd7l,0xd846086c741e9l,0x505aecea5cffcl,0xc47e8f7a1408fl,
09264         0x0b37b85c0bef0l },
09265       { 0x6b6e4cc0e6a8fl,0xbf6b388f23359l,0x39cef4efd6d4bl,0x28d5aba453facl,
09266         0x09c135ac8f9f6l },
09267       0 },
09268     /* 82 */
09269     { { 0xa320284e35743l,0xb185a3cdef32al,0xdf19819320d6al,0x851fb821b1761l,
09270         0x05721361fc433l },
09271       { 0xdb36a71fc9168l,0x735e5c403c1f0l,0x7bcd8f55f98bal,0x11bdf64ca87e3l,
09272         0x0dcbac3c9e6bbl },
09273       0 },
09274     /* 83 */
09275     { { 0xd99684518cbe2l,0x189c9eb04ef01l,0x47feebfd242fcl,0x6862727663c7el,
09276         0x0b8c1c89e2d62l },
09277       { 0x58bddc8e1d569l,0xc8b7d88cd051al,0x11f31eb563809l,0x22d426c27fd9fl,
09278         0x05d23bbda2f94l },
09279       0 },
09280     /* 84 */
09281     { { 0xc729495c8f8bel,0x803bf362bf0a1l,0xf63d4ac2961c4l,0xe9009e418403dl,
09282         0x0c109f9cb91ecl },
09283       { 0x095d058945705l,0x96ddeb85c0c2dl,0xa40449bb9083dl,0x1ee184692b8d7l,
09284         0x09bc3344f2eeel },
09285       0 },
09286     /* 85 */
09287     { { 0xae35642913074l,0x2748a542b10d5l,0x310732a55491bl,0x4cc1469ca665bl,
09288         0x029591d525f1al },
09289       { 0xf5b6bb84f983fl,0x419f5f84e1e76l,0x0baa189be7eefl,0x332c1200d4968l,
09290         0x06376551f18efl },
09291       0 },
09292     /* 86 */
09293     { { 0x5f14e562976ccl,0xe60ef12c38bdal,0xcca985222bca3l,0x987abbfa30646l,
09294         0x0bdb79dc808e2l },
09295       { 0xcb5c9cb06a772l,0xaafe536dcefd2l,0xc2b5db838f475l,0xc14ac2a3e0227l,
09296         0x08ee86001add3l },
09297       0 },
09298     /* 87 */
09299     { { 0x96981a4ade873l,0x4dc4fba48ccbel,0xa054ba57ee9aal,0xaa4b2cee28995l,
09300         0x092e51d7a6f77l },
09301       { 0xbafa87190a34dl,0x5bf6bd1ed1948l,0xcaf1144d698f7l,0xaaaad00ee6e30l,
09302         0x05182f86f0a56l },
09303       0 },
09304     /* 88 */
09305     { { 0x6212c7a4cc99cl,0x683e6d9ca1fbal,0xac98c5aff609bl,0xa6f25dbb27cb5l,
09306         0x091dcab5d4073l },
09307       { 0x6cc3d5f575a70l,0x396f8d87fa01bl,0x99817360cb361l,0x4f2b165d4e8c8l,
09308         0x017a0cedb9797l },
09309       0 },
09310     /* 89 */
09311     { { 0x61e2a076c8d3al,0x39210f924b388l,0x3a835d9701aadl,0xdf4194d0eae41l,
09312         0x02e8ce36c7f4cl },
09313       { 0x73dab037a862bl,0xb760e4c8fa912l,0x3baf2dd01ba9bl,0x68f3f96453883l,
09314         0x0f4ccc6cb34f6l },
09315       0 },
09316     /* 90 */
09317     { { 0xf525cf1f79687l,0x9592efa81544el,0x5c78d297c5954l,0xf3c9e1231741al,
09318         0x0ac0db4889a0dl },
09319       { 0xfc711df01747fl,0x58ef17df1386bl,0xccb6bb5592b93l,0x74a2e5880e4f5l,
09320         0x095a64a6194c9l },
09321       0 },
09322     /* 91 */
09323     { { 0x1efdac15a4c93l,0x738258514172cl,0x6cb0bad40269bl,0x06776a8dfb1c1l,
09324         0x0231e54ba2921l },
09325       { 0xdf9178ae6d2dcl,0x3f39112918a70l,0xe5b72234d6aa6l,0x31e1f627726b5l,
09326         0x0ab0be032d8a7l },
09327       0 },
09328     /* 92 */
09329     { { 0xad0e98d131f2dl,0xe33b04f101097l,0x5e9a748637f09l,0xa6791ac86196dl,
09330         0x0f1bcc8802cf6l },
09331       { 0x69140e8daacb4l,0x5560f6500925cl,0x77937a63c4e40l,0xb271591cc8fc4l,
09332         0x0851694695aebl },
09333       0 },
09334     /* 93 */
09335     { { 0x5c143f1dcf593l,0x29b018be3bde3l,0xbdd9d3d78202bl,0x55d8e9cdadc29l,
09336         0x08f67d9d2daadl },
09337       { 0x116567481ea5fl,0xe9e34c590c841l,0x5053fa8e7d2ddl,0x8b5dffdd43f40l,
09338         0x0f84572b9c072l },
09339       0 },
09340     /* 94 */
09341     { { 0xa7a7197af71c9l,0x447a7365655e1l,0xe1d5063a14494l,0x2c19a1b4ae070l,
09342         0x0edee2710616bl },
09343       { 0x034f511734121l,0x554a25e9f0b2fl,0x40c2ecf1cac6el,0xd7f48dc148f3al,
09344         0x09fd27e9b44ebl },
09345       0 },
09346     /* 95 */
09347     { { 0x7658af6e2cb16l,0x2cfe5919b63ccl,0x68d5583e3eb7dl,0xf3875a8c58161l,
09348         0x0a40c2fb6958fl },
09349       { 0xec560fedcc158l,0xc655f230568c9l,0xa307e127ad804l,0xdecfd93967049l,
09350         0x099bc9bb87dc6l },
09351       0 },
09352     /* 96 */
09353     { { 0x9521d927dafc6l,0x695c09cd1984al,0x9366dde52c1fbl,0x7e649d9581a0fl,
09354         0x09abe210ba16dl },
09355       { 0xaf84a48915220l,0x6a4dd816c6480l,0x681ca5afa7317l,0x44b0c7d539871l,
09356         0x07881c25787f3l },
09357       0 },
09358     /* 97 */
09359     { { 0x99b51e0bcf3ffl,0xc5127f74f6933l,0xd01d9680d02cbl,0x89408fb465a2dl,
09360         0x015e6e319a30el },
09361       { 0xd6e0d3e0e05f4l,0xdc43588404646l,0x4f850d3fad7bdl,0x72cebe61c7d1cl,
09362         0x00e55facf1911l },
09363       0 },
09364     /* 98 */
09365     { { 0xd9806f8787564l,0x2131e85ce67e9l,0x819e8d61a3317l,0x65776b0158cabl,
09366         0x0d73d09766fe9l },
09367       { 0x834251eb7206el,0x0fc618bb42424l,0xe30a520a51929l,0xa50b5dcbb8595l,
09368         0x09250a3748f15l },
09369       0 },
09370     /* 99 */
09371     { { 0xf08f8be577410l,0x035077a8c6cafl,0xc0a63a4fd408al,0x8c0bf1f63289el,
09372         0x077414082c1ccl },
09373       { 0x40fa6eb0991cdl,0x6649fdc29605al,0x324fd40c1ca08l,0x20b93a68a3c7bl,
09374         0x08cb04f4d12ebl },
09375       0 },
09376     /* 100 */
09377     { { 0x2d0556906171cl,0xcdb0240c3fb1cl,0x89068419073e9l,0x3b51db8e6b4fdl,
09378         0x0e4e429ef4712l },
09379       { 0xdd53c38ec36f4l,0x01ff4b6a270b8l,0x79a9a48f9d2dcl,0x65525d066e078l,
09380         0x037bca2ff3c6el },
09381       0 },
09382     /* 101 */
09383     { { 0x2e3c7df562470l,0xa2c0964ac94cdl,0x0c793be44f272l,0xb22a7c6d5df98l,
09384         0x059913edc3002l },
09385       { 0x39a835750592al,0x80e783de027a1l,0xa05d64f99e01dl,0xe226cf8c0375el,
09386         0x043786e4ab013l },
09387       0 },
09388     /* 102 */
09389     { { 0x2b0ed9e56b5a6l,0xa6d9fc68f9ff3l,0x97846a70750d9l,0x9e7aec15e8455l,
09390         0x08638ca98b7e7l },
09391       { 0xae0960afc24b2l,0xaf4dace8f22f5l,0xecba78f05398el,0xa6f03b765dd0al,
09392         0x01ecdd36a7b3al },
09393       0 },
09394     /* 103 */
09395     { { 0xacd626c5ff2f3l,0xc02873a9785d3l,0x2110d54a2d516l,0xf32dad94c9fadl,
09396         0x0d85d0f85d459l },
09397       { 0x00b8d10b11da3l,0x30a78318c49f7l,0x208decdd2c22cl,0x3c62556988f49l,
09398         0x0a04f19c3b4edl },
09399       0 },
09400     /* 104 */
09401     { { 0x924c8ed7f93bdl,0x5d392f51f6087l,0x21b71afcb64acl,0x50b07cae330a8l,
09402         0x092b2eeea5c09l },
09403       { 0xc4c9485b6e235l,0xa92936c0f085al,0x0508891ab2ca4l,0x276c80faa6b3el,
09404         0x01ee782215834l },
09405       0 },
09406     /* 105 */
09407     { { 0xa2e00e63e79f7l,0xb2f399d906a60l,0x607c09df590e7l,0xe1509021054a6l,
09408         0x0f3f2ced857a6l },
09409       { 0x510f3f10d9b55l,0xacd8642648200l,0x8bd0e7c9d2fcfl,0xe210e5631aa7el,
09410         0x00f56a4543da3l },
09411       0 },
09412     /* 106 */
09413     { { 0x1bffa1043e0dfl,0xcc9c007e6d5b2l,0x4a8517a6c74b6l,0xe2631a656ec0dl,
09414         0x0bd8f17411969l },
09415       { 0xbbb86beb7494al,0x6f45f3b8388a9l,0x4e5a79a1567d4l,0xfa09df7a12a7al,
09416         0x02d1a1c3530ccl },
09417       0 },
09418     /* 107 */
09419     { { 0xe3813506508dal,0xc4a1d795a7192l,0xa9944b3336180l,0xba46cddb59497l,
09420         0x0a107a65eb91fl },
09421       { 0x1d1c50f94d639l,0x758a58b7d7e6dl,0xd37ca1c8b4af3l,0x9af21a7c5584bl,
09422         0x0183d760af87al },
09423       0 },
09424     /* 108 */
09425     { { 0x697110dde59a4l,0x070e8bef8729dl,0xf2ebe78f1ad8dl,0xd754229b49634l,
09426         0x01d44179dc269l },
09427       { 0xdc0cf8390d30el,0x530de8110cb32l,0xbc0339a0a3b27l,0xd26231af1dc52l,
09428         0x0771f9cc29606l },
09429       0 },
09430     /* 109 */
09431     { { 0x93e7785040739l,0xb98026a939999l,0x5f8fc2644539dl,0x718ecf40f6f2fl,
09432         0x064427a310362l },
09433       { 0xf2d8785428aa8l,0x3febfb49a84f4l,0x23d01ac7b7adcl,0x0d6d201b2c6dfl,
09434         0x049d9b7496ae9l },
09435       0 },
09436     /* 110 */
09437     { { 0x8d8bc435d1099l,0x4e8e8d1a08cc7l,0xcb68a412adbcdl,0x544502c2e2a02l,
09438         0x09037d81b3f60l },
09439       { 0xbac27074c7b61l,0xab57bfd72e7cdl,0x96d5352fe2031l,0x639c61ccec965l,
09440         0x008c3de6a7cc0l },
09441       0 },
09442     /* 111 */
09443     { { 0xdd020f6d552abl,0x9805cd81f120fl,0x135129156baffl,0x6b2f06fb7c3e9l,
09444         0x0c69094424579l },
09445       { 0x3ae9c41231bd1l,0x875cc5820517bl,0x9d6a1221eac6el,0x3ac0208837abfl,
09446         0x03fa3db02cafel },
09447       0 },
09448     /* 112 */
09449     { { 0xa3e6505058880l,0xef643943f2d75l,0xab249257da365l,0x08ff4147861cfl,
09450         0x0c5c4bdb0fdb8l },
09451       { 0x13e34b272b56bl,0x9511b9043a735l,0x8844969c8327el,0xb6b5fd8ce37dfl,
09452         0x02d56db9446c2l },
09453       0 },
09454     /* 113 */
09455     { { 0x1782fff46ac6bl,0x2607a2e425246l,0x9a48de1d19f79l,0xba42fafea3c40l,
09456         0x00f56bd9de503l },
09457       { 0xd4ed1345cda49l,0xfc816f299d137l,0xeb43402821158l,0xb5f1e7c6a54aal,
09458         0x04003bb9d1173l },
09459       0 },
09460     /* 114 */
09461     { { 0xe8189a0803387l,0xf539cbd4043b8l,0x2877f21ece115l,0x2f9e4297208ddl,
09462         0x053765522a07fl },
09463       { 0x80a21a8a4182dl,0x7a3219df79a49l,0xa19a2d4a2bbd0l,0x4549674d0a2e1l,
09464         0x07a056f586c5dl },
09465       0 },
09466     /* 115 */
09467     { { 0xb25589d8a2a47l,0x48c3df2773646l,0xbf0d5395b5829l,0x267551ec000eal,
09468         0x077d482f17a1al },
09469       { 0x1bd9587853948l,0xbd6cfbffeeb8al,0x0681e47a6f817l,0xb0e4ab6ec0578l,
09470         0x04115012b2b38l },
09471       0 },
09472     /* 116 */
09473     { { 0x3f0f46de28cedl,0x609b13ec473c7l,0xe5c63921d5da7l,0x094661b8ce9e6l,
09474         0x0cdf04572fbeal },
09475       { 0x3c58b6c53c3b0l,0x10447b843c1cbl,0xcb9780e97fe3cl,0x3109fb2b8ae12l,
09476         0x0ee703dda9738l },
09477       0 },
09478     /* 117 */
09479     { { 0x15140ff57e43al,0xd3b1b811b8345l,0xf42b986d44660l,0xce212b3b5dff8l,
09480         0x02a0ad89da162l },
09481       { 0x4a6946bc277bal,0x54c141c27664el,0xabf6274c788c9l,0x4659141aa64ccl,
09482         0x0d62d0b67ac2bl },
09483       0 },
09484     /* 118 */
09485     { { 0x5d87b2c054ac4l,0x59f27df78839cl,0x18128d6570058l,0x2426edf7cbf3bl,
09486         0x0b39a23f2991cl },
09487       { 0x84a15f0b16ae5l,0xb1a136f51b952l,0x27007830c6a05l,0x4cc51d63c137fl,
09488         0x004ed0092c067l },
09489       0 },
09490     /* 119 */
09491     { { 0x185d19ae90393l,0x294a3d64e61f4l,0x854fc143047b4l,0xc387ae0001a69l,
09492         0x0a0a91fc10177l },
09493       { 0xa3f01ae2c831el,0x822b727e16ff0l,0xa3075b4bb76ael,0x0c418f12c8a15l,
09494         0x0084cf9889ed2l },
09495       0 },
09496     /* 120 */
09497     { { 0x509defca6becfl,0x807dffb328d98l,0x778e8b92fceael,0xf77e5d8a15c44l,
09498         0x0d57955b273abl },
09499       { 0xda79e31b5d4f1l,0x4b3cfa7a1c210l,0xc27c20baa52f0l,0x41f1d4d12089dl,
09500         0x08e14ea4202d1l },
09501       0 },
09502     /* 121 */
09503     { { 0x50345f2897042l,0x1f43402c4aeedl,0x8bdfb218d0533l,0xd158c8d9c194cl,
09504         0x0597e1a372aa4l },
09505       { 0x7ec1acf0bd68cl,0xdcab024945032l,0x9fe3e846d4be0l,0x4dea5b9c8d7acl,
09506         0x0ca3f0236199bl },
09507       0 },
09508     /* 122 */
09509     { { 0xa10b56170bd20l,0xf16d3f5de7592l,0x4b2ade20ea897l,0x07e4a3363ff14l,
09510         0x0bde7fd7e309cl },
09511       { 0xbb6d2b8f5432cl,0xcbe043444b516l,0x8f95b5a210dc1l,0xd1983db01e6ffl,
09512         0x0b623ad0e0a7dl },
09513       0 },
09514     /* 123 */
09515     { { 0xbd67560c7b65bl,0x9023a4a289a75l,0x7b26795ab8c55l,0x137bf8220fd0dl,
09516         0x0d6aa2e4658ecl },
09517       { 0xbc00b5138bb85l,0x21d833a95c10al,0x702a32e8c31d1l,0x513ab24ff00b1l,
09518         0x0111662e02dccl },
09519       0 },
09520     /* 124 */
09521     { { 0x14015efb42b87l,0x701b6c4dff781l,0x7d7c129bd9f5dl,0x50f866ecccd7al,
09522         0x0db3ee1cb94b7l },
09523       { 0xf3db0f34837cfl,0x8bb9578d4fb26l,0xc56657de7eed1l,0x6a595d2cdf937l,
09524         0x0886a64425220l },
09525       0 },
09526     /* 125 */
09527     { { 0x34cfb65b569eal,0x41f72119c13c2l,0x15a619e200111l,0x17bc8badc85dal,
09528         0x0a70cf4eb018al },
09529       { 0xf97ae8c4a6a65l,0x270134378f224l,0xf7e096036e5cfl,0x7b77be3a609e4l,
09530         0x0aa4772abd174l },
09531       0 },
09532     /* 126 */
09533     { { 0x761317aa60cc0l,0x610368115f676l,0xbc1bb5ac79163l,0xf974ded98bb4bl,
09534         0x0611a6ddc30fal },
09535       { 0x78cbcc15ee47al,0x824e0d96a530el,0xdd9ed882e8962l,0x9c8836f35adf3l,
09536         0x05cfffaf81642l },
09537       0 },
09538     /* 127 */
09539     { { 0x54cff9b7a99cdl,0x9d843c45a1c0dl,0x2c739e17bf3b9l,0x994c038a908f6l,
09540         0x06e5a6b237dc1l },
09541       { 0xb454e0ba5db77l,0x7facf60d63ef8l,0x6608378b7b880l,0xabcce591c0c67l,
09542         0x0481a238d242dl },
09543       0 },
09544     /* 128 */
09545     { { 0x17bc035d0b34al,0x6b8327c0a7e34l,0xc0362d1440b38l,0xf9438fb7262dal,
09546         0x02c41114ce0cdl },
09547       { 0x5cef1ad95a0b1l,0xa867d543622bal,0x1e486c9c09b37l,0x929726d6cdd20l,
09548         0x020477abf42ffl },
09549       0 },
09550     /* 129 */
09551     { { 0x5173c18d65dbfl,0x0e339edad82f7l,0xcf1001c77bf94l,0x96b67022d26bdl,
09552         0x0ac66409ac773l },
09553       { 0xbb36fc6261cc3l,0xc9190e7e908b0l,0x45e6c10213f7bl,0x2f856541cebaal,
09554         0x0ce8e6975cc12l },
09555       0 },
09556     /* 130 */
09557     { { 0x21b41bc0a67d2l,0x0a444d248a0f1l,0x59b473762d476l,0xb4a80e044f1d6l,
09558         0x008fde365250bl },
09559       { 0xec3da848bf287l,0x82d3369d6eacel,0x2449482c2a621l,0x6cd73582dfdc9l,
09560         0x02f7e2fd2565dl },
09561       0 },
09562     /* 131 */
09563     { { 0xb92dbc3770fa7l,0x5c379043f9ae4l,0x7761171095e8dl,0x02ae54f34e9d1l,
09564         0x0c65be92e9077l },
09565       { 0x8a303f6fd0a40l,0xe3bcce784b275l,0xf9767bfe7d822l,0x3b3a7ae4f5854l,
09566         0x04bff8e47d119l },
09567       0 },
09568     /* 132 */
09569     { { 0x1d21f00ff1480l,0x7d0754db16cd4l,0xbe0f3ea2ab8fbl,0x967dac81d2efbl,
09570         0x03e4e4ae65772l },
09571       { 0x8f36d3c5303e6l,0x4b922623977e1l,0x324c3c03bd999l,0x60289ed70e261l,
09572         0x05388aefd58ecl },
09573       0 },
09574     /* 133 */
09575     { { 0x317eb5e5d7713l,0xee75de49daad1l,0x74fb26109b985l,0xbe0e32f5bc4fcl,
09576         0x05cf908d14f75l },
09577       { 0x435108e657b12l,0xa5b96ed9e6760l,0x970ccc2bfd421l,0x0ce20e29f51f8l,
09578         0x0a698ba4060f0l },
09579       0 },
09580     /* 134 */
09581     { { 0xb1686ef748fecl,0xa27e9d2cf973dl,0xe265effe6e755l,0xad8d630b6544cl,
09582         0x0b142ef8a7aebl },
09583       { 0x1af9f17d5770al,0x672cb3412fad3l,0xf3359de66af3bl,0x50756bd60d1bdl,
09584         0x0d1896a965851l },
09585       0 },
09586     /* 135 */
09587     { { 0x957ab33c41c08l,0xac5468e2e1ec5l,0xc472f6c87de94l,0xda3918816b73al,
09588         0x0267b0e0b7981l },
09589       { 0x54e5d8e62b988l,0x55116d21e76e5l,0xd2a6f99d8ddc7l,0x93934610faf03l,
09590         0x0b54e287aa111l },
09591       0 },
09592     /* 136 */
09593     { { 0x122b5178a876bl,0xff085104b40a0l,0x4f29f7651ff96l,0xd4e6050b31ab1l,
09594         0x084abb28b5f87l },
09595       { 0xd439f8270790al,0x9d85e3f46bd5el,0xc1e22122d6cb5l,0x564075f55c1b6l,
09596         0x0e5436f671765l },
09597       0 },
09598     /* 137 */
09599     { { 0x9025e2286e8d5l,0xb4864453be53fl,0x408e3a0353c95l,0xe99ed832f5bdel,
09600         0x00404f68b5b9cl },
09601       { 0x33bdea781e8e5l,0x18163c2f5bcadl,0x119caa33cdf50l,0xc701575769600l,
09602         0x03a4263df0ac1l },
09603       0 },
09604     /* 138 */
09605     { { 0x65ecc9aeb596dl,0xe7023c92b4c29l,0xe01396101ea03l,0xa3674704b4b62l,
09606         0x00ca8fd3f905el },
09607       { 0x23a42551b2b61l,0x9c390fcd06925l,0x392a63e1eb7a8l,0x0c33e7f1d2be0l,
09608         0x096dca2644ddbl },
09609       0 },
09610     /* 139 */
09611     { { 0xbb43a387510afl,0xa8a9a36a01203l,0xf950378846feal,0x59dcd23a57702l,
09612         0x04363e2123aadl },
09613       { 0x3a1c740246a47l,0xd2e55dd24dca4l,0xd8faf96b362b8l,0x98c4f9b086045l,
09614         0x0840e115cd8bbl },
09615       0 },
09616     /* 140 */
09617     { { 0x205e21023e8a7l,0xcdd8dc7a0bf12l,0x63a5ddfc808a8l,0xd6d4e292a2721l,
09618         0x05e0d6abd30del },
09619       { 0x721c27cfc0f64l,0x1d0e55ed8807al,0xd1f9db242eec0l,0xa25a26a7bef91l,
09620         0x07dea48f42945l },
09621       0 },
09622     /* 141 */
09623     { { 0xf6f1ce5060a81l,0x72f8f95615abdl,0x6ac268be79f9cl,0x16d1cfd36c540l,
09624         0x0abc2a2beebfdl },
09625       { 0x66f91d3e2eac7l,0x63d2dd04668acl,0x282d31b6f10bal,0xefc16790e3770l,
09626         0x04ea353946c7el },
09627       0 },
09628     /* 142 */
09629     { { 0xa2f8d5266309dl,0xc081945a3eed8l,0x78c5dc10a51c6l,0xffc3cecaf45a5l,
09630         0x03a76e6891c94l },
09631       { 0xce8a47d7b0d0fl,0x968f584a5f9aal,0xe697fbe963acel,0x646451a30c724l,
09632         0x08212a10a465el },
09633       0 },
09634     /* 143 */
09635     { { 0xc61c3cfab8caal,0x840e142390ef7l,0xe9733ca18eb8el,0xb164cd1dff677l,
09636         0x0aa7cab71599cl },
09637       { 0xc9273bc837bd1l,0xd0c36af5d702fl,0x423da49c06407l,0x17c317621292fl,
09638         0x040e38073fe06l },
09639       0 },
09640     /* 144 */
09641     { { 0x80824a7bf9b7cl,0x203fbe30d0f4fl,0x7cf9ce3365d23l,0x5526bfbe53209l,
09642         0x0e3604700b305l },
09643       { 0xb99116cc6c2c7l,0x08ba4cbee64dcl,0x37ad9ec726837l,0xe15fdcded4346l,
09644         0x06542d677a3del },
09645       0 },
09646     /* 145 */
09647     { { 0x2b6d07b6c377al,0x47903448be3f3l,0x0da8af76cb038l,0x6f21d6fdd3a82l,
09648         0x0a6534aee09bbl },
09649       { 0x1780d1035facfl,0x339dcb47e630al,0x447f39335e55al,0xef226ea50fe1cl,
09650         0x0f3cb672fdc9al },
09651       0 },
09652     /* 146 */
09653     { { 0x719fe3b55fd83l,0x6c875ddd10eb3l,0x5cea784e0d7a4l,0x70e733ac9fa90l,
09654         0x07cafaa2eaae8l },
09655       { 0x14d041d53b338l,0xa0ef87e6c69b8l,0x1672b0fe0acc0l,0x522efb93d1081l,
09656         0x00aab13c1b9bdl },
09657       0 },
09658     /* 147 */
09659     { { 0xce278d2681297l,0xb1b509546addcl,0x661aaf2cb350el,0x12e92dc431737l,
09660         0x04b91a6028470l },
09661       { 0xf109572f8ddcfl,0x1e9a911af4dcfl,0x372430e08ebf6l,0x1cab48f4360acl,
09662         0x049534c537232l },
09663       0 },
09664     /* 148 */
09665     { { 0xf7d71f07b7e9dl,0xa313cd516f83dl,0xc047ee3a478efl,0xc5ee78ef264b6l,
09666         0x0caf46c4fd65al },
09667       { 0xd0c7792aa8266l,0x66913684bba04l,0xe4b16b0edf454l,0x770f56e65168al,
09668         0x014ce9e5704c6l },
09669       0 },
09670     /* 149 */
09671     { { 0x45e3e965e8f91l,0xbacb0f2492994l,0x0c8a0a0d3aca1l,0x9a71d31cc70f9l,
09672         0x01bb708a53e4cl },
09673       { 0xa9e69558bdd7al,0x08018a26b1d5cl,0xc9cf1ec734a05l,0x0102b093aa714l,
09674         0x0f9d126f2da30l },
09675       0 },
09676     /* 150 */
09677     { { 0xbca7aaff9563el,0xfeb49914a0749l,0xf5f1671dd077al,0xcc69e27a0311bl,
09678         0x0807afcb9729el },
09679       { 0xa9337c9b08b77l,0x85443c7e387f8l,0x76fd8ba86c3a7l,0xcd8c85fafa594l,
09680         0x0751adcd16568l },
09681       0 },
09682     /* 151 */
09683     { { 0xa38b410715c0dl,0x718f7697f78ael,0x3fbf06dd113eal,0x743f665eab149l,
09684         0x029ec44682537l },
09685       { 0x4719cb50bebbcl,0xbfe45054223d9l,0xd2dedb1399ee5l,0x077d90cd5b3a8l,
09686         0x0ff9370e392a4l },
09687       0 },
09688     /* 152 */
09689     { { 0x2d69bc6b75b65l,0xd5266651c559al,0xde9d7d24188f8l,0xd01a28a9f33e3l,
09690         0x09776478ba2a9l },
09691       { 0x2622d929af2c7l,0x6d4e690923885l,0x89a51e9334f5dl,0x82face6cc7e5al,
09692         0x074a6313fac2fl },
09693       0 },
09694     /* 153 */
09695     { { 0x4dfddb75f079cl,0x9518e36fbbb2fl,0x7cd36dd85b07cl,0x863d1b6cfcf0el,
09696         0x0ab75be150ff4l },
09697       { 0x367c0173fc9b7l,0x20d2594fd081bl,0x4091236b90a74l,0x59f615fdbf03cl,
09698         0x04ebeac2e0b44l },
09699       0 },
09700     /* 154 */
09701     { { 0xc5fe75c9f2c53l,0x118eae9411eb6l,0x95ac5d8d25220l,0xaffcc8887633fl,
09702         0x0df99887b2c1bl },
09703       { 0x8eed2850aaecbl,0x1b01d6a272bb7l,0x1cdbcac9d4918l,0x4058978dd511bl,
09704         0x027b040a7779fl },
09705       0 },
09706     /* 155 */
09707     { { 0x05db7f73b2eb2l,0x088e1b2118904l,0x962327ee0df85l,0xa3f5501b71525l,
09708         0x0b393dd37e4cfl },
09709       { 0x30e7b3fd75165l,0xc2bcd33554a12l,0xf7b5022d66344l,0x34196c36f1be0l,
09710         0x009588c12d046l },
09711       0 },
09712     /* 156 */
09713     { { 0x6093f02601c3bl,0xf8cf5c335fe08l,0x94aff28fb0252l,0x648b955cf2808l,
09714         0x081c879a9db9fl },
09715       { 0xe687cc6f56c51l,0x693f17618c040l,0x059353bfed471l,0x1bc444f88a419l,
09716         0x0fa0d48f55fc1l },
09717       0 },
09718     /* 157 */
09719     { { 0xe1c9de1608e4dl,0x113582822cbc6l,0x57ec2d7010ddal,0x67d6f6b7ddc11l,
09720         0x08ea0e156b6a3l },
09721       { 0x4e02f2383b3b4l,0x943f01f53ca35l,0xde03ca569966bl,0xb5ac4ff6632b2l,
09722         0x03f5ab924fa00l },
09723       0 },
09724     /* 158 */
09725     { { 0xbb0d959739efbl,0xf4e7ebec0d337l,0x11a67d1c751b0l,0x256e2da52dd64l,
09726         0x08bc768872b74l },
09727       { 0xe3b7282d3d253l,0xa1f58d779fa5bl,0x16767bba9f679l,0xf34fa1cac168el,
09728         0x0b386f19060fcl },
09729       0 },
09730     /* 159 */
09731     { { 0x3c1352fedcfc2l,0x6262f8af0d31fl,0x57288c25396bfl,0x9c4d9a02b4eael,
09732         0x04cb460f71b06l },
09733       { 0x7b4d35b8095eal,0x596fc07603ae6l,0x614a16592bbf8l,0x5223e1475f66bl,
09734         0x052c0d50895efl },
09735       0 },
09736     /* 160 */
09737     { { 0xc210e15339848l,0xe870778c8d231l,0x956e170e87a28l,0x9c0b9d1de6616l,
09738         0x04ac3c9382bb0l },
09739       { 0xe05516998987dl,0xc4ae09f4d619bl,0xa3f933d8b2376l,0x05f41de0b7651l,
09740         0x0380d94c7e397l },
09741       0 },
09742     /* 161 */
09743     { { 0x355aa81542e75l,0xa1ee01b9b701al,0x24d708796c724l,0x37af6b3a29776l,
09744         0x02ce3e171de26l },
09745       { 0xfeb49f5d5bc1al,0x7e2777e2b5cfel,0x513756ca65560l,0x4e4d4feaac2f9l,
09746         0x02e6cd8520b62l },
09747       0 },
09748     /* 162 */
09749     { { 0x5954b8c31c31dl,0x005bf21a0c368l,0x5c79ec968533dl,0x9d540bd7626e7l,
09750         0x0ca17754742c6l },
09751       { 0xedafff6d2dbb2l,0xbd174a9d18cc6l,0xa4578e8fd0d8cl,0x2ce6875e8793al,
09752         0x0a976a7139cabl },
09753       0 },
09754     /* 163 */
09755     { { 0x51f1b93fb353dl,0x8b57fcfa720a6l,0x1b15281d75cabl,0x4999aa88cfa73l,
09756         0x08720a7170a1fl },
09757       { 0xe8d37693e1b90l,0x0b16f6dfc38c3l,0x52a8742d345dcl,0x893c8ea8d00abl,
09758         0x09719ef29c769l },
09759       0 },
09760     /* 164 */
09761     { { 0xeed8d58e35909l,0xdc33ddc116820l,0xe2050269366d8l,0x04c1d7f999d06l,
09762         0x0a5072976e157l },
09763       { 0xa37eac4e70b2el,0x576890aa8a002l,0x45b2a5c84dcf6l,0x7725cd71bf186l,
09764         0x099389c9df7b7l },
09765       0 },
09766     /* 165 */
09767     { { 0xc08f27ada7a4bl,0x03fd389366238l,0x66f512c3abe9dl,0x82e46b672e897l,
09768         0x0a88806aa202cl },
09769       { 0x2044ad380184el,0xc4126a8b85660l,0xd844f17a8cb78l,0xdcfe79d670c0al,
09770         0x00043bffb4738l },
09771       0 },
09772     /* 166 */
09773     { { 0x9b5dc36d5192el,0xd34590b2af8d5l,0x1601781acf885l,0x486683566d0a1l,
09774         0x052f3ef01ba6cl },
09775       { 0x6732a0edcb64dl,0x238068379f398l,0x040f3090a482cl,0x7e7516cbe5fa7l,
09776         0x03296bd899ef2l },
09777       0 },
09778     /* 167 */
09779     { { 0xaba89454d81d7l,0xef51eb9b3c476l,0x1c579869eade7l,0x71e9619a21cd8l,
09780         0x03b90febfaee5l },
09781       { 0x3023e5496f7cbl,0xd87fb51bc4939l,0x9beb5ce55be41l,0x0b1803f1dd489l,
09782         0x06e88069d9f81l },
09783       0 },
09784     /* 168 */
09785     { { 0x7ab11b43ea1dbl,0xa95259d292ce3l,0xf84f1860a7ff1l,0xad13851b02218l,
09786         0x0a7222beadefal },
09787       { 0xc78ec2b0a9144l,0x51f2fa59c5a2al,0x147ce385a0240l,0xc69091d1eca56l,
09788         0x0be94d523bc2al },
09789       0 },
09790     /* 169 */
09791     { { 0x4945e0b226ce7l,0x47967e8b7072fl,0x5a6c63eb8afd7l,0xc766edea46f18l,
09792         0x07782defe9be8l },
09793       { 0xd2aa43db38626l,0x8776f67ad1760l,0x4499cdb460ae7l,0x2e4b341b86fc5l,
09794         0x003838567a289l },
09795       0 },
09796     /* 170 */
09797     { { 0xdaefd79ec1a0fl,0xfdceb39c972d8l,0x8f61a953bbcd6l,0xb420f5575ffc5l,
09798         0x0dbd986c4adf7l },
09799       { 0xa881415f39eb7l,0xf5b98d976c81al,0xf2f717d6ee2fcl,0xbbd05465475dcl,
09800         0x08e24d3c46860l },
09801       0 },
09802     /* 171 */
09803     { { 0xd8e549a587390l,0x4f0cbec588749l,0x25983c612bb19l,0xafc846e07da4bl,
09804         0x0541a99c4407bl },
09805       { 0x41692624c8842l,0x2ad86c05ffdb2l,0xf7fcf626044c1l,0x35d1c59d14b44l,
09806         0x0c0092c49f57dl },
09807       0 },
09808     /* 172 */
09809     { { 0xc75c3df2e61efl,0xc82e1b35cad3cl,0x09f29f47e8841l,0x944dc62d30d19l,
09810         0x075e406347286l },
09811       { 0x41fc5bbc237d0l,0xf0ec4f01c9e7dl,0x82bd534c9537bl,0x858691c51a162l,
09812         0x05b7cb658c784l },
09813       0 },
09814     /* 173 */
09815     { { 0xa70848a28ead1l,0x08fd3b47f6964l,0x67e5b39802dc5l,0x97a19ae4bfd17l,
09816         0x07ae13eba8df0l },
09817       { 0x16ef8eadd384el,0xd9b6b2ff06fd2l,0xbcdb5f30361a2l,0xe3fd204b98784l,
09818         0x0787d8074e2a8l },
09819       0 },
09820     /* 174 */
09821     { { 0x25d6b757fbb1cl,0xb2ca201debc5el,0xd2233ffe47bddl,0x84844a55e9a36l,
09822         0x05c2228199ef2l },
09823       { 0xd4a8588315250l,0x2b827097c1773l,0xef5d33f21b21al,0xf2b0ab7c4ea1dl,
09824         0x0e45d37abbaf0l },
09825       0 },
09826     /* 175 */
09827     { { 0xf1e3428511c8al,0xc8bdca6cd3d2dl,0x27c39a7ebb229l,0xb9d3578a71a76l,
09828         0x0ed7bc12284dfl },
09829       { 0x2a6df93dea561l,0x8dd48f0ed1cf2l,0xbad23e85443f1l,0x6d27d8b861405l,
09830         0x0aac97cc945cal },
09831       0 },
09832     /* 176 */
09833     { { 0x4ea74a16bd00al,0xadf5c0bcc1eb5l,0xf9bfc06d839e9l,0xdc4e092bb7f11l,
09834         0x0318f97b31163l },
09835       { 0x0c5bec30d7138l,0x23abc30220eccl,0x022360644e8dfl,0xff4d2bb7972fbl,
09836         0x0fa41faa19a84l },
09837       0 },
09838     /* 177 */
09839     { { 0x2d974a6642269l,0xce9bb783bd440l,0x941e60bc81814l,0xe9e2398d38e47l,
09840         0x038bb6b2c1d26l },
09841       { 0xe4a256a577f87l,0x53dc11fe1cc64l,0x22807288b52d2l,0x01a5ff336abf6l,
09842         0x094dd0905ce76l },
09843       0 },
09844     /* 178 */
09845     { { 0xcf7dcde93f92al,0xcb89b5f315156l,0x995e750a01333l,0x2ae902404df9cl,
09846         0x092077867d25cl },
09847       { 0x71e010bf39d44l,0x2096bb53d7e24l,0xc9c3d8f5f2c90l,0xeb514c44b7b35l,
09848         0x081e8428bd29bl },
09849       0 },
09850     /* 179 */
09851     { { 0x9c2bac477199fl,0xee6b5ecdd96ddl,0xe40fd0e8cb8eel,0xa4b18af7db3fel,
09852         0x01b94ab62dbbfl },
09853       { 0x0d8b3ce47f143l,0xfc63f4616344fl,0xc59938351e623l,0x90eef18f270fcl,
09854         0x006a38e280555l },
09855       0 },
09856     /* 180 */
09857     { { 0xb0139b3355b49l,0x60b4ebf99b2e5l,0x269f3dc20e265l,0xd4f8c08ffa6bdl,
09858         0x0a7b36c2083d9l },
09859       { 0x15c3a1b3e8830l,0xe1a89f9c0b64dl,0x2d16930d5fceal,0x2a20cfeee4a2el,
09860         0x0be54c6b4a282l },
09861       0 },
09862     /* 181 */
09863     { { 0xdb3df8d91167cl,0x79e7a6625ed6cl,0x46ac7f4517c3fl,0x22bb7105648f3l,
09864         0x0bf30a5abeae0l },
09865       { 0x785be93828a68l,0x327f3ef0368e7l,0x92146b25161c3l,0xd13ae11b5feb5l,
09866         0x0d1c820de2732l },
09867       0 },
09868     /* 182 */
09869     { { 0xe13479038b363l,0x546b05e519043l,0x026cad158c11fl,0x8da34fe57abe6l,
09870         0x0b7d17bed68a1l },
09871       { 0xa5891e29c2559l,0x765bfffd8444cl,0x4e469484f7a03l,0xcc64498de4af7l,
09872         0x03997fd5e6412l },
09873       0 },
09874     /* 183 */
09875     { { 0x746828bd61507l,0xd534a64d2af20l,0xa8a15e329e132l,0x13e8ffeddfb08l,
09876         0x00eeb89293c6cl },
09877       { 0x69a3ea7e259f8l,0xe6d13e7e67e9bl,0xd1fa685ce1db7l,0xb6ef277318f6al,
09878         0x0228916f8c922l },
09879       0 },
09880     /* 184 */
09881     { { 0xae25b0a12ab5bl,0x1f957bc136959l,0x16e2b0ccc1117l,0x097e8058429edl,
09882         0x0ec05ad1d6e93l },
09883       { 0xba5beac3f3708l,0x3530b59d77157l,0x18234e531baf9l,0x1b3747b552371l,
09884         0x07d3141567ff1l },
09885       0 },
09886     /* 185 */
09887     { { 0x9c05cf6dfefabl,0x68dcb377077bdl,0xa38bb95be2f22l,0xd7a3e53ead973l,
09888         0x0e9ce66fc9bc1l },
09889       { 0xa15766f6a02a1l,0xdf60e600ed75al,0x8cdc1b938c087l,0x0651f8947f346l,
09890         0x0d9650b017228l },
09891       0 },
09892     /* 186 */
09893     { { 0xb4c4a5a057e60l,0xbe8def25e4504l,0x7c1ccbdcbccc3l,0xb7a2a63532081l,
09894         0x014d6699a804el },
09895       { 0xa8415db1f411al,0x0bf80d769c2c8l,0xc2f77ad09fbafl,0x598ab4deef901l,
09896         0x06f4c68410d43l },
09897       0 },
09898     /* 187 */
09899     { { 0x6df4e96c24a96l,0x85fcbd99a3872l,0xb2ae30a534dbcl,0x9abb3c466ef28l,
09900         0x04c4350fd6118l },
09901       { 0x7f716f855b8dal,0x94463c38a1296l,0xae9334341a423l,0x18b5c37e1413el,
09902         0x0a726d2425a31l },
09903       0 },
09904     /* 188 */
09905     { { 0x6b3ee948c1086l,0x3dcbd3a2e1dael,0x3d022f3f1de50l,0xf3923f35ed3f0l,
09906         0x013639e82cc6cl },
09907       { 0x938fbcdafaa86l,0xfb2654a2589acl,0x5051329f45bc5l,0x35a31963b26e4l,
09908         0x0ca9365e1c1a3l },
09909       0 },
09910     /* 189 */
09911     { { 0x5ac754c3b2d20l,0x17904e241b361l,0xc9d071d742a54l,0x72a5b08521c4cl,
09912         0x09ce29c34970bl },
09913       { 0x81f736d3e0ad6l,0x9ef2f8434c8ccl,0xce862d98060dal,0xaf9835ed1d1a6l,
09914         0x048c4abd7ab42l },
09915       0 },
09916     /* 190 */
09917     { { 0x1b0cc40c7485al,0xbbe5274dbfd22l,0x263d2e8ead455l,0x33cb493c76989l,
09918         0x078017c32f67bl },
09919       { 0x35769930cb5eel,0x940c408ed2b9dl,0x72f1a4dc0d14el,0x1c04f8b7bf552l,
09920         0x053cd0454de5cl },
09921       0 },
09922     /* 191 */
09923     { { 0x585fa5d28ccacl,0x56005b746ebcdl,0xd0123aa5f823el,0xfa8f7c79f0a1cl,
09924         0x0eea465c1d3d7l },
09925       { 0x0659f0551803bl,0x9f7ce6af70781l,0x9288e706c0b59l,0x91934195a7702l,
09926         0x01b6e42a47ae6l },
09927       0 },
09928     /* 192 */
09929     { { 0x0937cf67d04c3l,0xe289eeb8112e8l,0x2594d601e312bl,0xbd3d56b5d8879l,
09930         0x00224da14187fl },
09931       { 0xbb8630c5fe36fl,0x604ef51f5f87al,0x3b429ec580f3cl,0xff33964fb1bfbl,
09932         0x060838ef042bfl },
09933       0 },
09934     /* 193 */
09935     { { 0xcb2f27e0bbe99l,0xf304aa39ee432l,0xfa939037bda44l,0x16435f497c7a9l,
09936         0x0636eb2022d33l },
09937       { 0xd0e6193ae00aal,0xfe31ae6d2ffcfl,0xf93901c875a00l,0x8bacf43658a29l,
09938         0x08844eeb63921l },
09939       0 },
09940     /* 194 */
09941     { { 0x171d26b3bae58l,0x7117e39f3e114l,0x1a8eada7db3dfl,0x789ecd37bc7f8l,
09942         0x027ba83dc51fbl },
09943       { 0xf439ffbf54de5l,0x0bb5fe1a71a7dl,0xb297a48727703l,0xa4ab42ee8e35dl,
09944         0x0adb62d3487f3l },
09945       0 },
09946     /* 195 */
09947     { { 0x168a2a175df2al,0x4f618c32e99b1l,0x46b0916082aa0l,0xc8b2c9e4f2e71l,
09948         0x0b990fd7675e7l },
09949       { 0x9d96b4df37313l,0x79d0b40789082l,0x80877111c2055l,0xd18d66c9ae4a7l,
09950         0x081707ef94d10l },
09951       0 },
09952     /* 196 */
09953     { { 0x7cab203d6ff96l,0xfc0d84336097dl,0x042db4b5b851bl,0xaa5c268823c4dl,
09954         0x03792daead5a8l },
09955       { 0x18865941afa0bl,0x4142d83671528l,0xbe4e0a7f3e9e7l,0x01ba17c825275l,
09956         0x05abd635e94b0l },
09957       0 },
09958     /* 197 */
09959     { { 0xfa84e0ac4927cl,0x35a7c8cf23727l,0xadca0dfe38860l,0xb610a4bcd5ea4l,
09960         0x05995bf21846al },
09961       { 0xf860b829dfa33l,0xae958fc18be90l,0x8630366caafe2l,0x411e9b3baf447l,
09962         0x044c32ca2d483l },
09963       0 },
09964     /* 198 */
09965     { { 0xa97f1e40ed80cl,0xb131d2ca82a74l,0xc2d6ad95f938cl,0xa54c53f2124b7l,
09966         0x01f2162fb8082l },
09967       { 0x67cc5720b173el,0x66085f12f97e4l,0xc9d65dc40e8a6l,0x07c98cebc20e4l,
09968         0x08f1d402bc3e9l },
09969       0 },
09970     /* 199 */
09971     { { 0x92f9cfbc4058al,0xb6292f56704f5l,0xc1d8c57b15e14l,0xdbf9c55cfe37bl,
09972         0x0b1980f43926el },
09973       { 0x33e0932c76b09l,0x9d33b07f7898cl,0x63bb4611df527l,0x8e456f08ead48l,
09974         0x02828ad9b3744l },
09975       0 },
09976     /* 200 */
09977     { { 0x722c4c4cf4ac5l,0x3fdde64afb696l,0x0890832f5ac1al,0xb3900551baa2el,
09978         0x04973f1275a14l },
09979       { 0xd8335322eac5dl,0xf50bd9b568e59l,0x25883935e07eel,0x8ac7ab36720fal,
09980         0x06dac8ed0db16l },
09981       0 },
09982     /* 201 */
09983     { { 0x545aeeda835efl,0xd21d10ed51f7bl,0x3741b094aa113l,0xde4c035a65e01l,
09984         0x04b23ef5920b9l },
09985       { 0xbb6803c4c7341l,0x6d3f58bc37e82l,0x51e3ee8d45770l,0x9a4e73527863al,
09986         0x04dd71534ddf4l },
09987       0 },
09988     /* 202 */
09989     { { 0x4467295476cd9l,0x2fe31a725bbf9l,0xc4b67e0648d07l,0x4dbb1441c8b8fl,
09990         0x0fd3170002f4al },
09991       { 0x43ff48995d0e1l,0xd10ef729aa1cbl,0x179898276e695l,0xf365e0d5f9764l,
09992         0x014fac58c9569l },
09993       0 },
09994     /* 203 */
09995     { { 0xa0065f312ae18l,0xc0fcc93fc9ad9l,0xa7d284651958dl,0xda50d9a142408l,
09996         0x0ed7c765136abl },
09997       { 0x70f1a25d4abbcl,0xf3f1a113ea462l,0xb51952f9b5dd8l,0x9f53c609b0755l,
09998         0x0fefcb7f74d2el },
09999       0 },
10000     /* 204 */
10001     { { 0x9497aba119185l,0x30aac45ba4bd0l,0xa521179d54e8cl,0xd80b492479deal,
10002         0x01801a57e87e0l },
10003       { 0xd3f8dfcafffb0l,0x0bae255240073l,0xb5fdfbc6cf33cl,0x1064781d763b5l,
10004         0x09f8fc11e1eadl },
10005       0 },
10006     /* 205 */
10007     { { 0x3a1715e69544cl,0x67f04b7813158l,0x78a4c320eaf85l,0x69a91e22a8fd2l,
10008         0x0a9d3809d3d3al },
10009       { 0xc2c2c59a2da3bl,0xf61895c847936l,0x3d5086938ccbcl,0x8ef75e65244e6l,
10010         0x03006b9aee117l },
10011       0 },
10012     /* 206 */
10013     { { 0x1f2b0c9eead28l,0x5d89f4dfbc0bbl,0x2ce89397eef63l,0xf761074757fdbl,
10014         0x00ab85fd745f8l },
10015       { 0xa7c933e5b4549l,0x5c97922f21ecdl,0x43b80404be2bbl,0x42c2261a1274bl,
10016         0x0b122d67511e9l },
10017       0 },
10018     /* 207 */
10019     { { 0x607be66a5ae7al,0xfa76adcbe33bel,0xeb6e5c501e703l,0xbaecaf9043014l,
10020         0x09f599dc1097dl },
10021       { 0x5b7180ff250edl,0x74349a20dc6d7l,0x0b227a38eb915l,0x4b78425605a41l,
10022         0x07d5528e08a29l },
10023       0 },
10024     /* 208 */
10025     { { 0x58f6620c26defl,0xea582b2d1ef0fl,0x1ce3881025585l,0x1730fbe7d79b0l,
10026         0x028ccea01303fl },
10027       { 0xabcd179644ba5l,0xe806fff0b8d1dl,0x6b3e17b1fc643l,0x13bfa60a76fc6l,
10028         0x0c18baf48a1d0l },
10029       0 },
10030     /* 209 */
10031     { { 0x638c85dc4216dl,0x67206142ac34el,0x5f5064a00c010l,0x596bd453a1719l,
10032         0x09def809db7a9l },
10033       { 0x8642e67ab8d2cl,0x336237a2b641el,0x4c4218bb42404l,0x8ce57d506a6d6l,
10034         0x00357f8b06880l },
10035       0 },
10036     /* 210 */
10037     { { 0xdbe644cd2cc88l,0x8df0b8f39d8e9l,0xd30a0c8cc61c2l,0x98874a309874cl,
10038         0x0e4a01add1b48l },
10039       { 0x1eeacf57cd8f9l,0x3ebd594c482edl,0xbd2f7871b767dl,0xcc30a7295c717l,
10040         0x0466d7d79ce10l },
10041       0 },
10042     /* 211 */
10043     { { 0x318929dada2c7l,0xc38f9aa27d47dl,0x20a59e14fa0a6l,0xad1a90e4fd288l,
10044         0x0c672a522451el },
10045       { 0x07cc85d86b655l,0x3bf9ad4af1306l,0x71172a6f0235dl,0x751399a086805l,
10046         0x05e3d64faf2a6l },
10047       0 },
10048     /* 212 */
10049     { { 0x410c79b3b4416l,0x85eab26d99aa6l,0xb656a74cd8fcfl,0x42fc5ebff74adl,
10050         0x06c8a7a95eb8el },
10051       { 0x60ba7b02a63bdl,0x038b8f004710cl,0x12d90b06b2f23l,0xca918c6c37383l,
10052         0x0348ae422ad82l },
10053       0 },
10054     /* 213 */
10055     { { 0x746635ccda2fbl,0xa18e0726d27f4l,0x92b1f2022accal,0x2d2e85adf7824l,
10056         0x0c1074de0d9efl },
10057       { 0x3ce44ae9a65b3l,0xac05d7151bfcfl,0xe6a9788fd71e4l,0x4ffcd4711f50cl,
10058         0x0fbadfbdbc9e5l },
10059       0 },
10060     /* 214 */
10061     { { 0x3f1cd20a99363l,0x8f6cf22775171l,0x4d359b2b91565l,0x6fcd968175cd2l,
10062         0x0b7f976b48371l },
10063       { 0x8e24d5d6dbf74l,0xfd71c3af36575l,0x243dfe38d23bal,0xc80548f477600l,
10064         0x0f4d41b2ecafcl },
10065       0 },
10066     /* 215 */
10067     { { 0x1cf28fdabd48dl,0x3632c078a451fl,0x17146e9ce81bel,0x0f106ace29741l,
10068         0x0180824eae016l },
10069       { 0x7698b66e58358l,0x52ce6ca358038l,0xe41e6c5635687l,0x6d2582380e345l,
10070         0x067e5f63983cfl },
10071       0 },
10072     /* 216 */
10073     { { 0xccb8dcf4899efl,0xf09ebb44c0f89l,0x2598ec9949015l,0x1fc6546f9276bl,
10074         0x09fef789a04c1l },
10075       { 0x67ecf53d2a071l,0x7fa4519b096d3l,0x11e2eefb10e1al,0x4e20ca6b3fb06l,
10076         0x0bc80c181a99cl },
10077       0 },
10078     /* 217 */
10079     { { 0x536f8e5eb82e6l,0xc7f56cb920972l,0x0b5da5e1a484fl,0xdf10c78e21715l,
10080         0x049270e629f8cl },
10081       { 0x9b7bbea6b50adl,0xc1a2388ffc1a3l,0x107197b9a0284l,0x2f7f5403eb178l,
10082         0x0d2ee52f96137l },
10083       0 },
10084     /* 218 */
10085     { { 0xcd28588e0362al,0xa78fa5d94dd37l,0x434a526442fa8l,0xb733aff836e5al,
10086         0x0dfb478bee5abl },
10087       { 0xf1ce7673eede6l,0xd42b5b2f04a91l,0x530da2fa5390al,0x473a5e66f7bf5l,
10088         0x0d9a140b408dfl },
10089       0 },
10090     /* 219 */
10091     { { 0x221b56e8ea498l,0x293563ee090e0l,0x35d2ade623478l,0x4b1ae06b83913l,
10092         0x0760c058d623fl },
10093       { 0x9b58cc198aa79l,0xd2f07aba7f0b8l,0xde2556af74890l,0x04094e204110fl,
10094         0x07141982d8f19l },
10095       0 },
10096     /* 220 */
10097     { { 0xa0e334d4b0f45l,0x38392a94e16f0l,0x3c61d5ed9280bl,0x4e473af324c6bl,
10098         0x03af9d1ce89d5l },
10099       { 0xf798120930371l,0x4c21c17097fd8l,0xc42309beda266l,0x7dd60e9545dcdl,
10100         0x0b1f815c37395l },
10101       0 },
10102     /* 221 */
10103     { { 0xaa78e89fec44al,0x473caa4caf84fl,0x1b6a624c8c2ael,0xf052691c807dcl,
10104         0x0a41aed141543l },
10105       { 0x353997d5ffe04l,0xdf625b6e20424l,0x78177758bacb2l,0x60ef85d660be8l,
10106         0x0d6e9c1dd86fbl },
10107       0 },
10108     /* 222 */
10109     { { 0x2e97ec6853264l,0xb7e2304a0b3aal,0x8eae9be771533l,0xf8c21b912bb7bl,
10110         0x09c9c6e10ae9bl },
10111       { 0x09a59e030b74cl,0x4d6a631e90a23l,0x49b79f24ed749l,0x61b689f44b23al,
10112         0x0566bd59640fal },
10113       0 },
10114     /* 223 */
10115     { { 0xc0118c18061f3l,0xd37c83fc70066l,0x7273245190b25l,0x345ef05fc8e02l,
10116         0x0cf2c7390f525l },
10117       { 0xbceb410eb30cfl,0xba0d77703aa09l,0x50ff255cfd2ebl,0x0979e842c43a1l,
10118         0x002f517558aa2l },
10119       0 },
10120     /* 224 */
10121     { { 0xef794addb7d07l,0x4224455500396l,0x78aa3ce0b4fc7l,0xd97dfaff8eaccl,
10122         0x014e9ada5e8d4l },
10123       { 0x480a12f7079e2l,0xcde4b0800edaal,0x838157d45baa3l,0x9ae801765e2d7l,
10124         0x0a0ad4fab8e9dl },
10125       0 },
10126     /* 225 */
10127     { { 0xb76214a653618l,0x3c31eaaa5f0bfl,0x4949d5e187281l,0xed1e1553e7374l,
10128         0x0bcd530b86e56l },
10129       { 0xbe85332e9c47bl,0xfeb50059ab169l,0x92bfbb4dc2776l,0x341dcdba97611l,
10130         0x0909283cf6979l },
10131       0 },
10132     /* 226 */
10133     { { 0x0032476e81a13l,0x996217123967bl,0x32e19d69bee1al,0x549a08ed361bdl,
10134         0x035eeb7c9ace1l },
10135       { 0x0ae5a7e4e5bdcl,0xd3b6ceec6e128l,0xe266bc12dcd2cl,0xe86452e4224c6l,
10136         0x09a8b2cf4448al },
10137       0 },
10138     /* 227 */
10139     { { 0x71bf209d03b59l,0xa3b65af2abf64l,0xbd5eec9c90e62l,0x1379ff7ff168el,
10140         0x06bdb60f4d449l },
10141       { 0xafebc8a55bc30l,0x1610097fe0dadl,0xc1e3bddc79eadl,0x08a942e197414l,
10142         0x001ec3cfd94bal },
10143       0 },
10144     /* 228 */
10145     { { 0x277ebdc9485c2l,0x7922fb10c7ba6l,0x0a28d8a48cc9al,0x64f64f61d60f7l,
10146         0x0d1acb1c04754l },
10147       { 0x902b126f36612l,0x4ee0618d8bd26l,0x08357ee59c3a4l,0x26c24df8a8133l,
10148         0x07dcd079d4056l },
10149       0 },
10150     /* 229 */
10151     { { 0x7d4d3f05a4b48l,0x52372307725cel,0x12a915aadcd29l,0x19b8d18f79718l,
10152         0x00bf53589377dl },
10153       { 0xcd95a6c68ea73l,0xca823a584d35el,0x473a723c7f3bbl,0x86fc9fb674c6fl,
10154         0x0d28be4d9e166l },
10155       0 },
10156     /* 230 */
10157     { { 0xb990638fa8e4bl,0x6e893fd8fc5d2l,0x36fb6fc559f18l,0x88ce3a6de2aa4l,
10158         0x0d76007aa510fl },
10159       { 0x0aab6523a4988l,0x4474dd02732d1l,0x3407278b455cfl,0xbb017f467082al,
10160         0x0f2b52f68b303l },
10161       0 },
10162     /* 231 */
10163     { { 0x7eafa9835b4cal,0xfcbb669cbc0d5l,0x66431982d2232l,0xed3a8eeeb680cl,
10164         0x0d8dbe98ecc5al },
10165       { 0x9be3fc5a02709l,0xe5f5ba1fa8cbal,0x10ea85230be68l,0x9705febd43cdfl,
10166         0x0e01593a3ee55l },
10167       0 },
10168     /* 232 */
10169     { { 0x5af50ea75a0a6l,0xac57858033d3el,0x0176406512226l,0xef066fe6d50fdl,
10170         0x0afec07b1aeb8l },
10171       { 0x9956780bb0a31l,0xcc37309aae7fbl,0x1abf3896f1af3l,0xbfdd9153a15a0l,
10172         0x0a71b93546e2dl },
10173       0 },
10174     /* 233 */
10175     { { 0xe12e018f593d2l,0x28a078122bbf8l,0xba4f2add1a904l,0x23d9150505db0l,
10176         0x053a2005c6285l },
10177       { 0x8b639e7f2b935l,0x5ac182961a07cl,0x518ca2c2bff97l,0x8e3d86bceea77l,
10178         0x0bf47d19b3d58l },
10179       0 },
10180     /* 234 */
10181     { { 0x967a7dd7665d5l,0x572f2f4de5672l,0x0d4903f4e3030l,0xa1b6144005ae8l,
10182         0x0001c2c7f39c9l },
10183       { 0xa801469efc6d6l,0xaa7bc7a724143l,0x78150a4c810bdl,0xb99b5f65670bal,
10184         0x0fdadf8e786ffl },
10185       0 },
10186     /* 235 */
10187     { { 0x8cb88ffc00785l,0x913b48eb67fd3l,0xf368fbc77fa75l,0x3c940454d055bl,
10188         0x03a838e4d5aa4l },
10189       { 0x663293e97bb9al,0x63441d94d9561l,0xadb2a839eb933l,0x1da3515591a60l,
10190         0x03cdb8257873el },
10191       0 },
10192     /* 236 */
10193     { { 0x140a97de77eabl,0x0d41648109137l,0xeb1d0dff7e1c5l,0x7fba762dcad2cl,
10194         0x05a60cc89f1f5l },
10195       { 0x3638240d45673l,0x195913c65580bl,0xd64b7411b82bel,0x8fc0057284b8dl,
10196         0x0922ff56fdbfdl },
10197       0 },
10198     /* 237 */
10199     { { 0x65deec9a129a1l,0x57cc284e041b2l,0xebfbe3ca5b1cel,0xcd6204380c46cl,
10200         0x072919a7df6c5l },
10201       { 0xf453a8fb90f9al,0x0b88e4031b298l,0x96f1856d719c0l,0x089ae32c0e777l,
10202         0x05e7917803624l },
10203       0 },
10204     /* 238 */
10205     { { 0x6ec557f63cdfbl,0x71f1cae4fd5c1l,0x60597ca8e6a35l,0x2fabfce26bea5l,
10206         0x04e0a5371e24cl },
10207       { 0xa40d3a5765357l,0x440d73a2b4276l,0x1d11a323c89afl,0x04eeb8f370ae4l,
10208         0x0f5ff7818d566l },
10209       0 },
10210     /* 239 */
10211     { { 0x3e3fe1a09df21l,0x8ee66e8e47fbfl,0x9c8901526d5d2l,0x5e642096bd0a2l,
10212         0x0e41df0e9533fl },
10213       { 0xfda40b3ba9e3fl,0xeb2604d895305l,0xf0367c7f2340cl,0x155f0866e1927l,
10214         0x08edd7d6eac4fl },
10215       0 },
10216     /* 240 */
10217     { { 0x1dc0e0bfc8ff3l,0x2be936f42fc9al,0xca381ef14efd8l,0xee9667016f7ccl,
10218         0x01432c1caed8al },
10219       { 0x8482970b23c26l,0x730735b273ec6l,0xaef0f5aa64fe8l,0xd2c6e389f6e5el,
10220         0x0caef480b5ac8l },
10221       0 },
10222     /* 241 */
10223     { { 0x5c97875315922l,0x713063cca5524l,0x64ef2cbd82951l,0xe236f3ce60d0bl,
10224         0x0d0ba177e8efal },
10225       { 0x9ae8fb1b3af60l,0xe53d2da20e53al,0xf9eef281a796al,0xae1601d63605dl,
10226         0x0f31c957c1c54l },
10227       0 },
10228     /* 242 */
10229     { { 0x58d5249cc4597l,0xb0bae0a028c0fl,0x34a814adc5015l,0x7c3aefc5fc557l,
10230         0x0013404cb96e1l },
10231       { 0xe2585c9a824bfl,0x5e001eaed7b29l,0x1ef68acd59318l,0x3e6c8d6ee6826l,
10232         0x06f377c4b9193l },
10233       0 },
10234     /* 243 */
10235     { { 0x3bad1a8333fd2l,0x025a2a95b89f9l,0xaf75acea89302l,0x9506211e5037el,
10236         0x06dba3e4ed2d0l },
10237       { 0xef98cd04399cdl,0x6ee6b73adea48l,0x17ecaf31811c6l,0xf4a772f60752cl,
10238         0x0f13cf3423becl },
10239       0 },
10240     /* 244 */
10241     { { 0xb9ec0a919e2ebl,0x95f62c0f68ceel,0xaba229983a9a1l,0xbad3cfba3bb67l,
10242         0x0c83fa9a9274bl },
10243       { 0xd1b0b62fa1ce0l,0xf53418efbf0d7l,0x2706f04e58b60l,0x2683bfa8ef9e5l,
10244         0x0b49d70f45d70l },
10245       0 },
10246     /* 245 */
10247     { { 0xc7510fad5513bl,0xecb1751e2d914l,0x9fb9d5905f32el,0xf1cf6d850418dl,
10248         0x059cfadbb0c30l },
10249       { 0x7ac2355cb7fd6l,0xb8820426a3e16l,0x0a78864249367l,0x4b67eaeec58c9l,
10250         0x05babf362354al },
10251       0 },
10252     /* 246 */
10253     { { 0x981d1ee424865l,0x78f2e5577f37cl,0x9e0c0588b0028l,0xc8f0702970f1bl,
10254         0x06188c6a79026l },
10255       { 0x9a19bd0f244dal,0x5cfb08087306fl,0xf2136371eccedl,0xb9d935470f9b9l,
10256         0x0993fe475df50l },
10257       0 },
10258     /* 247 */
10259     { { 0x31cdf9b2c3609l,0xc02c46d4ea68el,0xa77510184eb19l,0x616b7ac9ec1a9l,
10260         0x081f764664c80l },
10261       { 0xc2a5a75fbe978l,0xd3f183b3561d7l,0x01dd2bf6743fel,0x060d838d1f045l,
10262         0x0564a812a5fe9l },
10263       0 },
10264     /* 248 */
10265     { { 0xa64f4fa817d1dl,0x44bea82e0f7a5l,0xd57f9aa55f968l,0x1d6cb5ff5a0fcl,
10266         0x0226bf3cf00e5l },
10267       { 0x1a9f92f2833cfl,0x5a4f4f89a8d6dl,0xf3f7f7720a0a3l,0x783611536c498l,
10268         0x068779f47ff25l },
10269       0 },
10270     /* 249 */
10271     { { 0x0c1c173043d08l,0x741fc020fa79bl,0xa6d26d0a54467l,0x2e0bd3767e289l,
10272         0x097bcb0d1eb09l },
10273       { 0x6eaa8f32ed3c3l,0x51b281bc482abl,0xfa178f3c8a4f1l,0x46554d1bf4f3bl,
10274         0x0a872ffe80a78l },
10275       0 },
10276     /* 250 */
10277     { { 0xb7935a32b2086l,0x0e8160f486b1al,0xb6ae6bee1eb71l,0xa36a9bd0cd913l,
10278         0x002812bfcb732l },
10279       { 0xfd7cacf605318l,0x50fdfd6d1da63l,0x102d619646e5dl,0x96afa1d683982l,
10280         0x007391cc9fe53l },
10281       0 },
10282     /* 251 */
10283     { { 0x157f08b80d02bl,0xd162877f7fc50l,0x8d542ae6b8333l,0x2a087aca1af87l,
10284         0x0355d2adc7e6dl },
10285       { 0xf335a287386e1l,0x94f8e43275b41l,0x79989eafd272al,0x3a79286ca2cdel,
10286         0x03dc2b1e37c2al },
10287       0 },
10288     /* 252 */
10289     { { 0x9d21c04581352l,0x25376782bed68l,0xfed701f0a00c8l,0x846b203bd5909l,
10290         0x0c47869103ccdl },
10291       { 0xa770824c768edl,0x026841f6575dbl,0xaccce0e72feeal,0x4d3273313ed56l,
10292         0x0ccc42968d5bbl },
10293       0 },
10294     /* 253 */
10295     { { 0x50de13d7620b9l,0x8a5992a56a94el,0x75487c9d89a5cl,0x71cfdc0076406l,
10296         0x0e147eb42aa48l },
10297       { 0xab4eeacf3ae46l,0xfb50350fbe274l,0x8c840eafd4936l,0x96e3df2afe474l,
10298         0x0239ac047080el },
10299       0 },
10300     /* 254 */
10301     { { 0xd1f352bfee8d4l,0xcffa7b0fec481l,0xce9af3cce80b5l,0xe59d105c4c9e2l,
10302         0x0c55fa1a3f5f7l },
10303       { 0x6f14e8257c227l,0x3f342be00b318l,0xa904fb2c5b165l,0xb69909afc998al,
10304         0x0094cd99cd4f4l },
10305       0 },
10306     /* 255 */
10307     { { 0x81c84d703bebal,0x5032ceb2918a9l,0x3bd49ec8631d1l,0xad33a445f2c9el,
10308         0x0b90a30b642abl },
10309       { 0x5404fb4a5abf9l,0xc375db7603b46l,0xa35d89f004750l,0x24f76f9a42cccl,
10310         0x0019f8b9a1b79l },
10311       0 },
10312 };
10313 
10314 /* Multiply the base point of P256 by the scalar and return the result.
10315  * If map is true then convert result to affine co-ordinates.
10316  *
10317  * r     Resulting point.
10318  * k     Scalar to multiply by.
10319  * map   Indicates whether to convert result to affine.
10320  * heap  Heap to use for allocation.
10321  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
10322  */
10323 static int sp_256_ecc_mulmod_base_5(sp_point* r, sp_digit* k,
10324         int map, void* heap)
10325 {
10326     return sp_256_ecc_mulmod_stripe_5(r, &p256_base, p256_table,
10327                                       k, map, heap);
10328 }
10329 
10330 #endif
10331 
10332 /* Multiply the base point of P256 by the scalar and return the result.
10333  * If map is true then convert result to affine co-ordinates.
10334  *
10335  * km    Scalar to multiply by.
10336  * r     Resulting point.
10337  * map   Indicates whether to convert result to affine.
10338  * heap  Heap to use for allocation.
10339  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
10340  */
10341 int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
10342 {
10343 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
10344     sp_point p;
10345     sp_digit kd[5];
10346 #endif
10347     sp_point* point;
10348     sp_digit* k = NULL;
10349     int err = MP_OKAY;
10350 #ifdef HAVE_INTEL_AVX2
10351     word32 cpuid_flags = cpuid_get_flags();
10352 #endif
10353 
10354     err = sp_ecc_point_new(heap, p, point);
10355 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
10356     if (err == MP_OKAY) {
10357         k = XMALLOC(sizeof(sp_digit) * 5, heap, DYNAMIC_TYPE_ECC);
10358         if (k == NULL)
10359             err = MEMORY_E;
10360     }
10361 #else
10362     k = kd;
10363 #endif
10364     if (err == MP_OKAY) {
10365         sp_256_from_mp(k, 5, km);
10366 
10367 #ifdef HAVE_INTEL_AVX2
10368         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
10369             err = sp_256_ecc_mulmod_base_avx2_5(point, k, map, heap);
10370         else
10371 #endif
10372             err = sp_256_ecc_mulmod_base_5(point, k, map, heap);
10373     }
10374     if (err == MP_OKAY)
10375         err = sp_256_point_to_ecc_point_5(point, r);
10376 
10377 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
10378     if (k != NULL)
10379         XFREE(k, heap, DYNAMIC_TYPE_ECC);
10380 #endif
10381     sp_ecc_point_free(point, 0, heap);
10382 
10383     return err;
10384 }
10385 
10386 #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN)
10387 /* Returns 1 if the number of zero.
10388  * Implementation is constant time.
10389  *
10390  * a  Number to check.
10391  * returns 1 if the number is zero and 0 otherwise.
10392  */
10393 static int sp_256_iszero_5(const sp_digit* a)
10394 {
10395     return (a[0] | a[1] | a[2] | a[3] | a[4]) == 0;
10396 }
10397 
10398 #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN */
10399 /* Add 1 to a. (a = a + 1)
10400  *
10401  * r  A single precision integer.
10402  * a  A single precision integer.
10403  */
10404 SP_NOINLINE static void sp_256_add_one_5(sp_digit* a)
10405 {
10406     a[0]++;
10407     sp_256_norm_5(a);
10408 }
10409 
10410 /* Read big endian unsigned byte aray into r.
10411  *
10412  * r  A single precision integer.
10413  * a  Byte array.
10414  * n  Number of bytes in array to read.
10415  */
10416 static void sp_256_from_bin(sp_digit* r, int max, const byte* a, int n)
10417 {
10418     int i, j = 0, s = 0;
10419 
10420     r[0] = 0;
10421     for (i = n-1; i >= 0; i--) {
10422         r[j] |= ((sp_digit)a[i]) << s;
10423         if (s >= 44) {
10424             r[j] &= 0xfffffffffffffl;
10425             s = 52 - s;
10426             if (j + 1 >= max)
10427                 break;
10428             r[++j] = a[i] >> s;
10429             s = 8 - s;
10430         }
10431         else
10432             s += 8;
10433     }
10434 
10435     for (j++; j < max; j++)
10436         r[j] = 0;
10437 }
10438 
10439 /* Generates a scalar that is in the range 1..order-1.
10440  *
10441  * rng  Random number generator.
10442  * k    Scalar value.
10443  * returns RNG failures, MEMORY_E when memory allocation fails and
10444  * MP_OKAY on success.
10445  */
10446 static int sp_256_ecc_gen_k_5(WC_RNG* rng, sp_digit* k)
10447 {
10448     int err;
10449     byte buf[32];
10450 
10451     do {
10452         err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
10453         if (err == 0) {
10454             sp_256_from_bin(k, 5, buf, sizeof(buf));
10455             if (sp_256_cmp_5(k, p256_order2) < 0) {
10456                 sp_256_add_one_5(k);
10457                 break;
10458             }
10459         }
10460     }
10461     while (err == 0);
10462 
10463     return err;
10464 }
10465 
10466 /* Makes a random EC key pair.
10467  *
10468  * rng   Random number generator.
10469  * priv  Generated private value.
10470  * pub   Generated public point.
10471  * heap  Heap to use for allocation.
10472  * returns ECC_INF_E when the point does not have the correct order, RNG
10473  * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
10474  */
10475 int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
10476 {
10477 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
10478     sp_point p;
10479     sp_digit kd[5];
10480 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
10481     sp_point inf;
10482 #endif
10483 #endif
10484     sp_point* point;
10485     sp_digit* k = NULL;
10486 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
10487     sp_point* infinity;
10488 #endif
10489     int err;
10490 #ifdef HAVE_INTEL_AVX2
10491     word32 cpuid_flags = cpuid_get_flags();
10492 #endif
10493 
10494     (void)heap;
10495 
10496     err = sp_ecc_point_new(heap, p, point);
10497 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
10498     if (err == MP_OKAY)
10499         err = sp_ecc_point_new(heap, inf, infinity);
10500 #endif
10501 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
10502     if (err == MP_OKAY) {
10503         k = XMALLOC(sizeof(sp_digit) * 5, heap, DYNAMIC_TYPE_ECC);
10504         if (k == NULL)
10505             err = MEMORY_E;
10506     }
10507 #else
10508     k = kd;
10509 #endif
10510 
10511     if (err == MP_OKAY)
10512         err = sp_256_ecc_gen_k_5(rng, k);
10513     if (err == MP_OKAY) {
10514 #ifdef HAVE_INTEL_AVX2
10515         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
10516             err = sp_256_ecc_mulmod_base_avx2_5(point, k, 1, NULL);
10517         else
10518 #endif
10519             err = sp_256_ecc_mulmod_base_5(point, k, 1, NULL);
10520     }
10521 
10522 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
10523     if (err == MP_OKAY) {
10524 #ifdef HAVE_INTEL_AVX2
10525         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
10526             err = sp_256_ecc_mulmod_avx2_5(infinity, point, p256_order, 1,
10527                                                                           NULL);
10528         }
10529         else
10530 #endif
10531             err = sp_256_ecc_mulmod_5(infinity, point, p256_order, 1, NULL);
10532     }
10533     if (err == MP_OKAY) {
10534         if (!sp_256_iszero_5(point->x) || !sp_256_iszero_5(point->y))
10535             err = ECC_INF_E;
10536     }
10537 #endif
10538 
10539     if (err == MP_OKAY)
10540         err = sp_256_to_mp(k, priv);
10541     if (err == MP_OKAY)
10542         err = sp_256_point_to_ecc_point_5(point, pub);
10543 
10544 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
10545     if (k != NULL)
10546         XFREE(k, heap, DYNAMIC_TYPE_ECC);
10547 #endif
10548 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
10549     sp_ecc_point_free(infinity, 1, heap);
10550 #endif
10551     sp_ecc_point_free(point, 1, heap);
10552 
10553     return err;
10554 }
10555 
10556 #ifdef HAVE_ECC_DHE
10557 /* Write r as big endian to byte aray.
10558  * Fixed length number of bytes written: 32
10559  *
10560  * r  A single precision integer.
10561  * a  Byte array.
10562  */
10563 static void sp_256_to_bin(sp_digit* r, byte* a)
10564 {
10565     int i, j, s = 0, b;
10566 
10567     for (i=0; i<4; i++) {
10568         r[i+1] += r[i] >> 52;
10569         r[i] &= 0xfffffffffffffl;
10570     }
10571     j = 256 / 8 - 1;
10572     a[j] = 0;
10573     for (i=0; i<5 && j>=0; i++) {
10574         b = 0;
10575         a[j--] |= r[i] << s; b += 8 - s;
10576         if (j < 0)
10577             break;
10578         while (b < 52) {
10579             a[j--] = r[i] >> b; b += 8;
10580             if (j < 0)
10581                 break;
10582         }
10583         s = 8 - (b - 52);
10584         if (j >= 0)
10585             a[j] = 0;
10586         if (s != 0)
10587             j++;
10588     }
10589 }
10590 
10591 /* Multiply the point by the scalar and serialize the X ordinate.
10592  * The number is 0 padded to maximum size on output.
10593  *
10594  * priv    Scalar to multiply the point by.
10595  * pub     Point to multiply.
10596  * out     Buffer to hold X ordinate.
10597  * outLen  On entry, size of the buffer in bytes.
10598  *         On exit, length of data in buffer in bytes.
10599  * heap    Heap to use for allocation.
10600  * returns BUFFER_E if the buffer is to small for output size,
10601  * MEMORY_E when memory allocation fails and MP_OKAY on success.
10602  */
10603 int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
10604                           word32* outLen, void* heap)
10605 {
10606 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
10607     sp_point p;
10608     sp_digit kd[5];
10609 #endif
10610     sp_point* point = NULL;
10611     sp_digit* k = NULL;
10612     int err = MP_OKAY;
10613 #ifdef HAVE_INTEL_AVX2
10614     word32 cpuid_flags = cpuid_get_flags();
10615 #endif
10616 
10617     if (*outLen < 32)
10618         err = BUFFER_E;
10619 
10620     if (err == MP_OKAY)
10621         err = sp_ecc_point_new(heap, p, point);
10622 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
10623     if (err == MP_OKAY) {
10624         k = XMALLOC(sizeof(sp_digit) * 5, heap, DYNAMIC_TYPE_ECC);
10625         if (k == NULL)
10626             err = MEMORY_E;
10627     }
10628 #else
10629     k = kd;
10630 #endif
10631 
10632     if (err == MP_OKAY) {
10633         sp_256_from_mp(k, 5, priv);
10634         sp_256_point_from_ecc_point_5(point, pub);
10635 #ifdef HAVE_INTEL_AVX2
10636         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
10637             err = sp_256_ecc_mulmod_avx2_5(point, point, k, 1, heap);
10638         else
10639 #endif
10640             err = sp_256_ecc_mulmod_5(point, point, k, 1, heap);
10641     }
10642     if (err == MP_OKAY) {
10643         sp_256_to_bin(point->x, out);
10644         *outLen = 32;
10645     }
10646 
10647 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
10648     if (k != NULL)
10649         XFREE(k, heap, DYNAMIC_TYPE_ECC);
10650 #endif
10651     sp_ecc_point_free(point, 0, heap);
10652 
10653     return err;
10654 }
10655 #endif /* HAVE_ECC_DHE */
10656 
10657 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
10658 #ifdef HAVE_INTEL_AVX2
10659 #endif /* HAVE_INTEL_AVX2 */
10660 #endif
10661 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
10662 /* Multiply a by scalar b into r. (r = a * b)
10663  *
10664  * r  A single precision integer.
10665  * a  A single precision integer.
10666  * b  A scalar.
10667  */
10668 SP_NOINLINE static void sp_256_mul_d_5(sp_digit* r, const sp_digit* a,
10669     const sp_digit b)
10670 {
10671 #ifdef WOLFSSL_SP_SMALL
10672     int128_t tb = b;
10673     int128_t t = 0;
10674     int i;
10675 
10676     for (i = 0; i < 5; i++) {
10677         t += tb * a[i];
10678         r[i] = t & 0xfffffffffffffl;
10679         t >>= 52;
10680     }
10681     r[5] = (sp_digit)t;
10682 #else
10683     int128_t tb = b;
10684     int128_t t[5];
10685 
10686     t[ 0] = tb * a[ 0];
10687     t[ 1] = tb * a[ 1];
10688     t[ 2] = tb * a[ 2];
10689     t[ 3] = tb * a[ 3];
10690     t[ 4] = tb * a[ 4];
10691     r[ 0] =                           (t[ 0] & 0xfffffffffffffl);
10692     r[ 1] = (sp_digit)(t[ 0] >> 52) + (t[ 1] & 0xfffffffffffffl);
10693     r[ 2] = (sp_digit)(t[ 1] >> 52) + (t[ 2] & 0xfffffffffffffl);
10694     r[ 3] = (sp_digit)(t[ 2] >> 52) + (t[ 3] & 0xfffffffffffffl);
10695     r[ 4] = (sp_digit)(t[ 3] >> 52) + (t[ 4] & 0xfffffffffffffl);
10696     r[ 5] = (sp_digit)(t[ 4] >> 52);
10697 #endif /* WOLFSSL_SP_SMALL */
10698 }
10699 
10700 /* Divide d in a and put remainder into r (m*d + r = a)
10701  * m is not calculated as it is not needed at this time.
10702  *
10703  * a  Nmber to be divided.
10704  * d  Number to divide with.
10705  * m  Multiplier result.
10706  * r  Remainder from the division.
10707  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
10708  */
10709 static int sp_256_div_5(sp_digit* a, sp_digit* d, sp_digit* m,
10710         sp_digit* r)
10711 {
10712     int i;
10713     int128_t d1;
10714     sp_digit div, r1;
10715 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
10716     sp_digit* td;
10717 #else
10718     sp_digit t1d[10], t2d[5 + 1];
10719 #endif
10720     sp_digit* t1;
10721     sp_digit* t2;
10722     int err = MP_OKAY;
10723 
10724 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
10725     td = XMALLOC(sizeof(sp_digit) * (3 * 5 + 1), NULL, DYNAMIC_TYPE_TMP_BUFFER);
10726     if (td != NULL) {
10727         t1 = td;
10728         t2 = td + 2 * 5;
10729     }
10730     else
10731         err = MEMORY_E;
10732 #else
10733     t1 = t1d;
10734     t2 = t2d;
10735 #endif
10736 
10737     (void)m;
10738 
10739     if (err == MP_OKAY) {
10740         div = d[4];
10741         XMEMCPY(t1, a, sizeof(*t1) * 2 * 5);
10742         for (i=4; i>=0; i--) {
10743             t1[5 + i] += t1[5 + i - 1] >> 52;
10744             t1[5 + i - 1] &= 0xfffffffffffffl;
10745             d1 = t1[5 + i];
10746             d1 <<= 52;
10747             d1 += t1[5 + i - 1];
10748             r1 = (sp_digit)(d1 / div);
10749 
10750             sp_256_mul_d_5(t2, d, r1);
10751             sp_256_sub_5(&t1[i], &t1[i], t2);
10752             t1[5 + i] -= t2[5];
10753             t1[5 + i] += t1[5 + i - 1] >> 52;
10754             t1[5 + i - 1] &= 0xfffffffffffffl;
10755             r1 = (((-t1[5 + i]) << 52) - t1[5 + i - 1]) / div;
10756             r1++;
10757             sp_256_mul_d_5(t2, d, r1);
10758             sp_256_add_5(&t1[i], &t1[i], t2);
10759             t1[5 + i] += t1[5 + i - 1] >> 52;
10760             t1[5 + i - 1] &= 0xfffffffffffffl;
10761         }
10762         t1[5 - 1] += t1[5 - 2] >> 52;
10763         t1[5 - 2] &= 0xfffffffffffffl;
10764         d1 = t1[5 - 1];
10765         r1 = (sp_digit)(d1 / div);
10766 
10767         sp_256_mul_d_5(t2, d, r1);
10768         sp_256_sub_5(t1, t1, t2);
10769         XMEMCPY(r, t1, sizeof(*r) * 2 * 5);
10770         for (i=0; i<3; i++) {
10771             r[i+1] += r[i] >> 52;
10772             r[i] &= 0xfffffffffffffl;
10773         }
10774         sp_256_cond_add_5(r, r, d, 0 - (r[4] < 0));
10775     }
10776 
10777 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
10778     if (td != NULL)
10779         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
10780 #endif
10781 
10782     return err;
10783 }
10784 
10785 /* Reduce a modulo m into r. (r = a mod m)
10786  *
10787  * r  A single precision number that is the reduced result.
10788  * a  A single precision number that is to be reduced.
10789  * m  A single precision number that is the modulus to reduce with.
10790  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
10791  */
10792 static int sp_256_mod_5(sp_digit* r, sp_digit* a, sp_digit* m)
10793 {
10794     return sp_256_div_5(a, m, NULL, r);
10795 }
10796 
10797 #endif
10798 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
#ifdef WOLFSSL_SP_SMALL
/* Order-2 for the P256 curve, as four 64-bit words, least significant
 * word first. */
static const uint64_t p256_order_2[4] = {
    0xf3b9cac2fc63254f,
    0xbce6faada7179e84,
    0xffffffffffffffff,
    0xffffffff00000000
};
#else
/* The low half of the order-2 of the P256 curve, as two 64-bit words,
 * least significant word first. */
static const uint64_t p256_order_low[2] = {
    0xf3b9cac2fc63254f,
    0xbce6faada7179e84
};
#endif /* WOLFSSL_SP_SMALL */
10811 
10812 /* Multiply two number mod the order of P256 curve. (r = a * b mod order)
10813  *
10814  * r  Result of the multiplication.
10815  * a  First operand of the multiplication.
10816  * b  Second operand of the multiplication.
10817  */
10818 static void sp_256_mont_mul_order_5(sp_digit* r, sp_digit* a, sp_digit* b)
10819 {
10820     sp_256_mul_5(r, a, b);
10821     sp_256_mont_reduce_5(r, p256_order, p256_mp_order);
10822 }
10823 
10824 /* Square number mod the order of P256 curve. (r = a * a mod order)
10825  *
10826  * r  Result of the squaring.
10827  * a  Number to square.
10828  */
10829 static void sp_256_mont_sqr_order_5(sp_digit* r, sp_digit* a)
10830 {
10831     sp_256_sqr_5(r, a);
10832     sp_256_mont_reduce_5(r, p256_order, p256_mp_order);
10833 }
10834 
10835 #ifndef WOLFSSL_SP_SMALL
10836 /* Square number mod the order of P256 curve a number of times.
10837  * (r = a ^ n mod order)
10838  *
10839  * r  Result of the squaring.
10840  * a  Number to square.
10841  */
10842 static void sp_256_mont_sqr_n_order_5(sp_digit* r, sp_digit* a, int n)
10843 {
10844     int i;
10845 
10846     sp_256_mont_sqr_order_5(r, a);
10847     for (i=1; i<n; i++)
10848         sp_256_mont_sqr_order_5(r, r);
10849 }
10850 #endif /* !WOLFSSL_SP_SMALL */
10851 
10852 /* Invert the number, in Montgomery form, modulo the order of the P256 curve.
10853  * (r = 1 / a mod order)
10854  *
10855  * r   Inverse result.
10856  * a   Number to invert.
10857  * td  Temporary data.
10858  */
10859 static void sp_256_mont_inv_order_5(sp_digit* r, sp_digit* a,
10860         sp_digit* td)
10861 {
10862 #ifdef WOLFSSL_SP_SMALL
10863     sp_digit* t = td;
10864     int i;
10865 
10866     XMEMCPY(t, a, sizeof(sp_digit) * 5);
10867     for (i=254; i>=0; i--) {
10868         sp_256_mont_sqr_order_5(t, t);
10869         if (p256_order_2[i / 64] & ((sp_digit)1 << (i % 64)))
10870             sp_256_mont_mul_order_5(t, t, a);
10871     }
10872     XMEMCPY(r, t, sizeof(sp_digit) * 5);
10873 #else
10874     sp_digit* t = td;
10875     sp_digit* t2 = td + 2 * 5;
10876     sp_digit* t3 = td + 4 * 5;
10877     int i;
10878 
10879     /* t = a^2 */
10880     sp_256_mont_sqr_order_5(t, a);
10881     /* t = a^3 = t * a */
10882     sp_256_mont_mul_order_5(t, t, a);
10883     /* t2= a^c = t ^ 2 ^ 2 */
10884     sp_256_mont_sqr_n_order_5(t2, t, 2);
10885     /* t3= a^f = t2 * t */
10886     sp_256_mont_mul_order_5(t3, t2, t);
10887     /* t2= a^f0 = t3 ^ 2 ^ 4 */
10888     sp_256_mont_sqr_n_order_5(t2, t3, 4);
10889     /* t = a^ff = t2 * t3 */
10890     sp_256_mont_mul_order_5(t, t2, t3);
10891     /* t3= a^ff00 = t ^ 2 ^ 8 */
10892     sp_256_mont_sqr_n_order_5(t2, t, 8);
10893     /* t = a^ffff = t2 * t */
10894     sp_256_mont_mul_order_5(t, t2, t);
10895     /* t2= a^ffff0000 = t ^ 2 ^ 16 */
10896     sp_256_mont_sqr_n_order_5(t2, t, 16);
10897     /* t = a^ffffffff = t2 * t */
10898     sp_256_mont_mul_order_5(t, t2, t);
10899     /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64  */
10900     sp_256_mont_sqr_n_order_5(t2, t, 64);
10901     /* t2= a^ffffffff00000000ffffffff = t2 * t */
10902     sp_256_mont_mul_order_5(t2, t2, t);
10903     /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32  */
10904     sp_256_mont_sqr_n_order_5(t2, t2, 32);
10905     /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
10906     sp_256_mont_mul_order_5(t2, t2, t);
10907     /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
10908     for (i=127; i>=112; i--) {
10909         sp_256_mont_sqr_order_5(t2, t2);
10910         if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
10911             sp_256_mont_mul_order_5(t2, t2, a);
10912     }
10913     /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
10914     sp_256_mont_sqr_n_order_5(t2, t2, 4);
10915     sp_256_mont_mul_order_5(t2, t2, t3);
10916     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
10917     for (i=107; i>=64; i--) {
10918         sp_256_mont_sqr_order_5(t2, t2);
10919         if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
10920             sp_256_mont_mul_order_5(t2, t2, a);
10921     }
10922     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
10923     sp_256_mont_sqr_n_order_5(t2, t2, 4);
10924     sp_256_mont_mul_order_5(t2, t2, t3);
10925     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
10926     for (i=59; i>=32; i--) {
10927         sp_256_mont_sqr_order_5(t2, t2);
10928         if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
10929             sp_256_mont_mul_order_5(t2, t2, a);
10930     }
10931     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
10932     sp_256_mont_sqr_n_order_5(t2, t2, 4);
10933     sp_256_mont_mul_order_5(t2, t2, t3);
10934     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
10935     for (i=27; i>=0; i--) {
10936         sp_256_mont_sqr_order_5(t2, t2);
10937         if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
10938             sp_256_mont_mul_order_5(t2, t2, a);
10939     }
10940     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
10941     sp_256_mont_sqr_n_order_5(t2, t2, 4);
10942     /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
10943     sp_256_mont_mul_order_5(r, t2, t3);
10944 #endif /* WOLFSSL_SP_SMALL */
10945 }
10946 
10947 #ifdef HAVE_INTEL_AVX2
10948 /* Multiply two number mod the order of P256 curve. (r = a * b mod order)
10949  *
10950  * r  Result of the multiplication.
10951  * a  First operand of the multiplication.
10952  * b  Second operand of the multiplication.
10953  */
10954 static void sp_256_mont_mul_order_avx2_5(sp_digit* r, sp_digit* a, sp_digit* b)
10955 {
10956     sp_256_mul_avx2_5(r, a, b);
10957     sp_256_mont_reduce_avx2_5(r, p256_order, p256_mp_order);
10958 }
10959 
10960 /* Square number mod the order of P256 curve. (r = a * a mod order)
10961  *
10962  * r  Result of the squaring.
10963  * a  Number to square.
10964  */
10965 static void sp_256_mont_sqr_order_avx2_5(sp_digit* r, sp_digit* a)
10966 {
10967     sp_256_sqr_avx2_5(r, a);
10968     sp_256_mont_reduce_avx2_5(r, p256_order, p256_mp_order);
10969 }
10970 
10971 #ifndef WOLFSSL_SP_SMALL
10972 /* Square number mod the order of P256 curve a number of times.
10973  * (r = a ^ n mod order)
10974  *
10975  * r  Result of the squaring.
10976  * a  Number to square.
10977  */
10978 static void sp_256_mont_sqr_n_order_avx2_5(sp_digit* r, sp_digit* a, int n)
10979 {
10980     int i;
10981 
10982     sp_256_mont_sqr_order_avx2_5(r, a);
10983     for (i=1; i<n; i++)
10984         sp_256_mont_sqr_order_avx2_5(r, r);
10985 }
10986 #endif /* !WOLFSSL_SP_SMALL */
10987 
10988 /* Invert the number, in Montgomery form, modulo the order of the P256 curve.
10989  * (r = 1 / a mod order)
10990  *
10991  * r   Inverse result.
10992  * a   Number to invert.
10993  * td  Temporary data.
10994  */
10995 static void sp_256_mont_inv_order_avx2_5(sp_digit* r, sp_digit* a,
10996         sp_digit* td)
10997 {
10998 #ifdef WOLFSSL_SP_SMALL
10999     sp_digit* t = td;
11000     int i;
11001 
11002     XMEMCPY(t, a, sizeof(sp_digit) * 5);
11003     for (i=254; i>=0; i--) {
11004         sp_256_mont_sqr_order_avx2_5(t, t);
11005         if (p256_order_2[i / 64] & ((sp_digit)1 << (i % 64)))
11006             sp_256_mont_mul_order_avx2_5(t, t, a);
11007     }
11008     XMEMCPY(r, t, sizeof(sp_digit) * 5);
11009 #else
11010     sp_digit* t = td;
11011     sp_digit* t2 = td + 2 * 5;
11012     sp_digit* t3 = td + 4 * 5;
11013     int i;
11014 
11015     /* t = a^2 */
11016     sp_256_mont_sqr_order_avx2_5(t, a);
11017     /* t = a^3 = t * a */
11018     sp_256_mont_mul_order_avx2_5(t, t, a);
11019     /* t2= a^c = t ^ 2 ^ 2 */
11020     sp_256_mont_sqr_n_order_avx2_5(t2, t, 2);
11021     /* t3= a^f = t2 * t */
11022     sp_256_mont_mul_order_avx2_5(t3, t2, t);
11023     /* t2= a^f0 = t3 ^ 2 ^ 4 */
11024     sp_256_mont_sqr_n_order_avx2_5(t2, t3, 4);
11025     /* t = a^ff = t2 * t3 */
11026     sp_256_mont_mul_order_avx2_5(t, t2, t3);
11027     /* t3= a^ff00 = t ^ 2 ^ 8 */
11028     sp_256_mont_sqr_n_order_avx2_5(t2, t, 8);
11029     /* t = a^ffff = t2 * t */
11030     sp_256_mont_mul_order_avx2_5(t, t2, t);
11031     /* t2= a^ffff0000 = t ^ 2 ^ 16 */
11032     sp_256_mont_sqr_n_order_avx2_5(t2, t, 16);
11033     /* t = a^ffffffff = t2 * t */
11034     sp_256_mont_mul_order_avx2_5(t, t2, t);
11035     /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64  */
11036     sp_256_mont_sqr_n_order_avx2_5(t2, t, 64);
11037     /* t2= a^ffffffff00000000ffffffff = t2 * t */
11038     sp_256_mont_mul_order_avx2_5(t2, t2, t);
11039     /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32  */
11040     sp_256_mont_sqr_n_order_avx2_5(t2, t2, 32);
11041     /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
11042     sp_256_mont_mul_order_avx2_5(t2, t2, t);
11043     /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
11044     for (i=127; i>=112; i--) {
11045         sp_256_mont_sqr_order_avx2_5(t2, t2);
11046         if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
11047             sp_256_mont_mul_order_avx2_5(t2, t2, a);
11048     }
11049     /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
11050     sp_256_mont_sqr_n_order_avx2_5(t2, t2, 4);
11051     sp_256_mont_mul_order_avx2_5(t2, t2, t3);
11052     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
11053     for (i=107; i>=64; i--) {
11054         sp_256_mont_sqr_order_avx2_5(t2, t2);
11055         if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
11056             sp_256_mont_mul_order_avx2_5(t2, t2, a);
11057     }
11058     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
11059     sp_256_mont_sqr_n_order_avx2_5(t2, t2, 4);
11060     sp_256_mont_mul_order_avx2_5(t2, t2, t3);
11061     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
11062     for (i=59; i>=32; i--) {
11063         sp_256_mont_sqr_order_avx2_5(t2, t2);
11064         if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
11065             sp_256_mont_mul_order_avx2_5(t2, t2, a);
11066     }
11067     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
11068     sp_256_mont_sqr_n_order_avx2_5(t2, t2, 4);
11069     sp_256_mont_mul_order_avx2_5(t2, t2, t3);
11070     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
11071     for (i=27; i>=0; i--) {
11072         sp_256_mont_sqr_order_avx2_5(t2, t2);
11073         if (p256_order_low[i / 64] & ((sp_digit)1 << (i % 64)))
11074             sp_256_mont_mul_order_avx2_5(t2, t2, a);
11075     }
11076     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
11077     sp_256_mont_sqr_n_order_avx2_5(t2, t2, 4);
11078     /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
11079     sp_256_mont_mul_order_avx2_5(r, t2, t3);
11080 #endif /* WOLFSSL_SP_SMALL */
11081 }
11082 
11083 #endif /* HAVE_INTEL_AVX2 */
11084 #endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
11085 #ifdef HAVE_ECC_SIGN
11086 #ifndef SP_ECC_MAX_SIG_GEN
11087 #define SP_ECC_MAX_SIG_GEN  64
11088 #endif
11089 
11090 /* Sign the hash using the private key.
11091  *   e = [hash, 256 bits] from binary
11092  *   r = (k.G)->x mod order
11093  *   s = (r * x + e) / k mod order
11094  * The hash is truncated to the first 256 bits.
11095  *
11096  * hash     Hash to sign.
11097  * hashLen  Length of the hash data.
11098  * rng      Random number generator.
11099  * priv     Private part of key - scalar.
11100  * rm       First part of result as an mp_int.
11101  * sm       Sirst part of result as an mp_int.
11102  * heap     Heap to use for allocation.
11103  * returns RNG failures, MEMORY_E when memory allocation fails and
11104  * MP_OKAY on success.
11105  */
11106 int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
11107                     mp_int* rm, mp_int* sm, void* heap)
11108 {
11109 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
11110     sp_digit* d;
11111 #else
11112     sp_digit ed[2*5];
11113     sp_digit xd[2*5];
11114     sp_digit kd[2*5];
11115     sp_digit rd[2*5];
11116     sp_digit td[3 * 2*5];
11117     sp_point p;
11118 #endif
11119     sp_digit* e = NULL;
11120     sp_digit* x = NULL;
11121     sp_digit* k = NULL;
11122     sp_digit* r = NULL;
11123     sp_digit* tmp = NULL;
11124     sp_point* point = NULL;
11125     sp_digit carry;
11126     sp_digit* s;
11127     sp_digit* kInv;
11128     int err = MP_OKAY;
11129     int64_t c;
11130     int i;
11131 #ifdef HAVE_INTEL_AVX2
11132     word32 cpuid_flags = cpuid_get_flags();
11133 #endif
11134 
11135     (void)heap;
11136 
11137     err = sp_ecc_point_new(heap, p, point);
11138 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
11139     if (err == MP_OKAY) {
11140         d = XMALLOC(sizeof(sp_digit) * 7 * 2 * 5, heap, DYNAMIC_TYPE_ECC);
11141         if (d != NULL) {
11142             e = d + 0 * 5;
11143             x = d + 2 * 5;
11144             k = d + 4 * 5;
11145             r = d + 6 * 5;
11146             tmp = d + 8 * 5;
11147         }
11148         else
11149             err = MEMORY_E;
11150     }
11151 #else
11152     e = ed;
11153     x = xd;
11154     k = kd;
11155     r = rd;
11156     tmp = td;
11157 #endif
11158     s = e;
11159     kInv = k;
11160 
11161     if (err == MP_OKAY) {
11162         if (hashLen > 32)
11163             hashLen = 32;
11164 
11165         sp_256_from_bin(e, 5, hash, hashLen);
11166         sp_256_from_mp(x, 5, priv);
11167     }
11168 
11169     for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
11170         /* New random point. */
11171         err = sp_256_ecc_gen_k_5(rng, k);
11172         if (err == MP_OKAY) {
11173 #ifdef HAVE_INTEL_AVX2
11174             if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
11175                 err = sp_256_ecc_mulmod_base_avx2_5(point, k, 1, heap);
11176             else
11177 #endif
11178                 err = sp_256_ecc_mulmod_base_5(point, k, 1, NULL);
11179         }
11180 
11181         if (err == MP_OKAY) {
11182             /* r = point->x mod order */
11183             XMEMCPY(r, point->x, sizeof(sp_digit) * 5);
11184             sp_256_norm_5(r);
11185             c = sp_256_cmp_5(r, p256_order);
11186             sp_256_cond_sub_5(r, r, p256_order, 0 - (c >= 0));
11187             sp_256_norm_5(r);
11188 
11189             /* Conv k to Montgomery form (mod order) */
11190 #ifdef HAVE_INTEL_AVX2
11191             if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
11192                 sp_256_mul_avx2_5(k, k, p256_norm_order);
11193             else
11194 #endif
11195                 sp_256_mul_5(k, k, p256_norm_order);
11196             err = sp_256_mod_5(k, k, p256_order);
11197         }
11198         if (err == MP_OKAY) {
11199             sp_256_norm_5(k);
11200             /* kInv = 1/k mod order */
11201 #ifdef HAVE_INTEL_AVX2
11202             if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
11203                 sp_256_mont_inv_order_avx2_5(kInv, k, tmp);
11204             else
11205 #endif
11206                 sp_256_mont_inv_order_5(kInv, k, tmp);
11207             sp_256_norm_5(kInv);
11208 
11209             /* s = r * x + e */
11210 #ifdef HAVE_INTEL_AVX2
11211             if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
11212                 sp_256_mul_avx2_5(x, x, r);
11213             else
11214 #endif
11215                 sp_256_mul_5(x, x, r);
11216             err = sp_256_mod_5(x, x, p256_order);
11217         }
11218         if (err == MP_OKAY) {
11219             sp_256_norm_5(x);
11220             carry = sp_256_add_5(s, e, x);
11221             sp_256_cond_sub_5(s, s, p256_order, 0 - carry);
11222             sp_256_norm_5(s);
11223             c = sp_256_cmp_5(s, p256_order);
11224             sp_256_cond_sub_5(s, s, p256_order, 0 - (c >= 0));
11225             sp_256_norm_5(s);
11226 
11227             /* s = s * k^-1 mod order */
11228 #ifdef HAVE_INTEL_AVX2
11229             if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
11230                 sp_256_mont_mul_order_avx2_5(s, s, kInv);
11231             else
11232 #endif
11233                 sp_256_mont_mul_order_5(s, s, kInv);
11234             sp_256_norm_5(s);
11235 
11236             /* Check that signature is usable. */
11237             if (!sp_256_iszero_5(s))
11238                 break;
11239         }
11240     }
11241 
11242     if (i == 0)
11243         err = RNG_FAILURE_E;
11244 
11245     if (err == MP_OKAY)
11246         err = sp_256_to_mp(r, rm);
11247     if (err == MP_OKAY)
11248         err = sp_256_to_mp(s, sm);
11249 
11250 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
11251     if (d != NULL) {
11252         XMEMSET(d, 0, sizeof(sp_digit) * 8 * 5);
11253         XFREE(d, heap, DYNAMIC_TYPE_ECC);
11254     }
11255 #else
11256     XMEMSET(e, 0, sizeof(sp_digit) * 2 * 5);
11257     XMEMSET(x, 0, sizeof(sp_digit) * 2 * 5);
11258     XMEMSET(k, 0, sizeof(sp_digit) * 2 * 5);
11259     XMEMSET(r, 0, sizeof(sp_digit) * 2 * 5);
11260     XMEMSET(r, 0, sizeof(sp_digit) * 2 * 5);
11261     XMEMSET(tmp, 0, sizeof(sp_digit) * 3 * 2*5);
11262 #endif
11263     sp_ecc_point_free(point, 1, heap);
11264 
11265     return err;
11266 }
11267 #endif /* HAVE_ECC_SIGN */
11268 
11269 #ifdef HAVE_ECC_VERIFY
11270 /* Verify the signature values with the hash and public key.
11271  *   e = Truncate(hash, 256)
11272  *   u1 = e/s mod order
11273  *   u2 = r/s mod order
11274  *   r == (u1.G + u2.Q)->x mod order
11275  * Optimization: Leave point in projective form.
11276  *   (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
11277  *   (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
11278  * The hash is truncated to the first 256 bits.
11279  *
11280  * hash     Hash to sign.
11281  * hashLen  Length of the hash data.
11282  * rng      Random number generator.
11283  * priv     Private part of key - scalar.
11284  * rm       First part of result as an mp_int.
11285  * sm       Sirst part of result as an mp_int.
11286  * heap     Heap to use for allocation.
11287  * returns RNG failures, MEMORY_E when memory allocation fails and
11288  * MP_OKAY on success.
11289  */
11290 int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
11291     mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
11292 {
11293 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
11294     sp_digit* d = NULL;
11295 #else
11296     sp_digit u1d[2*5];
11297     sp_digit u2d[2*5];
11298     sp_digit sd[2*5];
11299     sp_digit tmpd[2*5 * 5];
11300     sp_point p1d;
11301     sp_point p2d;
11302 #endif
11303     sp_digit* u1;
11304     sp_digit* u2;
11305     sp_digit* s;
11306     sp_digit* tmp;
11307     sp_point* p1;
11308     sp_point* p2 = NULL;
11309     sp_digit carry;
11310     int64_t c;
11311     int err;
11312 #ifdef HAVE_INTEL_AVX2
11313     word32 cpuid_flags = cpuid_get_flags();
11314 #endif
11315 
11316     err = sp_ecc_point_new(heap, p1d, p1);
11317     if (err == MP_OKAY)
11318         err = sp_ecc_point_new(heap, p2d, p2);
11319 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
11320     if (err == MP_OKAY) {
11321         d = XMALLOC(sizeof(sp_digit) * 16 * 5, heap, DYNAMIC_TYPE_ECC);
11322         if (d != NULL) {
11323             u1  = d + 0 * 5;
11324             u2  = d + 2 * 5;
11325             s   = d + 4 * 5;
11326             tmp = d + 6 * 5;
11327         }
11328         else
11329             err = MEMORY_E;
11330     }
11331 #else
11332     u1 = u1d;
11333     u2 = u2d;
11334     s  = sd;
11335     tmp = tmpd;
11336 #endif
11337 
11338     if (err == MP_OKAY) {
11339         if (hashLen > 32)
11340             hashLen = 32;
11341 
11342         sp_256_from_bin(u1, 5, hash, hashLen);
11343         sp_256_from_mp(u2, 5, r);
11344         sp_256_from_mp(s, 5, sm);
11345         sp_256_from_mp(p2->x, 5, pX);
11346         sp_256_from_mp(p2->y, 5, pY);
11347         sp_256_from_mp(p2->z, 5, pZ);
11348 
11349 #ifdef HAVE_INTEL_AVX2
11350         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
11351             sp_256_mul_avx2_5(s, s, p256_norm_order);
11352         else
11353 #endif
11354             sp_256_mul_5(s, s, p256_norm_order);
11355         err = sp_256_mod_5(s, s, p256_order);
11356     }
11357     if (err == MP_OKAY) {
11358         sp_256_norm_5(s);
11359 #ifdef HAVE_INTEL_AVX2
11360         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
11361             sp_256_mont_inv_order_avx2_5(s, s, tmp);
11362             sp_256_mont_mul_order_avx2_5(u1, u1, s);
11363             sp_256_mont_mul_order_avx2_5(u2, u2, s);
11364         }
11365         else
11366 #endif
11367         {
11368             sp_256_mont_inv_order_5(s, s, tmp);
11369             sp_256_mont_mul_order_5(u1, u1, s);
11370             sp_256_mont_mul_order_5(u2, u2, s);
11371         }
11372 
11373 #ifdef HAVE_INTEL_AVX2
11374         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
11375             err = sp_256_ecc_mulmod_base_avx2_5(p1, u1, 0, heap);
11376         else
11377 #endif
11378             err = sp_256_ecc_mulmod_base_5(p1, u1, 0, heap);
11379     }
11380     if (err == MP_OKAY) {
11381 #ifdef HAVE_INTEL_AVX2
11382         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
11383             err = sp_256_ecc_mulmod_avx2_5(p2, p2, u2, 0, heap);
11384         else
11385 #endif
11386             err = sp_256_ecc_mulmod_5(p2, p2, u2, 0, heap);
11387     }
11388 
11389     if (err == MP_OKAY) {
11390 #ifdef HAVE_INTEL_AVX2
11391         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
11392             sp_256_proj_point_add_avx2_5(p1, p1, p2, tmp);
11393         else
11394 #endif
11395             sp_256_proj_point_add_5(p1, p1, p2, tmp);
11396 
11397         /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
11398         /* Reload r and convert to Montgomery form. */
11399         sp_256_from_mp(u2, 5, r);
11400         err = sp_256_mod_mul_norm_5(u2, u2, p256_mod);
11401     }
11402 
11403     if (err == MP_OKAY) {
11404         /* u1 = r.z'.z' mod prime */
11405         sp_256_mont_sqr_5(p1->z, p1->z, p256_mod, p256_mp_mod);
11406         sp_256_mont_mul_5(u1, u2, p1->z, p256_mod, p256_mp_mod);
11407         *res = sp_256_cmp_5(p1->x, u1) == 0;
11408         if (*res == 0) {
11409             /* Reload r and add order. */
11410             sp_256_from_mp(u2, 5, r);
11411             carry = sp_256_add_5(u2, u2, p256_order);
11412             /* Carry means result is greater than mod and is not valid. */
11413             if (!carry) {
11414                 sp_256_norm_5(u2);
11415 
11416                 /* Compare with mod and if greater or equal then not valid. */
11417                 c = sp_256_cmp_5(u2, p256_mod);
11418                 if (c < 0) {
11419                     /* Convert to Montogomery form */
11420                     err = sp_256_mod_mul_norm_5(u2, u2, p256_mod);
11421                     if (err == MP_OKAY) {
11422                         /* u1 = (r + 1*order).z'.z' mod prime */
11423                         sp_256_mont_mul_5(u1, u2, p1->z, p256_mod,
11424                                                                   p256_mp_mod);
11425                         *res = sp_256_cmp_5(p1->x, u2) == 0;
11426                     }
11427                 }
11428             }
11429         }
11430     }
11431 
11432 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
11433     if (d != NULL)
11434         XFREE(d, heap, DYNAMIC_TYPE_ECC);
11435 #endif
11436     sp_ecc_point_free(p1, 0, heap);
11437     sp_ecc_point_free(p2, 0, heap);
11438 
11439     return err;
11440 }
11441 #endif /* HAVE_ECC_VERIFY */
11442 
11443 #ifdef HAVE_ECC_CHECK_KEY
11444 /* Check that the x and y oridinates are a valid point on the curve.
11445  *
11446  * point  EC point.
11447  * heap   Heap to use if dynamically allocating.
11448  * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
11449  * not on the curve and MP_OKAY otherwise.
11450  */
11451 static int sp_256_ecc_is_point_5(sp_point* point, void* heap)
11452 {
11453 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
11454     sp_digit* d = NULL;
11455 #else
11456     sp_digit t1d[2*5];
11457     sp_digit t2d[2*5];
11458 #endif
11459     sp_digit* t1;
11460     sp_digit* t2;
11461     int err = MP_OKAY;
11462 
11463 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
11464     d = XMALLOC(sizeof(sp_digit) * 5 * 4, heap, DYNAMIC_TYPE_ECC);
11465     if (d != NULL) {
11466         t1 = d + 0 * 5;
11467         t2 = d + 2 * 5;
11468     }
11469     else
11470         err = MEMORY_E;
11471 #else
11472     (void)heap;
11473 
11474     t1 = t1d;
11475     t2 = t2d;
11476 #endif
11477 
11478     if (err == MP_OKAY) {
11479         sp_256_sqr_5(t1, point->y);
11480         sp_256_mod_5(t1, t1, p256_mod);
11481         sp_256_sqr_5(t2, point->x);
11482         sp_256_mod_5(t2, t2, p256_mod);
11483         sp_256_mul_5(t2, t2, point->x);
11484         sp_256_mod_5(t2, t2, p256_mod);
11485     sp_256_sub_5(t2, p256_mod, t2);
11486         sp_256_mont_add_5(t1, t1, t2, p256_mod);
11487 
11488         sp_256_mont_add_5(t1, t1, point->x, p256_mod);
11489         sp_256_mont_add_5(t1, t1, point->x, p256_mod);
11490         sp_256_mont_add_5(t1, t1, point->x, p256_mod);
11491 
11492         if (sp_256_cmp_5(t1, p256_b) != 0)
11493             err = MP_VAL;
11494     }
11495 
11496 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
11497     if (d != NULL)
11498         XFREE(d, heap, DYNAMIC_TYPE_ECC);
11499 #endif
11500 
11501     return err;
11502 }
11503 
11504 /* Check that the x and y oridinates are a valid point on the curve.
11505  *
11506  * pX  X ordinate of EC point.
11507  * pY  Y ordinate of EC point.
11508  * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
11509  * not on the curve and MP_OKAY otherwise.
11510  */
11511 int sp_ecc_is_point_256(mp_int* pX, mp_int* pY)
11512 {
11513 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
11514     sp_point pubd;
11515 #endif
11516     sp_point* pub;
11517     byte one[1] = { 1 };
11518     int err;
11519 
11520     err = sp_ecc_point_new(NULL, pubd, pub);
11521     if (err == MP_OKAY) {
11522         sp_256_from_mp(pub->x, 5, pX);
11523         sp_256_from_mp(pub->y, 5, pY);
11524         sp_256_from_bin(pub->z, 5, one, sizeof(one));
11525 
11526         err = sp_256_ecc_is_point_5(pub, NULL);
11527     }
11528 
11529     sp_ecc_point_free(pub, 0, NULL);
11530 
11531     return err;
11532 }
11533 
11534 /* Check that the private scalar generates the EC point (px, py), the point is
11535  * on the curve and the point has the correct order.
11536  *
11537  * pX     X ordinate of EC point.
11538  * pY     Y ordinate of EC point.
11539  * privm  Private scalar that generates EC point.
11540  * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
11541  * not on the curve, ECC_INF_E if the point does not have the correct order,
11542  * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
11543  * MP_OKAY otherwise.
11544  */
11545 int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
11546 {
11547 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
11548     sp_digit privd[5];
11549     sp_point pubd;
11550     sp_point pd;
11551 #endif
11552     sp_digit* priv = NULL;
11553     sp_point* pub;
11554     sp_point* p = NULL;
11555     byte one[1] = { 1 };
11556     int err;
11557 #ifdef HAVE_INTEL_AVX2
11558     word32 cpuid_flags = cpuid_get_flags();
11559 #endif
11560 
11561     err = sp_ecc_point_new(heap, pubd, pub);
11562     if (err == MP_OKAY)
11563         err = sp_ecc_point_new(heap, pd, p);
11564 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
11565     if (err == MP_OKAY) {
11566         priv = XMALLOC(sizeof(sp_digit) * 5, heap, DYNAMIC_TYPE_ECC);
11567         if (priv == NULL)
11568             err = MEMORY_E;
11569     }
11570 #else
11571     priv = privd;
11572 #endif
11573 
11574     if (err == MP_OKAY) {
11575         sp_256_from_mp(pub->x, 5, pX);
11576         sp_256_from_mp(pub->y, 5, pY);
11577         sp_256_from_bin(pub->z, 5, one, sizeof(one));
11578         sp_256_from_mp(priv, 5, privm);
11579 
11580         /* Check point at infinitiy. */
11581         if (sp_256_iszero_5(pub->x) &&
11582             sp_256_iszero_5(pub->y))
11583             err = ECC_INF_E;
11584     }
11585 
11586     if (err == MP_OKAY) {
11587         /* Check range of X and Y */
11588         if (sp_256_cmp_5(pub->x, p256_mod) >= 0 ||
11589             sp_256_cmp_5(pub->y, p256_mod) >= 0)
11590             err = ECC_OUT_OF_RANGE_E;
11591     }
11592 
11593     if (err == MP_OKAY) {
11594         /* Check point is on curve */
11595         err = sp_256_ecc_is_point_5(pub, heap);
11596     }
11597 
11598     if (err == MP_OKAY) {
11599         /* Point * order = infinity */
11600 #ifdef HAVE_INTEL_AVX2
11601         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
11602             err = sp_256_ecc_mulmod_avx2_5(p, pub, p256_order, 1, heap);
11603         else
11604 #endif
11605             err = sp_256_ecc_mulmod_5(p, pub, p256_order, 1, heap);
11606     }
11607     if (err == MP_OKAY) {
11608         /* Check result is infinity */
11609         if (!sp_256_iszero_5(p->x) ||
11610             !sp_256_iszero_5(p->y)) {
11611             err = ECC_INF_E;
11612         }
11613     }
11614 
11615     if (err == MP_OKAY) {
11616         /* Base * private = point */
11617 #ifdef HAVE_INTEL_AVX2
11618         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
11619             err = sp_256_ecc_mulmod_base_avx2_5(p, priv, 1, heap);
11620         else
11621 #endif
11622             err = sp_256_ecc_mulmod_base_5(p, priv, 1, heap);
11623     }
11624     if (err == MP_OKAY) {
11625         /* Check result is public key */
11626         if (sp_256_cmp_5(p->x, pub->x) != 0 ||
11627             sp_256_cmp_5(p->y, pub->y) != 0) {
11628             err = ECC_PRIV_KEY_E;
11629         }
11630     }
11631 
11632 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
11633     if (priv != NULL)
11634         XFREE(priv, heap, DYNAMIC_TYPE_ECC);
11635 #endif
11636     sp_ecc_point_free(p, 0, heap);
11637     sp_ecc_point_free(pub, 0, heap);
11638 
11639     return err;
11640 }
11641 #endif
11642 #ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
11643 /* Add two projective EC points together.
11644  * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
11645  *
11646  * pX   First EC point's X ordinate.
11647  * pY   First EC point's Y ordinate.
11648  * pZ   First EC point's Z ordinate.
11649  * qX   Second EC point's X ordinate.
11650  * qY   Second EC point's Y ordinate.
11651  * qZ   Second EC point's Z ordinate.
11652  * rX   Resultant EC point's X ordinate.
11653  * rY   Resultant EC point's Y ordinate.
11654  * rZ   Resultant EC point's Z ordinate.
11655  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
11656  */
11657 int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
11658                               mp_int* qX, mp_int* qY, mp_int* qZ,
11659                               mp_int* rX, mp_int* rY, mp_int* rZ)
11660 {
11661 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
11662     sp_digit tmpd[2 * 5 * 5];
11663     sp_point pd;
11664     sp_point qd;
11665 #endif
11666     sp_digit* tmp;
11667     sp_point* p;
11668     sp_point* q = NULL;
11669     int err;
11670 #ifdef HAVE_INTEL_AVX2
11671     word32 cpuid_flags = cpuid_get_flags();
11672 #endif
11673 
11674     err = sp_ecc_point_new(NULL, pd, p);
11675     if (err == MP_OKAY)
11676         err = sp_ecc_point_new(NULL, qd, q);
11677 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
11678     if (err == MP_OKAY) {
11679         tmp = XMALLOC(sizeof(sp_digit) * 2 * 5 * 5, NULL, DYNAMIC_TYPE_ECC);
11680         if (tmp == NULL)
11681             err = MEMORY_E;
11682     }
11683 #else
11684     tmp = tmpd;
11685 #endif
11686 
11687     if (err == MP_OKAY) {
11688         sp_256_from_mp(p->x, 5, pX);
11689         sp_256_from_mp(p->y, 5, pY);
11690         sp_256_from_mp(p->z, 5, pZ);
11691         sp_256_from_mp(q->x, 5, qX);
11692         sp_256_from_mp(q->y, 5, qY);
11693         sp_256_from_mp(q->z, 5, qZ);
11694 
11695 #ifdef HAVE_INTEL_AVX2
11696         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
11697             sp_256_proj_point_add_avx2_5(p, p, q, tmp);
11698         else
11699 #endif
11700             sp_256_proj_point_add_5(p, p, q, tmp);
11701     }
11702 
11703     if (err == MP_OKAY)
11704         err = sp_256_to_mp(p->x, rX);
11705     if (err == MP_OKAY)
11706         err = sp_256_to_mp(p->y, rY);
11707     if (err == MP_OKAY)
11708         err = sp_256_to_mp(p->z, rZ);
11709 
11710 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
11711     if (tmp != NULL)
11712         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
11713 #endif
11714     sp_ecc_point_free(q, 0, NULL);
11715     sp_ecc_point_free(p, 0, NULL);
11716 
11717     return err;
11718 }
11719 
11720 /* Double a projective EC point.
11721  * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
11722  *
11723  * pX   EC point's X ordinate.
11724  * pY   EC point's Y ordinate.
11725  * pZ   EC point's Z ordinate.
11726  * rX   Resultant EC point's X ordinate.
11727  * rY   Resultant EC point's Y ordinate.
11728  * rZ   Resultant EC point's Z ordinate.
11729  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
11730  */
11731 int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
11732                               mp_int* rX, mp_int* rY, mp_int* rZ)
11733 {
11734 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
11735     sp_digit tmpd[2 * 5 * 2];
11736     sp_point pd;
11737 #endif
11738     sp_digit* tmp;
11739     sp_point* p;
11740     int err;
11741 #ifdef HAVE_INTEL_AVX2
11742     word32 cpuid_flags = cpuid_get_flags();
11743 #endif
11744 
11745     err = sp_ecc_point_new(NULL, pd, p);
11746 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
11747     if (err == MP_OKAY) {
11748         tmp = XMALLOC(sizeof(sp_digit) * 2 * 5 * 2, NULL, DYNAMIC_TYPE_ECC);
11749         if (tmp == NULL)
11750             err = MEMORY_E;
11751     }
11752 #else
11753     tmp = tmpd;
11754 #endif
11755 
11756     if (err == MP_OKAY) {
11757         sp_256_from_mp(p->x, 5, pX);
11758         sp_256_from_mp(p->y, 5, pY);
11759         sp_256_from_mp(p->z, 5, pZ);
11760 
11761 #ifdef HAVE_INTEL_AVX2
11762         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
11763             sp_256_proj_point_dbl_avx2_5(p, p, tmp);
11764         else
11765 #endif
11766             sp_256_proj_point_dbl_5(p, p, tmp);
11767     }
11768 
11769     if (err == MP_OKAY)
11770         err = sp_256_to_mp(p->x, rX);
11771     if (err == MP_OKAY)
11772         err = sp_256_to_mp(p->y, rY);
11773     if (err == MP_OKAY)
11774         err = sp_256_to_mp(p->z, rZ);
11775 
11776 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
11777     if (tmp != NULL)
11778         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
11779 #endif
11780     sp_ecc_point_free(p, 0, NULL);
11781 
11782     return err;
11783 }
11784 
11785 /* Map a projective EC point to affine in place.
11786  * pZ will be one.
11787  *
11788  * pX   EC point's X ordinate.
11789  * pY   EC point's Y ordinate.
11790  * pZ   EC point's Z ordinate.
11791  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
11792  */
11793 int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ)
11794 {
11795 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
11796     sp_digit tmpd[2 * 5 * 4];
11797     sp_point pd;
11798 #endif
11799     sp_digit* tmp;
11800     sp_point* p;
11801     int err;
11802 
11803     err = sp_ecc_point_new(NULL, pd, p);
11804 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
11805     if (err == MP_OKAY) {
11806         tmp = XMALLOC(sizeof(sp_digit) * 2 * 5 * 4, NULL, DYNAMIC_TYPE_ECC);
11807         if (tmp == NULL)
11808             err = MEMORY_E;
11809     }
11810 #else
11811     tmp = tmpd;
11812 #endif
11813     if (err == MP_OKAY) {
11814         sp_256_from_mp(p->x, 5, pX);
11815         sp_256_from_mp(p->y, 5, pY);
11816         sp_256_from_mp(p->z, 5, pZ);
11817 
11818         sp_256_map_5(p, p, tmp);
11819     }
11820 
11821     if (err == MP_OKAY)
11822         err = sp_256_to_mp(p->x, pX);
11823     if (err == MP_OKAY)
11824         err = sp_256_to_mp(p->y, pY);
11825     if (err == MP_OKAY)
11826         err = sp_256_to_mp(p->z, pZ);
11827 
11828 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
11829     if (tmp != NULL)
11830         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
11831 #endif
11832     sp_ecc_point_free(p, 0, NULL);
11833 
11834     return err;
11835 }
11836 #endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
11837 #ifdef HAVE_COMP_KEY
11838 /* Find the square root of a number mod the prime of the curve.
11839  *
11840  * y  The number to operate on and the result.
11841  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
11842  */
11843 static int sp_256_mont_sqrt_5(sp_digit* y)
11844 {
11845 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
11846     sp_digit* d;
11847 #else
11848     sp_digit t1d[2 * 5];
11849     sp_digit t2d[2 * 5];
11850 #endif
11851     sp_digit* t1;
11852     sp_digit* t2;
11853     int err = MP_OKAY;
11854 #ifdef HAVE_INTEL_AVX2
11855     word32 cpuid_flags = cpuid_get_flags();
11856 #endif
11857 
11858 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
11859     d = XMALLOC(sizeof(sp_digit) * 4 * 5, NULL, DYNAMIC_TYPE_ECC);
11860     if (d != NULL) {
11861         t1 = d + 0 * 5;
11862         t2 = d + 2 * 5;
11863     }
11864     else
11865         err = MEMORY_E;
11866 #else
11867     t1 = t1d;
11868     t2 = t2d;
11869 #endif
11870 
11871     if (err == MP_OKAY) {
11872 #ifdef HAVE_INTEL_AVX2
11873         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
11874             /* t2 = y ^ 0x2 */
11875             sp_256_mont_sqr_avx2_5(t2, y, p256_mod, p256_mp_mod);
11876             /* t1 = y ^ 0x3 */
11877             sp_256_mont_mul_avx2_5(t1, t2, y, p256_mod, p256_mp_mod);
11878             /* t2 = y ^ 0xc */
11879             sp_256_mont_sqr_n_avx2_5(t2, t1, 2, p256_mod, p256_mp_mod);
11880             /* t1 = y ^ 0xf */
11881             sp_256_mont_mul_avx2_5(t1, t1, t2, p256_mod, p256_mp_mod);
11882             /* t2 = y ^ 0xf0 */
11883             sp_256_mont_sqr_n_avx2_5(t2, t1, 4, p256_mod, p256_mp_mod);
11884             /* t1 = y ^ 0xff */
11885             sp_256_mont_mul_avx2_5(t1, t1, t2, p256_mod, p256_mp_mod);
11886             /* t2 = y ^ 0xff00 */
11887             sp_256_mont_sqr_n_avx2_5(t2, t1, 8, p256_mod, p256_mp_mod);
11888             /* t1 = y ^ 0xffff */
11889             sp_256_mont_mul_avx2_5(t1, t1, t2, p256_mod, p256_mp_mod);
11890             /* t2 = y ^ 0xffff0000 */
11891             sp_256_mont_sqr_n_avx2_5(t2, t1, 16, p256_mod, p256_mp_mod);
11892             /* t1 = y ^ 0xffffffff */
11893             sp_256_mont_mul_avx2_5(t1, t1, t2, p256_mod, p256_mp_mod);
11894             /* t1 = y ^ 0xffffffff00000000 */
11895             sp_256_mont_sqr_n_avx2_5(t1, t1, 32, p256_mod, p256_mp_mod);
11896             /* t1 = y ^ 0xffffffff00000001 */
11897             sp_256_mont_mul_avx2_5(t1, t1, y, p256_mod, p256_mp_mod);
11898             /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
11899             sp_256_mont_sqr_n_avx2_5(t1, t1, 96, p256_mod, p256_mp_mod);
11900             /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
11901             sp_256_mont_mul_avx2_5(t1, t1, y, p256_mod, p256_mp_mod);
11902             sp_256_mont_sqr_n_avx2_5(y, t1, 94, p256_mod, p256_mp_mod);
11903         }
11904         else
11905 #endif
11906         {
11907             /* t2 = y ^ 0x2 */
11908             sp_256_mont_sqr_5(t2, y, p256_mod, p256_mp_mod);
11909             /* t1 = y ^ 0x3 */
11910             sp_256_mont_mul_5(t1, t2, y, p256_mod, p256_mp_mod);
11911             /* t2 = y ^ 0xc */
11912             sp_256_mont_sqr_n_5(t2, t1, 2, p256_mod, p256_mp_mod);
11913             /* t1 = y ^ 0xf */
11914             sp_256_mont_mul_5(t1, t1, t2, p256_mod, p256_mp_mod);
11915             /* t2 = y ^ 0xf0 */
11916             sp_256_mont_sqr_n_5(t2, t1, 4, p256_mod, p256_mp_mod);
11917             /* t1 = y ^ 0xff */
11918             sp_256_mont_mul_5(t1, t1, t2, p256_mod, p256_mp_mod);
11919             /* t2 = y ^ 0xff00 */
11920             sp_256_mont_sqr_n_5(t2, t1, 8, p256_mod, p256_mp_mod);
11921             /* t1 = y ^ 0xffff */
11922             sp_256_mont_mul_5(t1, t1, t2, p256_mod, p256_mp_mod);
11923             /* t2 = y ^ 0xffff0000 */
11924             sp_256_mont_sqr_n_5(t2, t1, 16, p256_mod, p256_mp_mod);
11925             /* t1 = y ^ 0xffffffff */
11926             sp_256_mont_mul_5(t1, t1, t2, p256_mod, p256_mp_mod);
11927             /* t1 = y ^ 0xffffffff00000000 */
11928             sp_256_mont_sqr_n_5(t1, t1, 32, p256_mod, p256_mp_mod);
11929             /* t1 = y ^ 0xffffffff00000001 */
11930             sp_256_mont_mul_5(t1, t1, y, p256_mod, p256_mp_mod);
11931             /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
11932             sp_256_mont_sqr_n_5(t1, t1, 96, p256_mod, p256_mp_mod);
11933             /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
11934             sp_256_mont_mul_5(t1, t1, y, p256_mod, p256_mp_mod);
11935             sp_256_mont_sqr_n_5(y, t1, 94, p256_mod, p256_mp_mod);
11936         }
11937     }
11938 
11939 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
11940     if (d != NULL)
11941         XFREE(d, NULL, DYNAMIC_TYPE_ECC);
11942 #endif
11943 
11944     return err;
11945 }
11946 
11947 /* Uncompress the point given the X ordinate.
11948  *
11949  * xm    X ordinate.
11950  * odd   Whether the Y ordinate is odd.
11951  * ym    Calculated Y ordinate.
11952  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
11953  */
11954 int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
11955 {
11956 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
11957     sp_digit* d;
11958 #else
11959     sp_digit xd[2 * 5];
11960     sp_digit yd[2 * 5];
11961 #endif
11962     sp_digit* x;
11963     sp_digit* y;
11964     int err = MP_OKAY;
11965 #ifdef HAVE_INTEL_AVX2
11966     word32 cpuid_flags = cpuid_get_flags();
11967 #endif
11968 
11969 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
11970     d = XMALLOC(sizeof(sp_digit) * 4 * 5, NULL, DYNAMIC_TYPE_ECC);
11971     if (d != NULL) {
11972         x = d + 0 * 5;
11973         y = d + 2 * 5;
11974     }
11975     else
11976         err = MEMORY_E;
11977 #else
11978     x = xd;
11979     y = yd;
11980 #endif
11981 
11982     if (err == MP_OKAY) {
11983         sp_256_from_mp(x, 5, xm);
11984 
11985         err = sp_256_mod_mul_norm_5(x, x, p256_mod);
11986     }
11987 
11988     if (err == MP_OKAY) {
11989         /* y = x^3 */
11990 #ifdef HAVE_INTEL_AVX2
11991         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
11992             sp_256_mont_sqr_avx2_5(y, x, p256_mod, p256_mp_mod);
11993             sp_256_mont_mul_avx2_5(y, y, x, p256_mod, p256_mp_mod);
11994         }
11995         else
11996 #endif
11997         {
11998             sp_256_mont_sqr_5(y, x, p256_mod, p256_mp_mod);
11999             sp_256_mont_mul_5(y, y, x, p256_mod, p256_mp_mod);
12000         }
12001         /* y = x^3 - 3x */
12002         sp_256_mont_sub_5(y, y, x, p256_mod);
12003         sp_256_mont_sub_5(y, y, x, p256_mod);
12004         sp_256_mont_sub_5(y, y, x, p256_mod);
12005         /* y = x^3 - 3x + b */
12006         err = sp_256_mod_mul_norm_5(x, p256_b, p256_mod);
12007     }
12008     if (err == MP_OKAY) {
12009         sp_256_mont_add_5(y, y, x, p256_mod);
12010         /* y = sqrt(x^3 - 3x + b) */
12011         err = sp_256_mont_sqrt_5(y);
12012     }
12013     if (err == MP_OKAY) {
12014         XMEMSET(y + 5, 0, 5 * sizeof(sp_digit));
12015         sp_256_mont_reduce_5(y, p256_mod, p256_mp_mod);
12016         if (((y[0] ^ odd) & 1) != 0)
12017             sp_256_mont_sub_5(y, p256_mod, y, p256_mod);
12018 
12019         err = sp_256_to_mp(y, ym);
12020     }
12021 
12022 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
12023     if (d != NULL)
12024         XFREE(d, NULL, DYNAMIC_TYPE_ECC);
12025 #endif
12026 
12027     return err;
12028 }
12029 #endif
12030 #endif /* WOLFSSL_SP_NO_256 */
12031 #endif /* WOLFSSL_HAVE_SP_ECC */
12032 #endif /* SP_WORD_SIZE == 64 */
12033 #endif /* !WOLFSSL_SP_ASM */
12034 #endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_ECC */
12035