ssh lib

Dependents:   OS

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers sp_c32.c Source File

sp_c32.c

00001 /* sp.c
00002  *
00003  * Copyright (C) 2006-2018 wolfSSL Inc.
00004  *
00005  * This file is part of wolfSSL.
00006  *
00007  * wolfSSL is free software; you can redistribute it and/or modify
00008  * it under the terms of the GNU General Public License as published by
00009  * the Free Software Foundation; either version 2 of the License, or
00010  * (at your option) any later version.
00011  *
00012  * wolfSSL is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
00020  */
00021 
00022 /* Implementation by Sean Parkinson. */
00023 
00024 #ifdef HAVE_CONFIG_H
00025     #include <config.h>
00026 #endif
00027 
00028 #include <wolfcrypt/settings.h>
00029 #include <wolfcrypt/error-crypt.h>
00030 #include <wolfcrypt/cpuid.h>
00031 #ifdef NO_INLINE
00032     #include <wolfcrypt/misc.h>
00033 #else
00034     #define WOLFSSL_MISC_INCLUDED
00035     #include <wolfcrypt/src/misc.c>
00036 #endif
00037 
00038 #if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) || \
00039                                     defined(WOLFSSL_HAVE_SP_ECC)
00040 
00041 #ifdef RSA_LOW_MEM
00042 #define SP_RSA_PRIVATE_EXP_D
00043 
00044 #ifndef WOLFSSL_SP_SMALL
00045 #define WOLFSSL_SP_SMALL
00046 #endif
00047 #endif
00048 
00049 #include <wolfcrypt/sp.h>
00050 
00051 #ifndef WOLFSSL_SP_ASM
00052 #if SP_WORD_SIZE == 32
00053 #if defined(WOLFSSL_SP_CACHE_RESISTANT) || defined(WOLFSSL_SP_SMALL)
/* Two-entry address mask table: entry 0 has no bits set, entry 1 has every
 * bit set.  Used to obfuscate which of two addresses will be used.
 */
static const size_t addr_mask[2] = { (size_t)0, ~(size_t)0 };
00056 #endif
00057 
00058 #if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
00059 #ifndef WOLFSSL_SP_NO_2048
00060 /* Read big endian unsigned byte aray into r.
00061  *
00062  * r  A single precision integer.
00063  * a  Byte array.
00064  * n  Number of bytes in array to read.
00065  */
00066 static void sp_2048_from_bin(sp_digit* r, int max, const byte* a, int n)
00067 {
00068     int i, j = 0, s = 0;
00069 
00070     r[0] = 0;
00071     for (i = n-1; i >= 0; i--) {
00072         r[j] |= ((sp_digit)a[i]) << s;
00073         if (s >= 15) {
00074             r[j] &= 0x7fffff;
00075             s = 23 - s;
00076             if (j + 1 >= max)
00077                 break;
00078             r[++j] = a[i] >> s;
00079             s = 8 - s;
00080         }
00081         else
00082             s += 8;
00083     }
00084 
00085     for (j++; j < max; j++)
00086         r[j] = 0;
00087 }
00088 
00089 /* Convert an mp_int to an array of sp_digit.
00090  *
00091  * r  A single precision integer.
00092  * a  A multi-precision integer.
00093  */
00094 static void sp_2048_from_mp(sp_digit* r, int max, mp_int* a)
00095 {
00096 #if DIGIT_BIT == 23
00097     int j;
00098 
00099     XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
00100 
00101     for (j = a->used; j < max; j++)
00102         r[j] = 0;
00103 #elif DIGIT_BIT > 23
00104     int i, j = 0, s = 0;
00105 
00106     r[0] = 0;
00107     for (i = 0; i < a->used && j < max; i++) {
00108         r[j] |= a->dp[i] << s;
00109         r[j] &= 0x7fffff;
00110         s = 23 - s;
00111         if (j + 1 >= max)
00112             break;
00113         r[++j] = a->dp[i] >> s;
00114         while (s + 23 <= DIGIT_BIT) {
00115             s += 23;
00116             r[j] &= 0x7fffff;
00117             if (j + 1 >= max)
00118                 break;
00119             if (s < DIGIT_BIT)
00120                 r[++j] = a->dp[i] >> s;
00121             else
00122                 r[++j] = 0;
00123         }
00124         s = DIGIT_BIT - s;
00125     }
00126 
00127     for (j++; j < max; j++)
00128         r[j] = 0;
00129 #else
00130     int i, j = 0, s = 0;
00131 
00132     r[0] = 0;
00133     for (i = 0; i < a->used && j < max; i++) {
00134         r[j] |= ((sp_digit)a->dp[i]) << s;
00135         if (s + DIGIT_BIT >= 23) {
00136             r[j] &= 0x7fffff;
00137             if (j + 1 >= max)
00138                 break;
00139             s = 23 - s;
00140             if (s == DIGIT_BIT) {
00141                 r[++j] = 0;
00142                 s = 0;
00143             }
00144             else {
00145                 r[++j] = a->dp[i] >> s;
00146                 s = DIGIT_BIT - s;
00147             }
00148         }
00149         else
00150             s += DIGIT_BIT;
00151     }
00152 
00153     for (j++; j < max; j++)
00154         r[j] = 0;
00155 #endif
00156 }
00157 
00158 /* Write r as big endian to byte aray.
00159  * Fixed length number of bytes written: 256
00160  *
00161  * r  A single precision integer.
00162  * a  Byte array.
00163  */
00164 static void sp_2048_to_bin(sp_digit* r, byte* a)
00165 {
00166     int i, j, s = 0, b;
00167 
00168     for (i=0; i<89; i++) {
00169         r[i+1] += r[i] >> 23;
00170         r[i] &= 0x7fffff;
00171     }
00172     j = 2048 / 8 - 1;
00173     a[j] = 0;
00174     for (i=0; i<90 && j>=0; i++) {
00175         b = 0;
00176         a[j--] |= r[i] << s; b += 8 - s;
00177         if (j < 0)
00178             break;
00179         while (b < 23) {
00180             a[j--] = r[i] >> b; b += 8;
00181             if (j < 0)
00182                 break;
00183         }
00184         s = 8 - (b - 23);
00185         if (j >= 0)
00186             a[j] = 0;
00187         if (s != 0)
00188             j++;
00189     }
00190 }
00191 
00192 #ifndef WOLFSSL_SP_SMALL
00193 /* Multiply a and b into r. (r = a * b)
00194  *
00195  * r  A single precision integer.
00196  * a  A single precision integer.
00197  * b  A single precision integer.
00198  */
00199 SP_NOINLINE static void sp_2048_mul_15(sp_digit* r, const sp_digit* a,
00200     const sp_digit* b)
00201 {
00202     int64_t t0   = ((int64_t)a[ 0]) * b[ 0];
00203     int64_t t1   = ((int64_t)a[ 0]) * b[ 1]
00204                  + ((int64_t)a[ 1]) * b[ 0];
00205     int64_t t2   = ((int64_t)a[ 0]) * b[ 2]
00206                  + ((int64_t)a[ 1]) * b[ 1]
00207                  + ((int64_t)a[ 2]) * b[ 0];
00208     int64_t t3   = ((int64_t)a[ 0]) * b[ 3]
00209                  + ((int64_t)a[ 1]) * b[ 2]
00210                  + ((int64_t)a[ 2]) * b[ 1]
00211                  + ((int64_t)a[ 3]) * b[ 0];
00212     int64_t t4   = ((int64_t)a[ 0]) * b[ 4]
00213                  + ((int64_t)a[ 1]) * b[ 3]
00214                  + ((int64_t)a[ 2]) * b[ 2]
00215                  + ((int64_t)a[ 3]) * b[ 1]
00216                  + ((int64_t)a[ 4]) * b[ 0];
00217     int64_t t5   = ((int64_t)a[ 0]) * b[ 5]
00218                  + ((int64_t)a[ 1]) * b[ 4]
00219                  + ((int64_t)a[ 2]) * b[ 3]
00220                  + ((int64_t)a[ 3]) * b[ 2]
00221                  + ((int64_t)a[ 4]) * b[ 1]
00222                  + ((int64_t)a[ 5]) * b[ 0];
00223     int64_t t6   = ((int64_t)a[ 0]) * b[ 6]
00224                  + ((int64_t)a[ 1]) * b[ 5]
00225                  + ((int64_t)a[ 2]) * b[ 4]
00226                  + ((int64_t)a[ 3]) * b[ 3]
00227                  + ((int64_t)a[ 4]) * b[ 2]
00228                  + ((int64_t)a[ 5]) * b[ 1]
00229                  + ((int64_t)a[ 6]) * b[ 0];
00230     int64_t t7   = ((int64_t)a[ 0]) * b[ 7]
00231                  + ((int64_t)a[ 1]) * b[ 6]
00232                  + ((int64_t)a[ 2]) * b[ 5]
00233                  + ((int64_t)a[ 3]) * b[ 4]
00234                  + ((int64_t)a[ 4]) * b[ 3]
00235                  + ((int64_t)a[ 5]) * b[ 2]
00236                  + ((int64_t)a[ 6]) * b[ 1]
00237                  + ((int64_t)a[ 7]) * b[ 0];
00238     int64_t t8   = ((int64_t)a[ 0]) * b[ 8]
00239                  + ((int64_t)a[ 1]) * b[ 7]
00240                  + ((int64_t)a[ 2]) * b[ 6]
00241                  + ((int64_t)a[ 3]) * b[ 5]
00242                  + ((int64_t)a[ 4]) * b[ 4]
00243                  + ((int64_t)a[ 5]) * b[ 3]
00244                  + ((int64_t)a[ 6]) * b[ 2]
00245                  + ((int64_t)a[ 7]) * b[ 1]
00246                  + ((int64_t)a[ 8]) * b[ 0];
00247     int64_t t9   = ((int64_t)a[ 0]) * b[ 9]
00248                  + ((int64_t)a[ 1]) * b[ 8]
00249                  + ((int64_t)a[ 2]) * b[ 7]
00250                  + ((int64_t)a[ 3]) * b[ 6]
00251                  + ((int64_t)a[ 4]) * b[ 5]
00252                  + ((int64_t)a[ 5]) * b[ 4]
00253                  + ((int64_t)a[ 6]) * b[ 3]
00254                  + ((int64_t)a[ 7]) * b[ 2]
00255                  + ((int64_t)a[ 8]) * b[ 1]
00256                  + ((int64_t)a[ 9]) * b[ 0];
00257     int64_t t10  = ((int64_t)a[ 0]) * b[10]
00258                  + ((int64_t)a[ 1]) * b[ 9]
00259                  + ((int64_t)a[ 2]) * b[ 8]
00260                  + ((int64_t)a[ 3]) * b[ 7]
00261                  + ((int64_t)a[ 4]) * b[ 6]
00262                  + ((int64_t)a[ 5]) * b[ 5]
00263                  + ((int64_t)a[ 6]) * b[ 4]
00264                  + ((int64_t)a[ 7]) * b[ 3]
00265                  + ((int64_t)a[ 8]) * b[ 2]
00266                  + ((int64_t)a[ 9]) * b[ 1]
00267                  + ((int64_t)a[10]) * b[ 0];
00268     int64_t t11  = ((int64_t)a[ 0]) * b[11]
00269                  + ((int64_t)a[ 1]) * b[10]
00270                  + ((int64_t)a[ 2]) * b[ 9]
00271                  + ((int64_t)a[ 3]) * b[ 8]
00272                  + ((int64_t)a[ 4]) * b[ 7]
00273                  + ((int64_t)a[ 5]) * b[ 6]
00274                  + ((int64_t)a[ 6]) * b[ 5]
00275                  + ((int64_t)a[ 7]) * b[ 4]
00276                  + ((int64_t)a[ 8]) * b[ 3]
00277                  + ((int64_t)a[ 9]) * b[ 2]
00278                  + ((int64_t)a[10]) * b[ 1]
00279                  + ((int64_t)a[11]) * b[ 0];
00280     int64_t t12  = ((int64_t)a[ 0]) * b[12]
00281                  + ((int64_t)a[ 1]) * b[11]
00282                  + ((int64_t)a[ 2]) * b[10]
00283                  + ((int64_t)a[ 3]) * b[ 9]
00284                  + ((int64_t)a[ 4]) * b[ 8]
00285                  + ((int64_t)a[ 5]) * b[ 7]
00286                  + ((int64_t)a[ 6]) * b[ 6]
00287                  + ((int64_t)a[ 7]) * b[ 5]
00288                  + ((int64_t)a[ 8]) * b[ 4]
00289                  + ((int64_t)a[ 9]) * b[ 3]
00290                  + ((int64_t)a[10]) * b[ 2]
00291                  + ((int64_t)a[11]) * b[ 1]
00292                  + ((int64_t)a[12]) * b[ 0];
00293     int64_t t13  = ((int64_t)a[ 0]) * b[13]
00294                  + ((int64_t)a[ 1]) * b[12]
00295                  + ((int64_t)a[ 2]) * b[11]
00296                  + ((int64_t)a[ 3]) * b[10]
00297                  + ((int64_t)a[ 4]) * b[ 9]
00298                  + ((int64_t)a[ 5]) * b[ 8]
00299                  + ((int64_t)a[ 6]) * b[ 7]
00300                  + ((int64_t)a[ 7]) * b[ 6]
00301                  + ((int64_t)a[ 8]) * b[ 5]
00302                  + ((int64_t)a[ 9]) * b[ 4]
00303                  + ((int64_t)a[10]) * b[ 3]
00304                  + ((int64_t)a[11]) * b[ 2]
00305                  + ((int64_t)a[12]) * b[ 1]
00306                  + ((int64_t)a[13]) * b[ 0];
00307     int64_t t14  = ((int64_t)a[ 0]) * b[14]
00308                  + ((int64_t)a[ 1]) * b[13]
00309                  + ((int64_t)a[ 2]) * b[12]
00310                  + ((int64_t)a[ 3]) * b[11]
00311                  + ((int64_t)a[ 4]) * b[10]
00312                  + ((int64_t)a[ 5]) * b[ 9]
00313                  + ((int64_t)a[ 6]) * b[ 8]
00314                  + ((int64_t)a[ 7]) * b[ 7]
00315                  + ((int64_t)a[ 8]) * b[ 6]
00316                  + ((int64_t)a[ 9]) * b[ 5]
00317                  + ((int64_t)a[10]) * b[ 4]
00318                  + ((int64_t)a[11]) * b[ 3]
00319                  + ((int64_t)a[12]) * b[ 2]
00320                  + ((int64_t)a[13]) * b[ 1]
00321                  + ((int64_t)a[14]) * b[ 0];
00322     int64_t t15  = ((int64_t)a[ 1]) * b[14]
00323                  + ((int64_t)a[ 2]) * b[13]
00324                  + ((int64_t)a[ 3]) * b[12]
00325                  + ((int64_t)a[ 4]) * b[11]
00326                  + ((int64_t)a[ 5]) * b[10]
00327                  + ((int64_t)a[ 6]) * b[ 9]
00328                  + ((int64_t)a[ 7]) * b[ 8]
00329                  + ((int64_t)a[ 8]) * b[ 7]
00330                  + ((int64_t)a[ 9]) * b[ 6]
00331                  + ((int64_t)a[10]) * b[ 5]
00332                  + ((int64_t)a[11]) * b[ 4]
00333                  + ((int64_t)a[12]) * b[ 3]
00334                  + ((int64_t)a[13]) * b[ 2]
00335                  + ((int64_t)a[14]) * b[ 1];
00336     int64_t t16  = ((int64_t)a[ 2]) * b[14]
00337                  + ((int64_t)a[ 3]) * b[13]
00338                  + ((int64_t)a[ 4]) * b[12]
00339                  + ((int64_t)a[ 5]) * b[11]
00340                  + ((int64_t)a[ 6]) * b[10]
00341                  + ((int64_t)a[ 7]) * b[ 9]
00342                  + ((int64_t)a[ 8]) * b[ 8]
00343                  + ((int64_t)a[ 9]) * b[ 7]
00344                  + ((int64_t)a[10]) * b[ 6]
00345                  + ((int64_t)a[11]) * b[ 5]
00346                  + ((int64_t)a[12]) * b[ 4]
00347                  + ((int64_t)a[13]) * b[ 3]
00348                  + ((int64_t)a[14]) * b[ 2];
00349     int64_t t17  = ((int64_t)a[ 3]) * b[14]
00350                  + ((int64_t)a[ 4]) * b[13]
00351                  + ((int64_t)a[ 5]) * b[12]
00352                  + ((int64_t)a[ 6]) * b[11]
00353                  + ((int64_t)a[ 7]) * b[10]
00354                  + ((int64_t)a[ 8]) * b[ 9]
00355                  + ((int64_t)a[ 9]) * b[ 8]
00356                  + ((int64_t)a[10]) * b[ 7]
00357                  + ((int64_t)a[11]) * b[ 6]
00358                  + ((int64_t)a[12]) * b[ 5]
00359                  + ((int64_t)a[13]) * b[ 4]
00360                  + ((int64_t)a[14]) * b[ 3];
00361     int64_t t18  = ((int64_t)a[ 4]) * b[14]
00362                  + ((int64_t)a[ 5]) * b[13]
00363                  + ((int64_t)a[ 6]) * b[12]
00364                  + ((int64_t)a[ 7]) * b[11]
00365                  + ((int64_t)a[ 8]) * b[10]
00366                  + ((int64_t)a[ 9]) * b[ 9]
00367                  + ((int64_t)a[10]) * b[ 8]
00368                  + ((int64_t)a[11]) * b[ 7]
00369                  + ((int64_t)a[12]) * b[ 6]
00370                  + ((int64_t)a[13]) * b[ 5]
00371                  + ((int64_t)a[14]) * b[ 4];
00372     int64_t t19  = ((int64_t)a[ 5]) * b[14]
00373                  + ((int64_t)a[ 6]) * b[13]
00374                  + ((int64_t)a[ 7]) * b[12]
00375                  + ((int64_t)a[ 8]) * b[11]
00376                  + ((int64_t)a[ 9]) * b[10]
00377                  + ((int64_t)a[10]) * b[ 9]
00378                  + ((int64_t)a[11]) * b[ 8]
00379                  + ((int64_t)a[12]) * b[ 7]
00380                  + ((int64_t)a[13]) * b[ 6]
00381                  + ((int64_t)a[14]) * b[ 5];
00382     int64_t t20  = ((int64_t)a[ 6]) * b[14]
00383                  + ((int64_t)a[ 7]) * b[13]
00384                  + ((int64_t)a[ 8]) * b[12]
00385                  + ((int64_t)a[ 9]) * b[11]
00386                  + ((int64_t)a[10]) * b[10]
00387                  + ((int64_t)a[11]) * b[ 9]
00388                  + ((int64_t)a[12]) * b[ 8]
00389                  + ((int64_t)a[13]) * b[ 7]
00390                  + ((int64_t)a[14]) * b[ 6];
00391     int64_t t21  = ((int64_t)a[ 7]) * b[14]
00392                  + ((int64_t)a[ 8]) * b[13]
00393                  + ((int64_t)a[ 9]) * b[12]
00394                  + ((int64_t)a[10]) * b[11]
00395                  + ((int64_t)a[11]) * b[10]
00396                  + ((int64_t)a[12]) * b[ 9]
00397                  + ((int64_t)a[13]) * b[ 8]
00398                  + ((int64_t)a[14]) * b[ 7];
00399     int64_t t22  = ((int64_t)a[ 8]) * b[14]
00400                  + ((int64_t)a[ 9]) * b[13]
00401                  + ((int64_t)a[10]) * b[12]
00402                  + ((int64_t)a[11]) * b[11]
00403                  + ((int64_t)a[12]) * b[10]
00404                  + ((int64_t)a[13]) * b[ 9]
00405                  + ((int64_t)a[14]) * b[ 8];
00406     int64_t t23  = ((int64_t)a[ 9]) * b[14]
00407                  + ((int64_t)a[10]) * b[13]
00408                  + ((int64_t)a[11]) * b[12]
00409                  + ((int64_t)a[12]) * b[11]
00410                  + ((int64_t)a[13]) * b[10]
00411                  + ((int64_t)a[14]) * b[ 9];
00412     int64_t t24  = ((int64_t)a[10]) * b[14]
00413                  + ((int64_t)a[11]) * b[13]
00414                  + ((int64_t)a[12]) * b[12]
00415                  + ((int64_t)a[13]) * b[11]
00416                  + ((int64_t)a[14]) * b[10];
00417     int64_t t25  = ((int64_t)a[11]) * b[14]
00418                  + ((int64_t)a[12]) * b[13]
00419                  + ((int64_t)a[13]) * b[12]
00420                  + ((int64_t)a[14]) * b[11];
00421     int64_t t26  = ((int64_t)a[12]) * b[14]
00422                  + ((int64_t)a[13]) * b[13]
00423                  + ((int64_t)a[14]) * b[12];
00424     int64_t t27  = ((int64_t)a[13]) * b[14]
00425                  + ((int64_t)a[14]) * b[13];
00426     int64_t t28  = ((int64_t)a[14]) * b[14];
00427 
00428     t1   += t0  >> 23; r[ 0] = t0  & 0x7fffff;
00429     t2   += t1  >> 23; r[ 1] = t1  & 0x7fffff;
00430     t3   += t2  >> 23; r[ 2] = t2  & 0x7fffff;
00431     t4   += t3  >> 23; r[ 3] = t3  & 0x7fffff;
00432     t5   += t4  >> 23; r[ 4] = t4  & 0x7fffff;
00433     t6   += t5  >> 23; r[ 5] = t5  & 0x7fffff;
00434     t7   += t6  >> 23; r[ 6] = t6  & 0x7fffff;
00435     t8   += t7  >> 23; r[ 7] = t7  & 0x7fffff;
00436     t9   += t8  >> 23; r[ 8] = t8  & 0x7fffff;
00437     t10  += t9  >> 23; r[ 9] = t9  & 0x7fffff;
00438     t11  += t10 >> 23; r[10] = t10 & 0x7fffff;
00439     t12  += t11 >> 23; r[11] = t11 & 0x7fffff;
00440     t13  += t12 >> 23; r[12] = t12 & 0x7fffff;
00441     t14  += t13 >> 23; r[13] = t13 & 0x7fffff;
00442     t15  += t14 >> 23; r[14] = t14 & 0x7fffff;
00443     t16  += t15 >> 23; r[15] = t15 & 0x7fffff;
00444     t17  += t16 >> 23; r[16] = t16 & 0x7fffff;
00445     t18  += t17 >> 23; r[17] = t17 & 0x7fffff;
00446     t19  += t18 >> 23; r[18] = t18 & 0x7fffff;
00447     t20  += t19 >> 23; r[19] = t19 & 0x7fffff;
00448     t21  += t20 >> 23; r[20] = t20 & 0x7fffff;
00449     t22  += t21 >> 23; r[21] = t21 & 0x7fffff;
00450     t23  += t22 >> 23; r[22] = t22 & 0x7fffff;
00451     t24  += t23 >> 23; r[23] = t23 & 0x7fffff;
00452     t25  += t24 >> 23; r[24] = t24 & 0x7fffff;
00453     t26  += t25 >> 23; r[25] = t25 & 0x7fffff;
00454     t27  += t26 >> 23; r[26] = t26 & 0x7fffff;
00455     t28  += t27 >> 23; r[27] = t27 & 0x7fffff;
00456     r[29] = (sp_digit)(t28 >> 23);
00457                        r[28] = t28 & 0x7fffff;
00458 }
00459 
00460 /* Square a and put result in r. (r = a * a)
00461  *
00462  * r  A single precision integer.
00463  * a  A single precision integer.
00464  */
00465 SP_NOINLINE static void sp_2048_sqr_15(sp_digit* r, const sp_digit* a)
00466 {
00467     int64_t t0   =  ((int64_t)a[ 0]) * a[ 0];
00468     int64_t t1   = (((int64_t)a[ 0]) * a[ 1]) * 2;
00469     int64_t t2   = (((int64_t)a[ 0]) * a[ 2]) * 2
00470                  +  ((int64_t)a[ 1]) * a[ 1];
00471     int64_t t3   = (((int64_t)a[ 0]) * a[ 3]
00472                  +  ((int64_t)a[ 1]) * a[ 2]) * 2;
00473     int64_t t4   = (((int64_t)a[ 0]) * a[ 4]
00474                  +  ((int64_t)a[ 1]) * a[ 3]) * 2
00475                  +  ((int64_t)a[ 2]) * a[ 2];
00476     int64_t t5   = (((int64_t)a[ 0]) * a[ 5]
00477                  +  ((int64_t)a[ 1]) * a[ 4]
00478                  +  ((int64_t)a[ 2]) * a[ 3]) * 2;
00479     int64_t t6   = (((int64_t)a[ 0]) * a[ 6]
00480                  +  ((int64_t)a[ 1]) * a[ 5]
00481                  +  ((int64_t)a[ 2]) * a[ 4]) * 2
00482                  +  ((int64_t)a[ 3]) * a[ 3];
00483     int64_t t7   = (((int64_t)a[ 0]) * a[ 7]
00484                  +  ((int64_t)a[ 1]) * a[ 6]
00485                  +  ((int64_t)a[ 2]) * a[ 5]
00486                  +  ((int64_t)a[ 3]) * a[ 4]) * 2;
00487     int64_t t8   = (((int64_t)a[ 0]) * a[ 8]
00488                  +  ((int64_t)a[ 1]) * a[ 7]
00489                  +  ((int64_t)a[ 2]) * a[ 6]
00490                  +  ((int64_t)a[ 3]) * a[ 5]) * 2
00491                  +  ((int64_t)a[ 4]) * a[ 4];
00492     int64_t t9   = (((int64_t)a[ 0]) * a[ 9]
00493                  +  ((int64_t)a[ 1]) * a[ 8]
00494                  +  ((int64_t)a[ 2]) * a[ 7]
00495                  +  ((int64_t)a[ 3]) * a[ 6]
00496                  +  ((int64_t)a[ 4]) * a[ 5]) * 2;
00497     int64_t t10  = (((int64_t)a[ 0]) * a[10]
00498                  +  ((int64_t)a[ 1]) * a[ 9]
00499                  +  ((int64_t)a[ 2]) * a[ 8]
00500                  +  ((int64_t)a[ 3]) * a[ 7]
00501                  +  ((int64_t)a[ 4]) * a[ 6]) * 2
00502                  +  ((int64_t)a[ 5]) * a[ 5];
00503     int64_t t11  = (((int64_t)a[ 0]) * a[11]
00504                  +  ((int64_t)a[ 1]) * a[10]
00505                  +  ((int64_t)a[ 2]) * a[ 9]
00506                  +  ((int64_t)a[ 3]) * a[ 8]
00507                  +  ((int64_t)a[ 4]) * a[ 7]
00508                  +  ((int64_t)a[ 5]) * a[ 6]) * 2;
00509     int64_t t12  = (((int64_t)a[ 0]) * a[12]
00510                  +  ((int64_t)a[ 1]) * a[11]
00511                  +  ((int64_t)a[ 2]) * a[10]
00512                  +  ((int64_t)a[ 3]) * a[ 9]
00513                  +  ((int64_t)a[ 4]) * a[ 8]
00514                  +  ((int64_t)a[ 5]) * a[ 7]) * 2
00515                  +  ((int64_t)a[ 6]) * a[ 6];
00516     int64_t t13  = (((int64_t)a[ 0]) * a[13]
00517                  +  ((int64_t)a[ 1]) * a[12]
00518                  +  ((int64_t)a[ 2]) * a[11]
00519                  +  ((int64_t)a[ 3]) * a[10]
00520                  +  ((int64_t)a[ 4]) * a[ 9]
00521                  +  ((int64_t)a[ 5]) * a[ 8]
00522                  +  ((int64_t)a[ 6]) * a[ 7]) * 2;
00523     int64_t t14  = (((int64_t)a[ 0]) * a[14]
00524                  +  ((int64_t)a[ 1]) * a[13]
00525                  +  ((int64_t)a[ 2]) * a[12]
00526                  +  ((int64_t)a[ 3]) * a[11]
00527                  +  ((int64_t)a[ 4]) * a[10]
00528                  +  ((int64_t)a[ 5]) * a[ 9]
00529                  +  ((int64_t)a[ 6]) * a[ 8]) * 2
00530                  +  ((int64_t)a[ 7]) * a[ 7];
00531     int64_t t15  = (((int64_t)a[ 1]) * a[14]
00532                  +  ((int64_t)a[ 2]) * a[13]
00533                  +  ((int64_t)a[ 3]) * a[12]
00534                  +  ((int64_t)a[ 4]) * a[11]
00535                  +  ((int64_t)a[ 5]) * a[10]
00536                  +  ((int64_t)a[ 6]) * a[ 9]
00537                  +  ((int64_t)a[ 7]) * a[ 8]) * 2;
00538     int64_t t16  = (((int64_t)a[ 2]) * a[14]
00539                  +  ((int64_t)a[ 3]) * a[13]
00540                  +  ((int64_t)a[ 4]) * a[12]
00541                  +  ((int64_t)a[ 5]) * a[11]
00542                  +  ((int64_t)a[ 6]) * a[10]
00543                  +  ((int64_t)a[ 7]) * a[ 9]) * 2
00544                  +  ((int64_t)a[ 8]) * a[ 8];
00545     int64_t t17  = (((int64_t)a[ 3]) * a[14]
00546                  +  ((int64_t)a[ 4]) * a[13]
00547                  +  ((int64_t)a[ 5]) * a[12]
00548                  +  ((int64_t)a[ 6]) * a[11]
00549                  +  ((int64_t)a[ 7]) * a[10]
00550                  +  ((int64_t)a[ 8]) * a[ 9]) * 2;
00551     int64_t t18  = (((int64_t)a[ 4]) * a[14]
00552                  +  ((int64_t)a[ 5]) * a[13]
00553                  +  ((int64_t)a[ 6]) * a[12]
00554                  +  ((int64_t)a[ 7]) * a[11]
00555                  +  ((int64_t)a[ 8]) * a[10]) * 2
00556                  +  ((int64_t)a[ 9]) * a[ 9];
00557     int64_t t19  = (((int64_t)a[ 5]) * a[14]
00558                  +  ((int64_t)a[ 6]) * a[13]
00559                  +  ((int64_t)a[ 7]) * a[12]
00560                  +  ((int64_t)a[ 8]) * a[11]
00561                  +  ((int64_t)a[ 9]) * a[10]) * 2;
00562     int64_t t20  = (((int64_t)a[ 6]) * a[14]
00563                  +  ((int64_t)a[ 7]) * a[13]
00564                  +  ((int64_t)a[ 8]) * a[12]
00565                  +  ((int64_t)a[ 9]) * a[11]) * 2
00566                  +  ((int64_t)a[10]) * a[10];
00567     int64_t t21  = (((int64_t)a[ 7]) * a[14]
00568                  +  ((int64_t)a[ 8]) * a[13]
00569                  +  ((int64_t)a[ 9]) * a[12]
00570                  +  ((int64_t)a[10]) * a[11]) * 2;
00571     int64_t t22  = (((int64_t)a[ 8]) * a[14]
00572                  +  ((int64_t)a[ 9]) * a[13]
00573                  +  ((int64_t)a[10]) * a[12]) * 2
00574                  +  ((int64_t)a[11]) * a[11];
00575     int64_t t23  = (((int64_t)a[ 9]) * a[14]
00576                  +  ((int64_t)a[10]) * a[13]
00577                  +  ((int64_t)a[11]) * a[12]) * 2;
00578     int64_t t24  = (((int64_t)a[10]) * a[14]
00579                  +  ((int64_t)a[11]) * a[13]) * 2
00580                  +  ((int64_t)a[12]) * a[12];
00581     int64_t t25  = (((int64_t)a[11]) * a[14]
00582                  +  ((int64_t)a[12]) * a[13]) * 2;
00583     int64_t t26  = (((int64_t)a[12]) * a[14]) * 2
00584                  +  ((int64_t)a[13]) * a[13];
00585     int64_t t27  = (((int64_t)a[13]) * a[14]) * 2;
00586     int64_t t28  =  ((int64_t)a[14]) * a[14];
00587 
00588     t1   += t0  >> 23; r[ 0] = t0  & 0x7fffff;
00589     t2   += t1  >> 23; r[ 1] = t1  & 0x7fffff;
00590     t3   += t2  >> 23; r[ 2] = t2  & 0x7fffff;
00591     t4   += t3  >> 23; r[ 3] = t3  & 0x7fffff;
00592     t5   += t4  >> 23; r[ 4] = t4  & 0x7fffff;
00593     t6   += t5  >> 23; r[ 5] = t5  & 0x7fffff;
00594     t7   += t6  >> 23; r[ 6] = t6  & 0x7fffff;
00595     t8   += t7  >> 23; r[ 7] = t7  & 0x7fffff;
00596     t9   += t8  >> 23; r[ 8] = t8  & 0x7fffff;
00597     t10  += t9  >> 23; r[ 9] = t9  & 0x7fffff;
00598     t11  += t10 >> 23; r[10] = t10 & 0x7fffff;
00599     t12  += t11 >> 23; r[11] = t11 & 0x7fffff;
00600     t13  += t12 >> 23; r[12] = t12 & 0x7fffff;
00601     t14  += t13 >> 23; r[13] = t13 & 0x7fffff;
00602     t15  += t14 >> 23; r[14] = t14 & 0x7fffff;
00603     t16  += t15 >> 23; r[15] = t15 & 0x7fffff;
00604     t17  += t16 >> 23; r[16] = t16 & 0x7fffff;
00605     t18  += t17 >> 23; r[17] = t17 & 0x7fffff;
00606     t19  += t18 >> 23; r[18] = t18 & 0x7fffff;
00607     t20  += t19 >> 23; r[19] = t19 & 0x7fffff;
00608     t21  += t20 >> 23; r[20] = t20 & 0x7fffff;
00609     t22  += t21 >> 23; r[21] = t21 & 0x7fffff;
00610     t23  += t22 >> 23; r[22] = t22 & 0x7fffff;
00611     t24  += t23 >> 23; r[23] = t23 & 0x7fffff;
00612     t25  += t24 >> 23; r[24] = t24 & 0x7fffff;
00613     t26  += t25 >> 23; r[25] = t25 & 0x7fffff;
00614     t27  += t26 >> 23; r[26] = t26 & 0x7fffff;
00615     t28  += t27 >> 23; r[27] = t27 & 0x7fffff;
00616     r[29] = (sp_digit)(t28 >> 23);
00617                        r[28] = t28 & 0x7fffff;
00618 }
00619 
00620 /* Add b to a into r. (r = a + b)
00621  *
00622  * r  A single precision integer.
00623  * a  A single precision integer.
00624  * b  A single precision integer.
00625  */
00626 SP_NOINLINE static int sp_2048_add_15(sp_digit* r, const sp_digit* a,
00627         const sp_digit* b)
00628 {
00629     r[ 0] = a[ 0] + b[ 0];
00630     r[ 1] = a[ 1] + b[ 1];
00631     r[ 2] = a[ 2] + b[ 2];
00632     r[ 3] = a[ 3] + b[ 3];
00633     r[ 4] = a[ 4] + b[ 4];
00634     r[ 5] = a[ 5] + b[ 5];
00635     r[ 6] = a[ 6] + b[ 6];
00636     r[ 7] = a[ 7] + b[ 7];
00637     r[ 8] = a[ 8] + b[ 8];
00638     r[ 9] = a[ 9] + b[ 9];
00639     r[10] = a[10] + b[10];
00640     r[11] = a[11] + b[11];
00641     r[12] = a[12] + b[12];
00642     r[13] = a[13] + b[13];
00643     r[14] = a[14] + b[14];
00644 
00645     return 0;
00646 }
00647 
00648 /* Sub b from a into r. (r = a - b)
00649  *
00650  * r  A single precision integer.
00651  * a  A single precision integer.
00652  * b  A single precision integer.
00653  */
00654 SP_NOINLINE static int sp_2048_sub_30(sp_digit* r, const sp_digit* a,
00655         const sp_digit* b)
00656 {
00657     int i;
00658 
00659     for (i = 0; i < 24; i += 8) {
00660         r[i + 0] = a[i + 0] - b[i + 0];
00661         r[i + 1] = a[i + 1] - b[i + 1];
00662         r[i + 2] = a[i + 2] - b[i + 2];
00663         r[i + 3] = a[i + 3] - b[i + 3];
00664         r[i + 4] = a[i + 4] - b[i + 4];
00665         r[i + 5] = a[i + 5] - b[i + 5];
00666         r[i + 6] = a[i + 6] - b[i + 6];
00667         r[i + 7] = a[i + 7] - b[i + 7];
00668     }
00669     r[24] = a[24] - b[24];
00670     r[25] = a[25] - b[25];
00671     r[26] = a[26] - b[26];
00672     r[27] = a[27] - b[27];
00673     r[28] = a[28] - b[28];
00674     r[29] = a[29] - b[29];
00675 
00676     return 0;
00677 }
00678 
00679 /* Add b to a into r. (r = a + b)
00680  *
00681  * r  A single precision integer.
00682  * a  A single precision integer.
00683  * b  A single precision integer.
00684  */
00685 SP_NOINLINE static int sp_2048_add_30(sp_digit* r, const sp_digit* a,
00686         const sp_digit* b)
00687 {
00688     int i;
00689 
00690     for (i = 0; i < 24; i += 8) {
00691         r[i + 0] = a[i + 0] + b[i + 0];
00692         r[i + 1] = a[i + 1] + b[i + 1];
00693         r[i + 2] = a[i + 2] + b[i + 2];
00694         r[i + 3] = a[i + 3] + b[i + 3];
00695         r[i + 4] = a[i + 4] + b[i + 4];
00696         r[i + 5] = a[i + 5] + b[i + 5];
00697         r[i + 6] = a[i + 6] + b[i + 6];
00698         r[i + 7] = a[i + 7] + b[i + 7];
00699     }
00700     r[24] = a[24] + b[24];
00701     r[25] = a[25] + b[25];
00702     r[26] = a[26] + b[26];
00703     r[27] = a[27] + b[27];
00704     r[28] = a[28] + b[28];
00705     r[29] = a[29] + b[29];
00706 
00707     return 0;
00708 }
00709 
00710 /* Multiply a and b into r. (r = a * b)
00711  *
00712  * r  A single precision integer.
00713  * a  A single precision integer.
00714  * b  A single precision integer.
00715  */
00716 SP_NOINLINE static void sp_2048_mul_45(sp_digit* r, const sp_digit* a,
00717     const sp_digit* b)
00718 {
00719     sp_digit p0[30];
00720     sp_digit p1[30];
00721     sp_digit p2[30];
00722     sp_digit p3[30];
00723     sp_digit p4[30];
00724     sp_digit p5[30];
00725     sp_digit t0[30];
00726     sp_digit t1[30];
00727     sp_digit t2[30];
00728     sp_digit a0[15];
00729     sp_digit a1[15];
00730     sp_digit a2[15];
00731     sp_digit b0[15];
00732     sp_digit b1[15];
00733     sp_digit b2[15];
00734     sp_2048_add_15(a0, a, &a[15]);
00735     sp_2048_add_15(b0, b, &b[15]);
00736     sp_2048_add_15(a1, &a[15], &a[30]);
00737     sp_2048_add_15(b1, &b[15], &b[30]);
00738     sp_2048_add_15(a2, a0, &a[30]);
00739     sp_2048_add_15(b2, b0, &b[30]);
00740     sp_2048_mul_15(p0, a, b);
00741     sp_2048_mul_15(p2, &a[15], &b[15]);
00742     sp_2048_mul_15(p4, &a[30], &b[30]);
00743     sp_2048_mul_15(p1, a0, b0);
00744     sp_2048_mul_15(p3, a1, b1);
00745     sp_2048_mul_15(p5, a2, b2);
00746     XMEMSET(r, 0, sizeof(*r)*2*45);
00747     sp_2048_sub_30(t0, p3, p2);
00748     sp_2048_sub_30(t1, p1, p2);
00749     sp_2048_sub_30(t2, p5, t0);
00750     sp_2048_sub_30(t2, t2, t1);
00751     sp_2048_sub_30(t0, t0, p4);
00752     sp_2048_sub_30(t1, t1, p0);
00753     sp_2048_add_30(r, r, p0);
00754     sp_2048_add_30(&r[15], &r[15], t1);
00755     sp_2048_add_30(&r[30], &r[30], t2);
00756     sp_2048_add_30(&r[45], &r[45], t0);
00757     sp_2048_add_30(&r[60], &r[60], p4);
00758 }
00759 
00760 /* Square a into r. (r = a * a)
00761  *
00762  * r  A single precision integer.
00763  * a  A single precision integer.
00764  */
00765 SP_NOINLINE static void sp_2048_sqr_45(sp_digit* r, const sp_digit* a)
00766 {
00767     sp_digit p0[30];
00768     sp_digit p1[30];
00769     sp_digit p2[30];
00770     sp_digit p3[30];
00771     sp_digit p4[30];
00772     sp_digit p5[30];
00773     sp_digit t0[30];
00774     sp_digit t1[30];
00775     sp_digit t2[30];
00776     sp_digit a0[15];
00777     sp_digit a1[15];
00778     sp_digit a2[15];
00779     sp_2048_add_15(a0, a, &a[15]);
00780     sp_2048_add_15(a1, &a[15], &a[30]);
00781     sp_2048_add_15(a2, a0, &a[30]);
00782     sp_2048_sqr_15(p0, a);
00783     sp_2048_sqr_15(p2, &a[15]);
00784     sp_2048_sqr_15(p4, &a[30]);
00785     sp_2048_sqr_15(p1, a0);
00786     sp_2048_sqr_15(p3, a1);
00787     sp_2048_sqr_15(p5, a2);
00788     XMEMSET(r, 0, sizeof(*r)*2*45);
00789     sp_2048_sub_30(t0, p3, p2);
00790     sp_2048_sub_30(t1, p1, p2);
00791     sp_2048_sub_30(t2, p5, t0);
00792     sp_2048_sub_30(t2, t2, t1);
00793     sp_2048_sub_30(t0, t0, p4);
00794     sp_2048_sub_30(t1, t1, p0);
00795     sp_2048_add_30(r, r, p0);
00796     sp_2048_add_30(&r[15], &r[15], t1);
00797     sp_2048_add_30(&r[30], &r[30], t2);
00798     sp_2048_add_30(&r[45], &r[45], t0);
00799     sp_2048_add_30(&r[60], &r[60], p4);
00800 }
00801 
00802 /* Add b to a into r. (r = a + b)
00803  *
00804  * r  A single precision integer.
00805  * a  A single precision integer.
00806  * b  A single precision integer.
00807  */
00808 SP_NOINLINE static int sp_2048_add_45(sp_digit* r, const sp_digit* a,
00809         const sp_digit* b)
00810 {
00811     int i;
00812 
00813     for (i = 0; i < 40; i += 8) {
00814         r[i + 0] = a[i + 0] + b[i + 0];
00815         r[i + 1] = a[i + 1] + b[i + 1];
00816         r[i + 2] = a[i + 2] + b[i + 2];
00817         r[i + 3] = a[i + 3] + b[i + 3];
00818         r[i + 4] = a[i + 4] + b[i + 4];
00819         r[i + 5] = a[i + 5] + b[i + 5];
00820         r[i + 6] = a[i + 6] + b[i + 6];
00821         r[i + 7] = a[i + 7] + b[i + 7];
00822     }
00823     r[40] = a[40] + b[40];
00824     r[41] = a[41] + b[41];
00825     r[42] = a[42] + b[42];
00826     r[43] = a[43] + b[43];
00827     r[44] = a[44] + b[44];
00828 
00829     return 0;
00830 }
00831 
00832 /* Add b to a into r. (r = a + b)
00833  *
00834  * r  A single precision integer.
00835  * a  A single precision integer.
00836  * b  A single precision integer.
00837  */
00838 SP_NOINLINE static int sp_2048_add_90(sp_digit* r, const sp_digit* a,
00839         const sp_digit* b)
00840 {
00841     int i;
00842 
00843     for (i = 0; i < 88; i += 8) {
00844         r[i + 0] = a[i + 0] + b[i + 0];
00845         r[i + 1] = a[i + 1] + b[i + 1];
00846         r[i + 2] = a[i + 2] + b[i + 2];
00847         r[i + 3] = a[i + 3] + b[i + 3];
00848         r[i + 4] = a[i + 4] + b[i + 4];
00849         r[i + 5] = a[i + 5] + b[i + 5];
00850         r[i + 6] = a[i + 6] + b[i + 6];
00851         r[i + 7] = a[i + 7] + b[i + 7];
00852     }
00853     r[88] = a[88] + b[88];
00854     r[89] = a[89] + b[89];
00855 
00856     return 0;
00857 }
00858 
00859 /* Sub b from a into r. (r = a - b)
00860  *
00861  * r  A single precision integer.
00862  * a  A single precision integer.
00863  * b  A single precision integer.
00864  */
00865 SP_NOINLINE static int sp_2048_sub_90(sp_digit* r, const sp_digit* a,
00866         const sp_digit* b)
00867 {
00868     int i;
00869 
00870     for (i = 0; i < 88; i += 8) {
00871         r[i + 0] = a[i + 0] - b[i + 0];
00872         r[i + 1] = a[i + 1] - b[i + 1];
00873         r[i + 2] = a[i + 2] - b[i + 2];
00874         r[i + 3] = a[i + 3] - b[i + 3];
00875         r[i + 4] = a[i + 4] - b[i + 4];
00876         r[i + 5] = a[i + 5] - b[i + 5];
00877         r[i + 6] = a[i + 6] - b[i + 6];
00878         r[i + 7] = a[i + 7] - b[i + 7];
00879     }
00880     r[88] = a[88] - b[88];
00881     r[89] = a[89] - b[89];
00882 
00883     return 0;
00884 }
00885 
00886 /* Multiply a and b into r. (r = a * b)
00887  *
00888  * r  A single precision integer.
00889  * a  A single precision integer.
00890  * b  A single precision integer.
00891  */
00892 SP_NOINLINE static void sp_2048_mul_90(sp_digit* r, const sp_digit* a,
00893     const sp_digit* b)
00894 {
00895     sp_digit* z0 = r;
00896     sp_digit z1[90];
00897     sp_digit* a1 = z1;
00898     sp_digit b1[45];
00899     sp_digit* z2 = r + 90;
00900     sp_2048_add_45(a1, a, &a[45]);
00901     sp_2048_add_45(b1, b, &b[45]);
00902     sp_2048_mul_45(z2, &a[45], &b[45]);
00903     sp_2048_mul_45(z0, a, b);
00904     sp_2048_mul_45(z1, a1, b1);
00905     sp_2048_sub_90(z1, z1, z2);
00906     sp_2048_sub_90(z1, z1, z0);
00907     sp_2048_add_90(r + 45, r + 45, z1);
00908 }
00909 
00910 /* Square a and put result in r. (r = a * a)
00911  *
00912  * r  A single precision integer.
00913  * a  A single precision integer.
00914  */
00915 SP_NOINLINE static void sp_2048_sqr_90(sp_digit* r, const sp_digit* a)
00916 {
00917     sp_digit* z0 = r;
00918     sp_digit z1[90];
00919     sp_digit* a1 = z1;
00920     sp_digit* z2 = r + 90;
00921     sp_2048_add_45(a1, a, &a[45]);
00922     sp_2048_sqr_45(z2, &a[45]);
00923     sp_2048_sqr_45(z0, a);
00924     sp_2048_sqr_45(z1, a1);
00925     sp_2048_sub_90(z1, z1, z2);
00926     sp_2048_sub_90(z1, z1, z0);
00927     sp_2048_add_90(r + 45, r + 45, z1);
00928 }
00929 
00930 #endif /* WOLFSSL_SP_SMALL */
00931 #ifdef WOLFSSL_SP_SMALL
00932 /* Add b to a into r. (r = a + b)
00933  *
00934  * r  A single precision integer.
00935  * a  A single precision integer.
00936  * b  A single precision integer.
00937  */
00938 SP_NOINLINE static int sp_2048_add_90(sp_digit* r, const sp_digit* a,
00939         const sp_digit* b)
00940 {
00941     int i;
00942 
00943     for (i = 0; i < 90; i++)
00944         r[i] = a[i] + b[i];
00945 
00946     return 0;
00947 }
00948 #endif /* WOLFSSL_SP_SMALL */
00949 #ifdef WOLFSSL_SP_SMALL
00950 /* Sub b from a into r. (r = a - b)
00951  *
00952  * r  A single precision integer.
00953  * a  A single precision integer.
00954  * b  A single precision integer.
00955  */
00956 SP_NOINLINE static int sp_2048_sub_90(sp_digit* r, const sp_digit* a,
00957         const sp_digit* b)
00958 {
00959     int i;
00960 
00961     for (i = 0; i < 90; i++)
00962         r[i] = a[i] - b[i];
00963 
00964     return 0;
00965 }
00966 
00967 #endif /* WOLFSSL_SP_SMALL */
00968 #ifdef WOLFSSL_SP_SMALL
00969 /* Multiply a and b into r. (r = a * b)
00970  *
00971  * r  A single precision integer.
00972  * a  A single precision integer.
00973  * b  A single precision integer.
00974  */
00975 SP_NOINLINE static void sp_2048_mul_90(sp_digit* r, const sp_digit* a,
00976     const sp_digit* b)
00977 {
00978     int i, j, k;
00979     int64_t c;
00980 
00981     c = ((int64_t)a[89]) * b[89];
00982     r[179] = (sp_digit)(c >> 23);
00983     c = (c & 0x7fffff) << 23;
00984     for (k = 177; k >= 0; k--) {
00985         for (i = 89; i >= 0; i--) {
00986             j = k - i;
00987             if (j >= 90)
00988                 break;
00989             if (j < 0)
00990                 continue;
00991 
00992             c += ((int64_t)a[i]) * b[j];
00993         }
00994         r[k + 2] += c >> 46;
00995         r[k + 1] = (c >> 23) & 0x7fffff;
00996         c = (c & 0x7fffff) << 23;
00997     }
00998     r[0] = (sp_digit)(c >> 23);
00999 }
01000 
01001 /* Square a and put result in r. (r = a * a)
01002  *
01003  * r  A single precision integer.
01004  * a  A single precision integer.
01005  */
01006 SP_NOINLINE static void sp_2048_sqr_90(sp_digit* r, const sp_digit* a)
01007 {
01008     int i, j, k;
01009     int64_t c;
01010 
01011     c = ((int64_t)a[89]) * a[89];
01012     r[179] = (sp_digit)(c >> 23);
01013     c = (c & 0x7fffff) << 23;
01014     for (k = 177; k >= 0; k--) {
01015         for (i = 89; i >= 0; i--) {
01016             j = k - i;
01017             if (j >= 90 || i <= j)
01018                 break;
01019             if (j < 0)
01020                 continue;
01021 
01022             c += ((int64_t)a[i]) * a[j] * 2;
01023         }
01024         if (i == j)
01025            c += ((int64_t)a[i]) * a[i];
01026 
01027         r[k + 2] += c >> 46;
01028         r[k + 1] = (c >> 23) & 0x7fffff;
01029         c = (c & 0x7fffff) << 23;
01030     }
01031     r[0] = (sp_digit)(c >> 23);
01032 }
01033 
01034 #endif /* WOLFSSL_SP_SMALL */
01035 #if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
01036 #ifdef WOLFSSL_SP_SMALL
01037 /* Add b to a into r. (r = a + b)
01038  *
01039  * r  A single precision integer.
01040  * a  A single precision integer.
01041  * b  A single precision integer.
01042  */
01043 SP_NOINLINE static int sp_2048_add_45(sp_digit* r, const sp_digit* a,
01044         const sp_digit* b)
01045 {
01046     int i;
01047 
01048     for (i = 0; i < 45; i++)
01049         r[i] = a[i] + b[i];
01050 
01051     return 0;
01052 }
01053 #endif /* WOLFSSL_SP_SMALL */
01054 #ifdef WOLFSSL_SP_SMALL
01055 /* Sub b from a into r. (r = a - b)
01056  *
01057  * r  A single precision integer.
01058  * a  A single precision integer.
01059  * b  A single precision integer.
01060  */
01061 SP_NOINLINE static int sp_2048_sub_45(sp_digit* r, const sp_digit* a,
01062         const sp_digit* b)
01063 {
01064     int i;
01065 
01066     for (i = 0; i < 45; i++)
01067         r[i] = a[i] - b[i];
01068 
01069     return 0;
01070 }
01071 
01072 #else
01073 /* Sub b from a into r. (r = a - b)
01074  *
01075  * r  A single precision integer.
01076  * a  A single precision integer.
01077  * b  A single precision integer.
01078  */
01079 SP_NOINLINE static int sp_2048_sub_45(sp_digit* r, const sp_digit* a,
01080         const sp_digit* b)
01081 {
01082     int i;
01083 
01084     for (i = 0; i < 40; i += 8) {
01085         r[i + 0] = a[i + 0] - b[i + 0];
01086         r[i + 1] = a[i + 1] - b[i + 1];
01087         r[i + 2] = a[i + 2] - b[i + 2];
01088         r[i + 3] = a[i + 3] - b[i + 3];
01089         r[i + 4] = a[i + 4] - b[i + 4];
01090         r[i + 5] = a[i + 5] - b[i + 5];
01091         r[i + 6] = a[i + 6] - b[i + 6];
01092         r[i + 7] = a[i + 7] - b[i + 7];
01093     }
01094     r[40] = a[40] - b[40];
01095     r[41] = a[41] - b[41];
01096     r[42] = a[42] - b[42];
01097     r[43] = a[43] - b[43];
01098     r[44] = a[44] - b[44];
01099 
01100     return 0;
01101 }
01102 
01103 #endif /* WOLFSSL_SP_SMALL */
01104 #ifdef WOLFSSL_SP_SMALL
01105 /* Multiply a and b into r. (r = a * b)
01106  *
01107  * r  A single precision integer.
01108  * a  A single precision integer.
01109  * b  A single precision integer.
01110  */
01111 SP_NOINLINE static void sp_2048_mul_45(sp_digit* r, const sp_digit* a,
01112     const sp_digit* b)
01113 {
01114     int i, j, k;
01115     int64_t c;
01116 
01117     c = ((int64_t)a[44]) * b[44];
01118     r[89] = (sp_digit)(c >> 23);
01119     c = (c & 0x7fffff) << 23;
01120     for (k = 87; k >= 0; k--) {
01121         for (i = 44; i >= 0; i--) {
01122             j = k - i;
01123             if (j >= 45)
01124                 break;
01125             if (j < 0)
01126                 continue;
01127 
01128             c += ((int64_t)a[i]) * b[j];
01129         }
01130         r[k + 2] += c >> 46;
01131         r[k + 1] = (c >> 23) & 0x7fffff;
01132         c = (c & 0x7fffff) << 23;
01133     }
01134     r[0] = (sp_digit)(c >> 23);
01135 }
01136 
01137 /* Square a and put result in r. (r = a * a)
01138  *
01139  * r  A single precision integer.
01140  * a  A single precision integer.
01141  */
01142 SP_NOINLINE static void sp_2048_sqr_45(sp_digit* r, const sp_digit* a)
01143 {
01144     int i, j, k;
01145     int64_t c;
01146 
01147     c = ((int64_t)a[44]) * a[44];
01148     r[89] = (sp_digit)(c >> 23);
01149     c = (c & 0x7fffff) << 23;
01150     for (k = 87; k >= 0; k--) {
01151         for (i = 44; i >= 0; i--) {
01152             j = k - i;
01153             if (j >= 45 || i <= j)
01154                 break;
01155             if (j < 0)
01156                 continue;
01157 
01158             c += ((int64_t)a[i]) * a[j] * 2;
01159         }
01160         if (i == j)
01161            c += ((int64_t)a[i]) * a[i];
01162 
01163         r[k + 2] += c >> 46;
01164         r[k + 1] = (c >> 23) & 0x7fffff;
01165         c = (c & 0x7fffff) << 23;
01166     }
01167     r[0] = (sp_digit)(c >> 23);
01168 }
01169 
01170 #endif /* WOLFSSL_SP_SMALL */
01171 #endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
01172 
01173 /* Caclulate the bottom digit of -1/a mod 2^n.
01174  *
01175  * a    A single precision number.
01176  * rho  Bottom word of inverse.
01177  */
01178 static void sp_2048_mont_setup(sp_digit* a, sp_digit* rho)
01179 {
01180     sp_digit x, b;
01181 
01182     b = a[0];
01183     x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
01184     x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
01185     x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
01186     x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
01187     x &= 0x7fffff;
01188 
01189     /* rho = -1/m mod b */
01190     *rho = (1L << 23) - x;
01191 }
01192 
01193 #if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
01194 /* r = 2^n mod m where n is the number of bits to reduce by.
01195  * Given m must be 2048 bits, just need to subtract.
01196  *
01197  * r  A single precision number.
01198  * m  A signle precision number.
01199  */
01200 static void sp_2048_mont_norm_45(sp_digit* r, sp_digit* m)
01201 {
01202     /* Set r = 2^n - 1. */
01203 #ifdef WOLFSSL_SP_SMALL
01204     int i;
01205 
01206     for (i=0; i<44; i++)
01207         r[i] = 0x7fffff;
01208 #else
01209     int i;
01210 
01211     for (i = 0; i < 40; i += 8) {
01212         r[i + 0] = 0x7fffff;
01213         r[i + 1] = 0x7fffff;
01214         r[i + 2] = 0x7fffff;
01215         r[i + 3] = 0x7fffff;
01216         r[i + 4] = 0x7fffff;
01217         r[i + 5] = 0x7fffff;
01218         r[i + 6] = 0x7fffff;
01219         r[i + 7] = 0x7fffff;
01220     }
01221     r[40] = 0x7fffff;
01222     r[41] = 0x7fffff;
01223     r[42] = 0x7fffff;
01224     r[43] = 0x7fffff;
01225 #endif
01226     r[44] = 0xfffl;
01227 
01228     /* r = (2^n - 1) mod n */
01229     sp_2048_sub_45(r, r, m);
01230 
01231     /* Add one so r = 2^n mod m */
01232     r[0] += 1;
01233 }
01234 
01235 /* Compare a with b in constant time.
01236  *
01237  * a  A single precision integer.
01238  * b  A single precision integer.
01239  * return -ve, 0 or +ve if a is less than, equal to or greater than b
01240  * respectively.
01241  */
01242 static sp_digit sp_2048_cmp_45(const sp_digit* a, const sp_digit* b)
01243 {
01244     sp_digit r = 0;
01245 #ifdef WOLFSSL_SP_SMALL
01246     int i;
01247 
01248     for (i=44; i>=0; i--)
01249         r |= (a[i] - b[i]) & (0 - !r);
01250 #else
01251     int i;
01252 
01253     r |= (a[44] - b[44]) & (0 - !r);
01254     r |= (a[43] - b[43]) & (0 - !r);
01255     r |= (a[42] - b[42]) & (0 - !r);
01256     r |= (a[41] - b[41]) & (0 - !r);
01257     r |= (a[40] - b[40]) & (0 - !r);
01258     for (i = 32; i >= 0; i -= 8) {
01259         r |= (a[i + 7] - b[i + 7]) & (0 - !r);
01260         r |= (a[i + 6] - b[i + 6]) & (0 - !r);
01261         r |= (a[i + 5] - b[i + 5]) & (0 - !r);
01262         r |= (a[i + 4] - b[i + 4]) & (0 - !r);
01263         r |= (a[i + 3] - b[i + 3]) & (0 - !r);
01264         r |= (a[i + 2] - b[i + 2]) & (0 - !r);
01265         r |= (a[i + 1] - b[i + 1]) & (0 - !r);
01266         r |= (a[i + 0] - b[i + 0]) & (0 - !r);
01267     }
01268 #endif /* WOLFSSL_SP_SMALL */
01269 
01270     return r;
01271 }
01272 
01273 /* Conditionally subtract b from a using the mask m.
01274  * m is -1 to subtract and 0 when not.
01275  *
01276  * r  A single precision number representing condition subtract result.
01277  * a  A single precision number to subtract from.
01278  * b  A single precision number to subtract.
01279  * m  Mask value to apply.
01280  */
01281 static void sp_2048_cond_sub_45(sp_digit* r, const sp_digit* a,
01282         const sp_digit* b, const sp_digit m)
01283 {
01284 #ifdef WOLFSSL_SP_SMALL
01285     int i;
01286 
01287     for (i = 0; i < 45; i++)
01288         r[i] = a[i] - (b[i] & m);
01289 #else
01290     int i;
01291 
01292     for (i = 0; i < 40; i += 8) {
01293         r[i + 0] = a[i + 0] - (b[i + 0] & m);
01294         r[i + 1] = a[i + 1] - (b[i + 1] & m);
01295         r[i + 2] = a[i + 2] - (b[i + 2] & m);
01296         r[i + 3] = a[i + 3] - (b[i + 3] & m);
01297         r[i + 4] = a[i + 4] - (b[i + 4] & m);
01298         r[i + 5] = a[i + 5] - (b[i + 5] & m);
01299         r[i + 6] = a[i + 6] - (b[i + 6] & m);
01300         r[i + 7] = a[i + 7] - (b[i + 7] & m);
01301     }
01302     r[40] = a[40] - (b[40] & m);
01303     r[41] = a[41] - (b[41] & m);
01304     r[42] = a[42] - (b[42] & m);
01305     r[43] = a[43] - (b[43] & m);
01306     r[44] = a[44] - (b[44] & m);
01307 #endif /* WOLFSSL_SP_SMALL */
01308 }
01309 
01310 /* Mul a by scalar b and add into r. (r += a * b)
01311  *
01312  * r  A single precision integer.
01313  * a  A single precision integer.
01314  * b  A scalar.
01315  */
01316 SP_NOINLINE static void sp_2048_mul_add_45(sp_digit* r, const sp_digit* a,
01317         const sp_digit b)
01318 {
01319 #ifdef WOLFSSL_SP_SMALL
01320     int64_t tb = b;
01321     int64_t t = 0;
01322     int i;
01323 
01324     for (i = 0; i < 45; i++) {
01325         t += (tb * a[i]) + r[i];
01326         r[i] = t & 0x7fffff;
01327         t >>= 23;
01328     }
01329     r[45] += t;
01330 #else
01331     int64_t tb = b;
01332     int64_t t[8];
01333     int i;
01334 
01335     t[0] = tb * a[0]; r[0] += t[0] & 0x7fffff;
01336     for (i = 0; i < 40; i += 8) {
01337         t[1] = tb * a[i+1];
01338         r[i+1] += (t[0] >> 23) + (t[1] & 0x7fffff);
01339         t[2] = tb * a[i+2];
01340         r[i+2] += (t[1] >> 23) + (t[2] & 0x7fffff);
01341         t[3] = tb * a[i+3];
01342         r[i+3] += (t[2] >> 23) + (t[3] & 0x7fffff);
01343         t[4] = tb * a[i+4];
01344         r[i+4] += (t[3] >> 23) + (t[4] & 0x7fffff);
01345         t[5] = tb * a[i+5];
01346         r[i+5] += (t[4] >> 23) + (t[5] & 0x7fffff);
01347         t[6] = tb * a[i+6];
01348         r[i+6] += (t[5] >> 23) + (t[6] & 0x7fffff);
01349         t[7] = tb * a[i+7];
01350         r[i+7] += (t[6] >> 23) + (t[7] & 0x7fffff);
01351         t[0] = tb * a[i+8];
01352         r[i+8] += (t[7] >> 23) + (t[0] & 0x7fffff);
01353     }
01354     t[1] = tb * a[41]; r[41] += (t[0] >> 23) + (t[1] & 0x7fffff);
01355     t[2] = tb * a[42]; r[42] += (t[1] >> 23) + (t[2] & 0x7fffff);
01356     t[3] = tb * a[43]; r[43] += (t[2] >> 23) + (t[3] & 0x7fffff);
01357     t[4] = tb * a[44]; r[44] += (t[3] >> 23) + (t[4] & 0x7fffff);
01358     r[45] +=  t[4] >> 23;
01359 #endif /* WOLFSSL_SP_SMALL */
01360 }
01361 
01362 /* Normalize the values in each word to 23.
01363  *
01364  * a  Array of sp_digit to normalize.
01365  */
01366 static void sp_2048_norm_45(sp_digit* a)
01367 {
01368 #ifdef WOLFSSL_SP_SMALL
01369     int i;
01370     for (i = 0; i < 44; i++) {
01371         a[i+1] += a[i] >> 23;
01372         a[i] &= 0x7fffff;
01373     }
01374 #else
01375     int i;
01376     for (i = 0; i < 40; i += 8) {
01377         a[i+1] += a[i+0] >> 23; a[i+0] &= 0x7fffff;
01378         a[i+2] += a[i+1] >> 23; a[i+1] &= 0x7fffff;
01379         a[i+3] += a[i+2] >> 23; a[i+2] &= 0x7fffff;
01380         a[i+4] += a[i+3] >> 23; a[i+3] &= 0x7fffff;
01381         a[i+5] += a[i+4] >> 23; a[i+4] &= 0x7fffff;
01382         a[i+6] += a[i+5] >> 23; a[i+5] &= 0x7fffff;
01383         a[i+7] += a[i+6] >> 23; a[i+6] &= 0x7fffff;
01384         a[i+8] += a[i+7] >> 23; a[i+7] &= 0x7fffff;
01385         a[i+9] += a[i+8] >> 23; a[i+8] &= 0x7fffff;
01386     }
01387     a[40+1] += a[40] >> 23;
01388     a[40] &= 0x7fffff;
01389     a[41+1] += a[41] >> 23;
01390     a[41] &= 0x7fffff;
01391     a[42+1] += a[42] >> 23;
01392     a[42] &= 0x7fffff;
01393     a[43+1] += a[43] >> 23;
01394     a[43] &= 0x7fffff;
01395 #endif
01396 }
01397 
01398 /* Shift the result in the high 1024 bits down to the bottom.
01399  *
01400  * r  A single precision number.
01401  * a  A single precision number.
01402  */
01403 static void sp_2048_mont_shift_45(sp_digit* r, const sp_digit* a)
01404 {
01405 #ifdef WOLFSSL_SP_SMALL
01406     int i;
01407     int64_t n = a[44] >> 12;
01408     n += ((int64_t)a[45]) << 11;
01409 
01410     for (i = 0; i < 44; i++) {
01411         r[i] = n & 0x7fffff;
01412         n >>= 23;
01413         n += ((int64_t)a[46 + i]) << 11;
01414     }
01415     r[44] = (sp_digit)n;
01416 #else
01417     int i;
01418     int64_t n = a[44] >> 12;
01419     n += ((int64_t)a[45]) << 11;
01420     for (i = 0; i < 40; i += 8) {
01421         r[i + 0] = n & 0x7fffff;
01422         n >>= 23; n += ((int64_t)a[i + 46]) << 11;
01423         r[i + 1] = n & 0x7fffff;
01424         n >>= 23; n += ((int64_t)a[i + 47]) << 11;
01425         r[i + 2] = n & 0x7fffff;
01426         n >>= 23; n += ((int64_t)a[i + 48]) << 11;
01427         r[i + 3] = n & 0x7fffff;
01428         n >>= 23; n += ((int64_t)a[i + 49]) << 11;
01429         r[i + 4] = n & 0x7fffff;
01430         n >>= 23; n += ((int64_t)a[i + 50]) << 11;
01431         r[i + 5] = n & 0x7fffff;
01432         n >>= 23; n += ((int64_t)a[i + 51]) << 11;
01433         r[i + 6] = n & 0x7fffff;
01434         n >>= 23; n += ((int64_t)a[i + 52]) << 11;
01435         r[i + 7] = n & 0x7fffff;
01436         n >>= 23; n += ((int64_t)a[i + 53]) << 11;
01437     }
01438     r[40] = n & 0x7fffff; n >>= 23; n += ((int64_t)a[86]) << 11;
01439     r[41] = n & 0x7fffff; n >>= 23; n += ((int64_t)a[87]) << 11;
01440     r[42] = n & 0x7fffff; n >>= 23; n += ((int64_t)a[88]) << 11;
01441     r[43] = n & 0x7fffff; n >>= 23; n += ((int64_t)a[89]) << 11;
01442     r[44] = (sp_digit)n;
01443 #endif /* WOLFSSL_SP_SMALL */
01444     XMEMSET(&r[45], 0, sizeof(*r) * 45);
01445 }
01446 
01447 /* Reduce the number back to 2048 bits using Montgomery reduction.
01448  *
01449  * a   A single precision number to reduce in place.
01450  * m   The single precision number representing the modulus.
01451  * mp  The digit representing the negative inverse of m mod 2^n.
01452  */
01453 static void sp_2048_mont_reduce_45(sp_digit* a, sp_digit* m, sp_digit mp)
01454 {
01455     int i;
01456     sp_digit mu;
01457 
01458     for (i=0; i<44; i++) {
01459         mu = (a[i] * mp) & 0x7fffff;
01460         sp_2048_mul_add_45(a+i, m, mu);
01461         a[i+1] += a[i] >> 23;
01462     }
01463     mu = (a[i] * mp) & 0xfffl;
01464     sp_2048_mul_add_45(a+i, m, mu);
01465     a[i+1] += a[i] >> 23;
01466     a[i] &= 0x7fffff;
01467 
01468     sp_2048_mont_shift_45(a, a);
01469     sp_2048_cond_sub_45(a, a, m, 0 - ((a[44] >> 12) > 0));
01470     sp_2048_norm_45(a);
01471 }
01472 
01473 /* Multiply two Montogmery form numbers mod the modulus (prime).
01474  * (r = a * b mod m)
01475  *
01476  * r   Result of multiplication.
01477  * a   First number to multiply in Montogmery form.
01478  * b   Second number to multiply in Montogmery form.
01479  * m   Modulus (prime).
01480  * mp  Montogmery mulitplier.
01481  */
01482 static void sp_2048_mont_mul_45(sp_digit* r, sp_digit* a, sp_digit* b,
01483         sp_digit* m, sp_digit mp)
01484 {
01485     sp_2048_mul_45(r, a, b);
01486     sp_2048_mont_reduce_45(r, m, mp);
01487 }
01488 
01489 /* Square the Montgomery form number. (r = a * a mod m)
01490  *
01491  * r   Result of squaring.
01492  * a   Number to square in Montogmery form.
01493  * m   Modulus (prime).
01494  * mp  Montogmery mulitplier.
01495  */
01496 static void sp_2048_mont_sqr_45(sp_digit* r, sp_digit* a, sp_digit* m,
01497         sp_digit mp)
01498 {
01499     sp_2048_sqr_45(r, a);
01500     sp_2048_mont_reduce_45(r, m, mp);
01501 }
01502 
01503 /* Multiply a by scalar b into r. (r = a * b)
01504  *
01505  * r  A single precision integer.
01506  * a  A single precision integer.
01507  * b  A scalar.
01508  */
01509 SP_NOINLINE static void sp_2048_mul_d_45(sp_digit* r, const sp_digit* a,
01510     const sp_digit b)
01511 {
01512 #ifdef WOLFSSL_SP_SMALL
01513     int64_t tb = b;
01514     int64_t t = 0;
01515     int i;
01516 
01517     for (i = 0; i < 45; i++) {
01518         t += tb * a[i];
01519         r[i] = t & 0x7fffff;
01520         t >>= 23;
01521     }
01522     r[45] = (sp_digit)t;
01523 #else
01524     int64_t tb = b;
01525     int64_t t[8];
01526     int i;
01527 
01528     t[0] = tb * a[0]; r[0] = t[0] & 0x7fffff;
01529     for (i = 0; i < 40; i += 8) {
01530         t[1] = tb * a[i+1];
01531         r[i+1] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
01532         t[2] = tb * a[i+2];
01533         r[i+2] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff);
01534         t[3] = tb * a[i+3];
01535         r[i+3] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff);
01536         t[4] = tb * a[i+4];
01537         r[i+4] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff);
01538         t[5] = tb * a[i+5];
01539         r[i+5] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff);
01540         t[6] = tb * a[i+6];
01541         r[i+6] = (sp_digit)(t[5] >> 23) + (t[6] & 0x7fffff);
01542         t[7] = tb * a[i+7];
01543         r[i+7] = (sp_digit)(t[6] >> 23) + (t[7] & 0x7fffff);
01544         t[0] = tb * a[i+8];
01545         r[i+8] = (sp_digit)(t[7] >> 23) + (t[0] & 0x7fffff);
01546     }
01547     t[1] = tb * a[41];
01548     r[41] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
01549     t[2] = tb * a[42];
01550     r[42] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff);
01551     t[3] = tb * a[43];
01552     r[43] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff);
01553     t[4] = tb * a[44];
01554     r[44] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff);
01555     r[45] =  (sp_digit)(t[4] >> 23);
01556 #endif /* WOLFSSL_SP_SMALL */
01557 }
01558 
01559 /* Multiply a by scalar b into r. (r = a * b)
01560  *
01561  * r  A single precision integer.
01562  * a  A single precision integer.
01563  * b  A scalar.
01564  */
01565 SP_NOINLINE static void sp_2048_mul_d_90(sp_digit* r, const sp_digit* a,
01566     const sp_digit b)
01567 {
01568 #ifdef WOLFSSL_SP_SMALL
01569     int64_t tb = b;
01570     int64_t t = 0;
01571     int i;
01572 
01573     for (i = 0; i < 90; i++) {
01574         t += tb * a[i];
01575         r[i] = t & 0x7fffff;
01576         t >>= 23;
01577     }
01578     r[90] = (sp_digit)t;
01579 #else
01580     int64_t tb = b;
01581     int64_t t[8];
01582     int i;
01583 
01584     t[0] = tb * a[0]; r[0] = t[0] & 0x7fffff;
01585     for (i = 0; i < 88; i += 8) {
01586         t[1] = tb * a[i+1];
01587         r[i+1] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
01588         t[2] = tb * a[i+2];
01589         r[i+2] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff);
01590         t[3] = tb * a[i+3];
01591         r[i+3] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff);
01592         t[4] = tb * a[i+4];
01593         r[i+4] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff);
01594         t[5] = tb * a[i+5];
01595         r[i+5] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff);
01596         t[6] = tb * a[i+6];
01597         r[i+6] = (sp_digit)(t[5] >> 23) + (t[6] & 0x7fffff);
01598         t[7] = tb * a[i+7];
01599         r[i+7] = (sp_digit)(t[6] >> 23) + (t[7] & 0x7fffff);
01600         t[0] = tb * a[i+8];
01601         r[i+8] = (sp_digit)(t[7] >> 23) + (t[0] & 0x7fffff);
01602     }
01603     t[1] = tb * a[89];
01604     r[89] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
01605     r[90] =  (sp_digit)(t[1] >> 23);
01606 #endif /* WOLFSSL_SP_SMALL */
01607 }
01608 
01609 /* Conditionally add a and b using the mask m.
01610  * m is -1 to add and 0 when not.
01611  *
01612  * r  A single precision number representing conditional add result.
01613  * a  A single precision number to add with.
01614  * b  A single precision number to add.
01615  * m  Mask value to apply.
01616  */
01617 static void sp_2048_cond_add_45(sp_digit* r, const sp_digit* a,
01618         const sp_digit* b, const sp_digit m)
01619 {
01620 #ifdef WOLFSSL_SP_SMALL
01621     int i;
01622 
01623     for (i = 0; i < 45; i++)
01624         r[i] = a[i] + (b[i] & m);
01625 #else
01626     int i;
01627 
01628     for (i = 0; i < 40; i += 8) {
01629         r[i + 0] = a[i + 0] + (b[i + 0] & m);
01630         r[i + 1] = a[i + 1] + (b[i + 1] & m);
01631         r[i + 2] = a[i + 2] + (b[i + 2] & m);
01632         r[i + 3] = a[i + 3] + (b[i + 3] & m);
01633         r[i + 4] = a[i + 4] + (b[i + 4] & m);
01634         r[i + 5] = a[i + 5] + (b[i + 5] & m);
01635         r[i + 6] = a[i + 6] + (b[i + 6] & m);
01636         r[i + 7] = a[i + 7] + (b[i + 7] & m);
01637     }
01638     r[40] = a[40] + (b[40] & m);
01639     r[41] = a[41] + (b[41] & m);
01640     r[42] = a[42] + (b[42] & m);
01641     r[43] = a[43] + (b[43] & m);
01642     r[44] = a[44] + (b[44] & m);
01643 #endif /* WOLFSSL_SP_SMALL */
01644 }
01645 
01646 #ifdef WOLFSSL_SMALL
01647 /* Add b to a into r. (r = a + b)
01648  *
01649  * r  A single precision integer.
01650  * a  A single precision integer.
01651  * b  A single precision integer.
01652  */
01653 SP_NOINLINE static int sp_2048_add_45(sp_digit* r, const sp_digit* a,
01654         const sp_digit* b)
01655 {
01656     int i;
01657 
01658     for (i = 0; i < 45; i++)
01659         r[i] = a[i] + b[i];
01660 
01661     return 0;
01662 }
01663 #endif
01664 SP_NOINLINE static void sp_2048_rshift_45(sp_digit* r, sp_digit* a, byte n)
01665 {
01666 #ifdef WOLFSSL_SP_SMALL
01667     int i;
01668 
01669     for (i=0; i<44; i++)
01670         r[i] = ((a[i] >> n) | (a[i + 1] << (23 - n))) & 0x7fffff;
01671 #else
01672     r[0] = ((a[0] >> n) | (a[1] << (23 - n))) & 0x7fffff;
01673     r[1] = ((a[1] >> n) | (a[2] << (23 - n))) & 0x7fffff;
01674     r[2] = ((a[2] >> n) | (a[3] << (23 - n))) & 0x7fffff;
01675     r[3] = ((a[3] >> n) | (a[4] << (23 - n))) & 0x7fffff;
01676     r[4] = ((a[4] >> n) | (a[5] << (23 - n))) & 0x7fffff;
01677     r[5] = ((a[5] >> n) | (a[6] << (23 - n))) & 0x7fffff;
01678     r[6] = ((a[6] >> n) | (a[7] << (23 - n))) & 0x7fffff;
01679     r[7] = ((a[7] >> n) | (a[8] << (23 - n))) & 0x7fffff;
01680     r[8] = ((a[8] >> n) | (a[9] << (23 - n))) & 0x7fffff;
01681     r[9] = ((a[9] >> n) | (a[10] << (23 - n))) & 0x7fffff;
01682     r[10] = ((a[10] >> n) | (a[11] << (23 - n))) & 0x7fffff;
01683     r[11] = ((a[11] >> n) | (a[12] << (23 - n))) & 0x7fffff;
01684     r[12] = ((a[12] >> n) | (a[13] << (23 - n))) & 0x7fffff;
01685     r[13] = ((a[13] >> n) | (a[14] << (23 - n))) & 0x7fffff;
01686     r[14] = ((a[14] >> n) | (a[15] << (23 - n))) & 0x7fffff;
01687     r[15] = ((a[15] >> n) | (a[16] << (23 - n))) & 0x7fffff;
01688     r[16] = ((a[16] >> n) | (a[17] << (23 - n))) & 0x7fffff;
01689     r[17] = ((a[17] >> n) | (a[18] << (23 - n))) & 0x7fffff;
01690     r[18] = ((a[18] >> n) | (a[19] << (23 - n))) & 0x7fffff;
01691     r[19] = ((a[19] >> n) | (a[20] << (23 - n))) & 0x7fffff;
01692     r[20] = ((a[20] >> n) | (a[21] << (23 - n))) & 0x7fffff;
01693     r[21] = ((a[21] >> n) | (a[22] << (23 - n))) & 0x7fffff;
01694     r[22] = ((a[22] >> n) | (a[23] << (23 - n))) & 0x7fffff;
01695     r[23] = ((a[23] >> n) | (a[24] << (23 - n))) & 0x7fffff;
01696     r[24] = ((a[24] >> n) | (a[25] << (23 - n))) & 0x7fffff;
01697     r[25] = ((a[25] >> n) | (a[26] << (23 - n))) & 0x7fffff;
01698     r[26] = ((a[26] >> n) | (a[27] << (23 - n))) & 0x7fffff;
01699     r[27] = ((a[27] >> n) | (a[28] << (23 - n))) & 0x7fffff;
01700     r[28] = ((a[28] >> n) | (a[29] << (23 - n))) & 0x7fffff;
01701     r[29] = ((a[29] >> n) | (a[30] << (23 - n))) & 0x7fffff;
01702     r[30] = ((a[30] >> n) | (a[31] << (23 - n))) & 0x7fffff;
01703     r[31] = ((a[31] >> n) | (a[32] << (23 - n))) & 0x7fffff;
01704     r[32] = ((a[32] >> n) | (a[33] << (23 - n))) & 0x7fffff;
01705     r[33] = ((a[33] >> n) | (a[34] << (23 - n))) & 0x7fffff;
01706     r[34] = ((a[34] >> n) | (a[35] << (23 - n))) & 0x7fffff;
01707     r[35] = ((a[35] >> n) | (a[36] << (23 - n))) & 0x7fffff;
01708     r[36] = ((a[36] >> n) | (a[37] << (23 - n))) & 0x7fffff;
01709     r[37] = ((a[37] >> n) | (a[38] << (23 - n))) & 0x7fffff;
01710     r[38] = ((a[38] >> n) | (a[39] << (23 - n))) & 0x7fffff;
01711     r[39] = ((a[39] >> n) | (a[40] << (23 - n))) & 0x7fffff;
01712     r[40] = ((a[40] >> n) | (a[41] << (23 - n))) & 0x7fffff;
01713     r[41] = ((a[41] >> n) | (a[42] << (23 - n))) & 0x7fffff;
01714     r[42] = ((a[42] >> n) | (a[43] << (23 - n))) & 0x7fffff;
01715     r[43] = ((a[43] >> n) | (a[44] << (23 - n))) & 0x7fffff;
01716 #endif
01717     r[44] = a[44] >> n;
01718 }
01719 
01720 /* Divide d in a and put remainder into r (m*d + r = a)
01721  * m is not calculated as it is not needed at this time.
01722  *
01723  * a  Nmber to be divided.
01724  * d  Number to divide with.
01725  * m  Multiplier result.
01726  * r  Remainder from the division.
01727  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
01728  */
01729 static int sp_2048_div_45(sp_digit* a, sp_digit* d, sp_digit* m,
01730         sp_digit* r)
01731 {
01732     int i;
01733     int64_t d1;
01734     sp_digit div, r1;
01735 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
01736     sp_digit* td;
01737 #else
01738     sp_digit t1d[90 + 1], t2d[45 + 1], sdd[45 + 1];
01739 #endif
01740     sp_digit* t1;
01741     sp_digit* t2;
01742     sp_digit* sd;
01743     int err = MP_OKAY;
01744 
01745 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
01746     td = XMALLOC(sizeof(sp_digit) * (4 * 45 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER);
01747     if (td != NULL) {
01748         t1 = td;
01749         t2 = td + 90 + 1;
01750         sd = t2 + 45 + 1;
01751     }
01752     else
01753         err = MEMORY_E;
01754 #else
01755     t1 = t1d;
01756     t2 = t2d;
01757     sd = sdd;
01758 #endif
01759 
01760     (void)m;
01761 
01762     if (err == MP_OKAY) {
01763         sp_2048_mul_d_45(sd, d, 1 << 11);
01764         sp_2048_mul_d_90(t1, a, 1 << 11);
01765         div = sd[44];
01766         for (i=45; i>=0; i--) {
01767             t1[45 + i] += t1[45 + i - 1] >> 23;
01768             t1[45 + i - 1] &= 0x7fffff;
01769             d1 = t1[45 + i];
01770             d1 <<= 23;
01771             d1 += t1[45 + i - 1];
01772             r1 = (sp_digit)(d1 / div);
01773 
01774             sp_2048_mul_d_45(t2, sd, r1);
01775             sp_2048_sub_45(&t1[i], &t1[i], t2);
01776             t1[45 + i] -= t2[45];
01777             t1[45 + i] += t1[45 + i - 1] >> 23;
01778             t1[45 + i - 1] &= 0x7fffff;
01779             r1 = (((-t1[45 + i]) << 23) - t1[45 + i - 1]) / div;
01780             r1 -= t1[45 + i];
01781             sp_2048_mul_d_45(t2, sd, r1);
01782             sp_2048_add_45(&t1[i], &t1[i], t2);
01783             t1[45 + i] += t1[45 + i - 1] >> 23;
01784             t1[45 + i - 1] &= 0x7fffff;
01785         }
01786         t1[45 - 1] += t1[45 - 2] >> 23;
01787         t1[45 - 2] &= 0x7fffff;
01788         d1 = t1[45 - 1];
01789         r1 = (sp_digit)(d1 / div);
01790 
01791         sp_2048_mul_d_45(t2, sd, r1);
01792         sp_2048_sub_45(t1, t1, t2);
01793         XMEMCPY(r, t1, sizeof(*r) * 2 * 45);
01794         for (i=0; i<43; i++) {
01795             r[i+1] += r[i] >> 23;
01796             r[i] &= 0x7fffff;
01797         }
01798         sp_2048_cond_add_45(r, r, sd, 0 - (r[44] < 0));
01799     }
01800 
01801     sp_2048_rshift_45(r, r, 11);
01802 
01803 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
01804     if (td != NULL)
01805         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
01806 #endif
01807 
01808     return err;
01809 }
01810 
01811 /* Reduce a modulo m into r. (r = a mod m)
01812  *
01813  * r  A single precision number that is the reduced result.
01814  * a  A single precision number that is to be reduced.
01815  * m  A single precision number that is the modulus to reduce with.
01816  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
01817  */
01818 static int sp_2048_mod_45(sp_digit* r, sp_digit* a, sp_digit* m)
01819 {
01820     return sp_2048_div_45(a, m, NULL, r);
01821 }
01822 
01823 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
01824  *
01825  * r     A single precision number that is the result of the operation.
01826  * a     A single precision number being exponentiated.
01827  * e     A single precision number that is the exponent.
01828  * bits  The number of bits in the exponent.
01829  * m     A single precision number that is the modulus.
01830  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
01831  */
01832 static int sp_2048_mod_exp_45(sp_digit* r, sp_digit* a, sp_digit* e, int bits,
01833     sp_digit* m, int reduceA)
01834 {
01835 #ifdef WOLFSSL_SP_SMALL
01836     sp_digit* td;
01837     sp_digit* t[3];
01838     sp_digit* norm;
01839     sp_digit mp = 1;
01840     sp_digit n;
01841     int i;
01842     int c, y;
01843     int err = MP_OKAY;
01844 
01845     td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 45 * 2, NULL,
01846                             DYNAMIC_TYPE_TMP_BUFFER);
01847     if (td == NULL)
01848         err = MEMORY_E;
01849 
01850     if (err == MP_OKAY) {
01851         XMEMSET(td, 0, sizeof(*td) * 3 * 45 * 2);
01852 
01853         norm = t[0] = td;
01854         t[1] = &td[45 * 2];
01855         t[2] = &td[2 * 45 * 2];
01856 
01857         sp_2048_mont_setup(m, &mp);
01858         sp_2048_mont_norm_45(norm, m);
01859 
01860         if (reduceA)
01861             err = sp_2048_mod_45(t[1], a, m);
01862         else
01863             XMEMCPY(t[1], a, sizeof(sp_digit) * 45);
01864     }
01865     if (err == MP_OKAY) {
01866         sp_2048_mul_45(t[1], t[1], norm);
01867         err = sp_2048_mod_45(t[1], t[1], m);
01868     }
01869 
01870     if (err == MP_OKAY) {
01871         i = bits / 23;
01872         c = bits % 23;
01873         n = e[i--] << (23 - c);
01874         for (; ; c--) {
01875             if (c == 0) {
01876                 if (i == -1)
01877                     break;
01878 
01879                 n = e[i--];
01880                 c = 23;
01881             }
01882 
01883             y = (n >> 22) & 1;
01884             n <<= 1;
01885 
01886             sp_2048_mont_mul_45(t[y^1], t[0], t[1], m, mp);
01887 
01888             XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
01889                                   ((size_t)t[1] & addr_mask[y])),
01890                     sizeof(*t[2]) * 45 * 2);
01891             sp_2048_mont_sqr_45(t[2], t[2], m, mp);
01892             XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
01893                             ((size_t)t[1] & addr_mask[y])), t[2],
01894                     sizeof(*t[2]) * 45 * 2);
01895         }
01896 
01897         sp_2048_mont_reduce_45(t[0], m, mp);
01898         n = sp_2048_cmp_45(t[0], m);
01899         sp_2048_cond_sub_45(t[0], t[0], m, (n < 0) - 1);
01900         XMEMCPY(r, t[0], sizeof(*r) * 45 * 2);
01901 
01902     }
01903 
01904     if (td != NULL)
01905         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
01906 
01907     return err;
01908 #elif defined(WOLFSSL_SP_CACHE_RESISTANT)
01909 #ifndef WOLFSSL_SMALL_STACK
01910     sp_digit t[3][90];
01911 #else
01912     sp_digit* td;
01913     sp_digit* t[3];
01914 #endif
01915     sp_digit* norm;
01916     sp_digit mp = 1;
01917     sp_digit n;
01918     int i;
01919     int c, y;
01920     int err = MP_OKAY;
01921 
01922 #ifdef WOLFSSL_SMALL_STACK
01923     td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 45 * 2, NULL,
01924                             DYNAMIC_TYPE_TMP_BUFFER);
01925     if (td == NULL)
01926         err = MEMORY_E;
01927 
01928     if (err == MP_OKAY) {
01929         t[0] = td;
01930         t[1] = &td[45 * 2];
01931         t[2] = &td[2 * 45 * 2];
01932         norm = t[0];
01933     }
01934 #else
01935     norm = t[0];
01936 #endif
01937 
01938     if (err == MP_OKAY) {
01939         sp_2048_mont_setup(m, &mp);
01940         sp_2048_mont_norm_45(norm, m);
01941 
01942         if (reduceA) {
01943             err = sp_2048_mod_45(t[1], a, m);
01944             if (err == MP_OKAY) {
01945                 sp_2048_mul_45(t[1], t[1], norm);
01946                 err = sp_2048_mod_45(t[1], t[1], m);
01947             }
01948         }
01949         else {
01950             sp_2048_mul_45(t[1], a, norm);
01951             err = sp_2048_mod_45(t[1], t[1], m);
01952         }
01953     }
01954 
01955     if (err == MP_OKAY) {
01956         i = bits / 23;
01957         c = bits % 23;
01958         n = e[i--] << (23 - c);
01959         for (; ; c--) {
01960             if (c == 0) {
01961                 if (i == -1)
01962                     break;
01963 
01964                 n = e[i--];
01965                 c = 23;
01966             }
01967 
01968             y = (n >> 22) & 1;
01969             n <<= 1;
01970 
01971             sp_2048_mont_mul_45(t[y^1], t[0], t[1], m, mp);
01972 
01973             XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
01974                                  ((size_t)t[1] & addr_mask[y])), sizeof(t[2]));
01975             sp_2048_mont_sqr_45(t[2], t[2], m, mp);
01976             XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
01977                            ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2]));
01978         }
01979 
01980         sp_2048_mont_reduce_45(t[0], m, mp);
01981         n = sp_2048_cmp_45(t[0], m);
01982         sp_2048_cond_sub_45(t[0], t[0], m, (n < 0) - 1);
01983         XMEMCPY(r, t[0], sizeof(t[0]));
01984     }
01985 
01986 #ifdef WOLFSSL_SMALL_STACK
01987     if (td != NULL)
01988         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
01989 #endif
01990 
01991     return err;
01992 #else
01993 #ifndef WOLFSSL_SMALL_STACK
01994     sp_digit t[32][90];
01995 #else
01996     sp_digit* t[32];
01997     sp_digit* td;
01998 #endif
01999     sp_digit* norm;
02000     sp_digit rt[90];
02001     sp_digit mp = 1;
02002     sp_digit n;
02003     int i;
02004     int c, y;
02005     int err = MP_OKAY;
02006 
02007 #ifdef WOLFSSL_SMALL_STACK
02008     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 90, NULL,
02009                             DYNAMIC_TYPE_TMP_BUFFER);
02010     if (td == NULL)
02011         err = MEMORY_E;
02012 
02013     if (err == MP_OKAY) {
02014         for (i=0; i<32; i++)
02015             t[i] = td + i * 90;
02016         norm = t[0];
02017     }
02018 #else
02019     norm = t[0];
02020 #endif
02021 
02022     if (err == MP_OKAY) {
02023         sp_2048_mont_setup(m, &mp);
02024         sp_2048_mont_norm_45(norm, m);
02025 
02026         if (reduceA) {
02027             err = sp_2048_mod_45(t[1], a, m);
02028             if (err == MP_OKAY) {
02029                 sp_2048_mul_45(t[1], t[1], norm);
02030                 err = sp_2048_mod_45(t[1], t[1], m);
02031             }
02032         }
02033         else {
02034             sp_2048_mul_45(t[1], a, norm);
02035             err = sp_2048_mod_45(t[1], t[1], m);
02036         }
02037     }
02038 
02039     if (err == MP_OKAY) {
02040         sp_2048_mont_sqr_45(t[ 2], t[ 1], m, mp);
02041         sp_2048_mont_mul_45(t[ 3], t[ 2], t[ 1], m, mp);
02042         sp_2048_mont_sqr_45(t[ 4], t[ 2], m, mp);
02043         sp_2048_mont_mul_45(t[ 5], t[ 3], t[ 2], m, mp);
02044         sp_2048_mont_sqr_45(t[ 6], t[ 3], m, mp);
02045         sp_2048_mont_mul_45(t[ 7], t[ 4], t[ 3], m, mp);
02046         sp_2048_mont_sqr_45(t[ 8], t[ 4], m, mp);
02047         sp_2048_mont_mul_45(t[ 9], t[ 5], t[ 4], m, mp);
02048         sp_2048_mont_sqr_45(t[10], t[ 5], m, mp);
02049         sp_2048_mont_mul_45(t[11], t[ 6], t[ 5], m, mp);
02050         sp_2048_mont_sqr_45(t[12], t[ 6], m, mp);
02051         sp_2048_mont_mul_45(t[13], t[ 7], t[ 6], m, mp);
02052         sp_2048_mont_sqr_45(t[14], t[ 7], m, mp);
02053         sp_2048_mont_mul_45(t[15], t[ 8], t[ 7], m, mp);
02054         sp_2048_mont_sqr_45(t[16], t[ 8], m, mp);
02055         sp_2048_mont_mul_45(t[17], t[ 9], t[ 8], m, mp);
02056         sp_2048_mont_sqr_45(t[18], t[ 9], m, mp);
02057         sp_2048_mont_mul_45(t[19], t[10], t[ 9], m, mp);
02058         sp_2048_mont_sqr_45(t[20], t[10], m, mp);
02059         sp_2048_mont_mul_45(t[21], t[11], t[10], m, mp);
02060         sp_2048_mont_sqr_45(t[22], t[11], m, mp);
02061         sp_2048_mont_mul_45(t[23], t[12], t[11], m, mp);
02062         sp_2048_mont_sqr_45(t[24], t[12], m, mp);
02063         sp_2048_mont_mul_45(t[25], t[13], t[12], m, mp);
02064         sp_2048_mont_sqr_45(t[26], t[13], m, mp);
02065         sp_2048_mont_mul_45(t[27], t[14], t[13], m, mp);
02066         sp_2048_mont_sqr_45(t[28], t[14], m, mp);
02067         sp_2048_mont_mul_45(t[29], t[15], t[14], m, mp);
02068         sp_2048_mont_sqr_45(t[30], t[15], m, mp);
02069         sp_2048_mont_mul_45(t[31], t[16], t[15], m, mp);
02070 
02071         bits = ((bits + 4) / 5) * 5;
02072         i = ((bits + 22) / 23) - 1;
02073         c = bits % 23;
02074         if (c == 0)
02075             c = 23;
02076         if (i < 45)
02077             n = e[i--] << (32 - c);
02078         else {
02079             n = 0;
02080             i--;
02081         }
02082         if (c < 5) {
02083             n |= e[i--] << (9 - c);
02084             c += 23;
02085         }
02086         y = n >> 27;
02087         n <<= 5;
02088         c -= 5;
02089         XMEMCPY(rt, t[y], sizeof(rt));
02090         for (; i>=0 || c>=5; ) {
02091             if (c < 5) {
02092                 n |= e[i--] << (9 - c);
02093                 c += 23;
02094             }
02095             y = (n >> 27) & 0x1f;
02096             n <<= 5;
02097             c -= 5;
02098 
02099             sp_2048_mont_sqr_45(rt, rt, m, mp);
02100             sp_2048_mont_sqr_45(rt, rt, m, mp);
02101             sp_2048_mont_sqr_45(rt, rt, m, mp);
02102             sp_2048_mont_sqr_45(rt, rt, m, mp);
02103             sp_2048_mont_sqr_45(rt, rt, m, mp);
02104 
02105             sp_2048_mont_mul_45(rt, rt, t[y], m, mp);
02106         }
02107 
02108         sp_2048_mont_reduce_45(rt, m, mp);
02109         n = sp_2048_cmp_45(rt, m);
02110         sp_2048_cond_sub_45(rt, rt, m, (n < 0) - 1);
02111         XMEMCPY(r, rt, sizeof(rt));
02112     }
02113 
02114 #ifdef WOLFSSL_SMALL_STACK
02115     if (td != NULL)
02116         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
02117 #endif
02118 
02119     return err;
02120 #endif
02121 }
02122 
02123 #endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
02124 
02125 /* r = 2^n mod m where n is the number of bits to reduce by.
02126  * Given m must be 2048 bits, just need to subtract.
02127  *
02128  * r  A single precision number.
02129  * m  A signle precision number.
02130  */
02131 static void sp_2048_mont_norm_90(sp_digit* r, sp_digit* m)
02132 {
02133     /* Set r = 2^n - 1. */
02134 #ifdef WOLFSSL_SP_SMALL
02135     int i;
02136 
02137     for (i=0; i<89; i++)
02138         r[i] = 0x7fffff;
02139 #else
02140     int i;
02141 
02142     for (i = 0; i < 88; i += 8) {
02143         r[i + 0] = 0x7fffff;
02144         r[i + 1] = 0x7fffff;
02145         r[i + 2] = 0x7fffff;
02146         r[i + 3] = 0x7fffff;
02147         r[i + 4] = 0x7fffff;
02148         r[i + 5] = 0x7fffff;
02149         r[i + 6] = 0x7fffff;
02150         r[i + 7] = 0x7fffff;
02151     }
02152     r[88] = 0x7fffff;
02153 #endif
02154     r[89] = 0x1l;
02155 
02156     /* r = (2^n - 1) mod n */
02157     sp_2048_sub_90(r, r, m);
02158 
02159     /* Add one so r = 2^n mod m */
02160     r[0] += 1;
02161 }
02162 
02163 /* Compare a with b in constant time.
02164  *
02165  * a  A single precision integer.
02166  * b  A single precision integer.
02167  * return -ve, 0 or +ve if a is less than, equal to or greater than b
02168  * respectively.
02169  */
02170 static sp_digit sp_2048_cmp_90(const sp_digit* a, const sp_digit* b)
02171 {
02172     sp_digit r = 0;
02173 #ifdef WOLFSSL_SP_SMALL
02174     int i;
02175 
02176     for (i=89; i>=0; i--)
02177         r |= (a[i] - b[i]) & (0 - !r);
02178 #else
02179     int i;
02180 
02181     r |= (a[89] - b[89]) & (0 - !r);
02182     r |= (a[88] - b[88]) & (0 - !r);
02183     for (i = 80; i >= 0; i -= 8) {
02184         r |= (a[i + 7] - b[i + 7]) & (0 - !r);
02185         r |= (a[i + 6] - b[i + 6]) & (0 - !r);
02186         r |= (a[i + 5] - b[i + 5]) & (0 - !r);
02187         r |= (a[i + 4] - b[i + 4]) & (0 - !r);
02188         r |= (a[i + 3] - b[i + 3]) & (0 - !r);
02189         r |= (a[i + 2] - b[i + 2]) & (0 - !r);
02190         r |= (a[i + 1] - b[i + 1]) & (0 - !r);
02191         r |= (a[i + 0] - b[i + 0]) & (0 - !r);
02192     }
02193 #endif /* WOLFSSL_SP_SMALL */
02194 
02195     return r;
02196 }
02197 
02198 /* Conditionally subtract b from a using the mask m.
02199  * m is -1 to subtract and 0 when not.
02200  *
02201  * r  A single precision number representing condition subtract result.
02202  * a  A single precision number to subtract from.
02203  * b  A single precision number to subtract.
02204  * m  Mask value to apply.
02205  */
02206 static void sp_2048_cond_sub_90(sp_digit* r, const sp_digit* a,
02207         const sp_digit* b, const sp_digit m)
02208 {
02209 #ifdef WOLFSSL_SP_SMALL
02210     int i;
02211 
02212     for (i = 0; i < 90; i++)
02213         r[i] = a[i] - (b[i] & m);
02214 #else
02215     int i;
02216 
02217     for (i = 0; i < 88; i += 8) {
02218         r[i + 0] = a[i + 0] - (b[i + 0] & m);
02219         r[i + 1] = a[i + 1] - (b[i + 1] & m);
02220         r[i + 2] = a[i + 2] - (b[i + 2] & m);
02221         r[i + 3] = a[i + 3] - (b[i + 3] & m);
02222         r[i + 4] = a[i + 4] - (b[i + 4] & m);
02223         r[i + 5] = a[i + 5] - (b[i + 5] & m);
02224         r[i + 6] = a[i + 6] - (b[i + 6] & m);
02225         r[i + 7] = a[i + 7] - (b[i + 7] & m);
02226     }
02227     r[88] = a[88] - (b[88] & m);
02228     r[89] = a[89] - (b[89] & m);
02229 #endif /* WOLFSSL_SP_SMALL */
02230 }
02231 
02232 /* Mul a by scalar b and add into r. (r += a * b)
02233  *
02234  * r  A single precision integer.
02235  * a  A single precision integer.
02236  * b  A scalar.
02237  */
02238 SP_NOINLINE static void sp_2048_mul_add_90(sp_digit* r, const sp_digit* a,
02239         const sp_digit b)
02240 {
02241 #ifdef WOLFSSL_SP_SMALL
02242     int64_t tb = b;
02243     int64_t t = 0;
02244     int i;
02245 
02246     for (i = 0; i < 90; i++) {
02247         t += (tb * a[i]) + r[i];
02248         r[i] = t & 0x7fffff;
02249         t >>= 23;
02250     }
02251     r[90] += t;
02252 #else
02253     int64_t tb = b;
02254     int64_t t[8];
02255     int i;
02256 
02257     t[0] = tb * a[0]; r[0] += t[0] & 0x7fffff;
02258     for (i = 0; i < 88; i += 8) {
02259         t[1] = tb * a[i+1];
02260         r[i+1] += (t[0] >> 23) + (t[1] & 0x7fffff);
02261         t[2] = tb * a[i+2];
02262         r[i+2] += (t[1] >> 23) + (t[2] & 0x7fffff);
02263         t[3] = tb * a[i+3];
02264         r[i+3] += (t[2] >> 23) + (t[3] & 0x7fffff);
02265         t[4] = tb * a[i+4];
02266         r[i+4] += (t[3] >> 23) + (t[4] & 0x7fffff);
02267         t[5] = tb * a[i+5];
02268         r[i+5] += (t[4] >> 23) + (t[5] & 0x7fffff);
02269         t[6] = tb * a[i+6];
02270         r[i+6] += (t[5] >> 23) + (t[6] & 0x7fffff);
02271         t[7] = tb * a[i+7];
02272         r[i+7] += (t[6] >> 23) + (t[7] & 0x7fffff);
02273         t[0] = tb * a[i+8];
02274         r[i+8] += (t[7] >> 23) + (t[0] & 0x7fffff);
02275     }
02276     t[1] = tb * a[89]; r[89] += (t[0] >> 23) + (t[1] & 0x7fffff);
02277     r[90] +=  t[1] >> 23;
02278 #endif /* WOLFSSL_SP_SMALL */
02279 }
02280 
02281 /* Normalize the values in each word to 23.
02282  *
02283  * a  Array of sp_digit to normalize.
02284  */
02285 static void sp_2048_norm_90(sp_digit* a)
02286 {
02287 #ifdef WOLFSSL_SP_SMALL
02288     int i;
02289     for (i = 0; i < 89; i++) {
02290         a[i+1] += a[i] >> 23;
02291         a[i] &= 0x7fffff;
02292     }
02293 #else
02294     int i;
02295     for (i = 0; i < 88; i += 8) {
02296         a[i+1] += a[i+0] >> 23; a[i+0] &= 0x7fffff;
02297         a[i+2] += a[i+1] >> 23; a[i+1] &= 0x7fffff;
02298         a[i+3] += a[i+2] >> 23; a[i+2] &= 0x7fffff;
02299         a[i+4] += a[i+3] >> 23; a[i+3] &= 0x7fffff;
02300         a[i+5] += a[i+4] >> 23; a[i+4] &= 0x7fffff;
02301         a[i+6] += a[i+5] >> 23; a[i+5] &= 0x7fffff;
02302         a[i+7] += a[i+6] >> 23; a[i+6] &= 0x7fffff;
02303         a[i+8] += a[i+7] >> 23; a[i+7] &= 0x7fffff;
02304         a[i+9] += a[i+8] >> 23; a[i+8] &= 0x7fffff;
02305     }
02306     a[88+1] += a[88] >> 23;
02307     a[88] &= 0x7fffff;
02308 #endif
02309 }
02310 
02311 /* Shift the result in the high 2048 bits down to the bottom.
02312  *
02313  * r  A single precision number.
02314  * a  A single precision number.
02315  */
02316 static void sp_2048_mont_shift_90(sp_digit* r, const sp_digit* a)
02317 {
02318 #ifdef WOLFSSL_SP_SMALL
02319     int i;
02320     int64_t n = a[89] >> 1;
02321     n += ((int64_t)a[90]) << 22;
02322 
02323     for (i = 0; i < 89; i++) {
02324         r[i] = n & 0x7fffff;
02325         n >>= 23;
02326         n += ((int64_t)a[91 + i]) << 22;
02327     }
02328     r[89] = (sp_digit)n;
02329 #else
02330     int i;
02331     int64_t n = a[89] >> 1;
02332     n += ((int64_t)a[90]) << 22;
02333     for (i = 0; i < 88; i += 8) {
02334         r[i + 0] = n & 0x7fffff;
02335         n >>= 23; n += ((int64_t)a[i + 91]) << 22;
02336         r[i + 1] = n & 0x7fffff;
02337         n >>= 23; n += ((int64_t)a[i + 92]) << 22;
02338         r[i + 2] = n & 0x7fffff;
02339         n >>= 23; n += ((int64_t)a[i + 93]) << 22;
02340         r[i + 3] = n & 0x7fffff;
02341         n >>= 23; n += ((int64_t)a[i + 94]) << 22;
02342         r[i + 4] = n & 0x7fffff;
02343         n >>= 23; n += ((int64_t)a[i + 95]) << 22;
02344         r[i + 5] = n & 0x7fffff;
02345         n >>= 23; n += ((int64_t)a[i + 96]) << 22;
02346         r[i + 6] = n & 0x7fffff;
02347         n >>= 23; n += ((int64_t)a[i + 97]) << 22;
02348         r[i + 7] = n & 0x7fffff;
02349         n >>= 23; n += ((int64_t)a[i + 98]) << 22;
02350     }
02351     r[88] = n & 0x7fffff; n >>= 23; n += ((int64_t)a[179]) << 22;
02352     r[89] = (sp_digit)n;
02353 #endif /* WOLFSSL_SP_SMALL */
02354     XMEMSET(&r[90], 0, sizeof(*r) * 90);
02355 }
02356 
02357 /* Reduce the number back to 2048 bits using Montgomery reduction.
02358  *
02359  * a   A single precision number to reduce in place.
02360  * m   The single precision number representing the modulus.
02361  * mp  The digit representing the negative inverse of m mod 2^n.
02362  */
02363 static void sp_2048_mont_reduce_90(sp_digit* a, sp_digit* m, sp_digit mp)
02364 {
02365     int i;
02366     sp_digit mu;
02367 
02368     if (mp != 1) {
02369         for (i=0; i<89; i++) {
02370             mu = (a[i] * mp) & 0x7fffff;
02371             sp_2048_mul_add_90(a+i, m, mu);
02372             a[i+1] += a[i] >> 23;
02373         }
02374         mu = (a[i] * mp) & 0x1l;
02375         sp_2048_mul_add_90(a+i, m, mu);
02376         a[i+1] += a[i] >> 23;
02377         a[i] &= 0x7fffff;
02378     }
02379     else {
02380         for (i=0; i<89; i++) {
02381             mu = a[i] & 0x7fffff;
02382             sp_2048_mul_add_90(a+i, m, mu);
02383             a[i+1] += a[i] >> 23;
02384         }
02385         mu = a[i] & 0x1l;
02386         sp_2048_mul_add_90(a+i, m, mu);
02387         a[i+1] += a[i] >> 23;
02388         a[i] &= 0x7fffff;
02389     }
02390 
02391     sp_2048_mont_shift_90(a, a);
02392     sp_2048_cond_sub_90(a, a, m, 0 - ((a[89] >> 1) > 0));
02393     sp_2048_norm_90(a);
02394 }
02395 
02396 /* Multiply two Montogmery form numbers mod the modulus (prime).
02397  * (r = a * b mod m)
02398  *
02399  * r   Result of multiplication.
02400  * a   First number to multiply in Montogmery form.
02401  * b   Second number to multiply in Montogmery form.
02402  * m   Modulus (prime).
02403  * mp  Montogmery mulitplier.
02404  */
02405 static void sp_2048_mont_mul_90(sp_digit* r, sp_digit* a, sp_digit* b,
02406         sp_digit* m, sp_digit mp)
02407 {
02408     sp_2048_mul_90(r, a, b);
02409     sp_2048_mont_reduce_90(r, m, mp);
02410 }
02411 
02412 /* Square the Montgomery form number. (r = a * a mod m)
02413  *
02414  * r   Result of squaring.
02415  * a   Number to square in Montogmery form.
02416  * m   Modulus (prime).
02417  * mp  Montogmery mulitplier.
02418  */
02419 static void sp_2048_mont_sqr_90(sp_digit* r, sp_digit* a, sp_digit* m,
02420         sp_digit mp)
02421 {
02422     sp_2048_sqr_90(r, a);
02423     sp_2048_mont_reduce_90(r, m, mp);
02424 }
02425 
02426 /* Multiply a by scalar b into r. (r = a * b)
02427  *
02428  * r  A single precision integer.
02429  * a  A single precision integer.
02430  * b  A scalar.
02431  */
02432 SP_NOINLINE static void sp_2048_mul_d_180(sp_digit* r, const sp_digit* a,
02433     const sp_digit b)
02434 {
02435 #ifdef WOLFSSL_SP_SMALL
02436     int64_t tb = b;
02437     int64_t t = 0;
02438     int i;
02439 
02440     for (i = 0; i < 180; i++) {
02441         t += tb * a[i];
02442         r[i] = t & 0x7fffff;
02443         t >>= 23;
02444     }
02445     r[180] = (sp_digit)t;
02446 #else
02447     int64_t tb = b;
02448     int64_t t[8];
02449     int i;
02450 
02451     t[0] = tb * a[0]; r[0] = t[0] & 0x7fffff;
02452     for (i = 0; i < 176; i += 8) {
02453         t[1] = tb * a[i+1];
02454         r[i+1] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
02455         t[2] = tb * a[i+2];
02456         r[i+2] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff);
02457         t[3] = tb * a[i+3];
02458         r[i+3] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff);
02459         t[4] = tb * a[i+4];
02460         r[i+4] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff);
02461         t[5] = tb * a[i+5];
02462         r[i+5] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff);
02463         t[6] = tb * a[i+6];
02464         r[i+6] = (sp_digit)(t[5] >> 23) + (t[6] & 0x7fffff);
02465         t[7] = tb * a[i+7];
02466         r[i+7] = (sp_digit)(t[6] >> 23) + (t[7] & 0x7fffff);
02467         t[0] = tb * a[i+8];
02468         r[i+8] = (sp_digit)(t[7] >> 23) + (t[0] & 0x7fffff);
02469     }
02470     t[1] = tb * a[177];
02471     r[177] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
02472     t[2] = tb * a[178];
02473     r[178] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff);
02474     t[3] = tb * a[179];
02475     r[179] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff);
02476     r[180] =  (sp_digit)(t[3] >> 23);
02477 #endif /* WOLFSSL_SP_SMALL */
02478 }
02479 
02480 /* Conditionally add a and b using the mask m.
02481  * m is -1 to add and 0 when not.
02482  *
02483  * r  A single precision number representing conditional add result.
02484  * a  A single precision number to add with.
02485  * b  A single precision number to add.
02486  * m  Mask value to apply.
02487  */
02488 static void sp_2048_cond_add_90(sp_digit* r, const sp_digit* a,
02489         const sp_digit* b, const sp_digit m)
02490 {
02491 #ifdef WOLFSSL_SP_SMALL
02492     int i;
02493 
02494     for (i = 0; i < 90; i++)
02495         r[i] = a[i] + (b[i] & m);
02496 #else
02497     int i;
02498 
02499     for (i = 0; i < 88; i += 8) {
02500         r[i + 0] = a[i + 0] + (b[i + 0] & m);
02501         r[i + 1] = a[i + 1] + (b[i + 1] & m);
02502         r[i + 2] = a[i + 2] + (b[i + 2] & m);
02503         r[i + 3] = a[i + 3] + (b[i + 3] & m);
02504         r[i + 4] = a[i + 4] + (b[i + 4] & m);
02505         r[i + 5] = a[i + 5] + (b[i + 5] & m);
02506         r[i + 6] = a[i + 6] + (b[i + 6] & m);
02507         r[i + 7] = a[i + 7] + (b[i + 7] & m);
02508     }
02509     r[88] = a[88] + (b[88] & m);
02510     r[89] = a[89] + (b[89] & m);
02511 #endif /* WOLFSSL_SP_SMALL */
02512 }
02513 
02514 #ifdef WOLFSSL_SMALL
02515 /* Sub b from a into r. (r = a - b)
02516  *
02517  * r  A single precision integer.
02518  * a  A single precision integer.
02519  * b  A single precision integer.
02520  */
02521 SP_NOINLINE static int sp_2048_sub_90(sp_digit* r, const sp_digit* a,
02522         const sp_digit* b)
02523 {
02524     int i;
02525 
02526     for (i = 0; i < 90; i++)
02527         r[i] = a[i] - b[i];
02528 
02529     return 0;
02530 }
02531 
02532 #endif
02533 #ifdef WOLFSSL_SMALL
02534 /* Add b to a into r. (r = a + b)
02535  *
02536  * r  A single precision integer.
02537  * a  A single precision integer.
02538  * b  A single precision integer.
02539  */
02540 SP_NOINLINE static int sp_2048_add_90(sp_digit* r, const sp_digit* a,
02541         const sp_digit* b)
02542 {
02543     int i;
02544 
02545     for (i = 0; i < 90; i++)
02546         r[i] = a[i] + b[i];
02547 
02548     return 0;
02549 }
02550 #endif
02551 SP_NOINLINE static void sp_2048_rshift_90(sp_digit* r, sp_digit* a, byte n)
02552 {
02553 #ifdef WOLFSSL_SP_SMALL
02554     int i;
02555 
02556     for (i=0; i<89; i++)
02557         r[i] = ((a[i] >> n) | (a[i + 1] << (23 - n))) & 0x7fffff;
02558 #else
02559     r[0] = ((a[0] >> n) | (a[1] << (23 - n))) & 0x7fffff;
02560     r[1] = ((a[1] >> n) | (a[2] << (23 - n))) & 0x7fffff;
02561     r[2] = ((a[2] >> n) | (a[3] << (23 - n))) & 0x7fffff;
02562     r[3] = ((a[3] >> n) | (a[4] << (23 - n))) & 0x7fffff;
02563     r[4] = ((a[4] >> n) | (a[5] << (23 - n))) & 0x7fffff;
02564     r[5] = ((a[5] >> n) | (a[6] << (23 - n))) & 0x7fffff;
02565     r[6] = ((a[6] >> n) | (a[7] << (23 - n))) & 0x7fffff;
02566     r[7] = ((a[7] >> n) | (a[8] << (23 - n))) & 0x7fffff;
02567     r[8] = ((a[8] >> n) | (a[9] << (23 - n))) & 0x7fffff;
02568     r[9] = ((a[9] >> n) | (a[10] << (23 - n))) & 0x7fffff;
02569     r[10] = ((a[10] >> n) | (a[11] << (23 - n))) & 0x7fffff;
02570     r[11] = ((a[11] >> n) | (a[12] << (23 - n))) & 0x7fffff;
02571     r[12] = ((a[12] >> n) | (a[13] << (23 - n))) & 0x7fffff;
02572     r[13] = ((a[13] >> n) | (a[14] << (23 - n))) & 0x7fffff;
02573     r[14] = ((a[14] >> n) | (a[15] << (23 - n))) & 0x7fffff;
02574     r[15] = ((a[15] >> n) | (a[16] << (23 - n))) & 0x7fffff;
02575     r[16] = ((a[16] >> n) | (a[17] << (23 - n))) & 0x7fffff;
02576     r[17] = ((a[17] >> n) | (a[18] << (23 - n))) & 0x7fffff;
02577     r[18] = ((a[18] >> n) | (a[19] << (23 - n))) & 0x7fffff;
02578     r[19] = ((a[19] >> n) | (a[20] << (23 - n))) & 0x7fffff;
02579     r[20] = ((a[20] >> n) | (a[21] << (23 - n))) & 0x7fffff;
02580     r[21] = ((a[21] >> n) | (a[22] << (23 - n))) & 0x7fffff;
02581     r[22] = ((a[22] >> n) | (a[23] << (23 - n))) & 0x7fffff;
02582     r[23] = ((a[23] >> n) | (a[24] << (23 - n))) & 0x7fffff;
02583     r[24] = ((a[24] >> n) | (a[25] << (23 - n))) & 0x7fffff;
02584     r[25] = ((a[25] >> n) | (a[26] << (23 - n))) & 0x7fffff;
02585     r[26] = ((a[26] >> n) | (a[27] << (23 - n))) & 0x7fffff;
02586     r[27] = ((a[27] >> n) | (a[28] << (23 - n))) & 0x7fffff;
02587     r[28] = ((a[28] >> n) | (a[29] << (23 - n))) & 0x7fffff;
02588     r[29] = ((a[29] >> n) | (a[30] << (23 - n))) & 0x7fffff;
02589     r[30] = ((a[30] >> n) | (a[31] << (23 - n))) & 0x7fffff;
02590     r[31] = ((a[31] >> n) | (a[32] << (23 - n))) & 0x7fffff;
02591     r[32] = ((a[32] >> n) | (a[33] << (23 - n))) & 0x7fffff;
02592     r[33] = ((a[33] >> n) | (a[34] << (23 - n))) & 0x7fffff;
02593     r[34] = ((a[34] >> n) | (a[35] << (23 - n))) & 0x7fffff;
02594     r[35] = ((a[35] >> n) | (a[36] << (23 - n))) & 0x7fffff;
02595     r[36] = ((a[36] >> n) | (a[37] << (23 - n))) & 0x7fffff;
02596     r[37] = ((a[37] >> n) | (a[38] << (23 - n))) & 0x7fffff;
02597     r[38] = ((a[38] >> n) | (a[39] << (23 - n))) & 0x7fffff;
02598     r[39] = ((a[39] >> n) | (a[40] << (23 - n))) & 0x7fffff;
02599     r[40] = ((a[40] >> n) | (a[41] << (23 - n))) & 0x7fffff;
02600     r[41] = ((a[41] >> n) | (a[42] << (23 - n))) & 0x7fffff;
02601     r[42] = ((a[42] >> n) | (a[43] << (23 - n))) & 0x7fffff;
02602     r[43] = ((a[43] >> n) | (a[44] << (23 - n))) & 0x7fffff;
02603     r[44] = ((a[44] >> n) | (a[45] << (23 - n))) & 0x7fffff;
02604     r[45] = ((a[45] >> n) | (a[46] << (23 - n))) & 0x7fffff;
02605     r[46] = ((a[46] >> n) | (a[47] << (23 - n))) & 0x7fffff;
02606     r[47] = ((a[47] >> n) | (a[48] << (23 - n))) & 0x7fffff;
02607     r[48] = ((a[48] >> n) | (a[49] << (23 - n))) & 0x7fffff;
02608     r[49] = ((a[49] >> n) | (a[50] << (23 - n))) & 0x7fffff;
02609     r[50] = ((a[50] >> n) | (a[51] << (23 - n))) & 0x7fffff;
02610     r[51] = ((a[51] >> n) | (a[52] << (23 - n))) & 0x7fffff;
02611     r[52] = ((a[52] >> n) | (a[53] << (23 - n))) & 0x7fffff;
02612     r[53] = ((a[53] >> n) | (a[54] << (23 - n))) & 0x7fffff;
02613     r[54] = ((a[54] >> n) | (a[55] << (23 - n))) & 0x7fffff;
02614     r[55] = ((a[55] >> n) | (a[56] << (23 - n))) & 0x7fffff;
02615     r[56] = ((a[56] >> n) | (a[57] << (23 - n))) & 0x7fffff;
02616     r[57] = ((a[57] >> n) | (a[58] << (23 - n))) & 0x7fffff;
02617     r[58] = ((a[58] >> n) | (a[59] << (23 - n))) & 0x7fffff;
02618     r[59] = ((a[59] >> n) | (a[60] << (23 - n))) & 0x7fffff;
02619     r[60] = ((a[60] >> n) | (a[61] << (23 - n))) & 0x7fffff;
02620     r[61] = ((a[61] >> n) | (a[62] << (23 - n))) & 0x7fffff;
02621     r[62] = ((a[62] >> n) | (a[63] << (23 - n))) & 0x7fffff;
02622     r[63] = ((a[63] >> n) | (a[64] << (23 - n))) & 0x7fffff;
02623     r[64] = ((a[64] >> n) | (a[65] << (23 - n))) & 0x7fffff;
02624     r[65] = ((a[65] >> n) | (a[66] << (23 - n))) & 0x7fffff;
02625     r[66] = ((a[66] >> n) | (a[67] << (23 - n))) & 0x7fffff;
02626     r[67] = ((a[67] >> n) | (a[68] << (23 - n))) & 0x7fffff;
02627     r[68] = ((a[68] >> n) | (a[69] << (23 - n))) & 0x7fffff;
02628     r[69] = ((a[69] >> n) | (a[70] << (23 - n))) & 0x7fffff;
02629     r[70] = ((a[70] >> n) | (a[71] << (23 - n))) & 0x7fffff;
02630     r[71] = ((a[71] >> n) | (a[72] << (23 - n))) & 0x7fffff;
02631     r[72] = ((a[72] >> n) | (a[73] << (23 - n))) & 0x7fffff;
02632     r[73] = ((a[73] >> n) | (a[74] << (23 - n))) & 0x7fffff;
02633     r[74] = ((a[74] >> n) | (a[75] << (23 - n))) & 0x7fffff;
02634     r[75] = ((a[75] >> n) | (a[76] << (23 - n))) & 0x7fffff;
02635     r[76] = ((a[76] >> n) | (a[77] << (23 - n))) & 0x7fffff;
02636     r[77] = ((a[77] >> n) | (a[78] << (23 - n))) & 0x7fffff;
02637     r[78] = ((a[78] >> n) | (a[79] << (23 - n))) & 0x7fffff;
02638     r[79] = ((a[79] >> n) | (a[80] << (23 - n))) & 0x7fffff;
02639     r[80] = ((a[80] >> n) | (a[81] << (23 - n))) & 0x7fffff;
02640     r[81] = ((a[81] >> n) | (a[82] << (23 - n))) & 0x7fffff;
02641     r[82] = ((a[82] >> n) | (a[83] << (23 - n))) & 0x7fffff;
02642     r[83] = ((a[83] >> n) | (a[84] << (23 - n))) & 0x7fffff;
02643     r[84] = ((a[84] >> n) | (a[85] << (23 - n))) & 0x7fffff;
02644     r[85] = ((a[85] >> n) | (a[86] << (23 - n))) & 0x7fffff;
02645     r[86] = ((a[86] >> n) | (a[87] << (23 - n))) & 0x7fffff;
02646     r[87] = ((a[87] >> n) | (a[88] << (23 - n))) & 0x7fffff;
02647     r[88] = ((a[88] >> n) | (a[89] << (23 - n))) & 0x7fffff;
02648 #endif
02649     r[89] = a[89] >> n;
02650 }
02651 
02652 /* Divide d in a and put remainder into r (m*d + r = a)
02653  * m is not calculated as it is not needed at this time.
02654  *
02655  * a  Nmber to be divided.
02656  * d  Number to divide with.
02657  * m  Multiplier result.
02658  * r  Remainder from the division.
02659  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
02660  */
02661 static int sp_2048_div_90(sp_digit* a, sp_digit* d, sp_digit* m,
02662         sp_digit* r)
02663 {
02664     int i;
02665     int64_t d1;
02666     sp_digit div, r1;
02667 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
02668     sp_digit* td;
02669 #else
02670     sp_digit t1d[180 + 1], t2d[90 + 1], sdd[90 + 1];
02671 #endif
02672     sp_digit* t1;
02673     sp_digit* t2;
02674     sp_digit* sd;
02675     int err = MP_OKAY;
02676 
02677 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
02678     td = XMALLOC(sizeof(sp_digit) * (4 * 90 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER);
02679     if (td != NULL) {
02680         t1 = td;
02681         t2 = td + 180 + 1;
02682         sd = t2 + 90 + 1;
02683     }
02684     else
02685         err = MEMORY_E;
02686 #else
02687     t1 = t1d;
02688     t2 = t2d;
02689     sd = sdd;
02690 #endif
02691 
02692     (void)m;
02693 
02694     if (err == MP_OKAY) {
02695         sp_2048_mul_d_90(sd, d, 1 << 22);
02696         sp_2048_mul_d_180(t1, a, 1 << 22);
02697         div = sd[89];
02698         for (i=90; i>=0; i--) {
02699             t1[90 + i] += t1[90 + i - 1] >> 23;
02700             t1[90 + i - 1] &= 0x7fffff;
02701             d1 = t1[90 + i];
02702             d1 <<= 23;
02703             d1 += t1[90 + i - 1];
02704             r1 = (sp_digit)(d1 / div);
02705 
02706             sp_2048_mul_d_90(t2, sd, r1);
02707             sp_2048_sub_90(&t1[i], &t1[i], t2);
02708             t1[90 + i] -= t2[90];
02709             t1[90 + i] += t1[90 + i - 1] >> 23;
02710             t1[90 + i - 1] &= 0x7fffff;
02711             r1 = (((-t1[90 + i]) << 23) - t1[90 + i - 1]) / div;
02712             r1 -= t1[90 + i];
02713             sp_2048_mul_d_90(t2, sd, r1);
02714             sp_2048_add_90(&t1[i], &t1[i], t2);
02715             t1[90 + i] += t1[90 + i - 1] >> 23;
02716             t1[90 + i - 1] &= 0x7fffff;
02717         }
02718         t1[90 - 1] += t1[90 - 2] >> 23;
02719         t1[90 - 2] &= 0x7fffff;
02720         d1 = t1[90 - 1];
02721         r1 = (sp_digit)(d1 / div);
02722 
02723         sp_2048_mul_d_90(t2, sd, r1);
02724         sp_2048_sub_90(t1, t1, t2);
02725         XMEMCPY(r, t1, sizeof(*r) * 2 * 90);
02726         for (i=0; i<88; i++) {
02727             r[i+1] += r[i] >> 23;
02728             r[i] &= 0x7fffff;
02729         }
02730         sp_2048_cond_add_90(r, r, sd, 0 - (r[89] < 0));
02731     }
02732 
02733     sp_2048_rshift_90(r, r, 22);
02734 
02735 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
02736     if (td != NULL)
02737         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
02738 #endif
02739 
02740     return err;
02741 }
02742 
02743 /* Reduce a modulo m into r. (r = a mod m)
02744  *
02745  * r  A single precision number that is the reduced result.
02746  * a  A single precision number that is to be reduced.
02747  * m  A single precision number that is the modulus to reduce with.
02748  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
02749  */
02750 static int sp_2048_mod_90(sp_digit* r, sp_digit* a, sp_digit* m)
02751 {
02752     return sp_2048_div_90(a, m, NULL, r);
02753 }
02754 
02755 #if defined(SP_RSA_PRIVATE_EXP_D) || defined(WOLFSSL_HAVE_SP_DH)
02756 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
02757  *
02758  * r     A single precision number that is the result of the operation.
02759  * a     A single precision number being exponentiated.
02760  * e     A single precision number that is the exponent.
02761  * bits  The number of bits in the exponent.
02762  * m     A single precision number that is the modulus.
02763  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
02764  */
02765 static int sp_2048_mod_exp_90(sp_digit* r, sp_digit* a, sp_digit* e, int bits,
02766     sp_digit* m, int reduceA)
02767 {
02768 #ifdef WOLFSSL_SP_SMALL
02769     sp_digit* td;
02770     sp_digit* t[3];
02771     sp_digit* norm;
02772     sp_digit mp = 1;
02773     sp_digit n;
02774     int i;
02775     int c, y;
02776     int err = MP_OKAY;
02777 
02778     td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 90 * 2, NULL,
02779                             DYNAMIC_TYPE_TMP_BUFFER);
02780     if (td == NULL)
02781         err = MEMORY_E;
02782 
02783     if (err == MP_OKAY) {
02784         XMEMSET(td, 0, sizeof(*td) * 3 * 90 * 2);
02785 
02786         norm = t[0] = td;
02787         t[1] = &td[90 * 2];
02788         t[2] = &td[2 * 90 * 2];
02789 
02790         sp_2048_mont_setup(m, &mp);
02791         sp_2048_mont_norm_90(norm, m);
02792 
02793         if (reduceA)
02794             err = sp_2048_mod_90(t[1], a, m);
02795         else
02796             XMEMCPY(t[1], a, sizeof(sp_digit) * 90);
02797     }
02798     if (err == MP_OKAY) {
02799         sp_2048_mul_90(t[1], t[1], norm);
02800         err = sp_2048_mod_90(t[1], t[1], m);
02801     }
02802 
02803     if (err == MP_OKAY) {
02804         i = bits / 23;
02805         c = bits % 23;
02806         n = e[i--] << (23 - c);
02807         for (; ; c--) {
02808             if (c == 0) {
02809                 if (i == -1)
02810                     break;
02811 
02812                 n = e[i--];
02813                 c = 23;
02814             }
02815 
02816             y = (n >> 22) & 1;
02817             n <<= 1;
02818 
02819             sp_2048_mont_mul_90(t[y^1], t[0], t[1], m, mp);
02820 
02821             XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
02822                                   ((size_t)t[1] & addr_mask[y])),
02823                     sizeof(*t[2]) * 90 * 2);
02824             sp_2048_mont_sqr_90(t[2], t[2], m, mp);
02825             XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
02826                             ((size_t)t[1] & addr_mask[y])), t[2],
02827                     sizeof(*t[2]) * 90 * 2);
02828         }
02829 
02830         sp_2048_mont_reduce_90(t[0], m, mp);
02831         n = sp_2048_cmp_90(t[0], m);
02832         sp_2048_cond_sub_90(t[0], t[0], m, (n < 0) - 1);
02833         XMEMCPY(r, t[0], sizeof(*r) * 90 * 2);
02834 
02835     }
02836 
02837     if (td != NULL)
02838         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
02839 
02840     return err;
02841 #elif defined(WOLFSSL_SP_CACHE_RESISTANT)
02842 #ifndef WOLFSSL_SMALL_STACK
02843     sp_digit t[3][180];
02844 #else
02845     sp_digit* td;
02846     sp_digit* t[3];
02847 #endif
02848     sp_digit* norm;
02849     sp_digit mp = 1;
02850     sp_digit n;
02851     int i;
02852     int c, y;
02853     int err = MP_OKAY;
02854 
02855 #ifdef WOLFSSL_SMALL_STACK
02856     td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 90 * 2, NULL,
02857                             DYNAMIC_TYPE_TMP_BUFFER);
02858     if (td == NULL)
02859         err = MEMORY_E;
02860 
02861     if (err == MP_OKAY) {
02862         t[0] = td;
02863         t[1] = &td[90 * 2];
02864         t[2] = &td[2 * 90 * 2];
02865         norm = t[0];
02866     }
02867 #else
02868     norm = t[0];
02869 #endif
02870 
02871     if (err == MP_OKAY) {
02872         sp_2048_mont_setup(m, &mp);
02873         sp_2048_mont_norm_90(norm, m);
02874 
02875         if (reduceA) {
02876             err = sp_2048_mod_90(t[1], a, m);
02877             if (err == MP_OKAY) {
02878                 sp_2048_mul_90(t[1], t[1], norm);
02879                 err = sp_2048_mod_90(t[1], t[1], m);
02880             }
02881         }
02882         else {
02883             sp_2048_mul_90(t[1], a, norm);
02884             err = sp_2048_mod_90(t[1], t[1], m);
02885         }
02886     }
02887 
02888     if (err == MP_OKAY) {
02889         i = bits / 23;
02890         c = bits % 23;
02891         n = e[i--] << (23 - c);
02892         for (; ; c--) {
02893             if (c == 0) {
02894                 if (i == -1)
02895                     break;
02896 
02897                 n = e[i--];
02898                 c = 23;
02899             }
02900 
02901             y = (n >> 22) & 1;
02902             n <<= 1;
02903 
02904             sp_2048_mont_mul_90(t[y^1], t[0], t[1], m, mp);
02905 
02906             XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
02907                                  ((size_t)t[1] & addr_mask[y])), sizeof(t[2]));
02908             sp_2048_mont_sqr_90(t[2], t[2], m, mp);
02909             XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
02910                            ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2]));
02911         }
02912 
02913         sp_2048_mont_reduce_90(t[0], m, mp);
02914         n = sp_2048_cmp_90(t[0], m);
02915         sp_2048_cond_sub_90(t[0], t[0], m, (n < 0) - 1);
02916         XMEMCPY(r, t[0], sizeof(t[0]));
02917     }
02918 
02919 #ifdef WOLFSSL_SMALL_STACK
02920     if (td != NULL)
02921         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
02922 #endif
02923 
02924     return err;
02925 #else
02926 #ifndef WOLFSSL_SMALL_STACK
02927     sp_digit t[32][180];
02928 #else
02929     sp_digit* t[32];
02930     sp_digit* td;
02931 #endif
02932     sp_digit* norm;
02933     sp_digit rt[180];
02934     sp_digit mp = 1;
02935     sp_digit n;
02936     int i;
02937     int c, y;
02938     int err = MP_OKAY;
02939 
02940 #ifdef WOLFSSL_SMALL_STACK
02941     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 180, NULL,
02942                             DYNAMIC_TYPE_TMP_BUFFER);
02943     if (td == NULL)
02944         err = MEMORY_E;
02945 
02946     if (err == MP_OKAY) {
02947         for (i=0; i<32; i++)
02948             t[i] = td + i * 180;
02949         norm = t[0];
02950     }
02951 #else
02952     norm = t[0];
02953 #endif
02954 
02955     if (err == MP_OKAY) {
02956         sp_2048_mont_setup(m, &mp);
02957         sp_2048_mont_norm_90(norm, m);
02958 
02959         if (reduceA) {
02960             err = sp_2048_mod_90(t[1], a, m);
02961             if (err == MP_OKAY) {
02962                 sp_2048_mul_90(t[1], t[1], norm);
02963                 err = sp_2048_mod_90(t[1], t[1], m);
02964             }
02965         }
02966         else {
02967             sp_2048_mul_90(t[1], a, norm);
02968             err = sp_2048_mod_90(t[1], t[1], m);
02969         }
02970     }
02971 
02972     if (err == MP_OKAY) {
02973         sp_2048_mont_sqr_90(t[ 2], t[ 1], m, mp);
02974         sp_2048_mont_mul_90(t[ 3], t[ 2], t[ 1], m, mp);
02975         sp_2048_mont_sqr_90(t[ 4], t[ 2], m, mp);
02976         sp_2048_mont_mul_90(t[ 5], t[ 3], t[ 2], m, mp);
02977         sp_2048_mont_sqr_90(t[ 6], t[ 3], m, mp);
02978         sp_2048_mont_mul_90(t[ 7], t[ 4], t[ 3], m, mp);
02979         sp_2048_mont_sqr_90(t[ 8], t[ 4], m, mp);
02980         sp_2048_mont_mul_90(t[ 9], t[ 5], t[ 4], m, mp);
02981         sp_2048_mont_sqr_90(t[10], t[ 5], m, mp);
02982         sp_2048_mont_mul_90(t[11], t[ 6], t[ 5], m, mp);
02983         sp_2048_mont_sqr_90(t[12], t[ 6], m, mp);
02984         sp_2048_mont_mul_90(t[13], t[ 7], t[ 6], m, mp);
02985         sp_2048_mont_sqr_90(t[14], t[ 7], m, mp);
02986         sp_2048_mont_mul_90(t[15], t[ 8], t[ 7], m, mp);
02987         sp_2048_mont_sqr_90(t[16], t[ 8], m, mp);
02988         sp_2048_mont_mul_90(t[17], t[ 9], t[ 8], m, mp);
02989         sp_2048_mont_sqr_90(t[18], t[ 9], m, mp);
02990         sp_2048_mont_mul_90(t[19], t[10], t[ 9], m, mp);
02991         sp_2048_mont_sqr_90(t[20], t[10], m, mp);
02992         sp_2048_mont_mul_90(t[21], t[11], t[10], m, mp);
02993         sp_2048_mont_sqr_90(t[22], t[11], m, mp);
02994         sp_2048_mont_mul_90(t[23], t[12], t[11], m, mp);
02995         sp_2048_mont_sqr_90(t[24], t[12], m, mp);
02996         sp_2048_mont_mul_90(t[25], t[13], t[12], m, mp);
02997         sp_2048_mont_sqr_90(t[26], t[13], m, mp);
02998         sp_2048_mont_mul_90(t[27], t[14], t[13], m, mp);
02999         sp_2048_mont_sqr_90(t[28], t[14], m, mp);
03000         sp_2048_mont_mul_90(t[29], t[15], t[14], m, mp);
03001         sp_2048_mont_sqr_90(t[30], t[15], m, mp);
03002         sp_2048_mont_mul_90(t[31], t[16], t[15], m, mp);
03003 
03004         bits = ((bits + 4) / 5) * 5;
03005         i = ((bits + 22) / 23) - 1;
03006         c = bits % 23;
03007         if (c == 0)
03008             c = 23;
03009         if (i < 90)
03010             n = e[i--] << (32 - c);
03011         else {
03012             n = 0;
03013             i--;
03014         }
03015         if (c < 5) {
03016             n |= e[i--] << (9 - c);
03017             c += 23;
03018         }
03019         y = n >> 27;
03020         n <<= 5;
03021         c -= 5;
03022         XMEMCPY(rt, t[y], sizeof(rt));
03023         for (; i>=0 || c>=5; ) {
03024             if (c < 5) {
03025                 n |= e[i--] << (9 - c);
03026                 c += 23;
03027             }
03028             y = (n >> 27) & 0x1f;
03029             n <<= 5;
03030             c -= 5;
03031 
03032             sp_2048_mont_sqr_90(rt, rt, m, mp);
03033             sp_2048_mont_sqr_90(rt, rt, m, mp);
03034             sp_2048_mont_sqr_90(rt, rt, m, mp);
03035             sp_2048_mont_sqr_90(rt, rt, m, mp);
03036             sp_2048_mont_sqr_90(rt, rt, m, mp);
03037 
03038             sp_2048_mont_mul_90(rt, rt, t[y], m, mp);
03039         }
03040 
03041         sp_2048_mont_reduce_90(rt, m, mp);
03042         n = sp_2048_cmp_90(rt, m);
03043         sp_2048_cond_sub_90(rt, rt, m, (n < 0) - 1);
03044         XMEMCPY(r, rt, sizeof(rt));
03045     }
03046 
03047 #ifdef WOLFSSL_SMALL_STACK
03048     if (td != NULL)
03049         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
03050 #endif
03051 
03052     return err;
03053 #endif
03054 }
03055 #endif /* SP_RSA_PRIVATE_EXP_D || WOLFSSL_HAVE_SP_DH */
03056 
03057 #if defined(WOLFSSL_HAVE_SP_RSA) && !defined(SP_RSA_PRIVATE_EXP_D) && \
03058                                     !defined(RSA_LOW_MEM)
03059 /* AND m into each word of a and store in r.
03060  *
03061  * r  A single precision integer.
03062  * a  A single precision integer.
03063  * m  Mask to AND against each digit.
03064  */
03065 static void sp_2048_mask_45(sp_digit* r, sp_digit* a, sp_digit m)
03066 {
03067 #ifdef WOLFSSL_SP_SMALL
03068     int i;
03069 
03070     for (i=0; i<45; i++)
03071         r[i] = a[i] & m;
03072 #else
03073     int i;
03074 
03075     for (i = 0; i < 40; i += 8) {
03076         r[i+0] = a[i+0] & m;
03077         r[i+1] = a[i+1] & m;
03078         r[i+2] = a[i+2] & m;
03079         r[i+3] = a[i+3] & m;
03080         r[i+4] = a[i+4] & m;
03081         r[i+5] = a[i+5] & m;
03082         r[i+6] = a[i+6] & m;
03083         r[i+7] = a[i+7] & m;
03084     }
03085     r[40] = a[40] & m;
03086     r[41] = a[41] & m;
03087     r[42] = a[42] & m;
03088     r[43] = a[43] & m;
03089     r[44] = a[44] & m;
03090 #endif
03091 }
03092 
03093 #endif
03094 #ifdef WOLFSSL_HAVE_SP_RSA
03095 /* RSA public key operation.
03096  *
03097  * in      Array of bytes representing the number to exponentiate, base.
03098  * inLen   Number of bytes in base.
03099  * em      Public exponent.
03100  * mm      Modulus.
03101  * out     Buffer to hold big-endian bytes of exponentiation result.
03102  *         Must be at least 256 bytes long.
03103  * outLen  Number of bytes in result.
03104  * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
03105  * an array is too long and MEMORY_E when dynamic memory allocation fails.
03106  */
03107 int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
03108     byte* out, word32* outLen)
03109 {
03110 #ifdef WOLFSSL_SP_SMALL
03111     sp_digit* d = NULL;
03112     sp_digit* a;
03113     sp_digit* m;
03114     sp_digit* r;
03115     sp_digit* norm;
03116     sp_digit e[1];
03117     sp_digit mp;
03118     int i;
03119     int err = MP_OKAY;
03120 
03121     if (*outLen < 256)
03122         err = MP_TO_E;
03123     if (err == MP_OKAY && (mp_count_bits(em) > 23 || inLen > 256 ||
03124                                                      mp_count_bits(mm) != 2048))
03125         err = MP_READ_E;
03126 
03127     if (err == MP_OKAY) {
03128         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 90 * 5, NULL,
03129                                DYNAMIC_TYPE_TMP_BUFFER);
03130         if (d == NULL)
03131             err = MEMORY_E;
03132     }
03133 
03134     if (err == MP_OKAY) {
03135         a = d;
03136         r = a + 90 * 2;
03137         m = r + 90 * 2;
03138         norm = r;
03139 
03140         sp_2048_from_bin(a, 90, in, inLen);
03141 #if DIGIT_BIT >= 23
03142         e[0] = em->dp[0];
03143 #else
03144         e[0] = em->dp[0];
03145         if (em->used > 1)
03146             e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
03147 #endif
03148         if (e[0] == 0)
03149             err = MP_EXPTMOD_E;
03150     }
03151 
03152     if (err == MP_OKAY) {
03153         sp_2048_from_mp(m, 90, mm);
03154 
03155         sp_2048_mont_setup(m, &mp);
03156         sp_2048_mont_norm_90(norm, m);
03157     }
03158     if (err == MP_OKAY) {
03159         sp_2048_mul_90(a, a, norm);
03160         err = sp_2048_mod_90(a, a, m);
03161     }
03162     if (err == MP_OKAY) {
03163         for (i=22; i>=0; i--)
03164             if (e[0] >> i)
03165                 break;
03166 
03167         XMEMCPY(r, a, sizeof(sp_digit) * 90 * 2);
03168         for (i--; i>=0; i--) {
03169             sp_2048_mont_sqr_90(r, r, m, mp);
03170 
03171             if (((e[0] >> i) & 1) == 1)
03172                 sp_2048_mont_mul_90(r, r, a, m, mp);
03173         }
03174         sp_2048_mont_reduce_90(r, m, mp);
03175         mp = sp_2048_cmp_90(r, m);
03176         sp_2048_cond_sub_90(r, r, m, (mp < 0) - 1);
03177 
03178         sp_2048_to_bin(r, out);
03179         *outLen = 256;
03180     }
03181 
03182     if (d != NULL)
03183         XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
03184 
03185     return err;
03186 #else
03187 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
03188     sp_digit ad[180], md[90], rd[180];
03189 #else
03190     sp_digit* d = NULL;
03191 #endif
03192     sp_digit* a;
03193     sp_digit* m;
03194     sp_digit* r;
03195     sp_digit e[1];
03196     int err = MP_OKAY;
03197 
03198     if (*outLen < 256)
03199         err = MP_TO_E;
03200     if (err == MP_OKAY && (mp_count_bits(em) > 23 || inLen > 256 ||
03201                                                      mp_count_bits(mm) != 2048))
03202         err = MP_READ_E;
03203 
03204 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
03205     if (err == MP_OKAY) {
03206         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 90 * 5, NULL,
03207                                DYNAMIC_TYPE_TMP_BUFFER);
03208         if (d == NULL)
03209             err = MEMORY_E;
03210     }
03211 
03212     if (err == MP_OKAY) {
03213         a = d;
03214         r = a + 90 * 2;
03215         m = r + 90 * 2;
03216     }
03217 #else
03218     a = ad;
03219     m = md;
03220     r = rd;
03221 #endif
03222 
03223     if (err == MP_OKAY) {
03224         sp_2048_from_bin(a, 90, in, inLen);
03225 #if DIGIT_BIT >= 23
03226         e[0] = em->dp[0];
03227 #else
03228         e[0] = em->dp[0];
03229         if (em->used > 1)
03230             e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
03231 #endif
03232         if (e[0] == 0)
03233             err = MP_EXPTMOD_E;
03234     }
03235     if (err == MP_OKAY) {
03236         sp_2048_from_mp(m, 90, mm);
03237 
03238         if (e[0] == 0x3) {
03239             if (err == MP_OKAY) {
03240                 sp_2048_sqr_90(r, a);
03241                 err = sp_2048_mod_90(r, r, m);
03242             }
03243             if (err == MP_OKAY) {
03244                 sp_2048_mul_90(r, a, r);
03245                 err = sp_2048_mod_90(r, r, m);
03246             }
03247         }
03248         else {
03249             sp_digit* norm = r;
03250             int i;
03251             sp_digit mp;
03252 
03253             sp_2048_mont_setup(m, &mp);
03254             sp_2048_mont_norm_90(norm, m);
03255 
03256             if (err == MP_OKAY) {
03257                 sp_2048_mul_90(a, a, norm);
03258                 err = sp_2048_mod_90(a, a, m);
03259             }
03260 
03261             if (err == MP_OKAY) {
03262                 for (i=22; i>=0; i--)
03263                     if (e[0] >> i)
03264                         break;
03265 
03266                 XMEMCPY(r, a, sizeof(sp_digit) * 180);
03267                 for (i--; i>=0; i--) {
03268                     sp_2048_mont_sqr_90(r, r, m, mp);
03269 
03270                     if (((e[0] >> i) & 1) == 1)
03271                         sp_2048_mont_mul_90(r, r, a, m, mp);
03272                 }
03273                 sp_2048_mont_reduce_90(r, m, mp);
03274                 mp = sp_2048_cmp_90(r, m);
03275                 sp_2048_cond_sub_90(r, r, m, (mp < 0) - 1);
03276             }
03277         }
03278     }
03279 
03280     if (err == MP_OKAY) {
03281         sp_2048_to_bin(r, out);
03282         *outLen = 256;
03283     }
03284 
03285 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
03286     if (d != NULL)
03287         XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
03288 #endif
03289 
03290     return err;
03291 #endif /* WOLFSSL_SP_SMALL */
03292 }
03293 
03294 /* RSA private key operation.
03295  *
03296  * in      Array of bytes representing the number to exponentiate, base.
03297  * inLen   Number of bytes in base.
03298  * dm      Private exponent.
03299  * pm      First prime.
03300  * qm      Second prime.
03301  * dpm     First prime's CRT exponent.
03302  * dqm     Second prime's CRT exponent.
03303  * qim     Inverse of second prime mod p.
03304  * mm      Modulus.
03305  * out     Buffer to hold big-endian bytes of exponentiation result.
03306  *         Must be at least 256 bytes long.
03307  * outLen  Number of bytes in result.
03308  * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
03309  * an array is too long and MEMORY_E when dynamic memory allocation fails.
03310  */
03311 int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm,
03312     mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
03313     byte* out, word32* outLen)
03314 {
03315 #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
03316 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
03317     sp_digit* a;
03318     sp_digit* d = NULL;
03319     sp_digit* m;
03320     sp_digit* r;
03321     int err = MP_OKAY;
03322 
03323     (void)pm;
03324     (void)qm;
03325     (void)dpm;
03326     (void)dqm;
03327     (void)qim;
03328 
03329     if (*outLen < 256)
03330         err = MP_TO_E;
03331     if (err == MP_OKAY && (mp_count_bits(dm) > 2048 || inLen > 256 ||
03332                                                      mp_count_bits(mm) != 2048))
03333         err = MP_READ_E;
03334 
03335     if (err == MP_OKAY) {
03336         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 90 * 4, NULL,
03337                                DYNAMIC_TYPE_TMP_BUFFER);
03338         if (d == NULL)
03339             err = MEMORY_E;
03340     }
03341     if (err == MP_OKAY) {
03342         a = d + 90;
03343         m = a + 90;
03344         r = a;
03345 
03346         sp_2048_from_bin(a, 90, in, inLen);
03347         sp_2048_from_mp(d, 90, dm);
03348         sp_2048_from_mp(m, 90, mm);
03349         err = sp_2048_mod_exp_90(r, a, d, 2048, m, 0);
03350     }
03351     if (err == MP_OKAY) {
03352         sp_2048_to_bin(r, out);
03353         *outLen = 256;
03354     }
03355 
03356     if (d != NULL) {
03357         XMEMSET(d, 0, sizeof(sp_digit) * 90);
03358         XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
03359     }
03360 
03361     return err;
03362 #else
03363     sp_digit a[180], d[90], m[90];
03364     sp_digit* r = a;
03365     int err = MP_OKAY;
03366 
03367     (void)pm;
03368     (void)qm;
03369     (void)dpm;
03370     (void)dqm;
03371     (void)qim;
03372 
03373     if (*outLen < 256)
03374         err = MP_TO_E;
03375     if (err == MP_OKAY && (mp_count_bits(dm) > 2048 || inLen > 256 ||
03376                                                      mp_count_bits(mm) != 2048))
03377         err = MP_READ_E;
03378 
03379     if (err == MP_OKAY) {
03380         sp_2048_from_bin(a, 90, in, inLen);
03381         sp_2048_from_mp(d, 90, dm);
03382         sp_2048_from_mp(m, 90, mm);
03383         err = sp_2048_mod_exp_90(r, a, d, 2048, m, 0);
03384     }
03385 
03386     if (err == MP_OKAY) {
03387         sp_2048_to_bin(r, out);
03388         *outLen = 256;
03389     }
03390 
03391     XMEMSET(d, 0, sizeof(sp_digit) * 90);
03392 
03393     return err;
03394 #endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */
03395 #else
03396 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
03397     sp_digit* t = NULL;
03398     sp_digit* a;
03399     sp_digit* p;
03400     sp_digit* q;
03401     sp_digit* dp;
03402     sp_digit* dq;
03403     sp_digit* qi;
03404     sp_digit* tmp;
03405     sp_digit* tmpa;
03406     sp_digit* tmpb;
03407     sp_digit* r;
03408     int err = MP_OKAY;
03409 
03410     (void)dm;
03411     (void)mm;
03412 
03413     if (*outLen < 256)
03414         err = MP_TO_E;
03415     if (err == MP_OKAY && (inLen > 256 || mp_count_bits(mm) != 2048))
03416         err = MP_READ_E;
03417 
03418     if (err == MP_OKAY) {
03419         t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 45 * 11, NULL,
03420                                DYNAMIC_TYPE_TMP_BUFFER);
03421         if (t == NULL)
03422             err = MEMORY_E;
03423     }
03424     if (err == MP_OKAY) {
03425         a = t;
03426         p = a + 90 * 2;
03427         q = p + 45;
03428         qi = dq = dp = q + 45;
03429         tmpa = qi + 45;
03430         tmpb = tmpa + 90;
03431 
03432         tmp = t;
03433         r = tmp + 90;
03434 
03435         sp_2048_from_bin(a, 90, in, inLen);
03436         sp_2048_from_mp(p, 45, pm);
03437         sp_2048_from_mp(q, 45, qm);
03438         sp_2048_from_mp(dp, 45, dpm);
03439         err = sp_2048_mod_exp_45(tmpa, a, dp, 1024, p, 1);
03440     }
03441     if (err == MP_OKAY) {
03442         sp_2048_from_mp(dq, 45, dqm);
03443         err = sp_2048_mod_exp_45(tmpb, a, dq, 1024, q, 1);
03444     }
03445     if (err == MP_OKAY) {
03446         sp_2048_sub_45(tmpa, tmpa, tmpb);
03447         sp_2048_mask_45(tmp, p, tmpa[44] >> 31);
03448         sp_2048_add_45(tmpa, tmpa, tmp);
03449 
03450         sp_2048_from_mp(qi, 45, qim);
03451         sp_2048_mul_45(tmpa, tmpa, qi);
03452         err = sp_2048_mod_45(tmpa, tmpa, p);
03453     }
03454 
03455     if (err == MP_OKAY) {
03456         sp_2048_mul_45(tmpa, q, tmpa);
03457         sp_2048_add_90(r, tmpb, tmpa);
03458         sp_2048_norm_90(r);
03459 
03460         sp_2048_to_bin(r, out);
03461         *outLen = 256;
03462     }
03463 
03464     if (t != NULL) {
03465         XMEMSET(t, 0, sizeof(sp_digit) * 45 * 11);
03466         XFREE(t, NULL, DYNAMIC_TYPE_TMP_BUFFER);
03467     }
03468 
03469     return err;
03470 #else
03471     sp_digit a[90 * 2];
03472     sp_digit p[45], q[45], dp[45], dq[45], qi[45];
03473     sp_digit tmp[90], tmpa[90], tmpb[90];
03474     sp_digit* r = a;
03475     int err = MP_OKAY;
03476 
03477     (void)dm;
03478     (void)mm;
03479 
03480     if (*outLen < 256)
03481         err = MP_TO_E;
03482     if (err == MP_OKAY && (inLen > 256 || mp_count_bits(mm) != 2048))
03483         err = MP_READ_E;
03484 
03485     if (err == MP_OKAY) {
03486         sp_2048_from_bin(a, 90, in, inLen);
03487         sp_2048_from_mp(p, 45, pm);
03488         sp_2048_from_mp(q, 45, qm);
03489         sp_2048_from_mp(dp, 45, dpm);
03490         sp_2048_from_mp(dq, 45, dqm);
03491         sp_2048_from_mp(qi, 45, qim);
03492 
03493         err = sp_2048_mod_exp_45(tmpa, a, dp, 1024, p, 1);
03494     }
03495     if (err == MP_OKAY)
03496         err = sp_2048_mod_exp_45(tmpb, a, dq, 1024, q, 1);
03497 
03498     if (err == MP_OKAY) {
03499         sp_2048_sub_45(tmpa, tmpa, tmpb);
03500         sp_2048_mask_45(tmp, p, tmpa[44] >> 31);
03501         sp_2048_add_45(tmpa, tmpa, tmp);
03502         sp_2048_mul_45(tmpa, tmpa, qi);
03503         err = sp_2048_mod_45(tmpa, tmpa, p);
03504     }
03505 
03506     if (err == MP_OKAY) {
03507         sp_2048_mul_45(tmpa, tmpa, q);
03508         sp_2048_add_90(r, tmpb, tmpa);
03509         sp_2048_norm_90(r);
03510 
03511         sp_2048_to_bin(r, out);
03512         *outLen = 256;
03513     }
03514 
03515     XMEMSET(tmpa, 0, sizeof(tmpa));
03516     XMEMSET(tmpb, 0, sizeof(tmpb));
03517     XMEMSET(p, 0, sizeof(p));
03518     XMEMSET(q, 0, sizeof(q));
03519     XMEMSET(dp, 0, sizeof(dp));
03520     XMEMSET(dq, 0, sizeof(dq));
03521     XMEMSET(qi, 0, sizeof(qi));
03522 
03523     return err;
03524 #endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */
03525 #endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
03526 }
03527 
03528 #endif /* WOLFSSL_HAVE_SP_RSA */
03529 #ifdef WOLFSSL_HAVE_SP_DH
03530 /* Convert an array of sp_digit to an mp_int.
03531  *
03532  * a  A single precision integer.
03533  * r  A multi-precision integer.
03534  */
03535 static int sp_2048_to_mp(sp_digit* a, mp_int* r)
03536 {
03537     int err;
03538 
03539     err = mp_grow(r, (2048 + DIGIT_BIT - 1) / DIGIT_BIT);
03540     if (err == MP_OKAY) {
03541 #if DIGIT_BIT == 23
03542         XMEMCPY(r->dp, a, sizeof(sp_digit) * 90);
03543         r->used = 90;
03544         mp_clamp(r);
03545 #elif DIGIT_BIT < 23
03546         int i, j = 0, s = 0;
03547 
03548         r->dp[0] = 0;
03549         for (i = 0; i < 90; i++) {
03550             r->dp[j] |= a[i] << s;
03551             r->dp[j] &= (1l << DIGIT_BIT) - 1;
03552             s = DIGIT_BIT - s;
03553             r->dp[++j] = a[i] >> s;
03554             while (s + DIGIT_BIT <= 23) {
03555                 s += DIGIT_BIT;
03556                 r->dp[j] &= (1l << DIGIT_BIT) - 1;
03557                 r->dp[++j] = a[i] >> s;
03558             }
03559             s = 23 - s;
03560         }
03561         r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
03562         mp_clamp(r);
03563 #else
03564         int i, j = 0, s = 0;
03565 
03566         r->dp[0] = 0;
03567         for (i = 0; i < 90; i++) {
03568             r->dp[j] |= ((mp_digit)a[i]) << s;
03569             if (s + 23 >= DIGIT_BIT) {
03570     #if DIGIT_BIT < 32
03571                 r->dp[j] &= (1l << DIGIT_BIT) - 1;
03572     #endif
03573                 s = DIGIT_BIT - s;
03574                 r->dp[++j] = a[i] >> s;
03575                 s = 23 - s;
03576             }
03577             else
03578                 s += 23;
03579         }
03580         r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
03581         mp_clamp(r);
03582 #endif
03583     }
03584 
03585     return err;
03586 }
03587 
03588 /* Perform the modular exponentiation for Diffie-Hellman.
03589  *
03590  * base  Base. MP integer.
03591  * exp   Exponent. MP integer.
03592  * mod   Modulus. MP integer.
03593  * res   Result. MP integer.
03594  * returs 0 on success, MP_READ_E if there are too many bytes in an array
03595  * and MEMORY_E if memory allocation fails.
03596  */
03597 int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
03598 {
03599 #ifdef WOLFSSL_SP_SMALL
03600     int err = MP_OKAY;
03601     sp_digit* d = NULL;
03602     sp_digit* b;
03603     sp_digit* e;
03604     sp_digit* m;
03605     sp_digit* r;
03606     int expBits = mp_count_bits(exp);
03607 
03608     if (mp_count_bits(base) > 2048 || expBits > 2048 ||
03609                                                    mp_count_bits(mod) != 2048) {
03610         err = MP_READ_E;
03611     }
03612 
03613     if (err == MP_OKAY) {
03614         d = (sp_digit*)XMALLOC(sizeof(*d) * 90 * 4, NULL,
03615                                DYNAMIC_TYPE_TMP_BUFFER);
03616         if (d == NULL)
03617             err = MEMORY_E;
03618     }
03619 
03620     if (err == MP_OKAY) {
03621         b = d;
03622         e = b + 90 * 2;
03623         m = e + 90;
03624         r = b;
03625 
03626         sp_2048_from_mp(b, 90, base);
03627         sp_2048_from_mp(e, 90, exp);
03628         sp_2048_from_mp(m, 90, mod);
03629 
03630         err = sp_2048_mod_exp_90(r, b, e, mp_count_bits(exp), m, 0);
03631     }
03632 
03633     if (err == MP_OKAY) {
03634         err = sp_2048_to_mp(r, res);
03635     }
03636 
03637     if (d != NULL) {
03638         XMEMSET(e, 0, sizeof(sp_digit) * 90);
03639         XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
03640     }
03641     return err;
03642 #else
03643 #ifndef WOLFSSL_SMALL_STACK
03644     sp_digit bd[180], ed[90], md[90];
03645 #else
03646     sp_digit* d = NULL;
03647 #endif
03648     sp_digit* b;
03649     sp_digit* e;
03650     sp_digit* m;
03651     sp_digit* r;
03652     int err = MP_OKAY;
03653     int expBits = mp_count_bits(exp);
03654 
03655     if (mp_count_bits(base) > 2048 || expBits > 2048 ||
03656                                                    mp_count_bits(mod) != 2048) {
03657         err = MP_READ_E;
03658     }
03659 
03660 #ifdef WOLFSSL_SMALL_STACK
03661     if (err == MP_OKAY) {
03662         d = (sp_digit*)XMALLOC(sizeof(*d) * 90 * 4, NULL,
03663                                DYNAMIC_TYPE_TMP_BUFFER);
03664         if (d == NULL)
03665             err = MEMORY_E;
03666     }
03667 
03668     if (err == MP_OKAY) {
03669         b = d;
03670         e = b + 90 * 2;
03671         m = e + 90;
03672         r = b;
03673     }
03674 #else
03675     r = b = bd;
03676     e = ed;
03677     m = md;
03678 #endif
03679 
03680     if (err == MP_OKAY) {
03681         sp_2048_from_mp(b, 90, base);
03682         sp_2048_from_mp(e, 90, exp);
03683         sp_2048_from_mp(m, 90, mod);
03684 
03685         err = sp_2048_mod_exp_90(r, b, e, expBits, m, 0);
03686     }
03687 
03688     if (err == MP_OKAY) {
03689         err = sp_2048_to_mp(r, res);
03690     }
03691 
03692     XMEMSET(e, 0, sizeof(sp_digit) * 90);
03693 
03694 #ifdef WOLFSSL_SMALL_STACK
03695     if (d != NULL)
03696         XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
03697 #endif
03698 
03699     return err;
03700 #endif
03701 }
03702 
03703 /* Perform the modular exponentiation for Diffie-Hellman.
03704  *
03705  * base     Base.
03706  * exp      Array of bytes that is the exponent.
03707  * expLen   Length of data, in bytes, in exponent.
03708  * mod      Modulus.
03709  * out      Buffer to hold big-endian bytes of exponentiation result.
03710  *          Must be at least 256 bytes long.
03711  * outLen   Length, in bytes, of exponentiation result.
03712  * returs 0 on success, MP_READ_E if there are too many bytes in an array
03713  * and MEMORY_E if memory allocation fails.
03714  */
03715 int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen,
03716     mp_int* mod, byte* out, word32* outLen)
03717 {
03718 #ifdef WOLFSSL_SP_SMALL
03719     int err = MP_OKAY;
03720     sp_digit* d = NULL;
03721     sp_digit* b;
03722     sp_digit* e;
03723     sp_digit* m;
03724     sp_digit* r;
03725     word32 i;
03726 
03727     if (mp_count_bits(base) > 2048 || expLen > 256 ||
03728                                                    mp_count_bits(mod) != 2048) {
03729         err = MP_READ_E;
03730     }
03731 
03732     if (err == MP_OKAY) {
03733         d = (sp_digit*)XMALLOC(sizeof(*d) * 90 * 4, NULL,
03734                                DYNAMIC_TYPE_TMP_BUFFER);
03735         if (d == NULL)
03736             err = MEMORY_E;
03737     }
03738 
03739     if (err == MP_OKAY) {
03740         b = d;
03741         e = b + 90 * 2;
03742         m = e + 90;
03743         r = b;
03744 
03745         sp_2048_from_mp(b, 90, base);
03746         sp_2048_from_bin(e, 90, exp, expLen);
03747         sp_2048_from_mp(m, 90, mod);
03748 
03749         err = sp_2048_mod_exp_90(r, b, e, expLen * 8, m, 0);
03750     }
03751 
03752     if (err == MP_OKAY) {
03753         sp_2048_to_bin(r, out);
03754         *outLen = 256;
03755         for (i=0; i<256 && out[i] == 0; i++) {
03756         }
03757         *outLen -= i;
03758         XMEMMOVE(out, out + i, *outLen);
03759     }
03760 
03761     if (d != NULL) {
03762         XMEMSET(e, 0, sizeof(sp_digit) * 90);
03763         XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
03764     }
03765     return err;
03766 #else
03767 #ifndef WOLFSSL_SMALL_STACK
03768     sp_digit bd[180], ed[90], md[90];
03769 #else
03770     sp_digit* d = NULL;
03771 #endif
03772     sp_digit* b;
03773     sp_digit* e;
03774     sp_digit* m;
03775     sp_digit* r;
03776     word32 i;
03777     int err = MP_OKAY;
03778 
03779     if (mp_count_bits(base) > 2048 || expLen > 256 ||
03780                                                    mp_count_bits(mod) != 2048) {
03781         err = MP_READ_E;
03782     }
03783 
03784 #ifdef WOLFSSL_SMALL_STACK
03785     if (err == MP_OKAY) {
03786         d = (sp_digit*)XMALLOC(sizeof(*d) * 90 * 4, NULL,
03787                                DYNAMIC_TYPE_TMP_BUFFER);
03788         if (d == NULL)
03789             err = MEMORY_E;
03790     }
03791 
03792     if (err == MP_OKAY) {
03793         b = d;
03794         e = b + 90 * 2;
03795         m = e + 90;
03796         r = b;
03797     }
03798 #else
03799     r = b = bd;
03800     e = ed;
03801     m = md;
03802 #endif
03803 
03804     if (err == MP_OKAY) {
03805         sp_2048_from_mp(b, 90, base);
03806         sp_2048_from_bin(e, 90, exp, expLen);
03807         sp_2048_from_mp(m, 90, mod);
03808 
03809         err = sp_2048_mod_exp_90(r, b, e, expLen * 8, m, 0);
03810     }
03811 
03812     if (err == MP_OKAY) {
03813         sp_2048_to_bin(r, out);
03814         *outLen = 256;
03815         for (i=0; i<256 && out[i] == 0; i++) {
03816         }
03817         *outLen -= i;
03818         XMEMMOVE(out, out + i, *outLen);
03819     }
03820 
03821     XMEMSET(e, 0, sizeof(sp_digit) * 90);
03822 
03823 #ifdef WOLFSSL_SMALL_STACK
03824     if (d != NULL)
03825         XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
03826 #endif
03827 
03828     return err;
03829 #endif
03830 }
03831 
03832 #endif /* WOLFSSL_HAVE_SP_DH */
03833 
03834 #endif /* WOLFSSL_SP_NO_2048 */
03835 
03836 #ifndef WOLFSSL_SP_NO_3072
03837 /* Read big endian unsigned byte aray into r.
03838  *
03839  * r  A single precision integer.
03840  * a  Byte array.
03841  * n  Number of bytes in array to read.
03842  */
03843 static void sp_3072_from_bin(sp_digit* r, int max, const byte* a, int n)
03844 {
03845     int i, j = 0, s = 0;
03846 
03847     r[0] = 0;
03848     for (i = n-1; i >= 0; i--) {
03849         r[j] |= ((sp_digit)a[i]) << s;
03850         if (s >= 15) {
03851             r[j] &= 0x7fffff;
03852             s = 23 - s;
03853             if (j + 1 >= max)
03854                 break;
03855             r[++j] = a[i] >> s;
03856             s = 8 - s;
03857         }
03858         else
03859             s += 8;
03860     }
03861 
03862     for (j++; j < max; j++)
03863         r[j] = 0;
03864 }
03865 
03866 /* Convert an mp_int to an array of sp_digit.
03867  *
03868  * r  A single precision integer.
03869  * a  A multi-precision integer.
03870  */
03871 static void sp_3072_from_mp(sp_digit* r, int max, mp_int* a)
03872 {
03873 #if DIGIT_BIT == 23
03874     int j;
03875 
03876     XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
03877 
03878     for (j = a->used; j < max; j++)
03879         r[j] = 0;
03880 #elif DIGIT_BIT > 23
03881     int i, j = 0, s = 0;
03882 
03883     r[0] = 0;
03884     for (i = 0; i < a->used && j < max; i++) {
03885         r[j] |= a->dp[i] << s;
03886         r[j] &= 0x7fffff;
03887         s = 23 - s;
03888         if (j + 1 >= max)
03889             break;
03890         r[++j] = a->dp[i] >> s;
03891         while (s + 23 <= DIGIT_BIT) {
03892             s += 23;
03893             r[j] &= 0x7fffff;
03894             if (j + 1 >= max)
03895                 break;
03896             if (s < DIGIT_BIT)
03897                 r[++j] = a->dp[i] >> s;
03898             else
03899                 r[++j] = 0;
03900         }
03901         s = DIGIT_BIT - s;
03902     }
03903 
03904     for (j++; j < max; j++)
03905         r[j] = 0;
03906 #else
03907     int i, j = 0, s = 0;
03908 
03909     r[0] = 0;
03910     for (i = 0; i < a->used && j < max; i++) {
03911         r[j] |= ((sp_digit)a->dp[i]) << s;
03912         if (s + DIGIT_BIT >= 23) {
03913             r[j] &= 0x7fffff;
03914             if (j + 1 >= max)
03915                 break;
03916             s = 23 - s;
03917             if (s == DIGIT_BIT) {
03918                 r[++j] = 0;
03919                 s = 0;
03920             }
03921             else {
03922                 r[++j] = a->dp[i] >> s;
03923                 s = DIGIT_BIT - s;
03924             }
03925         }
03926         else
03927             s += DIGIT_BIT;
03928     }
03929 
03930     for (j++; j < max; j++)
03931         r[j] = 0;
03932 #endif
03933 }
03934 
03935 /* Write r as big endian to byte aray.
03936  * Fixed length number of bytes written: 384
03937  *
03938  * r  A single precision integer.
03939  * a  Byte array.
03940  */
03941 static void sp_3072_to_bin(sp_digit* r, byte* a)
03942 {
03943     int i, j, s = 0, b;
03944 
03945     for (i=0; i<135; i++) {
03946         r[i+1] += r[i] >> 23;
03947         r[i] &= 0x7fffff;
03948     }
03949     j = 3072 / 8 - 1;
03950     a[j] = 0;
03951     for (i=0; i<136 && j>=0; i++) {
03952         b = 0;
03953         a[j--] |= r[i] << s; b += 8 - s;
03954         if (j < 0)
03955             break;
03956         while (b < 23) {
03957             a[j--] = r[i] >> b; b += 8;
03958             if (j < 0)
03959                 break;
03960         }
03961         s = 8 - (b - 23);
03962         if (j >= 0)
03963             a[j] = 0;
03964         if (s != 0)
03965             j++;
03966     }
03967 }
03968 
03969 #ifndef WOLFSSL_SP_SMALL
03970 /* Multiply a and b into r. (r = a * b)
03971  *
03972  * r  A single precision integer.
03973  * a  A single precision integer.
03974  * b  A single precision integer.
03975  */
03976 SP_NOINLINE static void sp_3072_mul_17(sp_digit* r, const sp_digit* a,
03977     const sp_digit* b)
03978 {
03979     int i, j;
03980     int64_t t[34];
03981 
03982     XMEMSET(t, 0, sizeof(t));
03983     for (i=0; i<17; i++) {
03984         for (j=0; j<17; j++)
03985             t[i+j] += ((int64_t)a[i]) * b[j];
03986     }
03987     for (i=0; i<33; i++) {
03988         r[i] = t[i] & 0x7fffff;
03989         t[i+1] += t[i] >> 23;
03990     }
03991     r[33] = (sp_digit)t[33];
03992 }
03993 
03994 /* Square a and put result in r. (r = a * a)
03995  *
03996  * r  A single precision integer.
03997  * a  A single precision integer.
03998  */
03999 SP_NOINLINE static void sp_3072_sqr_17(sp_digit* r, const sp_digit* a)
04000 {
04001     int i, j;
04002     int64_t t[34];
04003 
04004     XMEMSET(t, 0, sizeof(t));
04005     for (i=0; i<17; i++) {
04006         for (j=0; j<i; j++)
04007             t[i+j] += (((int64_t)a[i]) * a[j]) * 2;
04008         t[i+i] += ((int64_t)a[i]) * a[i];
04009     }
04010     for (i=0; i<33; i++) {
04011         r[i] = t[i] & 0x7fffff;
04012         t[i+1] += t[i] >> 23;
04013     }
04014     r[33] = (sp_digit)t[33];
04015 }
04016 
04017 /* Add b to a into r. (r = a + b)
04018  *
04019  * r  A single precision integer.
04020  * a  A single precision integer.
04021  * b  A single precision integer.
04022  */
04023 SP_NOINLINE static int sp_3072_add_17(sp_digit* r, const sp_digit* a,
04024         const sp_digit* b)
04025 {
04026     int i;
04027 
04028     for (i = 0; i < 16; i += 8) {
04029         r[i + 0] = a[i + 0] + b[i + 0];
04030         r[i + 1] = a[i + 1] + b[i + 1];
04031         r[i + 2] = a[i + 2] + b[i + 2];
04032         r[i + 3] = a[i + 3] + b[i + 3];
04033         r[i + 4] = a[i + 4] + b[i + 4];
04034         r[i + 5] = a[i + 5] + b[i + 5];
04035         r[i + 6] = a[i + 6] + b[i + 6];
04036         r[i + 7] = a[i + 7] + b[i + 7];
04037     }
04038     r[16] = a[16] + b[16];
04039 
04040     return 0;
04041 }
04042 
04043 /* Add b to a into r. (r = a + b)
04044  *
04045  * r  A single precision integer.
04046  * a  A single precision integer.
04047  * b  A single precision integer.
04048  */
04049 SP_NOINLINE static int sp_3072_add_34(sp_digit* r, const sp_digit* a,
04050         const sp_digit* b)
04051 {
04052     int i;
04053 
04054     for (i = 0; i < 32; i += 8) {
04055         r[i + 0] = a[i + 0] + b[i + 0];
04056         r[i + 1] = a[i + 1] + b[i + 1];
04057         r[i + 2] = a[i + 2] + b[i + 2];
04058         r[i + 3] = a[i + 3] + b[i + 3];
04059         r[i + 4] = a[i + 4] + b[i + 4];
04060         r[i + 5] = a[i + 5] + b[i + 5];
04061         r[i + 6] = a[i + 6] + b[i + 6];
04062         r[i + 7] = a[i + 7] + b[i + 7];
04063     }
04064     r[32] = a[32] + b[32];
04065     r[33] = a[33] + b[33];
04066 
04067     return 0;
04068 }
04069 
04070 /* Sub b from a into r. (r = a - b)
04071  *
04072  * r  A single precision integer.
04073  * a  A single precision integer.
04074  * b  A single precision integer.
04075  */
04076 SP_NOINLINE static int sp_3072_sub_34(sp_digit* r, const sp_digit* a,
04077         const sp_digit* b)
04078 {
04079     int i;
04080 
04081     for (i = 0; i < 32; i += 8) {
04082         r[i + 0] = a[i + 0] - b[i + 0];
04083         r[i + 1] = a[i + 1] - b[i + 1];
04084         r[i + 2] = a[i + 2] - b[i + 2];
04085         r[i + 3] = a[i + 3] - b[i + 3];
04086         r[i + 4] = a[i + 4] - b[i + 4];
04087         r[i + 5] = a[i + 5] - b[i + 5];
04088         r[i + 6] = a[i + 6] - b[i + 6];
04089         r[i + 7] = a[i + 7] - b[i + 7];
04090     }
04091     r[32] = a[32] - b[32];
04092     r[33] = a[33] - b[33];
04093 
04094     return 0;
04095 }
04096 
04097 /* Multiply a and b into r. (r = a * b)
04098  *
04099  * r  A single precision integer.
04100  * a  A single precision integer.
04101  * b  A single precision integer.
04102  */
04103 SP_NOINLINE static void sp_3072_mul_34(sp_digit* r, const sp_digit* a,
04104     const sp_digit* b)
04105 {
04106     sp_digit* z0 = r;
04107     sp_digit z1[34];
04108     sp_digit* a1 = z1;
04109     sp_digit b1[17];
04110     sp_digit* z2 = r + 34;
04111     sp_3072_add_17(a1, a, &a[17]);
04112     sp_3072_add_17(b1, b, &b[17]);
04113     sp_3072_mul_17(z2, &a[17], &b[17]);
04114     sp_3072_mul_17(z0, a, b);
04115     sp_3072_mul_17(z1, a1, b1);
04116     sp_3072_sub_34(z1, z1, z2);
04117     sp_3072_sub_34(z1, z1, z0);
04118     sp_3072_add_34(r + 17, r + 17, z1);
04119 }
04120 
04121 /* Square a and put result in r. (r = a * a)
04122  *
04123  * r  A single precision integer.
04124  * a  A single precision integer.
04125  */
04126 SP_NOINLINE static void sp_3072_sqr_34(sp_digit* r, const sp_digit* a)
04127 {
04128     sp_digit* z0 = r;
04129     sp_digit z1[34];
04130     sp_digit* a1 = z1;
04131     sp_digit* z2 = r + 34;
04132     sp_3072_add_17(a1, a, &a[17]);
04133     sp_3072_sqr_17(z2, &a[17]);
04134     sp_3072_sqr_17(z0, a);
04135     sp_3072_sqr_17(z1, a1);
04136     sp_3072_sub_34(z1, z1, z2);
04137     sp_3072_sub_34(z1, z1, z0);
04138     sp_3072_add_34(r + 17, r + 17, z1);
04139 }
04140 
04141 /* Add b to a into r. (r = a + b)
04142  *
04143  * r  A single precision integer.
04144  * a  A single precision integer.
04145  * b  A single precision integer.
04146  */
04147 SP_NOINLINE static int sp_3072_add_68(sp_digit* r, const sp_digit* a,
04148         const sp_digit* b)
04149 {
04150     int i;
04151 
04152     for (i = 0; i < 64; i += 8) {
04153         r[i + 0] = a[i + 0] + b[i + 0];
04154         r[i + 1] = a[i + 1] + b[i + 1];
04155         r[i + 2] = a[i + 2] + b[i + 2];
04156         r[i + 3] = a[i + 3] + b[i + 3];
04157         r[i + 4] = a[i + 4] + b[i + 4];
04158         r[i + 5] = a[i + 5] + b[i + 5];
04159         r[i + 6] = a[i + 6] + b[i + 6];
04160         r[i + 7] = a[i + 7] + b[i + 7];
04161     }
04162     r[64] = a[64] + b[64];
04163     r[65] = a[65] + b[65];
04164     r[66] = a[66] + b[66];
04165     r[67] = a[67] + b[67];
04166 
04167     return 0;
04168 }
04169 
04170 /* Sub b from a into r. (r = a - b)
04171  *
04172  * r  A single precision integer.
04173  * a  A single precision integer.
04174  * b  A single precision integer.
04175  */
04176 SP_NOINLINE static int sp_3072_sub_68(sp_digit* r, const sp_digit* a,
04177         const sp_digit* b)
04178 {
04179     int i;
04180 
04181     for (i = 0; i < 64; i += 8) {
04182         r[i + 0] = a[i + 0] - b[i + 0];
04183         r[i + 1] = a[i + 1] - b[i + 1];
04184         r[i + 2] = a[i + 2] - b[i + 2];
04185         r[i + 3] = a[i + 3] - b[i + 3];
04186         r[i + 4] = a[i + 4] - b[i + 4];
04187         r[i + 5] = a[i + 5] - b[i + 5];
04188         r[i + 6] = a[i + 6] - b[i + 6];
04189         r[i + 7] = a[i + 7] - b[i + 7];
04190     }
04191     r[64] = a[64] - b[64];
04192     r[65] = a[65] - b[65];
04193     r[66] = a[66] - b[66];
04194     r[67] = a[67] - b[67];
04195 
04196     return 0;
04197 }
04198 
04199 /* Multiply a and b into r. (r = a * b)
04200  *
04201  * r  A single precision integer.
04202  * a  A single precision integer.
04203  * b  A single precision integer.
04204  */
04205 SP_NOINLINE static void sp_3072_mul_68(sp_digit* r, const sp_digit* a,
04206     const sp_digit* b)
04207 {
04208     sp_digit* z0 = r;
04209     sp_digit z1[68];
04210     sp_digit* a1 = z1;
04211     sp_digit b1[34];
04212     sp_digit* z2 = r + 68;
04213     sp_3072_add_34(a1, a, &a[34]);
04214     sp_3072_add_34(b1, b, &b[34]);
04215     sp_3072_mul_34(z2, &a[34], &b[34]);
04216     sp_3072_mul_34(z0, a, b);
04217     sp_3072_mul_34(z1, a1, b1);
04218     sp_3072_sub_68(z1, z1, z2);
04219     sp_3072_sub_68(z1, z1, z0);
04220     sp_3072_add_68(r + 34, r + 34, z1);
04221 }
04222 
04223 /* Square a and put result in r. (r = a * a)
04224  *
04225  * r  A single precision integer.
04226  * a  A single precision integer.
04227  */
04228 SP_NOINLINE static void sp_3072_sqr_68(sp_digit* r, const sp_digit* a)
04229 {
04230     sp_digit* z0 = r;
04231     sp_digit z1[68];
04232     sp_digit* a1 = z1;
04233     sp_digit* z2 = r + 68;
04234     sp_3072_add_34(a1, a, &a[34]);
04235     sp_3072_sqr_34(z2, &a[34]);
04236     sp_3072_sqr_34(z0, a);
04237     sp_3072_sqr_34(z1, a1);
04238     sp_3072_sub_68(z1, z1, z2);
04239     sp_3072_sub_68(z1, z1, z0);
04240     sp_3072_add_68(r + 34, r + 34, z1);
04241 }
04242 
04243 /* Add b to a into r. (r = a + b)
04244  *
04245  * r  A single precision integer.
04246  * a  A single precision integer.
04247  * b  A single precision integer.
04248  */
04249 SP_NOINLINE static int sp_3072_add_136(sp_digit* r, const sp_digit* a,
04250         const sp_digit* b)
04251 {
04252     int i;
04253 
04254     for (i = 0; i < 136; i += 8) {
04255         r[i + 0] = a[i + 0] + b[i + 0];
04256         r[i + 1] = a[i + 1] + b[i + 1];
04257         r[i + 2] = a[i + 2] + b[i + 2];
04258         r[i + 3] = a[i + 3] + b[i + 3];
04259         r[i + 4] = a[i + 4] + b[i + 4];
04260         r[i + 5] = a[i + 5] + b[i + 5];
04261         r[i + 6] = a[i + 6] + b[i + 6];
04262         r[i + 7] = a[i + 7] + b[i + 7];
04263     }
04264 
04265     return 0;
04266 }
04267 
04268 /* Sub b from a into r. (r = a - b)
04269  *
04270  * r  A single precision integer.
04271  * a  A single precision integer.
04272  * b  A single precision integer.
04273  */
04274 SP_NOINLINE static int sp_3072_sub_136(sp_digit* r, const sp_digit* a,
04275         const sp_digit* b)
04276 {
04277     int i;
04278 
04279     for (i = 0; i < 136; i += 8) {
04280         r[i + 0] = a[i + 0] - b[i + 0];
04281         r[i + 1] = a[i + 1] - b[i + 1];
04282         r[i + 2] = a[i + 2] - b[i + 2];
04283         r[i + 3] = a[i + 3] - b[i + 3];
04284         r[i + 4] = a[i + 4] - b[i + 4];
04285         r[i + 5] = a[i + 5] - b[i + 5];
04286         r[i + 6] = a[i + 6] - b[i + 6];
04287         r[i + 7] = a[i + 7] - b[i + 7];
04288     }
04289 
04290     return 0;
04291 }
04292 
04293 /* Multiply a and b into r. (r = a * b)
04294  *
04295  * r  A single precision integer.
04296  * a  A single precision integer.
04297  * b  A single precision integer.
04298  */
04299 SP_NOINLINE static void sp_3072_mul_136(sp_digit* r, const sp_digit* a,
04300     const sp_digit* b)
04301 {
04302     sp_digit* z0 = r;
04303     sp_digit z1[136];
04304     sp_digit* a1 = z1;
04305     sp_digit b1[68];
04306     sp_digit* z2 = r + 136;
04307     sp_3072_add_68(a1, a, &a[68]);
04308     sp_3072_add_68(b1, b, &b[68]);
04309     sp_3072_mul_68(z2, &a[68], &b[68]);
04310     sp_3072_mul_68(z0, a, b);
04311     sp_3072_mul_68(z1, a1, b1);
04312     sp_3072_sub_136(z1, z1, z2);
04313     sp_3072_sub_136(z1, z1, z0);
04314     sp_3072_add_136(r + 68, r + 68, z1);
04315 }
04316 
04317 /* Square a and put result in r. (r = a * a)
04318  *
04319  * r  A single precision integer.
04320  * a  A single precision integer.
04321  */
04322 SP_NOINLINE static void sp_3072_sqr_136(sp_digit* r, const sp_digit* a)
04323 {
04324     sp_digit* z0 = r;
04325     sp_digit z1[136];
04326     sp_digit* a1 = z1;
04327     sp_digit* z2 = r + 136;
04328     sp_3072_add_68(a1, a, &a[68]);
04329     sp_3072_sqr_68(z2, &a[68]);
04330     sp_3072_sqr_68(z0, a);
04331     sp_3072_sqr_68(z1, a1);
04332     sp_3072_sub_136(z1, z1, z2);
04333     sp_3072_sub_136(z1, z1, z0);
04334     sp_3072_add_136(r + 68, r + 68, z1);
04335 }
04336 
04337 #endif /* WOLFSSL_SP_SMALL */
04338 #ifdef WOLFSSL_SP_SMALL
04339 /* Add b to a into r. (r = a + b)
04340  *
04341  * r  A single precision integer.
04342  * a  A single precision integer.
04343  * b  A single precision integer.
04344  */
04345 SP_NOINLINE static int sp_3072_add_136(sp_digit* r, const sp_digit* a,
04346         const sp_digit* b)
04347 {
04348     int i;
04349 
04350     for (i = 0; i < 136; i++)
04351         r[i] = a[i] + b[i];
04352 
04353     return 0;
04354 }
04355 #endif /* WOLFSSL_SP_SMALL */
04356 #ifdef WOLFSSL_SP_SMALL
04357 /* Sub b from a into r. (r = a - b)
04358  *
04359  * r  A single precision integer.
04360  * a  A single precision integer.
04361  * b  A single precision integer.
04362  */
04363 SP_NOINLINE static int sp_3072_sub_136(sp_digit* r, const sp_digit* a,
04364         const sp_digit* b)
04365 {
04366     int i;
04367 
04368     for (i = 0; i < 136; i++)
04369         r[i] = a[i] - b[i];
04370 
04371     return 0;
04372 }
04373 
04374 #endif /* WOLFSSL_SP_SMALL */
04375 #ifdef WOLFSSL_SP_SMALL
04376 /* Multiply a and b into r. (r = a * b)
04377  *
04378  * r  A single precision integer.
04379  * a  A single precision integer.
04380  * b  A single precision integer.
04381  */
04382 SP_NOINLINE static void sp_3072_mul_136(sp_digit* r, const sp_digit* a,
04383     const sp_digit* b)
04384 {
04385     int i, j, k;
04386     int64_t c;
04387 
04388     c = ((int64_t)a[135]) * b[135];
04389     r[271] = (sp_digit)(c >> 23);
04390     c = (c & 0x7fffff) << 23;
04391     for (k = 269; k >= 0; k--) {
04392         for (i = 135; i >= 0; i--) {
04393             j = k - i;
04394             if (j >= 136)
04395                 break;
04396             if (j < 0)
04397                 continue;
04398 
04399             c += ((int64_t)a[i]) * b[j];
04400         }
04401         r[k + 2] += c >> 46;
04402         r[k + 1] = (c >> 23) & 0x7fffff;
04403         c = (c & 0x7fffff) << 23;
04404     }
04405     r[0] = (sp_digit)(c >> 23);
04406 }
04407 
04408 /* Square a and put result in r. (r = a * a)
04409  *
04410  * r  A single precision integer.
04411  * a  A single precision integer.
04412  */
04413 SP_NOINLINE static void sp_3072_sqr_136(sp_digit* r, const sp_digit* a)
04414 {
04415     int i, j, k;
04416     int64_t c;
04417 
04418     c = ((int64_t)a[135]) * a[135];
04419     r[271] = (sp_digit)(c >> 23);
04420     c = (c & 0x7fffff) << 23;
04421     for (k = 269; k >= 0; k--) {
04422         for (i = 135; i >= 0; i--) {
04423             j = k - i;
04424             if (j >= 136 || i <= j)
04425                 break;
04426             if (j < 0)
04427                 continue;
04428 
04429             c += ((int64_t)a[i]) * a[j] * 2;
04430         }
04431         if (i == j)
04432            c += ((int64_t)a[i]) * a[i];
04433 
04434         r[k + 2] += c >> 46;
04435         r[k + 1] = (c >> 23) & 0x7fffff;
04436         c = (c & 0x7fffff) << 23;
04437     }
04438     r[0] = (sp_digit)(c >> 23);
04439 }
04440 
04441 #endif /* WOLFSSL_SP_SMALL */
04442 #if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
04443 #ifdef WOLFSSL_SP_SMALL
04444 /* Add b to a into r. (r = a + b)
04445  *
04446  * r  A single precision integer.
04447  * a  A single precision integer.
04448  * b  A single precision integer.
04449  */
04450 SP_NOINLINE static int sp_3072_add_68(sp_digit* r, const sp_digit* a,
04451         const sp_digit* b)
04452 {
04453     int i;
04454 
04455     for (i = 0; i < 68; i++)
04456         r[i] = a[i] + b[i];
04457 
04458     return 0;
04459 }
04460 #endif /* WOLFSSL_SP_SMALL */
04461 #ifdef WOLFSSL_SP_SMALL
04462 /* Sub b from a into r. (r = a - b)
04463  *
04464  * r  A single precision integer.
04465  * a  A single precision integer.
04466  * b  A single precision integer.
04467  */
04468 SP_NOINLINE static int sp_3072_sub_68(sp_digit* r, const sp_digit* a,
04469         const sp_digit* b)
04470 {
04471     int i;
04472 
04473     for (i = 0; i < 68; i++)
04474         r[i] = a[i] - b[i];
04475 
04476     return 0;
04477 }
04478 
04479 #endif /* WOLFSSL_SP_SMALL */
04480 #ifdef WOLFSSL_SP_SMALL
04481 /* Multiply a and b into r. (r = a * b)
04482  *
04483  * r  A single precision integer.
04484  * a  A single precision integer.
04485  * b  A single precision integer.
04486  */
04487 SP_NOINLINE static void sp_3072_mul_68(sp_digit* r, const sp_digit* a,
04488     const sp_digit* b)
04489 {
04490     int i, j, k;
04491     int64_t c;
04492 
04493     c = ((int64_t)a[67]) * b[67];
04494     r[135] = (sp_digit)(c >> 23);
04495     c = (c & 0x7fffff) << 23;
04496     for (k = 133; k >= 0; k--) {
04497         for (i = 67; i >= 0; i--) {
04498             j = k - i;
04499             if (j >= 68)
04500                 break;
04501             if (j < 0)
04502                 continue;
04503 
04504             c += ((int64_t)a[i]) * b[j];
04505         }
04506         r[k + 2] += c >> 46;
04507         r[k + 1] = (c >> 23) & 0x7fffff;
04508         c = (c & 0x7fffff) << 23;
04509     }
04510     r[0] = (sp_digit)(c >> 23);
04511 }
04512 
04513 /* Square a and put result in r. (r = a * a)
04514  *
04515  * r  A single precision integer.
04516  * a  A single precision integer.
04517  */
04518 SP_NOINLINE static void sp_3072_sqr_68(sp_digit* r, const sp_digit* a)
04519 {
04520     int i, j, k;
04521     int64_t c;
04522 
04523     c = ((int64_t)a[67]) * a[67];
04524     r[135] = (sp_digit)(c >> 23);
04525     c = (c & 0x7fffff) << 23;
04526     for (k = 133; k >= 0; k--) {
04527         for (i = 67; i >= 0; i--) {
04528             j = k - i;
04529             if (j >= 68 || i <= j)
04530                 break;
04531             if (j < 0)
04532                 continue;
04533 
04534             c += ((int64_t)a[i]) * a[j] * 2;
04535         }
04536         if (i == j)
04537            c += ((int64_t)a[i]) * a[i];
04538 
04539         r[k + 2] += c >> 46;
04540         r[k + 1] = (c >> 23) & 0x7fffff;
04541         c = (c & 0x7fffff) << 23;
04542     }
04543     r[0] = (sp_digit)(c >> 23);
04544 }
04545 
04546 #endif /* WOLFSSL_SP_SMALL */
04547 #endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
04548 
04549 /* Caclulate the bottom digit of -1/a mod 2^n.
04550  *
04551  * a    A single precision number.
04552  * rho  Bottom word of inverse.
04553  */
04554 static void sp_3072_mont_setup(sp_digit* a, sp_digit* rho)
04555 {
04556     sp_digit x, b;
04557 
04558     b = a[0];
04559     x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
04560     x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
04561     x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
04562     x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
04563     x &= 0x7fffff;
04564 
04565     /* rho = -1/m mod b */
04566     *rho = (1L << 23) - x;
04567 }
04568 
04569 #if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
04570 /* r = 2^n mod m where n is the number of bits to reduce by.
04571  * Given m must be 3072 bits, just need to subtract.
04572  *
04573  * r  A single precision number.
04574  * m  A signle precision number.
04575  */
04576 static void sp_3072_mont_norm_68(sp_digit* r, sp_digit* m)
04577 {
04578     /* Set r = 2^n - 1. */
04579 #ifdef WOLFSSL_SP_SMALL
04580     int i;
04581 
04582     for (i=0; i<67; i++)
04583         r[i] = 0x7fffff;
04584 #else
04585     int i;
04586 
04587     for (i = 0; i < 64; i += 8) {
04588         r[i + 0] = 0x7fffff;
04589         r[i + 1] = 0x7fffff;
04590         r[i + 2] = 0x7fffff;
04591         r[i + 3] = 0x7fffff;
04592         r[i + 4] = 0x7fffff;
04593         r[i + 5] = 0x7fffff;
04594         r[i + 6] = 0x7fffff;
04595         r[i + 7] = 0x7fffff;
04596     }
04597     r[64] = 0x7fffff;
04598     r[65] = 0x7fffff;
04599     r[66] = 0x7fffff;
04600 #endif
04601     r[67] = 0x3ffffl;
04602 
04603     /* r = (2^n - 1) mod n */
04604     sp_3072_sub_68(r, r, m);
04605 
04606     /* Add one so r = 2^n mod m */
04607     r[0] += 1;
04608 }
04609 
04610 /* Compare a with b in constant time.
04611  *
04612  * a  A single precision integer.
04613  * b  A single precision integer.
04614  * return -ve, 0 or +ve if a is less than, equal to or greater than b
04615  * respectively.
04616  */
04617 static sp_digit sp_3072_cmp_68(const sp_digit* a, const sp_digit* b)
04618 {
04619     sp_digit r = 0;
04620 #ifdef WOLFSSL_SP_SMALL
04621     int i;
04622 
04623     for (i=67; i>=0; i--)
04624         r |= (a[i] - b[i]) & (0 - !r);
04625 #else
04626     int i;
04627 
04628     r |= (a[67] - b[67]) & (0 - !r);
04629     r |= (a[66] - b[66]) & (0 - !r);
04630     r |= (a[65] - b[65]) & (0 - !r);
04631     r |= (a[64] - b[64]) & (0 - !r);
04632     for (i = 56; i >= 0; i -= 8) {
04633         r |= (a[i + 7] - b[i + 7]) & (0 - !r);
04634         r |= (a[i + 6] - b[i + 6]) & (0 - !r);
04635         r |= (a[i + 5] - b[i + 5]) & (0 - !r);
04636         r |= (a[i + 4] - b[i + 4]) & (0 - !r);
04637         r |= (a[i + 3] - b[i + 3]) & (0 - !r);
04638         r |= (a[i + 2] - b[i + 2]) & (0 - !r);
04639         r |= (a[i + 1] - b[i + 1]) & (0 - !r);
04640         r |= (a[i + 0] - b[i + 0]) & (0 - !r);
04641     }
04642 #endif /* WOLFSSL_SP_SMALL */
04643 
04644     return r;
04645 }
04646 
04647 /* Conditionally subtract b from a using the mask m.
04648  * m is -1 to subtract and 0 when not.
04649  *
04650  * r  A single precision number representing condition subtract result.
04651  * a  A single precision number to subtract from.
04652  * b  A single precision number to subtract.
04653  * m  Mask value to apply.
04654  */
04655 static void sp_3072_cond_sub_68(sp_digit* r, const sp_digit* a,
04656         const sp_digit* b, const sp_digit m)
04657 {
04658 #ifdef WOLFSSL_SP_SMALL
04659     int i;
04660 
04661     for (i = 0; i < 68; i++)
04662         r[i] = a[i] - (b[i] & m);
04663 #else
04664     int i;
04665 
04666     for (i = 0; i < 64; i += 8) {
04667         r[i + 0] = a[i + 0] - (b[i + 0] & m);
04668         r[i + 1] = a[i + 1] - (b[i + 1] & m);
04669         r[i + 2] = a[i + 2] - (b[i + 2] & m);
04670         r[i + 3] = a[i + 3] - (b[i + 3] & m);
04671         r[i + 4] = a[i + 4] - (b[i + 4] & m);
04672         r[i + 5] = a[i + 5] - (b[i + 5] & m);
04673         r[i + 6] = a[i + 6] - (b[i + 6] & m);
04674         r[i + 7] = a[i + 7] - (b[i + 7] & m);
04675     }
04676     r[64] = a[64] - (b[64] & m);
04677     r[65] = a[65] - (b[65] & m);
04678     r[66] = a[66] - (b[66] & m);
04679     r[67] = a[67] - (b[67] & m);
04680 #endif /* WOLFSSL_SP_SMALL */
04681 }
04682 
04683 /* Mul a by scalar b and add into r. (r += a * b)
04684  *
04685  * r  A single precision integer.
04686  * a  A single precision integer.
04687  * b  A scalar.
04688  */
04689 SP_NOINLINE static void sp_3072_mul_add_68(sp_digit* r, const sp_digit* a,
04690         const sp_digit b)
04691 {
04692 #ifdef WOLFSSL_SP_SMALL
04693     int64_t tb = b;
04694     int64_t t = 0;
04695     int i;
04696 
04697     for (i = 0; i < 68; i++) {
04698         t += (tb * a[i]) + r[i];
04699         r[i] = t & 0x7fffff;
04700         t >>= 23;
04701     }
04702     r[68] += t;
04703 #else
04704     int64_t tb = b;
04705     int64_t t[8];
04706     int i;
04707 
04708     t[0] = tb * a[0]; r[0] += t[0] & 0x7fffff;
04709     for (i = 0; i < 64; i += 8) {
04710         t[1] = tb * a[i+1];
04711         r[i+1] += (t[0] >> 23) + (t[1] & 0x7fffff);
04712         t[2] = tb * a[i+2];
04713         r[i+2] += (t[1] >> 23) + (t[2] & 0x7fffff);
04714         t[3] = tb * a[i+3];
04715         r[i+3] += (t[2] >> 23) + (t[3] & 0x7fffff);
04716         t[4] = tb * a[i+4];
04717         r[i+4] += (t[3] >> 23) + (t[4] & 0x7fffff);
04718         t[5] = tb * a[i+5];
04719         r[i+5] += (t[4] >> 23) + (t[5] & 0x7fffff);
04720         t[6] = tb * a[i+6];
04721         r[i+6] += (t[5] >> 23) + (t[6] & 0x7fffff);
04722         t[7] = tb * a[i+7];
04723         r[i+7] += (t[6] >> 23) + (t[7] & 0x7fffff);
04724         t[0] = tb * a[i+8];
04725         r[i+8] += (t[7] >> 23) + (t[0] & 0x7fffff);
04726     }
04727     t[1] = tb * a[65]; r[65] += (t[0] >> 23) + (t[1] & 0x7fffff);
04728     t[2] = tb * a[66]; r[66] += (t[1] >> 23) + (t[2] & 0x7fffff);
04729     t[3] = tb * a[67]; r[67] += (t[2] >> 23) + (t[3] & 0x7fffff);
04730     r[68] +=  t[3] >> 23;
04731 #endif /* WOLFSSL_SP_SMALL */
04732 }
04733 
04734 /* Normalize the values in each word to 23.
04735  *
04736  * a  Array of sp_digit to normalize.
04737  */
04738 static void sp_3072_norm_68(sp_digit* a)
04739 {
04740 #ifdef WOLFSSL_SP_SMALL
04741     int i;
04742     for (i = 0; i < 67; i++) {
04743         a[i+1] += a[i] >> 23;
04744         a[i] &= 0x7fffff;
04745     }
04746 #else
04747     int i;
04748     for (i = 0; i < 64; i += 8) {
04749         a[i+1] += a[i+0] >> 23; a[i+0] &= 0x7fffff;
04750         a[i+2] += a[i+1] >> 23; a[i+1] &= 0x7fffff;
04751         a[i+3] += a[i+2] >> 23; a[i+2] &= 0x7fffff;
04752         a[i+4] += a[i+3] >> 23; a[i+3] &= 0x7fffff;
04753         a[i+5] += a[i+4] >> 23; a[i+4] &= 0x7fffff;
04754         a[i+6] += a[i+5] >> 23; a[i+5] &= 0x7fffff;
04755         a[i+7] += a[i+6] >> 23; a[i+6] &= 0x7fffff;
04756         a[i+8] += a[i+7] >> 23; a[i+7] &= 0x7fffff;
04757         a[i+9] += a[i+8] >> 23; a[i+8] &= 0x7fffff;
04758     }
04759     a[64+1] += a[64] >> 23;
04760     a[64] &= 0x7fffff;
04761     a[65+1] += a[65] >> 23;
04762     a[65] &= 0x7fffff;
04763     a[66+1] += a[66] >> 23;
04764     a[66] &= 0x7fffff;
04765 #endif
04766 }
04767 
04768 /* Shift the result in the high 1536 bits down to the bottom.
04769  *
04770  * r  A single precision number.
04771  * a  A single precision number.
04772  */
04773 static void sp_3072_mont_shift_68(sp_digit* r, const sp_digit* a)
04774 {
04775 #ifdef WOLFSSL_SP_SMALL
04776     int i;
04777     sp_digit n, s;
04778 
04779     s = a[68];
04780     n = a[67] >> 18;
04781     for (i = 0; i < 67; i++) {
04782         n += (s & 0x7fffff) << 5;
04783         r[i] = n & 0x7fffff;
04784         n >>= 23;
04785         s = a[69 + i] + (s >> 23);
04786     }
04787     n += s << 5;
04788     r[67] = n;
04789 #else
04790     sp_digit n, s;
04791     int i;
04792 
04793     s = a[68]; n = a[67] >> 18;
04794     for (i = 0; i < 64; i += 8) {
04795         n += (s & 0x7fffff) << 5; r[i+0] = n & 0x7fffff;
04796         n >>= 23; s = a[i+69] + (s >> 23);
04797         n += (s & 0x7fffff) << 5; r[i+1] = n & 0x7fffff;
04798         n >>= 23; s = a[i+70] + (s >> 23);
04799         n += (s & 0x7fffff) << 5; r[i+2] = n & 0x7fffff;
04800         n >>= 23; s = a[i+71] + (s >> 23);
04801         n += (s & 0x7fffff) << 5; r[i+3] = n & 0x7fffff;
04802         n >>= 23; s = a[i+72] + (s >> 23);
04803         n += (s & 0x7fffff) << 5; r[i+4] = n & 0x7fffff;
04804         n >>= 23; s = a[i+73] + (s >> 23);
04805         n += (s & 0x7fffff) << 5; r[i+5] = n & 0x7fffff;
04806         n >>= 23; s = a[i+74] + (s >> 23);
04807         n += (s & 0x7fffff) << 5; r[i+6] = n & 0x7fffff;
04808         n >>= 23; s = a[i+75] + (s >> 23);
04809         n += (s & 0x7fffff) << 5; r[i+7] = n & 0x7fffff;
04810         n >>= 23; s = a[i+76] + (s >> 23);
04811     }
04812     n += (s & 0x7fffff) << 5; r[64] = n & 0x7fffff;
04813     n >>= 23; s = a[133] + (s >> 23);
04814     n += (s & 0x7fffff) << 5; r[65] = n & 0x7fffff;
04815     n >>= 23; s = a[134] + (s >> 23);
04816     n += (s & 0x7fffff) << 5; r[66] = n & 0x7fffff;
04817     n >>= 23; s = a[135] + (s >> 23);
04818     n += s << 5;              r[67] = n;
04819 #endif /* WOLFSSL_SP_SMALL */
04820     XMEMSET(&r[68], 0, sizeof(*r) * 68);
04821 }
04822 
04823 /* Reduce the number back to 3072 bits using Montgomery reduction.
04824  *
04825  * a   A single precision number to reduce in place.
04826  * m   The single precision number representing the modulus.
04827  * mp  The digit representing the negative inverse of m mod 2^n.
04828  */
04829 static void sp_3072_mont_reduce_68(sp_digit* a, sp_digit* m, sp_digit mp)
04830 {
04831     int i;
04832     sp_digit mu;
04833 
04834     for (i=0; i<67; i++) {
04835         mu = (a[i] * mp) & 0x7fffff;
04836         sp_3072_mul_add_68(a+i, m, mu);
04837         a[i+1] += a[i] >> 23;
04838     }
04839     mu = (a[i] * mp) & 0x3ffffl;
04840     sp_3072_mul_add_68(a+i, m, mu);
04841     a[i+1] += a[i] >> 23;
04842     a[i] &= 0x7fffff;
04843 
04844     sp_3072_mont_shift_68(a, a);
04845     sp_3072_cond_sub_68(a, a, m, 0 - ((a[67] >> 18) > 0));
04846     sp_3072_norm_68(a);
04847 }
04848 
04849 /* Multiply two Montogmery form numbers mod the modulus (prime).
04850  * (r = a * b mod m)
04851  *
04852  * r   Result of multiplication.
04853  * a   First number to multiply in Montogmery form.
04854  * b   Second number to multiply in Montogmery form.
04855  * m   Modulus (prime).
04856  * mp  Montogmery mulitplier.
04857  */
04858 static void sp_3072_mont_mul_68(sp_digit* r, sp_digit* a, sp_digit* b,
04859         sp_digit* m, sp_digit mp)
04860 {
04861     sp_3072_mul_68(r, a, b);
04862     sp_3072_mont_reduce_68(r, m, mp);
04863 }
04864 
04865 /* Square the Montgomery form number. (r = a * a mod m)
04866  *
04867  * r   Result of squaring.
04868  * a   Number to square in Montogmery form.
04869  * m   Modulus (prime).
04870  * mp  Montogmery mulitplier.
04871  */
04872 static void sp_3072_mont_sqr_68(sp_digit* r, sp_digit* a, sp_digit* m,
04873         sp_digit mp)
04874 {
04875     sp_3072_sqr_68(r, a);
04876     sp_3072_mont_reduce_68(r, m, mp);
04877 }
04878 
04879 /* Multiply a by scalar b into r. (r = a * b)
04880  *
04881  * r  A single precision integer.
04882  * a  A single precision integer.
04883  * b  A scalar.
04884  */
04885 SP_NOINLINE static void sp_3072_mul_d_68(sp_digit* r, const sp_digit* a,
04886     const sp_digit b)
04887 {
04888 #ifdef WOLFSSL_SP_SMALL
04889     int64_t tb = b;
04890     int64_t t = 0;
04891     int i;
04892 
04893     for (i = 0; i < 68; i++) {
04894         t += tb * a[i];
04895         r[i] = t & 0x7fffff;
04896         t >>= 23;
04897     }
04898     r[68] = (sp_digit)t;
04899 #else
04900     int64_t tb = b;
04901     int64_t t[8];
04902     int i;
04903 
04904     t[0] = tb * a[0]; r[0] = t[0] & 0x7fffff;
04905     for (i = 0; i < 64; i += 8) {
04906         t[1] = tb * a[i+1];
04907         r[i+1] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
04908         t[2] = tb * a[i+2];
04909         r[i+2] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff);
04910         t[3] = tb * a[i+3];
04911         r[i+3] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff);
04912         t[4] = tb * a[i+4];
04913         r[i+4] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff);
04914         t[5] = tb * a[i+5];
04915         r[i+5] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff);
04916         t[6] = tb * a[i+6];
04917         r[i+6] = (sp_digit)(t[5] >> 23) + (t[6] & 0x7fffff);
04918         t[7] = tb * a[i+7];
04919         r[i+7] = (sp_digit)(t[6] >> 23) + (t[7] & 0x7fffff);
04920         t[0] = tb * a[i+8];
04921         r[i+8] = (sp_digit)(t[7] >> 23) + (t[0] & 0x7fffff);
04922     }
04923     t[1] = tb * a[65];
04924     r[65] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
04925     t[2] = tb * a[66];
04926     r[66] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff);
04927     t[3] = tb * a[67];
04928     r[67] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff);
04929     r[68] =  (sp_digit)(t[3] >> 23);
04930 #endif /* WOLFSSL_SP_SMALL */
04931 }
04932 
04933 /* Conditionally add a and b using the mask m.
04934  * m is -1 to add and 0 when not.
04935  *
04936  * r  A single precision number representing conditional add result.
04937  * a  A single precision number to add with.
04938  * b  A single precision number to add.
04939  * m  Mask value to apply.
04940  */
04941 static void sp_3072_cond_add_68(sp_digit* r, const sp_digit* a,
04942         const sp_digit* b, const sp_digit m)
04943 {
04944 #ifdef WOLFSSL_SP_SMALL
04945     int i;
04946 
04947     for (i = 0; i < 68; i++)
04948         r[i] = a[i] + (b[i] & m);
04949 #else
04950     int i;
04951 
04952     for (i = 0; i < 64; i += 8) {
04953         r[i + 0] = a[i + 0] + (b[i + 0] & m);
04954         r[i + 1] = a[i + 1] + (b[i + 1] & m);
04955         r[i + 2] = a[i + 2] + (b[i + 2] & m);
04956         r[i + 3] = a[i + 3] + (b[i + 3] & m);
04957         r[i + 4] = a[i + 4] + (b[i + 4] & m);
04958         r[i + 5] = a[i + 5] + (b[i + 5] & m);
04959         r[i + 6] = a[i + 6] + (b[i + 6] & m);
04960         r[i + 7] = a[i + 7] + (b[i + 7] & m);
04961     }
04962     r[64] = a[64] + (b[64] & m);
04963     r[65] = a[65] + (b[65] & m);
04964     r[66] = a[66] + (b[66] & m);
04965     r[67] = a[67] + (b[67] & m);
04966 #endif /* WOLFSSL_SP_SMALL */
04967 }
04968 
/* NOTE(review): the guard below is WOLFSSL_SMALL, whereas the rest of this
 * file switches on WOLFSSL_SP_SMALL - confirm this is intended. */
#ifdef WOLFSSL_SMALL
/* Sub b from a into r. (r = a - b)
 *
 * Digit-wise subtraction of 68 digits; no borrow is propagated.
 *
 * r  A single precision integer.
 * a  A single precision integer.
 * b  A single precision integer.
 * return always 0.
 */
SP_NOINLINE static int sp_3072_sub_68(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    int i = 0;

    while (i < 68) {
        r[i] = a[i] - b[i];
        i++;
    }

    return 0;
}

#endif
/* NOTE(review): the guard below is WOLFSSL_SMALL, whereas the rest of this
 * file switches on WOLFSSL_SP_SMALL - confirm this is intended. */
#ifdef WOLFSSL_SMALL
/* Add b to a into r. (r = a + b)
 *
 * Digit-wise addition of 68 digits; no carry is propagated.
 *
 * r  A single precision integer.
 * a  A single precision integer.
 * b  A single precision integer.
 * return always 0.
 */
SP_NOINLINE static int sp_3072_add_68(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    int i = 0;

    while (i < 68) {
        r[i] = a[i] + b[i];
        i++;
    }

    return 0;
}
#endif
05006 /* Divide d in a and put remainder into r (m*d + r = a)
05007  * m is not calculated as it is not needed at this time.
05008  *
05009  * a  Nmber to be divided.
05010  * d  Number to divide with.
05011  * m  Multiplier result.
05012  * r  Remainder from the division.
05013  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
05014  */
05015 static int sp_3072_div_68(sp_digit* a, sp_digit* d, sp_digit* m,
05016         sp_digit* r)
05017 {
05018     int i;
05019     int64_t d1;
05020     sp_digit div, r1;
05021 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
05022     sp_digit* td;
05023 #else
05024     sp_digit t1d[136], t2d[68 + 1];
05025 #endif
05026     sp_digit* t1;
05027     sp_digit* t2;
05028     int err = MP_OKAY;
05029 
05030 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
05031     td = XMALLOC(sizeof(sp_digit) * (3 * 68 + 1), NULL, DYNAMIC_TYPE_TMP_BUFFER);
05032     if (td != NULL) {
05033         t1 = td;
05034         t2 = td + 2 * 68;
05035     }
05036     else
05037         err = MEMORY_E;
05038 #else
05039     t1 = t1d;
05040     t2 = t2d;
05041 #endif
05042 
05043     (void)m;
05044 
05045     if (err == MP_OKAY) {
05046         div = d[67];
05047         XMEMCPY(t1, a, sizeof(*t1) * 2 * 68);
05048         for (i=67; i>=0; i--) {
05049             t1[68 + i] += t1[68 + i - 1] >> 23;
05050             t1[68 + i - 1] &= 0x7fffff;
05051             d1 = t1[68 + i];
05052             d1 <<= 23;
05053             d1 += t1[68 + i - 1];
05054             r1 = (sp_digit)(d1 / div);
05055 
05056             sp_3072_mul_d_68(t2, d, r1);
05057             sp_3072_sub_68(&t1[i], &t1[i], t2);
05058             t1[68 + i] -= t2[68];
05059             t1[68 + i] += t1[68 + i - 1] >> 23;
05060             t1[68 + i - 1] &= 0x7fffff;
05061             r1 = (((-t1[68 + i]) << 23) - t1[68 + i - 1]) / div;
05062             r1++;
05063             sp_3072_mul_d_68(t2, d, r1);
05064             sp_3072_add_68(&t1[i], &t1[i], t2);
05065             t1[68 + i] += t1[68 + i - 1] >> 23;
05066             t1[68 + i - 1] &= 0x7fffff;
05067         }
05068         t1[68 - 1] += t1[68 - 2] >> 23;
05069         t1[68 - 2] &= 0x7fffff;
05070         d1 = t1[68 - 1];
05071         r1 = (sp_digit)(d1 / div);
05072 
05073         sp_3072_mul_d_68(t2, d, r1);
05074         sp_3072_sub_68(t1, t1, t2);
05075         XMEMCPY(r, t1, sizeof(*r) * 2 * 68);
05076         for (i=0; i<66; i++) {
05077             r[i+1] += r[i] >> 23;
05078             r[i] &= 0x7fffff;
05079         }
05080         sp_3072_cond_add_68(r, r, d, 0 - (r[67] < 0));
05081     }
05082 
05083 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
05084     if (td != NULL)
05085         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
05086 #endif
05087 
05088     return err;
05089 }
05090 
05091 /* Reduce a modulo m into r. (r = a mod m)
05092  *
05093  * r  A single precision number that is the reduced result.
05094  * a  A single precision number that is to be reduced.
05095  * m  A single precision number that is the modulus to reduce with.
05096  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
05097  */
05098 static int sp_3072_mod_68(sp_digit* r, sp_digit* a, sp_digit* m)
05099 {
05100     return sp_3072_div_68(a, m, NULL, r);
05101 }
05102 
05103 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
05104  *
05105  * r     A single precision number that is the result of the operation.
05106  * a     A single precision number being exponentiated.
05107  * e     A single precision number that is the exponent.
05108  * bits  The number of bits in the exponent.
05109  * m     A single precision number that is the modulus.
05110  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
05111  */
05112 static int sp_3072_mod_exp_68(sp_digit* r, sp_digit* a, sp_digit* e, int bits,
05113     sp_digit* m, int reduceA)
05114 {
05115 #ifdef WOLFSSL_SP_SMALL
05116     sp_digit* td;
05117     sp_digit* t[3];
05118     sp_digit* norm;
05119     sp_digit mp = 1;
05120     sp_digit n;
05121     int i;
05122     int c, y;
05123     int err = MP_OKAY;
05124 
05125     td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 68 * 2, NULL,
05126                             DYNAMIC_TYPE_TMP_BUFFER);
05127     if (td == NULL)
05128         err = MEMORY_E;
05129 
05130     if (err == MP_OKAY) {
05131         XMEMSET(td, 0, sizeof(*td) * 3 * 68 * 2);
05132 
05133         norm = t[0] = td;
05134         t[1] = &td[68 * 2];
05135         t[2] = &td[2 * 68 * 2];
05136 
05137         sp_3072_mont_setup(m, &mp);
05138         sp_3072_mont_norm_68(norm, m);
05139 
05140         if (reduceA)
05141             err = sp_3072_mod_68(t[1], a, m);
05142         else
05143             XMEMCPY(t[1], a, sizeof(sp_digit) * 68);
05144     }
05145     if (err == MP_OKAY) {
05146         sp_3072_mul_68(t[1], t[1], norm);
05147         err = sp_3072_mod_68(t[1], t[1], m);
05148     }
05149 
05150     if (err == MP_OKAY) {
05151         i = bits / 23;
05152         c = bits % 23;
05153         n = e[i--] << (23 - c);
05154         for (; ; c--) {
05155             if (c == 0) {
05156                 if (i == -1)
05157                     break;
05158 
05159                 n = e[i--];
05160                 c = 23;
05161             }
05162 
05163             y = (n >> 22) & 1;
05164             n <<= 1;
05165 
05166             sp_3072_mont_mul_68(t[y^1], t[0], t[1], m, mp);
05167 
05168             XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
05169                                   ((size_t)t[1] & addr_mask[y])),
05170                     sizeof(*t[2]) * 68 * 2);
05171             sp_3072_mont_sqr_68(t[2], t[2], m, mp);
05172             XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
05173                             ((size_t)t[1] & addr_mask[y])), t[2],
05174                     sizeof(*t[2]) * 68 * 2);
05175         }
05176 
05177         sp_3072_mont_reduce_68(t[0], m, mp);
05178         n = sp_3072_cmp_68(t[0], m);
05179         sp_3072_cond_sub_68(t[0], t[0], m, (n < 0) - 1);
05180         XMEMCPY(r, t[0], sizeof(*r) * 68 * 2);
05181 
05182     }
05183 
05184     if (td != NULL)
05185         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
05186 
05187     return err;
05188 #elif defined(WOLFSSL_SP_CACHE_RESISTANT)
05189 #ifndef WOLFSSL_SMALL_STACK
05190     sp_digit t[3][136];
05191 #else
05192     sp_digit* td;
05193     sp_digit* t[3];
05194 #endif
05195     sp_digit* norm;
05196     sp_digit mp = 1;
05197     sp_digit n;
05198     int i;
05199     int c, y;
05200     int err = MP_OKAY;
05201 
05202 #ifdef WOLFSSL_SMALL_STACK
05203     td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 68 * 2, NULL,
05204                             DYNAMIC_TYPE_TMP_BUFFER);
05205     if (td == NULL)
05206         err = MEMORY_E;
05207 
05208     if (err == MP_OKAY) {
05209         t[0] = td;
05210         t[1] = &td[68 * 2];
05211         t[2] = &td[2 * 68 * 2];
05212         norm = t[0];
05213     }
05214 #else
05215     norm = t[0];
05216 #endif
05217 
05218     if (err == MP_OKAY) {
05219         sp_3072_mont_setup(m, &mp);
05220         sp_3072_mont_norm_68(norm, m);
05221 
05222         if (reduceA) {
05223             err = sp_3072_mod_68(t[1], a, m);
05224             if (err == MP_OKAY) {
05225                 sp_3072_mul_68(t[1], t[1], norm);
05226                 err = sp_3072_mod_68(t[1], t[1], m);
05227             }
05228         }
05229         else {
05230             sp_3072_mul_68(t[1], a, norm);
05231             err = sp_3072_mod_68(t[1], t[1], m);
05232         }
05233     }
05234 
05235     if (err == MP_OKAY) {
05236         i = bits / 23;
05237         c = bits % 23;
05238         n = e[i--] << (23 - c);
05239         for (; ; c--) {
05240             if (c == 0) {
05241                 if (i == -1)
05242                     break;
05243 
05244                 n = e[i--];
05245                 c = 23;
05246             }
05247 
05248             y = (n >> 22) & 1;
05249             n <<= 1;
05250 
05251             sp_3072_mont_mul_68(t[y^1], t[0], t[1], m, mp);
05252 
05253             XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
05254                                  ((size_t)t[1] & addr_mask[y])), sizeof(t[2]));
05255             sp_3072_mont_sqr_68(t[2], t[2], m, mp);
05256             XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
05257                            ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2]));
05258         }
05259 
05260         sp_3072_mont_reduce_68(t[0], m, mp);
05261         n = sp_3072_cmp_68(t[0], m);
05262         sp_3072_cond_sub_68(t[0], t[0], m, (n < 0) - 1);
05263         XMEMCPY(r, t[0], sizeof(t[0]));
05264     }
05265 
05266 #ifdef WOLFSSL_SMALL_STACK
05267     if (td != NULL)
05268         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
05269 #endif
05270 
05271     return err;
05272 #else
05273 #ifndef WOLFSSL_SMALL_STACK
05274     sp_digit t[32][136];
05275 #else
05276     sp_digit* t[32];
05277     sp_digit* td;
05278 #endif
05279     sp_digit* norm;
05280     sp_digit rt[136];
05281     sp_digit mp = 1;
05282     sp_digit n;
05283     int i;
05284     int c, y;
05285     int err = MP_OKAY;
05286 
05287 #ifdef WOLFSSL_SMALL_STACK
05288     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 136, NULL,
05289                             DYNAMIC_TYPE_TMP_BUFFER);
05290     if (td == NULL)
05291         err = MEMORY_E;
05292 
05293     if (err == MP_OKAY) {
05294         for (i=0; i<32; i++)
05295             t[i] = td + i * 136;
05296         norm = t[0];
05297     }
05298 #else
05299     norm = t[0];
05300 #endif
05301 
05302     if (err == MP_OKAY) {
05303         sp_3072_mont_setup(m, &mp);
05304         sp_3072_mont_norm_68(norm, m);
05305 
05306         if (reduceA) {
05307             err = sp_3072_mod_68(t[1], a, m);
05308             if (err == MP_OKAY) {
05309                 sp_3072_mul_68(t[1], t[1], norm);
05310                 err = sp_3072_mod_68(t[1], t[1], m);
05311             }
05312         }
05313         else {
05314             sp_3072_mul_68(t[1], a, norm);
05315             err = sp_3072_mod_68(t[1], t[1], m);
05316         }
05317     }
05318 
05319     if (err == MP_OKAY) {
05320         sp_3072_mont_sqr_68(t[ 2], t[ 1], m, mp);
05321         sp_3072_mont_mul_68(t[ 3], t[ 2], t[ 1], m, mp);
05322         sp_3072_mont_sqr_68(t[ 4], t[ 2], m, mp);
05323         sp_3072_mont_mul_68(t[ 5], t[ 3], t[ 2], m, mp);
05324         sp_3072_mont_sqr_68(t[ 6], t[ 3], m, mp);
05325         sp_3072_mont_mul_68(t[ 7], t[ 4], t[ 3], m, mp);
05326         sp_3072_mont_sqr_68(t[ 8], t[ 4], m, mp);
05327         sp_3072_mont_mul_68(t[ 9], t[ 5], t[ 4], m, mp);
05328         sp_3072_mont_sqr_68(t[10], t[ 5], m, mp);
05329         sp_3072_mont_mul_68(t[11], t[ 6], t[ 5], m, mp);
05330         sp_3072_mont_sqr_68(t[12], t[ 6], m, mp);
05331         sp_3072_mont_mul_68(t[13], t[ 7], t[ 6], m, mp);
05332         sp_3072_mont_sqr_68(t[14], t[ 7], m, mp);
05333         sp_3072_mont_mul_68(t[15], t[ 8], t[ 7], m, mp);
05334         sp_3072_mont_sqr_68(t[16], t[ 8], m, mp);
05335         sp_3072_mont_mul_68(t[17], t[ 9], t[ 8], m, mp);
05336         sp_3072_mont_sqr_68(t[18], t[ 9], m, mp);
05337         sp_3072_mont_mul_68(t[19], t[10], t[ 9], m, mp);
05338         sp_3072_mont_sqr_68(t[20], t[10], m, mp);
05339         sp_3072_mont_mul_68(t[21], t[11], t[10], m, mp);
05340         sp_3072_mont_sqr_68(t[22], t[11], m, mp);
05341         sp_3072_mont_mul_68(t[23], t[12], t[11], m, mp);
05342         sp_3072_mont_sqr_68(t[24], t[12], m, mp);
05343         sp_3072_mont_mul_68(t[25], t[13], t[12], m, mp);
05344         sp_3072_mont_sqr_68(t[26], t[13], m, mp);
05345         sp_3072_mont_mul_68(t[27], t[14], t[13], m, mp);
05346         sp_3072_mont_sqr_68(t[28], t[14], m, mp);
05347         sp_3072_mont_mul_68(t[29], t[15], t[14], m, mp);
05348         sp_3072_mont_sqr_68(t[30], t[15], m, mp);
05349         sp_3072_mont_mul_68(t[31], t[16], t[15], m, mp);
05350 
05351         bits = ((bits + 4) / 5) * 5;
05352         i = ((bits + 22) / 23) - 1;
05353         c = bits % 23;
05354         if (c == 0)
05355             c = 23;
05356         if (i < 68)
05357             n = e[i--] << (32 - c);
05358         else {
05359             n = 0;
05360             i--;
05361         }
05362         if (c < 5) {
05363             n |= e[i--] << (9 - c);
05364             c += 23;
05365         }
05366         y = n >> 27;
05367         n <<= 5;
05368         c -= 5;
05369         XMEMCPY(rt, t[y], sizeof(rt));
05370         for (; i>=0 || c>=5; ) {
05371             if (c < 5) {
05372                 n |= e[i--] << (9 - c);
05373                 c += 23;
05374             }
05375             y = (n >> 27) & 0x1f;
05376             n <<= 5;
05377             c -= 5;
05378 
05379             sp_3072_mont_sqr_68(rt, rt, m, mp);
05380             sp_3072_mont_sqr_68(rt, rt, m, mp);
05381             sp_3072_mont_sqr_68(rt, rt, m, mp);
05382             sp_3072_mont_sqr_68(rt, rt, m, mp);
05383             sp_3072_mont_sqr_68(rt, rt, m, mp);
05384 
05385             sp_3072_mont_mul_68(rt, rt, t[y], m, mp);
05386         }
05387 
05388         sp_3072_mont_reduce_68(rt, m, mp);
05389         n = sp_3072_cmp_68(rt, m);
05390         sp_3072_cond_sub_68(rt, rt, m, (n < 0) - 1);
05391         XMEMCPY(r, rt, sizeof(rt));
05392     }
05393 
05394 #ifdef WOLFSSL_SMALL_STACK
05395     if (td != NULL)
05396         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
05397 #endif
05398 
05399     return err;
05400 #endif
05401 }
05402 
05403 #endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
05404 
05405 /* r = 2^n mod m where n is the number of bits to reduce by.
05406  * Given m must be 3072 bits, just need to subtract.
05407  *
05408  * r  A single precision number.
05409  * m  A signle precision number.
05410  */
05411 static void sp_3072_mont_norm_136(sp_digit* r, sp_digit* m)
05412 {
05413     /* Set r = 2^n - 1. */
05414 #ifdef WOLFSSL_SP_SMALL
05415     int i;
05416 
05417     for (i=0; i<135; i++)
05418         r[i] = 0x7fffff;
05419 #else
05420     int i;
05421 
05422     for (i = 0; i < 136; i += 8) {
05423         r[i + 0] = 0x7fffff;
05424         r[i + 1] = 0x7fffff;
05425         r[i + 2] = 0x7fffff;
05426         r[i + 3] = 0x7fffff;
05427         r[i + 4] = 0x7fffff;
05428         r[i + 5] = 0x7fffff;
05429         r[i + 6] = 0x7fffff;
05430         r[i + 7] = 0x7fffff;
05431     }
05432 #endif
05433     r[135] = 0x1fffl;
05434 
05435     /* r = (2^n - 1) mod n */
05436     sp_3072_sub_136(r, r, m);
05437 
05438     /* Add one so r = 2^n mod m */
05439     r[0] += 1;
05440 }
05441 
05442 /* Compare a with b in constant time.
05443  *
05444  * a  A single precision integer.
05445  * b  A single precision integer.
05446  * return -ve, 0 or +ve if a is less than, equal to or greater than b
05447  * respectively.
05448  */
05449 static sp_digit sp_3072_cmp_136(const sp_digit* a, const sp_digit* b)
05450 {
05451     sp_digit r = 0;
05452 #ifdef WOLFSSL_SP_SMALL
05453     int i;
05454 
05455     for (i=135; i>=0; i--)
05456         r |= (a[i] - b[i]) & (0 - !r);
05457 #else
05458     int i;
05459 
05460     for (i = 128; i >= 0; i -= 8) {
05461         r |= (a[i + 7] - b[i + 7]) & (0 - !r);
05462         r |= (a[i + 6] - b[i + 6]) & (0 - !r);
05463         r |= (a[i + 5] - b[i + 5]) & (0 - !r);
05464         r |= (a[i + 4] - b[i + 4]) & (0 - !r);
05465         r |= (a[i + 3] - b[i + 3]) & (0 - !r);
05466         r |= (a[i + 2] - b[i + 2]) & (0 - !r);
05467         r |= (a[i + 1] - b[i + 1]) & (0 - !r);
05468         r |= (a[i + 0] - b[i + 0]) & (0 - !r);
05469     }
05470 #endif /* WOLFSSL_SP_SMALL */
05471 
05472     return r;
05473 }
05474 
05475 /* Conditionally subtract b from a using the mask m.
05476  * m is -1 to subtract and 0 when not.
05477  *
05478  * r  A single precision number representing condition subtract result.
05479  * a  A single precision number to subtract from.
05480  * b  A single precision number to subtract.
05481  * m  Mask value to apply.
05482  */
05483 static void sp_3072_cond_sub_136(sp_digit* r, const sp_digit* a,
05484         const sp_digit* b, const sp_digit m)
05485 {
05486 #ifdef WOLFSSL_SP_SMALL
05487     int i;
05488 
05489     for (i = 0; i < 136; i++)
05490         r[i] = a[i] - (b[i] & m);
05491 #else
05492     int i;
05493 
05494     for (i = 0; i < 136; i += 8) {
05495         r[i + 0] = a[i + 0] - (b[i + 0] & m);
05496         r[i + 1] = a[i + 1] - (b[i + 1] & m);
05497         r[i + 2] = a[i + 2] - (b[i + 2] & m);
05498         r[i + 3] = a[i + 3] - (b[i + 3] & m);
05499         r[i + 4] = a[i + 4] - (b[i + 4] & m);
05500         r[i + 5] = a[i + 5] - (b[i + 5] & m);
05501         r[i + 6] = a[i + 6] - (b[i + 6] & m);
05502         r[i + 7] = a[i + 7] - (b[i + 7] & m);
05503     }
05504 #endif /* WOLFSSL_SP_SMALL */
05505 }
05506 
05507 /* Mul a by scalar b and add into r. (r += a * b)
05508  *
05509  * r  A single precision integer.
05510  * a  A single precision integer.
05511  * b  A scalar.
05512  */
05513 SP_NOINLINE static void sp_3072_mul_add_136(sp_digit* r, const sp_digit* a,
05514         const sp_digit b)
05515 {
05516 #ifdef WOLFSSL_SP_SMALL
05517     int64_t tb = b;
05518     int64_t t = 0;
05519     int i;
05520 
05521     for (i = 0; i < 136; i++) {
05522         t += (tb * a[i]) + r[i];
05523         r[i] = t & 0x7fffff;
05524         t >>= 23;
05525     }
05526     r[136] += t;
05527 #else
05528     int64_t tb = b;
05529     int64_t t[8];
05530     int i;
05531 
05532     t[0] = tb * a[0]; r[0] += t[0] & 0x7fffff;
05533     for (i = 0; i < 136; i += 8) {
05534         t[1] = tb * a[i+1];
05535         r[i+1] += (t[0] >> 23) + (t[1] & 0x7fffff);
05536         t[2] = tb * a[i+2];
05537         r[i+2] += (t[1] >> 23) + (t[2] & 0x7fffff);
05538         t[3] = tb * a[i+3];
05539         r[i+3] += (t[2] >> 23) + (t[3] & 0x7fffff);
05540         t[4] = tb * a[i+4];
05541         r[i+4] += (t[3] >> 23) + (t[4] & 0x7fffff);
05542         t[5] = tb * a[i+5];
05543         r[i+5] += (t[4] >> 23) + (t[5] & 0x7fffff);
05544         t[6] = tb * a[i+6];
05545         r[i+6] += (t[5] >> 23) + (t[6] & 0x7fffff);
05546         t[7] = tb * a[i+7];
05547         r[i+7] += (t[6] >> 23) + (t[7] & 0x7fffff);
05548         t[0] = tb * a[i+8];
05549         r[i+8] += (t[7] >> 23) + (t[0] & 0x7fffff);
05550     }
05551     r[136] +=  t[7] >> 23;
05552 #endif /* WOLFSSL_SP_SMALL */
05553 }
05554 
05555 /* Normalize the values in each word to 23.
05556  *
05557  * a  Array of sp_digit to normalize.
05558  */
05559 static void sp_3072_norm_136(sp_digit* a)
05560 {
05561 #ifdef WOLFSSL_SP_SMALL
05562     int i;
05563     for (i = 0; i < 135; i++) {
05564         a[i+1] += a[i] >> 23;
05565         a[i] &= 0x7fffff;
05566     }
05567 #else
05568     int i;
05569     for (i = 0; i < 128; i += 8) {
05570         a[i+1] += a[i+0] >> 23; a[i+0] &= 0x7fffff;
05571         a[i+2] += a[i+1] >> 23; a[i+1] &= 0x7fffff;
05572         a[i+3] += a[i+2] >> 23; a[i+2] &= 0x7fffff;
05573         a[i+4] += a[i+3] >> 23; a[i+3] &= 0x7fffff;
05574         a[i+5] += a[i+4] >> 23; a[i+4] &= 0x7fffff;
05575         a[i+6] += a[i+5] >> 23; a[i+5] &= 0x7fffff;
05576         a[i+7] += a[i+6] >> 23; a[i+6] &= 0x7fffff;
05577         a[i+8] += a[i+7] >> 23; a[i+7] &= 0x7fffff;
05578         a[i+9] += a[i+8] >> 23; a[i+8] &= 0x7fffff;
05579     }
05580     a[128+1] += a[128] >> 23;
05581     a[128] &= 0x7fffff;
05582     a[129+1] += a[129] >> 23;
05583     a[129] &= 0x7fffff;
05584     a[130+1] += a[130] >> 23;
05585     a[130] &= 0x7fffff;
05586     a[131+1] += a[131] >> 23;
05587     a[131] &= 0x7fffff;
05588     a[132+1] += a[132] >> 23;
05589     a[132] &= 0x7fffff;
05590     a[133+1] += a[133] >> 23;
05591     a[133] &= 0x7fffff;
05592     a[134+1] += a[134] >> 23;
05593     a[134] &= 0x7fffff;
05594 #endif
05595 }
05596 
05597 /* Shift the result in the high 3072 bits down to the bottom.
05598  *
05599  * r  A single precision number.
05600  * a  A single precision number.
05601  */
05602 static void sp_3072_mont_shift_136(sp_digit* r, const sp_digit* a)
05603 {
05604 #ifdef WOLFSSL_SP_SMALL
05605     int i;
05606     int64_t n = a[135] >> 13;
05607     n += ((int64_t)a[136]) << 10;
05608 
05609     for (i = 0; i < 135; i++) {
05610         r[i] = n & 0x7fffff;
05611         n >>= 23;
05612         n += ((int64_t)a[137 + i]) << 10;
05613     }
05614     r[135] = (sp_digit)n;
05615 #else
05616     int i;
05617     int64_t n = a[135] >> 13;
05618     n += ((int64_t)a[136]) << 10;
05619     for (i = 0; i < 136; i += 8) {
05620         r[i + 0] = n & 0x7fffff;
05621         n >>= 23; n += ((int64_t)a[i + 137]) << 10;
05622         r[i + 1] = n & 0x7fffff;
05623         n >>= 23; n += ((int64_t)a[i + 138]) << 10;
05624         r[i + 2] = n & 0x7fffff;
05625         n >>= 23; n += ((int64_t)a[i + 139]) << 10;
05626         r[i + 3] = n & 0x7fffff;
05627         n >>= 23; n += ((int64_t)a[i + 140]) << 10;
05628         r[i + 4] = n & 0x7fffff;
05629         n >>= 23; n += ((int64_t)a[i + 141]) << 10;
05630         r[i + 5] = n & 0x7fffff;
05631         n >>= 23; n += ((int64_t)a[i + 142]) << 10;
05632         r[i + 6] = n & 0x7fffff;
05633         n >>= 23; n += ((int64_t)a[i + 143]) << 10;
05634         r[i + 7] = n & 0x7fffff;
05635         n >>= 23; n += ((int64_t)a[i + 144]) << 10;
05636     }
05637     r[135] = (sp_digit)n;
05638 #endif /* WOLFSSL_SP_SMALL */
05639     XMEMSET(&r[136], 0, sizeof(*r) * 136);
05640 }
05641 
05642 /* Reduce the number back to 3072 bits using Montgomery reduction.
05643  *
05644  * a   A single precision number to reduce in place.
05645  * m   The single precision number representing the modulus.
05646  * mp  The digit representing the negative inverse of m mod 2^n.
05647  */
05648 static void sp_3072_mont_reduce_136(sp_digit* a, sp_digit* m, sp_digit mp)
05649 {
05650     int i;
05651     sp_digit mu;
05652 
05653     if (mp != 1) {
05654         for (i=0; i<135; i++) {
05655             mu = (a[i] * mp) & 0x7fffff;
05656             sp_3072_mul_add_136(a+i, m, mu);
05657             a[i+1] += a[i] >> 23;
05658         }
05659         mu = (a[i] * mp) & 0x1fffl;
05660         sp_3072_mul_add_136(a+i, m, mu);
05661         a[i+1] += a[i] >> 23;
05662         a[i] &= 0x7fffff;
05663     }
05664     else {
05665         for (i=0; i<135; i++) {
05666             mu = a[i] & 0x7fffff;
05667             sp_3072_mul_add_136(a+i, m, mu);
05668             a[i+1] += a[i] >> 23;
05669         }
05670         mu = a[i] & 0x1fffl;
05671         sp_3072_mul_add_136(a+i, m, mu);
05672         a[i+1] += a[i] >> 23;
05673         a[i] &= 0x7fffff;
05674     }
05675 
05676     sp_3072_mont_shift_136(a, a);
05677     sp_3072_cond_sub_136(a, a, m, 0 - ((a[135] >> 13) > 0));
05678     sp_3072_norm_136(a);
05679 }
05680 
05681 /* Multiply two Montogmery form numbers mod the modulus (prime).
05682  * (r = a * b mod m)
05683  *
05684  * r   Result of multiplication.
05685  * a   First number to multiply in Montogmery form.
05686  * b   Second number to multiply in Montogmery form.
05687  * m   Modulus (prime).
05688  * mp  Montogmery mulitplier.
05689  */
05690 static void sp_3072_mont_mul_136(sp_digit* r, sp_digit* a, sp_digit* b,
05691         sp_digit* m, sp_digit mp)
05692 {
05693     sp_3072_mul_136(r, a, b);
05694     sp_3072_mont_reduce_136(r, m, mp);
05695 }
05696 
05697 /* Square the Montgomery form number. (r = a * a mod m)
05698  *
05699  * r   Result of squaring.
05700  * a   Number to square in Montogmery form.
05701  * m   Modulus (prime).
05702  * mp  Montogmery mulitplier.
05703  */
05704 static void sp_3072_mont_sqr_136(sp_digit* r, sp_digit* a, sp_digit* m,
05705         sp_digit mp)
05706 {
05707     sp_3072_sqr_136(r, a);
05708     sp_3072_mont_reduce_136(r, m, mp);
05709 }
05710 
05711 /* Multiply a by scalar b into r. (r = a * b)
05712  *
05713  * r  A single precision integer.
05714  * a  A single precision integer.
05715  * b  A scalar.
05716  */
05717 SP_NOINLINE static void sp_3072_mul_d_136(sp_digit* r, const sp_digit* a,
05718     const sp_digit b)
05719 {
05720 #ifdef WOLFSSL_SP_SMALL
05721     int64_t tb = b;
05722     int64_t t = 0;
05723     int i;
05724 
05725     for (i = 0; i < 136; i++) {
05726         t += tb * a[i];
05727         r[i] = t & 0x7fffff;
05728         t >>= 23;
05729     }
05730     r[136] = (sp_digit)t;
05731 #else
05732     int64_t tb = b;
05733     int64_t t[8];
05734     int i;
05735 
05736     t[0] = tb * a[0]; r[0] = t[0] & 0x7fffff;
05737     for (i = 0; i < 136; i += 8) {
05738         t[1] = tb * a[i+1];
05739         r[i+1] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
05740         t[2] = tb * a[i+2];
05741         r[i+2] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff);
05742         t[3] = tb * a[i+3];
05743         r[i+3] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff);
05744         t[4] = tb * a[i+4];
05745         r[i+4] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff);
05746         t[5] = tb * a[i+5];
05747         r[i+5] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff);
05748         t[6] = tb * a[i+6];
05749         r[i+6] = (sp_digit)(t[5] >> 23) + (t[6] & 0x7fffff);
05750         t[7] = tb * a[i+7];
05751         r[i+7] = (sp_digit)(t[6] >> 23) + (t[7] & 0x7fffff);
05752         t[0] = tb * a[i+8];
05753         r[i+8] = (sp_digit)(t[7] >> 23) + (t[0] & 0x7fffff);
05754     }
05755     r[136] =  (sp_digit)(t[7] >> 23);
05756 #endif /* WOLFSSL_SP_SMALL */
05757 }
05758 
05759 /* Multiply a by scalar b into r. (r = a * b)
05760  *
05761  * r  A single precision integer.
05762  * a  A single precision integer.
05763  * b  A scalar.
05764  */
05765 SP_NOINLINE static void sp_3072_mul_d_272(sp_digit* r, const sp_digit* a,
05766     const sp_digit b)
05767 {
05768 #ifdef WOLFSSL_SP_SMALL
05769     int64_t tb = b;
05770     int64_t t = 0;
05771     int i;
05772 
05773     for (i = 0; i < 272; i++) {
05774         t += tb * a[i];
05775         r[i] = t & 0x7fffff;
05776         t >>= 23;
05777     }
05778     r[272] = (sp_digit)t;
05779 #else
05780     int64_t tb = b;
05781     int64_t t[8];
05782     int i;
05783 
05784     t[0] = tb * a[0]; r[0] = t[0] & 0x7fffff;
05785     for (i = 0; i < 272; i += 8) {
05786         t[1] = tb * a[i+1];
05787         r[i+1] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
05788         t[2] = tb * a[i+2];
05789         r[i+2] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff);
05790         t[3] = tb * a[i+3];
05791         r[i+3] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff);
05792         t[4] = tb * a[i+4];
05793         r[i+4] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff);
05794         t[5] = tb * a[i+5];
05795         r[i+5] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff);
05796         t[6] = tb * a[i+6];
05797         r[i+6] = (sp_digit)(t[5] >> 23) + (t[6] & 0x7fffff);
05798         t[7] = tb * a[i+7];
05799         r[i+7] = (sp_digit)(t[6] >> 23) + (t[7] & 0x7fffff);
05800         t[0] = tb * a[i+8];
05801         r[i+8] = (sp_digit)(t[7] >> 23) + (t[0] & 0x7fffff);
05802     }
05803     r[272] =  (sp_digit)(t[7] >> 23);
05804 #endif /* WOLFSSL_SP_SMALL */
05805 }
05806 
05807 /* Conditionally add a and b using the mask m.
05808  * m is -1 to add and 0 when not.
05809  *
05810  * r  A single precision number representing conditional add result.
05811  * a  A single precision number to add with.
05812  * b  A single precision number to add.
05813  * m  Mask value to apply.
05814  */
05815 static void sp_3072_cond_add_136(sp_digit* r, const sp_digit* a,
05816         const sp_digit* b, const sp_digit m)
05817 {
05818 #ifdef WOLFSSL_SP_SMALL
05819     int i;
05820 
05821     for (i = 0; i < 136; i++)
05822         r[i] = a[i] + (b[i] & m);
05823 #else
05824     int i;
05825 
05826     for (i = 0; i < 136; i += 8) {
05827         r[i + 0] = a[i + 0] + (b[i + 0] & m);
05828         r[i + 1] = a[i + 1] + (b[i + 1] & m);
05829         r[i + 2] = a[i + 2] + (b[i + 2] & m);
05830         r[i + 3] = a[i + 3] + (b[i + 3] & m);
05831         r[i + 4] = a[i + 4] + (b[i + 4] & m);
05832         r[i + 5] = a[i + 5] + (b[i + 5] & m);
05833         r[i + 6] = a[i + 6] + (b[i + 6] & m);
05834         r[i + 7] = a[i + 7] + (b[i + 7] & m);
05835     }
05836 #endif /* WOLFSSL_SP_SMALL */
05837 }
05838 
/* NOTE(review): this definition is guarded by WOLFSSL_SMALL, while the
 * neighbouring functions switch on WOLFSSL_SP_SMALL and the divide routine
 * calls sp_3072_sub_136 unconditionally - confirm the guard name is intended. */
#ifdef WOLFSSL_SMALL
/* Subtract b from a digit by digit and store the result in r. (r = a - b)
 *
 * No borrow is propagated between digits; the return value is always 0.
 *
 * r  A single precision integer.
 * a  A single precision integer.
 * b  A single precision integer.
 */
SP_NOINLINE static int sp_3072_sub_136(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    int k = 0;

    while (k < 136) {
        r[k] = a[k] - b[k];
        k++;
    }

    return 0;
}

#endif
/* NOTE(review): this definition is guarded by WOLFSSL_SMALL, while the
 * neighbouring functions switch on WOLFSSL_SP_SMALL and the divide routine
 * calls sp_3072_add_136 unconditionally - confirm the guard name is intended. */
#ifdef WOLFSSL_SMALL
/* Add b to a digit by digit and store the result in r. (r = a + b)
 *
 * No carry is propagated between digits; the return value is always 0.
 *
 * r  A single precision integer.
 * a  A single precision integer.
 * b  A single precision integer.
 */
SP_NOINLINE static int sp_3072_add_136(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    int k = 0;

    while (k < 136) {
        r[k] = a[k] + b[k];
        k++;
    }

    return 0;
}
#endif
05876 SP_NOINLINE static void sp_3072_rshift_136(sp_digit* r, sp_digit* a, byte n)
05877 {
05878 #ifdef WOLFSSL_SP_SMALL
05879     int i;
05880 
05881     for (i=0; i<135; i++)
05882         r[i] = ((a[i] >> n) | (a[i + 1] << (23 - n))) & 0x7fffff;
05883 #else
05884     r[0] = ((a[0] >> n) | (a[1] << (23 - n))) & 0x7fffff;
05885     r[1] = ((a[1] >> n) | (a[2] << (23 - n))) & 0x7fffff;
05886     r[2] = ((a[2] >> n) | (a[3] << (23 - n))) & 0x7fffff;
05887     r[3] = ((a[3] >> n) | (a[4] << (23 - n))) & 0x7fffff;
05888     r[4] = ((a[4] >> n) | (a[5] << (23 - n))) & 0x7fffff;
05889     r[5] = ((a[5] >> n) | (a[6] << (23 - n))) & 0x7fffff;
05890     r[6] = ((a[6] >> n) | (a[7] << (23 - n))) & 0x7fffff;
05891     r[7] = ((a[7] >> n) | (a[8] << (23 - n))) & 0x7fffff;
05892     r[8] = ((a[8] >> n) | (a[9] << (23 - n))) & 0x7fffff;
05893     r[9] = ((a[9] >> n) | (a[10] << (23 - n))) & 0x7fffff;
05894     r[10] = ((a[10] >> n) | (a[11] << (23 - n))) & 0x7fffff;
05895     r[11] = ((a[11] >> n) | (a[12] << (23 - n))) & 0x7fffff;
05896     r[12] = ((a[12] >> n) | (a[13] << (23 - n))) & 0x7fffff;
05897     r[13] = ((a[13] >> n) | (a[14] << (23 - n))) & 0x7fffff;
05898     r[14] = ((a[14] >> n) | (a[15] << (23 - n))) & 0x7fffff;
05899     r[15] = ((a[15] >> n) | (a[16] << (23 - n))) & 0x7fffff;
05900     r[16] = ((a[16] >> n) | (a[17] << (23 - n))) & 0x7fffff;
05901     r[17] = ((a[17] >> n) | (a[18] << (23 - n))) & 0x7fffff;
05902     r[18] = ((a[18] >> n) | (a[19] << (23 - n))) & 0x7fffff;
05903     r[19] = ((a[19] >> n) | (a[20] << (23 - n))) & 0x7fffff;
05904     r[20] = ((a[20] >> n) | (a[21] << (23 - n))) & 0x7fffff;
05905     r[21] = ((a[21] >> n) | (a[22] << (23 - n))) & 0x7fffff;
05906     r[22] = ((a[22] >> n) | (a[23] << (23 - n))) & 0x7fffff;
05907     r[23] = ((a[23] >> n) | (a[24] << (23 - n))) & 0x7fffff;
05908     r[24] = ((a[24] >> n) | (a[25] << (23 - n))) & 0x7fffff;
05909     r[25] = ((a[25] >> n) | (a[26] << (23 - n))) & 0x7fffff;
05910     r[26] = ((a[26] >> n) | (a[27] << (23 - n))) & 0x7fffff;
05911     r[27] = ((a[27] >> n) | (a[28] << (23 - n))) & 0x7fffff;
05912     r[28] = ((a[28] >> n) | (a[29] << (23 - n))) & 0x7fffff;
05913     r[29] = ((a[29] >> n) | (a[30] << (23 - n))) & 0x7fffff;
05914     r[30] = ((a[30] >> n) | (a[31] << (23 - n))) & 0x7fffff;
05915     r[31] = ((a[31] >> n) | (a[32] << (23 - n))) & 0x7fffff;
05916     r[32] = ((a[32] >> n) | (a[33] << (23 - n))) & 0x7fffff;
05917     r[33] = ((a[33] >> n) | (a[34] << (23 - n))) & 0x7fffff;
05918     r[34] = ((a[34] >> n) | (a[35] << (23 - n))) & 0x7fffff;
05919     r[35] = ((a[35] >> n) | (a[36] << (23 - n))) & 0x7fffff;
05920     r[36] = ((a[36] >> n) | (a[37] << (23 - n))) & 0x7fffff;
05921     r[37] = ((a[37] >> n) | (a[38] << (23 - n))) & 0x7fffff;
05922     r[38] = ((a[38] >> n) | (a[39] << (23 - n))) & 0x7fffff;
05923     r[39] = ((a[39] >> n) | (a[40] << (23 - n))) & 0x7fffff;
05924     r[40] = ((a[40] >> n) | (a[41] << (23 - n))) & 0x7fffff;
05925     r[41] = ((a[41] >> n) | (a[42] << (23 - n))) & 0x7fffff;
05926     r[42] = ((a[42] >> n) | (a[43] << (23 - n))) & 0x7fffff;
05927     r[43] = ((a[43] >> n) | (a[44] << (23 - n))) & 0x7fffff;
05928     r[44] = ((a[44] >> n) | (a[45] << (23 - n))) & 0x7fffff;
05929     r[45] = ((a[45] >> n) | (a[46] << (23 - n))) & 0x7fffff;
05930     r[46] = ((a[46] >> n) | (a[47] << (23 - n))) & 0x7fffff;
05931     r[47] = ((a[47] >> n) | (a[48] << (23 - n))) & 0x7fffff;
05932     r[48] = ((a[48] >> n) | (a[49] << (23 - n))) & 0x7fffff;
05933     r[49] = ((a[49] >> n) | (a[50] << (23 - n))) & 0x7fffff;
05934     r[50] = ((a[50] >> n) | (a[51] << (23 - n))) & 0x7fffff;
05935     r[51] = ((a[51] >> n) | (a[52] << (23 - n))) & 0x7fffff;
05936     r[52] = ((a[52] >> n) | (a[53] << (23 - n))) & 0x7fffff;
05937     r[53] = ((a[53] >> n) | (a[54] << (23 - n))) & 0x7fffff;
05938     r[54] = ((a[54] >> n) | (a[55] << (23 - n))) & 0x7fffff;
05939     r[55] = ((a[55] >> n) | (a[56] << (23 - n))) & 0x7fffff;
05940     r[56] = ((a[56] >> n) | (a[57] << (23 - n))) & 0x7fffff;
05941     r[57] = ((a[57] >> n) | (a[58] << (23 - n))) & 0x7fffff;
05942     r[58] = ((a[58] >> n) | (a[59] << (23 - n))) & 0x7fffff;
05943     r[59] = ((a[59] >> n) | (a[60] << (23 - n))) & 0x7fffff;
05944     r[60] = ((a[60] >> n) | (a[61] << (23 - n))) & 0x7fffff;
05945     r[61] = ((a[61] >> n) | (a[62] << (23 - n))) & 0x7fffff;
05946     r[62] = ((a[62] >> n) | (a[63] << (23 - n))) & 0x7fffff;
05947     r[63] = ((a[63] >> n) | (a[64] << (23 - n))) & 0x7fffff;
05948     r[64] = ((a[64] >> n) | (a[65] << (23 - n))) & 0x7fffff;
05949     r[65] = ((a[65] >> n) | (a[66] << (23 - n))) & 0x7fffff;
05950     r[66] = ((a[66] >> n) | (a[67] << (23 - n))) & 0x7fffff;
05951     r[67] = ((a[67] >> n) | (a[68] << (23 - n))) & 0x7fffff;
05952     r[68] = ((a[68] >> n) | (a[69] << (23 - n))) & 0x7fffff;
05953     r[69] = ((a[69] >> n) | (a[70] << (23 - n))) & 0x7fffff;
05954     r[70] = ((a[70] >> n) | (a[71] << (23 - n))) & 0x7fffff;
05955     r[71] = ((a[71] >> n) | (a[72] << (23 - n))) & 0x7fffff;
05956     r[72] = ((a[72] >> n) | (a[73] << (23 - n))) & 0x7fffff;
05957     r[73] = ((a[73] >> n) | (a[74] << (23 - n))) & 0x7fffff;
05958     r[74] = ((a[74] >> n) | (a[75] << (23 - n))) & 0x7fffff;
05959     r[75] = ((a[75] >> n) | (a[76] << (23 - n))) & 0x7fffff;
05960     r[76] = ((a[76] >> n) | (a[77] << (23 - n))) & 0x7fffff;
05961     r[77] = ((a[77] >> n) | (a[78] << (23 - n))) & 0x7fffff;
05962     r[78] = ((a[78] >> n) | (a[79] << (23 - n))) & 0x7fffff;
05963     r[79] = ((a[79] >> n) | (a[80] << (23 - n))) & 0x7fffff;
05964     r[80] = ((a[80] >> n) | (a[81] << (23 - n))) & 0x7fffff;
05965     r[81] = ((a[81] >> n) | (a[82] << (23 - n))) & 0x7fffff;
05966     r[82] = ((a[82] >> n) | (a[83] << (23 - n))) & 0x7fffff;
05967     r[83] = ((a[83] >> n) | (a[84] << (23 - n))) & 0x7fffff;
05968     r[84] = ((a[84] >> n) | (a[85] << (23 - n))) & 0x7fffff;
05969     r[85] = ((a[85] >> n) | (a[86] << (23 - n))) & 0x7fffff;
05970     r[86] = ((a[86] >> n) | (a[87] << (23 - n))) & 0x7fffff;
05971     r[87] = ((a[87] >> n) | (a[88] << (23 - n))) & 0x7fffff;
05972     r[88] = ((a[88] >> n) | (a[89] << (23 - n))) & 0x7fffff;
05973     r[89] = ((a[89] >> n) | (a[90] << (23 - n))) & 0x7fffff;
05974     r[90] = ((a[90] >> n) | (a[91] << (23 - n))) & 0x7fffff;
05975     r[91] = ((a[91] >> n) | (a[92] << (23 - n))) & 0x7fffff;
05976     r[92] = ((a[92] >> n) | (a[93] << (23 - n))) & 0x7fffff;
05977     r[93] = ((a[93] >> n) | (a[94] << (23 - n))) & 0x7fffff;
05978     r[94] = ((a[94] >> n) | (a[95] << (23 - n))) & 0x7fffff;
05979     r[95] = ((a[95] >> n) | (a[96] << (23 - n))) & 0x7fffff;
05980     r[96] = ((a[96] >> n) | (a[97] << (23 - n))) & 0x7fffff;
05981     r[97] = ((a[97] >> n) | (a[98] << (23 - n))) & 0x7fffff;
05982     r[98] = ((a[98] >> n) | (a[99] << (23 - n))) & 0x7fffff;
05983     r[99] = ((a[99] >> n) | (a[100] << (23 - n))) & 0x7fffff;
05984     r[100] = ((a[100] >> n) | (a[101] << (23 - n))) & 0x7fffff;
05985     r[101] = ((a[101] >> n) | (a[102] << (23 - n))) & 0x7fffff;
05986     r[102] = ((a[102] >> n) | (a[103] << (23 - n))) & 0x7fffff;
05987     r[103] = ((a[103] >> n) | (a[104] << (23 - n))) & 0x7fffff;
05988     r[104] = ((a[104] >> n) | (a[105] << (23 - n))) & 0x7fffff;
05989     r[105] = ((a[105] >> n) | (a[106] << (23 - n))) & 0x7fffff;
05990     r[106] = ((a[106] >> n) | (a[107] << (23 - n))) & 0x7fffff;
05991     r[107] = ((a[107] >> n) | (a[108] << (23 - n))) & 0x7fffff;
05992     r[108] = ((a[108] >> n) | (a[109] << (23 - n))) & 0x7fffff;
05993     r[109] = ((a[109] >> n) | (a[110] << (23 - n))) & 0x7fffff;
05994     r[110] = ((a[110] >> n) | (a[111] << (23 - n))) & 0x7fffff;
05995     r[111] = ((a[111] >> n) | (a[112] << (23 - n))) & 0x7fffff;
05996     r[112] = ((a[112] >> n) | (a[113] << (23 - n))) & 0x7fffff;
05997     r[113] = ((a[113] >> n) | (a[114] << (23 - n))) & 0x7fffff;
05998     r[114] = ((a[114] >> n) | (a[115] << (23 - n))) & 0x7fffff;
05999     r[115] = ((a[115] >> n) | (a[116] << (23 - n))) & 0x7fffff;
06000     r[116] = ((a[116] >> n) | (a[117] << (23 - n))) & 0x7fffff;
06001     r[117] = ((a[117] >> n) | (a[118] << (23 - n))) & 0x7fffff;
06002     r[118] = ((a[118] >> n) | (a[119] << (23 - n))) & 0x7fffff;
06003     r[119] = ((a[119] >> n) | (a[120] << (23 - n))) & 0x7fffff;
06004     r[120] = ((a[120] >> n) | (a[121] << (23 - n))) & 0x7fffff;
06005     r[121] = ((a[121] >> n) | (a[122] << (23 - n))) & 0x7fffff;
06006     r[122] = ((a[122] >> n) | (a[123] << (23 - n))) & 0x7fffff;
06007     r[123] = ((a[123] >> n) | (a[124] << (23 - n))) & 0x7fffff;
06008     r[124] = ((a[124] >> n) | (a[125] << (23 - n))) & 0x7fffff;
06009     r[125] = ((a[125] >> n) | (a[126] << (23 - n))) & 0x7fffff;
06010     r[126] = ((a[126] >> n) | (a[127] << (23 - n))) & 0x7fffff;
06011     r[127] = ((a[127] >> n) | (a[128] << (23 - n))) & 0x7fffff;
06012     r[128] = ((a[128] >> n) | (a[129] << (23 - n))) & 0x7fffff;
06013     r[129] = ((a[129] >> n) | (a[130] << (23 - n))) & 0x7fffff;
06014     r[130] = ((a[130] >> n) | (a[131] << (23 - n))) & 0x7fffff;
06015     r[131] = ((a[131] >> n) | (a[132] << (23 - n))) & 0x7fffff;
06016     r[132] = ((a[132] >> n) | (a[133] << (23 - n))) & 0x7fffff;
06017     r[133] = ((a[133] >> n) | (a[134] << (23 - n))) & 0x7fffff;
06018     r[134] = ((a[134] >> n) | (a[135] << (23 - n))) & 0x7fffff;
06019 #endif
06020     r[135] = a[135] >> n;
06021 }
06022 
06023 /* Divide d in a and put remainder into r (m*d + r = a)
06024  * m is not calculated as it is not needed at this time.
06025  *
06026  * a  Nmber to be divided.
06027  * d  Number to divide with.
06028  * m  Multiplier result.
06029  * r  Remainder from the division.
06030  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
06031  */
06032 static int sp_3072_div_136(sp_digit* a, sp_digit* d, sp_digit* m,
06033         sp_digit* r)
06034 {
06035     int i;
06036     int64_t d1;
06037     sp_digit div, r1;
06038 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
06039     sp_digit* td;
06040 #else
06041     sp_digit t1d[272 + 1], t2d[136 + 1], sdd[136 + 1];
06042 #endif
06043     sp_digit* t1;
06044     sp_digit* t2;
06045     sp_digit* sd;
06046     int err = MP_OKAY;
06047 
06048 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
06049     td = XMALLOC(sizeof(sp_digit) * (4 * 136 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER);
06050     if (td != NULL) {
06051         t1 = td;
06052         t2 = td + 272 + 1;
06053         sd = t2 + 136 + 1;
06054     }
06055     else
06056         err = MEMORY_E;
06057 #else
06058     t1 = t1d;
06059     t2 = t2d;
06060     sd = sdd;
06061 #endif
06062 
06063     (void)m;
06064 
06065     if (err == MP_OKAY) {
06066         sp_3072_mul_d_136(sd, d, 1 << 10);
06067         sp_3072_mul_d_272(t1, a, 1 << 10);
06068         div = sd[135];
06069         for (i=136; i>=0; i--) {
06070             t1[136 + i] += t1[136 + i - 1] >> 23;
06071             t1[136 + i - 1] &= 0x7fffff;
06072             d1 = t1[136 + i];
06073             d1 <<= 23;
06074             d1 += t1[136 + i - 1];
06075             r1 = (sp_digit)(d1 / div);
06076 
06077             sp_3072_mul_d_136(t2, sd, r1);
06078             sp_3072_sub_136(&t1[i], &t1[i], t2);
06079             t1[136 + i] -= t2[136];
06080             t1[136 + i] += t1[136 + i - 1] >> 23;
06081             t1[136 + i - 1] &= 0x7fffff;
06082             r1 = (((-t1[136 + i]) << 23) - t1[136 + i - 1]) / div;
06083             r1 -= t1[136 + i];
06084             sp_3072_mul_d_136(t2, sd, r1);
06085             sp_3072_add_136(&t1[i], &t1[i], t2);
06086             t1[136 + i] += t1[136 + i - 1] >> 23;
06087             t1[136 + i - 1] &= 0x7fffff;
06088         }
06089         t1[136 - 1] += t1[136 - 2] >> 23;
06090         t1[136 - 2] &= 0x7fffff;
06091         d1 = t1[136 - 1];
06092         r1 = (sp_digit)(d1 / div);
06093 
06094         sp_3072_mul_d_136(t2, sd, r1);
06095         sp_3072_sub_136(t1, t1, t2);
06096         XMEMCPY(r, t1, sizeof(*r) * 2 * 136);
06097         for (i=0; i<134; i++) {
06098             r[i+1] += r[i] >> 23;
06099             r[i] &= 0x7fffff;
06100         }
06101         sp_3072_cond_add_136(r, r, sd, 0 - (r[135] < 0));
06102     }
06103 
06104     sp_3072_rshift_136(r, r, 10);
06105 
06106 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
06107     if (td != NULL)
06108         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
06109 #endif
06110 
06111     return err;
06112 }
06113 
06114 /* Reduce a modulo m into r. (r = a mod m)
06115  *
06116  * r  A single precision number that is the reduced result.
06117  * a  A single precision number that is to be reduced.
06118  * m  A single precision number that is the modulus to reduce with.
06119  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
06120  */
06121 static int sp_3072_mod_136(sp_digit* r, sp_digit* a, sp_digit* m)
06122 {
06123     return sp_3072_div_136(a, m, NULL, r);
06124 }
06125 
06126 #if defined(SP_RSA_PRIVATE_EXP_D) || defined(WOLFSSL_HAVE_SP_DH)
06127 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
06128  *
06129  * r     A single precision number that is the result of the operation.
06130  * a     A single precision number being exponentiated.
06131  * e     A single precision number that is the exponent.
06132  * bits  The number of bits in the exponent.
06133  * m     A single precision number that is the modulus.
06134  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
06135  */
06136 static int sp_3072_mod_exp_136(sp_digit* r, sp_digit* a, sp_digit* e, int bits,
06137     sp_digit* m, int reduceA)
06138 {
06139 #ifdef WOLFSSL_SP_SMALL
06140     sp_digit* td;
06141     sp_digit* t[3];
06142     sp_digit* norm;
06143     sp_digit mp = 1;
06144     sp_digit n;
06145     int i;
06146     int c, y;
06147     int err = MP_OKAY;
06148 
06149     td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 136 * 2, NULL,
06150                             DYNAMIC_TYPE_TMP_BUFFER);
06151     if (td == NULL)
06152         err = MEMORY_E;
06153 
06154     if (err == MP_OKAY) {
06155         XMEMSET(td, 0, sizeof(*td) * 3 * 136 * 2);
06156 
06157         norm = t[0] = td;
06158         t[1] = &td[136 * 2];
06159         t[2] = &td[2 * 136 * 2];
06160 
06161         sp_3072_mont_setup(m, &mp);
06162         sp_3072_mont_norm_136(norm, m);
06163 
06164         if (reduceA)
06165             err = sp_3072_mod_136(t[1], a, m);
06166         else
06167             XMEMCPY(t[1], a, sizeof(sp_digit) * 136);
06168     }
06169     if (err == MP_OKAY) {
06170         sp_3072_mul_136(t[1], t[1], norm);
06171         err = sp_3072_mod_136(t[1], t[1], m);
06172     }
06173 
06174     if (err == MP_OKAY) {
06175         i = bits / 23;
06176         c = bits % 23;
06177         n = e[i--] << (23 - c);
06178         for (; ; c--) {
06179             if (c == 0) {
06180                 if (i == -1)
06181                     break;
06182 
06183                 n = e[i--];
06184                 c = 23;
06185             }
06186 
06187             y = (n >> 22) & 1;
06188             n <<= 1;
06189 
06190             sp_3072_mont_mul_136(t[y^1], t[0], t[1], m, mp);
06191 
06192             XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
06193                                   ((size_t)t[1] & addr_mask[y])),
06194                     sizeof(*t[2]) * 136 * 2);
06195             sp_3072_mont_sqr_136(t[2], t[2], m, mp);
06196             XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
06197                             ((size_t)t[1] & addr_mask[y])), t[2],
06198                     sizeof(*t[2]) * 136 * 2);
06199         }
06200 
06201         sp_3072_mont_reduce_136(t[0], m, mp);
06202         n = sp_3072_cmp_136(t[0], m);
06203         sp_3072_cond_sub_136(t[0], t[0], m, (n < 0) - 1);
06204         XMEMCPY(r, t[0], sizeof(*r) * 136 * 2);
06205 
06206     }
06207 
06208     if (td != NULL)
06209         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
06210 
06211     return err;
06212 #elif defined(WOLFSSL_SP_CACHE_RESISTANT)
06213 #ifndef WOLFSSL_SMALL_STACK
06214     sp_digit t[3][272];
06215 #else
06216     sp_digit* td;
06217     sp_digit* t[3];
06218 #endif
06219     sp_digit* norm;
06220     sp_digit mp = 1;
06221     sp_digit n;
06222     int i;
06223     int c, y;
06224     int err = MP_OKAY;
06225 
06226 #ifdef WOLFSSL_SMALL_STACK
06227     td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 136 * 2, NULL,
06228                             DYNAMIC_TYPE_TMP_BUFFER);
06229     if (td == NULL)
06230         err = MEMORY_E;
06231 
06232     if (err == MP_OKAY) {
06233         t[0] = td;
06234         t[1] = &td[136 * 2];
06235         t[2] = &td[2 * 136 * 2];
06236         norm = t[0];
06237     }
06238 #else
06239     norm = t[0];
06240 #endif
06241 
06242     if (err == MP_OKAY) {
06243         sp_3072_mont_setup(m, &mp);
06244         sp_3072_mont_norm_136(norm, m);
06245 
06246         if (reduceA) {
06247             err = sp_3072_mod_136(t[1], a, m);
06248             if (err == MP_OKAY) {
06249                 sp_3072_mul_136(t[1], t[1], norm);
06250                 err = sp_3072_mod_136(t[1], t[1], m);
06251             }
06252         }
06253         else {
06254             sp_3072_mul_136(t[1], a, norm);
06255             err = sp_3072_mod_136(t[1], t[1], m);
06256         }
06257     }
06258 
06259     if (err == MP_OKAY) {
06260         i = bits / 23;
06261         c = bits % 23;
06262         n = e[i--] << (23 - c);
06263         for (; ; c--) {
06264             if (c == 0) {
06265                 if (i == -1)
06266                     break;
06267 
06268                 n = e[i--];
06269                 c = 23;
06270             }
06271 
06272             y = (n >> 22) & 1;
06273             n <<= 1;
06274 
06275             sp_3072_mont_mul_136(t[y^1], t[0], t[1], m, mp);
06276 
06277             XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
06278                                  ((size_t)t[1] & addr_mask[y])), sizeof(t[2]));
06279             sp_3072_mont_sqr_136(t[2], t[2], m, mp);
06280             XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
06281                            ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2]));
06282         }
06283 
06284         sp_3072_mont_reduce_136(t[0], m, mp);
06285         n = sp_3072_cmp_136(t[0], m);
06286         sp_3072_cond_sub_136(t[0], t[0], m, (n < 0) - 1);
06287         XMEMCPY(r, t[0], sizeof(t[0]));
06288     }
06289 
06290 #ifdef WOLFSSL_SMALL_STACK
06291     if (td != NULL)
06292         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
06293 #endif
06294 
06295     return err;
06296 #else
06297 #ifndef WOLFSSL_SMALL_STACK
06298     sp_digit t[32][272];
06299 #else
06300     sp_digit* t[32];
06301     sp_digit* td;
06302 #endif
06303     sp_digit* norm;
06304     sp_digit rt[272];
06305     sp_digit mp = 1;
06306     sp_digit n;
06307     int i;
06308     int c, y;
06309     int err = MP_OKAY;
06310 
06311 #ifdef WOLFSSL_SMALL_STACK
06312     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 272, NULL,
06313                             DYNAMIC_TYPE_TMP_BUFFER);
06314     if (td == NULL)
06315         err = MEMORY_E;
06316 
06317     if (err == MP_OKAY) {
06318         for (i=0; i<32; i++)
06319             t[i] = td + i * 272;
06320         norm = t[0];
06321     }
06322 #else
06323     norm = t[0];
06324 #endif
06325 
06326     if (err == MP_OKAY) {
06327         sp_3072_mont_setup(m, &mp);
06328         sp_3072_mont_norm_136(norm, m);
06329 
06330         if (reduceA) {
06331             err = sp_3072_mod_136(t[1], a, m);
06332             if (err == MP_OKAY) {
06333                 sp_3072_mul_136(t[1], t[1], norm);
06334                 err = sp_3072_mod_136(t[1], t[1], m);
06335             }
06336         }
06337         else {
06338             sp_3072_mul_136(t[1], a, norm);
06339             err = sp_3072_mod_136(t[1], t[1], m);
06340         }
06341     }
06342 
06343     if (err == MP_OKAY) {
06344         sp_3072_mont_sqr_136(t[ 2], t[ 1], m, mp);
06345         sp_3072_mont_mul_136(t[ 3], t[ 2], t[ 1], m, mp);
06346         sp_3072_mont_sqr_136(t[ 4], t[ 2], m, mp);
06347         sp_3072_mont_mul_136(t[ 5], t[ 3], t[ 2], m, mp);
06348         sp_3072_mont_sqr_136(t[ 6], t[ 3], m, mp);
06349         sp_3072_mont_mul_136(t[ 7], t[ 4], t[ 3], m, mp);
06350         sp_3072_mont_sqr_136(t[ 8], t[ 4], m, mp);
06351         sp_3072_mont_mul_136(t[ 9], t[ 5], t[ 4], m, mp);
06352         sp_3072_mont_sqr_136(t[10], t[ 5], m, mp);
06353         sp_3072_mont_mul_136(t[11], t[ 6], t[ 5], m, mp);
06354         sp_3072_mont_sqr_136(t[12], t[ 6], m, mp);
06355         sp_3072_mont_mul_136(t[13], t[ 7], t[ 6], m, mp);
06356         sp_3072_mont_sqr_136(t[14], t[ 7], m, mp);
06357         sp_3072_mont_mul_136(t[15], t[ 8], t[ 7], m, mp);
06358         sp_3072_mont_sqr_136(t[16], t[ 8], m, mp);
06359         sp_3072_mont_mul_136(t[17], t[ 9], t[ 8], m, mp);
06360         sp_3072_mont_sqr_136(t[18], t[ 9], m, mp);
06361         sp_3072_mont_mul_136(t[19], t[10], t[ 9], m, mp);
06362         sp_3072_mont_sqr_136(t[20], t[10], m, mp);
06363         sp_3072_mont_mul_136(t[21], t[11], t[10], m, mp);
06364         sp_3072_mont_sqr_136(t[22], t[11], m, mp);
06365         sp_3072_mont_mul_136(t[23], t[12], t[11], m, mp);
06366         sp_3072_mont_sqr_136(t[24], t[12], m, mp);
06367         sp_3072_mont_mul_136(t[25], t[13], t[12], m, mp);
06368         sp_3072_mont_sqr_136(t[26], t[13], m, mp);
06369         sp_3072_mont_mul_136(t[27], t[14], t[13], m, mp);
06370         sp_3072_mont_sqr_136(t[28], t[14], m, mp);
06371         sp_3072_mont_mul_136(t[29], t[15], t[14], m, mp);
06372         sp_3072_mont_sqr_136(t[30], t[15], m, mp);
06373         sp_3072_mont_mul_136(t[31], t[16], t[15], m, mp);
06374 
06375         bits = ((bits + 4) / 5) * 5;
06376         i = ((bits + 22) / 23) - 1;
06377         c = bits % 23;
06378         if (c == 0)
06379             c = 23;
06380         if (i < 136)
06381             n = e[i--] << (32 - c);
06382         else {
06383             n = 0;
06384             i--;
06385         }
06386         if (c < 5) {
06387             n |= e[i--] << (9 - c);
06388             c += 23;
06389         }
06390         y = n >> 27;
06391         n <<= 5;
06392         c -= 5;
06393         XMEMCPY(rt, t[y], sizeof(rt));
06394         for (; i>=0 || c>=5; ) {
06395             if (c < 5) {
06396                 n |= e[i--] << (9 - c);
06397                 c += 23;
06398             }
06399             y = (n >> 27) & 0x1f;
06400             n <<= 5;
06401             c -= 5;
06402 
06403             sp_3072_mont_sqr_136(rt, rt, m, mp);
06404             sp_3072_mont_sqr_136(rt, rt, m, mp);
06405             sp_3072_mont_sqr_136(rt, rt, m, mp);
06406             sp_3072_mont_sqr_136(rt, rt, m, mp);
06407             sp_3072_mont_sqr_136(rt, rt, m, mp);
06408 
06409             sp_3072_mont_mul_136(rt, rt, t[y], m, mp);
06410         }
06411 
06412         sp_3072_mont_reduce_136(rt, m, mp);
06413         n = sp_3072_cmp_136(rt, m);
06414         sp_3072_cond_sub_136(rt, rt, m, (n < 0) - 1);
06415         XMEMCPY(r, rt, sizeof(rt));
06416     }
06417 
06418 #ifdef WOLFSSL_SMALL_STACK
06419     if (td != NULL)
06420         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
06421 #endif
06422 
06423     return err;
06424 #endif
06425 }
06426 #endif /* SP_RSA_PRIVATE_EXP_D || WOLFSSL_HAVE_SP_DH */
06427 
06428 #if defined(WOLFSSL_HAVE_SP_RSA) && !defined(SP_RSA_PRIVATE_EXP_D) && \
06429                                     !defined(RSA_LOW_MEM)
06430 /* AND m into each word of a and store in r.
06431  *
06432  * r  A single precision integer.
06433  * a  A single precision integer.
06434  * m  Mask to AND against each digit.
06435  */
06436 static void sp_3072_mask_68(sp_digit* r, sp_digit* a, sp_digit m)
06437 {
06438 #ifdef WOLFSSL_SP_SMALL
06439     int i;
06440 
06441     for (i=0; i<68; i++)
06442         r[i] = a[i] & m;
06443 #else
06444     int i;
06445 
06446     for (i = 0; i < 64; i += 8) {
06447         r[i+0] = a[i+0] & m;
06448         r[i+1] = a[i+1] & m;
06449         r[i+2] = a[i+2] & m;
06450         r[i+3] = a[i+3] & m;
06451         r[i+4] = a[i+4] & m;
06452         r[i+5] = a[i+5] & m;
06453         r[i+6] = a[i+6] & m;
06454         r[i+7] = a[i+7] & m;
06455     }
06456     r[64] = a[64] & m;
06457     r[65] = a[65] & m;
06458     r[66] = a[66] & m;
06459     r[67] = a[67] & m;
06460 #endif
06461 }
06462 
06463 #endif
06464 #ifdef WOLFSSL_HAVE_SP_RSA
06465 /* RSA public key operation.
06466  *
06467  * in      Array of bytes representing the number to exponentiate, base.
06468  * inLen   Number of bytes in base.
06469  * em      Public exponent.
06470  * mm      Modulus.
06471  * out     Buffer to hold big-endian bytes of exponentiation result.
06472  *         Must be at least 384 bytes long.
06473  * outLen  Number of bytes in result.
06474  * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
06475  * an array is too long and MEMORY_E when dynamic memory allocation fails.
06476  */
06477 int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
06478     byte* out, word32* outLen)
06479 {
06480 #ifdef WOLFSSL_SP_SMALL
06481     sp_digit* d = NULL;
06482     sp_digit* a;
06483     sp_digit* m;
06484     sp_digit* r;
06485     sp_digit* norm;
06486     sp_digit e[1];
06487     sp_digit mp;
06488     int i;
06489     int err = MP_OKAY;
06490 
06491     if (*outLen < 384)
06492         err = MP_TO_E;
06493     if (err == MP_OKAY && (mp_count_bits(em) > 23 || inLen > 384 ||
06494                                                      mp_count_bits(mm) != 3072))
06495         err = MP_READ_E;
06496 
06497     if (err == MP_OKAY) {
06498         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 136 * 5, NULL,
06499                                DYNAMIC_TYPE_TMP_BUFFER);
06500         if (d == NULL)
06501             err = MEMORY_E;
06502     }
06503 
06504     if (err == MP_OKAY) {
06505         a = d;
06506         r = a + 136 * 2;
06507         m = r + 136 * 2;
06508         norm = r;
06509 
06510         sp_3072_from_bin(a, 136, in, inLen);
06511 #if DIGIT_BIT >= 23
06512         e[0] = em->dp[0];
06513 #else
06514         e[0] = em->dp[0];
06515         if (em->used > 1)
06516             e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
06517 #endif
06518         if (e[0] == 0)
06519             err = MP_EXPTMOD_E;
06520     }
06521 
06522     if (err == MP_OKAY) {
06523         sp_3072_from_mp(m, 136, mm);
06524 
06525         sp_3072_mont_setup(m, &mp);
06526         sp_3072_mont_norm_136(norm, m);
06527     }
06528     if (err == MP_OKAY) {
06529         sp_3072_mul_136(a, a, norm);
06530         err = sp_3072_mod_136(a, a, m);
06531     }
06532     if (err == MP_OKAY) {
06533         for (i=22; i>=0; i--)
06534             if (e[0] >> i)
06535                 break;
06536 
06537         XMEMCPY(r, a, sizeof(sp_digit) * 136 * 2);
06538         for (i--; i>=0; i--) {
06539             sp_3072_mont_sqr_136(r, r, m, mp);
06540 
06541             if (((e[0] >> i) & 1) == 1)
06542                 sp_3072_mont_mul_136(r, r, a, m, mp);
06543         }
06544         sp_3072_mont_reduce_136(r, m, mp);
06545         mp = sp_3072_cmp_136(r, m);
06546         sp_3072_cond_sub_136(r, r, m, (mp < 0) - 1);
06547 
06548         sp_3072_to_bin(r, out);
06549         *outLen = 384;
06550     }
06551 
06552     if (d != NULL)
06553         XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
06554 
06555     return err;
06556 #else
06557 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
06558     sp_digit ad[272], md[136], rd[272];
06559 #else
06560     sp_digit* d = NULL;
06561 #endif
06562     sp_digit* a;
06563     sp_digit* m;
06564     sp_digit* r;
06565     sp_digit e[1];
06566     int err = MP_OKAY;
06567 
06568     if (*outLen < 384)
06569         err = MP_TO_E;
06570     if (err == MP_OKAY && (mp_count_bits(em) > 23 || inLen > 384 ||
06571                                                      mp_count_bits(mm) != 3072))
06572         err = MP_READ_E;
06573 
06574 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
06575     if (err == MP_OKAY) {
06576         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 136 * 5, NULL,
06577                                DYNAMIC_TYPE_TMP_BUFFER);
06578         if (d == NULL)
06579             err = MEMORY_E;
06580     }
06581 
06582     if (err == MP_OKAY) {
06583         a = d;
06584         r = a + 136 * 2;
06585         m = r + 136 * 2;
06586     }
06587 #else
06588     a = ad;
06589     m = md;
06590     r = rd;
06591 #endif
06592 
06593     if (err == MP_OKAY) {
06594         sp_3072_from_bin(a, 136, in, inLen);
06595 #if DIGIT_BIT >= 23
06596         e[0] = em->dp[0];
06597 #else
06598         e[0] = em->dp[0];
06599         if (em->used > 1)
06600             e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
06601 #endif
06602         if (e[0] == 0)
06603             err = MP_EXPTMOD_E;
06604     }
06605     if (err == MP_OKAY) {
06606         sp_3072_from_mp(m, 136, mm);
06607 
06608         if (e[0] == 0x3) {
06609             if (err == MP_OKAY) {
06610                 sp_3072_sqr_136(r, a);
06611                 err = sp_3072_mod_136(r, r, m);
06612             }
06613             if (err == MP_OKAY) {
06614                 sp_3072_mul_136(r, a, r);
06615                 err = sp_3072_mod_136(r, r, m);
06616             }
06617         }
06618         else {
06619             sp_digit* norm = r;
06620             int i;
06621             sp_digit mp;
06622 
06623             sp_3072_mont_setup(m, &mp);
06624             sp_3072_mont_norm_136(norm, m);
06625 
06626             if (err == MP_OKAY) {
06627                 sp_3072_mul_136(a, a, norm);
06628                 err = sp_3072_mod_136(a, a, m);
06629             }
06630 
06631             if (err == MP_OKAY) {
06632                 for (i=22; i>=0; i--)
06633                     if (e[0] >> i)
06634                         break;
06635 
06636                 XMEMCPY(r, a, sizeof(sp_digit) * 272);
06637                 for (i--; i>=0; i--) {
06638                     sp_3072_mont_sqr_136(r, r, m, mp);
06639 
06640                     if (((e[0] >> i) & 1) == 1)
06641                         sp_3072_mont_mul_136(r, r, a, m, mp);
06642                 }
06643                 sp_3072_mont_reduce_136(r, m, mp);
06644                 mp = sp_3072_cmp_136(r, m);
06645                 sp_3072_cond_sub_136(r, r, m, (mp < 0) - 1);
06646             }
06647         }
06648     }
06649 
06650     if (err == MP_OKAY) {
06651         sp_3072_to_bin(r, out);
06652         *outLen = 384;
06653     }
06654 
06655 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
06656     if (d != NULL)
06657         XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
06658 #endif
06659 
06660     return err;
06661 #endif /* WOLFSSL_SP_SMALL */
06662 }
06663 
06664 /* RSA private key operation.
06665  *
06666  * in      Array of bytes representing the number to exponentiate, base.
06667  * inLen   Number of bytes in base.
06668  * dm      Private exponent.
06669  * pm      First prime.
06670  * qm      Second prime.
06671  * dpm     First prime's CRT exponent.
06672  * dqm     Second prime's CRT exponent.
06673  * qim     Inverse of second prime mod p.
06674  * mm      Modulus.
06675  * out     Buffer to hold big-endian bytes of exponentiation result.
06676  *         Must be at least 384 bytes long.
06677  * outLen  Number of bytes in result.
06678  * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
06679  * an array is too long and MEMORY_E when dynamic memory allocation fails.
06680  */
06681 int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm,
06682     mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
06683     byte* out, word32* outLen)
06684 {
06685 #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
06686 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
06687     sp_digit* a;
06688     sp_digit* d = NULL;
06689     sp_digit* m;
06690     sp_digit* r;
06691     int err = MP_OKAY;
06692 
06693     (void)pm;
06694     (void)qm;
06695     (void)dpm;
06696     (void)dqm;
06697     (void)qim;
06698 
06699     if (*outLen < 384)
06700         err = MP_TO_E;
06701     if (err == MP_OKAY && (mp_count_bits(dm) > 3072 || inLen > 384 ||
06702                                                      mp_count_bits(mm) != 3072))
06703         err = MP_READ_E;
06704 
06705     if (err == MP_OKAY) {
06706         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 136 * 4, NULL,
06707                                DYNAMIC_TYPE_TMP_BUFFER);
06708         if (d == NULL)
06709             err = MEMORY_E;
06710     }
06711     if (err == MP_OKAY) {
06712         a = d + 136;
06713         m = a + 136;
06714         r = a;
06715 
06716         sp_3072_from_bin(a, 136, in, inLen);
06717         sp_3072_from_mp(d, 136, dm);
06718         sp_3072_from_mp(m, 136, mm);
06719         err = sp_3072_mod_exp_136(r, a, d, 3072, m, 0);
06720     }
06721     if (err == MP_OKAY) {
06722         sp_3072_to_bin(r, out);
06723         *outLen = 384;
06724     }
06725 
06726     if (d != NULL) {
06727         XMEMSET(d, 0, sizeof(sp_digit) * 136);
06728         XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
06729     }
06730 
06731     return err;
06732 #else
06733     sp_digit a[272], d[136], m[136];
06734     sp_digit* r = a;
06735     int err = MP_OKAY;
06736 
06737     (void)pm;
06738     (void)qm;
06739     (void)dpm;
06740     (void)dqm;
06741     (void)qim;
06742 
06743     if (*outLen < 384)
06744         err = MP_TO_E;
06745     if (err == MP_OKAY && (mp_count_bits(dm) > 3072 || inLen > 384 ||
06746                                                      mp_count_bits(mm) != 3072))
06747         err = MP_READ_E;
06748 
06749     if (err == MP_OKAY) {
06750         sp_3072_from_bin(a, 136, in, inLen);
06751         sp_3072_from_mp(d, 136, dm);
06752         sp_3072_from_mp(m, 136, mm);
06753         err = sp_3072_mod_exp_136(r, a, d, 3072, m, 0);
06754     }
06755 
06756     if (err == MP_OKAY) {
06757         sp_3072_to_bin(r, out);
06758         *outLen = 384;
06759     }
06760 
06761     XMEMSET(d, 0, sizeof(sp_digit) * 136);
06762 
06763     return err;
06764 #endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */
06765 #else
06766 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
06767     sp_digit* t = NULL;
06768     sp_digit* a;
06769     sp_digit* p;
06770     sp_digit* q;
06771     sp_digit* dp;
06772     sp_digit* dq;
06773     sp_digit* qi;
06774     sp_digit* tmp;
06775     sp_digit* tmpa;
06776     sp_digit* tmpb;
06777     sp_digit* r;
06778     int err = MP_OKAY;
06779 
06780     (void)dm;
06781     (void)mm;
06782 
06783     if (*outLen < 384)
06784         err = MP_TO_E;
06785     if (err == MP_OKAY && (inLen > 384 || mp_count_bits(mm) != 3072))
06786         err = MP_READ_E;
06787 
06788     if (err == MP_OKAY) {
06789         t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 68 * 11, NULL,
06790                                DYNAMIC_TYPE_TMP_BUFFER);
06791         if (t == NULL)
06792             err = MEMORY_E;
06793     }
06794     if (err == MP_OKAY) {
06795         a = t;
06796         p = a + 136 * 2;
06797         q = p + 68;
06798         qi = dq = dp = q + 68;
06799         tmpa = qi + 68;
06800         tmpb = tmpa + 136;
06801 
06802         tmp = t;
06803         r = tmp + 136;
06804 
06805         sp_3072_from_bin(a, 136, in, inLen);
06806         sp_3072_from_mp(p, 68, pm);
06807         sp_3072_from_mp(q, 68, qm);
06808         sp_3072_from_mp(dp, 68, dpm);
06809         err = sp_3072_mod_exp_68(tmpa, a, dp, 1536, p, 1);
06810     }
06811     if (err == MP_OKAY) {
06812         sp_3072_from_mp(dq, 68, dqm);
06813         err = sp_3072_mod_exp_68(tmpb, a, dq, 1536, q, 1);
06814     }
06815     if (err == MP_OKAY) {
06816         sp_3072_sub_68(tmpa, tmpa, tmpb);
06817         sp_3072_mask_68(tmp, p, tmpa[67] >> 31);
06818         sp_3072_add_68(tmpa, tmpa, tmp);
06819 
06820         sp_3072_from_mp(qi, 68, qim);
06821         sp_3072_mul_68(tmpa, tmpa, qi);
06822         err = sp_3072_mod_68(tmpa, tmpa, p);
06823     }
06824 
06825     if (err == MP_OKAY) {
06826         sp_3072_mul_68(tmpa, q, tmpa);
06827         sp_3072_add_136(r, tmpb, tmpa);
06828         sp_3072_norm_136(r);
06829 
06830         sp_3072_to_bin(r, out);
06831         *outLen = 384;
06832     }
06833 
06834     if (t != NULL) {
06835         XMEMSET(t, 0, sizeof(sp_digit) * 68 * 11);
06836         XFREE(t, NULL, DYNAMIC_TYPE_TMP_BUFFER);
06837     }
06838 
06839     return err;
06840 #else
06841     sp_digit a[136 * 2];
06842     sp_digit p[68], q[68], dp[68], dq[68], qi[68];
06843     sp_digit tmp[136], tmpa[136], tmpb[136];
06844     sp_digit* r = a;
06845     int err = MP_OKAY;
06846 
06847     (void)dm;
06848     (void)mm;
06849 
06850     if (*outLen < 384)
06851         err = MP_TO_E;
06852     if (err == MP_OKAY && (inLen > 384 || mp_count_bits(mm) != 3072))
06853         err = MP_READ_E;
06854 
06855     if (err == MP_OKAY) {
06856         sp_3072_from_bin(a, 136, in, inLen);
06857         sp_3072_from_mp(p, 68, pm);
06858         sp_3072_from_mp(q, 68, qm);
06859         sp_3072_from_mp(dp, 68, dpm);
06860         sp_3072_from_mp(dq, 68, dqm);
06861         sp_3072_from_mp(qi, 68, qim);
06862 
06863         err = sp_3072_mod_exp_68(tmpa, a, dp, 1536, p, 1);
06864     }
06865     if (err == MP_OKAY)
06866         err = sp_3072_mod_exp_68(tmpb, a, dq, 1536, q, 1);
06867 
06868     if (err == MP_OKAY) {
06869         sp_3072_sub_68(tmpa, tmpa, tmpb);
06870         sp_3072_mask_68(tmp, p, tmpa[67] >> 31);
06871         sp_3072_add_68(tmpa, tmpa, tmp);
06872         sp_3072_mul_68(tmpa, tmpa, qi);
06873         err = sp_3072_mod_68(tmpa, tmpa, p);
06874     }
06875 
06876     if (err == MP_OKAY) {
06877         sp_3072_mul_68(tmpa, tmpa, q);
06878         sp_3072_add_136(r, tmpb, tmpa);
06879         sp_3072_norm_136(r);
06880 
06881         sp_3072_to_bin(r, out);
06882         *outLen = 384;
06883     }
06884 
06885     XMEMSET(tmpa, 0, sizeof(tmpa));
06886     XMEMSET(tmpb, 0, sizeof(tmpb));
06887     XMEMSET(p, 0, sizeof(p));
06888     XMEMSET(q, 0, sizeof(q));
06889     XMEMSET(dp, 0, sizeof(dp));
06890     XMEMSET(dq, 0, sizeof(dq));
06891     XMEMSET(qi, 0, sizeof(qi));
06892 
06893     return err;
06894 #endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */
06895 #endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
06896 }
06897 
06898 #endif /* WOLFSSL_HAVE_SP_RSA */
06899 #ifdef WOLFSSL_HAVE_SP_DH
06900 /* Convert an array of sp_digit to an mp_int.
06901  *
06902  * a  A single precision integer.
06903  * r  A multi-precision integer.
06904  */
06905 static int sp_3072_to_mp(sp_digit* a, mp_int* r)
06906 {
06907     int err;
06908 
06909     err = mp_grow(r, (3072 + DIGIT_BIT - 1) / DIGIT_BIT);
06910     if (err == MP_OKAY) {
06911 #if DIGIT_BIT == 23
06912         XMEMCPY(r->dp, a, sizeof(sp_digit) * 136);
06913         r->used = 136;
06914         mp_clamp(r);
06915 #elif DIGIT_BIT < 23
06916         int i, j = 0, s = 0;
06917 
06918         r->dp[0] = 0;
06919         for (i = 0; i < 136; i++) {
06920             r->dp[j] |= a[i] << s;
06921             r->dp[j] &= (1l << DIGIT_BIT) - 1;
06922             s = DIGIT_BIT - s;
06923             r->dp[++j] = a[i] >> s;
06924             while (s + DIGIT_BIT <= 23) {
06925                 s += DIGIT_BIT;
06926                 r->dp[j] &= (1l << DIGIT_BIT) - 1;
06927                 r->dp[++j] = a[i] >> s;
06928             }
06929             s = 23 - s;
06930         }
06931         r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
06932         mp_clamp(r);
06933 #else
06934         int i, j = 0, s = 0;
06935 
06936         r->dp[0] = 0;
06937         for (i = 0; i < 136; i++) {
06938             r->dp[j] |= ((mp_digit)a[i]) << s;
06939             if (s + 23 >= DIGIT_BIT) {
06940     #if DIGIT_BIT < 32
06941                 r->dp[j] &= (1l << DIGIT_BIT) - 1;
06942     #endif
06943                 s = DIGIT_BIT - s;
06944                 r->dp[++j] = a[i] >> s;
06945                 s = 23 - s;
06946             }
06947             else
06948                 s += 23;
06949         }
06950         r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
06951         mp_clamp(r);
06952 #endif
06953     }
06954 
06955     return err;
06956 }
06957 
06958 /* Perform the modular exponentiation for Diffie-Hellman.
06959  *
06960  * base  Base. MP integer.
06961  * exp   Exponent. MP integer.
06962  * mod   Modulus. MP integer.
06963  * res   Result. MP integer.
06964  * returs 0 on success, MP_READ_E if there are too many bytes in an array
06965  * and MEMORY_E if memory allocation fails.
06966  */
06967 int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
06968 {
06969 #ifdef WOLFSSL_SP_SMALL
06970     int err = MP_OKAY;
06971     sp_digit* d = NULL;
06972     sp_digit* b;
06973     sp_digit* e;
06974     sp_digit* m;
06975     sp_digit* r;
06976     int expBits = mp_count_bits(exp);
06977 
06978     if (mp_count_bits(base) > 3072 || expBits > 3072 ||
06979                                                    mp_count_bits(mod) != 3072) {
06980         err = MP_READ_E;
06981     }
06982 
06983     if (err == MP_OKAY) {
06984         d = (sp_digit*)XMALLOC(sizeof(*d) * 136 * 4, NULL,
06985                                DYNAMIC_TYPE_TMP_BUFFER);
06986         if (d == NULL)
06987             err = MEMORY_E;
06988     }
06989 
06990     if (err == MP_OKAY) {
06991         b = d;
06992         e = b + 136 * 2;
06993         m = e + 136;
06994         r = b;
06995 
06996         sp_3072_from_mp(b, 136, base);
06997         sp_3072_from_mp(e, 136, exp);
06998         sp_3072_from_mp(m, 136, mod);
06999 
07000         err = sp_3072_mod_exp_136(r, b, e, mp_count_bits(exp), m, 0);
07001     }
07002 
07003     if (err == MP_OKAY) {
07004         err = sp_3072_to_mp(r, res);
07005     }
07006 
07007     if (d != NULL) {
07008         XMEMSET(e, 0, sizeof(sp_digit) * 136);
07009         XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
07010     }
07011     return err;
07012 #else
07013 #ifndef WOLFSSL_SMALL_STACK
07014     sp_digit bd[272], ed[136], md[136];
07015 #else
07016     sp_digit* d = NULL;
07017 #endif
07018     sp_digit* b;
07019     sp_digit* e;
07020     sp_digit* m;
07021     sp_digit* r;
07022     int err = MP_OKAY;
07023     int expBits = mp_count_bits(exp);
07024 
07025     if (mp_count_bits(base) > 3072 || expBits > 3072 ||
07026                                                    mp_count_bits(mod) != 3072) {
07027         err = MP_READ_E;
07028     }
07029 
07030 #ifdef WOLFSSL_SMALL_STACK
07031     if (err == MP_OKAY) {
07032         d = (sp_digit*)XMALLOC(sizeof(*d) * 136 * 4, NULL,
07033                                DYNAMIC_TYPE_TMP_BUFFER);
07034         if (d == NULL)
07035             err = MEMORY_E;
07036     }
07037 
07038     if (err == MP_OKAY) {
07039         b = d;
07040         e = b + 136 * 2;
07041         m = e + 136;
07042         r = b;
07043     }
07044 #else
07045     r = b = bd;
07046     e = ed;
07047     m = md;
07048 #endif
07049 
07050     if (err == MP_OKAY) {
07051         sp_3072_from_mp(b, 136, base);
07052         sp_3072_from_mp(e, 136, exp);
07053         sp_3072_from_mp(m, 136, mod);
07054 
07055         err = sp_3072_mod_exp_136(r, b, e, expBits, m, 0);
07056     }
07057 
07058     if (err == MP_OKAY) {
07059         err = sp_3072_to_mp(r, res);
07060     }
07061 
07062     XMEMSET(e, 0, sizeof(sp_digit) * 136);
07063 
07064 #ifdef WOLFSSL_SMALL_STACK
07065     if (d != NULL)
07066         XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
07067 #endif
07068 
07069     return err;
07070 #endif
07071 }
07072 
07073 /* Perform the modular exponentiation for Diffie-Hellman.
07074  *
07075  * base     Base.
07076  * exp      Array of bytes that is the exponent.
07077  * expLen   Length of data, in bytes, in exponent.
07078  * mod      Modulus.
07079  * out      Buffer to hold big-endian bytes of exponentiation result.
07080  *          Must be at least 384 bytes long.
07081  * outLen   Length, in bytes, of exponentiation result.
07082  * returs 0 on success, MP_READ_E if there are too many bytes in an array
07083  * and MEMORY_E if memory allocation fails.
07084  */
07085 int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen,
07086     mp_int* mod, byte* out, word32* outLen)
07087 {
07088 #ifdef WOLFSSL_SP_SMALL
07089     int err = MP_OKAY;
07090     sp_digit* d = NULL;
07091     sp_digit* b;
07092     sp_digit* e;
07093     sp_digit* m;
07094     sp_digit* r;
07095     word32 i;
07096 
07097     if (mp_count_bits(base) > 3072 || expLen > 384 ||
07098                                                    mp_count_bits(mod) != 3072) {
07099         err = MP_READ_E;
07100     }
07101 
07102     if (err == MP_OKAY) {
07103         d = (sp_digit*)XMALLOC(sizeof(*d) * 136 * 4, NULL,
07104                                DYNAMIC_TYPE_TMP_BUFFER);
07105         if (d == NULL)
07106             err = MEMORY_E;
07107     }
07108 
07109     if (err == MP_OKAY) {
07110         b = d;
07111         e = b + 136 * 2;
07112         m = e + 136;
07113         r = b;
07114 
07115         sp_3072_from_mp(b, 136, base);
07116         sp_3072_from_bin(e, 136, exp, expLen);
07117         sp_3072_from_mp(m, 136, mod);
07118 
07119         err = sp_3072_mod_exp_136(r, b, e, expLen * 8, m, 0);
07120     }
07121 
07122     if (err == MP_OKAY) {
07123         sp_3072_to_bin(r, out);
07124         *outLen = 384;
07125         for (i=0; i<384 && out[i] == 0; i++) {
07126         }
07127         *outLen -= i;
07128         XMEMMOVE(out, out + i, *outLen);
07129     }
07130 
07131     if (d != NULL) {
07132         XMEMSET(e, 0, sizeof(sp_digit) * 136);
07133         XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
07134     }
07135     return err;
07136 #else
07137 #ifndef WOLFSSL_SMALL_STACK
07138     sp_digit bd[272], ed[136], md[136];
07139 #else
07140     sp_digit* d = NULL;
07141 #endif
07142     sp_digit* b;
07143     sp_digit* e;
07144     sp_digit* m;
07145     sp_digit* r;
07146     word32 i;
07147     int err = MP_OKAY;
07148 
07149     if (mp_count_bits(base) > 3072 || expLen > 384 ||
07150                                                    mp_count_bits(mod) != 3072) {
07151         err = MP_READ_E;
07152     }
07153 
07154 #ifdef WOLFSSL_SMALL_STACK
07155     if (err == MP_OKAY) {
07156         d = (sp_digit*)XMALLOC(sizeof(*d) * 136 * 4, NULL,
07157                                DYNAMIC_TYPE_TMP_BUFFER);
07158         if (d == NULL)
07159             err = MEMORY_E;
07160     }
07161 
07162     if (err == MP_OKAY) {
07163         b = d;
07164         e = b + 136 * 2;
07165         m = e + 136;
07166         r = b;
07167     }
07168 #else
07169     r = b = bd;
07170     e = ed;
07171     m = md;
07172 #endif
07173 
07174     if (err == MP_OKAY) {
07175         sp_3072_from_mp(b, 136, base);
07176         sp_3072_from_bin(e, 136, exp, expLen);
07177         sp_3072_from_mp(m, 136, mod);
07178 
07179         err = sp_3072_mod_exp_136(r, b, e, expLen * 8, m, 0);
07180     }
07181 
07182     if (err == MP_OKAY) {
07183         sp_3072_to_bin(r, out);
07184         *outLen = 384;
07185         for (i=0; i<384 && out[i] == 0; i++) {
07186         }
07187         *outLen -= i;
07188         XMEMMOVE(out, out + i, *outLen);
07189     }
07190 
07191     XMEMSET(e, 0, sizeof(sp_digit) * 136);
07192 
07193 #ifdef WOLFSSL_SMALL_STACK
07194     if (d != NULL)
07195         XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
07196 #endif
07197 
07198     return err;
07199 #endif
07200 }
07201 
07202 #endif /* WOLFSSL_HAVE_SP_DH */
07203 
07204 #endif /* WOLFSSL_SP_NO_3072 */
07205 
07206 #endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
07207 #ifdef WOLFSSL_HAVE_SP_ECC
07208 #ifndef WOLFSSL_SP_NO_256
07209 
07210 /* Point structure to use. */
07211 typedef struct sp_point {
07212     sp_digit x[2 * 10];
07213     sp_digit y[2 * 10];
07214     sp_digit z[2 * 10];
07215     int infinity;
07216 } sp_point;
07217 
07218 /* The modulus (prime) of the curve P256. */
07219 static sp_digit p256_mod[10] = {
07220     0x3ffffff,0x3ffffff,0x3ffffff,0x003ffff,0x0000000,0x0000000,0x0000000,
07221     0x0000400,0x3ff0000,0x03fffff
07222 };
07223 #ifndef WOLFSSL_SP_SMALL
07224 /* The Montogmery normalizer for modulus of the curve P256. */
07225 static sp_digit p256_norm_mod[10] = {
07226     0x0000001,0x0000000,0x0000000,0x3fc0000,0x3ffffff,0x3ffffff,0x3ffffff,
07227     0x3fffbff,0x000ffff,0x0000000
07228 };
07229 #endif /* WOLFSSL_SP_SMALL */
07230 /* The Montogmery multiplier for modulus of the curve P256. */
07231 static sp_digit p256_mp_mod = 0x000001;
07232 #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
07233                                             defined(HAVE_ECC_VERIFY)
07234 /* The order of the curve P256. */
07235 static sp_digit p256_order[10] = {
07236     0x0632551,0x272b0bf,0x1e84f3b,0x2b69c5e,0x3bce6fa,0x3ffffff,0x3ffffff,
07237     0x00003ff,0x3ff0000,0x03fffff
07238 };
07239 #endif
07240 /* The order of the curve P256 minus 2. */
07241 static sp_digit p256_order2[10] = {
07242     0x063254f,0x272b0bf,0x1e84f3b,0x2b69c5e,0x3bce6fa,0x3ffffff,0x3ffffff,
07243     0x00003ff,0x3ff0000,0x03fffff
07244 };
07245 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
07246 /* The Montogmery normalizer for order of the curve P256. */
07247 static sp_digit p256_norm_order[10] = {
07248     0x39cdaaf,0x18d4f40,0x217b0c4,0x14963a1,0x0431905,0x0000000,0x0000000,
07249     0x3fffc00,0x000ffff,0x0000000
07250 };
07251 #endif
07252 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
07253 /* The Montogmery multiplier for order of the curve P256. */
07254 static sp_digit p256_mp_order = 0x200bc4f;
07255 #endif
07256 /* The base point of curve P256. */
07257 static sp_point p256_base = {
07258     /* X ordinate */
07259     {
07260         0x098c296,0x04e5176,0x33a0f4a,0x204b7ac,0x277037d,0x0e9103c,0x3ce6e56,
07261         0x1091fe2,0x1f2e12c,0x01ac5f4
07262     },
07263     /* Y ordinate */
07264     {
07265         0x3bf51f5,0x1901a0d,0x1ececbb,0x15dacc5,0x22bce33,0x303e785,0x27eb4a7,
07266         0x1fe6e3b,0x2e2fe1a,0x013f8d0
07267     },
07268     /* Z ordinate */
07269     {
07270         0x0000001,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,
07271         0x0000000,0x0000000,0x0000000
07272     },
07273     /* infinity */
07274     0
07275 };
07276 #if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
07277 static sp_digit p256_b[10] = {
07278     0x3d2604b,0x38f0f89,0x30f63bc,0x2c3314e,0x0651d06,0x1a621af,0x2bbd557,
07279     0x24f9ecf,0x1d8aa3a,0x016b18d
07280 };
07281 #endif
07282 
/* Obtain storage for a point and evaluate to an error code.
 *
 * heap  Heap hint passed to XMALLOC (unused when the stack is used).
 * sp    Stack point to use when not allocating (unused when allocating).
 * p     Pointer variable that is set to the point storage.
 * Evaluates to MEMORY_E when no storage is available and MP_OKAY otherwise.
 *
 * The arguments and the whole expansion are parenthesized so the macro can be
 * used inside a larger expression (a comparison, an addition) without the
 * conditional operator swallowing the surrounding operands.
 */
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
/* Allocate memory for point and return error. */
#define sp_ecc_point_new(heap, sp, p)                                        \
    ((((p) = XMALLOC(sizeof(sp_point), (heap), DYNAMIC_TYPE_ECC)) == NULL) ? \
        MEMORY_E : MP_OKAY)
#else
/* Set pointer to data and return no error. */
#define sp_ecc_point_new(heap, sp, p)   \
    ((((p) = &(sp)) == NULL) ? MEMORY_E : MP_OKAY)
#endif
07292 
/* Release a point obtained with sp_ecc_point_new.
 *
 * p      Pointer to the point; may be any pointer expression.
 * clear  When non-zero the point data is set to zero first.
 * heap   Heap hint passed to XFREE (unused when the stack is used).
 *
 * The pointer argument is parenthesized so that expressions such as
 * (points + 1) are dereferenced and sized correctly.
 */
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
/* If valid pointer then clear point data if requested and free data. */
#define sp_ecc_point_free(p, clear, heap)         \
    do {                                          \
        if ((p) != NULL) {                        \
            if (clear)                            \
                XMEMSET((p), 0, sizeof(*(p)));    \
            XFREE((p), (heap), DYNAMIC_TYPE_ECC); \
        }                                         \
    }                                             \
    while (0)
#else
/* Clear point data if requested. */
#define sp_ecc_point_free(p, clear, heap)     \
    do {                                      \
        if (clear)                            \
            XMEMSET((p), 0, sizeof(*(p)));    \
    }                                         \
    while (0)
#endif
07313 
07314 /* Multiply a number by Montgomery normalizer mod modulus (prime).
07315  *
07316  * r  The resulting Montgomery form number.
07317  * a  The number to convert.
07318  * m  The modulus (prime).
07319  * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
07320  */
07321 static int sp_256_mod_mul_norm_10(sp_digit* r, sp_digit* a, sp_digit* m)
07322 {
07323 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
07324     int64_t* td;
07325 #else
07326     int64_t td[8];
07327     int64_t a32d[8];
07328 #endif
07329     int64_t* t;
07330     int64_t* a32;
07331     int64_t o;
07332     int err = MP_OKAY;
07333 
07334     (void)m;
07335 
07336 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
07337     td = XMALLOC(sizeof(int64_t) * 2 * 8, NULL, DYNAMIC_TYPE_ECC);
07338     if (td != NULL) {
07339         t = td;
07340         a32 = td + 8;
07341     }
07342     else
07343         err = MEMORY_E;
07344 #else
07345     t = td;
07346     a32 = a32d;
07347 #endif
07348 
07349     if (err == MP_OKAY) {
07350         a32[0] = a[0];
07351         a32[0] |= a[1] << 26;
07352         a32[0] &= 0xffffffff;
07353         a32[1] = (sp_digit)(a[1] >> 6);
07354         a32[1] |= a[2] << 20;
07355         a32[1] &= 0xffffffff;
07356         a32[2] = (sp_digit)(a[2] >> 12);
07357         a32[2] |= a[3] << 14;
07358         a32[2] &= 0xffffffff;
07359         a32[3] = (sp_digit)(a[3] >> 18);
07360         a32[3] |= a[4] << 8;
07361         a32[3] &= 0xffffffff;
07362         a32[4] = (sp_digit)(a[4] >> 24);
07363         a32[4] |= a[5] << 2;
07364         a32[4] |= a[6] << 28;
07365         a32[4] &= 0xffffffff;
07366         a32[5] = (sp_digit)(a[6] >> 4);
07367         a32[5] |= a[7] << 22;
07368         a32[5] &= 0xffffffff;
07369         a32[6] = (sp_digit)(a[7] >> 10);
07370         a32[6] |= a[8] << 16;
07371         a32[6] &= 0xffffffff;
07372         a32[7] = (sp_digit)(a[8] >> 16);
07373         a32[7] |= a[9] << 10;
07374         a32[7] &= 0xffffffff;
07375 
07376         /*  1  1  0 -1 -1 -1 -1  0 */
07377         t[0] = 0 + a32[0] + a32[1] - a32[3] - a32[4] - a32[5] - a32[6];
07378         /*  0  1  1  0 -1 -1 -1 -1 */
07379         t[1] = 0 + a32[1] + a32[2] - a32[4] - a32[5] - a32[6] - a32[7];
07380         /*  0  0  1  1  0 -1 -1 -1 */
07381         t[2] = 0 + a32[2] + a32[3] - a32[5] - a32[6] - a32[7];
07382         /* -1 -1  0  2  2  1  0 -1 */
07383         t[3] = 0 - a32[0] - a32[1] + 2 * a32[3] + 2 * a32[4] + a32[5] - a32[7];
07384         /*  0 -1 -1  0  2  2  1  0 */
07385         t[4] = 0 - a32[1] - a32[2] + 2 * a32[4] + 2 * a32[5] + a32[6];
07386         /*  0  0 -1 -1  0  2  2  1 */
07387         t[5] = 0 - a32[2] - a32[3] + 2 * a32[5] + 2 * a32[6] + a32[7];
07388         /* -1 -1  0  0  0  1  3  2 */
07389         t[6] = 0 - a32[0] - a32[1] + a32[5] + 3 * a32[6] + 2 * a32[7];
07390         /*  1  0 -1 -1 -1 -1  0  3 */
07391         t[7] = 0 + a32[0] - a32[2] - a32[3] - a32[4] - a32[5] + 3 * a32[7];
07392 
07393         t[1] += t[0] >> 32; t[0] &= 0xffffffff;
07394         t[2] += t[1] >> 32; t[1] &= 0xffffffff;
07395         t[3] += t[2] >> 32; t[2] &= 0xffffffff;
07396         t[4] += t[3] >> 32; t[3] &= 0xffffffff;
07397         t[5] += t[4] >> 32; t[4] &= 0xffffffff;
07398         t[6] += t[5] >> 32; t[5] &= 0xffffffff;
07399         t[7] += t[6] >> 32; t[6] &= 0xffffffff;
07400         o     = t[7] >> 32; t[7] &= 0xffffffff;
07401         t[0] += o;
07402         t[3] -= o;
07403         t[6] -= o;
07404         t[7] += o;
07405         t[1] += t[0] >> 32; t[0] &= 0xffffffff;
07406         t[2] += t[1] >> 32; t[1] &= 0xffffffff;
07407         t[3] += t[2] >> 32; t[2] &= 0xffffffff;
07408         t[4] += t[3] >> 32; t[3] &= 0xffffffff;
07409         t[5] += t[4] >> 32; t[4] &= 0xffffffff;
07410         t[6] += t[5] >> 32; t[5] &= 0xffffffff;
07411         t[7] += t[6] >> 32; t[6] &= 0xffffffff;
07412 
07413         r[0] = (sp_digit)(t[0]) & 0x3ffffff;
07414         r[1] = (sp_digit)(t[0] >> 26);
07415         r[1] |= t[1] << 6;
07416         r[1] &= 0x3ffffff;
07417         r[2] = (sp_digit)(t[1] >> 20);
07418         r[2] |= t[2] << 12;
07419         r[2] &= 0x3ffffff;
07420         r[3] = (sp_digit)(t[2] >> 14);
07421         r[3] |= t[3] << 18;
07422         r[3] &= 0x3ffffff;
07423         r[4] = (sp_digit)(t[3] >> 8);
07424         r[4] |= t[4] << 24;
07425         r[4] &= 0x3ffffff;
07426         r[5] = (sp_digit)(t[4] >> 2) & 0x3ffffff;
07427         r[6] = (sp_digit)(t[4] >> 28);
07428         r[6] |= t[5] << 4;
07429         r[6] &= 0x3ffffff;
07430         r[7] = (sp_digit)(t[5] >> 22);
07431         r[7] |= t[6] << 10;
07432         r[7] &= 0x3ffffff;
07433         r[8] = (sp_digit)(t[6] >> 16);
07434         r[8] |= t[7] << 16;
07435         r[8] &= 0x3ffffff;
07436         r[9] = (sp_digit)(t[7] >> 10);
07437     }
07438 
07439 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
07440     if (td != NULL)
07441         XFREE(td, NULL, DYNAMIC_TYPE_ECC);
07442 #endif
07443 
07444     return err;
07445 }
07446 
07447 /* Convert an mp_int to an array of sp_digit.
07448  *
07449  * r  A single precision integer.
07450  * a  A multi-precision integer.
07451  */
07452 static void sp_256_from_mp(sp_digit* r, int max, mp_int* a)
07453 {
07454 #if DIGIT_BIT == 26
07455     int j;
07456 
07457     XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
07458 
07459     for (j = a->used; j < max; j++)
07460         r[j] = 0;
07461 #elif DIGIT_BIT > 26
07462     int i, j = 0, s = 0;
07463 
07464     r[0] = 0;
07465     for (i = 0; i < a->used && j < max; i++) {
07466         r[j] |= a->dp[i] << s;
07467         r[j] &= 0x3ffffff;
07468         s = 26 - s;
07469         if (j + 1 >= max)
07470             break;
07471         r[++j] = a->dp[i] >> s;
07472         while (s + 26 <= DIGIT_BIT) {
07473             s += 26;
07474             r[j] &= 0x3ffffff;
07475             if (j + 1 >= max)
07476                 break;
07477             if (s < DIGIT_BIT)
07478                 r[++j] = a->dp[i] >> s;
07479             else
07480                 r[++j] = 0;
07481         }
07482         s = DIGIT_BIT - s;
07483     }
07484 
07485     for (j++; j < max; j++)
07486         r[j] = 0;
07487 #else
07488     int i, j = 0, s = 0;
07489 
07490     r[0] = 0;
07491     for (i = 0; i < a->used && j < max; i++) {
07492         r[j] |= ((sp_digit)a->dp[i]) << s;
07493         if (s + DIGIT_BIT >= 26) {
07494             r[j] &= 0x3ffffff;
07495             if (j + 1 >= max)
07496                 break;
07497             s = 26 - s;
07498             if (s == DIGIT_BIT) {
07499                 r[++j] = 0;
07500                 s = 0;
07501             }
07502             else {
07503                 r[++j] = a->dp[i] >> s;
07504                 s = DIGIT_BIT - s;
07505             }
07506         }
07507         else
07508             s += DIGIT_BIT;
07509     }
07510 
07511     for (j++; j < max; j++)
07512         r[j] = 0;
07513 #endif
07514 }
07515 
07516 /* Convert a point of type ecc_point to type sp_point.
07517  *
07518  * p   Point of type sp_point (result).
07519  * pm  Point of type ecc_point.
07520  */
07521 static void sp_256_point_from_ecc_point_10(sp_point* p, ecc_point* pm)
07522 {
07523     XMEMSET(p->x, 0, sizeof(p->x));
07524     XMEMSET(p->y, 0, sizeof(p->y));
07525     XMEMSET(p->z, 0, sizeof(p->z));
07526     sp_256_from_mp(p->x, 10, pm->x);
07527     sp_256_from_mp(p->y, 10, pm->y);
07528     sp_256_from_mp(p->z, 10, pm->z);
07529     p->infinity = 0;
07530 }
07531 
07532 /* Convert an array of sp_digit to an mp_int.
07533  *
07534  * a  A single precision integer.
07535  * r  A multi-precision integer.
07536  */
07537 static int sp_256_to_mp(sp_digit* a, mp_int* r)
07538 {
07539     int err;
07540 
07541     err = mp_grow(r, (256 + DIGIT_BIT - 1) / DIGIT_BIT);
07542     if (err == MP_OKAY) {
07543 #if DIGIT_BIT == 26
07544         XMEMCPY(r->dp, a, sizeof(sp_digit) * 10);
07545         r->used = 10;
07546         mp_clamp(r);
07547 #elif DIGIT_BIT < 26
07548         int i, j = 0, s = 0;
07549 
07550         r->dp[0] = 0;
07551         for (i = 0; i < 10; i++) {
07552             r->dp[j] |= a[i] << s;
07553             r->dp[j] &= (1l << DIGIT_BIT) - 1;
07554             s = DIGIT_BIT - s;
07555             r->dp[++j] = a[i] >> s;
07556             while (s + DIGIT_BIT <= 26) {
07557                 s += DIGIT_BIT;
07558                 r->dp[j] &= (1l << DIGIT_BIT) - 1;
07559                 r->dp[++j] = a[i] >> s;
07560             }
07561             s = 26 - s;
07562         }
07563         r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
07564         mp_clamp(r);
07565 #else
07566         int i, j = 0, s = 0;
07567 
07568         r->dp[0] = 0;
07569         for (i = 0; i < 10; i++) {
07570             r->dp[j] |= ((mp_digit)a[i]) << s;
07571             if (s + 26 >= DIGIT_BIT) {
07572     #if DIGIT_BIT < 32
07573                 r->dp[j] &= (1l << DIGIT_BIT) - 1;
07574     #endif
07575                 s = DIGIT_BIT - s;
07576                 r->dp[++j] = a[i] >> s;
07577                 s = 26 - s;
07578             }
07579             else
07580                 s += 26;
07581         }
07582         r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
07583         mp_clamp(r);
07584 #endif
07585     }
07586 
07587     return err;
07588 }
07589 
07590 /* Convert a point of type sp_point to type ecc_point.
07591  *
07592  * p   Point of type sp_point.
07593  * pm  Point of type ecc_point (result).
07594  * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
07595  * MP_OKAY.
07596  */
07597 static int sp_256_point_to_ecc_point_10(sp_point* p, ecc_point* pm)
07598 {
07599     int err;
07600 
07601     err = sp_256_to_mp(p->x, pm->x);
07602     if (err == MP_OKAY)
07603         err = sp_256_to_mp(p->y, pm->y);
07604     if (err == MP_OKAY)
07605         err = sp_256_to_mp(p->z, pm->z);
07606 
07607     return err;
07608 }
07609 
07610 /* Compare a with b in constant time.
07611  *
07612  * a  A single precision integer.
07613  * b  A single precision integer.
07614  * return -ve, 0 or +ve if a is less than, equal to or greater than b
07615  * respectively.
07616  */
07617 static sp_digit sp_256_cmp_10(const sp_digit* a, const sp_digit* b)
07618 {
07619     sp_digit r = 0;
07620 #ifdef WOLFSSL_SP_SMALL
07621     int i;
07622 
07623     for (i=9; i>=0; i--)
07624         r |= (a[i] - b[i]) & (0 - !r);
07625 #else
07626     r |= (a[ 9] - b[ 9]) & (0 - !r);
07627     r |= (a[ 8] - b[ 8]) & (0 - !r);
07628     r |= (a[ 7] - b[ 7]) & (0 - !r);
07629     r |= (a[ 6] - b[ 6]) & (0 - !r);
07630     r |= (a[ 5] - b[ 5]) & (0 - !r);
07631     r |= (a[ 4] - b[ 4]) & (0 - !r);
07632     r |= (a[ 3] - b[ 3]) & (0 - !r);
07633     r |= (a[ 2] - b[ 2]) & (0 - !r);
07634     r |= (a[ 1] - b[ 1]) & (0 - !r);
07635     r |= (a[ 0] - b[ 0]) & (0 - !r);
07636 #endif /* WOLFSSL_SP_SMALL */
07637 
07638     return r;
07639 }
07640 
07641 /* Normalize the values in each word to 26.
07642  *
07643  * a  Array of sp_digit to normalize.
07644  */
07645 static void sp_256_norm_10(sp_digit* a)
07646 {
07647 #ifdef WOLFSSL_SP_SMALL
07648     int i;
07649     for (i = 0; i < 9; i++) {
07650         a[i+1] += a[i] >> 26;
07651         a[i] &= 0x3ffffff;
07652     }
07653 #else
07654     a[1] += a[0] >> 26; a[0] &= 0x3ffffff;
07655     a[2] += a[1] >> 26; a[1] &= 0x3ffffff;
07656     a[3] += a[2] >> 26; a[2] &= 0x3ffffff;
07657     a[4] += a[3] >> 26; a[3] &= 0x3ffffff;
07658     a[5] += a[4] >> 26; a[4] &= 0x3ffffff;
07659     a[6] += a[5] >> 26; a[5] &= 0x3ffffff;
07660     a[7] += a[6] >> 26; a[6] &= 0x3ffffff;
07661     a[8] += a[7] >> 26; a[7] &= 0x3ffffff;
07662     a[9] += a[8] >> 26; a[8] &= 0x3ffffff;
07663 #endif
07664 }
07665 
07666 /* Conditionally subtract b from a using the mask m.
07667  * m is -1 to subtract and 0 when not.
07668  *
07669  * r  A single precision number representing condition subtract result.
07670  * a  A single precision number to subtract from.
07671  * b  A single precision number to subtract.
07672  * m  Mask value to apply.
07673  */
07674 static void sp_256_cond_sub_10(sp_digit* r, const sp_digit* a,
07675         const sp_digit* b, const sp_digit m)
07676 {
07677 #ifdef WOLFSSL_SP_SMALL
07678     int i;
07679 
07680     for (i = 0; i < 10; i++)
07681         r[i] = a[i] - (b[i] & m);
07682 #else
07683     r[ 0] = a[ 0] - (b[ 0] & m);
07684     r[ 1] = a[ 1] - (b[ 1] & m);
07685     r[ 2] = a[ 2] - (b[ 2] & m);
07686     r[ 3] = a[ 3] - (b[ 3] & m);
07687     r[ 4] = a[ 4] - (b[ 4] & m);
07688     r[ 5] = a[ 5] - (b[ 5] & m);
07689     r[ 6] = a[ 6] - (b[ 6] & m);
07690     r[ 7] = a[ 7] - (b[ 7] & m);
07691     r[ 8] = a[ 8] - (b[ 8] & m);
07692     r[ 9] = a[ 9] - (b[ 9] & m);
07693 #endif /* WOLFSSL_SP_SMALL */
07694 }
07695 
07696 /* Mul a by scalar b and add into r. (r += a * b)
07697  *
07698  * r  A single precision integer.
07699  * a  A single precision integer.
07700  * b  A scalar.
07701  */
07702 SP_NOINLINE static void sp_256_mul_add_10(sp_digit* r, const sp_digit* a,
07703         const sp_digit b)
07704 {
07705 #ifdef WOLFSSL_SP_SMALL
07706     int64_t tb = b;
07707     int64_t t = 0;
07708     int i;
07709 
07710     for (i = 0; i < 10; i++) {
07711         t += (tb * a[i]) + r[i];
07712         r[i] = t & 0x3ffffff;
07713         t >>= 26;
07714     }
07715     r[10] += t;
07716 #else
07717     int64_t tb = b;
07718     int64_t t[10];
07719 
07720     t[ 0] = tb * a[ 0];
07721     t[ 1] = tb * a[ 1];
07722     t[ 2] = tb * a[ 2];
07723     t[ 3] = tb * a[ 3];
07724     t[ 4] = tb * a[ 4];
07725     t[ 5] = tb * a[ 5];
07726     t[ 6] = tb * a[ 6];
07727     t[ 7] = tb * a[ 7];
07728     t[ 8] = tb * a[ 8];
07729     t[ 9] = tb * a[ 9];
07730     r[ 0] +=                 (t[ 0] & 0x3ffffff);
07731     r[ 1] += (t[ 0] >> 26) + (t[ 1] & 0x3ffffff);
07732     r[ 2] += (t[ 1] >> 26) + (t[ 2] & 0x3ffffff);
07733     r[ 3] += (t[ 2] >> 26) + (t[ 3] & 0x3ffffff);
07734     r[ 4] += (t[ 3] >> 26) + (t[ 4] & 0x3ffffff);
07735     r[ 5] += (t[ 4] >> 26) + (t[ 5] & 0x3ffffff);
07736     r[ 6] += (t[ 5] >> 26) + (t[ 6] & 0x3ffffff);
07737     r[ 7] += (t[ 6] >> 26) + (t[ 7] & 0x3ffffff);
07738     r[ 8] += (t[ 7] >> 26) + (t[ 8] & 0x3ffffff);
07739     r[ 9] += (t[ 8] >> 26) + (t[ 9] & 0x3ffffff);
07740     r[10] +=  t[ 9] >> 26;
07741 #endif /* WOLFSSL_SP_SMALL */
07742 }
07743 
07744 /* Shift the result in the high 256 bits down to the bottom.
07745  *
07746  * r  A single precision number.
07747  * a  A single precision number.
07748  */
07749 static void sp_256_mont_shift_10(sp_digit* r, const sp_digit* a)
07750 {
07751 #ifdef WOLFSSL_SP_SMALL
07752     int i;
07753     sp_digit n, s;
07754 
07755     s = a[10];
07756     n = a[9] >> 22;
07757     for (i = 0; i < 9; i++) {
07758         n += (s & 0x3ffffff) << 4;
07759         r[i] = n & 0x3ffffff;
07760         n >>= 26;
07761         s = a[11 + i] + (s >> 26);
07762     }
07763     n += s << 4;
07764     r[9] = n;
07765 #else
07766     sp_digit n, s;
07767 
07768     s = a[10]; n = a[9] >> 22;
07769     n += (s & 0x3ffffff) << 4; r[ 0] = n & 0x3ffffff;
07770     n >>= 26; s = a[11] + (s >> 26);
07771     n += (s & 0x3ffffff) << 4; r[ 1] = n & 0x3ffffff;
07772     n >>= 26; s = a[12] + (s >> 26);
07773     n += (s & 0x3ffffff) << 4; r[ 2] = n & 0x3ffffff;
07774     n >>= 26; s = a[13] + (s >> 26);
07775     n += (s & 0x3ffffff) << 4; r[ 3] = n & 0x3ffffff;
07776     n >>= 26; s = a[14] + (s >> 26);
07777     n += (s & 0x3ffffff) << 4; r[ 4] = n & 0x3ffffff;
07778     n >>= 26; s = a[15] + (s >> 26);
07779     n += (s & 0x3ffffff) << 4; r[ 5] = n & 0x3ffffff;
07780     n >>= 26; s = a[16] + (s >> 26);
07781     n += (s & 0x3ffffff) << 4; r[ 6] = n & 0x3ffffff;
07782     n >>= 26; s = a[17] + (s >> 26);
07783     n += (s & 0x3ffffff) << 4; r[ 7] = n & 0x3ffffff;
07784     n >>= 26; s = a[18] + (s >> 26);
07785     n += (s & 0x3ffffff) << 4; r[ 8] = n & 0x3ffffff;
07786     n >>= 26; s = a[19] + (s >> 26);
07787     n += s << 4;              r[ 9] = n;
07788 #endif /* WOLFSSL_SP_SMALL */
07789     XMEMSET(&r[10], 0, sizeof(*r) * 10);
07790 }
07791 
07792 /* Reduce the number back to 256 bits using Montgomery reduction.
07793  *
07794  * a   A single precision number to reduce in place.
07795  * m   The single precision number representing the modulus.
07796  * mp  The digit representing the negative inverse of m mod 2^n.
07797  */
07798 static void sp_256_mont_reduce_10(sp_digit* a, sp_digit* m, sp_digit mp)
07799 {
07800     int i;
07801     sp_digit mu;
07802 
07803     if (mp != 1) {
07804         for (i=0; i<9; i++) {
07805             mu = (a[i] * mp) & 0x3ffffff;
07806             sp_256_mul_add_10(a+i, m, mu);
07807             a[i+1] += a[i] >> 26;
07808         }
07809         mu = (a[i] * mp) & 0x3fffffl;
07810         sp_256_mul_add_10(a+i, m, mu);
07811         a[i+1] += a[i] >> 26;
07812         a[i] &= 0x3ffffff;
07813     }
07814     else {
07815         for (i=0; i<9; i++) {
07816             mu = a[i] & 0x3ffffff;
07817             sp_256_mul_add_10(a+i, p256_mod, mu);
07818             a[i+1] += a[i] >> 26;
07819         }
07820         mu = a[i] & 0x3fffffl;
07821         sp_256_mul_add_10(a+i, p256_mod, mu);
07822         a[i+1] += a[i] >> 26;
07823         a[i] &= 0x3ffffff;
07824     }
07825 
07826     sp_256_mont_shift_10(a, a);
07827     sp_256_cond_sub_10(a, a, m, 0 - ((a[9] >> 22) > 0));
07828     sp_256_norm_10(a);
07829 }
07830 
07831 #ifdef WOLFSSL_SP_SMALL
07832 /* Multiply a and b into r. (r = a * b)
07833  *
07834  * r  A single precision integer.
07835  * a  A single precision integer.
07836  * b  A single precision integer.
07837  */
07838 SP_NOINLINE static void sp_256_mul_10(sp_digit* r, const sp_digit* a,
07839     const sp_digit* b)
07840 {
07841     int i, j, k;
07842     int64_t c;
07843 
07844     c = ((int64_t)a[9]) * b[9];
07845     r[19] = (sp_digit)(c >> 26);
07846     c = (c & 0x3ffffff) << 26;
07847     for (k = 17; k >= 0; k--) {
07848         for (i = 9; i >= 0; i--) {
07849             j = k - i;
07850             if (j >= 10)
07851                 break;
07852             if (j < 0)
07853                 continue;
07854 
07855             c += ((int64_t)a[i]) * b[j];
07856         }
07857         r[k + 2] += c >> 52;
07858         r[k + 1] = (c >> 26) & 0x3ffffff;
07859         c = (c & 0x3ffffff) << 26;
07860     }
07861     r[0] = (sp_digit)(c >> 26);
07862 }
07863 
07864 #else
07865 /* Multiply a and b into r. (r = a * b)
07866  *
07867  * r  A single precision integer.
07868  * a  A single precision integer.
07869  * b  A single precision integer.
07870  */
07871 SP_NOINLINE static void sp_256_mul_10(sp_digit* r, const sp_digit* a,
07872     const sp_digit* b)
07873 {
07874     int64_t t0   = ((int64_t)a[ 0]) * b[ 0];
07875     int64_t t1   = ((int64_t)a[ 0]) * b[ 1]
07876                  + ((int64_t)a[ 1]) * b[ 0];
07877     int64_t t2   = ((int64_t)a[ 0]) * b[ 2]
07878                  + ((int64_t)a[ 1]) * b[ 1]
07879                  + ((int64_t)a[ 2]) * b[ 0];
07880     int64_t t3   = ((int64_t)a[ 0]) * b[ 3]
07881                  + ((int64_t)a[ 1]) * b[ 2]
07882                  + ((int64_t)a[ 2]) * b[ 1]
07883                  + ((int64_t)a[ 3]) * b[ 0];
07884     int64_t t4   = ((int64_t)a[ 0]) * b[ 4]
07885                  + ((int64_t)a[ 1]) * b[ 3]
07886                  + ((int64_t)a[ 2]) * b[ 2]
07887                  + ((int64_t)a[ 3]) * b[ 1]
07888                  + ((int64_t)a[ 4]) * b[ 0];
07889     int64_t t5   = ((int64_t)a[ 0]) * b[ 5]
07890                  + ((int64_t)a[ 1]) * b[ 4]
07891                  + ((int64_t)a[ 2]) * b[ 3]
07892                  + ((int64_t)a[ 3]) * b[ 2]
07893                  + ((int64_t)a[ 4]) * b[ 1]
07894                  + ((int64_t)a[ 5]) * b[ 0];
07895     int64_t t6   = ((int64_t)a[ 0]) * b[ 6]
07896                  + ((int64_t)a[ 1]) * b[ 5]
07897                  + ((int64_t)a[ 2]) * b[ 4]
07898                  + ((int64_t)a[ 3]) * b[ 3]
07899                  + ((int64_t)a[ 4]) * b[ 2]
07900                  + ((int64_t)a[ 5]) * b[ 1]
07901                  + ((int64_t)a[ 6]) * b[ 0];
07902     int64_t t7   = ((int64_t)a[ 0]) * b[ 7]
07903                  + ((int64_t)a[ 1]) * b[ 6]
07904                  + ((int64_t)a[ 2]) * b[ 5]
07905                  + ((int64_t)a[ 3]) * b[ 4]
07906                  + ((int64_t)a[ 4]) * b[ 3]
07907                  + ((int64_t)a[ 5]) * b[ 2]
07908                  + ((int64_t)a[ 6]) * b[ 1]
07909                  + ((int64_t)a[ 7]) * b[ 0];
07910     int64_t t8   = ((int64_t)a[ 0]) * b[ 8]
07911                  + ((int64_t)a[ 1]) * b[ 7]
07912                  + ((int64_t)a[ 2]) * b[ 6]
07913                  + ((int64_t)a[ 3]) * b[ 5]
07914                  + ((int64_t)a[ 4]) * b[ 4]
07915                  + ((int64_t)a[ 5]) * b[ 3]
07916                  + ((int64_t)a[ 6]) * b[ 2]
07917                  + ((int64_t)a[ 7]) * b[ 1]
07918                  + ((int64_t)a[ 8]) * b[ 0];
07919     int64_t t9   = ((int64_t)a[ 0]) * b[ 9]
07920                  + ((int64_t)a[ 1]) * b[ 8]
07921                  + ((int64_t)a[ 2]) * b[ 7]
07922                  + ((int64_t)a[ 3]) * b[ 6]
07923                  + ((int64_t)a[ 4]) * b[ 5]
07924                  + ((int64_t)a[ 5]) * b[ 4]
07925                  + ((int64_t)a[ 6]) * b[ 3]
07926                  + ((int64_t)a[ 7]) * b[ 2]
07927                  + ((int64_t)a[ 8]) * b[ 1]
07928                  + ((int64_t)a[ 9]) * b[ 0];
07929     int64_t t10  = ((int64_t)a[ 1]) * b[ 9]
07930                  + ((int64_t)a[ 2]) * b[ 8]
07931                  + ((int64_t)a[ 3]) * b[ 7]
07932                  + ((int64_t)a[ 4]) * b[ 6]
07933                  + ((int64_t)a[ 5]) * b[ 5]
07934                  + ((int64_t)a[ 6]) * b[ 4]
07935                  + ((int64_t)a[ 7]) * b[ 3]
07936                  + ((int64_t)a[ 8]) * b[ 2]
07937                  + ((int64_t)a[ 9]) * b[ 1];
07938     int64_t t11  = ((int64_t)a[ 2]) * b[ 9]
07939                  + ((int64_t)a[ 3]) * b[ 8]
07940                  + ((int64_t)a[ 4]) * b[ 7]
07941                  + ((int64_t)a[ 5]) * b[ 6]
07942                  + ((int64_t)a[ 6]) * b[ 5]
07943                  + ((int64_t)a[ 7]) * b[ 4]
07944                  + ((int64_t)a[ 8]) * b[ 3]
07945                  + ((int64_t)a[ 9]) * b[ 2];
07946     int64_t t12  = ((int64_t)a[ 3]) * b[ 9]
07947                  + ((int64_t)a[ 4]) * b[ 8]
07948                  + ((int64_t)a[ 5]) * b[ 7]
07949                  + ((int64_t)a[ 6]) * b[ 6]
07950                  + ((int64_t)a[ 7]) * b[ 5]
07951                  + ((int64_t)a[ 8]) * b[ 4]
07952                  + ((int64_t)a[ 9]) * b[ 3];
07953     int64_t t13  = ((int64_t)a[ 4]) * b[ 9]
07954                  + ((int64_t)a[ 5]) * b[ 8]
07955                  + ((int64_t)a[ 6]) * b[ 7]
07956                  + ((int64_t)a[ 7]) * b[ 6]
07957                  + ((int64_t)a[ 8]) * b[ 5]
07958                  + ((int64_t)a[ 9]) * b[ 4];
07959     int64_t t14  = ((int64_t)a[ 5]) * b[ 9]
07960                  + ((int64_t)a[ 6]) * b[ 8]
07961                  + ((int64_t)a[ 7]) * b[ 7]
07962                  + ((int64_t)a[ 8]) * b[ 6]
07963                  + ((int64_t)a[ 9]) * b[ 5];
07964     int64_t t15  = ((int64_t)a[ 6]) * b[ 9]
07965                  + ((int64_t)a[ 7]) * b[ 8]
07966                  + ((int64_t)a[ 8]) * b[ 7]
07967                  + ((int64_t)a[ 9]) * b[ 6];
07968     int64_t t16  = ((int64_t)a[ 7]) * b[ 9]
07969                  + ((int64_t)a[ 8]) * b[ 8]
07970                  + ((int64_t)a[ 9]) * b[ 7];
07971     int64_t t17  = ((int64_t)a[ 8]) * b[ 9]
07972                  + ((int64_t)a[ 9]) * b[ 8];
07973     int64_t t18  = ((int64_t)a[ 9]) * b[ 9];
07974 
07975     t1   += t0  >> 26; r[ 0] = t0  & 0x3ffffff;
07976     t2   += t1  >> 26; r[ 1] = t1  & 0x3ffffff;
07977     t3   += t2  >> 26; r[ 2] = t2  & 0x3ffffff;
07978     t4   += t3  >> 26; r[ 3] = t3  & 0x3ffffff;
07979     t5   += t4  >> 26; r[ 4] = t4  & 0x3ffffff;
07980     t6   += t5  >> 26; r[ 5] = t5  & 0x3ffffff;
07981     t7   += t6  >> 26; r[ 6] = t6  & 0x3ffffff;
07982     t8   += t7  >> 26; r[ 7] = t7  & 0x3ffffff;
07983     t9   += t8  >> 26; r[ 8] = t8  & 0x3ffffff;
07984     t10  += t9  >> 26; r[ 9] = t9  & 0x3ffffff;
07985     t11  += t10 >> 26; r[10] = t10 & 0x3ffffff;
07986     t12  += t11 >> 26; r[11] = t11 & 0x3ffffff;
07987     t13  += t12 >> 26; r[12] = t12 & 0x3ffffff;
07988     t14  += t13 >> 26; r[13] = t13 & 0x3ffffff;
07989     t15  += t14 >> 26; r[14] = t14 & 0x3ffffff;
07990     t16  += t15 >> 26; r[15] = t15 & 0x3ffffff;
07991     t17  += t16 >> 26; r[16] = t16 & 0x3ffffff;
07992     t18  += t17 >> 26; r[17] = t17 & 0x3ffffff;
07993     r[19] = (sp_digit)(t18 >> 26);
07994                        r[18] = t18 & 0x3ffffff;
07995 }
07996 
07997 #endif /* WOLFSSL_SP_SMALL */
/* Multiply two Montgomery form numbers mod the modulus (prime).
 * (r = a * b mod m)
 *
 * The full product is written to r by sp_256_mul_10 and then reduced in
 * place by sp_256_mont_reduce_10.
 *
 * r   Result of multiplication.
 * a   First number to multiply in Montgomery form.
 * b   Second number to multiply in Montgomery form.
 * m   Modulus (prime).
 * mp  Montgomery multiplier.
 */
static void sp_256_mont_mul_10(sp_digit* r, sp_digit* a, sp_digit* b,
        sp_digit* m, sp_digit mp)
{
    sp_256_mul_10(r, a, b);
    sp_256_mont_reduce_10(r, m, mp);
}
08013 
08014 #ifdef WOLFSSL_SP_SMALL
08015 /* Square a and put result in r. (r = a * a)
08016  *
08017  * r  A single precision integer.
08018  * a  A single precision integer.
08019  */
08020 SP_NOINLINE static void sp_256_sqr_10(sp_digit* r, const sp_digit* a)
08021 {
08022     int i, j, k;
08023     int64_t c;
08024 
08025     c = ((int64_t)a[9]) * a[9];
08026     r[19] = (sp_digit)(c >> 26);
08027     c = (c & 0x3ffffff) << 26;
08028     for (k = 17; k >= 0; k--) {
08029         for (i = 9; i >= 0; i--) {
08030             j = k - i;
08031             if (j >= 10 || i <= j)
08032                 break;
08033             if (j < 0)
08034                 continue;
08035 
08036             c += ((int64_t)a[i]) * a[j] * 2;
08037         }
08038         if (i == j)
08039            c += ((int64_t)a[i]) * a[i];
08040 
08041         r[k + 2] += c >> 52;
08042         r[k + 1] = (c >> 26) & 0x3ffffff;
08043         c = (c & 0x3ffffff) << 26;
08044     }
08045     r[0] = (sp_digit)(c >> 26);
08046 }
08047 
08048 #else
08049 /* Square a and put result in r. (r = a * a)
08050  *
08051  * r  A single precision integer.
08052  * a  A single precision integer.
08053  */
08054 SP_NOINLINE static void sp_256_sqr_10(sp_digit* r, const sp_digit* a)
08055 {
08056     int64_t t0   =  ((int64_t)a[ 0]) * a[ 0];
08057     int64_t t1   = (((int64_t)a[ 0]) * a[ 1]) * 2;
08058     int64_t t2   = (((int64_t)a[ 0]) * a[ 2]) * 2
08059                  +  ((int64_t)a[ 1]) * a[ 1];
08060     int64_t t3   = (((int64_t)a[ 0]) * a[ 3]
08061                  +  ((int64_t)a[ 1]) * a[ 2]) * 2;
08062     int64_t t4   = (((int64_t)a[ 0]) * a[ 4]
08063                  +  ((int64_t)a[ 1]) * a[ 3]) * 2
08064                  +  ((int64_t)a[ 2]) * a[ 2];
08065     int64_t t5   = (((int64_t)a[ 0]) * a[ 5]
08066                  +  ((int64_t)a[ 1]) * a[ 4]
08067                  +  ((int64_t)a[ 2]) * a[ 3]) * 2;
08068     int64_t t6   = (((int64_t)a[ 0]) * a[ 6]
08069                  +  ((int64_t)a[ 1]) * a[ 5]
08070                  +  ((int64_t)a[ 2]) * a[ 4]) * 2
08071                  +  ((int64_t)a[ 3]) * a[ 3];
08072     int64_t t7   = (((int64_t)a[ 0]) * a[ 7]
08073                  +  ((int64_t)a[ 1]) * a[ 6]
08074                  +  ((int64_t)a[ 2]) * a[ 5]
08075                  +  ((int64_t)a[ 3]) * a[ 4]) * 2;
08076     int64_t t8   = (((int64_t)a[ 0]) * a[ 8]
08077                  +  ((int64_t)a[ 1]) * a[ 7]
08078                  +  ((int64_t)a[ 2]) * a[ 6]
08079                  +  ((int64_t)a[ 3]) * a[ 5]) * 2
08080                  +  ((int64_t)a[ 4]) * a[ 4];
08081     int64_t t9   = (((int64_t)a[ 0]) * a[ 9]
08082                  +  ((int64_t)a[ 1]) * a[ 8]
08083                  +  ((int64_t)a[ 2]) * a[ 7]
08084                  +  ((int64_t)a[ 3]) * a[ 6]
08085                  +  ((int64_t)a[ 4]) * a[ 5]) * 2;
08086     int64_t t10  = (((int64_t)a[ 1]) * a[ 9]
08087                  +  ((int64_t)a[ 2]) * a[ 8]
08088                  +  ((int64_t)a[ 3]) * a[ 7]
08089                  +  ((int64_t)a[ 4]) * a[ 6]) * 2
08090                  +  ((int64_t)a[ 5]) * a[ 5];
08091     int64_t t11  = (((int64_t)a[ 2]) * a[ 9]
08092                  +  ((int64_t)a[ 3]) * a[ 8]
08093                  +  ((int64_t)a[ 4]) * a[ 7]
08094                  +  ((int64_t)a[ 5]) * a[ 6]) * 2;
08095     int64_t t12  = (((int64_t)a[ 3]) * a[ 9]
08096                  +  ((int64_t)a[ 4]) * a[ 8]
08097                  +  ((int64_t)a[ 5]) * a[ 7]) * 2
08098                  +  ((int64_t)a[ 6]) * a[ 6];
08099     int64_t t13  = (((int64_t)a[ 4]) * a[ 9]
08100                  +  ((int64_t)a[ 5]) * a[ 8]
08101                  +  ((int64_t)a[ 6]) * a[ 7]) * 2;
08102     int64_t t14  = (((int64_t)a[ 5]) * a[ 9]
08103                  +  ((int64_t)a[ 6]) * a[ 8]) * 2
08104                  +  ((int64_t)a[ 7]) * a[ 7];
08105     int64_t t15  = (((int64_t)a[ 6]) * a[ 9]
08106                  +  ((int64_t)a[ 7]) * a[ 8]) * 2;
08107     int64_t t16  = (((int64_t)a[ 7]) * a[ 9]) * 2
08108                  +  ((int64_t)a[ 8]) * a[ 8];
08109     int64_t t17  = (((int64_t)a[ 8]) * a[ 9]) * 2;
08110     int64_t t18  =  ((int64_t)a[ 9]) * a[ 9];
08111 
08112     t1   += t0  >> 26; r[ 0] = t0  & 0x3ffffff;
08113     t2   += t1  >> 26; r[ 1] = t1  & 0x3ffffff;
08114     t3   += t2  >> 26; r[ 2] = t2  & 0x3ffffff;
08115     t4   += t3  >> 26; r[ 3] = t3  & 0x3ffffff;
08116     t5   += t4  >> 26; r[ 4] = t4  & 0x3ffffff;
08117     t6   += t5  >> 26; r[ 5] = t5  & 0x3ffffff;
08118     t7   += t6  >> 26; r[ 6] = t6  & 0x3ffffff;
08119     t8   += t7  >> 26; r[ 7] = t7  & 0x3ffffff;
08120     t9   += t8  >> 26; r[ 8] = t8  & 0x3ffffff;
08121     t10  += t9  >> 26; r[ 9] = t9  & 0x3ffffff;
08122     t11  += t10 >> 26; r[10] = t10 & 0x3ffffff;
08123     t12  += t11 >> 26; r[11] = t11 & 0x3ffffff;
08124     t13  += t12 >> 26; r[12] = t12 & 0x3ffffff;
08125     t14  += t13 >> 26; r[13] = t13 & 0x3ffffff;
08126     t15  += t14 >> 26; r[14] = t14 & 0x3ffffff;
08127     t16  += t15 >> 26; r[15] = t15 & 0x3ffffff;
08128     t17  += t16 >> 26; r[16] = t16 & 0x3ffffff;
08129     t18  += t17 >> 26; r[17] = t17 & 0x3ffffff;
08130     r[19] = (sp_digit)(t18 >> 26);
08131                        r[18] = t18 & 0x3ffffff;
08132 }
08133 
08134 #endif /* WOLFSSL_SP_SMALL */
/* Square the Montgomery form number. (r = a * a mod m)
 *
 * The full square is written to r by sp_256_sqr_10 and then reduced in
 * place by sp_256_mont_reduce_10.
 *
 * r   Result of squaring.
 * a   Number to square in Montgomery form.
 * m   Modulus (prime).
 * mp  Montgomery multiplier.
 */
static void sp_256_mont_sqr_10(sp_digit* r, sp_digit* a, sp_digit* m,
        sp_digit mp)
{
    sp_256_sqr_10(r, a);
    sp_256_mont_reduce_10(r, m, mp);
}
08148 
08149 #ifndef WOLFSSL_SP_SMALL
08150 /* Square the Montgomery form number a number of times. (r = a ^ n mod m)
08151  *
08152  * r   Result of squaring.
08153  * a   Number to square in Montogmery form.
08154  * n   Number of times to square.
08155  * m   Modulus (prime).
08156  * mp  Montogmery mulitplier.
08157  */
08158 static void sp_256_mont_sqr_n_10(sp_digit* r, sp_digit* a, int n,
08159         sp_digit* m, sp_digit mp)
08160 {
08161     sp_256_mont_sqr_10(r, a, m, mp);
08162     for (; n > 1; n--)
08163         sp_256_mont_sqr_10(r, r, m, mp);
08164 }
08165 
08166 #else
/* The P256 modulus minus 2, stored as eight 32-bit words with the least
 * significant word first. In WOLFSSL_SP_SMALL builds sp_256_mont_inv_10
 * scans its bits (word i / 32, bit i % 32) as the exponent. */
static const uint32_t p256_mod_2[8] = {
    0xfffffffd,0xffffffff,0xffffffff,0x00000000,0x00000000,0x00000000,
    0x00000001,0xffffffff
};
08172 #endif /* !WOLFSSL_SP_SMALL */
08173 
08174 /* Invert the number, in Montgomery form, modulo the modulus (prime) of the
08175  * P256 curve. (r = 1 / a mod m)
08176  *
08177  * r   Inverse result.
08178  * a   Number to invert.
08179  * td  Temporary data.
08180  */
08181 static void sp_256_mont_inv_10(sp_digit* r, sp_digit* a, sp_digit* td)
08182 {
08183 #ifdef WOLFSSL_SP_SMALL
08184     sp_digit* t = td;
08185     int i;
08186 
08187     XMEMCPY(t, a, sizeof(sp_digit) * 10);
08188     for (i=254; i>=0; i--) {
08189         sp_256_mont_sqr_10(t, t, p256_mod, p256_mp_mod);
08190         if (p256_mod_2[i / 32] & ((sp_digit)1 << (i % 32)))
08191             sp_256_mont_mul_10(t, t, a, p256_mod, p256_mp_mod);
08192     }
08193     XMEMCPY(r, t, sizeof(sp_digit) * 10);
08194 #else
08195     sp_digit* t = td;
08196     sp_digit* t2 = td + 2 * 10;
08197     sp_digit* t3 = td + 4 * 10;
08198 
08199     /* t = a^2 */
08200     sp_256_mont_sqr_10(t, a, p256_mod, p256_mp_mod);
08201     /* t = a^3 = t * a */
08202     sp_256_mont_mul_10(t, t, a, p256_mod, p256_mp_mod);
08203     /* t2= a^c = t ^ 2 ^ 2 */
08204     sp_256_mont_sqr_n_10(t2, t, 2, p256_mod, p256_mp_mod);
08205     /* t3= a^d = t2 * a */
08206     sp_256_mont_mul_10(t3, t2, a, p256_mod, p256_mp_mod);
08207     /* t = a^f = t2 * t */
08208     sp_256_mont_mul_10(t, t2, t, p256_mod, p256_mp_mod);
08209     /* t2= a^f0 = t ^ 2 ^ 4 */
08210     sp_256_mont_sqr_n_10(t2, t, 4, p256_mod, p256_mp_mod);
08211     /* t3= a^fd = t2 * t3 */
08212     sp_256_mont_mul_10(t3, t2, t3, p256_mod, p256_mp_mod);
08213     /* t = a^ff = t2 * t */
08214     sp_256_mont_mul_10(t, t2, t, p256_mod, p256_mp_mod);
08215     /* t2= a^ff00 = t ^ 2 ^ 8 */
08216     sp_256_mont_sqr_n_10(t2, t, 8, p256_mod, p256_mp_mod);
08217     /* t3= a^fffd = t2 * t3 */
08218     sp_256_mont_mul_10(t3, t2, t3, p256_mod, p256_mp_mod);
08219     /* t = a^ffff = t2 * t */
08220     sp_256_mont_mul_10(t, t2, t, p256_mod, p256_mp_mod);
08221     /* t2= a^ffff0000 = t ^ 2 ^ 16 */
08222     sp_256_mont_sqr_n_10(t2, t, 16, p256_mod, p256_mp_mod);
08223     /* t3= a^fffffffd = t2 * t3 */
08224     sp_256_mont_mul_10(t3, t2, t3, p256_mod, p256_mp_mod);
08225     /* t = a^ffffffff = t2 * t */
08226     sp_256_mont_mul_10(t, t2, t, p256_mod, p256_mp_mod);
08227     /* t = a^ffffffff00000000 = t ^ 2 ^ 32  */
08228     sp_256_mont_sqr_n_10(t2, t, 32, p256_mod, p256_mp_mod);
08229     /* t2= a^ffffffffffffffff = t2 * t */
08230     sp_256_mont_mul_10(t, t2, t, p256_mod, p256_mp_mod);
08231     /* t2= a^ffffffff00000001 = t2 * a */
08232     sp_256_mont_mul_10(t2, t2, a, p256_mod, p256_mp_mod);
08233     /* t2= a^ffffffff000000010000000000000000000000000000000000000000
08234      *   = t2 ^ 2 ^ 160 */
08235     sp_256_mont_sqr_n_10(t2, t2, 160, p256_mod, p256_mp_mod);
08236     /* t2= a^ffffffff00000001000000000000000000000000ffffffffffffffff
08237      *   = t2 * t */
08238     sp_256_mont_mul_10(t2, t2, t, p256_mod, p256_mp_mod);
08239     /* t2= a^ffffffff00000001000000000000000000000000ffffffffffffffff00000000
08240      *   = t2 ^ 2 ^ 32 */
08241     sp_256_mont_sqr_n_10(t2, t2, 32, p256_mod, p256_mp_mod);
08242     /* r = a^ffffffff00000001000000000000000000000000fffffffffffffffffffffffd
08243      *   = t2 * t3 */
08244     sp_256_mont_mul_10(r, t2, t3, p256_mod, p256_mp_mod);
08245 #endif /* WOLFSSL_SP_SMALL */
08246 }
08247 
08248 /* Map the Montgomery form projective co-ordinate point to an affine point.
08249  *
08250  * r  Resulting affine co-ordinate point.
08251  * p  Montgomery form projective co-ordinate point.
08252  * t  Temporary ordinate data.
08253  */
08254 static void sp_256_map_10(sp_point* r, sp_point* p, sp_digit* t)
08255 {
08256     sp_digit* t1 = t;
08257     sp_digit* t2 = t + 2*10;
08258     int32_t n;
08259 
08260     sp_256_mont_inv_10(t1, p->z, t + 2*10);
08261 
08262     sp_256_mont_sqr_10(t2, t1, p256_mod, p256_mp_mod);
08263     sp_256_mont_mul_10(t1, t2, t1, p256_mod, p256_mp_mod);
08264 
08265     /* x /= z^2 */
08266     sp_256_mont_mul_10(r->x, p->x, t2, p256_mod, p256_mp_mod);
08267     XMEMSET(r->x + 10, 0, sizeof(r->x) / 2);
08268     sp_256_mont_reduce_10(r->x, p256_mod, p256_mp_mod);
08269     /* Reduce x to less than modulus */
08270     n = sp_256_cmp_10(r->x, p256_mod);
08271     sp_256_cond_sub_10(r->x, r->x, p256_mod, 0 - (n >= 0));
08272     sp_256_norm_10(r->x);
08273 
08274     /* y /= z^3 */
08275     sp_256_mont_mul_10(r->y, p->y, t1, p256_mod, p256_mp_mod);
08276     XMEMSET(r->y + 10, 0, sizeof(r->y) / 2);
08277     sp_256_mont_reduce_10(r->y, p256_mod, p256_mp_mod);
08278     /* Reduce y to less than modulus */
08279     n = sp_256_cmp_10(r->y, p256_mod);
08280     sp_256_cond_sub_10(r->y, r->y, p256_mod, 0 - (n >= 0));
08281     sp_256_norm_10(r->y);
08282 
08283     XMEMSET(r->z, 0, sizeof(r->z));
08284     r->z[0] = 1;
08285 
08286 }
08287 
08288 #ifdef WOLFSSL_SP_SMALL
08289 /* Add b to a into r. (r = a + b)
08290  *
08291  * r  A single precision integer.
08292  * a  A single precision integer.
08293  * b  A single precision integer.
08294  */
08295 SP_NOINLINE static int sp_256_add_10(sp_digit* r, const sp_digit* a,
08296         const sp_digit* b)
08297 {
08298     int i;
08299 
08300     for (i = 0; i < 10; i++)
08301         r[i] = a[i] + b[i];
08302 
08303     return 0;
08304 }
08305 #else
08306 /* Add b to a into r. (r = a + b)
08307  *
08308  * r  A single precision integer.
08309  * a  A single precision integer.
08310  * b  A single precision integer.
08311  */
08312 SP_NOINLINE static int sp_256_add_10(sp_digit* r, const sp_digit* a,
08313         const sp_digit* b)
08314 {
08315     r[ 0] = a[ 0] + b[ 0];
08316     r[ 1] = a[ 1] + b[ 1];
08317     r[ 2] = a[ 2] + b[ 2];
08318     r[ 3] = a[ 3] + b[ 3];
08319     r[ 4] = a[ 4] + b[ 4];
08320     r[ 5] = a[ 5] + b[ 5];
08321     r[ 6] = a[ 6] + b[ 6];
08322     r[ 7] = a[ 7] + b[ 7];
08323     r[ 8] = a[ 8] + b[ 8];
08324     r[ 9] = a[ 9] + b[ 9];
08325 
08326     return 0;
08327 }
08328 
08329 #endif /* WOLFSSL_SP_SMALL */
08330 /* Add two Montgomery form numbers (r = a + b % m).
08331  *
08332  * r   Result of addition.
08333  * a   First number to add in Montogmery form.
08334  * b   Second number to add in Montogmery form.
08335  * m   Modulus (prime).
08336  */
08337 static void sp_256_mont_add_10(sp_digit* r, sp_digit* a, sp_digit* b,
08338         sp_digit* m)
08339 {
08340     sp_256_add_10(r, a, b);
08341     sp_256_norm_10(r);
08342     sp_256_cond_sub_10(r, r, m, 0 - ((r[9] >> 22) > 0));
08343     sp_256_norm_10(r);
08344 }
08345 
08346 /* Double a Montgomery form number (r = a + a % m).
08347  *
08348  * r   Result of doubling.
08349  * a   Number to double in Montogmery form.
08350  * m   Modulus (prime).
08351  */
08352 static void sp_256_mont_dbl_10(sp_digit* r, sp_digit* a, sp_digit* m)
08353 {
08354     sp_256_add_10(r, a, a);
08355     sp_256_norm_10(r);
08356     sp_256_cond_sub_10(r, r, m, 0 - ((r[9] >> 22) > 0));
08357     sp_256_norm_10(r);
08358 }
08359 
08360 /* Triple a Montgomery form number (r = a + a + a % m).
08361  *
08362  * r   Result of Tripling.
08363  * a   Number to triple in Montogmery form.
08364  * m   Modulus (prime).
08365  */
08366 static void sp_256_mont_tpl_10(sp_digit* r, sp_digit* a, sp_digit* m)
08367 {
08368     sp_256_add_10(r, a, a);
08369     sp_256_norm_10(r);
08370     sp_256_cond_sub_10(r, r, m, 0 - ((r[9] >> 22) > 0));
08371     sp_256_norm_10(r);
08372     sp_256_add_10(r, r, a);
08373     sp_256_norm_10(r);
08374     sp_256_cond_sub_10(r, r, m, 0 - ((r[9] >> 22) > 0));
08375     sp_256_norm_10(r);
08376 }
08377 
08378 #ifdef WOLFSSL_SP_SMALL
08379 /* Sub b from a into r. (r = a - b)
08380  *
08381  * r  A single precision integer.
08382  * a  A single precision integer.
08383  * b  A single precision integer.
08384  */
08385 SP_NOINLINE static int sp_256_sub_10(sp_digit* r, const sp_digit* a,
08386         const sp_digit* b)
08387 {
08388     int i;
08389 
08390     for (i = 0; i < 10; i++)
08391         r[i] = a[i] - b[i];
08392 
08393     return 0;
08394 }
08395 
08396 #else
08397 /* Sub b from a into r. (r = a - b)
08398  *
08399  * r  A single precision integer.
08400  * a  A single precision integer.
08401  * b  A single precision integer.
08402  */
08403 SP_NOINLINE static int sp_256_sub_10(sp_digit* r, const sp_digit* a,
08404         const sp_digit* b)
08405 {
08406     r[ 0] = a[ 0] - b[ 0];
08407     r[ 1] = a[ 1] - b[ 1];
08408     r[ 2] = a[ 2] - b[ 2];
08409     r[ 3] = a[ 3] - b[ 3];
08410     r[ 4] = a[ 4] - b[ 4];
08411     r[ 5] = a[ 5] - b[ 5];
08412     r[ 6] = a[ 6] - b[ 6];
08413     r[ 7] = a[ 7] - b[ 7];
08414     r[ 8] = a[ 8] - b[ 8];
08415     r[ 9] = a[ 9] - b[ 9];
08416 
08417     return 0;
08418 }
08419 
08420 #endif /* WOLFSSL_SP_SMALL */
08421 /* Conditionally add a and b using the mask m.
08422  * m is -1 to add and 0 when not.
08423  *
08424  * r  A single precision number representing conditional add result.
08425  * a  A single precision number to add with.
08426  * b  A single precision number to add.
08427  * m  Mask value to apply.
08428  */
08429 static void sp_256_cond_add_10(sp_digit* r, const sp_digit* a,
08430         const sp_digit* b, const sp_digit m)
08431 {
08432 #ifdef WOLFSSL_SP_SMALL
08433     int i;
08434 
08435     for (i = 0; i < 10; i++)
08436         r[i] = a[i] + (b[i] & m);
08437 #else
08438     r[ 0] = a[ 0] + (b[ 0] & m);
08439     r[ 1] = a[ 1] + (b[ 1] & m);
08440     r[ 2] = a[ 2] + (b[ 2] & m);
08441     r[ 3] = a[ 3] + (b[ 3] & m);
08442     r[ 4] = a[ 4] + (b[ 4] & m);
08443     r[ 5] = a[ 5] + (b[ 5] & m);
08444     r[ 6] = a[ 6] + (b[ 6] & m);
08445     r[ 7] = a[ 7] + (b[ 7] & m);
08446     r[ 8] = a[ 8] + (b[ 8] & m);
08447     r[ 9] = a[ 9] + (b[ 9] & m);
08448 #endif /* WOLFSSL_SP_SMALL */
08449 }
08450 
08451 /* Subtract two Montgomery form numbers (r = a - b % m).
08452  *
08453  * r   Result of subtration.
08454  * a   Number to subtract from in Montogmery form.
08455  * b   Number to subtract with in Montogmery form.
08456  * m   Modulus (prime).
08457  */
08458 static void sp_256_mont_sub_10(sp_digit* r, sp_digit* a, sp_digit* b,
08459         sp_digit* m)
08460 {
08461     sp_256_sub_10(r, a, b);
08462     sp_256_cond_add_10(r, r, m, r[9] >> 22);
08463     sp_256_norm_10(r);
08464 }
08465 
08466 /* Shift number left one bit.
08467  * Bottom bit is lost.
08468  *
08469  * r  Result of shift.
08470  * a  Number to shift.
08471  */
08472 SP_NOINLINE static void sp_256_rshift1_10(sp_digit* r, sp_digit* a)
08473 {
08474 #ifdef WOLFSSL_SP_SMALL
08475     int i;
08476 
08477     for (i=0; i<9; i++)
08478         r[i] = ((a[i] >> 1) | (a[i + 1] << 25)) & 0x3ffffff;
08479 #else
08480     r[0] = ((a[0] >> 1) | (a[1] << 25)) & 0x3ffffff;
08481     r[1] = ((a[1] >> 1) | (a[2] << 25)) & 0x3ffffff;
08482     r[2] = ((a[2] >> 1) | (a[3] << 25)) & 0x3ffffff;
08483     r[3] = ((a[3] >> 1) | (a[4] << 25)) & 0x3ffffff;
08484     r[4] = ((a[4] >> 1) | (a[5] << 25)) & 0x3ffffff;
08485     r[5] = ((a[5] >> 1) | (a[6] << 25)) & 0x3ffffff;
08486     r[6] = ((a[6] >> 1) | (a[7] << 25)) & 0x3ffffff;
08487     r[7] = ((a[7] >> 1) | (a[8] << 25)) & 0x3ffffff;
08488     r[8] = ((a[8] >> 1) | (a[9] << 25)) & 0x3ffffff;
08489 #endif
08490     r[9] = a[9] >> 1;
08491 }
08492 
08493 /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
08494  *
08495  * r  Result of division by 2.
08496  * a  Number to divide.
08497  * m  Modulus (prime).
08498  */
08499 static void sp_256_div2_10(sp_digit* r, sp_digit* a, sp_digit* m)
08500 {
08501     sp_256_cond_add_10(r, a, m, 0 - (a[0] & 1));
08502     sp_256_norm_10(r);
08503     sp_256_rshift1_10(r, r);
08504 }
08505 
08506 /* Double the Montgomery form projective point p.
08507  *
08508  * r  Result of doubling point.
08509  * p  Point to double.
08510  * t  Temporary ordinate data.
08511  */
08512 static void sp_256_proj_point_dbl_10(sp_point* r, sp_point* p, sp_digit* t)
08513 {
08514     sp_point *rp[2];
08515     sp_point tp;
08516     sp_digit* t1 = t;
08517     sp_digit* t2 = t + 2*10;
08518     sp_digit* x;
08519     sp_digit* y;
08520     sp_digit* z;
08521     int i;
08522 
08523     /* When infinity don't double point passed in - constant time. */
08524     rp[0] = r;
08525     rp[1] = &tp;
08526     x = rp[p->infinity]->x;
08527     y = rp[p->infinity]->y;
08528     z = rp[p->infinity]->z;
08529     /* Put point to double into result - good for infinty. */
08530     if (r != p) {
08531         for (i=0; i<10; i++)
08532             r->x[i] = p->x[i];
08533         for (i=0; i<10; i++)
08534             r->y[i] = p->y[i];
08535         for (i=0; i<10; i++)
08536             r->z[i] = p->z[i];
08537         r->infinity = p->infinity;
08538     }
08539 
08540     /* T1 = Z * Z */
08541     sp_256_mont_sqr_10(t1, z, p256_mod, p256_mp_mod);
08542     /* Z = Y * Z */
08543     sp_256_mont_mul_10(z, y, z, p256_mod, p256_mp_mod);
08544     /* Z = 2Z */
08545     sp_256_mont_dbl_10(z, z, p256_mod);
08546     /* T2 = X - T1 */
08547     sp_256_mont_sub_10(t2, x, t1, p256_mod);
08548     /* T1 = X + T1 */
08549     sp_256_mont_add_10(t1, x, t1, p256_mod);
08550     /* T2 = T1 * T2 */
08551     sp_256_mont_mul_10(t2, t1, t2, p256_mod, p256_mp_mod);
08552     /* T1 = 3T2 */
08553     sp_256_mont_tpl_10(t1, t2, p256_mod);
08554     /* Y = 2Y */
08555     sp_256_mont_dbl_10(y, y, p256_mod);
08556     /* Y = Y * Y */
08557     sp_256_mont_sqr_10(y, y, p256_mod, p256_mp_mod);
08558     /* T2 = Y * Y */
08559     sp_256_mont_sqr_10(t2, y, p256_mod, p256_mp_mod);
08560     /* T2 = T2/2 */
08561     sp_256_div2_10(t2, t2, p256_mod);
08562     /* Y = Y * X */
08563     sp_256_mont_mul_10(y, y, x, p256_mod, p256_mp_mod);
08564     /* X = T1 * T1 */
08565     sp_256_mont_mul_10(x, t1, t1, p256_mod, p256_mp_mod);
08566     /* X = X - Y */
08567     sp_256_mont_sub_10(x, x, y, p256_mod);
08568     /* X = X - Y */
08569     sp_256_mont_sub_10(x, x, y, p256_mod);
08570     /* Y = Y - X */
08571     sp_256_mont_sub_10(y, y, x, p256_mod);
08572     /* Y = Y * T1 */
08573     sp_256_mont_mul_10(y, y, t1, p256_mod, p256_mp_mod);
08574     /* Y = Y - T2 */
08575     sp_256_mont_sub_10(y, y, t2, p256_mod);
08576 
08577 }
08578 
08579 /* Compare two numbers to determine if they are equal.
08580  * Constant time implementation.
08581  *
08582  * a  First number to compare.
08583  * b  Second number to compare.
08584  * returns 1 when equal and 0 otherwise.
08585  */
08586 static int sp_256_cmp_equal_10(const sp_digit* a, const sp_digit* b)
08587 {
08588     return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) |
08589             (a[4] ^ b[4]) | (a[5] ^ b[5]) | (a[6] ^ b[6]) | (a[7] ^ b[7]) |
08590             (a[8] ^ b[8]) | (a[9] ^ b[9])) == 0;
08591 }
08592 
08593 /* Add two Montgomery form projective points.
08594  *
08595  * r  Result of addition.
08596  * p  Frist point to add.
08597  * q  Second point to add.
08598  * t  Temporary ordinate data.
08599  */
08600 static void sp_256_proj_point_add_10(sp_point* r, sp_point* p, sp_point* q,
08601         sp_digit* t)
08602 {
08603     sp_point *ap[2];
08604     sp_point *rp[2];
08605     sp_point tp;
08606     sp_digit* t1 = t;
08607     sp_digit* t2 = t + 2*10;
08608     sp_digit* t3 = t + 4*10;
08609     sp_digit* t4 = t + 6*10;
08610     sp_digit* t5 = t + 8*10;
08611     sp_digit* x;
08612     sp_digit* y;
08613     sp_digit* z;
08614     int i;
08615 
08616     /* Ensure only the first point is the same as the result. */
08617     if (q == r) {
08618         sp_point* a = p;
08619         p = q;
08620         q = a;
08621     }
08622 
08623     /* Check double */
08624     sp_256_sub_10(t1, p256_mod, q->y);
08625     sp_256_norm_10(t1);
08626     if (sp_256_cmp_equal_10(p->x, q->x) & sp_256_cmp_equal_10(p->z, q->z) &
08627         (sp_256_cmp_equal_10(p->y, q->y) | sp_256_cmp_equal_10(p->y, t1))) {
08628         sp_256_proj_point_dbl_10(r, p, t);
08629     }
08630     else {
08631         rp[0] = r;
08632         rp[1] = &tp;
08633         XMEMSET(&tp, 0, sizeof(tp));
08634         x = rp[p->infinity | q->infinity]->x;
08635         y = rp[p->infinity | q->infinity]->y;
08636         z = rp[p->infinity | q->infinity]->z;
08637 
08638         ap[0] = p;
08639         ap[1] = q;
08640         for (i=0; i<10; i++)
08641             r->x[i] = ap[p->infinity]->x[i];
08642         for (i=0; i<10; i++)
08643             r->y[i] = ap[p->infinity]->y[i];
08644         for (i=0; i<10; i++)
08645             r->z[i] = ap[p->infinity]->z[i];
08646         r->infinity = ap[p->infinity]->infinity;
08647 
08648         /* U1 = X1*Z2^2 */
08649         sp_256_mont_sqr_10(t1, q->z, p256_mod, p256_mp_mod);
08650         sp_256_mont_mul_10(t3, t1, q->z, p256_mod, p256_mp_mod);
08651         sp_256_mont_mul_10(t1, t1, x, p256_mod, p256_mp_mod);
08652         /* U2 = X2*Z1^2 */
08653         sp_256_mont_sqr_10(t2, z, p256_mod, p256_mp_mod);
08654         sp_256_mont_mul_10(t4, t2, z, p256_mod, p256_mp_mod);
08655         sp_256_mont_mul_10(t2, t2, q->x, p256_mod, p256_mp_mod);
08656         /* S1 = Y1*Z2^3 */
08657         sp_256_mont_mul_10(t3, t3, y, p256_mod, p256_mp_mod);
08658         /* S2 = Y2*Z1^3 */
08659         sp_256_mont_mul_10(t4, t4, q->y, p256_mod, p256_mp_mod);
08660         /* H = U2 - U1 */
08661         sp_256_mont_sub_10(t2, t2, t1, p256_mod);
08662         /* R = S2 - S1 */
08663         sp_256_mont_sub_10(t4, t4, t3, p256_mod);
08664         /* Z3 = H*Z1*Z2 */
08665         sp_256_mont_mul_10(z, z, q->z, p256_mod, p256_mp_mod);
08666         sp_256_mont_mul_10(z, z, t2, p256_mod, p256_mp_mod);
08667         /* X3 = R^2 - H^3 - 2*U1*H^2 */
08668         sp_256_mont_sqr_10(x, t4, p256_mod, p256_mp_mod);
08669         sp_256_mont_sqr_10(t5, t2, p256_mod, p256_mp_mod);
08670         sp_256_mont_mul_10(y, t1, t5, p256_mod, p256_mp_mod);
08671         sp_256_mont_mul_10(t5, t5, t2, p256_mod, p256_mp_mod);
08672         sp_256_mont_sub_10(x, x, t5, p256_mod);
08673         sp_256_mont_dbl_10(t1, y, p256_mod);
08674         sp_256_mont_sub_10(x, x, t1, p256_mod);
08675         /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
08676         sp_256_mont_sub_10(y, y, x, p256_mod);
08677         sp_256_mont_mul_10(y, y, t4, p256_mod, p256_mp_mod);
08678         sp_256_mont_mul_10(t5, t5, t3, p256_mod, p256_mp_mod);
08679         sp_256_mont_sub_10(y, y, t5, p256_mod);
08680     }
08681 }
08682 
08683 #ifdef WOLFSSL_SP_SMALL
08684 /* Multiply the point by the scalar and return the result.
08685  * If map is true then convert result to affine co-ordinates.
08686  *
08687  * r     Resulting point.
08688  * g     Point to multiply.
08689  * k     Scalar to multiply by.
08690  * map   Indicates whether to convert result to affine.
08691  * heap  Heap to use for allocation.
08692  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
08693  */
08694 static int sp_256_ecc_mulmod_10(sp_point* r, sp_point* g, sp_digit* k,
08695         int map, void* heap)
08696 {
08697     sp_point* td;
08698     sp_point* t[3];
08699     sp_digit* tmp;
08700     sp_digit n;
08701     int i;
08702     int c, y;
08703     int err = MP_OKAY;
08704 
08705     (void)heap;
08706 
08707     td = (sp_point*)XMALLOC(sizeof(sp_point) * 3, heap, DYNAMIC_TYPE_ECC);
08708     if (td == NULL)
08709         err = MEMORY_E;
08710     tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, heap,
08711                              DYNAMIC_TYPE_ECC);
08712     if (tmp == NULL)
08713         err = MEMORY_E;
08714 
08715     if (err == MP_OKAY) {
08716         XMEMSET(td, 0, sizeof(*td) * 3);
08717 
08718         t[0] = &td[0];
08719         t[1] = &td[1];
08720         t[2] = &td[2];
08721 
08722         /* t[0] = {0, 0, 1} * norm */
08723         t[0]->infinity = 1;
08724         /* t[1] = {g->x, g->y, g->z} * norm */
08725         err = sp_256_mod_mul_norm_10(t[1]->x, g->x, p256_mod);
08726     }
08727     if (err == MP_OKAY)
08728         err = sp_256_mod_mul_norm_10(t[1]->y, g->y, p256_mod);
08729     if (err == MP_OKAY)
08730         err = sp_256_mod_mul_norm_10(t[1]->z, g->z, p256_mod);
08731 
08732     if (err == MP_OKAY) {
08733         i = 9;
08734         c = 22;
08735         n = k[i--] << (26 - c);
08736         for (; ; c--) {
08737             if (c == 0) {
08738                 if (i == -1)
08739                     break;
08740 
08741                 n = k[i--];
08742                 c = 26;
08743             }
08744 
08745             y = (n >> 25) & 1;
08746             n <<= 1;
08747 
08748             sp_256_proj_point_add_10(t[y^1], t[0], t[1], tmp);
08749 
08750             XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
08751                                   ((size_t)t[1] & addr_mask[y])),
08752                     sizeof(sp_point));
08753             sp_256_proj_point_dbl_10(t[2], t[2], tmp);
08754             XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
08755                             ((size_t)t[1] & addr_mask[y])), t[2],
08756                     sizeof(sp_point));
08757         }
08758 
08759         if (map)
08760             sp_256_map_10(r, t[0], tmp);
08761         else
08762             XMEMCPY(r, t[0], sizeof(sp_point));
08763     }
08764 
08765     if (tmp != NULL) {
08766         XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 10 * 5);
08767         XFREE(tmp, NULL, DYNAMIC_TYPE_TMP_BUFFER);
08768     }
08769     if (td != NULL) {
08770         XMEMSET(td, 0, sizeof(sp_point) * 3);
08771         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
08772     }
08773 
08774     return err;
08775 }
08776 
08777 #elif defined(WOLFSSL_SP_CACHE_RESISTANT)
08778 /* Multiply the point by the scalar and return the result.
08779  * If map is true then convert result to affine co-ordinates.
08780  *
08781  * r     Resulting point.
08782  * g     Point to multiply.
08783  * k     Scalar to multiply by.
08784  * map   Indicates whether to convert result to affine.
08785  * heap  Heap to use for allocation.
08786  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
08787  */
08788 static int sp_256_ecc_mulmod_10(sp_point* r, sp_point* g, sp_digit* k,
08789         int map, void* heap)
08790 {
08791 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
08792     sp_point td[3];
08793     sp_digit tmpd[2 * 10 * 5];
08794 #endif
08795     sp_point* t;
08796     sp_digit* tmp;
08797     sp_digit n;
08798     int i;
08799     int c, y;
08800     int err = MP_OKAY;
08801 
08802     (void)heap;
08803 
08804 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
08805     sp_point td[3];
08806     t = (sp_point*)XMALLOC(sizeof(*td) * 3, heap, DYNAMIC_TYPE_ECC);
08807     if (t == NULL)
08808         err = MEMORY_E;
08809     tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, heap,
08810                              DYNAMIC_TYPE_ECC);
08811     if (tmp == NULL)
08812         err = MEMORY_E;
08813 #else
08814     t = td;
08815     tmp = tmpd;
08816 #endif
08817 
08818     if (err == MP_OKAY) {
08819         t[0] = &td[0];
08820         t[1] = &td[1];
08821         t[2] = &td[2];
08822 
08823         /* t[0] = {0, 0, 1} * norm */
08824         XMEMSET(&t[0], 0, sizeof(t[0]));
08825         t[0].infinity = 1;
08826         /* t[1] = {g->x, g->y, g->z} * norm */
08827         err = sp_256_mod_mul_norm_10(t[1].x, g->x, p256_mod);
08828     }
08829     if (err == MP_OKAY)
08830         err = sp_256_mod_mul_norm_10(t[1].y, g->y, p256_mod);
08831     if (err == MP_OKAY)
08832         err = sp_256_mod_mul_norm_10(t[1].z, g->z, p256_mod);
08833 
08834     if (err == MP_OKAY) {
08835         i = 9;
08836         c = 22;
08837         n = k[i--] << (26 - c);
08838         for (; ; c--) {
08839             if (c == 0) {
08840                 if (i == -1)
08841                     break;
08842 
08843                 n = k[i--];
08844                 c = 26;
08845             }
08846 
08847             y = (n >> 25) & 1;
08848             n <<= 1;
08849 
08850             sp_256_proj_point_add_10(&t[y^1], &t[0], &t[1], tmp);
08851 
08852             XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) +
08853                                  ((size_t)&t[1] & addr_mask[y])), sizeof(t[2]));
08854             sp_256_proj_point_dbl_10(&t[2], &t[2], tmp);
08855             XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) +
08856                            ((size_t)&t[1] & addr_mask[y])), &t[2], sizeof(t[2]));
08857         }
08858 
08859         if (map)
08860             sp_256_map_10(r, &t[0], tmp);
08861         else
08862             XMEMCPY(r, &t[0], sizeof(sp_point));
08863     }
08864 
08865 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
08866     if (tmp != NULL) {
08867         XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 10 * 5);
08868         XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
08869     }
08870     if (t != NULL) {
08871         XMEMSET(t, 0, sizeof(sp_point) * 3);
08872         XFREE(t, heap, DYNAMIC_TYPE_ECC);
08873     }
08874 #else
08875     ForceZero(tmpd, sizeof(tmpd));
08876     ForceZero(td, sizeof(td));
08877 #endif
08878 
08879     return err;
08880 }
08881 
08882 #else
08883 /* A table entry for pre-computed points. */
08884 typedef struct sp_table_entry {
08885     sp_digit x[10];
08886     sp_digit y[10];
08887     byte infinity;
08888 } sp_table_entry;
08889 
08890 /* Multiply the point by the scalar and return the result.
08891  * If map is true then convert result to affine co-ordinates.
08892  *
08893  * r     Resulting point.
08894  * g     Point to multiply.
08895  * k     Scalar to multiply by.
08896  * map   Indicates whether to convert result to affine.
08897  * heap  Heap to use for allocation.
08898  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
08899  */
08900 static int sp_256_ecc_mulmod_fast_10(sp_point* r, sp_point* g, sp_digit* k,
08901         int map, void* heap)
08902 {
08903 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
08904     sp_point td[16];
08905     sp_point rtd;
08906     sp_digit tmpd[2 * 10 * 5];
08907 #endif
08908     sp_point* t;
08909     sp_point* rt;
08910     sp_digit* tmp;
08911     sp_digit n;
08912     int i;
08913     int c, y;
08914     int err;
08915 
08916     (void)heap;
08917 
08918     err = sp_ecc_point_new(heap, rtd, rt);
08919 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
08920     t = (sp_point*)XMALLOC(sizeof(sp_point) * 16, heap, DYNAMIC_TYPE_ECC);
08921     if (t == NULL)
08922         err = MEMORY_E;
08923     tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, heap,
08924                              DYNAMIC_TYPE_ECC);
08925     if (tmp == NULL)
08926         err = MEMORY_E;
08927 #else
08928     t = td;
08929     tmp = tmpd;
08930 #endif
08931 
08932     if (err == MP_OKAY) {
08933         /* t[0] = {0, 0, 1} * norm */
08934         XMEMSET(&t[0], 0, sizeof(t[0]));
08935         t[0].infinity = 1;
08936         /* t[1] = {g->x, g->y, g->z} * norm */
08937         sp_256_mod_mul_norm_10(t[1].x, g->x, p256_mod);
08938         sp_256_mod_mul_norm_10(t[1].y, g->y, p256_mod);
08939         sp_256_mod_mul_norm_10(t[1].z, g->z, p256_mod);
08940         t[1].infinity = 0;
08941         sp_256_proj_point_dbl_10(&t[ 2], &t[ 1], tmp);
08942         t[ 2].infinity = 0;
08943         sp_256_proj_point_add_10(&t[ 3], &t[ 2], &t[ 1], tmp);
08944         t[ 3].infinity = 0;
08945         sp_256_proj_point_dbl_10(&t[ 4], &t[ 2], tmp);
08946         t[ 4].infinity = 0;
08947         sp_256_proj_point_add_10(&t[ 5], &t[ 3], &t[ 2], tmp);
08948         t[ 5].infinity = 0;
08949         sp_256_proj_point_dbl_10(&t[ 6], &t[ 3], tmp);
08950         t[ 6].infinity = 0;
08951         sp_256_proj_point_add_10(&t[ 7], &t[ 4], &t[ 3], tmp);
08952         t[ 7].infinity = 0;
08953         sp_256_proj_point_dbl_10(&t[ 8], &t[ 4], tmp);
08954         t[ 8].infinity = 0;
08955         sp_256_proj_point_add_10(&t[ 9], &t[ 5], &t[ 4], tmp);
08956         t[ 9].infinity = 0;
08957         sp_256_proj_point_dbl_10(&t[10], &t[ 5], tmp);
08958         t[10].infinity = 0;
08959         sp_256_proj_point_add_10(&t[11], &t[ 6], &t[ 5], tmp);
08960         t[11].infinity = 0;
08961         sp_256_proj_point_dbl_10(&t[12], &t[ 6], tmp);
08962         t[12].infinity = 0;
08963         sp_256_proj_point_add_10(&t[13], &t[ 7], &t[ 6], tmp);
08964         t[13].infinity = 0;
08965         sp_256_proj_point_dbl_10(&t[14], &t[ 7], tmp);
08966         t[14].infinity = 0;
08967         sp_256_proj_point_add_10(&t[15], &t[ 8], &t[ 7], tmp);
08968         t[15].infinity = 0;
08969 
08970         i = 8;
08971         n = k[i+1] << 6;
08972         c = 18;
08973         y = n >> 24;
08974         XMEMCPY(rt, &t[y], sizeof(sp_point));
08975         n <<= 8;
08976         for (; i>=0 || c>=4; ) {
08977             if (c < 4) {
08978                 n |= k[i--] << (6 - c);
08979                 c += 26;
08980             }
08981             y = (n >> 28) & 0xf;
08982             n <<= 4;
08983             c -= 4;
08984 
08985             sp_256_proj_point_dbl_10(rt, rt, tmp);
08986             sp_256_proj_point_dbl_10(rt, rt, tmp);
08987             sp_256_proj_point_dbl_10(rt, rt, tmp);
08988             sp_256_proj_point_dbl_10(rt, rt, tmp);
08989 
08990             sp_256_proj_point_add_10(rt, rt, &t[y], tmp);
08991         }
08992 
08993         if (map)
08994             sp_256_map_10(r, rt, tmp);
08995         else
08996             XMEMCPY(r, rt, sizeof(sp_point));
08997     }
08998 
08999 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
09000     if (tmp != NULL) {
09001         XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 10 * 5);
09002         XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
09003     }
09004     if (t != NULL) {
09005         XMEMSET(t, 0, sizeof(sp_point) * 16);
09006         XFREE(t, heap, DYNAMIC_TYPE_ECC);
09007     }
09008 #else
09009     ForceZero(tmpd, sizeof(tmpd));
09010     ForceZero(td, sizeof(td));
09011 #endif
09012     sp_ecc_point_free(rt, 1, heap);
09013 
09014     return err;
09015 }
09016 
09017 #ifdef FP_ECC
09018 /* Double the Montgomery form projective point p a number of times.
09019  *
09020  * r  Result of repeated doubling of point.
09021  * p  Point to double.
09022  * n  Number of times to double
09023  * t  Temporary ordinate data.
09024  */
09025 static void sp_256_proj_point_dbl_n_10(sp_point* r, sp_point* p, int n,
09026         sp_digit* t)
09027 {
09028     sp_point *rp[2];
09029     sp_point tp;
09030     sp_digit* w = t;
09031     sp_digit* a = t + 2*10;
09032     sp_digit* b = t + 4*10;
09033     sp_digit* t1 = t + 6*10;
09034     sp_digit* t2 = t + 8*10;
09035     sp_digit* x;
09036     sp_digit* y;
09037     sp_digit* z;
09038     int i;
09039 
09040     rp[0] = r;
09041     rp[1] = &tp;
09042     x = rp[p->infinity]->x;
09043     y = rp[p->infinity]->y;
09044     z = rp[p->infinity]->z;
09045     if (r != p) {
09046         for (i=0; i<10; i++)
09047             r->x[i] = p->x[i];
09048         for (i=0; i<10; i++)
09049             r->y[i] = p->y[i];
09050         for (i=0; i<10; i++)
09051             r->z[i] = p->z[i];
09052         r->infinity = p->infinity;
09053     }
09054 
09055     /* Y = 2*Y */
09056     sp_256_mont_dbl_10(y, y, p256_mod);
09057     /* W = Z^4 */
09058     sp_256_mont_sqr_10(w, z, p256_mod, p256_mp_mod);
09059     sp_256_mont_sqr_10(w, w, p256_mod, p256_mp_mod);
09060     while (n--) {
09061         /* A = 3*(X^2 - W) */
09062         sp_256_mont_sqr_10(t1, x, p256_mod, p256_mp_mod);
09063         sp_256_mont_sub_10(t1, t1, w, p256_mod);
09064         sp_256_mont_tpl_10(a, t1, p256_mod);
09065         /* B = X*Y^2 */
09066         sp_256_mont_sqr_10(t2, y, p256_mod, p256_mp_mod);
09067         sp_256_mont_mul_10(b, t2, x, p256_mod, p256_mp_mod);
09068         /* X = A^2 - 2B */
09069         sp_256_mont_sqr_10(x, a, p256_mod, p256_mp_mod);
09070         sp_256_mont_dbl_10(t1, b, p256_mod);
09071         sp_256_mont_sub_10(x, x, t1, p256_mod);
09072         /* Z = Z*Y */
09073         sp_256_mont_mul_10(z, z, y, p256_mod, p256_mp_mod);
09074         /* t2 = Y^4 */
09075         sp_256_mont_sqr_10(t2, t2, p256_mod, p256_mp_mod);
09076         if (n) {
09077             /* W = W*Y^4 */
09078             sp_256_mont_mul_10(w, w, t2, p256_mod, p256_mp_mod);
09079         }
09080         /* y = 2*A*(B - X) - Y^4 */
09081         sp_256_mont_sub_10(y, b, x, p256_mod);
09082         sp_256_mont_mul_10(y, y, a, p256_mod, p256_mp_mod);
09083         sp_256_mont_dbl_10(y, y, p256_mod);
09084         sp_256_mont_sub_10(y, y, t2, p256_mod);
09085     }
09086     /* Y = Y/2 */
09087     sp_256_div2_10(y, y, p256_mod);
09088 }
09089 
09090 #endif /* FP_ECC */
09091 /* Add two Montgomery form projective points. The second point has a q value of
09092  * one.
09093  * Only the first point can be the same pointer as the result point.
09094  *
09095  * r  Result of addition.
09096  * p  Frist point to add.
09097  * q  Second point to add.
09098  * t  Temporary ordinate data.
09099  */
09100 static void sp_256_proj_point_add_qz1_10(sp_point* r, sp_point* p,
09101         sp_point* q, sp_digit* t)
09102 {
09103     sp_point *ap[2];
09104     sp_point *rp[2];
09105     sp_point tp;
09106     sp_digit* t1 = t;
09107     sp_digit* t2 = t + 2*10;
09108     sp_digit* t3 = t + 4*10;
09109     sp_digit* t4 = t + 6*10;
09110     sp_digit* t5 = t + 8*10;
09111     sp_digit* x;
09112     sp_digit* y;
09113     sp_digit* z;
09114     int i;
09115 
09116     /* Check double */
09117     sp_256_sub_10(t1, p256_mod, q->y);
09118     sp_256_norm_10(t1);
09119     if (sp_256_cmp_equal_10(p->x, q->x) & sp_256_cmp_equal_10(p->z, q->z) &
09120         (sp_256_cmp_equal_10(p->y, q->y) | sp_256_cmp_equal_10(p->y, t1))) {
09121         sp_256_proj_point_dbl_10(r, p, t);
09122     }
09123     else {
09124         rp[0] = r;
09125         rp[1] = &tp;
09126         XMEMSET(&tp, 0, sizeof(tp));
09127         x = rp[p->infinity | q->infinity]->x;
09128         y = rp[p->infinity | q->infinity]->y;
09129         z = rp[p->infinity | q->infinity]->z;
09130 
09131         ap[0] = p;
09132         ap[1] = q;
09133         for (i=0; i<10; i++)
09134             r->x[i] = ap[p->infinity]->x[i];
09135         for (i=0; i<10; i++)
09136             r->y[i] = ap[p->infinity]->y[i];
09137         for (i=0; i<10; i++)
09138             r->z[i] = ap[p->infinity]->z[i];
09139         r->infinity = ap[p->infinity]->infinity;
09140 
09141         /* U2 = X2*Z1^2 */
09142         sp_256_mont_sqr_10(t2, z, p256_mod, p256_mp_mod);
09143         sp_256_mont_mul_10(t4, t2, z, p256_mod, p256_mp_mod);
09144         sp_256_mont_mul_10(t2, t2, q->x, p256_mod, p256_mp_mod);
09145         /* S2 = Y2*Z1^3 */
09146         sp_256_mont_mul_10(t4, t4, q->y, p256_mod, p256_mp_mod);
09147         /* H = U2 - X1 */
09148         sp_256_mont_sub_10(t2, t2, x, p256_mod);
09149         /* R = S2 - Y1 */
09150         sp_256_mont_sub_10(t4, t4, y, p256_mod);
09151         /* Z3 = H*Z1 */
09152         sp_256_mont_mul_10(z, z, t2, p256_mod, p256_mp_mod);
09153         /* X3 = R^2 - H^3 - 2*X1*H^2 */
09154         sp_256_mont_sqr_10(t1, t4, p256_mod, p256_mp_mod);
09155         sp_256_mont_sqr_10(t5, t2, p256_mod, p256_mp_mod);
09156         sp_256_mont_mul_10(t3, x, t5, p256_mod, p256_mp_mod);
09157         sp_256_mont_mul_10(t5, t5, t2, p256_mod, p256_mp_mod);
09158         sp_256_mont_sub_10(x, t1, t5, p256_mod);
09159         sp_256_mont_dbl_10(t1, t3, p256_mod);
09160         sp_256_mont_sub_10(x, x, t1, p256_mod);
09161         /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
09162         sp_256_mont_sub_10(t3, t3, x, p256_mod);
09163         sp_256_mont_mul_10(t3, t3, t4, p256_mod, p256_mp_mod);
09164         sp_256_mont_mul_10(t5, t5, y, p256_mod, p256_mp_mod);
09165         sp_256_mont_sub_10(y, t3, t5, p256_mod);
09166     }
09167 }
09168 
09169 #ifdef FP_ECC
09170 /* Convert the projective point to affine.
09171  * Ordinates are in Montgomery form.
09172  *
09173  * a  Point to convert.
09174  * t  Temprorary data.
09175  */
09176 static void sp_256_proj_to_affine_10(sp_point* a, sp_digit* t)
09177 {
09178     sp_digit* t1 = t;
09179     sp_digit* t2 = t + 2 * 10;
09180     sp_digit* tmp = t + 4 * 10;
09181 
09182     sp_256_mont_inv_10(t1, a->z, tmp);
09183 
09184     sp_256_mont_sqr_10(t2, t1, p256_mod, p256_mp_mod);
09185     sp_256_mont_mul_10(t1, t2, t1, p256_mod, p256_mp_mod);
09186 
09187     sp_256_mont_mul_10(a->x, a->x, t2, p256_mod, p256_mp_mod);
09188     sp_256_mont_mul_10(a->y, a->y, t1, p256_mod, p256_mp_mod);
09189     XMEMCPY(a->z, p256_norm_mod, sizeof(p256_norm_mod));
09190 }
09191 
09192 /* Generate the pre-computed table of points for the base point.
09193  *
09194  * a      The base point.
09195  * table  Place to store generated point data.
09196  * tmp    Temprorary data.
09197  * heap  Heap to use for allocation.
09198  */
09199 static int sp_256_gen_stripe_table_10(sp_point* a,
09200         sp_table_entry* table, sp_digit* tmp, void* heap)
09201 {
09202 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
09203     sp_point td, s1d, s2d;
09204 #endif
09205     sp_point* t;
09206     sp_point* s1 = NULL;
09207     sp_point* s2 = NULL;
09208     int i, j;
09209     int err;
09210 
09211     (void)heap;
09212 
09213     err = sp_ecc_point_new(heap, td, t);
09214     if (err == MP_OKAY)
09215         err = sp_ecc_point_new(heap, s1d, s1);
09216     if (err == MP_OKAY)
09217         err = sp_ecc_point_new(heap, s2d, s2);
09218 
09219     if (err == MP_OKAY)
09220         err = sp_256_mod_mul_norm_10(t->x, a->x, p256_mod);
09221     if (err == MP_OKAY)
09222         err = sp_256_mod_mul_norm_10(t->y, a->y, p256_mod);
09223     if (err == MP_OKAY)
09224         err = sp_256_mod_mul_norm_10(t->z, a->z, p256_mod);
09225     if (err == MP_OKAY) {
09226         t->infinity = 0;
09227         sp_256_proj_to_affine_10(t, tmp);
09228 
09229         XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
09230         s1->infinity = 0;
09231         XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
09232         s2->infinity = 0;
09233 
09234         /* table[0] = {0, 0, infinity} */
09235         XMEMSET(&table[0], 0, sizeof(sp_table_entry));
09236         table[0].infinity = 1;
09237         /* table[1] = Affine version of 'a' in Montgomery form */
09238         XMEMCPY(table[1].x, t->x, sizeof(table->x));
09239         XMEMCPY(table[1].y, t->y, sizeof(table->y));
09240         table[1].infinity = 0;
09241 
09242         for (i=1; i<8; i++) {
09243             sp_256_proj_point_dbl_n_10(t, t, 32, tmp);
09244             sp_256_proj_to_affine_10(t, tmp);
09245             XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
09246             XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
09247             table[1<<i].infinity = 0;
09248         }
09249 
09250         for (i=1; i<8; i++) {
09251             XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
09252             XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
09253             for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
09254                 XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
09255                 XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
09256                 sp_256_proj_point_add_qz1_10(t, s1, s2, tmp);
09257                 sp_256_proj_to_affine_10(t, tmp);
09258                 XMEMCPY(table[j].x, t->x, sizeof(table->x));
09259                 XMEMCPY(table[j].y, t->y, sizeof(table->y));
09260                 table[j].infinity = 0;
09261             }
09262         }
09263     }
09264 
09265     sp_ecc_point_free(s2, 0, heap);
09266     sp_ecc_point_free(s1, 0, heap);
09267     sp_ecc_point_free( t, 0, heap);
09268 
09269     return err;
09270 }
09271 
09272 #endif /* FP_ECC */
09273 /* Multiply the point by the scalar and return the result.
09274  * If map is true then convert result to affine co-ordinates.
09275  *
09276  * r     Resulting point.
09277  * k     Scalar to multiply by.
09278  * map   Indicates whether to convert result to affine.
09279  * heap  Heap to use for allocation.
09280  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
09281  */
09282 static int sp_256_ecc_mulmod_stripe_10(sp_point* r, sp_point* g,
09283         sp_table_entry* table, sp_digit* k, int map, void* heap)
09284 {
09285 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
09286     sp_point rtd;
09287     sp_point pd;
09288     sp_digit td[2 * 10 * 5];
09289 #endif
09290     sp_point* rt;
09291     sp_point* p = NULL;
09292     sp_digit* t;
09293     int i, j;
09294     int y, x;
09295     int err;
09296 
09297     (void)g;
09298     (void)heap;
09299 
09300     err = sp_ecc_point_new(heap, rtd, rt);
09301     if (err == MP_OKAY)
09302         err = sp_ecc_point_new(heap, pd, p);
09303 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
09304     t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, heap,
09305                            DYNAMIC_TYPE_ECC);
09306     if (t == NULL)
09307         err = MEMORY_E;
09308 #else
09309     t = td;
09310 #endif
09311 
09312     if (err == MP_OKAY) {
09313         XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
09314         XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
09315 
09316         y = 0;
09317         for (j=0,x=31; j<8; j++,x+=32)
09318             y |= ((k[x / 26] >> (x % 26)) & 1) << j;
09319         XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
09320         XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
09321         rt->infinity = table[y].infinity;
09322         for (i=30; i>=0; i--) {
09323             y = 0;
09324             for (j=0,x=i; j<8; j++,x+=32)
09325                 y |= ((k[x / 26] >> (x % 26)) & 1) << j;
09326 
09327             sp_256_proj_point_dbl_10(rt, rt, t);
09328             XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
09329             XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
09330             p->infinity = table[y].infinity;
09331             sp_256_proj_point_add_qz1_10(rt, rt, p, t);
09332         }
09333 
09334         if (map)
09335             sp_256_map_10(r, rt, t);
09336         else
09337             XMEMCPY(r, rt, sizeof(sp_point));
09338     }
09339 
09340 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
09341     if (t != NULL)
09342         XFREE(t, heap, DYNAMIC_TYPE_ECC);
09343 #endif
09344     sp_ecc_point_free(p, 0, heap);
09345     sp_ecc_point_free(rt, 0, heap);
09346 
09347     return err;
09348 }
09349 
09350 #ifdef FP_ECC
09351 #ifndef FP_ENTRIES
09352     #define FP_ENTRIES 16
09353 #endif
09354 
09355 typedef struct sp_cache_t {
09356     sp_digit x[10];
09357     sp_digit y[10];
09358     sp_table_entry table[256];
09359     uint32_t cnt;
09360     int set;
09361 } sp_cache_t;
09362 
09363 static THREAD_LS_T sp_cache_t sp_cache[FP_ENTRIES];
09364 static THREAD_LS_T int sp_cache_last = -1;
09365 static THREAD_LS_T int sp_cache_inited = 0;
09366 
09367 #ifndef HAVE_THREAD_LS
09368     static volatile int initCacheMutex = 0;
09369     static wolfSSL_Mutex sp_cache_lock;
09370 #endif
09371 
09372 static void sp_ecc_get_cache(sp_point* g, sp_cache_t** cache)
09373 {
09374     int i, j;
09375     uint32_t least;
09376 
09377     if (sp_cache_inited == 0) {
09378         for (i=0; i<FP_ENTRIES; i++) {
09379             sp_cache[i].set = 0;
09380         }
09381         sp_cache_inited = 1;
09382     }
09383 
09384     /* Compare point with those in cache. */
09385     for (i=0; i<FP_ENTRIES; i++) {
09386         if (!sp_cache[i].set)
09387             continue;
09388 
09389         if (sp_256_cmp_equal_10(g->x, sp_cache[i].x) & 
09390                            sp_256_cmp_equal_10(g->y, sp_cache[i].y)) {
09391             sp_cache[i].cnt++;
09392             break;
09393         }
09394     }
09395 
09396     /* No match. */
09397     if (i == FP_ENTRIES) {
09398         /* Find empty entry. */
09399         i = (sp_cache_last + 1) % FP_ENTRIES;
09400         for (; i != sp_cache_last; i=(i+1)%FP_ENTRIES) {
09401             if (!sp_cache[i].set) {
09402                 break;
09403             }
09404         }
09405 
09406         /* Evict least used. */
09407         if (i == sp_cache_last) {
09408             least = sp_cache[0].cnt;
09409             for (j=1; j<FP_ENTRIES; j++) {
09410                 if (sp_cache[j].cnt < least) {
09411                     i = j;
09412                     least = sp_cache[i].cnt;
09413                 }
09414             }
09415         }
09416 
09417         XMEMCPY(sp_cache[i].x, g->x, sizeof(sp_cache[i].x));
09418         XMEMCPY(sp_cache[i].y, g->y, sizeof(sp_cache[i].y));
09419         sp_cache[i].set = 1;
09420         sp_cache[i].cnt = 1;
09421     }
09422 
09423     *cache = &sp_cache[i];
09424     sp_cache_last = i;
09425 }
09426 #endif /* FP_ECC */
09427 
09428 /* Multiply the base point of P256 by the scalar and return the result.
09429  * If map is true then convert result to affine co-ordinates.
09430  *
09431  * r     Resulting point.
09432  * g     Point to multiply.
09433  * k     Scalar to multiply by.
09434  * map   Indicates whether to convert result to affine.
09435  * heap  Heap to use for allocation.
09436  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
09437  */
09438 static int sp_256_ecc_mulmod_10(sp_point* r, sp_point* g, sp_digit* k,
09439         int map, void* heap)
09440 {
09441 #ifndef FP_ECC
09442     return sp_256_ecc_mulmod_fast_10(r, g, k, map, heap);
09443 #else
09444     sp_digit tmp[2 * 10 * 5];
09445     sp_cache_t* cache;
09446     int err = MP_OKAY;
09447 
09448 #ifndef HAVE_THREAD_LS
09449     if (initCacheMutex == 0) {
09450          wc_InitMutex(&sp_cache_lock);
09451          initCacheMutex = 1;
09452     }
09453     if (wc_LockMutex(&sp_cache_lock) != 0)
09454        err = BAD_MUTEX_E;
09455 #endif /* HAVE_THREAD_LS */
09456 
09457     if (err == MP_OKAY) {
09458         sp_ecc_get_cache(g, &cache);
09459         if (cache->cnt == 2)
09460             sp_256_gen_stripe_table_10(g, cache->table, tmp, heap);
09461 
09462 #ifndef HAVE_THREAD_LS
09463         wc_UnLockMutex(&sp_cache_lock);
09464 #endif /* HAVE_THREAD_LS */
09465 
09466         if (cache->cnt < 2) {
09467             err = sp_256_ecc_mulmod_fast_10(r, g, k, map, heap);
09468         }
09469         else {
09470             err = sp_256_ecc_mulmod_stripe_10(r, g, cache->table, k,
09471                     map, heap);
09472         }
09473     }
09474 
09475     return err;
09476 #endif
09477 }
09478 
09479 #endif
09480 /* Multiply the point by the scalar and return the result.
09481  * If map is true then convert result to affine co-ordinates.
09482  *
09483  * km    Scalar to multiply by.
09484  * p     Point to multiply.
09485  * r     Resulting point.
09486  * map   Indicates whether to convert result to affine.
09487  * heap  Heap to use for allocation.
09488  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
09489  */
09490 int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map,
09491         void* heap)
09492 {
09493 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
09494     sp_point p;
09495     sp_digit kd[10];
09496 #endif
09497     sp_point* point;
09498     sp_digit* k = NULL;
09499     int err = MP_OKAY;
09500 #ifdef HAVE_INTEL_AVX2
09501     word32 cpuid_flags = cpuid_get_flags();
09502 #endif
09503 
09504     err = sp_ecc_point_new(heap, p, point);
09505 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
09506     if (err == MP_OKAY) {
09507         k = XMALLOC(sizeof(sp_digit) * 10, heap, DYNAMIC_TYPE_ECC);
09508         if (k == NULL)
09509             err = MEMORY_E;
09510     }
09511 #else
09512     k = kd;
09513 #endif
09514     if (err == MP_OKAY) {
09515         sp_256_from_mp(k, 10, km);
09516         sp_256_point_from_ecc_point_10(point, gm);
09517 
09518 #ifdef HAVE_INTEL_AVX2
09519         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
09520             err = sp_256_ecc_mulmod_avx2_10(point, point, k, map, heap);
09521         else
09522 #endif
09523             err = sp_256_ecc_mulmod_10(point, point, k, map, heap);
09524     }
09525     if (err == MP_OKAY)
09526         err = sp_256_point_to_ecc_point_10(point, r);
09527 
09528 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
09529     if (k != NULL)
09530         XFREE(k, heap, DYNAMIC_TYPE_ECC);
09531 #endif
09532     sp_ecc_point_free(point, 0, heap);
09533 
09534     return err;
09535 }
09536 
09537 #ifdef WOLFSSL_SP_SMALL
09538 /* Multiply the base point of P256 by the scalar and return the result.
09539  * If map is true then convert result to affine co-ordinates.
09540  *
09541  * r     Resulting point.
09542  * k     Scalar to multiply by.
09543  * map   Indicates whether to convert result to affine.
09544  * heap  Heap to use for allocation.
09545  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
09546  */
09547 static int sp_256_ecc_mulmod_base_10(sp_point* r, sp_digit* k,
09548         int map, void* heap)
09549 {
09550     /* No pre-computed values. */
09551     return sp_256_ecc_mulmod_10(r, &p256_base, k, map, heap);
09552 }
09553 
09554 #else
09555 static sp_table_entry p256_table[256] = {
09556     /* 0 */
09557     { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
09558       { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
09559       1 },
09560     /* 1 */
09561     { { 0x0a9143c,0x1cc3506,0x360179e,0x3f17fb6,0x075ba95,0x1d88944,
09562         0x3b732b7,0x15719e7,0x376a537,0x0062417 },
09563       { 0x295560a,0x094d5f3,0x245cddf,0x392e867,0x18b4ab8,0x3487cc9,
09564         0x288688d,0x176174b,0x3182588,0x0215c7f },
09565       0 },
09566     /* 2 */
09567     { { 0x147519a,0x2218090,0x32f0202,0x2b09acd,0x0d0981e,0x1e17af2,
09568         0x14a7caa,0x163a6a7,0x10ddbdf,0x03654f1 },
09569       { 0x1590f8f,0x0d8733f,0x09179d6,0x1ad139b,0x372e962,0x0bad933,
09570         0x1961102,0x223cdff,0x37e9eb2,0x0218fae },
09571       0 },
09572     /* 3 */
09573     { { 0x0db6485,0x1ad88d7,0x2f97785,0x288bc28,0x3808f0e,0x3df8c02,
09574         0x28d9544,0x20280f9,0x055b5ff,0x00001d8 },
09575       { 0x38d2010,0x13ae6e0,0x308a763,0x2ecc90d,0x254014f,0x10a9981,
09576         0x247d398,0x0fb8383,0x3613437,0x020c21d },
09577       0 },
09578     /* 4 */
09579     { { 0x2a0d2bb,0x08bf145,0x34994f9,0x1b06988,0x30d5cc1,0x1f18b22,
09580         0x01cf3a5,0x199fe49,0x161fd1b,0x00bd79a },
09581       { 0x1a01797,0x171c2fd,0x21925c1,0x1358255,0x23d20b4,0x1c7f6d4,
09582         0x111b370,0x03dec12,0x1168d6f,0x03d923e },
09583       0 },
09584     /* 5 */
09585     { { 0x137bbbc,0x19a11f8,0x0bec9e5,0x27a29a8,0x3e43446,0x275cd18,
09586         0x0427617,0x00056c7,0x285133d,0x016af80 },
09587       { 0x04c7dab,0x2a0df30,0x0c0792a,0x1310c98,0x3573d9f,0x239b30d,
09588         0x1315627,0x1ce0c32,0x25b6b6f,0x0252edc },
09589       0 },
09590     /* 6 */
09591     { { 0x20f141c,0x26d23dc,0x3c74bbf,0x334b7d6,0x06199b3,0x0441171,
09592         0x3f61294,0x313bf70,0x3cb2f7d,0x03375ae },
09593       { 0x2f436fd,0x19c02fa,0x26becca,0x1b6e64c,0x26f647f,0x053c948,
09594         0x0fa7920,0x397d830,0x2bd4bda,0x028d86f },
09595       0 },
09596     /* 7 */
09597     { { 0x17c13c7,0x2895616,0x03e128a,0x17d42df,0x1c38d63,0x0f02747,
09598         0x039aecf,0x0a4b01c,0x209c4b5,0x02e84b2 },
09599       { 0x1f91dfd,0x023e916,0x07fb9e4,0x19b3ba8,0x13af43b,0x35e02ca,
09600         0x0eb0899,0x3bd2c7b,0x19d701f,0x014faee },
09601       0 },
09602     /* 8 */
09603     { { 0x0e63d34,0x1fb8c6c,0x0fab4fe,0x1caa795,0x0f46005,0x179ed69,
09604         0x093334d,0x120c701,0x39206d5,0x021627e },
09605       { 0x183553a,0x03d7319,0x09e5aa7,0x12b8959,0x2087909,0x0011194,
09606         0x1045071,0x0713f32,0x16d0254,0x03aec1a },
09607       0 },
09608     /* 9 */
09609     { { 0x01647c5,0x1b2856b,0x1799461,0x11f133d,0x0b8127d,0x1937eeb,
09610         0x266aa37,0x1f68f71,0x0cbd1b2,0x03aca08 },
09611       { 0x287e008,0x1be361a,0x38f3940,0x276488d,0x2d87dfa,0x0333b2c,
09612         0x2d2e428,0x368755b,0x09b55a7,0x007ca0a },
09613       0 },
09614     /* 10 */
09615     { { 0x389da99,0x2a8300e,0x0022abb,0x27ae0a1,0x0a6f2d7,0x207017a,
09616         0x047862b,0x1358c9e,0x35905e5,0x00cde92 },
09617       { 0x1f7794a,0x1d40348,0x3f613c6,0x2ddf5b5,0x0207005,0x133f5ba,
09618         0x1a37810,0x3ef5829,0x0d5f4c2,0x0035978 },
09619       0 },
09620     /* 11 */
09621     { { 0x1275d38,0x026efad,0x2358d9d,0x1142f82,0x14268a7,0x1cfac99,
09622         0x362ff49,0x288cbc1,0x24252f4,0x0308f68 },
09623       { 0x394520c,0x06e13c2,0x178e5da,0x18ec16f,0x1096667,0x134a7a8,
09624         0x0dcb869,0x33fc4e9,0x38cc790,0x006778e },
09625       0 },
09626     /* 12 */
09627     { { 0x2c5fe04,0x29c5b09,0x1bdb183,0x02ceee8,0x03b28de,0x132dc4b,
09628         0x32c586a,0x32ff5d0,0x3d491fc,0x038d372 },
09629       { 0x2a58403,0x2351aea,0x3a53b40,0x21a0ba5,0x39a6974,0x1aaaa2b,
09630         0x3901273,0x03dfe78,0x3447b4e,0x039d907 },
09631       0 },
09632     /* 13 */
09633     { { 0x364ba59,0x14e5077,0x02fc7d7,0x3b02c09,0x1d33f10,0x0560616,
09634         0x06dfc6a,0x15efd3c,0x357052a,0x01284b7 },
09635       { 0x039dbd0,0x18ce3e5,0x3e1fbfa,0x352f794,0x0d3c24b,0x07c6cc5,
09636         0x1e4ffa2,0x3a91bf5,0x293bb5b,0x01abd6a },
09637       0 },
09638     /* 14 */
09639     { { 0x0c91999,0x02da644,0x0491da1,0x100a960,0x00a24b4,0x2330824,
09640         0x0094b4b,0x1004cf8,0x35a66a4,0x017f8d1 },
09641       { 0x13e7b4b,0x232af7e,0x391ab0f,0x069f08f,0x3292b50,0x3479898,
09642         0x2889aec,0x2a4590b,0x308ecfe,0x02d5138 },
09643       0 },
09644     /* 15 */
09645     { { 0x2ddfdce,0x231ba45,0x39e6647,0x19be245,0x12c3291,0x35399f8,
09646         0x0d6e764,0x3082d3a,0x2bda6b0,0x0382dac },
09647       { 0x37efb57,0x04b7cae,0x00070d3,0x379e431,0x01aac0d,0x1e6f251,
09648         0x0336ad6,0x0ddd3e4,0x3de25a6,0x01c7008 },
09649       0 },
09650     /* 16 */
09651     { { 0x3e20925,0x230912f,0x286762a,0x30e3f73,0x391c19a,0x34e1c18,
09652         0x16a5d5d,0x093d96a,0x3d421d3,0x0187561 },
09653       { 0x37173ea,0x19ce8a8,0x0b65e87,0x0214dde,0x2238480,0x16ead0f,
09654         0x38441e0,0x3bef843,0x2124621,0x03e847f },
09655       0 },
09656     /* 17 */
09657     { { 0x0b19ffd,0x247cacb,0x3c231c8,0x16ec648,0x201ba8d,0x2b172a3,
09658         0x103d678,0x2fb72db,0x04c1f13,0x0161bac },
09659       { 0x3e8ed09,0x171b949,0x2de20c3,0x0f06067,0x21e81a3,0x1b194be,
09660         0x0fd6c05,0x13c449e,0x0087086,0x006756b },
09661       0 },
09662     /* 18 */
09663     { { 0x09a4e1f,0x27d604c,0x00741e9,0x06fa49c,0x0ab7de7,0x3f4a348,
09664         0x25ef0be,0x158fc9a,0x33f7f9c,0x039f001 },
09665       { 0x2f59f76,0x3598e83,0x30501f6,0x15083f2,0x0669b3b,0x29980b5,
09666         0x0c1f7a7,0x0f02b02,0x0fec65b,0x0382141 },
09667       0 },
09668     /* 19 */
09669     { { 0x031b3ca,0x23da368,0x2d66f09,0x27b9b69,0x06d1cab,0x13c91ba,
09670         0x3d81fa9,0x25ad16f,0x0825b09,0x01e3c06 },
09671       { 0x225787f,0x3bf790e,0x2c9bb7e,0x0347732,0x28016f8,0x0d6ff0d,
09672         0x2a4877b,0x1d1e833,0x3b87e94,0x010e9dc },
09673       0 },
09674     /* 20 */
09675     { { 0x2b533d5,0x1ddcd34,0x1dc0625,0x3da86f7,0x3673b8a,0x1e7b0a4,
09676         0x3e7c9aa,0x19ac55d,0x251c3b2,0x02edb79 },
09677       { 0x25259b3,0x24c0ead,0x3480e7e,0x34f40e9,0x3d6a0af,0x2cf3f09,
09678         0x2c83d19,0x2e66f16,0x19a5d18,0x0182d18 },
09679       0 },
09680     /* 21 */
09681     { { 0x2e5aa1c,0x28e3846,0x3658bd6,0x0ad279c,0x1b8b765,0x397e1fb,
09682         0x130014e,0x3ff342c,0x3b2aeeb,0x02743c9 },
09683       { 0x2730a55,0x0918c5e,0x083aca9,0x0bf76ef,0x19c955b,0x300669c,
09684         0x01dfe0a,0x312341f,0x26d356e,0x0091295 },
09685       0 },
09686     /* 22 */
09687     { { 0x2cf1f96,0x00e52ba,0x271c6db,0x2a40930,0x19f2122,0x0b2f4ee,
09688         0x26ac1b8,0x3bda498,0x0873581,0x0117963 },
09689       { 0x38f9dbc,0x3d1e768,0x2040d3f,0x11ba222,0x3a8aaf1,0x1b82fb5,
09690         0x1adfb24,0x2de9251,0x21cc1e4,0x0301038 },
09691       0 },
09692     /* 23 */
09693     { { 0x38117b6,0x2bc001b,0x1433847,0x3fdce8d,0x3651969,0x3651d7a,
09694         0x2b35761,0x1bb1d20,0x097682c,0x00737d7 },
09695       { 0x1f04839,0x1dd6d04,0x16987db,0x3d12378,0x17dbeac,0x1c2cc86,
09696         0x121dd1b,0x3fcf6ca,0x1f8a92d,0x00119d5 },
09697       0 },
09698     /* 24 */
09699     { { 0x0e8ffcd,0x2b174af,0x1a82cc8,0x22cbf98,0x30d53c4,0x080b5b1,
09700         0x3161727,0x297cfdb,0x2113b83,0x0011b97 },
09701       { 0x0007f01,0x23fd936,0x3183e7b,0x0496bd0,0x07fb1ef,0x178680f,
09702         0x1c5ea63,0x0016c11,0x2c3303d,0x01b8041 },
09703       0 },
09704     /* 25 */
09705     { { 0x0dd73b1,0x1cd6122,0x10d948c,0x23e657b,0x3767070,0x15a8aad,
09706         0x385ea8c,0x33c7ce0,0x0ede901,0x0110965 },
09707       { 0x2d4b65b,0x2a8b244,0x0c37f8f,0x0ee5b24,0x394c234,0x3a5e347,
09708         0x26e4a15,0x39a3b4c,0x2514c2e,0x029e5be },
09709       0 },
09710     /* 26 */
09711     { { 0x23addd7,0x3ed8120,0x13b3359,0x20f959a,0x09e2a61,0x32fcf20,
09712         0x05b78e3,0x19ba7e2,0x1a9c697,0x0392b4b },
09713       { 0x2048a61,0x3dfd0a3,0x19a0357,0x233024b,0x3082d19,0x00fb63b,
09714         0x3a1af4c,0x1450ff0,0x046c37b,0x0317a50 },
09715       0 },
09716     /* 27 */
09717     { { 0x3e75f9e,0x294e30a,0x3a78476,0x3a32c48,0x36fd1a9,0x0427012,
09718         0x1e4df0b,0x11d1f61,0x1afdb46,0x018ca0f },
09719       { 0x2f2df15,0x0a33dee,0x27f4ce7,0x1542b66,0x3e592c4,0x20d2f30,
09720         0x3226ade,0x2a4e3ea,0x1ab1981,0x01a2f46 },
09721       0 },
09722     /* 28 */
09723     { { 0x087d659,0x3ab5446,0x305ac08,0x3d2cd64,0x33374d5,0x3f9d3f8,
09724         0x186981c,0x37f5a5a,0x2f53c6f,0x01254a4 },
09725       { 0x2cec896,0x1e32786,0x04844a8,0x043b16d,0x3d964b2,0x1935829,
09726         0x16f7e26,0x1a0dd9a,0x30d2603,0x003b1d4 },
09727       0 },
09728     /* 29 */
09729     { { 0x12687bb,0x04e816b,0x21fa2da,0x1abccb8,0x3a1f83b,0x375181e,
09730         0x0f5ef51,0x0fc2ce4,0x3a66486,0x003d881 },
09731       { 0x3138233,0x1f8eec3,0x2718bd6,0x1b09caa,0x2dd66b9,0x1bb222b,
09732         0x1004072,0x1b73e3b,0x07208ed,0x03fc36c },
09733       0 },
09734     /* 30 */
09735     { { 0x095d553,0x3e84053,0x0a8a749,0x3f575a0,0x3a44052,0x3ced59b,
09736         0x3b4317f,0x03a8c60,0x13c8874,0x00c4ed4 },
09737       { 0x0d11549,0x0b8ab02,0x221cb40,0x02ed37b,0x2071ee1,0x1fc8c83,
09738         0x3987dd4,0x27e049a,0x0f986f1,0x00b4eaf },
09739       0 },
09740     /* 31 */
09741     { { 0x15581a2,0x2214060,0x11af4c2,0x1598c88,0x19a0a6d,0x32acba6,
09742         0x3a7a0f0,0x2337c66,0x210ded9,0x0300dbe },
09743       { 0x1fbd009,0x3822eb0,0x181629a,0x2401b45,0x30b68b1,0x2e78363,
09744         0x2b32779,0x006530b,0x2c4b6d4,0x029aca8 },
09745       0 },
09746     /* 32 */
09747     { { 0x13549cf,0x0f943db,0x265ed43,0x1bfeb35,0x06f3369,0x3847f2d,
09748         0x1bfdacc,0x26181a5,0x252af7c,0x02043b8 },
09749       { 0x159bb2c,0x143f85c,0x357b654,0x2f9d62c,0x2f7dfbe,0x1a7fa9c,
09750         0x057e74d,0x05d14ac,0x17a9273,0x035215c },
09751       0 },
09752     /* 33 */
09753     { { 0x0cb5a98,0x106a2bc,0x10bf117,0x24c7cc4,0x3d3da8f,0x2ce0ab7,
09754         0x14e2cba,0x1813866,0x1a72f9a,0x01a9811 },
09755       { 0x2b2411d,0x3034fe8,0x16e0170,0x0f9443a,0x0be0eb8,0x2196cf3,
09756         0x0c9f738,0x15e40ef,0x0faf9e1,0x034f917 },
09757       0 },
09758     /* 34 */
09759     { { 0x03f7669,0x3da6efa,0x3d6bce1,0x209ca1d,0x109f8ae,0x09109e3,
09760         0x08ae543,0x3067255,0x1dee3c2,0x0081dd5 },
09761       { 0x3ef1945,0x358765b,0x28c387b,0x3bec4b4,0x218813c,0x0b7d92a,
09762         0x3cd1d67,0x2c0367e,0x2e57154,0x0123717 },
09763       0 },
09764     /* 35 */
09765     { { 0x3e5a199,0x1e42ffd,0x0bb7123,0x33e6273,0x1e0efb8,0x294671e,
09766         0x3a2bfe0,0x3d11709,0x2eddff6,0x03cbec2 },
09767       { 0x0b5025f,0x0255d7c,0x1f2241c,0x35d03ea,0x0550543,0x202fef4,
09768         0x23c8ad3,0x354963e,0x015db28,0x0284fa4 },
09769       0 },
09770     /* 36 */
09771     { { 0x2b65cbc,0x1e8d428,0x0226f9f,0x1c8a919,0x10b04b9,0x08fc1e8,
09772         0x1ce241e,0x149bc99,0x2b01497,0x00afc35 },
09773       { 0x3216fb7,0x1374fd2,0x226ad3d,0x19fef76,0x0f7d7b8,0x1c21417,
09774         0x37b83f6,0x3a27eba,0x25a162f,0x010aa52 },
09775       0 },
09776     /* 37 */
09777     { { 0x2adf191,0x1ab42fa,0x28d7584,0x2409689,0x20f8a48,0x253707d,
09778         0x2030504,0x378f7a1,0x169c65e,0x00b0b76 },
09779       { 0x3849c17,0x085c764,0x10dd6d0,0x2e87689,0x1460488,0x30e9521,
09780         0x10c7063,0x1b6f120,0x21f42c5,0x03d0dfe },
09781       0 },
09782     /* 38 */
09783     { { 0x20f7dab,0x035c512,0x29ac6aa,0x24c5ddb,0x20f0497,0x17ce5e1,
09784         0x00a050f,0x1eaa14b,0x3335470,0x02abd16 },
09785       { 0x18d364a,0x0df0cf0,0x316585e,0x018f925,0x0d40b9b,0x17b1511,
09786         0x1716811,0x1caf3d0,0x10df4f2,0x0337d8c },
09787       0 },
09788     /* 39 */
09789     { { 0x2a8b7ef,0x0f188e3,0x2287747,0x06216f0,0x008e935,0x2f6a38d,
09790         0x1567722,0x0bfc906,0x0bada9e,0x03c3402 },
09791       { 0x014d3b1,0x099c749,0x2a76291,0x216c067,0x3b37549,0x14ef2f6,
09792         0x21b96d4,0x1ee2d71,0x2f5ca88,0x016f570 },
09793       0 },
09794     /* 40 */
09795     { { 0x09a3154,0x3d1a7bd,0x2e9aef0,0x255b8ac,0x03e85a5,0x2a492a7,
09796         0x2aec1ea,0x11c6516,0x3c8a09e,0x02a84b7 },
09797       { 0x1f69f1d,0x09c89d3,0x1e7326f,0x0b28bfd,0x0e0e4c8,0x1ea7751,
09798         0x18ce73b,0x2a406e7,0x273e48c,0x01b00db },
09799       0 },
09800     /* 41 */
09801     { { 0x36e3138,0x2b84a83,0x345a5cf,0x00096b4,0x16966ef,0x159caf1,
09802         0x13c64b4,0x2f89226,0x25896af,0x00a4bfd },
09803       { 0x2213402,0x1435117,0x09fed52,0x09d0e4b,0x0f6580e,0x2871cba,
09804         0x3b397fd,0x1c9d825,0x090311b,0x0191383 },
09805       0 },
09806     /* 42 */
09807     { { 0x07153f0,0x1087869,0x18c9e1e,0x1e64810,0x2b86c3b,0x0175d9c,
09808         0x3dce877,0x269de4e,0x393cab7,0x03c96b9 },
09809       { 0x1869d0c,0x06528db,0x02641f3,0x209261b,0x29d55c8,0x25ba517,
09810         0x3b5ea30,0x028f927,0x25313db,0x00e6e39 },
09811       0 },
09812     /* 43 */
09813     { { 0x2fd2e59,0x150802d,0x098f377,0x19a4957,0x135e2c0,0x38a95ce,
09814         0x1ab21a0,0x36c1b67,0x32f0f19,0x00e448b },
09815       { 0x3cad53c,0x3387800,0x17e3cfb,0x03f9970,0x3225b2c,0x2a84e1d,
09816         0x3af1d29,0x3fe35ca,0x2f8ce80,0x0237a02 },
09817       0 },
09818     /* 44 */
09819     { { 0x07bbb76,0x3aa3648,0x2758afb,0x1f085e0,0x1921c7e,0x3010dac,
09820         0x22b74b1,0x230137e,0x1062e36,0x021c652 },
09821       { 0x3993df5,0x24a2ee8,0x126ab5f,0x2d7cecf,0x0639d75,0x16d5414,
09822         0x1aa78a8,0x3f78404,0x26a5b74,0x03f0c57 },
09823       0 },
09824     /* 45 */
09825     { { 0x0d6ecfa,0x3f506ba,0x3f86561,0x3d86bb1,0x15f8c44,0x2491d07,
09826         0x052a7b4,0x2422261,0x3adee38,0x039b529 },
09827       { 0x193c75d,0x14bb451,0x1162605,0x293749c,0x370a70d,0x2e8b1f6,
09828         0x2ede937,0x2b95f4a,0x39a9be2,0x00d77eb },
09829       0 },
09830     /* 46 */
09831     { { 0x2736636,0x15bf36a,0x2b7e6b9,0x25eb8b2,0x209f51d,0x3cd2659,
09832         0x10bf410,0x034afec,0x3d71c83,0x0076971 },
09833       { 0x0ce6825,0x07920cf,0x3c3b5c4,0x23fe55c,0x015ad11,0x08c0dae,
09834         0x0552c7f,0x2e75a8a,0x0fddbf4,0x01c1df0 },
09835       0 },
09836     /* 47 */
09837     { { 0x2b9661c,0x0ffe351,0x3d71bf6,0x1ac34b3,0x3a1dfd3,0x211fe3d,
09838         0x33e140a,0x3f9100d,0x32ee50e,0x014ea18 },
09839       { 0x16d8051,0x1bfda1a,0x068a097,0x2571d3d,0x1daec0c,0x39389af,
09840         0x194dc35,0x3f3058a,0x36d34e1,0x000a329 },
09841       0 },
09842     /* 48 */
09843     { { 0x09877ee,0x351f73f,0x0002d11,0x0420074,0x2c8b362,0x130982d,
09844         0x02c1175,0x3c11b40,0x0d86962,0x001305f },
09845       { 0x0daddf5,0x2f4252c,0x15c06d9,0x1d49339,0x1bea235,0x0b680ed,
09846         0x3356e67,0x1d1d198,0x1e9fed9,0x03dee93 },
09847       0 },
09848     /* 49 */
09849     { { 0x3e1263f,0x2fe8d3a,0x3ce6d0d,0x0d5c6b9,0x3557637,0x0a9bd48,
09850         0x0405538,0x0710749,0x2005213,0x038c7e5 },
09851       { 0x26b6ec6,0x2e485ba,0x3c44d1b,0x0b9cf0b,0x037a1d1,0x27428a5,
09852         0x0e7eac8,0x351ef04,0x259ce34,0x02a8e98 },
09853       0 },
09854     /* 50 */
09855     { { 0x2f3dcd3,0x3e77d4d,0x3360fbc,0x1434afd,0x36ceded,0x3d413d6,
09856         0x1710fad,0x36bb924,0x1627e79,0x008e637 },
09857       { 0x109569e,0x1c168db,0x3769cf4,0x2ed4527,0x0ea0619,0x17d80d3,
09858         0x1c03773,0x18843fe,0x1b21c04,0x015c5fd },
09859       0 },
09860     /* 51 */
09861     { { 0x1dd895e,0x08a7248,0x04519fe,0x001030a,0x18e5185,0x358dfb3,
09862         0x13d2391,0x0a37be8,0x0560e3c,0x019828b },
09863       { 0x27fcbd0,0x2a22bb5,0x30969cc,0x1e03aa7,0x1c84724,0x0ba4ad3,
09864         0x32f4817,0x0914cca,0x14c4f52,0x01893b9 },
09865       0 },
09866     /* 52 */
09867     { { 0x097eccc,0x1273936,0x00aa095,0x364fe62,0x04d49d1,0x10e9f08,
09868         0x3c24230,0x3ef01c8,0x2fb92bd,0x013ce4a },
09869       { 0x1e44fd9,0x27e3e9f,0x2156696,0x3915ecc,0x0b66cfb,0x1a3af0f,
09870         0x2fa8033,0x0e6736c,0x177ccdb,0x0228f9e },
09871       0 },
09872     /* 53 */
09873     { { 0x2c4b125,0x06207c1,0x0a8cdde,0x003db8f,0x1ae34e3,0x31e84fa,
09874         0x2999de5,0x11013bd,0x02370c2,0x00e2234 },
09875       { 0x0f91081,0x200d591,0x1504762,0x1857c05,0x23d9fcf,0x0cb34db,
09876         0x27edc86,0x08cd860,0x2471810,0x029798b },
09877       0 },
09878     /* 54 */
09879     { { 0x3acd6c8,0x097b8cb,0x3c661a8,0x15152f2,0x1699c63,0x237e64c,
09880         0x23edf79,0x16b7033,0x0e6466a,0x00b11da },
09881       { 0x0a64bc9,0x1bfe324,0x1f5cb34,0x08391de,0x0630a60,0x3017a21,
09882         0x09d064b,0x14a8365,0x041f9e6,0x01ed799 },
09883       0 },
09884     /* 55 */
09885     { { 0x128444a,0x2508b07,0x2a39216,0x362f84d,0x2e996c5,0x2c31ff3,
09886         0x07afe5f,0x1d1288e,0x3cb0c8d,0x02e2bdc },
09887       { 0x38b86fd,0x3a0ea8c,0x1cff5fd,0x1629629,0x3fee3f1,0x02b250c,
09888         0x2e8f6f2,0x0225727,0x15f7f3f,0x0280d8e },
09889       0 },
09890     /* 56 */
09891     { { 0x10f7770,0x0f1aee8,0x0e248c7,0x20684a8,0x3a6f16d,0x06f0ae7,
09892         0x0df6825,0x2d4cc40,0x301875f,0x012f8da },
09893       { 0x3b56dbb,0x1821ba7,0x24f8922,0x22c1f9e,0x0306fef,0x1b54bc8,
09894         0x2ccc056,0x00303ba,0x2871bdc,0x0232f26 },
09895       0 },
09896     /* 57 */
09897     { { 0x0dac4ab,0x0625730,0x3112e13,0x101c4bf,0x3a874a4,0x2873b95,
09898         0x32ae7c6,0x0d7e18c,0x13e0c08,0x01139d5 },
09899       { 0x334002d,0x00fffdd,0x025c6d5,0x22c2cd1,0x19d35cb,0x3a1ce2d,
09900         0x3702760,0x3f06257,0x03a5eb8,0x011c29a },
09901       0 },
09902     /* 58 */
09903     { { 0x0513482,0x1d87724,0x276a81b,0x0a807a4,0x3028720,0x339cc20,
09904         0x2441ee0,0x31bbf36,0x290c63d,0x0059041 },
09905       { 0x106a2ed,0x0d2819b,0x100bf50,0x114626c,0x1dd4d77,0x2e08632,
09906         0x14ae72a,0x2ed3f64,0x1fd7abc,0x035cd1e },
09907       0 },
09908     /* 59 */
09909     { { 0x2d4c6e5,0x3bec596,0x104d7ed,0x23d6c1b,0x0262cf0,0x15d72c5,
09910         0x2d5bb18,0x199ac4b,0x1e30771,0x020591a },
09911       { 0x21e291e,0x2e75e55,0x1661d7a,0x08b0778,0x3eb9daf,0x0d78144,
09912         0x1827eb1,0x0fe73d2,0x123f0dd,0x0028db7 },
09913       0 },
09914     /* 60 */
09915     { { 0x1d5533c,0x34cb1d0,0x228f098,0x27a1a11,0x17c5f5a,0x0d26f44,
09916         0x2228ade,0x2c460e6,0x3d6fdba,0x038cc77 },
09917       { 0x3cc6ed8,0x02ada1a,0x260e510,0x2f7bde8,0x37160c3,0x33a1435,
09918         0x23d9a7b,0x0ce2641,0x02a492e,0x034ed1e },
09919       0 },
09920     /* 61 */
09921     { { 0x3821f90,0x26dba3c,0x3aada14,0x3b59bad,0x292edd9,0x2804c45,
09922         0x3669531,0x296f42e,0x35a4c86,0x01ca049 },
09923       { 0x3ff47e5,0x2163df4,0x2441503,0x2f18405,0x15e1616,0x37f66ec,
09924         0x30f11a7,0x141658a,0x27ece14,0x00b018b },
09925       0 },
09926     /* 62 */
09927     { { 0x159ac2e,0x3e65bc0,0x2713a76,0x0db2f6c,0x3281e77,0x2391811,
09928         0x16d2880,0x1fbc4ab,0x1f92c4e,0x00a0a8d },
09929       { 0x0ce5cd2,0x152c7b0,0x02299c3,0x3244de7,0x2cf99ef,0x3a0b047,
09930         0x2caf383,0x0aaf664,0x113554d,0x031c735 },
09931       0 },
09932     /* 63 */
09933     { { 0x1b578f4,0x177a702,0x3a7a488,0x1638ebf,0x31884e2,0x2460bc7,
09934         0x36b1b75,0x3ce8e3d,0x340cf47,0x03143d9 },
09935       { 0x34b68ea,0x12b7ccd,0x1fe2a9c,0x08da659,0x0a406f3,0x1694c14,
09936         0x06a2228,0x16370be,0x3a72129,0x02e7b2c },
09937       0 },
09938     /* 64 */
09939     { { 0x0f8b16a,0x21043bd,0x266a56f,0x3fb11ec,0x197241a,0x36721f0,
09940         0x006b8e6,0x2ac6c29,0x202cd42,0x0200fcf },
09941       { 0x0dbec69,0x0c26a01,0x105f7f0,0x3dceeeb,0x3a83b85,0x363865f,
09942         0x097273a,0x2b70718,0x00e5067,0x03025d1 },
09943       0 },
09944     /* 65 */
09945     { { 0x379ab34,0x295bcb0,0x38d1846,0x22e1077,0x3a8ee06,0x1db1a3b,
09946         0x3144591,0x07cc080,0x2d5915f,0x03c6bcc },
09947       { 0x175bd50,0x0dd4c57,0x27bc99c,0x2ebdcbd,0x3837cff,0x235dc8f,
09948         0x13a4184,0x0722c18,0x130e2d4,0x008f43c },
09949       0 },
09950     /* 66 */
09951     { { 0x01500d9,0x2adbb7d,0x2da8857,0x397f2fa,0x10d890a,0x25c9654,
09952         0x3e86488,0x3eb754b,0x1d6c0a3,0x02c0a23 },
09953       { 0x10bcb08,0x083cc19,0x2e16853,0x04da575,0x271af63,0x2626a9d,
09954         0x3520a7b,0x32348c7,0x24ff408,0x03ff4dc },
09955       0 },
09956     /* 67 */
09957     { { 0x058e6cb,0x1a3992d,0x1d28539,0x080c5e9,0x2992dad,0x2a9d7d5,
09958         0x14ae0b7,0x09b7ce0,0x34ad78c,0x03d5643 },
09959       { 0x30ba55a,0x092f4f3,0x0bae0fc,0x12831de,0x20fc472,0x20ed9d2,
09960         0x29864f6,0x1288073,0x254f6f7,0x00635b6 },
09961       0 },
09962     /* 68 */
09963     { { 0x1be5a2b,0x0f88975,0x33c6ed9,0x20d64d3,0x06fe799,0x0989bff,
09964         0x1409262,0x085a90c,0x0d97990,0x0142eed },
09965       { 0x17ec63e,0x06471b9,0x0db2378,0x1006077,0x265422c,0x08db83d,
09966         0x28099b0,0x1270d06,0x11801fe,0x00ac400 },
09967       0 },
09968     /* 69 */
09969     { { 0x3391593,0x22d7166,0x30fcfc6,0x2896609,0x3c385f5,0x066b72e,
09970         0x04f3aad,0x2b831c5,0x19983fb,0x0375562 },
09971       { 0x0b82ff4,0x222e39d,0x34c993b,0x101c79c,0x2d2e03c,0x0f00c8a,
09972         0x3a9eaf4,0x1810669,0x151149d,0x039b931 },
09973       0 },
09974     /* 70 */
09975     { { 0x29af288,0x1956ec7,0x293155f,0x193deb6,0x1647e1a,0x2ca0839,
09976         0x297e4bc,0x15bfd0d,0x1b107ed,0x0147803 },
09977       { 0x31c327e,0x05a6e1d,0x02ad43d,0x02d2a5b,0x129cdb2,0x37ad1de,
09978         0x3d51f53,0x245df01,0x2414982,0x0388bd0 },
09979       0 },
09980     /* 71 */
09981     { { 0x35f1abb,0x17a3d18,0x0874cd4,0x2d5a14e,0x17edc0c,0x16a00d3,
09982         0x072c1fb,0x1232725,0x33d52dc,0x03dc24d },
09983       { 0x0af30d6,0x259aeea,0x369c401,0x12bc4de,0x295bf5f,0x0d8711f,
09984         0x26162a9,0x16c44e5,0x288e727,0x02f54b4 },
09985       0 },
09986     /* 72 */
09987     { { 0x05fa877,0x1571ea7,0x3d48ab1,0x1c9f4e8,0x017dad6,0x0f46276,
09988         0x343f9e7,0x1de990f,0x0e4c8aa,0x028343e },
09989       { 0x094f92d,0x3abf633,0x1b3a0bb,0x2f83137,0x0d818c8,0x20bae85,
09990         0x0c65f8b,0x1a8008b,0x0c7946d,0x0295b1e },
09991       0 },
09992     /* 73 */
09993     { { 0x1d09529,0x08e46c3,0x1fcf296,0x298f6b7,0x1803e0e,0x2d6fd20,
09994         0x37351f5,0x0d9e8b1,0x1f8731a,0x0362fbf },
09995       { 0x00157f4,0x06750bf,0x2650ab9,0x35ffb23,0x2f51cae,0x0b522c2,
09996         0x39cb400,0x191e337,0x0a5ce9f,0x021529a },
09997       0 },
09998     /* 74 */
09999     { { 0x3506ea5,0x17d9ed8,0x0d66dc3,0x22693f8,0x19286c4,0x3a57353,
10000         0x101d3bf,0x1aa54fc,0x20b9884,0x0172b3a },
10001       { 0x0eac44d,0x37d8327,0x1c3aa90,0x3d0d534,0x23db29a,0x3576eaf,
10002         0x1d3de8a,0x3bea423,0x11235e4,0x039260b },
10003       0 },
10004     /* 75 */
10005     { { 0x34cd55e,0x01288b0,0x1132231,0x2cc9a03,0x358695b,0x3e87650,
10006         0x345afa1,0x01267ec,0x3f616b2,0x02011ad },
10007       { 0x0e7d098,0x0d6078e,0x0b70b53,0x237d1bc,0x0d7f61e,0x132de31,
10008         0x1ea9ea4,0x2bd54c3,0x27b9082,0x03ac5f2 },
10009       0 },
10010     /* 76 */
10011     { { 0x2a145b9,0x06d661d,0x31ec175,0x03f06f1,0x3a5cf6b,0x249c56e,
10012         0x2035653,0x384c74f,0x0bafab5,0x0025ec0 },
10013       { 0x25f69e1,0x1b23a55,0x1199aa6,0x16ad6f9,0x077e8f7,0x293f661,
10014         0x33ba11d,0x3327980,0x07bafdb,0x03e571d },
10015       0 },
10016     /* 77 */
10017     { { 0x2bae45e,0x3c074ef,0x2955558,0x3c312f1,0x2a8ebe9,0x2f193f1,
10018         0x3705b1d,0x360deba,0x01e566e,0x00d4498 },
10019       { 0x21161cd,0x1bc787e,0x2f87933,0x3553197,0x1328ab8,0x093c879,
10020         0x17eee27,0x2adad1d,0x1236068,0x003be5c },
10021       0 },
10022     /* 78 */
10023     { { 0x0ca4226,0x2633dd5,0x2c8e025,0x0e3e190,0x05eede1,0x1a385e4,
10024         0x163f744,0x2f25522,0x1333b4f,0x03f05b6 },
10025       { 0x3c800ca,0x1becc79,0x2daabe9,0x0c499e2,0x1138063,0x3fcfa2d,
10026         0x2244976,0x1e85cf5,0x2f1b95d,0x0053292 },
10027       0 },
10028     /* 79 */
10029     { { 0x12f81d5,0x1dc6eaf,0x11967a4,0x1a407df,0x31a5f9d,0x2b67241,
10030         0x18bef7c,0x08c7762,0x063f59c,0x01015ec },
10031       { 0x1c05c0a,0x360bfa2,0x1f85bff,0x1bc7703,0x3e4911c,0x0d685b6,
10032         0x2fccaea,0x02c4cef,0x164f133,0x0070ed7 },
10033       0 },
10034     /* 80 */
10035     { { 0x0ec21fe,0x052ffa0,0x3e825fe,0x1ab0956,0x3f6ce11,0x3d29759,
10036         0x3c5a072,0x18ebe62,0x148db7e,0x03eb49c },
10037       { 0x1ab05b3,0x02dab0a,0x1ae690c,0x0f13894,0x137a9a8,0x0aab79f,
10038         0x3dc875c,0x06a1029,0x1e39f0e,0x01dce1f },
10039       0 },
10040     /* 81 */
10041     { { 0x16c0dd7,0x3b31269,0x2c741e9,0x3611821,0x2a5cffc,0x1416bb3,
10042         0x3a1408f,0x311fa3d,0x1c0bef0,0x02cdee1 },
10043       { 0x00e6a8f,0x1adb933,0x0f23359,0x2fdace2,0x2fd6d4b,0x0e73bd3,
10044         0x2453fac,0x0a356ae,0x2c8f9f6,0x02704d6 },
10045       0 },
10046     /* 82 */
10047     { { 0x0e35743,0x28c80a1,0x0def32a,0x2c6168f,0x1320d6a,0x37c6606,
10048         0x21b1761,0x2147ee0,0x21fc433,0x015c84d },
10049       { 0x1fc9168,0x36cda9c,0x003c1f0,0x1cd7971,0x15f98ba,0x1ef363d,
10050         0x0ca87e3,0x046f7d9,0x3c9e6bb,0x0372eb0 },
10051       0 },
10052     /* 83 */
10053     { { 0x118cbe2,0x3665a11,0x304ef01,0x062727a,0x3d242fc,0x11ffbaf,
10054         0x3663c7e,0x1a189c9,0x09e2d62,0x02e3072 },
10055       { 0x0e1d569,0x162f772,0x0cd051a,0x322df62,0x3563809,0x047cc7a,
10056         0x027fd9f,0x08b509b,0x3da2f94,0x01748ee },
10057       0 },
10058     /* 84 */
10059     { { 0x1c8f8be,0x31ca525,0x22bf0a1,0x200efcd,0x02961c4,0x3d8f52b,
10060         0x018403d,0x3a40279,0x1cb91ec,0x030427e },
10061       { 0x0945705,0x0257416,0x05c0c2d,0x25b77ae,0x3b9083d,0x2901126,
10062         0x292b8d7,0x07b8611,0x04f2eee,0x026f0cd },
10063       0 },
10064     /* 85 */
10065     { { 0x2913074,0x2b8d590,0x02b10d5,0x09d2295,0x255491b,0x0c41cca,
10066         0x1ca665b,0x133051a,0x1525f1a,0x00a5647 },
10067       { 0x04f983f,0x3d6daee,0x04e1e76,0x1067d7e,0x1be7eef,0x02ea862,
10068         0x00d4968,0x0ccb048,0x11f18ef,0x018dd95 },
10069       0 },
10070     /* 86 */
10071     { { 0x22976cc,0x17c5395,0x2c38bda,0x3983bc4,0x222bca3,0x332a614,
10072         0x3a30646,0x261eaef,0x1c808e2,0x02f6de7 },
10073       { 0x306a772,0x32d7272,0x2dcefd2,0x2abf94d,0x038f475,0x30ad76e,
10074         0x23e0227,0x3052b0a,0x001add3,0x023ba18 },
10075       0 },
10076     /* 87 */
10077     { { 0x0ade873,0x25a6069,0x248ccbe,0x13713ee,0x17ee9aa,0x28152e9,
10078         0x2e28995,0x2a92cb3,0x17a6f77,0x024b947 },
10079       { 0x190a34d,0x2ebea1c,0x1ed1948,0x16fdaf4,0x0d698f7,0x32bc451,
10080         0x0ee6e30,0x2aaab40,0x06f0a56,0x01460be },
10081       0 },
10082     /* 88 */
10083     { { 0x24cc99c,0x1884b1e,0x1ca1fba,0x1a0f9b6,0x2ff609b,0x2b26316,
10084         0x3b27cb5,0x29bc976,0x35d4073,0x024772a },
10085       { 0x3575a70,0x1b30f57,0x07fa01b,0x0e5be36,0x20cb361,0x26605cd,
10086         0x1d4e8c8,0x13cac59,0x2db9797,0x005e833 },
10087       0 },
10088     /* 89 */
10089     { { 0x36c8d3a,0x1878a81,0x124b388,0x0e4843e,0x1701aad,0x0ea0d76,
10090         0x10eae41,0x37d0653,0x36c7f4c,0x00ba338 },
10091       { 0x37a862b,0x1cf6ac0,0x08fa912,0x2dd8393,0x101ba9b,0x0eebcb7,
10092         0x2453883,0x1a3cfe5,0x2cb34f6,0x03d3331 },
10093       0 },
10094     /* 90 */
10095     { { 0x1f79687,0x3d4973c,0x281544e,0x2564bbe,0x17c5954,0x171e34a,
10096         0x231741a,0x3cf2784,0x0889a0d,0x02b036d },
10097       { 0x301747f,0x3f1c477,0x1f1386b,0x163bc5f,0x1592b93,0x332daed,
10098         0x080e4f5,0x1d28b96,0x26194c9,0x0256992 },
10099       0 },
10100     /* 91 */
10101     { { 0x15a4c93,0x07bf6b0,0x114172c,0x1ce0961,0x140269b,0x1b2c2eb,
10102         0x0dfb1c1,0x019ddaa,0x0ba2921,0x008c795 },
10103       { 0x2e6d2dc,0x37e45e2,0x2918a70,0x0fce444,0x34d6aa6,0x396dc88,
10104         0x27726b5,0x0c787d8,0x032d8a7,0x02ac2f8 },
10105       0 },
10106     /* 92 */
10107     { { 0x1131f2d,0x2b43a63,0x3101097,0x38cec13,0x0637f09,0x17a69d2,
10108         0x086196d,0x299e46b,0x0802cf6,0x03c6f32 },
10109       { 0x0daacb4,0x1a4503a,0x100925c,0x15583d9,0x23c4e40,0x1de4de9,
10110         0x1cc8fc4,0x2c9c564,0x0695aeb,0x02145a5 },
10111       0 },
10112     /* 93 */
10113     { { 0x1dcf593,0x17050fc,0x3e3bde3,0x0a6c062,0x178202b,0x2f7674f,
10114         0x0dadc29,0x15763a7,0x1d2daad,0x023d9f6 },
10115       { 0x081ea5f,0x045959d,0x190c841,0x3a78d31,0x0e7d2dd,0x1414fea,
10116         0x1d43f40,0x22d77ff,0x2b9c072,0x03e115c },
10117       0 },
10118     /* 94 */
10119     { { 0x3af71c9,0x29e9c65,0x25655e1,0x111e9cd,0x3a14494,0x3875418,
10120         0x34ae070,0x0b06686,0x310616b,0x03b7b89 },
10121       { 0x1734121,0x00d3d44,0x29f0b2f,0x1552897,0x31cac6e,0x1030bb3,
10122         0x0148f3a,0x35fd237,0x29b44eb,0x027f49f },
10123       0 },
10124     /* 95 */
10125     { { 0x2e2cb16,0x1d962bd,0x19b63cc,0x0b3f964,0x3e3eb7d,0x1a35560,
10126         0x0c58161,0x3ce1d6a,0x3b6958f,0x029030b },
10127       { 0x2dcc158,0x3b1583f,0x30568c9,0x31957c8,0x27ad804,0x28c1f84,
10128         0x3967049,0x37b3f64,0x3b87dc6,0x0266f26 },
10129       0 },
10130     /* 96 */
10131     { { 0x27dafc6,0x2548764,0x0d1984a,0x1a57027,0x252c1fb,0x24d9b77,
10132         0x1581a0f,0x1f99276,0x10ba16d,0x026af88 },
10133       { 0x0915220,0x2be1292,0x16c6480,0x1a93760,0x2fa7317,0x1a07296,
10134         0x1539871,0x112c31f,0x25787f3,0x01e2070 },
10135       0 },
10136     /* 97 */
10137     { { 0x0bcf3ff,0x266d478,0x34f6933,0x31449fd,0x00d02cb,0x340765a,
10138         0x3465a2d,0x225023e,0x319a30e,0x00579b8 },
10139       { 0x20e05f4,0x35b834f,0x0404646,0x3710d62,0x3fad7bd,0x13e1434,
10140         0x21c7d1c,0x1cb3af9,0x2cf1911,0x003957e },
10141       0 },
10142     /* 98 */
10143     { { 0x0787564,0x36601be,0x1ce67e9,0x084c7a1,0x21a3317,0x2067a35,
10144         0x0158cab,0x195ddac,0x1766fe9,0x035cf42 },
10145       { 0x2b7206e,0x20d0947,0x3b42424,0x03f1862,0x0a51929,0x38c2948,
10146         0x0bb8595,0x2942d77,0x3748f15,0x0249428 },
10147       0 },
10148     /* 99 */
10149     { { 0x2577410,0x3c23e2f,0x28c6caf,0x00d41de,0x0fd408a,0x30298e9,
10150         0x363289e,0x2302fc7,0x082c1cc,0x01dd050 },
10151       { 0x30991cd,0x103e9ba,0x029605a,0x19927f7,0x0c1ca08,0x0c93f50,
10152         0x28a3c7b,0x082e4e9,0x34d12eb,0x0232c13 },
10153       0 },
10154     /* 100 */
10155     { { 0x106171c,0x0b4155a,0x0c3fb1c,0x336c090,0x19073e9,0x2241a10,
10156         0x0e6b4fd,0x0ed476e,0x1ef4712,0x039390a },
10157       { 0x0ec36f4,0x3754f0e,0x2a270b8,0x007fd2d,0x0f9d2dc,0x1e6a692,
10158         0x066e078,0x1954974,0x2ff3c6e,0x00def28 },
10159       0 },
10160     /* 101 */
10161     { { 0x3562470,0x0b8f1f7,0x0ac94cd,0x28b0259,0x244f272,0x031e4ef,
10162         0x2d5df98,0x2c8a9f1,0x2dc3002,0x016644f },
10163       { 0x350592a,0x0e6a0d5,0x1e027a1,0x2039e0f,0x399e01d,0x2817593,
10164         0x0c0375e,0x3889b3e,0x24ab013,0x010de1b },
10165       0 },
10166     /* 102 */
10167     { { 0x256b5a6,0x0ac3b67,0x28f9ff3,0x29b67f1,0x30750d9,0x25e11a9,
10168         0x15e8455,0x279ebb0,0x298b7e7,0x0218e32 },
10169       { 0x2fc24b2,0x2b82582,0x28f22f5,0x2bd36b3,0x305398e,0x3b2e9e3,
10170         0x365dd0a,0x29bc0ed,0x36a7b3a,0x007b374 },
10171       0 },
10172     /* 103 */
10173     { { 0x05ff2f3,0x2b3589b,0x29785d3,0x300a1ce,0x0a2d516,0x0844355,
10174         0x14c9fad,0x3ccb6b6,0x385d459,0x0361743 },
10175       { 0x0b11da3,0x002e344,0x18c49f7,0x0c29e0c,0x1d2c22c,0x08237b3,
10176         0x2988f49,0x0f18955,0x1c3b4ed,0x02813c6 },
10177       0 },
10178     /* 104 */
10179     { { 0x17f93bd,0x249323b,0x11f6087,0x174e4bd,0x3cb64ac,0x086dc6b,
10180         0x2e330a8,0x142c1f2,0x2ea5c09,0x024acbb },
10181       { 0x1b6e235,0x3132521,0x00f085a,0x2a4a4db,0x1ab2ca4,0x0142224,
10182         0x3aa6b3e,0x09db203,0x2215834,0x007b9e0 },
10183       0 },
10184     /* 105 */
10185     { { 0x23e79f7,0x28b8039,0x1906a60,0x2cbce67,0x1f590e7,0x181f027,
10186         0x21054a6,0x3854240,0x2d857a6,0x03cfcb3 },
10187       { 0x10d9b55,0x1443cfc,0x2648200,0x2b36190,0x09d2fcf,0x22f439f,
10188         0x231aa7e,0x3884395,0x0543da3,0x003d5a9 },
10189       0 },
10190     /* 106 */
10191     { { 0x043e0df,0x06ffe84,0x3e6d5b2,0x3327001,0x26c74b6,0x12a145e,
10192         0x256ec0d,0x3898c69,0x3411969,0x02f63c5 },
10193       { 0x2b7494a,0x2eee1af,0x38388a9,0x1bd17ce,0x21567d4,0x13969e6,
10194         0x3a12a7a,0x3e8277d,0x03530cc,0x00b4687 },
10195       0 },
10196     /* 107 */
10197     { { 0x06508da,0x38e04d4,0x15a7192,0x312875e,0x3336180,0x2a6512c,
10198         0x1b59497,0x2e91b37,0x25eb91f,0x02841e9 },
10199       { 0x394d639,0x0747143,0x37d7e6d,0x1d62962,0x08b4af3,0x34df287,
10200         0x3c5584b,0x26bc869,0x20af87a,0x0060f5d },
10201       0 },
10202     /* 108 */
10203     { { 0x1de59a4,0x1a5c443,0x2f8729d,0x01c3a2f,0x0f1ad8d,0x3cbaf9e,
10204         0x1b49634,0x35d508a,0x39dc269,0x0075105 },
10205       { 0x390d30e,0x37033e0,0x110cb32,0x14c37a0,0x20a3b27,0x2f00ce6,
10206         0x2f1dc52,0x34988c6,0x0c29606,0x01dc7e7 },
10207       0 },
10208     /* 109 */
10209     { { 0x1040739,0x24f9de1,0x2939999,0x2e6009a,0x244539d,0x17e3f09,
10210         0x00f6f2f,0x1c63b3d,0x2310362,0x019109e },
10211       { 0x1428aa8,0x3cb61e1,0x09a84f4,0x0ffafed,0x07b7adc,0x08f406b,
10212         0x1b2c6df,0x035b480,0x3496ae9,0x012766d },
10213       0 },
10214     /* 110 */
10215     { { 0x35d1099,0x2362f10,0x1a08cc7,0x13a3a34,0x12adbcd,0x32da290,
10216         0x02e2a02,0x151140b,0x01b3f60,0x0240df6 },
10217       { 0x34c7b61,0x2eb09c1,0x172e7cd,0x2ad5eff,0x2fe2031,0x25b54d4,
10218         0x0cec965,0x18e7187,0x26a7cc0,0x00230f7 },
10219       0 },
10220     /* 111 */
10221     { { 0x2d552ab,0x374083d,0x01f120f,0x2601736,0x156baff,0x04d44a4,
10222         0x3b7c3e9,0x1acbc1b,0x0424579,0x031a425 },
10223       { 0x1231bd1,0x0eba710,0x020517b,0x21d7316,0x21eac6e,0x275a848,
10224         0x0837abf,0x0eb0082,0x302cafe,0x00fe8f6 },
10225       0 },
10226     /* 112 */
10227     { { 0x1058880,0x28f9941,0x03f2d75,0x3bd90e5,0x17da365,0x2ac9249,
10228         0x07861cf,0x023fd05,0x1b0fdb8,0x031712f },
10229       { 0x272b56b,0x04f8d2c,0x043a735,0x25446e4,0x1c8327e,0x221125a,
10230         0x0ce37df,0x2dad7f6,0x39446c2,0x00b55b6 },
10231       0 },
10232     /* 113 */
10233     { { 0x346ac6b,0x05e0bff,0x2425246,0x0981e8b,0x1d19f79,0x2692378,
10234         0x3ea3c40,0x2e90beb,0x19de503,0x003d5af },
10235       { 0x05cda49,0x353b44d,0x299d137,0x3f205bc,0x2821158,0x3ad0d00,
10236         0x06a54aa,0x2d7c79f,0x39d1173,0x01000ee },
10237       0 },
10238     /* 114 */
10239     { { 0x0803387,0x3a06268,0x14043b8,0x3d4e72f,0x1ece115,0x0a1dfc8,
10240         0x17208dd,0x0be790a,0x122a07f,0x014dd95 },
10241       { 0x0a4182d,0x202886a,0x1f79a49,0x1e8c867,0x0a2bbd0,0x28668b5,
10242         0x0d0a2e1,0x115259d,0x3586c5d,0x01e815b },
10243       0 },
10244     /* 115 */
10245     { { 0x18a2a47,0x2c95627,0x2773646,0x1230f7c,0x15b5829,0x2fc354e,
10246         0x2c000ea,0x099d547,0x2f17a1a,0x01df520 },
10247       { 0x3853948,0x06f6561,0x3feeb8a,0x2f5b3ef,0x3a6f817,0x01a0791,
10248         0x2ec0578,0x2c392ad,0x12b2b38,0x0104540 },
10249       0 },
10250     /* 116 */
10251     { { 0x1e28ced,0x0fc3d1b,0x2c473c7,0x1826c4f,0x21d5da7,0x39718e4,
10252         0x38ce9e6,0x0251986,0x172fbea,0x0337c11 },
10253       { 0x053c3b0,0x0f162db,0x043c1cb,0x04111ee,0x297fe3c,0x32e5e03,
10254         0x2b8ae12,0x0c427ec,0x1da9738,0x03b9c0f },
10255       0 },
10256     /* 117 */
10257     { { 0x357e43a,0x054503f,0x11b8345,0x34ec6e0,0x2d44660,0x3d0ae61,
10258         0x3b5dff8,0x33884ac,0x09da162,0x00a82b6 },
10259       { 0x3c277ba,0x129a51a,0x027664e,0x1530507,0x0c788c9,0x2afd89d,
10260         0x1aa64cc,0x1196450,0x367ac2b,0x0358b42 },
10261       0 },
10262     /* 118 */
10263     { { 0x0054ac4,0x1761ecb,0x378839c,0x167c9f7,0x2570058,0x0604a35,
10264         0x37cbf3b,0x0909bb7,0x3f2991c,0x02ce688 },
10265       { 0x0b16ae5,0x212857c,0x351b952,0x2c684db,0x30c6a05,0x09c01e0,
10266         0x23c137f,0x1331475,0x092c067,0x0013b40 },
10267       0 },
10268     /* 119 */
10269     { { 0x2e90393,0x0617466,0x24e61f4,0x0a528f5,0x03047b4,0x2153f05,
10270         0x0001a69,0x30e1eb8,0x3c10177,0x0282a47 },
10271       { 0x22c831e,0x28fc06b,0x3e16ff0,0x208adc9,0x0bb76ae,0x28c1d6d,
10272         0x12c8a15,0x031063c,0x1889ed2,0x002133e },
10273       0 },
10274     /* 120 */
10275     { { 0x0a6becf,0x14277bf,0x3328d98,0x201f7fe,0x12fceae,0x1de3a2e,
10276         0x0a15c44,0x3ddf976,0x1b273ab,0x0355e55 },
10277       { 0x1b5d4f1,0x369e78c,0x3a1c210,0x12cf3e9,0x3aa52f0,0x309f082,
10278         0x112089d,0x107c753,0x24202d1,0x023853a },
10279       0 },
10280     /* 121 */
10281     { { 0x2897042,0x140d17c,0x2c4aeed,0x07d0d00,0x18d0533,0x22f7ec8,
10282         0x19c194c,0x3456323,0x2372aa4,0x0165f86 },
10283       { 0x30bd68c,0x1fb06b3,0x0945032,0x372ac09,0x06d4be0,0x27f8fa1,
10284         0x1c8d7ac,0x137a96e,0x236199b,0x0328fc0 },
10285       0 },
10286     /* 122 */
10287     { { 0x170bd20,0x2842d58,0x1de7592,0x3c5b4fd,0x20ea897,0x12cab78,
10288         0x363ff14,0x01f928c,0x17e309c,0x02f79ff },
10289       { 0x0f5432c,0x2edb4ae,0x044b516,0x32f810d,0x2210dc1,0x23e56d6,
10290         0x301e6ff,0x34660f6,0x10e0a7d,0x02d88eb },
10291       0 },
10292     /* 123 */
10293     { { 0x0c7b65b,0x2f59d58,0x2289a75,0x2408e92,0x1ab8c55,0x1ec99e5,
10294         0x220fd0d,0x04defe0,0x24658ec,0x035aa8b },
10295       { 0x138bb85,0x2f002d4,0x295c10a,0x08760ce,0x28c31d1,0x1c0a8cb,
10296         0x0ff00b1,0x144eac9,0x2e02dcc,0x0044598 },
10297       0 },
10298     /* 124 */
10299     { { 0x3b42b87,0x050057b,0x0dff781,0x1c06db1,0x1bd9f5d,0x1f5f04a,
10300         0x2cccd7a,0x143e19b,0x1cb94b7,0x036cfb8 },
10301       { 0x34837cf,0x3cf6c3c,0x0d4fb26,0x22ee55e,0x1e7eed1,0x315995f,
10302         0x2cdf937,0x1a96574,0x0425220,0x0221a99 },
10303       0 },
10304     /* 125 */
10305     { { 0x1b569ea,0x0d33ed9,0x19c13c2,0x107dc84,0x2200111,0x0569867,
10306         0x2dc85da,0x05ef22e,0x0eb018a,0x029c33d },
10307       { 0x04a6a65,0x3e5eba3,0x378f224,0x09c04d0,0x036e5cf,0x3df8258,
10308         0x3a609e4,0x1eddef8,0x2abd174,0x02a91dc },
10309       0 },
10310     /* 126 */
10311     { { 0x2a60cc0,0x1d84c5e,0x115f676,0x1840da0,0x2c79163,0x2f06ed6,
10312         0x198bb4b,0x3e5d37b,0x1dc30fa,0x018469b },
10313       { 0x15ee47a,0x1e32f30,0x16a530e,0x2093836,0x02e8962,0x3767b62,
10314         0x335adf3,0x27220db,0x2f81642,0x0173ffe },
10315       0 },
10316     /* 127 */
10317     { { 0x37a99cd,0x1533fe6,0x05a1c0d,0x27610f1,0x17bf3b9,0x0b1ce78,
10318         0x0a908f6,0x265300e,0x3237dc1,0x01b969a },
10319       { 0x3a5db77,0x2d15382,0x0d63ef8,0x1feb3d8,0x0b7b880,0x19820de,
10320         0x11c0c67,0x2af3396,0x38d242d,0x0120688 },
10321       0 },
10322     /* 128 */
10323     { { 0x1d0b34a,0x05ef00d,0x00a7e34,0x1ae0c9f,0x1440b38,0x300d8b4,
10324         0x37262da,0x3e50e3e,0x14ce0cd,0x00b1044 },
10325       { 0x195a0b1,0x173bc6b,0x03622ba,0x2a19f55,0x1c09b37,0x07921b2,
10326         0x16cdd20,0x24a5c9b,0x2bf42ff,0x00811de },
10327       0 },
10328     /* 129 */
10329     { { 0x0d65dbf,0x145cf06,0x1ad82f7,0x038ce7b,0x077bf94,0x33c4007,
10330         0x22d26bd,0x25ad9c0,0x09ac773,0x02b1990 },
10331       { 0x2261cc3,0x2ecdbf1,0x3e908b0,0x3246439,0x0213f7b,0x1179b04,
10332         0x01cebaa,0x0be1595,0x175cc12,0x033a39a },
10333       0 },
10334     /* 130 */
10335     { { 0x00a67d2,0x086d06f,0x248a0f1,0x0291134,0x362d476,0x166d1cd,
10336         0x044f1d6,0x2d2a038,0x365250b,0x0023f78 },
10337       { 0x08bf287,0x3b0f6a1,0x1d6eace,0x20b4cda,0x2c2a621,0x0912520,
10338         0x02dfdc9,0x1b35cd6,0x3d2565d,0x00bdf8b },
10339       0 },
10340     /* 131 */
10341     { { 0x3770fa7,0x2e4b6f0,0x03f9ae4,0x170de41,0x1095e8d,0x1dd845c,
10342         0x334e9d1,0x00ab953,0x12e9077,0x03196fa },
10343       { 0x2fd0a40,0x228c0fd,0x384b275,0x38ef339,0x3e7d822,0x3e5d9ef,
10344         0x24f5854,0x0ece9eb,0x247d119,0x012ffe3 },
10345       0 },
10346     /* 132 */
10347     { { 0x0ff1480,0x07487c0,0x1b16cd4,0x1f41d53,0x22ab8fb,0x2f83cfa,
10348         0x01d2efb,0x259f6b2,0x2e65772,0x00f9392 },
10349       { 0x05303e6,0x23cdb4f,0x23977e1,0x12e4898,0x03bd999,0x0c930f0,
10350         0x170e261,0x180a27b,0x2fd58ec,0x014e22b },
10351       0 },
10352     /* 133 */
10353     { { 0x25d7713,0x0c5fad7,0x09daad1,0x3b9d779,0x109b985,0x1d3ec98,
10354         0x35bc4fc,0x2f838cb,0x0d14f75,0x0173e42 },
10355       { 0x2657b12,0x10d4423,0x19e6760,0x296e5bb,0x2bfd421,0x25c3330,
10356         0x29f51f8,0x0338838,0x24060f0,0x029a62e },
10357       0 },
10358     /* 134 */
10359     { { 0x3748fec,0x2c5a1bb,0x2cf973d,0x289fa74,0x3e6e755,0x38997bf,
10360         0x0b6544c,0x2b6358c,0x38a7aeb,0x02c50bb },
10361       { 0x3d5770a,0x06be7c5,0x012fad3,0x19cb2cd,0x266af3b,0x3ccd677,
10362         0x160d1bd,0x141d5af,0x2965851,0x034625a },
10363       0 },
10364     /* 135 */
10365     { { 0x3c41c08,0x255eacc,0x22e1ec5,0x2b151a3,0x087de94,0x311cbdb,
10366         0x016b73a,0x368e462,0x20b7981,0x0099ec3 },
10367       { 0x262b988,0x1539763,0x21e76e5,0x15445b4,0x1d8ddc7,0x34a9be6,
10368         0x10faf03,0x24e4d18,0x07aa111,0x02d538a },
10369       0 },
10370     /* 136 */
10371     { { 0x38a876b,0x048ad45,0x04b40a0,0x3fc2144,0x251ff96,0x13ca7dd,
10372         0x0b31ab1,0x3539814,0x28b5f87,0x0212aec },
10373       { 0x270790a,0x350e7e0,0x346bd5e,0x276178f,0x22d6cb5,0x3078884,
10374         0x355c1b6,0x15901d7,0x3671765,0x03950db },
10375       0 },
10376     /* 137 */
10377     { { 0x286e8d5,0x2409788,0x13be53f,0x2d21911,0x0353c95,0x10238e8,
10378         0x32f5bde,0x3a67b60,0x28b5b9c,0x001013d },
10379       { 0x381e8e5,0x0cef7a9,0x2f5bcad,0x06058f0,0x33cdf50,0x04672a8,
10380         0x1769600,0x31c055d,0x3df0ac1,0x00e9098 },
10381       0 },
10382     /* 138 */
10383     { { 0x2eb596d,0x197b326,0x12b4c29,0x39c08f2,0x101ea03,0x3804e58,
10384         0x04b4b62,0x28d9d1c,0x13f905e,0x0032a3f },
10385       { 0x11b2b61,0x08e9095,0x0d06925,0x270e43f,0x21eb7a8,0x0e4a98f,
10386         0x31d2be0,0x030cf9f,0x2644ddb,0x025b728 },
10387       0 },
10388     /* 139 */
10389     { { 0x07510af,0x2ed0e8e,0x2a01203,0x2a2a68d,0x0846fea,0x3e540de,
10390         0x3a57702,0x1677348,0x2123aad,0x010d8f8 },
10391       { 0x0246a47,0x0e871d0,0x124dca4,0x34b9577,0x2b362b8,0x363ebe5,
10392         0x3086045,0x26313e6,0x15cd8bb,0x0210384 },
10393       0 },
10394     /* 140 */
10395     { { 0x023e8a7,0x0817884,0x3a0bf12,0x3376371,0x3c808a8,0x18e9777,
10396         0x12a2721,0x35b538a,0x2bd30de,0x017835a },
10397       { 0x0fc0f64,0x1c8709f,0x2d8807a,0x0743957,0x242eec0,0x347e76c,
10398         0x27bef91,0x289689a,0x0f42945,0x01f7a92 },
10399       0 },
10400     /* 141 */
10401     { { 0x1060a81,0x3dbc739,0x1615abd,0x1cbe3e5,0x3e79f9c,0x1ab09a2,
10402         0x136c540,0x05b473f,0x2beebfd,0x02af0a8 },
10403       { 0x3e2eac7,0x19be474,0x04668ac,0x18f4b74,0x36f10ba,0x0a0b4c6,
10404         0x10e3770,0x3bf059e,0x3946c7e,0x013a8d4 },
10405       0 },
10406     /* 142 */
10407     { { 0x266309d,0x28be354,0x1a3eed8,0x3020651,0x10a51c6,0x1e31770,
10408         0x0af45a5,0x3ff0f3b,0x2891c94,0x00e9db9 },
10409       { 0x17b0d0f,0x33a291f,0x0a5f9aa,0x25a3d61,0x2963ace,0x39a5fef,
10410         0x230c724,0x1919146,0x10a465e,0x02084a8 },
10411       0 },
10412     /* 143 */
10413     { { 0x3ab8caa,0x31870f3,0x2390ef7,0x2103850,0x218eb8e,0x3a5ccf2,
10414         0x1dff677,0x2c59334,0x371599c,0x02a9f2a },
10415       { 0x0837bd1,0x3249cef,0x35d702f,0x3430dab,0x1c06407,0x108f692,
10416         0x221292f,0x05f0c5d,0x073fe06,0x01038e0 },
10417       0 },
10418     /* 144 */
10419     { { 0x3bf9b7c,0x2020929,0x30d0f4f,0x080fef8,0x3365d23,0x1f3e738,
10420         0x3e53209,0x1549afe,0x300b305,0x038d811 },
10421       { 0x0c6c2c7,0x2e6445b,0x3ee64dc,0x022e932,0x0726837,0x0deb67b,
10422         0x1ed4346,0x3857f73,0x277a3de,0x01950b5 },
10423       0 },
10424     /* 145 */
10425     { { 0x36c377a,0x0adb41e,0x08be3f3,0x11e40d1,0x36cb038,0x036a2bd,
10426         0x3dd3a82,0x1bc875b,0x2ee09bb,0x02994d2 },
10427       { 0x035facf,0x05e0344,0x07e630a,0x0ce772d,0x335e55a,0x111fce4,
10428         0x250fe1c,0x3bc89ba,0x32fdc9a,0x03cf2d9 },
10429       0 },
10430     /* 146 */
10431     { { 0x355fd83,0x1c67f8e,0x1d10eb3,0x1b21d77,0x0e0d7a4,0x173a9e1,
10432         0x2c9fa90,0x1c39cce,0x22eaae8,0x01f2bea },
10433       { 0x153b338,0x0534107,0x26c69b8,0x283be1f,0x3e0acc0,0x059cac3,
10434         0x13d1081,0x148bbee,0x3c1b9bd,0x002aac4 },
10435       0 },
10436     /* 147 */
10437     { { 0x2681297,0x3389e34,0x146addc,0x2c6d425,0x2cb350e,0x1986abc,
10438         0x0431737,0x04ba4b7,0x2028470,0x012e469 },
10439       { 0x2f8ddcf,0x3c4255c,0x1af4dcf,0x07a6a44,0x208ebf6,0x0dc90c3,
10440         0x34360ac,0x072ad23,0x0537232,0x01254d3 },
10441       0 },
10442     /* 148 */
10443     { { 0x07b7e9d,0x3df5c7c,0x116f83d,0x28c4f35,0x3a478ef,0x3011fb8,
10444         0x2f264b6,0x317b9e3,0x04fd65a,0x032bd1b },
10445       { 0x2aa8266,0x3431de4,0x04bba04,0x19a44da,0x0edf454,0x392c5ac,
10446         0x265168a,0x1dc3d5b,0x25704c6,0x00533a7 },
10447       0 },
10448     /* 149 */
10449     { { 0x25e8f91,0x1178fa5,0x2492994,0x2eb2c3c,0x0d3aca1,0x0322828,
10450         0x1cc70f9,0x269c74c,0x0a53e4c,0x006edc2 },
10451       { 0x18bdd7a,0x2a79a55,0x26b1d5c,0x0200628,0x0734a05,0x3273c7b,
10452         0x13aa714,0x0040ac2,0x2f2da30,0x03e7449 },
10453       0 },
10454     /* 150 */
10455     { { 0x3f9563e,0x2f29eab,0x14a0749,0x3fad264,0x1dd077a,0x3d7c59c,
10456         0x3a0311b,0x331a789,0x0b9729e,0x0201ebf },
10457       { 0x1b08b77,0x2a4cdf2,0x3e387f8,0x21510f1,0x286c3a7,0x1dbf62e,
10458         0x3afa594,0x3363217,0x0d16568,0x01d46b7 },
10459       0 },
10460     /* 151 */
10461     { { 0x0715c0d,0x28e2d04,0x17f78ae,0x1c63dda,0x1d113ea,0x0fefc1b,
10462         0x1eab149,0x1d0fd99,0x0682537,0x00a7b11 },
10463       { 0x10bebbc,0x11c672d,0x14223d9,0x2ff9141,0x1399ee5,0x34b7b6c,
10464         0x0d5b3a8,0x01df643,0x0e392a4,0x03fe4dc },
10465       0 },
10466     /* 152 */
10467     { { 0x2b75b65,0x0b5a6f1,0x11c559a,0x3549999,0x24188f8,0x37a75f4,
10468         0x29f33e3,0x34068a2,0x38ba2a9,0x025dd91 },
10469       { 0x29af2c7,0x0988b64,0x0923885,0x1b539a4,0x1334f5d,0x226947a,
10470         0x2cc7e5a,0x20beb39,0x13fac2f,0x01d298c },
10471       0 },
10472     /* 153 */
10473     { { 0x35f079c,0x137f76d,0x2fbbb2f,0x254638d,0x185b07c,0x1f34db7,
10474         0x2cfcf0e,0x218f46d,0x2150ff4,0x02add6f },
10475       { 0x33fc9b7,0x0d9f005,0x0fd081b,0x0834965,0x2b90a74,0x102448d,
10476         0x3dbf03c,0x167d857,0x02e0b44,0x013afab },
10477       0 },
10478     /* 154 */
10479     { { 0x09f2c53,0x317f9d7,0x1411eb6,0x0463aba,0x0d25220,0x256b176,
10480         0x087633f,0x2bff322,0x07b2c1b,0x037e662 },
10481       { 0x10aaecb,0x23bb4a1,0x2272bb7,0x06c075a,0x09d4918,0x0736f2b,
10482         0x0dd511b,0x101625e,0x0a7779f,0x009ec10 },
10483       0 },
10484     /* 155 */
10485     { { 0x33b2eb2,0x0176dfd,0x2118904,0x022386c,0x2e0df85,0x2588c9f,
10486         0x1b71525,0x28fd540,0x137e4cf,0x02ce4f7 },
10487       { 0x3d75165,0x0c39ecf,0x3554a12,0x30af34c,0x2d66344,0x3ded408,
10488         0x36f1be0,0x0d065b0,0x012d046,0x0025623 },
10489       0 },
10490     /* 156 */
10491     { { 0x2601c3b,0x1824fc0,0x335fe08,0x3e33d70,0x0fb0252,0x252bfca,
10492         0x1cf2808,0x1922e55,0x1a9db9f,0x020721e },
10493       { 0x2f56c51,0x39a1f31,0x218c040,0x1a4fc5d,0x3fed471,0x0164d4e,
10494         0x388a419,0x06f1113,0x0f55fc1,0x03e8352 },
10495       0 },
10496     /* 157 */
10497     { { 0x1608e4d,0x3872778,0x022cbc6,0x044d60a,0x3010dda,0x15fb0b5,
10498         0x37ddc11,0x19f5bda,0x156b6a3,0x023a838 },
10499       { 0x383b3b4,0x1380bc8,0x353ca35,0x250fc07,0x169966b,0x3780f29,
10500         0x36632b2,0x2d6b13f,0x124fa00,0x00fd6ae },
10501       0 },
10502     /* 158 */
10503     { { 0x1739efb,0x2ec3656,0x2c0d337,0x3d39faf,0x1c751b0,0x04699f4,
10504         0x252dd64,0x095b8b6,0x0872b74,0x022f1da },
10505       { 0x2d3d253,0x38edca0,0x379fa5b,0x287d635,0x3a9f679,0x059d9ee,
10506         0x0ac168e,0x3cd3e87,0x19060fc,0x02ce1bc },
10507       0 },
10508     /* 159 */
10509     { { 0x3edcfc2,0x0f04d4b,0x2f0d31f,0x1898be2,0x25396bf,0x15ca230,
10510         0x02b4eae,0x2713668,0x0f71b06,0x0132d18 },
10511       { 0x38095ea,0x1ed34d6,0x3603ae6,0x165bf01,0x192bbf8,0x1852859,
10512         0x075f66b,0x1488f85,0x10895ef,0x014b035 },
10513       0 },
10514     /* 160 */
10515     { { 0x1339848,0x3084385,0x0c8d231,0x3a1c1de,0x0e87a28,0x255b85c,
10516         0x1de6616,0x2702e74,0x1382bb0,0x012b0f2 },
10517       { 0x198987d,0x381545a,0x34d619b,0x312b827,0x18b2376,0x28fe4cf,
10518         0x20b7651,0x017d077,0x0c7e397,0x00e0365 },
10519       0 },
10520     /* 161 */
10521     { { 0x1542e75,0x0d56aa0,0x39b701a,0x287b806,0x396c724,0x0935c21,
10522         0x3a29776,0x0debdac,0x171de26,0x00b38f8 },
10523       { 0x1d5bc1a,0x3fad27d,0x22b5cfe,0x1f89ddf,0x0a65560,0x144dd5b,
10524         0x2aac2f9,0x139353f,0x0520b62,0x00b9b36 },
10525       0 },
10526     /* 162 */
10527     { { 0x031c31d,0x16552e3,0x1a0c368,0x0016fc8,0x168533d,0x171e7b2,
10528         0x17626e7,0x275502f,0x14742c6,0x03285dd },
10529       { 0x2d2dbb2,0x3b6bffd,0x1d18cc6,0x2f45d2a,0x0fd0d8c,0x2915e3a,
10530         0x1e8793a,0x0b39a1d,0x3139cab,0x02a5da9 },
10531       0 },
10532     /* 163 */
10533     { { 0x3fb353d,0x147c6e4,0x3a720a6,0x22d5ff3,0x1d75cab,0x06c54a0,
10534         0x08cfa73,0x12666aa,0x3170a1f,0x021c829 },
10535       { 0x13e1b90,0x3a34dda,0x1fc38c3,0x02c5bdb,0x2d345dc,0x14aa1d0,
10536         0x28d00ab,0x224f23a,0x329c769,0x025c67b },
10537       0 },
10538     /* 164 */
10539     { { 0x0e35909,0x3bb6356,0x0116820,0x370cf77,0x29366d8,0x3881409,
10540         0x3999d06,0x013075f,0x176e157,0x02941ca },
10541       { 0x0e70b2e,0x28dfab1,0x2a8a002,0x15da242,0x084dcf6,0x116ca97,
10542         0x31bf186,0x1dc9735,0x09df7b7,0x0264e27 },
10543       0 },
10544     /* 165 */
10545     { { 0x2da7a4b,0x3023c9e,0x1366238,0x00ff4e2,0x03abe9d,0x19bd44b,
10546         0x272e897,0x20b91ad,0x2aa202c,0x02a2201 },
10547       { 0x380184e,0x08112b4,0x0b85660,0x31049aa,0x3a8cb78,0x36113c5,
10548         0x1670c0a,0x373f9e7,0x3fb4738,0x00010ef },
10549       0 },
10550     /* 166 */
10551     { { 0x2d5192e,0x26d770d,0x32af8d5,0x34d1642,0x1acf885,0x05805e0,
10552         0x166d0a1,0x1219a0d,0x301ba6c,0x014bcfb },
10553       { 0x2dcb64d,0x19cca83,0x379f398,0x08e01a0,0x10a482c,0x0103cc2,
10554         0x0be5fa7,0x1f9d45b,0x1899ef2,0x00ca5af },
10555       0 },
10556     /* 167 */
10557     { { 0x14d81d7,0x2aea251,0x1b3c476,0x3bd47ae,0x29eade7,0x0715e61,
10558         0x1a21cd8,0x1c7a586,0x2bfaee5,0x00ee43f },
10559       { 0x096f7cb,0x0c08f95,0x1bc4939,0x361fed4,0x255be41,0x26fad73,
10560         0x31dd489,0x02c600f,0x29d9f81,0x01ba201 },
10561       0 },
10562     /* 168 */
10563     { { 0x03ea1db,0x1eac46d,0x1292ce3,0x2a54967,0x20a7ff1,0x3e13c61,
10564         0x1b02218,0x2b44e14,0x3eadefa,0x029c88a },
10565       { 0x30a9144,0x31e3b0a,0x19c5a2a,0x147cbe9,0x05a0240,0x051f38e,
10566         0x11eca56,0x31a4247,0x123bc2a,0x02fa535 },
10567       0 },
10568     /* 169 */
10569     { { 0x3226ce7,0x1251782,0x0b7072f,0x11e59fa,0x2b8afd7,0x169b18f,
10570         0x2a46f18,0x31d9bb7,0x2fe9be8,0x01de0b7 },
10571       { 0x1b38626,0x34aa90f,0x3ad1760,0x21ddbd9,0x3460ae7,0x1126736,
10572         0x1b86fc5,0x0b92cd0,0x167a289,0x000e0e1 },
10573       0 },
10574     /* 170 */
10575     { { 0x1ec1a0f,0x36bbf5e,0x1c972d8,0x3f73ace,0x13bbcd6,0x23d86a5,
10576         0x175ffc5,0x2d083d5,0x2c4adf7,0x036f661 },
10577       { 0x1f39eb7,0x2a20505,0x176c81a,0x3d6e636,0x16ee2fc,0x3cbdc5f,
10578         0x25475dc,0x2ef4151,0x3c46860,0x0238934 },
10579       0 },
10580     /* 171 */
10581     { { 0x2587390,0x3639526,0x0588749,0x13c32fb,0x212bb19,0x09660f1,
10582         0x207da4b,0x2bf211b,0x1c4407b,0x01506a6 },
10583       { 0x24c8842,0x105a498,0x05ffdb2,0x0ab61b0,0x26044c1,0x3dff3d8,
10584         0x1d14b44,0x0d74716,0x049f57d,0x030024b },
10585       0 },
10586     /* 172 */
10587     { { 0x32e61ef,0x31d70f7,0x35cad3c,0x320b86c,0x07e8841,0x027ca7d,
10588         0x2d30d19,0x2513718,0x2347286,0x01d7901 },
10589       { 0x3c237d0,0x107f16e,0x01c9e7d,0x3c3b13c,0x0c9537b,0x20af54d,
10590         0x051a162,0x2161a47,0x258c784,0x016df2d },
10591       0 },
10592     /* 173 */
10593     { { 0x228ead1,0x29c2122,0x07f6964,0x023f4ed,0x1802dc5,0x19f96ce,
10594         0x24bfd17,0x25e866b,0x2ba8df0,0x01eb84f },
10595       { 0x2dd384e,0x05bbe3a,0x3f06fd2,0x366dacb,0x30361a2,0x2f36d7c,
10596         0x0b98784,0x38ff481,0x074e2a8,0x01e1f60 },
10597       0 },
10598     /* 174 */
10599     { { 0x17fbb1c,0x0975add,0x1debc5e,0x2cb2880,0x3e47bdd,0x3488cff,
10600         0x15e9a36,0x2121129,0x0199ef2,0x017088a },
10601       { 0x0315250,0x352a162,0x17c1773,0x0ae09c2,0x321b21a,0x3bd74cf,
10602         0x3c4ea1d,0x3cac2ad,0x3abbaf0,0x039174d },
10603       0 },
10604     /* 175 */
10605     { { 0x0511c8a,0x3c78d0a,0x2cd3d2d,0x322f729,0x3ebb229,0x09f0e69,
10606         0x0a71a76,0x2e74d5e,0x12284df,0x03b5ef0 },
10607       { 0x3dea561,0x0a9b7e4,0x0ed1cf2,0x237523c,0x05443f1,0x2eb48fa,
10608         0x3861405,0x1b49f62,0x0c945ca,0x02ab25f },
10609       0 },
10610     /* 176 */
10611     { { 0x16bd00a,0x13a9d28,0x3cc1eb5,0x2b7d702,0x2d839e9,0x3e6ff01,
10612         0x2bb7f11,0x3713824,0x3b31163,0x00c63e5 },
10613       { 0x30d7138,0x0316fb0,0x0220ecc,0x08eaf0c,0x244e8df,0x0088d81,
10614         0x37972fb,0x3fd34ae,0x2a19a84,0x03e907e },
10615       0 },
10616     /* 177 */
10617     { { 0x2642269,0x0b65d29,0x03bd440,0x33a6ede,0x3c81814,0x2507982,
10618         0x0d38e47,0x3a788e6,0x32c1d26,0x00e2eda },
10619       { 0x2577f87,0x392895a,0x3e1cc64,0x14f7047,0x08b52d2,0x08a01ca,
10620         0x336abf6,0x00697fc,0x105ce76,0x0253742 },
10621       0 },
10622     /* 178 */
10623     { { 0x293f92a,0x33df737,0x3315156,0x32e26d7,0x0a01333,0x26579d4,
10624         0x004df9c,0x0aba409,0x067d25c,0x02481de },
10625       { 0x3f39d44,0x1c78042,0x13d7e24,0x0825aed,0x35f2c90,0x3270f63,
10626         0x04b7b35,0x3ad4531,0x28bd29b,0x0207a10 },
10627       0 },
10628     /* 179 */
10629     { { 0x077199f,0x270aeb1,0x0dd96dd,0x3b9ad7b,0x28cb8ee,0x3903f43,
10630         0x37db3fe,0x292c62b,0x362dbbf,0x006e52a },
10631       { 0x247f143,0x0362cf3,0x216344f,0x3f18fd1,0x351e623,0x31664e0,
10632         0x0f270fc,0x243bbc6,0x2280555,0x001a8e3 },
10633       0 },
10634     /* 180 */
10635     { { 0x3355b49,0x2c04e6c,0x399b2e5,0x182d3af,0x020e265,0x09a7cf7,
10636         0x0ffa6bd,0x353e302,0x02083d9,0x029ecdb },
10637       { 0x33e8830,0x0570e86,0x1c0b64d,0x386a27e,0x0d5fcea,0x0b45a4c,
10638         0x2ee4a2e,0x0a8833f,0x2b4a282,0x02f9531 },
10639       0 },
10640     /* 181 */
10641     { { 0x191167c,0x36cf7e3,0x225ed6c,0x1e79e99,0x0517c3f,0x11ab1fd,
10642         0x05648f3,0x08aedc4,0x1abeae0,0x02fcc29 },
10643       { 0x3828a68,0x1e16fa4,0x30368e7,0x0c9fcfb,0x25161c3,0x24851ac,
10644         0x1b5feb5,0x344eb84,0x0de2732,0x0347208 },
10645       0 },
10646     /* 182 */
10647     { { 0x038b363,0x384d1e4,0x2519043,0x151ac17,0x158c11f,0x009b2b4,
10648         0x257abe6,0x2368d3f,0x3ed68a1,0x02df45e },
10649       { 0x29c2559,0x2962478,0x3d8444c,0x1d96fff,0x04f7a03,0x1391a52,
10650         0x0de4af7,0x3319126,0x15e6412,0x00e65ff },
10651       0 },
10652     /* 183 */
10653     { { 0x3d61507,0x1d1a0a2,0x0d2af20,0x354d299,0x329e132,0x2a28578,
10654         0x2ddfb08,0x04fa3ff,0x1293c6c,0x003bae2 },
10655       { 0x3e259f8,0x1a68fa9,0x3e67e9b,0x39b44f9,0x1ce1db7,0x347e9a1,
10656         0x3318f6a,0x2dbbc9d,0x2f8c922,0x008a245 },
10657       0 },
10658     /* 184 */
10659     { { 0x212ab5b,0x2b896c2,0x0136959,0x07e55ef,0x0cc1117,0x05b8ac3,
10660         0x18429ed,0x025fa01,0x11d6e93,0x03b016b },
10661       { 0x03f3708,0x2e96fab,0x1d77157,0x0d4c2d6,0x131baf9,0x0608d39,
10662         0x3552371,0x06cdd1e,0x1567ff1,0x01f4c50 },
10663       0 },
10664     /* 185 */
10665     { { 0x2dfefab,0x270173d,0x37077bd,0x1a372cd,0x1be2f22,0x28e2ee5,
10666         0x3ead973,0x35e8f94,0x2fc9bc1,0x03a7399 },
10667       { 0x36a02a1,0x2855d9b,0x00ed75a,0x37d8398,0x138c087,0x233706e,
10668         0x147f346,0x01947e2,0x3017228,0x0365942 },
10669       0 },
10670     /* 186 */
10671     { { 0x2057e60,0x2d31296,0x25e4504,0x2fa37bc,0x1cbccc3,0x1f0732f,
10672         0x3532081,0x2de8a98,0x19a804e,0x005359a },
10673       { 0x31f411a,0x2a10576,0x369c2c8,0x02fe035,0x109fbaf,0x30bddeb,
10674         0x1eef901,0x1662ad3,0x0410d43,0x01bd31a },
10675       0 },
10676     /* 187 */
10677     { { 0x2c24a96,0x1b7d3a5,0x19a3872,0x217f2f6,0x2534dbc,0x2cab8c2,
10678         0x066ef28,0x26aecf1,0x0fd6118,0x01310d4 },
10679       { 0x055b8da,0x1fdc5be,0x38a1296,0x25118f0,0x341a423,0x2ba4cd0,
10680         0x3e1413e,0x062d70d,0x2425a31,0x029c9b4 },
10681       0 },
10682     /* 188 */
10683     { { 0x08c1086,0x1acfba5,0x22e1dae,0x0f72f4e,0x3f1de50,0x0f408bc,
10684         0x35ed3f0,0x3ce48fc,0x282cc6c,0x004d8e7 },
10685       { 0x1afaa86,0x24e3ef3,0x22589ac,0x3ec9952,0x1f45bc5,0x14144ca,
10686         0x23b26e4,0x0d68c65,0x1e1c1a3,0x032a4d9 },
10687       0 },
10688     /* 189 */
10689     { { 0x03b2d20,0x16b1d53,0x241b361,0x05e4138,0x1742a54,0x32741c7,
10690         0x0521c4c,0x1ca96c2,0x034970b,0x02738a7 },
10691       { 0x13e0ad6,0x207dcdb,0x034c8cc,0x27bcbe1,0x18060da,0x33a18b6,
10692         0x2d1d1a6,0x2be60d7,0x3d7ab42,0x012312a },
10693       0 },
10694     /* 190 */
10695     { { 0x0c7485a,0x06c3310,0x0dbfd22,0x2ef949d,0x0ead455,0x098f4ba,
10696         0x3c76989,0x0cf2d24,0x032f67b,0x01e005f },
10697       { 0x30cb5ee,0x0d5da64,0x0ed2b9d,0x2503102,0x1c0d14e,0x1cbc693,
10698         0x37bf552,0x07013e2,0x054de5c,0x014f341 },
10699       0 },
10700     /* 191 */
10701     { { 0x128ccac,0x1617e97,0x346ebcd,0x158016d,0x25f823e,0x34048ea,
10702         0x39f0a1c,0x3ea3df1,0x1c1d3d7,0x03ba919 },
10703       { 0x151803b,0x01967c1,0x2f70781,0x27df39a,0x06c0b59,0x24a239c,
10704         0x15a7702,0x2464d06,0x2a47ae6,0x006db90 },
10705       0 },
10706     /* 192 */
10707     { { 0x27d04c3,0x024df3d,0x38112e8,0x38a27ba,0x01e312b,0x0965358,
10708         0x35d8879,0x2f4f55a,0x214187f,0x0008936 },
10709       { 0x05fe36f,0x2ee18c3,0x1f5f87a,0x1813bd4,0x0580f3c,0x0ed0a7b,
10710         0x0fb1bfb,0x3fcce59,0x2f042bf,0x01820e3 },
10711       0 },
10712     /* 193 */
10713     { { 0x20bbe99,0x32cbc9f,0x39ee432,0x3cc12a8,0x37bda44,0x3ea4e40,
10714         0x097c7a9,0x0590d7d,0x2022d33,0x018dbac },
10715       { 0x3ae00aa,0x3439864,0x2d2ffcf,0x3f8c6b9,0x0875a00,0x3e4e407,
10716         0x3658a29,0x22eb3d0,0x2b63921,0x022113b },
10717       0 },
10718     /* 194 */
10719     { { 0x33bae58,0x05c749a,0x1f3e114,0x1c45f8e,0x27db3df,0x06a3ab6,
10720         0x37bc7f8,0x1e27b34,0x3dc51fb,0x009eea0 },
10721       { 0x3f54de5,0x3d0e7fe,0x1a71a7d,0x02ed7f8,0x0727703,0x2ca5e92,
10722         0x2e8e35d,0x292ad0b,0x13487f3,0x02b6d8b },
10723       0 },
10724     /* 195 */
10725     { { 0x175df2a,0x05a28a8,0x32e99b1,0x13d8630,0x2082aa0,0x11ac245,
10726         0x24f2e71,0x322cb27,0x17675e7,0x02e643f },
10727       { 0x1f37313,0x2765ad3,0x0789082,0x1e742d0,0x11c2055,0x2021dc4,
10728         0x09ae4a7,0x346359b,0x2f94d10,0x0205c1f },
10729       0 },
10730     /* 196 */
10731     { { 0x3d6ff96,0x1f2ac80,0x336097d,0x3f03610,0x35b851b,0x010b6d2,
10732         0x0823c4d,0x2a9709a,0x2ead5a8,0x00de4b6 },
10733       { 0x01afa0b,0x0621965,0x3671528,0x1050b60,0x3f3e9e7,0x2f93829,
10734         0x0825275,0x006e85f,0x35e94b0,0x016af58 },
10735       0 },
10736     /* 197 */
10737     { { 0x2c4927c,0x3ea1382,0x0f23727,0x0d69f23,0x3e38860,0x2b72837,
10738         0x3cd5ea4,0x2d84292,0x321846a,0x016656f },
10739       { 0x29dfa33,0x3e182e0,0x018be90,0x2ba563f,0x2caafe2,0x218c0d9,
10740         0x3baf447,0x1047a6c,0x0a2d483,0x01130cb },
10741       0 },
10742     /* 198 */
10743     { { 0x00ed80c,0x2a5fc79,0x0a82a74,0x2c4c74b,0x15f938c,0x30b5ab6,
10744         0x32124b7,0x295314f,0x2fb8082,0x007c858 },
10745       { 0x20b173e,0x19f315c,0x12f97e4,0x198217c,0x040e8a6,0x3275977,
10746         0x2bc20e4,0x01f2633,0x02bc3e9,0x023c750 },
10747       0 },
10748     /* 199 */
10749     { { 0x3c4058a,0x24be73e,0x16704f5,0x2d8a4bd,0x3b15e14,0x3076315,
10750         0x1cfe37b,0x36fe715,0x343926e,0x02c6603 },
10751       { 0x2c76b09,0x0cf824c,0x3f7898c,0x274cec1,0x11df527,0x18eed18,
10752         0x08ead48,0x23915bc,0x19b3744,0x00a0a2b },
10753       0 },
10754     /* 200 */
10755     { { 0x0cf4ac5,0x1c8b131,0x0afb696,0x0ff7799,0x2f5ac1a,0x022420c,
10756         0x11baa2e,0x2ce4015,0x1275a14,0x0125cfc },
10757       { 0x22eac5d,0x360cd4c,0x3568e59,0x3d42f66,0x35e07ee,0x09620e4,
10758         0x36720fa,0x22b1eac,0x2d0db16,0x01b6b23 },
10759       0 },
10760     /* 201 */
10761     { { 0x1a835ef,0x1516bbb,0x2d51f7b,0x3487443,0x14aa113,0x0dd06c2,
10762         0x1a65e01,0x379300d,0x35920b9,0x012c8fb },
10763       { 0x04c7341,0x2eda00f,0x3c37e82,0x1b4fd62,0x0d45770,0x1478fba,
10764         0x127863a,0x26939cd,0x134ddf4,0x01375c5 },
10765       0 },
10766     /* 202 */
10767     { { 0x1476cd9,0x1119ca5,0x325bbf9,0x0bf8c69,0x0648d07,0x312d9f8,
10768         0x01c8b8f,0x136ec51,0x0002f4a,0x03f4c5c },
10769       { 0x195d0e1,0x10ffd22,0x29aa1cb,0x3443bdc,0x276e695,0x05e6260,
10770         0x15f9764,0x3cd9783,0x18c9569,0x0053eb1 },
10771       0 },
10772     /* 203 */
10773     { { 0x312ae18,0x280197c,0x3fc9ad9,0x303f324,0x251958d,0x29f4a11,
10774         0x2142408,0x3694366,0x25136ab,0x03b5f1d },
10775       { 0x1d4abbc,0x1c3c689,0x13ea462,0x3cfc684,0x39b5dd8,0x2d4654b,
10776         0x09b0755,0x27d4f18,0x3f74d2e,0x03fbf2d },
10777       0 },
10778     /* 204 */
10779     { { 0x2119185,0x2525eae,0x1ba4bd0,0x0c2ab11,0x1d54e8c,0x294845e,
10780         0x2479dea,0x3602d24,0x17e87e0,0x0060069 },
10781       { 0x0afffb0,0x34fe37f,0x1240073,0x02eb895,0x06cf33c,0x2d7f7ef,
10782         0x1d763b5,0x04191e0,0x11e1ead,0x027e3f0 },
10783       0 },
10784     /* 205 */
10785     { { 0x269544c,0x0e85c57,0x3813158,0x19fc12d,0x20eaf85,0x1e2930c,
10786         0x22a8fd2,0x1a6a478,0x09d3d3a,0x02a74e0 },
10787       { 0x1a2da3b,0x30b0b16,0x0847936,0x3d86257,0x138ccbc,0x0f5421a,
10788         0x25244e6,0x23bdd79,0x1aee117,0x00c01ae },
10789       0 },
10790     /* 206 */
10791     { { 0x1eead28,0x07cac32,0x1fbc0bb,0x17627d3,0x17eef63,0x0b3a24e,
10792         0x0757fdb,0x3dd841d,0x3d745f8,0x002ae17 },
10793       { 0x25b4549,0x29f24cf,0x2f21ecd,0x1725e48,0x04be2bb,0x10ee010,
10794         0x1a1274b,0x10b0898,0x27511e9,0x02c48b5 },
10795       0 },
10796     /* 207 */
10797     { { 0x2a5ae7a,0x181ef99,0x0be33be,0x3e9dab7,0x101e703,0x3adb971,
10798         0x1043014,0x2ebb2be,0x1c1097d,0x027d667 },
10799       { 0x3f250ed,0x16dc603,0x20dc6d7,0x1d0d268,0x38eb915,0x02c89e8,
10800         0x1605a41,0x12de109,0x0e08a29,0x01f554a },
10801       0 },
10802     /* 208 */
10803     { { 0x0c26def,0x163d988,0x2d1ef0f,0x3a960ac,0x1025585,0x0738e20,
10804         0x27d79b0,0x05cc3ef,0x201303f,0x00a333a },
10805       { 0x1644ba5,0x2af345e,0x30b8d1d,0x3a01bff,0x31fc643,0x1acf85e,
10806         0x0a76fc6,0x04efe98,0x348a1d0,0x03062eb },
10807       0 },
10808     /* 209 */
10809     { { 0x1c4216d,0x18e3217,0x02ac34e,0x19c8185,0x200c010,0x17d4192,
10810         0x13a1719,0x165af51,0x09db7a9,0x0277be0 },
10811       { 0x3ab8d2c,0x2190b99,0x22b641e,0x0cd88de,0x3b42404,0x1310862,
10812         0x106a6d6,0x23395f5,0x0b06880,0x000d5fe },
10813       0 },
10814     /* 210 */
10815     { { 0x0d2cc88,0x36f9913,0x339d8e9,0x237c2e3,0x0cc61c2,0x34c2832,
10816         0x309874c,0x2621d28,0x2dd1b48,0x0392806 },
10817       { 0x17cd8f9,0x07bab3d,0x0c482ed,0x0faf565,0x31b767d,0x2f4bde1,
10818         0x295c717,0x330c29c,0x179ce10,0x0119b5f },
10819       0 },
10820     /* 211 */
10821     { { 0x1ada2c7,0x0c624a7,0x227d47d,0x30e3e6a,0x14fa0a6,0x0829678,
10822         0x24fd288,0x2b46a43,0x122451e,0x0319ca9 },
10823       { 0x186b655,0x01f3217,0x0af1306,0x0efe6b5,0x2f0235d,0x1c45ca9,
10824         0x2086805,0x1d44e66,0x0faf2a6,0x0178f59 },
10825       0 },
10826     /* 212 */
10827     { { 0x33b4416,0x10431e6,0x2d99aa6,0x217aac9,0x0cd8fcf,0x2d95a9d,
10828         0x3ff74ad,0x10bf17a,0x295eb8e,0x01b229e },
10829       { 0x02a63bd,0x182e9ec,0x004710c,0x00e2e3c,0x06b2f23,0x04b642c,
10830         0x2c37383,0x32a4631,0x022ad82,0x00d22b9 },
10831       0 },
10832     /* 213 */
10833     { { 0x0cda2fb,0x1d198d7,0x26d27f4,0x286381c,0x022acca,0x24ac7c8,
10834         0x2df7824,0x0b4ba16,0x1e0d9ef,0x03041d3 },
10835       { 0x29a65b3,0x0f3912b,0x151bfcf,0x2b0175c,0x0fd71e4,0x39aa5e2,
10836         0x311f50c,0x13ff351,0x3dbc9e5,0x03eeb7e },
10837       0 },
10838     /* 214 */
10839     { { 0x0a99363,0x0fc7348,0x2775171,0x23db3c8,0x2b91565,0x134d66c,
10840         0x0175cd2,0x1bf365a,0x2b48371,0x02dfe5d },
10841       { 0x16dbf74,0x2389357,0x2f36575,0x3f5c70e,0x38d23ba,0x090f7f8,
10842         0x3477600,0x3201523,0x32ecafc,0x03d3506 },
10843       0 },
10844     /* 215 */
10845     { { 0x1abd48d,0x073ca3f,0x38a451f,0x0d8cb01,0x1ce81be,0x05c51ba,
10846         0x0e29741,0x03c41ab,0x0eae016,0x0060209 },
10847       { 0x2e58358,0x1da62d9,0x2358038,0x14b39b2,0x1635687,0x39079b1,
10848         0x380e345,0x1b49608,0x23983cf,0x019f97d },
10849       0 },
10850     /* 216 */
10851     { { 0x34899ef,0x332e373,0x04c0f89,0x3c27aed,0x1949015,0x09663b2,
10852         0x2f9276b,0x07f1951,0x09a04c1,0x027fbde },
10853       { 0x3d2a071,0x19fb3d4,0x1b096d3,0x1fe9146,0x3b10e1a,0x0478bbb,
10854         0x2b3fb06,0x1388329,0x181a99c,0x02f2030 },
10855       0 },
10856     /* 217 */
10857     { { 0x1eb82e6,0x14dbe39,0x3920972,0x31fd5b2,0x21a484f,0x02d7697,
10858         0x0e21715,0x37c431e,0x2629f8c,0x01249c3 },
10859       { 0x26b50ad,0x26deefa,0x0ffc1a3,0x30688e2,0x39a0284,0x041c65e,
10860         0x03eb178,0x0bdfd50,0x2f96137,0x034bb94 },
10861       0 },
10862     /* 218 */
10863     { { 0x0e0362a,0x334a162,0x194dd37,0x29e3e97,0x2442fa8,0x10d2949,
10864         0x3836e5a,0x2dccebf,0x0bee5ab,0x037ed1e },
10865       { 0x33eede6,0x3c739d9,0x2f04a91,0x350ad6c,0x3a5390a,0x14c368b,
10866         0x26f7bf5,0x11ce979,0x0b408df,0x0366850 },
10867       0 },
10868     /* 219 */
10869     { { 0x28ea498,0x0886d5b,0x2e090e0,0x0a4d58f,0x2623478,0x0d74ab7,
10870         0x2b83913,0x12c6b81,0x18d623f,0x01d8301 },
10871       { 0x198aa79,0x26d6330,0x3a7f0b8,0x34bc1ea,0x2f74890,0x378955a,
10872         0x204110f,0x0102538,0x02d8f19,0x01c5066 },
10873       0 },
10874     /* 220 */
10875     { { 0x14b0f45,0x2838cd3,0x14e16f0,0x0e0e4aa,0x2d9280b,0x0f18757,
10876         0x3324c6b,0x1391ceb,0x1ce89d5,0x00ebe74 },
10877       { 0x0930371,0x3de6048,0x3097fd8,0x1308705,0x3eda266,0x3108c26,
10878         0x1545dcd,0x1f7583a,0x1c37395,0x02c7e05 },
10879       0 },
10880     /* 221 */
10881     { { 0x1fec44a,0x2a9e3a2,0x0caf84f,0x11cf2a9,0x0c8c2ae,0x06da989,
10882         0x1c807dc,0x3c149a4,0x1141543,0x02906bb },
10883       { 0x15ffe04,0x0d4e65f,0x2e20424,0x37d896d,0x18bacb2,0x1e05ddd,
10884         0x1660be8,0x183be17,0x1dd86fb,0x035ba70 },
10885       0 },
10886     /* 222 */
10887     { { 0x2853264,0x0ba5fb1,0x0a0b3aa,0x2df88c1,0x2771533,0x23aba6f,
10888         0x112bb7b,0x3e3086e,0x210ae9b,0x027271b },
10889       { 0x030b74c,0x0269678,0x1e90a23,0x135a98c,0x24ed749,0x126de7c,
10890         0x344b23a,0x186da27,0x19640fa,0x0159af5 },
10891       0 },
10892     /* 223 */
10893     { { 0x18061f3,0x3004630,0x3c70066,0x34df20f,0x1190b25,0x1c9cc91,
10894         0x1fc8e02,0x0d17bc1,0x390f525,0x033cb1c },
10895       { 0x0eb30cf,0x2f3ad04,0x303aa09,0x2e835dd,0x1cfd2eb,0x143fc95,
10896         0x02c43a1,0x025e7a1,0x3558aa2,0x000bd45 },
10897       0 },
10898     /* 224 */
10899     { { 0x1db7d07,0x3bde52b,0x1500396,0x1089115,0x20b4fc7,0x1e2a8f3,
10900         0x3f8eacc,0x365f7eb,0x1a5e8d4,0x0053a6b },
10901       { 0x37079e2,0x120284b,0x000edaa,0x33792c2,0x145baa3,0x20e055f,
10902         0x365e2d7,0x26ba005,0x3ab8e9d,0x0282b53 },
10903       0 },
10904     /* 225 */
10905     { { 0x2653618,0x2dd8852,0x2a5f0bf,0x0f0c7aa,0x2187281,0x1252757,
10906         0x13e7374,0x3b47855,0x0b86e56,0x02f354c },
10907       { 0x2e9c47b,0x2fa14cc,0x19ab169,0x3fad401,0x0dc2776,0x24afeed,
10908         0x3a97611,0x0d07736,0x3cf6979,0x02424a0 },
10909       0 },
10910     /* 226 */
10911     { { 0x2e81a13,0x000c91d,0x123967b,0x265885c,0x29bee1a,0x0cb8675,
10912         0x2d361bd,0x1526823,0x3c9ace1,0x00d7bad },
10913       { 0x24e5bdc,0x02b969f,0x2c6e128,0x34edb3b,0x12dcd2c,0x3899af0,
10914         0x24224c6,0x3a1914b,0x0f4448a,0x026a2cb },
10915       0 },
10916     /* 227 */
10917     { { 0x1d03b59,0x1c6fc82,0x32abf64,0x28ed96b,0x1c90e62,0x2f57bb2,
10918         0x3ff168e,0x04de7fd,0x0f4d449,0x01af6d8 },
10919       { 0x255bc30,0x2bfaf22,0x3fe0dad,0x0584025,0x1c79ead,0x3078ef7,
10920         0x2197414,0x022a50b,0x0fd94ba,0x0007b0f },
10921       0 },
10922     /* 228 */
10923     { { 0x09485c2,0x09dfaf7,0x10c7ba6,0x1e48bec,0x248cc9a,0x028a362,
10924         0x21d60f7,0x193d93d,0x1c04754,0x0346b2c },
10925       { 0x2f36612,0x240ac49,0x0d8bd26,0x13b8186,0x259c3a4,0x020d5fb,
10926         0x38a8133,0x09b0937,0x39d4056,0x01f7341 },
10927       0 },
10928     /* 229 */
10929     { { 0x05a4b48,0x1f534fc,0x07725ce,0x148dc8c,0x2adcd29,0x04aa456,
10930         0x0f79718,0x066e346,0x189377d,0x002fd4d },
10931       { 0x068ea73,0x336569b,0x184d35e,0x32a08e9,0x3c7f3bb,0x11ce9c8,
10932         0x3674c6f,0x21bf27e,0x0d9e166,0x034a2f9 },
10933       0 },
10934     /* 230 */
10935     { { 0x0fa8e4b,0x2e6418e,0x18fc5d2,0x1ba24ff,0x0559f18,0x0dbedbf,
10936         0x2de2aa4,0x22338e9,0x3aa510f,0x035d801 },
10937       { 0x23a4988,0x02aad94,0x02732d1,0x111d374,0x0b455cf,0x0d01c9e,
10938         0x067082a,0x2ec05fd,0x368b303,0x03cad4b },
10939       0 },
10940     /* 231 */
10941     { { 0x035b4ca,0x1fabea6,0x1cbc0d5,0x3f2ed9a,0x02d2232,0x1990c66,
10942         0x2eb680c,0x3b4ea3b,0x18ecc5a,0x03636fa },
10943       { 0x1a02709,0x26f8ff1,0x1fa8cba,0x397d6e8,0x230be68,0x043aa14,
10944         0x3d43cdf,0x25c17fa,0x3a3ee55,0x0380564 },
10945       0 },
10946     /* 232 */
10947     { { 0x275a0a6,0x16bd43a,0x0033d3e,0x2b15e16,0x2512226,0x005d901,
10948         0x26d50fd,0x3bc19bf,0x3b1aeb8,0x02bfb01 },
10949       { 0x0bb0a31,0x26559e0,0x1aae7fb,0x330dcc2,0x16f1af3,0x06afce2,
10950         0x13a15a0,0x2ff7645,0x3546e2d,0x029c6e4 },
10951       0 },
10952     /* 233 */
10953     { { 0x0f593d2,0x384b806,0x122bbf8,0x0a281e0,0x1d1a904,0x2e93cab,
10954         0x0505db0,0x08f6454,0x05c6285,0x014e880 },
10955       { 0x3f2b935,0x22d8e79,0x161a07c,0x16b060a,0x02bff97,0x146328b,
10956         0x3ceea77,0x238f61a,0x19b3d58,0x02fd1f4 },
10957       0 },
10958     /* 234 */
10959     { { 0x17665d5,0x259e9f7,0x0de5672,0x15cbcbd,0x34e3030,0x035240f,
10960         0x0005ae8,0x286d851,0x07f39c9,0x000070b },
10961       { 0x1efc6d6,0x2a0051a,0x2724143,0x2a9ef1e,0x0c810bd,0x1e05429,
10962         0x25670ba,0x2e66d7d,0x0e786ff,0x03f6b7e },
10963       0 },
10964     /* 235 */
10965     { { 0x3c00785,0x232e23f,0x2b67fd3,0x244ed23,0x077fa75,0x3cda3ef,
10966         0x14d055b,0x0f25011,0x24d5aa4,0x00ea0e3 },
10967       { 0x297bb9a,0x198ca4f,0x14d9561,0x18d1076,0x39eb933,0x2b6caa0,
10968         0x1591a60,0x0768d45,0x257873e,0x00f36e0 },
10969       0 },
10970     /* 236 */
10971     { { 0x1e77eab,0x0502a5f,0x0109137,0x0350592,0x3f7e1c5,0x3ac7437,
10972         0x2dcad2c,0x1fee9d8,0x089f1f5,0x0169833 },
10973       { 0x0d45673,0x0d8e090,0x065580b,0x065644f,0x11b82be,0x3592dd0,
10974         0x3284b8d,0x23f0015,0x16fdbfd,0x0248bfd },
10975       0 },
10976     /* 237 */
10977     { { 0x1a129a1,0x1977bb2,0x0e041b2,0x15f30a1,0x0a5b1ce,0x3afef8f,
10978         0x380c46c,0x3358810,0x27df6c5,0x01ca466 },
10979       { 0x3b90f9a,0x3d14ea3,0x031b298,0x02e2390,0x2d719c0,0x25bc615,
10980         0x2c0e777,0x0226b8c,0x3803624,0x0179e45 },
10981       0 },
10982     /* 238 */
10983     { { 0x363cdfb,0x1bb155f,0x24fd5c1,0x1c7c72b,0x28e6a35,0x18165f2,
10984         0x226bea5,0x0beaff3,0x371e24c,0x0138294 },
10985       { 0x1765357,0x29034e9,0x22b4276,0x11035ce,0x23c89af,0x074468c,
10986         0x3370ae4,0x013bae3,0x018d566,0x03d7fde },
10987       0 },
10988     /* 239 */
10989     { { 0x209df21,0x0f8ff86,0x0e47fbf,0x23b99ba,0x126d5d2,0x2722405,
10990         0x16bd0a2,0x1799082,0x0e9533f,0x039077c },
10991       { 0x3ba9e3f,0x3f6902c,0x1895305,0x3ac9813,0x3f2340c,0x3c0d9f1,
10992         0x26e1927,0x0557c21,0x16eac4f,0x023b75f },
10993       0 },
10994     /* 240 */
10995     { { 0x3fc8ff3,0x0770382,0x342fc9a,0x0afa4db,0x314efd8,0x328e07b,
10996         0x016f7cc,0x3ba599c,0x1caed8a,0x0050cb0 },
10997       { 0x0b23c26,0x2120a5c,0x3273ec6,0x1cc1cd6,0x2a64fe8,0x2bbc3d6,
10998         0x09f6e5e,0x34b1b8e,0x00b5ac8,0x032bbd2 },
10999       0 },
11000     /* 241 */
11001     { { 0x1315922,0x1725e1d,0x0ca5524,0x1c4c18f,0x3d82951,0x193bcb2,
11002         0x0e60d0b,0x388dbcf,0x37e8efa,0x0342e85 },
11003       { 0x1b3af60,0x26ba3ec,0x220e53a,0x394f4b6,0x01a796a,0x3e7bbca,
11004         0x163605d,0x2b85807,0x17c1c54,0x03cc725 },
11005       0 },
11006     /* 242 */
11007     { { 0x1cc4597,0x1635492,0x2028c0f,0x2c2eb82,0x2dc5015,0x0d2a052,
11008         0x05fc557,0x1f0ebbf,0x0cb96e1,0x0004d01 },
11009       { 0x1a824bf,0x3896172,0x2ed7b29,0x178007a,0x0d59318,0x07bda2b,
11010         0x2ee6826,0x0f9b235,0x04b9193,0x01bcddf },
11011       0 },
11012     /* 243 */
11013     { { 0x0333fd2,0x0eeb46a,0x15b89f9,0x00968aa,0x2a89302,0x2bdd6b3,
11014         0x1e5037e,0x2541884,0x24ed2d0,0x01b6e8f },
11015       { 0x04399cd,0x3be6334,0x3adea48,0x1bb9adc,0x31811c6,0x05fb2bc,
11016         0x360752c,0x3d29dcb,0x3423bec,0x03c4f3c },
11017       0 },
11018     /* 244 */
11019     { { 0x119e2eb,0x2e7b02a,0x0f68cee,0x257d8b0,0x183a9a1,0x2ae88a6,
11020         0x3a3bb67,0x2eb4f3e,0x1a9274b,0x0320fea },
11021       { 0x2fa1ce0,0x346c2d8,0x2fbf0d7,0x3d4d063,0x0e58b60,0x09c1bc1,
11022         0x28ef9e5,0x09a0efe,0x0f45d70,0x02d275c },
11023       0 },
11024     /* 245 */
11025     { { 0x2d5513b,0x31d443e,0x1e2d914,0x3b2c5d4,0x105f32e,0x27ee756,
11026         0x050418d,0x3c73db6,0x1bb0c30,0x01673eb },
11027       { 0x1cb7fd6,0x1eb08d5,0x26a3e16,0x2e20810,0x0249367,0x029e219,
11028         0x2ec58c9,0x12d9fab,0x362354a,0x016eafc },
11029       0 },
11030     /* 246 */
11031     { { 0x2424865,0x260747b,0x177f37c,0x1e3cb95,0x08b0028,0x2783016,
11032         0x2970f1b,0x323c1c0,0x2a79026,0x0186231 },
11033       { 0x0f244da,0x26866f4,0x087306f,0x173ec20,0x31ecced,0x3c84d8d,
11034         0x070f9b9,0x2e764d5,0x075df50,0x0264ff9 },
11035       0 },
11036     /* 247 */
11037     { { 0x32c3609,0x0c737e6,0x14ea68e,0x300b11b,0x184eb19,0x29dd440,
11038         0x09ec1a9,0x185adeb,0x0664c80,0x0207dd9 },
11039       { 0x1fbe978,0x30a969d,0x33561d7,0x34fc60e,0x36743fe,0x00774af,
11040         0x0d1f045,0x018360e,0x12a5fe9,0x01592a0 },
11041       0 },
11042     /* 248 */
11043     { { 0x2817d1d,0x2993d3e,0x2e0f7a5,0x112faa0,0x255f968,0x355fe6a,
11044         0x3f5a0fc,0x075b2d7,0x3cf00e5,0x0089afc },
11045       { 0x32833cf,0x06a7e4b,0x09a8d6d,0x1693d3e,0x320a0a3,0x3cfdfdd,
11046         0x136c498,0x1e0d845,0x347ff25,0x01a1de7 },
11047       0 },
11048     /* 249 */
11049     { { 0x3043d08,0x030705c,0x20fa79b,0x1d07f00,0x0a54467,0x29b49b4,
11050         0x367e289,0x0b82f4d,0x0d1eb09,0x025ef2c },
11051       { 0x32ed3c3,0x1baaa3c,0x3c482ab,0x146ca06,0x3c8a4f1,0x3e85e3c,
11052         0x1bf4f3b,0x1195534,0x3e80a78,0x02a1cbf },
11053       0 },
11054     /* 250 */
11055     { { 0x32b2086,0x2de4d68,0x3486b1a,0x03a0583,0x2e1eb71,0x2dab9af,
11056         0x10cd913,0x28daa6f,0x3fcb732,0x000a04a },
11057       { 0x3605318,0x3f5f2b3,0x2d1da63,0x143f7f5,0x1646e5d,0x040b586,
11058         0x1683982,0x25abe87,0x0c9fe53,0x001ce47 },
11059       0 },
11060     /* 251 */
11061     { { 0x380d02b,0x055fc22,0x3f7fc50,0x3458a1d,0x26b8333,0x23550ab,
11062         0x0a1af87,0x0a821eb,0x2dc7e6d,0x00d574a },
11063       { 0x07386e1,0x3ccd68a,0x3275b41,0x253e390,0x2fd272a,0x1e6627a,
11064         0x2ca2cde,0x0e9e4a1,0x1e37c2a,0x00f70ac },
11065       0 },
11066     /* 252 */
11067     { { 0x0581352,0x2748701,0x02bed68,0x094dd9e,0x30a00c8,0x3fb5c07,
11068         0x3bd5909,0x211ac80,0x1103ccd,0x0311e1a },
11069       { 0x0c768ed,0x29dc209,0x36575db,0x009a107,0x272feea,0x2b33383,
11070         0x313ed56,0x134c9cc,0x168d5bb,0x033310a },
11071       0 },
11072     /* 253 */
11073     { { 0x17620b9,0x143784f,0x256a94e,0x229664a,0x1d89a5c,0x1d521f2,
11074         0x0076406,0x1c73f70,0x342aa48,0x03851fa },
11075       { 0x0f3ae46,0x2ad3bab,0x0fbe274,0x3ed40d4,0x2fd4936,0x232103a,
11076         0x2afe474,0x25b8f7c,0x047080e,0x008e6b0 },
11077       0 },
11078     /* 254 */
11079     { { 0x3fee8d4,0x347cd4a,0x0fec481,0x33fe9ec,0x0ce80b5,0x33a6bcf,
11080         0x1c4c9e2,0x3967441,0x1a3f5f7,0x03157e8 },
11081       { 0x257c227,0x1bc53a0,0x200b318,0x0fcd0af,0x2c5b165,0x2a413ec,
11082         0x2fc998a,0x2da6426,0x19cd4f4,0x0025336 },
11083       0 },
11084     /* 255 */
11085     { { 0x303beba,0x2072135,0x32918a9,0x140cb3a,0x08631d1,0x0ef527b,
11086         0x05f2c9e,0x2b4ce91,0x0b642ab,0x02e428c },
11087       { 0x0a5abf9,0x15013ed,0x3603b46,0x30dd76d,0x3004750,0x28d7627,
11088         0x1a42ccc,0x093ddbe,0x39a1b79,0x00067e2 },
11089       0 },
11090 };
11091 
11092 /* Multiply the base point of P256 by the scalar and return the result.
11093  * If map is true then convert result to affine co-ordinates.
11094  *
11095  * r     Resulting point.
11096  * k     Scalar to multiply by.
11097  * map   Indicates whether to convert result to affine.
11098  * heap  Heap to use for allocation.
11099  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
11100  */
11101 static int sp_256_ecc_mulmod_base_10(sp_point* r, sp_digit* k,
11102         int map, void* heap)
11103 {
11104     return sp_256_ecc_mulmod_stripe_10(r, &p256_base, p256_table,
11105                                       k, map, heap);
11106 }
11107 
11108 #endif
11109 
11110 /* Multiply the base point of P256 by the scalar and return the result.
11111  * If map is true then convert result to affine co-ordinates.
11112  *
11113  * km    Scalar to multiply by.
11114  * r     Resulting point.
11115  * map   Indicates whether to convert result to affine.
11116  * heap  Heap to use for allocation.
11117  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
11118  */
11119 int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
11120 {
11121 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
11122     sp_point p;
11123     sp_digit kd[10];
11124 #endif
11125     sp_point* point;
11126     sp_digit* k = NULL;
11127     int err = MP_OKAY;
11128 #ifdef HAVE_INTEL_AVX2
11129     word32 cpuid_flags = cpuid_get_flags();
11130 #endif
11131 
11132     err = sp_ecc_point_new(heap, p, point);
11133 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
11134     if (err == MP_OKAY) {
11135         k = XMALLOC(sizeof(sp_digit) * 10, heap, DYNAMIC_TYPE_ECC);
11136         if (k == NULL)
11137             err = MEMORY_E;
11138     }
11139 #else
11140     k = kd;
11141 #endif
11142     if (err == MP_OKAY) {
11143         sp_256_from_mp(k, 10, km);
11144 
11145 #ifdef HAVE_INTEL_AVX2
11146         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
11147             err = sp_256_ecc_mulmod_base_avx2_10(point, k, map, heap);
11148         else
11149 #endif
11150             err = sp_256_ecc_mulmod_base_10(point, k, map, heap);
11151     }
11152     if (err == MP_OKAY)
11153         err = sp_256_point_to_ecc_point_10(point, r);
11154 
11155 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
11156     if (k != NULL)
11157         XFREE(k, heap, DYNAMIC_TYPE_ECC);
11158 #endif
11159     sp_ecc_point_free(point, 0, heap);
11160 
11161     return err;
11162 }
11163 
#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN)
/* Check whether a 10 digit number is zero.
 * Every digit is OR-ed into one accumulator, so there is no branch that
 * depends on the value of the number.
 *
 * a  Number to check.
 * returns 1 if the number is zero and 0 otherwise.
 */
static int sp_256_iszero_10(const sp_digit* a)
{
    sp_digit bits = 0;
    int i;

    for (i = 0; i < 10; i++) {
        bits |= a[i];
    }

    return bits == 0;
}

#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN */
11178 /* Add 1 to a. (a = a + 1)
11179  *
11180  * r  A single precision integer.
11181  * a  A single precision integer.
11182  */
11183 SP_NOINLINE static void sp_256_add_one_10(sp_digit* a)
11184 {
11185     a[0]++;
11186     sp_256_norm_10(a);
11187 }
11188 
11189 /* Read big endian unsigned byte aray into r.
11190  *
11191  * r  A single precision integer.
11192  * a  Byte array.
11193  * n  Number of bytes in array to read.
11194  */
11195 static void sp_256_from_bin(sp_digit* r, int max, const byte* a, int n)
11196 {
11197     int i, j = 0, s = 0;
11198 
11199     r[0] = 0;
11200     for (i = n-1; i >= 0; i--) {
11201         r[j] |= ((sp_digit)a[i]) << s;
11202         if (s >= 18) {
11203             r[j] &= 0x3ffffff;
11204             s = 26 - s;
11205             if (j + 1 >= max)
11206                 break;
11207             r[++j] = a[i] >> s;
11208             s = 8 - s;
11209         }
11210         else
11211             s += 8;
11212     }
11213 
11214     for (j++; j < max; j++)
11215         r[j] = 0;
11216 }
11217 
11218 /* Generates a scalar that is in the range 1..order-1.
11219  *
11220  * rng  Random number generator.
11221  * k    Scalar value.
11222  * returns RNG failures, MEMORY_E when memory allocation fails and
11223  * MP_OKAY on success.
11224  */
11225 static int sp_256_ecc_gen_k_10(WC_RNG* rng, sp_digit* k)
11226 {
11227     int err;
11228     byte buf[32];
11229 
11230     do {
11231         err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
11232         if (err == 0) {
11233             sp_256_from_bin(k, 10, buf, sizeof(buf));
11234             if (sp_256_cmp_10(k, p256_order2) < 0) {
11235                 sp_256_add_one_10(k);
11236                 break;
11237             }
11238         }
11239     }
11240     while (err == 0);
11241 
11242     return err;
11243 }
11244 
11245 /* Makes a random EC key pair.
11246  *
11247  * rng   Random number generator.
11248  * priv  Generated private value.
11249  * pub   Generated public point.
11250  * heap  Heap to use for allocation.
11251  * returns ECC_INF_E when the point does not have the correct order, RNG
11252  * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
11253  */
11254 int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
11255 {
11256 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
11257     sp_point p;
11258     sp_digit kd[10];
11259 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
11260     sp_point inf;
11261 #endif
11262 #endif
11263     sp_point* point;
11264     sp_digit* k = NULL;
11265 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
11266     sp_point* infinity;
11267 #endif
11268     int err;
11269 #ifdef HAVE_INTEL_AVX2
11270     word32 cpuid_flags = cpuid_get_flags();
11271 #endif
11272 
11273     (void)heap;
11274 
11275     err = sp_ecc_point_new(heap, p, point);
11276 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
11277     if (err == MP_OKAY)
11278         err = sp_ecc_point_new(heap, inf, infinity);
11279 #endif
11280 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
11281     if (err == MP_OKAY) {
11282         k = XMALLOC(sizeof(sp_digit) * 10, heap, DYNAMIC_TYPE_ECC);
11283         if (k == NULL)
11284             err = MEMORY_E;
11285     }
11286 #else
11287     k = kd;
11288 #endif
11289 
11290     if (err == MP_OKAY)
11291         err = sp_256_ecc_gen_k_10(rng, k);
11292     if (err == MP_OKAY) {
11293 #ifdef HAVE_INTEL_AVX2
11294         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
11295             err = sp_256_ecc_mulmod_base_avx2_10(point, k, 1, NULL);
11296         else
11297 #endif
11298             err = sp_256_ecc_mulmod_base_10(point, k, 1, NULL);
11299     }
11300 
11301 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
11302     if (err == MP_OKAY) {
11303 #ifdef HAVE_INTEL_AVX2
11304         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
11305             err = sp_256_ecc_mulmod_avx2_10(infinity, point, p256_order, 1,
11306                                                                           NULL);
11307         }
11308         else
11309 #endif
11310             err = sp_256_ecc_mulmod_10(infinity, point, p256_order, 1, NULL);
11311     }
11312     if (err == MP_OKAY) {
11313         if (!sp_256_iszero_10(point->x) || !sp_256_iszero_10(point->y))
11314             err = ECC_INF_E;
11315     }
11316 #endif
11317 
11318     if (err == MP_OKAY)
11319         err = sp_256_to_mp(k, priv);
11320     if (err == MP_OKAY)
11321         err = sp_256_point_to_ecc_point_10(point, pub);
11322 
11323 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
11324     if (k != NULL)
11325         XFREE(k, heap, DYNAMIC_TYPE_ECC);
11326 #endif
11327 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
11328     sp_ecc_point_free(infinity, 1, heap);
11329 #endif
11330     sp_ecc_point_free(point, 1, heap);
11331 
11332     return err;
11333 }
11334 
11335 #ifdef HAVE_ECC_DHE
11336 /* Write r as big endian to byte aray.
11337  * Fixed length number of bytes written: 32
11338  *
11339  * r  A single precision integer.
11340  * a  Byte array.
11341  */
11342 static void sp_256_to_bin(sp_digit* r, byte* a)
11343 {
11344     int i, j, s = 0, b;
11345 
11346     for (i=0; i<9; i++) {
11347         r[i+1] += r[i] >> 26;
11348         r[i] &= 0x3ffffff;
11349     }
11350     j = 256 / 8 - 1;
11351     a[j] = 0;
11352     for (i=0; i<10 && j>=0; i++) {
11353         b = 0;
11354         a[j--] |= r[i] << s; b += 8 - s;
11355         if (j < 0)
11356             break;
11357         while (b < 26) {
11358             a[j--] = r[i] >> b; b += 8;
11359             if (j < 0)
11360                 break;
11361         }
11362         s = 8 - (b - 26);
11363         if (j >= 0)
11364             a[j] = 0;
11365         if (s != 0)
11366             j++;
11367     }
11368 }
11369 
11370 /* Multiply the point by the scalar and serialize the X ordinate.
11371  * The number is 0 padded to maximum size on output.
11372  *
11373  * priv    Scalar to multiply the point by.
11374  * pub     Point to multiply.
11375  * out     Buffer to hold X ordinate.
11376  * outLen  On entry, size of the buffer in bytes.
11377  *         On exit, length of data in buffer in bytes.
11378  * heap    Heap to use for allocation.
11379  * returns BUFFER_E if the buffer is to small for output size,
11380  * MEMORY_E when memory allocation fails and MP_OKAY on success.
11381  */
11382 int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
11383                           word32* outLen, void* heap)
11384 {
11385 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
11386     sp_point p;
11387     sp_digit kd[10];
11388 #endif
11389     sp_point* point = NULL;
11390     sp_digit* k = NULL;
11391     int err = MP_OKAY;
11392 #ifdef HAVE_INTEL_AVX2
11393     word32 cpuid_flags = cpuid_get_flags();
11394 #endif
11395 
11396     if (*outLen < 32)
11397         err = BUFFER_E;
11398 
11399     if (err == MP_OKAY)
11400         err = sp_ecc_point_new(heap, p, point);
11401 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
11402     if (err == MP_OKAY) {
11403         k = XMALLOC(sizeof(sp_digit) * 10, heap, DYNAMIC_TYPE_ECC);
11404         if (k == NULL)
11405             err = MEMORY_E;
11406     }
11407 #else
11408     k = kd;
11409 #endif
11410 
11411     if (err == MP_OKAY) {
11412         sp_256_from_mp(k, 10, priv);
11413         sp_256_point_from_ecc_point_10(point, pub);
11414 #ifdef HAVE_INTEL_AVX2
11415         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
11416             err = sp_256_ecc_mulmod_avx2_10(point, point, k, 1, heap);
11417         else
11418 #endif
11419             err = sp_256_ecc_mulmod_10(point, point, k, 1, heap);
11420     }
11421     if (err == MP_OKAY) {
11422         sp_256_to_bin(point->x, out);
11423         *outLen = 32;
11424     }
11425 
11426 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
11427     if (k != NULL)
11428         XFREE(k, heap, DYNAMIC_TYPE_ECC);
11429 #endif
11430     sp_ecc_point_free(point, 0, heap);
11431 
11432     return err;
11433 }
11434 #endif /* HAVE_ECC_DHE */
11435 
11436 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
11437 #ifdef HAVE_INTEL_AVX2
11438 #endif /* HAVE_INTEL_AVX2 */
11439 #endif
11440 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
11441 /* Multiply a by scalar b into r. (r = a * b)
11442  *
11443  * r  A single precision integer.
11444  * a  A single precision integer.
11445  * b  A scalar.
11446  */
11447 SP_NOINLINE static void sp_256_mul_d_10(sp_digit* r, const sp_digit* a,
11448     const sp_digit b)
11449 {
11450 #ifdef WOLFSSL_SP_SMALL
11451     int64_t tb = b;
11452     int64_t t = 0;
11453     int i;
11454 
11455     for (i = 0; i < 10; i++) {
11456         t += tb * a[i];
11457         r[i] = t & 0x3ffffff;
11458         t >>= 26;
11459     }
11460     r[10] = (sp_digit)t;
11461 #else
11462     int64_t tb = b;
11463     int64_t t[10];
11464 
11465     t[ 0] = tb * a[ 0];
11466     t[ 1] = tb * a[ 1];
11467     t[ 2] = tb * a[ 2];
11468     t[ 3] = tb * a[ 3];
11469     t[ 4] = tb * a[ 4];
11470     t[ 5] = tb * a[ 5];
11471     t[ 6] = tb * a[ 6];
11472     t[ 7] = tb * a[ 7];
11473     t[ 8] = tb * a[ 8];
11474     t[ 9] = tb * a[ 9];
11475     r[ 0] =                           (t[ 0] & 0x3ffffff);
11476     r[ 1] = (sp_digit)(t[ 0] >> 26) + (t[ 1] & 0x3ffffff);
11477     r[ 2] = (sp_digit)(t[ 1] >> 26) + (t[ 2] & 0x3ffffff);
11478     r[ 3] = (sp_digit)(t[ 2] >> 26) + (t[ 3] & 0x3ffffff);
11479     r[ 4] = (sp_digit)(t[ 3] >> 26) + (t[ 4] & 0x3ffffff);
11480     r[ 5] = (sp_digit)(t[ 4] >> 26) + (t[ 5] & 0x3ffffff);
11481     r[ 6] = (sp_digit)(t[ 5] >> 26) + (t[ 6] & 0x3ffffff);
11482     r[ 7] = (sp_digit)(t[ 6] >> 26) + (t[ 7] & 0x3ffffff);
11483     r[ 8] = (sp_digit)(t[ 7] >> 26) + (t[ 8] & 0x3ffffff);
11484     r[ 9] = (sp_digit)(t[ 8] >> 26) + (t[ 9] & 0x3ffffff);
11485     r[10] = (sp_digit)(t[ 9] >> 26);
11486 #endif /* WOLFSSL_SP_SMALL */
11487 }
11488 
11489 /* Divide d in a and put remainder into r (m*d + r = a)
11490  * m is not calculated as it is not needed at this time.
11491  *
11492  * a  Nmber to be divided.
11493  * d  Number to divide with.
11494  * m  Multiplier result.
11495  * r  Remainder from the division.
11496  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
11497  */
11498 static int sp_256_div_10(sp_digit* a, sp_digit* d, sp_digit* m,
11499         sp_digit* r)
11500 {
11501     int i;
11502     int64_t d1;
11503     sp_digit div, r1;
11504 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
11505     sp_digit* td;
11506 #else
11507     sp_digit t1d[20], t2d[10 + 1];
11508 #endif
11509     sp_digit* t1;
11510     sp_digit* t2;
11511     int err = MP_OKAY;
11512 
11513 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
11514     td = XMALLOC(sizeof(sp_digit) * (3 * 10 + 1), NULL, DYNAMIC_TYPE_TMP_BUFFER);
11515     if (td != NULL) {
11516         t1 = td;
11517         t2 = td + 2 * 10;
11518     }
11519     else
11520         err = MEMORY_E;
11521 #else
11522     t1 = t1d;
11523     t2 = t2d;
11524 #endif
11525 
11526     (void)m;
11527 
11528     if (err == MP_OKAY) {
11529         div = d[9];
11530         XMEMCPY(t1, a, sizeof(*t1) * 2 * 10);
11531         for (i=9; i>=0; i--) {
11532             t1[10 + i] += t1[10 + i - 1] >> 26;
11533             t1[10 + i - 1] &= 0x3ffffff;
11534             d1 = t1[10 + i];
11535             d1 <<= 26;
11536             d1 += t1[10 + i - 1];
11537             r1 = (sp_digit)(d1 / div);
11538 
11539             sp_256_mul_d_10(t2, d, r1);
11540             sp_256_sub_10(&t1[i], &t1[i], t2);
11541             t1[10 + i] -= t2[10];
11542             t1[10 + i] += t1[10 + i - 1] >> 26;
11543             t1[10 + i - 1] &= 0x3ffffff;
11544             r1 = (((-t1[10 + i]) << 26) - t1[10 + i - 1]) / div;
11545             r1++;
11546             sp_256_mul_d_10(t2, d, r1);
11547             sp_256_add_10(&t1[i], &t1[i], t2);
11548             t1[10 + i] += t1[10 + i - 1] >> 26;
11549             t1[10 + i - 1] &= 0x3ffffff;
11550         }
11551         t1[10 - 1] += t1[10 - 2] >> 26;
11552         t1[10 - 2] &= 0x3ffffff;
11553         d1 = t1[10 - 1];
11554         r1 = (sp_digit)(d1 / div);
11555 
11556         sp_256_mul_d_10(t2, d, r1);
11557         sp_256_sub_10(t1, t1, t2);
11558         XMEMCPY(r, t1, sizeof(*r) * 2 * 10);
11559         for (i=0; i<8; i++) {
11560             r[i+1] += r[i] >> 26;
11561             r[i] &= 0x3ffffff;
11562         }
11563         sp_256_cond_add_10(r, r, d, 0 - (r[9] < 0));
11564     }
11565 
11566 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
11567     if (td != NULL)
11568         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
11569 #endif
11570 
11571     return err;
11572 }
11573 
11574 /* Reduce a modulo m into r. (r = a mod m)
11575  *
11576  * r  A single precision number that is the reduced result.
11577  * a  A single precision number that is to be reduced.
11578  * m  A single precision number that is the modulus to reduce with.
11579  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
11580  */
11581 static int sp_256_mod_10(sp_digit* r, sp_digit* a, sp_digit* m)
11582 {
11583     return sp_256_div_10(a, m, NULL, r);
11584 }
11585 
11586 #endif
11587 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
#ifdef WOLFSSL_SP_SMALL
/* The order of the P256 curve minus 2, least significant word first. */
static const uint32_t p256_order_2[8] = {
    0xfc63254fU, 0xf3b9cac2U, 0xa7179e84U, 0xbce6faadU,
    0xffffffffU, 0xffffffffU, 0x00000000U, 0xffffffffU
};
#else
/* The low 128 bits of the P256 order minus 2, least significant word first. */
static const uint32_t p256_order_low[4] = {
    0xfc63254fU, 0xf3b9cac2U, 0xa7179e84U, 0xbce6faadU
};
#endif /* WOLFSSL_SP_SMALL */
11600 
11601 /* Multiply two number mod the order of P256 curve. (r = a * b mod order)
11602  *
11603  * r  Result of the multiplication.
11604  * a  First operand of the multiplication.
11605  * b  Second operand of the multiplication.
11606  */
11607 static void sp_256_mont_mul_order_10(sp_digit* r, sp_digit* a, sp_digit* b)
11608 {
11609     sp_256_mul_10(r, a, b);
11610     sp_256_mont_reduce_10(r, p256_order, p256_mp_order);
11611 }
11612 
11613 /* Square number mod the order of P256 curve. (r = a * a mod order)
11614  *
11615  * r  Result of the squaring.
11616  * a  Number to square.
11617  */
11618 static void sp_256_mont_sqr_order_10(sp_digit* r, sp_digit* a)
11619 {
11620     sp_256_sqr_10(r, a);
11621     sp_256_mont_reduce_10(r, p256_order, p256_mp_order);
11622 }
11623 
11624 #ifndef WOLFSSL_SP_SMALL
11625 /* Square number mod the order of P256 curve a number of times.
11626  * (r = a ^ n mod order)
11627  *
11628  * r  Result of the squaring.
11629  * a  Number to square.
11630  */
11631 static void sp_256_mont_sqr_n_order_10(sp_digit* r, sp_digit* a, int n)
11632 {
11633     int i;
11634 
11635     sp_256_mont_sqr_order_10(r, a);
11636     for (i=1; i<n; i++)
11637         sp_256_mont_sqr_order_10(r, r);
11638 }
11639 #endif /* !WOLFSSL_SP_SMALL */
11640 
11641 /* Invert the number, in Montgomery form, modulo the order of the P256 curve.
11642  * (r = 1 / a mod order)
11643  *
11644  * r   Inverse result.
11645  * a   Number to invert.
11646  * td  Temporary data.
11647  */
11648 static void sp_256_mont_inv_order_10(sp_digit* r, sp_digit* a,
11649         sp_digit* td)
11650 {
11651 #ifdef WOLFSSL_SP_SMALL
11652     sp_digit* t = td;
11653     int i;
11654 
11655     XMEMCPY(t, a, sizeof(sp_digit) * 10);
11656     for (i=254; i>=0; i--) {
11657         sp_256_mont_sqr_order_10(t, t);
11658         if (p256_order_2[i / 32] & ((sp_digit)1 << (i % 32)))
11659             sp_256_mont_mul_order_10(t, t, a);
11660     }
11661     XMEMCPY(r, t, sizeof(sp_digit) * 10);
11662 #else
11663     sp_digit* t = td;
11664     sp_digit* t2 = td + 2 * 10;
11665     sp_digit* t3 = td + 4 * 10;
11666     int i;
11667 
11668     /* t = a^2 */
11669     sp_256_mont_sqr_order_10(t, a);
11670     /* t = a^3 = t * a */
11671     sp_256_mont_mul_order_10(t, t, a);
11672     /* t2= a^c = t ^ 2 ^ 2 */
11673     sp_256_mont_sqr_n_order_10(t2, t, 2);
11674     /* t3= a^f = t2 * t */
11675     sp_256_mont_mul_order_10(t3, t2, t);
11676     /* t2= a^f0 = t3 ^ 2 ^ 4 */
11677     sp_256_mont_sqr_n_order_10(t2, t3, 4);
11678     /* t = a^ff = t2 * t3 */
11679     sp_256_mont_mul_order_10(t, t2, t3);
11680     /* t3= a^ff00 = t ^ 2 ^ 8 */
11681     sp_256_mont_sqr_n_order_10(t2, t, 8);
11682     /* t = a^ffff = t2 * t */
11683     sp_256_mont_mul_order_10(t, t2, t);
11684     /* t2= a^ffff0000 = t ^ 2 ^ 16 */
11685     sp_256_mont_sqr_n_order_10(t2, t, 16);
11686     /* t = a^ffffffff = t2 * t */
11687     sp_256_mont_mul_order_10(t, t2, t);
11688     /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64  */
11689     sp_256_mont_sqr_n_order_10(t2, t, 64);
11690     /* t2= a^ffffffff00000000ffffffff = t2 * t */
11691     sp_256_mont_mul_order_10(t2, t2, t);
11692     /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32  */
11693     sp_256_mont_sqr_n_order_10(t2, t2, 32);
11694     /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
11695     sp_256_mont_mul_order_10(t2, t2, t);
11696     /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
11697     for (i=127; i>=112; i--) {
11698         sp_256_mont_sqr_order_10(t2, t2);
11699         if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
11700             sp_256_mont_mul_order_10(t2, t2, a);
11701     }
11702     /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
11703     sp_256_mont_sqr_n_order_10(t2, t2, 4);
11704     sp_256_mont_mul_order_10(t2, t2, t3);
11705     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
11706     for (i=107; i>=64; i--) {
11707         sp_256_mont_sqr_order_10(t2, t2);
11708         if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
11709             sp_256_mont_mul_order_10(t2, t2, a);
11710     }
11711     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
11712     sp_256_mont_sqr_n_order_10(t2, t2, 4);
11713     sp_256_mont_mul_order_10(t2, t2, t3);
11714     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
11715     for (i=59; i>=32; i--) {
11716         sp_256_mont_sqr_order_10(t2, t2);
11717         if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
11718             sp_256_mont_mul_order_10(t2, t2, a);
11719     }
11720     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
11721     sp_256_mont_sqr_n_order_10(t2, t2, 4);
11722     sp_256_mont_mul_order_10(t2, t2, t3);
11723     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
11724     for (i=27; i>=0; i--) {
11725         sp_256_mont_sqr_order_10(t2, t2);
11726         if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
11727             sp_256_mont_mul_order_10(t2, t2, a);
11728     }
11729     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
11730     sp_256_mont_sqr_n_order_10(t2, t2, 4);
11731     /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
11732     sp_256_mont_mul_order_10(r, t2, t3);
11733 #endif /* WOLFSSL_SP_SMALL */
11734 }
11735 
11736 #ifdef HAVE_INTEL_AVX2
11737 /* Multiply two number mod the order of P256 curve. (r = a * b mod order)
11738  *
11739  * r  Result of the multiplication.
11740  * a  First operand of the multiplication.
11741  * b  Second operand of the multiplication.
11742  */
11743 static void sp_256_mont_mul_order_avx2_10(sp_digit* r, sp_digit* a, sp_digit* b)
11744 {
11745     sp_256_mul_avx2_10(r, a, b);
11746     sp_256_mont_reduce_avx2_10(r, p256_order, p256_mp_order);
11747 }
11748 
11749 /* Square number mod the order of P256 curve. (r = a * a mod order)
11750  *
11751  * r  Result of the squaring.
11752  * a  Number to square.
11753  */
11754 static void sp_256_mont_sqr_order_avx2_10(sp_digit* r, sp_digit* a)
11755 {
11756     sp_256_sqr_avx2_10(r, a);
11757     sp_256_mont_reduce_avx2_10(r, p256_order, p256_mp_order);
11758 }
11759 
11760 #ifndef WOLFSSL_SP_SMALL
11761 /* Square number mod the order of P256 curve a number of times.
11762  * (r = a ^ n mod order)
11763  *
11764  * r  Result of the squaring.
11765  * a  Number to square.
11766  */
11767 static void sp_256_mont_sqr_n_order_avx2_10(sp_digit* r, sp_digit* a, int n)
11768 {
11769     int i;
11770 
11771     sp_256_mont_sqr_order_avx2_10(r, a);
11772     for (i=1; i<n; i++)
11773         sp_256_mont_sqr_order_avx2_10(r, r);
11774 }
11775 #endif /* !WOLFSSL_SP_SMALL */
11776 
11777 /* Invert the number, in Montgomery form, modulo the order of the P256 curve.
11778  * (r = 1 / a mod order)
11779  *
11780  * r   Inverse result.
11781  * a   Number to invert.
11782  * td  Temporary data.
11783  */
11784 static void sp_256_mont_inv_order_avx2_10(sp_digit* r, sp_digit* a,
11785         sp_digit* td)
11786 {
11787 #ifdef WOLFSSL_SP_SMALL
11788     sp_digit* t = td;
11789     int i;
11790 
11791     XMEMCPY(t, a, sizeof(sp_digit) * 10);
11792     for (i=254; i>=0; i--) {
11793         sp_256_mont_sqr_order_avx2_10(t, t);
11794         if (p256_order_2[i / 32] & ((sp_digit)1 << (i % 32)))
11795             sp_256_mont_mul_order_avx2_10(t, t, a);
11796     }
11797     XMEMCPY(r, t, sizeof(sp_digit) * 10);
11798 #else
11799     sp_digit* t = td;
11800     sp_digit* t2 = td + 2 * 10;
11801     sp_digit* t3 = td + 4 * 10;
11802     int i;
11803 
11804     /* t = a^2 */
11805     sp_256_mont_sqr_order_avx2_10(t, a);
11806     /* t = a^3 = t * a */
11807     sp_256_mont_mul_order_avx2_10(t, t, a);
11808     /* t2= a^c = t ^ 2 ^ 2 */
11809     sp_256_mont_sqr_n_order_avx2_10(t2, t, 2);
11810     /* t3= a^f = t2 * t */
11811     sp_256_mont_mul_order_avx2_10(t3, t2, t);
11812     /* t2= a^f0 = t3 ^ 2 ^ 4 */
11813     sp_256_mont_sqr_n_order_avx2_10(t2, t3, 4);
11814     /* t = a^ff = t2 * t3 */
11815     sp_256_mont_mul_order_avx2_10(t, t2, t3);
11816     /* t3= a^ff00 = t ^ 2 ^ 8 */
11817     sp_256_mont_sqr_n_order_avx2_10(t2, t, 8);
11818     /* t = a^ffff = t2 * t */
11819     sp_256_mont_mul_order_avx2_10(t, t2, t);
11820     /* t2= a^ffff0000 = t ^ 2 ^ 16 */
11821     sp_256_mont_sqr_n_order_avx2_10(t2, t, 16);
11822     /* t = a^ffffffff = t2 * t */
11823     sp_256_mont_mul_order_avx2_10(t, t2, t);
11824     /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64  */
11825     sp_256_mont_sqr_n_order_avx2_10(t2, t, 64);
11826     /* t2= a^ffffffff00000000ffffffff = t2 * t */
11827     sp_256_mont_mul_order_avx2_10(t2, t2, t);
11828     /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32  */
11829     sp_256_mont_sqr_n_order_avx2_10(t2, t2, 32);
11830     /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
11831     sp_256_mont_mul_order_avx2_10(t2, t2, t);
11832     /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
11833     for (i=127; i>=112; i--) {
11834         sp_256_mont_sqr_order_avx2_10(t2, t2);
11835         if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
11836             sp_256_mont_mul_order_avx2_10(t2, t2, a);
11837     }
11838     /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
11839     sp_256_mont_sqr_n_order_avx2_10(t2, t2, 4);
11840     sp_256_mont_mul_order_avx2_10(t2, t2, t3);
11841     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
11842     for (i=107; i>=64; i--) {
11843         sp_256_mont_sqr_order_avx2_10(t2, t2);
11844         if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
11845             sp_256_mont_mul_order_avx2_10(t2, t2, a);
11846     }
11847     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
11848     sp_256_mont_sqr_n_order_avx2_10(t2, t2, 4);
11849     sp_256_mont_mul_order_avx2_10(t2, t2, t3);
11850     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
11851     for (i=59; i>=32; i--) {
11852         sp_256_mont_sqr_order_avx2_10(t2, t2);
11853         if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
11854             sp_256_mont_mul_order_avx2_10(t2, t2, a);
11855     }
11856     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
11857     sp_256_mont_sqr_n_order_avx2_10(t2, t2, 4);
11858     sp_256_mont_mul_order_avx2_10(t2, t2, t3);
11859     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
11860     for (i=27; i>=0; i--) {
11861         sp_256_mont_sqr_order_avx2_10(t2, t2);
11862         if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
11863             sp_256_mont_mul_order_avx2_10(t2, t2, a);
11864     }
11865     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
11866     sp_256_mont_sqr_n_order_avx2_10(t2, t2, 4);
11867     /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
11868     sp_256_mont_mul_order_avx2_10(r, t2, t3);
11869 #endif /* WOLFSSL_SP_SMALL */
11870 }
11871 
11872 #endif /* HAVE_INTEL_AVX2 */
11873 #endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
11874 #ifdef HAVE_ECC_SIGN
11875 #ifndef SP_ECC_MAX_SIG_GEN
11876 #define SP_ECC_MAX_SIG_GEN  64
11877 #endif
11878 
11879 /* Sign the hash using the private key.
11880  *   e = [hash, 256 bits] from binary
11881  *   r = (k.G)->x mod order
11882  *   s = (r * x + e) / k mod order
11883  * The hash is truncated to the first 256 bits.
11884  *
11885  * hash     Hash to sign.
11886  * hashLen  Length of the hash data.
11887  * rng      Random number generator.
11888  * priv     Private part of key - scalar.
11889  * rm       First part of result as an mp_int.
11890  * sm       Sirst part of result as an mp_int.
11891  * heap     Heap to use for allocation.
11892  * returns RNG failures, MEMORY_E when memory allocation fails and
11893  * MP_OKAY on success.
11894  */
11895 int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
11896                     mp_int* rm, mp_int* sm, void* heap)
11897 {
11898 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
11899     sp_digit* d;
11900 #else
11901     sp_digit ed[2*10];
11902     sp_digit xd[2*10];
11903     sp_digit kd[2*10];
11904     sp_digit rd[2*10];
11905     sp_digit td[3 * 2*10];
11906     sp_point p;
11907 #endif
11908     sp_digit* e = NULL;
11909     sp_digit* x = NULL;
11910     sp_digit* k = NULL;
11911     sp_digit* r = NULL;
11912     sp_digit* tmp = NULL;
11913     sp_point* point = NULL;
11914     sp_digit carry;
11915     sp_digit* s;
11916     sp_digit* kInv;
11917     int err = MP_OKAY;
11918     int32_t c;
11919     int i;
11920 #ifdef HAVE_INTEL_AVX2
11921     word32 cpuid_flags = cpuid_get_flags();
11922 #endif
11923 
11924     (void)heap;
11925 
11926     err = sp_ecc_point_new(heap, p, point);
11927 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
11928     if (err == MP_OKAY) {
11929         d = XMALLOC(sizeof(sp_digit) * 7 * 2 * 10, heap, DYNAMIC_TYPE_ECC);
11930         if (d != NULL) {
11931             e = d + 0 * 10;
11932             x = d + 2 * 10;
11933             k = d + 4 * 10;
11934             r = d + 6 * 10;
11935             tmp = d + 8 * 10;
11936         }
11937         else
11938             err = MEMORY_E;
11939     }
11940 #else
11941     e = ed;
11942     x = xd;
11943     k = kd;
11944     r = rd;
11945     tmp = td;
11946 #endif
11947     s = e;
11948     kInv = k;
11949 
11950     if (err == MP_OKAY) {
11951         if (hashLen > 32)
11952             hashLen = 32;
11953 
11954         sp_256_from_bin(e, 10, hash, hashLen);
11955         sp_256_from_mp(x, 10, priv);
11956     }
11957 
11958     for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
11959         /* New random point. */
11960         err = sp_256_ecc_gen_k_10(rng, k);
11961         if (err == MP_OKAY) {
11962 #ifdef HAVE_INTEL_AVX2
11963             if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
11964                 err = sp_256_ecc_mulmod_base_avx2_10(point, k, 1, heap);
11965             else
11966 #endif
11967                 err = sp_256_ecc_mulmod_base_10(point, k, 1, NULL);
11968         }
11969 
11970         if (err == MP_OKAY) {
11971             /* r = point->x mod order */
11972             XMEMCPY(r, point->x, sizeof(sp_digit) * 10);
11973             sp_256_norm_10(r);
11974             c = sp_256_cmp_10(r, p256_order);
11975             sp_256_cond_sub_10(r, r, p256_order, 0 - (c >= 0));
11976             sp_256_norm_10(r);
11977 
11978             /* Conv k to Montgomery form (mod order) */
11979 #ifdef HAVE_INTEL_AVX2
11980             if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
11981                 sp_256_mul_avx2_10(k, k, p256_norm_order);
11982             else
11983 #endif
11984                 sp_256_mul_10(k, k, p256_norm_order);
11985             err = sp_256_mod_10(k, k, p256_order);
11986         }
11987         if (err == MP_OKAY) {
11988             sp_256_norm_10(k);
11989             /* kInv = 1/k mod order */
11990 #ifdef HAVE_INTEL_AVX2
11991             if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
11992                 sp_256_mont_inv_order_avx2_10(kInv, k, tmp);
11993             else
11994 #endif
11995                 sp_256_mont_inv_order_10(kInv, k, tmp);
11996             sp_256_norm_10(kInv);
11997 
11998             /* s = r * x + e */
11999 #ifdef HAVE_INTEL_AVX2
12000             if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
12001                 sp_256_mul_avx2_10(x, x, r);
12002             else
12003 #endif
12004                 sp_256_mul_10(x, x, r);
12005             err = sp_256_mod_10(x, x, p256_order);
12006         }
12007         if (err == MP_OKAY) {
12008             sp_256_norm_10(x);
12009             carry = sp_256_add_10(s, e, x);
12010             sp_256_cond_sub_10(s, s, p256_order, 0 - carry);
12011             sp_256_norm_10(s);
12012             c = sp_256_cmp_10(s, p256_order);
12013             sp_256_cond_sub_10(s, s, p256_order, 0 - (c >= 0));
12014             sp_256_norm_10(s);
12015 
12016             /* s = s * k^-1 mod order */
12017 #ifdef HAVE_INTEL_AVX2
12018             if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
12019                 sp_256_mont_mul_order_avx2_10(s, s, kInv);
12020             else
12021 #endif
12022                 sp_256_mont_mul_order_10(s, s, kInv);
12023             sp_256_norm_10(s);
12024 
12025             /* Check that signature is usable. */
12026             if (!sp_256_iszero_10(s))
12027                 break;
12028         }
12029     }
12030 
12031     if (i == 0)
12032         err = RNG_FAILURE_E;
12033 
12034     if (err == MP_OKAY)
12035         err = sp_256_to_mp(r, rm);
12036     if (err == MP_OKAY)
12037         err = sp_256_to_mp(s, sm);
12038 
12039 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
12040     if (d != NULL) {
12041         XMEMSET(d, 0, sizeof(sp_digit) * 8 * 10);
12042         XFREE(d, heap, DYNAMIC_TYPE_ECC);
12043     }
12044 #else
12045     XMEMSET(e, 0, sizeof(sp_digit) * 2 * 10);
12046     XMEMSET(x, 0, sizeof(sp_digit) * 2 * 10);
12047     XMEMSET(k, 0, sizeof(sp_digit) * 2 * 10);
12048     XMEMSET(r, 0, sizeof(sp_digit) * 2 * 10);
12049     XMEMSET(r, 0, sizeof(sp_digit) * 2 * 10);
12050     XMEMSET(tmp, 0, sizeof(sp_digit) * 3 * 2*10);
12051 #endif
12052     sp_ecc_point_free(point, 1, heap);
12053 
12054     return err;
12055 }
12056 #endif /* HAVE_ECC_SIGN */
12057 
12058 #ifdef HAVE_ECC_VERIFY
12059 /* Verify the signature values with the hash and public key.
12060  *   e = Truncate(hash, 256)
12061  *   u1 = e/s mod order
12062  *   u2 = r/s mod order
12063  *   r == (u1.G + u2.Q)->x mod order
12064  * Optimization: Leave point in projective form.
12065  *   (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
12066  *   (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
12067  * The hash is truncated to the first 256 bits.
12068  *
12069  * hash     Hash to sign.
12070  * hashLen  Length of the hash data.
12071  * rng      Random number generator.
12072  * priv     Private part of key - scalar.
12073  * rm       First part of result as an mp_int.
12074  * sm       Sirst part of result as an mp_int.
12075  * heap     Heap to use for allocation.
12076  * returns RNG failures, MEMORY_E when memory allocation fails and
12077  * MP_OKAY on success.
12078  */
12079 int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
12080     mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
12081 {
12082 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
12083     sp_digit* d = NULL;
12084 #else
12085     sp_digit u1d[2*10];
12086     sp_digit u2d[2*10];
12087     sp_digit sd[2*10];
12088     sp_digit tmpd[2*10 * 5];
12089     sp_point p1d;
12090     sp_point p2d;
12091 #endif
12092     sp_digit* u1;
12093     sp_digit* u2;
12094     sp_digit* s;
12095     sp_digit* tmp;
12096     sp_point* p1;
12097     sp_point* p2 = NULL;
12098     sp_digit carry;
12099     int32_t c;
12100     int err;
12101 #ifdef HAVE_INTEL_AVX2
12102     word32 cpuid_flags = cpuid_get_flags();
12103 #endif
12104 
12105     err = sp_ecc_point_new(heap, p1d, p1);
12106     if (err == MP_OKAY)
12107         err = sp_ecc_point_new(heap, p2d, p2);
12108 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
12109     if (err == MP_OKAY) {
12110         d = XMALLOC(sizeof(sp_digit) * 16 * 10, heap, DYNAMIC_TYPE_ECC);
12111         if (d != NULL) {
12112             u1  = d + 0 * 10;
12113             u2  = d + 2 * 10;
12114             s   = d + 4 * 10;
12115             tmp = d + 6 * 10;
12116         }
12117         else
12118             err = MEMORY_E;
12119     }
12120 #else
12121     u1 = u1d;
12122     u2 = u2d;
12123     s  = sd;
12124     tmp = tmpd;
12125 #endif
12126 
12127     if (err == MP_OKAY) {
12128         if (hashLen > 32)
12129             hashLen = 32;
12130 
12131         sp_256_from_bin(u1, 10, hash, hashLen);
12132         sp_256_from_mp(u2, 10, r);
12133         sp_256_from_mp(s, 10, sm);
12134         sp_256_from_mp(p2->x, 10, pX);
12135         sp_256_from_mp(p2->y, 10, pY);
12136         sp_256_from_mp(p2->z, 10, pZ);
12137 
12138 #ifdef HAVE_INTEL_AVX2
12139         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
12140             sp_256_mul_avx2_10(s, s, p256_norm_order);
12141         else
12142 #endif
12143             sp_256_mul_10(s, s, p256_norm_order);
12144         err = sp_256_mod_10(s, s, p256_order);
12145     }
12146     if (err == MP_OKAY) {
12147         sp_256_norm_10(s);
12148 #ifdef HAVE_INTEL_AVX2
12149         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
12150             sp_256_mont_inv_order_avx2_10(s, s, tmp);
12151             sp_256_mont_mul_order_avx2_10(u1, u1, s);
12152             sp_256_mont_mul_order_avx2_10(u2, u2, s);
12153         }
12154         else
12155 #endif
12156         {
12157             sp_256_mont_inv_order_10(s, s, tmp);
12158             sp_256_mont_mul_order_10(u1, u1, s);
12159             sp_256_mont_mul_order_10(u2, u2, s);
12160         }
12161 
12162 #ifdef HAVE_INTEL_AVX2
12163         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
12164             err = sp_256_ecc_mulmod_base_avx2_10(p1, u1, 0, heap);
12165         else
12166 #endif
12167             err = sp_256_ecc_mulmod_base_10(p1, u1, 0, heap);
12168     }
12169     if (err == MP_OKAY) {
12170 #ifdef HAVE_INTEL_AVX2
12171         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
12172             err = sp_256_ecc_mulmod_avx2_10(p2, p2, u2, 0, heap);
12173         else
12174 #endif
12175             err = sp_256_ecc_mulmod_10(p2, p2, u2, 0, heap);
12176     }
12177 
12178     if (err == MP_OKAY) {
12179 #ifdef HAVE_INTEL_AVX2
12180         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
12181             sp_256_proj_point_add_avx2_10(p1, p1, p2, tmp);
12182         else
12183 #endif
12184             sp_256_proj_point_add_10(p1, p1, p2, tmp);
12185 
12186         /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
12187         /* Reload r and convert to Montgomery form. */
12188         sp_256_from_mp(u2, 10, r);
12189         err = sp_256_mod_mul_norm_10(u2, u2, p256_mod);
12190     }
12191 
12192     if (err == MP_OKAY) {
12193         /* u1 = r.z'.z' mod prime */
12194         sp_256_mont_sqr_10(p1->z, p1->z, p256_mod, p256_mp_mod);
12195         sp_256_mont_mul_10(u1, u2, p1->z, p256_mod, p256_mp_mod);
12196         *res = sp_256_cmp_10(p1->x, u1) == 0;
12197         if (*res == 0) {
12198             /* Reload r and add order. */
12199             sp_256_from_mp(u2, 10, r);
12200             carry = sp_256_add_10(u2, u2, p256_order);
12201             /* Carry means result is greater than mod and is not valid. */
12202             if (!carry) {
12203                 sp_256_norm_10(u2);
12204 
12205                 /* Compare with mod and if greater or equal then not valid. */
12206                 c = sp_256_cmp_10(u2, p256_mod);
12207                 if (c < 0) {
12208                     /* Convert to Montogomery form */
12209                     err = sp_256_mod_mul_norm_10(u2, u2, p256_mod);
12210                     if (err == MP_OKAY) {
12211                         /* u1 = (r + 1*order).z'.z' mod prime */
12212                         sp_256_mont_mul_10(u1, u2, p1->z, p256_mod,
12213                                                                   p256_mp_mod);
12214                         *res = sp_256_cmp_10(p1->x, u2) == 0;
12215                     }
12216                 }
12217             }
12218         }
12219     }
12220 
12221 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
12222     if (d != NULL)
12223         XFREE(d, heap, DYNAMIC_TYPE_ECC);
12224 #endif
12225     sp_ecc_point_free(p1, 0, heap);
12226     sp_ecc_point_free(p2, 0, heap);
12227 
12228     return err;
12229 }
12230 #endif /* HAVE_ECC_VERIFY */
12231 
12232 #ifdef HAVE_ECC_CHECK_KEY
12233 /* Check that the x and y oridinates are a valid point on the curve.
12234  *
12235  * point  EC point.
12236  * heap   Heap to use if dynamically allocating.
12237  * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
12238  * not on the curve and MP_OKAY otherwise.
12239  */
12240 static int sp_256_ecc_is_point_10(sp_point* point, void* heap)
12241 {
12242 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
12243     sp_digit* d = NULL;
12244 #else
12245     sp_digit t1d[2*10];
12246     sp_digit t2d[2*10];
12247 #endif
12248     sp_digit* t1;
12249     sp_digit* t2;
12250     int err = MP_OKAY;
12251 
12252 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
12253     d = XMALLOC(sizeof(sp_digit) * 10 * 4, heap, DYNAMIC_TYPE_ECC);
12254     if (d != NULL) {
12255         t1 = d + 0 * 10;
12256         t2 = d + 2 * 10;
12257     }
12258     else
12259         err = MEMORY_E;
12260 #else
12261     (void)heap;
12262 
12263     t1 = t1d;
12264     t2 = t2d;
12265 #endif
12266 
12267     if (err == MP_OKAY) {
12268         sp_256_sqr_10(t1, point->y);
12269         sp_256_mod_10(t1, t1, p256_mod);
12270         sp_256_sqr_10(t2, point->x);
12271         sp_256_mod_10(t2, t2, p256_mod);
12272         sp_256_mul_10(t2, t2, point->x);
12273         sp_256_mod_10(t2, t2, p256_mod);
12274     sp_256_sub_10(t2, p256_mod, t2);
12275         sp_256_mont_add_10(t1, t1, t2, p256_mod);
12276 
12277         sp_256_mont_add_10(t1, t1, point->x, p256_mod);
12278         sp_256_mont_add_10(t1, t1, point->x, p256_mod);
12279         sp_256_mont_add_10(t1, t1, point->x, p256_mod);
12280 
12281         if (sp_256_cmp_10(t1, p256_b) != 0)
12282             err = MP_VAL;
12283     }
12284 
12285 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
12286     if (d != NULL)
12287         XFREE(d, heap, DYNAMIC_TYPE_ECC);
12288 #endif
12289 
12290     return err;
12291 }
12292 
12293 /* Check that the x and y oridinates are a valid point on the curve.
12294  *
12295  * pX  X ordinate of EC point.
12296  * pY  Y ordinate of EC point.
12297  * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
12298  * not on the curve and MP_OKAY otherwise.
12299  */
12300 int sp_ecc_is_point_256(mp_int* pX, mp_int* pY)
12301 {
12302 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
12303     sp_point pubd;
12304 #endif
12305     sp_point* pub;
12306     byte one[1] = { 1 };
12307     int err;
12308 
12309     err = sp_ecc_point_new(NULL, pubd, pub);
12310     if (err == MP_OKAY) {
12311         sp_256_from_mp(pub->x, 10, pX);
12312         sp_256_from_mp(pub->y, 10, pY);
12313         sp_256_from_bin(pub->z, 10, one, sizeof(one));
12314 
12315         err = sp_256_ecc_is_point_10(pub, NULL);
12316     }
12317 
12318     sp_ecc_point_free(pub, 0, NULL);
12319 
12320     return err;
12321 }
12322 
12323 /* Check that the private scalar generates the EC point (px, py), the point is
12324  * on the curve and the point has the correct order.
12325  *
12326  * pX     X ordinate of EC point.
12327  * pY     Y ordinate of EC point.
12328  * privm  Private scalar that generates EC point.
12329  * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
12330  * not on the curve, ECC_INF_E if the point does not have the correct order,
12331  * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
12332  * MP_OKAY otherwise.
12333  */
12334 int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
12335 {
12336 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
12337     sp_digit privd[10];
12338     sp_point pubd;
12339     sp_point pd;
12340 #endif
12341     sp_digit* priv = NULL;
12342     sp_point* pub;
12343     sp_point* p = NULL;
12344     byte one[1] = { 1 };
12345     int err;
12346 #ifdef HAVE_INTEL_AVX2
12347     word32 cpuid_flags = cpuid_get_flags();
12348 #endif
12349 
12350     err = sp_ecc_point_new(heap, pubd, pub);
12351     if (err == MP_OKAY)
12352         err = sp_ecc_point_new(heap, pd, p);
12353 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
12354     if (err == MP_OKAY) {
12355         priv = XMALLOC(sizeof(sp_digit) * 10, heap, DYNAMIC_TYPE_ECC);
12356         if (priv == NULL)
12357             err = MEMORY_E;
12358     }
12359 #else
12360     priv = privd;
12361 #endif
12362 
12363     if (err == MP_OKAY) {
12364         sp_256_from_mp(pub->x, 10, pX);
12365         sp_256_from_mp(pub->y, 10, pY);
12366         sp_256_from_bin(pub->z, 10, one, sizeof(one));
12367         sp_256_from_mp(priv, 10, privm);
12368 
12369         /* Check point at infinitiy. */
12370         if (sp_256_iszero_10(pub->x) &&
12371             sp_256_iszero_10(pub->y))
12372             err = ECC_INF_E;
12373     }
12374 
12375     if (err == MP_OKAY) {
12376         /* Check range of X and Y */
12377         if (sp_256_cmp_10(pub->x, p256_mod) >= 0 ||
12378             sp_256_cmp_10(pub->y, p256_mod) >= 0)
12379             err = ECC_OUT_OF_RANGE_E;
12380     }
12381 
12382     if (err == MP_OKAY) {
12383         /* Check point is on curve */
12384         err = sp_256_ecc_is_point_10(pub, heap);
12385     }
12386 
12387     if (err == MP_OKAY) {
12388         /* Point * order = infinity */
12389 #ifdef HAVE_INTEL_AVX2
12390         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
12391             err = sp_256_ecc_mulmod_avx2_10(p, pub, p256_order, 1, heap);
12392         else
12393 #endif
12394             err = sp_256_ecc_mulmod_10(p, pub, p256_order, 1, heap);
12395     }
12396     if (err == MP_OKAY) {
12397         /* Check result is infinity */
12398         if (!sp_256_iszero_10(p->x) ||
12399             !sp_256_iszero_10(p->y)) {
12400             err = ECC_INF_E;
12401         }
12402     }
12403 
12404     if (err == MP_OKAY) {
12405         /* Base * private = point */
12406 #ifdef HAVE_INTEL_AVX2
12407         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
12408             err = sp_256_ecc_mulmod_base_avx2_10(p, priv, 1, heap);
12409         else
12410 #endif
12411             err = sp_256_ecc_mulmod_base_10(p, priv, 1, heap);
12412     }
12413     if (err == MP_OKAY) {
12414         /* Check result is public key */
12415         if (sp_256_cmp_10(p->x, pub->x) != 0 ||
12416             sp_256_cmp_10(p->y, pub->y) != 0) {
12417             err = ECC_PRIV_KEY_E;
12418         }
12419     }
12420 
12421 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
12422     if (priv != NULL)
12423         XFREE(priv, heap, DYNAMIC_TYPE_ECC);
12424 #endif
12425     sp_ecc_point_free(p, 0, heap);
12426     sp_ecc_point_free(pub, 0, heap);
12427 
12428     return err;
12429 }
12430 #endif
12431 #ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
12432 /* Add two projective EC points together.
12433  * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
12434  *
12435  * pX   First EC point's X ordinate.
12436  * pY   First EC point's Y ordinate.
12437  * pZ   First EC point's Z ordinate.
12438  * qX   Second EC point's X ordinate.
12439  * qY   Second EC point's Y ordinate.
12440  * qZ   Second EC point's Z ordinate.
12441  * rX   Resultant EC point's X ordinate.
12442  * rY   Resultant EC point's Y ordinate.
12443  * rZ   Resultant EC point's Z ordinate.
12444  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
12445  */
12446 int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
12447                               mp_int* qX, mp_int* qY, mp_int* qZ,
12448                               mp_int* rX, mp_int* rY, mp_int* rZ)
12449 {
12450 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
12451     sp_digit tmpd[2 * 10 * 5];
12452     sp_point pd;
12453     sp_point qd;
12454 #endif
12455     sp_digit* tmp;
12456     sp_point* p;
12457     sp_point* q = NULL;
12458     int err;
12459 #ifdef HAVE_INTEL_AVX2
12460     word32 cpuid_flags = cpuid_get_flags();
12461 #endif
12462 
12463     err = sp_ecc_point_new(NULL, pd, p);
12464     if (err == MP_OKAY)
12465         err = sp_ecc_point_new(NULL, qd, q);
12466 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
12467     if (err == MP_OKAY) {
12468         tmp = XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, NULL, DYNAMIC_TYPE_ECC);
12469         if (tmp == NULL)
12470             err = MEMORY_E;
12471     }
12472 #else
12473     tmp = tmpd;
12474 #endif
12475 
12476     if (err == MP_OKAY) {
12477         sp_256_from_mp(p->x, 10, pX);
12478         sp_256_from_mp(p->y, 10, pY);
12479         sp_256_from_mp(p->z, 10, pZ);
12480         sp_256_from_mp(q->x, 10, qX);
12481         sp_256_from_mp(q->y, 10, qY);
12482         sp_256_from_mp(q->z, 10, qZ);
12483 
12484 #ifdef HAVE_INTEL_AVX2
12485         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
12486             sp_256_proj_point_add_avx2_10(p, p, q, tmp);
12487         else
12488 #endif
12489             sp_256_proj_point_add_10(p, p, q, tmp);
12490     }
12491 
12492     if (err == MP_OKAY)
12493         err = sp_256_to_mp(p->x, rX);
12494     if (err == MP_OKAY)
12495         err = sp_256_to_mp(p->y, rY);
12496     if (err == MP_OKAY)
12497         err = sp_256_to_mp(p->z, rZ);
12498 
12499 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
12500     if (tmp != NULL)
12501         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
12502 #endif
12503     sp_ecc_point_free(q, 0, NULL);
12504     sp_ecc_point_free(p, 0, NULL);
12505 
12506     return err;
12507 }
12508 
12509 /* Double a projective EC point.
12510  * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
12511  *
12512  * pX   EC point's X ordinate.
12513  * pY   EC point's Y ordinate.
12514  * pZ   EC point's Z ordinate.
12515  * rX   Resultant EC point's X ordinate.
12516  * rY   Resultant EC point's Y ordinate.
12517  * rZ   Resultant EC point's Z ordinate.
12518  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
12519  */
12520 int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
12521                               mp_int* rX, mp_int* rY, mp_int* rZ)
12522 {
12523 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
12524     sp_digit tmpd[2 * 10 * 2];
12525     sp_point pd;
12526 #endif
12527     sp_digit* tmp;
12528     sp_point* p;
12529     int err;
12530 #ifdef HAVE_INTEL_AVX2
12531     word32 cpuid_flags = cpuid_get_flags();
12532 #endif
12533 
12534     err = sp_ecc_point_new(NULL, pd, p);
12535 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
12536     if (err == MP_OKAY) {
12537         tmp = XMALLOC(sizeof(sp_digit) * 2 * 10 * 2, NULL, DYNAMIC_TYPE_ECC);
12538         if (tmp == NULL)
12539             err = MEMORY_E;
12540     }
12541 #else
12542     tmp = tmpd;
12543 #endif
12544 
12545     if (err == MP_OKAY) {
12546         sp_256_from_mp(p->x, 10, pX);
12547         sp_256_from_mp(p->y, 10, pY);
12548         sp_256_from_mp(p->z, 10, pZ);
12549 
12550 #ifdef HAVE_INTEL_AVX2
12551         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
12552             sp_256_proj_point_dbl_avx2_10(p, p, tmp);
12553         else
12554 #endif
12555             sp_256_proj_point_dbl_10(p, p, tmp);
12556     }
12557 
12558     if (err == MP_OKAY)
12559         err = sp_256_to_mp(p->x, rX);
12560     if (err == MP_OKAY)
12561         err = sp_256_to_mp(p->y, rY);
12562     if (err == MP_OKAY)
12563         err = sp_256_to_mp(p->z, rZ);
12564 
12565 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
12566     if (tmp != NULL)
12567         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
12568 #endif
12569     sp_ecc_point_free(p, 0, NULL);
12570 
12571     return err;
12572 }
12573 
12574 /* Map a projective EC point to affine in place.
12575  * pZ will be one.
12576  *
12577  * pX   EC point's X ordinate.
12578  * pY   EC point's Y ordinate.
12579  * pZ   EC point's Z ordinate.
12580  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
12581  */
12582 int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ)
12583 {
12584 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
12585     sp_digit tmpd[2 * 10 * 4];
12586     sp_point pd;
12587 #endif
12588     sp_digit* tmp;
12589     sp_point* p;
12590     int err;
12591 
12592     err = sp_ecc_point_new(NULL, pd, p);
12593 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
12594     if (err == MP_OKAY) {
12595         tmp = XMALLOC(sizeof(sp_digit) * 2 * 10 * 4, NULL, DYNAMIC_TYPE_ECC);
12596         if (tmp == NULL)
12597             err = MEMORY_E;
12598     }
12599 #else
12600     tmp = tmpd;
12601 #endif
12602     if (err == MP_OKAY) {
12603         sp_256_from_mp(p->x, 10, pX);
12604         sp_256_from_mp(p->y, 10, pY);
12605         sp_256_from_mp(p->z, 10, pZ);
12606 
12607         sp_256_map_10(p, p, tmp);
12608     }
12609 
12610     if (err == MP_OKAY)
12611         err = sp_256_to_mp(p->x, pX);
12612     if (err == MP_OKAY)
12613         err = sp_256_to_mp(p->y, pY);
12614     if (err == MP_OKAY)
12615         err = sp_256_to_mp(p->z, pZ);
12616 
12617 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
12618     if (tmp != NULL)
12619         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
12620 #endif
12621     sp_ecc_point_free(p, 0, NULL);
12622 
12623     return err;
12624 }
12625 #endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
12626 #ifdef HAVE_COMP_KEY
12627 /* Find the square root of a number mod the prime of the curve.
12628  *
12629  * y  The number to operate on and the result.
12630  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
12631  */
12632 static int sp_256_mont_sqrt_10(sp_digit* y)
12633 {
12634 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
12635     sp_digit* d;
12636 #else
12637     sp_digit t1d[2 * 10];
12638     sp_digit t2d[2 * 10];
12639 #endif
12640     sp_digit* t1;
12641     sp_digit* t2;
12642     int err = MP_OKAY;
12643 #ifdef HAVE_INTEL_AVX2
12644     word32 cpuid_flags = cpuid_get_flags();
12645 #endif
12646 
12647 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
12648     d = XMALLOC(sizeof(sp_digit) * 4 * 10, NULL, DYNAMIC_TYPE_ECC);
12649     if (d != NULL) {
12650         t1 = d + 0 * 10;
12651         t2 = d + 2 * 10;
12652     }
12653     else
12654         err = MEMORY_E;
12655 #else
12656     t1 = t1d;
12657     t2 = t2d;
12658 #endif
12659 
12660     if (err == MP_OKAY) {
12661 #ifdef HAVE_INTEL_AVX2
12662         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
12663             /* t2 = y ^ 0x2 */
12664             sp_256_mont_sqr_avx2_10(t2, y, p256_mod, p256_mp_mod);
12665             /* t1 = y ^ 0x3 */
12666             sp_256_mont_mul_avx2_10(t1, t2, y, p256_mod, p256_mp_mod);
12667             /* t2 = y ^ 0xc */
12668             sp_256_mont_sqr_n_avx2_10(t2, t1, 2, p256_mod, p256_mp_mod);
12669             /* t1 = y ^ 0xf */
12670             sp_256_mont_mul_avx2_10(t1, t1, t2, p256_mod, p256_mp_mod);
12671             /* t2 = y ^ 0xf0 */
12672             sp_256_mont_sqr_n_avx2_10(t2, t1, 4, p256_mod, p256_mp_mod);
12673             /* t1 = y ^ 0xff */
12674             sp_256_mont_mul_avx2_10(t1, t1, t2, p256_mod, p256_mp_mod);
12675             /* t2 = y ^ 0xff00 */
12676             sp_256_mont_sqr_n_avx2_10(t2, t1, 8, p256_mod, p256_mp_mod);
12677             /* t1 = y ^ 0xffff */
12678             sp_256_mont_mul_avx2_10(t1, t1, t2, p256_mod, p256_mp_mod);
12679             /* t2 = y ^ 0xffff0000 */
12680             sp_256_mont_sqr_n_avx2_10(t2, t1, 16, p256_mod, p256_mp_mod);
12681             /* t1 = y ^ 0xffffffff */
12682             sp_256_mont_mul_avx2_10(t1, t1, t2, p256_mod, p256_mp_mod);
12683             /* t1 = y ^ 0xffffffff00000000 */
12684             sp_256_mont_sqr_n_avx2_10(t1, t1, 32, p256_mod, p256_mp_mod);
12685             /* t1 = y ^ 0xffffffff00000001 */
12686             sp_256_mont_mul_avx2_10(t1, t1, y, p256_mod, p256_mp_mod);
12687             /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
12688             sp_256_mont_sqr_n_avx2_10(t1, t1, 96, p256_mod, p256_mp_mod);
12689             /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
12690             sp_256_mont_mul_avx2_10(t1, t1, y, p256_mod, p256_mp_mod);
12691             sp_256_mont_sqr_n_avx2_10(y, t1, 94, p256_mod, p256_mp_mod);
12692         }
12693         else
12694 #endif
12695         {
12696             /* t2 = y ^ 0x2 */
12697             sp_256_mont_sqr_10(t2, y, p256_mod, p256_mp_mod);
12698             /* t1 = y ^ 0x3 */
12699             sp_256_mont_mul_10(t1, t2, y, p256_mod, p256_mp_mod);
12700             /* t2 = y ^ 0xc */
12701             sp_256_mont_sqr_n_10(t2, t1, 2, p256_mod, p256_mp_mod);
12702             /* t1 = y ^ 0xf */
12703             sp_256_mont_mul_10(t1, t1, t2, p256_mod, p256_mp_mod);
12704             /* t2 = y ^ 0xf0 */
12705             sp_256_mont_sqr_n_10(t2, t1, 4, p256_mod, p256_mp_mod);
12706             /* t1 = y ^ 0xff */
12707             sp_256_mont_mul_10(t1, t1, t2, p256_mod, p256_mp_mod);
12708             /* t2 = y ^ 0xff00 */
12709             sp_256_mont_sqr_n_10(t2, t1, 8, p256_mod, p256_mp_mod);
12710             /* t1 = y ^ 0xffff */
12711             sp_256_mont_mul_10(t1, t1, t2, p256_mod, p256_mp_mod);
12712             /* t2 = y ^ 0xffff0000 */
12713             sp_256_mont_sqr_n_10(t2, t1, 16, p256_mod, p256_mp_mod);
12714             /* t1 = y ^ 0xffffffff */
12715             sp_256_mont_mul_10(t1, t1, t2, p256_mod, p256_mp_mod);
12716             /* t1 = y ^ 0xffffffff00000000 */
12717             sp_256_mont_sqr_n_10(t1, t1, 32, p256_mod, p256_mp_mod);
12718             /* t1 = y ^ 0xffffffff00000001 */
12719             sp_256_mont_mul_10(t1, t1, y, p256_mod, p256_mp_mod);
12720             /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
12721             sp_256_mont_sqr_n_10(t1, t1, 96, p256_mod, p256_mp_mod);
12722             /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
12723             sp_256_mont_mul_10(t1, t1, y, p256_mod, p256_mp_mod);
12724             sp_256_mont_sqr_n_10(y, t1, 94, p256_mod, p256_mp_mod);
12725         }
12726     }
12727 
12728 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
12729     if (d != NULL)
12730         XFREE(d, NULL, DYNAMIC_TYPE_ECC);
12731 #endif
12732 
12733     return err;
12734 }
12735 
12736 /* Uncompress the point given the X ordinate.
12737  *
12738  * xm    X ordinate.
12739  * odd   Whether the Y ordinate is odd.
12740  * ym    Calculated Y ordinate.
12741  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
12742  */
12743 int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
12744 {
12745 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
12746     sp_digit* d;
12747 #else
12748     sp_digit xd[2 * 10];
12749     sp_digit yd[2 * 10];
12750 #endif
12751     sp_digit* x;
12752     sp_digit* y;
12753     int err = MP_OKAY;
12754 #ifdef HAVE_INTEL_AVX2
12755     word32 cpuid_flags = cpuid_get_flags();
12756 #endif
12757 
12758 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
12759     d = XMALLOC(sizeof(sp_digit) * 4 * 10, NULL, DYNAMIC_TYPE_ECC);
12760     if (d != NULL) {
12761         x = d + 0 * 10;
12762         y = d + 2 * 10;
12763     }
12764     else
12765         err = MEMORY_E;
12766 #else
12767     x = xd;
12768     y = yd;
12769 #endif
12770 
12771     if (err == MP_OKAY) {
12772         sp_256_from_mp(x, 10, xm);
12773 
12774         err = sp_256_mod_mul_norm_10(x, x, p256_mod);
12775     }
12776 
12777     if (err == MP_OKAY) {
12778         /* y = x^3 */
12779 #ifdef HAVE_INTEL_AVX2
12780         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
12781             sp_256_mont_sqr_avx2_10(y, x, p256_mod, p256_mp_mod);
12782             sp_256_mont_mul_avx2_10(y, y, x, p256_mod, p256_mp_mod);
12783         }
12784         else
12785 #endif
12786         {
12787             sp_256_mont_sqr_10(y, x, p256_mod, p256_mp_mod);
12788             sp_256_mont_mul_10(y, y, x, p256_mod, p256_mp_mod);
12789         }
12790         /* y = x^3 - 3x */
12791         sp_256_mont_sub_10(y, y, x, p256_mod);
12792         sp_256_mont_sub_10(y, y, x, p256_mod);
12793         sp_256_mont_sub_10(y, y, x, p256_mod);
12794         /* y = x^3 - 3x + b */
12795         err = sp_256_mod_mul_norm_10(x, p256_b, p256_mod);
12796     }
12797     if (err == MP_OKAY) {
12798         sp_256_mont_add_10(y, y, x, p256_mod);
12799         /* y = sqrt(x^3 - 3x + b) */
12800         err = sp_256_mont_sqrt_10(y);
12801     }
12802     if (err == MP_OKAY) {
12803         XMEMSET(y + 10, 0, 10 * sizeof(sp_digit));
12804         sp_256_mont_reduce_10(y, p256_mod, p256_mp_mod);
12805         if (((y[0] ^ odd) & 1) != 0)
12806             sp_256_mont_sub_10(y, p256_mod, y, p256_mod);
12807 
12808         err = sp_256_to_mp(y, ym);
12809     }
12810 
12811 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
12812     if (d != NULL)
12813         XFREE(d, NULL, DYNAMIC_TYPE_ECC);
12814 #endif
12815 
12816     return err;
12817 }
12818 #endif
12819 #endif /* WOLFSSL_SP_NO_256 */
12820 #endif /* SP_WORD_SIZE == 32 */
12821 #endif /* !WOLFSSL_SP_ASM */
12822 #endif /* WOLFSSL_HAVE_SP_ECC */
12823 #endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_ECC */
12824