wolf SSL / wolfSSL

Dependents:   CyaSSL-Twitter-OAuth4Tw Example-client-tls-cert TwitterReader TweetTest ... more

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers sp_armthumb.c Source File

sp_armthumb.c

00001 /* sp_armthumb.c
00002  *
00003  * Copyright (C) 2006-2020 wolfSSL Inc.
00004  *
00005  * This file is part of wolfSSL.
00006  *
00007  * wolfSSL is free software; you can redistribute it and/or modify
00008  * it under the terms of the GNU General Public License as published by
00009  * the Free Software Foundation; either version 2 of the License, or
00010  * (at your option) any later version.
00011  *
00012  * wolfSSL is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
00020  */
00021 
00022 /* Implementation by Sean Parkinson. */
00023 
00024 #ifdef HAVE_CONFIG_H
00025     #include <config.h>
00026 #endif
00027 
00028 #include <wolfssl/wolfcrypt/settings.h>
00029 #include <wolfssl/wolfcrypt/error-crypt.h>
00030 #include <wolfssl/wolfcrypt/cpuid.h>
00031 #ifdef NO_INLINE
00032     #include <wolfssl/wolfcrypt/misc.h>
00033 #else
00034     #define WOLFSSL_MISC_INCLUDED
00035     #include <wolfcrypt/src/misc.c>
00036 #endif
00037 
00038 #if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) || \
00039                                     defined(WOLFSSL_HAVE_SP_ECC)
00040 
00041 #ifdef RSA_LOW_MEM
00042 #ifndef WOLFSSL_SP_SMALL
00043 #define WOLFSSL_SP_SMALL
00044 #endif
00045 #endif
00046 
00047 #include <wolfssl/wolfcrypt/sp.h>
00048 
00049 #ifdef WOLFSSL_SP_ARM_THUMB_ASM
00050 #if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
00051 #ifndef WOLFSSL_SP_NO_2048
00052 /* Load a big endian byte string into the digit array r.
00053  * Bytes are consumed from the end of a, so r[0] gets the lowest bits.
00054  * r     A single precision integer; digits not reached are set to zero.
00055  * size  Number of digits available in r.
00056  * a     Byte array, most significant byte first.
00057  * n     Number of bytes in array to read.
00058  */
00059 static void sp_2048_from_bin(sp_digit* r, int size, const byte* a, int n)
00060 {
00061     int idx, word = 0;
00062     word32 shift = 0;
00063     sp_digit octet;
00064 
00065     r[0] = 0;
00066     for (idx = n - 1; idx >= 0; idx--) {
00067         octet = (sp_digit)a[idx];
00068         r[word] |= octet << shift;
00069         if (shift < 24U) {
00070             shift += 8U;    /* digit still has room for a whole byte */
00071             continue;
00072         }
00073         r[word] &= 0xffffffff;  /* digit full: keep only its 32 bits */
00074         shift = 32U - shift;
00075         if (word + 1 >= size) {
00076             break;
00077         }
00078         r[++word] = octet >> shift;  /* bits that did not fit, if any */
00079         shift = 8U - shift;
00080     }
00081     for (word++; word < size; word++) {
00082         r[word] = 0;
00083     }
00084 }
00085 
00086 /* Convert an mp_int to an array of sp_digit.
00087  * Digits of a are repacked, least significant first, into 32-bit words of r.
00088  * r     A single precision integer; digits above the value are set to zero.
00089  * size  Number of digits available in r (expected to be at least 1).
00090  * a     A multi-precision integer; a->dp[0 .. a->used-1] is read.
00091  */
00092 static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a)
00093 {
00094 #if DIGIT_BIT == 32
00095     int j = a->used;
00096 
00097     if (j > size) { j = (size > 0) ? size : 0; } /* clamp copy to r's capacity */
00098     XMEMCPY(r, a->dp, sizeof(sp_digit) * j);
00099     for (; j < size; j++) {
00100         r[j] = 0;
00101     }
00102 #elif DIGIT_BIT > 32
00103     int i, j = 0;
00104     word32 s = 0;
00105 
00106     r[0] = 0;
00107     for (i = 0; i < a->used && j < size; i++) {
00108         r[j] |= ((sp_digit)a->dp[i] << s);
00109         r[j] &= 0xffffffff;
00110         s = 32U - s; /* number of bits of a->dp[i] already stored in r[j] */
00111         if (j + 1 >= size) {
00112             break;
00113         }
00114         /* lint allow cast of mismatch word32 and mp_digit */
00115         r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
00116         while ((s + 32U) <= (word32)DIGIT_BIT) { /* more whole words remain */
00117             s += 32U;
00118             r[j] &= 0xffffffff;
00119             if (j + 1 >= size) {
00120                 break;
00121             }
00122             if (s < (word32)DIGIT_BIT) {
00123                 /* lint allow cast of mismatch word32 and mp_digit */
00124                 r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
00125             }
00126             else {
00127                 r[++j] = 0L;
00128             }
00129         }
00130         s = (word32)DIGIT_BIT - s; /* bit offset in r[j] for the next mp digit */
00131     }
00132 
00133     for (j++; j < size; j++) {
00134         r[j] = 0;
00135     }
00136 #else
00137     int i, j = 0, s = 0;
00138 
00139     r[0] = 0;
00140     for (i = 0; i < a->used && j < size; i++) {
00141         r[j] |= ((sp_digit)a->dp[i]) << s;
00142         if (s + DIGIT_BIT >= 32) { /* this mp digit completes r[j] */
00143             r[j] &= 0xffffffff;
00144             if (j + 1 >= size) {
00145                 break;
00146             }
00147             s = 32 - s;
00148             if (s == DIGIT_BIT) {
00149                 r[++j] = 0;
00150                 s = 0;
00151             }
00152             else {
00153                 r[++j] = a->dp[i] >> s; /* leftover high bits start next word */
00154                 s = DIGIT_BIT - s;
00155             }
00156         }
00157         else {
00158             s += DIGIT_BIT;
00159         }
00160     }
00161 
00162     for (j++; j < size; j++) {
00163         r[j] = 0;
00164     }
00165 #endif
00166 }
00167 
00168 /* Serialize r into a as a big endian byte string.
00169  * Output length is fixed at 2048 / 8 = 256 bytes; 64 digits of r are read.
00170  *
00171  * r  A single precision integer.
00172  * a  Byte array; filled from its last byte towards a[0].
00173  */
00174 static void sp_2048_to_bin(sp_digit* r, byte* a)
00175 {
00176     int word, pos, shift = 0, bits;
00177 
00178     pos = 2048 / 8 - 1;     /* start at the last, least significant, byte */
00179     a[pos] = 0;
00180     for (word = 0; (word < 64) && (pos >= 0); word++) {
00181         /* lint allow cast of mismatch sp_digit and int */
00182         a[pos] |= (byte)(r[word] << shift); /*lint !e9033*/
00183         pos--;
00184         bits = 8 - shift;   /* bits of r[word] emitted so far */
00185         if (pos < 0) {
00186             break;
00187         }
00188         while (bits < 32) {
00189             a[pos--] = (byte)(r[word] >> bits);
00190             bits += 8;
00191             if (pos < 0) {
00192                 break;
00193             }
00194         }
00195         shift = 40 - bits;  /* same value as 8 - (bits - 32) */
00196         if (pos >= 0) {
00197             a[pos] = 0;
00198         }
00199         if (shift != 0) {
00200             pos++;
00201         }
00202     }
00203 }
00204 
00205 #ifndef WOLFSSL_SP_SMALL
00206 /* Multiply a and b into r. (r = a * b)
00207  * 8 digits by 8 digits giving 16 digits, computed one result column at a time.
00208  * r  A single precision integer. Written only after the product is complete.
00209  * a  A single precision integer.
00210  * b  A single precision integer.
00211  */
00212 SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a,
00213         const sp_digit* b)
00214 {
00215     sp_digit tmp[8 * 2]; /* product is assembled here, then copied to r */
00216     __asm__ __volatile__ (
00217         "mov    r3, #0\n\t" /* r3:r4:r5 hold the running column sum (low, mid, high) */
00218         "mov    r4, #0\n\t"
00219         "mov    r8, r3\n\t" /* r8 = byte offset of the current result column */
00220         "mov    r11, %[r]\n\t" /* r11 = tmp */
00221         "mov    r9, %[a]\n\t" /* r9 = a */
00222         "mov    r10, %[b]\n\t" /* r10 = b */
00223         "mov    r6, #32\n\t"
00224         "add    r6, r9\n\t"
00225         "mov    r12, r6\n\t" /* r12 = a + 32 bytes, one past the last digit of a */
00226         "\n1:\n\t" /* start of one result column */
00227         "mov    %[r], #0\n\t" /* %[r] serves as a zero operand for adc below */
00228         "mov    r5, #0\n\t"
00229         "mov    r6, #28\n\t"
00230         "mov    %[a], r8\n\t"
00231         "sub    %[a], r6\n\t"
00232         "sbc    r6, r6\n\t"
00233         "mvn    r6, r6\n\t"
00234         "and    %[a], r6\n\t" /* %[a] = max(r8 - 28, 0): first offset into a */
00235         "mov    %[b], r8\n\t"
00236         "sub    %[b], %[a]\n\t" /* %[b] = r8 - %[a]: matching offset into b */
00237         "add    %[a], r9\n\t"
00238         "add    %[b], r10\n\t"
00239         "\n2:\n\t" /* one 32x32 product, built from four 16x16 products */
00240         "# Multiply Start\n\t"
00241         "ldr    r6, [%[a]]\n\t"
00242         "ldr    r7, [%[b]]\n\t"
00243         "lsl    r6, r6, #16\n\t"
00244         "lsl    r7, r7, #16\n\t"
00245         "lsr    r6, r6, #16\n\t"
00246         "lsr    r7, r7, #16\n\t"
00247         "mul    r7, r6\n\t" /* low(a) * low(b) */
00248         "add    r3, r7\n\t"
00249         "adc    r4, %[r]\n\t"
00250         "adc    r5, %[r]\n\t"
00251         "ldr    r7, [%[b]]\n\t"
00252         "lsr    r7, r7, #16\n\t"
00253         "mul    r6, r7\n\t" /* low(a) * high(b), added 16 bits up */
00254         "lsr    r7, r6, #16\n\t"
00255         "lsl    r6, r6, #16\n\t"
00256         "add    r3, r6\n\t"
00257         "adc    r4, r7\n\t"
00258         "adc    r5, %[r]\n\t"
00259         "ldr    r6, [%[a]]\n\t"
00260         "ldr    r7, [%[b]]\n\t"
00261         "lsr    r6, r6, #16\n\t"
00262         "lsr    r7, r7, #16\n\t"
00263         "mul    r7, r6\n\t" /* high(a) * high(b), added one word up */
00264         "add    r4, r7\n\t"
00265         "adc    r5, %[r]\n\t"
00266         "ldr    r7, [%[b]]\n\t"
00267         "lsl    r7, r7, #16\n\t"
00268         "lsr    r7, r7, #16\n\t"
00269         "mul    r6, r7\n\t" /* high(a) * low(b), added 16 bits up */
00270         "lsr    r7, r6, #16\n\t"
00271         "lsl    r6, r6, #16\n\t"
00272         "add    r3, r6\n\t"
00273         "adc    r4, r7\n\t"
00274         "adc    r5, %[r]\n\t"
00275         "# Multiply Done\n\t"
00276         "add    %[a], #4\n\t" /* next digit of a ... */
00277         "sub    %[b], #4\n\t" /* ... pairs with the previous digit of b */
00278         "cmp    %[a], r12\n\t"
00279         "beq    3f\n\t" /* ran off the end of a: column finished */
00280         "mov    r6, r8\n\t"
00281         "add    r6, r9\n\t"
00282         "cmp    %[a], r6\n\t"
00283         "ble    2b\n\t" /* keep going while the a pointer is <= a + r8 */
00284         "\n3:\n\t" /* store the finished column and shift the accumulator down */
00285         "mov    %[r], r11\n\t"
00286         "mov    r7, r8\n\t"
00287         "str    r3, [%[r], r7]\n\t"
00288         "mov    r3, r4\n\t"
00289         "mov    r4, r5\n\t"
00290         "add    r7, #4\n\t"
00291         "mov    r8, r7\n\t"
00292         "mov    r6, #56\n\t"
00293         "cmp    r7, r6\n\t"
00294         "ble    1b\n\t" /* columns at byte offsets 0..56 are computed in the loop */
00295         "str    r3, [%[r], r7]\n\t" /* last digit (offset 60) is the remaining sum */
00296         "mov    %[a], r9\n\t" /* put back the original operand values, which */
00297         "mov    %[b], r10\n\t" /* are declared below as input-only operands */
00298         :
00299         : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
00300         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
00301     );
00302 
00303     XMEMCPY(r, tmp, sizeof(tmp));
00304 }
00305 
00306 /* Square a and put result in r. (r = a * a)
00307  * 8 digits in, 16 digits out; built in 64 bytes of stack and copied to r last.
00308  * r  A single precision integer.
00309  * a  A single precision integer.
00310  */
00311 SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a)
00312 {
00313     __asm__ __volatile__ (
00314         "mov    r3, #0\n\t" /* r3:r4:r5 hold the running column sum (low, mid, high) */
00315         "mov    r4, #0\n\t"
00316         "mov    r5, #0\n\t"
00317         "mov    r8, r3\n\t" /* r8 = byte offset of the current result column */
00318         "mov    r11, %[r]\n\t" /* r11 = r */
00319         "mov    r6, #64\n\t"
00320         "neg    r6, r6\n\t"
00321         "add    sp, r6\n\t" /* reserve 64 bytes by moving sp down directly */
00322         "mov    r10, sp\n\t" /* r10 = scratch result on the stack */
00323         "mov    r9, %[a]\n\t" /* r9 = a */
00324         "\n1:\n\t" /* start of one result column */
00325         "mov    %[r], #0\n\t" /* %[r] serves as a zero operand for adc below */
00326         "mov    r6, #28\n\t"
00327         "mov    %[a], r8\n\t"
00328         "sub    %[a], r6\n\t"
00329         "sbc    r6, r6\n\t"
00330         "mvn    r6, r6\n\t"
00331         "and    %[a], r6\n\t" /* %[a] = max(r8 - 28, 0): lower digit offset */
00332         "mov    r2, r8\n\t"
00333         "sub    r2, %[a]\n\t" /* r2 = r8 - %[a]: offset of the partner digit */
00334         "add    %[a], r9\n\t"
00335         "add    r2, r9\n\t"
00336         "\n2:\n\t"
00337         "cmp    r2, %[a]\n\t"
00338         "beq    4f\n\t" /* both pointers on the same digit: square it */
00339         "# Multiply * 2: Start\n\t"
00340         "ldr    r6, [%[a]]\n\t"
00341         "ldr    r7, [r2]\n\t"
00342         "lsl    r6, r6, #16\n\t"
00343         "lsl    r7, r7, #16\n\t"
00344         "lsr    r6, r6, #16\n\t"
00345         "lsr    r7, r7, #16\n\t"
00346         "mul    r7, r6\n\t"
00347         "add    r3, r7\n\t" /* each partial product is accumulated twice */
00348         "adc    r4, %[r]\n\t"
00349         "adc    r5, %[r]\n\t"
00350         "add    r3, r7\n\t"
00351         "adc    r4, %[r]\n\t"
00352         "adc    r5, %[r]\n\t"
00353         "ldr    r7, [r2]\n\t"
00354         "lsr    r7, r7, #16\n\t"
00355         "mul    r6, r7\n\t"
00356         "lsr    r7, r6, #16\n\t"
00357         "lsl    r6, r6, #16\n\t"
00358         "add    r3, r6\n\t"
00359         "adc    r4, r7\n\t"
00360         "adc    r5, %[r]\n\t"
00361         "add    r3, r6\n\t"
00362         "adc    r4, r7\n\t"
00363         "adc    r5, %[r]\n\t"
00364         "ldr    r6, [%[a]]\n\t"
00365         "ldr    r7, [r2]\n\t"
00366         "lsr    r6, r6, #16\n\t"
00367         "lsr    r7, r7, #16\n\t"
00368         "mul    r7, r6\n\t"
00369         "add    r4, r7\n\t"
00370         "adc    r5, %[r]\n\t"
00371         "add    r4, r7\n\t"
00372         "adc    r5, %[r]\n\t"
00373         "ldr    r7, [r2]\n\t"
00374         "lsl    r7, r7, #16\n\t"
00375         "lsr    r7, r7, #16\n\t"
00376         "mul    r6, r7\n\t"
00377         "lsr    r7, r6, #16\n\t"
00378         "lsl    r6, r6, #16\n\t"
00379         "add    r3, r6\n\t"
00380         "adc    r4, r7\n\t"
00381         "adc    r5, %[r]\n\t"
00382         "add    r3, r6\n\t"
00383         "adc    r4, r7\n\t"
00384         "adc    r5, %[r]\n\t"
00385         "# Multiply * 2: Done\n\t"
00386         "bal    5f\n\t" /* skip the single-digit square path */
00387         "\n4:\n\t"
00388         "# Square: Start\n\t"
00389         "ldr    r6, [%[a]]\n\t"
00390         "lsr    r7, r6, #16\n\t"
00391         "lsl    r6, r6, #16\n\t"
00392         "lsr    r6, r6, #16\n\t"
00393         "mul    r6, r6\n\t" /* low half squared */
00394         "add    r3, r6\n\t"
00395         "adc    r4, %[r]\n\t"
00396         "adc    r5, %[r]\n\t"
00397         "mul    r7, r7\n\t" /* high half squared, added one word up */
00398         "add    r4, r7\n\t"
00399         "adc    r5, %[r]\n\t"
00400         "ldr    r6, [%[a]]\n\t"
00401         "lsr    r7, r6, #16\n\t"
00402         "lsl    r6, r6, #16\n\t"
00403         "lsr    r6, r6, #16\n\t"
00404         "mul    r6, r7\n\t" /* low * high ... */
00405         "lsr    r7, r6, #15\n\t" /* ... split at 15/17 instead of 16/16, */
00406         "lsl    r6, r6, #17\n\t" /* i.e. doubled while being shifted up 16 bits */
00407         "add    r3, r6\n\t"
00408         "adc    r4, r7\n\t"
00409         "adc    r5, %[r]\n\t"
00410         "# Square: Done\n\t"
00411         "\n5:\n\t"
00412         "add    %[a], #4\n\t"
00413         "sub    r2, #4\n\t"
00414         "mov    r6, #32\n\t"
00415         "add    r6, r9\n\t"
00416         "cmp    %[a], r6\n\t"
00417         "beq    3f\n\t" /* reached a + 32 bytes: column finished */
00418         "cmp    %[a], r2\n\t"
00419         "bgt    3f\n\t" /* pointers have crossed: column finished */
00420         "mov    r7, r8\n\t"
00421         "add    r7, r9\n\t"
00422         "cmp    %[a], r7\n\t"
00423         "ble    2b\n\t"
00424         "\n3:\n\t" /* store the finished column and shift the accumulator down */
00425         "mov    %[r], r10\n\t"
00426         "mov    r7, r8\n\t"
00427         "str    r3, [%[r], r7]\n\t"
00428         "mov    r3, r4\n\t"
00429         "mov    r4, r5\n\t"
00430         "mov    r5, #0\n\t"
00431         "add    r7, #4\n\t"
00432         "mov    r8, r7\n\t"
00433         "mov    r6, #56\n\t"
00434         "cmp    r7, r6\n\t"
00435         "ble    1b\n\t" /* columns at byte offsets 0..56 are computed in the loop */
00436         "mov    %[a], r9\n\t"
00437         "str    r3, [%[r], r7]\n\t" /* last digit (offset 60) is the remaining sum */
00438         "mov    %[r], r11\n\t" /* %[r] = caller's r again */
00439         "mov    %[a], r10\n\t" /* NOTE(review): input-only %[a] keeps the scratch address on exit */
00440         "mov    r3, #60\n\t"
00441         "\n4:\n\t" /* copy the 16 scratch digits to r, offset 60 down to 0 */
00442         "ldr    r6, [%[a], r3]\n\t"
00443         "str    r6, [%[r], r3]\n\t"
00444         "sub    r3, #4\n\t"
00445         "bge    4b\n\t"
00446         "mov    r6, #64\n\t"
00447         "add    sp, r6\n\t" /* release the 64 bytes reserved above */
00448         :
00449         : [r] "r" (r), [a] "r" (a)
00450         : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
00451     );
00452 }
00453 
00454 /* Add b to a into r. (r = a + b)
00455  * 8 digits are added; the return value is the carry out of the top digit.
00456  * r  A single precision integer.
00457  * a  A single precision integer.
00458  * b  A single precision integer.
00459  */
00460 SP_NOINLINE static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a,
00461         const sp_digit* b)
00462 {
00463     sp_digit c = 0;
00464 
00465     __asm__ __volatile__ (
00466         "ldr    r4, [%[a], #0]\n\t"
00467         "ldr    r5, [%[b], #0]\n\t"
00468         "add    r4, r5\n\t" /* lowest digit: plain add starts the carry chain */
00469         "str    r4, [%[r], #0]\n\t"
00470         "ldr    r4, [%[a], #4]\n\t"
00471         "ldr    r5, [%[b], #4]\n\t"
00472         "adc    r4, r5\n\t" /* every higher digit adds the carry from below */
00473         "str    r4, [%[r], #4]\n\t"
00474         "ldr    r4, [%[a], #8]\n\t"
00475         "ldr    r5, [%[b], #8]\n\t"
00476         "adc    r4, r5\n\t"
00477         "str    r4, [%[r], #8]\n\t"
00478         "ldr    r4, [%[a], #12]\n\t"
00479         "ldr    r5, [%[b], #12]\n\t"
00480         "adc    r4, r5\n\t"
00481         "str    r4, [%[r], #12]\n\t"
00482         "ldr    r4, [%[a], #16]\n\t"
00483         "ldr    r5, [%[b], #16]\n\t"
00484         "adc    r4, r5\n\t"
00485         "str    r4, [%[r], #16]\n\t"
00486         "ldr    r4, [%[a], #20]\n\t"
00487         "ldr    r5, [%[b], #20]\n\t"
00488         "adc    r4, r5\n\t"
00489         "str    r4, [%[r], #20]\n\t"
00490         "ldr    r4, [%[a], #24]\n\t"
00491         "ldr    r5, [%[b], #24]\n\t"
00492         "adc    r4, r5\n\t"
00493         "str    r4, [%[r], #24]\n\t"
00494         "ldr    r4, [%[a], #28]\n\t"
00495         "ldr    r5, [%[b], #28]\n\t"
00496         "adc    r4, r5\n\t"
00497         "str    r4, [%[r], #28]\n\t"
00498         "mov    %[c], #0\n\t" /* presumably leaves the carry flag untouched - TODO confirm */
00499         "adc    %[c], %[c]\n\t" /* c = 0 + 0 + carry */
00500         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
00501         :
00502         : "memory", "r4", "r5"
00503     );
00504 
00505     return c;
00506 }
00507 
00508 /* Subtract b from a in place, 16 digits. (a = a - b)
00509  * Returns 0 when no borrow leaves the top digit, all ones otherwise.
00510  * (There is no separate result parameter; a is overwritten.)
00511  * a  A single precision integer; receives the difference.
00512  * b  A single precision integer.
00513  */
00514 SP_NOINLINE static sp_digit sp_2048_sub_in_place_16(sp_digit* a,
00515         const sp_digit* b)
00516 {
00517     sp_digit c = 0;
00518 
00519     __asm__ __volatile__ (
00520         "ldr    r3, [%[a], #0]\n\t" /* digits are handled two at a time */
00521         "ldr    r4, [%[a], #4]\n\t"
00522         "ldr    r5, [%[b], #0]\n\t"
00523         "ldr    r6, [%[b], #4]\n\t"
00524         "sub    r3, r5\n\t" /* lowest digit: plain sub starts the borrow chain */
00525         "sbc    r4, r6\n\t" /* every higher digit subtracts with borrow */
00526         "str    r3, [%[a], #0]\n\t"
00527         "str    r4, [%[a], #4]\n\t"
00528         "ldr    r3, [%[a], #8]\n\t"
00529         "ldr    r4, [%[a], #12]\n\t"
00530         "ldr    r5, [%[b], #8]\n\t"
00531         "ldr    r6, [%[b], #12]\n\t"
00532         "sbc    r3, r5\n\t"
00533         "sbc    r4, r6\n\t"
00534         "str    r3, [%[a], #8]\n\t"
00535         "str    r4, [%[a], #12]\n\t"
00536         "ldr    r3, [%[a], #16]\n\t"
00537         "ldr    r4, [%[a], #20]\n\t"
00538         "ldr    r5, [%[b], #16]\n\t"
00539         "ldr    r6, [%[b], #20]\n\t"
00540         "sbc    r3, r5\n\t"
00541         "sbc    r4, r6\n\t"
00542         "str    r3, [%[a], #16]\n\t"
00543         "str    r4, [%[a], #20]\n\t"
00544         "ldr    r3, [%[a], #24]\n\t"
00545         "ldr    r4, [%[a], #28]\n\t"
00546         "ldr    r5, [%[b], #24]\n\t"
00547         "ldr    r6, [%[b], #28]\n\t"
00548         "sbc    r3, r5\n\t"
00549         "sbc    r4, r6\n\t"
00550         "str    r3, [%[a], #24]\n\t"
00551         "str    r4, [%[a], #28]\n\t"
00552         "ldr    r3, [%[a], #32]\n\t"
00553         "ldr    r4, [%[a], #36]\n\t"
00554         "ldr    r5, [%[b], #32]\n\t"
00555         "ldr    r6, [%[b], #36]\n\t"
00556         "sbc    r3, r5\n\t"
00557         "sbc    r4, r6\n\t"
00558         "str    r3, [%[a], #32]\n\t"
00559         "str    r4, [%[a], #36]\n\t"
00560         "ldr    r3, [%[a], #40]\n\t"
00561         "ldr    r4, [%[a], #44]\n\t"
00562         "ldr    r5, [%[b], #40]\n\t"
00563         "ldr    r6, [%[b], #44]\n\t"
00564         "sbc    r3, r5\n\t"
00565         "sbc    r4, r6\n\t"
00566         "str    r3, [%[a], #40]\n\t"
00567         "str    r4, [%[a], #44]\n\t"
00568         "ldr    r3, [%[a], #48]\n\t"
00569         "ldr    r4, [%[a], #52]\n\t"
00570         "ldr    r5, [%[b], #48]\n\t"
00571         "ldr    r6, [%[b], #52]\n\t"
00572         "sbc    r3, r5\n\t"
00573         "sbc    r4, r6\n\t"
00574         "str    r3, [%[a], #48]\n\t"
00575         "str    r4, [%[a], #52]\n\t"
00576         "ldr    r3, [%[a], #56]\n\t"
00577         "ldr    r4, [%[a], #60]\n\t"
00578         "ldr    r5, [%[b], #56]\n\t"
00579         "ldr    r6, [%[b], #60]\n\t"
00580         "sbc    r3, r5\n\t"
00581         "sbc    r4, r6\n\t"
00582         "str    r3, [%[a], #56]\n\t"
00583         "str    r4, [%[a], #60]\n\t"
00584         "sbc    %[c], %[c]\n\t" /* c - c - borrow: 0 without borrow, all ones with */
00585         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
00586         :
00587         : "memory", "r3", "r4", "r5", "r6"
00588     );
00589 
00590     return c;
00591 }
00592 
00593 /* Add b to a into r. (r = a + b)
00594  * 16 digits are added; the return value is the carry out of the top digit.
00595  * r  A single precision integer.
00596  * a  A single precision integer.
00597  * b  A single precision integer.
00598  */
00599 SP_NOINLINE static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a,
00600         const sp_digit* b)
00601 {
00602     sp_digit c = 0;
00603 
00604     __asm__ __volatile__ (
00605         "ldr    r4, [%[a], #0]\n\t"
00606         "ldr    r5, [%[b], #0]\n\t"
00607         "add    r4, r5\n\t" /* lowest digit: plain add starts the carry chain */
00608         "str    r4, [%[r], #0]\n\t"
00609         "ldr    r4, [%[a], #4]\n\t"
00610         "ldr    r5, [%[b], #4]\n\t"
00611         "adc    r4, r5\n\t" /* every higher digit adds the carry from below */
00612         "str    r4, [%[r], #4]\n\t"
00613         "ldr    r4, [%[a], #8]\n\t"
00614         "ldr    r5, [%[b], #8]\n\t"
00615         "adc    r4, r5\n\t"
00616         "str    r4, [%[r], #8]\n\t"
00617         "ldr    r4, [%[a], #12]\n\t"
00618         "ldr    r5, [%[b], #12]\n\t"
00619         "adc    r4, r5\n\t"
00620         "str    r4, [%[r], #12]\n\t"
00621         "ldr    r4, [%[a], #16]\n\t"
00622         "ldr    r5, [%[b], #16]\n\t"
00623         "adc    r4, r5\n\t"
00624         "str    r4, [%[r], #16]\n\t"
00625         "ldr    r4, [%[a], #20]\n\t"
00626         "ldr    r5, [%[b], #20]\n\t"
00627         "adc    r4, r5\n\t"
00628         "str    r4, [%[r], #20]\n\t"
00629         "ldr    r4, [%[a], #24]\n\t"
00630         "ldr    r5, [%[b], #24]\n\t"
00631         "adc    r4, r5\n\t"
00632         "str    r4, [%[r], #24]\n\t"
00633         "ldr    r4, [%[a], #28]\n\t"
00634         "ldr    r5, [%[b], #28]\n\t"
00635         "adc    r4, r5\n\t"
00636         "str    r4, [%[r], #28]\n\t"
00637         "ldr    r4, [%[a], #32]\n\t"
00638         "ldr    r5, [%[b], #32]\n\t"
00639         "adc    r4, r5\n\t"
00640         "str    r4, [%[r], #32]\n\t"
00641         "ldr    r4, [%[a], #36]\n\t"
00642         "ldr    r5, [%[b], #36]\n\t"
00643         "adc    r4, r5\n\t"
00644         "str    r4, [%[r], #36]\n\t"
00645         "ldr    r4, [%[a], #40]\n\t"
00646         "ldr    r5, [%[b], #40]\n\t"
00647         "adc    r4, r5\n\t"
00648         "str    r4, [%[r], #40]\n\t"
00649         "ldr    r4, [%[a], #44]\n\t"
00650         "ldr    r5, [%[b], #44]\n\t"
00651         "adc    r4, r5\n\t"
00652         "str    r4, [%[r], #44]\n\t"
00653         "ldr    r4, [%[a], #48]\n\t"
00654         "ldr    r5, [%[b], #48]\n\t"
00655         "adc    r4, r5\n\t"
00656         "str    r4, [%[r], #48]\n\t"
00657         "ldr    r4, [%[a], #52]\n\t"
00658         "ldr    r5, [%[b], #52]\n\t"
00659         "adc    r4, r5\n\t"
00660         "str    r4, [%[r], #52]\n\t"
00661         "ldr    r4, [%[a], #56]\n\t"
00662         "ldr    r5, [%[b], #56]\n\t"
00663         "adc    r4, r5\n\t"
00664         "str    r4, [%[r], #56]\n\t"
00665         "ldr    r4, [%[a], #60]\n\t"
00666         "ldr    r5, [%[b], #60]\n\t"
00667         "adc    r4, r5\n\t"
00668         "str    r4, [%[r], #60]\n\t"
00669         "mov    %[c], #0\n\t" /* presumably leaves the carry flag untouched - TODO confirm */
00670         "adc    %[c], %[c]\n\t" /* c = 0 + 0 + carry */
00671         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
00672         :
00673         : "memory", "r4", "r5"
00674     );
00675 
00676     return c;
00677 }
00678 
00679 /* Store a[i] & m into r[i] for each of the 8 digits.
00680  * Digits are processed from index 0 upwards.
00681  * r  A single precision integer; receives the masked digits.
00682  * a  A single precision integer.
00683  * m  Mask to AND against each digit.
00684  */
00685 static void sp_2048_mask_8(sp_digit* r, const sp_digit* a, sp_digit m)
00686 {
00687 #ifndef WOLFSSL_SP_SMALL
00688     r[0] = m & a[0];
00689     r[1] = m & a[1];
00690     r[2] = m & a[2];
00691     r[3] = m & a[3];
00692     r[4] = m & a[4];
00693     r[5] = m & a[5];
00694     r[6] = m & a[6];
00695     r[7] = m & a[7];
00696 #else
00697     int k;
00698 
00699     for (k = 0; k < 8; k++) {
00700         r[k] = m & a[k];
00701     }
00702 #endif
00703 }
00704 
00705 /* Multiply a and b into r. (r = a * b)
00706  * 16-digit operands, 32-digit result, built from three 8-digit multiplies.
00707  * r  A single precision integer.
00708  * a  A single precision integer.
00709  * b  A single precision integer.
00710  */
00711 SP_NOINLINE static void sp_2048_mul_16(sp_digit* r, const sp_digit* a,
00712         const sp_digit* b)
00713 {
00714     sp_digit mid[16];       /* product of the two half-sums, then corrected */
00715     sp_digit high[16];      /* product of the upper halves of a and b */
00716     sp_digit asum[8];       /* low 8 digits of a[0..7] + a[8..15] */
00717     sp_digit bsum[8];       /* low 8 digits of b[0..7] + b[8..15] */
00718     sp_digit acarry, bcarry;
00719     sp_digit top;           /* running value that ends up in r[24] */
00720 
00721     acarry = sp_2048_add_8(asum, a, a + 8);
00722     bcarry = sp_2048_add_8(bsum, b, b + 8);
00723     top = acarry & bcarry;
00724     sp_2048_mul_8(mid, asum, bsum);
00725     sp_2048_mul_8(high, a + 8, b + 8);
00726     sp_2048_mul_8(r, a, b);
00727     sp_2048_mask_8(&r[16], asum, 0 - bcarry);
00728     sp_2048_mask_8(bsum, bsum, 0 - acarry);
00729     top += sp_2048_add_8(&r[16], &r[16], bsum);
00730     top += sp_2048_sub_in_place_16(mid, high);
00731     top += sp_2048_sub_in_place_16(mid, r);
00732     top += sp_2048_add_16(&r[8], &r[8], mid);
00733     r[24] = top;
00734     XMEMSET(&r[25], 0, sizeof(sp_digit) * 7);
00735     (void)sp_2048_add_16(&r[16], &r[16], high);
00736 }
00737 
00738 /* Square a and put result in r. (r = a * a)
00739  * 16-digit operand, 32-digit result, built from three 8-digit squarings.
00740  * r  A single precision integer.
00741  * a  A single precision integer.
00742  */
00743 SP_NOINLINE static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a)
00744 {
00745     /* The square of the lower half of a is written straight into r. */
00746     sp_digit mid[16];       /* square of the half-sum, then corrected */
00747     sp_digit high[16];      /* square of the upper half of a */
00748     sp_digit asum[8];       /* low 8 digits of a[0..7] + a[8..15] */
00749     sp_digit top;           /* carry of the half-sum, later destined for r[24] */
00750 
00751     top = sp_2048_add_8(asum, a, a + 8);
00752     sp_2048_sqr_8(mid, asum);
00753     sp_2048_sqr_8(high, a + 8);
00754     sp_2048_sqr_8(r, a);
00755     sp_2048_mask_8(&r[16], asum, 0 - top);
00756     top += sp_2048_add_8(&r[16], &r[16], &r[16]);
00757     top += sp_2048_sub_in_place_16(mid, high);
00758     top += sp_2048_sub_in_place_16(mid, r);
00759     top += sp_2048_add_16(&r[8], &r[8], mid);
00760     r[24] = top;
00761     XMEMSET(&r[25], 0, sizeof(sp_digit) * 7);
00762     (void)sp_2048_add_16(&r[16], &r[16], high);
00763 }
00764 
00765 /* Subtract b from a in place, 32 digits. (a = a - b)
00766  * Returns 0 when no borrow leaves the top digit, all ones otherwise.
00767  * (There is no separate result parameter; a is overwritten.)
00768  * a  A single precision integer; receives the difference.
00769  * b  A single precision integer.
00770  */
00771 SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a,
00772         const sp_digit* b)
00773 {
00774     sp_digit c = 0;
00775 
00776     __asm__ __volatile__ (
00777         "ldr    r3, [%[a], #0]\n\t" /* digits are handled two at a time */
00778         "ldr    r4, [%[a], #4]\n\t"
00779         "ldr    r5, [%[b], #0]\n\t"
00780         "ldr    r6, [%[b], #4]\n\t"
00781         "sub    r3, r5\n\t" /* lowest digit: plain sub starts the borrow chain */
00782         "sbc    r4, r6\n\t" /* every higher digit subtracts with borrow */
00783         "str    r3, [%[a], #0]\n\t"
00784         "str    r4, [%[a], #4]\n\t"
00785         "ldr    r3, [%[a], #8]\n\t"
00786         "ldr    r4, [%[a], #12]\n\t"
00787         "ldr    r5, [%[b], #8]\n\t"
00788         "ldr    r6, [%[b], #12]\n\t"
00789         "sbc    r3, r5\n\t"
00790         "sbc    r4, r6\n\t"
00791         "str    r3, [%[a], #8]\n\t"
00792         "str    r4, [%[a], #12]\n\t"
00793         "ldr    r3, [%[a], #16]\n\t"
00794         "ldr    r4, [%[a], #20]\n\t"
00795         "ldr    r5, [%[b], #16]\n\t"
00796         "ldr    r6, [%[b], #20]\n\t"
00797         "sbc    r3, r5\n\t"
00798         "sbc    r4, r6\n\t"
00799         "str    r3, [%[a], #16]\n\t"
00800         "str    r4, [%[a], #20]\n\t"
00801         "ldr    r3, [%[a], #24]\n\t"
00802         "ldr    r4, [%[a], #28]\n\t"
00803         "ldr    r5, [%[b], #24]\n\t"
00804         "ldr    r6, [%[b], #28]\n\t"
00805         "sbc    r3, r5\n\t"
00806         "sbc    r4, r6\n\t"
00807         "str    r3, [%[a], #24]\n\t"
00808         "str    r4, [%[a], #28]\n\t"
00809         "ldr    r3, [%[a], #32]\n\t"
00810         "ldr    r4, [%[a], #36]\n\t"
00811         "ldr    r5, [%[b], #32]\n\t"
00812         "ldr    r6, [%[b], #36]\n\t"
00813         "sbc    r3, r5\n\t"
00814         "sbc    r4, r6\n\t"
00815         "str    r3, [%[a], #32]\n\t"
00816         "str    r4, [%[a], #36]\n\t"
00817         "ldr    r3, [%[a], #40]\n\t"
00818         "ldr    r4, [%[a], #44]\n\t"
00819         "ldr    r5, [%[b], #40]\n\t"
00820         "ldr    r6, [%[b], #44]\n\t"
00821         "sbc    r3, r5\n\t"
00822         "sbc    r4, r6\n\t"
00823         "str    r3, [%[a], #40]\n\t"
00824         "str    r4, [%[a], #44]\n\t"
00825         "ldr    r3, [%[a], #48]\n\t"
00826         "ldr    r4, [%[a], #52]\n\t"
00827         "ldr    r5, [%[b], #48]\n\t"
00828         "ldr    r6, [%[b], #52]\n\t"
00829         "sbc    r3, r5\n\t"
00830         "sbc    r4, r6\n\t"
00831         "str    r3, [%[a], #48]\n\t"
00832         "str    r4, [%[a], #52]\n\t"
00833         "ldr    r3, [%[a], #56]\n\t"
00834         "ldr    r4, [%[a], #60]\n\t"
00835         "ldr    r5, [%[b], #56]\n\t"
00836         "ldr    r6, [%[b], #60]\n\t"
00837         "sbc    r3, r5\n\t"
00838         "sbc    r4, r6\n\t"
00839         "str    r3, [%[a], #56]\n\t"
00840         "str    r4, [%[a], #60]\n\t"
00841         "ldr    r3, [%[a], #64]\n\t"
00842         "ldr    r4, [%[a], #68]\n\t"
00843         "ldr    r5, [%[b], #64]\n\t"
00844         "ldr    r6, [%[b], #68]\n\t"
00845         "sbc    r3, r5\n\t"
00846         "sbc    r4, r6\n\t"
00847         "str    r3, [%[a], #64]\n\t"
00848         "str    r4, [%[a], #68]\n\t"
00849         "ldr    r3, [%[a], #72]\n\t"
00850         "ldr    r4, [%[a], #76]\n\t"
00851         "ldr    r5, [%[b], #72]\n\t"
00852         "ldr    r6, [%[b], #76]\n\t"
00853         "sbc    r3, r5\n\t"
00854         "sbc    r4, r6\n\t"
00855         "str    r3, [%[a], #72]\n\t"
00856         "str    r4, [%[a], #76]\n\t"
00857         "ldr    r3, [%[a], #80]\n\t"
00858         "ldr    r4, [%[a], #84]\n\t"
00859         "ldr    r5, [%[b], #80]\n\t"
00860         "ldr    r6, [%[b], #84]\n\t"
00861         "sbc    r3, r5\n\t"
00862         "sbc    r4, r6\n\t"
00863         "str    r3, [%[a], #80]\n\t"
00864         "str    r4, [%[a], #84]\n\t"
00865         "ldr    r3, [%[a], #88]\n\t"
00866         "ldr    r4, [%[a], #92]\n\t"
00867         "ldr    r5, [%[b], #88]\n\t"
00868         "ldr    r6, [%[b], #92]\n\t"
00869         "sbc    r3, r5\n\t"
00870         "sbc    r4, r6\n\t"
00871         "str    r3, [%[a], #88]\n\t"
00872         "str    r4, [%[a], #92]\n\t"
00873         "ldr    r3, [%[a], #96]\n\t"
00874         "ldr    r4, [%[a], #100]\n\t"
00875         "ldr    r5, [%[b], #96]\n\t"
00876         "ldr    r6, [%[b], #100]\n\t"
00877         "sbc    r3, r5\n\t"
00878         "sbc    r4, r6\n\t"
00879         "str    r3, [%[a], #96]\n\t"
00880         "str    r4, [%[a], #100]\n\t"
00881         "ldr    r3, [%[a], #104]\n\t"
00882         "ldr    r4, [%[a], #108]\n\t"
00883         "ldr    r5, [%[b], #104]\n\t"
00884         "ldr    r6, [%[b], #108]\n\t"
00885         "sbc    r3, r5\n\t"
00886         "sbc    r4, r6\n\t"
00887         "str    r3, [%[a], #104]\n\t"
00888         "str    r4, [%[a], #108]\n\t"
00889         "ldr    r3, [%[a], #112]\n\t"
00890         "ldr    r4, [%[a], #116]\n\t"
00891         "ldr    r5, [%[b], #112]\n\t"
00892         "ldr    r6, [%[b], #116]\n\t"
00893         "sbc    r3, r5\n\t"
00894         "sbc    r4, r6\n\t"
00895         "str    r3, [%[a], #112]\n\t"
00896         "str    r4, [%[a], #116]\n\t"
00897         "ldr    r3, [%[a], #120]\n\t"
00898         "ldr    r4, [%[a], #124]\n\t"
00899         "ldr    r5, [%[b], #120]\n\t"
00900         "ldr    r6, [%[b], #124]\n\t"
00901         "sbc    r3, r5\n\t"
00902         "sbc    r4, r6\n\t"
00903         "str    r3, [%[a], #120]\n\t"
00904         "str    r4, [%[a], #124]\n\t"
00905         "sbc    %[c], %[c]\n\t" /* c - c - borrow: 0 without borrow, all ones with */
00906         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
00907         :
00908         : "memory", "r3", "r4", "r5", "r6"
00909     );
00910 
00911     return c;
00912 }
00913 
00914 /* Add b to a into r. (r = a + b)
00915  *
00916  * r  A single precision integer.
00917  * a  A single precision integer.
00918  * b  A single precision integer.
00919  */
00920 SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a,
00921         const sp_digit* b)
00922 {
00923     sp_digit c = 0;
00924 
00925     __asm__ __volatile__ (
00926         "ldr    r4, [%[a], #0]\n\t"
00927         "ldr    r5, [%[b], #0]\n\t"
00928         "add    r4, r5\n\t"
00929         "str    r4, [%[r], #0]\n\t"
00930         "ldr    r4, [%[a], #4]\n\t"
00931         "ldr    r5, [%[b], #4]\n\t"
00932         "adc    r4, r5\n\t"
00933         "str    r4, [%[r], #4]\n\t"
00934         "ldr    r4, [%[a], #8]\n\t"
00935         "ldr    r5, [%[b], #8]\n\t"
00936         "adc    r4, r5\n\t"
00937         "str    r4, [%[r], #8]\n\t"
00938         "ldr    r4, [%[a], #12]\n\t"
00939         "ldr    r5, [%[b], #12]\n\t"
00940         "adc    r4, r5\n\t"
00941         "str    r4, [%[r], #12]\n\t"
00942         "ldr    r4, [%[a], #16]\n\t"
00943         "ldr    r5, [%[b], #16]\n\t"
00944         "adc    r4, r5\n\t"
00945         "str    r4, [%[r], #16]\n\t"
00946         "ldr    r4, [%[a], #20]\n\t"
00947         "ldr    r5, [%[b], #20]\n\t"
00948         "adc    r4, r5\n\t"
00949         "str    r4, [%[r], #20]\n\t"
00950         "ldr    r4, [%[a], #24]\n\t"
00951         "ldr    r5, [%[b], #24]\n\t"
00952         "adc    r4, r5\n\t"
00953         "str    r4, [%[r], #24]\n\t"
00954         "ldr    r4, [%[a], #28]\n\t"
00955         "ldr    r5, [%[b], #28]\n\t"
00956         "adc    r4, r5\n\t"
00957         "str    r4, [%[r], #28]\n\t"
00958         "ldr    r4, [%[a], #32]\n\t"
00959         "ldr    r5, [%[b], #32]\n\t"
00960         "adc    r4, r5\n\t"
00961         "str    r4, [%[r], #32]\n\t"
00962         "ldr    r4, [%[a], #36]\n\t"
00963         "ldr    r5, [%[b], #36]\n\t"
00964         "adc    r4, r5\n\t"
00965         "str    r4, [%[r], #36]\n\t"
00966         "ldr    r4, [%[a], #40]\n\t"
00967         "ldr    r5, [%[b], #40]\n\t"
00968         "adc    r4, r5\n\t"
00969         "str    r4, [%[r], #40]\n\t"
00970         "ldr    r4, [%[a], #44]\n\t"
00971         "ldr    r5, [%[b], #44]\n\t"
00972         "adc    r4, r5\n\t"
00973         "str    r4, [%[r], #44]\n\t"
00974         "ldr    r4, [%[a], #48]\n\t"
00975         "ldr    r5, [%[b], #48]\n\t"
00976         "adc    r4, r5\n\t"
00977         "str    r4, [%[r], #48]\n\t"
00978         "ldr    r4, [%[a], #52]\n\t"
00979         "ldr    r5, [%[b], #52]\n\t"
00980         "adc    r4, r5\n\t"
00981         "str    r4, [%[r], #52]\n\t"
00982         "ldr    r4, [%[a], #56]\n\t"
00983         "ldr    r5, [%[b], #56]\n\t"
00984         "adc    r4, r5\n\t"
00985         "str    r4, [%[r], #56]\n\t"
00986         "ldr    r4, [%[a], #60]\n\t"
00987         "ldr    r5, [%[b], #60]\n\t"
00988         "adc    r4, r5\n\t"
00989         "str    r4, [%[r], #60]\n\t"
00990         "ldr    r4, [%[a], #64]\n\t"
00991         "ldr    r5, [%[b], #64]\n\t"
00992         "adc    r4, r5\n\t"
00993         "str    r4, [%[r], #64]\n\t"
00994         "ldr    r4, [%[a], #68]\n\t"
00995         "ldr    r5, [%[b], #68]\n\t"
00996         "adc    r4, r5\n\t"
00997         "str    r4, [%[r], #68]\n\t"
00998         "ldr    r4, [%[a], #72]\n\t"
00999         "ldr    r5, [%[b], #72]\n\t"
01000         "adc    r4, r5\n\t"
01001         "str    r4, [%[r], #72]\n\t"
01002         "ldr    r4, [%[a], #76]\n\t"
01003         "ldr    r5, [%[b], #76]\n\t"
01004         "adc    r4, r5\n\t"
01005         "str    r4, [%[r], #76]\n\t"
01006         "ldr    r4, [%[a], #80]\n\t"
01007         "ldr    r5, [%[b], #80]\n\t"
01008         "adc    r4, r5\n\t"
01009         "str    r4, [%[r], #80]\n\t"
01010         "ldr    r4, [%[a], #84]\n\t"
01011         "ldr    r5, [%[b], #84]\n\t"
01012         "adc    r4, r5\n\t"
01013         "str    r4, [%[r], #84]\n\t"
01014         "ldr    r4, [%[a], #88]\n\t"
01015         "ldr    r5, [%[b], #88]\n\t"
01016         "adc    r4, r5\n\t"
01017         "str    r4, [%[r], #88]\n\t"
01018         "ldr    r4, [%[a], #92]\n\t"
01019         "ldr    r5, [%[b], #92]\n\t"
01020         "adc    r4, r5\n\t"
01021         "str    r4, [%[r], #92]\n\t"
01022         "ldr    r4, [%[a], #96]\n\t"
01023         "ldr    r5, [%[b], #96]\n\t"
01024         "adc    r4, r5\n\t"
01025         "str    r4, [%[r], #96]\n\t"
01026         "ldr    r4, [%[a], #100]\n\t"
01027         "ldr    r5, [%[b], #100]\n\t"
01028         "adc    r4, r5\n\t"
01029         "str    r4, [%[r], #100]\n\t"
01030         "ldr    r4, [%[a], #104]\n\t"
01031         "ldr    r5, [%[b], #104]\n\t"
01032         "adc    r4, r5\n\t"
01033         "str    r4, [%[r], #104]\n\t"
01034         "ldr    r4, [%[a], #108]\n\t"
01035         "ldr    r5, [%[b], #108]\n\t"
01036         "adc    r4, r5\n\t"
01037         "str    r4, [%[r], #108]\n\t"
01038         "ldr    r4, [%[a], #112]\n\t"
01039         "ldr    r5, [%[b], #112]\n\t"
01040         "adc    r4, r5\n\t"
01041         "str    r4, [%[r], #112]\n\t"
01042         "ldr    r4, [%[a], #116]\n\t"
01043         "ldr    r5, [%[b], #116]\n\t"
01044         "adc    r4, r5\n\t"
01045         "str    r4, [%[r], #116]\n\t"
01046         "ldr    r4, [%[a], #120]\n\t"
01047         "ldr    r5, [%[b], #120]\n\t"
01048         "adc    r4, r5\n\t"
01049         "str    r4, [%[r], #120]\n\t"
01050         "ldr    r4, [%[a], #124]\n\t"
01051         "ldr    r5, [%[b], #124]\n\t"
01052         "adc    r4, r5\n\t"
01053         "str    r4, [%[r], #124]\n\t"
01054         "mov    %[c], #0\n\t"
01055         "adc    %[c], %[c]\n\t"
01056         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
01057         :
01058         : "memory", "r4", "r5"
01059     );
01060 
01061     return c;
01062 }
01063 
01064 /* AND m into each word of a and store in r.
01065  *
01066  * r  A single precision integer.
01067  * a  A single precision integer.
01068  * m  Mask to AND against each digit.
01069  */
01070 static void sp_2048_mask_16(sp_digit* r, const sp_digit* a, sp_digit m)
01071 {
01072 #ifdef WOLFSSL_SP_SMALL
01073     int i;
01074 
01075     for (i=0; i<16; i++) {
01076         r[i] = a[i] & m;
01077     }
01078 #else
01079     int i;
01080 
01081     for (i = 0; i < 16; i += 8) {
01082         r[i+0] = a[i+0] & m;
01083         r[i+1] = a[i+1] & m;
01084         r[i+2] = a[i+2] & m;
01085         r[i+3] = a[i+3] & m;
01086         r[i+4] = a[i+4] & m;
01087         r[i+5] = a[i+5] & m;
01088         r[i+6] = a[i+6] & m;
01089         r[i+7] = a[i+7] & m;
01090     }
01091 #endif
01092 }
01093 
01094 /* Multiply a and b into r. (r = a * b)
01095  *
01096  * r  A single precision integer.
01097  * a  A single precision integer.
01098  * b  A single precision integer.
01099  */
01100 SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a,
01101         const sp_digit* b)
01102 {
01103     sp_digit* z0 = r;
01104     sp_digit z1[32];
01105     sp_digit a1[16];
01106     sp_digit b1[16];
01107     sp_digit z2[32];
01108     sp_digit u, ca, cb;
01109 
01110     ca = sp_2048_add_16(a1, a, &a[16]);
01111     cb = sp_2048_add_16(b1, b, &b[16]);
01112     u  = ca & cb;
01113     sp_2048_mul_16(z1, a1, b1);
01114     sp_2048_mul_16(z2, &a[16], &b[16]);
01115     sp_2048_mul_16(z0, a, b);
01116     sp_2048_mask_16(r + 32, a1, 0 - cb);
01117     sp_2048_mask_16(b1, b1, 0 - ca);
01118     u += sp_2048_add_16(r + 32, r + 32, b1);
01119     u += sp_2048_sub_in_place_32(z1, z2);
01120     u += sp_2048_sub_in_place_32(z1, z0);
01121     u += sp_2048_add_32(r + 16, r + 16, z1);
01122     r[48] = u;
01123     XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1));
01124     (void)sp_2048_add_32(r + 32, r + 32, z2);
01125 }
01126 
01127 /* Square a and put result in r. (r = a * a)
01128  *
01129  * r  A single precision integer.
01130  * a  A single precision integer.
01131  */
01132 SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a)
01133 {
01134     sp_digit* z0 = r;
01135     sp_digit z2[32];
01136     sp_digit z1[32];
01137     sp_digit a1[16];
01138     sp_digit u;
01139 
01140     u = sp_2048_add_16(a1, a, &a[16]);
01141     sp_2048_sqr_16(z1, a1);
01142     sp_2048_sqr_16(z2, &a[16]);
01143     sp_2048_sqr_16(z0, a);
01144     sp_2048_mask_16(r + 32, a1, 0 - u);
01145     u += sp_2048_add_16(r + 32, r + 32, r + 32);
01146     u += sp_2048_sub_in_place_32(z1, z2);
01147     u += sp_2048_sub_in_place_32(z1, z0);
01148     u += sp_2048_add_32(r + 16, r + 16, z1);
01149     r[48] = u;
01150     XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1));
01151     (void)sp_2048_add_32(r + 32, r + 32, z2);
01152 }
01153 
01154 /* Sub b from a into r. (r = a - b)
01155  *
01156  * r  A single precision integer.
01157  * a  A single precision integer.
01158  * b  A single precision integer.
01159  */
01160 SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a,
01161         const sp_digit* b)
01162 {
01163     sp_digit c = 0;
01164 
01165     __asm__ __volatile__ (
01166         "ldr    r3, [%[a], #0]\n\t"
01167         "ldr    r4, [%[a], #4]\n\t"
01168         "ldr    r5, [%[b], #0]\n\t"
01169         "ldr    r6, [%[b], #4]\n\t"
01170         "sub    r3, r5\n\t"
01171         "sbc    r4, r6\n\t"
01172         "str    r3, [%[a], #0]\n\t"
01173         "str    r4, [%[a], #4]\n\t"
01174         "ldr    r3, [%[a], #8]\n\t"
01175         "ldr    r4, [%[a], #12]\n\t"
01176         "ldr    r5, [%[b], #8]\n\t"
01177         "ldr    r6, [%[b], #12]\n\t"
01178         "sbc    r3, r5\n\t"
01179         "sbc    r4, r6\n\t"
01180         "str    r3, [%[a], #8]\n\t"
01181         "str    r4, [%[a], #12]\n\t"
01182         "ldr    r3, [%[a], #16]\n\t"
01183         "ldr    r4, [%[a], #20]\n\t"
01184         "ldr    r5, [%[b], #16]\n\t"
01185         "ldr    r6, [%[b], #20]\n\t"
01186         "sbc    r3, r5\n\t"
01187         "sbc    r4, r6\n\t"
01188         "str    r3, [%[a], #16]\n\t"
01189         "str    r4, [%[a], #20]\n\t"
01190         "ldr    r3, [%[a], #24]\n\t"
01191         "ldr    r4, [%[a], #28]\n\t"
01192         "ldr    r5, [%[b], #24]\n\t"
01193         "ldr    r6, [%[b], #28]\n\t"
01194         "sbc    r3, r5\n\t"
01195         "sbc    r4, r6\n\t"
01196         "str    r3, [%[a], #24]\n\t"
01197         "str    r4, [%[a], #28]\n\t"
01198         "ldr    r3, [%[a], #32]\n\t"
01199         "ldr    r4, [%[a], #36]\n\t"
01200         "ldr    r5, [%[b], #32]\n\t"
01201         "ldr    r6, [%[b], #36]\n\t"
01202         "sbc    r3, r5\n\t"
01203         "sbc    r4, r6\n\t"
01204         "str    r3, [%[a], #32]\n\t"
01205         "str    r4, [%[a], #36]\n\t"
01206         "ldr    r3, [%[a], #40]\n\t"
01207         "ldr    r4, [%[a], #44]\n\t"
01208         "ldr    r5, [%[b], #40]\n\t"
01209         "ldr    r6, [%[b], #44]\n\t"
01210         "sbc    r3, r5\n\t"
01211         "sbc    r4, r6\n\t"
01212         "str    r3, [%[a], #40]\n\t"
01213         "str    r4, [%[a], #44]\n\t"
01214         "ldr    r3, [%[a], #48]\n\t"
01215         "ldr    r4, [%[a], #52]\n\t"
01216         "ldr    r5, [%[b], #48]\n\t"
01217         "ldr    r6, [%[b], #52]\n\t"
01218         "sbc    r3, r5\n\t"
01219         "sbc    r4, r6\n\t"
01220         "str    r3, [%[a], #48]\n\t"
01221         "str    r4, [%[a], #52]\n\t"
01222         "ldr    r3, [%[a], #56]\n\t"
01223         "ldr    r4, [%[a], #60]\n\t"
01224         "ldr    r5, [%[b], #56]\n\t"
01225         "ldr    r6, [%[b], #60]\n\t"
01226         "sbc    r3, r5\n\t"
01227         "sbc    r4, r6\n\t"
01228         "str    r3, [%[a], #56]\n\t"
01229         "str    r4, [%[a], #60]\n\t"
01230         "ldr    r3, [%[a], #64]\n\t"
01231         "ldr    r4, [%[a], #68]\n\t"
01232         "ldr    r5, [%[b], #64]\n\t"
01233         "ldr    r6, [%[b], #68]\n\t"
01234         "sbc    r3, r5\n\t"
01235         "sbc    r4, r6\n\t"
01236         "str    r3, [%[a], #64]\n\t"
01237         "str    r4, [%[a], #68]\n\t"
01238         "ldr    r3, [%[a], #72]\n\t"
01239         "ldr    r4, [%[a], #76]\n\t"
01240         "ldr    r5, [%[b], #72]\n\t"
01241         "ldr    r6, [%[b], #76]\n\t"
01242         "sbc    r3, r5\n\t"
01243         "sbc    r4, r6\n\t"
01244         "str    r3, [%[a], #72]\n\t"
01245         "str    r4, [%[a], #76]\n\t"
01246         "ldr    r3, [%[a], #80]\n\t"
01247         "ldr    r4, [%[a], #84]\n\t"
01248         "ldr    r5, [%[b], #80]\n\t"
01249         "ldr    r6, [%[b], #84]\n\t"
01250         "sbc    r3, r5\n\t"
01251         "sbc    r4, r6\n\t"
01252         "str    r3, [%[a], #80]\n\t"
01253         "str    r4, [%[a], #84]\n\t"
01254         "ldr    r3, [%[a], #88]\n\t"
01255         "ldr    r4, [%[a], #92]\n\t"
01256         "ldr    r5, [%[b], #88]\n\t"
01257         "ldr    r6, [%[b], #92]\n\t"
01258         "sbc    r3, r5\n\t"
01259         "sbc    r4, r6\n\t"
01260         "str    r3, [%[a], #88]\n\t"
01261         "str    r4, [%[a], #92]\n\t"
01262         "ldr    r3, [%[a], #96]\n\t"
01263         "ldr    r4, [%[a], #100]\n\t"
01264         "ldr    r5, [%[b], #96]\n\t"
01265         "ldr    r6, [%[b], #100]\n\t"
01266         "sbc    r3, r5\n\t"
01267         "sbc    r4, r6\n\t"
01268         "str    r3, [%[a], #96]\n\t"
01269         "str    r4, [%[a], #100]\n\t"
01270         "ldr    r3, [%[a], #104]\n\t"
01271         "ldr    r4, [%[a], #108]\n\t"
01272         "ldr    r5, [%[b], #104]\n\t"
01273         "ldr    r6, [%[b], #108]\n\t"
01274         "sbc    r3, r5\n\t"
01275         "sbc    r4, r6\n\t"
01276         "str    r3, [%[a], #104]\n\t"
01277         "str    r4, [%[a], #108]\n\t"
01278         "ldr    r3, [%[a], #112]\n\t"
01279         "ldr    r4, [%[a], #116]\n\t"
01280         "ldr    r5, [%[b], #112]\n\t"
01281         "ldr    r6, [%[b], #116]\n\t"
01282         "sbc    r3, r5\n\t"
01283         "sbc    r4, r6\n\t"
01284         "str    r3, [%[a], #112]\n\t"
01285         "str    r4, [%[a], #116]\n\t"
01286         "ldr    r3, [%[a], #120]\n\t"
01287         "ldr    r4, [%[a], #124]\n\t"
01288         "ldr    r5, [%[b], #120]\n\t"
01289         "ldr    r6, [%[b], #124]\n\t"
01290         "sbc    r3, r5\n\t"
01291         "sbc    r4, r6\n\t"
01292         "str    r3, [%[a], #120]\n\t"
01293         "str    r4, [%[a], #124]\n\t"
01294         "sbc    %[c], %[c]\n\t"
01295         "add    %[a], #0x80\n\t"
01296         "add    %[b], #0x80\n\t"
01297         "mov    r5, #0\n\t"
01298         "sub    r5, %[c]\n\t"
01299         "ldr    r3, [%[a], #0]\n\t"
01300         "ldr    r4, [%[a], #4]\n\t"
01301         "ldr    r5, [%[b], #0]\n\t"
01302         "ldr    r6, [%[b], #4]\n\t"
01303         "sbc    r3, r5\n\t"
01304         "sbc    r4, r6\n\t"
01305         "str    r3, [%[a], #0]\n\t"
01306         "str    r4, [%[a], #4]\n\t"
01307         "ldr    r3, [%[a], #8]\n\t"
01308         "ldr    r4, [%[a], #12]\n\t"
01309         "ldr    r5, [%[b], #8]\n\t"
01310         "ldr    r6, [%[b], #12]\n\t"
01311         "sbc    r3, r5\n\t"
01312         "sbc    r4, r6\n\t"
01313         "str    r3, [%[a], #8]\n\t"
01314         "str    r4, [%[a], #12]\n\t"
01315         "ldr    r3, [%[a], #16]\n\t"
01316         "ldr    r4, [%[a], #20]\n\t"
01317         "ldr    r5, [%[b], #16]\n\t"
01318         "ldr    r6, [%[b], #20]\n\t"
01319         "sbc    r3, r5\n\t"
01320         "sbc    r4, r6\n\t"
01321         "str    r3, [%[a], #16]\n\t"
01322         "str    r4, [%[a], #20]\n\t"
01323         "ldr    r3, [%[a], #24]\n\t"
01324         "ldr    r4, [%[a], #28]\n\t"
01325         "ldr    r5, [%[b], #24]\n\t"
01326         "ldr    r6, [%[b], #28]\n\t"
01327         "sbc    r3, r5\n\t"
01328         "sbc    r4, r6\n\t"
01329         "str    r3, [%[a], #24]\n\t"
01330         "str    r4, [%[a], #28]\n\t"
01331         "ldr    r3, [%[a], #32]\n\t"
01332         "ldr    r4, [%[a], #36]\n\t"
01333         "ldr    r5, [%[b], #32]\n\t"
01334         "ldr    r6, [%[b], #36]\n\t"
01335         "sbc    r3, r5\n\t"
01336         "sbc    r4, r6\n\t"
01337         "str    r3, [%[a], #32]\n\t"
01338         "str    r4, [%[a], #36]\n\t"
01339         "ldr    r3, [%[a], #40]\n\t"
01340         "ldr    r4, [%[a], #44]\n\t"
01341         "ldr    r5, [%[b], #40]\n\t"
01342         "ldr    r6, [%[b], #44]\n\t"
01343         "sbc    r3, r5\n\t"
01344         "sbc    r4, r6\n\t"
01345         "str    r3, [%[a], #40]\n\t"
01346         "str    r4, [%[a], #44]\n\t"
01347         "ldr    r3, [%[a], #48]\n\t"
01348         "ldr    r4, [%[a], #52]\n\t"
01349         "ldr    r5, [%[b], #48]\n\t"
01350         "ldr    r6, [%[b], #52]\n\t"
01351         "sbc    r3, r5\n\t"
01352         "sbc    r4, r6\n\t"
01353         "str    r3, [%[a], #48]\n\t"
01354         "str    r4, [%[a], #52]\n\t"
01355         "ldr    r3, [%[a], #56]\n\t"
01356         "ldr    r4, [%[a], #60]\n\t"
01357         "ldr    r5, [%[b], #56]\n\t"
01358         "ldr    r6, [%[b], #60]\n\t"
01359         "sbc    r3, r5\n\t"
01360         "sbc    r4, r6\n\t"
01361         "str    r3, [%[a], #56]\n\t"
01362         "str    r4, [%[a], #60]\n\t"
01363         "ldr    r3, [%[a], #64]\n\t"
01364         "ldr    r4, [%[a], #68]\n\t"
01365         "ldr    r5, [%[b], #64]\n\t"
01366         "ldr    r6, [%[b], #68]\n\t"
01367         "sbc    r3, r5\n\t"
01368         "sbc    r4, r6\n\t"
01369         "str    r3, [%[a], #64]\n\t"
01370         "str    r4, [%[a], #68]\n\t"
01371         "ldr    r3, [%[a], #72]\n\t"
01372         "ldr    r4, [%[a], #76]\n\t"
01373         "ldr    r5, [%[b], #72]\n\t"
01374         "ldr    r6, [%[b], #76]\n\t"
01375         "sbc    r3, r5\n\t"
01376         "sbc    r4, r6\n\t"
01377         "str    r3, [%[a], #72]\n\t"
01378         "str    r4, [%[a], #76]\n\t"
01379         "ldr    r3, [%[a], #80]\n\t"
01380         "ldr    r4, [%[a], #84]\n\t"
01381         "ldr    r5, [%[b], #80]\n\t"
01382         "ldr    r6, [%[b], #84]\n\t"
01383         "sbc    r3, r5\n\t"
01384         "sbc    r4, r6\n\t"
01385         "str    r3, [%[a], #80]\n\t"
01386         "str    r4, [%[a], #84]\n\t"
01387         "ldr    r3, [%[a], #88]\n\t"
01388         "ldr    r4, [%[a], #92]\n\t"
01389         "ldr    r5, [%[b], #88]\n\t"
01390         "ldr    r6, [%[b], #92]\n\t"
01391         "sbc    r3, r5\n\t"
01392         "sbc    r4, r6\n\t"
01393         "str    r3, [%[a], #88]\n\t"
01394         "str    r4, [%[a], #92]\n\t"
01395         "ldr    r3, [%[a], #96]\n\t"
01396         "ldr    r4, [%[a], #100]\n\t"
01397         "ldr    r5, [%[b], #96]\n\t"
01398         "ldr    r6, [%[b], #100]\n\t"
01399         "sbc    r3, r5\n\t"
01400         "sbc    r4, r6\n\t"
01401         "str    r3, [%[a], #96]\n\t"
01402         "str    r4, [%[a], #100]\n\t"
01403         "ldr    r3, [%[a], #104]\n\t"
01404         "ldr    r4, [%[a], #108]\n\t"
01405         "ldr    r5, [%[b], #104]\n\t"
01406         "ldr    r6, [%[b], #108]\n\t"
01407         "sbc    r3, r5\n\t"
01408         "sbc    r4, r6\n\t"
01409         "str    r3, [%[a], #104]\n\t"
01410         "str    r4, [%[a], #108]\n\t"
01411         "ldr    r3, [%[a], #112]\n\t"
01412         "ldr    r4, [%[a], #116]\n\t"
01413         "ldr    r5, [%[b], #112]\n\t"
01414         "ldr    r6, [%[b], #116]\n\t"
01415         "sbc    r3, r5\n\t"
01416         "sbc    r4, r6\n\t"
01417         "str    r3, [%[a], #112]\n\t"
01418         "str    r4, [%[a], #116]\n\t"
01419         "ldr    r3, [%[a], #120]\n\t"
01420         "ldr    r4, [%[a], #124]\n\t"
01421         "ldr    r5, [%[b], #120]\n\t"
01422         "ldr    r6, [%[b], #124]\n\t"
01423         "sbc    r3, r5\n\t"
01424         "sbc    r4, r6\n\t"
01425         "str    r3, [%[a], #120]\n\t"
01426         "str    r4, [%[a], #124]\n\t"
01427         "sbc    %[c], %[c]\n\t"
01428         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
01429         :
01430         : "memory", "r3", "r4", "r5", "r6"
01431     );
01432 
01433     return c;
01434 }
01435 
01436 /* Add b to a into r. (r = a + b)
01437  *
01438  * r  A single precision integer.
01439  * a  A single precision integer.
01440  * b  A single precision integer.
01441  */
01442 SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a,
01443         const sp_digit* b)
01444 {
01445     sp_digit c = 0;
01446 
01447     __asm__ __volatile__ (
01448         "mov    r7, #0\n\t"
01449         "mvn    r7, r7\n\t"
01450         "ldr    r4, [%[a], #0]\n\t"
01451         "ldr    r5, [%[b], #0]\n\t"
01452         "add    r4, r5\n\t"
01453         "str    r4, [%[r], #0]\n\t"
01454         "ldr    r4, [%[a], #4]\n\t"
01455         "ldr    r5, [%[b], #4]\n\t"
01456         "adc    r4, r5\n\t"
01457         "str    r4, [%[r], #4]\n\t"
01458         "ldr    r4, [%[a], #8]\n\t"
01459         "ldr    r5, [%[b], #8]\n\t"
01460         "adc    r4, r5\n\t"
01461         "str    r4, [%[r], #8]\n\t"
01462         "ldr    r4, [%[a], #12]\n\t"
01463         "ldr    r5, [%[b], #12]\n\t"
01464         "adc    r4, r5\n\t"
01465         "str    r4, [%[r], #12]\n\t"
01466         "ldr    r4, [%[a], #16]\n\t"
01467         "ldr    r5, [%[b], #16]\n\t"
01468         "adc    r4, r5\n\t"
01469         "str    r4, [%[r], #16]\n\t"
01470         "ldr    r4, [%[a], #20]\n\t"
01471         "ldr    r5, [%[b], #20]\n\t"
01472         "adc    r4, r5\n\t"
01473         "str    r4, [%[r], #20]\n\t"
01474         "ldr    r4, [%[a], #24]\n\t"
01475         "ldr    r5, [%[b], #24]\n\t"
01476         "adc    r4, r5\n\t"
01477         "str    r4, [%[r], #24]\n\t"
01478         "ldr    r4, [%[a], #28]\n\t"
01479         "ldr    r5, [%[b], #28]\n\t"
01480         "adc    r4, r5\n\t"
01481         "str    r4, [%[r], #28]\n\t"
01482         "ldr    r4, [%[a], #32]\n\t"
01483         "ldr    r5, [%[b], #32]\n\t"
01484         "adc    r4, r5\n\t"
01485         "str    r4, [%[r], #32]\n\t"
01486         "ldr    r4, [%[a], #36]\n\t"
01487         "ldr    r5, [%[b], #36]\n\t"
01488         "adc    r4, r5\n\t"
01489         "str    r4, [%[r], #36]\n\t"
01490         "ldr    r4, [%[a], #40]\n\t"
01491         "ldr    r5, [%[b], #40]\n\t"
01492         "adc    r4, r5\n\t"
01493         "str    r4, [%[r], #40]\n\t"
01494         "ldr    r4, [%[a], #44]\n\t"
01495         "ldr    r5, [%[b], #44]\n\t"
01496         "adc    r4, r5\n\t"
01497         "str    r4, [%[r], #44]\n\t"
01498         "ldr    r4, [%[a], #48]\n\t"
01499         "ldr    r5, [%[b], #48]\n\t"
01500         "adc    r4, r5\n\t"
01501         "str    r4, [%[r], #48]\n\t"
01502         "ldr    r4, [%[a], #52]\n\t"
01503         "ldr    r5, [%[b], #52]\n\t"
01504         "adc    r4, r5\n\t"
01505         "str    r4, [%[r], #52]\n\t"
01506         "ldr    r4, [%[a], #56]\n\t"
01507         "ldr    r5, [%[b], #56]\n\t"
01508         "adc    r4, r5\n\t"
01509         "str    r4, [%[r], #56]\n\t"
01510         "ldr    r4, [%[a], #60]\n\t"
01511         "ldr    r5, [%[b], #60]\n\t"
01512         "adc    r4, r5\n\t"
01513         "str    r4, [%[r], #60]\n\t"
01514         "ldr    r4, [%[a], #64]\n\t"
01515         "ldr    r5, [%[b], #64]\n\t"
01516         "adc    r4, r5\n\t"
01517         "str    r4, [%[r], #64]\n\t"
01518         "ldr    r4, [%[a], #68]\n\t"
01519         "ldr    r5, [%[b], #68]\n\t"
01520         "adc    r4, r5\n\t"
01521         "str    r4, [%[r], #68]\n\t"
01522         "ldr    r4, [%[a], #72]\n\t"
01523         "ldr    r5, [%[b], #72]\n\t"
01524         "adc    r4, r5\n\t"
01525         "str    r4, [%[r], #72]\n\t"
01526         "ldr    r4, [%[a], #76]\n\t"
01527         "ldr    r5, [%[b], #76]\n\t"
01528         "adc    r4, r5\n\t"
01529         "str    r4, [%[r], #76]\n\t"
01530         "ldr    r4, [%[a], #80]\n\t"
01531         "ldr    r5, [%[b], #80]\n\t"
01532         "adc    r4, r5\n\t"
01533         "str    r4, [%[r], #80]\n\t"
01534         "ldr    r4, [%[a], #84]\n\t"
01535         "ldr    r5, [%[b], #84]\n\t"
01536         "adc    r4, r5\n\t"
01537         "str    r4, [%[r], #84]\n\t"
01538         "ldr    r4, [%[a], #88]\n\t"
01539         "ldr    r5, [%[b], #88]\n\t"
01540         "adc    r4, r5\n\t"
01541         "str    r4, [%[r], #88]\n\t"
01542         "ldr    r4, [%[a], #92]\n\t"
01543         "ldr    r5, [%[b], #92]\n\t"
01544         "adc    r4, r5\n\t"
01545         "str    r4, [%[r], #92]\n\t"
01546         "ldr    r4, [%[a], #96]\n\t"
01547         "ldr    r5, [%[b], #96]\n\t"
01548         "adc    r4, r5\n\t"
01549         "str    r4, [%[r], #96]\n\t"
01550         "ldr    r4, [%[a], #100]\n\t"
01551         "ldr    r5, [%[b], #100]\n\t"
01552         "adc    r4, r5\n\t"
01553         "str    r4, [%[r], #100]\n\t"
01554         "ldr    r4, [%[a], #104]\n\t"
01555         "ldr    r5, [%[b], #104]\n\t"
01556         "adc    r4, r5\n\t"
01557         "str    r4, [%[r], #104]\n\t"
01558         "ldr    r4, [%[a], #108]\n\t"
01559         "ldr    r5, [%[b], #108]\n\t"
01560         "adc    r4, r5\n\t"
01561         "str    r4, [%[r], #108]\n\t"
01562         "ldr    r4, [%[a], #112]\n\t"
01563         "ldr    r5, [%[b], #112]\n\t"
01564         "adc    r4, r5\n\t"
01565         "str    r4, [%[r], #112]\n\t"
01566         "ldr    r4, [%[a], #116]\n\t"
01567         "ldr    r5, [%[b], #116]\n\t"
01568         "adc    r4, r5\n\t"
01569         "str    r4, [%[r], #116]\n\t"
01570         "ldr    r4, [%[a], #120]\n\t"
01571         "ldr    r5, [%[b], #120]\n\t"
01572         "adc    r4, r5\n\t"
01573         "str    r4, [%[r], #120]\n\t"
01574         "ldr    r4, [%[a], #124]\n\t"
01575         "ldr    r5, [%[b], #124]\n\t"
01576         "adc    r4, r5\n\t"
01577         "str    r4, [%[r], #124]\n\t"
01578         "mov    %[c], #0\n\t"
01579         "adc    %[c], %[c]\n\t"
01580         "add    %[a], #0x80\n\t"
01581         "add    %[b], #0x80\n\t"
01582         "add    %[r], #0x80\n\t"
01583         "add    %[c], r7\n\t"
01584         "ldr    r4, [%[a], #0]\n\t"
01585         "ldr    r5, [%[b], #0]\n\t"
01586         "adc    r4, r5\n\t"
01587         "str    r4, [%[r], #0]\n\t"
01588         "ldr    r4, [%[a], #4]\n\t"
01589         "ldr    r5, [%[b], #4]\n\t"
01590         "adc    r4, r5\n\t"
01591         "str    r4, [%[r], #4]\n\t"
01592         "ldr    r4, [%[a], #8]\n\t"
01593         "ldr    r5, [%[b], #8]\n\t"
01594         "adc    r4, r5\n\t"
01595         "str    r4, [%[r], #8]\n\t"
01596         "ldr    r4, [%[a], #12]\n\t"
01597         "ldr    r5, [%[b], #12]\n\t"
01598         "adc    r4, r5\n\t"
01599         "str    r4, [%[r], #12]\n\t"
01600         "ldr    r4, [%[a], #16]\n\t"
01601         "ldr    r5, [%[b], #16]\n\t"
01602         "adc    r4, r5\n\t"
01603         "str    r4, [%[r], #16]\n\t"
01604         "ldr    r4, [%[a], #20]\n\t"
01605         "ldr    r5, [%[b], #20]\n\t"
01606         "adc    r4, r5\n\t"
01607         "str    r4, [%[r], #20]\n\t"
01608         "ldr    r4, [%[a], #24]\n\t"
01609         "ldr    r5, [%[b], #24]\n\t"
01610         "adc    r4, r5\n\t"
01611         "str    r4, [%[r], #24]\n\t"
01612         "ldr    r4, [%[a], #28]\n\t"
01613         "ldr    r5, [%[b], #28]\n\t"
01614         "adc    r4, r5\n\t"
01615         "str    r4, [%[r], #28]\n\t"
01616         "ldr    r4, [%[a], #32]\n\t"
01617         "ldr    r5, [%[b], #32]\n\t"
01618         "adc    r4, r5\n\t"
01619         "str    r4, [%[r], #32]\n\t"
01620         "ldr    r4, [%[a], #36]\n\t"
01621         "ldr    r5, [%[b], #36]\n\t"
01622         "adc    r4, r5\n\t"
01623         "str    r4, [%[r], #36]\n\t"
01624         "ldr    r4, [%[a], #40]\n\t"
01625         "ldr    r5, [%[b], #40]\n\t"
01626         "adc    r4, r5\n\t"
01627         "str    r4, [%[r], #40]\n\t"
01628         "ldr    r4, [%[a], #44]\n\t"
01629         "ldr    r5, [%[b], #44]\n\t"
01630         "adc    r4, r5\n\t"
01631         "str    r4, [%[r], #44]\n\t"
01632         "ldr    r4, [%[a], #48]\n\t"
01633         "ldr    r5, [%[b], #48]\n\t"
01634         "adc    r4, r5\n\t"
01635         "str    r4, [%[r], #48]\n\t"
01636         "ldr    r4, [%[a], #52]\n\t"
01637         "ldr    r5, [%[b], #52]\n\t"
01638         "adc    r4, r5\n\t"
01639         "str    r4, [%[r], #52]\n\t"
01640         "ldr    r4, [%[a], #56]\n\t"
01641         "ldr    r5, [%[b], #56]\n\t"
01642         "adc    r4, r5\n\t"
01643         "str    r4, [%[r], #56]\n\t"
01644         "ldr    r4, [%[a], #60]\n\t"
01645         "ldr    r5, [%[b], #60]\n\t"
01646         "adc    r4, r5\n\t"
01647         "str    r4, [%[r], #60]\n\t"
01648         "ldr    r4, [%[a], #64]\n\t"
01649         "ldr    r5, [%[b], #64]\n\t"
01650         "adc    r4, r5\n\t"
01651         "str    r4, [%[r], #64]\n\t"
01652         "ldr    r4, [%[a], #68]\n\t"
01653         "ldr    r5, [%[b], #68]\n\t"
01654         "adc    r4, r5\n\t"
01655         "str    r4, [%[r], #68]\n\t"
01656         "ldr    r4, [%[a], #72]\n\t"
01657         "ldr    r5, [%[b], #72]\n\t"
01658         "adc    r4, r5\n\t"
01659         "str    r4, [%[r], #72]\n\t"
01660         "ldr    r4, [%[a], #76]\n\t"
01661         "ldr    r5, [%[b], #76]\n\t"
01662         "adc    r4, r5\n\t"
01663         "str    r4, [%[r], #76]\n\t"
01664         "ldr    r4, [%[a], #80]\n\t"
01665         "ldr    r5, [%[b], #80]\n\t"
01666         "adc    r4, r5\n\t"
01667         "str    r4, [%[r], #80]\n\t"
01668         "ldr    r4, [%[a], #84]\n\t"
01669         "ldr    r5, [%[b], #84]\n\t"
01670         "adc    r4, r5\n\t"
01671         "str    r4, [%[r], #84]\n\t"
01672         "ldr    r4, [%[a], #88]\n\t"
01673         "ldr    r5, [%[b], #88]\n\t"
01674         "adc    r4, r5\n\t"
01675         "str    r4, [%[r], #88]\n\t"
01676         "ldr    r4, [%[a], #92]\n\t"
01677         "ldr    r5, [%[b], #92]\n\t"
01678         "adc    r4, r5\n\t"
01679         "str    r4, [%[r], #92]\n\t"
01680         "ldr    r4, [%[a], #96]\n\t"
01681         "ldr    r5, [%[b], #96]\n\t"
01682         "adc    r4, r5\n\t"
01683         "str    r4, [%[r], #96]\n\t"
01684         "ldr    r4, [%[a], #100]\n\t"
01685         "ldr    r5, [%[b], #100]\n\t"
01686         "adc    r4, r5\n\t"
01687         "str    r4, [%[r], #100]\n\t"
01688         "ldr    r4, [%[a], #104]\n\t"
01689         "ldr    r5, [%[b], #104]\n\t"
01690         "adc    r4, r5\n\t"
01691         "str    r4, [%[r], #104]\n\t"
01692         "ldr    r4, [%[a], #108]\n\t"
01693         "ldr    r5, [%[b], #108]\n\t"
01694         "adc    r4, r5\n\t"
01695         "str    r4, [%[r], #108]\n\t"
01696         "ldr    r4, [%[a], #112]\n\t"
01697         "ldr    r5, [%[b], #112]\n\t"
01698         "adc    r4, r5\n\t"
01699         "str    r4, [%[r], #112]\n\t"
01700         "ldr    r4, [%[a], #116]\n\t"
01701         "ldr    r5, [%[b], #116]\n\t"
01702         "adc    r4, r5\n\t"
01703         "str    r4, [%[r], #116]\n\t"
01704         "ldr    r4, [%[a], #120]\n\t"
01705         "ldr    r5, [%[b], #120]\n\t"
01706         "adc    r4, r5\n\t"
01707         "str    r4, [%[r], #120]\n\t"
01708         "ldr    r4, [%[a], #124]\n\t"
01709         "ldr    r5, [%[b], #124]\n\t"
01710         "adc    r4, r5\n\t"
01711         "str    r4, [%[r], #124]\n\t"
01712         "mov    %[c], #0\n\t"
01713         "adc    %[c], %[c]\n\t"
01714         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
01715         :
01716         : "memory", "r4", "r5", "r7"
01717     );
01718 
01719     return c;
01720 }
01721 
01722 /* Mask a 32-digit number: each digit of a is ANDed with m and stored in r.
01723  *
01724  * r  Destination number; digits r[0..31] are written.
01725  * a  Source number; digits a[0..31] are read.
01726  * m  Mask ANDed with every digit.
01727  */
01728 static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m)
01729 {
01730 #ifdef WOLFSSL_SP_SMALL
01731     int idx;
01732 
01733     for (idx = 0; idx != 32; ++idx) {
01734         r[idx] = m & a[idx];
01735     }
01736 #else
01737     const sp_digit* src;
01738     sp_digit* dst;
01739     for (src = a, dst = r; src != a + 32; src += 8, dst += 8) {
01740         dst[0] = m & src[0];
01741         dst[1] = m & src[1];
01742         dst[2] = m & src[2];
01743         dst[3] = m & src[3];
01744         dst[4] = m & src[4];
01745         dst[5] = m & src[5];
01746         dst[6] = m & src[6];
01747         dst[7] = m & src[7];
01748     }
01749 #endif
01750 }
01751 
01752 /* Multiply a and b into r. (r = a * b)
01753  * Karatsuba split: three 32-digit multiplies on the halves of a and b, recombined in place in r.
01754  * r  A single precision integer. Written up to r[127], so it must hold 128 digits.
01755  * a  A single precision integer. Read as halves a[0..31] and a[32..63].
01756  * b  A single precision integer. Read as halves b[0..31] and b[32..63].
01757  */
01758 SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a,
01759         const sp_digit* b)
01760 {
01761     sp_digit* z0 = r; /* the low product is built directly in r[0..63] */
01762     sp_digit z1[64];
01763     sp_digit a1[32];
01764     sp_digit b1[32];
01765     sp_digit z2[64];
01766     sp_digit u, ca, cb;
01767 
01768     ca = sp_2048_add_32(a1, a, &a[32]); /* a1 = a_lo + a_hi, ca = carry out */
01769     cb = sp_2048_add_32(b1, b, &b[32]); /* b1 = b_lo + b_hi, cb = carry out */
01770     u  = ca & cb; /* set only when both sums carried */
01771     sp_2048_mul_32(z1, a1, b1); /* z1 = a1 * b1, carries ca/cb not included */
01772     sp_2048_mul_32(z2, &a[32], &b[32]); /* z2 = a_hi * b_hi */
01773     sp_2048_mul_32(z0, a, b); /* z0 = a_lo * b_lo, lands in r[0..63] */
01774     sp_2048_mask_32(r + 64, a1, 0 - cb); /* r[64..95] = a1 when cb is set, else 0 */
01775     sp_2048_mask_32(b1, b1, 0 - ca); /* b1 kept when ca is set, else zeroed */
01776     u += sp_2048_add_32(r + 64, r + 64, b1); /* cross terms owed to the carries left out of z1 */
01777     u += sp_2048_sub_in_place_64(z1, z2); /* z1 -= z2; its return value is folded into u */
01778     u += sp_2048_sub_in_place_64(z1, z0); /* z1 -= z0; must follow the z0 multiply since z0 aliases r */
01779     u += sp_2048_add_64(r + 32, r + 32, z1); /* add the middle term starting at digit 32 */
01780     r[96] = u;
01781     XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1)); /* clear r[97..127] */
01782     (void)sp_2048_add_64(r + 64, r + 64, z2); /* add the high product starting at digit 64 */
01783 }
01784 
01785 /* Square a and put result in r. (r = a * a)
01786  * Karatsuba split: three 32-digit squarings on the halves of a, recombined in place in r.
01787  * r  A single precision integer. Written up to r[127], so it must hold 128 digits.
01788  * a  A single precision integer. Read as halves a[0..31] and a[32..63].
01789  */
01790 SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a)
01791 {
01792     sp_digit* z0 = r; /* the low square is built directly in r[0..63] */
01793     sp_digit z2[64];
01794     sp_digit z1[64];
01795     sp_digit a1[32];
01796     sp_digit u;
01797 
01798     u = sp_2048_add_32(a1, a, &a[32]); /* a1 = a_lo + a_hi, u = carry out */
01799     sp_2048_sqr_32(z1, a1); /* z1 = a1 * a1, carry u not included */
01800     sp_2048_sqr_32(z2, &a[32]); /* z2 = a_hi * a_hi */
01801     sp_2048_sqr_32(z0, a); /* z0 = a_lo * a_lo, lands in r[0..63] */
01802     sp_2048_mask_32(r + 64, a1, 0 - u); /* r[64..95] = a1 when u is set, else 0 */
01803     u += sp_2048_add_32(r + 64, r + 64, r + 64); /* double it: the carry meets a1 twice in the square */
01804     u += sp_2048_sub_in_place_64(z1, z2); /* z1 -= z2; its return value is folded into u */
01805     u += sp_2048_sub_in_place_64(z1, z0); /* z1 -= z0; must follow the z0 squaring since z0 aliases r */
01806     u += sp_2048_add_64(r + 32, r + 32, z1); /* add the middle term starting at digit 32 */
01807     r[96] = u;
01808     XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1)); /* clear r[97..127] */
01809     (void)sp_2048_add_64(r + 64, r + 64, z2); /* add the high square starting at digit 64 */
01810 }
01811 
01812 #endif /* !WOLFSSL_SP_SMALL */
01813 #ifdef WOLFSSL_SP_SMALL
01814 /* Add b to a into r. (r = a + b)
01815  * Loop form: one 32-bit word per pass over 256 bytes (64 words) of a, b and r.
01816  * r  A single precision integer.
01817  * a  A single precision integer.
01818  * b  A single precision integer.
01819  * Returns c as left by the final adc: the carry out of the last word. */
01820 SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a,
01821         const sp_digit* b)
01822 {
01823     sp_digit c = 0;
01824 
01825     __asm__ __volatile__ (
01826         "mov    r6, %[a]\n\t"
01827         "mov    r7, #0\n\t"
01828         "mov    r4, #1\n\t"
01829         "lsl    r4, #8\n\t" /* r4 = 256, the byte length walked */
01830         "sub    r7, #1\n\t" /* r7 = 0 - 1: all bits set */
01831         "add    r6, r4\n\t" /* r6 = a + 256: loop end pointer */
01832         "\n1:\n\t"
01833         "add    %[c], r7\n\t" /* c + all-ones: meant to move the saved carry back into the carry flag (assumes this add sets flags) */
01834         "ldr    r4, [%[a]]\n\t"
01835         "ldr    r5, [%[b]]\n\t"
01836         "adc    r4, r5\n\t"
01837         "str    r4, [%[r]]\n\t"
01838         "mov    %[c], #0\n\t"
01839         "adc    %[c], %[c]\n\t" /* c = 0 + 0 + carry: saved because the pointer adds and cmp below disturb the flags */
01840         "add    %[a], #4\n\t"
01841         "add    %[b], #4\n\t"
01842         "add    %[r], #4\n\t"
01843         "cmp    %[a], r6\n\t"
01844         "bne    1b\n\t"
01845         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
01846         :
01847         : "memory", "r4", "r5", "r6", "r7"
01848     );
01849 
01850     return c;
01851 }
01852 
01853 #endif /* WOLFSSL_SP_SMALL */
01854 #ifdef WOLFSSL_SP_SMALL
01855 /* Sub b from a into a. (a -= b)
01856  * Loop form: two 32-bit words per pass over 256 bytes (64 words) of a and b.
01857  * a  A single precision integer.
01858  * b  A single precision integer.
01859  * Returns c as left by the final "sbc c, c": 0 with no borrow, all bits set with a borrow. */
01860 SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a,
01861         const sp_digit* b)
01862 {
01863     sp_digit c = 0;
01864     __asm__ __volatile__ (
01865         "mov    r7, %[a]\n\t"
01866         "mov    r5, #1\n\t"
01867         "lsl    r5, #8\n\t" /* r5 = 256 */
01868         "add    r7, r5\n\t" /* r7 = a + 256: loop end pointer */
01869         "\n1:\n\t"
01870         "mov    r5, #0\n\t"
01871         "sub    r5, %[c]\n\t" /* 0 - c: meant to turn the saved borrow back into the carry flag (assumes this sub sets flags); r5 itself is overwritten below */
01872         "ldr    r3, [%[a]]\n\t"
01873         "ldr    r4, [%[a], #4]\n\t"
01874         "ldr    r5, [%[b]]\n\t"
01875         "ldr    r6, [%[b], #4]\n\t"
01876         "sbc    r3, r5\n\t"
01877         "sbc    r4, r6\n\t"
01878         "str    r3, [%[a]]\n\t"
01879         "str    r4, [%[a], #4]\n\t"
01880         "sbc    %[c], %[c]\n\t" /* c = 0 or all-ones: borrow saved before the pointer adds and cmp disturb the flags */
01881         "add    %[a], #8\n\t"
01882         "add    %[b], #8\n\t"
01883         "cmp    %[a], r7\n\t"
01884         "bne    1b\n\t"
01885         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
01886         :
01887         : "memory", "r3", "r4", "r5", "r6", "r7"
01888     );
01889 
01890     return c;
01891 }
01892 
01893 #endif /* WOLFSSL_SP_SMALL */
01894 #ifdef WOLFSSL_SP_SMALL
01895 /* Multiply a and b into r. (r = a * b)
01896  * Each result word is summed in r3/r4/r5 from 16x16-bit partial products and stored to tmp; tmp is copied to r only after the asm has finished reading a and b.
01897  * r  A single precision integer. Receives sizeof(tmp) bytes, i.e. 128 words.
01898  * a  A single precision integer. Words at byte offsets 0..252 are read.
01899  * b  A single precision integer. Words at byte offsets 0..252 are read.
01900  */
01901 SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a,
01902         const sp_digit* b)
01903 {
01904     sp_digit tmp[64 * 2];
01905     __asm__ __volatile__ (
01906         "mov    r3, #0\n\t"
01907         "mov    r4, #0\n\t"
01908         "mov    r8, r3\n\t" /* r8 = byte offset of the result word being built, starts at 0 */
01909         "mov    r11, %[r]\n\t" /* r11 = tmp (the [r] operand is tmp, not the r parameter) */
01910         "mov    r9, %[a]\n\t" /* r9 = base of a */
01911         "mov    r10, %[b]\n\t" /* r10 = base of b */
01912         "mov    r6, #1\n\t"
01913         "lsl    r6, r6, #8\n\t"
01914         "add    r6, r9\n\t"
01915         "mov    r12, r6\n\t" /* r12 = a + 256: one past the last word of a */
01916         "\n1:\n\t"
01917         "mov    %[r], #0\n\t" /* %[r] serves as a zero register for the adc instructions below */
01918         "mov    r5, #0\n\t"
01919         "mov    r6, #252\n\t"
01920         "mov    %[a], r8\n\t"
01921         "sub    %[a], r6\n\t"
01922         "sbc    r6, r6\n\t"
01923         "mvn    r6, r6\n\t"
01924         "and    %[a], r6\n\t" /* a offset = r8 - 252 when r8 >= 252, else 0 (done with a mask, no branch) */
01925         "mov    %[b], r8\n\t"
01926         "sub    %[b], %[a]\n\t" /* b offset = r8 - a offset */
01927         "add    %[a], r9\n\t"
01928         "add    %[b], r10\n\t"
01929         "\n2:\n\t"
01930         "# Multiply Start\n\t"
01931         "ldr    r6, [%[a]]\n\t"
01932         "ldr    r7, [%[b]]\n\t"
01933         "lsl    r6, r6, #16\n\t"
01934         "lsl    r7, r7, #16\n\t"
01935         "lsr    r6, r6, #16\n\t"
01936         "lsr    r7, r7, #16\n\t"
01937         "mul    r7, r6\n\t" /* low half of a word * low half of b word */
01938         "add    r3, r7\n\t"
01939         "adc    r4, %[r]\n\t"
01940         "adc    r5, %[r]\n\t"
01941         "ldr    r7, [%[b]]\n\t"
01942         "lsr    r7, r7, #16\n\t"
01943         "mul    r6, r7\n\t" /* low half of a word * high half of b word; added in shifted left by 16 */
01944         "lsr    r7, r6, #16\n\t"
01945         "lsl    r6, r6, #16\n\t"
01946         "add    r3, r6\n\t"
01947         "adc    r4, r7\n\t"
01948         "adc    r5, %[r]\n\t"
01949         "ldr    r6, [%[a]]\n\t"
01950         "ldr    r7, [%[b]]\n\t"
01951         "lsr    r6, r6, #16\n\t"
01952         "lsr    r7, r7, #16\n\t"
01953         "mul    r7, r6\n\t" /* high half * high half; added one word up, into r4 */
01954         "add    r4, r7\n\t"
01955         "adc    r5, %[r]\n\t"
01956         "ldr    r7, [%[b]]\n\t"
01957         "lsl    r7, r7, #16\n\t"
01958         "lsr    r7, r7, #16\n\t"
01959         "mul    r6, r7\n\t" /* high half of a word * low half of b word; added in shifted left by 16 */
01960         "lsr    r7, r6, #16\n\t"
01961         "lsl    r6, r6, #16\n\t"
01962         "add    r3, r6\n\t"
01963         "adc    r4, r7\n\t"
01964         "adc    r5, %[r]\n\t"
01965         "# Multiply Done\n\t"
01966         "add    %[a], #4\n\t" /* a pointer walks up ... */
01967         "sub    %[b], #4\n\t" /* ... while the b pointer walks down */
01968         "cmp    %[a], r12\n\t"
01969         "beq    3f\n\t" /* reached the end of a */
01970         "mov    r6, r8\n\t"
01971         "add    r6, r9\n\t"
01972         "cmp    %[a], r6\n\t"
01973         "ble    2b\n\t" /* keep going while a pointer <= a base + r8 */
01974         "\n3:\n\t"
01975         "mov    %[r], r11\n\t"
01976         "mov    r7, r8\n\t"
01977         "str    r3, [%[r], r7]\n\t" /* store the finished word at tmp + r8 */
01978         "mov    r3, r4\n\t" /* shift the accumulator down one word; r5 is cleared at label 1 */
01979         "mov    r4, r5\n\t"
01980         "add    r7, #4\n\t"
01981         "mov    r8, r7\n\t"
01982         "mov    r6, #1\n\t"
01983         "lsl    r6, r6, #8\n\t"
01984         "add    r6, #248\n\t" /* r6 = 504 */
01985         "cmp    r7, r6\n\t"
01986         "ble    1b\n\t"
01987         "str    r3, [%[r], r7]\n\t" /* top word, at byte offset 508 */
01988         "mov    %[a], r9\n\t" /* put a and b back to their entry values; they are declared as inputs */
01989         "mov    %[b], r10\n\t"
01990         :
01991         : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
01992         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
01993     );
01994 
01995     XMEMCPY(r, tmp, sizeof(tmp));
01996 }
01997 
01998 /* Square a and put result in r. (r = a * a)
01999  * The 128 result words are built in a 512-byte scratch area taken off sp inside the asm, then copied to r and the area released.
02000  * r  A single precision integer. Receives 128 words (byte offsets 0..508).
02001  * a  A single precision integer. Words at byte offsets 0..252 are read.
02002  */
02003 SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a)
02004 {
02005     __asm__ __volatile__ (
02006         "mov    r3, #0\n\t"
02007         "mov    r4, #0\n\t"
02008         "mov    r5, #0\n\t"
02009         "mov    r8, r3\n\t" /* r8 = byte offset of the result word being built, starts at 0 */
02010         "mov    r11, %[r]\n\t" /* r11 = r, kept for the final copy */
02011         "mov    r6, #2\n\t"
02012         "lsl    r6, r6, #8\n\t"
02013         "neg    r6, r6\n\t"
02014         "add    sp, r6\n\t" /* sp -= 512: scratch area for the result words */
02015         "mov    r10, sp\n\t" /* r10 = base of the scratch area */
02016         "mov    r9, %[a]\n\t" /* r9 = base of a */
02017         "\n1:\n\t"
02018         "mov    %[r], #0\n\t" /* %[r] serves as a zero register for the adc instructions below */
02019         "mov    r6, #252\n\t"
02020         "mov    %[a], r8\n\t"
02021         "sub    %[a], r6\n\t"
02022         "sbc    r6, r6\n\t"
02023         "mvn    r6, r6\n\t"
02024         "and    %[a], r6\n\t" /* low offset = r8 - 252 when r8 >= 252, else 0 (done with a mask, no branch) */
02025         "mov    r2, r8\n\t"
02026         "sub    r2, %[a]\n\t" /* high offset = r8 - low offset */
02027         "add    %[a], r9\n\t"
02028         "add    r2, r9\n\t"
02029         "\n2:\n\t"
02030         "cmp    r2, %[a]\n\t"
02031         "beq    4f\n\t" /* both pointers on the same word: take the squaring path */
02032         "# Multiply * 2: Start\n\t"
02033         "ldr    r6, [%[a]]\n\t"
02034         "ldr    r7, [r2]\n\t"
02035         "lsl    r6, r6, #16\n\t"
02036         "lsl    r7, r7, #16\n\t"
02037         "lsr    r6, r6, #16\n\t"
02038         "lsr    r7, r7, #16\n\t"
02039         "mul    r7, r6\n\t" /* low half * low half */
02040         "add    r3, r7\n\t"
02041         "adc    r4, %[r]\n\t"
02042         "adc    r5, %[r]\n\t"
02043         "add    r3, r7\n\t" /* each partial product is added a second time: this word pair occurs twice in the square */
02044         "adc    r4, %[r]\n\t"
02045         "adc    r5, %[r]\n\t"
02046         "ldr    r7, [r2]\n\t"
02047         "lsr    r7, r7, #16\n\t"
02048         "mul    r6, r7\n\t" /* low half of [a] word * high half of [r2] word; added in shifted left by 16 */
02049         "lsr    r7, r6, #16\n\t"
02050         "lsl    r6, r6, #16\n\t"
02051         "add    r3, r6\n\t"
02052         "adc    r4, r7\n\t"
02053         "adc    r5, %[r]\n\t"
02054         "add    r3, r6\n\t"
02055         "adc    r4, r7\n\t"
02056         "adc    r5, %[r]\n\t"
02057         "ldr    r6, [%[a]]\n\t"
02058         "ldr    r7, [r2]\n\t"
02059         "lsr    r6, r6, #16\n\t"
02060         "lsr    r7, r7, #16\n\t"
02061         "mul    r7, r6\n\t" /* high half * high half; added one word up, into r4 */
02062         "add    r4, r7\n\t"
02063         "adc    r5, %[r]\n\t"
02064         "add    r4, r7\n\t"
02065         "adc    r5, %[r]\n\t"
02066         "ldr    r7, [r2]\n\t"
02067         "lsl    r7, r7, #16\n\t"
02068         "lsr    r7, r7, #16\n\t"
02069         "mul    r6, r7\n\t" /* high half of [a] word * low half of [r2] word; added in shifted left by 16 */
02070         "lsr    r7, r6, #16\n\t"
02071         "lsl    r6, r6, #16\n\t"
02072         "add    r3, r6\n\t"
02073         "adc    r4, r7\n\t"
02074         "adc    r5, %[r]\n\t"
02075         "add    r3, r6\n\t"
02076         "adc    r4, r7\n\t"
02077         "adc    r5, %[r]\n\t"
02078         "# Multiply * 2: Done\n\t"
02079         "bal    5f\n\t"
02080         "\n4:\n\t"
02081         "# Square: Start\n\t"
02082         "ldr    r6, [%[a]]\n\t"
02083         "lsr    r7, r6, #16\n\t"
02084         "lsl    r6, r6, #16\n\t"
02085         "lsr    r6, r6, #16\n\t"
02086         "mul    r6, r6\n\t" /* low half squared */
02087         "add    r3, r6\n\t"
02088         "adc    r4, %[r]\n\t"
02089         "adc    r5, %[r]\n\t"
02090         "mul    r7, r7\n\t" /* high half squared; added one word up, into r4 */
02091         "add    r4, r7\n\t"
02092         "adc    r5, %[r]\n\t"
02093         "ldr    r6, [%[a]]\n\t"
02094         "lsr    r7, r6, #16\n\t"
02095         "lsl    r6, r6, #16\n\t"
02096         "lsr    r6, r6, #16\n\t"
02097         "mul    r6, r7\n\t" /* low half * high half */
02098         "lsr    r7, r6, #15\n\t" /* shifts of 15/17 rather than 16/16: the cross product is doubled as it is split */
02099         "lsl    r6, r6, #17\n\t"
02100         "add    r3, r6\n\t"
02101         "adc    r4, r7\n\t"
02102         "adc    r5, %[r]\n\t"
02103         "# Square: Done\n\t"
02104         "\n5:\n\t"
02105         "add    %[a], #4\n\t" /* low pointer walks up ... */
02106         "sub    r2, #4\n\t" /* ... while the high pointer walks down */
02107         "mov    r6, #1\n\t"
02108         "lsl    r6, r6, #8\n\t"
02109         "add    r6, r9\n\t" /* r6 = a base + 256: one past the last word */
02110         "cmp    %[a], r6\n\t"
02111         "beq    3f\n\t"
02112         "cmp    %[a], r2\n\t"
02113         "bgt    3f\n\t" /* pointers have crossed: this result word is complete */
02114         "mov    r7, r8\n\t"
02115         "add    r7, r9\n\t"
02116         "cmp    %[a], r7\n\t"
02117         "ble    2b\n\t"
02118         "\n3:\n\t"
02119         "mov    %[r], r10\n\t"
02120         "mov    r7, r8\n\t"
02121         "str    r3, [%[r], r7]\n\t" /* store the finished word at scratch + r8 */
02122         "mov    r3, r4\n\t" /* shift the accumulator down one word */
02123         "mov    r4, r5\n\t"
02124         "mov    r5, #0\n\t"
02125         "add    r7, #4\n\t"
02126         "mov    r8, r7\n\t"
02127         "mov    r6, #1\n\t"
02128         "lsl    r6, r6, #8\n\t"
02129         "add    r6, #248\n\t" /* r6 = 504 */
02130         "cmp    r7, r6\n\t"
02131         "ble    1b\n\t"
02132         "mov    %[a], r9\n\t"
02133         "str    r3, [%[r], r7]\n\t" /* top word, at byte offset 508 of the scratch area */
02134         "mov    %[r], r11\n\t" /* %[r] = caller's r again */
02135         "mov    %[a], r10\n\t" /* %[a] now points at the scratch area: the copy source */
02136         "mov    r3, #1\n\t"
02137         "lsl    r3, r3, #8\n\t"
02138         "add    r3, #252\n\t" /* r3 = 508: copy runs from the top word down */
02139         "\n4:\n\t"
02140         "ldr    r6, [%[a], r3]\n\t"
02141         "str    r6, [%[r], r3]\n\t"
02142         "sub    r3, #4\n\t"
02143         "bge    4b\n\t" /* loop until the offset goes negative, so offset 0 is copied too */
02144         "mov    r6, #2\n\t"
02145         "lsl    r6, r6, #8\n\t"
02146         "add    sp, r6\n\t" /* sp += 512: release the scratch area */
02147         :
02148         : [r] "r" (r), [a] "r" (a)
02149         : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
02150     );
02151 }
02152 
02153 #endif /* WOLFSSL_SP_SMALL */
02154 #if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
02155 #ifdef WOLFSSL_SP_SMALL
02156 /* Apply the mask m to all 32 digits of a, storing the masked digits in r.
02157  *
02158  * r  Output number; digits r[0..31] are written.
02159  * a  Input number; digits a[0..31] are read.
02160  * m  Value ANDed with each digit of a.
02161  */
02162 static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m)
02163 {
02164     const sp_digit* end = a + 32;
02165 
02166     while (a != end) {
02167         *r++ = m & *a++;
02168     }
02169 }
02170 
02171 #endif /* WOLFSSL_SP_SMALL */
02172 #ifdef WOLFSSL_SP_SMALL
02173 /* Add b to a into r. (r = a + b)
02174  * Loop form: one 32-bit word per pass over 128 bytes (32 words) of a, b and r.
02175  * r  A single precision integer.
02176  * a  A single precision integer.
02177  * b  A single precision integer.
02178  * Returns c as left by the final adc: the carry out of the last word. */
02179 SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a,
02180         const sp_digit* b)
02181 {
02182     sp_digit c = 0;
02183 
02184     __asm__ __volatile__ (
02185         "mov    r6, %[a]\n\t"
02186         "mov    r7, #0\n\t"
02187         "add    r6, #128\n\t" /* r6 = a + 128: loop end pointer */
02188         "sub    r7, #1\n\t" /* r7 = 0 - 1: all bits set */
02189         "\n1:\n\t"
02190         "add    %[c], r7\n\t" /* c + all-ones: meant to move the saved carry back into the carry flag (assumes this add sets flags) */
02191         "ldr    r4, [%[a]]\n\t"
02192         "ldr    r5, [%[b]]\n\t"
02193         "adc    r4, r5\n\t"
02194         "str    r4, [%[r]]\n\t"
02195         "mov    %[c], #0\n\t"
02196         "adc    %[c], %[c]\n\t" /* c = 0 + 0 + carry: saved because the pointer adds and cmp below disturb the flags */
02197         "add    %[a], #4\n\t"
02198         "add    %[b], #4\n\t"
02199         "add    %[r], #4\n\t"
02200         "cmp    %[a], r6\n\t"
02201         "bne    1b\n\t"
02202         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
02203         :
02204         : "memory", "r4", "r5", "r6", "r7"
02205     );
02206 
02207     return c;
02208 }
02209 
02210 #endif /* WOLFSSL_SP_SMALL */
02211 #ifdef WOLFSSL_SP_SMALL
02212 /* Sub b from a into a. (a -= b)
02213  * Loop form: two 32-bit words per pass over 128 bytes (32 words) of a and b.
02214  * a  A single precision integer.
02215  * b  A single precision integer.
02216  * Returns c as left by the final "sbc c, c": 0 with no borrow, all bits set with a borrow. */
02217 SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a,
02218         const sp_digit* b)
02219 {
02220     sp_digit c = 0;
02221     __asm__ __volatile__ (
02222         "mov    r7, %[a]\n\t"
02223         "add    r7, #128\n\t" /* r7 = a + 128: loop end pointer */
02224         "\n1:\n\t"
02225         "mov    r5, #0\n\t"
02226         "sub    r5, %[c]\n\t" /* 0 - c: meant to turn the saved borrow back into the carry flag (assumes this sub sets flags); r5 itself is overwritten below */
02227         "ldr    r3, [%[a]]\n\t"
02228         "ldr    r4, [%[a], #4]\n\t"
02229         "ldr    r5, [%[b]]\n\t"
02230         "ldr    r6, [%[b], #4]\n\t"
02231         "sbc    r3, r5\n\t"
02232         "sbc    r4, r6\n\t"
02233         "str    r3, [%[a]]\n\t"
02234         "str    r4, [%[a], #4]\n\t"
02235         "sbc    %[c], %[c]\n\t" /* c = 0 or all-ones: borrow saved before the pointer adds and cmp disturb the flags */
02236         "add    %[a], #8\n\t"
02237         "add    %[b], #8\n\t"
02238         "cmp    %[a], r7\n\t"
02239         "bne    1b\n\t"
02240         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
02241         :
02242         : "memory", "r3", "r4", "r5", "r6", "r7"
02243     );
02244 
02245     return c;
02246 }
02247 
02248 #endif /* WOLFSSL_SP_SMALL */
02249 #ifdef WOLFSSL_SP_SMALL
02250 /* Multiply a and b into r. (r = a * b)
02251  * Each result word is summed in r3/r4/r5 from 16x16-bit partial products and stored to tmp; tmp is copied to r only after the asm has finished reading a and b.
02252  * r  A single precision integer. Receives sizeof(tmp) bytes, i.e. 64 words.
02253  * a  A single precision integer. Words at byte offsets 0..124 are read.
02254  * b  A single precision integer. Words at byte offsets 0..124 are read.
02255  */
02256 SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a,
02257         const sp_digit* b)
02258 {
02259     sp_digit tmp[32 * 2];
02260     __asm__ __volatile__ (
02261         "mov    r3, #0\n\t"
02262         "mov    r4, #0\n\t"
02263         "mov    r8, r3\n\t" /* r8 = byte offset of the result word being built, starts at 0 */
02264         "mov    r11, %[r]\n\t" /* r11 = tmp (the [r] operand is tmp, not the r parameter) */
02265         "mov    r9, %[a]\n\t" /* r9 = base of a */
02266         "mov    r10, %[b]\n\t" /* r10 = base of b */
02267         "mov    r6, #128\n\t"
02268         "add    r6, r9\n\t"
02269         "mov    r12, r6\n\t" /* r12 = a + 128: one past the last word of a */
02270         "\n1:\n\t"
02271         "mov    %[r], #0\n\t" /* %[r] serves as a zero register for the adc instructions below */
02272         "mov    r5, #0\n\t"
02273         "mov    r6, #124\n\t"
02274         "mov    %[a], r8\n\t"
02275         "sub    %[a], r6\n\t"
02276         "sbc    r6, r6\n\t"
02277         "mvn    r6, r6\n\t"
02278         "and    %[a], r6\n\t" /* a offset = r8 - 124 when r8 >= 124, else 0 (done with a mask, no branch) */
02279         "mov    %[b], r8\n\t"
02280         "sub    %[b], %[a]\n\t" /* b offset = r8 - a offset */
02281         "add    %[a], r9\n\t"
02282         "add    %[b], r10\n\t"
02283         "\n2:\n\t"
02284         "# Multiply Start\n\t"
02285         "ldr    r6, [%[a]]\n\t"
02286         "ldr    r7, [%[b]]\n\t"
02287         "lsl    r6, r6, #16\n\t"
02288         "lsl    r7, r7, #16\n\t"
02289         "lsr    r6, r6, #16\n\t"
02290         "lsr    r7, r7, #16\n\t"
02291         "mul    r7, r6\n\t" /* low half of a word * low half of b word */
02292         "add    r3, r7\n\t"
02293         "adc    r4, %[r]\n\t"
02294         "adc    r5, %[r]\n\t"
02295         "ldr    r7, [%[b]]\n\t"
02296         "lsr    r7, r7, #16\n\t"
02297         "mul    r6, r7\n\t" /* low half of a word * high half of b word; added in shifted left by 16 */
02298         "lsr    r7, r6, #16\n\t"
02299         "lsl    r6, r6, #16\n\t"
02300         "add    r3, r6\n\t"
02301         "adc    r4, r7\n\t"
02302         "adc    r5, %[r]\n\t"
02303         "ldr    r6, [%[a]]\n\t"
02304         "ldr    r7, [%[b]]\n\t"
02305         "lsr    r6, r6, #16\n\t"
02306         "lsr    r7, r7, #16\n\t"
02307         "mul    r7, r6\n\t" /* high half * high half; added one word up, into r4 */
02308         "add    r4, r7\n\t"
02309         "adc    r5, %[r]\n\t"
02310         "ldr    r7, [%[b]]\n\t"
02311         "lsl    r7, r7, #16\n\t"
02312         "lsr    r7, r7, #16\n\t"
02313         "mul    r6, r7\n\t" /* high half of a word * low half of b word; added in shifted left by 16 */
02314         "lsr    r7, r6, #16\n\t"
02315         "lsl    r6, r6, #16\n\t"
02316         "add    r3, r6\n\t"
02317         "adc    r4, r7\n\t"
02318         "adc    r5, %[r]\n\t"
02319         "# Multiply Done\n\t"
02320         "add    %[a], #4\n\t" /* a pointer walks up ... */
02321         "sub    %[b], #4\n\t" /* ... while the b pointer walks down */
02322         "cmp    %[a], r12\n\t"
02323         "beq    3f\n\t" /* reached the end of a */
02324         "mov    r6, r8\n\t"
02325         "add    r6, r9\n\t"
02326         "cmp    %[a], r6\n\t"
02327         "ble    2b\n\t" /* keep going while a pointer <= a base + r8 */
02328         "\n3:\n\t"
02329         "mov    %[r], r11\n\t"
02330         "mov    r7, r8\n\t"
02331         "str    r3, [%[r], r7]\n\t" /* store the finished word at tmp + r8 */
02332         "mov    r3, r4\n\t" /* shift the accumulator down one word; r5 is cleared at label 1 */
02333         "mov    r4, r5\n\t"
02334         "add    r7, #4\n\t"
02335         "mov    r8, r7\n\t"
02336         "mov    r6, #248\n\t"
02337         "cmp    r7, r6\n\t"
02338         "ble    1b\n\t"
02339         "str    r3, [%[r], r7]\n\t" /* top word, at byte offset 252 */
02340         "mov    %[a], r9\n\t" /* put a and b back to their entry values; they are declared as inputs */
02341         "mov    %[b], r10\n\t"
02342         :
02343         : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
02344         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
02345     );
02346 
02347     XMEMCPY(r, tmp, sizeof(tmp));
02348 }
02349 
02350 /* Square a and put result in r. (r = a * a)
02351  * The 64 result words are built in a 256-byte scratch area taken off sp inside the asm, then copied to r and the area released.
02352  * r  A single precision integer. Receives 64 words (byte offsets 0..252).
02353  * a  A single precision integer. Words at byte offsets 0..124 are read.
02354  */
02355 SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a)
02356 {
02357     __asm__ __volatile__ (
02358         "mov    r3, #0\n\t"
02359         "mov    r4, #0\n\t"
02360         "mov    r5, #0\n\t"
02361         "mov    r8, r3\n\t" /* r8 = byte offset of the result word being built, starts at 0 */
02362         "mov    r11, %[r]\n\t" /* r11 = r, kept for the final copy */
02363         "mov    r6, #1\n\t"
02364         "lsl    r6, r6, #8\n\t"
02365         "neg    r6, r6\n\t"
02366         "add    sp, r6\n\t" /* sp -= 256: scratch area for the result words */
02367         "mov    r10, sp\n\t" /* r10 = base of the scratch area */
02368         "mov    r9, %[a]\n\t" /* r9 = base of a */
02369         "\n1:\n\t"
02370         "mov    %[r], #0\n\t" /* %[r] serves as a zero register for the adc instructions below */
02371         "mov    r6, #124\n\t"
02372         "mov    %[a], r8\n\t"
02373         "sub    %[a], r6\n\t"
02374         "sbc    r6, r6\n\t"
02375         "mvn    r6, r6\n\t"
02376         "and    %[a], r6\n\t" /* low offset = r8 - 124 when r8 >= 124, else 0 (done with a mask, no branch) */
02377         "mov    r2, r8\n\t"
02378         "sub    r2, %[a]\n\t" /* high offset = r8 - low offset */
02379         "add    %[a], r9\n\t"
02380         "add    r2, r9\n\t"
02381         "\n2:\n\t"
02382         "cmp    r2, %[a]\n\t"
02383         "beq    4f\n\t" /* both pointers on the same word: take the squaring path */
02384         "# Multiply * 2: Start\n\t"
02385         "ldr    r6, [%[a]]\n\t"
02386         "ldr    r7, [r2]\n\t"
02387         "lsl    r6, r6, #16\n\t"
02388         "lsl    r7, r7, #16\n\t"
02389         "lsr    r6, r6, #16\n\t"
02390         "lsr    r7, r7, #16\n\t"
02391         "mul    r7, r6\n\t" /* low half * low half */
02392         "add    r3, r7\n\t"
02393         "adc    r4, %[r]\n\t"
02394         "adc    r5, %[r]\n\t"
02395         "add    r3, r7\n\t" /* each partial product is added a second time: this word pair occurs twice in the square */
02396         "adc    r4, %[r]\n\t"
02397         "adc    r5, %[r]\n\t"
02398         "ldr    r7, [r2]\n\t"
02399         "lsr    r7, r7, #16\n\t"
02400         "mul    r6, r7\n\t" /* low half of [a] word * high half of [r2] word; added in shifted left by 16 */
02401         "lsr    r7, r6, #16\n\t"
02402         "lsl    r6, r6, #16\n\t"
02403         "add    r3, r6\n\t"
02404         "adc    r4, r7\n\t"
02405         "adc    r5, %[r]\n\t"
02406         "add    r3, r6\n\t"
02407         "adc    r4, r7\n\t"
02408         "adc    r5, %[r]\n\t"
02409         "ldr    r6, [%[a]]\n\t"
02410         "ldr    r7, [r2]\n\t"
02411         "lsr    r6, r6, #16\n\t"
02412         "lsr    r7, r7, #16\n\t"
02413         "mul    r7, r6\n\t" /* high half * high half; added one word up, into r4 */
02414         "add    r4, r7\n\t"
02415         "adc    r5, %[r]\n\t"
02416         "add    r4, r7\n\t"
02417         "adc    r5, %[r]\n\t"
02418         "ldr    r7, [r2]\n\t"
02419         "lsl    r7, r7, #16\n\t"
02420         "lsr    r7, r7, #16\n\t"
02421         "mul    r6, r7\n\t" /* high half of [a] word * low half of [r2] word; added in shifted left by 16 */
02422         "lsr    r7, r6, #16\n\t"
02423         "lsl    r6, r6, #16\n\t"
02424         "add    r3, r6\n\t"
02425         "adc    r4, r7\n\t"
02426         "adc    r5, %[r]\n\t"
02427         "add    r3, r6\n\t"
02428         "adc    r4, r7\n\t"
02429         "adc    r5, %[r]\n\t"
02430         "# Multiply * 2: Done\n\t"
02431         "bal    5f\n\t"
02432         "\n4:\n\t"
02433         "# Square: Start\n\t"
02434         "ldr    r6, [%[a]]\n\t"
02435         "lsr    r7, r6, #16\n\t"
02436         "lsl    r6, r6, #16\n\t"
02437         "lsr    r6, r6, #16\n\t"
02438         "mul    r6, r6\n\t" /* low half squared */
02439         "add    r3, r6\n\t"
02440         "adc    r4, %[r]\n\t"
02441         "adc    r5, %[r]\n\t"
02442         "mul    r7, r7\n\t" /* high half squared; added one word up, into r4 */
02443         "add    r4, r7\n\t"
02444         "adc    r5, %[r]\n\t"
02445         "ldr    r6, [%[a]]\n\t"
02446         "lsr    r7, r6, #16\n\t"
02447         "lsl    r6, r6, #16\n\t"
02448         "lsr    r6, r6, #16\n\t"
02449         "mul    r6, r7\n\t" /* low half * high half */
02450         "lsr    r7, r6, #15\n\t" /* shifts of 15/17 rather than 16/16: the cross product is doubled as it is split */
02451         "lsl    r6, r6, #17\n\t"
02452         "add    r3, r6\n\t"
02453         "adc    r4, r7\n\t"
02454         "adc    r5, %[r]\n\t"
02455         "# Square: Done\n\t"
02456         "\n5:\n\t"
02457         "add    %[a], #4\n\t" /* low pointer walks up ... */
02458         "sub    r2, #4\n\t" /* ... while the high pointer walks down */
02459         "mov    r6, #128\n\t"
02460         "add    r6, r9\n\t" /* r6 = a base + 128: one past the last word */
02461         "cmp    %[a], r6\n\t"
02462         "beq    3f\n\t"
02463         "cmp    %[a], r2\n\t"
02464         "bgt    3f\n\t" /* pointers have crossed: this result word is complete */
02465         "mov    r7, r8\n\t"
02466         "add    r7, r9\n\t"
02467         "cmp    %[a], r7\n\t"
02468         "ble    2b\n\t"
02469         "\n3:\n\t"
02470         "mov    %[r], r10\n\t"
02471         "mov    r7, r8\n\t"
02472         "str    r3, [%[r], r7]\n\t" /* store the finished word at scratch + r8 */
02473         "mov    r3, r4\n\t" /* shift the accumulator down one word */
02474         "mov    r4, r5\n\t"
02475         "mov    r5, #0\n\t"
02476         "add    r7, #4\n\t"
02477         "mov    r8, r7\n\t"
02478         "mov    r6, #248\n\t"
02479         "cmp    r7, r6\n\t"
02480         "ble    1b\n\t"
02481         "mov    %[a], r9\n\t"
02482         "str    r3, [%[r], r7]\n\t" /* top word, at byte offset 252 of the scratch area */
02483         "mov    %[r], r11\n\t" /* %[r] = caller's r again */
02484         "mov    %[a], r10\n\t" /* %[a] now points at the scratch area: the copy source */
02485         "mov    r3, #252\n\t" /* copy runs from the top word down */
02486         "\n4:\n\t"
02487         "ldr    r6, [%[a], r3]\n\t"
02488         "str    r6, [%[r], r3]\n\t"
02489         "sub    r3, #4\n\t"
02490         "bge    4b\n\t" /* loop until the offset goes negative, so offset 0 is copied too */
02491         "mov    r6, #1\n\t"
02492         "lsl    r6, r6, #8\n\t"
02493         "add    sp, r6\n\t" /* sp += 256: release the scratch area */
02494         :
02495         : [r] "r" (r), [a] "r" (a)
02496         : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
02497     );
02498 }
02499 
02500 #endif /* WOLFSSL_SP_SMALL */
02501 #endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
02502 
02503 /* Calculate the bottom digit of -1/a mod 2^n.
02504  *
02505  * a    A single precision number; only its lowest digit a[0] is read.
02506  * rho  Receives the negated inverse of a[0].
02507  */
02508 static void sp_2048_mont_setup(const sp_digit* a, sp_digit* rho)
02509 {
02510     const sp_digit lo = a[0];
02511     sp_digit inv;
02512     int step;
02513     inv = lo + (((lo + 2) & 4) << 1);    /* inv*lo == 1 mod 2**4 */
02514     for (step = 0; step < 3; step++) {
02515         inv *= 2 - lo * inv;             /* then mod 2**8, 2**16, 2**32 */
02516     }
02517 
02518     /* Negate: rho is -1/a[0], not 1/a[0]. */
02519     *rho = -inv;
02520 }
02521 
02522 /* Mul a by digit b into r. (r = a * b)
02523  * One word of a per pass, using four 16x16-bit partial products accumulated in r3/r4/r5.
02524  * r  A single precision integer. 65 words are written: one per word of a plus the final carry word.
02525  * a  A single precision integer. 64 words (256 bytes) are read.
02526  * b  A single precision digit.
02527  */
02528 SP_NOINLINE static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a,
02529         sp_digit b)
02530 {
02531     __asm__ __volatile__ (
02532         "mov    r6, #1\n\t"
02533         "lsl    r6, r6, #8\n\t"
02534         "add    r6, %[a]\n\t"
02535         "mov    r8, %[r]\n\t" /* r8 = current output pointer; %[r] itself is reused as a zero register */
02536         "mov    r9, r6\n\t" /* r9 = a + 256: loop end pointer */
02537         "mov    r3, #0\n\t"
02538         "mov    r4, #0\n\t"
02539         "1:\n\t"
02540         "mov    %[r], #0\n\t"
02541         "mov    r5, #0\n\t"
02542         "# A[] * B\n\t"
02543         "ldr    r6, [%[a]]\n\t"
02544         "lsl    r6, r6, #16\n\t"
02545         "lsl    r7, %[b], #16\n\t"
02546         "lsr    r6, r6, #16\n\t"
02547         "lsr    r7, r7, #16\n\t"
02548         "mul    r7, r6\n\t" /* low half of a word * low half of b */
02549         "add    r3, r7\n\t"
02550         "adc    r4, %[r]\n\t"
02551         "adc    r5, %[r]\n\t"
02552         "lsr    r7, %[b], #16\n\t"
02553         "mul    r6, r7\n\t" /* low half of a word * high half of b; added in shifted left by 16 */
02554         "lsr    r7, r6, #16\n\t"
02555         "lsl    r6, r6, #16\n\t"
02556         "add    r3, r6\n\t"
02557         "adc    r4, r7\n\t"
02558         "adc    r5, %[r]\n\t"
02559         "ldr    r6, [%[a]]\n\t"
02560         "lsr    r6, r6, #16\n\t"
02561         "lsr    r7, %[b], #16\n\t"
02562         "mul    r7, r6\n\t" /* high half * high half; added one word up, into r4 */
02563         "add    r4, r7\n\t"
02564         "adc    r5, %[r]\n\t"
02565         "lsl    r7, %[b], #16\n\t"
02566         "lsr    r7, r7, #16\n\t"
02567         "mul    r6, r7\n\t" /* high half of a word * low half of b; added in shifted left by 16 */
02568         "lsr    r7, r6, #16\n\t"
02569         "lsl    r6, r6, #16\n\t"
02570         "add    r3, r6\n\t"
02571         "adc    r4, r7\n\t"
02572         "adc    r5, %[r]\n\t"
02573         "# A[] * B - Done\n\t"
02574         "mov    %[r], r8\n\t"
02575         "str    r3, [%[r]]\n\t" /* store the finished word */
02576         "mov    r3, r4\n\t" /* shift the accumulator down one word */
02577         "mov    r4, r5\n\t"
02578         "add    %[r], #4\n\t"
02579         "add    %[a], #4\n\t"
02580         "mov    r8, %[r]\n\t"
02581         "cmp    %[a], r9\n\t"
02582         "blt    1b\n\t"
02583         "str    r3, [%[r]]\n\t" /* final carry word, one past the 64 product words */
02584         : [r] "+r" (r), [a] "+r" (a)
02585         : [b] "r" (b)
02586         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
02587     );
02588 }
02589 
02590 #if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
02591 /* Compute the Montgomery normalizer r = 2^n mod m, n being the bit length
02592  * of the 32 digits: r is zeroed and m is subtracted, the borrow is dropped.
02593  *
02594  * r  Result, 32 digits.
02595  * m  Modulus, 32 digits; presumably has its top bit set - confirm at caller.
02596  */
02597 static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m)
02598 {
02599     XMEMSET(r, 0, 32 * sizeof(*r));
02600 
02601     /* 0 - m wraps around to 2^n - m. */
02602     (void)sp_2048_sub_in_place_32(r, m);
02603 }
02604 
02605 /* Conditionally subtract b from a using the mask m.
02606  * m is -1 to subtract and 0 when not copying.
02607  * Every word of b is ANDed with m before it is subtracted, so the same instructions run for either mask; 32 words (128 bytes) are processed.
02608  * r  A single precision number representing condition subtract result.
02609  * a  A single precision number to subtract from.
02610  * b  A single precision number to subtract.
02611  * m  Mask value to apply.
02612  * Returns c as left by the final "sbc c, c": 0 with no borrow, all bits set with a borrow. */
02613 SP_NOINLINE static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a,
02614         const sp_digit* b, sp_digit m)
02615 {
02616     sp_digit c = 0;
02617 
02618     __asm__ __volatile__ (
02619         "mov    r5, #128\n\t"
02620         "mov    r8, r5\n\t" /* r8 = 128: loop bound in bytes */
02621         "mov    r7, #0\n\t" /* r7 = byte offset into r, a and b */
02622         "1:\n\t"
02623         "ldr    r6, [%[b], r7]\n\t"
02624         "and    r6, %[m]\n\t" /* masked word of b: unchanged when m is all-ones, 0 when m is 0 */
02625         "mov    r5, #0\n\t"
02626         "sub    r5, %[c]\n\t" /* 0 - c: meant to turn the saved borrow back into the carry flag (assumes this sub sets flags); r5 itself is overwritten below */
02627         "ldr    r5, [%[a], r7]\n\t"
02628         "sbc    r5, r6\n\t"
02629         "sbc    %[c], %[c]\n\t" /* c = 0 or all-ones: borrow saved before the add and cmp below disturb the flags */
02630         "str    r5, [%[r], r7]\n\t"
02631         "add    r7, #4\n\t"
02632         "cmp    r7, r8\n\t"
02633         "blt    1b\n\t"
02634         : [c] "+r" (c)
02635         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
02636         : "memory", "r5", "r6", "r7", "r8"
02637     );
02638 
02639     return c;
02640 }
02641 
02642 /* Reduce the number back to 2048 bits using Montgomery reduction.
02643  * 32 passes of a[i..i+32] += m * (a[i] * mp), then a masked subtract of m.
02644  * a   A single precision number to reduce in place (64 digits are used).
02645  * m   The single precision number representing the modulus.
02646  * mp  The digit representing the negative inverse of m mod 2^n.
02647  */
02648 SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m,
02649         sp_digit mp)
02650 {
02651     sp_digit ca = 0;   /* Carry word passed from one pass to the next. */
02652 
02653     __asm__ __volatile__ (
02654         "mov    r8, %[mp]\n\t"              /* r8  = mp, reloaded on every pass */
02655         "mov    r12, %[ca]\n\t"             /* r12 = carry from the previous pass */
02656         "mov    r14, %[m]\n\t"              /* r14 = &m[0] */
02657         "mov    r9, %[a]\n\t"               /* r9  = &a[i] */
02658         "mov    r4, #0\n\t"
02659         "# i = 0\n\t"
02660         "mov    r11, r4\n\t"                /* r11 = i * 4 */
02661         "\n1:\n\t"
02662         "mov    r5, #0\n\t"                 /* r5 = carry into digit j = 0 */
02663         "mov    %[ca], #0\n\t"
02664         "# mu = a[i] * mp\n\t"
02665         "mov    %[mp], r8\n\t"
02666         "ldr    %[a], [%[a]]\n\t"
02667         "mul    %[mp], %[a]\n\t"            /* %[mp] holds mu from here on */
02668         "mov    %[m], r14\n\t"              /* %[m] walks over m[j] */
02669         "mov    r10, r9\n\t"                /* r10 = &a[i+j] */
02670         "\n2:\n\t"
02671         "# a[i+j] += m[j] * mu\n\t"
02672         "mov    %[a], r10\n\t"
02673         "ldr    %[a], [%[a]]\n\t"
02674         "mov    %[ca], #0\n\t"              /* %[ca] doubles as a zero register */
02675         "mov    r4, r5\n\t"                 /* r4 = carry from the previous digit */
02676         "mov    r5, #0\n\t"
02677         "# Multiply m[j] and mu - Start\n\t"
02678         "ldr    r7, [%[m]]\n\t"
02679         "lsl    r6, %[mp], #16\n\t"
02680         "lsl    r7, r7, #16\n\t"
02681         "lsr    r6, r6, #16\n\t"
02682         "lsr    r7, r7, #16\n\t"
02683         "mul    r7, r6\n\t"                 /* low half of m[j] * low half of mu */
02684         "add    %[a], r7\n\t"
02685         "adc    r5, %[ca]\n\t"
02686         "ldr    r7, [%[m]]\n\t"
02687         "lsr    r7, r7, #16\n\t"
02688         "mul    r6, r7\n\t"                 /* high half of m[j] * low half of mu */
02689         "lsr    r7, r6, #16\n\t"
02690         "lsl    r6, r6, #16\n\t"
02691         "add    %[a], r6\n\t"
02692         "adc    r5, r7\n\t"
02693         "ldr    r7, [%[m]]\n\t"
02694         "lsr    r6, %[mp], #16\n\t"
02695         "lsr    r7, r7, #16\n\t"
02696         "mul    r7, r6\n\t"                 /* high half * high half */
02697         "add    r5, r7\n\t"
02698         "ldr    r7, [%[m]]\n\t"
02699         "lsl    r7, r7, #16\n\t"
02700         "lsr    r7, r7, #16\n\t"
02701         "mul    r6, r7\n\t"                 /* low half of m[j] * high half of mu */
02702         "lsr    r7, r6, #16\n\t"
02703         "lsl    r6, r6, #16\n\t"
02704         "add    %[a], r6\n\t"
02705         "adc    r5, r7\n\t"
02706         "# Multiply m[j] and mu - Done\n\t"
02707         "add    r4, %[a]\n\t"
02708         "adc    r5, %[ca]\n\t"
02709         "mov    %[a], r10\n\t"
02710         "str    r4, [%[a]]\n\t"             /* store the updated a[i+j] */
02711         "mov    r6, #4\n\t"
02712         "add    %[m], #4\n\t"
02713         "add    r10, r6\n\t"
02714         "mov    r4, #124\n\t"               /* inner loop ends at &a[i+31] */
02715         "add    r4, r9\n\t"
02716         "cmp    r10, r4\n\t"
02717         "blt    2b\n\t"
02718         "# a[i+31] += m[31] * mu\n\t"
02719         "mov    %[ca], #0\n\t"
02720         "mov    r4, r12\n\t"                /* bring in the previous pass carry */
02721         "mov    %[a], #0\n\t"
02722         "# Multiply m[31] and mu - Start\n\t"
02723         "ldr    r7, [%[m]]\n\t"
02724         "lsl    r6, %[mp], #16\n\t"
02725         "lsl    r7, r7, #16\n\t"
02726         "lsr    r6, r6, #16\n\t"
02727         "lsr    r7, r7, #16\n\t"
02728         "mul    r7, r6\n\t"
02729         "add    r5, r7\n\t"
02730         "adc    r4, %[ca]\n\t"
02731         "adc    %[a], %[ca]\n\t"
02732         "ldr    r7, [%[m]]\n\t"
02733         "lsr    r7, r7, #16\n\t"
02734         "mul    r6, r7\n\t"
02735         "lsr    r7, r6, #16\n\t"
02736         "lsl    r6, r6, #16\n\t"
02737         "add    r5, r6\n\t"
02738         "adc    r4, r7\n\t"
02739         "adc    %[a], %[ca]\n\t"
02740         "ldr    r7, [%[m]]\n\t"
02741         "lsr    r6, %[mp], #16\n\t"
02742         "lsr    r7, r7, #16\n\t"
02743         "mul    r7, r6\n\t"
02744         "add    r4, r7\n\t"
02745         "adc    %[a], %[ca]\n\t"
02746         "ldr    r7, [%[m]]\n\t"
02747         "lsl    r7, r7, #16\n\t"
02748         "lsr    r7, r7, #16\n\t"
02749         "mul    r6, r7\n\t"
02750         "lsr    r7, r6, #16\n\t"
02751         "lsl    r6, r6, #16\n\t"
02752         "add    r5, r6\n\t"
02753         "adc    r4, r7\n\t"
02754         "adc    %[a], %[ca]\n\t"
02755         "# Multiply m[31] and mu - Done\n\t"
02756         "mov    %[ca], %[a]\n\t"
02757         "mov    %[a], r10\n\t"
02758         "ldr    r7, [%[a], #4]\n\t"         /* r7 = a[i+32] */
02759         "ldr    %[a], [%[a]]\n\t"           /* a[i+31] */
02760         "mov    r6, #0\n\t"
02761         "add    r5, %[a]\n\t"
02762         "adc    r7, r4\n\t"
02763         "adc    %[ca], r6\n\t"              /* carry out of this pass */
02764         "mov    %[a], r10\n\t"
02765         "str    r5, [%[a]]\n\t"
02766         "str    r7, [%[a], #4]\n\t"
02767         "# i += 1\n\t"
02768         "mov    r6, #4\n\t"
02769         "add    r9, r6\n\t"
02770         "add    r11, r6\n\t"
02771         "mov    r12, %[ca]\n\t"
02772         "mov    %[a], r9\n\t"               /* on exit %[a] is 32 digits past its entry value */
02773         "mov    r4, #128\n\t"
02774         "cmp    r11, r4\n\t"
02775         "blt    1b\n\t"
02776         "mov    %[m], r14\n\t"              /* restore m for the call below */
02777         : [ca] "+r" (ca), [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) /* all four are written */
02778         :
02779         : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14"
02780     );
02781 
02782     sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - ca);
02783 }
02784 
02785 /* Montgomery multiplication of two 32 digit numbers modulo the prime m.
02786  * (r = a * b mod m)
02787  *
02788  * r   Receives the product from sp_2048_mul_32(), then reduced in place.
02789  * a   First number to multiply, in Montgomery form.
02790  * b   Second number to multiply, in Montgomery form.
02791  * m   Modulus (prime).
02792  * mp  Montgomery multiplier handed on to sp_2048_mont_reduce_32().
02793  */
02794 static void sp_2048_mont_mul_32(sp_digit* r, const sp_digit* a,
02795         const sp_digit* b, const sp_digit* m, sp_digit mp)
02796 {
02797     sp_2048_mul_32(r, a, b);            /* r = a * b */
02798     sp_2048_mont_reduce_32(r, m, mp);   /* bring r back below 2048 bits */
02799 }
02800 
02801 /* Montgomery squaring of a 32 digit number. (r = a * a mod m)
02802  *
02803  * r   Receives the square from sp_2048_sqr_32(), then reduced in place.
02804  * a   Number to square, in Montgomery form.
02805  * m   Modulus (prime).
02806  * mp  Montgomery multiplier handed on to sp_2048_mont_reduce_32().
02807  */
02808 static void sp_2048_mont_sqr_32(sp_digit* r, const sp_digit* a,
02809         const sp_digit* m, sp_digit mp)
02810 {
02811     sp_2048_sqr_32(r, a);               /* r = a * a */
02812     sp_2048_mont_reduce_32(r, m, mp);   /* bring r back below 2048 bits */
02813 }
02814 
02815 /* Mul a by digit b into r. (r = a * b)
02816  *
02817  * r  Result; 33 digits are written (32 product words plus the final carry).
02818  * a  A single precision integer; 32 digits are read.
02819  * b  A single precision digit.
02820  */
02821 SP_NOINLINE static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a,
02822         sp_digit b)
02823 {
02824     __asm__ __volatile__ (
02825         "mov    r6, #128\n\t"
02826         "add    r6, %[a]\n\t"
02827         "mov    r8, %[r]\n\t"               /* r8 = current output position */
02828         "mov    r9, r6\n\t"                 /* r9 = a + 128 bytes, the loop end */
02829         "mov    r3, #0\n\t"                 /* r3, r4, r5: running sum, low to high */
02830         "mov    r4, #0\n\t"
02831         "1:\n\t"
02832         "mov    %[r], #0\n\t"               /* %[r] is a zero register inside the loop */
02833         "mov    r5, #0\n\t"
02834         "# A[] * B\n\t"
02835         "ldr    r6, [%[a]]\n\t"
02836         "lsl    r6, r6, #16\n\t"
02837         "lsl    r7, %[b], #16\n\t"
02838         "lsr    r6, r6, #16\n\t"
02839         "lsr    r7, r7, #16\n\t"
02840         "mul    r7, r6\n\t"                 /* low half of a[] * low half of b */
02841         "add    r3, r7\n\t"
02842         "adc    r4, %[r]\n\t"
02843         "adc    r5, %[r]\n\t"
02844         "lsr    r7, %[b], #16\n\t"
02845         "mul    r6, r7\n\t"                 /* low half of a[] * high half of b */
02846         "lsr    r7, r6, #16\n\t"
02847         "lsl    r6, r6, #16\n\t"
02848         "add    r3, r6\n\t"
02849         "adc    r4, r7\n\t"
02850         "adc    r5, %[r]\n\t"
02851         "ldr    r6, [%[a]]\n\t"
02852         "lsr    r6, r6, #16\n\t"
02853         "lsr    r7, %[b], #16\n\t"
02854         "mul    r7, r6\n\t"                 /* high half * high half */
02855         "add    r4, r7\n\t"
02856         "adc    r5, %[r]\n\t"
02857         "lsl    r7, %[b], #16\n\t"
02858         "lsr    r7, r7, #16\n\t"
02859         "mul    r6, r7\n\t"                 /* high half of a[] * low half of b */
02860         "lsr    r7, r6, #16\n\t"
02861         "lsl    r6, r6, #16\n\t"
02862         "add    r3, r6\n\t"
02863         "adc    r4, r7\n\t"
02864         "adc    r5, %[r]\n\t"
02865         "# A[] * B - Done\n\t"
02866         "mov    %[r], r8\n\t"
02867         "str    r3, [%[r]]\n\t"             /* emit the finished low word */
02868         "mov    r3, r4\n\t"                 /* slide the running sum down a word */
02869         "mov    r4, r5\n\t"
02870         "add    %[r], #4\n\t"
02871         "add    %[a], #4\n\t"
02872         "mov    r8, %[r]\n\t"
02873         "cmp    %[a], r9\n\t"
02874         "blt    1b\n\t"
02875         "str    r3, [%[r]]\n\t"             /* the 33rd output word */
02876         : [r] "+r" (r), [a] "+r" (a)
02877         : [b] "r" (b)
02878         : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
02879     );
02880 }
02881 
02882 /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
02883  *
02884  * d1   The high order half of the number to divide.
02885  * d0   The low order half of the number to divide.
02886  * div  The divisor.
02887  * returns the result of the division.
02888  *
02889  * Note that this is an approximate div. It may give an answer 1 larger.
02890  */
02891 SP_NOINLINE static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0,
02892         sp_digit div)
02893 {
02894     sp_digit r = 0;
02895 
02896     __asm__ __volatile__ (
02897         "lsr    r5, %[div], #1\n\t"
02898         "add    r5, #1\n\t"                 /* r5 = (div >> 1) + 1 */
02899         "mov    r8, %[d0]\n\t"              /* r8 = untouched copy of d0 */
02900         "mov    r9, %[d1]\n\t"              /* r9 = untouched copy of d1 */
02901         "# Do top 32\n\t"
02902         "mov    r6, r5\n\t"
02903         "sub    r6, %[d1]\n\t"
02904         "sbc    r6, r6\n\t"                 /* r6 = all ones when d1 > r5, else 0 */
02905         "add    %[r], %[r]\n\t"
02906         "sub    %[r], r6\n\t"               /* r = 2 * r + (d1 > r5) */
02907         "and    r6, r5\n\t"
02908         "sub    %[d1], r6\n\t"              /* d1 -= r5 when the bit was set */
02909         "# Next 30 bits\n\t"
02910         "mov    r4, #29\n\t"                /* counts 29 down to 0: 30 rounds */
02911         "1:\n\t"
02912         "lsl    %[d0], %[d0], #1\n\t"       /* shift d1|d0 left by one bit */
02913         "adc    %[d1], %[d1]\n\t"
02914         "mov    r6, r5\n\t"
02915         "sub    r6, %[d1]\n\t"
02916         "sbc    r6, r6\n\t"
02917         "add    %[r], %[r]\n\t"
02918         "sub    %[r], r6\n\t"
02919         "and    r6, r5\n\t"
02920         "sub    %[d1], r6\n\t"
02921         "sub    r4, #1\n\t"
02922         "bpl    1b\n\t"
02923         "mov    r7, #0\n\t"                 /* r7 stays zero for the adc below */
02924         "add    %[r], %[r]\n\t"
02925         "add    %[r], #1\n\t"               /* r = 2 * r + 1 */
02926         "# r * div - Start\n\t"
02927         "lsl    %[d1], %[r], #16\n\t"       /* %[d1] is scratch from here on */
02928         "lsl    r4, %[div], #16\n\t"
02929         "lsr    %[d1], %[d1], #16\n\t"
02930         "lsr    r4, r4, #16\n\t"
02931         "mul    r4, %[d1]\n\t"
02932         "lsr    r6, %[div], #16\n\t"
02933         "mul    %[d1], r6\n\t"
02934         "lsr    r5, %[d1], #16\n\t"
02935         "lsl    %[d1], %[d1], #16\n\t"
02936         "add    r4, %[d1]\n\t"
02937         "adc    r5, r7\n\t"
02938         "lsr    %[d1], %[r], #16\n\t"
02939         "mul    r6, %[d1]\n\t"
02940         "add    r5, r6\n\t"
02941         "lsl    r6, %[div], #16\n\t"
02942         "lsr    r6, r6, #16\n\t"
02943         "mul    %[d1], r6\n\t"
02944         "lsr    r6, %[d1], #16\n\t"
02945         "lsl    %[d1], %[d1], #16\n\t"
02946         "add    r4, %[d1]\n\t"
02947         "adc    r5, r6\n\t"                 /* r5|r4 = r * div */
02948         "# r * div - Done\n\t"
02949         "mov    %[d1], r8\n\t"
02950         "sub    %[d1], r4\n\t"
02951         "mov    r4, %[d1]\n\t"              /* r4 = low word of (d1|d0) - r * div */
02952         "mov    %[d1], r9\n\t"
02953         "sbc    %[d1], r5\n\t"
02954         "mov    r5, %[d1]\n\t"              /* r5 = high word of that difference */
02955         "add    %[r], r5\n\t"               /* first correction of r */
02956         "# r * div - Start\n\t"
02957         "lsl    %[d1], %[r], #16\n\t"
02958         "lsl    r4, %[div], #16\n\t"
02959         "lsr    %[d1], %[d1], #16\n\t"
02960         "lsr    r4, r4, #16\n\t"
02961         "mul    r4, %[d1]\n\t"
02962         "lsr    r6, %[div], #16\n\t"
02963         "mul    %[d1], r6\n\t"
02964         "lsr    r5, %[d1], #16\n\t"
02965         "lsl    %[d1], %[d1], #16\n\t"
02966         "add    r4, %[d1]\n\t"
02967         "adc    r5, r7\n\t"
02968         "lsr    %[d1], %[r], #16\n\t"
02969         "mul    r6, %[d1]\n\t"
02970         "add    r5, r6\n\t"
02971         "lsl    r6, %[div], #16\n\t"
02972         "lsr    r6, r6, #16\n\t"
02973         "mul    %[d1], r6\n\t"
02974         "lsr    r6, %[d1], #16\n\t"
02975         "lsl    %[d1], %[d1], #16\n\t"
02976         "add    r4, %[d1]\n\t"
02977         "adc    r5, r6\n\t"
02978         "# r * div - Done\n\t"
02979         "mov    %[d1], r8\n\t"
02980         "mov    r6, r9\n\t"
02981         "sub    r4, %[d1], r4\n\t"
02982         "sbc    r6, r5\n\t"
02983         "mov    r5, r6\n\t"
02984         "add    %[r], r5\n\t"               /* second correction of r */
02985         "# r * div - Start\n\t"
02986         "lsl    %[d1], %[r], #16\n\t"
02987         "lsl    r4, %[div], #16\n\t"
02988         "lsr    %[d1], %[d1], #16\n\t"
02989         "lsr    r4, r4, #16\n\t"
02990         "mul    r4, %[d1]\n\t"
02991         "lsr    r6, %[div], #16\n\t"
02992         "mul    %[d1], r6\n\t"
02993         "lsr    r5, %[d1], #16\n\t"
02994         "lsl    %[d1], %[d1], #16\n\t"
02995         "add    r4, %[d1]\n\t"
02996         "adc    r5, r7\n\t"
02997         "lsr    %[d1], %[r], #16\n\t"
02998         "mul    r6, %[d1]\n\t"
02999         "add    r5, r6\n\t"
03000         "lsl    r6, %[div], #16\n\t"
03001         "lsr    r6, r6, #16\n\t"
03002         "mul    %[d1], r6\n\t"
03003         "lsr    r6, %[d1], #16\n\t"
03004         "lsl    %[d1], %[d1], #16\n\t"
03005         "add    r4, %[d1]\n\t"
03006         "adc    r5, r6\n\t"
03007         "# r * div - Done\n\t"
03008         "mov    %[d1], r8\n\t"
03009         "mov    r6, r9\n\t"
03010         "sub    r4, %[d1], r4\n\t"
03011         "sbc    r6, r5\n\t"
03012         "mov    r5, r6\n\t"
03013         "add    %[r], r5\n\t"               /* third correction of r */
03014         "mov    r6, %[div]\n\t"
03015         "sub    r6, r4\n\t"
03016         "sbc    r6, r6\n\t"                 /* all ones when div < r4 */
03017         "sub    %[r], r6\n\t"               /* r += 1 in that case */
03018         : [r] "+r" (r), [d1] "+r" (d1), [d0] "+r" (d0) /* d1 and d0 are written by the asm */
03019         : [div] "r" (div)
03020         : "cc", "r4", "r5", "r7", "r6", "r8", "r9"
03021     );
03022     return r;
03023 }
03024 
03025 /* Compare a with b in constant time.
03026  *
03027  * a  A single precision integer; 32 digits are read, top digit first.
03028  * b  A single precision integer; 32 digits are read, top digit first.
03029  * return -ve, 0 or +ve if a is less than, equal to or greater than b
03030  * respectively.
03031  */
03032 SP_NOINLINE static int32_t sp_2048_cmp_32(const sp_digit* a, const sp_digit* b)
03033 {
03034     sp_digit r = 0;
03035 
03036     /* The only branch is the loop counter test, never the digit values. */
03037     __asm__ __volatile__ (
03038         "mov    r3, #0\n\t"
03039         "mvn    r3, r3\n\t"                 /* r3 = all ones while no difference was seen */
03040         "mov    r6, #124\n\t"               /* byte offset of the top digit */
03041         "1:\n\t"
03042         "ldr    r7, [%[a], r6]\n\t"
03043         "ldr    r5, [%[b], r6]\n\t"
03044         "and    r7, r3\n\t"                 /* both digits read as 0 once r3 is cleared */
03045         "and    r5, r3\n\t"
03046         "mov    r4, r7\n\t"
03047         "sub    r7, r5\n\t"
03048         "sbc    r7, r7\n\t"                 /* all ones when a digit < b digit */
03049         "add    %[r], r7\n\t"               /* r -= 1 in that case */
03050         "mvn    r7, r7\n\t"
03051         "and    r3, r7\n\t"
03052         "sub    r5, r4\n\t"
03053         "sbc    r7, r7\n\t"                 /* all ones when b digit < a digit */
03054         "sub    %[r], r7\n\t"               /* r += 1 in that case */
03055         "mvn    r7, r7\n\t"
03056         "and    r3, r7\n\t"
03057         "sub    r6, #4\n\t"
03058         "cmp    r6, #0\n\t"
03059         "bge    1b\n\t"
03060         : [r] "+r" (r)
03061         : [a] "r" (a), [b] "r" (b)
03062         : "memory", "cc", "r3", "r4", "r5", "r6", "r7" /* a[] and b[] are read through pointers */
03063     );
03064 
03065     return r;
03066 }
03067 
03068 /* Compute the remainder of a divided by d. (m*d + r = a)
03069  * The multiplier m is not calculated; the argument is accepted but unused.
03070  *
03071  * a  Number to be divided; 64 digits are copied from it.
03072  * d  Number to divide with; d[31] drives each quotient digit estimate.
03073  * m  Multiplier result. Not written.
03074  * r  Remainder from the division.
03075  * returns MP_OKAY indicating success.
03076  */
03077 static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_digit* m,
03078         sp_digit* r)
03079 {
03080     sp_digit rem[64];    /* Working copy of a that is reduced digit by digit. */
03081     sp_digit prod[33];   /* d times the estimate, then d under a mask. */
03082     sp_digit top, q;
03083     sp_digit* hi;        /* Top digit of the 33 digit window rem[i..i+32]. */
03084     int i;
03085 
03086     (void)m;
03087     top = d[31];
03088     XMEMCPY(rem, a, sizeof(rem));
03089     for (i = 31; i >= 0; i--) {
03090         hi = &rem[32 + i];
03091         q = div_2048_word_32(hi[0], hi[-1], top);
03092         sp_2048_mul_d_32(prod, d, q);
03093         *hi += sp_2048_sub_in_place_32(&rem[i], prod);
03094         *hi -= prod[32];
03095         sp_2048_mask_32(prod, d, *hi);   /* two add-backs of d masked by *hi */
03096         *hi += sp_2048_add_32(&rem[i], &rem[i], prod);
03097         sp_2048_mask_32(prod, d, *hi);
03098         *hi += sp_2048_add_32(&rem[i], &rem[i], prod);
03099     }
03100 
03101     q = sp_2048_cmp_32(rem, d) >= 0;     /* 1 when one more subtract is needed */
03102     sp_2048_cond_sub_32(r, rem, d, (sp_digit)0 - q);
03103     return MP_OKAY;
03104 }
03105 
03106 /* Reduce a modulo m into r. (r = a mod m)
03107  * r  Receives the reduced result.
03108  * a  Number to be reduced.
03109  * m  Modulus to reduce with.
03110  * returns MP_OKAY, as passed back by sp_2048_div_32().
03111  */
03112 static WC_INLINE int sp_2048_mod_32(sp_digit* r, const sp_digit* a,
03113         const sp_digit* m)
03114 {
03115     return sp_2048_div_32(a, m, NULL, r);   /* NULL: no quotient requested */
03116 }
03117 
03118 #ifdef WOLFSSL_SP_SMALL
03119 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
03120  *
03121  * r     A single precision number that is the result of the operation.
03122  * a     A single precision number being exponentiated.
03123  * e     A single precision number that is the exponent.
03124  * bits  The number of bits in the exponent.
03125  * m     A single precision number that is the modulus.
03126  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
03127  */
03128 static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e,
03129         int bits, const sp_digit* m, int reduceA)
03130 {
03131 #ifndef WOLFSSL_SMALL_STACK
03132     sp_digit t[16][64];
03133 #else
03134     sp_digit* t[16];
03135     sp_digit* td;
03136 #endif
03137     sp_digit* norm;
03138     sp_digit mp = 1;
03139     sp_digit n;
03140     sp_digit mask;
03141     int i;
03142     int c, y;
03143     int err = MP_OKAY;
03144 
03145 #ifdef WOLFSSL_SMALL_STACK
03146     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 64, NULL,
03147                             DYNAMIC_TYPE_TMP_BUFFER);
03148     if (td == NULL) {
03149         err = MEMORY_E;
03150     }
03151 #endif
03152 
03153     if (err == MP_OKAY) {
03154 #ifdef WOLFSSL_SMALL_STACK
03155         for (i=0; i<16; i++) {
03156             t[i] = td + i * 64;
03157         }
03158 #endif
03159         norm = t[0];
03160 
03161         sp_2048_mont_setup(m, &mp);
03162         sp_2048_mont_norm_32(norm, m);
03163 
03164         XMEMSET(t[1], 0, sizeof(sp_digit) * 32U);
03165         if (reduceA != 0) {
03166             err = sp_2048_mod_32(t[1] + 32, a, m);
03167             if (err == MP_OKAY) {
03168                 err = sp_2048_mod_32(t[1], t[1], m);
03169             }
03170         }
03171         else {
03172             XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32);
03173             err = sp_2048_mod_32(t[1], t[1], m);
03174         }
03175     }
03176 
03177     if (err == MP_OKAY) {
03178         sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp);
03179         sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp);
03180         sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp);
03181         sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp);
03182         sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp);
03183         sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp);
03184         sp_2048_mont_sqr_32(t[ 8], t[ 4], m, mp);
03185         sp_2048_mont_mul_32(t[ 9], t[ 5], t[ 4], m, mp);
03186         sp_2048_mont_sqr_32(t[10], t[ 5], m, mp);
03187         sp_2048_mont_mul_32(t[11], t[ 6], t[ 5], m, mp);
03188         sp_2048_mont_sqr_32(t[12], t[ 6], m, mp);
03189         sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp);
03190         sp_2048_mont_sqr_32(t[14], t[ 7], m, mp);
03191         sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp);
03192 
03193         i = (bits - 1) / 32;
03194         n = e[i--];
03195         c = bits & 31;
03196         if (c == 0) {
03197             c = 32;
03198         }
03199         c -= bits % 4;
03200         if (c == 32) {
03201             c = 28;
03202         }
03203         y = (int)(n >> c);
03204         n <<= 32 - c;
03205         XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
03206         for (; i>=0 || c>=4; ) {
03207             if (c == 0) {
03208                 n = e[i--];
03209                 y = n >> 28;
03210                 n <<= 4;
03211                 c = 28;
03212             }
03213             else if (c < 4) {
03214                 y = n >> 28;
03215                 n = e[i--];
03216                 c = 4 - c;
03217                 y |= n >> (32 - c);
03218                 n <<= c;
03219                 c = 32 - c;
03220             }
03221             else {
03222                 y = (n >> 28) & 0xf;
03223                 n <<= 4;
03224                 c -= 4;
03225             }
03226 
03227             sp_2048_mont_sqr_32(r, r, m, mp);
03228             sp_2048_mont_sqr_32(r, r, m, mp);
03229             sp_2048_mont_sqr_32(r, r, m, mp);
03230             sp_2048_mont_sqr_32(r, r, m, mp);
03231 
03232             sp_2048_mont_mul_32(r, r, t[y], m, mp);
03233         }
03234 
03235         XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U);
03236         sp_2048_mont_reduce_32(r, m, mp);
03237 
03238         mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
03239         sp_2048_cond_sub_32(r, r, m, mask);
03240     }
03241 
03242 #ifdef WOLFSSL_SMALL_STACK
03243     if (td != NULL) {
03244         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
03245     }
03246 #endif
03247 
03248     return err;
03249 }
03250 #else
03251 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
03252  * Builds 32 Montgomery form powers of a and takes e five bits at a time.
03253  *
03254  * r        A single precision number that is the result of the operation.
03255  *          64 digits of r are used as work space.
03256  * a        A single precision number being exponentiated.
03257  * e        A single precision number that is the exponent.
03258  * bits     The number of bits in the exponent.
03259  * m        A single precision number that is the modulus.
03260  * reduceA  When not 0, a is reduced modulo m before it is used.
03261  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
03262  */
03263 static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e,
03264         int bits, const sp_digit* m, int reduceA)
03265 {
03266 #ifndef WOLFSSL_SMALL_STACK
03267     sp_digit t[32][64];
03268 #else
03269     sp_digit* t[32];
03270     sp_digit* td;
03271 #endif
03272     sp_digit* norm;
03273     sp_digit mp = 1;
03274     sp_digit n;
03275     sp_digit mask;
03276     int i;
03277     int c, y;
03278     int err = MP_OKAY;
03279 
03280 #ifdef WOLFSSL_SMALL_STACK
03281     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 64, NULL,
03282                             DYNAMIC_TYPE_TMP_BUFFER);
03283     if (td == NULL) {
03284         err = MEMORY_E;
03285     }
03286 #endif
03287 
03288     if (err == MP_OKAY) {
03289 #ifdef WOLFSSL_SMALL_STACK
03290         for (i=0; i<32; i++) {
03291             t[i] = td + i * 64;
03292         }
03293 #endif
03294         norm = t[0];
03295 
03296         sp_2048_mont_setup(m, &mp);
03297         sp_2048_mont_norm_32(norm, m);
03298 
03299         /* t[1] = a * 2^1024 mod m: a sits in the upper 32 digits over zeros. */
03300         XMEMSET(t[1], 0, sizeof(sp_digit) * 32U);
03301         if (reduceA != 0) {
03302             err = sp_2048_mod_32(t[1] + 32, a, m);
03303             if (err == MP_OKAY) {
03304                 err = sp_2048_mod_32(t[1], t[1], m);
03305             }
03306         }
03307         else {
03308             XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32);
03309             err = sp_2048_mod_32(t[1], t[1], m);
03310         }
03311     }
03312 
03313     if (err == MP_OKAY) {
03314         /* t[2k] = t[k]^2 and t[2k+1] = t[k+1] * t[k], for t[2] up to t[31]. */
03315         for (i = 1; i < 16; i++) {
03316             sp_2048_mont_sqr_32(t[2 * i], t[i], m, mp);
03317             sp_2048_mont_mul_32(t[2 * i + 1], t[i + 1], t[i], m, mp);
03318         }
03319 
03320         /* Take the leading (bits % 5) bits first so that the rest of the
03321          * exponent is a whole number of 5 bit windows. */
03322         i = (bits - 1) / 32;
03323         n = e[i--];
03324         c = bits & 31;
03325         if (c == 0) {
03326             c = 32;
03327         }
03328         c -= bits % 5;
03329         if (c == 32) {
03330             c = 27;
03331         }
03332         if (c < 0) {
03333             /* The top word holds fewer bits than the first window needs:
03334              * finish the window from the top of the next word. */
03335             c = -c;
03336             y = (int)(n << c);
03337             n = e[i--];
03338             y |= (int)(n >> (32 - c));
03339             n <<= c;
03340             c = 32 - c;
03341         }
03342         else if (c == 0) {
03343             /* Top word is the whole first window; n << 32 is undefined in C. */
03344             y = (int)n;
03345             n = 0;
03346         }
03347         else {
03348             y = (int)(n >> c);
03349             n <<= 32 - c;
03350         }
03351         XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
03352         for (; i>=0 || c>=5; ) {
03353             if (c == 0) {
03354                 n = e[i--];
03355                 y = n >> 27;
03356                 n <<= 5;
03357                 c = 27;
03358             }
03359             else if (c < 5) {
03360                 y = n >> 27;
03361                 n = e[i--];
03362                 c = 5 - c;
03363                 y |= n >> (32 - c);
03364                 n <<= c;
03365                 c = 32 - c;
03366             }
03367             else {
03368                 y = (n >> 27) & 0x1f;
03369                 n <<= 5;
03370                 c -= 5;
03371             }
03372 
03373             sp_2048_mont_sqr_32(r, r, m, mp);
03374             sp_2048_mont_sqr_32(r, r, m, mp);
03375             sp_2048_mont_sqr_32(r, r, m, mp);
03376             sp_2048_mont_sqr_32(r, r, m, mp);
03377             sp_2048_mont_sqr_32(r, r, m, mp);
03378 
03379             sp_2048_mont_mul_32(r, r, t[y], m, mp);
03380         }
03381 
03382         /* Clear the upper half and reduce once more to leave Montgomery form. */
03383         XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U);
03384         sp_2048_mont_reduce_32(r, m, mp);
03385 
03386         /* Subtract m one last time when r is not below it. */
03387         mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
03388         sp_2048_cond_sub_32(r, r, m, mask);
03389     }
03390 
03391 #ifdef WOLFSSL_SMALL_STACK
03392     if (td != NULL) {
03393         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
03394     }
03395 #endif
03396 
03397     return err;
03398 }
03399 #endif /* WOLFSSL_SP_SMALL */
03400 
03401 #endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
03402 
03403 #if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
03404 /* Calculate r = 2^n mod m, n being the number of bits to reduce by.
03405  * As m must be 2048 bits long, a single subtraction from zero is enough.
03406  *
03407  * r  A single precision number; 64 digits are cleared before the subtract.
03408  * m  A single precision number.
03409  */
03410 static void sp_2048_mont_norm_64(sp_digit* r, const sp_digit* m)
03411 {
03412     /* Start from zero ... */
03413     XMEMSET(r, 0, 64 * sizeof(*r));
03414     /* ... so that subtracting m leaves 2^n - m in r. */
03415     (void)sp_2048_sub_in_place_64(r, m);
03416 }
03417 
03418 #endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
03419 /* Conditionally subtract b from a using the mask m.
03420  * m is -1 to subtract and 0 to copy a to r unchanged.
03421  * r  A single precision number receiving the result (64 digits written).
03422  * a  A single precision number to subtract from.
03423  * b  A single precision number to subtract; every digit is ANDed with m.
03424  * m  Mask value to apply.
03425  * returns 0 when no borrow came out of the top digit, all ones otherwise.
03426  */
03427 SP_NOINLINE static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a,
03428         const sp_digit* b, sp_digit m)
03429 {
03430     sp_digit c = 0;   /* Borrow so far: 0 or all ones. */
03431 
03432     __asm__ __volatile__ (
03433         "mov    r5, #1\n\t"
03434         "lsl    r5, r5, #8\n\t"             /* 256 = 64 digits * 4 bytes */
03435         "mov    r8, r5\n\t"                 /* r8 = end offset */
03436         "mov    r7, #0\n\t"                 /* r7 = byte offset of the current digit */
03437         "1:\n\t"
03438         "ldr    r6, [%[b], r7]\n\t"
03439         "and    r6, %[m]\n\t"               /* b digit, or 0 when the mask is 0 */
03440         "mov    r5, #0\n\t"
03441         "sub    r5, %[c]\n\t"               /* 0 - c puts the saved borrow back in the flags */
03442         "ldr    r5, [%[a], r7]\n\t"
03443         "sbc    r5, r6\n\t"
03444         "sbc    %[c], %[c]\n\t"             /* save the new borrow as 0 or all ones */
03445         "str    r5, [%[r], r7]\n\t"
03446         "add    r7, #4\n\t"
03447         "cmp    r7, r8\n\t"
03448         "blt    1b\n\t"
03449         : [c] "+r" (c)
03450         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
03451         : "memory", "cc", "r5", "r6", "r7", "r8"
03452     );
03453 
03454     return c;
03455 }
03456 
03457 /* Reduce the number back to 2048 bits using Montgomery reduction.
03458  * 64 passes of a[i..i+64] += m * (a[i] * mp), then a masked subtract of m.
03459  * a   A single precision number to reduce in place (128 digits are used).
03460  * m   The single precision number representing the modulus.
03461  * mp  The digit representing the negative inverse of m mod 2^n.
03462  */
03463 SP_NOINLINE static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m,
03464         sp_digit mp)
03465 {
03466     sp_digit ca = 0;   /* Carry word passed from one pass to the next. */
03467 
03468     __asm__ __volatile__ (
03469         "mov    r8, %[mp]\n\t"              /* r8  = mp, reloaded on every pass */
03470         "mov    r12, %[ca]\n\t"             /* r12 = carry from the previous pass */
03471         "mov    r14, %[m]\n\t"              /* r14 = &m[0] */
03472         "mov    r9, %[a]\n\t"               /* r9  = &a[i] */
03473         "mov    r4, #0\n\t"
03474         "# i = 0\n\t"
03475         "mov    r11, r4\n\t"                /* r11 = i * 4 */
03476         "\n1:\n\t"
03477         "mov    r5, #0\n\t"                 /* r5 = carry into digit j = 0 */
03478         "mov    %[ca], #0\n\t"
03479         "# mu = a[i] * mp\n\t"
03480         "mov    %[mp], r8\n\t"
03481         "ldr    %[a], [%[a]]\n\t"
03482         "mul    %[mp], %[a]\n\t"            /* %[mp] holds mu from here on */
03483         "mov    %[m], r14\n\t"              /* %[m] walks over m[j] */
03484         "mov    r10, r9\n\t"                /* r10 = &a[i+j] */
03485         "\n2:\n\t"
03486         "# a[i+j] += m[j] * mu\n\t"
03487         "mov    %[a], r10\n\t"
03488         "ldr    %[a], [%[a]]\n\t"
03489         "mov    %[ca], #0\n\t"              /* %[ca] doubles as a zero register */
03490         "mov    r4, r5\n\t"                 /* r4 = carry from the previous digit */
03491         "mov    r5, #0\n\t"
03492         "# Multiply m[j] and mu - Start\n\t"
03493         "ldr    r7, [%[m]]\n\t"
03494         "lsl    r6, %[mp], #16\n\t"
03495         "lsl    r7, r7, #16\n\t"
03496         "lsr    r6, r6, #16\n\t"
03497         "lsr    r7, r7, #16\n\t"
03498         "mul    r7, r6\n\t"                 /* low half of m[j] * low half of mu */
03499         "add    %[a], r7\n\t"
03500         "adc    r5, %[ca]\n\t"
03501         "ldr    r7, [%[m]]\n\t"
03502         "lsr    r7, r7, #16\n\t"
03503         "mul    r6, r7\n\t"                 /* high half of m[j] * low half of mu */
03504         "lsr    r7, r6, #16\n\t"
03505         "lsl    r6, r6, #16\n\t"
03506         "add    %[a], r6\n\t"
03507         "adc    r5, r7\n\t"
03508         "ldr    r7, [%[m]]\n\t"
03509         "lsr    r6, %[mp], #16\n\t"
03510         "lsr    r7, r7, #16\n\t"
03511         "mul    r7, r6\n\t"                 /* high half * high half */
03512         "add    r5, r7\n\t"
03513         "ldr    r7, [%[m]]\n\t"
03514         "lsl    r7, r7, #16\n\t"
03515         "lsr    r7, r7, #16\n\t"
03516         "mul    r6, r7\n\t"                 /* low half of m[j] * high half of mu */
03517         "lsr    r7, r6, #16\n\t"
03518         "lsl    r6, r6, #16\n\t"
03519         "add    %[a], r6\n\t"
03520         "adc    r5, r7\n\t"
03521         "# Multiply m[j] and mu - Done\n\t"
03522         "add    r4, %[a]\n\t"
03523         "adc    r5, %[ca]\n\t"
03524         "mov    %[a], r10\n\t"
03525         "str    r4, [%[a]]\n\t"             /* store the updated a[i+j] */
03526         "mov    r6, #4\n\t"
03527         "add    %[m], #4\n\t"
03528         "add    r10, r6\n\t"
03529         "mov    r4, #252\n\t"               /* inner loop ends at &a[i+63] */
03530         "add    r4, r9\n\t"
03531         "cmp    r10, r4\n\t"
03532         "blt    2b\n\t"
03533         "# a[i+63] += m[63] * mu\n\t"
03534         "mov    %[ca], #0\n\t"
03535         "mov    r4, r12\n\t"                /* bring in the previous pass carry */
03536         "mov    %[a], #0\n\t"
03537         "# Multiply m[63] and mu - Start\n\t"
03538         "ldr    r7, [%[m]]\n\t"
03539         "lsl    r6, %[mp], #16\n\t"
03540         "lsl    r7, r7, #16\n\t"
03541         "lsr    r6, r6, #16\n\t"
03542         "lsr    r7, r7, #16\n\t"
03543         "mul    r7, r6\n\t"
03544         "add    r5, r7\n\t"
03545         "adc    r4, %[ca]\n\t"
03546         "adc    %[a], %[ca]\n\t"
03547         "ldr    r7, [%[m]]\n\t"
03548         "lsr    r7, r7, #16\n\t"
03549         "mul    r6, r7\n\t"
03550         "lsr    r7, r6, #16\n\t"
03551         "lsl    r6, r6, #16\n\t"
03552         "add    r5, r6\n\t"
03553         "adc    r4, r7\n\t"
03554         "adc    %[a], %[ca]\n\t"
03555         "ldr    r7, [%[m]]\n\t"
03556         "lsr    r6, %[mp], #16\n\t"
03557         "lsr    r7, r7, #16\n\t"
03558         "mul    r7, r6\n\t"
03559         "add    r4, r7\n\t"
03560         "adc    %[a], %[ca]\n\t"
03561         "ldr    r7, [%[m]]\n\t"
03562         "lsl    r7, r7, #16\n\t"
03563         "lsr    r7, r7, #16\n\t"
03564         "mul    r6, r7\n\t"
03565         "lsr    r7, r6, #16\n\t"
03566         "lsl    r6, r6, #16\n\t"
03567         "add    r5, r6\n\t"
03568         "adc    r4, r7\n\t"
03569         "adc    %[a], %[ca]\n\t"
03570         "# Multiply m[63] and mu - Done\n\t"
03571         "mov    %[ca], %[a]\n\t"
03572         "mov    %[a], r10\n\t"
03573         "ldr    r7, [%[a], #4]\n\t"         /* r7 = a[i+64] */
03574         "ldr    %[a], [%[a]]\n\t"           /* a[i+63] */
03575         "mov    r6, #0\n\t"
03576         "add    r5, %[a]\n\t"
03577         "adc    r7, r4\n\t"
03578         "adc    %[ca], r6\n\t"              /* carry out of this pass */
03579         "mov    %[a], r10\n\t"
03580         "str    r5, [%[a]]\n\t"
03581         "str    r7, [%[a], #4]\n\t"
03582         "# i += 1\n\t"
03583         "mov    r6, #4\n\t"
03584         "add    r9, r6\n\t"
03585         "add    r11, r6\n\t"
03586         "mov    r12, %[ca]\n\t"
03587         "mov    %[a], r9\n\t"               /* on exit %[a] is 64 digits past its entry value */
03588         "mov    r4, #1\n\t"
03589         "lsl    r4, r4, #8\n\t"             /* 256 = 64 digits * 4 bytes */
03590         "cmp    r11, r4\n\t"
03591         "blt    1b\n\t"
03592         "mov    %[m], r14\n\t"              /* restore m for the call below */
03593         : [ca] "+r" (ca), [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) /* all four are written */
03594         :
03595         : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14"
03596     );
03597 
03598     sp_2048_cond_sub_64(a - 64, a, m, (sp_digit)0 - ca);
03599 }
03600 
03601 /* Multiply two Montogmery form numbers mod the modulus (prime).
03602  * (r = a * b mod m)
03603  *
03604  * r   Result of multiplication.
03605  * a   First number to multiply in Montogmery form.
03606  * b   Second number to multiply in Montogmery form.
03607  * m   Modulus (prime).
03608  * mp  Montogmery mulitplier.
03609  */
03610 static void sp_2048_mont_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b,
03611         const sp_digit* m, sp_digit mp)
03612 {
03613     sp_2048_mul_64(r, a, b);
03614     sp_2048_mont_reduce_64(r, m, mp);
03615 }
03616 
03617 /* Square the Montgomery form number. (r = a * a mod m)
03618  *
03619  * r   Result of squaring.
03620  * a   Number to square in Montogmery form.
03621  * m   Modulus (prime).
03622  * mp  Montogmery mulitplier.
03623  */
03624 static void sp_2048_mont_sqr_64(sp_digit* r, const sp_digit* a, const sp_digit* m,
03625         sp_digit mp)
03626 {
03627     sp_2048_sqr_64(r, a);
03628     sp_2048_mont_reduce_64(r, m, mp);
03629 }
03630 
03631 /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
03632  *
03633  * d1   The high order half of the number to divide.
03634  * d0   The low order half of the number to divide.
03635  * div  The dividend.
03636  * returns the result of the division.
03637  *
03638  * Note that this is an approximate div. It may give an answer 1 larger.
03639  */
03640 SP_NOINLINE static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0,
03641         sp_digit div)
03642 {
03643     sp_digit r = 0;
03644 
03645     __asm__ __volatile__ (
03646         "lsr    r5, %[div], #1\n\t"
03647         "add    r5, #1\n\t"
03648         "mov    r8, %[d0]\n\t"
03649         "mov    r9, %[d1]\n\t"
03650         "# Do top 32\n\t"
03651         "mov    r6, r5\n\t"
03652         "sub    r6, %[d1]\n\t"
03653         "sbc    r6, r6\n\t"
03654         "add    %[r], %[r]\n\t"
03655         "sub    %[r], r6\n\t"
03656         "and    r6, r5\n\t"
03657         "sub    %[d1], r6\n\t"
03658         "# Next 30 bits\n\t"
03659         "mov    r4, #29\n\t"
03660         "1:\n\t"
03661         "lsl    %[d0], %[d0], #1\n\t"
03662         "adc    %[d1], %[d1]\n\t"
03663         "mov    r6, r5\n\t"
03664         "sub    r6, %[d1]\n\t"
03665         "sbc    r6, r6\n\t"
03666         "add    %[r], %[r]\n\t"
03667         "sub    %[r], r6\n\t"
03668         "and    r6, r5\n\t"
03669         "sub    %[d1], r6\n\t"
03670         "sub    r4, #1\n\t"
03671         "bpl    1b\n\t"
03672         "mov    r7, #0\n\t"
03673         "add    %[r], %[r]\n\t"
03674         "add    %[r], #1\n\t"
03675         "# r * div - Start\n\t"
03676         "lsl    %[d1], %[r], #16\n\t"
03677         "lsl    r4, %[div], #16\n\t"
03678         "lsr    %[d1], %[d1], #16\n\t"
03679         "lsr    r4, r4, #16\n\t"
03680         "mul    r4, %[d1]\n\t"
03681         "lsr    r6, %[div], #16\n\t"
03682         "mul    %[d1], r6\n\t"
03683         "lsr    r5, %[d1], #16\n\t"
03684         "lsl    %[d1], %[d1], #16\n\t"
03685         "add    r4, %[d1]\n\t"
03686         "adc    r5, r7\n\t"
03687         "lsr    %[d1], %[r], #16\n\t"
03688         "mul    r6, %[d1]\n\t"
03689         "add    r5, r6\n\t"
03690         "lsl    r6, %[div], #16\n\t"
03691         "lsr    r6, r6, #16\n\t"
03692         "mul    %[d1], r6\n\t"
03693         "lsr    r6, %[d1], #16\n\t"
03694         "lsl    %[d1], %[d1], #16\n\t"
03695         "add    r4, %[d1]\n\t"
03696         "adc    r5, r6\n\t"
03697         "# r * div - Done\n\t"
03698         "mov    %[d1], r8\n\t"
03699         "sub    %[d1], r4\n\t"
03700         "mov    r4, %[d1]\n\t"
03701         "mov    %[d1], r9\n\t"
03702         "sbc    %[d1], r5\n\t"
03703         "mov    r5, %[d1]\n\t"
03704         "add    %[r], r5\n\t"
03705         "# r * div - Start\n\t"
03706         "lsl    %[d1], %[r], #16\n\t"
03707         "lsl    r4, %[div], #16\n\t"
03708         "lsr    %[d1], %[d1], #16\n\t"
03709         "lsr    r4, r4, #16\n\t"
03710         "mul    r4, %[d1]\n\t"
03711         "lsr    r6, %[div], #16\n\t"
03712         "mul    %[d1], r6\n\t"
03713         "lsr    r5, %[d1], #16\n\t"
03714         "lsl    %[d1], %[d1], #16\n\t"
03715         "add    r4, %[d1]\n\t"
03716         "adc    r5, r7\n\t"
03717         "lsr    %[d1], %[r], #16\n\t"
03718         "mul    r6, %[d1]\n\t"
03719         "add    r5, r6\n\t"
03720         "lsl    r6, %[div], #16\n\t"
03721         "lsr    r6, r6, #16\n\t"
03722         "mul    %[d1], r6\n\t"
03723         "lsr    r6, %[d1], #16\n\t"
03724         "lsl    %[d1], %[d1], #16\n\t"
03725         "add    r4, %[d1]\n\t"
03726         "adc    r5, r6\n\t"
03727         "# r * div - Done\n\t"
03728         "mov    %[d1], r8\n\t"
03729         "mov    r6, r9\n\t"
03730         "sub    r4, %[d1], r4\n\t"
03731         "sbc    r6, r5\n\t"
03732         "mov    r5, r6\n\t"
03733         "add    %[r], r5\n\t"
03734         "# r * div - Start\n\t"
03735         "lsl    %[d1], %[r], #16\n\t"
03736         "lsl    r4, %[div], #16\n\t"
03737         "lsr    %[d1], %[d1], #16\n\t"
03738         "lsr    r4, r4, #16\n\t"
03739         "mul    r4, %[d1]\n\t"
03740         "lsr    r6, %[div], #16\n\t"
03741         "mul    %[d1], r6\n\t"
03742         "lsr    r5, %[d1], #16\n\t"
03743         "lsl    %[d1], %[d1], #16\n\t"
03744         "add    r4, %[d1]\n\t"
03745         "adc    r5, r7\n\t"
03746         "lsr    %[d1], %[r], #16\n\t"
03747         "mul    r6, %[d1]\n\t"
03748         "add    r5, r6\n\t"
03749         "lsl    r6, %[div], #16\n\t"
03750         "lsr    r6, r6, #16\n\t"
03751         "mul    %[d1], r6\n\t"
03752         "lsr    r6, %[d1], #16\n\t"
03753         "lsl    %[d1], %[d1], #16\n\t"
03754         "add    r4, %[d1]\n\t"
03755         "adc    r5, r6\n\t"
03756         "# r * div - Done\n\t"
03757         "mov    %[d1], r8\n\t"
03758         "mov    r6, r9\n\t"
03759         "sub    r4, %[d1], r4\n\t"
03760         "sbc    r6, r5\n\t"
03761         "mov    r5, r6\n\t"
03762         "add    %[r], r5\n\t"
03763         "mov    r6, %[div]\n\t"
03764         "sub    r6, r4\n\t"
03765         "sbc    r6, r6\n\t"
03766         "sub    %[r], r6\n\t"
03767         : [r] "+r" (r)
03768         : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
03769         : "r4", "r5", "r7", "r6", "r8", "r9"
03770     );
03771     return r;
03772 }
03773 
03774 /* AND m into each word of a and store in r.
03775  *
03776  * r  A single precision integer.
03777  * a  A single precision integer.
03778  * m  Mask to AND against each digit.
03779  */
03780 static void sp_2048_mask_64(sp_digit* r, const sp_digit* a, sp_digit m)
03781 {
03782 #ifdef WOLFSSL_SP_SMALL
03783     int i;
03784 
03785     for (i=0; i<64; i++) {
03786         r[i] = a[i] & m;
03787     }
03788 #else
03789     int i;
03790 
03791     for (i = 0; i < 64; i += 8) {
03792         r[i+0] = a[i+0] & m;
03793         r[i+1] = a[i+1] & m;
03794         r[i+2] = a[i+2] & m;
03795         r[i+3] = a[i+3] & m;
03796         r[i+4] = a[i+4] & m;
03797         r[i+5] = a[i+5] & m;
03798         r[i+6] = a[i+6] & m;
03799         r[i+7] = a[i+7] & m;
03800     }
03801 #endif
03802 }
03803 
03804 /* Compare a with b in constant time.
03805  *
03806  * a  A single precision integer.
03807  * b  A single precision integer.
03808  * return -ve, 0 or +ve if a is less than, equal to or greater than b
03809  * respectively.
03810  */
03811 SP_NOINLINE static int32_t sp_2048_cmp_64(const sp_digit* a, const sp_digit* b)
03812 {
03813     sp_digit r = 0;
03814 
03815 
03816     __asm__ __volatile__ (
03817         "mov    r3, #0\n\t"
03818         "mvn    r3, r3\n\t"
03819         "mov    r6, #252\n\t"
03820         "1:\n\t"
03821         "ldr    r7, [%[a], r6]\n\t"
03822         "ldr    r5, [%[b], r6]\n\t"
03823         "and    r7, r3\n\t"
03824         "and    r5, r3\n\t"
03825         "mov    r4, r7\n\t"
03826         "sub    r7, r5\n\t"
03827         "sbc    r7, r7\n\t"
03828         "add    %[r], r7\n\t"
03829         "mvn    r7, r7\n\t"
03830         "and    r3, r7\n\t"
03831         "sub    r5, r4\n\t"
03832         "sbc    r7, r7\n\t"
03833         "sub    %[r], r7\n\t"
03834         "mvn    r7, r7\n\t"
03835         "and    r3, r7\n\t"
03836         "sub    r6, #4\n\t"
03837         "cmp    r6, #0\n\t"
03838         "bge    1b\n\t"
03839         : [r] "+r" (r)
03840         : [a] "r" (a), [b] "r" (b)
03841         : "r3", "r4", "r5", "r6", "r7"
03842     );
03843 
03844     return r;
03845 }
03846 
03847 /* Divide d in a and put remainder into r (m*d + r = a)
03848  * m is not calculated as it is not needed at this time.
03849  *
03850  * a  Nmber to be divided.
03851  * d  Number to divide with.
03852  * m  Multiplier result.
03853  * r  Remainder from the division.
03854  * returns MP_OKAY indicating success.
03855  */
03856 static WC_INLINE int sp_2048_div_64(const sp_digit* a, const sp_digit* d, sp_digit* m,
03857         sp_digit* r)
03858 {
03859     sp_digit t1[128], t2[65];
03860     sp_digit div, r1;
03861     int i;
03862 
03863     (void)m;
03864 
03865     div = d[63];
03866     XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
03867     for (i=63; i>=0; i--) {
03868         r1 = div_2048_word_64(t1[64 + i], t1[64 + i - 1], div);
03869 
03870         sp_2048_mul_d_64(t2, d, r1);
03871         t1[64 + i] += sp_2048_sub_in_place_64(&t1[i], t2);
03872         t1[64 + i] -= t2[64];
03873         sp_2048_mask_64(t2, d, t1[64 + i]);
03874         t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], t2);
03875         sp_2048_mask_64(t2, d, t1[64 + i]);
03876         t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], t2);
03877     }
03878 
03879     r1 = sp_2048_cmp_64(t1, d) >= 0;
03880     sp_2048_cond_sub_64(r, t1, d, (sp_digit)0 - r1);
03881 
03882     return MP_OKAY;
03883 }
03884 
03885 /* Reduce a modulo m into r. (r = a mod m)
03886  *
03887  * r  A single precision number that is the reduced result.
03888  * a  A single precision number that is to be reduced.
03889  * m  A single precision number that is the modulus to reduce with.
03890  * returns MP_OKAY indicating success.
03891  */
03892 static WC_INLINE int sp_2048_mod_64(sp_digit* r, const sp_digit* a, const sp_digit* m)
03893 {
03894     return sp_2048_div_64(a, m, NULL, r);
03895 }
03896 
03897 /* Divide d in a and put remainder into r (m*d + r = a)
03898  * m is not calculated as it is not needed at this time.
03899  *
03900  * a  Nmber to be divided.
03901  * d  Number to divide with.
03902  * m  Multiplier result.
03903  * r  Remainder from the division.
03904  * returns MP_OKAY indicating success.
03905  */
03906 static WC_INLINE int sp_2048_div_64_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
03907         sp_digit* r)
03908 {
03909     sp_digit t1[128], t2[65];
03910     sp_digit div, r1;
03911     int i;
03912 
03913     (void)m;
03914 
03915     div = d[63];
03916     XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
03917     for (i=63; i>=0; i--) {
03918         r1 = div_2048_word_64(t1[64 + i], t1[64 + i - 1], div);
03919 
03920         sp_2048_mul_d_64(t2, d, r1);
03921         t1[64 + i] += sp_2048_sub_in_place_64(&t1[i], t2);
03922         t1[64 + i] -= t2[64];
03923         if (t1[64 + i] != 0) {
03924             t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], d);
03925             if (t1[64 + i] != 0)
03926                 t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], d);
03927         }
03928     }
03929 
03930     r1 = sp_2048_cmp_64(t1, d) >= 0;
03931     sp_2048_cond_sub_64(r, t1, d, (sp_digit)0 - r1);
03932 
03933     return MP_OKAY;
03934 }
03935 
03936 /* Reduce a modulo m into r. (r = a mod m)
03937  *
03938  * r  A single precision number that is the reduced result.
03939  * a  A single precision number that is to be reduced.
03940  * m  A single precision number that is the modulus to reduce with.
03941  * returns MP_OKAY indicating success.
03942  */
03943 static WC_INLINE int sp_2048_mod_64_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
03944 {
03945     return sp_2048_div_64_cond(a, m, NULL, r);
03946 }
03947 
03948 #if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
03949                                                      defined(WOLFSSL_HAVE_SP_DH)
03950 #ifdef WOLFSSL_SP_SMALL
03951 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
03952  *
03953  * r     A single precision number that is the result of the operation.
03954  * a     A single precision number being exponentiated.
03955  * e     A single precision number that is the exponent.
03956  * bits  The number of bits in the exponent.
03957  * m     A single precision number that is the modulus.
03958  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
03959  */
03960 static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e,
03961         int bits, const sp_digit* m, int reduceA)
03962 {
03963 #ifndef WOLFSSL_SMALL_STACK
03964     sp_digit t[16][128];
03965 #else
03966     sp_digit* t[16];
03967     sp_digit* td;
03968 #endif
03969     sp_digit* norm;
03970     sp_digit mp = 1;
03971     sp_digit n;
03972     sp_digit mask;
03973     int i;
03974     int c, y;
03975     int err = MP_OKAY;
03976 
03977 #ifdef WOLFSSL_SMALL_STACK
03978     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 128, NULL,
03979                             DYNAMIC_TYPE_TMP_BUFFER);
03980     if (td == NULL) {
03981         err = MEMORY_E;
03982     }
03983 #endif
03984 
03985     if (err == MP_OKAY) {
03986 #ifdef WOLFSSL_SMALL_STACK
03987         for (i=0; i<16; i++) {
03988             t[i] = td + i * 128;
03989         }
03990 #endif
03991         norm = t[0];
03992 
03993         sp_2048_mont_setup(m, &mp);
03994         sp_2048_mont_norm_64(norm, m);
03995 
03996         XMEMSET(t[1], 0, sizeof(sp_digit) * 64U);
03997         if (reduceA != 0) {
03998             err = sp_2048_mod_64(t[1] + 64, a, m);
03999             if (err == MP_OKAY) {
04000                 err = sp_2048_mod_64(t[1], t[1], m);
04001             }
04002         }
04003         else {
04004             XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64);
04005             err = sp_2048_mod_64(t[1], t[1], m);
04006         }
04007     }
04008 
04009     if (err == MP_OKAY) {
04010         sp_2048_mont_sqr_64(t[ 2], t[ 1], m, mp);
04011         sp_2048_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp);
04012         sp_2048_mont_sqr_64(t[ 4], t[ 2], m, mp);
04013         sp_2048_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp);
04014         sp_2048_mont_sqr_64(t[ 6], t[ 3], m, mp);
04015         sp_2048_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp);
04016         sp_2048_mont_sqr_64(t[ 8], t[ 4], m, mp);
04017         sp_2048_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp);
04018         sp_2048_mont_sqr_64(t[10], t[ 5], m, mp);
04019         sp_2048_mont_mul_64(t[11], t[ 6], t[ 5], m, mp);
04020         sp_2048_mont_sqr_64(t[12], t[ 6], m, mp);
04021         sp_2048_mont_mul_64(t[13], t[ 7], t[ 6], m, mp);
04022         sp_2048_mont_sqr_64(t[14], t[ 7], m, mp);
04023         sp_2048_mont_mul_64(t[15], t[ 8], t[ 7], m, mp);
04024 
04025         i = (bits - 1) / 32;
04026         n = e[i--];
04027         c = bits & 31;
04028         if (c == 0) {
04029             c = 32;
04030         }
04031         c -= bits % 4;
04032         if (c == 32) {
04033             c = 28;
04034         }
04035         y = (int)(n >> c);
04036         n <<= 32 - c;
04037         XMEMCPY(r, t[y], sizeof(sp_digit) * 64);
04038         for (; i>=0 || c>=4; ) {
04039             if (c == 0) {
04040                 n = e[i--];
04041                 y = n >> 28;
04042                 n <<= 4;
04043                 c = 28;
04044             }
04045             else if (c < 4) {
04046                 y = n >> 28;
04047                 n = e[i--];
04048                 c = 4 - c;
04049                 y |= n >> (32 - c);
04050                 n <<= c;
04051                 c = 32 - c;
04052             }
04053             else {
04054                 y = (n >> 28) & 0xf;
04055                 n <<= 4;
04056                 c -= 4;
04057             }
04058 
04059             sp_2048_mont_sqr_64(r, r, m, mp);
04060             sp_2048_mont_sqr_64(r, r, m, mp);
04061             sp_2048_mont_sqr_64(r, r, m, mp);
04062             sp_2048_mont_sqr_64(r, r, m, mp);
04063 
04064             sp_2048_mont_mul_64(r, r, t[y], m, mp);
04065         }
04066 
04067         XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
04068         sp_2048_mont_reduce_64(r, m, mp);
04069 
04070         mask = 0 - (sp_2048_cmp_64(r, m) >= 0);
04071         sp_2048_cond_sub_64(r, r, m, mask);
04072     }
04073 
04074 #ifdef WOLFSSL_SMALL_STACK
04075     if (td != NULL) {
04076         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
04077     }
04078 #endif
04079 
04080     return err;
04081 }
04082 #else
04083 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
04084  *
04085  * r     A single precision number that is the result of the operation.
04086  * a     A single precision number being exponentiated.
04087  * e     A single precision number that is the exponent.
04088  * bits  The number of bits in the exponent.
04089  * m     A single precision number that is the modulus.
04090  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
04091  */
04092 static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e,
04093         int bits, const sp_digit* m, int reduceA)
04094 {
04095 #ifndef WOLFSSL_SMALL_STACK
04096     sp_digit t[32][128];
04097 #else
04098     sp_digit* t[32];
04099     sp_digit* td;
04100 #endif
04101     sp_digit* norm;
04102     sp_digit mp = 1;
04103     sp_digit n;
04104     sp_digit mask;
04105     int i;
04106     int c, y;
04107     int err = MP_OKAY;
04108 
04109 #ifdef WOLFSSL_SMALL_STACK
04110     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 128, NULL,
04111                             DYNAMIC_TYPE_TMP_BUFFER);
04112     if (td == NULL) {
04113         err = MEMORY_E;
04114     }
04115 #endif
04116 
04117     if (err == MP_OKAY) {
04118 #ifdef WOLFSSL_SMALL_STACK
04119         for (i=0; i<32; i++) {
04120             t[i] = td + i * 128;
04121         }
04122 #endif
04123         norm = t[0];
04124 
04125         sp_2048_mont_setup(m, &mp);
04126         sp_2048_mont_norm_64(norm, m);
04127 
04128         XMEMSET(t[1], 0, sizeof(sp_digit) * 64U);
04129         if (reduceA != 0) {
04130             err = sp_2048_mod_64(t[1] + 64, a, m);
04131             if (err == MP_OKAY) {
04132                 err = sp_2048_mod_64(t[1], t[1], m);
04133             }
04134         }
04135         else {
04136             XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64);
04137             err = sp_2048_mod_64(t[1], t[1], m);
04138         }
04139     }
04140 
04141     if (err == MP_OKAY) {
04142         sp_2048_mont_sqr_64(t[ 2], t[ 1], m, mp);
04143         sp_2048_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp);
04144         sp_2048_mont_sqr_64(t[ 4], t[ 2], m, mp);
04145         sp_2048_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp);
04146         sp_2048_mont_sqr_64(t[ 6], t[ 3], m, mp);
04147         sp_2048_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp);
04148         sp_2048_mont_sqr_64(t[ 8], t[ 4], m, mp);
04149         sp_2048_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp);
04150         sp_2048_mont_sqr_64(t[10], t[ 5], m, mp);
04151         sp_2048_mont_mul_64(t[11], t[ 6], t[ 5], m, mp);
04152         sp_2048_mont_sqr_64(t[12], t[ 6], m, mp);
04153         sp_2048_mont_mul_64(t[13], t[ 7], t[ 6], m, mp);
04154         sp_2048_mont_sqr_64(t[14], t[ 7], m, mp);
04155         sp_2048_mont_mul_64(t[15], t[ 8], t[ 7], m, mp);
04156         sp_2048_mont_sqr_64(t[16], t[ 8], m, mp);
04157         sp_2048_mont_mul_64(t[17], t[ 9], t[ 8], m, mp);
04158         sp_2048_mont_sqr_64(t[18], t[ 9], m, mp);
04159         sp_2048_mont_mul_64(t[19], t[10], t[ 9], m, mp);
04160         sp_2048_mont_sqr_64(t[20], t[10], m, mp);
04161         sp_2048_mont_mul_64(t[21], t[11], t[10], m, mp);
04162         sp_2048_mont_sqr_64(t[22], t[11], m, mp);
04163         sp_2048_mont_mul_64(t[23], t[12], t[11], m, mp);
04164         sp_2048_mont_sqr_64(t[24], t[12], m, mp);
04165         sp_2048_mont_mul_64(t[25], t[13], t[12], m, mp);
04166         sp_2048_mont_sqr_64(t[26], t[13], m, mp);
04167         sp_2048_mont_mul_64(t[27], t[14], t[13], m, mp);
04168         sp_2048_mont_sqr_64(t[28], t[14], m, mp);
04169         sp_2048_mont_mul_64(t[29], t[15], t[14], m, mp);
04170         sp_2048_mont_sqr_64(t[30], t[15], m, mp);
04171         sp_2048_mont_mul_64(t[31], t[16], t[15], m, mp);
04172 
04173         i = (bits - 1) / 32;
04174         n = e[i--];
04175         c = bits & 31;
04176         if (c == 0) {
04177             c = 32;
04178         }
04179         c -= bits % 5;
04180         if (c == 32) {
04181             c = 27;
04182         }
04183         y = (int)(n >> c);
04184         n <<= 32 - c;
04185         XMEMCPY(r, t[y], sizeof(sp_digit) * 64);
04186         for (; i>=0 || c>=5; ) {
04187             if (c == 0) {
04188                 n = e[i--];
04189                 y = n >> 27;
04190                 n <<= 5;
04191                 c = 27;
04192             }
04193             else if (c < 5) {
04194                 y = n >> 27;
04195                 n = e[i--];
04196                 c = 5 - c;
04197                 y |= n >> (32 - c);
04198                 n <<= c;
04199                 c = 32 - c;
04200             }
04201             else {
04202                 y = (n >> 27) & 0x1f;
04203                 n <<= 5;
04204                 c -= 5;
04205             }
04206 
04207             sp_2048_mont_sqr_64(r, r, m, mp);
04208             sp_2048_mont_sqr_64(r, r, m, mp);
04209             sp_2048_mont_sqr_64(r, r, m, mp);
04210             sp_2048_mont_sqr_64(r, r, m, mp);
04211             sp_2048_mont_sqr_64(r, r, m, mp);
04212 
04213             sp_2048_mont_mul_64(r, r, t[y], m, mp);
04214         }
04215 
04216         XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
04217         sp_2048_mont_reduce_64(r, m, mp);
04218 
04219         mask = 0 - (sp_2048_cmp_64(r, m) >= 0);
04220         sp_2048_cond_sub_64(r, r, m, mask);
04221     }
04222 
04223 #ifdef WOLFSSL_SMALL_STACK
04224     if (td != NULL) {
04225         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
04226     }
04227 #endif
04228 
04229     return err;
04230 }
04231 #endif /* WOLFSSL_SP_SMALL */
04232 #endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
04233 
04234 #ifdef WOLFSSL_HAVE_SP_RSA
04235 /* RSA public key operation.
04236  *
04237  * in      Array of bytes representing the number to exponentiate, base.
04238  * inLen   Number of bytes in base.
04239  * em      Public exponent.
04240  * mm      Modulus.
04241  * out     Buffer to hold big-endian bytes of exponentiation result.
04242  *         Must be at least 256 bytes long.
04243  * outLen  Number of bytes in result.
04244  * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
04245  * an array is too long and MEMORY_E when dynamic memory allocation fails.
04246  */
04247 int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
04248     byte* out, word32* outLen)
04249 {
04250 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
04251     sp_digit a[128], m[64], r[128];
04252 #else
04253     sp_digit* d = NULL;
04254     sp_digit* a;
04255     sp_digit* m;
04256     sp_digit* r;
04257 #endif
04258     sp_digit *ah;
04259     sp_digit e[1];
04260     int err = MP_OKAY;
04261 
04262     if (*outLen < 256)
04263         err = MP_TO_E;
04264     if (err == MP_OKAY && (mp_count_bits(em) > 32 || inLen > 256 ||
04265                                                      mp_count_bits(mm) != 2048))
04266         err = MP_READ_E;
04267 
04268 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
04269     if (err == MP_OKAY) {
04270         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 5, NULL,
04271                                                               DYNAMIC_TYPE_RSA);
04272         if (d == NULL)
04273             err = MEMORY_E;
04274     }
04275 
04276     if (err == MP_OKAY) {
04277         a = d;
04278         r = a + 64 * 2;
04279         m = r + 64 * 2;
04280     }
04281 #endif
04282 
04283     if (err == MP_OKAY) {
04284         ah = a + 64;
04285 
04286         sp_2048_from_bin(ah, 64, in, inLen);
04287 #if DIGIT_BIT >= 32
04288         e[0] = em->dp[0];
04289 #else
04290         e[0] = em->dp[0];
04291         if (em->used > 1) {
04292             e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
04293         }
04294 #endif
04295         if (e[0] == 0) {
04296             err = MP_EXPTMOD_E;
04297         }
04298     }
04299     if (err == MP_OKAY) {
04300         sp_2048_from_mp(m, 64, mm);
04301 
04302         if (e[0] == 0x3) {
04303             if (err == MP_OKAY) {
04304                 sp_2048_sqr_64(r, ah);
04305                 err = sp_2048_mod_64_cond(r, r, m);
04306             }
04307             if (err == MP_OKAY) {
04308                 sp_2048_mul_64(r, ah, r);
04309                 err = sp_2048_mod_64_cond(r, r, m);
04310             }
04311         }
04312         else {
04313             int i;
04314             sp_digit mp;
04315 
04316             sp_2048_mont_setup(m, &mp);
04317 
04318             /* Convert to Montgomery form. */
04319             XMEMSET(a, 0, sizeof(sp_digit) * 64);
04320             err = sp_2048_mod_64_cond(a, a, m);
04321 
04322             if (err == MP_OKAY) {
04323                 for (i = 31; i >= 0; i--) {
04324                     if (e[0] >> i) {
04325                         break;
04326                     }
04327                 }
04328 
04329                 XMEMCPY(r, a, sizeof(sp_digit) * 64);
04330                 for (i--; i>=0; i--) {
04331                     sp_2048_mont_sqr_64(r, r, m, mp);
04332                     if (((e[0] >> i) & 1) == 1) {
04333                         sp_2048_mont_mul_64(r, r, a, m, mp);
04334                     }
04335                 }
04336                 XMEMSET(&r[64], 0, sizeof(sp_digit) * 64);
04337                 sp_2048_mont_reduce_64(r, m, mp);
04338 
04339                 for (i = 63; i > 0; i--) {
04340                     if (r[i] != m[i]) {
04341                         break;
04342                     }
04343                 }
04344                 if (r[i] >= m[i]) {
04345                     sp_2048_sub_in_place_64(r, m);
04346                 }
04347             }
04348         }
04349     }
04350 
04351     if (err == MP_OKAY) {
04352         sp_2048_to_bin(r, out);
04353         *outLen = 256;
04354     }
04355 
04356 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
04357     if (d != NULL) {
04358         XFREE(d, NULL, DYNAMIC_TYPE_RSA);
04359     }
04360 #endif
04361 
04362     return err;
04363 }
04364 
04365 #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
/* NOTE(review): the statements in this SP_RSA_PRIVATE_EXP_D / RSA_LOW_MEM
 * branch sit at file scope - no function header precedes them in this file.
 * They use in, inLen, dm, pm, qm, dpm, dqm, qim, mm, out and outLen, which
 * match the parameters of sp_RsaPrivate_2048() defined further down, so this
 * presumably belongs inside that function's body - TODO confirm and move.
 * As written, defining either macro would leave these statements outside
 * any function.
 *
 * What the statements do: a single private-exponent modular exponentiation
 * (no CRT), using one allocation of 4 * 64 words laid out as
 *   d          64 words, the private exponent read from dm
 *   a = d+64   128 words, the base read from in; also used as the result r
 *   m = a+128  64 words, the modulus read from mm
 */
04366     sp_digit* a;
04367     sp_digit* d = NULL;
04368     sp_digit* m;
04369     sp_digit* r;
04370     int err = MP_OKAY;
04371 
04372     (void)pm;
04373     (void)qm;
04374     (void)dpm;
04375     (void)dqm;
04376     (void)qim;
04377 
04378     if (*outLen < 256U) {
04379         err = MP_TO_E;
04380     }
04381     if (err == MP_OKAY) {
04382         if (mp_count_bits(dm) > 2048) {
04383            err = MP_READ_E;
04384         }
04385         if (inLen > 256) {
04386             err = MP_READ_E;
04387         }
04388         if (mp_count_bits(mm) != 2048) {
04389             err = MP_READ_E;
04390         }
04391     }
04392 
04393     if (err == MP_OKAY) {
04394         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 4, NULL,
04395                                                               DYNAMIC_TYPE_RSA);
04396         if (d == NULL) {
04397             err = MEMORY_E;
04398         }
04399     }
04400     if (err == MP_OKAY) {
04401         a = d + 64;
04402         m = a + 128;
04403         r = a; /* result overwrites the base buffer */
04404 
04405         sp_2048_from_bin(a, 64, in, inLen);
04406         sp_2048_from_mp(d, 64, dm);
04407         sp_2048_from_mp(m, 64, mm);
04408         err = sp_2048_mod_exp_64(r, a, d, 2048, m, 0);
04409     }
04410     if (err == MP_OKAY) {
04411         sp_2048_to_bin(r, out);
04412         *outLen = 256;
04413     }
04414 
04415     if (d != NULL) {
04416         XMEMSET(d, 0, sizeof(sp_digit) * 64); /* clears only the first 64 words, i.e. the exponent copy */
04417         XFREE(d, NULL, DYNAMIC_TYPE_RSA);
04418     }
04419 
04420     return err;
04421 #else
04422 #ifndef WOLFSSL_RSA_PUBLIC_ONLY
04423 /* Conditionally add a and b using the mask m.
04424  * m is -1 to add and 0 when not.
04425  *
04426  * r  A single precision number representing conditional add result.
04427  * a  A single precision number to add with.
04428  * b  A single precision number to add.
04429  * m  Mask value to apply.
04430  */
04431 SP_NOINLINE static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b,
04432         sp_digit m)
04433 {
04434     sp_digit c = 0;
04435 
04436     __asm__ __volatile__ (
04437         "mov    r5, #128\n\t"
04438         "mov    r8, r5\n\t"
04439         "mov    r7, #0\n\t"
04440         "1:\n\t"
04441         "ldr    r6, [%[b], r7]\n\t"
04442         "and    r6, %[m]\n\t"
04443         "mov    r5, #0\n\t"
04444         "sub    r5, #1\n\t"
04445         "add    r5, %[c]\n\t"
04446         "ldr    r5, [%[a], r7]\n\t"
04447         "adc    r5, r6\n\t"
04448         "mov    %[c], #0\n\t"
04449         "adc    %[c], %[c]\n\t"
04450         "str    r5, [%[r], r7]\n\t"
04451         "add    r7, #4\n\t"
04452         "cmp    r7, r8\n\t"
04453         "blt    1b\n\t"
04454         : [c] "+r" (c)
04455         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
04456         : "memory", "r5", "r6", "r7", "r8"
04457     );
04458 
04459     return c;
04460 }
04461 
04462 /* RSA private key operation.
       *
       * Uses the CRT parameters: with m1 = in^dp mod p and m2 = in^dq mod q the
       * result is m2 + q * (((m1 - m2) * qi) mod p).
04463  *
04464  * in      Array of bytes representing the number to exponentiate, base.
04465  * inLen   Number of bytes in base. At most 256.
04466  * dm      Private exponent. Not used by this function.
04467  * pm      First prime.
04468  * qm      Second prime.
04469  * dpm     First prime's CRT exponent.
04470  * dqm     Second prime's CRT exponent.
04471  * qim     Inverse of second prime mod p.
04472  * mm      Modulus. Only its bit count is checked: it must be 2048.
04473  * out     Buffer to hold big-endian bytes of exponentiation result.
04474  *         Must be at least 256 bytes long.
04475  * outLen  On entry, the size of out in bytes. Set to 256 on success.
04476  * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
04477  * an array is too long and MEMORY_E when dynamic memory allocation fails.
04478  */
04479 int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm,
04480     mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
04481     byte* out, word32* outLen)
04482 {
04483 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
          /* All working buffers are on the stack. */
04484     sp_digit a[64 * 2];
04485     sp_digit p[32], q[32], dp[32];
04486     sp_digit tmpa[64], tmpb[64];
04487 #else
          /* All working buffers are carved out of one allocation, t. */
04488     sp_digit* t = NULL;
04489     sp_digit* a;
04490     sp_digit* p;
04491     sp_digit* q;
04492     sp_digit* dp;
04493     sp_digit* tmpa;
04494     sp_digit* tmpb;
04495 #endif
04496     sp_digit* r;
04497     sp_digit* qi;
04498     sp_digit* dq;
04499     sp_digit c;
04500     int err = MP_OKAY;
04501 
04502     (void)dm;
          /* mm is in fact read by the bit count check below. */
04503     (void)mm;
04504 
04505     if (*outLen < 256)
04506         err = MP_TO_E;
04507     if (err == MP_OKAY && (inLen > 256 || mp_count_bits(mm) != 2048))
04508         err = MP_READ_E;
04509 
04510 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
04511     if (err == MP_OKAY) {
              /* 32 * 11 words: a (128), p (32), q (32), dp (32), tmpa (64) and
               * tmpb (64). */
04512         t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 11, NULL,
04513                                                               DYNAMIC_TYPE_RSA);
04514         if (t == NULL)
04515             err = MEMORY_E;
04516     }
04517     if (err == MP_OKAY) {
04518         a = t;
04519         p = a + 64 * 2;
04520         q = p + 32;
              /* dp, dq and qi are used one after another and share 32 words. */
04521         qi = dq = dp = q + 32;
04522         tmpa = qi + 32;
04523         tmpb = tmpa + 64;
04524 
              /* The result goes in the upper half of a. */
04525         r = t + 64;
04526     }
04527 #else
04528 #endif
04529 
          /* NOTE(review): pm, qm, dpm, dqm and qim are not size-checked here; this
           * relies on sp_2048_from_mp() writing no more than the 32 words it is
           * told about - confirm. */
04530     if (err == MP_OKAY) {
04531 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
              /* The result overwrites a; dq and qi reuse the dp buffer. */
04532         r = a;
04533         qi = dq = dp;
04534 #endif
04535         sp_2048_from_bin(a, 64, in, inLen);
04536         sp_2048_from_mp(p, 32, pm);
04537         sp_2048_from_mp(q, 32, qm);
04538         sp_2048_from_mp(dp, 32, dpm);
04539 
              /* tmpa = a ^ dp mod p */
04540         err = sp_2048_mod_exp_32(tmpa, a, dp, 1024, p, 1);
04541     }
04542     if (err == MP_OKAY) {
              /* dq replaces dp in the shared buffer. tmpb = a ^ dq mod q */
04543         sp_2048_from_mp(dq, 32, dqm);
04544         err = sp_2048_mod_exp_32(tmpb, a, dq, 1024, q, 1);
04545     }
04546 
04547     if (err == MP_OKAY) {
              /* tmpa -= tmpb. c is then used as the mask for adding p back; it is
               * presumably all ones when the subtraction borrowed - confirm
               * against sp_2048_sub_in_place_32(). The carry from the first add
               * is added to c, and the sum is the mask for the second add. */
04548         c = sp_2048_sub_in_place_32(tmpa, tmpb);
04549         c += sp_2048_cond_add_32(tmpa, tmpa, p, c);
04550         sp_2048_cond_add_32(tmpa, tmpa, p, c);
04551 
              /* qi replaces dq in the shared buffer. tmpa = (tmpa * qi) mod p */
04552         sp_2048_from_mp(qi, 32, qim);
04553         sp_2048_mul_32(tmpa, tmpa, qi);
04554         err = sp_2048_mod_32(tmpa, tmpa, p);
04555     }
04556 
04557     if (err == MP_OKAY) {
              /* r = tmpb + q * tmpa. The upper 32 words of tmpb are cleared so
               * that it can be added as a 64 word number. */
04558         sp_2048_mul_32(tmpa, q, tmpa);
04559         XMEMSET(&tmpb[32], 0, sizeof(sp_digit) * 32);
04560         sp_2048_add_64(r, tmpb, tmpa);
04561 
04562         sp_2048_to_bin(r, out);
04563         *outLen = 256;
04564     }
04565 
      /* Clear the working values before returning, on success and on failure. */
04566 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
04567     if (t != NULL) {
04568         XMEMSET(t, 0, sizeof(sp_digit) * 32 * 11);
04569         XFREE(t, NULL, DYNAMIC_TYPE_RSA);
04570     }
04571 #else
          /* These are arrays in this configuration, so sizeof gives their full
           * size. a is not cleared here. */
04572     XMEMSET(tmpa, 0, sizeof(tmpa));
04573     XMEMSET(tmpb, 0, sizeof(tmpb));
04574     XMEMSET(p,    0, sizeof(p));
04575     XMEMSET(q,    0, sizeof(q));
04576     XMEMSET(dp,   0, sizeof(dp));
04577 #endif
04578 
04579     return err;
04580 }
04581 #endif /* WOLFSSL_RSA_PUBLIC_ONLY */
04582 #endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
04583 #endif /* WOLFSSL_HAVE_SP_RSA */
04584 #if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
04585                                               !defined(WOLFSSL_RSA_PUBLIC_ONLY))
04586 /* Convert an array of sp_digit to an mp_int.
       *
       * All 64 words (2048 bits) of a are converted and r is then clamped.
04587  *
04588  * a  A single precision integer.
04589  * r  A multi-precision integer.
       * returns the value returned by mp_grow(): MP_OKAY when r could be made
       * large enough to hold 2048 bits.
04590  */
04591 static int sp_2048_to_mp(const sp_digit* a, mp_int* r)
04592 {
04593     int err;
04594 
          /* Make room for 2048 bits, rounded up to whole mp digits. */
04595     err = mp_grow(r, (2048 + DIGIT_BIT - 1) / DIGIT_BIT);
04596     if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
04597 #if DIGIT_BIT == 32
              /* Same digit size on both sides: copy the words across. */
04598         XMEMCPY(r->dp, a, sizeof(sp_digit) * 64);
04599         r->used = 64;
04600         mp_clamp(r);
04601 #elif DIGIT_BIT < 32
              /* mp digits are narrower than a word: each word of a is spread over
               * more than one mp digit. i indexes a, j indexes r->dp and s tracks
               * the bit position within the word/digit being processed. */
04602         int i, j = 0, s = 0;
04603 
04604         r->dp[0] = 0;
04605         for (i = 0; i < 64; i++) {
04606             r->dp[j] |= (mp_digit)(a[i] << s);
04607             r->dp[j] &= (1L << DIGIT_BIT) - 1;
04608             s = DIGIT_BIT - s;
04609             r->dp[++j] = (mp_digit)(a[i] >> s);
                  /* Keep emitting digits while a whole one is left in a[i]. */
04610             while (s + DIGIT_BIT <= 32) {
04611                 s += DIGIT_BIT;
04612                 r->dp[j++] &= (1L << DIGIT_BIT) - 1;
04613                 if (s == SP_WORD_SIZE) {
04614                     r->dp[j] = 0;
04615                 }
04616                 else {
04617                     r->dp[j] = (mp_digit)(a[i] >> s);
04618                 }
04619             }
04620             s = 32 - s;
04621         }
04622         r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
04623         mp_clamp(r);
04624 #else
              /* mp digits are wider than a word: words of a are packed into each
               * mp digit. s is the bit position within r->dp[j] at which the next
               * word is placed. */
04625         int i, j = 0, s = 0;
04626 
04627         r->dp[0] = 0;
04628         for (i = 0; i < 64; i++) {
04629             r->dp[j] |= ((mp_digit)a[i]) << s;
04630             if (s + 32 >= DIGIT_BIT) {
                      /* This word reaches the end of the digit: mask the digit
                       * and start the next one with the bits left over. */
04631     #if DIGIT_BIT != 32 && DIGIT_BIT != 64
04632                 r->dp[j] &= (1L << DIGIT_BIT) - 1;
04633     #endif
04634                 s = DIGIT_BIT - s;
04635                 r->dp[++j] = a[i] >> s;
04636                 s = 32 - s;
04637             }
04638             else {
04639                 s += 32;
04640             }
04641         }
04642         r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
04643         mp_clamp(r);
04644 #endif
04645     }
04646 
04647     return err;
04648 }
04649 
04650 /* Modular exponentiation with a 2048-bit modulus.
04651  * Calculates res = base ^ exp mod mod.
04652  *
04653  * base  Base. MP integer of at most 2048 bits.
04654  * exp   Exponent. MP integer of at most 2048 bits.
04655  * mod   Modulus. MP integer of exactly 2048 bits.
04656  * res   Result. MP integer.
04657  * returns 0 on success, MP_READ_E if the base or exponent has more than
04658  * 2048 bits or the modulus is not exactly 2048 bits long, and MEMORY_E if
04659  * memory allocation fails.
04660  */
04661 int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
04662 {
04663     sp_digit baseWords[128];
04664     sp_digit expWords[64];
04665     sp_digit modWords[64];
04666     /* The result is written over the base words. */
04667     sp_digit* result = baseWords;
04668     int expBits = mp_count_bits(exp);
04669     int err = MP_OKAY;
04670 
04671     /* Operand sizes are checked in order: base, exponent, modulus. The first
04672      * failure sets MP_READ_E and the later counts are not taken. */
04673     if ((mp_count_bits(base) > 2048) || (expBits > 2048) ||
04674             (mp_count_bits(mod) != 2048)) {
04675         err = MP_READ_E;
04676     }
04677 
04678     if (err == MP_OKAY) {
04679         /* Load each operand into a 64-word array. */
04680         sp_2048_from_mp(baseWords, 64, base);
04681         sp_2048_from_mp(expWords, 64, exp);
04682         sp_2048_from_mp(modWords, 64, mod);
04683 
04684         err = sp_2048_mod_exp_64(result, baseWords, expWords, expBits,
04685                                  modWords, 0);
04686     }
04687 
04688     if (err == MP_OKAY) {
04689         /* Hand the result back as an MP integer. */
04690         err = sp_2048_to_mp(result, res);
04691     }
04692 
04693     /* Clear the local copy of the exponent, whatever the outcome. */
04694     XMEMSET(expWords, 0, sizeof(expWords));
04695 
04696     return err;
04697 }
04698 
04699 #ifdef WOLFSSL_HAVE_SP_DH
04700 
04701 #ifdef HAVE_FFDHE_2048
04702 static void sp_2048_lshift_64(sp_digit* r, sp_digit* a, byte n)
04703 {
04704     __asm__ __volatile__ (
04705         "mov    r6, #31\n\t"
04706         "sub    r6, r6, %[n]\n\t"
04707         "add    %[a], %[a], #192\n\t"
04708         "add    %[r], %[r], #192\n\t"
04709         "ldr    r3, [%[a], #60]\n\t"
04710         "lsr    r4, r3, #1\n\t"
04711         "lsl    r3, r3, %[n]\n\t"
04712         "lsr    r4, r4, r6\n\t"
04713         "ldr    r2, [%[a], #56]\n\t"
04714         "str    r4, [%[r], #64]\n\t"
04715         "lsr    r5, r2, #1\n\t"
04716         "lsl    r2, r2, %[n]\n\t"
04717         "lsr    r5, r5, r6\n\t"
04718         "orr    r3, r3, r5\n\t"
04719         "ldr    r4, [%[a], #52]\n\t"
04720         "str    r3, [%[r], #60]\n\t"
04721         "lsr    r5, r4, #1\n\t"
04722         "lsl    r4, r4, %[n]\n\t"
04723         "lsr    r5, r5, r6\n\t"
04724         "orr    r2, r2, r5\n\t"
04725         "ldr    r3, [%[a], #48]\n\t"
04726         "str    r2, [%[r], #56]\n\t"
04727         "lsr    r5, r3, #1\n\t"
04728         "lsl    r3, r3, %[n]\n\t"
04729         "lsr    r5, r5, r6\n\t"
04730         "orr    r4, r4, r5\n\t"
04731         "ldr    r2, [%[a], #44]\n\t"
04732         "str    r4, [%[r], #52]\n\t"
04733         "lsr    r5, r2, #1\n\t"
04734         "lsl    r2, r2, %[n]\n\t"
04735         "lsr    r5, r5, r6\n\t"
04736         "orr    r3, r3, r5\n\t"
04737         "ldr    r4, [%[a], #40]\n\t"
04738         "str    r3, [%[r], #48]\n\t"
04739         "lsr    r5, r4, #1\n\t"
04740         "lsl    r4, r4, %[n]\n\t"
04741         "lsr    r5, r5, r6\n\t"
04742         "orr    r2, r2, r5\n\t"
04743         "ldr    r3, [%[a], #36]\n\t"
04744         "str    r2, [%[r], #44]\n\t"
04745         "lsr    r5, r3, #1\n\t"
04746         "lsl    r3, r3, %[n]\n\t"
04747         "lsr    r5, r5, r6\n\t"
04748         "orr    r4, r4, r5\n\t"
04749         "ldr    r2, [%[a], #32]\n\t"
04750         "str    r4, [%[r], #40]\n\t"
04751         "lsr    r5, r2, #1\n\t"
04752         "lsl    r2, r2, %[n]\n\t"
04753         "lsr    r5, r5, r6\n\t"
04754         "orr    r3, r3, r5\n\t"
04755         "ldr    r4, [%[a], #28]\n\t"
04756         "str    r3, [%[r], #36]\n\t"
04757         "lsr    r5, r4, #1\n\t"
04758         "lsl    r4, r4, %[n]\n\t"
04759         "lsr    r5, r5, r6\n\t"
04760         "orr    r2, r2, r5\n\t"
04761         "ldr    r3, [%[a], #24]\n\t"
04762         "str    r2, [%[r], #32]\n\t"
04763         "lsr    r5, r3, #1\n\t"
04764         "lsl    r3, r3, %[n]\n\t"
04765         "lsr    r5, r5, r6\n\t"
04766         "orr    r4, r4, r5\n\t"
04767         "ldr    r2, [%[a], #20]\n\t"
04768         "str    r4, [%[r], #28]\n\t"
04769         "lsr    r5, r2, #1\n\t"
04770         "lsl    r2, r2, %[n]\n\t"
04771         "lsr    r5, r5, r6\n\t"
04772         "orr    r3, r3, r5\n\t"
04773         "ldr    r4, [%[a], #16]\n\t"
04774         "str    r3, [%[r], #24]\n\t"
04775         "lsr    r5, r4, #1\n\t"
04776         "lsl    r4, r4, %[n]\n\t"
04777         "lsr    r5, r5, r6\n\t"
04778         "orr    r2, r2, r5\n\t"
04779         "ldr    r3, [%[a], #12]\n\t"
04780         "str    r2, [%[r], #20]\n\t"
04781         "lsr    r5, r3, #1\n\t"
04782         "lsl    r3, r3, %[n]\n\t"
04783         "lsr    r5, r5, r6\n\t"
04784         "orr    r4, r4, r5\n\t"
04785         "ldr    r2, [%[a], #8]\n\t"
04786         "str    r4, [%[r], #16]\n\t"
04787         "lsr    r5, r2, #1\n\t"
04788         "lsl    r2, r2, %[n]\n\t"
04789         "lsr    r5, r5, r6\n\t"
04790         "orr    r3, r3, r5\n\t"
04791         "ldr    r4, [%[a], #4]\n\t"
04792         "str    r3, [%[r], #12]\n\t"
04793         "lsr    r5, r4, #1\n\t"
04794         "lsl    r4, r4, %[n]\n\t"
04795         "lsr    r5, r5, r6\n\t"
04796         "orr    r2, r2, r5\n\t"
04797         "ldr    r3, [%[a], #0]\n\t"
04798         "str    r2, [%[r], #8]\n\t"
04799         "lsr    r5, r3, #1\n\t"
04800         "lsl    r3, r3, %[n]\n\t"
04801         "lsr    r5, r5, r6\n\t"
04802         "orr    r4, r4, r5\n\t"
04803         "sub    %[a], %[a], #64\n\t"
04804         "sub    %[r], %[r], #64\n\t"
04805         "ldr    r2, [%[a], #60]\n\t"
04806         "str    r4, [%[r], #68]\n\t"
04807         "lsr    r5, r2, #1\n\t"
04808         "lsl    r2, r2, %[n]\n\t"
04809         "lsr    r5, r5, r6\n\t"
04810         "orr    r3, r3, r5\n\t"
04811         "ldr    r4, [%[a], #56]\n\t"
04812         "str    r3, [%[r], #64]\n\t"
04813         "lsr    r5, r4, #1\n\t"
04814         "lsl    r4, r4, %[n]\n\t"
04815         "lsr    r5, r5, r6\n\t"
04816         "orr    r2, r2, r5\n\t"
04817         "ldr    r3, [%[a], #52]\n\t"
04818         "str    r2, [%[r], #60]\n\t"
04819         "lsr    r5, r3, #1\n\t"
04820         "lsl    r3, r3, %[n]\n\t"
04821         "lsr    r5, r5, r6\n\t"
04822         "orr    r4, r4, r5\n\t"
04823         "ldr    r2, [%[a], #48]\n\t"
04824         "str    r4, [%[r], #56]\n\t"
04825         "lsr    r5, r2, #1\n\t"
04826         "lsl    r2, r2, %[n]\n\t"
04827         "lsr    r5, r5, r6\n\t"
04828         "orr    r3, r3, r5\n\t"
04829         "ldr    r4, [%[a], #44]\n\t"
04830         "str    r3, [%[r], #52]\n\t"
04831         "lsr    r5, r4, #1\n\t"
04832         "lsl    r4, r4, %[n]\n\t"
04833         "lsr    r5, r5, r6\n\t"
04834         "orr    r2, r2, r5\n\t"
04835         "ldr    r3, [%[a], #40]\n\t"
04836         "str    r2, [%[r], #48]\n\t"
04837         "lsr    r5, r3, #1\n\t"
04838         "lsl    r3, r3, %[n]\n\t"
04839         "lsr    r5, r5, r6\n\t"
04840         "orr    r4, r4, r5\n\t"
04841         "ldr    r2, [%[a], #36]\n\t"
04842         "str    r4, [%[r], #44]\n\t"
04843         "lsr    r5, r2, #1\n\t"
04844         "lsl    r2, r2, %[n]\n\t"
04845         "lsr    r5, r5, r6\n\t"
04846         "orr    r3, r3, r5\n\t"
04847         "ldr    r4, [%[a], #32]\n\t"
04848         "str    r3, [%[r], #40]\n\t"
04849         "lsr    r5, r4, #1\n\t"
04850         "lsl    r4, r4, %[n]\n\t"
04851         "lsr    r5, r5, r6\n\t"
04852         "orr    r2, r2, r5\n\t"
04853         "ldr    r3, [%[a], #28]\n\t"
04854         "str    r2, [%[r], #36]\n\t"
04855         "lsr    r5, r3, #1\n\t"
04856         "lsl    r3, r3, %[n]\n\t"
04857         "lsr    r5, r5, r6\n\t"
04858         "orr    r4, r4, r5\n\t"
04859         "ldr    r2, [%[a], #24]\n\t"
04860         "str    r4, [%[r], #32]\n\t"
04861         "lsr    r5, r2, #1\n\t"
04862         "lsl    r2, r2, %[n]\n\t"
04863         "lsr    r5, r5, r6\n\t"
04864         "orr    r3, r3, r5\n\t"
04865         "ldr    r4, [%[a], #20]\n\t"
04866         "str    r3, [%[r], #28]\n\t"
04867         "lsr    r5, r4, #1\n\t"
04868         "lsl    r4, r4, %[n]\n\t"
04869         "lsr    r5, r5, r6\n\t"
04870         "orr    r2, r2, r5\n\t"
04871         "ldr    r3, [%[a], #16]\n\t"
04872         "str    r2, [%[r], #24]\n\t"
04873         "lsr    r5, r3, #1\n\t"
04874         "lsl    r3, r3, %[n]\n\t"
04875         "lsr    r5, r5, r6\n\t"
04876         "orr    r4, r4, r5\n\t"
04877         "ldr    r2, [%[a], #12]\n\t"
04878         "str    r4, [%[r], #20]\n\t"
04879         "lsr    r5, r2, #1\n\t"
04880         "lsl    r2, r2, %[n]\n\t"
04881         "lsr    r5, r5, r6\n\t"
04882         "orr    r3, r3, r5\n\t"
04883         "ldr    r4, [%[a], #8]\n\t"
04884         "str    r3, [%[r], #16]\n\t"
04885         "lsr    r5, r4, #1\n\t"
04886         "lsl    r4, r4, %[n]\n\t"
04887         "lsr    r5, r5, r6\n\t"
04888         "orr    r2, r2, r5\n\t"
04889         "ldr    r3, [%[a], #4]\n\t"
04890         "str    r2, [%[r], #12]\n\t"
04891         "lsr    r5, r3, #1\n\t"
04892         "lsl    r3, r3, %[n]\n\t"
04893         "lsr    r5, r5, r6\n\t"
04894         "orr    r4, r4, r5\n\t"
04895         "ldr    r2, [%[a], #0]\n\t"
04896         "str    r4, [%[r], #8]\n\t"
04897         "lsr    r5, r2, #1\n\t"
04898         "lsl    r2, r2, %[n]\n\t"
04899         "lsr    r5, r5, r6\n\t"
04900         "orr    r3, r3, r5\n\t"
04901         "sub    %[a], %[a], #64\n\t"
04902         "sub    %[r], %[r], #64\n\t"
04903         "ldr    r4, [%[a], #60]\n\t"
04904         "str    r3, [%[r], #68]\n\t"
04905         "lsr    r5, r4, #1\n\t"
04906         "lsl    r4, r4, %[n]\n\t"
04907         "lsr    r5, r5, r6\n\t"
04908         "orr    r2, r2, r5\n\t"
04909         "ldr    r3, [%[a], #56]\n\t"
04910         "str    r2, [%[r], #64]\n\t"
04911         "lsr    r5, r3, #1\n\t"
04912         "lsl    r3, r3, %[n]\n\t"
04913         "lsr    r5, r5, r6\n\t"
04914         "orr    r4, r4, r5\n\t"
04915         "ldr    r2, [%[a], #52]\n\t"
04916         "str    r4, [%[r], #60]\n\t"
04917         "lsr    r5, r2, #1\n\t"
04918         "lsl    r2, r2, %[n]\n\t"
04919         "lsr    r5, r5, r6\n\t"
04920         "orr    r3, r3, r5\n\t"
04921         "ldr    r4, [%[a], #48]\n\t"
04922         "str    r3, [%[r], #56]\n\t"
04923         "lsr    r5, r4, #1\n\t"
04924         "lsl    r4, r4, %[n]\n\t"
04925         "lsr    r5, r5, r6\n\t"
04926         "orr    r2, r2, r5\n\t"
04927         "ldr    r3, [%[a], #44]\n\t"
04928         "str    r2, [%[r], #52]\n\t"
04929         "lsr    r5, r3, #1\n\t"
04930         "lsl    r3, r3, %[n]\n\t"
04931         "lsr    r5, r5, r6\n\t"
04932         "orr    r4, r4, r5\n\t"
04933         "ldr    r2, [%[a], #40]\n\t"
04934         "str    r4, [%[r], #48]\n\t"
04935         "lsr    r5, r2, #1\n\t"
04936         "lsl    r2, r2, %[n]\n\t"
04937         "lsr    r5, r5, r6\n\t"
04938         "orr    r3, r3, r5\n\t"
04939         "ldr    r4, [%[a], #36]\n\t"
04940         "str    r3, [%[r], #44]\n\t"
04941         "lsr    r5, r4, #1\n\t"
04942         "lsl    r4, r4, %[n]\n\t"
04943         "lsr    r5, r5, r6\n\t"
04944         "orr    r2, r2, r5\n\t"
04945         "ldr    r3, [%[a], #32]\n\t"
04946         "str    r2, [%[r], #40]\n\t"
04947         "lsr    r5, r3, #1\n\t"
04948         "lsl    r3, r3, %[n]\n\t"
04949         "lsr    r5, r5, r6\n\t"
04950         "orr    r4, r4, r5\n\t"
04951         "ldr    r2, [%[a], #28]\n\t"
04952         "str    r4, [%[r], #36]\n\t"
04953         "lsr    r5, r2, #1\n\t"
04954         "lsl    r2, r2, %[n]\n\t"
04955         "lsr    r5, r5, r6\n\t"
04956         "orr    r3, r3, r5\n\t"
04957         "ldr    r4, [%[a], #24]\n\t"
04958         "str    r3, [%[r], #32]\n\t"
04959         "lsr    r5, r4, #1\n\t"
04960         "lsl    r4, r4, %[n]\n\t"
04961         "lsr    r5, r5, r6\n\t"
04962         "orr    r2, r2, r5\n\t"
04963         "ldr    r3, [%[a], #20]\n\t"
04964         "str    r2, [%[r], #28]\n\t"
04965         "lsr    r5, r3, #1\n\t"
04966         "lsl    r3, r3, %[n]\n\t"
04967         "lsr    r5, r5, r6\n\t"
04968         "orr    r4, r4, r5\n\t"
04969         "ldr    r2, [%[a], #16]\n\t"
04970         "str    r4, [%[r], #24]\n\t"
04971         "lsr    r5, r2, #1\n\t"
04972         "lsl    r2, r2, %[n]\n\t"
04973         "lsr    r5, r5, r6\n\t"
04974         "orr    r3, r3, r5\n\t"
04975         "ldr    r4, [%[a], #12]\n\t"
04976         "str    r3, [%[r], #20]\n\t"
04977         "lsr    r5, r4, #1\n\t"
04978         "lsl    r4, r4, %[n]\n\t"
04979         "lsr    r5, r5, r6\n\t"
04980         "orr    r2, r2, r5\n\t"
04981         "ldr    r3, [%[a], #8]\n\t"
04982         "str    r2, [%[r], #16]\n\t"
04983         "lsr    r5, r3, #1\n\t"
04984         "lsl    r3, r3, %[n]\n\t"
04985         "lsr    r5, r5, r6\n\t"
04986         "orr    r4, r4, r5\n\t"
04987         "ldr    r2, [%[a], #4]\n\t"
04988         "str    r4, [%[r], #12]\n\t"
04989         "lsr    r5, r2, #1\n\t"
04990         "lsl    r2, r2, %[n]\n\t"
04991         "lsr    r5, r5, r6\n\t"
04992         "orr    r3, r3, r5\n\t"
04993         "ldr    r4, [%[a], #0]\n\t"
04994         "str    r3, [%[r], #8]\n\t"
04995         "lsr    r5, r4, #1\n\t"
04996         "lsl    r4, r4, %[n]\n\t"
04997         "lsr    r5, r5, r6\n\t"
04998         "orr    r2, r2, r5\n\t"
04999         "sub    %[a], %[a], #64\n\t"
05000         "sub    %[r], %[r], #64\n\t"
05001         "ldr    r3, [%[a], #60]\n\t"
05002         "str    r2, [%[r], #68]\n\t"
05003         "lsr    r5, r3, #1\n\t"
05004         "lsl    r3, r3, %[n]\n\t"
05005         "lsr    r5, r5, r6\n\t"
05006         "orr    r4, r4, r5\n\t"
05007         "ldr    r2, [%[a], #56]\n\t"
05008         "str    r4, [%[r], #64]\n\t"
05009         "lsr    r5, r2, #1\n\t"
05010         "lsl    r2, r2, %[n]\n\t"
05011         "lsr    r5, r5, r6\n\t"
05012         "orr    r3, r3, r5\n\t"
05013         "ldr    r4, [%[a], #52]\n\t"
05014         "str    r3, [%[r], #60]\n\t"
05015         "lsr    r5, r4, #1\n\t"
05016         "lsl    r4, r4, %[n]\n\t"
05017         "lsr    r5, r5, r6\n\t"
05018         "orr    r2, r2, r5\n\t"
05019         "ldr    r3, [%[a], #48]\n\t"
05020         "str    r2, [%[r], #56]\n\t"
05021         "lsr    r5, r3, #1\n\t"
05022         "lsl    r3, r3, %[n]\n\t"
05023         "lsr    r5, r5, r6\n\t"
05024         "orr    r4, r4, r5\n\t"
05025         "ldr    r2, [%[a], #44]\n\t"
05026         "str    r4, [%[r], #52]\n\t"
05027         "lsr    r5, r2, #1\n\t"
05028         "lsl    r2, r2, %[n]\n\t"
05029         "lsr    r5, r5, r6\n\t"
05030         "orr    r3, r3, r5\n\t"
05031         "ldr    r4, [%[a], #40]\n\t"
05032         "str    r3, [%[r], #48]\n\t"
05033         "lsr    r5, r4, #1\n\t"
05034         "lsl    r4, r4, %[n]\n\t"
05035         "lsr    r5, r5, r6\n\t"
05036         "orr    r2, r2, r5\n\t"
05037         "ldr    r3, [%[a], #36]\n\t"
05038         "str    r2, [%[r], #44]\n\t"
05039         "lsr    r5, r3, #1\n\t"
05040         "lsl    r3, r3, %[n]\n\t"
05041         "lsr    r5, r5, r6\n\t"
05042         "orr    r4, r4, r5\n\t"
05043         "ldr    r2, [%[a], #32]\n\t"
05044         "str    r4, [%[r], #40]\n\t"
05045         "lsr    r5, r2, #1\n\t"
05046         "lsl    r2, r2, %[n]\n\t"
05047         "lsr    r5, r5, r6\n\t"
05048         "orr    r3, r3, r5\n\t"
05049         "ldr    r4, [%[a], #28]\n\t"
05050         "str    r3, [%[r], #36]\n\t"
05051         "lsr    r5, r4, #1\n\t"
05052         "lsl    r4, r4, %[n]\n\t"
05053         "lsr    r5, r5, r6\n\t"
05054         "orr    r2, r2, r5\n\t"
05055         "ldr    r3, [%[a], #24]\n\t"
05056         "str    r2, [%[r], #32]\n\t"
05057         "lsr    r5, r3, #1\n\t"
05058         "lsl    r3, r3, %[n]\n\t"
05059         "lsr    r5, r5, r6\n\t"
05060         "orr    r4, r4, r5\n\t"
05061         "ldr    r2, [%[a], #20]\n\t"
05062         "str    r4, [%[r], #28]\n\t"
05063         "lsr    r5, r2, #1\n\t"
05064         "lsl    r2, r2, %[n]\n\t"
05065         "lsr    r5, r5, r6\n\t"
05066         "orr    r3, r3, r5\n\t"
05067         "ldr    r4, [%[a], #16]\n\t"
05068         "str    r3, [%[r], #24]\n\t"
05069         "lsr    r5, r4, #1\n\t"
05070         "lsl    r4, r4, %[n]\n\t"
05071         "lsr    r5, r5, r6\n\t"
05072         "orr    r2, r2, r5\n\t"
05073         "ldr    r3, [%[a], #12]\n\t"
05074         "str    r2, [%[r], #20]\n\t"
05075         "lsr    r5, r3, #1\n\t"
05076         "lsl    r3, r3, %[n]\n\t"
05077         "lsr    r5, r5, r6\n\t"
05078         "orr    r4, r4, r5\n\t"
05079         "ldr    r2, [%[a], #8]\n\t"
05080         "str    r4, [%[r], #16]\n\t"
05081         "lsr    r5, r2, #1\n\t"
05082         "lsl    r2, r2, %[n]\n\t"
05083         "lsr    r5, r5, r6\n\t"
05084         "orr    r3, r3, r5\n\t"
05085         "ldr    r4, [%[a], #4]\n\t"
05086         "str    r3, [%[r], #12]\n\t"
05087         "lsr    r5, r4, #1\n\t"
05088         "lsl    r4, r4, %[n]\n\t"
05089         "lsr    r5, r5, r6\n\t"
05090         "orr    r2, r2, r5\n\t"
05091         "ldr    r3, [%[a], #0]\n\t"
05092         "str    r2, [%[r], #8]\n\t"
05093         "lsr    r5, r3, #1\n\t"
05094         "lsl    r3, r3, %[n]\n\t"
05095         "lsr    r5, r5, r6\n\t"
05096         "orr    r4, r4, r5\n\t"
05097         "str    r3, [%[r]]\n\t"
05098         "str    r4, [%[r], #4]\n\t"
05099         :
05100         : [r] "r" (r), [a] "r" (a), [n] "r" (n)
05101         : "memory", "r2", "r3", "r4", "r5", "r6"
05102     );
05103 }
05104 
05105 /* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
       *
       * The exponent is consumed from the top in windows of up to 5 bits. Each
       * step squares r five times and then shifts it left by the window value,
       * instead of multiplying by a table entry.
05106  *
05107  * r     A single precision number that is the result of the operation.
       *       Used as working space: words up to r[127] are written.
05108  * e     A single precision number that is the exponent.
05109  * bits  The number of bits in the exponent.
05110  * m     A single precision number that is the modulus.
05111  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
05112  */
05113 static int sp_2048_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits,
05114         const sp_digit* m)
05115 {
05116 #ifndef WOLFSSL_SMALL_STACK
05117     sp_digit nd[128];
05118     sp_digit td[65];
05119 #else
05120     sp_digit* td;
05121 #endif
05122     sp_digit* norm;
05123     sp_digit* tmp;
05124     sp_digit mp = 1;
05125     sp_digit n, o;
05126     sp_digit mask;
05127     int i;
05128     int c, y;
05129     int err = MP_OKAY;
05130 
05131 #ifdef WOLFSSL_SMALL_STACK
          /* 193 words: 128 for norm followed by 65 for tmp. */
05132     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 193, NULL,
05133                             DYNAMIC_TYPE_TMP_BUFFER);
05134     if (td == NULL) {
05135         err = MEMORY_E;
05136     }
05137 #endif
05138 
05139     if (err == MP_OKAY) {
05140 #ifdef WOLFSSL_SMALL_STACK
05141         norm = td;
05142         tmp  = td + 128;
05143 #else
05144         norm = nd;
05145         tmp  = td;
05146 #endif
05147 
              /* mp and norm are filled in from the modulus; presumably the
               * Montgomery multiplier and the Montgomery form of one - confirm
               * against sp_2048_mont_setup() and sp_2048_mont_norm_64(). */
05148         sp_2048_mont_setup(m, &mp);
05149         sp_2048_mont_norm_64(norm, m);
05150 
              /* Start with the top exponent word. c becomes the number of bits of
               * n still to be used after the first window has been taken. */
05151         i = (bits - 1) / 32;
05152         n = e[i--];
05153         c = bits & 31;
05154         if (c == 0) {
05155             c = 32;
05156         }
              /* Size the first window so that the bits left over are a multiple of
               * 5. NOTE(review): c goes negative if bits % 32 is smaller than
               * bits % 5; the caller visible here passes a multiple of 8, for
               * which this cannot happen - confirm for any other caller. */
05157         c -= bits % 5;
05158         if (c == 32) {
                  /* Avoids a shift by 32 below; the first window is then 5 bits. */
05159             c = 27;
05160         }
05161         y = (int)(n >> c);
05162         n <<= 32 - c;
              /* r = norm shifted left by the first window value. */
05163         sp_2048_lshift_64(r, norm, y);
05164         for (; i>=0 || c>=5; ) {
                  /* Take the next 5 bits of the exponent into y. */
05165             if (c == 0) {
                      /* n is used up: start on the next word. */
05166                 n = e[i--];
05167                 y = n >> 27;
05168                 n <<= 5;
05169                 c = 27;
05170             }
05171             else if (c < 5) {
                      /* The window straddles two exponent words. */
05172                 y = n >> 27;
05173                 n = e[i--];
05174                 c = 5 - c;
05175                 y |= n >> (32 - c);
05176                 n <<= c;
05177                 c = 32 - c;
05178             }
05179             else {
05180                 y = (n >> 27) & 0x1f;
05181                 n <<= 5;
05182                 c -= 5;
05183             }
05184 
                  /* Five squarings, one for each bit of the window. */
05185             sp_2048_mont_sqr_64(r, r, m, mp);
05186             sp_2048_mont_sqr_64(r, r, m, mp);
05187             sp_2048_mont_sqr_64(r, r, m, mp);
05188             sp_2048_mont_sqr_64(r, r, m, mp);
05189             sp_2048_mont_sqr_64(r, r, m, mp);
05190 
                  /* Shift left by y; the bits shifted out land in r[64]. They are
                   * folded back in by adding norm * r[64] to the low 64 words, and
                   * m is subtracted when that addition carries. */
05191             sp_2048_lshift_64(r, r, y);
05192             sp_2048_mul_d_64(tmp, norm, r[64]);
05193             r[64] = 0;
05194             o = sp_2048_add_64(r, r, tmp);
05195             sp_2048_cond_sub_64(r, r, m, (sp_digit)0 - o);
05196         }
05197 
              /* Clear the upper half of r and reduce. */
05198         XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
05199         sp_2048_mont_reduce_64(r, m, mp);
05200 
              /* Final conditional subtraction of m when r >= m. */
05201         mask = 0 - (sp_2048_cmp_64(r, m) >= 0);
05202         sp_2048_cond_sub_64(r, r, m, mask);
05203     }
05204 
05205 #ifdef WOLFSSL_SMALL_STACK
05206     if (td != NULL) {
05207         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
05208     }
05209 #endif
05210 
05211     return err;
05212 }
05213 #endif /* HAVE_FFDHE_2048 */
05214 
05215 /* Diffie-Hellman modular exponentiation with a 2048-bit modulus.
05216  *
05217  * base     Base. MP integer of at most 2048 bits.
05218  * exp      Array of bytes that is the exponent.
05219  * expLen   Length of data, in bytes, in exponent. At most 256.
05220  * mod      Modulus. MP integer of exactly 2048 bits.
05221  * out      Buffer to hold big-endian bytes of exponentiation result.
05222  *          Must be at least 256 bytes long.
05223  * outLen   Set to the length, in bytes, of the result once its leading
05224  *          zero bytes have been removed.
05225  * returns 0 on success, MP_READ_E if there are too many bytes in an array
05226  * and MEMORY_E if memory allocation fails.
05227  */
05228 int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen,
05229     mp_int* mod, byte* out, word32* outLen)
05230 {
05231     sp_digit baseWords[128];
05232     sp_digit expWords[64];
05233     sp_digit modWords[64];
05234     sp_digit* result = baseWords;   /* result overwrites the base words */
05235     word32 skip;
05236     int err = MP_OKAY;
05237 
05238     /* Checked in order: base size, exponent length, modulus size. */
05239     if ((mp_count_bits(base) > 2048) || (expLen > 256) ||
05240             (mp_count_bits(mod) != 2048)) {
05241         err = MP_READ_E;
05242     }
05243 
05244     if (err == MP_OKAY) {
05245         sp_2048_from_mp(baseWords, 64, base);
05246         sp_2048_from_bin(expWords, 64, exp, expLen);
05247         sp_2048_from_mp(modWords, 64, mod);
05248 
05249     #ifdef HAVE_FFDHE_2048
05250         /* Base of 2 with an all-ones top modulus word: use the 2^e routine. */
05251         if ((base->used == 1) && (base->dp[0] == 2) &&
05252                 (modWords[63] == (sp_digit)-1)) {
05253             err = sp_2048_mod_exp_2_64(result, expWords, expLen * 8, modWords);
05254         }
05255         else
05256     #endif
05257         {
05258             err = sp_2048_mod_exp_64(result, baseWords, expWords, expLen * 8,
05259                                      modWords, 0);
05260         }
05261     }
05262 
05263     if (err == MP_OKAY) {
05264         sp_2048_to_bin(result, out);
05265         *outLen = 256;
05266         /* Count the leading zero bytes and slide the rest to the front. */
05267         skip = 0;
05268         while ((skip < 256) && (out[skip] == 0)) {
05269             skip++;
05270         }
05271         *outLen -= skip;
05272         XMEMMOVE(out, out + skip, *outLen);
05273     }
05274 
05275     XMEMSET(expWords, 0, sizeof(expWords));
05276 
05277     return err;
05278 }
05279 #endif /* WOLFSSL_HAVE_SP_DH */
05280 
05281 /* Perform the modular exponentiation for Diffie-Hellman.
       *
       * Calculates res = base ^ exp mod mod for a 1024-bit modulus, using the
       * 32 word routines of the 2048-bit implementation.
05282  *
05283  * base  Base. MP integer. At most 1024 bits.
05284  * exp   Exponent. MP integer. At most 1024 bits.
05285  * mod   Modulus. MP integer. Exactly 1024 bits.
05286  * res   Result. MP integer. Only updated when the conversion succeeds.
05287  * returns 0 on success, MP_READ_E if there are too many bytes in an array
05288  * and MEMORY_E if memory allocation fails.
05289  */
05290 int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
05291 {
05292     int err = MP_OKAY;
05293     sp_digit b[64], e[32], m[32];
          /* The result is written over the base. */
05294     sp_digit* r = b;
05295     int expBits = mp_count_bits(exp);
05296 
05297     if (mp_count_bits(base) > 1024) {
05298         err = MP_READ_E;
05299     }
05300 
05301     if (err == MP_OKAY) {
05302         if (expBits > 1024) {
05303             err = MP_READ_E;
05304         }
05305     }
05306 
05307     if (err == MP_OKAY) {
05308         if (mp_count_bits(mod) != 1024) {
05309             err = MP_READ_E;
05310         }
05311     }
05312 
05313     if (err == MP_OKAY) {
05314         sp_2048_from_mp(b, 32, base);
05315         sp_2048_from_mp(e, 32, exp);
05316         sp_2048_from_mp(m, 32, mod);
05317 
05318         err = sp_2048_mod_exp_32(r, b, e, expBits, m, 0);
05319     }
05320 
05321     if (err == MP_OKAY) {
              /* sp_2048_to_mp() converts 64 words: clear the upper 32 first. */
05322         XMEMSET(r + 32, 0, sizeof(*r) * 32U);
05323         err = sp_2048_to_mp(r, res);
          }
      
          if (err == MP_OKAY) {
              /* Only adjust res when the conversion succeeded: after a failed
               * conversion res may not have room for mod->used digits. */
05324         res->used = mod->used;
05325         mp_clamp(res);
05326     }
05327 
          /* Clear the local copy of the exponent on every path. */
05328     XMEMSET(e, 0, sizeof(e));
05329 
05330     return err;
05331 }
05332 
05333 #endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
05334 
05335 #endif /* !WOLFSSL_SP_NO_2048 */
05336 
05337 #ifndef WOLFSSL_SP_NO_3072
05338 /* Read big endian unsigned byte array into r.
05339  *
05340  * r     Single precision integer to fill, least significant word first.
05341  * size  Number of words available in r.
05342  * a     Byte array.
05343  * n     Number of bytes in array to read.
05344  */
05345 static void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n)
05346 {
05347     int src;
05348     int word = 0;
05349     word32 shift = 0;
05350 
05351     r[0] = 0;
05352     for (src = n - 1; src >= 0; src--) {
05353         r[word] |= (((sp_digit)a[src]) << shift);
05354         if (shift < 24U) {
05355             shift += 8U;
05356             continue;
05357         }
05358         r[word] &= 0xffffffff;    /* word complete: keep its low 32 bits */
05359         shift = 32U - shift;
05360         if (word + 1 >= size) {
05361             break;
05362         }
05363         r[++word] = (sp_digit)a[src] >> shift;
05364         shift = 8U - shift;
05365     }
05366 
05367     for (word++; word < size; word++) {
05368         r[word] = 0;
05369     }
05370 }
05371 
05372 /* Convert an mp_int to an array of sp_digit.
       *
       * At most size words of r are written. Digits of a that do not fit are
       * dropped and any words of r left over are set to zero.
05373  *
05374  * r  A single precision integer.
05375  * size  Number of words available in r.
05376  * a  A multi-precision integer.
05377  */
05378 static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a)
05379 {
05380 #if DIGIT_BIT == 32
05381     int j;
          int used = a->used;
05382 
          /* Copy no more digits than r has room for, as the other digit sizes
           * below do. */
          if (used > size) {
              used = (size > 0) ? size : 0;
          }
05383     XMEMCPY(r, a->dp, sizeof(sp_digit) * used);
05384 
05385     for (j = used; j < size; j++) {
05386         r[j] = 0;
05387     }
05388 #elif DIGIT_BIT > 32
          /* mp digits are wider than a word: each digit is split over more than
           * one word of r. i indexes a->dp, j indexes r and s tracks the bit
           * position within the digit/word being processed. */
05389     int i, j = 0;
05390     word32 s = 0;
05391 
05392     r[0] = 0;
05393     for (i = 0; i < a->used && j < size; i++) {
05394         r[j] |= ((sp_digit)a->dp[i] << s);
05395         r[j] &= 0xffffffff;
05396         s = 32U - s;
05397         if (j + 1 >= size) {
05398             break;
05399         }
05400         /* lint allow cast of mismatch word32 and mp_digit */
05401         r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
              /* Keep emitting words while a whole one is left in this digit. */
05402         while ((s + 32U) <= (word32)DIGIT_BIT) {
05403             s += 32U;
05404             r[j] &= 0xffffffff;
05405             if (j + 1 >= size) {
05406                 break;
05407             }
05408             if (s < (word32)DIGIT_BIT) {
05409                 /* lint allow cast of mismatch word32 and mp_digit */
05410                 r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
05411             }
05412             else {
05413                 r[++j] = 0L;
05414             }
05415         }
05416         s = (word32)DIGIT_BIT - s;
05417     }
05418 
05419     for (j++; j < size; j++) {
05420         r[j] = 0;
05421     }
05422 #else
          /* mp digits are narrower than a word: digits are packed into each word
           * of r. s is the bit position within r[j] for the next digit. */
05423     int i, j = 0, s = 0;
05424 
05425     r[0] = 0;
05426     for (i = 0; i < a->used && j < size; i++) {
05427         r[j] |= ((sp_digit)a->dp[i]) << s;
05428         if (s + DIGIT_BIT >= 32) {
05429             r[j] &= 0xffffffff;
05430             if (j + 1 >= size) {
05431                 break;
05432             }
05433             s = 32 - s;
05434             if (s == DIGIT_BIT) {
                      /* The digit ended exactly on the word boundary. */
05435                 r[++j] = 0;
05436                 s = 0;
05437             }
05438             else {
05439                 r[++j] = a->dp[i] >> s;
05440                 s = DIGIT_BIT - s;
05441             }
05442         }
05443         else {
05444             s += DIGIT_BIT;
05445         }
05446     }
05447 
05448     for (j++; j < size; j++) {
05449         r[j] = 0;
05450     }
05451 #endif
05452 }
05453 
05454 /* Write r as big endian to byte array.
05455  * Fixed length number of bytes written: 384
05456  *
05457  * r  A single precision integer.
05458  * a  Byte array.
05459  */
05460 static void sp_3072_to_bin(sp_digit* r, byte* a)
05461 {
05462     int i, j, s = 0, b;
05463 
05464     j = 3072 / 8 - 1;
05465     a[j] = 0;
05466     for (i=0; i<96 && j>=0; i++) {
05467         b = 0;
05468         /* lint allow cast of mismatch sp_digit and int */
05469         a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
05470         b += 8 - s;
05471         if (j < 0) {
05472             break;
05473         }
05474         while (b < 32) {
05475             a[j--] = (byte)(r[i] >> b);
05476             b += 8;
05477             if (j < 0) {
05478                 break;
05479             }
05480         }
05481         s = 8 - (b - 32);
05482         if (j >= 0) {
05483             a[j] = 0;
05484         }
05485         if (s != 0) {
05486             j++;
05487         }
05488     }
05489 }
05490 
05491 #ifndef WOLFSSL_SP_SMALL
05492 /* Multiply a and b into r. (r = a * b)
05493  *
       * 12 digits (48 bytes) of each input are read and 24 digits are produced.
       * The product is built one output word at a time: for each word, every
       * pair a[i], b[j] whose indices add up to that word's index is multiplied
       * and summed into a three-word accumulator (r3 low, r4 middle, r5 high).
       * Each 32x32 product is formed from four 16x16 partial products.
       * The asm writes into a local buffer which is copied to r afterwards.
       *
       * Inside the asm %[r], %[a] and %[b] are used as working registers even
       * though they are declared as inputs; their original values are kept in
       * r11, r9 and r10 and moved back before the asm ends.
       *
05494  * r  A single precision integer. Receives 24 digits.
05495  * a  A single precision integer. 12 digits are read.
05496  * b  A single precision integer. 12 digits are read.
05497  */
05498 SP_NOINLINE static void sp_3072_mul_12(sp_digit* r, const sp_digit* a,
05499         const sp_digit* b)
05500 {
05501     sp_digit tmp[12 * 2];  /* the asm's %[r] operand; copied to r at the end */
05502     __asm__ __volatile__ (
05503         "mov    r3, #0\n\t"           /* r3: accumulator, low word */
05504         "mov    r4, #0\n\t"           /* r4: accumulator, middle word */
05505         "mov    r8, r3\n\t"           /* r8: byte offset of the output word, starts at 0 */
05506         "mov    r11, %[r]\n\t"        /* r11: saved tmp pointer */
05507         "mov    r9, %[a]\n\t"         /* r9: saved a */
05508         "mov    r10, %[b]\n\t"        /* r10: saved b */
05509         "mov    r6, #48\n\t"
05510         "add    r6, r9\n\t"
05511         "mov    r12, r6\n\t"          /* r12 = a + 48 bytes, one past a[11] */
05512         "\n1:\n\t"                    /* outer loop: one output word per pass */
05513         "mov    %[r], #0\n\t"         /* %[r] supplies the zero for the adc instructions */
05514         "mov    r5, #0\n\t"           /* r5: accumulator, high word */
05515         "mov    r6, #44\n\t"
05516         "mov    %[a], r8\n\t"
05517         "sub    %[a], r6\n\t"         /* offset - 44; borrows when offset < 44 */
05518         "sbc    r6, r6\n\t"           /* r6 = 0 without a borrow, all ones with one */
05519         "mvn    r6, r6\n\t"
05520         "and    %[a], r6\n\t"         /* a offset = offset - 44 if that is >= 0, else 0 */
05521         "mov    %[b], r8\n\t"
05522         "sub    %[b], %[a]\n\t"       /* b offset = offset - a offset */
05523         "add    %[a], r9\n\t"         /* turn both offsets into pointers */
05524         "add    %[b], r10\n\t"
05525         "\n2:\n\t"                    /* inner loop: a pointer walks up, b pointer walks down */
05526         "# Multiply Start\n\t"
05527         "ldr    r6, [%[a]]\n\t"
05528         "ldr    r7, [%[b]]\n\t"
05529         "lsl    r6, r6, #16\n\t"
05530         "lsl    r7, r7, #16\n\t"
05531         "lsr    r6, r6, #16\n\t"
05532         "lsr    r7, r7, #16\n\t"
05533         "mul    r7, r6\n\t"           /* low half of a word * low half of b word */
05534         "add    r3, r7\n\t"
05535         "adc    r4, %[r]\n\t"
05536         "adc    r5, %[r]\n\t"
05537         "ldr    r7, [%[b]]\n\t"
05538         "lsr    r7, r7, #16\n\t"
05539         "mul    r6, r7\n\t"           /* low half of a word * high half of b word */
05540         "lsr    r7, r6, #16\n\t"      /* split so it is added 16 bits up */
05541         "lsl    r6, r6, #16\n\t"
05542         "add    r3, r6\n\t"
05543         "adc    r4, r7\n\t"
05544         "adc    r5, %[r]\n\t"
05545         "ldr    r6, [%[a]]\n\t"
05546         "ldr    r7, [%[b]]\n\t"
05547         "lsr    r6, r6, #16\n\t"
05548         "lsr    r7, r7, #16\n\t"
05549         "mul    r7, r6\n\t"           /* high half * high half, added 32 bits up */
05550         "add    r4, r7\n\t"
05551         "adc    r5, %[r]\n\t"
05552         "ldr    r7, [%[b]]\n\t"
05553         "lsl    r7, r7, #16\n\t"
05554         "lsr    r7, r7, #16\n\t"
05555         "mul    r6, r7\n\t"           /* high half of a word * low half of b word */
05556         "lsr    r7, r6, #16\n\t"
05557         "lsl    r6, r6, #16\n\t"
05558         "add    r3, r6\n\t"
05559         "adc    r4, r7\n\t"
05560         "adc    r5, %[r]\n\t"
05561         "# Multiply Done\n\t"
05562         "add    %[a], #4\n\t"
05563         "sub    %[b], #4\n\t"
05564         "cmp    %[a], r12\n\t"        /* stop at the end of a */
05565         "beq    3f\n\t"
05566         "mov    r6, r8\n\t"
05567         "add    r6, r9\n\t"
05568         "cmp    %[a], r6\n\t"         /* continue while the a pointer <= a + offset */
05569         "ble    2b\n\t"
05570         "\n3:\n\t"                    /* output word complete */
05571         "mov    %[r], r11\n\t"        /* %[r] is the tmp pointer again */
05572         "mov    r7, r8\n\t"
05573         "str    r3, [%[r], r7]\n\t"   /* store the low accumulator word at this offset */
05574         "mov    r3, r4\n\t"           /* shift the accumulator down one word */
05575         "mov    r4, r5\n\t"
05576         "add    r7, #4\n\t"
05577         "mov    r8, r7\n\t"
05578         "mov    r6, #88\n\t"
05579         "cmp    r7, r6\n\t"
05580         "ble    1b\n\t"               /* passes cover offsets 0..88 */
05581         "str    r3, [%[r], r7]\n\t"   /* final word, at offset 92 */
05582         "mov    %[a], r9\n\t"         /* put a and b back in their operand registers */
05583         "mov    %[b], r10\n\t"
05584         :
05585         : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
05586         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
05587     );
05588 
05589     XMEMCPY(r, tmp, sizeof(tmp));
05590 }
05591 
05592 /* Square a and put result in r. (r = a * a)
05593  *
       * 12 digits of a are read and 24 digits are produced. The result is built
       * one output word at a time in a three-word accumulator (r3 low, r4
       * middle, r5 high). For each output word two pointers start at a pair of
       * elements whose indices add up to that word's index and move towards
       * each other: a product of two different elements is added twice, and
       * when both pointers meet on one element its square is added once.
       *
       * The asm lowers sp by 96 bytes to obtain a scratch area, builds the
       * result there, copies it to r and then raises sp again.
       *
       * NOTE(review): %[r] and %[a] are declared as input operands but are
       * overwritten inside the asm. %[r] is restored from r11, while %[a] is
       * left holding the scratch pointer (r10) on exit - confirm the compiler
       * cannot rely on it afterwards.
       *
05594  * r  A single precision integer. Receives 24 digits.
05595  * a  A single precision integer. 12 digits are read.
05596  */
05597 SP_NOINLINE static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a)
05598 {
05599     __asm__ __volatile__ (
05600         "mov    r3, #0\n\t"           /* r3: accumulator, low word */
05601         "mov    r4, #0\n\t"           /* r4: accumulator, middle word */
05602         "mov    r5, #0\n\t"           /* r5: accumulator, high word */
05603         "mov    r8, r3\n\t"           /* r8: byte offset of the output word, starts at 0 */
05604         "mov    r11, %[r]\n\t"        /* r11: saved r */
05605         "mov    r6, #96\n\t"
05606         "neg    r6, r6\n\t"
05607         "add    sp, r6\n\t"           /* sp -= 96: scratch area for the 24 result words */
05608         "mov    r10, sp\n\t"          /* r10: scratch pointer */
05609         "mov    r9, %[a]\n\t"         /* r9: saved a */
05610         "\n1:\n\t"                    /* outer loop: one output word per pass */
05611         "mov    %[r], #0\n\t"         /* %[r] supplies the zero for the adc instructions */
05612         "mov    r6, #44\n\t"
05613         "mov    %[a], r8\n\t"
05614         "sub    %[a], r6\n\t"         /* offset - 44; borrows when offset < 44 */
05615         "sbc    r6, r6\n\t"           /* r6 = 0 without a borrow, all ones with one */
05616         "mvn    r6, r6\n\t"
05617         "and    %[a], r6\n\t"         /* lower offset = offset - 44 if that is >= 0, else 0 */
05618         "mov    r2, r8\n\t"
05619         "sub    r2, %[a]\n\t"         /* upper offset = offset - lower offset */
05620         "add    %[a], r9\n\t"         /* %[a]: pointer that walks up */
05621         "add    r2, r9\n\t"           /* r2: pointer that walks down */
05622         "\n2:\n\t"
05623         "cmp    r2, %[a]\n\t"         /* both pointers on the same element? */
05624         "beq    4f\n\t"
05625         "# Multiply * 2: Start\n\t"
05626         "ldr    r6, [%[a]]\n\t"
05627         "ldr    r7, [r2]\n\t"
05628         "lsl    r6, r6, #16\n\t"
05629         "lsl    r7, r7, #16\n\t"
05630         "lsr    r6, r6, #16\n\t"
05631         "lsr    r7, r7, #16\n\t"
05632         "mul    r7, r6\n\t"           /* low half * low half */
05633         "add    r3, r7\n\t"
05634         "adc    r4, %[r]\n\t"
05635         "adc    r5, %[r]\n\t"
05636         "add    r3, r7\n\t"           /* added a second time for the doubling */
05637         "adc    r4, %[r]\n\t"
05638         "adc    r5, %[r]\n\t"
05639         "ldr    r7, [r2]\n\t"
05640         "lsr    r7, r7, #16\n\t"
05641         "mul    r6, r7\n\t"           /* low half of [%[a]] * high half of [r2] */
05642         "lsr    r7, r6, #16\n\t"      /* split so it is added 16 bits up */
05643         "lsl    r6, r6, #16\n\t"
05644         "add    r3, r6\n\t"
05645         "adc    r4, r7\n\t"
05646         "adc    r5, %[r]\n\t"
05647         "add    r3, r6\n\t"
05648         "adc    r4, r7\n\t"
05649         "adc    r5, %[r]\n\t"
05650         "ldr    r6, [%[a]]\n\t"
05651         "ldr    r7, [r2]\n\t"
05652         "lsr    r6, r6, #16\n\t"
05653         "lsr    r7, r7, #16\n\t"
05654         "mul    r7, r6\n\t"           /* high half * high half, added 32 bits up */
05655         "add    r4, r7\n\t"
05656         "adc    r5, %[r]\n\t"
05657         "add    r4, r7\n\t"
05658         "adc    r5, %[r]\n\t"
05659         "ldr    r7, [r2]\n\t"
05660         "lsl    r7, r7, #16\n\t"
05661         "lsr    r7, r7, #16\n\t"
05662         "mul    r6, r7\n\t"           /* high half of [%[a]] * low half of [r2] */
05663         "lsr    r7, r6, #16\n\t"
05664         "lsl    r6, r6, #16\n\t"
05665         "add    r3, r6\n\t"
05666         "adc    r4, r7\n\t"
05667         "adc    r5, %[r]\n\t"
05668         "add    r3, r6\n\t"
05669         "adc    r4, r7\n\t"
05670         "adc    r5, %[r]\n\t"
05671         "# Multiply * 2: Done\n\t"
05672         "bal    5f\n\t"
05673         "\n4:\n\t"
05674         "# Square: Start\n\t"
05675         "ldr    r6, [%[a]]\n\t"
05676         "lsr    r7, r6, #16\n\t"      /* r7 = high half */
05677         "lsl    r6, r6, #16\n\t"
05678         "lsr    r6, r6, #16\n\t"      /* r6 = low half */
05679         "mul    r6, r6\n\t"           /* low half squared */
05680         "add    r3, r6\n\t"
05681         "adc    r4, %[r]\n\t"
05682         "adc    r5, %[r]\n\t"
05683         "mul    r7, r7\n\t"           /* high half squared, added 32 bits up */
05684         "add    r4, r7\n\t"
05685         "adc    r5, %[r]\n\t"
05686         "ldr    r6, [%[a]]\n\t"
05687         "lsr    r7, r6, #16\n\t"
05688         "lsl    r6, r6, #16\n\t"
05689         "lsr    r6, r6, #16\n\t"
05690         "mul    r6, r7\n\t"           /* low half * high half */
05691         "lsr    r7, r6, #15\n\t"      /* shifts of 15/17 instead of 16/16 double it */
05692         "lsl    r6, r6, #17\n\t"
05693         "add    r3, r6\n\t"
05694         "adc    r4, r7\n\t"
05695         "adc    r5, %[r]\n\t"
05696         "# Square: Done\n\t"
05697         "\n5:\n\t"
05698         "add    %[a], #4\n\t"
05699         "sub    r2, #4\n\t"
05700         "mov    r6, #48\n\t"
05701         "add    r6, r9\n\t"
05702         "cmp    %[a], r6\n\t"         /* stop at the end of a (a + 48 bytes) */
05703         "beq    3f\n\t"
05704         "cmp    %[a], r2\n\t"         /* stop once the pointers have crossed */
05705         "bgt    3f\n\t"
05706         "mov    r7, r8\n\t"
05707         "add    r7, r9\n\t"
05708         "cmp    %[a], r7\n\t"         /* continue while the rising pointer <= a + offset */
05709         "ble    2b\n\t"
05710         "\n3:\n\t"                    /* output word complete */
05711         "mov    %[r], r10\n\t"        /* %[r] = scratch pointer */
05712         "mov    r7, r8\n\t"
05713         "str    r3, [%[r], r7]\n\t"   /* store the low accumulator word at this offset */
05714         "mov    r3, r4\n\t"           /* shift the accumulator down one word */
05715         "mov    r4, r5\n\t"
05716         "mov    r5, #0\n\t"
05717         "add    r7, #4\n\t"
05718         "mov    r8, r7\n\t"
05719         "mov    r6, #88\n\t"
05720         "cmp    r7, r6\n\t"
05721         "ble    1b\n\t"               /* passes cover offsets 0..88 */
05722         "mov    %[a], r9\n\t"
05723         "str    r3, [%[r], r7]\n\t"   /* final word, at offset 92 */
05724         "mov    %[r], r11\n\t"        /* %[r] = caller's r */
05725         "mov    %[a], r10\n\t"        /* %[a] = scratch pointer, source of the copy */
05726         "mov    r3, #92\n\t"
05727         "\n4:\n\t"                    /* copy scratch to r, offsets 92 down to 0 */
05728         "ldr    r6, [%[a], r3]\n\t"
05729         "str    r6, [%[r], r3]\n\t"
05730         "sub    r3, #4\n\t"
05731         "bge    4b\n\t"
05732         "mov    r6, #96\n\t"
05733         "add    sp, r6\n\t"           /* sp += 96: release the scratch area */
05734         :
05735         : [r] "r" (r), [a] "r" (a)
05736         : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
05737     );
05738 }
05739 
05740 /* Add b to a into r. (r = a + b)
05741  *
       * Adds 12 digits, word by word from offset 0 to offset 44, with the carry
       * flag chained from one word to the next.
       *
05742  * r  A single precision integer. Receives 12 digits.
05743  * a  A single precision integer.
05744  * b  A single precision integer.
       *
       * Returns the carry out of the last word: 0 or 1.
05745  */
05746 SP_NOINLINE static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a,
05747         const sp_digit* b)
05748 {
05749     sp_digit c = 0;
05750 
05751     __asm__ __volatile__ (
05752         "ldr    r4, [%[a], #0]\n\t"
05753         "ldr    r5, [%[b], #0]\n\t"
05754         "add    r4, r5\n\t"           /* first word: plain add starts the carry chain */
05755         "str    r4, [%[r], #0]\n\t"
05756         "ldr    r4, [%[a], #4]\n\t"
05757         "ldr    r5, [%[b], #4]\n\t"
05758         "adc    r4, r5\n\t"           /* every later word: add with carry in */
05759         "str    r4, [%[r], #4]\n\t"
05760         "ldr    r4, [%[a], #8]\n\t"
05761         "ldr    r5, [%[b], #8]\n\t"
05762         "adc    r4, r5\n\t"
05763         "str    r4, [%[r], #8]\n\t"
05764         "ldr    r4, [%[a], #12]\n\t"
05765         "ldr    r5, [%[b], #12]\n\t"
05766         "adc    r4, r5\n\t"
05767         "str    r4, [%[r], #12]\n\t"
05768         "ldr    r4, [%[a], #16]\n\t"
05769         "ldr    r5, [%[b], #16]\n\t"
05770         "adc    r4, r5\n\t"
05771         "str    r4, [%[r], #16]\n\t"
05772         "ldr    r4, [%[a], #20]\n\t"
05773         "ldr    r5, [%[b], #20]\n\t"
05774         "adc    r4, r5\n\t"
05775         "str    r4, [%[r], #20]\n\t"
05776         "ldr    r4, [%[a], #24]\n\t"
05777         "ldr    r5, [%[b], #24]\n\t"
05778         "adc    r4, r5\n\t"
05779         "str    r4, [%[r], #24]\n\t"
05780         "ldr    r4, [%[a], #28]\n\t"
05781         "ldr    r5, [%[b], #28]\n\t"
05782         "adc    r4, r5\n\t"
05783         "str    r4, [%[r], #28]\n\t"
05784         "ldr    r4, [%[a], #32]\n\t"
05785         "ldr    r5, [%[b], #32]\n\t"
05786         "adc    r4, r5\n\t"
05787         "str    r4, [%[r], #32]\n\t"
05788         "ldr    r4, [%[a], #36]\n\t"
05789         "ldr    r5, [%[b], #36]\n\t"
05790         "adc    r4, r5\n\t"
05791         "str    r4, [%[r], #36]\n\t"
05792         "ldr    r4, [%[a], #40]\n\t"
05793         "ldr    r5, [%[b], #40]\n\t"
05794         "adc    r4, r5\n\t"
05795         "str    r4, [%[r], #40]\n\t"
05796         "ldr    r4, [%[a], #44]\n\t"
05797         "ldr    r5, [%[b], #44]\n\t"
05798         "adc    r4, r5\n\t"
05799         "str    r4, [%[r], #44]\n\t"
05800         "mov    %[c], #0\n\t"         /* relies on this mov leaving the carry flag intact */
05801         "adc    %[c], %[c]\n\t"       /* c = 0 + 0 + carry */
05802         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
05803         :
05804         : "memory", "r4", "r5"
05805     );
05806 
05807     return c;
05808 }
05809 
05810 /* Sub b from a in place. (a = a - b)
05811  *
       * Subtracts 24 digits, two words per step from offset 0 to offset 92,
       * with the borrow chained through the carry flag.
       *
05812  * a  A single precision integer. Holds the minuend on entry and the
05813  *    difference on return.
05814  * b  A single precision integer.
       *
       * Returns 0 when the subtraction did not borrow out of the last word and
       * a value with all bits set when it did.
05815  */
05816 SP_NOINLINE static sp_digit sp_3072_sub_in_place_24(sp_digit* a,
05817         const sp_digit* b)
05818 {
05819     sp_digit c = 0;
05820 
05821     __asm__ __volatile__ (
05822         "ldr    r3, [%[a], #0]\n\t"
05823         "ldr    r4, [%[a], #4]\n\t"
05824         "ldr    r5, [%[b], #0]\n\t"
05825         "ldr    r6, [%[b], #4]\n\t"
05826         "sub    r3, r5\n\t"           /* first word: plain sub starts the borrow chain */
05827         "sbc    r4, r6\n\t"           /* every later word: subtract with borrow in */
05828         "str    r3, [%[a], #0]\n\t"
05829         "str    r4, [%[a], #4]\n\t"
05830         "ldr    r3, [%[a], #8]\n\t"
05831         "ldr    r4, [%[a], #12]\n\t"
05832         "ldr    r5, [%[b], #8]\n\t"
05833         "ldr    r6, [%[b], #12]\n\t"
05834         "sbc    r3, r5\n\t"
05835         "sbc    r4, r6\n\t"
05836         "str    r3, [%[a], #8]\n\t"
05837         "str    r4, [%[a], #12]\n\t"
05838         "ldr    r3, [%[a], #16]\n\t"
05839         "ldr    r4, [%[a], #20]\n\t"
05840         "ldr    r5, [%[b], #16]\n\t"
05841         "ldr    r6, [%[b], #20]\n\t"
05842         "sbc    r3, r5\n\t"
05843         "sbc    r4, r6\n\t"
05844         "str    r3, [%[a], #16]\n\t"
05845         "str    r4, [%[a], #20]\n\t"
05846         "ldr    r3, [%[a], #24]\n\t"
05847         "ldr    r4, [%[a], #28]\n\t"
05848         "ldr    r5, [%[b], #24]\n\t"
05849         "ldr    r6, [%[b], #28]\n\t"
05850         "sbc    r3, r5\n\t"
05851         "sbc    r4, r6\n\t"
05852         "str    r3, [%[a], #24]\n\t"
05853         "str    r4, [%[a], #28]\n\t"
05854         "ldr    r3, [%[a], #32]\n\t"
05855         "ldr    r4, [%[a], #36]\n\t"
05856         "ldr    r5, [%[b], #32]\n\t"
05857         "ldr    r6, [%[b], #36]\n\t"
05858         "sbc    r3, r5\n\t"
05859         "sbc    r4, r6\n\t"
05860         "str    r3, [%[a], #32]\n\t"
05861         "str    r4, [%[a], #36]\n\t"
05862         "ldr    r3, [%[a], #40]\n\t"
05863         "ldr    r4, [%[a], #44]\n\t"
05864         "ldr    r5, [%[b], #40]\n\t"
05865         "ldr    r6, [%[b], #44]\n\t"
05866         "sbc    r3, r5\n\t"
05867         "sbc    r4, r6\n\t"
05868         "str    r3, [%[a], #40]\n\t"
05869         "str    r4, [%[a], #44]\n\t"
05870         "ldr    r3, [%[a], #48]\n\t"
05871         "ldr    r4, [%[a], #52]\n\t"
05872         "ldr    r5, [%[b], #48]\n\t"
05873         "ldr    r6, [%[b], #52]\n\t"
05874         "sbc    r3, r5\n\t"
05875         "sbc    r4, r6\n\t"
05876         "str    r3, [%[a], #48]\n\t"
05877         "str    r4, [%[a], #52]\n\t"
05878         "ldr    r3, [%[a], #56]\n\t"
05879         "ldr    r4, [%[a], #60]\n\t"
05880         "ldr    r5, [%[b], #56]\n\t"
05881         "ldr    r6, [%[b], #60]\n\t"
05882         "sbc    r3, r5\n\t"
05883         "sbc    r4, r6\n\t"
05884         "str    r3, [%[a], #56]\n\t"
05885         "str    r4, [%[a], #60]\n\t"
05886         "ldr    r3, [%[a], #64]\n\t"
05887         "ldr    r4, [%[a], #68]\n\t"
05888         "ldr    r5, [%[b], #64]\n\t"
05889         "ldr    r6, [%[b], #68]\n\t"
05890         "sbc    r3, r5\n\t"
05891         "sbc    r4, r6\n\t"
05892         "str    r3, [%[a], #64]\n\t"
05893         "str    r4, [%[a], #68]\n\t"
05894         "ldr    r3, [%[a], #72]\n\t"
05895         "ldr    r4, [%[a], #76]\n\t"
05896         "ldr    r5, [%[b], #72]\n\t"
05897         "ldr    r6, [%[b], #76]\n\t"
05898         "sbc    r3, r5\n\t"
05899         "sbc    r4, r6\n\t"
05900         "str    r3, [%[a], #72]\n\t"
05901         "str    r4, [%[a], #76]\n\t"
05902         "ldr    r3, [%[a], #80]\n\t"
05903         "ldr    r4, [%[a], #84]\n\t"
05904         "ldr    r5, [%[b], #80]\n\t"
05905         "ldr    r6, [%[b], #84]\n\t"
05906         "sbc    r3, r5\n\t"
05907         "sbc    r4, r6\n\t"
05908         "str    r3, [%[a], #80]\n\t"
05909         "str    r4, [%[a], #84]\n\t"
05910         "ldr    r3, [%[a], #88]\n\t"
05911         "ldr    r4, [%[a], #92]\n\t"
05912         "ldr    r5, [%[b], #88]\n\t"
05913         "ldr    r6, [%[b], #92]\n\t"
05914         "sbc    r3, r5\n\t"
05915         "sbc    r4, r6\n\t"
05916         "str    r3, [%[a], #88]\n\t"
05917         "str    r4, [%[a], #92]\n\t"
05918         "sbc    %[c], %[c]\n\t"       /* c = c - c - borrow: 0, or all ones on borrow */
05919         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
05920         :
05921         : "memory", "r3", "r4", "r5", "r6"
05922     );
05923 
05924     return c;
05925 }
05926 
05927 /* Add b to a into r. (r = a + b)
05928  *
       * Adds 24 digits, word by word from offset 0 to offset 92, with the carry
       * flag chained from one word to the next.
       *
05929  * r  A single precision integer. Receives 24 digits.
05930  * a  A single precision integer.
05931  * b  A single precision integer.
       *
       * Returns the carry out of the last word: 0 or 1.
05932  */
05933 SP_NOINLINE static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a,
05934         const sp_digit* b)
05935 {
05936     sp_digit c = 0;
05937 
05938     __asm__ __volatile__ (
05939         "ldr    r4, [%[a], #0]\n\t"
05940         "ldr    r5, [%[b], #0]\n\t"
05941         "add    r4, r5\n\t"           /* first word: plain add starts the carry chain */
05942         "str    r4, [%[r], #0]\n\t"
05943         "ldr    r4, [%[a], #4]\n\t"
05944         "ldr    r5, [%[b], #4]\n\t"
05945         "adc    r4, r5\n\t"           /* every later word: add with carry in */
05946         "str    r4, [%[r], #4]\n\t"
05947         "ldr    r4, [%[a], #8]\n\t"
05948         "ldr    r5, [%[b], #8]\n\t"
05949         "adc    r4, r5\n\t"
05950         "str    r4, [%[r], #8]\n\t"
05951         "ldr    r4, [%[a], #12]\n\t"
05952         "ldr    r5, [%[b], #12]\n\t"
05953         "adc    r4, r5\n\t"
05954         "str    r4, [%[r], #12]\n\t"
05955         "ldr    r4, [%[a], #16]\n\t"
05956         "ldr    r5, [%[b], #16]\n\t"
05957         "adc    r4, r5\n\t"
05958         "str    r4, [%[r], #16]\n\t"
05959         "ldr    r4, [%[a], #20]\n\t"
05960         "ldr    r5, [%[b], #20]\n\t"
05961         "adc    r4, r5\n\t"
05962         "str    r4, [%[r], #20]\n\t"
05963         "ldr    r4, [%[a], #24]\n\t"
05964         "ldr    r5, [%[b], #24]\n\t"
05965         "adc    r4, r5\n\t"
05966         "str    r4, [%[r], #24]\n\t"
05967         "ldr    r4, [%[a], #28]\n\t"
05968         "ldr    r5, [%[b], #28]\n\t"
05969         "adc    r4, r5\n\t"
05970         "str    r4, [%[r], #28]\n\t"
05971         "ldr    r4, [%[a], #32]\n\t"
05972         "ldr    r5, [%[b], #32]\n\t"
05973         "adc    r4, r5\n\t"
05974         "str    r4, [%[r], #32]\n\t"
05975         "ldr    r4, [%[a], #36]\n\t"
05976         "ldr    r5, [%[b], #36]\n\t"
05977         "adc    r4, r5\n\t"
05978         "str    r4, [%[r], #36]\n\t"
05979         "ldr    r4, [%[a], #40]\n\t"
05980         "ldr    r5, [%[b], #40]\n\t"
05981         "adc    r4, r5\n\t"
05982         "str    r4, [%[r], #40]\n\t"
05983         "ldr    r4, [%[a], #44]\n\t"
05984         "ldr    r5, [%[b], #44]\n\t"
05985         "adc    r4, r5\n\t"
05986         "str    r4, [%[r], #44]\n\t"
05987         "ldr    r4, [%[a], #48]\n\t"
05988         "ldr    r5, [%[b], #48]\n\t"
05989         "adc    r4, r5\n\t"
05990         "str    r4, [%[r], #48]\n\t"
05991         "ldr    r4, [%[a], #52]\n\t"
05992         "ldr    r5, [%[b], #52]\n\t"
05993         "adc    r4, r5\n\t"
05994         "str    r4, [%[r], #52]\n\t"
05995         "ldr    r4, [%[a], #56]\n\t"
05996         "ldr    r5, [%[b], #56]\n\t"
05997         "adc    r4, r5\n\t"
05998         "str    r4, [%[r], #56]\n\t"
05999         "ldr    r4, [%[a], #60]\n\t"
06000         "ldr    r5, [%[b], #60]\n\t"
06001         "adc    r4, r5\n\t"
06002         "str    r4, [%[r], #60]\n\t"
06003         "ldr    r4, [%[a], #64]\n\t"
06004         "ldr    r5, [%[b], #64]\n\t"
06005         "adc    r4, r5\n\t"
06006         "str    r4, [%[r], #64]\n\t"
06007         "ldr    r4, [%[a], #68]\n\t"
06008         "ldr    r5, [%[b], #68]\n\t"
06009         "adc    r4, r5\n\t"
06010         "str    r4, [%[r], #68]\n\t"
06011         "ldr    r4, [%[a], #72]\n\t"
06012         "ldr    r5, [%[b], #72]\n\t"
06013         "adc    r4, r5\n\t"
06014         "str    r4, [%[r], #72]\n\t"
06015         "ldr    r4, [%[a], #76]\n\t"
06016         "ldr    r5, [%[b], #76]\n\t"
06017         "adc    r4, r5\n\t"
06018         "str    r4, [%[r], #76]\n\t"
06019         "ldr    r4, [%[a], #80]\n\t"
06020         "ldr    r5, [%[b], #80]\n\t"
06021         "adc    r4, r5\n\t"
06022         "str    r4, [%[r], #80]\n\t"
06023         "ldr    r4, [%[a], #84]\n\t"
06024         "ldr    r5, [%[b], #84]\n\t"
06025         "adc    r4, r5\n\t"
06026         "str    r4, [%[r], #84]\n\t"
06027         "ldr    r4, [%[a], #88]\n\t"
06028         "ldr    r5, [%[b], #88]\n\t"
06029         "adc    r4, r5\n\t"
06030         "str    r4, [%[r], #88]\n\t"
06031         "ldr    r4, [%[a], #92]\n\t"
06032         "ldr    r5, [%[b], #92]\n\t"
06033         "adc    r4, r5\n\t"
06034         "str    r4, [%[r], #92]\n\t"
06035         "mov    %[c], #0\n\t"         /* relies on this mov leaving the carry flag intact */
06036         "adc    %[c], %[c]\n\t"       /* c = 0 + 0 + carry */
06037         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
06038         :
06039         : "memory", "r4", "r5"
06040     );
06041 
06042     return c;
06043 }
06044 
06045 /* AND m into each word of a and store in r.
06046  *
06047  * r  A single precision integer.
06048  * a  A single precision integer.
06049  * m  Mask to AND against each digit.
06050  */
06051 static void sp_3072_mask_12(sp_digit* r, const sp_digit* a, sp_digit m)
06052 {
06053 #ifdef WOLFSSL_SP_SMALL
06054     int i;
06055 
06056     for (i=0; i<12; i++) {
06057         r[i] = a[i] & m;
06058     }
06059 #else
06060     r[0] = a[0] & m;
06061     r[1] = a[1] & m;
06062     r[2] = a[2] & m;
06063     r[3] = a[3] & m;
06064     r[4] = a[4] & m;
06065     r[5] = a[5] & m;
06066     r[6] = a[6] & m;
06067     r[7] = a[7] & m;
06068     r[8] = a[8] & m;
06069     r[9] = a[9] & m;
06070     r[10] = a[10] & m;
06071     r[11] = a[11] & m;
06072 #endif
06073 }
06074 
06075 /* Multiply a and b into r. (r = a * b)
06076  *
06077  * r  A single precision integer.
06078  * a  A single precision integer.
06079  * b  A single precision integer.
06080  */
06081 SP_NOINLINE static void sp_3072_mul_24(sp_digit* r, const sp_digit* a,
06082         const sp_digit* b)
06083 {
06084     sp_digit* z0 = r;
06085     sp_digit z1[24];
06086     sp_digit a1[12];
06087     sp_digit b1[12];
06088     sp_digit z2[24];
06089     sp_digit u, ca, cb;
06090 
06091     ca = sp_3072_add_12(a1, a, &a[12]);
06092     cb = sp_3072_add_12(b1, b, &b[12]);
06093     u  = ca & cb;
06094     sp_3072_mul_12(z1, a1, b1);
06095     sp_3072_mul_12(z2, &a[12], &b[12]);
06096     sp_3072_mul_12(z0, a, b);
06097     sp_3072_mask_12(r + 24, a1, 0 - cb);
06098     sp_3072_mask_12(b1, b1, 0 - ca);
06099     u += sp_3072_add_12(r + 24, r + 24, b1);
06100     u += sp_3072_sub_in_place_24(z1, z2);
06101     u += sp_3072_sub_in_place_24(z1, z0);
06102     u += sp_3072_add_24(r + 12, r + 12, z1);
06103     r[36] = u;
06104     XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1));
06105     (void)sp_3072_add_24(r + 24, r + 24, z2);
06106 }
06107 
06108 /* Square a and put result in r. (r = a * a)
06109  *
06110  * r  A single precision integer.
06111  * a  A single precision integer.
06112  */
06113 SP_NOINLINE static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a)
06114 {
06115     sp_digit* z0 = r;
06116     sp_digit z2[24];
06117     sp_digit z1[24];
06118     sp_digit a1[12];
06119     sp_digit u;
06120 
06121     u = sp_3072_add_12(a1, a, &a[12]);
06122     sp_3072_sqr_12(z1, a1);
06123     sp_3072_sqr_12(z2, &a[12]);
06124     sp_3072_sqr_12(z0, a);
06125     sp_3072_mask_12(r + 24, a1, 0 - u);
06126     u += sp_3072_add_12(r + 24, r + 24, r + 24);
06127     u += sp_3072_sub_in_place_24(z1, z2);
06128     u += sp_3072_sub_in_place_24(z1, z0);
06129     u += sp_3072_add_24(r + 12, r + 12, z1);
06130     r[36] = u;
06131     XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1));
06132     (void)sp_3072_add_24(r + 24, r + 24, z2);
06133 }
06134 
06135 /* Sub b from a into r. (r = a - b)
06136  *
06137  * r  A single precision integer.
06138  * a  A single precision integer.
06139  * b  A single precision integer.
06140  */
06141 SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a,
06142         const sp_digit* b)
06143 {
06144     sp_digit c = 0;
06145 
06146     __asm__ __volatile__ (
06147         "ldr    r3, [%[a], #0]\n\t"
06148         "ldr    r4, [%[a], #4]\n\t"
06149         "ldr    r5, [%[b], #0]\n\t"
06150         "ldr    r6, [%[b], #4]\n\t"
06151         "sub    r3, r5\n\t"
06152         "sbc    r4, r6\n\t"
06153         "str    r3, [%[a], #0]\n\t"
06154         "str    r4, [%[a], #4]\n\t"
06155         "ldr    r3, [%[a], #8]\n\t"
06156         "ldr    r4, [%[a], #12]\n\t"
06157         "ldr    r5, [%[b], #8]\n\t"
06158         "ldr    r6, [%[b], #12]\n\t"
06159         "sbc    r3, r5\n\t"
06160         "sbc    r4, r6\n\t"
06161         "str    r3, [%[a], #8]\n\t"
06162         "str    r4, [%[a], #12]\n\t"
06163         "ldr    r3, [%[a], #16]\n\t"
06164         "ldr    r4, [%[a], #20]\n\t"
06165         "ldr    r5, [%[b], #16]\n\t"
06166         "ldr    r6, [%[b], #20]\n\t"
06167         "sbc    r3, r5\n\t"
06168         "sbc    r4, r6\n\t"
06169         "str    r3, [%[a], #16]\n\t"
06170         "str    r4, [%[a], #20]\n\t"
06171         "ldr    r3, [%[a], #24]\n\t"
06172         "ldr    r4, [%[a], #28]\n\t"
06173         "ldr    r5, [%[b], #24]\n\t"
06174         "ldr    r6, [%[b], #28]\n\t"
06175         "sbc    r3, r5\n\t"
06176         "sbc    r4, r6\n\t"
06177         "str    r3, [%[a], #24]\n\t"
06178         "str    r4, [%[a], #28]\n\t"
06179         "ldr    r3, [%[a], #32]\n\t"
06180         "ldr    r4, [%[a], #36]\n\t"
06181         "ldr    r5, [%[b], #32]\n\t"
06182         "ldr    r6, [%[b], #36]\n\t"
06183         "sbc    r3, r5\n\t"
06184         "sbc    r4, r6\n\t"
06185         "str    r3, [%[a], #32]\n\t"
06186         "str    r4, [%[a], #36]\n\t"
06187         "ldr    r3, [%[a], #40]\n\t"
06188         "ldr    r4, [%[a], #44]\n\t"
06189         "ldr    r5, [%[b], #40]\n\t"
06190         "ldr    r6, [%[b], #44]\n\t"
06191         "sbc    r3, r5\n\t"
06192         "sbc    r4, r6\n\t"
06193         "str    r3, [%[a], #40]\n\t"
06194         "str    r4, [%[a], #44]\n\t"
06195         "ldr    r3, [%[a], #48]\n\t"
06196         "ldr    r4, [%[a], #52]\n\t"
06197         "ldr    r5, [%[b], #48]\n\t"
06198         "ldr    r6, [%[b], #52]\n\t"
06199         "sbc    r3, r5\n\t"
06200         "sbc    r4, r6\n\t"
06201         "str    r3, [%[a], #48]\n\t"
06202         "str    r4, [%[a], #52]\n\t"
06203         "ldr    r3, [%[a], #56]\n\t"
06204         "ldr    r4, [%[a], #60]\n\t"
06205         "ldr    r5, [%[b], #56]\n\t"
06206         "ldr    r6, [%[b], #60]\n\t"
06207         "sbc    r3, r5\n\t"
06208         "sbc    r4, r6\n\t"
06209         "str    r3, [%[a], #56]\n\t"
06210         "str    r4, [%[a], #60]\n\t"
06211         "ldr    r3, [%[a], #64]\n\t"
06212         "ldr    r4, [%[a], #68]\n\t"
06213         "ldr    r5, [%[b], #64]\n\t"
06214         "ldr    r6, [%[b], #68]\n\t"
06215         "sbc    r3, r5\n\t"
06216         "sbc    r4, r6\n\t"
06217         "str    r3, [%[a], #64]\n\t"
06218         "str    r4, [%[a], #68]\n\t"
06219         "ldr    r3, [%[a], #72]\n\t"
06220         "ldr    r4, [%[a], #76]\n\t"
06221         "ldr    r5, [%[b], #72]\n\t"
06222         "ldr    r6, [%[b], #76]\n\t"
06223         "sbc    r3, r5\n\t"
06224         "sbc    r4, r6\n\t"
06225         "str    r3, [%[a], #72]\n\t"
06226         "str    r4, [%[a], #76]\n\t"
06227         "ldr    r3, [%[a], #80]\n\t"
06228         "ldr    r4, [%[a], #84]\n\t"
06229         "ldr    r5, [%[b], #80]\n\t"
06230         "ldr    r6, [%[b], #84]\n\t"
06231         "sbc    r3, r5\n\t"
06232         "sbc    r4, r6\n\t"
06233         "str    r3, [%[a], #80]\n\t"
06234         "str    r4, [%[a], #84]\n\t"
06235         "ldr    r3, [%[a], #88]\n\t"
06236         "ldr    r4, [%[a], #92]\n\t"
06237         "ldr    r5, [%[b], #88]\n\t"
06238         "ldr    r6, [%[b], #92]\n\t"
06239         "sbc    r3, r5\n\t"
06240         "sbc    r4, r6\n\t"
06241         "str    r3, [%[a], #88]\n\t"
06242         "str    r4, [%[a], #92]\n\t"
06243         "ldr    r3, [%[a], #96]\n\t"
06244         "ldr    r4, [%[a], #100]\n\t"
06245         "ldr    r5, [%[b], #96]\n\t"
06246         "ldr    r6, [%[b], #100]\n\t"
06247         "sbc    r3, r5\n\t"
06248         "sbc    r4, r6\n\t"
06249         "str    r3, [%[a], #96]\n\t"
06250         "str    r4, [%[a], #100]\n\t"
06251         "ldr    r3, [%[a], #104]\n\t"
06252         "ldr    r4, [%[a], #108]\n\t"
06253         "ldr    r5, [%[b], #104]\n\t"
06254         "ldr    r6, [%[b], #108]\n\t"
06255         "sbc    r3, r5\n\t"
06256         "sbc    r4, r6\n\t"
06257         "str    r3, [%[a], #104]\n\t"
06258         "str    r4, [%[a], #108]\n\t"
06259         "ldr    r3, [%[a], #112]\n\t"
06260         "ldr    r4, [%[a], #116]\n\t"
06261         "ldr    r5, [%[b], #112]\n\t"
06262         "ldr    r6, [%[b], #116]\n\t"
06263         "sbc    r3, r5\n\t"
06264         "sbc    r4, r6\n\t"
06265         "str    r3, [%[a], #112]\n\t"
06266         "str    r4, [%[a], #116]\n\t"
06267         "ldr    r3, [%[a], #120]\n\t"
06268         "ldr    r4, [%[a], #124]\n\t"
06269         "ldr    r5, [%[b], #120]\n\t"
06270         "ldr    r6, [%[b], #124]\n\t"
06271         "sbc    r3, r5\n\t"
06272         "sbc    r4, r6\n\t"
06273         "str    r3, [%[a], #120]\n\t"
06274         "str    r4, [%[a], #124]\n\t"
06275         "sbc    %[c], %[c]\n\t"
06276         "add    %[a], #0x80\n\t"
06277         "add    %[b], #0x80\n\t"
06278         "mov    r5, #0\n\t"
06279         "sub    r5, %[c]\n\t"
06280         "ldr    r3, [%[a], #0]\n\t"
06281         "ldr    r4, [%[a], #4]\n\t"
06282         "ldr    r5, [%[b], #0]\n\t"
06283         "ldr    r6, [%[b], #4]\n\t"
06284         "sbc    r3, r5\n\t"
06285         "sbc    r4, r6\n\t"
06286         "str    r3, [%[a], #0]\n\t"
06287         "str    r4, [%[a], #4]\n\t"
06288         "ldr    r3, [%[a], #8]\n\t"
06289         "ldr    r4, [%[a], #12]\n\t"
06290         "ldr    r5, [%[b], #8]\n\t"
06291         "ldr    r6, [%[b], #12]\n\t"
06292         "sbc    r3, r5\n\t"
06293         "sbc    r4, r6\n\t"
06294         "str    r3, [%[a], #8]\n\t"
06295         "str    r4, [%[a], #12]\n\t"
06296         "ldr    r3, [%[a], #16]\n\t"
06297         "ldr    r4, [%[a], #20]\n\t"
06298         "ldr    r5, [%[b], #16]\n\t"
06299         "ldr    r6, [%[b], #20]\n\t"
06300         "sbc    r3, r5\n\t"
06301         "sbc    r4, r6\n\t"
06302         "str    r3, [%[a], #16]\n\t"
06303         "str    r4, [%[a], #20]\n\t"
06304         "ldr    r3, [%[a], #24]\n\t"
06305         "ldr    r4, [%[a], #28]\n\t"
06306         "ldr    r5, [%[b], #24]\n\t"
06307         "ldr    r6, [%[b], #28]\n\t"
06308         "sbc    r3, r5\n\t"
06309         "sbc    r4, r6\n\t"
06310         "str    r3, [%[a], #24]\n\t"
06311         "str    r4, [%[a], #28]\n\t"
06312         "ldr    r3, [%[a], #32]\n\t"
06313         "ldr    r4, [%[a], #36]\n\t"
06314         "ldr    r5, [%[b], #32]\n\t"
06315         "ldr    r6, [%[b], #36]\n\t"
06316         "sbc    r3, r5\n\t"
06317         "sbc    r4, r6\n\t"
06318         "str    r3, [%[a], #32]\n\t"
06319         "str    r4, [%[a], #36]\n\t"
06320         "ldr    r3, [%[a], #40]\n\t"
06321         "ldr    r4, [%[a], #44]\n\t"
06322         "ldr    r5, [%[b], #40]\n\t"
06323         "ldr    r6, [%[b], #44]\n\t"
06324         "sbc    r3, r5\n\t"
06325         "sbc    r4, r6\n\t"
06326         "str    r3, [%[a], #40]\n\t"
06327         "str    r4, [%[a], #44]\n\t"
06328         "ldr    r3, [%[a], #48]\n\t"
06329         "ldr    r4, [%[a], #52]\n\t"
06330         "ldr    r5, [%[b], #48]\n\t"
06331         "ldr    r6, [%[b], #52]\n\t"
06332         "sbc    r3, r5\n\t"
06333         "sbc    r4, r6\n\t"
06334         "str    r3, [%[a], #48]\n\t"
06335         "str    r4, [%[a], #52]\n\t"
06336         "ldr    r3, [%[a], #56]\n\t"
06337         "ldr    r4, [%[a], #60]\n\t"
06338         "ldr    r5, [%[b], #56]\n\t"
06339         "ldr    r6, [%[b], #60]\n\t"
06340         "sbc    r3, r5\n\t"
06341         "sbc    r4, r6\n\t"
06342         "str    r3, [%[a], #56]\n\t"
06343         "str    r4, [%[a], #60]\n\t"
06344         "sbc    %[c], %[c]\n\t"
06345         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
06346         :
06347         : "memory", "r3", "r4", "r5", "r6"
06348     );
06349 
06350     return c;
06351 }
06352 
06353 /* Add b to a into r. (r = a + b)
06354  *
06355  * r  A single precision integer.
06356  * a  A single precision integer.
06357  * b  A single precision integer.
06358  */
06359 SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a,
06360         const sp_digit* b)
06361 {
06362     sp_digit c = 0;
06363 
06364     __asm__ __volatile__ (
06365         "mov    r7, #0\n\t"
06366         "mvn    r7, r7\n\t"
06367         "ldr    r4, [%[a], #0]\n\t"
06368         "ldr    r5, [%[b], #0]\n\t"
06369         "add    r4, r5\n\t"
06370         "str    r4, [%[r], #0]\n\t"
06371         "ldr    r4, [%[a], #4]\n\t"
06372         "ldr    r5, [%[b], #4]\n\t"
06373         "adc    r4, r5\n\t"
06374         "str    r4, [%[r], #4]\n\t"
06375         "ldr    r4, [%[a], #8]\n\t"
06376         "ldr    r5, [%[b], #8]\n\t"
06377         "adc    r4, r5\n\t"
06378         "str    r4, [%[r], #8]\n\t"
06379         "ldr    r4, [%[a], #12]\n\t"
06380         "ldr    r5, [%[b], #12]\n\t"
06381         "adc    r4, r5\n\t"
06382         "str    r4, [%[r], #12]\n\t"
06383         "ldr    r4, [%[a], #16]\n\t"
06384         "ldr    r5, [%[b], #16]\n\t"
06385         "adc    r4, r5\n\t"
06386         "str    r4, [%[r], #16]\n\t"
06387         "ldr    r4, [%[a], #20]\n\t"
06388         "ldr    r5, [%[b], #20]\n\t"
06389         "adc    r4, r5\n\t"
06390         "str    r4, [%[r], #20]\n\t"
06391         "ldr    r4, [%[a], #24]\n\t"
06392         "ldr    r5, [%[b], #24]\n\t"
06393         "adc    r4, r5\n\t"
06394         "str    r4, [%[r], #24]\n\t"
06395         "ldr    r4, [%[a], #28]\n\t"
06396         "ldr    r5, [%[b], #28]\n\t"
06397         "adc    r4, r5\n\t"
06398         "str    r4, [%[r], #28]\n\t"
06399         "ldr    r4, [%[a], #32]\n\t"
06400         "ldr    r5, [%[b], #32]\n\t"
06401         "adc    r4, r5\n\t"
06402         "str    r4, [%[r], #32]\n\t"
06403         "ldr    r4, [%[a], #36]\n\t"
06404         "ldr    r5, [%[b], #36]\n\t"
06405         "adc    r4, r5\n\t"
06406         "str    r4, [%[r], #36]\n\t"
06407         "ldr    r4, [%[a], #40]\n\t"
06408         "ldr    r5, [%[b], #40]\n\t"
06409         "adc    r4, r5\n\t"
06410         "str    r4, [%[r], #40]\n\t"
06411         "ldr    r4, [%[a], #44]\n\t"
06412         "ldr    r5, [%[b], #44]\n\t"
06413         "adc    r4, r5\n\t"
06414         "str    r4, [%[r], #44]\n\t"
06415         "ldr    r4, [%[a], #48]\n\t"
06416         "ldr    r5, [%[b], #48]\n\t"
06417         "adc    r4, r5\n\t"
06418         "str    r4, [%[r], #48]\n\t"
06419         "ldr    r4, [%[a], #52]\n\t"
06420         "ldr    r5, [%[b], #52]\n\t"
06421         "adc    r4, r5\n\t"
06422         "str    r4, [%[r], #52]\n\t"
06423         "ldr    r4, [%[a], #56]\n\t"
06424         "ldr    r5, [%[b], #56]\n\t"
06425         "adc    r4, r5\n\t"
06426         "str    r4, [%[r], #56]\n\t"
06427         "ldr    r4, [%[a], #60]\n\t"
06428         "ldr    r5, [%[b], #60]\n\t"
06429         "adc    r4, r5\n\t"
06430         "str    r4, [%[r], #60]\n\t"
06431         "ldr    r4, [%[a], #64]\n\t"
06432         "ldr    r5, [%[b], #64]\n\t"
06433         "adc    r4, r5\n\t"
06434         "str    r4, [%[r], #64]\n\t"
06435         "ldr    r4, [%[a], #68]\n\t"
06436         "ldr    r5, [%[b], #68]\n\t"
06437         "adc    r4, r5\n\t"
06438         "str    r4, [%[r], #68]\n\t"
06439         "ldr    r4, [%[a], #72]\n\t"
06440         "ldr    r5, [%[b], #72]\n\t"
06441         "adc    r4, r5\n\t"
06442         "str    r4, [%[r], #72]\n\t"
06443         "ldr    r4, [%[a], #76]\n\t"
06444         "ldr    r5, [%[b], #76]\n\t"
06445         "adc    r4, r5\n\t"
06446         "str    r4, [%[r], #76]\n\t"
06447         "ldr    r4, [%[a], #80]\n\t"
06448         "ldr    r5, [%[b], #80]\n\t"
06449         "adc    r4, r5\n\t"
06450         "str    r4, [%[r], #80]\n\t"
06451         "ldr    r4, [%[a], #84]\n\t"
06452         "ldr    r5, [%[b], #84]\n\t"
06453         "adc    r4, r5\n\t"
06454         "str    r4, [%[r], #84]\n\t"
06455         "ldr    r4, [%[a], #88]\n\t"
06456         "ldr    r5, [%[b], #88]\n\t"
06457         "adc    r4, r5\n\t"
06458         "str    r4, [%[r], #88]\n\t"
06459         "ldr    r4, [%[a], #92]\n\t"
06460         "ldr    r5, [%[b], #92]\n\t"
06461         "adc    r4, r5\n\t"
06462         "str    r4, [%[r], #92]\n\t"
06463         "ldr    r4, [%[a], #96]\n\t"
06464         "ldr    r5, [%[b], #96]\n\t"
06465         "adc    r4, r5\n\t"
06466         "str    r4, [%[r], #96]\n\t"
06467         "ldr    r4, [%[a], #100]\n\t"
06468         "ldr    r5, [%[b], #100]\n\t"
06469         "adc    r4, r5\n\t"
06470         "str    r4, [%[r], #100]\n\t"
06471         "ldr    r4, [%[a], #104]\n\t"
06472         "ldr    r5, [%[b], #104]\n\t"
06473         "adc    r4, r5\n\t"
06474         "str    r4, [%[r], #104]\n\t"
06475         "ldr    r4, [%[a], #108]\n\t"
06476         "ldr    r5, [%[b], #108]\n\t"
06477         "adc    r4, r5\n\t"
06478         "str    r4, [%[r], #108]\n\t"
06479         "ldr    r4, [%[a], #112]\n\t"
06480         "ldr    r5, [%[b], #112]\n\t"
06481         "adc    r4, r5\n\t"
06482         "str    r4, [%[r], #112]\n\t"
06483         "ldr    r4, [%[a], #116]\n\t"
06484         "ldr    r5, [%[b], #116]\n\t"
06485         "adc    r4, r5\n\t"
06486         "str    r4, [%[r], #116]\n\t"
06487         "ldr    r4, [%[a], #120]\n\t"
06488         "ldr    r5, [%[b], #120]\n\t"
06489         "adc    r4, r5\n\t"
06490         "str    r4, [%[r], #120]\n\t"
06491         "ldr    r4, [%[a], #124]\n\t"
06492         "ldr    r5, [%[b], #124]\n\t"
06493         "adc    r4, r5\n\t"
06494         "str    r4, [%[r], #124]\n\t"
06495         "mov    %[c], #0\n\t"
06496         "adc    %[c], %[c]\n\t"
06497         "add    %[a], #0x80\n\t"
06498         "add    %[b], #0x80\n\t"
06499         "add    %[r], #0x80\n\t"
06500         "add    %[c], r7\n\t"
06501         "ldr    r4, [%[a], #0]\n\t"
06502         "ldr    r5, [%[b], #0]\n\t"
06503         "adc    r4, r5\n\t"
06504         "str    r4, [%[r], #0]\n\t"
06505         "ldr    r4, [%[a], #4]\n\t"
06506         "ldr    r5, [%[b], #4]\n\t"
06507         "adc    r4, r5\n\t"
06508         "str    r4, [%[r], #4]\n\t"
06509         "ldr    r4, [%[a], #8]\n\t"
06510         "ldr    r5, [%[b], #8]\n\t"
06511         "adc    r4, r5\n\t"
06512         "str    r4, [%[r], #8]\n\t"
06513         "ldr    r4, [%[a], #12]\n\t"
06514         "ldr    r5, [%[b], #12]\n\t"
06515         "adc    r4, r5\n\t"
06516         "str    r4, [%[r], #12]\n\t"
06517         "ldr    r4, [%[a], #16]\n\t"
06518         "ldr    r5, [%[b], #16]\n\t"
06519         "adc    r4, r5\n\t"
06520         "str    r4, [%[r], #16]\n\t"
06521         "ldr    r4, [%[a], #20]\n\t"
06522         "ldr    r5, [%[b], #20]\n\t"
06523         "adc    r4, r5\n\t"
06524         "str    r4, [%[r], #20]\n\t"
06525         "ldr    r4, [%[a], #24]\n\t"
06526         "ldr    r5, [%[b], #24]\n\t"
06527         "adc    r4, r5\n\t"
06528         "str    r4, [%[r], #24]\n\t"
06529         "ldr    r4, [%[a], #28]\n\t"
06530         "ldr    r5, [%[b], #28]\n\t"
06531         "adc    r4, r5\n\t"
06532         "str    r4, [%[r], #28]\n\t"
06533         "ldr    r4, [%[a], #32]\n\t"
06534         "ldr    r5, [%[b], #32]\n\t"
06535         "adc    r4, r5\n\t"
06536         "str    r4, [%[r], #32]\n\t"
06537         "ldr    r4, [%[a], #36]\n\t"
06538         "ldr    r5, [%[b], #36]\n\t"
06539         "adc    r4, r5\n\t"
06540         "str    r4, [%[r], #36]\n\t"
06541         "ldr    r4, [%[a], #40]\n\t"
06542         "ldr    r5, [%[b], #40]\n\t"
06543         "adc    r4, r5\n\t"
06544         "str    r4, [%[r], #40]\n\t"
06545         "ldr    r4, [%[a], #44]\n\t"
06546         "ldr    r5, [%[b], #44]\n\t"
06547         "adc    r4, r5\n\t"
06548         "str    r4, [%[r], #44]\n\t"
06549         "ldr    r4, [%[a], #48]\n\t"
06550         "ldr    r5, [%[b], #48]\n\t"
06551         "adc    r4, r5\n\t"
06552         "str    r4, [%[r], #48]\n\t"
06553         "ldr    r4, [%[a], #52]\n\t"
06554         "ldr    r5, [%[b], #52]\n\t"
06555         "adc    r4, r5\n\t"
06556         "str    r4, [%[r], #52]\n\t"
06557         "ldr    r4, [%[a], #56]\n\t"
06558         "ldr    r5, [%[b], #56]\n\t"
06559         "adc    r4, r5\n\t"
06560         "str    r4, [%[r], #56]\n\t"
06561         "ldr    r4, [%[a], #60]\n\t"
06562         "ldr    r5, [%[b], #60]\n\t"
06563         "adc    r4, r5\n\t"
06564         "str    r4, [%[r], #60]\n\t"
06565         "mov    %[c], #0\n\t"
06566         "adc    %[c], %[c]\n\t"
06567         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
06568         :
06569         : "memory", "r4", "r5", "r7"
06570     );
06571 
06572     return c;
06573 }
06574 
06575 /* AND m into each word of a and store in r.
06576  *
06577  * r  A single precision integer.
06578  * a  A single precision integer.
06579  * m  Mask to AND against each digit.
06580  */
06581 static void sp_3072_mask_24(sp_digit* r, const sp_digit* a, sp_digit m)
06582 {
06583 #ifdef WOLFSSL_SP_SMALL
06584     int i;
06585 
06586     for (i=0; i<24; i++) {
06587         r[i] = a[i] & m;
06588     }
06589 #else
06590     int i;
06591 
06592     for (i = 0; i < 24; i += 8) {
06593         r[i+0] = a[i+0] & m;
06594         r[i+1] = a[i+1] & m;
06595         r[i+2] = a[i+2] & m;
06596         r[i+3] = a[i+3] & m;
06597         r[i+4] = a[i+4] & m;
06598         r[i+5] = a[i+5] & m;
06599         r[i+6] = a[i+6] & m;
06600         r[i+7] = a[i+7] & m;
06601     }
06602 #endif
06603 }
06604 
06605 /* Multiply a and b into r. (r = a * b)
06606  *
06607  * r  A single precision integer.
06608  * a  A single precision integer.
06609  * b  A single precision integer.
06610  */
06611 SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a,
06612         const sp_digit* b)
06613 {
06614     sp_digit* z0 = r;
06615     sp_digit z1[48];
06616     sp_digit a1[24];
06617     sp_digit b1[24];
06618     sp_digit z2[48];
06619     sp_digit u, ca, cb;
06620 
06621     ca = sp_3072_add_24(a1, a, &a[24]);
06622     cb = sp_3072_add_24(b1, b, &b[24]);
06623     u  = ca & cb;
06624     sp_3072_mul_24(z1, a1, b1);
06625     sp_3072_mul_24(z2, &a[24], &b[24]);
06626     sp_3072_mul_24(z0, a, b);
06627     sp_3072_mask_24(r + 48, a1, 0 - cb);
06628     sp_3072_mask_24(b1, b1, 0 - ca);
06629     u += sp_3072_add_24(r + 48, r + 48, b1);
06630     u += sp_3072_sub_in_place_48(z1, z2);
06631     u += sp_3072_sub_in_place_48(z1, z0);
06632     u += sp_3072_add_48(r + 24, r + 24, z1);
06633     r[72] = u;
06634     XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1));
06635     (void)sp_3072_add_48(r + 48, r + 48, z2);
06636 }
06637 
06638 /* Square a and put result in r. (r = a * a)
06639  *
06640  * r  A single precision integer.
06641  * a  A single precision integer.
06642  */
06643 SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
06644 {
06645     sp_digit* z0 = r;
06646     sp_digit z2[48];
06647     sp_digit z1[48];
06648     sp_digit a1[24];
06649     sp_digit u;
06650 
06651     u = sp_3072_add_24(a1, a, &a[24]);
06652     sp_3072_sqr_24(z1, a1);
06653     sp_3072_sqr_24(z2, &a[24]);
06654     sp_3072_sqr_24(z0, a);
06655     sp_3072_mask_24(r + 48, a1, 0 - u);
06656     u += sp_3072_add_24(r + 48, r + 48, r + 48);
06657     u += sp_3072_sub_in_place_48(z1, z2);
06658     u += sp_3072_sub_in_place_48(z1, z0);
06659     u += sp_3072_add_48(r + 24, r + 24, z1);
06660     r[72] = u;
06661     XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1));
06662     (void)sp_3072_add_48(r + 48, r + 48, z2);
06663 }
06664 
06665 /* Sub b from a into r. (r = a - b)
06666  *
06667  * r  A single precision integer.
06668  * a  A single precision integer.
06669  * b  A single precision integer.
06670  */
06671 SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a,
06672         const sp_digit* b)
06673 {
06674     sp_digit c = 0;
06675 
06676     __asm__ __volatile__ (
06677         "ldr    r3, [%[a], #0]\n\t"
06678         "ldr    r4, [%[a], #4]\n\t"
06679         "ldr    r5, [%[b], #0]\n\t"
06680         "ldr    r6, [%[b], #4]\n\t"
06681         "sub    r3, r5\n\t"
06682         "sbc    r4, r6\n\t"
06683         "str    r3, [%[a], #0]\n\t"
06684         "str    r4, [%[a], #4]\n\t"
06685         "ldr    r3, [%[a], #8]\n\t"
06686         "ldr    r4, [%[a], #12]\n\t"
06687         "ldr    r5, [%[b], #8]\n\t"
06688         "ldr    r6, [%[b], #12]\n\t"
06689         "sbc    r3, r5\n\t"
06690         "sbc    r4, r6\n\t"
06691         "str    r3, [%[a], #8]\n\t"
06692         "str    r4, [%[a], #12]\n\t"
06693         "ldr    r3, [%[a], #16]\n\t"
06694         "ldr    r4, [%[a], #20]\n\t"
06695         "ldr    r5, [%[b], #16]\n\t"
06696         "ldr    r6, [%[b], #20]\n\t"
06697         "sbc    r3, r5\n\t"
06698         "sbc    r4, r6\n\t"
06699         "str    r3, [%[a], #16]\n\t"
06700         "str    r4, [%[a], #20]\n\t"
06701         "ldr    r3, [%[a], #24]\n\t"
06702         "ldr    r4, [%[a], #28]\n\t"
06703         "ldr    r5, [%[b], #24]\n\t"
06704         "ldr    r6, [%[b], #28]\n\t"
06705         "sbc    r3, r5\n\t"
06706         "sbc    r4, r6\n\t"
06707         "str    r3, [%[a], #24]\n\t"
06708         "str    r4, [%[a], #28]\n\t"
06709         "ldr    r3, [%[a], #32]\n\t"
06710         "ldr    r4, [%[a], #36]\n\t"
06711         "ldr    r5, [%[b], #32]\n\t"
06712         "ldr    r6, [%[b], #36]\n\t"
06713         "sbc    r3, r5\n\t"
06714         "sbc    r4, r6\n\t"
06715         "str    r3, [%[a], #32]\n\t"
06716         "str    r4, [%[a], #36]\n\t"
06717         "ldr    r3, [%[a], #40]\n\t"
06718         "ldr    r4, [%[a], #44]\n\t"
06719         "ldr    r5, [%[b], #40]\n\t"
06720         "ldr    r6, [%[b], #44]\n\t"
06721         "sbc    r3, r5\n\t"
06722         "sbc    r4, r6\n\t"
06723         "str    r3, [%[a], #40]\n\t"
06724         "str    r4, [%[a], #44]\n\t"
06725         "ldr    r3, [%[a], #48]\n\t"
06726         "ldr    r4, [%[a], #52]\n\t"
06727         "ldr    r5, [%[b], #48]\n\t"
06728         "ldr    r6, [%[b], #52]\n\t"
06729         "sbc    r3, r5\n\t"
06730         "sbc    r4, r6\n\t"
06731         "str    r3, [%[a], #48]\n\t"
06732         "str    r4, [%[a], #52]\n\t"
06733         "ldr    r3, [%[a], #56]\n\t"
06734         "ldr    r4, [%[a], #60]\n\t"
06735         "ldr    r5, [%[b], #56]\n\t"
06736         "ldr    r6, [%[b], #60]\n\t"
06737         "sbc    r3, r5\n\t"
06738         "sbc    r4, r6\n\t"
06739         "str    r3, [%[a], #56]\n\t"
06740         "str    r4, [%[a], #60]\n\t"
06741         "ldr    r3, [%[a], #64]\n\t"
06742         "ldr    r4, [%[a], #68]\n\t"
06743         "ldr    r5, [%[b], #64]\n\t"
06744         "ldr    r6, [%[b], #68]\n\t"
06745         "sbc    r3, r5\n\t"
06746         "sbc    r4, r6\n\t"
06747         "str    r3, [%[a], #64]\n\t"
06748         "str    r4, [%[a], #68]\n\t"
06749         "ldr    r3, [%[a], #72]\n\t"
06750         "ldr    r4, [%[a], #76]\n\t"
06751         "ldr    r5, [%[b], #72]\n\t"
06752         "ldr    r6, [%[b], #76]\n\t"
06753         "sbc    r3, r5\n\t"
06754         "sbc    r4, r6\n\t"
06755         "str    r3, [%[a], #72]\n\t"
06756         "str    r4, [%[a], #76]\n\t"
06757         "ldr    r3, [%[a], #80]\n\t"
06758         "ldr    r4, [%[a], #84]\n\t"
06759         "ldr    r5, [%[b], #80]\n\t"
06760         "ldr    r6, [%[b], #84]\n\t"
06761         "sbc    r3, r5\n\t"
06762         "sbc    r4, r6\n\t"
06763         "str    r3, [%[a], #80]\n\t"
06764         "str    r4, [%[a], #84]\n\t"
06765         "ldr    r3, [%[a], #88]\n\t"
06766         "ldr    r4, [%[a], #92]\n\t"
06767         "ldr    r5, [%[b], #88]\n\t"
06768         "ldr    r6, [%[b], #92]\n\t"
06769         "sbc    r3, r5\n\t"
06770         "sbc    r4, r6\n\t"
06771         "str    r3, [%[a], #88]\n\t"
06772         "str    r4, [%[a], #92]\n\t"
06773         "ldr    r3, [%[a], #96]\n\t"
06774         "ldr    r4, [%[a], #100]\n\t"
06775         "ldr    r5, [%[b], #96]\n\t"
06776         "ldr    r6, [%[b], #100]\n\t"
06777         "sbc    r3, r5\n\t"
06778         "sbc    r4, r6\n\t"
06779         "str    r3, [%[a], #96]\n\t"
06780         "str    r4, [%[a], #100]\n\t"
06781         "ldr    r3, [%[a], #104]\n\t"
06782         "ldr    r4, [%[a], #108]\n\t"
06783         "ldr    r5, [%[b], #104]\n\t"
06784         "ldr    r6, [%[b], #108]\n\t"
06785         "sbc    r3, r5\n\t"
06786         "sbc    r4, r6\n\t"
06787         "str    r3, [%[a], #104]\n\t"
06788         "str    r4, [%[a], #108]\n\t"
06789         "ldr    r3, [%[a], #112]\n\t"
06790         "ldr    r4, [%[a], #116]\n\t"
06791         "ldr    r5, [%[b], #112]\n\t"
06792         "ldr    r6, [%[b], #116]\n\t"
06793         "sbc    r3, r5\n\t"
06794         "sbc    r4, r6\n\t"
06795         "str    r3, [%[a], #112]\n\t"
06796         "str    r4, [%[a], #116]\n\t"
06797         "ldr    r3, [%[a], #120]\n\t"
06798         "ldr    r4, [%[a], #124]\n\t"
06799         "ldr    r5, [%[b], #120]\n\t"
06800         "ldr    r6, [%[b], #124]\n\t"
06801         "sbc    r3, r5\n\t"
06802         "sbc    r4, r6\n\t"
06803         "str    r3, [%[a], #120]\n\t"
06804         "str    r4, [%[a], #124]\n\t"
06805         "sbc    %[c], %[c]\n\t"
06806         "add    %[a], #0x80\n\t"
06807         "add    %[b], #0x80\n\t"
06808         "mov    r5, #0\n\t"
06809         "sub    r5, %[c]\n\t"
06810         "ldr    r3, [%[a], #0]\n\t"
06811         "ldr    r4, [%[a], #4]\n\t"
06812         "ldr    r5, [%[b], #0]\n\t"
06813         "ldr    r6, [%[b], #4]\n\t"
06814         "sbc    r3, r5\n\t"
06815         "sbc    r4, r6\n\t"
06816         "str    r3, [%[a], #0]\n\t"
06817         "str    r4, [%[a], #4]\n\t"
06818         "ldr    r3, [%[a], #8]\n\t"
06819         "ldr    r4, [%[a], #12]\n\t"
06820         "ldr    r5, [%[b], #8]\n\t"
06821         "ldr    r6, [%[b], #12]\n\t"
06822         "sbc    r3, r5\n\t"
06823         "sbc    r4, r6\n\t"
06824         "str    r3, [%[a], #8]\n\t"
06825         "str    r4, [%[a], #12]\n\t"
06826         "ldr    r3, [%[a], #16]\n\t"
06827         "ldr    r4, [%[a], #20]\n\t"
06828         "ldr    r5, [%[b], #16]\n\t"
06829         "ldr    r6, [%[b], #20]\n\t"
06830         "sbc    r3, r5\n\t"
06831         "sbc    r4, r6\n\t"
06832         "str    r3, [%[a], #16]\n\t"
06833         "str    r4, [%[a], #20]\n\t"
06834         "ldr    r3, [%[a], #24]\n\t"
06835         "ldr    r4, [%[a], #28]\n\t"
06836         "ldr    r5, [%[b], #24]\n\t"
06837         "ldr    r6, [%[b], #28]\n\t"
06838         "sbc    r3, r5\n\t"
06839         "sbc    r4, r6\n\t"
06840         "str    r3, [%[a], #24]\n\t"
06841         "str    r4, [%[a], #28]\n\t"
06842         "ldr    r3, [%[a], #32]\n\t"
06843         "ldr    r4, [%[a], #36]\n\t"
06844         "ldr    r5, [%[b], #32]\n\t"
06845         "ldr    r6, [%[b], #36]\n\t"
06846         "sbc    r3, r5\n\t"
06847         "sbc    r4, r6\n\t"
06848         "str    r3, [%[a], #32]\n\t"
06849         "str    r4, [%[a], #36]\n\t"
06850         "ldr    r3, [%[a], #40]\n\t"
06851         "ldr    r4, [%[a], #44]\n\t"
06852         "ldr    r5, [%[b], #40]\n\t"
06853         "ldr    r6, [%[b], #44]\n\t"
06854         "sbc    r3, r5\n\t"
06855         "sbc    r4, r6\n\t"
06856         "str    r3, [%[a], #40]\n\t"
06857         "str    r4, [%[a], #44]\n\t"
06858         "ldr    r3, [%[a], #48]\n\t"
06859         "ldr    r4, [%[a], #52]\n\t"
06860         "ldr    r5, [%[b], #48]\n\t"
06861         "ldr    r6, [%[b], #52]\n\t"
06862         "sbc    r3, r5\n\t"
06863         "sbc    r4, r6\n\t"
06864         "str    r3, [%[a], #48]\n\t"
06865         "str    r4, [%[a], #52]\n\t"
06866         "ldr    r3, [%[a], #56]\n\t"
06867         "ldr    r4, [%[a], #60]\n\t"
06868         "ldr    r5, [%[b], #56]\n\t"
06869         "ldr    r6, [%[b], #60]\n\t"
06870         "sbc    r3, r5\n\t"
06871         "sbc    r4, r6\n\t"
06872         "str    r3, [%[a], #56]\n\t"
06873         "str    r4, [%[a], #60]\n\t"
06874         "ldr    r3, [%[a], #64]\n\t"
06875         "ldr    r4, [%[a], #68]\n\t"
06876         "ldr    r5, [%[b], #64]\n\t"
06877         "ldr    r6, [%[b], #68]\n\t"
06878         "sbc    r3, r5\n\t"
06879         "sbc    r4, r6\n\t"
06880         "str    r3, [%[a], #64]\n\t"
06881         "str    r4, [%[a], #68]\n\t"
06882         "ldr    r3, [%[a], #72]\n\t"
06883         "ldr    r4, [%[a], #76]\n\t"
06884         "ldr    r5, [%[b], #72]\n\t"
06885         "ldr    r6, [%[b], #76]\n\t"
06886         "sbc    r3, r5\n\t"
06887         "sbc    r4, r6\n\t"
06888         "str    r3, [%[a], #72]\n\t"
06889         "str    r4, [%[a], #76]\n\t"
06890         "ldr    r3, [%[a], #80]\n\t"
06891         "ldr    r4, [%[a], #84]\n\t"
06892         "ldr    r5, [%[b], #80]\n\t"
06893         "ldr    r6, [%[b], #84]\n\t"
06894         "sbc    r3, r5\n\t"
06895         "sbc    r4, r6\n\t"
06896         "str    r3, [%[a], #80]\n\t"
06897         "str    r4, [%[a], #84]\n\t"
06898         "ldr    r3, [%[a], #88]\n\t"
06899         "ldr    r4, [%[a], #92]\n\t"
06900         "ldr    r5, [%[b], #88]\n\t"
06901         "ldr    r6, [%[b], #92]\n\t"
06902         "sbc    r3, r5\n\t"
06903         "sbc    r4, r6\n\t"
06904         "str    r3, [%[a], #88]\n\t"
06905         "str    r4, [%[a], #92]\n\t"
06906         "ldr    r3, [%[a], #96]\n\t"
06907         "ldr    r4, [%[a], #100]\n\t"
06908         "ldr    r5, [%[b], #96]\n\t"
06909         "ldr    r6, [%[b], #100]\n\t"
06910         "sbc    r3, r5\n\t"
06911         "sbc    r4, r6\n\t"
06912         "str    r3, [%[a], #96]\n\t"
06913         "str    r4, [%[a], #100]\n\t"
06914         "ldr    r3, [%[a], #104]\n\t"
06915         "ldr    r4, [%[a], #108]\n\t"
06916         "ldr    r5, [%[b], #104]\n\t"
06917         "ldr    r6, [%[b], #108]\n\t"
06918         "sbc    r3, r5\n\t"
06919         "sbc    r4, r6\n\t"
06920         "str    r3, [%[a], #104]\n\t"
06921         "str    r4, [%[a], #108]\n\t"
06922         "ldr    r3, [%[a], #112]\n\t"
06923         "ldr    r4, [%[a], #116]\n\t"
06924         "ldr    r5, [%[b], #112]\n\t"
06925         "ldr    r6, [%[b], #116]\n\t"
06926         "sbc    r3, r5\n\t"
06927         "sbc    r4, r6\n\t"
06928         "str    r3, [%[a], #112]\n\t"
06929         "str    r4, [%[a], #116]\n\t"
06930         "ldr    r3, [%[a], #120]\n\t"
06931         "ldr    r4, [%[a], #124]\n\t"
06932         "ldr    r5, [%[b], #120]\n\t"
06933         "ldr    r6, [%[b], #124]\n\t"
06934         "sbc    r3, r5\n\t"
06935         "sbc    r4, r6\n\t"
06936         "str    r3, [%[a], #120]\n\t"
06937         "str    r4, [%[a], #124]\n\t"
06938         "sbc    %[c], %[c]\n\t"
06939         "add    %[a], #0x80\n\t"
06940         "add    %[b], #0x80\n\t"
06941         "mov    r5, #0\n\t"
06942         "sub    r5, %[c]\n\t"
06943         "ldr    r3, [%[a], #0]\n\t"
06944         "ldr    r4, [%[a], #4]\n\t"
06945         "ldr    r5, [%[b], #0]\n\t"
06946         "ldr    r6, [%[b], #4]\n\t"
06947         "sbc    r3, r5\n\t"
06948         "sbc    r4, r6\n\t"
06949         "str    r3, [%[a], #0]\n\t"
06950         "str    r4, [%[a], #4]\n\t"
06951         "ldr    r3, [%[a], #8]\n\t"
06952         "ldr    r4, [%[a], #12]\n\t"
06953         "ldr    r5, [%[b], #8]\n\t"
06954         "ldr    r6, [%[b], #12]\n\t"
06955         "sbc    r3, r5\n\t"
06956         "sbc    r4, r6\n\t"
06957         "str    r3, [%[a], #8]\n\t"
06958         "str    r4, [%[a], #12]\n\t"
06959         "ldr    r3, [%[a], #16]\n\t"
06960         "ldr    r4, [%[a], #20]\n\t"
06961         "ldr    r5, [%[b], #16]\n\t"
06962         "ldr    r6, [%[b], #20]\n\t"
06963         "sbc    r3, r5\n\t"
06964         "sbc    r4, r6\n\t"
06965         "str    r3, [%[a], #16]\n\t"
06966         "str    r4, [%[a], #20]\n\t"
06967         "ldr    r3, [%[a], #24]\n\t"
06968         "ldr    r4, [%[a], #28]\n\t"
06969         "ldr    r5, [%[b], #24]\n\t"
06970         "ldr    r6, [%[b], #28]\n\t"
06971         "sbc    r3, r5\n\t"
06972         "sbc    r4, r6\n\t"
06973         "str    r3, [%[a], #24]\n\t"
06974         "str    r4, [%[a], #28]\n\t"
06975         "ldr    r3, [%[a], #32]\n\t"
06976         "ldr    r4, [%[a], #36]\n\t"
06977         "ldr    r5, [%[b], #32]\n\t"
06978         "ldr    r6, [%[b], #36]\n\t"
06979         "sbc    r3, r5\n\t"
06980         "sbc    r4, r6\n\t"
06981         "str    r3, [%[a], #32]\n\t"
06982         "str    r4, [%[a], #36]\n\t"
06983         "ldr    r3, [%[a], #40]\n\t"
06984         "ldr    r4, [%[a], #44]\n\t"
06985         "ldr    r5, [%[b], #40]\n\t"
06986         "ldr    r6, [%[b], #44]\n\t"
06987         "sbc    r3, r5\n\t"
06988         "sbc    r4, r6\n\t"
06989         "str    r3, [%[a], #40]\n\t"
06990         "str    r4, [%[a], #44]\n\t"
06991         "ldr    r3, [%[a], #48]\n\t"
06992         "ldr    r4, [%[a], #52]\n\t"
06993         "ldr    r5, [%[b], #48]\n\t"
06994         "ldr    r6, [%[b], #52]\n\t"
06995         "sbc    r3, r5\n\t"
06996         "sbc    r4, r6\n\t"
06997         "str    r3, [%[a], #48]\n\t"
06998         "str    r4, [%[a], #52]\n\t"
06999         "ldr    r3, [%[a], #56]\n\t"
07000         "ldr    r4, [%[a], #60]\n\t"
07001         "ldr    r5, [%[b], #56]\n\t"
07002         "ldr    r6, [%[b], #60]\n\t"
07003         "sbc    r3, r5\n\t"
07004         "sbc    r4, r6\n\t"
07005         "str    r3, [%[a], #56]\n\t"
07006         "str    r4, [%[a], #60]\n\t"
07007         "ldr    r3, [%[a], #64]\n\t"
07008         "ldr    r4, [%[a], #68]\n\t"
07009         "ldr    r5, [%[b], #64]\n\t"
07010         "ldr    r6, [%[b], #68]\n\t"
07011         "sbc    r3, r5\n\t"
07012         "sbc    r4, r6\n\t"
07013         "str    r3, [%[a], #64]\n\t"
07014         "str    r4, [%[a], #68]\n\t"
07015         "ldr    r3, [%[a], #72]\n\t"
07016         "ldr    r4, [%[a], #76]\n\t"
07017         "ldr    r5, [%[b], #72]\n\t"
07018         "ldr    r6, [%[b], #76]\n\t"
07019         "sbc    r3, r5\n\t"
07020         "sbc    r4, r6\n\t"
07021         "str    r3, [%[a], #72]\n\t"
07022         "str    r4, [%[a], #76]\n\t"
07023         "ldr    r3, [%[a], #80]\n\t"
07024         "ldr    r4, [%[a], #84]\n\t"
07025         "ldr    r5, [%[b], #80]\n\t"
07026         "ldr    r6, [%[b], #84]\n\t"
07027         "sbc    r3, r5\n\t"
07028         "sbc    r4, r6\n\t"
07029         "str    r3, [%[a], #80]\n\t"
07030         "str    r4, [%[a], #84]\n\t"
07031         "ldr    r3, [%[a], #88]\n\t"
07032         "ldr    r4, [%[a], #92]\n\t"
07033         "ldr    r5, [%[b], #88]\n\t"
07034         "ldr    r6, [%[b], #92]\n\t"
07035         "sbc    r3, r5\n\t"
07036         "sbc    r4, r6\n\t"
07037         "str    r3, [%[a], #88]\n\t"
07038         "str    r4, [%[a], #92]\n\t"
07039         "ldr    r3, [%[a], #96]\n\t"
07040         "ldr    r4, [%[a], #100]\n\t"
07041         "ldr    r5, [%[b], #96]\n\t"
07042         "ldr    r6, [%[b], #100]\n\t"
07043         "sbc    r3, r5\n\t"
07044         "sbc    r4, r6\n\t"
07045         "str    r3, [%[a], #96]\n\t"
07046         "str    r4, [%[a], #100]\n\t"
07047         "ldr    r3, [%[a], #104]\n\t"
07048         "ldr    r4, [%[a], #108]\n\t"
07049         "ldr    r5, [%[b], #104]\n\t"
07050         "ldr    r6, [%[b], #108]\n\t"
07051         "sbc    r3, r5\n\t"
07052         "sbc    r4, r6\n\t"
07053         "str    r3, [%[a], #104]\n\t"
07054         "str    r4, [%[a], #108]\n\t"
07055         "ldr    r3, [%[a], #112]\n\t"
07056         "ldr    r4, [%[a], #116]\n\t"
07057         "ldr    r5, [%[b], #112]\n\t"
07058         "ldr    r6, [%[b], #116]\n\t"
07059         "sbc    r3, r5\n\t"
07060         "sbc    r4, r6\n\t"
07061         "str    r3, [%[a], #112]\n\t"
07062         "str    r4, [%[a], #116]\n\t"
07063         "ldr    r3, [%[a], #120]\n\t"
07064         "ldr    r4, [%[a], #124]\n\t"
07065         "ldr    r5, [%[b], #120]\n\t"
07066         "ldr    r6, [%[b], #124]\n\t"
07067         "sbc    r3, r5\n\t"
07068         "sbc    r4, r6\n\t"
07069         "str    r3, [%[a], #120]\n\t"
07070         "str    r4, [%[a], #124]\n\t"
07071         "sbc    %[c], %[c]\n\t"
07072         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
07073         :
07074         : "memory", "r3", "r4", "r5", "r6"
07075     );
07076 
07077     return c;
07078 }
07079 
07080 /* Add b to a into r. (r = a + b)
07081  *
07082  * r  A single precision integer.
07083  * a  A single precision integer.
07084  * b  A single precision integer.
07085  */
07086 SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a,
07087         const sp_digit* b)
07088 {
07089     sp_digit c = 0;
07090 
07091     __asm__ __volatile__ (
07092         "mov    r7, #0\n\t"
07093         "mvn    r7, r7\n\t"
07094         "ldr    r4, [%[a], #0]\n\t"
07095         "ldr    r5, [%[b], #0]\n\t"
07096         "add    r4, r5\n\t"
07097         "str    r4, [%[r], #0]\n\t"
07098         "ldr    r4, [%[a], #4]\n\t"
07099         "ldr    r5, [%[b], #4]\n\t"
07100         "adc    r4, r5\n\t"
07101         "str    r4, [%[r], #4]\n\t"
07102         "ldr    r4, [%[a], #8]\n\t"
07103         "ldr    r5, [%[b], #8]\n\t"
07104         "adc    r4, r5\n\t"
07105         "str    r4, [%[r], #8]\n\t"
07106         "ldr    r4, [%[a], #12]\n\t"
07107         "ldr    r5, [%[b], #12]\n\t"
07108         "adc    r4, r5\n\t"
07109         "str    r4, [%[r], #12]\n\t"
07110         "ldr    r4, [%[a], #16]\n\t"
07111         "ldr    r5, [%[b], #16]\n\t"
07112         "adc    r4, r5\n\t"
07113         "str    r4, [%[r], #16]\n\t"
07114         "ldr    r4, [%[a], #20]\n\t"
07115         "ldr    r5, [%[b], #20]\n\t"
07116         "adc    r4, r5\n\t"
07117         "str    r4, [%[r], #20]\n\t"
07118         "ldr    r4, [%[a], #24]\n\t"
07119         "ldr    r5, [%[b], #24]\n\t"
07120         "adc    r4, r5\n\t"
07121         "str    r4, [%[r], #24]\n\t"
07122         "ldr    r4, [%[a], #28]\n\t"
07123         "ldr    r5, [%[b], #28]\n\t"
07124         "adc    r4, r5\n\t"
07125         "str    r4, [%[r], #28]\n\t"
07126         "ldr    r4, [%[a], #32]\n\t"
07127         "ldr    r5, [%[b], #32]\n\t"
07128         "adc    r4, r5\n\t"
07129         "str    r4, [%[r], #32]\n\t"
07130         "ldr    r4, [%[a], #36]\n\t"
07131         "ldr    r5, [%[b], #36]\n\t"
07132         "adc    r4, r5\n\t"
07133         "str    r4, [%[r], #36]\n\t"
07134         "ldr    r4, [%[a], #40]\n\t"
07135         "ldr    r5, [%[b], #40]\n\t"
07136         "adc    r4, r5\n\t"
07137         "str    r4, [%[r], #40]\n\t"
07138         "ldr    r4, [%[a], #44]\n\t"
07139         "ldr    r5, [%[b], #44]\n\t"
07140         "adc    r4, r5\n\t"
07141         "str    r4, [%[r], #44]\n\t"
07142         "ldr    r4, [%[a], #48]\n\t"
07143         "ldr    r5, [%[b], #48]\n\t"
07144         "adc    r4, r5\n\t"
07145         "str    r4, [%[r], #48]\n\t"
07146         "ldr    r4, [%[a], #52]\n\t"
07147         "ldr    r5, [%[b], #52]\n\t"
07148         "adc    r4, r5\n\t"
07149         "str    r4, [%[r], #52]\n\t"
07150         "ldr    r4, [%[a], #56]\n\t"
07151         "ldr    r5, [%[b], #56]\n\t"
07152         "adc    r4, r5\n\t"
07153         "str    r4, [%[r], #56]\n\t"
07154         "ldr    r4, [%[a], #60]\n\t"
07155         "ldr    r5, [%[b], #60]\n\t"
07156         "adc    r4, r5\n\t"
07157         "str    r4, [%[r], #60]\n\t"
07158         "ldr    r4, [%[a], #64]\n\t"
07159         "ldr    r5, [%[b], #64]\n\t"
07160         "adc    r4, r5\n\t"
07161         "str    r4, [%[r], #64]\n\t"
07162         "ldr    r4, [%[a], #68]\n\t"
07163         "ldr    r5, [%[b], #68]\n\t"
07164         "adc    r4, r5\n\t"
07165         "str    r4, [%[r], #68]\n\t"
07166         "ldr    r4, [%[a], #72]\n\t"
07167         "ldr    r5, [%[b], #72]\n\t"
07168         "adc    r4, r5\n\t"
07169         "str    r4, [%[r], #72]\n\t"
07170         "ldr    r4, [%[a], #76]\n\t"
07171         "ldr    r5, [%[b], #76]\n\t"
07172         "adc    r4, r5\n\t"
07173         "str    r4, [%[r], #76]\n\t"
07174         "ldr    r4, [%[a], #80]\n\t"
07175         "ldr    r5, [%[b], #80]\n\t"
07176         "adc    r4, r5\n\t"
07177         "str    r4, [%[r], #80]\n\t"
07178         "ldr    r4, [%[a], #84]\n\t"
07179         "ldr    r5, [%[b], #84]\n\t"
07180         "adc    r4, r5\n\t"
07181         "str    r4, [%[r], #84]\n\t"
07182         "ldr    r4, [%[a], #88]\n\t"
07183         "ldr    r5, [%[b], #88]\n\t"
07184         "adc    r4, r5\n\t"
07185         "str    r4, [%[r], #88]\n\t"
07186         "ldr    r4, [%[a], #92]\n\t"
07187         "ldr    r5, [%[b], #92]\n\t"
07188         "adc    r4, r5\n\t"
07189         "str    r4, [%[r], #92]\n\t"
07190         "ldr    r4, [%[a], #96]\n\t"
07191         "ldr    r5, [%[b], #96]\n\t"
07192         "adc    r4, r5\n\t"
07193         "str    r4, [%[r], #96]\n\t"
07194         "ldr    r4, [%[a], #100]\n\t"
07195         "ldr    r5, [%[b], #100]\n\t"
07196         "adc    r4, r5\n\t"
07197         "str    r4, [%[r], #100]\n\t"
07198         "ldr    r4, [%[a], #104]\n\t"
07199         "ldr    r5, [%[b], #104]\n\t"
07200         "adc    r4, r5\n\t"
07201         "str    r4, [%[r], #104]\n\t"
07202         "ldr    r4, [%[a], #108]\n\t"
07203         "ldr    r5, [%[b], #108]\n\t"
07204         "adc    r4, r5\n\t"
07205         "str    r4, [%[r], #108]\n\t"
07206         "ldr    r4, [%[a], #112]\n\t"
07207         "ldr    r5, [%[b], #112]\n\t"
07208         "adc    r4, r5\n\t"
07209         "str    r4, [%[r], #112]\n\t"
07210         "ldr    r4, [%[a], #116]\n\t"
07211         "ldr    r5, [%[b], #116]\n\t"
07212         "adc    r4, r5\n\t"
07213         "str    r4, [%[r], #116]\n\t"
07214         "ldr    r4, [%[a], #120]\n\t"
07215         "ldr    r5, [%[b], #120]\n\t"
07216         "adc    r4, r5\n\t"
07217         "str    r4, [%[r], #120]\n\t"
07218         "ldr    r4, [%[a], #124]\n\t"
07219         "ldr    r5, [%[b], #124]\n\t"
07220         "adc    r4, r5\n\t"
07221         "str    r4, [%[r], #124]\n\t"
07222         "mov    %[c], #0\n\t"
07223         "adc    %[c], %[c]\n\t"
07224         "add    %[a], #0x80\n\t"
07225         "add    %[b], #0x80\n\t"
07226         "add    %[r], #0x80\n\t"
07227         "add    %[c], r7\n\t"
07228         "ldr    r4, [%[a], #0]\n\t"
07229         "ldr    r5, [%[b], #0]\n\t"
07230         "adc    r4, r5\n\t"
07231         "str    r4, [%[r], #0]\n\t"
07232         "ldr    r4, [%[a], #4]\n\t"
07233         "ldr    r5, [%[b], #4]\n\t"
07234         "adc    r4, r5\n\t"
07235         "str    r4, [%[r], #4]\n\t"
07236         "ldr    r4, [%[a], #8]\n\t"
07237         "ldr    r5, [%[b], #8]\n\t"
07238         "adc    r4, r5\n\t"
07239         "str    r4, [%[r], #8]\n\t"
07240         "ldr    r4, [%[a], #12]\n\t"
07241         "ldr    r5, [%[b], #12]\n\t"
07242         "adc    r4, r5\n\t"
07243         "str    r4, [%[r], #12]\n\t"
07244         "ldr    r4, [%[a], #16]\n\t"
07245         "ldr    r5, [%[b], #16]\n\t"
07246         "adc    r4, r5\n\t"
07247         "str    r4, [%[r], #16]\n\t"
07248         "ldr    r4, [%[a], #20]\n\t"
07249         "ldr    r5, [%[b], #20]\n\t"
07250         "adc    r4, r5\n\t"
07251         "str    r4, [%[r], #20]\n\t"
07252         "ldr    r4, [%[a], #24]\n\t"
07253         "ldr    r5, [%[b], #24]\n\t"
07254         "adc    r4, r5\n\t"
07255         "str    r4, [%[r], #24]\n\t"
07256         "ldr    r4, [%[a], #28]\n\t"
07257         "ldr    r5, [%[b], #28]\n\t"
07258         "adc    r4, r5\n\t"
07259         "str    r4, [%[r], #28]\n\t"
07260         "ldr    r4, [%[a], #32]\n\t"
07261         "ldr    r5, [%[b], #32]\n\t"
07262         "adc    r4, r5\n\t"
07263         "str    r4, [%[r], #32]\n\t"
07264         "ldr    r4, [%[a], #36]\n\t"
07265         "ldr    r5, [%[b], #36]\n\t"
07266         "adc    r4, r5\n\t"
07267         "str    r4, [%[r], #36]\n\t"
07268         "ldr    r4, [%[a], #40]\n\t"
07269         "ldr    r5, [%[b], #40]\n\t"
07270         "adc    r4, r5\n\t"
07271         "str    r4, [%[r], #40]\n\t"
07272         "ldr    r4, [%[a], #44]\n\t"
07273         "ldr    r5, [%[b], #44]\n\t"
07274         "adc    r4, r5\n\t"
07275         "str    r4, [%[r], #44]\n\t"
07276         "ldr    r4, [%[a], #48]\n\t"
07277         "ldr    r5, [%[b], #48]\n\t"
07278         "adc    r4, r5\n\t"
07279         "str    r4, [%[r], #48]\n\t"
07280         "ldr    r4, [%[a], #52]\n\t"
07281         "ldr    r5, [%[b], #52]\n\t"
07282         "adc    r4, r5\n\t"
07283         "str    r4, [%[r], #52]\n\t"
07284         "ldr    r4, [%[a], #56]\n\t"
07285         "ldr    r5, [%[b], #56]\n\t"
07286         "adc    r4, r5\n\t"
07287         "str    r4, [%[r], #56]\n\t"
07288         "ldr    r4, [%[a], #60]\n\t"
07289         "ldr    r5, [%[b], #60]\n\t"
07290         "adc    r4, r5\n\t"
07291         "str    r4, [%[r], #60]\n\t"
07292         "ldr    r4, [%[a], #64]\n\t"
07293         "ldr    r5, [%[b], #64]\n\t"
07294         "adc    r4, r5\n\t"
07295         "str    r4, [%[r], #64]\n\t"
07296         "ldr    r4, [%[a], #68]\n\t"
07297         "ldr    r5, [%[b], #68]\n\t"
07298         "adc    r4, r5\n\t"
07299         "str    r4, [%[r], #68]\n\t"
07300         "ldr    r4, [%[a], #72]\n\t"
07301         "ldr    r5, [%[b], #72]\n\t"
07302         "adc    r4, r5\n\t"
07303         "str    r4, [%[r], #72]\n\t"
07304         "ldr    r4, [%[a], #76]\n\t"
07305         "ldr    r5, [%[b], #76]\n\t"
07306         "adc    r4, r5\n\t"
07307         "str    r4, [%[r], #76]\n\t"
07308         "ldr    r4, [%[a], #80]\n\t"
07309         "ldr    r5, [%[b], #80]\n\t"
07310         "adc    r4, r5\n\t"
07311         "str    r4, [%[r], #80]\n\t"
07312         "ldr    r4, [%[a], #84]\n\t"
07313         "ldr    r5, [%[b], #84]\n\t"
07314         "adc    r4, r5\n\t"
07315         "str    r4, [%[r], #84]\n\t"
07316         "ldr    r4, [%[a], #88]\n\t"
07317         "ldr    r5, [%[b], #88]\n\t"
07318         "adc    r4, r5\n\t"
07319         "str    r4, [%[r], #88]\n\t"
07320         "ldr    r4, [%[a], #92]\n\t"
07321         "ldr    r5, [%[b], #92]\n\t"
07322         "adc    r4, r5\n\t"
07323         "str    r4, [%[r], #92]\n\t"
07324         "ldr    r4, [%[a], #96]\n\t"
07325         "ldr    r5, [%[b], #96]\n\t"
07326         "adc    r4, r5\n\t"
07327         "str    r4, [%[r], #96]\n\t"
07328         "ldr    r4, [%[a], #100]\n\t"
07329         "ldr    r5, [%[b], #100]\n\t"
07330         "adc    r4, r5\n\t"
07331         "str    r4, [%[r], #100]\n\t"
07332         "ldr    r4, [%[a], #104]\n\t"
07333         "ldr    r5, [%[b], #104]\n\t"
07334         "adc    r4, r5\n\t"
07335         "str    r4, [%[r], #104]\n\t"
07336         "ldr    r4, [%[a], #108]\n\t"
07337         "ldr    r5, [%[b], #108]\n\t"
07338         "adc    r4, r5\n\t"
07339         "str    r4, [%[r], #108]\n\t"
07340         "ldr    r4, [%[a], #112]\n\t"
07341         "ldr    r5, [%[b], #112]\n\t"
07342         "adc    r4, r5\n\t"
07343         "str    r4, [%[r], #112]\n\t"
07344         "ldr    r4, [%[a], #116]\n\t"
07345         "ldr    r5, [%[b], #116]\n\t"
07346         "adc    r4, r5\n\t"
07347         "str    r4, [%[r], #116]\n\t"
07348         "ldr    r4, [%[a], #120]\n\t"
07349         "ldr    r5, [%[b], #120]\n\t"
07350         "adc    r4, r5\n\t"
07351         "str    r4, [%[r], #120]\n\t"
07352         "ldr    r4, [%[a], #124]\n\t"
07353         "ldr    r5, [%[b], #124]\n\t"
07354         "adc    r4, r5\n\t"
07355         "str    r4, [%[r], #124]\n\t"
07356         "mov    %[c], #0\n\t"
07357         "adc    %[c], %[c]\n\t"
07358         "add    %[a], #0x80\n\t"
07359         "add    %[b], #0x80\n\t"
07360         "add    %[r], #0x80\n\t"
07361         "add    %[c], r7\n\t"
07362         "ldr    r4, [%[a], #0]\n\t"
07363         "ldr    r5, [%[b], #0]\n\t"
07364         "adc    r4, r5\n\t"
07365         "str    r4, [%[r], #0]\n\t"
07366         "ldr    r4, [%[a], #4]\n\t"
07367         "ldr    r5, [%[b], #4]\n\t"
07368         "adc    r4, r5\n\t"
07369         "str    r4, [%[r], #4]\n\t"
07370         "ldr    r4, [%[a], #8]\n\t"
07371         "ldr    r5, [%[b], #8]\n\t"
07372         "adc    r4, r5\n\t"
07373         "str    r4, [%[r], #8]\n\t"
07374         "ldr    r4, [%[a], #12]\n\t"
07375         "ldr    r5, [%[b], #12]\n\t"
07376         "adc    r4, r5\n\t"
07377         "str    r4, [%[r], #12]\n\t"
07378         "ldr    r4, [%[a], #16]\n\t"
07379         "ldr    r5, [%[b], #16]\n\t"
07380         "adc    r4, r5\n\t"
07381         "str    r4, [%[r], #16]\n\t"
07382         "ldr    r4, [%[a], #20]\n\t"
07383         "ldr    r5, [%[b], #20]\n\t"
07384         "adc    r4, r5\n\t"
07385         "str    r4, [%[r], #20]\n\t"
07386         "ldr    r4, [%[a], #24]\n\t"
07387         "ldr    r5, [%[b], #24]\n\t"
07388         "adc    r4, r5\n\t"
07389         "str    r4, [%[r], #24]\n\t"
07390         "ldr    r4, [%[a], #28]\n\t"
07391         "ldr    r5, [%[b], #28]\n\t"
07392         "adc    r4, r5\n\t"
07393         "str    r4, [%[r], #28]\n\t"
07394         "ldr    r4, [%[a], #32]\n\t"
07395         "ldr    r5, [%[b], #32]\n\t"
07396         "adc    r4, r5\n\t"
07397         "str    r4, [%[r], #32]\n\t"
07398         "ldr    r4, [%[a], #36]\n\t"
07399         "ldr    r5, [%[b], #36]\n\t"
07400         "adc    r4, r5\n\t"
07401         "str    r4, [%[r], #36]\n\t"
07402         "ldr    r4, [%[a], #40]\n\t"
07403         "ldr    r5, [%[b], #40]\n\t"
07404         "adc    r4, r5\n\t"
07405         "str    r4, [%[r], #40]\n\t"
07406         "ldr    r4, [%[a], #44]\n\t"
07407         "ldr    r5, [%[b], #44]\n\t"
07408         "adc    r4, r5\n\t"
07409         "str    r4, [%[r], #44]\n\t"
07410         "ldr    r4, [%[a], #48]\n\t"
07411         "ldr    r5, [%[b], #48]\n\t"
07412         "adc    r4, r5\n\t"
07413         "str    r4, [%[r], #48]\n\t"
07414         "ldr    r4, [%[a], #52]\n\t"
07415         "ldr    r5, [%[b], #52]\n\t"
07416         "adc    r4, r5\n\t"
07417         "str    r4, [%[r], #52]\n\t"
07418         "ldr    r4, [%[a], #56]\n\t"
07419         "ldr    r5, [%[b], #56]\n\t"
07420         "adc    r4, r5\n\t"
07421         "str    r4, [%[r], #56]\n\t"
07422         "ldr    r4, [%[a], #60]\n\t"
07423         "ldr    r5, [%[b], #60]\n\t"
07424         "adc    r4, r5\n\t"
07425         "str    r4, [%[r], #60]\n\t"
07426         "ldr    r4, [%[a], #64]\n\t"
07427         "ldr    r5, [%[b], #64]\n\t"
07428         "adc    r4, r5\n\t"
07429         "str    r4, [%[r], #64]\n\t"
07430         "ldr    r4, [%[a], #68]\n\t"
07431         "ldr    r5, [%[b], #68]\n\t"
07432         "adc    r4, r5\n\t"
07433         "str    r4, [%[r], #68]\n\t"
07434         "ldr    r4, [%[a], #72]\n\t"
07435         "ldr    r5, [%[b], #72]\n\t"
07436         "adc    r4, r5\n\t"
07437         "str    r4, [%[r], #72]\n\t"
07438         "ldr    r4, [%[a], #76]\n\t"
07439         "ldr    r5, [%[b], #76]\n\t"
07440         "adc    r4, r5\n\t"
07441         "str    r4, [%[r], #76]\n\t"
07442         "ldr    r4, [%[a], #80]\n\t"
07443         "ldr    r5, [%[b], #80]\n\t"
07444         "adc    r4, r5\n\t"
07445         "str    r4, [%[r], #80]\n\t"
07446         "ldr    r4, [%[a], #84]\n\t"
07447         "ldr    r5, [%[b], #84]\n\t"
07448         "adc    r4, r5\n\t"
07449         "str    r4, [%[r], #84]\n\t"
07450         "ldr    r4, [%[a], #88]\n\t"
07451         "ldr    r5, [%[b], #88]\n\t"
07452         "adc    r4, r5\n\t"
07453         "str    r4, [%[r], #88]\n\t"
07454         "ldr    r4, [%[a], #92]\n\t"
07455         "ldr    r5, [%[b], #92]\n\t"
07456         "adc    r4, r5\n\t"
07457         "str    r4, [%[r], #92]\n\t"
07458         "ldr    r4, [%[a], #96]\n\t"
07459         "ldr    r5, [%[b], #96]\n\t"
07460         "adc    r4, r5\n\t"
07461         "str    r4, [%[r], #96]\n\t"
07462         "ldr    r4, [%[a], #100]\n\t"
07463         "ldr    r5, [%[b], #100]\n\t"
07464         "adc    r4, r5\n\t"
07465         "str    r4, [%[r], #100]\n\t"
07466         "ldr    r4, [%[a], #104]\n\t"
07467         "ldr    r5, [%[b], #104]\n\t"
07468         "adc    r4, r5\n\t"
07469         "str    r4, [%[r], #104]\n\t"
07470         "ldr    r4, [%[a], #108]\n\t"
07471         "ldr    r5, [%[b], #108]\n\t"
07472         "adc    r4, r5\n\t"
07473         "str    r4, [%[r], #108]\n\t"
07474         "ldr    r4, [%[a], #112]\n\t"
07475         "ldr    r5, [%[b], #112]\n\t"
07476         "adc    r4, r5\n\t"
07477         "str    r4, [%[r], #112]\n\t"
07478         "ldr    r4, [%[a], #116]\n\t"
07479         "ldr    r5, [%[b], #116]\n\t"
07480         "adc    r4, r5\n\t"
07481         "str    r4, [%[r], #116]\n\t"
07482         "ldr    r4, [%[a], #120]\n\t"
07483         "ldr    r5, [%[b], #120]\n\t"
07484         "adc    r4, r5\n\t"
07485         "str    r4, [%[r], #120]\n\t"
07486         "ldr    r4, [%[a], #124]\n\t"
07487         "ldr    r5, [%[b], #124]\n\t"
07488         "adc    r4, r5\n\t"
07489         "str    r4, [%[r], #124]\n\t"
07490         "mov    %[c], #0\n\t"
07491         "adc    %[c], %[c]\n\t"
07492         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
07493         :
07494         : "memory", "r4", "r5", "r7"
07495     );
07496 
07497     return c;
07498 }
07499 
07500 /* AND m into each word of a and store in r.
07501  *
07502  * r  A single precision integer.
07503  * a  A single precision integer.
07504  * m  Mask to AND against each digit.
07505  */
07506 static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m)
07507 {
07508 #ifdef WOLFSSL_SP_SMALL
07509     int i;
07510 
07511     for (i=0; i<48; i++) {
07512         r[i] = a[i] & m;
07513     }
07514 #else
07515     int i;
07516 
07517     for (i = 0; i < 48; i += 8) {
07518         r[i+0] = a[i+0] & m;
07519         r[i+1] = a[i+1] & m;
07520         r[i+2] = a[i+2] & m;
07521         r[i+3] = a[i+3] & m;
07522         r[i+4] = a[i+4] & m;
07523         r[i+5] = a[i+5] & m;
07524         r[i+6] = a[i+6] & m;
07525         r[i+7] = a[i+7] & m;
07526     }
07527 #endif
07528 }
07529 
07530 /* Multiply a and b into r. (r = a * b)
07531  *
07532  * r  A single precision integer.
07533  * a  A single precision integer.
07534  * b  A single precision integer.
07535  */
07536 SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a,
07537         const sp_digit* b)
07538 {
07539     sp_digit* z0 = r;
07540     sp_digit z1[96];
07541     sp_digit a1[48];
07542     sp_digit b1[48];
07543     sp_digit z2[96];
07544     sp_digit u, ca, cb;
07545 
07546     ca = sp_3072_add_48(a1, a, &a[48]);
07547     cb = sp_3072_add_48(b1, b, &b[48]);
07548     u  = ca & cb;
07549     sp_3072_mul_48(z1, a1, b1);
07550     sp_3072_mul_48(z2, &a[48], &b[48]);
07551     sp_3072_mul_48(z0, a, b);
07552     sp_3072_mask_48(r + 96, a1, 0 - cb);
07553     sp_3072_mask_48(b1, b1, 0 - ca);
07554     u += sp_3072_add_48(r + 96, r + 96, b1);
07555     u += sp_3072_sub_in_place_96(z1, z2);
07556     u += sp_3072_sub_in_place_96(z1, z0);
07557     u += sp_3072_add_96(r + 48, r + 48, z1);
07558     r[144] = u;
07559     XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1));
07560     (void)sp_3072_add_96(r + 96, r + 96, z2);
07561 }
07562 
07563 /* Square a and put result in r. (r = a * a)
07564  *
07565  * r  A single precision integer.
07566  * a  A single precision integer.
07567  */
07568 SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a)
07569 {
07570     sp_digit* z0 = r;
07571     sp_digit z2[96];
07572     sp_digit z1[96];
07573     sp_digit a1[48];
07574     sp_digit u;
07575 
07576     u = sp_3072_add_48(a1, a, &a[48]);
07577     sp_3072_sqr_48(z1, a1);
07578     sp_3072_sqr_48(z2, &a[48]);
07579     sp_3072_sqr_48(z0, a);
07580     sp_3072_mask_48(r + 96, a1, 0 - u);
07581     u += sp_3072_add_48(r + 96, r + 96, r + 96);
07582     u += sp_3072_sub_in_place_96(z1, z2);
07583     u += sp_3072_sub_in_place_96(z1, z0);
07584     u += sp_3072_add_96(r + 48, r + 48, z1);
07585     r[144] = u;
07586     XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1));
07587     (void)sp_3072_add_96(r + 96, r + 96, z2);
07588 }
07589 
07590 #endif /* !WOLFSSL_SP_SMALL */
07591 #ifdef WOLFSSL_SP_SMALL
07592 /* Add b to a into r. (r = a + b)
07593  *
07594  * r  A single precision integer.
07595  * a  A single precision integer.
07596  * b  A single precision integer.
07597  */
07598 SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a,
07599         const sp_digit* b)
07600 {
07601     sp_digit c = 0;
07602 
07603     __asm__ __volatile__ (
07604         "mov    r6, %[a]\n\t"
07605         "mov    r7, #0\n\t"
07606         "mov    r4, #1\n\t"
07607         "lsl    r4, #8\n\t"
07608         "add    r4, #128\n\t"
07609         "sub    r7, #1\n\t"
07610         "add    r6, r4\n\t"
07611         "\n1:\n\t"
07612         "add    %[c], r7\n\t"
07613         "ldr    r4, [%[a]]\n\t"
07614         "ldr    r5, [%[b]]\n\t"
07615         "adc    r4, r5\n\t"
07616         "str    r4, [%[r]]\n\t"
07617         "mov    %[c], #0\n\t"
07618         "adc    %[c], %[c]\n\t"
07619         "add    %[a], #4\n\t"
07620         "add    %[b], #4\n\t"
07621         "add    %[r], #4\n\t"
07622         "cmp    %[a], r6\n\t"
07623         "bne    1b\n\t"
07624         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
07625         :
07626         : "memory", "r4", "r5", "r6", "r7"
07627     );
07628 
07629     return c;
07630 }
07631 
07632 #endif /* WOLFSSL_SP_SMALL */
07633 #ifdef WOLFSSL_SP_SMALL
07634 /* Sub b from a into a. (a -= b)
07635  *
07636  * a  A single precision integer.
07637  * b  A single precision integer.
07638  */
07639 SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a,
07640         const sp_digit* b)
07641 {
07642     sp_digit c = 0;
07643     __asm__ __volatile__ (
07644         "mov    r7, %[a]\n\t"
07645         "mov    r5, #1\n\t"
07646         "lsl    r5, #8\n\t"
07647         "add    r5, #128\n\t"
07648         "add    r7, r5\n\t"
07649         "\n1:\n\t"
07650         "mov    r5, #0\n\t"
07651         "sub    r5, %[c]\n\t"
07652         "ldr    r3, [%[a]]\n\t"
07653         "ldr    r4, [%[a], #4]\n\t"
07654         "ldr    r5, [%[b]]\n\t"
07655         "ldr    r6, [%[b], #4]\n\t"
07656         "sbc    r3, r5\n\t"
07657         "sbc    r4, r6\n\t"
07658         "str    r3, [%[a]]\n\t"
07659         "str    r4, [%[a], #4]\n\t"
07660         "sbc    %[c], %[c]\n\t"
07661         "add    %[a], #8\n\t"
07662         "add    %[b], #8\n\t"
07663         "cmp    %[a], r7\n\t"
07664         "bne    1b\n\t"
07665         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
07666         :
07667         : "memory", "r3", "r4", "r5", "r6", "r7"
07668     );
07669 
07670     return c;
07671 }
07672 
07673 #endif /* WOLFSSL_SP_SMALL */
07674 #ifdef WOLFSSL_SP_SMALL
07675 /* Multiply a and b into r. (r = a * b)
07676  *
07677  * r  A single precision integer.
07678  * a  A single precision integer.
07679  * b  A single precision integer.
07680  */
07681 SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a,
07682         const sp_digit* b)
07683 {
07684     sp_digit tmp[96 * 2];
07685     __asm__ __volatile__ (
07686         "mov    r3, #0\n\t"
07687         "mov    r4, #0\n\t"
07688         "mov    r8, r3\n\t"
07689         "mov    r11, %[r]\n\t"
07690         "mov    r9, %[a]\n\t"
07691         "mov    r10, %[b]\n\t"
07692         "mov    r6, #1\n\t"
07693         "lsl    r6, r6, #8\n\t"
07694         "add    r6, #128\n\t"
07695         "add    r6, r9\n\t"
07696         "mov    r12, r6\n\t"
07697         "\n1:\n\t"
07698         "mov    %[r], #0\n\t"
07699         "mov    r5, #0\n\t"
07700         "mov    r6, #1\n\t"
07701         "lsl    r6, r6, #8\n\t"
07702         "add    r6, #124\n\t"
07703         "mov    %[a], r8\n\t"
07704         "sub    %[a], r6\n\t"
07705         "sbc    r6, r6\n\t"
07706         "mvn    r6, r6\n\t"
07707         "and    %[a], r6\n\t"
07708         "mov    %[b], r8\n\t"
07709         "sub    %[b], %[a]\n\t"
07710         "add    %[a], r9\n\t"
07711         "add    %[b], r10\n\t"
07712         "\n2:\n\t"
07713         "# Multiply Start\n\t"
07714         "ldr    r6, [%[a]]\n\t"
07715         "ldr    r7, [%[b]]\n\t"
07716         "lsl    r6, r6, #16\n\t"
07717         "lsl    r7, r7, #16\n\t"
07718         "lsr    r6, r6, #16\n\t"
07719         "lsr    r7, r7, #16\n\t"
07720         "mul    r7, r6\n\t"
07721         "add    r3, r7\n\t"
07722         "adc    r4, %[r]\n\t"
07723         "adc    r5, %[r]\n\t"
07724         "ldr    r7, [%[b]]\n\t"
07725         "lsr    r7, r7, #16\n\t"
07726         "mul    r6, r7\n\t"
07727         "lsr    r7, r6, #16\n\t"
07728         "lsl    r6, r6, #16\n\t"
07729         "add    r3, r6\n\t"
07730         "adc    r4, r7\n\t"
07731         "adc    r5, %[r]\n\t"
07732         "ldr    r6, [%[a]]\n\t"
07733         "ldr    r7, [%[b]]\n\t"
07734         "lsr    r6, r6, #16\n\t"
07735         "lsr    r7, r7, #16\n\t"
07736         "mul    r7, r6\n\t"
07737         "add    r4, r7\n\t"
07738         "adc    r5, %[r]\n\t"
07739         "ldr    r7, [%[b]]\n\t"
07740         "lsl    r7, r7, #16\n\t"
07741         "lsr    r7, r7, #16\n\t"
07742         "mul    r6, r7\n\t"
07743         "lsr    r7, r6, #16\n\t"
07744         "lsl    r6, r6, #16\n\t"
07745         "add    r3, r6\n\t"
07746         "adc    r4, r7\n\t"
07747         "adc    r5, %[r]\n\t"
07748         "# Multiply Done\n\t"
07749         "add    %[a], #4\n\t"
07750         "sub    %[b], #4\n\t"
07751         "cmp    %[a], r12\n\t"
07752         "beq    3f\n\t"
07753         "mov    r6, r8\n\t"
07754         "add    r6, r9\n\t"
07755         "cmp    %[a], r6\n\t"
07756         "ble    2b\n\t"
07757         "\n3:\n\t"
07758         "mov    %[r], r11\n\t"
07759         "mov    r7, r8\n\t"
07760         "str    r3, [%[r], r7]\n\t"
07761         "mov    r3, r4\n\t"
07762         "mov    r4, r5\n\t"
07763         "add    r7, #4\n\t"
07764         "mov    r8, r7\n\t"
07765         "mov    r6, #2\n\t"
07766         "lsl    r6, r6, #8\n\t"
07767         "add    r6, #248\n\t"
07768         "cmp    r7, r6\n\t"
07769         "ble    1b\n\t"
07770         "str    r3, [%[r], r7]\n\t"
07771         "mov    %[a], r9\n\t"
07772         "mov    %[b], r10\n\t"
07773         :
07774         : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
07775         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
07776     );
07777 
07778     XMEMCPY(r, tmp, sizeof(tmp));
07779 }
07780 
07781 /* Square a and put result in r. (r = a * a)
07782  *
07783  * r  A single precision integer.
07784  * a  A single precision integer.
07785  */
07786 SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a)
07787 {
07788     __asm__ __volatile__ (
07789         "mov    r3, #0\n\t"
07790         "mov    r4, #0\n\t"
07791         "mov    r5, #0\n\t"
07792         "mov    r8, r3\n\t"
07793         "mov    r11, %[r]\n\t"
07794         "mov    r6, #3\n\t"
07795         "lsl    r6, r6, #8\n\t"
07796         "neg    r6, r6\n\t"
07797         "add    sp, r6\n\t"
07798         "mov    r10, sp\n\t"
07799         "mov    r9, %[a]\n\t"
07800         "\n1:\n\t"
07801         "mov    %[r], #0\n\t"
07802         "mov    r6, #1\n\t"
07803         "lsl    r6, r6, #8\n\t"
07804         "add    r6, #124\n\t"
07805         "mov    %[a], r8\n\t"
07806         "sub    %[a], r6\n\t"
07807         "sbc    r6, r6\n\t"
07808         "mvn    r6, r6\n\t"
07809         "and    %[a], r6\n\t"
07810         "mov    r2, r8\n\t"
07811         "sub    r2, %[a]\n\t"
07812         "add    %[a], r9\n\t"
07813         "add    r2, r9\n\t"
07814         "\n2:\n\t"
07815         "cmp    r2, %[a]\n\t"
07816         "beq    4f\n\t"
07817         "# Multiply * 2: Start\n\t"
07818         "ldr    r6, [%[a]]\n\t"
07819         "ldr    r7, [r2]\n\t"
07820         "lsl    r6, r6, #16\n\t"
07821         "lsl    r7, r7, #16\n\t"
07822         "lsr    r6, r6, #16\n\t"
07823         "lsr    r7, r7, #16\n\t"
07824         "mul    r7, r6\n\t"
07825         "add    r3, r7\n\t"
07826         "adc    r4, %[r]\n\t"
07827         "adc    r5, %[r]\n\t"
07828         "add    r3, r7\n\t"
07829         "adc    r4, %[r]\n\t"
07830         "adc    r5, %[r]\n\t"
07831         "ldr    r7, [r2]\n\t"
07832         "lsr    r7, r7, #16\n\t"
07833         "mul    r6, r7\n\t"
07834         "lsr    r7, r6, #16\n\t"
07835         "lsl    r6, r6, #16\n\t"
07836         "add    r3, r6\n\t"
07837         "adc    r4, r7\n\t"
07838         "adc    r5, %[r]\n\t"
07839         "add    r3, r6\n\t"
07840         "adc    r4, r7\n\t"
07841         "adc    r5, %[r]\n\t"
07842         "ldr    r6, [%[a]]\n\t"
07843         "ldr    r7, [r2]\n\t"
07844         "lsr    r6, r6, #16\n\t"
07845         "lsr    r7, r7, #16\n\t"
07846         "mul    r7, r6\n\t"
07847         "add    r4, r7\n\t"
07848         "adc    r5, %[r]\n\t"
07849         "add    r4, r7\n\t"
07850         "adc    r5, %[r]\n\t"
07851         "ldr    r7, [r2]\n\t"
07852         "lsl    r7, r7, #16\n\t"
07853         "lsr    r7, r7, #16\n\t"
07854         "mul    r6, r7\n\t"
07855         "lsr    r7, r6, #16\n\t"
07856         "lsl    r6, r6, #16\n\t"
07857         "add    r3, r6\n\t"
07858         "adc    r4, r7\n\t"
07859         "adc    r5, %[r]\n\t"
07860         "add    r3, r6\n\t"
07861         "adc    r4, r7\n\t"
07862         "adc    r5, %[r]\n\t"
07863         "# Multiply * 2: Done\n\t"
07864         "bal    5f\n\t"
07865         "\n4:\n\t"
07866         "# Square: Start\n\t"
07867         "ldr    r6, [%[a]]\n\t"
07868         "lsr    r7, r6, #16\n\t"
07869         "lsl    r6, r6, #16\n\t"
07870         "lsr    r6, r6, #16\n\t"
07871         "mul    r6, r6\n\t"
07872         "add    r3, r6\n\t"
07873         "adc    r4, %[r]\n\t"
07874         "adc    r5, %[r]\n\t"
07875         "mul    r7, r7\n\t"
07876         "add    r4, r7\n\t"
07877         "adc    r5, %[r]\n\t"
07878         "ldr    r6, [%[a]]\n\t"
07879         "lsr    r7, r6, #16\n\t"
07880         "lsl    r6, r6, #16\n\t"
07881         "lsr    r6, r6, #16\n\t"
07882         "mul    r6, r7\n\t"
07883         "lsr    r7, r6, #15\n\t"
07884         "lsl    r6, r6, #17\n\t"
07885         "add    r3, r6\n\t"
07886         "adc    r4, r7\n\t"
07887         "adc    r5, %[r]\n\t"
07888         "# Square: Done\n\t"
07889         "\n5:\n\t"
07890         "add    %[a], #4\n\t"
07891         "sub    r2, #4\n\t"
07892         "mov    r6, #1\n\t"
07893         "lsl    r6, r6, #8\n\t"
07894         "add    r6, #128\n\t"
07895         "add    r6, r9\n\t"
07896         "cmp    %[a], r6\n\t"
07897         "beq    3f\n\t"
07898         "cmp    %[a], r2\n\t"
07899         "bgt    3f\n\t"
07900         "mov    r7, r8\n\t"
07901         "add    r7, r9\n\t"
07902         "cmp    %[a], r7\n\t"
07903         "ble    2b\n\t"
07904         "\n3:\n\t"
07905         "mov    %[r], r10\n\t"
07906         "mov    r7, r8\n\t"
07907         "str    r3, [%[r], r7]\n\t"
07908         "mov    r3, r4\n\t"
07909         "mov    r4, r5\n\t"
07910         "mov    r5, #0\n\t"
07911         "add    r7, #4\n\t"
07912         "mov    r8, r7\n\t"
07913         "mov    r6, #2\n\t"
07914         "lsl    r6, r6, #8\n\t"
07915         "add    r6, #248\n\t"
07916         "cmp    r7, r6\n\t"
07917         "ble    1b\n\t"
07918         "mov    %[a], r9\n\t"
07919         "str    r3, [%[r], r7]\n\t"
07920         "mov    %[r], r11\n\t"
07921         "mov    %[a], r10\n\t"
07922         "mov    r3, #2\n\t"
07923         "lsl    r3, r3, #8\n\t"
07924         "add    r3, #252\n\t"
07925         "\n4:\n\t"
07926         "ldr    r6, [%[a], r3]\n\t"
07927         "str    r6, [%[r], r3]\n\t"
07928         "sub    r3, #4\n\t"
07929         "bge    4b\n\t"
07930         "mov    r6, #3\n\t"
07931         "lsl    r6, r6, #8\n\t"
07932         "add    sp, r6\n\t"
07933         :
07934         : [r] "r" (r), [a] "r" (a)
07935         : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
07936     );
07937 }
07938 
07939 #endif /* WOLFSSL_SP_SMALL */
07940 #if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
07941 #ifdef WOLFSSL_SP_SMALL
07942 /* AND m into each word of a and store in r.
07943  *
07944  * r  A single precision integer.
07945  * a  A single precision integer.
07946  * m  Mask to AND against each digit.
07947  */
07948 static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m)
07949 {
07950     int i;
07951 
07952     for (i=0; i<48; i++) {
07953         r[i] = a[i] & m;
07954     }
07955 }
07956 
07957 #endif /* WOLFSSL_SP_SMALL */
07958 #ifdef WOLFSSL_SP_SMALL
07959 /* Add b to a into r. (r = a + b)
07960  * One digit is added per loop pass; the loop stops once a has advanced 192 bytes.
07961  * r  A single precision integer. Receives the sum.
07962  * a  A single precision integer. First operand.
07963  * b  A single precision integer. Second operand.
07964  * returns the carry out of the last digit added (0 or 1). */
07965 SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a,
07966         const sp_digit* b)
07967 {
07968     sp_digit c = 0;
07969 
07970     __asm__ __volatile__ (
07971         "mov    r6, %[a]\n\t"
07972         "mov    r7, #0\n\t"
07973         "add    r6, #192\n\t"      /* r6 = a + 192: address at which the loop stops */
07974         "sub    r7, #1\n\t"        /* r7 = 0 - 1 (all bits set) */
07975         "\n1:\n\t"
07976         "add    %[c], r7\n\t"      /* when c is 1 this addition carries out, handing the saved carry to the adc below */
07977         "ldr    r4, [%[a]]\n\t"
07978         "ldr    r5, [%[b]]\n\t"
07979         "adc    r4, r5\n\t"        /* digit sum including the carry in */
07980         "str    r4, [%[r]]\n\t"
07981         "mov    %[c], #0\n\t"
07982         "adc    %[c], %[c]\n\t"    /* c = 0 + 0 + carry: keeps the carry while the pointers are advanced */
07983         "add    %[a], #4\n\t"
07984         "add    %[b], #4\n\t"
07985         "add    %[r], #4\n\t"
07986         "cmp    %[a], r6\n\t"
07987         "bne    1b\n\t"
07988         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
07989         :
07990         : "memory", "r4", "r5", "r6", "r7"
07991     );
07992 
07993     return c;
07994 }
07995 
07996 #endif /* WOLFSSL_SP_SMALL */
07997 #ifdef WOLFSSL_SP_SMALL
07998 /* Sub b from a into a. (a -= b)
07999  * Two digits are subtracted per loop pass; the loop stops once a has advanced 192 bytes.
08000  * a  A single precision integer. Overwritten with the difference.
08001  * b  A single precision integer. The value subtracted.
08002  * returns 0 when there is no borrow out of the last digit, otherwise all bits set. */
08003 SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a,
08004         const sp_digit* b)
08005 {
08006     sp_digit c = 0;
08007     __asm__ __volatile__ (
08008         "mov    r7, %[a]\n\t"
08009         "add    r7, #192\n\t"      /* r7 = a + 192: address at which the loop stops */
08010         "\n1:\n\t"
08011         "mov    r5, #0\n\t"
08012         "sub    r5, %[c]\n\t"      /* 0 - c borrows only when c is non-zero: hands the saved borrow to the sbc below */
08013         "ldr    r3, [%[a]]\n\t"
08014         "ldr    r4, [%[a], #4]\n\t"
08015         "ldr    r5, [%[b]]\n\t"
08016         "ldr    r6, [%[b], #4]\n\t"
08017         "sbc    r3, r5\n\t"
08018         "sbc    r4, r6\n\t"
08019         "str    r3, [%[a]]\n\t"
08020         "str    r4, [%[a], #4]\n\t"
08021         "sbc    %[c], %[c]\n\t"    /* c - c - borrow: 0 or all bits set, kept while the pointers are advanced */
08022         "add    %[a], #8\n\t"
08023         "add    %[b], #8\n\t"
08024         "cmp    %[a], r7\n\t"
08025         "bne    1b\n\t"
08026         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
08027         :
08028         : "memory", "r3", "r4", "r5", "r6", "r7"
08029     );
08030 
08031     return c;
08032 }
08033 
08034 #endif /* WOLFSSL_SP_SMALL */
08035 #ifdef WOLFSSL_SP_SMALL
08036 /* Multiply a and b into r. (r = a * b)
08037  * The product is built one result digit at a time in a local 96 digit buffer, then copied to r.
08038  * r  A single precision integer. Receives 2 * 48 digits.
08039  * a  A single precision integer. Digits in the 192 bytes starting at a are read.
08040  * b  A single precision integer. Read at the offsets paired with those of a.
08041  */
08042 SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a,
08043         const sp_digit* b)
08044 {
08045     sp_digit tmp[48 * 2];
08046     __asm__ __volatile__ (
08047         "mov    r3, #0\n\t"          /* r3, r4 (and r5 below): running sum for the current result digit */
08048         "mov    r4, #0\n\t"
08049         "mov    r8, r3\n\t"          /* r8 = byte offset of the result digit being produced */
08050         "mov    r11, %[r]\n\t"       /* r11 = tmp */
08051         "mov    r9, %[a]\n\t"        /* r9 = a */
08052         "mov    r10, %[b]\n\t"       /* r10 = b */
08053         "mov    r6, #192\n\t"
08054         "add    r6, r9\n\t"
08055         "mov    r12, r6\n\t"         /* r12 = a + 192 */
08056         "\n1:\n\t"
08057         "mov    %[r], #0\n\t"        /* %[r] serves as a zero register inside the inner loop */
08058         "mov    r5, #0\n\t"
08059         "mov    r6, #188\n\t"
08060         "mov    %[a], r8\n\t"
08061         "sub    %[a], r6\n\t"
08062         "sbc    r6, r6\n\t"
08063         "mvn    r6, r6\n\t"
08064         "and    %[a], r6\n\t"        /* offset into a: r8 - 188, or 0 when that subtraction borrowed */
08065         "mov    %[b], r8\n\t"
08066         "sub    %[b], %[a]\n\t"      /* offset into b: r8 minus the offset into a */
08067         "add    %[a], r9\n\t"
08068         "add    %[b], r10\n\t"
08069         "\n2:\n\t"
08070         "# Multiply Start\n\t"
08071         "ldr    r6, [%[a]]\n\t"
08072         "ldr    r7, [%[b]]\n\t"
08073         "lsl    r6, r6, #16\n\t"
08074         "lsl    r7, r7, #16\n\t"
08075         "lsr    r6, r6, #16\n\t"
08076         "lsr    r7, r7, #16\n\t"
08077         "mul    r7, r6\n\t"          /* low half of a digit * low half of b digit */
08078         "add    r3, r7\n\t"
08079         "adc    r4, %[r]\n\t"
08080         "adc    r5, %[r]\n\t"
08081         "ldr    r7, [%[b]]\n\t"
08082         "lsr    r7, r7, #16\n\t"
08083         "mul    r6, r7\n\t"          /* low half of a digit * high half of b digit */
08084         "lsr    r7, r6, #16\n\t"
08085         "lsl    r6, r6, #16\n\t"
08086         "add    r3, r6\n\t"
08087         "adc    r4, r7\n\t"
08088         "adc    r5, %[r]\n\t"
08089         "ldr    r6, [%[a]]\n\t"
08090         "ldr    r7, [%[b]]\n\t"
08091         "lsr    r6, r6, #16\n\t"
08092         "lsr    r7, r7, #16\n\t"
08093         "mul    r7, r6\n\t"          /* high half of a digit * high half of b digit */
08094         "add    r4, r7\n\t"
08095         "adc    r5, %[r]\n\t"
08096         "ldr    r7, [%[b]]\n\t"
08097         "lsl    r7, r7, #16\n\t"
08098         "lsr    r7, r7, #16\n\t"
08099         "mul    r6, r7\n\t"          /* high half of a digit * low half of b digit */
08100         "lsr    r7, r6, #16\n\t"
08101         "lsl    r6, r6, #16\n\t"
08102         "add    r3, r6\n\t"
08103         "adc    r4, r7\n\t"
08104         "adc    r5, %[r]\n\t"
08105         "# Multiply Done\n\t"
08106         "add    %[a], #4\n\t"        /* a walks up while b walks down */
08107         "sub    %[b], #4\n\t"
08108         "cmp    %[a], r12\n\t"
08109         "beq    3f\n\t"
08110         "mov    r6, r8\n\t"
08111         "add    r6, r9\n\t"
08112         "cmp    %[a], r6\n\t"
08113         "ble    2b\n\t"
08114         "\n3:\n\t"
08115         "mov    %[r], r11\n\t"
08116         "mov    r7, r8\n\t"
08117         "str    r3, [%[r], r7]\n\t"  /* store the finished digit at tmp + r8 */
08118         "mov    r3, r4\n\t"
08119         "mov    r4, r5\n\t"
08120         "add    r7, #4\n\t"
08121         "mov    r8, r7\n\t"
08122         "mov    r6, #1\n\t"
08123         "lsl    r6, r6, #8\n\t"
08124         "add    r6, #120\n\t"        /* r6 = 256 + 120 = 376 */
08125         "cmp    r7, r6\n\t"
08126         "ble    1b\n\t"
08127         "str    r3, [%[r], r7]\n\t"  /* final digit, stored at the next offset after the loop */
08128         "mov    %[a], r9\n\t"        /* put the original a and b back in their operand registers */
08129         "mov    %[b], r10\n\t"
08130         :
08131         : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
08132         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
08133     );
08134 
08135     XMEMCPY(r, tmp, sizeof(tmp));
08136 }
08137 
08138 /* Square a and put result in r. (r = a * a)
08139  * The result is built in 384 bytes reserved by lowering sp, copied to r, then sp is raised again.
08140  * r  A single precision integer. Receives 384 bytes of result.
08141  * a  A single precision integer. Digits in the 192 bytes starting at a are read.
08142  */
08143 SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
08144 {
08145     __asm__ __volatile__ (
08146         "mov    r3, #0\n\t"          /* r3, r4, r5: running sum for the current result digit */
08147         "mov    r4, #0\n\t"
08148         "mov    r5, #0\n\t"
08149         "mov    r8, r3\n\t"          /* r8 = byte offset of the result digit being produced */
08150         "mov    r11, %[r]\n\t"       /* r11 = r */
08151         "mov    r6, #1\n\t"
08152         "lsl    r6, r6, #8\n\t"
08153         "add    r6, #128\n\t"
08154         "neg    r6, r6\n\t"
08155         "add    sp, r6\n\t"          /* sp -= 256 + 128: reserve the scratch buffer */
08156         "mov    r10, sp\n\t"         /* r10 = scratch buffer */
08157         "mov    r9, %[a]\n\t"        /* r9 = a */
08158         "\n1:\n\t"
08159         "mov    %[r], #0\n\t"        /* %[r] serves as a zero register inside the inner loop */
08160         "mov    r6, #188\n\t"
08161         "mov    %[a], r8\n\t"
08162         "sub    %[a], r6\n\t"
08163         "sbc    r6, r6\n\t"
08164         "mvn    r6, r6\n\t"
08165         "and    %[a], r6\n\t"        /* lower offset: r8 - 188, or 0 when that subtraction borrowed */
08166         "mov    r2, r8\n\t"
08167         "sub    r2, %[a]\n\t"        /* upper offset: r8 minus the lower offset */
08168         "add    %[a], r9\n\t"
08169         "add    r2, r9\n\t"
08170         "\n2:\n\t"
08171         "cmp    r2, %[a]\n\t"
08172         "beq    4f\n\t"              /* both pointers on the same digit: take the square path */
08173         "# Multiply * 2: Start\n\t"
08174         "ldr    r6, [%[a]]\n\t"
08175         "ldr    r7, [r2]\n\t"
08176         "lsl    r6, r6, #16\n\t"
08177         "lsl    r7, r7, #16\n\t"
08178         "lsr    r6, r6, #16\n\t"
08179         "lsr    r7, r7, #16\n\t"
08180         "mul    r7, r6\n\t"
08181         "add    r3, r7\n\t"          /* every partial product is added to the sum twice */
08182         "adc    r4, %[r]\n\t"
08183         "adc    r5, %[r]\n\t"
08184         "add    r3, r7\n\t"
08185         "adc    r4, %[r]\n\t"
08186         "adc    r5, %[r]\n\t"
08187         "ldr    r7, [r2]\n\t"
08188         "lsr    r7, r7, #16\n\t"
08189         "mul    r6, r7\n\t"
08190         "lsr    r7, r6, #16\n\t"
08191         "lsl    r6, r6, #16\n\t"
08192         "add    r3, r6\n\t"
08193         "adc    r4, r7\n\t"
08194         "adc    r5, %[r]\n\t"
08195         "add    r3, r6\n\t"
08196         "adc    r4, r7\n\t"
08197         "adc    r5, %[r]\n\t"
08198         "ldr    r6, [%[a]]\n\t"
08199         "ldr    r7, [r2]\n\t"
08200         "lsr    r6, r6, #16\n\t"
08201         "lsr    r7, r7, #16\n\t"
08202         "mul    r7, r6\n\t"
08203         "add    r4, r7\n\t"
08204         "adc    r5, %[r]\n\t"
08205         "add    r4, r7\n\t"
08206         "adc    r5, %[r]\n\t"
08207         "ldr    r7, [r2]\n\t"
08208         "lsl    r7, r7, #16\n\t"
08209         "lsr    r7, r7, #16\n\t"
08210         "mul    r6, r7\n\t"
08211         "lsr    r7, r6, #16\n\t"
08212         "lsl    r6, r6, #16\n\t"
08213         "add    r3, r6\n\t"
08214         "adc    r4, r7\n\t"
08215         "adc    r5, %[r]\n\t"
08216         "add    r3, r6\n\t"
08217         "adc    r4, r7\n\t"
08218         "adc    r5, %[r]\n\t"
08219         "# Multiply * 2: Done\n\t"
08220         "bal    5f\n\t"
08221         "\n4:\n\t"
08222         "# Square: Start\n\t"
08223         "ldr    r6, [%[a]]\n\t"
08224         "lsr    r7, r6, #16\n\t"
08225         "lsl    r6, r6, #16\n\t"
08226         "lsr    r6, r6, #16\n\t"
08227         "mul    r6, r6\n\t"          /* low half squared */
08228         "add    r3, r6\n\t"
08229         "adc    r4, %[r]\n\t"
08230         "adc    r5, %[r]\n\t"
08231         "mul    r7, r7\n\t"          /* high half squared */
08232         "add    r4, r7\n\t"
08233         "adc    r5, %[r]\n\t"
08234         "ldr    r6, [%[a]]\n\t"
08235         "lsr    r7, r6, #16\n\t"
08236         "lsl    r6, r6, #16\n\t"
08237         "lsr    r6, r6, #16\n\t"
08238         "mul    r6, r7\n\t"          /* low half * high half */
08239         "lsr    r7, r6, #15\n\t"     /* shifts of 15 and 17 rather than 16: the cross product is doubled as it is added */
08240         "lsl    r6, r6, #17\n\t"
08241         "add    r3, r6\n\t"
08242         "adc    r4, r7\n\t"
08243         "adc    r5, %[r]\n\t"
08244         "# Square: Done\n\t"
08245         "\n5:\n\t"
08246         "add    %[a], #4\n\t"        /* lower pointer walks up, upper pointer walks down */
08247         "sub    r2, #4\n\t"
08248         "mov    r6, #192\n\t"
08249         "add    r6, r9\n\t"
08250         "cmp    %[a], r6\n\t"
08251         "beq    3f\n\t"
08252         "cmp    %[a], r2\n\t"
08253         "bgt    3f\n\t"              /* the pointers have crossed: this digit is complete */
08254         "mov    r7, r8\n\t"
08255         "add    r7, r9\n\t"
08256         "cmp    %[a], r7\n\t"
08257         "ble    2b\n\t"
08258         "\n3:\n\t"
08259         "mov    %[r], r10\n\t"
08260         "mov    r7, r8\n\t"
08261         "str    r3, [%[r], r7]\n\t"  /* store the finished digit in the scratch buffer at offset r8 */
08262         "mov    r3, r4\n\t"
08263         "mov    r4, r5\n\t"
08264         "mov    r5, #0\n\t"
08265         "add    r7, #4\n\t"
08266         "mov    r8, r7\n\t"
08267         "mov    r6, #1\n\t"
08268         "lsl    r6, r6, #8\n\t"
08269         "add    r6, #120\n\t"        /* r6 = 256 + 120 = 376 */
08270         "cmp    r7, r6\n\t"
08271         "ble    1b\n\t"
08272         "mov    %[a], r9\n\t"
08273         "str    r3, [%[r], r7]\n\t"  /* final digit, stored at the next offset after the loop */
08274         "mov    %[r], r11\n\t"
08275         "mov    %[a], r10\n\t"
08276         "mov    r3, #1\n\t"
08277         "lsl    r3, r3, #8\n\t"
08278         "add    r3, #124\n\t"        /* r3 = 256 + 124 = 380: offset of the last word to copy */
08279         "\n4:\n\t"
08280         "ldr    r6, [%[a], r3]\n\t"  /* copy scratch buffer to r, from offset 380 down to 0 */
08281         "str    r6, [%[r], r3]\n\t"
08282         "sub    r3, #4\n\t"
08283         "bge    4b\n\t"
08284         "mov    r6, #1\n\t"
08285         "lsl    r6, r6, #8\n\t"
08286         "add    r6, #128\n\t"
08287         "add    sp, r6\n\t"          /* sp += 256 + 128: release the scratch buffer */
08288         :
08289         : [r] "r" (r), [a] "r" (a)   /* NOTE(review): both are written by the asm although listed as inputs; %[a] is left holding r10 on exit - confirm this is safe */
08290         : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
08291     );
08292 }
08293 
08294 #endif /* WOLFSSL_SP_SMALL */
08295 #endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
08296 
08297 /* Calculate the bottom digit of -1/a mod 2^n.
08298  *
08299  * a    A single precision number; only its lowest digit, a[0], is read.
08300  * rho  Receives the negated inverse of a[0] as a single digit.
08301  */
08302 static void sp_3072_mont_setup(const sp_digit* a, sp_digit* rho)
08303 {
08304     sp_digit lo = a[0];
08305     sp_digit inv = (((lo + 2) & 4) << 1) + lo; /* here inv*lo==1 mod 2**4 */
08306     int step;
08307 
08308     /* Each pass doubles the number of correct bits: 2**8, 2**16, 2**32. */
08309     for (step = 0; step < 3; step++) {
08310         inv *= 2 - lo * inv;
08311     }
08312     /* rho is the negated inverse. */
08313     *rho = -inv;
08314 }
08315 
08316 /* Mul a by digit b into r. (r = a * b)
08317  * One digit of a is processed per loop pass over 384 bytes of a; one more digit is stored after the loop.
08318  * r  A single precision integer. Receives one digit per digit of a, plus the final carry digit.
08319  * a  A single precision integer.
08320  * b  A single precision digit.
08321  */
08322 SP_NOINLINE static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a,
08323         sp_digit b)
08324 {
08325     __asm__ __volatile__ (
08326         "mov    r6, #1\n\t"
08327         "lsl    r6, r6, #8\n\t"
08328         "add    r6, #128\n\t"
08329         "add    r6, %[a]\n\t"        /* r6 = a + 256 + 128 */
08330         "mov    r8, %[r]\n\t"        /* r8 = address of the next result digit */
08331         "mov    r9, r6\n\t"          /* r9 = address at which the loop stops */
08332         "mov    r3, #0\n\t"          /* r3, r4 (and r5 below): running sum carried between digits */
08333         "mov    r4, #0\n\t"
08334         "1:\n\t"
08335         "mov    %[r], #0\n\t"        /* %[r] serves as a zero register during the multiply */
08336         "mov    r5, #0\n\t"
08337         "# A[] * B\n\t"
08338         "ldr    r6, [%[a]]\n\t"
08339         "lsl    r6, r6, #16\n\t"
08340         "lsl    r7, %[b], #16\n\t"
08341         "lsr    r6, r6, #16\n\t"
08342         "lsr    r7, r7, #16\n\t"
08343         "mul    r7, r6\n\t"          /* low half of a digit * low half of b */
08344         "add    r3, r7\n\t"
08345         "adc    r4, %[r]\n\t"
08346         "adc    r5, %[r]\n\t"
08347         "lsr    r7, %[b], #16\n\t"
08348         "mul    r6, r7\n\t"          /* low half of a digit * high half of b */
08349         "lsr    r7, r6, #16\n\t"
08350         "lsl    r6, r6, #16\n\t"
08351         "add    r3, r6\n\t"
08352         "adc    r4, r7\n\t"
08353         "adc    r5, %[r]\n\t"
08354         "ldr    r6, [%[a]]\n\t"
08355         "lsr    r6, r6, #16\n\t"
08356         "lsr    r7, %[b], #16\n\t"
08357         "mul    r7, r6\n\t"          /* high half of a digit * high half of b */
08358         "add    r4, r7\n\t"
08359         "adc    r5, %[r]\n\t"
08360         "lsl    r7, %[b], #16\n\t"
08361         "lsr    r7, r7, #16\n\t"
08362         "mul    r6, r7\n\t"          /* high half of a digit * low half of b */
08363         "lsr    r7, r6, #16\n\t"
08364         "lsl    r6, r6, #16\n\t"
08365         "add    r3, r6\n\t"
08366         "adc    r4, r7\n\t"
08367         "adc    r5, %[r]\n\t"
08368         "# A[] * B - Done\n\t"
08369         "mov    %[r], r8\n\t"
08370         "str    r3, [%[r]]\n\t"      /* store the finished digit, then shift the running sum down one digit */
08371         "mov    r3, r4\n\t"
08372         "mov    r4, r5\n\t"
08373         "add    %[r], #4\n\t"
08374         "add    %[a], #4\n\t"
08375         "mov    r8, %[r]\n\t"
08376         "cmp    %[a], r9\n\t"
08377         "blt    1b\n\t"              /* NOTE(review): blt compares the two addresses as signed values - confirm that is acceptable */
08378         "str    r3, [%[r]]\n\t"      /* final carry digit, one past the digits written in the loop */
08379         : [r] "+r" (r), [a] "+r" (a)
08380         : [b] "r" (b)
08381         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
08382     );
08383 }
08384 
08385 #if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
08386 /* r = 2^n mod m where n is the number of bits to reduce by.
08387  * Given m must be 3072 bits, just need to subtract: r is zeroed and m is subtracted from it in place.
08388  *
08389  * r  A single precision number. 48 digits are cleared and then receive 0 - m.
08390  * m  A single precision number. The modulus that is subtracted.
08391  */
08392 static void sp_3072_mont_norm_48(sp_digit* r, const sp_digit* m)
08393 {
08394     XMEMSET(r, 0, sizeof(sp_digit) * 48);
08395 
08396     /* r = 2^n mod m; the value returned by the subtraction is not used */
08397     sp_3072_sub_in_place_48(r, m);
08398 }
08399 
08400 /* Conditionally subtract b from a using the mask m.
08401  * Every digit of b is ANDed with m before it is subtracted: m is -1 to subtract and 0 to subtract nothing.
08402  * The loop covers byte offsets 0 to 188 of r, a and b, whatever the value of m.
08403  * r  A single precision number representing condition subtract result.
08404  * a  A single precision number to subtract from.
08405  * b  A single precision number to subtract.
08406  * m  Mask value to apply.
08407  * returns 0 when there is no borrow out of the last digit, otherwise all bits set. */
08408 SP_NOINLINE static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a,
08409         const sp_digit* b, sp_digit m)
08410 {
08411     sp_digit c = 0;
08412 
08413     __asm__ __volatile__ (
08414         "mov    r5, #192\n\t"
08415         "mov    r8, r5\n\t"          /* r8 = 192: byte offset at which the loop stops */
08416         "mov    r7, #0\n\t"          /* r7 = byte offset of the current digit */
08417         "1:\n\t"
08418         "ldr    r6, [%[b], r7]\n\t"
08419         "and    r6, %[m]\n\t"        /* masked digit of b */
08420         "mov    r5, #0\n\t"
08421         "sub    r5, %[c]\n\t"        /* 0 - c borrows only when c is non-zero: hands the saved borrow to the sbc below */
08422         "ldr    r5, [%[a], r7]\n\t"
08423         "sbc    r5, r6\n\t"
08424         "sbc    %[c], %[c]\n\t"      /* c - c - borrow: 0 or all bits set */
08425         "str    r5, [%[r], r7]\n\t"
08426         "add    r7, #4\n\t"
08427         "cmp    r7, r8\n\t"
08428         "blt    1b\n\t"
08429         : [c] "+r" (c)
08430         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
08431         : "memory", "r5", "r6", "r7", "r8"
08432     );
08433 
08434     return c;
08435 }
08436 
08437 /* Reduce the number back to 3072 bits using Montgomery reduction.
08438  * 48 outer passes each add mu * m into a at the current digit; the closing conditional subtract writes the result to the first 48 digits of a.
08439  * a   A single precision number to reduce in place. Digits a[0] to a[95] are read and written.
08440  * m   The single precision number representing the modulus.
08441  * mp  The digit representing the negative inverse of m mod 2^n.
08442  */
08443 SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m,
08444         sp_digit mp)
08445 {
08446     sp_digit ca = 0;
08447 
08448     __asm__ __volatile__ (
08449         "mov    r8, %[mp]\n\t"       /* r8 = mp */
08450         "mov    r12, %[ca]\n\t"      /* r12 = carry handed from one outer pass to the next */
08451         "mov    r14, %[m]\n\t"       /* r14 = m */
08452         "mov    r9, %[a]\n\t"        /* r9 = address of a[i] */
08453         "mov    r4, #0\n\t"
08454         "# i = 0\n\t"
08455         "mov    r11, r4\n\t"         /* r11 = i as a byte offset */
08456         "\n1:\n\t"
08457         "mov    r5, #0\n\t"
08458         "mov    %[ca], #0\n\t"
08459         "# mu = a[i] * mp\n\t"
08460         "mov    %[mp], r8\n\t"
08461         "ldr    %[a], [%[a]]\n\t"
08462         "mul    %[mp], %[a]\n\t"     /* %[mp] now holds mu for this outer pass */
08463         "mov    %[m], r14\n\t"
08464         "mov    r10, r9\n\t"         /* r10 = address of a[i+j] */
08465         "\n2:\n\t"
08466         "# a[i+j] += m[j] * mu\n\t"
08467         "mov    %[a], r10\n\t"
08468         "ldr    %[a], [%[a]]\n\t"
08469         "mov    %[ca], #0\n\t"       /* %[ca] serves as a zero register inside the inner loop */
08470         "mov    r4, r5\n\t"          /* r4 = carry in from the previous digit */
08471         "mov    r5, #0\n\t"
08472         "# Multiply m[j] and mu - Start\n\t"
08473         "ldr    r7, [%[m]]\n\t"
08474         "lsl    r6, %[mp], #16\n\t"
08475         "lsl    r7, r7, #16\n\t"
08476         "lsr    r6, r6, #16\n\t"
08477         "lsr    r7, r7, #16\n\t"
08478         "mul    r7, r6\n\t"
08479         "add    %[a], r7\n\t"
08480         "adc    r5, %[ca]\n\t"
08481         "ldr    r7, [%[m]]\n\t"
08482         "lsr    r7, r7, #16\n\t"
08483         "mul    r6, r7\n\t"
08484         "lsr    r7, r6, #16\n\t"
08485         "lsl    r6, r6, #16\n\t"
08486         "add    %[a], r6\n\t"
08487         "adc    r5, r7\n\t"
08488         "ldr    r7, [%[m]]\n\t"
08489         "lsr    r6, %[mp], #16\n\t"
08490         "lsr    r7, r7, #16\n\t"
08491         "mul    r7, r6\n\t"
08492         "add    r5, r7\n\t"
08493         "ldr    r7, [%[m]]\n\t"
08494         "lsl    r7, r7, #16\n\t"
08495         "lsr    r7, r7, #16\n\t"
08496         "mul    r6, r7\n\t"
08497         "lsr    r7, r6, #16\n\t"
08498         "lsl    r6, r6, #16\n\t"
08499         "add    %[a], r6\n\t"
08500         "adc    r5, r7\n\t"
08501         "# Multiply m[j] and mu - Done\n\t"
08502         "add    r4, %[a]\n\t"
08503         "adc    r5, %[ca]\n\t"
08504         "mov    %[a], r10\n\t"
08505         "str    r4, [%[a]]\n\t"      /* write back a[i+j]; r5 carries into the next digit */
08506         "mov    r6, #4\n\t"
08507         "add    %[m], #4\n\t"
08508         "add    r10, r6\n\t"
08509         "mov    r4, #188\n\t"
08510         "add    r4, r9\n\t"
08511         "cmp    r10, r4\n\t"
08512         "blt    2b\n\t"              /* inner loop stops when r10 reaches r9 + 188 */
08513         "# a[i+47] += m[47] * mu\n\t"
08514         "mov    %[ca], #0\n\t"
08515         "mov    r4, r12\n\t"         /* bring in the carry saved by the previous outer pass */
08516         "mov    %[a], #0\n\t"
08517         "# Multiply m[47] and mu - Start\n\t"
08518         "ldr    r7, [%[m]]\n\t"
08519         "lsl    r6, %[mp], #16\n\t"
08520         "lsl    r7, r7, #16\n\t"
08521         "lsr    r6, r6, #16\n\t"
08522         "lsr    r7, r7, #16\n\t"
08523         "mul    r7, r6\n\t"
08524         "add    r5, r7\n\t"
08525         "adc    r4, %[ca]\n\t"
08526         "adc    %[a], %[ca]\n\t"
08527         "ldr    r7, [%[m]]\n\t"
08528         "lsr    r7, r7, #16\n\t"
08529         "mul    r6, r7\n\t"
08530         "lsr    r7, r6, #16\n\t"
08531         "lsl    r6, r6, #16\n\t"
08532         "add    r5, r6\n\t"
08533         "adc    r4, r7\n\t"
08534         "adc    %[a], %[ca]\n\t"
08535         "ldr    r7, [%[m]]\n\t"
08536         "lsr    r6, %[mp], #16\n\t"
08537         "lsr    r7, r7, #16\n\t"
08538         "mul    r7, r6\n\t"
08539         "add    r4, r7\n\t"
08540         "adc    %[a], %[ca]\n\t"
08541         "ldr    r7, [%[m]]\n\t"
08542         "lsl    r7, r7, #16\n\t"
08543         "lsr    r7, r7, #16\n\t"
08544         "mul    r6, r7\n\t"
08545         "lsr    r7, r6, #16\n\t"
08546         "lsl    r6, r6, #16\n\t"
08547         "add    r5, r6\n\t"
08548         "adc    r4, r7\n\t"
08549         "adc    %[a], %[ca]\n\t"
08550         "# Multiply m[47] and mu - Done\n\t"
08551         "mov    %[ca], %[a]\n\t"
08552         "mov    %[a], r10\n\t"
08553         "ldr    r7, [%[a], #4]\n\t"  /* the two digits at r10 and r10 + 4 absorb the top of this pass */
08554         "ldr    %[a], [%[a]]\n\t"
08555         "mov    r6, #0\n\t"
08556         "add    r5, %[a]\n\t"
08557         "adc    r7, r4\n\t"
08558         "adc    %[ca], r6\n\t"
08559         "mov    %[a], r10\n\t"
08560         "str    r5, [%[a]]\n\t"
08561         "str    r7, [%[a], #4]\n\t"
08562         "# i += 1\n\t"
08563         "mov    r6, #4\n\t"
08564         "add    r9, r6\n\t"
08565         "add    r11, r6\n\t"
08566         "mov    r12, %[ca]\n\t"      /* save the carry for the next outer pass */
08567         "mov    %[a], r9\n\t"
08568         "mov    r4, #192\n\t"
08569         "cmp    r11, r4\n\t"
08570         "blt    1b\n\t"              /* outer loop stops when r11 reaches 192 */
08571         "mov    %[m], r14\n\t"       /* put the original m back in its operand register */
08572         : [ca] "+r" (ca), [a] "+r" (a)
08573         : [m] "r" (m), [mp] "r" (mp) /* NOTE(review): both are written by the asm although listed as inputs; m is restored, mp is not - confirm this is safe */
08574         : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14"
08575     );
08576 
08577     sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - ca); /* a was advanced 192 bytes by the asm, so a - 48 is the original start */
08578 }
08579 
08580 /* Multiply two Montgomery form numbers mod the modulus (prime).
08581  * (r = a * b mod m)
08582  * The product is written to r by sp_3072_mul_48 and then reduced in place by sp_3072_mont_reduce_48.
08583  * r   Result of multiplication.
08584  * a   First number to multiply in Montgomery form.
08585  * b   Second number to multiply in Montgomery form.
08586  * m   Modulus (prime).
08587  * mp  Montgomery multiplier.
08588  */
08589 static void sp_3072_mont_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b,
08590         const sp_digit* m, sp_digit mp)
08591 {
08592     sp_3072_mul_48(r, a, b);
08593     sp_3072_mont_reduce_48(r, m, mp);
08594 }
08595 
08596 /* Square the Montgomery form number. (r = a * a mod m)
08597  * The square is written to r by sp_3072_sqr_48 and then reduced in place by sp_3072_mont_reduce_48.
08598  * r   Result of squaring.
08599  * a   Number to square in Montgomery form.
08600  * m   Modulus (prime).
08601  * mp  Montgomery multiplier.
08602  */
08603 static void sp_3072_mont_sqr_48(sp_digit* r, const sp_digit* a, const sp_digit* m,
08604         sp_digit mp)
08605 {
08606     sp_3072_sqr_48(r, a);
08607     sp_3072_mont_reduce_48(r, m, mp);
08608 }
08609 
08610 /* Mul a by digit b into r. (r = a * b)
08611  * One digit of a is processed per loop pass over 192 bytes of a; one more digit is stored after the loop.
08612  * r  A single precision integer. Receives one digit per digit of a, plus the final carry digit.
08613  * a  A single precision integer.
08614  * b  A single precision digit.
08615  */
08616 SP_NOINLINE static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a,
08617         sp_digit b)
08618 {
08619     __asm__ __volatile__ (
08620         "mov    r6, #192\n\t"
08621         "add    r6, %[a]\n\t"        /* r6 = a + 192 */
08622         "mov    r8, %[r]\n\t"        /* r8 = address of the next result digit */
08623         "mov    r9, r6\n\t"          /* r9 = address at which the loop stops */
08624         "mov    r3, #0\n\t"          /* r3, r4 (and r5 below): running sum carried between digits */
08625         "mov    r4, #0\n\t"
08626         "1:\n\t"
08627         "mov    %[r], #0\n\t"        /* %[r] serves as a zero register during the multiply */
08628         "mov    r5, #0\n\t"
08629         "# A[] * B\n\t"
08630         "ldr    r6, [%[a]]\n\t"
08631         "lsl    r6, r6, #16\n\t"
08632         "lsl    r7, %[b], #16\n\t"
08633         "lsr    r6, r6, #16\n\t"
08634         "lsr    r7, r7, #16\n\t"
08635         "mul    r7, r6\n\t"          /* low half of a digit * low half of b */
08636         "add    r3, r7\n\t"
08637         "adc    r4, %[r]\n\t"
08638         "adc    r5, %[r]\n\t"
08639         "lsr    r7, %[b], #16\n\t"
08640         "mul    r6, r7\n\t"          /* low half of a digit * high half of b */
08641         "lsr    r7, r6, #16\n\t"
08642         "lsl    r6, r6, #16\n\t"
08643         "add    r3, r6\n\t"
08644         "adc    r4, r7\n\t"
08645         "adc    r5, %[r]\n\t"
08646         "ldr    r6, [%[a]]\n\t"
08647         "lsr    r6, r6, #16\n\t"
08648         "lsr    r7, %[b], #16\n\t"
08649         "mul    r7, r6\n\t"          /* high half of a digit * high half of b */
08650         "add    r4, r7\n\t"
08651         "adc    r5, %[r]\n\t"
08652         "lsl    r7, %[b], #16\n\t"
08653         "lsr    r7, r7, #16\n\t"
08654         "mul    r6, r7\n\t"          /* high half of a digit * low half of b */
08655         "lsr    r7, r6, #16\n\t"
08656         "lsl    r6, r6, #16\n\t"
08657         "add    r3, r6\n\t"
08658         "adc    r4, r7\n\t"
08659         "adc    r5, %[r]\n\t"
08660         "# A[] * B - Done\n\t"
08661         "mov    %[r], r8\n\t"
08662         "str    r3, [%[r]]\n\t"      /* store the finished digit, then shift the running sum down one digit */
08663         "mov    r3, r4\n\t"
08664         "mov    r4, r5\n\t"
08665         "add    %[r], #4\n\t"
08666         "add    %[a], #4\n\t"
08667         "mov    r8, %[r]\n\t"
08668         "cmp    %[a], r9\n\t"
08669         "blt    1b\n\t"              /* NOTE(review): blt compares the two addresses as signed values - confirm that is acceptable */
08670         "str    r3, [%[r]]\n\t"      /* final carry digit, one past the digits written in the loop */
08671         : [r] "+r" (r), [a] "+r" (a)
08672         : [b] "r" (b)
08673         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
08674     );
08675 }
08676 
08677 /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
08678  * A bit-by-bit estimate is formed first, then corrected using three r * div products.
08679  * d1   The high order half of the number to divide.
08680  * d0   The low order half of the number to divide.
08681  * div  The divisor.
08682  * returns the result of the division.
08683  *
08684  * Note that this is an approximate div. It may give an answer 1 larger.
08685  */
08686 SP_NOINLINE static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0,
08687         sp_digit div)
08688 {
08689     sp_digit r = 0;
08690 
08691     __asm__ __volatile__ (
08692         "lsr    r5, %[div], #1\n\t"
08693         "add    r5, #1\n\t"          /* r5 = (div >> 1) + 1: the value d1 is compared against below */
08694         "mov    r8, %[d0]\n\t"       /* r8, r9 keep the original d0, d1 for the correction steps */
08695         "mov    r9, %[d1]\n\t"
08696         "# Do top 32\n\t"
08697         "mov    r6, r5\n\t"
08698         "sub    r6, %[d1]\n\t"
08699         "sbc    r6, r6\n\t"          /* r6 = all bits set when r5 - d1 borrowed, else 0 */
08700         "add    %[r], %[r]\n\t"
08701         "sub    %[r], r6\n\t"        /* r = 2 * r, plus 1 when r6 is all bits set */
08702         "and    r6, r5\n\t"
08703         "sub    %[d1], r6\n\t"       /* subtract r5 from d1 only in that case */
08704         "# Next 30 bits\n\t"
08705         "mov    r4, #29\n\t"         /* the loop below repeats until r4 goes negative */
08706         "1:\n\t"
08707         "lsl    %[d0], %[d0], #1\n\t"
08708         "adc    %[d1], %[d1]\n\t"    /* shift d1|d0 left by one bit */
08709         "mov    r6, r5\n\t"
08710         "sub    r6, %[d1]\n\t"
08711         "sbc    r6, r6\n\t"
08712         "add    %[r], %[r]\n\t"
08713         "sub    %[r], r6\n\t"
08714         "and    r6, r5\n\t"
08715         "sub    %[d1], r6\n\t"
08716         "sub    r4, #1\n\t"
08717         "bpl    1b\n\t"
08718         "mov    r7, #0\n\t"
08719         "add    %[r], %[r]\n\t"
08720         "add    %[r], #1\n\t"        /* r = 2 * r + 1 */
08721         "# r * div - Start\n\t"
08722         "lsl    %[d1], %[r], #16\n\t"
08723         "lsl    r4, %[div], #16\n\t"
08724         "lsr    %[d1], %[d1], #16\n\t"
08725         "lsr    r4, r4, #16\n\t"
08726         "mul    r4, %[d1]\n\t"
08727         "lsr    r6, %[div], #16\n\t"
08728         "mul    %[d1], r6\n\t"
08729         "lsr    r5, %[d1], #16\n\t"
08730         "lsl    %[d1], %[d1], #16\n\t"
08731         "add    r4, %[d1]\n\t"
08732         "adc    r5, r7\n\t"
08733         "lsr    %[d1], %[r], #16\n\t"
08734         "mul    r6, %[d1]\n\t"
08735         "add    r5, r6\n\t"
08736         "lsl    r6, %[div], #16\n\t"
08737         "lsr    r6, r6, #16\n\t"
08738         "mul    %[d1], r6\n\t"
08739         "lsr    r6, %[d1], #16\n\t"
08740         "lsl    %[d1], %[d1], #16\n\t"
08741         "add    r4, %[d1]\n\t"
08742         "adc    r5, r6\n\t"
08743         "# r * div - Done\n\t"
08744         "mov    %[d1], r8\n\t"       /* subtract the product r5|r4 from the saved d1|d0 */
08745         "sub    %[d1], r4\n\t"
08746         "mov    r4, %[d1]\n\t"
08747         "mov    %[d1], r9\n\t"
08748         "sbc    %[d1], r5\n\t"
08749         "mov    r5, %[d1]\n\t"
08750         "add    %[r], r5\n\t"        /* add the high word of that difference to r */
08751         "# r * div - Start\n\t"
08752         "lsl    %[d1], %[r], #16\n\t"
08753         "lsl    r4, %[div], #16\n\t"
08754         "lsr    %[d1], %[d1], #16\n\t"
08755         "lsr    r4, r4, #16\n\t"
08756         "mul    r4, %[d1]\n\t"
08757         "lsr    r6, %[div], #16\n\t"
08758         "mul    %[d1], r6\n\t"
08759         "lsr    r5, %[d1], #16\n\t"
08760         "lsl    %[d1], %[d1], #16\n\t"
08761         "add    r4, %[d1]\n\t"
08762         "adc    r5, r7\n\t"
08763         "lsr    %[d1], %[r], #16\n\t"
08764         "mul    r6, %[d1]\n\t"
08765         "add    r5, r6\n\t"
08766         "lsl    r6, %[div], #16\n\t"
08767         "lsr    r6, r6, #16\n\t"
08768         "mul    %[d1], r6\n\t"
08769         "lsr    r6, %[d1], #16\n\t"
08770         "lsl    %[d1], %[d1], #16\n\t"
08771         "add    r4, %[d1]\n\t"
08772         "adc    r5, r6\n\t"
08773         "# r * div - Done\n\t"
08774         "mov    %[d1], r8\n\t"       /* second correction: same subtraction against the saved d1|d0 */
08775         "mov    r6, r9\n\t"
08776         "sub    r4, %[d1], r4\n\t"
08777         "sbc    r6, r5\n\t"
08778         "mov    r5, r6\n\t"
08779         "add    %[r], r5\n\t"
08780         "# r * div - Start\n\t"
08781         "lsl    %[d1], %[r], #16\n\t"
08782         "lsl    r4, %[div], #16\n\t"
08783         "lsr    %[d1], %[d1], #16\n\t"
08784         "lsr    r4, r4, #16\n\t"
08785         "mul    r4, %[d1]\n\t"
08786         "lsr    r6, %[div], #16\n\t"
08787         "mul    %[d1], r6\n\t"
08788         "lsr    r5, %[d1], #16\n\t"
08789         "lsl    %[d1], %[d1], #16\n\t"
08790         "add    r4, %[d1]\n\t"
08791         "adc    r5, r7\n\t"
08792         "lsr    %[d1], %[r], #16\n\t"
08793         "mul    r6, %[d1]\n\t"
08794         "add    r5, r6\n\t"
08795         "lsl    r6, %[div], #16\n\t"
08796         "lsr    r6, r6, #16\n\t"
08797         "mul    %[d1], r6\n\t"
08798         "lsr    r6, %[d1], #16\n\t"
08799         "lsl    %[d1], %[d1], #16\n\t"
08800         "add    r4, %[d1]\n\t"
08801         "adc    r5, r6\n\t"
08802         "# r * div - Done\n\t"
08803         "mov    %[d1], r8\n\t"       /* third correction */
08804         "mov    r6, r9\n\t"
08805         "sub    r4, %[d1], r4\n\t"
08806         "sbc    r6, r5\n\t"
08807         "mov    r5, r6\n\t"
08808         "add    %[r], r5\n\t"
08809         "mov    r6, %[div]\n\t"
08810         "sub    r6, r4\n\t"
08811         "sbc    r6, r6\n\t"          /* all bits set when div - r4 borrowed, else 0 */
08812         "sub    %[r], r6\n\t"        /* r is increased by 1 in that case */
08813         : [r] "+r" (r)
08814         : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) /* NOTE(review): d1 and d0 are written by the asm although listed as inputs - confirm this is safe */
08815         : "r4", "r5", "r7", "r6", "r8", "r9"
08816     );
08817     return r;
08818 }
08819 
08820 /* Compare a with b in constant time.
08821  * Digits are visited from byte offset 188 down to 0; every digit is visited whatever the data.
08822  * a  A single precision integer.
08823  * b  A single precision integer.
08824  * return -ve, 0 or +ve if a is less than, equal to or greater than b
08825  * respectively.
08826  */
08827 SP_NOINLINE static int32_t sp_3072_cmp_48(const sp_digit* a, const sp_digit* b)
08828 {
08829     sp_digit r = 0;
08830 
08831 
08832     __asm__ __volatile__ (
08833         "mov    r3, #0\n\t"
08834         "mvn    r3, r3\n\t"          /* r3 = all bits set: mask that stays set until a difference is seen */
08835         "mov    r6, #188\n\t"        /* r6 = byte offset of the digit being compared */
08836         "1:\n\t"
08837         "ldr    r7, [%[a], r6]\n\t"
08838         "ldr    r5, [%[b], r6]\n\t"
08839         "and    r7, r3\n\t"          /* once the mask is cleared both digits read as 0 */
08840         "and    r5, r3\n\t"
08841         "mov    r4, r7\n\t"
08842         "sub    r7, r5\n\t"
08843         "sbc    r7, r7\n\t"          /* all bits set when the a digit is below the b digit, else 0 */
08844         "add    %[r], r7\n\t"        /* r goes down by 1 in that case */
08845         "mvn    r7, r7\n\t"
08846         "and    r3, r7\n\t"          /* and the mask is cleared */
08847         "sub    r5, r4\n\t"
08848         "sbc    r7, r7\n\t"          /* all bits set when the b digit is below the a digit, else 0 */
08849         "sub    %[r], r7\n\t"        /* r goes up by 1 in that case */
08850         "mvn    r7, r7\n\t"
08851         "and    r3, r7\n\t"
08852         "sub    r6, #4\n\t"
08853         "cmp    r6, #0\n\t"
08854         "bge    1b\n\t"
08855         : [r] "+r" (r)
08856         : [a] "r" (a), [b] "r" (b)
08857         : "r3", "r4", "r5", "r6", "r7"
08858     );
08859 
08860     return r;
08861 }
08862 
08863 /* Divide d in a and put remainder into r (m*d + r = a)
08864  * The multiplier m is never produced; the argument is ignored.
08865  *
08866  * a  Number to be divided; 2 * 48 digits are copied from it.
08867  * d  Number to divide with.
08868  * m  Multiplier result (unused).
08869  * r  Remainder from the division.
08870  * returns MP_OKAY indicating success.
08871  */
08872 static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_digit* m,
08873         sp_digit* r)
08874 {
08875     sp_digit rem[2 * 48], prod[48 + 1];
08876     sp_digit top = d[47], q;
08877     int pos = 48;
08878 
08879     (void)m;
08880     XMEMCPY(rem, a, sizeof(rem));
08881 
08882     while (pos-- > 0) {
08883         sp_digit* lo = &rem[pos];
08884         sp_digit* hi = lo + 48;
08885         /* Estimate a quotient digit and take q * d off the window at lo. */
08886         q = div_3072_word_48(hi[0], hi[-1], top);
08887         sp_3072_mul_d_48(prod, d, q);
08888         *hi += sp_3072_sub_in_place_48(lo, prod);
08889         *hi -= prod[48];
08890         /* Twice over: add back d masked with the digit above the window. */
08891         sp_3072_mask_48(prod, d, *hi);
08892         *hi += sp_3072_add_48(lo, lo, prod);
08893         sp_3072_mask_48(prod, d, *hi);
08894         *hi += sp_3072_add_48(lo, lo, prod);
08895     }
08896     /* Subtract d one last time when the low 48 digits are not below d. */
08897     q = (sp_digit)(sp_3072_cmp_48(rem, d) >= 0);
08898     sp_3072_cond_sub_48(r, rem, d, (sp_digit)0 - q); return MP_OKAY;
08899 }
08900 
08901 /* Reduce a modulo m into r. (r = a mod m)
08902  * Forwards to sp_3072_div_48 with a NULL multiplier pointer, so only the remainder is kept.
08903  * r  A single precision number that is the reduced result.
08904  * a  A single precision number that is to be reduced.
08905  * m  A single precision number that is the modulus to reduce with.
08906  * returns MP_OKAY indicating success.
08907  */
08908 static WC_INLINE int sp_3072_mod_48(sp_digit* r, const sp_digit* a, const sp_digit* m)
08909 {
08910     return sp_3072_div_48(a, m, NULL, r);
08911 }
08912 
08913 #ifdef WOLFSSL_SP_SMALL
08914 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
08915  *
08916  * r     A single precision number that is the result of the operation.
08917  * a     A single precision number being exponentiated.
08918  * e     A single precision number that is the exponent.
08919  * bits  The number of bits in the exponent.
08920  * m     A single precision number that is the modulus.
08921  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
08922  */
08923 static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e,
08924         int bits, const sp_digit* m, int reduceA)
08925 {
08926 #ifndef WOLFSSL_SMALL_STACK
08927     sp_digit t[16][96];
08928 #else
08929     sp_digit* t[16];
08930     sp_digit* td;
08931 #endif
08932     sp_digit* norm;
08933     sp_digit mp = 1;
08934     sp_digit n;
08935     sp_digit mask;
08936     int i;
08937     int c, y;
08938     int err = MP_OKAY;
08939 
08940 #ifdef WOLFSSL_SMALL_STACK
08941     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 96, NULL,
08942                             DYNAMIC_TYPE_TMP_BUFFER);
08943     if (td == NULL) {
08944         err = MEMORY_E;
08945     }
08946 #endif
08947 
08948     if (err == MP_OKAY) {
08949 #ifdef WOLFSSL_SMALL_STACK
08950         for (i=0; i<16; i++) {
08951             t[i] = td + i * 96;
08952         }
08953 #endif
08954         norm = t[0];
08955 
08956         sp_3072_mont_setup(m, &mp);
08957         sp_3072_mont_norm_48(norm, m);
08958 
08959         XMEMSET(t[1], 0, sizeof(sp_digit) * 48U);
08960         if (reduceA != 0) {
08961             err = sp_3072_mod_48(t[1] + 48, a, m);
08962             if (err == MP_OKAY) {
08963                 err = sp_3072_mod_48(t[1], t[1], m);
08964             }
08965         }
08966         else {
08967             XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48);
08968             err = sp_3072_mod_48(t[1], t[1], m);
08969         }
08970     }
08971 
08972     if (err == MP_OKAY) {
08973         sp_3072_mont_sqr_48(t[ 2], t[ 1], m, mp);
08974         sp_3072_mont_mul_48(t[ 3], t[ 2], t[ 1], m, mp);
08975         sp_3072_mont_sqr_48(t[ 4], t[ 2], m, mp);
08976         sp_3072_mont_mul_48(t[ 5], t[ 3], t[ 2], m, mp);
08977         sp_3072_mont_sqr_48(t[ 6], t[ 3], m, mp);
08978         sp_3072_mont_mul_48(t[ 7], t[ 4], t[ 3], m, mp);
08979         sp_3072_mont_sqr_48(t[ 8], t[ 4], m, mp);
08980         sp_3072_mont_mul_48(t[ 9], t[ 5], t[ 4], m, mp);
08981         sp_3072_mont_sqr_48(t[10], t[ 5], m, mp);
08982         sp_3072_mont_mul_48(t[11], t[ 6], t[ 5], m, mp);
08983         sp_3072_mont_sqr_48(t[12], t[ 6], m, mp);
08984         sp_3072_mont_mul_48(t[13], t[ 7], t[ 6], m, mp);
08985         sp_3072_mont_sqr_48(t[14], t[ 7], m, mp);
08986         sp_3072_mont_mul_48(t[15], t[ 8], t[ 7], m, mp);
08987 
08988         i = (bits - 1) / 32;
08989         n = e[i--];
08990         c = bits & 31;
08991         if (c == 0) {
08992             c = 32;
08993         }
08994         c -= bits % 4;
08995         if (c == 32) {
08996             c = 28;
08997         }
08998         y = (int)(n >> c);
08999         n <<= 32 - c;
09000         XMEMCPY(r, t[y], sizeof(sp_digit) * 48);
09001         for (; i>=0 || c>=4; ) {
09002             if (c == 0) {
09003                 n = e[i--];
09004                 y = n >> 28;
09005                 n <<= 4;
09006                 c = 28;
09007             }
09008             else if (c < 4) {
09009                 y = n >> 28;
09010                 n = e[i--];
09011                 c = 4 - c;
09012                 y |= n >> (32 - c);
09013                 n <<= c;
09014                 c = 32 - c;
09015             }
09016             else {
09017                 y = (n >> 28) & 0xf;
09018                 n <<= 4;
09019                 c -= 4;
09020             }
09021 
09022             sp_3072_mont_sqr_48(r, r, m, mp);
09023             sp_3072_mont_sqr_48(r, r, m, mp);
09024             sp_3072_mont_sqr_48(r, r, m, mp);
09025             sp_3072_mont_sqr_48(r, r, m, mp);
09026 
09027             sp_3072_mont_mul_48(r, r, t[y], m, mp);
09028         }
09029 
09030         XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U);
09031         sp_3072_mont_reduce_48(r, m, mp);
09032 
09033         mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
09034         sp_3072_cond_sub_48(r, r, m, mask);
09035     }
09036 
09037 #ifdef WOLFSSL_SMALL_STACK
09038     if (td != NULL) {
09039         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
09040     }
09041 #endif
09042 
09043     return err;
09044 }
09045 #else
09046 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
09047  *
09048  * r     A single precision number that is the result of the operation.
09049  * a     A single precision number being exponentiated.
09050  * e     A single precision number that is the exponent.
09051  * bits  The number of bits in the exponent.
09052  * m     A single precision number that is the modulus.
09053  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
09054  */
09055 static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e,
09056         int bits, const sp_digit* m, int reduceA)
09057 {
09058 #ifndef WOLFSSL_SMALL_STACK
09059     sp_digit t[32][96];
09060 #else
09061     sp_digit* t[32];
09062     sp_digit* td;
09063 #endif
09064     sp_digit* norm;
09065     sp_digit mp = 1;
09066     sp_digit n;
09067     sp_digit mask;
09068     int i;
09069     int c, y;
09070     int err = MP_OKAY;
09071 
09072 #ifdef WOLFSSL_SMALL_STACK
09073     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 96, NULL,
09074                             DYNAMIC_TYPE_TMP_BUFFER);
09075     if (td == NULL) {
09076         err = MEMORY_E;
09077     }
09078 #endif
09079 
09080     if (err == MP_OKAY) {
09081 #ifdef WOLFSSL_SMALL_STACK
09082         for (i=0; i<32; i++) {
09083             t[i] = td + i * 96;
09084         }
09085 #endif
09086         norm = t[0];
09087 
09088         sp_3072_mont_setup(m, &mp);
09089         sp_3072_mont_norm_48(norm, m);
09090 
09091         XMEMSET(t[1], 0, sizeof(sp_digit) * 48U);
09092         if (reduceA != 0) {
09093             err = sp_3072_mod_48(t[1] + 48, a, m);
09094             if (err == MP_OKAY) {
09095                 err = sp_3072_mod_48(t[1], t[1], m);
09096             }
09097         }
09098         else {
09099             XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48);
09100             err = sp_3072_mod_48(t[1], t[1], m);
09101         }
09102     }
09103 
09104     if (err == MP_OKAY) {
09105         sp_3072_mont_sqr_48(t[ 2], t[ 1], m, mp);
09106         sp_3072_mont_mul_48(t[ 3], t[ 2], t[ 1], m, mp);
09107         sp_3072_mont_sqr_48(t[ 4], t[ 2], m, mp);
09108         sp_3072_mont_mul_48(t[ 5], t[ 3], t[ 2], m, mp);
09109         sp_3072_mont_sqr_48(t[ 6], t[ 3], m, mp);
09110         sp_3072_mont_mul_48(t[ 7], t[ 4], t[ 3], m, mp);
09111         sp_3072_mont_sqr_48(t[ 8], t[ 4], m, mp);
09112         sp_3072_mont_mul_48(t[ 9], t[ 5], t[ 4], m, mp);
09113         sp_3072_mont_sqr_48(t[10], t[ 5], m, mp);
09114         sp_3072_mont_mul_48(t[11], t[ 6], t[ 5], m, mp);
09115         sp_3072_mont_sqr_48(t[12], t[ 6], m, mp);
09116         sp_3072_mont_mul_48(t[13], t[ 7], t[ 6], m, mp);
09117         sp_3072_mont_sqr_48(t[14], t[ 7], m, mp);
09118         sp_3072_mont_mul_48(t[15], t[ 8], t[ 7], m, mp);
09119         sp_3072_mont_sqr_48(t[16], t[ 8], m, mp);
09120         sp_3072_mont_mul_48(t[17], t[ 9], t[ 8], m, mp);
09121         sp_3072_mont_sqr_48(t[18], t[ 9], m, mp);
09122         sp_3072_mont_mul_48(t[19], t[10], t[ 9], m, mp);
09123         sp_3072_mont_sqr_48(t[20], t[10], m, mp);
09124         sp_3072_mont_mul_48(t[21], t[11], t[10], m, mp);
09125         sp_3072_mont_sqr_48(t[22], t[11], m, mp);
09126         sp_3072_mont_mul_48(t[23], t[12], t[11], m, mp);
09127         sp_3072_mont_sqr_48(t[24], t[12], m, mp);
09128         sp_3072_mont_mul_48(t[25], t[13], t[12], m, mp);
09129         sp_3072_mont_sqr_48(t[26], t[13], m, mp);
09130         sp_3072_mont_mul_48(t[27], t[14], t[13], m, mp);
09131         sp_3072_mont_sqr_48(t[28], t[14], m, mp);
09132         sp_3072_mont_mul_48(t[29], t[15], t[14], m, mp);
09133         sp_3072_mont_sqr_48(t[30], t[15], m, mp);
09134         sp_3072_mont_mul_48(t[31], t[16], t[15], m, mp);
09135 
09136         i = (bits - 1) / 32;
09137         n = e[i--];
09138         c = bits & 31;
09139         if (c == 0) {
09140             c = 32;
09141         }
09142         c -= bits % 5;
09143         if (c == 32) {
09144             c = 27;
09145         }
09146         y = (int)(n >> c);
09147         n <<= 32 - c;
09148         XMEMCPY(r, t[y], sizeof(sp_digit) * 48);
09149         for (; i>=0 || c>=5; ) {
09150             if (c == 0) {
09151                 n = e[i--];
09152                 y = n >> 27;
09153                 n <<= 5;
09154                 c = 27;
09155             }
09156             else if (c < 5) {
09157                 y = n >> 27;
09158                 n = e[i--];
09159                 c = 5 - c;
09160                 y |= n >> (32 - c);
09161                 n <<= c;
09162                 c = 32 - c;
09163             }
09164             else {
09165                 y = (n >> 27) & 0x1f;
09166                 n <<= 5;
09167                 c -= 5;
09168             }
09169 
09170             sp_3072_mont_sqr_48(r, r, m, mp);
09171             sp_3072_mont_sqr_48(r, r, m, mp);
09172             sp_3072_mont_sqr_48(r, r, m, mp);
09173             sp_3072_mont_sqr_48(r, r, m, mp);
09174             sp_3072_mont_sqr_48(r, r, m, mp);
09175 
09176             sp_3072_mont_mul_48(r, r, t[y], m, mp);
09177         }
09178 
09179         XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U);
09180         sp_3072_mont_reduce_48(r, m, mp);
09181 
09182         mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
09183         sp_3072_cond_sub_48(r, r, m, mask);
09184     }
09185 
09186 #ifdef WOLFSSL_SMALL_STACK
09187     if (td != NULL) {
09188         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
09189     }
09190 #endif
09191 
09192     return err;
09193 }
09194 #endif /* WOLFSSL_SP_SMALL */
09195 
09196 #endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && !WOLFSSL_RSA_PUBLIC_ONLY */
09197 
09198 #if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
09199 /* r = 2^n mod m where n is the number of bits to reduce by.
09200  * Given m must be 3072 bits, just need to subtract.
       *
       * All 96 digits of r are cleared and m is then subtracted in place, so the
       * value left in r is 0 - m over 96 digits.
09201  *
09202  * r  A single precision number.
09203  * m  A single precision number.
09204  */
09205 static void sp_3072_mont_norm_96(sp_digit* r, const sp_digit* m)
09206 {
09207     XMEMSET(r, 0, sizeof(sp_digit) * 96);
09208 
09209     /* r = 2^n mod m */
09210     sp_3072_sub_in_place_96(r, m);
09211 }
09212 
09213 #endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
09214 /* Conditionally subtract b from a using the mask m.
09215  * Each digit of b is ANDed with m before it is subtracted, so m = -1 (all
       * bits set) subtracts b and m = 0 stores a in r unchanged.
09216  *
09217  * r  A single precision number representing condition subtract result.
09218  * a  A single precision number to subtract from.
09219  * b  A single precision number to subtract.
09220  * m  Mask value to apply.
       * returns 0 when no borrow came out of the top digit and all bits set (-1)
       * when one did.
       *
       * NOTE(review): the assembly changes the condition flags but "cc" is not in
       * the clobber list - confirm this is acceptable for the target compiler.
09221  */
09222 SP_NOINLINE static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a,
09223         const sp_digit* b, sp_digit m)
09224 {
09225     sp_digit c = 0;
09226 
09227     __asm__ __volatile__ (
              /* r8 = (1 << 8) + 128 = 384: the loop bound as a byte offset
               * (96 words, 4 bytes per step). r7 is the running byte offset. */
09228         "mov    r5, #1\n\t"
09229         "lsl    r5, r5, #8\n\t"
09230         "add    r5, #128\n\t"
09231         "mov    r8, r5\n\t"
09232         "mov    r7, #0\n\t"
09233         "1:\n\t"
09234         "ldr    r6, [%[b], r7]\n\t"
09235         "and    r6, %[m]\n\t"
              /* 0 - c puts the borrow saved from the previous word back into the
               * carry flag for the sbc that follows the load. */
09236         "mov    r5, #0\n\t"
09237         "sub    r5, %[c]\n\t"
09238         "ldr    r5, [%[a], r7]\n\t"
09239         "sbc    r5, r6\n\t"
              /* c - c - borrow: c becomes 0 or all ones, remembering the borrow
               * across the flag-changing add/cmp at the bottom of the loop. */
09240         "sbc    %[c], %[c]\n\t"
09241         "str    r5, [%[r], r7]\n\t"
09242         "add    r7, #4\n\t"
09243         "cmp    r7, r8\n\t"
09244         "blt    1b\n\t"
09245         : [c] "+r" (c)
09246         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
09247         : "memory", "r5", "r6", "r7", "r8"
09248     );
09249 
09250     return c;
09251 }
09252 
09253 /* Reduce the number back to 3072 bits using Montgomery reduction.
       *
       * For each of the 96 low digits a[i], mu = a[i] * mp is computed and
       * m * mu is added into a starting at digit i. The upper 96 digits, less m
       * when a carry came out of the top, are then written to the lower 96.
09254  *
09255  * a   A single precision number to reduce in place. Digits 0..191 are
       *     accessed; the result is left in digits 0..95.
09256  * m   The single precision number representing the modulus.
09257  * mp  The digit representing the negative inverse of m mod 2^n.
       *
       * NOTE(review): %[m] and %[mp] are declared as input-only operands yet the
       * assembly writes to both (m is restored from r14 before exit, mp is
       * not), and the condition flags are changed without "cc" in the clobber
       * list - confirm against the compiler's extended asm rules.
09258  */
09259 SP_NOINLINE static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m,
09260         sp_digit mp)
09261 {
09262     sp_digit ca = 0;
09263 
09264     __asm__ __volatile__ (
              /* High registers hold the loop state: r8 = mp, r12 = carry between
               * outer iterations, r14 = m, r9 = &a[i], r11 = i as a byte offset,
               * r10 = &a[i+j]. */
09265         "mov    r8, %[mp]\n\t"
09266         "mov    r12, %[ca]\n\t"
09267         "mov    r14, %[m]\n\t"
09268         "mov    r9, %[a]\n\t"
09269         "mov    r4, #0\n\t"
09270         "# i = 0\n\t"
09271         "mov    r11, r4\n\t"
09272         "\n1:\n\t"
09273         "mov    r5, #0\n\t"
09274         "mov    %[ca], #0\n\t"
09275         "# mu = a[i] * mp\n\t"
09276         "mov    %[mp], r8\n\t"
09277         "ldr    %[a], [%[a]]\n\t"
09278         "mul    %[mp], %[a]\n\t"
09279         "mov    %[m], r14\n\t"
09280         "mov    r10, r9\n\t"
09281         "\n2:\n\t"
09282         "# a[i+j] += m[j] * mu\n\t"
09283         "mov    %[a], r10\n\t"
09284         "ldr    %[a], [%[a]]\n\t"
09285         "mov    %[ca], #0\n\t"
09286         "mov    r4, r5\n\t"
09287         "mov    r5, #0\n\t"
09288         "# Multiply m[j] and mu - Start\n\t"
09289         "ldr    r7, [%[m]]\n\t"
09290         "lsl    r6, %[mp], #16\n\t"
09291         "lsl    r7, r7, #16\n\t"
09292         "lsr    r6, r6, #16\n\t"
09293         "lsr    r7, r7, #16\n\t"
09294         "mul    r7, r6\n\t"
09295         "add    %[a], r7\n\t"
09296         "adc    r5, %[ca]\n\t"
09297         "ldr    r7, [%[m]]\n\t"
09298         "lsr    r7, r7, #16\n\t"
09299         "mul    r6, r7\n\t"
09300         "lsr    r7, r6, #16\n\t"
09301         "lsl    r6, r6, #16\n\t"
09302         "add    %[a], r6\n\t"
09303         "adc    r5, r7\n\t"
09304         "ldr    r7, [%[m]]\n\t"
09305         "lsr    r6, %[mp], #16\n\t"
09306         "lsr    r7, r7, #16\n\t"
09307         "mul    r7, r6\n\t"
09308         "add    r5, r7\n\t"
09309         "ldr    r7, [%[m]]\n\t"
09310         "lsl    r7, r7, #16\n\t"
09311         "lsr    r7, r7, #16\n\t"
09312         "mul    r6, r7\n\t"
09313         "lsr    r7, r6, #16\n\t"
09314         "lsl    r6, r6, #16\n\t"
09315         "add    %[a], r6\n\t"
09316         "adc    r5, r7\n\t"
09317         "# Multiply m[j] and mu - Done\n\t"
09318         "add    r4, %[a]\n\t"
09319         "adc    r5, %[ca]\n\t"
09320         "mov    %[a], r10\n\t"
09321         "str    r4, [%[a]]\n\t"
09322         "mov    r6, #4\n\t"
09323         "add    %[m], #4\n\t"
09324         "add    r10, r6\n\t"
              /* Inner loop bound: r9 + (1 << 8) + 124 = &a[i + 95]. */
09325         "mov    r4, #1\n\t"
09326         "lsl    r4, r4, #8\n\t"
09327         "add    r4, #124\n\t"
09328         "add    r4, r9\n\t"
09329         "cmp    r10, r4\n\t"
09330         "blt    2b\n\t"
09331         "# a[i+95] += m[95] * mu\n\t"
09332         "mov    %[ca], #0\n\t"
09333         "mov    r4, r12\n\t"
09334         "mov    %[a], #0\n\t"
09335         "# Multiply m[95] and mu - Start\n\t"
09336         "ldr    r7, [%[m]]\n\t"
09337         "lsl    r6, %[mp], #16\n\t"
09338         "lsl    r7, r7, #16\n\t"
09339         "lsr    r6, r6, #16\n\t"
09340         "lsr    r7, r7, #16\n\t"
09341         "mul    r7, r6\n\t"
09342         "add    r5, r7\n\t"
09343         "adc    r4, %[ca]\n\t"
09344         "adc    %[a], %[ca]\n\t"
09345         "ldr    r7, [%[m]]\n\t"
09346         "lsr    r7, r7, #16\n\t"
09347         "mul    r6, r7\n\t"
09348         "lsr    r7, r6, #16\n\t"
09349         "lsl    r6, r6, #16\n\t"
09350         "add    r5, r6\n\t"
09351         "adc    r4, r7\n\t"
09352         "adc    %[a], %[ca]\n\t"
09353         "ldr    r7, [%[m]]\n\t"
09354         "lsr    r6, %[mp], #16\n\t"
09355         "lsr    r7, r7, #16\n\t"
09356         "mul    r7, r6\n\t"
09357         "add    r4, r7\n\t"
09358         "adc    %[a], %[ca]\n\t"
09359         "ldr    r7, [%[m]]\n\t"
09360         "lsl    r7, r7, #16\n\t"
09361         "lsr    r7, r7, #16\n\t"
09362         "mul    r6, r7\n\t"
09363         "lsr    r7, r6, #16\n\t"
09364         "lsl    r6, r6, #16\n\t"
09365         "add    r5, r6\n\t"
09366         "adc    r4, r7\n\t"
09367         "adc    %[a], %[ca]\n\t"
09368         "# Multiply m[95] and mu - Done\n\t"
              /* Fold the result into a[i+95] and a[i+96]; the carry out of that
               * addition is kept in ca and saved in r12 for the next round. */
09369         "mov    %[ca], %[a]\n\t"
09370         "mov    %[a], r10\n\t"
09371         "ldr    r7, [%[a], #4]\n\t"
09372         "ldr    %[a], [%[a]]\n\t"
09373         "mov    r6, #0\n\t"
09374         "add    r5, %[a]\n\t"
09375         "adc    r7, r4\n\t"
09376         "adc    %[ca], r6\n\t"
09377         "mov    %[a], r10\n\t"
09378         "str    r5, [%[a]]\n\t"
09379         "str    r7, [%[a], #4]\n\t"
09380         "# i += 1\n\t"
09381         "mov    r6, #4\n\t"
09382         "add    r9, r6\n\t"
09383         "add    r11, r6\n\t"
09384         "mov    r12, %[ca]\n\t"
09385         "mov    %[a], r9\n\t"
              /* Outer loop bound: (1 << 8) + 128 = 384 bytes = 96 digits. */
09386         "mov    r4, #1\n\t"
09387         "lsl    r4, r4, #8\n\t"
09388         "add    r4, #128\n\t"
09389         "cmp    r11, r4\n\t"
09390         "blt    1b\n\t"
09391         "mov    %[m], r14\n\t"
09392         : [ca] "+r" (ca), [a] "+r" (a)
09393         : [m] "r" (m), [mp] "r" (mp)
09394         : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14"
09395     );
09396 
          /* The assembly leaves a advanced by 96 digits, so a - 96 is the start
           * of the caller's buffer: the upper half minus (m AND mask) is written
           * to the lower half, with the mask derived from the final carry ca. */
09397     sp_3072_cond_sub_96(a - 96, a, m, (sp_digit)0 - ca);
09398 }
09399 
09400 /* Multiply two Montgomery form numbers mod the modulus (prime).
09401  * (r = a * b mod m)
       *
       * The product is written to r by sp_3072_mul_96 and then reduced in place
       * by sp_3072_mont_reduce_96.
09402  *
09403  * r   Result of multiplication.
09404  * a   First number to multiply in Montgomery form.
09405  * b   Second number to multiply in Montgomery form.
09406  * m   Modulus (prime).
09407  * mp  Montgomery multiplier.
09408  */
09409 static void sp_3072_mont_mul_96(sp_digit* r, const sp_digit* a, const sp_digit* b,
09410         const sp_digit* m, sp_digit mp)
09411 {
09412     sp_3072_mul_96(r, a, b);
09413     sp_3072_mont_reduce_96(r, m, mp);
09414 }
09415 
09416 /* Square the Montgomery form number. (r = a * a mod m)
       *
       * The square is written to r by sp_3072_sqr_96 and then reduced in place
       * by sp_3072_mont_reduce_96.
09417  *
09418  * r   Result of squaring.
09419  * a   Number to square in Montgomery form.
09420  * m   Modulus (prime).
09421  * mp  Montgomery multiplier.
09422  */
09423 static void sp_3072_mont_sqr_96(sp_digit* r, const sp_digit* a, const sp_digit* m,
09424         sp_digit mp)
09425 {
09426     sp_3072_sqr_96(r, a);
09427     sp_3072_mont_reduce_96(r, m, mp);
09428 }
09429 
09430 /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
       *
       * A first estimate is built one bit at a time by trial subtraction of
       * (div >> 1) + 1 from the running high word, then corrected in three
       * rounds that add the high word of (d1|d0) - r * div to r.
09431  *
09432  * d1   The high order half of the number to divide.
09433  * d0   The low order half of the number to divide.
09434  * div  The divisor.
09435  * returns the result of the division.
09436  *
09437  * Note that this is an approximate div. It may give an answer 1 larger.
       *
       * NOTE(review): %[d1] and %[d0] are declared as input-only operands yet the
       * assembly writes to both, and the condition flags are changed without
       * "cc" in the clobber list - confirm against the compiler's extended asm
       * rules.
09438  */
09439 SP_NOINLINE static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0,
09440         sp_digit div)
09441 {
09442     sp_digit r = 0;
09443 
09444     __asm__ __volatile__ (
              /* r5 = (div >> 1) + 1 is the trial subtrahend; r8 and r9 keep the
               * original d0 and d1 for the correction rounds. */
09445         "lsr    r5, %[div], #1\n\t"
09446         "add    r5, #1\n\t"
09447         "mov    r8, %[d0]\n\t"
09448         "mov    r9, %[d1]\n\t"
09449         "# Do top 32\n\t"
              /* One step: r is doubled and, when d1 is above r5, r gains 1 and r5
               * is subtracted from d1 (r6 is the all-ones/zero selector). */
09450         "mov    r6, r5\n\t"
09451         "sub    r6, %[d1]\n\t"
09452         "sbc    r6, r6\n\t"
09453         "add    %[r], %[r]\n\t"
09454         "sub    %[r], r6\n\t"
09455         "and    r6, r5\n\t"
09456         "sub    %[d1], r6\n\t"
09457         "# Next 30 bits\n\t"
              /* r4 counts 29 down to 0; each pass shifts d1:d0 left one bit and
               * repeats the step above. */
09458         "mov    r4, #29\n\t"
09459         "1:\n\t"
09460         "lsl    %[d0], %[d0], #1\n\t"
09461         "adc    %[d1], %[d1]\n\t"
09462         "mov    r6, r5\n\t"
09463         "sub    r6, %[d1]\n\t"
09464         "sbc    r6, r6\n\t"
09465         "add    %[r], %[r]\n\t"
09466         "sub    %[r], r6\n\t"
09467         "and    r6, r5\n\t"
09468         "sub    %[d1], r6\n\t"
09469         "sub    r4, #1\n\t"
09470         "bpl    1b\n\t"
09471         "mov    r7, #0\n\t"
09472         "add    %[r], %[r]\n\t"
09473         "add    %[r], #1\n\t"
              /* Correction round 1: r5:r4 = r * div assembled from 16-bit partial
               * products (r7 is a zero used to pick up carries). */
09474         "# r * div - Start\n\t"
09475         "lsl    %[d1], %[r], #16\n\t"
09476         "lsl    r4, %[div], #16\n\t"
09477         "lsr    %[d1], %[d1], #16\n\t"
09478         "lsr    r4, r4, #16\n\t"
09479         "mul    r4, %[d1]\n\t"
09480         "lsr    r6, %[div], #16\n\t"
09481         "mul    %[d1], r6\n\t"
09482         "lsr    r5, %[d1], #16\n\t"
09483         "lsl    %[d1], %[d1], #16\n\t"
09484         "add    r4, %[d1]\n\t"
09485         "adc    r5, r7\n\t"
09486         "lsr    %[d1], %[r], #16\n\t"
09487         "mul    r6, %[d1]\n\t"
09488         "add    r5, r6\n\t"
09489         "lsl    r6, %[div], #16\n\t"
09490         "lsr    r6, r6, #16\n\t"
09491         "mul    %[d1], r6\n\t"
09492         "lsr    r6, %[d1], #16\n\t"
09493         "lsl    %[d1], %[d1], #16\n\t"
09494         "add    r4, %[d1]\n\t"
09495         "adc    r5, r6\n\t"
09496         "# r * div - Done\n\t"
              /* Subtract the product from the original d1:d0 (r9:r8) and add the
               * high word of the difference to r. */
09497         "mov    %[d1], r8\n\t"
09498         "sub    %[d1], r4\n\t"
09499         "mov    r4, %[d1]\n\t"
09500         "mov    %[d1], r9\n\t"
09501         "sbc    %[d1], r5\n\t"
09502         "mov    r5, %[d1]\n\t"
09503         "add    %[r], r5\n\t"
              /* Correction round 2: same product and adjustment with the new r. */
09504         "# r * div - Start\n\t"
09505         "lsl    %[d1], %[r], #16\n\t"
09506         "lsl    r4, %[div], #16\n\t"
09507         "lsr    %[d1], %[d1], #16\n\t"
09508         "lsr    r4, r4, #16\n\t"
09509         "mul    r4, %[d1]\n\t"
09510         "lsr    r6, %[div], #16\n\t"
09511         "mul    %[d1], r6\n\t"
09512         "lsr    r5, %[d1], #16\n\t"
09513         "lsl    %[d1], %[d1], #16\n\t"
09514         "add    r4, %[d1]\n\t"
09515         "adc    r5, r7\n\t"
09516         "lsr    %[d1], %[r], #16\n\t"
09517         "mul    r6, %[d1]\n\t"
09518         "add    r5, r6\n\t"
09519         "lsl    r6, %[div], #16\n\t"
09520         "lsr    r6, r6, #16\n\t"
09521         "mul    %[d1], r6\n\t"
09522         "lsr    r6, %[d1], #16\n\t"
09523         "lsl    %[d1], %[d1], #16\n\t"
09524         "add    r4, %[d1]\n\t"
09525         "adc    r5, r6\n\t"
09526         "# r * div - Done\n\t"
09527         "mov    %[d1], r8\n\t"
09528         "mov    r6, r9\n\t"
09529         "sub    r4, %[d1], r4\n\t"
09530         "sbc    r6, r5\n\t"
09531         "mov    r5, r6\n\t"
09532         "add    %[r], r5\n\t"
              /* Correction round 3: as round 2; r4 is left holding the low word
               * of the difference for the final check. */
09533         "# r * div - Start\n\t"
09534         "lsl    %[d1], %[r], #16\n\t"
09535         "lsl    r4, %[div], #16\n\t"
09536         "lsr    %[d1], %[d1], #16\n\t"
09537         "lsr    r4, r4, #16\n\t"
09538         "mul    r4, %[d1]\n\t"
09539         "lsr    r6, %[div], #16\n\t"
09540         "mul    %[d1], r6\n\t"
09541         "lsr    r5, %[d1], #16\n\t"
09542         "lsl    %[d1], %[d1], #16\n\t"
09543         "add    r4, %[d1]\n\t"
09544         "adc    r5, r7\n\t"
09545         "lsr    %[d1], %[r], #16\n\t"
09546         "mul    r6, %[d1]\n\t"
09547         "add    r5, r6\n\t"
09548         "lsl    r6, %[div], #16\n\t"
09549         "lsr    r6, r6, #16\n\t"
09550         "mul    %[d1], r6\n\t"
09551         "lsr    r6, %[d1], #16\n\t"
09552         "lsl    %[d1], %[d1], #16\n\t"
09553         "add    r4, %[d1]\n\t"
09554         "adc    r5, r6\n\t"
09555         "# r * div - Done\n\t"
09556         "mov    %[d1], r8\n\t"
09557         "mov    r6, r9\n\t"
09558         "sub    r4, %[d1], r4\n\t"
09559         "sbc    r6, r5\n\t"
09560         "mov    r5, r6\n\t"
09561         "add    %[r], r5\n\t"
              /* Final adjustment: r gains 1 when div is below the low difference
               * word in r4 (the subtraction borrows). */
09562         "mov    r6, %[div]\n\t"
09563         "sub    r6, r4\n\t"
09564         "sbc    r6, r6\n\t"
09565         "sub    %[r], r6\n\t"
09566         : [r] "+r" (r)
09567         : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
09568         : "r4", "r5", "r7", "r6", "r8", "r9"
09569     );
09570     return r;
09571 }
09572 
09573 /* AND m into each word of a and store in r.
09574  *
09575  * r  A single precision integer.
09576  * a  A single precision integer.
09577  * m  Mask to AND against each digit.
09578  */
09579 static void sp_3072_mask_96(sp_digit* r, const sp_digit* a, sp_digit m)
09580 {
09581 #ifdef WOLFSSL_SP_SMALL
09582     int i;
09583 
09584     for (i=0; i<96; i++) {
09585         r[i] = a[i] & m;
09586     }
09587 #else
09588     int i;
09589 
09590     for (i = 0; i < 96; i += 8) {
09591         r[i+0] = a[i+0] & m;
09592         r[i+1] = a[i+1] & m;
09593         r[i+2] = a[i+2] & m;
09594         r[i+3] = a[i+3] & m;
09595         r[i+4] = a[i+4] & m;
09596         r[i+5] = a[i+5] & m;
09597         r[i+6] = a[i+6] & m;
09598         r[i+7] = a[i+7] & m;
09599     }
09600 #endif
09601 }
09602 
09603 /* Compare a with b in constant time.
       *
       * All 96 digits are visited from the most significant down, with no branch
       * on the data. Both digits are ANDed with a mask that starts as all ones
       * and is cleared at the first difference, so lower digits cannot change
       * the result once it is decided.
09604  *
09605  * a  A single precision integer.
09606  * b  A single precision integer.
09607  * return -ve, 0 or +ve if a is less than, equal to or greater than b
09608  * respectively.
       * The values produced by the assembly are -1, 0 and 1.
       *
       * NOTE(review): the assembly changes the condition flags but "cc" is not in
       * the clobber list - confirm this is acceptable for the target compiler.
09609  */
09610 SP_NOINLINE static int32_t sp_3072_cmp_96(const sp_digit* a, const sp_digit* b)
09611 {
09612     sp_digit r = 0;
09613 
09614 
09615     __asm__ __volatile__ (
              /* r3 = all ones: the "still undecided" mask.
               * r6 = (1 << 8) + 124 = 380: byte offset of digit 95. */
09616         "mov    r3, #0\n\t"
09617         "mvn    r3, r3\n\t"
09618         "mov    r6, #1\n\t"
09619         "lsl    r6, r6, #8\n\t"
09620         "add    r6, #124\n\t"
09621         "1:\n\t"
09622         "ldr    r7, [%[a], r6]\n\t"
09623         "ldr    r5, [%[b], r6]\n\t"
09624         "and    r7, r3\n\t"
09625         "and    r5, r3\n\t"
09626         "mov    r4, r7\n\t"
              /* a - b borrows when a < b: r7 becomes all ones, r drops by 1 and
               * the mask is cleared. */
09627         "sub    r7, r5\n\t"
09628         "sbc    r7, r7\n\t"
09629         "add    %[r], r7\n\t"
09630         "mvn    r7, r7\n\t"
09631         "and    r3, r7\n\t"
              /* b - a borrows when a > b: r7 becomes all ones, r rises by 1 and
               * the mask is cleared. */
09632         "sub    r5, r4\n\t"
09633         "sbc    r7, r7\n\t"
09634         "sub    %[r], r7\n\t"
09635         "mvn    r7, r7\n\t"
09636         "and    r3, r7\n\t"
09637         "sub    r6, #4\n\t"
09638         "cmp    r6, #0\n\t"
09639         "bge    1b\n\t"
09640         : [r] "+r" (r)
09641         : [a] "r" (a), [b] "r" (b)
09642         : "r3", "r4", "r5", "r6", "r7"
09643     );
09644 
09645     return r;
09646 }
09647 
09648 /* Divide d in a and put remainder into r (m*d + r = a)
09649  * m is not calculated as it is not needed at this time.
09650  *
09651  * a  Nmber to be divided.
09652  * d  Number to divide with.
09653  * m  Multiplier result.
09654  * r  Remainder from the division.
09655  * returns MP_OKAY indicating success.
09656  */
09657 static WC_INLINE int sp_3072_div_96(const sp_digit* a, const sp_digit* d, sp_digit* m,
09658         sp_digit* r)
09659 {
09660     sp_digit t1[192], t2[97];
09661     sp_digit div, r1;
09662     int i;
09663 
09664     (void)m;
09665 
09666     div = d[95];
09667     XMEMCPY(t1, a, sizeof(*t1) * 2 * 96);
09668     for (i=95; i>=0; i--) {
09669         r1 = div_3072_word_96(t1[96 + i], t1[96 + i - 1], div);
09670 
09671         sp_3072_mul_d_96(t2, d, r1);
09672         t1[96 + i] += sp_3072_sub_in_place_96(&t1[i], t2);
09673         t1[96 + i] -= t2[96];
09674         sp_3072_mask_96(t2, d, t1[96 + i]);
09675         t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], t2);
09676         sp_3072_mask_96(t2, d, t1[96 + i]);
09677         t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], t2);
09678     }
09679 
09680     r1 = sp_3072_cmp_96(t1, d) >= 0;
09681     sp_3072_cond_sub_96(r, t1, d, (sp_digit)0 - r1);
09682 
09683     return MP_OKAY;
09684 }
09685 
09686 /* Reduce a modulo m into r. (r = a mod m)
       *
       * Forwards to sp_3072_div_96 with the quotient argument passed as NULL, so
       * only the remainder is produced.
09687  *
09688  * r  A single precision number that is the reduced result.
09689  * a  A single precision number that is to be reduced.
09690  * m  A single precision number that is the modulus to reduce with.
09691  * returns MP_OKAY indicating success.
09692  */
09693 static WC_INLINE int sp_3072_mod_96(sp_digit* r, const sp_digit* a, const sp_digit* m)
09694 {
09695     return sp_3072_div_96(a, m, NULL, r);
09696 }
09697 
09698 /* Divide d in a and put remainder into r (m*d + r = a)
09699  * m is not calculated as it is not needed at this time.
09700  *
09701  * a  Nmber to be divided.
09702  * d  Number to divide with.
09703  * m  Multiplier result.
09704  * r  Remainder from the division.
09705  * returns MP_OKAY indicating success.
09706  */
09707 static WC_INLINE int sp_3072_div_96_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
09708         sp_digit* r)
09709 {
09710     sp_digit t1[192], t2[97];
09711     sp_digit div, r1;
09712     int i;
09713 
09714     (void)m;
09715 
09716     div = d[95];
09717     XMEMCPY(t1, a, sizeof(*t1) * 2 * 96);
09718     for (i=95; i>=0; i--) {
09719         r1 = div_3072_word_96(t1[96 + i], t1[96 + i - 1], div);
09720 
09721         sp_3072_mul_d_96(t2, d, r1);
09722         t1[96 + i] += sp_3072_sub_in_place_96(&t1[i], t2);
09723         t1[96 + i] -= t2[96];
09724         if (t1[96 + i] != 0) {
09725             t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], d);
09726             if (t1[96 + i] != 0)
09727                 t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], d);
09728         }
09729     }
09730 
09731     r1 = sp_3072_cmp_96(t1, d) >= 0;
09732     sp_3072_cond_sub_96(r, t1, d, (sp_digit)0 - r1);
09733 
09734     return MP_OKAY;
09735 }
09736 
09737 /* Reduce a modulo m into r. (r = a mod m)
       *
       * Forwards to sp_3072_div_96_cond (the variant whose add-back corrections
       * are guarded by branches) with the quotient argument passed as NULL.
09738  *
09739  * r  A single precision number that is the reduced result.
09740  * a  A single precision number that is to be reduced.
09741  * m  A single precision number that is the modulus to reduce with.
09742  * returns MP_OKAY indicating success.
09743  */
09744 static WC_INLINE int sp_3072_mod_96_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
09745 {
09746     return sp_3072_div_96_cond(a, m, NULL, r);
09747 }
09748 
09749 #if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
09750                                                      defined(WOLFSSL_HAVE_SP_DH)
09751 #ifdef WOLFSSL_SP_SMALL
09752 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
09753  *
09754  * r     A single precision number that is the result of the operation.
09755  * a     A single precision number being exponentiated.
09756  * e     A single precision number that is the exponent.
09757  * bits  The number of bits in the exponent.
09758  * m     A single precision number that is the modulus.
09759  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
09760  */
09761 static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e,
09762         int bits, const sp_digit* m, int reduceA)
09763 {
09764 #ifndef WOLFSSL_SMALL_STACK
09765     sp_digit t[16][192];
09766 #else
09767     sp_digit* t[16];
09768     sp_digit* td;
09769 #endif
09770     sp_digit* norm;
09771     sp_digit mp = 1;
09772     sp_digit n;
09773     sp_digit mask;
09774     int i;
09775     int c, y;
09776     int err = MP_OKAY;
09777 
09778 #ifdef WOLFSSL_SMALL_STACK
09779     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 192, NULL,
09780                             DYNAMIC_TYPE_TMP_BUFFER);
09781     if (td == NULL) {
09782         err = MEMORY_E;
09783     }
09784 #endif
09785 
09786     if (err == MP_OKAY) {
09787 #ifdef WOLFSSL_SMALL_STACK
09788         for (i=0; i<16; i++) {
09789             t[i] = td + i * 192;
09790         }
09791 #endif
09792         norm = t[0];
09793 
09794         sp_3072_mont_setup(m, &mp);
09795         sp_3072_mont_norm_96(norm, m);
09796 
09797         XMEMSET(t[1], 0, sizeof(sp_digit) * 96U);
09798         if (reduceA != 0) {
09799             err = sp_3072_mod_96(t[1] + 96, a, m);
09800             if (err == MP_OKAY) {
09801                 err = sp_3072_mod_96(t[1], t[1], m);
09802             }
09803         }
09804         else {
09805             XMEMCPY(t[1] + 96, a, sizeof(sp_digit) * 96);
09806             err = sp_3072_mod_96(t[1], t[1], m);
09807         }
09808     }
09809 
09810     if (err == MP_OKAY) {
09811         sp_3072_mont_sqr_96(t[ 2], t[ 1], m, mp);
09812         sp_3072_mont_mul_96(t[ 3], t[ 2], t[ 1], m, mp);
09813         sp_3072_mont_sqr_96(t[ 4], t[ 2], m, mp);
09814         sp_3072_mont_mul_96(t[ 5], t[ 3], t[ 2], m, mp);
09815         sp_3072_mont_sqr_96(t[ 6], t[ 3], m, mp);
09816         sp_3072_mont_mul_96(t[ 7], t[ 4], t[ 3], m, mp);
09817         sp_3072_mont_sqr_96(t[ 8], t[ 4], m, mp);
09818         sp_3072_mont_mul_96(t[ 9], t[ 5], t[ 4], m, mp);
09819         sp_3072_mont_sqr_96(t[10], t[ 5], m, mp);
09820         sp_3072_mont_mul_96(t[11], t[ 6], t[ 5], m, mp);
09821         sp_3072_mont_sqr_96(t[12], t[ 6], m, mp);
09822         sp_3072_mont_mul_96(t[13], t[ 7], t[ 6], m, mp);
09823         sp_3072_mont_sqr_96(t[14], t[ 7], m, mp);
09824         sp_3072_mont_mul_96(t[15], t[ 8], t[ 7], m, mp);
09825 
09826         i = (bits - 1) / 32;
09827         n = e[i--];
09828         c = bits & 31;
09829         if (c == 0) {
09830             c = 32;
09831         }
09832         c -= bits % 4;
09833         if (c == 32) {
09834             c = 28;
09835         }
09836         y = (int)(n >> c);
09837         n <<= 32 - c;
09838         XMEMCPY(r, t[y], sizeof(sp_digit) * 96);
09839         for (; i>=0 || c>=4; ) {
09840             if (c == 0) {
09841                 n = e[i--];
09842                 y = n >> 28;
09843                 n <<= 4;
09844                 c = 28;
09845             }
09846             else if (c < 4) {
09847                 y = n >> 28;
09848                 n = e[i--];
09849                 c = 4 - c;
09850                 y |= n >> (32 - c);
09851                 n <<= c;
09852                 c = 32 - c;
09853             }
09854             else {
09855                 y = (n >> 28) & 0xf;
09856                 n <<= 4;
09857                 c -= 4;
09858             }
09859 
09860             sp_3072_mont_sqr_96(r, r, m, mp);
09861             sp_3072_mont_sqr_96(r, r, m, mp);
09862             sp_3072_mont_sqr_96(r, r, m, mp);
09863             sp_3072_mont_sqr_96(r, r, m, mp);
09864 
09865             sp_3072_mont_mul_96(r, r, t[y], m, mp);
09866         }
09867 
09868         XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U);
09869         sp_3072_mont_reduce_96(r, m, mp);
09870 
09871         mask = 0 - (sp_3072_cmp_96(r, m) >= 0);
09872         sp_3072_cond_sub_96(r, r, m, mask);
09873     }
09874 
09875 #ifdef WOLFSSL_SMALL_STACK
09876     if (td != NULL) {
09877         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
09878     }
09879 #endif
09880 
09881     return err;
09882 }
09883 #else
09884 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
09885  *
09886  * r     A single precision number that is the result of the operation.
09887  * a     A single precision number being exponentiated.
09888  * e     A single precision number that is the exponent.
09889  * bits  The number of bits in the exponent.
09890  * m     A single precision number that is the modulus.
09891  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
09892  */
09893 static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e,
09894         int bits, const sp_digit* m, int reduceA)
09895 {
09896 #ifndef WOLFSSL_SMALL_STACK
09897     sp_digit t[32][192];
09898 #else
09899     sp_digit* t[32];
09900     sp_digit* td;
09901 #endif
09902     sp_digit* norm;
09903     sp_digit mp = 1;
09904     sp_digit n;
09905     sp_digit mask;
09906     int i;
09907     int c, y;
09908     int err = MP_OKAY;
09909 
09910 #ifdef WOLFSSL_SMALL_STACK
09911     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 192, NULL,
09912                             DYNAMIC_TYPE_TMP_BUFFER);
09913     if (td == NULL) {
09914         err = MEMORY_E;
09915     }
09916 #endif
09917 
09918     if (err == MP_OKAY) {
09919 #ifdef WOLFSSL_SMALL_STACK
09920         for (i=0; i<32; i++) {
09921             t[i] = td + i * 192;
09922         }
09923 #endif
09924         norm = t[0];
09925 
09926         sp_3072_mont_setup(m, &mp);
09927         sp_3072_mont_norm_96(norm, m);
09928 
09929         XMEMSET(t[1], 0, sizeof(sp_digit) * 96U);
09930         if (reduceA != 0) {
09931             err = sp_3072_mod_96(t[1] + 96, a, m);
09932             if (err == MP_OKAY) {
09933                 err = sp_3072_mod_96(t[1], t[1], m);
09934             }
09935         }
09936         else {
09937             XMEMCPY(t[1] + 96, a, sizeof(sp_digit) * 96);
09938             err = sp_3072_mod_96(t[1], t[1], m);
09939         }
09940     }
09941 
09942     if (err == MP_OKAY) {
09943         sp_3072_mont_sqr_96(t[ 2], t[ 1], m, mp);
09944         sp_3072_mont_mul_96(t[ 3], t[ 2], t[ 1], m, mp);
09945         sp_3072_mont_sqr_96(t[ 4], t[ 2], m, mp);
09946         sp_3072_mont_mul_96(t[ 5], t[ 3], t[ 2], m, mp);
09947         sp_3072_mont_sqr_96(t[ 6], t[ 3], m, mp);
09948         sp_3072_mont_mul_96(t[ 7], t[ 4], t[ 3], m, mp);
09949         sp_3072_mont_sqr_96(t[ 8], t[ 4], m, mp);
09950         sp_3072_mont_mul_96(t[ 9], t[ 5], t[ 4], m, mp);
09951         sp_3072_mont_sqr_96(t[10], t[ 5], m, mp);
09952         sp_3072_mont_mul_96(t[11], t[ 6], t[ 5], m, mp);
09953         sp_3072_mont_sqr_96(t[12], t[ 6], m, mp);
09954         sp_3072_mont_mul_96(t[13], t[ 7], t[ 6], m, mp);
09955         sp_3072_mont_sqr_96(t[14], t[ 7], m, mp);
09956         sp_3072_mont_mul_96(t[15], t[ 8], t[ 7], m, mp);
09957         sp_3072_mont_sqr_96(t[16], t[ 8], m, mp);
09958         sp_3072_mont_mul_96(t[17], t[ 9], t[ 8], m, mp);
09959         sp_3072_mont_sqr_96(t[18], t[ 9], m, mp);
09960         sp_3072_mont_mul_96(t[19], t[10], t[ 9], m, mp);
09961         sp_3072_mont_sqr_96(t[20], t[10], m, mp);
09962         sp_3072_mont_mul_96(t[21], t[11], t[10], m, mp);
09963         sp_3072_mont_sqr_96(t[22], t[11], m, mp);
09964         sp_3072_mont_mul_96(t[23], t[12], t[11], m, mp);
09965         sp_3072_mont_sqr_96(t[24], t[12], m, mp);
09966         sp_3072_mont_mul_96(t[25], t[13], t[12], m, mp);
09967         sp_3072_mont_sqr_96(t[26], t[13], m, mp);
09968         sp_3072_mont_mul_96(t[27], t[14], t[13], m, mp);
09969         sp_3072_mont_sqr_96(t[28], t[14], m, mp);
09970         sp_3072_mont_mul_96(t[29], t[15], t[14], m, mp);
09971         sp_3072_mont_sqr_96(t[30], t[15], m, mp);
09972         sp_3072_mont_mul_96(t[31], t[16], t[15], m, mp);
09973 
09974         i = (bits - 1) / 32;
09975         n = e[i--];
09976         c = bits & 31;
09977         if (c == 0) {
09978             c = 32;
09979         }
09980         c -= bits % 5;
09981         if (c == 32) {
09982             c = 27;
09983         }
09984         y = (int)(n >> c);
09985         n <<= 32 - c;
09986         XMEMCPY(r, t[y], sizeof(sp_digit) * 96);
09987         for (; i>=0 || c>=5; ) {
09988             if (c == 0) {
09989                 n = e[i--];
09990                 y = n >> 27;
09991                 n <<= 5;
09992                 c = 27;
09993             }
09994             else if (c < 5) {
09995                 y = n >> 27;
09996                 n = e[i--];
09997                 c = 5 - c;
09998                 y |= n >> (32 - c);
09999                 n <<= c;
10000                 c = 32 - c;
10001             }
10002             else {
10003                 y = (n >> 27) & 0x1f;
10004                 n <<= 5;
10005                 c -= 5;
10006             }
10007 
10008             sp_3072_mont_sqr_96(r, r, m, mp);
10009             sp_3072_mont_sqr_96(r, r, m, mp);
10010             sp_3072_mont_sqr_96(r, r, m, mp);
10011             sp_3072_mont_sqr_96(r, r, m, mp);
10012             sp_3072_mont_sqr_96(r, r, m, mp);
10013 
10014             sp_3072_mont_mul_96(r, r, t[y], m, mp);
10015         }
10016 
10017         XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U);
10018         sp_3072_mont_reduce_96(r, m, mp);
10019 
10020         mask = 0 - (sp_3072_cmp_96(r, m) >= 0);
10021         sp_3072_cond_sub_96(r, r, m, mask);
10022     }
10023 
10024 #ifdef WOLFSSL_SMALL_STACK
10025     if (td != NULL) {
10026         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
10027     }
10028 #endif
10029 
10030     return err;
10031 }
10032 #endif /* WOLFSSL_SP_SMALL */
10033 #endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
10034 
10035 #ifdef WOLFSSL_HAVE_SP_RSA
10036 /* RSA public key operation.
10037  *
10038  * in      Array of bytes representing the number to exponentiate, base.
10039  * inLen   Number of bytes in base.
10040  * em      Public exponent.
10041  * mm      Modulus.
10042  * out     Buffer to hold big-endian bytes of exponentiation result.
10043  *         Must be at least 384 bytes long.
10044  * outLen  Number of bytes in result.
10045  * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
10046  * an array is too long and MEMORY_E when dynamic memory allocation fails.
10047  */
10048 int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
10049     byte* out, word32* outLen)
10050 {
10051 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
10052     sp_digit a[192], m[96], r[192];
10053 #else
10054     sp_digit* d = NULL;
10055     sp_digit* a;
10056     sp_digit* m;
10057     sp_digit* r;
10058 #endif
10059     sp_digit *ah;
10060     sp_digit e[1];
10061     int err = MP_OKAY;
10062 
10063     if (*outLen < 384)
10064         err = MP_TO_E;
10065     if (err == MP_OKAY && (mp_count_bits(em) > 32 || inLen > 384 ||
10066                                                      mp_count_bits(mm) != 3072))
10067         err = MP_READ_E;
10068 
10069 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
10070     if (err == MP_OKAY) {
10071         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 96 * 5, NULL,
10072                                                               DYNAMIC_TYPE_RSA);
10073         if (d == NULL)
10074             err = MEMORY_E;
10075     }
10076 
10077     if (err == MP_OKAY) {
10078         a = d;
10079         r = a + 96 * 2;
10080         m = r + 96 * 2;
10081     }
10082 #endif
10083 
10084     if (err == MP_OKAY) {
10085         ah = a + 96;
10086 
10087         sp_3072_from_bin(ah, 96, in, inLen);
10088 #if DIGIT_BIT >= 32
10089         e[0] = em->dp[0];
10090 #else
10091         e[0] = em->dp[0];
10092         if (em->used > 1) {
10093             e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
10094         }
10095 #endif
10096         if (e[0] == 0) {
10097             err = MP_EXPTMOD_E;
10098         }
10099     }
10100     if (err == MP_OKAY) {
10101         sp_3072_from_mp(m, 96, mm);
10102 
10103         if (e[0] == 0x3) {
10104             if (err == MP_OKAY) {
10105                 sp_3072_sqr_96(r, ah);
10106                 err = sp_3072_mod_96_cond(r, r, m);
10107             }
10108             if (err == MP_OKAY) {
10109                 sp_3072_mul_96(r, ah, r);
10110                 err = sp_3072_mod_96_cond(r, r, m);
10111             }
10112         }
10113         else {
10114             int i;
10115             sp_digit mp;
10116 
10117             sp_3072_mont_setup(m, &mp);
10118 
10119             /* Convert to Montgomery form. */
10120             XMEMSET(a, 0, sizeof(sp_digit) * 96);
10121             err = sp_3072_mod_96_cond(a, a, m);
10122 
10123             if (err == MP_OKAY) {
10124                 for (i = 31; i >= 0; i--) {
10125                     if (e[0] >> i) {
10126                         break;
10127                     }
10128                 }
10129 
10130                 XMEMCPY(r, a, sizeof(sp_digit) * 96);
10131                 for (i--; i>=0; i--) {
10132                     sp_3072_mont_sqr_96(r, r, m, mp);
10133                     if (((e[0] >> i) & 1) == 1) {
10134                         sp_3072_mont_mul_96(r, r, a, m, mp);
10135                     }
10136                 }
10137                 XMEMSET(&r[96], 0, sizeof(sp_digit) * 96);
10138                 sp_3072_mont_reduce_96(r, m, mp);
10139 
10140                 for (i = 95; i > 0; i--) {
10141                     if (r[i] != m[i]) {
10142                         break;
10143                     }
10144                 }
10145                 if (r[i] >= m[i]) {
10146                     sp_3072_sub_in_place_96(r, m);
10147                 }
10148             }
10149         }
10150     }
10151 
10152     if (err == MP_OKAY) {
10153         sp_3072_to_bin(r, out);
10154         *outLen = 384;
10155     }
10156 
10157 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
10158     if (d != NULL) {
10159         XFREE(d, NULL, DYNAMIC_TYPE_RSA);
10160     }
10161 #endif
10162 
10163     return err;
10164 }
10165 
10166 #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
10167     sp_digit* a;
10168     sp_digit* d = NULL;
10169     sp_digit* m;
10170     sp_digit* r;
10171     int err = MP_OKAY;
10172 
10173     (void)pm;
10174     (void)qm;
10175     (void)dpm;
10176     (void)dqm;
10177     (void)qim;
10178 
10179     if (*outLen < 384U) {
10180         err = MP_TO_E;
10181     }
10182     if (err == MP_OKAY) {
10183         if (mp_count_bits(dm) > 3072) {
10184            err = MP_READ_E;
10185         }
10186         if (inLen > 384) {
10187             err = MP_READ_E;
10188         }
10189         if (mp_count_bits(mm) != 3072) {
10190             err = MP_READ_E;
10191         }
10192     }
10193 
10194     if (err == MP_OKAY) {
10195         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 96 * 4, NULL,
10196                                                               DYNAMIC_TYPE_RSA);
10197         if (d == NULL) {
10198             err = MEMORY_E;
10199         }
10200     }
10201     if (err == MP_OKAY) {
10202         a = d + 96;
10203         m = a + 192;
10204         r = a;
10205 
10206         sp_3072_from_bin(a, 96, in, inLen);
10207         sp_3072_from_mp(d, 96, dm);
10208         sp_3072_from_mp(m, 96, mm);
10209         err = sp_3072_mod_exp_96(r, a, d, 3072, m, 0);
10210     }
10211     if (err == MP_OKAY) {
10212         sp_3072_to_bin(r, out);
10213         *outLen = 384;
10214     }
10215 
10216     if (d != NULL) {
10217         XMEMSET(d, 0, sizeof(sp_digit) * 96);
10218         XFREE(d, NULL, DYNAMIC_TYPE_RSA);
10219     }
10220 
10221     return err;
10222 #else
10223 #ifndef WOLFSSL_RSA_PUBLIC_ONLY
10224 /* Conditionally add a and b using the mask m.
10225  * m is -1 to add and 0 when not.
10226  *
10227  * r  A single precision number representing conditional add result.
10228  * a  A single precision number to add with.
10229  * b  A single precision number to add.
10230  * m  Mask value to apply.
10231  */
10232 SP_NOINLINE static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b,
10233         sp_digit m)
10234 {
10235     sp_digit c = 0;
10236 
10237     __asm__ __volatile__ (
10238         "mov    r5, #192\n\t"
10239         "mov    r8, r5\n\t"
10240         "mov    r7, #0\n\t"
10241         "1:\n\t"
10242         "ldr    r6, [%[b], r7]\n\t"
10243         "and    r6, %[m]\n\t"
10244         "mov    r5, #0\n\t"
10245         "sub    r5, #1\n\t"
10246         "add    r5, %[c]\n\t"
10247         "ldr    r5, [%[a], r7]\n\t"
10248         "adc    r5, r6\n\t"
10249         "mov    %[c], #0\n\t"
10250         "adc    %[c], %[c]\n\t"
10251         "str    r5, [%[r], r7]\n\t"
10252         "add    r7, #4\n\t"
10253         "cmp    r7, r8\n\t"
10254         "blt    1b\n\t"
10255         : [c] "+r" (c)
10256         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
10257         : "memory", "r5", "r6", "r7", "r8"
10258     );
10259 
10260     return c;
10261 }
10262 
10263 /* RSA private key operation.
10264  *
10265  * in      Array of bytes representing the number to exponentiate, base.
10266  * inLen   Number of bytes in base.
10267  * dm      Private exponent.
10268  * pm      First prime.
10269  * qm      Second prime.
10270  * dpm     First prime's CRT exponent.
10271  * dqm     Second prime's CRT exponent.
10272  * qim     Inverse of second prime mod p.
10273  * mm      Modulus.
10274  * out     Buffer to hold big-endian bytes of exponentiation result.
10275  *         Must be at least 384 bytes long.
10276  * outLen  Number of bytes in result.
10277  * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
10278  * an array is too long and MEMORY_E when dynamic memory allocation fails.
10279  */
10280 int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm,
10281     mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
10282     byte* out, word32* outLen)
10283 {
10284 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
10285     sp_digit a[96 * 2];
10286     sp_digit p[48], q[48], dp[48];
10287     sp_digit tmpa[96], tmpb[96];
10288 #else
10289     sp_digit* t = NULL;
10290     sp_digit* a;
10291     sp_digit* p;
10292     sp_digit* q;
10293     sp_digit* dp;
10294     sp_digit* tmpa;
10295     sp_digit* tmpb;
10296 #endif
10297     sp_digit* r;
10298     sp_digit* qi;
10299     sp_digit* dq;
10300     sp_digit c;
10301     int err = MP_OKAY;
10302 
10303     (void)dm;
10304     (void)mm;
10305 
10306     if (*outLen < 384)
10307         err = MP_TO_E;
10308     if (err == MP_OKAY && (inLen > 384 || mp_count_bits(mm) != 3072))
10309         err = MP_READ_E;
10310 
10311 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
10312     if (err == MP_OKAY) {
10313         t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 48 * 11, NULL,
10314                                                               DYNAMIC_TYPE_RSA);
10315         if (t == NULL)
10316             err = MEMORY_E;
10317     }
10318     if (err == MP_OKAY) {
10319         a = t;
10320         p = a + 96 * 2;
10321         q = p + 48;
10322         qi = dq = dp = q + 48;
10323         tmpa = qi + 48;
10324         tmpb = tmpa + 96;
10325 
10326         r = t + 96;
10327     }
10328 #else
10329 #endif
10330 
10331     if (err == MP_OKAY) {
10332 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
10333         r = a;
10334         qi = dq = dp;
10335 #endif
10336         sp_3072_from_bin(a, 96, in, inLen);
10337         sp_3072_from_mp(p, 48, pm);
10338         sp_3072_from_mp(q, 48, qm);
10339         sp_3072_from_mp(dp, 48, dpm);
10340 
10341         err = sp_3072_mod_exp_48(tmpa, a, dp, 1536, p, 1);
10342     }
10343     if (err == MP_OKAY) {
10344         sp_3072_from_mp(dq, 48, dqm);
10345         err = sp_3072_mod_exp_48(tmpb, a, dq, 1536, q, 1);
10346     }
10347 
10348     if (err == MP_OKAY) {
10349         c = sp_3072_sub_in_place_48(tmpa, tmpb);
10350         c += sp_3072_cond_add_48(tmpa, tmpa, p, c);
10351         sp_3072_cond_add_48(tmpa, tmpa, p, c);
10352 
10353         sp_3072_from_mp(qi, 48, qim);
10354         sp_3072_mul_48(tmpa, tmpa, qi);
10355         err = sp_3072_mod_48(tmpa, tmpa, p);
10356     }
10357 
10358     if (err == MP_OKAY) {
10359         sp_3072_mul_48(tmpa, q, tmpa);
10360         XMEMSET(&tmpb[48], 0, sizeof(sp_digit) * 48);
10361         sp_3072_add_96(r, tmpb, tmpa);
10362 
10363         sp_3072_to_bin(r, out);
10364         *outLen = 384;
10365     }
10366 
10367 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
10368     if (t != NULL) {
10369         XMEMSET(t, 0, sizeof(sp_digit) * 48 * 11);
10370         XFREE(t, NULL, DYNAMIC_TYPE_RSA);
10371     }
10372 #else
10373     XMEMSET(tmpa, 0, sizeof(tmpa));
10374     XMEMSET(tmpb, 0, sizeof(tmpb));
10375     XMEMSET(p,    0, sizeof(p));
10376     XMEMSET(q,    0, sizeof(q));
10377     XMEMSET(dp,   0, sizeof(dp));
10378 #endif
10379 
10380     return err;
10381 }
10382 #endif /* WOLFSSL_RSA_PUBLIC_ONLY */
10383 #endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
10384 #endif /* WOLFSSL_HAVE_SP_RSA */
10385 #if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
10386                                               !defined(WOLFSSL_RSA_PUBLIC_ONLY))
10387 /* Convert an array of sp_digit to an mp_int.
10388  *
10389  * a  A single precision integer.
10390  * r  A multi-precision integer.
10391  */
10392 static int sp_3072_to_mp(const sp_digit* a, mp_int* r)
10393 {
10394     int err;
10395 
10396     err = mp_grow(r, (3072 + DIGIT_BIT - 1) / DIGIT_BIT);
10397     if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
10398 #if DIGIT_BIT == 32
10399         XMEMCPY(r->dp, a, sizeof(sp_digit) * 96);
10400         r->used = 96;
10401         mp_clamp(r);
10402 #elif DIGIT_BIT < 32
10403         int i, j = 0, s = 0;
10404 
10405         r->dp[0] = 0;
10406         for (i = 0; i < 96; i++) {
10407             r->dp[j] |= (mp_digit)(a[i] << s);
10408             r->dp[j] &= (1L << DIGIT_BIT) - 1;
10409             s = DIGIT_BIT - s;
10410             r->dp[++j] = (mp_digit)(a[i] >> s);
10411             while (s + DIGIT_BIT <= 32) {
10412                 s += DIGIT_BIT;
10413                 r->dp[j++] &= (1L << DIGIT_BIT) - 1;
10414                 if (s == SP_WORD_SIZE) {
10415                     r->dp[j] = 0;
10416                 }
10417                 else {
10418                     r->dp[j] = (mp_digit)(a[i] >> s);
10419                 }
10420             }
10421             s = 32 - s;
10422         }
10423         r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
10424         mp_clamp(r);
10425 #else
10426         int i, j = 0, s = 0;
10427 
10428         r->dp[0] = 0;
10429         for (i = 0; i < 96; i++) {
10430             r->dp[j] |= ((mp_digit)a[i]) << s;
10431             if (s + 32 >= DIGIT_BIT) {
10432     #if DIGIT_BIT != 32 && DIGIT_BIT != 64
10433                 r->dp[j] &= (1L << DIGIT_BIT) - 1;
10434     #endif
10435                 s = DIGIT_BIT - s;
10436                 r->dp[++j] = a[i] >> s;
10437                 s = 32 - s;
10438             }
10439             else {
10440                 s += 32;
10441             }
10442         }
10443         r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
10444         mp_clamp(r);
10445 #endif
10446     }
10447 
10448     return err;
10449 }
10450 
10451 /* Perform the modular exponentiation for Diffie-Hellman.
10452  *
10453  * base  Base. MP integer.
10454  * exp   Exponent. MP integer.
10455  * mod   Modulus. MP integer.
10456  * res   Result. MP integer.
10457  * returns 0 on success, MP_READ_E if there are too many bytes in an array
10458  * and MEMORY_E if memory allocation fails.
10459  */
10460 int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
10461 {
10462     int err = MP_OKAY;
10463     sp_digit b[192], e[96], m[96];
10464     sp_digit* r = b;
10465     int expBits = mp_count_bits(exp);
10466 
10467     if (mp_count_bits(base) > 3072) {
10468         err = MP_READ_E;
10469     }
10470 
10471     if (err == MP_OKAY) {
10472         if (expBits > 3072) {
10473             err = MP_READ_E;
10474         }
10475     }
10476 
10477     if (err == MP_OKAY) {
10478         if (mp_count_bits(mod) != 3072) {
10479             err = MP_READ_E;
10480         }
10481     }
10482 
10483     if (err == MP_OKAY) {
10484         sp_3072_from_mp(b, 96, base);
10485         sp_3072_from_mp(e, 96, exp);
10486         sp_3072_from_mp(m, 96, mod);
10487 
10488         err = sp_3072_mod_exp_96(r, b, e, expBits, m, 0);
10489     }
10490 
10491     if (err == MP_OKAY) {
10492         err = sp_3072_to_mp(r, res);
10493     }
10494 
10495     XMEMSET(e, 0, sizeof(e));
10496 
10497     return err;
10498 }
10499 
10500 #ifdef WOLFSSL_HAVE_SP_DH
10501 
10502 #ifdef HAVE_FFDHE_3072
10503 static void sp_3072_lshift_96(sp_digit* r, sp_digit* a, byte n)
10504 {
10505     __asm__ __volatile__ (
10506         "mov    r6, #31\n\t"
10507         "sub    r6, r6, %[n]\n\t"
10508         "add    %[a], %[a], #255\n\t"
10509         "add    %[r], %[r], #255\n\t"
10510         "add    %[a], %[a], #65\n\t"
10511         "add    %[r], %[r], #65\n\t"
10512         "ldr    r3, [%[a], #60]\n\t"
10513         "lsr    r4, r3, #1\n\t"
10514         "lsl    r3, r3, %[n]\n\t"
10515         "lsr    r4, r4, r6\n\t"
10516         "ldr    r2, [%[a], #56]\n\t"
10517         "str    r4, [%[r], #64]\n\t"
10518         "lsr    r5, r2, #1\n\t"
10519         "lsl    r2, r2, %[n]\n\t"
10520         "lsr    r5, r5, r6\n\t"
10521         "orr    r3, r3, r5\n\t"
10522         "ldr    r4, [%[a], #52]\n\t"
10523         "str    r3, [%[r], #60]\n\t"
10524         "lsr    r5, r4, #1\n\t"
10525         "lsl    r4, r4, %[n]\n\t"
10526         "lsr    r5, r5, r6\n\t"
10527         "orr    r2, r2, r5\n\t"
10528         "ldr    r3, [%[a], #48]\n\t"
10529         "str    r2, [%[r], #56]\n\t"
10530         "lsr    r5, r3, #1\n\t"
10531         "lsl    r3, r3, %[n]\n\t"
10532         "lsr    r5, r5, r6\n\t"
10533         "orr    r4, r4, r5\n\t"
10534         "ldr    r2, [%[a], #44]\n\t"
10535         "str    r4, [%[r], #52]\n\t"
10536         "lsr    r5, r2, #1\n\t"
10537         "lsl    r2, r2, %[n]\n\t"
10538         "lsr    r5, r5, r6\n\t"
10539         "orr    r3, r3, r5\n\t"
10540         "ldr    r4, [%[a], #40]\n\t"
10541         "str    r3, [%[r], #48]\n\t"
10542         "lsr    r5, r4, #1\n\t"
10543         "lsl    r4, r4, %[n]\n\t"
10544         "lsr    r5, r5, r6\n\t"
10545         "orr    r2, r2, r5\n\t"
10546         "ldr    r3, [%[a], #36]\n\t"
10547         "str    r2, [%[r], #44]\n\t"
10548         "lsr    r5, r3, #1\n\t"
10549         "lsl    r3, r3, %[n]\n\t"
10550         "lsr    r5, r5, r6\n\t"
10551         "orr    r4, r4, r5\n\t"
10552         "ldr    r2, [%[a], #32]\n\t"
10553         "str    r4, [%[r], #40]\n\t"
10554         "lsr    r5, r2, #1\n\t"
10555         "lsl    r2, r2, %[n]\n\t"
10556         "lsr    r5, r5, r6\n\t"
10557         "orr    r3, r3, r5\n\t"
10558         "ldr    r4, [%[a], #28]\n\t"
10559         "str    r3, [%[r], #36]\n\t"
10560         "lsr    r5, r4, #1\n\t"
10561         "lsl    r4, r4, %[n]\n\t"
10562         "lsr    r5, r5, r6\n\t"
10563         "orr    r2, r2, r5\n\t"
10564         "ldr    r3, [%[a], #24]\n\t"
10565         "str    r2, [%[r], #32]\n\t"
10566         "lsr    r5, r3, #1\n\t"
10567         "lsl    r3, r3, %[n]\n\t"
10568         "lsr    r5, r5, r6\n\t"
10569         "orr    r4, r4, r5\n\t"
10570         "ldr    r2, [%[a], #20]\n\t"
10571         "str    r4, [%[r], #28]\n\t"
10572         "lsr    r5, r2, #1\n\t"
10573         "lsl    r2, r2, %[n]\n\t"
10574         "lsr    r5, r5, r6\n\t"
10575         "orr    r3, r3, r5\n\t"
10576         "ldr    r4, [%[a], #16]\n\t"
10577         "str    r3, [%[r], #24]\n\t"
10578         "lsr    r5, r4, #1\n\t"
10579         "lsl    r4, r4, %[n]\n\t"
10580         "lsr    r5, r5, r6\n\t"
10581         "orr    r2, r2, r5\n\t"
10582         "ldr    r3, [%[a], #12]\n\t"
10583         "str    r2, [%[r], #20]\n\t"
10584         "lsr    r5, r3, #1\n\t"
10585         "lsl    r3, r3, %[n]\n\t"
10586         "lsr    r5, r5, r6\n\t"
10587         "orr    r4, r4, r5\n\t"
10588         "ldr    r2, [%[a], #8]\n\t"
10589         "str    r4, [%[r], #16]\n\t"
10590         "lsr    r5, r2, #1\n\t"
10591         "lsl    r2, r2, %[n]\n\t"
10592         "lsr    r5, r5, r6\n\t"
10593         "orr    r3, r3, r5\n\t"
10594         "ldr    r4, [%[a], #4]\n\t"
10595         "str    r3, [%[r], #12]\n\t"
10596         "lsr    r5, r4, #1\n\t"
10597         "lsl    r4, r4, %[n]\n\t"
10598         "lsr    r5, r5, r6\n\t"
10599         "orr    r2, r2, r5\n\t"
10600         "ldr    r3, [%[a], #0]\n\t"
10601         "str    r2, [%[r], #8]\n\t"
10602         "lsr    r5, r3, #1\n\t"
10603         "lsl    r3, r3, %[n]\n\t"
10604         "lsr    r5, r5, r6\n\t"
10605         "orr    r4, r4, r5\n\t"
10606         "sub    %[a], %[a], #64\n\t"
10607         "sub    %[r], %[r], #64\n\t"
10608         "ldr    r2, [%[a], #60]\n\t"
10609         "str    r4, [%[r], #68]\n\t"
10610         "lsr    r5, r2, #1\n\t"
10611         "lsl    r2, r2, %[n]\n\t"
10612         "lsr    r5, r5, r6\n\t"
10613         "orr    r3, r3, r5\n\t"
10614         "ldr    r4, [%[a], #56]\n\t"
10615         "str    r3, [%[r], #64]\n\t"
10616         "lsr    r5, r4, #1\n\t"
10617         "lsl    r4, r4, %[n]\n\t"
10618         "lsr    r5, r5, r6\n\t"
10619         "orr    r2, r2, r5\n\t"
10620         "ldr    r3, [%[a], #52]\n\t"
10621         "str    r2, [%[r], #60]\n\t"
10622         "lsr    r5, r3, #1\n\t"
10623         "lsl    r3, r3, %[n]\n\t"
10624         "lsr    r5, r5, r6\n\t"
10625         "orr    r4, r4, r5\n\t"
10626         "ldr    r2, [%[a], #48]\n\t"
10627         "str    r4, [%[r], #56]\n\t"
10628         "lsr    r5, r2, #1\n\t"
10629         "lsl    r2, r2, %[n]\n\t"
10630         "lsr    r5, r5, r6\n\t"
10631         "orr    r3, r3, r5\n\t"
10632         "ldr    r4, [%[a], #44]\n\t"
10633         "str    r3, [%[r], #52]\n\t"
10634         "lsr    r5, r4, #1\n\t"
10635         "lsl    r4, r4, %[n]\n\t"
10636         "lsr    r5, r5, r6\n\t"
10637         "orr    r2, r2, r5\n\t"
10638         "ldr    r3, [%[a], #40]\n\t"
10639         "str    r2, [%[r], #48]\n\t"
10640         "lsr    r5, r3, #1\n\t"
10641         "lsl    r3, r3, %[n]\n\t"
10642         "lsr    r5, r5, r6\n\t"
10643         "orr    r4, r4, r5\n\t"
10644         "ldr    r2, [%[a], #36]\n\t"
10645         "str    r4, [%[r], #44]\n\t"
10646         "lsr    r5, r2, #1\n\t"
10647         "lsl    r2, r2, %[n]\n\t"
10648         "lsr    r5, r5, r6\n\t"
10649         "orr    r3, r3, r5\n\t"
10650         "ldr    r4, [%[a], #32]\n\t"
10651         "str    r3, [%[r], #40]\n\t"
10652         "lsr    r5, r4, #1\n\t"
10653         "lsl    r4, r4, %[n]\n\t"
10654         "lsr    r5, r5, r6\n\t"
10655         "orr    r2, r2, r5\n\t"
10656         "ldr    r3, [%[a], #28]\n\t"
10657         "str    r2, [%[r], #36]\n\t"
10658         "lsr    r5, r3, #1\n\t"
10659         "lsl    r3, r3, %[n]\n\t"
10660         "lsr    r5, r5, r6\n\t"
10661         "orr    r4, r4, r5\n\t"
10662         "ldr    r2, [%[a], #24]\n\t"
10663         "str    r4, [%[r], #32]\n\t"
10664         "lsr    r5, r2, #1\n\t"
10665         "lsl    r2, r2, %[n]\n\t"
10666         "lsr    r5, r5, r6\n\t"
10667         "orr    r3, r3, r5\n\t"
10668         "ldr    r4, [%[a], #20]\n\t"
10669         "str    r3, [%[r], #28]\n\t"
10670         "lsr    r5, r4, #1\n\t"
10671         "lsl    r4, r4, %[n]\n\t"
10672         "lsr    r5, r5, r6\n\t"
10673         "orr    r2, r2, r5\n\t"
10674         "ldr    r3, [%[a], #16]\n\t"
10675         "str    r2, [%[r], #24]\n\t"
10676         "lsr    r5, r3, #1\n\t"
10677         "lsl    r3, r3, %[n]\n\t"
10678         "lsr    r5, r5, r6\n\t"
10679         "orr    r4, r4, r5\n\t"
10680         "ldr    r2, [%[a], #12]\n\t"
10681         "str    r4, [%[r], #20]\n\t"
10682         "lsr    r5, r2, #1\n\t"
10683         "lsl    r2, r2, %[n]\n\t"
10684         "lsr    r5, r5, r6\n\t"
10685         "orr    r3, r3, r5\n\t"
10686         "ldr    r4, [%[a], #8]\n\t"
10687         "str    r3, [%[r], #16]\n\t"
10688         "lsr    r5, r4, #1\n\t"
10689         "lsl    r4, r4, %[n]\n\t"
10690         "lsr    r5, r5, r6\n\t"
10691         "orr    r2, r2, r5\n\t"
10692         "ldr    r3, [%[a], #4]\n\t"
10693         "str    r2, [%[r], #12]\n\t"
10694         "lsr    r5, r3, #1\n\t"
10695         "lsl    r3, r3, %[n]\n\t"
10696         "lsr    r5, r5, r6\n\t"
10697         "orr    r4, r4, r5\n\t"
10698         "ldr    r2, [%[a], #0]\n\t"
10699         "str    r4, [%[r], #8]\n\t"
10700         "lsr    r5, r2, #1\n\t"
10701         "lsl    r2, r2, %[n]\n\t"
10702         "lsr    r5, r5, r6\n\t"
10703         "orr    r3, r3, r5\n\t"
10704         "sub    %[a], %[a], #64\n\t"
10705         "sub    %[r], %[r], #64\n\t"
10706         "ldr    r4, [%[a], #60]\n\t"
10707         "str    r3, [%[r], #68]\n\t"
10708         "lsr    r5, r4, #1\n\t"
10709         "lsl    r4, r4, %[n]\n\t"
10710         "lsr    r5, r5, r6\n\t"
10711         "orr    r2, r2, r5\n\t"
10712         "ldr    r3, [%[a], #56]\n\t"
10713         "str    r2, [%[r], #64]\n\t"
10714         "lsr    r5, r3, #1\n\t"
10715         "lsl    r3, r3, %[n]\n\t"
10716         "lsr    r5, r5, r6\n\t"
10717         "orr    r4, r4, r5\n\t"
10718         "ldr    r2, [%[a], #52]\n\t"
10719         "str    r4, [%[r], #60]\n\t"
10720         "lsr    r5, r2, #1\n\t"
10721         "lsl    r2, r2, %[n]\n\t"
10722         "lsr    r5, r5, r6\n\t"
10723         "orr    r3, r3, r5\n\t"
10724         "ldr    r4, [%[a], #48]\n\t"
10725         "str    r3, [%[r], #56]\n\t"
10726         "lsr    r5, r4, #1\n\t"
10727         "lsl    r4, r4, %[n]\n\t"
10728         "lsr    r5, r5, r6\n\t"
10729         "orr    r2, r2, r5\n\t"
10730         "ldr    r3, [%[a], #44]\n\t"
10731         "str    r2, [%[r], #52]\n\t"
10732         "lsr    r5, r3, #1\n\t"
10733         "lsl    r3, r3, %[n]\n\t"
10734         "lsr    r5, r5, r6\n\t"
10735         "orr    r4, r4, r5\n\t"
10736         "ldr    r2, [%[a], #40]\n\t"
10737         "str    r4, [%[r], #48]\n\t"
10738         "lsr    r5, r2, #1\n\t"
10739         "lsl    r2, r2, %[n]\n\t"
10740         "lsr    r5, r5, r6\n\t"
10741         "orr    r3, r3, r5\n\t"
10742         "ldr    r4, [%[a], #36]\n\t"
10743         "str    r3, [%[r], #44]\n\t"
10744         "lsr    r5, r4, #1\n\t"
10745         "lsl    r4, r4, %[n]\n\t"
10746         "lsr    r5, r5, r6\n\t"
10747         "orr    r2, r2, r5\n\t"
10748         "ldr    r3, [%[a], #32]\n\t"
10749         "str    r2, [%[r], #40]\n\t"
10750         "lsr    r5, r3, #1\n\t"
10751         "lsl    r3, r3, %[n]\n\t"
10752         "lsr    r5, r5, r6\n\t"
10753         "orr    r4, r4, r5\n\t"
10754         "ldr    r2, [%[a], #28]\n\t"
10755         "str    r4, [%[r], #36]\n\t"
10756         "lsr    r5, r2, #1\n\t"
10757         "lsl    r2, r2, %[n]\n\t"
10758         "lsr    r5, r5, r6\n\t"
10759         "orr    r3, r3, r5\n\t"
10760         "ldr    r4, [%[a], #24]\n\t"
10761         "str    r3, [%[r], #32]\n\t"
10762         "lsr    r5, r4, #1\n\t"
10763         "lsl    r4, r4, %[n]\n\t"
10764         "lsr    r5, r5, r6\n\t"
10765         "orr    r2, r2, r5\n\t"
10766         "ldr    r3, [%[a], #20]\n\t"
10767         "str    r2, [%[r], #28]\n\t"
10768         "lsr    r5, r3, #1\n\t"
10769         "lsl    r3, r3, %[n]\n\t"
10770         "lsr    r5, r5, r6\n\t"
10771         "orr    r4, r4, r5\n\t"
10772         "ldr    r2, [%[a], #16]\n\t"
10773         "str    r4, [%[r], #24]\n\t"
10774         "lsr    r5, r2, #1\n\t"
10775         "lsl    r2, r2, %[n]\n\t"
10776         "lsr    r5, r5, r6\n\t"
10777         "orr    r3, r3, r5\n\t"
10778         "ldr    r4, [%[a], #12]\n\t"
10779         "str    r3, [%[r], #20]\n\t"
10780         "lsr    r5, r4, #1\n\t"
10781         "lsl    r4, r4, %[n]\n\t"
10782         "lsr    r5, r5, r6\n\t"
10783         "orr    r2, r2, r5\n\t"
10784         "ldr    r3, [%[a], #8]\n\t"
10785         "str    r2, [%[r], #16]\n\t"
10786         "lsr    r5, r3, #1\n\t"
10787         "lsl    r3, r3, %[n]\n\t"
10788         "lsr    r5, r5, r6\n\t"
10789         "orr    r4, r4, r5\n\t"
10790         "ldr    r2, [%[a], #4]\n\t"
10791         "str    r4, [%[r], #12]\n\t"
10792         "lsr    r5, r2, #1\n\t"
10793         "lsl    r2, r2, %[n]\n\t"
10794         "lsr    r5, r5, r6\n\t"
10795         "orr    r3, r3, r5\n\t"
10796         "ldr    r4, [%[a], #0]\n\t"
10797         "str    r3, [%[r], #8]\n\t"
10798         "lsr    r5, r4, #1\n\t"
10799         "lsl    r4, r4, %[n]\n\t"
10800         "lsr    r5, r5, r6\n\t"
10801         "orr    r2, r2, r5\n\t"
10802         "sub    %[a], %[a], #64\n\t"
10803         "sub    %[r], %[r], #64\n\t"
10804         "ldr    r3, [%[a], #60]\n\t"
10805         "str    r2, [%[r], #68]\n\t"
10806         "lsr    r5, r3, #1\n\t"
10807         "lsl    r3, r3, %[n]\n\t"
10808         "lsr    r5, r5, r6\n\t"
10809         "orr    r4, r4, r5\n\t"
10810         "ldr    r2, [%[a], #56]\n\t"
10811         "str    r4, [%[r], #64]\n\t"
10812         "lsr    r5, r2, #1\n\t"
10813         "lsl    r2, r2, %[n]\n\t"
10814         "lsr    r5, r5, r6\n\t"
10815         "orr    r3, r3, r5\n\t"
10816         "ldr    r4, [%[a], #52]\n\t"
10817         "str    r3, [%[r], #60]\n\t"
10818         "lsr    r5, r4, #1\n\t"
10819         "lsl    r4, r4, %[n]\n\t"
10820         "lsr    r5, r5, r6\n\t"
10821         "orr    r2, r2, r5\n\t"
10822         "ldr    r3, [%[a], #48]\n\t"
10823         "str    r2, [%[r], #56]\n\t"
10824         "lsr    r5, r3, #1\n\t"
10825         "lsl    r3, r3, %[n]\n\t"
10826         "lsr    r5, r5, r6\n\t"
10827         "orr    r4, r4, r5\n\t"
10828         "ldr    r2, [%[a], #44]\n\t"
10829         "str    r4, [%[r], #52]\n\t"
10830         "lsr    r5, r2, #1\n\t"
10831         "lsl    r2, r2, %[n]\n\t"
10832         "lsr    r5, r5, r6\n\t"
10833         "orr    r3, r3, r5\n\t"
10834         "ldr    r4, [%[a], #40]\n\t"
10835         "str    r3, [%[r], #48]\n\t"
10836         "lsr    r5, r4, #1\n\t"
10837         "lsl    r4, r4, %[n]\n\t"
10838         "lsr    r5, r5, r6\n\t"
10839         "orr    r2, r2, r5\n\t"
10840         "ldr    r3, [%[a], #36]\n\t"
10841         "str    r2, [%[r], #44]\n\t"
10842         "lsr    r5, r3, #1\n\t"
10843         "lsl    r3, r3, %[n]\n\t"
10844         "lsr    r5, r5, r6\n\t"
10845         "orr    r4, r4, r5\n\t"
10846         "ldr    r2, [%[a], #32]\n\t"
10847         "str    r4, [%[r], #40]\n\t"
10848         "lsr    r5, r2, #1\n\t"
10849         "lsl    r2, r2, %[n]\n\t"
10850         "lsr    r5, r5, r6\n\t"
10851         "orr    r3, r3, r5\n\t"
10852         "ldr    r4, [%[a], #28]\n\t"
10853         "str    r3, [%[r], #36]\n\t"
10854         "lsr    r5, r4, #1\n\t"
10855         "lsl    r4, r4, %[n]\n\t"
10856         "lsr    r5, r5, r6\n\t"
10857         "orr    r2, r2, r5\n\t"
10858         "ldr    r3, [%[a], #24]\n\t"
10859         "str    r2, [%[r], #32]\n\t"
10860         "lsr    r5, r3, #1\n\t"
10861         "lsl    r3, r3, %[n]\n\t"
10862         "lsr    r5, r5, r6\n\t"
10863         "orr    r4, r4, r5\n\t"
10864         "ldr    r2, [%[a], #20]\n\t"
10865         "str    r4, [%[r], #28]\n\t"
10866         "lsr    r5, r2, #1\n\t"
10867         "lsl    r2, r2, %[n]\n\t"
10868         "lsr    r5, r5, r6\n\t"
10869         "orr    r3, r3, r5\n\t"
10870         "ldr    r4, [%[a], #16]\n\t"
10871         "str    r3, [%[r], #24]\n\t"
10872         "lsr    r5, r4, #1\n\t"
10873         "lsl    r4, r4, %[n]\n\t"
10874         "lsr    r5, r5, r6\n\t"
10875         "orr    r2, r2, r5\n\t"
10876         "ldr    r3, [%[a], #12]\n\t"
10877         "str    r2, [%[r], #20]\n\t"
10878         "lsr    r5, r3, #1\n\t"
10879         "lsl    r3, r3, %[n]\n\t"
10880         "lsr    r5, r5, r6\n\t"
10881         "orr    r4, r4, r5\n\t"
10882         "ldr    r2, [%[a], #8]\n\t"
10883         "str    r4, [%[r], #16]\n\t"
10884         "lsr    r5, r2, #1\n\t"
10885         "lsl    r2, r2, %[n]\n\t"
10886         "lsr    r5, r5, r6\n\t"
10887         "orr    r3, r3, r5\n\t"
10888         "ldr    r4, [%[a], #4]\n\t"
10889         "str    r3, [%[r], #12]\n\t"
10890         "lsr    r5, r4, #1\n\t"
10891         "lsl    r4, r4, %[n]\n\t"
10892         "lsr    r5, r5, r6\n\t"
10893         "orr    r2, r2, r5\n\t"
10894         "ldr    r3, [%[a], #0]\n\t"
10895         "str    r2, [%[r], #8]\n\t"
10896         "lsr    r5, r3, #1\n\t"
10897         "lsl    r3, r3, %[n]\n\t"
10898         "lsr    r5, r5, r6\n\t"
10899         "orr    r4, r4, r5\n\t"
10900         "sub    %[a], %[a], #64\n\t"
10901         "sub    %[r], %[r], #64\n\t"
10902         "ldr    r2, [%[a], #60]\n\t"
10903         "str    r4, [%[r], #68]\n\t"
10904         "lsr    r5, r2, #1\n\t"
10905         "lsl    r2, r2, %[n]\n\t"
10906         "lsr    r5, r5, r6\n\t"
10907         "orr    r3, r3, r5\n\t"
10908         "ldr    r4, [%[a], #56]\n\t"
10909         "str    r3, [%[r], #64]\n\t"
10910         "lsr    r5, r4, #1\n\t"
10911         "lsl    r4, r4, %[n]\n\t"
10912         "lsr    r5, r5, r6\n\t"
10913         "orr    r2, r2, r5\n\t"
10914         "ldr    r3, [%[a], #52]\n\t"
10915         "str    r2, [%[r], #60]\n\t"
10916         "lsr    r5, r3, #1\n\t"
10917         "lsl    r3, r3, %[n]\n\t"
10918         "lsr    r5, r5, r6\n\t"
10919         "orr    r4, r4, r5\n\t"
10920         "ldr    r2, [%[a], #48]\n\t"
10921         "str    r4, [%[r], #56]\n\t"
10922         "lsr    r5, r2, #1\n\t"
10923         "lsl    r2, r2, %[n]\n\t"
10924         "lsr    r5, r5, r6\n\t"
10925         "orr    r3, r3, r5\n\t"
10926         "ldr    r4, [%[a], #44]\n\t"
10927         "str    r3, [%[r], #52]\n\t"
10928         "lsr    r5, r4, #1\n\t"
10929         "lsl    r4, r4, %[n]\n\t"
10930         "lsr    r5, r5, r6\n\t"
10931         "orr    r2, r2, r5\n\t"
10932         "ldr    r3, [%[a], #40]\n\t"
10933         "str    r2, [%[r], #48]\n\t"
10934         "lsr    r5, r3, #1\n\t"
10935         "lsl    r3, r3, %[n]\n\t"
10936         "lsr    r5, r5, r6\n\t"
10937         "orr    r4, r4, r5\n\t"
10938         "ldr    r2, [%[a], #36]\n\t"
10939         "str    r4, [%[r], #44]\n\t"
10940         "lsr    r5, r2, #1\n\t"
10941         "lsl    r2, r2, %[n]\n\t"
10942         "lsr    r5, r5, r6\n\t"
10943         "orr    r3, r3, r5\n\t"
10944         "ldr    r4, [%[a], #32]\n\t"
10945         "str    r3, [%[r], #40]\n\t"
10946         "lsr    r5, r4, #1\n\t"
10947         "lsl    r4, r4, %[n]\n\t"
10948         "lsr    r5, r5, r6\n\t"
10949         "orr    r2, r2, r5\n\t"
10950         "ldr    r3, [%[a], #28]\n\t"
10951         "str    r2, [%[r], #36]\n\t"
10952         "lsr    r5, r3, #1\n\t"
10953         "lsl    r3, r3, %[n]\n\t"
10954         "lsr    r5, r5, r6\n\t"
10955         "orr    r4, r4, r5\n\t"
10956         "ldr    r2, [%[a], #24]\n\t"
10957         "str    r4, [%[r], #32]\n\t"
10958         "lsr    r5, r2, #1\n\t"
10959         "lsl    r2, r2, %[n]\n\t"
10960         "lsr    r5, r5, r6\n\t"
10961         "orr    r3, r3, r5\n\t"
10962         "ldr    r4, [%[a], #20]\n\t"
10963         "str    r3, [%[r], #28]\n\t"
10964         "lsr    r5, r4, #1\n\t"
10965         "lsl    r4, r4, %[n]\n\t"
10966         "lsr    r5, r5, r6\n\t"
10967         "orr    r2, r2, r5\n\t"
10968         "ldr    r3, [%[a], #16]\n\t"
10969         "str    r2, [%[r], #24]\n\t"
10970         "lsr    r5, r3, #1\n\t"
10971         "lsl    r3, r3, %[n]\n\t"
10972         "lsr    r5, r5, r6\n\t"
10973         "orr    r4, r4, r5\n\t"
10974         "ldr    r2, [%[a], #12]\n\t"
10975         "str    r4, [%[r], #20]\n\t"
10976         "lsr    r5, r2, #1\n\t"
10977         "lsl    r2, r2, %[n]\n\t"
10978         "lsr    r5, r5, r6\n\t"
10979         "orr    r3, r3, r5\n\t"
10980         "ldr    r4, [%[a], #8]\n\t"
10981         "str    r3, [%[r], #16]\n\t"
10982         "lsr    r5, r4, #1\n\t"
10983         "lsl    r4, r4, %[n]\n\t"
10984         "lsr    r5, r5, r6\n\t"
10985         "orr    r2, r2, r5\n\t"
10986         "ldr    r3, [%[a], #4]\n\t"
10987         "str    r2, [%[r], #12]\n\t"
10988         "lsr    r5, r3, #1\n\t"
10989         "lsl    r3, r3, %[n]\n\t"
10990         "lsr    r5, r5, r6\n\t"
10991         "orr    r4, r4, r5\n\t"
10992         "ldr    r2, [%[a], #0]\n\t"
10993         "str    r4, [%[r], #8]\n\t"
10994         "lsr    r5, r2, #1\n\t"
10995         "lsl    r2, r2, %[n]\n\t"
10996         "lsr    r5, r5, r6\n\t"
10997         "orr    r3, r3, r5\n\t"
10998         "sub    %[a], %[a], #64\n\t"
10999         "sub    %[r], %[r], #64\n\t"
11000         "ldr    r4, [%[a], #60]\n\t"
11001         "str    r3, [%[r], #68]\n\t"
11002         "lsr    r5, r4, #1\n\t"
11003         "lsl    r4, r4, %[n]\n\t"
11004         "lsr    r5, r5, r6\n\t"
11005         "orr    r2, r2, r5\n\t"
11006         "ldr    r3, [%[a], #56]\n\t"
11007         "str    r2, [%[r], #64]\n\t"
11008         "lsr    r5, r3, #1\n\t"
11009         "lsl    r3, r3, %[n]\n\t"
11010         "lsr    r5, r5, r6\n\t"
11011         "orr    r4, r4, r5\n\t"
11012         "ldr    r2, [%[a], #52]\n\t"
11013         "str    r4, [%[r], #60]\n\t"
11014         "lsr    r5, r2, #1\n\t"
11015         "lsl    r2, r2, %[n]\n\t"
11016         "lsr    r5, r5, r6\n\t"
11017         "orr    r3, r3, r5\n\t"
11018         "ldr    r4, [%[a], #48]\n\t"
11019         "str    r3, [%[r], #56]\n\t"
11020         "lsr    r5, r4, #1\n\t"
11021         "lsl    r4, r4, %[n]\n\t"
11022         "lsr    r5, r5, r6\n\t"
11023         "orr    r2, r2, r5\n\t"
11024         "ldr    r3, [%[a], #44]\n\t"
11025         "str    r2, [%[r], #52]\n\t"
11026         "lsr    r5, r3, #1\n\t"
11027         "lsl    r3, r3, %[n]\n\t"
11028         "lsr    r5, r5, r6\n\t"
11029         "orr    r4, r4, r5\n\t"
11030         "ldr    r2, [%[a], #40]\n\t"
11031         "str    r4, [%[r], #48]\n\t"
11032         "lsr    r5, r2, #1\n\t"
11033         "lsl    r2, r2, %[n]\n\t"
11034         "lsr    r5, r5, r6\n\t"
11035         "orr    r3, r3, r5\n\t"
11036         "ldr    r4, [%[a], #36]\n\t"
11037         "str    r3, [%[r], #44]\n\t"
11038         "lsr    r5, r4, #1\n\t"
11039         "lsl    r4, r4, %[n]\n\t"
11040         "lsr    r5, r5, r6\n\t"
11041         "orr    r2, r2, r5\n\t"
11042         "ldr    r3, [%[a], #32]\n\t"
11043         "str    r2, [%[r], #40]\n\t"
11044         "lsr    r5, r3, #1\n\t"
11045         "lsl    r3, r3, %[n]\n\t"
11046         "lsr    r5, r5, r6\n\t"
11047         "orr    r4, r4, r5\n\t"
11048         "ldr    r2, [%[a], #28]\n\t"
11049         "str    r4, [%[r], #36]\n\t"
11050         "lsr    r5, r2, #1\n\t"
11051         "lsl    r2, r2, %[n]\n\t"
11052         "lsr    r5, r5, r6\n\t"
11053         "orr    r3, r3, r5\n\t"
11054         "ldr    r4, [%[a], #24]\n\t"
11055         "str    r3, [%[r], #32]\n\t"
11056         "lsr    r5, r4, #1\n\t"
11057         "lsl    r4, r4, %[n]\n\t"
11058         "lsr    r5, r5, r6\n\t"
11059         "orr    r2, r2, r5\n\t"
11060         "ldr    r3, [%[a], #20]\n\t"
11061         "str    r2, [%[r], #28]\n\t"
11062         "lsr    r5, r3, #1\n\t"
11063         "lsl    r3, r3, %[n]\n\t"
11064         "lsr    r5, r5, r6\n\t"
11065         "orr    r4, r4, r5\n\t"
11066         "ldr    r2, [%[a], #16]\n\t"
11067         "str    r4, [%[r], #24]\n\t"
11068         "lsr    r5, r2, #1\n\t"
11069         "lsl    r2, r2, %[n]\n\t"
11070         "lsr    r5, r5, r6\n\t"
11071         "orr    r3, r3, r5\n\t"
11072         "ldr    r4, [%[a], #12]\n\t"
11073         "str    r3, [%[r], #20]\n\t"
11074         "lsr    r5, r4, #1\n\t"
11075         "lsl    r4, r4, %[n]\n\t"
11076         "lsr    r5, r5, r6\n\t"
11077         "orr    r2, r2, r5\n\t"
11078         "ldr    r3, [%[a], #8]\n\t"
11079         "str    r2, [%[r], #16]\n\t"
11080         "lsr    r5, r3, #1\n\t"
11081         "lsl    r3, r3, %[n]\n\t"
11082         "lsr    r5, r5, r6\n\t"
11083         "orr    r4, r4, r5\n\t"
11084         "ldr    r2, [%[a], #4]\n\t"
11085         "str    r4, [%[r], #12]\n\t"
11086         "lsr    r5, r2, #1\n\t"
11087         "lsl    r2, r2, %[n]\n\t"
11088         "lsr    r5, r5, r6\n\t"
11089         "orr    r3, r3, r5\n\t"
11090         "ldr    r4, [%[a], #0]\n\t"
11091         "str    r3, [%[r], #8]\n\t"
11092         "lsr    r5, r4, #1\n\t"
11093         "lsl    r4, r4, %[n]\n\t"
11094         "lsr    r5, r5, r6\n\t"
11095         "orr    r2, r2, r5\n\t"
11096         "str    r4, [%[r]]\n\t"
11097         "str    r2, [%[r], #4]\n\t"
11098         :
11099         : [r] "r" (r), [a] "r" (a), [n] "r" (n)
11100         : "memory", "r2", "r3", "r4", "r5", "r6"
11101     );
11102 }
11103 
11104 /* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
11105  *
11106  * r     A single precision number that is the result of the operation.
11107  * e     A single precision number that is the exponent.
11108  * bits  The number of bits in the exponent.
11109  * m     A single precision number that is the modulus.
11110  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
11111  */
11112 static int sp_3072_mod_exp_2_96(sp_digit* r, const sp_digit* e, int bits,
11113         const sp_digit* m)
11114 {
11115 #ifndef WOLFSSL_SMALL_STACK
11116     sp_digit nd[192];
11117     sp_digit td[97];
11118 #else
11119     sp_digit* td;
11120 #endif
11121     sp_digit* norm;
11122     sp_digit* tmp;
11123     sp_digit mp = 1;
11124     sp_digit n, o;
11125     sp_digit mask;
11126     int i;
11127     int c, y;
11128     int err = MP_OKAY;
11129 
11130 #ifdef WOLFSSL_SMALL_STACK
11131     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 289, NULL,
11132                             DYNAMIC_TYPE_TMP_BUFFER);
11133     if (td == NULL) {
11134         err = MEMORY_E;
11135     }
11136 #endif
11137 
11138     if (err == MP_OKAY) {
11139 #ifdef WOLFSSL_SMALL_STACK
11140         norm = td;
11141         tmp  = td + 192;
11142 #else
11143         norm = nd;
11144         tmp  = td;
11145 #endif
11146 
11147         sp_3072_mont_setup(m, &mp);
11148         sp_3072_mont_norm_96(norm, m);
11149 
11150         i = (bits - 1) / 32;
11151         n = e[i--];
11152         c = bits & 31;
11153         if (c == 0) {
11154             c = 32;
11155         }
11156         c -= bits % 5;
11157         if (c == 32) {
11158             c = 27;
11159         }
11160         y = (int)(n >> c);
11161         n <<= 32 - c;
11162         sp_3072_lshift_96(r, norm, y);
11163         for (; i>=0 || c>=5; ) {
11164             if (c == 0) {
11165                 n = e[i--];
11166                 y = n >> 27;
11167                 n <<= 5;
11168                 c = 27;
11169             }
11170             else if (c < 5) {
11171                 y = n >> 27;
11172                 n = e[i--];
11173                 c = 5 - c;
11174                 y |= n >> (32 - c);
11175                 n <<= c;
11176                 c = 32 - c;
11177             }
11178             else {
11179                 y = (n >> 27) & 0x1f;
11180                 n <<= 5;
11181                 c -= 5;
11182             }
11183 
11184             sp_3072_mont_sqr_96(r, r, m, mp);
11185             sp_3072_mont_sqr_96(r, r, m, mp);
11186             sp_3072_mont_sqr_96(r, r, m, mp);
11187             sp_3072_mont_sqr_96(r, r, m, mp);
11188             sp_3072_mont_sqr_96(r, r, m, mp);
11189 
11190             sp_3072_lshift_96(r, r, y);
11191             sp_3072_mul_d_96(tmp, norm, r[96]);
11192             r[96] = 0;
11193             o = sp_3072_add_96(r, r, tmp);
11194             sp_3072_cond_sub_96(r, r, m, (sp_digit)0 - o);
11195         }
11196 
11197         XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U);
11198         sp_3072_mont_reduce_96(r, m, mp);
11199 
11200         mask = 0 - (sp_3072_cmp_96(r, m) >= 0);
11201         sp_3072_cond_sub_96(r, r, m, mask);
11202     }
11203 
11204 #ifdef WOLFSSL_SMALL_STACK
11205     if (td != NULL) {
11206         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
11207     }
11208 #endif
11209 
11210     return err;
11211 }
11212 #endif /* HAVE_FFDHE_3072 */
11213 
11214 /* Perform the modular exponentiation for Diffie-Hellman.
11215  *
11216  * base     Base.
11217  * exp      Array of bytes that is the exponent.
11218  * expLen   Length of data, in bytes, in exponent.
11219  * mod      Modulus.
11220  * out      Buffer to hold big-endian bytes of exponentiation result.
11221  *          Must be at least 384 bytes long.
11222  * outLen   Length, in bytes, of exponentiation result.
11223  * returns 0 on success, MP_READ_E if there are too many bytes in an array
11224  * and MEMORY_E if memory allocation fails.
11225  */
11226 int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen,
11227     mp_int* mod, byte* out, word32* outLen)
11228 {
11229     int err = MP_OKAY;
11230     sp_digit b[192], e[96], m[96];
11231     sp_digit* r = b;
11232     word32 i;
11233 
11234     if (mp_count_bits(base) > 3072) {
11235         err = MP_READ_E;
11236     }
11237 
11238     if (err == MP_OKAY) {
11239         if (expLen > 384) {
11240             err = MP_READ_E;
11241         }
11242     }
11243 
11244     if (err == MP_OKAY) {
11245         if (mp_count_bits(mod) != 3072) {
11246             err = MP_READ_E;
11247         }
11248     }
11249 
11250     if (err == MP_OKAY) {
11251         sp_3072_from_mp(b, 96, base);
11252         sp_3072_from_bin(e, 96, exp, expLen);
11253         sp_3072_from_mp(m, 96, mod);
11254 
11255     #ifdef HAVE_FFDHE_3072
11256         if (base->used == 1 && base->dp[0] == 2 && m[95] == (sp_digit)-1)
11257             err = sp_3072_mod_exp_2_96(r, e, expLen * 8, m);
11258         else
11259     #endif
11260             err = sp_3072_mod_exp_96(r, b, e, expLen * 8, m, 0);
11261 
11262     }
11263 
11264     if (err == MP_OKAY) {
11265         sp_3072_to_bin(r, out);
11266         *outLen = 384;
11267         for (i=0; i<384 && out[i] == 0; i++) {
11268         }
11269         *outLen -= i;
11270         XMEMMOVE(out, out + i, *outLen);
11271 
11272     }
11273 
11274     XMEMSET(e, 0, sizeof(e));
11275 
11276     return err;
11277 }
11278 #endif /* WOLFSSL_HAVE_SP_DH */
11279 
11280 /* Perform the modular exponentiation for Diffie-Hellman.
11281  *
11282  * base  Base. MP integer.
11283  * exp   Exponent. MP integer.
11284  * mod   Modulus. MP integer.
11285  * res   Result. MP integer.
11286  * returns 0 on success, MP_READ_E if there are too many bytes in an array
11287  * and MEMORY_E if memory allocation fails.
11288  */
11289 int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
11290 {
11291     int err = MP_OKAY;
11292     sp_digit b[96], e[48], m[48];
11293     sp_digit* r = b;
11294     int expBits = mp_count_bits(exp);
11295 
11296     if (mp_count_bits(base) > 1536) {
11297         err = MP_READ_E;
11298     }
11299 
11300     if (err == MP_OKAY) {
11301         if (expBits > 1536) {
11302             err = MP_READ_E;
11303         }
11304     }
11305 
11306     if (err == MP_OKAY) {
11307         if (mp_count_bits(mod) != 1536) {
11308             err = MP_READ_E;
11309         }
11310     }
11311 
11312     if (err == MP_OKAY) {
11313         sp_3072_from_mp(b, 48, base);
11314         sp_3072_from_mp(e, 48, exp);
11315         sp_3072_from_mp(m, 48, mod);
11316 
11317         err = sp_3072_mod_exp_48(r, b, e, expBits, m, 0);
11318     }
11319 
11320     if (err == MP_OKAY) {
11321         XMEMSET(r + 48, 0, sizeof(*r) * 48U);
11322         err = sp_3072_to_mp(r, res);
11323         res->used = mod->used;
11324         mp_clamp(res);
11325     }
11326 
11327     XMEMSET(e, 0, sizeof(e));
11328 
11329     return err;
11330 }
11331 
11332 #endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
11333 
11334 #endif /* !WOLFSSL_SP_NO_3072 */
11335 
11336 #ifdef WOLFSSL_SP_4096
11337 /* Read big endian unsigned byte array into r.
11338  *
11339  * r  A single precision integer.
11340  * size  Maximum number of bytes to convert
11341  * a  Byte array.
11342  * n  Number of bytes in array to read.
11343  */
11344 static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n)
11345 {
11346     int i, j = 0;
11347     word32 s = 0;
11348 
11349     r[0] = 0;
11350     for (i = n-1; i >= 0; i--) {
11351         r[j] |= (((sp_digit)a[i]) << s);
11352         if (s >= 24U) {
11353             r[j] &= 0xffffffff;
11354             s = 32U - s;
11355             if (j + 1 >= size) {
11356                 break;
11357             }
11358             r[++j] = (sp_digit)a[i] >> s;
11359             s = 8U - s;
11360         }
11361         else {
11362             s += 8U;
11363         }
11364     }
11365 
11366     for (j++; j < size; j++) {
11367         r[j] = 0;
11368     }
11369 }
11370 
11371 /* Convert an mp_int to an array of sp_digit.
11372  *
11373  * r  A single precision integer.
11374  * size  Maximum number of bytes to convert
11375  * a  A multi-precision integer.
11376  */
11377 static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a)
11378 {
11379 #if DIGIT_BIT == 32
11380     int j;
11381 
11382     XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
11383 
11384     for (j = a->used; j < size; j++) {
11385         r[j] = 0;
11386     }
11387 #elif DIGIT_BIT > 32
11388     int i, j = 0;
11389     word32 s = 0;
11390 
11391     r[0] = 0;
11392     for (i = 0; i < a->used && j < size; i++) {
11393         r[j] |= ((sp_digit)a->dp[i] << s);
11394         r[j] &= 0xffffffff;
11395         s = 32U - s;
11396         if (j + 1 >= size) {
11397             break;
11398         }
11399         /* lint allow cast of mismatch word32 and mp_digit */
11400         r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
11401         while ((s + 32U) <= (word32)DIGIT_BIT) {
11402             s += 32U;
11403             r[j] &= 0xffffffff;
11404             if (j + 1 >= size) {
11405                 break;
11406             }
11407             if (s < (word32)DIGIT_BIT) {
11408                 /* lint allow cast of mismatch word32 and mp_digit */
11409                 r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
11410             }
11411             else {
11412                 r[++j] = 0L;
11413             }
11414         }
11415         s = (word32)DIGIT_BIT - s;
11416     }
11417 
11418     for (j++; j < size; j++) {
11419         r[j] = 0;
11420     }
11421 #else
11422     int i, j = 0, s = 0;
11423 
11424     r[0] = 0;
11425     for (i = 0; i < a->used && j < size; i++) {
11426         r[j] |= ((sp_digit)a->dp[i]) << s;
11427         if (s + DIGIT_BIT >= 32) {
11428             r[j] &= 0xffffffff;
11429             if (j + 1 >= size) {
11430                 break;
11431             }
11432             s = 32 - s;
11433             if (s == DIGIT_BIT) {
11434                 r[++j] = 0;
11435                 s = 0;
11436             }
11437             else {
11438                 r[++j] = a->dp[i] >> s;
11439                 s = DIGIT_BIT - s;
11440             }
11441         }
11442         else {
11443             s += DIGIT_BIT;
11444         }
11445     }
11446 
11447     for (j++; j < size; j++) {
11448         r[j] = 0;
11449     }
11450 #endif
11451 }
11452 
11453 /* Write r as big endian to byte array.
11454  * Fixed length number of bytes written: 512
11455  *
11456  * r  A single precision integer.
11457  * a  Byte array.
11458  */
11459 static void sp_4096_to_bin(sp_digit* r, byte* a)
11460 {
11461     int i, j, s = 0, b;
11462 
11463     j = 4096 / 8 - 1;
11464     a[j] = 0;
11465     for (i=0; i<128 && j>=0; i++) {
11466         b = 0;
11467         /* lint allow cast of mismatch sp_digit and int */
11468         a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
11469         b += 8 - s;
11470         if (j < 0) {
11471             break;
11472         }
11473         while (b < 32) {
11474             a[j--] = (byte)(r[i] >> b);
11475             b += 8;
11476             if (j < 0) {
11477                 break;
11478             }
11479         }
11480         s = 8 - (b - 32);
11481         if (j >= 0) {
11482             a[j] = 0;
11483         }
11484         if (s != 0) {
11485             j++;
11486         }
11487     }
11488 }
11489 
11490 #ifndef WOLFSSL_SP_SMALL
11491 /* Add b to a into r. (r = a + b)
11492  *
11493  * r  A single precision integer.
11494  * a  A single precision integer.
11495  * b  A single precision integer.
11496  */
11497 SP_NOINLINE static sp_digit sp_4096_add_64(sp_digit* r, const sp_digit* a,
11498         const sp_digit* b)
11499 {
11500     sp_digit c = 0;
11501 
11502     __asm__ __volatile__ (
11503         "mov    r7, #0\n\t"
11504         "mvn    r7, r7\n\t"
11505         "ldr    r4, [%[a], #0]\n\t"
11506         "ldr    r5, [%[b], #0]\n\t"
11507         "add    r4, r5\n\t"
11508         "str    r4, [%[r], #0]\n\t"
11509         "ldr    r4, [%[a], #4]\n\t"
11510         "ldr    r5, [%[b], #4]\n\t"
11511         "adc    r4, r5\n\t"
11512         "str    r4, [%[r], #4]\n\t"
11513         "ldr    r4, [%[a], #8]\n\t"
11514         "ldr    r5, [%[b], #8]\n\t"
11515         "adc    r4, r5\n\t"
11516         "str    r4, [%[r], #8]\n\t"
11517         "ldr    r4, [%[a], #12]\n\t"
11518         "ldr    r5, [%[b], #12]\n\t"
11519         "adc    r4, r5\n\t"
11520         "str    r4, [%[r], #12]\n\t"
11521         "ldr    r4, [%[a], #16]\n\t"
11522         "ldr    r5, [%[b], #16]\n\t"
11523         "adc    r4, r5\n\t"
11524         "str    r4, [%[r], #16]\n\t"
11525         "ldr    r4, [%[a], #20]\n\t"
11526         "ldr    r5, [%[b], #20]\n\t"
11527         "adc    r4, r5\n\t"
11528         "str    r4, [%[r], #20]\n\t"
11529         "ldr    r4, [%[a], #24]\n\t"
11530         "ldr    r5, [%[b], #24]\n\t"
11531         "adc    r4, r5\n\t"
11532         "str    r4, [%[r], #24]\n\t"
11533         "ldr    r4, [%[a], #28]\n\t"
11534         "ldr    r5, [%[b], #28]\n\t"
11535         "adc    r4, r5\n\t"
11536         "str    r4, [%[r], #28]\n\t"
11537         "ldr    r4, [%[a], #32]\n\t"
11538         "ldr    r5, [%[b], #32]\n\t"
11539         "adc    r4, r5\n\t"
11540         "str    r4, [%[r], #32]\n\t"
11541         "ldr    r4, [%[a], #36]\n\t"
11542         "ldr    r5, [%[b], #36]\n\t"
11543         "adc    r4, r5\n\t"
11544         "str    r4, [%[r], #36]\n\t"
11545         "ldr    r4, [%[a], #40]\n\t"
11546         "ldr    r5, [%[b], #40]\n\t"
11547         "adc    r4, r5\n\t"
11548         "str    r4, [%[r], #40]\n\t"
11549         "ldr    r4, [%[a], #44]\n\t"
11550         "ldr    r5, [%[b], #44]\n\t"
11551         "adc    r4, r5\n\t"
11552         "str    r4, [%[r], #44]\n\t"
11553         "ldr    r4, [%[a], #48]\n\t"
11554         "ldr    r5, [%[b], #48]\n\t"
11555         "adc    r4, r5\n\t"
11556         "str    r4, [%[r], #48]\n\t"
11557         "ldr    r4, [%[a], #52]\n\t"
11558         "ldr    r5, [%[b], #52]\n\t"
11559         "adc    r4, r5\n\t"
11560         "str    r4, [%[r], #52]\n\t"
11561         "ldr    r4, [%[a], #56]\n\t"
11562         "ldr    r5, [%[b], #56]\n\t"
11563         "adc    r4, r5\n\t"
11564         "str    r4, [%[r], #56]\n\t"
11565         "ldr    r4, [%[a], #60]\n\t"
11566         "ldr    r5, [%[b], #60]\n\t"
11567         "adc    r4, r5\n\t"
11568         "str    r4, [%[r], #60]\n\t"
11569         "ldr    r4, [%[a], #64]\n\t"
11570         "ldr    r5, [%[b], #64]\n\t"
11571         "adc    r4, r5\n\t"
11572         "str    r4, [%[r], #64]\n\t"
11573         "ldr    r4, [%[a], #68]\n\t"
11574         "ldr    r5, [%[b], #68]\n\t"
11575         "adc    r4, r5\n\t"
11576         "str    r4, [%[r], #68]\n\t"
11577         "ldr    r4, [%[a], #72]\n\t"
11578         "ldr    r5, [%[b], #72]\n\t"
11579         "adc    r4, r5\n\t"
11580         "str    r4, [%[r], #72]\n\t"
11581         "ldr    r4, [%[a], #76]\n\t"
11582         "ldr    r5, [%[b], #76]\n\t"
11583         "adc    r4, r5\n\t"
11584         "str    r4, [%[r], #76]\n\t"
11585         "ldr    r4, [%[a], #80]\n\t"
11586         "ldr    r5, [%[b], #80]\n\t"
11587         "adc    r4, r5\n\t"
11588         "str    r4, [%[r], #80]\n\t"
11589         "ldr    r4, [%[a], #84]\n\t"
11590         "ldr    r5, [%[b], #84]\n\t"
11591         "adc    r4, r5\n\t"
11592         "str    r4, [%[r], #84]\n\t"
11593         "ldr    r4, [%[a], #88]\n\t"
11594         "ldr    r5, [%[b], #88]\n\t"
11595         "adc    r4, r5\n\t"
11596         "str    r4, [%[r], #88]\n\t"
11597         "ldr    r4, [%[a], #92]\n\t"
11598         "ldr    r5, [%[b], #92]\n\t"
11599         "adc    r4, r5\n\t"
11600         "str    r4, [%[r], #92]\n\t"
11601         "ldr    r4, [%[a], #96]\n\t"
11602         "ldr    r5, [%[b], #96]\n\t"
11603         "adc    r4, r5\n\t"
11604         "str    r4, [%[r], #96]\n\t"
11605         "ldr    r4, [%[a], #100]\n\t"
11606         "ldr    r5, [%[b], #100]\n\t"
11607         "adc    r4, r5\n\t"
11608         "str    r4, [%[r], #100]\n\t"
11609         "ldr    r4, [%[a], #104]\n\t"
11610         "ldr    r5, [%[b], #104]\n\t"
11611         "adc    r4, r5\n\t"
11612         "str    r4, [%[r], #104]\n\t"
11613         "ldr    r4, [%[a], #108]\n\t"
11614         "ldr    r5, [%[b], #108]\n\t"
11615         "adc    r4, r5\n\t"
11616         "str    r4, [%[r], #108]\n\t"
11617         "ldr    r4, [%[a], #112]\n\t"
11618         "ldr    r5, [%[b], #112]\n\t"
11619         "adc    r4, r5\n\t"
11620         "str    r4, [%[r], #112]\n\t"
11621         "ldr    r4, [%[a], #116]\n\t"
11622         "ldr    r5, [%[b], #116]\n\t"
11623         "adc    r4, r5\n\t"
11624         "str    r4, [%[r], #116]\n\t"
11625         "ldr    r4, [%[a], #120]\n\t"
11626         "ldr    r5, [%[b], #120]\n\t"
11627         "adc    r4, r5\n\t"
11628         "str    r4, [%[r], #120]\n\t"
11629         "ldr    r4, [%[a], #124]\n\t"
11630         "ldr    r5, [%[b], #124]\n\t"
11631         "adc    r4, r5\n\t"
11632         "str    r4, [%[r], #124]\n\t"
11633         "mov    %[c], #0\n\t"
11634         "adc    %[c], %[c]\n\t"
11635         "add    %[a], #0x80\n\t"
11636         "add    %[b], #0x80\n\t"
11637         "add    %[r], #0x80\n\t"
11638         "add    %[c], r7\n\t"
11639         "ldr    r4, [%[a], #0]\n\t"
11640         "ldr    r5, [%[b], #0]\n\t"
11641         "adc    r4, r5\n\t"
11642         "str    r4, [%[r], #0]\n\t"
11643         "ldr    r4, [%[a], #4]\n\t"
11644         "ldr    r5, [%[b], #4]\n\t"
11645         "adc    r4, r5\n\t"
11646         "str    r4, [%[r], #4]\n\t"
11647         "ldr    r4, [%[a], #8]\n\t"
11648         "ldr    r5, [%[b], #8]\n\t"
11649         "adc    r4, r5\n\t"
11650         "str    r4, [%[r], #8]\n\t"
11651         "ldr    r4, [%[a], #12]\n\t"
11652         "ldr    r5, [%[b], #12]\n\t"
11653         "adc    r4, r5\n\t"
11654         "str    r4, [%[r], #12]\n\t"
11655         "ldr    r4, [%[a], #16]\n\t"
11656         "ldr    r5, [%[b], #16]\n\t"
11657         "adc    r4, r5\n\t"
11658         "str    r4, [%[r], #16]\n\t"
11659         "ldr    r4, [%[a], #20]\n\t"
11660         "ldr    r5, [%[b], #20]\n\t"
11661         "adc    r4, r5\n\t"
11662         "str    r4, [%[r], #20]\n\t"
11663         "ldr    r4, [%[a], #24]\n\t"
11664         "ldr    r5, [%[b], #24]\n\t"
11665         "adc    r4, r5\n\t"
11666         "str    r4, [%[r], #24]\n\t"
11667         "ldr    r4, [%[a], #28]\n\t"
11668         "ldr    r5, [%[b], #28]\n\t"
11669         "adc    r4, r5\n\t"
11670         "str    r4, [%[r], #28]\n\t"
11671         "ldr    r4, [%[a], #32]\n\t"
11672         "ldr    r5, [%[b], #32]\n\t"
11673         "adc    r4, r5\n\t"
11674         "str    r4, [%[r], #32]\n\t"
11675         "ldr    r4, [%[a], #36]\n\t"
11676         "ldr    r5, [%[b], #36]\n\t"
11677         "adc    r4, r5\n\t"
11678         "str    r4, [%[r], #36]\n\t"
11679         "ldr    r4, [%[a], #40]\n\t"
11680         "ldr    r5, [%[b], #40]\n\t"
11681         "adc    r4, r5\n\t"
11682         "str    r4, [%[r], #40]\n\t"
11683         "ldr    r4, [%[a], #44]\n\t"
11684         "ldr    r5, [%[b], #44]\n\t"
11685         "adc    r4, r5\n\t"
11686         "str    r4, [%[r], #44]\n\t"
11687         "ldr    r4, [%[a], #48]\n\t"
11688         "ldr    r5, [%[b], #48]\n\t"
11689         "adc    r4, r5\n\t"
11690         "str    r4, [%[r], #48]\n\t"
11691         "ldr    r4, [%[a], #52]\n\t"
11692         "ldr    r5, [%[b], #52]\n\t"
11693         "adc    r4, r5\n\t"
11694         "str    r4, [%[r], #52]\n\t"
11695         "ldr    r4, [%[a], #56]\n\t"
11696         "ldr    r5, [%[b], #56]\n\t"
11697         "adc    r4, r5\n\t"
11698         "str    r4, [%[r], #56]\n\t"
11699         "ldr    r4, [%[a], #60]\n\t"
11700         "ldr    r5, [%[b], #60]\n\t"
11701         "adc    r4, r5\n\t"
11702         "str    r4, [%[r], #60]\n\t"
11703         "ldr    r4, [%[a], #64]\n\t"
11704         "ldr    r5, [%[b], #64]\n\t"
11705         "adc    r4, r5\n\t"
11706         "str    r4, [%[r], #64]\n\t"
11707         "ldr    r4, [%[a], #68]\n\t"
11708         "ldr    r5, [%[b], #68]\n\t"
11709         "adc    r4, r5\n\t"
11710         "str    r4, [%[r], #68]\n\t"
11711         "ldr    r4, [%[a], #72]\n\t"
11712         "ldr    r5, [%[b], #72]\n\t"
11713         "adc    r4, r5\n\t"
11714         "str    r4, [%[r], #72]\n\t"
11715         "ldr    r4, [%[a], #76]\n\t"
11716         "ldr    r5, [%[b], #76]\n\t"
11717         "adc    r4, r5\n\t"
11718         "str    r4, [%[r], #76]\n\t"
11719         "ldr    r4, [%[a], #80]\n\t"
11720         "ldr    r5, [%[b], #80]\n\t"
11721         "adc    r4, r5\n\t"
11722         "str    r4, [%[r], #80]\n\t"
11723         "ldr    r4, [%[a], #84]\n\t"
11724         "ldr    r5, [%[b], #84]\n\t"
11725         "adc    r4, r5\n\t"
11726         "str    r4, [%[r], #84]\n\t"
11727         "ldr    r4, [%[a], #88]\n\t"
11728         "ldr    r5, [%[b], #88]\n\t"
11729         "adc    r4, r5\n\t"
11730         "str    r4, [%[r], #88]\n\t"
11731         "ldr    r4, [%[a], #92]\n\t"
11732         "ldr    r5, [%[b], #92]\n\t"
11733         "adc    r4, r5\n\t"
11734         "str    r4, [%[r], #92]\n\t"
11735         "ldr    r4, [%[a], #96]\n\t"
11736         "ldr    r5, [%[b], #96]\n\t"
11737         "adc    r4, r5\n\t"
11738         "str    r4, [%[r], #96]\n\t"
11739         "ldr    r4, [%[a], #100]\n\t"
11740         "ldr    r5, [%[b], #100]\n\t"
11741         "adc    r4, r5\n\t"
11742         "str    r4, [%[r], #100]\n\t"
11743         "ldr    r4, [%[a], #104]\n\t"
11744         "ldr    r5, [%[b], #104]\n\t"
11745         "adc    r4, r5\n\t"
11746         "str    r4, [%[r], #104]\n\t"
11747         "ldr    r4, [%[a], #108]\n\t"
11748         "ldr    r5, [%[b], #108]\n\t"
11749         "adc    r4, r5\n\t"
11750         "str    r4, [%[r], #108]\n\t"
11751         "ldr    r4, [%[a], #112]\n\t"
11752         "ldr    r5, [%[b], #112]\n\t"
11753         "adc    r4, r5\n\t"
11754         "str    r4, [%[r], #112]\n\t"
11755         "ldr    r4, [%[a], #116]\n\t"
11756         "ldr    r5, [%[b], #116]\n\t"
11757         "adc    r4, r5\n\t"
11758         "str    r4, [%[r], #116]\n\t"
11759         "ldr    r4, [%[a], #120]\n\t"
11760         "ldr    r5, [%[b], #120]\n\t"
11761         "adc    r4, r5\n\t"
11762         "str    r4, [%[r], #120]\n\t"
11763         "ldr    r4, [%[a], #124]\n\t"
11764         "ldr    r5, [%[b], #124]\n\t"
11765         "adc    r4, r5\n\t"
11766         "str    r4, [%[r], #124]\n\t"
11767         "mov    %[c], #0\n\t"
11768         "adc    %[c], %[c]\n\t"
11769         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
11770         :
11771         : "memory", "r4", "r5", "r7"
11772     );
11773 
11774     return c;
11775 }
11776 
11777 /* Sub b from a into r. (r = a - b)
11778  *
11779  * r  A single precision integer.
11780  * a  A single precision integer.
11781  * b  A single precision integer.
11782  */
11783 SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a,
11784         const sp_digit* b)
11785 {
11786     sp_digit c = 0;
11787 
11788     __asm__ __volatile__ (
11789         "ldr    r3, [%[a], #0]\n\t"
11790         "ldr    r4, [%[a], #4]\n\t"
11791         "ldr    r5, [%[b], #0]\n\t"
11792         "ldr    r6, [%[b], #4]\n\t"
11793         "sub    r3, r5\n\t"
11794         "sbc    r4, r6\n\t"
11795         "str    r3, [%[a], #0]\n\t"
11796         "str    r4, [%[a], #4]\n\t"
11797         "ldr    r3, [%[a], #8]\n\t"
11798         "ldr    r4, [%[a], #12]\n\t"
11799         "ldr    r5, [%[b], #8]\n\t"
11800         "ldr    r6, [%[b], #12]\n\t"
11801         "sbc    r3, r5\n\t"
11802         "sbc    r4, r6\n\t"
11803         "str    r3, [%[a], #8]\n\t"
11804         "str    r4, [%[a], #12]\n\t"
11805         "ldr    r3, [%[a], #16]\n\t"
11806         "ldr    r4, [%[a], #20]\n\t"
11807         "ldr    r5, [%[b], #16]\n\t"
11808         "ldr    r6, [%[b], #20]\n\t"
11809         "sbc    r3, r5\n\t"
11810         "sbc    r4, r6\n\t"
11811         "str    r3, [%[a], #16]\n\t"
11812         "str    r4, [%[a], #20]\n\t"
11813         "ldr    r3, [%[a], #24]\n\t"
11814         "ldr    r4, [%[a], #28]\n\t"
11815         "ldr    r5, [%[b], #24]\n\t"
11816         "ldr    r6, [%[b], #28]\n\t"
11817         "sbc    r3, r5\n\t"
11818         "sbc    r4, r6\n\t"
11819         "str    r3, [%[a], #24]\n\t"
11820         "str    r4, [%[a], #28]\n\t"
11821         "ldr    r3, [%[a], #32]\n\t"
11822         "ldr    r4, [%[a], #36]\n\t"
11823         "ldr    r5, [%[b], #32]\n\t"
11824         "ldr    r6, [%[b], #36]\n\t"
11825         "sbc    r3, r5\n\t"
11826         "sbc    r4, r6\n\t"
11827         "str    r3, [%[a], #32]\n\t"
11828         "str    r4, [%[a], #36]\n\t"
11829         "ldr    r3, [%[a], #40]\n\t"
11830         "ldr    r4, [%[a], #44]\n\t"
11831         "ldr    r5, [%[b], #40]\n\t"
11832         "ldr    r6, [%[b], #44]\n\t"
11833         "sbc    r3, r5\n\t"
11834         "sbc    r4, r6\n\t"
11835         "str    r3, [%[a], #40]\n\t"
11836         "str    r4, [%[a], #44]\n\t"
11837         "ldr    r3, [%[a], #48]\n\t"
11838         "ldr    r4, [%[a], #52]\n\t"
11839         "ldr    r5, [%[b], #48]\n\t"
11840         "ldr    r6, [%[b], #52]\n\t"
11841         "sbc    r3, r5\n\t"
11842         "sbc    r4, r6\n\t"
11843         "str    r3, [%[a], #48]\n\t"
11844         "str    r4, [%[a], #52]\n\t"
11845         "ldr    r3, [%[a], #56]\n\t"
11846         "ldr    r4, [%[a], #60]\n\t"
11847         "ldr    r5, [%[b], #56]\n\t"
11848         "ldr    r6, [%[b], #60]\n\t"
11849         "sbc    r3, r5\n\t"
11850         "sbc    r4, r6\n\t"
11851         "str    r3, [%[a], #56]\n\t"
11852         "str    r4, [%[a], #60]\n\t"
11853         "ldr    r3, [%[a], #64]\n\t"
11854         "ldr    r4, [%[a], #68]\n\t"
11855         "ldr    r5, [%[b], #64]\n\t"
11856         "ldr    r6, [%[b], #68]\n\t"
11857         "sbc    r3, r5\n\t"
11858         "sbc    r4, r6\n\t"
11859         "str    r3, [%[a], #64]\n\t"
11860         "str    r4, [%[a], #68]\n\t"
11861         "ldr    r3, [%[a], #72]\n\t"
11862         "ldr    r4, [%[a], #76]\n\t"
11863         "ldr    r5, [%[b], #72]\n\t"
11864         "ldr    r6, [%[b], #76]\n\t"
11865         "sbc    r3, r5\n\t"
11866         "sbc    r4, r6\n\t"
11867         "str    r3, [%[a], #72]\n\t"
11868         "str    r4, [%[a], #76]\n\t"
11869         "ldr    r3, [%[a], #80]\n\t"
11870         "ldr    r4, [%[a], #84]\n\t"
11871         "ldr    r5, [%[b], #80]\n\t"
11872         "ldr    r6, [%[b], #84]\n\t"
11873         "sbc    r3, r5\n\t"
11874         "sbc    r4, r6\n\t"
11875         "str    r3, [%[a], #80]\n\t"
11876         "str    r4, [%[a], #84]\n\t"
11877         "ldr    r3, [%[a], #88]\n\t"
11878         "ldr    r4, [%[a], #92]\n\t"
11879         "ldr    r5, [%[b], #88]\n\t"
11880         "ldr    r6, [%[b], #92]\n\t"
11881         "sbc    r3, r5\n\t"
11882         "sbc    r4, r6\n\t"
11883         "str    r3, [%[a], #88]\n\t"
11884         "str    r4, [%[a], #92]\n\t"
11885         "ldr    r3, [%[a], #96]\n\t"
11886         "ldr    r4, [%[a], #100]\n\t"
11887         "ldr    r5, [%[b], #96]\n\t"
11888         "ldr    r6, [%[b], #100]\n\t"
11889         "sbc    r3, r5\n\t"
11890         "sbc    r4, r6\n\t"
11891         "str    r3, [%[a], #96]\n\t"
11892         "str    r4, [%[a], #100]\n\t"
11893         "ldr    r3, [%[a], #104]\n\t"
11894         "ldr    r4, [%[a], #108]\n\t"
11895         "ldr    r5, [%[b], #104]\n\t"
11896         "ldr    r6, [%[b], #108]\n\t"
11897         "sbc    r3, r5\n\t"
11898         "sbc    r4, r6\n\t"
11899         "str    r3, [%[a], #104]\n\t"
11900         "str    r4, [%[a], #108]\n\t"
11901         "ldr    r3, [%[a], #112]\n\t"
11902         "ldr    r4, [%[a], #116]\n\t"
11903         "ldr    r5, [%[b], #112]\n\t"
11904         "ldr    r6, [%[b], #116]\n\t"
11905         "sbc    r3, r5\n\t"
11906         "sbc    r4, r6\n\t"
11907         "str    r3, [%[a], #112]\n\t"
11908         "str    r4, [%[a], #116]\n\t"
11909         "ldr    r3, [%[a], #120]\n\t"
11910         "ldr    r4, [%[a], #124]\n\t"
11911         "ldr    r5, [%[b], #120]\n\t"
11912         "ldr    r6, [%[b], #124]\n\t"
11913         "sbc    r3, r5\n\t"
11914         "sbc    r4, r6\n\t"
11915         "str    r3, [%[a], #120]\n\t"
11916         "str    r4, [%[a], #124]\n\t"
11917         "sbc    %[c], %[c]\n\t"
11918         "add    %[a], #0x80\n\t"
11919         "add    %[b], #0x80\n\t"
11920         "mov    r5, #0\n\t"
11921         "sub    r5, %[c]\n\t"
11922         "ldr    r3, [%[a], #0]\n\t"
11923         "ldr    r4, [%[a], #4]\n\t"
11924         "ldr    r5, [%[b], #0]\n\t"
11925         "ldr    r6, [%[b], #4]\n\t"
11926         "sbc    r3, r5\n\t"
11927         "sbc    r4, r6\n\t"
11928         "str    r3, [%[a], #0]\n\t"
11929         "str    r4, [%[a], #4]\n\t"
11930         "ldr    r3, [%[a], #8]\n\t"
11931         "ldr    r4, [%[a], #12]\n\t"
11932         "ldr    r5, [%[b], #8]\n\t"
11933         "ldr    r6, [%[b], #12]\n\t"
11934         "sbc    r3, r5\n\t"
11935         "sbc    r4, r6\n\t"
11936         "str    r3, [%[a], #8]\n\t"
11937         "str    r4, [%[a], #12]\n\t"
11938         "ldr    r3, [%[a], #16]\n\t"
11939         "ldr    r4, [%[a], #20]\n\t"
11940         "ldr    r5, [%[b], #16]\n\t"
11941         "ldr    r6, [%[b], #20]\n\t"
11942         "sbc    r3, r5\n\t"
11943         "sbc    r4, r6\n\t"
11944         "str    r3, [%[a], #16]\n\t"
11945         "str    r4, [%[a], #20]\n\t"
11946         "ldr    r3, [%[a], #24]\n\t"
11947         "ldr    r4, [%[a], #28]\n\t"
11948         "ldr    r5, [%[b], #24]\n\t"
11949         "ldr    r6, [%[b], #28]\n\t"
11950         "sbc    r3, r5\n\t"
11951         "sbc    r4, r6\n\t"
11952         "str    r3, [%[a], #24]\n\t"
11953         "str    r4, [%[a], #28]\n\t"
11954         "ldr    r3, [%[a], #32]\n\t"
11955         "ldr    r4, [%[a], #36]\n\t"
11956         "ldr    r5, [%[b], #32]\n\t"
11957         "ldr    r6, [%[b], #36]\n\t"
11958         "sbc    r3, r5\n\t"
11959         "sbc    r4, r6\n\t"
11960         "str    r3, [%[a], #32]\n\t"
11961         "str    r4, [%[a], #36]\n\t"
11962         "ldr    r3, [%[a], #40]\n\t"
11963         "ldr    r4, [%[a], #44]\n\t"
11964         "ldr    r5, [%[b], #40]\n\t"
11965         "ldr    r6, [%[b], #44]\n\t"
11966         "sbc    r3, r5\n\t"
11967         "sbc    r4, r6\n\t"
11968         "str    r3, [%[a], #40]\n\t"
11969         "str    r4, [%[a], #44]\n\t"
11970         "ldr    r3, [%[a], #48]\n\t"
11971         "ldr    r4, [%[a], #52]\n\t"
11972         "ldr    r5, [%[b], #48]\n\t"
11973         "ldr    r6, [%[b], #52]\n\t"
11974         "sbc    r3, r5\n\t"
11975         "sbc    r4, r6\n\t"
11976         "str    r3, [%[a], #48]\n\t"
11977         "str    r4, [%[a], #52]\n\t"
11978         "ldr    r3, [%[a], #56]\n\t"
11979         "ldr    r4, [%[a], #60]\n\t"
11980         "ldr    r5, [%[b], #56]\n\t"
11981         "ldr    r6, [%[b], #60]\n\t"
11982         "sbc    r3, r5\n\t"
11983         "sbc    r4, r6\n\t"
11984         "str    r3, [%[a], #56]\n\t"
11985         "str    r4, [%[a], #60]\n\t"
11986         "ldr    r3, [%[a], #64]\n\t"
11987         "ldr    r4, [%[a], #68]\n\t"
11988         "ldr    r5, [%[b], #64]\n\t"
11989         "ldr    r6, [%[b], #68]\n\t"
11990         "sbc    r3, r5\n\t"
11991         "sbc    r4, r6\n\t"
11992         "str    r3, [%[a], #64]\n\t"
11993         "str    r4, [%[a], #68]\n\t"
11994         "ldr    r3, [%[a], #72]\n\t"
11995         "ldr    r4, [%[a], #76]\n\t"
11996         "ldr    r5, [%[b], #72]\n\t"
11997         "ldr    r6, [%[b], #76]\n\t"
11998         "sbc    r3, r5\n\t"
11999         "sbc    r4, r6\n\t"
12000         "str    r3, [%[a], #72]\n\t"
12001         "str    r4, [%[a], #76]\n\t"
12002         "ldr    r3, [%[a], #80]\n\t"
12003         "ldr    r4, [%[a], #84]\n\t"
12004         "ldr    r5, [%[b], #80]\n\t"
12005         "ldr    r6, [%[b], #84]\n\t"
12006         "sbc    r3, r5\n\t"
12007         "sbc    r4, r6\n\t"
12008         "str    r3, [%[a], #80]\n\t"
12009         "str    r4, [%[a], #84]\n\t"
12010         "ldr    r3, [%[a], #88]\n\t"
12011         "ldr    r4, [%[a], #92]\n\t"
12012         "ldr    r5, [%[b], #88]\n\t"
12013         "ldr    r6, [%[b], #92]\n\t"
12014         "sbc    r3, r5\n\t"
12015         "sbc    r4, r6\n\t"
12016         "str    r3, [%[a], #88]\n\t"
12017         "str    r4, [%[a], #92]\n\t"
12018         "ldr    r3, [%[a], #96]\n\t"
12019         "ldr    r4, [%[a], #100]\n\t"
12020         "ldr    r5, [%[b], #96]\n\t"
12021         "ldr    r6, [%[b], #100]\n\t"
12022         "sbc    r3, r5\n\t"
12023         "sbc    r4, r6\n\t"
12024         "str    r3, [%[a], #96]\n\t"
12025         "str    r4, [%[a], #100]\n\t"
12026         "ldr    r3, [%[a], #104]\n\t"
12027         "ldr    r4, [%[a], #108]\n\t"
12028         "ldr    r5, [%[b], #104]\n\t"
12029         "ldr    r6, [%[b], #108]\n\t"
12030         "sbc    r3, r5\n\t"
12031         "sbc    r4, r6\n\t"
12032         "str    r3, [%[a], #104]\n\t"
12033         "str    r4, [%[a], #108]\n\t"
12034         "ldr    r3, [%[a], #112]\n\t"
12035         "ldr    r4, [%[a], #116]\n\t"
12036         "ldr    r5, [%[b], #112]\n\t"
12037         "ldr    r6, [%[b], #116]\n\t"
12038         "sbc    r3, r5\n\t"
12039         "sbc    r4, r6\n\t"
12040         "str    r3, [%[a], #112]\n\t"
12041         "str    r4, [%[a], #116]\n\t"
12042         "ldr    r3, [%[a], #120]\n\t"
12043         "ldr    r4, [%[a], #124]\n\t"
12044         "ldr    r5, [%[b], #120]\n\t"
12045         "ldr    r6, [%[b], #124]\n\t"
12046         "sbc    r3, r5\n\t"
12047         "sbc    r4, r6\n\t"
12048         "str    r3, [%[a], #120]\n\t"
12049         "str    r4, [%[a], #124]\n\t"
12050         "sbc    %[c], %[c]\n\t"
12051         "add    %[a], #0x80\n\t"
12052         "add    %[b], #0x80\n\t"
12053         "mov    r5, #0\n\t"
12054         "sub    r5, %[c]\n\t"
12055         "ldr    r3, [%[a], #0]\n\t"
12056         "ldr    r4, [%[a], #4]\n\t"
12057         "ldr    r5, [%[b], #0]\n\t"
12058         "ldr    r6, [%[b], #4]\n\t"
12059         "sbc    r3, r5\n\t"
12060         "sbc    r4, r6\n\t"
12061         "str    r3, [%[a], #0]\n\t"
12062         "str    r4, [%[a], #4]\n\t"
12063         "ldr    r3, [%[a], #8]\n\t"
12064         "ldr    r4, [%[a], #12]\n\t"
12065         "ldr    r5, [%[b], #8]\n\t"
12066         "ldr    r6, [%[b], #12]\n\t"
12067         "sbc    r3, r5\n\t"
12068         "sbc    r4, r6\n\t"
12069         "str    r3, [%[a], #8]\n\t"
12070         "str    r4, [%[a], #12]\n\t"
12071         "ldr    r3, [%[a], #16]\n\t"
12072         "ldr    r4, [%[a], #20]\n\t"
12073         "ldr    r5, [%[b], #16]\n\t"
12074         "ldr    r6, [%[b], #20]\n\t"
12075         "sbc    r3, r5\n\t"
12076         "sbc    r4, r6\n\t"
12077         "str    r3, [%[a], #16]\n\t"
12078         "str    r4, [%[a], #20]\n\t"
12079         "ldr    r3, [%[a], #24]\n\t"
12080         "ldr    r4, [%[a], #28]\n\t"
12081         "ldr    r5, [%[b], #24]\n\t"
12082         "ldr    r6, [%[b], #28]\n\t"
12083         "sbc    r3, r5\n\t"
12084         "sbc    r4, r6\n\t"
12085         "str    r3, [%[a], #24]\n\t"
12086         "str    r4, [%[a], #28]\n\t"
12087         "ldr    r3, [%[a], #32]\n\t"
12088         "ldr    r4, [%[a], #36]\n\t"
12089         "ldr    r5, [%[b], #32]\n\t"
12090         "ldr    r6, [%[b], #36]\n\t"
12091         "sbc    r3, r5\n\t"
12092         "sbc    r4, r6\n\t"
12093         "str    r3, [%[a], #32]\n\t"
12094         "str    r4, [%[a], #36]\n\t"
12095         "ldr    r3, [%[a], #40]\n\t"
12096         "ldr    r4, [%[a], #44]\n\t"
12097         "ldr    r5, [%[b], #40]\n\t"
12098         "ldr    r6, [%[b], #44]\n\t"
12099         "sbc    r3, r5\n\t"
12100         "sbc    r4, r6\n\t"
12101         "str    r3, [%[a], #40]\n\t"
12102         "str    r4, [%[a], #44]\n\t"
12103         "ldr    r3, [%[a], #48]\n\t"
12104         "ldr    r4, [%[a], #52]\n\t"
12105         "ldr    r5, [%[b], #48]\n\t"
12106         "ldr    r6, [%[b], #52]\n\t"
12107         "sbc    r3, r5\n\t"
12108         "sbc    r4, r6\n\t"
12109         "str    r3, [%[a], #48]\n\t"
12110         "str    r4, [%[a], #52]\n\t"
12111         "ldr    r3, [%[a], #56]\n\t"
12112         "ldr    r4, [%[a], #60]\n\t"
12113         "ldr    r5, [%[b], #56]\n\t"
12114         "ldr    r6, [%[b], #60]\n\t"
12115         "sbc    r3, r5\n\t"
12116         "sbc    r4, r6\n\t"
12117         "str    r3, [%[a], #56]\n\t"
12118         "str    r4, [%[a], #60]\n\t"
12119         "ldr    r3, [%[a], #64]\n\t"
12120         "ldr    r4, [%[a], #68]\n\t"
12121         "ldr    r5, [%[b], #64]\n\t"
12122         "ldr    r6, [%[b], #68]\n\t"
12123         "sbc    r3, r5\n\t"
12124         "sbc    r4, r6\n\t"
12125         "str    r3, [%[a], #64]\n\t"
12126         "str    r4, [%[a], #68]\n\t"
12127         "ldr    r3, [%[a], #72]\n\t"
12128         "ldr    r4, [%[a], #76]\n\t"
12129         "ldr    r5, [%[b], #72]\n\t"
12130         "ldr    r6, [%[b], #76]\n\t"
12131         "sbc    r3, r5\n\t"
12132         "sbc    r4, r6\n\t"
12133         "str    r3, [%[a], #72]\n\t"
12134         "str    r4, [%[a], #76]\n\t"
12135         "ldr    r3, [%[a], #80]\n\t"
12136         "ldr    r4, [%[a], #84]\n\t"
12137         "ldr    r5, [%[b], #80]\n\t"
12138         "ldr    r6, [%[b], #84]\n\t"
12139         "sbc    r3, r5\n\t"
12140         "sbc    r4, r6\n\t"
12141         "str    r3, [%[a], #80]\n\t"
12142         "str    r4, [%[a], #84]\n\t"
12143         "ldr    r3, [%[a], #88]\n\t"
12144         "ldr    r4, [%[a], #92]\n\t"
12145         "ldr    r5, [%[b], #88]\n\t"
12146         "ldr    r6, [%[b], #92]\n\t"
12147         "sbc    r3, r5\n\t"
12148         "sbc    r4, r6\n\t"
12149         "str    r3, [%[a], #88]\n\t"
12150         "str    r4, [%[a], #92]\n\t"
12151         "ldr    r3, [%[a], #96]\n\t"
12152         "ldr    r4, [%[a], #100]\n\t"
12153         "ldr    r5, [%[b], #96]\n\t"
12154         "ldr    r6, [%[b], #100]\n\t"
12155         "sbc    r3, r5\n\t"
12156         "sbc    r4, r6\n\t"
12157         "str    r3, [%[a], #96]\n\t"
12158         "str    r4, [%[a], #100]\n\t"
12159         "ldr    r3, [%[a], #104]\n\t"
12160         "ldr    r4, [%[a], #108]\n\t"
12161         "ldr    r5, [%[b], #104]\n\t"
12162         "ldr    r6, [%[b], #108]\n\t"
12163         "sbc    r3, r5\n\t"
12164         "sbc    r4, r6\n\t"
12165         "str    r3, [%[a], #104]\n\t"
12166         "str    r4, [%[a], #108]\n\t"
12167         "ldr    r3, [%[a], #112]\n\t"
12168         "ldr    r4, [%[a], #116]\n\t"
12169         "ldr    r5, [%[b], #112]\n\t"
12170         "ldr    r6, [%[b], #116]\n\t"
12171         "sbc    r3, r5\n\t"
12172         "sbc    r4, r6\n\t"
12173         "str    r3, [%[a], #112]\n\t"
12174         "str    r4, [%[a], #116]\n\t"
12175         "ldr    r3, [%[a], #120]\n\t"
12176         "ldr    r4, [%[a], #124]\n\t"
12177         "ldr    r5, [%[b], #120]\n\t"
12178         "ldr    r6, [%[b], #124]\n\t"
12179         "sbc    r3, r5\n\t"
12180         "sbc    r4, r6\n\t"
12181         "str    r3, [%[a], #120]\n\t"
12182         "str    r4, [%[a], #124]\n\t"
12183         "sbc    %[c], %[c]\n\t"
12184         "add    %[a], #0x80\n\t"
12185         "add    %[b], #0x80\n\t"
12186         "mov    r5, #0\n\t"
12187         "sub    r5, %[c]\n\t"
12188         "ldr    r3, [%[a], #0]\n\t"
12189         "ldr    r4, [%[a], #4]\n\t"
12190         "ldr    r5, [%[b], #0]\n\t"
12191         "ldr    r6, [%[b], #4]\n\t"
12192         "sbc    r3, r5\n\t"
12193         "sbc    r4, r6\n\t"
12194         "str    r3, [%[a], #0]\n\t"
12195         "str    r4, [%[a], #4]\n\t"
12196         "ldr    r3, [%[a], #8]\n\t"
12197         "ldr    r4, [%[a], #12]\n\t"
12198         "ldr    r5, [%[b], #8]\n\t"
12199         "ldr    r6, [%[b], #12]\n\t"
12200         "sbc    r3, r5\n\t"
12201         "sbc    r4, r6\n\t"
12202         "str    r3, [%[a], #8]\n\t"
12203         "str    r4, [%[a], #12]\n\t"
12204         "ldr    r3, [%[a], #16]\n\t"
12205         "ldr    r4, [%[a], #20]\n\t"
12206         "ldr    r5, [%[b], #16]\n\t"
12207         "ldr    r6, [%[b], #20]\n\t"
12208         "sbc    r3, r5\n\t"
12209         "sbc    r4, r6\n\t"
12210         "str    r3, [%[a], #16]\n\t"
12211         "str    r4, [%[a], #20]\n\t"
12212         "ldr    r3, [%[a], #24]\n\t"
12213         "ldr    r4, [%[a], #28]\n\t"
12214         "ldr    r5, [%[b], #24]\n\t"
12215         "ldr    r6, [%[b], #28]\n\t"
12216         "sbc    r3, r5\n\t"
12217         "sbc    r4, r6\n\t"
12218         "str    r3, [%[a], #24]\n\t"
12219         "str    r4, [%[a], #28]\n\t"
12220         "ldr    r3, [%[a], #32]\n\t"
12221         "ldr    r4, [%[a], #36]\n\t"
12222         "ldr    r5, [%[b], #32]\n\t"
12223         "ldr    r6, [%[b], #36]\n\t"
12224         "sbc    r3, r5\n\t"
12225         "sbc    r4, r6\n\t"
12226         "str    r3, [%[a], #32]\n\t"
12227         "str    r4, [%[a], #36]\n\t"
12228         "ldr    r3, [%[a], #40]\n\t"
12229         "ldr    r4, [%[a], #44]\n\t"
12230         "ldr    r5, [%[b], #40]\n\t"
12231         "ldr    r6, [%[b], #44]\n\t"
12232         "sbc    r3, r5\n\t"
12233         "sbc    r4, r6\n\t"
12234         "str    r3, [%[a], #40]\n\t"
12235         "str    r4, [%[a], #44]\n\t"
12236         "ldr    r3, [%[a], #48]\n\t"
12237         "ldr    r4, [%[a], #52]\n\t"
12238         "ldr    r5, [%[b], #48]\n\t"
12239         "ldr    r6, [%[b], #52]\n\t"
12240         "sbc    r3, r5\n\t"
12241         "sbc    r4, r6\n\t"
12242         "str    r3, [%[a], #48]\n\t"
12243         "str    r4, [%[a], #52]\n\t"
12244         "ldr    r3, [%[a], #56]\n\t"
12245         "ldr    r4, [%[a], #60]\n\t"
12246         "ldr    r5, [%[b], #56]\n\t"
12247         "ldr    r6, [%[b], #60]\n\t"
12248         "sbc    r3, r5\n\t"
12249         "sbc    r4, r6\n\t"
12250         "str    r3, [%[a], #56]\n\t"
12251         "str    r4, [%[a], #60]\n\t"
12252         "ldr    r3, [%[a], #64]\n\t"
12253         "ldr    r4, [%[a], #68]\n\t"
12254         "ldr    r5, [%[b], #64]\n\t"
12255         "ldr    r6, [%[b], #68]\n\t"
12256         "sbc    r3, r5\n\t"
12257         "sbc    r4, r6\n\t"
12258         "str    r3, [%[a], #64]\n\t"
12259         "str    r4, [%[a], #68]\n\t"
12260         "ldr    r3, [%[a], #72]\n\t"
12261         "ldr    r4, [%[a], #76]\n\t"
12262         "ldr    r5, [%[b], #72]\n\t"
12263         "ldr    r6, [%[b], #76]\n\t"
12264         "sbc    r3, r5\n\t"
12265         "sbc    r4, r6\n\t"
12266         "str    r3, [%[a], #72]\n\t"
12267         "str    r4, [%[a], #76]\n\t"
12268         "ldr    r3, [%[a], #80]\n\t"
12269         "ldr    r4, [%[a], #84]\n\t"
12270         "ldr    r5, [%[b], #80]\n\t"
12271         "ldr    r6, [%[b], #84]\n\t"
12272         "sbc    r3, r5\n\t"
12273         "sbc    r4, r6\n\t"
12274         "str    r3, [%[a], #80]\n\t"
12275         "str    r4, [%[a], #84]\n\t"
12276         "ldr    r3, [%[a], #88]\n\t"
12277         "ldr    r4, [%[a], #92]\n\t"
12278         "ldr    r5, [%[b], #88]\n\t"
12279         "ldr    r6, [%[b], #92]\n\t"
12280         "sbc    r3, r5\n\t"
12281         "sbc    r4, r6\n\t"
12282         "str    r3, [%[a], #88]\n\t"
12283         "str    r4, [%[a], #92]\n\t"
12284         "ldr    r3, [%[a], #96]\n\t"
12285         "ldr    r4, [%[a], #100]\n\t"
12286         "ldr    r5, [%[b], #96]\n\t"
12287         "ldr    r6, [%[b], #100]\n\t"
12288         "sbc    r3, r5\n\t"
12289         "sbc    r4, r6\n\t"
12290         "str    r3, [%[a], #96]\n\t"
12291         "str    r4, [%[a], #100]\n\t"
12292         "ldr    r3, [%[a], #104]\n\t"
12293         "ldr    r4, [%[a], #108]\n\t"
12294         "ldr    r5, [%[b], #104]\n\t"
12295         "ldr    r6, [%[b], #108]\n\t"
12296         "sbc    r3, r5\n\t"
12297         "sbc    r4, r6\n\t"
12298         "str    r3, [%[a], #104]\n\t"
12299         "str    r4, [%[a], #108]\n\t"
12300         "ldr    r3, [%[a], #112]\n\t"
12301         "ldr    r4, [%[a], #116]\n\t"
12302         "ldr    r5, [%[b], #112]\n\t"
12303         "ldr    r6, [%[b], #116]\n\t"
12304         "sbc    r3, r5\n\t"
12305         "sbc    r4, r6\n\t"
12306         "str    r3, [%[a], #112]\n\t"
12307         "str    r4, [%[a], #116]\n\t"
12308         "ldr    r3, [%[a], #120]\n\t"
12309         "ldr    r4, [%[a], #124]\n\t"
12310         "ldr    r5, [%[b], #120]\n\t"
12311         "ldr    r6, [%[b], #124]\n\t"
12312         "sbc    r3, r5\n\t"
12313         "sbc    r4, r6\n\t"
12314         "str    r3, [%[a], #120]\n\t"
12315         "str    r4, [%[a], #124]\n\t"
12316         "sbc    %[c], %[c]\n\t"
12317         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
12318         :
12319         : "memory", "r3", "r4", "r5", "r6"
12320     );
12321 
12322     return c;
12323 }
12324 
12325 /* Add b to a into r. (r = a + b)
12326  *
12327  * r  A single precision integer.
12328  * a  A single precision integer.
12329  * b  A single precision integer.
12330  */
12331 SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a,
12332         const sp_digit* b)
12333 {
12334     sp_digit c = 0;
12335 
12336     __asm__ __volatile__ (
12337         "mov    r7, #0\n\t"
12338         "mvn    r7, r7\n\t"
12339         "ldr    r4, [%[a], #0]\n\t"
12340         "ldr    r5, [%[b], #0]\n\t"
12341         "add    r4, r5\n\t"
12342         "str    r4, [%[r], #0]\n\t"
12343         "ldr    r4, [%[a], #4]\n\t"
12344         "ldr    r5, [%[b], #4]\n\t"
12345         "adc    r4, r5\n\t"
12346         "str    r4, [%[r], #4]\n\t"
12347         "ldr    r4, [%[a], #8]\n\t"
12348         "ldr    r5, [%[b], #8]\n\t"
12349         "adc    r4, r5\n\t"
12350         "str    r4, [%[r], #8]\n\t"
12351         "ldr    r4, [%[a], #12]\n\t"
12352         "ldr    r5, [%[b], #12]\n\t"
12353         "adc    r4, r5\n\t"
12354         "str    r4, [%[r], #12]\n\t"
12355         "ldr    r4, [%[a], #16]\n\t"
12356         "ldr    r5, [%[b], #16]\n\t"
12357         "adc    r4, r5\n\t"
12358         "str    r4, [%[r], #16]\n\t"
12359         "ldr    r4, [%[a], #20]\n\t"
12360         "ldr    r5, [%[b], #20]\n\t"
12361         "adc    r4, r5\n\t"
12362         "str    r4, [%[r], #20]\n\t"
12363         "ldr    r4, [%[a], #24]\n\t"
12364         "ldr    r5, [%[b], #24]\n\t"
12365         "adc    r4, r5\n\t"
12366         "str    r4, [%[r], #24]\n\t"
12367         "ldr    r4, [%[a], #28]\n\t"
12368         "ldr    r5, [%[b], #28]\n\t"
12369         "adc    r4, r5\n\t"
12370         "str    r4, [%[r], #28]\n\t"
12371         "ldr    r4, [%[a], #32]\n\t"
12372         "ldr    r5, [%[b], #32]\n\t"
12373         "adc    r4, r5\n\t"
12374         "str    r4, [%[r], #32]\n\t"
12375         "ldr    r4, [%[a], #36]\n\t"
12376         "ldr    r5, [%[b], #36]\n\t"
12377         "adc    r4, r5\n\t"
12378         "str    r4, [%[r], #36]\n\t"
12379         "ldr    r4, [%[a], #40]\n\t"
12380         "ldr    r5, [%[b], #40]\n\t"
12381         "adc    r4, r5\n\t"
12382         "str    r4, [%[r], #40]\n\t"
12383         "ldr    r4, [%[a], #44]\n\t"
12384         "ldr    r5, [%[b], #44]\n\t"
12385         "adc    r4, r5\n\t"
12386         "str    r4, [%[r], #44]\n\t"
12387         "ldr    r4, [%[a], #48]\n\t"
12388         "ldr    r5, [%[b], #48]\n\t"
12389         "adc    r4, r5\n\t"
12390         "str    r4, [%[r], #48]\n\t"
12391         "ldr    r4, [%[a], #52]\n\t"
12392         "ldr    r5, [%[b], #52]\n\t"
12393         "adc    r4, r5\n\t"
12394         "str    r4, [%[r], #52]\n\t"
12395         "ldr    r4, [%[a], #56]\n\t"
12396         "ldr    r5, [%[b], #56]\n\t"
12397         "adc    r4, r5\n\t"
12398         "str    r4, [%[r], #56]\n\t"
12399         "ldr    r4, [%[a], #60]\n\t"
12400         "ldr    r5, [%[b], #60]\n\t"
12401         "adc    r4, r5\n\t"
12402         "str    r4, [%[r], #60]\n\t"
12403         "ldr    r4, [%[a], #64]\n\t"
12404         "ldr    r5, [%[b], #64]\n\t"
12405         "adc    r4, r5\n\t"
12406         "str    r4, [%[r], #64]\n\t"
12407         "ldr    r4, [%[a], #68]\n\t"
12408         "ldr    r5, [%[b], #68]\n\t"
12409         "adc    r4, r5\n\t"
12410         "str    r4, [%[r], #68]\n\t"
12411         "ldr    r4, [%[a], #72]\n\t"
12412         "ldr    r5, [%[b], #72]\n\t"
12413         "adc    r4, r5\n\t"
12414         "str    r4, [%[r], #72]\n\t"
12415         "ldr    r4, [%[a], #76]\n\t"
12416         "ldr    r5, [%[b], #76]\n\t"
12417         "adc    r4, r5\n\t"
12418         "str    r4, [%[r], #76]\n\t"
12419         "ldr    r4, [%[a], #80]\n\t"
12420         "ldr    r5, [%[b], #80]\n\t"
12421         "adc    r4, r5\n\t"
12422         "str    r4, [%[r], #80]\n\t"
12423         "ldr    r4, [%[a], #84]\n\t"
12424         "ldr    r5, [%[b], #84]\n\t"
12425         "adc    r4, r5\n\t"
12426         "str    r4, [%[r], #84]\n\t"
12427         "ldr    r4, [%[a], #88]\n\t"
12428         "ldr    r5, [%[b], #88]\n\t"
12429         "adc    r4, r5\n\t"
12430         "str    r4, [%[r], #88]\n\t"
12431         "ldr    r4, [%[a], #92]\n\t"
12432         "ldr    r5, [%[b], #92]\n\t"
12433         "adc    r4, r5\n\t"
12434         "str    r4, [%[r], #92]\n\t"
12435         "ldr    r4, [%[a], #96]\n\t"
12436         "ldr    r5, [%[b], #96]\n\t"
12437         "adc    r4, r5\n\t"
12438         "str    r4, [%[r], #96]\n\t"
12439         "ldr    r4, [%[a], #100]\n\t"
12440         "ldr    r5, [%[b], #100]\n\t"
12441         "adc    r4, r5\n\t"
12442         "str    r4, [%[r], #100]\n\t"
12443         "ldr    r4, [%[a], #104]\n\t"
12444         "ldr    r5, [%[b], #104]\n\t"
12445         "adc    r4, r5\n\t"
12446         "str    r4, [%[r], #104]\n\t"
12447         "ldr    r4, [%[a], #108]\n\t"
12448         "ldr    r5, [%[b], #108]\n\t"
12449         "adc    r4, r5\n\t"
12450         "str    r4, [%[r], #108]\n\t"
12451         "ldr    r4, [%[a], #112]\n\t"
12452         "ldr    r5, [%[b], #112]\n\t"
12453         "adc    r4, r5\n\t"
12454         "str    r4, [%[r], #112]\n\t"
12455         "ldr    r4, [%[a], #116]\n\t"
12456         "ldr    r5, [%[b], #116]\n\t"
12457         "adc    r4, r5\n\t"
12458         "str    r4, [%[r], #116]\n\t"
12459         "ldr    r4, [%[a], #120]\n\t"
12460         "ldr    r5, [%[b], #120]\n\t"
12461         "adc    r4, r5\n\t"
12462         "str    r4, [%[r], #120]\n\t"
12463         "ldr    r4, [%[a], #124]\n\t"
12464         "ldr    r5, [%[b], #124]\n\t"
12465         "adc    r4, r5\n\t"
12466         "str    r4, [%[r], #124]\n\t"
12467         "mov    %[c], #0\n\t"
12468         "adc    %[c], %[c]\n\t"
12469         "add    %[a], #0x80\n\t"
12470         "add    %[b], #0x80\n\t"
12471         "add    %[r], #0x80\n\t"
12472         "add    %[c], r7\n\t"
12473         "ldr    r4, [%[a], #0]\n\t"
12474         "ldr    r5, [%[b], #0]\n\t"
12475         "adc    r4, r5\n\t"
12476         "str    r4, [%[r], #0]\n\t"
12477         "ldr    r4, [%[a], #4]\n\t"
12478         "ldr    r5, [%[b], #4]\n\t"
12479         "adc    r4, r5\n\t"
12480         "str    r4, [%[r], #4]\n\t"
12481         "ldr    r4, [%[a], #8]\n\t"
12482         "ldr    r5, [%[b], #8]\n\t"
12483         "adc    r4, r5\n\t"
12484         "str    r4, [%[r], #8]\n\t"
12485         "ldr    r4, [%[a], #12]\n\t"
12486         "ldr    r5, [%[b], #12]\n\t"
12487         "adc    r4, r5\n\t"
12488         "str    r4, [%[r], #12]\n\t"
12489         "ldr    r4, [%[a], #16]\n\t"
12490         "ldr    r5, [%[b], #16]\n\t"
12491         "adc    r4, r5\n\t"
12492         "str    r4, [%[r], #16]\n\t"
12493         "ldr    r4, [%[a], #20]\n\t"
12494         "ldr    r5, [%[b], #20]\n\t"
12495         "adc    r4, r5\n\t"
12496         "str    r4, [%[r], #20]\n\t"
12497         "ldr    r4, [%[a], #24]\n\t"
12498         "ldr    r5, [%[b], #24]\n\t"
12499         "adc    r4, r5\n\t"
12500         "str    r4, [%[r], #24]\n\t"
12501         "ldr    r4, [%[a], #28]\n\t"
12502         "ldr    r5, [%[b], #28]\n\t"
12503         "adc    r4, r5\n\t"
12504         "str    r4, [%[r], #28]\n\t"
12505         "ldr    r4, [%[a], #32]\n\t"
12506         "ldr    r5, [%[b], #32]\n\t"
12507         "adc    r4, r5\n\t"
12508         "str    r4, [%[r], #32]\n\t"
12509         "ldr    r4, [%[a], #36]\n\t"
12510         "ldr    r5, [%[b], #36]\n\t"
12511         "adc    r4, r5\n\t"
12512         "str    r4, [%[r], #36]\n\t"
12513         "ldr    r4, [%[a], #40]\n\t"
12514         "ldr    r5, [%[b], #40]\n\t"
12515         "adc    r4, r5\n\t"
12516         "str    r4, [%[r], #40]\n\t"
12517         "ldr    r4, [%[a], #44]\n\t"
12518         "ldr    r5, [%[b], #44]\n\t"
12519         "adc    r4, r5\n\t"
12520         "str    r4, [%[r], #44]\n\t"
12521         "ldr    r4, [%[a], #48]\n\t"
12522         "ldr    r5, [%[b], #48]\n\t"
12523         "adc    r4, r5\n\t"
12524         "str    r4, [%[r], #48]\n\t"
12525         "ldr    r4, [%[a], #52]\n\t"
12526         "ldr    r5, [%[b], #52]\n\t"
12527         "adc    r4, r5\n\t"
12528         "str    r4, [%[r], #52]\n\t"
12529         "ldr    r4, [%[a], #56]\n\t"
12530         "ldr    r5, [%[b], #56]\n\t"
12531         "adc    r4, r5\n\t"
12532         "str    r4, [%[r], #56]\n\t"
12533         "ldr    r4, [%[a], #60]\n\t"
12534         "ldr    r5, [%[b], #60]\n\t"
12535         "adc    r4, r5\n\t"
12536         "str    r4, [%[r], #60]\n\t"
12537         "ldr    r4, [%[a], #64]\n\t"
12538         "ldr    r5, [%[b], #64]\n\t"
12539         "adc    r4, r5\n\t"
12540         "str    r4, [%[r], #64]\n\t"
12541         "ldr    r4, [%[a], #68]\n\t"
12542         "ldr    r5, [%[b], #68]\n\t"
12543         "adc    r4, r5\n\t"
12544         "str    r4, [%[r], #68]\n\t"
12545         "ldr    r4, [%[a], #72]\n\t"
12546         "ldr    r5, [%[b], #72]\n\t"
12547         "adc    r4, r5\n\t"
12548         "str    r4, [%[r], #72]\n\t"
12549         "ldr    r4, [%[a], #76]\n\t"
12550         "ldr    r5, [%[b], #76]\n\t"
12551         "adc    r4, r5\n\t"
12552         "str    r4, [%[r], #76]\n\t"
12553         "ldr    r4, [%[a], #80]\n\t"
12554         "ldr    r5, [%[b], #80]\n\t"
12555         "adc    r4, r5\n\t"
12556         "str    r4, [%[r], #80]\n\t"
12557         "ldr    r4, [%[a], #84]\n\t"
12558         "ldr    r5, [%[b], #84]\n\t"
12559         "adc    r4, r5\n\t"
12560         "str    r4, [%[r], #84]\n\t"
12561         "ldr    r4, [%[a], #88]\n\t"
12562         "ldr    r5, [%[b], #88]\n\t"
12563         "adc    r4, r5\n\t"
12564         "str    r4, [%[r], #88]\n\t"
12565         "ldr    r4, [%[a], #92]\n\t"
12566         "ldr    r5, [%[b], #92]\n\t"
12567         "adc    r4, r5\n\t"
12568         "str    r4, [%[r], #92]\n\t"
12569         "ldr    r4, [%[a], #96]\n\t"
12570         "ldr    r5, [%[b], #96]\n\t"
12571         "adc    r4, r5\n\t"
12572         "str    r4, [%[r], #96]\n\t"
12573         "ldr    r4, [%[a], #100]\n\t"
12574         "ldr    r5, [%[b], #100]\n\t"
12575         "adc    r4, r5\n\t"
12576         "str    r4, [%[r], #100]\n\t"
12577         "ldr    r4, [%[a], #104]\n\t"
12578         "ldr    r5, [%[b], #104]\n\t"
12579         "adc    r4, r5\n\t"
12580         "str    r4, [%[r], #104]\n\t"
12581         "ldr    r4, [%[a], #108]\n\t"
12582         "ldr    r5, [%[b], #108]\n\t"
12583         "adc    r4, r5\n\t"
12584         "str    r4, [%[r], #108]\n\t"
12585         "ldr    r4, [%[a], #112]\n\t"
12586         "ldr    r5, [%[b], #112]\n\t"
12587         "adc    r4, r5\n\t"
12588         "str    r4, [%[r], #112]\n\t"
12589         "ldr    r4, [%[a], #116]\n\t"
12590         "ldr    r5, [%[b], #116]\n\t"
12591         "adc    r4, r5\n\t"
12592         "str    r4, [%[r], #116]\n\t"
12593         "ldr    r4, [%[a], #120]\n\t"
12594         "ldr    r5, [%[b], #120]\n\t"
12595         "adc    r4, r5\n\t"
12596         "str    r4, [%[r], #120]\n\t"
12597         "ldr    r4, [%[a], #124]\n\t"
12598         "ldr    r5, [%[b], #124]\n\t"
12599         "adc    r4, r5\n\t"
12600         "str    r4, [%[r], #124]\n\t"
12601         "mov    %[c], #0\n\t"
12602         "adc    %[c], %[c]\n\t"
12603         "add    %[a], #0x80\n\t"
12604         "add    %[b], #0x80\n\t"
12605         "add    %[r], #0x80\n\t"
12606         "add    %[c], r7\n\t"
12607         "ldr    r4, [%[a], #0]\n\t"
12608         "ldr    r5, [%[b], #0]\n\t"
12609         "adc    r4, r5\n\t"
12610         "str    r4, [%[r], #0]\n\t"
12611         "ldr    r4, [%[a], #4]\n\t"
12612         "ldr    r5, [%[b], #4]\n\t"
12613         "adc    r4, r5\n\t"
12614         "str    r4, [%[r], #4]\n\t"
12615         "ldr    r4, [%[a], #8]\n\t"
12616         "ldr    r5, [%[b], #8]\n\t"
12617         "adc    r4, r5\n\t"
12618         "str    r4, [%[r], #8]\n\t"
12619         "ldr    r4, [%[a], #12]\n\t"
12620         "ldr    r5, [%[b], #12]\n\t"
12621         "adc    r4, r5\n\t"
12622         "str    r4, [%[r], #12]\n\t"
12623         "ldr    r4, [%[a], #16]\n\t"
12624         "ldr    r5, [%[b], #16]\n\t"
12625         "adc    r4, r5\n\t"
12626         "str    r4, [%[r], #16]\n\t"
12627         "ldr    r4, [%[a], #20]\n\t"
12628         "ldr    r5, [%[b], #20]\n\t"
12629         "adc    r4, r5\n\t"
12630         "str    r4, [%[r], #20]\n\t"
12631         "ldr    r4, [%[a], #24]\n\t"
12632         "ldr    r5, [%[b], #24]\n\t"
12633         "adc    r4, r5\n\t"
12634         "str    r4, [%[r], #24]\n\t"
12635         "ldr    r4, [%[a], #28]\n\t"
12636         "ldr    r5, [%[b], #28]\n\t"
12637         "adc    r4, r5\n\t"
12638         "str    r4, [%[r], #28]\n\t"
12639         "ldr    r4, [%[a], #32]\n\t"
12640         "ldr    r5, [%[b], #32]\n\t"
12641         "adc    r4, r5\n\t"
12642         "str    r4, [%[r], #32]\n\t"
12643         "ldr    r4, [%[a], #36]\n\t"
12644         "ldr    r5, [%[b], #36]\n\t"
12645         "adc    r4, r5\n\t"
12646         "str    r4, [%[r], #36]\n\t"
12647         "ldr    r4, [%[a], #40]\n\t"
12648         "ldr    r5, [%[b], #40]\n\t"
12649         "adc    r4, r5\n\t"
12650         "str    r4, [%[r], #40]\n\t"
12651         "ldr    r4, [%[a], #44]\n\t"
12652         "ldr    r5, [%[b], #44]\n\t"
12653         "adc    r4, r5\n\t"
12654         "str    r4, [%[r], #44]\n\t"
12655         "ldr    r4, [%[a], #48]\n\t"
12656         "ldr    r5, [%[b], #48]\n\t"
12657         "adc    r4, r5\n\t"
12658         "str    r4, [%[r], #48]\n\t"
12659         "ldr    r4, [%[a], #52]\n\t"
12660         "ldr    r5, [%[b], #52]\n\t"
12661         "adc    r4, r5\n\t"
12662         "str    r4, [%[r], #52]\n\t"
12663         "ldr    r4, [%[a], #56]\n\t"
12664         "ldr    r5, [%[b], #56]\n\t"
12665         "adc    r4, r5\n\t"
12666         "str    r4, [%[r], #56]\n\t"
12667         "ldr    r4, [%[a], #60]\n\t"
12668         "ldr    r5, [%[b], #60]\n\t"
12669         "adc    r4, r5\n\t"
12670         "str    r4, [%[r], #60]\n\t"
12671         "ldr    r4, [%[a], #64]\n\t"
12672         "ldr    r5, [%[b], #64]\n\t"
12673         "adc    r4, r5\n\t"
12674         "str    r4, [%[r], #64]\n\t"
12675         "ldr    r4, [%[a], #68]\n\t"
12676         "ldr    r5, [%[b], #68]\n\t"
12677         "adc    r4, r5\n\t"
12678         "str    r4, [%[r], #68]\n\t"
12679         "ldr    r4, [%[a], #72]\n\t"
12680         "ldr    r5, [%[b], #72]\n\t"
12681         "adc    r4, r5\n\t"
12682         "str    r4, [%[r], #72]\n\t"
12683         "ldr    r4, [%[a], #76]\n\t"
12684         "ldr    r5, [%[b], #76]\n\t"
12685         "adc    r4, r5\n\t"
12686         "str    r4, [%[r], #76]\n\t"
12687         "ldr    r4, [%[a], #80]\n\t"
12688         "ldr    r5, [%[b], #80]\n\t"
12689         "adc    r4, r5\n\t"
12690         "str    r4, [%[r], #80]\n\t"
12691         "ldr    r4, [%[a], #84]\n\t"
12692         "ldr    r5, [%[b], #84]\n\t"
12693         "adc    r4, r5\n\t"
12694         "str    r4, [%[r], #84]\n\t"
12695         "ldr    r4, [%[a], #88]\n\t"
12696         "ldr    r5, [%[b], #88]\n\t"
12697         "adc    r4, r5\n\t"
12698         "str    r4, [%[r], #88]\n\t"
12699         "ldr    r4, [%[a], #92]\n\t"
12700         "ldr    r5, [%[b], #92]\n\t"
12701         "adc    r4, r5\n\t"
12702         "str    r4, [%[r], #92]\n\t"
12703         "ldr    r4, [%[a], #96]\n\t"
12704         "ldr    r5, [%[b], #96]\n\t"
12705         "adc    r4, r5\n\t"
12706         "str    r4, [%[r], #96]\n\t"
12707         "ldr    r4, [%[a], #100]\n\t"
12708         "ldr    r5, [%[b], #100]\n\t"
12709         "adc    r4, r5\n\t"
12710         "str    r4, [%[r], #100]\n\t"
12711         "ldr    r4, [%[a], #104]\n\t"
12712         "ldr    r5, [%[b], #104]\n\t"
12713         "adc    r4, r5\n\t"
12714         "str    r4, [%[r], #104]\n\t"
12715         "ldr    r4, [%[a], #108]\n\t"
12716         "ldr    r5, [%[b], #108]\n\t"
12717         "adc    r4, r5\n\t"
12718         "str    r4, [%[r], #108]\n\t"
12719         "ldr    r4, [%[a], #112]\n\t"
12720         "ldr    r5, [%[b], #112]\n\t"
12721         "adc    r4, r5\n\t"
12722         "str    r4, [%[r], #112]\n\t"
12723         "ldr    r4, [%[a], #116]\n\t"
12724         "ldr    r5, [%[b], #116]\n\t"
12725         "adc    r4, r5\n\t"
12726         "str    r4, [%[r], #116]\n\t"
12727         "ldr    r4, [%[a], #120]\n\t"
12728         "ldr    r5, [%[b], #120]\n\t"
12729         "adc    r4, r5\n\t"
12730         "str    r4, [%[r], #120]\n\t"
12731         "ldr    r4, [%[a], #124]\n\t"
12732         "ldr    r5, [%[b], #124]\n\t"
12733         "adc    r4, r5\n\t"
12734         "str    r4, [%[r], #124]\n\t"
12735         "mov    %[c], #0\n\t"
12736         "adc    %[c], %[c]\n\t"
12737         "add    %[a], #0x80\n\t"
12738         "add    %[b], #0x80\n\t"
12739         "add    %[r], #0x80\n\t"
12740         "add    %[c], r7\n\t"
12741         "ldr    r4, [%[a], #0]\n\t"
12742         "ldr    r5, [%[b], #0]\n\t"
12743         "adc    r4, r5\n\t"
12744         "str    r4, [%[r], #0]\n\t"
12745         "ldr    r4, [%[a], #4]\n\t"
12746         "ldr    r5, [%[b], #4]\n\t"
12747         "adc    r4, r5\n\t"
12748         "str    r4, [%[r], #4]\n\t"
12749         "ldr    r4, [%[a], #8]\n\t"
12750         "ldr    r5, [%[b], #8]\n\t"
12751         "adc    r4, r5\n\t"
12752         "str    r4, [%[r], #8]\n\t"
12753         "ldr    r4, [%[a], #12]\n\t"
12754         "ldr    r5, [%[b], #12]\n\t"
12755         "adc    r4, r5\n\t"
12756         "str    r4, [%[r], #12]\n\t"
12757         "ldr    r4, [%[a], #16]\n\t"
12758         "ldr    r5, [%[b], #16]\n\t"
12759         "adc    r4, r5\n\t"
12760         "str    r4, [%[r], #16]\n\t"
12761         "ldr    r4, [%[a], #20]\n\t"
12762         "ldr    r5, [%[b], #20]\n\t"
12763         "adc    r4, r5\n\t"
12764         "str    r4, [%[r], #20]\n\t"
12765         "ldr    r4, [%[a], #24]\n\t"
12766         "ldr    r5, [%[b], #24]\n\t"
12767         "adc    r4, r5\n\t"
12768         "str    r4, [%[r], #24]\n\t"
12769         "ldr    r4, [%[a], #28]\n\t"
12770         "ldr    r5, [%[b], #28]\n\t"
12771         "adc    r4, r5\n\t"
12772         "str    r4, [%[r], #28]\n\t"
12773         "ldr    r4, [%[a], #32]\n\t"
12774         "ldr    r5, [%[b], #32]\n\t"
12775         "adc    r4, r5\n\t"
12776         "str    r4, [%[r], #32]\n\t"
12777         "ldr    r4, [%[a], #36]\n\t"
12778         "ldr    r5, [%[b], #36]\n\t"
12779         "adc    r4, r5\n\t"
12780         "str    r4, [%[r], #36]\n\t"
12781         "ldr    r4, [%[a], #40]\n\t"
12782         "ldr    r5, [%[b], #40]\n\t"
12783         "adc    r4, r5\n\t"
12784         "str    r4, [%[r], #40]\n\t"
12785         "ldr    r4, [%[a], #44]\n\t"
12786         "ldr    r5, [%[b], #44]\n\t"
12787         "adc    r4, r5\n\t"
12788         "str    r4, [%[r], #44]\n\t"
12789         "ldr    r4, [%[a], #48]\n\t"
12790         "ldr    r5, [%[b], #48]\n\t"
12791         "adc    r4, r5\n\t"
12792         "str    r4, [%[r], #48]\n\t"
12793         "ldr    r4, [%[a], #52]\n\t"
12794         "ldr    r5, [%[b], #52]\n\t"
12795         "adc    r4, r5\n\t"
12796         "str    r4, [%[r], #52]\n\t"
12797         "ldr    r4, [%[a], #56]\n\t"
12798         "ldr    r5, [%[b], #56]\n\t"
12799         "adc    r4, r5\n\t"
12800         "str    r4, [%[r], #56]\n\t"
12801         "ldr    r4, [%[a], #60]\n\t"
12802         "ldr    r5, [%[b], #60]\n\t"
12803         "adc    r4, r5\n\t"
12804         "str    r4, [%[r], #60]\n\t"
12805         "ldr    r4, [%[a], #64]\n\t"
12806         "ldr    r5, [%[b], #64]\n\t"
12807         "adc    r4, r5\n\t"
12808         "str    r4, [%[r], #64]\n\t"
12809         "ldr    r4, [%[a], #68]\n\t"
12810         "ldr    r5, [%[b], #68]\n\t"
12811         "adc    r4, r5\n\t"
12812         "str    r4, [%[r], #68]\n\t"
12813         "ldr    r4, [%[a], #72]\n\t"
12814         "ldr    r5, [%[b], #72]\n\t"
12815         "adc    r4, r5\n\t"
12816         "str    r4, [%[r], #72]\n\t"
12817         "ldr    r4, [%[a], #76]\n\t"
12818         "ldr    r5, [%[b], #76]\n\t"
12819         "adc    r4, r5\n\t"
12820         "str    r4, [%[r], #76]\n\t"
12821         "ldr    r4, [%[a], #80]\n\t"
12822         "ldr    r5, [%[b], #80]\n\t"
12823         "adc    r4, r5\n\t"
12824         "str    r4, [%[r], #80]\n\t"
12825         "ldr    r4, [%[a], #84]\n\t"
12826         "ldr    r5, [%[b], #84]\n\t"
12827         "adc    r4, r5\n\t"
12828         "str    r4, [%[r], #84]\n\t"
12829         "ldr    r4, [%[a], #88]\n\t"
12830         "ldr    r5, [%[b], #88]\n\t"
12831         "adc    r4, r5\n\t"
12832         "str    r4, [%[r], #88]\n\t"
12833         "ldr    r4, [%[a], #92]\n\t"
12834         "ldr    r5, [%[b], #92]\n\t"
12835         "adc    r4, r5\n\t"
12836         "str    r4, [%[r], #92]\n\t"
12837         "ldr    r4, [%[a], #96]\n\t"
12838         "ldr    r5, [%[b], #96]\n\t"
12839         "adc    r4, r5\n\t"
12840         "str    r4, [%[r], #96]\n\t"
12841         "ldr    r4, [%[a], #100]\n\t"
12842         "ldr    r5, [%[b], #100]\n\t"
12843         "adc    r4, r5\n\t"
12844         "str    r4, [%[r], #100]\n\t"
12845         "ldr    r4, [%[a], #104]\n\t"
12846         "ldr    r5, [%[b], #104]\n\t"
12847         "adc    r4, r5\n\t"
12848         "str    r4, [%[r], #104]\n\t"
12849         "ldr    r4, [%[a], #108]\n\t"
12850         "ldr    r5, [%[b], #108]\n\t"
12851         "adc    r4, r5\n\t"
12852         "str    r4, [%[r], #108]\n\t"
12853         "ldr    r4, [%[a], #112]\n\t"
12854         "ldr    r5, [%[b], #112]\n\t"
12855         "adc    r4, r5\n\t"
12856         "str    r4, [%[r], #112]\n\t"
12857         "ldr    r4, [%[a], #116]\n\t"
12858         "ldr    r5, [%[b], #116]\n\t"
12859         "adc    r4, r5\n\t"
12860         "str    r4, [%[r], #116]\n\t"
12861         "ldr    r4, [%[a], #120]\n\t"
12862         "ldr    r5, [%[b], #120]\n\t"
12863         "adc    r4, r5\n\t"
12864         "str    r4, [%[r], #120]\n\t"
12865         "ldr    r4, [%[a], #124]\n\t"
12866         "ldr    r5, [%[b], #124]\n\t"
12867         "adc    r4, r5\n\t"
12868         "str    r4, [%[r], #124]\n\t"
12869         "mov    %[c], #0\n\t"
12870         "adc    %[c], %[c]\n\t"
12871         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
12872         :
12873         : "memory", "r4", "r5", "r7"
12874     );
12875 
12876     return c;
12877 }
12878 
12879 /* Multiply a and b into r. (r = a * b)
12880  *
12881  * r  A single precision integer.
12882  * a  A single precision integer.
12883  * b  A single precision integer.
12884  */
12885 SP_NOINLINE static void sp_4096_mul_64(sp_digit* r, const sp_digit* a,
12886         const sp_digit* b)
12887 {
12888     sp_digit tmp[64 * 2];
12889     __asm__ __volatile__ (
12890         "mov    r3, #0\n\t"
12891         "mov    r4, #0\n\t"
12892         "mov    r8, r3\n\t"
12893         "mov    r11, %[r]\n\t"
12894         "mov    r9, %[a]\n\t"
12895         "mov    r10, %[b]\n\t"
12896         "mov    r6, #1\n\t"
12897         "lsl    r6, r6, #8\n\t"
12898         "add    r6, r9\n\t"
12899         "mov    r12, r6\n\t"
12900         "\n1:\n\t"
12901         "mov    %[r], #0\n\t"
12902         "mov    r5, #0\n\t"
12903         "mov    r6, #252\n\t"
12904         "mov    %[a], r8\n\t"
12905         "sub    %[a], r6\n\t"
12906         "sbc    r6, r6\n\t"
12907         "mvn    r6, r6\n\t"
12908         "and    %[a], r6\n\t"
12909         "mov    %[b], r8\n\t"
12910         "sub    %[b], %[a]\n\t"
12911         "add    %[a], r9\n\t"
12912         "add    %[b], r10\n\t"
12913         "\n2:\n\t"
12914         "# Multiply Start\n\t"
12915         "ldr    r6, [%[a]]\n\t"
12916         "ldr    r7, [%[b]]\n\t"
12917         "lsl    r6, r6, #16\n\t"
12918         "lsl    r7, r7, #16\n\t"
12919         "lsr    r6, r6, #16\n\t"
12920         "lsr    r7, r7, #16\n\t"
12921         "mul    r7, r6\n\t"
12922         "add    r3, r7\n\t"
12923         "adc    r4, %[r]\n\t"
12924         "adc    r5, %[r]\n\t"
12925         "ldr    r7, [%[b]]\n\t"
12926         "lsr    r7, r7, #16\n\t"
12927         "mul    r6, r7\n\t"
12928         "lsr    r7, r6, #16\n\t"
12929         "lsl    r6, r6, #16\n\t"
12930         "add    r3, r6\n\t"
12931         "adc    r4, r7\n\t"
12932         "adc    r5, %[r]\n\t"
12933         "ldr    r6, [%[a]]\n\t"
12934         "ldr    r7, [%[b]]\n\t"
12935         "lsr    r6, r6, #16\n\t"
12936         "lsr    r7, r7, #16\n\t"
12937         "mul    r7, r6\n\t"
12938         "add    r4, r7\n\t"
12939         "adc    r5, %[r]\n\t"
12940         "ldr    r7, [%[b]]\n\t"
12941         "lsl    r7, r7, #16\n\t"
12942         "lsr    r7, r7, #16\n\t"
12943         "mul    r6, r7\n\t"
12944         "lsr    r7, r6, #16\n\t"
12945         "lsl    r6, r6, #16\n\t"
12946         "add    r3, r6\n\t"
12947         "adc    r4, r7\n\t"
12948         "adc    r5, %[r]\n\t"
12949         "# Multiply Done\n\t"
12950         "add    %[a], #4\n\t"
12951         "sub    %[b], #4\n\t"
12952         "cmp    %[a], r12\n\t"
12953         "beq    3f\n\t"
12954         "mov    r6, r8\n\t"
12955         "add    r6, r9\n\t"
12956         "cmp    %[a], r6\n\t"
12957         "ble    2b\n\t"
12958         "\n3:\n\t"
12959         "mov    %[r], r11\n\t"
12960         "mov    r7, r8\n\t"
12961         "str    r3, [%[r], r7]\n\t"
12962         "mov    r3, r4\n\t"
12963         "mov    r4, r5\n\t"
12964         "add    r7, #4\n\t"
12965         "mov    r8, r7\n\t"
12966         "mov    r6, #1\n\t"
12967         "lsl    r6, r6, #8\n\t"
12968         "add    r6, #248\n\t"
12969         "cmp    r7, r6\n\t"
12970         "ble    1b\n\t"
12971         "str    r3, [%[r], r7]\n\t"
12972         "mov    %[a], r9\n\t"
12973         "mov    %[b], r10\n\t"
12974         :
12975         : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
12976         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
12977     );
12978 
12979     XMEMCPY(r, tmp, sizeof(tmp));
12980 }
12981 
12982 /* AND m into each word of a and store in r.
12983  *
12984  * r  A single precision integer.
12985  * a  A single precision integer.
12986  * m  Mask to AND against each digit.
12987  */
12988 static void sp_4096_mask_64(sp_digit* r, const sp_digit* a, sp_digit m)
12989 {
12990 #ifdef WOLFSSL_SP_SMALL
12991     int i;
12992 
12993     for (i=0; i<64; i++) {
12994         r[i] = a[i] & m;
12995     }
12996 #else
12997     int i;
12998 
12999     for (i = 0; i < 64; i += 8) {
13000         r[i+0] = a[i+0] & m;
13001         r[i+1] = a[i+1] & m;
13002         r[i+2] = a[i+2] & m;
13003         r[i+3] = a[i+3] & m;
13004         r[i+4] = a[i+4] & m;
13005         r[i+5] = a[i+5] & m;
13006         r[i+6] = a[i+6] & m;
13007         r[i+7] = a[i+7] & m;
13008     }
13009 #endif
13010 }
13011 
13012 /* Multiply a and b into r. (r = a * b)
13013  *
13014  * r  A single precision integer.
13015  * a  A single precision integer.
13016  * b  A single precision integer.
13017  */
13018 SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a,
13019         const sp_digit* b)
13020 {
13021     sp_digit* z0 = r;
13022     sp_digit z1[128];
13023     sp_digit a1[64];
13024     sp_digit b1[64];
13025     sp_digit z2[128];
13026     sp_digit u, ca, cb;
13027 
13028     ca = sp_2048_add_64(a1, a, &a[64]);
13029     cb = sp_2048_add_64(b1, b, &b[64]);
13030     u  = ca & cb;
13031     sp_2048_mul_64(z1, a1, b1);
13032     sp_2048_mul_64(z2, &a[64], &b[64]);
13033     sp_2048_mul_64(z0, a, b);
13034     sp_2048_mask_64(r + 128, a1, 0 - cb);
13035     sp_2048_mask_64(b1, b1, 0 - ca);
13036     u += sp_2048_add_64(r + 128, r + 128, b1);
13037     u += sp_4096_sub_in_place_128(z1, z2);
13038     u += sp_4096_sub_in_place_128(z1, z0);
13039     u += sp_4096_add_128(r + 64, r + 64, z1);
13040     r[192] = u;
13041     XMEMSET(r + 192 + 1, 0, sizeof(sp_digit) * (64 - 1));
13042     (void)sp_4096_add_128(r + 128, r + 128, z2);
13043 }
13044 
13045 /* Square a and put result in r. (r = a * a)
13046  *
13047  * r  A single precision integer.
13048  * a  A single precision integer.
13049  */
13050 SP_NOINLINE static void sp_4096_sqr_64(sp_digit* r, const sp_digit* a)
13051 {
13052     __asm__ __volatile__ (
13053         "mov    r3, #0\n\t"
13054         "mov    r4, #0\n\t"
13055         "mov    r5, #0\n\t"
13056         "mov    r8, r3\n\t"
13057         "mov    r11, %[r]\n\t"
13058         "mov    r6, #2\n\t"
13059         "lsl    r6, r6, #8\n\t"
13060         "neg    r6, r6\n\t"
13061         "add    sp, r6\n\t"
13062         "mov    r10, sp\n\t"
13063         "mov    r9, %[a]\n\t"
13064         "\n1:\n\t"
13065         "mov    %[r], #0\n\t"
13066         "mov    r6, #252\n\t"
13067         "mov    %[a], r8\n\t"
13068         "sub    %[a], r6\n\t"
13069         "sbc    r6, r6\n\t"
13070         "mvn    r6, r6\n\t"
13071         "and    %[a], r6\n\t"
13072         "mov    r2, r8\n\t"
13073         "sub    r2, %[a]\n\t"
13074         "add    %[a], r9\n\t"
13075         "add    r2, r9\n\t"
13076         "\n2:\n\t"
13077         "cmp    r2, %[a]\n\t"
13078         "beq    4f\n\t"
13079         "# Multiply * 2: Start\n\t"
13080         "ldr    r6, [%[a]]\n\t"
13081         "ldr    r7, [r2]\n\t"
13082         "lsl    r6, r6, #16\n\t"
13083         "lsl    r7, r7, #16\n\t"
13084         "lsr    r6, r6, #16\n\t"
13085         "lsr    r7, r7, #16\n\t"
13086         "mul    r7, r6\n\t"
13087         "add    r3, r7\n\t"
13088         "adc    r4, %[r]\n\t"
13089         "adc    r5, %[r]\n\t"
13090         "add    r3, r7\n\t"
13091         "adc    r4, %[r]\n\t"
13092         "adc    r5, %[r]\n\t"
13093         "ldr    r7, [r2]\n\t"
13094         "lsr    r7, r7, #16\n\t"
13095         "mul    r6, r7\n\t"
13096         "lsr    r7, r6, #16\n\t"
13097         "lsl    r6, r6, #16\n\t"
13098         "add    r3, r6\n\t"
13099         "adc    r4, r7\n\t"
13100         "adc    r5, %[r]\n\t"
13101         "add    r3, r6\n\t"
13102         "adc    r4, r7\n\t"
13103         "adc    r5, %[r]\n\t"
13104         "ldr    r6, [%[a]]\n\t"
13105         "ldr    r7, [r2]\n\t"
13106         "lsr    r6, r6, #16\n\t"
13107         "lsr    r7, r7, #16\n\t"
13108         "mul    r7, r6\n\t"
13109         "add    r4, r7\n\t"
13110         "adc    r5, %[r]\n\t"
13111         "add    r4, r7\n\t"
13112         "adc    r5, %[r]\n\t"
13113         "ldr    r7, [r2]\n\t"
13114         "lsl    r7, r7, #16\n\t"
13115         "lsr    r7, r7, #16\n\t"
13116         "mul    r6, r7\n\t"
13117         "lsr    r7, r6, #16\n\t"
13118         "lsl    r6, r6, #16\n\t"
13119         "add    r3, r6\n\t"
13120         "adc    r4, r7\n\t"
13121         "adc    r5, %[r]\n\t"
13122         "add    r3, r6\n\t"
13123         "adc    r4, r7\n\t"
13124         "adc    r5, %[r]\n\t"
13125         "# Multiply * 2: Done\n\t"
13126         "bal    5f\n\t"
13127         "\n4:\n\t"
13128         "# Square: Start\n\t"
13129         "ldr    r6, [%[a]]\n\t"
13130         "lsr    r7, r6, #16\n\t"
13131         "lsl    r6, r6, #16\n\t"
13132         "lsr    r6, r6, #16\n\t"
13133         "mul    r6, r6\n\t"
13134         "add    r3, r6\n\t"
13135         "adc    r4, %[r]\n\t"
13136         "adc    r5, %[r]\n\t"
13137         "mul    r7, r7\n\t"
13138         "add    r4, r7\n\t"
13139         "adc    r5, %[r]\n\t"
13140         "ldr    r6, [%[a]]\n\t"
13141         "lsr    r7, r6, #16\n\t"
13142         "lsl    r6, r6, #16\n\t"
13143         "lsr    r6, r6, #16\n\t"
13144         "mul    r6, r7\n\t"
13145         "lsr    r7, r6, #15\n\t"
13146         "lsl    r6, r6, #17\n\t"
13147         "add    r3, r6\n\t"
13148         "adc    r4, r7\n\t"
13149         "adc    r5, %[r]\n\t"
13150         "# Square: Done\n\t"
13151         "\n5:\n\t"
13152         "add    %[a], #4\n\t"
13153         "sub    r2, #4\n\t"
13154         "mov    r6, #1\n\t"
13155         "lsl    r6, r6, #8\n\t"
13156         "add    r6, r9\n\t"
13157         "cmp    %[a], r6\n\t"
13158         "beq    3f\n\t"
13159         "cmp    %[a], r2\n\t"
13160         "bgt    3f\n\t"
13161         "mov    r7, r8\n\t"
13162         "add    r7, r9\n\t"
13163         "cmp    %[a], r7\n\t"
13164         "ble    2b\n\t"
13165         "\n3:\n\t"
13166         "mov    %[r], r10\n\t"
13167         "mov    r7, r8\n\t"
13168         "str    r3, [%[r], r7]\n\t"
13169         "mov    r3, r4\n\t"
13170         "mov    r4, r5\n\t"
13171         "mov    r5, #0\n\t"
13172         "add    r7, #4\n\t"
13173         "mov    r8, r7\n\t"
13174         "mov    r6, #1\n\t"
13175         "lsl    r6, r6, #8\n\t"
13176         "add    r6, #248\n\t"
13177         "cmp    r7, r6\n\t"
13178         "ble    1b\n\t"
13179         "mov    %[a], r9\n\t"
13180         "str    r3, [%[r], r7]\n\t"
13181         "mov    %[r], r11\n\t"
13182         "mov    %[a], r10\n\t"
13183         "mov    r3, #1\n\t"
13184         "lsl    r3, r3, #8\n\t"
13185         "add    r3, #252\n\t"
13186         "\n4:\n\t"
13187         "ldr    r6, [%[a], r3]\n\t"
13188         "str    r6, [%[r], r3]\n\t"
13189         "sub    r3, #4\n\t"
13190         "bge    4b\n\t"
13191         "mov    r6, #2\n\t"
13192         "lsl    r6, r6, #8\n\t"
13193         "add    sp, r6\n\t"
13194         :
13195         : [r] "r" (r), [a] "r" (a)
13196         : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
13197     );
13198 }
13199 
13200 /* Square a and put result in r. (r = a * a)
13201  *
13202  * r  A single precision integer.
13203  * a  A single precision integer.
13204  */
13205 SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a)
13206 {
13207     sp_digit* z0 = r;
13208     sp_digit z2[128];
13209     sp_digit z1[128];
13210     sp_digit a1[64];
13211     sp_digit u;
13212 
13213     u = sp_2048_add_64(a1, a, &a[64]);
13214     sp_2048_sqr_64(z1, a1);
13215     sp_2048_sqr_64(z2, &a[64]);
13216     sp_2048_sqr_64(z0, a);
13217     sp_2048_mask_64(r + 128, a1, 0 - u);
13218     u += sp_2048_add_64(r + 128, r + 128, r + 128);
13219     u += sp_4096_sub_in_place_128(z1, z2);
13220     u += sp_4096_sub_in_place_128(z1, z0);
13221     u += sp_4096_add_128(r + 64, r + 64, z1);
13222     r[192] = u;
13223     XMEMSET(r + 192 + 1, 0, sizeof(sp_digit) * (64 - 1));
13224     (void)sp_4096_add_128(r + 128, r + 128, z2);
13225 }
13226 
13227 #endif /* !WOLFSSL_SP_SMALL */
13228 #ifdef WOLFSSL_SP_SMALL
13229 /* Add b to a into r. (r = a + b)
13230  *
13231  * r  A single precision integer.
13232  * a  A single precision integer.
13233  * b  A single precision integer.
13234  */
13235 SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a,
13236         const sp_digit* b)
13237 {
13238     sp_digit c = 0;
13239 
13240     __asm__ __volatile__ (
13241         "mov    r6, %[a]\n\t"
13242         "mov    r7, #0\n\t"
13243         "mov    r4, #2\n\t"
13244         "lsl    r4, #8\n\t"
13245         "sub    r7, #1\n\t"
13246         "add    r6, r4\n\t"
13247         "\n1:\n\t"
13248         "add    %[c], r7\n\t"
13249         "ldr    r4, [%[a]]\n\t"
13250         "ldr    r5, [%[b]]\n\t"
13251         "adc    r4, r5\n\t"
13252         "str    r4, [%[r]]\n\t"
13253         "mov    %[c], #0\n\t"
13254         "adc    %[c], %[c]\n\t"
13255         "add    %[a], #4\n\t"
13256         "add    %[b], #4\n\t"
13257         "add    %[r], #4\n\t"
13258         "cmp    %[a], r6\n\t"
13259         "bne    1b\n\t"
13260         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
13261         :
13262         : "memory", "r4", "r5", "r6", "r7"
13263     );
13264 
13265     return c;
13266 }
13267 
13268 #endif /* WOLFSSL_SP_SMALL */
13269 #ifdef WOLFSSL_SP_SMALL
13270 /* Sub b from a into a. (a -= b)
13271  *
13272  * a  A single precision integer.
13273  * b  A single precision integer.
13274  */
13275 SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a,
13276         const sp_digit* b)
13277 {
13278     sp_digit c = 0;
13279     __asm__ __volatile__ (
13280         "mov    r7, %[a]\n\t"
13281         "mov    r5, #2\n\t"
13282         "lsl    r5, #8\n\t"
13283         "add    r7, r5\n\t"
13284         "\n1:\n\t"
13285         "mov    r5, #0\n\t"
13286         "sub    r5, %[c]\n\t"
13287         "ldr    r3, [%[a]]\n\t"
13288         "ldr    r4, [%[a], #4]\n\t"
13289         "ldr    r5, [%[b]]\n\t"
13290         "ldr    r6, [%[b], #4]\n\t"
13291         "sbc    r3, r5\n\t"
13292         "sbc    r4, r6\n\t"
13293         "str    r3, [%[a]]\n\t"
13294         "str    r4, [%[a], #4]\n\t"
13295         "sbc    %[c], %[c]\n\t"
13296         "add    %[a], #8\n\t"
13297         "add    %[b], #8\n\t"
13298         "cmp    %[a], r7\n\t"
13299         "bne    1b\n\t"
13300         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
13301         :
13302         : "memory", "r3", "r4", "r5", "r6", "r7"
13303     );
13304 
13305     return c;
13306 }
13307 
13308 #endif /* WOLFSSL_SP_SMALL */
13309 #ifdef WOLFSSL_SP_SMALL
13310 /* Multiply a and b into r. (r = a * b)
13311  *
13312  * r  A single precision integer.
13313  * a  A single precision integer.
13314  * b  A single precision integer.
13315  */
13316 SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a,
13317         const sp_digit* b)
13318 {
13319     sp_digit tmp[128 * 2];
13320     __asm__ __volatile__ (
13321         "mov    r3, #0\n\t"
13322         "mov    r4, #0\n\t"
13323         "mov    r8, r3\n\t"
13324         "mov    r11, %[r]\n\t"
13325         "mov    r9, %[a]\n\t"
13326         "mov    r10, %[b]\n\t"
13327         "mov    r6, #2\n\t"
13328         "lsl    r6, r6, #8\n\t"
13329         "add    r6, r9\n\t"
13330         "mov    r12, r6\n\t"
13331         "\n1:\n\t"
13332         "mov    %[r], #0\n\t"
13333         "mov    r5, #0\n\t"
13334         "mov    r6, #1\n\t"
13335         "lsl    r6, r6, #8\n\t"
13336         "add    r6, #252\n\t"
13337         "mov    %[a], r8\n\t"
13338         "sub    %[a], r6\n\t"
13339         "sbc    r6, r6\n\t"
13340         "mvn    r6, r6\n\t"
13341         "and    %[a], r6\n\t"
13342         "mov    %[b], r8\n\t"
13343         "sub    %[b], %[a]\n\t"
13344         "add    %[a], r9\n\t"
13345         "add    %[b], r10\n\t"
13346         "\n2:\n\t"
13347         "# Multiply Start\n\t"
13348         "ldr    r6, [%[a]]\n\t"
13349         "ldr    r7, [%[b]]\n\t"
13350         "lsl    r6, r6, #16\n\t"
13351         "lsl    r7, r7, #16\n\t"
13352         "lsr    r6, r6, #16\n\t"
13353         "lsr    r7, r7, #16\n\t"
13354         "mul    r7, r6\n\t"
13355         "add    r3, r7\n\t"
13356         "adc    r4, %[r]\n\t"
13357         "adc    r5, %[r]\n\t"
13358         "ldr    r7, [%[b]]\n\t"
13359         "lsr    r7, r7, #16\n\t"
13360         "mul    r6, r7\n\t"
13361         "lsr    r7, r6, #16\n\t"
13362         "lsl    r6, r6, #16\n\t"
13363         "add    r3, r6\n\t"
13364         "adc    r4, r7\n\t"
13365         "adc    r5, %[r]\n\t"
13366         "ldr    r6, [%[a]]\n\t"
13367         "ldr    r7, [%[b]]\n\t"
13368         "lsr    r6, r6, #16\n\t"
13369         "lsr    r7, r7, #16\n\t"
13370         "mul    r7, r6\n\t"
13371         "add    r4, r7\n\t"
13372         "adc    r5, %[r]\n\t"
13373         "ldr    r7, [%[b]]\n\t"
13374         "lsl    r7, r7, #16\n\t"
13375         "lsr    r7, r7, #16\n\t"
13376         "mul    r6, r7\n\t"
13377         "lsr    r7, r6, #16\n\t"
13378         "lsl    r6, r6, #16\n\t"
13379         "add    r3, r6\n\t"
13380         "adc    r4, r7\n\t"
13381         "adc    r5, %[r]\n\t"
13382         "# Multiply Done\n\t"
13383         "add    %[a], #4\n\t"
13384         "sub    %[b], #4\n\t"
13385         "cmp    %[a], r12\n\t"
13386         "beq    3f\n\t"
13387         "mov    r6, r8\n\t"
13388         "add    r6, r9\n\t"
13389         "cmp    %[a], r6\n\t"
13390         "ble    2b\n\t"
13391         "\n3:\n\t"
13392         "mov    %[r], r11\n\t"
13393         "mov    r7, r8\n\t"
13394         "str    r3, [%[r], r7]\n\t"
13395         "mov    r3, r4\n\t"
13396         "mov    r4, r5\n\t"
13397         "add    r7, #4\n\t"
13398         "mov    r8, r7\n\t"
13399         "mov    r6, #3\n\t"
13400         "lsl    r6, r6, #8\n\t"
13401         "add    r6, #248\n\t"
13402         "cmp    r7, r6\n\t"
13403         "ble    1b\n\t"
13404         "str    r3, [%[r], r7]\n\t"
13405         "mov    %[a], r9\n\t"
13406         "mov    %[b], r10\n\t"
13407         :
13408         : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
13409         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
13410     );
13411 
13412     XMEMCPY(r, tmp, sizeof(tmp));
13413 }
13414 
13415 /* Square a and put result in r. (r = a * a)
13416  *
13417  * r  A single precision integer.
13418  * a  A single precision integer.
13419  */
13420 SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a)
13421 {
13422     __asm__ __volatile__ (
13423         "mov    r3, #0\n\t"
13424         "mov    r4, #0\n\t"
13425         "mov    r5, #0\n\t"
13426         "mov    r8, r3\n\t"
13427         "mov    r11, %[r]\n\t"
13428         "mov    r6, #4\n\t"
13429         "lsl    r6, r6, #8\n\t"
13430         "neg    r6, r6\n\t"
13431         "add    sp, r6\n\t"
13432         "mov    r10, sp\n\t"
13433         "mov    r9, %[a]\n\t"
13434         "\n1:\n\t"
13435         "mov    %[r], #0\n\t"
13436         "mov    r6, #1\n\t"
13437         "lsl    r6, r6, #8\n\t"
13438         "add    r6, #252\n\t"
13439         "mov    %[a], r8\n\t"
13440         "sub    %[a], r6\n\t"
13441         "sbc    r6, r6\n\t"
13442         "mvn    r6, r6\n\t"
13443         "and    %[a], r6\n\t"
13444         "mov    r2, r8\n\t"
13445         "sub    r2, %[a]\n\t"
13446         "add    %[a], r9\n\t"
13447         "add    r2, r9\n\t"
13448         "\n2:\n\t"
13449         "cmp    r2, %[a]\n\t"
13450         "beq    4f\n\t"
13451         "# Multiply * 2: Start\n\t"
13452         "ldr    r6, [%[a]]\n\t"
13453         "ldr    r7, [r2]\n\t"
13454         "lsl    r6, r6, #16\n\t"
13455         "lsl    r7, r7, #16\n\t"
13456         "lsr    r6, r6, #16\n\t"
13457         "lsr    r7, r7, #16\n\t"
13458         "mul    r7, r6\n\t"
13459         "add    r3, r7\n\t"
13460         "adc    r4, %[r]\n\t"
13461         "adc    r5, %[r]\n\t"
13462         "add    r3, r7\n\t"
13463         "adc    r4, %[r]\n\t"
13464         "adc    r5, %[r]\n\t"
13465         "ldr    r7, [r2]\n\t"
13466         "lsr    r7, r7, #16\n\t"
13467         "mul    r6, r7\n\t"
13468         "lsr    r7, r6, #16\n\t"
13469         "lsl    r6, r6, #16\n\t"
13470         "add    r3, r6\n\t"
13471         "adc    r4, r7\n\t"
13472         "adc    r5, %[r]\n\t"
13473         "add    r3, r6\n\t"
13474         "adc    r4, r7\n\t"
13475         "adc    r5, %[r]\n\t"
13476         "ldr    r6, [%[a]]\n\t"
13477         "ldr    r7, [r2]\n\t"
13478         "lsr    r6, r6, #16\n\t"
13479         "lsr    r7, r7, #16\n\t"
13480         "mul    r7, r6\n\t"
13481         "add    r4, r7\n\t"
13482         "adc    r5, %[r]\n\t"
13483         "add    r4, r7\n\t"
13484         "adc    r5, %[r]\n\t"
13485         "ldr    r7, [r2]\n\t"
13486         "lsl    r7, r7, #16\n\t"
13487         "lsr    r7, r7, #16\n\t"
13488         "mul    r6, r7\n\t"
13489         "lsr    r7, r6, #16\n\t"
13490         "lsl    r6, r6, #16\n\t"
13491         "add    r3, r6\n\t"
13492         "adc    r4, r7\n\t"
13493         "adc    r5, %[r]\n\t"
13494         "add    r3, r6\n\t"
13495         "adc    r4, r7\n\t"
13496         "adc    r5, %[r]\n\t"
13497         "# Multiply * 2: Done\n\t"
13498         "bal    5f\n\t"
13499         "\n4:\n\t"
13500         "# Square: Start\n\t"
13501         "ldr    r6, [%[a]]\n\t"
13502         "lsr    r7, r6, #16\n\t"
13503         "lsl    r6, r6, #16\n\t"
13504         "lsr    r6, r6, #16\n\t"
13505         "mul    r6, r6\n\t"
13506         "add    r3, r6\n\t"
13507         "adc    r4, %[r]\n\t"
13508         "adc    r5, %[r]\n\t"
13509         "mul    r7, r7\n\t"
13510         "add    r4, r7\n\t"
13511         "adc    r5, %[r]\n\t"
13512         "ldr    r6, [%[a]]\n\t"
13513         "lsr    r7, r6, #16\n\t"
13514         "lsl    r6, r6, #16\n\t"
13515         "lsr    r6, r6, #16\n\t"
13516         "mul    r6, r7\n\t"
13517         "lsr    r7, r6, #15\n\t"
13518         "lsl    r6, r6, #17\n\t"
13519         "add    r3, r6\n\t"
13520         "adc    r4, r7\n\t"
13521         "adc    r5, %[r]\n\t"
13522         "# Square: Done\n\t"
13523         "\n5:\n\t"
13524         "add    %[a], #4\n\t"
13525         "sub    r2, #4\n\t"
13526         "mov    r6, #2\n\t"
13527         "lsl    r6, r6, #8\n\t"
13528         "add    r6, r9\n\t"
13529         "cmp    %[a], r6\n\t"
13530         "beq    3f\n\t"
13531         "cmp    %[a], r2\n\t"
13532         "bgt    3f\n\t"
13533         "mov    r7, r8\n\t"
13534         "add    r7, r9\n\t"
13535         "cmp    %[a], r7\n\t"
13536         "ble    2b\n\t"
13537         "\n3:\n\t"
13538         "mov    %[r], r10\n\t"
13539         "mov    r7, r8\n\t"
13540         "str    r3, [%[r], r7]\n\t"
13541         "mov    r3, r4\n\t"
13542         "mov    r4, r5\n\t"
13543         "mov    r5, #0\n\t"
13544         "add    r7, #4\n\t"
13545         "mov    r8, r7\n\t"
13546         "mov    r6, #3\n\t"
13547         "lsl    r6, r6, #8\n\t"
13548         "add    r6, #248\n\t"
13549         "cmp    r7, r6\n\t"
13550         "ble    1b\n\t"
13551         "mov    %[a], r9\n\t"
13552         "str    r3, [%[r], r7]\n\t"
13553         "mov    %[r], r11\n\t"
13554         "mov    %[a], r10\n\t"
13555         "mov    r3, #3\n\t"
13556         "lsl    r3, r3, #8\n\t"
13557         "add    r3, #252\n\t"
13558         "\n4:\n\t"
13559         "ldr    r6, [%[a], r3]\n\t"
13560         "str    r6, [%[r], r3]\n\t"
13561         "sub    r3, #4\n\t"
13562         "bge    4b\n\t"
13563         "mov    r6, #4\n\t"
13564         "lsl    r6, r6, #8\n\t"
13565         "add    sp, r6\n\t"
13566         :
13567         : [r] "r" (r), [a] "r" (a)
13568         : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
13569     );
13570 }
13571 
13572 #endif /* WOLFSSL_SP_SMALL */
13573 /* Caclulate the bottom digit of -1/a mod 2^n.
13574  *
13575  * a    A single precision number.
13576  * rho  Bottom word of inverse.
13577  */
13578 static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho)
13579 {
13580     sp_digit x, b;
13581 
13582     b = a[0];
13583     x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
13584     x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
13585     x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
13586     x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
13587 
13588     /* rho = -1/m mod b */
13589     *rho = -x;
13590 }
13591 
13592 /* Mul a by digit b into r. (r = a * b)
13593  *
13594  * r  A single precision integer.
13595  * a  A single precision integer.
13596  * b  A single precision digit.
13597  */
13598 SP_NOINLINE static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a,
13599         sp_digit b)
13600 {
13601     __asm__ __volatile__ (
13602         "mov    r6, #2\n\t"
13603         "lsl    r6, r6, #8\n\t"
13604         "add    r6, %[a]\n\t"
13605         "mov    r8, %[r]\n\t"
13606         "mov    r9, r6\n\t"
13607         "mov    r3, #0\n\t"
13608         "mov    r4, #0\n\t"
13609         "1:\n\t"
13610         "mov    %[r], #0\n\t"
13611         "mov    r5, #0\n\t"
13612         "# A[] * B\n\t"
13613         "ldr    r6, [%[a]]\n\t"
13614         "lsl    r6, r6, #16\n\t"
13615         "lsl    r7, %[b], #16\n\t"
13616         "lsr    r6, r6, #16\n\t"
13617         "lsr    r7, r7, #16\n\t"
13618         "mul    r7, r6\n\t"
13619         "add    r3, r7\n\t"
13620         "adc    r4, %[r]\n\t"
13621         "adc    r5, %[r]\n\t"
13622         "lsr    r7, %[b], #16\n\t"
13623         "mul    r6, r7\n\t"
13624         "lsr    r7, r6, #16\n\t"
13625         "lsl    r6, r6, #16\n\t"
13626         "add    r3, r6\n\t"
13627         "adc    r4, r7\n\t"
13628         "adc    r5, %[r]\n\t"
13629         "ldr    r6, [%[a]]\n\t"
13630         "lsr    r6, r6, #16\n\t"
13631         "lsr    r7, %[b], #16\n\t"
13632         "mul    r7, r6\n\t"
13633         "add    r4, r7\n\t"
13634         "adc    r5, %[r]\n\t"
13635         "lsl    r7, %[b], #16\n\t"
13636         "lsr    r7, r7, #16\n\t"
13637         "mul    r6, r7\n\t"
13638         "lsr    r7, r6, #16\n\t"
13639         "lsl    r6, r6, #16\n\t"
13640         "add    r3, r6\n\t"
13641         "adc    r4, r7\n\t"
13642         "adc    r5, %[r]\n\t"
13643         "# A[] * B - Done\n\t"
13644         "mov    %[r], r8\n\t"
13645         "str    r3, [%[r]]\n\t"
13646         "mov    r3, r4\n\t"
13647         "mov    r4, r5\n\t"
13648         "add    %[r], #4\n\t"
13649         "add    %[a], #4\n\t"
13650         "mov    r8, %[r]\n\t"
13651         "cmp    %[a], r9\n\t"
13652         "blt    1b\n\t"
13653         "str    r3, [%[r]]\n\t"
13654         : [r] "+r" (r), [a] "+r" (a)
13655         : [b] "r" (b)
13656         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
13657     );
13658 }
13659 
13660 #if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
13661 /* r = 2^n mod m where n is the number of bits to reduce by.
13662  * Given m must be 4096 bits, just need to subtract.
13663  *
13664  * r  A single precision number.
13665  * m  A single precision number.
13666  */
13667 static void sp_4096_mont_norm_128(sp_digit* r, const sp_digit* m)
13668 {
13669     XMEMSET(r, 0, sizeof(sp_digit) * 128);
13670 
13671     /* r = 2^n mod m */
13672     sp_4096_sub_in_place_128(r, m);
13673 }
13674 
13675 #endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
13676 /* Conditionally subtract b from a using the mask m.
13677  * m is -1 to subtract and 0 when not copying.
13678  *
13679  * r  A single precision number representing condition subtract result.
13680  * a  A single precision number to subtract from.
13681  * b  A single precision number to subtract.
13682  * m  Mask value to apply.
13683  */
13684 SP_NOINLINE static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a,
13685         const sp_digit* b, sp_digit m)
13686 {
13687     sp_digit c = 0;
13688 
13689     __asm__ __volatile__ (
13690         "mov    r5, #2\n\t"
13691         "lsl    r5, r5, #8\n\t"
13692         "mov    r8, r5\n\t"
13693         "mov    r7, #0\n\t"
13694         "1:\n\t"
13695         "ldr    r6, [%[b], r7]\n\t"
13696         "and    r6, %[m]\n\t"
13697         "mov    r5, #0\n\t"
13698         "sub    r5, %[c]\n\t"
13699         "ldr    r5, [%[a], r7]\n\t"
13700         "sbc    r5, r6\n\t"
13701         "sbc    %[c], %[c]\n\t"
13702         "str    r5, [%[r], r7]\n\t"
13703         "add    r7, #4\n\t"
13704         "cmp    r7, r8\n\t"
13705         "blt    1b\n\t"
13706         : [c] "+r" (c)
13707         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
13708         : "memory", "r5", "r6", "r7", "r8"
13709     );
13710 
13711     return c;
13712 }
13713 
13714 /* Reduce the number back to 4096 bits using Montgomery reduction.
13715  *
13716  * a   A single precision number to reduce in place.
13717  * m   The single precision number representing the modulus.
13718  * mp  The digit representing the negative inverse of m mod 2^n.
13719  */
13720 SP_NOINLINE static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m,
13721         sp_digit mp)
13722 {
13723     sp_digit ca = 0;
13724 
13725     __asm__ __volatile__ (
13726         "mov    r8, %[mp]\n\t"
13727         "mov    r12, %[ca]\n\t"
13728         "mov    r14, %[m]\n\t"
13729         "mov    r9, %[a]\n\t"
13730         "mov    r4, #0\n\t"
13731         "# i = 0\n\t"
13732         "mov    r11, r4\n\t"
13733         "\n1:\n\t"
13734         "mov    r5, #0\n\t"
13735         "mov    %[ca], #0\n\t"
13736         "# mu = a[i] * mp\n\t"
13737         "mov    %[mp], r8\n\t"
13738         "ldr    %[a], [%[a]]\n\t"
13739         "mul    %[mp], %[a]\n\t"
13740         "mov    %[m], r14\n\t"
13741         "mov    r10, r9\n\t"
13742         "\n2:\n\t"
13743         "# a[i+j] += m[j] * mu\n\t"
13744         "mov    %[a], r10\n\t"
13745         "ldr    %[a], [%[a]]\n\t"
13746         "mov    %[ca], #0\n\t"
13747         "mov    r4, r5\n\t"
13748         "mov    r5, #0\n\t"
13749         "# Multiply m[j] and mu - Start\n\t"
13750         "ldr    r7, [%[m]]\n\t"
13751         "lsl    r6, %[mp], #16\n\t"
13752         "lsl    r7, r7, #16\n\t"
13753         "lsr    r6, r6, #16\n\t"
13754         "lsr    r7, r7, #16\n\t"
13755         "mul    r7, r6\n\t"
13756         "add    %[a], r7\n\t"
13757         "adc    r5, %[ca]\n\t"
13758         "ldr    r7, [%[m]]\n\t"
13759         "lsr    r7, r7, #16\n\t"
13760         "mul    r6, r7\n\t"
13761         "lsr    r7, r6, #16\n\t"
13762         "lsl    r6, r6, #16\n\t"
13763         "add    %[a], r6\n\t"
13764         "adc    r5, r7\n\t"
13765         "ldr    r7, [%[m]]\n\t"
13766         "lsr    r6, %[mp], #16\n\t"
13767         "lsr    r7, r7, #16\n\t"
13768         "mul    r7, r6\n\t"
13769         "add    r5, r7\n\t"
13770         "ldr    r7, [%[m]]\n\t"
13771         "lsl    r7, r7, #16\n\t"
13772         "lsr    r7, r7, #16\n\t"
13773         "mul    r6, r7\n\t"
13774         "lsr    r7, r6, #16\n\t"
13775         "lsl    r6, r6, #16\n\t"
13776         "add    %[a], r6\n\t"
13777         "adc    r5, r7\n\t"
13778         "# Multiply m[j] and mu - Done\n\t"
13779         "add    r4, %[a]\n\t"
13780         "adc    r5, %[ca]\n\t"
13781         "mov    %[a], r10\n\t"
13782         "str    r4, [%[a]]\n\t"
13783         "mov    r6, #4\n\t"
13784         "add    %[m], #4\n\t"
13785         "add    r10, r6\n\t"
13786         "mov    r4, #1\n\t"
13787         "lsl    r4, r4, #8\n\t"
13788         "add    r4, #252\n\t"
13789         "add    r4, r9\n\t"
13790         "cmp    r10, r4\n\t"
13791         "blt    2b\n\t"
13792         "# a[i+127] += m[127] * mu\n\t"
13793         "mov    %[ca], #0\n\t"
13794         "mov    r4, r12\n\t"
13795         "mov    %[a], #0\n\t"
13796         "# Multiply m[127] and mu - Start\n\t"
13797         "ldr    r7, [%[m]]\n\t"
13798         "lsl    r6, %[mp], #16\n\t"
13799         "lsl    r7, r7, #16\n\t"
13800         "lsr    r6, r6, #16\n\t"
13801         "lsr    r7, r7, #16\n\t"
13802         "mul    r7, r6\n\t"
13803         "add    r5, r7\n\t"
13804         "adc    r4, %[ca]\n\t"
13805         "adc    %[a], %[ca]\n\t"
13806         "ldr    r7, [%[m]]\n\t"
13807         "lsr    r7, r7, #16\n\t"
13808         "mul    r6, r7\n\t"
13809         "lsr    r7, r6, #16\n\t"
13810         "lsl    r6, r6, #16\n\t"
13811         "add    r5, r6\n\t"
13812         "adc    r4, r7\n\t"
13813         "adc    %[a], %[ca]\n\t"
13814         "ldr    r7, [%[m]]\n\t"
13815         "lsr    r6, %[mp], #16\n\t"
13816         "lsr    r7, r7, #16\n\t"
13817         "mul    r7, r6\n\t"
13818         "add    r4, r7\n\t"
13819         "adc    %[a], %[ca]\n\t"
13820         "ldr    r7, [%[m]]\n\t"
13821         "lsl    r7, r7, #16\n\t"
13822         "lsr    r7, r7, #16\n\t"
13823         "mul    r6, r7\n\t"
13824         "lsr    r7, r6, #16\n\t"
13825         "lsl    r6, r6, #16\n\t"
13826         "add    r5, r6\n\t"
13827         "adc    r4, r7\n\t"
13828         "adc    %[a], %[ca]\n\t"
13829         "# Multiply m[127] and mu - Done\n\t"
13830         "mov    %[ca], %[a]\n\t"
13831         "mov    %[a], r10\n\t"
13832         "ldr    r7, [%[a], #4]\n\t"
13833         "ldr    %[a], [%[a]]\n\t"
13834         "mov    r6, #0\n\t"
13835         "add    r5, %[a]\n\t"
13836         "adc    r7, r4\n\t"
13837         "adc    %[ca], r6\n\t"
13838         "mov    %[a], r10\n\t"
13839         "str    r5, [%[a]]\n\t"
13840         "str    r7, [%[a], #4]\n\t"
13841         "# i += 1\n\t"
13842         "mov    r6, #4\n\t"
13843         "add    r9, r6\n\t"
13844         "add    r11, r6\n\t"
13845         "mov    r12, %[ca]\n\t"
13846         "mov    %[a], r9\n\t"
13847         "mov    r4, #2\n\t"
13848         "lsl    r4, r4, #8\n\t"
13849         "cmp    r11, r4\n\t"
13850         "blt    1b\n\t"
13851         "mov    %[m], r14\n\t"
13852         : [ca] "+r" (ca), [a] "+r" (a)
13853         : [m] "r" (m), [mp] "r" (mp)
13854         : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14"
13855     );
13856 
13857     sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - ca);
13858 }
13859 
13860 /* Multiply two Montogmery form numbers mod the modulus (prime).
13861  * (r = a * b mod m)
13862  *
13863  * r   Result of multiplication.
13864  * a   First number to multiply in Montogmery form.
13865  * b   Second number to multiply in Montogmery form.
13866  * m   Modulus (prime).
13867  * mp  Montogmery mulitplier.
13868  */
13869 static void sp_4096_mont_mul_128(sp_digit* r, const sp_digit* a, const sp_digit* b,
13870         const sp_digit* m, sp_digit mp)
13871 {
13872     sp_4096_mul_128(r, a, b);
13873     sp_4096_mont_reduce_128(r, m, mp);
13874 }
13875 
13876 /* Square the Montgomery form number. (r = a * a mod m)
13877  *
13878  * r   Result of squaring.
13879  * a   Number to square in Montogmery form.
13880  * m   Modulus (prime).
13881  * mp  Montogmery mulitplier.
13882  */
13883 static void sp_4096_mont_sqr_128(sp_digit* r, const sp_digit* a, const sp_digit* m,
13884         sp_digit mp)
13885 {
13886     sp_4096_sqr_128(r, a);
13887     sp_4096_mont_reduce_128(r, m, mp);
13888 }
13889 
13890 /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
13891  *
13892  * d1   The high order half of the number to divide.
13893  * d0   The low order half of the number to divide.
13894  * div  The dividend.
13895  * returns the result of the division.
13896  *
13897  * Note that this is an approximate div. It may give an answer 1 larger.
13898  */
13899 SP_NOINLINE static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0,
13900         sp_digit div)
13901 {
13902     sp_digit r = 0;
13903 
13904     __asm__ __volatile__ (
13905         "lsr    r5, %[div], #1\n\t"
13906         "add    r5, #1\n\t"
13907         "mov    r8, %[d0]\n\t"
13908         "mov    r9, %[d1]\n\t"
13909         "# Do top 32\n\t"
13910         "mov    r6, r5\n\t"
13911         "sub    r6, %[d1]\n\t"
13912         "sbc    r6, r6\n\t"
13913         "add    %[r], %[r]\n\t"
13914         "sub    %[r], r6\n\t"
13915         "and    r6, r5\n\t"
13916         "sub    %[d1], r6\n\t"
13917         "# Next 30 bits\n\t"
13918         "mov    r4, #29\n\t"
13919         "1:\n\t"
13920         "lsl    %[d0], %[d0], #1\n\t"
13921         "adc    %[d1], %[d1]\n\t"
13922         "mov    r6, r5\n\t"
13923         "sub    r6, %[d1]\n\t"
13924         "sbc    r6, r6\n\t"
13925         "add    %[r], %[r]\n\t"
13926         "sub    %[r], r6\n\t"
13927         "and    r6, r5\n\t"
13928         "sub    %[d1], r6\n\t"
13929         "sub    r4, #1\n\t"
13930         "bpl    1b\n\t"
13931         "mov    r7, #0\n\t"
13932         "add    %[r], %[r]\n\t"
13933         "add    %[r], #1\n\t"
13934         "# r * div - Start\n\t"
13935         "lsl    %[d1], %[r], #16\n\t"
13936         "lsl    r4, %[div], #16\n\t"
13937         "lsr    %[d1], %[d1], #16\n\t"
13938         "lsr    r4, r4, #16\n\t"
13939         "mul    r4, %[d1]\n\t"
13940         "lsr    r6, %[div], #16\n\t"
13941         "mul    %[d1], r6\n\t"
13942         "lsr    r5, %[d1], #16\n\t"
13943         "lsl    %[d1], %[d1], #16\n\t"
13944         "add    r4, %[d1]\n\t"
13945         "adc    r5, r7\n\t"
13946         "lsr    %[d1], %[r], #16\n\t"
13947         "mul    r6, %[d1]\n\t"
13948         "add    r5, r6\n\t"
13949         "lsl    r6, %[div], #16\n\t"
13950         "lsr    r6, r6, #16\n\t"
13951         "mul    %[d1], r6\n\t"
13952         "lsr    r6, %[d1], #16\n\t"
13953         "lsl    %[d1], %[d1], #16\n\t"
13954         "add    r4, %[d1]\n\t"
13955         "adc    r5, r6\n\t"
13956         "# r * div - Done\n\t"
13957         "mov    %[d1], r8\n\t"
13958         "sub    %[d1], r4\n\t"
13959         "mov    r4, %[d1]\n\t"
13960         "mov    %[d1], r9\n\t"
13961         "sbc    %[d1], r5\n\t"
13962         "mov    r5, %[d1]\n\t"
13963         "add    %[r], r5\n\t"
13964         "# r * div - Start\n\t"
13965         "lsl    %[d1], %[r], #16\n\t"
13966         "lsl    r4, %[div], #16\n\t"
13967         "lsr    %[d1], %[d1], #16\n\t"
13968         "lsr    r4, r4, #16\n\t"
13969         "mul    r4, %[d1]\n\t"
13970         "lsr    r6, %[div], #16\n\t"
13971         "mul    %[d1], r6\n\t"
13972         "lsr    r5, %[d1], #16\n\t"
13973         "lsl    %[d1], %[d1], #16\n\t"
13974         "add    r4, %[d1]\n\t"
13975         "adc    r5, r7\n\t"
13976         "lsr    %[d1], %[r], #16\n\t"
13977         "mul    r6, %[d1]\n\t"
13978         "add    r5, r6\n\t"
13979         "lsl    r6, %[div], #16\n\t"
13980         "lsr    r6, r6, #16\n\t"
13981         "mul    %[d1], r6\n\t"
13982         "lsr    r6, %[d1], #16\n\t"
13983         "lsl    %[d1], %[d1], #16\n\t"
13984         "add    r4, %[d1]\n\t"
13985         "adc    r5, r6\n\t"
13986         "# r * div - Done\n\t"
13987         "mov    %[d1], r8\n\t"
13988         "mov    r6, r9\n\t"
13989         "sub    r4, %[d1], r4\n\t"
13990         "sbc    r6, r5\n\t"
13991         "mov    r5, r6\n\t"
13992         "add    %[r], r5\n\t"
13993         "# r * div - Start\n\t"
13994         "lsl    %[d1], %[r], #16\n\t"
13995         "lsl    r4, %[div], #16\n\t"
13996         "lsr    %[d1], %[d1], #16\n\t"
13997         "lsr    r4, r4, #16\n\t"
13998         "mul    r4, %[d1]\n\t"
13999         "lsr    r6, %[div], #16\n\t"
14000         "mul    %[d1], r6\n\t"
14001         "lsr    r5, %[d1], #16\n\t"
14002         "lsl    %[d1], %[d1], #16\n\t"
14003         "add    r4, %[d1]\n\t"
14004         "adc    r5, r7\n\t"
14005         "lsr    %[d1], %[r], #16\n\t"
14006         "mul    r6, %[d1]\n\t"
14007         "add    r5, r6\n\t"
14008         "lsl    r6, %[div], #16\n\t"
14009         "lsr    r6, r6, #16\n\t"
14010         "mul    %[d1], r6\n\t"
14011         "lsr    r6, %[d1], #16\n\t"
14012         "lsl    %[d1], %[d1], #16\n\t"
14013         "add    r4, %[d1]\n\t"
14014         "adc    r5, r6\n\t"
14015         "# r * div - Done\n\t"
14016         "mov    %[d1], r8\n\t"
14017         "mov    r6, r9\n\t"
14018         "sub    r4, %[d1], r4\n\t"
14019         "sbc    r6, r5\n\t"
14020         "mov    r5, r6\n\t"
14021         "add    %[r], r5\n\t"
14022         "mov    r6, %[div]\n\t"
14023         "sub    r6, r4\n\t"
14024         "sbc    r6, r6\n\t"
14025         "sub    %[r], r6\n\t"
14026         : [r] "+r" (r)
14027         : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
14028         : "r4", "r5", "r7", "r6", "r8", "r9"
14029     );
14030     return r;
14031 }
14032 
14033 /* AND m into each word of a and store in r.
14034  *
14035  * r  A single precision integer.
14036  * a  A single precision integer.
14037  * m  Mask to AND against each digit.
14038  */
14039 static void sp_4096_mask_128(sp_digit* r, const sp_digit* a, sp_digit m)
14040 {
14041 #ifdef WOLFSSL_SP_SMALL
14042     int i;
14043 
14044     for (i=0; i<128; i++) {
14045         r[i] = a[i] & m;
14046     }
14047 #else
14048     int i;
14049 
14050     for (i = 0; i < 128; i += 8) {
14051         r[i+0] = a[i+0] & m;
14052         r[i+1] = a[i+1] & m;
14053         r[i+2] = a[i+2] & m;
14054         r[i+3] = a[i+3] & m;
14055         r[i+4] = a[i+4] & m;
14056         r[i+5] = a[i+5] & m;
14057         r[i+6] = a[i+6] & m;
14058         r[i+7] = a[i+7] & m;
14059     }
14060 #endif
14061 }
14062 
14063 /* Compare a with b in constant time.
14064  *
14065  * a  A single precision integer.
14066  * b  A single precision integer.
14067  * return -ve, 0 or +ve if a is less than, equal to or greater than b
14068  * respectively.
14069  */
14070 SP_NOINLINE static int32_t sp_4096_cmp_128(const sp_digit* a, const sp_digit* b)
14071 {
14072     sp_digit r = 0;
14073 
14074 
14075     __asm__ __volatile__ (
14076         "mov    r3, #0\n\t"
14077         "mvn    r3, r3\n\t"
14078         "mov    r6, #1\n\t"
14079         "lsl    r6, r6, #8\n\t"
14080         "add    r6, #252\n\t"
14081         "1:\n\t"
14082         "ldr    r7, [%[a], r6]\n\t"
14083         "ldr    r5, [%[b], r6]\n\t"
14084         "and    r7, r3\n\t"
14085         "and    r5, r3\n\t"
14086         "mov    r4, r7\n\t"
14087         "sub    r7, r5\n\t"
14088         "sbc    r7, r7\n\t"
14089         "add    %[r], r7\n\t"
14090         "mvn    r7, r7\n\t"
14091         "and    r3, r7\n\t"
14092         "sub    r5, r4\n\t"
14093         "sbc    r7, r7\n\t"
14094         "sub    %[r], r7\n\t"
14095         "mvn    r7, r7\n\t"
14096         "and    r3, r7\n\t"
14097         "sub    r6, #4\n\t"
14098         "cmp    r6, #0\n\t"
14099         "bge    1b\n\t"
14100         : [r] "+r" (r)
14101         : [a] "r" (a), [b] "r" (b)
14102         : "r3", "r4", "r5", "r6", "r7"
14103     );
14104 
14105     return r;
14106 }
14107 
14108 /* Divide d in a and put remainder into r (m*d + r = a)
14109  * m is not calculated as it is not needed at this time.
14110  *
14111  * a  Nmber to be divided.
14112  * d  Number to divide with.
14113  * m  Multiplier result.
14114  * r  Remainder from the division.
14115  * returns MP_OKAY indicating success.
14116  */
14117 static WC_INLINE int sp_4096_div_128(const sp_digit* a, const sp_digit* d, sp_digit* m,
14118         sp_digit* r)
14119 {
14120     sp_digit t1[256], t2[129];
14121     sp_digit div, r1;
14122     int i;
14123 
14124     (void)m;
14125 
14126     div = d[127];
14127     XMEMCPY(t1, a, sizeof(*t1) * 2 * 128);
14128     for (i=127; i>=0; i--) {
14129         r1 = div_4096_word_128(t1[128 + i], t1[128 + i - 1], div);
14130 
14131         sp_4096_mul_d_128(t2, d, r1);
14132         t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2);
14133         t1[128 + i] -= t2[128];
14134         sp_4096_mask_128(t2, d, t1[128 + i]);
14135         t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], t2);
14136         sp_4096_mask_128(t2, d, t1[128 + i]);
14137         t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], t2);
14138     }
14139 
14140     r1 = sp_4096_cmp_128(t1, d) >= 0;
14141     sp_4096_cond_sub_128(r, t1, d, (sp_digit)0 - r1);
14142 
14143     return MP_OKAY;
14144 }
14145 
14146 /* Reduce a modulo m into r. (r = a mod m)
14147  *
14148  * r  A single precision number that is the reduced result.
14149  * a  A single precision number that is to be reduced.
14150  * m  A single precision number that is the modulus to reduce with.
14151  * returns MP_OKAY indicating success.
14152  */
14153 static WC_INLINE int sp_4096_mod_128(sp_digit* r, const sp_digit* a, const sp_digit* m)
14154 {
14155     return sp_4096_div_128(a, m, NULL, r);
14156 }
14157 
14158 /* Divide d in a and put remainder into r (m*d + r = a)
14159  * m is not calculated as it is not needed at this time.
14160  *
14161  * a  Nmber to be divided.
14162  * d  Number to divide with.
14163  * m  Multiplier result.
14164  * r  Remainder from the division.
14165  * returns MP_OKAY indicating success.
14166  */
14167 static WC_INLINE int sp_4096_div_128_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
14168         sp_digit* r)
14169 {
14170     sp_digit t1[256], t2[129];
14171     sp_digit div, r1;
14172     int i;
14173 
14174     (void)m;
14175 
14176     div = d[127];
14177     XMEMCPY(t1, a, sizeof(*t1) * 2 * 128);
14178     for (i=127; i>=0; i--) {
14179         r1 = div_4096_word_128(t1[128 + i], t1[128 + i - 1], div);
14180 
14181         sp_4096_mul_d_128(t2, d, r1);
14182         t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2);
14183         t1[128 + i] -= t2[128];
14184         if (t1[128 + i] != 0) {
14185             t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], d);
14186             if (t1[128 + i] != 0)
14187                 t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], d);
14188         }
14189     }
14190 
14191     r1 = sp_4096_cmp_128(t1, d) >= 0;
14192     sp_4096_cond_sub_128(r, t1, d, (sp_digit)0 - r1);
14193 
14194     return MP_OKAY;
14195 }
14196 
14197 /* Reduce a modulo m into r. (r = a mod m)
14198  *
14199  * r  A single precision number that is the reduced result.
14200  * a  A single precision number that is to be reduced.
14201  * m  A single precision number that is the modulus to reduce with.
14202  * returns MP_OKAY indicating success.
14203  */
14204 static WC_INLINE int sp_4096_mod_128_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
14205 {
14206     return sp_4096_div_128_cond(a, m, NULL, r);
14207 }
14208 
14209 #if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
14210                                                      defined(WOLFSSL_HAVE_SP_DH)
14211 #ifdef WOLFSSL_SP_SMALL
14212 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
14213  *
14214  * r     A single precision number that is the result of the operation.
14215  * a     A single precision number being exponentiated.
14216  * e     A single precision number that is the exponent.
14217  * bits  The number of bits in the exponent.
14218  * m     A single precision number that is the modulus.
14219  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
14220  */
14221 static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e,
14222         int bits, const sp_digit* m, int reduceA)
14223 {
14224 #ifndef WOLFSSL_SMALL_STACK
14225     sp_digit t[16][256];
14226 #else
14227     sp_digit* t[16];
14228     sp_digit* td;
14229 #endif
14230     sp_digit* norm;
14231     sp_digit mp = 1;
14232     sp_digit n;
14233     sp_digit mask;
14234     int i;
14235     int c, y;
14236     int err = MP_OKAY;
14237 
14238 #ifdef WOLFSSL_SMALL_STACK
14239     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 256, NULL,
14240                             DYNAMIC_TYPE_TMP_BUFFER);
14241     if (td == NULL) {
14242         err = MEMORY_E;
14243     }
14244 #endif
14245 
14246     if (err == MP_OKAY) {
14247 #ifdef WOLFSSL_SMALL_STACK
14248         for (i=0; i<16; i++) {
14249             t[i] = td + i * 256;
14250         }
14251 #endif
14252         norm = t[0];
14253 
14254         sp_4096_mont_setup(m, &mp);
14255         sp_4096_mont_norm_128(norm, m);
14256 
14257         XMEMSET(t[1], 0, sizeof(sp_digit) * 128U);
14258         if (reduceA != 0) {
14259             err = sp_4096_mod_128(t[1] + 128, a, m);
14260             if (err == MP_OKAY) {
14261                 err = sp_4096_mod_128(t[1], t[1], m);
14262             }
14263         }
14264         else {
14265             XMEMCPY(t[1] + 128, a, sizeof(sp_digit) * 128);
14266             err = sp_4096_mod_128(t[1], t[1], m);
14267         }
14268     }
14269 
14270     if (err == MP_OKAY) {
14271         sp_4096_mont_sqr_128(t[ 2], t[ 1], m, mp);
14272         sp_4096_mont_mul_128(t[ 3], t[ 2], t[ 1], m, mp);
14273         sp_4096_mont_sqr_128(t[ 4], t[ 2], m, mp);
14274         sp_4096_mont_mul_128(t[ 5], t[ 3], t[ 2], m, mp);
14275         sp_4096_mont_sqr_128(t[ 6], t[ 3], m, mp);
14276         sp_4096_mont_mul_128(t[ 7], t[ 4], t[ 3], m, mp);
14277         sp_4096_mont_sqr_128(t[ 8], t[ 4], m, mp);
14278         sp_4096_mont_mul_128(t[ 9], t[ 5], t[ 4], m, mp);
14279         sp_4096_mont_sqr_128(t[10], t[ 5], m, mp);
14280         sp_4096_mont_mul_128(t[11], t[ 6], t[ 5], m, mp);
14281         sp_4096_mont_sqr_128(t[12], t[ 6], m, mp);
14282         sp_4096_mont_mul_128(t[13], t[ 7], t[ 6], m, mp);
14283         sp_4096_mont_sqr_128(t[14], t[ 7], m, mp);
14284         sp_4096_mont_mul_128(t[15], t[ 8], t[ 7], m, mp);
14285 
14286         i = (bits - 1) / 32;
14287         n = e[i--];
14288         c = bits & 31;
14289         if (c == 0) {
14290             c = 32;
14291         }
14292         c -= bits % 4;
14293         if (c == 32) {
14294             c = 28;
14295         }
14296         y = (int)(n >> c);
14297         n <<= 32 - c;
14298         XMEMCPY(r, t[y], sizeof(sp_digit) * 128);
14299         for (; i>=0 || c>=4; ) {
14300             if (c == 0) {
14301                 n = e[i--];
14302                 y = n >> 28;
14303                 n <<= 4;
14304                 c = 28;
14305             }
14306             else if (c < 4) {
14307                 y = n >> 28;
14308                 n = e[i--];
14309                 c = 4 - c;
14310                 y |= n >> (32 - c);
14311                 n <<= c;
14312                 c = 32 - c;
14313             }
14314             else {
14315                 y = (n >> 28) & 0xf;
14316                 n <<= 4;
14317                 c -= 4;
14318             }
14319 
14320             sp_4096_mont_sqr_128(r, r, m, mp);
14321             sp_4096_mont_sqr_128(r, r, m, mp);
14322             sp_4096_mont_sqr_128(r, r, m, mp);
14323             sp_4096_mont_sqr_128(r, r, m, mp);
14324 
14325             sp_4096_mont_mul_128(r, r, t[y], m, mp);
14326         }
14327 
14328         XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U);
14329         sp_4096_mont_reduce_128(r, m, mp);
14330 
14331         mask = 0 - (sp_4096_cmp_128(r, m) >= 0);
14332         sp_4096_cond_sub_128(r, r, m, mask);
14333     }
14334 
14335 #ifdef WOLFSSL_SMALL_STACK
14336     if (td != NULL) {
14337         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
14338     }
14339 #endif
14340 
14341     return err;
14342 }
14343 #else
14344 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
14345  *
14346  * r     A single precision number that is the result of the operation.
14347  * a     A single precision number being exponentiated.
14348  * e     A single precision number that is the exponent.
14349  * bits  The number of bits in the exponent.
14350  * m     A single precision number that is the modulus.
14351  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
14352  */
14353 static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e,
14354         int bits, const sp_digit* m, int reduceA)
14355 {
14356 #ifndef WOLFSSL_SMALL_STACK
14357     sp_digit t[32][256];
14358 #else
14359     sp_digit* t[32];
14360     sp_digit* td;
14361 #endif
14362     sp_digit* norm;
14363     sp_digit mp = 1;
14364     sp_digit n;
14365     sp_digit mask;
14366     int i;
14367     int c, y;
14368     int err = MP_OKAY;
14369 
14370 #ifdef WOLFSSL_SMALL_STACK
14371     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 256, NULL,
14372                             DYNAMIC_TYPE_TMP_BUFFER);
14373     if (td == NULL) {
14374         err = MEMORY_E;
14375     }
14376 #endif
14377 
14378     if (err == MP_OKAY) {
14379 #ifdef WOLFSSL_SMALL_STACK
14380         for (i=0; i<32; i++) {
14381             t[i] = td + i * 256;
14382         }
14383 #endif
14384         norm = t[0];
14385 
14386         sp_4096_mont_setup(m, &mp);
14387         sp_4096_mont_norm_128(norm, m);
14388 
14389         XMEMSET(t[1], 0, sizeof(sp_digit) * 128U);
14390         if (reduceA != 0) {
14391             err = sp_4096_mod_128(t[1] + 128, a, m);
14392             if (err == MP_OKAY) {
14393                 err = sp_4096_mod_128(t[1], t[1], m);
14394             }
14395         }
14396         else {
14397             XMEMCPY(t[1] + 128, a, sizeof(sp_digit) * 128);
14398             err = sp_4096_mod_128(t[1], t[1], m);
14399         }
14400     }
14401 
14402     if (err == MP_OKAY) {
14403         sp_4096_mont_sqr_128(t[ 2], t[ 1], m, mp);
14404         sp_4096_mont_mul_128(t[ 3], t[ 2], t[ 1], m, mp);
14405         sp_4096_mont_sqr_128(t[ 4], t[ 2], m, mp);
14406         sp_4096_mont_mul_128(t[ 5], t[ 3], t[ 2], m, mp);
14407         sp_4096_mont_sqr_128(t[ 6], t[ 3], m, mp);
14408         sp_4096_mont_mul_128(t[ 7], t[ 4], t[ 3], m, mp);
14409         sp_4096_mont_sqr_128(t[ 8], t[ 4], m, mp);
14410         sp_4096_mont_mul_128(t[ 9], t[ 5], t[ 4], m, mp);
14411         sp_4096_mont_sqr_128(t[10], t[ 5], m, mp);
14412         sp_4096_mont_mul_128(t[11], t[ 6], t[ 5], m, mp);
14413         sp_4096_mont_sqr_128(t[12], t[ 6], m, mp);
14414         sp_4096_mont_mul_128(t[13], t[ 7], t[ 6], m, mp);
14415         sp_4096_mont_sqr_128(t[14], t[ 7], m, mp);
14416         sp_4096_mont_mul_128(t[15], t[ 8], t[ 7], m, mp);
14417         sp_4096_mont_sqr_128(t[16], t[ 8], m, mp);
14418         sp_4096_mont_mul_128(t[17], t[ 9], t[ 8], m, mp);
14419         sp_4096_mont_sqr_128(t[18], t[ 9], m, mp);
14420         sp_4096_mont_mul_128(t[19], t[10], t[ 9], m, mp);
14421         sp_4096_mont_sqr_128(t[20], t[10], m, mp);
14422         sp_4096_mont_mul_128(t[21], t[11], t[10], m, mp);
14423         sp_4096_mont_sqr_128(t[22], t[11], m, mp);
14424         sp_4096_mont_mul_128(t[23], t[12], t[11], m, mp);
14425         sp_4096_mont_sqr_128(t[24], t[12], m, mp);
14426         sp_4096_mont_mul_128(t[25], t[13], t[12], m, mp);
14427         sp_4096_mont_sqr_128(t[26], t[13], m, mp);
14428         sp_4096_mont_mul_128(t[27], t[14], t[13], m, mp);
14429         sp_4096_mont_sqr_128(t[28], t[14], m, mp);
14430         sp_4096_mont_mul_128(t[29], t[15], t[14], m, mp);
14431         sp_4096_mont_sqr_128(t[30], t[15], m, mp);
14432         sp_4096_mont_mul_128(t[31], t[16], t[15], m, mp);
14433 
14434         i = (bits - 1) / 32;
14435         n = e[i--];
14436         c = bits & 31;
14437         if (c == 0) {
14438             c = 32;
14439         }
14440         c -= bits % 5;
14441         if (c == 32) {
14442             c = 27;
14443         }
14444         y = (int)(n >> c);
14445         n <<= 32 - c;
14446         XMEMCPY(r, t[y], sizeof(sp_digit) * 128);
14447         for (; i>=0 || c>=5; ) {
14448             if (c == 0) {
14449                 n = e[i--];
14450                 y = n >> 27;
14451                 n <<= 5;
14452                 c = 27;
14453             }
14454             else if (c < 5) {
14455                 y = n >> 27;
14456                 n = e[i--];
14457                 c = 5 - c;
14458                 y |= n >> (32 - c);
14459                 n <<= c;
14460                 c = 32 - c;
14461             }
14462             else {
14463                 y = (n >> 27) & 0x1f;
14464                 n <<= 5;
14465                 c -= 5;
14466             }
14467 
14468             sp_4096_mont_sqr_128(r, r, m, mp);
14469             sp_4096_mont_sqr_128(r, r, m, mp);
14470             sp_4096_mont_sqr_128(r, r, m, mp);
14471             sp_4096_mont_sqr_128(r, r, m, mp);
14472             sp_4096_mont_sqr_128(r, r, m, mp);
14473 
14474             sp_4096_mont_mul_128(r, r, t[y], m, mp);
14475         }
14476 
14477         XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U);
14478         sp_4096_mont_reduce_128(r, m, mp);
14479 
14480         mask = 0 - (sp_4096_cmp_128(r, m) >= 0);
14481         sp_4096_cond_sub_128(r, r, m, mask);
14482     }
14483 
14484 #ifdef WOLFSSL_SMALL_STACK
14485     if (td != NULL) {
14486         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
14487     }
14488 #endif
14489 
14490     return err;
14491 }
14492 #endif /* WOLFSSL_SP_SMALL */
14493 #endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
14494 
14495 #ifdef WOLFSSL_HAVE_SP_RSA
14496 /* RSA public key operation.
14497  *
14498  * in      Array of bytes representing the number to exponentiate, base.
14499  * inLen   Number of bytes in base.
14500  * em      Public exponent.
14501  * mm      Modulus.
14502  * out     Buffer to hold big-endian bytes of exponentiation result.
14503  *         Must be at least 512 bytes long.
14504  * outLen  Number of bytes in result.
14505  * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
14506  * an array is too long and MEMORY_E when dynamic memory allocation fails.
14507  */
14508 int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
14509     byte* out, word32* outLen)
14510 {
14511 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
14512     sp_digit a[256], m[128], r[256];
14513 #else
14514     sp_digit* d = NULL;
14515     sp_digit* a;
14516     sp_digit* m;
14517     sp_digit* r;
14518 #endif
14519     sp_digit *ah;
14520     sp_digit e[1];
14521     int err = MP_OKAY;
14522 
14523     if (*outLen < 512)
14524         err = MP_TO_E;
14525     if (err == MP_OKAY && (mp_count_bits(em) > 32 || inLen > 512 ||
14526                                                      mp_count_bits(mm) != 4096))
14527         err = MP_READ_E;
14528 
14529 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
14530     if (err == MP_OKAY) {
14531         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 128 * 5, NULL,
14532                                                               DYNAMIC_TYPE_RSA);
14533         if (d == NULL)
14534             err = MEMORY_E;
14535     }
14536 
14537     if (err == MP_OKAY) {
14538         a = d;
14539         r = a + 128 * 2;
14540         m = r + 128 * 2;
14541     }
14542 #endif
14543 
14544     if (err == MP_OKAY) {
14545         ah = a + 128;
14546 
14547         sp_4096_from_bin(ah, 128, in, inLen);
14548 #if DIGIT_BIT >= 32
14549         e[0] = em->dp[0];
14550 #else
14551         e[0] = em->dp[0];
14552         if (em->used > 1) {
14553             e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
14554         }
14555 #endif
14556         if (e[0] == 0) {
14557             err = MP_EXPTMOD_E;
14558         }
14559     }
14560     if (err == MP_OKAY) {
14561         sp_4096_from_mp(m, 128, mm);
14562 
14563         if (e[0] == 0x3) {
14564             if (err == MP_OKAY) {
14565                 sp_4096_sqr_128(r, ah);
14566                 err = sp_4096_mod_128_cond(r, r, m);
14567             }
14568             if (err == MP_OKAY) {
14569                 sp_4096_mul_128(r, ah, r);
14570                 err = sp_4096_mod_128_cond(r, r, m);
14571             }
14572         }
14573         else {
14574             int i;
14575             sp_digit mp;
14576 
14577             sp_4096_mont_setup(m, &mp);
14578 
14579             /* Convert to Montgomery form. */
14580             XMEMSET(a, 0, sizeof(sp_digit) * 128);
14581             err = sp_4096_mod_128_cond(a, a, m);
14582 
14583             if (err == MP_OKAY) {
14584                 for (i = 31; i >= 0; i--) {
14585                     if (e[0] >> i) {
14586                         break;
14587                     }
14588                 }
14589 
14590                 XMEMCPY(r, a, sizeof(sp_digit) * 128);
14591                 for (i--; i>=0; i--) {
14592                     sp_4096_mont_sqr_128(r, r, m, mp);
14593                     if (((e[0] >> i) & 1) == 1) {
14594                         sp_4096_mont_mul_128(r, r, a, m, mp);
14595                     }
14596                 }
14597                 XMEMSET(&r[128], 0, sizeof(sp_digit) * 128);
14598                 sp_4096_mont_reduce_128(r, m, mp);
14599 
14600                 for (i = 127; i > 0; i--) {
14601                     if (r[i] != m[i]) {
14602                         break;
14603                     }
14604                 }
14605                 if (r[i] >= m[i]) {
14606                     sp_4096_sub_in_place_128(r, m);
14607                 }
14608             }
14609         }
14610     }
14611 
14612     if (err == MP_OKAY) {
14613         sp_4096_to_bin(r, out);
14614         *outLen = 512;
14615     }
14616 
14617 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
14618     if (d != NULL) {
14619         XFREE(d, NULL, DYNAMIC_TYPE_RSA);
14620     }
14621 #endif
14622 
14623     return err;
14624 }
14625 
14626 #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
    /* NOTE(review): the signature of the enclosing function is not visible
     * at this point in the listing. in, inLen, dm, pm, qm, dpm, dqm, qim,
     * mm, out and outLen are presumably its parameters - confirm against the
     * complete file.
     *
     * This path performs the operation with the full exponent dm and the
     * modulus mm; the CRT values are unused.
     */
    sp_digit* a;
    sp_digit* d = NULL;
    sp_digit* m;
    sp_digit* r;
    int err = MP_OKAY;

    /* CRT parameters are not used on this path. */
    (void)pm;
    (void)qm;
    (void)dpm;
    (void)dqm;
    (void)qim;

    /* Output buffer must hold 512 bytes. */
    if (*outLen < 512U) {
        err = MP_TO_E;
    }
    if (err == MP_OKAY) {
        /* Any one of these size violations yields MP_READ_E. */
        if (mp_count_bits(dm) > 4096) {
           err = MP_READ_E;
        }
        if (inLen > 512) {
            err = MP_READ_E;
        }
        if (mp_count_bits(mm) != 4096) {
            err = MP_READ_E;
        }
    }

    if (err == MP_OKAY) {
        /* Single allocation of 4 * 128 digits, carved up below. */
        d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 128 * 4, NULL,
                                                              DYNAMIC_TYPE_RSA);
        if (d == NULL) {
            err = MEMORY_E;
        }
    }
    if (err == MP_OKAY) {
        /* Layout: d[0..127] exponent, a/r the next 256 digits, m the last
         * 128 digits. The result r overwrites the input a. */
        a = d + 128;
        m = a + 256;
        r = a;

        sp_4096_from_bin(a, 128, in, inLen);
        sp_4096_from_mp(d, 128, dm);
        sp_4096_from_mp(m, 128, mm);
        /* reduceA is 0: the base is not reduced modulo m first. */
        err = sp_4096_mod_exp_128(r, a, d, 4096, m, 0);
    }
    if (err == MP_OKAY) {
        sp_4096_to_bin(r, out);
        *outLen = 512;
    }

    if (d != NULL) {
        /* Clear the first 128 digits (the converted exponent) before the
         * buffer is released. */
        XMEMSET(d, 0, sizeof(sp_digit) * 128);
        XFREE(d, NULL, DYNAMIC_TYPE_RSA);
    }

    return err;
14682 #else
14683 #ifndef WOLFSSL_RSA_PUBLIC_ONLY
/* Conditionally add a and b using the mask m.
 * m is -1 to add and 0 when not.
 *
 * Each word of b is ANDed with m before being added to a, so with m == 0
 * the result is a copy of a. 256 bytes (64 words) are processed.
 *
 * r  A single precision number representing conditional add result.
 * a  A single precision number to add with.
 * b  A single precision number to add.
 * m  Mask value to apply.
 * returns the carry out of the final word addition (0 or 1).
 */
SP_NOINLINE static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b,
        sp_digit m)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* r8 = 1 << 8 = 256: byte offset at which the loop stops. */
        "mov    r5, #1\n\t"
        "lsl    r5, r5, #8\n\t"
        "mov    r8, r5\n\t"
        /* r7 = current byte offset into a, b and r. */
        "mov    r7, #0\n\t"
        "1:\n\t"
        /* r6 = b word masked with m. */
        "ldr    r6, [%[b], r7]\n\t"
        "and    r6, %[m]\n\t"
        /* r5 = -1; adding c (0 or 1) to it is presumably what moves the
         * saved carry back into the carry flag for the adc below - relies
         * on this add form setting flags; confirm for the target ISA. */
        "mov    r5, #0\n\t"
        "sub    r5, #1\n\t"
        "add    r5, %[c]\n\t"
        /* r5 = a word + masked b word + carry. */
        "ldr    r5, [%[a], r7]\n\t"
        "adc    r5, r6\n\t"
        /* c = 0 + 0 + carry: save the carry for the next iteration. */
        "mov    %[c], #0\n\t"
        "adc    %[c], %[c]\n\t"
        "str    r5, [%[r], r7]\n\t"
        /* Advance one word and loop until the offset reaches r8. */
        "add    r7, #4\n\t"
        "cmp    r7, r8\n\t"
        "blt    1b\n\t"
        : [c] "+r" (c)
        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
        : "memory", "r5", "r6", "r7", "r8"
    );

    return c;
}
14723 
14724 /* RSA private key operation.
14725  *
14726  * in      Array of bytes representing the number to exponentiate, base.
14727  * inLen   Number of bytes in base.
14728  * dm      Private exponent.
14729  * pm      First prime.
14730  * qm      Second prime.
14731  * dpm     First prime's CRT exponent.
14732  * dqm     Second prime's CRT exponent.
14733  * qim     Inverse of second prime mod p.
14734  * mm      Modulus.
14735  * out     Buffer to hold big-endian bytes of exponentiation result.
14736  *         Must be at least 512 bytes long.
14737  * outLen  Number of bytes in result.
14738  * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
14739  * an array is too long and MEMORY_E when dynamic memory allocation fails.
14740  */
14741 int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm,
14742     mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
14743     byte* out, word32* outLen)
14744 {
14745 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
14746     sp_digit a[128 * 2];
14747     sp_digit p[64], q[64], dp[64];
14748     sp_digit tmpa[128], tmpb[128];
14749 #else
14750     sp_digit* t = NULL;
14751     sp_digit* a;
14752     sp_digit* p;
14753     sp_digit* q;
14754     sp_digit* dp;
14755     sp_digit* tmpa;
14756     sp_digit* tmpb;
14757 #endif
14758     sp_digit* r;
14759     sp_digit* qi;
14760     sp_digit* dq;
14761     sp_digit c;
14762     int err = MP_OKAY;
14763 
14764     (void)dm;
14765     (void)mm;
14766 
14767     if (*outLen < 512)
14768         err = MP_TO_E;
14769     if (err == MP_OKAY && (inLen > 512 || mp_count_bits(mm) != 4096))
14770         err = MP_READ_E;
14771 
14772 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
14773     if (err == MP_OKAY) {
14774         t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 11, NULL,
14775                                                               DYNAMIC_TYPE_RSA);
14776         if (t == NULL)
14777             err = MEMORY_E;
14778     }
14779     if (err == MP_OKAY) {
14780         a = t;
14781         p = a + 128 * 2;
14782         q = p + 64;
14783         qi = dq = dp = q + 64;
14784         tmpa = qi + 64;
14785         tmpb = tmpa + 128;
14786 
14787         r = t + 128;
14788     }
14789 #else
14790 #endif
14791 
14792     if (err == MP_OKAY) {
14793 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
14794         r = a;
14795         qi = dq = dp;
14796 #endif
14797         sp_4096_from_bin(a, 128, in, inLen);
14798         sp_4096_from_mp(p, 64, pm);
14799         sp_4096_from_mp(q, 64, qm);
14800         sp_4096_from_mp(dp, 64, dpm);
14801 
14802         err = sp_2048_mod_exp_64(tmpa, a, dp, 2048, p, 1);
14803     }
14804     if (err == MP_OKAY) {
14805         sp_4096_from_mp(dq, 64, dqm);
14806         err = sp_2048_mod_exp_64(tmpb, a, dq, 2048, q, 1);
14807     }
14808 
14809     if (err == MP_OKAY) {
14810         c = sp_2048_sub_in_place_64(tmpa, tmpb);
14811         c += sp_4096_cond_add_64(tmpa, tmpa, p, c);
14812         sp_4096_cond_add_64(tmpa, tmpa, p, c);
14813 
14814         sp_2048_from_mp(qi, 64, qim);
14815         sp_2048_mul_64(tmpa, tmpa, qi);
14816         err = sp_2048_mod_64(tmpa, tmpa, p);
14817     }
14818 
14819     if (err == MP_OKAY) {
14820         sp_2048_mul_64(tmpa, q, tmpa);
14821         XMEMSET(&tmpb[64], 0, sizeof(sp_digit) * 64);
14822         sp_4096_add_128(r, tmpb, tmpa);
14823 
14824         sp_4096_to_bin(r, out);
14825         *outLen = 512;
14826     }
14827 
14828 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
14829     if (t != NULL) {
14830         XMEMSET(t, 0, sizeof(sp_digit) * 64 * 11);
14831         XFREE(t, NULL, DYNAMIC_TYPE_RSA);
14832     }
14833 #else
14834     XMEMSET(tmpa, 0, sizeof(tmpa));
14835     XMEMSET(tmpb, 0, sizeof(tmpb));
14836     XMEMSET(p,    0, sizeof(p));
14837     XMEMSET(q,    0, sizeof(q));
14838     XMEMSET(dp,   0, sizeof(dp));
14839 #endif
14840 
14841     return err;
14842 }
14843 #endif /* WOLFSSL_RSA_PUBLIC_ONLY */
14844 #endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
14845 #endif /* WOLFSSL_HAVE_SP_RSA */
14846 #if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
14847                                               !defined(WOLFSSL_RSA_PUBLIC_ONLY))
14848 /* Convert an array of sp_digit to an mp_int.
14849  *
14850  * a  A single precision integer.
14851  * r  A multi-precision integer.
14852  */
14853 static int sp_4096_to_mp(const sp_digit* a, mp_int* r)
14854 {
14855     int err;
14856 
14857     err = mp_grow(r, (4096 + DIGIT_BIT - 1) / DIGIT_BIT);
14858     if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
14859 #if DIGIT_BIT == 32
14860         XMEMCPY(r->dp, a, sizeof(sp_digit) * 128);
14861         r->used = 128;
14862         mp_clamp(r);
14863 #elif DIGIT_BIT < 32
14864         int i, j = 0, s = 0;
14865 
14866         r->dp[0] = 0;
14867         for (i = 0; i < 128; i++) {
14868             r->dp[j] |= (mp_digit)(a[i] << s);
14869             r->dp[j] &= (1L << DIGIT_BIT) - 1;
14870             s = DIGIT_BIT - s;
14871             r->dp[++j] = (mp_digit)(a[i] >> s);
14872             while (s + DIGIT_BIT <= 32) {
14873                 s += DIGIT_BIT;
14874                 r->dp[j++] &= (1L << DIGIT_BIT) - 1;
14875                 if (s == SP_WORD_SIZE) {
14876                     r->dp[j] = 0;
14877                 }
14878                 else {
14879                     r->dp[j] = (mp_digit)(a[i] >> s);
14880                 }
14881             }
14882             s = 32 - s;
14883         }
14884         r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
14885         mp_clamp(r);
14886 #else
14887         int i, j = 0, s = 0;
14888 
14889         r->dp[0] = 0;
14890         for (i = 0; i < 128; i++) {
14891             r->dp[j] |= ((mp_digit)a[i]) << s;
14892             if (s + 32 >= DIGIT_BIT) {
14893     #if DIGIT_BIT != 32 && DIGIT_BIT != 64
14894                 r->dp[j] &= (1L << DIGIT_BIT) - 1;
14895     #endif
14896                 s = DIGIT_BIT - s;
14897                 r->dp[++j] = a[i] >> s;
14898                 s = 32 - s;
14899             }
14900             else {
14901                 s += 32;
14902             }
14903         }
14904         r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
14905         mp_clamp(r);
14906 #endif
14907     }
14908 
14909     return err;
14910 }
14911 
14912 /* Perform the modular exponentiation for Diffie-Hellman.
14913  *
14914  * base  Base. MP integer.
14915  * exp   Exponent. MP integer.
14916  * mod   Modulus. MP integer.
14917  * res   Result. MP integer.
14918  * returns 0 on success, MP_READ_E if there are too many bytes in an array
14919  * and MEMORY_E if memory allocation fails.
14920  */
14921 int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
14922 {
14923     int err = MP_OKAY;
14924     sp_digit b[256], e[128], m[128];
14925     sp_digit* r = b;
14926     int expBits = mp_count_bits(exp);
14927 
14928     if (mp_count_bits(base) > 4096) {
14929         err = MP_READ_E;
14930     }
14931 
14932     if (err == MP_OKAY) {
14933         if (expBits > 4096) {
14934             err = MP_READ_E;
14935         }
14936     }
14937 
14938     if (err == MP_OKAY) {
14939         if (mp_count_bits(mod) != 4096) {
14940             err = MP_READ_E;
14941         }
14942     }
14943 
14944     if (err == MP_OKAY) {
14945         sp_4096_from_mp(b, 128, base);
14946         sp_4096_from_mp(e, 128, exp);
14947         sp_4096_from_mp(m, 128, mod);
14948 
14949         err = sp_4096_mod_exp_128(r, b, e, expBits, m, 0);
14950     }
14951 
14952     if (err == MP_OKAY) {
14953         err = sp_4096_to_mp(r, res);
14954     }
14955 
14956     XMEMSET(e, 0, sizeof(e));
14957 
14958     return err;
14959 }
14960 
14961 #ifdef WOLFSSL_HAVE_SP_DH
14962 
14963 #ifdef HAVE_FFDHE_4096
14964 static void sp_4096_lshift_128(sp_digit* r, sp_digit* a, byte n)
14965 {
14966     __asm__ __volatile__ (
14967         "mov    r6, #31\n\t"
14968         "sub    r6, r6, %[n]\n\t"
14969         "add    %[a], %[a], #255\n\t"
14970         "add    %[r], %[r], #255\n\t"
14971         "add    %[a], %[a], #193\n\t"
14972         "add    %[r], %[r], #193\n\t"
14973         "ldr    r3, [%[a], #60]\n\t"
14974         "lsr    r4, r3, #1\n\t"
14975         "lsl    r3, r3, %[n]\n\t"
14976         "lsr    r4, r4, r6\n\t"
14977         "ldr    r2, [%[a], #56]\n\t"
14978         "str    r4, [%[r], #64]\n\t"
14979         "lsr    r5, r2, #1\n\t"
14980         "lsl    r2, r2, %[n]\n\t"
14981         "lsr    r5, r5, r6\n\t"
14982         "orr    r3, r3, r5\n\t"
14983         "ldr    r4, [%[a], #52]\n\t"
14984         "str    r3, [%[r], #60]\n\t"
14985         "lsr    r5, r4, #1\n\t"
14986         "lsl    r4, r4, %[n]\n\t"
14987         "lsr    r5, r5, r6\n\t"
14988         "orr    r2, r2, r5\n\t"
14989         "ldr    r3, [%[a], #48]\n\t"
14990         "str    r2, [%[r], #56]\n\t"
14991         "lsr    r5, r3, #1\n\t"
14992         "lsl    r3, r3, %[n]\n\t"
14993         "lsr    r5, r5, r6\n\t"
14994         "orr    r4, r4, r5\n\t"
14995         "ldr    r2, [%[a], #44]\n\t"
14996         "str    r4, [%[r], #52]\n\t"
14997         "lsr    r5, r2, #1\n\t"
14998         "lsl    r2, r2, %[n]\n\t"
14999         "lsr    r5, r5, r6\n\t"
15000         "orr    r3, r3, r5\n\t"
15001         "ldr    r4, [%[a], #40]\n\t"
15002         "str    r3, [%[r], #48]\n\t"
15003         "lsr    r5, r4, #1\n\t"
15004         "lsl    r4, r4, %[n]\n\t"
15005         "lsr    r5, r5, r6\n\t"
15006         "orr    r2, r2, r5\n\t"
15007         "ldr    r3, [%[a], #36]\n\t"
15008         "str    r2, [%[r], #44]\n\t"
15009         "lsr    r5, r3, #1\n\t"
15010         "lsl    r3, r3, %[n]\n\t"
15011         "lsr    r5, r5, r6\n\t"
15012         "orr    r4, r4, r5\n\t"
15013         "ldr    r2, [%[a], #32]\n\t"
15014         "str    r4, [%[r], #40]\n\t"
15015         "lsr    r5, r2, #1\n\t"
15016         "lsl    r2, r2, %[n]\n\t"
15017         "lsr    r5, r5, r6\n\t"
15018         "orr    r3, r3, r5\n\t"
15019         "ldr    r4, [%[a], #28]\n\t"
15020         "str    r3, [%[r], #36]\n\t"
15021         "lsr    r5, r4, #1\n\t"
15022         "lsl    r4, r4, %[n]\n\t"
15023         "lsr    r5, r5, r6\n\t"
15024         "orr    r2, r2, r5\n\t"
15025         "ldr    r3, [%[a], #24]\n\t"
15026         "str    r2, [%[r], #32]\n\t"
15027         "lsr    r5, r3, #1\n\t"
15028         "lsl    r3, r3, %[n]\n\t"
15029         "lsr    r5, r5, r6\n\t"
15030         "orr    r4, r4, r5\n\t"
15031         "ldr    r2, [%[a], #20]\n\t"
15032         "str    r4, [%[r], #28]\n\t"
15033         "lsr    r5, r2, #1\n\t"
15034         "lsl    r2, r2, %[n]\n\t"
15035         "lsr    r5, r5, r6\n\t"
15036         "orr    r3, r3, r5\n\t"
15037         "ldr    r4, [%[a], #16]\n\t"
15038         "str    r3, [%[r], #24]\n\t"
15039         "lsr    r5, r4, #1\n\t"
15040         "lsl    r4, r4, %[n]\n\t"
15041         "lsr    r5, r5, r6\n\t"
15042         "orr    r2, r2, r5\n\t"
15043         "ldr    r3, [%[a], #12]\n\t"
15044         "str    r2, [%[r], #20]\n\t"
15045         "lsr    r5, r3, #1\n\t"
15046         "lsl    r3, r3, %[n]\n\t"
15047         "lsr    r5, r5, r6\n\t"
15048         "orr    r4, r4, r5\n\t"
15049         "ldr    r2, [%[a], #8]\n\t"
15050         "str    r4, [%[r], #16]\n\t"
15051         "lsr    r5, r2, #1\n\t"
15052         "lsl    r2, r2, %[n]\n\t"
15053         "lsr    r5, r5, r6\n\t"
15054         "orr    r3, r3, r5\n\t"
15055         "ldr    r4, [%[a], #4]\n\t"
15056         "str    r3, [%[r], #12]\n\t"
15057         "lsr    r5, r4, #1\n\t"
15058         "lsl    r4, r4, %[n]\n\t"
15059         "lsr    r5, r5, r6\n\t"
15060         "orr    r2, r2, r5\n\t"
15061         "ldr    r3, [%[a], #0]\n\t"
15062         "str    r2, [%[r], #8]\n\t"
15063         "lsr    r5, r3, #1\n\t"
15064         "lsl    r3, r3, %[n]\n\t"
15065         "lsr    r5, r5, r6\n\t"
15066         "orr    r4, r4, r5\n\t"
15067         "sub    %[a], %[a], #64\n\t"
15068         "sub    %[r], %[r], #64\n\t"
15069         "ldr    r2, [%[a], #60]\n\t"
15070         "str    r4, [%[r], #68]\n\t"
15071         "lsr    r5, r2, #1\n\t"
15072         "lsl    r2, r2, %[n]\n\t"
15073         "lsr    r5, r5, r6\n\t"
15074         "orr    r3, r3, r5\n\t"
15075         "ldr    r4, [%[a], #56]\n\t"
15076         "str    r3, [%[r], #64]\n\t"
15077         "lsr    r5, r4, #1\n\t"
15078         "lsl    r4, r4, %[n]\n\t"
15079         "lsr    r5, r5, r6\n\t"
15080         "orr    r2, r2, r5\n\t"
15081         "ldr    r3, [%[a], #52]\n\t"
15082         "str    r2, [%[r], #60]\n\t"
15083         "lsr    r5, r3, #1\n\t"
15084         "lsl    r3, r3, %[n]\n\t"
15085         "lsr    r5, r5, r6\n\t"
15086         "orr    r4, r4, r5\n\t"
15087         "ldr    r2, [%[a], #48]\n\t"
15088         "str    r4, [%[r], #56]\n\t"
15089         "lsr    r5, r2, #1\n\t"
15090         "lsl    r2, r2, %[n]\n\t"
15091         "lsr    r5, r5, r6\n\t"
15092         "orr    r3, r3, r5\n\t"
15093         "ldr    r4, [%[a], #44]\n\t"
15094         "str    r3, [%[r], #52]\n\t"
15095         "lsr    r5, r4, #1\n\t"
15096         "lsl    r4, r4, %[n]\n\t"
15097         "lsr    r5, r5, r6\n\t"
15098         "orr    r2, r2, r5\n\t"
15099         "ldr    r3, [%[a], #40]\n\t"
15100         "str    r2, [%[r], #48]\n\t"
15101         "lsr    r5, r3, #1\n\t"
15102         "lsl    r3, r3, %[n]\n\t"
15103         "lsr    r5, r5, r6\n\t"
15104         "orr    r4, r4, r5\n\t"
15105         "ldr    r2, [%[a], #36]\n\t"
15106         "str    r4, [%[r], #44]\n\t"
15107         "lsr    r5, r2, #1\n\t"
15108         "lsl    r2, r2, %[n]\n\t"
15109         "lsr    r5, r5, r6\n\t"
15110         "orr    r3, r3, r5\n\t"
15111         "ldr    r4, [%[a], #32]\n\t"
15112         "str    r3, [%[r], #40]\n\t"
15113         "lsr    r5, r4, #1\n\t"
15114         "lsl    r4, r4, %[n]\n\t"
15115         "lsr    r5, r5, r6\n\t"
15116         "orr    r2, r2, r5\n\t"
15117         "ldr    r3, [%[a], #28]\n\t"
15118         "str    r2, [%[r], #36]\n\t"
15119         "lsr    r5, r3, #1\n\t"
15120         "lsl    r3, r3, %[n]\n\t"
15121         "lsr    r5, r5, r6\n\t"
15122         "orr    r4, r4, r5\n\t"
15123         "ldr    r2, [%[a], #24]\n\t"
15124         "str    r4, [%[r], #32]\n\t"
15125         "lsr    r5, r2, #1\n\t"
15126         "lsl    r2, r2, %[n]\n\t"
15127         "lsr    r5, r5, r6\n\t"
15128         "orr    r3, r3, r5\n\t"
15129         "ldr    r4, [%[a], #20]\n\t"
15130         "str    r3, [%[r], #28]\n\t"
15131         "lsr    r5, r4, #1\n\t"
15132         "lsl    r4, r4, %[n]\n\t"
15133         "lsr    r5, r5, r6\n\t"
15134         "orr    r2, r2, r5\n\t"
15135         "ldr    r3, [%[a], #16]\n\t"
15136         "str    r2, [%[r], #24]\n\t"
15137         "lsr    r5, r3, #1\n\t"
15138         "lsl    r3, r3, %[n]\n\t"
15139         "lsr    r5, r5, r6\n\t"
15140         "orr    r4, r4, r5\n\t"
15141         "ldr    r2, [%[a], #12]\n\t"
15142         "str    r4, [%[r], #20]\n\t"
15143         "lsr    r5, r2, #1\n\t"
15144         "lsl    r2, r2, %[n]\n\t"
15145         "lsr    r5, r5, r6\n\t"
15146         "orr    r3, r3, r5\n\t"
15147         "ldr    r4, [%[a], #8]\n\t"
15148         "str    r3, [%[r], #16]\n\t"
15149         "lsr    r5, r4, #1\n\t"
15150         "lsl    r4, r4, %[n]\n\t"
15151         "lsr    r5, r5, r6\n\t"
15152         "orr    r2, r2, r5\n\t"
15153         "ldr    r3, [%[a], #4]\n\t"
15154         "str    r2, [%[r], #12]\n\t"
15155         "lsr    r5, r3, #1\n\t"
15156         "lsl    r3, r3, %[n]\n\t"
15157         "lsr    r5, r5, r6\n\t"
15158         "orr    r4, r4, r5\n\t"
15159         "ldr    r2, [%[a], #0]\n\t"
15160         "str    r4, [%[r], #8]\n\t"
15161         "lsr    r5, r2, #1\n\t"
15162         "lsl    r2, r2, %[n]\n\t"
15163         "lsr    r5, r5, r6\n\t"
15164         "orr    r3, r3, r5\n\t"
15165         "sub    %[a], %[a], #64\n\t"
15166         "sub    %[r], %[r], #64\n\t"
15167         "ldr    r4, [%[a], #60]\n\t"
15168         "str    r3, [%[r], #68]\n\t"
15169         "lsr    r5, r4, #1\n\t"
15170         "lsl    r4, r4, %[n]\n\t"
15171         "lsr    r5, r5, r6\n\t"
15172         "orr    r2, r2, r5\n\t"
15173         "ldr    r3, [%[a], #56]\n\t"
15174         "str    r2, [%[r], #64]\n\t"
15175         "lsr    r5, r3, #1\n\t"
15176         "lsl    r3, r3, %[n]\n\t"
15177         "lsr    r5, r5, r6\n\t"
15178         "orr    r4, r4, r5\n\t"
15179         "ldr    r2, [%[a], #52]\n\t"
15180         "str    r4, [%[r], #60]\n\t"
15181         "lsr    r5, r2, #1\n\t"
15182         "lsl    r2, r2, %[n]\n\t"
15183         "lsr    r5, r5, r6\n\t"
15184         "orr    r3, r3, r5\n\t"
15185         "ldr    r4, [%[a], #48]\n\t"
15186         "str    r3, [%[r], #56]\n\t"
15187         "lsr    r5, r4, #1\n\t"
15188         "lsl    r4, r4, %[n]\n\t"
15189         "lsr    r5, r5, r6\n\t"
15190         "orr    r2, r2, r5\n\t"
15191         "ldr    r3, [%[a], #44]\n\t"
15192         "str    r2, [%[r], #52]\n\t"
15193         "lsr    r5, r3, #1\n\t"
15194         "lsl    r3, r3, %[n]\n\t"
15195         "lsr    r5, r5, r6\n\t"
15196         "orr    r4, r4, r5\n\t"
15197         "ldr    r2, [%[a], #40]\n\t"
15198         "str    r4, [%[r], #48]\n\t"
15199         "lsr    r5, r2, #1\n\t"
15200         "lsl    r2, r2, %[n]\n\t"
15201         "lsr    r5, r5, r6\n\t"
15202         "orr    r3, r3, r5\n\t"
15203         "ldr    r4, [%[a], #36]\n\t"
15204         "str    r3, [%[r], #44]\n\t"
15205         "lsr    r5, r4, #1\n\t"
15206         "lsl    r4, r4, %[n]\n\t"
15207         "lsr    r5, r5, r6\n\t"
15208         "orr    r2, r2, r5\n\t"
15209         "ldr    r3, [%[a], #32]\n\t"
15210         "str    r2, [%[r], #40]\n\t"
15211         "lsr    r5, r3, #1\n\t"
15212         "lsl    r3, r3, %[n]\n\t"
15213         "lsr    r5, r5, r6\n\t"
15214         "orr    r4, r4, r5\n\t"
15215         "ldr    r2, [%[a], #28]\n\t"
15216         "str    r4, [%[r], #36]\n\t"
15217         "lsr    r5, r2, #1\n\t"
15218         "lsl    r2, r2, %[n]\n\t"
15219         "lsr    r5, r5, r6\n\t"
15220         "orr    r3, r3, r5\n\t"
15221         "ldr    r4, [%[a], #24]\n\t"
15222         "str    r3, [%[r], #32]\n\t"
15223         "lsr    r5, r4, #1\n\t"
15224         "lsl    r4, r4, %[n]\n\t"
15225         "lsr    r5, r5, r6\n\t"
15226         "orr    r2, r2, r5\n\t"
15227         "ldr    r3, [%[a], #20]\n\t"
15228         "str    r2, [%[r], #28]\n\t"
15229         "lsr    r5, r3, #1\n\t"
15230         "lsl    r3, r3, %[n]\n\t"
15231         "lsr    r5, r5, r6\n\t"
15232         "orr    r4, r4, r5\n\t"
15233         "ldr    r2, [%[a], #16]\n\t"
15234         "str    r4, [%[r], #24]\n\t"
15235         "lsr    r5, r2, #1\n\t"
15236         "lsl    r2, r2, %[n]\n\t"
15237         "lsr    r5, r5, r6\n\t"
15238         "orr    r3, r3, r5\n\t"
15239         "ldr    r4, [%[a], #12]\n\t"
15240         "str    r3, [%[r], #20]\n\t"
15241         "lsr    r5, r4, #1\n\t"
15242         "lsl    r4, r4, %[n]\n\t"
15243         "lsr    r5, r5, r6\n\t"
15244         "orr    r2, r2, r5\n\t"
15245         "ldr    r3, [%[a], #8]\n\t"
15246         "str    r2, [%[r], #16]\n\t"
15247         "lsr    r5, r3, #1\n\t"
15248         "lsl    r3, r3, %[n]\n\t"
15249         "lsr    r5, r5, r6\n\t"
15250         "orr    r4, r4, r5\n\t"
15251         "ldr    r2, [%[a], #4]\n\t"
15252         "str    r4, [%[r], #12]\n\t"
15253         "lsr    r5, r2, #1\n\t"
15254         "lsl    r2, r2, %[n]\n\t"
15255         "lsr    r5, r5, r6\n\t"
15256         "orr    r3, r3, r5\n\t"
15257         "ldr    r4, [%[a], #0]\n\t"
15258         "str    r3, [%[r], #8]\n\t"
15259         "lsr    r5, r4, #1\n\t"
15260         "lsl    r4, r4, %[n]\n\t"
15261         "lsr    r5, r5, r6\n\t"
15262         "orr    r2, r2, r5\n\t"
15263         "sub    %[a], %[a], #64\n\t"
15264         "sub    %[r], %[r], #64\n\t"
15265         "ldr    r3, [%[a], #60]\n\t"
15266         "str    r2, [%[r], #68]\n\t"
15267         "lsr    r5, r3, #1\n\t"
15268         "lsl    r3, r3, %[n]\n\t"
15269         "lsr    r5, r5, r6\n\t"
15270         "orr    r4, r4, r5\n\t"
15271         "ldr    r2, [%[a], #56]\n\t"
15272         "str    r4, [%[r], #64]\n\t"
15273         "lsr    r5, r2, #1\n\t"
15274         "lsl    r2, r2, %[n]\n\t"
15275         "lsr    r5, r5, r6\n\t"
15276         "orr    r3, r3, r5\n\t"
15277         "ldr    r4, [%[a], #52]\n\t"
15278         "str    r3, [%[r], #60]\n\t"
15279         "lsr    r5, r4, #1\n\t"
15280         "lsl    r4, r4, %[n]\n\t"
15281         "lsr    r5, r5, r6\n\t"
15282         "orr    r2, r2, r5\n\t"
15283         "ldr    r3, [%[a], #48]\n\t"
15284         "str    r2, [%[r], #56]\n\t"
15285         "lsr    r5, r3, #1\n\t"
15286         "lsl    r3, r3, %[n]\n\t"
15287         "lsr    r5, r5, r6\n\t"
15288         "orr    r4, r4, r5\n\t"
15289         "ldr    r2, [%[a], #44]\n\t"
15290         "str    r4, [%[r], #52]\n\t"
15291         "lsr    r5, r2, #1\n\t"
15292         "lsl    r2, r2, %[n]\n\t"
15293         "lsr    r5, r5, r6\n\t"
15294         "orr    r3, r3, r5\n\t"
15295         "ldr    r4, [%[a], #40]\n\t"
15296         "str    r3, [%[r], #48]\n\t"
15297         "lsr    r5, r4, #1\n\t"
15298         "lsl    r4, r4, %[n]\n\t"
15299         "lsr    r5, r5, r6\n\t"
15300         "orr    r2, r2, r5\n\t"
15301         "ldr    r3, [%[a], #36]\n\t"
15302         "str    r2, [%[r], #44]\n\t"
15303         "lsr    r5, r3, #1\n\t"
15304         "lsl    r3, r3, %[n]\n\t"
15305         "lsr    r5, r5, r6\n\t"
15306         "orr    r4, r4, r5\n\t"
15307         "ldr    r2, [%[a], #32]\n\t"
15308         "str    r4, [%[r], #40]\n\t"
15309         "lsr    r5, r2, #1\n\t"
15310         "lsl    r2, r2, %[n]\n\t"
15311         "lsr    r5, r5, r6\n\t"
15312         "orr    r3, r3, r5\n\t"
15313         "ldr    r4, [%[a], #28]\n\t"
15314         "str    r3, [%[r], #36]\n\t"
15315         "lsr    r5, r4, #1\n\t"
15316         "lsl    r4, r4, %[n]\n\t"
15317         "lsr    r5, r5, r6\n\t"
15318         "orr    r2, r2, r5\n\t"
15319         "ldr    r3, [%[a], #24]\n\t"
15320         "str    r2, [%[r], #32]\n\t"
15321         "lsr    r5, r3, #1\n\t"
15322         "lsl    r3, r3, %[n]\n\t"
15323         "lsr    r5, r5, r6\n\t"
15324         "orr    r4, r4, r5\n\t"
15325         "ldr    r2, [%[a], #20]\n\t"
15326         "str    r4, [%[r], #28]\n\t"
15327         "lsr    r5, r2, #1\n\t"
15328         "lsl    r2, r2, %[n]\n\t"
15329         "lsr    r5, r5, r6\n\t"
15330         "orr    r3, r3, r5\n\t"
15331         "ldr    r4, [%[a], #16]\n\t"
15332         "str    r3, [%[r], #24]\n\t"
15333         "lsr    r5, r4, #1\n\t"
15334         "lsl    r4, r4, %[n]\n\t"
15335         "lsr    r5, r5, r6\n\t"
15336         "orr    r2, r2, r5\n\t"
15337         "ldr    r3, [%[a], #12]\n\t"
15338         "str    r2, [%[r], #20]\n\t"
15339         "lsr    r5, r3, #1\n\t"
15340         "lsl    r3, r3, %[n]\n\t"
15341         "lsr    r5, r5, r6\n\t"
15342         "orr    r4, r4, r5\n\t"
15343         "ldr    r2, [%[a], #8]\n\t"
15344         "str    r4, [%[r], #16]\n\t"
15345         "lsr    r5, r2, #1\n\t"
15346         "lsl    r2, r2, %[n]\n\t"
15347         "lsr    r5, r5, r6\n\t"
15348         "orr    r3, r3, r5\n\t"
15349         "ldr    r4, [%[a], #4]\n\t"
15350         "str    r3, [%[r], #12]\n\t"
15351         "lsr    r5, r4, #1\n\t"
15352         "lsl    r4, r4, %[n]\n\t"
15353         "lsr    r5, r5, r6\n\t"
15354         "orr    r2, r2, r5\n\t"
15355         "ldr    r3, [%[a], #0]\n\t"
15356         "str    r2, [%[r], #8]\n\t"
15357         "lsr    r5, r3, #1\n\t"
15358         "lsl    r3, r3, %[n]\n\t"
15359         "lsr    r5, r5, r6\n\t"
15360         "orr    r4, r4, r5\n\t"
15361         "sub    %[a], %[a], #64\n\t"
15362         "sub    %[r], %[r], #64\n\t"
15363         "ldr    r2, [%[a], #60]\n\t"
15364         "str    r4, [%[r], #68]\n\t"
15365         "lsr    r5, r2, #1\n\t"
15366         "lsl    r2, r2, %[n]\n\t"
15367         "lsr    r5, r5, r6\n\t"
15368         "orr    r3, r3, r5\n\t"
15369         "ldr    r4, [%[a], #56]\n\t"
15370         "str    r3, [%[r], #64]\n\t"
15371         "lsr    r5, r4, #1\n\t"
15372         "lsl    r4, r4, %[n]\n\t"
15373         "lsr    r5, r5, r6\n\t"
15374         "orr    r2, r2, r5\n\t"
15375         "ldr    r3, [%[a], #52]\n\t"
15376         "str    r2, [%[r], #60]\n\t"
15377         "lsr    r5, r3, #1\n\t"
15378         "lsl    r3, r3, %[n]\n\t"
15379         "lsr    r5, r5, r6\n\t"
15380         "orr    r4, r4, r5\n\t"
15381         "ldr    r2, [%[a], #48]\n\t"
15382         "str    r4, [%[r], #56]\n\t"
15383         "lsr    r5, r2, #1\n\t"
15384         "lsl    r2, r2, %[n]\n\t"
15385         "lsr    r5, r5, r6\n\t"
15386         "orr    r3, r3, r5\n\t"
15387         "ldr    r4, [%[a], #44]\n\t"
15388         "str    r3, [%[r], #52]\n\t"
15389         "lsr    r5, r4, #1\n\t"
15390         "lsl    r4, r4, %[n]\n\t"
15391         "lsr    r5, r5, r6\n\t"
15392         "orr    r2, r2, r5\n\t"
15393         "ldr    r3, [%[a], #40]\n\t"
15394         "str    r2, [%[r], #48]\n\t"
15395         "lsr    r5, r3, #1\n\t"
15396         "lsl    r3, r3, %[n]\n\t"
15397         "lsr    r5, r5, r6\n\t"
15398         "orr    r4, r4, r5\n\t"
15399         "ldr    r2, [%[a], #36]\n\t"
15400         "str    r4, [%[r], #44]\n\t"
15401         "lsr    r5, r2, #1\n\t"
15402         "lsl    r2, r2, %[n]\n\t"
15403         "lsr    r5, r5, r6\n\t"
15404         "orr    r3, r3, r5\n\t"
15405         "ldr    r4, [%[a], #32]\n\t"
15406         "str    r3, [%[r], #40]\n\t"
15407         "lsr    r5, r4, #1\n\t"
15408         "lsl    r4, r4, %[n]\n\t"
15409         "lsr    r5, r5, r6\n\t"
15410         "orr    r2, r2, r5\n\t"
15411         "ldr    r3, [%[a], #28]\n\t"
15412         "str    r2, [%[r], #36]\n\t"
15413         "lsr    r5, r3, #1\n\t"
15414         "lsl    r3, r3, %[n]\n\t"
15415         "lsr    r5, r5, r6\n\t"
15416         "orr    r4, r4, r5\n\t"
15417         "ldr    r2, [%[a], #24]\n\t"
15418         "str    r4, [%[r], #32]\n\t"
15419         "lsr    r5, r2, #1\n\t"
15420         "lsl    r2, r2, %[n]\n\t"
15421         "lsr    r5, r5, r6\n\t"
15422         "orr    r3, r3, r5\n\t"
15423         "ldr    r4, [%[a], #20]\n\t"
15424         "str    r3, [%[r], #28]\n\t"
15425         "lsr    r5, r4, #1\n\t"
15426         "lsl    r4, r4, %[n]\n\t"
15427         "lsr    r5, r5, r6\n\t"
15428         "orr    r2, r2, r5\n\t"
15429         "ldr    r3, [%[a], #16]\n\t"
15430         "str    r2, [%[r], #24]\n\t"
15431         "lsr    r5, r3, #1\n\t"
15432         "lsl    r3, r3, %[n]\n\t"
15433         "lsr    r5, r5, r6\n\t"
15434         "orr    r4, r4, r5\n\t"
15435         "ldr    r2, [%[a], #12]\n\t"
15436         "str    r4, [%[r], #20]\n\t"
15437         "lsr    r5, r2, #1\n\t"
15438         "lsl    r2, r2, %[n]\n\t"
15439         "lsr    r5, r5, r6\n\t"
15440         "orr    r3, r3, r5\n\t"
15441         "ldr    r4, [%[a], #8]\n\t"
15442         "str    r3, [%[r], #16]\n\t"
15443         "lsr    r5, r4, #1\n\t"
15444         "lsl    r4, r4, %[n]\n\t"
15445         "lsr    r5, r5, r6\n\t"
15446         "orr    r2, r2, r5\n\t"
15447         "ldr    r3, [%[a], #4]\n\t"
15448         "str    r2, [%[r], #12]\n\t"
15449         "lsr    r5, r3, #1\n\t"
15450         "lsl    r3, r3, %[n]\n\t"
15451         "lsr    r5, r5, r6\n\t"
15452         "orr    r4, r4, r5\n\t"
15453         "ldr    r2, [%[a], #0]\n\t"
15454         "str    r4, [%[r], #8]\n\t"
15455         "lsr    r5, r2, #1\n\t"
15456         "lsl    r2, r2, %[n]\n\t"
15457         "lsr    r5, r5, r6\n\t"
15458         "orr    r3, r3, r5\n\t"
15459         "sub    %[a], %[a], #64\n\t"
15460         "sub    %[r], %[r], #64\n\t"
15461         "ldr    r4, [%[a], #60]\n\t"
15462         "str    r3, [%[r], #68]\n\t"
15463         "lsr    r5, r4, #1\n\t"
15464         "lsl    r4, r4, %[n]\n\t"
15465         "lsr    r5, r5, r6\n\t"
15466         "orr    r2, r2, r5\n\t"
15467         "ldr    r3, [%[a], #56]\n\t"
15468         "str    r2, [%[r], #64]\n\t"
15469         "lsr    r5, r3, #1\n\t"
15470         "lsl    r3, r3, %[n]\n\t"
15471         "lsr    r5, r5, r6\n\t"
15472         "orr    r4, r4, r5\n\t"
15473         "ldr    r2, [%[a], #52]\n\t"
15474         "str    r4, [%[r], #60]\n\t"
15475         "lsr    r5, r2, #1\n\t"
15476         "lsl    r2, r2, %[n]\n\t"
15477         "lsr    r5, r5, r6\n\t"
15478         "orr    r3, r3, r5\n\t"
15479         "ldr    r4, [%[a], #48]\n\t"
15480         "str    r3, [%[r], #56]\n\t"
15481         "lsr    r5, r4, #1\n\t"
15482         "lsl    r4, r4, %[n]\n\t"
15483         "lsr    r5, r5, r6\n\t"
15484         "orr    r2, r2, r5\n\t"
15485         "ldr    r3, [%[a], #44]\n\t"
15486         "str    r2, [%[r], #52]\n\t"
15487         "lsr    r5, r3, #1\n\t"
15488         "lsl    r3, r3, %[n]\n\t"
15489         "lsr    r5, r5, r6\n\t"
15490         "orr    r4, r4, r5\n\t"
15491         "ldr    r2, [%[a], #40]\n\t"
15492         "str    r4, [%[r], #48]\n\t"
15493         "lsr    r5, r2, #1\n\t"
15494         "lsl    r2, r2, %[n]\n\t"
15495         "lsr    r5, r5, r6\n\t"
15496         "orr    r3, r3, r5\n\t"
15497         "ldr    r4, [%[a], #36]\n\t"
15498         "str    r3, [%[r], #44]\n\t"
15499         "lsr    r5, r4, #1\n\t"
15500         "lsl    r4, r4, %[n]\n\t"
15501         "lsr    r5, r5, r6\n\t"
15502         "orr    r2, r2, r5\n\t"
15503         "ldr    r3, [%[a], #32]\n\t"
15504         "str    r2, [%[r], #40]\n\t"
15505         "lsr    r5, r3, #1\n\t"
15506         "lsl    r3, r3, %[n]\n\t"
15507         "lsr    r5, r5, r6\n\t"
15508         "orr    r4, r4, r5\n\t"
15509         "ldr    r2, [%[a], #28]\n\t"
15510         "str    r4, [%[r], #36]\n\t"
15511         "lsr    r5, r2, #1\n\t"
15512         "lsl    r2, r2, %[n]\n\t"
15513         "lsr    r5, r5, r6\n\t"
15514         "orr    r3, r3, r5\n\t"
15515         "ldr    r4, [%[a], #24]\n\t"
15516         "str    r3, [%[r], #32]\n\t"
15517         "lsr    r5, r4, #1\n\t"
15518         "lsl    r4, r4, %[n]\n\t"
15519         "lsr    r5, r5, r6\n\t"
15520         "orr    r2, r2, r5\n\t"
15521         "ldr    r3, [%[a], #20]\n\t"
15522         "str    r2, [%[r], #28]\n\t"
15523         "lsr    r5, r3, #1\n\t"
15524         "lsl    r3, r3, %[n]\n\t"
15525         "lsr    r5, r5, r6\n\t"
15526         "orr    r4, r4, r5\n\t"
15527         "ldr    r2, [%[a], #16]\n\t"
15528         "str    r4, [%[r], #24]\n\t"
15529         "lsr    r5, r2, #1\n\t"
15530         "lsl    r2, r2, %[n]\n\t"
15531         "lsr    r5, r5, r6\n\t"
15532         "orr    r3, r3, r5\n\t"
15533         "ldr    r4, [%[a], #12]\n\t"
15534         "str    r3, [%[r], #20]\n\t"
15535         "lsr    r5, r4, #1\n\t"
15536         "lsl    r4, r4, %[n]\n\t"
15537         "lsr    r5, r5, r6\n\t"
15538         "orr    r2, r2, r5\n\t"
15539         "ldr    r3, [%[a], #8]\n\t"
15540         "str    r2, [%[r], #16]\n\t"
15541         "lsr    r5, r3, #1\n\t"
15542         "lsl    r3, r3, %[n]\n\t"
15543         "lsr    r5, r5, r6\n\t"
15544         "orr    r4, r4, r5\n\t"
15545         "ldr    r2, [%[a], #4]\n\t"
15546         "str    r4, [%[r], #12]\n\t"
15547         "lsr    r5, r2, #1\n\t"
15548         "lsl    r2, r2, %[n]\n\t"
15549         "lsr    r5, r5, r6\n\t"
15550         "orr    r3, r3, r5\n\t"
15551         "ldr    r4, [%[a], #0]\n\t"
15552         "str    r3, [%[r], #8]\n\t"
15553         "lsr    r5, r4, #1\n\t"
15554         "lsl    r4, r4, %[n]\n\t"
15555         "lsr    r5, r5, r6\n\t"
15556         "orr    r2, r2, r5\n\t"
15557         "sub    %[a], %[a], #64\n\t"
15558         "sub    %[r], %[r], #64\n\t"
15559         "ldr    r3, [%[a], #60]\n\t"
15560         "str    r2, [%[r], #68]\n\t"
15561         "lsr    r5, r3, #1\n\t"
15562         "lsl    r3, r3, %[n]\n\t"
15563         "lsr    r5, r5, r6\n\t"
15564         "orr    r4, r4, r5\n\t"
15565         "ldr    r2, [%[a], #56]\n\t"
15566         "str    r4, [%[r], #64]\n\t"
15567         "lsr    r5, r2, #1\n\t"
15568         "lsl    r2, r2, %[n]\n\t"
15569         "lsr    r5, r5, r6\n\t"
15570         "orr    r3, r3, r5\n\t"
15571         "ldr    r4, [%[a], #52]\n\t"
15572         "str    r3, [%[r], #60]\n\t"
15573         "lsr    r5, r4, #1\n\t"
15574         "lsl    r4, r4, %[n]\n\t"
15575         "lsr    r5, r5, r6\n\t"
15576         "orr    r2, r2, r5\n\t"
15577         "ldr    r3, [%[a], #48]\n\t"
15578         "str    r2, [%[r], #56]\n\t"
15579         "lsr    r5, r3, #1\n\t"
15580         "lsl    r3, r3, %[n]\n\t"
15581         "lsr    r5, r5, r6\n\t"
15582         "orr    r4, r4, r5\n\t"
15583         "ldr    r2, [%[a], #44]\n\t"
15584         "str    r4, [%[r], #52]\n\t"
15585         "lsr    r5, r2, #1\n\t"
15586         "lsl    r2, r2, %[n]\n\t"
15587         "lsr    r5, r5, r6\n\t"
15588         "orr    r3, r3, r5\n\t"
15589         "ldr    r4, [%[a], #40]\n\t"
15590         "str    r3, [%[r], #48]\n\t"
15591         "lsr    r5, r4, #1\n\t"
15592         "lsl    r4, r4, %[n]\n\t"
15593         "lsr    r5, r5, r6\n\t"
15594         "orr    r2, r2, r5\n\t"
15595         "ldr    r3, [%[a], #36]\n\t"
15596         "str    r2, [%[r], #44]\n\t"
15597         "lsr    r5, r3, #1\n\t"
15598         "lsl    r3, r3, %[n]\n\t"
15599         "lsr    r5, r5, r6\n\t"
15600         "orr    r4, r4, r5\n\t"
15601         "ldr    r2, [%[a], #32]\n\t"
15602         "str    r4, [%[r], #40]\n\t"
15603         "lsr    r5, r2, #1\n\t"
15604         "lsl    r2, r2, %[n]\n\t"
15605         "lsr    r5, r5, r6\n\t"
15606         "orr    r3, r3, r5\n\t"
15607         "ldr    r4, [%[a], #28]\n\t"
15608         "str    r3, [%[r], #36]\n\t"
15609         "lsr    r5, r4, #1\n\t"
15610         "lsl    r4, r4, %[n]\n\t"
15611         "lsr    r5, r5, r6\n\t"
15612         "orr    r2, r2, r5\n\t"
15613         "ldr    r3, [%[a], #24]\n\t"
15614         "str    r2, [%[r], #32]\n\t"
15615         "lsr    r5, r3, #1\n\t"
15616         "lsl    r3, r3, %[n]\n\t"
15617         "lsr    r5, r5, r6\n\t"
15618         "orr    r4, r4, r5\n\t"
15619         "ldr    r2, [%[a], #20]\n\t"
15620         "str    r4, [%[r], #28]\n\t"
15621         "lsr    r5, r2, #1\n\t"
15622         "lsl    r2, r2, %[n]\n\t"
15623         "lsr    r5, r5, r6\n\t"
15624         "orr    r3, r3, r5\n\t"
15625         "ldr    r4, [%[a], #16]\n\t"
15626         "str    r3, [%[r], #24]\n\t"
15627         "lsr    r5, r4, #1\n\t"
15628         "lsl    r4, r4, %[n]\n\t"
15629         "lsr    r5, r5, r6\n\t"
15630         "orr    r2, r2, r5\n\t"
15631         "ldr    r3, [%[a], #12]\n\t"
15632         "str    r2, [%[r], #20]\n\t"
15633         "lsr    r5, r3, #1\n\t"
15634         "lsl    r3, r3, %[n]\n\t"
15635         "lsr    r5, r5, r6\n\t"
15636         "orr    r4, r4, r5\n\t"
15637         "ldr    r2, [%[a], #8]\n\t"
15638         "str    r4, [%[r], #16]\n\t"
15639         "lsr    r5, r2, #1\n\t"
15640         "lsl    r2, r2, %[n]\n\t"
15641         "lsr    r5, r5, r6\n\t"
15642         "orr    r3, r3, r5\n\t"
15643         "ldr    r4, [%[a], #4]\n\t"
15644         "str    r3, [%[r], #12]\n\t"
15645         "lsr    r5, r4, #1\n\t"
15646         "lsl    r4, r4, %[n]\n\t"
15647         "lsr    r5, r5, r6\n\t"
15648         "orr    r2, r2, r5\n\t"
15649         "ldr    r3, [%[a], #0]\n\t"
15650         "str    r2, [%[r], #8]\n\t"
15651         "lsr    r5, r3, #1\n\t"
15652         "lsl    r3, r3, %[n]\n\t"
15653         "lsr    r5, r5, r6\n\t"
15654         "orr    r4, r4, r5\n\t"
15655         "sub    %[a], %[a], #64\n\t"
15656         "sub    %[r], %[r], #64\n\t"
15657         "ldr    r2, [%[a], #60]\n\t"
15658         "str    r4, [%[r], #68]\n\t"
15659         "lsr    r5, r2, #1\n\t"
15660         "lsl    r2, r2, %[n]\n\t"
15661         "lsr    r5, r5, r6\n\t"
15662         "orr    r3, r3, r5\n\t"
15663         "ldr    r4, [%[a], #56]\n\t"
15664         "str    r3, [%[r], #64]\n\t"
15665         "lsr    r5, r4, #1\n\t"
15666         "lsl    r4, r4, %[n]\n\t"
15667         "lsr    r5, r5, r6\n\t"
15668         "orr    r2, r2, r5\n\t"
15669         "ldr    r3, [%[a], #52]\n\t"
15670         "str    r2, [%[r], #60]\n\t"
15671         "lsr    r5, r3, #1\n\t"
15672         "lsl    r3, r3, %[n]\n\t"
15673         "lsr    r5, r5, r6\n\t"
15674         "orr    r4, r4, r5\n\t"
15675         "ldr    r2, [%[a], #48]\n\t"
15676         "str    r4, [%[r], #56]\n\t"
15677         "lsr    r5, r2, #1\n\t"
15678         "lsl    r2, r2, %[n]\n\t"
15679         "lsr    r5, r5, r6\n\t"
15680         "orr    r3, r3, r5\n\t"
15681         "ldr    r4, [%[a], #44]\n\t"
15682         "str    r3, [%[r], #52]\n\t"
15683         "lsr    r5, r4, #1\n\t"
15684         "lsl    r4, r4, %[n]\n\t"
15685         "lsr    r5, r5, r6\n\t"
15686         "orr    r2, r2, r5\n\t"
15687         "ldr    r3, [%[a], #40]\n\t"
15688         "str    r2, [%[r], #48]\n\t"
15689         "lsr    r5, r3, #1\n\t"
15690         "lsl    r3, r3, %[n]\n\t"
15691         "lsr    r5, r5, r6\n\t"
15692         "orr    r4, r4, r5\n\t"
15693         "ldr    r2, [%[a], #36]\n\t"
15694         "str    r4, [%[r], #44]\n\t"
15695         "lsr    r5, r2, #1\n\t"
15696         "lsl    r2, r2, %[n]\n\t"
15697         "lsr    r5, r5, r6\n\t"
15698         "orr    r3, r3, r5\n\t"
15699         "ldr    r4, [%[a], #32]\n\t"
15700         "str    r3, [%[r], #40]\n\t"
15701         "lsr    r5, r4, #1\n\t"
15702         "lsl    r4, r4, %[n]\n\t"
15703         "lsr    r5, r5, r6\n\t"
15704         "orr    r2, r2, r5\n\t"
15705         "ldr    r3, [%[a], #28]\n\t"
15706         "str    r2, [%[r], #36]\n\t"
15707         "lsr    r5, r3, #1\n\t"
15708         "lsl    r3, r3, %[n]\n\t"
15709         "lsr    r5, r5, r6\n\t"
15710         "orr    r4, r4, r5\n\t"
15711         "ldr    r2, [%[a], #24]\n\t"
15712         "str    r4, [%[r], #32]\n\t"
15713         "lsr    r5, r2, #1\n\t"
15714         "lsl    r2, r2, %[n]\n\t"
15715         "lsr    r5, r5, r6\n\t"
15716         "orr    r3, r3, r5\n\t"
15717         "ldr    r4, [%[a], #20]\n\t"
15718         "str    r3, [%[r], #28]\n\t"
15719         "lsr    r5, r4, #1\n\t"
15720         "lsl    r4, r4, %[n]\n\t"
15721         "lsr    r5, r5, r6\n\t"
15722         "orr    r2, r2, r5\n\t"
15723         "ldr    r3, [%[a], #16]\n\t"
15724         "str    r2, [%[r], #24]\n\t"
15725         "lsr    r5, r3, #1\n\t"
15726         "lsl    r3, r3, %[n]\n\t"
15727         "lsr    r5, r5, r6\n\t"
15728         "orr    r4, r4, r5\n\t"
15729         "ldr    r2, [%[a], #12]\n\t"
15730         "str    r4, [%[r], #20]\n\t"
15731         "lsr    r5, r2, #1\n\t"
15732         "lsl    r2, r2, %[n]\n\t"
15733         "lsr    r5, r5, r6\n\t"
15734         "orr    r3, r3, r5\n\t"
15735         "ldr    r4, [%[a], #8]\n\t"
15736         "str    r3, [%[r], #16]\n\t"
15737         "lsr    r5, r4, #1\n\t"
15738         "lsl    r4, r4, %[n]\n\t"
15739         "lsr    r5, r5, r6\n\t"
15740         "orr    r2, r2, r5\n\t"
15741         "ldr    r3, [%[a], #4]\n\t"
15742         "str    r2, [%[r], #12]\n\t"
15743         "lsr    r5, r3, #1\n\t"
15744         "lsl    r3, r3, %[n]\n\t"
15745         "lsr    r5, r5, r6\n\t"
15746         "orr    r4, r4, r5\n\t"
15747         "ldr    r2, [%[a], #0]\n\t"
15748         "str    r4, [%[r], #8]\n\t"
15749         "lsr    r5, r2, #1\n\t"
15750         "lsl    r2, r2, %[n]\n\t"
15751         "lsr    r5, r5, r6\n\t"
15752         "orr    r3, r3, r5\n\t"
15753         "str    r2, [%[r]]\n\t"
15754         "str    r3, [%[r], #4]\n\t"
15755         :
15756         : [r] "r" (r), [a] "r" (a), [n] "r" (n)
15757         : "memory", "r2", "r3", "r4", "r5", "r6"
15758     );
15759 }
15760 
15761 /* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
15762  *
       * The exponent is read from its highest-index digit downwards, top bits
       * first, in windows of up to 5 bits. For every window the accumulator is
       * Montgomery-squared five times and then shifted left by the window value
       * (a left shift by y is a multiplication by 2^y, so no table of powers is
       * needed). The digit that the shift pushes into r[128] is folded back in
       * by adding norm * r[128], followed by a conditional subtraction of m.
       *
15763  * r     A single precision number that is the result of the operation.
       *       Indices up to 255 are written (r[128..255] is cleared before the
       *       final reduction), so the buffer must hold 256 digits.
15764  * e     A single precision number that is the exponent.
15765  * bits  The number of bits in the exponent.
       *       NOTE(review): the visible caller passes expLen * 8. For a value
       *       such as 33, (bits & 31) is smaller than (bits % 5), c becomes
       *       negative and the shifts by c below are invalid - confirm that
       *       only multiples of 8 ever reach this function.
15766  * m     A single precision number that is the modulus.
15767  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
15768  */
15769 static int sp_4096_mod_exp_2_128(sp_digit* r, const sp_digit* e, int bits,
15770         const sp_digit* m)
15771 {
15772 #ifndef WOLFSSL_SMALL_STACK
          /* nd backs norm (256 digits); td backs tmp (129 digits). */
15773     sp_digit nd[256];
15774     sp_digit td[129];
15775 #else
15776     sp_digit* td;
15777 #endif
15778     sp_digit* norm;
15779     sp_digit* tmp;
15780     sp_digit mp = 1;
15781     sp_digit n, o;
15782     sp_digit mask;
15783     int i;
15784     int c, y;
15785     int err = MP_OKAY;
15786 
15787 #ifdef WOLFSSL_SMALL_STACK
          /* One allocation of 385 digits: 256 for norm plus 129 for tmp. */
15788     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 385, NULL,
15789                             DYNAMIC_TYPE_TMP_BUFFER);
15790     if (td == NULL) {
15791         err = MEMORY_E;
15792     }
15793 #endif
15794 
15795     if (err == MP_OKAY) {
15796 #ifdef WOLFSSL_SMALL_STACK
15797         norm = td;
15798         tmp  = td + 256;
15799 #else
15800         norm = nd;
15801         tmp  = td;
15802 #endif
15803 
15804         sp_4096_mont_setup(m, &mp);
15805         sp_4096_mont_norm_128(norm, m);
15806 
              /* Start at the digit holding the top exponent bit. c counts the
               * exponent bits still unread in n. */
15807         i = (bits - 1) / 32;
15808         n = e[i--];
15809         c = bits & 31;
15810         if (c == 0) {
15811             c = 32;
15812         }
              /* The first window takes (bits % 5) bits so that every later
               * window is a full 5 bits. */
15813         c -= bits % 5;
              /* c is still 32 only when the top digit is full and bits is a
               * multiple of 5: take a whole 5-bit window instead of shifting
               * by 32. */
15814         if (c == 32) {
15815             c = 27;
15816         }
15817         y = (int)(n >> c);
15818         n <<= 32 - c;
              /* Initial accumulator: norm shifted left by the first window. */
15819         sp_4096_lshift_128(r, norm, y);
15820         for (; i>=0 || c>=5; ) {
15821             if (c == 0) {
                      /* Current digit used up: load the next lower digit and
                       * take its top 5 bits. */
15822                 n = e[i--];
15823                 y = n >> 27;
15824                 n <<= 5;
15825                 c = 27;
15826             }
15827             else if (c < 5) {
                      /* Window straddles two digits: the c bits left at the top
                       * of n are the high part (n has only been shifted left
                       * since it was loaded, so the bits below them are zero),
                       * and the top (5 - c) bits of the next digit complete
                       * it. */
15828                 y = n >> 27;
15829                 n = e[i--];
15830                 c = 5 - c;
15831                 y |= n >> (32 - c);
15832                 n <<= c;
15833                 c = 32 - c;
15834             }
15835             else {
                      /* At least 5 unread bits remain in n. */
15836                 y = (n >> 27) & 0x1f;
15837                 n <<= 5;
15838                 c -= 5;
15839             }
15840 
                  /* Five squarings move the accumulated exponent up 5 bits. */
15841             sp_4096_mont_sqr_128(r, r, m, mp);
15842             sp_4096_mont_sqr_128(r, r, m, mp);
15843             sp_4096_mont_sqr_128(r, r, m, mp);
15844             sp_4096_mont_sqr_128(r, r, m, mp);
15845             sp_4096_mont_sqr_128(r, r, m, mp);
15846 
                  /* Multiply by 2^y with a shift, then fold the overflow digit
                   * r[128] back into the low 128 digits via norm * r[128]. The
                   * carry out of the addition selects the subtraction of m. */
15847             sp_4096_lshift_128(r, r, y);
15848             sp_4096_mul_d_128(tmp, norm, r[128]);
15849             r[128] = 0;
15850             o = sp_4096_add_128(r, r, tmp);
15851             sp_4096_cond_sub_128(r, r, m, (sp_digit)0 - o);
15852         }
15853 
              /* Clear the upper 128 digits before the final reduction
               * (presumably this takes r out of Montgomery form - see
               * sp_4096_mont_reduce_128). */
15854         XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U);
15855         sp_4096_mont_reduce_128(r, m, mp);
15856 
              /* mask is all ones when sp_4096_cmp_128 reports r >= m, else 0,
               * so m is subtracted only when needed and without a branch. */
15857         mask = 0 - (sp_4096_cmp_128(r, m) >= 0);
15858         sp_4096_cond_sub_128(r, r, m, mask);
15859     }
15860 
15861 #ifdef WOLFSSL_SMALL_STACK
15862     if (td != NULL) {
15863         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
15864     }
15865 #endif
15866 
15867     return err;
15868 }
15869 #endif /* HAVE_FFDHE_4096 */
15870 
15871 /* Perform the modular exponentiation for Diffie-Hellman.
15872  *
       * Inputs are rejected with MP_READ_E when base has more than 4096 bits,
       * when expLen exceeds 512 bytes, or when mod is not exactly 4096 bits.
       * When HAVE_FFDHE_4096 is defined, a base equal to 2 combined with a
       * modulus whose top digit is all ones is routed to
       * sp_4096_mod_exp_2_128(); every other input uses sp_4096_mod_exp_128().
       * The local copy of the exponent is wiped before returning, on success
       * and on error alike.
       *
15873  * base     Base.
15874  * exp      Array of bytes that is the exponent.
15875  * expLen   Length of data, in bytes, in exponent.
15876  * mod      Modulus.
15877  * out      Buffer to hold big-endian bytes of exponentiation result.
15878  *          Must be at least 512 bytes long.
       *          Leading zero bytes are stripped, so the result starts at out[0].
15879  * outLen   Length, in bytes, of exponentiation result.
       *          Set on success only: 512 minus the number of stripped bytes.
15880  * returns 0 on success, MP_READ_E if there are too many bytes in an array
       * or the modulus is not 4096 bits long,
15881  * and MEMORY_E if memory allocation fails.
15882  */
15883 int sp_DhExp_4096(mp_int* base, const byte* exp, word32 expLen,
15884     mp_int* mod, byte* out, word32* outLen)
15885 {
15886     int err = MP_OKAY;
          /* b has 256 digits because it doubles as the result buffer r, which
           * the exponentiation helpers use beyond the first 128 digits. */
15887     sp_digit b[256], e[128], m[128];
15888     sp_digit* r = b;
15889     word32 i;
15890 
15891     if (mp_count_bits(base) > 4096) {
15892         err = MP_READ_E;
15893     }
15894 
15895     if (err == MP_OKAY) {
15896         if (expLen > 512) {
15897             err = MP_READ_E;
15898         }
15899     }
15900 
15901     if (err == MP_OKAY) {
15902         if (mp_count_bits(mod) != 4096) {
15903             err = MP_READ_E;
15904         }
15905     }
15906 
15907     if (err == MP_OKAY) {
15908         sp_4096_from_mp(b, 128, base);
15909         sp_4096_from_bin(e, 128, exp, expLen);
15910         sp_4096_from_mp(m, 128, mod);
15911 
15912     #ifdef HAVE_FFDHE_4096
              /* Base 2 with an all-ones top modulus digit: use the shift-based
               * power-of-two exponentiation. */
15913         if (base->used == 1 && base->dp[0] == 2 && m[127] == (sp_digit)-1)
15914             err = sp_4096_mod_exp_2_128(r, e, expLen * 8, m);
15915         else
15916     #endif
15917             err = sp_4096_mod_exp_128(r, b, e, expLen * 8, m, 0);
15918 
15919     }
15920 
15921     if (err == MP_OKAY) {
15922         sp_4096_to_bin(r, out);
15923         *outLen = 512;
              /* Count leading zero bytes, then move the result to the front. */
15924         for (i=0; i<512 && out[i] == 0; i++) {
15925         }
15926         *outLen -= i;
15927         XMEMMOVE(out, out + i, *outLen);
15928 
15929     }
15930 
          /* The exponent is secret material in a DH exchange. e is never read
           * again after this point, so a plain memset-style clear would be a
           * dead store that the optimiser may remove; writing through a
           * volatile-qualified pointer keeps the wipe in the generated code. */
15931     for (i = 0; i < (word32)(sizeof(e) / sizeof(e[0])); i++) {
              ((volatile sp_digit*)e)[i] = 0;
          }
15932 
15933     return err;
15934 }
15935 #endif /* WOLFSSL_HAVE_SP_DH */
15936 
15937 #endif /* WOLFSSL_HAVE_SP_DH || (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) */
15938 
15939 #endif /* WOLFSSL_SP_4096 */
15940 
15941 #endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
15942 #ifdef WOLFSSL_HAVE_SP_ECC
15943 #ifndef WOLFSSL_SP_NO_256
15944 
15945 /* Point structure to use for curve P256: X, Y and Z ordinates and an infinity flag. */
15946 typedef struct sp_point_256 {
15947     sp_digit x[2 * 8]; /* X ordinate: 8 digits of value, 8 more of working space. */
15948     sp_digit y[2 * 8]; /* Y ordinate: same layout as x. */
15949     sp_digit z[2 * 8]; /* Z ordinate: same layout as x. */
15950     int infinity;      /* 0 for the base point and for converted ecc_points; presumably non-zero marks the point at infinity - confirm at use sites. */
15951 } sp_point_256;
15952 
15953 /* The modulus (prime) of the curve P256. Least significant 32-bit digit first. */
15954 static const sp_digit p256_mod[8] = {
15955     0xffffffff,0xffffffff,0xffffffff,0x00000000,0x00000000,0x00000000,
15956     0x00000001,0xffffffff
15957 };
15958 /* The Montgomery normalizer for modulus of the curve P256: 2^256 mod the prime. */
15959 static const sp_digit p256_norm_mod[8] = {
15960     0x00000001,0x00000000,0x00000000,0xffffffff,0xffffffff,0xffffffff,
15961     0xfffffffe,0x00000000
15962 };
15963 /* The Montgomery multiplier for modulus of the curve P256: negative inverse of the prime mod 2^32. */
15964 static const sp_digit p256_mp_mod = 0x00000001;
15965 #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
15966                                             defined(HAVE_ECC_VERIFY)
15967 /* The order of the curve P256. Least significant 32-bit digit first. */
15968 static const sp_digit p256_order[8] = {
15969     0xfc632551,0xf3b9cac2,0xa7179e84,0xbce6faad,0xffffffff,0xffffffff,
15970     0x00000000,0xffffffff
15971 };
15972 #endif
15973 /* The order of the curve P256 minus 2. Presumably the exponent used to invert modulo the order - confirm at use site. */
15974 static const sp_digit p256_order2[8] = {
15975     0xfc63254f,0xf3b9cac2,0xa7179e84,0xbce6faad,0xffffffff,0xffffffff,
15976     0x00000000,0xffffffff
15977 };
15978 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
15979 /* The Montgomery normalizer for order of the curve P256: 2^256 mod the order. */
15980 static const sp_digit p256_norm_order[8] = {
15981     0x039cdaaf,0x0c46353d,0x58e8617b,0x43190552,0x00000000,0x00000000,
15982     0xffffffff,0x00000000
15983 };
15984 #endif
15985 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
15986 /* The Montgomery multiplier for order of the curve P256: negative inverse of the order mod 2^32. */
15987 static const sp_digit p256_mp_order = 0xee00bc4f;
15988 #endif
15989 /* The base point of curve P256. Each ordinate is 8 value digits, least significant first, then 8 zero digits. */
15990 static const sp_point_256 p256_base = {
15991     /* X ordinate */
15992     {
15993         0xd898c296,0xf4a13945,0x2deb33a0,0x77037d81,0x63a440f2,0xf8bce6e5,
15994         0xe12c4247,0x6b17d1f2,
15995         0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
15996     },
15997     /* Y ordinate */
15998     {
15999         0x37bf51f5,0xcbb64068,0x6b315ece,0x2bce3357,0x7c0f9e16,0x8ee7eb4a,
16000         0xfe1a7f9b,0x4fe342e2,
16001         0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
16002     },
16003     /* Z ordinate: one. */
16004     {
16005         0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
16006         0x00000000,0x00000000,
16007         0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
16008     },
16009     /* infinity: not set for the base point. */
16010     0
16011 };
16012 #if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
16013 static const sp_digit p256_b[8] = { /* Coefficient b of the P256 curve equation, least significant digit first. */
16014     0x27d2604b,0x3bce3c3e,0xcc53b0f6,0x651d06b0,0x769886bc,0xb3ebbd55,
16015     0xaa3a93e7,0x5ac635d8
16016 };
16017 #endif
16018 
16019 static int sp_256_point_new_ex_8(void* heap, sp_point_256* sp, sp_point_256** p)
16020 {
16021     int ret = MP_OKAY;
16022     (void)heap;
16023 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
16024     (void)sp;
16025     *p = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC);
16026 #else
16027     *p = sp;
16028 #endif
16029     if (*p == NULL) {
16030         ret = MEMORY_E;
16031     }
16032     return ret;
16033 }
16034 
16035 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
16036 /* Allocate memory for point and return error. sp is never expanded, so it need not be declared by the caller. */
16037 #define sp_256_point_new_8(heap, sp, p) sp_256_point_new_ex_8((heap), NULL, &(p))
16038 #else
16039 /* Set pointer p to the caller's point sp and return no error. */
16040 #define sp_256_point_new_8(heap, sp, p) sp_256_point_new_ex_8((heap), &(sp), &(p))
16041 #endif
16042 
16043 
16044 static void sp_256_point_free_8(sp_point_256* p, int clear, void* heap)
16045 {
16046 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
16047 /* If valid pointer then clear point data if requested and free data. */
16048     if (p != NULL) {
16049         if (clear != 0) {
16050             XMEMSET(p, 0, sizeof(*p));
16051         }
16052         XFREE(p, heap, DYNAMIC_TYPE_ECC);
16053     }
16054 #else
16055 /* Clear point data if requested. */
16056     if (clear != 0) {
16057         XMEMSET(p, 0, sizeof(*p));
16058     }
16059 #endif
16060     (void)heap;
16061 }
16062 
16063 /* Multiply a number by Montogmery normalizer mod modulus (prime).
16064  *
16065  * r  The resulting Montgomery form number.
16066  * a  The number to convert.
16067  * m  The modulus (prime).
16068  */
16069 static int sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
16070 {
16071     int64_t t[8];
16072     int64_t a64[8];
16073     int64_t o;
16074 
16075     (void)m;
16076 
16077     a64[0] = a[0];
16078     a64[1] = a[1];
16079     a64[2] = a[2];
16080     a64[3] = a[3];
16081     a64[4] = a[4];
16082     a64[5] = a[5];
16083     a64[6] = a[6];
16084     a64[7] = a[7];
16085 
16086     /*  1  1  0 -1 -1 -1 -1  0 */
16087     t[0] = 0 + a64[0] + a64[1] - a64[3] - a64[4] - a64[5] - a64[6];
16088     /*  0  1  1  0 -1 -1 -1 -1 */
16089     t[1] = 0 + a64[1] + a64[2] - a64[4] - a64[5] - a64[6] - a64[7];
16090     /*  0  0  1  1  0 -1 -1 -1 */
16091     t[2] = 0 + a64[2] + a64[3] - a64[5] - a64[6] - a64[7];
16092     /* -1 -1  0  2  2  1  0 -1 */
16093     t[3] = 0 - a64[0] - a64[1] + 2 * a64[3] + 2 * a64[4] + a64[5] - a64[7];
16094     /*  0 -1 -1  0  2  2  1  0 */
16095     t[4] = 0 - a64[1] - a64[2] + 2 * a64[4] + 2 * a64[5] + a64[6];
16096     /*  0  0 -1 -1  0  2  2  1 */
16097     t[5] = 0 - a64[2] - a64[3] + 2 * a64[5] + 2 * a64[6] + a64[7];
16098     /* -1 -1  0  0  0  1  3  2 */
16099     t[6] = 0 - a64[0] - a64[1] + a64[5] + 3 * a64[6] + 2 * a64[7];
16100     /*  1  0 -1 -1 -1 -1  0  3 */
16101     t[7] = 0 + a64[0] - a64[2] - a64[3] - a64[4] - a64[5] + 3 * a64[7];
16102 
16103     t[1] += t[0] >> 32; t[0] &= 0xffffffff;
16104     t[2] += t[1] >> 32; t[1] &= 0xffffffff;
16105     t[3] += t[2] >> 32; t[2] &= 0xffffffff;
16106     t[4] += t[3] >> 32; t[3] &= 0xffffffff;
16107     t[5] += t[4] >> 32; t[4] &= 0xffffffff;
16108     t[6] += t[5] >> 32; t[5] &= 0xffffffff;
16109     t[7] += t[6] >> 32; t[6] &= 0xffffffff;
16110     o     = t[7] >> 32; t[7] &= 0xffffffff;
16111     t[0] += o;
16112     t[3] -= o;
16113     t[6] -= o;
16114     t[7] += o;
16115     t[1] += t[0] >> 32; t[0] &= 0xffffffff;
16116     t[2] += t[1] >> 32; t[1] &= 0xffffffff;
16117     t[3] += t[2] >> 32; t[2] &= 0xffffffff;
16118     t[4] += t[3] >> 32; t[3] &= 0xffffffff;
16119     t[5] += t[4] >> 32; t[4] &= 0xffffffff;
16120     t[6] += t[5] >> 32; t[5] &= 0xffffffff;
16121     t[7] += t[6] >> 32; t[6] &= 0xffffffff;
16122     r[0] = t[0];
16123     r[1] = t[1];
16124     r[2] = t[2];
16125     r[3] = t[3];
16126     r[4] = t[4];
16127     r[5] = t[5];
16128     r[6] = t[6];
16129     r[7] = t[7];
16130 
16131     return MP_OKAY;
16132 }
16133 
16134 /* Convert an mp_int to an array of sp_digit.
16135  *
16136  * r  A single precision integer.
16137  * size  Maximum number of bytes to convert
16138  * a  A multi-precision integer.
16139  */
16140 static void sp_256_from_mp(sp_digit* r, int size, const mp_int* a)
16141 {
16142 #if DIGIT_BIT == 32
16143     int j;
16144 
16145     XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
16146 
16147     for (j = a->used; j < size; j++) {
16148         r[j] = 0;
16149     }
16150 #elif DIGIT_BIT > 32
16151     int i, j = 0;
16152     word32 s = 0;
16153 
16154     r[0] = 0;
16155     for (i = 0; i < a->used && j < size; i++) {
16156         r[j] |= ((sp_digit)a->dp[i] << s);
16157         r[j] &= 0xffffffff;
16158         s = 32U - s;
16159         if (j + 1 >= size) {
16160             break;
16161         }
16162         /* lint allow cast of mismatch word32 and mp_digit */
16163         r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
16164         while ((s + 32U) <= (word32)DIGIT_BIT) {
16165             s += 32U;
16166             r[j] &= 0xffffffff;
16167             if (j + 1 >= size) {
16168                 break;
16169             }
16170             if (s < (word32)DIGIT_BIT) {
16171                 /* lint allow cast of mismatch word32 and mp_digit */
16172                 r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
16173             }
16174             else {
16175                 r[++j] = 0L;
16176             }
16177         }
16178         s = (word32)DIGIT_BIT - s;
16179     }
16180 
16181     for (j++; j < size; j++) {
16182         r[j] = 0;
16183     }
16184 #else
16185     int i, j = 0, s = 0;
16186 
16187     r[0] = 0;
16188     for (i = 0; i < a->used && j < size; i++) {
16189         r[j] |= ((sp_digit)a->dp[i]) << s;
16190         if (s + DIGIT_BIT >= 32) {
16191             r[j] &= 0xffffffff;
16192             if (j + 1 >= size) {
16193                 break;
16194             }
16195             s = 32 - s;
16196             if (s == DIGIT_BIT) {
16197                 r[++j] = 0;
16198                 s = 0;
16199             }
16200             else {
16201                 r[++j] = a->dp[i] >> s;
16202                 s = DIGIT_BIT - s;
16203             }
16204         }
16205         else {
16206             s += DIGIT_BIT;
16207         }
16208     }
16209 
16210     for (j++; j < size; j++) {
16211         r[j] = 0;
16212     }
16213 #endif
16214 }
16215 
16216 /* Convert a point of type ecc_point to type sp_point_256.
16217  *
16218  * p   Point of type sp_point_256 (result).
16219  * pm  Point of type ecc_point.
16220  */
16221 static void sp_256_point_from_ecc_point_8(sp_point_256* p, const ecc_point* pm)
16222 {
16223     XMEMSET(p->x, 0, sizeof(p->x));
16224     XMEMSET(p->y, 0, sizeof(p->y));
16225     XMEMSET(p->z, 0, sizeof(p->z));
16226     sp_256_from_mp(p->x, 8, pm->x);
16227     sp_256_from_mp(p->y, 8, pm->y);
16228     sp_256_from_mp(p->z, 8, pm->z);
16229     p->infinity = 0;
16230 }
16231 
16232 /* Convert an array of sp_digit to an mp_int.
16233  *
16234  * a  A single precision integer.
16235  * r  A multi-precision integer.
16236  */
16237 static int sp_256_to_mp(const sp_digit* a, mp_int* r)
16238 {
16239     int err;
16240 
16241     err = mp_grow(r, (256 + DIGIT_BIT - 1) / DIGIT_BIT);
16242     if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
16243 #if DIGIT_BIT == 32
16244         XMEMCPY(r->dp, a, sizeof(sp_digit) * 8);
16245         r->used = 8;
16246         mp_clamp(r);
16247 #elif DIGIT_BIT < 32
16248         int i, j = 0, s = 0;
16249 
16250         r->dp[0] = 0;
16251         for (i = 0; i < 8; i++) {
16252             r->dp[j] |= (mp_digit)(a[i] << s);
16253             r->dp[j] &= (1L << DIGIT_BIT) - 1;
16254             s = DIGIT_BIT - s;
16255             r->dp[++j] = (mp_digit)(a[i] >> s);
16256             while (s + DIGIT_BIT <= 32) {
16257                 s += DIGIT_BIT;
16258                 r->dp[j++] &= (1L << DIGIT_BIT) - 1;
16259                 if (s == SP_WORD_SIZE) {
16260                     r->dp[j] = 0;
16261                 }
16262                 else {
16263                     r->dp[j] = (mp_digit)(a[i] >> s);
16264                 }
16265             }
16266             s = 32 - s;
16267         }
16268         r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
16269         mp_clamp(r);
16270 #else
16271         int i, j = 0, s = 0;
16272 
16273         r->dp[0] = 0;
16274         for (i = 0; i < 8; i++) {
16275             r->dp[j] |= ((mp_digit)a[i]) << s;
16276             if (s + 32 >= DIGIT_BIT) {
16277     #if DIGIT_BIT != 32 && DIGIT_BIT != 64
16278                 r->dp[j] &= (1L << DIGIT_BIT) - 1;
16279     #endif
16280                 s = DIGIT_BIT - s;
16281                 r->dp[++j] = a[i] >> s;
16282                 s = 32 - s;
16283             }
16284             else {
16285                 s += 32;
16286             }
16287         }
16288         r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
16289         mp_clamp(r);
16290 #endif
16291     }
16292 
16293     return err;
16294 }
16295 
16296 /* Convert a point of type sp_point_256 to type ecc_point.
16297  *
16298  * p   Point of type sp_point_256.
16299  * pm  Point of type ecc_point (result).
16300  * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
16301  * MP_OKAY.
16302  */
16303 static int sp_256_point_to_ecc_point_8(const sp_point_256* p, ecc_point* pm)
16304 {
16305     int err;
16306 
16307     err = sp_256_to_mp(p->x, pm->x);
16308     if (err == MP_OKAY) {
16309         err = sp_256_to_mp(p->y, pm->y);
16310     }
16311     if (err == MP_OKAY) {
16312         err = sp_256_to_mp(p->z, pm->z);
16313     }
16314 
16315     return err;
16316 }
16317 
16318 /* Multiply a and b into r. (r = a * b) The product is formed one result digit (column) at a time.
16319  *
16320  * r  A single precision integer: receives all 16 digits of the product.
16321  * a  A single precision integer: 8 digits are read.
16322  * b  A single precision integer: 8 digits are read.
16323  */
16324 SP_NOINLINE static void sp_256_mul_8(sp_digit* r, const sp_digit* a,
16325         const sp_digit* b)
16326 {
16327     sp_digit tmp[8 * 2]; /* product is built here and only copied to r once a and b have been fully read */
16328     __asm__ __volatile__ (
16329         "mov    r3, #0\n\t" /* r3:r4:r5 hold the running column sum, low to high */
16330         "mov    r4, #0\n\t"
16331         "mov    r8, r3\n\t" /* r8 = byte offset of the result digit being computed */
16332         "mov    r11, %[r]\n\t" /* r11 = tmp */
16333         "mov    r9, %[a]\n\t" /* r9 = start of a */
16334         "mov    r10, %[b]\n\t" /* r10 = start of b */
16335         "mov    r6, #32\n\t"
16336         "add    r6, r9\n\t"
16337         "mov    r12, r6\n\t" /* r12 = one past the last digit of a */
16338         "\n1:\n\t"
16339         "mov    %[r], #0\n\t" /* %[r] doubles as a zero register for the adc instructions */
16340         "mov    r5, #0\n\t"
16341         "mov    r6, #28\n\t"
16342         "mov    %[a], r8\n\t"
16343         "sub    %[a], r6\n\t"
16344         "sbc    r6, r6\n\t"
16345         "mvn    r6, r6\n\t"
16346         "and    %[a], r6\n\t" /* offset into a = max(0, r8 - 28), computed without a branch */
16347         "mov    %[b], r8\n\t"
16348         "sub    %[b], %[a]\n\t" /* offset into b = r8 - offset into a */
16349         "add    %[a], r9\n\t"
16350         "add    %[b], r10\n\t"
16351         "\n2:\n\t"
16352         "# Multiply Start\n\t" /* 32x32 multiply done as four 16x16 products added into r3:r4:r5 */
16353         "ldr    r6, [%[a]]\n\t"
16354         "ldr    r7, [%[b]]\n\t"
16355         "lsl    r6, r6, #16\n\t"
16356         "lsl    r7, r7, #16\n\t"
16357         "lsr    r6, r6, #16\n\t"
16358         "lsr    r7, r7, #16\n\t"
16359         "mul    r7, r6\n\t" /* low(a) * low(b) */
16360         "add    r3, r7\n\t"
16361         "adc    r4, %[r]\n\t"
16362         "adc    r5, %[r]\n\t"
16363         "ldr    r7, [%[b]]\n\t"
16364         "lsr    r7, r7, #16\n\t"
16365         "mul    r6, r7\n\t" /* low(a) * high(b) */
16366         "lsr    r7, r6, #16\n\t"
16367         "lsl    r6, r6, #16\n\t"
16368         "add    r3, r6\n\t"
16369         "adc    r4, r7\n\t"
16370         "adc    r5, %[r]\n\t"
16371         "ldr    r6, [%[a]]\n\t"
16372         "ldr    r7, [%[b]]\n\t"
16373         "lsr    r6, r6, #16\n\t"
16374         "lsr    r7, r7, #16\n\t"
16375         "mul    r7, r6\n\t" /* high(a) * high(b) */
16376         "add    r4, r7\n\t"
16377         "adc    r5, %[r]\n\t"
16378         "ldr    r7, [%[b]]\n\t"
16379         "lsl    r7, r7, #16\n\t"
16380         "lsr    r7, r7, #16\n\t"
16381         "mul    r6, r7\n\t" /* high(a) * low(b) */
16382         "lsr    r7, r6, #16\n\t"
16383         "lsl    r6, r6, #16\n\t"
16384         "add    r3, r6\n\t"
16385         "adc    r4, r7\n\t"
16386         "adc    r5, %[r]\n\t"
16387         "# Multiply Done\n\t"
16388         "add    %[a], #4\n\t" /* next digit of a pairs with the previous digit of b */
16389         "sub    %[b], #4\n\t"
16390         "cmp    %[a], r12\n\t"
16391         "beq    3f\n\t" /* ran off the end of a: column complete */
16392         "mov    r6, r8\n\t"
16393         "add    r6, r9\n\t"
16394         "cmp    %[a], r6\n\t"
16395         "ble    2b\n\t" /* more pairs while the a digit index is no more than the column index */
16396         "\n3:\n\t"
16397         "mov    %[r], r11\n\t"
16398         "mov    r7, r8\n\t"
16399         "str    r3, [%[r], r7]\n\t" /* store the finished result digit */
16400         "mov    r3, r4\n\t" /* shift the column sum down one digit */
16401         "mov    r4, r5\n\t"
16402         "add    r7, #4\n\t"
16403         "mov    r8, r7\n\t"
16404         "mov    r6, #56\n\t"
16405         "cmp    r7, r6\n\t"
16406         "ble    1b\n\t" /* columns at byte offsets 0..56 */
16407         "str    r3, [%[r], r7]\n\t" /* last digit (offset 60) is the remaining carry */
16408         "mov    %[a], r9\n\t" /* put back the input registers that were used as scratch */
16409         "mov    %[b], r10\n\t"
16410         :
16411         : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
16412         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
16413     );
16414 
16415     XMEMCPY(r, tmp, sizeof(tmp)); /* copies 16 digits */
16416 }
16417 
16418 /* Conditionally subtract b from a using the mask m. (r = a - (b & m), 8 digits)
16419  * m is -1 to subtract and 0 to copy a to r unchanged.
16420  *
16421  * r  A single precision number representing condition subtract result.
16422  * a  A single precision number to subtract from.
16423  * b  A single precision number to subtract.
16424  * m  Mask value to apply.
16425  */
16426 SP_NOINLINE static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a,
16427         const sp_digit* b, sp_digit m)
16428 {
16429     sp_digit c = 0; /* borrow between digits: 0 or all ones */
16430 
16431     __asm__ __volatile__ (
16432         "mov    r5, #32\n\t"
16433         "mov    r8, r5\n\t" /* r8 = byte length of the numbers */
16434         "mov    r7, #0\n\t" /* r7 = byte offset of the current digit */
16435         "1:\n\t"
16436         "ldr    r6, [%[b], r7]\n\t"
16437         "and    r6, %[m]\n\t" /* digit of b, or zero when the mask is zero */
16438         "mov    r5, #0\n\t"
16439         "sub    r5, %[c]\n\t" /* 0 - c puts the saved borrow back into the carry flag */
16440         "ldr    r5, [%[a], r7]\n\t"
16441         "sbc    r5, r6\n\t"
16442         "sbc    %[c], %[c]\n\t" /* save the new borrow as 0 or all ones; the cmp below overwrites the flags */
16443         "str    r5, [%[r], r7]\n\t"
16444         "add    r7, #4\n\t"
16445         "cmp    r7, r8\n\t"
16446         "blt    1b\n\t"
16447         : [c] "+r" (c)
16448         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
16449         : "memory", "r5", "r6", "r7", "r8"
16450     );
16451 
16452     return c; /* 0, or all ones when the subtraction of the top digit borrowed */
16453 }
16454 
16455 /* Reduce the number back to 256 bits using Montgomery reduction with the P256 prime built in.
16456  *
16457  * a   A single precision number to reduce in place: 16 digits are read, the result is the first 8.
16458  * m   The single precision number representing the modulus. Not used.
16459  * mp  The digit representing the negative inverse of m mod 2^n. Not used.
16460  */
16461 SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m,
16462         sp_digit mp)
16463 {
16464     (void)mp;
16465     (void)m;
16466 
16467     __asm__ __volatile__ (
16468         "mov    r2, #0\n\t" /* r2 stays zero throughout */
16469         "mov    r1, #0\n\t" /* r1 = carry out of the top digit touched by the previous round */
16470         "# i = 0\n\t"
16471         "mov    r8, r2\n\t" /* r8 = i as a byte offset */
16472         "\n1:\n\t"
16473         "mov    r4, #0\n\t"
16474         "# mu = a[i] * 1 (mp) = a[i]\n\t"
16475         "ldr    r3, [%[a]]\n\t" /* r3 = mu; %[a] itself is advanced each round so offsets are relative to i */
16476         "# a[i+0] += -1 * mu\n\t"
16477         "mov    r5, r3\n\t"
16478         "str    r4, [%[a], #0]\n\t" /* the lowest digit is written as zero directly */
16479         "# a[i+1] += -1 * mu\n\t"
16480         "ldr    r6, [%[a], #4]\n\t"
16481         "mov    r4, r3\n\t"
16482         "sub    r5, r3\n\t"
16483         "sbc    r4, r2\n\t"
16484         "add    r5, r6\n\t"
16485         "adc    r4, r2\n\t"
16486         "str    r5, [%[a], #4]\n\t"
16487         "# a[i+2] += -1 * mu\n\t"
16488         "ldr    r6, [%[a], #8]\n\t"
16489         "mov    r5, r3\n\t"
16490         "sub    r4, r3\n\t"
16491         "sbc    r5, r2\n\t"
16492         "add    r4, r6\n\t"
16493         "adc    r5, r2\n\t"
16494         "str    r4, [%[a], #8]\n\t"
16495         "# a[i+3] += 0 * mu\n\t" /* zero digits of the modulus: only the carry is passed along */
16496         "ldr    r6, [%[a], #12]\n\t"
16497         "mov    r4, #0\n\t"
16498         "add    r5, r6\n\t"
16499         "adc    r4, r2\n\t"
16500         "str    r5, [%[a], #12]\n\t"
16501         "# a[i+4] += 0 * mu\n\t"
16502         "ldr    r6, [%[a], #16]\n\t"
16503         "mov    r5, #0\n\t"
16504         "add    r4, r6\n\t"
16505         "adc    r5, r2\n\t"
16506         "str    r4, [%[a], #16]\n\t"
16507         "# a[i+5] += 0 * mu\n\t"
16508         "ldr    r6, [%[a], #20]\n\t"
16509         "mov    r4, #0\n\t"
16510         "add    r5, r6\n\t"
16511         "adc    r4, r2\n\t"
16512         "str    r5, [%[a], #20]\n\t"
16513         "# a[i+6] += 1 * mu\n\t"
16514         "ldr    r6, [%[a], #24]\n\t"
16515         "mov    r5, #0\n\t"
16516         "add    r4, r3\n\t"
16517         "adc    r5, r2\n\t"
16518         "add    r4, r6\n\t"
16519         "adc    r5, r2\n\t"
16520         "str    r4, [%[a], #24]\n\t"
16521         "# a[i+7] += -1 * mu\n\t"
16522         "ldr    r6, [%[a], #28]\n\t"
16523         "ldr    r7, [%[a], #32]\n\t" /* a[i+8] takes the carry out of this round */
16524         "add    r4, r1, r3\n\t" /* bring in the carry left by the previous round */
16525         "mov    r1, #0\n\t"
16526         "adc    r1, r2\n\t"
16527         "sub    r5, r3\n\t"
16528         "sbc    r4, r2\n\t"
16529         "sbc    r1, r2\n\t"
16530         "add    r5, r6\n\t"
16531         "adc    r4, r7\n\t"
16532         "adc    r1, r2\n\t"
16533         "str    r5, [%[a],  #28]\n\t"
16534         "str    r4, [%[a], #32]\n\t"
16535         "# i += 1\n\t"
16536         "mov    r6, #4\n\t"
16537         "add    r8, r6\n\t"
16538         "add    %[a], #4\n\t"
16539         "mov    r6, #32\n\t"
16540         "cmp    r8, r6\n\t"
16541         "blt    1b\n\t"
16542         "sub    %[a], #32\n\t" /* back to the start of the number */
16543         "mov    r3, r1\n\t" /* r3 = final carry */
16544         "sub    r1, #1\n\t"
16545         "mvn    r1, r1\n\t" /* r1 = 0 - carry: a mask selecting whether the modulus is subtracted */
16546         "ldr    r5, [%[a],#32]\n\t" /* result = upper 8 digits minus the masked modulus, stored in the lower 8 */
16547         "ldr    r4, [%[a],#36]\n\t"
16548         "ldr    r6, [%[a],#40]\n\t"
16549         "ldr    r7, [%[a],#44]\n\t"
16550         "sub    r5, r1\n\t" /* modulus digits 0..2 are all ones: subtract the mask */
16551         "sbc    r4, r1\n\t"
16552         "sbc    r6, r1\n\t"
16553         "sbc    r7, r2\n\t" /* modulus digits 3..5 are zero */
16554         "str    r5, [%[a],#0]\n\t"
16555         "str    r4, [%[a],#4]\n\t"
16556         "str    r6, [%[a],#8]\n\t"
16557         "str    r7, [%[a],#12]\n\t"
16558         "ldr    r5, [%[a],#48]\n\t"
16559         "ldr    r4, [%[a],#52]\n\t"
16560         "ldr    r6, [%[a],#56]\n\t"
16561         "ldr    r7, [%[a],#60]\n\t"
16562         "sbc    r5, r2\n\t"
16563         "sbc    r4, r2\n\t"
16564         "sbc    r6, r3\n\t" /* modulus digit 6 is one: subtract the carry itself */
16565         "sbc    r7, r1\n\t" /* modulus digit 7 is all ones */
16566         "str    r5, [%[a],#16]\n\t"
16567         "str    r4, [%[a],#20]\n\t"
16568         "str    r6, [%[a],#24]\n\t"
16569         "str    r7, [%[a],#28]\n\t"
16570         : [a] "+r" (a)
16571         :
16572         : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8"
16573     );
16574 
16575 
16576     (void)m;
16577     (void)mp;
16578 }
16579 
16580 /* Reduce the number back to 256 bits using Montgomery reduction with the modulus and multiplier passed in.
16581  *
16582  * a   A single precision number to reduce in place: 16 digits are read, the result is the first 8.
16583  * m   The single precision number representing the modulus.
16584  * mp  The digit representing the negative inverse of m mod 2^n.
16585  */
16586 SP_NOINLINE static void sp_256_mont_reduce_order_8(sp_digit* a, const sp_digit* m,
16587         sp_digit mp)
16588 {
16589     sp_digit ca = 0; /* carry out of the top digit touched by a round */
16590 
16591     __asm__ __volatile__ (
16592         "mov    r8, %[mp]\n\t" /* r8 = mp */
16593         "mov    r12, %[ca]\n\t" /* r12 = carry kept between rounds */
16594         "mov    r14, %[m]\n\t" /* r14 = start of m */
16595         "mov    r9, %[a]\n\t" /* r9 = address of a[i] */
16596         "mov    r4, #0\n\t"
16597         "# i = 0\n\t"
16598         "mov    r11, r4\n\t" /* r11 = i as a byte offset */
16599         "\n1:\n\t"
16600         "mov    r5, #0\n\t"
16601         "mov    %[ca], #0\n\t"
16602         "# mu = a[i] * mp\n\t"
16603         "mov    %[mp], r8\n\t"
16604         "ldr    %[a], [%[a]]\n\t"
16605         "mul    %[mp], %[a]\n\t" /* %[mp] now holds mu */
16606         "mov    %[m], r14\n\t"
16607         "mov    r10, r9\n\t" /* r10 = address of a[i+j] */
16608         "\n2:\n\t"
16609         "# a[i+j] += m[j] * mu\n\t"
16610         "mov    %[a], r10\n\t"
16611         "ldr    %[a], [%[a]]\n\t"
16612         "mov    %[ca], #0\n\t" /* %[ca] doubles as a zero register inside the loop */
16613         "mov    r4, r5\n\t" /* r4 = carry in from the previous digit */
16614         "mov    r5, #0\n\t"
16615         "# Multiply m[j] and mu - Start\n\t" /* 32x32 multiply done as four 16x16 products */
16616         "ldr    r7, [%[m]]\n\t"
16617         "lsl    r6, %[mp], #16\n\t"
16618         "lsl    r7, r7, #16\n\t"
16619         "lsr    r6, r6, #16\n\t"
16620         "lsr    r7, r7, #16\n\t"
16621         "mul    r7, r6\n\t"
16622         "add    %[a], r7\n\t"
16623         "adc    r5, %[ca]\n\t"
16624         "ldr    r7, [%[m]]\n\t"
16625         "lsr    r7, r7, #16\n\t"
16626         "mul    r6, r7\n\t"
16627         "lsr    r7, r6, #16\n\t"
16628         "lsl    r6, r6, #16\n\t"
16629         "add    %[a], r6\n\t"
16630         "adc    r5, r7\n\t"
16631         "ldr    r7, [%[m]]\n\t"
16632         "lsr    r6, %[mp], #16\n\t"
16633         "lsr    r7, r7, #16\n\t"
16634         "mul    r7, r6\n\t"
16635         "add    r5, r7\n\t"
16636         "ldr    r7, [%[m]]\n\t"
16637         "lsl    r7, r7, #16\n\t"
16638         "lsr    r7, r7, #16\n\t"
16639         "mul    r6, r7\n\t"
16640         "lsr    r7, r6, #16\n\t"
16641         "lsl    r6, r6, #16\n\t"
16642         "add    %[a], r6\n\t"
16643         "adc    r5, r7\n\t"
16644         "# Multiply m[j] and mu - Done\n\t"
16645         "add    r4, %[a]\n\t"
16646         "adc    r5, %[ca]\n\t"
16647         "mov    %[a], r10\n\t"
16648         "str    r4, [%[a]]\n\t"
16649         "mov    r6, #4\n\t"
16650         "add    %[m], #4\n\t"
16651         "add    r10, r6\n\t"
16652         "mov    r4, #28\n\t"
16653         "add    r4, r9\n\t"
16654         "cmp    r10, r4\n\t"
16655         "blt    2b\n\t" /* inner loop covers j = 0..6; j = 7 is handled below */
16656         "# a[i+7] += m[7] * mu\n\t"
16657         "mov    %[ca], #0\n\t"
16658         "mov    r4, r12\n\t" /* bring in the carry left by the previous round */
16659         "mov    %[a], #0\n\t"
16660         "# Multiply m[7] and mu - Start\n\t"
16661         "ldr    r7, [%[m]]\n\t"
16662         "lsl    r6, %[mp], #16\n\t"
16663         "lsl    r7, r7, #16\n\t"
16664         "lsr    r6, r6, #16\n\t"
16665         "lsr    r7, r7, #16\n\t"
16666         "mul    r7, r6\n\t"
16667         "add    r5, r7\n\t"
16668         "adc    r4, %[ca]\n\t"
16669         "adc    %[a], %[ca]\n\t"
16670         "ldr    r7, [%[m]]\n\t"
16671         "lsr    r7, r7, #16\n\t"
16672         "mul    r6, r7\n\t"
16673         "lsr    r7, r6, #16\n\t"
16674         "lsl    r6, r6, #16\n\t"
16675         "add    r5, r6\n\t"
16676         "adc    r4, r7\n\t"
16677         "adc    %[a], %[ca]\n\t"
16678         "ldr    r7, [%[m]]\n\t"
16679         "lsr    r6, %[mp], #16\n\t"
16680         "lsr    r7, r7, #16\n\t"
16681         "mul    r7, r6\n\t"
16682         "add    r4, r7\n\t"
16683         "adc    %[a], %[ca]\n\t"
16684         "ldr    r7, [%[m]]\n\t"
16685         "lsl    r7, r7, #16\n\t"
16686         "lsr    r7, r7, #16\n\t"
16687         "mul    r6, r7\n\t"
16688         "lsr    r7, r6, #16\n\t"
16689         "lsl    r6, r6, #16\n\t"
16690         "add    r5, r6\n\t"
16691         "adc    r4, r7\n\t"
16692         "adc    %[a], %[ca]\n\t"
16693         "# Multiply m[7] and mu - Done\n\t"
16694         "mov    %[ca], %[a]\n\t"
16695         "mov    %[a], r10\n\t"
16696         "ldr    r7, [%[a], #4]\n\t" /* a[i+8] */
16697         "ldr    %[a], [%[a]]\n\t" /* a[i+7] */
16698         "mov    r6, #0\n\t"
16699         "add    r5, %[a]\n\t"
16700         "adc    r7, r4\n\t"
16701         "adc    %[ca], r6\n\t"
16702         "mov    %[a], r10\n\t"
16703         "str    r5, [%[a]]\n\t"
16704         "str    r7, [%[a], #4]\n\t"
16705         "# i += 1\n\t"
16706         "mov    r6, #4\n\t"
16707         "add    r9, r6\n\t"
16708         "add    r11, r6\n\t"
16709         "mov    r12, %[ca]\n\t"
16710         "mov    %[a], r9\n\t" /* after the last round this leaves a pointing at the upper 8 digits */
16711         "mov    r4, #32\n\t"
16712         "cmp    r11, r4\n\t"
16713         "blt    1b\n\t"
16714         "mov    %[m], r14\n\t" /* put back m, which was advanced as scratch */
16715         : [ca] "+r" (ca), [a] "+r" (a)
16716         : [m] "r" (m), [mp] "r" (mp) /* NOTE(review): both are written inside the asm although declared input-only; m is restored, mp is left holding mu - confirm this is safe for the compilers in use */
16717         : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14"
16718     );
16719 
16720     sp_256_cond_sub_8(a - 8, a, m, (sp_digit)0 - ca); /* a is now 8 digits on: write upper half, less m when there was a carry, to the start */
16721 }
16722 
16723 /* Multiply two Montgomery form numbers mod the modulus (prime).
16724  * (r = a * b mod m)
16725  *
16726  * r   Result of multiplication. The 16 digit product is written here before being reduced.
16727  * a   First number to multiply in Montgomery form.
16728  * b   Second number to multiply in Montgomery form.
16729  * m   Modulus (prime).
16730  * mp  Montgomery multiplier.
16731  */
16732 static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
16733         const sp_digit* m, sp_digit mp)
16734 {
16735     sp_256_mul_8(r, a, b); /* full product into r */
16736     sp_256_mont_reduce_8(r, m, mp); /* reduce r in place */
16737 }
16738 
16739 /* Square a and put result in r. (r = a * a)
16740  *
16741  * r  A single precision integer.
16742  * a  A single precision integer.
16743  */
16744 SP_NOINLINE static void sp_256_sqr_8(sp_digit* r, const sp_digit* a)
16745 {
16746     __asm__ __volatile__ (
16747         "mov    r3, #0\n\t"
16748         "mov    r4, #0\n\t"
16749         "mov    r5, #0\n\t"
16750         "mov    r8, r3\n\t"
16751         "mov    r11, %[r]\n\t"
16752         "mov    r6, #64\n\t"
16753         "neg    r6, r6\n\t"
16754         "add    sp, r6\n\t"
16755         "mov    r10, sp\n\t"
16756         "mov    r9, %[a]\n\t"
16757         "\n1:\n\t"
16758         "mov    %[r], #0\n\t"
16759         "mov    r6, #28\n\t"
16760         "mov    %[a], r8\n\t"
16761         "sub    %[a], r6\n\t"
16762         "sbc    r6, r6\n\t"
16763         "mvn    r6, r6\n\t"
16764         "and    %[a], r6\n\t"
16765         "mov    r2, r8\n\t"
16766         "sub    r2, %[a]\n\t"
16767         "add    %[a], r9\n\t"
16768         "add    r2, r9\n\t"
16769         "\n2:\n\t"
16770         "cmp    r2, %[a]\n\t"
16771         "beq    4f\n\t"
16772         "# Multiply * 2: Start\n\t"
16773         "ldr    r6, [%[a]]\n\t"
16774         "ldr    r7, [r2]\n\t"
16775         "lsl    r6, r6, #16\n\t"
16776         "lsl    r7, r7, #16\n\t"
16777         "lsr    r6, r6, #16\n\t"
16778         "lsr    r7, r7, #16\n\t"
16779         "mul    r7, r6\n\t"
16780         "add    r3, r7\n\t"
16781         "adc    r4, %[r]\n\t"
16782         "adc    r5, %[r]\n\t"
16783         "add    r3, r7\n\t"
16784         "adc    r4, %[r]\n\t"
16785         "adc    r5, %[r]\n\t"
16786         "ldr    r7, [r2]\n\t"
16787         "lsr    r7, r7, #16\n\t"
16788         "mul    r6, r7\n\t"
16789         "lsr    r7, r6, #16\n\t"
16790         "lsl    r6, r6, #16\n\t"
16791         "add    r3, r6\n\t"
16792         "adc    r4, r7\n\t"
16793         "adc    r5, %[r]\n\t"
16794         "add    r3, r6\n\t"
16795         "adc    r4, r7\n\t"
16796         "adc    r5, %[r]\n\t"
16797         "ldr    r6, [%[a]]\n\t"
16798         "ldr    r7, [r2]\n\t"
16799         "lsr    r6, r6, #16\n\t"
16800         "lsr    r7, r7, #16\n\t"
16801         "mul    r7, r6\n\t"
16802         "add    r4, r7\n\t"
16803         "adc    r5, %[r]\n\t"
16804         "add    r4, r7\n\t"
16805         "adc    r5, %[r]\n\t"
16806         "ldr    r7, [r2]\n\t"
16807         "lsl    r7, r7, #16\n\t"
16808         "lsr    r7, r7, #16\n\t"
16809         "mul    r6, r7\n\t"
16810         "lsr    r7, r6, #16\n\t"
16811         "lsl    r6, r6, #16\n\t"
16812         "add    r3, r6\n\t"
16813         "adc    r4, r7\n\t"
16814         "adc    r5, %[r]\n\t"
16815         "add    r3, r6\n\t"
16816         "adc    r4, r7\n\t"
16817         "adc    r5, %[r]\n\t"
16818         "# Multiply * 2: Done\n\t"
16819         "bal    5f\n\t"
16820         "\n4:\n\t"
16821         "# Square: Start\n\t"
16822         "ldr    r6, [%[a]]\n\t"
16823         "lsr    r7, r6, #16\n\t"
16824         "lsl    r6, r6, #16\n\t"
16825         "lsr    r6, r6, #16\n\t"
16826         "mul    r6, r6\n\t"
16827         "add    r3, r6\n\t"
16828         "adc    r4, %[r]\n\t"
16829         "adc    r5, %[r]\n\t"
16830         "mul    r7, r7\n\t"
16831         "add    r4, r7\n\t"
16832         "adc    r5, %[r]\n\t"
16833         "ldr    r6, [%[a]]\n\t"
16834         "lsr    r7, r6, #16\n\t"
16835         "lsl    r6, r6, #16\n\t"
16836         "lsr    r6, r6, #16\n\t"
16837         "mul    r6, r7\n\t"
16838         "lsr    r7, r6, #15\n\t"
16839         "lsl    r6, r6, #17\n\t"
16840         "add    r3, r6\n\t"
16841         "adc    r4, r7\n\t"
16842         "adc    r5, %[r]\n\t"
16843         "# Square: Done\n\t"
16844         "\n5:\n\t"
16845         "add    %[a], #4\n\t"
16846         "sub    r2, #4\n\t"
16847         "mov    r6, #32\n\t"
16848         "add    r6, r9\n\t"
16849         "cmp    %[a], r6\n\t"
16850         "beq    3f\n\t"
16851         "cmp    %[a], r2\n\t"
16852         "bgt    3f\n\t"
16853         "mov    r7, r8\n\t"
16854         "add    r7, r9\n\t"
16855         "cmp    %[a], r7\n\t"
16856         "ble    2b\n\t"
16857         "\n3:\n\t"
16858         "mov    %[r], r10\n\t"
16859         "mov    r7, r8\n\t"
16860         "str    r3, [%[r], r7]\n\t"
16861         "mov    r3, r4\n\t"
16862         "mov    r4, r5\n\t"
16863         "mov    r5, #0\n\t"
16864         "add    r7, #4\n\t"
16865         "mov    r8, r7\n\t"
16866         "mov    r6, #56\n\t"
16867         "cmp    r7, r6\n\t"
16868         "ble    1b\n\t"
16869         "mov    %[a], r9\n\t"
16870         "str    r3, [%[r], r7]\n\t"
16871         "mov    %[r], r11\n\t"
16872         "mov    %[a], r10\n\t"
16873         "mov    r3, #60\n\t"
16874         "\n4:\n\t"
16875         "ldr    r6, [%[a], r3]\n\t"
16876         "str    r6, [%[r], r3]\n\t"
16877         "sub    r3, #4\n\t"
16878         "bge    4b\n\t"
16879         "mov    r6, #64\n\t"
16880         "add    sp, r6\n\t"
16881         :
16882         : [r] "r" (r), [a] "r" (a)
16883         : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
16884     );
16885 }
16886 
16887 /* Square a Montgomery form number and reduce the result. (r = a * a mod m)
16888  *
16889  * r   Result of squaring; the unreduced square is written here first.
16890  * a   Number to square in Montgomery form.
16891  * m   Modulus (prime).
16892  * mp  Montgomery multiplier.
16893  */
16894 static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const sp_digit* m,
16895         sp_digit mp)
16896 {
16897     sp_256_sqr_8(r, a);                 /* r = a * a (plain square, not yet reduced) */
16898     sp_256_mont_reduce_8(r, m, mp);     /* reduce r in place using m and mp */
16899 }
16900 
16901 #if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
16902 /* Square a Montgomery form number n times in a row. (r = a ^ (2 ^ n) mod m)
16903  * One squaring is always performed, even when n is less than 1.
16904  * r   Result of the repeated squaring.
16905  * a   Number to square in Montgomery form.
16906  * n   Number of times to square.
16907  * m   Modulus (prime).
16908  * mp  Montgomery multiplier. */
16909 static void sp_256_mont_sqr_n_8(sp_digit* r, const sp_digit* a, int n,
16910         const sp_digit* m, sp_digit mp)
16911 {
16912     int done;   /* number of squarings performed so far */
16913     sp_256_mont_sqr_8(r, a, m, mp);
16914     for (done = 1; done < n; done++) {
16915         sp_256_mont_sqr_8(r, r, m, mp);
16916     }
16917 }
16918 
16919 #endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
16920 #ifdef WOLFSSL_SP_SMALL
16921 /* P256 modulus minus 2, least significant word first; sp_256_mont_inv_8 reads it one bit at a time. */
16922 static const uint32_t p256_mod_minus_2[8] = {
16923     0xfffffffdU,0xffffffffU,0xffffffffU,0x00000000U,0x00000000U,0x00000000U,
16924     0x00000001U,0xffffffffU
16925 };
16926 #endif /* WOLFSSL_SP_SMALL */
16927 
16928 /* Invert a Montgomery form number modulo the prime of the P256 curve by
16929  * raising it to the power (modulus - 2). (r = 1 / a mod m)
16930  *
16931  * r   Receives the inverse.
16932  * a   Number to invert, in Montgomery form.
16933  * td  Temporary data used as working storage.
16934  */
16935 static void sp_256_mont_inv_8(sp_digit* r, const sp_digit* a, sp_digit* td)
16936 {
16937 #ifdef WOLFSSL_SP_SMALL
16938     sp_digit* acc = td;
16939     int bit;    /* exponent bit being processed; acc = a accounts for bit 255 */
16940     XMEMCPY(acc, a, sizeof(sp_digit) * 8);
16941     for (bit = 254; bit >= 0; bit--) {
16942         sp_256_mont_sqr_8(acc, acc, p256_mod, p256_mp_mod);
16943         if ((p256_mod_minus_2[bit / 32] & ((sp_digit)1 << (bit % 32))) != 0) {
16944             sp_256_mont_mul_8(acc, acc, a, p256_mod, p256_mp_mod);
16945         }
16946     }
16947     XMEMCPY(r, acc, sizeof(sp_digit) * 8);
16948 #else
16949     sp_digit* sq   = td;
16950     sp_digit* ones = td + 2 * 8;
16951     sp_digit* tail = td + 4 * 8;
16952     /* sq = a^0x2 */
16953     sp_256_mont_sqr_8(sq, a, p256_mod, p256_mp_mod);
16954     /* ones = a^0x3 */
16955     sp_256_mont_mul_8(ones, sq, a, p256_mod, p256_mp_mod);
16956     /* sq = a^0xc */
16957     sp_256_mont_sqr_n_8(sq, ones, 2, p256_mod, p256_mp_mod);
16958     /* tail = a^0xd */
16959     sp_256_mont_mul_8(tail, sq, a, p256_mod, p256_mp_mod);
16960     /* ones = a^0xf */
16961     sp_256_mont_mul_8(ones, ones, sq, p256_mod, p256_mp_mod);
16962     /* sq = a^0xf0 */
16963     sp_256_mont_sqr_n_8(sq, ones, 4, p256_mod, p256_mp_mod);
16964     /* tail = a^0xfd */
16965     sp_256_mont_mul_8(tail, tail, sq, p256_mod, p256_mp_mod);
16966     /* ones = a^0xff */
16967     sp_256_mont_mul_8(ones, ones, sq, p256_mod, p256_mp_mod);
16968     /* sq = a^0xff00 */
16969     sp_256_mont_sqr_n_8(sq, ones, 8, p256_mod, p256_mp_mod);
16970     /* tail = a^0xfffd */
16971     sp_256_mont_mul_8(tail, tail, sq, p256_mod, p256_mp_mod);
16972     /* ones = a^0xffff */
16973     sp_256_mont_mul_8(ones, ones, sq, p256_mod, p256_mp_mod);
16974     /* sq = a^0xffff0000 */
16975     sp_256_mont_sqr_n_8(sq, ones, 16, p256_mod, p256_mp_mod);
16976     /* tail = a^0xfffffffd */
16977     sp_256_mont_mul_8(tail, tail, sq, p256_mod, p256_mp_mod);
16978     /* ones = a^0xffffffff */
16979     sp_256_mont_mul_8(ones, ones, sq, p256_mod, p256_mp_mod);
16980     /* sq = a^0xffffffff00000000 */
16981     sp_256_mont_sqr_n_8(sq, ones, 32, p256_mod, p256_mp_mod);
16982     /* ones = a^0xffffffffffffffff */
16983     sp_256_mont_mul_8(ones, ones, sq, p256_mod, p256_mp_mod);
16984     /* r = a^0xffffffff00000001 */
16985     sp_256_mont_mul_8(r, sq, a, p256_mod, p256_mp_mod);
16986     /* r = a^0xffffffff000000010000000000000000000000000000000000000000 */
16987     sp_256_mont_sqr_n_8(r, r, 160, p256_mod, p256_mp_mod);
16988     /* r = a^0xffffffff00000001000000000000000000000000ffffffffffffffff */
16989     sp_256_mont_mul_8(r, r, ones, p256_mod, p256_mp_mod);
16990     /* r = a^0xffffffff00000001000000000000000000000000ffffffffffffffff00000000 */
16991     sp_256_mont_sqr_n_8(r, r, 32, p256_mod, p256_mp_mod);
16992     /* r = a^0xffffffff00000001000000000000000000000000fffffffffffffffffffffffd */
16993     sp_256_mont_mul_8(r, r, tail, p256_mod, p256_mp_mod);
16994 #endif /* WOLFSSL_SP_SMALL */
16995 }
16996 
16997 /* Compare a with b in constant time: all 8 words are always visited, top word first.
16998  *
16999  * a  A single precision integer.
17000  * b  A single precision integer.
17001  * return -ve, 0 or +ve if a is less than, equal to or greater than b
17002  * respectively.
17003  */
17004 SP_NOINLINE static int32_t sp_256_cmp_8(const sp_digit* a, const sp_digit* b)
17005 {
17006     sp_digit r = 0;
17007 
17008 
17009     __asm__ __volatile__ (
17010         "mov    r3, #0\n\t"
17011         "mvn    r3, r3\n\t"             /* r3 = all ones: mask, cleared once a difference is seen */
17012         "mov    r6, #28\n\t"            /* r6 = byte offset of the most significant word */
17013         "1:\n\t"
17014         "ldr    r7, [%[a], r6]\n\t"
17015         "ldr    r5, [%[b], r6]\n\t"
17016         "and    r7, r3\n\t"             /* after a difference both words are forced to 0 */
17017         "and    r5, r3\n\t"
17018         "mov    r4, r7\n\t"             /* keep the a word for the second subtraction */
17019         "sub    r7, r5\n\t"
17020         "sbc    r7, r7\n\t"             /* r7 = -1 if a word < b word, else 0 */
17021         "add    %[r], r7\n\t"           /* r -= 1 when a is smaller */
17022         "mvn    r7, r7\n\t"
17023         "and    r3, r7\n\t"             /* clear the mask when a is smaller */
17024         "sub    r5, r4\n\t"
17025         "sbc    r7, r7\n\t"             /* r7 = -1 if b word < a word, else 0 */
17026         "sub    %[r], r7\n\t"           /* r += 1 when a is larger */
17027         "mvn    r7, r7\n\t"
17028         "and    r3, r7\n\t"             /* clear the mask when a is larger */
17029         "sub    r6, #4\n\t"             /* step down to the next lower word */
17030         "cmp    r6, #0\n\t"
17031         "bge    1b\n\t"                 /* loop depends only on the word offset, not the data */
17032         : [r] "+r" (r)
17033         : [a] "r" (a), [b] "r" (b)
17034         : "r3", "r4", "r5", "r6", "r7"
17035     );
17036 
17037     return r;
17038 }
17039 
17040 /* Normalize the values in each word to 32 bits.
17041  * In this implementation the macro expands to nothing, so calls to it do no work.
17042  * a  Array of sp_digit to normalize.
17043  */
17044 #define sp_256_norm_8(a)
17045 
17046 /* Map the Montgomery form projective coordinate point to an affine point.
17047  * The affine coordinates are x / z^2 and y / z^3, each brought below the
17048  * modulus; the z ordinate of the result is set to 1.
17049  * r  Resulting affine coordinate point.
17050  * p  Montgomery form projective coordinate point.
17051  * t  Temporary ordinate data.
17052  */
17053 static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, sp_digit* t)
17054 {
17055     sp_digit* zinv  = t;
17056     sp_digit* zinv2 = t + 2*8;
17057     sp_digit  mask;
17058 
17059     /* zinv = 1/z; the zinv2 area doubles as scratch for the inversion */
17060     sp_256_mont_inv_8(zinv, p->z, zinv2);
17061     /* zinv2 = 1/z^2, then zinv = 1/z^3 */
17062     sp_256_mont_sqr_8(zinv2, zinv, p256_mod, p256_mp_mod);
17063     sp_256_mont_mul_8(zinv, zinv2, zinv, p256_mod, p256_mp_mod);
17064 
17065     /* x = x / z^2, then reduce once more with the upper half zeroed */
17066     sp_256_mont_mul_8(r->x, p->x, zinv2, p256_mod, p256_mp_mod);
17067     XMEMSET(r->x + 8, 0, sizeof(r->x) / 2U);
17068     sp_256_mont_reduce_8(r->x, p256_mod, p256_mp_mod);
17069     /* Subtract the modulus when x is not below it */
17070     mask = (sp_digit)0 - (sp_digit)(sp_256_cmp_8(r->x, p256_mod) >= 0);
17071     sp_256_cond_sub_8(r->x, r->x, p256_mod, mask);
17072     sp_256_norm_8(r->x);
17073 
17074     /* y = y / z^3, then reduce once more with the upper half zeroed */
17075     sp_256_mont_mul_8(r->y, p->y, zinv, p256_mod, p256_mp_mod);
17076     XMEMSET(r->y + 8, 0, sizeof(r->y) / 2U);
17077     sp_256_mont_reduce_8(r->y, p256_mod, p256_mp_mod);
17078     /* Subtract the modulus when y is not below it */
17079     mask = (sp_digit)0 - (sp_digit)(sp_256_cmp_8(r->y, p256_mod) >= 0);
17080     sp_256_cond_sub_8(r->y, r->y, p256_mod, mask);
17081     sp_256_norm_8(r->y);
17082 
17083     /* z = 1 */
17084     XMEMSET(r->z, 0, sizeof(r->z));
17085     r->z[0] = 1;
17086 }
17087 
17088 #ifdef WOLFSSL_SP_SMALL
17089 /* Add b to a into r, one 32-bit word per loop pass over 8 words. (r = a + b)
17090  *
17091  * r  A single precision integer.
17092  * a  A single precision integer.
17093  * b  A single precision integer.
17094  * return  Carry out of the last word added (0 or 1). */
17095 SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a,
17096         const sp_digit* b)
17097 {
17098     sp_digit c = 0;
17099 
17100     __asm__ __volatile__ (
17101         "mov    r6, %[a]\n\t"
17102         "mov    r7, #0\n\t"
17103         "add    r6, #32\n\t"            /* r6 = a + 32 bytes: loop end marker */
17104         "sub    r7, #1\n\t"             /* r7 = 0xffffffff */
17105         "\n1:\n\t"
17106         "add    %[c], r7\n\t"           /* turn c back into the carry flag: 1 + 0xffffffff carries */
17107         "ldr    r4, [%[a]]\n\t"
17108         "ldr    r5, [%[b]]\n\t"
17109         "adc    r4, r5\n\t"             /* a word + b word + carry */
17110         "str    r4, [%[r]]\n\t"
17111         "mov    %[c], #0\n\t"
17112         "adc    %[c], %[c]\n\t"         /* c = carry out of this word */
17113         "add    %[a], #4\n\t"           /* advance all three pointers by one word */
17114         "add    %[b], #4\n\t"
17115         "add    %[r], #4\n\t"
17116         "cmp    %[a], r6\n\t"
17117         "bne    1b\n\t"
17118         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
17119         :
17120         : "memory", "r4", "r5", "r6", "r7"
17121     );
17122 
17123     return c;
17124 }
17125 
17126 #else
17127 /* Add b to a into r with the 8 word additions fully unrolled. (r = a + b)
17128  *
17129  * r  A single precision integer.
17130  * a  A single precision integer.
17131  * b  A single precision integer.
17132  * return  Carry out of the most significant word (0 or 1). */
17133 SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a,
17134         const sp_digit* b)
17135 {
17136     sp_digit c = 0;
17137 
17138     __asm__ __volatile__ (
17139         "ldr    r4, [%[a], #0]\n\t"
17140         "ldr    r5, [%[b], #0]\n\t"
17141         "add    r4, r5\n\t"             /* word 0: plain add starts the carry chain */
17142         "str    r4, [%[r], #0]\n\t"
17143         "ldr    r4, [%[a], #4]\n\t"
17144         "ldr    r5, [%[b], #4]\n\t"
17145         "adc    r4, r5\n\t"             /* words 1..7: add with carry from the word below */
17146         "str    r4, [%[r], #4]\n\t"
17147         "ldr    r4, [%[a], #8]\n\t"
17148         "ldr    r5, [%[b], #8]\n\t"
17149         "adc    r4, r5\n\t"
17150         "str    r4, [%[r], #8]\n\t"
17151         "ldr    r4, [%[a], #12]\n\t"
17152         "ldr    r5, [%[b], #12]\n\t"
17153         "adc    r4, r5\n\t"
17154         "str    r4, [%[r], #12]\n\t"
17155         "ldr    r4, [%[a], #16]\n\t"
17156         "ldr    r5, [%[b], #16]\n\t"
17157         "adc    r4, r5\n\t"
17158         "str    r4, [%[r], #16]\n\t"
17159         "ldr    r4, [%[a], #20]\n\t"
17160         "ldr    r5, [%[b], #20]\n\t"
17161         "adc    r4, r5\n\t"
17162         "str    r4, [%[r], #20]\n\t"
17163         "ldr    r4, [%[a], #24]\n\t"
17164         "ldr    r5, [%[b], #24]\n\t"
17165         "adc    r4, r5\n\t"
17166         "str    r4, [%[r], #24]\n\t"
17167         "ldr    r4, [%[a], #28]\n\t"
17168         "ldr    r5, [%[b], #28]\n\t"
17169         "adc    r4, r5\n\t"
17170         "str    r4, [%[r], #28]\n\t"
17171         "mov    %[c], #0\n\t"
17172         "adc    %[c], %[c]\n\t"         /* c = final carry */
17173         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
17174         :
17175         : "memory", "r4", "r5"
17176     );
17177 
17178     return c;
17179 }
17180 
17181 #endif /* WOLFSSL_SP_SMALL */
17182 /* Add two Montgomery form numbers (r = a + b % m).
17183  * The modulus is subtracted once only when the 8-word sum carries out of the top word.
17184  * r   Result of addition.
17185  * a   First number to add in Montgomery form.
17186  * b   Second number to add in Montgomery form.
17187  * m   Modulus (prime). Not read: the modulus word pattern is built into the assembly.
17188  */
17189 SP_NOINLINE static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
17190         const sp_digit* m)
17191 {
17192     (void)m;
17193 
17194     __asm__ __volatile__ (
17195         "mov    r3, #0\n\t"
17196         "ldr    r4, [%[a],#0]\n\t"
17197         "ldr    r5, [%[a],#4]\n\t"
17198         "ldr    r6, [%[b],#0]\n\t"
17199         "ldr    r7, [%[b],#4]\n\t"
17200         "add    r4, r6\n\t"             /* words 0-1 of a + b */
17201         "adc    r5, r7\n\t"
17202         "str    r4, [%[r],#0]\n\t"
17203         "str    r5, [%[r],#4]\n\t"
17204         "ldr    r4, [%[a],#8]\n\t"
17205         "ldr    r5, [%[a],#12]\n\t"
17206         "ldr    r6, [%[b],#8]\n\t"
17207         "ldr    r7, [%[b],#12]\n\t"
17208         "adc    r4, r6\n\t"             /* words 2-3 */
17209         "adc    r5, r7\n\t"
17210         "str    r4, [%[r],#8]\n\t"
17211         "str    r5, [%[r],#12]\n\t"
17212         "ldr    r4, [%[a],#16]\n\t"
17213         "ldr    r5, [%[a],#20]\n\t"
17214         "ldr    r6, [%[b],#16]\n\t"
17215         "ldr    r7, [%[b],#20]\n\t"
17216         "adc    r4, r6\n\t"             /* words 4-5 */
17217         "adc    r5, r7\n\t"
17218         "mov    r8, r4\n\t"             /* sum words 4-7 are parked in r8-r11 */
17219         "mov    r9, r5\n\t"
17220         "ldr    r4, [%[a],#24]\n\t"
17221         "ldr    r5, [%[a],#28]\n\t"
17222         "ldr    r6, [%[b],#24]\n\t"
17223         "ldr    r7, [%[b],#28]\n\t"
17224         "adc    r4, r6\n\t"             /* words 6-7 */
17225         "adc    r5, r7\n\t"
17226         "mov    r10, r4\n\t"
17227         "mov    r11, r5\n\t"
17228         "adc    r3, r3\n\t"             /* r3 = carry out of the top word */
17229         "mov    r6, r3\n\t"             /* r6 = carry (0 or 1) */
17230         "sub    r3, #1\n\t"
17231         "mvn    r3, r3\n\t"             /* r3 = all ones if carry, else 0 */
17232         "mov    r7, #0\n\t"
17233         "ldr    r4, [%[r],#0]\n\t"
17234         "ldr    r5, [%[r],#4]\n\t"
17235         "sub    r4, r3\n\t"             /* words 0-2: subtract the mask */
17236         "sbc    r5, r3\n\t"
17237         "str    r4, [%[r],#0]\n\t"
17238         "str    r5, [%[r],#4]\n\t"
17239         "ldr    r4, [%[r],#8]\n\t"
17240         "ldr    r5, [%[r],#12]\n\t"
17241         "sbc    r4, r3\n\t"
17242         "sbc    r5, r7\n\t"             /* words 3-5: subtract 0 (borrow only) */
17243         "str    r4, [%[r],#8]\n\t"
17244         "str    r5, [%[r],#12]\n\t"
17245         "mov    r4, r8\n\t"
17246         "mov    r5, r9\n\t"
17247         "sbc    r4, r7\n\t"
17248         "sbc    r5, r7\n\t"
17249         "str    r4, [%[r],#16]\n\t"
17250         "str    r5, [%[r],#20]\n\t"
17251         "mov    r4, r10\n\t"
17252         "mov    r5, r11\n\t"
17253         "sbc    r4, r6\n\t"             /* word 6: subtract the carry bit */
17254         "sbc    r5, r3\n\t"             /* word 7: subtract the mask */
17255         "str    r4, [%[r],#24]\n\t"
17256         "str    r5, [%[r],#28]\n\t"
17257         :
17258         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
17259         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
17260     );
17261 }
17262 
17263 /* Double a Montgomery form number (r = a + a % m).
17264  * The modulus is subtracted once only when the doubling carries out of the top word.
17265  * r   Result of doubling.
17266  * a   Number to double in Montgomery form.
17267  * m   Modulus (prime). Not read: the modulus word pattern is built into the assembly.
17268  */
17269 SP_NOINLINE static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
17270 {
17271     (void)m;
17272 
17273     __asm__ __volatile__ (
17274         "ldr    r4, [%[a],#0]\n\t"
17275         "ldr    r5, [%[a],#4]\n\t"
17276         "ldr    r6, [%[a],#8]\n\t"
17277         "ldr    r7, [%[a],#12]\n\t"
17278         "add    r4, r4\n\t"             /* words 0-3 of a + a */
17279         "adc    r5, r5\n\t"
17280         "adc    r6, r6\n\t"
17281         "adc    r7, r7\n\t"
17282         "str    r4, [%[r],#0]\n\t"
17283         "str    r5, [%[r],#4]\n\t"
17284         "str    r6, [%[r],#8]\n\t"
17285         "str    r7, [%[r],#12]\n\t"
17286         "ldr    r4, [%[a],#16]\n\t"
17287         "ldr    r5, [%[a],#20]\n\t"
17288         "ldr    r6, [%[a],#24]\n\t"
17289         "ldr    r7, [%[a],#28]\n\t"
17290         "adc    r4, r4\n\t"             /* words 4-7 of a + a */
17291         "adc    r5, r5\n\t"
17292         "adc    r6, r6\n\t"
17293         "adc    r7, r7\n\t"
17294         "mov    r8, r4\n\t"             /* sum words 4-7 are parked in r8-r11 */
17295         "mov    r9, r5\n\t"
17296         "mov    r10, r6\n\t"
17297         "mov    r11, r7\n\t"
17298         "mov    r3, #0\n\t"
17299         "mov    r7, #0\n\t"
17300         "adc    r3, r3\n\t"             /* r3 = carry out of the top word */
17301         "mov    r2, r3\n\t"             /* r2 = carry (0 or 1) */
17302         "sub    r3, #1\n\t"
17303         "mvn    r3, r3\n\t"             /* r3 = all ones if carry, else 0 */
17304         "ldr    r4, [%[r],#0]\n\t"
17305         "ldr    r5, [%[r],#4]\n\t"
17306         "ldr    r6, [%[r],#8]\n\t"
17307         "sub    r4, r3\n\t"             /* words 0-2: subtract the mask */
17308         "sbc    r5, r3\n\t"
17309         "sbc    r6, r3\n\t"
17310         "str    r4, [%[r],#0]\n\t"
17311         "str    r5, [%[r],#4]\n\t"
17312         "str    r6, [%[r],#8]\n\t"
17313         "ldr    r4, [%[r],#12]\n\t"
17314         "mov    r5, r8\n\t"
17315         "mov    r6, r9\n\t"
17316         "sbc    r4, r7\n\t"             /* words 3-5: subtract 0 (borrow only) */
17317         "sbc    r5, r7\n\t"
17318         "sbc    r6, r7\n\t"
17319         "str    r4, [%[r],#12]\n\t"
17320         "str    r5, [%[r],#16]\n\t"
17321         "str    r6, [%[r],#20]\n\t"
17322         "mov    r4, r10\n\t"
17323         "mov    r5, r11\n\t"
17324         "sbc    r4, r2\n\t"             /* word 6: subtract the carry bit */
17325         "sbc    r5, r3\n\t"             /* word 7: subtract the mask */
17326         "str    r4, [%[r],#24]\n\t"
17327         "str    r5, [%[r],#28]\n\t"
17328         :
17329         : [r] "r" (r), [a] "r" (a)
17330         : "memory", "r3", "r2", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
17331     );
17332 }
17333 
17334 /* Triple a Montgomery form number (r = a + a + a % m).
17335  * Done as a doubling followed by one more addition of a; after each step the modulus is subtracted if the step carried out.
17336  * r   Result of Tripling.
17337  * a   Number to triple in Montgomery form.
17338  * m   Modulus (prime). Not read: the modulus word pattern is built into the assembly.
17339  */
17340 SP_NOINLINE static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
17341 {
17342     (void)m;
17343 
17344     __asm__ __volatile__ (
17345         "ldr   r6, [%[a],#0]\n\t"
17346         "ldr   r7, [%[a],#4]\n\t"
17347         "ldr   r4, [%[a],#8]\n\t"
17348         "ldr   r5, [%[a],#12]\n\t"
17349         "add   r6, r6\n\t"              /* step 1: a + a; words 0-1 stay in r6, r7 */
17350         "adc   r7, r7\n\t"
17351         "adc   r4, r4\n\t"
17352         "adc   r5, r5\n\t"
17353         "mov   r8, r4\n\t"              /* words 2-3 are parked in r8, r9 */
17354         "mov   r9, r5\n\t"
17355         "ldr   r2, [%[a],#16]\n\t"
17356         "ldr   r3, [%[a],#20]\n\t"
17357         "ldr   r4, [%[a],#24]\n\t"
17358         "ldr   r5, [%[a],#28]\n\t"
17359         "adc   r2, r2\n\t"
17360         "adc   r3, r3\n\t"
17361         "adc   r4, r4\n\t"
17362         "adc   r5, r5\n\t"
17363         "mov   r10, r2\n\t"             /* words 4-7 are parked in r10, r11, r12, r14 */
17364         "mov   r11, r3\n\t"
17365         "mov   r12, r4\n\t"
17366         "mov   r14, r5\n\t"
17367         "mov   r3, #0\n\t"
17368         "mov   r5, #0\n\t"
17369         "adc   r3, r3\n\t"              /* r3 = carry out of the doubling */
17370         "mov   r4, r3\n\t"              /* r4 = carry (0 or 1) */
17371         "sub   r3, #1\n\t"
17372         "mvn   r3, r3\n\t"              /* r3 = all ones if carry, else 0 */
17373         "sub   r6, r3\n\t"              /* words 0-2: subtract the mask */
17374         "sbc   r7, r3\n\t"
17375         "mov   r2, r8\n\t"
17376         "sbc   r2, r3\n\t"
17377         "mov   r8, r2\n\t"
17378         "mov   r2, r9\n\t"
17379         "sbc   r2, r5\n\t"              /* words 3-5: subtract 0 (borrow only) */
17380         "mov   r9, r2\n\t"
17381         "mov   r2, r10\n\t"
17382         "sbc   r2, r5\n\t"
17383         "mov   r10, r2\n\t"
17384         "mov   r2, r11\n\t"
17385         "sbc   r2, r5\n\t"
17386         "mov   r11, r2\n\t"
17387         "mov   r2, r12\n\t"
17388         "sbc   r2, r4\n\t"              /* word 6: subtract the carry bit */
17389         "mov   r12, r2\n\t"
17390         "mov   r2, r14\n\t"
17391         "sbc   r2, r3\n\t"              /* word 7: subtract the mask */
17392         "mov   r14, r2\n\t"
17393         "ldr    r2, [%[a],#0]\n\t"
17394         "ldr    r3, [%[a],#4]\n\t"
17395         "add    r6, r2\n\t"             /* step 2: add a to the doubled value */
17396         "adc    r7, r3\n\t"
17397         "ldr    r2, [%[a],#8]\n\t"
17398         "ldr    r3, [%[a],#12]\n\t"
17399         "mov    r4, r8\n\t"
17400         "mov    r5, r9\n\t"
17401         "adc    r2, r4\n\t"
17402         "adc    r3, r5\n\t"
17403         "mov   r8, r2\n\t"
17404         "mov   r9, r3\n\t"
17405         "ldr    r2, [%[a],#16]\n\t"
17406         "ldr    r3, [%[a],#20]\n\t"
17407         "mov    r4, r10\n\t"
17408         "mov    r5, r11\n\t"
17409         "adc    r2, r4\n\t"
17410         "adc    r3, r5\n\t"
17411         "mov    r10, r2\n\t"
17412         "mov    r11, r3\n\t"
17413         "ldr    r2, [%[a],#24]\n\t"
17414         "ldr    r3, [%[a],#28]\n\t"
17415         "mov    r4, r12\n\t"
17416         "mov    r5, r14\n\t"
17417         "adc    r2, r4\n\t"
17418         "adc    r3, r5\n\t"
17419         "mov    r12, r2\n\t"
17420         "mov    r14, r3\n\t"
17421         "mov   r3, #0\n\t"
17422         "mov    r5, #0\n\t"
17423         "adc    r3, r3\n\t"             /* r3 = carry out of the second addition */
17424         "mov    r4, r3\n\t"             /* r4 = carry (0 or 1) */
17425         "sub    r3, #1\n\t"
17426         "mvn    r3, r3\n\t"             /* r3 = all ones if carry, else 0 */
17427         "sub    r6, r3\n\t"             /* same masked subtraction again, now storing into r */
17428         "str    r6, [%[r],#0]\n\t"
17429         "sbc    r7, r3\n\t"
17430         "str    r7, [%[r],#4]\n\t"
17431         "mov   r2, r8\n\t"
17432         "sbc   r2, r3\n\t"
17433         "str    r2, [%[r],#8]\n\t"
17434         "mov   r2, r9\n\t"
17435         "sbc   r2, r5\n\t"
17436         "str    r2, [%[r],#12]\n\t"
17437         "mov   r2, r10\n\t"
17438         "sbc   r2, r5\n\t"
17439         "str    r2, [%[r],#16]\n\t"
17440         "mov   r2, r11\n\t"
17441         "sbc   r2, r5\n\t"
17442         "str    r2, [%[r],#20]\n\t"
17443         "mov   r2, r12\n\t"
17444         "sbc   r2, r4\n\t"
17445         "str    r2, [%[r],#24]\n\t"
17446         "mov   r2, r14\n\t"
17447         "sbc   r2, r3\n\t"
17448         "str    r2, [%[r],#28]\n\t"
17449         :
17450         : [r] "r" (r), [a] "r" (a)
17451         : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14"
17452     );
17453 }
17454 
17455 /* Subtract two Montgomery form numbers (r = a - b % m).
17456  * The modulus is added back once when the 8-word subtraction borrows out of the top word.
17457  * r   Result of subtraction.
17458  * a   Number to subtract from in Montgomery form.
17459  * b   Number to subtract with in Montgomery form.
17460  * m   Modulus (prime). Not read: the modulus word pattern is built into the assembly.
17461  */
17462 SP_NOINLINE static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
17463         const sp_digit* m)
17464 {
17465     (void)m;
17466 
17467     __asm__ __volatile__ (
17468         "ldr    r4, [%[a],#0]\n\t"
17469         "ldr    r5, [%[a],#4]\n\t"
17470         "ldr    r6, [%[b],#0]\n\t"
17471         "ldr    r7, [%[b],#4]\n\t"
17472         "sub    r4, r6\n\t"             /* words 0-1 of a - b */
17473         "sbc    r5, r7\n\t"
17474         "str    r4, [%[r],#0]\n\t"
17475         "str    r5, [%[r],#4]\n\t"
17476         "ldr    r4, [%[a],#8]\n\t"
17477         "ldr    r5, [%[a],#12]\n\t"
17478         "ldr    r6, [%[b],#8]\n\t"
17479         "ldr    r7, [%[b],#12]\n\t"
17480         "sbc    r4, r6\n\t"             /* words 2-3 */
17481         "sbc    r5, r7\n\t"
17482         "str    r4, [%[r],#8]\n\t"
17483         "str    r5, [%[r],#12]\n\t"
17484         "ldr    r4, [%[a],#16]\n\t"
17485         "ldr    r5, [%[a],#20]\n\t"
17486         "ldr    r6, [%[b],#16]\n\t"
17487         "ldr    r7, [%[b],#20]\n\t"
17488         "sbc    r4, r6\n\t"             /* words 4-5 */
17489         "sbc    r5, r7\n\t"
17490         "mov    r8, r4\n\t"             /* difference words 4-7 are parked in r8-r11 */
17491         "mov    r9, r5\n\t"
17492         "ldr    r4, [%[a],#24]\n\t"
17493         "ldr    r5, [%[a],#28]\n\t"
17494         "ldr    r6, [%[b],#24]\n\t"
17495         "ldr    r7, [%[b],#28]\n\t"
17496         "sbc    r4, r6\n\t"             /* words 6-7 */
17497         "sbc    r5, r7\n\t"
17498         "mov    r10, r4\n\t"
17499         "mov    r11, r5\n\t"
17500         "sbc   r3, r3\n\t"              /* r3 = all ones if the subtraction borrowed, else 0 */
17501         "lsr   r7, r3, #31\n\t"         /* r7 = 1 if borrowed, else 0 */
17502         "mov   r6, #0\n\t"
17503         "ldr    r4, [%[r],#0]\n\t"
17504         "ldr    r5, [%[r],#4]\n\t"
17505         "add    r4, r3\n\t"             /* words 0-2: add the mask */
17506         "adc    r5, r3\n\t"
17507         "str    r4, [%[r],#0]\n\t"
17508         "str    r5, [%[r],#4]\n\t"
17509         "ldr    r4, [%[r],#8]\n\t"
17510         "ldr    r5, [%[r],#12]\n\t"
17511         "adc    r4, r3\n\t"
17512         "adc    r5, r6\n\t"             /* words 3-5: add 0 (carry only) */
17513         "str    r4, [%[r],#8]\n\t"
17514         "str    r5, [%[r],#12]\n\t"
17515         "mov    r4, r8\n\t"
17516         "mov    r5, r9\n\t"
17517         "adc    r4, r6\n\t"
17518         "adc    r5, r6\n\t"
17519         "str    r4, [%[r],#16]\n\t"
17520         "str    r5, [%[r],#20]\n\t"
17521         "mov    r4, r10\n\t"
17522         "mov    r5, r11\n\t"
17523         "adc    r4, r7\n\t"             /* word 6: add the borrow bit */
17524         "adc    r5, r3\n\t"             /* word 7: add the mask */
17525         "str    r4, [%[r],#24]\n\t"
17526         "str    r5, [%[r],#28]\n\t"
17527         :
17528         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
17529         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
17530     );
17531 }
17532 
17533 /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
17534  * When a is odd the modulus pattern built into the assembly is added first; the 257-bit sum is then shifted right one bit.
17535  * r  Result of division by 2. May be the same buffer as a or a separate one.
17536  * a  Number to divide.
17537  * m  Modulus (prime). Passed to the assembly but not referenced by any instruction.
17538  */
17539 SP_NOINLINE static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
17540 {
17541     __asm__ __volatile__ (
17542         "ldr    r7, [%[a], #0]\n\t"
17543         "lsl    r7, r7, #31\n\t"
17544         "lsr    r7, r7, #31\n\t"        /* r7 = lowest bit of a */
17545         "mov    r5, #0\n\t"
17546         "sub    r5, r7\n\t"             /* r5 = all ones if a is odd, else 0 */
17547         "mov    r7, #0\n\t"
17548         "lsl    r6, r5, #31\n\t"
17549         "lsr    r6, r6, #31\n\t"        /* r6 = 1 if a is odd, else 0 */
17550         "ldr    r3, [%[a], #0]\n\t"
17551         "ldr    r4, [%[a], #4]\n\t"
17552         "add    r3, r5\n\t"             /* words 0-2: add the mask */
17553         "adc    r4, r5\n\t"
17554         "str    r3, [%[r], #0]\n\t"
17555         "str    r4, [%[r], #4]\n\t"
17556         "ldr    r3, [%[a], #8]\n\t"
17557         "ldr    r4, [%[a], #12]\n\t"
17558         "adc    r3, r5\n\t"
17559         "adc    r4, r7\n\t"             /* words 3-5: add 0 (carry only) */
17560         "str    r3, [%[r], #8]\n\t"
17561         "str    r4, [%[r], #12]\n\t"
17562         "ldr    r3, [%[a], #16]\n\t"
17563         "ldr    r4, [%[a], #20]\n\t"
17564         "adc    r3, r7\n\t"
17565         "adc    r4, r7\n\t"
17566         "str    r3, [%[r], #16]\n\t"
17567         "str    r4, [%[r], #20]\n\t"
17568         "ldr    r3, [%[a], #24]\n\t"
17569         "ldr    r4, [%[a], #28]\n\t"
17570         "adc    r3, r6\n\t"             /* word 6: add the odd bit */
17571         "adc    r4, r5\n\t"             /* word 7: add the mask */
17572         "adc    r7, r7\n\t"             /* r7 = carry out of the sum (bit 256) */
17573         "lsl    r7, r7, #31\n\t"
17574         "lsr    r5, r3, #1\n\t"         /* shift right by one, starting from the top words */
17575         "lsl    r3, r3, #31\n\t"
17576         "lsr    r6, r4, #1\n\t"
17577         "lsl    r4, r4, #31\n\t"
17578         "orr    r5, r4\n\t"
17579         "orr    r6, r7\n\t"
17580         "mov    r7, r3\n\t"             /* r7 = bit handed down to the next lower word pair */
17581         "str    r5, [%[r], #24]\n\t"
17582         "str    r6, [%[r], #28]\n\t"
17583         "ldr    r3, [%[r], #16]\n\t"    /* reload the sum from r, not a, so r need not alias a */
17584         "ldr    r4, [%[r], #20]\n\t"
17585         "lsr    r5, r3, #1\n\t"
17586         "lsl    r3, r3, #31\n\t"
17587         "lsr    r6, r4, #1\n\t"
17588         "lsl    r4, r4, #31\n\t"
17589         "orr    r5, r4\n\t"
17590         "orr    r6, r7\n\t"
17591         "mov    r7, r3\n\t"
17592         "str    r5, [%[r], #16]\n\t"
17593         "str    r6, [%[r], #20]\n\t"
17594         "ldr    r3, [%[r], #8]\n\t"     /* reload the sum from r, not a, so r need not alias a */
17595         "ldr    r4, [%[r], #12]\n\t"
17596         "lsr    r5, r3, #1\n\t"
17597         "lsl    r3, r3, #31\n\t"
17598         "lsr    r6, r4, #1\n\t"
17599         "lsl    r4, r4, #31\n\t"
17600         "orr    r5, r4\n\t"
17601         "orr    r6, r7\n\t"
17602         "mov    r7, r3\n\t"
17603         "str    r5, [%[r], #8]\n\t"
17604         "str    r6, [%[r], #12]\n\t"
17605         "ldr    r3, [%[r], #0]\n\t"
17606         "ldr    r4, [%[r], #4]\n\t"
17607         "lsr    r5, r3, #1\n\t"         /* lowest pair: the bit shifted out of word 0 is dropped */
17608         "lsr    r6, r4, #1\n\t"
17609         "lsl    r4, r4, #31\n\t"
17610         "orr    r5, r4\n\t"
17611         "orr    r6, r7\n\t"
17612         "str    r5, [%[r], #0]\n\t"
17613         "str    r6, [%[r], #4]\n\t"
17614         :
17615         : [r] "r" (r), [a] "r" (a), [m] "r" (m)
17616         : "memory", "r3", "r4", "r5", "r6", "r7"
17617     );
17618 }
17619 
17620 /* Compute r = 2 * p for a projective point held in Montgomery form.
17621  *
17622  * r  Receives the doubled point; may be the same object as p.
17623  * p  Point to double.
17624  * t  Temporary ordinate data (two working values are taken from it).
17625  */
17626 static void sp_256_proj_point_dbl_8(sp_point_256* r, const sp_point_256* p, sp_digit* t)
17627 {
17628     sp_digit* ta = t;
17629     sp_digit* tb = t + 2*8;
17630     sp_digit* rx = r->x;
17631     sp_digit* ry = r->y;
17632     sp_digit* rz = r->z;
17633 
17634     /* When r is a different point from p, carry the infinity flag over to
17635      * the result; when they are the same point it is already in place. */
17636     if (r != p) {
17637         r->infinity = p->infinity;
17638     }
17639 
17640     /* The order of the steps below matters: each ordinate of p is read for
17641      * the last time no later than the step that first writes it in r. */
17642     /* ta = p.z ^ 2 */
17643     sp_256_mont_sqr_8(ta, p->z, p256_mod, p256_mp_mod);
17644     /* r.z = p.y * p.z */
17645     sp_256_mont_mul_8(rz, p->y, p->z, p256_mod, p256_mp_mod);
17646     /* r.z = 2 * r.z */
17647     sp_256_mont_dbl_8(rz, rz, p256_mod);
17648     /* tb = p.x - ta */
17649     sp_256_mont_sub_8(tb, p->x, ta, p256_mod);
17650     /* ta = p.x + ta */
17651     sp_256_mont_add_8(ta, p->x, ta, p256_mod);
17652     /* tb = ta * tb */
17653     sp_256_mont_mul_8(tb, ta, tb, p256_mod, p256_mp_mod);
17654     /* ta = 3 * tb */
17655     sp_256_mont_tpl_8(ta, tb, p256_mod);
17656     /* r.y = 2 * p.y */
17657     sp_256_mont_dbl_8(ry, p->y, p256_mod);
17658     /* r.y = r.y ^ 2 */
17659     sp_256_mont_sqr_8(ry, ry, p256_mod, p256_mp_mod);
17660     /* tb = r.y ^ 2 */
17661     sp_256_mont_sqr_8(tb, ry, p256_mod, p256_mp_mod);
17662     /* tb = tb / 2 */
17663     sp_256_div2_8(tb, tb, p256_mod);
17664     /* r.y = r.y * p.x */
17665     sp_256_mont_mul_8(ry, ry, p->x, p256_mod, p256_mp_mod);
17666     /* r.x = ta ^ 2 */
17667     sp_256_mont_sqr_8(rx, ta, p256_mod, p256_mp_mod);
17668     /* r.x = r.x - r.y */
17669     sp_256_mont_sub_8(rx, rx, ry, p256_mod);
17670     /* r.x = r.x - r.y (second time) */
17671     sp_256_mont_sub_8(rx, rx, ry, p256_mod);
17672     /* r.y = r.y - r.x */
17673     sp_256_mont_sub_8(ry, ry, rx, p256_mod);
17674     /* r.y = r.y * ta */
17675     sp_256_mont_mul_8(ry, ry, ta, p256_mod, p256_mp_mod);
17676     /* r.y = r.y - tb */
17677     sp_256_mont_sub_8(ry, ry, tb, p256_mod);
17678 }
17679 
17680 #ifdef WOLFSSL_SP_SMALL
17681 /* Sub b from a into r, one 32-bit word per loop pass over 8 words. (r = a - b)
17682  *
17683  * r  A single precision integer.
17684  * a  A single precision integer.
17685  * b  A single precision integer.
17686  * return  0 when the last word subtracted did not borrow, all ones when it did. */
17687 SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a,
17688         const sp_digit* b)
17689 {
17690     sp_digit c = 0;
17691 
17692     __asm__ __volatile__ (
17693         "mov    r6, %[a]\n\t"
17694         "add    r6, #32\n\t"            /* r6 = a + 32 bytes: loop end marker */
17695         "\n1:\n\t"
17696         "mov    r5, #0\n\t"
17697         "sub    r5, %[c]\n\t"           /* 0 - c: recreates the borrow in the flags when c is non-zero */
17698         "ldr    r4, [%[a]]\n\t"
17699         "ldr    r5, [%[b]]\n\t"         /* r5 is reused; only the flags from the sub above matter */
17700         "sbc    r4, r5\n\t"             /* a word - b word - borrow */
17701         "str    r4, [%[r]]\n\t"
17702         "sbc    %[c], %[c]\n\t"         /* c = all ones if this word borrowed, else 0 */
17703         "add    %[a], #4\n\t"           /* advance all three pointers by one word */
17704         "add    %[b], #4\n\t"
17705         "add    %[r], #4\n\t"
17706         "cmp    %[a], r6\n\t"
17707         "bne    1b\n\t"
17708         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
17709         :
17710         : "memory", "r4", "r5", "r6"
17711     );
17712 
17713     return c;
17714 }
17715 
17716 #else
17717 /* Sub b from a into r. (r = a - b)
17718  *
17719  * r  A single precision integer.
17720  * a  A single precision integer.
17721  * b  A single precision integer.
17722  */
17723 SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a,
17724         const sp_digit* b)
17725 {
17726     sp_digit c = 0;
17727 
17728     __asm__ __volatile__ (
17729         "ldr    r4, [%[a], #0]\n\t"
17730         "ldr    r5, [%[a], #4]\n\t"
17731         "ldr    r6, [%[b], #0]\n\t"
17732         "ldr    r7, [%[b], #4]\n\t"
17733         "sub    r4, r6\n\t"
17734         "sbc    r5, r7\n\t"
17735         "str    r4, [%[r], #0]\n\t"
17736         "str    r5, [%[r], #4]\n\t"
17737         "ldr    r4, [%[a], #8]\n\t"
17738         "ldr    r5, [%[a], #12]\n\t"
17739         "ldr    r6, [%[b], #8]\n\t"
17740         "ldr    r7, [%[b], #12]\n\t"
17741         "sbc    r4, r6\n\t"
17742         "sbc    r5, r7\n\t"
17743         "str    r4, [%[r], #8]\n\t"
17744         "str    r5, [%[r], #12]\n\t"
17745         "ldr    r4, [%[a], #16]\n\t"
17746         "ldr    r5, [%[a], #20]\n\t"
17747         "ldr    r6, [%[b], #16]\n\t"
17748         "ldr    r7, [%[b], #20]\n\t"
17749         "sbc    r4, r6\n\t"
17750         "sbc    r5, r7\n\t"
17751         "str    r4, [%[r], #16]\n\t"
17752         "str    r5, [%[r], #20]\n\t"
17753         "ldr    r4, [%[a], #24]\n\t"
17754         "ldr    r5, [%[a], #28]\n\t"
17755         "ldr    r6, [%[b], #24]\n\t"
17756         "ldr    r7, [%[b], #28]\n\t"
17757         "sbc    r4, r6\n\t"
17758         "sbc    r5, r7\n\t"
17759         "str    r4, [%[r], #24]\n\t"
17760         "str    r5, [%[r], #28]\n\t"
17761         "sbc    %[c], %[c]\n\t"
17762         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
17763         :
17764         : "memory", "r4", "r5", "r6", "r7"
17765     );
17766 
17767     return c;
17768 }
17769 
17770 #endif /* WOLFSSL_SP_SMALL */
17771 /* Compare two numbers to determine if they are equal.
17772  * Constant time implementation.
17773  *
17774  * a  First number to compare.
17775  * b  Second number to compare.
17776  * returns 1 when equal and 0 otherwise.
17777  */
17778 static int sp_256_cmp_equal_8(const sp_digit* a, const sp_digit* b)
17779 {
17780     return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) |
17781             (a[4] ^ b[4]) | (a[5] ^ b[5]) | (a[6] ^ b[6]) | (a[7] ^ b[7])) == 0;
17782 }
17783 
17784 /* Add two Montgomery form projective points.
17785  *
17786  * r  Result of addition.
17787  * p  First point to add.
17788  * q  Second point to add.
17789  * t  Temporary ordinate data.
17790  */
17791 static void sp_256_proj_point_add_8(sp_point_256* r, const sp_point_256* p, const sp_point_256* q,
17792         sp_digit* t)
17793 {
17794     const sp_point_256* ap[2];
17795     sp_point_256* rp[2];
17796     sp_digit* t1 = t;
17797     sp_digit* t2 = t + 2*8;
17798     sp_digit* t3 = t + 4*8;
17799     sp_digit* t4 = t + 6*8;
17800     sp_digit* t5 = t + 8*8;
17801     sp_digit* x;
17802     sp_digit* y;
17803     sp_digit* z;
17804     int i;
17805 
17806     /* Ensure only the first point is the same as the result. */
17807     if (q == r) {
17808         const sp_point_256* a = p;
17809         p = q;
17810         q = a;
17811     }
17812 
17813     /* Check double */
17814     (void)sp_256_sub_8(t1, p256_mod, q->y);
17815     sp_256_norm_8(t1);
17816     if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) &
17817         (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) {
17818         sp_256_proj_point_dbl_8(r, p, t);
17819     }
17820     else {
17821         rp[0] = r;
17822 
17823         /*lint allow cast to different type of pointer*/
17824         rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
17825         XMEMSET(rp[1], 0, sizeof(sp_point_256));
17826         x = rp[p->infinity | q->infinity]->x;
17827         y = rp[p->infinity | q->infinity]->y;
17828         z = rp[p->infinity | q->infinity]->z;
17829 
17830         ap[0] = p;
17831         ap[1] = q;
17832         for (i=0; i<8; i++) {
17833             r->x[i] = ap[p->infinity]->x[i];
17834         }
17835         for (i=0; i<8; i++) {
17836             r->y[i] = ap[p->infinity]->y[i];
17837         }
17838         for (i=0; i<8; i++) {
17839             r->z[i] = ap[p->infinity]->z[i];
17840         }
17841         r->infinity = ap[p->infinity]->infinity;
17842 
17843         /* U1 = X1*Z2^2 */
17844         sp_256_mont_sqr_8(t1, q->z, p256_mod, p256_mp_mod);
17845         sp_256_mont_mul_8(t3, t1, q->z, p256_mod, p256_mp_mod);
17846         sp_256_mont_mul_8(t1, t1, x, p256_mod, p256_mp_mod);
17847         /* U2 = X2*Z1^2 */
17848         sp_256_mont_sqr_8(t2, z, p256_mod, p256_mp_mod);
17849         sp_256_mont_mul_8(t4, t2, z, p256_mod, p256_mp_mod);
17850         sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod);
17851         /* S1 = Y1*Z2^3 */
17852         sp_256_mont_mul_8(t3, t3, y, p256_mod, p256_mp_mod);
17853         /* S2 = Y2*Z1^3 */
17854         sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod);
17855         /* H = U2 - U1 */
17856         sp_256_mont_sub_8(t2, t2, t1, p256_mod);
17857         /* R = S2 - S1 */
17858         sp_256_mont_sub_8(t4, t4, t3, p256_mod);
17859         /* Z3 = H*Z1*Z2 */
17860         sp_256_mont_mul_8(z, z, q->z, p256_mod, p256_mp_mod);
17861         sp_256_mont_mul_8(z, z, t2, p256_mod, p256_mp_mod);
17862         /* X3 = R^2 - H^3 - 2*U1*H^2 */
17863         sp_256_mont_sqr_8(x, t4, p256_mod, p256_mp_mod);
17864         sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod);
17865         sp_256_mont_mul_8(y, t1, t5, p256_mod, p256_mp_mod);
17866         sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod);
17867         sp_256_mont_sub_8(x, x, t5, p256_mod);
17868         sp_256_mont_dbl_8(t1, y, p256_mod);
17869         sp_256_mont_sub_8(x, x, t1, p256_mod);
17870         /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
17871         sp_256_mont_sub_8(y, y, x, p256_mod);
17872         sp_256_mont_mul_8(y, y, t4, p256_mod, p256_mp_mod);
17873         sp_256_mont_mul_8(t5, t5, t3, p256_mod, p256_mp_mod);
17874         sp_256_mont_sub_8(y, y, t5, p256_mod);
17875     }
17876 }
17877 
17878 /* Multiply the point by the scalar and return the result.
17879  * If map is true then convert result to affine coordinates.
17880  *
17881  * r     Resulting point.
17882  * g     Point to multiply.
17883  * k     Scalar to multiply by.
17884  * map   Indicates whether to convert result to affine.
17885  * heap  Heap to use for allocation.
17886  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
17887  */
17888 static int sp_256_ecc_mulmod_fast_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
17889         int map, void* heap)
17890 {
17891 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
17892     sp_point_256 td[16];
17893     sp_point_256 rtd;
17894     sp_digit tmpd[2 * 8 * 5];
17895 #endif
17896     sp_point_256* t;
17897     sp_point_256* rt;
17898     sp_digit* tmp;
17899     sp_digit n;
17900     int i;
17901     int c, y;
17902     int err;
17903 
17904     (void)heap;
17905 
17906     err = sp_256_point_new_8(heap, rtd, rt);
17907 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
17908     t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 16, heap, DYNAMIC_TYPE_ECC);
17909     if (t == NULL)
17910         err = MEMORY_E;
17911     tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
17912                              DYNAMIC_TYPE_ECC);
17913     if (tmp == NULL)
17914         err = MEMORY_E;
17915 #else
17916     t = td;
17917     tmp = tmpd;
17918 #endif
17919 
17920     if (err == MP_OKAY) {
17921         /* t[0] = {0, 0, 1} * norm */
17922         XMEMSET(&t[0], 0, sizeof(t[0]));
17923         t[0].infinity = 1;
17924         /* t[1] = {g->x, g->y, g->z} * norm */
17925         (void)sp_256_mod_mul_norm_8(t[1].x, g->x, p256_mod);
17926         (void)sp_256_mod_mul_norm_8(t[1].y, g->y, p256_mod);
17927         (void)sp_256_mod_mul_norm_8(t[1].z, g->z, p256_mod);
17928         t[1].infinity = 0;
17929         sp_256_proj_point_dbl_8(&t[ 2], &t[ 1], tmp);
17930         t[ 2].infinity = 0;
17931         sp_256_proj_point_add_8(&t[ 3], &t[ 2], &t[ 1], tmp);
17932         t[ 3].infinity = 0;
17933         sp_256_proj_point_dbl_8(&t[ 4], &t[ 2], tmp);
17934         t[ 4].infinity = 0;
17935         sp_256_proj_point_add_8(&t[ 5], &t[ 3], &t[ 2], tmp);
17936         t[ 5].infinity = 0;
17937         sp_256_proj_point_dbl_8(&t[ 6], &t[ 3], tmp);
17938         t[ 6].infinity = 0;
17939         sp_256_proj_point_add_8(&t[ 7], &t[ 4], &t[ 3], tmp);
17940         t[ 7].infinity = 0;
17941         sp_256_proj_point_dbl_8(&t[ 8], &t[ 4], tmp);
17942         t[ 8].infinity = 0;
17943         sp_256_proj_point_add_8(&t[ 9], &t[ 5], &t[ 4], tmp);
17944         t[ 9].infinity = 0;
17945         sp_256_proj_point_dbl_8(&t[10], &t[ 5], tmp);
17946         t[10].infinity = 0;
17947         sp_256_proj_point_add_8(&t[11], &t[ 6], &t[ 5], tmp);
17948         t[11].infinity = 0;
17949         sp_256_proj_point_dbl_8(&t[12], &t[ 6], tmp);
17950         t[12].infinity = 0;
17951         sp_256_proj_point_add_8(&t[13], &t[ 7], &t[ 6], tmp);
17952         t[13].infinity = 0;
17953         sp_256_proj_point_dbl_8(&t[14], &t[ 7], tmp);
17954         t[14].infinity = 0;
17955         sp_256_proj_point_add_8(&t[15], &t[ 8], &t[ 7], tmp);
17956         t[15].infinity = 0;
17957 
17958         i = 6;
17959         n = k[i+1] << 0;
17960         c = 28;
17961         y = n >> 28;
17962         XMEMCPY(rt, &t[y], sizeof(sp_point_256));
17963         n <<= 4;
17964         for (; i>=0 || c>=4; ) {
17965             if (c < 4) {
17966                 n |= k[i--];
17967                 c += 32;
17968             }
17969             y = (n >> 28) & 0xf;
17970             n <<= 4;
17971             c -= 4;
17972 
17973             sp_256_proj_point_dbl_8(rt, rt, tmp);
17974             sp_256_proj_point_dbl_8(rt, rt, tmp);
17975             sp_256_proj_point_dbl_8(rt, rt, tmp);
17976             sp_256_proj_point_dbl_8(rt, rt, tmp);
17977 
17978             sp_256_proj_point_add_8(rt, rt, &t[y], tmp);
17979         }
17980 
17981         if (map != 0) {
17982             sp_256_map_8(r, rt, tmp);
17983         }
17984         else {
17985             XMEMCPY(r, rt, sizeof(sp_point_256));
17986         }
17987     }
17988 
17989 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
17990     if (tmp != NULL) {
17991         XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 8 * 5);
17992         XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
17993     }
17994     if (t != NULL) {
17995         XMEMSET(t, 0, sizeof(sp_point_256) * 16);
17996         XFREE(t, heap, DYNAMIC_TYPE_ECC);
17997     }
17998 #else
17999     ForceZero(tmpd, sizeof(tmpd));
18000     ForceZero(td, sizeof(td));
18001 #endif
18002     sp_256_point_free_8(rt, 1, heap);
18003 
18004     return err;
18005 }
18006 
18007 /* A table entry for pre-computed points. */
18008 typedef struct sp_table_entry_256 {
18009     sp_digit x[8];
18010     sp_digit y[8];
18011 } sp_table_entry_256;
18012 
18013 #ifdef FP_ECC
18014 /* Double the Montgomery form projective point p a number of times.
18015  *
18016  * r  Result of repeated doubling of point.
18017  * p  Point to double.
18018  * n  Number of times to double
18019  * t  Temporary ordinate data.
18020  */
18021 static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int n, sp_digit* t)
18022 {
18023     sp_digit* w = t;
18024     sp_digit* a = t + 2*8;
18025     sp_digit* b = t + 4*8;
18026     sp_digit* t1 = t + 6*8;
18027     sp_digit* t2 = t + 8*8;
18028     sp_digit* x;
18029     sp_digit* y;
18030     sp_digit* z;
18031 
18032     x = p->x;
18033     y = p->y;
18034     z = p->z;
18035 
18036     /* Y = 2*Y */
18037     sp_256_mont_dbl_8(y, y, p256_mod);
18038     /* W = Z^4 */
18039     sp_256_mont_sqr_8(w, z, p256_mod, p256_mp_mod);
18040     sp_256_mont_sqr_8(w, w, p256_mod, p256_mp_mod);
18041 
18042 #ifndef WOLFSSL_SP_SMALL
18043     while (--n > 0)
18044 #else
18045     while (--n >= 0)
18046 #endif
18047     {
18048         /* A = 3*(X^2 - W) */
18049         sp_256_mont_sqr_8(t1, x, p256_mod, p256_mp_mod);
18050         sp_256_mont_sub_8(t1, t1, w, p256_mod);
18051         sp_256_mont_tpl_8(a, t1, p256_mod);
18052         /* B = X*Y^2 */
18053         sp_256_mont_sqr_8(t1, y, p256_mod, p256_mp_mod);
18054         sp_256_mont_mul_8(b, t1, x, p256_mod, p256_mp_mod);
18055         /* X = A^2 - 2B */
18056         sp_256_mont_sqr_8(x, a, p256_mod, p256_mp_mod);
18057         sp_256_mont_dbl_8(t2, b, p256_mod);
18058         sp_256_mont_sub_8(x, x, t2, p256_mod);
18059         /* Z = Z*Y */
18060         sp_256_mont_mul_8(z, z, y, p256_mod, p256_mp_mod);
18061         /* t2 = Y^4 */
18062         sp_256_mont_sqr_8(t1, t1, p256_mod, p256_mp_mod);
18063 #ifdef WOLFSSL_SP_SMALL
18064         if (n != 0)
18065 #endif
18066         {
18067             /* W = W*Y^4 */
18068             sp_256_mont_mul_8(w, w, t1, p256_mod, p256_mp_mod);
18069         }
18070         /* y = 2*A*(B - X) - Y^4 */
18071         sp_256_mont_sub_8(y, b, x, p256_mod);
18072         sp_256_mont_mul_8(y, y, a, p256_mod, p256_mp_mod);
18073         sp_256_mont_dbl_8(y, y, p256_mod);
18074         sp_256_mont_sub_8(y, y, t1, p256_mod);
18075     }
18076 #ifndef WOLFSSL_SP_SMALL
18077     /* A = 3*(X^2 - W) */
18078     sp_256_mont_sqr_8(t1, x, p256_mod, p256_mp_mod);
18079     sp_256_mont_sub_8(t1, t1, w, p256_mod);
18080     sp_256_mont_tpl_8(a, t1, p256_mod);
18081     /* B = X*Y^2 */
18082     sp_256_mont_sqr_8(t1, y, p256_mod, p256_mp_mod);
18083     sp_256_mont_mul_8(b, t1, x, p256_mod, p256_mp_mod);
18084     /* X = A^2 - 2B */
18085     sp_256_mont_sqr_8(x, a, p256_mod, p256_mp_mod);
18086     sp_256_mont_dbl_8(t2, b, p256_mod);
18087     sp_256_mont_sub_8(x, x, t2, p256_mod);
18088     /* Z = Z*Y */
18089     sp_256_mont_mul_8(z, z, y, p256_mod, p256_mp_mod);
18090     /* t2 = Y^4 */
18091     sp_256_mont_sqr_8(t1, t1, p256_mod, p256_mp_mod);
18092     /* y = 2*A*(B - X) - Y^4 */
18093     sp_256_mont_sub_8(y, b, x, p256_mod);
18094     sp_256_mont_mul_8(y, y, a, p256_mod, p256_mp_mod);
18095     sp_256_mont_dbl_8(y, y, p256_mod);
18096     sp_256_mont_sub_8(y, y, t1, p256_mod);
18097 #endif
18098     /* Y = Y/2 */
18099     sp_256_div2_8(y, y, p256_mod);
18100 }
18101 
18102 #endif /* FP_ECC */
18103 /* Add two Montgomery form projective points. The second point has a q value of
18104  * one.
18105  * Only the first point can be the same pointer as the result point.
18106  *
18107  * r  Result of addition.
18108  * p  First point to add.
18109  * q  Second point to add.
18110  * t  Temporary ordinate data.
18111  */
18112 static void sp_256_proj_point_add_qz1_8(sp_point_256* r, const sp_point_256* p,
18113         const sp_point_256* q, sp_digit* t)
18114 {
18115     const sp_point_256* ap[2];
18116     sp_point_256* rp[2];
18117     sp_digit* t1 = t;
18118     sp_digit* t2 = t + 2*8;
18119     sp_digit* t3 = t + 4*8;
18120     sp_digit* t4 = t + 6*8;
18121     sp_digit* t5 = t + 8*8;
18122     sp_digit* x;
18123     sp_digit* y;
18124     sp_digit* z;
18125     int i;
18126 
18127     /* Check double */
18128     (void)sp_256_sub_8(t1, p256_mod, q->y);
18129     sp_256_norm_8(t1);
18130     if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) &
18131         (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) {
18132         sp_256_proj_point_dbl_8(r, p, t);
18133     }
18134     else {
18135         rp[0] = r;
18136 
18137         /*lint allow cast to different type of pointer*/
18138         rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
18139         XMEMSET(rp[1], 0, sizeof(sp_point_256));
18140         x = rp[p->infinity | q->infinity]->x;
18141         y = rp[p->infinity | q->infinity]->y;
18142         z = rp[p->infinity | q->infinity]->z;
18143 
18144         ap[0] = p;
18145         ap[1] = q;
18146         for (i=0; i<8; i++) {
18147             r->x[i] = ap[p->infinity]->x[i];
18148         }
18149         for (i=0; i<8; i++) {
18150             r->y[i] = ap[p->infinity]->y[i];
18151         }
18152         for (i=0; i<8; i++) {
18153             r->z[i] = ap[p->infinity]->z[i];
18154         }
18155         r->infinity = ap[p->infinity]->infinity;
18156 
18157         /* U2 = X2*Z1^2 */
18158         sp_256_mont_sqr_8(t2, z, p256_mod, p256_mp_mod);
18159         sp_256_mont_mul_8(t4, t2, z, p256_mod, p256_mp_mod);
18160         sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod);
18161         /* S2 = Y2*Z1^3 */
18162         sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod);
18163         /* H = U2 - X1 */
18164         sp_256_mont_sub_8(t2, t2, x, p256_mod);
18165         /* R = S2 - Y1 */
18166         sp_256_mont_sub_8(t4, t4, y, p256_mod);
18167         /* Z3 = H*Z1 */
18168         sp_256_mont_mul_8(z, z, t2, p256_mod, p256_mp_mod);
18169         /* X3 = R^2 - H^3 - 2*X1*H^2 */
18170         sp_256_mont_sqr_8(t1, t4, p256_mod, p256_mp_mod);
18171         sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod);
18172         sp_256_mont_mul_8(t3, x, t5, p256_mod, p256_mp_mod);
18173         sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod);
18174         sp_256_mont_sub_8(x, t1, t5, p256_mod);
18175         sp_256_mont_dbl_8(t1, t3, p256_mod);
18176         sp_256_mont_sub_8(x, x, t1, p256_mod);
18177         /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
18178         sp_256_mont_sub_8(t3, t3, x, p256_mod);
18179         sp_256_mont_mul_8(t3, t3, t4, p256_mod, p256_mp_mod);
18180         sp_256_mont_mul_8(t5, t5, y, p256_mod, p256_mp_mod);
18181         sp_256_mont_sub_8(y, t3, t5, p256_mod);
18182     }
18183 }
18184 
18185 #ifdef WOLFSSL_SP_SMALL
18186 #ifdef FP_ECC
18187 /* Convert the projective point to affine.
18188  * Ordinates are in Montgomery form.
18189  *
18190  * a  Point to convert.
18191  * t  Temporary data.
18192  */
18193 static void sp_256_proj_to_affine_8(sp_point_256* a, sp_digit* t)
18194 {
18195     sp_digit* t1 = t;
18196     sp_digit* t2 = t + 2 * 8;
18197     sp_digit* tmp = t + 4 * 8;
18198 
18199     sp_256_mont_inv_8(t1, a->z, tmp);
18200 
18201     sp_256_mont_sqr_8(t2, t1, p256_mod, p256_mp_mod);
18202     sp_256_mont_mul_8(t1, t2, t1, p256_mod, p256_mp_mod);
18203 
18204     sp_256_mont_mul_8(a->x, a->x, t2, p256_mod, p256_mp_mod);
18205     sp_256_mont_mul_8(a->y, a->y, t1, p256_mod, p256_mp_mod);
18206     XMEMCPY(a->z, p256_norm_mod, sizeof(p256_norm_mod));
18207 }
18208 
18209 /* Generate the pre-computed table of points for the base point.
18210  *
18211  * a      The base point.
18212  * table  Place to store generated point data.
18213  * tmp    Temporary data.
18214  * heap  Heap to use for allocation.
18215  */
18216 static int sp_256_gen_stripe_table_8(const sp_point_256* a,
18217         sp_table_entry_256* table, sp_digit* tmp, void* heap)
18218 {
18219 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
18220     sp_point_256 td, s1d, s2d;
18221 #endif
18222     sp_point_256* t;
18223     sp_point_256* s1 = NULL;
18224     sp_point_256* s2 = NULL;
18225     int i, j;
18226     int err;
18227 
18228     (void)heap;
18229 
18230     err = sp_256_point_new_8(heap, td, t);
18231     if (err == MP_OKAY) {
18232         err = sp_256_point_new_8(heap, s1d, s1);
18233     }
18234     if (err == MP_OKAY) {
18235         err = sp_256_point_new_8(heap, s2d, s2);
18236     }
18237 
18238     if (err == MP_OKAY) {
18239         err = sp_256_mod_mul_norm_8(t->x, a->x, p256_mod);
18240     }
18241     if (err == MP_OKAY) {
18242         err = sp_256_mod_mul_norm_8(t->y, a->y, p256_mod);
18243     }
18244     if (err == MP_OKAY) {
18245         err = sp_256_mod_mul_norm_8(t->z, a->z, p256_mod);
18246     }
18247     if (err == MP_OKAY) {
18248         t->infinity = 0;
18249         sp_256_proj_to_affine_8(t, tmp);
18250 
18251         XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
18252         s1->infinity = 0;
18253         XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
18254         s2->infinity = 0;
18255 
18256         /* table[0] = {0, 0, infinity} */
18257         XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
18258         /* table[1] = Affine version of 'a' in Montgomery form */
18259         XMEMCPY(table[1].x, t->x, sizeof(table->x));
18260         XMEMCPY(table[1].y, t->y, sizeof(table->y));
18261 
18262         for (i=1; i<4; i++) {
18263             sp_256_proj_point_dbl_n_8(t, 64, tmp);
18264             sp_256_proj_to_affine_8(t, tmp);
18265             XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
18266             XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
18267         }
18268 
18269         for (i=1; i<4; i++) {
18270             XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
18271             XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
18272             for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
18273                 XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
18274                 XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
18275                 sp_256_proj_point_add_qz1_8(t, s1, s2, tmp);
18276                 sp_256_proj_to_affine_8(t, tmp);
18277                 XMEMCPY(table[j].x, t->x, sizeof(table->x));
18278                 XMEMCPY(table[j].y, t->y, sizeof(table->y));
18279             }
18280         }
18281     }
18282 
18283     sp_256_point_free_8(s2, 0, heap);
18284     sp_256_point_free_8(s1, 0, heap);
18285     sp_256_point_free_8( t, 0, heap);
18286 
18287     return err;
18288 }
18289 
18290 #endif /* FP_ECC */
18291 /* Multiply the point by the scalar and return the result.
18292  * If map is true then convert result to affine coordinates.
18293  *
18294  * r     Resulting point.
18295  * k     Scalar to multiply by.
18296  * map   Indicates whether to convert result to affine.
18297  * heap  Heap to use for allocation.
18298  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
18299  */
18300 static int sp_256_ecc_mulmod_stripe_8(sp_point_256* r, const sp_point_256* g,
18301         const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
18302 {
18303 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
18304     sp_point_256 rtd;
18305     sp_point_256 pd;
18306     sp_digit td[2 * 8 * 5];
18307 #endif
18308     sp_point_256* rt;
18309     sp_point_256* p = NULL;
18310     sp_digit* t;
18311     int i, j;
18312     int y, x;
18313     int err;
18314 
18315     (void)g;
18316     (void)heap;
18317 
18318 
18319     err = sp_256_point_new_8(heap, rtd, rt);
18320     if (err == MP_OKAY) {
18321         err = sp_256_point_new_8(heap, pd, p);
18322     }
18323 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
18324     t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
18325                            DYNAMIC_TYPE_ECC);
18326     if (t == NULL) {
18327         err = MEMORY_E;
18328     }
18329 #else
18330     t = td;
18331 #endif
18332 
18333     if (err == MP_OKAY) {
18334         XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
18335         XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
18336 
18337         y = 0;
18338         for (j=0,x=63; j<4; j++,x+=64) {
18339             y |= ((k[x / 32] >> (x % 32)) & 1) << j;
18340         }
18341         XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
18342         XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
18343         rt->infinity = !y;
18344         for (i=62; i>=0; i--) {
18345             y = 0;
18346             for (j=0,x=i; j<4; j++,x+=64) {
18347                 y |= ((k[x / 32] >> (x % 32)) & 1) << j;
18348             }
18349 
18350             sp_256_proj_point_dbl_8(rt, rt, t);
18351             XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
18352             XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
18353             p->infinity = !y;
18354             sp_256_proj_point_add_qz1_8(rt, rt, p, t);
18355         }
18356 
18357         if (map != 0) {
18358             sp_256_map_8(r, rt, t);
18359         }
18360         else {
18361             XMEMCPY(r, rt, sizeof(sp_point_256));
18362         }
18363     }
18364 
18365 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
18366     if (t != NULL) {
18367         XFREE(t, heap, DYNAMIC_TYPE_ECC);
18368     }
18369 #endif
18370     sp_256_point_free_8(p, 0, heap);
18371     sp_256_point_free_8(rt, 0, heap);
18372 
18373     return err;
18374 }
18375 
18376 #ifdef FP_ECC
18377 #ifndef FP_ENTRIES
18378     #define FP_ENTRIES 16
18379 #endif
18380 
18381 typedef struct sp_cache_256_t {
18382     sp_digit x[8];
18383     sp_digit y[8];
18384     sp_table_entry_256 table[16];
18385     uint32_t cnt;
18386     int set;
18387 } sp_cache_256_t;
18388 
18389 static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
18390 static THREAD_LS_T int sp_cache_256_last = -1;
18391 static THREAD_LS_T int sp_cache_256_inited = 0;
18392 
18393 #ifndef HAVE_THREAD_LS
18394     static volatile int initCacheMutex_256 = 0;
18395     static wolfSSL_Mutex sp_cache_256_lock;
18396 #endif
18397 
18398 static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
18399 {
18400     int i, j;
18401     uint32_t least;
18402 
18403     if (sp_cache_256_inited == 0) {
18404         for (i=0; i<FP_ENTRIES; i++) {
18405             sp_cache_256[i].set = 0;
18406         }
18407         sp_cache_256_inited = 1;
18408     }
18409 
18410     /* Compare point with those in cache. */
18411     for (i=0; i<FP_ENTRIES; i++) {
18412         if (!sp_cache_256[i].set)
18413             continue;
18414 
18415         if (sp_256_cmp_equal_8(g->x, sp_cache_256[i].x) &
18416                            sp_256_cmp_equal_8(g->y, sp_cache_256[i].y)) {
18417             sp_cache_256[i].cnt++;
18418             break;
18419         }
18420     }
18421 
18422     /* No match. */
18423     if (i == FP_ENTRIES) {
18424         /* Find empty entry. */
18425         i = (sp_cache_256_last + 1) % FP_ENTRIES;
18426         for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
18427             if (!sp_cache_256[i].set) {
18428                 break;
18429             }
18430         }
18431 
18432         /* Evict least used. */
18433         if (i == sp_cache_256_last) {
18434             least = sp_cache_256[0].cnt;
18435             for (j=1; j<FP_ENTRIES; j++) {
18436                 if (sp_cache_256[j].cnt < least) {
18437                     i = j;
18438                     least = sp_cache_256[i].cnt;
18439                 }
18440             }
18441         }
18442 
18443         XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
18444         XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
18445         sp_cache_256[i].set = 1;
18446         sp_cache_256[i].cnt = 1;
18447     }
18448 
18449     *cache = &sp_cache_256[i];
18450     sp_cache_256_last = i;
18451 }
18452 #endif /* FP_ECC */
18453 
18454 /* Multiply the base point of P256 by the scalar and return the result.
18455  * If map is true then convert result to affine coordinates.
18456  *
18457  * r     Resulting point.
18458  * g     Point to multiply.
18459  * k     Scalar to multiply by.
18460  * map   Indicates whether to convert result to affine.
18461  * heap  Heap to use for allocation.
18462  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
18463  */
18464 static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
18465         int map, void* heap)
18466 {
18467 #ifndef FP_ECC
18468     return sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
18469 #else
18470     sp_digit tmp[2 * 8 * 5];
18471     sp_cache_256_t* cache;
18472     int err = MP_OKAY;
18473 
18474 #ifndef HAVE_THREAD_LS
18475     if (initCacheMutex_256 == 0) {
18476          wc_InitMutex(&sp_cache_256_lock);
18477          initCacheMutex_256 = 1;
18478     }
18479     if (wc_LockMutex(&sp_cache_256_lock) != 0)
18480        err = BAD_MUTEX_E;
18481 #endif /* HAVE_THREAD_LS */
18482 
18483     if (err == MP_OKAY) {
18484         sp_ecc_get_cache_256(g, &cache);
18485         if (cache->cnt == 2)
18486             sp_256_gen_stripe_table_8(g, cache->table, tmp, heap);
18487 
18488 #ifndef HAVE_THREAD_LS
18489         wc_UnLockMutex(&sp_cache_256_lock);
18490 #endif /* HAVE_THREAD_LS */
18491 
18492         if (cache->cnt < 2) {
18493             err = sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
18494         }
18495         else {
18496             err = sp_256_ecc_mulmod_stripe_8(r, g, cache->table, k,
18497                     map, heap);
18498         }
18499     }
18500 
18501     return err;
18502 #endif
18503 }
18504 
18505 #else
18506 #ifdef FP_ECC
18507 /* Generate the pre-computed table of points for the base point.
18508  *
18509  * a      The base point.
18510  * table  Place to store generated point data.
18511  * tmp    Temporary data.
18512  * heap  Heap to use for allocation.
18513  */
18514 static int sp_256_gen_stripe_table_8(const sp_point_256* a,
18515         sp_table_entry_256* table, sp_digit* tmp, void* heap)
18516 {
18517 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
18518     sp_point_256 td, s1d, s2d;
18519 #endif
18520     sp_point_256* t;
18521     sp_point_256* s1 = NULL;
18522     sp_point_256* s2 = NULL;
18523     int i, j;
18524     int err;
18525 
18526     (void)heap;
18527 
18528     err = sp_256_point_new_8(heap, td, t);
18529     if (err == MP_OKAY) {
18530         err = sp_256_point_new_8(heap, s1d, s1);
18531     }
18532     if (err == MP_OKAY) {
18533         err = sp_256_point_new_8(heap, s2d, s2);
18534     }
18535 
18536     if (err == MP_OKAY) {
18537         err = sp_256_mod_mul_norm_8(t->x, a->x, p256_mod);
18538     }
18539     if (err == MP_OKAY) {
18540         err = sp_256_mod_mul_norm_8(t->y, a->y, p256_mod);
18541     }
18542     if (err == MP_OKAY) {
18543         err = sp_256_mod_mul_norm_8(t->z, a->z, p256_mod);
18544     }
18545     if (err == MP_OKAY) {
18546         t->infinity = 0;
18547         sp_256_proj_to_affine_8(t, tmp);
18548 
18549         XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
18550         s1->infinity = 0;
18551         XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
18552         s2->infinity = 0;
18553 
18554         /* table[0] = {0, 0, infinity} */
18555         XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
18556         /* table[1] = Affine version of 'a' in Montgomery form */
18557         XMEMCPY(table[1].x, t->x, sizeof(table->x));
18558         XMEMCPY(table[1].y, t->y, sizeof(table->y));
18559 
18560         for (i=1; i<8; i++) {
18561             sp_256_proj_point_dbl_n_8(t, 32, tmp);
18562             sp_256_proj_to_affine_8(t, tmp);
18563             XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
18564             XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
18565         }
18566 
18567         for (i=1; i<8; i++) {
18568             XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
18569             XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
18570             for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
18571                 XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
18572                 XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
18573                 sp_256_proj_point_add_qz1_8(t, s1, s2, tmp);
18574                 sp_256_proj_to_affine_8(t, tmp);
18575                 XMEMCPY(table[j].x, t->x, sizeof(table->x));
18576                 XMEMCPY(table[j].y, t->y, sizeof(table->y));
18577             }
18578         }
18579     }
18580 
18581     sp_256_point_free_8(s2, 0, heap);
18582     sp_256_point_free_8(s1, 0, heap);
18583     sp_256_point_free_8( t, 0, heap);
18584 
18585     return err;
18586 }
18587 
18588 #endif /* FP_ECC */
18589 /* Multiply the point by the scalar and return the result.
18590  * If map is true then convert result to affine coordinates.
18591  *
18592  * r     Resulting point.
18593  * k     Scalar to multiply by.
18594  * map   Indicates whether to convert result to affine.
18595  * heap  Heap to use for allocation.
18596  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
18597  */
18598 static int sp_256_ecc_mulmod_stripe_8(sp_point_256* r, const sp_point_256* g,
18599         const sp_table_entry_256* table, const sp_digit* k, int map, void* heap)
18600 {
18601 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
18602     sp_point_256 rtd;
18603     sp_point_256 pd;
18604     sp_digit td[2 * 8 * 5];
18605 #endif
18606     sp_point_256* rt;
18607     sp_point_256* p = NULL;
18608     sp_digit* t;
18609     int i, j;
18610     int y, x;
18611     int err;
18612 
18613     (void)g;
18614     (void)heap;
18615 
18616 
18617     err = sp_256_point_new_8(heap, rtd, rt);
18618     if (err == MP_OKAY) {
18619         err = sp_256_point_new_8(heap, pd, p);
18620     }
18621 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
18622     t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
18623                            DYNAMIC_TYPE_ECC);
18624     if (t == NULL) {
18625         err = MEMORY_E;
18626     }
18627 #else
18628     t = td;
18629 #endif
18630 
18631     if (err == MP_OKAY) {
18632         XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
18633         XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
18634 
18635         y = 0;
18636         for (j=0,x=31; j<8; j++,x+=32) {
18637             y |= ((k[x / 32] >> (x % 32)) & 1) << j;
18638         }
18639         XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
18640         XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
18641         rt->infinity = !y;
18642         for (i=30; i>=0; i--) {
18643             y = 0;
18644             for (j=0,x=i; j<8; j++,x+=32) {
18645                 y |= ((k[x / 32] >> (x % 32)) & 1) << j;
18646             }
18647 
18648             sp_256_proj_point_dbl_8(rt, rt, t);
18649             XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
18650             XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
18651             p->infinity = !y;
18652             sp_256_proj_point_add_qz1_8(rt, rt, p, t);
18653         }
18654 
18655         if (map != 0) {
18656             sp_256_map_8(r, rt, t);
18657         }
18658         else {
18659             XMEMCPY(r, rt, sizeof(sp_point_256));
18660         }
18661     }
18662 
18663 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
18664     if (t != NULL) {
18665         XFREE(t, heap, DYNAMIC_TYPE_ECC);
18666     }
18667 #endif
18668     sp_256_point_free_8(p, 0, heap);
18669     sp_256_point_free_8(rt, 0, heap);
18670 
18671     return err;
18672 }
18673 
18674 #ifdef FP_ECC
18675 #ifndef FP_ENTRIES
18676     #define FP_ENTRIES 16
18677 #endif
18678 
/* One slot of the per-point table cache used when FP_ECC is defined. */
18679 typedef struct sp_cache_256_t {
18680     sp_digit x[8];                  /* x ordinate of the cached point - matched against g->x on lookup */
18681     sp_digit y[8];                  /* y ordinate of the cached point - matched against g->y on lookup */
18682     sp_table_entry_256 table[256];  /* table for the point, filled by sp_256_gen_stripe_table_8 */
18683     uint32_t cnt;                   /* use count: 1 when the slot is claimed, +1 on every later match */
18684     int set;                        /* non-zero when the slot holds a point */
18685 } sp_cache_256_t;
18686 
/* Cache slots searched and filled by sp_ecc_get_cache_256.
 * NOTE(review): THREAD_LS_T is presumably a thread-local storage qualifier
 * when HAVE_THREAD_LS is defined - confirm against its definition. */
18687 static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
/* Index of the slot most recently returned by sp_ecc_get_cache_256;
 * -1 until the first lookup. */
18688 static THREAD_LS_T int sp_cache_256_last = -1;
/* Becomes 1 once sp_ecc_get_cache_256 has cleared every slot's set flag. */
18689 static THREAD_LS_T int sp_cache_256_inited = 0;
18690 
18691 #ifndef HAVE_THREAD_LS
/* Set to 1 by sp_256_ecc_mulmod_8 after it calls wc_InitMutex on the lock. */
18692     static volatile int initCacheMutex_256 = 0;
/* Held by sp_256_ecc_mulmod_8 around the cache lookup and table generation. */
18693     static wolfSSL_Mutex sp_cache_256_lock;
18694 #endif
18695 
18696 static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
18697 {
18698     int i, j;
18699     uint32_t least;
18700 
18701     if (sp_cache_256_inited == 0) {
18702         for (i=0; i<FP_ENTRIES; i++) {
18703             sp_cache_256[i].set = 0;
18704         }
18705         sp_cache_256_inited = 1;
18706     }
18707 
18708     /* Compare point with those in cache. */
18709     for (i=0; i<FP_ENTRIES; i++) {
18710         if (!sp_cache_256[i].set)
18711             continue;
18712 
18713         if (sp_256_cmp_equal_8(g->x, sp_cache_256[i].x) &
18714                            sp_256_cmp_equal_8(g->y, sp_cache_256[i].y)) {
18715             sp_cache_256[i].cnt++;
18716             break;
18717         }
18718     }
18719 
18720     /* No match. */
18721     if (i == FP_ENTRIES) {
18722         /* Find empty entry. */
18723         i = (sp_cache_256_last + 1) % FP_ENTRIES;
18724         for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
18725             if (!sp_cache_256[i].set) {
18726                 break;
18727             }
18728         }
18729 
18730         /* Evict least used. */
18731         if (i == sp_cache_256_last) {
18732             least = sp_cache_256[0].cnt;
18733             for (j=1; j<FP_ENTRIES; j++) {
18734                 if (sp_cache_256[j].cnt < least) {
18735                     i = j;
18736                     least = sp_cache_256[i].cnt;
18737                 }
18738             }
18739         }
18740 
18741         XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
18742         XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
18743         sp_cache_256[i].set = 1;
18744         sp_cache_256[i].cnt = 1;
18745     }
18746 
18747     *cache = &sp_cache_256[i];
18748     sp_cache_256_last = i;
18749 }
18750 #endif /* FP_ECC */
18751 
18752 /* Multiply the base point of P256 by the scalar and return the result.
18753  * If map is true then convert result to affine coordinates.
18754  *
18755  * r     Resulting point.
18756  * g     Point to multiply.
18757  * k     Scalar to multiply by.
18758  * map   Indicates whether to convert result to affine.
18759  * heap  Heap to use for allocation.
18760  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
18761  */
18762 static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
18763         int map, void* heap)
18764 {
18765 #ifndef FP_ECC
18766     return sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
18767 #else
18768     sp_digit tmp[2 * 8 * 5];
18769     sp_cache_256_t* cache;
18770     int err = MP_OKAY;
18771 
18772 #ifndef HAVE_THREAD_LS
18773     if (initCacheMutex_256 == 0) {
18774          wc_InitMutex(&sp_cache_256_lock);
18775          initCacheMutex_256 = 1;
18776     }
18777     if (wc_LockMutex(&sp_cache_256_lock) != 0)
18778        err = BAD_MUTEX_E;
18779 #endif /* HAVE_THREAD_LS */
18780 
18781     if (err == MP_OKAY) {
18782         sp_ecc_get_cache_256(g, &cache);
18783         if (cache->cnt == 2)
18784             sp_256_gen_stripe_table_8(g, cache->table, tmp, heap);
18785 
18786 #ifndef HAVE_THREAD_LS
18787         wc_UnLockMutex(&sp_cache_256_lock);
18788 #endif /* HAVE_THREAD_LS */
18789 
18790         if (cache->cnt < 2) {
18791             err = sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
18792         }
18793         else {
18794             err = sp_256_ecc_mulmod_stripe_8(r, g, cache->table, k,
18795                     map, heap);
18796         }
18797     }
18798 
18799     return err;
18800 #endif
18801 }
18802 
18803 #endif /* WOLFSSL_SP_SMALL */
18804 /* Multiply the point by the scalar and return the result.
18805  * If map is true then convert result to affine coordinates.
18806  *
18807  * km    Scalar to multiply by.
18808  * p     Point to multiply.
18809  * r     Resulting point.
18810  * map   Indicates whether to convert result to affine.
18811  * heap  Heap to use for allocation.
18812  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
18813  */
18814 int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map,
18815         void* heap)
18816 {
18817 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
18818     sp_point_256 p;
18819     sp_digit kd[8];
18820 #endif
18821     sp_point_256* point;
18822     sp_digit* k = NULL;
18823     int err = MP_OKAY;
18824 
18825     err = sp_256_point_new_8(heap, p, point);
18826 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
18827     if (err == MP_OKAY) {
18828         k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
18829                                                               DYNAMIC_TYPE_ECC);
18830         if (k == NULL)
18831             err = MEMORY_E;
18832     }
18833 #else
18834     k = kd;
18835 #endif
18836     if (err == MP_OKAY) {
18837         sp_256_from_mp(k, 8, km);
18838         sp_256_point_from_ecc_point_8(point, gm);
18839 
18840             err = sp_256_ecc_mulmod_8(point, point, k, map, heap);
18841     }
18842     if (err == MP_OKAY) {
18843         err = sp_256_point_to_ecc_point_8(point, r);
18844     }
18845 
18846 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
18847     if (k != NULL) {
18848         XFREE(k, heap, DYNAMIC_TYPE_ECC);
18849     }
18850 #endif
18851     sp_256_point_free_8(point, 0, heap);
18852 
18853     return err;
18854 }
18855 
18856 #ifdef WOLFSSL_SP_SMALL
/* Fixed 16-entry table for the WOLFSSL_SP_SMALL build. It is passed,
 * together with p256_base, to sp_256_ecc_mulmod_stripe_8 by
 * sp_256_ecc_mulmod_base_8. Each entry is two groups of eight 32-bit words;
 * entry 0 is all zeros.
 * NOTE(review): the groups are presumably the x and y ordinates of
 * precomputed multiples of the base point in Montgomery form - confirm
 * against the definition of sp_table_entry_256 and the table generator. */
18857 static const sp_table_entry_256 p256_table[16] = {
18858     /* 0 */
18859     { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
18860       { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
18861     /* 1 */
18862     { { 0x18a9143c,0x79e730d4,0x5fedb601,0x75ba95fc,0x77622510,0x79fb732b,
18863         0xa53755c6,0x18905f76 },
18864       { 0xce95560a,0xddf25357,0xba19e45c,0x8b4ab8e4,0xdd21f325,0xd2e88688,
18865         0x25885d85,0x8571ff18 } },
18866     /* 2 */
18867     { { 0x16a0d2bb,0x4f922fc5,0x1a623499,0x0d5cc16c,0x57c62c8b,0x9241cf3a,
18868         0xfd1b667f,0x2f5e6961 },
18869       { 0xf5a01797,0x5c15c70b,0x60956192,0x3d20b44d,0x071fdb52,0x04911b37,
18870         0x8d6f0f7b,0xf648f916 } },
18871     /* 3 */
18872     { { 0xe137bbbc,0x9e566847,0x8a6a0bec,0xe434469e,0x79d73463,0xb1c42761,
18873         0x133d0015,0x5abe0285 },
18874       { 0xc04c7dab,0x92aa837c,0x43260c07,0x573d9f4c,0x78e6cc37,0x0c931562,
18875         0x6b6f7383,0x94bb725b } },
18876     /* 4 */
18877     { { 0xbfe20925,0x62a8c244,0x8fdce867,0x91c19ac3,0xdd387063,0x5a96a5d5,
18878         0x21d324f6,0x61d587d4 },
18879       { 0xa37173ea,0xe87673a2,0x53778b65,0x23848008,0x05bab43e,0x10f8441e,
18880         0x4621efbe,0xfa11fe12 } },
18881     /* 5 */
18882     { { 0x2cb19ffd,0x1c891f2b,0xb1923c23,0x01ba8d5b,0x8ac5ca8e,0xb6d03d67,
18883         0x1f13bedc,0x586eb04c },
18884       { 0x27e8ed09,0x0c35c6e5,0x1819ede2,0x1e81a33c,0x56c652fa,0x278fd6c0,
18885         0x70864f11,0x19d5ac08 } },
18886     /* 6 */
18887     { { 0xd2b533d5,0x62577734,0xa1bdddc0,0x673b8af6,0xa79ec293,0x577e7c9a,
18888         0xc3b266b1,0xbb6de651 },
18889       { 0xb65259b3,0xe7e9303a,0xd03a7480,0xd6a0afd3,0x9b3cfc27,0xc5ac83d1,
18890         0x5d18b99b,0x60b4619a } },
18891     /* 7 */
18892     { { 0x1ae5aa1c,0xbd6a38e1,0x49e73658,0xb8b7652b,0xee5f87ed,0x0b130014,
18893         0xaeebffcd,0x9d0f27b2 },
18894       { 0x7a730a55,0xca924631,0xddbbc83a,0x9c955b2f,0xac019a71,0x07c1dfe0,
18895         0x356ec48d,0x244a566d } },
18896     /* 8 */
18897     { { 0xf4f8b16a,0x56f8410e,0xc47b266a,0x97241afe,0x6d9c87c1,0x0a406b8e,
18898         0xcd42ab1b,0x803f3e02 },
18899       { 0x04dbec69,0x7f0309a8,0x3bbad05f,0xa83b85f7,0xad8e197f,0xc6097273,
18900         0x5067adc1,0xc097440e } },
18901     /* 9 */
18902     { { 0xc379ab34,0x846a56f2,0x841df8d1,0xa8ee068b,0x176c68ef,0x20314459,
18903         0x915f1f30,0xf1af32d5 },
18904       { 0x5d75bd50,0x99c37531,0xf72f67bc,0x837cffba,0x48d7723f,0x0613a418,
18905         0xe2d41c8b,0x23d0f130 } },
18906     /* 10 */
18907     { { 0xd5be5a2b,0xed93e225,0x5934f3c6,0x6fe79983,0x22626ffc,0x43140926,
18908         0x7990216a,0x50bbb4d9 },
18909       { 0xe57ec63e,0x378191c6,0x181dcdb2,0x65422c40,0x0236e0f6,0x41a8099b,
18910         0x01fe49c3,0x2b100118 } },
18911     /* 11 */
18912     { { 0x9b391593,0xfc68b5c5,0x598270fc,0xc385f5a2,0xd19adcbb,0x7144f3aa,
18913         0x83fbae0c,0xdd558999 },
18914       { 0x74b82ff4,0x93b88b8e,0x71e734c9,0xd2e03c40,0x43c0322a,0x9a7a9eaf,
18915         0x149d6041,0xe6e4c551 } },
18916     /* 12 */
18917     { { 0x80ec21fe,0x5fe14bfe,0xc255be82,0xf6ce116a,0x2f4a5d67,0x98bc5a07,
18918         0xdb7e63af,0xfad27148 },
18919       { 0x29ab05b3,0x90c0b6ac,0x4e251ae6,0x37a9a83c,0xc2aade7d,0x0a7dc875,
18920         0x9f0e1a84,0x77387de3 } },
18921     /* 13 */
18922     { { 0xa56c0dd7,0x1e9ecc49,0x46086c74,0xa5cffcd8,0xf505aece,0x8f7a1408,
18923         0xbef0c47e,0xb37b85c0 },
18924       { 0xcc0e6a8f,0x3596b6e4,0x6b388f23,0xfd6d4bbf,0xc39cef4e,0xaba453fa,
18925         0xf9f628d5,0x9c135ac8 } },
18926     /* 14 */
18927     { { 0x95c8f8be,0x0a1c7294,0x3bf362bf,0x2961c480,0xdf63d4ac,0x9e418403,
18928         0x91ece900,0xc109f9cb },
18929       { 0x58945705,0xc2d095d0,0xddeb85c0,0xb9083d96,0x7a40449b,0x84692b8d,
18930         0x2eee1ee1,0x9bc3344f } },
18931     /* 15 */
18932     { { 0x42913074,0x0d5ae356,0x48a542b1,0x55491b27,0xb310732a,0x469ca665,
18933         0x5f1a4cc1,0x29591d52 },
18934       { 0xb84f983f,0xe76f5b6b,0x9f5f84e1,0xbe7eef41,0x80baa189,0x1200d496,
18935         0x18ef332c,0x6376551f } },
18936 };
18937 
18938 /* Multiply the base point of P256 by the scalar and return the result.
18939  * If map is true then convert result to affine coordinates.
18940  *
18941  * r     Resulting point.
18942  * k     Scalar to multiply by.
18943  * map   Indicates whether to convert result to affine.
18944  * heap  Heap to use for allocation.
18945  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
18946  */
18947 static int sp_256_ecc_mulmod_base_8(sp_point_256* r, const sp_digit* k,
18948         int map, void* heap)
18949 {
18950     return sp_256_ecc_mulmod_stripe_8(r, &p256_base, p256_table,
18951                                       k, map, heap);
18952 }
18953 
18954 #else
18955 static const sp_table_entry_256 p256_table[256] = {
18956     /* 0 */
18957     { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
18958       { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
18959     /* 1 */
18960     { { 0x18a9143c,0x79e730d4,0x5fedb601,0x75ba95fc,0x77622510,0x79fb732b,
18961         0xa53755c6,0x18905f76 },
18962       { 0xce95560a,0xddf25357,0xba19e45c,0x8b4ab8e4,0xdd21f325,0xd2e88688,
18963         0x25885d85,0x8571ff18 } },
18964     /* 2 */
18965     { { 0x4147519a,0x20288602,0x26b372f0,0xd0981eac,0xa785ebc8,0xa9d4a7ca,
18966         0xdbdf58e9,0xd953c50d },
18967       { 0xfd590f8f,0x9d6361cc,0x44e6c917,0x72e9626b,0x22eb64cf,0x7fd96110,
18968         0x9eb288f3,0x863ebb7e } },
18969     /* 3 */
18970     { { 0x5cdb6485,0x7856b623,0x2f0a2f97,0x808f0ea2,0x4f7e300b,0x3e68d954,
18971         0xb5ff80a0,0x00076055 },
18972       { 0x838d2010,0x7634eb9b,0x3243708a,0x54014fbb,0x842a6606,0xe0e47d39,
18973         0x34373ee0,0x83087761 } },
18974     /* 4 */
18975     { { 0x16a0d2bb,0x4f922fc5,0x1a623499,0x0d5cc16c,0x57c62c8b,0x9241cf3a,
18976         0xfd1b667f,0x2f5e6961 },
18977       { 0xf5a01797,0x5c15c70b,0x60956192,0x3d20b44d,0x071fdb52,0x04911b37,
18978         0x8d6f0f7b,0xf648f916 } },
18979     /* 5 */
18980     { { 0xe137bbbc,0x9e566847,0x8a6a0bec,0xe434469e,0x79d73463,0xb1c42761,
18981         0x133d0015,0x5abe0285 },
18982       { 0xc04c7dab,0x92aa837c,0x43260c07,0x573d9f4c,0x78e6cc37,0x0c931562,
18983         0x6b6f7383,0x94bb725b } },
18984     /* 6 */
18985     { { 0x720f141c,0xbbf9b48f,0x2df5bc74,0x6199b3cd,0x411045c4,0xdc3f6129,
18986         0x2f7dc4ef,0xcdd6bbcb },
18987       { 0xeaf436fd,0xcca6700b,0xb99326be,0x6f647f6d,0x014f2522,0x0c0fa792,
18988         0x4bdae5f6,0xa361bebd } },
18989     /* 7 */
18990     { { 0x597c13c7,0x28aa2558,0x50b7c3e1,0xc38d635f,0xf3c09d1d,0x07039aec,
18991         0xc4b5292c,0xba12ca09 },
18992       { 0x59f91dfd,0x9e408fa4,0xceea07fb,0x3af43b66,0x9d780b29,0x1eceb089,
18993         0x701fef4b,0x53ebb99d } },
18994     /* 8 */
18995     { { 0xb0e63d34,0x4fe7ee31,0xa9e54fab,0xf4600572,0xd5e7b5a4,0xc0493334,
18996         0x06d54831,0x8589fb92 },
18997       { 0x6583553a,0xaa70f5cc,0xe25649e5,0x0879094a,0x10044652,0xcc904507,
18998         0x02541c4f,0xebb0696d } },
18999     /* 9 */
19000     { { 0xac1647c5,0x4616ca15,0xc4cf5799,0xb8127d47,0x764dfbac,0xdc666aa3,
19001         0xd1b27da3,0xeb2820cb },
19002       { 0x6a87e008,0x9406f8d8,0x922378f3,0xd87dfa9d,0x80ccecb2,0x56ed2e42,
19003         0x55a7da1d,0x1f28289b } },
19004     /* 10 */
19005     { { 0x3b89da99,0xabbaa0c0,0xb8284022,0xa6f2d79e,0xb81c05e8,0x27847862,
19006         0x05e54d63,0x337a4b59 },
19007       { 0x21f7794a,0x3c67500d,0x7d6d7f61,0x207005b7,0x04cfd6e8,0x0a5a3781,
19008         0xf4c2fbd6,0x0d65e0d5 } },
19009     /* 11 */
19010     { { 0xb5275d38,0xd9d09bbe,0x0be0a358,0x4268a745,0x973eb265,0xf0762ff4,
19011         0x52f4a232,0xc23da242 },
19012       { 0x0b94520c,0x5da1b84f,0xb05bd78e,0x09666763,0x94d29ea1,0x3a4dcb86,
19013         0xc790cff1,0x19de3b8c } },
19014     /* 12 */
19015     { { 0x26c5fe04,0x183a716c,0x3bba1bdb,0x3b28de0b,0xa4cb712c,0x7432c586,
19016         0x91fccbfd,0xe34dcbd4 },
19017       { 0xaaa58403,0xb408d46b,0x82e97a53,0x9a697486,0x36aaa8af,0x9e390127,
19018         0x7b4e0f7f,0xe7641f44 } },
19019     /* 13 */
19020     { { 0xdf64ba59,0x7d753941,0x0b0242fc,0xd33f10ec,0xa1581859,0x4f06dfc6,
19021         0x052a57bf,0x4a12df57 },
19022       { 0x9439dbd0,0xbfa6338f,0xbde53e1f,0xd3c24bd4,0x21f1b314,0xfd5e4ffa,
19023         0xbb5bea46,0x6af5aa93 } },
19024     /* 14 */
19025     { { 0x10c91999,0xda10b699,0x2a580491,0x0a24b440,0xb8cc2090,0x3e0094b4,
19026         0x66a44013,0x5fe3475a },
19027       { 0xf93e7b4b,0xb0f8cabd,0x7c23f91a,0x292b501a,0xcd1e6263,0x42e889ae,
19028         0xecfea916,0xb544e308 } },
19029     /* 15 */
19030     { { 0x16ddfdce,0x6478c6e9,0xf89179e6,0x2c329166,0x4d4e67e1,0x4e8d6e76,
19031         0xa6b0c20b,0xe0b6b2bd },
19032       { 0xbb7efb57,0x0d312df2,0x790c4007,0x1aac0dde,0x679bc944,0xf90336ad,
19033         0x25a63774,0x71c023de } },
19034     /* 16 */
19035     { { 0xbfe20925,0x62a8c244,0x8fdce867,0x91c19ac3,0xdd387063,0x5a96a5d5,
19036         0x21d324f6,0x61d587d4 },
19037       { 0xa37173ea,0xe87673a2,0x53778b65,0x23848008,0x05bab43e,0x10f8441e,
19038         0x4621efbe,0xfa11fe12 } },
19039     /* 17 */
19040     { { 0x2cb19ffd,0x1c891f2b,0xb1923c23,0x01ba8d5b,0x8ac5ca8e,0xb6d03d67,
19041         0x1f13bedc,0x586eb04c },
19042       { 0x27e8ed09,0x0c35c6e5,0x1819ede2,0x1e81a33c,0x56c652fa,0x278fd6c0,
19043         0x70864f11,0x19d5ac08 } },
19044     /* 18 */
19045     { { 0x309a4e1f,0x1e99f581,0xe9270074,0xab7de71b,0xefd28d20,0x26a5ef0b,
19046         0x7f9c563f,0xe7c0073f },
19047       { 0x0ef59f76,0x1f6d663a,0x20fcb050,0x669b3b54,0x7a6602d4,0xc08c1f7a,
19048         0xc65b3c0a,0xe08504fe } },
19049     /* 19 */
19050     { { 0xa031b3ca,0xf098f68d,0xe6da6d66,0x6d1cab9e,0x94f246e8,0x5bfd81fa,
19051         0x5b0996b4,0x78f01882 },
19052       { 0x3a25787f,0xb7eefde4,0x1dccac9b,0x8016f80d,0xb35bfc36,0x0cea4877,
19053         0x7e94747a,0x43a773b8 } },
19054     /* 20 */
19055     { { 0xd2b533d5,0x62577734,0xa1bdddc0,0x673b8af6,0xa79ec293,0x577e7c9a,
19056         0xc3b266b1,0xbb6de651 },
19057       { 0xb65259b3,0xe7e9303a,0xd03a7480,0xd6a0afd3,0x9b3cfc27,0xc5ac83d1,
19058         0x5d18b99b,0x60b4619a } },
19059     /* 21 */
19060     { { 0x1ae5aa1c,0xbd6a38e1,0x49e73658,0xb8b7652b,0xee5f87ed,0x0b130014,
19061         0xaeebffcd,0x9d0f27b2 },
19062       { 0x7a730a55,0xca924631,0xddbbc83a,0x9c955b2f,0xac019a71,0x07c1dfe0,
19063         0x356ec48d,0x244a566d } },
19064     /* 22 */
19065     { { 0xeacf1f96,0x6db0394a,0x024c271c,0x9f2122a9,0x82cbd3b9,0x2626ac1b,
19066         0x3581ef69,0x45e58c87 },
19067       { 0xa38f9dbc,0xd3ff479d,0xe888a040,0xa8aaf146,0x46e0bed7,0x945adfb2,
19068         0xc1e4b7a4,0xc040e21c } },
19069     /* 23 */
19070     { { 0x6f8117b6,0x847af000,0x73a35433,0x651969ff,0x1d9475eb,0x482b3576,
19071         0x682c6ec7,0x1cdf5c97 },
19072       { 0x11f04839,0x7db775b4,0x48de1698,0x7dbeacf4,0xb70b3219,0xb2921dd1,
19073         0xa92dff3d,0x046755f8 } },
19074     /* 24 */
19075     { { 0xbce8ffcd,0xcc8ac5d2,0x2fe61a82,0x0d53c48b,0x7202d6c7,0xf6f16172,
19076         0x3b83a5f3,0x046e5e11 },
19077       { 0xd8007f01,0xe7b8ff64,0x5af43183,0x7fb1ef12,0x35e1a03c,0x045c5ea6,
19078         0x303d005b,0x6e0106c3 } },
19079     /* 25 */
19080     { { 0x88dd73b1,0x48c73584,0x995ed0d9,0x7670708f,0xc56a2ab7,0x38385ea8,
19081         0xe901cf1f,0x442594ed },
19082       { 0x12d4b65b,0xf8faa2c9,0x96c90c37,0x94c2343b,0x5e978d1f,0xd326e4a1,
19083         0x4c2ee68e,0xa796fa51 } },
19084     /* 26 */
19085     { { 0x823addd7,0x359fb604,0xe56693b3,0x9e2a6183,0x3cbf3c80,0xf885b78e,
19086         0xc69766e9,0xe4ad2da9 },
19087       { 0x8e048a61,0x357f7f42,0xc092d9a0,0x082d198c,0xc03ed8ef,0xfc3a1af4,
19088         0xc37b5143,0xc5e94046 } },
19089     /* 27 */
19090     { { 0x2be75f9e,0x476a538c,0xcb123a78,0x6fd1a9e8,0xb109c04b,0xd85e4df0,
19091         0xdb464747,0x63283daf },
19092       { 0xbaf2df15,0xce728cf7,0x0ad9a7f4,0xe592c455,0xe834bcc3,0xfab226ad,
19093         0x1981a938,0x68bd19ab } },
19094     /* 28 */
19095     { { 0x1887d659,0xc08ead51,0xb359305a,0x3374d5f4,0xcfe74fe3,0x96986981,
19096         0x3c6fdfd6,0x495292f5 },
19097       { 0x1acec896,0x4a878c9e,0xec5b4484,0xd964b210,0x664d60a7,0x6696f7e2,
19098         0x26036837,0x0ec7530d } },
19099     /* 29 */
19100     { { 0xad2687bb,0x2da13a05,0xf32e21fa,0xa1f83b6a,0x1dd4607b,0x390f5ef5,
19101         0x64863f0b,0x0f6207a6 },
19102       { 0x0f138233,0xbd67e3bb,0x272aa718,0xdd66b96c,0x26ec88ae,0x8ed00407,
19103         0x08ed6dcf,0xff0db072 } },
19104     /* 30 */
19105     { { 0x4c95d553,0x749fa101,0x5d680a8a,0xa44052fd,0xff3b566f,0x183b4317,
19106         0x88740ea3,0x313b513c },
19107       { 0x08d11549,0xb402e2ac,0xb4dee21c,0x071ee10b,0x47f2320e,0x26b987dd,
19108         0x86f19f81,0x2d3abcf9 } },
19109     /* 31 */
19110     { { 0x815581a2,0x4c288501,0x632211af,0x9a0a6d56,0x0cab2e99,0x19ba7a0f,
19111         0xded98cdf,0xc036fa10 },
19112       { 0xc1fbd009,0x29ae08ba,0x06d15816,0x0b68b190,0x9b9e0d8f,0xc2eb3277,
19113         0xb6d40194,0xa6b2a2c4 } },
19114     /* 32 */
19115     { { 0x6d3549cf,0xd433e50f,0xfacd665e,0x6f33696f,0xce11fcb4,0x695bfdac,
19116         0xaf7c9860,0x810ee252 },
19117       { 0x7159bb2c,0x65450fe1,0x758b357b,0xf7dfbebe,0xd69fea72,0x2b057e74,
19118         0x92731745,0xd485717a } },
19119     /* 33 */
19120     { { 0xf0cb5a98,0x11741a8a,0x1f3110bf,0xd3da8f93,0xab382adf,0x1994e2cb,
19121         0x2f9a604e,0x6a6045a7 },
19122       { 0xa2b2411d,0x170c0d3f,0x510e96e0,0xbe0eb83e,0x8865b3cc,0x3bcc9f73,
19123         0xf9e15790,0xd3e45cfa } },
19124     /* 34 */
19125     { { 0xe83f7669,0xce1f69bb,0x72877d6b,0x09f8ae82,0x3244278d,0x9548ae54,
19126         0xe3c2c19c,0x207755de },
19127       { 0x6fef1945,0x87bd61d9,0xb12d28c3,0x18813cef,0x72df64aa,0x9fbcd1d6,
19128         0x7154b00d,0x48dc5ee5 } },
19129     /* 35 */
19130     { { 0xf7e5a199,0x123790bf,0x989ccbb7,0xe0efb8cf,0x0a519c79,0xc27a2bfe,
19131         0xdff6f445,0xf2fb0aed },
19132       { 0xf0b5025f,0x41c09575,0x40fa9f22,0x550543d7,0x380bfbd0,0x8fa3c8ad,
19133         0xdb28d525,0xa13e9015 } },
19134     /* 36 */
19135     { { 0xa2b65cbc,0xf9f7a350,0x2a464226,0x0b04b972,0xe23f07a1,0x265ce241,
19136         0x1497526f,0x2bf0d6b0 },
19137       { 0x4b216fb7,0xd3d4dd3f,0xfbdda26a,0xf7d7b867,0x6708505c,0xaeb7b83f,
19138         0x162fe89f,0x42a94a5a } },
19139     /* 37 */
19140     { { 0xeaadf191,0x5846ad0b,0x25a268d7,0x0f8a4890,0x494dc1f6,0xe8603050,
19141         0xc65ede3d,0x2c2dd969 },
19142       { 0x93849c17,0x6d02171d,0x1da250dd,0x460488ba,0x3c3a5485,0x4810c706,
19143         0x42c56dbc,0xf437fa1f } },
19144     /* 38 */
19145     { { 0x4a0f7dab,0x6aa0d714,0x1776e9ac,0x0f049793,0xf5f39786,0x52c0a050,
19146         0x54707aa8,0xaaf45b33 },
19147       { 0xc18d364a,0x85e37c33,0x3e497165,0xd40b9b06,0x15ec5444,0xf4171681,
19148         0xf4f272bc,0xcdf6310d } },
19149     /* 39 */
19150     { { 0x8ea8b7ef,0x7473c623,0x85bc2287,0x08e93518,0x2bda8e34,0x41956772,
19151         0xda9e2ff2,0xf0d008ba },
19152       { 0x2414d3b1,0x2912671d,0xb019ea76,0xb3754985,0x453bcbdb,0x5c61b96d,
19153         0xca887b8b,0x5bd5c2f5 } },
19154     /* 40 */
19155     { { 0xf49a3154,0xef0f469e,0x6e2b2e9a,0x3e85a595,0xaa924a9c,0x45aaec1e,
19156         0xa09e4719,0xaa12dfc8 },
19157       { 0x4df69f1d,0x26f27227,0xa2ff5e73,0xe0e4c82c,0xb7a9dd44,0xb9d8ce73,
19158         0xe48ca901,0x6c036e73 } },
19159     /* 41 */
19160     { { 0x0f6e3138,0x5cfae12a,0x25ad345a,0x6966ef00,0x45672bc5,0x8993c64b,
19161         0x96afbe24,0x292ff658 },
19162       { 0x5e213402,0xd5250d44,0x4392c9fe,0xf6580e27,0xda1c72e8,0x097b397f,
19163         0x311b7276,0x644e0c90 } },
19164     /* 42 */
19165     { { 0xa47153f0,0xe1e421e1,0x920418c9,0xb86c3b79,0x705d7672,0x93bdce87,
19166         0xcab79a77,0xf25ae793 },
19167       { 0x6d869d0c,0x1f3194a3,0x4986c264,0x9d55c882,0x096e945e,0x49fb5ea3,
19168         0x13db0a3e,0x39b8e653 } },
19169     /* 43 */
19170     { { 0xb6fd2e59,0x37754200,0x9255c98f,0x35e2c066,0x0e2a5739,0xd9dab21a,
19171         0x0f19db06,0x39122f2f },
19172       { 0x03cad53c,0xcfbce1e0,0xe65c17e3,0x225b2c0f,0x9aa13877,0x72baf1d2,
19173         0xce80ff8d,0x8de80af8 } },
19174     /* 44 */
19175     { { 0x207bbb76,0xafbea8d9,0x21782758,0x921c7e7c,0x1c0436b1,0xdfa2b74b,
19176         0x2e368c04,0x87194906 },
19177       { 0xa3993df5,0xb5f928bb,0xf3b3d26a,0x639d75b5,0x85b55050,0x011aa78a,
19178         0x5b74fde1,0xfc315e6a } },
19179     /* 45 */
19180     { { 0xe8d6ecfa,0x561fd41a,0x1aec7f86,0x5f8c44f6,0x4924741d,0x98452a7b,
19181         0xee389088,0xe6d4a7ad },
19182       { 0x4593c75d,0x60552ed1,0xdd271162,0x70a70da4,0x7ba2c7db,0xd2aede93,
19183         0x9be2ae57,0x35dfaf9a } },
19184     /* 46 */
19185     { { 0xaa736636,0x6b956fcd,0xae2cab7e,0x09f51d97,0x0f349966,0xfb10bf41,
19186         0x1c830d2b,0x1da5c7d7 },
19187       { 0x3cce6825,0x5c41e483,0xf9573c3b,0x15ad118f,0xf23036b8,0xa28552c7,
19188         0xdbf4b9d6,0x7077c0fd } },
19189     /* 47 */
19190     { { 0x46b9661c,0xbf63ff8d,0x0d2cfd71,0xa1dfd36b,0xa847f8f7,0x0373e140,
19191         0xe50efe44,0x53a8632e },
19192       { 0x696d8051,0x0976ff68,0xc74f468a,0xdaec0c95,0x5e4e26bd,0x62994dc3,
19193         0x34e1fcc1,0x028ca76d } },
19194     /* 48 */
19195     { { 0xfc9877ee,0xd11d47dc,0x801d0002,0xc8b36210,0x54c260b6,0xd002c117,
19196         0x6962f046,0x04c17cd8 },
19197       { 0xb0daddf5,0x6d9bd094,0x24ce55c0,0xbea23575,0x72da03b5,0x663356e6,
19198         0xfed97474,0xf7ba4de9 } },
19199     /* 49 */
19200     { { 0xebe1263f,0xd0dbfa34,0x71ae7ce6,0x55763735,0x82a6f523,0xd2440553,
19201         0x52131c41,0xe31f9600 },
19202       { 0xea6b6ec6,0xd1bb9216,0x73c2fc44,0x37a1d12e,0x89d0a294,0xc10e7eac,
19203         0xce34d47b,0xaa3a6259 } },
19204     /* 50 */
19205     { { 0x36f3dcd3,0xfbcf9df5,0xd2bf7360,0x6ceded50,0xdf504f5b,0x491710fa,
19206         0x7e79daee,0x2398dd62 },
19207       { 0x6d09569e,0xcf4705a3,0x5149f769,0xea0619bb,0x35f6034c,0xff9c0377,
19208         0x1c046210,0x5717f5b2 } },
19209     /* 51 */
19210     { { 0x21dd895e,0x9fe229c9,0x40c28451,0x8e518500,0x1d637ecd,0xfa13d239,
19211         0x0e3c28de,0x660a2c56 },
19212       { 0xd67fcbd0,0x9cca88ae,0x0ea9f096,0xc8472478,0x72e92b4d,0x32b2f481,
19213         0x4f522453,0x624ee54c } },
19214     /* 52 */
19215     { { 0xd897eccc,0x09549ce4,0x3f9880aa,0x4d49d1d9,0x043a7c20,0x723c2423,
19216         0x92bdfbc0,0x4f392afb },
19217       { 0x7de44fd9,0x6969f8fa,0x57b32156,0xb66cfbe4,0x368ebc3c,0xdb2fa803,
19218         0xccdb399c,0x8a3e7977 } },
19219     /* 53 */
19220     { { 0x06c4b125,0xdde1881f,0xf6e3ca8c,0xae34e300,0x5c7a13e9,0xef6999de,
19221         0x70c24404,0x3888d023 },
19222       { 0x44f91081,0x76280356,0x5f015504,0x3d9fcf61,0x632cd36e,0x1827edc8,
19223         0x18102336,0xa5e62e47 } },
19224     /* 54 */
19225     { { 0x2facd6c8,0x1a825ee3,0x54bcbc66,0x699c6354,0x98df9931,0x0ce3edf7,
19226         0x466a5adc,0x2c4768e6 },
19227       { 0x90a64bc9,0xb346ff8c,0xe4779f5c,0x630a6020,0xbc05e884,0xd949d064,
19228         0xf9e652a0,0x7b5e6441 } },
19229     /* 55 */
19230     { { 0x1d28444a,0x2169422c,0xbe136a39,0xe996c5d8,0xfb0c7fce,0x2387afe5,
19231         0x0c8d744a,0xb8af73cb },
19232       { 0x338b86fd,0x5fde83aa,0xa58a5cff,0xfee3f158,0x20ac9433,0xc9ee8f6f,
19233         0x7f3f0895,0xa036395f } },
19234     /* 56 */
19235     { { 0xa10f7770,0x8c73c6bb,0xa12a0e24,0xa6f16d81,0x51bc2b9f,0x100df682,
19236         0x875fb533,0x4be36b01 },
19237       { 0x9fb56dbb,0x9226086e,0x07e7a4f8,0x306fef8b,0x66d52f20,0xeeaccc05,
19238         0x1bdc00c0,0x8cbc9a87 } },
19239     /* 57 */
19240     { { 0xc0dac4ab,0xe131895c,0x712ff112,0xa874a440,0x6a1cee57,0x6332ae7c,
19241         0x0c0835f8,0x44e7553e },
19242       { 0x7734002d,0x6d503fff,0x0b34425c,0x9d35cb8b,0x0e8738b5,0x95f70276,
19243         0x5eb8fc18,0x470a683a } },
19244     /* 58 */
19245     { { 0x90513482,0x81b761dc,0x01e9276a,0x0287202a,0x0ce73083,0xcda441ee,
19246         0xc63dc6ef,0x16410690 },
19247       { 0x6d06a2ed,0xf5034a06,0x189b100b,0xdd4d7745,0xab8218c9,0xd914ae72,
19248         0x7abcbb4f,0xd73479fd } },
19249     /* 59 */
19250     { { 0x5ad4c6e5,0x7edefb16,0x5b06d04d,0x262cf08f,0x8575cb14,0x12ed5bb1,
19251         0x0771666b,0x816469e3 },
19252       { 0x561e291e,0xd7ab9d79,0xc1de1661,0xeb9daf22,0x135e0513,0xf49827eb,
19253         0xf0dd3f9c,0x0a36dd23 } },
19254     /* 60 */
19255     { { 0x41d5533c,0x098d32c7,0x8684628f,0x7c5f5a9e,0xe349bd11,0x39a228ad,
19256         0xfdbab118,0xe331dfd6 },
19257       { 0x6bcc6ed8,0x5100ab68,0xef7a260e,0x7160c3bd,0xbce850d7,0x9063d9a7,
19258         0x492e3389,0xd3b4782a } },
19259     /* 61 */
19260     { { 0xf3821f90,0xa149b6e8,0x66eb7aad,0x92edd9ed,0x1a013116,0x0bb66953,
19261         0x4c86a5bd,0x7281275a },
19262       { 0xd3ff47e5,0x503858f7,0x61016441,0x5e1616bc,0x7dfd9bb1,0x62b0f11a,
19263         0xce145059,0x2c062e7e } },
19264     /* 62 */
19265     { { 0x0159ac2e,0xa76f996f,0xcbdb2713,0x281e7736,0x08e46047,0x2ad6d288,
19266         0x2c4e7ef1,0x282a35f9 },
19267       { 0xc0ce5cd2,0x9c354b1e,0x1379c229,0xcf99efc9,0x3e82c11e,0x992caf38,
19268         0x554d2abd,0xc71cd513 } },
19269     /* 63 */
19270     { { 0x09b578f4,0x4885de9c,0xe3affa7a,0x1884e258,0x59182f1f,0x8f76b1b7,
19271         0xcf47f3a3,0xc50f6740 },
19272       { 0x374b68ea,0xa9c4adf3,0x69965fe2,0xa406f323,0x85a53050,0x2f86a222,
19273         0x212958dc,0xb9ecb3a7 } },
19274     /* 64 */
19275     { { 0xf4f8b16a,0x56f8410e,0xc47b266a,0x97241afe,0x6d9c87c1,0x0a406b8e,
19276         0xcd42ab1b,0x803f3e02 },
19277       { 0x04dbec69,0x7f0309a8,0x3bbad05f,0xa83b85f7,0xad8e197f,0xc6097273,
19278         0x5067adc1,0xc097440e } },
19279     /* 65 */
19280     { { 0xc379ab34,0x846a56f2,0x841df8d1,0xa8ee068b,0x176c68ef,0x20314459,
19281         0x915f1f30,0xf1af32d5 },
19282       { 0x5d75bd50,0x99c37531,0xf72f67bc,0x837cffba,0x48d7723f,0x0613a418,
19283         0xe2d41c8b,0x23d0f130 } },
19284     /* 66 */
19285     { { 0xf41500d9,0x857ab6ed,0xfcbeada8,0x0d890ae5,0x89725951,0x52fe8648,
19286         0xc0a3fadd,0xb0288dd6 },
19287       { 0x650bcb08,0x85320f30,0x695d6e16,0x71af6313,0xb989aa76,0x31f520a7,
19288         0xf408c8d2,0xffd3724f } },
19289     /* 67 */
19290     { { 0xb458e6cb,0x53968e64,0x317a5d28,0x992dad20,0x7aa75f56,0x3814ae0b,
19291         0xd78c26df,0xf5590f4a },
19292       { 0xcf0ba55a,0x0fc24bd3,0x0c778bae,0x0fc4724a,0x683b674a,0x1ce9864f,
19293         0xf6f74a20,0x18d6da54 } },
19294     /* 68 */
19295     { { 0xd5be5a2b,0xed93e225,0x5934f3c6,0x6fe79983,0x22626ffc,0x43140926,
19296         0x7990216a,0x50bbb4d9 },
19297       { 0xe57ec63e,0x378191c6,0x181dcdb2,0x65422c40,0x0236e0f6,0x41a8099b,
19298         0x01fe49c3,0x2b100118 } },
19299     /* 69 */
19300     { { 0x9b391593,0xfc68b5c5,0x598270fc,0xc385f5a2,0xd19adcbb,0x7144f3aa,
19301         0x83fbae0c,0xdd558999 },
19302       { 0x74b82ff4,0x93b88b8e,0x71e734c9,0xd2e03c40,0x43c0322a,0x9a7a9eaf,
19303         0x149d6041,0xe6e4c551 } },
19304     /* 70 */
19305     { { 0x1e9af288,0x55f655bb,0xf7ada931,0x647e1a64,0xcb2820e5,0x43697e4b,
19306         0x07ed56ff,0x51e00db1 },
19307       { 0x771c327e,0x43d169b8,0x4a96c2ad,0x29cdb20b,0x3deb4779,0xc07d51f5,
19308         0x49829177,0xe22f4241 } },
19309     /* 71 */
19310     { { 0x635f1abb,0xcd45e8f4,0x68538874,0x7edc0cb5,0xb5a8034d,0xc9472c1f,
19311         0x52dc48c9,0xf709373d },
19312       { 0xa8af30d6,0x401966bb,0xf137b69c,0x95bf5f4a,0x9361c47e,0x3966162a,
19313         0xe7275b11,0xbd52d288 } },
19314     /* 72 */
19315     { { 0x9c5fa877,0xab155c7a,0x7d3a3d48,0x17dad672,0x73d189d8,0x43f43f9e,
19316         0xc8aa77a6,0xa0d0f8e4 },
19317       { 0xcc94f92d,0x0bbeafd8,0x0c4ddb3a,0xd818c8be,0xb82eba14,0x22cc65f8,
19318         0x946d6a00,0xa56c78c7 } },
19319     /* 73 */
19320     { { 0x0dd09529,0x2962391b,0x3daddfcf,0x803e0ea6,0x5b5bf481,0x2c77351f,
19321         0x731a367a,0xd8befdf8 },
19322       { 0xfc0157f4,0xab919d42,0xfec8e650,0xf51caed7,0x02d48b0a,0xcdf9cb40,
19323         0xce9f6478,0x854a68a5 } },
19324     /* 74 */
19325     { { 0x63506ea5,0xdc35f67b,0xa4fe0d66,0x9286c489,0xfe95cd4d,0x3f101d3b,
19326         0x98846a95,0x5cacea0b },
19327       { 0x9ceac44d,0xa90df60c,0x354d1c3a,0x3db29af4,0xad5dbabe,0x08dd3de8,
19328         0x35e4efa9,0xe4982d12 } },
19329     /* 75 */
19330     { { 0xc34cd55e,0x23104a22,0x2680d132,0x58695bb3,0x1fa1d943,0xfb345afa,
19331         0x16b20499,0x8046b7f6 },
19332       { 0x38e7d098,0xb533581e,0xf46f0b70,0xd7f61e8d,0x44cb78c4,0x30dea9ea,
19333         0x9082af55,0xeb17ca7b } },
19334     /* 76 */
19335     { { 0x76a145b9,0x1751b598,0xc1bc71ec,0xa5cf6b0f,0x392715bb,0xd3e03565,
19336         0xfab5e131,0x097b00ba },
19337       { 0x565f69e1,0xaa66c8e9,0xb5be5199,0x77e8f75a,0xda4fd984,0x6033ba11,
19338         0xafdbcc9e,0xf95c747b } },
19339     /* 77 */
19340     { { 0xbebae45e,0x558f01d3,0xc4bc6955,0xa8ebe9f0,0xdbc64fc6,0xaeb705b1,
19341         0x566ed837,0x3512601e },
19342       { 0xfa1161cd,0x9336f1e1,0x4c65ef87,0x328ab8d5,0x724f21e5,0x4757eee2,
19343         0x6068ab6b,0x0ef97123 } },
19344     /* 78 */
19345     { { 0x54ca4226,0x02598cf7,0xf8642c8e,0x5eede138,0x468e1790,0x48963f74,
19346         0x3b4fbc95,0xfc16d933 },
19347       { 0xe7c800ca,0xbe96fb31,0x2678adaa,0x13806331,0x6ff3e8b5,0x3d624497,
19348         0xb95d7a17,0x14ca4af1 } },
19349     /* 79 */
19350     { { 0xbd2f81d5,0x7a4771ba,0x01f7d196,0x1a5f9d69,0xcad9c907,0xd898bef7,
19351         0xf59c231d,0x4057b063 },
19352       { 0x89c05c0a,0xbffd82fe,0x1dc0df85,0xe4911c6f,0xa35a16db,0x3befccae,
19353         0xf1330b13,0x1c3b5d64 } },
19354     /* 80 */
19355     { { 0x80ec21fe,0x5fe14bfe,0xc255be82,0xf6ce116a,0x2f4a5d67,0x98bc5a07,
19356         0xdb7e63af,0xfad27148 },
19357       { 0x29ab05b3,0x90c0b6ac,0x4e251ae6,0x37a9a83c,0xc2aade7d,0x0a7dc875,
19358         0x9f0e1a84,0x77387de3 } },
19359     /* 81 */
19360     { { 0xa56c0dd7,0x1e9ecc49,0x46086c74,0xa5cffcd8,0xf505aece,0x8f7a1408,
19361         0xbef0c47e,0xb37b85c0 },
19362       { 0xcc0e6a8f,0x3596b6e4,0x6b388f23,0xfd6d4bbf,0xc39cef4e,0xaba453fa,
19363         0xf9f628d5,0x9c135ac8 } },
19364     /* 82 */
19365     { { 0x84e35743,0x32aa3202,0x85a3cdef,0x320d6ab1,0x1df19819,0xb821b176,
19366         0xc433851f,0x5721361f },
19367       { 0x71fc9168,0x1f0db36a,0x5e5c403c,0x5f98ba73,0x37bcd8f5,0xf64ca87e,
19368         0xe6bb11bd,0xdcbac3c9 } },
19369     /* 83 */
19370     { { 0x4518cbe2,0xf01d9968,0x9c9eb04e,0xd242fc18,0xe47feebf,0x727663c7,
19371         0x2d626862,0xb8c1c89e },
19372       { 0xc8e1d569,0x51a58bdd,0xb7d88cd0,0x563809c8,0xf11f31eb,0x26c27fd9,
19373         0x2f9422d4,0x5d23bbda } },
19374     /* 84 */
19375     { { 0x95c8f8be,0x0a1c7294,0x3bf362bf,0x2961c480,0xdf63d4ac,0x9e418403,
19376         0x91ece900,0xc109f9cb },
19377       { 0x58945705,0xc2d095d0,0xddeb85c0,0xb9083d96,0x7a40449b,0x84692b8d,
19378         0x2eee1ee1,0x9bc3344f } },
19379     /* 85 */
19380     { { 0x42913074,0x0d5ae356,0x48a542b1,0x55491b27,0xb310732a,0x469ca665,
19381         0x5f1a4cc1,0x29591d52 },
19382       { 0xb84f983f,0xe76f5b6b,0x9f5f84e1,0xbe7eef41,0x80baa189,0x1200d496,
19383         0x18ef332c,0x6376551f } },
19384     /* 86 */
19385     { { 0x562976cc,0xbda5f14e,0x0ef12c38,0x22bca3e6,0x6cca9852,0xbbfa3064,
19386         0x08e2987a,0xbdb79dc8 },
19387       { 0xcb06a772,0xfd2cb5c9,0xfe536dce,0x38f475aa,0x7c2b5db8,0xc2a3e022,
19388         0xadd3c14a,0x8ee86001 } },
19389     /* 87 */
19390     { { 0xa4ade873,0xcbe96981,0xc4fba48c,0x7ee9aa4d,0x5a054ba5,0x2cee2899,
19391         0x6f77aa4b,0x92e51d7a },
19392       { 0x7190a34d,0x948bafa8,0xf6bd1ed1,0xd698f75b,0x0caf1144,0xd00ee6e3,
19393         0x0a56aaaa,0x5182f86f } },
19394     /* 88 */
19395     { { 0x7a4cc99c,0xfba6212c,0x3e6d9ca1,0xff609b68,0x5ac98c5a,0x5dbb27cb,
19396         0x4073a6f2,0x91dcab5d },
19397       { 0x5f575a70,0x01b6cc3d,0x6f8d87fa,0x0cb36139,0x89981736,0x165d4e8c,
19398         0x97974f2b,0x17a0cedb } },
19399     /* 89 */
19400     { { 0x076c8d3a,0x38861e2a,0x210f924b,0x701aad39,0x13a835d9,0x94d0eae4,
19401         0x7f4cdf41,0x2e8ce36c },
19402       { 0x037a862b,0x91273dab,0x60e4c8fa,0x01ba9bb7,0x33baf2dd,0xf9645388,
19403         0x34f668f3,0xf4ccc6cb } },
19404     /* 90 */
19405     { { 0xf1f79687,0x44ef525c,0x92efa815,0x7c595495,0xa5c78d29,0xe1231741,
19406         0x9a0df3c9,0xac0db488 },
19407       { 0xdf01747f,0x86bfc711,0xef17df13,0x592b9358,0x5ccb6bb5,0xe5880e4f,
19408         0x94c974a2,0x95a64a61 } },
19409     /* 91 */
19410     { { 0xc15a4c93,0x72c1efda,0x82585141,0x40269b73,0x16cb0bad,0x6a8dfb1c,
19411         0x29210677,0x231e54ba },
19412       { 0x8ae6d2dc,0xa70df917,0x39112918,0x4d6aa63f,0x5e5b7223,0xf627726b,
19413         0xd8a731e1,0xab0be032 } },
19414     /* 92 */
19415     { { 0x8d131f2d,0x097ad0e9,0x3b04f101,0x637f09e3,0xd5e9a748,0x1ac86196,
19416         0x2cf6a679,0xf1bcc880 },
19417       { 0xe8daacb4,0x25c69140,0x60f65009,0x3c4e4055,0x477937a6,0x591cc8fc,
19418         0x5aebb271,0x85169469 } },
19419     /* 93 */
19420     { { 0xf1dcf593,0xde35c143,0xb018be3b,0x78202b29,0x9bdd9d3d,0xe9cdadc2,
19421         0xdaad55d8,0x8f67d9d2 },
19422       { 0x7481ea5f,0x84111656,0xe34c590c,0xe7d2dde9,0x05053fa8,0xffdd43f4,
19423         0xc0728b5d,0xf84572b9 } },
19424     /* 94 */
19425     { { 0x97af71c9,0x5e1a7a71,0x7a736565,0xa1449444,0x0e1d5063,0xa1b4ae07,
19426         0x616b2c19,0xedee2710 },
19427       { 0x11734121,0xb2f034f5,0x4a25e9f0,0x1cac6e55,0xa40c2ecf,0x8dc148f3,
19428         0x44ebd7f4,0x9fd27e9b } },
19429     /* 95 */
19430     { { 0xf6e2cb16,0x3cc7658a,0xfe5919b6,0xe3eb7d2c,0x168d5583,0x5a8c5816,
19431         0x958ff387,0xa40c2fb6 },
19432       { 0xfedcc158,0x8c9ec560,0x55f23056,0x7ad804c6,0x9a307e12,0xd9396704,
19433         0x7dc6decf,0x99bc9bb8 } },
19434     /* 96 */
19435     { { 0x927dafc6,0x84a9521d,0x5c09cd19,0x52c1fb69,0xf9366dde,0x9d9581a0,
19436         0xa16d7e64,0x9abe210b },
19437       { 0x48915220,0x480af84a,0x4dd816c6,0xfa73176a,0x1681ca5a,0xc7d53987,
19438         0x87f344b0,0x7881c257 } },
19439     /* 97 */
19440     { { 0xe0bcf3ff,0x93399b51,0x127f74f6,0x0d02cbc5,0xdd01d968,0x8fb465a2,
19441         0xa30e8940,0x15e6e319 },
19442       { 0x3e0e05f4,0x646d6e0d,0x43588404,0xfad7bddc,0xc4f850d3,0xbe61c7d1,
19443         0x191172ce,0x0e55facf } },
19444     /* 98 */
19445     { { 0xf8787564,0x7e9d9806,0x31e85ce6,0x1a331721,0xb819e8d6,0x6b0158ca,
19446         0x6fe96577,0xd73d0976 },
19447       { 0x1eb7206e,0x42483425,0xc618bb42,0xa519290f,0x5e30a520,0x5dcbb859,
19448         0x8f15a50b,0x9250a374 } },
19449     /* 99 */
19450     { { 0xbe577410,0xcaff08f8,0x5077a8c6,0xfd408a03,0xec0a63a4,0xf1f63289,
19451         0xc1cc8c0b,0x77414082 },
19452       { 0xeb0991cd,0x05a40fa6,0x49fdc296,0xc1ca0866,0xb324fd40,0x3a68a3c7,
19453         0x12eb20b9,0x8cb04f4d } },
19454     /* 100 */
19455     { { 0x6906171c,0xb1c2d055,0xb0240c3f,0x9073e9cd,0xd8906841,0xdb8e6b4f,
19456         0x47123b51,0xe4e429ef },
19457       { 0x38ec36f4,0x0b8dd53c,0xff4b6a27,0xf9d2dc01,0x879a9a48,0x5d066e07,
19458         0x3c6e6552,0x37bca2ff } },
19459     /* 101 */
19460     { { 0xdf562470,0x4cd2e3c7,0xc0964ac9,0x44f272a2,0x80c793be,0x7c6d5df9,
19461         0x3002b22a,0x59913edc },
19462       { 0x5750592a,0x7a139a83,0xe783de02,0x99e01d80,0xea05d64f,0xcf8c0375,
19463         0xb013e226,0x43786e4a } },
19464     /* 102 */
19465     { { 0x9e56b5a6,0xff32b0ed,0xd9fc68f9,0x0750d9a6,0x597846a7,0xec15e845,
19466         0xb7e79e7a,0x8638ca98 },
19467       { 0x0afc24b2,0x2f5ae096,0x4dace8f2,0x05398eaf,0xaecba78f,0x3b765dd0,
19468         0x7b3aa6f0,0x1ecdd36a } },
19469     /* 103 */
19470     { { 0x6c5ff2f3,0x5d3acd62,0x2873a978,0xa2d516c0,0xd2110d54,0xad94c9fa,
19471         0xd459f32d,0xd85d0f85 },
19472       { 0x10b11da3,0x9f700b8d,0xa78318c4,0xd2c22c30,0x9208decd,0x556988f4,
19473         0xb4ed3c62,0xa04f19c3 } },
19474     /* 104 */
19475     { { 0xed7f93bd,0x087924c8,0x392f51f6,0xcb64ac5d,0x821b71af,0x7cae330a,
19476         0x5c0950b0,0x92b2eeea },
19477       { 0x85b6e235,0x85ac4c94,0x2936c0f0,0xab2ca4a9,0xe0508891,0x80faa6b3,
19478         0x5834276c,0x1ee78221 } },
19479     /* 105 */
19480     { { 0xe63e79f7,0xa60a2e00,0xf399d906,0xf590e7b2,0x6607c09d,0x9021054a,
19481         0x57a6e150,0xf3f2ced8 },
19482       { 0xf10d9b55,0x200510f3,0xd8642648,0x9d2fcfac,0xe8bd0e7c,0xe5631aa7,
19483         0x3da3e210,0x0f56a454 } },
19484     /* 106 */
19485     { { 0x1043e0df,0x5b21bffa,0x9c007e6d,0x6c74b6cc,0xd4a8517a,0x1a656ec0,
19486         0x1969e263,0xbd8f1741 },
19487       { 0xbeb7494a,0x8a9bbb86,0x45f3b838,0x1567d46f,0xa4e5a79a,0xdf7a12a7,
19488         0x30ccfa09,0x2d1a1c35 } },
19489     /* 107 */
19490     { { 0x506508da,0x192e3813,0xa1d795a7,0x336180c4,0x7a9944b3,0xcddb5949,
19491         0xb91fba46,0xa107a65e },
19492       { 0x0f94d639,0xe6d1d1c5,0x8a58b7d7,0x8b4af375,0xbd37ca1c,0x1a7c5584,
19493         0xf87a9af2,0x183d760a } },
19494     /* 108 */
19495     { { 0x0dde59a4,0x29d69711,0x0e8bef87,0xf1ad8d07,0x4f2ebe78,0x229b4963,
19496         0xc269d754,0x1d44179d },
19497       { 0x8390d30e,0xb32dc0cf,0x0de8110c,0x0a3b2753,0x2bc0339a,0x31af1dc5,
19498         0x9606d262,0x771f9cc2 } },
19499     /* 109 */
19500     { { 0x85040739,0x99993e77,0x8026a939,0x44539db9,0xf5f8fc26,0xcf40f6f2,
19501         0x0362718e,0x64427a31 },
19502       { 0x85428aa8,0x4f4f2d87,0xebfb49a8,0x7b7adc3f,0xf23d01ac,0x201b2c6d,
19503         0x6ae90d6d,0x49d9b749 } },
19504     /* 110 */
19505     { { 0x435d1099,0xcc78d8bc,0x8e8d1a08,0x2adbcd4e,0x2cb68a41,0x02c2e2a0,
19506         0x3f605445,0x9037d81b },
19507       { 0x074c7b61,0x7cdbac27,0x57bfd72e,0xfe2031ab,0x596d5352,0x61ccec96,
19508         0x7cc0639c,0x08c3de6a } },
19509     /* 111 */
19510     { { 0xf6d552ab,0x20fdd020,0x05cd81f1,0x56baff98,0x91351291,0x06fb7c3e,
19511         0x45796b2f,0xc6909442 },
19512       { 0x41231bd1,0x17b3ae9c,0x5cc58205,0x1eac6e87,0xf9d6a122,0x208837ab,
19513         0xcafe3ac0,0x3fa3db02 } },
19514     /* 112 */
19515     { { 0x05058880,0xd75a3e65,0x643943f2,0x7da365ef,0xfab24925,0x4147861c,
19516         0xfdb808ff,0xc5c4bdb0 },
19517       { 0xb272b56b,0x73513e34,0x11b9043a,0xc8327e95,0xf8844969,0xfd8ce37d,
19518         0x46c2b6b5,0x2d56db94 } },
19519     /* 113 */
19520     { { 0xff46ac6b,0x2461782f,0x07a2e425,0xd19f7926,0x09a48de1,0xfafea3c4,
19521         0xe503ba42,0x0f56bd9d },
19522       { 0x345cda49,0x137d4ed1,0x816f299d,0x821158fc,0xaeb43402,0xe7c6a54a,
19523         0x1173b5f1,0x4003bb9d } },
19524     /* 114 */
19525     { { 0xa0803387,0x3b8e8189,0x39cbd404,0xece115f5,0xd2877f21,0x4297208d,
19526         0xa07f2f9e,0x53765522 },
19527       { 0xa8a4182d,0xa4980a21,0x3219df79,0xa2bbd07a,0x1a19a2d4,0x674d0a2e,
19528         0x6c5d4549,0x7a056f58 } },
19529     /* 115 */
19530     { { 0x9d8a2a47,0x646b2558,0xc3df2773,0x5b582948,0xabf0d539,0x51ec000e,
19531         0x7a1a2675,0x77d482f1 },
19532       { 0x87853948,0xb8a1bd95,0x6cfbffee,0xa6f817bd,0x80681e47,0xab6ec057,
19533         0x2b38b0e4,0x4115012b } },
19534     /* 116 */
19535     { { 0x6de28ced,0x3c73f0f4,0x9b13ec47,0x1d5da760,0x6e5c6392,0x61b8ce9e,
19536         0xfbea0946,0xcdf04572 },
19537       { 0x6c53c3b0,0x1cb3c58b,0x447b843c,0x97fe3c10,0x2cb9780e,0xfb2b8ae1,
19538         0x97383109,0xee703dda } },
19539     /* 117 */
19540     { { 0xff57e43a,0x34515140,0xb1b811b8,0xd44660d3,0x8f42b986,0x2b3b5dff,
19541         0xa162ce21,0x2a0ad89d },
19542       { 0x6bc277ba,0x64e4a694,0xc141c276,0xc788c954,0xcabf6274,0x141aa64c,
19543         0xac2b4659,0xd62d0b67 } },
19544     /* 118 */
19545     { { 0x2c054ac4,0x39c5d87b,0xf27df788,0x57005859,0xb18128d6,0xedf7cbf3,
19546         0x991c2426,0xb39a23f2 },
19547       { 0xf0b16ae5,0x95284a15,0xa136f51b,0x0c6a05b1,0xf2700783,0x1d63c137,
19548         0xc0674cc5,0x04ed0092 } },
19549     /* 119 */
19550     { { 0x9ae90393,0x1f4185d1,0x4a3d64e6,0x3047b429,0x9854fc14,0xae0001a6,
19551         0x0177c387,0xa0a91fc1 },
19552       { 0xae2c831e,0xff0a3f01,0x2b727e16,0xbb76ae82,0x5a3075b4,0x8f12c8a1,
19553         0x9ed20c41,0x084cf988 } },
19554     /* 120 */
19555     { { 0xfca6becf,0xd98509de,0x7dffb328,0x2fceae80,0x4778e8b9,0x5d8a15c4,
19556         0x73abf77e,0xd57955b2 },
19557       { 0x31b5d4f1,0x210da79e,0x3cfa7a1c,0xaa52f04b,0xdc27c20b,0xd4d12089,
19558         0x02d141f1,0x8e14ea42 } },
19559     /* 121 */
19560     { { 0xf2897042,0xeed50345,0x43402c4a,0x8d05331f,0xc8bdfb21,0xc8d9c194,
19561         0x2aa4d158,0x597e1a37 },
19562       { 0xcf0bd68c,0x0327ec1a,0xab024945,0x6d4be0dc,0xc9fe3e84,0x5b9c8d7a,
19563         0x199b4dea,0xca3f0236 } },
19564     /* 122 */
19565     { { 0x6170bd20,0x592a10b5,0x6d3f5de7,0x0ea897f1,0x44b2ade2,0xa3363ff1,
19566         0x309c07e4,0xbde7fd7e },
19567       { 0xb8f5432c,0x516bb6d2,0xe043444b,0x210dc1cb,0xf8f95b5a,0x3db01e6f,
19568         0x0a7dd198,0xb623ad0e } },
19569     /* 123 */
19570     { { 0x60c7b65b,0xa75bd675,0x23a4a289,0xab8c5590,0xd7b26795,0xf8220fd0,
19571         0x58ec137b,0xd6aa2e46 },
19572       { 0x5138bb85,0x10abc00b,0xd833a95c,0x8c31d121,0x1702a32e,0xb24ff00b,
19573         0x2dcc513a,0x111662e0 } },
19574     /* 124 */
19575     { { 0xefb42b87,0x78114015,0x1b6c4dff,0xbd9f5d70,0xa7d7c129,0x66ecccd7,
19576         0x94b750f8,0xdb3ee1cb },
19577       { 0xf34837cf,0xb26f3db0,0xb9578d4f,0xe7eed18b,0x7c56657d,0x5d2cdf93,
19578         0x52206a59,0x886a6442 } },
19579     /* 125 */
19580     { { 0x65b569ea,0x3c234cfb,0xf72119c1,0x20011141,0xa15a619e,0x8badc85d,
19581         0x018a17bc,0xa70cf4eb },
19582       { 0x8c4a6a65,0x224f97ae,0x0134378f,0x36e5cf27,0x4f7e0960,0xbe3a609e,
19583         0xd1747b77,0xaa4772ab } },
19584     /* 126 */
19585     { { 0x7aa60cc0,0x67676131,0x0368115f,0xc7916361,0xbbc1bb5a,0xded98bb4,
19586         0x30faf974,0x611a6ddc },
19587       { 0xc15ee47a,0x30e78cbc,0x4e0d96a5,0x2e896282,0x3dd9ed88,0x36f35adf,
19588         0x16429c88,0x5cfffaf8 } },
19589     /* 127 */
19590     { { 0x9b7a99cd,0xc0d54cff,0x843c45a1,0x7bf3b99d,0x62c739e1,0x038a908f,
19591         0x7dc1994c,0x6e5a6b23 },
19592       { 0x0ba5db77,0xef8b454e,0xacf60d63,0xb7b8807f,0x76608378,0xe591c0c6,
19593         0x242dabcc,0x481a238d } },
19594     /* 128 */
19595     { { 0x35d0b34a,0xe3417bc0,0x8327c0a7,0x440b386b,0xac0362d1,0x8fb7262d,
19596         0xe0cdf943,0x2c41114c },
19597       { 0xad95a0b1,0x2ba5cef1,0x67d54362,0xc09b37a8,0x01e486c9,0x26d6cdd2,
19598         0x42ff9297,0x20477abf } },
19599     /* 129 */
19600     { { 0x18d65dbf,0x2f75173c,0x339edad8,0x77bf940e,0xdcf1001c,0x7022d26b,
19601         0xc77396b6,0xac66409a },
19602       { 0xc6261cc3,0x8b0bb36f,0x190e7e90,0x213f7bc9,0xa45e6c10,0x6541ceba,
19603         0xcc122f85,0xce8e6975 } },
19604     /* 130 */
19605     { { 0xbc0a67d2,0x0f121b41,0x444d248a,0x62d4760a,0x659b4737,0x0e044f1d,
19606         0x250bb4a8,0x08fde365 },
19607       { 0x848bf287,0xaceec3da,0xd3369d6e,0xc2a62182,0x92449482,0x3582dfdc,
19608         0x565d6cd7,0x2f7e2fd2 } },
19609     /* 131 */
19610     { { 0xc3770fa7,0xae4b92db,0x379043f9,0x095e8d5c,0x17761171,0x54f34e9d,
19611         0x907702ae,0xc65be92e },
19612       { 0xf6fd0a40,0x2758a303,0xbcce784b,0xe7d822e3,0x4f9767bf,0x7ae4f585,
19613         0xd1193b3a,0x4bff8e47 } },
19614     /* 132 */
19615     { { 0x00ff1480,0xcd41d21f,0x0754db16,0x2ab8fb7d,0xbbe0f3ea,0xac81d2ef,
19616         0x5772967d,0x3e4e4ae6 },
19617       { 0x3c5303e6,0x7e18f36d,0x92262397,0x3bd9994b,0x1324c3c0,0x9ed70e26,
19618         0x58ec6028,0x5388aefd } },
19619     /* 133 */
19620     { { 0x5e5d7713,0xad1317eb,0x75de49da,0x09b985ee,0xc74fb261,0x32f5bc4f,
19621         0x4f75be0e,0x5cf908d1 },
19622       { 0x8e657b12,0x76043510,0xb96ed9e6,0xbfd421a5,0x8970ccc2,0x0e29f51f,
19623         0x60f00ce2,0xa698ba40 } },
19624     /* 134 */
19625     { { 0xef748fec,0x73db1686,0x7e9d2cf9,0xe6e755a2,0xce265eff,0x630b6544,
19626         0x7aebad8d,0xb142ef8a },
19627       { 0x17d5770a,0xad31af9f,0x2cb3412f,0x66af3b67,0xdf3359de,0x6bd60d1b,
19628         0x58515075,0xd1896a96 } },
19629     /* 135 */
19630     { { 0x33c41c08,0xec5957ab,0x5468e2e1,0x87de94ac,0xac472f6c,0x18816b73,
19631         0x7981da39,0x267b0e0b },
19632       { 0x8e62b988,0x6e554e5d,0x116d21e7,0xd8ddc755,0x3d2a6f99,0x4610faf0,
19633         0xa1119393,0xb54e287a } },
19634     /* 136 */
19635     { { 0x178a876b,0x0a0122b5,0x085104b4,0x51ff96ff,0x14f29f76,0x050b31ab,
19636         0x5f87d4e6,0x84abb28b },
19637       { 0x8270790a,0xd5ed439f,0x85e3f46b,0x2d6cb59d,0x6c1e2212,0x75f55c1b,
19638         0x17655640,0xe5436f67 } },
19639     /* 137 */
19640     { { 0x2286e8d5,0x53f9025e,0x864453be,0x353c95b4,0xe408e3a0,0xd832f5bd,
19641         0x5b9ce99e,0x0404f68b },
19642       { 0xa781e8e5,0xcad33bde,0x163c2f5b,0x3cdf5018,0x0119caa3,0x57576960,
19643         0x0ac1c701,0x3a4263df } },
19644     /* 138 */
19645     { { 0x9aeb596d,0xc2965ecc,0x023c92b4,0x01ea03e7,0x2e013961,0x4704b4b6,
19646         0x905ea367,0x0ca8fd3f },
19647       { 0x551b2b61,0x92523a42,0x390fcd06,0x1eb7a89c,0x0392a63e,0xe7f1d2be,
19648         0x4ddb0c33,0x96dca264 } },
19649     /* 139 */
19650     { { 0x387510af,0x203bb43a,0xa9a36a01,0x846feaa8,0x2f950378,0xd23a5770,
19651         0x3aad59dc,0x4363e212 },
19652       { 0x40246a47,0xca43a1c7,0xe55dd24d,0xb362b8d2,0x5d8faf96,0xf9b08604,
19653         0xd8bb98c4,0x840e115c } },
19654     /* 140 */
19655     { { 0x1023e8a7,0xf12205e2,0xd8dc7a0b,0xc808a8cd,0x163a5ddf,0xe292a272,
19656         0x30ded6d4,0x5e0d6abd },
19657       { 0x7cfc0f64,0x07a721c2,0x0e55ed88,0x42eec01d,0x1d1f9db2,0x26a7bef9,
19658         0x2945a25a,0x7dea48f4 } },
19659     /* 141 */
19660     { { 0xe5060a81,0xabdf6f1c,0xf8f95615,0xe79f9c72,0x06ac268b,0xcfd36c54,
19661         0xebfd16d1,0xabc2a2be },
19662       { 0xd3e2eac7,0x8ac66f91,0xd2dd0466,0x6f10ba63,0x0282d31b,0x6790e377,
19663         0x6c7eefc1,0x4ea35394 } },
19664     /* 142 */
19665     { { 0x5266309d,0xed8a2f8d,0x81945a3e,0x0a51c6c0,0x578c5dc1,0xcecaf45a,
19666         0x1c94ffc3,0x3a76e689 },
19667       { 0x7d7b0d0f,0x9aace8a4,0x8f584a5f,0x963ace96,0x4e697fbe,0x51a30c72,
19668         0x465e6464,0x8212a10a } },
19669     /* 143 */
19670     { { 0xcfab8caa,0xef7c61c3,0x0e142390,0x18eb8e84,0x7e9733ca,0xcd1dff67,
19671         0x599cb164,0xaa7cab71 },
19672       { 0xbc837bd1,0x02fc9273,0xc36af5d7,0xc06407d0,0xf423da49,0x17621292,
19673         0xfe0617c3,0x40e38073 } },
19674     /* 144 */
19675     { { 0xa7bf9b7c,0xf4f80824,0x3fbe30d0,0x365d2320,0x97cf9ce3,0xbfbe5320,
19676         0xb3055526,0xe3604700 },
19677       { 0x6cc6c2c7,0x4dcb9911,0xba4cbee6,0x72683708,0x637ad9ec,0xdcded434,
19678         0xa3dee15f,0x6542d677 } },
19679     /* 145 */
19680     { { 0x7b6c377a,0x3f32b6d0,0x903448be,0x6cb03847,0x20da8af7,0xd6fdd3a8,
19681         0x09bb6f21,0xa6534aee },
19682       { 0x1035facf,0x30a1780d,0x9dcb47e6,0x35e55a33,0xc447f393,0x6ea50fe1,
19683         0xdc9aef22,0xf3cb672f } },
19684     /* 146 */
19685     { { 0x3b55fd83,0xeb3719fe,0x875ddd10,0xe0d7a46c,0x05cea784,0x33ac9fa9,
19686         0xaae870e7,0x7cafaa2e },
19687       { 0x1d53b338,0x9b814d04,0xef87e6c6,0xe0acc0a0,0x11672b0f,0xfb93d108,
19688         0xb9bd522e,0x0aab13c1 } },
19689     /* 147 */
19690     { { 0xd2681297,0xddcce278,0xb509546a,0xcb350eb1,0x7661aaf2,0x2dc43173,
19691         0x847012e9,0x4b91a602 },
19692       { 0x72f8ddcf,0xdcff1095,0x9a911af4,0x08ebf61e,0xc372430e,0x48f4360a,
19693         0x72321cab,0x49534c53 } },
19694     /* 148 */
19695     { { 0xf07b7e9d,0x83df7d71,0x13cd516f,0xa478efa3,0x6c047ee3,0x78ef264b,
19696         0xd65ac5ee,0xcaf46c4f },
19697       { 0x92aa8266,0xa04d0c77,0x913684bb,0xedf45466,0xae4b16b0,0x56e65168,
19698         0x04c6770f,0x14ce9e57 } },
19699     /* 149 */
19700     { { 0x965e8f91,0x99445e3e,0xcb0f2492,0xd3aca1ba,0x90c8a0a0,0xd31cc70f,
19701         0x3e4c9a71,0x1bb708a5 },
19702       { 0x558bdd7a,0xd5ca9e69,0x018a26b1,0x734a0508,0x4c9cf1ec,0xb093aa71,
19703         0xda300102,0xf9d126f2 } },
19704     /* 150 */
19705     { { 0xaff9563e,0x749bca7a,0xb49914a0,0xdd077afe,0xbf5f1671,0xe27a0311,
19706         0x729ecc69,0x807afcb9 },
19707       { 0xc9b08b77,0x7f8a9337,0x443c7e38,0x86c3a785,0x476fd8ba,0x85fafa59,
19708         0x6568cd8c,0x751adcd1 } },
19709     /* 151 */
19710     { { 0x10715c0d,0x8aea38b4,0x8f7697f7,0xd113ea71,0x93fbf06d,0x665eab14,
19711         0x2537743f,0x29ec4468 },
19712       { 0xb50bebbc,0x3d94719c,0xe4505422,0x399ee5bf,0x8d2dedb1,0x90cd5b3a,
19713         0x92a4077d,0xff9370e3 } },
19714     /* 152 */
19715     { { 0xc6b75b65,0x59a2d69b,0x266651c5,0x4188f8d5,0x3de9d7d2,0x28a9f33e,
19716         0xa2a9d01a,0x9776478b },
19717       { 0x929af2c7,0x8852622d,0x4e690923,0x334f5d6d,0xa89a51e9,0xce6cc7e5,
19718         0xac2f82fa,0x74a6313f } },
19719     /* 153 */
19720     { { 0xb75f079c,0xb2f4dfdd,0x18e36fbb,0x85b07c95,0xe7cd36dd,0x1b6cfcf0,
19721         0x0ff4863d,0xab75be15 },
19722       { 0x173fc9b7,0x81b367c0,0xd2594fd0,0xb90a7420,0xc4091236,0x15fdbf03,
19723         0x0b4459f6,0x4ebeac2e } },
19724     /* 154 */
19725     { { 0x5c9f2c53,0xeb6c5fe7,0x8eae9411,0xd2522011,0xf95ac5d8,0xc8887633,
19726         0x2c1baffc,0xdf99887b },
19727       { 0x850aaecb,0xbb78eed2,0x01d6a272,0x9d49181b,0xb1cdbcac,0x978dd511,
19728         0x779f4058,0x27b040a7 } },
19729     /* 155 */
19730     { { 0xf73b2eb2,0x90405db7,0x8e1b2118,0xe0df8508,0x5962327e,0x501b7152,
19731         0xe4cfa3f5,0xb393dd37 },
19732       { 0x3fd75165,0xa1230e7b,0xbcd33554,0xd66344c2,0x0f7b5022,0x6c36f1be,
19733         0xd0463419,0x09588c12 } },
19734     /* 156 */
19735     { { 0x02601c3b,0xe086093f,0xcf5c335f,0xfb0252f8,0x894aff28,0x955cf280,
19736         0xdb9f648b,0x81c879a9 },
19737       { 0xc6f56c51,0x040e687c,0x3f17618c,0xfed47169,0x9059353b,0x44f88a41,
19738         0x5fc11bc4,0xfa0d48f5 } },
19739     /* 157 */
19740     { { 0xe1608e4d,0xbc6e1c9d,0x3582822c,0x010dda11,0x157ec2d7,0xf6b7ddc1,
19741         0xb6a367d6,0x8ea0e156 },
19742       { 0x2383b3b4,0xa354e02f,0x3f01f53c,0x69966b94,0x2de03ca5,0x4ff6632b,
19743         0xfa00b5ac,0x3f5ab924 } },
19744     /* 158 */
19745     { { 0x59739efb,0x337bb0d9,0xe7ebec0d,0xc751b0f4,0x411a67d1,0x2da52dd6,
19746         0x2b74256e,0x8bc76887 },
19747       { 0x82d3d253,0xa5be3b72,0xf58d779f,0xa9f679a1,0xe16767bb,0xa1cac168,
19748         0x60fcf34f,0xb386f190 } },
19749     /* 159 */
19750     { { 0x2fedcfc2,0x31f3c135,0x62f8af0d,0x5396bf62,0xe57288c2,0x9a02b4ea,
19751         0x1b069c4d,0x4cb460f7 },
19752       { 0x5b8095ea,0xae67b4d3,0x6fc07603,0x92bbf859,0xb614a165,0xe1475f66,
19753         0x95ef5223,0x52c0d508 } },
19754     /* 160 */
19755     { { 0x15339848,0x231c210e,0x70778c8d,0xe87a28e8,0x6956e170,0x9d1de661,
19756         0x2bb09c0b,0x4ac3c938 },
19757       { 0x6998987d,0x19be0551,0xae09f4d6,0x8b2376c4,0x1a3f933d,0x1de0b765,
19758         0xe39705f4,0x380d94c7 } },
19759     /* 161 */
19760     { { 0x81542e75,0x01a355aa,0xee01b9b7,0x96c724a1,0x624d7087,0x6b3a2977,
19761         0xde2637af,0x2ce3e171 },
19762       { 0xf5d5bc1a,0xcfefeb49,0x2777e2b5,0xa655607e,0x9513756c,0x4feaac2f,
19763         0x0b624e4d,0x2e6cd852 } },
19764     /* 162 */
19765     { { 0x8c31c31d,0x3685954b,0x5bf21a0c,0x68533d00,0x75c79ec9,0x0bd7626e,
19766         0x42c69d54,0xca177547 },
19767       { 0xf6d2dbb2,0xcc6edaff,0x174a9d18,0xfd0d8cbd,0xaa4578e8,0x875e8793,
19768         0x9cab2ce6,0xa976a713 } },
19769     /* 163 */
19770     { { 0x93fb353d,0x0a651f1b,0x57fcfa72,0xd75cab8b,0x31b15281,0xaa88cfa7,
19771         0x0a1f4999,0x8720a717 },
19772       { 0x693e1b90,0x8c3e8d37,0x16f6dfc3,0xd345dc0b,0xb52a8742,0x8ea8d00a,
19773         0xc769893c,0x9719ef29 } },
19774     /* 164 */
19775     { { 0x58e35909,0x820eed8d,0x33ddc116,0x9366d8dc,0x6e205026,0xd7f999d0,
19776         0xe15704c1,0xa5072976 },
19777       { 0xc4e70b2e,0x002a37ea,0x6890aa8a,0x84dcf657,0x645b2a5c,0xcd71bf18,
19778         0xf7b77725,0x99389c9d } },
19779     /* 165 */
19780     { { 0x7ada7a4b,0x238c08f2,0xfd389366,0x3abe9d03,0x766f512c,0x6b672e89,
19781         0x202c82e4,0xa88806aa },
19782       { 0xd380184e,0x6602044a,0x126a8b85,0xa8cb78c4,0xad844f17,0x79d670c0,
19783         0x4738dcfe,0x0043bffb } },
19784     /* 166 */
19785     { { 0x36d5192e,0x8d59b5dc,0x4590b2af,0xacf885d3,0x11601781,0x83566d0a,
19786         0xba6c4866,0x52f3ef01 },
19787       { 0x0edcb64d,0x3986732a,0x8068379f,0x0a482c23,0x7040f309,0x16cbe5fa,
19788         0x9ef27e75,0x3296bd89 } },
19789     /* 167 */
19790     { { 0x454d81d7,0x476aba89,0x51eb9b3c,0x9eade7ef,0x81c57986,0x619a21cd,
19791         0xaee571e9,0x3b90febf },
19792       { 0x5496f7cb,0x9393023e,0x7fb51bc4,0x55be41d8,0x99beb5ce,0x03f1dd48,
19793         0x9f810b18,0x6e88069d } },
19794     /* 168 */
19795     { { 0xb43ea1db,0xce37ab11,0x5259d292,0x0a7ff1a9,0x8f84f186,0x851b0221,
19796         0xdefaad13,0xa7222bea },
19797       { 0x2b0a9144,0xa2ac78ec,0xf2fa59c5,0x5a024051,0x6147ce38,0x91d1eca5,
19798         0xbc2ac690,0xbe94d523 } },
19799     /* 169 */
19800     { { 0x0b226ce7,0x72f4945e,0x967e8b70,0xb8afd747,0x85a6c63e,0xedea46f1,
19801         0x9be8c766,0x7782defe },
19802       { 0x3db38626,0x760d2aa4,0x76f67ad1,0x460ae787,0x54499cdb,0x341b86fc,
19803         0xa2892e4b,0x03838567 } },
19804     /* 170 */
19805     { { 0x79ec1a0f,0x2d8daefd,0xceb39c97,0x3bbcd6fd,0x58f61a95,0xf5575ffc,
19806         0xadf7b420,0xdbd986c4 },
19807       { 0x15f39eb7,0x81aa8814,0xb98d976c,0x6ee2fcf5,0xcf2f717d,0x5465475d,
19808         0x6860bbd0,0x8e24d3c4 } },
19809     /* 171 */
19810     { { 0x9a587390,0x749d8e54,0x0cbec588,0x12bb194f,0xb25983c6,0x46e07da4,
19811         0x407bafc8,0x541a99c4 },
19812       { 0x624c8842,0xdb241692,0xd86c05ff,0x6044c12a,0x4f7fcf62,0xc59d14b4,
19813         0xf57d35d1,0xc0092c49 } },
19814     /* 172 */
19815     { { 0xdf2e61ef,0xd3cc75c3,0x2e1b35ca,0x7e8841c8,0x909f29f4,0xc62d30d1,
19816         0x7286944d,0x75e40634 },
19817       { 0xbbc237d0,0xe7d41fc5,0xec4f01c9,0xc9537bf0,0x282bd534,0x91c51a16,
19818         0xc7848586,0x5b7cb658 } },
19819     /* 173 */
19820     { { 0x8a28ead1,0x964a7084,0xfd3b47f6,0x802dc508,0x767e5b39,0x9ae4bfd1,
19821         0x8df097a1,0x7ae13eba },
19822       { 0xeadd384e,0xfd216ef8,0xb6b2ff06,0x0361a2d9,0x4bcdb5f3,0x204b9878,
19823         0xe2a8e3fd,0x787d8074 } },
19824     /* 174 */
19825     { { 0x757fbb1c,0xc5e25d6b,0xca201deb,0xe47bddb2,0x6d2233ff,0x4a55e9a3,
19826         0x9ef28484,0x5c222819 },
19827       { 0x88315250,0x773d4a85,0x827097c1,0x21b21a2b,0xdef5d33f,0xab7c4ea1,
19828         0xbaf0f2b0,0xe45d37ab } },
19829     /* 175 */
19830     { { 0x28511c8a,0xd2df1e34,0xbdca6cd3,0xebb229c8,0x627c39a7,0x578a71a7,
19831         0x84dfb9d3,0xed7bc122 },
19832       { 0x93dea561,0xcf22a6df,0xd48f0ed1,0x5443f18d,0x5bad23e8,0xd8b86140,
19833         0x45ca6d27,0xaac97cc9 } },
19834     /* 176 */
19835     { { 0xa16bd00a,0xeb54ea74,0xf5c0bcc1,0xd839e9ad,0x1f9bfc06,0x092bb7f1,
19836         0x1163dc4e,0x318f97b3 },
19837       { 0xc30d7138,0xecc0c5be,0xabc30220,0x44e8df23,0xb0223606,0x2bb7972f,
19838         0x9a84ff4d,0xfa41faa1 } },
19839     /* 177 */
19840     { { 0xa6642269,0x4402d974,0x9bb783bd,0xc81814ce,0x7941e60b,0x398d38e4,
19841         0x1d26e9e2,0x38bb6b2c },
19842       { 0x6a577f87,0xc64e4a25,0xdc11fe1c,0x8b52d253,0x62280728,0xff336abf,
19843         0xce7601a5,0x94dd0905 } },
19844     /* 178 */
19845     { { 0xde93f92a,0x156cf7dc,0x89b5f315,0xa01333cb,0xc995e750,0x02404df9,
19846         0xd25c2ae9,0x92077867 },
19847       { 0x0bf39d44,0xe2471e01,0x96bb53d7,0x5f2c9020,0x5c9c3d8f,0x4c44b7b3,
19848         0xd29beb51,0x81e8428b } },
19849     /* 179 */
19850     { { 0xc477199f,0x6dd9c2ba,0x6b5ecdd9,0x8cb8eeee,0xee40fd0e,0x8af7db3f,
19851         0xdbbfa4b1,0x1b94ab62 },
19852       { 0xce47f143,0x44f0d8b3,0x63f46163,0x51e623fc,0xcc599383,0xf18f270f,
19853         0x055590ee,0x06a38e28 } },
19854     /* 180 */
19855     { { 0xb3355b49,0x2e5b0139,0xb4ebf99b,0x20e26560,0xd269f3dc,0xc08ffa6b,
19856         0x83d9d4f8,0xa7b36c20 },
19857       { 0x1b3e8830,0x64d15c3a,0xa89f9c0b,0xd5fceae1,0xe2d16930,0xcfeee4a2,
19858         0xa2822a20,0xbe54c6b4 } },
19859     /* 181 */
19860     { { 0x8d91167c,0xd6cdb3df,0xe7a6625e,0x517c3f79,0x346ac7f4,0x7105648f,
19861         0xeae022bb,0xbf30a5ab },
19862       { 0x93828a68,0x8e7785be,0x7f3ef036,0x5161c332,0x592146b2,0xe11b5feb,
19863         0x2732d13a,0xd1c820de } },
19864     /* 182 */
19865     { { 0x9038b363,0x043e1347,0x6b05e519,0x58c11f54,0x6026cad1,0x4fe57abe,
19866         0x68a18da3,0xb7d17bed },
19867       { 0xe29c2559,0x44ca5891,0x5bfffd84,0x4f7a0376,0x74e46948,0x498de4af,
19868         0x6412cc64,0x3997fd5e } },
19869     /* 183 */
19870     { { 0x8bd61507,0xf2074682,0x34a64d2a,0x29e132d5,0x8a8a15e3,0xffeddfb0,
19871         0x3c6c13e8,0x0eeb8929 },
19872       { 0xa7e259f8,0xe9b69a3e,0xd13e7e67,0xce1db7e6,0xad1fa685,0x277318f6,
19873         0xc922b6ef,0x228916f8 } },
19874     /* 184 */
19875     { { 0x0a12ab5b,0x959ae25b,0x957bc136,0xcc11171f,0xd16e2b0c,0x8058429e,
19876         0x6e93097e,0xec05ad1d },
19877       { 0xac3f3708,0x157ba5be,0x30b59d77,0x31baf935,0x118234e5,0x47b55237,
19878         0x7ff11b37,0x7d314156 } },
19879     /* 185 */
19880     { { 0xf6dfefab,0x7bd9c05c,0xdcb37707,0xbe2f2268,0x3a38bb95,0xe53ead97,
19881         0x9bc1d7a3,0xe9ce66fc },
19882       { 0x6f6a02a1,0x75aa1576,0x60e600ed,0x38c087df,0x68cdc1b9,0xf8947f34,
19883         0x72280651,0xd9650b01 } },
19884     /* 186 */
19885     { { 0x5a057e60,0x504b4c4a,0x8def25e4,0xcbccc3be,0x17c1ccbd,0xa6353208,
19886         0x804eb7a2,0x14d6699a },
19887       { 0xdb1f411a,0x2c8a8415,0xf80d769c,0x09fbaf0b,0x1c2f77ad,0xb4deef90,
19888         0x0d43598a,0x6f4c6841 } },
19889     /* 187 */
19890     { { 0x96c24a96,0x8726df4e,0xfcbd99a3,0x534dbc85,0x8b2ae30a,0x3c466ef2,
19891         0x61189abb,0x4c4350fd },
19892       { 0xf855b8da,0x2967f716,0x463c38a1,0x41a42394,0xeae93343,0xc37e1413,
19893         0x5a3118b5,0xa726d242 } },
19894     /* 188 */
19895     { { 0x948c1086,0xdae6b3ee,0xcbd3a2e1,0xf1de503d,0x03d022f3,0x3f35ed3f,
19896         0xcc6cf392,0x13639e82 },
19897       { 0xcdafaa86,0x9ac938fb,0x2654a258,0xf45bc5fb,0x45051329,0x1963b26e,
19898         0xc1a335a3,0xca9365e1 } },
19899     /* 189 */
19900     { { 0x4c3b2d20,0x3615ac75,0x904e241b,0x742a5417,0xcc9d071d,0xb08521c4,
19901         0x970b72a5,0x9ce29c34 },
19902       { 0x6d3e0ad6,0x8cc81f73,0xf2f8434c,0x8060da9e,0x6ce862d9,0x35ed1d1a,
19903         0xab42af98,0x48c4abd7 } },
19904     /* 190 */
19905     { { 0x40c7485a,0xd221b0cc,0xe5274dbf,0xead455bb,0x9263d2e8,0x493c7698,
19906         0xf67b33cb,0x78017c32 },
19907       { 0x930cb5ee,0xb9d35769,0x0c408ed2,0xc0d14e94,0x272f1a4d,0xf8b7bf55,
19908         0xde5c1c04,0x53cd0454 } },
19909     /* 191 */
19910     { { 0x5d28ccac,0xbcd585fa,0x005b746e,0x5f823e56,0xcd0123aa,0x7c79f0a1,
19911         0xd3d7fa8f,0xeea465c1 },
19912       { 0x0551803b,0x7810659f,0x7ce6af70,0x6c0b599f,0x29288e70,0x4195a770,
19913         0x7ae69193,0x1b6e42a4 } },
19914     /* 192 */
19915     { { 0xf67d04c3,0x2e80937c,0x89eeb811,0x1e312be2,0x92594d60,0x56b5d887,
19916         0x187fbd3d,0x0224da14 },
19917       { 0x0c5fe36f,0x87abb863,0x4ef51f5f,0x580f3c60,0xb3b429ec,0x964fb1bf,
19918         0x42bfff33,0x60838ef0 } },
19919     /* 193 */
19920     { { 0x7e0bbe99,0x432cb2f2,0x04aa39ee,0x7bda44f3,0x9fa93903,0x5f497c7a,
19921         0x2d331643,0x636eb202 },
19922       { 0x93ae00aa,0xfcfd0e61,0x31ae6d2f,0x875a00fe,0x9f93901c,0xf43658a2,
19923         0x39218bac,0x8844eeb6 } },
19924     /* 194 */
19925     { { 0x6b3bae58,0x114171d2,0x17e39f3e,0x7db3df71,0x81a8eada,0xcd37bc7f,
19926         0x51fb789e,0x27ba83dc },
19927       { 0xfbf54de5,0xa7df439f,0xb5fe1a71,0x7277030b,0xdb297a48,0x42ee8e35,
19928         0x87f3a4ab,0xadb62d34 } },
19929     /* 195 */
19930     { { 0xa175df2a,0x9b1168a2,0x618c32e9,0x082aa04f,0x146b0916,0xc9e4f2e7,
19931         0x75e7c8b2,0xb990fd76 },
19932       { 0x4df37313,0x0829d96b,0xd0b40789,0x1c205579,0x78087711,0x66c9ae4a,
19933         0x4d10d18d,0x81707ef9 } },
19934     /* 196 */
19935     { { 0x03d6ff96,0x97d7cab2,0x0d843360,0x5b851bfc,0xd042db4b,0x268823c4,
19936         0xd5a8aa5c,0x3792daea },
19937       { 0x941afa0b,0x52818865,0x42d83671,0xf3e9e741,0x5be4e0a7,0x17c82527,
19938         0x94b001ba,0x5abd635e } },
19939     /* 197 */
19940     { { 0x0ac4927c,0x727fa84e,0xa7c8cf23,0xe3886035,0x4adca0df,0xa4bcd5ea,
19941         0x846ab610,0x5995bf21 },
19942       { 0x829dfa33,0xe90f860b,0x958fc18b,0xcaafe2ae,0x78630366,0x9b3baf44,
19943         0xd483411e,0x44c32ca2 } },
19944     /* 198 */
19945     { { 0xe40ed80c,0xa74a97f1,0x31d2ca82,0x5f938cb1,0x7c2d6ad9,0x53f2124b,
19946         0x8082a54c,0x1f2162fb },
19947       { 0x720b173e,0x7e467cc5,0x085f12f9,0x40e8a666,0x4c9d65dc,0x8cebc20e,
19948         0xc3e907c9,0x8f1d402b } },
19949     /* 199 */
19950     { { 0xfbc4058a,0x4f592f9c,0x292f5670,0xb15e14b6,0xbc1d8c57,0xc55cfe37,
19951         0x926edbf9,0xb1980f43 },
19952       { 0x32c76b09,0x98c33e09,0x33b07f78,0x1df5279d,0x863bb461,0x6f08ead4,
19953         0x37448e45,0x2828ad9b } },
19954     /* 200 */
19955     { { 0xc4cf4ac5,0x696722c4,0xdde64afb,0xf5ac1a3f,0xe0890832,0x0551baa2,
19956         0x5a14b390,0x4973f127 },
19957       { 0x322eac5d,0xe59d8335,0x0bd9b568,0x5e07eef5,0xa2588393,0xab36720f,
19958         0xdb168ac7,0x6dac8ed0 } },
19959     /* 201 */
19960     { { 0xeda835ef,0xf7b545ae,0x1d10ed51,0x4aa113d2,0x13741b09,0x035a65e0,
19961         0x20b9de4c,0x4b23ef59 },
19962       { 0x3c4c7341,0xe82bb680,0x3f58bc37,0xd457706d,0xa51e3ee8,0x73527863,
19963         0xddf49a4e,0x4dd71534 } },
19964     /* 202 */
19965     { { 0x95476cd9,0xbf944672,0xe31a725b,0x648d072f,0xfc4b67e0,0x1441c8b8,
19966         0x2f4a4dbb,0xfd317000 },
19967       { 0x8995d0e1,0x1cb43ff4,0x0ef729aa,0x76e695d1,0x41798982,0xe0d5f976,
19968         0x9569f365,0x14fac58c } },
19969     /* 203 */
19970     { { 0xf312ae18,0xad9a0065,0xfcc93fc9,0x51958dc0,0x8a7d2846,0xd9a14240,
19971         0x36abda50,0xed7c7651 },
19972       { 0x25d4abbc,0x46270f1a,0xf1a113ea,0x9b5dd8f3,0x5b51952f,0xc609b075,
19973         0x4d2e9f53,0xfefcb7f7 } },
19974     /* 204 */
19975     { { 0xba119185,0xbd09497a,0xaac45ba4,0xd54e8c30,0xaa521179,0x492479de,
19976         0x87e0d80b,0x1801a57e },
19977       { 0xfcafffb0,0x073d3f8d,0xae255240,0x6cf33c0b,0x5b5fdfbc,0x781d763b,
19978         0x1ead1064,0x9f8fc11e } },
19979     /* 205 */
19980     { { 0x5e69544c,0x1583a171,0xf04b7813,0x0eaf8567,0x278a4c32,0x1e22a8fd,
19981         0x3d3a69a9,0xa9d3809d },
19982       { 0x59a2da3b,0x936c2c2c,0x1895c847,0x38ccbcf6,0x63d50869,0x5e65244e,
19983         0xe1178ef7,0x3006b9ae } },
19984     /* 206 */
19985     { { 0xc9eead28,0x0bb1f2b0,0x89f4dfbc,0x7eef635d,0xb2ce8939,0x074757fd,
19986         0x45f8f761,0x0ab85fd7 },
19987       { 0x3e5b4549,0xecda7c93,0x97922f21,0x4be2bb5c,0xb43b8040,0x261a1274,
19988         0x11e942c2,0xb122d675 } },
19989     /* 207 */
19990     { { 0x66a5ae7a,0x3be607be,0x76adcbe3,0x01e703fa,0x4eb6e5c5,0xaf904301,
19991         0x097dbaec,0x9f599dc1 },
19992       { 0x0ff250ed,0x6d75b718,0x349a20dc,0x8eb91574,0x10b227a3,0x425605a4,
19993         0x8a294b78,0x7d5528e0 } },
19994     /* 208 */
19995     { { 0x20c26def,0xf0f58f66,0x582b2d1e,0x025585ea,0x01ce3881,0xfbe7d79b,
19996         0x303f1730,0x28ccea01 },
19997       { 0x79644ba5,0xd1dabcd1,0x06fff0b8,0x1fc643e8,0x66b3e17b,0xa60a76fc,
19998         0xa1d013bf,0xc18baf48 } },
19999     /* 209 */
20000     { { 0x5dc4216d,0x34e638c8,0x206142ac,0x00c01067,0x95f5064a,0xd453a171,
20001         0xb7a9596b,0x9def809d },
20002       { 0x67ab8d2c,0x41e8642e,0x6237a2b6,0xb4240433,0x64c4218b,0x7d506a6d,
20003         0x68808ce5,0x0357f8b0 } },
20004     /* 210 */
20005     { { 0x4cd2cc88,0x8e9dbe64,0xf0b8f39d,0xcc61c28d,0xcd30a0c8,0x4a309874,
20006         0x1b489887,0xe4a01add },
20007       { 0xf57cd8f9,0x2ed1eeac,0xbd594c48,0x1b767d3e,0x7bd2f787,0xa7295c71,
20008         0xce10cc30,0x466d7d79 } },
20009     /* 211 */
20010     { { 0x9dada2c7,0x47d31892,0x8f9aa27d,0x4fa0a6c3,0x820a59e1,0x90e4fd28,
20011         0x451ead1a,0xc672a522 },
20012       { 0x5d86b655,0x30607cc8,0xf9ad4af1,0xf0235d3b,0x571172a6,0x99a08680,
20013         0xf2a67513,0x5e3d64fa } },
20014     /* 212 */
20015     { { 0x9b3b4416,0xaa6410c7,0xeab26d99,0xcd8fcf85,0xdb656a74,0x5ebff74a,
20016         0xeb8e42fc,0x6c8a7a95 },
20017       { 0xb02a63bd,0x10c60ba7,0x8b8f0047,0x6b2f2303,0x312d90b0,0x8c6c3738,
20018         0xad82ca91,0x348ae422 } },
20019     /* 213 */
20020     { { 0x5ccda2fb,0x7f474663,0x8e0726d2,0x22accaa1,0x492b1f20,0x85adf782,
20021         0xd9ef2d2e,0xc1074de0 },
20022       { 0xae9a65b3,0xfcf3ce44,0x05d7151b,0xfd71e4ac,0xce6a9788,0xd4711f50,
20023         0xc9e54ffc,0xfbadfbdb } },
20024     /* 214 */
20025     { { 0x20a99363,0x1713f1cd,0x6cf22775,0xb915658f,0x24d359b2,0x968175cd,
20026         0x83716fcd,0xb7f976b4 },
20027       { 0x5d6dbf74,0x5758e24d,0x71c3af36,0x8d23bafd,0x0243dfe3,0x48f47760,
20028         0xcafcc805,0xf4d41b2e } },
20029     /* 215 */
20030     { { 0xfdabd48d,0x51f1cf28,0x32c078a4,0xce81be36,0x117146e9,0x6ace2974,
20031         0xe0160f10,0x180824ea },
20032       { 0x66e58358,0x0387698b,0xce6ca358,0x63568752,0x5e41e6c5,0x82380e34,
20033         0x83cf6d25,0x67e5f639 } },
20034     /* 216 */
20035     { { 0xcf4899ef,0xf89ccb8d,0x9ebb44c0,0x949015f0,0xb2598ec9,0x546f9276,
20036         0x04c11fc6,0x9fef789a },
20037       { 0x53d2a071,0x6d367ecf,0xa4519b09,0xb10e1a7f,0x611e2eef,0xca6b3fb0,
20038         0xa99c4e20,0xbc80c181 } },
20039     /* 217 */
20040     { { 0xe5eb82e6,0x972536f8,0xf56cb920,0x1a484fc7,0x50b5da5e,0xc78e2171,
20041         0x9f8cdf10,0x49270e62 },
20042       { 0xea6b50ad,0x1a39b7bb,0xa2388ffc,0x9a0284c1,0x8107197b,0x5403eb17,
20043         0x61372f7f,0xd2ee52f9 } },
20044     /* 218 */
20045     { { 0x88e0362a,0xd37cd285,0x8fa5d94d,0x442fa8a7,0xa434a526,0xaff836e5,
20046         0xe5abb733,0xdfb478be },
20047       { 0x673eede6,0xa91f1ce7,0x2b5b2f04,0xa5390ad4,0x5530da2f,0x5e66f7bf,
20048         0x08df473a,0xd9a140b4 } },
20049     /* 219 */
20050     { { 0x6e8ea498,0x0e0221b5,0x3563ee09,0x62347829,0x335d2ade,0xe06b8391,
20051         0x623f4b1a,0x760c058d },
20052       { 0xc198aa79,0x0b89b58c,0xf07aba7f,0xf74890d2,0xfde2556a,0x4e204110,
20053         0x8f190409,0x7141982d } },
20054     /* 220 */
20055     { { 0x4d4b0f45,0x6f0a0e33,0x392a94e1,0xd9280b38,0xb3c61d5e,0x3af324c6,
20056         0x89d54e47,0x3af9d1ce },
20057       { 0x20930371,0xfd8f7981,0x21c17097,0xeda2664c,0xdc42309b,0x0e9545dc,
20058         0x73957dd6,0xb1f815c3 } },
20059     /* 221 */
20060     { { 0x89fec44a,0x84faa78e,0x3caa4caf,0xc8c2ae47,0xc1b6a624,0x691c807d,
20061         0x1543f052,0xa41aed14 },
20062       { 0x7d5ffe04,0x42435399,0x625b6e20,0x8bacb2df,0x87817775,0x85d660be,
20063         0x86fb60ef,0xd6e9c1dd } },
20064     /* 222 */
20065     { { 0xc6853264,0x3aa2e97e,0xe2304a0b,0x771533b7,0xb8eae9be,0x1b912bb7,
20066         0xae9bf8c2,0x9c9c6e10 },
20067       { 0xe030b74c,0xa2309a59,0x6a631e90,0x4ed7494d,0xa49b79f2,0x89f44b23,
20068         0x40fa61b6,0x566bd596 } },
20069     /* 223 */
20070     { { 0xc18061f3,0x066c0118,0x7c83fc70,0x190b25d3,0x27273245,0xf05fc8e0,
20071         0xf525345e,0xcf2c7390 },
20072       { 0x10eb30cf,0xa09bceb4,0x0d77703a,0xcfd2ebba,0x150ff255,0xe842c43a,
20073         0x8aa20979,0x02f51755 } },
20074     /* 224 */
20075     { { 0xaddb7d07,0x396ef794,0x24455500,0x0b4fc742,0xc78aa3ce,0xfaff8eac,
20076         0xe8d4d97d,0x14e9ada5 },
20077       { 0x2f7079e2,0xdaa480a1,0xe4b0800e,0x45baa3cd,0x7838157d,0x01765e2d,
20078         0x8e9d9ae8,0xa0ad4fab } },
20079     /* 225 */
20080     { { 0x4a653618,0x0bfb7621,0x31eaaa5f,0x1872813c,0x44949d5e,0x1553e737,
20081         0x6e56ed1e,0xbcd530b8 },
20082       { 0x32e9c47b,0x169be853,0xb50059ab,0xdc2776fe,0x192bfbb4,0xcdba9761,
20083         0x6979341d,0x909283cf } },
20084     /* 226 */
20085     { { 0x76e81a13,0x67b00324,0x62171239,0x9bee1a99,0xd32e19d6,0x08ed361b,
20086         0xace1549a,0x35eeb7c9 },
20087       { 0x7e4e5bdc,0x1280ae5a,0xb6ceec6e,0x2dcd2cd3,0x6e266bc1,0x52e4224c,
20088         0x448ae864,0x9a8b2cf4 } },
20089     /* 227 */
20090     { { 0x09d03b59,0xf6471bf2,0xb65af2ab,0xc90e62a3,0xebd5eec9,0xff7ff168,
20091         0xd4491379,0x6bdb60f4 },
20092       { 0x8a55bc30,0xdadafebc,0x10097fe0,0xc79ead16,0x4c1e3bdd,0x42e19741,
20093         0x94ba08a9,0x01ec3cfd } },
20094     /* 228 */
20095     { { 0xdc9485c2,0xba6277eb,0x22fb10c7,0x48cc9a79,0x70a28d8a,0x4f61d60f,
20096         0x475464f6,0xd1acb1c0 },
20097       { 0x26f36612,0xd26902b1,0xe0618d8b,0x59c3a44e,0x308357ee,0x4df8a813,
20098         0x405626c2,0x7dcd079d } },
20099     /* 229 */
20100     { { 0xf05a4b48,0x5ce7d4d3,0x37230772,0xadcd2952,0x812a915a,0xd18f7971,
20101         0x377d19b8,0x0bf53589 },
20102       { 0x6c68ea73,0x35ecd95a,0x823a584d,0xc7f3bbca,0xf473a723,0x9fb674c6,
20103         0xe16686fc,0xd28be4d9 } },
20104     /* 230 */
20105     { { 0x38fa8e4b,0x5d2b9906,0x893fd8fc,0x559f186e,0x436fb6fc,0x3a6de2aa,
20106         0x510f88ce,0xd76007aa },
20107       { 0x523a4988,0x2d10aab6,0x74dd0273,0xb455cf44,0xa3407278,0x7f467082,
20108         0xb303bb01,0xf2b52f68 } },
20109     /* 231 */
20110     { { 0x9835b4ca,0x0d57eafa,0xbb669cbc,0x2d2232fc,0xc6643198,0x8eeeb680,
20111         0xcc5aed3a,0xd8dbe98e },
20112       { 0xc5a02709,0xcba9be3f,0xf5ba1fa8,0x30be68e5,0xf10ea852,0xfebd43cd,
20113         0xee559705,0xe01593a3 } },
20114     /* 232 */
20115     { { 0xea75a0a6,0xd3e5af50,0x57858033,0x512226ac,0xd0176406,0x6fe6d50f,
20116         0xaeb8ef06,0xafec07b1 },
20117       { 0x80bb0a31,0x7fb99567,0x37309aae,0x6f1af3cc,0x01abf389,0x9153a15a,
20118         0x6e2dbfdd,0xa71b9354 } },
20119     /* 233 */
20120     { { 0x18f593d2,0xbf8e12e0,0xa078122b,0xd1a90428,0x0ba4f2ad,0x150505db,
20121         0x628523d9,0x53a2005c },
20122       { 0xe7f2b935,0x07c8b639,0xc182961a,0x2bff975a,0x7518ca2c,0x86bceea7,
20123         0x3d588e3d,0xbf47d19b } },
20124     /* 234 */
20125     { { 0xdd7665d5,0x672967a7,0x2f2f4de5,0x4e303057,0x80d4903f,0x144005ae,
20126         0x39c9a1b6,0x001c2c7f },
20127       { 0x69efc6d6,0x143a8014,0x7bc7a724,0xc810bdaa,0xa78150a4,0x5f65670b,
20128         0x86ffb99b,0xfdadf8e7 } },
20129     /* 235 */
20130     { { 0xffc00785,0xfd38cb88,0x3b48eb67,0x77fa7591,0xbf368fbc,0x0454d055,
20131         0x5aa43c94,0x3a838e4d },
20132       { 0x3e97bb9a,0x56166329,0x441d94d9,0x9eb93363,0x0adb2a83,0x515591a6,
20133         0x873e1da3,0x3cdb8257 } },
20134     /* 236 */
20135     { { 0x7de77eab,0x137140a9,0x41648109,0xf7e1c50d,0xceb1d0df,0x762dcad2,
20136         0xf1f57fba,0x5a60cc89 },
20137       { 0x40d45673,0x80b36382,0x5913c655,0x1b82be19,0xdd64b741,0x057284b8,
20138         0xdbfd8fc0,0x922ff56f } },
20139     /* 237 */
20140     { { 0xc9a129a1,0x1b265dee,0xcc284e04,0xa5b1ce57,0xcebfbe3c,0x04380c46,
20141         0xf6c5cd62,0x72919a7d },
20142       { 0x8fb90f9a,0x298f453a,0x88e4031b,0xd719c00b,0x796f1856,0xe32c0e77,
20143         0x3624089a,0x5e791780 } },
20144     /* 238 */
20145     { { 0x7f63cdfb,0x5c16ec55,0xf1cae4fd,0x8e6a3571,0x560597ca,0xfce26bea,
20146         0xe24c2fab,0x4e0a5371 },
20147       { 0xa5765357,0x276a40d3,0x0d73a2b4,0x3c89af44,0x41d11a32,0xb8f370ae,
20148         0xd56604ee,0xf5ff7818 } },
20149     /* 239 */
20150     { { 0x1a09df21,0xfbf3e3fe,0xe66e8e47,0x26d5d28e,0x29c89015,0x2096bd0a,
20151         0x533f5e64,0xe41df0e9 },
20152       { 0xb3ba9e3f,0x305fda40,0x2604d895,0xf2340ceb,0x7f0367c7,0x0866e192,
20153         0xac4f155f,0x8edd7d6e } },
20154     /* 240 */
20155     { { 0x0bfc8ff3,0xc9a1dc0e,0xe936f42f,0x14efd82b,0xcca381ef,0x67016f7c,
20156         0xed8aee96,0x1432c1ca },
20157       { 0x70b23c26,0xec684829,0x0735b273,0xa64fe873,0xeaef0f5a,0xe389f6e5,
20158         0x5ac8d2c6,0xcaef480b } },
20159     /* 241 */
20160     { { 0x75315922,0x5245c978,0x3063cca5,0xd8295171,0xb64ef2cb,0xf3ce60d0,
20161         0x8efae236,0xd0ba177e },
20162       { 0xb1b3af60,0x53a9ae8f,0x3d2da20e,0x1a796ae5,0xdf9eef28,0x01d63605,
20163         0x1c54ae16,0xf31c957c } },
20164     /* 242 */
20165     { { 0x49cc4597,0xc0f58d52,0xbae0a028,0xdc5015b0,0x734a814a,0xefc5fc55,
20166         0x96e17c3a,0x013404cb },
20167       { 0xc9a824bf,0xb29e2585,0x001eaed7,0xd593185e,0x61ef68ac,0x8d6ee682,
20168         0x91933e6c,0x6f377c4b } },
20169     /* 243 */
20170     { { 0xa8333fd2,0x9f93bad1,0x5a2a95b8,0xa8930202,0xeaf75ace,0x211e5037,
20171         0xd2d09506,0x6dba3e4e },
20172       { 0xd04399cd,0xa48ef98c,0xe6b73ade,0x1811c66e,0xc17ecaf3,0x72f60752,
20173         0x3becf4a7,0xf13cf342 } },
20174     /* 244 */
20175     { { 0xa919e2eb,0xceeb9ec0,0xf62c0f68,0x83a9a195,0x7aba2299,0xcfba3bb6,
20176         0x274bbad3,0xc83fa9a9 },
20177       { 0x62fa1ce0,0x0d7d1b0b,0x3418efbf,0xe58b60f5,0x52706f04,0xbfa8ef9e,
20178         0x5d702683,0xb49d70f4 } },
20179     /* 245 */
20180     { { 0xfad5513b,0x914c7510,0xb1751e2d,0x05f32eec,0xd9fb9d59,0x6d850418,
20181         0x0c30f1cf,0x59cfadbb },
20182       { 0x55cb7fd6,0xe167ac23,0x820426a3,0x249367b8,0x90a78864,0xeaeec58c,
20183         0x354a4b67,0x5babf362 } },
20184     /* 246 */
20185     { { 0xee424865,0x37c981d1,0xf2e5577f,0x8b002878,0xb9e0c058,0x702970f1,
20186         0x9026c8f0,0x6188c6a7 },
20187       { 0xd0f244da,0x06f9a19b,0xfb080873,0x1ecced5c,0x9f213637,0x35470f9b,
20188         0xdf50b9d9,0x993fe475 } },
20189     /* 247 */
20190     { { 0x9b2c3609,0x68e31cdf,0x2c46d4ea,0x84eb19c0,0x9a775101,0x7ac9ec1a,
20191         0x4c80616b,0x81f76466 },
20192       { 0x75fbe978,0x1d7c2a5a,0xf183b356,0x6743fed3,0x501dd2bf,0x838d1f04,
20193         0x5fe9060d,0x564a812a } },
20194     /* 248 */
20195     { { 0xfa817d1d,0x7a5a64f4,0xbea82e0f,0x55f96844,0xcd57f9aa,0xb5ff5a0f,
20196         0x00e51d6c,0x226bf3cf },
20197       { 0x2f2833cf,0xd6d1a9f9,0x4f4f89a8,0x20a0a35a,0x8f3f7f77,0x11536c49,
20198         0xff257836,0x68779f47 } },
20199     /* 249 */
20200     { { 0x73043d08,0x79b0c1c1,0x1fc020fa,0xa5446774,0x9a6d26d0,0xd3767e28,
20201         0xeb092e0b,0x97bcb0d1 },
20202       { 0xf32ed3c3,0x2ab6eaa8,0xb281bc48,0xc8a4f151,0xbfa178f3,0x4d1bf4f3,
20203         0x0a784655,0xa872ffe8 } },
20204     /* 250 */
20205     { { 0xa32b2086,0xb1ab7935,0x8160f486,0xe1eb710e,0x3b6ae6be,0x9bd0cd91,
20206         0xb732a36a,0x02812bfc },
20207       { 0xcf605318,0xa63fd7ca,0xfdfd6d1d,0x646e5d50,0x2102d619,0xa1d68398,
20208         0xfe5396af,0x07391cc9 } },
20209     /* 251 */
20210     { { 0x8b80d02b,0xc50157f0,0x62877f7f,0x6b8333d1,0x78d542ae,0x7aca1af8,
20211         0x7e6d2a08,0x355d2adc },
20212       { 0x287386e1,0xb41f335a,0xf8e43275,0xfd272a94,0xe79989ea,0x286ca2cd,
20213         0x7c2a3a79,0x3dc2b1e3 } },
20214     /* 252 */
20215     { { 0x04581352,0xd689d21c,0x376782be,0x0a00c825,0x9fed701f,0x203bd590,
20216         0x3ccd846b,0xc4786910 },
20217       { 0x24c768ed,0x5dba7708,0x6841f657,0x72feea02,0x6accce0e,0x73313ed5,
20218         0xd5bb4d32,0xccc42968 } },
20219     /* 253 */
20220     { { 0x3d7620b9,0x94e50de1,0x5992a56a,0xd89a5c8a,0x675487c9,0xdc007640,
20221         0xaa4871cf,0xe147eb42 },
20222       { 0xacf3ae46,0x274ab4ee,0x50350fbe,0xfd4936fb,0x48c840ea,0xdf2afe47,
20223         0x080e96e3,0x239ac047 } },
20224     /* 254 */
20225     { { 0x2bfee8d4,0x481d1f35,0xfa7b0fec,0xce80b5cf,0x2ce9af3c,0x105c4c9e,
20226         0xf5f7e59d,0xc55fa1a3 },
20227       { 0x8257c227,0x3186f14e,0x342be00b,0xc5b1653f,0xaa904fb2,0x09afc998,
20228         0xd4f4b699,0x094cd99c } },
20229     /* 255 */
20230     { { 0xd703beba,0x8a981c84,0x32ceb291,0x8631d150,0xe3bd49ec,0xa445f2c9,
20231         0x42abad33,0xb90a30b6 },
20232       { 0xb4a5abf9,0xb465404f,0x75db7603,0x004750c3,0xca35d89f,0x6f9a42cc,
20233         0x1b7924f7,0x019f8b9a } },
20234 };
20235 
20236 /* Multiply the base point of P256 by the scalar and return the result.
20237  * If map is true then convert result to affine coordinates.
20238  *
20239  * r     Resulting point.
20240  * k     Scalar to multiply by.
20241  * map   Indicates whether to convert result to affine.
20242  * heap  Heap to use for allocation.
20243  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
20244  */
20245 static int sp_256_ecc_mulmod_base_8(sp_point_256* r, const sp_digit* k,
20246         int map, void* heap)
20247 {
20248     return sp_256_ecc_mulmod_stripe_8(r, &p256_base, p256_table,
20249                                       k, map, heap);
20250 }
20251 
20252 #endif
20253 
20254 /* Multiply the base point of P256 by the scalar and return the result.
20255  * If map is true then convert result to affine coordinates.
20256  *
20257  * km    Scalar to multiply by.
20258  * r     Resulting point.
20259  * map   Indicates whether to convert result to affine.
20260  * heap  Heap to use for allocation.
20261  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
20262  */
20263 int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
20264 {
20265 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
20266     sp_point_256 p;
20267     sp_digit kd[8];
20268 #endif
20269     sp_point_256* point;
20270     sp_digit* k = NULL;
20271     int err = MP_OKAY;
20272 
20273     err = sp_256_point_new_8(heap, p, point);
20274 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
20275     if (err == MP_OKAY) {
20276         k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
20277                                                               DYNAMIC_TYPE_ECC);
20278         if (k == NULL) {
20279             err = MEMORY_E;
20280         }
20281     }
20282 #else
20283     k = kd;
20284 #endif
20285     if (err == MP_OKAY) {
20286         sp_256_from_mp(k, 8, km);
20287 
20288             err = sp_256_ecc_mulmod_base_8(point, k, map, heap);
20289     }
20290     if (err == MP_OKAY) {
20291         err = sp_256_point_to_ecc_point_8(point, r);
20292     }
20293 
20294 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
20295     if (k != NULL) {
20296         XFREE(k, heap, DYNAMIC_TYPE_ECC);
20297     }
20298 #endif
20299     sp_256_point_free_8(point, 0, heap);
20300 
20301     return err;
20302 }
20303 
20304 #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
20305                                                         defined(HAVE_ECC_VERIFY)
20306 /* Returns 1 if the number of zero.
20307  * Implementation is constant time.
20308  *
20309  * a  Number to check.
20310  * returns 1 if the number is zero and 0 otherwise.
20311  */
20312 static int sp_256_iszero_8(const sp_digit* a)
20313 {
20314     return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7]) == 0;
20315 }
20316 
20317 #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
20318 /* Add 1 to a. (a = a + 1)
20319  *
20320  * a  A single precision integer.
20321  */
20322 SP_NOINLINE static void sp_256_add_one_8(sp_digit* a)
20323 {
20324     __asm__ __volatile__ (
20325         "mov    r2, #1\n\t"
20326         "ldr    r1, [%[a], #0]\n\t"
20327         "add    r1, r2\n\t"
20328         "mov    r2, #0\n\t"
20329         "str    r1, [%[a], #0]\n\t"
20330         "ldr    r1, [%[a], #4]\n\t"
20331         "adc    r1, r2\n\t"
20332         "str    r1, [%[a], #4]\n\t"
20333         "ldr    r1, [%[a], #8]\n\t"
20334         "adc    r1, r2\n\t"
20335         "str    r1, [%[a], #8]\n\t"
20336         "ldr    r1, [%[a], #12]\n\t"
20337         "adc    r1, r2\n\t"
20338         "str    r1, [%[a], #12]\n\t"
20339         "ldr    r1, [%[a], #16]\n\t"
20340         "adc    r1, r2\n\t"
20341         "str    r1, [%[a], #16]\n\t"
20342         "ldr    r1, [%[a], #20]\n\t"
20343         "adc    r1, r2\n\t"
20344         "str    r1, [%[a], #20]\n\t"
20345         "ldr    r1, [%[a], #24]\n\t"
20346         "adc    r1, r2\n\t"
20347         "str    r1, [%[a], #24]\n\t"
20348         "ldr    r1, [%[a], #28]\n\t"
20349         "adc    r1, r2\n\t"
20350         "str    r1, [%[a], #28]\n\t"
20351         :
20352         : [a] "r" (a)
20353         : "memory", "r1", "r2"
20354     );
20355 }
20356 
20357 /* Read big endian unsigned byte array into r.
20358  *
20359  * r  A single precision integer.
20360  * size  Maximum number of bytes to convert
20361  * a  Byte array.
20362  * n  Number of bytes in array to read.
20363  */
20364 static void sp_256_from_bin(sp_digit* r, int size, const byte* a, int n)
20365 {
20366     int i, j = 0;
20367     word32 s = 0;
20368 
20369     r[0] = 0;
20370     for (i = n-1; i >= 0; i--) {
20371         r[j] |= (((sp_digit)a[i]) << s);
20372         if (s >= 24U) {
20373             r[j] &= 0xffffffff;
20374             s = 32U - s;
20375             if (j + 1 >= size) {
20376                 break;
20377             }
20378             r[++j] = (sp_digit)a[i] >> s;
20379             s = 8U - s;
20380         }
20381         else {
20382             s += 8U;
20383         }
20384     }
20385 
20386     for (j++; j < size; j++) {
20387         r[j] = 0;
20388     }
20389 }
20390 
20391 /* Generates a scalar that is in the range 1..order-1.
20392  *
20393  * rng  Random number generator.
20394  * k    Scalar value.
20395  * returns RNG failures, MEMORY_E when memory allocation fails and
20396  * MP_OKAY on success.
20397  */
20398 static int sp_256_ecc_gen_k_8(WC_RNG* rng, sp_digit* k)
20399 {
20400     int err;
20401     byte buf[32];
20402 
20403     do {
20404         err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
20405         if (err == 0) {
20406             sp_256_from_bin(k, 8, buf, (int)sizeof(buf));
20407             if (sp_256_cmp_8(k, p256_order2) < 0) {
20408                 sp_256_add_one_8(k);
20409                 break;
20410             }
20411         }
20412     }
20413     while (err == 0);
20414 
20415     return err;
20416 }
20417 
20418 /* Makes a random EC key pair.
20419  *
20420  * rng   Random number generator.
20421  * priv  Generated private value.
20422  * pub   Generated public point.
20423  * heap  Heap to use for allocation.
20424  * returns ECC_INF_E when the point does not have the correct order, RNG
20425  * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
20426  */
20427 int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
20428 {
20429 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
20430     sp_point_256 p;
20431     sp_digit kd[8];
20432 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
20433     sp_point_256 inf;
20434 #endif
20435 #endif
20436     sp_point_256* point;
20437     sp_digit* k = NULL;
20438 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
20439     sp_point_256* infinity;
20440 #endif
20441     int err;
20442 
20443     (void)heap;
20444 
20445     err = sp_256_point_new_8(heap, p, point);
20446 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
20447     if (err == MP_OKAY) {
20448         err = sp_256_point_new_8(heap, inf, infinity);
20449     }
20450 #endif
20451 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
20452     if (err == MP_OKAY) {
20453         k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
20454                                                               DYNAMIC_TYPE_ECC);
20455         if (k == NULL) {
20456             err = MEMORY_E;
20457         }
20458     }
20459 #else
20460     k = kd;
20461 #endif
20462 
20463     if (err == MP_OKAY) {
20464         err = sp_256_ecc_gen_k_8(rng, k);
20465     }
20466     if (err == MP_OKAY) {
20467             err = sp_256_ecc_mulmod_base_8(point, k, 1, NULL);
20468     }
20469 
20470 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
20471     if (err == MP_OKAY) {
20472             err = sp_256_ecc_mulmod_8(infinity, point, p256_order, 1, NULL);
20473     }
20474     if (err == MP_OKAY) {
20475         if ((sp_256_iszero_8(point->x) == 0) || (sp_256_iszero_8(point->y) == 0)) {
20476             err = ECC_INF_E;
20477         }
20478     }
20479 #endif
20480 
20481     if (err == MP_OKAY) {
20482         err = sp_256_to_mp(k, priv);
20483     }
20484     if (err == MP_OKAY) {
20485         err = sp_256_point_to_ecc_point_8(point, pub);
20486     }
20487 
20488 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
20489     if (k != NULL) {
20490         XFREE(k, heap, DYNAMIC_TYPE_ECC);
20491     }
20492 #endif
20493 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
20494     sp_256_point_free_8(infinity, 1, heap);
20495 #endif
20496     sp_256_point_free_8(point, 1, heap);
20497 
20498     return err;
20499 }
20500 
20501 #ifdef HAVE_ECC_DHE
20502 /* Write r as big endian to byte array.
20503  * Fixed length number of bytes written: 32
20504  *
20505  * r  A single precision integer.
20506  * a  Byte array.
20507  */
20508 static void sp_256_to_bin(sp_digit* r, byte* a)
20509 {
20510     int i, j, s = 0, b;
20511 
20512     j = 256 / 8 - 1;
20513     a[j] = 0;
20514     for (i=0; i<8 && j>=0; i++) {
20515         b = 0;
20516         /* lint allow cast of mismatch sp_digit and int */
20517         a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
20518         b += 8 - s;
20519         if (j < 0) {
20520             break;
20521         }
20522         while (b < 32) {
20523             a[j--] = (byte)(r[i] >> b);
20524             b += 8;
20525             if (j < 0) {
20526                 break;
20527             }
20528         }
20529         s = 8 - (b - 32);
20530         if (j >= 0) {
20531             a[j] = 0;
20532         }
20533         if (s != 0) {
20534             j++;
20535         }
20536     }
20537 }
20538 
20539 /* Multiply the point by the scalar and serialize the X ordinate.
20540  * The number is 0 padded to maximum size on output.
20541  *
20542  * priv    Scalar to multiply the point by.
20543  * pub     Point to multiply.
20544  * out     Buffer to hold X ordinate.
20545  * outLen  On entry, size of the buffer in bytes.
20546  *         On exit, length of data in buffer in bytes.
20547  * heap    Heap to use for allocation.
20548  * returns BUFFER_E if the buffer is to small for output size,
20549  * MEMORY_E when memory allocation fails and MP_OKAY on success.
20550  */
20551 int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
20552                           word32* outLen, void* heap)
20553 {
20554 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
20555     sp_point_256 p;
20556     sp_digit kd[8];
20557 #endif
20558     sp_point_256* point = NULL;
20559     sp_digit* k = NULL;
20560     int err = MP_OKAY;
20561 
20562     if (*outLen < 32U) {
20563         err = BUFFER_E;
20564     }
20565 
20566     if (err == MP_OKAY) {
20567         err = sp_256_point_new_8(heap, p, point);
20568     }
20569 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
20570     if (err == MP_OKAY) {
20571         k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
20572                                                               DYNAMIC_TYPE_ECC);
20573         if (k == NULL)
20574             err = MEMORY_E;
20575     }
20576 #else
20577     k = kd;
20578 #endif
20579 
20580     if (err == MP_OKAY) {
20581         sp_256_from_mp(k, 8, priv);
20582         sp_256_point_from_ecc_point_8(point, pub);
20583             err = sp_256_ecc_mulmod_8(point, point, k, 1, heap);
20584     }
20585     if (err == MP_OKAY) {
20586         sp_256_to_bin(point->x, out);
20587         *outLen = 32;
20588     }
20589 
20590 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
20591     if (k != NULL) {
20592         XFREE(k, heap, DYNAMIC_TYPE_ECC);
20593     }
20594 #endif
20595     sp_256_point_free_8(point, 0, heap);
20596 
20597     return err;
20598 }
20599 #endif /* HAVE_ECC_DHE */
20600 
20601 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
20602 #endif
20603 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
20604 #ifdef WOLFSSL_SP_SMALL
20605 /* Sub b from a into a. (a -= b)
20606  *
20607  * a  A single precision integer.
20608  * b  A single precision integer.
20609  */
20610 SP_NOINLINE static sp_digit sp_256_sub_in_place_8(sp_digit* a,
20611         const sp_digit* b)
20612 {
20613     sp_digit c = 0;
20614     __asm__ __volatile__ (
20615         "mov    r7, %[a]\n\t"
20616         "add    r7, #32\n\t"
20617         "\n1:\n\t"
20618         "mov    r5, #0\n\t"
20619         "sub    r5, %[c]\n\t"
20620         "ldr    r3, [%[a]]\n\t"
20621         "ldr    r4, [%[a], #4]\n\t"
20622         "ldr    r5, [%[b]]\n\t"
20623         "ldr    r6, [%[b], #4]\n\t"
20624         "sbc    r3, r5\n\t"
20625         "sbc    r4, r6\n\t"
20626         "str    r3, [%[a]]\n\t"
20627         "str    r4, [%[a], #4]\n\t"
20628         "sbc    %[c], %[c]\n\t"
20629         "add    %[a], #8\n\t"
20630         "add    %[b], #8\n\t"
20631         "cmp    %[a], r7\n\t"
20632         "bne    1b\n\t"
20633         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
20634         :
20635         : "memory", "r3", "r4", "r5", "r6", "r7"
20636     );
20637 
20638     return c;
20639 }
20640 
20641 #else
20642 /* Sub b from a into r. (r = a - b)
20643  *
20644  * r  A single precision integer.
20645  * a  A single precision integer.
20646  * b  A single precision integer.
20647  */
20648 SP_NOINLINE static sp_digit sp_256_sub_in_place_8(sp_digit* a,
20649         const sp_digit* b)
20650 {
20651     sp_digit c = 0;
20652 
20653     __asm__ __volatile__ (
20654         "ldr    r3, [%[a], #0]\n\t"
20655         "ldr    r4, [%[a], #4]\n\t"
20656         "ldr    r5, [%[b], #0]\n\t"
20657         "ldr    r6, [%[b], #4]\n\t"
20658         "sub    r3, r5\n\t"
20659         "sbc    r4, r6\n\t"
20660         "str    r3, [%[a], #0]\n\t"
20661         "str    r4, [%[a], #4]\n\t"
20662         "ldr    r3, [%[a], #8]\n\t"
20663         "ldr    r4, [%[a], #12]\n\t"
20664         "ldr    r5, [%[b], #8]\n\t"
20665         "ldr    r6, [%[b], #12]\n\t"
20666         "sbc    r3, r5\n\t"
20667         "sbc    r4, r6\n\t"
20668         "str    r3, [%[a], #8]\n\t"
20669         "str    r4, [%[a], #12]\n\t"
20670         "ldr    r3, [%[a], #16]\n\t"
20671         "ldr    r4, [%[a], #20]\n\t"
20672         "ldr    r5, [%[b], #16]\n\t"
20673         "ldr    r6, [%[b], #20]\n\t"
20674         "sbc    r3, r5\n\t"
20675         "sbc    r4, r6\n\t"
20676         "str    r3, [%[a], #16]\n\t"
20677         "str    r4, [%[a], #20]\n\t"
20678         "ldr    r3, [%[a], #24]\n\t"
20679         "ldr    r4, [%[a], #28]\n\t"
20680         "ldr    r5, [%[b], #24]\n\t"
20681         "ldr    r6, [%[b], #28]\n\t"
20682         "sbc    r3, r5\n\t"
20683         "sbc    r4, r6\n\t"
20684         "str    r3, [%[a], #24]\n\t"
20685         "str    r4, [%[a], #28]\n\t"
20686         "sbc    %[c], %[c]\n\t"
20687         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
20688         :
20689         : "memory", "r3", "r4", "r5", "r6"
20690     );
20691 
20692     return c;
20693 }
20694 
20695 #endif /* WOLFSSL_SP_SMALL */
20696 /* Mul a by digit b into r. (r = a * b)
20697  *
20698  * r  A single precision integer.
20699  * a  A single precision integer.
20700  * b  A single precision digit.
20701  */
20702 SP_NOINLINE static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a,
20703         sp_digit b)
20704 {
20705     __asm__ __volatile__ (
20706         "mov    r6, #32\n\t"
20707         "add    r6, %[a]\n\t"
20708         "mov    r8, %[r]\n\t"
20709         "mov    r9, r6\n\t"
20710         "mov    r3, #0\n\t"
20711         "mov    r4, #0\n\t"
20712         "1:\n\t"
20713         "mov    %[r], #0\n\t"
20714         "mov    r5, #0\n\t"
20715         "# A[] * B\n\t"
20716         "ldr    r6, [%[a]]\n\t"
20717         "lsl    r6, r6, #16\n\t"
20718         "lsl    r7, %[b], #16\n\t"
20719         "lsr    r6, r6, #16\n\t"
20720         "lsr    r7, r7, #16\n\t"
20721         "mul    r7, r6\n\t"
20722         "add    r3, r7\n\t"
20723         "adc    r4, %[r]\n\t"
20724         "adc    r5, %[r]\n\t"
20725         "lsr    r7, %[b], #16\n\t"
20726         "mul    r6, r7\n\t"
20727         "lsr    r7, r6, #16\n\t"
20728         "lsl    r6, r6, #16\n\t"
20729         "add    r3, r6\n\t"
20730         "adc    r4, r7\n\t"
20731         "adc    r5, %[r]\n\t"
20732         "ldr    r6, [%[a]]\n\t"
20733         "lsr    r6, r6, #16\n\t"
20734         "lsr    r7, %[b], #16\n\t"
20735         "mul    r7, r6\n\t"
20736         "add    r4, r7\n\t"
20737         "adc    r5, %[r]\n\t"
20738         "lsl    r7, %[b], #16\n\t"
20739         "lsr    r7, r7, #16\n\t"
20740         "mul    r6, r7\n\t"
20741         "lsr    r7, r6, #16\n\t"
20742         "lsl    r6, r6, #16\n\t"
20743         "add    r3, r6\n\t"
20744         "adc    r4, r7\n\t"
20745         "adc    r5, %[r]\n\t"
20746         "# A[] * B - Done\n\t"
20747         "mov    %[r], r8\n\t"
20748         "str    r3, [%[r]]\n\t"
20749         "mov    r3, r4\n\t"
20750         "mov    r4, r5\n\t"
20751         "add    %[r], #4\n\t"
20752         "add    %[a], #4\n\t"
20753         "mov    r8, %[r]\n\t"
20754         "cmp    %[a], r9\n\t"
20755         "blt    1b\n\t"
20756         "str    r3, [%[r]]\n\t"
20757         : [r] "+r" (r), [a] "+r" (a)
20758         : [b] "r" (b)
20759         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
20760     );
20761 }
20762 
20763 /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
20764  *
20765  * d1   The high order half of the number to divide.
20766  * d0   The low order half of the number to divide.
20767  * div  The dividend.
20768  * returns the result of the division.
20769  *
20770  * Note that this is an approximate div. It may give an answer 1 larger.
20771  */
20772 SP_NOINLINE static sp_digit div_256_word_8(sp_digit d1, sp_digit d0,
20773         sp_digit div)
20774 {
20775     sp_digit r = 0;
20776 
20777     __asm__ __volatile__ (
20778         "lsr    r5, %[div], #1\n\t"
20779         "add    r5, #1\n\t"
20780         "mov    r8, %[d0]\n\t"
20781         "mov    r9, %[d1]\n\t"
20782         "# Do top 32\n\t"
20783         "mov    r6, r5\n\t"
20784         "sub    r6, %[d1]\n\t"
20785         "sbc    r6, r6\n\t"
20786         "add    %[r], %[r]\n\t"
20787         "sub    %[r], r6\n\t"
20788         "and    r6, r5\n\t"
20789         "sub    %[d1], r6\n\t"
20790         "# Next 30 bits\n\t"
20791         "mov    r4, #29\n\t"
20792         "1:\n\t"
20793         "lsl    %[d0], %[d0], #1\n\t"
20794         "adc    %[d1], %[d1]\n\t"
20795         "mov    r6, r5\n\t"
20796         "sub    r6, %[d1]\n\t"
20797         "sbc    r6, r6\n\t"
20798         "add    %[r], %[r]\n\t"
20799         "sub    %[r], r6\n\t"
20800         "and    r6, r5\n\t"
20801         "sub    %[d1], r6\n\t"
20802         "sub    r4, #1\n\t"
20803         "bpl    1b\n\t"
20804         "mov    r7, #0\n\t"
20805         "add    %[r], %[r]\n\t"
20806         "add    %[r], #1\n\t"
20807         "# r * div - Start\n\t"
20808         "lsl    %[d1], %[r], #16\n\t"
20809         "lsl    r4, %[div], #16\n\t"
20810         "lsr    %[d1], %[d1], #16\n\t"
20811         "lsr    r4, r4, #16\n\t"
20812         "mul    r4, %[d1]\n\t"
20813         "lsr    r6, %[div], #16\n\t"
20814         "mul    %[d1], r6\n\t"
20815         "lsr    r5, %[d1], #16\n\t"
20816         "lsl    %[d1], %[d1], #16\n\t"
20817         "add    r4, %[d1]\n\t"
20818         "adc    r5, r7\n\t"
20819         "lsr    %[d1], %[r], #16\n\t"
20820         "mul    r6, %[d1]\n\t"
20821         "add    r5, r6\n\t"
20822         "lsl    r6, %[div], #16\n\t"
20823         "lsr    r6, r6, #16\n\t"
20824         "mul    %[d1], r6\n\t"
20825         "lsr    r6, %[d1], #16\n\t"
20826         "lsl    %[d1], %[d1], #16\n\t"
20827         "add    r4, %[d1]\n\t"
20828         "adc    r5, r6\n\t"
20829         "# r * div - Done\n\t"
20830         "mov    %[d1], r8\n\t"
20831         "sub    %[d1], r4\n\t"
20832         "mov    r4, %[d1]\n\t"
20833         "mov    %[d1], r9\n\t"
20834         "sbc    %[d1], r5\n\t"
20835         "mov    r5, %[d1]\n\t"
20836         "add    %[r], r5\n\t"
20837         "# r * div - Start\n\t"
20838         "lsl    %[d1], %[r], #16\n\t"
20839         "lsl    r4, %[div], #16\n\t"
20840         "lsr    %[d1], %[d1], #16\n\t"
20841         "lsr    r4, r4, #16\n\t"
20842         "mul    r4, %[d1]\n\t"
20843         "lsr    r6, %[div], #16\n\t"
20844         "mul    %[d1], r6\n\t"
20845         "lsr    r5, %[d1], #16\n\t"
20846         "lsl    %[d1], %[d1], #16\n\t"
20847         "add    r4, %[d1]\n\t"
20848         "adc    r5, r7\n\t"
20849         "lsr    %[d1], %[r], #16\n\t"
20850         "mul    r6, %[d1]\n\t"
20851         "add    r5, r6\n\t"
20852         "lsl    r6, %[div], #16\n\t"
20853         "lsr    r6, r6, #16\n\t"
20854         "mul    %[d1], r6\n\t"
20855         "lsr    r6, %[d1], #16\n\t"
20856         "lsl    %[d1], %[d1], #16\n\t"
20857         "add    r4, %[d1]\n\t"
20858         "adc    r5, r6\n\t"
20859         "# r * div - Done\n\t"
20860         "mov    %[d1], r8\n\t"
20861         "mov    r6, r9\n\t"
20862         "sub    r4, %[d1], r4\n\t"
20863         "sbc    r6, r5\n\t"
20864         "mov    r5, r6\n\t"
20865         "add    %[r], r5\n\t"
20866         "# r * div - Start\n\t"
20867         "lsl    %[d1], %[r], #16\n\t"
20868         "lsl    r4, %[div], #16\n\t"
20869         "lsr    %[d1], %[d1], #16\n\t"
20870         "lsr    r4, r4, #16\n\t"
20871         "mul    r4, %[d1]\n\t"
20872         "lsr    r6, %[div], #16\n\t"
20873         "mul    %[d1], r6\n\t"
20874         "lsr    r5, %[d1], #16\n\t"
20875         "lsl    %[d1], %[d1], #16\n\t"
20876         "add    r4, %[d1]\n\t"
20877         "adc    r5, r7\n\t"
20878         "lsr    %[d1], %[r], #16\n\t"
20879         "mul    r6, %[d1]\n\t"
20880         "add    r5, r6\n\t"
20881         "lsl    r6, %[div], #16\n\t"
20882         "lsr    r6, r6, #16\n\t"
20883         "mul    %[d1], r6\n\t"
20884         "lsr    r6, %[d1], #16\n\t"
20885         "lsl    %[d1], %[d1], #16\n\t"
20886         "add    r4, %[d1]\n\t"
20887         "adc    r5, r6\n\t"
20888         "# r * div - Done\n\t"
20889         "mov    %[d1], r8\n\t"
20890         "mov    r6, r9\n\t"
20891         "sub    r4, %[d1], r4\n\t"
20892         "sbc    r6, r5\n\t"
20893         "mov    r5, r6\n\t"
20894         "add    %[r], r5\n\t"
20895         "mov    r6, %[div]\n\t"
20896         "sub    r6, r4\n\t"
20897         "sbc    r6, r6\n\t"
20898         "sub    %[r], r6\n\t"
20899         : [r] "+r" (r)
20900         : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
20901         : "r4", "r5", "r7", "r6", "r8", "r9"
20902     );
20903     return r;
20904 }
20905 
20906 /* AND m into each word of a and store in r.
20907  *
20908  * r  A single precision integer.
20909  * a  A single precision integer.
20910  * m  Mask to AND against each digit.
20911  */
20912 static void sp_256_mask_8(sp_digit* r, const sp_digit* a, sp_digit m)
20913 {
20914 #ifdef WOLFSSL_SP_SMALL
20915     int i;
20916 
20917     for (i=0; i<8; i++) {
20918         r[i] = a[i] & m;
20919     }
20920 #else
20921     r[0] = a[0] & m;
20922     r[1] = a[1] & m;
20923     r[2] = a[2] & m;
20924     r[3] = a[3] & m;
20925     r[4] = a[4] & m;
20926     r[5] = a[5] & m;
20927     r[6] = a[6] & m;
20928     r[7] = a[7] & m;
20929 #endif
20930 }
20931 
20932 /* Divide d in a and put remainder into r (m*d + r = a)
20933  * m is not calculated as it is not needed at this time.
20934  *
20935  * a  Nmber to be divided.
20936  * d  Number to divide with.
20937  * m  Multiplier result.
20938  * r  Remainder from the division.
20939  * returns MP_OKAY indicating success.
20940  */
20941 static WC_INLINE int sp_256_div_8(const sp_digit* a, const sp_digit* d, sp_digit* m,
20942         sp_digit* r)
20943 {
20944     sp_digit t1[16], t2[9];
20945     sp_digit div, r1;
20946     int i;
20947 
20948     (void)m;
20949 
20950     div = d[7];
20951     XMEMCPY(t1, a, sizeof(*t1) * 2 * 8);
20952     for (i=7; i>=0; i--) {
20953         r1 = div_256_word_8(t1[8 + i], t1[8 + i - 1], div);
20954 
20955         sp_256_mul_d_8(t2, d, r1);
20956         t1[8 + i] += sp_256_sub_in_place_8(&t1[i], t2);
20957         t1[8 + i] -= t2[8];
20958         sp_256_mask_8(t2, d, t1[8 + i]);
20959         t1[8 + i] += sp_256_add_8(&t1[i], &t1[i], t2);
20960         sp_256_mask_8(t2, d, t1[8 + i]);
20961         t1[8 + i] += sp_256_add_8(&t1[i], &t1[i], t2);
20962     }
20963 
20964     r1 = sp_256_cmp_8(t1, d) >= 0;
20965     sp_256_cond_sub_8(r, t1, d, (sp_digit)0 - r1);
20966 
20967     return MP_OKAY;
20968 }
20969 
20970 /* Reduce a modulo m into r. (r = a mod m)
20971  *
20972  * r  A single precision number that is the reduced result.
20973  * a  A single precision number that is to be reduced.
20974  * m  A single precision number that is the modulus to reduce with.
20975  * returns MP_OKAY indicating success.
20976  */
20977 static WC_INLINE int sp_256_mod_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
20978 {
20979     return sp_256_div_8(a, m, NULL, r);
20980 }
20981 
20982 #endif
20983 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
#ifdef WOLFSSL_SP_SMALL
/* The order of the P256 curve minus 2, as 32-bit words with the least
 * significant word first. */
static const uint32_t p256_order_minus_2[8] = {
    0xfc63254fU, 0xf3b9cac2U, 0xa7179e84U, 0xbce6faadU,
    0xffffffffU, 0xffffffffU, 0x00000000U, 0xffffffffU
};
#else
/* The four least significant 32-bit words of the order of the P256 curve
 * minus 2, least significant word first. */
static const uint32_t p256_order_low[4] = {
    0xfc63254fU, 0xf3b9cac2U,
    0xa7179e84U, 0xbce6faadU
};
#endif /* WOLFSSL_SP_SMALL */
20996 
20997 /* Multiply two number mod the order of P256 curve. (r = a * b mod order)
20998  *
20999  * r  Result of the multiplication.
21000  * a  First operand of the multiplication.
21001  * b  Second operand of the multiplication.
21002  */
21003 static void sp_256_mont_mul_order_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
21004 {
21005     sp_256_mul_8(r, a, b);
21006     sp_256_mont_reduce_order_8(r, p256_order, p256_mp_order);
21007 }
21008 
21009 /* Square number mod the order of P256 curve. (r = a * a mod order)
21010  *
21011  * r  Result of the squaring.
21012  * a  Number to square.
21013  */
21014 static void sp_256_mont_sqr_order_8(sp_digit* r, const sp_digit* a)
21015 {
21016     sp_256_sqr_8(r, a);
21017     sp_256_mont_reduce_order_8(r, p256_order, p256_mp_order);
21018 }
21019 
21020 #ifndef WOLFSSL_SP_SMALL
21021 /* Square number mod the order of P256 curve a number of times.
21022  * (r = a ^ n mod order)
21023  *
21024  * r  Result of the squaring.
21025  * a  Number to square.
21026  */
21027 static void sp_256_mont_sqr_n_order_8(sp_digit* r, const sp_digit* a, int n)
21028 {
21029     int i;
21030 
21031     sp_256_mont_sqr_order_8(r, a);
21032     for (i=1; i<n; i++) {
21033         sp_256_mont_sqr_order_8(r, r);
21034     }
21035 }
21036 #endif /* !WOLFSSL_SP_SMALL */
21037 
21038 /* Invert the number, in Montgomery form, modulo the order of the P256 curve.
21039  * (r = 1 / a mod order)
21040  *
21041  * r   Inverse result.
21042  * a   Number to invert.
21043  * td  Temporary data.
21044  */
21045 static void sp_256_mont_inv_order_8(sp_digit* r, const sp_digit* a,
21046         sp_digit* td)
21047 {
21048 #ifdef WOLFSSL_SP_SMALL
21049     sp_digit* t = td;
21050     int i;
21051 
21052     XMEMCPY(t, a, sizeof(sp_digit) * 8);
21053     for (i=254; i>=0; i--) {
21054         sp_256_mont_sqr_order_8(t, t);
21055         if ((p256_order_minus_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
21056             sp_256_mont_mul_order_8(t, t, a);
21057         }
21058     }
21059     XMEMCPY(r, t, sizeof(sp_digit) * 8U);
21060 #else
21061     sp_digit* t = td;
21062     sp_digit* t2 = td + 2 * 8;
21063     sp_digit* t3 = td + 4 * 8;
21064     int i;
21065 
21066     /* t = a^2 */
21067     sp_256_mont_sqr_order_8(t, a);
21068     /* t = a^3 = t * a */
21069     sp_256_mont_mul_order_8(t, t, a);
21070     /* t2= a^c = t ^ 2 ^ 2 */
21071     sp_256_mont_sqr_n_order_8(t2, t, 2);
21072     /* t3= a^f = t2 * t */
21073     sp_256_mont_mul_order_8(t3, t2, t);
21074     /* t2= a^f0 = t3 ^ 2 ^ 4 */
21075     sp_256_mont_sqr_n_order_8(t2, t3, 4);
21076     /* t = a^ff = t2 * t3 */
21077     sp_256_mont_mul_order_8(t, t2, t3);
21078     /* t3= a^ff00 = t ^ 2 ^ 8 */
21079     sp_256_mont_sqr_n_order_8(t2, t, 8);
21080     /* t = a^ffff = t2 * t */
21081     sp_256_mont_mul_order_8(t, t2, t);
21082     /* t2= a^ffff0000 = t ^ 2 ^ 16 */
21083     sp_256_mont_sqr_n_order_8(t2, t, 16);
21084     /* t = a^ffffffff = t2 * t */
21085     sp_256_mont_mul_order_8(t, t2, t);
21086     /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64  */
21087     sp_256_mont_sqr_n_order_8(t2, t, 64);
21088     /* t2= a^ffffffff00000000ffffffff = t2 * t */
21089     sp_256_mont_mul_order_8(t2, t2, t);
21090     /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32  */
21091     sp_256_mont_sqr_n_order_8(t2, t2, 32);
21092     /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
21093     sp_256_mont_mul_order_8(t2, t2, t);
21094     /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
21095     for (i=127; i>=112; i--) {
21096         sp_256_mont_sqr_order_8(t2, t2);
21097         if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
21098             sp_256_mont_mul_order_8(t2, t2, a);
21099         }
21100     }
21101     /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
21102     sp_256_mont_sqr_n_order_8(t2, t2, 4);
21103     sp_256_mont_mul_order_8(t2, t2, t3);
21104     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
21105     for (i=107; i>=64; i--) {
21106         sp_256_mont_sqr_order_8(t2, t2);
21107         if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
21108             sp_256_mont_mul_order_8(t2, t2, a);
21109         }
21110     }
21111     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
21112     sp_256_mont_sqr_n_order_8(t2, t2, 4);
21113     sp_256_mont_mul_order_8(t2, t2, t3);
21114     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
21115     for (i=59; i>=32; i--) {
21116         sp_256_mont_sqr_order_8(t2, t2);
21117         if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
21118             sp_256_mont_mul_order_8(t2, t2, a);
21119         }
21120     }
21121     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
21122     sp_256_mont_sqr_n_order_8(t2, t2, 4);
21123     sp_256_mont_mul_order_8(t2, t2, t3);
21124     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
21125     for (i=27; i>=0; i--) {
21126         sp_256_mont_sqr_order_8(t2, t2);
21127         if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
21128             sp_256_mont_mul_order_8(t2, t2, a);
21129         }
21130     }
21131     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
21132     sp_256_mont_sqr_n_order_8(t2, t2, 4);
21133     /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
21134     sp_256_mont_mul_order_8(r, t2, t3);
21135 #endif /* WOLFSSL_SP_SMALL */
21136 }
21137 
21138 #endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
21139 #ifdef HAVE_ECC_SIGN
21140 #ifndef SP_ECC_MAX_SIG_GEN
21141 #define SP_ECC_MAX_SIG_GEN  64
21142 #endif
21143 
21144 /* Sign the hash using the private key.
21145  *   e = [hash, 256 bits] from binary
21146  *   r = (k.G)->x mod order
21147  *   s = (r * x + e) / k mod order
21148  * The hash is truncated to the first 256 bits.
21149  *
21150  * hash     Hash to sign.
21151  * hashLen  Length of the hash data.
21152  * rng      Random number generator.
21153  * priv     Private part of key - scalar.
21154  * rm       First part of result as an mp_int.
21155  * sm       Sirst part of result as an mp_int.
21156  * heap     Heap to use for allocation.
21157  * returns RNG failures, MEMORY_E when memory allocation fails and
21158  * MP_OKAY on success.
21159  */
21160 int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
21161                     mp_int* rm, mp_int* sm, mp_int* km, void* heap)
21162 {
21163 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
21164     sp_digit* d = NULL;
21165 #else
21166     sp_digit ed[2*8];
21167     sp_digit xd[2*8];
21168     sp_digit kd[2*8];
21169     sp_digit rd[2*8];
21170     sp_digit td[3 * 2*8];
21171     sp_point_256 p;
21172 #endif
21173     sp_digit* e = NULL;
21174     sp_digit* x = NULL;
21175     sp_digit* k = NULL;
21176     sp_digit* r = NULL;
21177     sp_digit* tmp = NULL;
21178     sp_point_256* point = NULL;
21179     sp_digit carry;
21180     sp_digit* s = NULL;
21181     sp_digit* kInv = NULL;
21182     int err = MP_OKAY;
21183     int32_t c;
21184     int i;
21185 
21186     (void)heap;
21187 
21188     err = sp_256_point_new_8(heap, p, point);
21189 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
21190     if (err == MP_OKAY) {
21191         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 8, heap,
21192                                                               DYNAMIC_TYPE_ECC);
21193         if (d == NULL) {
21194             err = MEMORY_E;
21195         }
21196     }
21197 #endif
21198 
21199     if (err == MP_OKAY) {
21200 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
21201         e = d + 0 * 8;
21202         x = d + 2 * 8;
21203         k = d + 4 * 8;
21204         r = d + 6 * 8;
21205         tmp = d + 8 * 8;
21206 #else
21207         e = ed;
21208         x = xd;
21209         k = kd;
21210         r = rd;
21211         tmp = td;
21212 #endif
21213         s = e;
21214         kInv = k;
21215 
21216         if (hashLen > 32U) {
21217             hashLen = 32U;
21218         }
21219 
21220         sp_256_from_bin(e, 8, hash, (int)hashLen);
21221     }
21222 
21223     for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
21224         sp_256_from_mp(x, 8, priv);
21225 
21226         /* New random point. */
21227         if (km == NULL || mp_iszero(km)) {
21228             err = sp_256_ecc_gen_k_8(rng, k);
21229         }
21230         else {
21231             sp_256_from_mp(k, 8, km);
21232             mp_zero(km);
21233         }
21234         if (err == MP_OKAY) {
21235                 err = sp_256_ecc_mulmod_base_8(point, k, 1, NULL);
21236         }
21237 
21238         if (err == MP_OKAY) {
21239             /* r = point->x mod order */
21240             XMEMCPY(r, point->x, sizeof(sp_digit) * 8U);
21241             sp_256_norm_8(r);
21242             c = sp_256_cmp_8(r, p256_order);
21243             sp_256_cond_sub_8(r, r, p256_order, 0L - (sp_digit)(c >= 0));
21244             sp_256_norm_8(r);
21245 
21246             /* Conv k to Montgomery form (mod order) */
21247                 sp_256_mul_8(k, k, p256_norm_order);
21248             err = sp_256_mod_8(k, k, p256_order);
21249         }
21250         if (err == MP_OKAY) {
21251             sp_256_norm_8(k);
21252             /* kInv = 1/k mod order */
21253                 sp_256_mont_inv_order_8(kInv, k, tmp);
21254             sp_256_norm_8(kInv);
21255 
21256             /* s = r * x + e */
21257                 sp_256_mul_8(x, x, r);
21258             err = sp_256_mod_8(x, x, p256_order);
21259         }
21260         if (err == MP_OKAY) {
21261             sp_256_norm_8(x);
21262             carry = sp_256_add_8(s, e, x);
21263             sp_256_cond_sub_8(s, s, p256_order, 0 - carry);
21264             sp_256_norm_8(s);
21265             c = sp_256_cmp_8(s, p256_order);
21266             sp_256_cond_sub_8(s, s, p256_order, 0L - (sp_digit)(c >= 0));
21267             sp_256_norm_8(s);
21268 
21269             /* s = s * k^-1 mod order */
21270                 sp_256_mont_mul_order_8(s, s, kInv);
21271             sp_256_norm_8(s);
21272 
21273             /* Check that signature is usable. */
21274             if (sp_256_iszero_8(s) == 0) {
21275                 break;
21276             }
21277         }
21278     }
21279 
21280     if (i == 0) {
21281         err = RNG_FAILURE_E;
21282     }
21283 
21284     if (err == MP_OKAY) {
21285         err = sp_256_to_mp(r, rm);
21286     }
21287     if (err == MP_OKAY) {
21288         err = sp_256_to_mp(s, sm);
21289     }
21290 
21291 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
21292     if (d != NULL) {
21293         XMEMSET(d, 0, sizeof(sp_digit) * 8 * 8);
21294         XFREE(d, heap, DYNAMIC_TYPE_ECC);
21295     }
21296 #else
21297     XMEMSET(e, 0, sizeof(sp_digit) * 2U * 8U);
21298     XMEMSET(x, 0, sizeof(sp_digit) * 2U * 8U);
21299     XMEMSET(k, 0, sizeof(sp_digit) * 2U * 8U);
21300     XMEMSET(r, 0, sizeof(sp_digit) * 2U * 8U);
21301     XMEMSET(r, 0, sizeof(sp_digit) * 2U * 8U);
21302     XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 8U);
21303 #endif
21304     sp_256_point_free_8(point, 1, heap);
21305 
21306     return err;
21307 }
21308 #endif /* HAVE_ECC_SIGN */
21309 
21310 #ifdef HAVE_ECC_VERIFY
21311 /* Verify the signature values with the hash and public key.
21312  *   e = Truncate(hash, 256)
21313  *   u1 = e/s mod order
21314  *   u2 = r/s mod order
21315  *   r == (u1.G + u2.Q)->x mod order
21316  * Optimization: Leave point in projective form.
21317  *   (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
21318  *   (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
21319  * The hash is truncated to the first 256 bits.
21320  *
21321  * hash     Hash to sign.
21322  * hashLen  Length of the hash data.
21323  * rng      Random number generator.
21324  * priv     Private part of key - scalar.
21325  * rm       First part of result as an mp_int.
21326  * sm       Sirst part of result as an mp_int.
21327  * heap     Heap to use for allocation.
21328  * returns RNG failures, MEMORY_E when memory allocation fails and
21329  * MP_OKAY on success.
21330  */
21331 int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
21332     mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
21333 {
21334 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
21335     sp_digit* d = NULL;
21336 #else
21337     sp_digit u1d[2*8];
21338     sp_digit u2d[2*8];
21339     sp_digit sd[2*8];
21340     sp_digit tmpd[2*8 * 5];
21341     sp_point_256 p1d;
21342     sp_point_256 p2d;
21343 #endif
21344     sp_digit* u1 = NULL;
21345     sp_digit* u2 = NULL;
21346     sp_digit* s = NULL;
21347     sp_digit* tmp = NULL;
21348     sp_point_256* p1;
21349     sp_point_256* p2 = NULL;
21350     sp_digit carry;
21351     int32_t c;
21352     int err;
21353 
21354     err = sp_256_point_new_8(heap, p1d, p1);
21355     if (err == MP_OKAY) {
21356         err = sp_256_point_new_8(heap, p2d, p2);
21357     }
21358 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
21359     if (err == MP_OKAY) {
21360         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 8, heap,
21361                                                               DYNAMIC_TYPE_ECC);
21362         if (d == NULL) {
21363             err = MEMORY_E;
21364         }
21365     }
21366 #endif
21367 
21368     if (err == MP_OKAY) {
21369 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
21370         u1  = d + 0 * 8;
21371         u2  = d + 2 * 8;
21372         s   = d + 4 * 8;
21373         tmp = d + 6 * 8;
21374 #else
21375         u1 = u1d;
21376         u2 = u2d;
21377         s  = sd;
21378         tmp = tmpd;
21379 #endif
21380 
21381         if (hashLen > 32U) {
21382             hashLen = 32U;
21383         }
21384 
21385         sp_256_from_bin(u1, 8, hash, (int)hashLen);
21386         sp_256_from_mp(u2, 8, r);
21387         sp_256_from_mp(s, 8, sm);
21388         sp_256_from_mp(p2->x, 8, pX);
21389         sp_256_from_mp(p2->y, 8, pY);
21390         sp_256_from_mp(p2->z, 8, pZ);
21391 
21392         {
21393             sp_256_mul_8(s, s, p256_norm_order);
21394         }
21395         err = sp_256_mod_8(s, s, p256_order);
21396     }
21397     if (err == MP_OKAY) {
21398         sp_256_norm_8(s);
21399         {
21400             sp_256_mont_inv_order_8(s, s, tmp);
21401             sp_256_mont_mul_order_8(u1, u1, s);
21402             sp_256_mont_mul_order_8(u2, u2, s);
21403         }
21404 
21405             err = sp_256_ecc_mulmod_base_8(p1, u1, 0, heap);
21406     }
21407     if (err == MP_OKAY) {
21408             err = sp_256_ecc_mulmod_8(p2, p2, u2, 0, heap);
21409     }
21410 
21411     if (err == MP_OKAY) {
21412         {
21413             sp_256_proj_point_add_8(p1, p1, p2, tmp);
21414             if (sp_256_iszero_8(p1->z)) {
21415                 if (sp_256_iszero_8(p1->x) && sp_256_iszero_8(p1->y)) {
21416                     sp_256_proj_point_dbl_8(p1, p2, tmp);
21417                 }
21418                 else {
21419                     /* Y ordinate is not used from here - don't set. */
21420                     p1->x[0] = 0;
21421                     p1->x[1] = 0;
21422                     p1->x[2] = 0;
21423                     p1->x[3] = 0;
21424                     p1->x[4] = 0;
21425                     p1->x[5] = 0;
21426                     p1->x[6] = 0;
21427                     p1->x[7] = 0;
21428                     XMEMCPY(p1->z, p256_norm_mod, sizeof(p256_norm_mod));
21429                 }
21430             }
21431         }
21432 
21433         /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
21434         /* Reload r and convert to Montgomery form. */
21435         sp_256_from_mp(u2, 8, r);
21436         err = sp_256_mod_mul_norm_8(u2, u2, p256_mod);
21437     }
21438 
21439     if (err == MP_OKAY) {
21440         /* u1 = r.z'.z' mod prime */
21441         sp_256_mont_sqr_8(p1->z, p1->z, p256_mod, p256_mp_mod);
21442         sp_256_mont_mul_8(u1, u2, p1->z, p256_mod, p256_mp_mod);
21443         *res = (int)(sp_256_cmp_8(p1->x, u1) == 0);
21444         if (*res == 0) {
21445             /* Reload r and add order. */
21446             sp_256_from_mp(u2, 8, r);
21447             carry = sp_256_add_8(u2, u2, p256_order);
21448             /* Carry means result is greater than mod and is not valid. */
21449             if (carry == 0) {
21450                 sp_256_norm_8(u2);
21451 
21452                 /* Compare with mod and if greater or equal then not valid. */
21453                 c = sp_256_cmp_8(u2, p256_mod);
21454                 if (c < 0) {
21455                     /* Convert to Montogomery form */
21456                     err = sp_256_mod_mul_norm_8(u2, u2, p256_mod);
21457                     if (err == MP_OKAY) {
21458                         /* u1 = (r + 1*order).z'.z' mod prime */
21459                         sp_256_mont_mul_8(u1, u2, p1->z, p256_mod,
21460                                                                   p256_mp_mod);
21461                         *res = (int)(sp_256_cmp_8(p1->x, u1) == 0);
21462                     }
21463                 }
21464             }
21465         }
21466     }
21467 
21468 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
21469     if (d != NULL)
21470         XFREE(d, heap, DYNAMIC_TYPE_ECC);
21471 #endif
21472     sp_256_point_free_8(p1, 0, heap);
21473     sp_256_point_free_8(p2, 0, heap);
21474 
21475     return err;
21476 }
21477 #endif /* HAVE_ECC_VERIFY */
21478 
21479 #ifdef HAVE_ECC_CHECK_KEY
21480 /* Check that the x and y oridinates are a valid point on the curve.
21481  *
21482  * point  EC point.
21483  * heap   Heap to use if dynamically allocating.
21484  * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
21485  * not on the curve and MP_OKAY otherwise.
21486  */
21487 static int sp_256_ecc_is_point_8(sp_point_256* point, void* heap)
21488 {
21489 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
21490     sp_digit* d = NULL;
21491 #else
21492     sp_digit t1d[2*8];
21493     sp_digit t2d[2*8];
21494 #endif
21495     sp_digit* t1;
21496     sp_digit* t2;
21497     int err = MP_OKAY;
21498 
21499 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
21500     d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8 * 4, heap, DYNAMIC_TYPE_ECC);
21501     if (d == NULL) {
21502         err = MEMORY_E;
21503     }
21504 #endif
21505 
21506     if (err == MP_OKAY) {
21507 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
21508         t1 = d + 0 * 8;
21509         t2 = d + 2 * 8;
21510 #else
21511         (void)heap;
21512 
21513         t1 = t1d;
21514         t2 = t2d;
21515 #endif
21516 
21517         sp_256_sqr_8(t1, point->y);
21518         (void)sp_256_mod_8(t1, t1, p256_mod);
21519         sp_256_sqr_8(t2, point->x);
21520         (void)sp_256_mod_8(t2, t2, p256_mod);
21521         sp_256_mul_8(t2, t2, point->x);
21522         (void)sp_256_mod_8(t2, t2, p256_mod);
21523         (void)sp_256_sub_8(t2, p256_mod, t2);
21524         sp_256_mont_add_8(t1, t1, t2, p256_mod);
21525 
21526         sp_256_mont_add_8(t1, t1, point->x, p256_mod);
21527         sp_256_mont_add_8(t1, t1, point->x, p256_mod);
21528         sp_256_mont_add_8(t1, t1, point->x, p256_mod);
21529 
21530         if (sp_256_cmp_8(t1, p256_b) != 0) {
21531             err = MP_VAL;
21532         }
21533     }
21534 
21535 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
21536     if (d != NULL) {
21537         XFREE(d, heap, DYNAMIC_TYPE_ECC);
21538     }
21539 #endif
21540 
21541     return err;
21542 }
21543 
21544 /* Check that the x and y oridinates are a valid point on the curve.
21545  *
21546  * pX  X ordinate of EC point.
21547  * pY  Y ordinate of EC point.
21548  * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
21549  * not on the curve and MP_OKAY otherwise.
21550  */
21551 int sp_ecc_is_point_256(mp_int* pX, mp_int* pY)
21552 {
21553 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
21554     sp_point_256 pubd;
21555 #endif
21556     sp_point_256* pub;
21557     byte one[1] = { 1 };
21558     int err;
21559 
21560     err = sp_256_point_new_8(NULL, pubd, pub);
21561     if (err == MP_OKAY) {
21562         sp_256_from_mp(pub->x, 8, pX);
21563         sp_256_from_mp(pub->y, 8, pY);
21564         sp_256_from_bin(pub->z, 8, one, (int)sizeof(one));
21565 
21566         err = sp_256_ecc_is_point_8(pub, NULL);
21567     }
21568 
21569     sp_256_point_free_8(pub, 0, NULL);
21570 
21571     return err;
21572 }
21573 
21574 /* Check that the private scalar generates the EC point (px, py), the point is
21575  * on the curve and the point has the correct order.
21576  *
21577  * pX     X ordinate of EC point.
21578  * pY     Y ordinate of EC point.
21579  * privm  Private scalar that generates EC point.
21580  * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
21581  * not on the curve, ECC_INF_E if the point does not have the correct order,
21582  * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
21583  * MP_OKAY otherwise.
21584  */
21585 int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
21586 {
21587 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
21588     sp_digit privd[8];
21589     sp_point_256 pubd;
21590     sp_point_256 pd;
21591 #endif
21592     sp_digit* priv = NULL;
21593     sp_point_256* pub;
21594     sp_point_256* p = NULL;
21595     byte one[1] = { 1 };
21596     int err;
21597 
21598     err = sp_256_point_new_8(heap, pubd, pub);
21599     if (err == MP_OKAY) {
21600         err = sp_256_point_new_8(heap, pd, p);
21601     }
21602 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
21603     if (err == MP_OKAY) {
21604         priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
21605                                                               DYNAMIC_TYPE_ECC);
21606         if (priv == NULL) {
21607             err = MEMORY_E;
21608         }
21609     }
21610 #endif
21611 
21612     if (err == MP_OKAY) {
21613 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
21614         priv = privd;
21615 #endif
21616 
21617         sp_256_from_mp(pub->x, 8, pX);
21618         sp_256_from_mp(pub->y, 8, pY);
21619         sp_256_from_bin(pub->z, 8, one, (int)sizeof(one));
21620         sp_256_from_mp(priv, 8, privm);
21621 
21622         /* Check point at infinitiy. */
21623         if ((sp_256_iszero_8(pub->x) != 0) &&
21624             (sp_256_iszero_8(pub->y) != 0)) {
21625             err = ECC_INF_E;
21626         }
21627     }
21628 
21629     if (err == MP_OKAY) {
21630         /* Check range of X and Y */
21631         if (sp_256_cmp_8(pub->x, p256_mod) >= 0 ||
21632             sp_256_cmp_8(pub->y, p256_mod) >= 0) {
21633             err = ECC_OUT_OF_RANGE_E;
21634         }
21635     }
21636 
21637     if (err == MP_OKAY) {
21638         /* Check point is on curve */
21639         err = sp_256_ecc_is_point_8(pub, heap);
21640     }
21641 
21642     if (err == MP_OKAY) {
21643         /* Point * order = infinity */
21644             err = sp_256_ecc_mulmod_8(p, pub, p256_order, 1, heap);
21645     }
21646     if (err == MP_OKAY) {
21647         /* Check result is infinity */
21648         if ((sp_256_iszero_8(p->x) == 0) ||
21649             (sp_256_iszero_8(p->y) == 0)) {
21650             err = ECC_INF_E;
21651         }
21652     }
21653 
21654     if (err == MP_OKAY) {
21655         /* Base * private = point */
21656             err = sp_256_ecc_mulmod_base_8(p, priv, 1, heap);
21657     }
21658     if (err == MP_OKAY) {
21659         /* Check result is public key */
21660         if (sp_256_cmp_8(p->x, pub->x) != 0 ||
21661             sp_256_cmp_8(p->y, pub->y) != 0) {
21662             err = ECC_PRIV_KEY_E;
21663         }
21664     }
21665 
21666 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
21667     if (priv != NULL) {
21668         XFREE(priv, heap, DYNAMIC_TYPE_ECC);
21669     }
21670 #endif
21671     sp_256_point_free_8(p, 0, heap);
21672     sp_256_point_free_8(pub, 0, heap);
21673 
21674     return err;
21675 }
21676 #endif
21677 #ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
21678 /* Add two projective EC points together.
21679  * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
21680  *
21681  * pX   First EC point's X ordinate.
21682  * pY   First EC point's Y ordinate.
21683  * pZ   First EC point's Z ordinate.
21684  * qX   Second EC point's X ordinate.
21685  * qY   Second EC point's Y ordinate.
21686  * qZ   Second EC point's Z ordinate.
21687  * rX   Resultant EC point's X ordinate.
21688  * rY   Resultant EC point's Y ordinate.
21689  * rZ   Resultant EC point's Z ordinate.
21690  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
21691  */
21692 int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
21693                               mp_int* qX, mp_int* qY, mp_int* qZ,
21694                               mp_int* rX, mp_int* rY, mp_int* rZ)
21695 {
21696 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
21697     sp_digit tmpd[2 * 8 * 5];
21698     sp_point_256 pd;
21699     sp_point_256 qd;
21700 #endif
21701     sp_digit* tmp;
21702     sp_point_256* p;
21703     sp_point_256* q = NULL;
21704     int err;
21705 
21706     err = sp_256_point_new_8(NULL, pd, p);
21707     if (err == MP_OKAY) {
21708         err = sp_256_point_new_8(NULL, qd, q);
21709     }
21710 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
21711     if (err == MP_OKAY) {
21712         tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, NULL,
21713                                                               DYNAMIC_TYPE_ECC);
21714         if (tmp == NULL) {
21715             err = MEMORY_E;
21716         }
21717     }
21718 #else
21719     tmp = tmpd;
21720 #endif
21721 
21722     if (err == MP_OKAY) {
21723         sp_256_from_mp(p->x, 8, pX);
21724         sp_256_from_mp(p->y, 8, pY);
21725         sp_256_from_mp(p->z, 8, pZ);
21726         sp_256_from_mp(q->x, 8, qX);
21727         sp_256_from_mp(q->y, 8, qY);
21728         sp_256_from_mp(q->z, 8, qZ);
21729 
21730             sp_256_proj_point_add_8(p, p, q, tmp);
21731     }
21732 
21733     if (err == MP_OKAY) {
21734         err = sp_256_to_mp(p->x, rX);
21735     }
21736     if (err == MP_OKAY) {
21737         err = sp_256_to_mp(p->y, rY);
21738     }
21739     if (err == MP_OKAY) {
21740         err = sp_256_to_mp(p->z, rZ);
21741     }
21742 
21743 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
21744     if (tmp != NULL) {
21745         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
21746     }
21747 #endif
21748     sp_256_point_free_8(q, 0, NULL);
21749     sp_256_point_free_8(p, 0, NULL);
21750 
21751     return err;
21752 }
21753 
21754 /* Double a projective EC point.
21755  * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
21756  *
21757  * pX   EC point's X ordinate.
21758  * pY   EC point's Y ordinate.
21759  * pZ   EC point's Z ordinate.
21760  * rX   Resultant EC point's X ordinate.
21761  * rY   Resultant EC point's Y ordinate.
21762  * rZ   Resultant EC point's Z ordinate.
21763  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
21764  */
21765 int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
21766                               mp_int* rX, mp_int* rY, mp_int* rZ)
21767 {
21768 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
21769     sp_digit tmpd[2 * 8 * 2];
21770     sp_point_256 pd;
21771 #endif
21772     sp_digit* tmp;
21773     sp_point_256* p;
21774     int err;
21775 
21776     err = sp_256_point_new_8(NULL, pd, p);
21777 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
21778     if (err == MP_OKAY) {
21779         tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 2, NULL,
21780                                                               DYNAMIC_TYPE_ECC);
21781         if (tmp == NULL) {
21782             err = MEMORY_E;
21783         }
21784     }
21785 #else
21786     tmp = tmpd;
21787 #endif
21788 
21789     if (err == MP_OKAY) {
21790         sp_256_from_mp(p->x, 8, pX);
21791         sp_256_from_mp(p->y, 8, pY);
21792         sp_256_from_mp(p->z, 8, pZ);
21793 
21794             sp_256_proj_point_dbl_8(p, p, tmp);
21795     }
21796 
21797     if (err == MP_OKAY) {
21798         err = sp_256_to_mp(p->x, rX);
21799     }
21800     if (err == MP_OKAY) {
21801         err = sp_256_to_mp(p->y, rY);
21802     }
21803     if (err == MP_OKAY) {
21804         err = sp_256_to_mp(p->z, rZ);
21805     }
21806 
21807 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
21808     if (tmp != NULL) {
21809         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
21810     }
21811 #endif
21812     sp_256_point_free_8(p, 0, NULL);
21813 
21814     return err;
21815 }
21816 
21817 /* Map a projective EC point to affine in place.
21818  * pZ will be one.
21819  *
21820  * pX   EC point's X ordinate.
21821  * pY   EC point's Y ordinate.
21822  * pZ   EC point's Z ordinate.
21823  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
21824  */
21825 int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ)
21826 {
21827 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
21828     sp_digit tmpd[2 * 8 * 4];
21829     sp_point_256 pd;
21830 #endif
21831     sp_digit* tmp;
21832     sp_point_256* p;
21833     int err;
21834 
21835     err = sp_256_point_new_8(NULL, pd, p);
21836 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
21837     if (err == MP_OKAY) {
21838         tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 4, NULL,
21839                                                               DYNAMIC_TYPE_ECC);
21840         if (tmp == NULL) {
21841             err = MEMORY_E;
21842         }
21843     }
21844 #else
21845     tmp = tmpd;
21846 #endif
21847     if (err == MP_OKAY) {
21848         sp_256_from_mp(p->x, 8, pX);
21849         sp_256_from_mp(p->y, 8, pY);
21850         sp_256_from_mp(p->z, 8, pZ);
21851 
21852         sp_256_map_8(p, p, tmp);
21853     }
21854 
21855     if (err == MP_OKAY) {
21856         err = sp_256_to_mp(p->x, pX);
21857     }
21858     if (err == MP_OKAY) {
21859         err = sp_256_to_mp(p->y, pY);
21860     }
21861     if (err == MP_OKAY) {
21862         err = sp_256_to_mp(p->z, pZ);
21863     }
21864 
21865 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
21866     if (tmp != NULL) {
21867         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
21868     }
21869 #endif
21870     sp_256_point_free_8(p, 0, NULL);
21871 
21872     return err;
21873 }
21874 #endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
21875 #ifdef HAVE_COMP_KEY
21876 /* Find the square root of a number mod the prime of the curve.
21877  *
21878  * y  The number to operate on and the result.
21879  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
21880  */
21881 static int sp_256_mont_sqrt_8(sp_digit* y)
21882 {
21883 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
21884     sp_digit* d;
21885 #else
21886     sp_digit t1d[2 * 8];
21887     sp_digit t2d[2 * 8];
21888 #endif
21889     sp_digit* t1;
21890     sp_digit* t2;
21891     int err = MP_OKAY;
21892 
21893 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
21894     d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC);
21895     if (d == NULL) {
21896         err = MEMORY_E;
21897     }
21898 #endif
21899 
21900     if (err == MP_OKAY) {
21901 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
21902         t1 = d + 0 * 8;
21903         t2 = d + 2 * 8;
21904 #else
21905         t1 = t1d;
21906         t2 = t2d;
21907 #endif
21908 
21909         {
21910             /* t2 = y ^ 0x2 */
21911             sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod);
21912             /* t1 = y ^ 0x3 */
21913             sp_256_mont_mul_8(t1, t2, y, p256_mod, p256_mp_mod);
21914             /* t2 = y ^ 0xc */
21915             sp_256_mont_sqr_n_8(t2, t1, 2, p256_mod, p256_mp_mod);
21916             /* t1 = y ^ 0xf */
21917             sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
21918             /* t2 = y ^ 0xf0 */
21919             sp_256_mont_sqr_n_8(t2, t1, 4, p256_mod, p256_mp_mod);
21920             /* t1 = y ^ 0xff */
21921             sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
21922             /* t2 = y ^ 0xff00 */
21923             sp_256_mont_sqr_n_8(t2, t1, 8, p256_mod, p256_mp_mod);
21924             /* t1 = y ^ 0xffff */
21925             sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
21926             /* t2 = y ^ 0xffff0000 */
21927             sp_256_mont_sqr_n_8(t2, t1, 16, p256_mod, p256_mp_mod);
21928             /* t1 = y ^ 0xffffffff */
21929             sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
21930             /* t1 = y ^ 0xffffffff00000000 */
21931             sp_256_mont_sqr_n_8(t1, t1, 32, p256_mod, p256_mp_mod);
21932             /* t1 = y ^ 0xffffffff00000001 */
21933             sp_256_mont_mul_8(t1, t1, y, p256_mod, p256_mp_mod);
21934             /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
21935             sp_256_mont_sqr_n_8(t1, t1, 96, p256_mod, p256_mp_mod);
21936             /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
21937             sp_256_mont_mul_8(t1, t1, y, p256_mod, p256_mp_mod);
21938             sp_256_mont_sqr_n_8(y, t1, 94, p256_mod, p256_mp_mod);
21939         }
21940     }
21941 
21942 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
21943     if (d != NULL) {
21944         XFREE(d, NULL, DYNAMIC_TYPE_ECC);
21945     }
21946 #endif
21947 
21948     return err;
21949 }
21950 
21951 
21952 /* Uncompress the point given the X ordinate.
21953  *
21954  * xm    X ordinate.
21955  * odd   Whether the Y ordinate is odd.
21956  * ym    Calculated Y ordinate.
21957  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
21958  */
21959 int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
21960 {
21961 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
21962     sp_digit* d;
21963 #else
21964     sp_digit xd[2 * 8];
21965     sp_digit yd[2 * 8];
21966 #endif
21967     sp_digit* x = NULL;
21968     sp_digit* y = NULL;
21969     int err = MP_OKAY;
21970 
21971 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
21972     d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC);
21973     if (d == NULL) {
21974         err = MEMORY_E;
21975     }
21976 #endif
21977 
21978     if (err == MP_OKAY) {
21979 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
21980         x = d + 0 * 8;
21981         y = d + 2 * 8;
21982 #else
21983         x = xd;
21984         y = yd;
21985 #endif
21986 
21987         sp_256_from_mp(x, 8, xm);
21988         err = sp_256_mod_mul_norm_8(x, x, p256_mod);
21989     }
21990     if (err == MP_OKAY) {
21991         /* y = x^3 */
21992         {
21993             sp_256_mont_sqr_8(y, x, p256_mod, p256_mp_mod);
21994             sp_256_mont_mul_8(y, y, x, p256_mod, p256_mp_mod);
21995         }
21996         /* y = x^3 - 3x */
21997         sp_256_mont_sub_8(y, y, x, p256_mod);
21998         sp_256_mont_sub_8(y, y, x, p256_mod);
21999         sp_256_mont_sub_8(y, y, x, p256_mod);
22000         /* y = x^3 - 3x + b */
22001         err = sp_256_mod_mul_norm_8(x, p256_b, p256_mod);
22002     }
22003     if (err == MP_OKAY) {
22004         sp_256_mont_add_8(y, y, x, p256_mod);
22005         /* y = sqrt(x^3 - 3x + b) */
22006         err = sp_256_mont_sqrt_8(y);
22007     }
22008     if (err == MP_OKAY) {
22009         XMEMSET(y + 8, 0, 8U * sizeof(sp_digit));
22010         sp_256_mont_reduce_8(y, p256_mod, p256_mp_mod);
22011         if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
22012             sp_256_mont_sub_8(y, p256_mod, y, p256_mod);
22013         }
22014 
22015         err = sp_256_to_mp(y, ym);
22016     }
22017 
22018 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
22019     if (d != NULL) {
22020         XFREE(d, NULL, DYNAMIC_TYPE_ECC);
22021     }
22022 #endif
22023 
22024     return err;
22025 }
22026 #endif
22027 #endif /* !WOLFSSL_SP_NO_256 */
22028 #ifdef WOLFSSL_SP_384
22029 
22030 /* Point structure to use for P384: three ordinates of 2 * 12 digits each. */
22031 typedef struct sp_point_384 {
22032     sp_digit x[2 * 12]; /* X ordinate. */
22033     sp_digit y[2 * 12]; /* Y ordinate. */
22034     sp_digit z[2 * 12]; /* Z ordinate. */
22035     int infinity; /* 0 in p384_base and after conversion from an ecc_point; presumably non-zero marks the point at infinity - TODO confirm. */
22036 } sp_point_384;
22037 
22038 /* The modulus (prime) of the curve P384, least significant digit first. */
22039 static const sp_digit p384_mod[12] = {
22040     0xffffffff,0x00000000,0x00000000,0xffffffff,0xfffffffe,0xffffffff,
22041     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
22042 };
22043 /* The Montgomery normalizer for modulus of the curve P384. */
22044 static const sp_digit p384_norm_mod[12] = {
22045     0x00000001,0xffffffff,0xffffffff,0x00000000,0x00000001,0x00000000,
22046     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000
22047 };
22048 /* The Montgomery multiplier for modulus of the curve P384. */
22049 static sp_digit p384_mp_mod = 0x00000001;
22050 #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
22051                                             defined(HAVE_ECC_VERIFY)
22052 /* The order of the curve P384. */
22053 static const sp_digit p384_order[12] = {
22054     0xccc52973,0xecec196a,0x48b0a77a,0x581a0db2,0xf4372ddf,0xc7634d81,
22055     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
22056 };
22057 #endif
22058 /* The order of the curve P384 minus 2. */
22059 static const sp_digit p384_order2[12] = {
22060     0xccc52971,0xecec196a,0x48b0a77a,0x581a0db2,0xf4372ddf,0xc7634d81,
22061     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
22062 };
22063 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
22064 /* The Montgomery normalizer for order of the curve P384. */
22065 static const sp_digit p384_norm_order[12] = {
22066     0x333ad68d,0x1313e695,0xb74f5885,0xa7e5f24d,0x0bc8d220,0x389cb27e,
22067     0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000
22068 };
22069 #endif
22070 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
22071 /* The Montgomery multiplier for order of the curve P384. */
22072 static sp_digit p384_mp_order = 0xe88fdc45;
22073 #endif
22074 /* The base point of curve P384. Each ordinate fills the low 12 digits; the high 12 digits are zero. */
22075 static const sp_point_384 p384_base = {
22076     /* X ordinate */
22077     {
22078         0x72760ab7,0x3a545e38,0xbf55296c,0x5502f25d,0x82542a38,0x59f741e0,
22079         0x8ba79b98,0x6e1d3b62,0xf320ad74,0x8eb1c71e,0xbe8b0537,0xaa87ca22,
22080         0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
22081     },
22082     /* Y ordinate */
22083     {
22084         0x90ea0e5f,0x7a431d7c,0x1d7e819d,0x0a60b1ce,0xb5f0b8c0,0xe9da3113,
22085         0x289a147c,0xf8f41dbd,0x9292dc29,0x5d9e98bf,0x96262c6f,0x3617de4a,
22086         0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
22087     },
22088     /* Z ordinate */
22089     {
22090         0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
22091         0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
22092         0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
22093     },
22094     /* infinity */
22095     0
22096 };
22097 #if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
22098 static const sp_digit p384_b[12] = { /* The coefficient b of the curve P384. */
22099     0xd3ec2aef,0x2a85c8ed,0x8a2ed19d,0xc656398d,0x5013875a,0x0314088f,
22100     0xfe814112,0x181d9c6e,0xe3f82d19,0x988e056b,0xe23ee7e4,0xb3312fa7
22101 };
22102 #endif
22103 
22104 static int sp_384_point_new_ex_12(void* heap, sp_point_384* sp, sp_point_384** p)
22105 {
22106     int ret = MP_OKAY;
22107     (void)heap;
22108 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
22109     (void)sp;
22110     *p = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC);
22111 #else
22112     *p = sp;
22113 #endif
22114     if (*p == NULL) {
22115         ret = MEMORY_E;
22116     }
22117     return ret;
22118 }
22119 
22120 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
22121 /* Allocate memory for point and return error. The sp argument is not expanded, so it need not name a declared variable. */
22122 #define sp_384_point_new_12(heap, sp, p) sp_384_point_new_ex_12((heap), NULL, &(p))
22123 #else
22124 /* Set pointer p to the address of the local point sp and return no error. */
22125 #define sp_384_point_new_12(heap, sp, p) sp_384_point_new_ex_12((heap), &(sp), &(p))
22126 #endif
22127 
22128 
22129 static void sp_384_point_free_12(sp_point_384* p, int clear, void* heap)
22130 {
22131 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
22132 /* If valid pointer then clear point data if requested and free data. */
22133     if (p != NULL) {
22134         if (clear != 0) {
22135             XMEMSET(p, 0, sizeof(*p));
22136         }
22137         XFREE(p, heap, DYNAMIC_TYPE_ECC);
22138     }
22139 #else
22140 /* Clear point data if requested. */
22141     if (clear != 0) {
22142         XMEMSET(p, 0, sizeof(*p));
22143     }
22144 #endif
22145     (void)heap;
22146 }
22147 
22148 /* Multiply a number by Montogmery normalizer mod modulus (prime).
22149  *
22150  * r  The resulting Montgomery form number.
22151  * a  The number to convert.
22152  * m  The modulus (prime).
22153  * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
22154  */
22155 static int sp_384_mod_mul_norm_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
22156 {
22157 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
22158     int64_t* t;
22159 #else
22160     int64_t t[12];
22161 #endif
22162     int64_t o;
22163     int err = MP_OKAY;
22164 
22165     (void)m;
22166 
22167 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
22168     t = (int64_t*)XMALLOC(sizeof(int64_t) * 12, NULL, DYNAMIC_TYPE_ECC);
22169     if (t == NULL) {
22170         err = MEMORY_E;
22171     }
22172 #endif
22173 
22174     if (err == MP_OKAY) {
22175         /*  1  0  0  0  0  0  0  0  1  1  0 -1 */
22176         t[0] = 0 + (uint64_t)a[0] + (uint64_t)a[8] + (uint64_t)a[9] - (uint64_t)a[11];
22177         /* -1  1  0  0  0  0  0  0 -1  0  1  1 */
22178         t[1] = 0 - (uint64_t)a[0] + (uint64_t)a[1] - (uint64_t)a[8] + (uint64_t)a[10] + (uint64_t)a[11];
22179         /*  0 -1  1  0  0  0  0  0  0 -1  0  1 */
22180         t[2] = 0 - (uint64_t)a[1] + (uint64_t)a[2] - (uint64_t)a[9] + (uint64_t)a[11];
22181         /*  1  0 -1  1  0  0  0  0  1  1 -1 -1 */
22182         t[3] = 0 + (uint64_t)a[0] - (uint64_t)a[2] + (uint64_t)a[3] + (uint64_t)a[8] + (uint64_t)a[9] - (uint64_t)a[10] - (uint64_t)a[11];
22183         /*  1  1  0 -1  1  0  0  0  1  2  1 -2 */
22184         t[4] = 0 + (uint64_t)a[0] + (uint64_t)a[1] - (uint64_t)a[3] + (uint64_t)a[4] + (uint64_t)a[8] + 2 * (uint64_t)a[9] + (uint64_t)a[10] -  2 * (uint64_t)a[11];
22185         /*  0  1  1  0 -1  1  0  0  0  1  2  1 */
22186         t[5] = 0 + (uint64_t)a[1] + (uint64_t)a[2] - (uint64_t)a[4] + (uint64_t)a[5] + (uint64_t)a[9] + 2 * (uint64_t)a[10] + (uint64_t)a[11];
22187         /*  0  0  1  1  0 -1  1  0  0  0  1  2 */
22188         t[6] = 0 + (uint64_t)a[2] + (uint64_t)a[3] - (uint64_t)a[5] + (uint64_t)a[6] + (uint64_t)a[10] + 2 * (uint64_t)a[11];
22189         /*  0  0  0  1  1  0 -1  1  0  0  0  1 */
22190         t[7] = 0 + (uint64_t)a[3] + (uint64_t)a[4] - (uint64_t)a[6] + (uint64_t)a[7] + (uint64_t)a[11];
22191         /*  0  0  0  0  1  1  0 -1  1  0  0  0 */
22192         t[8] = 0 + (uint64_t)a[4] + (uint64_t)a[5] - (uint64_t)a[7] + (uint64_t)a[8];
22193         /*  0  0  0  0  0  1  1  0 -1  1  0  0 */
22194         t[9] = 0 + (uint64_t)a[5] + (uint64_t)a[6] - (uint64_t)a[8] + (uint64_t)a[9];
22195         /*  0  0  0  0  0  0  1  1  0 -1  1  0 */
22196         t[10] = 0 + (uint64_t)a[6] + (uint64_t)a[7] - (uint64_t)a[9] + (uint64_t)a[10];
22197         /*  0  0  0  0  0  0  0  1  1  0 -1  1 */
22198         t[11] = 0 + (uint64_t)a[7] + (uint64_t)a[8] - (uint64_t)a[10] + (uint64_t)a[11];
22199 
22200         t[1] += t[0] >> 32; t[0] &= 0xffffffff;
22201         t[2] += t[1] >> 32; t[1] &= 0xffffffff;
22202         t[3] += t[2] >> 32; t[2] &= 0xffffffff;
22203         t[4] += t[3] >> 32; t[3] &= 0xffffffff;
22204         t[5] += t[4] >> 32; t[4] &= 0xffffffff;
22205         t[6] += t[5] >> 32; t[5] &= 0xffffffff;
22206         t[7] += t[6] >> 32; t[6] &= 0xffffffff;
22207         t[8] += t[7] >> 32; t[7] &= 0xffffffff;
22208         t[9] += t[8] >> 32; t[8] &= 0xffffffff;
22209         t[10] += t[9] >> 32; t[9] &= 0xffffffff;
22210         t[11] += t[10] >> 32; t[10] &= 0xffffffff;
22211         o     = t[11] >> 32; t[11] &= 0xffffffff;
22212         t[0] += o;
22213         t[1] -= o;
22214         t[3] += o;
22215         t[4] += o;
22216         t[1] += t[0] >> 32; t[0] &= 0xffffffff;
22217         t[2] += t[1] >> 32; t[1] &= 0xffffffff;
22218         t[3] += t[2] >> 32; t[2] &= 0xffffffff;
22219         t[4] += t[3] >> 32; t[3] &= 0xffffffff;
22220         t[5] += t[4] >> 32; t[4] &= 0xffffffff;
22221         t[6] += t[5] >> 32; t[5] &= 0xffffffff;
22222         t[7] += t[6] >> 32; t[6] &= 0xffffffff;
22223         t[8] += t[7] >> 32; t[7] &= 0xffffffff;
22224         t[9] += t[8] >> 32; t[8] &= 0xffffffff;
22225         t[10] += t[9] >> 32; t[9] &= 0xffffffff;
22226         t[11] += t[10] >> 32; t[10] &= 0xffffffff;
22227 
22228         r[0] = t[0];
22229         r[1] = t[1];
22230         r[2] = t[2];
22231         r[3] = t[3];
22232         r[4] = t[4];
22233         r[5] = t[5];
22234         r[6] = t[6];
22235         r[7] = t[7];
22236         r[8] = t[8];
22237         r[9] = t[9];
22238         r[10] = t[10];
22239         r[11] = t[11];
22240     }
22241 
22242 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
22243     if (t != NULL)
22244         XFREE(t, NULL, DYNAMIC_TYPE_ECC);
22245 #endif
22246 
22247     return err;
22248 }
22249 
22250 /* Convert an mp_int to an array of sp_digit.
22251  *
22252  * r  A single precision integer.
22253  * size  Maximum number of bytes to convert
22254  * a  A multi-precision integer.
22255  */
22256 static void sp_384_from_mp(sp_digit* r, int size, const mp_int* a)
22257 {
22258 #if DIGIT_BIT == 32
22259     int j;
22260 
22261     XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
22262 
22263     for (j = a->used; j < size; j++) {
22264         r[j] = 0;
22265     }
22266 #elif DIGIT_BIT > 32
22267     int i, j = 0;
22268     word32 s = 0;
22269 
22270     r[0] = 0;
22271     for (i = 0; i < a->used && j < size; i++) {
22272         r[j] |= ((sp_digit)a->dp[i] << s);
22273         r[j] &= 0xffffffff;
22274         s = 32U - s;
22275         if (j + 1 >= size) {
22276             break;
22277         }
22278         /* lint allow cast of mismatch word32 and mp_digit */
22279         r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
22280         while ((s + 32U) <= (word32)DIGIT_BIT) {
22281             s += 32U;
22282             r[j] &= 0xffffffff;
22283             if (j + 1 >= size) {
22284                 break;
22285             }
22286             if (s < (word32)DIGIT_BIT) {
22287                 /* lint allow cast of mismatch word32 and mp_digit */
22288                 r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
22289             }
22290             else {
22291                 r[++j] = 0L;
22292             }
22293         }
22294         s = (word32)DIGIT_BIT - s;
22295     }
22296 
22297     for (j++; j < size; j++) {
22298         r[j] = 0;
22299     }
22300 #else
22301     int i, j = 0, s = 0;
22302 
22303     r[0] = 0;
22304     for (i = 0; i < a->used && j < size; i++) {
22305         r[j] |= ((sp_digit)a->dp[i]) << s;
22306         if (s + DIGIT_BIT >= 32) {
22307             r[j] &= 0xffffffff;
22308             if (j + 1 >= size) {
22309                 break;
22310             }
22311             s = 32 - s;
22312             if (s == DIGIT_BIT) {
22313                 r[++j] = 0;
22314                 s = 0;
22315             }
22316             else {
22317                 r[++j] = a->dp[i] >> s;
22318                 s = DIGIT_BIT - s;
22319             }
22320         }
22321         else {
22322             s += DIGIT_BIT;
22323         }
22324     }
22325 
22326     for (j++; j < size; j++) {
22327         r[j] = 0;
22328     }
22329 #endif
22330 }
22331 
22332 /* Convert a point of type ecc_point to type sp_point_384.
22333  *
22334  * p   Point of type sp_point_384 (result).
22335  * pm  Point of type ecc_point.
22336  */
22337 static void sp_384_point_from_ecc_point_12(sp_point_384* p, const ecc_point* pm)
22338 {
22339     XMEMSET(p->x, 0, sizeof(p->x));
22340     XMEMSET(p->y, 0, sizeof(p->y));
22341     XMEMSET(p->z, 0, sizeof(p->z));
22342     sp_384_from_mp(p->x, 12, pm->x);
22343     sp_384_from_mp(p->y, 12, pm->y);
22344     sp_384_from_mp(p->z, 12, pm->z);
22345     p->infinity = 0;
22346 }
22347 
22348 /* Convert an array of sp_digit to an mp_int.
22349  *
22350  * a  A single precision integer.
22351  * r  A multi-precision integer.
22352  */
22353 static int sp_384_to_mp(const sp_digit* a, mp_int* r)
22354 {
22355     int err;
22356 
22357     err = mp_grow(r, (384 + DIGIT_BIT - 1) / DIGIT_BIT);
22358     if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
22359 #if DIGIT_BIT == 32
22360         XMEMCPY(r->dp, a, sizeof(sp_digit) * 12);
22361         r->used = 12;
22362         mp_clamp(r);
22363 #elif DIGIT_BIT < 32
22364         int i, j = 0, s = 0;
22365 
22366         r->dp[0] = 0;
22367         for (i = 0; i < 12; i++) {
22368             r->dp[j] |= (mp_digit)(a[i] << s);
22369             r->dp[j] &= (1L << DIGIT_BIT) - 1;
22370             s = DIGIT_BIT - s;
22371             r->dp[++j] = (mp_digit)(a[i] >> s);
22372             while (s + DIGIT_BIT <= 32) {
22373                 s += DIGIT_BIT;
22374                 r->dp[j++] &= (1L << DIGIT_BIT) - 1;
22375                 if (s == SP_WORD_SIZE) {
22376                     r->dp[j] = 0;
22377                 }
22378                 else {
22379                     r->dp[j] = (mp_digit)(a[i] >> s);
22380                 }
22381             }
22382             s = 32 - s;
22383         }
22384         r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT;
22385         mp_clamp(r);
22386 #else
22387         int i, j = 0, s = 0;
22388 
22389         r->dp[0] = 0;
22390         for (i = 0; i < 12; i++) {
22391             r->dp[j] |= ((mp_digit)a[i]) << s;
22392             if (s + 32 >= DIGIT_BIT) {
22393     #if DIGIT_BIT != 32 && DIGIT_BIT != 64
22394                 r->dp[j] &= (1L << DIGIT_BIT) - 1;
22395     #endif
22396                 s = DIGIT_BIT - s;
22397                 r->dp[++j] = a[i] >> s;
22398                 s = 32 - s;
22399             }
22400             else {
22401                 s += 32;
22402             }
22403         }
22404         r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT;
22405         mp_clamp(r);
22406 #endif
22407     }
22408 
22409     return err;
22410 }
22411 
22412 /* Convert a point of type sp_point_384 to type ecc_point.
22413  *
22414  * p   Point of type sp_point_384.
22415  * pm  Point of type ecc_point (result).
22416  * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
22417  * MP_OKAY.
22418  */
22419 static int sp_384_point_to_ecc_point_12(const sp_point_384* p, ecc_point* pm)
22420 {
22421     int err;
22422 
22423     err = sp_384_to_mp(p->x, pm->x);
22424     if (err == MP_OKAY) {
22425         err = sp_384_to_mp(p->y, pm->y);
22426     }
22427     if (err == MP_OKAY) {
22428         err = sp_384_to_mp(p->z, pm->z);
22429     }
22430 
22431     return err;
22432 }
22433 
22434 /* Multiply a and b into r. (r = a * b)
22435  *
22436  * r  A single precision integer.
22437  * a  A single precision integer.
22438  * b  A single precision integer.
22439  */
22440 SP_NOINLINE static void sp_384_mul_12(sp_digit* r, const sp_digit* a,
22441         const sp_digit* b)
22442 {
22443     sp_digit tmp[12 * 2];
22444     __asm__ __volatile__ (
22445         "mov    r3, #0\n\t"
22446         "mov    r4, #0\n\t"
22447         "mov    r8, r3\n\t"
22448         "mov    r11, %[r]\n\t"
22449         "mov    r9, %[a]\n\t"
22450         "mov    r10, %[b]\n\t"
22451         "mov    r6, #48\n\t"
22452         "add    r6, r9\n\t"
22453         "mov    r12, r6\n\t"
22454         "\n1:\n\t"
22455         "mov    %[r], #0\n\t"
22456         "mov    r5, #0\n\t"
22457         "mov    r6, #44\n\t"
22458         "mov    %[a], r8\n\t"
22459         "sub    %[a], r6\n\t"
22460         "sbc    r6, r6\n\t"
22461         "mvn    r6, r6\n\t"
22462         "and    %[a], r6\n\t"
22463         "mov    %[b], r8\n\t"
22464         "sub    %[b], %[a]\n\t"
22465         "add    %[a], r9\n\t"
22466         "add    %[b], r10\n\t"
22467         "\n2:\n\t"
22468         "# Multiply Start\n\t"
22469         "ldr    r6, [%[a]]\n\t"
22470         "ldr    r7, [%[b]]\n\t"
22471         "lsl    r6, r6, #16\n\t"
22472         "lsl    r7, r7, #16\n\t"
22473         "lsr    r6, r6, #16\n\t"
22474         "lsr    r7, r7, #16\n\t"
22475         "mul    r7, r6\n\t"
22476         "add    r3, r7\n\t"
22477         "adc    r4, %[r]\n\t"
22478         "adc    r5, %[r]\n\t"
22479         "ldr    r7, [%[b]]\n\t"
22480         "lsr    r7, r7, #16\n\t"
22481         "mul    r6, r7\n\t"
22482         "lsr    r7, r6, #16\n\t"
22483         "lsl    r6, r6, #16\n\t"
22484         "add    r3, r6\n\t"
22485         "adc    r4, r7\n\t"
22486         "adc    r5, %[r]\n\t"
22487         "ldr    r6, [%[a]]\n\t"
22488         "ldr    r7, [%[b]]\n\t"
22489         "lsr    r6, r6, #16\n\t"
22490         "lsr    r7, r7, #16\n\t"
22491         "mul    r7, r6\n\t"
22492         "add    r4, r7\n\t"
22493         "adc    r5, %[r]\n\t"
22494         "ldr    r7, [%[b]]\n\t"
22495         "lsl    r7, r7, #16\n\t"
22496         "lsr    r7, r7, #16\n\t"
22497         "mul    r6, r7\n\t"
22498         "lsr    r7, r6, #16\n\t"
22499         "lsl    r6, r6, #16\n\t"
22500         "add    r3, r6\n\t"
22501         "adc    r4, r7\n\t"
22502         "adc    r5, %[r]\n\t"
22503         "# Multiply Done\n\t"
22504         "add    %[a], #4\n\t"
22505         "sub    %[b], #4\n\t"
22506         "cmp    %[a], r12\n\t"
22507         "beq    3f\n\t"
22508         "mov    r6, r8\n\t"
22509         "add    r6, r9\n\t"
22510         "cmp    %[a], r6\n\t"
22511         "ble    2b\n\t"
22512         "\n3:\n\t"
22513         "mov    %[r], r11\n\t"
22514         "mov    r7, r8\n\t"
22515         "str    r3, [%[r], r7]\n\t"
22516         "mov    r3, r4\n\t"
22517         "mov    r4, r5\n\t"
22518         "add    r7, #4\n\t"
22519         "mov    r8, r7\n\t"
22520         "mov    r6, #88\n\t"
22521         "cmp    r7, r6\n\t"
22522         "ble    1b\n\t"
22523         "str    r3, [%[r], r7]\n\t"
22524         "mov    %[a], r9\n\t"
22525         "mov    %[b], r10\n\t"
22526         :
22527         : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
22528         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
22529     );
22530 
22531     XMEMCPY(r, tmp, sizeof(tmp));
22532 }
22533 
22534 /* Conditionally subtract b from a using the mask m.
22535  * m is -1 to subtract and 0 when not copying.
22536  *
22537  * r  A single precision number representing condition subtract result.
22538  * a  A single precision number to subtract from.
22539  * b  A single precision number to subtract.
22540  * m  Mask value to apply.
22541  */
22542 SP_NOINLINE static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a,
22543         const sp_digit* b, sp_digit m)
22544 {
22545     sp_digit c = 0;
22546 
22547     __asm__ __volatile__ (
22548         "mov    r5, #48\n\t"
22549         "mov    r8, r5\n\t"
22550         "mov    r7, #0\n\t"
22551         "1:\n\t"
22552         "ldr    r6, [%[b], r7]\n\t"
22553         "and    r6, %[m]\n\t"
22554         "mov    r5, #0\n\t"
22555         "sub    r5, %[c]\n\t"
22556         "ldr    r5, [%[a], r7]\n\t"
22557         "sbc    r5, r6\n\t"
22558         "sbc    %[c], %[c]\n\t"
22559         "str    r5, [%[r], r7]\n\t"
22560         "add    r7, #4\n\t"
22561         "cmp    r7, r8\n\t"
22562         "blt    1b\n\t"
22563         : [c] "+r" (c)
22564         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
22565         : "memory", "r5", "r6", "r7", "r8"
22566     );
22567 
22568     return c;
22569 }
22570 
22571 #define sp_384_mont_reduce_order_12   sp_384_mont_reduce_12
22572 
22573 /* Reduce the number back to 384 bits using Montgomery reduction.
22574  *
22575  * a   A single precision number to reduce in place.
22576  * m   The single precision number representing the modulus.
22577  * mp  The digit representing the negative inverse of m mod 2^n.
22578  */
22579 SP_NOINLINE static void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m,
22580         sp_digit mp)
22581 {
22582     sp_digit ca = 0;
22583 
22584     __asm__ __volatile__ (
22585         "mov    r8, %[mp]\n\t"
22586         "mov    r12, %[ca]\n\t"
22587         "mov    r14, %[m]\n\t"
22588         "mov    r9, %[a]\n\t"
22589         "mov    r4, #0\n\t"
22590         "# i = 0\n\t"
22591         "mov    r11, r4\n\t"
22592         "\n1:\n\t"
22593         "mov    r5, #0\n\t"
22594         "mov    %[ca], #0\n\t"
22595         "# mu = a[i] * mp\n\t"
22596         "mov    %[mp], r8\n\t"
22597         "ldr    %[a], [%[a]]\n\t"
22598         "mul    %[mp], %[a]\n\t"
22599         "mov    %[m], r14\n\t"
22600         "mov    r10, r9\n\t"
22601         "\n2:\n\t"
22602         "# a[i+j] += m[j] * mu\n\t"
22603         "mov    %[a], r10\n\t"
22604         "ldr    %[a], [%[a]]\n\t"
22605         "mov    %[ca], #0\n\t"
22606         "mov    r4, r5\n\t"
22607         "mov    r5, #0\n\t"
22608         "# Multiply m[j] and mu - Start\n\t"
22609         "ldr    r7, [%[m]]\n\t"
22610         "lsl    r6, %[mp], #16\n\t"
22611         "lsl    r7, r7, #16\n\t"
22612         "lsr    r6, r6, #16\n\t"
22613         "lsr    r7, r7, #16\n\t"
22614         "mul    r7, r6\n\t"
22615         "add    %[a], r7\n\t"
22616         "adc    r5, %[ca]\n\t"
22617         "ldr    r7, [%[m]]\n\t"
22618         "lsr    r7, r7, #16\n\t"
22619         "mul    r6, r7\n\t"
22620         "lsr    r7, r6, #16\n\t"
22621         "lsl    r6, r6, #16\n\t"
22622         "add    %[a], r6\n\t"
22623         "adc    r5, r7\n\t"
22624         "ldr    r7, [%[m]]\n\t"
22625         "lsr    r6, %[mp], #16\n\t"
22626         "lsr    r7, r7, #16\n\t"
22627         "mul    r7, r6\n\t"
22628         "add    r5, r7\n\t"
22629         "ldr    r7, [%[m]]\n\t"
22630         "lsl    r7, r7, #16\n\t"
22631         "lsr    r7, r7, #16\n\t"
22632         "mul    r6, r7\n\t"
22633         "lsr    r7, r6, #16\n\t"
22634         "lsl    r6, r6, #16\n\t"
22635         "add    %[a], r6\n\t"
22636         "adc    r5, r7\n\t"
22637         "# Multiply m[j] and mu - Done\n\t"
22638         "add    r4, %[a]\n\t"
22639         "adc    r5, %[ca]\n\t"
22640         "mov    %[a], r10\n\t"
22641         "str    r4, [%[a]]\n\t"
22642         "mov    r6, #4\n\t"
22643         "add    %[m], #4\n\t"
22644         "add    r10, r6\n\t"
22645         "mov    r4, #44\n\t"
22646         "add    r4, r9\n\t"
22647         "cmp    r10, r4\n\t"
22648         "blt    2b\n\t"
22649         "# a[i+11] += m[11] * mu\n\t"
22650         "mov    %[ca], #0\n\t"
22651         "mov    r4, r12\n\t"
22652         "mov    %[a], #0\n\t"
22653         "# Multiply m[11] and mu - Start\n\t"
22654         "ldr    r7, [%[m]]\n\t"
22655         "lsl    r6, %[mp], #16\n\t"
22656         "lsl    r7, r7, #16\n\t"
22657         "lsr    r6, r6, #16\n\t"
22658         "lsr    r7, r7, #16\n\t"
22659         "mul    r7, r6\n\t"
22660         "add    r5, r7\n\t"
22661         "adc    r4, %[ca]\n\t"
22662         "adc    %[a], %[ca]\n\t"
22663         "ldr    r7, [%[m]]\n\t"
22664         "lsr    r7, r7, #16\n\t"
22665         "mul    r6, r7\n\t"
22666         "lsr    r7, r6, #16\n\t"
22667         "lsl    r6, r6, #16\n\t"
22668         "add    r5, r6\n\t"
22669         "adc    r4, r7\n\t"
22670         "adc    %[a], %[ca]\n\t"
22671         "ldr    r7, [%[m]]\n\t"
22672         "lsr    r6, %[mp], #16\n\t"
22673         "lsr    r7, r7, #16\n\t"
22674         "mul    r7, r6\n\t"
22675         "add    r4, r7\n\t"
22676         "adc    %[a], %[ca]\n\t"
22677         "ldr    r7, [%[m]]\n\t"
22678         "lsl    r7, r7, #16\n\t"
22679         "lsr    r7, r7, #16\n\t"
22680         "mul    r6, r7\n\t"
22681         "lsr    r7, r6, #16\n\t"
22682         "lsl    r6, r6, #16\n\t"
22683         "add    r5, r6\n\t"
22684         "adc    r4, r7\n\t"
22685         "adc    %[a], %[ca]\n\t"
22686         "# Multiply m[11] and mu - Done\n\t"
22687         "mov    %[ca], %[a]\n\t"
22688         "mov    %[a], r10\n\t"
22689         "ldr    r7, [%[a], #4]\n\t"
22690         "ldr    %[a], [%[a]]\n\t"
22691         "mov    r6, #0\n\t"
22692         "add    r5, %[a]\n\t"
22693         "adc    r7, r4\n\t"
22694         "adc    %[ca], r6\n\t"
22695         "mov    %[a], r10\n\t"
22696         "str    r5, [%[a]]\n\t"
22697         "str    r7, [%[a], #4]\n\t"
22698         "# i += 1\n\t"
22699         "mov    r6, #4\n\t"
22700         "add    r9, r6\n\t"
22701         "add    r11, r6\n\t"
22702         "mov    r12, %[ca]\n\t"
22703         "mov    %[a], r9\n\t"
22704         "mov    r4, #48\n\t"
22705         "cmp    r11, r4\n\t"
22706         "blt    1b\n\t"
22707         "mov    %[m], r14\n\t"
22708         : [ca] "+r" (ca), [a] "+r" (a)
22709         : [m] "r" (m), [mp] "r" (mp)
22710         : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14"
22711     );
22712 
22713     sp_384_cond_sub_12(a - 12, a, m, (sp_digit)0 - ca);
22714 }
22715 
22716 /* Multiply two Montogmery form numbers mod the modulus (prime).
22717  * (r = a * b mod m)
22718  *
22719  * r   Result of multiplication.
22720  * a   First number to multiply in Montogmery form.
22721  * b   Second number to multiply in Montogmery form.
22722  * m   Modulus (prime).
22723  * mp  Montogmery mulitplier.
22724  */
22725 static void sp_384_mont_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
22726         const sp_digit* m, sp_digit mp)
22727 {
22728     sp_384_mul_12(r, a, b);
22729     sp_384_mont_reduce_12(r, m, mp);
22730 }
22731 
22732 /* Square a and put result in r. (r = a * a)
22733  *
22734  * r  A single precision integer.
22735  * a  A single precision integer.
22736  */
22737 SP_NOINLINE static void sp_384_sqr_12(sp_digit* r, const sp_digit* a)
22738 {
22739     __asm__ __volatile__ (
22740         "mov    r3, #0\n\t"
22741         "mov    r4, #0\n\t"
22742         "mov    r5, #0\n\t"
22743         "mov    r8, r3\n\t"
22744         "mov    r11, %[r]\n\t"
22745         "mov    r6, #96\n\t"
22746         "neg    r6, r6\n\t"
22747         "add    sp, r6\n\t"
22748         "mov    r10, sp\n\t"
22749         "mov    r9, %[a]\n\t"
22750         "\n1:\n\t"
22751         "mov    %[r], #0\n\t"
22752         "mov    r6, #44\n\t"
22753         "mov    %[a], r8\n\t"
22754         "sub    %[a], r6\n\t"
22755         "sbc    r6, r6\n\t"
22756         "mvn    r6, r6\n\t"
22757         "and    %[a], r6\n\t"
22758         "mov    r2, r8\n\t"
22759         "sub    r2, %[a]\n\t"
22760         "add    %[a], r9\n\t"
22761         "add    r2, r9\n\t"
22762         "\n2:\n\t"
22763         "cmp    r2, %[a]\n\t"
22764         "beq    4f\n\t"
22765         "# Multiply * 2: Start\n\t"
22766         "ldr    r6, [%[a]]\n\t"
22767         "ldr    r7, [r2]\n\t"
22768         "lsl    r6, r6, #16\n\t"
22769         "lsl    r7, r7, #16\n\t"
22770         "lsr    r6, r6, #16\n\t"
22771         "lsr    r7, r7, #16\n\t"
22772         "mul    r7, r6\n\t"
22773         "add    r3, r7\n\t"
22774         "adc    r4, %[r]\n\t"
22775         "adc    r5, %[r]\n\t"
22776         "add    r3, r7\n\t"
22777         "adc    r4, %[r]\n\t"
22778         "adc    r5, %[r]\n\t"
22779         "ldr    r7, [r2]\n\t"
22780         "lsr    r7, r7, #16\n\t"
22781         "mul    r6, r7\n\t"
22782         "lsr    r7, r6, #16\n\t"
22783         "lsl    r6, r6, #16\n\t"
22784         "add    r3, r6\n\t"
22785         "adc    r4, r7\n\t"
22786         "adc    r5, %[r]\n\t"
22787         "add    r3, r6\n\t"
22788         "adc    r4, r7\n\t"
22789         "adc    r5, %[r]\n\t"
22790         "ldr    r6, [%[a]]\n\t"
22791         "ldr    r7, [r2]\n\t"
22792         "lsr    r6, r6, #16\n\t"
22793         "lsr    r7, r7, #16\n\t"
22794         "mul    r7, r6\n\t"
22795         "add    r4, r7\n\t"
22796         "adc    r5, %[r]\n\t"
22797         "add    r4, r7\n\t"
22798         "adc    r5, %[r]\n\t"
22799         "ldr    r7, [r2]\n\t"
22800         "lsl    r7, r7, #16\n\t"
22801         "lsr    r7, r7, #16\n\t"
22802         "mul    r6, r7\n\t"
22803         "lsr    r7, r6, #16\n\t"
22804         "lsl    r6, r6, #16\n\t"
22805         "add    r3, r6\n\t"
22806         "adc    r4, r7\n\t"
22807         "adc    r5, %[r]\n\t"
22808         "add    r3, r6\n\t"
22809         "adc    r4, r7\n\t"
22810         "adc    r5, %[r]\n\t"
22811         "# Multiply * 2: Done\n\t"
22812         "bal    5f\n\t"
22813         "\n4:\n\t"
22814         "# Square: Start\n\t"
22815         "ldr    r6, [%[a]]\n\t"
22816         "lsr    r7, r6, #16\n\t"
22817         "lsl    r6, r6, #16\n\t"
22818         "lsr    r6, r6, #16\n\t"
22819         "mul    r6, r6\n\t"
22820         "add    r3, r6\n\t"
22821         "adc    r4, %[r]\n\t"
22822         "adc    r5, %[r]\n\t"
22823         "mul    r7, r7\n\t"
22824         "add    r4, r7\n\t"
22825         "adc    r5, %[r]\n\t"
22826         "ldr    r6, [%[a]]\n\t"
22827         "lsr    r7, r6, #16\n\t"
22828         "lsl    r6, r6, #16\n\t"
22829         "lsr    r6, r6, #16\n\t"
22830         "mul    r6, r7\n\t"
22831         "lsr    r7, r6, #15\n\t"
22832         "lsl    r6, r6, #17\n\t"
22833         "add    r3, r6\n\t"
22834         "adc    r4, r7\n\t"
22835         "adc    r5, %[r]\n\t"
22836         "# Square: Done\n\t"
22837         "\n5:\n\t"
22838         "add    %[a], #4\n\t"
22839         "sub    r2, #4\n\t"
22840         "mov    r6, #48\n\t"
22841         "add    r6, r9\n\t"
22842         "cmp    %[a], r6\n\t"
22843         "beq    3f\n\t"
22844         "cmp    %[a], r2\n\t"
22845         "bgt    3f\n\t"
22846         "mov    r7, r8\n\t"
22847         "add    r7, r9\n\t"
22848         "cmp    %[a], r7\n\t"
22849         "ble    2b\n\t"
22850         "\n3:\n\t"
22851         "mov    %[r], r10\n\t"
22852         "mov    r7, r8\n\t"
22853         "str    r3, [%[r], r7]\n\t"
22854         "mov    r3, r4\n\t"
22855         "mov    r4, r5\n\t"
22856         "mov    r5, #0\n\t"
22857         "add    r7, #4\n\t"
22858         "mov    r8, r7\n\t"
22859         "mov    r6, #88\n\t"
22860         "cmp    r7, r6\n\t"
22861         "ble    1b\n\t"
22862         "mov    %[a], r9\n\t"
22863         "str    r3, [%[r], r7]\n\t"
22864         "mov    %[r], r11\n\t"
22865         "mov    %[a], r10\n\t"
22866         "mov    r3, #92\n\t"
22867         "\n4:\n\t"
22868         "ldr    r6, [%[a], r3]\n\t"
22869         "str    r6, [%[r], r3]\n\t"
22870         "sub    r3, #4\n\t"
22871         "bge    4b\n\t"
22872         "mov    r6, #96\n\t"
22873         "add    sp, r6\n\t"
22874         :
22875         : [r] "r" (r), [a] "r" (a)
22876         : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
22877     );
22878 }
22879 
22880 /* Square the Montgomery form number. (r = a * a mod m)
22881  *
22882  * r   Result of squaring.
22883  * a   Number to square in Montogmery form.
22884  * m   Modulus (prime).
22885  * mp  Montogmery mulitplier.
22886  */
22887 static void sp_384_mont_sqr_12(sp_digit* r, const sp_digit* a, const sp_digit* m,
22888         sp_digit mp)
22889 {
22890     sp_384_sqr_12(r, a);
22891     sp_384_mont_reduce_12(r, m, mp);
22892 }
22893 
22894 #if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
22895 /* Square the Montgomery form number a number of times. (r = a ^ n mod m)
22896  *
22897  * r   Result of squaring.
22898  * a   Number to square in Montogmery form.
22899  * n   Number of times to square.
22900  * m   Modulus (prime).
22901  * mp  Montogmery mulitplier.
22902  */
22903 static void sp_384_mont_sqr_n_12(sp_digit* r, const sp_digit* a, int n,
22904         const sp_digit* m, sp_digit mp)
22905 {
22906     sp_384_mont_sqr_12(r, a, m, mp);
22907     for (; n > 1; n--) {
22908         sp_384_mont_sqr_12(r, r, m, mp);
22909     }
22910 }
22911 
22912 #endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
22913 #ifdef WOLFSSL_SP_SMALL
/* The P384 prime minus 2, as twelve 32-bit words, least significant first.
 * Used as the exponent when inverting by exponentiation. */
static const uint32_t p384_mod_minus_2[12] = {
    0xfffffffdU,    /* word  0 */
    0x00000000U,    /* word  1 */
    0x00000000U,    /* word  2 */
    0xffffffffU,    /* word  3 */
    0xfffffffeU,    /* word  4 */
    0xffffffffU,    /* word  5 */
    0xffffffffU,    /* word  6 */
    0xffffffffU,    /* word  7 */
    0xffffffffU,    /* word  8 */
    0xffffffffU,    /* word  9 */
    0xffffffffU,    /* word 10 */
    0xffffffffU     /* word 11 */
};
22919 #endif /* !WOLFSSL_SP_SMALL */
22920 
22921 /* Invert the number, in Montgomery form, modulo the modulus (prime) of the
22922  * P384 curve. (r = 1 / a mod m)
22923  *
22924  * r   Inverse result.
22925  * a   Number to invert.
22926  * td  Temporary data.
22927  */
22928 static void sp_384_mont_inv_12(sp_digit* r, const sp_digit* a, sp_digit* td)
22929 {
22930 #ifdef WOLFSSL_SP_SMALL
22931     sp_digit* t = td;
22932     int i;
22933 
22934     XMEMCPY(t, a, sizeof(sp_digit) * 12);
22935     for (i=382; i>=0; i--) {
22936         sp_384_mont_sqr_12(t, t, p384_mod, p384_mp_mod);
22937         if (p384_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32)))
22938             sp_384_mont_mul_12(t, t, a, p384_mod, p384_mp_mod);
22939     }
22940     XMEMCPY(r, t, sizeof(sp_digit) * 12);
22941 #else
22942     sp_digit* t1 = td;
22943     sp_digit* t2 = td + 2 * 12;
22944     sp_digit* t3 = td + 4 * 12;
22945     sp_digit* t4 = td + 6 * 12;
22946     sp_digit* t5 = td + 8 * 12;
22947 
22948     /* 0x2 */
22949     sp_384_mont_sqr_12(t1, a, p384_mod, p384_mp_mod);
22950     /* 0x3 */
22951     sp_384_mont_mul_12(t5, t1, a, p384_mod, p384_mp_mod);
22952     /* 0xc */
22953     sp_384_mont_sqr_n_12(t1, t5, 2, p384_mod, p384_mp_mod);
22954     /* 0xf */
22955     sp_384_mont_mul_12(t2, t5, t1, p384_mod, p384_mp_mod);
22956     /* 0x1e */
22957     sp_384_mont_sqr_12(t1, t2, p384_mod, p384_mp_mod);
22958     /* 0x1f */
22959     sp_384_mont_mul_12(t4, t1, a, p384_mod, p384_mp_mod);
22960     /* 0x3e0 */
22961     sp_384_mont_sqr_n_12(t1, t4, 5, p384_mod, p384_mp_mod);
22962     /* 0x3ff */
22963     sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod);
22964     /* 0x7fe0 */
22965     sp_384_mont_sqr_n_12(t1, t2, 5, p384_mod, p384_mp_mod);
22966     /* 0x7fff */
22967     sp_384_mont_mul_12(t4, t4, t1, p384_mod, p384_mp_mod);
22968     /* 0x3fff8000 */
22969     sp_384_mont_sqr_n_12(t1, t4, 15, p384_mod, p384_mp_mod);
22970     /* 0x3fffffff */
22971     sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod);
22972     /* 0xfffffffc */
22973     sp_384_mont_sqr_n_12(t3, t2, 2, p384_mod, p384_mp_mod);
22974     /* 0xfffffffd */
22975     sp_384_mont_mul_12(r, t3, a, p384_mod, p384_mp_mod);
22976     /* 0xffffffff */
22977     sp_384_mont_mul_12(t3, t5, t3, p384_mod, p384_mp_mod);
22978     /* 0xfffffffc0000000 */
22979     sp_384_mont_sqr_n_12(t1, t2, 30, p384_mod, p384_mp_mod);
22980     /* 0xfffffffffffffff */
22981     sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod);
22982     /* 0xfffffffffffffff000000000000000 */
22983     sp_384_mont_sqr_n_12(t1, t2, 60, p384_mod, p384_mp_mod);
22984     /* 0xffffffffffffffffffffffffffffff */
22985     sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod);
22986     /* 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
22987     sp_384_mont_sqr_n_12(t1, t2, 120, p384_mod, p384_mp_mod);
22988     /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
22989     sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod);
22990     /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
22991     sp_384_mont_sqr_n_12(t1, t2, 15, p384_mod, p384_mp_mod);
22992     /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
22993     sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod);
22994     /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000 */
22995     sp_384_mont_sqr_n_12(t1, t2, 33, p384_mod, p384_mp_mod);
22996     /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff */
22997     sp_384_mont_mul_12(t2, t3, t1, p384_mod, p384_mp_mod);
22998     /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff000000000000000000000000 */
22999     sp_384_mont_sqr_n_12(t1, t2, 96, p384_mod, p384_mp_mod);
23000     /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000fffffffd */
23001     sp_384_mont_mul_12(r, r, t1, p384_mod, p384_mp_mod);
23002 
23003 #endif /* WOLFSSL_SP_SMALL */
23004 }
23005 
23006 /* Compare a with b in constant time.
23007  *
23008  * a  A single precision integer.
23009  * b  A single precision integer.
23010  * return -ve, 0 or +ve if a is less than, equal to or greater than b
23011  * respectively.
23012  */
23013 SP_NOINLINE static int32_t sp_384_cmp_12(const sp_digit* a, const sp_digit* b)
23014 {
23015     sp_digit r = 0;
23016 
23017 
23018     __asm__ __volatile__ (
23019         "mov    r3, #0\n\t"
23020         "mvn    r3, r3\n\t"
23021         "mov    r6, #44\n\t"
23022         "1:\n\t"
23023         "ldr    r7, [%[a], r6]\n\t"
23024         "ldr    r5, [%[b], r6]\n\t"
23025         "and    r7, r3\n\t"
23026         "and    r5, r3\n\t"
23027         "mov    r4, r7\n\t"
23028         "sub    r7, r5\n\t"
23029         "sbc    r7, r7\n\t"
23030         "add    %[r], r7\n\t"
23031         "mvn    r7, r7\n\t"
23032         "and    r3, r7\n\t"
23033         "sub    r5, r4\n\t"
23034         "sbc    r7, r7\n\t"
23035         "sub    %[r], r7\n\t"
23036         "mvn    r7, r7\n\t"
23037         "and    r3, r7\n\t"
23038         "sub    r6, #4\n\t"
23039         "cmp    r6, #0\n\t"
23040         "bge    1b\n\t"
23041         : [r] "+r" (r)
23042         : [a] "r" (a), [b] "r" (b)
23043         : "r3", "r4", "r5", "r6", "r7"
23044     );
23045 
23046     return r;
23047 }
23048 
23049 /* Normalize the values in each word to 32.
23050  *
23051  * a  Array of sp_digit to normalize.
23052  */
23053 #define sp_384_norm_12(a)
23054 
23055 /* Map the Montgomery form projective coordinate point to an affine point.
23056  *
23057  * r  Resulting affine coordinate point.
23058  * p  Montgomery form projective coordinate point.
23059  * t  Temporary ordinate data.
23060  */
23061 static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, sp_digit* t)
23062 {
23063     sp_digit* t1 = t;
23064     sp_digit* t2 = t + 2*12;
23065     int32_t n;
23066 
23067     sp_384_mont_inv_12(t1, p->z, t + 2*12);
23068 
23069     sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod);
23070     sp_384_mont_mul_12(t1, t2, t1, p384_mod, p384_mp_mod);
23071 
23072     /* x /= z^2 */
23073     sp_384_mont_mul_12(r->x, p->x, t2, p384_mod, p384_mp_mod);
23074     XMEMSET(r->x + 12, 0, sizeof(r->x) / 2U);
23075     sp_384_mont_reduce_12(r->x, p384_mod, p384_mp_mod);
23076     /* Reduce x to less than modulus */
23077     n = sp_384_cmp_12(r->x, p384_mod);
23078     sp_384_cond_sub_12(r->x, r->x, p384_mod, 0 - ((n >= 0) ?
23079                 (sp_digit)1 : (sp_digit)0));
23080     sp_384_norm_12(r->x);
23081 
23082     /* y /= z^3 */
23083     sp_384_mont_mul_12(r->y, p->y, t1, p384_mod, p384_mp_mod);
23084     XMEMSET(r->y + 12, 0, sizeof(r->y) / 2U);
23085     sp_384_mont_reduce_12(r->y, p384_mod, p384_mp_mod);
23086     /* Reduce y to less than modulus */
23087     n = sp_384_cmp_12(r->y, p384_mod);
23088     sp_384_cond_sub_12(r->y, r->y, p384_mod, 0 - ((n >= 0) ?
23089                 (sp_digit)1 : (sp_digit)0));
23090     sp_384_norm_12(r->y);
23091 
23092     XMEMSET(r->z, 0, sizeof(r->z));
23093     r->z[0] = 1;
23094 
23095 }
23096 
23097 #ifdef WOLFSSL_SP_SMALL
23098 /* Add b to a into r. (r = a + b)
23099  *
23100  * r  A single precision integer.
23101  * a  A single precision integer.
23102  * b  A single precision integer.
23103  */
23104 SP_NOINLINE static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a,
23105         const sp_digit* b)
23106 {
23107     sp_digit c = 0;
23108 
23109     __asm__ __volatile__ (
23110         "mov    r6, %[a]\n\t"
23111         "mov    r7, #0\n\t"
23112         "add    r6, #48\n\t"
23113         "sub    r7, #1\n\t"
23114         "\n1:\n\t"
23115         "add    %[c], r7\n\t"
23116         "ldr    r4, [%[a]]\n\t"
23117         "ldr    r5, [%[b]]\n\t"
23118         "adc    r4, r5\n\t"
23119         "str    r4, [%[r]]\n\t"
23120         "mov    %[c], #0\n\t"
23121         "adc    %[c], %[c]\n\t"
23122         "add    %[a], #4\n\t"
23123         "add    %[b], #4\n\t"
23124         "add    %[r], #4\n\t"
23125         "cmp    %[a], r6\n\t"
23126         "bne    1b\n\t"
23127         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
23128         :
23129         : "memory", "r4", "r5", "r6", "r7"
23130     );
23131 
23132     return c;
23133 }
23134 
23135 #else
23136 /* Add b to a into r. (r = a + b)
23137  *
23138  * r  A single precision integer.
23139  * a  A single precision integer.
23140  * b  A single precision integer.
23141  */
23142 SP_NOINLINE static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a,
23143         const sp_digit* b)
23144 {
23145     sp_digit c = 0;
23146 
23147     __asm__ __volatile__ (
23148         "ldr    r4, [%[a], #0]\n\t"
23149         "ldr    r5, [%[b], #0]\n\t"
23150         "add    r4, r5\n\t"
23151         "str    r4, [%[r], #0]\n\t"
23152         "ldr    r4, [%[a], #4]\n\t"
23153         "ldr    r5, [%[b], #4]\n\t"
23154         "adc    r4, r5\n\t"
23155         "str    r4, [%[r], #4]\n\t"
23156         "ldr    r4, [%[a], #8]\n\t"
23157         "ldr    r5, [%[b], #8]\n\t"
23158         "adc    r4, r5\n\t"
23159         "str    r4, [%[r], #8]\n\t"
23160         "ldr    r4, [%[a], #12]\n\t"
23161         "ldr    r5, [%[b], #12]\n\t"
23162         "adc    r4, r5\n\t"
23163         "str    r4, [%[r], #12]\n\t"
23164         "ldr    r4, [%[a], #16]\n\t"
23165         "ldr    r5, [%[b], #16]\n\t"
23166         "adc    r4, r5\n\t"
23167         "str    r4, [%[r], #16]\n\t"
23168         "ldr    r4, [%[a], #20]\n\t"
23169         "ldr    r5, [%[b], #20]\n\t"
23170         "adc    r4, r5\n\t"
23171         "str    r4, [%[r], #20]\n\t"
23172         "ldr    r4, [%[a], #24]\n\t"
23173         "ldr    r5, [%[b], #24]\n\t"
23174         "adc    r4, r5\n\t"
23175         "str    r4, [%[r], #24]\n\t"
23176         "ldr    r4, [%[a], #28]\n\t"
23177         "ldr    r5, [%[b], #28]\n\t"
23178         "adc    r4, r5\n\t"
23179         "str    r4, [%[r], #28]\n\t"
23180         "ldr    r4, [%[a], #32]\n\t"
23181         "ldr    r5, [%[b], #32]\n\t"
23182         "adc    r4, r5\n\t"
23183         "str    r4, [%[r], #32]\n\t"
23184         "ldr    r4, [%[a], #36]\n\t"
23185         "ldr    r5, [%[b], #36]\n\t"
23186         "adc    r4, r5\n\t"
23187         "str    r4, [%[r], #36]\n\t"
23188         "ldr    r4, [%[a], #40]\n\t"
23189         "ldr    r5, [%[b], #40]\n\t"
23190         "adc    r4, r5\n\t"
23191         "str    r4, [%[r], #40]\n\t"
23192         "ldr    r4, [%[a], #44]\n\t"
23193         "ldr    r5, [%[b], #44]\n\t"
23194         "adc    r4, r5\n\t"
23195         "str    r4, [%[r], #44]\n\t"
23196         "mov    %[c], #0\n\t"
23197         "adc    %[c], %[c]\n\t"
23198         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
23199         :
23200         : "memory", "r4", "r5"
23201     );
23202 
23203     return c;
23204 }
23205 
23206 #endif /* WOLFSSL_SP_SMALL */
23207 /* Add two Montgomery form numbers (r = a + b % m).
23208  *
23209  * r   Result of addition.
23210  * a   First number to add in Montogmery form.
23211  * b   Second number to add in Montogmery form.
23212  * m   Modulus (prime).
23213  */
23214 SP_NOINLINE static void sp_384_mont_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
23215         const sp_digit* m)
23216 {
23217     sp_digit o;
23218 
23219     o = sp_384_add_12(r, a, b);
23220     sp_384_cond_sub_12(r, r, m, 0 - o);
23221 }
23222 
23223 /* Double a Montgomery form number (r = a + a % m).
23224  *
23225  * r   Result of doubling.
23226  * a   Number to double in Montogmery form.
23227  * m   Modulus (prime).
23228  */
23229 SP_NOINLINE static void sp_384_mont_dbl_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
23230 {
23231     sp_digit o;
23232 
23233     o = sp_384_add_12(r, a, a);
23234     sp_384_cond_sub_12(r, r, m, 0 - o);
23235 }
23236 
23237 /* Triple a Montgomery form number (r = a + a + a % m).
23238  *
23239  * r   Result of Tripling.
23240  * a   Number to triple in Montogmery form.
23241  * m   Modulus (prime).
23242  */
23243 SP_NOINLINE static void sp_384_mont_tpl_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
23244 {
23245     sp_digit o;
23246 
23247     o = sp_384_add_12(r, a, a);
23248     sp_384_cond_sub_12(r, r, m, 0 - o);
23249     o = sp_384_add_12(r, r, a);
23250     sp_384_cond_sub_12(r, r, m, 0 - o);
23251 }
23252 
23253 #ifdef WOLFSSL_SP_SMALL
23254 /* Sub b from a into r. (r = a - b)
23255  *
23256  * r  A single precision integer.
23257  * a  A single precision integer.
23258  * b  A single precision integer.
23259  */
23260 SP_NOINLINE static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a,
23261         const sp_digit* b)
23262 {
23263     sp_digit c = 0;
23264 
23265     __asm__ __volatile__ (
23266         "mov    r6, %[a]\n\t"
23267         "add    r6, #48\n\t"
23268         "\n1:\n\t"
23269         "mov    r5, #0\n\t"
23270         "sub    r5, %[c]\n\t"
23271         "ldr    r4, [%[a]]\n\t"
23272         "ldr    r5, [%[b]]\n\t"
23273         "sbc    r4, r5\n\t"
23274         "str    r4, [%[r]]\n\t"
23275         "sbc    %[c], %[c]\n\t"
23276         "add    %[a], #4\n\t"
23277         "add    %[b], #4\n\t"
23278         "add    %[r], #4\n\t"
23279         "cmp    %[a], r6\n\t"
23280         "bne    1b\n\t"
23281         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
23282         :
23283         : "memory", "r4", "r5", "r6"
23284     );
23285 
23286     return c;
23287 }
23288 
23289 #else
23290 /* Sub b from a into r. (r = a - b)
23291  *
 * Fully unrolled: the 12 words are handled two at a time, with the borrow
 * carried through the flags from one sbc to the next.
 *
23292  * r  A single precision integer.
23293  * a  A single precision integer.
23294  * b  A single precision integer.
 * returns 0 when the subtraction did not borrow and all ones (-1) when it did.
23295  */
23296 SP_NOINLINE static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a,
23297         const sp_digit* b)
23298 {
23299     sp_digit c = 0;
23300 
23301     __asm__ __volatile__ (
23302         "ldr    r4, [%[a], #0]\n\t"     /* words 0 and 1 */
23303         "ldr    r5, [%[a], #4]\n\t"
23304         "ldr    r6, [%[b], #0]\n\t"
23305         "ldr    r7, [%[b], #4]\n\t"
23306         "sub    r4, r6\n\t"             /* lowest word: plain sub, no borrow in */
23307         "sbc    r5, r7\n\t"
23308         "str    r4, [%[r], #0]\n\t"
23309         "str    r5, [%[r], #4]\n\t"
23310         "ldr    r4, [%[a], #8]\n\t"     /* words 2 and 3 */
23311         "ldr    r5, [%[a], #12]\n\t"
23312         "ldr    r6, [%[b], #8]\n\t"
23313         "ldr    r7, [%[b], #12]\n\t"
23314         "sbc    r4, r6\n\t"
23315         "sbc    r5, r7\n\t"
23316         "str    r4, [%[r], #8]\n\t"
23317         "str    r5, [%[r], #12]\n\t"
23318         "ldr    r4, [%[a], #16]\n\t"    /* words 4 and 5 */
23319         "ldr    r5, [%[a], #20]\n\t"
23320         "ldr    r6, [%[b], #16]\n\t"
23321         "ldr    r7, [%[b], #20]\n\t"
23322         "sbc    r4, r6\n\t"
23323         "sbc    r5, r7\n\t"
23324         "str    r4, [%[r], #16]\n\t"
23325         "str    r5, [%[r], #20]\n\t"
23326         "ldr    r4, [%[a], #24]\n\t"    /* words 6 and 7 */
23327         "ldr    r5, [%[a], #28]\n\t"
23328         "ldr    r6, [%[b], #24]\n\t"
23329         "ldr    r7, [%[b], #28]\n\t"
23330         "sbc    r4, r6\n\t"
23331         "sbc    r5, r7\n\t"
23332         "str    r4, [%[r], #24]\n\t"
23333         "str    r5, [%[r], #28]\n\t"
23334         "ldr    r4, [%[a], #32]\n\t"    /* words 8 and 9 */
23335         "ldr    r5, [%[a], #36]\n\t"
23336         "ldr    r6, [%[b], #32]\n\t"
23337         "ldr    r7, [%[b], #36]\n\t"
23338         "sbc    r4, r6\n\t"
23339         "sbc    r5, r7\n\t"
23340         "str    r4, [%[r], #32]\n\t"
23341         "str    r5, [%[r], #36]\n\t"
23342         "ldr    r4, [%[a], #40]\n\t"    /* words 10 and 11 */
23343         "ldr    r5, [%[a], #44]\n\t"
23344         "ldr    r6, [%[b], #40]\n\t"
23345         "ldr    r7, [%[b], #44]\n\t"
23346         "sbc    r4, r6\n\t"
23347         "sbc    r5, r7\n\t"
23348         "str    r4, [%[r], #40]\n\t"
23349         "str    r5, [%[r], #44]\n\t"
23350         "sbc    %[c], %[c]\n\t"         /* c = 0 (no borrow) or -1 (borrow) */
23351         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
23352         :
23353         : "memory", "r4", "r5", "r6", "r7"
23354     );
23355 
23356     return c;
23357 }
23358 
23359 #endif /* WOLFSSL_SP_SMALL */
23360 /* Conditionally add a and b using the mask m.
23361  * m is -1 to add and 0 when not.
23362  *
 * Every word of b is ANDed with m before being added, so the same sequence
 * of instructions runs for both mask values. (r = a + (b & m))
 *
23363  * r  A single precision number representing conditional add result.
23364  * a  A single precision number to add with.
23365  * b  A single precision number to add.
23366  * m  Mask value to apply.
 * returns the carry out of the top word: 1 when the addition carried, else 0.
23367  */
23368 SP_NOINLINE static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
23369         sp_digit m)
23370 {
23371     sp_digit c = 0;
23372 
23373     __asm__ __volatile__ (
23374         "mov    r5, #48\n\t"            /* r8 = 48: byte length of the 12 words */
23375         "mov    r8, r5\n\t"
23376         "mov    r7, #0\n\t"             /* r7 = byte offset of the current word */
23377         "1:\n\t"
23378         "ldr    r6, [%[b], r7]\n\t"
23379         "and    r6, %[m]\n\t"           /* r6 = word of b, or 0 when m is 0 */
23380         "mov    r5, #0\n\t"             /* 0xffffffff + c: carries exactly when  */
23381         "sub    r5, #1\n\t"             /* c is 1, putting the saved carry back  */
23382         "add    r5, %[c]\n\t"           /* into the flags for the adc below      */
23383         "ldr    r5, [%[a], r7]\n\t"
23384         "adc    r5, r6\n\t"
23385         "mov    %[c], #0\n\t"           /* c = carry out of this word (0 or 1) */
23386         "adc    %[c], %[c]\n\t"
23387         "str    r5, [%[r], r7]\n\t"
23388         "add    r7, #4\n\t"
23389         "cmp    r7, r8\n\t"
23390         "blt    1b\n\t"
23391         : [c] "+r" (c)
23392         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
23393         : "memory", "r5", "r6", "r7", "r8"
23394     );
23395 
23396     return c;
23397 }
23398 
23399 /* Subtract two Montgomery form numbers (r = a - b % m).
23400  *
23401  * r   Result of subtration.
23402  * a   Number to subtract from in Montogmery form.
23403  * b   Number to subtract with in Montogmery form.
23404  * m   Modulus (prime).
23405  */
23406 SP_NOINLINE static void sp_384_mont_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
23407         const sp_digit* m)
23408 {
23409     sp_digit o;
23410 
23411     o = sp_384_sub_12(r, a, b);
23412     sp_384_cond_add_12(r, r, m, o);
23413 }
23414 
/* Shift the 12-word number right by one bit. (r = a >> 1)
 *
 * Each result word takes bit 0 of the next higher word as its top bit; the
 * top bit of r[11] is left clear.
 *
 * r  A single precision integer (result). The same buffer may be passed for
 *    r and a: every word of a is loaded before the result word at that
 *    offset is stored.
 * a  A single precision integer to shift.
 */
23415 static void sp_384_rshift1_12(sp_digit* r, sp_digit* a)
23416 {
23417     __asm__ __volatile__ (
23418         "ldr    r2, [%[a]]\n\t"
23419         "ldr    r3, [%[a], #4]\n\t"
23420         "lsr    r2, r2, #1\n\t"
23421         "lsl    r5, r3, #31\n\t"       /* r5 = bit 0 of the next word, moved to bit 31 */
23422         "lsr    r3, r3, #1\n\t"
23423         "orr    r2, r2, r5\n\t"
23424         "ldr    r4, [%[a], #8]\n\t"
23425         "str    r2, [%[r], #0]\n\t"
        /* The same step repeats for each following word, with r2, r3 and r4
         * taking turns as the word being completed. */
23426         "lsl    r5, r4, #31\n\t"
23427         "lsr    r4, r4, #1\n\t"
23428         "orr    r3, r3, r5\n\t"
23429         "ldr    r2, [%[a], #12]\n\t"
23430         "str    r3, [%[r], #4]\n\t"
23431         "lsl    r5, r2, #31\n\t"
23432         "lsr    r2, r2, #1\n\t"
23433         "orr    r4, r4, r5\n\t"
23434         "ldr    r3, [%[a], #16]\n\t"
23435         "str    r4, [%[r], #8]\n\t"
23436         "lsl    r5, r3, #31\n\t"
23437         "lsr    r3, r3, #1\n\t"
23438         "orr    r2, r2, r5\n\t"
23439         "ldr    r4, [%[a], #20]\n\t"
23440         "str    r2, [%[r], #12]\n\t"
23441         "lsl    r5, r4, #31\n\t"
23442         "lsr    r4, r4, #1\n\t"
23443         "orr    r3, r3, r5\n\t"
23444         "ldr    r2, [%[a], #24]\n\t"
23445         "str    r3, [%[r], #16]\n\t"
23446         "lsl    r5, r2, #31\n\t"
23447         "lsr    r2, r2, #1\n\t"
23448         "orr    r4, r4, r5\n\t"
23449         "ldr    r3, [%[a], #28]\n\t"
23450         "str    r4, [%[r], #20]\n\t"
23451         "lsl    r5, r3, #31\n\t"
23452         "lsr    r3, r3, #1\n\t"
23453         "orr    r2, r2, r5\n\t"
23454         "ldr    r4, [%[a], #32]\n\t"
23455         "str    r2, [%[r], #24]\n\t"
23456         "lsl    r5, r4, #31\n\t"
23457         "lsr    r4, r4, #1\n\t"
23458         "orr    r3, r3, r5\n\t"
23459         "ldr    r2, [%[a], #36]\n\t"
23460         "str    r3, [%[r], #28]\n\t"
23461         "lsl    r5, r2, #31\n\t"
23462         "lsr    r2, r2, #1\n\t"
23463         "orr    r4, r4, r5\n\t"
23464         "ldr    r3, [%[a], #40]\n\t"
23465         "str    r4, [%[r], #32]\n\t"
23466         "lsl    r5, r3, #31\n\t"
23467         "lsr    r3, r3, #1\n\t"
23468         "orr    r2, r2, r5\n\t"
23469         "ldr    r4, [%[a], #44]\n\t"
23470         "str    r2, [%[r], #36]\n\t"
23471         "lsl    r5, r4, #31\n\t"
23472         "lsr    r4, r4, #1\n\t"
23473         "orr    r3, r3, r5\n\t"
23474         "str    r3, [%[r], #40]\n\t"
23475         "str    r4, [%[r], #44]\n\t"   /* top word: nothing shifted in from above */
23476         :
23477         : [r] "r" (r), [a] "r" (a)
23478         : "memory", "r2", "r3", "r4", "r5"
23479     );
23480 }
23481 
23482 /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
23483  *
23484  * r  Result of division by 2.
23485  * a  Number to divide.
23486  * m  Modulus (prime).
23487  */
23488 SP_NOINLINE static void sp_384_div2_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
23489 {
23490     sp_digit o;
23491 
23492     o = sp_384_cond_add_12(r, a, m, 0 - (a[0] & 1));
23493     sp_384_rshift1_12(r, r);
23494     r[11] |= o << 31;
23495 }
23496 
23497 /* Double the Montgomery form projective point p.
23498  *
23499  * r  Result of doubling point.
23500  * p  Point to double.
23501  * t  Temporary ordinate data.
23502  */
23503 static void sp_384_proj_point_dbl_12(sp_point_384* r, const sp_point_384* p, sp_digit* t)
23504 {
23505     sp_digit* t1 = t;
23506     sp_digit* t2 = t + 2*12;
23507     sp_digit* x;
23508     sp_digit* y;
23509     sp_digit* z;
23510 
23511     x = r->x;
23512     y = r->y;
23513     z = r->z;
23514     /* Put infinity into result. */
23515     if (r != p) {
23516         r->infinity = p->infinity;
23517     }
23518 
23519     /* T1 = Z * Z */
23520     sp_384_mont_sqr_12(t1, p->z, p384_mod, p384_mp_mod);
23521     /* Z = Y * Z */
23522     sp_384_mont_mul_12(z, p->y, p->z, p384_mod, p384_mp_mod);
23523     /* Z = 2Z */
23524     sp_384_mont_dbl_12(z, z, p384_mod);
23525     /* T2 = X - T1 */
23526     sp_384_mont_sub_12(t2, p->x, t1, p384_mod);
23527     /* T1 = X + T1 */
23528     sp_384_mont_add_12(t1, p->x, t1, p384_mod);
23529     /* T2 = T1 * T2 */
23530     sp_384_mont_mul_12(t2, t1, t2, p384_mod, p384_mp_mod);
23531     /* T1 = 3T2 */
23532     sp_384_mont_tpl_12(t1, t2, p384_mod);
23533     /* Y = 2Y */
23534     sp_384_mont_dbl_12(y, p->y, p384_mod);
23535     /* Y = Y * Y */
23536     sp_384_mont_sqr_12(y, y, p384_mod, p384_mp_mod);
23537     /* T2 = Y * Y */
23538     sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod);
23539     /* T2 = T2/2 */
23540     sp_384_div2_12(t2, t2, p384_mod);
23541     /* Y = Y * X */
23542     sp_384_mont_mul_12(y, y, p->x, p384_mod, p384_mp_mod);
23543     /* X = T1 * T1 */
23544     sp_384_mont_sqr_12(x, t1, p384_mod, p384_mp_mod);
23545     /* X = X - Y */
23546     sp_384_mont_sub_12(x, x, y, p384_mod);
23547     /* X = X - Y */
23548     sp_384_mont_sub_12(x, x, y, p384_mod);
23549     /* Y = Y - X */
23550     sp_384_mont_sub_12(y, y, x, p384_mod);
23551     /* Y = Y * T1 */
23552     sp_384_mont_mul_12(y, y, t1, p384_mod, p384_mp_mod);
23553     /* Y = Y - T2 */
23554     sp_384_mont_sub_12(y, y, t2, p384_mod);
23555 }
23556 
23557 /* Compare two numbers to determine if they are equal.
23558  * Constant time implementation.
23559  *
23560  * a  First number to compare.
23561  * b  Second number to compare.
23562  * returns 1 when equal and 0 otherwise.
23563  */
23564 static int sp_384_cmp_equal_12(const sp_digit* a, const sp_digit* b)
23565 {
23566     return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) |
23567             (a[4] ^ b[4]) | (a[5] ^ b[5]) | (a[6] ^ b[6]) | (a[7] ^ b[7]) |
23568             (a[8] ^ b[8]) | (a[9] ^ b[9]) | (a[10] ^ b[10]) | (a[11] ^ b[11])) == 0;
23569 }
23570 
23571 /* Add two Montgomery form projective points.
23572  *
 * When the two points have equal X and Z ordinates, and Y ordinates that are
 * either equal or negations of each other modulo p384_mod, the work is
 * handed to sp_384_proj_point_dbl_12() instead.
 *
23573  * r  Result of addition.
23574  * p  First point to add.
23575  * q  Second point to add.
23576  * t  Temporary ordinate data.
23577  */
23578 static void sp_384_proj_point_add_12(sp_point_384* r, const sp_point_384* p, const sp_point_384* q,
23579         sp_digit* t)
23580 {
23581     const sp_point_384* ap[2];
23582     sp_point_384* rp[2];
23583     sp_digit* t1 = t;
23584     sp_digit* t2 = t + 2*12;
23585     sp_digit* t3 = t + 4*12;
23586     sp_digit* t4 = t + 6*12;
23587     sp_digit* t5 = t + 8*12;
23588     sp_digit* x;
23589     sp_digit* y;
23590     sp_digit* z;
23591     int i;
23592 
23593     /* Ensure only the first point is the same as the result. */
23594     if (q == r) {
23595         const sp_point_384* a = p;
23596         p = q;
23597         q = a;
23598     }
23599 
23600     /* Check double */
    /* t1 = p384_mod - q->y, so the test below also matches p->y == -q->y.
     * NOTE(review): that second case looks like p == -q, yet it is routed to
     * the doubling routine as well - confirm this is intended. */
23601     (void)sp_384_sub_12(t1, p384_mod, q->y);
23602     sp_384_norm_12(t1);
23603     if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) &
23604         (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) {
23605         sp_384_proj_point_dbl_12(r, p, t);
23606     }
23607     else {
23608         rp[0] = r;
23609 
        /* rp[1] is a throw-away point laid over the scratch space: when
         * either input is marked infinity the arithmetic below writes there
         * and r keeps the value pre-loaded into it. */
23610         /*lint allow cast to different type of pointer*/
23611         rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
23612         XMEMSET(rp[1], 0, sizeof(sp_point_384));
23613         x = rp[p->infinity | q->infinity]->x;
23614         y = rp[p->infinity | q->infinity]->y;
23615         z = rp[p->infinity | q->infinity]->z;
23616 
        /* Pre-load r with p, or with q when p is marked infinity. */
23617         ap[0] = p;
23618         ap[1] = q;
23619         for (i=0; i<12; i++) {
23620             r->x[i] = ap[p->infinity]->x[i];
23621         }
23622         for (i=0; i<12; i++) {
23623             r->y[i] = ap[p->infinity]->y[i];
23624         }
23625         for (i=0; i<12; i++) {
23626             r->z[i] = ap[p->infinity]->z[i];
23627         }
23628         r->infinity = ap[p->infinity]->infinity;
23629 
23630         /* U1 = X1*Z2^2 */
23631         sp_384_mont_sqr_12(t1, q->z, p384_mod, p384_mp_mod);
23632         sp_384_mont_mul_12(t3, t1, q->z, p384_mod, p384_mp_mod);
23633         sp_384_mont_mul_12(t1, t1, x, p384_mod, p384_mp_mod);
23634         /* U2 = X2*Z1^2 */
23635         sp_384_mont_sqr_12(t2, z, p384_mod, p384_mp_mod);
23636         sp_384_mont_mul_12(t4, t2, z, p384_mod, p384_mp_mod);
23637         sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod);
23638         /* S1 = Y1*Z2^3 */
23639         sp_384_mont_mul_12(t3, t3, y, p384_mod, p384_mp_mod);
23640         /* S2 = Y2*Z1^3 */
23641         sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod);
23642         /* H = U2 - U1 */
23643         sp_384_mont_sub_12(t2, t2, t1, p384_mod);
23644         /* R = S2 - S1 */
23645         sp_384_mont_sub_12(t4, t4, t3, p384_mod);
23646         /* Z3 = H*Z1*Z2 */
23647         sp_384_mont_mul_12(z, z, q->z, p384_mod, p384_mp_mod);
23648         sp_384_mont_mul_12(z, z, t2, p384_mod, p384_mp_mod);
23649         /* X3 = R^2 - H^3 - 2*U1*H^2 */
23650         sp_384_mont_sqr_12(x, t4, p384_mod, p384_mp_mod);
23651         sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod);
23652         sp_384_mont_mul_12(y, t1, t5, p384_mod, p384_mp_mod);
23653         sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod);
23654         sp_384_mont_sub_12(x, x, t5, p384_mod);
23655         sp_384_mont_dbl_12(t1, y, p384_mod);
23656         sp_384_mont_sub_12(x, x, t1, p384_mod);
23657         /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
23658         sp_384_mont_sub_12(y, y, x, p384_mod);
23659         sp_384_mont_mul_12(y, y, t4, p384_mod, p384_mp_mod);
23660         sp_384_mont_mul_12(t5, t5, t3, p384_mod, p384_mp_mod);
23661         sp_384_mont_sub_12(y, y, t5, p384_mod);
23662     }
23663 }
23664 
23665 /* Multiply the point by the scalar and return the result.
23666  * If map is true then convert result to affine coordinates.
23667  *
23668  * r     Resulting point.
23669  * g     Point to multiply.
23670  * k     Scalar to multiply by.
23671  * map   Indicates whether to convert result to affine.
23672  * heap  Heap to use for allocation.
23673  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
23674  */
23675 static int sp_384_ecc_mulmod_fast_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
23676         int map, void* heap)
23677 {
23678 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
23679     sp_point_384 td[16];
23680     sp_point_384 rtd;
23681     sp_digit tmpd[2 * 12 * 6];
23682 #endif
23683     sp_point_384* t;
23684     sp_point_384* rt;
23685     sp_digit* tmp;
23686     sp_digit n;
23687     int i;
23688     int c, y;
23689     int err;
23690 
23691     (void)heap;
23692 
23693     err = sp_384_point_new_12(heap, rtd, rt);
23694 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
23695     t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 16, heap, DYNAMIC_TYPE_ECC);
23696     if (t == NULL)
23697         err = MEMORY_E;
23698     tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap,
23699                              DYNAMIC_TYPE_ECC);
23700     if (tmp == NULL)
23701         err = MEMORY_E;
23702 #else
23703     t = td;
23704     tmp = tmpd;
23705 #endif
23706 
23707     if (err == MP_OKAY) {
23708         /* t[0] = {0, 0, 1} * norm */
23709         XMEMSET(&t[0], 0, sizeof(t[0]));
23710         t[0].infinity = 1;
23711         /* t[1] = {g->x, g->y, g->z} * norm */
23712         (void)sp_384_mod_mul_norm_12(t[1].x, g->x, p384_mod);
23713         (void)sp_384_mod_mul_norm_12(t[1].y, g->y, p384_mod);
23714         (void)sp_384_mod_mul_norm_12(t[1].z, g->z, p384_mod);
23715         t[1].infinity = 0;
23716         sp_384_proj_point_dbl_12(&t[ 2], &t[ 1], tmp);
23717         t[ 2].infinity = 0;
23718         sp_384_proj_point_add_12(&t[ 3], &t[ 2], &t[ 1], tmp);
23719         t[ 3].infinity = 0;
23720         sp_384_proj_point_dbl_12(&t[ 4], &t[ 2], tmp);
23721         t[ 4].infinity = 0;
23722         sp_384_proj_point_add_12(&t[ 5], &t[ 3], &t[ 2], tmp);
23723         t[ 5].infinity = 0;
23724         sp_384_proj_point_dbl_12(&t[ 6], &t[ 3], tmp);
23725         t[ 6].infinity = 0;
23726         sp_384_proj_point_add_12(&t[ 7], &t[ 4], &t[ 3], tmp);
23727         t[ 7].infinity = 0;
23728         sp_384_proj_point_dbl_12(&t[ 8], &t[ 4], tmp);
23729         t[ 8].infinity = 0;
23730         sp_384_proj_point_add_12(&t[ 9], &t[ 5], &t[ 4], tmp);
23731         t[ 9].infinity = 0;
23732         sp_384_proj_point_dbl_12(&t[10], &t[ 5], tmp);
23733         t[10].infinity = 0;
23734         sp_384_proj_point_add_12(&t[11], &t[ 6], &t[ 5], tmp);
23735         t[11].infinity = 0;
23736         sp_384_proj_point_dbl_12(&t[12], &t[ 6], tmp);
23737         t[12].infinity = 0;
23738         sp_384_proj_point_add_12(&t[13], &t[ 7], &t[ 6], tmp);
23739         t[13].infinity = 0;
23740         sp_384_proj_point_dbl_12(&t[14], &t[ 7], tmp);
23741         t[14].infinity = 0;
23742         sp_384_proj_point_add_12(&t[15], &t[ 8], &t[ 7], tmp);
23743         t[15].infinity = 0;
23744 
23745         i = 10;
23746         n = k[i+1] << 0;
23747         c = 28;
23748         y = n >> 28;
23749         XMEMCPY(rt, &t[y], sizeof(sp_point_384));
23750         n <<= 4;
23751         for (; i>=0 || c>=4; ) {
23752             if (c < 4) {
23753                 n |= k[i--];
23754                 c += 32;
23755             }
23756             y = (n >> 28) & 0xf;
23757             n <<= 4;
23758             c -= 4;
23759 
23760             sp_384_proj_point_dbl_12(rt, rt, tmp);
23761             sp_384_proj_point_dbl_12(rt, rt, tmp);
23762             sp_384_proj_point_dbl_12(rt, rt, tmp);
23763             sp_384_proj_point_dbl_12(rt, rt, tmp);
23764 
23765             sp_384_proj_point_add_12(rt, rt, &t[y], tmp);
23766         }
23767 
23768         if (map != 0) {
23769             sp_384_map_12(r, rt, tmp);
23770         }
23771         else {
23772             XMEMCPY(r, rt, sizeof(sp_point_384));
23773         }
23774     }
23775 
23776 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
23777     if (tmp != NULL) {
23778         XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 12 * 6);
23779         XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
23780     }
23781     if (t != NULL) {
23782         XMEMSET(t, 0, sizeof(sp_point_384) * 16);
23783         XFREE(t, heap, DYNAMIC_TYPE_ECC);
23784     }
23785 #else
23786     ForceZero(tmpd, sizeof(tmpd));
23787     ForceZero(td, sizeof(td));
23788 #endif
23789     sp_384_point_free_12(rt, 1, heap);
23790 
23791     return err;
23792 }
23793 
23794 /* A table entry for pre-computed points.
 * Only x and y are stored; there is no z ordinate and no infinity flag. */
23795 typedef struct sp_table_entry_384 {
23796     sp_digit x[12];    /* x ordinate, 12 words */
23797     sp_digit y[12];    /* y ordinate, 12 words */
23798 } sp_table_entry_384;
23799 
23800 #ifdef FP_ECC
23801 /* Double the Montgomery form projective point p a number of times.
23802  *
23803  * There is no separate result parameter: p is updated in place.
 * Y is doubled once on entry and halved once on exit; W = Z^4 is carried
 * from one doubling to the next, and the last doubling skips updating it.
 *
23804  * p  Point to double; overwritten with the result.
23805  * n  Number of times to double
23806  * t  Temporary ordinate data.
23807  */
23808 static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int n, sp_digit* t)
23809 {
23810     sp_digit* w = t;
23811     sp_digit* a = t + 2*12;
23812     sp_digit* b = t + 4*12;
23813     sp_digit* t1 = t + 6*12;
23814     sp_digit* t2 = t + 8*12;
23815     sp_digit* x;
23816     sp_digit* y;
23817     sp_digit* z;
23818 
23819     x = p->x;
23820     y = p->y;
23821     z = p->z;
23822 
23823     /* Y = 2*Y */
23824     sp_384_mont_dbl_12(y, y, p384_mod);
23825     /* W = Z^4 */
23826     sp_384_mont_sqr_12(w, z, p384_mod, p384_mp_mod);
23827     sp_384_mont_sqr_12(w, w, p384_mod, p384_mp_mod);
23828 
    /* Without WOLFSSL_SP_SMALL the loop body runs n-1 times and the final
     * doubling is the unrolled copy after the loop. With WOLFSSL_SP_SMALL
     * the loop body runs n times and the W update is skipped when n is 0. */
23829 #ifndef WOLFSSL_SP_SMALL
23830     while (--n > 0)
23831 #else
23832     while (--n >= 0)
23833 #endif
23834     {
23835         /* A = 3*(X^2 - W) */
23836         sp_384_mont_sqr_12(t1, x, p384_mod, p384_mp_mod);
23837         sp_384_mont_sub_12(t1, t1, w, p384_mod);
23838         sp_384_mont_tpl_12(a, t1, p384_mod);
23839         /* B = X*Y^2 */
23840         sp_384_mont_sqr_12(t1, y, p384_mod, p384_mp_mod);
23841         sp_384_mont_mul_12(b, t1, x, p384_mod, p384_mp_mod);
23842         /* X = A^2 - 2B */
23843         sp_384_mont_sqr_12(x, a, p384_mod, p384_mp_mod);
23844         sp_384_mont_dbl_12(t2, b, p384_mod);
23845         sp_384_mont_sub_12(x, x, t2, p384_mod);
23846         /* Z = Z*Y */
23847         sp_384_mont_mul_12(z, z, y, p384_mod, p384_mp_mod);
23848         /* t1 = Y^4 (t1 held Y^2 from the B step) */
23849         sp_384_mont_sqr_12(t1, t1, p384_mod, p384_mp_mod);
23850 #ifdef WOLFSSL_SP_SMALL
23851         if (n != 0)
23852 #endif
23853         {
23854             /* W = W*Y^4 */
23855             sp_384_mont_mul_12(w, w, t1, p384_mod, p384_mp_mod);
23856         }
23857         /* y = 2*A*(B - X) - Y^4 */
23858         sp_384_mont_sub_12(y, b, x, p384_mod);
23859         sp_384_mont_mul_12(y, y, a, p384_mod, p384_mp_mod);
23860         sp_384_mont_dbl_12(y, y, p384_mod);
23861         sp_384_mont_sub_12(y, y, t1, p384_mod);
23862     }
23863 #ifndef WOLFSSL_SP_SMALL
    /* Final doubling: same steps as the loop body, without the W update. */
23864     /* A = 3*(X^2 - W) */
23865     sp_384_mont_sqr_12(t1, x, p384_mod, p384_mp_mod);
23866     sp_384_mont_sub_12(t1, t1, w, p384_mod);
23867     sp_384_mont_tpl_12(a, t1, p384_mod);
23868     /* B = X*Y^2 */
23869     sp_384_mont_sqr_12(t1, y, p384_mod, p384_mp_mod);
23870     sp_384_mont_mul_12(b, t1, x, p384_mod, p384_mp_mod);
23871     /* X = A^2 - 2B */
23872     sp_384_mont_sqr_12(x, a, p384_mod, p384_mp_mod);
23873     sp_384_mont_dbl_12(t2, b, p384_mod);
23874     sp_384_mont_sub_12(x, x, t2, p384_mod);
23875     /* Z = Z*Y */
23876     sp_384_mont_mul_12(z, z, y, p384_mod, p384_mp_mod);
23877     /* t1 = Y^4 (t1 held Y^2 from the B step) */
23878     sp_384_mont_sqr_12(t1, t1, p384_mod, p384_mp_mod);
23879     /* y = 2*A*(B - X) - Y^4 */
23880     sp_384_mont_sub_12(y, b, x, p384_mod);
23881     sp_384_mont_mul_12(y, y, a, p384_mod, p384_mp_mod);
23882     sp_384_mont_dbl_12(y, y, p384_mod);
23883     sp_384_mont_sub_12(y, y, t1, p384_mod);
23884 #endif
23885     /* Y = Y/2 */
23886     sp_384_div2_12(y, y, p384_mod);
23887 }
23888 
23889 #endif /* FP_ECC */
23890 /* Add two Montgomery form projective points. The second point has a z
23891  * ordinate of one, so the formulas use X1 and Y1 directly as U1 and S1.
23892  * Only the first point can be the same pointer as the result point.
23893  *
 * When the two points have equal X and Z ordinates, and Y ordinates that are
 * either equal or negations of each other modulo p384_mod, the work is
 * handed to sp_384_proj_point_dbl_12() instead.
 *
23894  * r  Result of addition.
23895  * p  First point to add.
23896  * q  Second point to add.
23897  * t  Temporary ordinate data.
23898  */
23899 static void sp_384_proj_point_add_qz1_12(sp_point_384* r, const sp_point_384* p,
23900         const sp_point_384* q, sp_digit* t)
23901 {
23902     const sp_point_384* ap[2];
23903     sp_point_384* rp[2];
23904     sp_digit* t1 = t;
23905     sp_digit* t2 = t + 2*12;
23906     sp_digit* t3 = t + 4*12;
23907     sp_digit* t4 = t + 6*12;
23908     sp_digit* t5 = t + 8*12;
23909     sp_digit* x;
23910     sp_digit* y;
23911     sp_digit* z;
23912     int i;
23913 
23914     /* Check double */
    /* t1 = p384_mod - q->y, so the test below also matches p->y == -q->y. */
23915     (void)sp_384_sub_12(t1, p384_mod, q->y);
23916     sp_384_norm_12(t1);
23917     if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) &
23918         (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) {
23919         sp_384_proj_point_dbl_12(r, p, t);
23920     }
23921     else {
23922         rp[0] = r;
23923 
        /* rp[1] is a throw-away point laid over the scratch space: when
         * either input is marked infinity the arithmetic below writes there
         * and r keeps the value pre-loaded into it. */
23924         /*lint allow cast to different type of pointer*/
23925         rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
23926         XMEMSET(rp[1], 0, sizeof(sp_point_384));
23927         x = rp[p->infinity | q->infinity]->x;
23928         y = rp[p->infinity | q->infinity]->y;
23929         z = rp[p->infinity | q->infinity]->z;
23930 
        /* Pre-load r with p, or with q when p is marked infinity. */
23931         ap[0] = p;
23932         ap[1] = q;
23933         for (i=0; i<12; i++) {
23934             r->x[i] = ap[p->infinity]->x[i];
23935         }
23936         for (i=0; i<12; i++) {
23937             r->y[i] = ap[p->infinity]->y[i];
23938         }
23939         for (i=0; i<12; i++) {
23940             r->z[i] = ap[p->infinity]->z[i];
23941         }
23942         r->infinity = ap[p->infinity]->infinity;
23943 
23944         /* U2 = X2*Z1^2 */
23945         sp_384_mont_sqr_12(t2, z, p384_mod, p384_mp_mod);
23946         sp_384_mont_mul_12(t4, t2, z, p384_mod, p384_mp_mod);
23947         sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod);
23948         /* S2 = Y2*Z1^3 */
23949         sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod);
23950         /* H = U2 - X1 */
23951         sp_384_mont_sub_12(t2, t2, x, p384_mod);
23952         /* R = S2 - Y1 */
23953         sp_384_mont_sub_12(t4, t4, y, p384_mod);
23954         /* Z3 = H*Z1 */
23955         sp_384_mont_mul_12(z, z, t2, p384_mod, p384_mp_mod);
23956         /* X3 = R^2 - H^3 - 2*X1*H^2 */
23957         sp_384_mont_sqr_12(t1, t4, p384_mod, p384_mp_mod);
23958         sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod);
23959         sp_384_mont_mul_12(t3, x, t5, p384_mod, p384_mp_mod);
23960         sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod);
23961         sp_384_mont_sub_12(x, t1, t5, p384_mod);
23962         sp_384_mont_dbl_12(t1, t3, p384_mod);
23963         sp_384_mont_sub_12(x, x, t1, p384_mod);
23964         /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
23965         sp_384_mont_sub_12(t3, t3, x, p384_mod);
23966         sp_384_mont_mul_12(t3, t3, t4, p384_mod, p384_mp_mod);
23967         sp_384_mont_mul_12(t5, t5, y, p384_mod, p384_mp_mod);
23968         sp_384_mont_sub_12(y, t3, t5, p384_mod);
23969     }
23970 }
23971 
23972 #ifdef WOLFSSL_SP_SMALL
23973 #ifdef FP_ECC
23974 /* Convert the projective point to affine.
23975  * Ordinates are in Montgomery form.
23976  *
23977  * a  Point to convert.
23978  * t  Temporary data.
23979  */
23980 static void sp_384_proj_to_affine_12(sp_point_384* a, sp_digit* t)
23981 {
23982     sp_digit* t1 = t;
23983     sp_digit* t2 = t + 2 * 12;
23984     sp_digit* tmp = t + 4 * 12;
23985 
23986     sp_384_mont_inv_12(t1, a->z, tmp);
23987 
23988     sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod);
23989     sp_384_mont_mul_12(t1, t2, t1, p384_mod, p384_mp_mod);
23990 
23991     sp_384_mont_mul_12(a->x, a->x, t2, p384_mod, p384_mp_mod);
23992     sp_384_mont_mul_12(a->y, a->y, t1, p384_mod, p384_mp_mod);
23993     XMEMCPY(a->z, p384_norm_mod, sizeof(p384_norm_mod));
23994 }
23995 
23996 /* Generate the pre-computed table of points for the base point.
23997  *
 * Fills 16 entries: table[0] is zeroed, table[1] is the point itself in
 * affine form, table[2], table[4] and table[8] are each the previous one
 * doubled 96 times, and every other entry is the sum of two earlier ones.
 *
23998  * a      The base point.
23999  * table  Place to store generated point data.
24000  * tmp    Temporary data.
24001  * heap  Heap to use for allocation.
 * returns MP_OKAY on success, otherwise the error reported when creating the
 * temporary points or by sp_384_mod_mul_norm_12().
24002  */
24003 static int sp_384_gen_stripe_table_12(const sp_point_384* a,
24004         sp_table_entry_384* table, sp_digit* tmp, void* heap)
24005 {
24006 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
24007     sp_point_384 td, s1d, s2d;
24008 #endif
24009     sp_point_384* t;
24010     sp_point_384* s1 = NULL;
24011     sp_point_384* s2 = NULL;
24012     int i, j;
24013     int err;
24014 
24015     (void)heap;
24016 
24017     err = sp_384_point_new_12(heap, td, t);
24018     if (err == MP_OKAY) {
24019         err = sp_384_point_new_12(heap, s1d, s1);
24020     }
24021     if (err == MP_OKAY) {
24022         err = sp_384_point_new_12(heap, s2d, s2);
24023     }
24024 
    /* t = a with each ordinate passed through sp_384_mod_mul_norm_12(). */
24025     if (err == MP_OKAY) {
24026         err = sp_384_mod_mul_norm_12(t->x, a->x, p384_mod);
24027     }
24028     if (err == MP_OKAY) {
24029         err = sp_384_mod_mul_norm_12(t->y, a->y, p384_mod);
24030     }
24031     if (err == MP_OKAY) {
24032         err = sp_384_mod_mul_norm_12(t->z, a->z, p384_mod);
24033     }
24034     if (err == MP_OKAY) {
24035         t->infinity = 0;
24036         sp_384_proj_to_affine_12(t, tmp);
24037 
        /* s1 and s2 only ever hold table entries, so their z is fixed at
         * p384_norm_mod once here. */
24038         XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
24039         s1->infinity = 0;
24040         XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
24041         s2->infinity = 0;
24042 
24043         /* table[0] = {0, 0, infinity} */
24044         XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
24045         /* table[1] = Affine version of 'a' in Montgomery form */
24046         XMEMCPY(table[1].x, t->x, sizeof(table->x));
24047         XMEMCPY(table[1].y, t->y, sizeof(table->y));
24048 
        /* table[2], table[4], table[8]: t doubled a further 96 times each. */
24049         for (i=1; i<4; i++) {
24050             sp_384_proj_point_dbl_n_12(t, 96, tmp);
24051             sp_384_proj_to_affine_12(t, tmp);
24052             XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
24053             XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
24054         }
24055 
        /* Remaining entries: table[j] = table[1<<i] + table[j - (1<<i)]. */
24056         for (i=1; i<4; i++) {
24057             XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
24058             XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
24059             for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
24060                 XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
24061                 XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
24062                 sp_384_proj_point_add_qz1_12(t, s1, s2, tmp);
24063                 sp_384_proj_to_affine_12(t, tmp);
24064                 XMEMCPY(table[j].x, t->x, sizeof(table->x));
24065                 XMEMCPY(table[j].y, t->y, sizeof(table->y));
24066             }
24067         }
24068     }
24069 
24070     sp_384_point_free_12(s2, 0, heap);
24071     sp_384_point_free_12(s1, 0, heap);
24072     sp_384_point_free_12( t, 0, heap);
24073 
24074     return err;
24075 }
24076 
24077 #endif /* FP_ECC */
24078 /* Multiply the point by the scalar and return the result.
24079  * If map is true then convert result to affine coordinates.
24080  *
 * The scalar is read as four stripes of 96 bits. For each bit position i,
 * from 95 down to 0, bits i, i+96, i+192 and i+288 of k form a 4-bit index
 * into table; the running result is doubled once and the selected entry is
 * added.
 *
24081  * r     Resulting point.
 * g     Point to multiply. Not used here: the multiples come from table.
 * table Pre-computed table of points, indexed by the 4-bit value above.
24082  * k     Scalar to multiply by.
24083  * map   Indicates whether to convert result to affine.
24084  * heap  Heap to use for allocation.
24085  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
24086  */
24087 static int sp_384_ecc_mulmod_stripe_12(sp_point_384* r, const sp_point_384* g,
24088         const sp_table_entry_384* table, const sp_digit* k, int map, void* heap)
24089 {
24090 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
24091     sp_point_384 rtd;
24092     sp_point_384 pd;
24093     sp_digit td[2 * 12 * 6];
24094 #endif
24095     sp_point_384* rt;
24096     sp_point_384* p = NULL;
24097     sp_digit* t;
24098     int i, j;
24099     int y, x;
24100     int err;
24101 
24102     (void)g;
24103     (void)heap;
24104 
24105 
24106     err = sp_384_point_new_12(heap, rtd, rt);
24107     if (err == MP_OKAY) {
24108         err = sp_384_point_new_12(heap, pd, p);
24109     }
24110 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
24111     t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap,
24112                            DYNAMIC_TYPE_ECC);
24113     if (t == NULL) {
24114         err = MEMORY_E;
24115     }
24116 #else
24117     t = td;
24118 #endif
24119 
24120     if (err == MP_OKAY) {
        /* Table entries carry no z ordinate, so p and rt get p384_norm_mod. */
24121         XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
24122         XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
24123 
        /* Seed rt from the top bit of each stripe (bits 95, 191, 287, 383). */
24124         y = 0;
24125         for (j=0,x=95; j<4; j++,x+=96) {
24126             y |= ((k[x / 32] >> (x % 32)) & 1) << j;
24127         }
24128         XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
24129         XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
        /* Index 0 selects the zeroed entry, which stands for infinity. */
24130         rt->infinity = !y;
24131         for (i=94; i>=0; i--) {
24132             y = 0;
24133             for (j=0,x=i; j<4; j++,x+=96) {
24134                 y |= ((k[x / 32] >> (x % 32)) & 1) << j;
24135             }
24136 
24137             sp_384_proj_point_dbl_12(rt, rt, t);
24138             XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
24139             XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
24140             p->infinity = !y;
24141             sp_384_proj_point_add_qz1_12(rt, rt, p, t);
24142         }
24143 
24144         if (map != 0) {
24145             sp_384_map_12(r, rt, t);
24146         }
24147         else {
24148             XMEMCPY(r, rt, sizeof(sp_point_384));
24149         }
24150     }
24151 
    /* NOTE(review): the scratch buffer and the temporary points are released
     * without being cleared here, unlike in sp_384_ecc_mulmod_fast_12() -
     * confirm whether that is intended. */
24152 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
24153     if (t != NULL) {
24154         XFREE(t, heap, DYNAMIC_TYPE_ECC);
24155     }
24156 #endif
24157     sp_384_point_free_12(p, 0, heap);
24158     sp_384_point_free_12(rt, 0, heap);
24159 
24160     return err;
24161 }
24162 
24163 #ifdef FP_ECC
#ifndef FP_ENTRIES
    /* Number of entries in the point cache; used unless the build has
     * already defined FP_ENTRIES. */
    #define FP_ENTRIES 16
#endif

/* Cache entry: a point together with its pre-computed multiplication table. */
typedef struct sp_cache_384_t {
    /* X ordinate of the cached point; compared against the point looked up. */
    sp_digit x[12];
    /* Y ordinate of the cached point; compared against the point looked up. */
    sp_digit y[12];
    /* Pre-computed table for the point. Filled in by sp_384_ecc_mulmod_12
     * when cnt reaches 2. */
    sp_table_entry_384 table[16];
    /* Use count: set to 1 when the point is stored and incremented on each
     * later lookup that matches. */
    uint32_t cnt;
    /* Non-zero when this entry holds a point. */
    int set;
} sp_cache_384_t;
24175 
/* The point cache. NOTE(review): THREAD_LS_T is presumably a thread-local
 * storage qualifier when HAVE_THREAD_LS is defined - confirm. */
static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES];
/* Index of the entry returned by the last lookup; -1 before any lookup. */
static THREAD_LS_T int sp_cache_384_last = -1;
/* Non-zero once the 'set' flag of every cache entry has been cleared. */
static THREAD_LS_T int sp_cache_384_inited = 0;

#ifndef HAVE_THREAD_LS
    /* Non-zero once sp_cache_384_lock has been initialized. */
    static volatile int initCacheMutex_384 = 0;
    /* Lock taken by sp_384_ecc_mulmod_12 around the cache lookup. */
    static wolfSSL_Mutex sp_cache_384_lock;
#endif
24184 
24185 static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
24186 {
24187     int i, j;
24188     uint32_t least;
24189 
24190     if (sp_cache_384_inited == 0) {
24191         for (i=0; i<FP_ENTRIES; i++) {
24192             sp_cache_384[i].set = 0;
24193         }
24194         sp_cache_384_inited = 1;
24195     }
24196 
24197     /* Compare point with those in cache. */
24198     for (i=0; i<FP_ENTRIES; i++) {
24199         if (!sp_cache_384[i].set)
24200             continue;
24201 
24202         if (sp_384_cmp_equal_12(g->x, sp_cache_384[i].x) &
24203                            sp_384_cmp_equal_12(g->y, sp_cache_384[i].y)) {
24204             sp_cache_384[i].cnt++;
24205             break;
24206         }
24207     }
24208 
24209     /* No match. */
24210     if (i == FP_ENTRIES) {
24211         /* Find empty entry. */
24212         i = (sp_cache_384_last + 1) % FP_ENTRIES;
24213         for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
24214             if (!sp_cache_384[i].set) {
24215                 break;
24216             }
24217         }
24218 
24219         /* Evict least used. */
24220         if (i == sp_cache_384_last) {
24221             least = sp_cache_384[0].cnt;
24222             for (j=1; j<FP_ENTRIES; j++) {
24223                 if (sp_cache_384[j].cnt < least) {
24224                     i = j;
24225                     least = sp_cache_384[i].cnt;
24226                 }
24227             }
24228         }
24229 
24230         XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
24231         XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
24232         sp_cache_384[i].set = 1;
24233         sp_cache_384[i].cnt = 1;
24234     }
24235 
24236     *cache = &sp_cache_384[i];
24237     sp_cache_384_last = i;
24238 }
24239 #endif /* FP_ECC */
24240 
24241 /* Multiply the base point of P384 by the scalar and return the result.
24242  * If map is true then convert result to affine coordinates.
24243  *
24244  * r     Resulting point.
24245  * g     Point to multiply.
24246  * k     Scalar to multiply by.
24247  * map   Indicates whether to convert result to affine.
24248  * heap  Heap to use for allocation.
24249  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
24250  */
24251 static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
24252         int map, void* heap)
24253 {
24254 #ifndef FP_ECC
24255     return sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
24256 #else
24257     sp_digit tmp[2 * 12 * 7];
24258     sp_cache_384_t* cache;
24259     int err = MP_OKAY;
24260 
24261 #ifndef HAVE_THREAD_LS
24262     if (initCacheMutex_384 == 0) {
24263          wc_InitMutex(&sp_cache_384_lock);
24264          initCacheMutex_384 = 1;
24265     }
24266     if (wc_LockMutex(&sp_cache_384_lock) != 0)
24267        err = BAD_MUTEX_E;
24268 #endif /* HAVE_THREAD_LS */
24269 
24270     if (err == MP_OKAY) {
24271         sp_ecc_get_cache_384(g, &cache);
24272         if (cache->cnt == 2)
24273             sp_384_gen_stripe_table_12(g, cache->table, tmp, heap);
24274 
24275 #ifndef HAVE_THREAD_LS
24276         wc_UnLockMutex(&sp_cache_384_lock);
24277 #endif /* HAVE_THREAD_LS */
24278 
24279         if (cache->cnt < 2) {
24280             err = sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
24281         }
24282         else {
24283             err = sp_384_ecc_mulmod_stripe_12(r, g, cache->table, k,
24284                     map, heap);
24285         }
24286     }
24287 
24288     return err;
24289 #endif
24290 }
24291 
24292 #else
24293 #ifdef FP_ECC
24294 /* Generate the pre-computed table of points for the base point.
24295  *
24296  * a      The base point.
24297  * table  Place to store generated point data.
24298  * tmp    Temporary data.
24299  * heap  Heap to use for allocation.
24300  */
24301 static int sp_384_gen_stripe_table_12(const sp_point_384* a,
24302         sp_table_entry_384* table, sp_digit* tmp, void* heap)
24303 {
24304 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
24305     sp_point_384 td, s1d, s2d;
24306 #endif
24307     sp_point_384* t;
24308     sp_point_384* s1 = NULL;
24309     sp_point_384* s2 = NULL;
24310     int i, j;
24311     int err;
24312 
24313     (void)heap;
24314 
24315     err = sp_384_point_new_12(heap, td, t);
24316     if (err == MP_OKAY) {
24317         err = sp_384_point_new_12(heap, s1d, s1);
24318     }
24319     if (err == MP_OKAY) {
24320         err = sp_384_point_new_12(heap, s2d, s2);
24321     }
24322 
24323     if (err == MP_OKAY) {
24324         err = sp_384_mod_mul_norm_12(t->x, a->x, p384_mod);
24325     }
24326     if (err == MP_OKAY) {
24327         err = sp_384_mod_mul_norm_12(t->y, a->y, p384_mod);
24328     }
24329     if (err == MP_OKAY) {
24330         err = sp_384_mod_mul_norm_12(t->z, a->z, p384_mod);
24331     }
24332     if (err == MP_OKAY) {
24333         t->infinity = 0;
24334         sp_384_proj_to_affine_12(t, tmp);
24335 
24336         XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
24337         s1->infinity = 0;
24338         XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
24339         s2->infinity = 0;
24340 
24341         /* table[0] = {0, 0, infinity} */
24342         XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
24343         /* table[1] = Affine version of 'a' in Montgomery form */
24344         XMEMCPY(table[1].x, t->x, sizeof(table->x));
24345         XMEMCPY(table[1].y, t->y, sizeof(table->y));
24346 
24347         for (i=1; i<8; i++) {
24348             sp_384_proj_point_dbl_n_12(t, 48, tmp);
24349             sp_384_proj_to_affine_12(t, tmp);
24350             XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
24351             XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
24352         }
24353 
24354         for (i=1; i<8; i++) {
24355             XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
24356             XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
24357             for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
24358                 XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
24359                 XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
24360                 sp_384_proj_point_add_qz1_12(t, s1, s2, tmp);
24361                 sp_384_proj_to_affine_12(t, tmp);
24362                 XMEMCPY(table[j].x, t->x, sizeof(table->x));
24363                 XMEMCPY(table[j].y, t->y, sizeof(table->y));
24364             }
24365         }
24366     }
24367 
24368     sp_384_point_free_12(s2, 0, heap);
24369     sp_384_point_free_12(s1, 0, heap);
24370     sp_384_point_free_12( t, 0, heap);
24371 
24372     return err;
24373 }
24374 
24375 #endif /* FP_ECC */
24376 /* Multiply the point by the scalar and return the result.
24377  * If map is true then convert result to affine coordinates.
24378  *
24379  * r     Resulting point.
24380  * k     Scalar to multiply by.
24381  * map   Indicates whether to convert result to affine.
24382  * heap  Heap to use for allocation.
24383  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
24384  */
24385 static int sp_384_ecc_mulmod_stripe_12(sp_point_384* r, const sp_point_384* g,
24386         const sp_table_entry_384* table, const sp_digit* k, int map, void* heap)
24387 {
24388 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
24389     sp_point_384 rtd;
24390     sp_point_384 pd;
24391     sp_digit td[2 * 12 * 6];
24392 #endif
24393     sp_point_384* rt;
24394     sp_point_384* p = NULL;
24395     sp_digit* t;
24396     int i, j;
24397     int y, x;
24398     int err;
24399 
24400     (void)g;
24401     (void)heap;
24402 
24403 
24404     err = sp_384_point_new_12(heap, rtd, rt);
24405     if (err == MP_OKAY) {
24406         err = sp_384_point_new_12(heap, pd, p);
24407     }
24408 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
24409     t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap,
24410                            DYNAMIC_TYPE_ECC);
24411     if (t == NULL) {
24412         err = MEMORY_E;
24413     }
24414 #else
24415     t = td;
24416 #endif
24417 
24418     if (err == MP_OKAY) {
24419         XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
24420         XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
24421 
24422         y = 0;
24423         for (j=0,x=47; j<8; j++,x+=48) {
24424             y |= ((k[x / 32] >> (x % 32)) & 1) << j;
24425         }
24426         XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
24427         XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
24428         rt->infinity = !y;
24429         for (i=46; i>=0; i--) {
24430             y = 0;
24431             for (j=0,x=i; j<8; j++,x+=48) {
24432                 y |= ((k[x / 32] >> (x % 32)) & 1) << j;
24433             }
24434 
24435             sp_384_proj_point_dbl_12(rt, rt, t);
24436             XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
24437             XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
24438             p->infinity = !y;
24439             sp_384_proj_point_add_qz1_12(rt, rt, p, t);
24440         }
24441 
24442         if (map != 0) {
24443             sp_384_map_12(r, rt, t);
24444         }
24445         else {
24446             XMEMCPY(r, rt, sizeof(sp_point_384));
24447         }
24448     }
24449 
24450 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
24451     if (t != NULL) {
24452         XFREE(t, heap, DYNAMIC_TYPE_ECC);
24453     }
24454 #endif
24455     sp_384_point_free_12(p, 0, heap);
24456     sp_384_point_free_12(rt, 0, heap);
24457 
24458     return err;
24459 }
24460 
24461 #ifdef FP_ECC
#ifndef FP_ENTRIES
    /* Number of entries in the point cache; used unless the build has
     * already defined FP_ENTRIES. */
    #define FP_ENTRIES 16
#endif

/* Cache entry: a point together with its pre-computed multiplication table. */
typedef struct sp_cache_384_t {
    /* X ordinate of the cached point; compared against the point looked up. */
    sp_digit x[12];
    /* Y ordinate of the cached point; compared against the point looked up. */
    sp_digit y[12];
    /* Pre-computed table for the point. Filled in by
     * sp_384_gen_stripe_table_12 when cnt reaches 2. */
    sp_table_entry_384 table[256];
    /* Use count: set to 1 when the point is stored and incremented on each
     * later lookup that matches. */
    uint32_t cnt;
    /* Non-zero when this entry holds a point. */
    int set;
} sp_cache_384_t;
24473 
/* The point cache. NOTE(review): THREAD_LS_T is presumably a thread-local
 * storage qualifier when HAVE_THREAD_LS is defined - confirm. */
static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES];
/* Index of the entry returned by the last lookup; -1 before any lookup. */
static THREAD_LS_T int sp_cache_384_last = -1;
/* Non-zero once the 'set' flag of every cache entry has been cleared. */
static THREAD_LS_T int sp_cache_384_inited = 0;

#ifndef HAVE_THREAD_LS
    /* Non-zero once sp_cache_384_lock has been initialized. */
    static volatile int initCacheMutex_384 = 0;
    /* Lock taken by sp_384_ecc_mulmod_12 around the cache lookup. */
    static wolfSSL_Mutex sp_cache_384_lock;
#endif
24482 
24483 static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
24484 {
24485     int i, j;
24486     uint32_t least;
24487 
24488     if (sp_cache_384_inited == 0) {
24489         for (i=0; i<FP_ENTRIES; i++) {
24490             sp_cache_384[i].set = 0;
24491         }
24492         sp_cache_384_inited = 1;
24493     }
24494 
24495     /* Compare point with those in cache. */
24496     for (i=0; i<FP_ENTRIES; i++) {
24497         if (!sp_cache_384[i].set)
24498             continue;
24499 
24500         if (sp_384_cmp_equal_12(g->x, sp_cache_384[i].x) &
24501                            sp_384_cmp_equal_12(g->y, sp_cache_384[i].y)) {
24502             sp_cache_384[i].cnt++;
24503             break;
24504         }
24505     }
24506 
24507     /* No match. */
24508     if (i == FP_ENTRIES) {
24509         /* Find empty entry. */
24510         i = (sp_cache_384_last + 1) % FP_ENTRIES;
24511         for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
24512             if (!sp_cache_384[i].set) {
24513                 break;
24514             }
24515         }
24516 
24517         /* Evict least used. */
24518         if (i == sp_cache_384_last) {
24519             least = sp_cache_384[0].cnt;
24520             for (j=1; j<FP_ENTRIES; j++) {
24521                 if (sp_cache_384[j].cnt < least) {
24522                     i = j;
24523                     least = sp_cache_384[i].cnt;
24524                 }
24525             }
24526         }
24527 
24528         XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
24529         XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
24530         sp_cache_384[i].set = 1;
24531         sp_cache_384[i].cnt = 1;
24532     }
24533 
24534     *cache = &sp_cache_384[i];
24535     sp_cache_384_last = i;
24536 }
24537 #endif /* FP_ECC */
24538 
24539 /* Multiply the base point of P384 by the scalar and return the result.
24540  * If map is true then convert result to affine coordinates.
24541  *
24542  * r     Resulting point.
24543  * g     Point to multiply.
24544  * k     Scalar to multiply by.
24545  * map   Indicates whether to convert result to affine.
24546  * heap  Heap to use for allocation.
24547  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
24548  */
24549 static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
24550         int map, void* heap)
24551 {
24552 #ifndef FP_ECC
24553     return sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
24554 #else
24555     sp_digit tmp[2 * 12 * 7];
24556     sp_cache_384_t* cache;
24557     int err = MP_OKAY;
24558 
24559 #ifndef HAVE_THREAD_LS
24560     if (initCacheMutex_384 == 0) {
24561          wc_InitMutex(&sp_cache_384_lock);
24562          initCacheMutex_384 = 1;
24563     }
24564     if (wc_LockMutex(&sp_cache_384_lock) != 0)
24565        err = BAD_MUTEX_E;
24566 #endif /* HAVE_THREAD_LS */
24567 
24568     if (err == MP_OKAY) {
24569         sp_ecc_get_cache_384(g, &cache);
24570         if (cache->cnt == 2)
24571             sp_384_gen_stripe_table_12(g, cache->table, tmp, heap);
24572 
24573 #ifndef HAVE_THREAD_LS
24574         wc_UnLockMutex(&sp_cache_384_lock);
24575 #endif /* HAVE_THREAD_LS */
24576 
24577         if (cache->cnt < 2) {
24578             err = sp_384_ecc_mulmod_fast_12(r, g, k, map, heap);
24579         }
24580         else {
24581             err = sp_384_ecc_mulmod_stripe_12(r, g, cache->table, k,
24582                     map, heap);
24583         }
24584     }
24585 
24586     return err;
24587 #endif
24588 }
24589 
24590 #endif /* WOLFSSL_SP_SMALL */
24591 /* Multiply the point by the scalar and return the result.
24592  * If map is true then convert result to affine coordinates.
24593  *
24594  * km    Scalar to multiply by.
24595  * p     Point to multiply.
24596  * r     Resulting point.
24597  * map   Indicates whether to convert result to affine.
24598  * heap  Heap to use for allocation.
24599  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
24600  */
24601 int sp_ecc_mulmod_384(mp_int* km, ecc_point* gm, ecc_point* r, int map,
24602         void* heap)
24603 {
24604 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
24605     sp_point_384 p;
24606     sp_digit kd[12];
24607 #endif
24608     sp_point_384* point;
24609     sp_digit* k = NULL;
24610     int err = MP_OKAY;
24611 
24612     err = sp_384_point_new_12(heap, p, point);
24613 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
24614     if (err == MP_OKAY) {
24615         k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
24616                                                               DYNAMIC_TYPE_ECC);
24617         if (k == NULL)
24618             err = MEMORY_E;
24619     }
24620 #else
24621     k = kd;
24622 #endif
24623     if (err == MP_OKAY) {
24624         sp_384_from_mp(k, 12, km);
24625         sp_384_point_from_ecc_point_12(point, gm);
24626 
24627             err = sp_384_ecc_mulmod_12(point, point, k, map, heap);
24628     }
24629     if (err == MP_OKAY) {
24630         err = sp_384_point_to_ecc_point_12(point, r);
24631     }
24632 
24633 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
24634     if (k != NULL) {
24635         XFREE(k, heap, DYNAMIC_TYPE_ECC);
24636     }
24637 #endif
24638     sp_384_point_free_12(point, 0, heap);
24639 
24640     return err;
24641 }
24642 
24643 #ifdef WOLFSSL_SP_SMALL
/* Pre-computed table of 16 entries used when WOLFSSL_SP_SMALL is defined.
 * sp_384_ecc_mulmod_base_12 passes it, together with p384_base, to
 * sp_384_ecc_mulmod_stripe_12.
 * Each entry holds an x and a y ordinate of 12 digits; entry 0 is all zero.
 * NOTE(review): the values are presumably the affine ordinates, in Montgomery
 * form, of the combinations of the P384 base point that a stripe table
 * generator would produce - confirm against the table generator.
 */
static const sp_table_entry_384 p384_table[16] = {
    /* 0 */
    { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
    /* 1 */
    { { 0x49c0b528,0x3dd07566,0xa0d6ce38,0x20e378e2,0x541b4d6e,0x879c3afc,
        0x59a30eff,0x64548684,0x614ede2b,0x812ff723,0x299e1513,0x4d3aadc2 },
      { 0x4b03a4fe,0x23043dad,0x7bb4a9ac,0xa1bfa8bf,0x2e83b050,0x8bade756,
        0x68f4ffd9,0xc6c35219,0x3969a840,0xdd800226,0x5a15c5e9,0x2b78abc2 } },
    /* 2 */
    { { 0xf26feef9,0x24480c57,0x3a0e1240,0xc31a2694,0x273e2bc7,0x735002c3,
        0x3ef1ed4c,0x8c42e9c5,0x7f4948e8,0x028babf6,0x8a978632,0x6a502f43 },
      { 0xb74536fe,0xf5f13a46,0xd8a9f0eb,0x1d218bab,0x37232768,0x30f36bcc,
        0x576e8c18,0xc5317b31,0x9bbcb766,0xef1d57a6,0xb3e3d4dc,0x917c4930 } },
    /* 3 */
    { { 0xe349ddd0,0x11426e2e,0x9b2fc250,0x9f117ef9,0xec0174a6,0xff36b480,
        0x18458466,0x4f4bde76,0x05806049,0x2f2edb6d,0x19dfca92,0x8adc75d1 },
      { 0xb7d5a7ce,0xa619d097,0xa34411e9,0x874275e5,0x0da4b4ef,0x5403e047,
        0x77901d8f,0x2ebaafd9,0xa747170f,0x5e63ebce,0x7f9d8036,0x12a36944 } },
    /* 4 */
    { { 0x2f9fbe67,0x378205de,0x7f728e44,0xc4afcb83,0x682e00f1,0xdbcec06c,
        0x114d5423,0xf2a145c3,0x7a52463e,0xa01d9874,0x7d717b0a,0xfc0935b1 },
      { 0xd4d01f95,0x9653bc4f,0x9560ad34,0x9aa83ea8,0xaf8e3f3f,0xf77943dc,
        0xe86fe16e,0x70774a10,0xbf9ffdcf,0x6b62e6f1,0x588745c9,0x8a72f39e } },
    /* 5 */
    { { 0x2341c342,0x73ade4da,0xea704422,0xdd326e54,0x3741cef3,0x336c7d98,
        0x59e61549,0x1eafa00d,0xbd9a3efd,0xcd3ed892,0xc5c6c7e4,0x03faf26c },
      { 0x3045f8ac,0x087e2fcf,0x174f1e73,0x14a65532,0xfe0af9a7,0x2cf84f28,
        0x2cdc935b,0xddfd7a84,0x6929c895,0x4c0f117b,0x4c8bcfcc,0x356572d6 } },
    /* 6 */
    { { 0x3f3b236f,0xfab08607,0x81e221da,0x19e9d41d,0x3927b428,0xf3f6571e,
        0x7550f1f6,0x4348a933,0xa85e62f0,0x7167b996,0x7f5452bf,0x62d43759 },
      { 0xf2955926,0xd85feb9e,0x6df78353,0x440a561f,0x9ca36b59,0x389668ec,
        0xa22da016,0x052bf1a1,0xf6093254,0xbdfbff72,0xe22209f3,0x94e50f28 } },
    /* 7 */
    { { 0x3062e8af,0x90b2e5b3,0xe8a3d369,0xa8572375,0x201db7b1,0x3fe1b00b,
        0xee651aa2,0xe926def0,0xb9b10ad7,0x6542c9be,0xa2fcbe74,0x098e309b },
      { 0xfff1d63f,0x779deeb3,0x20bfd374,0x23d0e80a,0x8768f797,0x8452bb3b,
        0x1f952856,0xcf75bb4d,0x29ea3faa,0x8fe6b400,0x81373a53,0x12bd3e40 } },
    /* 8 */
    { { 0x16973cf4,0x070d34e1,0x7e4f34f7,0x20aee08b,0x5eb8ad29,0x269af9b9,
        0xa6a45dda,0xdde0a036,0x63df41e0,0xa18b528e,0xa260df2a,0x03cc71b2 },
      { 0xa06b1dd7,0x24a6770a,0x9d2675d3,0x5bfa9c11,0x96844432,0x73c1e2a1,
        0x131a6cf0,0x3660558d,0x2ee79454,0xb0289c83,0xc6d8ddcd,0xa6aefb01 } },
    /* 9 */
    { { 0x01ab5245,0xba1464b4,0xc48d93ff,0x9b8d0b6d,0x93ad272c,0x939867dc,
        0xae9fdc77,0xbebe085e,0x894ea8bd,0x73ae5103,0x39ac22e1,0x740fc89a },
      { 0x28e23b23,0x5e28b0a3,0xe13104d0,0x2352722e,0xb0a2640d,0xf4667a18,
        0x49bb37c3,0xac74a72e,0xe81e183a,0x79f734f0,0x3fd9c0eb,0xbffe5b6c } },
    /* 10 */
    { { 0x00623f3b,0x03cf2922,0x5f29ebff,0x095c7111,0x80aa6823,0x42d72247,
        0x7458c0b0,0x044c7ba1,0x0959ec20,0xca62f7ef,0xf8ca929f,0x40ae2ab7 },
      { 0xa927b102,0xb8c5377a,0xdc031771,0x398a86a0,0xc216a406,0x04908f9d,
        0x918d3300,0xb423a73a,0xe0b94739,0x634b0ff1,0x2d69f697,0xe29de725 } },
    /* 11 */
    { { 0x8435af04,0x744d1400,0xfec192da,0x5f255b1d,0x336dc542,0x1f17dc12,
        0x636a68a8,0x5c90c2a7,0x7704ca1e,0x960c9eb7,0x6fb3d65a,0x9de8cf1e },
      { 0x511d3d06,0xc60fee0d,0xf9eb52c7,0x466e2313,0x206b0914,0x743c0f5f,
        0x2191aa4d,0x42f55bac,0xffebdbc2,0xcefc7c8f,0xe6e8ed1c,0xd4fa6081 } },
    /* 12 */
    { { 0x98683186,0x867db639,0xddcc4ea9,0xfb5cf424,0xd4f0e7bd,0xcc9a7ffe,
        0x7a779f7e,0x7c57f71c,0xd6b25ef2,0x90774079,0xb4081680,0x90eae903 },
      { 0x0ee1fceb,0xdf2aae5e,0xe86c1a1f,0x3ff1da24,0xca193edf,0x80f587d6,
        0xdc9b9d6a,0xa5695523,0x85920303,0x7b840900,0xba6dbdef,0x1efa4dfc } },
    /* 13 */
    { { 0xe0540015,0xfbd838f9,0xc39077dc,0x2c323946,0xad619124,0x8b1fb9e6,
        0x0ca62ea8,0x9612440c,0x2dbe00ff,0x9ad9b52c,0xae197643,0xf52abaa1 },
      { 0x2cac32ad,0xd0e89894,0x62a98f91,0xdfb79e42,0x276f55cb,0x65452ecf,
        0x7ad23e12,0xdb1ac0d2,0xde4986f0,0xf68c5f6a,0x82ce327d,0x389ac37b } },
    /* 14 */
    { { 0xb8a9e8c9,0xcd96866d,0x5bb8091e,0xa11963b8,0x045b3cd2,0xc7f90d53,
        0x80f36504,0x755a72b5,0x21d3751c,0x46f8b399,0x53c193de,0x4bffdc91 },
      { 0xb89554e7,0xcd15c049,0xf7a26be6,0x353c6754,0xbd41d970,0x79602370,
        0x12b176c0,0xde16470b,0x40c8809d,0x56ba1175,0xe435fb1e,0xe2db35c3 } },
    /* 15 */
    { { 0x6328e33f,0xd71e4aab,0xaf8136d1,0x5486782b,0x86d57231,0x07a4995f,
        0x1651a968,0xf1f0a5bd,0x76803b6d,0xa5dc5b24,0x42dda935,0x5c587cbc },
      { 0xbae8b4c0,0x2b6cdb32,0xb1331138,0x66d1598b,0x5d7e9614,0x4a23b2d2,
        0x74a8c05d,0x93e402a6,0xda7ce82e,0x45ac94e6,0xe463d465,0xeb9f8281 } },
};
24724 
24725 /* Multiply the base point of P384 by the scalar and return the result.
24726  * If map is true then convert result to affine coordinates.
24727  *
24728  * r     Resulting point.
24729  * k     Scalar to multiply by.
24730  * map   Indicates whether to convert result to affine.
24731  * heap  Heap to use for allocation.
24732  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
24733  */
24734 static int sp_384_ecc_mulmod_base_12(sp_point_384* r, const sp_digit* k,
24735         int map, void* heap)
24736 {
24737     return sp_384_ecc_mulmod_stripe_12(r, &p384_base, p384_table,
24738                                       k, map, heap);
24739 }
24740 
24741 #else
24742 static const sp_table_entry_384 p384_table[256] = {
24743     /* 0 */
24744     { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
24745       { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
24746     /* 1 */
24747     { { 0x49c0b528,0x3dd07566,0xa0d6ce38,0x20e378e2,0x541b4d6e,0x879c3afc,
24748         0x59a30eff,0x64548684,0x614ede2b,0x812ff723,0x299e1513,0x4d3aadc2 },
24749       { 0x4b03a4fe,0x23043dad,0x7bb4a9ac,0xa1bfa8bf,0x2e83b050,0x8bade756,
24750         0x68f4ffd9,0xc6c35219,0x3969a840,0xdd800226,0x5a15c5e9,0x2b78abc2 } },
24751     /* 2 */
24752     { { 0x2b0c535b,0x29864753,0x70506296,0x90dd6953,0x216ab9ac,0x038cd6b4,
24753         0xbe12d76a,0x3df9b7b7,0x5f347bdb,0x13f4d978,0x13e94489,0x222c5c9c },
24754       { 0x2680dc64,0x5f8e796f,0x58352417,0x120e7cb7,0xd10740b8,0x254b5d8a,
24755         0x5337dee6,0xc38b8efb,0x94f02247,0xf688c2e1,0x6c25bc4c,0x7b5c75f3 } },
24756     /* 3 */
24757     { { 0x9edffea5,0xe26a3cc3,0x37d7e9fc,0x35bbfd1c,0x9bde3ef6,0xf0e7700d,
24758         0x1a538f5a,0x0380eb47,0x05bf9eb3,0x2e9da8bb,0x1a460c3e,0xdbb93c73 },
24759       { 0xf526b605,0x37dba260,0xfd785537,0x95d4978e,0xed72a04a,0x24ed793a,
24760         0x76005b1a,0x26948377,0x9e681f82,0x99f557b9,0xd64954ef,0xae5f9557 } },
24761     /* 4 */
24762     { { 0xf26feef9,0x24480c57,0x3a0e1240,0xc31a2694,0x273e2bc7,0x735002c3,
24763         0x3ef1ed4c,0x8c42e9c5,0x7f4948e8,0x028babf6,0x8a978632,0x6a502f43 },
24764       { 0xb74536fe,0xf5f13a46,0xd8a9f0eb,0x1d218bab,0x37232768,0x30f36bcc,
24765         0x576e8c18,0xc5317b31,0x9bbcb766,0xef1d57a6,0xb3e3d4dc,0x917c4930 } },
24766     /* 5 */
24767     { { 0xe349ddd0,0x11426e2e,0x9b2fc250,0x9f117ef9,0xec0174a6,0xff36b480,
24768         0x18458466,0x4f4bde76,0x05806049,0x2f2edb6d,0x19dfca92,0x8adc75d1 },
24769       { 0xb7d5a7ce,0xa619d097,0xa34411e9,0x874275e5,0x0da4b4ef,0x5403e047,
24770         0x77901d8f,0x2ebaafd9,0xa747170f,0x5e63ebce,0x7f9d8036,0x12a36944 } },
24771     /* 6 */
24772     { { 0x4fc52870,0x28f9c07a,0x1a53a961,0xce0b3748,0x0e1828d9,0xd550fa18,
24773         0x6adb225a,0xa24abaf7,0x6e58a348,0xd11ed0a5,0x948acb62,0xf3d811e6 },
24774       { 0x4c61ed22,0x8618dd77,0x80b47c9d,0x0bb747f9,0xde6b8559,0x22bf796f,
24775         0x680a21e9,0xfdfd1c6d,0x2af2c9dd,0xc0db1577,0xc1e90f3d,0xa09379e6 } },
24776     /* 7 */
24777     { { 0xe085c629,0x386c66ef,0x095bc89a,0x5fc2a461,0x203f4b41,0x1353d631,
24778         0x7e4bd8f5,0x7ca1972b,0xa7df8ce9,0xb077380a,0xee7e4ea3,0xd8a90389 },
24779       { 0xe7b14461,0x1bc74dc7,0x0c9c4f78,0xdc2cb014,0x84ef0a10,0x52b4b3a6,
24780         0x20327fe2,0xbde6ea5d,0x660f9615,0xb71ec435,0xb8ad8173,0xeede5a04 } },
24781     /* 8 */
24782     { { 0x893b9a2d,0x5584cbb3,0x00850c5d,0x820c660b,0x7df2d43d,0x4126d826,
24783         0x0109e801,0xdd5bbbf0,0x38172f1c,0x85b92ee3,0xf31430d9,0x609d4f93 },
24784       { 0xeadaf9d6,0x1e059a07,0x0f125fb0,0x70e6536c,0x560f20e7,0xd6220751,
24785         0x7aaf3a9a,0xa59489ae,0x64bae14e,0x7b70e2f6,0x76d08249,0x0dd03701 } },
24786     /* 9 */
24787     { { 0x8510521f,0x4cc13be8,0xf724cc17,0x87315ba9,0x353dc263,0xb49d83bb,
24788         0x0c279257,0x8b677efe,0xc93c9537,0x510a1c1c,0xa4702c99,0x33e30cd8 },
24789       { 0x2208353f,0xf0ffc89d,0xced42b2b,0x0170fa8d,0x26e2a5f5,0x090851ed,
24790         0xecb52c96,0x81276455,0x7fe1adf4,0x0646c4e1,0xb0868eab,0x513f047e } },
24791     /* 10 */
24792     { { 0xdf5bdf53,0xc07611f4,0x58b11a6d,0x45d331a7,0x1c4ee394,0x58965daf,
24793         0x5a5878d1,0xba8bebe7,0x82dd3025,0xaecc0a18,0xa923eb8b,0xcf2a3899 },
24794       { 0xd24fd048,0xf98c9281,0x8bbb025d,0x841bfb59,0xc9ab9d53,0xb8ddf8ce,
24795         0x7fef044e,0x538a4cb6,0x23236662,0x092ac21f,0x0b66f065,0xa919d385 } },
24796     /* 11 */
24797     { { 0x85d480d8,0x3db03b40,0x1b287a7d,0x8cd9f479,0x4a8f3bae,0x8f24dc75,
24798         0x3db41892,0x482eb800,0x9c56e0f5,0x38bf9eb3,0x9a91dc6f,0x8b977320 },
24799       { 0x7209cfc2,0xa31b05b2,0x05b2db70,0x4c49bf85,0xd619527b,0x56462498,
24800         0x1fac51ba,0x3fe51039,0xab4b8342,0xfb04f55e,0x04c6eabf,0xc07c10dc } },
24801     /* 12 */
24802     { { 0xdb32f048,0xad22fe4c,0x475ed6df,0x5f23bf91,0xaa66b6cb,0xa50ce0c0,
24803         0xf03405c0,0xdf627a89,0xf95e2d6a,0x3674837d,0xba42e64e,0x081c95b6 },
24804       { 0xe71d6ceb,0xeba3e036,0x6c6b0271,0xb45bcccf,0x0684701d,0x67b47e63,
24805         0xe712523f,0x60f8f942,0x5cd47adc,0x82423472,0x87649cbb,0x83027d79 } },
24806     /* 13 */
24807     { { 0x3615b0b8,0xb3929ea6,0xa54dac41,0xb41441fd,0xb5b6a368,0x8995d556,
24808         0x167ef05e,0xa80d4529,0x6d25a27f,0xf6bcb4a1,0x7bd55b68,0x210d6a4c },
24809       { 0x25351130,0xf3804abb,0x903e37eb,0x1d2df699,0x084c25c8,0x5f201efc,
24810         0xa1c68e91,0x31a28c87,0x563f62a5,0x81dad253,0xd6c415d4,0x5dd6de70 } },
24811     /* 14 */
24812     { { 0x846612ce,0x29f470fd,0xda18d997,0x986f3eec,0x2f34af86,0x6b84c161,
24813         0x46ddaf8b,0x5ef0a408,0xe49e795f,0x14405a00,0xaa2f7a37,0x5f491b16 },
24814       { 0xdb41b38d,0xc7f07ae4,0x18fbfcaa,0xef7d119e,0x14443b19,0x3a18e076,
24815         0x79a19926,0x4356841a,0xe2226fbe,0x91f4a91c,0x3cc88721,0xdc77248c } },
24816     /* 15 */
24817     { { 0xe4b1ec9d,0xd570ff1a,0xe7eef706,0x21d23e0e,0xca19e086,0x3cde40f4,
24818         0xcd4bb270,0x7d6523c4,0xbf13aa6c,0x16c1f06c,0xd14c4b60,0x5aa7245a },
24819       { 0x44b74de8,0x37f81467,0x620a934e,0x839e7a17,0xde8b1aa1,0xf74d14e8,
24820         0xf30d75e2,0x8789fa51,0xc81c261e,0x09b24052,0x33c565ee,0x654e2678 } },
24821     /* 16 */
24822     { { 0x2f9fbe67,0x378205de,0x7f728e44,0xc4afcb83,0x682e00f1,0xdbcec06c,
24823         0x114d5423,0xf2a145c3,0x7a52463e,0xa01d9874,0x7d717b0a,0xfc0935b1 },
24824       { 0xd4d01f95,0x9653bc4f,0x9560ad34,0x9aa83ea8,0xaf8e3f3f,0xf77943dc,
24825         0xe86fe16e,0x70774a10,0xbf9ffdcf,0x6b62e6f1,0x588745c9,0x8a72f39e } },
24826     /* 17 */
24827     { { 0x2341c342,0x73ade4da,0xea704422,0xdd326e54,0x3741cef3,0x336c7d98,
24828         0x59e61549,0x1eafa00d,0xbd9a3efd,0xcd3ed892,0xc5c6c7e4,0x03faf26c },
24829       { 0x3045f8ac,0x087e2fcf,0x174f1e73,0x14a65532,0xfe0af9a7,0x2cf84f28,
24830         0x2cdc935b,0xddfd7a84,0x6929c895,0x4c0f117b,0x4c8bcfcc,0x356572d6 } },
24831     /* 18 */
24832     { { 0x7d8c1bba,0x7ecbac01,0x90b0f3d5,0x6058f9c3,0xf6197d0f,0xaee116e3,
24833         0x4033b128,0xc4dd7068,0xc209b983,0xf084dba6,0x831dbc4a,0x97c7c2cf },
24834       { 0xf96010e8,0x2f4e61dd,0x529faa17,0xd97e4e20,0x69d37f20,0x4ee66660,
24835         0x3d366d72,0xccc139ed,0x13488e0f,0x690b6ee2,0xf3a6d533,0x7cad1dc5 } },
24836     /* 19 */
24837     { { 0xda57a41f,0x660a9a81,0xec0039b6,0xe74a0412,0x5e1dad15,0x42343c6b,
24838         0x46681d4c,0x284f3ff5,0x63749e89,0xb51087f1,0x6f9f2f13,0x070f23cc },
24839       { 0x5d186e14,0x542211da,0xfddb0dff,0x84748f37,0xdb1f4180,0x41a3aab4,
24840         0xa6402d0e,0x25ed667b,0x02f58355,0x2f2924a9,0xfa44a689,0x5844ee7c } },
24841     /* 20 */
24842     { { 0x3f3b236f,0xfab08607,0x81e221da,0x19e9d41d,0x3927b428,0xf3f6571e,
24843         0x7550f1f6,0x4348a933,0xa85e62f0,0x7167b996,0x7f5452bf,0x62d43759 },
24844       { 0xf2955926,0xd85feb9e,0x6df78353,0x440a561f,0x9ca36b59,0x389668ec,
24845         0xa22da016,0x052bf1a1,0xf6093254,0xbdfbff72,0xe22209f3,0x94e50f28 } },
24846     /* 21 */
24847     { { 0x3062e8af,0x90b2e5b3,0xe8a3d369,0xa8572375,0x201db7b1,0x3fe1b00b,
24848         0xee651aa2,0xe926def0,0xb9b10ad7,0x6542c9be,0xa2fcbe74,0x098e309b },
24849       { 0xfff1d63f,0x779deeb3,0x20bfd374,0x23d0e80a,0x8768f797,0x8452bb3b,
24850         0x1f952856,0xcf75bb4d,0x29ea3faa,0x8fe6b400,0x81373a53,0x12bd3e40 } },
24851     /* 22 */
24852     { { 0x104cbba5,0xc023780d,0xfa35dd4c,0x6207e747,0x1ca9b6a3,0x35c23928,
24853         0x97987b10,0x4ff19be8,0x8022eee8,0xb8476bbf,0xd3bbe74d,0xaa0a4a14 },
24854       { 0x187d4543,0x20f94331,0x79f6e066,0x32153870,0xac7e82e1,0x83b0f74e,
24855         0x828f06ab,0xa7748ba2,0xc26ef35f,0xc5f0298a,0x8e9a7dbd,0x0f0c5070 } },
24856     /* 23 */
24857     { { 0xdef029dd,0x0c5c244c,0x850661b8,0x3dabc687,0xfe11d981,0x9992b865,
24858         0x6274dbad,0xe9801b8f,0x098da242,0xe54e6319,0x91a53d08,0x9929a91a },
24859       { 0x35285887,0x37bffd72,0xf1418102,0xbc759425,0xfd2e6e20,0x9280cc35,
24860         0xfbc42ee5,0x735c600c,0x8837619a,0xb7ad2864,0xa778c57b,0xa3627231 } },
24861     /* 24 */
24862     { { 0x91361ed8,0xae799b5c,0x6c63366c,0x47d71b75,0x1b265a6a,0x54cdd521,
24863         0x98d77b74,0xe0215a59,0xbab29db0,0x4424d9b7,0x7fd9e536,0x8b0ffacc },
24864       { 0x37b5d9ef,0x46d85d12,0xbfa91747,0x5b106d62,0x5f99ba2d,0xed0479f8,
24865         0x1d104de4,0x0e6f3923,0x25e8983f,0x83a84c84,0xf8105a70,0xa9507e0a } },
24866     /* 25 */
24867     { { 0x14cf381c,0xf6c68a6e,0xc22e31cc,0xaf9d27bd,0xaa8a5ccb,0x23568d4d,
24868         0xe338e4d2,0xe431eec0,0x8f52ad1f,0xf1a828fe,0xe86acd80,0xdb6a0579 },
24869       { 0x4507832a,0x2885672e,0x887e5289,0x73fc275f,0x05610d08,0x65f80278,
24870         0x075ff5b0,0x8d9b4554,0x09f712b5,0x3a8e8fb1,0x2ebe9cf2,0x39f0ac86 } },
24871     /* 26 */
24872     { { 0x4c52edf5,0xd8fabf78,0xa589ae53,0xdcd737e5,0xd791ab17,0x94918bf0,
24873         0xbcff06c9,0xb5fbd956,0xdca46d45,0xf6d3032e,0x41a3e486,0x2cdff7e1 },
24874       { 0x61f47ec8,0x6674b3ba,0xeef84608,0x8a882163,0x4c687f90,0xa257c705,
24875         0xf6cdf227,0xe30cb2ed,0x7f6ea846,0x2c4c64ca,0xcc6bcd3c,0x186fa17c } },
24876     /* 27 */
24877     { { 0x1dfcb91e,0x48a3f536,0x646d358a,0x83595e13,0x91128798,0xbd15827b,
24878         0x2187757a,0x3ce612b8,0x61bd7372,0x873150a1,0xb662f568,0xf4684530 },
24879       { 0x401896f6,0x8833950b,0x77f3e090,0xe11cb89a,0x48e7f4a5,0xb2f12cac,
24880         0xf606677e,0x313dd769,0x16579f93,0xfdcf08b3,0x46b8f22b,0x6429cec9 } },
24881     /* 28 */
24882     { { 0xbb75f9a4,0x4984dd54,0x29d3b570,0x4aef06b9,0x3d6e4c1e,0xb5f84ca2,
24883         0xb083ef35,0x24c61c11,0x392ca9ff,0xce4a7392,0x6730a800,0x865d6517 },
24884       { 0x722b4a2b,0xca3dfe76,0x7b083e0e,0x12c04bf9,0x1b86b8a5,0x803ce5b5,
24885         0x6a7e3e0c,0x3fc7632d,0xc81adbe4,0xc89970c2,0x120e16b1,0x3cbcd3ad } },
24886     /* 29 */
24887     { { 0xec30ce93,0xfbfb4cc7,0xb72720a2,0x10ed6c7d,0x47b55500,0xec675bf7,
24888         0x333ff7c3,0x90725903,0x5075bfc0,0xc7c3973e,0x07acf31b,0xb049ecb0 },
24889       { 0x4f58839c,0xb4076eaf,0xa2b05e4f,0x101896da,0xab40c66e,0x3f6033b0,
24890         0xc8d864ba,0x19ee9eeb,0x47bf6d2a,0xeb6cf155,0xf826477d,0x8e5a9663 } },
24891     /* 30 */
24892     { { 0xf7fbd5e1,0x69e62fdd,0x76912b1d,0x38ecfe54,0xd1da3bfb,0x845a3d56,
24893         0x1c86f0d4,0x0494950e,0x3bc36ce8,0x83cadbf9,0x4fccc8d1,0x41fce572 },
24894       { 0x8332c144,0x05f939c2,0x0871e46e,0xb17f248b,0x66e8aff6,0x3d8534e2,
24895         0x3b85c629,0x1d06f1dc,0xa3131b73,0xdb06a32e,0x8b3f64e5,0xf295184d } },
24896     /* 31 */
24897     { { 0x36ddc103,0xd9653ff7,0x95ef606f,0x25f43e37,0xfe06dce8,0x09e301fc,
24898         0x30b6eebf,0x85af2341,0x0ff56b20,0x79b12b53,0xfe9a3c6b,0x9b4fb499 },
24899       { 0x51d27ac2,0x0154f892,0x56ca5389,0xd33167e3,0xafc065a6,0x7828ec1f,
24900         0x7f746c9b,0x0959a258,0x0c44f837,0xb18f1be3,0xc4132fdb,0xa7946117 } },
24901     /* 32 */
24902     { { 0x5e3c647b,0xc0426b77,0x8cf05348,0xbfcbd939,0x172c0d3d,0x31d312e3,
24903         0xee754737,0x5f49fde6,0x6da7ee61,0x895530f0,0xe8b3a5fb,0xcf281b0a },
24904       { 0x41b8a543,0xfd149735,0x3080dd30,0x41a625a7,0x653908cf,0xe2baae07,
24905         0xba02a278,0xc3d01436,0x7b21b8f8,0xa0d0222e,0xd7ec1297,0xfdc270e9 } },
24906     /* 33 */
24907     { { 0xbc7f41d6,0x00873c0c,0x1b7ad641,0xd976113e,0x238443fb,0x2a536ff4,
24908         0x41e62e45,0x030d00e2,0x5f545fc6,0x532e9867,0x8e91208c,0xcd033108 },
24909       { 0x9797612c,0xd1a04c99,0xeea674e2,0xd4393e02,0xe19742a1,0xd56fa69e,
24910         0x85f0590e,0xdd2ab480,0x48a2243d,0xa5cefc52,0x54383f41,0x48cc67b6 } },
24911     /* 34 */
24912     { { 0xfc14ab48,0x4e50430e,0x26706a74,0x195b7f4f,0xcc881ff6,0x2fe8a228,
24913         0xd945013d,0xb1b968e2,0x4b92162b,0x936aa579,0x364e754a,0x4fb766b7 },
24914       { 0x31e1ff7f,0x13f93bca,0xce4f2691,0x696eb5ca,0xa2b09e02,0xff754bf8,
24915         0xe58e3ff8,0x58f13c9c,0x1678c0b0,0xb757346f,0xa86692b3,0xd54200db } },
24916     /* 35 */
24917     { { 0x6dda1265,0x9a030bbd,0xe89718dd,0xf7b4f3fc,0x936065b8,0xa6a4931f,
24918         0x5f72241c,0xbce72d87,0x65775857,0x6cbb51cb,0x4e993675,0xc7161815 },
24919       { 0x2ee32189,0xe81a0f79,0x277dc0b2,0xef2fab26,0xb71f469f,0x9e64f6fe,
24920         0xdfdaf859,0xb448ce33,0xbe6b5df1,0x3f5c1c4c,0x1de45f7b,0xfb8dfb00 } },
24921     /* 36 */
24922     { { 0x4d5bb921,0xc7345fa7,0x4d2b667e,0x5c7e04be,0x282d7a3e,0x47ed3a80,
24923         0x7e47b2a4,0x5c2777f8,0x08488e2e,0x89b3b100,0xb2eb5b45,0x9aad77c2 },
24924       { 0xdaac34ae,0xd681bca7,0x26afb326,0x2452e4e5,0x41a1ee14,0x0c887924,
24925         0xc2407ade,0x743b04d4,0xfc17a2ac,0xcb5e999b,0x4a701a06,0x4dca2f82 } },
24926     /* 37 */
24927     { { 0x1127bc1a,0x68e31ca6,0x17ead3be,0xa3edd59b,0xe25f5a15,0x67b6b645,
24928         0xa420e15e,0x76221794,0x4b1e872e,0x794fd83b,0xb2dece1b,0x7cab3f03 },
24929       { 0xca9b3586,0x7119bf15,0x4d250bd7,0xa5545924,0xcc6bcf24,0x173633ea,
24930         0xb1b6f884,0x9bd308c2,0x447d38c3,0x3bae06f5,0xf341fe1c,0x54dcc135 } },
24931     /* 38 */
24932     { { 0x943caf0d,0x56d3598d,0x225ff133,0xce044ea9,0x563fadea,0x9edf6a7c,
24933         0x73e8dc27,0x632eb944,0x3190dcab,0x814b467e,0x6dbb1e31,0x2d4f4f31 },
24934       { 0xa143b7ca,0x8d69811c,0xde7cf950,0x4ec1ac32,0x37b5fe82,0x223ab5fd,
24935         0x9390f1d9,0xe82616e4,0x75804610,0xabff4b20,0x875b08f0,0x11b9be15 } },
24936     /* 39 */
24937     { { 0x3bbe682c,0x4ae31a3d,0x74eef2dd,0xbc7c5d26,0x3c47dd40,0x92afd10a,
24938         0xc14ab9e1,0xec7e0a3b,0xb2e495e4,0x6a6c3dd1,0x309bcd85,0x085ee5e9 },
24939       { 0x8c2e67fd,0xf381a908,0xe261eaf2,0x32083a80,0x96deee15,0x0fcd6a49,
24940         0x5e524c79,0xe3b8fb03,0x1d5b08b9,0x8dc360d9,0x7f26719f,0x3a06e2c8 } },
24941     /* 40 */
24942     { { 0x7237cac0,0x5cd9f5a8,0x43586794,0x93f0b59d,0xe94f6c4e,0x4384a764,
24943         0xb62782d3,0x8304ed2b,0xcde06015,0x0b8db8b3,0x5dbe190f,0x4336dd53 },
24944       { 0x92ab473a,0x57443553,0xbe5ed046,0x031c7275,0x21909aa4,0x3e78678c,
24945         0x99202ddb,0x4ab7e04f,0x6977e635,0x2648d206,0x093198be,0xd427d184 } },
24946     /* 41 */
24947     { { 0x0f9b5a31,0x822848f5,0xbaadb62a,0xbb003468,0x3357559c,0x233a0472,
24948         0x79aee843,0x49ef6880,0xaeb9e1e3,0xa89867a0,0x1f6f9a55,0xc151931b },
24949       { 0xad74251e,0xd264eb0b,0x4abf295e,0x37b9b263,0x04960d10,0xb600921b,
24950         0x4da77dc0,0x0de53dbc,0xd2b18697,0x01d9bab3,0xf7156ddf,0xad54ec7a } },
24951     /* 42 */
24952     { { 0x79efdc58,0x8e74dc35,0x4ff68ddb,0x456bd369,0xd32096a5,0x724e74cc,
24953         0x386783d0,0xe41cff42,0x7c70d8a4,0xa04c7f21,0xe61a19a2,0x41199d2f },
24954       { 0x29c05dd2,0xd389a3e0,0xe7e3fda9,0x535f2a6b,0x7c2b4df8,0x26ecf72d,
24955         0xfe745294,0x678275f4,0x9d23f519,0x6319c9cc,0x88048fc4,0x1e05a02d } },
24956     /* 43 */
24957     { { 0xd4d5ffe8,0x75cc8e2e,0xdbea17f2,0xf8bb4896,0xcee3cb4a,0x35059790,
24958         0xa47c6165,0x4c06ee85,0x92935d2f,0xf98fff25,0x32ffd7c7,0x34c4a572 },
24959       { 0xea0376a2,0xc4b14806,0x4f115e02,0x2ea5e750,0x1e55d7c0,0x532d76e2,
24960         0xf31044da,0x68dc9411,0x71b77993,0x9272e465,0x93a8cfd5,0xadaa38bb } },
24961     /* 44 */
24962     { { 0x7d4ed72a,0x4bf0c712,0xba1f79a3,0xda0e9264,0xf4c39ea4,0x48c0258b,
24963         0x2a715138,0xa5394ed8,0xbf06c660,0x4af511ce,0xec5c37cd,0xfcebceef },
24964       { 0x779ae8c1,0xf23b75aa,0xad1e606e,0xdeff59cc,0x22755c82,0xf3f526fd,
24965         0xbb32cefd,0x64c5ab44,0x915bdefd,0xa96e11a2,0x1143813e,0xab19746a } },
24966     /* 45 */
24967     { { 0xec837d7d,0x43c78585,0xb8ee0ba4,0xca5b6fbc,0xd5dbb5ee,0x34e924d9,
24968         0xbb4f1ca5,0x3f4fa104,0x398640f7,0x15458b72,0xd7f407ea,0x4231faa9 },
24969       { 0xf96e6896,0x53e0661e,0xd03b0f9d,0x554e4c69,0x9c7858d1,0xd4fcb07b,
24970         0x52cb04fa,0x7e952793,0x8974e7f7,0x5f5f1574,0x6b6d57c8,0x2e3fa558 } },
24971     /* 46 */
24972     { { 0x6a9951a8,0x42cd4803,0x42792ad0,0xa8b15b88,0xabb29a73,0x18e8bcf9,
24973         0x409933e8,0xbfd9a092,0xefb88dc4,0x760a3594,0x40724458,0x14418863 },
24974       { 0x99caedc7,0x162a56ee,0x91d101c9,0x8fb12ecd,0x393202da,0xea671967,
24975         0xa4ccd796,0x1aac8c4a,0x1cf185a8,0x7db05036,0x8cfd095a,0x0c9f86cd } },
24976     /* 47 */
24977     { { 0x10b2a556,0x9a728147,0x327b70b2,0x767ca964,0x5e3799b7,0x04ed9e12,
24978         0x22a3eb2a,0x6781d2dc,0x0d9450ac,0x5bd116eb,0xa7ebe08a,0xeccac1fc },
24979       { 0xdc2d6e94,0xde68444f,0x35ecf21b,0x3621f429,0x29e03a2c,0x14e2d543,
24980         0x7d3e7f0a,0x53e42cd5,0x73ed00b9,0xbba26c09,0xc57d2272,0x00297c39 } },
24981     /* 48 */
24982     { { 0xb8243a7d,0x3aaaab10,0x8fa58c5b,0x6eeef93e,0x9ae7f764,0xf866fca3,
24983         0x61ab04d3,0x64105a26,0x03945d66,0xa3578d8a,0x791b848c,0xb08cd3e4 },
24984       { 0x756d2411,0x45edc5f8,0xa755128c,0xd4a790d9,0x49e5f6a0,0xc2cf0963,
24985         0xf649beaa,0xc66d267d,0x8467039e,0x3ce6d968,0x42f7816f,0x50046c6b } },
24986     /* 49 */
24987     { { 0x66425043,0x92ae1602,0xf08db890,0x1ff66afd,0x8f162ce5,0x386f5a7f,
24988         0xfcf5598f,0x18d2dea0,0x1a8ca18e,0x78372b3a,0x8cd0e6f7,0xdf0d20eb },
24989       { 0x75bb4045,0x7edd5e1d,0xb96d94b7,0x252a47ce,0x2c626776,0xbdb29358,
24990         0x40dd1031,0x853c3943,0x7d5f47fd,0x9dc9becf,0xbae4044a,0x27c2302f } },
24991     /* 50 */
24992     { { 0x8f2d49ce,0x2d1d208a,0x162df0a2,0x0d91aa02,0x09a07f65,0x9c5cce87,
24993         0x84339012,0xdf07238b,0x419442cd,0x5028e2c8,0x72062aba,0x2dcbd358 },
24994       { 0xe4680967,0xb5fbc3cb,0x9f92d72c,0x2a7bc645,0x116c369d,0x806c76e1,
24995         0x3177e8d8,0x5c50677a,0x4569df57,0x753739eb,0x36c3f40b,0x2d481ef6 } },
24996     /* 51 */
24997     { { 0xfea1103e,0x1a2d39fd,0x95f81b17,0xeaae5592,0xf59b264a,0xdbd0aa18,
24998         0xcb592ee0,0x90c39c1a,0x9750cca3,0xdf62f80d,0xdf97cc6c,0xda4d8283 },
24999       { 0x1e201067,0x0a6dd346,0x69fb1f6b,0x1531f859,0x1d60121f,0x4895e552,
25000         0x4c041c91,0x0b21aab0,0xbcc1ccf8,0x9d896c46,0x3141bde7,0xd24da3b3 } },
25001     /* 52 */
25002     { { 0x53b0a354,0x575a0537,0x0c6ddcd8,0x392ff2f4,0x56157b94,0x0b8e8cff,
25003         0x3b1b80d1,0x073e57bd,0x3fedee15,0x2a75e0f0,0xaa8e6f19,0x752380e4 },
25004       { 0x6558ffe9,0x1f4e227c,0x19ec5415,0x3a348618,0xf7997085,0xab382d5e,
25005         0xddc46ac2,0x5e6deaff,0xfc8d094c,0xe5144078,0xf60e37c6,0xf674fe51 } },
25006     /* 53 */
25007     { { 0xaf63408f,0x6fb87ae5,0xcd75a737,0xa39c36a9,0xcf4c618d,0x7833313f,
25008         0xf034c88d,0xfbcd4482,0x39b35288,0x4469a761,0x66b5d9c9,0x77a711c5 },
25009       { 0x944f8d65,0x4a695dc7,0x161aaba8,0xe6da5f65,0x24601669,0x8654e9c3,
25010         0x28ae7491,0xbc8b93f5,0x8f5580d8,0x5f1d1e83,0xcea32cc8,0x8ccf9a1a } },
25011     /* 54 */
25012     { { 0x7196fee2,0x28ab110c,0x874c8945,0x75799d63,0x29aedadd,0xa2629348,
25013         0x2be88ff4,0x9714cc7b,0xd58d60d6,0xf71293cf,0x32a564e9,0xda6b6cb3 },
25014       { 0x3dd821c2,0xf43fddb1,0x90dd323d,0xf2f2785f,0x048489f8,0x91246419,
25015         0xd24c6749,0x61660f26,0xc803c15c,0x961d9e8c,0xfaadc4c9,0x631c6158 } },
25016     /* 55 */
25017     { { 0xfd752366,0xacf2ebe0,0x139be88b,0xb93c340e,0x0f20179e,0x98f66485,
25018         0xff1da785,0x14820254,0x4f85c16e,0x5278e276,0x7aab1913,0xa246ee45 },
25019       { 0x53763b33,0x43861eb4,0x45c0bc0d,0xc49f03fc,0xad6b1ea1,0xafff16bc,
25020         0x6fd49c99,0xce33908b,0xf7fde8c3,0x5c51e9bf,0xff142c5e,0x076a7a39 } },
25021     /* 56 */
25022     { { 0x9e338d10,0x04639dfe,0xf42b411b,0x8ee6996f,0xa875cef2,0x960461d1,
25023         0x95b4d0ba,0x1057b6d6,0xa906e0bc,0x27639252,0xe1c20f8a,0x2c19f09a },
25024       { 0xeef4c43d,0x5b8fc3f0,0x07a84aa9,0xe2e1b1a8,0x835d2bdb,0x5f455528,
25025         0x207132dd,0x0f4aee4d,0x3907f675,0xe9f8338c,0x0e0531f0,0x7a874dc9 } },
25026     /* 57 */
25027     { { 0x97c27050,0x84b22d45,0x59e70bf8,0xbd0b8df7,0x79738b9b,0xb4d67405,
25028         0xcd917c4f,0x47f4d5f5,0x13ce6e33,0x9099c4ce,0x521d0f8b,0x942bfd39 },
25029       { 0xa43b566d,0x5028f0f6,0x21bff7de,0xaf6e8669,0xc44232cd,0x83f6f856,
25030         0xf915069a,0x65680579,0xecfecb85,0xd12095a2,0xdb01ba16,0xcf7f06ae } },
25031     /* 58 */
25032     { { 0x8ef96c80,0x0f56e3c4,0x3ddb609c,0xd521f2b3,0x7dc1450d,0x2be94102,
25033         0x02a91fe2,0x2d21a071,0x1efa37de,0x2e6f74fa,0x156c28a1,0x9a9a90b8 },
25034       { 0x9dc7dfcb,0xc54ea9ea,0x2c2c1d62,0xc74e66fc,0x49d3e067,0x9f23f967,
25035         0x54dd38ad,0x1c7c3a46,0x5946cee3,0xc7005884,0x45cc045d,0x89856368 } },
25036     /* 59 */
25037     { { 0xfce73946,0x29da7cd4,0x23168563,0x8f697db5,0xcba92ec6,0x8e235e9c,
25038         0x9f91d3ea,0x55d4655f,0xaa50a6cd,0xf3689f23,0x21e6a1a0,0xdcf21c26 },
25039       { 0x61b818bf,0xcffbc82e,0xda47a243,0xc74a2f96,0x8bc1a0cf,0x234e980a,
25040         0x7929cb6d,0xf35fd6b5,0xefe17d6c,0x81468e12,0x58b2dafb,0xddea6ae5 } },
25041     /* 60 */
25042     { { 0x7e787b2e,0x294de887,0x39a9310d,0x258acc1f,0xac14265d,0x92d9714a,
25043         0x708b48a0,0x18b5591c,0xe1abbf71,0x27cc6bb0,0x568307b9,0xc0581fa3 },
25044       { 0xf24d4d58,0x9e0f58a3,0xe0ce2327,0xfebe9bb8,0x9d1be702,0x91fd6a41,
25045         0xfacac993,0x9a7d8a45,0x9e50d66d,0xabc0a08c,0x06498201,0x02c342f7 } },
25046     /* 61 */
25047     { { 0x157bdbc2,0xccd71407,0xad0e1605,0x72fa89c6,0xb92a015f,0xb1d3da2b,
25048         0xa0a3fe56,0x8ad9e7cd,0x24f06737,0x160edcbd,0x61275be6,0x79d4db33 },
25049       { 0x5f3497c4,0xd3d31fd9,0x04192fb0,0x8cafeaee,0x13a50af3,0xe13ca745,
25050         0x8c85aae5,0x18826167,0x9eb556ff,0xce06cea8,0xbdb549f3,0x2eef1995 } },
25051     /* 62 */
25052     { { 0x50596edc,0x8ed7d3eb,0x905243a2,0xaa359362,0xa4b6d02b,0xa212c2c2,
25053         0xc4fbec68,0x611fd727,0xb84f733d,0x8a0b8ff7,0x5f0daf0e,0xd85a6b90 },
25054       { 0xd4091cf7,0x60e899f5,0x2eff2768,0x4fef2b67,0x10c33964,0xc1f195cb,
25055         0x93626a8f,0x8275d369,0x0d6c840a,0xc77904f4,0x7a868acd,0x88d8b7fd } },
25056     /* 63 */
25057     { { 0x7bd98425,0x85f23723,0xc70b154e,0xd4463992,0x96687a2e,0xcbb00ee2,
25058         0xc83214fd,0x905fdbf7,0x13593684,0x2019d293,0xef51218e,0x0428c393 },
25059       { 0x981e909a,0x40c7623f,0x7be192da,0x92513385,0x4010907e,0x48fe480f,
25060         0x3120b459,0xdd7a187c,0xa1fd8f3c,0xc9d7702d,0xe358efc5,0x66e4753b } },
25061     /* 64 */
25062     { { 0x16973cf4,0x070d34e1,0x7e4f34f7,0x20aee08b,0x5eb8ad29,0x269af9b9,
25063         0xa6a45dda,0xdde0a036,0x63df41e0,0xa18b528e,0xa260df2a,0x03cc71b2 },
25064       { 0xa06b1dd7,0x24a6770a,0x9d2675d3,0x5bfa9c11,0x96844432,0x73c1e2a1,
25065         0x131a6cf0,0x3660558d,0x2ee79454,0xb0289c83,0xc6d8ddcd,0xa6aefb01 } },
25066     /* 65 */
25067     { { 0x01ab5245,0xba1464b4,0xc48d93ff,0x9b8d0b6d,0x93ad272c,0x939867dc,
25068         0xae9fdc77,0xbebe085e,0x894ea8bd,0x73ae5103,0x39ac22e1,0x740fc89a },
25069       { 0x28e23b23,0x5e28b0a3,0xe13104d0,0x2352722e,0xb0a2640d,0xf4667a18,
25070         0x49bb37c3,0xac74a72e,0xe81e183a,0x79f734f0,0x3fd9c0eb,0xbffe5b6c } },
25071     /* 66 */
25072     { { 0xc6a2123f,0xb1a358f5,0xfe28df6d,0x927b2d95,0xf199d2f9,0x89702753,
25073         0x1a3f82dc,0x0a73754c,0x777affe1,0x063d029d,0xdae6d34d,0x5439817e },
25074       { 0x6b8b83c4,0xf7979eef,0x9d945682,0x615cb214,0xc5e57eae,0x8f0e4fac,
25075         0x113047dd,0x042b89b8,0x93f36508,0x888356dc,0x5fd1f32f,0xbf008d18 } },
25076     /* 67 */
25077     { { 0x4e8068db,0x8012aa24,0xa5729a47,0xc72cc641,0x43f0691d,0x3c33df2c,
25078         0x1d92145f,0xfa057347,0xb97f7946,0xaefc0f2f,0x2f8121bf,0x813d75cb },
25079       { 0x4383bba6,0x05613c72,0xa4224b3f,0xa924ce70,0x5f2179a6,0xe59cecbe,
25080         0x79f62b61,0x78e2e8aa,0x53ad8079,0x3ac2cc3b,0xd8f4fa96,0x55518d71 } },
25081     /* 68 */
25082     { { 0x00623f3b,0x03cf2922,0x5f29ebff,0x095c7111,0x80aa6823,0x42d72247,
25083         0x7458c0b0,0x044c7ba1,0x0959ec20,0xca62f7ef,0xf8ca929f,0x40ae2ab7 },
25084       { 0xa927b102,0xb8c5377a,0xdc031771,0x398a86a0,0xc216a406,0x04908f9d,
25085         0x918d3300,0xb423a73a,0xe0b94739,0x634b0ff1,0x2d69f697,0xe29de725 } },
25086     /* 69 */
25087     { { 0x8435af04,0x744d1400,0xfec192da,0x5f255b1d,0x336dc542,0x1f17dc12,
25088         0x636a68a8,0x5c90c2a7,0x7704ca1e,0x960c9eb7,0x6fb3d65a,0x9de8cf1e },
25089       { 0x511d3d06,0xc60fee0d,0xf9eb52c7,0x466e2313,0x206b0914,0x743c0f5f,
25090         0x2191aa4d,0x42f55bac,0xffebdbc2,0xcefc7c8f,0xe6e8ed1c,0xd4fa6081 } },
25091     /* 70 */
25092     { { 0xb0ab9645,0xb5e405d3,0xd5f1f711,0xaeec7f98,0x585c2a6e,0x8ad42311,
25093         0x512c6944,0x045acb9e,0xa90db1c6,0xae106c4e,0x898e6563,0xb89f33d5 },
25094       { 0x7fed2ce4,0x43b07cd9,0xdd815b20,0xf9934e17,0x0a81a349,0x6778d4d5,
25095         0x52918061,0x9e616ade,0xd7e67112,0xfa06db06,0x88488091,0x1da23cf1 } },
25096     /* 71 */
25097     { { 0x42f2c4b5,0x821c46b3,0x66059e47,0x931513ef,0x66f50cd1,0x7030ae43,
25098         0x43e7b127,0x43b536c9,0x5fca5360,0x006258cf,0x6b557abf,0xe4e3ee79 },
25099       { 0x24c8b22f,0xbb6b3900,0xfcbf1054,0x2eb5e2c1,0x567492af,0x937b18c9,
25100         0xacf53957,0xf09432e4,0x1dbf3a56,0x585f5a9d,0xbe0887cf,0xf86751fd } },
25101     /* 72 */
25102     { { 0x9d10e0b2,0x157399cb,0x60dc51b7,0x1c0d5956,0x1f583090,0x1d496b8a,
25103         0x88590484,0x6658bc26,0x03213f28,0x88c08ab7,0x7ae58de4,0x8d2e0f73 },
25104       { 0x486cfee6,0x9b79bc95,0xe9e5bc57,0x036a26c7,0xcd8ae97a,0x1ad03601,
25105         0xff3a0494,0x06907f87,0x2c7eb584,0x078f4bbf,0x7e8d0a5a,0xe3731bf5 } },
25106     /* 73 */
25107     { { 0xe1cd0abe,0x72f2282b,0x87efefa2,0xd4f9015e,0x6c3834bd,0x9d189806,
25108         0xb8a29ced,0x9c8cdcc1,0xfee82ebc,0x0601b9f4,0x7206a756,0x371052bc },
25109       { 0x46f32562,0x76fa1092,0x17351bb4,0xdaad534c,0xb3636bb5,0xc3d64c37,
25110         0x45d54e00,0x038a8c51,0x32c09e7c,0x301e6180,0x95735151,0x9764eae7 } },
25111     /* 74 */
25112     { { 0xcbd5256a,0x8791b19f,0x6ca13a3b,0x4007e0f2,0x4cf06904,0x03b79460,
25113         0xb6c17589,0xb18a9c22,0x81d45908,0xa1cb7d7d,0x21bb68f1,0x6e13fa9d },
25114       { 0xa71e6e16,0x47183c62,0xe18749ed,0x5cf0ef8e,0x2e5ed409,0x2c9c7f9b,
25115         0xe6e117e1,0x042eeacc,0x13fb5a7f,0xb86d4816,0xc9e5feb1,0xea1cf0ed } },
25116     /* 75 */
25117     { { 0xcea4cc9b,0x6e6573c9,0xafcec8f3,0x5417961d,0xa438b6f6,0x804bf02a,
25118         0xdcd4ea88,0xb894b03c,0x3799571f,0xd0f807e9,0x862156e8,0x3466a7f5 },
25119       { 0x56515664,0x51e59acd,0xa3c5eb0b,0x55b0f93c,0x6a4279db,0x84a06b02,
25120         0xc5fae08e,0x5c850579,0xa663a1a2,0xcf07b8db,0xf46ffc8d,0x49a36bbc } },
25121     /* 76 */
25122     { { 0x46d93106,0xe47f5acc,0xaa897c9c,0x65b7ade0,0x12d7e4be,0x37cf4c94,
25123         0xd4b2caa9,0xa2ae9b80,0xe60357a3,0x5e7ce09c,0xc8ecd5f9,0x29f77667 },
25124       { 0xa8a0b1c5,0xdf6868f5,0x62978ad8,0x240858cf,0xdc0002a1,0x0f7ac101,
25125         0xffe9aa05,0x1d28a9d7,0x5b962c97,0x744984d6,0x3d28c8b2,0xa8a7c00b } },
25126     /* 77 */
25127     { { 0xae11a338,0x7c58a852,0xd1af96e7,0xa78613f1,0x5355cc73,0x7e9767d2,
25128         0x792a2de6,0x6ba37009,0x124386b2,0x7d60f618,0x11157674,0xab09b531 },
25129       { 0x98eb9dd0,0x95a04841,0x15070328,0xe6c17acc,0x489c6e49,0xafc6da45,
25130         0xbb211530,0xab45a60a,0x7d7ea933,0xc58d6592,0x095642c6,0xa3ef3c65 } },
25131     /* 78 */
25132     { { 0xdf010879,0x89d420e9,0x39576179,0x9d25255d,0xe39513b6,0x9cdefd50,
25133         0xd5d1c313,0xe4efe45b,0x3f7af771,0xc0149de7,0x340ab06b,0x55a6b4f4 },
25134       { 0xebeaf771,0xf1325251,0x878d4288,0x2ab44128,0x18e05afe,0xfcd5832e,
25135         0xcc1fb62b,0xef52a348,0xc1c4792a,0x2bd08274,0x877c6dc7,0x345c5846 } },
25136     /* 79 */
25137     { { 0xbea65e90,0xde15ceb0,0x2416d99c,0x0987f72b,0xfd863dec,0x44db578d,
25138         0xac6a3578,0xf617b74b,0xdb48e999,0x9e62bd7a,0xeab1a1be,0x877cae61 },
25139       { 0x3a358610,0x23adddaa,0x325e2b07,0x2fc4d6d1,0x1585754e,0x897198f5,
25140         0xb392b584,0xf741852c,0xb55f7de1,0x9927804c,0x1aa8efae,0xe9e6c4ed } },
25141     /* 80 */
25142     { { 0x98683186,0x867db639,0xddcc4ea9,0xfb5cf424,0xd4f0e7bd,0xcc9a7ffe,
25143         0x7a779f7e,0x7c57f71c,0xd6b25ef2,0x90774079,0xb4081680,0x90eae903 },
25144       { 0x0ee1fceb,0xdf2aae5e,0xe86c1a1f,0x3ff1da24,0xca193edf,0x80f587d6,
25145         0xdc9b9d6a,0xa5695523,0x85920303,0x7b840900,0xba6dbdef,0x1efa4dfc } },
25146     /* 81 */
25147     { { 0xe0540015,0xfbd838f9,0xc39077dc,0x2c323946,0xad619124,0x8b1fb9e6,
25148         0x0ca62ea8,0x9612440c,0x2dbe00ff,0x9ad9b52c,0xae197643,0xf52abaa1 },
25149       { 0x2cac32ad,0xd0e89894,0x62a98f91,0xdfb79e42,0x276f55cb,0x65452ecf,
25150         0x7ad23e12,0xdb1ac0d2,0xde4986f0,0xf68c5f6a,0x82ce327d,0x389ac37b } },
25151     /* 82 */
25152     { { 0xf8e60f5b,0x511188b4,0x48aa2ada,0x7fe67015,0x381abca2,0xdb333cb8,
25153         0xdaf3fc97,0xb15e6d9d,0x36aabc03,0x4b24f6eb,0x72a748b4,0xc59789df },
25154       { 0x29cf5279,0x26fcb8a5,0x01ad9a6c,0x7a3c6bfc,0x4b8bac9b,0x866cf88d,
25155         0x9c80d041,0xf4c89989,0x70add148,0xf0a04241,0x45d81a41,0x5a02f479 } },
25156     /* 83 */
25157     { { 0xc1c90202,0xfa5c877c,0xf8ac7570,0xd099d440,0xd17881f7,0x428a5b1b,
25158         0x5b2501d7,0x61e267db,0xf2e4465b,0xf889bf04,0x76aa4cb8,0x4da3ae08 },
25159       { 0xe3e66861,0x3ef0fe26,0x3318b86d,0x5e772953,0x747396df,0xc3c35fbc,
25160         0x439ffd37,0x5115a29c,0xb2d70374,0xbfc4bd97,0x56246b9d,0x088630ea } },
25161     /* 84 */
25162     { { 0xb8a9e8c9,0xcd96866d,0x5bb8091e,0xa11963b8,0x045b3cd2,0xc7f90d53,
25163         0x80f36504,0x755a72b5,0x21d3751c,0x46f8b399,0x53c193de,0x4bffdc91 },
25164       { 0xb89554e7,0xcd15c049,0xf7a26be6,0x353c6754,0xbd41d970,0x79602370,
25165         0x12b176c0,0xde16470b,0x40c8809d,0x56ba1175,0xe435fb1e,0xe2db35c3 } },
25166     /* 85 */
25167     { { 0x6328e33f,0xd71e4aab,0xaf8136d1,0x5486782b,0x86d57231,0x07a4995f,
25168         0x1651a968,0xf1f0a5bd,0x76803b6d,0xa5dc5b24,0x42dda935,0x5c587cbc },
25169       { 0xbae8b4c0,0x2b6cdb32,0xb1331138,0x66d1598b,0x5d7e9614,0x4a23b2d2,
25170         0x74a8c05d,0x93e402a6,0xda7ce82e,0x45ac94e6,0xe463d465,0xeb9f8281 } },
25171     /* 86 */
25172     { { 0xfecf5b9b,0x34e0f9d1,0xf206966a,0xa115b12b,0x1eaa0534,0x5591cf3b,
25173         0xfb1558f9,0x5f0293cb,0x1bc703a5,0x1c8507a4,0x862c1f81,0x92e6b81c },
25174       { 0xcdaf24e3,0xcc9ebc66,0x72fcfc70,0x68917ecd,0x8157ba48,0x6dc9a930,
25175         0xb06ab2b2,0x5d425c08,0x36e929c4,0x362f8ce7,0x62e89324,0x09f6f57c } },
25176     /* 87 */
25177     { { 0xd29375fb,0x1c7d6b78,0xe35d1157,0xfabd851e,0x4243ea47,0xf6f62dcd,
25178         0x8fe30b0f,0x1dd92460,0xffc6e709,0x08166dfa,0x0881e6a7,0xc6c4c693 },
25179       { 0xd6a53fb0,0x20368f87,0x9eb4d1f9,0x38718e9f,0xafd7e790,0x03f08acd,
25180         0x72fe2a1c,0x0835eb44,0x88076e5d,0x7e050903,0xa638e731,0x538f765e } },
25181     /* 88 */
25182     { { 0xc2663b4b,0x0e0249d9,0x47cd38dd,0xe700ab5b,0x2c46559f,0xb192559d,
25183         0x4bcde66d,0x8f9f74a8,0x3e2aced5,0xad161523,0x3dd03a5b,0xc155c047 },
25184       { 0x3be454eb,0x346a8799,0x83b7dccd,0x66ee94db,0xab9d2abe,0x1f6d8378,
25185         0x7733f355,0x4a396dd2,0xf53553c2,0x419bd40a,0x731dd943,0xd0ead98d } },
25186     /* 89 */
25187     { { 0xec142408,0x908e0b0e,0x4114b310,0x98943cb9,0x1742b1d7,0x03dbf7d8,
25188         0x693412f4,0xd270df6b,0x8f69e20c,0xc5065494,0x697e43a1,0xa76a90c3 },
25189       { 0x4624825a,0xe0fa3384,0x8acc34c2,0x82e48c0b,0xe9a14f2b,0x7b24bd14,
25190         0x4db30803,0x4f5dd5e2,0x932da0a3,0x0c77a9e7,0x74c653dc,0x20db90f2 } },
25191     /* 90 */
25192     { { 0x0e6c5fd9,0x261179b7,0x6c982eea,0xf8bec123,0xd4957b7e,0x47683338,
25193         0x0a72f66a,0xcc47e664,0x1bad9350,0xbd54bf6a,0xf454e95a,0xdfbf4c6a },
25194       { 0x6907f4fa,0x3f7a7afa,0x865ca735,0x7311fae0,0x2a496ada,0x24737ab8,
25195         0x15feb79b,0x13e425f1,0xa1b93c21,0xe9e97c50,0x4ddd3eb5,0xb26b6eac } },
25196     /* 91 */
25197     { { 0x2a2e5f2b,0x81cab9f5,0xbf385ac4,0xf93caf29,0xc909963a,0xf4bf35c3,
25198         0x74c9143c,0x081e7300,0xc281b4c5,0x3ea57fa8,0x9b340741,0xe497905c },
25199       { 0x55ab3cfb,0xf556dd8a,0x518db6ad,0xd444b96b,0x5ef4b955,0x34f5425a,
25200         0xecd26aa3,0xdda7a3ac,0xda655e97,0xb57da11b,0xc2024c70,0x02da3eff } },
25201     /* 92 */
25202     { { 0x6481d0d9,0xe24b0036,0x818fdfe2,0x3740dbe5,0x190fda00,0xc1fc1f45,
25203         0x3cf27fde,0x329c9280,0x6934f43e,0x7435cb53,0x7884e8fe,0x2b505a5d },
25204       { 0x711adcc9,0x6cfcc6a6,0x531e21e1,0xf034325c,0x9b2a8a99,0xa2f4a967,
25205         0x3c21bdff,0x9d5f3842,0x31b57d66,0xb25c7811,0x0b8093b9,0xdb5344d8 } },
25206     /* 93 */
25207     { { 0xae50a2f5,0x0d72e667,0xe4a861d1,0x9b7f8d8a,0x330df1cb,0xa129f70f,
25208         0xe04fefc3,0xe90aa5d7,0xe72c3ae1,0xff561ecb,0xcdb955fa,0x0d8fb428 },
25209       { 0xd7663784,0xd2235f73,0x7e2c456a,0xc05baec6,0x2adbfccc,0xe5c292e4,
25210         0xefb110d5,0x4fd17988,0xd19d49f3,0x27e57734,0x84f679fe,0x188ac4ce } },
25211     /* 94 */
25212     { { 0xa796c53e,0x7ee344cf,0x0868009b,0xbbf6074d,0x474a1295,0x1f1594f7,
25213         0xac11632d,0x66776edc,0x04e2fa5a,0x1862278b,0xc854a89a,0x52665cf2 },
25214       { 0x8104ab58,0x7e376464,0x7204fd6d,0x16775913,0x44ea1199,0x86ca06a5,
25215         0x1c9240dd,0xaa3f765b,0x24746149,0x5f8501a9,0xdcd251d7,0x7b982e30 } },
25216     /* 95 */
25217     { { 0xc15f3060,0xe44e9efc,0xa87ebbe6,0x5ad62f2e,0xc79500d4,0x36499d41,
25218         0x336fa9d1,0xa66d6dc0,0x5afd3b1f,0xf8afc495,0xe5c9822b,0x1d8ccb24 },
25219       { 0x79d7584b,0x4031422b,0xea3f20dd,0xc54a0580,0x958468c5,0x3f837c8f,
25220         0xfbea7735,0x3d82f110,0x7dffe2fc,0x679a8778,0x20704803,0x48eba63b } },
25221     /* 96 */
25222     { { 0xdf46e2f6,0x89b10d41,0x19514367,0x13ab57f8,0x1d469c87,0x067372b9,
25223         0x4f6c5798,0x0c195afa,0x272c9acf,0xea43a12a,0x678abdac,0x9dadd8cb },
25224       { 0xe182579a,0xcce56c6b,0x2d26c2d8,0x86febadb,0x2a44745c,0x1c668ee1,
25225         0x98dc047a,0x580acd86,0x51b9ec2d,0x5a2b79cc,0x4054f6a0,0x007da608 } },
25226     /* 97 */
25227     { { 0x17b00dd0,0x9e3ca352,0x0e81a7a6,0x046779cb,0xd482d871,0xb999fef3,
25228         0xd9233fbc,0xe6f38134,0xf48cd0e0,0x112c3001,0x3c6c66ae,0x934e7576 },
25229       { 0xd73234dc,0xb44d4fc3,0x864eafc1,0xfcae2062,0x26bef21a,0x843afe25,
25230         0xf3b75fdf,0x61355107,0x794c2e6b,0x8367a5aa,0x8548a372,0x3d2629b1 } },
25231     /* 98 */
25232     { { 0x437cfaf8,0x6230618f,0x2032c299,0x5b8742cb,0x2293643a,0x949f7247,
25233         0x09464f79,0xb8040f1a,0x4f254143,0x049462d2,0x366c7e76,0xabd6b522 },
25234       { 0xd5338f55,0x119b392b,0x01495a0c,0x1a80a9ce,0xf8d7537e,0xf3118ca7,
25235         0x6bf4b762,0xb715adc2,0xa8482b6c,0x24506165,0x96a7c84d,0xd958d7c6 } },
25236     /* 99 */
25237     { { 0xbdc21f31,0x9ad8aa87,0x8063e58c,0xadb3cab4,0xb07dd7b8,0xefd86283,
25238         0x1be7c6b4,0xc7b9b762,0x015582de,0x2ef58741,0x299addf3,0xc970c52e },
25239       { 0x22f24d66,0x78f02e2a,0x74cc100a,0xefec1d10,0x09316e1a,0xaf2a6a39,
25240         0x5849dd49,0xce7c2205,0x96bffc4c,0x9c1fe75c,0x7ba06ec0,0xcad98fd2 } },
25241     /* 100 */
25242     { { 0xb648b73e,0xed76e2d0,0x1cfd285e,0xa9f92ce5,0x2ed13de1,0xa8c86c06,
25243         0xa5191a93,0x1d3a574e,0x1ad1b8bf,0x385cdf8b,0x47d2cfe3,0xbbecc28a },
25244       { 0x69cec548,0x98d326c0,0xf240a0b2,0x4f5bc1dd,0x29057236,0x241a7062,
25245         0xc68294a4,0x0fc6e9c5,0xa319f17a,0x4d04838b,0x9ffc1c6f,0x8b612cf1 } },
25246     /* 101 */
25247     { { 0x4c3830eb,0x9bb0b501,0x8ee0d0c5,0x3d08f83c,0x79ba9389,0xa4a62642,
25248         0x9cbc2914,0x5d5d4044,0x074c46f0,0xae9eb83e,0x74ead7d6,0x63bb758f },
25249       { 0xc6bb29e0,0x1c40d2ea,0x4b02f41e,0x95aa2d87,0x53cb199a,0x92989175,
25250         0x51584f6d,0xdd91bafe,0x31a1aaec,0x3715efb9,0x46780f9e,0xc1b6ae5b } },
25251     /* 102 */
25252     { { 0x42772f41,0xcded3e4b,0x3bcb79d1,0x3a700d5d,0x80feee60,0x4430d50e,
25253         0xf5e5d4bb,0x444ef1fc,0xe6e358ff,0xc660194f,0x6a91b43c,0xe68a2f32 },
25254       { 0x977fe4d2,0x5842775c,0x7e2a41eb,0x78fdef5c,0xff8df00e,0x5f3bec02,
25255         0x5852525d,0xf4b840cd,0x4e6988bd,0x0870483a,0xcc64b837,0x39499e39 } },
25256     /* 103 */
25257     { { 0xb08df5fe,0xfc05de80,0x63ba0362,0x0c12957c,0xd5cf1428,0xea379414,
25258         0x54ef6216,0xc559132a,0xb9e65cf8,0x33d5f12f,0x1695d663,0x09c60278 },
25259       { 0x61f7a2fb,0x3ac1ced4,0xd4f5eeb8,0xdd838444,0x8318fcad,0x82a38c6c,
25260         0xe9f1a864,0x315be2e5,0x442daf47,0x317b5771,0x95aa5f9e,0x81b5904a } },
25261     /* 104 */
25262     { { 0x8b21d232,0x6b6b1c50,0x8c2cba75,0x87f3dbc0,0xae9f0faf,0xa7e74b46,
25263         0xbb7b8079,0x036a0985,0x8d974a25,0x4f185b90,0xd9af5ec9,0x5aa7cef0 },
25264       { 0x57dcfffc,0xe0566a70,0xb8453225,0x6ea311da,0x23368aa9,0x72ea1a8d,
25265         0x48cd552d,0xed9b2083,0xc80ea435,0xb987967c,0x6c104173,0xad735c75 } },
25266     /* 105 */
25267     { { 0xcee76ef4,0xaea85ab3,0xaf1d2b93,0x44997444,0xeacb923f,0x0851929b,
25268         0x51e3bc0c,0xb080b590,0x59be68a2,0xc4ee1d86,0x64b26cda,0xf00de219 },
25269       { 0xf2e90d4d,0x8d7fb5c0,0x77d9ec64,0x00e219a7,0x5d1c491c,0xc4e6febd,
25270         0x1a8f4585,0x080e3754,0x48d2af9c,0x4a9b86c8,0xb6679851,0x2ed70db6 } },
25271     /* 106 */
25272     { { 0x586f25cb,0xaee44116,0xa0fcf70f,0xf7b6861f,0x18a350e8,0x55d2cd20,
25273         0x92dc286f,0x861bf3e5,0x6226aba7,0x9ab18ffa,0xa9857b03,0xd15827be },
25274       { 0x92e6acef,0x26c1f547,0xac1fbac3,0x422c63c8,0xfcbfd71d,0xa2d8760d,
25275         0xb2511224,0x35f6a539,0x048d1a21,0xbaa88fa1,0xebf999db,0x49f1abe9 } },
25276     /* 107 */
25277     { { 0xf7492b73,0x16f9f4f4,0xcb392b1a,0xcf28ec1e,0x69ca6ffc,0x45b130d4,
25278         0xb72efa58,0x28ba8d40,0x5ca066f5,0xace987c7,0x4ad022eb,0x3e399246 },
25279       { 0x752555bb,0x63a2d84e,0x9c2ae394,0xaaa93b4a,0xc89539ca,0xcd80424e,
25280         0xaa119a99,0x6d6b5a6d,0x379f2629,0xbd50334c,0xef3cc7d3,0x899e925e } },
25281     /* 108 */
25282     { { 0xbf825dc4,0xb7ff3651,0x40b9c462,0x0f741cc4,0x5cc4fb5b,0x771ff5a9,
25283         0x47fd56fe,0xcb9e9c9b,0x5626c0d3,0xbdf053db,0xf7e14098,0xa97ce675 },
25284       { 0x6c934f5e,0x68afe5a3,0xccefc46f,0x6cd5e148,0xd7a88586,0xc7758570,
25285         0xdd558d40,0x49978f5e,0x64ae00c1,0xa1d5088a,0xf1d65bb2,0x58f2a720 } },
25286     /* 109 */
25287     { { 0x3e4daedb,0x66fdda4a,0x65d1b052,0x38318c12,0x4c4bbf5c,0x28d910a2,
25288         0x78a9cd14,0x762fe5c4,0xd2cc0aee,0x08e5ebaa,0xca0c654c,0xd2cdf257 },
25289       { 0x08b717d2,0x48f7c58b,0x386cd07a,0x3807184a,0xae7d0112,0x3240f626,
25290         0xc43917b0,0x03e9361b,0x20aea018,0xf261a876,0x7e1e6372,0x53f556a4 } },
25291     /* 110 */
25292     { { 0x2f512a90,0xc84cee56,0x1b0ea9f1,0x24b3c004,0xe26cc1ea,0x0ee15d2d,
25293         0xf0c9ef7d,0xd848762c,0xd5341435,0x1026e9c5,0xfdb16b31,0x8f5b73dc },
25294       { 0xd2c75d95,0x1f69bef2,0xbe064dda,0x8d33d581,0x57ed35e6,0x8c024c12,
25295         0xc309c281,0xf8d435f9,0xd6960193,0xfd295061,0xe9e49541,0x66618d78 } },
25296     /* 111 */
25297     { { 0x8ce382de,0x571cfd45,0xde900dde,0x175806ee,0x34aba3b5,0x61849965,
25298         0xde7aec95,0xe899778a,0xff4aa97f,0xe8f00f6e,0x010b0c6d,0xae971cb5 },
25299       { 0x3af788f1,0x1827eebc,0xe413fe2d,0xd46229ff,0x4741c9b4,0x8a15455b,
25300         0xf8e424eb,0x5f02e690,0xdae87712,0x40a1202e,0x64944f6d,0x49b3bda2 } },
25301     /* 112 */
25302     { { 0x035b2d69,0xd63c6067,0x6bed91b0,0xb507150d,0x7afb39b2,0x1f35f82f,
25303         0x16012b66,0xb9bd9c01,0xed0a5f50,0x00d97960,0x2716f7c9,0xed705451 },
25304       { 0x127abdb4,0x1576eff4,0xf01e701c,0x6850d698,0x3fc87e2f,0x9fa7d749,
25305         0xb0ce3e48,0x0b6bcc6f,0xf7d8c1c0,0xf4fbe1f5,0x02719cc6,0xcf75230e } },
25306     /* 113 */
25307     { { 0x722d94ed,0x6761d6c2,0x3718820e,0xd1ec3f21,0x25d0e7c6,0x65a40b70,
25308         0xbaf3cf31,0xd67f830e,0xb93ea430,0x633b3807,0x0bc96c69,0x17faa0ea },
25309       { 0xdf866b98,0xe6bf3482,0xa9db52d4,0x205c1ee9,0xff9ab869,0x51ef9bbd,
25310         0x75eeb985,0x3863dad1,0xd3cf442a,0xef216c3b,0xf9c8e321,0x3fb228e3 } },
25311     /* 114 */
25312     { { 0x0760ac07,0x94f9b70c,0x9d79bf4d,0xf3c9ccae,0xc5ffc83d,0x73cea084,
25313         0xdc49c38e,0xef50f943,0xbc9e7330,0xf467a2ae,0x44ea7fba,0x5ee534b6 },
25314       { 0x03609e7f,0x20cb6272,0x62fdc9f0,0x09844355,0x0f1457f7,0xaf5c8e58,
25315         0xb4b25941,0xd1f50a6c,0x2ec82395,0x77cb247c,0xda3dca33,0xa5f3e1e5 } },
25316     /* 115 */
25317     { { 0x7d85fa94,0x023489d6,0x2db9ce47,0x0ba40537,0xaed7aad1,0x0fdf7a1f,
25318         0x9a4ccb40,0xa57b0d73,0x5b18967c,0x48fcec99,0xb7274d24,0xf30b5b6e },
25319       { 0xc81c5338,0x7ccb4773,0xa3ed6bd0,0xb85639e6,0x1d56eada,0x7d9df95f,
25320         0x0a1607ad,0xe256d57f,0x957574d6,0x6da7ffdc,0x01c7a8c4,0x65f84046 } },
25321     /* 116 */
25322     { { 0xcba1e7f1,0x8d45d0cb,0x02b55f64,0xef0a08c0,0x17e19892,0x771ca31b,
25323         0x4885907e,0xe1843ecb,0x364ce16a,0x67797ebc,0x8df4b338,0x816d2b2d },
25324       { 0x39aa8671,0xe870b0e5,0xc102b5f5,0x9f0db3e4,0x1720c697,0x34296659,
25325         0x613c0d2a,0x0ad4c89e,0x418ddd61,0x1af900b2,0xd336e20e,0xe087ca72 } },
25326     /* 117 */
25327     { { 0xaba10079,0x222831ff,0x6d64fff2,0x0dc5f87b,0x3e8cb330,0x44547907,
25328         0x702a33fb,0xe815aaa2,0x5fba3215,0x338d6b2e,0x79f549c8,0x0f7535cb },
25329       { 0x2ee95923,0x471ecd97,0xc6d1c09f,0x1e868b37,0xc666ef4e,0x2bc7b8ec,
25330         0x808a4bfc,0xf5416589,0x3fbc4d2e,0xf23e9ee2,0x2d75125b,0x4357236c } },
25331     /* 118 */
25332     { { 0xba9cdb1b,0xfe176d95,0x2f82791e,0x45a1ca01,0x4de4cca2,0x97654af2,
25333         0x5cc4bcb9,0xbdbf9d0e,0xad97ac0a,0xf6a7df50,0x61359fd6,0xc52112b0 },
25334       { 0x4f05eae3,0x696d9ce3,0xe943ac2b,0x903adc02,0x0848be17,0xa9075347,
25335         0x2a3973e5,0x1e20f170,0x6feb67e9,0xe1aacc1c,0xe16bc6b9,0x2ca0ac32 } },
25336     /* 119 */
25337     { { 0xef871eb5,0xffea12e4,0xa8bf0a7a,0x94c2f25d,0x78134eaa,0x4d1e4c2a,
25338         0x0360fb10,0x11ed16fb,0x85fc11be,0x4029b6db,0xf4d390fa,0x5e9f7ab7 },
25339       { 0x30646612,0x5076d72f,0xdda1d0d8,0xa0afed1d,0x85a1d103,0x29022257,
25340         0x4e276bcd,0xcb499e17,0x51246c3d,0x16d1da71,0x589a0443,0xc72d56d3 } },
25341     /* 120 */
25342     { { 0xdae5bb45,0xdf5ffc74,0x261bd6dc,0x99068c4a,0xaa98ec7b,0xdc0afa7a,
25343         0xf121e96d,0xedd2ee00,0x1414045c,0x163cc7be,0x335af50e,0xb0b1bbce },
25344       { 0x01a06293,0xd440d785,0x6552e644,0xcdebab7c,0x8c757e46,0x48cb8dbc,
25345         0x3cabe3cb,0x81f9cf78,0xb123f59a,0xddd02611,0xeeb3784d,0x3dc7b88e } },
25346     /* 121 */
25347     { { 0xc4741456,0xe1b8d398,0x6032a121,0xa9dfa902,0x1263245b,0x1cbfc86d,
25348         0x5244718c,0xf411c762,0x05b0fc54,0x96521d54,0xdbaa4985,0x1afab46e },
25349       { 0x8674b4ad,0xa75902ba,0x5ad87d12,0x486b43ad,0x36e0d099,0x72b1c736,
25350         0xbb6cd6d6,0x39890e07,0x59bace4e,0x8128999c,0x7b535e33,0xd8da430b } },
25351     /* 122 */
25352     { { 0xc6b75791,0x39f65642,0x21806bfb,0x050947a6,0x1362ef84,0x0ca3e370,
25353         0x8c3d2391,0x9bc60aed,0x732e1ddc,0x9b488671,0xa98ee077,0x12d10d9e },
25354       { 0x3651b7dc,0xb6f2822d,0x80abd138,0x6345a5ba,0x472d3c84,0x62033262,
25355         0xacc57527,0xd54a1d40,0x424447cb,0x6ea46b3a,0x2fb1a496,0x5bc41057 } },
25356     /* 123 */
25357     { { 0xa751cd0e,0xe70c57a3,0xeba3c7d6,0x190d8419,0x9d47d55a,0xb1c3bee7,
25358         0xf912c6d8,0xda941266,0x407a6ad6,0x12e9aacc,0x6e838911,0xd6ce5f11 },
25359       { 0x70e1f2ce,0x063ca97b,0x8213d434,0xa3e47c72,0x84df810a,0xa016e241,
25360         0xdfd881a4,0x688ad7b0,0xa89bf0ad,0xa37d99fc,0xa23c2d23,0xd8e3f339 } },
25361     /* 124 */
25362     { { 0x750bed6f,0xbdf53163,0x83e68b0a,0x808abc32,0x5bb08a33,0x85a36627,
25363         0x6b0e4abe,0xf72a3a0f,0xfaf0c6ad,0xf7716d19,0x5379b25f,0x22dcc020 },
25364       { 0xf9a56e11,0x7400bf8d,0x56a47f21,0x6cb8bad7,0x7a6eb644,0x7c97176f,
25365         0xd1f5b646,0xe8fd84f7,0x44ddb054,0x98320a94,0x1dde86f5,0x07071ba3 } },
25366     /* 125 */
25367     { { 0x98f8fcb9,0x6fdfa0e5,0x94d0d70c,0x89cec8e0,0x106d20a8,0xa0899397,
25368         0xba8acc9c,0x915bfb9a,0x5507e01c,0x1370c94b,0x8a821ffb,0x83246a60 },
25369       { 0xbe3c378f,0xa8273a9f,0x35a25be9,0x7e544789,0x4dd929d7,0x6cfa4972,
25370         0x365bd878,0x987fed9d,0x5c29a7ae,0x4982ac94,0x5ddd7ec5,0x4589a5d7 } },
25371     /* 126 */
25372     { { 0xa95540a9,0x9fabb174,0x0162c5b0,0x7cfb886f,0xea3dee18,0x17be766b,
25373         0xe88e624c,0xff7da41f,0x8b919c38,0xad0b71eb,0xf31ff9a9,0x86a522e0 },
25374       { 0x868bc259,0xbc8e6f72,0x3ccef9e4,0x6130c638,0x9a466555,0x09f1f454,
25375         0x19b2bfb4,0x8e6c0f09,0x0ca7bb22,0x945c46c9,0x4dafb67b,0xacd87168 } },
25376     /* 127 */
25377     { { 0x10c53841,0x090c72ca,0x55a4fced,0xc20ae01b,0xe10234ad,0x03f7ebd5,
25378         0x85892064,0xb3f42a6a,0xb4a14722,0xbdbc30c0,0x8ca124cc,0x971bc437 },
25379       { 0x517ff2ff,0x6f79f46d,0xecba947b,0x6a9c96e2,0x62925122,0x5e79f2f4,
25380         0x6a4e91f1,0x30a96bb1,0x2d4c72da,0x1147c923,0x5811e4df,0x65bc311f } },
25381     /* 128 */
25382     { { 0x139b3239,0x87c7dd7d,0x4d833bae,0x8b57824e,0x9fff0015,0xbcbc4878,
25383         0x909eaf1a,0x8ffcef8b,0xf1443a78,0x9905f4ee,0xe15cbfed,0x020dd4a2 },
25384       { 0xa306d695,0xca2969ec,0xb93caf60,0xdf940cad,0x87ea6e39,0x67f7fab7,
25385         0xf98c4fe5,0x0d0ee10f,0xc19cb91e,0xc646879a,0x7d1d7ab4,0x4b4ea50c } },
25386     /* 129 */
25387     { { 0x7a0db57e,0x19e40945,0x9a8c9702,0xe6017cad,0x1be5cff9,0xdbf739e5,
25388         0xa7a938a2,0x3646b3cd,0x68350dfc,0x04511085,0x56e098b5,0xad3bd6f3 },
25389       { 0xee2e3e3e,0x935ebabf,0x473926cb,0xfbd01702,0x9e9fb5aa,0x7c735b02,
25390         0x2e3feff0,0xc52a1b85,0x046b405a,0x9199abd3,0x39039971,0xe306fcec } },
25391     /* 130 */
25392     { { 0x23e4712c,0xd6d9aec8,0xc3c198ee,0x7ca8376c,0x31bebd8a,0xe6d83187,
25393         0xd88bfef3,0xed57aff3,0xcf44edc7,0x72a645ee,0x5cbb1517,0xd4e63d0b },
25394       { 0xceee0ecf,0x98ce7a1c,0x5383ee8e,0x8f012633,0xa6b455e8,0x3b879078,
25395         0xc7658c06,0xcbcd3d96,0x0783336a,0x721d6fe7,0x5a677136,0xf21a7263 } },
25396     /* 131 */
25397     { { 0x9586ba11,0x19d8b3cd,0x8a5c0480,0xd9e0aeb2,0x2230ef5c,0xe4261dbf,
25398         0x02e6bf09,0x095a9dee,0x80dc7784,0x8963723c,0x145157b1,0x5c97dbaf },
25399       { 0x4bc4503e,0x97e74434,0x85a6b370,0x0fb1cb31,0xcd205d4b,0x3e8df2be,
25400         0xf8f765da,0x497dd1bc,0x6c988a1a,0x92ef95c7,0x64dc4cfa,0x3f924baa } },
25401     /* 132 */
25402     { { 0x7268b448,0x6bf1b8dd,0xefd79b94,0xd4c28ba1,0xe4e3551f,0x2fa1f8c8,
25403         0x5c9187a9,0x769e3ad4,0x40326c0d,0x28843b4d,0x50d5d669,0xfefc8094 },
25404       { 0x90339366,0x30c85bfd,0x5ccf6c3a,0x4eeb56f1,0x28ccd1dc,0x0e72b149,
25405         0xf2ce978e,0x73ee85b5,0x3165bb23,0xcdeb2bf3,0x4e410abf,0x8106c923 } },
25406     /* 133 */
25407     { { 0x7d02f4ee,0xc8df0161,0x18e21225,0x8a781547,0x6acf9e40,0x4ea895eb,
25408         0x6e5a633d,0x8b000cb5,0x7e981ffb,0xf31d86d5,0x4475bc32,0xf5c8029c },
25409       { 0x1b568973,0x764561ce,0xa62996ec,0x2f809b81,0xda085408,0x9e513d64,
25410         0xe61ce309,0xc27d815d,0x272999e0,0x0da6ff99,0xfead73f7,0xbd284779 } },
25411     /* 134 */
25412     { { 0x9b1cdf2b,0x6033c2f9,0xbc5fa151,0x2a99cf06,0x12177b3b,0x7d27d259,
25413         0xc4485483,0xb1f15273,0x102e2297,0x5fd57d81,0xc7f6acb7,0x3d43e017 },
25414       { 0x3a70eb28,0x41a8bb0b,0x3e80b06b,0x67de2d8e,0x70c28de5,0x09245a41,
25415         0xa7b26023,0xad7dbcb1,0x2cbc6c1e,0x70b08a35,0x9b33041f,0xb504fb66 } },
25416     /* 135 */
25417     { { 0xf97a27c2,0xa8e85ab5,0xc10a011b,0x6ac5ec8b,0xffbcf161,0x55745533,
25418         0x65790a60,0x01780e85,0x99ee75b0,0xe451bf85,0x39c29881,0x8907a63b },
25419       { 0x260189ed,0x76d46738,0x47bd35cb,0x284a4436,0x20cab61e,0xd74e8c40,
25420         0x416cf20a,0x6264bf8c,0x5fd820ce,0xfa5a6c95,0xf24bb5fc,0xfa7154d0 } },
25421     /* 136 */
25422     { { 0x9b3f5034,0x18482cec,0xcd9e68fd,0x962d445a,0x95746f23,0x266fb1d6,
25423         0x58c94a4b,0xc66ade5a,0xed68a5b6,0xdbbda826,0x7ab0d6ae,0x05664a4d },
25424       { 0x025e32fc,0xbcd4fe51,0xa96df252,0x61a5aebf,0x31592a31,0xd88a07e2,
25425         0x98905517,0x5d9d94de,0x5fd440e7,0x96bb4010,0xe807db4c,0x1b0c47a2 } },
25426     /* 137 */
25427     { { 0x08223878,0x5c2a6ac8,0xe65a5558,0xba08c269,0x9bbc27fd,0xd22b1b9b,
25428         0x72b9607d,0x919171bf,0xe588dc58,0x9ab455f9,0x23662d93,0x6d54916e },
25429       { 0x3b1de0c1,0x8da8e938,0x804f278f,0xa84d186a,0xd3461695,0xbf4988cc,
25430         0xe10eb0cb,0xf5eae3be,0xbf2a66ed,0x1ff8b68f,0xc305b570,0xa68daf67 } },
25431     /* 138 */
25432     { { 0x44b2e045,0xc1004cff,0x4b1c05d4,0x91b5e136,0x88a48a07,0x53ae4090,
25433         0xea11bb1a,0x73fb2995,0x3d93a4ea,0x32048570,0x3bfc8a5f,0xcce45de8 },
25434       { 0xc2b3106e,0xaff4a97e,0xb6848b4f,0x9069c630,0xed76241c,0xeda837a6,
25435         0x6cc3f6cf,0x8a0daf13,0x3da018a8,0x199d049d,0xd9093ba3,0xf867c6b1 } },
25436     /* 139 */
25437     { { 0x56527296,0xe4d42a56,0xce71178d,0xae26c73d,0x6c251664,0x70a0adac,
25438         0x5dc0ae1d,0x813483ae,0xdaab2daf,0x7574eacd,0xc2d55f4f,0xc56b52dc },
25439       { 0x95f32923,0x872bc167,0x5bdd2a89,0x4be17581,0xa7699f00,0x9b57f1e7,
25440         0x3ac2de02,0x5fcd9c72,0x92377739,0x83af3ba1,0xfc50b97f,0xa64d4e2b } },
25441     /* 140 */
25442     { { 0x0e552b40,0x2172dae2,0xd34d52e8,0x62f49725,0x07958f98,0x7930ee40,
25443         0x751fdd74,0x56da2a90,0xf53e48c3,0xf1192834,0x8e53c343,0x34d2ac26 },
25444       { 0x13111286,0x1073c218,0xda9d9827,0x201dac14,0xee95d378,0xec2c29db,
25445         0x1f3ee0b1,0x9316f119,0x544ce71c,0x7890c9f0,0x27612127,0xd77138af } },
25446     /* 141 */
25447     { { 0x3b4ad1cd,0x78045e6d,0x4aa49bc1,0xcd86b94e,0xfd677a16,0x57e51f1d,
25448         0xfa613697,0xd9290935,0x34f4d893,0x7a3f9593,0x5d5fcf9b,0x8c9c248b },
25449       { 0x6f70d4e9,0x9f23a482,0x63190ae9,0x17273454,0x5b081a48,0x4bdd7c13,
25450         0x28d65271,0x1e2de389,0xe5841d1f,0x0bbaaa25,0x746772e5,0xc4c18a79 } },
25451     /* 142 */
25452     { { 0x593375ac,0x10ee2681,0x7dd5e113,0x4f3288be,0x240f3538,0x9a97b2fb,
25453         0x1de6b1e2,0xfa11089f,0x1351bc58,0x516da562,0x2dfa85b5,0x573b6119 },
25454       { 0x6cba7df5,0x89e96683,0x8c28ab40,0xf299be15,0xad43fcbf,0xe91c9348,
25455         0x9a1cefb3,0xe9bbc7cc,0x738b2775,0xc8add876,0x775eaa01,0x6e3b1f2e } },
25456     /* 143 */
25457     { { 0xb677788b,0x0365a888,0x3fd6173c,0x634ae8c4,0x9e498dbe,0x30498761,
25458         0xc8f779ab,0x08c43e6d,0x4c09aca9,0x068ae384,0x2018d170,0x2380c70b },
25459       { 0xa297c5ec,0xcf77fbc3,0xca457948,0xdacbc853,0x336bec7e,0x3690de04,
25460         0x14eec461,0x26bbac64,0x1f713abf,0xd1c23c7e,0xe6fd569e,0xf08bbfcd } },
25461     /* 144 */
25462     { { 0x84770ee3,0x5f8163f4,0x744a1706,0x0e0c7f94,0xe1b2d46d,0x9c8f05f7,
25463         0xd01fd99a,0x417eafe7,0x11440e5b,0x2ba15df5,0x91a6fbcf,0xdc5c552a },
25464       { 0xa270f721,0x86271d74,0xa004485b,0x32c0a075,0x8defa075,0x9d1a87e3,
25465         0xbf0d20fe,0xb590a7ac,0x8feda1f5,0x430c41c2,0x58f6ec24,0x454d2879 } },
25466     /* 145 */
25467     { { 0x7c525435,0x52b7a635,0x37c4bdbc,0x3d9ef57f,0xdffcc475,0x2bb93e9e,
25468         0x7710f3be,0xf7b8ba98,0x21b727de,0x42ee86da,0x2e490d01,0x55ac3f19 },
25469       { 0xc0c1c390,0x487e3a6e,0x446cde7b,0x036fb345,0x496ae951,0x089eb276,
25470         0x71ed1234,0xedfed4d9,0x900f0b46,0x661b0dd5,0x8582f0d3,0x11bd6f1b } },
25471     /* 146 */
25472     { { 0x076bc9d1,0x5cf9350f,0xcf3cd2c3,0x15d903be,0x25af031c,0x21cfc8c2,
25473         0x8b1cc657,0xe0ad3248,0x70014e87,0xdd9fb963,0x297f1658,0xf0f3a5a1 },
25474       { 0xf1f703aa,0xbb908fba,0x2f6760ba,0x2f9cc420,0x66a38b51,0x00ceec66,
25475         0x05d645da,0x4deda330,0xf7de3394,0xb9cf5c72,0x1ad4c906,0xaeef6502 } },
25476     /* 147 */
25477     { { 0x7a19045d,0x0583c8b1,0xd052824c,0xae7c3102,0xff6cfa58,0x2a234979,
25478         0x62c733c0,0xfe9dffc9,0x9c0c4b09,0x3a7fa250,0x4fe21805,0x516437bb },
25479       { 0xc2a23ddb,0x9454e3d5,0x289c104e,0x0726d887,0x4fd15243,0x8977d918,
25480         0x6d7790ba,0xc559e73f,0x465af85f,0x8fd3e87d,0x5feee46b,0xa2615c74 } },
25481     /* 148 */
25482     { { 0x4335167d,0xc8d607a8,0xe0f5c887,0x8b42d804,0x398d11f9,0x5f9f13df,
25483         0x20740c67,0x5aaa5087,0xa3d9234b,0x83da9a6a,0x2a54bad1,0xbd3a5c4e },
25484       { 0x2db0f658,0xdd13914c,0x5a3f373a,0x29dcb66e,0x5245a72b,0xbfd62df5,
25485         0x91e40847,0x19d18023,0xb136b1ae,0xd9df74db,0x3f93bc5b,0x72a06b6b } },
25486     /* 149 */
25487     { { 0xad19d96f,0x6da19ec3,0xfb2a4099,0xb342daa4,0x662271ea,0x0e61633a,
25488         0xce8c054b,0x3bcece81,0x8bd62dc6,0x7cc8e061,0xee578d8b,0xae189e19 },
25489       { 0xdced1eed,0x73e7a25d,0x7875d3ab,0xc1257f0a,0x1cfef026,0x2cb2d5a2,
25490         0xb1fdf61c,0xd98ef39b,0x24e83e6c,0xcd8e6f69,0xc7b7088b,0xd71e7076 } },
25491     /* 150 */
25492     { { 0x9d4245bf,0x33936830,0x2ac2953b,0x22d96217,0x56c3c3cd,0xb3bf5a82,
25493         0x0d0699e8,0x50c9be91,0x8f366459,0xec094463,0x513b7c35,0x6c056dba },
25494       { 0x045ab0e3,0x687a6a83,0x445c9295,0x8d40b57f,0xa16f5954,0x0f345048,
25495         0x3d8f0a87,0x64b5c639,0x9f71c5e2,0x106353a2,0x874f0dd4,0xdd58b475 } },
25496     /* 151 */
25497     { { 0x62230c72,0x67ec084f,0x481385e3,0xf14f6cca,0x4cda7774,0xf58bb407,
25498         0xaa2dbb6b,0xe15011b1,0x0c035ab1,0xd488369d,0x8245f2fd,0xef83c24a },
25499       { 0x9fdc2538,0xfb57328f,0x191fe46a,0x79808293,0x32ede548,0xe28f5c44,
25500         0xea1a022c,0x1b3cda99,0x3df2ec7f,0x39e639b7,0x760e9a18,0x77b6272b } },
25501     /* 152 */
25502     { { 0xa65d56d5,0x2b1d51bd,0x7ea696e0,0x3a9b71f9,0x9904f4c4,0x95250ecc,
25503         0xe75774b7,0x8bc4d6eb,0xeaeeb9aa,0x0e343f8a,0x930e04cb,0xc473c1d1 },
25504       { 0x064cd8ae,0x282321b1,0x5562221c,0xf4b4371e,0xd1bf1221,0xc1cc81ec,
25505         0xe2c8082f,0xa52a07a9,0xba64a958,0x350d8e59,0x6fb32c9a,0x29e4f3de } },
25506     /* 153 */
25507     { { 0xba89aaa5,0x0aa9d56c,0xc4c6059e,0xf0208ac0,0xbd6ddca4,0x7400d9c6,
25508         0xf2c2f74a,0xb384e475,0xb1562dd3,0x4c1061fc,0x2e153b8d,0x3924e248 },
25509       { 0x849808ab,0xf38b8d98,0xa491aa36,0x29bf3260,0x88220ede,0x85159ada,
25510         0xbe5bc422,0x8b47915b,0xd7300967,0xa934d72e,0x2e515d0d,0xc4f30398 } },
25511     /* 154 */
25512     { { 0x1b1de38b,0xe3e9ee42,0x42636760,0xa124e25a,0x90165b1a,0x90bf73c0,
25513         0x146434c5,0x21802a34,0x2e1fa109,0x54aa83f2,0xed9c51e9,0x1d4bd03c },
25514       { 0x798751e6,0xc2d96a38,0x8c3507f5,0xed27235f,0xc8c24f88,0xb5fb80e2,
25515         0xd37f4f78,0xf873eefa,0xf224ba96,0x7229fd74,0x9edd7149,0x9dcd9199 } },
25516     /* 155 */
25517     { { 0x4e94f22a,0xee9f81a6,0xf71ec341,0xe5609892,0xa998284e,0x6c818ddd,
25518         0x3b54b098,0x9fd47295,0x0e8a7cc9,0x47a6ac03,0xb207a382,0xde684e5e },
25519       { 0x2b6b956b,0x4bdd1ecd,0xf01b3583,0x09084414,0x55233b14,0xe2f80b32,
25520         0xef5ebc5e,0x5a0fec54,0xbf8b29a2,0x74cf25e6,0x7f29e014,0x1c757fa0 } },
25521     /* 156 */
25522     { { 0xeb0fdfe4,0x1bcb5c4a,0xf0899367,0xd7c649b3,0x05bc083b,0xaef68e3f,
25523         0xa78aa607,0x57a06e46,0x21223a44,0xa2136ecc,0x52f5a50b,0x89bd6484 },
25524       { 0x4455f15a,0x724411b9,0x08a9c0fd,0x23dfa970,0x6db63bef,0x7b0da4d1,
25525         0xfb162443,0x6f8a7ec1,0xe98284fb,0xc1ac9cee,0x33566022,0x085a582b } },
25526     /* 157 */
25527     { { 0xec1f138a,0x15cb61f9,0x668f0c28,0x11c9a230,0xdf93f38f,0xac829729,
25528         0x4048848d,0xcef25698,0x2bba8fbf,0x3f686da0,0x111c619a,0xed5fea78 },
25529       { 0xd6d1c833,0x9b4f73bc,0x86e7bf80,0x50951606,0x042b1d51,0xa2a73508,
25530         0x5fb89ec2,0x9ef6ea49,0x5ef8b892,0xf1008ce9,0x9ae8568b,0x78a7e684 } },
25531     /* 158 */
25532     { { 0x10470cd8,0x3fe83a7c,0xf86df000,0x92734682,0xda9409b5,0xb5dac06b,
25533         0x94939c5f,0x1e7a9660,0x5cc116dc,0xdec6c150,0x66bac8cc,0x1a52b408 },
25534       { 0x6e864045,0x5303a365,0x9139efc1,0x45eae72a,0x6f31d54f,0x83bec646,
25535         0x6e958a6d,0x2fb4a86f,0x4ff44030,0x6760718e,0xe91ae0df,0x008117e3 } },
25536     /* 159 */
25537     { { 0x384310a2,0x5d5833ba,0x1fd6c9fc,0xbdfb4edc,0x849c4fb8,0xb9a4f102,
25538         0x581c1e1f,0xe5fb239a,0xd0a9746d,0xba44b2e7,0x3bd942b9,0x78f7b768 },
25539       { 0xc87607ae,0x076c8ca1,0xd5caaa7e,0x82b23c2e,0x2763e461,0x6a581f39,
25540         0x3886df11,0xca8a5e4a,0x264e7f22,0xc87e90cf,0x215cfcfc,0x04f74870 } },
25541     /* 160 */
25542     { { 0x141d161c,0x5285d116,0x93c4ed17,0x67cd2e0e,0x7c36187e,0x12c62a64,
25543         0xed2584ca,0xf5329539,0x42fbbd69,0xc4c777c4,0x1bdfc50a,0x107de776 },
25544       { 0xe96beebd,0x9976dcc5,0xa865a151,0xbe2aff95,0x9d8872af,0x0e0a9da1,
25545         0xa63c17cc,0x5e357a3d,0xe15cc67c,0xd31fdfd8,0x7970c6d8,0xc44bbefd } },
25546     /* 161 */
25547     { { 0x4c0c62f1,0x703f83e2,0x4e195572,0x9b1e28ee,0xfe26cced,0x6a82858b,
25548         0xc43638fa,0xd381c84b,0xa5ba43d8,0x94f72867,0x10b82743,0x3b4a783d },
25549       { 0x7576451e,0xee1ad7b5,0x14b6b5c8,0xc3d0b597,0xfcacc1b8,0x3dc30954,
25550         0x472c9d7b,0x55df110e,0x02f8a328,0x97c86ed7,0x88dc098f,0xd0433413 } },
25551     /* 162 */
25552     { { 0x2ca8f2fe,0x1a60d152,0x491bd41f,0x61640948,0x58dfe035,0x6dae29a5,
25553         0x278e4863,0x9a615bea,0x9ad7c8e5,0xbbdb4477,0x2ceac2fc,0x1c706630 },
25554       { 0x99699b4b,0x5e2b54c6,0x239e17e8,0xb509ca6d,0xea063a82,0x728165fe,
25555         0xb6a22e02,0x6b5e609d,0xb26ee1df,0x12813905,0x439491fa,0x07b9f722 } },
25556     /* 163 */
25557     { { 0x48ff4e49,0x1592ec14,0x6d644129,0x3e4e9f17,0x1156acc0,0x7acf8288,
25558         0xbb092b0b,0x5aa34ba8,0x7d38393d,0xcd0f9022,0xea4f8187,0x416724dd },
25559       { 0xc0139e73,0x3c4e641c,0x91e4d87d,0xe0fe46cf,0xcab61f8a,0xedb3c792,
25560         0xd3868753,0x4cb46de4,0x20f1098a,0xe449c21d,0xf5b8ea6e,0x5e5fd059 } },
25561     /* 164 */
25562     { { 0x75856031,0x7fcadd46,0xeaf2fbd0,0x89c7a4cd,0x7a87c480,0x1af523ce,
25563         0x61d9ae90,0xe5fc1095,0xbcdb95f5,0x3fb5864f,0xbb5b2c7d,0xbeb5188e },
25564       { 0x3ae65825,0x3d1563c3,0x0e57d641,0x116854c4,0x1942ebd3,0x11f73d34,
25565         0xc06955b3,0x24dc5904,0x995a0a62,0x8a0d4c83,0x5d577b7d,0xfb26b86d } },
25566     /* 165 */
25567     { { 0xc686ae17,0xc53108e7,0xd1c1da56,0x9090d739,0x9aec50ae,0x4583b013,
25568         0xa49a6ab2,0xdd9a088b,0xf382f850,0x28192eea,0xf5fe910e,0xcc8df756 },
25569       { 0x9cab7630,0x877823a3,0xfb8e7fc1,0x64984a9a,0x364bfc16,0x5448ef9c,
25570         0xc44e2a9a,0xbbb4f871,0x435c95e9,0x901a41ab,0xaaa50a06,0xc6c23e5f } },
25571     /* 166 */
25572     { { 0x9034d8dd,0xb78016c1,0x0b13e79b,0x856bb44b,0xb3241a05,0x85c6409a,
25573         0x2d78ed21,0x8d2fe19a,0x726eddf2,0xdcc7c26d,0x25104f04,0x3ccaff5f },
25574       { 0x6b21f843,0x397d7edc,0xe975de4c,0xda88e4dd,0x4f5ab69e,0x5273d396,
25575         0x9aae6cc0,0x537680e3,0x3e6f9461,0xf749cce5,0x957bffd3,0x021ddbd9 } },
25576     /* 167 */
25577     { { 0x777233cf,0x7b64585f,0x0942a6f0,0xfe6771f6,0xdfe6eef0,0x636aba7a,
25578         0x86038029,0x63bbeb56,0xde8fcf36,0xacee5842,0xd4a20524,0x48d9aa99 },
25579       { 0x0da5e57a,0xcff7a74c,0xe549d6c9,0xc232593c,0xf0f2287b,0x68504bcc,
25580         0xbc8360b5,0x6d7d098d,0x5b402f41,0xeac5f149,0xb87d1bf1,0x61936f11 } },
25581     /* 168 */
25582     { { 0xb8153a9d,0xaa9da167,0x9e83ecf0,0xa49fe3ac,0x1b661384,0x14c18f8e,
25583         0x38434de1,0x61c24dab,0x283dae96,0x3d973c3a,0x82754fc9,0xc99baa01 },
25584       { 0x4c26b1e3,0x477d198f,0xa7516202,0x12e8e186,0x362addfa,0x386e52f6,
25585         0xc3962853,0x31e8f695,0x6aaedb60,0xdec2af13,0x29cf74ac,0xfcfdb4c6 } },
25586     /* 169 */
25587     { { 0xcca40298,0x6b3ee958,0xf2f5d195,0xc3878153,0xed2eae5b,0x0c565630,
25588         0x3a697cf2,0xd089b37e,0xad5029ea,0xc2ed2ac7,0x0f0dda6a,0x7e5cdfad },
25589       { 0xd9b86202,0xf98426df,0x4335e054,0xed1960b1,0x3f14639e,0x1fdb0246,
25590         0x0db6c670,0x17f709c3,0x773421e1,0xbfc687ae,0x26c1a8ac,0x13fefc4a } },
25591     /* 170 */
25592     { { 0x7ffa0a5f,0xe361a198,0xc63fe109,0xf4b26102,0x6c74e111,0x264acbc5,
25593         0x77abebaf,0x4af445fa,0x24cddb75,0x448c4fdd,0x44506eea,0x0b13157d },
25594       { 0x72e9993d,0x22a6b159,0x85e5ecbe,0x2c3c57e4,0xfd83e1a1,0xa673560b,
25595         0xc3b8c83b,0x6be23f82,0x40bbe38e,0x40b13a96,0xad17399b,0x66eea033 } },
25596     /* 171 */
25597     { { 0xb4c6c693,0x49fc6e95,0x36af7d38,0xefc735de,0x35fe42fc,0xe053343d,
25598         0x6a9ab7c3,0xf0aa427c,0x4a0fcb24,0xc79f0436,0x93ebbc50,0x16287243 },
25599       { 0x16927e1e,0x5c3d6bd0,0x673b984c,0x40158ed2,0x4cd48b9a,0xa7f86fc8,
25600         0x60ea282d,0x1643eda6,0xe2a1beed,0x45b393ea,0x19571a94,0x664c839e } },
25601     /* 172 */
25602     { { 0x27eeaf94,0x57745750,0xea99e1e7,0x2875c925,0x5086adea,0xc127e7ba,
25603         0x86fe424f,0x765252a0,0x2b6c0281,0x1143cc6c,0xd671312d,0xc9bb2989 },
25604       { 0x51acb0a5,0x880c337c,0xd3c60f78,0xa3710915,0x9262b6ed,0x496113c0,
25605         0x9ce48182,0x5d25d9f8,0xb3813586,0x53b6ad72,0x4c0e159c,0x0ea3bebc } },
25606     /* 173 */
25607     { { 0xc5e49bea,0xcaba450a,0x7c05da59,0x684e5415,0xde7ac36c,0xa2e9cab9,
25608         0x2e6f957b,0x4ca79b5f,0x09b817b1,0xef7b0247,0x7d89df0f,0xeb304990 },
25609       { 0x46fe5096,0x508f7307,0x2e04eaaf,0x695810e8,0x3512f76c,0x88ef1bd9,
25610         0x3ebca06b,0x77661351,0xccf158b7,0xf7d4863a,0x94ee57da,0xb2a81e44 } },
25611     /* 174 */
25612     { { 0x6d53e6ba,0xff288e5b,0x14484ea2,0xa90de1a9,0xed33c8ec,0x2fadb60c,
25613         0x28b66a40,0x579d6ef3,0xec24372d,0x4f2dd6dd,0x1d66ec7d,0xe9e33fc9 },
25614       { 0x039eab6e,0x110899d2,0x3e97bb5e,0xa31a667a,0xcfdce68e,0x6200166d,
25615         0x5137d54b,0xbe83ebae,0x4800acdf,0x085f7d87,0x0c6f8c86,0xcf4ab133 } },
25616     /* 175 */
25617     { { 0x931e08fb,0x03f65845,0x1506e2c0,0x6438551e,0x9c36961f,0x5791f0dc,
25618         0xe3dcc916,0x68107b29,0xf495d2ca,0x83242374,0x6ee5895b,0xd8cfb663 },
25619       { 0xa0349b1b,0x525e0f16,0x4a0fab86,0x33cd2c6c,0x2af8dda9,0x46c12ee8,
25620         0x71e97ad3,0x7cc424ba,0x37621eb0,0x69766ddf,0xa5f0d390,0x95565f56 } },
25621     /* 176 */
25622     { { 0x1a0f5e94,0xe0e7bbf2,0x1d82d327,0xf771e115,0xceb111fa,0x10033e3d,
25623         0xd3426638,0xd269744d,0x00d01ef6,0xbdf2d9da,0xa049ceaf,0x1cb80c71 },
25624       { 0x9e21c677,0x17f18328,0x19c8f98b,0x6452af05,0x80b67997,0x35b9c5f7,
25625         0x40f8f3d4,0x5c2e1cbe,0x66d667ca,0x43f91656,0xcf9d6e79,0x9faaa059 } },
25626     /* 177 */
25627     { { 0x0a078fe6,0x8ad24618,0x464fd1dd,0xf6cc73e6,0xc3e37448,0x4d2ce34d,
25628         0xe3271b5f,0x624950c5,0xefc5af72,0x62910f5e,0xaa132bc6,0x8b585bf8 },
25629       { 0xa839327f,0x11723985,0x4aac252f,0x34e2d27d,0x6296cc4e,0x402f59ef,
25630         0x47053de9,0x00ae055c,0x28b4f09b,0xfc22a972,0xfa0c180e,0xa9e86264 } },
25631     /* 178 */
25632     { { 0xbc310ecc,0x0b7b6224,0x67fa14ed,0x8a1a74f1,0x7214395c,0x87dd0960,
25633         0xf5c91128,0xdf1b3d09,0x86b264a8,0x39ff23c6,0x3e58d4c5,0xdc2d49d0 },
25634       { 0xa9d6f501,0x2152b7d3,0xc04094f7,0xf4c32e24,0xd938990f,0xc6366596,
25635         0x94fb207f,0x084d078f,0x328594cb,0xfd99f1d7,0xcb2d96b3,0x36defa64 } },
25636     /* 179 */
25637     { { 0x13ed7cbe,0x4619b781,0x9784bd0e,0x95e50015,0x2c7705fe,0x2a32251c,
25638         0x5f0dd083,0xa376af99,0x0361a45b,0x55425c6c,0x1f291e7b,0x812d2cef },
25639       { 0x5fd94972,0xccf581a0,0xe56dc383,0x26e20e39,0x63dbfbf0,0x0093685d,
25640         0x36b8c575,0x1fc164cc,0x390ef5e7,0xb9c5ab81,0x26908c66,0x40086beb } },
25641     /* 180 */
25642     { { 0x37e3c115,0xe5e54f79,0xc1445a8a,0x69b8ee8c,0xb7659709,0x79aedff2,
25643         0x1b46fbe6,0xe288e163,0xd18d7bb7,0xdb4844f0,0x48aa6424,0xe0ea23d0 },
25644       { 0xf3d80a73,0x714c0e4e,0x3bd64f98,0x87a0aa9e,0x2ec63080,0x8844b8a8,
25645         0x255d81a3,0xe0ac9c30,0x455397fc,0x86151237,0x2f820155,0x0b979464 } },
25646     /* 181 */
25647     { { 0x4ae03080,0x127a255a,0x580a89fb,0x232306b4,0x6416f539,0x04e8cd6a,
25648         0x13b02a0e,0xaeb70dee,0x4c09684a,0xa3038cf8,0x28e433ee,0xa710ec3c },
25649       { 0x681b1f7d,0x77a72567,0x2fc28170,0x86fbce95,0xf5735ac8,0xd3408683,
25650         0x6bd68e93,0x3a324e2a,0xc027d155,0x7ec74353,0xd4427177,0xab60354c } },
25651     /* 182 */
25652     { { 0xef4c209d,0x32a5342a,0x08d62704,0x2ba75274,0xc825d5fe,0x4bb4af6f,
25653         0xd28e7ff1,0x1c3919ce,0xde0340f6,0x1dfc2fdc,0x29f33ba9,0xc6580baf },
25654       { 0x41d442cb,0xae121e75,0x3a4724e4,0x4c7727fd,0x524f3474,0xe556d6a4,
25655         0x785642a2,0x87e13cc7,0xa17845fd,0x182efbb1,0x4e144857,0xdcec0cf1 } },
25656     /* 183 */
25657     { { 0xe9539819,0x1cb89541,0x9d94dbf1,0xc8cb3b4f,0x417da578,0x1d353f63,
25658         0x8053a09e,0xb7a697fb,0xc35d8b78,0x8d841731,0xb656a7a9,0x85748d6f },
25659       { 0xc1859c5d,0x1fd03947,0x535d22a2,0x6ce965c1,0x0ca3aadc,0x1966a13e,
25660         0x4fb14eff,0x9802e41d,0x76dd3fcd,0xa9048cbb,0xe9455bba,0x89b182b5 } },
25661     /* 184 */
25662     { { 0x43360710,0xd777ad6a,0x55e9936b,0x841287ef,0x04a21b24,0xbaf5c670,
25663         0x35ad86f1,0xf2c0725f,0xc707e72e,0x338fa650,0xd8883e52,0x2bf8ed2e },
25664       { 0xb56e0d6a,0xb0212cf4,0x6843290c,0x50537e12,0x98b3dc6f,0xd8b184a1,
25665         0x0210b722,0xd2be9a35,0x559781ee,0x407406db,0x0bc18534,0x5a78d591 } },
25666     /* 185 */
25667     { { 0xd748b02c,0x4d57aa2a,0xa12b3b95,0xbe5b3451,0x64711258,0xadca7a45,
25668         0x322153db,0x597e091a,0x32eb1eab,0xf3271006,0x2873f301,0xbd9adcba },
25669       { 0x38543f7f,0xd1dc79d1,0x921b1fef,0x00022092,0x1e5df8ed,0x86db3ef5,
25670         0x9e6b944a,0x888cae04,0x791a32b4,0x71bd29ec,0xa6d1c13e,0xd3516206 } },
25671     /* 186 */
25672     { { 0x55924f43,0x2ef6b952,0x4f9de8d5,0xd2f401ae,0xadc68042,0xfc73e8d7,
25673         0x0d9d1bb4,0x627ea70c,0xbbf35679,0xc3bb3e3e,0xd882dee4,0x7e8a254a },
25674       { 0xb5924407,0x08906f50,0xa1ad444a,0xf14a0e61,0x65f3738e,0xaa0efa21,
25675         0xae71f161,0xd60c7dd6,0xf175894d,0x9e8390fa,0x149f4c00,0xd115cd20 } },
25676     /* 187 */
25677     { { 0xa52abf77,0x2f2e2c1d,0x54232568,0xc2a0dca5,0x54966dcc,0xed423ea2,
25678         0xcd0dd039,0xe48c93c7,0x176405c7,0x1e54a225,0x70d58f2e,0x1efb5b16 },
25679       { 0x94fb1471,0xa751f9d9,0x67d2941d,0xfdb31e1f,0x53733698,0xa6c74eb2,
25680         0x89a0f64a,0xd3155d11,0xa4b8d2b6,0x4414cfe4,0xf7a8e9e3,0x8d5a4be8 } },
25681     /* 188 */
25682     { { 0x52669e98,0x5c96b4d4,0x8fd42a03,0x4547f922,0xd285174e,0xcf5c1319,
25683         0x064bffa0,0x805cd1ae,0x246d27e7,0x50e8bc4f,0xd5781e11,0xf89ef98f },
25684       { 0xdee0b63f,0xb4ff95f6,0x222663a4,0xad850047,0x4d23ce9c,0x02691860,
25685         0x50019f59,0x3e5309ce,0x69a508ae,0x27e6f722,0x267ba52c,0xe9376652 } },
25686     /* 189 */
25687     { { 0xc0368708,0xa04d289c,0x5e306e1d,0xc458872f,0x33112fea,0x76fa23de,
25688         0x6efde42e,0x718e3974,0x1d206091,0xf0c98cdc,0x14a71987,0x5fa3ca62 },
25689       { 0xdcaa9f2a,0xeee8188b,0x589a860d,0x312cc732,0xc63aeb1f,0xf9808dd6,
25690         0x4ea62b53,0x70fd43db,0x890b6e97,0x2c2bfe34,0xfa426aa6,0x105f863c } },
25691     /* 190 */
25692     { { 0xb38059ad,0x0b29795d,0x90647ea0,0x5686b77e,0xdb473a3e,0xeff0470e,
25693         0xf9b6d1e2,0x278d2340,0xbd594ec7,0xebbff95b,0xd3a7f23d,0xf4b72334 },
25694       { 0xa5a83f0b,0x2a285980,0x9716a8b3,0x0786c41a,0x22511812,0x138901bd,
25695         0xe2fede6e,0xd1b55221,0xdf4eb590,0x0806e264,0x762e462e,0x6c4c897e } },
25696     /* 191 */
25697     { { 0xb4b41d9d,0xd10b905f,0x4523a65b,0x826ca466,0xb699fa37,0x535bbd13,
25698         0x73bc8f90,0x5b9933d7,0xcd2118ad,0x9332d61f,0xd4a65fd0,0x158c693e },
25699       { 0xe6806e63,0x4ddfb2a8,0xb5de651b,0xe31ed3ec,0x819bc69a,0xf9460e51,
25700         0x2c76b1f8,0x6229c0d6,0x901970a3,0xbb78f231,0x9cee72b8,0x31f3820f } },
25701     /* 192 */
25702     { { 0xc09e1c72,0xe931caf2,0x12990cf4,0x0715f298,0x943262d8,0x33aad81d,
25703         0x73048d3f,0x5d292b7a,0xdc7415f6,0xb152aaa4,0x0fd19587,0xc3d10fd9 },
25704       { 0x75ddadd0,0xf76b35c5,0x1e7b694c,0x9f5f4a51,0xc0663025,0x2f1ab7eb,
25705         0x920260b0,0x01c9cc87,0x05d39da6,0xc4b1f61a,0xeb4a9c4e,0x6dcd76c4 } },
25706     /* 193 */
25707     { { 0xfdc83f01,0x0ba0916f,0x9553e4f9,0x354c8b44,0xffc5e622,0xa6cc511a,
25708         0xe95be787,0xb954726a,0x75b41a62,0xcb048115,0xebfde989,0xfa2ae6cd },
25709       { 0x0f24659a,0x6376bbc7,0x4c289c43,0x13a999fd,0xec9abd8b,0xc7134184,
25710         0xa789ab04,0x28c02bf6,0xd3e526ec,0xff841ebc,0x640893a8,0x442b191e } },
25711     /* 194 */
25712     { { 0xfa2b6e20,0x4cac6c62,0xf6d69861,0x97f29e9b,0xbc96d12d,0x228ab1db,
25713         0x5e8e108d,0x6eb91327,0x40771245,0xd4b3d4d1,0xca8a803a,0x61b20623 },
25714       { 0xa6a560b1,0x2c2f3b41,0x3859fcf4,0x879e1d40,0x024dbfc3,0x7cdb5145,
25715         0x3bfa5315,0x55d08f15,0xaa93823a,0x2f57d773,0xc6a2c9a2,0xa97f259c } },
25716     /* 195 */
25717     { { 0xe58edbbb,0xc306317b,0x79dfdf13,0x25ade51c,0x16d83dd6,0x6b5beaf1,
25718         0x1dd8f925,0xe8038a44,0xb2a87b6b,0x7f00143c,0xf5b438de,0xa885d00d },
25719       { 0xcf9e48bd,0xe9f76790,0xa5162768,0xf0bdf9f0,0xad7b57cb,0x0436709f,
25720         0xf7c15db7,0x7e151c12,0x5d90ee3b,0x3514f022,0x2c361a8d,0x2e84e803 } },
25721     /* 196 */
25722     { { 0x563ec8d8,0x2277607d,0xe3934cb7,0xa661811f,0xf58fd5de,0x3ca72e7a,
25723         0x62294c6a,0x7989da04,0xf6bbefe9,0x88b3708b,0x53ed7c82,0x0d524cf7 },
25724       { 0x2f30c073,0x69f699ca,0x9dc1dcf3,0xf0fa264b,0x05f0aaf6,0x44ca4568,
25725         0xd19b9baf,0x0f5b23c7,0xeabd1107,0x39193f41,0x2a7c9b83,0x9e3e10ad } },
25726     /* 197 */
25727     { { 0xd4ae972f,0xa90824f0,0xc6e846e7,0x43eef02b,0x29d2160a,0x7e460612,
25728         0xfe604e91,0x29a178ac,0x4eb184b2,0x23056f04,0xeb54cdf4,0x4fcad55f },
25729       { 0xae728d15,0xa0ff96f3,0xc6a00331,0x8a2680c6,0x7ee52556,0x5f84cae0,
25730         0xc5a65dad,0x5e462c3a,0xe2d23f4f,0x5d2b81df,0xc5b1eb07,0x6e47301b } },
25731     /* 198 */
25732     { { 0xaf8219b9,0x77411d68,0x51b1907a,0xcb883ce6,0x101383b5,0x25c87e57,
25733         0x982f970d,0x9c7d9859,0x118305d2,0xaa6abca5,0x9013a5db,0x725fed2f },
25734       { 0xababd109,0x487cdbaf,0x87586528,0xc0f8cf56,0x8ad58254,0xa02591e6,
25735         0xdebbd526,0xc071b1d1,0x961e7e31,0x927dfe8b,0x9263dfe1,0x55f895f9 } },
25736     /* 199 */
25737     { { 0xb175645b,0xf899b00d,0xb65b4b92,0x51f3a627,0xb67399ef,0xa2f3ac8d,
25738         0xe400bc20,0xe717867f,0x1967b952,0x42cc9020,0x3ecd1de1,0x3d596751 },
25739       { 0xdb979775,0xd41ebcde,0x6a2e7e88,0x99ba61bc,0x321504f2,0x039149a5,
25740         0x27ba2fad,0xe7dc2314,0xb57d8368,0x9f556308,0x57da80a7,0x2b6d16c9 } },
25741     /* 200 */
25742     { { 0x279ad982,0x84af5e76,0x9c8b81a6,0x9bb4c92d,0x0e698e67,0xd79ad44e,
25743         0x265fc167,0xe8be9048,0x0c3a4ccc,0xf135f7e6,0xb8863a33,0xa0a10d38 },
25744       { 0xd386efd9,0xe197247c,0xb52346c2,0x0eefd3f9,0x78607bc8,0xc22415f9,
25745         0x508674ce,0xa2a8f862,0xc8c9d607,0xa72ad09e,0x50fa764f,0xcd9f0ede } },
25746     /* 201 */
25747     { { 0xd1a46d4d,0x063391c7,0x9eb01693,0x2df51c11,0x849e83de,0xc5849800,
25748         0x8ad08382,0x48fd09aa,0xaa742736,0xa405d873,0xe1f9600c,0xee49e61e },
25749       { 0x48c76f73,0xd76676be,0x01274b2a,0xd9c100f6,0x83f8718d,0x110bb67c,
25750         0x02fc0d73,0xec85a420,0x744656ad,0xc0449e1e,0x37d9939b,0x28ce7376 } },
25751     /* 202 */
25752     { { 0x44544ac7,0x97e9af72,0xba010426,0xf2c658d5,0xfb3adfbd,0x732dec39,
25753         0xa2df0b07,0xd12faf91,0x2171e208,0x8ac26725,0x5b24fa54,0xf820cdc8 },
25754       { 0x94f4cf77,0x307a6eea,0x944a33c6,0x18c783d2,0x0b741ac5,0x4b939d4c,
25755         0x3ffbb6e4,0x1d7acd15,0x7a255e44,0x06a24858,0xce336d50,0x14fbc494 } },
25756     /* 203 */
25757     { { 0x51584e3c,0x9b920c0c,0xf7e54027,0xc7733c59,0x88422bbe,0xe24ce139,
25758         0x523bd6ab,0x11ada812,0xb88e6def,0xde068800,0xfe8c582d,0x7b872671 },
25759       { 0x7de53510,0x4e746f28,0xf7971968,0x492f8b99,0x7d928ac2,0x1ec80bc7,
25760         0x432eb1b5,0xb3913e48,0x32028f6e,0xad084866,0x8fc2f38b,0x122bb835 } },
25761     /* 204 */
25762     { { 0x3b0b29c3,0x0a9f3b1e,0x4fa44151,0x837b6432,0x17b28ea7,0xb9905c92,
25763         0x98451750,0xf39bc937,0xce8b6da1,0xcd383c24,0x010620b2,0x299f57db },
25764       { 0x58afdce3,0x7b6ac396,0x3d05ef47,0xa15206b3,0xb9bb02ff,0xa0ae37e2,
25765         0x9db3964c,0x107760ab,0x67954bea,0xe29de9a0,0x431c3f82,0x446a1ad8 } },
25766     /* 205 */
25767     { { 0x5c6b8195,0xc6fecea0,0xf49e71b9,0xd744a7c5,0x177a7ae7,0xa8e96acc,
25768         0x358773a7,0x1a05746c,0x37567369,0xa4162146,0x87d1c971,0xaa0217f7 },
25769       { 0x77fd3226,0x61e9d158,0xe4f600be,0x0f6f2304,0x7a6dff07,0xa9c4cebc,
25770         0x09f12a24,0xd15afa01,0x8c863ee9,0x2bbadb22,0xe5eb8c78,0xa28290e4 } },
25771     /* 206 */
25772     { { 0x3e9de330,0x55b87fa0,0x195c145b,0x12b26066,0xa920bef0,0xe08536e0,
25773         0x4d195adc,0x7bff6f2c,0x945f4187,0x7f319e9d,0xf892ce47,0xf9848863 },
25774       { 0x4fe37657,0xd0efc1d3,0x5cf0e45a,0x3c58de82,0x8b0ccbbe,0x626ad21a,
25775         0xaf952fc5,0xd2a31208,0xeb437357,0x81791995,0x98e95d4f,0x5f19d30f } },
25776     /* 207 */
25777     { { 0x0e6865bb,0x72e83d9a,0xf63456a6,0x22f5af3b,0x463c8d9e,0x409e9c73,
25778         0xdfe6970e,0x40e9e578,0x711b91ca,0x876b6efa,0x942625a3,0x895512cf },
25779       { 0xcb4e462b,0x84c8eda8,0x4412e7c8,0x84c0154a,0xceb7b71f,0x04325db1,
25780         0x66f70877,0x1537dde3,0x1992b9ac,0xf3a09399,0xd498ae77,0xa7316606 } },
25781     /* 208 */
25782     { { 0xcad260f5,0x13990d2f,0xeec0e8c0,0x76c3be29,0x0f7bd7d5,0x7dc5bee0,
25783         0xefebda4b,0x9be167d2,0x9122b87e,0xcce3dde6,0x82b5415c,0x75a28b09 },
25784       { 0xe84607a6,0xf6810bcd,0x6f4dbf0d,0xc6d58128,0x1b4dafeb,0xfead577d,
25785         0x066b28eb,0x9bc440b2,0x8b17e84b,0x53f1da97,0xcda9a575,0x0459504b } },
25786     /* 209 */
25787     { { 0x329e5836,0x13e39a02,0xf717269d,0x2c9e7d51,0xf26c963b,0xc5ac58d6,
25788         0x79967bf5,0x3b0c6c43,0x55908d9d,0x60bbea3f,0xf07c9ad1,0xd84811e7 },
25789       { 0x5bd20e4a,0xfe7609a7,0x0a70baa8,0xe4325dd2,0xb3600386,0x3711f370,
25790         0xd0924302,0x97f9562f,0x4acc4436,0x040dc0c3,0xde79cdd4,0xfd6d725c } },
25791     /* 210 */
25792     { { 0xcf13eafb,0xb3efd0e3,0x5aa0ae5f,0x21009cbb,0x79022279,0xe480c553,
25793         0xb2fc9a6d,0x755cf334,0x07096ae7,0x8564a5bf,0xbd238139,0xddd649d0 },
25794       { 0x8a045041,0xd0de10b1,0xc957d572,0x6e05b413,0x4e0fb25c,0x5c5ff806,
25795         0x641162fb,0xd933179b,0xe57439f9,0x42d48485,0x8a8d72aa,0x70c5bd0a } },
25796     /* 211 */
25797     { { 0x97bdf646,0xa7671738,0xab329f7c,0xaa1485b4,0xf8f25fdf,0xce3e11d6,
25798         0xc6221824,0x76a3fc7e,0xf3924740,0x045f281f,0x96d13a9a,0x24557d4e },
25799       { 0xdd4c27cd,0x875c804b,0x0f5c7fea,0x11c5f0f4,0xdc55ff7e,0xac8c880b,
25800         0x1103f101,0x2acddec5,0xf99faa89,0x38341a21,0xce9d6b57,0xc7b67a2c } },
25801     /* 212 */
25802     { { 0x8e357586,0x9a0d724f,0xdf648da0,0x1d7f4ff5,0xfdee62a5,0x9c3e6c9b,
25803         0x0389b372,0x0499cef0,0x98eab879,0xe904050d,0x6c051617,0xe8eef1b6 },
25804       { 0xc37e3ca9,0xebf5bfeb,0xa4e0b91d,0x7c5e946d,0x2c4bea28,0x79097314,
25805         0xee67b2b7,0x81f6c109,0xdafc5ede,0xaf237d9b,0x2abb04c7,0xd2e60201 } },
25806     /* 213 */
25807     { { 0x8a4f57bf,0x6156060c,0xff11182a,0xf9758696,0x6296ef00,0x8336773c,
25808         0xff666899,0x9c054bce,0x719cd11c,0xd6a11611,0xdbe1acfa,0x9824a641 },
25809       { 0xba89fd01,0x0b7b7a5f,0x889f79d8,0xf8d3b809,0xf578285c,0xc5e1ea08,
25810         0xae6d8288,0x7ac74536,0x7521ef5f,0x5d37a200,0xb260a25d,0x5ecc4184 } },
25811     /* 214 */
25812     { { 0xa708c8d3,0xddcebb19,0xc63f81ec,0xe63ed04f,0x11873f95,0xd045f5a0,
25813         0x79f276d5,0x3b5ad544,0x425ae5b3,0x81272a3d,0x10ce1605,0x8bfeb501 },
25814       { 0x888228bf,0x4233809c,0xb2aff7df,0x4bd82acf,0x0cbd4a7f,0x9c68f180,
25815         0x6b44323d,0xfcd77124,0x891db957,0x60c0fcf6,0x04da8f7f,0xcfbb4d89 } },
25816     /* 215 */
25817     { { 0x3b26139a,0x9a6a5df9,0xb2cc7eb8,0x3e076a83,0x5a964bcd,0x47a8e82d,
25818         0xb9278d6b,0x8a4e2a39,0xe4443549,0x93506c98,0xf1e0d566,0x06497a8f },
25819       { 0x2b1efa05,0x3dee8d99,0x45393e33,0x2da63ca8,0xcf0579ad,0xa4af7277,
25820         0x3236d8ea,0xaf4b4639,0x32b617f5,0x6ccad95b,0xb88bb124,0xce76d8b8 } },
25821     /* 216 */
25822     { { 0x083843dc,0x63d2537a,0x1e4153b4,0x89eb3514,0xea9afc94,0x5175ebc4,
25823         0x8ed1aed7,0x7a652580,0xd85e8297,0x67295611,0xb584b73d,0x8dd2d68b },
25824       { 0x0133c3a4,0x237139e6,0x4bd278ea,0x9de838ab,0xc062fcd9,0xe829b072,
25825         0x63ba8706,0x70730d4f,0xd3cd05ec,0x6080483f,0x0c85f84d,0x872ab5b8 } },
25826     /* 217 */
25827     { { 0x999d4d49,0xfc0776d3,0xec3f45e7,0xa3eb59de,0x0dae1fc1,0xbc990e44,
25828         0xa15371ff,0x33596b1e,0x9bc7ab25,0xd447dcb2,0x35979582,0xcd5b63e9 },
25829       { 0x77d1ff11,0xae3366fa,0xedee6903,0x59f28f05,0xa4433bf2,0x6f43fed1,
25830         0xdf9ce00e,0x15409c9b,0xaca9c5dc,0x21b5cded,0x82d7bdb4,0xf9f33595 } },
25831     /* 218 */
25832     { { 0x9422c792,0x95944378,0xc958b8bf,0x239ea923,0xdf076541,0x4b61a247,
25833         0xbb9fc544,0x4d29ce85,0x0b424559,0x9a692a67,0x0e486900,0x6e0ca5a0 },
25834       { 0x85b3bece,0x6b79a782,0xc61f9892,0x41f35e39,0xae747f82,0xff82099a,
25835         0xd0ca59d6,0x58c8ae3f,0x99406b5f,0x4ac930e2,0x9df24243,0x2ce04eb9 } },
25836     /* 219 */
25837     { { 0x1ac37b82,0x4366b994,0x25b04d83,0xff0c728d,0x19c47b7c,0x1f551361,
25838         0xbeff13e7,0xdbf2d5ed,0xe12a683d,0xf78efd51,0x989cf9c4,0x82cd85b9 },
25839       { 0xe0cb5d37,0xe23c6db6,0x72ee1a15,0x818aeebd,0x28771b14,0x8212aafd,
25840         0x1def817d,0x7bc221d9,0x9445c51f,0xdac403a2,0x12c3746b,0x711b0517 } },
25841     /* 220 */
25842     { { 0x5ea99ecc,0x0ed9ed48,0xb8cab5e1,0xf799500d,0xb570cbdc,0xa8ec87dc,
25843         0xd35dfaec,0x52cfb2c2,0x6e4d80a4,0x8d31fae2,0xdcdeabe5,0xe6a37dc9 },
25844       { 0x1deca452,0x5d365a34,0x0d68b44e,0x09a5f8a5,0xa60744b1,0x59238ea5,
25845         0xbb4249e9,0xf2fedc0d,0xa909b2e3,0xe395c74e,0x39388250,0xe156d1a5 } },
25846     /* 221 */
25847     { { 0x47181ae9,0xd796b3d0,0x44197808,0xbaf44ba8,0x34cf3fac,0xe6933094,
25848         0xc3bd5c46,0x41aa6ade,0xeed947c6,0x4fda75d8,0x9ea5a525,0xacd9d412 },
25849       { 0xd430301b,0x65cc55a3,0x7b52ea49,0x3c9a5bcf,0x159507f0,0x22d319cf,
25850         0xde74a8dd,0x2ee0b9b5,0x877ac2b6,0x20c26a1e,0x92e7c314,0x387d73da } },
25851     /* 222 */
25852     { { 0x8cd3fdac,0x13c4833e,0x332e5b8e,0x76fcd473,0xe2fe1fd3,0xff671b4b,
25853         0x5d98d8ec,0x4d734e8b,0x514bbc11,0xb1ead3c6,0x7b390494,0xd14ca858 },
25854       { 0x5d2d37e9,0x95a443af,0x00464622,0x73c6ea73,0x15755044,0xa44aeb4b,
25855         0xfab58fee,0xba3f8575,0xdc680a6f,0x9779dbc9,0x7b37ddfc,0xe1ee5f5a } },
25856     /* 223 */
25857     { { 0x12d29f46,0xcd0b4648,0x0ed53137,0x93295b0b,0x80bef6c9,0xbfe26094,
25858         0x54248b00,0xa6565788,0x80e7f9c4,0x69c43fca,0xbe141ea1,0x2190837b },
25859       { 0xa1b26cfb,0x875e159a,0x7affe852,0x90ca9f87,0x92ca598e,0x15e6550d,
25860         0x1938ad11,0xe3e0945d,0x366ef937,0xef7636bb,0xb39869e5,0xb6034d0b } },
25861     /* 224 */
25862     { { 0x26d8356e,0x4d255e30,0xd314626f,0xf83666ed,0xd0c8ed64,0x421ddf61,
25863         0x26677b61,0x96e473c5,0x9e9b18b3,0xdad4af7e,0xa9393f75,0xfceffd4a },
25864       { 0x11c731d5,0x843138a1,0xb2f141d9,0x05bcb3a1,0x617b7671,0x20e1fa95,
25865         0x88ccec7b,0xbefce812,0x90f1b568,0x582073dc,0x1f055cb7,0xf572261a } },
25866     /* 225 */
25867     { { 0x36973088,0xf3148277,0x86a9f980,0xc008e708,0xe046c261,0x1b795947,
25868         0xca76bca0,0xdf1e6a7d,0x71acddf0,0xabafd886,0x1364d8f4,0xff7054d9 },
25869       { 0xe2260594,0x2cf63547,0xd73b277e,0x468a5372,0xef9bd35e,0xc7419e24,
25870         0x24043cc3,0x2b4a1c20,0x890b39cd,0xa28f047a,0x46f9a2e3,0xdca2cea1 } },
25871     /* 226 */
25872     { { 0x53277538,0xab788736,0xcf697738,0xa734e225,0x6b22e2c1,0x66ee1d1e,
25873         0xebe1d212,0x2c615389,0x02bb0766,0xf36cad40,0x3e64f207,0x120885c3 },
25874       { 0x90fbfec2,0x59e77d56,0xd7a574ae,0xf9e781aa,0x5d045e53,0x801410b0,
25875         0xa91b5f0e,0xd3b5f0aa,0x7fbb3521,0xb3d1df00,0xc72bee9a,0x11c4b33e } },
25876     /* 227 */
25877     { { 0x83c3a7f3,0xd32b9832,0x88d8a354,0x8083abcf,0x50f4ec5a,0xdeb16404,
25878         0x641e2907,0x18d747f0,0xf1bbf03e,0x4e8978ae,0x88a0cd89,0x932447dc },
25879       { 0xcf3d5897,0x561e0feb,0x13600e6d,0xfc3a682f,0xd16a6b73,0xc78b9d73,
25880         0xd29bf580,0xe713fede,0x08d69e5c,0x0a225223,0x1ff7fda4,0x3a924a57 } },
25881     /* 228 */
25882     { { 0xb4093bee,0xfb64554c,0xa58c6ec0,0xa6d65a25,0x43d0ed37,0x4126994d,
25883         0x55152d44,0xa5689a51,0x284caa8d,0xb8e5ea8c,0xd1f25538,0x33f05d4f },
25884       { 0x1b615d6e,0xe0fdfe09,0x705507da,0x2ded7e8f,0x17bbcc80,0xdd5631e5,
25885         0x267fd11f,0x4f87453e,0xff89d62d,0xc6da723f,0xe3cda21d,0x55cbcae2 } },
25886     /* 229 */
25887     { { 0x6b4e84f3,0x336bc94e,0x4ef72c35,0x72863031,0xeeb57f99,0x6d85fdee,
25888         0xa42ece1b,0x7f4e3272,0x36f0320a,0x7f86cbb5,0x923331e6,0xf09b6a2b },
25889       { 0x56778435,0x21d3ecf1,0x8323b2d2,0x2977ba99,0x1704bc0f,0x6a1b57fb,
25890         0x389f048a,0xd777cf8b,0xac6b42cd,0x9ce2174f,0x09e6c55a,0x404e2bff } },
25891     /* 230 */
25892     { { 0x204c5ddb,0x9b9b135e,0x3eff550e,0x9dbfe044,0xec3be0f6,0x35eab4bf,
25893         0x0a43e56f,0x8b4c3f0d,0x0e73f9b3,0x4c1c6673,0x2c78c905,0x92ed38bd },
25894       { 0xa386e27c,0xc7003f6a,0xaced8507,0xb9c4f46f,0x59df5464,0xea024ec8,
25895         0x429572ea,0x4af96152,0xe1fc1194,0x279cd5e2,0x281e358c,0xaa376a03 } },
25896     /* 231 */
25897     { { 0x3cdbc95c,0x07859223,0xef2e337a,0xaae1aa6a,0x472a8544,0xc040108d,
25898         0x8d037b7d,0x80c853e6,0x8c7eee24,0xd221315c,0x8ee47752,0x195d3856 },
25899       { 0xdacd7fbe,0xd4b1ba03,0xd3e0c52b,0x4b5ac61e,0x6aab7b52,0x68d3c052,
25900         0x660e3fea,0xf0d7248c,0x3145efb4,0xafdb3f89,0x8f40936d,0xa73fd9a3 } },
25901     /* 232 */
25902     { { 0xbb1b17ce,0x891b9ef3,0xc6127f31,0x14023667,0x305521fd,0x12b2e58d,
25903         0xe3508088,0x3a47e449,0xff751507,0xe49fc84b,0x5310d16e,0x4023f722 },
25904       { 0xb73399fa,0xa608e5ed,0xd532aa3e,0xf12632d8,0x845e8415,0x13a2758e,
25905         0x1fc2d861,0xae4b6f85,0x339d02f2,0x3879f5b1,0x80d99ebd,0x446d22a6 } },
25906     /* 233 */
25907     { { 0x4be164f1,0x0f502302,0x88b81920,0x8d09d2d6,0x984aceff,0x514056f1,
25908         0x75e9e80d,0xa5c4ddf0,0xdf496a93,0x38cb47e6,0x38df6bf7,0x899e1d6b },
25909       { 0xb59eb2a6,0x69e87e88,0x9b47f38b,0x280d9d63,0x3654e955,0x599411ea,
25910         0x969aa581,0xcf8dd4fd,0x530742a7,0xff5c2baf,0x1a373085,0xa4391536 } },
25911     /* 234 */
25912     { { 0xa8a4bdd2,0x6ace72a3,0xb68ef702,0xc656cdd1,0x90c4dad8,0xd4a33e7e,
25913         0x9d951c50,0x4aece08a,0x085d68e6,0xea8005ae,0x6f7502b8,0xfdd7a7d7 },
25914       { 0x98d6fa45,0xce6fb0a6,0x1104eb8c,0x228f8672,0xda09d7dc,0xd23d8787,
25915         0x2ae93065,0x5521428b,0xea56c366,0x95faba3d,0x0a88aca5,0xedbe5039 } },
25916     /* 235 */
25917     { { 0xbfb26c82,0xd64da0ad,0x952c2f9c,0xe5d70b3c,0xf7e77f68,0xf5e8f365,
25918         0x08f2d695,0x7234e002,0xd12e7be6,0xfaf900ee,0x4acf734e,0x27dc6934 },
25919       { 0xc260a46a,0x80e4ff5e,0x2dc31c28,0x7da5ebce,0xca69f552,0x485c5d73,
25920         0x69cc84c2,0xcdfb6b29,0xed6d4eca,0x031c5afe,0x22247637,0xc7bbf4c8 } },
25921     /* 236 */
25922     { { 0x49fe01b2,0x9d5b72c7,0x793a91b8,0x34785186,0xcf460438,0xa3ba3c54,
25923         0x3ab21b6f,0x73e8e43d,0xbe57b8ab,0x50cde8e0,0xdd204264,0x6488b3a7 },
25924       { 0xdddc4582,0xa9e398b3,0x5bec46fe,0x1698c1a9,0x156d3843,0x7f1446ef,
25925         0x770329a2,0x3fd25dd8,0x2c710668,0x05b1221a,0xa72ee6cf,0x65b2dc2a } },
25926     /* 237 */
25927     { { 0xcd021d63,0x21a885f7,0xfea61f08,0x3f344b15,0xc5cf73e6,0xad5ba6dd,
25928         0x227a8b23,0x154d0d8f,0xdc559311,0x9b74373c,0x98620fa1,0x4feab715 },
25929       { 0x7d9ec924,0x5098938e,0x6d47e550,0x84d54a5e,0x1b617506,0x1a2d1bdc,
25930         0x615868a4,0x99fe1782,0x3005a924,0x171da780,0x7d8f79b6,0xa70bf5ed } },
25931     /* 238 */
25932     { { 0xfe2216c5,0x0bc1250d,0x7601b351,0x2c37e250,0xd6f06b7e,0xb6300175,
25933         0x8bfeb9b7,0x4dde8ca1,0xb82f843d,0x4f210432,0xb1ac0afd,0x8d70e2f9 },
25934       { 0xaae91abb,0x25c73b78,0x863028f2,0x0230dca3,0xe5cf30b7,0x8b923ecf,
25935         0x5506f265,0xed754ec2,0x729a5e39,0x8e41b88c,0xbabf889b,0xee67cec2 } },
25936     /* 239 */
25937     { { 0x1be46c65,0xe183acf5,0xe7565d7a,0x9789538f,0xd9627b4e,0x87873391,
25938         0x9f1d9187,0xbf4ac4c1,0x4691f5c8,0x5db99f63,0x74a1fb98,0xa68df803 },
25939       { 0xbf92b5fa,0x3c448ed1,0x3e0bdc32,0xa098c841,0x79bf016c,0x8e74cd55,
25940         0x115e244d,0x5df0d09c,0x3410b66e,0x9418ad01,0x17a02130,0x8b6124cb } },
25941     /* 240 */
25942     { { 0xc26e3392,0x425ec3af,0xa1722e00,0xc07f8470,0xe2356b43,0xdcc28190,
25943         0xb1ef59a6,0x4ed97dff,0xc63028c1,0xc22b3ad1,0x68c18988,0x070723c2 },
25944       { 0x4cf49e7d,0x70da302f,0x3f12a522,0xc5e87c93,0x18594148,0x74acdd1d,
25945         0xca74124c,0xad5f73ab,0xd69fd478,0xe72e4a3e,0x7b117cc3,0x61593868 } },
25946     /* 241 */
25947     { { 0xa9aa0486,0x7b7b9577,0xa063d557,0x6e41fb35,0xda9047d7,0xb017d5c7,
25948         0x68a87ba9,0x8c748280,0xdf08ad93,0xab45fa5c,0x4c288a28,0xcd9fb217 },
25949       { 0x5747843d,0x59544642,0xa56111e3,0x34d64c6c,0x4bfce8d5,0x12e47ea1,
25950         0x6169267f,0x17740e05,0xeed03fb5,0x5c49438e,0x4fc3f513,0x9da30add } },
25951     /* 242 */
25952     { { 0xccfa5200,0xc4e85282,0x6a19b13d,0x2707608f,0xf5726e2f,0xdcb9a53d,
25953         0xe9427de5,0x612407c9,0xd54d582a,0x3e5a17e1,0x655ae118,0xb99877de },
25954       { 0x015254de,0x6f0e972b,0xf0a6f7c5,0x92a56db1,0xa656f8b2,0xd297e4e1,
25955         0xad981983,0x99fe0052,0x07cfed84,0xd3652d2f,0x843c1738,0xc784352e } },
25956     /* 243 */
25957     { { 0x7e9b2d8a,0x6ee90af0,0x57cf1964,0xac8d7018,0x71f28efc,0xf6ed9031,
25958         0x6812b20e,0x7f70d5a9,0xf1c61eee,0x27b557f4,0xc6263758,0xf1c9bd57 },
25959       { 0x2a1a6194,0x5cf7d014,0x1890ab84,0xdd614e0b,0x0e93c2a6,0x3ef9de10,
25960         0xe0cd91c5,0xf98cf575,0x14befc32,0x504ec0c6,0x6279d68c,0xd0513a66 } },
25961     /* 244 */
25962     { { 0xa859fb6a,0xa8eadbad,0xdb283666,0xcf8346e7,0x3e22e355,0x7b35e61a,
25963         0x99639c6b,0x293ece2c,0x56f241c8,0xfa0162e2,0xbf7a1dda,0xd2e6c7b9 },
25964       { 0x40075e63,0xd0de6253,0xf9ec8286,0x2405aa61,0x8fe45494,0x2237830a,
25965         0x364e9c8c,0x4fd01ac7,0x904ba750,0x4d9c3d21,0xaf1b520b,0xd589be14 } },
25966     /* 245 */
25967     { { 0x4662e53b,0x13576a4f,0xf9077676,0x35ec2f51,0x97c0af97,0x66297d13,
25968         0x9e598b58,0xed3201fe,0x5e70f604,0x49bc752a,0xbb12d951,0xb54af535 },
25969       { 0x212c1c76,0x36ea4c2b,0xeb250dfd,0x18f5bbc7,0x9a0a1a46,0xa0d466cc,
25970         0xdac2d917,0x52564da4,0x8e95fab5,0x206559f4,0x9ca67a33,0x7487c190 } },
25971     /* 246 */
25972     { { 0xdde98e9c,0x75abfe37,0x2a411199,0x99b90b26,0xdcdb1f7c,0x1b410996,
25973         0x8b3b5675,0xab346f11,0xf1f8ae1e,0x04852193,0x6b8b98c1,0x1ec4d227 },
25974       { 0x45452baa,0xba3bc926,0xacc4a572,0x387d1858,0xe51f171e,0x9478eff6,
25975         0x931e1c00,0xf357077d,0xe54c8ca8,0xffee77cd,0x551dc9a4,0xfb4892ff } },
25976     /* 247 */
25977     { { 0x2db8dff8,0x5b1bdad0,0x5a2285a2,0xd462f4fd,0xda00b461,0x1d6aad8e,
25978         0x41306d1b,0x43fbefcf,0x6a13fe19,0x428e86f3,0x17f89404,0xc8b2f118 },
25979       { 0xf0d51afb,0x762528aa,0x549b1d06,0xa3e2fea4,0xea3ddf66,0x86fad8f2,
25980         0x4fbdd206,0x0d9ccc4b,0xc189ff5a,0xcde97d4c,0x199f19a6,0xc36793d6 } },
25981     /* 248 */
25982     { { 0x51b85197,0xea38909b,0xb4c92895,0xffb17dd0,0x1ddb3f3f,0x0eb0878b,
25983         0xc57cf0f2,0xb05d28ff,0x1abd57e2,0xd8bde2e7,0xc40c1b20,0x7f2be28d },
25984       { 0x299a2d48,0x6554dca2,0x8377982d,0x5130ba2e,0x1071971a,0x8863205f,
25985         0x7cf2825d,0x15ee6282,0x03748f2b,0xd4b6c57f,0x430385a0,0xa9e3f4da } },
25986     /* 249 */
25987     { { 0x83fbc9c6,0x33eb7cec,0x4541777e,0x24a311c7,0x4f0767fc,0xc81377f7,
25988         0x4ab702da,0x12adae36,0x2a779696,0xb7fcb6db,0x01cea6ad,0x4a6fb284 },
25989       { 0xcdfc73de,0x5e8b1d2a,0x1b02fd32,0xd0efae8d,0xd81d8519,0x3f99c190,
25990         0xfc808971,0x3c18f7fa,0x51b7ae7b,0x41f713e7,0xf07fc3f8,0x0a4b3435 } },
25991     /* 250 */
25992     { { 0x019b7d2e,0x7dda3c4c,0xd4dc4b89,0x631c8d1a,0x1cdb313c,0x5489cd6e,
25993         0x4c07bb06,0xd44aed10,0x75f000d1,0x8f97e13a,0xdda5df4d,0x0e9ee64f },
25994       { 0x3e346910,0xeaa99f3b,0xfa294ad7,0x622f6921,0x0d0b2fe9,0x22aaa20d,
25995         0x1e5881ba,0x4fed2f99,0xc1571802,0x9af3b2d6,0xdc7ee17c,0x919e67a8 } },
25996     /* 251 */
25997     { { 0x76250533,0xc724fe4c,0x7d817ef8,0x8a2080e5,0x172c9751,0xa2afb0f4,
25998         0x17c0702e,0x9b10cdeb,0xc9b7e3e9,0xbf3975e3,0x1cd0cdc5,0x206117df },
25999       { 0xbe05ebd5,0xfb049e61,0x16c782c0,0xeb0bb55c,0xab7fed09,0x13a331b8,
26000         0x632863f0,0xf6c58b1d,0x4d3b6195,0x6264ef6e,0x9a53f116,0x92c51b63 } },
26001     /* 252 */
26002     { { 0x288b364d,0xa57c7bc8,0x7b41e5c4,0x4a562e08,0x698a9a11,0x699d21c6,
26003         0xf3f849b9,0xa4ed9581,0x9eb726ba,0xa223eef3,0xcc2884f9,0x13159c23 },
26004       { 0x3a3f4963,0x73931e58,0x0ada6a81,0x96500389,0x5ab2950b,0x3ee8a1c6,
26005         0x775fab52,0xeedf4949,0x4f2671b6,0x63d652e1,0x3c4e2f55,0xfed4491c } },
26006     /* 253 */
26007     { { 0xf4eb453e,0x335eadc3,0xcadd1a5b,0x5ff74b63,0x5d84a91a,0x6933d0d7,
26008         0xb49ba337,0x9ca3eeb9,0xc04c15b8,0x1f6facce,0xdc09a7e4,0x4ef19326 },
26009       { 0x3dca3233,0x53d2d324,0xa2259d4b,0x0ee40590,0x5546f002,0x18c22edb,
26010         0x09ea6b71,0x92429801,0xb0e91e61,0xaada0add,0x99963c50,0x5fe53ef4 } },
26011     /* 254 */
26012     { { 0x90c28c65,0x372dd06b,0x119ce47d,0x1765242c,0x6b22fc82,0xc041fb80,
26013         0xb0a7ccc1,0x667edf07,0x1261bece,0xc79599e7,0x19cff22a,0xbc69d9ba },
26014       { 0x13c06819,0x009d77cd,0xe282b79d,0x635a66ae,0x225b1be8,0x4edac4a6,
26015         0x524008f9,0x57d4f4e4,0xb056af84,0xee299ac5,0x3a0bc386,0xcc38444c } },
26016     /* 255 */
26017     { { 0xcd4c2356,0x490643b1,0x750547be,0x740a4851,0xd4944c04,0x643eaf29,
26018         0x299a98a0,0xba572479,0xee05fdf9,0x48b29f16,0x089b2d7b,0x33fb4f61 },
26019       { 0xa950f955,0x86704902,0xfedc3ddf,0x97e1034d,0x05fbb6a2,0x211320b6,
26020         0x432299bb,0x23d7b93f,0x8590e4a3,0x1fe1a057,0xf58c0ce6,0x8e1d0586 } },
26021 };
26022 
26023 /* Multiply the base point of P384 by the scalar and return the result.
26024  * If map is true then convert result to affine coordinates.
26025  *
26026  * r     Resulting point.
26027  * k     Scalar to multiply by.
26028  * map   Indicates whether to convert result to affine.
26029  * heap  Heap to use for allocation.
26030  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
26031  */
26032 static int sp_384_ecc_mulmod_base_12(sp_point_384* r, const sp_digit* k,
26033         int map, void* heap)
26034 {
26035     return sp_384_ecc_mulmod_stripe_12(r, &p384_base, p384_table,
26036                                       k, map, heap);
26037 }
26038 
26039 #endif
26040 
26041 /* Multiply the base point of P384 by the scalar and return the result.
26042  * If map is true then convert result to affine coordinates.
26043  *
26044  * km    Scalar to multiply by.
26045  * r     Resulting point.
26046  * map   Indicates whether to convert result to affine.
26047  * heap  Heap to use for allocation.
26048  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
26049  */
26050 int sp_ecc_mulmod_base_384(mp_int* km, ecc_point* r, int map, void* heap)
26051 {
26052 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
26053     sp_point_384 p;
26054     sp_digit kd[12];
26055 #endif
26056     sp_point_384* point;
26057     sp_digit* k = NULL;
26058     int err = MP_OKAY;
26059 
26060     err = sp_384_point_new_12(heap, p, point);
26061 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
26062     if (err == MP_OKAY) {
26063         k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
26064                                                               DYNAMIC_TYPE_ECC);
26065         if (k == NULL) {
26066             err = MEMORY_E;
26067         }
26068     }
26069 #else
26070     k = kd;
26071 #endif
26072     if (err == MP_OKAY) {
26073         sp_384_from_mp(k, 12, km);
26074 
26075             err = sp_384_ecc_mulmod_base_12(point, k, map, heap);
26076     }
26077     if (err == MP_OKAY) {
26078         err = sp_384_point_to_ecc_point_12(point, r);
26079     }
26080 
26081 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
26082     if (k != NULL) {
26083         XFREE(k, heap, DYNAMIC_TYPE_ECC);
26084     }
26085 #endif
26086     sp_384_point_free_12(point, 0, heap);
26087 
26088     return err;
26089 }
26090 
26091 #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
26092                                                         defined(HAVE_ECC_VERIFY)
26093 /* Returns 1 if the number of zero.
26094  * Implementation is constant time.
26095  *
26096  * a  Number to check.
26097  * returns 1 if the number is zero and 0 otherwise.
26098  */
26099 static int sp_384_iszero_12(const sp_digit* a)
26100 {
26101     return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] |
26102             a[8] | a[9] | a[10] | a[11]) == 0;
26103 }
26104 
26105 #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
26106 /* Add 1 to a in place, rippling the carry through all 12 words. (a = a + 1)
26107  *
26108  * a  A single precision integer of 12 words (byte offsets 0..44); word 0 is least significant.
26109  */
26110 SP_NOINLINE static void sp_384_add_one_12(sp_digit* a)
26111 {
26112     __asm__ __volatile__ (
26113         "mov    r2, #1\n\t" /* r2 = 1, the value to add */
26114         "ldr    r1, [%[a], #0]\n\t"
26115         "add    r1, r2\n\t" /* word 0 += 1; presumably the flag-setting Thumb form, so the carry feeds the adc chain - TODO confirm */
26116         "mov    r2, #0\n\t" /* r2 = 0: every adc below adds only the incoming carry */
26117         "str    r1, [%[a], #0]\n\t"
26118         "ldr    r1, [%[a], #4]\n\t"
26119         "adc    r1, r2\n\t"
26120         "str    r1, [%[a], #4]\n\t"
26121         "ldr    r1, [%[a], #8]\n\t"
26122         "adc    r1, r2\n\t"
26123         "str    r1, [%[a], #8]\n\t"
26124         "ldr    r1, [%[a], #12]\n\t"
26125         "adc    r1, r2\n\t"
26126         "str    r1, [%[a], #12]\n\t"
26127         "ldr    r1, [%[a], #16]\n\t"
26128         "adc    r1, r2\n\t"
26129         "str    r1, [%[a], #16]\n\t"
26130         "ldr    r1, [%[a], #20]\n\t"
26131         "adc    r1, r2\n\t"
26132         "str    r1, [%[a], #20]\n\t"
26133         "ldr    r1, [%[a], #24]\n\t"
26134         "adc    r1, r2\n\t"
26135         "str    r1, [%[a], #24]\n\t"
26136         "ldr    r1, [%[a], #28]\n\t"
26137         "adc    r1, r2\n\t"
26138         "str    r1, [%[a], #28]\n\t"
26139         "ldr    r1, [%[a], #32]\n\t"
26140         "adc    r1, r2\n\t"
26141         "str    r1, [%[a], #32]\n\t"
26142         "ldr    r1, [%[a], #36]\n\t"
26143         "adc    r1, r2\n\t"
26144         "str    r1, [%[a], #36]\n\t"
26145         "ldr    r1, [%[a], #40]\n\t"
26146         "adc    r1, r2\n\t"
26147         "str    r1, [%[a], #40]\n\t"
26148         "ldr    r1, [%[a], #44]\n\t"
26149         "adc    r1, r2\n\t" /* top word; any carry out of it is not reported (no outputs, void return) */
26150         "str    r1, [%[a], #44]\n\t"
26151         :
26152         : [a] "r" (a)
26153         : "memory", "r1", "r2" /* r1/r2 are scratch; memory at a is written */
26154     );
26155 }
26156 
26157 /* Read big endian unsigned byte array into r.
26158  *
26159  * r  A single precision integer.
26160  * size  Maximum number of bytes to convert
26161  * a  Byte array.
26162  * n  Number of bytes in array to read.
26163  */
26164 static void sp_384_from_bin(sp_digit* r, int size, const byte* a, int n)
26165 {
26166     int i, j = 0;
26167     word32 s = 0;
26168 
26169     r[0] = 0;
26170     for (i = n-1; i >= 0; i--) {
26171         r[j] |= (((sp_digit)a[i]) << s);
26172         if (s >= 24U) {
26173             r[j] &= 0xffffffff;
26174             s = 32U - s;
26175             if (j + 1 >= size) {
26176                 break;
26177             }
26178             r[++j] = (sp_digit)a[i] >> s;
26179             s = 8U - s;
26180         }
26181         else {
26182             s += 8U;
26183         }
26184     }
26185 
26186     for (j++; j < size; j++) {
26187         r[j] = 0;
26188     }
26189 }
26190 
26191 /* Generates a scalar that is in the range 1..order-1.
26192  *
26193  * rng  Random number generator.
26194  * k    Scalar value.
26195  * returns RNG failures, MEMORY_E when memory allocation fails and
26196  * MP_OKAY on success.
26197  */
26198 static int sp_384_ecc_gen_k_12(WC_RNG* rng, sp_digit* k)
26199 {
26200     int err;
26201     byte buf[48];
26202 
26203     do {
26204         err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
26205         if (err == 0) {
26206             sp_384_from_bin(k, 12, buf, (int)sizeof(buf));
26207             if (sp_384_cmp_12(k, p384_order2) < 0) {
26208                 sp_384_add_one_12(k);
26209                 break;
26210             }
26211         }
26212     }
26213     while (err == 0);
26214 
26215     return err;
26216 }
26217 
26218 /* Makes a random EC key pair.
26219  *
26220  * rng   Random number generator.
26221  * priv  Generated private value.
26222  * pub   Generated public point.
26223  * heap  Heap to use for allocation.
26224  * returns ECC_INF_E when the point does not have the correct order, RNG
26225  * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
26226  */
26227 int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
26228 {
26229 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
26230     sp_point_384 p;
26231     sp_digit kd[12];
26232 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
26233     sp_point_384 inf;
26234 #endif
26235 #endif
26236     sp_point_384* point;
26237     sp_digit* k = NULL;
26238 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
26239     sp_point_384* infinity;
26240 #endif
26241     int err;
26242 
26243     (void)heap;
26244 
26245     err = sp_384_point_new_12(heap, p, point);
26246 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
26247     if (err == MP_OKAY) {
26248         err = sp_384_point_new_12(heap, inf, infinity);
26249     }
26250 #endif
26251 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
26252     if (err == MP_OKAY) {
26253         k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
26254                                                               DYNAMIC_TYPE_ECC);
26255         if (k == NULL) {
26256             err = MEMORY_E;
26257         }
26258     }
26259 #else
26260     k = kd;
26261 #endif
26262 
26263     if (err == MP_OKAY) {
26264         err = sp_384_ecc_gen_k_12(rng, k);
26265     }
26266     if (err == MP_OKAY) {
26267             err = sp_384_ecc_mulmod_base_12(point, k, 1, NULL);
26268     }
26269 
26270 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
26271     if (err == MP_OKAY) {
26272             err = sp_384_ecc_mulmod_12(infinity, point, p384_order, 1, NULL);
26273     }
26274     if (err == MP_OKAY) {
26275         if ((sp_384_iszero_12(point->x) == 0) || (sp_384_iszero_12(point->y) == 0)) {
26276             err = ECC_INF_E;
26277         }
26278     }
26279 #endif
26280 
26281     if (err == MP_OKAY) {
26282         err = sp_384_to_mp(k, priv);
26283     }
26284     if (err == MP_OKAY) {
26285         err = sp_384_point_to_ecc_point_12(point, pub);
26286     }
26287 
26288 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
26289     if (k != NULL) {
26290         XFREE(k, heap, DYNAMIC_TYPE_ECC);
26291     }
26292 #endif
26293 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
26294     sp_384_point_free_12(infinity, 1, heap);
26295 #endif
26296     sp_384_point_free_12(point, 1, heap);
26297 
26298     return err;
26299 }
26300 
26301 #ifdef HAVE_ECC_DHE
26302 /* Write r as big endian to byte array.
26303  * Fixed length number of bytes written: 48
26304  *
26305  * r  A single precision integer.
26306  * a  Byte array.
26307  */
26308 static void sp_384_to_bin(sp_digit* r, byte* a)
26309 {
26310     int i, j, s = 0, b;
26311 
26312     j = 384 / 8 - 1;
26313     a[j] = 0;
26314     for (i=0; i<12 && j>=0; i++) {
26315         b = 0;
26316         /* lint allow cast of mismatch sp_digit and int */
26317         a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
26318         b += 8 - s;
26319         if (j < 0) {
26320             break;
26321         }
26322         while (b < 32) {
26323             a[j--] = (byte)(r[i] >> b);
26324             b += 8;
26325             if (j < 0) {
26326                 break;
26327             }
26328         }
26329         s = 8 - (b - 32);
26330         if (j >= 0) {
26331             a[j] = 0;
26332         }
26333         if (s != 0) {
26334             j++;
26335         }
26336     }
26337 }
26338 
26339 /* Multiply the point by the scalar and serialize the X ordinate.
26340  * The number is 0 padded to maximum size on output.
26341  *
26342  * priv    Scalar to multiply the point by.
26343  * pub     Point to multiply.
26344  * out     Buffer to hold X ordinate.
26345  * outLen  On entry, size of the buffer in bytes.
26346  *         On exit, length of data in buffer in bytes.
26347  * heap    Heap to use for allocation.
26348  * returns BUFFER_E if the buffer is to small for output size,
26349  * MEMORY_E when memory allocation fails and MP_OKAY on success.
26350  */
26351 int sp_ecc_secret_gen_384(mp_int* priv, ecc_point* pub, byte* out,
26352                           word32* outLen, void* heap)
26353 {
26354 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
26355     sp_point_384 p;
26356     sp_digit kd[12];
26357 #endif
26358     sp_point_384* point = NULL;
26359     sp_digit* k = NULL;
26360     int err = MP_OKAY;
26361 
26362     if (*outLen < 48U) {
26363         err = BUFFER_E;
26364     }
26365 
26366     if (err == MP_OKAY) {
26367         err = sp_384_point_new_12(heap, p, point);
26368     }
26369 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
26370     if (err == MP_OKAY) {
26371         k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
26372                                                               DYNAMIC_TYPE_ECC);
26373         if (k == NULL)
26374             err = MEMORY_E;
26375     }
26376 #else
26377     k = kd;
26378 #endif
26379 
26380     if (err == MP_OKAY) {
26381         sp_384_from_mp(k, 12, priv);
26382         sp_384_point_from_ecc_point_12(point, pub);
26383             err = sp_384_ecc_mulmod_12(point, point, k, 1, heap);
26384     }
26385     if (err == MP_OKAY) {
26386         sp_384_to_bin(point->x, out);
26387         *outLen = 48;
26388     }
26389 
26390 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
26391     if (k != NULL) {
26392         XFREE(k, heap, DYNAMIC_TYPE_ECC);
26393     }
26394 #endif
26395     sp_384_point_free_12(point, 0, heap);
26396 
26397     return err;
26398 }
26399 #endif /* HAVE_ECC_DHE */
26400 
26401 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
26402 #endif
26403 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
26404 #ifdef WOLFSSL_SP_SMALL
26405 /* Sub b from a into a, two words per loop pass over 12 words. (a -= b)
26406  *
26407  * a  A single precision integer; overwritten with the difference.
26408  * b  A single precision integer.
26409  */
26410 SP_NOINLINE static sp_digit sp_384_sub_in_place_12(sp_digit* a,
26411         const sp_digit* b)
26412 {
26413     sp_digit c = 0; /* borrow state carried between loop passes; starts as no borrow */
26414     __asm__ __volatile__ (
26415         "mov    r7, %[a]\n\t"
26416         "add    r7, #48\n\t" /* r7 = a + 48 bytes: loop end address */
26417         "\n1:\n\t"
26418         "mov    r5, #0\n\t"
26419         "sub    r5, %[c]\n\t" /* r5 = 0 - c; presumably re-creates the borrow flag of the previous pass for the sbc pair - TODO confirm against Thumb flag rules */
26420         "ldr    r3, [%[a]]\n\t"
26421         "ldr    r4, [%[a], #4]\n\t"
26422         "ldr    r5, [%[b]]\n\t" /* r5 is reused as data from here on */
26423         "ldr    r6, [%[b], #4]\n\t"
26424         "sbc    r3, r5\n\t"
26425         "sbc    r4, r6\n\t"
26426         "str    r3, [%[a]]\n\t"
26427         "str    r4, [%[a], #4]\n\t"
26428         "sbc    %[c], %[c]\n\t" /* c = c - c - borrow: saves this pass's borrow before the adds below touch the flags */
26429         "add    %[a], #8\n\t" /* advance both pointers by two words */
26430         "add    %[b], #8\n\t"
26431         "cmp    %[a], r7\n\t"
26432         "bne    1b\n\t"
26433         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) /* a and b are modified by the asm (local copies only) */
26434         :
26435         : "memory", "r3", "r4", "r5", "r6", "r7"
26436     );
26437 
26438     return c; /* result of the last sbc c, c: 0 when there is no borrow out, all bits set otherwise (per ARM SBC semantics) */
26439 }
26440 
26441 #else
26442 /* Sub b from a into a, fully unrolled over 12 words. (a -= b)
26443  *
26444  * a  A single precision integer; overwritten with the difference.
26445  * b  A single precision integer.
26446  * There is no separate result parameter: the subtraction is done in place.
26447  */
26448 SP_NOINLINE static sp_digit sp_384_sub_in_place_12(sp_digit* a,
26449         const sp_digit* b)
26450 {
26451     sp_digit c = 0;
26452 
26453     __asm__ __volatile__ (
26454         "ldr    r3, [%[a], #0]\n\t"
26455         "ldr    r4, [%[a], #4]\n\t"
26456         "ldr    r5, [%[b], #0]\n\t"
26457         "ldr    r6, [%[b], #4]\n\t"
26458         "sub    r3, r5\n\t" /* lowest word: plain sub starts the borrow chain */
26459         "sbc    r4, r6\n\t" /* every later word subtracts with the incoming borrow */
26460         "str    r3, [%[a], #0]\n\t"
26461         "str    r4, [%[a], #4]\n\t"
26462         "ldr    r3, [%[a], #8]\n\t"
26463         "ldr    r4, [%[a], #12]\n\t"
26464         "ldr    r5, [%[b], #8]\n\t"
26465         "ldr    r6, [%[b], #12]\n\t"
26466         "sbc    r3, r5\n\t"
26467         "sbc    r4, r6\n\t"
26468         "str    r3, [%[a], #8]\n\t"
26469         "str    r4, [%[a], #12]\n\t"
26470         "ldr    r3, [%[a], #16]\n\t"
26471         "ldr    r4, [%[a], #20]\n\t"
26472         "ldr    r5, [%[b], #16]\n\t"
26473         "ldr    r6, [%[b], #20]\n\t"
26474         "sbc    r3, r5\n\t"
26475         "sbc    r4, r6\n\t"
26476         "str    r3, [%[a], #16]\n\t"
26477         "str    r4, [%[a], #20]\n\t"
26478         "ldr    r3, [%[a], #24]\n\t"
26479         "ldr    r4, [%[a], #28]\n\t"
26480         "ldr    r5, [%[b], #24]\n\t"
26481         "ldr    r6, [%[b], #28]\n\t"
26482         "sbc    r3, r5\n\t"
26483         "sbc    r4, r6\n\t"
26484         "str    r3, [%[a], #24]\n\t"
26485         "str    r4, [%[a], #28]\n\t"
26486         "ldr    r3, [%[a], #32]\n\t"
26487         "ldr    r4, [%[a], #36]\n\t"
26488         "ldr    r5, [%[b], #32]\n\t"
26489         "ldr    r6, [%[b], #36]\n\t"
26490         "sbc    r3, r5\n\t"
26491         "sbc    r4, r6\n\t"
26492         "str    r3, [%[a], #32]\n\t"
26493         "str    r4, [%[a], #36]\n\t"
26494         "ldr    r3, [%[a], #40]\n\t"
26495         "ldr    r4, [%[a], #44]\n\t"
26496         "ldr    r5, [%[b], #40]\n\t"
26497         "ldr    r6, [%[b], #44]\n\t"
26498         "sbc    r3, r5\n\t"
26499         "sbc    r4, r6\n\t"
26500         "str    r3, [%[a], #40]\n\t"
26501         "str    r4, [%[a], #44]\n\t"
26502         "sbc    %[c], %[c]\n\t" /* c = c - c - borrow: turns the final borrow into the return value */
26503         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
26504         :
26505         : "memory", "r3", "r4", "r5", "r6" /* r3-r6 are scratch; memory at a is written */
26506     );
26507 
26508     return c; /* 0 when there is no borrow out of the top word, all bits set otherwise (per ARM SBC semantics) */
26509 }
26510 
26511 #endif /* WOLFSSL_SP_SMALL */
26512 /* Mul a by digit b into r. (r = a * b)
26513  *
26514  * r  Result; 13 digits are stored (12 product digits plus the top carry).
26515  * a  A single precision integer of 12 digits.
26516  * b  A single precision digit.
26517  */
26518 SP_NOINLINE static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a,
26519         sp_digit b)
26520 {
26521     __asm__ __volatile__ (
26522         "mov    r6, #48\n\t" /* 48 bytes = 12 digits of 4 bytes */
26523         "add    r6, %[a]\n\t"
26524         "mov    r8, %[r]\n\t" /* r8 keeps the output pointer */
26525         "mov    r9, r6\n\t" /* r9 = address just past a[11] */
26526         "mov    r3, #0\n\t"
26527         "mov    r4, #0\n\t"
26528         "1:\n\t"
26529         "mov    %[r], #0\n\t" /* %[r] is borrowed as a zero for the adc's */
26530         "mov    r5, #0\n\t"
26531         "# A[] * B\n\t"
26532         "ldr    r6, [%[a]]\n\t"
26533         "lsl    r6, r6, #16\n\t"
26534         "lsl    r7, %[b], #16\n\t"
26535         "lsr    r6, r6, #16\n\t"
26536         "lsr    r7, r7, #16\n\t"
26537         "mul    r7, r6\n\t" /* a.lo16 * b.lo16 */
26538         "add    r3, r7\n\t"
26539         "adc    r4, %[r]\n\t"
26540         "adc    r5, %[r]\n\t"
26541         "lsr    r7, %[b], #16\n\t"
26542         "mul    r6, r7\n\t" /* a.lo16 * b.hi16 */
26543         "lsr    r7, r6, #16\n\t"
26544         "lsl    r6, r6, #16\n\t"
26545         "add    r3, r6\n\t"
26546         "adc    r4, r7\n\t"
26547         "adc    r5, %[r]\n\t"
26548         "ldr    r6, [%[a]]\n\t"
26549         "lsr    r6, r6, #16\n\t"
26550         "lsr    r7, %[b], #16\n\t"
26551         "mul    r7, r6\n\t" /* a.hi16 * b.hi16 */
26552         "add    r4, r7\n\t"
26553         "adc    r5, %[r]\n\t"
26554         "lsl    r7, %[b], #16\n\t"
26555         "lsr    r7, r7, #16\n\t"
26556         "mul    r6, r7\n\t" /* a.hi16 * b.lo16 */
26557         "lsr    r7, r6, #16\n\t"
26558         "lsl    r6, r6, #16\n\t"
26559         "add    r3, r6\n\t"
26560         "adc    r4, r7\n\t"
26561         "adc    r5, %[r]\n\t"
26562         "# A[] * B - Done\n\t"
26563         "mov    %[r], r8\n\t" /* restore the output pointer */
26564         "str    r3, [%[r]]\n\t"
26565         "mov    r3, r4\n\t" /* shift the accumulator down one digit */
26566         "mov    r4, r5\n\t"
26567         "add    %[r], #4\n\t"
26568         "add    %[a], #4\n\t"
26569         "mov    r8, %[r]\n\t"
26570         "cmp    %[a], r9\n\t"
26571         "blt    1b\n\t"
26572         "str    r3, [%[r]]\n\t" /* remaining carry is stored as digit 12 */
26573         : [r] "+r" (r), [a] "+r" (a)
26574         : [b] "r" (b)
26575         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
26576     );
26577 }
26578 
26579 /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
26580  *
26581  * d1   The high order half of the number to divide.
26582  * d0   The low order half of the number to divide.
26583  * div  The divisor.
26584  * returns the result of the division.
26585  *
26586  * Note that this is an approximate div. It may give an answer 1 larger.
26587  */
26588 SP_NOINLINE static sp_digit div_384_word_12(sp_digit d1, sp_digit d0,
26589         sp_digit div)
26590 {
26591     sp_digit r = 0;
26592 
26593     __asm__ __volatile__ (
26594         "lsr    r5, %[div], #1\n\t"
26595         "add    r5, #1\n\t" /* r5 = (div >> 1) + 1 */
26596         "mov    r8, %[d0]\n\t" /* r8/r9 keep the original d0/d1, which */
26597         "mov    r9, %[d1]\n\t" /* are overwritten by the steps below   */
26598         "# Do top 32\n\t"
26599         "mov    r6, r5\n\t"
26600         "sub    r6, %[d1]\n\t"
26601         "sbc    r6, r6\n\t"
26602         "add    %[r], %[r]\n\t"
26603         "sub    %[r], r6\n\t"
26604         "and    r6, r5\n\t"
26605         "sub    %[d1], r6\n\t"
26606         "# Next 30 bits\n\t"
26607         "mov    r4, #29\n\t" /* counts 29..0: 30 iterations */
26608         "1:\n\t"
26609         "lsl    %[d0], %[d0], #1\n\t"
26610         "adc    %[d1], %[d1]\n\t"
26611         "mov    r6, r5\n\t"
26612         "sub    r6, %[d1]\n\t"
26613         "sbc    r6, r6\n\t"
26614         "add    %[r], %[r]\n\t"
26615         "sub    %[r], r6\n\t"
26616         "and    r6, r5\n\t"
26617         "sub    %[d1], r6\n\t"
26618         "sub    r4, #1\n\t"
26619         "bpl    1b\n\t"
26620         "mov    r7, #0\n\t"
26621         "add    %[r], %[r]\n\t"
26622         "add    %[r], #1\n\t"
26623         "# r * div - Start\n\t"
26624         "lsl    %[d1], %[r], #16\n\t"
26625         "lsl    r4, %[div], #16\n\t"
26626         "lsr    %[d1], %[d1], #16\n\t"
26627         "lsr    r4, r4, #16\n\t"
26628         "mul    r4, %[d1]\n\t"
26629         "lsr    r6, %[div], #16\n\t"
26630         "mul    %[d1], r6\n\t"
26631         "lsr    r5, %[d1], #16\n\t"
26632         "lsl    %[d1], %[d1], #16\n\t"
26633         "add    r4, %[d1]\n\t"
26634         "adc    r5, r7\n\t"
26635         "lsr    %[d1], %[r], #16\n\t"
26636         "mul    r6, %[d1]\n\t"
26637         "add    r5, r6\n\t"
26638         "lsl    r6, %[div], #16\n\t"
26639         "lsr    r6, r6, #16\n\t"
26640         "mul    %[d1], r6\n\t"
26641         "lsr    r6, %[d1], #16\n\t"
26642         "lsl    %[d1], %[d1], #16\n\t"
26643         "add    r4, %[d1]\n\t"
26644         "adc    r5, r6\n\t"
26645         "# r * div - Done\n\t"
26646         "mov    %[d1], r8\n\t"
26647         "sub    %[d1], r4\n\t"
26648         "mov    r4, %[d1]\n\t"
26649         "mov    %[d1], r9\n\t"
26650         "sbc    %[d1], r5\n\t"
26651         "mov    r5, %[d1]\n\t"
26652         "add    %[r], r5\n\t"
26653         "# r * div - Start\n\t"
26654         "lsl    %[d1], %[r], #16\n\t"
26655         "lsl    r4, %[div], #16\n\t"
26656         "lsr    %[d1], %[d1], #16\n\t"
26657         "lsr    r4, r4, #16\n\t"
26658         "mul    r4, %[d1]\n\t"
26659         "lsr    r6, %[div], #16\n\t"
26660         "mul    %[d1], r6\n\t"
26661         "lsr    r5, %[d1], #16\n\t"
26662         "lsl    %[d1], %[d1], #16\n\t"
26663         "add    r4, %[d1]\n\t"
26664         "adc    r5, r7\n\t"
26665         "lsr    %[d1], %[r], #16\n\t"
26666         "mul    r6, %[d1]\n\t"
26667         "add    r5, r6\n\t"
26668         "lsl    r6, %[div], #16\n\t"
26669         "lsr    r6, r6, #16\n\t"
26670         "mul    %[d1], r6\n\t"
26671         "lsr    r6, %[d1], #16\n\t"
26672         "lsl    %[d1], %[d1], #16\n\t"
26673         "add    r4, %[d1]\n\t"
26674         "adc    r5, r6\n\t"
26675         "# r * div - Done\n\t"
26676         "mov    %[d1], r8\n\t"
26677         "mov    r6, r9\n\t"
26678         "sub    r4, %[d1], r4\n\t"
26679         "sbc    r6, r5\n\t"
26680         "mov    r5, r6\n\t"
26681         "add    %[r], r5\n\t"
26682         "# r * div - Start\n\t"
26683         "lsl    %[d1], %[r], #16\n\t"
26684         "lsl    r4, %[div], #16\n\t"
26685         "lsr    %[d1], %[d1], #16\n\t"
26686         "lsr    r4, r4, #16\n\t"
26687         "mul    r4, %[d1]\n\t"
26688         "lsr    r6, %[div], #16\n\t"
26689         "mul    %[d1], r6\n\t"
26690         "lsr    r5, %[d1], #16\n\t"
26691         "lsl    %[d1], %[d1], #16\n\t"
26692         "add    r4, %[d1]\n\t"
26693         "adc    r5, r7\n\t"
26694         "lsr    %[d1], %[r], #16\n\t"
26695         "mul    r6, %[d1]\n\t"
26696         "add    r5, r6\n\t"
26697         "lsl    r6, %[div], #16\n\t"
26698         "lsr    r6, r6, #16\n\t"
26699         "mul    %[d1], r6\n\t"
26700         "lsr    r6, %[d1], #16\n\t"
26701         "lsl    %[d1], %[d1], #16\n\t"
26702         "add    r4, %[d1]\n\t"
26703         "adc    r5, r6\n\t"
26704         "# r * div - Done\n\t"
26705         "mov    %[d1], r8\n\t"
26706         "mov    r6, r9\n\t"
26707         "sub    r4, %[d1], r4\n\t"
26708         "sbc    r6, r5\n\t"
26709         "mov    r5, r6\n\t"
26710         "add    %[r], r5\n\t"
26711         "mov    r6, %[div]\n\t"
26712         "sub    r6, r4\n\t"
26713         "sbc    r6, r6\n\t"
26714         "sub    %[r], r6\n\t"
26715         : [r] "+r" (r), [d1] "+r" (d1), [d0] "+r" (d0) /* asm writes d1, d0 */
26716         : [div] "r" (div)
26717         : "r4", "r5", "r7", "r6", "r8", "r9"
26718     );
26719     return r;
26720 }
26721 
26722 /* Apply a mask to every digit of a number. (r[i] = a[i] & m)
26723  *
26724  * r  Receives the 12 masked digits.
26725  * a  The 12 digit number to mask.
26726  * m  Mask that is ANDed with each digit of a.
26727  */
26728 static void sp_384_mask_12(sp_digit* r, const sp_digit* a, sp_digit m)
26729 {
26730 #ifdef WOLFSSL_SP_SMALL
26731     int left;
26732 
26733     for (left = 12; left > 0; left--) {
26734         *r++ = *a++ & m;
26735     }
26736 #else
26737     r[0] = m & a[0];
26738     r[1] = m & a[1];
26739     r[2] = m & a[2];
26740     r[3] = m & a[3];
26741     r[4] = m & a[4];
26742     r[5] = m & a[5];
26743     r[6] = m & a[6];
26744     r[7] = m & a[7];
26745     r[8] = m & a[8];
26746     r[9] = m & a[9];
26747     r[10] = m & a[10];
26748     r[11] = m & a[11];
26749 #endif
26750 }
26751 
26752 /* Compute the remainder of a divided by d. (m*d + r = a)
26753  * The quotient m is never produced; the parameter is accepted and ignored.
26754  *
26755  * a  Number to be divided; 2 * 12 digits are read.
26756  * d  Number to divide with; 12 digits.
26757  * m  Multiplier result (unused).
26758  * r  Remainder from the division.
26759  * returns MP_OKAY indicating success.
26760  */
26761 static WC_INLINE int sp_384_div_12(const sp_digit* a, const sp_digit* d, sp_digit* m,
26762         sp_digit* r)
26763 {
26764     sp_digit rem[24], prod[13];
26765     sp_digit top, q;
26766     int pos;
26767 
26768     (void)m;
26769 
26770     top = d[11];
26771     XMEMCPY(rem, a, sizeof(*rem) * 2 * 12);
26772     for (pos = 11; pos >= 0; pos--) {
26773         sp_digit *lo = &rem[pos], *hi = &rem[12 + pos];
26774         q = div_384_word_12(*hi, hi[-1], top);  /* estimated quotient digit */
26775         sp_384_mul_d_12(prod, d, q);
26776         *hi += sp_384_sub_in_place_12(lo, prod);
26777         *hi -= prod[12];
26778         sp_384_mask_12(prod, d, *hi);           /* add d back while *hi != 0 */
26779         *hi += sp_384_add_12(lo, lo, prod);
26780         sp_384_mask_12(prod, d, *hi);
26781         *hi += sp_384_add_12(lo, lo, prod);
26782     }
26783 
26784     q = sp_384_cmp_12(rem, d) >= 0;
26785     sp_384_cond_sub_12(r, rem, d, (sp_digit)0 - q);
26786 
26787     return MP_OKAY;
26788 }
26789 
26790 /* Reduce a modulo m into r. (r = a mod m)
26791  * Thin wrapper: calls sp_384_div_12 with no quotient output (NULL).
26792  * r  A single precision number that is the reduced result.
26793  * a  A single precision number that is to be reduced.
26794  * m  A single precision number that is the modulus to reduce with.
26795  * returns MP_OKAY indicating success.
26796  */
26797 static WC_INLINE int sp_384_mod_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
26798 {
26799     return sp_384_div_12(a, m, NULL, r);
26800 }
26801 
26802 #endif
26803 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
26804 #ifdef WOLFSSL_SP_SMALL
26805 /* Order-2 for the P384 curve, least significant 32-bit word first. */
26806 static const uint32_t p384_order_minus_2[12] = {
26807     0xccc52971U,0xecec196aU,0x48b0a77aU,0x581a0db2U,0xf4372ddfU,0xc7634d81U,
26808     0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU
26809 };
26810 #else
26811 /* The low half (192 bits) of the order-2 of the P384 curve, low word first. */
26812 static const uint32_t p384_order_low[6] = {
26813     0xccc52971U,0xecec196aU,0x48b0a77aU,0x581a0db2U,0xf4372ddfU,0xc7634d81U
26814     
26815 };
26816 #endif /* WOLFSSL_SP_SMALL */
26817 
26818 /* Multiply two numbers mod the order of P384 curve. (r = a * b mod order)
26819  * The product from sp_384_mul_12 is reduced by sp_384_mont_reduce_order_12.
26820  * r  Result of the multiplication.
26821  * a  First operand of the multiplication.
26822  * b  Second operand of the multiplication.
26823  */
26824 static void sp_384_mont_mul_order_12(sp_digit* r, const sp_digit* a, const sp_digit* b)
26825 {
26826     sp_384_mul_12(r, a, b);
26827     sp_384_mont_reduce_order_12(r, p384_order, p384_mp_order);
26828 }
26829 
26830 /* Square number mod the order of P384 curve. (r = a * a mod order)
26831  * The square from sp_384_sqr_12 is reduced by sp_384_mont_reduce_order_12.
26832  * r  Result of the squaring.
26833  * a  Number to square.
26834  */
26835 static void sp_384_mont_sqr_order_12(sp_digit* r, const sp_digit* a)
26836 {
26837     sp_384_sqr_12(r, a);
26838     sp_384_mont_reduce_order_12(r, p384_order, p384_mp_order);
26839 }
26840 
26841 #ifndef WOLFSSL_SP_SMALL
26842 /* Repeatedly square a number mod the order of the P384 curve.
26843  * (r = a ^ (2 ^ n) mod order; one squaring is always done, even if n < 1)
26844  *
26845  * r  Result of the squarings.
26846  * a  Number to square.
26847  * n  Number of squarings to perform.
26848  */
26849 static void sp_384_mont_sqr_n_order_12(sp_digit* r, const sp_digit* a, int n)
26850 {
26851     sp_384_mont_sqr_order_12(r, a); /* first squaring reads a */
26852     while (n > 1) {
26853         sp_384_mont_sqr_order_12(r, r); /* the rest square r in place */
26854         n--;
26855     }
26856 }
26857 #endif /* !WOLFSSL_SP_SMALL */
26858 
26859 /* Invert the number, in Montgomery form, modulo the order of the P384 curve.
26860  * (r = 1 / a mod order, computed as a ^ (order - 2) mod order)
26861  *
26862  * r   Inverse result.
26863  * a   Number to invert.
26864  * td  Temporary data; the non-small build places t, t2, t3 at 2*12 digit steps.
26865  */
26866 static void sp_384_mont_inv_order_12(sp_digit* r, const sp_digit* a,
26867         sp_digit* td)
26868 {
26869 #ifdef WOLFSSL_SP_SMALL
26870     sp_digit* t = td;
26871     int i;
26872 
26873     XMEMCPY(t, a, sizeof(sp_digit) * 12);
26874     for (i=382; i>=0; i--) { /* bit 383 is covered by starting from t = a */
26875         sp_384_mont_sqr_order_12(t, t);
26876         if ((p384_order_minus_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
26877             sp_384_mont_mul_order_12(t, t, a);
26878         }
26879     }
26880     XMEMCPY(r, t, sizeof(sp_digit) * 12U);
26881 #else
26882     sp_digit* t = td;
26883     sp_digit* t2 = td + 2 * 12;
26884     sp_digit* t3 = td + 4 * 12;
26885     int i;
26886 
26887     /* t = a^2 */
26888     sp_384_mont_sqr_order_12(t, a);
26889     /* t = a^3 = t * a */
26890     sp_384_mont_mul_order_12(t, t, a);
26891     /* t2= a^c = t ^ 2 ^ 2 */
26892     sp_384_mont_sqr_n_order_12(t2, t, 2);
26893     /* t = a^f = t2 * t */
26894     sp_384_mont_mul_order_12(t, t2, t);
26895     /* t2= a^f0 = t ^ 2 ^ 4 */
26896     sp_384_mont_sqr_n_order_12(t2, t, 4);
26897     /* t = a^ff = t2 * t */
26898     sp_384_mont_mul_order_12(t, t2, t);
26899     /* t2= a^ff00 = t ^ 2 ^ 8 */
26900     sp_384_mont_sqr_n_order_12(t2, t, 8);
26901     /* t3= a^ffff = t2 * t */
26902     sp_384_mont_mul_order_12(t3, t2, t);
26903     /* t2= a^ffff0000 = t3 ^ 2 ^ 16 */
26904     sp_384_mont_sqr_n_order_12(t2, t3, 16);
26905     /* t = a^ffffffff = t2 * t3 */
26906     sp_384_mont_mul_order_12(t, t2, t3);
26907     /* t2= a^ffffffff0000 = t ^ 2 ^ 16  */
26908     sp_384_mont_sqr_n_order_12(t2, t, 16);
26909     /* t = a^ffffffffffff = t2 * t3 */
26910     sp_384_mont_mul_order_12(t, t2, t3);
26911     /* t2= a^ffffffffffff000000000000 = t ^ 2 ^ 48  */
26912     sp_384_mont_sqr_n_order_12(t2, t, 48);
26913     /* t = a^(2^96 - 1), i.e. 96 one bits = t2 * t */
26914     sp_384_mont_mul_order_12(t, t2, t);
26915     /* t2= a^((2^96 - 1) * 2^96) = t ^ 2 ^ 96 */
26916     sp_384_mont_sqr_n_order_12(t2, t, 96);
26917     /* t2= a^(2^192 - 1), i.e. the all-ones top half of order-2 = t2 * t */
26918     sp_384_mont_mul_order_12(t2, t2, t);
26919     for (i=191; i>=1; i--) { /* square-and-multiply over bits 191..1 */
26920         sp_384_mont_sqr_order_12(t2, t2);
26921         if (((sp_digit)p384_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
26922             sp_384_mont_mul_order_12(t2, t2, a);
26923         }
26924     }
26925     sp_384_mont_sqr_order_12(t2, t2);
26926     sp_384_mont_mul_order_12(r, t2, a); /* bit 0 of order-2 is set */
26927 #endif /* WOLFSSL_SP_SMALL */
26928 }
26929 
26930 #endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
26931 #ifdef HAVE_ECC_SIGN
26932 #ifndef SP_ECC_MAX_SIG_GEN
26933 #define SP_ECC_MAX_SIG_GEN  64 /* attempts at producing a usable signature */
26934 #endif
26935 
26936 /* Sign the hash using the private key.
26937  *   e = [hash, 384 bits] from binary
26938  *   r = (k.G)->x mod order
26939  *   s = (r * x + e) / k mod order
26940  * The hash is truncated to the first 384 bits.
26941  *
26942  * hash     Hash to sign.
26943  * hashLen  Length of the hash data in bytes.
26944  * rng      Random number generator.
26945  * priv     Private part of key - scalar.
26946  * rm       First part of result (r) as an mp_int.
26947  * sm       Second part of result (s) as an mp_int.
26948  * km       Optional k for the first attempt; zeroed once read. May be NULL.
26949  * heap     Heap to use for allocation.
26950  * returns MP_OKAY on success, MEMORY_E on allocation failure, else RNG errors.
26951  */
26952 int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
26953                     mp_int* rm, mp_int* sm, mp_int* km, void* heap)
26954 {
26955 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
26956     sp_digit* d = NULL;
26957 #else
26958     sp_digit ed[2*12];
26959     sp_digit xd[2*12];
26960     sp_digit kd[2*12];
26961     sp_digit rd[2*12];
26962     sp_digit td[3 * 2*12];
26963     sp_point_384 p;
26964 #endif
26965     sp_digit* e = NULL;
26966     sp_digit* x = NULL;
26967     sp_digit* k = NULL;
26968     sp_digit* r = NULL;
26969     sp_digit* tmp = NULL;
26970     sp_point_384* point = NULL;
26971     sp_digit carry;
26972     sp_digit* s = NULL;
26973     sp_digit* kInv = NULL;
26974     int err = MP_OKAY;
26975     int32_t c;
26976     int i;
26977 
26978     (void)heap;
26979 
26980     err = sp_384_point_new_12(heap, p, point);
26981 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
26982     if (err == MP_OKAY) {
26983         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 12, heap,
26984                                                               DYNAMIC_TYPE_ECC);
26985         if (d == NULL) {
26986             err = MEMORY_E;
26987         }
26988     }
26989 #endif
26990 
26991     if (err == MP_OKAY) {
26992 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
26993         e = d + 0 * 12;
26994         x = d + 2 * 12;
26995         k = d + 4 * 12;
26996         r = d + 6 * 12;
26997         tmp = d + 8 * 12;
26998 #else
26999         e = ed;
27000         x = xd;
27001         k = kd;
27002         r = rd;
27003         tmp = td;
27004 #endif
27005         s = e;      /* s is built in place over e */
27006         kInv = k;   /* the inverse overwrites k */
27007 
27008         if (hashLen > 48U) {
27009             hashLen = 48U;
27010         }
27011 
27012         sp_384_from_bin(e, 12, hash, (int)hashLen);
27013     }
27014 
27015     for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
27016         sp_384_from_mp(x, 12, priv);
27017 
27018         /* New random point. */
27019         if (km == NULL || mp_iszero(km)) {
27020             err = sp_384_ecc_gen_k_12(rng, k);
27021         }
27022         else {
27023             sp_384_from_mp(k, 12, km);
27024             mp_zero(km);
27025         }
27026         if (err == MP_OKAY) {
27027             err = sp_384_ecc_mulmod_base_12(point, k, 1, NULL);
27028         }
27029 
27030         if (err == MP_OKAY) {
27031             /* r = point->x mod order */
27032             XMEMCPY(r, point->x, sizeof(sp_digit) * 12U);
27033             sp_384_norm_12(r);
27034             c = sp_384_cmp_12(r, p384_order);
27035             sp_384_cond_sub_12(r, r, p384_order, 0L - (sp_digit)(c >= 0));
27036             sp_384_norm_12(r);
27037 
27038             /* Conv k to Montgomery form (mod order) */
27039             sp_384_mul_12(k, k, p384_norm_order);
27040             err = sp_384_mod_12(k, k, p384_order);
27041         }
27042         if (err == MP_OKAY) {
27043             sp_384_norm_12(k);
27044             /* kInv = 1/k mod order */
27045             sp_384_mont_inv_order_12(kInv, k, tmp);
27046             sp_384_norm_12(kInv);
27047 
27048             /* s = r * x + e */
27049             sp_384_mul_12(x, x, r);
27050             err = sp_384_mod_12(x, x, p384_order);
27051         }
27052         if (err == MP_OKAY) {
27053             sp_384_norm_12(x);
27054             carry = sp_384_add_12(s, e, x);
27055             sp_384_cond_sub_12(s, s, p384_order, 0 - carry);
27056             sp_384_norm_12(s);
27057             c = sp_384_cmp_12(s, p384_order);
27058             sp_384_cond_sub_12(s, s, p384_order, 0L - (sp_digit)(c >= 0));
27059             sp_384_norm_12(s);
27060 
27061             /* s = s * k^-1 mod order */
27062             sp_384_mont_mul_order_12(s, s, kInv);
27063             sp_384_norm_12(s);
27064 
27065             /* Check that signature is usable. */
27066             if (sp_384_iszero_12(s) == 0) {
27067                 break;
27068             }
27069         }
27070     }
27071 
27072     if ((err == MP_OKAY) && (i == 0)) { /* keep a real error from the loop */
27073         err = RNG_FAILURE_E;            /* every attempt produced s == 0 */
27074     }
27075 
27076     if (err == MP_OKAY) {
27077         err = sp_384_to_mp(r, rm);
27078     }
27079     if (err == MP_OKAY) {
27080         err = sp_384_to_mp(s, sm);
27081     }
27082 
27083 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
27084     if (d != NULL) {
27085         XMEMSET(d, 0, sizeof(sp_digit) * 7 * 2 * 12); /* whole buffer, tmp too */
27086         XFREE(d, heap, DYNAMIC_TYPE_ECC);
27087     }
27088 #else
27089     XMEMSET(e, 0, sizeof(sp_digit) * 2U * 12U);
27090     XMEMSET(x, 0, sizeof(sp_digit) * 2U * 12U);
27091     XMEMSET(k, 0, sizeof(sp_digit) * 2U * 12U);
27092     XMEMSET(r, 0, sizeof(sp_digit) * 2U * 12U);
27093     /* s aliases e and kInv aliases k, so both are already cleared above. */
27094     XMEMSET(tmp, 0, sizeof(sp_digit) * 3U * 2U * 12U);
27095 #endif
27096     sp_384_point_free_12(point, 1, heap);
27097 
27098     return err;
27099 }
27100 #endif /* HAVE_ECC_SIGN */
27101 
27102 #ifdef HAVE_ECC_VERIFY
27103 /* Verify the signature values with the hash and public key.
27104  *   e = Truncate(hash, 384)
27105  *   u1 = e/s mod order
27106  *   u2 = r/s mod order
27107  *   r == (u1.G + u2.Q)->x mod order
27108  * Optimization: Leave point in projective form.
27109  *   (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
27110  *   (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
27111  * The hash is truncated to the first 384 bits.
27112  *
27113  * hash     Hash that was signed.
27114  * hashLen  Length of the hash data in bytes.
27115  * pX, pY, pZ  Ordinates of the public key point as mp_ints.
27116  * r        First part of the signature as an mp_int.
27117  * sm       Second part of the signature as an mp_int.
27118  * res      Set to 1 when the signature matches and 0 when it does not;
27119  *          only written when no error occurred.
27120  * heap     Heap to use for allocation.
27121  * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
27122  */
27123 int sp_ecc_verify_384(const byte* hash, word32 hashLen, mp_int* pX,
27124     mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
27125 {
27126 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
27127     sp_digit* d = NULL;
27128 #else
27129     sp_digit u1d[2*12];
27130     sp_digit u2d[2*12];
27131     sp_digit sd[2*12];
27132     sp_digit tmpd[2*12 * 5];
27133     sp_point_384 p1d;
27134     sp_point_384 p2d;
27135 #endif
27136     sp_digit* u1 = NULL;
27137     sp_digit* u2 = NULL;
27138     sp_digit* s = NULL;
27139     sp_digit* tmp = NULL;
27140     sp_point_384* p1;
27141     sp_point_384* p2 = NULL;
27142     sp_digit carry;
27143     int32_t c;
27144     int err;
27145 
27146     err = sp_384_point_new_12(heap, p1d, p1);
27147     if (err == MP_OKAY) {
27148         err = sp_384_point_new_12(heap, p2d, p2);
27149     }
27150 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
27151     if (err == MP_OKAY) {
27152         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 12, heap,
27153                                                               DYNAMIC_TYPE_ECC);
27154         if (d == NULL) {
27155             err = MEMORY_E;
27156         }
27157     }
27158 #endif
27159 
27160     if (err == MP_OKAY) {
27161 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
27162         u1  = d + 0 * 12;
27163         u2  = d + 2 * 12;
27164         s   = d + 4 * 12;
27165         tmp = d + 6 * 12; /* remaining 10 * 12 digits, same as tmpd */
27166 #else
27167         u1 = u1d;
27168         u2 = u2d;
27169         s  = sd;
27170         tmp = tmpd;
27171 #endif
27172 
27173         if (hashLen > 48U) { /* only the first 48 bytes of the hash are used */
27174             hashLen = 48U;
27175         }
27176 
27177         sp_384_from_bin(u1, 12, hash, (int)hashLen);
27178         sp_384_from_mp(u2, 12, r);
27179         sp_384_from_mp(s, 12, sm);
27180         sp_384_from_mp(p2->x, 12, pX);
27181         sp_384_from_mp(p2->y, 12, pY);
27182         sp_384_from_mp(p2->z, 12, pZ);
27183 
27184         {
27185             sp_384_mul_12(s, s, p384_norm_order);
27186         }
27187         err = sp_384_mod_12(s, s, p384_order);
27188     }
27189     if (err == MP_OKAY) {
27190         sp_384_norm_12(s);
27191         {
27192             sp_384_mont_inv_order_12(s, s, tmp);
27193             sp_384_mont_mul_order_12(u1, u1, s);
27194             sp_384_mont_mul_order_12(u2, u2, s);
27195         }
27196 
27197             err = sp_384_ecc_mulmod_base_12(p1, u1, 0, heap);
27198     }
27199     if (err == MP_OKAY) {
27200             err = sp_384_ecc_mulmod_12(p2, p2, u2, 0, heap);
27201     }
27202 
27203     if (err == MP_OKAY) {
27204         {
27205             sp_384_proj_point_add_12(p1, p1, p2, tmp);
27206             if (sp_384_iszero_12(p1->z)) {
27207                 if (sp_384_iszero_12(p1->x) && sp_384_iszero_12(p1->y)) {
27208                     sp_384_proj_point_dbl_12(p1, p2, tmp);
27209                 }
27210                 else {
27211                     /* Y ordinate is not used from here - don't set. */
27212                     p1->x[0] = 0;
27213                     p1->x[1] = 0;
27214                     p1->x[2] = 0;
27215                     p1->x[3] = 0;
27216                     p1->x[4] = 0;
27217                     p1->x[5] = 0;
27218                     p1->x[6] = 0;
27219                     p1->x[7] = 0;
27220                     p1->x[8] = 0;
27221                     p1->x[9] = 0;
27222                     p1->x[10] = 0;
27223                     p1->x[11] = 0;
27224                     XMEMCPY(p1->z, p384_norm_mod, sizeof(p384_norm_mod));
27225                 }
27226             }
27227         }
27228 
27229         /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
27230         /* Reload r and convert to Montgomery form. */
27231         sp_384_from_mp(u2, 12, r);
27232         err = sp_384_mod_mul_norm_12(u2, u2, p384_mod);
27233     }
27234 
27235     if (err == MP_OKAY) {
27236         /* u1 = r.z'.z' mod prime */
27237         sp_384_mont_sqr_12(p1->z, p1->z, p384_mod, p384_mp_mod);
27238         sp_384_mont_mul_12(u1, u2, p1->z, p384_mod, p384_mp_mod);
27239         *res = (int)(sp_384_cmp_12(p1->x, u1) == 0);
27240         if (*res == 0) {
27241             /* Reload r and add order. */
27242             sp_384_from_mp(u2, 12, r);
27243             carry = sp_384_add_12(u2, u2, p384_order);
27244             /* Carry means result is greater than mod and is not valid. */
27245             if (carry == 0) {
27246                 sp_384_norm_12(u2);
27247 
27248                 /* Compare with mod and if greater or equal then not valid. */
27249                 c = sp_384_cmp_12(u2, p384_mod);
27250                 if (c < 0) {
27251                     /* Convert to Montgomery form */
27252                     err = sp_384_mod_mul_norm_12(u2, u2, p384_mod);
27253                     if (err == MP_OKAY) {
27254                         /* u1 = (r + 1*order).z'.z' mod prime */
27255                         sp_384_mont_mul_12(u1, u2, p1->z, p384_mod,
27256                                                                   p384_mp_mod);
27257                         *res = (int)(sp_384_cmp_12(p1->x, u1) == 0);
27258                     }
27259                 }
27260             }
27261         }
27262     }
27263 
27264 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
27265     if (d != NULL)
27266         XFREE(d, heap, DYNAMIC_TYPE_ECC);
27267 #endif
27268     sp_384_point_free_12(p1, 0, heap);
27269     sp_384_point_free_12(p2, 0, heap);
27270 
27271     return err;
27272 }
27273 #endif /* HAVE_ECC_VERIFY */
27274 
27275 #ifdef HAVE_ECC_CHECK_KEY
27276 /* Test the x and y ordinates of a point against the curve equation.
27277  * Evaluates y^2 - x^3 + 3*x mod prime and compares it with p384_b.
27278  * point  EC point; only its x and y ordinates are read.
27279  * heap   Heap to use if dynamically allocating.
27280  * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
27281  * not on the curve and MP_OKAY otherwise.
27282  */
27283 static int sp_384_ecc_is_point_12(sp_point_384* point, void* heap)
27284 {
27285 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
27286     sp_digit* buf = NULL;
27287 #else
27288     sp_digit accd[2*12];
27289     sp_digit xpwd[2*12];
27290 #endif
27291     sp_digit* acc;
27292     sp_digit* xpw;
27293     int ret = MP_OKAY;
27294     int n;
27295 
27296 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
27297     buf = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12 * 4, heap, DYNAMIC_TYPE_ECC);
27298     if (buf == NULL) {
27299         ret = MEMORY_E;
27300     }
27301 #endif
27302 
27303     if (ret == MP_OKAY) {
27304 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
27305         acc = buf;
27306         xpw = buf + 2 * 12;
27307 #else
27308         (void)heap;
27309         acc = accd;
27310         xpw = xpwd;
27311 #endif
27312 
27313         sp_384_sqr_12(acc, point->y);                 /* acc = y^2 */
27314         (void)sp_384_mod_12(acc, acc, p384_mod);
27315         sp_384_sqr_12(xpw, point->x);                 /* xpw = x^2 */
27316         (void)sp_384_mod_12(xpw, xpw, p384_mod);
27317         sp_384_mul_12(xpw, xpw, point->x);            /* xpw = x^3 */
27318         (void)sp_384_mod_12(xpw, xpw, p384_mod);
27319         (void)sp_384_sub_12(xpw, p384_mod, xpw);      /* xpw = prime - x^3 */
27320         sp_384_mont_add_12(acc, acc, xpw, p384_mod);  /* acc = y^2 - x^3 */
27321 
27322         for (n = 0; n < 3; n++) {                     /* acc += 3 * x */
27323             sp_384_mont_add_12(acc, acc, point->x, p384_mod);
27324         }
27325 
27326         if (sp_384_cmp_12(acc, p384_b) != 0) {
27327             ret = MP_VAL;
27328         }
27329     }
27330 
27331 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
27332     if (buf != NULL) {
27333         XFREE(buf, heap, DYNAMIC_TYPE_ECC);
27334     }
27335 #endif
27336 
27337     return ret;
27338 }
27339 
27340 /* Check that mp_int x and y ordinates form a valid point on the curve.
27341  * The ordinates are loaded into a point with z = 1 and then checked.
27342  * pX  X ordinate of EC point.
27343  * pY  Y ordinate of EC point.
27344  * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
27345  * not on the curve and MP_OKAY otherwise.
27346  */
27347 int sp_ecc_is_point_384(mp_int* pX, mp_int* pY)
27348 {
27349 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
27350     sp_point_384 ptd;
27351 #endif
27352     sp_point_384* pt;
27353     byte unit[1] = { 1 };
27354     int ret;
27355 
27356     ret = sp_384_point_new_12(NULL, ptd, pt);
27357     if (ret == MP_OKAY) {
27358         sp_384_from_mp(pt->x, 12, pX);
27359         sp_384_from_mp(pt->y, 12, pY);
27360         sp_384_from_bin(pt->z, 12, unit, (int)sizeof(unit));
27361 
27362         ret = sp_384_ecc_is_point_12(pt, NULL);
27363     }
27364 
27365     sp_384_point_free_12(pt, 0, NULL);
27366 
27367     return ret;
27368 }
27369 
27370 /* Check that the private scalar generates the EC point (px, py), the point is
27371  * on the curve and the point has the correct order.
27372  *
27373  * pX     X ordinate of EC point.
27374  * pY     Y ordinate of EC point.
27375  * privm  Private scalar that generates EC point.
27376  * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
27377  * not on the curve, ECC_INF_E if the point does not have the correct order,
27378  * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
27379  * MP_OKAY otherwise.
27380  */
27381 int sp_ecc_check_key_384(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
27382 {
27383 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
27384     sp_digit privd[12];
27385     sp_point_384 pubd;
27386     sp_point_384 pd;
27387 #endif
27388     sp_digit* priv = NULL;
27389     sp_point_384* pub;
27390     sp_point_384* p = NULL;
27391     byte one[1] = { 1 };
27392     int err;
27393 
27394     err = sp_384_point_new_12(heap, pubd, pub);
27395     if (err == MP_OKAY) {
27396         err = sp_384_point_new_12(heap, pd, p);
27397     }
27398 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
27399     if (err == MP_OKAY) {
27400         priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
27401                                                               DYNAMIC_TYPE_ECC);
27402         if (priv == NULL) {
27403             err = MEMORY_E;
27404         }
27405     }
27406 #endif
27407 
27408     if (err == MP_OKAY) {
27409 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
27410         priv = privd;
27411 #endif
27412 
27413         sp_384_from_mp(pub->x, 12, pX);
27414         sp_384_from_mp(pub->y, 12, pY);
27415         sp_384_from_bin(pub->z, 12, one, (int)sizeof(one));
27416         sp_384_from_mp(priv, 12, privm);
27417 
27418         /* Check point at infinitiy. */
27419         if ((sp_384_iszero_12(pub->x) != 0) &&
27420             (sp_384_iszero_12(pub->y) != 0)) {
27421             err = ECC_INF_E;
27422         }
27423     }
27424 
27425     if (err == MP_OKAY) {
27426         /* Check range of X and Y */
27427         if (sp_384_cmp_12(pub->x, p384_mod) >= 0 ||
27428             sp_384_cmp_12(pub->y, p384_mod) >= 0) {
27429             err = ECC_OUT_OF_RANGE_E;
27430         }
27431     }
27432 
27433     if (err == MP_OKAY) {
27434         /* Check point is on curve */
27435         err = sp_384_ecc_is_point_12(pub, heap);
27436     }
27437 
27438     if (err == MP_OKAY) {
27439         /* Point * order = infinity */
27440             err = sp_384_ecc_mulmod_12(p, pub, p384_order, 1, heap);
27441     }
27442     if (err == MP_OKAY) {
27443         /* Check result is infinity */
27444         if ((sp_384_iszero_12(p->x) == 0) ||
27445             (sp_384_iszero_12(p->y) == 0)) {
27446             err = ECC_INF_E;
27447         }
27448     }
27449 
27450     if (err == MP_OKAY) {
27451         /* Base * private = point */
27452             err = sp_384_ecc_mulmod_base_12(p, priv, 1, heap);
27453     }
27454     if (err == MP_OKAY) {
27455         /* Check result is public key */
27456         if (sp_384_cmp_12(p->x, pub->x) != 0 ||
27457             sp_384_cmp_12(p->y, pub->y) != 0) {
27458             err = ECC_PRIV_KEY_E;
27459         }
27460     }
27461 
27462 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
27463     if (priv != NULL) {
27464         XFREE(priv, heap, DYNAMIC_TYPE_ECC);
27465     }
27466 #endif
27467     sp_384_point_free_12(p, 0, heap);
27468     sp_384_point_free_12(pub, 0, heap);
27469 
27470     return err;
27471 }
27472 #endif
27473 #ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
27474 /* Add two projective EC points together.
27475  * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
27476  *
27477  * pX   First EC point's X ordinate.
27478  * pY   First EC point's Y ordinate.
27479  * pZ   First EC point's Z ordinate.
27480  * qX   Second EC point's X ordinate.
27481  * qY   Second EC point's Y ordinate.
27482  * qZ   Second EC point's Z ordinate.
27483  * rX   Resultant EC point's X ordinate.
27484  * rY   Resultant EC point's Y ordinate.
27485  * rZ   Resultant EC point's Z ordinate.
27486  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
27487  */
27488 int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
27489                               mp_int* qX, mp_int* qY, mp_int* qZ,
27490                               mp_int* rX, mp_int* rY, mp_int* rZ)
27491 {
27492 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
27493     sp_digit tmpd[2 * 12 * 5];
27494     sp_point_384 pd;
27495     sp_point_384 qd;
27496 #endif
27497     sp_digit* tmp;
27498     sp_point_384* p;
27499     sp_point_384* q = NULL;
27500     int err;
27501 
27502     err = sp_384_point_new_12(NULL, pd, p);
27503     if (err == MP_OKAY) {
27504         err = sp_384_point_new_12(NULL, qd, q);
27505     }
27506 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
27507     if (err == MP_OKAY) {
27508         tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 5, NULL,
27509                                                               DYNAMIC_TYPE_ECC);
27510         if (tmp == NULL) {
27511             err = MEMORY_E;
27512         }
27513     }
27514 #else
27515     tmp = tmpd;
27516 #endif
27517 
27518     if (err == MP_OKAY) {
27519         sp_384_from_mp(p->x, 12, pX);
27520         sp_384_from_mp(p->y, 12, pY);
27521         sp_384_from_mp(p->z, 12, pZ);
27522         sp_384_from_mp(q->x, 12, qX);
27523         sp_384_from_mp(q->y, 12, qY);
27524         sp_384_from_mp(q->z, 12, qZ);
27525 
27526             sp_384_proj_point_add_12(p, p, q, tmp);
27527     }
27528 
27529     if (err == MP_OKAY) {
27530         err = sp_384_to_mp(p->x, rX);
27531     }
27532     if (err == MP_OKAY) {
27533         err = sp_384_to_mp(p->y, rY);
27534     }
27535     if (err == MP_OKAY) {
27536         err = sp_384_to_mp(p->z, rZ);
27537     }
27538 
27539 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
27540     if (tmp != NULL) {
27541         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
27542     }
27543 #endif
27544     sp_384_point_free_12(q, 0, NULL);
27545     sp_384_point_free_12(p, 0, NULL);
27546 
27547     return err;
27548 }
27549 
27550 /* Double a projective EC point.
27551  * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
27552  *
27553  * pX   EC point's X ordinate.
27554  * pY   EC point's Y ordinate.
27555  * pZ   EC point's Z ordinate.
27556  * rX   Resultant EC point's X ordinate.
27557  * rY   Resultant EC point's Y ordinate.
27558  * rZ   Resultant EC point's Z ordinate.
27559  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
27560  */
27561 int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
27562                               mp_int* rX, mp_int* rY, mp_int* rZ)
27563 {
27564 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
27565     sp_digit tmpd[2 * 12 * 2];
27566     sp_point_384 pd;
27567 #endif
27568     sp_digit* tmp;
27569     sp_point_384* p;
27570     int err;
27571 
27572     err = sp_384_point_new_12(NULL, pd, p);
27573 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
27574     if (err == MP_OKAY) {
27575         tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 2, NULL,
27576                                                               DYNAMIC_TYPE_ECC);
27577         if (tmp == NULL) {
27578             err = MEMORY_E;
27579         }
27580     }
27581 #else
27582     tmp = tmpd;
27583 #endif
27584 
27585     if (err == MP_OKAY) {
27586         sp_384_from_mp(p->x, 12, pX);
27587         sp_384_from_mp(p->y, 12, pY);
27588         sp_384_from_mp(p->z, 12, pZ);
27589 
27590             sp_384_proj_point_dbl_12(p, p, tmp);
27591     }
27592 
27593     if (err == MP_OKAY) {
27594         err = sp_384_to_mp(p->x, rX);
27595     }
27596     if (err == MP_OKAY) {
27597         err = sp_384_to_mp(p->y, rY);
27598     }
27599     if (err == MP_OKAY) {
27600         err = sp_384_to_mp(p->z, rZ);
27601     }
27602 
27603 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
27604     if (tmp != NULL) {
27605         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
27606     }
27607 #endif
27608     sp_384_point_free_12(p, 0, NULL);
27609 
27610     return err;
27611 }
27612 
27613 /* Map a projective EC point to affine in place.
27614  * pZ will be one.
27615  *
27616  * pX   EC point's X ordinate.
27617  * pY   EC point's Y ordinate.
27618  * pZ   EC point's Z ordinate.
27619  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
27620  */
27621 int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ)
27622 {
27623 #if (!defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)) || defined(WOLFSSL_SP_NO_MALLOC)
27624     sp_digit tmpd[2 * 12 * 6];
27625     sp_point_384 pd;
27626 #endif
27627     sp_digit* tmp;
27628     sp_point_384* p;
27629     int err;
27630 
27631     err = sp_384_point_new_12(NULL, pd, p);
27632 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
27633     if (err == MP_OKAY) {
27634         tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, NULL,
27635                                                               DYNAMIC_TYPE_ECC);
27636         if (tmp == NULL) {
27637             err = MEMORY_E;
27638         }
27639     }
27640 #else
27641     tmp = tmpd;
27642 #endif
27643     if (err == MP_OKAY) {
27644         sp_384_from_mp(p->x, 12, pX);
27645         sp_384_from_mp(p->y, 12, pY);
27646         sp_384_from_mp(p->z, 12, pZ);
27647 
27648         sp_384_map_12(p, p, tmp);
27649     }
27650 
27651     if (err == MP_OKAY) {
27652         err = sp_384_to_mp(p->x, pX);
27653     }
27654     if (err == MP_OKAY) {
27655         err = sp_384_to_mp(p->y, pY);
27656     }
27657     if (err == MP_OKAY) {
27658         err = sp_384_to_mp(p->z, pZ);
27659     }
27660 
27661 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
27662     if (tmp != NULL) {
27663         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
27664     }
27665 #endif
27666     sp_384_point_free_12(p, 0, NULL);
27667 
27668     return err;
27669 }
27670 #endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
27671 #ifdef HAVE_COMP_KEY
27672 /* Find the square root of a number mod the prime of the curve.
27673  *
27674  * y  The number to operate on and the result.
27675  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
27676  */
27677 static int sp_384_mont_sqrt_12(sp_digit* y)
27678 {
27679 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
27680     sp_digit* d;
27681 #else
27682     sp_digit t1d[2 * 12];
27683     sp_digit t2d[2 * 12];
27684     sp_digit t3d[2 * 12];
27685     sp_digit t4d[2 * 12];
27686     sp_digit t5d[2 * 12];
27687 #endif
27688     sp_digit* t1;
27689     sp_digit* t2;
27690     sp_digit* t3;
27691     sp_digit* t4;
27692     sp_digit* t5;
27693     int err = MP_OKAY;
27694 
27695 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
27696     d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 2 * 12, NULL, DYNAMIC_TYPE_ECC);
27697     if (d == NULL) {
27698         err = MEMORY_E;
27699     }
27700 #endif
27701 
27702     if (err == MP_OKAY) {
27703 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
27704         t1 = d + 0 * 12;
27705         t2 = d + 2 * 12;
27706         t3 = d + 4 * 12;
27707         t4 = d + 6 * 12;
27708         t5 = d + 8 * 12;
27709 #else
27710         t1 = t1d;
27711         t2 = t2d;
27712         t3 = t3d;
27713         t4 = t4d;
27714         t5 = t5d;
27715 #endif
27716 
27717         {
27718             /* t2 = y ^ 0x2 */
27719             sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod);
27720             /* t1 = y ^ 0x3 */
27721             sp_384_mont_mul_12(t1, t2, y, p384_mod, p384_mp_mod);
27722             /* t5 = y ^ 0xc */
27723             sp_384_mont_sqr_n_12(t5, t1, 2, p384_mod, p384_mp_mod);
27724             /* t1 = y ^ 0xf */
27725             sp_384_mont_mul_12(t1, t1, t5, p384_mod, p384_mp_mod);
27726             /* t2 = y ^ 0x1e */
27727             sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod);
27728             /* t3 = y ^ 0x1f */
27729             sp_384_mont_mul_12(t3, t2, y, p384_mod, p384_mp_mod);
27730             /* t2 = y ^ 0x3e0 */
27731             sp_384_mont_sqr_n_12(t2, t3, 5, p384_mod, p384_mp_mod);
27732             /* t1 = y ^ 0x3ff */
27733             sp_384_mont_mul_12(t1, t3, t2, p384_mod, p384_mp_mod);
27734             /* t2 = y ^ 0x7fe0 */
27735             sp_384_mont_sqr_n_12(t2, t1, 5, p384_mod, p384_mp_mod);
27736             /* t3 = y ^ 0x7fff */
27737             sp_384_mont_mul_12(t3, t3, t2, p384_mod, p384_mp_mod);
27738             /* t2 = y ^ 0x3fff800 */
27739             sp_384_mont_sqr_n_12(t2, t3, 15, p384_mod, p384_mp_mod);
27740             /* t4 = y ^ 0x3ffffff */
27741             sp_384_mont_mul_12(t4, t3, t2, p384_mod, p384_mp_mod);
27742             /* t2 = y ^ 0xffffffc000000 */
27743             sp_384_mont_sqr_n_12(t2, t4, 30, p384_mod, p384_mp_mod);
27744             /* t1 = y ^ 0xfffffffffffff */
27745             sp_384_mont_mul_12(t1, t4, t2, p384_mod, p384_mp_mod);
27746             /* t2 = y ^ 0xfffffffffffffff000000000000000 */
27747             sp_384_mont_sqr_n_12(t2, t1, 60, p384_mod, p384_mp_mod);
27748             /* t1 = y ^ 0xffffffffffffffffffffffffffffff */
27749             sp_384_mont_mul_12(t1, t1, t2, p384_mod, p384_mp_mod);
27750             /* t2 = y ^ 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
27751             sp_384_mont_sqr_n_12(t2, t1, 120, p384_mod, p384_mp_mod);
27752             /* t1 = y ^ 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
27753             sp_384_mont_mul_12(t1, t1, t2, p384_mod, p384_mp_mod);
27754             /* t2 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
27755             sp_384_mont_sqr_n_12(t2, t1, 15, p384_mod, p384_mp_mod);
27756             /* t1 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
27757             sp_384_mont_mul_12(t1, t3, t2, p384_mod, p384_mp_mod);
27758             /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff80000000 */
27759             sp_384_mont_sqr_n_12(t2, t1, 31, p384_mod, p384_mp_mod);
27760             /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff */
27761             sp_384_mont_mul_12(t1, t4, t2, p384_mod, p384_mp_mod);
27762             /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff0 */
27763             sp_384_mont_sqr_n_12(t2, t1, 4, p384_mod, p384_mp_mod);
27764             /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc */
27765             sp_384_mont_mul_12(t1, t5, t2, p384_mod, p384_mp_mod);
27766             /* t2 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000 */
27767             sp_384_mont_sqr_n_12(t2, t1, 62, p384_mod, p384_mp_mod);
27768             /* t1 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000001 */
27769             sp_384_mont_mul_12(t1, y, t2, p384_mod, p384_mp_mod);
27770             /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc00000000000000040000000 */
27771             sp_384_mont_sqr_n_12(y, t1, 30, p384_mod, p384_mp_mod);
27772         }
27773     }
27774 
27775 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
27776     if (d != NULL) {
27777         XFREE(d, NULL, DYNAMIC_TYPE_ECC);
27778     }
27779 #endif
27780 
27781     return err;
27782 }
27783 
27784 
27785 /* Uncompress the point given the X ordinate.
27786  *
27787  * xm    X ordinate.
27788  * odd   Whether the Y ordinate is odd.
27789  * ym    Calculated Y ordinate.
27790  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
27791  */
27792 int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym)
27793 {
27794 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
27795     sp_digit* d;
27796 #else
27797     sp_digit xd[2 * 12];
27798     sp_digit yd[2 * 12];
27799 #endif
27800     sp_digit* x = NULL;
27801     sp_digit* y = NULL;
27802     int err = MP_OKAY;
27803 
27804 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
27805     d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 12, NULL, DYNAMIC_TYPE_ECC);
27806     if (d == NULL) {
27807         err = MEMORY_E;
27808     }
27809 #endif
27810 
27811     if (err == MP_OKAY) {
27812 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
27813         x = d + 0 * 12;
27814         y = d + 2 * 12;
27815 #else
27816         x = xd;
27817         y = yd;
27818 #endif
27819 
27820         sp_384_from_mp(x, 12, xm);
27821         err = sp_384_mod_mul_norm_12(x, x, p384_mod);
27822     }
27823     if (err == MP_OKAY) {
27824         /* y = x^3 */
27825         {
27826             sp_384_mont_sqr_12(y, x, p384_mod, p384_mp_mod);
27827             sp_384_mont_mul_12(y, y, x, p384_mod, p384_mp_mod);
27828         }
27829         /* y = x^3 - 3x */
27830         sp_384_mont_sub_12(y, y, x, p384_mod);
27831         sp_384_mont_sub_12(y, y, x, p384_mod);
27832         sp_384_mont_sub_12(y, y, x, p384_mod);
27833         /* y = x^3 - 3x + b */
27834         err = sp_384_mod_mul_norm_12(x, p384_b, p384_mod);
27835     }
27836     if (err == MP_OKAY) {
27837         sp_384_mont_add_12(y, y, x, p384_mod);
27838         /* y = sqrt(x^3 - 3x + b) */
27839         err = sp_384_mont_sqrt_12(y);
27840     }
27841     if (err == MP_OKAY) {
27842         XMEMSET(y + 12, 0, 12U * sizeof(sp_digit));
27843         sp_384_mont_reduce_12(y, p384_mod, p384_mp_mod);
27844         if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
27845             sp_384_mont_sub_12(y, p384_mod, y, p384_mod);
27846         }
27847 
27848         err = sp_384_to_mp(y, ym);
27849     }
27850 
27851 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
27852     if (d != NULL) {
27853         XFREE(d, NULL, DYNAMIC_TYPE_ECC);
27854     }
27855 #endif
27856 
27857     return err;
27858 }
27859 #endif
27860 #endif /* WOLFSSL_SP_384 */
27861 #endif /* WOLFSSL_HAVE_SP_ECC */
27862 #endif /* WOLFSSL_SP_ARM_THUMB_ASM */
27863 #endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_ECC */
27864