Xuyi Wang / wolfcrypt

Dependents:   OS

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers sp_arm32.c Source File

sp_arm32.c

00001 /* sp.c
00002  *
00003  * Copyright (C) 2006-2018 wolfSSL Inc.
00004  *
00005  * This file is part of wolfSSL.
00006  *
00007  * wolfSSL is free software; you can redistribute it and/or modify
00008  * it under the terms of the GNU General Public License as published by
00009  * the Free Software Foundation; either version 2 of the License, or
00010  * (at your option) any later version.
00011  *
00012  * wolfSSL is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
00020  */
00021 
00022 /* Implementation by Sean Parkinson. */
00023 
00024 #ifdef HAVE_CONFIG_H
00025     #include <config.h>
00026 #endif
00027 
00028 #include <wolfcrypt/settings.h>
00029 #include <wolfcrypt/error-crypt.h>
00030 #include <wolfcrypt/cpuid.h>
00031 #ifdef NO_INLINE
00032     #include <wolfcrypt/misc.h>
00033 #else
00034     #define WOLFSSL_MISC_INCLUDED
00035     #include <wolfcrypt/src/misc.c>
00036 #endif
00037 
00038 #if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) || \
00039                                     defined(WOLFSSL_HAVE_SP_ECC)
00040 
00041 #ifdef RSA_LOW_MEM
00042 #define SP_RSA_PRIVATE_EXP_D
00043 
00044 #ifndef WOLFSSL_SP_SMALL
00045 #define WOLFSSL_SP_SMALL
00046 #endif
00047 #endif
00048 
00049 #include <wolfcrypt/sp.h>
00050 
00051 #ifdef WOLFSSL_SP_ARM32_ASM
00052 #if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
00053 #ifndef WOLFSSL_SP_NO_2048
00054 /* Read big endian unsigned byte aray into r.
00055  *
00056  * r  A single precision integer.
00057  * a  Byte array.
00058  * n  Number of bytes in array to read.
00059  */
00060 static void sp_2048_from_bin(sp_digit* r, int max, const byte* a, int n)
00061 {
00062     int i, j = 0, s = 0;
00063 
00064     r[0] = 0;
00065     for (i = n-1; i >= 0; i--) {
00066         r[j] |= ((sp_digit)a[i]) << s;
00067         if (s >= 24) {
00068             r[j] &= 0xffffffff;
00069             s = 32 - s;
00070             if (j + 1 >= max)
00071                 break;
00072             r[++j] = a[i] >> s;
00073             s = 8 - s;
00074         }
00075         else
00076             s += 8;
00077     }
00078 
00079     for (j++; j < max; j++)
00080         r[j] = 0;
00081 }
00082 
00083 /* Convert an mp_int to an array of sp_digit.
00084  *
00085  * r  A single precision integer.
00086  * a  A multi-precision integer.
00087  */
00088 static void sp_2048_from_mp(sp_digit* r, int max, mp_int* a)
00089 {
00090 #if DIGIT_BIT == 32
00091     int j;
00092 
00093     XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
00094 
00095     for (j = a->used; j < max; j++)
00096         r[j] = 0;
00097 #elif DIGIT_BIT > 32
00098     int i, j = 0, s = 0;
00099 
00100     r[0] = 0;
00101     for (i = 0; i < a->used && j < max; i++) {
00102         r[j] |= a->dp[i] << s;
00103         r[j] &= 0xffffffff;
00104         s = 32 - s;
00105         if (j + 1 >= max)
00106             break;
00107         r[++j] = a->dp[i] >> s;
00108         while (s + 32 <= DIGIT_BIT) {
00109             s += 32;
00110             r[j] &= 0xffffffff;
00111             if (j + 1 >= max)
00112                 break;
00113             if (s < DIGIT_BIT)
00114                 r[++j] = a->dp[i] >> s;
00115             else
00116                 r[++j] = 0;
00117         }
00118         s = DIGIT_BIT - s;
00119     }
00120 
00121     for (j++; j < max; j++)
00122         r[j] = 0;
00123 #else
00124     int i, j = 0, s = 0;
00125 
00126     r[0] = 0;
00127     for (i = 0; i < a->used && j < max; i++) {
00128         r[j] |= ((sp_digit)a->dp[i]) << s;
00129         if (s + DIGIT_BIT >= 32) {
00130             r[j] &= 0xffffffff;
00131             if (j + 1 >= max)
00132                 break;
00133             s = 32 - s;
00134             if (s == DIGIT_BIT) {
00135                 r[++j] = 0;
00136                 s = 0;
00137             }
00138             else {
00139                 r[++j] = a->dp[i] >> s;
00140                 s = DIGIT_BIT - s;
00141             }
00142         }
00143         else
00144             s += DIGIT_BIT;
00145     }
00146 
00147     for (j++; j < max; j++)
00148         r[j] = 0;
00149 #endif
00150 }
00151 
00152 /* Write r as big endian to byte aray.
00153  * Fixed length number of bytes written: 256
00154  *
00155  * r  A single precision integer.
00156  * a  Byte array.
00157  */
00158 static void sp_2048_to_bin(sp_digit* r, byte* a)
00159 {
00160     int i, j, s = 0, b;
00161 
00162     j = 2048 / 8 - 1;
00163     a[j] = 0;
00164     for (i=0; i<64 && j>=0; i++) {
00165         b = 0;
00166         a[j--] |= r[i] << s; b += 8 - s;
00167         if (j < 0)
00168             break;
00169         while (b < 32) {
00170             a[j--] = r[i] >> b; b += 8;
00171             if (j < 0)
00172                 break;
00173         }
00174         s = 8 - (b - 32);
00175         if (j >= 0)
00176             a[j] = 0;
00177         if (s != 0)
00178             j++;
00179     }
00180 }
00181 
00182 #ifndef WOLFSSL_SP_SMALL
00183 /* Multiply a and b into r. (r = a * b)
00184  *
00185  * r  A single precision integer.
00186  * a  A single precision integer.
00187  * b  A single precision integer.
00188  */
00189 static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
00190 {
00191     sp_digit tmp[8];
00192 
00193     __asm__ __volatile__ (
00194         "mov    r10, #0\n\t"
00195         "#  A[0] * B[0]\n\t"
00196         "ldr    r8, [%[a], #0]\n\t"
00197         "ldr    r9, [%[b], #0]\n\t"
00198         "umull  r3, r4, r8, r9\n\t"
00199         "mov    r5, #0\n\t"
00200         "str    r3, [%[tmp]]\n\t"
00201         "#  A[0] * B[1]\n\t"
00202         "ldr    r8, [%[a], #0]\n\t"
00203         "ldr    r9, [%[b], #4]\n\t"
00204         "umull  r6, r7, r8, r9\n\t"
00205         "adds   r4, r4, r6\n\t"
00206         "adcs   r5, r5, r7\n\t"
00207         "adc    r3, r10, r10\n\t"
00208         "#  A[1] * B[0]\n\t"
00209         "ldr    r8, [%[a], #4]\n\t"
00210         "ldr    r9, [%[b], #0]\n\t"
00211         "umull  r6, r7, r8, r9\n\t"
00212         "adds   r4, r4, r6\n\t"
00213         "adcs   r5, r5, r7\n\t"
00214         "adc    r3, r3, r10\n\t"
00215         "str    r4, [%[tmp], #4]\n\t"
00216         "#  A[0] * B[2]\n\t"
00217         "ldr    r8, [%[a], #0]\n\t"
00218         "ldr    r9, [%[b], #8]\n\t"
00219         "umull  r6, r7, r8, r9\n\t"
00220         "adds   r5, r5, r6\n\t"
00221         "adcs   r3, r3, r7\n\t"
00222         "adc    r4, r10, r10\n\t"
00223         "#  A[1] * B[1]\n\t"
00224         "ldr    r8, [%[a], #4]\n\t"
00225         "ldr    r9, [%[b], #4]\n\t"
00226         "umull  r6, r7, r8, r9\n\t"
00227         "adds   r5, r5, r6\n\t"
00228         "adcs   r3, r3, r7\n\t"
00229         "adc    r4, r4, r10\n\t"
00230         "#  A[2] * B[0]\n\t"
00231         "ldr    r8, [%[a], #8]\n\t"
00232         "ldr    r9, [%[b], #0]\n\t"
00233         "umull  r6, r7, r8, r9\n\t"
00234         "adds   r5, r5, r6\n\t"
00235         "adcs   r3, r3, r7\n\t"
00236         "adc    r4, r4, r10\n\t"
00237         "str    r5, [%[tmp], #8]\n\t"
00238         "#  A[0] * B[3]\n\t"
00239         "ldr    r8, [%[a], #0]\n\t"
00240         "ldr    r9, [%[b], #12]\n\t"
00241         "umull  r6, r7, r8, r9\n\t"
00242         "adds   r3, r3, r6\n\t"
00243         "adcs   r4, r4, r7\n\t"
00244         "adc    r5, r10, r10\n\t"
00245         "#  A[1] * B[2]\n\t"
00246         "ldr    r8, [%[a], #4]\n\t"
00247         "ldr    r9, [%[b], #8]\n\t"
00248         "umull  r6, r7, r8, r9\n\t"
00249         "adds   r3, r3, r6\n\t"
00250         "adcs   r4, r4, r7\n\t"
00251         "adc    r5, r5, r10\n\t"
00252         "#  A[2] * B[1]\n\t"
00253         "ldr    r8, [%[a], #8]\n\t"
00254         "ldr    r9, [%[b], #4]\n\t"
00255         "umull  r6, r7, r8, r9\n\t"
00256         "adds   r3, r3, r6\n\t"
00257         "adcs   r4, r4, r7\n\t"
00258         "adc    r5, r5, r10\n\t"
00259         "#  A[3] * B[0]\n\t"
00260         "ldr    r8, [%[a], #12]\n\t"
00261         "ldr    r9, [%[b], #0]\n\t"
00262         "umull  r6, r7, r8, r9\n\t"
00263         "adds   r3, r3, r6\n\t"
00264         "adcs   r4, r4, r7\n\t"
00265         "adc    r5, r5, r10\n\t"
00266         "str    r3, [%[tmp], #12]\n\t"
00267         "#  A[0] * B[4]\n\t"
00268         "ldr    r8, [%[a], #0]\n\t"
00269         "ldr    r9, [%[b], #16]\n\t"
00270         "umull  r6, r7, r8, r9\n\t"
00271         "adds   r4, r4, r6\n\t"
00272         "adcs   r5, r5, r7\n\t"
00273         "adc    r3, r10, r10\n\t"
00274         "#  A[1] * B[3]\n\t"
00275         "ldr    r8, [%[a], #4]\n\t"
00276         "ldr    r9, [%[b], #12]\n\t"
00277         "umull  r6, r7, r8, r9\n\t"
00278         "adds   r4, r4, r6\n\t"
00279         "adcs   r5, r5, r7\n\t"
00280         "adc    r3, r3, r10\n\t"
00281         "#  A[2] * B[2]\n\t"
00282         "ldr    r8, [%[a], #8]\n\t"
00283         "ldr    r9, [%[b], #8]\n\t"
00284         "umull  r6, r7, r8, r9\n\t"
00285         "adds   r4, r4, r6\n\t"
00286         "adcs   r5, r5, r7\n\t"
00287         "adc    r3, r3, r10\n\t"
00288         "#  A[3] * B[1]\n\t"
00289         "ldr    r8, [%[a], #12]\n\t"
00290         "ldr    r9, [%[b], #4]\n\t"
00291         "umull  r6, r7, r8, r9\n\t"
00292         "adds   r4, r4, r6\n\t"
00293         "adcs   r5, r5, r7\n\t"
00294         "adc    r3, r3, r10\n\t"
00295         "#  A[4] * B[0]\n\t"
00296         "ldr    r8, [%[a], #16]\n\t"
00297         "ldr    r9, [%[b], #0]\n\t"
00298         "umull  r6, r7, r8, r9\n\t"
00299         "adds   r4, r4, r6\n\t"
00300         "adcs   r5, r5, r7\n\t"
00301         "adc    r3, r3, r10\n\t"
00302         "str    r4, [%[tmp], #16]\n\t"
00303         "#  A[0] * B[5]\n\t"
00304         "ldr    r8, [%[a], #0]\n\t"
00305         "ldr    r9, [%[b], #20]\n\t"
00306         "umull  r6, r7, r8, r9\n\t"
00307         "adds   r5, r5, r6\n\t"
00308         "adcs   r3, r3, r7\n\t"
00309         "adc    r4, r10, r10\n\t"
00310         "#  A[1] * B[4]\n\t"
00311         "ldr    r8, [%[a], #4]\n\t"
00312         "ldr    r9, [%[b], #16]\n\t"
00313         "umull  r6, r7, r8, r9\n\t"
00314         "adds   r5, r5, r6\n\t"
00315         "adcs   r3, r3, r7\n\t"
00316         "adc    r4, r4, r10\n\t"
00317         "#  A[2] * B[3]\n\t"
00318         "ldr    r8, [%[a], #8]\n\t"
00319         "ldr    r9, [%[b], #12]\n\t"
00320         "umull  r6, r7, r8, r9\n\t"
00321         "adds   r5, r5, r6\n\t"
00322         "adcs   r3, r3, r7\n\t"
00323         "adc    r4, r4, r10\n\t"
00324         "#  A[3] * B[2]\n\t"
00325         "ldr    r8, [%[a], #12]\n\t"
00326         "ldr    r9, [%[b], #8]\n\t"
00327         "umull  r6, r7, r8, r9\n\t"
00328         "adds   r5, r5, r6\n\t"
00329         "adcs   r3, r3, r7\n\t"
00330         "adc    r4, r4, r10\n\t"
00331         "#  A[4] * B[1]\n\t"
00332         "ldr    r8, [%[a], #16]\n\t"
00333         "ldr    r9, [%[b], #4]\n\t"
00334         "umull  r6, r7, r8, r9\n\t"
00335         "adds   r5, r5, r6\n\t"
00336         "adcs   r3, r3, r7\n\t"
00337         "adc    r4, r4, r10\n\t"
00338         "#  A[5] * B[0]\n\t"
00339         "ldr    r8, [%[a], #20]\n\t"
00340         "ldr    r9, [%[b], #0]\n\t"
00341         "umull  r6, r7, r8, r9\n\t"
00342         "adds   r5, r5, r6\n\t"
00343         "adcs   r3, r3, r7\n\t"
00344         "adc    r4, r4, r10\n\t"
00345         "str    r5, [%[tmp], #20]\n\t"
00346         "#  A[0] * B[6]\n\t"
00347         "ldr    r8, [%[a], #0]\n\t"
00348         "ldr    r9, [%[b], #24]\n\t"
00349         "umull  r6, r7, r8, r9\n\t"
00350         "adds   r3, r3, r6\n\t"
00351         "adcs   r4, r4, r7\n\t"
00352         "adc    r5, r10, r10\n\t"
00353         "#  A[1] * B[5]\n\t"
00354         "ldr    r8, [%[a], #4]\n\t"
00355         "ldr    r9, [%[b], #20]\n\t"
00356         "umull  r6, r7, r8, r9\n\t"
00357         "adds   r3, r3, r6\n\t"
00358         "adcs   r4, r4, r7\n\t"
00359         "adc    r5, r5, r10\n\t"
00360         "#  A[2] * B[4]\n\t"
00361         "ldr    r8, [%[a], #8]\n\t"
00362         "ldr    r9, [%[b], #16]\n\t"
00363         "umull  r6, r7, r8, r9\n\t"
00364         "adds   r3, r3, r6\n\t"
00365         "adcs   r4, r4, r7\n\t"
00366         "adc    r5, r5, r10\n\t"
00367         "#  A[3] * B[3]\n\t"
00368         "ldr    r8, [%[a], #12]\n\t"
00369         "ldr    r9, [%[b], #12]\n\t"
00370         "umull  r6, r7, r8, r9\n\t"
00371         "adds   r3, r3, r6\n\t"
00372         "adcs   r4, r4, r7\n\t"
00373         "adc    r5, r5, r10\n\t"
00374         "#  A[4] * B[2]\n\t"
00375         "ldr    r8, [%[a], #16]\n\t"
00376         "ldr    r9, [%[b], #8]\n\t"
00377         "umull  r6, r7, r8, r9\n\t"
00378         "adds   r3, r3, r6\n\t"
00379         "adcs   r4, r4, r7\n\t"
00380         "adc    r5, r5, r10\n\t"
00381         "#  A[5] * B[1]\n\t"
00382         "ldr    r8, [%[a], #20]\n\t"
00383         "ldr    r9, [%[b], #4]\n\t"
00384         "umull  r6, r7, r8, r9\n\t"
00385         "adds   r3, r3, r6\n\t"
00386         "adcs   r4, r4, r7\n\t"
00387         "adc    r5, r5, r10\n\t"
00388         "#  A[6] * B[0]\n\t"
00389         "ldr    r8, [%[a], #24]\n\t"
00390         "ldr    r9, [%[b], #0]\n\t"
00391         "umull  r6, r7, r8, r9\n\t"
00392         "adds   r3, r3, r6\n\t"
00393         "adcs   r4, r4, r7\n\t"
00394         "adc    r5, r5, r10\n\t"
00395         "str    r3, [%[tmp], #24]\n\t"
00396         "#  A[0] * B[7]\n\t"
00397         "ldr    r8, [%[a], #0]\n\t"
00398         "ldr    r9, [%[b], #28]\n\t"
00399         "umull  r6, r7, r8, r9\n\t"
00400         "adds   r4, r4, r6\n\t"
00401         "adcs   r5, r5, r7\n\t"
00402         "adc    r3, r10, r10\n\t"
00403         "#  A[1] * B[6]\n\t"
00404         "ldr    r8, [%[a], #4]\n\t"
00405         "ldr    r9, [%[b], #24]\n\t"
00406         "umull  r6, r7, r8, r9\n\t"
00407         "adds   r4, r4, r6\n\t"
00408         "adcs   r5, r5, r7\n\t"
00409         "adc    r3, r3, r10\n\t"
00410         "#  A[2] * B[5]\n\t"
00411         "ldr    r8, [%[a], #8]\n\t"
00412         "ldr    r9, [%[b], #20]\n\t"
00413         "umull  r6, r7, r8, r9\n\t"
00414         "adds   r4, r4, r6\n\t"
00415         "adcs   r5, r5, r7\n\t"
00416         "adc    r3, r3, r10\n\t"
00417         "#  A[3] * B[4]\n\t"
00418         "ldr    r8, [%[a], #12]\n\t"
00419         "ldr    r9, [%[b], #16]\n\t"
00420         "umull  r6, r7, r8, r9\n\t"
00421         "adds   r4, r4, r6\n\t"
00422         "adcs   r5, r5, r7\n\t"
00423         "adc    r3, r3, r10\n\t"
00424         "#  A[4] * B[3]\n\t"
00425         "ldr    r8, [%[a], #16]\n\t"
00426         "ldr    r9, [%[b], #12]\n\t"
00427         "umull  r6, r7, r8, r9\n\t"
00428         "adds   r4, r4, r6\n\t"
00429         "adcs   r5, r5, r7\n\t"
00430         "adc    r3, r3, r10\n\t"
00431         "#  A[5] * B[2]\n\t"
00432         "ldr    r8, [%[a], #20]\n\t"
00433         "ldr    r9, [%[b], #8]\n\t"
00434         "umull  r6, r7, r8, r9\n\t"
00435         "adds   r4, r4, r6\n\t"
00436         "adcs   r5, r5, r7\n\t"
00437         "adc    r3, r3, r10\n\t"
00438         "#  A[6] * B[1]\n\t"
00439         "ldr    r8, [%[a], #24]\n\t"
00440         "ldr    r9, [%[b], #4]\n\t"
00441         "umull  r6, r7, r8, r9\n\t"
00442         "adds   r4, r4, r6\n\t"
00443         "adcs   r5, r5, r7\n\t"
00444         "adc    r3, r3, r10\n\t"
00445         "#  A[7] * B[0]\n\t"
00446         "ldr    r8, [%[a], #28]\n\t"
00447         "ldr    r9, [%[b], #0]\n\t"
00448         "umull  r6, r7, r8, r9\n\t"
00449         "adds   r4, r4, r6\n\t"
00450         "adcs   r5, r5, r7\n\t"
00451         "adc    r3, r3, r10\n\t"
00452         "str    r4, [%[tmp], #28]\n\t"
00453         "#  A[1] * B[7]\n\t"
00454         "ldr    r8, [%[a], #4]\n\t"
00455         "ldr    r9, [%[b], #28]\n\t"
00456         "umull  r6, r7, r8, r9\n\t"
00457         "adds   r5, r5, r6\n\t"
00458         "adcs   r3, r3, r7\n\t"
00459         "adc    r4, r10, r10\n\t"
00460         "#  A[2] * B[6]\n\t"
00461         "ldr    r8, [%[a], #8]\n\t"
00462         "ldr    r9, [%[b], #24]\n\t"
00463         "umull  r6, r7, r8, r9\n\t"
00464         "adds   r5, r5, r6\n\t"
00465         "adcs   r3, r3, r7\n\t"
00466         "adc    r4, r4, r10\n\t"
00467         "#  A[3] * B[5]\n\t"
00468         "ldr    r8, [%[a], #12]\n\t"
00469         "ldr    r9, [%[b], #20]\n\t"
00470         "umull  r6, r7, r8, r9\n\t"
00471         "adds   r5, r5, r6\n\t"
00472         "adcs   r3, r3, r7\n\t"
00473         "adc    r4, r4, r10\n\t"
00474         "#  A[4] * B[4]\n\t"
00475         "ldr    r8, [%[a], #16]\n\t"
00476         "ldr    r9, [%[b], #16]\n\t"
00477         "umull  r6, r7, r8, r9\n\t"
00478         "adds   r5, r5, r6\n\t"
00479         "adcs   r3, r3, r7\n\t"
00480         "adc    r4, r4, r10\n\t"
00481         "#  A[5] * B[3]\n\t"
00482         "ldr    r8, [%[a], #20]\n\t"
00483         "ldr    r9, [%[b], #12]\n\t"
00484         "umull  r6, r7, r8, r9\n\t"
00485         "adds   r5, r5, r6\n\t"
00486         "adcs   r3, r3, r7\n\t"
00487         "adc    r4, r4, r10\n\t"
00488         "#  A[6] * B[2]\n\t"
00489         "ldr    r8, [%[a], #24]\n\t"
00490         "ldr    r9, [%[b], #8]\n\t"
00491         "umull  r6, r7, r8, r9\n\t"
00492         "adds   r5, r5, r6\n\t"
00493         "adcs   r3, r3, r7\n\t"
00494         "adc    r4, r4, r10\n\t"
00495         "#  A[7] * B[1]\n\t"
00496         "ldr    r8, [%[a], #28]\n\t"
00497         "ldr    r9, [%[b], #4]\n\t"
00498         "umull  r6, r7, r8, r9\n\t"
00499         "adds   r5, r5, r6\n\t"
00500         "adcs   r3, r3, r7\n\t"
00501         "adc    r4, r4, r10\n\t"
00502         "str    r5, [%[r], #32]\n\t"
00503         "#  A[2] * B[7]\n\t"
00504         "ldr    r8, [%[a], #8]\n\t"
00505         "ldr    r9, [%[b], #28]\n\t"
00506         "umull  r6, r7, r8, r9\n\t"
00507         "adds   r3, r3, r6\n\t"
00508         "adcs   r4, r4, r7\n\t"
00509         "adc    r5, r10, r10\n\t"
00510         "#  A[3] * B[6]\n\t"
00511         "ldr    r8, [%[a], #12]\n\t"
00512         "ldr    r9, [%[b], #24]\n\t"
00513         "umull  r6, r7, r8, r9\n\t"
00514         "adds   r3, r3, r6\n\t"
00515         "adcs   r4, r4, r7\n\t"
00516         "adc    r5, r5, r10\n\t"
00517         "#  A[4] * B[5]\n\t"
00518         "ldr    r8, [%[a], #16]\n\t"
00519         "ldr    r9, [%[b], #20]\n\t"
00520         "umull  r6, r7, r8, r9\n\t"
00521         "adds   r3, r3, r6\n\t"
00522         "adcs   r4, r4, r7\n\t"
00523         "adc    r5, r5, r10\n\t"
00524         "#  A[5] * B[4]\n\t"
00525         "ldr    r8, [%[a], #20]\n\t"
00526         "ldr    r9, [%[b], #16]\n\t"
00527         "umull  r6, r7, r8, r9\n\t"
00528         "adds   r3, r3, r6\n\t"
00529         "adcs   r4, r4, r7\n\t"
00530         "adc    r5, r5, r10\n\t"
00531         "#  A[6] * B[3]\n\t"
00532         "ldr    r8, [%[a], #24]\n\t"
00533         "ldr    r9, [%[b], #12]\n\t"
00534         "umull  r6, r7, r8, r9\n\t"
00535         "adds   r3, r3, r6\n\t"
00536         "adcs   r4, r4, r7\n\t"
00537         "adc    r5, r5, r10\n\t"
00538         "#  A[7] * B[2]\n\t"
00539         "ldr    r8, [%[a], #28]\n\t"
00540         "ldr    r9, [%[b], #8]\n\t"
00541         "umull  r6, r7, r8, r9\n\t"
00542         "adds   r3, r3, r6\n\t"
00543         "adcs   r4, r4, r7\n\t"
00544         "adc    r5, r5, r10\n\t"
00545         "str    r3, [%[r], #36]\n\t"
00546         "#  A[3] * B[7]\n\t"
00547         "ldr    r8, [%[a], #12]\n\t"
00548         "ldr    r9, [%[b], #28]\n\t"
00549         "umull  r6, r7, r8, r9\n\t"
00550         "adds   r4, r4, r6\n\t"
00551         "adcs   r5, r5, r7\n\t"
00552         "adc    r3, r10, r10\n\t"
00553         "#  A[4] * B[6]\n\t"
00554         "ldr    r8, [%[a], #16]\n\t"
00555         "ldr    r9, [%[b], #24]\n\t"
00556         "umull  r6, r7, r8, r9\n\t"
00557         "adds   r4, r4, r6\n\t"
00558         "adcs   r5, r5, r7\n\t"
00559         "adc    r3, r3, r10\n\t"
00560         "#  A[5] * B[5]\n\t"
00561         "ldr    r8, [%[a], #20]\n\t"
00562         "ldr    r9, [%[b], #20]\n\t"
00563         "umull  r6, r7, r8, r9\n\t"
00564         "adds   r4, r4, r6\n\t"
00565         "adcs   r5, r5, r7\n\t"
00566         "adc    r3, r3, r10\n\t"
00567         "#  A[6] * B[4]\n\t"
00568         "ldr    r8, [%[a], #24]\n\t"
00569         "ldr    r9, [%[b], #16]\n\t"
00570         "umull  r6, r7, r8, r9\n\t"
00571         "adds   r4, r4, r6\n\t"
00572         "adcs   r5, r5, r7\n\t"
00573         "adc    r3, r3, r10\n\t"
00574         "#  A[7] * B[3]\n\t"
00575         "ldr    r8, [%[a], #28]\n\t"
00576         "ldr    r9, [%[b], #12]\n\t"
00577         "umull  r6, r7, r8, r9\n\t"
00578         "adds   r4, r4, r6\n\t"
00579         "adcs   r5, r5, r7\n\t"
00580         "adc    r3, r3, r10\n\t"
00581         "str    r4, [%[r], #40]\n\t"
00582         "#  A[4] * B[7]\n\t"
00583         "ldr    r8, [%[a], #16]\n\t"
00584         "ldr    r9, [%[b], #28]\n\t"
00585         "umull  r6, r7, r8, r9\n\t"
00586         "adds   r5, r5, r6\n\t"
00587         "adcs   r3, r3, r7\n\t"
00588         "adc    r4, r10, r10\n\t"
00589         "#  A[5] * B[6]\n\t"
00590         "ldr    r8, [%[a], #20]\n\t"
00591         "ldr    r9, [%[b], #24]\n\t"
00592         "umull  r6, r7, r8, r9\n\t"
00593         "adds   r5, r5, r6\n\t"
00594         "adcs   r3, r3, r7\n\t"
00595         "adc    r4, r4, r10\n\t"
00596         "#  A[6] * B[5]\n\t"
00597         "ldr    r8, [%[a], #24]\n\t"
00598         "ldr    r9, [%[b], #20]\n\t"
00599         "umull  r6, r7, r8, r9\n\t"
00600         "adds   r5, r5, r6\n\t"
00601         "adcs   r3, r3, r7\n\t"
00602         "adc    r4, r4, r10\n\t"
00603         "#  A[7] * B[4]\n\t"
00604         "ldr    r8, [%[a], #28]\n\t"
00605         "ldr    r9, [%[b], #16]\n\t"
00606         "umull  r6, r7, r8, r9\n\t"
00607         "adds   r5, r5, r6\n\t"
00608         "adcs   r3, r3, r7\n\t"
00609         "adc    r4, r4, r10\n\t"
00610         "str    r5, [%[r], #44]\n\t"
00611         "#  A[5] * B[7]\n\t"
00612         "ldr    r8, [%[a], #20]\n\t"
00613         "ldr    r9, [%[b], #28]\n\t"
00614         "umull  r6, r7, r8, r9\n\t"
00615         "adds   r3, r3, r6\n\t"
00616         "adcs   r4, r4, r7\n\t"
00617         "adc    r5, r10, r10\n\t"
00618         "#  A[6] * B[6]\n\t"
00619         "ldr    r8, [%[a], #24]\n\t"
00620         "ldr    r9, [%[b], #24]\n\t"
00621         "umull  r6, r7, r8, r9\n\t"
00622         "adds   r3, r3, r6\n\t"
00623         "adcs   r4, r4, r7\n\t"
00624         "adc    r5, r5, r10\n\t"
00625         "#  A[7] * B[5]\n\t"
00626         "ldr    r8, [%[a], #28]\n\t"
00627         "ldr    r9, [%[b], #20]\n\t"
00628         "umull  r6, r7, r8, r9\n\t"
00629         "adds   r3, r3, r6\n\t"
00630         "adcs   r4, r4, r7\n\t"
00631         "adc    r5, r5, r10\n\t"
00632         "str    r3, [%[r], #48]\n\t"
00633         "#  A[6] * B[7]\n\t"
00634         "ldr    r8, [%[a], #24]\n\t"
00635         "ldr    r9, [%[b], #28]\n\t"
00636         "umull  r6, r7, r8, r9\n\t"
00637         "adds   r4, r4, r6\n\t"
00638         "adcs   r5, r5, r7\n\t"
00639         "adc    r3, r10, r10\n\t"
00640         "#  A[7] * B[6]\n\t"
00641         "ldr    r8, [%[a], #28]\n\t"
00642         "ldr    r9, [%[b], #24]\n\t"
00643         "umull  r6, r7, r8, r9\n\t"
00644         "adds   r4, r4, r6\n\t"
00645         "adcs   r5, r5, r7\n\t"
00646         "adc    r3, r3, r10\n\t"
00647         "str    r4, [%[r], #52]\n\t"
00648         "#  A[7] * B[7]\n\t"
00649         "ldr    r8, [%[a], #28]\n\t"
00650         "ldr    r9, [%[b], #28]\n\t"
00651         "umull  r6, r7, r8, r9\n\t"
00652         "adds   r5, r5, r6\n\t"
00653         "adc    r3, r3, r7\n\t"
00654         "str    r5, [%[r], #56]\n\t"
00655         "str    r3, [%[r], #60]\n\t"
00656         :
00657         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp)
00658         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
00659     );
00660 
00661     XMEMCPY(r, tmp, sizeof(tmp));
00662 }
00663 
00664 /* Square a and put result in r. (r = a * a)
00665  *
00666  * r  A single precision integer.
00667  * a  A single precision integer.
00668  */
00669 static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a)
00670 {
00671     sp_digit tmp[8];
00672 
00673     __asm__ __volatile__ (
00674         "mov    r14, #0\n\t"
00675         "#  A[0] * A[0]\n\t"
00676         "ldr    r10, [%[a], #0]\n\t"
00677         "umull  r8, r3, r10, r10\n\t"
00678         "mov    r4, #0\n\t"
00679         "str    r8, [%[tmp]]\n\t"
00680         "#  A[0] * A[1]\n\t"
00681         "ldr    r10, [%[a], #4]\n\t"
00682         "ldr    r8, [%[a], #0]\n\t"
00683         "umull  r8, r9, r10, r8\n\t"
00684         "adds   r3, r3, r8\n\t"
00685         "adcs   r4, r4, r9\n\t"
00686         "adc    r2, r14, r14\n\t"
00687         "adds   r3, r3, r8\n\t"
00688         "adcs   r4, r4, r9\n\t"
00689         "adc    r2, r2, r14\n\t"
00690         "str    r3, [%[tmp], #4]\n\t"
00691         "#  A[0] * A[2]\n\t"
00692         "ldr    r10, [%[a], #8]\n\t"
00693         "ldr    r8, [%[a], #0]\n\t"
00694         "umull  r8, r9, r10, r8\n\t"
00695         "adds   r4, r4, r8\n\t"
00696         "adcs   r2, r2, r9\n\t"
00697         "adc    r3, r14, r14\n\t"
00698         "adds   r4, r4, r8\n\t"
00699         "adcs   r2, r2, r9\n\t"
00700         "adc    r3, r3, r14\n\t"
00701         "#  A[1] * A[1]\n\t"
00702         "ldr    r10, [%[a], #4]\n\t"
00703         "umull  r8, r9, r10, r10\n\t"
00704         "adds   r4, r4, r8\n\t"
00705         "adcs   r2, r2, r9\n\t"
00706         "adc    r3, r3, r14\n\t"
00707         "str    r4, [%[tmp], #8]\n\t"
00708         "#  A[0] * A[3]\n\t"
00709         "ldr    r10, [%[a], #12]\n\t"
00710         "ldr    r8, [%[a], #0]\n\t"
00711         "umull  r8, r9, r10, r8\n\t"
00712         "adds   r2, r2, r8\n\t"
00713         "adcs   r3, r3, r9\n\t"
00714         "adc    r4, r14, r14\n\t"
00715         "adds   r2, r2, r8\n\t"
00716         "adcs   r3, r3, r9\n\t"
00717         "adc    r4, r4, r14\n\t"
00718         "#  A[1] * A[2]\n\t"
00719         "ldr    r10, [%[a], #8]\n\t"
00720         "ldr    r8, [%[a], #4]\n\t"
00721         "umull  r8, r9, r10, r8\n\t"
00722         "adds   r2, r2, r8\n\t"
00723         "adcs   r3, r3, r9\n\t"
00724         "adc    r4, r4, r14\n\t"
00725         "adds   r2, r2, r8\n\t"
00726         "adcs   r3, r3, r9\n\t"
00727         "adc    r4, r4, r14\n\t"
00728         "str    r2, [%[tmp], #12]\n\t"
00729         "#  A[0] * A[4]\n\t"
00730         "ldr    r10, [%[a], #16]\n\t"
00731         "ldr    r8, [%[a], #0]\n\t"
00732         "umull  r8, r9, r10, r8\n\t"
00733         "adds   r3, r3, r8\n\t"
00734         "adcs   r4, r4, r9\n\t"
00735         "adc    r2, r14, r14\n\t"
00736         "adds   r3, r3, r8\n\t"
00737         "adcs   r4, r4, r9\n\t"
00738         "adc    r2, r2, r14\n\t"
00739         "#  A[1] * A[3]\n\t"
00740         "ldr    r10, [%[a], #12]\n\t"
00741         "ldr    r8, [%[a], #4]\n\t"
00742         "umull  r8, r9, r10, r8\n\t"
00743         "adds   r3, r3, r8\n\t"
00744         "adcs   r4, r4, r9\n\t"
00745         "adc    r2, r2, r14\n\t"
00746         "adds   r3, r3, r8\n\t"
00747         "adcs   r4, r4, r9\n\t"
00748         "adc    r2, r2, r14\n\t"
00749         "#  A[2] * A[2]\n\t"
00750         "ldr    r10, [%[a], #8]\n\t"
00751         "umull  r8, r9, r10, r10\n\t"
00752         "adds   r3, r3, r8\n\t"
00753         "adcs   r4, r4, r9\n\t"
00754         "adc    r2, r2, r14\n\t"
00755         "str    r3, [%[tmp], #16]\n\t"
00756         "#  A[0] * A[5]\n\t"
00757         "ldr    r10, [%[a], #20]\n\t"
00758         "ldr    r8, [%[a], #0]\n\t"
00759         "umull  r5, r6, r10, r8\n\t"
00760         "mov    r3, #0\n\t"
00761         "mov    r7, #0\n\t"
00762         "#  A[1] * A[4]\n\t"
00763         "ldr    r10, [%[a], #16]\n\t"
00764         "ldr    r8, [%[a], #4]\n\t"
00765         "umull  r8, r9, r10, r8\n\t"
00766         "adds   r5, r5, r8\n\t"
00767         "adcs   r6, r6, r9\n\t"
00768         "adc    r7, r7, r14\n\t"
00769         "#  A[2] * A[3]\n\t"
00770         "ldr    r10, [%[a], #12]\n\t"
00771         "ldr    r8, [%[a], #8]\n\t"
00772         "umull  r8, r9, r10, r8\n\t"
00773         "adds   r5, r5, r8\n\t"
00774         "adcs   r6, r6, r9\n\t"
00775         "adc    r7, r7, r14\n\t"
00776         "adds   r5, r5, r5\n\t"
00777         "adcs   r6, r6, r6\n\t"
00778         "adc    r7, r7, r7\n\t"
00779         "adds   r4, r4, r5\n\t"
00780         "adcs   r2, r2, r6\n\t"
00781         "adc    r3, r3, r7\n\t"
00782         "str    r4, [%[tmp], #20]\n\t"
00783         "#  A[0] * A[6]\n\t"
00784         "ldr    r10, [%[a], #24]\n\t"
00785         "ldr    r8, [%[a], #0]\n\t"
00786         "umull  r5, r6, r10, r8\n\t"
00787         "mov    r4, #0\n\t"
00788         "mov    r7, #0\n\t"
00789         "#  A[1] * A[5]\n\t"
00790         "ldr    r10, [%[a], #20]\n\t"
00791         "ldr    r8, [%[a], #4]\n\t"
00792         "umull  r8, r9, r10, r8\n\t"
00793         "adds   r5, r5, r8\n\t"
00794         "adcs   r6, r6, r9\n\t"
00795         "adc    r7, r7, r14\n\t"
00796         "#  A[2] * A[4]\n\t"
00797         "ldr    r10, [%[a], #16]\n\t"
00798         "ldr    r8, [%[a], #8]\n\t"
00799         "umull  r8, r9, r10, r8\n\t"
00800         "adds   r5, r5, r8\n\t"
00801         "adcs   r6, r6, r9\n\t"
00802         "adc    r7, r7, r14\n\t"
00803         "#  A[3] * A[3]\n\t"
00804         "ldr    r10, [%[a], #12]\n\t"
00805         "umull  r8, r9, r10, r10\n\t"
00806         "adds   r5, r5, r5\n\t"
00807         "adcs   r6, r6, r6\n\t"
00808         "adc    r7, r7, r7\n\t"
00809         "adds   r5, r5, r8\n\t"
00810         "adcs   r6, r6, r9\n\t"
00811         "adc    r7, r7, r14\n\t"
00812         "adds   r2, r2, r5\n\t"
00813         "adcs   r3, r3, r6\n\t"
00814         "adc    r4, r4, r7\n\t"
00815         "str    r2, [%[tmp], #24]\n\t"
00816         "#  A[0] * A[7]\n\t"
00817         "ldr    r10, [%[a], #28]\n\t"
00818         "ldr    r8, [%[a], #0]\n\t"
00819         "umull  r5, r6, r10, r8\n\t"
00820         "mov    r2, #0\n\t"
00821         "mov    r7, #0\n\t"
00822         "#  A[1] * A[6]\n\t"
00823         "ldr    r10, [%[a], #24]\n\t"
00824         "ldr    r8, [%[a], #4]\n\t"
00825         "umull  r8, r9, r10, r8\n\t"
00826         "adds   r5, r5, r8\n\t"
00827         "adcs   r6, r6, r9\n\t"
00828         "adc    r7, r7, r14\n\t"
00829         "#  A[2] * A[5]\n\t"
00830         "ldr    r10, [%[a], #20]\n\t"
00831         "ldr    r8, [%[a], #8]\n\t"
00832         "umull  r8, r9, r10, r8\n\t"
00833         "adds   r5, r5, r8\n\t"
00834         "adcs   r6, r6, r9\n\t"
00835         "adc    r7, r7, r14\n\t"
00836         "#  A[3] * A[4]\n\t"
00837         "ldr    r10, [%[a], #16]\n\t"
00838         "ldr    r8, [%[a], #12]\n\t"
00839         "umull  r8, r9, r10, r8\n\t"
00840         "adds   r5, r5, r8\n\t"
00841         "adcs   r6, r6, r9\n\t"
00842         "adc    r7, r7, r14\n\t"
00843         "adds   r5, r5, r5\n\t"
00844         "adcs   r6, r6, r6\n\t"
00845         "adc    r7, r7, r7\n\t"
00846         "adds   r3, r3, r5\n\t"
00847         "adcs   r4, r4, r6\n\t"
00848         "adc    r2, r2, r7\n\t"
00849         "str    r3, [%[tmp], #28]\n\t"
00850         "#  A[1] * A[7]\n\t"
00851         "ldr    r10, [%[a], #28]\n\t"
00852         "ldr    r8, [%[a], #4]\n\t"
00853         "umull  r5, r6, r10, r8\n\t"
00854         "mov    r3, #0\n\t"
00855         "mov    r7, #0\n\t"
00856         "#  A[2] * A[6]\n\t"
00857         "ldr    r10, [%[a], #24]\n\t"
00858         "ldr    r8, [%[a], #8]\n\t"
00859         "umull  r8, r9, r10, r8\n\t"
00860         "adds   r5, r5, r8\n\t"
00861         "adcs   r6, r6, r9\n\t"
00862         "adc    r7, r7, r14\n\t"
00863         "#  A[3] * A[5]\n\t"
00864         "ldr    r10, [%[a], #20]\n\t"
00865         "ldr    r8, [%[a], #12]\n\t"
00866         "umull  r8, r9, r10, r8\n\t"
00867         "adds   r5, r5, r8\n\t"
00868         "adcs   r6, r6, r9\n\t"
00869         "adc    r7, r7, r14\n\t"
00870         "#  A[4] * A[4]\n\t"
00871         "ldr    r10, [%[a], #16]\n\t"
00872         "umull  r8, r9, r10, r10\n\t"
00873         "adds   r5, r5, r5\n\t"
00874         "adcs   r6, r6, r6\n\t"
00875         "adc    r7, r7, r7\n\t"
00876         "adds   r5, r5, r8\n\t"
00877         "adcs   r6, r6, r9\n\t"
00878         "adc    r7, r7, r14\n\t"
00879         "adds   r4, r4, r5\n\t"
00880         "adcs   r2, r2, r6\n\t"
00881         "adc    r3, r3, r7\n\t"
00882         "str    r4, [%[r], #32]\n\t"
00883         "#  A[2] * A[7]\n\t"
00884         "ldr    r10, [%[a], #28]\n\t"
00885         "ldr    r8, [%[a], #8]\n\t"
00886         "umull  r5, r6, r10, r8\n\t"
00887         "mov    r4, #0\n\t"
00888         "mov    r7, #0\n\t"
00889         "#  A[3] * A[6]\n\t"
00890         "ldr    r10, [%[a], #24]\n\t"
00891         "ldr    r8, [%[a], #12]\n\t"
00892         "umull  r8, r9, r10, r8\n\t"
00893         "adds   r5, r5, r8\n\t"
00894         "adcs   r6, r6, r9\n\t"
00895         "adc    r7, r7, r14\n\t"
00896         "#  A[4] * A[5]\n\t"
00897         "ldr    r10, [%[a], #20]\n\t"
00898         "ldr    r8, [%[a], #16]\n\t"
00899         "umull  r8, r9, r10, r8\n\t"
00900         "adds   r5, r5, r8\n\t"
00901         "adcs   r6, r6, r9\n\t"
00902         "adc    r7, r7, r14\n\t"
00903         "adds   r5, r5, r5\n\t"
00904         "adcs   r6, r6, r6\n\t"
00905         "adc    r7, r7, r7\n\t"
00906         "adds   r2, r2, r5\n\t"
00907         "adcs   r3, r3, r6\n\t"
00908         "adc    r4, r4, r7\n\t"
00909         "str    r2, [%[r], #36]\n\t"
00910         "#  A[3] * A[7]\n\t"
00911         "ldr    r10, [%[a], #28]\n\t"
00912         "ldr    r8, [%[a], #12]\n\t"
00913         "umull  r8, r9, r10, r8\n\t"
00914         "adds   r3, r3, r8\n\t"
00915         "adcs   r4, r4, r9\n\t"
00916         "adc    r2, r14, r14\n\t"
00917         "adds   r3, r3, r8\n\t"
00918         "adcs   r4, r4, r9\n\t"
00919         "adc    r2, r2, r14\n\t"
00920         "#  A[4] * A[6]\n\t"
00921         "ldr    r10, [%[a], #24]\n\t"
00922         "ldr    r8, [%[a], #16]\n\t"
00923         "umull  r8, r9, r10, r8\n\t"
00924         "adds   r3, r3, r8\n\t"
00925         "adcs   r4, r4, r9\n\t"
00926         "adc    r2, r2, r14\n\t"
00927         "adds   r3, r3, r8\n\t"
00928         "adcs   r4, r4, r9\n\t"
00929         "adc    r2, r2, r14\n\t"
00930         "#  A[5] * A[5]\n\t"
00931         "ldr    r10, [%[a], #20]\n\t"
00932         "umull  r8, r9, r10, r10\n\t"
00933         "adds   r3, r3, r8\n\t"
00934         "adcs   r4, r4, r9\n\t"
00935         "adc    r2, r2, r14\n\t"
00936         "str    r3, [%[r], #40]\n\t"
00937         "#  A[4] * A[7]\n\t"
00938         "ldr    r10, [%[a], #28]\n\t"
00939         "ldr    r8, [%[a], #16]\n\t"
00940         "umull  r8, r9, r10, r8\n\t"
00941         "adds   r4, r4, r8\n\t"
00942         "adcs   r2, r2, r9\n\t"
00943         "adc    r3, r14, r14\n\t"
00944         "adds   r4, r4, r8\n\t"
00945         "adcs   r2, r2, r9\n\t"
00946         "adc    r3, r3, r14\n\t"
00947         "#  A[5] * A[6]\n\t"
00948         "ldr    r10, [%[a], #24]\n\t"
00949         "ldr    r8, [%[a], #20]\n\t"
00950         "umull  r8, r9, r10, r8\n\t"
00951         "adds   r4, r4, r8\n\t"
00952         "adcs   r2, r2, r9\n\t"
00953         "adc    r3, r3, r14\n\t"
00954         "adds   r4, r4, r8\n\t"
00955         "adcs   r2, r2, r9\n\t"
00956         "adc    r3, r3, r14\n\t"
00957         "str    r4, [%[r], #44]\n\t"
00958         "#  A[5] * A[7]\n\t"
00959         "ldr    r10, [%[a], #28]\n\t"
00960         "ldr    r8, [%[a], #20]\n\t"
00961         "umull  r8, r9, r10, r8\n\t"
00962         "adds   r2, r2, r8\n\t"
00963         "adcs   r3, r3, r9\n\t"
00964         "adc    r4, r14, r14\n\t"
00965         "adds   r2, r2, r8\n\t"
00966         "adcs   r3, r3, r9\n\t"
00967         "adc    r4, r4, r14\n\t"
00968         "#  A[6] * A[6]\n\t"
00969         "ldr    r10, [%[a], #24]\n\t"
00970         "umull  r8, r9, r10, r10\n\t"
00971         "adds   r2, r2, r8\n\t"
00972         "adcs   r3, r3, r9\n\t"
00973         "adc    r4, r4, r14\n\t"
00974         "str    r2, [%[r], #48]\n\t"
00975         "#  A[6] * A[7]\n\t"
00976         "ldr    r10, [%[a], #28]\n\t"
00977         "ldr    r8, [%[a], #24]\n\t"
00978         "umull  r8, r9, r10, r8\n\t"
00979         "adds   r3, r3, r8\n\t"
00980         "adcs   r4, r4, r9\n\t"
00981         "adc    r2, r14, r14\n\t"
00982         "adds   r3, r3, r8\n\t"
00983         "adcs   r4, r4, r9\n\t"
00984         "adc    r2, r2, r14\n\t"
00985         "str    r3, [%[r], #52]\n\t"
00986         "#  A[7] * A[7]\n\t"
00987         "ldr    r10, [%[a], #28]\n\t"
00988         "umull  r8, r9, r10, r10\n\t"
00989         "adds   r4, r4, r8\n\t"
00990         "adc    r2, r2, r9\n\t"
00991         "str    r4, [%[r], #56]\n\t"
00992         "str    r2, [%[r], #60]\n\t"
00993         :
00994         : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp)
00995         : "memory", "r2", "r3", "r4", "r8", "r9", "r10", "r8", "r5", "r6", "r7", "r14"
00996     );
00997 
00998     XMEMCPY(r, tmp, sizeof(tmp));
00999 }
01000 
01001 /* Add b to a into r. (r = a + b)
01002  *
01003  * r  A single precision integer.
01004  * a  A single precision integer.
01005  * b  A single precision integer.
01006  */
01007 static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a,
01008         const sp_digit* b)
01009 {
01010     sp_digit c = 0;
01011 
01012     __asm__ __volatile__ (
01013         "mov    r12, #0\n\t"
01014         "ldr    r4, [%[a], #0]\n\t"
01015         "ldr    r5, [%[a], #4]\n\t"
01016         "ldr    r6, [%[a], #8]\n\t"
01017         "ldr    r7, [%[a], #12]\n\t"
01018         "ldr    r8, [%[b], #0]\n\t"
01019         "ldr    r9, [%[b], #4]\n\t"
01020         "ldr    r10, [%[b], #8]\n\t"
01021         "ldr    r14, [%[b], #12]\n\t"
01022         "adds   r4, r4, r8\n\t"
01023         "adcs   r5, r5, r9\n\t"
01024         "adcs   r6, r6, r10\n\t"
01025         "adcs   r7, r7, r14\n\t"
01026         "str    r4, [%[r], #0]\n\t"
01027         "str    r5, [%[r], #4]\n\t"
01028         "str    r6, [%[r], #8]\n\t"
01029         "str    r7, [%[r], #12]\n\t"
01030         "ldr    r4, [%[a], #16]\n\t"
01031         "ldr    r5, [%[a], #20]\n\t"
01032         "ldr    r6, [%[a], #24]\n\t"
01033         "ldr    r7, [%[a], #28]\n\t"
01034         "ldr    r8, [%[b], #16]\n\t"
01035         "ldr    r9, [%[b], #20]\n\t"
01036         "ldr    r10, [%[b], #24]\n\t"
01037         "ldr    r14, [%[b], #28]\n\t"
01038         "adcs   r4, r4, r8\n\t"
01039         "adcs   r5, r5, r9\n\t"
01040         "adcs   r6, r6, r10\n\t"
01041         "adcs   r7, r7, r14\n\t"
01042         "str    r4, [%[r], #16]\n\t"
01043         "str    r5, [%[r], #20]\n\t"
01044         "str    r6, [%[r], #24]\n\t"
01045         "str    r7, [%[r], #28]\n\t"
01046         "adc    %[c], r12, r12\n\t"
01047         : [c] "+r" (c)
01048         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
01049         : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
01050     );
01051 
01052     return c;
01053 }
01054 
01055 /* Sub b from a into a. (a -= b)
01056  *
01057  * a  A single precision integer and result.
01058  * b  A single precision integer.
01059  */
01060 static sp_digit sp_2048_sub_in_place_16(sp_digit* a, const sp_digit* b)
01061 {
01062     sp_digit c = 0;
01063 
01064     __asm__ __volatile__ (
01065         "ldr    r2, [%[a], #0]\n\t"
01066         "ldr    r3, [%[a], #4]\n\t"
01067         "ldr    r4, [%[a], #8]\n\t"
01068         "ldr    r5, [%[a], #12]\n\t"
01069         "ldr    r6, [%[b], #0]\n\t"
01070         "ldr    r7, [%[b], #4]\n\t"
01071         "ldr    r8, [%[b], #8]\n\t"
01072         "ldr    r9, [%[b], #12]\n\t"
01073         "subs   r2, r2, r6\n\t"
01074         "sbcs   r3, r3, r7\n\t"
01075         "sbcs   r4, r4, r8\n\t"
01076         "sbcs   r5, r5, r9\n\t"
01077         "str    r2, [%[a], #0]\n\t"
01078         "str    r3, [%[a], #4]\n\t"
01079         "str    r4, [%[a], #8]\n\t"
01080         "str    r5, [%[a], #12]\n\t"
01081         "ldr    r2, [%[a], #16]\n\t"
01082         "ldr    r3, [%[a], #20]\n\t"
01083         "ldr    r4, [%[a], #24]\n\t"
01084         "ldr    r5, [%[a], #28]\n\t"
01085         "ldr    r6, [%[b], #16]\n\t"
01086         "ldr    r7, [%[b], #20]\n\t"
01087         "ldr    r8, [%[b], #24]\n\t"
01088         "ldr    r9, [%[b], #28]\n\t"
01089         "sbcs   r2, r2, r6\n\t"
01090         "sbcs   r3, r3, r7\n\t"
01091         "sbcs   r4, r4, r8\n\t"
01092         "sbcs   r5, r5, r9\n\t"
01093         "str    r2, [%[a], #16]\n\t"
01094         "str    r3, [%[a], #20]\n\t"
01095         "str    r4, [%[a], #24]\n\t"
01096         "str    r5, [%[a], #28]\n\t"
01097         "ldr    r2, [%[a], #32]\n\t"
01098         "ldr    r3, [%[a], #36]\n\t"
01099         "ldr    r4, [%[a], #40]\n\t"
01100         "ldr    r5, [%[a], #44]\n\t"
01101         "ldr    r6, [%[b], #32]\n\t"
01102         "ldr    r7, [%[b], #36]\n\t"
01103         "ldr    r8, [%[b], #40]\n\t"
01104         "ldr    r9, [%[b], #44]\n\t"
01105         "sbcs   r2, r2, r6\n\t"
01106         "sbcs   r3, r3, r7\n\t"
01107         "sbcs   r4, r4, r8\n\t"
01108         "sbcs   r5, r5, r9\n\t"
01109         "str    r2, [%[a], #32]\n\t"
01110         "str    r3, [%[a], #36]\n\t"
01111         "str    r4, [%[a], #40]\n\t"
01112         "str    r5, [%[a], #44]\n\t"
01113         "ldr    r2, [%[a], #48]\n\t"
01114         "ldr    r3, [%[a], #52]\n\t"
01115         "ldr    r4, [%[a], #56]\n\t"
01116         "ldr    r5, [%[a], #60]\n\t"
01117         "ldr    r6, [%[b], #48]\n\t"
01118         "ldr    r7, [%[b], #52]\n\t"
01119         "ldr    r8, [%[b], #56]\n\t"
01120         "ldr    r9, [%[b], #60]\n\t"
01121         "sbcs   r2, r2, r6\n\t"
01122         "sbcs   r3, r3, r7\n\t"
01123         "sbcs   r4, r4, r8\n\t"
01124         "sbcs   r5, r5, r9\n\t"
01125         "str    r2, [%[a], #48]\n\t"
01126         "str    r3, [%[a], #52]\n\t"
01127         "str    r4, [%[a], #56]\n\t"
01128         "str    r5, [%[a], #60]\n\t"
01129         "sbc    %[c], r9, r9\n\t"
01130         : [c] "+r" (c)
01131         : [a] "r" (a), [b] "r" (b)
01132         : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
01133     );
01134 
01135     return c;
01136 }
01137 
01138 /* Add b to a into r. (r = a + b)
01139  *
01140  * r  A single precision integer.
01141  * a  A single precision integer.
01142  * b  A single precision integer.
01143  */
01144 static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a,
01145         const sp_digit* b)
01146 {
01147     sp_digit c = 0;
01148 
01149     __asm__ __volatile__ (
01150         "mov    r12, #0\n\t"
01151         "ldr    r4, [%[a], #0]\n\t"
01152         "ldr    r5, [%[a], #4]\n\t"
01153         "ldr    r6, [%[a], #8]\n\t"
01154         "ldr    r7, [%[a], #12]\n\t"
01155         "ldr    r8, [%[b], #0]\n\t"
01156         "ldr    r9, [%[b], #4]\n\t"
01157         "ldr    r10, [%[b], #8]\n\t"
01158         "ldr    r14, [%[b], #12]\n\t"
01159         "adds   r4, r4, r8\n\t"
01160         "adcs   r5, r5, r9\n\t"
01161         "adcs   r6, r6, r10\n\t"
01162         "adcs   r7, r7, r14\n\t"
01163         "str    r4, [%[r], #0]\n\t"
01164         "str    r5, [%[r], #4]\n\t"
01165         "str    r6, [%[r], #8]\n\t"
01166         "str    r7, [%[r], #12]\n\t"
01167         "ldr    r4, [%[a], #16]\n\t"
01168         "ldr    r5, [%[a], #20]\n\t"
01169         "ldr    r6, [%[a], #24]\n\t"
01170         "ldr    r7, [%[a], #28]\n\t"
01171         "ldr    r8, [%[b], #16]\n\t"
01172         "ldr    r9, [%[b], #20]\n\t"
01173         "ldr    r10, [%[b], #24]\n\t"
01174         "ldr    r14, [%[b], #28]\n\t"
01175         "adcs   r4, r4, r8\n\t"
01176         "adcs   r5, r5, r9\n\t"
01177         "adcs   r6, r6, r10\n\t"
01178         "adcs   r7, r7, r14\n\t"
01179         "str    r4, [%[r], #16]\n\t"
01180         "str    r5, [%[r], #20]\n\t"
01181         "str    r6, [%[r], #24]\n\t"
01182         "str    r7, [%[r], #28]\n\t"
01183         "ldr    r4, [%[a], #32]\n\t"
01184         "ldr    r5, [%[a], #36]\n\t"
01185         "ldr    r6, [%[a], #40]\n\t"
01186         "ldr    r7, [%[a], #44]\n\t"
01187         "ldr    r8, [%[b], #32]\n\t"
01188         "ldr    r9, [%[b], #36]\n\t"
01189         "ldr    r10, [%[b], #40]\n\t"
01190         "ldr    r14, [%[b], #44]\n\t"
01191         "adcs   r4, r4, r8\n\t"
01192         "adcs   r5, r5, r9\n\t"
01193         "adcs   r6, r6, r10\n\t"
01194         "adcs   r7, r7, r14\n\t"
01195         "str    r4, [%[r], #32]\n\t"
01196         "str    r5, [%[r], #36]\n\t"
01197         "str    r6, [%[r], #40]\n\t"
01198         "str    r7, [%[r], #44]\n\t"
01199         "ldr    r4, [%[a], #48]\n\t"
01200         "ldr    r5, [%[a], #52]\n\t"
01201         "ldr    r6, [%[a], #56]\n\t"
01202         "ldr    r7, [%[a], #60]\n\t"
01203         "ldr    r8, [%[b], #48]\n\t"
01204         "ldr    r9, [%[b], #52]\n\t"
01205         "ldr    r10, [%[b], #56]\n\t"
01206         "ldr    r14, [%[b], #60]\n\t"
01207         "adcs   r4, r4, r8\n\t"
01208         "adcs   r5, r5, r9\n\t"
01209         "adcs   r6, r6, r10\n\t"
01210         "adcs   r7, r7, r14\n\t"
01211         "str    r4, [%[r], #48]\n\t"
01212         "str    r5, [%[r], #52]\n\t"
01213         "str    r6, [%[r], #56]\n\t"
01214         "str    r7, [%[r], #60]\n\t"
01215         "adc    %[c], r12, r12\n\t"
01216         : [c] "+r" (c)
01217         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
01218         : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
01219     );
01220 
01221     return c;
01222 }
01223 
01224 /* AND m into each word of a and store in r.
01225  *
01226  * r  A single precision integer.
01227  * a  A single precision integer.
01228  * m  Mask to AND against each digit.
01229  */
01230 static void sp_2048_mask_8(sp_digit* r, sp_digit* a, sp_digit m)
01231 {
01232 #ifdef WOLFSSL_SP_SMALL
01233     int i;
01234 
01235     for (i=0; i<8; i++)
01236         r[i] = a[i] & m;
01237 #else
01238     r[0] = a[0] & m;
01239     r[1] = a[1] & m;
01240     r[2] = a[2] & m;
01241     r[3] = a[3] & m;
01242     r[4] = a[4] & m;
01243     r[5] = a[5] & m;
01244     r[6] = a[6] & m;
01245     r[7] = a[7] & m;
01246 #endif
01247 }
01248 
01249 /* Multiply a and b into r. (r = a * b)
01250  *
01251  * r  A single precision integer.
01252  * a  A single precision integer.
01253  * b  A single precision integer.
01254  */
01255 static void sp_2048_mul_16(sp_digit* r, const sp_digit* a,
01256         const sp_digit* b)
01257 {
01258     sp_digit* z0 = r;
01259     sp_digit z1[16];
01260     sp_digit a1[8];
01261     sp_digit b1[8];
01262     sp_digit z2[16];
01263     sp_digit u, ca, cb;
01264 
01265     ca = sp_2048_add_8(a1, a, &a[8]);
01266     cb = sp_2048_add_8(b1, b, &b[8]);
01267     u  = ca & cb;
01268     sp_2048_mul_8(z1, a1, b1);
01269     sp_2048_mul_8(z2, &a[8], &b[8]);
01270     sp_2048_mul_8(z0, a, b);
01271     sp_2048_mask_8(r + 16, a1, 0 - cb);
01272     sp_2048_mask_8(b1, b1, 0 - ca);
01273     u += sp_2048_add_8(r + 16, r + 16, b1);
01274     u += sp_2048_sub_in_place_16(z1, z2);
01275     u += sp_2048_sub_in_place_16(z1, z0);
01276     u += sp_2048_add_16(r + 8, r + 8, z1);
01277     r[24] = u;
01278     XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1));
01279     sp_2048_add_16(r + 16, r + 16, z2);
01280 }
01281 
01282 /* Square a and put result in r. (r = a * a)
01283  *
01284  * r  A single precision integer.
01285  * a  A single precision integer.
01286  */
01287 static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a)
01288 {
01289     sp_digit* z0 = r;
01290     sp_digit z2[16];
01291     sp_digit z1[16];
01292     sp_digit a1[8];
01293     sp_digit u;
01294 
01295     u = sp_2048_add_8(a1, a, &a[8]);
01296     sp_2048_sqr_8(z1, a1);
01297     sp_2048_sqr_8(z2, &a[8]);
01298     sp_2048_sqr_8(z0, a);
01299     sp_2048_mask_8(r + 16, a1, 0 - u);
01300     u += sp_2048_add_8(r + 16, r + 16, r + 16);
01301     u += sp_2048_sub_in_place_16(z1, z2);
01302     u += sp_2048_sub_in_place_16(z1, z0);
01303     u += sp_2048_add_16(r + 8, r + 8, z1);
01304     r[24] = u;
01305     XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1));
01306     sp_2048_add_16(r + 16, r + 16, z2);
01307 }
01308 
01309 /* Sub b from a into a. (a -= b)
01310  *
01311  * a  A single precision integer and result.
01312  * b  A single precision integer.
01313  */
01314 static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b)
01315 {
01316     sp_digit c = 0;
01317 
01318     __asm__ __volatile__ (
01319         "ldr    r2, [%[a], #0]\n\t"
01320         "ldr    r3, [%[a], #4]\n\t"
01321         "ldr    r4, [%[a], #8]\n\t"
01322         "ldr    r5, [%[a], #12]\n\t"
01323         "ldr    r6, [%[b], #0]\n\t"
01324         "ldr    r7, [%[b], #4]\n\t"
01325         "ldr    r8, [%[b], #8]\n\t"
01326         "ldr    r9, [%[b], #12]\n\t"
01327         "subs   r2, r2, r6\n\t"
01328         "sbcs   r3, r3, r7\n\t"
01329         "sbcs   r4, r4, r8\n\t"
01330         "sbcs   r5, r5, r9\n\t"
01331         "str    r2, [%[a], #0]\n\t"
01332         "str    r3, [%[a], #4]\n\t"
01333         "str    r4, [%[a], #8]\n\t"
01334         "str    r5, [%[a], #12]\n\t"
01335         "ldr    r2, [%[a], #16]\n\t"
01336         "ldr    r3, [%[a], #20]\n\t"
01337         "ldr    r4, [%[a], #24]\n\t"
01338         "ldr    r5, [%[a], #28]\n\t"
01339         "ldr    r6, [%[b], #16]\n\t"
01340         "ldr    r7, [%[b], #20]\n\t"
01341         "ldr    r8, [%[b], #24]\n\t"
01342         "ldr    r9, [%[b], #28]\n\t"
01343         "sbcs   r2, r2, r6\n\t"
01344         "sbcs   r3, r3, r7\n\t"
01345         "sbcs   r4, r4, r8\n\t"
01346         "sbcs   r5, r5, r9\n\t"
01347         "str    r2, [%[a], #16]\n\t"
01348         "str    r3, [%[a], #20]\n\t"
01349         "str    r4, [%[a], #24]\n\t"
01350         "str    r5, [%[a], #28]\n\t"
01351         "ldr    r2, [%[a], #32]\n\t"
01352         "ldr    r3, [%[a], #36]\n\t"
01353         "ldr    r4, [%[a], #40]\n\t"
01354         "ldr    r5, [%[a], #44]\n\t"
01355         "ldr    r6, [%[b], #32]\n\t"
01356         "ldr    r7, [%[b], #36]\n\t"
01357         "ldr    r8, [%[b], #40]\n\t"
01358         "ldr    r9, [%[b], #44]\n\t"
01359         "sbcs   r2, r2, r6\n\t"
01360         "sbcs   r3, r3, r7\n\t"
01361         "sbcs   r4, r4, r8\n\t"
01362         "sbcs   r5, r5, r9\n\t"
01363         "str    r2, [%[a], #32]\n\t"
01364         "str    r3, [%[a], #36]\n\t"
01365         "str    r4, [%[a], #40]\n\t"
01366         "str    r5, [%[a], #44]\n\t"
01367         "ldr    r2, [%[a], #48]\n\t"
01368         "ldr    r3, [%[a], #52]\n\t"
01369         "ldr    r4, [%[a], #56]\n\t"
01370         "ldr    r5, [%[a], #60]\n\t"
01371         "ldr    r6, [%[b], #48]\n\t"
01372         "ldr    r7, [%[b], #52]\n\t"
01373         "ldr    r8, [%[b], #56]\n\t"
01374         "ldr    r9, [%[b], #60]\n\t"
01375         "sbcs   r2, r2, r6\n\t"
01376         "sbcs   r3, r3, r7\n\t"
01377         "sbcs   r4, r4, r8\n\t"
01378         "sbcs   r5, r5, r9\n\t"
01379         "str    r2, [%[a], #48]\n\t"
01380         "str    r3, [%[a], #52]\n\t"
01381         "str    r4, [%[a], #56]\n\t"
01382         "str    r5, [%[a], #60]\n\t"
01383         "ldr    r2, [%[a], #64]\n\t"
01384         "ldr    r3, [%[a], #68]\n\t"
01385         "ldr    r4, [%[a], #72]\n\t"
01386         "ldr    r5, [%[a], #76]\n\t"
01387         "ldr    r6, [%[b], #64]\n\t"
01388         "ldr    r7, [%[b], #68]\n\t"
01389         "ldr    r8, [%[b], #72]\n\t"
01390         "ldr    r9, [%[b], #76]\n\t"
01391         "sbcs   r2, r2, r6\n\t"
01392         "sbcs   r3, r3, r7\n\t"
01393         "sbcs   r4, r4, r8\n\t"
01394         "sbcs   r5, r5, r9\n\t"
01395         "str    r2, [%[a], #64]\n\t"
01396         "str    r3, [%[a], #68]\n\t"
01397         "str    r4, [%[a], #72]\n\t"
01398         "str    r5, [%[a], #76]\n\t"
01399         "ldr    r2, [%[a], #80]\n\t"
01400         "ldr    r3, [%[a], #84]\n\t"
01401         "ldr    r4, [%[a], #88]\n\t"
01402         "ldr    r5, [%[a], #92]\n\t"
01403         "ldr    r6, [%[b], #80]\n\t"
01404         "ldr    r7, [%[b], #84]\n\t"
01405         "ldr    r8, [%[b], #88]\n\t"
01406         "ldr    r9, [%[b], #92]\n\t"
01407         "sbcs   r2, r2, r6\n\t"
01408         "sbcs   r3, r3, r7\n\t"
01409         "sbcs   r4, r4, r8\n\t"
01410         "sbcs   r5, r5, r9\n\t"
01411         "str    r2, [%[a], #80]\n\t"
01412         "str    r3, [%[a], #84]\n\t"
01413         "str    r4, [%[a], #88]\n\t"
01414         "str    r5, [%[a], #92]\n\t"
01415         "ldr    r2, [%[a], #96]\n\t"
01416         "ldr    r3, [%[a], #100]\n\t"
01417         "ldr    r4, [%[a], #104]\n\t"
01418         "ldr    r5, [%[a], #108]\n\t"
01419         "ldr    r6, [%[b], #96]\n\t"
01420         "ldr    r7, [%[b], #100]\n\t"
01421         "ldr    r8, [%[b], #104]\n\t"
01422         "ldr    r9, [%[b], #108]\n\t"
01423         "sbcs   r2, r2, r6\n\t"
01424         "sbcs   r3, r3, r7\n\t"
01425         "sbcs   r4, r4, r8\n\t"
01426         "sbcs   r5, r5, r9\n\t"
01427         "str    r2, [%[a], #96]\n\t"
01428         "str    r3, [%[a], #100]\n\t"
01429         "str    r4, [%[a], #104]\n\t"
01430         "str    r5, [%[a], #108]\n\t"
01431         "ldr    r2, [%[a], #112]\n\t"
01432         "ldr    r3, [%[a], #116]\n\t"
01433         "ldr    r4, [%[a], #120]\n\t"
01434         "ldr    r5, [%[a], #124]\n\t"
01435         "ldr    r6, [%[b], #112]\n\t"
01436         "ldr    r7, [%[b], #116]\n\t"
01437         "ldr    r8, [%[b], #120]\n\t"
01438         "ldr    r9, [%[b], #124]\n\t"
01439         "sbcs   r2, r2, r6\n\t"
01440         "sbcs   r3, r3, r7\n\t"
01441         "sbcs   r4, r4, r8\n\t"
01442         "sbcs   r5, r5, r9\n\t"
01443         "str    r2, [%[a], #112]\n\t"
01444         "str    r3, [%[a], #116]\n\t"
01445         "str    r4, [%[a], #120]\n\t"
01446         "str    r5, [%[a], #124]\n\t"
01447         "sbc    %[c], r9, r9\n\t"
01448         : [c] "+r" (c)
01449         : [a] "r" (a), [b] "r" (b)
01450         : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
01451     );
01452 
01453     return c;
01454 }
01455 
01456 /* Add b to a into r. (r = a + b)
01457  *
01458  * r  A single precision integer.
01459  * a  A single precision integer.
01460  * b  A single precision integer.
01461  */
01462 static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a,
01463         const sp_digit* b)
01464 {
01465     sp_digit c = 0;
01466 
01467     __asm__ __volatile__ (
01468         "mov    r12, #0\n\t"
01469         "ldr    r4, [%[a], #0]\n\t"
01470         "ldr    r5, [%[a], #4]\n\t"
01471         "ldr    r6, [%[a], #8]\n\t"
01472         "ldr    r7, [%[a], #12]\n\t"
01473         "ldr    r8, [%[b], #0]\n\t"
01474         "ldr    r9, [%[b], #4]\n\t"
01475         "ldr    r10, [%[b], #8]\n\t"
01476         "ldr    r14, [%[b], #12]\n\t"
01477         "adds   r4, r4, r8\n\t"
01478         "adcs   r5, r5, r9\n\t"
01479         "adcs   r6, r6, r10\n\t"
01480         "adcs   r7, r7, r14\n\t"
01481         "str    r4, [%[r], #0]\n\t"
01482         "str    r5, [%[r], #4]\n\t"
01483         "str    r6, [%[r], #8]\n\t"
01484         "str    r7, [%[r], #12]\n\t"
01485         "ldr    r4, [%[a], #16]\n\t"
01486         "ldr    r5, [%[a], #20]\n\t"
01487         "ldr    r6, [%[a], #24]\n\t"
01488         "ldr    r7, [%[a], #28]\n\t"
01489         "ldr    r8, [%[b], #16]\n\t"
01490         "ldr    r9, [%[b], #20]\n\t"
01491         "ldr    r10, [%[b], #24]\n\t"
01492         "ldr    r14, [%[b], #28]\n\t"
01493         "adcs   r4, r4, r8\n\t"
01494         "adcs   r5, r5, r9\n\t"
01495         "adcs   r6, r6, r10\n\t"
01496         "adcs   r7, r7, r14\n\t"
01497         "str    r4, [%[r], #16]\n\t"
01498         "str    r5, [%[r], #20]\n\t"
01499         "str    r6, [%[r], #24]\n\t"
01500         "str    r7, [%[r], #28]\n\t"
01501         "ldr    r4, [%[a], #32]\n\t"
01502         "ldr    r5, [%[a], #36]\n\t"
01503         "ldr    r6, [%[a], #40]\n\t"
01504         "ldr    r7, [%[a], #44]\n\t"
01505         "ldr    r8, [%[b], #32]\n\t"
01506         "ldr    r9, [%[b], #36]\n\t"
01507         "ldr    r10, [%[b], #40]\n\t"
01508         "ldr    r14, [%[b], #44]\n\t"
01509         "adcs   r4, r4, r8\n\t"
01510         "adcs   r5, r5, r9\n\t"
01511         "adcs   r6, r6, r10\n\t"
01512         "adcs   r7, r7, r14\n\t"
01513         "str    r4, [%[r], #32]\n\t"
01514         "str    r5, [%[r], #36]\n\t"
01515         "str    r6, [%[r], #40]\n\t"
01516         "str    r7, [%[r], #44]\n\t"
01517         "ldr    r4, [%[a], #48]\n\t"
01518         "ldr    r5, [%[a], #52]\n\t"
01519         "ldr    r6, [%[a], #56]\n\t"
01520         "ldr    r7, [%[a], #60]\n\t"
01521         "ldr    r8, [%[b], #48]\n\t"
01522         "ldr    r9, [%[b], #52]\n\t"
01523         "ldr    r10, [%[b], #56]\n\t"
01524         "ldr    r14, [%[b], #60]\n\t"
01525         "adcs   r4, r4, r8\n\t"
01526         "adcs   r5, r5, r9\n\t"
01527         "adcs   r6, r6, r10\n\t"
01528         "adcs   r7, r7, r14\n\t"
01529         "str    r4, [%[r], #48]\n\t"
01530         "str    r5, [%[r], #52]\n\t"
01531         "str    r6, [%[r], #56]\n\t"
01532         "str    r7, [%[r], #60]\n\t"
01533         "ldr    r4, [%[a], #64]\n\t"
01534         "ldr    r5, [%[a], #68]\n\t"
01535         "ldr    r6, [%[a], #72]\n\t"
01536         "ldr    r7, [%[a], #76]\n\t"
01537         "ldr    r8, [%[b], #64]\n\t"
01538         "ldr    r9, [%[b], #68]\n\t"
01539         "ldr    r10, [%[b], #72]\n\t"
01540         "ldr    r14, [%[b], #76]\n\t"
01541         "adcs   r4, r4, r8\n\t"
01542         "adcs   r5, r5, r9\n\t"
01543         "adcs   r6, r6, r10\n\t"
01544         "adcs   r7, r7, r14\n\t"
01545         "str    r4, [%[r], #64]\n\t"
01546         "str    r5, [%[r], #68]\n\t"
01547         "str    r6, [%[r], #72]\n\t"
01548         "str    r7, [%[r], #76]\n\t"
01549         "ldr    r4, [%[a], #80]\n\t"
01550         "ldr    r5, [%[a], #84]\n\t"
01551         "ldr    r6, [%[a], #88]\n\t"
01552         "ldr    r7, [%[a], #92]\n\t"
01553         "ldr    r8, [%[b], #80]\n\t"
01554         "ldr    r9, [%[b], #84]\n\t"
01555         "ldr    r10, [%[b], #88]\n\t"
01556         "ldr    r14, [%[b], #92]\n\t"
01557         "adcs   r4, r4, r8\n\t"
01558         "adcs   r5, r5, r9\n\t"
01559         "adcs   r6, r6, r10\n\t"
01560         "adcs   r7, r7, r14\n\t"
01561         "str    r4, [%[r], #80]\n\t"
01562         "str    r5, [%[r], #84]\n\t"
01563         "str    r6, [%[r], #88]\n\t"
01564         "str    r7, [%[r], #92]\n\t"
01565         "ldr    r4, [%[a], #96]\n\t"
01566         "ldr    r5, [%[a], #100]\n\t"
01567         "ldr    r6, [%[a], #104]\n\t"
01568         "ldr    r7, [%[a], #108]\n\t"
01569         "ldr    r8, [%[b], #96]\n\t"
01570         "ldr    r9, [%[b], #100]\n\t"
01571         "ldr    r10, [%[b], #104]\n\t"
01572         "ldr    r14, [%[b], #108]\n\t"
01573         "adcs   r4, r4, r8\n\t"
01574         "adcs   r5, r5, r9\n\t"
01575         "adcs   r6, r6, r10\n\t"
01576         "adcs   r7, r7, r14\n\t"
01577         "str    r4, [%[r], #96]\n\t"
01578         "str    r5, [%[r], #100]\n\t"
01579         "str    r6, [%[r], #104]\n\t"
01580         "str    r7, [%[r], #108]\n\t"
01581         "ldr    r4, [%[a], #112]\n\t"
01582         "ldr    r5, [%[a], #116]\n\t"
01583         "ldr    r6, [%[a], #120]\n\t"
01584         "ldr    r7, [%[a], #124]\n\t"
01585         "ldr    r8, [%[b], #112]\n\t"
01586         "ldr    r9, [%[b], #116]\n\t"
01587         "ldr    r10, [%[b], #120]\n\t"
01588         "ldr    r14, [%[b], #124]\n\t"
01589         "adcs   r4, r4, r8\n\t"
01590         "adcs   r5, r5, r9\n\t"
01591         "adcs   r6, r6, r10\n\t"
01592         "adcs   r7, r7, r14\n\t"
01593         "str    r4, [%[r], #112]\n\t"
01594         "str    r5, [%[r], #116]\n\t"
01595         "str    r6, [%[r], #120]\n\t"
01596         "str    r7, [%[r], #124]\n\t"
01597         "adc    %[c], r12, r12\n\t"
01598         : [c] "+r" (c)
01599         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
01600         : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
01601     );
01602 
01603     return c;
01604 }
01605 
01606 /* AND m into each word of a and store in r.
01607  *
01608  * r  A single precision integer.
01609  * a  A single precision integer.
01610  * m  Mask to AND against each digit.
01611  */
01612 static void sp_2048_mask_16(sp_digit* r, sp_digit* a, sp_digit m)
01613 {
01614 #ifdef WOLFSSL_SP_SMALL
01615     int i;
01616 
01617     for (i=0; i<16; i++)
01618         r[i] = a[i] & m;
01619 #else
01620     int i;
01621 
01622     for (i = 0; i < 16; i += 8) {
01623         r[i+0] = a[i+0] & m;
01624         r[i+1] = a[i+1] & m;
01625         r[i+2] = a[i+2] & m;
01626         r[i+3] = a[i+3] & m;
01627         r[i+4] = a[i+4] & m;
01628         r[i+5] = a[i+5] & m;
01629         r[i+6] = a[i+6] & m;
01630         r[i+7] = a[i+7] & m;
01631     }
01632 #endif
01633 }
01634 
01635 /* Multiply a and b into r. (r = a * b)
01636  *
01637  * r  A single precision integer.
01638  * a  A single precision integer.
01639  * b  A single precision integer.
01640  */
01641 static void sp_2048_mul_32(sp_digit* r, const sp_digit* a,
01642         const sp_digit* b)
01643 {
01644     sp_digit* z0 = r;
01645     sp_digit z1[32];
01646     sp_digit a1[16];
01647     sp_digit b1[16];
01648     sp_digit z2[32];
01649     sp_digit u, ca, cb;
01650 
01651     ca = sp_2048_add_16(a1, a, &a[16]);
01652     cb = sp_2048_add_16(b1, b, &b[16]);
01653     u  = ca & cb;
01654     sp_2048_mul_16(z1, a1, b1);
01655     sp_2048_mul_16(z2, &a[16], &b[16]);
01656     sp_2048_mul_16(z0, a, b);
01657     sp_2048_mask_16(r + 32, a1, 0 - cb);
01658     sp_2048_mask_16(b1, b1, 0 - ca);
01659     u += sp_2048_add_16(r + 32, r + 32, b1);
01660     u += sp_2048_sub_in_place_32(z1, z2);
01661     u += sp_2048_sub_in_place_32(z1, z0);
01662     u += sp_2048_add_32(r + 16, r + 16, z1);
01663     r[48] = u;
01664     XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1));
01665     sp_2048_add_32(r + 32, r + 32, z2);
01666 }
01667 
01668 /* Square a and put result in r. (r = a * a)
01669  *
01670  * r  A single precision integer.
01671  * a  A single precision integer.
01672  */
01673 static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a)
01674 {
01675     sp_digit* z0 = r;
01676     sp_digit z2[32];
01677     sp_digit z1[32];
01678     sp_digit a1[16];
01679     sp_digit u;
01680 
01681     u = sp_2048_add_16(a1, a, &a[16]);
01682     sp_2048_sqr_16(z1, a1);
01683     sp_2048_sqr_16(z2, &a[16]);
01684     sp_2048_sqr_16(z0, a);
01685     sp_2048_mask_16(r + 32, a1, 0 - u);
01686     u += sp_2048_add_16(r + 32, r + 32, r + 32);
01687     u += sp_2048_sub_in_place_32(z1, z2);
01688     u += sp_2048_sub_in_place_32(z1, z0);
01689     u += sp_2048_add_32(r + 16, r + 16, z1);
01690     r[48] = u;
01691     XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1));
01692     sp_2048_add_32(r + 32, r + 32, z2);
01693 }
01694 
01695 /* Sub b from a into a. (a -= b)
01696  *
01697  * a  A single precision integer and result.
01698  * b  A single precision integer.
01699  */
01700 static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b)
01701 {
01702     sp_digit c = 0;
01703 
01704     __asm__ __volatile__ (
01705         "ldr    r2, [%[a], #0]\n\t"
01706         "ldr    r3, [%[a], #4]\n\t"
01707         "ldr    r4, [%[a], #8]\n\t"
01708         "ldr    r5, [%[a], #12]\n\t"
01709         "ldr    r6, [%[b], #0]\n\t"
01710         "ldr    r7, [%[b], #4]\n\t"
01711         "ldr    r8, [%[b], #8]\n\t"
01712         "ldr    r9, [%[b], #12]\n\t"
01713         "subs   r2, r2, r6\n\t"
01714         "sbcs   r3, r3, r7\n\t"
01715         "sbcs   r4, r4, r8\n\t"
01716         "sbcs   r5, r5, r9\n\t"
01717         "str    r2, [%[a], #0]\n\t"
01718         "str    r3, [%[a], #4]\n\t"
01719         "str    r4, [%[a], #8]\n\t"
01720         "str    r5, [%[a], #12]\n\t"
01721         "ldr    r2, [%[a], #16]\n\t"
01722         "ldr    r3, [%[a], #20]\n\t"
01723         "ldr    r4, [%[a], #24]\n\t"
01724         "ldr    r5, [%[a], #28]\n\t"
01725         "ldr    r6, [%[b], #16]\n\t"
01726         "ldr    r7, [%[b], #20]\n\t"
01727         "ldr    r8, [%[b], #24]\n\t"
01728         "ldr    r9, [%[b], #28]\n\t"
01729         "sbcs   r2, r2, r6\n\t"
01730         "sbcs   r3, r3, r7\n\t"
01731         "sbcs   r4, r4, r8\n\t"
01732         "sbcs   r5, r5, r9\n\t"
01733         "str    r2, [%[a], #16]\n\t"
01734         "str    r3, [%[a], #20]\n\t"
01735         "str    r4, [%[a], #24]\n\t"
01736         "str    r5, [%[a], #28]\n\t"
01737         "ldr    r2, [%[a], #32]\n\t"
01738         "ldr    r3, [%[a], #36]\n\t"
01739         "ldr    r4, [%[a], #40]\n\t"
01740         "ldr    r5, [%[a], #44]\n\t"
01741         "ldr    r6, [%[b], #32]\n\t"
01742         "ldr    r7, [%[b], #36]\n\t"
01743         "ldr    r8, [%[b], #40]\n\t"
01744         "ldr    r9, [%[b], #44]\n\t"
01745         "sbcs   r2, r2, r6\n\t"
01746         "sbcs   r3, r3, r7\n\t"
01747         "sbcs   r4, r4, r8\n\t"
01748         "sbcs   r5, r5, r9\n\t"
01749         "str    r2, [%[a], #32]\n\t"
01750         "str    r3, [%[a], #36]\n\t"
01751         "str    r4, [%[a], #40]\n\t"
01752         "str    r5, [%[a], #44]\n\t"
01753         "ldr    r2, [%[a], #48]\n\t"
01754         "ldr    r3, [%[a], #52]\n\t"
01755         "ldr    r4, [%[a], #56]\n\t"
01756         "ldr    r5, [%[a], #60]\n\t"
01757         "ldr    r6, [%[b], #48]\n\t"
01758         "ldr    r7, [%[b], #52]\n\t"
01759         "ldr    r8, [%[b], #56]\n\t"
01760         "ldr    r9, [%[b], #60]\n\t"
01761         "sbcs   r2, r2, r6\n\t"
01762         "sbcs   r3, r3, r7\n\t"
01763         "sbcs   r4, r4, r8\n\t"
01764         "sbcs   r5, r5, r9\n\t"
01765         "str    r2, [%[a], #48]\n\t"
01766         "str    r3, [%[a], #52]\n\t"
01767         "str    r4, [%[a], #56]\n\t"
01768         "str    r5, [%[a], #60]\n\t"
01769         "ldr    r2, [%[a], #64]\n\t"
01770         "ldr    r3, [%[a], #68]\n\t"
01771         "ldr    r4, [%[a], #72]\n\t"
01772         "ldr    r5, [%[a], #76]\n\t"
01773         "ldr    r6, [%[b], #64]\n\t"
01774         "ldr    r7, [%[b], #68]\n\t"
01775         "ldr    r8, [%[b], #72]\n\t"
01776         "ldr    r9, [%[b], #76]\n\t"
01777         "sbcs   r2, r2, r6\n\t"
01778         "sbcs   r3, r3, r7\n\t"
01779         "sbcs   r4, r4, r8\n\t"
01780         "sbcs   r5, r5, r9\n\t"
01781         "str    r2, [%[a], #64]\n\t"
01782         "str    r3, [%[a], #68]\n\t"
01783         "str    r4, [%[a], #72]\n\t"
01784         "str    r5, [%[a], #76]\n\t"
01785         "ldr    r2, [%[a], #80]\n\t"
01786         "ldr    r3, [%[a], #84]\n\t"
01787         "ldr    r4, [%[a], #88]\n\t"
01788         "ldr    r5, [%[a], #92]\n\t"
01789         "ldr    r6, [%[b], #80]\n\t"
01790         "ldr    r7, [%[b], #84]\n\t"
01791         "ldr    r8, [%[b], #88]\n\t"
01792         "ldr    r9, [%[b], #92]\n\t"
01793         "sbcs   r2, r2, r6\n\t"
01794         "sbcs   r3, r3, r7\n\t"
01795         "sbcs   r4, r4, r8\n\t"
01796         "sbcs   r5, r5, r9\n\t"
01797         "str    r2, [%[a], #80]\n\t"
01798         "str    r3, [%[a], #84]\n\t"
01799         "str    r4, [%[a], #88]\n\t"
01800         "str    r5, [%[a], #92]\n\t"
01801         "ldr    r2, [%[a], #96]\n\t"
01802         "ldr    r3, [%[a], #100]\n\t"
01803         "ldr    r4, [%[a], #104]\n\t"
01804         "ldr    r5, [%[a], #108]\n\t"
01805         "ldr    r6, [%[b], #96]\n\t"
01806         "ldr    r7, [%[b], #100]\n\t"
01807         "ldr    r8, [%[b], #104]\n\t"
01808         "ldr    r9, [%[b], #108]\n\t"
01809         "sbcs   r2, r2, r6\n\t"
01810         "sbcs   r3, r3, r7\n\t"
01811         "sbcs   r4, r4, r8\n\t"
01812         "sbcs   r5, r5, r9\n\t"
01813         "str    r2, [%[a], #96]\n\t"
01814         "str    r3, [%[a], #100]\n\t"
01815         "str    r4, [%[a], #104]\n\t"
01816         "str    r5, [%[a], #108]\n\t"
01817         "ldr    r2, [%[a], #112]\n\t"
01818         "ldr    r3, [%[a], #116]\n\t"
01819         "ldr    r4, [%[a], #120]\n\t"
01820         "ldr    r5, [%[a], #124]\n\t"
01821         "ldr    r6, [%[b], #112]\n\t"
01822         "ldr    r7, [%[b], #116]\n\t"
01823         "ldr    r8, [%[b], #120]\n\t"
01824         "ldr    r9, [%[b], #124]\n\t"
01825         "sbcs   r2, r2, r6\n\t"
01826         "sbcs   r3, r3, r7\n\t"
01827         "sbcs   r4, r4, r8\n\t"
01828         "sbcs   r5, r5, r9\n\t"
01829         "str    r2, [%[a], #112]\n\t"
01830         "str    r3, [%[a], #116]\n\t"
01831         "str    r4, [%[a], #120]\n\t"
01832         "str    r5, [%[a], #124]\n\t"
01833         "ldr    r2, [%[a], #128]\n\t"
01834         "ldr    r3, [%[a], #132]\n\t"
01835         "ldr    r4, [%[a], #136]\n\t"
01836         "ldr    r5, [%[a], #140]\n\t"
01837         "ldr    r6, [%[b], #128]\n\t"
01838         "ldr    r7, [%[b], #132]\n\t"
01839         "ldr    r8, [%[b], #136]\n\t"
01840         "ldr    r9, [%[b], #140]\n\t"
01841         "sbcs   r2, r2, r6\n\t"
01842         "sbcs   r3, r3, r7\n\t"
01843         "sbcs   r4, r4, r8\n\t"
01844         "sbcs   r5, r5, r9\n\t"
01845         "str    r2, [%[a], #128]\n\t"
01846         "str    r3, [%[a], #132]\n\t"
01847         "str    r4, [%[a], #136]\n\t"
01848         "str    r5, [%[a], #140]\n\t"
01849         "ldr    r2, [%[a], #144]\n\t"
01850         "ldr    r3, [%[a], #148]\n\t"
01851         "ldr    r4, [%[a], #152]\n\t"
01852         "ldr    r5, [%[a], #156]\n\t"
01853         "ldr    r6, [%[b], #144]\n\t"
01854         "ldr    r7, [%[b], #148]\n\t"
01855         "ldr    r8, [%[b], #152]\n\t"
01856         "ldr    r9, [%[b], #156]\n\t"
01857         "sbcs   r2, r2, r6\n\t"
01858         "sbcs   r3, r3, r7\n\t"
01859         "sbcs   r4, r4, r8\n\t"
01860         "sbcs   r5, r5, r9\n\t"
01861         "str    r2, [%[a], #144]\n\t"
01862         "str    r3, [%[a], #148]\n\t"
01863         "str    r4, [%[a], #152]\n\t"
01864         "str    r5, [%[a], #156]\n\t"
01865         "ldr    r2, [%[a], #160]\n\t"
01866         "ldr    r3, [%[a], #164]\n\t"
01867         "ldr    r4, [%[a], #168]\n\t"
01868         "ldr    r5, [%[a], #172]\n\t"
01869         "ldr    r6, [%[b], #160]\n\t"
01870         "ldr    r7, [%[b], #164]\n\t"
01871         "ldr    r8, [%[b], #168]\n\t"
01872         "ldr    r9, [%[b], #172]\n\t"
01873         "sbcs   r2, r2, r6\n\t"
01874         "sbcs   r3, r3, r7\n\t"
01875         "sbcs   r4, r4, r8\n\t"
01876         "sbcs   r5, r5, r9\n\t"
01877         "str    r2, [%[a], #160]\n\t"
01878         "str    r3, [%[a], #164]\n\t"
01879         "str    r4, [%[a], #168]\n\t"
01880         "str    r5, [%[a], #172]\n\t"
01881         "ldr    r2, [%[a], #176]\n\t"
01882         "ldr    r3, [%[a], #180]\n\t"
01883         "ldr    r4, [%[a], #184]\n\t"
01884         "ldr    r5, [%[a], #188]\n\t"
01885         "ldr    r6, [%[b], #176]\n\t"
01886         "ldr    r7, [%[b], #180]\n\t"
01887         "ldr    r8, [%[b], #184]\n\t"
01888         "ldr    r9, [%[b], #188]\n\t"
01889         "sbcs   r2, r2, r6\n\t"
01890         "sbcs   r3, r3, r7\n\t"
01891         "sbcs   r4, r4, r8\n\t"
01892         "sbcs   r5, r5, r9\n\t"
01893         "str    r2, [%[a], #176]\n\t"
01894         "str    r3, [%[a], #180]\n\t"
01895         "str    r4, [%[a], #184]\n\t"
01896         "str    r5, [%[a], #188]\n\t"
01897         "ldr    r2, [%[a], #192]\n\t"
01898         "ldr    r3, [%[a], #196]\n\t"
01899         "ldr    r4, [%[a], #200]\n\t"
01900         "ldr    r5, [%[a], #204]\n\t"
01901         "ldr    r6, [%[b], #192]\n\t"
01902         "ldr    r7, [%[b], #196]\n\t"
01903         "ldr    r8, [%[b], #200]\n\t"
01904         "ldr    r9, [%[b], #204]\n\t"
01905         "sbcs   r2, r2, r6\n\t"
01906         "sbcs   r3, r3, r7\n\t"
01907         "sbcs   r4, r4, r8\n\t"
01908         "sbcs   r5, r5, r9\n\t"
01909         "str    r2, [%[a], #192]\n\t"
01910         "str    r3, [%[a], #196]\n\t"
01911         "str    r4, [%[a], #200]\n\t"
01912         "str    r5, [%[a], #204]\n\t"
01913         "ldr    r2, [%[a], #208]\n\t"
01914         "ldr    r3, [%[a], #212]\n\t"
01915         "ldr    r4, [%[a], #216]\n\t"
01916         "ldr    r5, [%[a], #220]\n\t"
01917         "ldr    r6, [%[b], #208]\n\t"
01918         "ldr    r7, [%[b], #212]\n\t"
01919         "ldr    r8, [%[b], #216]\n\t"
01920         "ldr    r9, [%[b], #220]\n\t"
01921         "sbcs   r2, r2, r6\n\t"
01922         "sbcs   r3, r3, r7\n\t"
01923         "sbcs   r4, r4, r8\n\t"
01924         "sbcs   r5, r5, r9\n\t"
01925         "str    r2, [%[a], #208]\n\t"
01926         "str    r3, [%[a], #212]\n\t"
01927         "str    r4, [%[a], #216]\n\t"
01928         "str    r5, [%[a], #220]\n\t"
01929         "ldr    r2, [%[a], #224]\n\t"
01930         "ldr    r3, [%[a], #228]\n\t"
01931         "ldr    r4, [%[a], #232]\n\t"
01932         "ldr    r5, [%[a], #236]\n\t"
01933         "ldr    r6, [%[b], #224]\n\t"
01934         "ldr    r7, [%[b], #228]\n\t"
01935         "ldr    r8, [%[b], #232]\n\t"
01936         "ldr    r9, [%[b], #236]\n\t"
01937         "sbcs   r2, r2, r6\n\t"
01938         "sbcs   r3, r3, r7\n\t"
01939         "sbcs   r4, r4, r8\n\t"
01940         "sbcs   r5, r5, r9\n\t"
01941         "str    r2, [%[a], #224]\n\t"
01942         "str    r3, [%[a], #228]\n\t"
01943         "str    r4, [%[a], #232]\n\t"
01944         "str    r5, [%[a], #236]\n\t"
01945         "ldr    r2, [%[a], #240]\n\t"
01946         "ldr    r3, [%[a], #244]\n\t"
01947         "ldr    r4, [%[a], #248]\n\t"
01948         "ldr    r5, [%[a], #252]\n\t"
01949         "ldr    r6, [%[b], #240]\n\t"
01950         "ldr    r7, [%[b], #244]\n\t"
01951         "ldr    r8, [%[b], #248]\n\t"
01952         "ldr    r9, [%[b], #252]\n\t"
01953         "sbcs   r2, r2, r6\n\t"
01954         "sbcs   r3, r3, r7\n\t"
01955         "sbcs   r4, r4, r8\n\t"
01956         "sbcs   r5, r5, r9\n\t"
01957         "str    r2, [%[a], #240]\n\t"
01958         "str    r3, [%[a], #244]\n\t"
01959         "str    r4, [%[a], #248]\n\t"
01960         "str    r5, [%[a], #252]\n\t"
01961         "sbc    %[c], r9, r9\n\t"
01962         : [c] "+r" (c)
01963         : [a] "r" (a), [b] "r" (b)
01964         : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
01965     );
01966 
01967     return c;
01968 }
01969 
01970 /* Add b to a into r. (r = a + b)
01971  *
01972  * r  A single precision integer.
01973  * a  A single precision integer.
01974  * b  A single precision integer.
01975  */
01976 static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a,
01977         const sp_digit* b)
01978 {
01979     sp_digit c = 0;
01980 
01981     __asm__ __volatile__ (
01982         "mov    r12, #0\n\t"
01983         "ldr    r4, [%[a], #0]\n\t"
01984         "ldr    r5, [%[a], #4]\n\t"
01985         "ldr    r6, [%[a], #8]\n\t"
01986         "ldr    r7, [%[a], #12]\n\t"
01987         "ldr    r8, [%[b], #0]\n\t"
01988         "ldr    r9, [%[b], #4]\n\t"
01989         "ldr    r10, [%[b], #8]\n\t"
01990         "ldr    r14, [%[b], #12]\n\t"
01991         "adds   r4, r4, r8\n\t"
01992         "adcs   r5, r5, r9\n\t"
01993         "adcs   r6, r6, r10\n\t"
01994         "adcs   r7, r7, r14\n\t"
01995         "str    r4, [%[r], #0]\n\t"
01996         "str    r5, [%[r], #4]\n\t"
01997         "str    r6, [%[r], #8]\n\t"
01998         "str    r7, [%[r], #12]\n\t"
01999         "ldr    r4, [%[a], #16]\n\t"
02000         "ldr    r5, [%[a], #20]\n\t"
02001         "ldr    r6, [%[a], #24]\n\t"
02002         "ldr    r7, [%[a], #28]\n\t"
02003         "ldr    r8, [%[b], #16]\n\t"
02004         "ldr    r9, [%[b], #20]\n\t"
02005         "ldr    r10, [%[b], #24]\n\t"
02006         "ldr    r14, [%[b], #28]\n\t"
02007         "adcs   r4, r4, r8\n\t"
02008         "adcs   r5, r5, r9\n\t"
02009         "adcs   r6, r6, r10\n\t"
02010         "adcs   r7, r7, r14\n\t"
02011         "str    r4, [%[r], #16]\n\t"
02012         "str    r5, [%[r], #20]\n\t"
02013         "str    r6, [%[r], #24]\n\t"
02014         "str    r7, [%[r], #28]\n\t"
02015         "ldr    r4, [%[a], #32]\n\t"
02016         "ldr    r5, [%[a], #36]\n\t"
02017         "ldr    r6, [%[a], #40]\n\t"
02018         "ldr    r7, [%[a], #44]\n\t"
02019         "ldr    r8, [%[b], #32]\n\t"
02020         "ldr    r9, [%[b], #36]\n\t"
02021         "ldr    r10, [%[b], #40]\n\t"
02022         "ldr    r14, [%[b], #44]\n\t"
02023         "adcs   r4, r4, r8\n\t"
02024         "adcs   r5, r5, r9\n\t"
02025         "adcs   r6, r6, r10\n\t"
02026         "adcs   r7, r7, r14\n\t"
02027         "str    r4, [%[r], #32]\n\t"
02028         "str    r5, [%[r], #36]\n\t"
02029         "str    r6, [%[r], #40]\n\t"
02030         "str    r7, [%[r], #44]\n\t"
02031         "ldr    r4, [%[a], #48]\n\t"
02032         "ldr    r5, [%[a], #52]\n\t"
02033         "ldr    r6, [%[a], #56]\n\t"
02034         "ldr    r7, [%[a], #60]\n\t"
02035         "ldr    r8, [%[b], #48]\n\t"
02036         "ldr    r9, [%[b], #52]\n\t"
02037         "ldr    r10, [%[b], #56]\n\t"
02038         "ldr    r14, [%[b], #60]\n\t"
02039         "adcs   r4, r4, r8\n\t"
02040         "adcs   r5, r5, r9\n\t"
02041         "adcs   r6, r6, r10\n\t"
02042         "adcs   r7, r7, r14\n\t"
02043         "str    r4, [%[r], #48]\n\t"
02044         "str    r5, [%[r], #52]\n\t"
02045         "str    r6, [%[r], #56]\n\t"
02046         "str    r7, [%[r], #60]\n\t"
02047         "ldr    r4, [%[a], #64]\n\t"
02048         "ldr    r5, [%[a], #68]\n\t"
02049         "ldr    r6, [%[a], #72]\n\t"
02050         "ldr    r7, [%[a], #76]\n\t"
02051         "ldr    r8, [%[b], #64]\n\t"
02052         "ldr    r9, [%[b], #68]\n\t"
02053         "ldr    r10, [%[b], #72]\n\t"
02054         "ldr    r14, [%[b], #76]\n\t"
02055         "adcs   r4, r4, r8\n\t"
02056         "adcs   r5, r5, r9\n\t"
02057         "adcs   r6, r6, r10\n\t"
02058         "adcs   r7, r7, r14\n\t"
02059         "str    r4, [%[r], #64]\n\t"
02060         "str    r5, [%[r], #68]\n\t"
02061         "str    r6, [%[r], #72]\n\t"
02062         "str    r7, [%[r], #76]\n\t"
02063         "ldr    r4, [%[a], #80]\n\t"
02064         "ldr    r5, [%[a], #84]\n\t"
02065         "ldr    r6, [%[a], #88]\n\t"
02066         "ldr    r7, [%[a], #92]\n\t"
02067         "ldr    r8, [%[b], #80]\n\t"
02068         "ldr    r9, [%[b], #84]\n\t"
02069         "ldr    r10, [%[b], #88]\n\t"
02070         "ldr    r14, [%[b], #92]\n\t"
02071         "adcs   r4, r4, r8\n\t"
02072         "adcs   r5, r5, r9\n\t"
02073         "adcs   r6, r6, r10\n\t"
02074         "adcs   r7, r7, r14\n\t"
02075         "str    r4, [%[r], #80]\n\t"
02076         "str    r5, [%[r], #84]\n\t"
02077         "str    r6, [%[r], #88]\n\t"
02078         "str    r7, [%[r], #92]\n\t"
02079         "ldr    r4, [%[a], #96]\n\t"
02080         "ldr    r5, [%[a], #100]\n\t"
02081         "ldr    r6, [%[a], #104]\n\t"
02082         "ldr    r7, [%[a], #108]\n\t"
02083         "ldr    r8, [%[b], #96]\n\t"
02084         "ldr    r9, [%[b], #100]\n\t"
02085         "ldr    r10, [%[b], #104]\n\t"
02086         "ldr    r14, [%[b], #108]\n\t"
02087         "adcs   r4, r4, r8\n\t"
02088         "adcs   r5, r5, r9\n\t"
02089         "adcs   r6, r6, r10\n\t"
02090         "adcs   r7, r7, r14\n\t"
02091         "str    r4, [%[r], #96]\n\t"
02092         "str    r5, [%[r], #100]\n\t"
02093         "str    r6, [%[r], #104]\n\t"
02094         "str    r7, [%[r], #108]\n\t"
02095         "ldr    r4, [%[a], #112]\n\t"
02096         "ldr    r5, [%[a], #116]\n\t"
02097         "ldr    r6, [%[a], #120]\n\t"
02098         "ldr    r7, [%[a], #124]\n\t"
02099         "ldr    r8, [%[b], #112]\n\t"
02100         "ldr    r9, [%[b], #116]\n\t"
02101         "ldr    r10, [%[b], #120]\n\t"
02102         "ldr    r14, [%[b], #124]\n\t"
02103         "adcs   r4, r4, r8\n\t"
02104         "adcs   r5, r5, r9\n\t"
02105         "adcs   r6, r6, r10\n\t"
02106         "adcs   r7, r7, r14\n\t"
02107         "str    r4, [%[r], #112]\n\t"
02108         "str    r5, [%[r], #116]\n\t"
02109         "str    r6, [%[r], #120]\n\t"
02110         "str    r7, [%[r], #124]\n\t"
02111         "ldr    r4, [%[a], #128]\n\t"
02112         "ldr    r5, [%[a], #132]\n\t"
02113         "ldr    r6, [%[a], #136]\n\t"
02114         "ldr    r7, [%[a], #140]\n\t"
02115         "ldr    r8, [%[b], #128]\n\t"
02116         "ldr    r9, [%[b], #132]\n\t"
02117         "ldr    r10, [%[b], #136]\n\t"
02118         "ldr    r14, [%[b], #140]\n\t"
02119         "adcs   r4, r4, r8\n\t"
02120         "adcs   r5, r5, r9\n\t"
02121         "adcs   r6, r6, r10\n\t"
02122         "adcs   r7, r7, r14\n\t"
02123         "str    r4, [%[r], #128]\n\t"
02124         "str    r5, [%[r], #132]\n\t"
02125         "str    r6, [%[r], #136]\n\t"
02126         "str    r7, [%[r], #140]\n\t"
02127         "ldr    r4, [%[a], #144]\n\t"
02128         "ldr    r5, [%[a], #148]\n\t"
02129         "ldr    r6, [%[a], #152]\n\t"
02130         "ldr    r7, [%[a], #156]\n\t"
02131         "ldr    r8, [%[b], #144]\n\t"
02132         "ldr    r9, [%[b], #148]\n\t"
02133         "ldr    r10, [%[b], #152]\n\t"
02134         "ldr    r14, [%[b], #156]\n\t"
02135         "adcs   r4, r4, r8\n\t"
02136         "adcs   r5, r5, r9\n\t"
02137         "adcs   r6, r6, r10\n\t"
02138         "adcs   r7, r7, r14\n\t"
02139         "str    r4, [%[r], #144]\n\t"
02140         "str    r5, [%[r], #148]\n\t"
02141         "str    r6, [%[r], #152]\n\t"
02142         "str    r7, [%[r], #156]\n\t"
02143         "ldr    r4, [%[a], #160]\n\t"
02144         "ldr    r5, [%[a], #164]\n\t"
02145         "ldr    r6, [%[a], #168]\n\t"
02146         "ldr    r7, [%[a], #172]\n\t"
02147         "ldr    r8, [%[b], #160]\n\t"
02148         "ldr    r9, [%[b], #164]\n\t"
02149         "ldr    r10, [%[b], #168]\n\t"
02150         "ldr    r14, [%[b], #172]\n\t"
02151         "adcs   r4, r4, r8\n\t"
02152         "adcs   r5, r5, r9\n\t"
02153         "adcs   r6, r6, r10\n\t"
02154         "adcs   r7, r7, r14\n\t"
02155         "str    r4, [%[r], #160]\n\t"
02156         "str    r5, [%[r], #164]\n\t"
02157         "str    r6, [%[r], #168]\n\t"
02158         "str    r7, [%[r], #172]\n\t"
02159         "ldr    r4, [%[a], #176]\n\t"
02160         "ldr    r5, [%[a], #180]\n\t"
02161         "ldr    r6, [%[a], #184]\n\t"
02162         "ldr    r7, [%[a], #188]\n\t"
02163         "ldr    r8, [%[b], #176]\n\t"
02164         "ldr    r9, [%[b], #180]\n\t"
02165         "ldr    r10, [%[b], #184]\n\t"
02166         "ldr    r14, [%[b], #188]\n\t"
02167         "adcs   r4, r4, r8\n\t"
02168         "adcs   r5, r5, r9\n\t"
02169         "adcs   r6, r6, r10\n\t"
02170         "adcs   r7, r7, r14\n\t"
02171         "str    r4, [%[r], #176]\n\t"
02172         "str    r5, [%[r], #180]\n\t"
02173         "str    r6, [%[r], #184]\n\t"
02174         "str    r7, [%[r], #188]\n\t"
02175         "ldr    r4, [%[a], #192]\n\t"
02176         "ldr    r5, [%[a], #196]\n\t"
02177         "ldr    r6, [%[a], #200]\n\t"
02178         "ldr    r7, [%[a], #204]\n\t"
02179         "ldr    r8, [%[b], #192]\n\t"
02180         "ldr    r9, [%[b], #196]\n\t"
02181         "ldr    r10, [%[b], #200]\n\t"
02182         "ldr    r14, [%[b], #204]\n\t"
02183         "adcs   r4, r4, r8\n\t"
02184         "adcs   r5, r5, r9\n\t"
02185         "adcs   r6, r6, r10\n\t"
02186         "adcs   r7, r7, r14\n\t"
02187         "str    r4, [%[r], #192]\n\t"
02188         "str    r5, [%[r], #196]\n\t"
02189         "str    r6, [%[r], #200]\n\t"
02190         "str    r7, [%[r], #204]\n\t"
02191         "ldr    r4, [%[a], #208]\n\t"
02192         "ldr    r5, [%[a], #212]\n\t"
02193         "ldr    r6, [%[a], #216]\n\t"
02194         "ldr    r7, [%[a], #220]\n\t"
02195         "ldr    r8, [%[b], #208]\n\t"
02196         "ldr    r9, [%[b], #212]\n\t"
02197         "ldr    r10, [%[b], #216]\n\t"
02198         "ldr    r14, [%[b], #220]\n\t"
02199         "adcs   r4, r4, r8\n\t"
02200         "adcs   r5, r5, r9\n\t"
02201         "adcs   r6, r6, r10\n\t"
02202         "adcs   r7, r7, r14\n\t"
02203         "str    r4, [%[r], #208]\n\t"
02204         "str    r5, [%[r], #212]\n\t"
02205         "str    r6, [%[r], #216]\n\t"
02206         "str    r7, [%[r], #220]\n\t"
02207         "ldr    r4, [%[a], #224]\n\t"
02208         "ldr    r5, [%[a], #228]\n\t"
02209         "ldr    r6, [%[a], #232]\n\t"
02210         "ldr    r7, [%[a], #236]\n\t"
02211         "ldr    r8, [%[b], #224]\n\t"
02212         "ldr    r9, [%[b], #228]\n\t"
02213         "ldr    r10, [%[b], #232]\n\t"
02214         "ldr    r14, [%[b], #236]\n\t"
02215         "adcs   r4, r4, r8\n\t"
02216         "adcs   r5, r5, r9\n\t"
02217         "adcs   r6, r6, r10\n\t"
02218         "adcs   r7, r7, r14\n\t"
02219         "str    r4, [%[r], #224]\n\t"
02220         "str    r5, [%[r], #228]\n\t"
02221         "str    r6, [%[r], #232]\n\t"
02222         "str    r7, [%[r], #236]\n\t"
02223         "ldr    r4, [%[a], #240]\n\t"
02224         "ldr    r5, [%[a], #244]\n\t"
02225         "ldr    r6, [%[a], #248]\n\t"
02226         "ldr    r7, [%[a], #252]\n\t"
02227         "ldr    r8, [%[b], #240]\n\t"
02228         "ldr    r9, [%[b], #244]\n\t"
02229         "ldr    r10, [%[b], #248]\n\t"
02230         "ldr    r14, [%[b], #252]\n\t"
02231         "adcs   r4, r4, r8\n\t"
02232         "adcs   r5, r5, r9\n\t"
02233         "adcs   r6, r6, r10\n\t"
02234         "adcs   r7, r7, r14\n\t"
02235         "str    r4, [%[r], #240]\n\t"
02236         "str    r5, [%[r], #244]\n\t"
02237         "str    r6, [%[r], #248]\n\t"
02238         "str    r7, [%[r], #252]\n\t"
02239         "adc    %[c], r12, r12\n\t"
02240         : [c] "+r" (c)
02241         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
02242         : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
02243     );
02244 
02245     return c;
02246 }
02247 
02248 /* AND m into each word of a and store in r.
02249  *
02250  * r  A single precision integer.
02251  * a  A single precision integer.
02252  * m  Mask to AND against each digit.
02253  */
02254 static void sp_2048_mask_32(sp_digit* r, sp_digit* a, sp_digit m)
02255 {
02256 #ifdef WOLFSSL_SP_SMALL
02257     int i;
02258 
02259     for (i=0; i<32; i++)
02260         r[i] = a[i] & m;
02261 #else
02262     int i;
02263 
02264     for (i = 0; i < 32; i += 8) {
02265         r[i+0] = a[i+0] & m;
02266         r[i+1] = a[i+1] & m;
02267         r[i+2] = a[i+2] & m;
02268         r[i+3] = a[i+3] & m;
02269         r[i+4] = a[i+4] & m;
02270         r[i+5] = a[i+5] & m;
02271         r[i+6] = a[i+6] & m;
02272         r[i+7] = a[i+7] & m;
02273     }
02274 #endif
02275 }
02276 
02277 /* Multiply a and b into r. (r = a * b)
02278  *
02279  * r  A single precision integer.
02280  * a  A single precision integer.
02281  * b  A single precision integer.
02282  */
02283 static void sp_2048_mul_64(sp_digit* r, const sp_digit* a,
02284         const sp_digit* b)
02285 {
02286     sp_digit* z0 = r;
02287     sp_digit z1[64];
02288     sp_digit a1[32];
02289     sp_digit b1[32];
02290     sp_digit z2[64];
02291     sp_digit u, ca, cb;
02292 
02293     ca = sp_2048_add_32(a1, a, &a[32]);
02294     cb = sp_2048_add_32(b1, b, &b[32]);
02295     u  = ca & cb;
02296     sp_2048_mul_32(z1, a1, b1);
02297     sp_2048_mul_32(z2, &a[32], &b[32]);
02298     sp_2048_mul_32(z0, a, b);
02299     sp_2048_mask_32(r + 64, a1, 0 - cb);
02300     sp_2048_mask_32(b1, b1, 0 - ca);
02301     u += sp_2048_add_32(r + 64, r + 64, b1);
02302     u += sp_2048_sub_in_place_64(z1, z2);
02303     u += sp_2048_sub_in_place_64(z1, z0);
02304     u += sp_2048_add_64(r + 32, r + 32, z1);
02305     r[96] = u;
02306     XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1));
02307     sp_2048_add_64(r + 64, r + 64, z2);
02308 }
02309 
02310 /* Square a and put result in r. (r = a * a)
02311  *
02312  * r  A single precision integer.
02313  * a  A single precision integer.
02314  */
02315 static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a)
02316 {
02317     sp_digit* z0 = r;
02318     sp_digit z2[64];
02319     sp_digit z1[64];
02320     sp_digit a1[32];
02321     sp_digit u;
02322 
02323     u = sp_2048_add_32(a1, a, &a[32]);
02324     sp_2048_sqr_32(z1, a1);
02325     sp_2048_sqr_32(z2, &a[32]);
02326     sp_2048_sqr_32(z0, a);
02327     sp_2048_mask_32(r + 64, a1, 0 - u);
02328     u += sp_2048_add_32(r + 64, r + 64, r + 64);
02329     u += sp_2048_sub_in_place_64(z1, z2);
02330     u += sp_2048_sub_in_place_64(z1, z0);
02331     u += sp_2048_add_64(r + 32, r + 32, z1);
02332     r[96] = u;
02333     XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1));
02334     sp_2048_add_64(r + 64, r + 64, z2);
02335 }
02336 
02337 #endif /* WOLFSSL_SP_SMALL */
02338 #ifdef WOLFSSL_SP_SMALL
02339 /* Add b to a into r. (r = a + b)
02340  *
02341  * r  A single precision integer.
02342  * a  A single precision integer.
02343  * b  A single precision integer.
02344  */
02345 static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a,
02346         const sp_digit* b)
02347 {
02348     sp_digit c = 0;
02349 
02350     __asm__ __volatile__ (
02351         "add    r12, %[a], #256\n\t"
02352         "\n1:\n\t"
02353         "adds   %[c], %[c], #-1\n\t"
02354         "ldr    r4, [%[a]], #4\n\t"
02355         "ldr    r5, [%[a]], #4\n\t"
02356         "ldr    r6, [%[a]], #4\n\t"
02357         "ldr    r7, [%[a]], #4\n\t"
02358         "ldr    r8, [%[b]], #4\n\t"
02359         "ldr    r9, [%[b]], #4\n\t"
02360         "ldr    r10, [%[b]], #4\n\t"
02361         "ldr    r14, [%[b]], #4\n\t"
02362         "adcs   r4, r4, r8\n\t"
02363         "adcs   r5, r5, r9\n\t"
02364         "adcs   r6, r6, r10\n\t"
02365         "adcs   r7, r7, r14\n\t"
02366         "str    r4, [%[r]], #4\n\t"
02367         "str    r5, [%[r]], #4\n\t"
02368         "str    r6, [%[r]], #4\n\t"
02369         "str    r7, [%[r]], #4\n\t"
02370         "mov    r4, #0\n\t"
02371         "adc    %[c], r4, #0\n\t"
02372         "cmp    %[a], r12\n\t"
02373         "bne    1b\n\t"
02374         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
02375         :
02376         : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
02377     );
02378 
02379     return c;
02380 }
02381 
02382 #endif /* WOLFSSL_SP_SMALL */
02383 #ifdef WOLFSSL_SP_SMALL
02384 /* Sub b from a into a. (a -= b)
02385  *
02386  * a  A single precision integer.
02387  * b  A single precision integer.
02388  */
02389 static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b)
02390 {
02391     sp_digit c = 0;
02392 
02393     __asm__ __volatile__ (
02394         "mov    r14, #0\n\t"
02395         "add    r12, %[a], #256\n\t"
02396         "\n1:\n\t"
02397         "subs   %[c], r14, %[c]\n\t"
02398         "ldr    r3, [%[a]]\n\t"
02399         "ldr    r4, [%[a], #4]\n\t"
02400         "ldr    r5, [%[a], #8]\n\t"
02401         "ldr    r6, [%[a], #12]\n\t"
02402         "ldr    r7, [%[b]], #4\n\t"
02403         "ldr    r8, [%[b]], #4\n\t"
02404         "ldr    r9, [%[b]], #4\n\t"
02405         "ldr    r10, [%[b]], #4\n\t"
02406         "sbcs   r3, r3, r7\n\t"
02407         "sbcs   r4, r4, r8\n\t"
02408         "sbcs   r5, r5, r9\n\t"
02409         "sbcs   r6, r6, r10\n\t"
02410         "str    r3, [%[a]], #4\n\t"
02411         "str    r4, [%[a]], #4\n\t"
02412         "str    r5, [%[a]], #4\n\t"
02413         "str    r6, [%[a]], #4\n\t"
02414         "sbc    %[c], r14, r14\n\t"
02415         "cmp    %[a], r12\n\t"
02416         "bne    1b\n\t"
02417         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
02418         :
02419         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14"
02420     );
02421 
02422     return c;
02423 }
02424 
02425 #endif /* WOLFSSL_SP_SMALL */
02426 #ifdef WOLFSSL_SP_SMALL
02427 /* Multiply a and b into r. (r = a * b)
02428  *
02429  * r  A single precision integer.
02430  * a  A single precision integer.
02431  * b  A single precision integer.
02432  */
02433 static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b)
02434 {
02435     sp_digit tmp[128];
02436 
02437     __asm__ __volatile__ (
02438         "mov    r5, #0\n\t"
02439         "mov    r6, #0\n\t"
02440         "mov    r7, #0\n\t"
02441         "mov    r8, #0\n\t"
02442         "\n1:\n\t"
02443         "subs   r3, r5, #252\n\t"
02444         "movcc  r3, #0\n\t"
02445         "sub    r4, r5, r3\n\t"
02446         "\n2:\n\t"
02447         "ldr    r14, [%[a], r3]\n\t"
02448         "ldr    r12, [%[b], r4]\n\t"
02449         "umull  r9, r10, r14, r12\n\t"
02450         "adds   r6, r6, r9\n\t"
02451         "adcs   r7, r7, r10\n\t"
02452         "adc    r8, r8, #0\n\t"
02453         "add    r3, r3, #4\n\t"
02454         "sub    r4, r4, #4\n\t"
02455         "cmp    r3, #256\n\t"
02456         "beq    3f\n\t"
02457         "cmp    r3, r5\n\t"
02458         "ble    2b\n\t"
02459         "\n3:\n\t"
02460         "str    r6, [%[r], r5]\n\t"
02461         "mov    r6, r7\n\t"
02462         "mov    r7, r8\n\t"
02463         "mov    r8, #0\n\t"
02464         "add    r5, r5, #4\n\t"
02465         "cmp    r5, #504\n\t"
02466         "ble    1b\n\t"
02467         "str    r6, [%[r], r5]\n\t"
02468         :
02469         : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
02470         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
02471     );
02472 
02473     XMEMCPY(r, tmp, sizeof(tmp));
02474 }
02475 
02476 /* Square a and put result in r. (r = a * a)
02477  *
02478  * r  A single precision integer.
02479  * a  A single precision integer.
02480  */
02481 static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a)
02482 {
02483     sp_digit tmp[128]; /* the asm writes the whole result here; copied to r at the end */
02484 
02485     __asm__ __volatile__ (
02486         "mov    r12, #0\n\t" /* r12 = constant zero */
02487         "mov    r6, #0\n\t" /* r6, r7, r8 = low, middle, high word of the running column sum */
02488         "mov    r7, #0\n\t"
02489         "mov    r8, #0\n\t"
02490         "mov    r5, #0\n\t" /* r5 = byte offset of the result word being produced */
02491         "\n1:\n\t" /* outer loop: one result word per pass */
02492         "subs   r3, r5, #252\n\t" /* r3 = r5 - 252; 252 is the byte offset of a[63] */
02493         "movcc  r3, r12\n\t" /* the subtraction borrowed (r5 < 252): start from offset 0 */
02494         "sub    r4, r5, r3\n\t" /* r4 = r5 - r3, so r3 + r4 == r5 */
02495         "\n2:\n\t" /* inner loop: r3 steps up while r4 steps down */
02496         "cmp    r4, r3\n\t"
02497         "beq    4f\n\t" /* both offsets name the same word: add its square once */
02498         "ldr    r14, [%[a], r3]\n\t"
02499         "ldr    r9, [%[a], r4]\n\t"
02500         "umull  r9, r10, r14, r9\n\t" /* r10:r9 = product of the two loaded words */
02501         "adds   r6, r6, r9\n\t" /* first addition of the cross product */
02502         "adcs   r7, r7, r10\n\t"
02503         "adc    r8, r8, r12\n\t"
02504         "adds   r6, r6, r9\n\t" /* second addition: the cross product is counted twice */
02505         "adcs   r7, r7, r10\n\t"
02506         "adc    r8, r8, r12\n\t"
02507         "bal    5f\n\t"
02508         "\n4:\n\t"
02509         "ldr    r14, [%[a], r3]\n\t"
02510         "umull  r9, r10, r14, r14\n\t" /* r10:r9 = the word squared */
02511         "adds   r6, r6, r9\n\t"
02512         "adcs   r7, r7, r10\n\t"
02513         "adc    r8, r8, r12\n\t"
02514         "\n5:\n\t"
02515         "add    r3, r3, #4\n\t"
02516         "sub    r4, r4, #4\n\t"
02517         "cmp    r3, #256\n\t"
02518         "beq    3f\n\t" /* r3 has moved past a[63] */
02519         "cmp    r3, r4\n\t"
02520         "bgt    3f\n\t" /* r3 has passed r4: this column is finished */
02521         "cmp    r3, r5\n\t"
02522         "ble    2b\n\t"
02523         "\n3:\n\t"
02524         "str    r6, [%[r], r5]\n\t" /* low word of the sum is the finished result word; %[r] is tmp */
02525         "mov    r6, r7\n\t" /* shift the sum down one word for the next column */
02526         "mov    r7, r8\n\t"
02527         "mov    r8, #0\n\t"
02528         "add    r5, r5, #4\n\t"
02529         "cmp    r5, #504\n\t"
02530         "ble    1b\n\t"
02531         "str    r6, [%[r], r5]\n\t" /* r5 is 508 here: store the top result word */
02532         :
02533         : [r] "r" (tmp), [a] "r" (a)
02534         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12" /* NOTE(review): "r9" is listed twice */
02535     );
02536 
02537     XMEMCPY(r, tmp, sizeof(tmp)); /* copy all 128 digits of the result out to r */
02538 }
02539 
02540 #endif /* WOLFSSL_SP_SMALL */
02541 #if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
02542 #ifdef WOLFSSL_SP_SMALL
02543 /* Apply the mask m to all 32 digits of a, writing the result to r.
02544  *
02545  * r  Receives the 32 masked digits.
02546  * a  The 32 digits to mask.
02547  * m  Mask ANDed with every digit of a.
02548  */
02549 static void sp_2048_mask_32(sp_digit* r, sp_digit* a, sp_digit m)
02550 {
02551     const sp_digit* end = a + 32;
02552     while (a != end) {
02553         *r++ = *a++ & m;
02554     }
02555 }
02556 
02557 #endif /* WOLFSSL_SP_SMALL */
02558 #ifdef WOLFSSL_SP_SMALL
02559 /* Add b to a into r and return the carry. (r = a + b)
02560  *
02561  * r  A single precision integer; 32 words are written.
02562  * a  A single precision integer; 32 words are read.
02563  * b  A single precision integer; 32 words are read.
02564  */
02565 static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a,
02566         const sp_digit* b)
02567 {
02568     sp_digit c = 0; /* carry handed from one loop pass to the next; starts clear */
02569 
02570     __asm__ __volatile__ (
02571         "add    r12, %[a], #128\n\t" /* r12 = end of a (128 bytes = 32 words) */
02572         "\n1:\n\t"
02573         "adds   %[c], %[c], #-1\n\t" /* put the saved carry back in the C flag: set only when c is non-zero */
02574         "ldr    r4, [%[a]], #4\n\t" /* post-indexed loads: next four words of a ... */
02575         "ldr    r5, [%[a]], #4\n\t"
02576         "ldr    r6, [%[a]], #4\n\t"
02577         "ldr    r7, [%[a]], #4\n\t"
02578         "ldr    r8, [%[b]], #4\n\t" /* ... and next four words of b */
02579         "ldr    r9, [%[b]], #4\n\t"
02580         "ldr    r10, [%[b]], #4\n\t"
02581         "ldr    r14, [%[b]], #4\n\t"
02582         "adcs   r4, r4, r8\n\t" /* add with the carry rippling through the four words */
02583         "adcs   r5, r5, r9\n\t"
02584         "adcs   r6, r6, r10\n\t"
02585         "adcs   r7, r7, r14\n\t"
02586         "str    r4, [%[r]], #4\n\t"
02587         "str    r5, [%[r]], #4\n\t"
02588         "str    r6, [%[r]], #4\n\t"
02589         "str    r7, [%[r]], #4\n\t"
02590         "mov    r4, #0\n\t"
02591         "adc    %[c], r4, #0\n\t" /* c = C flag (0 or 1), saved because the cmp below rewrites the flags */
02592         "cmp    %[a], r12\n\t"
02593         "bne    1b\n\t" /* repeat until a reaches the end pointer */
02594         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
02595         :
02596         : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
02597     );
02598 
02599     return c; /* carry out of the last word added: 0 or 1 */
02600 }
02601 
02602 #endif /* WOLFSSL_SP_SMALL */
02603 #ifdef WOLFSSL_SP_SMALL
02604 /* Sub b from a into a and return the borrow. (a -= b)
02605  *
02606  * a  A single precision integer; 32 words are read and overwritten.
02607  * b  A single precision integer; 32 words are read.
02608  */
02609 static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b)
02610 {
02611     sp_digit c = 0; /* borrow handed from one loop pass to the next: 0 or all ones */
02612 
02613     __asm__ __volatile__ (
02614         "mov    r14, #0\n\t" /* r14 = constant zero */
02615         "add    r12, %[a], #128\n\t" /* r12 = end of a (128 bytes = 32 words) */
02616         "\n1:\n\t"
02617         "subs   %[c], r14, %[c]\n\t" /* 0 - c: leaves the C flag set (no borrow) only when c is 0 */
02618         "ldr    r3, [%[a]]\n\t" /* four words of a, read without moving the pointer */
02619         "ldr    r4, [%[a], #4]\n\t"
02620         "ldr    r5, [%[a], #8]\n\t"
02621         "ldr    r6, [%[a], #12]\n\t"
02622         "ldr    r7, [%[b]], #4\n\t" /* four words of b, post-indexed */
02623         "ldr    r8, [%[b]], #4\n\t"
02624         "ldr    r9, [%[b]], #4\n\t"
02625         "ldr    r10, [%[b]], #4\n\t"
02626         "sbcs   r3, r3, r7\n\t" /* subtract with the borrow rippling through the four words */
02627         "sbcs   r4, r4, r8\n\t"
02628         "sbcs   r5, r5, r9\n\t"
02629         "sbcs   r6, r6, r10\n\t"
02630         "str    r3, [%[a]], #4\n\t" /* write back over a; these stores advance the pointer */
02631         "str    r4, [%[a]], #4\n\t"
02632         "str    r5, [%[a]], #4\n\t"
02633         "str    r6, [%[a]], #4\n\t"
02634         "sbc    %[c], r14, r14\n\t" /* c = 0 when there was no borrow, all ones when there was */
02635         "cmp    %[a], r12\n\t"
02636         "bne    1b\n\t" /* repeat until a reaches the end pointer */
02637         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
02638         :
02639         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14"
02640     );
02641 
02642     return c; /* 0 if the subtraction did not borrow, all ones (-1) if it did */
02643 }
02644 
02645 #endif /* WOLFSSL_SP_SMALL */
02646 #ifdef WOLFSSL_SP_SMALL
02647 /* Multiply a and b into r. (r = a * b)
02648  *
02649  * r  A single precision integer; 64 words are written.
02650  * a  A single precision integer; 32 words are read.
02651  * b  A single precision integer; 32 words are read.
02652  */
02653 static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b)
02654 {
02655     sp_digit tmp[64]; /* the asm writes the whole product here; copied to r at the end */
02656 
02657     __asm__ __volatile__ (
02658         "mov    r5, #0\n\t" /* r5 = byte offset of the result word being produced */
02659         "mov    r6, #0\n\t" /* r6, r7, r8 = low, middle, high word of the running column sum */
02660         "mov    r7, #0\n\t"
02661         "mov    r8, #0\n\t"
02662         "\n1:\n\t" /* outer loop: one result word per pass */
02663         "subs   r3, r5, #124\n\t" /* r3 = r5 - 124; 124 is the byte offset of word 31 */
02664         "movcc  r3, #0\n\t" /* the subtraction borrowed (r5 < 124): start from offset 0 */
02665         "sub    r4, r5, r3\n\t" /* r4 = r5 - r3, so r3 + r4 == r5 */
02666         "\n2:\n\t" /* inner loop: r3 indexes a upwards, r4 indexes b downwards */
02667         "ldr    r14, [%[a], r3]\n\t"
02668         "ldr    r12, [%[b], r4]\n\t"
02669         "umull  r9, r10, r14, r12\n\t" /* r10:r9 = product of the two loaded words */
02670         "adds   r6, r6, r9\n\t" /* add the product into the three-word sum */
02671         "adcs   r7, r7, r10\n\t"
02672         "adc    r8, r8, #0\n\t"
02673         "add    r3, r3, #4\n\t"
02674         "sub    r4, r4, #4\n\t"
02675         "cmp    r3, #128\n\t"
02676         "beq    3f\n\t" /* r3 has moved past word 31 of a */
02677         "cmp    r3, r5\n\t"
02678         "ble    2b\n\t"
02679         "\n3:\n\t"
02680         "str    r6, [%[r], r5]\n\t" /* low word of the sum is the finished result word; %[r] is tmp */
02681         "mov    r6, r7\n\t" /* shift the sum down one word for the next column */
02682         "mov    r7, r8\n\t"
02683         "mov    r8, #0\n\t"
02684         "add    r5, r5, #4\n\t"
02685         "cmp    r5, #248\n\t"
02686         "ble    1b\n\t"
02687         "str    r6, [%[r], r5]\n\t" /* r5 is 252 here: store the top result word */
02688         :
02689         : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
02690         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
02691     );
02692 
02693     XMEMCPY(r, tmp, sizeof(tmp)); /* copy all 64 digits of the product out to r */
02694 }
02695 
02696 /* Square a and put result in r. (r = a * a)
02697  *
02698  * r  A single precision integer; 64 words are written.
02699  * a  A single precision integer; 32 words are read.
02700  */
02701 static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a)
02702 {
02703     sp_digit tmp[64]; /* the asm writes the whole result here; copied to r at the end */
02704 
02705     __asm__ __volatile__ (
02706         "mov    r12, #0\n\t" /* r12 = constant zero */
02707         "mov    r6, #0\n\t" /* r6, r7, r8 = low, middle, high word of the running column sum */
02708         "mov    r7, #0\n\t"
02709         "mov    r8, #0\n\t"
02710         "mov    r5, #0\n\t" /* r5 = byte offset of the result word being produced */
02711         "\n1:\n\t" /* outer loop: one result word per pass */
02712         "subs   r3, r5, #124\n\t" /* r3 = r5 - 124; 124 is the byte offset of a[31] */
02713         "movcc  r3, r12\n\t" /* the subtraction borrowed (r5 < 124): start from offset 0 */
02714         "sub    r4, r5, r3\n\t" /* r4 = r5 - r3, so r3 + r4 == r5 */
02715         "\n2:\n\t" /* inner loop: r3 steps up while r4 steps down */
02716         "cmp    r4, r3\n\t"
02717         "beq    4f\n\t" /* both offsets name the same word: add its square once */
02718         "ldr    r14, [%[a], r3]\n\t"
02719         "ldr    r9, [%[a], r4]\n\t"
02720         "umull  r9, r10, r14, r9\n\t" /* r10:r9 = product of the two loaded words */
02721         "adds   r6, r6, r9\n\t" /* first addition of the cross product */
02722         "adcs   r7, r7, r10\n\t"
02723         "adc    r8, r8, r12\n\t"
02724         "adds   r6, r6, r9\n\t" /* second addition: the cross product is counted twice */
02725         "adcs   r7, r7, r10\n\t"
02726         "adc    r8, r8, r12\n\t"
02727         "bal    5f\n\t"
02728         "\n4:\n\t"
02729         "ldr    r14, [%[a], r3]\n\t"
02730         "umull  r9, r10, r14, r14\n\t" /* r10:r9 = the word squared */
02731         "adds   r6, r6, r9\n\t"
02732         "adcs   r7, r7, r10\n\t"
02733         "adc    r8, r8, r12\n\t"
02734         "\n5:\n\t"
02735         "add    r3, r3, #4\n\t"
02736         "sub    r4, r4, #4\n\t"
02737         "cmp    r3, #128\n\t"
02738         "beq    3f\n\t" /* r3 has moved past a[31] */
02739         "cmp    r3, r4\n\t"
02740         "bgt    3f\n\t" /* r3 has passed r4: this column is finished */
02741         "cmp    r3, r5\n\t"
02742         "ble    2b\n\t"
02743         "\n3:\n\t"
02744         "str    r6, [%[r], r5]\n\t" /* low word of the sum is the finished result word; %[r] is tmp */
02745         "mov    r6, r7\n\t" /* shift the sum down one word for the next column */
02746         "mov    r7, r8\n\t"
02747         "mov    r8, #0\n\t"
02748         "add    r5, r5, #4\n\t"
02749         "cmp    r5, #248\n\t"
02750         "ble    1b\n\t"
02751         "str    r6, [%[r], r5]\n\t" /* r5 is 252 here: store the top result word */
02752         :
02753         : [r] "r" (tmp), [a] "r" (a)
02754         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12" /* NOTE(review): "r9" is listed twice */
02755     );
02756 
02757     XMEMCPY(r, tmp, sizeof(tmp)); /* copy all 64 digits of the result out to r */
02758 }
02759 
02760 #endif /* WOLFSSL_SP_SMALL */
02761 #endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
02762 
02763 /* Calculate the bottom digit of -1/a mod 2^n.
02764  *
02765  * a    Single precision number; only its lowest digit, a[0], is read.
02766  * rho  Receives the negated inverse of a[0].
02767  */
02768 static void sp_2048_mont_setup(sp_digit* a, sp_digit* rho)
02769 {
02770     sp_digit low = a[0];
02771     sp_digit inv;
02772     int round;
02773 
02774     /* Seed value: inv * low == 1 mod 2^4. */
02775     inv = low + (((low + 2) & 4) << 1);
02776     /* Each round doubles the valid low bits: 2^8, 2^16, then 2^32. */
02777     for (round = 0; round < 3; round++)
02778         inv = inv * (2 - low * inv);
02779     *rho = -inv;
02780 }
02781 
02782 #if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
02783 /* r = 2^n mod m where n is the number of bits to reduce by.
02784  * Given m must be 2048 bits, just need to subtract.
02785  *
02786  * r  A single precision number; all 32 digits are overwritten.
02787  * m  A single precision number.
02788  */
02789 static void sp_2048_mont_norm_32(sp_digit* r, sp_digit* m)
02790 {
02791     XMEMSET(r, 0, sizeof(sp_digit) * 32); /* r = 0 across all 32 digits */
02792 
02793     /* r = 2^n mod m */
02794     sp_2048_sub_in_place_32(r, m); /* r = 0 - m; the returned borrow is not used */
02795 }
02796 
02797 /* Conditionally subtract b from a using the mask m.
02798  * m is -1 to subtract b and 0 to copy a to r unchanged.
02799  *
02800  * r  A single precision number representing condition subtract result.
02801  * a  A single precision number to subtract from.
02802  * b  A single precision number to subtract.
02803  * m  Mask value to apply; it is ANDed with every word of b before subtracting.
02804  */
02805 static sp_digit sp_2048_cond_sub_32(sp_digit* r, sp_digit* a, sp_digit* b,
02806         sp_digit m)
02807 {
02808     sp_digit c = 0; /* borrow: 0 or all ones */
02809 
02810 #ifdef WOLFSSL_SP_SMALL
02811     __asm__ __volatile__ (
02812         "mov    r9, #0\n\t" /* r9 = constant zero */
02813         "mov    r8, #0\n\t" /* r8 = byte offset of the current word */
02814         "1:\n\t"
02815         "subs   %[c], r9, %[c]\n\t" /* 0 - c: leaves the C flag set (no borrow) only when c is 0 */
02816         "ldr    r4, [%[a], r8]\n\t"
02817         "ldr    r5, [%[b], r8]\n\t"
02818         "and    r5, r5, %[m]\n\t" /* masked word of b: zero when m is 0 */
02819         "sbcs   r4, r4, r5\n\t"
02820         "sbc    %[c], r9, r9\n\t" /* save the borrow in c (0 or all ones); the cmp below rewrites the flags */
02821         "str    r4, [%[r], r8]\n\t"
02822         "add    r8, r8, #4\n\t"
02823         "cmp    r8, #128\n\t"
02824         "blt    1b\n\t" /* 32 words in total */
02825         : [c] "+r" (c)
02826         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
02827         : "memory", "r4", "r6", "r5", "r7", "r8", "r9" /* r6 and r7 are listed but not used by this variant */
02828     );
02829 #else
02830     __asm__ __volatile__ (
02831 
02832         "mov    r9, #0\n\t" /* r9 = constant zero */
02833         "ldr    r4, [%[a], #0]\n\t" /* unrolled: two words per group, 16 groups */
02834         "ldr    r6, [%[a], #4]\n\t"
02835         "ldr    r5, [%[b], #0]\n\t"
02836         "ldr    r7, [%[b], #4]\n\t"
02837         "and    r5, r5, %[m]\n\t" /* masked words of b: zero when m is 0 */
02838         "and    r7, r7, %[m]\n\t"
02839         "subs   r4, r4, r5\n\t" /* first word starts the borrow chain */
02840         "sbcs   r6, r6, r7\n\t" /* every later word continues it */
02841         "str    r4, [%[r], #0]\n\t"
02842         "str    r6, [%[r], #4]\n\t"
02843         "ldr    r4, [%[a], #8]\n\t"
02844         "ldr    r6, [%[a], #12]\n\t"
02845         "ldr    r5, [%[b], #8]\n\t"
02846         "ldr    r7, [%[b], #12]\n\t"
02847         "and    r5, r5, %[m]\n\t"
02848         "and    r7, r7, %[m]\n\t"
02849         "sbcs   r4, r4, r5\n\t"
02850         "sbcs   r6, r6, r7\n\t"
02851         "str    r4, [%[r], #8]\n\t"
02852         "str    r6, [%[r], #12]\n\t"
02853         "ldr    r4, [%[a], #16]\n\t"
02854         "ldr    r6, [%[a], #20]\n\t"
02855         "ldr    r5, [%[b], #16]\n\t"
02856         "ldr    r7, [%[b], #20]\n\t"
02857         "and    r5, r5, %[m]\n\t"
02858         "and    r7, r7, %[m]\n\t"
02859         "sbcs   r4, r4, r5\n\t"
02860         "sbcs   r6, r6, r7\n\t"
02861         "str    r4, [%[r], #16]\n\t"
02862         "str    r6, [%[r], #20]\n\t"
02863         "ldr    r4, [%[a], #24]\n\t"
02864         "ldr    r6, [%[a], #28]\n\t"
02865         "ldr    r5, [%[b], #24]\n\t"
02866         "ldr    r7, [%[b], #28]\n\t"
02867         "and    r5, r5, %[m]\n\t"
02868         "and    r7, r7, %[m]\n\t"
02869         "sbcs   r4, r4, r5\n\t"
02870         "sbcs   r6, r6, r7\n\t"
02871         "str    r4, [%[r], #24]\n\t"
02872         "str    r6, [%[r], #28]\n\t"
02873         "ldr    r4, [%[a], #32]\n\t"
02874         "ldr    r6, [%[a], #36]\n\t"
02875         "ldr    r5, [%[b], #32]\n\t"
02876         "ldr    r7, [%[b], #36]\n\t"
02877         "and    r5, r5, %[m]\n\t"
02878         "and    r7, r7, %[m]\n\t"
02879         "sbcs   r4, r4, r5\n\t"
02880         "sbcs   r6, r6, r7\n\t"
02881         "str    r4, [%[r], #32]\n\t"
02882         "str    r6, [%[r], #36]\n\t"
02883         "ldr    r4, [%[a], #40]\n\t"
02884         "ldr    r6, [%[a], #44]\n\t"
02885         "ldr    r5, [%[b], #40]\n\t"
02886         "ldr    r7, [%[b], #44]\n\t"
02887         "and    r5, r5, %[m]\n\t"
02888         "and    r7, r7, %[m]\n\t"
02889         "sbcs   r4, r4, r5\n\t"
02890         "sbcs   r6, r6, r7\n\t"
02891         "str    r4, [%[r], #40]\n\t"
02892         "str    r6, [%[r], #44]\n\t"
02893         "ldr    r4, [%[a], #48]\n\t"
02894         "ldr    r6, [%[a], #52]\n\t"
02895         "ldr    r5, [%[b], #48]\n\t"
02896         "ldr    r7, [%[b], #52]\n\t"
02897         "and    r5, r5, %[m]\n\t"
02898         "and    r7, r7, %[m]\n\t"
02899         "sbcs   r4, r4, r5\n\t"
02900         "sbcs   r6, r6, r7\n\t"
02901         "str    r4, [%[r], #48]\n\t"
02902         "str    r6, [%[r], #52]\n\t"
02903         "ldr    r4, [%[a], #56]\n\t"
02904         "ldr    r6, [%[a], #60]\n\t"
02905         "ldr    r5, [%[b], #56]\n\t"
02906         "ldr    r7, [%[b], #60]\n\t"
02907         "and    r5, r5, %[m]\n\t"
02908         "and    r7, r7, %[m]\n\t"
02909         "sbcs   r4, r4, r5\n\t"
02910         "sbcs   r6, r6, r7\n\t"
02911         "str    r4, [%[r], #56]\n\t"
02912         "str    r6, [%[r], #60]\n\t"
02913         "ldr    r4, [%[a], #64]\n\t"
02914         "ldr    r6, [%[a], #68]\n\t"
02915         "ldr    r5, [%[b], #64]\n\t"
02916         "ldr    r7, [%[b], #68]\n\t"
02917         "and    r5, r5, %[m]\n\t"
02918         "and    r7, r7, %[m]\n\t"
02919         "sbcs   r4, r4, r5\n\t"
02920         "sbcs   r6, r6, r7\n\t"
02921         "str    r4, [%[r], #64]\n\t"
02922         "str    r6, [%[r], #68]\n\t"
02923         "ldr    r4, [%[a], #72]\n\t"
02924         "ldr    r6, [%[a], #76]\n\t"
02925         "ldr    r5, [%[b], #72]\n\t"
02926         "ldr    r7, [%[b], #76]\n\t"
02927         "and    r5, r5, %[m]\n\t"
02928         "and    r7, r7, %[m]\n\t"
02929         "sbcs   r4, r4, r5\n\t"
02930         "sbcs   r6, r6, r7\n\t"
02931         "str    r4, [%[r], #72]\n\t"
02932         "str    r6, [%[r], #76]\n\t"
02933         "ldr    r4, [%[a], #80]\n\t"
02934         "ldr    r6, [%[a], #84]\n\t"
02935         "ldr    r5, [%[b], #80]\n\t"
02936         "ldr    r7, [%[b], #84]\n\t"
02937         "and    r5, r5, %[m]\n\t"
02938         "and    r7, r7, %[m]\n\t"
02939         "sbcs   r4, r4, r5\n\t"
02940         "sbcs   r6, r6, r7\n\t"
02941         "str    r4, [%[r], #80]\n\t"
02942         "str    r6, [%[r], #84]\n\t"
02943         "ldr    r4, [%[a], #88]\n\t"
02944         "ldr    r6, [%[a], #92]\n\t"
02945         "ldr    r5, [%[b], #88]\n\t"
02946         "ldr    r7, [%[b], #92]\n\t"
02947         "and    r5, r5, %[m]\n\t"
02948         "and    r7, r7, %[m]\n\t"
02949         "sbcs   r4, r4, r5\n\t"
02950         "sbcs   r6, r6, r7\n\t"
02951         "str    r4, [%[r], #88]\n\t"
02952         "str    r6, [%[r], #92]\n\t"
02953         "ldr    r4, [%[a], #96]\n\t"
02954         "ldr    r6, [%[a], #100]\n\t"
02955         "ldr    r5, [%[b], #96]\n\t"
02956         "ldr    r7, [%[b], #100]\n\t"
02957         "and    r5, r5, %[m]\n\t"
02958         "and    r7, r7, %[m]\n\t"
02959         "sbcs   r4, r4, r5\n\t"
02960         "sbcs   r6, r6, r7\n\t"
02961         "str    r4, [%[r], #96]\n\t"
02962         "str    r6, [%[r], #100]\n\t"
02963         "ldr    r4, [%[a], #104]\n\t"
02964         "ldr    r6, [%[a], #108]\n\t"
02965         "ldr    r5, [%[b], #104]\n\t"
02966         "ldr    r7, [%[b], #108]\n\t"
02967         "and    r5, r5, %[m]\n\t"
02968         "and    r7, r7, %[m]\n\t"
02969         "sbcs   r4, r4, r5\n\t"
02970         "sbcs   r6, r6, r7\n\t"
02971         "str    r4, [%[r], #104]\n\t"
02972         "str    r6, [%[r], #108]\n\t"
02973         "ldr    r4, [%[a], #112]\n\t"
02974         "ldr    r6, [%[a], #116]\n\t"
02975         "ldr    r5, [%[b], #112]\n\t"
02976         "ldr    r7, [%[b], #116]\n\t"
02977         "and    r5, r5, %[m]\n\t"
02978         "and    r7, r7, %[m]\n\t"
02979         "sbcs   r4, r4, r5\n\t"
02980         "sbcs   r6, r6, r7\n\t"
02981         "str    r4, [%[r], #112]\n\t"
02982         "str    r6, [%[r], #116]\n\t"
02983         "ldr    r4, [%[a], #120]\n\t"
02984         "ldr    r6, [%[a], #124]\n\t"
02985         "ldr    r5, [%[b], #120]\n\t"
02986         "ldr    r7, [%[b], #124]\n\t"
02987         "and    r5, r5, %[m]\n\t"
02988         "and    r7, r7, %[m]\n\t"
02989         "sbcs   r4, r4, r5\n\t"
02990         "sbcs   r6, r6, r7\n\t"
02991         "str    r4, [%[r], #120]\n\t"
02992         "str    r6, [%[r], #124]\n\t"
02993         "sbc    %[c], r9, r9\n\t" /* c = 0 when there was no borrow, all ones when there was */
02994         : [c] "+r" (c)
02995         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
02996         : "memory", "r4", "r6", "r5", "r7", "r8", "r9" /* r8 is listed but not used by this variant */
02997     );
02998 #endif /* WOLFSSL_SP_SMALL */
02999 
03000     return c; /* 0 if the subtraction did not borrow, all ones (-1) if it did */
03001 }
03002 
03003 /* Reduce the number back to 2048 bits using Montgomery reduction.
03004  *
03005  * a   A single precision number to reduce in place; words 0..63 are read and the result is left in words 0..31.
03006  * m   The single precision number representing the modulus.
03007  * mp  The digit representing the negative inverse of m mod 2^n.
03008  */
03009 SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, sp_digit* m,
03010         sp_digit mp)
03011 {
03012     sp_digit ca = 0; /* carry out of the top word, kept from one pass to the next */
03013 
03014     __asm__ __volatile__ (
03015         "# i = 0\n\t"
03016         "mov    r12, #0\n\t" /* r12 = loop counter in bytes: 0, 4, ... 124 */
03017         "ldr    r10, [%[a], #0]\n\t" /* r10 holds a[i] in a register */
03018         "ldr    r14, [%[a], #4]\n\t" /* r14 holds a[i+1] in a register */
03019         "\n1:\n\t"
03020         "# mu = a[i] * mp\n\t"
03021         "mul    r8, %[mp], r10\n\t" /* r8 = mu, low 32 bits of the product */
03022         "# a[i+0] += m[0] * mu\n\t"
03023         "ldr    r7, [%[m], #0]\n\t"
03024         "ldr    r9, [%[a], #0]\n\t" /* NOTE(review): r9 is loaded again below before it is read; a[i] comes from r10 */
03025         "umull  r6, r7, r8, r7\n\t"
03026         "adds   r10, r10, r6\n\t" /* the sum itself is not stored (r10 is overwritten below); only the carry is kept */
03027         "adc    r5, r7, #0\n\t" /* r5 = carry word into the next position */
03028         "# a[i+1] += m[1] * mu\n\t"
03029         "ldr    r7, [%[m], #4]\n\t"
03030         "ldr    r9, [%[a], #4]\n\t" /* NOTE(review): this r9 value is not read either; a[i+1] comes from r14 */
03031         "umull  r6, r7, r8, r7\n\t"
03032         "adds   r10, r14, r6\n\t" /* new a[i+1] stays in r10: it is a[i] of the next pass */
03033         "adc    r4, r7, #0\n\t"
03034         "adds   r10, r10, r5\n\t"
03035         "adc    r4, r4, #0\n\t"
03036         "# a[i+2] += m[2] * mu\n\t"
03037         "ldr    r7, [%[m], #8]\n\t"
03038         "ldr    r14, [%[a], #8]\n\t"
03039         "umull  r6, r7, r8, r7\n\t"
03040         "adds   r14, r14, r6\n\t" /* new a[i+2] stays in r14: it is a[i+1] of the next pass */
03041         "adc    r5, r7, #0\n\t"
03042         "adds   r14, r14, r4\n\t"
03043         "adc    r5, r5, #0\n\t"
03044         "# a[i+3] += m[3] * mu\n\t"
03045         "ldr    r7, [%[m], #12]\n\t" /* from here each word is loaded, updated and stored; r4 and r5 alternate as the carry word */
03046         "ldr    r9, [%[a], #12]\n\t"
03047         "umull  r6, r7, r8, r7\n\t"
03048         "adds   r9, r9, r6\n\t"
03049         "adc    r4, r7, #0\n\t"
03050         "adds   r9, r9, r5\n\t"
03051         "str    r9, [%[a], #12]\n\t"
03052         "adc    r4, r4, #0\n\t"
03053         "# a[i+4] += m[4] * mu\n\t"
03054         "ldr    r7, [%[m], #16]\n\t"
03055         "ldr    r9, [%[a], #16]\n\t"
03056         "umull  r6, r7, r8, r7\n\t"
03057         "adds   r9, r9, r6\n\t"
03058         "adc    r5, r7, #0\n\t"
03059         "adds   r9, r9, r4\n\t"
03060         "str    r9, [%[a], #16]\n\t"
03061         "adc    r5, r5, #0\n\t"
03062         "# a[i+5] += m[5] * mu\n\t"
03063         "ldr    r7, [%[m], #20]\n\t"
03064         "ldr    r9, [%[a], #20]\n\t"
03065         "umull  r6, r7, r8, r7\n\t"
03066         "adds   r9, r9, r6\n\t"
03067         "adc    r4, r7, #0\n\t"
03068         "adds   r9, r9, r5\n\t"
03069         "str    r9, [%[a], #20]\n\t"
03070         "adc    r4, r4, #0\n\t"
03071         "# a[i+6] += m[6] * mu\n\t"
03072         "ldr    r7, [%[m], #24]\n\t"
03073         "ldr    r9, [%[a], #24]\n\t"
03074         "umull  r6, r7, r8, r7\n\t"
03075         "adds   r9, r9, r6\n\t"
03076         "adc    r5, r7, #0\n\t"
03077         "adds   r9, r9, r4\n\t"
03078         "str    r9, [%[a], #24]\n\t"
03079         "adc    r5, r5, #0\n\t"
03080         "# a[i+7] += m[7] * mu\n\t"
03081         "ldr    r7, [%[m], #28]\n\t"
03082         "ldr    r9, [%[a], #28]\n\t"
03083         "umull  r6, r7, r8, r7\n\t"
03084         "adds   r9, r9, r6\n\t"
03085         "adc    r4, r7, #0\n\t"
03086         "adds   r9, r9, r5\n\t"
03087         "str    r9, [%[a], #28]\n\t"
03088         "adc    r4, r4, #0\n\t"
03089         "# a[i+8] += m[8] * mu\n\t"
03090         "ldr    r7, [%[m], #32]\n\t"
03091         "ldr    r9, [%[a], #32]\n\t"
03092         "umull  r6, r7, r8, r7\n\t"
03093         "adds   r9, r9, r6\n\t"
03094         "adc    r5, r7, #0\n\t"
03095         "adds   r9, r9, r4\n\t"
03096         "str    r9, [%[a], #32]\n\t"
03097         "adc    r5, r5, #0\n\t"
03098         "# a[i+9] += m[9] * mu\n\t"
03099         "ldr    r7, [%[m], #36]\n\t"
03100         "ldr    r9, [%[a], #36]\n\t"
03101         "umull  r6, r7, r8, r7\n\t"
03102         "adds   r9, r9, r6\n\t"
03103         "adc    r4, r7, #0\n\t"
03104         "adds   r9, r9, r5\n\t"
03105         "str    r9, [%[a], #36]\n\t"
03106         "adc    r4, r4, #0\n\t"
03107         "# a[i+10] += m[10] * mu\n\t"
03108         "ldr    r7, [%[m], #40]\n\t"
03109         "ldr    r9, [%[a], #40]\n\t"
03110         "umull  r6, r7, r8, r7\n\t"
03111         "adds   r9, r9, r6\n\t"
03112         "adc    r5, r7, #0\n\t"
03113         "adds   r9, r9, r4\n\t"
03114         "str    r9, [%[a], #40]\n\t"
03115         "adc    r5, r5, #0\n\t"
03116         "# a[i+11] += m[11] * mu\n\t"
03117         "ldr    r7, [%[m], #44]\n\t"
03118         "ldr    r9, [%[a], #44]\n\t"
03119         "umull  r6, r7, r8, r7\n\t"
03120         "adds   r9, r9, r6\n\t"
03121         "adc    r4, r7, #0\n\t"
03122         "adds   r9, r9, r5\n\t"
03123         "str    r9, [%[a], #44]\n\t"
03124         "adc    r4, r4, #0\n\t"
03125         "# a[i+12] += m[12] * mu\n\t"
03126         "ldr    r7, [%[m], #48]\n\t"
03127         "ldr    r9, [%[a], #48]\n\t"
03128         "umull  r6, r7, r8, r7\n\t"
03129         "adds   r9, r9, r6\n\t"
03130         "adc    r5, r7, #0\n\t"
03131         "adds   r9, r9, r4\n\t"
03132         "str    r9, [%[a], #48]\n\t"
03133         "adc    r5, r5, #0\n\t"
03134         "# a[i+13] += m[13] * mu\n\t"
03135         "ldr    r7, [%[m], #52]\n\t"
03136         "ldr    r9, [%[a], #52]\n\t"
03137         "umull  r6, r7, r8, r7\n\t"
03138         "adds   r9, r9, r6\n\t"
03139         "adc    r4, r7, #0\n\t"
03140         "adds   r9, r9, r5\n\t"
03141         "str    r9, [%[a], #52]\n\t"
03142         "adc    r4, r4, #0\n\t"
03143         "# a[i+14] += m[14] * mu\n\t"
03144         "ldr    r7, [%[m], #56]\n\t"
03145         "ldr    r9, [%[a], #56]\n\t"
03146         "umull  r6, r7, r8, r7\n\t"
03147         "adds   r9, r9, r6\n\t"
03148         "adc    r5, r7, #0\n\t"
03149         "adds   r9, r9, r4\n\t"
03150         "str    r9, [%[a], #56]\n\t"
03151         "adc    r5, r5, #0\n\t"
03152         "# a[i+15] += m[15] * mu\n\t"
03153         "ldr    r7, [%[m], #60]\n\t"
03154         "ldr    r9, [%[a], #60]\n\t"
03155         "umull  r6, r7, r8, r7\n\t"
03156         "adds   r9, r9, r6\n\t"
03157         "adc    r4, r7, #0\n\t"
03158         "adds   r9, r9, r5\n\t"
03159         "str    r9, [%[a], #60]\n\t"
03160         "adc    r4, r4, #0\n\t"
03161         "# a[i+16] += m[16] * mu\n\t"
03162         "ldr    r7, [%[m], #64]\n\t"
03163         "ldr    r9, [%[a], #64]\n\t"
03164         "umull  r6, r7, r8, r7\n\t"
03165         "adds   r9, r9, r6\n\t"
03166         "adc    r5, r7, #0\n\t"
03167         "adds   r9, r9, r4\n\t"
03168         "str    r9, [%[a], #64]\n\t"
03169         "adc    r5, r5, #0\n\t"
03170         "# a[i+17] += m[17] * mu\n\t"
03171         "ldr    r7, [%[m], #68]\n\t"
03172         "ldr    r9, [%[a], #68]\n\t"
03173         "umull  r6, r7, r8, r7\n\t"
03174         "adds   r9, r9, r6\n\t"
03175         "adc    r4, r7, #0\n\t"
03176         "adds   r9, r9, r5\n\t"
03177         "str    r9, [%[a], #68]\n\t"
03178         "adc    r4, r4, #0\n\t"
03179         "# a[i+18] += m[18] * mu\n\t"
03180         "ldr    r7, [%[m], #72]\n\t"
03181         "ldr    r9, [%[a], #72]\n\t"
03182         "umull  r6, r7, r8, r7\n\t"
03183         "adds   r9, r9, r6\n\t"
03184         "adc    r5, r7, #0\n\t"
03185         "adds   r9, r9, r4\n\t"
03186         "str    r9, [%[a], #72]\n\t"
03187         "adc    r5, r5, #0\n\t"
03188         "# a[i+19] += m[19] * mu\n\t"
03189         "ldr    r7, [%[m], #76]\n\t"
03190         "ldr    r9, [%[a], #76]\n\t"
03191         "umull  r6, r7, r8, r7\n\t"
03192         "adds   r9, r9, r6\n\t"
03193         "adc    r4, r7, #0\n\t"
03194         "adds   r9, r9, r5\n\t"
03195         "str    r9, [%[a], #76]\n\t"
03196         "adc    r4, r4, #0\n\t"
03197         "# a[i+20] += m[20] * mu\n\t"
03198         "ldr    r7, [%[m], #80]\n\t"
03199         "ldr    r9, [%[a], #80]\n\t"
03200         "umull  r6, r7, r8, r7\n\t"
03201         "adds   r9, r9, r6\n\t"
03202         "adc    r5, r7, #0\n\t"
03203         "adds   r9, r9, r4\n\t"
03204         "str    r9, [%[a], #80]\n\t"
03205         "adc    r5, r5, #0\n\t"
03206         "# a[i+21] += m[21] * mu\n\t"
03207         "ldr    r7, [%[m], #84]\n\t"
03208         "ldr    r9, [%[a], #84]\n\t"
03209         "umull  r6, r7, r8, r7\n\t"
03210         "adds   r9, r9, r6\n\t"
03211         "adc    r4, r7, #0\n\t"
03212         "adds   r9, r9, r5\n\t"
03213         "str    r9, [%[a], #84]\n\t"
03214         "adc    r4, r4, #0\n\t"
03215         "# a[i+22] += m[22] * mu\n\t"
03216         "ldr    r7, [%[m], #88]\n\t"
03217         "ldr    r9, [%[a], #88]\n\t"
03218         "umull  r6, r7, r8, r7\n\t"
03219         "adds   r9, r9, r6\n\t"
03220         "adc    r5, r7, #0\n\t"
03221         "adds   r9, r9, r4\n\t"
03222         "str    r9, [%[a], #88]\n\t"
03223         "adc    r5, r5, #0\n\t"
03224         "# a[i+23] += m[23] * mu\n\t"
03225         "ldr    r7, [%[m], #92]\n\t"
03226         "ldr    r9, [%[a], #92]\n\t"
03227         "umull  r6, r7, r8, r7\n\t"
03228         "adds   r9, r9, r6\n\t"
03229         "adc    r4, r7, #0\n\t"
03230         "adds   r9, r9, r5\n\t"
03231         "str    r9, [%[a], #92]\n\t"
03232         "adc    r4, r4, #0\n\t"
03233         "# a[i+24] += m[24] * mu\n\t"
03234         "ldr    r7, [%[m], #96]\n\t"
03235         "ldr    r9, [%[a], #96]\n\t"
03236         "umull  r6, r7, r8, r7\n\t"
03237         "adds   r9, r9, r6\n\t"
03238         "adc    r5, r7, #0\n\t"
03239         "adds   r9, r9, r4\n\t"
03240         "str    r9, [%[a], #96]\n\t"
03241         "adc    r5, r5, #0\n\t"
03242         "# a[i+25] += m[25] * mu\n\t"
03243         "ldr    r7, [%[m], #100]\n\t"
03244         "ldr    r9, [%[a], #100]\n\t"
03245         "umull  r6, r7, r8, r7\n\t"
03246         "adds   r9, r9, r6\n\t"
03247         "adc    r4, r7, #0\n\t"
03248         "adds   r9, r9, r5\n\t"
03249         "str    r9, [%[a], #100]\n\t"
03250         "adc    r4, r4, #0\n\t"
03251         "# a[i+26] += m[26] * mu\n\t"
03252         "ldr    r7, [%[m], #104]\n\t"
03253         "ldr    r9, [%[a], #104]\n\t"
03254         "umull  r6, r7, r8, r7\n\t"
03255         "adds   r9, r9, r6\n\t"
03256         "adc    r5, r7, #0\n\t"
03257         "adds   r9, r9, r4\n\t"
03258         "str    r9, [%[a], #104]\n\t"
03259         "adc    r5, r5, #0\n\t"
03260         "# a[i+27] += m[27] * mu\n\t"
03261         "ldr    r7, [%[m], #108]\n\t"
03262         "ldr    r9, [%[a], #108]\n\t"
03263         "umull  r6, r7, r8, r7\n\t"
03264         "adds   r9, r9, r6\n\t"
03265         "adc    r4, r7, #0\n\t"
03266         "adds   r9, r9, r5\n\t"
03267         "str    r9, [%[a], #108]\n\t"
03268         "adc    r4, r4, #0\n\t"
03269         "# a[i+28] += m[28] * mu\n\t"
03270         "ldr    r7, [%[m], #112]\n\t"
03271         "ldr    r9, [%[a], #112]\n\t"
03272         "umull  r6, r7, r8, r7\n\t"
03273         "adds   r9, r9, r6\n\t"
03274         "adc    r5, r7, #0\n\t"
03275         "adds   r9, r9, r4\n\t"
03276         "str    r9, [%[a], #112]\n\t"
03277         "adc    r5, r5, #0\n\t"
03278         "# a[i+29] += m[29] * mu\n\t"
03279         "ldr    r7, [%[m], #116]\n\t"
03280         "ldr    r9, [%[a], #116]\n\t"
03281         "umull  r6, r7, r8, r7\n\t"
03282         "adds   r9, r9, r6\n\t"
03283         "adc    r4, r7, #0\n\t"
03284         "adds   r9, r9, r5\n\t"
03285         "str    r9, [%[a], #116]\n\t"
03286         "adc    r4, r4, #0\n\t"
03287         "# a[i+30] += m[30] * mu\n\t"
03288         "ldr    r7, [%[m], #120]\n\t"
03289         "ldr    r9, [%[a], #120]\n\t"
03290         "umull  r6, r7, r8, r7\n\t"
03291         "adds   r9, r9, r6\n\t"
03292         "adc    r5, r7, #0\n\t"
03293         "adds   r9, r9, r4\n\t"
03294         "str    r9, [%[a], #120]\n\t"
03295         "adc    r5, r5, #0\n\t"
03296         "# a[i+31] += m[31] * mu\n\t"
03297         "ldr    r7, [%[m], #124]\n\t"
03298         "ldr   r9, [%[a], #124]\n\t"
03299         "umull  r6, r7, r8, r7\n\t"
03300         "adds   r5, r5, r6\n\t" /* r5 = incoming carry word + low half of the product */
03301         "adcs   r7, r7, %[ca]\n\t" /* r7 = high half + carry flag + ca saved by the previous pass */
03302         "mov    %[ca], #0\n\t"
03303         "adc    %[ca], %[ca], %[ca]\n\t" /* ca = carry flag out of the addition above */
03304         "adds   r9, r9, r5\n\t"
03305         "str    r9, [%[a], #124]\n\t"
03306         "ldr    r9, [%[a], #128]\n\t" /* a[i+32] absorbs r7 and the carry from a[i+31] */
03307         "adcs   r9, r9, r7\n\t"
03308         "str    r9, [%[a], #128]\n\t"
03309         "adc    %[ca], %[ca], #0\n\t" /* add the carry out of a[i+32] into ca */
03310         "# i += 1\n\t"
03311         "add    %[a], %[a], #4\n\t" /* advance a by one word, so the fixed offsets above address the next window */
03312         "add    r12, r12, #4\n\t"
03313         "cmp    r12, #128\n\t"
03314         "blt    1b\n\t" /* 32 passes in total */
03315         "str    r10, [%[a], #0]\n\t" /* write the two register-held words back; a now points 32 words past its start */
03316         "str    r14, [%[a], #4]\n\t"
03317         : [ca] "+r" (ca), [a] "+r" (a)
03318         : [m] "r" (m), [mp] "r" (mp)
03319         : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
03320     );
03321 
03322     sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - ca); /* a - 32 is the original start: write (upper 32 words) minus (m masked by 0 - ca) there; 0 - ca is all ones when ca is 1 and zero when ca is 0 */
03323 }
03324 
03325 /* Multiply two Montgomery form numbers mod the modulus (prime).
03326  * (r = a * b mod m)
03327  *
03328  * r   Result of multiplication.
03329  * a   First number to multiply in Montgomery form.
03330  * b   Second number to multiply in Montgomery form.
03331  * m   Modulus (prime).
03332  * mp  Montgomery multiplier.
03333  */
03334 static void sp_2048_mont_mul_32(sp_digit* r, sp_digit* a, sp_digit* b,
03335         sp_digit* m, sp_digit mp)
03336 {
03337     sp_2048_mul_32(r, a, b); /* r = a * b, not yet reduced */
03338     sp_2048_mont_reduce_32(r, m, mp); /* Montgomery-reduce r in place using m and mp */
03339 }
03340 
03341 /* Square the Montgomery form number. (r = a * a mod m)
03342  *
03343  * r   Result of squaring.
03344  * a   Number to square in Montgomery form.
03345  * m   Modulus (prime).
03346  * mp  Montgomery multiplier.
03347  */
03348 static void sp_2048_mont_sqr_32(sp_digit* r, sp_digit* a, sp_digit* m,
03349         sp_digit mp)
03350 {
03351     sp_2048_sqr_32(r, a); /* r = a * a, not yet reduced */
03352     sp_2048_mont_reduce_32(r, m, mp); /* Montgomery-reduce r in place using m and mp */
03353 }
03354 
03355 /* Mul a by digit b into r. (r = a * b)
03356  *
03357  * r  A single precision integer.
03358  * a  A single precision integer.
03359  * b  A single precision digit.
03360  */
03361 static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a,
03362         const sp_digit b)
03363 {
03364 #ifdef WOLFSSL_SP_SMALL
03365     __asm__ __volatile__ (
03366         "mov    r10, #0\n\t"
03367         "# A[0] * B\n\t"
03368         "ldr    r8, [%[a]]\n\t"
03369         "umull  r5, r3, %[b], r8\n\t"
03370         "mov    r4, #0\n\t"
03371         "str    r5, [%[r]]\n\t"
03372         "mov    r5, #0\n\t"
03373         "mov    r9, #4\n\t"
03374         "1:\n\t"
03375         "ldr    r8, [%[a], r9]\n\t"
03376         "umull  r6, r7, %[b], r8\n\t"
03377         "adds   r3, r3, r6\n\t"
03378         "adcs   r4, r4, r7\n\t"
03379         "adc    r5, r10, r10\n\t"
03380         "str    r3, [%[r], r9]\n\t"
03381         "mov    r3, r4\n\t"
03382         "mov    r4, r5\n\t"
03383         "mov    r5, #0\n\t"
03384         "add    r9, r9, #4\n\t"
03385         "cmp    r9, #128\n\t"
03386         "blt    1b\n\t"
03387         "str    r3, [%[r], #128]\n\t"
03388         :
03389         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
03390         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
03391     );
03392 #else
03393     __asm__ __volatile__ (
03394         "mov    r10, #0\n\t"
03395         "# A[0] * B\n\t"
03396         "ldr    r8, [%[a]]\n\t"
03397         "umull  r3, r4, %[b], r8\n\t"
03398         "mov    r5, #0\n\t"
03399         "str    r3, [%[r]]\n\t"
03400         "# A[1] * B\n\t"
03401         "ldr    r8, [%[a], #4]\n\t"
03402         "mov    r3, #0\n\t"
03403         "umull  r6, r7, %[b], r8\n\t"
03404         "adds   r4, r4, r6\n\t"
03405         "adcs   r5, r5, r7\n\t"
03406         "adc    r3, r10, r10\n\t"
03407         "str    r4, [%[r], #4]\n\t"
03408         "# A[2] * B\n\t"
03409         "ldr    r8, [%[a], #8]\n\t"
03410         "mov    r4, #0\n\t"
03411         "umull  r6, r7, %[b], r8\n\t"
03412         "adds   r5, r5, r6\n\t"
03413         "adcs   r3, r3, r7\n\t"
03414         "adc    r4, r10, r10\n\t"
03415         "str    r5, [%[r], #8]\n\t"
03416         "# A[3] * B\n\t"
03417         "ldr    r8, [%[a], #12]\n\t"
03418         "mov    r5, #0\n\t"
03419         "umull  r6, r7, %[b], r8\n\t"
03420         "adds   r3, r3, r6\n\t"
03421         "adcs   r4, r4, r7\n\t"
03422         "adc    r5, r10, r10\n\t"
03423         "str    r3, [%[r], #12]\n\t"
03424         "# A[4] * B\n\t"
03425         "ldr    r8, [%[a], #16]\n\t"
03426         "mov    r3, #0\n\t"
03427         "umull  r6, r7, %[b], r8\n\t"
03428         "adds   r4, r4, r6\n\t"
03429         "adcs   r5, r5, r7\n\t"
03430         "adc    r3, r10, r10\n\t"
03431         "str    r4, [%[r], #16]\n\t"
03432         "# A[5] * B\n\t"
03433         "ldr    r8, [%[a], #20]\n\t"
03434         "mov    r4, #0\n\t"
03435         "umull  r6, r7, %[b], r8\n\t"
03436         "adds   r5, r5, r6\n\t"
03437         "adcs   r3, r3, r7\n\t"
03438         "adc    r4, r10, r10\n\t"
03439         "str    r5, [%[r], #20]\n\t"
03440         "# A[6] * B\n\t"
03441         "ldr    r8, [%[a], #24]\n\t"
03442         "mov    r5, #0\n\t"
03443         "umull  r6, r7, %[b], r8\n\t"
03444         "adds   r3, r3, r6\n\t"
03445         "adcs   r4, r4, r7\n\t"
03446         "adc    r5, r10, r10\n\t"
03447         "str    r3, [%[r], #24]\n\t"
03448         "# A[7] * B\n\t"
03449         "ldr    r8, [%[a], #28]\n\t"
03450         "mov    r3, #0\n\t"
03451         "umull  r6, r7, %[b], r8\n\t"
03452         "adds   r4, r4, r6\n\t"
03453         "adcs   r5, r5, r7\n\t"
03454         "adc    r3, r10, r10\n\t"
03455         "str    r4, [%[r], #28]\n\t"
03456         "# A[8] * B\n\t"
03457         "ldr    r8, [%[a], #32]\n\t"
03458         "mov    r4, #0\n\t"
03459         "umull  r6, r7, %[b], r8\n\t"
03460         "adds   r5, r5, r6\n\t"
03461         "adcs   r3, r3, r7\n\t"
03462         "adc    r4, r10, r10\n\t"
03463         "str    r5, [%[r], #32]\n\t"
03464         "# A[9] * B\n\t"
03465         "ldr    r8, [%[a], #36]\n\t"
03466         "mov    r5, #0\n\t"
03467         "umull  r6, r7, %[b], r8\n\t"
03468         "adds   r3, r3, r6\n\t"
03469         "adcs   r4, r4, r7\n\t"
03470         "adc    r5, r10, r10\n\t"
03471         "str    r3, [%[r], #36]\n\t"
03472         "# A[10] * B\n\t"
03473         "ldr    r8, [%[a], #40]\n\t"
03474         "mov    r3, #0\n\t"
03475         "umull  r6, r7, %[b], r8\n\t"
03476         "adds   r4, r4, r6\n\t"
03477         "adcs   r5, r5, r7\n\t"
03478         "adc    r3, r10, r10\n\t"
03479         "str    r4, [%[r], #40]\n\t"
03480         "# A[11] * B\n\t"
03481         "ldr    r8, [%[a], #44]\n\t"
03482         "mov    r4, #0\n\t"
03483         "umull  r6, r7, %[b], r8\n\t"
03484         "adds   r5, r5, r6\n\t"
03485         "adcs   r3, r3, r7\n\t"
03486         "adc    r4, r10, r10\n\t"
03487         "str    r5, [%[r], #44]\n\t"
03488         "# A[12] * B\n\t"
03489         "ldr    r8, [%[a], #48]\n\t"
03490         "mov    r5, #0\n\t"
03491         "umull  r6, r7, %[b], r8\n\t"
03492         "adds   r3, r3, r6\n\t"
03493         "adcs   r4, r4, r7\n\t"
03494         "adc    r5, r10, r10\n\t"
03495         "str    r3, [%[r], #48]\n\t"
03496         "# A[13] * B\n\t"
03497         "ldr    r8, [%[a], #52]\n\t"
03498         "mov    r3, #0\n\t"
03499         "umull  r6, r7, %[b], r8\n\t"
03500         "adds   r4, r4, r6\n\t"
03501         "adcs   r5, r5, r7\n\t"
03502         "adc    r3, r10, r10\n\t"
03503         "str    r4, [%[r], #52]\n\t"
03504         "# A[14] * B\n\t"
03505         "ldr    r8, [%[a], #56]\n\t"
03506         "mov    r4, #0\n\t"
03507         "umull  r6, r7, %[b], r8\n\t"
03508         "adds   r5, r5, r6\n\t"
03509         "adcs   r3, r3, r7\n\t"
03510         "adc    r4, r10, r10\n\t"
03511         "str    r5, [%[r], #56]\n\t"
03512         "# A[15] * B\n\t"
03513         "ldr    r8, [%[a], #60]\n\t"
03514         "mov    r5, #0\n\t"
03515         "umull  r6, r7, %[b], r8\n\t"
03516         "adds   r3, r3, r6\n\t"
03517         "adcs   r4, r4, r7\n\t"
03518         "adc    r5, r10, r10\n\t"
03519         "str    r3, [%[r], #60]\n\t"
03520         "# A[16] * B\n\t"
03521         "ldr    r8, [%[a], #64]\n\t"
03522         "mov    r3, #0\n\t"
03523         "umull  r6, r7, %[b], r8\n\t"
03524         "adds   r4, r4, r6\n\t"
03525         "adcs   r5, r5, r7\n\t"
03526         "adc    r3, r10, r10\n\t"
03527         "str    r4, [%[r], #64]\n\t"
03528         "# A[17] * B\n\t"
03529         "ldr    r8, [%[a], #68]\n\t"
03530         "mov    r4, #0\n\t"
03531         "umull  r6, r7, %[b], r8\n\t"
03532         "adds   r5, r5, r6\n\t"
03533         "adcs   r3, r3, r7\n\t"
03534         "adc    r4, r10, r10\n\t"
03535         "str    r5, [%[r], #68]\n\t"
03536         "# A[18] * B\n\t"
03537         "ldr    r8, [%[a], #72]\n\t"
03538         "mov    r5, #0\n\t"
03539         "umull  r6, r7, %[b], r8\n\t"
03540         "adds   r3, r3, r6\n\t"
03541         "adcs   r4, r4, r7\n\t"
03542         "adc    r5, r10, r10\n\t"
03543         "str    r3, [%[r], #72]\n\t"
03544         "# A[19] * B\n\t"
03545         "ldr    r8, [%[a], #76]\n\t"
03546         "mov    r3, #0\n\t"
03547         "umull  r6, r7, %[b], r8\n\t"
03548         "adds   r4, r4, r6\n\t"
03549         "adcs   r5, r5, r7\n\t"
03550         "adc    r3, r10, r10\n\t"
03551         "str    r4, [%[r], #76]\n\t"
03552         "# A[20] * B\n\t"
03553         "ldr    r8, [%[a], #80]\n\t"
03554         "mov    r4, #0\n\t"
03555         "umull  r6, r7, %[b], r8\n\t"
03556         "adds   r5, r5, r6\n\t"
03557         "adcs   r3, r3, r7\n\t"
03558         "adc    r4, r10, r10\n\t"
03559         "str    r5, [%[r], #80]\n\t"
03560         "# A[21] * B\n\t"
03561         "ldr    r8, [%[a], #84]\n\t"
03562         "mov    r5, #0\n\t"
03563         "umull  r6, r7, %[b], r8\n\t"
03564         "adds   r3, r3, r6\n\t"
03565         "adcs   r4, r4, r7\n\t"
03566         "adc    r5, r10, r10\n\t"
03567         "str    r3, [%[r], #84]\n\t"
03568         "# A[22] * B\n\t"
03569         "ldr    r8, [%[a], #88]\n\t"
03570         "mov    r3, #0\n\t"
03571         "umull  r6, r7, %[b], r8\n\t"
03572         "adds   r4, r4, r6\n\t"
03573         "adcs   r5, r5, r7\n\t"
03574         "adc    r3, r10, r10\n\t"
03575         "str    r4, [%[r], #88]\n\t"
03576         "# A[23] * B\n\t"
03577         "ldr    r8, [%[a], #92]\n\t"
03578         "mov    r4, #0\n\t"
03579         "umull  r6, r7, %[b], r8\n\t"
03580         "adds   r5, r5, r6\n\t"
03581         "adcs   r3, r3, r7\n\t"
03582         "adc    r4, r10, r10\n\t"
03583         "str    r5, [%[r], #92]\n\t"
03584         "# A[24] * B\n\t"
03585         "ldr    r8, [%[a], #96]\n\t"
03586         "mov    r5, #0\n\t"
03587         "umull  r6, r7, %[b], r8\n\t"
03588         "adds   r3, r3, r6\n\t"
03589         "adcs   r4, r4, r7\n\t"
03590         "adc    r5, r10, r10\n\t"
03591         "str    r3, [%[r], #96]\n\t"
03592         "# A[25] * B\n\t"
03593         "ldr    r8, [%[a], #100]\n\t"
03594         "mov    r3, #0\n\t"
03595         "umull  r6, r7, %[b], r8\n\t"
03596         "adds   r4, r4, r6\n\t"
03597         "adcs   r5, r5, r7\n\t"
03598         "adc    r3, r10, r10\n\t"
03599         "str    r4, [%[r], #100]\n\t"
03600         "# A[26] * B\n\t"
03601         "ldr    r8, [%[a], #104]\n\t"
03602         "mov    r4, #0\n\t"
03603         "umull  r6, r7, %[b], r8\n\t"
03604         "adds   r5, r5, r6\n\t"
03605         "adcs   r3, r3, r7\n\t"
03606         "adc    r4, r10, r10\n\t"
03607         "str    r5, [%[r], #104]\n\t"
03608         "# A[27] * B\n\t"
03609         "ldr    r8, [%[a], #108]\n\t"
03610         "mov    r5, #0\n\t"
03611         "umull  r6, r7, %[b], r8\n\t"
03612         "adds   r3, r3, r6\n\t"
03613         "adcs   r4, r4, r7\n\t"
03614         "adc    r5, r10, r10\n\t"
03615         "str    r3, [%[r], #108]\n\t"
03616         "# A[28] * B\n\t"
03617         "ldr    r8, [%[a], #112]\n\t"
03618         "mov    r3, #0\n\t"
03619         "umull  r6, r7, %[b], r8\n\t"
03620         "adds   r4, r4, r6\n\t"
03621         "adcs   r5, r5, r7\n\t"
03622         "adc    r3, r10, r10\n\t"
03623         "str    r4, [%[r], #112]\n\t"
03624         "# A[29] * B\n\t"
03625         "ldr    r8, [%[a], #116]\n\t"
03626         "mov    r4, #0\n\t"
03627         "umull  r6, r7, %[b], r8\n\t"
03628         "adds   r5, r5, r6\n\t"
03629         "adcs   r3, r3, r7\n\t"
03630         "adc    r4, r10, r10\n\t"
03631         "str    r5, [%[r], #116]\n\t"
03632         "# A[30] * B\n\t"
03633         "ldr    r8, [%[a], #120]\n\t"
03634         "mov    r5, #0\n\t"
03635         "umull  r6, r7, %[b], r8\n\t"
03636         "adds   r3, r3, r6\n\t"
03637         "adcs   r4, r4, r7\n\t"
03638         "adc    r5, r10, r10\n\t"
03639         "str    r3, [%[r], #120]\n\t"
03640         "# A[31] * B\n\t"
03641         "ldr    r8, [%[a], #124]\n\t"
03642         "umull  r6, r7, %[b], r8\n\t"
03643         "adds   r4, r4, r6\n\t"
03644         "adc    r5, r5, r7\n\t"
03645         "str    r4, [%[r], #124]\n\t"
03646         "str    r5, [%[r], #128]\n\t"
03647         :
03648         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
03649         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
03650     );
03651 #endif
03652 }
03653 
03654 /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
03655  *
03656  * d1   The high order half of the number to divide.
03657  * d0   The low order half of the number to divide.
03658  * div  The dividend.
03659  * returns the result of the division.
03660  *
03661  * Note that this is an approximate div. It may give an answer 1 larger.
03662  */
03663 static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div)
03664 {
03665     sp_digit r = 0;
03666 
03667     __asm__ __volatile__ (
03668         "lsr    r5, %[div], #1\n\t"
03669         "add    r5, r5, #1\n\t"
03670         "mov    r6, %[d0]\n\t"
03671         "mov    r7, %[d1]\n\t"
03672         "# Do top 32\n\t"
03673         "subs   r8, r5, r7\n\t"
03674         "sbc    r8, r8, r8\n\t"
03675         "add    %[r], %[r], %[r]\n\t"
03676         "sub    %[r], %[r], r8\n\t"
03677         "and    r8, r8, r5\n\t"
03678         "subs   r7, r7, r8\n\t"
03679         "# Next 30 bits\n\t"
03680         "mov    r4, #29\n\t"
03681         "1:\n\t"
03682         "movs   r6, r6, lsl #1\n\t"
03683         "adc    r7, r7, r7\n\t"
03684         "subs   r8, r5, r7\n\t"
03685         "sbc    r8, r8, r8\n\t"
03686         "add    %[r], %[r], %[r]\n\t"
03687         "sub    %[r], %[r], r8\n\t"
03688         "and    r8, r8, r5\n\t"
03689         "subs   r7, r7, r8\n\t"
03690         "subs   r4, r4, #1\n\t"
03691         "bpl    1b\n\t"
03692         "add    %[r], %[r], %[r]\n\t"
03693         "add    %[r], %[r], #1\n\t"
03694         "umull  r4, r5, %[r], %[div]\n\t"
03695         "subs   r4, %[d0], r4\n\t"
03696         "sbc    r5, %[d1], r5\n\t"
03697         "add    %[r], %[r], r5\n\t"
03698         "umull  r4, r5, %[r], %[div]\n\t"
03699         "subs   r4, %[d0], r4\n\t"
03700         "sbc    r5, %[d1], r5\n\t"
03701         "add    %[r], %[r], r5\n\t"
03702         "subs   r8, %[div], r4\n\t"
03703         "sbc    r8, r8, r8\n\t"
03704         "sub    %[r], %[r], r8\n\t"
03705         : [r] "+r" (r)
03706         : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
03707         : "r4", "r5", "r6", "r7", "r8"
03708     );
03709     return r;
03710 }
03711 
03712 /* Compare a with b in constant time.
03713  *
03714  * a  A single precision integer.
03715  * b  A single precision integer.
03716  * return -ve, 0 or +ve if a is less than, equal to or greater than b
03717  * respectively.
03718  */
03719 static int32_t sp_2048_cmp_32(sp_digit* a, sp_digit* b)
03720 {
03721     sp_digit r = -1;
03722     sp_digit one = 1;
03723 
03724 #ifdef WOLFSSL_SP_SMALL
03725     __asm__ __volatile__ (
03726         "mov    r7, #0\n\t"
03727         "mov    r3, #-1\n\t"
03728         "mov    r6, #124\n\t"
03729         "1:\n\t"
03730         "ldr    r4, [%[a], r6]\n\t"
03731         "ldr    r5, [%[b], r6]\n\t"
03732         "and    r4, r4, r3\n\t"
03733         "and    r5, r5, r3\n\t"
03734         "subs   r4, r4, r5\n\t"
03735         "movhi  %[r], %[one]\n\t"
03736         "movlo  %[r], r3\n\t"
03737         "movne  r3, r7\n\t"
03738         "sub    r6, r6, #4\n\t"
03739         "bcc    1b\n\t"
03740         "eor    %[r], %[r], r3\n\t"
03741         : [r] "+r" (r)
03742         : [a] "r" (a), [b] "r" (b), [one] "r" (one)
03743         : "r2", "r3", "r4", "r5", "r6", "r7"
03744     );
03745 #else
03746     __asm__ __volatile__ (
03747         "mov    r7, #0\n\t"
03748         "mov    r3, #-1\n\t"
03749         "ldr        r4, [%[a], #124]\n\t"
03750         "ldr        r5, [%[b], #124]\n\t"
03751         "and        r4, r4, r3\n\t"
03752         "and        r5, r5, r3\n\t"
03753         "subs   r4, r4, r5\n\t"
03754         "movhi  %[r], %[one]\n\t"
03755         "movlo  %[r], r3\n\t"
03756         "movne  r3, r7\n\t"
03757         "ldr        r4, [%[a], #120]\n\t"
03758         "ldr        r5, [%[b], #120]\n\t"
03759         "and        r4, r4, r3\n\t"
03760         "and        r5, r5, r3\n\t"
03761         "subs   r4, r4, r5\n\t"
03762         "movhi  %[r], %[one]\n\t"
03763         "movlo  %[r], r3\n\t"
03764         "movne  r3, r7\n\t"
03765         "ldr        r4, [%[a], #116]\n\t"
03766         "ldr        r5, [%[b], #116]\n\t"
03767         "and        r4, r4, r3\n\t"
03768         "and        r5, r5, r3\n\t"
03769         "subs   r4, r4, r5\n\t"
03770         "movhi  %[r], %[one]\n\t"
03771         "movlo  %[r], r3\n\t"
03772         "movne  r3, r7\n\t"
03773         "ldr        r4, [%[a], #112]\n\t"
03774         "ldr        r5, [%[b], #112]\n\t"
03775         "and        r4, r4, r3\n\t"
03776         "and        r5, r5, r3\n\t"
03777         "subs   r4, r4, r5\n\t"
03778         "movhi  %[r], %[one]\n\t"
03779         "movlo  %[r], r3\n\t"
03780         "movne  r3, r7\n\t"
03781         "ldr        r4, [%[a], #108]\n\t"
03782         "ldr        r5, [%[b], #108]\n\t"
03783         "and        r4, r4, r3\n\t"
03784         "and        r5, r5, r3\n\t"
03785         "subs   r4, r4, r5\n\t"
03786         "movhi  %[r], %[one]\n\t"
03787         "movlo  %[r], r3\n\t"
03788         "movne  r3, r7\n\t"
03789         "ldr        r4, [%[a], #104]\n\t"
03790         "ldr        r5, [%[b], #104]\n\t"
03791         "and        r4, r4, r3\n\t"
03792         "and        r5, r5, r3\n\t"
03793         "subs   r4, r4, r5\n\t"
03794         "movhi  %[r], %[one]\n\t"
03795         "movlo  %[r], r3\n\t"
03796         "movne  r3, r7\n\t"
03797         "ldr        r4, [%[a], #100]\n\t"
03798         "ldr        r5, [%[b], #100]\n\t"
03799         "and        r4, r4, r3\n\t"
03800         "and        r5, r5, r3\n\t"
03801         "subs   r4, r4, r5\n\t"
03802         "movhi  %[r], %[one]\n\t"
03803         "movlo  %[r], r3\n\t"
03804         "movne  r3, r7\n\t"
03805         "ldr        r4, [%[a], #96]\n\t"
03806         "ldr        r5, [%[b], #96]\n\t"
03807         "and        r4, r4, r3\n\t"
03808         "and        r5, r5, r3\n\t"
03809         "subs   r4, r4, r5\n\t"
03810         "movhi  %[r], %[one]\n\t"
03811         "movlo  %[r], r3\n\t"
03812         "movne  r3, r7\n\t"
03813         "ldr        r4, [%[a], #92]\n\t"
03814         "ldr        r5, [%[b], #92]\n\t"
03815         "and        r4, r4, r3\n\t"
03816         "and        r5, r5, r3\n\t"
03817         "subs   r4, r4, r5\n\t"
03818         "movhi  %[r], %[one]\n\t"
03819         "movlo  %[r], r3\n\t"
03820         "movne  r3, r7\n\t"
03821         "ldr        r4, [%[a], #88]\n\t"
03822         "ldr        r5, [%[b], #88]\n\t"
03823         "and        r4, r4, r3\n\t"
03824         "and        r5, r5, r3\n\t"
03825         "subs   r4, r4, r5\n\t"
03826         "movhi  %[r], %[one]\n\t"
03827         "movlo  %[r], r3\n\t"
03828         "movne  r3, r7\n\t"
03829         "ldr        r4, [%[a], #84]\n\t"
03830         "ldr        r5, [%[b], #84]\n\t"
03831         "and        r4, r4, r3\n\t"
03832         "and        r5, r5, r3\n\t"
03833         "subs   r4, r4, r5\n\t"
03834         "movhi  %[r], %[one]\n\t"
03835         "movlo  %[r], r3\n\t"
03836         "movne  r3, r7\n\t"
03837         "ldr        r4, [%[a], #80]\n\t"
03838         "ldr        r5, [%[b], #80]\n\t"
03839         "and        r4, r4, r3\n\t"
03840         "and        r5, r5, r3\n\t"
03841         "subs   r4, r4, r5\n\t"
03842         "movhi  %[r], %[one]\n\t"
03843         "movlo  %[r], r3\n\t"
03844         "movne  r3, r7\n\t"
03845         "ldr        r4, [%[a], #76]\n\t"
03846         "ldr        r5, [%[b], #76]\n\t"
03847         "and        r4, r4, r3\n\t"
03848         "and        r5, r5, r3\n\t"
03849         "subs   r4, r4, r5\n\t"
03850         "movhi  %[r], %[one]\n\t"
03851         "movlo  %[r], r3\n\t"
03852         "movne  r3, r7\n\t"
03853         "ldr        r4, [%[a], #72]\n\t"
03854         "ldr        r5, [%[b], #72]\n\t"
03855         "and        r4, r4, r3\n\t"
03856         "and        r5, r5, r3\n\t"
03857         "subs   r4, r4, r5\n\t"
03858         "movhi  %[r], %[one]\n\t"
03859         "movlo  %[r], r3\n\t"
03860         "movne  r3, r7\n\t"
03861         "ldr        r4, [%[a], #68]\n\t"
03862         "ldr        r5, [%[b], #68]\n\t"
03863         "and        r4, r4, r3\n\t"
03864         "and        r5, r5, r3\n\t"
03865         "subs   r4, r4, r5\n\t"
03866         "movhi  %[r], %[one]\n\t"
03867         "movlo  %[r], r3\n\t"
03868         "movne  r3, r7\n\t"
03869         "ldr        r4, [%[a], #64]\n\t"
03870         "ldr        r5, [%[b], #64]\n\t"
03871         "and        r4, r4, r3\n\t"
03872         "and        r5, r5, r3\n\t"
03873         "subs   r4, r4, r5\n\t"
03874         "movhi  %[r], %[one]\n\t"
03875         "movlo  %[r], r3\n\t"
03876         "movne  r3, r7\n\t"
03877         "ldr        r4, [%[a], #60]\n\t"
03878         "ldr        r5, [%[b], #60]\n\t"
03879         "and        r4, r4, r3\n\t"
03880         "and        r5, r5, r3\n\t"
03881         "subs   r4, r4, r5\n\t"
03882         "movhi  %[r], %[one]\n\t"
03883         "movlo  %[r], r3\n\t"
03884         "movne  r3, r7\n\t"
03885         "ldr        r4, [%[a], #56]\n\t"
03886         "ldr        r5, [%[b], #56]\n\t"
03887         "and        r4, r4, r3\n\t"
03888         "and        r5, r5, r3\n\t"
03889         "subs   r4, r4, r5\n\t"
03890         "movhi  %[r], %[one]\n\t"
03891         "movlo  %[r], r3\n\t"
03892         "movne  r3, r7\n\t"
03893         "ldr        r4, [%[a], #52]\n\t"
03894         "ldr        r5, [%[b], #52]\n\t"
03895         "and        r4, r4, r3\n\t"
03896         "and        r5, r5, r3\n\t"
03897         "subs   r4, r4, r5\n\t"
03898         "movhi  %[r], %[one]\n\t"
03899         "movlo  %[r], r3\n\t"
03900         "movne  r3, r7\n\t"
03901         "ldr        r4, [%[a], #48]\n\t"
03902         "ldr        r5, [%[b], #48]\n\t"
03903         "and        r4, r4, r3\n\t"
03904         "and        r5, r5, r3\n\t"
03905         "subs   r4, r4, r5\n\t"
03906         "movhi  %[r], %[one]\n\t"
03907         "movlo  %[r], r3\n\t"
03908         "movne  r3, r7\n\t"
03909         "ldr        r4, [%[a], #44]\n\t"
03910         "ldr        r5, [%[b], #44]\n\t"
03911         "and        r4, r4, r3\n\t"
03912         "and        r5, r5, r3\n\t"
03913         "subs   r4, r4, r5\n\t"
03914         "movhi  %[r], %[one]\n\t"
03915         "movlo  %[r], r3\n\t"
03916         "movne  r3, r7\n\t"
03917         "ldr        r4, [%[a], #40]\n\t"
03918         "ldr        r5, [%[b], #40]\n\t"
03919         "and        r4, r4, r3\n\t"
03920         "and        r5, r5, r3\n\t"
03921         "subs   r4, r4, r5\n\t"
03922         "movhi  %[r], %[one]\n\t"
03923         "movlo  %[r], r3\n\t"
03924         "movne  r3, r7\n\t"
03925         "ldr        r4, [%[a], #36]\n\t"
03926         "ldr        r5, [%[b], #36]\n\t"
03927         "and        r4, r4, r3\n\t"
03928         "and        r5, r5, r3\n\t"
03929         "subs   r4, r4, r5\n\t"
03930         "movhi  %[r], %[one]\n\t"
03931         "movlo  %[r], r3\n\t"
03932         "movne  r3, r7\n\t"
03933         "ldr        r4, [%[a], #32]\n\t"
03934         "ldr        r5, [%[b], #32]\n\t"
03935         "and        r4, r4, r3\n\t"
03936         "and        r5, r5, r3\n\t"
03937         "subs   r4, r4, r5\n\t"
03938         "movhi  %[r], %[one]\n\t"
03939         "movlo  %[r], r3\n\t"
03940         "movne  r3, r7\n\t"
03941         "ldr        r4, [%[a], #28]\n\t"
03942         "ldr        r5, [%[b], #28]\n\t"
03943         "and        r4, r4, r3\n\t"
03944         "and        r5, r5, r3\n\t"
03945         "subs   r4, r4, r5\n\t"
03946         "movhi  %[r], %[one]\n\t"
03947         "movlo  %[r], r3\n\t"
03948         "movne  r3, r7\n\t"
03949         "ldr        r4, [%[a], #24]\n\t"
03950         "ldr        r5, [%[b], #24]\n\t"
03951         "and        r4, r4, r3\n\t"
03952         "and        r5, r5, r3\n\t"
03953         "subs   r4, r4, r5\n\t"
03954         "movhi  %[r], %[one]\n\t"
03955         "movlo  %[r], r3\n\t"
03956         "movne  r3, r7\n\t"
03957         "ldr        r4, [%[a], #20]\n\t"
03958         "ldr        r5, [%[b], #20]\n\t"
03959         "and        r4, r4, r3\n\t"
03960         "and        r5, r5, r3\n\t"
03961         "subs   r4, r4, r5\n\t"
03962         "movhi  %[r], %[one]\n\t"
03963         "movlo  %[r], r3\n\t"
03964         "movne  r3, r7\n\t"
03965         "ldr        r4, [%[a], #16]\n\t"
03966         "ldr        r5, [%[b], #16]\n\t"
03967         "and        r4, r4, r3\n\t"
03968         "and        r5, r5, r3\n\t"
03969         "subs   r4, r4, r5\n\t"
03970         "movhi  %[r], %[one]\n\t"
03971         "movlo  %[r], r3\n\t"
03972         "movne  r3, r7\n\t"
03973         "ldr        r4, [%[a], #12]\n\t"
03974         "ldr        r5, [%[b], #12]\n\t"
03975         "and        r4, r4, r3\n\t"
03976         "and        r5, r5, r3\n\t"
03977         "subs   r4, r4, r5\n\t"
03978         "movhi  %[r], %[one]\n\t"
03979         "movlo  %[r], r3\n\t"
03980         "movne  r3, r7\n\t"
03981         "ldr        r4, [%[a], #8]\n\t"
03982         "ldr        r5, [%[b], #8]\n\t"
03983         "and        r4, r4, r3\n\t"
03984         "and        r5, r5, r3\n\t"
03985         "subs   r4, r4, r5\n\t"
03986         "movhi  %[r], %[one]\n\t"
03987         "movlo  %[r], r3\n\t"
03988         "movne  r3, r7\n\t"
03989         "ldr        r4, [%[a], #4]\n\t"
03990         "ldr        r5, [%[b], #4]\n\t"
03991         "and        r4, r4, r3\n\t"
03992         "and        r5, r5, r3\n\t"
03993         "subs   r4, r4, r5\n\t"
03994         "movhi  %[r], %[one]\n\t"
03995         "movlo  %[r], r3\n\t"
03996         "movne  r3, r7\n\t"
03997         "ldr        r4, [%[a], #0]\n\t"
03998         "ldr        r5, [%[b], #0]\n\t"
03999         "and        r4, r4, r3\n\t"
04000         "and        r5, r5, r3\n\t"
04001         "subs   r4, r4, r5\n\t"
04002         "movhi  %[r], %[one]\n\t"
04003         "movlo  %[r], r3\n\t"
04004         "movne  r3, r7\n\t"
04005         "eor    %[r], %[r], r3\n\t"
04006         : [r] "+r" (r)
04007         : [a] "r" (a), [b] "r" (b), [one] "r" (one)
04008         : "r2", "r3", "r4", "r5", "r6", "r7"
04009     );
04010 #endif
04011 
04012     return r;
04013 }
04014 
04015 /* Divide d in a and put remainder into r (m*d + r = a)
04016  * m is not calculated as it is not needed at this time.
04017  *
04018  * a  Nmber to be divided.
04019  * d  Number to divide with.
04020  * m  Multiplier result.
04021  * r  Remainder from the division.
04022  * returns MP_OKAY indicating success.
04023  */
04024 static WC_INLINE int sp_2048_div_32(sp_digit* a, sp_digit* d, sp_digit* m,
04025         sp_digit* r)
04026 {
04027     sp_digit t1[64], t2[33];
04028     sp_digit div, r1;
04029     int i;
04030 
04031     (void)m;
04032 
04033     div = d[31];
04034     XMEMCPY(t1, a, sizeof(*t1) * 2 * 32);
04035     for (i=31; i>=0; i--) {
04036         r1 = div_2048_word_32(t1[32 + i], t1[32 + i - 1], div);
04037 
04038         sp_2048_mul_d_32(t2, d, r1);
04039         t1[32 + i] += sp_2048_sub_in_place_32(&t1[i], t2);
04040         t1[32 + i] -= t2[32];
04041         sp_2048_mask_32(t2, d, t1[32 + i]);
04042         t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], t2);
04043         sp_2048_mask_32(t2, d, t1[32 + i]);
04044         t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], t2);
04045     }
04046 
04047     r1 = sp_2048_cmp_32(t1, d) >= 0;
04048     sp_2048_cond_sub_32(r, t1, t2, (sp_digit)0 - r1);
04049 
04050     return MP_OKAY;
04051 }
04052 
04053 /* Reduce a modulo m into r. (r = a mod m)
04054  *
04055  * r  A single precision number that is the reduced result.
04056  * a  A single precision number that is to be reduced.
04057  * m  A single precision number that is the modulus to reduce with.
04058  * returns MP_OKAY indicating success.
04059  */
04060 static WC_INLINE int sp_2048_mod_32(sp_digit* r, sp_digit* a, sp_digit* m)
04061 {
04062     return sp_2048_div_32(a, m, NULL, r);
04063 }
04064 
04065 #ifdef WOLFSSL_SP_SMALL
04066 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
04067  *
04068  * r     A single precision number that is the result of the operation.
04069  * a     A single precision number being exponentiated.
04070  * e     A single precision number that is the exponent.
04071  * bits  The number of bits in the exponent.
04072  * m     A single precision number that is the modulus.
04073  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
04074  */
04075 static int sp_2048_mod_exp_32(sp_digit* r, sp_digit* a, sp_digit* e,
04076         int bits, sp_digit* m, int reduceA)
04077 {
04078 #ifndef WOLFSSL_SMALL_STACK
04079     sp_digit t[16][64];
04080 #else
04081     sp_digit* t[16];
04082     sp_digit* td;
04083 #endif
04084     sp_digit* norm;
04085     sp_digit mp = 1;
04086     sp_digit n;
04087     sp_digit mask;
04088     int i;
04089     int c, y;
04090     int err = MP_OKAY;
04091 
04092 #ifdef WOLFSSL_SMALL_STACK
04093     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 64, NULL,
04094                             DYNAMIC_TYPE_TMP_BUFFER);
04095     if (td == NULL)
04096         err = MEMORY_E;
04097 
04098     if (err == MP_OKAY) {
04099         for (i=0; i<16; i++)
04100             t[i] = td + i * 64;
04101         norm = t[0];
04102     }
04103 #else
04104     norm = t[0];
04105 #endif
04106 
04107     if (err == MP_OKAY) {
04108         sp_2048_mont_setup(m, &mp);
04109         sp_2048_mont_norm_32(norm, m);
04110 
04111         XMEMSET(t[1], 0, sizeof(sp_digit) * 32);
04112         if (reduceA) {
04113             err = sp_2048_mod_32(t[1] + 32, a, m);
04114             if (err == MP_OKAY)
04115                 err = sp_2048_mod_32(t[1], t[1], m);
04116         }
04117         else {
04118             XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32);
04119             err = sp_2048_mod_32(t[1], t[1], m);
04120         }
04121     }
04122 
04123     if (err == MP_OKAY) {
04124         sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp);
04125         sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp);
04126         sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp);
04127         sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp);
04128         sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp);
04129         sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp);
04130         sp_2048_mont_sqr_32(t[ 8], t[ 4], m, mp);
04131         sp_2048_mont_mul_32(t[ 9], t[ 5], t[ 4], m, mp);
04132         sp_2048_mont_sqr_32(t[10], t[ 5], m, mp);
04133         sp_2048_mont_mul_32(t[11], t[ 6], t[ 5], m, mp);
04134         sp_2048_mont_sqr_32(t[12], t[ 6], m, mp);
04135         sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp);
04136         sp_2048_mont_sqr_32(t[14], t[ 7], m, mp);
04137         sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp);
04138 
04139         i = (bits - 1) / 32;
04140         n = e[i--];
04141         y = n >> 28;
04142         n <<= 4;
04143         c = 28;
04144         XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
04145         for (; i>=0 || c>=4; ) {
04146             if (c == 0) {
04147                 n = e[i--];
04148                 y = n >> 28;
04149                 n <<= 4;
04150                 c = 28;
04151             }
04152             else if (c < 4) {
04153                 y = n >> 28;
04154                 n = e[i--];
04155                 c = 4 - c;
04156                 y |= n >> (32 - c);
04157                 n <<= c;
04158                 c = 32 - c;
04159             }
04160             else {
04161                 y = (n >> 28) & 0xf;
04162                 n <<= 4;
04163                 c -= 4;
04164             }
04165 
04166             sp_2048_mont_sqr_32(r, r, m, mp);
04167             sp_2048_mont_sqr_32(r, r, m, mp);
04168             sp_2048_mont_sqr_32(r, r, m, mp);
04169             sp_2048_mont_sqr_32(r, r, m, mp);
04170 
04171             sp_2048_mont_mul_32(r, r, t[y], m, mp);
04172         }
04173 
04174         XMEMSET(&r[32], 0, sizeof(sp_digit) * 32);
04175         sp_2048_mont_reduce_32(r, m, mp);
04176 
04177         mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
04178         sp_2048_cond_sub_32(r, r, m, mask);
04179     }
04180 
04181 #ifdef WOLFSSL_SMALL_STACK
04182     if (td != NULL)
04183         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
04184 #endif
04185 
04186     return err;
04187 }
04188 #else
04189 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
04190  *
04191  * r     A single precision number that is the result of the operation.
04192  * a     A single precision number being exponentiated.
04193  * e     A single precision number that is the exponent.
04194  * bits  The number of bits in the exponent.
04195  * m     A single precision number that is the modulus.
04196  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
04197  */
04198 static int sp_2048_mod_exp_32(sp_digit* r, sp_digit* a, sp_digit* e,
04199         int bits, sp_digit* m, int reduceA)
04200 {
04201 #ifndef WOLFSSL_SMALL_STACK
04202     sp_digit t[32][64];
04203 #else
04204     sp_digit* t[32];
04205     sp_digit* td;
04206 #endif
04207     sp_digit* norm;
04208     sp_digit mp = 1;
04209     sp_digit n;
04210     sp_digit mask;
04211     int i;
04212     int c, y;
04213     int err = MP_OKAY;
04214 
04215 #ifdef WOLFSSL_SMALL_STACK
04216     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 64, NULL,
04217                             DYNAMIC_TYPE_TMP_BUFFER);
04218     if (td == NULL)
04219         err = MEMORY_E;
04220 
04221     if (err == MP_OKAY) {
04222         for (i=0; i<32; i++)
04223             t[i] = td + i * 64;
04224         norm = t[0];
04225     }
04226 #else
04227     norm = t[0];
04228 #endif
04229 
04230     if (err == MP_OKAY) {
04231         sp_2048_mont_setup(m, &mp);
04232         sp_2048_mont_norm_32(norm, m);
04233 
04234         XMEMSET(t[1], 0, sizeof(sp_digit) * 32);
04235         if (reduceA) {
04236             err = sp_2048_mod_32(t[1] + 32, a, m);
04237             if (err == MP_OKAY)
04238                 err = sp_2048_mod_32(t[1], t[1], m);
04239         }
04240         else {
04241             XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32);
04242             err = sp_2048_mod_32(t[1], t[1], m);
04243         }
04244     }
04245 
04246     if (err == MP_OKAY) {
04247         sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp);
04248         sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp);
04249         sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp);
04250         sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp);
04251         sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp);
04252         sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp);
04253         sp_2048_mont_sqr_32(t[ 8], t[ 4], m, mp);
04254         sp_2048_mont_mul_32(t[ 9], t[ 5], t[ 4], m, mp);
04255         sp_2048_mont_sqr_32(t[10], t[ 5], m, mp);
04256         sp_2048_mont_mul_32(t[11], t[ 6], t[ 5], m, mp);
04257         sp_2048_mont_sqr_32(t[12], t[ 6], m, mp);
04258         sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp);
04259         sp_2048_mont_sqr_32(t[14], t[ 7], m, mp);
04260         sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp);
04261         sp_2048_mont_sqr_32(t[16], t[ 8], m, mp);
04262         sp_2048_mont_mul_32(t[17], t[ 9], t[ 8], m, mp);
04263         sp_2048_mont_sqr_32(t[18], t[ 9], m, mp);
04264         sp_2048_mont_mul_32(t[19], t[10], t[ 9], m, mp);
04265         sp_2048_mont_sqr_32(t[20], t[10], m, mp);
04266         sp_2048_mont_mul_32(t[21], t[11], t[10], m, mp);
04267         sp_2048_mont_sqr_32(t[22], t[11], m, mp);
04268         sp_2048_mont_mul_32(t[23], t[12], t[11], m, mp);
04269         sp_2048_mont_sqr_32(t[24], t[12], m, mp);
04270         sp_2048_mont_mul_32(t[25], t[13], t[12], m, mp);
04271         sp_2048_mont_sqr_32(t[26], t[13], m, mp);
04272         sp_2048_mont_mul_32(t[27], t[14], t[13], m, mp);
04273         sp_2048_mont_sqr_32(t[28], t[14], m, mp);
04274         sp_2048_mont_mul_32(t[29], t[15], t[14], m, mp);
04275         sp_2048_mont_sqr_32(t[30], t[15], m, mp);
04276         sp_2048_mont_mul_32(t[31], t[16], t[15], m, mp);
04277 
04278         i = (bits - 1) / 32;
04279         n = e[i--];
04280         y = n >> 27;
04281         n <<= 5;
04282         c = 27;
04283         XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
04284         for (; i>=0 || c>=5; ) {
04285             if (c == 0) {
04286                 n = e[i--];
04287                 y = n >> 27;
04288                 n <<= 5;
04289                 c = 27;
04290             }
04291             else if (c < 5) {
04292                 y = n >> 27;
04293                 n = e[i--];
04294                 c = 5 - c;
04295                 y |= n >> (32 - c);
04296                 n <<= c;
04297                 c = 32 - c;
04298             }
04299             else {
04300                 y = (n >> 27) & 0x1f;
04301                 n <<= 5;
04302                 c -= 5;
04303             }
04304 
04305             sp_2048_mont_sqr_32(r, r, m, mp);
04306             sp_2048_mont_sqr_32(r, r, m, mp);
04307             sp_2048_mont_sqr_32(r, r, m, mp);
04308             sp_2048_mont_sqr_32(r, r, m, mp);
04309             sp_2048_mont_sqr_32(r, r, m, mp);
04310 
04311             sp_2048_mont_mul_32(r, r, t[y], m, mp);
04312         }
04313         y = e[0] & 0xf;
04314         sp_2048_mont_sqr_32(r, r, m, mp);
04315         sp_2048_mont_sqr_32(r, r, m, mp);
04316         sp_2048_mont_sqr_32(r, r, m, mp);
04317         sp_2048_mont_sqr_32(r, r, m, mp);
04318         sp_2048_mont_mul_32(r, r, t[y], m, mp);
04319 
04320         XMEMSET(&r[32], 0, sizeof(sp_digit) * 32);
04321         sp_2048_mont_reduce_32(r, m, mp);
04322 
04323         mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
04324         sp_2048_cond_sub_32(r, r, m, mask);
04325     }
04326 
04327 #ifdef WOLFSSL_SMALL_STACK
04328     if (td != NULL)
04329         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
04330 #endif
04331 
04332     return err;
04333 }
04334 #endif /* WOLFSSL_SP_SMALL */
04335 
04336 #endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
04337 
04338 /* r = 2^n mod m where n is the number of bits to reduce by.
04339  * Given m must be 2048 bits, just need to subtract.
04340  *
04341  * r  A single precision number.
04342  * m  A signle precision number.
04343  */
04344 static void sp_2048_mont_norm_64(sp_digit* r, sp_digit* m)
04345 {
04346     XMEMSET(r, 0, sizeof(sp_digit) * 64);
04347 
04348     /* r = 2^n mod m */
04349     sp_2048_sub_in_place_64(r, m);
04350 }
04351 
04352 /* Conditionally subtract b from a using the mask m.
04353  * m is -1 to subtract and 0 when not copying.
04354  *
04355  * r  A single precision number representing condition subtract result.
04356  * a  A single precision number to subtract from.
04357  * b  A single precision number to subtract.
04358  * m  Mask value to apply.
04359  */
04360 static sp_digit sp_2048_cond_sub_64(sp_digit* r, sp_digit* a, sp_digit* b,
04361         sp_digit m)
04362 {
04363     sp_digit c = 0;
04364 
04365 #ifdef WOLFSSL_SP_SMALL
04366     __asm__ __volatile__ (
04367         "mov    r9, #0\n\t"
04368         "mov    r8, #0\n\t"
04369         "1:\n\t"
04370         "subs   %[c], r9, %[c]\n\t"
04371         "ldr    r4, [%[a], r8]\n\t"
04372         "ldr    r5, [%[b], r8]\n\t"
04373         "and    r5, r5, %[m]\n\t"
04374         "sbcs   r4, r4, r5\n\t"
04375         "sbc    %[c], r9, r9\n\t"
04376         "str    r4, [%[r], r8]\n\t"
04377         "add    r8, r8, #4\n\t"
04378         "cmp    r8, #256\n\t"
04379         "blt    1b\n\t"
04380         : [c] "+r" (c)
04381         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
04382         : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
04383     );
04384 #else
04385     __asm__ __volatile__ (
04386 
04387         "mov    r9, #0\n\t"
04388         "ldr    r4, [%[a], #0]\n\t"
04389         "ldr    r6, [%[a], #4]\n\t"
04390         "ldr    r5, [%[b], #0]\n\t"
04391         "ldr    r7, [%[b], #4]\n\t"
04392         "and    r5, r5, %[m]\n\t"
04393         "and    r7, r7, %[m]\n\t"
04394         "subs   r4, r4, r5\n\t"
04395         "sbcs   r6, r6, r7\n\t"
04396         "str    r4, [%[r], #0]\n\t"
04397         "str    r6, [%[r], #4]\n\t"
04398         "ldr    r4, [%[a], #8]\n\t"
04399         "ldr    r6, [%[a], #12]\n\t"
04400         "ldr    r5, [%[b], #8]\n\t"
04401         "ldr    r7, [%[b], #12]\n\t"
04402         "and    r5, r5, %[m]\n\t"
04403         "and    r7, r7, %[m]\n\t"
04404         "sbcs   r4, r4, r5\n\t"
04405         "sbcs   r6, r6, r7\n\t"
04406         "str    r4, [%[r], #8]\n\t"
04407         "str    r6, [%[r], #12]\n\t"
04408         "ldr    r4, [%[a], #16]\n\t"
04409         "ldr    r6, [%[a], #20]\n\t"
04410         "ldr    r5, [%[b], #16]\n\t"
04411         "ldr    r7, [%[b], #20]\n\t"
04412         "and    r5, r5, %[m]\n\t"
04413         "and    r7, r7, %[m]\n\t"
04414         "sbcs   r4, r4, r5\n\t"
04415         "sbcs   r6, r6, r7\n\t"
04416         "str    r4, [%[r], #16]\n\t"
04417         "str    r6, [%[r], #20]\n\t"
04418         "ldr    r4, [%[a], #24]\n\t"
04419         "ldr    r6, [%[a], #28]\n\t"
04420         "ldr    r5, [%[b], #24]\n\t"
04421         "ldr    r7, [%[b], #28]\n\t"
04422         "and    r5, r5, %[m]\n\t"
04423         "and    r7, r7, %[m]\n\t"
04424         "sbcs   r4, r4, r5\n\t"
04425         "sbcs   r6, r6, r7\n\t"
04426         "str    r4, [%[r], #24]\n\t"
04427         "str    r6, [%[r], #28]\n\t"
04428         "ldr    r4, [%[a], #32]\n\t"
04429         "ldr    r6, [%[a], #36]\n\t"
04430         "ldr    r5, [%[b], #32]\n\t"
04431         "ldr    r7, [%[b], #36]\n\t"
04432         "and    r5, r5, %[m]\n\t"
04433         "and    r7, r7, %[m]\n\t"
04434         "sbcs   r4, r4, r5\n\t"
04435         "sbcs   r6, r6, r7\n\t"
04436         "str    r4, [%[r], #32]\n\t"
04437         "str    r6, [%[r], #36]\n\t"
04438         "ldr    r4, [%[a], #40]\n\t"
04439         "ldr    r6, [%[a], #44]\n\t"
04440         "ldr    r5, [%[b], #40]\n\t"
04441         "ldr    r7, [%[b], #44]\n\t"
04442         "and    r5, r5, %[m]\n\t"
04443         "and    r7, r7, %[m]\n\t"
04444         "sbcs   r4, r4, r5\n\t"
04445         "sbcs   r6, r6, r7\n\t"
04446         "str    r4, [%[r], #40]\n\t"
04447         "str    r6, [%[r], #44]\n\t"
04448         "ldr    r4, [%[a], #48]\n\t"
04449         "ldr    r6, [%[a], #52]\n\t"
04450         "ldr    r5, [%[b], #48]\n\t"
04451         "ldr    r7, [%[b], #52]\n\t"
04452         "and    r5, r5, %[m]\n\t"
04453         "and    r7, r7, %[m]\n\t"
04454         "sbcs   r4, r4, r5\n\t"
04455         "sbcs   r6, r6, r7\n\t"
04456         "str    r4, [%[r], #48]\n\t"
04457         "str    r6, [%[r], #52]\n\t"
04458         "ldr    r4, [%[a], #56]\n\t"
04459         "ldr    r6, [%[a], #60]\n\t"
04460         "ldr    r5, [%[b], #56]\n\t"
04461         "ldr    r7, [%[b], #60]\n\t"
04462         "and    r5, r5, %[m]\n\t"
04463         "and    r7, r7, %[m]\n\t"
04464         "sbcs   r4, r4, r5\n\t"
04465         "sbcs   r6, r6, r7\n\t"
04466         "str    r4, [%[r], #56]\n\t"
04467         "str    r6, [%[r], #60]\n\t"
04468         "ldr    r4, [%[a], #64]\n\t"
04469         "ldr    r6, [%[a], #68]\n\t"
04470         "ldr    r5, [%[b], #64]\n\t"
04471         "ldr    r7, [%[b], #68]\n\t"
04472         "and    r5, r5, %[m]\n\t"
04473         "and    r7, r7, %[m]\n\t"
04474         "sbcs   r4, r4, r5\n\t"
04475         "sbcs   r6, r6, r7\n\t"
04476         "str    r4, [%[r], #64]\n\t"
04477         "str    r6, [%[r], #68]\n\t"
04478         "ldr    r4, [%[a], #72]\n\t"
04479         "ldr    r6, [%[a], #76]\n\t"
04480         "ldr    r5, [%[b], #72]\n\t"
04481         "ldr    r7, [%[b], #76]\n\t"
04482         "and    r5, r5, %[m]\n\t"
04483         "and    r7, r7, %[m]\n\t"
04484         "sbcs   r4, r4, r5\n\t"
04485         "sbcs   r6, r6, r7\n\t"
04486         "str    r4, [%[r], #72]\n\t"
04487         "str    r6, [%[r], #76]\n\t"
04488         "ldr    r4, [%[a], #80]\n\t"
04489         "ldr    r6, [%[a], #84]\n\t"
04490         "ldr    r5, [%[b], #80]\n\t"
04491         "ldr    r7, [%[b], #84]\n\t"
04492         "and    r5, r5, %[m]\n\t"
04493         "and    r7, r7, %[m]\n\t"
04494         "sbcs   r4, r4, r5\n\t"
04495         "sbcs   r6, r6, r7\n\t"
04496         "str    r4, [%[r], #80]\n\t"
04497         "str    r6, [%[r], #84]\n\t"
04498         "ldr    r4, [%[a], #88]\n\t"
04499         "ldr    r6, [%[a], #92]\n\t"
04500         "ldr    r5, [%[b], #88]\n\t"
04501         "ldr    r7, [%[b], #92]\n\t"
04502         "and    r5, r5, %[m]\n\t"
04503         "and    r7, r7, %[m]\n\t"
04504         "sbcs   r4, r4, r5\n\t"
04505         "sbcs   r6, r6, r7\n\t"
04506         "str    r4, [%[r], #88]\n\t"
04507         "str    r6, [%[r], #92]\n\t"
04508         "ldr    r4, [%[a], #96]\n\t"
04509         "ldr    r6, [%[a], #100]\n\t"
04510         "ldr    r5, [%[b], #96]\n\t"
04511         "ldr    r7, [%[b], #100]\n\t"
04512         "and    r5, r5, %[m]\n\t"
04513         "and    r7, r7, %[m]\n\t"
04514         "sbcs   r4, r4, r5\n\t"
04515         "sbcs   r6, r6, r7\n\t"
04516         "str    r4, [%[r], #96]\n\t"
04517         "str    r6, [%[r], #100]\n\t"
04518         "ldr    r4, [%[a], #104]\n\t"
04519         "ldr    r6, [%[a], #108]\n\t"
04520         "ldr    r5, [%[b], #104]\n\t"
04521         "ldr    r7, [%[b], #108]\n\t"
04522         "and    r5, r5, %[m]\n\t"
04523         "and    r7, r7, %[m]\n\t"
04524         "sbcs   r4, r4, r5\n\t"
04525         "sbcs   r6, r6, r7\n\t"
04526         "str    r4, [%[r], #104]\n\t"
04527         "str    r6, [%[r], #108]\n\t"
04528         "ldr    r4, [%[a], #112]\n\t"
04529         "ldr    r6, [%[a], #116]\n\t"
04530         "ldr    r5, [%[b], #112]\n\t"
04531         "ldr    r7, [%[b], #116]\n\t"
04532         "and    r5, r5, %[m]\n\t"
04533         "and    r7, r7, %[m]\n\t"
04534         "sbcs   r4, r4, r5\n\t"
04535         "sbcs   r6, r6, r7\n\t"
04536         "str    r4, [%[r], #112]\n\t"
04537         "str    r6, [%[r], #116]\n\t"
04538         "ldr    r4, [%[a], #120]\n\t"
04539         "ldr    r6, [%[a], #124]\n\t"
04540         "ldr    r5, [%[b], #120]\n\t"
04541         "ldr    r7, [%[b], #124]\n\t"
04542         "and    r5, r5, %[m]\n\t"
04543         "and    r7, r7, %[m]\n\t"
04544         "sbcs   r4, r4, r5\n\t"
04545         "sbcs   r6, r6, r7\n\t"
04546         "str    r4, [%[r], #120]\n\t"
04547         "str    r6, [%[r], #124]\n\t"
04548         "ldr    r4, [%[a], #128]\n\t"
04549         "ldr    r6, [%[a], #132]\n\t"
04550         "ldr    r5, [%[b], #128]\n\t"
04551         "ldr    r7, [%[b], #132]\n\t"
04552         "and    r5, r5, %[m]\n\t"
04553         "and    r7, r7, %[m]\n\t"
04554         "sbcs   r4, r4, r5\n\t"
04555         "sbcs   r6, r6, r7\n\t"
04556         "str    r4, [%[r], #128]\n\t"
04557         "str    r6, [%[r], #132]\n\t"
04558         "ldr    r4, [%[a], #136]\n\t"
04559         "ldr    r6, [%[a], #140]\n\t"
04560         "ldr    r5, [%[b], #136]\n\t"
04561         "ldr    r7, [%[b], #140]\n\t"
04562         "and    r5, r5, %[m]\n\t"
04563         "and    r7, r7, %[m]\n\t"
04564         "sbcs   r4, r4, r5\n\t"
04565         "sbcs   r6, r6, r7\n\t"
04566         "str    r4, [%[r], #136]\n\t"
04567         "str    r6, [%[r], #140]\n\t"
04568         "ldr    r4, [%[a], #144]\n\t"
04569         "ldr    r6, [%[a], #148]\n\t"
04570         "ldr    r5, [%[b], #144]\n\t"
04571         "ldr    r7, [%[b], #148]\n\t"
04572         "and    r5, r5, %[m]\n\t"
04573         "and    r7, r7, %[m]\n\t"
04574         "sbcs   r4, r4, r5\n\t"
04575         "sbcs   r6, r6, r7\n\t"
04576         "str    r4, [%[r], #144]\n\t"
04577         "str    r6, [%[r], #148]\n\t"
04578         "ldr    r4, [%[a], #152]\n\t"
04579         "ldr    r6, [%[a], #156]\n\t"
04580         "ldr    r5, [%[b], #152]\n\t"
04581         "ldr    r7, [%[b], #156]\n\t"
04582         "and    r5, r5, %[m]\n\t"
04583         "and    r7, r7, %[m]\n\t"
04584         "sbcs   r4, r4, r5\n\t"
04585         "sbcs   r6, r6, r7\n\t"
04586         "str    r4, [%[r], #152]\n\t"
04587         "str    r6, [%[r], #156]\n\t"
04588         "ldr    r4, [%[a], #160]\n\t"
04589         "ldr    r6, [%[a], #164]\n\t"
04590         "ldr    r5, [%[b], #160]\n\t"
04591         "ldr    r7, [%[b], #164]\n\t"
04592         "and    r5, r5, %[m]\n\t"
04593         "and    r7, r7, %[m]\n\t"
04594         "sbcs   r4, r4, r5\n\t"
04595         "sbcs   r6, r6, r7\n\t"
04596         "str    r4, [%[r], #160]\n\t"
04597         "str    r6, [%[r], #164]\n\t"
04598         "ldr    r4, [%[a], #168]\n\t"
04599         "ldr    r6, [%[a], #172]\n\t"
04600         "ldr    r5, [%[b], #168]\n\t"
04601         "ldr    r7, [%[b], #172]\n\t"
04602         "and    r5, r5, %[m]\n\t"
04603         "and    r7, r7, %[m]\n\t"
04604         "sbcs   r4, r4, r5\n\t"
04605         "sbcs   r6, r6, r7\n\t"
04606         "str    r4, [%[r], #168]\n\t"
04607         "str    r6, [%[r], #172]\n\t"
04608         "ldr    r4, [%[a], #176]\n\t"
04609         "ldr    r6, [%[a], #180]\n\t"
04610         "ldr    r5, [%[b], #176]\n\t"
04611         "ldr    r7, [%[b], #180]\n\t"
04612         "and    r5, r5, %[m]\n\t"
04613         "and    r7, r7, %[m]\n\t"
04614         "sbcs   r4, r4, r5\n\t"
04615         "sbcs   r6, r6, r7\n\t"
04616         "str    r4, [%[r], #176]\n\t"
04617         "str    r6, [%[r], #180]\n\t"
04618         "ldr    r4, [%[a], #184]\n\t"
04619         "ldr    r6, [%[a], #188]\n\t"
04620         "ldr    r5, [%[b], #184]\n\t"
04621         "ldr    r7, [%[b], #188]\n\t"
04622         "and    r5, r5, %[m]\n\t"
04623         "and    r7, r7, %[m]\n\t"
04624         "sbcs   r4, r4, r5\n\t"
04625         "sbcs   r6, r6, r7\n\t"
04626         "str    r4, [%[r], #184]\n\t"
04627         "str    r6, [%[r], #188]\n\t"
04628         "ldr    r4, [%[a], #192]\n\t"
04629         "ldr    r6, [%[a], #196]\n\t"
04630         "ldr    r5, [%[b], #192]\n\t"
04631         "ldr    r7, [%[b], #196]\n\t"
04632         "and    r5, r5, %[m]\n\t"
04633         "and    r7, r7, %[m]\n\t"
04634         "sbcs   r4, r4, r5\n\t"
04635         "sbcs   r6, r6, r7\n\t"
04636         "str    r4, [%[r], #192]\n\t"
04637         "str    r6, [%[r], #196]\n\t"
04638         "ldr    r4, [%[a], #200]\n\t"
04639         "ldr    r6, [%[a], #204]\n\t"
04640         "ldr    r5, [%[b], #200]\n\t"
04641         "ldr    r7, [%[b], #204]\n\t"
04642         "and    r5, r5, %[m]\n\t"
04643         "and    r7, r7, %[m]\n\t"
04644         "sbcs   r4, r4, r5\n\t"
04645         "sbcs   r6, r6, r7\n\t"
04646         "str    r4, [%[r], #200]\n\t"
04647         "str    r6, [%[r], #204]\n\t"
04648         "ldr    r4, [%[a], #208]\n\t"
04649         "ldr    r6, [%[a], #212]\n\t"
04650         "ldr    r5, [%[b], #208]\n\t"
04651         "ldr    r7, [%[b], #212]\n\t"
04652         "and    r5, r5, %[m]\n\t"
04653         "and    r7, r7, %[m]\n\t"
04654         "sbcs   r4, r4, r5\n\t"
04655         "sbcs   r6, r6, r7\n\t"
04656         "str    r4, [%[r], #208]\n\t"
04657         "str    r6, [%[r], #212]\n\t"
04658         "ldr    r4, [%[a], #216]\n\t"
04659         "ldr    r6, [%[a], #220]\n\t"
04660         "ldr    r5, [%[b], #216]\n\t"
04661         "ldr    r7, [%[b], #220]\n\t"
04662         "and    r5, r5, %[m]\n\t"
04663         "and    r7, r7, %[m]\n\t"
04664         "sbcs   r4, r4, r5\n\t"
04665         "sbcs   r6, r6, r7\n\t"
04666         "str    r4, [%[r], #216]\n\t"
04667         "str    r6, [%[r], #220]\n\t"
04668         "ldr    r4, [%[a], #224]\n\t"
04669         "ldr    r6, [%[a], #228]\n\t"
04670         "ldr    r5, [%[b], #224]\n\t"
04671         "ldr    r7, [%[b], #228]\n\t"
04672         "and    r5, r5, %[m]\n\t"
04673         "and    r7, r7, %[m]\n\t"
04674         "sbcs   r4, r4, r5\n\t"
04675         "sbcs   r6, r6, r7\n\t"
04676         "str    r4, [%[r], #224]\n\t"
04677         "str    r6, [%[r], #228]\n\t"
04678         "ldr    r4, [%[a], #232]\n\t"
04679         "ldr    r6, [%[a], #236]\n\t"
04680         "ldr    r5, [%[b], #232]\n\t"
04681         "ldr    r7, [%[b], #236]\n\t"
04682         "and    r5, r5, %[m]\n\t"
04683         "and    r7, r7, %[m]\n\t"
04684         "sbcs   r4, r4, r5\n\t"
04685         "sbcs   r6, r6, r7\n\t"
04686         "str    r4, [%[r], #232]\n\t"
04687         "str    r6, [%[r], #236]\n\t"
04688         "ldr    r4, [%[a], #240]\n\t"
04689         "ldr    r6, [%[a], #244]\n\t"
04690         "ldr    r5, [%[b], #240]\n\t"
04691         "ldr    r7, [%[b], #244]\n\t"
04692         "and    r5, r5, %[m]\n\t"
04693         "and    r7, r7, %[m]\n\t"
04694         "sbcs   r4, r4, r5\n\t"
04695         "sbcs   r6, r6, r7\n\t"
04696         "str    r4, [%[r], #240]\n\t"
04697         "str    r6, [%[r], #244]\n\t"
04698         "ldr    r4, [%[a], #248]\n\t"
04699         "ldr    r6, [%[a], #252]\n\t"
04700         "ldr    r5, [%[b], #248]\n\t"
04701         "ldr    r7, [%[b], #252]\n\t"
04702         "and    r5, r5, %[m]\n\t"
04703         "and    r7, r7, %[m]\n\t"
04704         "sbcs   r4, r4, r5\n\t"
04705         "sbcs   r6, r6, r7\n\t"
04706         "str    r4, [%[r], #248]\n\t"
04707         "str    r6, [%[r], #252]\n\t"
04708         "sbc    %[c], r9, r9\n\t"
04709         : [c] "+r" (c)
04710         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
04711         : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
04712     );
04713 #endif /* WOLFSSL_SP_SMALL */
04714 
04715     return c;
04716 }
04717 
04718 /* Reduce the number back to 2048 bits using Montgomery reduction.
04719  *
04720  * a   A single precision number to reduce in place.
04721  * m   The single precision number representing the modulus.
04722  * mp  The digit representing the negative inverse of m mod 2^n.
04723  */
04724 SP_NOINLINE static void sp_2048_mont_reduce_64(sp_digit* a, sp_digit* m,
04725         sp_digit mp)
04726 {
04727     sp_digit ca = 0;
04728 
04729     __asm__ __volatile__ (
04730         "# i = 0\n\t"
04731         "mov    r12, #0\n\t"
04732         "ldr    r10, [%[a], #0]\n\t"
04733         "ldr    r14, [%[a], #4]\n\t"
04734         "\n1:\n\t"
04735         "# mu = a[i] * mp\n\t"
04736         "mul    r8, %[mp], r10\n\t"
04737         "# a[i+0] += m[0] * mu\n\t"
04738         "ldr    r7, [%[m], #0]\n\t"
04739         "ldr    r9, [%[a], #0]\n\t"
04740         "umull  r6, r7, r8, r7\n\t"
04741         "adds   r10, r10, r6\n\t"
04742         "adc    r5, r7, #0\n\t"
04743         "# a[i+1] += m[1] * mu\n\t"
04744         "ldr    r7, [%[m], #4]\n\t"
04745         "ldr    r9, [%[a], #4]\n\t"
04746         "umull  r6, r7, r8, r7\n\t"
04747         "adds   r10, r14, r6\n\t"
04748         "adc    r4, r7, #0\n\t"
04749         "adds   r10, r10, r5\n\t"
04750         "adc    r4, r4, #0\n\t"
04751         "# a[i+2] += m[2] * mu\n\t"
04752         "ldr    r7, [%[m], #8]\n\t"
04753         "ldr    r14, [%[a], #8]\n\t"
04754         "umull  r6, r7, r8, r7\n\t"
04755         "adds   r14, r14, r6\n\t"
04756         "adc    r5, r7, #0\n\t"
04757         "adds   r14, r14, r4\n\t"
04758         "adc    r5, r5, #0\n\t"
04759         "# a[i+3] += m[3] * mu\n\t"
04760         "ldr    r7, [%[m], #12]\n\t"
04761         "ldr    r9, [%[a], #12]\n\t"
04762         "umull  r6, r7, r8, r7\n\t"
04763         "adds   r9, r9, r6\n\t"
04764         "adc    r4, r7, #0\n\t"
04765         "adds   r9, r9, r5\n\t"
04766         "str    r9, [%[a], #12]\n\t"
04767         "adc    r4, r4, #0\n\t"
04768         "# a[i+4] += m[4] * mu\n\t"
04769         "ldr    r7, [%[m], #16]\n\t"
04770         "ldr    r9, [%[a], #16]\n\t"
04771         "umull  r6, r7, r8, r7\n\t"
04772         "adds   r9, r9, r6\n\t"
04773         "adc    r5, r7, #0\n\t"
04774         "adds   r9, r9, r4\n\t"
04775         "str    r9, [%[a], #16]\n\t"
04776         "adc    r5, r5, #0\n\t"
04777         "# a[i+5] += m[5] * mu\n\t"
04778         "ldr    r7, [%[m], #20]\n\t"
04779         "ldr    r9, [%[a], #20]\n\t"
04780         "umull  r6, r7, r8, r7\n\t"
04781         "adds   r9, r9, r6\n\t"
04782         "adc    r4, r7, #0\n\t"
04783         "adds   r9, r9, r5\n\t"
04784         "str    r9, [%[a], #20]\n\t"
04785         "adc    r4, r4, #0\n\t"
04786         "# a[i+6] += m[6] * mu\n\t"
04787         "ldr    r7, [%[m], #24]\n\t"
04788         "ldr    r9, [%[a], #24]\n\t"
04789         "umull  r6, r7, r8, r7\n\t"
04790         "adds   r9, r9, r6\n\t"
04791         "adc    r5, r7, #0\n\t"
04792         "adds   r9, r9, r4\n\t"
04793         "str    r9, [%[a], #24]\n\t"
04794         "adc    r5, r5, #0\n\t"
04795         "# a[i+7] += m[7] * mu\n\t"
04796         "ldr    r7, [%[m], #28]\n\t"
04797         "ldr    r9, [%[a], #28]\n\t"
04798         "umull  r6, r7, r8, r7\n\t"
04799         "adds   r9, r9, r6\n\t"
04800         "adc    r4, r7, #0\n\t"
04801         "adds   r9, r9, r5\n\t"
04802         "str    r9, [%[a], #28]\n\t"
04803         "adc    r4, r4, #0\n\t"
04804         "# a[i+8] += m[8] * mu\n\t"
04805         "ldr    r7, [%[m], #32]\n\t"
04806         "ldr    r9, [%[a], #32]\n\t"
04807         "umull  r6, r7, r8, r7\n\t"
04808         "adds   r9, r9, r6\n\t"
04809         "adc    r5, r7, #0\n\t"
04810         "adds   r9, r9, r4\n\t"
04811         "str    r9, [%[a], #32]\n\t"
04812         "adc    r5, r5, #0\n\t"
04813         "# a[i+9] += m[9] * mu\n\t"
04814         "ldr    r7, [%[m], #36]\n\t"
04815         "ldr    r9, [%[a], #36]\n\t"
04816         "umull  r6, r7, r8, r7\n\t"
04817         "adds   r9, r9, r6\n\t"
04818         "adc    r4, r7, #0\n\t"
04819         "adds   r9, r9, r5\n\t"
04820         "str    r9, [%[a], #36]\n\t"
04821         "adc    r4, r4, #0\n\t"
04822         "# a[i+10] += m[10] * mu\n\t"
04823         "ldr    r7, [%[m], #40]\n\t"
04824         "ldr    r9, [%[a], #40]\n\t"
04825         "umull  r6, r7, r8, r7\n\t"
04826         "adds   r9, r9, r6\n\t"
04827         "adc    r5, r7, #0\n\t"
04828         "adds   r9, r9, r4\n\t"
04829         "str    r9, [%[a], #40]\n\t"
04830         "adc    r5, r5, #0\n\t"
04831         "# a[i+11] += m[11] * mu\n\t"
04832         "ldr    r7, [%[m], #44]\n\t"
04833         "ldr    r9, [%[a], #44]\n\t"
04834         "umull  r6, r7, r8, r7\n\t"
04835         "adds   r9, r9, r6\n\t"
04836         "adc    r4, r7, #0\n\t"
04837         "adds   r9, r9, r5\n\t"
04838         "str    r9, [%[a], #44]\n\t"
04839         "adc    r4, r4, #0\n\t"
04840         "# a[i+12] += m[12] * mu\n\t"
04841         "ldr    r7, [%[m], #48]\n\t"
04842         "ldr    r9, [%[a], #48]\n\t"
04843         "umull  r6, r7, r8, r7\n\t"
04844         "adds   r9, r9, r6\n\t"
04845         "adc    r5, r7, #0\n\t"
04846         "adds   r9, r9, r4\n\t"
04847         "str    r9, [%[a], #48]\n\t"
04848         "adc    r5, r5, #0\n\t"
04849         "# a[i+13] += m[13] * mu\n\t"
04850         "ldr    r7, [%[m], #52]\n\t"
04851         "ldr    r9, [%[a], #52]\n\t"
04852         "umull  r6, r7, r8, r7\n\t"
04853         "adds   r9, r9, r6\n\t"
04854         "adc    r4, r7, #0\n\t"
04855         "adds   r9, r9, r5\n\t"
04856         "str    r9, [%[a], #52]\n\t"
04857         "adc    r4, r4, #0\n\t"
04858         "# a[i+14] += m[14] * mu\n\t"
04859         "ldr    r7, [%[m], #56]\n\t"
04860         "ldr    r9, [%[a], #56]\n\t"
04861         "umull  r6, r7, r8, r7\n\t"
04862         "adds   r9, r9, r6\n\t"
04863         "adc    r5, r7, #0\n\t"
04864         "adds   r9, r9, r4\n\t"
04865         "str    r9, [%[a], #56]\n\t"
04866         "adc    r5, r5, #0\n\t"
04867         "# a[i+15] += m[15] * mu\n\t"
04868         "ldr    r7, [%[m], #60]\n\t"
04869         "ldr    r9, [%[a], #60]\n\t"
04870         "umull  r6, r7, r8, r7\n\t"
04871         "adds   r9, r9, r6\n\t"
04872         "adc    r4, r7, #0\n\t"
04873         "adds   r9, r9, r5\n\t"
04874         "str    r9, [%[a], #60]\n\t"
04875         "adc    r4, r4, #0\n\t"
04876         "# a[i+16] += m[16] * mu\n\t"
04877         "ldr    r7, [%[m], #64]\n\t"
04878         "ldr    r9, [%[a], #64]\n\t"
04879         "umull  r6, r7, r8, r7\n\t"
04880         "adds   r9, r9, r6\n\t"
04881         "adc    r5, r7, #0\n\t"
04882         "adds   r9, r9, r4\n\t"
04883         "str    r9, [%[a], #64]\n\t"
04884         "adc    r5, r5, #0\n\t"
04885         "# a[i+17] += m[17] * mu\n\t"
04886         "ldr    r7, [%[m], #68]\n\t"
04887         "ldr    r9, [%[a], #68]\n\t"
04888         "umull  r6, r7, r8, r7\n\t"
04889         "adds   r9, r9, r6\n\t"
04890         "adc    r4, r7, #0\n\t"
04891         "adds   r9, r9, r5\n\t"
04892         "str    r9, [%[a], #68]\n\t"
04893         "adc    r4, r4, #0\n\t"
04894         "# a[i+18] += m[18] * mu\n\t"
04895         "ldr    r7, [%[m], #72]\n\t"
04896         "ldr    r9, [%[a], #72]\n\t"
04897         "umull  r6, r7, r8, r7\n\t"
04898         "adds   r9, r9, r6\n\t"
04899         "adc    r5, r7, #0\n\t"
04900         "adds   r9, r9, r4\n\t"
04901         "str    r9, [%[a], #72]\n\t"
04902         "adc    r5, r5, #0\n\t"
04903         "# a[i+19] += m[19] * mu\n\t"
04904         "ldr    r7, [%[m], #76]\n\t"
04905         "ldr    r9, [%[a], #76]\n\t"
04906         "umull  r6, r7, r8, r7\n\t"
04907         "adds   r9, r9, r6\n\t"
04908         "adc    r4, r7, #0\n\t"
04909         "adds   r9, r9, r5\n\t"
04910         "str    r9, [%[a], #76]\n\t"
04911         "adc    r4, r4, #0\n\t"
04912         "# a[i+20] += m[20] * mu\n\t"
04913         "ldr    r7, [%[m], #80]\n\t"
04914         "ldr    r9, [%[a], #80]\n\t"
04915         "umull  r6, r7, r8, r7\n\t"
04916         "adds   r9, r9, r6\n\t"
04917         "adc    r5, r7, #0\n\t"
04918         "adds   r9, r9, r4\n\t"
04919         "str    r9, [%[a], #80]\n\t"
04920         "adc    r5, r5, #0\n\t"
04921         "# a[i+21] += m[21] * mu\n\t"
04922         "ldr    r7, [%[m], #84]\n\t"
04923         "ldr    r9, [%[a], #84]\n\t"
04924         "umull  r6, r7, r8, r7\n\t"
04925         "adds   r9, r9, r6\n\t"
04926         "adc    r4, r7, #0\n\t"
04927         "adds   r9, r9, r5\n\t"
04928         "str    r9, [%[a], #84]\n\t"
04929         "adc    r4, r4, #0\n\t"
04930         "# a[i+22] += m[22] * mu\n\t"
04931         "ldr    r7, [%[m], #88]\n\t"
04932         "ldr    r9, [%[a], #88]\n\t"
04933         "umull  r6, r7, r8, r7\n\t"
04934         "adds   r9, r9, r6\n\t"
04935         "adc    r5, r7, #0\n\t"
04936         "adds   r9, r9, r4\n\t"
04937         "str    r9, [%[a], #88]\n\t"
04938         "adc    r5, r5, #0\n\t"
04939         "# a[i+23] += m[23] * mu\n\t"
04940         "ldr    r7, [%[m], #92]\n\t"
04941         "ldr    r9, [%[a], #92]\n\t"
04942         "umull  r6, r7, r8, r7\n\t"
04943         "adds   r9, r9, r6\n\t"
04944         "adc    r4, r7, #0\n\t"
04945         "adds   r9, r9, r5\n\t"
04946         "str    r9, [%[a], #92]\n\t"
04947         "adc    r4, r4, #0\n\t"
04948         "# a[i+24] += m[24] * mu\n\t"
04949         "ldr    r7, [%[m], #96]\n\t"
04950         "ldr    r9, [%[a], #96]\n\t"
04951         "umull  r6, r7, r8, r7\n\t"
04952         "adds   r9, r9, r6\n\t"
04953         "adc    r5, r7, #0\n\t"
04954         "adds   r9, r9, r4\n\t"
04955         "str    r9, [%[a], #96]\n\t"
04956         "adc    r5, r5, #0\n\t"
04957         "# a[i+25] += m[25] * mu\n\t"
04958         "ldr    r7, [%[m], #100]\n\t"
04959         "ldr    r9, [%[a], #100]\n\t"
04960         "umull  r6, r7, r8, r7\n\t"
04961         "adds   r9, r9, r6\n\t"
04962         "adc    r4, r7, #0\n\t"
04963         "adds   r9, r9, r5\n\t"
04964         "str    r9, [%[a], #100]\n\t"
04965         "adc    r4, r4, #0\n\t"
04966         "# a[i+26] += m[26] * mu\n\t"
04967         "ldr    r7, [%[m], #104]\n\t"
04968         "ldr    r9, [%[a], #104]\n\t"
04969         "umull  r6, r7, r8, r7\n\t"
04970         "adds   r9, r9, r6\n\t"
04971         "adc    r5, r7, #0\n\t"
04972         "adds   r9, r9, r4\n\t"
04973         "str    r9, [%[a], #104]\n\t"
04974         "adc    r5, r5, #0\n\t"
04975         "# a[i+27] += m[27] * mu\n\t"
04976         "ldr    r7, [%[m], #108]\n\t"
04977         "ldr    r9, [%[a], #108]\n\t"
04978         "umull  r6, r7, r8, r7\n\t"
04979         "adds   r9, r9, r6\n\t"
04980         "adc    r4, r7, #0\n\t"
04981         "adds   r9, r9, r5\n\t"
04982         "str    r9, [%[a], #108]\n\t"
04983         "adc    r4, r4, #0\n\t"
04984         "# a[i+28] += m[28] * mu\n\t"
04985         "ldr    r7, [%[m], #112]\n\t"
04986         "ldr    r9, [%[a], #112]\n\t"
04987         "umull  r6, r7, r8, r7\n\t"
04988         "adds   r9, r9, r6\n\t"
04989         "adc    r5, r7, #0\n\t"
04990         "adds   r9, r9, r4\n\t"
04991         "str    r9, [%[a], #112]\n\t"
04992         "adc    r5, r5, #0\n\t"
04993         "# a[i+29] += m[29] * mu\n\t"
04994         "ldr    r7, [%[m], #116]\n\t"
04995         "ldr    r9, [%[a], #116]\n\t"
04996         "umull  r6, r7, r8, r7\n\t"
04997         "adds   r9, r9, r6\n\t"
04998         "adc    r4, r7, #0\n\t"
04999         "adds   r9, r9, r5\n\t"
05000         "str    r9, [%[a], #116]\n\t"
05001         "adc    r4, r4, #0\n\t"
05002         "# a[i+30] += m[30] * mu\n\t"
05003         "ldr    r7, [%[m], #120]\n\t"
05004         "ldr    r9, [%[a], #120]\n\t"
05005         "umull  r6, r7, r8, r7\n\t"
05006         "adds   r9, r9, r6\n\t"
05007         "adc    r5, r7, #0\n\t"
05008         "adds   r9, r9, r4\n\t"
05009         "str    r9, [%[a], #120]\n\t"
05010         "adc    r5, r5, #0\n\t"
05011         "# a[i+31] += m[31] * mu\n\t"
05012         "ldr    r7, [%[m], #124]\n\t"
05013         "ldr    r9, [%[a], #124]\n\t"
05014         "umull  r6, r7, r8, r7\n\t"
05015         "adds   r9, r9, r6\n\t"
05016         "adc    r4, r7, #0\n\t"
05017         "adds   r9, r9, r5\n\t"
05018         "str    r9, [%[a], #124]\n\t"
05019         "adc    r4, r4, #0\n\t"
05020         "# a[i+32] += m[32] * mu\n\t"
05021         "ldr    r7, [%[m], #128]\n\t"
05022         "ldr    r9, [%[a], #128]\n\t"
05023         "umull  r6, r7, r8, r7\n\t"
05024         "adds   r9, r9, r6\n\t"
05025         "adc    r5, r7, #0\n\t"
05026         "adds   r9, r9, r4\n\t"
05027         "str    r9, [%[a], #128]\n\t"
05028         "adc    r5, r5, #0\n\t"
05029         "# a[i+33] += m[33] * mu\n\t"
05030         "ldr    r7, [%[m], #132]\n\t"
05031         "ldr    r9, [%[a], #132]\n\t"
05032         "umull  r6, r7, r8, r7\n\t"
05033         "adds   r9, r9, r6\n\t"
05034         "adc    r4, r7, #0\n\t"
05035         "adds   r9, r9, r5\n\t"
05036         "str    r9, [%[a], #132]\n\t"
05037         "adc    r4, r4, #0\n\t"
05038         "# a[i+34] += m[34] * mu\n\t"
05039         "ldr    r7, [%[m], #136]\n\t"
05040         "ldr    r9, [%[a], #136]\n\t"
05041         "umull  r6, r7, r8, r7\n\t"
05042         "adds   r9, r9, r6\n\t"
05043         "adc    r5, r7, #0\n\t"
05044         "adds   r9, r9, r4\n\t"
05045         "str    r9, [%[a], #136]\n\t"
05046         "adc    r5, r5, #0\n\t"
05047         "# a[i+35] += m[35] * mu\n\t"
05048         "ldr    r7, [%[m], #140]\n\t"
05049         "ldr    r9, [%[a], #140]\n\t"
05050         "umull  r6, r7, r8, r7\n\t"
05051         "adds   r9, r9, r6\n\t"
05052         "adc    r4, r7, #0\n\t"
05053         "adds   r9, r9, r5\n\t"
05054         "str    r9, [%[a], #140]\n\t"
05055         "adc    r4, r4, #0\n\t"
05056         "# a[i+36] += m[36] * mu\n\t"
05057         "ldr    r7, [%[m], #144]\n\t"
05058         "ldr    r9, [%[a], #144]\n\t"
05059         "umull  r6, r7, r8, r7\n\t"
05060         "adds   r9, r9, r6\n\t"
05061         "adc    r5, r7, #0\n\t"
05062         "adds   r9, r9, r4\n\t"
05063         "str    r9, [%[a], #144]\n\t"
05064         "adc    r5, r5, #0\n\t"
05065         "# a[i+37] += m[37] * mu\n\t"
05066         "ldr    r7, [%[m], #148]\n\t"
05067         "ldr    r9, [%[a], #148]\n\t"
05068         "umull  r6, r7, r8, r7\n\t"
05069         "adds   r9, r9, r6\n\t"
05070         "adc    r4, r7, #0\n\t"
05071         "adds   r9, r9, r5\n\t"
05072         "str    r9, [%[a], #148]\n\t"
05073         "adc    r4, r4, #0\n\t"
05074         "# a[i+38] += m[38] * mu\n\t"
05075         "ldr    r7, [%[m], #152]\n\t"
05076         "ldr    r9, [%[a], #152]\n\t"
05077         "umull  r6, r7, r8, r7\n\t"
05078         "adds   r9, r9, r6\n\t"
05079         "adc    r5, r7, #0\n\t"
05080         "adds   r9, r9, r4\n\t"
05081         "str    r9, [%[a], #152]\n\t"
05082         "adc    r5, r5, #0\n\t"
05083         "# a[i+39] += m[39] * mu\n\t"
05084         "ldr    r7, [%[m], #156]\n\t"
05085         "ldr    r9, [%[a], #156]\n\t"
05086         "umull  r6, r7, r8, r7\n\t"
05087         "adds   r9, r9, r6\n\t"
05088         "adc    r4, r7, #0\n\t"
05089         "adds   r9, r9, r5\n\t"
05090         "str    r9, [%[a], #156]\n\t"
05091         "adc    r4, r4, #0\n\t"
05092         "# a[i+40] += m[40] * mu\n\t"
05093         "ldr    r7, [%[m], #160]\n\t"
05094         "ldr    r9, [%[a], #160]\n\t"
05095         "umull  r6, r7, r8, r7\n\t"
05096         "adds   r9, r9, r6\n\t"
05097         "adc    r5, r7, #0\n\t"
05098         "adds   r9, r9, r4\n\t"
05099         "str    r9, [%[a], #160]\n\t"
05100         "adc    r5, r5, #0\n\t"
05101         "# a[i+41] += m[41] * mu\n\t"
05102         "ldr    r7, [%[m], #164]\n\t"
05103         "ldr    r9, [%[a], #164]\n\t"
05104         "umull  r6, r7, r8, r7\n\t"
05105         "adds   r9, r9, r6\n\t"
05106         "adc    r4, r7, #0\n\t"
05107         "adds   r9, r9, r5\n\t"
05108         "str    r9, [%[a], #164]\n\t"
05109         "adc    r4, r4, #0\n\t"
05110         "# a[i+42] += m[42] * mu\n\t"
05111         "ldr    r7, [%[m], #168]\n\t"
05112         "ldr    r9, [%[a], #168]\n\t"
05113         "umull  r6, r7, r8, r7\n\t"
05114         "adds   r9, r9, r6\n\t"
05115         "adc    r5, r7, #0\n\t"
05116         "adds   r9, r9, r4\n\t"
05117         "str    r9, [%[a], #168]\n\t"
05118         "adc    r5, r5, #0\n\t"
05119         "# a[i+43] += m[43] * mu\n\t"
05120         "ldr    r7, [%[m], #172]\n\t"
05121         "ldr    r9, [%[a], #172]\n\t"
05122         "umull  r6, r7, r8, r7\n\t"
05123         "adds   r9, r9, r6\n\t"
05124         "adc    r4, r7, #0\n\t"
05125         "adds   r9, r9, r5\n\t"
05126         "str    r9, [%[a], #172]\n\t"
05127         "adc    r4, r4, #0\n\t"
05128         "# a[i+44] += m[44] * mu\n\t"
05129         "ldr    r7, [%[m], #176]\n\t"
05130         "ldr    r9, [%[a], #176]\n\t"
05131         "umull  r6, r7, r8, r7\n\t"
05132         "adds   r9, r9, r6\n\t"
05133         "adc    r5, r7, #0\n\t"
05134         "adds   r9, r9, r4\n\t"
05135         "str    r9, [%[a], #176]\n\t"
05136         "adc    r5, r5, #0\n\t"
05137         "# a[i+45] += m[45] * mu\n\t"
05138         "ldr    r7, [%[m], #180]\n\t"
05139         "ldr    r9, [%[a], #180]\n\t"
05140         "umull  r6, r7, r8, r7\n\t"
05141         "adds   r9, r9, r6\n\t"
05142         "adc    r4, r7, #0\n\t"
05143         "adds   r9, r9, r5\n\t"
05144         "str    r9, [%[a], #180]\n\t"
05145         "adc    r4, r4, #0\n\t"
05146         "# a[i+46] += m[46] * mu\n\t"
05147         "ldr    r7, [%[m], #184]\n\t"
05148         "ldr    r9, [%[a], #184]\n\t"
05149         "umull  r6, r7, r8, r7\n\t"
05150         "adds   r9, r9, r6\n\t"
05151         "adc    r5, r7, #0\n\t"
05152         "adds   r9, r9, r4\n\t"
05153         "str    r9, [%[a], #184]\n\t"
05154         "adc    r5, r5, #0\n\t"
05155         "# a[i+47] += m[47] * mu\n\t"
05156         "ldr    r7, [%[m], #188]\n\t"
05157         "ldr    r9, [%[a], #188]\n\t"
05158         "umull  r6, r7, r8, r7\n\t"
05159         "adds   r9, r9, r6\n\t"
05160         "adc    r4, r7, #0\n\t"
05161         "adds   r9, r9, r5\n\t"
05162         "str    r9, [%[a], #188]\n\t"
05163         "adc    r4, r4, #0\n\t"
05164         "# a[i+48] += m[48] * mu\n\t"
05165         "ldr    r7, [%[m], #192]\n\t"
05166         "ldr    r9, [%[a], #192]\n\t"
05167         "umull  r6, r7, r8, r7\n\t"
05168         "adds   r9, r9, r6\n\t"
05169         "adc    r5, r7, #0\n\t"
05170         "adds   r9, r9, r4\n\t"
05171         "str    r9, [%[a], #192]\n\t"
05172         "adc    r5, r5, #0\n\t"
05173         "# a[i+49] += m[49] * mu\n\t"
05174         "ldr    r7, [%[m], #196]\n\t"
05175         "ldr    r9, [%[a], #196]\n\t"
05176         "umull  r6, r7, r8, r7\n\t"
05177         "adds   r9, r9, r6\n\t"
05178         "adc    r4, r7, #0\n\t"
05179         "adds   r9, r9, r5\n\t"
05180         "str    r9, [%[a], #196]\n\t"
05181         "adc    r4, r4, #0\n\t"
05182         "# a[i+50] += m[50] * mu\n\t"
05183         "ldr    r7, [%[m], #200]\n\t"
05184         "ldr    r9, [%[a], #200]\n\t"
05185         "umull  r6, r7, r8, r7\n\t"
05186         "adds   r9, r9, r6\n\t"
05187         "adc    r5, r7, #0\n\t"
05188         "adds   r9, r9, r4\n\t"
05189         "str    r9, [%[a], #200]\n\t"
05190         "adc    r5, r5, #0\n\t"
05191         "# a[i+51] += m[51] * mu\n\t"
05192         "ldr    r7, [%[m], #204]\n\t"
05193         "ldr    r9, [%[a], #204]\n\t"
05194         "umull  r6, r7, r8, r7\n\t"
05195         "adds   r9, r9, r6\n\t"
05196         "adc    r4, r7, #0\n\t"
05197         "adds   r9, r9, r5\n\t"
05198         "str    r9, [%[a], #204]\n\t"
05199         "adc    r4, r4, #0\n\t"
05200         "# a[i+52] += m[52] * mu\n\t"
05201         "ldr    r7, [%[m], #208]\n\t"
05202         "ldr    r9, [%[a], #208]\n\t"
05203         "umull  r6, r7, r8, r7\n\t"
05204         "adds   r9, r9, r6\n\t"
05205         "adc    r5, r7, #0\n\t"
05206         "adds   r9, r9, r4\n\t"
05207         "str    r9, [%[a], #208]\n\t"
05208         "adc    r5, r5, #0\n\t"
05209         "# a[i+53] += m[53] * mu\n\t"
05210         "ldr    r7, [%[m], #212]\n\t"
05211         "ldr    r9, [%[a], #212]\n\t"
05212         "umull  r6, r7, r8, r7\n\t"
05213         "adds   r9, r9, r6\n\t"
05214         "adc    r4, r7, #0\n\t"
05215         "adds   r9, r9, r5\n\t"
05216         "str    r9, [%[a], #212]\n\t"
05217         "adc    r4, r4, #0\n\t"
05218         "# a[i+54] += m[54] * mu\n\t"
05219         "ldr    r7, [%[m], #216]\n\t"
05220         "ldr    r9, [%[a], #216]\n\t"
05221         "umull  r6, r7, r8, r7\n\t"
05222         "adds   r9, r9, r6\n\t"
05223         "adc    r5, r7, #0\n\t"
05224         "adds   r9, r9, r4\n\t"
05225         "str    r9, [%[a], #216]\n\t"
05226         "adc    r5, r5, #0\n\t"
05227         "# a[i+55] += m[55] * mu\n\t"
05228         "ldr    r7, [%[m], #220]\n\t"
05229         "ldr    r9, [%[a], #220]\n\t"
05230         "umull  r6, r7, r8, r7\n\t"
05231         "adds   r9, r9, r6\n\t"
05232         "adc    r4, r7, #0\n\t"
05233         "adds   r9, r9, r5\n\t"
05234         "str    r9, [%[a], #220]\n\t"
05235         "adc    r4, r4, #0\n\t"
05236         "# a[i+56] += m[56] * mu\n\t"
05237         "ldr    r7, [%[m], #224]\n\t"
05238         "ldr    r9, [%[a], #224]\n\t"
05239         "umull  r6, r7, r8, r7\n\t"
05240         "adds   r9, r9, r6\n\t"
05241         "adc    r5, r7, #0\n\t"
05242         "adds   r9, r9, r4\n\t"
05243         "str    r9, [%[a], #224]\n\t"
05244         "adc    r5, r5, #0\n\t"
05245         "# a[i+57] += m[57] * mu\n\t"
05246         "ldr    r7, [%[m], #228]\n\t"
05247         "ldr    r9, [%[a], #228]\n\t"
05248         "umull  r6, r7, r8, r7\n\t"
05249         "adds   r9, r9, r6\n\t"
05250         "adc    r4, r7, #0\n\t"
05251         "adds   r9, r9, r5\n\t"
05252         "str    r9, [%[a], #228]\n\t"
05253         "adc    r4, r4, #0\n\t"
05254         "# a[i+58] += m[58] * mu\n\t"
05255         "ldr    r7, [%[m], #232]\n\t"
05256         "ldr    r9, [%[a], #232]\n\t"
05257         "umull  r6, r7, r8, r7\n\t"
05258         "adds   r9, r9, r6\n\t"
05259         "adc    r5, r7, #0\n\t"
05260         "adds   r9, r9, r4\n\t"
05261         "str    r9, [%[a], #232]\n\t"
05262         "adc    r5, r5, #0\n\t"
05263         "# a[i+59] += m[59] * mu\n\t"
05264         "ldr    r7, [%[m], #236]\n\t"
05265         "ldr    r9, [%[a], #236]\n\t"
05266         "umull  r6, r7, r8, r7\n\t"
05267         "adds   r9, r9, r6\n\t"
05268         "adc    r4, r7, #0\n\t"
05269         "adds   r9, r9, r5\n\t"
05270         "str    r9, [%[a], #236]\n\t"
05271         "adc    r4, r4, #0\n\t"
05272         "# a[i+60] += m[60] * mu\n\t"
05273         "ldr    r7, [%[m], #240]\n\t"
05274         "ldr    r9, [%[a], #240]\n\t"
05275         "umull  r6, r7, r8, r7\n\t"
05276         "adds   r9, r9, r6\n\t"
05277         "adc    r5, r7, #0\n\t"
05278         "adds   r9, r9, r4\n\t"
05279         "str    r9, [%[a], #240]\n\t"
05280         "adc    r5, r5, #0\n\t"
05281         "# a[i+61] += m[61] * mu\n\t"
05282         "ldr    r7, [%[m], #244]\n\t"
05283         "ldr    r9, [%[a], #244]\n\t"
05284         "umull  r6, r7, r8, r7\n\t"
05285         "adds   r9, r9, r6\n\t"
05286         "adc    r4, r7, #0\n\t"
05287         "adds   r9, r9, r5\n\t"
05288         "str    r9, [%[a], #244]\n\t"
05289         "adc    r4, r4, #0\n\t"
05290         "# a[i+62] += m[62] * mu\n\t"
05291         "ldr    r7, [%[m], #248]\n\t"
05292         "ldr    r9, [%[a], #248]\n\t"
05293         "umull  r6, r7, r8, r7\n\t"
05294         "adds   r9, r9, r6\n\t"
05295         "adc    r5, r7, #0\n\t"
05296         "adds   r9, r9, r4\n\t"
05297         "str    r9, [%[a], #248]\n\t"
05298         "adc    r5, r5, #0\n\t"
05299         "# a[i+63] += m[63] * mu\n\t"
05300         "ldr    r7, [%[m], #252]\n\t"
05301         "ldr   r9, [%[a], #252]\n\t"
05302         "umull  r6, r7, r8, r7\n\t"
05303         "adds   r5, r5, r6\n\t"
05304         "adcs   r7, r7, %[ca]\n\t"
05305         "mov    %[ca], #0\n\t"
05306         "adc    %[ca], %[ca], %[ca]\n\t"
05307         "adds   r9, r9, r5\n\t"
05308         "str    r9, [%[a], #252]\n\t"
05309         "ldr    r9, [%[a], #256]\n\t"
05310         "adcs   r9, r9, r7\n\t"
05311         "str    r9, [%[a], #256]\n\t"
05312         "adc    %[ca], %[ca], #0\n\t"
05313         "# i += 1\n\t"
05314         "add    %[a], %[a], #4\n\t"
05315         "add    r12, r12, #4\n\t"
05316         "cmp    r12, #256\n\t"
05317         "blt    1b\n\t"
05318         "str    r10, [%[a], #0]\n\t"
05319         "str    r14, [%[a], #4]\n\t"
05320         : [ca] "+r" (ca), [a] "+r" (a)
05321         : [m] "r" (m), [mp] "r" (mp)
05322         : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
05323     );
05324 
05325     sp_2048_cond_sub_64(a - 64, a, m, (sp_digit)0 - ca);
05326 }
05327 
05328 /* Multiply two Montogmery form numbers mod the modulus (prime).
05329  * (r = a * b mod m)
05330  *
05331  * r   Result of multiplication.
05332  * a   First number to multiply in Montogmery form.
05333  * b   Second number to multiply in Montogmery form.
05334  * m   Modulus (prime).
05335  * mp  Montogmery mulitplier.
05336  */
05337 static void sp_2048_mont_mul_64(sp_digit* r, sp_digit* a, sp_digit* b,
05338         sp_digit* m, sp_digit mp)
05339 {
05340     sp_2048_mul_64(r, a, b);
05341     sp_2048_mont_reduce_64(r, m, mp);
05342 }
05343 
05344 /* Square the Montgomery form number. (r = a * a mod m)
05345  *
05346  * r   Result of squaring.
05347  * a   Number to square in Montogmery form.
05348  * m   Modulus (prime).
05349  * mp  Montogmery mulitplier.
05350  */
05351 static void sp_2048_mont_sqr_64(sp_digit* r, sp_digit* a, sp_digit* m,
05352         sp_digit mp)
05353 {
05354     sp_2048_sqr_64(r, a);
05355     sp_2048_mont_reduce_64(r, m, mp);
05356 }
05357 
05358 /* Mul a by digit b into r. (r = a * b)
05359  *
05360  * r  A single precision integer.
05361  * a  A single precision integer.
05362  * b  A single precision digit.
05363  */
05364 static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a,
05365         const sp_digit b)
05366 {
05367 #ifdef WOLFSSL_SP_SMALL
05368     __asm__ __volatile__ (
05369         "mov    r10, #0\n\t"
05370         "# A[0] * B\n\t"
05371         "ldr    r8, [%[a]]\n\t"
05372         "umull  r5, r3, %[b], r8\n\t"
05373         "mov    r4, #0\n\t"
05374         "str    r5, [%[r]]\n\t"
05375         "mov    r5, #0\n\t"
05376         "mov    r9, #4\n\t"
05377         "1:\n\t"
05378         "ldr    r8, [%[a], r9]\n\t"
05379         "umull  r6, r7, %[b], r8\n\t"
05380         "adds   r3, r3, r6\n\t"
05381         "adcs   r4, r4, r7\n\t"
05382         "adc    r5, r10, r10\n\t"
05383         "str    r3, [%[r], r9]\n\t"
05384         "mov    r3, r4\n\t"
05385         "mov    r4, r5\n\t"
05386         "mov    r5, #0\n\t"
05387         "add    r9, r9, #4\n\t"
05388         "cmp    r9, #256\n\t"
05389         "blt    1b\n\t"
05390         "str    r3, [%[r], #256]\n\t"
05391         :
05392         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
05393         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
05394     );
05395 #else
05396     __asm__ __volatile__ (
05397         "mov    r10, #0\n\t"
05398         "# A[0] * B\n\t"
05399         "ldr    r8, [%[a]]\n\t"
05400         "umull  r3, r4, %[b], r8\n\t"
05401         "mov    r5, #0\n\t"
05402         "str    r3, [%[r]]\n\t"
05403         "# A[1] * B\n\t"
05404         "ldr    r8, [%[a], #4]\n\t"
05405         "mov    r3, #0\n\t"
05406         "umull  r6, r7, %[b], r8\n\t"
05407         "adds   r4, r4, r6\n\t"
05408         "adcs   r5, r5, r7\n\t"
05409         "adc    r3, r10, r10\n\t"
05410         "str    r4, [%[r], #4]\n\t"
05411         "# A[2] * B\n\t"
05412         "ldr    r8, [%[a], #8]\n\t"
05413         "mov    r4, #0\n\t"
05414         "umull  r6, r7, %[b], r8\n\t"
05415         "adds   r5, r5, r6\n\t"
05416         "adcs   r3, r3, r7\n\t"
05417         "adc    r4, r10, r10\n\t"
05418         "str    r5, [%[r], #8]\n\t"
05419         "# A[3] * B\n\t"
05420         "ldr    r8, [%[a], #12]\n\t"
05421         "mov    r5, #0\n\t"
05422         "umull  r6, r7, %[b], r8\n\t"
05423         "adds   r3, r3, r6\n\t"
05424         "adcs   r4, r4, r7\n\t"
05425         "adc    r5, r10, r10\n\t"
05426         "str    r3, [%[r], #12]\n\t"
05427         "# A[4] * B\n\t"
05428         "ldr    r8, [%[a], #16]\n\t"
05429         "mov    r3, #0\n\t"
05430         "umull  r6, r7, %[b], r8\n\t"
05431         "adds   r4, r4, r6\n\t"
05432         "adcs   r5, r5, r7\n\t"
05433         "adc    r3, r10, r10\n\t"
05434         "str    r4, [%[r], #16]\n\t"
05435         "# A[5] * B\n\t"
05436         "ldr    r8, [%[a], #20]\n\t"
05437         "mov    r4, #0\n\t"
05438         "umull  r6, r7, %[b], r8\n\t"
05439         "adds   r5, r5, r6\n\t"
05440         "adcs   r3, r3, r7\n\t"
05441         "adc    r4, r10, r10\n\t"
05442         "str    r5, [%[r], #20]\n\t"
05443         "# A[6] * B\n\t"
05444         "ldr    r8, [%[a], #24]\n\t"
05445         "mov    r5, #0\n\t"
05446         "umull  r6, r7, %[b], r8\n\t"
05447         "adds   r3, r3, r6\n\t"
05448         "adcs   r4, r4, r7\n\t"
05449         "adc    r5, r10, r10\n\t"
05450         "str    r3, [%[r], #24]\n\t"
05451         "# A[7] * B\n\t"
05452         "ldr    r8, [%[a], #28]\n\t"
05453         "mov    r3, #0\n\t"
05454         "umull  r6, r7, %[b], r8\n\t"
05455         "adds   r4, r4, r6\n\t"
05456         "adcs   r5, r5, r7\n\t"
05457         "adc    r3, r10, r10\n\t"
05458         "str    r4, [%[r], #28]\n\t"
05459         "# A[8] * B\n\t"
05460         "ldr    r8, [%[a], #32]\n\t"
05461         "mov    r4, #0\n\t"
05462         "umull  r6, r7, %[b], r8\n\t"
05463         "adds   r5, r5, r6\n\t"
05464         "adcs   r3, r3, r7\n\t"
05465         "adc    r4, r10, r10\n\t"
05466         "str    r5, [%[r], #32]\n\t"
05467         "# A[9] * B\n\t"
05468         "ldr    r8, [%[a], #36]\n\t"
05469         "mov    r5, #0\n\t"
05470         "umull  r6, r7, %[b], r8\n\t"
05471         "adds   r3, r3, r6\n\t"
05472         "adcs   r4, r4, r7\n\t"
05473         "adc    r5, r10, r10\n\t"
05474         "str    r3, [%[r], #36]\n\t"
05475         "# A[10] * B\n\t"
05476         "ldr    r8, [%[a], #40]\n\t"
05477         "mov    r3, #0\n\t"
05478         "umull  r6, r7, %[b], r8\n\t"
05479         "adds   r4, r4, r6\n\t"
05480         "adcs   r5, r5, r7\n\t"
05481         "adc    r3, r10, r10\n\t"
05482         "str    r4, [%[r], #40]\n\t"
05483         "# A[11] * B\n\t"
05484         "ldr    r8, [%[a], #44]\n\t"
05485         "mov    r4, #0\n\t"
05486         "umull  r6, r7, %[b], r8\n\t"
05487         "adds   r5, r5, r6\n\t"
05488         "adcs   r3, r3, r7\n\t"
05489         "adc    r4, r10, r10\n\t"
05490         "str    r5, [%[r], #44]\n\t"
05491         "# A[12] * B\n\t"
05492         "ldr    r8, [%[a], #48]\n\t"
05493         "mov    r5, #0\n\t"
05494         "umull  r6, r7, %[b], r8\n\t"
05495         "adds   r3, r3, r6\n\t"
05496         "adcs   r4, r4, r7\n\t"
05497         "adc    r5, r10, r10\n\t"
05498         "str    r3, [%[r], #48]\n\t"
05499         "# A[13] * B\n\t"
05500         "ldr    r8, [%[a], #52]\n\t"
05501         "mov    r3, #0\n\t"
05502         "umull  r6, r7, %[b], r8\n\t"
05503         "adds   r4, r4, r6\n\t"
05504         "adcs   r5, r5, r7\n\t"
05505         "adc    r3, r10, r10\n\t"
05506         "str    r4, [%[r], #52]\n\t"
05507         "# A[14] * B\n\t"
05508         "ldr    r8, [%[a], #56]\n\t"
05509         "mov    r4, #0\n\t"
05510         "umull  r6, r7, %[b], r8\n\t"
05511         "adds   r5, r5, r6\n\t"
05512         "adcs   r3, r3, r7\n\t"
05513         "adc    r4, r10, r10\n\t"
05514         "str    r5, [%[r], #56]\n\t"
05515         "# A[15] * B\n\t"
05516         "ldr    r8, [%[a], #60]\n\t"
05517         "mov    r5, #0\n\t"
05518         "umull  r6, r7, %[b], r8\n\t"
05519         "adds   r3, r3, r6\n\t"
05520         "adcs   r4, r4, r7\n\t"
05521         "adc    r5, r10, r10\n\t"
05522         "str    r3, [%[r], #60]\n\t"
05523         "# A[16] * B\n\t"
05524         "ldr    r8, [%[a], #64]\n\t"
05525         "mov    r3, #0\n\t"
05526         "umull  r6, r7, %[b], r8\n\t"
05527         "adds   r4, r4, r6\n\t"
05528         "adcs   r5, r5, r7\n\t"
05529         "adc    r3, r10, r10\n\t"
05530         "str    r4, [%[r], #64]\n\t"
05531         "# A[17] * B\n\t"
05532         "ldr    r8, [%[a], #68]\n\t"
05533         "mov    r4, #0\n\t"
05534         "umull  r6, r7, %[b], r8\n\t"
05535         "adds   r5, r5, r6\n\t"
05536         "adcs   r3, r3, r7\n\t"
05537         "adc    r4, r10, r10\n\t"
05538         "str    r5, [%[r], #68]\n\t"
05539         "# A[18] * B\n\t"
05540         "ldr    r8, [%[a], #72]\n\t"
05541         "mov    r5, #0\n\t"
05542         "umull  r6, r7, %[b], r8\n\t"
05543         "adds   r3, r3, r6\n\t"
05544         "adcs   r4, r4, r7\n\t"
05545         "adc    r5, r10, r10\n\t"
05546         "str    r3, [%[r], #72]\n\t"
05547         "# A[19] * B\n\t"
05548         "ldr    r8, [%[a], #76]\n\t"
05549         "mov    r3, #0\n\t"
05550         "umull  r6, r7, %[b], r8\n\t"
05551         "adds   r4, r4, r6\n\t"
05552         "adcs   r5, r5, r7\n\t"
05553         "adc    r3, r10, r10\n\t"
05554         "str    r4, [%[r], #76]\n\t"
05555         "# A[20] * B\n\t"
05556         "ldr    r8, [%[a], #80]\n\t"
05557         "mov    r4, #0\n\t"
05558         "umull  r6, r7, %[b], r8\n\t"
05559         "adds   r5, r5, r6\n\t"
05560         "adcs   r3, r3, r7\n\t"
05561         "adc    r4, r10, r10\n\t"
05562         "str    r5, [%[r], #80]\n\t"
05563         "# A[21] * B\n\t"
05564         "ldr    r8, [%[a], #84]\n\t"
05565         "mov    r5, #0\n\t"
05566         "umull  r6, r7, %[b], r8\n\t"
05567         "adds   r3, r3, r6\n\t"
05568         "adcs   r4, r4, r7\n\t"
05569         "adc    r5, r10, r10\n\t"
05570         "str    r3, [%[r], #84]\n\t"
05571         "# A[22] * B\n\t"
05572         "ldr    r8, [%[a], #88]\n\t"
05573         "mov    r3, #0\n\t"
05574         "umull  r6, r7, %[b], r8\n\t"
05575         "adds   r4, r4, r6\n\t"
05576         "adcs   r5, r5, r7\n\t"
05577         "adc    r3, r10, r10\n\t"
05578         "str    r4, [%[r], #88]\n\t"
05579         "# A[23] * B\n\t"
05580         "ldr    r8, [%[a], #92]\n\t"
05581         "mov    r4, #0\n\t"
05582         "umull  r6, r7, %[b], r8\n\t"
05583         "adds   r5, r5, r6\n\t"
05584         "adcs   r3, r3, r7\n\t"
05585         "adc    r4, r10, r10\n\t"
05586         "str    r5, [%[r], #92]\n\t"
05587         "# A[24] * B\n\t"
05588         "ldr    r8, [%[a], #96]\n\t"
05589         "mov    r5, #0\n\t"
05590         "umull  r6, r7, %[b], r8\n\t"
05591         "adds   r3, r3, r6\n\t"
05592         "adcs   r4, r4, r7\n\t"
05593         "adc    r5, r10, r10\n\t"
05594         "str    r3, [%[r], #96]\n\t"
05595         "# A[25] * B\n\t"
05596         "ldr    r8, [%[a], #100]\n\t"
05597         "mov    r3, #0\n\t"
05598         "umull  r6, r7, %[b], r8\n\t"
05599         "adds   r4, r4, r6\n\t"
05600         "adcs   r5, r5, r7\n\t"
05601         "adc    r3, r10, r10\n\t"
05602         "str    r4, [%[r], #100]\n\t"
05603         "# A[26] * B\n\t"
05604         "ldr    r8, [%[a], #104]\n\t"
05605         "mov    r4, #0\n\t"
05606         "umull  r6, r7, %[b], r8\n\t"
05607         "adds   r5, r5, r6\n\t"
05608         "adcs   r3, r3, r7\n\t"
05609         "adc    r4, r10, r10\n\t"
05610         "str    r5, [%[r], #104]\n\t"
05611         "# A[27] * B\n\t"
05612         "ldr    r8, [%[a], #108]\n\t"
05613         "mov    r5, #0\n\t"
05614         "umull  r6, r7, %[b], r8\n\t"
05615         "adds   r3, r3, r6\n\t"
05616         "adcs   r4, r4, r7\n\t"
05617         "adc    r5, r10, r10\n\t"
05618         "str    r3, [%[r], #108]\n\t"
05619         "# A[28] * B\n\t"
05620         "ldr    r8, [%[a], #112]\n\t"
05621         "mov    r3, #0\n\t"
05622         "umull  r6, r7, %[b], r8\n\t"
05623         "adds   r4, r4, r6\n\t"
05624         "adcs   r5, r5, r7\n\t"
05625         "adc    r3, r10, r10\n\t"
05626         "str    r4, [%[r], #112]\n\t"
05627         "# A[29] * B\n\t"
05628         "ldr    r8, [%[a], #116]\n\t"
05629         "mov    r4, #0\n\t"
05630         "umull  r6, r7, %[b], r8\n\t"
05631         "adds   r5, r5, r6\n\t"
05632         "adcs   r3, r3, r7\n\t"
05633         "adc    r4, r10, r10\n\t"
05634         "str    r5, [%[r], #116]\n\t"
05635         "# A[30] * B\n\t"
05636         "ldr    r8, [%[a], #120]\n\t"
05637         "mov    r5, #0\n\t"
05638         "umull  r6, r7, %[b], r8\n\t"
05639         "adds   r3, r3, r6\n\t"
05640         "adcs   r4, r4, r7\n\t"
05641         "adc    r5, r10, r10\n\t"
05642         "str    r3, [%[r], #120]\n\t"
05643         "# A[31] * B\n\t"
05644         "ldr    r8, [%[a], #124]\n\t"
05645         "mov    r3, #0\n\t"
05646         "umull  r6, r7, %[b], r8\n\t"
05647         "adds   r4, r4, r6\n\t"
05648         "adcs   r5, r5, r7\n\t"
05649         "adc    r3, r10, r10\n\t"
05650         "str    r4, [%[r], #124]\n\t"
05651         "# A[32] * B\n\t"
05652         "ldr    r8, [%[a], #128]\n\t"
05653         "mov    r4, #0\n\t"
05654         "umull  r6, r7, %[b], r8\n\t"
05655         "adds   r5, r5, r6\n\t"
05656         "adcs   r3, r3, r7\n\t"
05657         "adc    r4, r10, r10\n\t"
05658         "str    r5, [%[r], #128]\n\t"
05659         "# A[33] * B\n\t"
05660         "ldr    r8, [%[a], #132]\n\t"
05661         "mov    r5, #0\n\t"
05662         "umull  r6, r7, %[b], r8\n\t"
05663         "adds   r3, r3, r6\n\t"
05664         "adcs   r4, r4, r7\n\t"
05665         "adc    r5, r10, r10\n\t"
05666         "str    r3, [%[r], #132]\n\t"
05667         "# A[34] * B\n\t"
05668         "ldr    r8, [%[a], #136]\n\t"
05669         "mov    r3, #0\n\t"
05670         "umull  r6, r7, %[b], r8\n\t"
05671         "adds   r4, r4, r6\n\t"
05672         "adcs   r5, r5, r7\n\t"
05673         "adc    r3, r10, r10\n\t"
05674         "str    r4, [%[r], #136]\n\t"
05675         "# A[35] * B\n\t"
05676         "ldr    r8, [%[a], #140]\n\t"
05677         "mov    r4, #0\n\t"
05678         "umull  r6, r7, %[b], r8\n\t"
05679         "adds   r5, r5, r6\n\t"
05680         "adcs   r3, r3, r7\n\t"
05681         "adc    r4, r10, r10\n\t"
05682         "str    r5, [%[r], #140]\n\t"
05683         "# A[36] * B\n\t"
05684         "ldr    r8, [%[a], #144]\n\t"
05685         "mov    r5, #0\n\t"
05686         "umull  r6, r7, %[b], r8\n\t"
05687         "adds   r3, r3, r6\n\t"
05688         "adcs   r4, r4, r7\n\t"
05689         "adc    r5, r10, r10\n\t"
05690         "str    r3, [%[r], #144]\n\t"
05691         "# A[37] * B\n\t"
05692         "ldr    r8, [%[a], #148]\n\t"
05693         "mov    r3, #0\n\t"
05694         "umull  r6, r7, %[b], r8\n\t"
05695         "adds   r4, r4, r6\n\t"
05696         "adcs   r5, r5, r7\n\t"
05697         "adc    r3, r10, r10\n\t"
05698         "str    r4, [%[r], #148]\n\t"
05699         "# A[38] * B\n\t"
05700         "ldr    r8, [%[a], #152]\n\t"
05701         "mov    r4, #0\n\t"
05702         "umull  r6, r7, %[b], r8\n\t"
05703         "adds   r5, r5, r6\n\t"
05704         "adcs   r3, r3, r7\n\t"
05705         "adc    r4, r10, r10\n\t"
05706         "str    r5, [%[r], #152]\n\t"
05707         "# A[39] * B\n\t"
05708         "ldr    r8, [%[a], #156]\n\t"
05709         "mov    r5, #0\n\t"
05710         "umull  r6, r7, %[b], r8\n\t"
05711         "adds   r3, r3, r6\n\t"
05712         "adcs   r4, r4, r7\n\t"
05713         "adc    r5, r10, r10\n\t"
05714         "str    r3, [%[r], #156]\n\t"
05715         "# A[40] * B\n\t"
05716         "ldr    r8, [%[a], #160]\n\t"
05717         "mov    r3, #0\n\t"
05718         "umull  r6, r7, %[b], r8\n\t"
05719         "adds   r4, r4, r6\n\t"
05720         "adcs   r5, r5, r7\n\t"
05721         "adc    r3, r10, r10\n\t"
05722         "str    r4, [%[r], #160]\n\t"
05723         "# A[41] * B\n\t"
05724         "ldr    r8, [%[a], #164]\n\t"
05725         "mov    r4, #0\n\t"
05726         "umull  r6, r7, %[b], r8\n\t"
05727         "adds   r5, r5, r6\n\t"
05728         "adcs   r3, r3, r7\n\t"
05729         "adc    r4, r10, r10\n\t"
05730         "str    r5, [%[r], #164]\n\t"
05731         "# A[42] * B\n\t"
05732         "ldr    r8, [%[a], #168]\n\t"
05733         "mov    r5, #0\n\t"
05734         "umull  r6, r7, %[b], r8\n\t"
05735         "adds   r3, r3, r6\n\t"
05736         "adcs   r4, r4, r7\n\t"
05737         "adc    r5, r10, r10\n\t"
05738         "str    r3, [%[r], #168]\n\t"
05739         "# A[43] * B\n\t"
05740         "ldr    r8, [%[a], #172]\n\t"
05741         "mov    r3, #0\n\t"
05742         "umull  r6, r7, %[b], r8\n\t"
05743         "adds   r4, r4, r6\n\t"
05744         "adcs   r5, r5, r7\n\t"
05745         "adc    r3, r10, r10\n\t"
05746         "str    r4, [%[r], #172]\n\t"
05747         "# A[44] * B\n\t"
05748         "ldr    r8, [%[a], #176]\n\t"
05749         "mov    r4, #0\n\t"
05750         "umull  r6, r7, %[b], r8\n\t"
05751         "adds   r5, r5, r6\n\t"
05752         "adcs   r3, r3, r7\n\t"
05753         "adc    r4, r10, r10\n\t"
05754         "str    r5, [%[r], #176]\n\t"
05755         "# A[45] * B\n\t"
05756         "ldr    r8, [%[a], #180]\n\t"
05757         "mov    r5, #0\n\t"
05758         "umull  r6, r7, %[b], r8\n\t"
05759         "adds   r3, r3, r6\n\t"
05760         "adcs   r4, r4, r7\n\t"
05761         "adc    r5, r10, r10\n\t"
05762         "str    r3, [%[r], #180]\n\t"
05763         "# A[46] * B\n\t"
05764         "ldr    r8, [%[a], #184]\n\t"
05765         "mov    r3, #0\n\t"
05766         "umull  r6, r7, %[b], r8\n\t"
05767         "adds   r4, r4, r6\n\t"
05768         "adcs   r5, r5, r7\n\t"
05769         "adc    r3, r10, r10\n\t"
05770         "str    r4, [%[r], #184]\n\t"
05771         "# A[47] * B\n\t"
05772         "ldr    r8, [%[a], #188]\n\t"
05773         "mov    r4, #0\n\t"
05774         "umull  r6, r7, %[b], r8\n\t"
05775         "adds   r5, r5, r6\n\t"
05776         "adcs   r3, r3, r7\n\t"
05777         "adc    r4, r10, r10\n\t"
05778         "str    r5, [%[r], #188]\n\t"
05779         "# A[48] * B\n\t"
05780         "ldr    r8, [%[a], #192]\n\t"
05781         "mov    r5, #0\n\t"
05782         "umull  r6, r7, %[b], r8\n\t"
05783         "adds   r3, r3, r6\n\t"
05784         "adcs   r4, r4, r7\n\t"
05785         "adc    r5, r10, r10\n\t"
05786         "str    r3, [%[r], #192]\n\t"
05787         "# A[49] * B\n\t"
05788         "ldr    r8, [%[a], #196]\n\t"
05789         "mov    r3, #0\n\t"
05790         "umull  r6, r7, %[b], r8\n\t"
05791         "adds   r4, r4, r6\n\t"
05792         "adcs   r5, r5, r7\n\t"
05793         "adc    r3, r10, r10\n\t"
05794         "str    r4, [%[r], #196]\n\t"
05795         "# A[50] * B\n\t"
05796         "ldr    r8, [%[a], #200]\n\t"
05797         "mov    r4, #0\n\t"
05798         "umull  r6, r7, %[b], r8\n\t"
05799         "adds   r5, r5, r6\n\t"
05800         "adcs   r3, r3, r7\n\t"
05801         "adc    r4, r10, r10\n\t"
05802         "str    r5, [%[r], #200]\n\t"
05803         "# A[51] * B\n\t"
05804         "ldr    r8, [%[a], #204]\n\t"
05805         "mov    r5, #0\n\t"
05806         "umull  r6, r7, %[b], r8\n\t"
05807         "adds   r3, r3, r6\n\t"
05808         "adcs   r4, r4, r7\n\t"
05809         "adc    r5, r10, r10\n\t"
05810         "str    r3, [%[r], #204]\n\t"
05811         "# A[52] * B\n\t"
05812         "ldr    r8, [%[a], #208]\n\t"
05813         "mov    r3, #0\n\t"
05814         "umull  r6, r7, %[b], r8\n\t"
05815         "adds   r4, r4, r6\n\t"
05816         "adcs   r5, r5, r7\n\t"
05817         "adc    r3, r10, r10\n\t"
05818         "str    r4, [%[r], #208]\n\t"
05819         "# A[53] * B\n\t"
05820         "ldr    r8, [%[a], #212]\n\t"
05821         "mov    r4, #0\n\t"
05822         "umull  r6, r7, %[b], r8\n\t"
05823         "adds   r5, r5, r6\n\t"
05824         "adcs   r3, r3, r7\n\t"
05825         "adc    r4, r10, r10\n\t"
05826         "str    r5, [%[r], #212]\n\t"
05827         "# A[54] * B\n\t"
05828         "ldr    r8, [%[a], #216]\n\t"
05829         "mov    r5, #0\n\t"
05830         "umull  r6, r7, %[b], r8\n\t"
05831         "adds   r3, r3, r6\n\t"
05832         "adcs   r4, r4, r7\n\t"
05833         "adc    r5, r10, r10\n\t"
05834         "str    r3, [%[r], #216]\n\t"
05835         "# A[55] * B\n\t"
05836         "ldr    r8, [%[a], #220]\n\t"
05837         "mov    r3, #0\n\t"
05838         "umull  r6, r7, %[b], r8\n\t"
05839         "adds   r4, r4, r6\n\t"
05840         "adcs   r5, r5, r7\n\t"
05841         "adc    r3, r10, r10\n\t"
05842         "str    r4, [%[r], #220]\n\t"
05843         "# A[56] * B\n\t"
05844         "ldr    r8, [%[a], #224]\n\t"
05845         "mov    r4, #0\n\t"
05846         "umull  r6, r7, %[b], r8\n\t"
05847         "adds   r5, r5, r6\n\t"
05848         "adcs   r3, r3, r7\n\t"
05849         "adc    r4, r10, r10\n\t"
05850         "str    r5, [%[r], #224]\n\t"
05851         "# A[57] * B\n\t"
05852         "ldr    r8, [%[a], #228]\n\t"
05853         "mov    r5, #0\n\t"
05854         "umull  r6, r7, %[b], r8\n\t"
05855         "adds   r3, r3, r6\n\t"
05856         "adcs   r4, r4, r7\n\t"
05857         "adc    r5, r10, r10\n\t"
05858         "str    r3, [%[r], #228]\n\t"
05859         "# A[58] * B\n\t"
05860         "ldr    r8, [%[a], #232]\n\t"
05861         "mov    r3, #0\n\t"
05862         "umull  r6, r7, %[b], r8\n\t"
05863         "adds   r4, r4, r6\n\t"
05864         "adcs   r5, r5, r7\n\t"
05865         "adc    r3, r10, r10\n\t"
05866         "str    r4, [%[r], #232]\n\t"
05867         "# A[59] * B\n\t"
05868         "ldr    r8, [%[a], #236]\n\t"
05869         "mov    r4, #0\n\t"
05870         "umull  r6, r7, %[b], r8\n\t"
05871         "adds   r5, r5, r6\n\t"
05872         "adcs   r3, r3, r7\n\t"
05873         "adc    r4, r10, r10\n\t"
05874         "str    r5, [%[r], #236]\n\t"
05875         "# A[60] * B\n\t"
05876         "ldr    r8, [%[a], #240]\n\t"
05877         "mov    r5, #0\n\t"
05878         "umull  r6, r7, %[b], r8\n\t"
05879         "adds   r3, r3, r6\n\t"
05880         "adcs   r4, r4, r7\n\t"
05881         "adc    r5, r10, r10\n\t"
05882         "str    r3, [%[r], #240]\n\t"
05883         "# A[61] * B\n\t"
05884         "ldr    r8, [%[a], #244]\n\t"
05885         "mov    r3, #0\n\t"
05886         "umull  r6, r7, %[b], r8\n\t"
05887         "adds   r4, r4, r6\n\t"
05888         "adcs   r5, r5, r7\n\t"
05889         "adc    r3, r10, r10\n\t"
05890         "str    r4, [%[r], #244]\n\t"
05891         "# A[62] * B\n\t"
05892         "ldr    r8, [%[a], #248]\n\t"
05893         "mov    r4, #0\n\t"
05894         "umull  r6, r7, %[b], r8\n\t"
05895         "adds   r5, r5, r6\n\t"
05896         "adcs   r3, r3, r7\n\t"
05897         "adc    r4, r10, r10\n\t"
05898         "str    r5, [%[r], #248]\n\t"
05899         "# A[63] * B\n\t"
05900         "ldr    r8, [%[a], #252]\n\t"
05901         "umull  r6, r7, %[b], r8\n\t"
05902         "adds   r3, r3, r6\n\t"
05903         "adc    r4, r4, r7\n\t"
05904         "str    r3, [%[r], #252]\n\t"
05905         "str    r4, [%[r], #256]\n\t"
05906         :
05907         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
05908         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
05909     );
05910 #endif
05911 }
05912 
05913 /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
05914  *
05915  * d1   The high order half of the number to divide.
05916  * d0   The low order half of the number to divide.
05917  * div  The dividend.
05918  * returns the result of the division.
05919  *
05920  * Note that this is an approximate div. It may give an answer 1 larger.
05921  */
05922 static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, sp_digit div)
05923 {
05924     sp_digit r = 0;
05925 
05926     __asm__ __volatile__ (
05927         "lsr    r5, %[div], #1\n\t"
05928         "add    r5, r5, #1\n\t"
05929         "mov    r6, %[d0]\n\t"
05930         "mov    r7, %[d1]\n\t"
05931         "# Do top 32\n\t"
05932         "subs   r8, r5, r7\n\t"
05933         "sbc    r8, r8, r8\n\t"
05934         "add    %[r], %[r], %[r]\n\t"
05935         "sub    %[r], %[r], r8\n\t"
05936         "and    r8, r8, r5\n\t"
05937         "subs   r7, r7, r8\n\t"
05938         "# Next 30 bits\n\t"
05939         "mov    r4, #29\n\t"
05940         "1:\n\t"
05941         "movs   r6, r6, lsl #1\n\t"
05942         "adc    r7, r7, r7\n\t"
05943         "subs   r8, r5, r7\n\t"
05944         "sbc    r8, r8, r8\n\t"
05945         "add    %[r], %[r], %[r]\n\t"
05946         "sub    %[r], %[r], r8\n\t"
05947         "and    r8, r8, r5\n\t"
05948         "subs   r7, r7, r8\n\t"
05949         "subs   r4, r4, #1\n\t"
05950         "bpl    1b\n\t"
05951         "add    %[r], %[r], %[r]\n\t"
05952         "add    %[r], %[r], #1\n\t"
05953         "umull  r4, r5, %[r], %[div]\n\t"
05954         "subs   r4, %[d0], r4\n\t"
05955         "sbc    r5, %[d1], r5\n\t"
05956         "add    %[r], %[r], r5\n\t"
05957         "umull  r4, r5, %[r], %[div]\n\t"
05958         "subs   r4, %[d0], r4\n\t"
05959         "sbc    r5, %[d1], r5\n\t"
05960         "add    %[r], %[r], r5\n\t"
05961         "subs   r8, %[div], r4\n\t"
05962         "sbc    r8, r8, r8\n\t"
05963         "sub    %[r], %[r], r8\n\t"
05964         : [r] "+r" (r)
05965         : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
05966         : "r4", "r5", "r6", "r7", "r8"
05967     );
05968     return r;
05969 }
05970 
05971 /* AND m into each word of a and store in r.
05972  *
05973  * r  A single precision integer.
05974  * a  A single precision integer.
05975  * m  Mask to AND against each digit.
05976  */
05977 static void sp_2048_mask_64(sp_digit* r, sp_digit* a, sp_digit m)
05978 {
05979 #ifdef WOLFSSL_SP_SMALL
05980     int i;
05981 
05982     for (i=0; i<64; i++)
05983         r[i] = a[i] & m;
05984 #else
05985     int i;
05986 
05987     for (i = 0; i < 64; i += 8) {
05988         r[i+0] = a[i+0] & m;
05989         r[i+1] = a[i+1] & m;
05990         r[i+2] = a[i+2] & m;
05991         r[i+3] = a[i+3] & m;
05992         r[i+4] = a[i+4] & m;
05993         r[i+5] = a[i+5] & m;
05994         r[i+6] = a[i+6] & m;
05995         r[i+7] = a[i+7] & m;
05996     }
05997 #endif
05998 }
05999 
06000 /* Compare a with b in constant time.
06001  *
06002  * a  A single precision integer.
06003  * b  A single precision integer.
06004  * return -ve, 0 or +ve if a is less than, equal to or greater than b
06005  * respectively.
06006  */
06007 static int32_t sp_2048_cmp_64(sp_digit* a, sp_digit* b)
06008 {
06009     sp_digit r = -1;
06010     sp_digit one = 1;
06011 
06012 #ifdef WOLFSSL_SP_SMALL
06013     __asm__ __volatile__ (
06014         "mov    r7, #0\n\t"
06015         "mov    r3, #-1\n\t"
06016         "mov    r6, #252\n\t"
06017         "1:\n\t"
06018         "ldr    r4, [%[a], r6]\n\t"
06019         "ldr    r5, [%[b], r6]\n\t"
06020         "and    r4, r4, r3\n\t"
06021         "and    r5, r5, r3\n\t"
06022         "subs   r4, r4, r5\n\t"
06023         "movhi  %[r], %[one]\n\t"
06024         "movlo  %[r], r3\n\t"
06025         "movne  r3, r7\n\t"
06026         "sub    r6, r6, #4\n\t"
06027         "bcc    1b\n\t"
06028         "eor    %[r], %[r], r3\n\t"
06029         : [r] "+r" (r)
06030         : [a] "r" (a), [b] "r" (b), [one] "r" (one)
06031         : "r2", "r3", "r4", "r5", "r6", "r7"
06032     );
06033 #else
06034     __asm__ __volatile__ (
06035         "mov    r7, #0\n\t"
06036         "mov    r3, #-1\n\t"
06037         "ldr        r4, [%[a], #252]\n\t"
06038         "ldr        r5, [%[b], #252]\n\t"
06039         "and        r4, r4, r3\n\t"
06040         "and        r5, r5, r3\n\t"
06041         "subs   r4, r4, r5\n\t"
06042         "movhi  %[r], %[one]\n\t"
06043         "movlo  %[r], r3\n\t"
06044         "movne  r3, r7\n\t"
06045         "ldr        r4, [%[a], #248]\n\t"
06046         "ldr        r5, [%[b], #248]\n\t"
06047         "and        r4, r4, r3\n\t"
06048         "and        r5, r5, r3\n\t"
06049         "subs   r4, r4, r5\n\t"
06050         "movhi  %[r], %[one]\n\t"
06051         "movlo  %[r], r3\n\t"
06052         "movne  r3, r7\n\t"
06053         "ldr        r4, [%[a], #244]\n\t"
06054         "ldr        r5, [%[b], #244]\n\t"
06055         "and        r4, r4, r3\n\t"
06056         "and        r5, r5, r3\n\t"
06057         "subs   r4, r4, r5\n\t"
06058         "movhi  %[r], %[one]\n\t"
06059         "movlo  %[r], r3\n\t"
06060         "movne  r3, r7\n\t"
06061         "ldr        r4, [%[a], #240]\n\t"
06062         "ldr        r5, [%[b], #240]\n\t"
06063         "and        r4, r4, r3\n\t"
06064         "and        r5, r5, r3\n\t"
06065         "subs   r4, r4, r5\n\t"
06066         "movhi  %[r], %[one]\n\t"
06067         "movlo  %[r], r3\n\t"
06068         "movne  r3, r7\n\t"
06069         "ldr        r4, [%[a], #236]\n\t"
06070         "ldr        r5, [%[b], #236]\n\t"
06071         "and        r4, r4, r3\n\t"
06072         "and        r5, r5, r3\n\t"
06073         "subs   r4, r4, r5\n\t"
06074         "movhi  %[r], %[one]\n\t"
06075         "movlo  %[r], r3\n\t"
06076         "movne  r3, r7\n\t"
06077         "ldr        r4, [%[a], #232]\n\t"
06078         "ldr        r5, [%[b], #232]\n\t"
06079         "and        r4, r4, r3\n\t"
06080         "and        r5, r5, r3\n\t"
06081         "subs   r4, r4, r5\n\t"
06082         "movhi  %[r], %[one]\n\t"
06083         "movlo  %[r], r3\n\t"
06084         "movne  r3, r7\n\t"
06085         "ldr        r4, [%[a], #228]\n\t"
06086         "ldr        r5, [%[b], #228]\n\t"
06087         "and        r4, r4, r3\n\t"
06088         "and        r5, r5, r3\n\t"
06089         "subs   r4, r4, r5\n\t"
06090         "movhi  %[r], %[one]\n\t"
06091         "movlo  %[r], r3\n\t"
06092         "movne  r3, r7\n\t"
06093         "ldr        r4, [%[a], #224]\n\t"
06094         "ldr        r5, [%[b], #224]\n\t"
06095         "and        r4, r4, r3\n\t"
06096         "and        r5, r5, r3\n\t"
06097         "subs   r4, r4, r5\n\t"
06098         "movhi  %[r], %[one]\n\t"
06099         "movlo  %[r], r3\n\t"
06100         "movne  r3, r7\n\t"
06101         "ldr        r4, [%[a], #220]\n\t"
06102         "ldr        r5, [%[b], #220]\n\t"
06103         "and        r4, r4, r3\n\t"
06104         "and        r5, r5, r3\n\t"
06105         "subs   r4, r4, r5\n\t"
06106         "movhi  %[r], %[one]\n\t"
06107         "movlo  %[r], r3\n\t"
06108         "movne  r3, r7\n\t"
06109         "ldr        r4, [%[a], #216]\n\t"
06110         "ldr        r5, [%[b], #216]\n\t"
06111         "and        r4, r4, r3\n\t"
06112         "and        r5, r5, r3\n\t"
06113         "subs   r4, r4, r5\n\t"
06114         "movhi  %[r], %[one]\n\t"
06115         "movlo  %[r], r3\n\t"
06116         "movne  r3, r7\n\t"
06117         "ldr        r4, [%[a], #212]\n\t"
06118         "ldr        r5, [%[b], #212]\n\t"
06119         "and        r4, r4, r3\n\t"
06120         "and        r5, r5, r3\n\t"
06121         "subs   r4, r4, r5\n\t"
06122         "movhi  %[r], %[one]\n\t"
06123         "movlo  %[r], r3\n\t"
06124         "movne  r3, r7\n\t"
06125         "ldr        r4, [%[a], #208]\n\t"
06126         "ldr        r5, [%[b], #208]\n\t"
06127         "and        r4, r4, r3\n\t"
06128         "and        r5, r5, r3\n\t"
06129         "subs   r4, r4, r5\n\t"
06130         "movhi  %[r], %[one]\n\t"
06131         "movlo  %[r], r3\n\t"
06132         "movne  r3, r7\n\t"
06133         "ldr        r4, [%[a], #204]\n\t"
06134         "ldr        r5, [%[b], #204]\n\t"
06135         "and        r4, r4, r3\n\t"
06136         "and        r5, r5, r3\n\t"
06137         "subs   r4, r4, r5\n\t"
06138         "movhi  %[r], %[one]\n\t"
06139         "movlo  %[r], r3\n\t"
06140         "movne  r3, r7\n\t"
06141         "ldr        r4, [%[a], #200]\n\t"
06142         "ldr        r5, [%[b], #200]\n\t"
06143         "and        r4, r4, r3\n\t"
06144         "and        r5, r5, r3\n\t"
06145         "subs   r4, r4, r5\n\t"
06146         "movhi  %[r], %[one]\n\t"
06147         "movlo  %[r], r3\n\t"
06148         "movne  r3, r7\n\t"
06149         "ldr        r4, [%[a], #196]\n\t"
06150         "ldr        r5, [%[b], #196]\n\t"
06151         "and        r4, r4, r3\n\t"
06152         "and        r5, r5, r3\n\t"
06153         "subs   r4, r4, r5\n\t"
06154         "movhi  %[r], %[one]\n\t"
06155         "movlo  %[r], r3\n\t"
06156         "movne  r3, r7\n\t"
06157         "ldr        r4, [%[a], #192]\n\t"
06158         "ldr        r5, [%[b], #192]\n\t"
06159         "and        r4, r4, r3\n\t"
06160         "and        r5, r5, r3\n\t"
06161         "subs   r4, r4, r5\n\t"
06162         "movhi  %[r], %[one]\n\t"
06163         "movlo  %[r], r3\n\t"
06164         "movne  r3, r7\n\t"
06165         "ldr        r4, [%[a], #188]\n\t"
06166         "ldr        r5, [%[b], #188]\n\t"
06167         "and        r4, r4, r3\n\t"
06168         "and        r5, r5, r3\n\t"
06169         "subs   r4, r4, r5\n\t"
06170         "movhi  %[r], %[one]\n\t"
06171         "movlo  %[r], r3\n\t"
06172         "movne  r3, r7\n\t"
06173         "ldr        r4, [%[a], #184]\n\t"
06174         "ldr        r5, [%[b], #184]\n\t"
06175         "and        r4, r4, r3\n\t"
06176         "and        r5, r5, r3\n\t"
06177         "subs   r4, r4, r5\n\t"
06178         "movhi  %[r], %[one]\n\t"
06179         "movlo  %[r], r3\n\t"
06180         "movne  r3, r7\n\t"
06181         "ldr        r4, [%[a], #180]\n\t"
06182         "ldr        r5, [%[b], #180]\n\t"
06183         "and        r4, r4, r3\n\t"
06184         "and        r5, r5, r3\n\t"
06185         "subs   r4, r4, r5\n\t"
06186         "movhi  %[r], %[one]\n\t"
06187         "movlo  %[r], r3\n\t"
06188         "movne  r3, r7\n\t"
06189         "ldr        r4, [%[a], #176]\n\t"
06190         "ldr        r5, [%[b], #176]\n\t"
06191         "and        r4, r4, r3\n\t"
06192         "and        r5, r5, r3\n\t"
06193         "subs   r4, r4, r5\n\t"
06194         "movhi  %[r], %[one]\n\t"
06195         "movlo  %[r], r3\n\t"
06196         "movne  r3, r7\n\t"
06197         "ldr        r4, [%[a], #172]\n\t"
06198         "ldr        r5, [%[b], #172]\n\t"
06199         "and        r4, r4, r3\n\t"
06200         "and        r5, r5, r3\n\t"
06201         "subs   r4, r4, r5\n\t"
06202         "movhi  %[r], %[one]\n\t"
06203         "movlo  %[r], r3\n\t"
06204         "movne  r3, r7\n\t"
06205         "ldr        r4, [%[a], #168]\n\t"
06206         "ldr        r5, [%[b], #168]\n\t"
06207         "and        r4, r4, r3\n\t"
06208         "and        r5, r5, r3\n\t"
06209         "subs   r4, r4, r5\n\t"
06210         "movhi  %[r], %[one]\n\t"
06211         "movlo  %[r], r3\n\t"
06212         "movne  r3, r7\n\t"
06213         "ldr        r4, [%[a], #164]\n\t"
06214         "ldr        r5, [%[b], #164]\n\t"
06215         "and        r4, r4, r3\n\t"
06216         "and        r5, r5, r3\n\t"
06217         "subs   r4, r4, r5\n\t"
06218         "movhi  %[r], %[one]\n\t"
06219         "movlo  %[r], r3\n\t"
06220         "movne  r3, r7\n\t"
06221         "ldr        r4, [%[a], #160]\n\t"
06222         "ldr        r5, [%[b], #160]\n\t"
06223         "and        r4, r4, r3\n\t"
06224         "and        r5, r5, r3\n\t"
06225         "subs   r4, r4, r5\n\t"
06226         "movhi  %[r], %[one]\n\t"
06227         "movlo  %[r], r3\n\t"
06228         "movne  r3, r7\n\t"
06229         "ldr        r4, [%[a], #156]\n\t"
06230         "ldr        r5, [%[b], #156]\n\t"
06231         "and        r4, r4, r3\n\t"
06232         "and        r5, r5, r3\n\t"
06233         "subs   r4, r4, r5\n\t"
06234         "movhi  %[r], %[one]\n\t"
06235         "movlo  %[r], r3\n\t"
06236         "movne  r3, r7\n\t"
06237         "ldr        r4, [%[a], #152]\n\t"
06238         "ldr        r5, [%[b], #152]\n\t"
06239         "and        r4, r4, r3\n\t"
06240         "and        r5, r5, r3\n\t"
06241         "subs   r4, r4, r5\n\t"
06242         "movhi  %[r], %[one]\n\t"
06243         "movlo  %[r], r3\n\t"
06244         "movne  r3, r7\n\t"
06245         "ldr        r4, [%[a], #148]\n\t"
06246         "ldr        r5, [%[b], #148]\n\t"
06247         "and        r4, r4, r3\n\t"
06248         "and        r5, r5, r3\n\t"
06249         "subs   r4, r4, r5\n\t"
06250         "movhi  %[r], %[one]\n\t"
06251         "movlo  %[r], r3\n\t"
06252         "movne  r3, r7\n\t"
06253         "ldr        r4, [%[a], #144]\n\t"
06254         "ldr        r5, [%[b], #144]\n\t"
06255         "and        r4, r4, r3\n\t"
06256         "and        r5, r5, r3\n\t"
06257         "subs   r4, r4, r5\n\t"
06258         "movhi  %[r], %[one]\n\t"
06259         "movlo  %[r], r3\n\t"
06260         "movne  r3, r7\n\t"
06261         "ldr        r4, [%[a], #140]\n\t"
06262         "ldr        r5, [%[b], #140]\n\t"
06263         "and        r4, r4, r3\n\t"
06264         "and        r5, r5, r3\n\t"
06265         "subs   r4, r4, r5\n\t"
06266         "movhi  %[r], %[one]\n\t"
06267         "movlo  %[r], r3\n\t"
06268         "movne  r3, r7\n\t"
06269         "ldr        r4, [%[a], #136]\n\t"
06270         "ldr        r5, [%[b], #136]\n\t"
06271         "and        r4, r4, r3\n\t"
06272         "and        r5, r5, r3\n\t"
06273         "subs   r4, r4, r5\n\t"
06274         "movhi  %[r], %[one]\n\t"
06275         "movlo  %[r], r3\n\t"
06276         "movne  r3, r7\n\t"
06277         "ldr        r4, [%[a], #132]\n\t"
06278         "ldr        r5, [%[b], #132]\n\t"
06279         "and        r4, r4, r3\n\t"
06280         "and        r5, r5, r3\n\t"
06281         "subs   r4, r4, r5\n\t"
06282         "movhi  %[r], %[one]\n\t"
06283         "movlo  %[r], r3\n\t"
06284         "movne  r3, r7\n\t"
06285         "ldr        r4, [%[a], #128]\n\t"
06286         "ldr        r5, [%[b], #128]\n\t"
06287         "and        r4, r4, r3\n\t"
06288         "and        r5, r5, r3\n\t"
06289         "subs   r4, r4, r5\n\t"
06290         "movhi  %[r], %[one]\n\t"
06291         "movlo  %[r], r3\n\t"
06292         "movne  r3, r7\n\t"
06293         "ldr        r4, [%[a], #124]\n\t"
06294         "ldr        r5, [%[b], #124]\n\t"
06295         "and        r4, r4, r3\n\t"
06296         "and        r5, r5, r3\n\t"
06297         "subs   r4, r4, r5\n\t"
06298         "movhi  %[r], %[one]\n\t"
06299         "movlo  %[r], r3\n\t"
06300         "movne  r3, r7\n\t"
06301         "ldr        r4, [%[a], #120]\n\t"
06302         "ldr        r5, [%[b], #120]\n\t"
06303         "and        r4, r4, r3\n\t"
06304         "and        r5, r5, r3\n\t"
06305         "subs   r4, r4, r5\n\t"
06306         "movhi  %[r], %[one]\n\t"
06307         "movlo  %[r], r3\n\t"
06308         "movne  r3, r7\n\t"
06309         "ldr        r4, [%[a], #116]\n\t"
06310         "ldr        r5, [%[b], #116]\n\t"
06311         "and        r4, r4, r3\n\t"
06312         "and        r5, r5, r3\n\t"
06313         "subs   r4, r4, r5\n\t"
06314         "movhi  %[r], %[one]\n\t"
06315         "movlo  %[r], r3\n\t"
06316         "movne  r3, r7\n\t"
06317         "ldr        r4, [%[a], #112]\n\t"
06318         "ldr        r5, [%[b], #112]\n\t"
06319         "and        r4, r4, r3\n\t"
06320         "and        r5, r5, r3\n\t"
06321         "subs   r4, r4, r5\n\t"
06322         "movhi  %[r], %[one]\n\t"
06323         "movlo  %[r], r3\n\t"
06324         "movne  r3, r7\n\t"
06325         "ldr        r4, [%[a], #108]\n\t"
06326         "ldr        r5, [%[b], #108]\n\t"
06327         "and        r4, r4, r3\n\t"
06328         "and        r5, r5, r3\n\t"
06329         "subs   r4, r4, r5\n\t"
06330         "movhi  %[r], %[one]\n\t"
06331         "movlo  %[r], r3\n\t"
06332         "movne  r3, r7\n\t"
06333         "ldr        r4, [%[a], #104]\n\t"
06334         "ldr        r5, [%[b], #104]\n\t"
06335         "and        r4, r4, r3\n\t"
06336         "and        r5, r5, r3\n\t"
06337         "subs   r4, r4, r5\n\t"
06338         "movhi  %[r], %[one]\n\t"
06339         "movlo  %[r], r3\n\t"
06340         "movne  r3, r7\n\t"
06341         "ldr        r4, [%[a], #100]\n\t"
06342         "ldr        r5, [%[b], #100]\n\t"
06343         "and        r4, r4, r3\n\t"
06344         "and        r5, r5, r3\n\t"
06345         "subs   r4, r4, r5\n\t"
06346         "movhi  %[r], %[one]\n\t"
06347         "movlo  %[r], r3\n\t"
06348         "movne  r3, r7\n\t"
06349         "ldr        r4, [%[a], #96]\n\t"
06350         "ldr        r5, [%[b], #96]\n\t"
06351         "and        r4, r4, r3\n\t"
06352         "and        r5, r5, r3\n\t"
06353         "subs   r4, r4, r5\n\t"
06354         "movhi  %[r], %[one]\n\t"
06355         "movlo  %[r], r3\n\t"
06356         "movne  r3, r7\n\t"
06357         "ldr        r4, [%[a], #92]\n\t"
06358         "ldr        r5, [%[b], #92]\n\t"
06359         "and        r4, r4, r3\n\t"
06360         "and        r5, r5, r3\n\t"
06361         "subs   r4, r4, r5\n\t"
06362         "movhi  %[r], %[one]\n\t"
06363         "movlo  %[r], r3\n\t"
06364         "movne  r3, r7\n\t"
06365         "ldr        r4, [%[a], #88]\n\t"
06366         "ldr        r5, [%[b], #88]\n\t"
06367         "and        r4, r4, r3\n\t"
06368         "and        r5, r5, r3\n\t"
06369         "subs   r4, r4, r5\n\t"
06370         "movhi  %[r], %[one]\n\t"
06371         "movlo  %[r], r3\n\t"
06372         "movne  r3, r7\n\t"
06373         "ldr        r4, [%[a], #84]\n\t"
06374         "ldr        r5, [%[b], #84]\n\t"
06375         "and        r4, r4, r3\n\t"
06376         "and        r5, r5, r3\n\t"
06377         "subs   r4, r4, r5\n\t"
06378         "movhi  %[r], %[one]\n\t"
06379         "movlo  %[r], r3\n\t"
06380         "movne  r3, r7\n\t"
06381         "ldr        r4, [%[a], #80]\n\t"
06382         "ldr        r5, [%[b], #80]\n\t"
06383         "and        r4, r4, r3\n\t"
06384         "and        r5, r5, r3\n\t"
06385         "subs   r4, r4, r5\n\t"
06386         "movhi  %[r], %[one]\n\t"
06387         "movlo  %[r], r3\n\t"
06388         "movne  r3, r7\n\t"
06389         "ldr        r4, [%[a], #76]\n\t"
06390         "ldr        r5, [%[b], #76]\n\t"
06391         "and        r4, r4, r3\n\t"
06392         "and        r5, r5, r3\n\t"
06393         "subs   r4, r4, r5\n\t"
06394         "movhi  %[r], %[one]\n\t"
06395         "movlo  %[r], r3\n\t"
06396         "movne  r3, r7\n\t"
06397         "ldr        r4, [%[a], #72]\n\t"
06398         "ldr        r5, [%[b], #72]\n\t"
06399         "and        r4, r4, r3\n\t"
06400         "and        r5, r5, r3\n\t"
06401         "subs   r4, r4, r5\n\t"
06402         "movhi  %[r], %[one]\n\t"
06403         "movlo  %[r], r3\n\t"
06404         "movne  r3, r7\n\t"
06405         "ldr        r4, [%[a], #68]\n\t"
06406         "ldr        r5, [%[b], #68]\n\t"
06407         "and        r4, r4, r3\n\t"
06408         "and        r5, r5, r3\n\t"
06409         "subs   r4, r4, r5\n\t"
06410         "movhi  %[r], %[one]\n\t"
06411         "movlo  %[r], r3\n\t"
06412         "movne  r3, r7\n\t"
06413         "ldr        r4, [%[a], #64]\n\t"
06414         "ldr        r5, [%[b], #64]\n\t"
06415         "and        r4, r4, r3\n\t"
06416         "and        r5, r5, r3\n\t"
06417         "subs   r4, r4, r5\n\t"
06418         "movhi  %[r], %[one]\n\t"
06419         "movlo  %[r], r3\n\t"
06420         "movne  r3, r7\n\t"
06421         "ldr        r4, [%[a], #60]\n\t"
06422         "ldr        r5, [%[b], #60]\n\t"
06423         "and        r4, r4, r3\n\t"
06424         "and        r5, r5, r3\n\t"
06425         "subs   r4, r4, r5\n\t"
06426         "movhi  %[r], %[one]\n\t"
06427         "movlo  %[r], r3\n\t"
06428         "movne  r3, r7\n\t"
06429         "ldr        r4, [%[a], #56]\n\t"
06430         "ldr        r5, [%[b], #56]\n\t"
06431         "and        r4, r4, r3\n\t"
06432         "and        r5, r5, r3\n\t"
06433         "subs   r4, r4, r5\n\t"
06434         "movhi  %[r], %[one]\n\t"
06435         "movlo  %[r], r3\n\t"
06436         "movne  r3, r7\n\t"
06437         "ldr        r4, [%[a], #52]\n\t"
06438         "ldr        r5, [%[b], #52]\n\t"
06439         "and        r4, r4, r3\n\t"
06440         "and        r5, r5, r3\n\t"
06441         "subs   r4, r4, r5\n\t"
06442         "movhi  %[r], %[one]\n\t"
06443         "movlo  %[r], r3\n\t"
06444         "movne  r3, r7\n\t"
06445         "ldr        r4, [%[a], #48]\n\t"
06446         "ldr        r5, [%[b], #48]\n\t"
06447         "and        r4, r4, r3\n\t"
06448         "and        r5, r5, r3\n\t"
06449         "subs   r4, r4, r5\n\t"
06450         "movhi  %[r], %[one]\n\t"
06451         "movlo  %[r], r3\n\t"
06452         "movne  r3, r7\n\t"
06453         "ldr        r4, [%[a], #44]\n\t"
06454         "ldr        r5, [%[b], #44]\n\t"
06455         "and        r4, r4, r3\n\t"
06456         "and        r5, r5, r3\n\t"
06457         "subs   r4, r4, r5\n\t"
06458         "movhi  %[r], %[one]\n\t"
06459         "movlo  %[r], r3\n\t"
06460         "movne  r3, r7\n\t"
06461         "ldr        r4, [%[a], #40]\n\t"
06462         "ldr        r5, [%[b], #40]\n\t"
06463         "and        r4, r4, r3\n\t"
06464         "and        r5, r5, r3\n\t"
06465         "subs   r4, r4, r5\n\t"
06466         "movhi  %[r], %[one]\n\t"
06467         "movlo  %[r], r3\n\t"
06468         "movne  r3, r7\n\t"
06469         "ldr        r4, [%[a], #36]\n\t"
06470         "ldr        r5, [%[b], #36]\n\t"
06471         "and        r4, r4, r3\n\t"
06472         "and        r5, r5, r3\n\t"
06473         "subs   r4, r4, r5\n\t"
06474         "movhi  %[r], %[one]\n\t"
06475         "movlo  %[r], r3\n\t"
06476         "movne  r3, r7\n\t"
06477         "ldr        r4, [%[a], #32]\n\t"
06478         "ldr        r5, [%[b], #32]\n\t"
06479         "and        r4, r4, r3\n\t"
06480         "and        r5, r5, r3\n\t"
06481         "subs   r4, r4, r5\n\t"
06482         "movhi  %[r], %[one]\n\t"
06483         "movlo  %[r], r3\n\t"
06484         "movne  r3, r7\n\t"
06485         "ldr        r4, [%[a], #28]\n\t"
06486         "ldr        r5, [%[b], #28]\n\t"
06487         "and        r4, r4, r3\n\t"
06488         "and        r5, r5, r3\n\t"
06489         "subs   r4, r4, r5\n\t"
06490         "movhi  %[r], %[one]\n\t"
06491         "movlo  %[r], r3\n\t"
06492         "movne  r3, r7\n\t"
06493         "ldr        r4, [%[a], #24]\n\t"
06494         "ldr        r5, [%[b], #24]\n\t"
06495         "and        r4, r4, r3\n\t"
06496         "and        r5, r5, r3\n\t"
06497         "subs   r4, r4, r5\n\t"
06498         "movhi  %[r], %[one]\n\t"
06499         "movlo  %[r], r3\n\t"
06500         "movne  r3, r7\n\t"
06501         "ldr        r4, [%[a], #20]\n\t"
06502         "ldr        r5, [%[b], #20]\n\t"
06503         "and        r4, r4, r3\n\t"
06504         "and        r5, r5, r3\n\t"
06505         "subs   r4, r4, r5\n\t"
06506         "movhi  %[r], %[one]\n\t"
06507         "movlo  %[r], r3\n\t"
06508         "movne  r3, r7\n\t"
06509         "ldr        r4, [%[a], #16]\n\t"
06510         "ldr        r5, [%[b], #16]\n\t"
06511         "and        r4, r4, r3\n\t"
06512         "and        r5, r5, r3\n\t"
06513         "subs   r4, r4, r5\n\t"
06514         "movhi  %[r], %[one]\n\t"
06515         "movlo  %[r], r3\n\t"
06516         "movne  r3, r7\n\t"
06517         "ldr        r4, [%[a], #12]\n\t"
06518         "ldr        r5, [%[b], #12]\n\t"
06519         "and        r4, r4, r3\n\t"
06520         "and        r5, r5, r3\n\t"
06521         "subs   r4, r4, r5\n\t"
06522         "movhi  %[r], %[one]\n\t"
06523         "movlo  %[r], r3\n\t"
06524         "movne  r3, r7\n\t"
06525         "ldr        r4, [%[a], #8]\n\t"
06526         "ldr        r5, [%[b], #8]\n\t"
06527         "and        r4, r4, r3\n\t"
06528         "and        r5, r5, r3\n\t"
06529         "subs   r4, r4, r5\n\t"
06530         "movhi  %[r], %[one]\n\t"
06531         "movlo  %[r], r3\n\t"
06532         "movne  r3, r7\n\t"
06533         "ldr        r4, [%[a], #4]\n\t"
06534         "ldr        r5, [%[b], #4]\n\t"
06535         "and        r4, r4, r3\n\t"
06536         "and        r5, r5, r3\n\t"
06537         "subs   r4, r4, r5\n\t"
06538         "movhi  %[r], %[one]\n\t"
06539         "movlo  %[r], r3\n\t"
06540         "movne  r3, r7\n\t"
06541         "ldr        r4, [%[a], #0]\n\t"
06542         "ldr        r5, [%[b], #0]\n\t"
06543         "and        r4, r4, r3\n\t"
06544         "and        r5, r5, r3\n\t"
06545         "subs   r4, r4, r5\n\t"
06546         "movhi  %[r], %[one]\n\t"
06547         "movlo  %[r], r3\n\t"
06548         "movne  r3, r7\n\t"
06549         "eor    %[r], %[r], r3\n\t"
06550         : [r] "+r" (r)
06551         : [a] "r" (a), [b] "r" (b), [one] "r" (one)
06552         : "r2", "r3", "r4", "r5", "r6", "r7"
06553     );
06554 #endif
06555 
06556     return r;
06557 }
06558 
06559 /* Divide d in a and put remainder into r (m*d + r = a)
06560  * m is not calculated as it is not needed at this time.
06561  *
06562  * a  Nmber to be divided.
06563  * d  Number to divide with.
06564  * m  Multiplier result.
06565  * r  Remainder from the division.
06566  * returns MP_OKAY indicating success.
06567  */
06568 static WC_INLINE int sp_2048_div_64(sp_digit* a, sp_digit* d, sp_digit* m,
06569         sp_digit* r)
06570 {
06571     sp_digit t1[128], t2[65];
06572     sp_digit div, r1;
06573     int i;
06574 
06575     (void)m;
06576 
06577     div = d[63];
06578     XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
06579     for (i=63; i>=0; i--) {
06580         r1 = div_2048_word_64(t1[64 + i], t1[64 + i - 1], div);
06581 
06582         sp_2048_mul_d_64(t2, d, r1);
06583         t1[64 + i] += sp_2048_sub_in_place_64(&t1[i], t2);
06584         t1[64 + i] -= t2[64];
06585         sp_2048_mask_64(t2, d, t1[64 + i]);
06586         t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], t2);
06587         sp_2048_mask_64(t2, d, t1[64 + i]);
06588         t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], t2);
06589     }
06590 
06591     r1 = sp_2048_cmp_64(t1, d) >= 0;
06592     sp_2048_cond_sub_64(r, t1, t2, (sp_digit)0 - r1);
06593 
06594     return MP_OKAY;
06595 }
06596 
06597 /* Reduce a modulo m into r. (r = a mod m)
06598  *
06599  * r  A single precision number that is the reduced result.
06600  * a  A single precision number that is to be reduced.
06601  * m  A single precision number that is the modulus to reduce with.
06602  * returns MP_OKAY indicating success.
06603  */
06604 static WC_INLINE int sp_2048_mod_64(sp_digit* r, sp_digit* a, sp_digit* m)
06605 {
06606     return sp_2048_div_64(a, m, NULL, r);
06607 }
06608 
06609 /* Divide d in a and put remainder into r (m*d + r = a)
06610  * m is not calculated as it is not needed at this time.
06611  *
06612  * a  Nmber to be divided.
06613  * d  Number to divide with.
06614  * m  Multiplier result.
06615  * r  Remainder from the division.
06616  * returns MP_OKAY indicating success.
06617  */
06618 static WC_INLINE int sp_2048_div_64_cond(sp_digit* a, sp_digit* d, sp_digit* m,
06619         sp_digit* r)
06620 {
06621     sp_digit t1[128], t2[65];
06622     sp_digit div, r1;
06623     int i;
06624 
06625     (void)m;
06626 
06627     div = d[63];
06628     XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
06629     for (i=63; i>=0; i--) {
06630         r1 = div_2048_word_64(t1[64 + i], t1[64 + i - 1], div);
06631 
06632         sp_2048_mul_d_64(t2, d, r1);
06633         t1[64 + i] += sp_2048_sub_in_place_64(&t1[i], t2);
06634         t1[64 + i] -= t2[64];
06635         if (t1[64 + i] != 0) {
06636             t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], d);
06637             if (t1[64 + i] != 0)
06638                 t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], d);
06639         }
06640     }
06641 
06642     r1 = sp_2048_cmp_64(t1, d) >= 0;
06643     sp_2048_cond_sub_64(r, t1, t2, (sp_digit)0 - r1);
06644 
06645     return MP_OKAY;
06646 }
06647 
06648 /* Reduce a modulo m into r. (r = a mod m)
06649  *
06650  * r  A single precision number that is the reduced result.
06651  * a  A single precision number that is to be reduced.
06652  * m  A single precision number that is the modulus to reduce with.
06653  * returns MP_OKAY indicating success.
06654  */
06655 static WC_INLINE int sp_2048_mod_64_cond(sp_digit* r, sp_digit* a, sp_digit* m)
06656 {
06657     return sp_2048_div_64_cond(a, m, NULL, r);
06658 }
06659 
06660 #if defined(SP_RSA_PRIVATE_EXP_D) || defined(WOLFSSL_HAVE_SP_DH)
06661 #ifdef WOLFSSL_SP_SMALL
06662 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
06663  *
06664  * r     A single precision number that is the result of the operation.
06665  * a     A single precision number being exponentiated.
06666  * e     A single precision number that is the exponent.
06667  * bits  The number of bits in the exponent.
06668  * m     A single precision number that is the modulus.
06669  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
06670  */
06671 static int sp_2048_mod_exp_64(sp_digit* r, sp_digit* a, sp_digit* e,
06672         int bits, sp_digit* m, int reduceA)
06673 {
06674 #ifndef WOLFSSL_SMALL_STACK
06675     sp_digit t[16][128];
06676 #else
06677     sp_digit* t[16];
06678     sp_digit* td;
06679 #endif
06680     sp_digit* norm;
06681     sp_digit mp = 1;
06682     sp_digit n;
06683     sp_digit mask;
06684     int i;
06685     int c, y;
06686     int err = MP_OKAY;
06687 
06688 #ifdef WOLFSSL_SMALL_STACK
06689     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 128, NULL,
06690                             DYNAMIC_TYPE_TMP_BUFFER);
06691     if (td == NULL)
06692         err = MEMORY_E;
06693 
06694     if (err == MP_OKAY) {
06695         for (i=0; i<16; i++)
06696             t[i] = td + i * 128;
06697         norm = t[0];
06698     }
06699 #else
06700     norm = t[0];
06701 #endif
06702 
06703     if (err == MP_OKAY) {
06704         sp_2048_mont_setup(m, &mp);
06705         sp_2048_mont_norm_64(norm, m);
06706 
06707         XMEMSET(t[1], 0, sizeof(sp_digit) * 64);
06708         if (reduceA) {
06709             err = sp_2048_mod_64(t[1] + 64, a, m);
06710             if (err == MP_OKAY)
06711                 err = sp_2048_mod_64(t[1], t[1], m);
06712         }
06713         else {
06714             XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64);
06715             err = sp_2048_mod_64(t[1], t[1], m);
06716         }
06717     }
06718 
06719     if (err == MP_OKAY) {
06720         sp_2048_mont_sqr_64(t[ 2], t[ 1], m, mp);
06721         sp_2048_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp);
06722         sp_2048_mont_sqr_64(t[ 4], t[ 2], m, mp);
06723         sp_2048_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp);
06724         sp_2048_mont_sqr_64(t[ 6], t[ 3], m, mp);
06725         sp_2048_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp);
06726         sp_2048_mont_sqr_64(t[ 8], t[ 4], m, mp);
06727         sp_2048_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp);
06728         sp_2048_mont_sqr_64(t[10], t[ 5], m, mp);
06729         sp_2048_mont_mul_64(t[11], t[ 6], t[ 5], m, mp);
06730         sp_2048_mont_sqr_64(t[12], t[ 6], m, mp);
06731         sp_2048_mont_mul_64(t[13], t[ 7], t[ 6], m, mp);
06732         sp_2048_mont_sqr_64(t[14], t[ 7], m, mp);
06733         sp_2048_mont_mul_64(t[15], t[ 8], t[ 7], m, mp);
06734 
06735         i = (bits - 1) / 32;
06736         n = e[i--];
06737         y = n >> 28;
06738         n <<= 4;
06739         c = 28;
06740         XMEMCPY(r, t[y], sizeof(sp_digit) * 64);
06741         for (; i>=0 || c>=4; ) {
06742             if (c == 0) {
06743                 n = e[i--];
06744                 y = n >> 28;
06745                 n <<= 4;
06746                 c = 28;
06747             }
06748             else if (c < 4) {
06749                 y = n >> 28;
06750                 n = e[i--];
06751                 c = 4 - c;
06752                 y |= n >> (32 - c);
06753                 n <<= c;
06754                 c = 32 - c;
06755             }
06756             else {
06757                 y = (n >> 28) & 0xf;
06758                 n <<= 4;
06759                 c -= 4;
06760             }
06761 
06762             sp_2048_mont_sqr_64(r, r, m, mp);
06763             sp_2048_mont_sqr_64(r, r, m, mp);
06764             sp_2048_mont_sqr_64(r, r, m, mp);
06765             sp_2048_mont_sqr_64(r, r, m, mp);
06766 
06767             sp_2048_mont_mul_64(r, r, t[y], m, mp);
06768         }
06769 
06770         XMEMSET(&r[64], 0, sizeof(sp_digit) * 64);
06771         sp_2048_mont_reduce_64(r, m, mp);
06772 
06773         mask = 0 - (sp_2048_cmp_64(r, m) >= 0);
06774         sp_2048_cond_sub_64(r, r, m, mask);
06775     }
06776 
06777 #ifdef WOLFSSL_SMALL_STACK
06778     if (td != NULL)
06779         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
06780 #endif
06781 
06782     return err;
06783 }
06784 #else
06785 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
06786  *
06787  * r     A single precision number that is the result of the operation.
06788  * a     A single precision number being exponentiated.
06789  * e     A single precision number that is the exponent.
06790  * bits  The number of bits in the exponent.
06791  * m     A single precision number that is the modulus.
06792  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
06793  */
06794 static int sp_2048_mod_exp_64(sp_digit* r, sp_digit* a, sp_digit* e,
06795         int bits, sp_digit* m, int reduceA)
06796 {
06797 #ifndef WOLFSSL_SMALL_STACK
06798     sp_digit t[32][128];
06799 #else
06800     sp_digit* t[32];
06801     sp_digit* td;
06802 #endif
06803     sp_digit* norm;
06804     sp_digit mp = 1;
06805     sp_digit n;
06806     sp_digit mask;
06807     int i;
06808     int c, y;
06809     int err = MP_OKAY;
06810 
06811 #ifdef WOLFSSL_SMALL_STACK
06812     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 128, NULL,
06813                             DYNAMIC_TYPE_TMP_BUFFER);
06814     if (td == NULL)
06815         err = MEMORY_E;
06816 
06817     if (err == MP_OKAY) {
06818         for (i=0; i<32; i++)
06819             t[i] = td + i * 128;
06820         norm = t[0];
06821     }
06822 #else
06823     norm = t[0];
06824 #endif
06825 
06826     if (err == MP_OKAY) {
06827         sp_2048_mont_setup(m, &mp);
06828         sp_2048_mont_norm_64(norm, m);
06829 
06830         XMEMSET(t[1], 0, sizeof(sp_digit) * 64);
06831         if (reduceA) {
06832             err = sp_2048_mod_64(t[1] + 64, a, m);
06833             if (err == MP_OKAY)
06834                 err = sp_2048_mod_64(t[1], t[1], m);
06835         }
06836         else {
06837             XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64);
06838             err = sp_2048_mod_64(t[1], t[1], m);
06839         }
06840     }
06841 
06842     if (err == MP_OKAY) {
06843         sp_2048_mont_sqr_64(t[ 2], t[ 1], m, mp);
06844         sp_2048_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp);
06845         sp_2048_mont_sqr_64(t[ 4], t[ 2], m, mp);
06846         sp_2048_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp);
06847         sp_2048_mont_sqr_64(t[ 6], t[ 3], m, mp);
06848         sp_2048_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp);
06849         sp_2048_mont_sqr_64(t[ 8], t[ 4], m, mp);
06850         sp_2048_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp);
06851         sp_2048_mont_sqr_64(t[10], t[ 5], m, mp);
06852         sp_2048_mont_mul_64(t[11], t[ 6], t[ 5], m, mp);
06853         sp_2048_mont_sqr_64(t[12], t[ 6], m, mp);
06854         sp_2048_mont_mul_64(t[13], t[ 7], t[ 6], m, mp);
06855         sp_2048_mont_sqr_64(t[14], t[ 7], m, mp);
06856         sp_2048_mont_mul_64(t[15], t[ 8], t[ 7], m, mp);
06857         sp_2048_mont_sqr_64(t[16], t[ 8], m, mp);
06858         sp_2048_mont_mul_64(t[17], t[ 9], t[ 8], m, mp);
06859         sp_2048_mont_sqr_64(t[18], t[ 9], m, mp);
06860         sp_2048_mont_mul_64(t[19], t[10], t[ 9], m, mp);
06861         sp_2048_mont_sqr_64(t[20], t[10], m, mp);
06862         sp_2048_mont_mul_64(t[21], t[11], t[10], m, mp);
06863         sp_2048_mont_sqr_64(t[22], t[11], m, mp);
06864         sp_2048_mont_mul_64(t[23], t[12], t[11], m, mp);
06865         sp_2048_mont_sqr_64(t[24], t[12], m, mp);
06866         sp_2048_mont_mul_64(t[25], t[13], t[12], m, mp);
06867         sp_2048_mont_sqr_64(t[26], t[13], m, mp);
06868         sp_2048_mont_mul_64(t[27], t[14], t[13], m, mp);
06869         sp_2048_mont_sqr_64(t[28], t[14], m, mp);
06870         sp_2048_mont_mul_64(t[29], t[15], t[14], m, mp);
06871         sp_2048_mont_sqr_64(t[30], t[15], m, mp);
06872         sp_2048_mont_mul_64(t[31], t[16], t[15], m, mp);
06873 
06874         i = (bits - 1) / 32;
06875         n = e[i--];
06876         y = n >> 27;
06877         n <<= 5;
06878         c = 27;
06879         XMEMCPY(r, t[y], sizeof(sp_digit) * 64);
06880         for (; i>=0 || c>=5; ) {
06881             if (c == 0) {
06882                 n = e[i--];
06883                 y = n >> 27;
06884                 n <<= 5;
06885                 c = 27;
06886             }
06887             else if (c < 5) {
06888                 y = n >> 27;
06889                 n = e[i--];
06890                 c = 5 - c;
06891                 y |= n >> (32 - c);
06892                 n <<= c;
06893                 c = 32 - c;
06894             }
06895             else {
06896                 y = (n >> 27) & 0x1f;
06897                 n <<= 5;
06898                 c -= 5;
06899             }
06900 
06901             sp_2048_mont_sqr_64(r, r, m, mp);
06902             sp_2048_mont_sqr_64(r, r, m, mp);
06903             sp_2048_mont_sqr_64(r, r, m, mp);
06904             sp_2048_mont_sqr_64(r, r, m, mp);
06905             sp_2048_mont_sqr_64(r, r, m, mp);
06906 
06907             sp_2048_mont_mul_64(r, r, t[y], m, mp);
06908         }
06909         y = e[0] & 0x7;
06910         sp_2048_mont_sqr_64(r, r, m, mp);
06911         sp_2048_mont_sqr_64(r, r, m, mp);
06912         sp_2048_mont_sqr_64(r, r, m, mp);
06913         sp_2048_mont_mul_64(r, r, t[y], m, mp);
06914 
06915         XMEMSET(&r[64], 0, sizeof(sp_digit) * 64);
06916         sp_2048_mont_reduce_64(r, m, mp);
06917 
06918         mask = 0 - (sp_2048_cmp_64(r, m) >= 0);
06919         sp_2048_cond_sub_64(r, r, m, mask);
06920     }
06921 
06922 #ifdef WOLFSSL_SMALL_STACK
06923     if (td != NULL)
06924         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
06925 #endif
06926 
06927     return err;
06928 }
06929 #endif /* WOLFSSL_SP_SMALL */
06930 #endif /* SP_RSA_PRIVATE_EXP_D || WOLFSSL_HAVE_SP_DH */
06931 
06932 #ifdef WOLFSSL_HAVE_SP_RSA
06933 /* RSA public key operation.
06934  *
06935  * in      Array of bytes representing the number to exponentiate, base.
06936  * inLen   Number of bytes in base.
06937  * em      Public exponent.
06938  * mm      Modulus.
06939  * out     Buffer to hold big-endian bytes of exponentiation result.
06940  *         Must be at least 256 bytes long.
06941  * outLen  Number of bytes in result.
06942  * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
06943  * an array is too long and MEMORY_E when dynamic memory allocation fails.
06944  */
06945 int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
06946     byte* out, word32* outLen)
06947 {
06948 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
06949     sp_digit ad[128], md[64], rd[128];
06950 #else
06951     sp_digit* d = NULL;
06952 #endif
06953     sp_digit* a;
06954     sp_digit *ah;
06955     sp_digit* m;
06956     sp_digit* r;
06957     sp_digit e[1];
06958     int err = MP_OKAY;
06959 
06960     if (*outLen < 256)
06961         err = MP_TO_E;
06962     if (err == MP_OKAY && (mp_count_bits(em) > 32 || inLen > 256 ||
06963                                                      mp_count_bits(mm) != 2048))
06964         err = MP_READ_E;
06965 
06966 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
06967     if (err == MP_OKAY) {
06968         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 5, NULL,
06969                                DYNAMIC_TYPE_TMP_BUFFER);
06970         if (d == NULL)
06971             err = MEMORY_E;
06972     }
06973 
06974     if (err == MP_OKAY) {
06975         a = d;
06976         r = a + 64 * 2;
06977         m = r + 64 * 2;
06978         ah = a + 64;
06979     }
06980 #else
06981     a = ad;
06982     m = md;
06983     r = rd;
06984     ah = a + 64;
06985 #endif
06986 
06987     if (err == MP_OKAY) {
06988         sp_2048_from_bin(ah, 64, in, inLen);
06989 #if DIGIT_BIT >= 32
06990         e[0] = em->dp[0];
06991 #else
06992         e[0] = em->dp[0];
06993         if (em->used > 1)
06994             e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
06995 #endif
06996         if (e[0] == 0)
06997             err = MP_EXPTMOD_E;
06998     }
06999     if (err == MP_OKAY) {
07000         sp_2048_from_mp(m, 64, mm);
07001 
07002         if (e[0] == 0x3) {
07003             if (err == MP_OKAY) {
07004                 sp_2048_sqr_64(r, ah);
07005                 err = sp_2048_mod_64_cond(r, r, m);
07006             }
07007             if (err == MP_OKAY) {
07008                 sp_2048_mul_64(r, ah, r);
07009                 err = sp_2048_mod_64_cond(r, r, m);
07010             }
07011         }
07012         else {
07013             int i;
07014             sp_digit mp;
07015 
07016             sp_2048_mont_setup(m, &mp);
07017 
07018             /* Convert to Montgomery form. */
07019             XMEMSET(a, 0, sizeof(sp_digit) * 64);
07020             err = sp_2048_mod_64_cond(a, a, m);
07021 
07022             if (err == MP_OKAY) {
07023                 for (i=31; i>=0; i--)
07024                     if (e[0] >> i)
07025                         break;
07026 
07027                 XMEMCPY(r, a, sizeof(sp_digit) * 64);
07028                 for (i--; i>=0; i--) {
07029                     sp_2048_mont_sqr_64(r, r, m, mp);
07030                     if (((e[0] >> i) & 1) == 1)
07031                         sp_2048_mont_mul_64(r, r, a, m, mp);
07032                 }
07033                 XMEMSET(&r[64], 0, sizeof(sp_digit) * 64);
07034                 sp_2048_mont_reduce_64(r, m, mp);
07035 
07036                 for (i = 63; i > 0; i--) {
07037                     if (r[i] != m[i])
07038                         break;
07039                 }
07040                 if (r[i] >= m[i])
07041                     sp_2048_sub_in_place_64(r, m);
07042             }
07043         }
07044     }
07045 
07046     if (err == MP_OKAY) {
07047         sp_2048_to_bin(r, out);
07048         *outLen = 256;
07049     }
07050 
07051 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
07052     if (d != NULL)
07053         XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
07054 #endif
07055 
07056     return err;
07057 }
07058 
07059 /* RSA private key operation.
07060  *
07061  * in      Array of bytes representing the number to exponentiate, base.
07062  * inLen   Number of bytes in base.
07063  * dm      Private exponent.
07064  * pm      First prime.
07065  * qm      Second prime.
07066  * dpm     First prime's CRT exponent.
07067  * dqm     Second prime's CRT exponent.
07068  * qim     Inverse of second prime mod p.
07069  * mm      Modulus.
07070  * out     Buffer to hold big-endian bytes of exponentiation result.
07071  *         Must be at least 256 bytes long.
07072  * outLen  Number of bytes in result.
07073  * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
07074  * an array is too long and MEMORY_E when dynamic memory allocation fails.
07075  */
07076 int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm,
07077     mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
07078     byte* out, word32* outLen)
07079 {
07080 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
07081     sp_digit ad[64 * 2];
07082     sp_digit pd[32], qd[32], dpd[32];
07083     sp_digit tmpad[64], tmpbd[64];
07084 #else
07085     sp_digit* t = NULL;
07086 #endif
07087     sp_digit* a;
07088     sp_digit* p;
07089     sp_digit* q;
07090     sp_digit* dp;
07091     sp_digit* dq;
07092     sp_digit* qi;
07093     sp_digit* tmp;
07094     sp_digit* tmpa;
07095     sp_digit* tmpb;
07096     sp_digit* r;
07097     sp_digit c;
07098     int err = MP_OKAY;
07099 
07100     (void)dm;
07101     (void)mm;
07102 
07103     if (*outLen < 256)
07104         err = MP_TO_E;
07105     if (err == MP_OKAY && (inLen > 256 || mp_count_bits(mm) != 2048))
07106         err = MP_READ_E;
07107 
07108 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
07109     if (err == MP_OKAY) {
07110         t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 11, NULL,
07111                                DYNAMIC_TYPE_TMP_BUFFER);
07112         if (t == NULL)
07113             err = MEMORY_E;
07114     }
07115     if (err == MP_OKAY) {
07116         a = t;
07117         p = a + 64 * 2;
07118         q = p + 32;
07119         qi = dq = dp = q + 32;
07120         tmpa = qi + 32;
07121         tmpb = tmpa + 64;
07122 
07123         tmp = t;
07124         r = tmp + 64;
07125     }
07126 #else
07127     r = a = ad;
07128     p = pd;
07129     q = qd;
07130     qi = dq = dp = dpd;
07131     tmpa = tmpad;
07132     tmpb = tmpbd;
07133     tmp = a + 64;
07134 #endif
07135 
07136     if (err == MP_OKAY) {
07137         sp_2048_from_bin(a, 64, in, inLen);
07138         sp_2048_from_mp(p, 32, pm);
07139         sp_2048_from_mp(q, 32, qm);
07140         sp_2048_from_mp(dp, 32, dpm);
07141 
07142         err = sp_2048_mod_exp_32(tmpa, a, dp, 1024, p, 1);
07143     }
07144     if (err == MP_OKAY) {
07145         sp_2048_from_mp(dq, 32, dqm);
07146         err = sp_2048_mod_exp_32(tmpb, a, dq, 1024, q, 1);
07147     }
07148 
07149     if (err == MP_OKAY) {
07150         c = sp_2048_sub_in_place_32(tmpa, tmpb);
07151         sp_2048_mask_32(tmp, p, c);
07152         sp_2048_add_32(tmpa, tmpa, tmp);
07153 
07154         sp_2048_from_mp(qi, 32, qim);
07155         sp_2048_mul_32(tmpa, tmpa, qi);
07156         err = sp_2048_mod_32(tmpa, tmpa, p);
07157     }
07158 
07159     if (err == MP_OKAY) {
07160         sp_2048_mul_32(tmpa, q, tmpa);
07161         XMEMSET(&tmpb[32], 0, sizeof(sp_digit) * 32);
07162         sp_2048_add_64(r, tmpb, tmpa);
07163 
07164         sp_2048_to_bin(r, out);
07165         *outLen = 256;
07166     }
07167 
07168 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
07169     if (t != NULL) {
07170         XMEMSET(t, 0, sizeof(sp_digit) * 32 * 11);
07171         XFREE(t, NULL, DYNAMIC_TYPE_TMP_BUFFER);
07172     }
07173 #else
07174     XMEMSET(tmpad, 0, sizeof(tmpad));
07175     XMEMSET(tmpbd, 0, sizeof(tmpbd));
07176     XMEMSET(pd, 0, sizeof(pd));
07177     XMEMSET(qd, 0, sizeof(qd));
07178     XMEMSET(dpd, 0, sizeof(dpd));
07179 #endif
07180 
07181     return err;
07182 }
07183 #endif /* WOLFSSL_HAVE_SP_RSA */
07184 #ifdef WOLFSSL_HAVE_SP_DH
07185 /* Convert an array of sp_digit to an mp_int.
07186  *
07187  * a  A single precision integer.
07188  * r  A multi-precision integer.
07189  */
07190 static int sp_2048_to_mp(sp_digit* a, mp_int* r)
07191 {
07192     int err;
07193 
07194     err = mp_grow(r, (2048 + DIGIT_BIT - 1) / DIGIT_BIT);
07195     if (err == MP_OKAY) {
07196 #if DIGIT_BIT == 32
07197         XMEMCPY(r->dp, a, sizeof(sp_digit) * 64);
07198         r->used = 64;
07199         mp_clamp(r);
07200 #elif DIGIT_BIT < 32
07201         int i, j = 0, s = 0;
07202 
07203         r->dp[0] = 0;
07204         for (i = 0; i < 64; i++) {
07205             r->dp[j] |= a[i] << s;
07206             r->dp[j] &= (1l << DIGIT_BIT) - 1;
07207             s = DIGIT_BIT - s;
07208             r->dp[++j] = a[i] >> s;
07209             while (s + DIGIT_BIT <= 32) {
07210                 s += DIGIT_BIT;
07211                 r->dp[j] &= (1l << DIGIT_BIT) - 1;
07212                 r->dp[++j] = a[i] >> s;
07213             }
07214             s = 32 - s;
07215         }
07216         r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
07217         mp_clamp(r);
07218 #else
07219         int i, j = 0, s = 0;
07220 
07221         r->dp[0] = 0;
07222         for (i = 0; i < 64; i++) {
07223             r->dp[j] |= ((mp_digit)a[i]) << s;
07224             if (s + 32 >= DIGIT_BIT) {
07225     #if DIGIT_BIT < 32
07226                 r->dp[j] &= (1l << DIGIT_BIT) - 1;
07227     #endif
07228                 s = DIGIT_BIT - s;
07229                 r->dp[++j] = a[i] >> s;
07230                 s = 32 - s;
07231             }
07232             else
07233                 s += 32;
07234         }
07235         r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
07236         mp_clamp(r);
07237 #endif
07238     }
07239 
07240     return err;
07241 }
07242 
07243 /* Perform the modular exponentiation for Diffie-Hellman.
07244  *
07245  * base  Base. MP integer.
07246  * exp   Exponent. MP integer.
07247  * mod   Modulus. MP integer.
07248  * res   Result. MP integer.
07249  * returs 0 on success, MP_READ_E if there are too many bytes in an array
07250  * and MEMORY_E if memory allocation fails.
07251  */
07252 int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
07253 {
07254     int err = MP_OKAY;
07255     sp_digit b[128], e[64], m[64];
07256     sp_digit* r = b;
07257     int expBits = mp_count_bits(exp);
07258 
07259     if (mp_count_bits(base) > 2048 || expBits > 2048 ||
07260                                                    mp_count_bits(mod) != 2048) {
07261         err = MP_READ_E;
07262     }
07263 
07264     if (err == MP_OKAY) {
07265         sp_2048_from_mp(b, 64, base);
07266         sp_2048_from_mp(e, 64, exp);
07267         sp_2048_from_mp(m, 64, mod);
07268 
07269         err = sp_2048_mod_exp_64(r, b, e, expBits, m, 0);
07270     }
07271 
07272     if (err == MP_OKAY) {
07273         err = sp_2048_to_mp(r, res);
07274     }
07275 
07276     XMEMSET(e, 0, sizeof(e));
07277 
07278     return err;
07279 }
07280 
07281 /* Perform the modular exponentiation for Diffie-Hellman.
07282  *
07283  * base     Base.
07284  * exp      Array of bytes that is the exponent.
07285  * expLen   Length of data, in bytes, in exponent.
07286  * mod      Modulus.
07287  * out      Buffer to hold big-endian bytes of exponentiation result.
07288  *          Must be at least 256 bytes long.
07289  * outLen   Length, in bytes, of exponentiation result.
07290  * returs 0 on success, MP_READ_E if there are too many bytes in an array
07291  * and MEMORY_E if memory allocation fails.
07292  */
07293 int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen,
07294     mp_int* mod, byte* out, word32* outLen)
07295 {
07296     int err = MP_OKAY;
07297     sp_digit b[128], e[64], m[64];
07298     sp_digit* r = b;
07299     word32 i;
07300 
07301     if (mp_count_bits(base) > 2048 || expLen > 256 ||
07302                                                    mp_count_bits(mod) != 2048) {
07303         err = MP_READ_E;
07304     }
07305 
07306     if (err == MP_OKAY) {
07307         sp_2048_from_mp(b, 64, base);
07308         sp_2048_from_bin(e, 64, exp, expLen);
07309         sp_2048_from_mp(m, 64, mod);
07310 
07311         err = sp_2048_mod_exp_64(r, b, e, expLen * 8, m, 0);
07312     }
07313 
07314     if (err == MP_OKAY) {
07315         sp_2048_to_bin(r, out);
07316         *outLen = 256;
07317         for (i=0; i<256 && out[i] == 0; i++) {
07318         }
07319         *outLen -= i;
07320         XMEMMOVE(out, out + i, *outLen);
07321 
07322     }
07323 
07324     XMEMSET(e, 0, sizeof(e));
07325 
07326     return err;
07327 }
07328 
07329 #endif /* WOLFSSL_HAVE_SP_DH */
07330 
07331 #endif /* WOLFSSL_SP_NO_2048 */
07332 
07333 #ifndef WOLFSSL_SP_NO_3072
07334 /* Read big endian unsigned byte aray into r.
07335  *
07336  * r  A single precision integer.
07337  * a  Byte array.
07338  * n  Number of bytes in array to read.
07339  */
07340 static void sp_3072_from_bin(sp_digit* r, int max, const byte* a, int n)
07341 {
07342     int i, j = 0, s = 0;
07343 
07344     r[0] = 0;
07345     for (i = n-1; i >= 0; i--) {
07346         r[j] |= ((sp_digit)a[i]) << s;
07347         if (s >= 24) {
07348             r[j] &= 0xffffffff;
07349             s = 32 - s;
07350             if (j + 1 >= max)
07351                 break;
07352             r[++j] = a[i] >> s;
07353             s = 8 - s;
07354         }
07355         else
07356             s += 8;
07357     }
07358 
07359     for (j++; j < max; j++)
07360         r[j] = 0;
07361 }
07362 
07363 /* Convert an mp_int to an array of sp_digit.
07364  *
07365  * r  A single precision integer.
07366  * a  A multi-precision integer.
07367  */
07368 static void sp_3072_from_mp(sp_digit* r, int max, mp_int* a)
07369 {
07370 #if DIGIT_BIT == 32
07371     int j;
07372 
07373     XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
07374 
07375     for (j = a->used; j < max; j++)
07376         r[j] = 0;
07377 #elif DIGIT_BIT > 32
07378     int i, j = 0, s = 0;
07379 
07380     r[0] = 0;
07381     for (i = 0; i < a->used && j < max; i++) {
07382         r[j] |= a->dp[i] << s;
07383         r[j] &= 0xffffffff;
07384         s = 32 - s;
07385         if (j + 1 >= max)
07386             break;
07387         r[++j] = a->dp[i] >> s;
07388         while (s + 32 <= DIGIT_BIT) {
07389             s += 32;
07390             r[j] &= 0xffffffff;
07391             if (j + 1 >= max)
07392                 break;
07393             if (s < DIGIT_BIT)
07394                 r[++j] = a->dp[i] >> s;
07395             else
07396                 r[++j] = 0;
07397         }
07398         s = DIGIT_BIT - s;
07399     }
07400 
07401     for (j++; j < max; j++)
07402         r[j] = 0;
07403 #else
07404     int i, j = 0, s = 0;
07405 
07406     r[0] = 0;
07407     for (i = 0; i < a->used && j < max; i++) {
07408         r[j] |= ((sp_digit)a->dp[i]) << s;
07409         if (s + DIGIT_BIT >= 32) {
07410             r[j] &= 0xffffffff;
07411             if (j + 1 >= max)
07412                 break;
07413             s = 32 - s;
07414             if (s == DIGIT_BIT) {
07415                 r[++j] = 0;
07416                 s = 0;
07417             }
07418             else {
07419                 r[++j] = a->dp[i] >> s;
07420                 s = DIGIT_BIT - s;
07421             }
07422         }
07423         else
07424             s += DIGIT_BIT;
07425     }
07426 
07427     for (j++; j < max; j++)
07428         r[j] = 0;
07429 #endif
07430 }
07431 
07432 /* Write r as big endian to byte aray.
07433  * Fixed length number of bytes written: 384
07434  *
07435  * r  A single precision integer.
07436  * a  Byte array.
07437  */
07438 static void sp_3072_to_bin(sp_digit* r, byte* a)
07439 {
07440     int i, j, s = 0, b;
07441 
07442     j = 3072 / 8 - 1;
07443     a[j] = 0;
07444     for (i=0; i<96 && j>=0; i++) {
07445         b = 0;
07446         a[j--] |= r[i] << s; b += 8 - s;
07447         if (j < 0)
07448             break;
07449         while (b < 32) {
07450             a[j--] = r[i] >> b; b += 8;
07451             if (j < 0)
07452                 break;
07453         }
07454         s = 8 - (b - 32);
07455         if (j >= 0)
07456             a[j] = 0;
07457         if (s != 0)
07458             j++;
07459     }
07460 }
07461 
07462 #ifndef WOLFSSL_SP_SMALL
07463 /* Multiply a and b into r. (r = a * b)
07464  *
07465  * r  A single precision integer.
07466  * a  A single precision integer.
07467  * b  A single precision integer.
07468  */
07469 static void sp_3072_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
07470 {
07471     sp_digit tmp[8];
07472 
07473     __asm__ __volatile__ (
07474         "mov    r10, #0\n\t"
07475         "#  A[0] * B[0]\n\t"
07476         "ldr    r8, [%[a], #0]\n\t"
07477         "ldr    r9, [%[b], #0]\n\t"
07478         "umull  r3, r4, r8, r9\n\t"
07479         "mov    r5, #0\n\t"
07480         "str    r3, [%[tmp]]\n\t"
07481         "#  A[0] * B[1]\n\t"
07482         "ldr    r8, [%[a], #0]\n\t"
07483         "ldr    r9, [%[b], #4]\n\t"
07484         "umull  r6, r7, r8, r9\n\t"
07485         "adds   r4, r4, r6\n\t"
07486         "adcs   r5, r5, r7\n\t"
07487         "adc    r3, r10, r10\n\t"
07488         "#  A[1] * B[0]\n\t"
07489         "ldr    r8, [%[a], #4]\n\t"
07490         "ldr    r9, [%[b], #0]\n\t"
07491         "umull  r6, r7, r8, r9\n\t"
07492         "adds   r4, r4, r6\n\t"
07493         "adcs   r5, r5, r7\n\t"
07494         "adc    r3, r3, r10\n\t"
07495         "str    r4, [%[tmp], #4]\n\t"
07496         "#  A[0] * B[2]\n\t"
07497         "ldr    r8, [%[a], #0]\n\t"
07498         "ldr    r9, [%[b], #8]\n\t"
07499         "umull  r6, r7, r8, r9\n\t"
07500         "adds   r5, r5, r6\n\t"
07501         "adcs   r3, r3, r7\n\t"
07502         "adc    r4, r10, r10\n\t"
07503         "#  A[1] * B[1]\n\t"
07504         "ldr    r8, [%[a], #4]\n\t"
07505         "ldr    r9, [%[b], #4]\n\t"
07506         "umull  r6, r7, r8, r9\n\t"
07507         "adds   r5, r5, r6\n\t"
07508         "adcs   r3, r3, r7\n\t"
07509         "adc    r4, r4, r10\n\t"
07510         "#  A[2] * B[0]\n\t"
07511         "ldr    r8, [%[a], #8]\n\t"
07512         "ldr    r9, [%[b], #0]\n\t"
07513         "umull  r6, r7, r8, r9\n\t"
07514         "adds   r5, r5, r6\n\t"
07515         "adcs   r3, r3, r7\n\t"
07516         "adc    r4, r4, r10\n\t"
07517         "str    r5, [%[tmp], #8]\n\t"
07518         "#  A[0] * B[3]\n\t"
07519         "ldr    r8, [%[a], #0]\n\t"
07520         "ldr    r9, [%[b], #12]\n\t"
07521         "umull  r6, r7, r8, r9\n\t"
07522         "adds   r3, r3, r6\n\t"
07523         "adcs   r4, r4, r7\n\t"
07524         "adc    r5, r10, r10\n\t"
07525         "#  A[1] * B[2]\n\t"
07526         "ldr    r8, [%[a], #4]\n\t"
07527         "ldr    r9, [%[b], #8]\n\t"
07528         "umull  r6, r7, r8, r9\n\t"
07529         "adds   r3, r3, r6\n\t"
07530         "adcs   r4, r4, r7\n\t"
07531         "adc    r5, r5, r10\n\t"
07532         "#  A[2] * B[1]\n\t"
07533         "ldr    r8, [%[a], #8]\n\t"
07534         "ldr    r9, [%[b], #4]\n\t"
07535         "umull  r6, r7, r8, r9\n\t"
07536         "adds   r3, r3, r6\n\t"
07537         "adcs   r4, r4, r7\n\t"
07538         "adc    r5, r5, r10\n\t"
07539         "#  A[3] * B[0]\n\t"
07540         "ldr    r8, [%[a], #12]\n\t"
07541         "ldr    r9, [%[b], #0]\n\t"
07542         "umull  r6, r7, r8, r9\n\t"
07543         "adds   r3, r3, r6\n\t"
07544         "adcs   r4, r4, r7\n\t"
07545         "adc    r5, r5, r10\n\t"
07546         "str    r3, [%[tmp], #12]\n\t"
07547         "#  A[0] * B[4]\n\t"
07548         "ldr    r8, [%[a], #0]\n\t"
07549         "ldr    r9, [%[b], #16]\n\t"
07550         "umull  r6, r7, r8, r9\n\t"
07551         "adds   r4, r4, r6\n\t"
07552         "adcs   r5, r5, r7\n\t"
07553         "adc    r3, r10, r10\n\t"
07554         "#  A[1] * B[3]\n\t"
07555         "ldr    r8, [%[a], #4]\n\t"
07556         "ldr    r9, [%[b], #12]\n\t"
07557         "umull  r6, r7, r8, r9\n\t"
07558         "adds   r4, r4, r6\n\t"
07559         "adcs   r5, r5, r7\n\t"
07560         "adc    r3, r3, r10\n\t"
07561         "#  A[2] * B[2]\n\t"
07562         "ldr    r8, [%[a], #8]\n\t"
07563         "ldr    r9, [%[b], #8]\n\t"
07564         "umull  r6, r7, r8, r9\n\t"
07565         "adds   r4, r4, r6\n\t"
07566         "adcs   r5, r5, r7\n\t"
07567         "adc    r3, r3, r10\n\t"
07568         "#  A[3] * B[1]\n\t"
07569         "ldr    r8, [%[a], #12]\n\t"
07570         "ldr    r9, [%[b], #4]\n\t"
07571         "umull  r6, r7, r8, r9\n\t"
07572         "adds   r4, r4, r6\n\t"
07573         "adcs   r5, r5, r7\n\t"
07574         "adc    r3, r3, r10\n\t"
07575         "#  A[4] * B[0]\n\t"
07576         "ldr    r8, [%[a], #16]\n\t"
07577         "ldr    r9, [%[b], #0]\n\t"
07578         "umull  r6, r7, r8, r9\n\t"
07579         "adds   r4, r4, r6\n\t"
07580         "adcs   r5, r5, r7\n\t"
07581         "adc    r3, r3, r10\n\t"
07582         "str    r4, [%[tmp], #16]\n\t"
07583         "#  A[0] * B[5]\n\t"
07584         "ldr    r8, [%[a], #0]\n\t"
07585         "ldr    r9, [%[b], #20]\n\t"
07586         "umull  r6, r7, r8, r9\n\t"
07587         "adds   r5, r5, r6\n\t"
07588         "adcs   r3, r3, r7\n\t"
07589         "adc    r4, r10, r10\n\t"
07590         "#  A[1] * B[4]\n\t"
07591         "ldr    r8, [%[a], #4]\n\t"
07592         "ldr    r9, [%[b], #16]\n\t"
07593         "umull  r6, r7, r8, r9\n\t"
07594         "adds   r5, r5, r6\n\t"
07595         "adcs   r3, r3, r7\n\t"
07596         "adc    r4, r4, r10\n\t"
07597         "#  A[2] * B[3]\n\t"
07598         "ldr    r8, [%[a], #8]\n\t"
07599         "ldr    r9, [%[b], #12]\n\t"
07600         "umull  r6, r7, r8, r9\n\t"
07601         "adds   r5, r5, r6\n\t"
07602         "adcs   r3, r3, r7\n\t"
07603         "adc    r4, r4, r10\n\t"
07604         "#  A[3] * B[2]\n\t"
07605         "ldr    r8, [%[a], #12]\n\t"
07606         "ldr    r9, [%[b], #8]\n\t"
07607         "umull  r6, r7, r8, r9\n\t"
07608         "adds   r5, r5, r6\n\t"
07609         "adcs   r3, r3, r7\n\t"
07610         "adc    r4, r4, r10\n\t"
07611         "#  A[4] * B[1]\n\t"
07612         "ldr    r8, [%[a], #16]\n\t"
07613         "ldr    r9, [%[b], #4]\n\t"
07614         "umull  r6, r7, r8, r9\n\t"
07615         "adds   r5, r5, r6\n\t"
07616         "adcs   r3, r3, r7\n\t"
07617         "adc    r4, r4, r10\n\t"
07618         "#  A[5] * B[0]\n\t"
07619         "ldr    r8, [%[a], #20]\n\t"
07620         "ldr    r9, [%[b], #0]\n\t"
07621         "umull  r6, r7, r8, r9\n\t"
07622         "adds   r5, r5, r6\n\t"
07623         "adcs   r3, r3, r7\n\t"
07624         "adc    r4, r4, r10\n\t"
07625         "str    r5, [%[tmp], #20]\n\t"
07626         "#  A[0] * B[6]\n\t"
07627         "ldr    r8, [%[a], #0]\n\t"
07628         "ldr    r9, [%[b], #24]\n\t"
07629         "umull  r6, r7, r8, r9\n\t"
07630         "adds   r3, r3, r6\n\t"
07631         "adcs   r4, r4, r7\n\t"
07632         "adc    r5, r10, r10\n\t"
07633         "#  A[1] * B[5]\n\t"
07634         "ldr    r8, [%[a], #4]\n\t"
07635         "ldr    r9, [%[b], #20]\n\t"
07636         "umull  r6, r7, r8, r9\n\t"
07637         "adds   r3, r3, r6\n\t"
07638         "adcs   r4, r4, r7\n\t"
07639         "adc    r5, r5, r10\n\t"
07640         "#  A[2] * B[4]\n\t"
07641         "ldr    r8, [%[a], #8]\n\t"
07642         "ldr    r9, [%[b], #16]\n\t"
07643         "umull  r6, r7, r8, r9\n\t"
07644         "adds   r3, r3, r6\n\t"
07645         "adcs   r4, r4, r7\n\t"
07646         "adc    r5, r5, r10\n\t"
07647         "#  A[3] * B[3]\n\t"
07648         "ldr    r8, [%[a], #12]\n\t"
07649         "ldr    r9, [%[b], #12]\n\t"
07650         "umull  r6, r7, r8, r9\n\t"
07651         "adds   r3, r3, r6\n\t"
07652         "adcs   r4, r4, r7\n\t"
07653         "adc    r5, r5, r10\n\t"
07654         "#  A[4] * B[2]\n\t"
07655         "ldr    r8, [%[a], #16]\n\t"
07656         "ldr    r9, [%[b], #8]\n\t"
07657         "umull  r6, r7, r8, r9\n\t"
07658         "adds   r3, r3, r6\n\t"
07659         "adcs   r4, r4, r7\n\t"
07660         "adc    r5, r5, r10\n\t"
07661         "#  A[5] * B[1]\n\t"
07662         "ldr    r8, [%[a], #20]\n\t"
07663         "ldr    r9, [%[b], #4]\n\t"
07664         "umull  r6, r7, r8, r9\n\t"
07665         "adds   r3, r3, r6\n\t"
07666         "adcs   r4, r4, r7\n\t"
07667         "adc    r5, r5, r10\n\t"
07668         "#  A[6] * B[0]\n\t"
07669         "ldr    r8, [%[a], #24]\n\t"
07670         "ldr    r9, [%[b], #0]\n\t"
07671         "umull  r6, r7, r8, r9\n\t"
07672         "adds   r3, r3, r6\n\t"
07673         "adcs   r4, r4, r7\n\t"
07674         "adc    r5, r5, r10\n\t"
07675         "str    r3, [%[tmp], #24]\n\t"
07676         "#  A[0] * B[7]\n\t"
07677         "ldr    r8, [%[a], #0]\n\t"
07678         "ldr    r9, [%[b], #28]\n\t"
07679         "umull  r6, r7, r8, r9\n\t"
07680         "adds   r4, r4, r6\n\t"
07681         "adcs   r5, r5, r7\n\t"
07682         "adc    r3, r10, r10\n\t"
07683         "#  A[1] * B[6]\n\t"
07684         "ldr    r8, [%[a], #4]\n\t"
07685         "ldr    r9, [%[b], #24]\n\t"
07686         "umull  r6, r7, r8, r9\n\t"
07687         "adds   r4, r4, r6\n\t"
07688         "adcs   r5, r5, r7\n\t"
07689         "adc    r3, r3, r10\n\t"
07690         "#  A[2] * B[5]\n\t"
07691         "ldr    r8, [%[a], #8]\n\t"
07692         "ldr    r9, [%[b], #20]\n\t"
07693         "umull  r6, r7, r8, r9\n\t"
07694         "adds   r4, r4, r6\n\t"
07695         "adcs   r5, r5, r7\n\t"
07696         "adc    r3, r3, r10\n\t"
07697         "#  A[3] * B[4]\n\t"
07698         "ldr    r8, [%[a], #12]\n\t"
07699         "ldr    r9, [%[b], #16]\n\t"
07700         "umull  r6, r7, r8, r9\n\t"
07701         "adds   r4, r4, r6\n\t"
07702         "adcs   r5, r5, r7\n\t"
07703         "adc    r3, r3, r10\n\t"
07704         "#  A[4] * B[3]\n\t"
07705         "ldr    r8, [%[a], #16]\n\t"
07706         "ldr    r9, [%[b], #12]\n\t"
07707         "umull  r6, r7, r8, r9\n\t"
07708         "adds   r4, r4, r6\n\t"
07709         "adcs   r5, r5, r7\n\t"
07710         "adc    r3, r3, r10\n\t"
07711         "#  A[5] * B[2]\n\t"
07712         "ldr    r8, [%[a], #20]\n\t"
07713         "ldr    r9, [%[b], #8]\n\t"
07714         "umull  r6, r7, r8, r9\n\t"
07715         "adds   r4, r4, r6\n\t"
07716         "adcs   r5, r5, r7\n\t"
07717         "adc    r3, r3, r10\n\t"
07718         "#  A[6] * B[1]\n\t"
07719         "ldr    r8, [%[a], #24]\n\t"
07720         "ldr    r9, [%[b], #4]\n\t"
07721         "umull  r6, r7, r8, r9\n\t"
07722         "adds   r4, r4, r6\n\t"
07723         "adcs   r5, r5, r7\n\t"
07724         "adc    r3, r3, r10\n\t"
07725         "#  A[7] * B[0]\n\t"
07726         "ldr    r8, [%[a], #28]\n\t"
07727         "ldr    r9, [%[b], #0]\n\t"
07728         "umull  r6, r7, r8, r9\n\t"
07729         "adds   r4, r4, r6\n\t"
07730         "adcs   r5, r5, r7\n\t"
07731         "adc    r3, r3, r10\n\t"
07732         "str    r4, [%[tmp], #28]\n\t"
07733         "#  A[1] * B[7]\n\t"
07734         "ldr    r8, [%[a], #4]\n\t"
07735         "ldr    r9, [%[b], #28]\n\t"
07736         "umull  r6, r7, r8, r9\n\t"
07737         "adds   r5, r5, r6\n\t"
07738         "adcs   r3, r3, r7\n\t"
07739         "adc    r4, r10, r10\n\t"
07740         "#  A[2] * B[6]\n\t"
07741         "ldr    r8, [%[a], #8]\n\t"
07742         "ldr    r9, [%[b], #24]\n\t"
07743         "umull  r6, r7, r8, r9\n\t"
07744         "adds   r5, r5, r6\n\t"
07745         "adcs   r3, r3, r7\n\t"
07746         "adc    r4, r4, r10\n\t"
07747         "#  A[3] * B[5]\n\t"
07748         "ldr    r8, [%[a], #12]\n\t"
07749         "ldr    r9, [%[b], #20]\n\t"
07750         "umull  r6, r7, r8, r9\n\t"
07751         "adds   r5, r5, r6\n\t"
07752         "adcs   r3, r3, r7\n\t"
07753         "adc    r4, r4, r10\n\t"
07754         "#  A[4] * B[4]\n\t"
07755         "ldr    r8, [%[a], #16]\n\t"
07756         "ldr    r9, [%[b], #16]\n\t"
07757         "umull  r6, r7, r8, r9\n\t"
07758         "adds   r5, r5, r6\n\t"
07759         "adcs   r3, r3, r7\n\t"
07760         "adc    r4, r4, r10\n\t"
07761         "#  A[5] * B[3]\n\t"
07762         "ldr    r8, [%[a], #20]\n\t"
07763         "ldr    r9, [%[b], #12]\n\t"
07764         "umull  r6, r7, r8, r9\n\t"
07765         "adds   r5, r5, r6\n\t"
07766         "adcs   r3, r3, r7\n\t"
07767         "adc    r4, r4, r10\n\t"
07768         "#  A[6] * B[2]\n\t"
07769         "ldr    r8, [%[a], #24]\n\t"
07770         "ldr    r9, [%[b], #8]\n\t"
07771         "umull  r6, r7, r8, r9\n\t"
07772         "adds   r5, r5, r6\n\t"
07773         "adcs   r3, r3, r7\n\t"
07774         "adc    r4, r4, r10\n\t"
07775         "#  A[7] * B[1]\n\t"
07776         "ldr    r8, [%[a], #28]\n\t"
07777         "ldr    r9, [%[b], #4]\n\t"
07778         "umull  r6, r7, r8, r9\n\t"
07779         "adds   r5, r5, r6\n\t"
07780         "adcs   r3, r3, r7\n\t"
07781         "adc    r4, r4, r10\n\t"
07782         "str    r5, [%[r], #32]\n\t"
07783         "#  A[2] * B[7]\n\t"
07784         "ldr    r8, [%[a], #8]\n\t"
07785         "ldr    r9, [%[b], #28]\n\t"
07786         "umull  r6, r7, r8, r9\n\t"
07787         "adds   r3, r3, r6\n\t"
07788         "adcs   r4, r4, r7\n\t"
07789         "adc    r5, r10, r10\n\t"
07790         "#  A[3] * B[6]\n\t"
07791         "ldr    r8, [%[a], #12]\n\t"
07792         "ldr    r9, [%[b], #24]\n\t"
07793         "umull  r6, r7, r8, r9\n\t"
07794         "adds   r3, r3, r6\n\t"
07795         "adcs   r4, r4, r7\n\t"
07796         "adc    r5, r5, r10\n\t"
07797         "#  A[4] * B[5]\n\t"
07798         "ldr    r8, [%[a], #16]\n\t"
07799         "ldr    r9, [%[b], #20]\n\t"
07800         "umull  r6, r7, r8, r9\n\t"
07801         "adds   r3, r3, r6\n\t"
07802         "adcs   r4, r4, r7\n\t"
07803         "adc    r5, r5, r10\n\t"
07804         "#  A[5] * B[4]\n\t"
07805         "ldr    r8, [%[a], #20]\n\t"
07806         "ldr    r9, [%[b], #16]\n\t"
07807         "umull  r6, r7, r8, r9\n\t"
07808         "adds   r3, r3, r6\n\t"
07809         "adcs   r4, r4, r7\n\t"
07810         "adc    r5, r5, r10\n\t"
07811         "#  A[6] * B[3]\n\t"
07812         "ldr    r8, [%[a], #24]\n\t"
07813         "ldr    r9, [%[b], #12]\n\t"
07814         "umull  r6, r7, r8, r9\n\t"
07815         "adds   r3, r3, r6\n\t"
07816         "adcs   r4, r4, r7\n\t"
07817         "adc    r5, r5, r10\n\t"
07818         "#  A[7] * B[2]\n\t"
07819         "ldr    r8, [%[a], #28]\n\t"
07820         "ldr    r9, [%[b], #8]\n\t"
07821         "umull  r6, r7, r8, r9\n\t"
07822         "adds   r3, r3, r6\n\t"
07823         "adcs   r4, r4, r7\n\t"
07824         "adc    r5, r5, r10\n\t"
07825         "str    r3, [%[r], #36]\n\t"
07826         "#  A[3] * B[7]\n\t"
07827         "ldr    r8, [%[a], #12]\n\t"
07828         "ldr    r9, [%[b], #28]\n\t"
07829         "umull  r6, r7, r8, r9\n\t"
07830         "adds   r4, r4, r6\n\t"
07831         "adcs   r5, r5, r7\n\t"
07832         "adc    r3, r10, r10\n\t"
07833         "#  A[4] * B[6]\n\t"
07834         "ldr    r8, [%[a], #16]\n\t"
07835         "ldr    r9, [%[b], #24]\n\t"
07836         "umull  r6, r7, r8, r9\n\t"
07837         "adds   r4, r4, r6\n\t"
07838         "adcs   r5, r5, r7\n\t"
07839         "adc    r3, r3, r10\n\t"
07840         "#  A[5] * B[5]\n\t"
07841         "ldr    r8, [%[a], #20]\n\t"
07842         "ldr    r9, [%[b], #20]\n\t"
07843         "umull  r6, r7, r8, r9\n\t"
07844         "adds   r4, r4, r6\n\t"
07845         "adcs   r5, r5, r7\n\t"
07846         "adc    r3, r3, r10\n\t"
07847         "#  A[6] * B[4]\n\t"
07848         "ldr    r8, [%[a], #24]\n\t"
07849         "ldr    r9, [%[b], #16]\n\t"
07850         "umull  r6, r7, r8, r9\n\t"
07851         "adds   r4, r4, r6\n\t"
07852         "adcs   r5, r5, r7\n\t"
07853         "adc    r3, r3, r10\n\t"
07854         "#  A[7] * B[3]\n\t"
07855         "ldr    r8, [%[a], #28]\n\t"
07856         "ldr    r9, [%[b], #12]\n\t"
07857         "umull  r6, r7, r8, r9\n\t"
07858         "adds   r4, r4, r6\n\t"
07859         "adcs   r5, r5, r7\n\t"
07860         "adc    r3, r3, r10\n\t"
07861         "str    r4, [%[r], #40]\n\t"
07862         "#  A[4] * B[7]\n\t"
07863         "ldr    r8, [%[a], #16]\n\t"
07864         "ldr    r9, [%[b], #28]\n\t"
07865         "umull  r6, r7, r8, r9\n\t"
07866         "adds   r5, r5, r6\n\t"
07867         "adcs   r3, r3, r7\n\t"
07868         "adc    r4, r10, r10\n\t"
07869         "#  A[5] * B[6]\n\t"
07870         "ldr    r8, [%[a], #20]\n\t"
07871         "ldr    r9, [%[b], #24]\n\t"
07872         "umull  r6, r7, r8, r9\n\t"
07873         "adds   r5, r5, r6\n\t"
07874         "adcs   r3, r3, r7\n\t"
07875         "adc    r4, r4, r10\n\t"
07876         "#  A[6] * B[5]\n\t"
07877         "ldr    r8, [%[a], #24]\n\t"
07878         "ldr    r9, [%[b], #20]\n\t"
07879         "umull  r6, r7, r8, r9\n\t"
07880         "adds   r5, r5, r6\n\t"
07881         "adcs   r3, r3, r7\n\t"
07882         "adc    r4, r4, r10\n\t"
07883         "#  A[7] * B[4]\n\t"
07884         "ldr    r8, [%[a], #28]\n\t"
07885         "ldr    r9, [%[b], #16]\n\t"
07886         "umull  r6, r7, r8, r9\n\t"
07887         "adds   r5, r5, r6\n\t"
07888         "adcs   r3, r3, r7\n\t"
07889         "adc    r4, r4, r10\n\t"
07890         "str    r5, [%[r], #44]\n\t"
07891         "#  A[5] * B[7]\n\t"
07892         "ldr    r8, [%[a], #20]\n\t"
07893         "ldr    r9, [%[b], #28]\n\t"
07894         "umull  r6, r7, r8, r9\n\t"
07895         "adds   r3, r3, r6\n\t"
07896         "adcs   r4, r4, r7\n\t"
07897         "adc    r5, r10, r10\n\t"
07898         "#  A[6] * B[6]\n\t"
07899         "ldr    r8, [%[a], #24]\n\t"
07900         "ldr    r9, [%[b], #24]\n\t"
07901         "umull  r6, r7, r8, r9\n\t"
07902         "adds   r3, r3, r6\n\t"
07903         "adcs   r4, r4, r7\n\t"
07904         "adc    r5, r5, r10\n\t"
07905         "#  A[7] * B[5]\n\t"
07906         "ldr    r8, [%[a], #28]\n\t"
07907         "ldr    r9, [%[b], #20]\n\t"
07908         "umull  r6, r7, r8, r9\n\t"
07909         "adds   r3, r3, r6\n\t"
07910         "adcs   r4, r4, r7\n\t"
07911         "adc    r5, r5, r10\n\t"
07912         "str    r3, [%[r], #48]\n\t"
07913         "#  A[6] * B[7]\n\t"
07914         "ldr    r8, [%[a], #24]\n\t"
07915         "ldr    r9, [%[b], #28]\n\t"
07916         "umull  r6, r7, r8, r9\n\t"
07917         "adds   r4, r4, r6\n\t"
07918         "adcs   r5, r5, r7\n\t"
07919         "adc    r3, r10, r10\n\t"
07920         "#  A[7] * B[6]\n\t"
07921         "ldr    r8, [%[a], #28]\n\t"
07922         "ldr    r9, [%[b], #24]\n\t"
07923         "umull  r6, r7, r8, r9\n\t"
07924         "adds   r4, r4, r6\n\t"
07925         "adcs   r5, r5, r7\n\t"
07926         "adc    r3, r3, r10\n\t"
07927         "str    r4, [%[r], #52]\n\t"
07928         "#  A[7] * B[7]\n\t"
07929         "ldr    r8, [%[a], #28]\n\t"
07930         "ldr    r9, [%[b], #28]\n\t"
07931         "umull  r6, r7, r8, r9\n\t"
07932         "adds   r5, r5, r6\n\t"
07933         "adc    r3, r3, r7\n\t"
07934         "str    r5, [%[r], #56]\n\t"
07935         "str    r3, [%[r], #60]\n\t"
07936         :
07937         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp)
07938         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
07939     );
07940 
07941     XMEMCPY(r, tmp, sizeof(tmp));
07942 }
07943 
07944 /* Square a and put result in r. (r = a * a)
07945  *
07946  * r  A single precision integer. Receives the result: the assembly stores
       *    byte offsets 32..60 directly and the first sizeof(tmp) bytes are
       *    copied in from tmp afterwards.
07947  * a  A single precision integer. Words at byte offsets 0..28 are read.
07948  */
07949 static void sp_3072_sqr_8(sp_digit* r, const sp_digit* a)
07950 {
07951     sp_digit tmp[8];
07952 
    /* The result is built one 32-bit word (column) at a time, lowest first.
     * Registers r2/r3/r4 form a rotating three-word accumulator: once a
     * column's word has been stored, the other two registers carry into the
     * next column and the stored register is reused as the new top word.
     * Cross products A[i] x A[j] with i != j are counted twice, either by
     * repeating the add sequence or by summing them in r5:r6:r7 and doubling
     * that sum. Squares A[i] x A[i] are added once.
     * r14 is set to zero once and used only to fold the carry flag in.
     * Result words 0..7 are staged in tmp; words 8..15 go straight to r.
     * NOTE(review): staging presumably allows r to alias a - confirm callers.
     * NOTE(review): the final copy matches the 32 bytes written to tmp only
     * if sizeof(sp_digit) == 4 - confirm against sp.h.
     */
07953     __asm__ __volatile__ (
07954         "mov    r14, #0\n\t"  /* r14 = 0 for the whole block */
07955         "#  A[0] * A[0]\n\t"
07956         "ldr    r10, [%[a], #0]\n\t"
07957         "umull  r8, r3, r10, r10\n\t"  /* r8 = low word, r3 = high word */
07958         "mov    r4, #0\n\t"
07959         "str    r8, [%[tmp]]\n\t"  /* result word 0 */
07960         "#  A[0] * A[1]\n\t"
07961         "ldr    r10, [%[a], #4]\n\t"
07962         "ldr    r8, [%[a], #0]\n\t"
07963         "umull  r8, r9, r10, r8\n\t"
07964         "adds   r3, r3, r8\n\t"
07965         "adcs   r4, r4, r9\n\t"
07966         "adc    r2, r14, r14\n\t"  /* r2 = 0 + 0 + carry: new top word */
07967         "adds   r3, r3, r8\n\t"  /* same product added a second time */
07968         "adcs   r4, r4, r9\n\t"
07969         "adc    r2, r2, r14\n\t"
07970         "str    r3, [%[tmp], #4]\n\t"  /* result word 1 */
07971         "#  A[0] * A[2]\n\t"
07972         "ldr    r10, [%[a], #8]\n\t"
07973         "ldr    r8, [%[a], #0]\n\t"
07974         "umull  r8, r9, r10, r8\n\t"
07975         "adds   r4, r4, r8\n\t"
07976         "adcs   r2, r2, r9\n\t"
07977         "adc    r3, r14, r14\n\t"
07978         "adds   r4, r4, r8\n\t"
07979         "adcs   r2, r2, r9\n\t"
07980         "adc    r3, r3, r14\n\t"
07981         "#  A[1] * A[1]\n\t"
07982         "ldr    r10, [%[a], #4]\n\t"
07983         "umull  r8, r9, r10, r10\n\t"
07984         "adds   r4, r4, r8\n\t"  /* square term is added once only */
07985         "adcs   r2, r2, r9\n\t"
07986         "adc    r3, r3, r14\n\t"
07987         "str    r4, [%[tmp], #8]\n\t"  /* result word 2 */
07988         "#  A[0] * A[3]\n\t"
07989         "ldr    r10, [%[a], #12]\n\t"
07990         "ldr    r8, [%[a], #0]\n\t"
07991         "umull  r8, r9, r10, r8\n\t"
07992         "adds   r2, r2, r8\n\t"
07993         "adcs   r3, r3, r9\n\t"
07994         "adc    r4, r14, r14\n\t"
07995         "adds   r2, r2, r8\n\t"
07996         "adcs   r3, r3, r9\n\t"
07997         "adc    r4, r4, r14\n\t"
07998         "#  A[1] * A[2]\n\t"
07999         "ldr    r10, [%[a], #8]\n\t"
08000         "ldr    r8, [%[a], #4]\n\t"
08001         "umull  r8, r9, r10, r8\n\t"
08002         "adds   r2, r2, r8\n\t"
08003         "adcs   r3, r3, r9\n\t"
08004         "adc    r4, r4, r14\n\t"
08005         "adds   r2, r2, r8\n\t"
08006         "adcs   r3, r3, r9\n\t"
08007         "adc    r4, r4, r14\n\t"
08008         "str    r2, [%[tmp], #12]\n\t"  /* result word 3 */
08009         "#  A[0] * A[4]\n\t"
08010         "ldr    r10, [%[a], #16]\n\t"
08011         "ldr    r8, [%[a], #0]\n\t"
08012         "umull  r8, r9, r10, r8\n\t"
08013         "adds   r3, r3, r8\n\t"
08014         "adcs   r4, r4, r9\n\t"
08015         "adc    r2, r14, r14\n\t"
08016         "adds   r3, r3, r8\n\t"
08017         "adcs   r4, r4, r9\n\t"
08018         "adc    r2, r2, r14\n\t"
08019         "#  A[1] * A[3]\n\t"
08020         "ldr    r10, [%[a], #12]\n\t"
08021         "ldr    r8, [%[a], #4]\n\t"
08022         "umull  r8, r9, r10, r8\n\t"
08023         "adds   r3, r3, r8\n\t"
08024         "adcs   r4, r4, r9\n\t"
08025         "adc    r2, r2, r14\n\t"
08026         "adds   r3, r3, r8\n\t"
08027         "adcs   r4, r4, r9\n\t"
08028         "adc    r2, r2, r14\n\t"
08029         "#  A[2] * A[2]\n\t"
08030         "ldr    r10, [%[a], #8]\n\t"
08031         "umull  r8, r9, r10, r10\n\t"
08032         "adds   r3, r3, r8\n\t"
08033         "adcs   r4, r4, r9\n\t"
08034         "adc    r2, r2, r14\n\t"
08035         "str    r3, [%[tmp], #16]\n\t"  /* result word 4 */
08036         "#  A[0] * A[5]\n\t"
08037         "ldr    r10, [%[a], #20]\n\t"
08038         "ldr    r8, [%[a], #0]\n\t"
08039         "umull  r5, r6, r10, r8\n\t"  /* start a side sum in r5:r6:r7 */
08040         "mov    r3, #0\n\t"  /* r3 becomes the new top accumulator word */
08041         "mov    r7, #0\n\t"
08042         "#  A[1] * A[4]\n\t"
08043         "ldr    r10, [%[a], #16]\n\t"
08044         "ldr    r8, [%[a], #4]\n\t"
08045         "umull  r8, r9, r10, r8\n\t"
08046         "adds   r5, r5, r8\n\t"
08047         "adcs   r6, r6, r9\n\t"
08048         "adc    r7, r7, r14\n\t"
08049         "#  A[2] * A[3]\n\t"
08050         "ldr    r10, [%[a], #12]\n\t"
08051         "ldr    r8, [%[a], #8]\n\t"
08052         "umull  r8, r9, r10, r8\n\t"
08053         "adds   r5, r5, r8\n\t"
08054         "adcs   r6, r6, r9\n\t"
08055         "adc    r7, r7, r14\n\t"
08056         "adds   r5, r5, r5\n\t"  /* double the summed cross products */
08057         "adcs   r6, r6, r6\n\t"
08058         "adc    r7, r7, r7\n\t"
08059         "adds   r4, r4, r5\n\t"  /* fold the doubled sum into r4:r2:r3 */
08060         "adcs   r2, r2, r6\n\t"
08061         "adc    r3, r3, r7\n\t"
08062         "str    r4, [%[tmp], #20]\n\t"  /* result word 5 */
08063         "#  A[0] * A[6]\n\t"
08064         "ldr    r10, [%[a], #24]\n\t"
08065         "ldr    r8, [%[a], #0]\n\t"
08066         "umull  r5, r6, r10, r8\n\t"
08067         "mov    r4, #0\n\t"
08068         "mov    r7, #0\n\t"
08069         "#  A[1] * A[5]\n\t"
08070         "ldr    r10, [%[a], #20]\n\t"
08071         "ldr    r8, [%[a], #4]\n\t"
08072         "umull  r8, r9, r10, r8\n\t"
08073         "adds   r5, r5, r8\n\t"
08074         "adcs   r6, r6, r9\n\t"
08075         "adc    r7, r7, r14\n\t"
08076         "#  A[2] * A[4]\n\t"
08077         "ldr    r10, [%[a], #16]\n\t"
08078         "ldr    r8, [%[a], #8]\n\t"
08079         "umull  r8, r9, r10, r8\n\t"
08080         "adds   r5, r5, r8\n\t"
08081         "adcs   r6, r6, r9\n\t"
08082         "adc    r7, r7, r14\n\t"
08083         "#  A[3] * A[3]\n\t"
08084         "ldr    r10, [%[a], #12]\n\t"
08085         "umull  r8, r9, r10, r10\n\t"  /* square held in r8:r9 until after the doubling */
08086         "adds   r5, r5, r5\n\t"  /* double the cross products first */
08087         "adcs   r6, r6, r6\n\t"
08088         "adc    r7, r7, r7\n\t"
08089         "adds   r5, r5, r8\n\t"  /* then add the square once */
08090         "adcs   r6, r6, r9\n\t"
08091         "adc    r7, r7, r14\n\t"
08092         "adds   r2, r2, r5\n\t"
08093         "adcs   r3, r3, r6\n\t"
08094         "adc    r4, r4, r7\n\t"
08095         "str    r2, [%[tmp], #24]\n\t"  /* result word 6 */
08096         "#  A[0] * A[7]\n\t"
08097         "ldr    r10, [%[a], #28]\n\t"
08098         "ldr    r8, [%[a], #0]\n\t"
08099         "umull  r5, r6, r10, r8\n\t"
08100         "mov    r2, #0\n\t"
08101         "mov    r7, #0\n\t"
08102         "#  A[1] * A[6]\n\t"
08103         "ldr    r10, [%[a], #24]\n\t"
08104         "ldr    r8, [%[a], #4]\n\t"
08105         "umull  r8, r9, r10, r8\n\t"
08106         "adds   r5, r5, r8\n\t"
08107         "adcs   r6, r6, r9\n\t"
08108         "adc    r7, r7, r14\n\t"
08109         "#  A[2] * A[5]\n\t"
08110         "ldr    r10, [%[a], #20]\n\t"
08111         "ldr    r8, [%[a], #8]\n\t"
08112         "umull  r8, r9, r10, r8\n\t"
08113         "adds   r5, r5, r8\n\t"
08114         "adcs   r6, r6, r9\n\t"
08115         "adc    r7, r7, r14\n\t"
08116         "#  A[3] * A[4]\n\t"
08117         "ldr    r10, [%[a], #16]\n\t"
08118         "ldr    r8, [%[a], #12]\n\t"
08119         "umull  r8, r9, r10, r8\n\t"
08120         "adds   r5, r5, r8\n\t"
08121         "adcs   r6, r6, r9\n\t"
08122         "adc    r7, r7, r14\n\t"
08123         "adds   r5, r5, r5\n\t"
08124         "adcs   r6, r6, r6\n\t"
08125         "adc    r7, r7, r7\n\t"
08126         "adds   r3, r3, r5\n\t"
08127         "adcs   r4, r4, r6\n\t"
08128         "adc    r2, r2, r7\n\t"
08129         "str    r3, [%[tmp], #28]\n\t"  /* result word 7: last one staged in tmp */
08130         "#  A[1] * A[7]\n\t"
08131         "ldr    r10, [%[a], #28]\n\t"
08132         "ldr    r8, [%[a], #4]\n\t"
08133         "umull  r5, r6, r10, r8\n\t"
08134         "mov    r3, #0\n\t"
08135         "mov    r7, #0\n\t"
08136         "#  A[2] * A[6]\n\t"
08137         "ldr    r10, [%[a], #24]\n\t"
08138         "ldr    r8, [%[a], #8]\n\t"
08139         "umull  r8, r9, r10, r8\n\t"
08140         "adds   r5, r5, r8\n\t"
08141         "adcs   r6, r6, r9\n\t"
08142         "adc    r7, r7, r14\n\t"
08143         "#  A[3] * A[5]\n\t"
08144         "ldr    r10, [%[a], #20]\n\t"
08145         "ldr    r8, [%[a], #12]\n\t"
08146         "umull  r8, r9, r10, r8\n\t"
08147         "adds   r5, r5, r8\n\t"
08148         "adcs   r6, r6, r9\n\t"
08149         "adc    r7, r7, r14\n\t"
08150         "#  A[4] * A[4]\n\t"
08151         "ldr    r10, [%[a], #16]\n\t"
08152         "umull  r8, r9, r10, r10\n\t"
08153         "adds   r5, r5, r5\n\t"
08154         "adcs   r6, r6, r6\n\t"
08155         "adc    r7, r7, r7\n\t"
08156         "adds   r5, r5, r8\n\t"
08157         "adcs   r6, r6, r9\n\t"
08158         "adc    r7, r7, r14\n\t"
08159         "adds   r4, r4, r5\n\t"
08160         "adcs   r2, r2, r6\n\t"
08161         "adc    r3, r3, r7\n\t"
08162         "str    r4, [%[r], #32]\n\t"  /* result word 8: stored straight to r */
08163         "#  A[2] * A[7]\n\t"
08164         "ldr    r10, [%[a], #28]\n\t"
08165         "ldr    r8, [%[a], #8]\n\t"
08166         "umull  r5, r6, r10, r8\n\t"
08167         "mov    r4, #0\n\t"
08168         "mov    r7, #0\n\t"
08169         "#  A[3] * A[6]\n\t"
08170         "ldr    r10, [%[a], #24]\n\t"
08171         "ldr    r8, [%[a], #12]\n\t"
08172         "umull  r8, r9, r10, r8\n\t"
08173         "adds   r5, r5, r8\n\t"
08174         "adcs   r6, r6, r9\n\t"
08175         "adc    r7, r7, r14\n\t"
08176         "#  A[4] * A[5]\n\t"
08177         "ldr    r10, [%[a], #20]\n\t"
08178         "ldr    r8, [%[a], #16]\n\t"
08179         "umull  r8, r9, r10, r8\n\t"
08180         "adds   r5, r5, r8\n\t"
08181         "adcs   r6, r6, r9\n\t"
08182         "adc    r7, r7, r14\n\t"
08183         "adds   r5, r5, r5\n\t"
08184         "adcs   r6, r6, r6\n\t"
08185         "adc    r7, r7, r7\n\t"
08186         "adds   r2, r2, r5\n\t"
08187         "adcs   r3, r3, r6\n\t"
08188         "adc    r4, r4, r7\n\t"
08189         "str    r2, [%[r], #36]\n\t"  /* result word 9 */
08190         "#  A[3] * A[7]\n\t"
08191         "ldr    r10, [%[a], #28]\n\t"
08192         "ldr    r8, [%[a], #12]\n\t"
08193         "umull  r8, r9, r10, r8\n\t"
08194         "adds   r3, r3, r8\n\t"
08195         "adcs   r4, r4, r9\n\t"
08196         "adc    r2, r14, r14\n\t"
08197         "adds   r3, r3, r8\n\t"
08198         "adcs   r4, r4, r9\n\t"
08199         "adc    r2, r2, r14\n\t"
08200         "#  A[4] * A[6]\n\t"
08201         "ldr    r10, [%[a], #24]\n\t"
08202         "ldr    r8, [%[a], #16]\n\t"
08203         "umull  r8, r9, r10, r8\n\t"
08204         "adds   r3, r3, r8\n\t"
08205         "adcs   r4, r4, r9\n\t"
08206         "adc    r2, r2, r14\n\t"
08207         "adds   r3, r3, r8\n\t"
08208         "adcs   r4, r4, r9\n\t"
08209         "adc    r2, r2, r14\n\t"
08210         "#  A[5] * A[5]\n\t"
08211         "ldr    r10, [%[a], #20]\n\t"
08212         "umull  r8, r9, r10, r10\n\t"
08213         "adds   r3, r3, r8\n\t"
08214         "adcs   r4, r4, r9\n\t"
08215         "adc    r2, r2, r14\n\t"
08216         "str    r3, [%[r], #40]\n\t"  /* result word 10 */
08217         "#  A[4] * A[7]\n\t"
08218         "ldr    r10, [%[a], #28]\n\t"
08219         "ldr    r8, [%[a], #16]\n\t"
08220         "umull  r8, r9, r10, r8\n\t"
08221         "adds   r4, r4, r8\n\t"
08222         "adcs   r2, r2, r9\n\t"
08223         "adc    r3, r14, r14\n\t"
08224         "adds   r4, r4, r8\n\t"
08225         "adcs   r2, r2, r9\n\t"
08226         "adc    r3, r3, r14\n\t"
08227         "#  A[5] * A[6]\n\t"
08228         "ldr    r10, [%[a], #24]\n\t"
08229         "ldr    r8, [%[a], #20]\n\t"
08230         "umull  r8, r9, r10, r8\n\t"
08231         "adds   r4, r4, r8\n\t"
08232         "adcs   r2, r2, r9\n\t"
08233         "adc    r3, r3, r14\n\t"
08234         "adds   r4, r4, r8\n\t"
08235         "adcs   r2, r2, r9\n\t"
08236         "adc    r3, r3, r14\n\t"
08237         "str    r4, [%[r], #44]\n\t"  /* result word 11 */
08238         "#  A[5] * A[7]\n\t"
08239         "ldr    r10, [%[a], #28]\n\t"
08240         "ldr    r8, [%[a], #20]\n\t"
08241         "umull  r8, r9, r10, r8\n\t"
08242         "adds   r2, r2, r8\n\t"
08243         "adcs   r3, r3, r9\n\t"
08244         "adc    r4, r14, r14\n\t"
08245         "adds   r2, r2, r8\n\t"
08246         "adcs   r3, r3, r9\n\t"
08247         "adc    r4, r4, r14\n\t"
08248         "#  A[6] * A[6]\n\t"
08249         "ldr    r10, [%[a], #24]\n\t"
08250         "umull  r8, r9, r10, r10\n\t"
08251         "adds   r2, r2, r8\n\t"
08252         "adcs   r3, r3, r9\n\t"
08253         "adc    r4, r4, r14\n\t"
08254         "str    r2, [%[r], #48]\n\t"  /* result word 12 */
08255         "#  A[6] * A[7]\n\t"
08256         "ldr    r10, [%[a], #28]\n\t"
08257         "ldr    r8, [%[a], #24]\n\t"
08258         "umull  r8, r9, r10, r8\n\t"
08259         "adds   r3, r3, r8\n\t"
08260         "adcs   r4, r4, r9\n\t"
08261         "adc    r2, r14, r14\n\t"
08262         "adds   r3, r3, r8\n\t"
08263         "adcs   r4, r4, r9\n\t"
08264         "adc    r2, r2, r14\n\t"
08265         "str    r3, [%[r], #52]\n\t"  /* result word 13 */
08266         "#  A[7] * A[7]\n\t"
08267         "ldr    r10, [%[a], #28]\n\t"
08268         "umull  r8, r9, r10, r10\n\t"
08269         "adds   r4, r4, r8\n\t"
08270         "adc    r2, r2, r9\n\t"  /* last column: no third accumulator word is kept */
08271         "str    r4, [%[r], #56]\n\t"  /* result word 14 */
08272         "str    r2, [%[r], #60]\n\t"  /* result word 15 */
08273         :
08274         : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp)
08275         : "memory", "r2", "r3", "r4", "r8", "r9", "r10", "r8", "r5", "r6", "r7", "r14"  /* NOTE(review): "r8" is listed twice */
08276     );
08277 
08278     XMEMCPY(r, tmp, sizeof(tmp));  /* move the staged low result words into r */
08279 }
08280 
08281 /* Add b to a into r. (r = a + b)
08282  *
08283  * r  A single precision integer. Words at byte offsets 0..28 are written.
08284  * a  A single precision integer. Words at byte offsets 0..28 are read.
08285  * b  A single precision integer. Words at byte offsets 0..28 are read.
       *
       * Returns the carry out of the top word: 0 or 1.
       *
       * The eight words are handled as two groups of four. Each group's
       * inputs are loaded before any of that group's results are stored.
       * The carry runs through both groups: only the very first add is a
       * plain adds, every later one is adcs, and the ldr/str between the
       * groups do not change the flags.
08286  */
08287 static sp_digit sp_3072_add_8(sp_digit* r, const sp_digit* a,
08288         const sp_digit* b)
08289 {
08290     sp_digit c = 0;
08291 
08292     __asm__ __volatile__ (
08293         "mov    r12, #0\n\t"  /* r12 = 0, used to read the carry back at the end */
08294         "ldr    r4, [%[a], #0]\n\t"
08295         "ldr    r5, [%[a], #4]\n\t"
08296         "ldr    r6, [%[a], #8]\n\t"
08297         "ldr    r7, [%[a], #12]\n\t"
08298         "ldr    r8, [%[b], #0]\n\t"
08299         "ldr    r9, [%[b], #4]\n\t"
08300         "ldr    r10, [%[b], #8]\n\t"
08301         "ldr    r14, [%[b], #12]\n\t"
08302         "adds   r4, r4, r8\n\t"  /* word 0: starts the carry chain */
08303         "adcs   r5, r5, r9\n\t"
08304         "adcs   r6, r6, r10\n\t"
08305         "adcs   r7, r7, r14\n\t"
08306         "str    r4, [%[r], #0]\n\t"
08307         "str    r5, [%[r], #4]\n\t"
08308         "str    r6, [%[r], #8]\n\t"
08309         "str    r7, [%[r], #12]\n\t"
08310         "ldr    r4, [%[a], #16]\n\t"
08311         "ldr    r5, [%[a], #20]\n\t"
08312         "ldr    r6, [%[a], #24]\n\t"
08313         "ldr    r7, [%[a], #28]\n\t"
08314         "ldr    r8, [%[b], #16]\n\t"
08315         "ldr    r9, [%[b], #20]\n\t"
08316         "ldr    r10, [%[b], #24]\n\t"
08317         "ldr    r14, [%[b], #28]\n\t"
08318         "adcs   r4, r4, r8\n\t"  /* word 4: continues with the carry from word 3 */
08319         "adcs   r5, r5, r9\n\t"
08320         "adcs   r6, r6, r10\n\t"
08321         "adcs   r7, r7, r14\n\t"
08322         "str    r4, [%[r], #16]\n\t"
08323         "str    r5, [%[r], #20]\n\t"
08324         "str    r6, [%[r], #24]\n\t"
08325         "str    r7, [%[r], #28]\n\t"
08326         "adc    %[c], r12, r12\n\t"  /* c = 0 + 0 + final carry */
08327         : [c] "+r" (c)
08328         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
08329         : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
08330     );
08331 
08332     return c;
08333 }
08334 
08335 /* Sub b from a into a. (a -= b)
08336  *
08337  * a  A single precision integer and result. Words at byte offsets 0..60
       *    are read and overwritten.
08338  * b  A single precision integer. Words at byte offsets 0..60 are read.
       *
       * Returns 0 when the subtraction does not borrow out of the top word
       * and a value with all bits set when it does.
       *
       * The sixteen words are handled as four groups of four. Each group's
       * inputs are loaded before any of that group's results are stored.
       * The borrow runs through all groups: only the very first subtract is a
       * plain subs, every later one is sbcs, and the ldr/str between the
       * groups do not change the flags.
08339  */
08340 static sp_digit sp_3072_sub_in_place_16(sp_digit* a, const sp_digit* b)
08341 {
08342     sp_digit c = 0;
08343 
08344     __asm__ __volatile__ (
08345         "ldr    r2, [%[a], #0]\n\t"
08346         "ldr    r3, [%[a], #4]\n\t"
08347         "ldr    r4, [%[a], #8]\n\t"
08348         "ldr    r5, [%[a], #12]\n\t"
08349         "ldr    r6, [%[b], #0]\n\t"
08350         "ldr    r7, [%[b], #4]\n\t"
08351         "ldr    r8, [%[b], #8]\n\t"
08352         "ldr    r9, [%[b], #12]\n\t"
08353         "subs   r2, r2, r6\n\t"  /* word 0: starts the borrow chain */
08354         "sbcs   r3, r3, r7\n\t"
08355         "sbcs   r4, r4, r8\n\t"
08356         "sbcs   r5, r5, r9\n\t"
08357         "str    r2, [%[a], #0]\n\t"
08358         "str    r3, [%[a], #4]\n\t"
08359         "str    r4, [%[a], #8]\n\t"
08360         "str    r5, [%[a], #12]\n\t"
08361         "ldr    r2, [%[a], #16]\n\t"
08362         "ldr    r3, [%[a], #20]\n\t"
08363         "ldr    r4, [%[a], #24]\n\t"
08364         "ldr    r5, [%[a], #28]\n\t"
08365         "ldr    r6, [%[b], #16]\n\t"
08366         "ldr    r7, [%[b], #20]\n\t"
08367         "ldr    r8, [%[b], #24]\n\t"
08368         "ldr    r9, [%[b], #28]\n\t"
08369         "sbcs   r2, r2, r6\n\t"  /* word 4: continues with the borrow from word 3 */
08370         "sbcs   r3, r3, r7\n\t"
08371         "sbcs   r4, r4, r8\n\t"
08372         "sbcs   r5, r5, r9\n\t"
08373         "str    r2, [%[a], #16]\n\t"
08374         "str    r3, [%[a], #20]\n\t"
08375         "str    r4, [%[a], #24]\n\t"
08376         "str    r5, [%[a], #28]\n\t"
08377         "ldr    r2, [%[a], #32]\n\t"
08378         "ldr    r3, [%[a], #36]\n\t"
08379         "ldr    r4, [%[a], #40]\n\t"
08380         "ldr    r5, [%[a], #44]\n\t"
08381         "ldr    r6, [%[b], #32]\n\t"
08382         "ldr    r7, [%[b], #36]\n\t"
08383         "ldr    r8, [%[b], #40]\n\t"
08384         "ldr    r9, [%[b], #44]\n\t"
08385         "sbcs   r2, r2, r6\n\t"  /* word 8 */
08386         "sbcs   r3, r3, r7\n\t"
08387         "sbcs   r4, r4, r8\n\t"
08388         "sbcs   r5, r5, r9\n\t"
08389         "str    r2, [%[a], #32]\n\t"
08390         "str    r3, [%[a], #36]\n\t"
08391         "str    r4, [%[a], #40]\n\t"
08392         "str    r5, [%[a], #44]\n\t"
08393         "ldr    r2, [%[a], #48]\n\t"
08394         "ldr    r3, [%[a], #52]\n\t"
08395         "ldr    r4, [%[a], #56]\n\t"
08396         "ldr    r5, [%[a], #60]\n\t"
08397         "ldr    r6, [%[b], #48]\n\t"
08398         "ldr    r7, [%[b], #52]\n\t"
08399         "ldr    r8, [%[b], #56]\n\t"
08400         "ldr    r9, [%[b], #60]\n\t"
08401         "sbcs   r2, r2, r6\n\t"  /* word 12 */
08402         "sbcs   r3, r3, r7\n\t"
08403         "sbcs   r4, r4, r8\n\t"
08404         "sbcs   r5, r5, r9\n\t"
08405         "str    r2, [%[a], #48]\n\t"
08406         "str    r3, [%[a], #52]\n\t"
08407         "str    r4, [%[a], #56]\n\t"
08408         "str    r5, [%[a], #60]\n\t"
08409         "sbc    %[c], r9, r9\n\t"  /* c = r9 - r9 - borrow: 0 or all ones; r9's value is irrelevant */
08410         : [c] "+r" (c)
08411         : [a] "r" (a), [b] "r" (b)
08412         : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
08413     );
08414 
08415     return c;
08416 }
08417 
08418 /* Add b to a into r. (r = a + b)
08419  *
08420  * r  A single precision integer.
08421  * a  A single precision integer.
08422  * b  A single precision integer.
08423  */
08424 static sp_digit sp_3072_add_16(sp_digit* r, const sp_digit* a,
08425         const sp_digit* b)
08426 {
08427     sp_digit c = 0;
08428 
08429     __asm__ __volatile__ (
08430         "mov    r12, #0\n\t"
08431         "ldr    r4, [%[a], #0]\n\t"
08432         "ldr    r5, [%[a], #4]\n\t"
08433         "ldr    r6, [%[a], #8]\n\t"
08434         "ldr    r7, [%[a], #12]\n\t"
08435         "ldr    r8, [%[b], #0]\n\t"
08436         "ldr    r9, [%[b], #4]\n\t"
08437         "ldr    r10, [%[b], #8]\n\t"
08438         "ldr    r14, [%[b], #12]\n\t"
08439         "adds   r4, r4, r8\n\t"
08440         "adcs   r5, r5, r9\n\t"
08441         "adcs   r6, r6, r10\n\t"
08442         "adcs   r7, r7, r14\n\t"
08443         "str    r4, [%[r], #0]\n\t"
08444         "str    r5, [%[r], #4]\n\t"
08445         "str    r6, [%[r], #8]\n\t"
08446         "str    r7, [%[r], #12]\n\t"
08447         "ldr    r4, [%[a], #16]\n\t"
08448         "ldr    r5, [%[a], #20]\n\t"
08449         "ldr    r6, [%[a], #24]\n\t"
08450         "ldr    r7, [%[a], #28]\n\t"
08451         "ldr    r8, [%[b], #16]\n\t"
08452         "ldr    r9, [%[b], #20]\n\t"
08453         "ldr    r10, [%[b], #24]\n\t"
08454         "ldr    r14, [%[b], #28]\n\t"
08455         "adcs   r4, r4, r8\n\t"
08456         "adcs   r5, r5, r9\n\t"
08457         "adcs   r6, r6, r10\n\t"
08458         "adcs   r7, r7, r14\n\t"
08459         "str    r4, [%[r], #16]\n\t"
08460         "str    r5, [%[r], #20]\n\t"
08461         "str    r6, [%[r], #24]\n\t"
08462         "str    r7, [%[r], #28]\n\t"
08463         "ldr    r4, [%[a], #32]\n\t"
08464         "ldr    r5, [%[a], #36]\n\t"
08465         "ldr    r6, [%[a], #40]\n\t"
08466         "ldr    r7, [%[a], #44]\n\t"
08467         "ldr    r8, [%[b], #32]\n\t"
08468         "ldr    r9, [%[b], #36]\n\t"
08469         "ldr    r10, [%[b], #40]\n\t"
08470         "ldr    r14, [%[b], #44]\n\t"
08471         "adcs   r4, r4, r8\n\t"
08472         "adcs   r5, r5, r9\n\t"
08473         "adcs   r6, r6, r10\n\t"
08474         "adcs   r7, r7, r14\n\t"
08475         "str    r4, [%[r], #32]\n\t"
08476         "str    r5, [%[r], #36]\n\t"
08477         "str    r6, [%[r], #40]\n\t"
08478         "str    r7, [%[r], #44]\n\t"
08479         "ldr    r4, [%[a], #48]\n\t"
08480         "ldr    r5, [%[a], #52]\n\t"
08481         "ldr    r6, [%[a], #56]\n\t"
08482         "ldr    r7, [%[a], #60]\n\t"
08483         "ldr    r8, [%[b], #48]\n\t"
08484         "ldr    r9, [%[b], #52]\n\t"
08485         "ldr    r10, [%[b], #56]\n\t"
08486         "ldr    r14, [%[b], #60]\n\t"
08487         "adcs   r4, r4, r8\n\t"
08488         "adcs   r5, r5, r9\n\t"
08489         "adcs   r6, r6, r10\n\t"
08490         "adcs   r7, r7, r14\n\t"
08491         "str    r4, [%[r], #48]\n\t"
08492         "str    r5, [%[r], #52]\n\t"
08493         "str    r6, [%[r], #56]\n\t"
08494         "str    r7, [%[r], #60]\n\t"
08495         "adc    %[c], r12, r12\n\t"
08496         : [c] "+r" (c)
08497         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
08498         : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
08499     );
08500 
08501     return c;
08502 }
08503 
08504 /* AND m into each word of a and store in r.
08505  *
08506  * r  A single precision integer.
08507  * a  A single precision integer.
08508  * m  Mask to AND against each digit.
08509  */
08510 static void sp_3072_mask_8(sp_digit* r, sp_digit* a, sp_digit m)
08511 {
08512 #ifdef WOLFSSL_SP_SMALL
08513     int i;
08514 
08515     for (i=0; i<8; i++)
08516         r[i] = a[i] & m;
08517 #else
08518     r[0] = a[0] & m;
08519     r[1] = a[1] & m;
08520     r[2] = a[2] & m;
08521     r[3] = a[3] & m;
08522     r[4] = a[4] & m;
08523     r[5] = a[5] & m;
08524     r[6] = a[6] & m;
08525     r[7] = a[7] & m;
08526 #endif
08527 }
08528 
08529 /* Multiply a and b into r. (r = a * b)
08530  *
08531  * r  A single precision integer.
08532  * a  A single precision integer.
08533  * b  A single precision integer.
08534  */
08535 static void sp_3072_mul_16(sp_digit* r, const sp_digit* a,
08536         const sp_digit* b)
08537 {
08538     sp_digit* z0 = r;
08539     sp_digit z1[16];
08540     sp_digit a1[8];
08541     sp_digit b1[8];
08542     sp_digit z2[16];
08543     sp_digit u, ca, cb;
08544 
08545     ca = sp_3072_add_8(a1, a, &a[8]);
08546     cb = sp_3072_add_8(b1, b, &b[8]);
08547     u  = ca & cb;
08548     sp_3072_mul_8(z1, a1, b1);
08549     sp_3072_mul_8(z2, &a[8], &b[8]);
08550     sp_3072_mul_8(z0, a, b);
08551     sp_3072_mask_8(r + 16, a1, 0 - cb);
08552     sp_3072_mask_8(b1, b1, 0 - ca);
08553     u += sp_3072_add_8(r + 16, r + 16, b1);
08554     u += sp_3072_sub_in_place_16(z1, z2);
08555     u += sp_3072_sub_in_place_16(z1, z0);
08556     u += sp_3072_add_16(r + 8, r + 8, z1);
08557     r[24] = u;
08558     XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1));
08559     sp_3072_add_16(r + 16, r + 16, z2);
08560 }
08561 
08562 /* Square a and put result in r. (r = a * a)
08563  *
08564  * r  A single precision integer.
08565  * a  A single precision integer.
08566  */
08567 static void sp_3072_sqr_16(sp_digit* r, const sp_digit* a)
08568 {
08569     sp_digit* z0 = r;
08570     sp_digit z2[16];
08571     sp_digit z1[16];
08572     sp_digit a1[8];
08573     sp_digit u;
08574 
08575     u = sp_3072_add_8(a1, a, &a[8]);
08576     sp_3072_sqr_8(z1, a1);
08577     sp_3072_sqr_8(z2, &a[8]);
08578     sp_3072_sqr_8(z0, a);
08579     sp_3072_mask_8(r + 16, a1, 0 - u);
08580     u += sp_3072_add_8(r + 16, r + 16, r + 16);
08581     u += sp_3072_sub_in_place_16(z1, z2);
08582     u += sp_3072_sub_in_place_16(z1, z0);
08583     u += sp_3072_add_16(r + 8, r + 8, z1);
08584     r[24] = u;
08585     XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1));
08586     sp_3072_add_16(r + 16, r + 16, z2);
08587 }
08588 
08589 /* Sub b from a into r. (r = a - b)
08590  *
08591  * r  A single precision integer.
08592  * a  A single precision integer.
08593  * b  A single precision integer.
08594  */
08595 static sp_digit sp_3072_sub_32(sp_digit* r, const sp_digit* a,
08596         const sp_digit* b)
08597 {
08598     sp_digit c = 0;
08599 
08600     __asm__ __volatile__ (
08601         "ldr    r3, [%[a], #0]\n\t"
08602         "ldr    r4, [%[a], #4]\n\t"
08603         "ldr    r5, [%[a], #8]\n\t"
08604         "ldr    r6, [%[a], #12]\n\t"
08605         "ldr    r7, [%[b], #0]\n\t"
08606         "ldr    r8, [%[b], #4]\n\t"
08607         "ldr    r9, [%[b], #8]\n\t"
08608         "ldr    r10, [%[b], #12]\n\t"
08609         "subs   r3, r3, r7\n\t"
08610         "sbcs   r4, r4, r8\n\t"
08611         "sbcs   r5, r5, r9\n\t"
08612         "sbcs   r6, r6, r10\n\t"
08613         "str    r3, [%[r], #0]\n\t"
08614         "str    r4, [%[r], #4]\n\t"
08615         "str    r5, [%[r], #8]\n\t"
08616         "str    r6, [%[r], #12]\n\t"
08617         "ldr    r3, [%[a], #16]\n\t"
08618         "ldr    r4, [%[a], #20]\n\t"
08619         "ldr    r5, [%[a], #24]\n\t"
08620         "ldr    r6, [%[a], #28]\n\t"
08621         "ldr    r7, [%[b], #16]\n\t"
08622         "ldr    r8, [%[b], #20]\n\t"
08623         "ldr    r9, [%[b], #24]\n\t"
08624         "ldr    r10, [%[b], #28]\n\t"
08625         "sbcs   r3, r3, r7\n\t"
08626         "sbcs   r4, r4, r8\n\t"
08627         "sbcs   r5, r5, r9\n\t"
08628         "sbcs   r6, r6, r10\n\t"
08629         "str    r3, [%[r], #16]\n\t"
08630         "str    r4, [%[r], #20]\n\t"
08631         "str    r5, [%[r], #24]\n\t"
08632         "str    r6, [%[r], #28]\n\t"
08633         "ldr    r3, [%[a], #32]\n\t"
08634         "ldr    r4, [%[a], #36]\n\t"
08635         "ldr    r5, [%[a], #40]\n\t"
08636         "ldr    r6, [%[a], #44]\n\t"
08637         "ldr    r7, [%[b], #32]\n\t"
08638         "ldr    r8, [%[b], #36]\n\t"
08639         "ldr    r9, [%[b], #40]\n\t"
08640         "ldr    r10, [%[b], #44]\n\t"
08641         "sbcs   r3, r3, r7\n\t"
08642         "sbcs   r4, r4, r8\n\t"
08643         "sbcs   r5, r5, r9\n\t"
08644         "sbcs   r6, r6, r10\n\t"
08645         "str    r3, [%[r], #32]\n\t"
08646         "str    r4, [%[r], #36]\n\t"
08647         "str    r5, [%[r], #40]\n\t"
08648         "str    r6, [%[r], #44]\n\t"
08649         "ldr    r3, [%[a], #48]\n\t"
08650         "ldr    r4, [%[a], #52]\n\t"
08651         "ldr    r5, [%[a], #56]\n\t"
08652         "ldr    r6, [%[a], #60]\n\t"
08653         "ldr    r7, [%[b], #48]\n\t"
08654         "ldr    r8, [%[b], #52]\n\t"
08655         "ldr    r9, [%[b], #56]\n\t"
08656         "ldr    r10, [%[b], #60]\n\t"
08657         "sbcs   r3, r3, r7\n\t"
08658         "sbcs   r4, r4, r8\n\t"
08659         "sbcs   r5, r5, r9\n\t"
08660         "sbcs   r6, r6, r10\n\t"
08661         "str    r3, [%[r], #48]\n\t"
08662         "str    r4, [%[r], #52]\n\t"
08663         "str    r5, [%[r], #56]\n\t"
08664         "str    r6, [%[r], #60]\n\t"
08665         "ldr    r3, [%[a], #64]\n\t"
08666         "ldr    r4, [%[a], #68]\n\t"
08667         "ldr    r5, [%[a], #72]\n\t"
08668         "ldr    r6, [%[a], #76]\n\t"
08669         "ldr    r7, [%[b], #64]\n\t"
08670         "ldr    r8, [%[b], #68]\n\t"
08671         "ldr    r9, [%[b], #72]\n\t"
08672         "ldr    r10, [%[b], #76]\n\t"
08673         "sbcs   r3, r3, r7\n\t"
08674         "sbcs   r4, r4, r8\n\t"
08675         "sbcs   r5, r5, r9\n\t"
08676         "sbcs   r6, r6, r10\n\t"
08677         "str    r3, [%[r], #64]\n\t"
08678         "str    r4, [%[r], #68]\n\t"
08679         "str    r5, [%[r], #72]\n\t"
08680         "str    r6, [%[r], #76]\n\t"
08681         "ldr    r3, [%[a], #80]\n\t"
08682         "ldr    r4, [%[a], #84]\n\t"
08683         "ldr    r5, [%[a], #88]\n\t"
08684         "ldr    r6, [%[a], #92]\n\t"
08685         "ldr    r7, [%[b], #80]\n\t"
08686         "ldr    r8, [%[b], #84]\n\t"
08687         "ldr    r9, [%[b], #88]\n\t"
08688         "ldr    r10, [%[b], #92]\n\t"
08689         "sbcs   r3, r3, r7\n\t"
08690         "sbcs   r4, r4, r8\n\t"
08691         "sbcs   r5, r5, r9\n\t"
08692         "sbcs   r6, r6, r10\n\t"
08693         "str    r3, [%[r], #80]\n\t"
08694         "str    r4, [%[r], #84]\n\t"
08695         "str    r5, [%[r], #88]\n\t"
08696         "str    r6, [%[r], #92]\n\t"
08697         "ldr    r3, [%[a], #96]\n\t"
08698         "ldr    r4, [%[a], #100]\n\t"
08699         "ldr    r5, [%[a], #104]\n\t"
08700         "ldr    r6, [%[a], #108]\n\t"
08701         "ldr    r7, [%[b], #96]\n\t"
08702         "ldr    r8, [%[b], #100]\n\t"
08703         "ldr    r9, [%[b], #104]\n\t"
08704         "ldr    r10, [%[b], #108]\n\t"
08705         "sbcs   r3, r3, r7\n\t"
08706         "sbcs   r4, r4, r8\n\t"
08707         "sbcs   r5, r5, r9\n\t"
08708         "sbcs   r6, r6, r10\n\t"
08709         "str    r3, [%[r], #96]\n\t"
08710         "str    r4, [%[r], #100]\n\t"
08711         "str    r5, [%[r], #104]\n\t"
08712         "str    r6, [%[r], #108]\n\t"
08713         "ldr    r3, [%[a], #112]\n\t"
08714         "ldr    r4, [%[a], #116]\n\t"
08715         "ldr    r5, [%[a], #120]\n\t"
08716         "ldr    r6, [%[a], #124]\n\t"
08717         "ldr    r7, [%[b], #112]\n\t"
08718         "ldr    r8, [%[b], #116]\n\t"
08719         "ldr    r9, [%[b], #120]\n\t"
08720         "ldr    r10, [%[b], #124]\n\t"
08721         "sbcs   r3, r3, r7\n\t"
08722         "sbcs   r4, r4, r8\n\t"
08723         "sbcs   r5, r5, r9\n\t"
08724         "sbcs   r6, r6, r10\n\t"
08725         "str    r3, [%[r], #112]\n\t"
08726         "str    r4, [%[r], #116]\n\t"
08727         "str    r5, [%[r], #120]\n\t"
08728         "str    r6, [%[r], #124]\n\t"
08729         "sbc    %[c], %[c], #0\n\t"
08730         : [c] "+r" (c)
08731         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
08732         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
08733     );
08734 
08735     return c;
08736 }
08737 
08738 /* Add b to a into r. (r = a + b)
08739  *
08740  * r  A single precision integer.
08741  * a  A single precision integer.
08742  * b  A single precision integer.
08743  */
08744 static sp_digit sp_3072_add_32(sp_digit* r, const sp_digit* a,
08745         const sp_digit* b)
08746 {
08747     sp_digit c = 0;
08748 
08749     __asm__ __volatile__ (
08750         "mov    r12, #0\n\t"
08751         "ldr    r4, [%[a], #0]\n\t"
08752         "ldr    r5, [%[a], #4]\n\t"
08753         "ldr    r6, [%[a], #8]\n\t"
08754         "ldr    r7, [%[a], #12]\n\t"
08755         "ldr    r8, [%[b], #0]\n\t"
08756         "ldr    r9, [%[b], #4]\n\t"
08757         "ldr    r10, [%[b], #8]\n\t"
08758         "ldr    r14, [%[b], #12]\n\t"
08759         "adds   r4, r4, r8\n\t"
08760         "adcs   r5, r5, r9\n\t"
08761         "adcs   r6, r6, r10\n\t"
08762         "adcs   r7, r7, r14\n\t"
08763         "str    r4, [%[r], #0]\n\t"
08764         "str    r5, [%[r], #4]\n\t"
08765         "str    r6, [%[r], #8]\n\t"
08766         "str    r7, [%[r], #12]\n\t"
08767         "ldr    r4, [%[a], #16]\n\t"
08768         "ldr    r5, [%[a], #20]\n\t"
08769         "ldr    r6, [%[a], #24]\n\t"
08770         "ldr    r7, [%[a], #28]\n\t"
08771         "ldr    r8, [%[b], #16]\n\t"
08772         "ldr    r9, [%[b], #20]\n\t"
08773         "ldr    r10, [%[b], #24]\n\t"
08774         "ldr    r14, [%[b], #28]\n\t"
08775         "adcs   r4, r4, r8\n\t"
08776         "adcs   r5, r5, r9\n\t"
08777         "adcs   r6, r6, r10\n\t"
08778         "adcs   r7, r7, r14\n\t"
08779         "str    r4, [%[r], #16]\n\t"
08780         "str    r5, [%[r], #20]\n\t"
08781         "str    r6, [%[r], #24]\n\t"
08782         "str    r7, [%[r], #28]\n\t"
08783         "ldr    r4, [%[a], #32]\n\t"
08784         "ldr    r5, [%[a], #36]\n\t"
08785         "ldr    r6, [%[a], #40]\n\t"
08786         "ldr    r7, [%[a], #44]\n\t"
08787         "ldr    r8, [%[b], #32]\n\t"
08788         "ldr    r9, [%[b], #36]\n\t"
08789         "ldr    r10, [%[b], #40]\n\t"
08790         "ldr    r14, [%[b], #44]\n\t"
08791         "adcs   r4, r4, r8\n\t"
08792         "adcs   r5, r5, r9\n\t"
08793         "adcs   r6, r6, r10\n\t"
08794         "adcs   r7, r7, r14\n\t"
08795         "str    r4, [%[r], #32]\n\t"
08796         "str    r5, [%[r], #36]\n\t"
08797         "str    r6, [%[r], #40]\n\t"
08798         "str    r7, [%[r], #44]\n\t"
08799         "ldr    r4, [%[a], #48]\n\t"
08800         "ldr    r5, [%[a], #52]\n\t"
08801         "ldr    r6, [%[a], #56]\n\t"
08802         "ldr    r7, [%[a], #60]\n\t"
08803         "ldr    r8, [%[b], #48]\n\t"
08804         "ldr    r9, [%[b], #52]\n\t"
08805         "ldr    r10, [%[b], #56]\n\t"
08806         "ldr    r14, [%[b], #60]\n\t"
08807         "adcs   r4, r4, r8\n\t"
08808         "adcs   r5, r5, r9\n\t"
08809         "adcs   r6, r6, r10\n\t"
08810         "adcs   r7, r7, r14\n\t"
08811         "str    r4, [%[r], #48]\n\t"
08812         "str    r5, [%[r], #52]\n\t"
08813         "str    r6, [%[r], #56]\n\t"
08814         "str    r7, [%[r], #60]\n\t"
08815         "ldr    r4, [%[a], #64]\n\t"
08816         "ldr    r5, [%[a], #68]\n\t"
08817         "ldr    r6, [%[a], #72]\n\t"
08818         "ldr    r7, [%[a], #76]\n\t"
08819         "ldr    r8, [%[b], #64]\n\t"
08820         "ldr    r9, [%[b], #68]\n\t"
08821         "ldr    r10, [%[b], #72]\n\t"
08822         "ldr    r14, [%[b], #76]\n\t"
08823         "adcs   r4, r4, r8\n\t"
08824         "adcs   r5, r5, r9\n\t"
08825         "adcs   r6, r6, r10\n\t"
08826         "adcs   r7, r7, r14\n\t"
08827         "str    r4, [%[r], #64]\n\t"
08828         "str    r5, [%[r], #68]\n\t"
08829         "str    r6, [%[r], #72]\n\t"
08830         "str    r7, [%[r], #76]\n\t"
08831         "ldr    r4, [%[a], #80]\n\t"
08832         "ldr    r5, [%[a], #84]\n\t"
08833         "ldr    r6, [%[a], #88]\n\t"
08834         "ldr    r7, [%[a], #92]\n\t"
08835         "ldr    r8, [%[b], #80]\n\t"
08836         "ldr    r9, [%[b], #84]\n\t"
08837         "ldr    r10, [%[b], #88]\n\t"
08838         "ldr    r14, [%[b], #92]\n\t"
08839         "adcs   r4, r4, r8\n\t"
08840         "adcs   r5, r5, r9\n\t"
08841         "adcs   r6, r6, r10\n\t"
08842         "adcs   r7, r7, r14\n\t"
08843         "str    r4, [%[r], #80]\n\t"
08844         "str    r5, [%[r], #84]\n\t"
08845         "str    r6, [%[r], #88]\n\t"
08846         "str    r7, [%[r], #92]\n\t"
08847         "ldr    r4, [%[a], #96]\n\t"
08848         "ldr    r5, [%[a], #100]\n\t"
08849         "ldr    r6, [%[a], #104]\n\t"
08850         "ldr    r7, [%[a], #108]\n\t"
08851         "ldr    r8, [%[b], #96]\n\t"
08852         "ldr    r9, [%[b], #100]\n\t"
08853         "ldr    r10, [%[b], #104]\n\t"
08854         "ldr    r14, [%[b], #108]\n\t"
08855         "adcs   r4, r4, r8\n\t"
08856         "adcs   r5, r5, r9\n\t"
08857         "adcs   r6, r6, r10\n\t"
08858         "adcs   r7, r7, r14\n\t"
08859         "str    r4, [%[r], #96]\n\t"
08860         "str    r5, [%[r], #100]\n\t"
08861         "str    r6, [%[r], #104]\n\t"
08862         "str    r7, [%[r], #108]\n\t"
08863         "ldr    r4, [%[a], #112]\n\t"
08864         "ldr    r5, [%[a], #116]\n\t"
08865         "ldr    r6, [%[a], #120]\n\t"
08866         "ldr    r7, [%[a], #124]\n\t"
08867         "ldr    r8, [%[b], #112]\n\t"
08868         "ldr    r9, [%[b], #116]\n\t"
08869         "ldr    r10, [%[b], #120]\n\t"
08870         "ldr    r14, [%[b], #124]\n\t"
08871         "adcs   r4, r4, r8\n\t"
08872         "adcs   r5, r5, r9\n\t"
08873         "adcs   r6, r6, r10\n\t"
08874         "adcs   r7, r7, r14\n\t"
08875         "str    r4, [%[r], #112]\n\t"
08876         "str    r5, [%[r], #116]\n\t"
08877         "str    r6, [%[r], #120]\n\t"
08878         "str    r7, [%[r], #124]\n\t"
08879         "adc    %[c], r12, r12\n\t"
08880         : [c] "+r" (c)
08881         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
08882         : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
08883     );
08884 
08885     return c;
08886 }
08887 
08888 /* Multiply a and b into r. (r = a * b)
08889  *
08890  * r  A single precision integer.
08891  * a  A single precision integer.
08892  * b  A single precision integer.
08893  */
08894 SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a,
08895     const sp_digit* b)
08896 {
08897     sp_digit p0[32];
08898     sp_digit p1[32];
08899     sp_digit p2[32];
08900     sp_digit p3[32];
08901     sp_digit p4[32];
08902     sp_digit p5[32];
08903     sp_digit t0[32];
08904     sp_digit t1[32];
08905     sp_digit t2[32];
08906     sp_digit a0[16];
08907     sp_digit a1[16];
08908     sp_digit a2[16];
08909     sp_digit b0[16];
08910     sp_digit b1[16];
08911     sp_digit b2[16];
08912     sp_3072_add_16(a0, a, &a[16]);
08913     sp_3072_add_16(b0, b, &b[16]);
08914     sp_3072_add_16(a1, &a[16], &a[32]);
08915     sp_3072_add_16(b1, &b[16], &b[32]);
08916     sp_3072_add_16(a2, a0, &a[32]);
08917     sp_3072_add_16(b2, b0, &b[32]);
08918     sp_3072_mul_16(p0, a, b);
08919     sp_3072_mul_16(p2, &a[16], &b[16]);
08920     sp_3072_mul_16(p4, &a[32], &b[32]);
08921     sp_3072_mul_16(p1, a0, b0);
08922     sp_3072_mul_16(p3, a1, b1);
08923     sp_3072_mul_16(p5, a2, b2);
08924     XMEMSET(r, 0, sizeof(*r)*2*48);
08925     sp_3072_sub_32(t0, p3, p2);
08926     sp_3072_sub_32(t1, p1, p2);
08927     sp_3072_sub_32(t2, p5, t0);
08928     sp_3072_sub_32(t2, t2, t1);
08929     sp_3072_sub_32(t0, t0, p4);
08930     sp_3072_sub_32(t1, t1, p0);
08931     sp_3072_add_32(r, r, p0);
08932     sp_3072_add_32(&r[16], &r[16], t1);
08933     sp_3072_add_32(&r[32], &r[32], t2);
08934     sp_3072_add_32(&r[48], &r[48], t0);
08935     sp_3072_add_32(&r[64], &r[64], p4);
08936 }
08937 
08938 /* Square a into r. (r = a * a)
08939  *
08940  * r  A single precision integer.
08941  * a  A single precision integer.
08942  */
08943 SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
08944 {
08945     sp_digit p0[32];
08946     sp_digit p1[32];
08947     sp_digit p2[32];
08948     sp_digit p3[32];
08949     sp_digit p4[32];
08950     sp_digit p5[32];
08951     sp_digit t0[32];
08952     sp_digit t1[32];
08953     sp_digit t2[32];
08954     sp_digit a0[16];
08955     sp_digit a1[16];
08956     sp_digit a2[16];
08957     sp_3072_add_16(a0, a, &a[16]);
08958     sp_3072_add_16(a1, &a[16], &a[32]);
08959     sp_3072_add_16(a2, a0, &a[32]);
08960     sp_3072_sqr_16(p0, a);
08961     sp_3072_sqr_16(p2, &a[16]);
08962     sp_3072_sqr_16(p4, &a[32]);
08963     sp_3072_sqr_16(p1, a0);
08964     sp_3072_sqr_16(p3, a1);
08965     sp_3072_sqr_16(p5, a2);
08966     XMEMSET(r, 0, sizeof(*r)*2*48);
08967     sp_3072_sub_32(t0, p3, p2);
08968     sp_3072_sub_32(t1, p1, p2);
08969     sp_3072_sub_32(t2, p5, t0);
08970     sp_3072_sub_32(t2, t2, t1);
08971     sp_3072_sub_32(t0, t0, p4);
08972     sp_3072_sub_32(t1, t1, p0);
08973     sp_3072_add_32(r, r, p0);
08974     sp_3072_add_32(&r[16], &r[16], t1);
08975     sp_3072_add_32(&r[32], &r[32], t2);
08976     sp_3072_add_32(&r[48], &r[48], t0);
08977     sp_3072_add_32(&r[64], &r[64], p4);
08978 }
08979 
08980 /* Add b to a into r. (r = a + b)
08981  *
08982  * r  A single precision integer.
08983  * a  A single precision integer.
08984  * b  A single precision integer.
08985  */
08986 static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a,
08987         const sp_digit* b)
08988 {
08989     sp_digit c = 0;
08990 
08991     __asm__ __volatile__ (
08992         "mov    r12, #0\n\t"
08993         "ldr    r4, [%[a], #0]\n\t"
08994         "ldr    r5, [%[a], #4]\n\t"
08995         "ldr    r6, [%[a], #8]\n\t"
08996         "ldr    r7, [%[a], #12]\n\t"
08997         "ldr    r8, [%[b], #0]\n\t"
08998         "ldr    r9, [%[b], #4]\n\t"
08999         "ldr    r10, [%[b], #8]\n\t"
09000         "ldr    r14, [%[b], #12]\n\t"
09001         "adds   r4, r4, r8\n\t"
09002         "adcs   r5, r5, r9\n\t"
09003         "adcs   r6, r6, r10\n\t"
09004         "adcs   r7, r7, r14\n\t"
09005         "str    r4, [%[r], #0]\n\t"
09006         "str    r5, [%[r], #4]\n\t"
09007         "str    r6, [%[r], #8]\n\t"
09008         "str    r7, [%[r], #12]\n\t"
09009         "ldr    r4, [%[a], #16]\n\t"
09010         "ldr    r5, [%[a], #20]\n\t"
09011         "ldr    r6, [%[a], #24]\n\t"
09012         "ldr    r7, [%[a], #28]\n\t"
09013         "ldr    r8, [%[b], #16]\n\t"
09014         "ldr    r9, [%[b], #20]\n\t"
09015         "ldr    r10, [%[b], #24]\n\t"
09016         "ldr    r14, [%[b], #28]\n\t"
09017         "adcs   r4, r4, r8\n\t"
09018         "adcs   r5, r5, r9\n\t"
09019         "adcs   r6, r6, r10\n\t"
09020         "adcs   r7, r7, r14\n\t"
09021         "str    r4, [%[r], #16]\n\t"
09022         "str    r5, [%[r], #20]\n\t"
09023         "str    r6, [%[r], #24]\n\t"
09024         "str    r7, [%[r], #28]\n\t"
09025         "ldr    r4, [%[a], #32]\n\t"
09026         "ldr    r5, [%[a], #36]\n\t"
09027         "ldr    r6, [%[a], #40]\n\t"
09028         "ldr    r7, [%[a], #44]\n\t"
09029         "ldr    r8, [%[b], #32]\n\t"
09030         "ldr    r9, [%[b], #36]\n\t"
09031         "ldr    r10, [%[b], #40]\n\t"
09032         "ldr    r14, [%[b], #44]\n\t"
09033         "adcs   r4, r4, r8\n\t"
09034         "adcs   r5, r5, r9\n\t"
09035         "adcs   r6, r6, r10\n\t"
09036         "adcs   r7, r7, r14\n\t"
09037         "str    r4, [%[r], #32]\n\t"
09038         "str    r5, [%[r], #36]\n\t"
09039         "str    r6, [%[r], #40]\n\t"
09040         "str    r7, [%[r], #44]\n\t"
09041         "ldr    r4, [%[a], #48]\n\t"
09042         "ldr    r5, [%[a], #52]\n\t"
09043         "ldr    r6, [%[a], #56]\n\t"
09044         "ldr    r7, [%[a], #60]\n\t"
09045         "ldr    r8, [%[b], #48]\n\t"
09046         "ldr    r9, [%[b], #52]\n\t"
09047         "ldr    r10, [%[b], #56]\n\t"
09048         "ldr    r14, [%[b], #60]\n\t"
09049         "adcs   r4, r4, r8\n\t"
09050         "adcs   r5, r5, r9\n\t"
09051         "adcs   r6, r6, r10\n\t"
09052         "adcs   r7, r7, r14\n\t"
09053         "str    r4, [%[r], #48]\n\t"
09054         "str    r5, [%[r], #52]\n\t"
09055         "str    r6, [%[r], #56]\n\t"
09056         "str    r7, [%[r], #60]\n\t"
09057         "ldr    r4, [%[a], #64]\n\t"
09058         "ldr    r5, [%[a], #68]\n\t"
09059         "ldr    r6, [%[a], #72]\n\t"
09060         "ldr    r7, [%[a], #76]\n\t"
09061         "ldr    r8, [%[b], #64]\n\t"
09062         "ldr    r9, [%[b], #68]\n\t"
09063         "ldr    r10, [%[b], #72]\n\t"
09064         "ldr    r14, [%[b], #76]\n\t"
09065         "adcs   r4, r4, r8\n\t"
09066         "adcs   r5, r5, r9\n\t"
09067         "adcs   r6, r6, r10\n\t"
09068         "adcs   r7, r7, r14\n\t"
09069         "str    r4, [%[r], #64]\n\t"
09070         "str    r5, [%[r], #68]\n\t"
09071         "str    r6, [%[r], #72]\n\t"
09072         "str    r7, [%[r], #76]\n\t"
09073         "ldr    r4, [%[a], #80]\n\t"
09074         "ldr    r5, [%[a], #84]\n\t"
09075         "ldr    r6, [%[a], #88]\n\t"
09076         "ldr    r7, [%[a], #92]\n\t"
09077         "ldr    r8, [%[b], #80]\n\t"
09078         "ldr    r9, [%[b], #84]\n\t"
09079         "ldr    r10, [%[b], #88]\n\t"
09080         "ldr    r14, [%[b], #92]\n\t"
09081         "adcs   r4, r4, r8\n\t"
09082         "adcs   r5, r5, r9\n\t"
09083         "adcs   r6, r6, r10\n\t"
09084         "adcs   r7, r7, r14\n\t"
09085         "str    r4, [%[r], #80]\n\t"
09086         "str    r5, [%[r], #84]\n\t"
09087         "str    r6, [%[r], #88]\n\t"
09088         "str    r7, [%[r], #92]\n\t"
09089         "ldr    r4, [%[a], #96]\n\t"
09090         "ldr    r5, [%[a], #100]\n\t"
09091         "ldr    r6, [%[a], #104]\n\t"
09092         "ldr    r7, [%[a], #108]\n\t"
09093         "ldr    r8, [%[b], #96]\n\t"
09094         "ldr    r9, [%[b], #100]\n\t"
09095         "ldr    r10, [%[b], #104]\n\t"
09096         "ldr    r14, [%[b], #108]\n\t"
09097         "adcs   r4, r4, r8\n\t"
09098         "adcs   r5, r5, r9\n\t"
09099         "adcs   r6, r6, r10\n\t"
09100         "adcs   r7, r7, r14\n\t"
09101         "str    r4, [%[r], #96]\n\t"
09102         "str    r5, [%[r], #100]\n\t"
09103         "str    r6, [%[r], #104]\n\t"
09104         "str    r7, [%[r], #108]\n\t"
09105         "ldr    r4, [%[a], #112]\n\t"
09106         "ldr    r5, [%[a], #116]\n\t"
09107         "ldr    r6, [%[a], #120]\n\t"
09108         "ldr    r7, [%[a], #124]\n\t"
09109         "ldr    r8, [%[b], #112]\n\t"
09110         "ldr    r9, [%[b], #116]\n\t"
09111         "ldr    r10, [%[b], #120]\n\t"
09112         "ldr    r14, [%[b], #124]\n\t"
09113         "adcs   r4, r4, r8\n\t"
09114         "adcs   r5, r5, r9\n\t"
09115         "adcs   r6, r6, r10\n\t"
09116         "adcs   r7, r7, r14\n\t"
09117         "str    r4, [%[r], #112]\n\t"
09118         "str    r5, [%[r], #116]\n\t"
09119         "str    r6, [%[r], #120]\n\t"
09120         "str    r7, [%[r], #124]\n\t"
09121         "ldr    r4, [%[a], #128]\n\t"
09122         "ldr    r5, [%[a], #132]\n\t"
09123         "ldr    r6, [%[a], #136]\n\t"
09124         "ldr    r7, [%[a], #140]\n\t"
09125         "ldr    r8, [%[b], #128]\n\t"
09126         "ldr    r9, [%[b], #132]\n\t"
09127         "ldr    r10, [%[b], #136]\n\t"
09128         "ldr    r14, [%[b], #140]\n\t"
09129         "adcs   r4, r4, r8\n\t"
09130         "adcs   r5, r5, r9\n\t"
09131         "adcs   r6, r6, r10\n\t"
09132         "adcs   r7, r7, r14\n\t"
09133         "str    r4, [%[r], #128]\n\t"
09134         "str    r5, [%[r], #132]\n\t"
09135         "str    r6, [%[r], #136]\n\t"
09136         "str    r7, [%[r], #140]\n\t"
09137         "ldr    r4, [%[a], #144]\n\t"
09138         "ldr    r5, [%[a], #148]\n\t"
09139         "ldr    r6, [%[a], #152]\n\t"
09140         "ldr    r7, [%[a], #156]\n\t"
09141         "ldr    r8, [%[b], #144]\n\t"
09142         "ldr    r9, [%[b], #148]\n\t"
09143         "ldr    r10, [%[b], #152]\n\t"
09144         "ldr    r14, [%[b], #156]\n\t"
09145         "adcs   r4, r4, r8\n\t"
09146         "adcs   r5, r5, r9\n\t"
09147         "adcs   r6, r6, r10\n\t"
09148         "adcs   r7, r7, r14\n\t"
09149         "str    r4, [%[r], #144]\n\t"
09150         "str    r5, [%[r], #148]\n\t"
09151         "str    r6, [%[r], #152]\n\t"
09152         "str    r7, [%[r], #156]\n\t"
09153         "ldr    r4, [%[a], #160]\n\t"
09154         "ldr    r5, [%[a], #164]\n\t"
09155         "ldr    r6, [%[a], #168]\n\t"
09156         "ldr    r7, [%[a], #172]\n\t"
09157         "ldr    r8, [%[b], #160]\n\t"
09158         "ldr    r9, [%[b], #164]\n\t"
09159         "ldr    r10, [%[b], #168]\n\t"
09160         "ldr    r14, [%[b], #172]\n\t"
09161         "adcs   r4, r4, r8\n\t"
09162         "adcs   r5, r5, r9\n\t"
09163         "adcs   r6, r6, r10\n\t"
09164         "adcs   r7, r7, r14\n\t"
09165         "str    r4, [%[r], #160]\n\t"
09166         "str    r5, [%[r], #164]\n\t"
09167         "str    r6, [%[r], #168]\n\t"
09168         "str    r7, [%[r], #172]\n\t"
09169         "ldr    r4, [%[a], #176]\n\t"
09170         "ldr    r5, [%[a], #180]\n\t"
09171         "ldr    r6, [%[a], #184]\n\t"
09172         "ldr    r7, [%[a], #188]\n\t"
09173         "ldr    r8, [%[b], #176]\n\t"
09174         "ldr    r9, [%[b], #180]\n\t"
09175         "ldr    r10, [%[b], #184]\n\t"
09176         "ldr    r14, [%[b], #188]\n\t"
09177         "adcs   r4, r4, r8\n\t"
09178         "adcs   r5, r5, r9\n\t"
09179         "adcs   r6, r6, r10\n\t"
09180         "adcs   r7, r7, r14\n\t"
09181         "str    r4, [%[r], #176]\n\t"
09182         "str    r5, [%[r], #180]\n\t"
09183         "str    r6, [%[r], #184]\n\t"
09184         "str    r7, [%[r], #188]\n\t"
09185         "adc    %[c], r12, r12\n\t"
09186         : [c] "+r" (c)
09187         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
09188         : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
09189     );
09190 
09191     return c;
09192 }
09193 
09194 /* Sub b from a into a. (a -= b)
09195  *
09196  * a  A single precision integer and result.
09197  * b  A single precision integer.
09198  */
09199 static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b)
09200 {
09201     sp_digit c = 0;
09202 
09203     __asm__ __volatile__ (
09204         "ldr    r2, [%[a], #0]\n\t"
09205         "ldr    r3, [%[a], #4]\n\t"
09206         "ldr    r4, [%[a], #8]\n\t"
09207         "ldr    r5, [%[a], #12]\n\t"
09208         "ldr    r6, [%[b], #0]\n\t"
09209         "ldr    r7, [%[b], #4]\n\t"
09210         "ldr    r8, [%[b], #8]\n\t"
09211         "ldr    r9, [%[b], #12]\n\t"
09212         "subs   r2, r2, r6\n\t"
09213         "sbcs   r3, r3, r7\n\t"
09214         "sbcs   r4, r4, r8\n\t"
09215         "sbcs   r5, r5, r9\n\t"
09216         "str    r2, [%[a], #0]\n\t"
09217         "str    r3, [%[a], #4]\n\t"
09218         "str    r4, [%[a], #8]\n\t"
09219         "str    r5, [%[a], #12]\n\t"
09220         "ldr    r2, [%[a], #16]\n\t"
09221         "ldr    r3, [%[a], #20]\n\t"
09222         "ldr    r4, [%[a], #24]\n\t"
09223         "ldr    r5, [%[a], #28]\n\t"
09224         "ldr    r6, [%[b], #16]\n\t"
09225         "ldr    r7, [%[b], #20]\n\t"
09226         "ldr    r8, [%[b], #24]\n\t"
09227         "ldr    r9, [%[b], #28]\n\t"
09228         "sbcs   r2, r2, r6\n\t"
09229         "sbcs   r3, r3, r7\n\t"
09230         "sbcs   r4, r4, r8\n\t"
09231         "sbcs   r5, r5, r9\n\t"
09232         "str    r2, [%[a], #16]\n\t"
09233         "str    r3, [%[a], #20]\n\t"
09234         "str    r4, [%[a], #24]\n\t"
09235         "str    r5, [%[a], #28]\n\t"
09236         "ldr    r2, [%[a], #32]\n\t"
09237         "ldr    r3, [%[a], #36]\n\t"
09238         "ldr    r4, [%[a], #40]\n\t"
09239         "ldr    r5, [%[a], #44]\n\t"
09240         "ldr    r6, [%[b], #32]\n\t"
09241         "ldr    r7, [%[b], #36]\n\t"
09242         "ldr    r8, [%[b], #40]\n\t"
09243         "ldr    r9, [%[b], #44]\n\t"
09244         "sbcs   r2, r2, r6\n\t"
09245         "sbcs   r3, r3, r7\n\t"
09246         "sbcs   r4, r4, r8\n\t"
09247         "sbcs   r5, r5, r9\n\t"
09248         "str    r2, [%[a], #32]\n\t"
09249         "str    r3, [%[a], #36]\n\t"
09250         "str    r4, [%[a], #40]\n\t"
09251         "str    r5, [%[a], #44]\n\t"
09252         "ldr    r2, [%[a], #48]\n\t"
09253         "ldr    r3, [%[a], #52]\n\t"
09254         "ldr    r4, [%[a], #56]\n\t"
09255         "ldr    r5, [%[a], #60]\n\t"
09256         "ldr    r6, [%[b], #48]\n\t"
09257         "ldr    r7, [%[b], #52]\n\t"
09258         "ldr    r8, [%[b], #56]\n\t"
09259         "ldr    r9, [%[b], #60]\n\t"
09260         "sbcs   r2, r2, r6\n\t"
09261         "sbcs   r3, r3, r7\n\t"
09262         "sbcs   r4, r4, r8\n\t"
09263         "sbcs   r5, r5, r9\n\t"
09264         "str    r2, [%[a], #48]\n\t"
09265         "str    r3, [%[a], #52]\n\t"
09266         "str    r4, [%[a], #56]\n\t"
09267         "str    r5, [%[a], #60]\n\t"
09268         "ldr    r2, [%[a], #64]\n\t"
09269         "ldr    r3, [%[a], #68]\n\t"
09270         "ldr    r4, [%[a], #72]\n\t"
09271         "ldr    r5, [%[a], #76]\n\t"
09272         "ldr    r6, [%[b], #64]\n\t"
09273         "ldr    r7, [%[b], #68]\n\t"
09274         "ldr    r8, [%[b], #72]\n\t"
09275         "ldr    r9, [%[b], #76]\n\t"
09276         "sbcs   r2, r2, r6\n\t"
09277         "sbcs   r3, r3, r7\n\t"
09278         "sbcs   r4, r4, r8\n\t"
09279         "sbcs   r5, r5, r9\n\t"
09280         "str    r2, [%[a], #64]\n\t"
09281         "str    r3, [%[a], #68]\n\t"
09282         "str    r4, [%[a], #72]\n\t"
09283         "str    r5, [%[a], #76]\n\t"
09284         "ldr    r2, [%[a], #80]\n\t"
09285         "ldr    r3, [%[a], #84]\n\t"
09286         "ldr    r4, [%[a], #88]\n\t"
09287         "ldr    r5, [%[a], #92]\n\t"
09288         "ldr    r6, [%[b], #80]\n\t"
09289         "ldr    r7, [%[b], #84]\n\t"
09290         "ldr    r8, [%[b], #88]\n\t"
09291         "ldr    r9, [%[b], #92]\n\t"
09292         "sbcs   r2, r2, r6\n\t"
09293         "sbcs   r3, r3, r7\n\t"
09294         "sbcs   r4, r4, r8\n\t"
09295         "sbcs   r5, r5, r9\n\t"
09296         "str    r2, [%[a], #80]\n\t"
09297         "str    r3, [%[a], #84]\n\t"
09298         "str    r4, [%[a], #88]\n\t"
09299         "str    r5, [%[a], #92]\n\t"
09300         "ldr    r2, [%[a], #96]\n\t"
09301         "ldr    r3, [%[a], #100]\n\t"
09302         "ldr    r4, [%[a], #104]\n\t"
09303         "ldr    r5, [%[a], #108]\n\t"
09304         "ldr    r6, [%[b], #96]\n\t"
09305         "ldr    r7, [%[b], #100]\n\t"
09306         "ldr    r8, [%[b], #104]\n\t"
09307         "ldr    r9, [%[b], #108]\n\t"
09308         "sbcs   r2, r2, r6\n\t"
09309         "sbcs   r3, r3, r7\n\t"
09310         "sbcs   r4, r4, r8\n\t"
09311         "sbcs   r5, r5, r9\n\t"
09312         "str    r2, [%[a], #96]\n\t"
09313         "str    r3, [%[a], #100]\n\t"
09314         "str    r4, [%[a], #104]\n\t"
09315         "str    r5, [%[a], #108]\n\t"
09316         "ldr    r2, [%[a], #112]\n\t"
09317         "ldr    r3, [%[a], #116]\n\t"
09318         "ldr    r4, [%[a], #120]\n\t"
09319         "ldr    r5, [%[a], #124]\n\t"
09320         "ldr    r6, [%[b], #112]\n\t"
09321         "ldr    r7, [%[b], #116]\n\t"
09322         "ldr    r8, [%[b], #120]\n\t"
09323         "ldr    r9, [%[b], #124]\n\t"
09324         "sbcs   r2, r2, r6\n\t"
09325         "sbcs   r3, r3, r7\n\t"
09326         "sbcs   r4, r4, r8\n\t"
09327         "sbcs   r5, r5, r9\n\t"
09328         "str    r2, [%[a], #112]\n\t"
09329         "str    r3, [%[a], #116]\n\t"
09330         "str    r4, [%[a], #120]\n\t"
09331         "str    r5, [%[a], #124]\n\t"
09332         "ldr    r2, [%[a], #128]\n\t"
09333         "ldr    r3, [%[a], #132]\n\t"
09334         "ldr    r4, [%[a], #136]\n\t"
09335         "ldr    r5, [%[a], #140]\n\t"
09336         "ldr    r6, [%[b], #128]\n\t"
09337         "ldr    r7, [%[b], #132]\n\t"
09338         "ldr    r8, [%[b], #136]\n\t"
09339         "ldr    r9, [%[b], #140]\n\t"
09340         "sbcs   r2, r2, r6\n\t"
09341         "sbcs   r3, r3, r7\n\t"
09342         "sbcs   r4, r4, r8\n\t"
09343         "sbcs   r5, r5, r9\n\t"
09344         "str    r2, [%[a], #128]\n\t"
09345         "str    r3, [%[a], #132]\n\t"
09346         "str    r4, [%[a], #136]\n\t"
09347         "str    r5, [%[a], #140]\n\t"
09348         "ldr    r2, [%[a], #144]\n\t"
09349         "ldr    r3, [%[a], #148]\n\t"
09350         "ldr    r4, [%[a], #152]\n\t"
09351         "ldr    r5, [%[a], #156]\n\t"
09352         "ldr    r6, [%[b], #144]\n\t"
09353         "ldr    r7, [%[b], #148]\n\t"
09354         "ldr    r8, [%[b], #152]\n\t"
09355         "ldr    r9, [%[b], #156]\n\t"
09356         "sbcs   r2, r2, r6\n\t"
09357         "sbcs   r3, r3, r7\n\t"
09358         "sbcs   r4, r4, r8\n\t"
09359         "sbcs   r5, r5, r9\n\t"
09360         "str    r2, [%[a], #144]\n\t"
09361         "str    r3, [%[a], #148]\n\t"
09362         "str    r4, [%[a], #152]\n\t"
09363         "str    r5, [%[a], #156]\n\t"
09364         "ldr    r2, [%[a], #160]\n\t"
09365         "ldr    r3, [%[a], #164]\n\t"
09366         "ldr    r4, [%[a], #168]\n\t"
09367         "ldr    r5, [%[a], #172]\n\t"
09368         "ldr    r6, [%[b], #160]\n\t"
09369         "ldr    r7, [%[b], #164]\n\t"
09370         "ldr    r8, [%[b], #168]\n\t"
09371         "ldr    r9, [%[b], #172]\n\t"
09372         "sbcs   r2, r2, r6\n\t"
09373         "sbcs   r3, r3, r7\n\t"
09374         "sbcs   r4, r4, r8\n\t"
09375         "sbcs   r5, r5, r9\n\t"
09376         "str    r2, [%[a], #160]\n\t"
09377         "str    r3, [%[a], #164]\n\t"
09378         "str    r4, [%[a], #168]\n\t"
09379         "str    r5, [%[a], #172]\n\t"
09380         "ldr    r2, [%[a], #176]\n\t"
09381         "ldr    r3, [%[a], #180]\n\t"
09382         "ldr    r4, [%[a], #184]\n\t"
09383         "ldr    r5, [%[a], #188]\n\t"
09384         "ldr    r6, [%[b], #176]\n\t"
09385         "ldr    r7, [%[b], #180]\n\t"
09386         "ldr    r8, [%[b], #184]\n\t"
09387         "ldr    r9, [%[b], #188]\n\t"
09388         "sbcs   r2, r2, r6\n\t"
09389         "sbcs   r3, r3, r7\n\t"
09390         "sbcs   r4, r4, r8\n\t"
09391         "sbcs   r5, r5, r9\n\t"
09392         "str    r2, [%[a], #176]\n\t"
09393         "str    r3, [%[a], #180]\n\t"
09394         "str    r4, [%[a], #184]\n\t"
09395         "str    r5, [%[a], #188]\n\t"
09396         "ldr    r2, [%[a], #192]\n\t"
09397         "ldr    r3, [%[a], #196]\n\t"
09398         "ldr    r4, [%[a], #200]\n\t"
09399         "ldr    r5, [%[a], #204]\n\t"
09400         "ldr    r6, [%[b], #192]\n\t"
09401         "ldr    r7, [%[b], #196]\n\t"
09402         "ldr    r8, [%[b], #200]\n\t"
09403         "ldr    r9, [%[b], #204]\n\t"
09404         "sbcs   r2, r2, r6\n\t"
09405         "sbcs   r3, r3, r7\n\t"
09406         "sbcs   r4, r4, r8\n\t"
09407         "sbcs   r5, r5, r9\n\t"
09408         "str    r2, [%[a], #192]\n\t"
09409         "str    r3, [%[a], #196]\n\t"
09410         "str    r4, [%[a], #200]\n\t"
09411         "str    r5, [%[a], #204]\n\t"
09412         "ldr    r2, [%[a], #208]\n\t"
09413         "ldr    r3, [%[a], #212]\n\t"
09414         "ldr    r4, [%[a], #216]\n\t"
09415         "ldr    r5, [%[a], #220]\n\t"
09416         "ldr    r6, [%[b], #208]\n\t"
09417         "ldr    r7, [%[b], #212]\n\t"
09418         "ldr    r8, [%[b], #216]\n\t"
09419         "ldr    r9, [%[b], #220]\n\t"
09420         "sbcs   r2, r2, r6\n\t"
09421         "sbcs   r3, r3, r7\n\t"
09422         "sbcs   r4, r4, r8\n\t"
09423         "sbcs   r5, r5, r9\n\t"
09424         "str    r2, [%[a], #208]\n\t"
09425         "str    r3, [%[a], #212]\n\t"
09426         "str    r4, [%[a], #216]\n\t"
09427         "str    r5, [%[a], #220]\n\t"
09428         "ldr    r2, [%[a], #224]\n\t"
09429         "ldr    r3, [%[a], #228]\n\t"
09430         "ldr    r4, [%[a], #232]\n\t"
09431         "ldr    r5, [%[a], #236]\n\t"
09432         "ldr    r6, [%[b], #224]\n\t"
09433         "ldr    r7, [%[b], #228]\n\t"
09434         "ldr    r8, [%[b], #232]\n\t"
09435         "ldr    r9, [%[b], #236]\n\t"
09436         "sbcs   r2, r2, r6\n\t"
09437         "sbcs   r3, r3, r7\n\t"
09438         "sbcs   r4, r4, r8\n\t"
09439         "sbcs   r5, r5, r9\n\t"
09440         "str    r2, [%[a], #224]\n\t"
09441         "str    r3, [%[a], #228]\n\t"
09442         "str    r4, [%[a], #232]\n\t"
09443         "str    r5, [%[a], #236]\n\t"
09444         "ldr    r2, [%[a], #240]\n\t"
09445         "ldr    r3, [%[a], #244]\n\t"
09446         "ldr    r4, [%[a], #248]\n\t"
09447         "ldr    r5, [%[a], #252]\n\t"
09448         "ldr    r6, [%[b], #240]\n\t"
09449         "ldr    r7, [%[b], #244]\n\t"
09450         "ldr    r8, [%[b], #248]\n\t"
09451         "ldr    r9, [%[b], #252]\n\t"
09452         "sbcs   r2, r2, r6\n\t"
09453         "sbcs   r3, r3, r7\n\t"
09454         "sbcs   r4, r4, r8\n\t"
09455         "sbcs   r5, r5, r9\n\t"
09456         "str    r2, [%[a], #240]\n\t"
09457         "str    r3, [%[a], #244]\n\t"
09458         "str    r4, [%[a], #248]\n\t"
09459         "str    r5, [%[a], #252]\n\t"
09460         "ldr    r2, [%[a], #256]\n\t"
09461         "ldr    r3, [%[a], #260]\n\t"
09462         "ldr    r4, [%[a], #264]\n\t"
09463         "ldr    r5, [%[a], #268]\n\t"
09464         "ldr    r6, [%[b], #256]\n\t"
09465         "ldr    r7, [%[b], #260]\n\t"
09466         "ldr    r8, [%[b], #264]\n\t"
09467         "ldr    r9, [%[b], #268]\n\t"
09468         "sbcs   r2, r2, r6\n\t"
09469         "sbcs   r3, r3, r7\n\t"
09470         "sbcs   r4, r4, r8\n\t"
09471         "sbcs   r5, r5, r9\n\t"
09472         "str    r2, [%[a], #256]\n\t"
09473         "str    r3, [%[a], #260]\n\t"
09474         "str    r4, [%[a], #264]\n\t"
09475         "str    r5, [%[a], #268]\n\t"
09476         "ldr    r2, [%[a], #272]\n\t"
09477         "ldr    r3, [%[a], #276]\n\t"
09478         "ldr    r4, [%[a], #280]\n\t"
09479         "ldr    r5, [%[a], #284]\n\t"
09480         "ldr    r6, [%[b], #272]\n\t"
09481         "ldr    r7, [%[b], #276]\n\t"
09482         "ldr    r8, [%[b], #280]\n\t"
09483         "ldr    r9, [%[b], #284]\n\t"
09484         "sbcs   r2, r2, r6\n\t"
09485         "sbcs   r3, r3, r7\n\t"
09486         "sbcs   r4, r4, r8\n\t"
09487         "sbcs   r5, r5, r9\n\t"
09488         "str    r2, [%[a], #272]\n\t"
09489         "str    r3, [%[a], #276]\n\t"
09490         "str    r4, [%[a], #280]\n\t"
09491         "str    r5, [%[a], #284]\n\t"
09492         "ldr    r2, [%[a], #288]\n\t"
09493         "ldr    r3, [%[a], #292]\n\t"
09494         "ldr    r4, [%[a], #296]\n\t"
09495         "ldr    r5, [%[a], #300]\n\t"
09496         "ldr    r6, [%[b], #288]\n\t"
09497         "ldr    r7, [%[b], #292]\n\t"
09498         "ldr    r8, [%[b], #296]\n\t"
09499         "ldr    r9, [%[b], #300]\n\t"
09500         "sbcs   r2, r2, r6\n\t"
09501         "sbcs   r3, r3, r7\n\t"
09502         "sbcs   r4, r4, r8\n\t"
09503         "sbcs   r5, r5, r9\n\t"
09504         "str    r2, [%[a], #288]\n\t"
09505         "str    r3, [%[a], #292]\n\t"
09506         "str    r4, [%[a], #296]\n\t"
09507         "str    r5, [%[a], #300]\n\t"
09508         "ldr    r2, [%[a], #304]\n\t"
09509         "ldr    r3, [%[a], #308]\n\t"
09510         "ldr    r4, [%[a], #312]\n\t"
09511         "ldr    r5, [%[a], #316]\n\t"
09512         "ldr    r6, [%[b], #304]\n\t"
09513         "ldr    r7, [%[b], #308]\n\t"
09514         "ldr    r8, [%[b], #312]\n\t"
09515         "ldr    r9, [%[b], #316]\n\t"
09516         "sbcs   r2, r2, r6\n\t"
09517         "sbcs   r3, r3, r7\n\t"
09518         "sbcs   r4, r4, r8\n\t"
09519         "sbcs   r5, r5, r9\n\t"
09520         "str    r2, [%[a], #304]\n\t"
09521         "str    r3, [%[a], #308]\n\t"
09522         "str    r4, [%[a], #312]\n\t"
09523         "str    r5, [%[a], #316]\n\t"
09524         "ldr    r2, [%[a], #320]\n\t"
09525         "ldr    r3, [%[a], #324]\n\t"
09526         "ldr    r4, [%[a], #328]\n\t"
09527         "ldr    r5, [%[a], #332]\n\t"
09528         "ldr    r6, [%[b], #320]\n\t"
09529         "ldr    r7, [%[b], #324]\n\t"
09530         "ldr    r8, [%[b], #328]\n\t"
09531         "ldr    r9, [%[b], #332]\n\t"
09532         "sbcs   r2, r2, r6\n\t"
09533         "sbcs   r3, r3, r7\n\t"
09534         "sbcs   r4, r4, r8\n\t"
09535         "sbcs   r5, r5, r9\n\t"
09536         "str    r2, [%[a], #320]\n\t"
09537         "str    r3, [%[a], #324]\n\t"
09538         "str    r4, [%[a], #328]\n\t"
09539         "str    r5, [%[a], #332]\n\t"
09540         "ldr    r2, [%[a], #336]\n\t"
09541         "ldr    r3, [%[a], #340]\n\t"
09542         "ldr    r4, [%[a], #344]\n\t"
09543         "ldr    r5, [%[a], #348]\n\t"
09544         "ldr    r6, [%[b], #336]\n\t"
09545         "ldr    r7, [%[b], #340]\n\t"
09546         "ldr    r8, [%[b], #344]\n\t"
09547         "ldr    r9, [%[b], #348]\n\t"
09548         "sbcs   r2, r2, r6\n\t"
09549         "sbcs   r3, r3, r7\n\t"
09550         "sbcs   r4, r4, r8\n\t"
09551         "sbcs   r5, r5, r9\n\t"
09552         "str    r2, [%[a], #336]\n\t"
09553         "str    r3, [%[a], #340]\n\t"
09554         "str    r4, [%[a], #344]\n\t"
09555         "str    r5, [%[a], #348]\n\t"
09556         "ldr    r2, [%[a], #352]\n\t"
09557         "ldr    r3, [%[a], #356]\n\t"
09558         "ldr    r4, [%[a], #360]\n\t"
09559         "ldr    r5, [%[a], #364]\n\t"
09560         "ldr    r6, [%[b], #352]\n\t"
09561         "ldr    r7, [%[b], #356]\n\t"
09562         "ldr    r8, [%[b], #360]\n\t"
09563         "ldr    r9, [%[b], #364]\n\t"
09564         "sbcs   r2, r2, r6\n\t"
09565         "sbcs   r3, r3, r7\n\t"
09566         "sbcs   r4, r4, r8\n\t"
09567         "sbcs   r5, r5, r9\n\t"
09568         "str    r2, [%[a], #352]\n\t"
09569         "str    r3, [%[a], #356]\n\t"
09570         "str    r4, [%[a], #360]\n\t"
09571         "str    r5, [%[a], #364]\n\t"
09572         "ldr    r2, [%[a], #368]\n\t"
09573         "ldr    r3, [%[a], #372]\n\t"
09574         "ldr    r4, [%[a], #376]\n\t"
09575         "ldr    r5, [%[a], #380]\n\t"
09576         "ldr    r6, [%[b], #368]\n\t"
09577         "ldr    r7, [%[b], #372]\n\t"
09578         "ldr    r8, [%[b], #376]\n\t"
09579         "ldr    r9, [%[b], #380]\n\t"
09580         "sbcs   r2, r2, r6\n\t"
09581         "sbcs   r3, r3, r7\n\t"
09582         "sbcs   r4, r4, r8\n\t"
09583         "sbcs   r5, r5, r9\n\t"
09584         "str    r2, [%[a], #368]\n\t"
09585         "str    r3, [%[a], #372]\n\t"
09586         "str    r4, [%[a], #376]\n\t"
09587         "str    r5, [%[a], #380]\n\t"
09588         "sbc    %[c], r9, r9\n\t"
09589         : [c] "+r" (c)
09590         : [a] "r" (a), [b] "r" (b)
09591         : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
09592     );
09593 
09594     return c;
09595 }
09596 
09597 /* Add b to a into r. (r = a + b)
09598  *
09599  * r  A single precision integer.
09600  * a  A single precision integer.
09601  * b  A single precision integer.
09602  */
09603 static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a,
09604         const sp_digit* b)
09605 {
09606     sp_digit c = 0;
09607 
09608     __asm__ __volatile__ (
09609         "mov    r12, #0\n\t"
09610         "ldr    r4, [%[a], #0]\n\t"
09611         "ldr    r5, [%[a], #4]\n\t"
09612         "ldr    r6, [%[a], #8]\n\t"
09613         "ldr    r7, [%[a], #12]\n\t"
09614         "ldr    r8, [%[b], #0]\n\t"
09615         "ldr    r9, [%[b], #4]\n\t"
09616         "ldr    r10, [%[b], #8]\n\t"
09617         "ldr    r14, [%[b], #12]\n\t"
09618         "adds   r4, r4, r8\n\t"
09619         "adcs   r5, r5, r9\n\t"
09620         "adcs   r6, r6, r10\n\t"
09621         "adcs   r7, r7, r14\n\t"
09622         "str    r4, [%[r], #0]\n\t"
09623         "str    r5, [%[r], #4]\n\t"
09624         "str    r6, [%[r], #8]\n\t"
09625         "str    r7, [%[r], #12]\n\t"
09626         "ldr    r4, [%[a], #16]\n\t"
09627         "ldr    r5, [%[a], #20]\n\t"
09628         "ldr    r6, [%[a], #24]\n\t"
09629         "ldr    r7, [%[a], #28]\n\t"
09630         "ldr    r8, [%[b], #16]\n\t"
09631         "ldr    r9, [%[b], #20]\n\t"
09632         "ldr    r10, [%[b], #24]\n\t"
09633         "ldr    r14, [%[b], #28]\n\t"
09634         "adcs   r4, r4, r8\n\t"
09635         "adcs   r5, r5, r9\n\t"
09636         "adcs   r6, r6, r10\n\t"
09637         "adcs   r7, r7, r14\n\t"
09638         "str    r4, [%[r], #16]\n\t"
09639         "str    r5, [%[r], #20]\n\t"
09640         "str    r6, [%[r], #24]\n\t"
09641         "str    r7, [%[r], #28]\n\t"
09642         "ldr    r4, [%[a], #32]\n\t"
09643         "ldr    r5, [%[a], #36]\n\t"
09644         "ldr    r6, [%[a], #40]\n\t"
09645         "ldr    r7, [%[a], #44]\n\t"
09646         "ldr    r8, [%[b], #32]\n\t"
09647         "ldr    r9, [%[b], #36]\n\t"
09648         "ldr    r10, [%[b], #40]\n\t"
09649         "ldr    r14, [%[b], #44]\n\t"
09650         "adcs   r4, r4, r8\n\t"
09651         "adcs   r5, r5, r9\n\t"
09652         "adcs   r6, r6, r10\n\t"
09653         "adcs   r7, r7, r14\n\t"
09654         "str    r4, [%[r], #32]\n\t"
09655         "str    r5, [%[r], #36]\n\t"
09656         "str    r6, [%[r], #40]\n\t"
09657         "str    r7, [%[r], #44]\n\t"
09658         "ldr    r4, [%[a], #48]\n\t"
09659         "ldr    r5, [%[a], #52]\n\t"
09660         "ldr    r6, [%[a], #56]\n\t"
09661         "ldr    r7, [%[a], #60]\n\t"
09662         "ldr    r8, [%[b], #48]\n\t"
09663         "ldr    r9, [%[b], #52]\n\t"
09664         "ldr    r10, [%[b], #56]\n\t"
09665         "ldr    r14, [%[b], #60]\n\t"
09666         "adcs   r4, r4, r8\n\t"
09667         "adcs   r5, r5, r9\n\t"
09668         "adcs   r6, r6, r10\n\t"
09669         "adcs   r7, r7, r14\n\t"
09670         "str    r4, [%[r], #48]\n\t"
09671         "str    r5, [%[r], #52]\n\t"
09672         "str    r6, [%[r], #56]\n\t"
09673         "str    r7, [%[r], #60]\n\t"
09674         "ldr    r4, [%[a], #64]\n\t"
09675         "ldr    r5, [%[a], #68]\n\t"
09676         "ldr    r6, [%[a], #72]\n\t"
09677         "ldr    r7, [%[a], #76]\n\t"
09678         "ldr    r8, [%[b], #64]\n\t"
09679         "ldr    r9, [%[b], #68]\n\t"
09680         "ldr    r10, [%[b], #72]\n\t"
09681         "ldr    r14, [%[b], #76]\n\t"
09682         "adcs   r4, r4, r8\n\t"
09683         "adcs   r5, r5, r9\n\t"
09684         "adcs   r6, r6, r10\n\t"
09685         "adcs   r7, r7, r14\n\t"
09686         "str    r4, [%[r], #64]\n\t"
09687         "str    r5, [%[r], #68]\n\t"
09688         "str    r6, [%[r], #72]\n\t"
09689         "str    r7, [%[r], #76]\n\t"
09690         "ldr    r4, [%[a], #80]\n\t"
09691         "ldr    r5, [%[a], #84]\n\t"
09692         "ldr    r6, [%[a], #88]\n\t"
09693         "ldr    r7, [%[a], #92]\n\t"
09694         "ldr    r8, [%[b], #80]\n\t"
09695         "ldr    r9, [%[b], #84]\n\t"
09696         "ldr    r10, [%[b], #88]\n\t"
09697         "ldr    r14, [%[b], #92]\n\t"
09698         "adcs   r4, r4, r8\n\t"
09699         "adcs   r5, r5, r9\n\t"
09700         "adcs   r6, r6, r10\n\t"
09701         "adcs   r7, r7, r14\n\t"
09702         "str    r4, [%[r], #80]\n\t"
09703         "str    r5, [%[r], #84]\n\t"
09704         "str    r6, [%[r], #88]\n\t"
09705         "str    r7, [%[r], #92]\n\t"
09706         "ldr    r4, [%[a], #96]\n\t"
09707         "ldr    r5, [%[a], #100]\n\t"
09708         "ldr    r6, [%[a], #104]\n\t"
09709         "ldr    r7, [%[a], #108]\n\t"
09710         "ldr    r8, [%[b], #96]\n\t"
09711         "ldr    r9, [%[b], #100]\n\t"
09712         "ldr    r10, [%[b], #104]\n\t"
09713         "ldr    r14, [%[b], #108]\n\t"
09714         "adcs   r4, r4, r8\n\t"
09715         "adcs   r5, r5, r9\n\t"
09716         "adcs   r6, r6, r10\n\t"
09717         "adcs   r7, r7, r14\n\t"
09718         "str    r4, [%[r], #96]\n\t"
09719         "str    r5, [%[r], #100]\n\t"
09720         "str    r6, [%[r], #104]\n\t"
09721         "str    r7, [%[r], #108]\n\t"
09722         "ldr    r4, [%[a], #112]\n\t"
09723         "ldr    r5, [%[a], #116]\n\t"
09724         "ldr    r6, [%[a], #120]\n\t"
09725         "ldr    r7, [%[a], #124]\n\t"
09726         "ldr    r8, [%[b], #112]\n\t"
09727         "ldr    r9, [%[b], #116]\n\t"
09728         "ldr    r10, [%[b], #120]\n\t"
09729         "ldr    r14, [%[b], #124]\n\t"
09730         "adcs   r4, r4, r8\n\t"
09731         "adcs   r5, r5, r9\n\t"
09732         "adcs   r6, r6, r10\n\t"
09733         "adcs   r7, r7, r14\n\t"
09734         "str    r4, [%[r], #112]\n\t"
09735         "str    r5, [%[r], #116]\n\t"
09736         "str    r6, [%[r], #120]\n\t"
09737         "str    r7, [%[r], #124]\n\t"
09738         "ldr    r4, [%[a], #128]\n\t"
09739         "ldr    r5, [%[a], #132]\n\t"
09740         "ldr    r6, [%[a], #136]\n\t"
09741         "ldr    r7, [%[a], #140]\n\t"
09742         "ldr    r8, [%[b], #128]\n\t"
09743         "ldr    r9, [%[b], #132]\n\t"
09744         "ldr    r10, [%[b], #136]\n\t"
09745         "ldr    r14, [%[b], #140]\n\t"
09746         "adcs   r4, r4, r8\n\t"
09747         "adcs   r5, r5, r9\n\t"
09748         "adcs   r6, r6, r10\n\t"
09749         "adcs   r7, r7, r14\n\t"
09750         "str    r4, [%[r], #128]\n\t"
09751         "str    r5, [%[r], #132]\n\t"
09752         "str    r6, [%[r], #136]\n\t"
09753         "str    r7, [%[r], #140]\n\t"
09754         "ldr    r4, [%[a], #144]\n\t"
09755         "ldr    r5, [%[a], #148]\n\t"
09756         "ldr    r6, [%[a], #152]\n\t"
09757         "ldr    r7, [%[a], #156]\n\t"
09758         "ldr    r8, [%[b], #144]\n\t"
09759         "ldr    r9, [%[b], #148]\n\t"
09760         "ldr    r10, [%[b], #152]\n\t"
09761         "ldr    r14, [%[b], #156]\n\t"
09762         "adcs   r4, r4, r8\n\t"
09763         "adcs   r5, r5, r9\n\t"
09764         "adcs   r6, r6, r10\n\t"
09765         "adcs   r7, r7, r14\n\t"
09766         "str    r4, [%[r], #144]\n\t"
09767         "str    r5, [%[r], #148]\n\t"
09768         "str    r6, [%[r], #152]\n\t"
09769         "str    r7, [%[r], #156]\n\t"
09770         "ldr    r4, [%[a], #160]\n\t"
09771         "ldr    r5, [%[a], #164]\n\t"
09772         "ldr    r6, [%[a], #168]\n\t"
09773         "ldr    r7, [%[a], #172]\n\t"
09774         "ldr    r8, [%[b], #160]\n\t"
09775         "ldr    r9, [%[b], #164]\n\t"
09776         "ldr    r10, [%[b], #168]\n\t"
09777         "ldr    r14, [%[b], #172]\n\t"
09778         "adcs   r4, r4, r8\n\t"
09779         "adcs   r5, r5, r9\n\t"
09780         "adcs   r6, r6, r10\n\t"
09781         "adcs   r7, r7, r14\n\t"
09782         "str    r4, [%[r], #160]\n\t"
09783         "str    r5, [%[r], #164]\n\t"
09784         "str    r6, [%[r], #168]\n\t"
09785         "str    r7, [%[r], #172]\n\t"
09786         "ldr    r4, [%[a], #176]\n\t"
09787         "ldr    r5, [%[a], #180]\n\t"
09788         "ldr    r6, [%[a], #184]\n\t"
09789         "ldr    r7, [%[a], #188]\n\t"
09790         "ldr    r8, [%[b], #176]\n\t"
09791         "ldr    r9, [%[b], #180]\n\t"
09792         "ldr    r10, [%[b], #184]\n\t"
09793         "ldr    r14, [%[b], #188]\n\t"
09794         "adcs   r4, r4, r8\n\t"
09795         "adcs   r5, r5, r9\n\t"
09796         "adcs   r6, r6, r10\n\t"
09797         "adcs   r7, r7, r14\n\t"
09798         "str    r4, [%[r], #176]\n\t"
09799         "str    r5, [%[r], #180]\n\t"
09800         "str    r6, [%[r], #184]\n\t"
09801         "str    r7, [%[r], #188]\n\t"
09802         "ldr    r4, [%[a], #192]\n\t"
09803         "ldr    r5, [%[a], #196]\n\t"
09804         "ldr    r6, [%[a], #200]\n\t"
09805         "ldr    r7, [%[a], #204]\n\t"
09806         "ldr    r8, [%[b], #192]\n\t"
09807         "ldr    r9, [%[b], #196]\n\t"
09808         "ldr    r10, [%[b], #200]\n\t"
09809         "ldr    r14, [%[b], #204]\n\t"
09810         "adcs   r4, r4, r8\n\t"
09811         "adcs   r5, r5, r9\n\t"
09812         "adcs   r6, r6, r10\n\t"
09813         "adcs   r7, r7, r14\n\t"
09814         "str    r4, [%[r], #192]\n\t"
09815         "str    r5, [%[r], #196]\n\t"
09816         "str    r6, [%[r], #200]\n\t"
09817         "str    r7, [%[r], #204]\n\t"
09818         "ldr    r4, [%[a], #208]\n\t"
09819         "ldr    r5, [%[a], #212]\n\t"
09820         "ldr    r6, [%[a], #216]\n\t"
09821         "ldr    r7, [%[a], #220]\n\t"
09822         "ldr    r8, [%[b], #208]\n\t"
09823         "ldr    r9, [%[b], #212]\n\t"
09824         "ldr    r10, [%[b], #216]\n\t"
09825         "ldr    r14, [%[b], #220]\n\t"
09826         "adcs   r4, r4, r8\n\t"
09827         "adcs   r5, r5, r9\n\t"
09828         "adcs   r6, r6, r10\n\t"
09829         "adcs   r7, r7, r14\n\t"
09830         "str    r4, [%[r], #208]\n\t"
09831         "str    r5, [%[r], #212]\n\t"
09832         "str    r6, [%[r], #216]\n\t"
09833         "str    r7, [%[r], #220]\n\t"
09834         "ldr    r4, [%[a], #224]\n\t"
09835         "ldr    r5, [%[a], #228]\n\t"
09836         "ldr    r6, [%[a], #232]\n\t"
09837         "ldr    r7, [%[a], #236]\n\t"
09838         "ldr    r8, [%[b], #224]\n\t"
09839         "ldr    r9, [%[b], #228]\n\t"
09840         "ldr    r10, [%[b], #232]\n\t"
09841         "ldr    r14, [%[b], #236]\n\t"
09842         "adcs   r4, r4, r8\n\t"
09843         "adcs   r5, r5, r9\n\t"
09844         "adcs   r6, r6, r10\n\t"
09845         "adcs   r7, r7, r14\n\t"
09846         "str    r4, [%[r], #224]\n\t"
09847         "str    r5, [%[r], #228]\n\t"
09848         "str    r6, [%[r], #232]\n\t"
09849         "str    r7, [%[r], #236]\n\t"
09850         "ldr    r4, [%[a], #240]\n\t"
09851         "ldr    r5, [%[a], #244]\n\t"
09852         "ldr    r6, [%[a], #248]\n\t"
09853         "ldr    r7, [%[a], #252]\n\t"
09854         "ldr    r8, [%[b], #240]\n\t"
09855         "ldr    r9, [%[b], #244]\n\t"
09856         "ldr    r10, [%[b], #248]\n\t"
09857         "ldr    r14, [%[b], #252]\n\t"
09858         "adcs   r4, r4, r8\n\t"
09859         "adcs   r5, r5, r9\n\t"
09860         "adcs   r6, r6, r10\n\t"
09861         "adcs   r7, r7, r14\n\t"
09862         "str    r4, [%[r], #240]\n\t"
09863         "str    r5, [%[r], #244]\n\t"
09864         "str    r6, [%[r], #248]\n\t"
09865         "str    r7, [%[r], #252]\n\t"
09866         "ldr    r4, [%[a], #256]\n\t"
09867         "ldr    r5, [%[a], #260]\n\t"
09868         "ldr    r6, [%[a], #264]\n\t"
09869         "ldr    r7, [%[a], #268]\n\t"
09870         "ldr    r8, [%[b], #256]\n\t"
09871         "ldr    r9, [%[b], #260]\n\t"
09872         "ldr    r10, [%[b], #264]\n\t"
09873         "ldr    r14, [%[b], #268]\n\t"
09874         "adcs   r4, r4, r8\n\t"
09875         "adcs   r5, r5, r9\n\t"
09876         "adcs   r6, r6, r10\n\t"
09877         "adcs   r7, r7, r14\n\t"
09878         "str    r4, [%[r], #256]\n\t"
09879         "str    r5, [%[r], #260]\n\t"
09880         "str    r6, [%[r], #264]\n\t"
09881         "str    r7, [%[r], #268]\n\t"
09882         "ldr    r4, [%[a], #272]\n\t"
09883         "ldr    r5, [%[a], #276]\n\t"
09884         "ldr    r6, [%[a], #280]\n\t"
09885         "ldr    r7, [%[a], #284]\n\t"
09886         "ldr    r8, [%[b], #272]\n\t"
09887         "ldr    r9, [%[b], #276]\n\t"
09888         "ldr    r10, [%[b], #280]\n\t"
09889         "ldr    r14, [%[b], #284]\n\t"
09890         "adcs   r4, r4, r8\n\t"
09891         "adcs   r5, r5, r9\n\t"
09892         "adcs   r6, r6, r10\n\t"
09893         "adcs   r7, r7, r14\n\t"
09894         "str    r4, [%[r], #272]\n\t"
09895         "str    r5, [%[r], #276]\n\t"
09896         "str    r6, [%[r], #280]\n\t"
09897         "str    r7, [%[r], #284]\n\t"
09898         "ldr    r4, [%[a], #288]\n\t"
09899         "ldr    r5, [%[a], #292]\n\t"
09900         "ldr    r6, [%[a], #296]\n\t"
09901         "ldr    r7, [%[a], #300]\n\t"
09902         "ldr    r8, [%[b], #288]\n\t"
09903         "ldr    r9, [%[b], #292]\n\t"
09904         "ldr    r10, [%[b], #296]\n\t"
09905         "ldr    r14, [%[b], #300]\n\t"
09906         "adcs   r4, r4, r8\n\t"
09907         "adcs   r5, r5, r9\n\t"
09908         "adcs   r6, r6, r10\n\t"
09909         "adcs   r7, r7, r14\n\t"
09910         "str    r4, [%[r], #288]\n\t"
09911         "str    r5, [%[r], #292]\n\t"
09912         "str    r6, [%[r], #296]\n\t"
09913         "str    r7, [%[r], #300]\n\t"
09914         "ldr    r4, [%[a], #304]\n\t"
09915         "ldr    r5, [%[a], #308]\n\t"
09916         "ldr    r6, [%[a], #312]\n\t"
09917         "ldr    r7, [%[a], #316]\n\t"
09918         "ldr    r8, [%[b], #304]\n\t"
09919         "ldr    r9, [%[b], #308]\n\t"
09920         "ldr    r10, [%[b], #312]\n\t"
09921         "ldr    r14, [%[b], #316]\n\t"
09922         "adcs   r4, r4, r8\n\t"
09923         "adcs   r5, r5, r9\n\t"
09924         "adcs   r6, r6, r10\n\t"
09925         "adcs   r7, r7, r14\n\t"
09926         "str    r4, [%[r], #304]\n\t"
09927         "str    r5, [%[r], #308]\n\t"
09928         "str    r6, [%[r], #312]\n\t"
09929         "str    r7, [%[r], #316]\n\t"
09930         "ldr    r4, [%[a], #320]\n\t"
09931         "ldr    r5, [%[a], #324]\n\t"
09932         "ldr    r6, [%[a], #328]\n\t"
09933         "ldr    r7, [%[a], #332]\n\t"
09934         "ldr    r8, [%[b], #320]\n\t"
09935         "ldr    r9, [%[b], #324]\n\t"
09936         "ldr    r10, [%[b], #328]\n\t"
09937         "ldr    r14, [%[b], #332]\n\t"
09938         "adcs   r4, r4, r8\n\t"
09939         "adcs   r5, r5, r9\n\t"
09940         "adcs   r6, r6, r10\n\t"
09941         "adcs   r7, r7, r14\n\t"
09942         "str    r4, [%[r], #320]\n\t"
09943         "str    r5, [%[r], #324]\n\t"
09944         "str    r6, [%[r], #328]\n\t"
09945         "str    r7, [%[r], #332]\n\t"
09946         "ldr    r4, [%[a], #336]\n\t"
09947         "ldr    r5, [%[a], #340]\n\t"
09948         "ldr    r6, [%[a], #344]\n\t"
09949         "ldr    r7, [%[a], #348]\n\t"
09950         "ldr    r8, [%[b], #336]\n\t"
09951         "ldr    r9, [%[b], #340]\n\t"
09952         "ldr    r10, [%[b], #344]\n\t"
09953         "ldr    r14, [%[b], #348]\n\t"
09954         "adcs   r4, r4, r8\n\t"
09955         "adcs   r5, r5, r9\n\t"
09956         "adcs   r6, r6, r10\n\t"
09957         "adcs   r7, r7, r14\n\t"
09958         "str    r4, [%[r], #336]\n\t"
09959         "str    r5, [%[r], #340]\n\t"
09960         "str    r6, [%[r], #344]\n\t"
09961         "str    r7, [%[r], #348]\n\t"
09962         "ldr    r4, [%[a], #352]\n\t"
09963         "ldr    r5, [%[a], #356]\n\t"
09964         "ldr    r6, [%[a], #360]\n\t"
09965         "ldr    r7, [%[a], #364]\n\t"
09966         "ldr    r8, [%[b], #352]\n\t"
09967         "ldr    r9, [%[b], #356]\n\t"
09968         "ldr    r10, [%[b], #360]\n\t"
09969         "ldr    r14, [%[b], #364]\n\t"
09970         "adcs   r4, r4, r8\n\t"
09971         "adcs   r5, r5, r9\n\t"
09972         "adcs   r6, r6, r10\n\t"
09973         "adcs   r7, r7, r14\n\t"
09974         "str    r4, [%[r], #352]\n\t"
09975         "str    r5, [%[r], #356]\n\t"
09976         "str    r6, [%[r], #360]\n\t"
09977         "str    r7, [%[r], #364]\n\t"
09978         "ldr    r4, [%[a], #368]\n\t"
09979         "ldr    r5, [%[a], #372]\n\t"
09980         "ldr    r6, [%[a], #376]\n\t"
09981         "ldr    r7, [%[a], #380]\n\t"
09982         "ldr    r8, [%[b], #368]\n\t"
09983         "ldr    r9, [%[b], #372]\n\t"
09984         "ldr    r10, [%[b], #376]\n\t"
09985         "ldr    r14, [%[b], #380]\n\t"
09986         "adcs   r4, r4, r8\n\t"
09987         "adcs   r5, r5, r9\n\t"
09988         "adcs   r6, r6, r10\n\t"
09989         "adcs   r7, r7, r14\n\t"
09990         "str    r4, [%[r], #368]\n\t"
09991         "str    r5, [%[r], #372]\n\t"
09992         "str    r6, [%[r], #376]\n\t"
09993         "str    r7, [%[r], #380]\n\t"
09994         "adc    %[c], r12, r12\n\t"
09995         : [c] "+r" (c)
09996         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
09997         : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
09998     );
09999 
10000     return c;
10001 }
10002 
10003 /* AND m into each word of a and store in r.
10004  *
10005  * r  A single precision integer.
10006  * a  A single precision integer.
10007  * m  Mask to AND against each digit.
10008  */
10009 static void sp_3072_mask_48(sp_digit* r, sp_digit* a, sp_digit m)
10010 {
10011 #ifdef WOLFSSL_SP_SMALL
10012     int i;
10013 
10014     for (i=0; i<48; i++)
10015         r[i] = a[i] & m;
10016 #else
10017     int i;
10018 
10019     for (i = 0; i < 48; i += 8) {
10020         r[i+0] = a[i+0] & m;
10021         r[i+1] = a[i+1] & m;
10022         r[i+2] = a[i+2] & m;
10023         r[i+3] = a[i+3] & m;
10024         r[i+4] = a[i+4] & m;
10025         r[i+5] = a[i+5] & m;
10026         r[i+6] = a[i+6] & m;
10027         r[i+7] = a[i+7] & m;
10028     }
10029 #endif
10030 }
10031 
10032 /* Multiply a and b into r. (r = a * b)
10033  *
10034  * r  A single precision integer.
10035  * a  A single precision integer.
10036  * b  A single precision integer.
10037  */
10038 static void sp_3072_mul_96(sp_digit* r, const sp_digit* a,
10039         const sp_digit* b)
10040 {
10041     sp_digit* z0 = r;
10042     sp_digit z1[96];
10043     sp_digit a1[48];
10044     sp_digit b1[48];
10045     sp_digit z2[96];
10046     sp_digit u, ca, cb;
10047 
10048     ca = sp_3072_add_48(a1, a, &a[48]);
10049     cb = sp_3072_add_48(b1, b, &b[48]);
10050     u  = ca & cb;
10051     sp_3072_mul_48(z1, a1, b1);
10052     sp_3072_mul_48(z2, &a[48], &b[48]);
10053     sp_3072_mul_48(z0, a, b);
10054     sp_3072_mask_48(r + 96, a1, 0 - cb);
10055     sp_3072_mask_48(b1, b1, 0 - ca);
10056     u += sp_3072_add_48(r + 96, r + 96, b1);
10057     u += sp_3072_sub_in_place_96(z1, z2);
10058     u += sp_3072_sub_in_place_96(z1, z0);
10059     u += sp_3072_add_96(r + 48, r + 48, z1);
10060     r[144] = u;
10061     XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1));
10062     sp_3072_add_96(r + 96, r + 96, z2);
10063 }
10064 
10065 /* Square a and put result in r. (r = a * a)
10066  *
10067  * r  A single precision integer.
10068  * a  A single precision integer.
10069  */
10070 static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a)
10071 {
10072     sp_digit* z0 = r;
10073     sp_digit z2[96];
10074     sp_digit z1[96];
10075     sp_digit a1[48];
10076     sp_digit u;
10077 
10078     u = sp_3072_add_48(a1, a, &a[48]);
10079     sp_3072_sqr_48(z1, a1);
10080     sp_3072_sqr_48(z2, &a[48]);
10081     sp_3072_sqr_48(z0, a);
10082     sp_3072_mask_48(r + 96, a1, 0 - u);
10083     u += sp_3072_add_48(r + 96, r + 96, r + 96);
10084     u += sp_3072_sub_in_place_96(z1, z2);
10085     u += sp_3072_sub_in_place_96(z1, z0);
10086     u += sp_3072_add_96(r + 48, r + 48, z1);
10087     r[144] = u;
10088     XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1));
10089     sp_3072_add_96(r + 96, r + 96, z2);
10090 }
10091 
10092 #endif /* WOLFSSL_SP_SMALL */
10093 #ifdef WOLFSSL_SP_SMALL
10094 /* Add b to a into r, 96 words, four words per loop pass. (r = a + b)
10095  *
10096  * r  A single precision integer; receives the sum.
10097  * a  A single precision integer.
10098  * b  A single precision integer.
10099  * Returns the carry out of the last word added (0 or 1). */
10100 static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a,
10101         const sp_digit* b)
10102 {
10103     sp_digit c = 0;
10104 
10105     __asm__ __volatile__ (
10106         "add    r12, %[a], #384\n\t" /* r12 = a + 384 bytes: loop end marker */
10107         "\n1:\n\t"
10108         "adds   %[c], %[c], #-1\n\t" /* carry flag = (c != 0): reloads the saved carry */
10109         "ldr    r4, [%[a]], #4\n\t" /* four words of a, pointer post-incremented */
10110         "ldr    r5, [%[a]], #4\n\t"
10111         "ldr    r6, [%[a]], #4\n\t"
10112         "ldr    r7, [%[a]], #4\n\t"
10113         "ldr    r8, [%[b]], #4\n\t" /* four words of b, pointer post-incremented */
10114         "ldr    r9, [%[b]], #4\n\t"
10115         "ldr    r10, [%[b]], #4\n\t"
10116         "ldr    r14, [%[b]], #4\n\t"
10117         "adcs   r4, r4, r8\n\t" /* add with carry, carry chained word to word */
10118         "adcs   r5, r5, r9\n\t"
10119         "adcs   r6, r6, r10\n\t"
10120         "adcs   r7, r7, r14\n\t"
10121         "str    r4, [%[r]], #4\n\t"
10122         "str    r5, [%[r]], #4\n\t"
10123         "str    r6, [%[r]], #4\n\t"
10124         "str    r7, [%[r]], #4\n\t"
10125         "mov    r4, #0\n\t"
10126         "adc    %[c], r4, #0\n\t" /* c = carry flag; saved because cmp overwrites the flags */
10127         "cmp    %[a], r12\n\t"
10128         "bne    1b\n\t"
10129         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) /* pointers are advanced, so read-write */
10130         :
10131         : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
10132     );
10133 
10134     return c;
10135 }
10136 
10137 #endif /* WOLFSSL_SP_SMALL */
10138 #ifdef WOLFSSL_SP_SMALL
10139 /* Sub b from a into a, 96 words, four words per loop pass. (a -= b)
10140  *
10141  * a  A single precision integer; overwritten with the difference.
10142  * b  A single precision integer.
10143  * Returns 0 when the last word did not borrow, all ones when it did. */
10144 static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b)
10145 {
10146     sp_digit c = 0;
10147 
10148     __asm__ __volatile__ (
10149         "mov    r14, #0\n\t" /* r14 = constant zero */
10150         "add    r12, %[a], #384\n\t" /* r12 = a + 384 bytes: loop end marker */
10151         "\n1:\n\t"
10152         "subs   %[c], r14, %[c]\n\t" /* 0 - c: sets the flags so sbcs resumes with the saved borrow */
10153         "ldr    r3, [%[a]]\n\t" /* four words of a, pointer not moved yet */
10154         "ldr    r4, [%[a], #4]\n\t"
10155         "ldr    r5, [%[a], #8]\n\t"
10156         "ldr    r6, [%[a], #12]\n\t"
10157         "ldr    r7, [%[b]], #4\n\t" /* four words of b, pointer post-incremented */
10158         "ldr    r8, [%[b]], #4\n\t"
10159         "ldr    r9, [%[b]], #4\n\t"
10160         "ldr    r10, [%[b]], #4\n\t"
10161         "sbcs   r3, r3, r7\n\t" /* subtract with borrow, chained word to word */
10162         "sbcs   r4, r4, r8\n\t"
10163         "sbcs   r5, r5, r9\n\t"
10164         "sbcs   r6, r6, r10\n\t"
10165         "str    r3, [%[a]], #4\n\t" /* write back over a, advancing the pointer */
10166         "str    r4, [%[a]], #4\n\t"
10167         "str    r5, [%[a]], #4\n\t"
10168         "str    r6, [%[a]], #4\n\t"
10169         "sbc    %[c], r14, r14\n\t" /* c = 0 (no borrow) or -1 (borrow); cmp overwrites the flags */
10170         "cmp    %[a], r12\n\t"
10171         "bne    1b\n\t"
10172         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) /* pointers are advanced, so read-write */
10173         :
10174         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14"
10175     );
10176 
10177     return c;
10178 }
10179 
10180 #endif /* WOLFSSL_SP_SMALL */
10181 #ifdef WOLFSSL_SP_SMALL
10182 /* Multiply a and b into r, one result word at a time. (r = a * b)
10183  *
10184  * r  A single precision integer; receives 192 words copied from a local buffer.
10185  * a  A single precision integer.
10186  * b  A single precision integer.
10187  */
10188 static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, const sp_digit* b)
10189 {
10190     sp_digit tmp[192];
10191 
10192     __asm__ __volatile__ (
10193         "mov    r5, #0\n\t" /* r5 = byte offset of the result word being computed */
10194         "mov    r6, #0\n\t" /* r6, r7, r8 = low, middle, high word of the running sum */
10195         "mov    r7, #0\n\t"
10196         "mov    r8, #0\n\t"
10197         "\n1:\n\t"
10198         "subs   r3, r5, #380\n\t" /* r3 = offset into a: r5 - 380 ... */
10199         "movcc  r3, #0\n\t" /* ... or 0 when r5 is below 380 */
10200         "sub    r4, r5, r3\n\t" /* r4 = offset into b, so r3 + r4 == r5 */
10201         "\n2:\n\t"
10202         "ldr    r14, [%[a], r3]\n\t"
10203         "ldr    r12, [%[b], r4]\n\t"
10204         "umull  r9, r10, r14, r12\n\t" /* r10:r9 = word of a times word of b */
10205         "adds   r6, r6, r9\n\t" /* add the 64-bit product into the three-word sum */
10206         "adcs   r7, r7, r10\n\t"
10207         "adc    r8, r8, #0\n\t"
10208         "add    r3, r3, #4\n\t" /* next word of a, previous word of b */
10209         "sub    r4, r4, #4\n\t"
10210         "cmp    r3, #384\n\t" /* stop at the end of a ... */
10211         "beq    3f\n\t"
10212         "cmp    r3, r5\n\t" /* ... or once the offset into b would go below zero */
10213         "ble    2b\n\t"
10214         "\n3:\n\t"
10215         "str    r6, [%[r], r5]\n\t" /* low word of the sum is the finished result word */
10216         "mov    r6, r7\n\t" /* shift the sum down one word for the next result word */
10217         "mov    r7, r8\n\t"
10218         "mov    r8, #0\n\t"
10219         "add    r5, r5, #4\n\t"
10220         "cmp    r5, #760\n\t"
10221         "ble    1b\n\t"
10222         "str    r6, [%[r], r5]\n\t" /* last word, at byte offset 764 */
10223         :
10224         : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) /* the asm writes to tmp, not to the caller's r */
10225         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
10226     );
10227 
10228     XMEMCPY(r, tmp, sizeof(tmp)); /* r is written only after a and b have been fully read */
10229 }
10230 
10231 /* Square a and put result in r. (r = a * a)
10232  *
10233  * r  A single precision integer.
10234  * a  A single precision integer.
10235  */
10236 static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a)
10237 {
10238     sp_digit tmp[192];
10239 
10240     __asm__ __volatile__ (
10241         "mov    r12, #0\n\t"
10242         "mov    r6, #0\n\t"
10243         "mov    r7, #0\n\t"
10244         "mov    r8, #0\n\t"
10245         "mov    r5, #0\n\t"
10246         "\n1:\n\t"
10247         "subs   r3, r5, #380\n\t"
10248         "movcc  r3, r12\n\t"
10249         "sub    r4, r5, r3\n\t"
10250         "\n2:\n\t"
10251         "cmp    r4, r3\n\t"
10252         "beq    4f\n\t"
10253         "ldr    r14, [%[a], r3]\n\t"
10254         "ldr    r9, [%[a], r4]\n\t"
10255         "umull  r9, r10, r14, r9\n\t"
10256         "adds   r6, r6, r9\n\t"
10257         "adcs   r7, r7, r10\n\t"
10258         "adc    r8, r8, r12\n\t"
10259         "adds   r6, r6, r9\n\t"
10260         "adcs   r7, r7, r10\n\t"
10261         "adc    r8, r8, r12\n\t"
10262         "bal    5f\n\t"
10263         "\n4:\n\t"
10264         "ldr    r14, [%[a], r3]\n\t"
10265         "umull  r9, r10, r14, r14\n\t"
10266         "adds   r6, r6, r9\n\t"
10267         "adcs   r7, r7, r10\n\t"
10268         "adc    r8, r8, r12\n\t"
10269         "\n5:\n\t"
10270         "add    r3, r3, #4\n\t"
10271         "sub    r4, r4, #4\n\t"
10272         "cmp    r3, #384\n\t"
10273         "beq    3f\n\t"
10274         "cmp    r3, r4\n\t"
10275         "bgt    3f\n\t"
10276         "cmp    r3, r5\n\t"
10277         "ble    2b\n\t"
10278         "\n3:\n\t"
10279         "str    r6, [%[r], r5]\n\t"
10280         "mov    r6, r7\n\t"
10281         "mov    r7, r8\n\t"
10282         "mov    r8, #0\n\t"
10283         "add    r5, r5, #4\n\t"
10284         "cmp    r5, #760\n\t"
10285         "ble    1b\n\t"
10286         "str    r6, [%[r], r5]\n\t"
10287         :
10288         : [r] "r" (tmp), [a] "r" (a)
10289         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12"
10290     );
10291 
10292     XMEMCPY(r, tmp, sizeof(tmp));
10293 }
10294 
10295 #endif /* WOLFSSL_SP_SMALL */
10296 #if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
10297 #ifdef WOLFSSL_SP_SMALL
10298 /* AND m into each word of a and store in r.
10299  *
10300  * r  A single precision integer.
10301  * a  A single precision integer.
10302  * m  Mask to AND against each digit.
10303  */
10304 static void sp_3072_mask_48(sp_digit* r, sp_digit* a, sp_digit m)
10305 {
10306     int i;
10307 
10308     for (i=0; i<48; i++)
10309         r[i] = a[i] & m;
10310 }
10311 
10312 #endif /* WOLFSSL_SP_SMALL */
10313 #ifdef WOLFSSL_SP_SMALL
10314 /* Add b to a into r, 48 words, four words per loop pass. (r = a + b)
10315  *
10316  * r  A single precision integer; receives the sum.
10317  * a  A single precision integer.
10318  * b  A single precision integer.
10319  * Returns the carry out of the last word added (0 or 1). */
10320 static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a,
10321         const sp_digit* b)
10322 {
10323     sp_digit c = 0;
10324 
10325     __asm__ __volatile__ (
10326         "add    r12, %[a], #192\n\t" /* r12 = a + 192 bytes: loop end marker */
10327         "\n1:\n\t"
10328         "adds   %[c], %[c], #-1\n\t" /* carry flag = (c != 0): reloads the saved carry */
10329         "ldr    r4, [%[a]], #4\n\t" /* four words of a, pointer post-incremented */
10330         "ldr    r5, [%[a]], #4\n\t"
10331         "ldr    r6, [%[a]], #4\n\t"
10332         "ldr    r7, [%[a]], #4\n\t"
10333         "ldr    r8, [%[b]], #4\n\t" /* four words of b, pointer post-incremented */
10334         "ldr    r9, [%[b]], #4\n\t"
10335         "ldr    r10, [%[b]], #4\n\t"
10336         "ldr    r14, [%[b]], #4\n\t"
10337         "adcs   r4, r4, r8\n\t" /* add with carry, carry chained word to word */
10338         "adcs   r5, r5, r9\n\t"
10339         "adcs   r6, r6, r10\n\t"
10340         "adcs   r7, r7, r14\n\t"
10341         "str    r4, [%[r]], #4\n\t"
10342         "str    r5, [%[r]], #4\n\t"
10343         "str    r6, [%[r]], #4\n\t"
10344         "str    r7, [%[r]], #4\n\t"
10345         "mov    r4, #0\n\t"
10346         "adc    %[c], r4, #0\n\t" /* c = carry flag; saved because cmp overwrites the flags */
10347         "cmp    %[a], r12\n\t"
10348         "bne    1b\n\t"
10349         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) /* pointers are advanced, so read-write */
10350         :
10351         : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
10352     );
10353 
10354     return c;
10355 }
10356 
10357 #endif /* WOLFSSL_SP_SMALL */
10358 #ifdef WOLFSSL_SP_SMALL
10359 /* Multiply a and b into r, one result word at a time. (r = a * b)
10360  *
10361  * r  A single precision integer; receives 96 words copied from a local buffer.
10362  * a  A single precision integer.
10363  * b  A single precision integer.
10364  */
10365 static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b)
10366 {
10367     sp_digit tmp[96];
10368 
10369     __asm__ __volatile__ (
10370         "mov    r5, #0\n\t" /* r5 = byte offset of the result word being computed */
10371         "mov    r6, #0\n\t" /* r6, r7, r8 = low, middle, high word of the running sum */
10372         "mov    r7, #0\n\t"
10373         "mov    r8, #0\n\t"
10374         "\n1:\n\t"
10375         "subs   r3, r5, #188\n\t" /* r3 = offset into a: r5 - 188 ... */
10376         "movcc  r3, #0\n\t" /* ... or 0 when r5 is below 188 */
10377         "sub    r4, r5, r3\n\t" /* r4 = offset into b, so r3 + r4 == r5 */
10378         "\n2:\n\t"
10379         "ldr    r14, [%[a], r3]\n\t"
10380         "ldr    r12, [%[b], r4]\n\t"
10381         "umull  r9, r10, r14, r12\n\t" /* r10:r9 = word of a times word of b */
10382         "adds   r6, r6, r9\n\t" /* add the 64-bit product into the three-word sum */
10383         "adcs   r7, r7, r10\n\t"
10384         "adc    r8, r8, #0\n\t"
10385         "add    r3, r3, #4\n\t" /* next word of a, previous word of b */
10386         "sub    r4, r4, #4\n\t"
10387         "cmp    r3, #192\n\t" /* stop at the end of a ... */
10388         "beq    3f\n\t"
10389         "cmp    r3, r5\n\t" /* ... or once the offset into b would go below zero */
10390         "ble    2b\n\t"
10391         "\n3:\n\t"
10392         "str    r6, [%[r], r5]\n\t" /* low word of the sum is the finished result word */
10393         "mov    r6, r7\n\t" /* shift the sum down one word for the next result word */
10394         "mov    r7, r8\n\t"
10395         "mov    r8, #0\n\t"
10396         "add    r5, r5, #4\n\t"
10397         "cmp    r5, #376\n\t"
10398         "ble    1b\n\t"
10399         "str    r6, [%[r], r5]\n\t" /* last word, at byte offset 380 */
10400         :
10401         : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) /* the asm writes to tmp, not to the caller's r */
10402         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
10403     );
10404 
10405     XMEMCPY(r, tmp, sizeof(tmp)); /* r is written only after a and b have been fully read */
10406 }
10407 
10408 /* Square a and put result in r. (r = a * a)
10409  *
10410  * r  A single precision integer.
10411  * a  A single precision integer.
10412  */
10413 static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
10414 {
10415     sp_digit tmp[96];
10416 
10417     __asm__ __volatile__ (
10418         "mov    r12, #0\n\t"
10419         "mov    r6, #0\n\t"
10420         "mov    r7, #0\n\t"
10421         "mov    r8, #0\n\t"
10422         "mov    r5, #0\n\t"
10423         "\n1:\n\t"
10424         "subs   r3, r5, #188\n\t"
10425         "movcc  r3, r12\n\t"
10426         "sub    r4, r5, r3\n\t"
10427         "\n2:\n\t"
10428         "cmp    r4, r3\n\t"
10429         "beq    4f\n\t"
10430         "ldr    r14, [%[a], r3]\n\t"
10431         "ldr    r9, [%[a], r4]\n\t"
10432         "umull  r9, r10, r14, r9\n\t"
10433         "adds   r6, r6, r9\n\t"
10434         "adcs   r7, r7, r10\n\t"
10435         "adc    r8, r8, r12\n\t"
10436         "adds   r6, r6, r9\n\t"
10437         "adcs   r7, r7, r10\n\t"
10438         "adc    r8, r8, r12\n\t"
10439         "bal    5f\n\t"
10440         "\n4:\n\t"
10441         "ldr    r14, [%[a], r3]\n\t"
10442         "umull  r9, r10, r14, r14\n\t"
10443         "adds   r6, r6, r9\n\t"
10444         "adcs   r7, r7, r10\n\t"
10445         "adc    r8, r8, r12\n\t"
10446         "\n5:\n\t"
10447         "add    r3, r3, #4\n\t"
10448         "sub    r4, r4, #4\n\t"
10449         "cmp    r3, #192\n\t"
10450         "beq    3f\n\t"
10451         "cmp    r3, r4\n\t"
10452         "bgt    3f\n\t"
10453         "cmp    r3, r5\n\t"
10454         "ble    2b\n\t"
10455         "\n3:\n\t"
10456         "str    r6, [%[r], r5]\n\t"
10457         "mov    r6, r7\n\t"
10458         "mov    r7, r8\n\t"
10459         "mov    r8, #0\n\t"
10460         "add    r5, r5, #4\n\t"
10461         "cmp    r5, #376\n\t"
10462         "ble    1b\n\t"
10463         "str    r6, [%[r], r5]\n\t"
10464         :
10465         : [r] "r" (tmp), [a] "r" (a)
10466         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12"
10467     );
10468 
10469     XMEMCPY(r, tmp, sizeof(tmp));
10470 }
10471 
10472 #endif /* WOLFSSL_SP_SMALL */
10473 #endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
10474 
10475 /* Caclulate the bottom digit of -1/a mod 2^n.
10476  *
10477  * a    A single precision number.
10478  * rho  Bottom word of inverse.
10479  */
10480 static void sp_3072_mont_setup(sp_digit* a, sp_digit* rho)
10481 {
10482     sp_digit x, b;
10483 
10484     b = a[0];
10485     x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
10486     x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
10487     x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
10488     x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
10489 
10490     /* rho = -1/m mod b */
10491     *rho = -x;
10492 }
10493 
10494 #if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
10495 #ifdef WOLFSSL_SP_SMALL
10496 /* Sub b from a into a, 48 words, four words per loop pass. (a -= b)
10497  *
10498  * a  A single precision integer; overwritten with the difference.
10499  * b  A single precision integer.
10500  * Returns 0 when the last word did not borrow, all ones when it did. */
10501 static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b)
10502 {
10503     sp_digit c = 0;
10504 
10505     __asm__ __volatile__ (
10506         "mov    r14, #0\n\t" /* r14 = constant zero */
10507         "add    r12, %[a], #192\n\t" /* r12 = a + 192 bytes: loop end marker */
10508         "\n1:\n\t"
10509         "subs   %[c], r14, %[c]\n\t" /* 0 - c: sets the flags so sbcs resumes with the saved borrow */
10510         "ldr    r3, [%[a]]\n\t" /* four words of a, pointer not moved yet */
10511         "ldr    r4, [%[a], #4]\n\t"
10512         "ldr    r5, [%[a], #8]\n\t"
10513         "ldr    r6, [%[a], #12]\n\t"
10514         "ldr    r7, [%[b]], #4\n\t" /* four words of b, pointer post-incremented */
10515         "ldr    r8, [%[b]], #4\n\t"
10516         "ldr    r9, [%[b]], #4\n\t"
10517         "ldr    r10, [%[b]], #4\n\t"
10518         "sbcs   r3, r3, r7\n\t" /* subtract with borrow, chained word to word */
10519         "sbcs   r4, r4, r8\n\t"
10520         "sbcs   r5, r5, r9\n\t"
10521         "sbcs   r6, r6, r10\n\t"
10522         "str    r3, [%[a]], #4\n\t" /* write back over a, advancing the pointer */
10523         "str    r4, [%[a]], #4\n\t"
10524         "str    r5, [%[a]], #4\n\t"
10525         "str    r6, [%[a]], #4\n\t"
10526         "sbc    %[c], r14, r14\n\t" /* c = 0 (no borrow) or -1 (borrow); cmp overwrites the flags */
10527         "cmp    %[a], r12\n\t"
10528         "bne    1b\n\t"
10529         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) /* pointers are advanced, so read-write */
10530         :
10531         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14"
10532     );
10533 
10534     return c;
10535 }
10536 
10537 #else
10538 /* Sub b from a into a, 48 words, fully unrolled in groups of four. (a -= b)
10539  *
10540  * a  A single precision integer and result.
10541  * b  A single precision integer.
10542  * Returns 0 when the last word did not borrow, all ones when it did. */
10543 static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b)
10544 {
10545     sp_digit c = 0;
10546 
10547     __asm__ __volatile__ (
10548         "ldr    r2, [%[a], #0]\n\t" /* words 0..3 of a */
10549         "ldr    r3, [%[a], #4]\n\t"
10550         "ldr    r4, [%[a], #8]\n\t"
10551         "ldr    r5, [%[a], #12]\n\t"
10552         "ldr    r6, [%[b], #0]\n\t" /* words 0..3 of b */
10553         "ldr    r7, [%[b], #4]\n\t"
10554         "ldr    r8, [%[b], #8]\n\t"
10555         "ldr    r9, [%[b], #12]\n\t"
10556         "subs   r2, r2, r6\n\t" /* lowest word: plain subtract, no incoming borrow */
10557         "sbcs   r3, r3, r7\n\t" /* every later word subtracts with the borrow from the one before */
10558         "sbcs   r4, r4, r8\n\t"
10559         "sbcs   r5, r5, r9\n\t"
10560         "str    r2, [%[a], #0]\n\t"
10561         "str    r3, [%[a], #4]\n\t"
10562         "str    r4, [%[a], #8]\n\t"
10563         "str    r5, [%[a], #12]\n\t"
10564         "ldr    r2, [%[a], #16]\n\t" /* same pattern for each following group of four words */
10565         "ldr    r3, [%[a], #20]\n\t"
10566         "ldr    r4, [%[a], #24]\n\t"
10567         "ldr    r5, [%[a], #28]\n\t"
10568         "ldr    r6, [%[b], #16]\n\t"
10569         "ldr    r7, [%[b], #20]\n\t"
10570         "ldr    r8, [%[b], #24]\n\t"
10571         "ldr    r9, [%[b], #28]\n\t"
10572         "sbcs   r2, r2, r6\n\t"
10573         "sbcs   r3, r3, r7\n\t"
10574         "sbcs   r4, r4, r8\n\t"
10575         "sbcs   r5, r5, r9\n\t"
10576         "str    r2, [%[a], #16]\n\t"
10577         "str    r3, [%[a], #20]\n\t"
10578         "str    r4, [%[a], #24]\n\t"
10579         "str    r5, [%[a], #28]\n\t"
10580         "ldr    r2, [%[a], #32]\n\t"
10581         "ldr    r3, [%[a], #36]\n\t"
10582         "ldr    r4, [%[a], #40]\n\t"
10583         "ldr    r5, [%[a], #44]\n\t"
10584         "ldr    r6, [%[b], #32]\n\t"
10585         "ldr    r7, [%[b], #36]\n\t"
10586         "ldr    r8, [%[b], #40]\n\t"
10587         "ldr    r9, [%[b], #44]\n\t"
10588         "sbcs   r2, r2, r6\n\t"
10589         "sbcs   r3, r3, r7\n\t"
10590         "sbcs   r4, r4, r8\n\t"
10591         "sbcs   r5, r5, r9\n\t"
10592         "str    r2, [%[a], #32]\n\t"
10593         "str    r3, [%[a], #36]\n\t"
10594         "str    r4, [%[a], #40]\n\t"
10595         "str    r5, [%[a], #44]\n\t"
10596         "ldr    r2, [%[a], #48]\n\t"
10597         "ldr    r3, [%[a], #52]\n\t"
10598         "ldr    r4, [%[a], #56]\n\t"
10599         "ldr    r5, [%[a], #60]\n\t"
10600         "ldr    r6, [%[b], #48]\n\t"
10601         "ldr    r7, [%[b], #52]\n\t"
10602         "ldr    r8, [%[b], #56]\n\t"
10603         "ldr    r9, [%[b], #60]\n\t"
10604         "sbcs   r2, r2, r6\n\t"
10605         "sbcs   r3, r3, r7\n\t"
10606         "sbcs   r4, r4, r8\n\t"
10607         "sbcs   r5, r5, r9\n\t"
10608         "str    r2, [%[a], #48]\n\t"
10609         "str    r3, [%[a], #52]\n\t"
10610         "str    r4, [%[a], #56]\n\t"
10611         "str    r5, [%[a], #60]\n\t"
10612         "ldr    r2, [%[a], #64]\n\t"
10613         "ldr    r3, [%[a], #68]\n\t"
10614         "ldr    r4, [%[a], #72]\n\t"
10615         "ldr    r5, [%[a], #76]\n\t"
10616         "ldr    r6, [%[b], #64]\n\t"
10617         "ldr    r7, [%[b], #68]\n\t"
10618         "ldr    r8, [%[b], #72]\n\t"
10619         "ldr    r9, [%[b], #76]\n\t"
10620         "sbcs   r2, r2, r6\n\t"
10621         "sbcs   r3, r3, r7\n\t"
10622         "sbcs   r4, r4, r8\n\t"
10623         "sbcs   r5, r5, r9\n\t"
10624         "str    r2, [%[a], #64]\n\t"
10625         "str    r3, [%[a], #68]\n\t"
10626         "str    r4, [%[a], #72]\n\t"
10627         "str    r5, [%[a], #76]\n\t"
10628         "ldr    r2, [%[a], #80]\n\t"
10629         "ldr    r3, [%[a], #84]\n\t"
10630         "ldr    r4, [%[a], #88]\n\t"
10631         "ldr    r5, [%[a], #92]\n\t"
10632         "ldr    r6, [%[b], #80]\n\t"
10633         "ldr    r7, [%[b], #84]\n\t"
10634         "ldr    r8, [%[b], #88]\n\t"
10635         "ldr    r9, [%[b], #92]\n\t"
10636         "sbcs   r2, r2, r6\n\t"
10637         "sbcs   r3, r3, r7\n\t"
10638         "sbcs   r4, r4, r8\n\t"
10639         "sbcs   r5, r5, r9\n\t"
10640         "str    r2, [%[a], #80]\n\t"
10641         "str    r3, [%[a], #84]\n\t"
10642         "str    r4, [%[a], #88]\n\t"
10643         "str    r5, [%[a], #92]\n\t"
10644         "ldr    r2, [%[a], #96]\n\t"
10645         "ldr    r3, [%[a], #100]\n\t"
10646         "ldr    r4, [%[a], #104]\n\t"
10647         "ldr    r5, [%[a], #108]\n\t"
10648         "ldr    r6, [%[b], #96]\n\t"
10649         "ldr    r7, [%[b], #100]\n\t"
10650         "ldr    r8, [%[b], #104]\n\t"
10651         "ldr    r9, [%[b], #108]\n\t"
10652         "sbcs   r2, r2, r6\n\t"
10653         "sbcs   r3, r3, r7\n\t"
10654         "sbcs   r4, r4, r8\n\t"
10655         "sbcs   r5, r5, r9\n\t"
10656         "str    r2, [%[a], #96]\n\t"
10657         "str    r3, [%[a], #100]\n\t"
10658         "str    r4, [%[a], #104]\n\t"
10659         "str    r5, [%[a], #108]\n\t"
10660         "ldr    r2, [%[a], #112]\n\t"
10661         "ldr    r3, [%[a], #116]\n\t"
10662         "ldr    r4, [%[a], #120]\n\t"
10663         "ldr    r5, [%[a], #124]\n\t"
10664         "ldr    r6, [%[b], #112]\n\t"
10665         "ldr    r7, [%[b], #116]\n\t"
10666         "ldr    r8, [%[b], #120]\n\t"
10667         "ldr    r9, [%[b], #124]\n\t"
10668         "sbcs   r2, r2, r6\n\t"
10669         "sbcs   r3, r3, r7\n\t"
10670         "sbcs   r4, r4, r8\n\t"
10671         "sbcs   r5, r5, r9\n\t"
10672         "str    r2, [%[a], #112]\n\t"
10673         "str    r3, [%[a], #116]\n\t"
10674         "str    r4, [%[a], #120]\n\t"
10675         "str    r5, [%[a], #124]\n\t"
10676         "ldr    r2, [%[a], #128]\n\t"
10677         "ldr    r3, [%[a], #132]\n\t"
10678         "ldr    r4, [%[a], #136]\n\t"
10679         "ldr    r5, [%[a], #140]\n\t"
10680         "ldr    r6, [%[b], #128]\n\t"
10681         "ldr    r7, [%[b], #132]\n\t"
10682         "ldr    r8, [%[b], #136]\n\t"
10683         "ldr    r9, [%[b], #140]\n\t"
10684         "sbcs   r2, r2, r6\n\t"
10685         "sbcs   r3, r3, r7\n\t"
10686         "sbcs   r4, r4, r8\n\t"
10687         "sbcs   r5, r5, r9\n\t"
10688         "str    r2, [%[a], #128]\n\t"
10689         "str    r3, [%[a], #132]\n\t"
10690         "str    r4, [%[a], #136]\n\t"
10691         "str    r5, [%[a], #140]\n\t"
10692         "ldr    r2, [%[a], #144]\n\t"
10693         "ldr    r3, [%[a], #148]\n\t"
10694         "ldr    r4, [%[a], #152]\n\t"
10695         "ldr    r5, [%[a], #156]\n\t"
10696         "ldr    r6, [%[b], #144]\n\t"
10697         "ldr    r7, [%[b], #148]\n\t"
10698         "ldr    r8, [%[b], #152]\n\t"
10699         "ldr    r9, [%[b], #156]\n\t"
10700         "sbcs   r2, r2, r6\n\t"
10701         "sbcs   r3, r3, r7\n\t"
10702         "sbcs   r4, r4, r8\n\t"
10703         "sbcs   r5, r5, r9\n\t"
10704         "str    r2, [%[a], #144]\n\t"
10705         "str    r3, [%[a], #148]\n\t"
10706         "str    r4, [%[a], #152]\n\t"
10707         "str    r5, [%[a], #156]\n\t"
10708         "ldr    r2, [%[a], #160]\n\t"
10709         "ldr    r3, [%[a], #164]\n\t"
10710         "ldr    r4, [%[a], #168]\n\t"
10711         "ldr    r5, [%[a], #172]\n\t"
10712         "ldr    r6, [%[b], #160]\n\t"
10713         "ldr    r7, [%[b], #164]\n\t"
10714         "ldr    r8, [%[b], #168]\n\t"
10715         "ldr    r9, [%[b], #172]\n\t"
10716         "sbcs   r2, r2, r6\n\t"
10717         "sbcs   r3, r3, r7\n\t"
10718         "sbcs   r4, r4, r8\n\t"
10719         "sbcs   r5, r5, r9\n\t"
10720         "str    r2, [%[a], #160]\n\t"
10721         "str    r3, [%[a], #164]\n\t"
10722         "str    r4, [%[a], #168]\n\t"
10723         "str    r5, [%[a], #172]\n\t"
10724         "ldr    r2, [%[a], #176]\n\t" /* last group: words 44..47 */
10725         "ldr    r3, [%[a], #180]\n\t"
10726         "ldr    r4, [%[a], #184]\n\t"
10727         "ldr    r5, [%[a], #188]\n\t"
10728         "ldr    r6, [%[b], #176]\n\t"
10729         "ldr    r7, [%[b], #180]\n\t"
10730         "ldr    r8, [%[b], #184]\n\t"
10731         "ldr    r9, [%[b], #188]\n\t"
10732         "sbcs   r2, r2, r6\n\t"
10733         "sbcs   r3, r3, r7\n\t"
10734         "sbcs   r4, r4, r8\n\t"
10735         "sbcs   r5, r5, r9\n\t"
10736         "str    r2, [%[a], #176]\n\t"
10737         "str    r3, [%[a], #180]\n\t"
10738         "str    r4, [%[a], #184]\n\t"
10739         "str    r5, [%[a], #188]\n\t"
10740         "sbc    %[c], r9, r9\n\t" /* r9 - r9 - borrow: c = 0 or -1 whatever r9 holds */
10741         : [c] "+r" (c)
10742         : [a] "r" (a), [b] "r" (b) /* fixed offsets are used, so the pointers are inputs only */
10743         : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
10744     );
10745 
10746     return c;
10747 }
10748 
10749 #endif /* WOLFSSL_SP_SMALL */
10750 /* r = 2^n mod m where n is the number of bits to reduce by.
10751  * Given m must be 3072 bits, just need to subtract.
10752  *
10753  * r  A single precision number.
10754  * m  A signle precision number.
10755  */
10756 static void sp_3072_mont_norm_48(sp_digit* r, sp_digit* m)
10757 {
10758     XMEMSET(r, 0, sizeof(sp_digit) * 48);
10759 
10760     /* r = 2^n mod m */
10761     sp_3072_sub_in_place_48(r, m);
10762 }
10763 
10764 /* Conditionally subtract b from a using the mask m.
10765  * m is -1 to subtract and 0 when not copying.
10766  *
10767  * r  A single precision number representing condition subtract result.
10768  * a  A single precision number to subtract from.
10769  * b  A single precision number to subtract.
10770  * m  Mask value to apply.
10771  */
10772 static sp_digit sp_3072_cond_sub_48(sp_digit* r, sp_digit* a, sp_digit* b,
10773         sp_digit m)
10774 {
10775     sp_digit c = 0;
10776 
10777 #ifdef WOLFSSL_SP_SMALL
10778     __asm__ __volatile__ (
10779         "mov    r9, #0\n\t"
10780         "mov    r8, #0\n\t"
10781         "1:\n\t"
10782         "subs   %[c], r9, %[c]\n\t"
10783         "ldr    r4, [%[a], r8]\n\t"
10784         "ldr    r5, [%[b], r8]\n\t"
10785         "and    r5, r5, %[m]\n\t"
10786         "sbcs   r4, r4, r5\n\t"
10787         "sbc    %[c], r9, r9\n\t"
10788         "str    r4, [%[r], r8]\n\t"
10789         "add    r8, r8, #4\n\t"
10790         "cmp    r8, #192\n\t"
10791         "blt    1b\n\t"
10792         : [c] "+r" (c)
10793         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
10794         : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
10795     );
10796 #else
10797     __asm__ __volatile__ (
10798 
10799         "mov    r9, #0\n\t"
10800         "ldr    r4, [%[a], #0]\n\t"
10801         "ldr    r6, [%[a], #4]\n\t"
10802         "ldr    r5, [%[b], #0]\n\t"
10803         "ldr    r7, [%[b], #4]\n\t"
10804         "and    r5, r5, %[m]\n\t"
10805         "and    r7, r7, %[m]\n\t"
10806         "subs   r4, r4, r5\n\t"
10807         "sbcs   r6, r6, r7\n\t"
10808         "str    r4, [%[r], #0]\n\t"
10809         "str    r6, [%[r], #4]\n\t"
10810         "ldr    r4, [%[a], #8]\n\t"
10811         "ldr    r6, [%[a], #12]\n\t"
10812         "ldr    r5, [%[b], #8]\n\t"
10813         "ldr    r7, [%[b], #12]\n\t"
10814         "and    r5, r5, %[m]\n\t"
10815         "and    r7, r7, %[m]\n\t"
10816         "sbcs   r4, r4, r5\n\t"
10817         "sbcs   r6, r6, r7\n\t"
10818         "str    r4, [%[r], #8]\n\t"
10819         "str    r6, [%[r], #12]\n\t"
10820         "ldr    r4, [%[a], #16]\n\t"
10821         "ldr    r6, [%[a], #20]\n\t"
10822         "ldr    r5, [%[b], #16]\n\t"
10823         "ldr    r7, [%[b], #20]\n\t"
10824         "and    r5, r5, %[m]\n\t"
10825         "and    r7, r7, %[m]\n\t"
10826         "sbcs   r4, r4, r5\n\t"
10827         "sbcs   r6, r6, r7\n\t"
10828         "str    r4, [%[r], #16]\n\t"
10829         "str    r6, [%[r], #20]\n\t"
10830         "ldr    r4, [%[a], #24]\n\t"
10831         "ldr    r6, [%[a], #28]\n\t"
10832         "ldr    r5, [%[b], #24]\n\t"
10833         "ldr    r7, [%[b], #28]\n\t"
10834         "and    r5, r5, %[m]\n\t"
10835         "and    r7, r7, %[m]\n\t"
10836         "sbcs   r4, r4, r5\n\t"
10837         "sbcs   r6, r6, r7\n\t"
10838         "str    r4, [%[r], #24]\n\t"
10839         "str    r6, [%[r], #28]\n\t"
10840         "ldr    r4, [%[a], #32]\n\t"
10841         "ldr    r6, [%[a], #36]\n\t"
10842         "ldr    r5, [%[b], #32]\n\t"
10843         "ldr    r7, [%[b], #36]\n\t"
10844         "and    r5, r5, %[m]\n\t"
10845         "and    r7, r7, %[m]\n\t"
10846         "sbcs   r4, r4, r5\n\t"
10847         "sbcs   r6, r6, r7\n\t"
10848         "str    r4, [%[r], #32]\n\t"
10849         "str    r6, [%[r], #36]\n\t"
10850         "ldr    r4, [%[a], #40]\n\t"
10851         "ldr    r6, [%[a], #44]\n\t"
10852         "ldr    r5, [%[b], #40]\n\t"
10853         "ldr    r7, [%[b], #44]\n\t"
10854         "and    r5, r5, %[m]\n\t"
10855         "and    r7, r7, %[m]\n\t"
10856         "sbcs   r4, r4, r5\n\t"
10857         "sbcs   r6, r6, r7\n\t"
10858         "str    r4, [%[r], #40]\n\t"
10859         "str    r6, [%[r], #44]\n\t"
10860         "ldr    r4, [%[a], #48]\n\t"
10861         "ldr    r6, [%[a], #52]\n\t"
10862         "ldr    r5, [%[b], #48]\n\t"
10863         "ldr    r7, [%[b], #52]\n\t"
10864         "and    r5, r5, %[m]\n\t"
10865         "and    r7, r7, %[m]\n\t"
10866         "sbcs   r4, r4, r5\n\t"
10867         "sbcs   r6, r6, r7\n\t"
10868         "str    r4, [%[r], #48]\n\t"
10869         "str    r6, [%[r], #52]\n\t"
10870         "ldr    r4, [%[a], #56]\n\t"
10871         "ldr    r6, [%[a], #60]\n\t"
10872         "ldr    r5, [%[b], #56]\n\t"
10873         "ldr    r7, [%[b], #60]\n\t"
10874         "and    r5, r5, %[m]\n\t"
10875         "and    r7, r7, %[m]\n\t"
10876         "sbcs   r4, r4, r5\n\t"
10877         "sbcs   r6, r6, r7\n\t"
10878         "str    r4, [%[r], #56]\n\t"
10879         "str    r6, [%[r], #60]\n\t"
10880         "ldr    r4, [%[a], #64]\n\t"
10881         "ldr    r6, [%[a], #68]\n\t"
10882         "ldr    r5, [%[b], #64]\n\t"
10883         "ldr    r7, [%[b], #68]\n\t"
10884         "and    r5, r5, %[m]\n\t"
10885         "and    r7, r7, %[m]\n\t"
10886         "sbcs   r4, r4, r5\n\t"
10887         "sbcs   r6, r6, r7\n\t"
10888         "str    r4, [%[r], #64]\n\t"
10889         "str    r6, [%[r], #68]\n\t"
10890         "ldr    r4, [%[a], #72]\n\t"
10891         "ldr    r6, [%[a], #76]\n\t"
10892         "ldr    r5, [%[b], #72]\n\t"
10893         "ldr    r7, [%[b], #76]\n\t"
10894         "and    r5, r5, %[m]\n\t"
10895         "and    r7, r7, %[m]\n\t"
10896         "sbcs   r4, r4, r5\n\t"
10897         "sbcs   r6, r6, r7\n\t"
10898         "str    r4, [%[r], #72]\n\t"
10899         "str    r6, [%[r], #76]\n\t"
10900         "ldr    r4, [%[a], #80]\n\t"
10901         "ldr    r6, [%[a], #84]\n\t"
10902         "ldr    r5, [%[b], #80]\n\t"
10903         "ldr    r7, [%[b], #84]\n\t"
10904         "and    r5, r5, %[m]\n\t"
10905         "and    r7, r7, %[m]\n\t"
10906         "sbcs   r4, r4, r5\n\t"
10907         "sbcs   r6, r6, r7\n\t"
10908         "str    r4, [%[r], #80]\n\t"
10909         "str    r6, [%[r], #84]\n\t"
10910         "ldr    r4, [%[a], #88]\n\t"
10911         "ldr    r6, [%[a], #92]\n\t"
10912         "ldr    r5, [%[b], #88]\n\t"
10913         "ldr    r7, [%[b], #92]\n\t"
10914         "and    r5, r5, %[m]\n\t"
10915         "and    r7, r7, %[m]\n\t"
10916         "sbcs   r4, r4, r5\n\t"
10917         "sbcs   r6, r6, r7\n\t"
10918         "str    r4, [%[r], #88]\n\t"
10919         "str    r6, [%[r], #92]\n\t"
10920         "ldr    r4, [%[a], #96]\n\t"
10921         "ldr    r6, [%[a], #100]\n\t"
10922         "ldr    r5, [%[b], #96]\n\t"
10923         "ldr    r7, [%[b], #100]\n\t"
10924         "and    r5, r5, %[m]\n\t"
10925         "and    r7, r7, %[m]\n\t"
10926         "sbcs   r4, r4, r5\n\t"
10927         "sbcs   r6, r6, r7\n\t"
10928         "str    r4, [%[r], #96]\n\t"
10929         "str    r6, [%[r], #100]\n\t"
10930         "ldr    r4, [%[a], #104]\n\t"
10931         "ldr    r6, [%[a], #108]\n\t"
10932         "ldr    r5, [%[b], #104]\n\t"
10933         "ldr    r7, [%[b], #108]\n\t"
10934         "and    r5, r5, %[m]\n\t"
10935         "and    r7, r7, %[m]\n\t"
10936         "sbcs   r4, r4, r5\n\t"
10937         "sbcs   r6, r6, r7\n\t"
10938         "str    r4, [%[r], #104]\n\t"
10939         "str    r6, [%[r], #108]\n\t"
10940         "ldr    r4, [%[a], #112]\n\t"
10941         "ldr    r6, [%[a], #116]\n\t"
10942         "ldr    r5, [%[b], #112]\n\t"
10943         "ldr    r7, [%[b], #116]\n\t"
10944         "and    r5, r5, %[m]\n\t"
10945         "and    r7, r7, %[m]\n\t"
10946         "sbcs   r4, r4, r5\n\t"
10947         "sbcs   r6, r6, r7\n\t"
10948         "str    r4, [%[r], #112]\n\t"
10949         "str    r6, [%[r], #116]\n\t"
10950         "ldr    r4, [%[a], #120]\n\t"
10951         "ldr    r6, [%[a], #124]\n\t"
10952         "ldr    r5, [%[b], #120]\n\t"
10953         "ldr    r7, [%[b], #124]\n\t"
10954         "and    r5, r5, %[m]\n\t"
10955         "and    r7, r7, %[m]\n\t"
10956         "sbcs   r4, r4, r5\n\t"
10957         "sbcs   r6, r6, r7\n\t"
10958         "str    r4, [%[r], #120]\n\t"
10959         "str    r6, [%[r], #124]\n\t"
10960         "ldr    r4, [%[a], #128]\n\t"
10961         "ldr    r6, [%[a], #132]\n\t"
10962         "ldr    r5, [%[b], #128]\n\t"
10963         "ldr    r7, [%[b], #132]\n\t"
10964         "and    r5, r5, %[m]\n\t"
10965         "and    r7, r7, %[m]\n\t"
10966         "sbcs   r4, r4, r5\n\t"
10967         "sbcs   r6, r6, r7\n\t"
10968         "str    r4, [%[r], #128]\n\t"
10969         "str    r6, [%[r], #132]\n\t"
10970         "ldr    r4, [%[a], #136]\n\t"
10971         "ldr    r6, [%[a], #140]\n\t"
10972         "ldr    r5, [%[b], #136]\n\t"
10973         "ldr    r7, [%[b], #140]\n\t"
10974         "and    r5, r5, %[m]\n\t"
10975         "and    r7, r7, %[m]\n\t"
10976         "sbcs   r4, r4, r5\n\t"
10977         "sbcs   r6, r6, r7\n\t"
10978         "str    r4, [%[r], #136]\n\t"
10979         "str    r6, [%[r], #140]\n\t"
10980         "ldr    r4, [%[a], #144]\n\t"
10981         "ldr    r6, [%[a], #148]\n\t"
10982         "ldr    r5, [%[b], #144]\n\t"
10983         "ldr    r7, [%[b], #148]\n\t"
10984         "and    r5, r5, %[m]\n\t"
10985         "and    r7, r7, %[m]\n\t"
10986         "sbcs   r4, r4, r5\n\t"
10987         "sbcs   r6, r6, r7\n\t"
10988         "str    r4, [%[r], #144]\n\t"
10989         "str    r6, [%[r], #148]\n\t"
10990         "ldr    r4, [%[a], #152]\n\t"
10991         "ldr    r6, [%[a], #156]\n\t"
10992         "ldr    r5, [%[b], #152]\n\t"
10993         "ldr    r7, [%[b], #156]\n\t"
10994         "and    r5, r5, %[m]\n\t"
10995         "and    r7, r7, %[m]\n\t"
10996         "sbcs   r4, r4, r5\n\t"
10997         "sbcs   r6, r6, r7\n\t"
10998         "str    r4, [%[r], #152]\n\t"
10999         "str    r6, [%[r], #156]\n\t"
11000         "ldr    r4, [%[a], #160]\n\t"
11001         "ldr    r6, [%[a], #164]\n\t"
11002         "ldr    r5, [%[b], #160]\n\t"
11003         "ldr    r7, [%[b], #164]\n\t"
11004         "and    r5, r5, %[m]\n\t"
11005         "and    r7, r7, %[m]\n\t"
11006         "sbcs   r4, r4, r5\n\t"
11007         "sbcs   r6, r6, r7\n\t"
11008         "str    r4, [%[r], #160]\n\t"
11009         "str    r6, [%[r], #164]\n\t"
11010         "ldr    r4, [%[a], #168]\n\t"
11011         "ldr    r6, [%[a], #172]\n\t"
11012         "ldr    r5, [%[b], #168]\n\t"
11013         "ldr    r7, [%[b], #172]\n\t"
11014         "and    r5, r5, %[m]\n\t"
11015         "and    r7, r7, %[m]\n\t"
11016         "sbcs   r4, r4, r5\n\t"
11017         "sbcs   r6, r6, r7\n\t"
11018         "str    r4, [%[r], #168]\n\t"
11019         "str    r6, [%[r], #172]\n\t"
11020         "ldr    r4, [%[a], #176]\n\t"
11021         "ldr    r6, [%[a], #180]\n\t"
11022         "ldr    r5, [%[b], #176]\n\t"
11023         "ldr    r7, [%[b], #180]\n\t"
11024         "and    r5, r5, %[m]\n\t"
11025         "and    r7, r7, %[m]\n\t"
11026         "sbcs   r4, r4, r5\n\t"
11027         "sbcs   r6, r6, r7\n\t"
11028         "str    r4, [%[r], #176]\n\t"
11029         "str    r6, [%[r], #180]\n\t"
11030         "ldr    r4, [%[a], #184]\n\t"
11031         "ldr    r6, [%[a], #188]\n\t"
11032         "ldr    r5, [%[b], #184]\n\t"
11033         "ldr    r7, [%[b], #188]\n\t"
11034         "and    r5, r5, %[m]\n\t"
11035         "and    r7, r7, %[m]\n\t"
11036         "sbcs   r4, r4, r5\n\t"
11037         "sbcs   r6, r6, r7\n\t"
11038         "str    r4, [%[r], #184]\n\t"
11039         "str    r6, [%[r], #188]\n\t"
11040         "sbc    %[c], r9, r9\n\t"
11041         : [c] "+r" (c)
11042         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
11043         : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
11044     );
11045 #endif /* WOLFSSL_SP_SMALL */
11046 
11047     return c;
11048 }
11049 
11050 /* Reduce a 96-digit number back to 48 digits using Montgomery reduction.
11051  *
       * The assembly steps through a and m in 4-byte words, so the 48-digit
       * result is 1536 bits wide (half of the 3072 in the function name).
       *
       * The loop runs 48 times (r12 counts 0, 4, ..., 188). Each pass computes
       * mu = a[0] * mp, adds m[0..47] * mu into a[0..47], adds the final carry
       * into a[48] and then advances the a pointer by one digit. The overflow
       * out of a[48] is kept in ca and folded into the top digit of the next
       * pass. The updated a[0] is not written back; the updated a[1] and a[2]
       * are carried in r10/r14 into the next pass and are only stored once the
       * loop has finished.
       *
       * After the loop a points at the upper 48 digits. sp_3072_cond_sub_48()
       * is then called with the original a as destination, the upper 48 digits
       * as source, m as the value to subtract and (0 - ca) as the mask, which
       * leaves the reduced value in the low 48 digits of the caller's buffer.
       *
11052  * a   A single precision number to reduce in place. Digits 0..95 are
       *     accessed; the result is left in digits 0..47.
11053  * m   The single precision number representing the modulus (48 digits read).
11054  * mp  The digit representing the negative inverse of m mod 2^n.
11055  */
11056 SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, sp_digit* m,
11057         sp_digit mp)
11058 {
11059     sp_digit ca = 0; /* overflow out of the top digit, kept between passes */
11060 
11061     __asm__ __volatile__ (
              /* r12 = byte offset of the current pass; r10/r14 cache a[0]/a[1]. */
11062         "# i = 0\n\t"
11063         "mov    r12, #0\n\t"
11064         "ldr    r10, [%[a], #0]\n\t"
11065         "ldr    r14, [%[a], #4]\n\t"
11066         "\n1:\n\t"
              /* r8 = mu; r6:r7 receive each low:high product of mu and m[j]. */
11067         "# mu = a[i] * mp\n\t"
11068         "mul    r8, %[mp], r10\n\t"
11069         "# a[i+0] += m[0] * mu\n\t"
11070         "ldr    r7, [%[m], #0]\n\t"
              /* NOTE(review): r9 is loaded here and for a[i+1] below, but it is
               * reloaded at a[i+3] before being read; the cached r10/r14 values
               * are what the additions use. */
11071         "ldr    r9, [%[a], #0]\n\t"
11072         "umull  r6, r7, r8, r7\n\t"
              /* Only the carry of this sum is kept; r10 is overwritten below. */
11073         "adds   r10, r10, r6\n\t"
11074         "adc    r5, r7, #0\n\t"
              /* The new a[i+1] lands in r10, i.e. it is a[i] of the next pass. */
11075         "# a[i+1] += m[1] * mu\n\t"
11076         "ldr    r7, [%[m], #4]\n\t"
11077         "ldr    r9, [%[a], #4]\n\t"
11078         "umull  r6, r7, r8, r7\n\t"
11079         "adds   r10, r14, r6\n\t"
11080         "adc    r4, r7, #0\n\t"
11081         "adds   r10, r10, r5\n\t"
11082         "adc    r4, r4, #0\n\t"
              /* The new a[i+2] lands in r14, i.e. it is a[i+1] of the next pass. */
11083         "# a[i+2] += m[2] * mu\n\t"
11084         "ldr    r7, [%[m], #8]\n\t"
11085         "ldr    r14, [%[a], #8]\n\t"
11086         "umull  r6, r7, r8, r7\n\t"
11087         "adds   r14, r14, r6\n\t"
11088         "adc    r5, r7, #0\n\t"
11089         "adds   r14, r14, r4\n\t"
11090         "adc    r5, r5, #0\n\t"
              /* From here to a[i+46] each digit is loaded into r9, updated and
               * stored back; r4 and r5 alternate as the carry word handed from
               * one digit to the next. */
11091         "# a[i+3] += m[3] * mu\n\t"
11092         "ldr    r7, [%[m], #12]\n\t"
11093         "ldr    r9, [%[a], #12]\n\t"
11094         "umull  r6, r7, r8, r7\n\t"
11095         "adds   r9, r9, r6\n\t"
11096         "adc    r4, r7, #0\n\t"
11097         "adds   r9, r9, r5\n\t"
11098         "str    r9, [%[a], #12]\n\t"
11099         "adc    r4, r4, #0\n\t"
11100         "# a[i+4] += m[4] * mu\n\t"
11101         "ldr    r7, [%[m], #16]\n\t"
11102         "ldr    r9, [%[a], #16]\n\t"
11103         "umull  r6, r7, r8, r7\n\t"
11104         "adds   r9, r9, r6\n\t"
11105         "adc    r5, r7, #0\n\t"
11106         "adds   r9, r9, r4\n\t"
11107         "str    r9, [%[a], #16]\n\t"
11108         "adc    r5, r5, #0\n\t"
11109         "# a[i+5] += m[5] * mu\n\t"
11110         "ldr    r7, [%[m], #20]\n\t"
11111         "ldr    r9, [%[a], #20]\n\t"
11112         "umull  r6, r7, r8, r7\n\t"
11113         "adds   r9, r9, r6\n\t"
11114         "adc    r4, r7, #0\n\t"
11115         "adds   r9, r9, r5\n\t"
11116         "str    r9, [%[a], #20]\n\t"
11117         "adc    r4, r4, #0\n\t"
11118         "# a[i+6] += m[6] * mu\n\t"
11119         "ldr    r7, [%[m], #24]\n\t"
11120         "ldr    r9, [%[a], #24]\n\t"
11121         "umull  r6, r7, r8, r7\n\t"
11122         "adds   r9, r9, r6\n\t"
11123         "adc    r5, r7, #0\n\t"
11124         "adds   r9, r9, r4\n\t"
11125         "str    r9, [%[a], #24]\n\t"
11126         "adc    r5, r5, #0\n\t"
11127         "# a[i+7] += m[7] * mu\n\t"
11128         "ldr    r7, [%[m], #28]\n\t"
11129         "ldr    r9, [%[a], #28]\n\t"
11130         "umull  r6, r7, r8, r7\n\t"
11131         "adds   r9, r9, r6\n\t"
11132         "adc    r4, r7, #0\n\t"
11133         "adds   r9, r9, r5\n\t"
11134         "str    r9, [%[a], #28]\n\t"
11135         "adc    r4, r4, #0\n\t"
11136         "# a[i+8] += m[8] * mu\n\t"
11137         "ldr    r7, [%[m], #32]\n\t"
11138         "ldr    r9, [%[a], #32]\n\t"
11139         "umull  r6, r7, r8, r7\n\t"
11140         "adds   r9, r9, r6\n\t"
11141         "adc    r5, r7, #0\n\t"
11142         "adds   r9, r9, r4\n\t"
11143         "str    r9, [%[a], #32]\n\t"
11144         "adc    r5, r5, #0\n\t"
11145         "# a[i+9] += m[9] * mu\n\t"
11146         "ldr    r7, [%[m], #36]\n\t"
11147         "ldr    r9, [%[a], #36]\n\t"
11148         "umull  r6, r7, r8, r7\n\t"
11149         "adds   r9, r9, r6\n\t"
11150         "adc    r4, r7, #0\n\t"
11151         "adds   r9, r9, r5\n\t"
11152         "str    r9, [%[a], #36]\n\t"
11153         "adc    r4, r4, #0\n\t"
11154         "# a[i+10] += m[10] * mu\n\t"
11155         "ldr    r7, [%[m], #40]\n\t"
11156         "ldr    r9, [%[a], #40]\n\t"
11157         "umull  r6, r7, r8, r7\n\t"
11158         "adds   r9, r9, r6\n\t"
11159         "adc    r5, r7, #0\n\t"
11160         "adds   r9, r9, r4\n\t"
11161         "str    r9, [%[a], #40]\n\t"
11162         "adc    r5, r5, #0\n\t"
11163         "# a[i+11] += m[11] * mu\n\t"
11164         "ldr    r7, [%[m], #44]\n\t"
11165         "ldr    r9, [%[a], #44]\n\t"
11166         "umull  r6, r7, r8, r7\n\t"
11167         "adds   r9, r9, r6\n\t"
11168         "adc    r4, r7, #0\n\t"
11169         "adds   r9, r9, r5\n\t"
11170         "str    r9, [%[a], #44]\n\t"
11171         "adc    r4, r4, #0\n\t"
11172         "# a[i+12] += m[12] * mu\n\t"
11173         "ldr    r7, [%[m], #48]\n\t"
11174         "ldr    r9, [%[a], #48]\n\t"
11175         "umull  r6, r7, r8, r7\n\t"
11176         "adds   r9, r9, r6\n\t"
11177         "adc    r5, r7, #0\n\t"
11178         "adds   r9, r9, r4\n\t"
11179         "str    r9, [%[a], #48]\n\t"
11180         "adc    r5, r5, #0\n\t"
11181         "# a[i+13] += m[13] * mu\n\t"
11182         "ldr    r7, [%[m], #52]\n\t"
11183         "ldr    r9, [%[a], #52]\n\t"
11184         "umull  r6, r7, r8, r7\n\t"
11185         "adds   r9, r9, r6\n\t"
11186         "adc    r4, r7, #0\n\t"
11187         "adds   r9, r9, r5\n\t"
11188         "str    r9, [%[a], #52]\n\t"
11189         "adc    r4, r4, #0\n\t"
11190         "# a[i+14] += m[14] * mu\n\t"
11191         "ldr    r7, [%[m], #56]\n\t"
11192         "ldr    r9, [%[a], #56]\n\t"
11193         "umull  r6, r7, r8, r7\n\t"
11194         "adds   r9, r9, r6\n\t"
11195         "adc    r5, r7, #0\n\t"
11196         "adds   r9, r9, r4\n\t"
11197         "str    r9, [%[a], #56]\n\t"
11198         "adc    r5, r5, #0\n\t"
11199         "# a[i+15] += m[15] * mu\n\t"
11200         "ldr    r7, [%[m], #60]\n\t"
11201         "ldr    r9, [%[a], #60]\n\t"
11202         "umull  r6, r7, r8, r7\n\t"
11203         "adds   r9, r9, r6\n\t"
11204         "adc    r4, r7, #0\n\t"
11205         "adds   r9, r9, r5\n\t"
11206         "str    r9, [%[a], #60]\n\t"
11207         "adc    r4, r4, #0\n\t"
11208         "# a[i+16] += m[16] * mu\n\t"
11209         "ldr    r7, [%[m], #64]\n\t"
11210         "ldr    r9, [%[a], #64]\n\t"
11211         "umull  r6, r7, r8, r7\n\t"
11212         "adds   r9, r9, r6\n\t"
11213         "adc    r5, r7, #0\n\t"
11214         "adds   r9, r9, r4\n\t"
11215         "str    r9, [%[a], #64]\n\t"
11216         "adc    r5, r5, #0\n\t"
11217         "# a[i+17] += m[17] * mu\n\t"
11218         "ldr    r7, [%[m], #68]\n\t"
11219         "ldr    r9, [%[a], #68]\n\t"
11220         "umull  r6, r7, r8, r7\n\t"
11221         "adds   r9, r9, r6\n\t"
11222         "adc    r4, r7, #0\n\t"
11223         "adds   r9, r9, r5\n\t"
11224         "str    r9, [%[a], #68]\n\t"
11225         "adc    r4, r4, #0\n\t"
11226         "# a[i+18] += m[18] * mu\n\t"
11227         "ldr    r7, [%[m], #72]\n\t"
11228         "ldr    r9, [%[a], #72]\n\t"
11229         "umull  r6, r7, r8, r7\n\t"
11230         "adds   r9, r9, r6\n\t"
11231         "adc    r5, r7, #0\n\t"
11232         "adds   r9, r9, r4\n\t"
11233         "str    r9, [%[a], #72]\n\t"
11234         "adc    r5, r5, #0\n\t"
11235         "# a[i+19] += m[19] * mu\n\t"
11236         "ldr    r7, [%[m], #76]\n\t"
11237         "ldr    r9, [%[a], #76]\n\t"
11238         "umull  r6, r7, r8, r7\n\t"
11239         "adds   r9, r9, r6\n\t"
11240         "adc    r4, r7, #0\n\t"
11241         "adds   r9, r9, r5\n\t"
11242         "str    r9, [%[a], #76]\n\t"
11243         "adc    r4, r4, #0\n\t"
11244         "# a[i+20] += m[20] * mu\n\t"
11245         "ldr    r7, [%[m], #80]\n\t"
11246         "ldr    r9, [%[a], #80]\n\t"
11247         "umull  r6, r7, r8, r7\n\t"
11248         "adds   r9, r9, r6\n\t"
11249         "adc    r5, r7, #0\n\t"
11250         "adds   r9, r9, r4\n\t"
11251         "str    r9, [%[a], #80]\n\t"
11252         "adc    r5, r5, #0\n\t"
11253         "# a[i+21] += m[21] * mu\n\t"
11254         "ldr    r7, [%[m], #84]\n\t"
11255         "ldr    r9, [%[a], #84]\n\t"
11256         "umull  r6, r7, r8, r7\n\t"
11257         "adds   r9, r9, r6\n\t"
11258         "adc    r4, r7, #0\n\t"
11259         "adds   r9, r9, r5\n\t"
11260         "str    r9, [%[a], #84]\n\t"
11261         "adc    r4, r4, #0\n\t"
11262         "# a[i+22] += m[22] * mu\n\t"
11263         "ldr    r7, [%[m], #88]\n\t"
11264         "ldr    r9, [%[a], #88]\n\t"
11265         "umull  r6, r7, r8, r7\n\t"
11266         "adds   r9, r9, r6\n\t"
11267         "adc    r5, r7, #0\n\t"
11268         "adds   r9, r9, r4\n\t"
11269         "str    r9, [%[a], #88]\n\t"
11270         "adc    r5, r5, #0\n\t"
11271         "# a[i+23] += m[23] * mu\n\t"
11272         "ldr    r7, [%[m], #92]\n\t"
11273         "ldr    r9, [%[a], #92]\n\t"
11274         "umull  r6, r7, r8, r7\n\t"
11275         "adds   r9, r9, r6\n\t"
11276         "adc    r4, r7, #0\n\t"
11277         "adds   r9, r9, r5\n\t"
11278         "str    r9, [%[a], #92]\n\t"
11279         "adc    r4, r4, #0\n\t"
11280         "# a[i+24] += m[24] * mu\n\t"
11281         "ldr    r7, [%[m], #96]\n\t"
11282         "ldr    r9, [%[a], #96]\n\t"
11283         "umull  r6, r7, r8, r7\n\t"
11284         "adds   r9, r9, r6\n\t"
11285         "adc    r5, r7, #0\n\t"
11286         "adds   r9, r9, r4\n\t"
11287         "str    r9, [%[a], #96]\n\t"
11288         "adc    r5, r5, #0\n\t"
11289         "# a[i+25] += m[25] * mu\n\t"
11290         "ldr    r7, [%[m], #100]\n\t"
11291         "ldr    r9, [%[a], #100]\n\t"
11292         "umull  r6, r7, r8, r7\n\t"
11293         "adds   r9, r9, r6\n\t"
11294         "adc    r4, r7, #0\n\t"
11295         "adds   r9, r9, r5\n\t"
11296         "str    r9, [%[a], #100]\n\t"
11297         "adc    r4, r4, #0\n\t"
11298         "# a[i+26] += m[26] * mu\n\t"
11299         "ldr    r7, [%[m], #104]\n\t"
11300         "ldr    r9, [%[a], #104]\n\t"
11301         "umull  r6, r7, r8, r7\n\t"
11302         "adds   r9, r9, r6\n\t"
11303         "adc    r5, r7, #0\n\t"
11304         "adds   r9, r9, r4\n\t"
11305         "str    r9, [%[a], #104]\n\t"
11306         "adc    r5, r5, #0\n\t"
11307         "# a[i+27] += m[27] * mu\n\t"
11308         "ldr    r7, [%[m], #108]\n\t"
11309         "ldr    r9, [%[a], #108]\n\t"
11310         "umull  r6, r7, r8, r7\n\t"
11311         "adds   r9, r9, r6\n\t"
11312         "adc    r4, r7, #0\n\t"
11313         "adds   r9, r9, r5\n\t"
11314         "str    r9, [%[a], #108]\n\t"
11315         "adc    r4, r4, #0\n\t"
11316         "# a[i+28] += m[28] * mu\n\t"
11317         "ldr    r7, [%[m], #112]\n\t"
11318         "ldr    r9, [%[a], #112]\n\t"
11319         "umull  r6, r7, r8, r7\n\t"
11320         "adds   r9, r9, r6\n\t"
11321         "adc    r5, r7, #0\n\t"
11322         "adds   r9, r9, r4\n\t"
11323         "str    r9, [%[a], #112]\n\t"
11324         "adc    r5, r5, #0\n\t"
11325         "# a[i+29] += m[29] * mu\n\t"
11326         "ldr    r7, [%[m], #116]\n\t"
11327         "ldr    r9, [%[a], #116]\n\t"
11328         "umull  r6, r7, r8, r7\n\t"
11329         "adds   r9, r9, r6\n\t"
11330         "adc    r4, r7, #0\n\t"
11331         "adds   r9, r9, r5\n\t"
11332         "str    r9, [%[a], #116]\n\t"
11333         "adc    r4, r4, #0\n\t"
11334         "# a[i+30] += m[30] * mu\n\t"
11335         "ldr    r7, [%[m], #120]\n\t"
11336         "ldr    r9, [%[a], #120]\n\t"
11337         "umull  r6, r7, r8, r7\n\t"
11338         "adds   r9, r9, r6\n\t"
11339         "adc    r5, r7, #0\n\t"
11340         "adds   r9, r9, r4\n\t"
11341         "str    r9, [%[a], #120]\n\t"
11342         "adc    r5, r5, #0\n\t"
11343         "# a[i+31] += m[31] * mu\n\t"
11344         "ldr    r7, [%[m], #124]\n\t"
11345         "ldr    r9, [%[a], #124]\n\t"
11346         "umull  r6, r7, r8, r7\n\t"
11347         "adds   r9, r9, r6\n\t"
11348         "adc    r4, r7, #0\n\t"
11349         "adds   r9, r9, r5\n\t"
11350         "str    r9, [%[a], #124]\n\t"
11351         "adc    r4, r4, #0\n\t"
11352         "# a[i+32] += m[32] * mu\n\t"
11353         "ldr    r7, [%[m], #128]\n\t"
11354         "ldr    r9, [%[a], #128]\n\t"
11355         "umull  r6, r7, r8, r7\n\t"
11356         "adds   r9, r9, r6\n\t"
11357         "adc    r5, r7, #0\n\t"
11358         "adds   r9, r9, r4\n\t"
11359         "str    r9, [%[a], #128]\n\t"
11360         "adc    r5, r5, #0\n\t"
11361         "# a[i+33] += m[33] * mu\n\t"
11362         "ldr    r7, [%[m], #132]\n\t"
11363         "ldr    r9, [%[a], #132]\n\t"
11364         "umull  r6, r7, r8, r7\n\t"
11365         "adds   r9, r9, r6\n\t"
11366         "adc    r4, r7, #0\n\t"
11367         "adds   r9, r9, r5\n\t"
11368         "str    r9, [%[a], #132]\n\t"
11369         "adc    r4, r4, #0\n\t"
11370         "# a[i+34] += m[34] * mu\n\t"
11371         "ldr    r7, [%[m], #136]\n\t"
11372         "ldr    r9, [%[a], #136]\n\t"
11373         "umull  r6, r7, r8, r7\n\t"
11374         "adds   r9, r9, r6\n\t"
11375         "adc    r5, r7, #0\n\t"
11376         "adds   r9, r9, r4\n\t"
11377         "str    r9, [%[a], #136]\n\t"
11378         "adc    r5, r5, #0\n\t"
11379         "# a[i+35] += m[35] * mu\n\t"
11380         "ldr    r7, [%[m], #140]\n\t"
11381         "ldr    r9, [%[a], #140]\n\t"
11382         "umull  r6, r7, r8, r7\n\t"
11383         "adds   r9, r9, r6\n\t"
11384         "adc    r4, r7, #0\n\t"
11385         "adds   r9, r9, r5\n\t"
11386         "str    r9, [%[a], #140]\n\t"
11387         "adc    r4, r4, #0\n\t"
11388         "# a[i+36] += m[36] * mu\n\t"
11389         "ldr    r7, [%[m], #144]\n\t"
11390         "ldr    r9, [%[a], #144]\n\t"
11391         "umull  r6, r7, r8, r7\n\t"
11392         "adds   r9, r9, r6\n\t"
11393         "adc    r5, r7, #0\n\t"
11394         "adds   r9, r9, r4\n\t"
11395         "str    r9, [%[a], #144]\n\t"
11396         "adc    r5, r5, #0\n\t"
11397         "# a[i+37] += m[37] * mu\n\t"
11398         "ldr    r7, [%[m], #148]\n\t"
11399         "ldr    r9, [%[a], #148]\n\t"
11400         "umull  r6, r7, r8, r7\n\t"
11401         "adds   r9, r9, r6\n\t"
11402         "adc    r4, r7, #0\n\t"
11403         "adds   r9, r9, r5\n\t"
11404         "str    r9, [%[a], #148]\n\t"
11405         "adc    r4, r4, #0\n\t"
11406         "# a[i+38] += m[38] * mu\n\t"
11407         "ldr    r7, [%[m], #152]\n\t"
11408         "ldr    r9, [%[a], #152]\n\t"
11409         "umull  r6, r7, r8, r7\n\t"
11410         "adds   r9, r9, r6\n\t"
11411         "adc    r5, r7, #0\n\t"
11412         "adds   r9, r9, r4\n\t"
11413         "str    r9, [%[a], #152]\n\t"
11414         "adc    r5, r5, #0\n\t"
11415         "# a[i+39] += m[39] * mu\n\t"
11416         "ldr    r7, [%[m], #156]\n\t"
11417         "ldr    r9, [%[a], #156]\n\t"
11418         "umull  r6, r7, r8, r7\n\t"
11419         "adds   r9, r9, r6\n\t"
11420         "adc    r4, r7, #0\n\t"
11421         "adds   r9, r9, r5\n\t"
11422         "str    r9, [%[a], #156]\n\t"
11423         "adc    r4, r4, #0\n\t"
11424         "# a[i+40] += m[40] * mu\n\t"
11425         "ldr    r7, [%[m], #160]\n\t"
11426         "ldr    r9, [%[a], #160]\n\t"
11427         "umull  r6, r7, r8, r7\n\t"
11428         "adds   r9, r9, r6\n\t"
11429         "adc    r5, r7, #0\n\t"
11430         "adds   r9, r9, r4\n\t"
11431         "str    r9, [%[a], #160]\n\t"
11432         "adc    r5, r5, #0\n\t"
11433         "# a[i+41] += m[41] * mu\n\t"
11434         "ldr    r7, [%[m], #164]\n\t"
11435         "ldr    r9, [%[a], #164]\n\t"
11436         "umull  r6, r7, r8, r7\n\t"
11437         "adds   r9, r9, r6\n\t"
11438         "adc    r4, r7, #0\n\t"
11439         "adds   r9, r9, r5\n\t"
11440         "str    r9, [%[a], #164]\n\t"
11441         "adc    r4, r4, #0\n\t"
11442         "# a[i+42] += m[42] * mu\n\t"
11443         "ldr    r7, [%[m], #168]\n\t"
11444         "ldr    r9, [%[a], #168]\n\t"
11445         "umull  r6, r7, r8, r7\n\t"
11446         "adds   r9, r9, r6\n\t"
11447         "adc    r5, r7, #0\n\t"
11448         "adds   r9, r9, r4\n\t"
11449         "str    r9, [%[a], #168]\n\t"
11450         "adc    r5, r5, #0\n\t"
11451         "# a[i+43] += m[43] * mu\n\t"
11452         "ldr    r7, [%[m], #172]\n\t"
11453         "ldr    r9, [%[a], #172]\n\t"
11454         "umull  r6, r7, r8, r7\n\t"
11455         "adds   r9, r9, r6\n\t"
11456         "adc    r4, r7, #0\n\t"
11457         "adds   r9, r9, r5\n\t"
11458         "str    r9, [%[a], #172]\n\t"
11459         "adc    r4, r4, #0\n\t"
11460         "# a[i+44] += m[44] * mu\n\t"
11461         "ldr    r7, [%[m], #176]\n\t"
11462         "ldr    r9, [%[a], #176]\n\t"
11463         "umull  r6, r7, r8, r7\n\t"
11464         "adds   r9, r9, r6\n\t"
11465         "adc    r5, r7, #0\n\t"
11466         "adds   r9, r9, r4\n\t"
11467         "str    r9, [%[a], #176]\n\t"
11468         "adc    r5, r5, #0\n\t"
11469         "# a[i+45] += m[45] * mu\n\t"
11470         "ldr    r7, [%[m], #180]\n\t"
11471         "ldr    r9, [%[a], #180]\n\t"
11472         "umull  r6, r7, r8, r7\n\t"
11473         "adds   r9, r9, r6\n\t"
11474         "adc    r4, r7, #0\n\t"
11475         "adds   r9, r9, r5\n\t"
11476         "str    r9, [%[a], #180]\n\t"
11477         "adc    r4, r4, #0\n\t"
11478         "# a[i+46] += m[46] * mu\n\t"
11479         "ldr    r7, [%[m], #184]\n\t"
11480         "ldr    r9, [%[a], #184]\n\t"
11481         "umull  r6, r7, r8, r7\n\t"
11482         "adds   r9, r9, r6\n\t"
11483         "adc    r5, r7, #0\n\t"
11484         "adds   r9, r9, r4\n\t"
11485         "str    r9, [%[a], #184]\n\t"
11486         "adc    r5, r5, #0\n\t"
              /* Top digit: the low product word is added to the running carry in
               * r5, and the high word plus the overflow held in ca from the
               * previous pass goes to r7. ca is then rebuilt from the carry of
               * that add (mov has no S suffix, so the flags survive it) and from
               * the carry of the add into a[i+48]. */
11487         "# a[i+47] += m[47] * mu\n\t"
11488         "ldr    r7, [%[m], #188]\n\t"
11489         "ldr   r9, [%[a], #188]\n\t"
11490         "umull  r6, r7, r8, r7\n\t"
11491         "adds   r5, r5, r6\n\t"
11492         "adcs   r7, r7, %[ca]\n\t"
11493         "mov    %[ca], #0\n\t"
11494         "adc    %[ca], %[ca], %[ca]\n\t"
11495         "adds   r9, r9, r5\n\t"
11496         "str    r9, [%[a], #188]\n\t"
11497         "ldr    r9, [%[a], #192]\n\t"
11498         "adcs   r9, r9, r7\n\t"
11499         "str    r9, [%[a], #192]\n\t"
11500         "adc    %[ca], %[ca], #0\n\t"
              /* Advance a by one digit and loop until 48 digits are processed. */
11501         "# i += 1\n\t"
11502         "add    %[a], %[a], #4\n\t"
11503         "add    r12, r12, #4\n\t"
11504         "cmp    r12, #192\n\t"
11505         "blt    1b\n\t"
              /* Flush the two digits still cached in registers; a now points at
               * the upper half, so these land in digits 48 and 49 of the input. */
11506         "str    r10, [%[a], #0]\n\t"
11507         "str    r14, [%[a], #4]\n\t"
              /* a is a read-write operand because the asm advances it; the call
               * below depends on a having moved forward by 48 digits. */
11508         : [ca] "+r" (ca), [a] "+r" (a)
11509         : [m] "r" (m), [mp] "r" (mp)
11510         : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
11511     );
11512 
          /* Destination is the start of the caller's buffer (a - 48), source is
           * the upper half, and the mask is 0 - ca (all ones when ca is 1), so m
           * is subtracted only when the reduction overflowed the top digit. */
11513     sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - ca);
11514 }
11515 
11516 /* Multiply two Montogmery form numbers mod the modulus (prime).
11517  * (r = a * b mod m)
11518  *
11519  * r   Result of multiplication.
11520  * a   First number to multiply in Montogmery form.
11521  * b   Second number to multiply in Montogmery form.
11522  * m   Modulus (prime).
11523  * mp  Montogmery mulitplier.
11524  */
11525 static void sp_3072_mont_mul_48(sp_digit* r, sp_digit* a, sp_digit* b,
11526         sp_digit* m, sp_digit mp)
11527 {
11528     sp_3072_mul_48(r, a, b);
11529     sp_3072_mont_reduce_48(r, m, mp);
11530 }
11531 
11532 /* Square the Montgomery form number. (r = a * a mod m)
11533  *
11534  * r   Result of squaring.
11535  * a   Number to square in Montogmery form.
11536  * m   Modulus (prime).
11537  * mp  Montogmery mulitplier.
11538  */
11539 static void sp_3072_mont_sqr_48(sp_digit* r, sp_digit* a, sp_digit* m,
11540         sp_digit mp)
11541 {
11542     sp_3072_sqr_48(r, a);
11543     sp_3072_mont_reduce_48(r, m, mp);
11544 }
11545 
11546 /* Mul a by digit b into r. (r = a * b)
11547  *
11548  * r  A single precision integer.
11549  * a  A single precision integer.
11550  * b  A single precision digit.
11551  */
11552 static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a,
11553         const sp_digit b)
11554 {
11555 #ifdef WOLFSSL_SP_SMALL
11556     __asm__ __volatile__ (
11557         "mov    r10, #0\n\t"
11558         "# A[0] * B\n\t"
11559         "ldr    r8, [%[a]]\n\t"
11560         "umull  r5, r3, %[b], r8\n\t"
11561         "mov    r4, #0\n\t"
11562         "str    r5, [%[r]]\n\t"
11563         "mov    r5, #0\n\t"
11564         "mov    r9, #4\n\t"
11565         "1:\n\t"
11566         "ldr    r8, [%[a], r9]\n\t"
11567         "umull  r6, r7, %[b], r8\n\t"
11568         "adds   r3, r3, r6\n\t"
11569         "adcs   r4, r4, r7\n\t"
11570         "adc    r5, r10, r10\n\t"
11571         "str    r3, [%[r], r9]\n\t"
11572         "mov    r3, r4\n\t"
11573         "mov    r4, r5\n\t"
11574         "mov    r5, #0\n\t"
11575         "add    r9, r9, #4\n\t"
11576         "cmp    r9, #192\n\t"
11577         "blt    1b\n\t"
11578         "str    r3, [%[r], #192]\n\t"
11579         :
11580         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
11581         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
11582     );
11583 #else
11584     __asm__ __volatile__ (
11585         "mov    r10, #0\n\t"
11586         "# A[0] * B\n\t"
11587         "ldr    r8, [%[a]]\n\t"
11588         "umull  r3, r4, %[b], r8\n\t"
11589         "mov    r5, #0\n\t"
11590         "str    r3, [%[r]]\n\t"
11591         "# A[1] * B\n\t"
11592         "ldr    r8, [%[a], #4]\n\t"
11593         "mov    r3, #0\n\t"
11594         "umull  r6, r7, %[b], r8\n\t"
11595         "adds   r4, r4, r6\n\t"
11596         "adcs   r5, r5, r7\n\t"
11597         "adc    r3, r10, r10\n\t"
11598         "str    r4, [%[r], #4]\n\t"
11599         "# A[2] * B\n\t"
11600         "ldr    r8, [%[a], #8]\n\t"
11601         "mov    r4, #0\n\t"
11602         "umull  r6, r7, %[b], r8\n\t"
11603         "adds   r5, r5, r6\n\t"
11604         "adcs   r3, r3, r7\n\t"
11605         "adc    r4, r10, r10\n\t"
11606         "str    r5, [%[r], #8]\n\t"
11607         "# A[3] * B\n\t"
11608         "ldr    r8, [%[a], #12]\n\t"
11609         "mov    r5, #0\n\t"
11610         "umull  r6, r7, %[b], r8\n\t"
11611         "adds   r3, r3, r6\n\t"
11612         "adcs   r4, r4, r7\n\t"
11613         "adc    r5, r10, r10\n\t"
11614         "str    r3, [%[r], #12]\n\t"
11615         "# A[4] * B\n\t"
11616         "ldr    r8, [%[a], #16]\n\t"
11617         "mov    r3, #0\n\t"
11618         "umull  r6, r7, %[b], r8\n\t"
11619         "adds   r4, r4, r6\n\t"
11620         "adcs   r5, r5, r7\n\t"
11621         "adc    r3, r10, r10\n\t"
11622         "str    r4, [%[r], #16]\n\t"
11623         "# A[5] * B\n\t"
11624         "ldr    r8, [%[a], #20]\n\t"
11625         "mov    r4, #0\n\t"
11626         "umull  r6, r7, %[b], r8\n\t"
11627         "adds   r5, r5, r6\n\t"
11628         "adcs   r3, r3, r7\n\t"
11629         "adc    r4, r10, r10\n\t"
11630         "str    r5, [%[r], #20]\n\t"
11631         "# A[6] * B\n\t"
11632         "ldr    r8, [%[a], #24]\n\t"
11633         "mov    r5, #0\n\t"
11634         "umull  r6, r7, %[b], r8\n\t"
11635         "adds   r3, r3, r6\n\t"
11636         "adcs   r4, r4, r7\n\t"
11637         "adc    r5, r10, r10\n\t"
11638         "str    r3, [%[r], #24]\n\t"
11639         "# A[7] * B\n\t"
11640         "ldr    r8, [%[a], #28]\n\t"
11641         "mov    r3, #0\n\t"
11642         "umull  r6, r7, %[b], r8\n\t"
11643         "adds   r4, r4, r6\n\t"
11644         "adcs   r5, r5, r7\n\t"
11645         "adc    r3, r10, r10\n\t"
11646         "str    r4, [%[r], #28]\n\t"
11647         "# A[8] * B\n\t"
11648         "ldr    r8, [%[a], #32]\n\t"
11649         "mov    r4, #0\n\t"
11650         "umull  r6, r7, %[b], r8\n\t"
11651         "adds   r5, r5, r6\n\t"
11652         "adcs   r3, r3, r7\n\t"
11653         "adc    r4, r10, r10\n\t"
11654         "str    r5, [%[r], #32]\n\t"
11655         "# A[9] * B\n\t"
11656         "ldr    r8, [%[a], #36]\n\t"
11657         "mov    r5, #0\n\t"
11658         "umull  r6, r7, %[b], r8\n\t"
11659         "adds   r3, r3, r6\n\t"
11660         "adcs   r4, r4, r7\n\t"
11661         "adc    r5, r10, r10\n\t"
11662         "str    r3, [%[r], #36]\n\t"
11663         "# A[10] * B\n\t"
11664         "ldr    r8, [%[a], #40]\n\t"
11665         "mov    r3, #0\n\t"
11666         "umull  r6, r7, %[b], r8\n\t"
11667         "adds   r4, r4, r6\n\t"
11668         "adcs   r5, r5, r7\n\t"
11669         "adc    r3, r10, r10\n\t"
11670         "str    r4, [%[r], #40]\n\t"
11671         "# A[11] * B\n\t"
11672         "ldr    r8, [%[a], #44]\n\t"
11673         "mov    r4, #0\n\t"
11674         "umull  r6, r7, %[b], r8\n\t"
11675         "adds   r5, r5, r6\n\t"
11676         "adcs   r3, r3, r7\n\t"
11677         "adc    r4, r10, r10\n\t"
11678         "str    r5, [%[r], #44]\n\t"
11679         "# A[12] * B\n\t"
11680         "ldr    r8, [%[a], #48]\n\t"
11681         "mov    r5, #0\n\t"
11682         "umull  r6, r7, %[b], r8\n\t"
11683         "adds   r3, r3, r6\n\t"
11684         "adcs   r4, r4, r7\n\t"
11685         "adc    r5, r10, r10\n\t"
11686         "str    r3, [%[r], #48]\n\t"
11687         "# A[13] * B\n\t"
11688         "ldr    r8, [%[a], #52]\n\t"
11689         "mov    r3, #0\n\t"
11690         "umull  r6, r7, %[b], r8\n\t"
11691         "adds   r4, r4, r6\n\t"
11692         "adcs   r5, r5, r7\n\t"
11693         "adc    r3, r10, r10\n\t"
11694         "str    r4, [%[r], #52]\n\t"
11695         "# A[14] * B\n\t"
11696         "ldr    r8, [%[a], #56]\n\t"
11697         "mov    r4, #0\n\t"
11698         "umull  r6, r7, %[b], r8\n\t"
11699         "adds   r5, r5, r6\n\t"
11700         "adcs   r3, r3, r7\n\t"
11701         "adc    r4, r10, r10\n\t"
11702         "str    r5, [%[r], #56]\n\t"
11703         "# A[15] * B\n\t"
11704         "ldr    r8, [%[a], #60]\n\t"
11705         "mov    r5, #0\n\t"
11706         "umull  r6, r7, %[b], r8\n\t"
11707         "adds   r3, r3, r6\n\t"
11708         "adcs   r4, r4, r7\n\t"
11709         "adc    r5, r10, r10\n\t"
11710         "str    r3, [%[r], #60]\n\t"
11711         "# A[16] * B\n\t"
11712         "ldr    r8, [%[a], #64]\n\t"
11713         "mov    r3, #0\n\t"
11714         "umull  r6, r7, %[b], r8\n\t"
11715         "adds   r4, r4, r6\n\t"
11716         "adcs   r5, r5, r7\n\t"
11717         "adc    r3, r10, r10\n\t"
11718         "str    r4, [%[r], #64]\n\t"
11719         "# A[17] * B\n\t"
11720         "ldr    r8, [%[a], #68]\n\t"
11721         "mov    r4, #0\n\t"
11722         "umull  r6, r7, %[b], r8\n\t"
11723         "adds   r5, r5, r6\n\t"
11724         "adcs   r3, r3, r7\n\t"
11725         "adc    r4, r10, r10\n\t"
11726         "str    r5, [%[r], #68]\n\t"
11727         "# A[18] * B\n\t"
11728         "ldr    r8, [%[a], #72]\n\t"
11729         "mov    r5, #0\n\t"
11730         "umull  r6, r7, %[b], r8\n\t"
11731         "adds   r3, r3, r6\n\t"
11732         "adcs   r4, r4, r7\n\t"
11733         "adc    r5, r10, r10\n\t"
11734         "str    r3, [%[r], #72]\n\t"
11735         "# A[19] * B\n\t"
11736         "ldr    r8, [%[a], #76]\n\t"
11737         "mov    r3, #0\n\t"
11738         "umull  r6, r7, %[b], r8\n\t"
11739         "adds   r4, r4, r6\n\t"
11740         "adcs   r5, r5, r7\n\t"
11741         "adc    r3, r10, r10\n\t"
11742         "str    r4, [%[r], #76]\n\t"
11743         "# A[20] * B\n\t"
11744         "ldr    r8, [%[a], #80]\n\t"
11745         "mov    r4, #0\n\t"
11746         "umull  r6, r7, %[b], r8\n\t"
11747         "adds   r5, r5, r6\n\t"
11748         "adcs   r3, r3, r7\n\t"
11749         "adc    r4, r10, r10\n\t"
11750         "str    r5, [%[r], #80]\n\t"
11751         "# A[21] * B\n\t"
11752         "ldr    r8, [%[a], #84]\n\t"
11753         "mov    r5, #0\n\t"
11754         "umull  r6, r7, %[b], r8\n\t"
11755         "adds   r3, r3, r6\n\t"
11756         "adcs   r4, r4, r7\n\t"
11757         "adc    r5, r10, r10\n\t"
11758         "str    r3, [%[r], #84]\n\t"
11759         "# A[22] * B\n\t"
11760         "ldr    r8, [%[a], #88]\n\t"
11761         "mov    r3, #0\n\t"
11762         "umull  r6, r7, %[b], r8\n\t"
11763         "adds   r4, r4, r6\n\t"
11764         "adcs   r5, r5, r7\n\t"
11765         "adc    r3, r10, r10\n\t"
11766         "str    r4, [%[r], #88]\n\t"
11767         "# A[23] * B\n\t"
11768         "ldr    r8, [%[a], #92]\n\t"
11769         "mov    r4, #0\n\t"
11770         "umull  r6, r7, %[b], r8\n\t"
11771         "adds   r5, r5, r6\n\t"
11772         "adcs   r3, r3, r7\n\t"
11773         "adc    r4, r10, r10\n\t"
11774         "str    r5, [%[r], #92]\n\t"
11775         "# A[24] * B\n\t"
11776         "ldr    r8, [%[a], #96]\n\t"
11777         "mov    r5, #0\n\t"
11778         "umull  r6, r7, %[b], r8\n\t"
11779         "adds   r3, r3, r6\n\t"
11780         "adcs   r4, r4, r7\n\t"
11781         "adc    r5, r10, r10\n\t"
11782         "str    r3, [%[r], #96]\n\t"
11783         "# A[25] * B\n\t"
11784         "ldr    r8, [%[a], #100]\n\t"
11785         "mov    r3, #0\n\t"
11786         "umull  r6, r7, %[b], r8\n\t"
11787         "adds   r4, r4, r6\n\t"
11788         "adcs   r5, r5, r7\n\t"
11789         "adc    r3, r10, r10\n\t"
11790         "str    r4, [%[r], #100]\n\t"
11791         "# A[26] * B\n\t"
11792         "ldr    r8, [%[a], #104]\n\t"
11793         "mov    r4, #0\n\t"
11794         "umull  r6, r7, %[b], r8\n\t"
11795         "adds   r5, r5, r6\n\t"
11796         "adcs   r3, r3, r7\n\t"
11797         "adc    r4, r10, r10\n\t"
11798         "str    r5, [%[r], #104]\n\t"
11799         "# A[27] * B\n\t"
11800         "ldr    r8, [%[a], #108]\n\t"
11801         "mov    r5, #0\n\t"
11802         "umull  r6, r7, %[b], r8\n\t"
11803         "adds   r3, r3, r6\n\t"
11804         "adcs   r4, r4, r7\n\t"
11805         "adc    r5, r10, r10\n\t"
11806         "str    r3, [%[r], #108]\n\t"
11807         "# A[28] * B\n\t"
11808         "ldr    r8, [%[a], #112]\n\t"
11809         "mov    r3, #0\n\t"
11810         "umull  r6, r7, %[b], r8\n\t"
11811         "adds   r4, r4, r6\n\t"
11812         "adcs   r5, r5, r7\n\t"
11813         "adc    r3, r10, r10\n\t"
11814         "str    r4, [%[r], #112]\n\t"
11815         "# A[29] * B\n\t"
11816         "ldr    r8, [%[a], #116]\n\t"
11817         "mov    r4, #0\n\t"
11818         "umull  r6, r7, %[b], r8\n\t"
11819         "adds   r5, r5, r6\n\t"
11820         "adcs   r3, r3, r7\n\t"
11821         "adc    r4, r10, r10\n\t"
11822         "str    r5, [%[r], #116]\n\t"
11823         "# A[30] * B\n\t"
11824         "ldr    r8, [%[a], #120]\n\t"
11825         "mov    r5, #0\n\t"
11826         "umull  r6, r7, %[b], r8\n\t"
11827         "adds   r3, r3, r6\n\t"
11828         "adcs   r4, r4, r7\n\t"
11829         "adc    r5, r10, r10\n\t"
11830         "str    r3, [%[r], #120]\n\t"
11831         "# A[31] * B\n\t"
11832         "ldr    r8, [%[a], #124]\n\t"
11833         "mov    r3, #0\n\t"
11834         "umull  r6, r7, %[b], r8\n\t"
11835         "adds   r4, r4, r6\n\t"
11836         "adcs   r5, r5, r7\n\t"
11837         "adc    r3, r10, r10\n\t"
11838         "str    r4, [%[r], #124]\n\t"
11839         "# A[32] * B\n\t"
11840         "ldr    r8, [%[a], #128]\n\t"
11841         "mov    r4, #0\n\t"
11842         "umull  r6, r7, %[b], r8\n\t"
11843         "adds   r5, r5, r6\n\t"
11844         "adcs   r3, r3, r7\n\t"
11845         "adc    r4, r10, r10\n\t"
11846         "str    r5, [%[r], #128]\n\t"
11847         "# A[33] * B\n\t"
11848         "ldr    r8, [%[a], #132]\n\t"
11849         "mov    r5, #0\n\t"
11850         "umull  r6, r7, %[b], r8\n\t"
11851         "adds   r3, r3, r6\n\t"
11852         "adcs   r4, r4, r7\n\t"
11853         "adc    r5, r10, r10\n\t"
11854         "str    r3, [%[r], #132]\n\t"
11855         "# A[34] * B\n\t"
11856         "ldr    r8, [%[a], #136]\n\t"
11857         "mov    r3, #0\n\t"
11858         "umull  r6, r7, %[b], r8\n\t"
11859         "adds   r4, r4, r6\n\t"
11860         "adcs   r5, r5, r7\n\t"
11861         "adc    r3, r10, r10\n\t"
11862         "str    r4, [%[r], #136]\n\t"
11863         "# A[35] * B\n\t"
11864         "ldr    r8, [%[a], #140]\n\t"
11865         "mov    r4, #0\n\t"
11866         "umull  r6, r7, %[b], r8\n\t"
11867         "adds   r5, r5, r6\n\t"
11868         "adcs   r3, r3, r7\n\t"
11869         "adc    r4, r10, r10\n\t"
11870         "str    r5, [%[r], #140]\n\t"
11871         "# A[36] * B\n\t"
11872         "ldr    r8, [%[a], #144]\n\t"
11873         "mov    r5, #0\n\t"
11874         "umull  r6, r7, %[b], r8\n\t"
11875         "adds   r3, r3, r6\n\t"
11876         "adcs   r4, r4, r7\n\t"
11877         "adc    r5, r10, r10\n\t"
11878         "str    r3, [%[r], #144]\n\t"
11879         "# A[37] * B\n\t"
11880         "ldr    r8, [%[a], #148]\n\t"
11881         "mov    r3, #0\n\t"
11882         "umull  r6, r7, %[b], r8\n\t"
11883         "adds   r4, r4, r6\n\t"
11884         "adcs   r5, r5, r7\n\t"
11885         "adc    r3, r10, r10\n\t"
11886         "str    r4, [%[r], #148]\n\t"
11887         "# A[38] * B\n\t"
11888         "ldr    r8, [%[a], #152]\n\t"
11889         "mov    r4, #0\n\t"
11890         "umull  r6, r7, %[b], r8\n\t"
11891         "adds   r5, r5, r6\n\t"
11892         "adcs   r3, r3, r7\n\t"
11893         "adc    r4, r10, r10\n\t"
11894         "str    r5, [%[r], #152]\n\t"
11895         "# A[39] * B\n\t"
11896         "ldr    r8, [%[a], #156]\n\t"
11897         "mov    r5, #0\n\t"
11898         "umull  r6, r7, %[b], r8\n\t"
11899         "adds   r3, r3, r6\n\t"
11900         "adcs   r4, r4, r7\n\t"
11901         "adc    r5, r10, r10\n\t"
11902         "str    r3, [%[r], #156]\n\t"
11903         "# A[40] * B\n\t"
11904         "ldr    r8, [%[a], #160]\n\t"
11905         "mov    r3, #0\n\t"
11906         "umull  r6, r7, %[b], r8\n\t"
11907         "adds   r4, r4, r6\n\t"
11908         "adcs   r5, r5, r7\n\t"
11909         "adc    r3, r10, r10\n\t"
11910         "str    r4, [%[r], #160]\n\t"
11911         "# A[41] * B\n\t"
11912         "ldr    r8, [%[a], #164]\n\t"
11913         "mov    r4, #0\n\t"
11914         "umull  r6, r7, %[b], r8\n\t"
11915         "adds   r5, r5, r6\n\t"
11916         "adcs   r3, r3, r7\n\t"
11917         "adc    r4, r10, r10\n\t"
11918         "str    r5, [%[r], #164]\n\t"
11919         "# A[42] * B\n\t"
11920         "ldr    r8, [%[a], #168]\n\t"
11921         "mov    r5, #0\n\t"
11922         "umull  r6, r7, %[b], r8\n\t"
11923         "adds   r3, r3, r6\n\t"
11924         "adcs   r4, r4, r7\n\t"
11925         "adc    r5, r10, r10\n\t"
11926         "str    r3, [%[r], #168]\n\t"
11927         "# A[43] * B\n\t"
11928         "ldr    r8, [%[a], #172]\n\t"
11929         "mov    r3, #0\n\t"
11930         "umull  r6, r7, %[b], r8\n\t"
11931         "adds   r4, r4, r6\n\t"
11932         "adcs   r5, r5, r7\n\t"
11933         "adc    r3, r10, r10\n\t"
11934         "str    r4, [%[r], #172]\n\t"
11935         "# A[44] * B\n\t"
11936         "ldr    r8, [%[a], #176]\n\t"
11937         "mov    r4, #0\n\t"
11938         "umull  r6, r7, %[b], r8\n\t"
11939         "adds   r5, r5, r6\n\t"
11940         "adcs   r3, r3, r7\n\t"
11941         "adc    r4, r10, r10\n\t"
11942         "str    r5, [%[r], #176]\n\t"
11943         "# A[45] * B\n\t"
11944         "ldr    r8, [%[a], #180]\n\t"
11945         "mov    r5, #0\n\t"
11946         "umull  r6, r7, %[b], r8\n\t"
11947         "adds   r3, r3, r6\n\t"
11948         "adcs   r4, r4, r7\n\t"
11949         "adc    r5, r10, r10\n\t"
11950         "str    r3, [%[r], #180]\n\t"
11951         "# A[46] * B\n\t"
11952         "ldr    r8, [%[a], #184]\n\t"
11953         "mov    r3, #0\n\t"
11954         "umull  r6, r7, %[b], r8\n\t"
11955         "adds   r4, r4, r6\n\t"
11956         "adcs   r5, r5, r7\n\t"
11957         "adc    r3, r10, r10\n\t"
11958         "str    r4, [%[r], #184]\n\t"
11959         "# A[47] * B\n\t"
11960         "ldr    r8, [%[a], #188]\n\t"
11961         "umull  r6, r7, %[b], r8\n\t"
11962         "adds   r5, r5, r6\n\t"
11963         "adc    r3, r3, r7\n\t"
11964         "str    r5, [%[r], #188]\n\t"
11965         "str    r3, [%[r], #192]\n\t"
11966         :
11967         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
11968         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
11969     );
11970 #endif
11971 }
11972 
11973 /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
11974  *
11975  * d1   The high order half of the number to divide.
11976  * d0   The low order half of the number to divide.
11977  * div  The dividend.
11978  * returns the result of the division.
11979  *
11980  * Note that this is an approximate div. It may give an answer 1 larger.
11981  */
11982 static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div)
11983 {
11984     sp_digit r = 0;
11985 
11986     __asm__ __volatile__ (
11987         "lsr    r5, %[div], #1\n\t"
11988         "add    r5, r5, #1\n\t"
11989         "mov    r6, %[d0]\n\t"
11990         "mov    r7, %[d1]\n\t"
11991         "# Do top 32\n\t"
11992         "subs   r8, r5, r7\n\t"
11993         "sbc    r8, r8, r8\n\t"
11994         "add    %[r], %[r], %[r]\n\t"
11995         "sub    %[r], %[r], r8\n\t"
11996         "and    r8, r8, r5\n\t"
11997         "subs   r7, r7, r8\n\t"
11998         "# Next 30 bits\n\t"
11999         "mov    r4, #29\n\t"
12000         "1:\n\t"
12001         "movs   r6, r6, lsl #1\n\t"
12002         "adc    r7, r7, r7\n\t"
12003         "subs   r8, r5, r7\n\t"
12004         "sbc    r8, r8, r8\n\t"
12005         "add    %[r], %[r], %[r]\n\t"
12006         "sub    %[r], %[r], r8\n\t"
12007         "and    r8, r8, r5\n\t"
12008         "subs   r7, r7, r8\n\t"
12009         "subs   r4, r4, #1\n\t"
12010         "bpl    1b\n\t"
12011         "add    %[r], %[r], %[r]\n\t"
12012         "add    %[r], %[r], #1\n\t"
12013         "umull  r4, r5, %[r], %[div]\n\t"
12014         "subs   r4, %[d0], r4\n\t"
12015         "sbc    r5, %[d1], r5\n\t"
12016         "add    %[r], %[r], r5\n\t"
12017         "umull  r4, r5, %[r], %[div]\n\t"
12018         "subs   r4, %[d0], r4\n\t"
12019         "sbc    r5, %[d1], r5\n\t"
12020         "add    %[r], %[r], r5\n\t"
12021         "subs   r8, %[div], r4\n\t"
12022         "sbc    r8, r8, r8\n\t"
12023         "sub    %[r], %[r], r8\n\t"
12024         : [r] "+r" (r)
12025         : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
12026         : "r4", "r5", "r6", "r7", "r8"
12027     );
12028     return r;
12029 }
12030 
12031 /* Compare a with b in constant time.
12032  *
12033  * a  A single precision integer.
12034  * b  A single precision integer.
12035  * return -ve, 0 or +ve if a is less than, equal to or greater than b
12036  * respectively.
12037  */
12038 static int32_t sp_3072_cmp_48(sp_digit* a, sp_digit* b)
12039 {
12040     sp_digit r = -1;
12041     sp_digit one = 1;
12042 
12043 #ifdef WOLFSSL_SP_SMALL
12044     __asm__ __volatile__ (
12045         "mov    r7, #0\n\t"
12046         "mov    r3, #-1\n\t"
12047         "mov    r6, #188\n\t"
12048         "1:\n\t"
12049         "ldr    r4, [%[a], r6]\n\t"
12050         "ldr    r5, [%[b], r6]\n\t"
12051         "and    r4, r4, r3\n\t"
12052         "and    r5, r5, r3\n\t"
12053         "subs   r4, r4, r5\n\t"
12054         "movhi  %[r], %[one]\n\t"
12055         "movlo  %[r], r3\n\t"
12056         "movne  r3, r7\n\t"
12057         "sub    r6, r6, #4\n\t"
12058         "bcc    1b\n\t"
12059         "eor    %[r], %[r], r3\n\t"
12060         : [r] "+r" (r)
12061         : [a] "r" (a), [b] "r" (b), [one] "r" (one)
12062         : "r2", "r3", "r4", "r5", "r6", "r7"
12063     );
12064 #else
12065     __asm__ __volatile__ (
12066         "mov    r7, #0\n\t"
12067         "mov    r3, #-1\n\t"
12068         "ldr        r4, [%[a], #188]\n\t"
12069         "ldr        r5, [%[b], #188]\n\t"
12070         "and        r4, r4, r3\n\t"
12071         "and        r5, r5, r3\n\t"
12072         "subs   r4, r4, r5\n\t"
12073         "movhi  %[r], %[one]\n\t"
12074         "movlo  %[r], r3\n\t"
12075         "movne  r3, r7\n\t"
12076         "ldr        r4, [%[a], #184]\n\t"
12077         "ldr        r5, [%[b], #184]\n\t"
12078         "and        r4, r4, r3\n\t"
12079         "and        r5, r5, r3\n\t"
12080         "subs   r4, r4, r5\n\t"
12081         "movhi  %[r], %[one]\n\t"
12082         "movlo  %[r], r3\n\t"
12083         "movne  r3, r7\n\t"
12084         "ldr        r4, [%[a], #180]\n\t"
12085         "ldr        r5, [%[b], #180]\n\t"
12086         "and        r4, r4, r3\n\t"
12087         "and        r5, r5, r3\n\t"
12088         "subs   r4, r4, r5\n\t"
12089         "movhi  %[r], %[one]\n\t"
12090         "movlo  %[r], r3\n\t"
12091         "movne  r3, r7\n\t"
12092         "ldr        r4, [%[a], #176]\n\t"
12093         "ldr        r5, [%[b], #176]\n\t"
12094         "and        r4, r4, r3\n\t"
12095         "and        r5, r5, r3\n\t"
12096         "subs   r4, r4, r5\n\t"
12097         "movhi  %[r], %[one]\n\t"
12098         "movlo  %[r], r3\n\t"
12099         "movne  r3, r7\n\t"
12100         "ldr        r4, [%[a], #172]\n\t"
12101         "ldr        r5, [%[b], #172]\n\t"
12102         "and        r4, r4, r3\n\t"
12103         "and        r5, r5, r3\n\t"
12104         "subs   r4, r4, r5\n\t"
12105         "movhi  %[r], %[one]\n\t"
12106         "movlo  %[r], r3\n\t"
12107         "movne  r3, r7\n\t"
12108         "ldr        r4, [%[a], #168]\n\t"
12109         "ldr        r5, [%[b], #168]\n\t"
12110         "and        r4, r4, r3\n\t"
12111         "and        r5, r5, r3\n\t"
12112         "subs   r4, r4, r5\n\t"
12113         "movhi  %[r], %[one]\n\t"
12114         "movlo  %[r], r3\n\t"
12115         "movne  r3, r7\n\t"
12116         "ldr        r4, [%[a], #164]\n\t"
12117         "ldr        r5, [%[b], #164]\n\t"
12118         "and        r4, r4, r3\n\t"
12119         "and        r5, r5, r3\n\t"
12120         "subs   r4, r4, r5\n\t"
12121         "movhi  %[r], %[one]\n\t"
12122         "movlo  %[r], r3\n\t"
12123         "movne  r3, r7\n\t"
12124         "ldr        r4, [%[a], #160]\n\t"
12125         "ldr        r5, [%[b], #160]\n\t"
12126         "and        r4, r4, r3\n\t"
12127         "and        r5, r5, r3\n\t"
12128         "subs   r4, r4, r5\n\t"
12129         "movhi  %[r], %[one]\n\t"
12130         "movlo  %[r], r3\n\t"
12131         "movne  r3, r7\n\t"
12132         "ldr        r4, [%[a], #156]\n\t"
12133         "ldr        r5, [%[b], #156]\n\t"
12134         "and        r4, r4, r3\n\t"
12135         "and        r5, r5, r3\n\t"
12136         "subs   r4, r4, r5\n\t"
12137         "movhi  %[r], %[one]\n\t"
12138         "movlo  %[r], r3\n\t"
12139         "movne  r3, r7\n\t"
12140         "ldr        r4, [%[a], #152]\n\t"
12141         "ldr        r5, [%[b], #152]\n\t"
12142         "and        r4, r4, r3\n\t"
12143         "and        r5, r5, r3\n\t"
12144         "subs   r4, r4, r5\n\t"
12145         "movhi  %[r], %[one]\n\t"
12146         "movlo  %[r], r3\n\t"
12147         "movne  r3, r7\n\t"
12148         "ldr        r4, [%[a], #148]\n\t"
12149         "ldr        r5, [%[b], #148]\n\t"
12150         "and        r4, r4, r3\n\t"
12151         "and        r5, r5, r3\n\t"
12152         "subs   r4, r4, r5\n\t"
12153         "movhi  %[r], %[one]\n\t"
12154         "movlo  %[r], r3\n\t"
12155         "movne  r3, r7\n\t"
12156         "ldr        r4, [%[a], #144]\n\t"
12157         "ldr        r5, [%[b], #144]\n\t"
12158         "and        r4, r4, r3\n\t"
12159         "and        r5, r5, r3\n\t"
12160         "subs   r4, r4, r5\n\t"
12161         "movhi  %[r], %[one]\n\t"
12162         "movlo  %[r], r3\n\t"
12163         "movne  r3, r7\n\t"
12164         "ldr        r4, [%[a], #140]\n\t"
12165         "ldr        r5, [%[b], #140]\n\t"
12166         "and        r4, r4, r3\n\t"
12167         "and        r5, r5, r3\n\t"
12168         "subs   r4, r4, r5\n\t"
12169         "movhi  %[r], %[one]\n\t"
12170         "movlo  %[r], r3\n\t"
12171         "movne  r3, r7\n\t"
12172         "ldr        r4, [%[a], #136]\n\t"
12173         "ldr        r5, [%[b], #136]\n\t"
12174         "and        r4, r4, r3\n\t"
12175         "and        r5, r5, r3\n\t"
12176         "subs   r4, r4, r5\n\t"
12177         "movhi  %[r], %[one]\n\t"
12178         "movlo  %[r], r3\n\t"
12179         "movne  r3, r7\n\t"
12180         "ldr        r4, [%[a], #132]\n\t"
12181         "ldr        r5, [%[b], #132]\n\t"
12182         "and        r4, r4, r3\n\t"
12183         "and        r5, r5, r3\n\t"
12184         "subs   r4, r4, r5\n\t"
12185         "movhi  %[r], %[one]\n\t"
12186         "movlo  %[r], r3\n\t"
12187         "movne  r3, r7\n\t"
12188         "ldr        r4, [%[a], #128]\n\t"
12189         "ldr        r5, [%[b], #128]\n\t"
12190         "and        r4, r4, r3\n\t"
12191         "and        r5, r5, r3\n\t"
12192         "subs   r4, r4, r5\n\t"
12193         "movhi  %[r], %[one]\n\t"
12194         "movlo  %[r], r3\n\t"
12195         "movne  r3, r7\n\t"
12196         "ldr        r4, [%[a], #124]\n\t"
12197         "ldr        r5, [%[b], #124]\n\t"
12198         "and        r4, r4, r3\n\t"
12199         "and        r5, r5, r3\n\t"
12200         "subs   r4, r4, r5\n\t"
12201         "movhi  %[r], %[one]\n\t"
12202         "movlo  %[r], r3\n\t"
12203         "movne  r3, r7\n\t"
12204         "ldr        r4, [%[a], #120]\n\t"
12205         "ldr        r5, [%[b], #120]\n\t"
12206         "and        r4, r4, r3\n\t"
12207         "and        r5, r5, r3\n\t"
12208         "subs   r4, r4, r5\n\t"
12209         "movhi  %[r], %[one]\n\t"
12210         "movlo  %[r], r3\n\t"
12211         "movne  r3, r7\n\t"
12212         "ldr        r4, [%[a], #116]\n\t"
12213         "ldr        r5, [%[b], #116]\n\t"
12214         "and        r4, r4, r3\n\t"
12215         "and        r5, r5, r3\n\t"
12216         "subs   r4, r4, r5\n\t"
12217         "movhi  %[r], %[one]\n\t"
12218         "movlo  %[r], r3\n\t"
12219         "movne  r3, r7\n\t"
12220         "ldr        r4, [%[a], #112]\n\t"
12221         "ldr        r5, [%[b], #112]\n\t"
12222         "and        r4, r4, r3\n\t"
12223         "and        r5, r5, r3\n\t"
12224         "subs   r4, r4, r5\n\t"
12225         "movhi  %[r], %[one]\n\t"
12226         "movlo  %[r], r3\n\t"
12227         "movne  r3, r7\n\t"
12228         "ldr        r4, [%[a], #108]\n\t"
12229         "ldr        r5, [%[b], #108]\n\t"
12230         "and        r4, r4, r3\n\t"
12231         "and        r5, r5, r3\n\t"
12232         "subs   r4, r4, r5\n\t"
12233         "movhi  %[r], %[one]\n\t"
12234         "movlo  %[r], r3\n\t"
12235         "movne  r3, r7\n\t"
12236         "ldr        r4, [%[a], #104]\n\t"
12237         "ldr        r5, [%[b], #104]\n\t"
12238         "and        r4, r4, r3\n\t"
12239         "and        r5, r5, r3\n\t"
12240         "subs   r4, r4, r5\n\t"
12241         "movhi  %[r], %[one]\n\t"
12242         "movlo  %[r], r3\n\t"
12243         "movne  r3, r7\n\t"
12244         "ldr        r4, [%[a], #100]\n\t"
12245         "ldr        r5, [%[b], #100]\n\t"
12246         "and        r4, r4, r3\n\t"
12247         "and        r5, r5, r3\n\t"
12248         "subs   r4, r4, r5\n\t"
12249         "movhi  %[r], %[one]\n\t"
12250         "movlo  %[r], r3\n\t"
12251         "movne  r3, r7\n\t"
12252         "ldr        r4, [%[a], #96]\n\t"
12253         "ldr        r5, [%[b], #96]\n\t"
12254         "and        r4, r4, r3\n\t"
12255         "and        r5, r5, r3\n\t"
12256         "subs   r4, r4, r5\n\t"
12257         "movhi  %[r], %[one]\n\t"
12258         "movlo  %[r], r3\n\t"
12259         "movne  r3, r7\n\t"
12260         "ldr        r4, [%[a], #92]\n\t"
12261         "ldr        r5, [%[b], #92]\n\t"
12262         "and        r4, r4, r3\n\t"
12263         "and        r5, r5, r3\n\t"
12264         "subs   r4, r4, r5\n\t"
12265         "movhi  %[r], %[one]\n\t"
12266         "movlo  %[r], r3\n\t"
12267         "movne  r3, r7\n\t"
12268         "ldr        r4, [%[a], #88]\n\t"
12269         "ldr        r5, [%[b], #88]\n\t"
12270         "and        r4, r4, r3\n\t"
12271         "and        r5, r5, r3\n\t"
12272         "subs   r4, r4, r5\n\t"
12273         "movhi  %[r], %[one]\n\t"
12274         "movlo  %[r], r3\n\t"
12275         "movne  r3, r7\n\t"
12276         "ldr        r4, [%[a], #84]\n\t"
12277         "ldr        r5, [%[b], #84]\n\t"
12278         "and        r4, r4, r3\n\t"
12279         "and        r5, r5, r3\n\t"
12280         "subs   r4, r4, r5\n\t"
12281         "movhi  %[r], %[one]\n\t"
12282         "movlo  %[r], r3\n\t"
12283         "movne  r3, r7\n\t"
12284         "ldr        r4, [%[a], #80]\n\t"
12285         "ldr        r5, [%[b], #80]\n\t"
12286         "and        r4, r4, r3\n\t"
12287         "and        r5, r5, r3\n\t"
12288         "subs   r4, r4, r5\n\t"
12289         "movhi  %[r], %[one]\n\t"
12290         "movlo  %[r], r3\n\t"
12291         "movne  r3, r7\n\t"
12292         "ldr        r4, [%[a], #76]\n\t"
12293         "ldr        r5, [%[b], #76]\n\t"
12294         "and        r4, r4, r3\n\t"
12295         "and        r5, r5, r3\n\t"
12296         "subs   r4, r4, r5\n\t"
12297         "movhi  %[r], %[one]\n\t"
12298         "movlo  %[r], r3\n\t"
12299         "movne  r3, r7\n\t"
12300         "ldr        r4, [%[a], #72]\n\t"
12301         "ldr        r5, [%[b], #72]\n\t"
12302         "and        r4, r4, r3\n\t"
12303         "and        r5, r5, r3\n\t"
12304         "subs   r4, r4, r5\n\t"
12305         "movhi  %[r], %[one]\n\t"
12306         "movlo  %[r], r3\n\t"
12307         "movne  r3, r7\n\t"
12308         "ldr        r4, [%[a], #68]\n\t"
12309         "ldr        r5, [%[b], #68]\n\t"
12310         "and        r4, r4, r3\n\t"
12311         "and        r5, r5, r3\n\t"
12312         "subs   r4, r4, r5\n\t"
12313         "movhi  %[r], %[one]\n\t"
12314         "movlo  %[r], r3\n\t"
12315         "movne  r3, r7\n\t"
12316         "ldr        r4, [%[a], #64]\n\t"
12317         "ldr        r5, [%[b], #64]\n\t"
12318         "and        r4, r4, r3\n\t"
12319         "and        r5, r5, r3\n\t"
12320         "subs   r4, r4, r5\n\t"
12321         "movhi  %[r], %[one]\n\t"
12322         "movlo  %[r], r3\n\t"
12323         "movne  r3, r7\n\t"
12324         "ldr        r4, [%[a], #60]\n\t"
12325         "ldr        r5, [%[b], #60]\n\t"
12326         "and        r4, r4, r3\n\t"
12327         "and        r5, r5, r3\n\t"
12328         "subs   r4, r4, r5\n\t"
12329         "movhi  %[r], %[one]\n\t"
12330         "movlo  %[r], r3\n\t"
12331         "movne  r3, r7\n\t"
12332         "ldr        r4, [%[a], #56]\n\t"
12333         "ldr        r5, [%[b], #56]\n\t"
12334         "and        r4, r4, r3\n\t"
12335         "and        r5, r5, r3\n\t"
12336         "subs   r4, r4, r5\n\t"
12337         "movhi  %[r], %[one]\n\t"
12338         "movlo  %[r], r3\n\t"
12339         "movne  r3, r7\n\t"
12340         "ldr        r4, [%[a], #52]\n\t"
12341         "ldr        r5, [%[b], #52]\n\t"
12342         "and        r4, r4, r3\n\t"
12343         "and        r5, r5, r3\n\t"
12344         "subs   r4, r4, r5\n\t"
12345         "movhi  %[r], %[one]\n\t"
12346         "movlo  %[r], r3\n\t"
12347         "movne  r3, r7\n\t"
12348         "ldr        r4, [%[a], #48]\n\t"
12349         "ldr        r5, [%[b], #48]\n\t"
12350         "and        r4, r4, r3\n\t"
12351         "and        r5, r5, r3\n\t"
12352         "subs   r4, r4, r5\n\t"
12353         "movhi  %[r], %[one]\n\t"
12354         "movlo  %[r], r3\n\t"
12355         "movne  r3, r7\n\t"
12356         "ldr        r4, [%[a], #44]\n\t"
12357         "ldr        r5, [%[b], #44]\n\t"
12358         "and        r4, r4, r3\n\t"
12359         "and        r5, r5, r3\n\t"
12360         "subs   r4, r4, r5\n\t"
12361         "movhi  %[r], %[one]\n\t"
12362         "movlo  %[r], r3\n\t"
12363         "movne  r3, r7\n\t"
12364         "ldr        r4, [%[a], #40]\n\t"
12365         "ldr        r5, [%[b], #40]\n\t"
12366         "and        r4, r4, r3\n\t"
12367         "and        r5, r5, r3\n\t"
12368         "subs   r4, r4, r5\n\t"
12369         "movhi  %[r], %[one]\n\t"
12370         "movlo  %[r], r3\n\t"
12371         "movne  r3, r7\n\t"
12372         "ldr        r4, [%[a], #36]\n\t"
12373         "ldr        r5, [%[b], #36]\n\t"
12374         "and        r4, r4, r3\n\t"
12375         "and        r5, r5, r3\n\t"
12376         "subs   r4, r4, r5\n\t"
12377         "movhi  %[r], %[one]\n\t"
12378         "movlo  %[r], r3\n\t"
12379         "movne  r3, r7\n\t"
12380         "ldr        r4, [%[a], #32]\n\t"
12381         "ldr        r5, [%[b], #32]\n\t"
12382         "and        r4, r4, r3\n\t"
12383         "and        r5, r5, r3\n\t"
12384         "subs   r4, r4, r5\n\t"
12385         "movhi  %[r], %[one]\n\t"
12386         "movlo  %[r], r3\n\t"
12387         "movne  r3, r7\n\t"
12388         "ldr        r4, [%[a], #28]\n\t"
12389         "ldr        r5, [%[b], #28]\n\t"
12390         "and        r4, r4, r3\n\t"
12391         "and        r5, r5, r3\n\t"
12392         "subs   r4, r4, r5\n\t"
12393         "movhi  %[r], %[one]\n\t"
12394         "movlo  %[r], r3\n\t"
12395         "movne  r3, r7\n\t"
12396         "ldr        r4, [%[a], #24]\n\t"
12397         "ldr        r5, [%[b], #24]\n\t"
12398         "and        r4, r4, r3\n\t"
12399         "and        r5, r5, r3\n\t"
12400         "subs   r4, r4, r5\n\t"
12401         "movhi  %[r], %[one]\n\t"
12402         "movlo  %[r], r3\n\t"
12403         "movne  r3, r7\n\t"
12404         "ldr        r4, [%[a], #20]\n\t"
12405         "ldr        r5, [%[b], #20]\n\t"
12406         "and        r4, r4, r3\n\t"
12407         "and        r5, r5, r3\n\t"
12408         "subs   r4, r4, r5\n\t"
12409         "movhi  %[r], %[one]\n\t"
12410         "movlo  %[r], r3\n\t"
12411         "movne  r3, r7\n\t"
12412         "ldr        r4, [%[a], #16]\n\t"
12413         "ldr        r5, [%[b], #16]\n\t"
12414         "and        r4, r4, r3\n\t"
12415         "and        r5, r5, r3\n\t"
12416         "subs   r4, r4, r5\n\t"
12417         "movhi  %[r], %[one]\n\t"
12418         "movlo  %[r], r3\n\t"
12419         "movne  r3, r7\n\t"
12420         "ldr        r4, [%[a], #12]\n\t"
12421         "ldr        r5, [%[b], #12]\n\t"
12422         "and        r4, r4, r3\n\t"
12423         "and        r5, r5, r3\n\t"
12424         "subs   r4, r4, r5\n\t"
12425         "movhi  %[r], %[one]\n\t"
12426         "movlo  %[r], r3\n\t"
12427         "movne  r3, r7\n\t"
12428         "ldr        r4, [%[a], #8]\n\t"
12429         "ldr        r5, [%[b], #8]\n\t"
12430         "and        r4, r4, r3\n\t"
12431         "and        r5, r5, r3\n\t"
12432         "subs   r4, r4, r5\n\t"
12433         "movhi  %[r], %[one]\n\t"
12434         "movlo  %[r], r3\n\t"
12435         "movne  r3, r7\n\t"
12436         "ldr        r4, [%[a], #4]\n\t"
12437         "ldr        r5, [%[b], #4]\n\t"
12438         "and        r4, r4, r3\n\t"
12439         "and        r5, r5, r3\n\t"
12440         "subs   r4, r4, r5\n\t"
12441         "movhi  %[r], %[one]\n\t"
12442         "movlo  %[r], r3\n\t"
12443         "movne  r3, r7\n\t"
12444         "ldr        r4, [%[a], #0]\n\t"
12445         "ldr        r5, [%[b], #0]\n\t"
12446         "and        r4, r4, r3\n\t"
12447         "and        r5, r5, r3\n\t"
12448         "subs   r4, r4, r5\n\t"
12449         "movhi  %[r], %[one]\n\t"
12450         "movlo  %[r], r3\n\t"
12451         "movne  r3, r7\n\t"
12452         "eor    %[r], %[r], r3\n\t"
12453         : [r] "+r" (r)
12454         : [a] "r" (a), [b] "r" (b), [one] "r" (one)
12455         : "r2", "r3", "r4", "r5", "r6", "r7"
12456     );
12457 #endif
12458 
12459     return r;
12460 }
12461 
12462 /* Divide d in a and put remainder into r (m*d + r = a)
12463  * m is not calculated as it is not needed at this time.
12464  *
12465  * a  Nmber to be divided.
12466  * d  Number to divide with.
12467  * m  Multiplier result.
12468  * r  Remainder from the division.
12469  * returns MP_OKAY indicating success.
12470  */
12471 static WC_INLINE int sp_3072_div_48(sp_digit* a, sp_digit* d, sp_digit* m,
12472         sp_digit* r)
12473 {
12474     sp_digit t1[96], t2[49];
12475     sp_digit div, r1;
12476     int i;
12477 
12478     (void)m;
12479 
12480     div = d[47];
12481     XMEMCPY(t1, a, sizeof(*t1) * 2 * 48);
12482     for (i=47; i>=0; i--) {
12483         r1 = div_3072_word_48(t1[48 + i], t1[48 + i - 1], div);
12484 
12485         sp_3072_mul_d_48(t2, d, r1);
12486         t1[48 + i] += sp_3072_sub_in_place_48(&t1[i], t2);
12487         t1[48 + i] -= t2[48];
12488         sp_3072_mask_48(t2, d, t1[48 + i]);
12489         t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], t2);
12490         sp_3072_mask_48(t2, d, t1[48 + i]);
12491         t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], t2);
12492     }
12493 
12494     r1 = sp_3072_cmp_48(t1, d) >= 0;
12495     sp_3072_cond_sub_48(r, t1, t2, (sp_digit)0 - r1);
12496 
12497     return MP_OKAY;
12498 }
12499 
12500 /* Reduce a modulo m into r. (r = a mod m)
12501  *
12502  * r  A single precision number that is the reduced result.
12503  * a  A single precision number that is to be reduced.
12504  * m  A single precision number that is the modulus to reduce with.
12505  * returns MP_OKAY indicating success.
12506  */
12507 static WC_INLINE int sp_3072_mod_48(sp_digit* r, sp_digit* a, sp_digit* m)
12508 {
12509     return sp_3072_div_48(a, m, NULL, r);
12510 }
12511 
12512 #ifdef WOLFSSL_SP_SMALL
12513 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
12514  *
12515  * r     A single precision number that is the result of the operation.
12516  * a     A single precision number being exponentiated.
12517  * e     A single precision number that is the exponent.
12518  * bits  The number of bits in the exponent.
12519  * m     A single precision number that is the modulus.
12520  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
12521  */
12522 static int sp_3072_mod_exp_48(sp_digit* r, sp_digit* a, sp_digit* e,
12523         int bits, sp_digit* m, int reduceA)
12524 {
12525 #ifndef WOLFSSL_SMALL_STACK
12526     sp_digit t[16][96];
12527 #else
12528     sp_digit* t[16];
12529     sp_digit* td;
12530 #endif
12531     sp_digit* norm;
12532     sp_digit mp = 1;
12533     sp_digit n;
12534     sp_digit mask;
12535     int i;
12536     int c, y;
12537     int err = MP_OKAY;
12538 
12539 #ifdef WOLFSSL_SMALL_STACK
12540     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 96, NULL,
12541                             DYNAMIC_TYPE_TMP_BUFFER);
12542     if (td == NULL)
12543         err = MEMORY_E;
12544 
12545     if (err == MP_OKAY) {
12546         for (i=0; i<16; i++)
12547             t[i] = td + i * 96;
12548         norm = t[0];
12549     }
12550 #else
12551     norm = t[0];
12552 #endif
12553 
12554     if (err == MP_OKAY) {
12555         sp_3072_mont_setup(m, &mp);
12556         sp_3072_mont_norm_48(norm, m);
12557 
12558         XMEMSET(t[1], 0, sizeof(sp_digit) * 48);
12559         if (reduceA) {
12560             err = sp_3072_mod_48(t[1] + 48, a, m);
12561             if (err == MP_OKAY)
12562                 err = sp_3072_mod_48(t[1], t[1], m);
12563         }
12564         else {
12565             XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48);
12566             err = sp_3072_mod_48(t[1], t[1], m);
12567         }
12568     }
12569 
12570     if (err == MP_OKAY) {
12571         sp_3072_mont_sqr_48(t[ 2], t[ 1], m, mp);
12572         sp_3072_mont_mul_48(t[ 3], t[ 2], t[ 1], m, mp);
12573         sp_3072_mont_sqr_48(t[ 4], t[ 2], m, mp);
12574         sp_3072_mont_mul_48(t[ 5], t[ 3], t[ 2], m, mp);
12575         sp_3072_mont_sqr_48(t[ 6], t[ 3], m, mp);
12576         sp_3072_mont_mul_48(t[ 7], t[ 4], t[ 3], m, mp);
12577         sp_3072_mont_sqr_48(t[ 8], t[ 4], m, mp);
12578         sp_3072_mont_mul_48(t[ 9], t[ 5], t[ 4], m, mp);
12579         sp_3072_mont_sqr_48(t[10], t[ 5], m, mp);
12580         sp_3072_mont_mul_48(t[11], t[ 6], t[ 5], m, mp);
12581         sp_3072_mont_sqr_48(t[12], t[ 6], m, mp);
12582         sp_3072_mont_mul_48(t[13], t[ 7], t[ 6], m, mp);
12583         sp_3072_mont_sqr_48(t[14], t[ 7], m, mp);
12584         sp_3072_mont_mul_48(t[15], t[ 8], t[ 7], m, mp);
12585 
12586         i = (bits - 1) / 32;
12587         n = e[i--];
12588         y = n >> 28;
12589         n <<= 4;
12590         c = 28;
12591         XMEMCPY(r, t[y], sizeof(sp_digit) * 48);
12592         for (; i>=0 || c>=4; ) {
12593             if (c == 0) {
12594                 n = e[i--];
12595                 y = n >> 28;
12596                 n <<= 4;
12597                 c = 28;
12598             }
12599             else if (c < 4) {
12600                 y = n >> 28;
12601                 n = e[i--];
12602                 c = 4 - c;
12603                 y |= n >> (32 - c);
12604                 n <<= c;
12605                 c = 32 - c;
12606             }
12607             else {
12608                 y = (n >> 28) & 0xf;
12609                 n <<= 4;
12610                 c -= 4;
12611             }
12612 
12613             sp_3072_mont_sqr_48(r, r, m, mp);
12614             sp_3072_mont_sqr_48(r, r, m, mp);
12615             sp_3072_mont_sqr_48(r, r, m, mp);
12616             sp_3072_mont_sqr_48(r, r, m, mp);
12617 
12618             sp_3072_mont_mul_48(r, r, t[y], m, mp);
12619         }
12620 
12621         XMEMSET(&r[48], 0, sizeof(sp_digit) * 48);
12622         sp_3072_mont_reduce_48(r, m, mp);
12623 
12624         mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
12625         sp_3072_cond_sub_48(r, r, m, mask);
12626     }
12627 
12628 #ifdef WOLFSSL_SMALL_STACK
12629     if (td != NULL)
12630         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
12631 #endif
12632 
12633     return err;
12634 }
12635 #else
12636 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
12637  *
12638  * r     A single precision number that is the result of the operation.
12639  * a     A single precision number being exponentiated.
12640  * e     A single precision number that is the exponent.
12641  * bits  The number of bits in the exponent.
12642  * m     A single precision number that is the modulus.
12643  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
12644  */
12645 static int sp_3072_mod_exp_48(sp_digit* r, sp_digit* a, sp_digit* e,
12646         int bits, sp_digit* m, int reduceA)
12647 {
12648 #ifndef WOLFSSL_SMALL_STACK
12649     sp_digit t[32][96];
12650 #else
12651     sp_digit* t[32];
12652     sp_digit* td;
12653 #endif
12654     sp_digit* norm;
12655     sp_digit mp = 1;
12656     sp_digit n;
12657     sp_digit mask;
12658     int i;
12659     int c, y;
12660     int err = MP_OKAY;
12661 
12662 #ifdef WOLFSSL_SMALL_STACK
12663     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 96, NULL,
12664                             DYNAMIC_TYPE_TMP_BUFFER);
12665     if (td == NULL)
12666         err = MEMORY_E;
12667 
12668     if (err == MP_OKAY) {
12669         for (i=0; i<32; i++)
12670             t[i] = td + i * 96;
12671         norm = t[0];
12672     }
12673 #else
12674     norm = t[0];
12675 #endif
12676 
12677     if (err == MP_OKAY) {
12678         sp_3072_mont_setup(m, &mp);
12679         sp_3072_mont_norm_48(norm, m);
12680 
12681         XMEMSET(t[1], 0, sizeof(sp_digit) * 48);
12682         if (reduceA) {
12683             err = sp_3072_mod_48(t[1] + 48, a, m);
12684             if (err == MP_OKAY)
12685                 err = sp_3072_mod_48(t[1], t[1], m);
12686         }
12687         else {
12688             XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48);
12689             err = sp_3072_mod_48(t[1], t[1], m);
12690         }
12691     }
12692 
12693     if (err == MP_OKAY) {
12694         sp_3072_mont_sqr_48(t[ 2], t[ 1], m, mp);
12695         sp_3072_mont_mul_48(t[ 3], t[ 2], t[ 1], m, mp);
12696         sp_3072_mont_sqr_48(t[ 4], t[ 2], m, mp);
12697         sp_3072_mont_mul_48(t[ 5], t[ 3], t[ 2], m, mp);
12698         sp_3072_mont_sqr_48(t[ 6], t[ 3], m, mp);
12699         sp_3072_mont_mul_48(t[ 7], t[ 4], t[ 3], m, mp);
12700         sp_3072_mont_sqr_48(t[ 8], t[ 4], m, mp);
12701         sp_3072_mont_mul_48(t[ 9], t[ 5], t[ 4], m, mp);
12702         sp_3072_mont_sqr_48(t[10], t[ 5], m, mp);
12703         sp_3072_mont_mul_48(t[11], t[ 6], t[ 5], m, mp);
12704         sp_3072_mont_sqr_48(t[12], t[ 6], m, mp);
12705         sp_3072_mont_mul_48(t[13], t[ 7], t[ 6], m, mp);
12706         sp_3072_mont_sqr_48(t[14], t[ 7], m, mp);
12707         sp_3072_mont_mul_48(t[15], t[ 8], t[ 7], m, mp);
12708         sp_3072_mont_sqr_48(t[16], t[ 8], m, mp);
12709         sp_3072_mont_mul_48(t[17], t[ 9], t[ 8], m, mp);
12710         sp_3072_mont_sqr_48(t[18], t[ 9], m, mp);
12711         sp_3072_mont_mul_48(t[19], t[10], t[ 9], m, mp);
12712         sp_3072_mont_sqr_48(t[20], t[10], m, mp);
12713         sp_3072_mont_mul_48(t[21], t[11], t[10], m, mp);
12714         sp_3072_mont_sqr_48(t[22], t[11], m, mp);
12715         sp_3072_mont_mul_48(t[23], t[12], t[11], m, mp);
12716         sp_3072_mont_sqr_48(t[24], t[12], m, mp);
12717         sp_3072_mont_mul_48(t[25], t[13], t[12], m, mp);
12718         sp_3072_mont_sqr_48(t[26], t[13], m, mp);
12719         sp_3072_mont_mul_48(t[27], t[14], t[13], m, mp);
12720         sp_3072_mont_sqr_48(t[28], t[14], m, mp);
12721         sp_3072_mont_mul_48(t[29], t[15], t[14], m, mp);
12722         sp_3072_mont_sqr_48(t[30], t[15], m, mp);
12723         sp_3072_mont_mul_48(t[31], t[16], t[15], m, mp);
12724 
12725         i = (bits - 1) / 32;
12726         n = e[i--];
12727         y = n >> 27;
12728         n <<= 5;
12729         c = 27;
12730         XMEMCPY(r, t[y], sizeof(sp_digit) * 48);
12731         for (; i>=0 || c>=5; ) {
12732             if (c == 0) {
12733                 n = e[i--];
12734                 y = n >> 27;
12735                 n <<= 5;
12736                 c = 27;
12737             }
12738             else if (c < 5) {
12739                 y = n >> 27;
12740                 n = e[i--];
12741                 c = 5 - c;
12742                 y |= n >> (32 - c);
12743                 n <<= c;
12744                 c = 32 - c;
12745             }
12746             else {
12747                 y = (n >> 27) & 0x1f;
12748                 n <<= 5;
12749                 c -= 5;
12750             }
12751 
12752             sp_3072_mont_sqr_48(r, r, m, mp);
12753             sp_3072_mont_sqr_48(r, r, m, mp);
12754             sp_3072_mont_sqr_48(r, r, m, mp);
12755             sp_3072_mont_sqr_48(r, r, m, mp);
12756             sp_3072_mont_sqr_48(r, r, m, mp);
12757 
12758             sp_3072_mont_mul_48(r, r, t[y], m, mp);
12759         }
12760         y = e[0] & 0x1;
12761         sp_3072_mont_sqr_48(r, r, m, mp);
12762         sp_3072_mont_mul_48(r, r, t[y], m, mp);
12763 
12764         XMEMSET(&r[48], 0, sizeof(sp_digit) * 48);
12765         sp_3072_mont_reduce_48(r, m, mp);
12766 
12767         mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
12768         sp_3072_cond_sub_48(r, r, m, mask);
12769     }
12770 
12771 #ifdef WOLFSSL_SMALL_STACK
12772     if (td != NULL)
12773         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
12774 #endif
12775 
12776     return err;
12777 }
12778 #endif /* WOLFSSL_SP_SMALL */
12779 
12780 #endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
12781 
12782 /* r = 2^n mod m where n is the number of bits to reduce by.
12783  * Given m must be 3072 bits, just need to subtract.
12784  *
12785  * r  A single precision number.
12786  * m  A signle precision number.
12787  */
12788 static void sp_3072_mont_norm_96(sp_digit* r, sp_digit* m)
12789 {
12790     XMEMSET(r, 0, sizeof(sp_digit) * 96);
12791 
12792     /* r = 2^n mod m */
12793     sp_3072_sub_in_place_96(r, m);
12794 }
12795 
12796 /* Conditionally subtract b from a using the mask m.
12797  * m is -1 to subtract and 0 when not copying.
12798  *
12799  * r  A single precision number representing condition subtract result.
12800  * a  A single precision number to subtract from.
12801  * b  A single precision number to subtract.
12802  * m  Mask value to apply.
12803  */
12804 static sp_digit sp_3072_cond_sub_96(sp_digit* r, sp_digit* a, sp_digit* b,
12805         sp_digit m)
12806 {
12807     sp_digit c = 0;
12808 
12809 #ifdef WOLFSSL_SP_SMALL
12810     __asm__ __volatile__ (
12811         "mov    r9, #0\n\t"
12812         "mov    r8, #0\n\t"
12813         "1:\n\t"
12814         "subs   %[c], r9, %[c]\n\t"
12815         "ldr    r4, [%[a], r8]\n\t"
12816         "ldr    r5, [%[b], r8]\n\t"
12817         "and    r5, r5, %[m]\n\t"
12818         "sbcs   r4, r4, r5\n\t"
12819         "sbc    %[c], r9, r9\n\t"
12820         "str    r4, [%[r], r8]\n\t"
12821         "add    r8, r8, #4\n\t"
12822         "cmp    r8, #384\n\t"
12823         "blt    1b\n\t"
12824         : [c] "+r" (c)
12825         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
12826         : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
12827     );
12828 #else
12829     __asm__ __volatile__ (
12830 
12831         "mov    r9, #0\n\t"
12832         "ldr    r4, [%[a], #0]\n\t"
12833         "ldr    r6, [%[a], #4]\n\t"
12834         "ldr    r5, [%[b], #0]\n\t"
12835         "ldr    r7, [%[b], #4]\n\t"
12836         "and    r5, r5, %[m]\n\t"
12837         "and    r7, r7, %[m]\n\t"
12838         "subs   r4, r4, r5\n\t"
12839         "sbcs   r6, r6, r7\n\t"
12840         "str    r4, [%[r], #0]\n\t"
12841         "str    r6, [%[r], #4]\n\t"
12842         "ldr    r4, [%[a], #8]\n\t"
12843         "ldr    r6, [%[a], #12]\n\t"
12844         "ldr    r5, [%[b], #8]\n\t"
12845         "ldr    r7, [%[b], #12]\n\t"
12846         "and    r5, r5, %[m]\n\t"
12847         "and    r7, r7, %[m]\n\t"
12848         "sbcs   r4, r4, r5\n\t"
12849         "sbcs   r6, r6, r7\n\t"
12850         "str    r4, [%[r], #8]\n\t"
12851         "str    r6, [%[r], #12]\n\t"
12852         "ldr    r4, [%[a], #16]\n\t"
12853         "ldr    r6, [%[a], #20]\n\t"
12854         "ldr    r5, [%[b], #16]\n\t"
12855         "ldr    r7, [%[b], #20]\n\t"
12856         "and    r5, r5, %[m]\n\t"
12857         "and    r7, r7, %[m]\n\t"
12858         "sbcs   r4, r4, r5\n\t"
12859         "sbcs   r6, r6, r7\n\t"
12860         "str    r4, [%[r], #16]\n\t"
12861         "str    r6, [%[r], #20]\n\t"
12862         "ldr    r4, [%[a], #24]\n\t"
12863         "ldr    r6, [%[a], #28]\n\t"
12864         "ldr    r5, [%[b], #24]\n\t"
12865         "ldr    r7, [%[b], #28]\n\t"
12866         "and    r5, r5, %[m]\n\t"
12867         "and    r7, r7, %[m]\n\t"
12868         "sbcs   r4, r4, r5\n\t"
12869         "sbcs   r6, r6, r7\n\t"
12870         "str    r4, [%[r], #24]\n\t"
12871         "str    r6, [%[r], #28]\n\t"
12872         "ldr    r4, [%[a], #32]\n\t"
12873         "ldr    r6, [%[a], #36]\n\t"
12874         "ldr    r5, [%[b], #32]\n\t"
12875         "ldr    r7, [%[b], #36]\n\t"
12876         "and    r5, r5, %[m]\n\t"
12877         "and    r7, r7, %[m]\n\t"
12878         "sbcs   r4, r4, r5\n\t"
12879         "sbcs   r6, r6, r7\n\t"
12880         "str    r4, [%[r], #32]\n\t"
12881         "str    r6, [%[r], #36]\n\t"
12882         "ldr    r4, [%[a], #40]\n\t"
12883         "ldr    r6, [%[a], #44]\n\t"
12884         "ldr    r5, [%[b], #40]\n\t"
12885         "ldr    r7, [%[b], #44]\n\t"
12886         "and    r5, r5, %[m]\n\t"
12887         "and    r7, r7, %[m]\n\t"
12888         "sbcs   r4, r4, r5\n\t"
12889         "sbcs   r6, r6, r7\n\t"
12890         "str    r4, [%[r], #40]\n\t"
12891         "str    r6, [%[r], #44]\n\t"
12892         "ldr    r4, [%[a], #48]\n\t"
12893         "ldr    r6, [%[a], #52]\n\t"
12894         "ldr    r5, [%[b], #48]\n\t"
12895         "ldr    r7, [%[b], #52]\n\t"
12896         "and    r5, r5, %[m]\n\t"
12897         "and    r7, r7, %[m]\n\t"
12898         "sbcs   r4, r4, r5\n\t"
12899         "sbcs   r6, r6, r7\n\t"
12900         "str    r4, [%[r], #48]\n\t"
12901         "str    r6, [%[r], #52]\n\t"
12902         "ldr    r4, [%[a], #56]\n\t"
12903         "ldr    r6, [%[a], #60]\n\t"
12904         "ldr    r5, [%[b], #56]\n\t"
12905         "ldr    r7, [%[b], #60]\n\t"
12906         "and    r5, r5, %[m]\n\t"
12907         "and    r7, r7, %[m]\n\t"
12908         "sbcs   r4, r4, r5\n\t"
12909         "sbcs   r6, r6, r7\n\t"
12910         "str    r4, [%[r], #56]\n\t"
12911         "str    r6, [%[r], #60]\n\t"
12912         "ldr    r4, [%[a], #64]\n\t"
12913         "ldr    r6, [%[a], #68]\n\t"
12914         "ldr    r5, [%[b], #64]\n\t"
12915         "ldr    r7, [%[b], #68]\n\t"
12916         "and    r5, r5, %[m]\n\t"
12917         "and    r7, r7, %[m]\n\t"
12918         "sbcs   r4, r4, r5\n\t"
12919         "sbcs   r6, r6, r7\n\t"
12920         "str    r4, [%[r], #64]\n\t"
12921         "str    r6, [%[r], #68]\n\t"
12922         "ldr    r4, [%[a], #72]\n\t"
12923         "ldr    r6, [%[a], #76]\n\t"
12924         "ldr    r5, [%[b], #72]\n\t"
12925         "ldr    r7, [%[b], #76]\n\t"
12926         "and    r5, r5, %[m]\n\t"
12927         "and    r7, r7, %[m]\n\t"
12928         "sbcs   r4, r4, r5\n\t"
12929         "sbcs   r6, r6, r7\n\t"
12930         "str    r4, [%[r], #72]\n\t"
12931         "str    r6, [%[r], #76]\n\t"
12932         "ldr    r4, [%[a], #80]\n\t"
12933         "ldr    r6, [%[a], #84]\n\t"
12934         "ldr    r5, [%[b], #80]\n\t"
12935         "ldr    r7, [%[b], #84]\n\t"
12936         "and    r5, r5, %[m]\n\t"
12937         "and    r7, r7, %[m]\n\t"
12938         "sbcs   r4, r4, r5\n\t"
12939         "sbcs   r6, r6, r7\n\t"
12940         "str    r4, [%[r], #80]\n\t"
12941         "str    r6, [%[r], #84]\n\t"
12942         "ldr    r4, [%[a], #88]\n\t"
12943         "ldr    r6, [%[a], #92]\n\t"
12944         "ldr    r5, [%[b], #88]\n\t"
12945         "ldr    r7, [%[b], #92]\n\t"
12946         "and    r5, r5, %[m]\n\t"
12947         "and    r7, r7, %[m]\n\t"
12948         "sbcs   r4, r4, r5\n\t"
12949         "sbcs   r6, r6, r7\n\t"
12950         "str    r4, [%[r], #88]\n\t"
12951         "str    r6, [%[r], #92]\n\t"
12952         "ldr    r4, [%[a], #96]\n\t"
12953         "ldr    r6, [%[a], #100]\n\t"
12954         "ldr    r5, [%[b], #96]\n\t"
12955         "ldr    r7, [%[b], #100]\n\t"
12956         "and    r5, r5, %[m]\n\t"
12957         "and    r7, r7, %[m]\n\t"
12958         "sbcs   r4, r4, r5\n\t"
12959         "sbcs   r6, r6, r7\n\t"
12960         "str    r4, [%[r], #96]\n\t"
12961         "str    r6, [%[r], #100]\n\t"
12962         "ldr    r4, [%[a], #104]\n\t"
12963         "ldr    r6, [%[a], #108]\n\t"
12964         "ldr    r5, [%[b], #104]\n\t"
12965         "ldr    r7, [%[b], #108]\n\t"
12966         "and    r5, r5, %[m]\n\t"
12967         "and    r7, r7, %[m]\n\t"
12968         "sbcs   r4, r4, r5\n\t"
12969         "sbcs   r6, r6, r7\n\t"
12970         "str    r4, [%[r], #104]\n\t"
12971         "str    r6, [%[r], #108]\n\t"
12972         "ldr    r4, [%[a], #112]\n\t"
12973         "ldr    r6, [%[a], #116]\n\t"
12974         "ldr    r5, [%[b], #112]\n\t"
12975         "ldr    r7, [%[b], #116]\n\t"
12976         "and    r5, r5, %[m]\n\t"
12977         "and    r7, r7, %[m]\n\t"
12978         "sbcs   r4, r4, r5\n\t"
12979         "sbcs   r6, r6, r7\n\t"
12980         "str    r4, [%[r], #112]\n\t"
12981         "str    r6, [%[r], #116]\n\t"
12982         "ldr    r4, [%[a], #120]\n\t"
12983         "ldr    r6, [%[a], #124]\n\t"
12984         "ldr    r5, [%[b], #120]\n\t"
12985         "ldr    r7, [%[b], #124]\n\t"
12986         "and    r5, r5, %[m]\n\t"
12987         "and    r7, r7, %[m]\n\t"
12988         "sbcs   r4, r4, r5\n\t"
12989         "sbcs   r6, r6, r7\n\t"
12990         "str    r4, [%[r], #120]\n\t"
12991         "str    r6, [%[r], #124]\n\t"
12992         "ldr    r4, [%[a], #128]\n\t"
12993         "ldr    r6, [%[a], #132]\n\t"
12994         "ldr    r5, [%[b], #128]\n\t"
12995         "ldr    r7, [%[b], #132]\n\t"
12996         "and    r5, r5, %[m]\n\t"
12997         "and    r7, r7, %[m]\n\t"
12998         "sbcs   r4, r4, r5\n\t"
12999         "sbcs   r6, r6, r7\n\t"
13000         "str    r4, [%[r], #128]\n\t"
13001         "str    r6, [%[r], #132]\n\t"
13002         "ldr    r4, [%[a], #136]\n\t"
13003         "ldr    r6, [%[a], #140]\n\t"
13004         "ldr    r5, [%[b], #136]\n\t"
13005         "ldr    r7, [%[b], #140]\n\t"
13006         "and    r5, r5, %[m]\n\t"
13007         "and    r7, r7, %[m]\n\t"
13008         "sbcs   r4, r4, r5\n\t"
13009         "sbcs   r6, r6, r7\n\t"
13010         "str    r4, [%[r], #136]\n\t"
13011         "str    r6, [%[r], #140]\n\t"
13012         "ldr    r4, [%[a], #144]\n\t"
13013         "ldr    r6, [%[a], #148]\n\t"
13014         "ldr    r5, [%[b], #144]\n\t"
13015         "ldr    r7, [%[b], #148]\n\t"
13016         "and    r5, r5, %[m]\n\t"
13017         "and    r7, r7, %[m]\n\t"
13018         "sbcs   r4, r4, r5\n\t"
13019         "sbcs   r6, r6, r7\n\t"
13020         "str    r4, [%[r], #144]\n\t"
13021         "str    r6, [%[r], #148]\n\t"
13022         "ldr    r4, [%[a], #152]\n\t"
13023         "ldr    r6, [%[a], #156]\n\t"
13024         "ldr    r5, [%[b], #152]\n\t"
13025         "ldr    r7, [%[b], #156]\n\t"
13026         "and    r5, r5, %[m]\n\t"
13027         "and    r7, r7, %[m]\n\t"
13028         "sbcs   r4, r4, r5\n\t"
13029         "sbcs   r6, r6, r7\n\t"
13030         "str    r4, [%[r], #152]\n\t"
13031         "str    r6, [%[r], #156]\n\t"
13032         "ldr    r4, [%[a], #160]\n\t"
13033         "ldr    r6, [%[a], #164]\n\t"
13034         "ldr    r5, [%[b], #160]\n\t"
13035         "ldr    r7, [%[b], #164]\n\t"
13036         "and    r5, r5, %[m]\n\t"
13037         "and    r7, r7, %[m]\n\t"
13038         "sbcs   r4, r4, r5\n\t"
13039         "sbcs   r6, r6, r7\n\t"
13040         "str    r4, [%[r], #160]\n\t"
13041         "str    r6, [%[r], #164]\n\t"
13042         "ldr    r4, [%[a], #168]\n\t"
13043         "ldr    r6, [%[a], #172]\n\t"
13044         "ldr    r5, [%[b], #168]\n\t"
13045         "ldr    r7, [%[b], #172]\n\t"
13046         "and    r5, r5, %[m]\n\t"
13047         "and    r7, r7, %[m]\n\t"
13048         "sbcs   r4, r4, r5\n\t"
13049         "sbcs   r6, r6, r7\n\t"
13050         "str    r4, [%[r], #168]\n\t"
13051         "str    r6, [%[r], #172]\n\t"
13052         "ldr    r4, [%[a], #176]\n\t"
13053         "ldr    r6, [%[a], #180]\n\t"
13054         "ldr    r5, [%[b], #176]\n\t"
13055         "ldr    r7, [%[b], #180]\n\t"
13056         "and    r5, r5, %[m]\n\t"
13057         "and    r7, r7, %[m]\n\t"
13058         "sbcs   r4, r4, r5\n\t"
13059         "sbcs   r6, r6, r7\n\t"
13060         "str    r4, [%[r], #176]\n\t"
13061         "str    r6, [%[r], #180]\n\t"
13062         "ldr    r4, [%[a], #184]\n\t"
13063         "ldr    r6, [%[a], #188]\n\t"
13064         "ldr    r5, [%[b], #184]\n\t"
13065         "ldr    r7, [%[b], #188]\n\t"
13066         "and    r5, r5, %[m]\n\t"
13067         "and    r7, r7, %[m]\n\t"
13068         "sbcs   r4, r4, r5\n\t"
13069         "sbcs   r6, r6, r7\n\t"
13070         "str    r4, [%[r], #184]\n\t"
13071         "str    r6, [%[r], #188]\n\t"
13072         "ldr    r4, [%[a], #192]\n\t"
13073         "ldr    r6, [%[a], #196]\n\t"
13074         "ldr    r5, [%[b], #192]\n\t"
13075         "ldr    r7, [%[b], #196]\n\t"
13076         "and    r5, r5, %[m]\n\t"
13077         "and    r7, r7, %[m]\n\t"
13078         "sbcs   r4, r4, r5\n\t"
13079         "sbcs   r6, r6, r7\n\t"
13080         "str    r4, [%[r], #192]\n\t"
13081         "str    r6, [%[r], #196]\n\t"
13082         "ldr    r4, [%[a], #200]\n\t"
13083         "ldr    r6, [%[a], #204]\n\t"
13084         "ldr    r5, [%[b], #200]\n\t"
13085         "ldr    r7, [%[b], #204]\n\t"
13086         "and    r5, r5, %[m]\n\t"
13087         "and    r7, r7, %[m]\n\t"
13088         "sbcs   r4, r4, r5\n\t"
13089         "sbcs   r6, r6, r7\n\t"
13090         "str    r4, [%[r], #200]\n\t"
13091         "str    r6, [%[r], #204]\n\t"
13092         "ldr    r4, [%[a], #208]\n\t"
13093         "ldr    r6, [%[a], #212]\n\t"
13094         "ldr    r5, [%[b], #208]\n\t"
13095         "ldr    r7, [%[b], #212]\n\t"
13096         "and    r5, r5, %[m]\n\t"
13097         "and    r7, r7, %[m]\n\t"
13098         "sbcs   r4, r4, r5\n\t"
13099         "sbcs   r6, r6, r7\n\t"
13100         "str    r4, [%[r], #208]\n\t"
13101         "str    r6, [%[r], #212]\n\t"
13102         "ldr    r4, [%[a], #216]\n\t"
13103         "ldr    r6, [%[a], #220]\n\t"
13104         "ldr    r5, [%[b], #216]\n\t"
13105         "ldr    r7, [%[b], #220]\n\t"
13106         "and    r5, r5, %[m]\n\t"
13107         "and    r7, r7, %[m]\n\t"
13108         "sbcs   r4, r4, r5\n\t"
13109         "sbcs   r6, r6, r7\n\t"
13110         "str    r4, [%[r], #216]\n\t"
13111         "str    r6, [%[r], #220]\n\t"
13112         "ldr    r4, [%[a], #224]\n\t"
13113         "ldr    r6, [%[a], #228]\n\t"
13114         "ldr    r5, [%[b], #224]\n\t"
13115         "ldr    r7, [%[b], #228]\n\t"
13116         "and    r5, r5, %[m]\n\t"
13117         "and    r7, r7, %[m]\n\t"
13118         "sbcs   r4, r4, r5\n\t"
13119         "sbcs   r6, r6, r7\n\t"
13120         "str    r4, [%[r], #224]\n\t"
13121         "str    r6, [%[r], #228]\n\t"
13122         "ldr    r4, [%[a], #232]\n\t"
13123         "ldr    r6, [%[a], #236]\n\t"
13124         "ldr    r5, [%[b], #232]\n\t"
13125         "ldr    r7, [%[b], #236]\n\t"
13126         "and    r5, r5, %[m]\n\t"
13127         "and    r7, r7, %[m]\n\t"
13128         "sbcs   r4, r4, r5\n\t"
13129         "sbcs   r6, r6, r7\n\t"
13130         "str    r4, [%[r], #232]\n\t"
13131         "str    r6, [%[r], #236]\n\t"
13132         "ldr    r4, [%[a], #240]\n\t"
13133         "ldr    r6, [%[a], #244]\n\t"
13134         "ldr    r5, [%[b], #240]\n\t"
13135         "ldr    r7, [%[b], #244]\n\t"
13136         "and    r5, r5, %[m]\n\t"
13137         "and    r7, r7, %[m]\n\t"
13138         "sbcs   r4, r4, r5\n\t"
13139         "sbcs   r6, r6, r7\n\t"
13140         "str    r4, [%[r], #240]\n\t"
13141         "str    r6, [%[r], #244]\n\t"
13142         "ldr    r4, [%[a], #248]\n\t"
13143         "ldr    r6, [%[a], #252]\n\t"
13144         "ldr    r5, [%[b], #248]\n\t"
13145         "ldr    r7, [%[b], #252]\n\t"
13146         "and    r5, r5, %[m]\n\t"
13147         "and    r7, r7, %[m]\n\t"
13148         "sbcs   r4, r4, r5\n\t"
13149         "sbcs   r6, r6, r7\n\t"
13150         "str    r4, [%[r], #248]\n\t"
13151         "str    r6, [%[r], #252]\n\t"
13152         "ldr    r4, [%[a], #256]\n\t"
13153         "ldr    r6, [%[a], #260]\n\t"
13154         "ldr    r5, [%[b], #256]\n\t"
13155         "ldr    r7, [%[b], #260]\n\t"
13156         "and    r5, r5, %[m]\n\t"
13157         "and    r7, r7, %[m]\n\t"
13158         "sbcs   r4, r4, r5\n\t"
13159         "sbcs   r6, r6, r7\n\t"
13160         "str    r4, [%[r], #256]\n\t"
13161         "str    r6, [%[r], #260]\n\t"
13162         "ldr    r4, [%[a], #264]\n\t"
13163         "ldr    r6, [%[a], #268]\n\t"
13164         "ldr    r5, [%[b], #264]\n\t"
13165         "ldr    r7, [%[b], #268]\n\t"
13166         "and    r5, r5, %[m]\n\t"
13167         "and    r7, r7, %[m]\n\t"
13168         "sbcs   r4, r4, r5\n\t"
13169         "sbcs   r6, r6, r7\n\t"
13170         "str    r4, [%[r], #264]\n\t"
13171         "str    r6, [%[r], #268]\n\t"
13172         "ldr    r4, [%[a], #272]\n\t"
13173         "ldr    r6, [%[a], #276]\n\t"
13174         "ldr    r5, [%[b], #272]\n\t"
13175         "ldr    r7, [%[b], #276]\n\t"
13176         "and    r5, r5, %[m]\n\t"
13177         "and    r7, r7, %[m]\n\t"
13178         "sbcs   r4, r4, r5\n\t"
13179         "sbcs   r6, r6, r7\n\t"
13180         "str    r4, [%[r], #272]\n\t"
13181         "str    r6, [%[r], #276]\n\t"
13182         "ldr    r4, [%[a], #280]\n\t"
13183         "ldr    r6, [%[a], #284]\n\t"
13184         "ldr    r5, [%[b], #280]\n\t"
13185         "ldr    r7, [%[b], #284]\n\t"
13186         "and    r5, r5, %[m]\n\t"
13187         "and    r7, r7, %[m]\n\t"
13188         "sbcs   r4, r4, r5\n\t"
13189         "sbcs   r6, r6, r7\n\t"
13190         "str    r4, [%[r], #280]\n\t"
13191         "str    r6, [%[r], #284]\n\t"
13192         "ldr    r4, [%[a], #288]\n\t"
13193         "ldr    r6, [%[a], #292]\n\t"
13194         "ldr    r5, [%[b], #288]\n\t"
13195         "ldr    r7, [%[b], #292]\n\t"
13196         "and    r5, r5, %[m]\n\t"
13197         "and    r7, r7, %[m]\n\t"
13198         "sbcs   r4, r4, r5\n\t"
13199         "sbcs   r6, r6, r7\n\t"
13200         "str    r4, [%[r], #288]\n\t"
13201         "str    r6, [%[r], #292]\n\t"
13202         "ldr    r4, [%[a], #296]\n\t"
13203         "ldr    r6, [%[a], #300]\n\t"
13204         "ldr    r5, [%[b], #296]\n\t"
13205         "ldr    r7, [%[b], #300]\n\t"
13206         "and    r5, r5, %[m]\n\t"
13207         "and    r7, r7, %[m]\n\t"
13208         "sbcs   r4, r4, r5\n\t"
13209         "sbcs   r6, r6, r7\n\t"
13210         "str    r4, [%[r], #296]\n\t"
13211         "str    r6, [%[r], #300]\n\t"
13212         "ldr    r4, [%[a], #304]\n\t"
13213         "ldr    r6, [%[a], #308]\n\t"
13214         "ldr    r5, [%[b], #304]\n\t"
13215         "ldr    r7, [%[b], #308]\n\t"
13216         "and    r5, r5, %[m]\n\t"
13217         "and    r7, r7, %[m]\n\t"
13218         "sbcs   r4, r4, r5\n\t"
13219         "sbcs   r6, r6, r7\n\t"
13220         "str    r4, [%[r], #304]\n\t"
13221         "str    r6, [%[r], #308]\n\t"
13222         "ldr    r4, [%[a], #312]\n\t"
13223         "ldr    r6, [%[a], #316]\n\t"
13224         "ldr    r5, [%[b], #312]\n\t"
13225         "ldr    r7, [%[b], #316]\n\t"
13226         "and    r5, r5, %[m]\n\t"
13227         "and    r7, r7, %[m]\n\t"
13228         "sbcs   r4, r4, r5\n\t"
13229         "sbcs   r6, r6, r7\n\t"
13230         "str    r4, [%[r], #312]\n\t"
13231         "str    r6, [%[r], #316]\n\t"
13232         "ldr    r4, [%[a], #320]\n\t"
13233         "ldr    r6, [%[a], #324]\n\t"
13234         "ldr    r5, [%[b], #320]\n\t"
13235         "ldr    r7, [%[b], #324]\n\t"
13236         "and    r5, r5, %[m]\n\t"
13237         "and    r7, r7, %[m]\n\t"
13238         "sbcs   r4, r4, r5\n\t"
13239         "sbcs   r6, r6, r7\n\t"
13240         "str    r4, [%[r], #320]\n\t"
13241         "str    r6, [%[r], #324]\n\t"
13242         "ldr    r4, [%[a], #328]\n\t"
13243         "ldr    r6, [%[a], #332]\n\t"
13244         "ldr    r5, [%[b], #328]\n\t"
13245         "ldr    r7, [%[b], #332]\n\t"
13246         "and    r5, r5, %[m]\n\t"
13247         "and    r7, r7, %[m]\n\t"
13248         "sbcs   r4, r4, r5\n\t"
13249         "sbcs   r6, r6, r7\n\t"
13250         "str    r4, [%[r], #328]\n\t"
13251         "str    r6, [%[r], #332]\n\t"
13252         "ldr    r4, [%[a], #336]\n\t"
13253         "ldr    r6, [%[a], #340]\n\t"
13254         "ldr    r5, [%[b], #336]\n\t"
13255         "ldr    r7, [%[b], #340]\n\t"
13256         "and    r5, r5, %[m]\n\t"
13257         "and    r7, r7, %[m]\n\t"
13258         "sbcs   r4, r4, r5\n\t"
13259         "sbcs   r6, r6, r7\n\t"
13260         "str    r4, [%[r], #336]\n\t"
13261         "str    r6, [%[r], #340]\n\t"
13262         "ldr    r4, [%[a], #344]\n\t"
13263         "ldr    r6, [%[a], #348]\n\t"
13264         "ldr    r5, [%[b], #344]\n\t"
13265         "ldr    r7, [%[b], #348]\n\t"
13266         "and    r5, r5, %[m]\n\t"
13267         "and    r7, r7, %[m]\n\t"
13268         "sbcs   r4, r4, r5\n\t"
13269         "sbcs   r6, r6, r7\n\t"
13270         "str    r4, [%[r], #344]\n\t"
13271         "str    r6, [%[r], #348]\n\t"
13272         "ldr    r4, [%[a], #352]\n\t"
13273         "ldr    r6, [%[a], #356]\n\t"
13274         "ldr    r5, [%[b], #352]\n\t"
13275         "ldr    r7, [%[b], #356]\n\t"
13276         "and    r5, r5, %[m]\n\t"
13277         "and    r7, r7, %[m]\n\t"
13278         "sbcs   r4, r4, r5\n\t"
13279         "sbcs   r6, r6, r7\n\t"
13280         "str    r4, [%[r], #352]\n\t"
13281         "str    r6, [%[r], #356]\n\t"
13282         "ldr    r4, [%[a], #360]\n\t"
13283         "ldr    r6, [%[a], #364]\n\t"
13284         "ldr    r5, [%[b], #360]\n\t"
13285         "ldr    r7, [%[b], #364]\n\t"
13286         "and    r5, r5, %[m]\n\t"
13287         "and    r7, r7, %[m]\n\t"
13288         "sbcs   r4, r4, r5\n\t"
13289         "sbcs   r6, r6, r7\n\t"
13290         "str    r4, [%[r], #360]\n\t"
13291         "str    r6, [%[r], #364]\n\t"
13292         "ldr    r4, [%[a], #368]\n\t"
13293         "ldr    r6, [%[a], #372]\n\t"
13294         "ldr    r5, [%[b], #368]\n\t"
13295         "ldr    r7, [%[b], #372]\n\t"
13296         "and    r5, r5, %[m]\n\t"
13297         "and    r7, r7, %[m]\n\t"
13298         "sbcs   r4, r4, r5\n\t"
13299         "sbcs   r6, r6, r7\n\t"
13300         "str    r4, [%[r], #368]\n\t"
13301         "str    r6, [%[r], #372]\n\t"
13302         "ldr    r4, [%[a], #376]\n\t"
13303         "ldr    r6, [%[a], #380]\n\t"
13304         "ldr    r5, [%[b], #376]\n\t"
13305         "ldr    r7, [%[b], #380]\n\t"
13306         "and    r5, r5, %[m]\n\t"
13307         "and    r7, r7, %[m]\n\t"
13308         "sbcs   r4, r4, r5\n\t"
13309         "sbcs   r6, r6, r7\n\t"
13310         "str    r4, [%[r], #376]\n\t"
13311         "str    r6, [%[r], #380]\n\t"
13312         "sbc    %[c], r9, r9\n\t"
13313         : [c] "+r" (c)
13314         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
13315         : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
13316     );
13317 #endif /* WOLFSSL_SP_SMALL */
13318 
13319     return c;
13320 }
13321 
13322 /* Reduce the number back to 3072 bits using Montgomery reduction.
13323  *
13324  * a   A single precision number to reduce in place.
13325  * m   The single precision number representing the modulus.
13326  * mp  The digit representing the negative inverse of m mod 2^n.
13327  */
13328 SP_NOINLINE static void sp_3072_mont_reduce_96(sp_digit* a, sp_digit* m,
13329         sp_digit mp)
13330 {
13331     sp_digit ca = 0;
13332 
13333     __asm__ __volatile__ (
13334         "# i = 0\n\t"
13335         "mov    r12, #0\n\t"
13336         "ldr    r10, [%[a], #0]\n\t"
13337         "ldr    r14, [%[a], #4]\n\t"
13338         "\n1:\n\t"
13339         "# mu = a[i] * mp\n\t"
13340         "mul    r8, %[mp], r10\n\t"
13341         "# a[i+0] += m[0] * mu\n\t"
13342         "ldr    r7, [%[m], #0]\n\t"
13343         "ldr    r9, [%[a], #0]\n\t"
13344         "umull  r6, r7, r8, r7\n\t"
13345         "adds   r10, r10, r6\n\t"
13346         "adc    r5, r7, #0\n\t"
13347         "# a[i+1] += m[1] * mu\n\t"
13348         "ldr    r7, [%[m], #4]\n\t"
13349         "ldr    r9, [%[a], #4]\n\t"
13350         "umull  r6, r7, r8, r7\n\t"
13351         "adds   r10, r14, r6\n\t"
13352         "adc    r4, r7, #0\n\t"
13353         "adds   r10, r10, r5\n\t"
13354         "adc    r4, r4, #0\n\t"
13355         "# a[i+2] += m[2] * mu\n\t"
13356         "ldr    r7, [%[m], #8]\n\t"
13357         "ldr    r14, [%[a], #8]\n\t"
13358         "umull  r6, r7, r8, r7\n\t"
13359         "adds   r14, r14, r6\n\t"
13360         "adc    r5, r7, #0\n\t"
13361         "adds   r14, r14, r4\n\t"
13362         "adc    r5, r5, #0\n\t"
13363         "# a[i+3] += m[3] * mu\n\t"
13364         "ldr    r7, [%[m], #12]\n\t"
13365         "ldr    r9, [%[a], #12]\n\t"
13366         "umull  r6, r7, r8, r7\n\t"
13367         "adds   r9, r9, r6\n\t"
13368         "adc    r4, r7, #0\n\t"
13369         "adds   r9, r9, r5\n\t"
13370         "str    r9, [%[a], #12]\n\t"
13371         "adc    r4, r4, #0\n\t"
13372         "# a[i+4] += m[4] * mu\n\t"
13373         "ldr    r7, [%[m], #16]\n\t"
13374         "ldr    r9, [%[a], #16]\n\t"
13375         "umull  r6, r7, r8, r7\n\t"
13376         "adds   r9, r9, r6\n\t"
13377         "adc    r5, r7, #0\n\t"
13378         "adds   r9, r9, r4\n\t"
13379         "str    r9, [%[a], #16]\n\t"
13380         "adc    r5, r5, #0\n\t"
13381         "# a[i+5] += m[5] * mu\n\t"
13382         "ldr    r7, [%[m], #20]\n\t"
13383         "ldr    r9, [%[a], #20]\n\t"
13384         "umull  r6, r7, r8, r7\n\t"
13385         "adds   r9, r9, r6\n\t"
13386         "adc    r4, r7, #0\n\t"
13387         "adds   r9, r9, r5\n\t"
13388         "str    r9, [%[a], #20]\n\t"
13389         "adc    r4, r4, #0\n\t"
13390         "# a[i+6] += m[6] * mu\n\t"
13391         "ldr    r7, [%[m], #24]\n\t"
13392         "ldr    r9, [%[a], #24]\n\t"
13393         "umull  r6, r7, r8, r7\n\t"
13394         "adds   r9, r9, r6\n\t"
13395         "adc    r5, r7, #0\n\t"
13396         "adds   r9, r9, r4\n\t"
13397         "str    r9, [%[a], #24]\n\t"
13398         "adc    r5, r5, #0\n\t"
13399         "# a[i+7] += m[7] * mu\n\t"
13400         "ldr    r7, [%[m], #28]\n\t"
13401         "ldr    r9, [%[a], #28]\n\t"
13402         "umull  r6, r7, r8, r7\n\t"
13403         "adds   r9, r9, r6\n\t"
13404         "adc    r4, r7, #0\n\t"
13405         "adds   r9, r9, r5\n\t"
13406         "str    r9, [%[a], #28]\n\t"
13407         "adc    r4, r4, #0\n\t"
13408         "# a[i+8] += m[8] * mu\n\t"
13409         "ldr    r7, [%[m], #32]\n\t"
13410         "ldr    r9, [%[a], #32]\n\t"
13411         "umull  r6, r7, r8, r7\n\t"
13412         "adds   r9, r9, r6\n\t"
13413         "adc    r5, r7, #0\n\t"
13414         "adds   r9, r9, r4\n\t"
13415         "str    r9, [%[a], #32]\n\t"
13416         "adc    r5, r5, #0\n\t"
13417         "# a[i+9] += m[9] * mu\n\t"
13418         "ldr    r7, [%[m], #36]\n\t"
13419         "ldr    r9, [%[a], #36]\n\t"
13420         "umull  r6, r7, r8, r7\n\t"
13421         "adds   r9, r9, r6\n\t"
13422         "adc    r4, r7, #0\n\t"
13423         "adds   r9, r9, r5\n\t"
13424         "str    r9, [%[a], #36]\n\t"
13425         "adc    r4, r4, #0\n\t"
13426         "# a[i+10] += m[10] * mu\n\t"
13427         "ldr    r7, [%[m], #40]\n\t"
13428         "ldr    r9, [%[a], #40]\n\t"
13429         "umull  r6, r7, r8, r7\n\t"
13430         "adds   r9, r9, r6\n\t"
13431         "adc    r5, r7, #0\n\t"
13432         "adds   r9, r9, r4\n\t"
13433         "str    r9, [%[a], #40]\n\t"
13434         "adc    r5, r5, #0\n\t"
13435         "# a[i+11] += m[11] * mu\n\t"
13436         "ldr    r7, [%[m], #44]\n\t"
13437         "ldr    r9, [%[a], #44]\n\t"
13438         "umull  r6, r7, r8, r7\n\t"
13439         "adds   r9, r9, r6\n\t"
13440         "adc    r4, r7, #0\n\t"
13441         "adds   r9, r9, r5\n\t"
13442         "str    r9, [%[a], #44]\n\t"
13443         "adc    r4, r4, #0\n\t"
13444         "# a[i+12] += m[12] * mu\n\t"
13445         "ldr    r7, [%[m], #48]\n\t"
13446         "ldr    r9, [%[a], #48]\n\t"
13447         "umull  r6, r7, r8, r7\n\t"
13448         "adds   r9, r9, r6\n\t"
13449         "adc    r5, r7, #0\n\t"
13450         "adds   r9, r9, r4\n\t"
13451         "str    r9, [%[a], #48]\n\t"
13452         "adc    r5, r5, #0\n\t"
13453         "# a[i+13] += m[13] * mu\n\t"
13454         "ldr    r7, [%[m], #52]\n\t"
13455         "ldr    r9, [%[a], #52]\n\t"
13456         "umull  r6, r7, r8, r7\n\t"
13457         "adds   r9, r9, r6\n\t"
13458         "adc    r4, r7, #0\n\t"
13459         "adds   r9, r9, r5\n\t"
13460         "str    r9, [%[a], #52]\n\t"
13461         "adc    r4, r4, #0\n\t"
13462         "# a[i+14] += m[14] * mu\n\t"
13463         "ldr    r7, [%[m], #56]\n\t"
13464         "ldr    r9, [%[a], #56]\n\t"
13465         "umull  r6, r7, r8, r7\n\t"
13466         "adds   r9, r9, r6\n\t"
13467         "adc    r5, r7, #0\n\t"
13468         "adds   r9, r9, r4\n\t"
13469         "str    r9, [%[a], #56]\n\t"
13470         "adc    r5, r5, #0\n\t"
13471         "# a[i+15] += m[15] * mu\n\t"
13472         "ldr    r7, [%[m], #60]\n\t"
13473         "ldr    r9, [%[a], #60]\n\t"
13474         "umull  r6, r7, r8, r7\n\t"
13475         "adds   r9, r9, r6\n\t"
13476         "adc    r4, r7, #0\n\t"
13477         "adds   r9, r9, r5\n\t"
13478         "str    r9, [%[a], #60]\n\t"
13479         "adc    r4, r4, #0\n\t"
13480         "# a[i+16] += m[16] * mu\n\t"
13481         "ldr    r7, [%[m], #64]\n\t"
13482         "ldr    r9, [%[a], #64]\n\t"
13483         "umull  r6, r7, r8, r7\n\t"
13484         "adds   r9, r9, r6\n\t"
13485         "adc    r5, r7, #0\n\t"
13486         "adds   r9, r9, r4\n\t"
13487         "str    r9, [%[a], #64]\n\t"
13488         "adc    r5, r5, #0\n\t"
13489         "# a[i+17] += m[17] * mu\n\t"
13490         "ldr    r7, [%[m], #68]\n\t"
13491         "ldr    r9, [%[a], #68]\n\t"
13492         "umull  r6, r7, r8, r7\n\t"
13493         "adds   r9, r9, r6\n\t"
13494         "adc    r4, r7, #0\n\t"
13495         "adds   r9, r9, r5\n\t"
13496         "str    r9, [%[a], #68]\n\t"
13497         "adc    r4, r4, #0\n\t"
13498         "# a[i+18] += m[18] * mu\n\t"
13499         "ldr    r7, [%[m], #72]\n\t"
13500         "ldr    r9, [%[a], #72]\n\t"
13501         "umull  r6, r7, r8, r7\n\t"
13502         "adds   r9, r9, r6\n\t"
13503         "adc    r5, r7, #0\n\t"
13504         "adds   r9, r9, r4\n\t"
13505         "str    r9, [%[a], #72]\n\t"
13506         "adc    r5, r5, #0\n\t"
13507         "# a[i+19] += m[19] * mu\n\t"
13508         "ldr    r7, [%[m], #76]\n\t"
13509         "ldr    r9, [%[a], #76]\n\t"
13510         "umull  r6, r7, r8, r7\n\t"
13511         "adds   r9, r9, r6\n\t"
13512         "adc    r4, r7, #0\n\t"
13513         "adds   r9, r9, r5\n\t"
13514         "str    r9, [%[a], #76]\n\t"
13515         "adc    r4, r4, #0\n\t"
13516         "# a[i+20] += m[20] * mu\n\t"
13517         "ldr    r7, [%[m], #80]\n\t"
13518         "ldr    r9, [%[a], #80]\n\t"
13519         "umull  r6, r7, r8, r7\n\t"
13520         "adds   r9, r9, r6\n\t"
13521         "adc    r5, r7, #0\n\t"
13522         "adds   r9, r9, r4\n\t"
13523         "str    r9, [%[a], #80]\n\t"
13524         "adc    r5, r5, #0\n\t"
13525         "# a[i+21] += m[21] * mu\n\t"
13526         "ldr    r7, [%[m], #84]\n\t"
13527         "ldr    r9, [%[a], #84]\n\t"
13528         "umull  r6, r7, r8, r7\n\t"
13529         "adds   r9, r9, r6\n\t"
13530         "adc    r4, r7, #0\n\t"
13531         "adds   r9, r9, r5\n\t"
13532         "str    r9, [%[a], #84]\n\t"
13533         "adc    r4, r4, #0\n\t"
13534         "# a[i+22] += m[22] * mu\n\t"
13535         "ldr    r7, [%[m], #88]\n\t"
13536         "ldr    r9, [%[a], #88]\n\t"
13537         "umull  r6, r7, r8, r7\n\t"
13538         "adds   r9, r9, r6\n\t"
13539         "adc    r5, r7, #0\n\t"
13540         "adds   r9, r9, r4\n\t"
13541         "str    r9, [%[a], #88]\n\t"
13542         "adc    r5, r5, #0\n\t"
13543         "# a[i+23] += m[23] * mu\n\t"
13544         "ldr    r7, [%[m], #92]\n\t"
13545         "ldr    r9, [%[a], #92]\n\t"
13546         "umull  r6, r7, r8, r7\n\t"
13547         "adds   r9, r9, r6\n\t"
13548         "adc    r4, r7, #0\n\t"
13549         "adds   r9, r9, r5\n\t"
13550         "str    r9, [%[a], #92]\n\t"
13551         "adc    r4, r4, #0\n\t"
13552         "# a[i+24] += m[24] * mu\n\t"
13553         "ldr    r7, [%[m], #96]\n\t"
13554         "ldr    r9, [%[a], #96]\n\t"
13555         "umull  r6, r7, r8, r7\n\t"
13556         "adds   r9, r9, r6\n\t"
13557         "adc    r5, r7, #0\n\t"
13558         "adds   r9, r9, r4\n\t"
13559         "str    r9, [%[a], #96]\n\t"
13560         "adc    r5, r5, #0\n\t"
13561         "# a[i+25] += m[25] * mu\n\t"
13562         "ldr    r7, [%[m], #100]\n\t"
13563         "ldr    r9, [%[a], #100]\n\t"
13564         "umull  r6, r7, r8, r7\n\t"
13565         "adds   r9, r9, r6\n\t"
13566         "adc    r4, r7, #0\n\t"
13567         "adds   r9, r9, r5\n\t"
13568         "str    r9, [%[a], #100]\n\t"
13569         "adc    r4, r4, #0\n\t"
13570         "# a[i+26] += m[26] * mu\n\t"
13571         "ldr    r7, [%[m], #104]\n\t"
13572         "ldr    r9, [%[a], #104]\n\t"
13573         "umull  r6, r7, r8, r7\n\t"
13574         "adds   r9, r9, r6\n\t"
13575         "adc    r5, r7, #0\n\t"
13576         "adds   r9, r9, r4\n\t"
13577         "str    r9, [%[a], #104]\n\t"
13578         "adc    r5, r5, #0\n\t"
13579         "# a[i+27] += m[27] * mu\n\t"
13580         "ldr    r7, [%[m], #108]\n\t"
13581         "ldr    r9, [%[a], #108]\n\t"
13582         "umull  r6, r7, r8, r7\n\t"
13583         "adds   r9, r9, r6\n\t"
13584         "adc    r4, r7, #0\n\t"
13585         "adds   r9, r9, r5\n\t"
13586         "str    r9, [%[a], #108]\n\t"
13587         "adc    r4, r4, #0\n\t"
13588         "# a[i+28] += m[28] * mu\n\t"
13589         "ldr    r7, [%[m], #112]\n\t"
13590         "ldr    r9, [%[a], #112]\n\t"
13591         "umull  r6, r7, r8, r7\n\t"
13592         "adds   r9, r9, r6\n\t"
13593         "adc    r5, r7, #0\n\t"
13594         "adds   r9, r9, r4\n\t"
13595         "str    r9, [%[a], #112]\n\t"
13596         "adc    r5, r5, #0\n\t"
13597         "# a[i+29] += m[29] * mu\n\t"
13598         "ldr    r7, [%[m], #116]\n\t"
13599         "ldr    r9, [%[a], #116]\n\t"
13600         "umull  r6, r7, r8, r7\n\t"
13601         "adds   r9, r9, r6\n\t"
13602         "adc    r4, r7, #0\n\t"
13603         "adds   r9, r9, r5\n\t"
13604         "str    r9, [%[a], #116]\n\t"
13605         "adc    r4, r4, #0\n\t"
13606         "# a[i+30] += m[30] * mu\n\t"
13607         "ldr    r7, [%[m], #120]\n\t"
13608         "ldr    r9, [%[a], #120]\n\t"
13609         "umull  r6, r7, r8, r7\n\t"
13610         "adds   r9, r9, r6\n\t"
13611         "adc    r5, r7, #0\n\t"
13612         "adds   r9, r9, r4\n\t"
13613         "str    r9, [%[a], #120]\n\t"
13614         "adc    r5, r5, #0\n\t"
13615         "# a[i+31] += m[31] * mu\n\t"
13616         "ldr    r7, [%[m], #124]\n\t"
13617         "ldr    r9, [%[a], #124]\n\t"
13618         "umull  r6, r7, r8, r7\n\t"
13619         "adds   r9, r9, r6\n\t"
13620         "adc    r4, r7, #0\n\t"
13621         "adds   r9, r9, r5\n\t"
13622         "str    r9, [%[a], #124]\n\t"
13623         "adc    r4, r4, #0\n\t"
13624         "# a[i+32] += m[32] * mu\n\t"
13625         "ldr    r7, [%[m], #128]\n\t"
13626         "ldr    r9, [%[a], #128]\n\t"
13627         "umull  r6, r7, r8, r7\n\t"
13628         "adds   r9, r9, r6\n\t"
13629         "adc    r5, r7, #0\n\t"
13630         "adds   r9, r9, r4\n\t"
13631         "str    r9, [%[a], #128]\n\t"
13632         "adc    r5, r5, #0\n\t"
13633         "# a[i+33] += m[33] * mu\n\t"
13634         "ldr    r7, [%[m], #132]\n\t"
13635         "ldr    r9, [%[a], #132]\n\t"
13636         "umull  r6, r7, r8, r7\n\t"
13637         "adds   r9, r9, r6\n\t"
13638         "adc    r4, r7, #0\n\t"
13639         "adds   r9, r9, r5\n\t"
13640         "str    r9, [%[a], #132]\n\t"
13641         "adc    r4, r4, #0\n\t"
13642         "# a[i+34] += m[34] * mu\n\t"
13643         "ldr    r7, [%[m], #136]\n\t"
13644         "ldr    r9, [%[a], #136]\n\t"
13645         "umull  r6, r7, r8, r7\n\t"
13646         "adds   r9, r9, r6\n\t"
13647         "adc    r5, r7, #0\n\t"
13648         "adds   r9, r9, r4\n\t"
13649         "str    r9, [%[a], #136]\n\t"
13650         "adc    r5, r5, #0\n\t"
13651         "# a[i+35] += m[35] * mu\n\t"
13652         "ldr    r7, [%[m], #140]\n\t"
13653         "ldr    r9, [%[a], #140]\n\t"
13654         "umull  r6, r7, r8, r7\n\t"
13655         "adds   r9, r9, r6\n\t"
13656         "adc    r4, r7, #0\n\t"
13657         "adds   r9, r9, r5\n\t"
13658         "str    r9, [%[a], #140]\n\t"
13659         "adc    r4, r4, #0\n\t"
13660         "# a[i+36] += m[36] * mu\n\t"
13661         "ldr    r7, [%[m], #144]\n\t"
13662         "ldr    r9, [%[a], #144]\n\t"
13663         "umull  r6, r7, r8, r7\n\t"
13664         "adds   r9, r9, r6\n\t"
13665         "adc    r5, r7, #0\n\t"
13666         "adds   r9, r9, r4\n\t"
13667         "str    r9, [%[a], #144]\n\t"
13668         "adc    r5, r5, #0\n\t"
13669         "# a[i+37] += m[37] * mu\n\t"
13670         "ldr    r7, [%[m], #148]\n\t"
13671         "ldr    r9, [%[a], #148]\n\t"
13672         "umull  r6, r7, r8, r7\n\t"
13673         "adds   r9, r9, r6\n\t"
13674         "adc    r4, r7, #0\n\t"
13675         "adds   r9, r9, r5\n\t"
13676         "str    r9, [%[a], #148]\n\t"
13677         "adc    r4, r4, #0\n\t"
13678         "# a[i+38] += m[38] * mu\n\t"
13679         "ldr    r7, [%[m], #152]\n\t"
13680         "ldr    r9, [%[a], #152]\n\t"
13681         "umull  r6, r7, r8, r7\n\t"
13682         "adds   r9, r9, r6\n\t"
13683         "adc    r5, r7, #0\n\t"
13684         "adds   r9, r9, r4\n\t"
13685         "str    r9, [%[a], #152]\n\t"
13686         "adc    r5, r5, #0\n\t"
13687         "# a[i+39] += m[39] * mu\n\t"
13688         "ldr    r7, [%[m], #156]\n\t"
13689         "ldr    r9, [%[a], #156]\n\t"
13690         "umull  r6, r7, r8, r7\n\t"
13691         "adds   r9, r9, r6\n\t"
13692         "adc    r4, r7, #0\n\t"
13693         "adds   r9, r9, r5\n\t"
13694         "str    r9, [%[a], #156]\n\t"
13695         "adc    r4, r4, #0\n\t"
13696         "# a[i+40] += m[40] * mu\n\t"
13697         "ldr    r7, [%[m], #160]\n\t"
13698         "ldr    r9, [%[a], #160]\n\t"
13699         "umull  r6, r7, r8, r7\n\t"
13700         "adds   r9, r9, r6\n\t"
13701         "adc    r5, r7, #0\n\t"
13702         "adds   r9, r9, r4\n\t"
13703         "str    r9, [%[a], #160]\n\t"
13704         "adc    r5, r5, #0\n\t"
13705         "# a[i+41] += m[41] * mu\n\t"
13706         "ldr    r7, [%[m], #164]\n\t"
13707         "ldr    r9, [%[a], #164]\n\t"
13708         "umull  r6, r7, r8, r7\n\t"
13709         "adds   r9, r9, r6\n\t"
13710         "adc    r4, r7, #0\n\t"
13711         "adds   r9, r9, r5\n\t"
13712         "str    r9, [%[a], #164]\n\t"
13713         "adc    r4, r4, #0\n\t"
13714         "# a[i+42] += m[42] * mu\n\t"
13715         "ldr    r7, [%[m], #168]\n\t"
13716         "ldr    r9, [%[a], #168]\n\t"
13717         "umull  r6, r7, r8, r7\n\t"
13718         "adds   r9, r9, r6\n\t"
13719         "adc    r5, r7, #0\n\t"
13720         "adds   r9, r9, r4\n\t"
13721         "str    r9, [%[a], #168]\n\t"
13722         "adc    r5, r5, #0\n\t"
13723         "# a[i+43] += m[43] * mu\n\t"
13724         "ldr    r7, [%[m], #172]\n\t"
13725         "ldr    r9, [%[a], #172]\n\t"
13726         "umull  r6, r7, r8, r7\n\t"
13727         "adds   r9, r9, r6\n\t"
13728         "adc    r4, r7, #0\n\t"
13729         "adds   r9, r9, r5\n\t"
13730         "str    r9, [%[a], #172]\n\t"
13731         "adc    r4, r4, #0\n\t"
13732         "# a[i+44] += m[44] * mu\n\t"
13733         "ldr    r7, [%[m], #176]\n\t"
13734         "ldr    r9, [%[a], #176]\n\t"
13735         "umull  r6, r7, r8, r7\n\t"
13736         "adds   r9, r9, r6\n\t"
13737         "adc    r5, r7, #0\n\t"
13738         "adds   r9, r9, r4\n\t"
13739         "str    r9, [%[a], #176]\n\t"
13740         "adc    r5, r5, #0\n\t"
13741         "# a[i+45] += m[45] * mu\n\t"
13742         "ldr    r7, [%[m], #180]\n\t"
13743         "ldr    r9, [%[a], #180]\n\t"
13744         "umull  r6, r7, r8, r7\n\t"
13745         "adds   r9, r9, r6\n\t"
13746         "adc    r4, r7, #0\n\t"
13747         "adds   r9, r9, r5\n\t"
13748         "str    r9, [%[a], #180]\n\t"
13749         "adc    r4, r4, #0\n\t"
13750         "# a[i+46] += m[46] * mu\n\t"
13751         "ldr    r7, [%[m], #184]\n\t"
13752         "ldr    r9, [%[a], #184]\n\t"
13753         "umull  r6, r7, r8, r7\n\t"
13754         "adds   r9, r9, r6\n\t"
13755         "adc    r5, r7, #0\n\t"
13756         "adds   r9, r9, r4\n\t"
13757         "str    r9, [%[a], #184]\n\t"
13758         "adc    r5, r5, #0\n\t"
13759         "# a[i+47] += m[47] * mu\n\t"
13760         "ldr    r7, [%[m], #188]\n\t"
13761         "ldr    r9, [%[a], #188]\n\t"
13762         "umull  r6, r7, r8, r7\n\t"
13763         "adds   r9, r9, r6\n\t"
13764         "adc    r4, r7, #0\n\t"
13765         "adds   r9, r9, r5\n\t"
13766         "str    r9, [%[a], #188]\n\t"
13767         "adc    r4, r4, #0\n\t"
13768         "# a[i+48] += m[48] * mu\n\t"
13769         "ldr    r7, [%[m], #192]\n\t"
13770         "ldr    r9, [%[a], #192]\n\t"
13771         "umull  r6, r7, r8, r7\n\t"
13772         "adds   r9, r9, r6\n\t"
13773         "adc    r5, r7, #0\n\t"
13774         "adds   r9, r9, r4\n\t"
13775         "str    r9, [%[a], #192]\n\t"
13776         "adc    r5, r5, #0\n\t"
13777         "# a[i+49] += m[49] * mu\n\t"
13778         "ldr    r7, [%[m], #196]\n\t"
13779         "ldr    r9, [%[a], #196]\n\t"
13780         "umull  r6, r7, r8, r7\n\t"
13781         "adds   r9, r9, r6\n\t"
13782         "adc    r4, r7, #0\n\t"
13783         "adds   r9, r9, r5\n\t"
13784         "str    r9, [%[a], #196]\n\t"
13785         "adc    r4, r4, #0\n\t"
13786         "# a[i+50] += m[50] * mu\n\t"
13787         "ldr    r7, [%[m], #200]\n\t"
13788         "ldr    r9, [%[a], #200]\n\t"
13789         "umull  r6, r7, r8, r7\n\t"
13790         "adds   r9, r9, r6\n\t"
13791         "adc    r5, r7, #0\n\t"
13792         "adds   r9, r9, r4\n\t"
13793         "str    r9, [%[a], #200]\n\t"
13794         "adc    r5, r5, #0\n\t"
13795         "# a[i+51] += m[51] * mu\n\t"
13796         "ldr    r7, [%[m], #204]\n\t"
13797         "ldr    r9, [%[a], #204]\n\t"
13798         "umull  r6, r7, r8, r7\n\t"
13799         "adds   r9, r9, r6\n\t"
13800         "adc    r4, r7, #0\n\t"
13801         "adds   r9, r9, r5\n\t"
13802         "str    r9, [%[a], #204]\n\t"
13803         "adc    r4, r4, #0\n\t"
13804         "# a[i+52] += m[52] * mu\n\t"
13805         "ldr    r7, [%[m], #208]\n\t"
13806         "ldr    r9, [%[a], #208]\n\t"
13807         "umull  r6, r7, r8, r7\n\t"
13808         "adds   r9, r9, r6\n\t"
13809         "adc    r5, r7, #0\n\t"
13810         "adds   r9, r9, r4\n\t"
13811         "str    r9, [%[a], #208]\n\t"
13812         "adc    r5, r5, #0\n\t"
13813         "# a[i+53] += m[53] * mu\n\t"
13814         "ldr    r7, [%[m], #212]\n\t"
13815         "ldr    r9, [%[a], #212]\n\t"
13816         "umull  r6, r7, r8, r7\n\t"
13817         "adds   r9, r9, r6\n\t"
13818         "adc    r4, r7, #0\n\t"
13819         "adds   r9, r9, r5\n\t"
13820         "str    r9, [%[a], #212]\n\t"
13821         "adc    r4, r4, #0\n\t"
13822         "# a[i+54] += m[54] * mu\n\t"
13823         "ldr    r7, [%[m], #216]\n\t"
13824         "ldr    r9, [%[a], #216]\n\t"
13825         "umull  r6, r7, r8, r7\n\t"
13826         "adds   r9, r9, r6\n\t"
13827         "adc    r5, r7, #0\n\t"
13828         "adds   r9, r9, r4\n\t"
13829         "str    r9, [%[a], #216]\n\t"
13830         "adc    r5, r5, #0\n\t"
13831         "# a[i+55] += m[55] * mu\n\t"
13832         "ldr    r7, [%[m], #220]\n\t"
13833         "ldr    r9, [%[a], #220]\n\t"
13834         "umull  r6, r7, r8, r7\n\t"
13835         "adds   r9, r9, r6\n\t"
13836         "adc    r4, r7, #0\n\t"
13837         "adds   r9, r9, r5\n\t"
13838         "str    r9, [%[a], #220]\n\t"
13839         "adc    r4, r4, #0\n\t"
13840         "# a[i+56] += m[56] * mu\n\t"
13841         "ldr    r7, [%[m], #224]\n\t"
13842         "ldr    r9, [%[a], #224]\n\t"
13843         "umull  r6, r7, r8, r7\n\t"
13844         "adds   r9, r9, r6\n\t"
13845         "adc    r5, r7, #0\n\t"
13846         "adds   r9, r9, r4\n\t"
13847         "str    r9, [%[a], #224]\n\t"
13848         "adc    r5, r5, #0\n\t"
13849         "# a[i+57] += m[57] * mu\n\t"
13850         "ldr    r7, [%[m], #228]\n\t"
13851         "ldr    r9, [%[a], #228]\n\t"
13852         "umull  r6, r7, r8, r7\n\t"
13853         "adds   r9, r9, r6\n\t"
13854         "adc    r4, r7, #0\n\t"
13855         "adds   r9, r9, r5\n\t"
13856         "str    r9, [%[a], #228]\n\t"
13857         "adc    r4, r4, #0\n\t"
13858         "# a[i+58] += m[58] * mu\n\t"
13859         "ldr    r7, [%[m], #232]\n\t"
13860         "ldr    r9, [%[a], #232]\n\t"
13861         "umull  r6, r7, r8, r7\n\t"
13862         "adds   r9, r9, r6\n\t"
13863         "adc    r5, r7, #0\n\t"
13864         "adds   r9, r9, r4\n\t"
13865         "str    r9, [%[a], #232]\n\t"
13866         "adc    r5, r5, #0\n\t"
13867         "# a[i+59] += m[59] * mu\n\t"
13868         "ldr    r7, [%[m], #236]\n\t"
13869         "ldr    r9, [%[a], #236]\n\t"
13870         "umull  r6, r7, r8, r7\n\t"
13871         "adds   r9, r9, r6\n\t"
13872         "adc    r4, r7, #0\n\t"
13873         "adds   r9, r9, r5\n\t"
13874         "str    r9, [%[a], #236]\n\t"
13875         "adc    r4, r4, #0\n\t"
13876         "# a[i+60] += m[60] * mu\n\t"
13877         "ldr    r7, [%[m], #240]\n\t"
13878         "ldr    r9, [%[a], #240]\n\t"
13879         "umull  r6, r7, r8, r7\n\t"
13880         "adds   r9, r9, r6\n\t"
13881         "adc    r5, r7, #0\n\t"
13882         "adds   r9, r9, r4\n\t"
13883         "str    r9, [%[a], #240]\n\t"
13884         "adc    r5, r5, #0\n\t"
13885         "# a[i+61] += m[61] * mu\n\t"
13886         "ldr    r7, [%[m], #244]\n\t"
13887         "ldr    r9, [%[a], #244]\n\t"
13888         "umull  r6, r7, r8, r7\n\t"
13889         "adds   r9, r9, r6\n\t"
13890         "adc    r4, r7, #0\n\t"
13891         "adds   r9, r9, r5\n\t"
13892         "str    r9, [%[a], #244]\n\t"
13893         "adc    r4, r4, #0\n\t"
13894         "# a[i+62] += m[62] * mu\n\t"
13895         "ldr    r7, [%[m], #248]\n\t"
13896         "ldr    r9, [%[a], #248]\n\t"
13897         "umull  r6, r7, r8, r7\n\t"
13898         "adds   r9, r9, r6\n\t"
13899         "adc    r5, r7, #0\n\t"
13900         "adds   r9, r9, r4\n\t"
13901         "str    r9, [%[a], #248]\n\t"
13902         "adc    r5, r5, #0\n\t"
13903         "# a[i+63] += m[63] * mu\n\t"
13904         "ldr    r7, [%[m], #252]\n\t"
13905         "ldr    r9, [%[a], #252]\n\t"
13906         "umull  r6, r7, r8, r7\n\t"
13907         "adds   r9, r9, r6\n\t"
13908         "adc    r4, r7, #0\n\t"
13909         "adds   r9, r9, r5\n\t"
13910         "str    r9, [%[a], #252]\n\t"
13911         "adc    r4, r4, #0\n\t"
13912         "# a[i+64] += m[64] * mu\n\t"
13913         "ldr    r7, [%[m], #256]\n\t"
13914         "ldr    r9, [%[a], #256]\n\t"
13915         "umull  r6, r7, r8, r7\n\t"
13916         "adds   r9, r9, r6\n\t"
13917         "adc    r5, r7, #0\n\t"
13918         "adds   r9, r9, r4\n\t"
13919         "str    r9, [%[a], #256]\n\t"
13920         "adc    r5, r5, #0\n\t"
13921         "# a[i+65] += m[65] * mu\n\t"
13922         "ldr    r7, [%[m], #260]\n\t"
13923         "ldr    r9, [%[a], #260]\n\t"
13924         "umull  r6, r7, r8, r7\n\t"
13925         "adds   r9, r9, r6\n\t"
13926         "adc    r4, r7, #0\n\t"
13927         "adds   r9, r9, r5\n\t"
13928         "str    r9, [%[a], #260]\n\t"
13929         "adc    r4, r4, #0\n\t"
13930         "# a[i+66] += m[66] * mu\n\t"
13931         "ldr    r7, [%[m], #264]\n\t"
13932         "ldr    r9, [%[a], #264]\n\t"
13933         "umull  r6, r7, r8, r7\n\t"
13934         "adds   r9, r9, r6\n\t"
13935         "adc    r5, r7, #0\n\t"
13936         "adds   r9, r9, r4\n\t"
13937         "str    r9, [%[a], #264]\n\t"
13938         "adc    r5, r5, #0\n\t"
13939         "# a[i+67] += m[67] * mu\n\t"
13940         "ldr    r7, [%[m], #268]\n\t"
13941         "ldr    r9, [%[a], #268]\n\t"
13942         "umull  r6, r7, r8, r7\n\t"
13943         "adds   r9, r9, r6\n\t"
13944         "adc    r4, r7, #0\n\t"
13945         "adds   r9, r9, r5\n\t"
13946         "str    r9, [%[a], #268]\n\t"
13947         "adc    r4, r4, #0\n\t"
13948         "# a[i+68] += m[68] * mu\n\t"
13949         "ldr    r7, [%[m], #272]\n\t"
13950         "ldr    r9, [%[a], #272]\n\t"
13951         "umull  r6, r7, r8, r7\n\t"
13952         "adds   r9, r9, r6\n\t"
13953         "adc    r5, r7, #0\n\t"
13954         "adds   r9, r9, r4\n\t"
13955         "str    r9, [%[a], #272]\n\t"
13956         "adc    r5, r5, #0\n\t"
13957         "# a[i+69] += m[69] * mu\n\t"
13958         "ldr    r7, [%[m], #276]\n\t"
13959         "ldr    r9, [%[a], #276]\n\t"
13960         "umull  r6, r7, r8, r7\n\t"
13961         "adds   r9, r9, r6\n\t"
13962         "adc    r4, r7, #0\n\t"
13963         "adds   r9, r9, r5\n\t"
13964         "str    r9, [%[a], #276]\n\t"
13965         "adc    r4, r4, #0\n\t"
13966         "# a[i+70] += m[70] * mu\n\t"
13967         "ldr    r7, [%[m], #280]\n\t"
13968         "ldr    r9, [%[a], #280]\n\t"
13969         "umull  r6, r7, r8, r7\n\t"
13970         "adds   r9, r9, r6\n\t"
13971         "adc    r5, r7, #0\n\t"
13972         "adds   r9, r9, r4\n\t"
13973         "str    r9, [%[a], #280]\n\t"
13974         "adc    r5, r5, #0\n\t"
13975         "# a[i+71] += m[71] * mu\n\t"
13976         "ldr    r7, [%[m], #284]\n\t"
13977         "ldr    r9, [%[a], #284]\n\t"
13978         "umull  r6, r7, r8, r7\n\t"
13979         "adds   r9, r9, r6\n\t"
13980         "adc    r4, r7, #0\n\t"
13981         "adds   r9, r9, r5\n\t"
13982         "str    r9, [%[a], #284]\n\t"
13983         "adc    r4, r4, #0\n\t"
13984         "# a[i+72] += m[72] * mu\n\t"
13985         "ldr    r7, [%[m], #288]\n\t"
13986         "ldr    r9, [%[a], #288]\n\t"
13987         "umull  r6, r7, r8, r7\n\t"
13988         "adds   r9, r9, r6\n\t"
13989         "adc    r5, r7, #0\n\t"
13990         "adds   r9, r9, r4\n\t"
13991         "str    r9, [%[a], #288]\n\t"
13992         "adc    r5, r5, #0\n\t"
13993         "# a[i+73] += m[73] * mu\n\t"
13994         "ldr    r7, [%[m], #292]\n\t"
13995         "ldr    r9, [%[a], #292]\n\t"
13996         "umull  r6, r7, r8, r7\n\t"
13997         "adds   r9, r9, r6\n\t"
13998         "adc    r4, r7, #0\n\t"
13999         "adds   r9, r9, r5\n\t"
14000         "str    r9, [%[a], #292]\n\t"
14001         "adc    r4, r4, #0\n\t"
14002         "# a[i+74] += m[74] * mu\n\t"
14003         "ldr    r7, [%[m], #296]\n\t"
14004         "ldr    r9, [%[a], #296]\n\t"
14005         "umull  r6, r7, r8, r7\n\t"
14006         "adds   r9, r9, r6\n\t"
14007         "adc    r5, r7, #0\n\t"
14008         "adds   r9, r9, r4\n\t"
14009         "str    r9, [%[a], #296]\n\t"
14010         "adc    r5, r5, #0\n\t"
14011         "# a[i+75] += m[75] * mu\n\t"
14012         "ldr    r7, [%[m], #300]\n\t"
14013         "ldr    r9, [%[a], #300]\n\t"
14014         "umull  r6, r7, r8, r7\n\t"
14015         "adds   r9, r9, r6\n\t"
14016         "adc    r4, r7, #0\n\t"
14017         "adds   r9, r9, r5\n\t"
14018         "str    r9, [%[a], #300]\n\t"
14019         "adc    r4, r4, #0\n\t"
14020         "# a[i+76] += m[76] * mu\n\t"
14021         "ldr    r7, [%[m], #304]\n\t"
14022         "ldr    r9, [%[a], #304]\n\t"
14023         "umull  r6, r7, r8, r7\n\t"
14024         "adds   r9, r9, r6\n\t"
14025         "adc    r5, r7, #0\n\t"
14026         "adds   r9, r9, r4\n\t"
14027         "str    r9, [%[a], #304]\n\t"
14028         "adc    r5, r5, #0\n\t"
14029         "# a[i+77] += m[77] * mu\n\t"
14030         "ldr    r7, [%[m], #308]\n\t"
14031         "ldr    r9, [%[a], #308]\n\t"
14032         "umull  r6, r7, r8, r7\n\t"
14033         "adds   r9, r9, r6\n\t"
14034         "adc    r4, r7, #0\n\t"
14035         "adds   r9, r9, r5\n\t"
14036         "str    r9, [%[a], #308]\n\t"
14037         "adc    r4, r4, #0\n\t"
14038         "# a[i+78] += m[78] * mu\n\t"
14039         "ldr    r7, [%[m], #312]\n\t"
14040         "ldr    r9, [%[a], #312]\n\t"
14041         "umull  r6, r7, r8, r7\n\t"
14042         "adds   r9, r9, r6\n\t"
14043         "adc    r5, r7, #0\n\t"
14044         "adds   r9, r9, r4\n\t"
14045         "str    r9, [%[a], #312]\n\t"
14046         "adc    r5, r5, #0\n\t"
14047         "# a[i+79] += m[79] * mu\n\t"
14048         "ldr    r7, [%[m], #316]\n\t"
14049         "ldr    r9, [%[a], #316]\n\t"
14050         "umull  r6, r7, r8, r7\n\t"
14051         "adds   r9, r9, r6\n\t"
14052         "adc    r4, r7, #0\n\t"
14053         "adds   r9, r9, r5\n\t"
14054         "str    r9, [%[a], #316]\n\t"
14055         "adc    r4, r4, #0\n\t"
14056         "# a[i+80] += m[80] * mu\n\t"
14057         "ldr    r7, [%[m], #320]\n\t"
14058         "ldr    r9, [%[a], #320]\n\t"
14059         "umull  r6, r7, r8, r7\n\t"
14060         "adds   r9, r9, r6\n\t"
14061         "adc    r5, r7, #0\n\t"
14062         "adds   r9, r9, r4\n\t"
14063         "str    r9, [%[a], #320]\n\t"
14064         "adc    r5, r5, #0\n\t"
14065         "# a[i+81] += m[81] * mu\n\t"
14066         "ldr    r7, [%[m], #324]\n\t"
14067         "ldr    r9, [%[a], #324]\n\t"
14068         "umull  r6, r7, r8, r7\n\t"
14069         "adds   r9, r9, r6\n\t"
14070         "adc    r4, r7, #0\n\t"
14071         "adds   r9, r9, r5\n\t"
14072         "str    r9, [%[a], #324]\n\t"
14073         "adc    r4, r4, #0\n\t"
14074         "# a[i+82] += m[82] * mu\n\t"
14075         "ldr    r7, [%[m], #328]\n\t"
14076         "ldr    r9, [%[a], #328]\n\t"
14077         "umull  r6, r7, r8, r7\n\t"
14078         "adds   r9, r9, r6\n\t"
14079         "adc    r5, r7, #0\n\t"
14080         "adds   r9, r9, r4\n\t"
14081         "str    r9, [%[a], #328]\n\t"
14082         "adc    r5, r5, #0\n\t"
14083         "# a[i+83] += m[83] * mu\n\t"
14084         "ldr    r7, [%[m], #332]\n\t"
14085         "ldr    r9, [%[a], #332]\n\t"
14086         "umull  r6, r7, r8, r7\n\t"
14087         "adds   r9, r9, r6\n\t"
14088         "adc    r4, r7, #0\n\t"
14089         "adds   r9, r9, r5\n\t"
14090         "str    r9, [%[a], #332]\n\t"
14091         "adc    r4, r4, #0\n\t"
14092         "# a[i+84] += m[84] * mu\n\t"
14093         "ldr    r7, [%[m], #336]\n\t"
14094         "ldr    r9, [%[a], #336]\n\t"
14095         "umull  r6, r7, r8, r7\n\t"
14096         "adds   r9, r9, r6\n\t"
14097         "adc    r5, r7, #0\n\t"
14098         "adds   r9, r9, r4\n\t"
14099         "str    r9, [%[a], #336]\n\t"
14100         "adc    r5, r5, #0\n\t"
14101         "# a[i+85] += m[85] * mu\n\t"
14102         "ldr    r7, [%[m], #340]\n\t"
14103         "ldr    r9, [%[a], #340]\n\t"
14104         "umull  r6, r7, r8, r7\n\t"
14105         "adds   r9, r9, r6\n\t"
14106         "adc    r4, r7, #0\n\t"
14107         "adds   r9, r9, r5\n\t"
14108         "str    r9, [%[a], #340]\n\t"
14109         "adc    r4, r4, #0\n\t"
14110         "# a[i+86] += m[86] * mu\n\t"
14111         "ldr    r7, [%[m], #344]\n\t"
14112         "ldr    r9, [%[a], #344]\n\t"
14113         "umull  r6, r7, r8, r7\n\t"
14114         "adds   r9, r9, r6\n\t"
14115         "adc    r5, r7, #0\n\t"
14116         "adds   r9, r9, r4\n\t"
14117         "str    r9, [%[a], #344]\n\t"
14118         "adc    r5, r5, #0\n\t"
14119         "# a[i+87] += m[87] * mu\n\t"
14120         "ldr    r7, [%[m], #348]\n\t"
14121         "ldr    r9, [%[a], #348]\n\t"
14122         "umull  r6, r7, r8, r7\n\t"
14123         "adds   r9, r9, r6\n\t"
14124         "adc    r4, r7, #0\n\t"
14125         "adds   r9, r9, r5\n\t"
14126         "str    r9, [%[a], #348]\n\t"
14127         "adc    r4, r4, #0\n\t"
14128         "# a[i+88] += m[88] * mu\n\t"
14129         "ldr    r7, [%[m], #352]\n\t"
14130         "ldr    r9, [%[a], #352]\n\t"
14131         "umull  r6, r7, r8, r7\n\t"
14132         "adds   r9, r9, r6\n\t"
14133         "adc    r5, r7, #0\n\t"
14134         "adds   r9, r9, r4\n\t"
14135         "str    r9, [%[a], #352]\n\t"
14136         "adc    r5, r5, #0\n\t"
14137         "# a[i+89] += m[89] * mu\n\t"
14138         "ldr    r7, [%[m], #356]\n\t"
14139         "ldr    r9, [%[a], #356]\n\t"
14140         "umull  r6, r7, r8, r7\n\t"
14141         "adds   r9, r9, r6\n\t"
14142         "adc    r4, r7, #0\n\t"
14143         "adds   r9, r9, r5\n\t"
14144         "str    r9, [%[a], #356]\n\t"
14145         "adc    r4, r4, #0\n\t"
14146         "# a[i+90] += m[90] * mu\n\t"
14147         "ldr    r7, [%[m], #360]\n\t"
14148         "ldr    r9, [%[a], #360]\n\t"
14149         "umull  r6, r7, r8, r7\n\t"
14150         "adds   r9, r9, r6\n\t"
14151         "adc    r5, r7, #0\n\t"
14152         "adds   r9, r9, r4\n\t"
14153         "str    r9, [%[a], #360]\n\t"
14154         "adc    r5, r5, #0\n\t"
14155         "# a[i+91] += m[91] * mu\n\t"
14156         "ldr    r7, [%[m], #364]\n\t"
14157         "ldr    r9, [%[a], #364]\n\t"
14158         "umull  r6, r7, r8, r7\n\t"
14159         "adds   r9, r9, r6\n\t"
14160         "adc    r4, r7, #0\n\t"
14161         "adds   r9, r9, r5\n\t"
14162         "str    r9, [%[a], #364]\n\t"
14163         "adc    r4, r4, #0\n\t"
14164         "# a[i+92] += m[92] * mu\n\t"
14165         "ldr    r7, [%[m], #368]\n\t"
14166         "ldr    r9, [%[a], #368]\n\t"
14167         "umull  r6, r7, r8, r7\n\t"
14168         "adds   r9, r9, r6\n\t"
14169         "adc    r5, r7, #0\n\t"
14170         "adds   r9, r9, r4\n\t"
14171         "str    r9, [%[a], #368]\n\t"
14172         "adc    r5, r5, #0\n\t"
14173         "# a[i+93] += m[93] * mu\n\t"
14174         "ldr    r7, [%[m], #372]\n\t"
14175         "ldr    r9, [%[a], #372]\n\t"
14176         "umull  r6, r7, r8, r7\n\t"
14177         "adds   r9, r9, r6\n\t"
14178         "adc    r4, r7, #0\n\t"
14179         "adds   r9, r9, r5\n\t"
14180         "str    r9, [%[a], #372]\n\t"
14181         "adc    r4, r4, #0\n\t"
14182         "# a[i+94] += m[94] * mu\n\t"
14183         "ldr    r7, [%[m], #376]\n\t"
14184         "ldr    r9, [%[a], #376]\n\t"
14185         "umull  r6, r7, r8, r7\n\t"
14186         "adds   r9, r9, r6\n\t"
14187         "adc    r5, r7, #0\n\t"
14188         "adds   r9, r9, r4\n\t"
14189         "str    r9, [%[a], #376]\n\t"
14190         "adc    r5, r5, #0\n\t"
14191         "# a[i+95] += m[95] * mu\n\t"
14192         "ldr    r7, [%[m], #380]\n\t"
14193         "ldr   r9, [%[a], #380]\n\t"
14194         "umull  r6, r7, r8, r7\n\t"
14195         "adds   r5, r5, r6\n\t"
14196         "adcs   r7, r7, %[ca]\n\t"
14197         "mov    %[ca], #0\n\t"
14198         "adc    %[ca], %[ca], %[ca]\n\t"
14199         "adds   r9, r9, r5\n\t"
14200         "str    r9, [%[a], #380]\n\t"
14201         "ldr    r9, [%[a], #384]\n\t"
14202         "adcs   r9, r9, r7\n\t"
14203         "str    r9, [%[a], #384]\n\t"
14204         "adc    %[ca], %[ca], #0\n\t"
14205         "# i += 1\n\t"
14206         "add    %[a], %[a], #4\n\t"
14207         "add    r12, r12, #4\n\t"
14208         "cmp    r12, #384\n\t"
14209         "blt    1b\n\t"
14210         "str    r10, [%[a], #0]\n\t"
14211         "str    r14, [%[a], #4]\n\t"
14212         : [ca] "+r" (ca), [a] "+r" (a)
14213         : [m] "r" (m), [mp] "r" (mp)
14214         : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
14215     );
14216 
14217     sp_3072_cond_sub_96(a - 96, a, m, (sp_digit)0 - ca);
14218 }
14219 
14220 /* Multiply two Montgomery form numbers mod the modulus (prime).
14221  * (r = a * b mod m)
14222  *
       * Done in two steps: sp_3072_mul_96(r, a, b) is called first, then r is
       * handed to sp_3072_mont_reduce_96(r, m, mp) together with the modulus and
       * the Montgomery multiplier.
       * NOTE(review): r presumably has to be large enough for the unreduced
       * product written by sp_3072_mul_96() - confirm against that function.
       *
14223  * r   Result of multiplication.
14224  * a   First number to multiply in Montgomery form.
14225  * b   Second number to multiply in Montgomery form.
14226  * m   Modulus (prime).
14227  * mp  Montgomery multiplier.
14228  */
14229 static void sp_3072_mont_mul_96(sp_digit* r, sp_digit* a, sp_digit* b,
14230         sp_digit* m, sp_digit mp)
14231 {
14232     sp_3072_mul_96(r, a, b);
14233     sp_3072_mont_reduce_96(r, m, mp);
14234 }
14235 
14236 /* Square the Montgomery form number. (r = a * a mod m)
14237  *
       * Done in two steps: sp_3072_sqr_96(r, a) is called first, then r is
       * handed to sp_3072_mont_reduce_96(r, m, mp) together with the modulus and
       * the Montgomery multiplier.
       * NOTE(review): r presumably has to be large enough for the unreduced
       * square written by sp_3072_sqr_96() - confirm against that function.
       *
14238  * r   Result of squaring.
14239  * a   Number to square in Montgomery form.
14240  * m   Modulus (prime).
14241  * mp  Montgomery multiplier.
14242  */
14243 static void sp_3072_mont_sqr_96(sp_digit* r, sp_digit* a, sp_digit* m,
14244         sp_digit mp)
14245 {
14246     sp_3072_sqr_96(r, a);
14247     sp_3072_mont_reduce_96(r, m, mp);
14248 }
14249 
14250 /* Mul a by digit b into r. (r = a * b)
14251  *
14252  * r  A single precision integer.
14253  * a  A single precision integer.
14254  * b  A single precision digit.
14255  */
14256 static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a,
14257         const sp_digit b)
14258 {
14259 #ifdef WOLFSSL_SP_SMALL
14260     __asm__ __volatile__ (
14261         "mov    r10, #0\n\t"
14262         "# A[0] * B\n\t"
14263         "ldr    r8, [%[a]]\n\t"
14264         "umull  r5, r3, %[b], r8\n\t"
14265         "mov    r4, #0\n\t"
14266         "str    r5, [%[r]]\n\t"
14267         "mov    r5, #0\n\t"
14268         "mov    r9, #4\n\t"
14269         "1:\n\t"
14270         "ldr    r8, [%[a], r9]\n\t"
14271         "umull  r6, r7, %[b], r8\n\t"
14272         "adds   r3, r3, r6\n\t"
14273         "adcs   r4, r4, r7\n\t"
14274         "adc    r5, r10, r10\n\t"
14275         "str    r3, [%[r], r9]\n\t"
14276         "mov    r3, r4\n\t"
14277         "mov    r4, r5\n\t"
14278         "mov    r5, #0\n\t"
14279         "add    r9, r9, #4\n\t"
14280         "cmp    r9, #384\n\t"
14281         "blt    1b\n\t"
14282         "str    r3, [%[r], #384]\n\t"
14283         :
14284         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
14285         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
14286     );
14287 #else
14288     __asm__ __volatile__ (
14289         "mov    r10, #0\n\t"
14290         "# A[0] * B\n\t"
14291         "ldr    r8, [%[a]]\n\t"
14292         "umull  r3, r4, %[b], r8\n\t"
14293         "mov    r5, #0\n\t"
14294         "str    r3, [%[r]]\n\t"
14295         "# A[1] * B\n\t"
14296         "ldr    r8, [%[a], #4]\n\t"
14297         "mov    r3, #0\n\t"
14298         "umull  r6, r7, %[b], r8\n\t"
14299         "adds   r4, r4, r6\n\t"
14300         "adcs   r5, r5, r7\n\t"
14301         "adc    r3, r10, r10\n\t"
14302         "str    r4, [%[r], #4]\n\t"
14303         "# A[2] * B\n\t"
14304         "ldr    r8, [%[a], #8]\n\t"
14305         "mov    r4, #0\n\t"
14306         "umull  r6, r7, %[b], r8\n\t"
14307         "adds   r5, r5, r6\n\t"
14308         "adcs   r3, r3, r7\n\t"
14309         "adc    r4, r10, r10\n\t"
14310         "str    r5, [%[r], #8]\n\t"
14311         "# A[3] * B\n\t"
14312         "ldr    r8, [%[a], #12]\n\t"
14313         "mov    r5, #0\n\t"
14314         "umull  r6, r7, %[b], r8\n\t"
14315         "adds   r3, r3, r6\n\t"
14316         "adcs   r4, r4, r7\n\t"
14317         "adc    r5, r10, r10\n\t"
14318         "str    r3, [%[r], #12]\n\t"
14319         "# A[4] * B\n\t"
14320         "ldr    r8, [%[a], #16]\n\t"
14321         "mov    r3, #0\n\t"
14322         "umull  r6, r7, %[b], r8\n\t"
14323         "adds   r4, r4, r6\n\t"
14324         "adcs   r5, r5, r7\n\t"
14325         "adc    r3, r10, r10\n\t"
14326         "str    r4, [%[r], #16]\n\t"
14327         "# A[5] * B\n\t"
14328         "ldr    r8, [%[a], #20]\n\t"
14329         "mov    r4, #0\n\t"
14330         "umull  r6, r7, %[b], r8\n\t"
14331         "adds   r5, r5, r6\n\t"
14332         "adcs   r3, r3, r7\n\t"
14333         "adc    r4, r10, r10\n\t"
14334         "str    r5, [%[r], #20]\n\t"
14335         "# A[6] * B\n\t"
14336         "ldr    r8, [%[a], #24]\n\t"
14337         "mov    r5, #0\n\t"
14338         "umull  r6, r7, %[b], r8\n\t"
14339         "adds   r3, r3, r6\n\t"
14340         "adcs   r4, r4, r7\n\t"
14341         "adc    r5, r10, r10\n\t"
14342         "str    r3, [%[r], #24]\n\t"
14343         "# A[7] * B\n\t"
14344         "ldr    r8, [%[a], #28]\n\t"
14345         "mov    r3, #0\n\t"
14346         "umull  r6, r7, %[b], r8\n\t"
14347         "adds   r4, r4, r6\n\t"
14348         "adcs   r5, r5, r7\n\t"
14349         "adc    r3, r10, r10\n\t"
14350         "str    r4, [%[r], #28]\n\t"
14351         "# A[8] * B\n\t"
14352         "ldr    r8, [%[a], #32]\n\t"
14353         "mov    r4, #0\n\t"
14354         "umull  r6, r7, %[b], r8\n\t"
14355         "adds   r5, r5, r6\n\t"
14356         "adcs   r3, r3, r7\n\t"
14357         "adc    r4, r10, r10\n\t"
14358         "str    r5, [%[r], #32]\n\t"
14359         "# A[9] * B\n\t"
14360         "ldr    r8, [%[a], #36]\n\t"
14361         "mov    r5, #0\n\t"
14362         "umull  r6, r7, %[b], r8\n\t"
14363         "adds   r3, r3, r6\n\t"
14364         "adcs   r4, r4, r7\n\t"
14365         "adc    r5, r10, r10\n\t"
14366         "str    r3, [%[r], #36]\n\t"
14367         "# A[10] * B\n\t"
14368         "ldr    r8, [%[a], #40]\n\t"
14369         "mov    r3, #0\n\t"
14370         "umull  r6, r7, %[b], r8\n\t"
14371         "adds   r4, r4, r6\n\t"
14372         "adcs   r5, r5, r7\n\t"
14373         "adc    r3, r10, r10\n\t"
14374         "str    r4, [%[r], #40]\n\t"
14375         "# A[11] * B\n\t"
14376         "ldr    r8, [%[a], #44]\n\t"
14377         "mov    r4, #0\n\t"
14378         "umull  r6, r7, %[b], r8\n\t"
14379         "adds   r5, r5, r6\n\t"
14380         "adcs   r3, r3, r7\n\t"
14381         "adc    r4, r10, r10\n\t"
14382         "str    r5, [%[r], #44]\n\t"
14383         "# A[12] * B\n\t"
14384         "ldr    r8, [%[a], #48]\n\t"
14385         "mov    r5, #0\n\t"
14386         "umull  r6, r7, %[b], r8\n\t"
14387         "adds   r3, r3, r6\n\t"
14388         "adcs   r4, r4, r7\n\t"
14389         "adc    r5, r10, r10\n\t"
14390         "str    r3, [%[r], #48]\n\t"
14391         "# A[13] * B\n\t"
14392         "ldr    r8, [%[a], #52]\n\t"
14393         "mov    r3, #0\n\t"
14394         "umull  r6, r7, %[b], r8\n\t"
14395         "adds   r4, r4, r6\n\t"
14396         "adcs   r5, r5, r7\n\t"
14397         "adc    r3, r10, r10\n\t"
14398         "str    r4, [%[r], #52]\n\t"
14399         "# A[14] * B\n\t"
14400         "ldr    r8, [%[a], #56]\n\t"
14401         "mov    r4, #0\n\t"
14402         "umull  r6, r7, %[b], r8\n\t"
14403         "adds   r5, r5, r6\n\t"
14404         "adcs   r3, r3, r7\n\t"
14405         "adc    r4, r10, r10\n\t"
14406         "str    r5, [%[r], #56]\n\t"
14407         "# A[15] * B\n\t"
14408         "ldr    r8, [%[a], #60]\n\t"
14409         "mov    r5, #0\n\t"
14410         "umull  r6, r7, %[b], r8\n\t"
14411         "adds   r3, r3, r6\n\t"
14412         "adcs   r4, r4, r7\n\t"
14413         "adc    r5, r10, r10\n\t"
14414         "str    r3, [%[r], #60]\n\t"
14415         "# A[16] * B\n\t"
14416         "ldr    r8, [%[a], #64]\n\t"
14417         "mov    r3, #0\n\t"
14418         "umull  r6, r7, %[b], r8\n\t"
14419         "adds   r4, r4, r6\n\t"
14420         "adcs   r5, r5, r7\n\t"
14421         "adc    r3, r10, r10\n\t"
14422         "str    r4, [%[r], #64]\n\t"
14423         "# A[17] * B\n\t"
14424         "ldr    r8, [%[a], #68]\n\t"
14425         "mov    r4, #0\n\t"
14426         "umull  r6, r7, %[b], r8\n\t"
14427         "adds   r5, r5, r6\n\t"
14428         "adcs   r3, r3, r7\n\t"
14429         "adc    r4, r10, r10\n\t"
14430         "str    r5, [%[r], #68]\n\t"
14431         "# A[18] * B\n\t"
14432         "ldr    r8, [%[a], #72]\n\t"
14433         "mov    r5, #0\n\t"
14434         "umull  r6, r7, %[b], r8\n\t"
14435         "adds   r3, r3, r6\n\t"
14436         "adcs   r4, r4, r7\n\t"
14437         "adc    r5, r10, r10\n\t"
14438         "str    r3, [%[r], #72]\n\t"
14439         "# A[19] * B\n\t"
14440         "ldr    r8, [%[a], #76]\n\t"
14441         "mov    r3, #0\n\t"
14442         "umull  r6, r7, %[b], r8\n\t"
14443         "adds   r4, r4, r6\n\t"
14444         "adcs   r5, r5, r7\n\t"
14445         "adc    r3, r10, r10\n\t"
14446         "str    r4, [%[r], #76]\n\t"
14447         "# A[20] * B\n\t"
14448         "ldr    r8, [%[a], #80]\n\t"
14449         "mov    r4, #0\n\t"
14450         "umull  r6, r7, %[b], r8\n\t"
14451         "adds   r5, r5, r6\n\t"
14452         "adcs   r3, r3, r7\n\t"
14453         "adc    r4, r10, r10\n\t"
14454         "str    r5, [%[r], #80]\n\t"
14455         "# A[21] * B\n\t"
14456         "ldr    r8, [%[a], #84]\n\t"
14457         "mov    r5, #0\n\t"
14458         "umull  r6, r7, %[b], r8\n\t"
14459         "adds   r3, r3, r6\n\t"
14460         "adcs   r4, r4, r7\n\t"
14461         "adc    r5, r10, r10\n\t"
14462         "str    r3, [%[r], #84]\n\t"
14463         "# A[22] * B\n\t"
14464         "ldr    r8, [%[a], #88]\n\t"
14465         "mov    r3, #0\n\t"
14466         "umull  r6, r7, %[b], r8\n\t"
14467         "adds   r4, r4, r6\n\t"
14468         "adcs   r5, r5, r7\n\t"
14469         "adc    r3, r10, r10\n\t"
14470         "str    r4, [%[r], #88]\n\t"
14471         "# A[23] * B\n\t"
14472         "ldr    r8, [%[a], #92]\n\t"
14473         "mov    r4, #0\n\t"
14474         "umull  r6, r7, %[b], r8\n\t"
14475         "adds   r5, r5, r6\n\t"
14476         "adcs   r3, r3, r7\n\t"
14477         "adc    r4, r10, r10\n\t"
14478         "str    r5, [%[r], #92]\n\t"
14479         "# A[24] * B\n\t"
14480         "ldr    r8, [%[a], #96]\n\t"
14481         "mov    r5, #0\n\t"
14482         "umull  r6, r7, %[b], r8\n\t"
14483         "adds   r3, r3, r6\n\t"
14484         "adcs   r4, r4, r7\n\t"
14485         "adc    r5, r10, r10\n\t"
14486         "str    r3, [%[r], #96]\n\t"
14487         "# A[25] * B\n\t"
14488         "ldr    r8, [%[a], #100]\n\t"
14489         "mov    r3, #0\n\t"
14490         "umull  r6, r7, %[b], r8\n\t"
14491         "adds   r4, r4, r6\n\t"
14492         "adcs   r5, r5, r7\n\t"
14493         "adc    r3, r10, r10\n\t"
14494         "str    r4, [%[r], #100]\n\t"
14495         "# A[26] * B\n\t"
14496         "ldr    r8, [%[a], #104]\n\t"
14497         "mov    r4, #0\n\t"
14498         "umull  r6, r7, %[b], r8\n\t"
14499         "adds   r5, r5, r6\n\t"
14500         "adcs   r3, r3, r7\n\t"
14501         "adc    r4, r10, r10\n\t"
14502         "str    r5, [%[r], #104]\n\t"
14503         "# A[27] * B\n\t"
14504         "ldr    r8, [%[a], #108]\n\t"
14505         "mov    r5, #0\n\t"
14506         "umull  r6, r7, %[b], r8\n\t"
14507         "adds   r3, r3, r6\n\t"
14508         "adcs   r4, r4, r7\n\t"
14509         "adc    r5, r10, r10\n\t"
14510         "str    r3, [%[r], #108]\n\t"
14511         "# A[28] * B\n\t"
14512         "ldr    r8, [%[a], #112]\n\t"
14513         "mov    r3, #0\n\t"
14514         "umull  r6, r7, %[b], r8\n\t"
14515         "adds   r4, r4, r6\n\t"
14516         "adcs   r5, r5, r7\n\t"
14517         "adc    r3, r10, r10\n\t"
14518         "str    r4, [%[r], #112]\n\t"
14519         "# A[29] * B\n\t"
14520         "ldr    r8, [%[a], #116]\n\t"
14521         "mov    r4, #0\n\t"
14522         "umull  r6, r7, %[b], r8\n\t"
14523         "adds   r5, r5, r6\n\t"
14524         "adcs   r3, r3, r7\n\t"
14525         "adc    r4, r10, r10\n\t"
14526         "str    r5, [%[r], #116]\n\t"
14527         "# A[30] * B\n\t"
14528         "ldr    r8, [%[a], #120]\n\t"
14529         "mov    r5, #0\n\t"
14530         "umull  r6, r7, %[b], r8\n\t"
14531         "adds   r3, r3, r6\n\t"
14532         "adcs   r4, r4, r7\n\t"
14533         "adc    r5, r10, r10\n\t"
14534         "str    r3, [%[r], #120]\n\t"
14535         "# A[31] * B\n\t"
14536         "ldr    r8, [%[a], #124]\n\t"
14537         "mov    r3, #0\n\t"
14538         "umull  r6, r7, %[b], r8\n\t"
14539         "adds   r4, r4, r6\n\t"
14540         "adcs   r5, r5, r7\n\t"
14541         "adc    r3, r10, r10\n\t"
14542         "str    r4, [%[r], #124]\n\t"
14543         "# A[32] * B\n\t"
14544         "ldr    r8, [%[a], #128]\n\t"
14545         "mov    r4, #0\n\t"
14546         "umull  r6, r7, %[b], r8\n\t"
14547         "adds   r5, r5, r6\n\t"
14548         "adcs   r3, r3, r7\n\t"
14549         "adc    r4, r10, r10\n\t"
14550         "str    r5, [%[r], #128]\n\t"
14551         "# A[33] * B\n\t"
14552         "ldr    r8, [%[a], #132]\n\t"
14553         "mov    r5, #0\n\t"
14554         "umull  r6, r7, %[b], r8\n\t"
14555         "adds   r3, r3, r6\n\t"
14556         "adcs   r4, r4, r7\n\t"
14557         "adc    r5, r10, r10\n\t"
14558         "str    r3, [%[r], #132]\n\t"
14559         "# A[34] * B\n\t"
14560         "ldr    r8, [%[a], #136]\n\t"
14561         "mov    r3, #0\n\t"
14562         "umull  r6, r7, %[b], r8\n\t"
14563         "adds   r4, r4, r6\n\t"
14564         "adcs   r5, r5, r7\n\t"
14565         "adc    r3, r10, r10\n\t"
14566         "str    r4, [%[r], #136]\n\t"
14567         "# A[35] * B\n\t"
14568         "ldr    r8, [%[a], #140]\n\t"
14569         "mov    r4, #0\n\t"
14570         "umull  r6, r7, %[b], r8\n\t"
14571         "adds   r5, r5, r6\n\t"
14572         "adcs   r3, r3, r7\n\t"
14573         "adc    r4, r10, r10\n\t"
14574         "str    r5, [%[r], #140]\n\t"
14575         "# A[36] * B\n\t"
14576         "ldr    r8, [%[a], #144]\n\t"
14577         "mov    r5, #0\n\t"
14578         "umull  r6, r7, %[b], r8\n\t"
14579         "adds   r3, r3, r6\n\t"
14580         "adcs   r4, r4, r7\n\t"
14581         "adc    r5, r10, r10\n\t"
14582         "str    r3, [%[r], #144]\n\t"
14583         "# A[37] * B\n\t"
14584         "ldr    r8, [%[a], #148]\n\t"
14585         "mov    r3, #0\n\t"
14586         "umull  r6, r7, %[b], r8\n\t"
14587         "adds   r4, r4, r6\n\t"
14588         "adcs   r5, r5, r7\n\t"
14589         "adc    r3, r10, r10\n\t"
14590         "str    r4, [%[r], #148]\n\t"
14591         "# A[38] * B\n\t"
14592         "ldr    r8, [%[a], #152]\n\t"
14593         "mov    r4, #0\n\t"
14594         "umull  r6, r7, %[b], r8\n\t"
14595         "adds   r5, r5, r6\n\t"
14596         "adcs   r3, r3, r7\n\t"
14597         "adc    r4, r10, r10\n\t"
14598         "str    r5, [%[r], #152]\n\t"
14599         "# A[39] * B\n\t"
14600         "ldr    r8, [%[a], #156]\n\t"
14601         "mov    r5, #0\n\t"
14602         "umull  r6, r7, %[b], r8\n\t"
14603         "adds   r3, r3, r6\n\t"
14604         "adcs   r4, r4, r7\n\t"
14605         "adc    r5, r10, r10\n\t"
14606         "str    r3, [%[r], #156]\n\t"
14607         "# A[40] * B\n\t"
14608         "ldr    r8, [%[a], #160]\n\t"
14609         "mov    r3, #0\n\t"
14610         "umull  r6, r7, %[b], r8\n\t"
14611         "adds   r4, r4, r6\n\t"
14612         "adcs   r5, r5, r7\n\t"
14613         "adc    r3, r10, r10\n\t"
14614         "str    r4, [%[r], #160]\n\t"
14615         "# A[41] * B\n\t"
14616         "ldr    r8, [%[a], #164]\n\t"
14617         "mov    r4, #0\n\t"
14618         "umull  r6, r7, %[b], r8\n\t"
14619         "adds   r5, r5, r6\n\t"
14620         "adcs   r3, r3, r7\n\t"
14621         "adc    r4, r10, r10\n\t"
14622         "str    r5, [%[r], #164]\n\t"
14623         "# A[42] * B\n\t"
14624         "ldr    r8, [%[a], #168]\n\t"
14625         "mov    r5, #0\n\t"
14626         "umull  r6, r7, %[b], r8\n\t"
14627         "adds   r3, r3, r6\n\t"
14628         "adcs   r4, r4, r7\n\t"
14629         "adc    r5, r10, r10\n\t"
14630         "str    r3, [%[r], #168]\n\t"
14631         "# A[43] * B\n\t"
14632         "ldr    r8, [%[a], #172]\n\t"
14633         "mov    r3, #0\n\t"
14634         "umull  r6, r7, %[b], r8\n\t"
14635         "adds   r4, r4, r6\n\t"
14636         "adcs   r5, r5, r7\n\t"
14637         "adc    r3, r10, r10\n\t"
14638         "str    r4, [%[r], #172]\n\t"
14639         "# A[44] * B\n\t"
14640         "ldr    r8, [%[a], #176]\n\t"
14641         "mov    r4, #0\n\t"
14642         "umull  r6, r7, %[b], r8\n\t"
14643         "adds   r5, r5, r6\n\t"
14644         "adcs   r3, r3, r7\n\t"
14645         "adc    r4, r10, r10\n\t"
14646         "str    r5, [%[r], #176]\n\t"
14647         "# A[45] * B\n\t"
14648         "ldr    r8, [%[a], #180]\n\t"
14649         "mov    r5, #0\n\t"
14650         "umull  r6, r7, %[b], r8\n\t"
14651         "adds   r3, r3, r6\n\t"
14652         "adcs   r4, r4, r7\n\t"
14653         "adc    r5, r10, r10\n\t"
14654         "str    r3, [%[r], #180]\n\t"
14655         "# A[46] * B\n\t"
14656         "ldr    r8, [%[a], #184]\n\t"
14657         "mov    r3, #0\n\t"
14658         "umull  r6, r7, %[b], r8\n\t"
14659         "adds   r4, r4, r6\n\t"
14660         "adcs   r5, r5, r7\n\t"
14661         "adc    r3, r10, r10\n\t"
14662         "str    r4, [%[r], #184]\n\t"
14663         "# A[47] * B\n\t"
14664         "ldr    r8, [%[a], #188]\n\t"
14665         "mov    r4, #0\n\t"
14666         "umull  r6, r7, %[b], r8\n\t"
14667         "adds   r5, r5, r6\n\t"
14668         "adcs   r3, r3, r7\n\t"
14669         "adc    r4, r10, r10\n\t"
14670         "str    r5, [%[r], #188]\n\t"
14671         "# A[48] * B\n\t"
14672         "ldr    r8, [%[a], #192]\n\t"
14673         "mov    r5, #0\n\t"
14674         "umull  r6, r7, %[b], r8\n\t"
14675         "adds   r3, r3, r6\n\t"
14676         "adcs   r4, r4, r7\n\t"
14677         "adc    r5, r10, r10\n\t"
14678         "str    r3, [%[r], #192]\n\t"
14679         "# A[49] * B\n\t"
14680         "ldr    r8, [%[a], #196]\n\t"
14681         "mov    r3, #0\n\t"
14682         "umull  r6, r7, %[b], r8\n\t"
14683         "adds   r4, r4, r6\n\t"
14684         "adcs   r5, r5, r7\n\t"
14685         "adc    r3, r10, r10\n\t"
14686         "str    r4, [%[r], #196]\n\t"
14687         "# A[50] * B\n\t"
14688         "ldr    r8, [%[a], #200]\n\t"
14689         "mov    r4, #0\n\t"
14690         "umull  r6, r7, %[b], r8\n\t"
14691         "adds   r5, r5, r6\n\t"
14692         "adcs   r3, r3, r7\n\t"
14693         "adc    r4, r10, r10\n\t"
14694         "str    r5, [%[r], #200]\n\t"
14695         "# A[51] * B\n\t"
14696         "ldr    r8, [%[a], #204]\n\t"
14697         "mov    r5, #0\n\t"
14698         "umull  r6, r7, %[b], r8\n\t"
14699         "adds   r3, r3, r6\n\t"
14700         "adcs   r4, r4, r7\n\t"
14701         "adc    r5, r10, r10\n\t"
14702         "str    r3, [%[r], #204]\n\t"
14703         "# A[52] * B\n\t"
14704         "ldr    r8, [%[a], #208]\n\t"
14705         "mov    r3, #0\n\t"
14706         "umull  r6, r7, %[b], r8\n\t"
14707         "adds   r4, r4, r6\n\t"
14708         "adcs   r5, r5, r7\n\t"
14709         "adc    r3, r10, r10\n\t"
14710         "str    r4, [%[r], #208]\n\t"
14711         "# A[53] * B\n\t"
14712         "ldr    r8, [%[a], #212]\n\t"
14713         "mov    r4, #0\n\t"
14714         "umull  r6, r7, %[b], r8\n\t"
14715         "adds   r5, r5, r6\n\t"
14716         "adcs   r3, r3, r7\n\t"
14717         "adc    r4, r10, r10\n\t"
14718         "str    r5, [%[r], #212]\n\t"
14719         "# A[54] * B\n\t"
14720         "ldr    r8, [%[a], #216]\n\t"
14721         "mov    r5, #0\n\t"
14722         "umull  r6, r7, %[b], r8\n\t"
14723         "adds   r3, r3, r6\n\t"
14724         "adcs   r4, r4, r7\n\t"
14725         "adc    r5, r10, r10\n\t"
14726         "str    r3, [%[r], #216]\n\t"
14727         "# A[55] * B\n\t"
14728         "ldr    r8, [%[a], #220]\n\t"
14729         "mov    r3, #0\n\t"
14730         "umull  r6, r7, %[b], r8\n\t"
14731         "adds   r4, r4, r6\n\t"
14732         "adcs   r5, r5, r7\n\t"
14733         "adc    r3, r10, r10\n\t"
14734         "str    r4, [%[r], #220]\n\t"
14735         "# A[56] * B\n\t"
14736         "ldr    r8, [%[a], #224]\n\t"
14737         "mov    r4, #0\n\t"
14738         "umull  r6, r7, %[b], r8\n\t"
14739         "adds   r5, r5, r6\n\t"
14740         "adcs   r3, r3, r7\n\t"
14741         "adc    r4, r10, r10\n\t"
14742         "str    r5, [%[r], #224]\n\t"
14743         "# A[57] * B\n\t"
14744         "ldr    r8, [%[a], #228]\n\t"
14745         "mov    r5, #0\n\t"
14746         "umull  r6, r7, %[b], r8\n\t"
14747         "adds   r3, r3, r6\n\t"
14748         "adcs   r4, r4, r7\n\t"
14749         "adc    r5, r10, r10\n\t"
14750         "str    r3, [%[r], #228]\n\t"
14751         "# A[58] * B\n\t"
14752         "ldr    r8, [%[a], #232]\n\t"
14753         "mov    r3, #0\n\t"
14754         "umull  r6, r7, %[b], r8\n\t"
14755         "adds   r4, r4, r6\n\t"
14756         "adcs   r5, r5, r7\n\t"
14757         "adc    r3, r10, r10\n\t"
14758         "str    r4, [%[r], #232]\n\t"
14759         "# A[59] * B\n\t"
14760         "ldr    r8, [%[a], #236]\n\t"
14761         "mov    r4, #0\n\t"
14762         "umull  r6, r7, %[b], r8\n\t"
14763         "adds   r5, r5, r6\n\t"
14764         "adcs   r3, r3, r7\n\t"
14765         "adc    r4, r10, r10\n\t"
14766         "str    r5, [%[r], #236]\n\t"
14767         "# A[60] * B\n\t"
14768         "ldr    r8, [%[a], #240]\n\t"
14769         "mov    r5, #0\n\t"
14770         "umull  r6, r7, %[b], r8\n\t"
14771         "adds   r3, r3, r6\n\t"
14772         "adcs   r4, r4, r7\n\t"
14773         "adc    r5, r10, r10\n\t"
14774         "str    r3, [%[r], #240]\n\t"
14775         "# A[61] * B\n\t"
14776         "ldr    r8, [%[a], #244]\n\t"
14777         "mov    r3, #0\n\t"
14778         "umull  r6, r7, %[b], r8\n\t"
14779         "adds   r4, r4, r6\n\t"
14780         "adcs   r5, r5, r7\n\t"
14781         "adc    r3, r10, r10\n\t"
14782         "str    r4, [%[r], #244]\n\t"
14783         "# A[62] * B\n\t"
14784         "ldr    r8, [%[a], #248]\n\t"
14785         "mov    r4, #0\n\t"
14786         "umull  r6, r7, %[b], r8\n\t"
14787         "adds   r5, r5, r6\n\t"
14788         "adcs   r3, r3, r7\n\t"
14789         "adc    r4, r10, r10\n\t"
14790         "str    r5, [%[r], #248]\n\t"
14791         "# A[63] * B\n\t"
14792         "ldr    r8, [%[a], #252]\n\t"
14793         "mov    r5, #0\n\t"
14794         "umull  r6, r7, %[b], r8\n\t"
14795         "adds   r3, r3, r6\n\t"
14796         "adcs   r4, r4, r7\n\t"
14797         "adc    r5, r10, r10\n\t"
14798         "str    r3, [%[r], #252]\n\t"
14799         "# A[64] * B\n\t"
14800         "ldr    r8, [%[a], #256]\n\t"
14801         "mov    r3, #0\n\t"
14802         "umull  r6, r7, %[b], r8\n\t"
14803         "adds   r4, r4, r6\n\t"
14804         "adcs   r5, r5, r7\n\t"
14805         "adc    r3, r10, r10\n\t"
14806         "str    r4, [%[r], #256]\n\t"
14807         "# A[65] * B\n\t"
14808         "ldr    r8, [%[a], #260]\n\t"
14809         "mov    r4, #0\n\t"
14810         "umull  r6, r7, %[b], r8\n\t"
14811         "adds   r5, r5, r6\n\t"
14812         "adcs   r3, r3, r7\n\t"
14813         "adc    r4, r10, r10\n\t"
14814         "str    r5, [%[r], #260]\n\t"
14815         "# A[66] * B\n\t"
14816         "ldr    r8, [%[a], #264]\n\t"
14817         "mov    r5, #0\n\t"
14818         "umull  r6, r7, %[b], r8\n\t"
14819         "adds   r3, r3, r6\n\t"
14820         "adcs   r4, r4, r7\n\t"
14821         "adc    r5, r10, r10\n\t"
14822         "str    r3, [%[r], #264]\n\t"
14823         "# A[67] * B\n\t"
14824         "ldr    r8, [%[a], #268]\n\t"
14825         "mov    r3, #0\n\t"
14826         "umull  r6, r7, %[b], r8\n\t"
14827         "adds   r4, r4, r6\n\t"
14828         "adcs   r5, r5, r7\n\t"
14829         "adc    r3, r10, r10\n\t"
14830         "str    r4, [%[r], #268]\n\t"
14831         "# A[68] * B\n\t"
14832         "ldr    r8, [%[a], #272]\n\t"
14833         "mov    r4, #0\n\t"
14834         "umull  r6, r7, %[b], r8\n\t"
14835         "adds   r5, r5, r6\n\t"
14836         "adcs   r3, r3, r7\n\t"
14837         "adc    r4, r10, r10\n\t"
14838         "str    r5, [%[r], #272]\n\t"
14839         "# A[69] * B\n\t"
14840         "ldr    r8, [%[a], #276]\n\t"
14841         "mov    r5, #0\n\t"
14842         "umull  r6, r7, %[b], r8\n\t"
14843         "adds   r3, r3, r6\n\t"
14844         "adcs   r4, r4, r7\n\t"
14845         "adc    r5, r10, r10\n\t"
14846         "str    r3, [%[r], #276]\n\t"
14847         "# A[70] * B\n\t"
14848         "ldr    r8, [%[a], #280]\n\t"
14849         "mov    r3, #0\n\t"
14850         "umull  r6, r7, %[b], r8\n\t"
14851         "adds   r4, r4, r6\n\t"
14852         "adcs   r5, r5, r7\n\t"
14853         "adc    r3, r10, r10\n\t"
14854         "str    r4, [%[r], #280]\n\t"
14855         "# A[71] * B\n\t"
14856         "ldr    r8, [%[a], #284]\n\t"
14857         "mov    r4, #0\n\t"
14858         "umull  r6, r7, %[b], r8\n\t"
14859         "adds   r5, r5, r6\n\t"
14860         "adcs   r3, r3, r7\n\t"
14861         "adc    r4, r10, r10\n\t"
14862         "str    r5, [%[r], #284]\n\t"
14863         "# A[72] * B\n\t"
14864         "ldr    r8, [%[a], #288]\n\t"
14865         "mov    r5, #0\n\t"
14866         "umull  r6, r7, %[b], r8\n\t"
14867         "adds   r3, r3, r6\n\t"
14868         "adcs   r4, r4, r7\n\t"
14869         "adc    r5, r10, r10\n\t"
14870         "str    r3, [%[r], #288]\n\t"
14871         "# A[73] * B\n\t"
14872         "ldr    r8, [%[a], #292]\n\t"
14873         "mov    r3, #0\n\t"
14874         "umull  r6, r7, %[b], r8\n\t"
14875         "adds   r4, r4, r6\n\t"
14876         "adcs   r5, r5, r7\n\t"
14877         "adc    r3, r10, r10\n\t"
14878         "str    r4, [%[r], #292]\n\t"
14879         "# A[74] * B\n\t"
14880         "ldr    r8, [%[a], #296]\n\t"
14881         "mov    r4, #0\n\t"
14882         "umull  r6, r7, %[b], r8\n\t"
14883         "adds   r5, r5, r6\n\t"
14884         "adcs   r3, r3, r7\n\t"
14885         "adc    r4, r10, r10\n\t"
14886         "str    r5, [%[r], #296]\n\t"
14887         "# A[75] * B\n\t"
14888         "ldr    r8, [%[a], #300]\n\t"
14889         "mov    r5, #0\n\t"
14890         "umull  r6, r7, %[b], r8\n\t"
14891         "adds   r3, r3, r6\n\t"
14892         "adcs   r4, r4, r7\n\t"
14893         "adc    r5, r10, r10\n\t"
14894         "str    r3, [%[r], #300]\n\t"
14895         "# A[76] * B\n\t"
14896         "ldr    r8, [%[a], #304]\n\t"
14897         "mov    r3, #0\n\t"
14898         "umull  r6, r7, %[b], r8\n\t"
14899         "adds   r4, r4, r6\n\t"
14900         "adcs   r5, r5, r7\n\t"
14901         "adc    r3, r10, r10\n\t"
14902         "str    r4, [%[r], #304]\n\t"
14903         "# A[77] * B\n\t"
14904         "ldr    r8, [%[a], #308]\n\t"
14905         "mov    r4, #0\n\t"
14906         "umull  r6, r7, %[b], r8\n\t"
14907         "adds   r5, r5, r6\n\t"
14908         "adcs   r3, r3, r7\n\t"
14909         "adc    r4, r10, r10\n\t"
14910         "str    r5, [%[r], #308]\n\t"
14911         "# A[78] * B\n\t"
14912         "ldr    r8, [%[a], #312]\n\t"
14913         "mov    r5, #0\n\t"
14914         "umull  r6, r7, %[b], r8\n\t"
14915         "adds   r3, r3, r6\n\t"
14916         "adcs   r4, r4, r7\n\t"
14917         "adc    r5, r10, r10\n\t"
14918         "str    r3, [%[r], #312]\n\t"
14919         "# A[79] * B\n\t"
14920         "ldr    r8, [%[a], #316]\n\t"
14921         "mov    r3, #0\n\t"
14922         "umull  r6, r7, %[b], r8\n\t"
14923         "adds   r4, r4, r6\n\t"
14924         "adcs   r5, r5, r7\n\t"
14925         "adc    r3, r10, r10\n\t"
14926         "str    r4, [%[r], #316]\n\t"
14927         "# A[80] * B\n\t"
14928         "ldr    r8, [%[a], #320]\n\t"
14929         "mov    r4, #0\n\t"
14930         "umull  r6, r7, %[b], r8\n\t"
14931         "adds   r5, r5, r6\n\t"
14932         "adcs   r3, r3, r7\n\t"
14933         "adc    r4, r10, r10\n\t"
14934         "str    r5, [%[r], #320]\n\t"
14935         "# A[81] * B\n\t"
14936         "ldr    r8, [%[a], #324]\n\t"
14937         "mov    r5, #0\n\t"
14938         "umull  r6, r7, %[b], r8\n\t"
14939         "adds   r3, r3, r6\n\t"
14940         "adcs   r4, r4, r7\n\t"
14941         "adc    r5, r10, r10\n\t"
14942         "str    r3, [%[r], #324]\n\t"
14943         "# A[82] * B\n\t"
14944         "ldr    r8, [%[a], #328]\n\t"
14945         "mov    r3, #0\n\t"
14946         "umull  r6, r7, %[b], r8\n\t"
14947         "adds   r4, r4, r6\n\t"
14948         "adcs   r5, r5, r7\n\t"
14949         "adc    r3, r10, r10\n\t"
14950         "str    r4, [%[r], #328]\n\t"
14951         "# A[83] * B\n\t"
14952         "ldr    r8, [%[a], #332]\n\t"
14953         "mov    r4, #0\n\t"
14954         "umull  r6, r7, %[b], r8\n\t"
14955         "adds   r5, r5, r6\n\t"
14956         "adcs   r3, r3, r7\n\t"
14957         "adc    r4, r10, r10\n\t"
14958         "str    r5, [%[r], #332]\n\t"
14959         "# A[84] * B\n\t"
14960         "ldr    r8, [%[a], #336]\n\t"
14961         "mov    r5, #0\n\t"
14962         "umull  r6, r7, %[b], r8\n\t"
14963         "adds   r3, r3, r6\n\t"
14964         "adcs   r4, r4, r7\n\t"
14965         "adc    r5, r10, r10\n\t"
14966         "str    r3, [%[r], #336]\n\t"
14967         "# A[85] * B\n\t"
14968         "ldr    r8, [%[a], #340]\n\t"
14969         "mov    r3, #0\n\t"
14970         "umull  r6, r7, %[b], r8\n\t"
14971         "adds   r4, r4, r6\n\t"
14972         "adcs   r5, r5, r7\n\t"
14973         "adc    r3, r10, r10\n\t"
14974         "str    r4, [%[r], #340]\n\t"
14975         "# A[86] * B\n\t"
14976         "ldr    r8, [%[a], #344]\n\t"
14977         "mov    r4, #0\n\t"
14978         "umull  r6, r7, %[b], r8\n\t"
14979         "adds   r5, r5, r6\n\t"
14980         "adcs   r3, r3, r7\n\t"
14981         "adc    r4, r10, r10\n\t"
14982         "str    r5, [%[r], #344]\n\t"
14983         "# A[87] * B\n\t"
14984         "ldr    r8, [%[a], #348]\n\t"
14985         "mov    r5, #0\n\t"
14986         "umull  r6, r7, %[b], r8\n\t"
14987         "adds   r3, r3, r6\n\t"
14988         "adcs   r4, r4, r7\n\t"
14989         "adc    r5, r10, r10\n\t"
14990         "str    r3, [%[r], #348]\n\t"
14991         "# A[88] * B\n\t"
14992         "ldr    r8, [%[a], #352]\n\t"
14993         "mov    r3, #0\n\t"
14994         "umull  r6, r7, %[b], r8\n\t"
14995         "adds   r4, r4, r6\n\t"
14996         "adcs   r5, r5, r7\n\t"
14997         "adc    r3, r10, r10\n\t"
14998         "str    r4, [%[r], #352]\n\t"
14999         "# A[89] * B\n\t"
15000         "ldr    r8, [%[a], #356]\n\t"
15001         "mov    r4, #0\n\t"
15002         "umull  r6, r7, %[b], r8\n\t"
15003         "adds   r5, r5, r6\n\t"
15004         "adcs   r3, r3, r7\n\t"
15005         "adc    r4, r10, r10\n\t"
15006         "str    r5, [%[r], #356]\n\t"
15007         "# A[90] * B\n\t"
15008         "ldr    r8, [%[a], #360]\n\t"
15009         "mov    r5, #0\n\t"
15010         "umull  r6, r7, %[b], r8\n\t"
15011         "adds   r3, r3, r6\n\t"
15012         "adcs   r4, r4, r7\n\t"
15013         "adc    r5, r10, r10\n\t"
15014         "str    r3, [%[r], #360]\n\t"
15015         "# A[91] * B\n\t"
15016         "ldr    r8, [%[a], #364]\n\t"
15017         "mov    r3, #0\n\t"
15018         "umull  r6, r7, %[b], r8\n\t"
15019         "adds   r4, r4, r6\n\t"
15020         "adcs   r5, r5, r7\n\t"
15021         "adc    r3, r10, r10\n\t"
15022         "str    r4, [%[r], #364]\n\t"
15023         "# A[92] * B\n\t"
15024         "ldr    r8, [%[a], #368]\n\t"
15025         "mov    r4, #0\n\t"
15026         "umull  r6, r7, %[b], r8\n\t"
15027         "adds   r5, r5, r6\n\t"
15028         "adcs   r3, r3, r7\n\t"
15029         "adc    r4, r10, r10\n\t"
15030         "str    r5, [%[r], #368]\n\t"
15031         "# A[93] * B\n\t"
15032         "ldr    r8, [%[a], #372]\n\t"
15033         "mov    r5, #0\n\t"
15034         "umull  r6, r7, %[b], r8\n\t"
15035         "adds   r3, r3, r6\n\t"
15036         "adcs   r4, r4, r7\n\t"
15037         "adc    r5, r10, r10\n\t"
15038         "str    r3, [%[r], #372]\n\t"
15039         "# A[94] * B\n\t"
15040         "ldr    r8, [%[a], #376]\n\t"
15041         "mov    r3, #0\n\t"
15042         "umull  r6, r7, %[b], r8\n\t"
15043         "adds   r4, r4, r6\n\t"
15044         "adcs   r5, r5, r7\n\t"
15045         "adc    r3, r10, r10\n\t"
15046         "str    r4, [%[r], #376]\n\t"
15047         "# A[95] * B\n\t"
15048         "ldr    r8, [%[a], #380]\n\t"
15049         "umull  r6, r7, %[b], r8\n\t"
15050         "adds   r5, r5, r6\n\t"
15051         "adc    r3, r3, r7\n\t"
15052         "str    r5, [%[r], #380]\n\t"
15053         "str    r3, [%[r], #384]\n\t"
15054         :
15055         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
15056         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
15057     );
15058 #endif
15059 }
15060 
15061 /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
15062  *
15063  * d1   The high order half of the number to divide.
15064  * d0   The low order half of the number to divide.
15065  * div  The dividend.
15066  * returns the result of the division.
15067  *
15068  * Note that this is an approximate div. It may give an answer 1 larger.
15069  */
15070 static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, sp_digit div)
15071 {
15072     sp_digit r = 0;
15073 
15074     __asm__ __volatile__ (
15075         "lsr    r5, %[div], #1\n\t"
15076         "add    r5, r5, #1\n\t"
15077         "mov    r6, %[d0]\n\t"
15078         "mov    r7, %[d1]\n\t"
15079         "# Do top 32\n\t"
15080         "subs   r8, r5, r7\n\t"
15081         "sbc    r8, r8, r8\n\t"
15082         "add    %[r], %[r], %[r]\n\t"
15083         "sub    %[r], %[r], r8\n\t"
15084         "and    r8, r8, r5\n\t"
15085         "subs   r7, r7, r8\n\t"
15086         "# Next 30 bits\n\t"
15087         "mov    r4, #29\n\t"
15088         "1:\n\t"
15089         "movs   r6, r6, lsl #1\n\t"
15090         "adc    r7, r7, r7\n\t"
15091         "subs   r8, r5, r7\n\t"
15092         "sbc    r8, r8, r8\n\t"
15093         "add    %[r], %[r], %[r]\n\t"
15094         "sub    %[r], %[r], r8\n\t"
15095         "and    r8, r8, r5\n\t"
15096         "subs   r7, r7, r8\n\t"
15097         "subs   r4, r4, #1\n\t"
15098         "bpl    1b\n\t"
15099         "add    %[r], %[r], %[r]\n\t"
15100         "add    %[r], %[r], #1\n\t"
15101         "umull  r4, r5, %[r], %[div]\n\t"
15102         "subs   r4, %[d0], r4\n\t"
15103         "sbc    r5, %[d1], r5\n\t"
15104         "add    %[r], %[r], r5\n\t"
15105         "umull  r4, r5, %[r], %[div]\n\t"
15106         "subs   r4, %[d0], r4\n\t"
15107         "sbc    r5, %[d1], r5\n\t"
15108         "add    %[r], %[r], r5\n\t"
15109         "subs   r8, %[div], r4\n\t"
15110         "sbc    r8, r8, r8\n\t"
15111         "sub    %[r], %[r], r8\n\t"
15112         : [r] "+r" (r)
15113         : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
15114         : "r4", "r5", "r6", "r7", "r8"
15115     );
15116     return r;
15117 }
15118 
15119 /* AND m into each word of a and store in r.
15120  *
15121  * r  A single precision integer.
15122  * a  A single precision integer.
15123  * m  Mask to AND against each digit.
15124  */
15125 static void sp_3072_mask_96(sp_digit* r, sp_digit* a, sp_digit m)
15126 {
15127 #ifdef WOLFSSL_SP_SMALL
15128     int i;
15129 
15130     for (i=0; i<96; i++)
15131         r[i] = a[i] & m;
15132 #else
15133     int i;
15134 
15135     for (i = 0; i < 96; i += 8) {
15136         r[i+0] = a[i+0] & m;
15137         r[i+1] = a[i+1] & m;
15138         r[i+2] = a[i+2] & m;
15139         r[i+3] = a[i+3] & m;
15140         r[i+4] = a[i+4] & m;
15141         r[i+5] = a[i+5] & m;
15142         r[i+6] = a[i+6] & m;
15143         r[i+7] = a[i+7] & m;
15144     }
15145 #endif
15146 }
15147 
15148 /* Compare a with b in constant time.
15149  *
15150  * a  A single precision integer.
15151  * b  A single precision integer.
15152  * return -ve, 0 or +ve if a is less than, equal to or greater than b
15153  * respectively.
15154  */
15155 static int32_t sp_3072_cmp_96(sp_digit* a, sp_digit* b)
15156 {
15157     sp_digit r = -1;
15158     sp_digit one = 1;
15159 
15160 #ifdef WOLFSSL_SP_SMALL
15161     __asm__ __volatile__ (
15162         "mov    r7, #0\n\t"
15163         "mov    r3, #-1\n\t"
15164         "mov    r6, #380\n\t"
15165         "1:\n\t"
15166         "ldr    r4, [%[a], r6]\n\t"
15167         "ldr    r5, [%[b], r6]\n\t"
15168         "and    r4, r4, r3\n\t"
15169         "and    r5, r5, r3\n\t"
15170         "subs   r4, r4, r5\n\t"
15171         "movhi  %[r], %[one]\n\t"
15172         "movlo  %[r], r3\n\t"
15173         "movne  r3, r7\n\t"
15174         "sub    r6, r6, #4\n\t"
15175         "bcc    1b\n\t"
15176         "eor    %[r], %[r], r3\n\t"
15177         : [r] "+r" (r)
15178         : [a] "r" (a), [b] "r" (b), [one] "r" (one)
15179         : "r2", "r3", "r4", "r5", "r6", "r7"
15180     );
15181 #else
15182     __asm__ __volatile__ (
15183         "mov    r7, #0\n\t"
15184         "mov    r3, #-1\n\t"
15185         "ldr        r4, [%[a], #380]\n\t"
15186         "ldr        r5, [%[b], #380]\n\t"
15187         "and        r4, r4, r3\n\t"
15188         "and        r5, r5, r3\n\t"
15189         "subs   r4, r4, r5\n\t"
15190         "movhi  %[r], %[one]\n\t"
15191         "movlo  %[r], r3\n\t"
15192         "movne  r3, r7\n\t"
15193         "ldr        r4, [%[a], #376]\n\t"
15194         "ldr        r5, [%[b], #376]\n\t"
15195         "and        r4, r4, r3\n\t"
15196         "and        r5, r5, r3\n\t"
15197         "subs   r4, r4, r5\n\t"
15198         "movhi  %[r], %[one]\n\t"
15199         "movlo  %[r], r3\n\t"
15200         "movne  r3, r7\n\t"
15201         "ldr        r4, [%[a], #372]\n\t"
15202         "ldr        r5, [%[b], #372]\n\t"
15203         "and        r4, r4, r3\n\t"
15204         "and        r5, r5, r3\n\t"
15205         "subs   r4, r4, r5\n\t"
15206         "movhi  %[r], %[one]\n\t"
15207         "movlo  %[r], r3\n\t"
15208         "movne  r3, r7\n\t"
15209         "ldr        r4, [%[a], #368]\n\t"
15210         "ldr        r5, [%[b], #368]\n\t"
15211         "and        r4, r4, r3\n\t"
15212         "and        r5, r5, r3\n\t"
15213         "subs   r4, r4, r5\n\t"
15214         "movhi  %[r], %[one]\n\t"
15215         "movlo  %[r], r3\n\t"
15216         "movne  r3, r7\n\t"
15217         "ldr        r4, [%[a], #364]\n\t"
15218         "ldr        r5, [%[b], #364]\n\t"
15219         "and        r4, r4, r3\n\t"
15220         "and        r5, r5, r3\n\t"
15221         "subs   r4, r4, r5\n\t"
15222         "movhi  %[r], %[one]\n\t"
15223         "movlo  %[r], r3\n\t"
15224         "movne  r3, r7\n\t"
15225         "ldr        r4, [%[a], #360]\n\t"
15226         "ldr        r5, [%[b], #360]\n\t"
15227         "and        r4, r4, r3\n\t"
15228         "and        r5, r5, r3\n\t"
15229         "subs   r4, r4, r5\n\t"
15230         "movhi  %[r], %[one]\n\t"
15231         "movlo  %[r], r3\n\t"
15232         "movne  r3, r7\n\t"
15233         "ldr        r4, [%[a], #356]\n\t"
15234         "ldr        r5, [%[b], #356]\n\t"
15235         "and        r4, r4, r3\n\t"
15236         "and        r5, r5, r3\n\t"
15237         "subs   r4, r4, r5\n\t"
15238         "movhi  %[r], %[one]\n\t"
15239         "movlo  %[r], r3\n\t"
15240         "movne  r3, r7\n\t"
15241         "ldr        r4, [%[a], #352]\n\t"
15242         "ldr        r5, [%[b], #352]\n\t"
15243         "and        r4, r4, r3\n\t"
15244         "and        r5, r5, r3\n\t"
15245         "subs   r4, r4, r5\n\t"
15246         "movhi  %[r], %[one]\n\t"
15247         "movlo  %[r], r3\n\t"
15248         "movne  r3, r7\n\t"
15249         "ldr        r4, [%[a], #348]\n\t"
15250         "ldr        r5, [%[b], #348]\n\t"
15251         "and        r4, r4, r3\n\t"
15252         "and        r5, r5, r3\n\t"
15253         "subs   r4, r4, r5\n\t"
15254         "movhi  %[r], %[one]\n\t"
15255         "movlo  %[r], r3\n\t"
15256         "movne  r3, r7\n\t"
15257         "ldr        r4, [%[a], #344]\n\t"
15258         "ldr        r5, [%[b], #344]\n\t"
15259         "and        r4, r4, r3\n\t"
15260         "and        r5, r5, r3\n\t"
15261         "subs   r4, r4, r5\n\t"
15262         "movhi  %[r], %[one]\n\t"
15263         "movlo  %[r], r3\n\t"
15264         "movne  r3, r7\n\t"
15265         "ldr        r4, [%[a], #340]\n\t"
15266         "ldr        r5, [%[b], #340]\n\t"
15267         "and        r4, r4, r3\n\t"
15268         "and        r5, r5, r3\n\t"
15269         "subs   r4, r4, r5\n\t"
15270         "movhi  %[r], %[one]\n\t"
15271         "movlo  %[r], r3\n\t"
15272         "movne  r3, r7\n\t"
15273         "ldr        r4, [%[a], #336]\n\t"
15274         "ldr        r5, [%[b], #336]\n\t"
15275         "and        r4, r4, r3\n\t"
15276         "and        r5, r5, r3\n\t"
15277         "subs   r4, r4, r5\n\t"
15278         "movhi  %[r], %[one]\n\t"
15279         "movlo  %[r], r3\n\t"
15280         "movne  r3, r7\n\t"
15281         "ldr        r4, [%[a], #332]\n\t"
15282         "ldr        r5, [%[b], #332]\n\t"
15283         "and        r4, r4, r3\n\t"
15284         "and        r5, r5, r3\n\t"
15285         "subs   r4, r4, r5\n\t"
15286         "movhi  %[r], %[one]\n\t"
15287         "movlo  %[r], r3\n\t"
15288         "movne  r3, r7\n\t"
15289         "ldr        r4, [%[a], #328]\n\t"
15290         "ldr        r5, [%[b], #328]\n\t"
15291         "and        r4, r4, r3\n\t"
15292         "and        r5, r5, r3\n\t"
15293         "subs   r4, r4, r5\n\t"
15294         "movhi  %[r], %[one]\n\t"
15295         "movlo  %[r], r3\n\t"
15296         "movne  r3, r7\n\t"
15297         "ldr        r4, [%[a], #324]\n\t"
15298         "ldr        r5, [%[b], #324]\n\t"
15299         "and        r4, r4, r3\n\t"
15300         "and        r5, r5, r3\n\t"
15301         "subs   r4, r4, r5\n\t"
15302         "movhi  %[r], %[one]\n\t"
15303         "movlo  %[r], r3\n\t"
15304         "movne  r3, r7\n\t"
15305         "ldr        r4, [%[a], #320]\n\t"
15306         "ldr        r5, [%[b], #320]\n\t"
15307         "and        r4, r4, r3\n\t"
15308         "and        r5, r5, r3\n\t"
15309         "subs   r4, r4, r5\n\t"
15310         "movhi  %[r], %[one]\n\t"
15311         "movlo  %[r], r3\n\t"
15312         "movne  r3, r7\n\t"
15313         "ldr        r4, [%[a], #316]\n\t"
15314         "ldr        r5, [%[b], #316]\n\t"
15315         "and        r4, r4, r3\n\t"
15316         "and        r5, r5, r3\n\t"
15317         "subs   r4, r4, r5\n\t"
15318         "movhi  %[r], %[one]\n\t"
15319         "movlo  %[r], r3\n\t"
15320         "movne  r3, r7\n\t"
15321         "ldr        r4, [%[a], #312]\n\t"
15322         "ldr        r5, [%[b], #312]\n\t"
15323         "and        r4, r4, r3\n\t"
15324         "and        r5, r5, r3\n\t"
15325         "subs   r4, r4, r5\n\t"
15326         "movhi  %[r], %[one]\n\t"
15327         "movlo  %[r], r3\n\t"
15328         "movne  r3, r7\n\t"
15329         "ldr        r4, [%[a], #308]\n\t"
15330         "ldr        r5, [%[b], #308]\n\t"
15331         "and        r4, r4, r3\n\t"
15332         "and        r5, r5, r3\n\t"
15333         "subs   r4, r4, r5\n\t"
15334         "movhi  %[r], %[one]\n\t"
15335         "movlo  %[r], r3\n\t"
15336         "movne  r3, r7\n\t"
15337         "ldr        r4, [%[a], #304]\n\t"
15338         "ldr        r5, [%[b], #304]\n\t"
15339         "and        r4, r4, r3\n\t"
15340         "and        r5, r5, r3\n\t"
15341         "subs   r4, r4, r5\n\t"
15342         "movhi  %[r], %[one]\n\t"
15343         "movlo  %[r], r3\n\t"
15344         "movne  r3, r7\n\t"
15345         "ldr        r4, [%[a], #300]\n\t"
15346         "ldr        r5, [%[b], #300]\n\t"
15347         "and        r4, r4, r3\n\t"
15348         "and        r5, r5, r3\n\t"
15349         "subs   r4, r4, r5\n\t"
15350         "movhi  %[r], %[one]\n\t"
15351         "movlo  %[r], r3\n\t"
15352         "movne  r3, r7\n\t"
15353         "ldr        r4, [%[a], #296]\n\t"
15354         "ldr        r5, [%[b], #296]\n\t"
15355         "and        r4, r4, r3\n\t"
15356         "and        r5, r5, r3\n\t"
15357         "subs   r4, r4, r5\n\t"
15358         "movhi  %[r], %[one]\n\t"
15359         "movlo  %[r], r3\n\t"
15360         "movne  r3, r7\n\t"
15361         "ldr        r4, [%[a], #292]\n\t"
15362         "ldr        r5, [%[b], #292]\n\t"
15363         "and        r4, r4, r3\n\t"
15364         "and        r5, r5, r3\n\t"
15365         "subs   r4, r4, r5\n\t"
15366         "movhi  %[r], %[one]\n\t"
15367         "movlo  %[r], r3\n\t"
15368         "movne  r3, r7\n\t"
15369         "ldr        r4, [%[a], #288]\n\t"
15370         "ldr        r5, [%[b], #288]\n\t"
15371         "and        r4, r4, r3\n\t"
15372         "and        r5, r5, r3\n\t"
15373         "subs   r4, r4, r5\n\t"
15374         "movhi  %[r], %[one]\n\t"
15375         "movlo  %[r], r3\n\t"
15376         "movne  r3, r7\n\t"
15377         "ldr        r4, [%[a], #284]\n\t"
15378         "ldr        r5, [%[b], #284]\n\t"
15379         "and        r4, r4, r3\n\t"
15380         "and        r5, r5, r3\n\t"
15381         "subs   r4, r4, r5\n\t"
15382         "movhi  %[r], %[one]\n\t"
15383         "movlo  %[r], r3\n\t"
15384         "movne  r3, r7\n\t"
15385         "ldr        r4, [%[a], #280]\n\t"
15386         "ldr        r5, [%[b], #280]\n\t"
15387         "and        r4, r4, r3\n\t"
15388         "and        r5, r5, r3\n\t"
15389         "subs   r4, r4, r5\n\t"
15390         "movhi  %[r], %[one]\n\t"
15391         "movlo  %[r], r3\n\t"
15392         "movne  r3, r7\n\t"
15393         "ldr        r4, [%[a], #276]\n\t"
15394         "ldr        r5, [%[b], #276]\n\t"
15395         "and        r4, r4, r3\n\t"
15396         "and        r5, r5, r3\n\t"
15397         "subs   r4, r4, r5\n\t"
15398         "movhi  %[r], %[one]\n\t"
15399         "movlo  %[r], r3\n\t"
15400         "movne  r3, r7\n\t"
15401         "ldr        r4, [%[a], #272]\n\t"
15402         "ldr        r5, [%[b], #272]\n\t"
15403         "and        r4, r4, r3\n\t"
15404         "and        r5, r5, r3\n\t"
15405         "subs   r4, r4, r5\n\t"
15406         "movhi  %[r], %[one]\n\t"
15407         "movlo  %[r], r3\n\t"
15408         "movne  r3, r7\n\t"
15409         "ldr        r4, [%[a], #268]\n\t"
15410         "ldr        r5, [%[b], #268]\n\t"
15411         "and        r4, r4, r3\n\t"
15412         "and        r5, r5, r3\n\t"
15413         "subs   r4, r4, r5\n\t"
15414         "movhi  %[r], %[one]\n\t"
15415         "movlo  %[r], r3\n\t"
15416         "movne  r3, r7\n\t"
15417         "ldr        r4, [%[a], #264]\n\t"
15418         "ldr        r5, [%[b], #264]\n\t"
15419         "and        r4, r4, r3\n\t"
15420         "and        r5, r5, r3\n\t"
15421         "subs   r4, r4, r5\n\t"
15422         "movhi  %[r], %[one]\n\t"
15423         "movlo  %[r], r3\n\t"
15424         "movne  r3, r7\n\t"
15425         "ldr        r4, [%[a], #260]\n\t"
15426         "ldr        r5, [%[b], #260]\n\t"
15427         "and        r4, r4, r3\n\t"
15428         "and        r5, r5, r3\n\t"
15429         "subs   r4, r4, r5\n\t"
15430         "movhi  %[r], %[one]\n\t"
15431         "movlo  %[r], r3\n\t"
15432         "movne  r3, r7\n\t"
15433         "ldr        r4, [%[a], #256]\n\t"
15434         "ldr        r5, [%[b], #256]\n\t"
15435         "and        r4, r4, r3\n\t"
15436         "and        r5, r5, r3\n\t"
15437         "subs   r4, r4, r5\n\t"
15438         "movhi  %[r], %[one]\n\t"
15439         "movlo  %[r], r3\n\t"
15440         "movne  r3, r7\n\t"
15441         "ldr        r4, [%[a], #252]\n\t"
15442         "ldr        r5, [%[b], #252]\n\t"
15443         "and        r4, r4, r3\n\t"
15444         "and        r5, r5, r3\n\t"
15445         "subs   r4, r4, r5\n\t"
15446         "movhi  %[r], %[one]\n\t"
15447         "movlo  %[r], r3\n\t"
15448         "movne  r3, r7\n\t"
15449         "ldr        r4, [%[a], #248]\n\t"
15450         "ldr        r5, [%[b], #248]\n\t"
15451         "and        r4, r4, r3\n\t"
15452         "and        r5, r5, r3\n\t"
15453         "subs   r4, r4, r5\n\t"
15454         "movhi  %[r], %[one]\n\t"
15455         "movlo  %[r], r3\n\t"
15456         "movne  r3, r7\n\t"
15457         "ldr        r4, [%[a], #244]\n\t"
15458         "ldr        r5, [%[b], #244]\n\t"
15459         "and        r4, r4, r3\n\t"
15460         "and        r5, r5, r3\n\t"
15461         "subs   r4, r4, r5\n\t"
15462         "movhi  %[r], %[one]\n\t"
15463         "movlo  %[r], r3\n\t"
15464         "movne  r3, r7\n\t"
15465         "ldr        r4, [%[a], #240]\n\t"
15466         "ldr        r5, [%[b], #240]\n\t"
15467         "and        r4, r4, r3\n\t"
15468         "and        r5, r5, r3\n\t"
15469         "subs   r4, r4, r5\n\t"
15470         "movhi  %[r], %[one]\n\t"
15471         "movlo  %[r], r3\n\t"
15472         "movne  r3, r7\n\t"
15473         "ldr        r4, [%[a], #236]\n\t"
15474         "ldr        r5, [%[b], #236]\n\t"
15475         "and        r4, r4, r3\n\t"
15476         "and        r5, r5, r3\n\t"
15477         "subs   r4, r4, r5\n\t"
15478         "movhi  %[r], %[one]\n\t"
15479         "movlo  %[r], r3\n\t"
15480         "movne  r3, r7\n\t"
15481         "ldr        r4, [%[a], #232]\n\t"
15482         "ldr        r5, [%[b], #232]\n\t"
15483         "and        r4, r4, r3\n\t"
15484         "and        r5, r5, r3\n\t"
15485         "subs   r4, r4, r5\n\t"
15486         "movhi  %[r], %[one]\n\t"
15487         "movlo  %[r], r3\n\t"
15488         "movne  r3, r7\n\t"
15489         "ldr        r4, [%[a], #228]\n\t"
15490         "ldr        r5, [%[b], #228]\n\t"
15491         "and        r4, r4, r3\n\t"
15492         "and        r5, r5, r3\n\t"
15493         "subs   r4, r4, r5\n\t"
15494         "movhi  %[r], %[one]\n\t"
15495         "movlo  %[r], r3\n\t"
15496         "movne  r3, r7\n\t"
15497         "ldr        r4, [%[a], #224]\n\t"
15498         "ldr        r5, [%[b], #224]\n\t"
15499         "and        r4, r4, r3\n\t"
15500         "and        r5, r5, r3\n\t"
15501         "subs   r4, r4, r5\n\t"
15502         "movhi  %[r], %[one]\n\t"
15503         "movlo  %[r], r3\n\t"
15504         "movne  r3, r7\n\t"
15505         "ldr        r4, [%[a], #220]\n\t"
15506         "ldr        r5, [%[b], #220]\n\t"
15507         "and        r4, r4, r3\n\t"
15508         "and        r5, r5, r3\n\t"
15509         "subs   r4, r4, r5\n\t"
15510         "movhi  %[r], %[one]\n\t"
15511         "movlo  %[r], r3\n\t"
15512         "movne  r3, r7\n\t"
15513         "ldr        r4, [%[a], #216]\n\t"
15514         "ldr        r5, [%[b], #216]\n\t"
15515         "and        r4, r4, r3\n\t"
15516         "and        r5, r5, r3\n\t"
15517         "subs   r4, r4, r5\n\t"
15518         "movhi  %[r], %[one]\n\t"
15519         "movlo  %[r], r3\n\t"
15520         "movne  r3, r7\n\t"
15521         "ldr        r4, [%[a], #212]\n\t"
15522         "ldr        r5, [%[b], #212]\n\t"
15523         "and        r4, r4, r3\n\t"
15524         "and        r5, r5, r3\n\t"
15525         "subs   r4, r4, r5\n\t"
15526         "movhi  %[r], %[one]\n\t"
15527         "movlo  %[r], r3\n\t"
15528         "movne  r3, r7\n\t"
15529         "ldr        r4, [%[a], #208]\n\t"
15530         "ldr        r5, [%[b], #208]\n\t"
15531         "and        r4, r4, r3\n\t"
15532         "and        r5, r5, r3\n\t"
15533         "subs   r4, r4, r5\n\t"
15534         "movhi  %[r], %[one]\n\t"
15535         "movlo  %[r], r3\n\t"
15536         "movne  r3, r7\n\t"
15537         "ldr        r4, [%[a], #204]\n\t"
15538         "ldr        r5, [%[b], #204]\n\t"
15539         "and        r4, r4, r3\n\t"
15540         "and        r5, r5, r3\n\t"
15541         "subs   r4, r4, r5\n\t"
15542         "movhi  %[r], %[one]\n\t"
15543         "movlo  %[r], r3\n\t"
15544         "movne  r3, r7\n\t"
15545         "ldr        r4, [%[a], #200]\n\t"
15546         "ldr        r5, [%[b], #200]\n\t"
15547         "and        r4, r4, r3\n\t"
15548         "and        r5, r5, r3\n\t"
15549         "subs   r4, r4, r5\n\t"
15550         "movhi  %[r], %[one]\n\t"
15551         "movlo  %[r], r3\n\t"
15552         "movne  r3, r7\n\t"
15553         "ldr        r4, [%[a], #196]\n\t"
15554         "ldr        r5, [%[b], #196]\n\t"
15555         "and        r4, r4, r3\n\t"
15556         "and        r5, r5, r3\n\t"
15557         "subs   r4, r4, r5\n\t"
15558         "movhi  %[r], %[one]\n\t"
15559         "movlo  %[r], r3\n\t"
15560         "movne  r3, r7\n\t"
15561         "ldr        r4, [%[a], #192]\n\t"
15562         "ldr        r5, [%[b], #192]\n\t"
15563         "and        r4, r4, r3\n\t"
15564         "and        r5, r5, r3\n\t"
15565         "subs   r4, r4, r5\n\t"
15566         "movhi  %[r], %[one]\n\t"
15567         "movlo  %[r], r3\n\t"
15568         "movne  r3, r7\n\t"
15569         "ldr        r4, [%[a], #188]\n\t"
15570         "ldr        r5, [%[b], #188]\n\t"
15571         "and        r4, r4, r3\n\t"
15572         "and        r5, r5, r3\n\t"
15573         "subs   r4, r4, r5\n\t"
15574         "movhi  %[r], %[one]\n\t"
15575         "movlo  %[r], r3\n\t"
15576         "movne  r3, r7\n\t"
15577         "ldr        r4, [%[a], #184]\n\t"
15578         "ldr        r5, [%[b], #184]\n\t"
15579         "and        r4, r4, r3\n\t"
15580         "and        r5, r5, r3\n\t"
15581         "subs   r4, r4, r5\n\t"
15582         "movhi  %[r], %[one]\n\t"
15583         "movlo  %[r], r3\n\t"
15584         "movne  r3, r7\n\t"
15585         "ldr        r4, [%[a], #180]\n\t"
15586         "ldr        r5, [%[b], #180]\n\t"
15587         "and        r4, r4, r3\n\t"
15588         "and        r5, r5, r3\n\t"
15589         "subs   r4, r4, r5\n\t"
15590         "movhi  %[r], %[one]\n\t"
15591         "movlo  %[r], r3\n\t"
15592         "movne  r3, r7\n\t"
15593         "ldr        r4, [%[a], #176]\n\t"
15594         "ldr        r5, [%[b], #176]\n\t"
15595         "and        r4, r4, r3\n\t"
15596         "and        r5, r5, r3\n\t"
15597         "subs   r4, r4, r5\n\t"
15598         "movhi  %[r], %[one]\n\t"
15599         "movlo  %[r], r3\n\t"
15600         "movne  r3, r7\n\t"
15601         "ldr        r4, [%[a], #172]\n\t"
15602         "ldr        r5, [%[b], #172]\n\t"
15603         "and        r4, r4, r3\n\t"
15604         "and        r5, r5, r3\n\t"
15605         "subs   r4, r4, r5\n\t"
15606         "movhi  %[r], %[one]\n\t"
15607         "movlo  %[r], r3\n\t"
15608         "movne  r3, r7\n\t"
15609         "ldr        r4, [%[a], #168]\n\t"
15610         "ldr        r5, [%[b], #168]\n\t"
15611         "and        r4, r4, r3\n\t"
15612         "and        r5, r5, r3\n\t"
15613         "subs   r4, r4, r5\n\t"
15614         "movhi  %[r], %[one]\n\t"
15615         "movlo  %[r], r3\n\t"
15616         "movne  r3, r7\n\t"
15617         "ldr        r4, [%[a], #164]\n\t"
15618         "ldr        r5, [%[b], #164]\n\t"
15619         "and        r4, r4, r3\n\t"
15620         "and        r5, r5, r3\n\t"
15621         "subs   r4, r4, r5\n\t"
15622         "movhi  %[r], %[one]\n\t"
15623         "movlo  %[r], r3\n\t"
15624         "movne  r3, r7\n\t"
15625         "ldr        r4, [%[a], #160]\n\t"
15626         "ldr        r5, [%[b], #160]\n\t"
15627         "and        r4, r4, r3\n\t"
15628         "and        r5, r5, r3\n\t"
15629         "subs   r4, r4, r5\n\t"
15630         "movhi  %[r], %[one]\n\t"
15631         "movlo  %[r], r3\n\t"
15632         "movne  r3, r7\n\t"
15633         "ldr        r4, [%[a], #156]\n\t"
15634         "ldr        r5, [%[b], #156]\n\t"
15635         "and        r4, r4, r3\n\t"
15636         "and        r5, r5, r3\n\t"
15637         "subs   r4, r4, r5\n\t"
15638         "movhi  %[r], %[one]\n\t"
15639         "movlo  %[r], r3\n\t"
15640         "movne  r3, r7\n\t"
15641         "ldr        r4, [%[a], #152]\n\t"
15642         "ldr        r5, [%[b], #152]\n\t"
15643         "and        r4, r4, r3\n\t"
15644         "and        r5, r5, r3\n\t"
15645         "subs   r4, r4, r5\n\t"
15646         "movhi  %[r], %[one]\n\t"
15647         "movlo  %[r], r3\n\t"
15648         "movne  r3, r7\n\t"
15649         "ldr        r4, [%[a], #148]\n\t"
15650         "ldr        r5, [%[b], #148]\n\t"
15651         "and        r4, r4, r3\n\t"
15652         "and        r5, r5, r3\n\t"
15653         "subs   r4, r4, r5\n\t"
15654         "movhi  %[r], %[one]\n\t"
15655         "movlo  %[r], r3\n\t"
15656         "movne  r3, r7\n\t"
15657         "ldr        r4, [%[a], #144]\n\t"
15658         "ldr        r5, [%[b], #144]\n\t"
15659         "and        r4, r4, r3\n\t"
15660         "and        r5, r5, r3\n\t"
15661         "subs   r4, r4, r5\n\t"
15662         "movhi  %[r], %[one]\n\t"
15663         "movlo  %[r], r3\n\t"
15664         "movne  r3, r7\n\t"
15665         "ldr        r4, [%[a], #140]\n\t"
15666         "ldr        r5, [%[b], #140]\n\t"
15667         "and        r4, r4, r3\n\t"
15668         "and        r5, r5, r3\n\t"
15669         "subs   r4, r4, r5\n\t"
15670         "movhi  %[r], %[one]\n\t"
15671         "movlo  %[r], r3\n\t"
15672         "movne  r3, r7\n\t"
15673         "ldr        r4, [%[a], #136]\n\t"
15674         "ldr        r5, [%[b], #136]\n\t"
15675         "and        r4, r4, r3\n\t"
15676         "and        r5, r5, r3\n\t"
15677         "subs   r4, r4, r5\n\t"
15678         "movhi  %[r], %[one]\n\t"
15679         "movlo  %[r], r3\n\t"
15680         "movne  r3, r7\n\t"
15681         "ldr        r4, [%[a], #132]\n\t"
15682         "ldr        r5, [%[b], #132]\n\t"
15683         "and        r4, r4, r3\n\t"
15684         "and        r5, r5, r3\n\t"
15685         "subs   r4, r4, r5\n\t"
15686         "movhi  %[r], %[one]\n\t"
15687         "movlo  %[r], r3\n\t"
15688         "movne  r3, r7\n\t"
15689         "ldr        r4, [%[a], #128]\n\t"
15690         "ldr        r5, [%[b], #128]\n\t"
15691         "and        r4, r4, r3\n\t"
15692         "and        r5, r5, r3\n\t"
15693         "subs   r4, r4, r5\n\t"
15694         "movhi  %[r], %[one]\n\t"
15695         "movlo  %[r], r3\n\t"
15696         "movne  r3, r7\n\t"
15697         "ldr        r4, [%[a], #124]\n\t"
15698         "ldr        r5, [%[b], #124]\n\t"
15699         "and        r4, r4, r3\n\t"
15700         "and        r5, r5, r3\n\t"
15701         "subs   r4, r4, r5\n\t"
15702         "movhi  %[r], %[one]\n\t"
15703         "movlo  %[r], r3\n\t"
15704         "movne  r3, r7\n\t"
15705         "ldr        r4, [%[a], #120]\n\t"
15706         "ldr        r5, [%[b], #120]\n\t"
15707         "and        r4, r4, r3\n\t"
15708         "and        r5, r5, r3\n\t"
15709         "subs   r4, r4, r5\n\t"
15710         "movhi  %[r], %[one]\n\t"
15711         "movlo  %[r], r3\n\t"
15712         "movne  r3, r7\n\t"
15713         "ldr        r4, [%[a], #116]\n\t"
15714         "ldr        r5, [%[b], #116]\n\t"
15715         "and        r4, r4, r3\n\t"
15716         "and        r5, r5, r3\n\t"
15717         "subs   r4, r4, r5\n\t"
15718         "movhi  %[r], %[one]\n\t"
15719         "movlo  %[r], r3\n\t"
15720         "movne  r3, r7\n\t"
15721         "ldr        r4, [%[a], #112]\n\t"
15722         "ldr        r5, [%[b], #112]\n\t"
15723         "and        r4, r4, r3\n\t"
15724         "and        r5, r5, r3\n\t"
15725         "subs   r4, r4, r5\n\t"
15726         "movhi  %[r], %[one]\n\t"
15727         "movlo  %[r], r3\n\t"
15728         "movne  r3, r7\n\t"
15729         "ldr        r4, [%[a], #108]\n\t"
15730         "ldr        r5, [%[b], #108]\n\t"
15731         "and        r4, r4, r3\n\t"
15732         "and        r5, r5, r3\n\t"
15733         "subs   r4, r4, r5\n\t"
15734         "movhi  %[r], %[one]\n\t"
15735         "movlo  %[r], r3\n\t"
15736         "movne  r3, r7\n\t"
15737         "ldr        r4, [%[a], #104]\n\t"
15738         "ldr        r5, [%[b], #104]\n\t"
15739         "and        r4, r4, r3\n\t"
15740         "and        r5, r5, r3\n\t"
15741         "subs   r4, r4, r5\n\t"
15742         "movhi  %[r], %[one]\n\t"
15743         "movlo  %[r], r3\n\t"
15744         "movne  r3, r7\n\t"
15745         "ldr        r4, [%[a], #100]\n\t"
15746         "ldr        r5, [%[b], #100]\n\t"
15747         "and        r4, r4, r3\n\t"
15748         "and        r5, r5, r3\n\t"
15749         "subs   r4, r4, r5\n\t"
15750         "movhi  %[r], %[one]\n\t"
15751         "movlo  %[r], r3\n\t"
15752         "movne  r3, r7\n\t"
15753         "ldr        r4, [%[a], #96]\n\t"
15754         "ldr        r5, [%[b], #96]\n\t"
15755         "and        r4, r4, r3\n\t"
15756         "and        r5, r5, r3\n\t"
15757         "subs   r4, r4, r5\n\t"
15758         "movhi  %[r], %[one]\n\t"
15759         "movlo  %[r], r3\n\t"
15760         "movne  r3, r7\n\t"
15761         "ldr        r4, [%[a], #92]\n\t"
15762         "ldr        r5, [%[b], #92]\n\t"
15763         "and        r4, r4, r3\n\t"
15764         "and        r5, r5, r3\n\t"
15765         "subs   r4, r4, r5\n\t"
15766         "movhi  %[r], %[one]\n\t"
15767         "movlo  %[r], r3\n\t"
15768         "movne  r3, r7\n\t"
15769         "ldr        r4, [%[a], #88]\n\t"
15770         "ldr        r5, [%[b], #88]\n\t"
15771         "and        r4, r4, r3\n\t"
15772         "and        r5, r5, r3\n\t"
15773         "subs   r4, r4, r5\n\t"
15774         "movhi  %[r], %[one]\n\t"
15775         "movlo  %[r], r3\n\t"
15776         "movne  r3, r7\n\t"
15777         "ldr        r4, [%[a], #84]\n\t"
15778         "ldr        r5, [%[b], #84]\n\t"
15779         "and        r4, r4, r3\n\t"
15780         "and        r5, r5, r3\n\t"
15781         "subs   r4, r4, r5\n\t"
15782         "movhi  %[r], %[one]\n\t"
15783         "movlo  %[r], r3\n\t"
15784         "movne  r3, r7\n\t"
15785         "ldr        r4, [%[a], #80]\n\t"
15786         "ldr        r5, [%[b], #80]\n\t"
15787         "and        r4, r4, r3\n\t"
15788         "and        r5, r5, r3\n\t"
15789         "subs   r4, r4, r5\n\t"
15790         "movhi  %[r], %[one]\n\t"
15791         "movlo  %[r], r3\n\t"
15792         "movne  r3, r7\n\t"
15793         "ldr        r4, [%[a], #76]\n\t"
15794         "ldr        r5, [%[b], #76]\n\t"
15795         "and        r4, r4, r3\n\t"
15796         "and        r5, r5, r3\n\t"
15797         "subs   r4, r4, r5\n\t"
15798         "movhi  %[r], %[one]\n\t"
15799         "movlo  %[r], r3\n\t"
15800         "movne  r3, r7\n\t"
15801         "ldr        r4, [%[a], #72]\n\t"
15802         "ldr        r5, [%[b], #72]\n\t"
15803         "and        r4, r4, r3\n\t"
15804         "and        r5, r5, r3\n\t"
15805         "subs   r4, r4, r5\n\t"
15806         "movhi  %[r], %[one]\n\t"
15807         "movlo  %[r], r3\n\t"
15808         "movne  r3, r7\n\t"
15809         "ldr        r4, [%[a], #68]\n\t"
15810         "ldr        r5, [%[b], #68]\n\t"
15811         "and        r4, r4, r3\n\t"
15812         "and        r5, r5, r3\n\t"
15813         "subs   r4, r4, r5\n\t"
15814         "movhi  %[r], %[one]\n\t"
15815         "movlo  %[r], r3\n\t"
15816         "movne  r3, r7\n\t"
15817         "ldr        r4, [%[a], #64]\n\t"
15818         "ldr        r5, [%[b], #64]\n\t"
15819         "and        r4, r4, r3\n\t"
15820         "and        r5, r5, r3\n\t"
15821         "subs   r4, r4, r5\n\t"
15822         "movhi  %[r], %[one]\n\t"
15823         "movlo  %[r], r3\n\t"
15824         "movne  r3, r7\n\t"
15825         "ldr        r4, [%[a], #60]\n\t"
15826         "ldr        r5, [%[b], #60]\n\t"
15827         "and        r4, r4, r3\n\t"
15828         "and        r5, r5, r3\n\t"
15829         "subs   r4, r4, r5\n\t"
15830         "movhi  %[r], %[one]\n\t"
15831         "movlo  %[r], r3\n\t"
15832         "movne  r3, r7\n\t"
15833         "ldr        r4, [%[a], #56]\n\t"
15834         "ldr        r5, [%[b], #56]\n\t"
15835         "and        r4, r4, r3\n\t"
15836         "and        r5, r5, r3\n\t"
15837         "subs   r4, r4, r5\n\t"
15838         "movhi  %[r], %[one]\n\t"
15839         "movlo  %[r], r3\n\t"
15840         "movne  r3, r7\n\t"
15841         "ldr        r4, [%[a], #52]\n\t"
15842         "ldr        r5, [%[b], #52]\n\t"
15843         "and        r4, r4, r3\n\t"
15844         "and        r5, r5, r3\n\t"
15845         "subs   r4, r4, r5\n\t"
15846         "movhi  %[r], %[one]\n\t"
15847         "movlo  %[r], r3\n\t"
15848         "movne  r3, r7\n\t"
15849         "ldr        r4, [%[a], #48]\n\t"
15850         "ldr        r5, [%[b], #48]\n\t"
15851         "and        r4, r4, r3\n\t"
15852         "and        r5, r5, r3\n\t"
15853         "subs   r4, r4, r5\n\t"
15854         "movhi  %[r], %[one]\n\t"
15855         "movlo  %[r], r3\n\t"
15856         "movne  r3, r7\n\t"
15857         "ldr        r4, [%[a], #44]\n\t"
15858         "ldr        r5, [%[b], #44]\n\t"
15859         "and        r4, r4, r3\n\t"
15860         "and        r5, r5, r3\n\t"
15861         "subs   r4, r4, r5\n\t"
15862         "movhi  %[r], %[one]\n\t"
15863         "movlo  %[r], r3\n\t"
15864         "movne  r3, r7\n\t"
15865         "ldr        r4, [%[a], #40]\n\t"
15866         "ldr        r5, [%[b], #40]\n\t"
15867         "and        r4, r4, r3\n\t"
15868         "and        r5, r5, r3\n\t"
15869         "subs   r4, r4, r5\n\t"
15870         "movhi  %[r], %[one]\n\t"
15871         "movlo  %[r], r3\n\t"
15872         "movne  r3, r7\n\t"
15873         "ldr        r4, [%[a], #36]\n\t"
15874         "ldr        r5, [%[b], #36]\n\t"
15875         "and        r4, r4, r3\n\t"
15876         "and        r5, r5, r3\n\t"
15877         "subs   r4, r4, r5\n\t"
15878         "movhi  %[r], %[one]\n\t"
15879         "movlo  %[r], r3\n\t"
15880         "movne  r3, r7\n\t"
15881         "ldr        r4, [%[a], #32]\n\t"
15882         "ldr        r5, [%[b], #32]\n\t"
15883         "and        r4, r4, r3\n\t"
15884         "and        r5, r5, r3\n\t"
15885         "subs   r4, r4, r5\n\t"
15886         "movhi  %[r], %[one]\n\t"
15887         "movlo  %[r], r3\n\t"
15888         "movne  r3, r7\n\t"
15889         "ldr        r4, [%[a], #28]\n\t"
15890         "ldr        r5, [%[b], #28]\n\t"
15891         "and        r4, r4, r3\n\t"
15892         "and        r5, r5, r3\n\t"
15893         "subs   r4, r4, r5\n\t"
15894         "movhi  %[r], %[one]\n\t"
15895         "movlo  %[r], r3\n\t"
15896         "movne  r3, r7\n\t"
15897         "ldr        r4, [%[a], #24]\n\t"
15898         "ldr        r5, [%[b], #24]\n\t"
15899         "and        r4, r4, r3\n\t"
15900         "and        r5, r5, r3\n\t"
15901         "subs   r4, r4, r5\n\t"
15902         "movhi  %[r], %[one]\n\t"
15903         "movlo  %[r], r3\n\t"
15904         "movne  r3, r7\n\t"
15905         "ldr        r4, [%[a], #20]\n\t"
15906         "ldr        r5, [%[b], #20]\n\t"
15907         "and        r4, r4, r3\n\t"
15908         "and        r5, r5, r3\n\t"
15909         "subs   r4, r4, r5\n\t"
15910         "movhi  %[r], %[one]\n\t"
15911         "movlo  %[r], r3\n\t"
15912         "movne  r3, r7\n\t"
15913         "ldr        r4, [%[a], #16]\n\t"
15914         "ldr        r5, [%[b], #16]\n\t"
15915         "and        r4, r4, r3\n\t"
15916         "and        r5, r5, r3\n\t"
15917         "subs   r4, r4, r5\n\t"
15918         "movhi  %[r], %[one]\n\t"
15919         "movlo  %[r], r3\n\t"
15920         "movne  r3, r7\n\t"
15921         "ldr        r4, [%[a], #12]\n\t"
15922         "ldr        r5, [%[b], #12]\n\t"
15923         "and        r4, r4, r3\n\t"
15924         "and        r5, r5, r3\n\t"
15925         "subs   r4, r4, r5\n\t"
15926         "movhi  %[r], %[one]\n\t"
15927         "movlo  %[r], r3\n\t"
15928         "movne  r3, r7\n\t"
15929         "ldr        r4, [%[a], #8]\n\t"
15930         "ldr        r5, [%[b], #8]\n\t"
15931         "and        r4, r4, r3\n\t"
15932         "and        r5, r5, r3\n\t"
15933         "subs   r4, r4, r5\n\t"
15934         "movhi  %[r], %[one]\n\t"
15935         "movlo  %[r], r3\n\t"
15936         "movne  r3, r7\n\t"
15937         "ldr        r4, [%[a], #4]\n\t"
15938         "ldr        r5, [%[b], #4]\n\t"
15939         "and        r4, r4, r3\n\t"
15940         "and        r5, r5, r3\n\t"
15941         "subs   r4, r4, r5\n\t"
15942         "movhi  %[r], %[one]\n\t"
15943         "movlo  %[r], r3\n\t"
15944         "movne  r3, r7\n\t"
15945         "ldr        r4, [%[a], #0]\n\t"
15946         "ldr        r5, [%[b], #0]\n\t"
15947         "and        r4, r4, r3\n\t"
15948         "and        r5, r5, r3\n\t"
15949         "subs   r4, r4, r5\n\t"
15950         "movhi  %[r], %[one]\n\t"
15951         "movlo  %[r], r3\n\t"
15952         "movne  r3, r7\n\t"
15953         "eor    %[r], %[r], r3\n\t"
15954         : [r] "+r" (r)
15955         : [a] "r" (a), [b] "r" (b), [one] "r" (one)
15956         : "r2", "r3", "r4", "r5", "r6", "r7"
15957     );
15958 #endif
15959 
15960     return r;
15961 }
15962 
15963 /* Divide d in a and put remainder into r (m*d + r = a)
15964  * m is not calculated as it is not needed at this time.
15965  *
15966  * a  Nmber to be divided.
15967  * d  Number to divide with.
15968  * m  Multiplier result.
15969  * r  Remainder from the division.
15970  * returns MP_OKAY indicating success.
15971  */
15972 static WC_INLINE int sp_3072_div_96(sp_digit* a, sp_digit* d, sp_digit* m,
15973         sp_digit* r)
15974 {
15975     sp_digit t1[192], t2[97];
15976     sp_digit div, r1;
15977     int i;
15978 
15979     (void)m;
15980 
15981     div = d[95];
15982     XMEMCPY(t1, a, sizeof(*t1) * 2 * 96);
15983     for (i=95; i>=0; i--) {
15984         r1 = div_3072_word_96(t1[96 + i], t1[96 + i - 1], div);
15985 
15986         sp_3072_mul_d_96(t2, d, r1);
15987         t1[96 + i] += sp_3072_sub_in_place_96(&t1[i], t2);
15988         t1[96 + i] -= t2[96];
15989         sp_3072_mask_96(t2, d, t1[96 + i]);
15990         t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], t2);
15991         sp_3072_mask_96(t2, d, t1[96 + i]);
15992         t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], t2);
15993     }
15994 
15995     r1 = sp_3072_cmp_96(t1, d) >= 0;
15996     sp_3072_cond_sub_96(r, t1, t2, (sp_digit)0 - r1);
15997 
15998     return MP_OKAY;
15999 }
16000 
16001 /* Reduce a modulo m into r. (r = a mod m)
16002  *
16003  * r  A single precision number that is the reduced result.
16004  * a  A single precision number that is to be reduced.
16005  * m  A single precision number that is the modulus to reduce with.
16006  * returns MP_OKAY indicating success.
16007  */
16008 static WC_INLINE int sp_3072_mod_96(sp_digit* r, sp_digit* a, sp_digit* m)
16009 {
16010     return sp_3072_div_96(a, m, NULL, r);
16011 }
16012 
16013 /* Divide d in a and put remainder into r (m*d + r = a)
16014  * m is not calculated as it is not needed at this time.
16015  *
16016  * a  Nmber to be divided.
16017  * d  Number to divide with.
16018  * m  Multiplier result.
16019  * r  Remainder from the division.
16020  * returns MP_OKAY indicating success.
16021  */
16022 static WC_INLINE int sp_3072_div_96_cond(sp_digit* a, sp_digit* d, sp_digit* m,
16023         sp_digit* r)
16024 {
16025     sp_digit t1[192], t2[97];
16026     sp_digit div, r1;
16027     int i;
16028 
16029     (void)m;
16030 
16031     div = d[95];
16032     XMEMCPY(t1, a, sizeof(*t1) * 2 * 96);
16033     for (i=95; i>=0; i--) {
16034         r1 = div_3072_word_96(t1[96 + i], t1[96 + i - 1], div);
16035 
16036         sp_3072_mul_d_96(t2, d, r1);
16037         t1[96 + i] += sp_3072_sub_in_place_96(&t1[i], t2);
16038         t1[96 + i] -= t2[96];
16039         if (t1[96 + i] != 0) {
16040             t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], d);
16041             if (t1[96 + i] != 0)
16042                 t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], d);
16043         }
16044     }
16045 
16046     r1 = sp_3072_cmp_96(t1, d) >= 0;
16047     sp_3072_cond_sub_96(r, t1, t2, (sp_digit)0 - r1);
16048 
16049     return MP_OKAY;
16050 }
16051 
16052 /* Reduce a modulo m into r. (r = a mod m)
16053  *
16054  * r  A single precision number that is the reduced result.
16055  * a  A single precision number that is to be reduced.
16056  * m  A single precision number that is the modulus to reduce with.
16057  * returns MP_OKAY indicating success.
16058  */
16059 static WC_INLINE int sp_3072_mod_96_cond(sp_digit* r, sp_digit* a, sp_digit* m)
16060 {
16061     return sp_3072_div_96_cond(a, m, NULL, r);
16062 }
16063 
16064 #if defined(SP_RSA_PRIVATE_EXP_D) || defined(WOLFSSL_HAVE_SP_DH)
16065 #ifdef WOLFSSL_SP_SMALL
16066 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
16067  *
16068  * r     A single precision number that is the result of the operation.
16069  * a     A single precision number being exponentiated.
16070  * e     A single precision number that is the exponent.
16071  * bits  The number of bits in the exponent.
16072  * m     A single precision number that is the modulus.
16073  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
16074  */
16075 static int sp_3072_mod_exp_96(sp_digit* r, sp_digit* a, sp_digit* e,
16076         int bits, sp_digit* m, int reduceA)
16077 {
16078 #ifndef WOLFSSL_SMALL_STACK
16079     sp_digit t[16][192];
16080 #else
16081     sp_digit* t[16];
16082     sp_digit* td;
16083 #endif
16084     sp_digit* norm;
16085     sp_digit mp = 1;
16086     sp_digit n;
16087     sp_digit mask;
16088     int i;
16089     int c, y;
16090     int err = MP_OKAY;
16091 
16092 #ifdef WOLFSSL_SMALL_STACK
16093     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 192, NULL,
16094                             DYNAMIC_TYPE_TMP_BUFFER);
16095     if (td == NULL)
16096         err = MEMORY_E;
16097 
16098     if (err == MP_OKAY) {
16099         for (i=0; i<16; i++)
16100             t[i] = td + i * 192;
16101         norm = t[0];
16102     }
16103 #else
16104     norm = t[0];
16105 #endif
16106 
16107     if (err == MP_OKAY) {
16108         sp_3072_mont_setup(m, &mp);
16109         sp_3072_mont_norm_96(norm, m);
16110 
16111         XMEMSET(t[1], 0, sizeof(sp_digit) * 96);
16112         if (reduceA) {
16113             err = sp_3072_mod_96(t[1] + 96, a, m);
16114             if (err == MP_OKAY)
16115                 err = sp_3072_mod_96(t[1], t[1], m);
16116         }
16117         else {
16118             XMEMCPY(t[1] + 96, a, sizeof(sp_digit) * 96);
16119             err = sp_3072_mod_96(t[1], t[1], m);
16120         }
16121     }
16122 
16123     if (err == MP_OKAY) {
16124         sp_3072_mont_sqr_96(t[ 2], t[ 1], m, mp);
16125         sp_3072_mont_mul_96(t[ 3], t[ 2], t[ 1], m, mp);
16126         sp_3072_mont_sqr_96(t[ 4], t[ 2], m, mp);
16127         sp_3072_mont_mul_96(t[ 5], t[ 3], t[ 2], m, mp);
16128         sp_3072_mont_sqr_96(t[ 6], t[ 3], m, mp);
16129         sp_3072_mont_mul_96(t[ 7], t[ 4], t[ 3], m, mp);
16130         sp_3072_mont_sqr_96(t[ 8], t[ 4], m, mp);
16131         sp_3072_mont_mul_96(t[ 9], t[ 5], t[ 4], m, mp);
16132         sp_3072_mont_sqr_96(t[10], t[ 5], m, mp);
16133         sp_3072_mont_mul_96(t[11], t[ 6], t[ 5], m, mp);
16134         sp_3072_mont_sqr_96(t[12], t[ 6], m, mp);
16135         sp_3072_mont_mul_96(t[13], t[ 7], t[ 6], m, mp);
16136         sp_3072_mont_sqr_96(t[14], t[ 7], m, mp);
16137         sp_3072_mont_mul_96(t[15], t[ 8], t[ 7], m, mp);
16138 
16139         i = (bits - 1) / 32;
16140         n = e[i--];
16141         y = n >> 28;
16142         n <<= 4;
16143         c = 28;
16144         XMEMCPY(r, t[y], sizeof(sp_digit) * 96);
16145         for (; i>=0 || c>=4; ) {
16146             if (c == 0) {
16147                 n = e[i--];
16148                 y = n >> 28;
16149                 n <<= 4;
16150                 c = 28;
16151             }
16152             else if (c < 4) {
16153                 y = n >> 28;
16154                 n = e[i--];
16155                 c = 4 - c;
16156                 y |= n >> (32 - c);
16157                 n <<= c;
16158                 c = 32 - c;
16159             }
16160             else {
16161                 y = (n >> 28) & 0xf;
16162                 n <<= 4;
16163                 c -= 4;
16164             }
16165 
16166             sp_3072_mont_sqr_96(r, r, m, mp);
16167             sp_3072_mont_sqr_96(r, r, m, mp);
16168             sp_3072_mont_sqr_96(r, r, m, mp);
16169             sp_3072_mont_sqr_96(r, r, m, mp);
16170 
16171             sp_3072_mont_mul_96(r, r, t[y], m, mp);
16172         }
16173 
16174         XMEMSET(&r[96], 0, sizeof(sp_digit) * 96);
16175         sp_3072_mont_reduce_96(r, m, mp);
16176 
16177         mask = 0 - (sp_3072_cmp_96(r, m) >= 0);
16178         sp_3072_cond_sub_96(r, r, m, mask);
16179     }
16180 
16181 #ifdef WOLFSSL_SMALL_STACK
16182     if (td != NULL)
16183         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
16184 #endif
16185 
16186     return err;
16187 }
16188 #else
16189 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
16190  *
16191  * r     A single precision number that is the result of the operation.
16192  * a     A single precision number being exponentiated.
16193  * e     A single precision number that is the exponent.
16194  * bits  The number of bits in the exponent.
16195  * m     A single precision number that is the modulus.
16196  * returns 0 on success and MEMORY_E on dynamic memory allocation failure.
16197  */
16198 static int sp_3072_mod_exp_96(sp_digit* r, sp_digit* a, sp_digit* e,
16199         int bits, sp_digit* m, int reduceA)
16200 {
16201 #ifndef WOLFSSL_SMALL_STACK
16202     sp_digit t[32][192];
16203 #else
16204     sp_digit* t[32];
16205     sp_digit* td;
16206 #endif
16207     sp_digit* norm;
16208     sp_digit mp = 1;
16209     sp_digit n;
16210     sp_digit mask;
16211     int i;
16212     int c, y;
16213     int err = MP_OKAY;
16214 
16215 #ifdef WOLFSSL_SMALL_STACK
16216     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 192, NULL,
16217                             DYNAMIC_TYPE_TMP_BUFFER);
16218     if (td == NULL)
16219         err = MEMORY_E;
16220 
16221     if (err == MP_OKAY) {
16222         for (i=0; i<32; i++)
16223             t[i] = td + i * 192;
16224         norm = t[0];
16225     }
16226 #else
16227     norm = t[0];
16228 #endif
16229 
16230     if (err == MP_OKAY) {
16231         sp_3072_mont_setup(m, &mp);
16232         sp_3072_mont_norm_96(norm, m);
16233 
16234         XMEMSET(t[1], 0, sizeof(sp_digit) * 96);
16235         if (reduceA) {
16236             err = sp_3072_mod_96(t[1] + 96, a, m);
16237             if (err == MP_OKAY)
16238                 err = sp_3072_mod_96(t[1], t[1], m);
16239         }
16240         else {
16241             XMEMCPY(t[1] + 96, a, sizeof(sp_digit) * 96);
16242             err = sp_3072_mod_96(t[1], t[1], m);
16243         }
16244     }
16245 
16246     if (err == MP_OKAY) {
16247         sp_3072_mont_sqr_96(t[ 2], t[ 1], m, mp);
16248         sp_3072_mont_mul_96(t[ 3], t[ 2], t[ 1], m, mp);
16249         sp_3072_mont_sqr_96(t[ 4], t[ 2], m, mp);
16250         sp_3072_mont_mul_96(t[ 5], t[ 3], t[ 2], m, mp);
16251         sp_3072_mont_sqr_96(t[ 6], t[ 3], m, mp);
16252         sp_3072_mont_mul_96(t[ 7], t[ 4], t[ 3], m, mp);
16253         sp_3072_mont_sqr_96(t[ 8], t[ 4], m, mp);
16254         sp_3072_mont_mul_96(t[ 9], t[ 5], t[ 4], m, mp);
16255         sp_3072_mont_sqr_96(t[10], t[ 5], m, mp);
16256         sp_3072_mont_mul_96(t[11], t[ 6], t[ 5], m, mp);
16257         sp_3072_mont_sqr_96(t[12], t[ 6], m, mp);
16258         sp_3072_mont_mul_96(t[13], t[ 7], t[ 6], m, mp);
16259         sp_3072_mont_sqr_96(t[14], t[ 7], m, mp);
16260         sp_3072_mont_mul_96(t[15], t[ 8], t[ 7], m, mp);
16261         sp_3072_mont_sqr_96(t[16], t[ 8], m, mp);
16262         sp_3072_mont_mul_96(t[17], t[ 9], t[ 8], m, mp);
16263         sp_3072_mont_sqr_96(t[18], t[ 9], m, mp);
16264         sp_3072_mont_mul_96(t[19], t[10], t[ 9], m, mp);
16265         sp_3072_mont_sqr_96(t[20], t[10], m, mp);
16266         sp_3072_mont_mul_96(t[21], t[11], t[10], m, mp);
16267         sp_3072_mont_sqr_96(t[22], t[11], m, mp);
16268         sp_3072_mont_mul_96(t[23], t[12], t[11], m, mp);
16269         sp_3072_mont_sqr_96(t[24], t[12], m, mp);
16270         sp_3072_mont_mul_96(t[25], t[13], t[12], m, mp);
16271         sp_3072_mont_sqr_96(t[26], t[13], m, mp);
16272         sp_3072_mont_mul_96(t[27], t[14], t[13], m, mp);
16273         sp_3072_mont_sqr_96(t[28], t[14], m, mp);
16274         sp_3072_mont_mul_96(t[29], t[15], t[14], m, mp);
16275         sp_3072_mont_sqr_96(t[30], t[15], m, mp);
16276         sp_3072_mont_mul_96(t[31], t[16], t[15], m, mp);
16277 
16278         i = (bits - 1) / 32;
16279         n = e[i--];
16280         y = n >> 27;
16281         n <<= 5;
16282         c = 27;
16283         XMEMCPY(r, t[y], sizeof(sp_digit) * 96);
16284         for (; i>=0 || c>=5; ) {
16285             if (c == 0) {
16286                 n = e[i--];
16287                 y = n >> 27;
16288                 n <<= 5;
16289                 c = 27;
16290             }
16291             else if (c < 5) {
16292                 y = n >> 27;
16293                 n = e[i--];
16294                 c = 5 - c;
16295                 y |= n >> (32 - c);
16296                 n <<= c;
16297                 c = 32 - c;
16298             }
16299             else {
16300                 y = (n >> 27) & 0x1f;
16301                 n <<= 5;
16302                 c -= 5;
16303             }
16304 
16305             sp_3072_mont_sqr_96(r, r, m, mp);
16306             sp_3072_mont_sqr_96(r, r, m, mp);
16307             sp_3072_mont_sqr_96(r, r, m, mp);
16308             sp_3072_mont_sqr_96(r, r, m, mp);
16309             sp_3072_mont_sqr_96(r, r, m, mp);
16310 
16311             sp_3072_mont_mul_96(r, r, t[y], m, mp);
16312         }
16313         y = e[0] & 0x3;
16314         sp_3072_mont_sqr_96(r, r, m, mp);
16315         sp_3072_mont_sqr_96(r, r, m, mp);
16316         sp_3072_mont_mul_96(r, r, t[y], m, mp);
16317 
16318         XMEMSET(&r[96], 0, sizeof(sp_digit) * 96);
16319         sp_3072_mont_reduce_96(r, m, mp);
16320 
16321         mask = 0 - (sp_3072_cmp_96(r, m) >= 0);
16322         sp_3072_cond_sub_96(r, r, m, mask);
16323     }
16324 
16325 #ifdef WOLFSSL_SMALL_STACK
16326     if (td != NULL)
16327         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
16328 #endif
16329 
16330     return err;
16331 }
16332 #endif /* WOLFSSL_SP_SMALL */
16333 #endif /* SP_RSA_PRIVATE_EXP_D || WOLFSSL_HAVE_SP_DH */
16334 
16335 #ifdef WOLFSSL_HAVE_SP_RSA
16336 /* RSA public key operation.
16337  *
16338  * in      Array of bytes representing the number to exponentiate, base.
16339  * inLen   Number of bytes in base.
16340  * em      Public exponent.
16341  * mm      Modulus.
16342  * out     Buffer to hold big-endian bytes of exponentiation result.
16343  *         Must be at least 384 bytes long.
16344  * outLen  Number of bytes in result.
16345  * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
16346  * an array is too long and MEMORY_E when dynamic memory allocation fails.
16347  */
16348 int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm,
16349     byte* out, word32* outLen)
16350 {
16351 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
16352     sp_digit ad[192], md[96], rd[192];
16353 #else
16354     sp_digit* d = NULL;
16355 #endif
16356     sp_digit* a;
16357     sp_digit *ah;
16358     sp_digit* m;
16359     sp_digit* r;
16360     sp_digit e[1];
16361     int err = MP_OKAY;
16362 
16363     if (*outLen < 384)
16364         err = MP_TO_E;
16365     if (err == MP_OKAY && (mp_count_bits(em) > 32 || inLen > 384 ||
16366                                                      mp_count_bits(mm) != 3072))
16367         err = MP_READ_E;
16368 
16369 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
16370     if (err == MP_OKAY) {
16371         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 96 * 5, NULL,
16372                                DYNAMIC_TYPE_TMP_BUFFER);
16373         if (d == NULL)
16374             err = MEMORY_E;
16375     }
16376 
16377     if (err == MP_OKAY) {
16378         a = d;
16379         r = a + 96 * 2;
16380         m = r + 96 * 2;
16381         ah = a + 96;
16382     }
16383 #else
16384     a = ad;
16385     m = md;
16386     r = rd;
16387     ah = a + 96;
16388 #endif
16389 
16390     if (err == MP_OKAY) {
16391         sp_3072_from_bin(ah, 96, in, inLen);
16392 #if DIGIT_BIT >= 32
16393         e[0] = em->dp[0];
16394 #else
16395         e[0] = em->dp[0];
16396         if (em->used > 1)
16397             e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
16398 #endif
16399         if (e[0] == 0)
16400             err = MP_EXPTMOD_E;
16401     }
16402     if (err == MP_OKAY) {
16403         sp_3072_from_mp(m, 96, mm);
16404 
16405         if (e[0] == 0x3) {
16406             if (err == MP_OKAY) {
16407                 sp_3072_sqr_96(r, ah);
16408                 err = sp_3072_mod_96_cond(r, r, m);
16409             }
16410             if (err == MP_OKAY) {
16411                 sp_3072_mul_96(r, ah, r);
16412                 err = sp_3072_mod_96_cond(r, r, m);
16413             }
16414         }
16415         else {
16416             int i;
16417             sp_digit mp;
16418 
16419             sp_3072_mont_setup(m, &mp);
16420 
16421             /* Convert to Montgomery form. */
16422             XMEMSET(a, 0, sizeof(sp_digit) * 96);
16423             err = sp_3072_mod_96_cond(a, a, m);
16424 
16425             if (err == MP_OKAY) {
16426                 for (i=31; i>=0; i--)
16427                     if (e[0] >> i)
16428                         break;
16429 
16430                 XMEMCPY(r, a, sizeof(sp_digit) * 96);
16431                 for (i--; i>=0; i--) {
16432                     sp_3072_mont_sqr_96(r, r, m, mp);
16433                     if (((e[0] >> i) & 1) == 1)
16434                         sp_3072_mont_mul_96(r, r, a, m, mp);
16435                 }
16436                 XMEMSET(&r[96], 0, sizeof(sp_digit) * 96);
16437                 sp_3072_mont_reduce_96(r, m, mp);
16438 
16439                 for (i = 95; i > 0; i--) {
16440                     if (r[i] != m[i])
16441                         break;
16442                 }
16443                 if (r[i] >= m[i])
16444                     sp_3072_sub_in_place_96(r, m);
16445             }
16446         }
16447     }
16448 
16449     if (err == MP_OKAY) {
16450         sp_3072_to_bin(r, out);
16451         *outLen = 384;
16452     }
16453 
16454 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
16455     if (d != NULL)
16456         XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER);
16457 #endif
16458 
16459     return err;
16460 }
16461 
16462 /* RSA private key operation.
16463  *
16464  * in      Array of bytes representing the number to exponentiate, base.
16465  * inLen   Number of bytes in base.
16466  * dm      Private exponent.
16467  * pm      First prime.
16468  * qm      Second prime.
16469  * dpm     First prime's CRT exponent.
16470  * dqm     Second prime's CRT exponent.
16471  * qim     Inverse of second prime mod p.
16472  * mm      Modulus.
16473  * out     Buffer to hold big-endian bytes of exponentiation result.
16474  *         Must be at least 384 bytes long.
16475  * outLen  Number of bytes in result.
16476  * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
16477  * an array is too long and MEMORY_E when dynamic memory allocation fails.
16478  */
16479 int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm,
16480     mp_int* pm, mp_int* qm, mp_int* dpm, mp_int* dqm, mp_int* qim, mp_int* mm,
16481     byte* out, word32* outLen)
16482 {
16483 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
16484     sp_digit ad[96 * 2];
16485     sp_digit pd[48], qd[48], dpd[48];
16486     sp_digit tmpad[96], tmpbd[96];
16487 #else
16488     sp_digit* t = NULL;
16489 #endif
16490     sp_digit* a;
16491     sp_digit* p;
16492     sp_digit* q;
16493     sp_digit* dp;
16494     sp_digit* dq;
16495     sp_digit* qi;
16496     sp_digit* tmp;
16497     sp_digit* tmpa;
16498     sp_digit* tmpb;
16499     sp_digit* r;
16500     sp_digit c;
16501     int err = MP_OKAY;
16502 
16503     (void)dm;
16504     (void)mm;
16505 
16506     if (*outLen < 384)
16507         err = MP_TO_E;
16508     if (err == MP_OKAY && (inLen > 384 || mp_count_bits(mm) != 3072))
16509         err = MP_READ_E;
16510 
16511 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
16512     if (err == MP_OKAY) {
16513         t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 48 * 11, NULL,
16514                                DYNAMIC_TYPE_TMP_BUFFER);
16515         if (t == NULL)
16516             err = MEMORY_E;
16517     }
16518     if (err == MP_OKAY) {
16519         a = t;
16520         p = a + 96 * 2;
16521         q = p + 48;
16522         qi = dq = dp = q + 48;
16523         tmpa = qi + 48;
16524         tmpb = tmpa + 96;
16525 
16526         tmp = t;
16527         r = tmp + 96;
16528     }
16529 #else
16530     r = a = ad;
16531     p = pd;
16532     q = qd;
16533     qi = dq = dp = dpd;
16534     tmpa = tmpad;
16535     tmpb = tmpbd;
16536     tmp = a + 96;
16537 #endif
16538 
16539     if (err == MP_OKAY) {
16540         sp_3072_from_bin(a, 96, in, inLen);
16541         sp_3072_from_mp(p, 48, pm);
16542         sp_3072_from_mp(q, 48, qm);
16543         sp_3072_from_mp(dp, 48, dpm);
16544 
16545         err = sp_3072_mod_exp_48(tmpa, a, dp, 1536, p, 1);
16546     }
16547     if (err == MP_OKAY) {
16548         sp_3072_from_mp(dq, 48, dqm);
16549         err = sp_3072_mod_exp_48(tmpb, a, dq, 1536, q, 1);
16550     }
16551 
16552     if (err == MP_OKAY) {
16553         c = sp_3072_sub_in_place_48(tmpa, tmpb);
16554         sp_3072_mask_48(tmp, p, c);
16555         sp_3072_add_48(tmpa, tmpa, tmp);
16556 
16557         sp_3072_from_mp(qi, 48, qim);
16558         sp_3072_mul_48(tmpa, tmpa, qi);
16559         err = sp_3072_mod_48(tmpa, tmpa, p);
16560     }
16561 
16562     if (err == MP_OKAY) {
16563         sp_3072_mul_48(tmpa, q, tmpa);
16564         XMEMSET(&tmpb[48], 0, sizeof(sp_digit) * 48);
16565         sp_3072_add_96(r, tmpb, tmpa);
16566 
16567         sp_3072_to_bin(r, out);
16568         *outLen = 384;
16569     }
16570 
16571 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
16572     if (t != NULL) {
16573         XMEMSET(t, 0, sizeof(sp_digit) * 48 * 11);
16574         XFREE(t, NULL, DYNAMIC_TYPE_TMP_BUFFER);
16575     }
16576 #else
16577     XMEMSET(tmpad, 0, sizeof(tmpad));
16578     XMEMSET(tmpbd, 0, sizeof(tmpbd));
16579     XMEMSET(pd, 0, sizeof(pd));
16580     XMEMSET(qd, 0, sizeof(qd));
16581     XMEMSET(dpd, 0, sizeof(dpd));
16582 #endif
16583 
16584     return err;
16585 }
16586 #endif /* WOLFSSL_HAVE_SP_RSA */
16587 #ifdef WOLFSSL_HAVE_SP_DH
16588 /* Convert an array of sp_digit to an mp_int.
16589  *
16590  * a  A single precision integer.
16591  * r  A multi-precision integer.
16592  */
16593 static int sp_3072_to_mp(sp_digit* a, mp_int* r)
16594 {
16595     int err;
16596 
16597     err = mp_grow(r, (3072 + DIGIT_BIT - 1) / DIGIT_BIT);
16598     if (err == MP_OKAY) {
16599 #if DIGIT_BIT == 32
16600         XMEMCPY(r->dp, a, sizeof(sp_digit) * 96);
16601         r->used = 96;
16602         mp_clamp(r);
16603 #elif DIGIT_BIT < 32
16604         int i, j = 0, s = 0;
16605 
16606         r->dp[0] = 0;
16607         for (i = 0; i < 96; i++) {
16608             r->dp[j] |= a[i] << s;
16609             r->dp[j] &= (1l << DIGIT_BIT) - 1;
16610             s = DIGIT_BIT - s;
16611             r->dp[++j] = a[i] >> s;
16612             while (s + DIGIT_BIT <= 32) {
16613                 s += DIGIT_BIT;
16614                 r->dp[j] &= (1l << DIGIT_BIT) - 1;
16615                 r->dp[++j] = a[i] >> s;
16616             }
16617             s = 32 - s;
16618         }
16619         r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
16620         mp_clamp(r);
16621 #else
16622         int i, j = 0, s = 0;
16623 
16624         r->dp[0] = 0;
16625         for (i = 0; i < 96; i++) {
16626             r->dp[j] |= ((mp_digit)a[i]) << s;
16627             if (s + 32 >= DIGIT_BIT) {
16628     #if DIGIT_BIT < 32
16629                 r->dp[j] &= (1l << DIGIT_BIT) - 1;
16630     #endif
16631                 s = DIGIT_BIT - s;
16632                 r->dp[++j] = a[i] >> s;
16633                 s = 32 - s;
16634             }
16635             else
16636                 s += 32;
16637         }
16638         r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
16639         mp_clamp(r);
16640 #endif
16641     }
16642 
16643     return err;
16644 }
16645 
16646 /* Perform the modular exponentiation for Diffie-Hellman.
16647  *
16648  * base  Base. MP integer.
16649  * exp   Exponent. MP integer.
16650  * mod   Modulus. MP integer.
16651  * res   Result. MP integer.
16652  * returs 0 on success, MP_READ_E if there are too many bytes in an array
16653  * and MEMORY_E if memory allocation fails.
16654  */
16655 int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
16656 {
16657     int err = MP_OKAY;
16658     sp_digit b[192], e[96], m[96];
16659     sp_digit* r = b;
16660     int expBits = mp_count_bits(exp);
16661 
16662     if (mp_count_bits(base) > 3072 || expBits > 3072 ||
16663                                                    mp_count_bits(mod) != 3072) {
16664         err = MP_READ_E;
16665     }
16666 
16667     if (err == MP_OKAY) {
16668         sp_3072_from_mp(b, 96, base);
16669         sp_3072_from_mp(e, 96, exp);
16670         sp_3072_from_mp(m, 96, mod);
16671 
16672         err = sp_3072_mod_exp_96(r, b, e, expBits, m, 0);
16673     }
16674 
16675     if (err == MP_OKAY) {
16676         err = sp_3072_to_mp(r, res);
16677     }
16678 
16679     XMEMSET(e, 0, sizeof(e));
16680 
16681     return err;
16682 }
16683 
16684 /* Perform the modular exponentiation for Diffie-Hellman.
16685  *
16686  * base     Base.
16687  * exp      Array of bytes that is the exponent.
16688  * expLen   Length of data, in bytes, in exponent.
16689  * mod      Modulus.
16690  * out      Buffer to hold big-endian bytes of exponentiation result.
16691  *          Must be at least 384 bytes long.
16692  * outLen   Length, in bytes, of exponentiation result.
16693  * returs 0 on success, MP_READ_E if there are too many bytes in an array
16694  * and MEMORY_E if memory allocation fails.
16695  */
16696 int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen,
16697     mp_int* mod, byte* out, word32* outLen)
16698 {
16699     int err = MP_OKAY;
16700     sp_digit b[192], e[96], m[96];
16701     sp_digit* r = b;
16702     word32 i;
16703 
16704     if (mp_count_bits(base) > 3072 || expLen > 384 ||
16705                                                    mp_count_bits(mod) != 3072) {
16706         err = MP_READ_E;
16707     }
16708 
16709     if (err == MP_OKAY) {
16710         sp_3072_from_mp(b, 96, base);
16711         sp_3072_from_bin(e, 96, exp, expLen);
16712         sp_3072_from_mp(m, 96, mod);
16713 
16714         err = sp_3072_mod_exp_96(r, b, e, expLen * 8, m, 0);
16715     }
16716 
16717     if (err == MP_OKAY) {
16718         sp_3072_to_bin(r, out);
16719         *outLen = 384;
16720         for (i=0; i<384 && out[i] == 0; i++) {
16721         }
16722         *outLen -= i;
16723         XMEMMOVE(out, out + i, *outLen);
16724 
16725     }
16726 
16727     XMEMSET(e, 0, sizeof(e));
16728 
16729     return err;
16730 }
16731 
16732 #endif /* WOLFSSL_HAVE_SP_DH */
16733 
16734 #endif /* WOLFSSL_SP_NO_3072 */
16735 
16736 #endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
16737 #ifdef WOLFSSL_HAVE_SP_ECC
16738 #ifndef WOLFSSL_SP_NO_256
16739 
/* Point structure to use.
 *
 * Each ordinate has room for 2 * 8 words although the conversion from an
 * ecc_point only fills the first 8 words.
 * NOTE(review): the upper half is presumably working space for double-width
 * intermediate results - confirm against the point arithmetic.
 */
typedef struct sp_point {
    /* X ordinate. */
    sp_digit x[2 * 8];
    /* Y ordinate. */
    sp_digit y[2 * 8];
    /* Z ordinate. */
    sp_digit z[2 * 8];
    /* Infinity indicator; 0 for the base point and for converted points.
     * NOTE(review): presumably non-zero marks the point at infinity. */
    int infinity;
} sp_point;
16747 
/* The modulus (prime) of the curve P256.
 * Stored as eight 32-bit words, least significant word first. */
static sp_digit p256_mod[8] = {
    0xffffffff,0xffffffff,0xffffffff,0x00000000,0x00000000,0x00000000,
    0x00000001,0xffffffff
};
/* The Montgomery normalizer for modulus of the curve P256
 * (2^256 minus the modulus). */
static sp_digit p256_norm_mod[8] = {
    0x00000001,0x00000000,0x00000000,0xffffffff,0xffffffff,0xffffffff,
    0xfffffffe,0x00000000
};
/* The Montgomery multiplier for modulus of the curve P256. */
static sp_digit p256_mp_mod = 0x00000001;
#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
                                            defined(HAVE_ECC_VERIFY)
/* The order of the curve P256. */
static sp_digit p256_order[8] = {
    0xfc632551,0xf3b9cac2,0xa7179e84,0xbce6faad,0xffffffff,0xffffffff,
    0x00000000,0xffffffff
};
#endif
/* The order of the curve P256 minus 2. */
static sp_digit p256_order2[8] = {
    0xfc63254f,0xf3b9cac2,0xa7179e84,0xbce6faad,0xffffffff,0xffffffff,
    0x00000000,0xffffffff
};
#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
/* The Montgomery normalizer for order of the curve P256. */
static sp_digit p256_norm_order[8] = {
    0x039cdaaf,0x0c46353d,0x58e8617b,0x43190552,0x00000000,0x00000000,
    0xffffffff,0x00000000
};
#endif
#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
/* The Montgomery multiplier for order of the curve P256. */
static sp_digit p256_mp_order = 0xee00bc4f;
#endif
/* The base point of curve P256. */
static sp_point p256_base = {
    /* X ordinate */
    {
        0xd898c296,0xf4a13945,0x2deb33a0,0x77037d81,0x63a440f2,0xf8bce6e5,
        0xe12c4247,0x6b17d1f2
    },
    /* Y ordinate */
    {
        0x37bf51f5,0xcbb64068,0x6b315ece,0x2bce3357,0x7c0f9e16,0x8ee7eb4a,
        0xfe1a7f9b,0x4fe342e2
    },
    /* Z ordinate */
    {
        0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
        0x00000000,0x00000000
    },
    /* infinity */
    0
};
#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
/* The coefficient b of the curve P256 equation y^2 = x^3 - 3x + b. */
static sp_digit p256_b[8] = {
    0x27d2604b,0x3bce3c3e,0xcc53b0f6,0x651d06b0,0x769886bc,0xb3ebbd55,
    0xaa3a93e7,0x5ac635d8
};
#endif
16810 
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
/* Allocate memory for point and return error.
 *
 * heap  Heap hint passed to XMALLOC.
 * sp    Unused in this configuration.
 * p     Pointer variable that receives the allocated point.
 * Evaluates to MEMORY_E when the allocation fails, otherwise MP_OKAY.
 * The whole expansion is parenthesized so that it can be used inside a
 * larger expression.
 */
#define sp_ecc_point_new(heap, sp, p)                                        \
    ((((p) = (sp_point*)XMALLOC(sizeof(sp_point), (heap),                    \
                                DYNAMIC_TYPE_ECC)) == NULL) ?                \
        MEMORY_E : MP_OKAY)
#else
/* Set pointer to data and return no error.
 *
 * heap  Unused in this configuration.
 * sp    Point object that p is set to reference.
 * p     Pointer variable that receives the address of sp.
 * Evaluates to MP_OKAY (the MEMORY_E arm only mirrors the allocating form).
 * The whole expansion is parenthesized so that it can be used inside a
 * larger expression.
 */
#define sp_ecc_point_new(heap, sp, p)                                        \
    ((((p) = &(sp)) == NULL) ? MEMORY_E : MP_OKAY)
#endif
16820 
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
/* If valid pointer then clear point data if requested and free data.
 *
 * p      Point to dispose of. May be NULL.
 * clear  Non-zero to zero the point data before it is freed.
 * heap   Heap hint passed to XFREE.
 * Arguments are parenthesized so that expressions can be passed safely.
 */
#define sp_ecc_point_free(p, clear, heap)         \
    do {                                          \
        if ((p) != NULL) {                        \
            if (clear)                            \
                XMEMSET((p), 0, sizeof(*(p)));    \
            XFREE((p), (heap), DYNAMIC_TYPE_ECC); \
        }                                         \
    }                                             \
    while (0)
#else
/* Clear point data if requested.
 *
 * p      Point to clear.
 * clear  Non-zero to zero the point data.
 * heap   Unused in this configuration.
 * Arguments are parenthesized so that expressions can be passed safely.
 */
#define sp_ecc_point_free(p, clear, heap)     \
    do {                                      \
        if (clear)                            \
            XMEMSET((p), 0, sizeof(*(p)));    \
    }                                         \
    while (0)
#endif
16841 
16842 /* Multiply a number by Montogmery normalizer mod modulus (prime).
16843  *
16844  * r  The resulting Montgomery form number.
16845  * a  The number to convert.
16846  * m  The modulus (prime).
16847  */
16848 static int sp_256_mod_mul_norm_8(sp_digit* r, sp_digit* a, sp_digit* m)
16849 {
16850     int64_t t[8];
16851     int64_t a64[8];
16852     int64_t o;
16853 
16854     (void)m;
16855 
16856     a64[0] = a[0];
16857     a64[1] = a[1];
16858     a64[2] = a[2];
16859     a64[3] = a[3];
16860     a64[4] = a[4];
16861     a64[5] = a[5];
16862     a64[6] = a[6];
16863     a64[7] = a[7];
16864 
16865     /*  1  1  0 -1 -1 -1 -1  0 */
16866     t[0] = 0 + a64[0] + a64[1] - a64[3] - a64[4] - a64[5] - a64[6];
16867     /*  0  1  1  0 -1 -1 -1 -1 */
16868     t[1] = 0 + a64[1] + a64[2] - a64[4] - a64[5] - a64[6] - a64[7];
16869     /*  0  0  1  1  0 -1 -1 -1 */
16870     t[2] = 0 + a64[2] + a64[3] - a64[5] - a64[6] - a64[7];
16871     /* -1 -1  0  2  2  1  0 -1 */
16872     t[3] = 0 - a64[0] - a64[1] + 2 * a64[3] + 2 * a64[4] + a64[5] - a64[7];
16873     /*  0 -1 -1  0  2  2  1  0 */
16874     t[4] = 0 - a64[1] - a64[2] + 2 * a64[4] + 2 * a64[5] + a64[6];
16875     /*  0  0 -1 -1  0  2  2  1 */
16876     t[5] = 0 - a64[2] - a64[3] + 2 * a64[5] + 2 * a64[6] + a64[7];
16877     /* -1 -1  0  0  0  1  3  2 */
16878     t[6] = 0 - a64[0] - a64[1] + a64[5] + 3 * a64[6] + 2 * a64[7];
16879     /*  1  0 -1 -1 -1 -1  0  3 */
16880     t[7] = 0 + a64[0] - a64[2] - a64[3] - a64[4] - a64[5] + 3 * a64[7];
16881 
16882     t[1] += t[0] >> 32; t[0] &= 0xffffffff;
16883     t[2] += t[1] >> 32; t[1] &= 0xffffffff;
16884     t[3] += t[2] >> 32; t[2] &= 0xffffffff;
16885     t[4] += t[3] >> 32; t[3] &= 0xffffffff;
16886     t[5] += t[4] >> 32; t[4] &= 0xffffffff;
16887     t[6] += t[5] >> 32; t[5] &= 0xffffffff;
16888     t[7] += t[6] >> 32; t[6] &= 0xffffffff;
16889     o     = t[7] >> 32; t[7] &= 0xffffffff;
16890     t[0] += o;
16891     t[3] -= o;
16892     t[6] -= o;
16893     t[7] += o;
16894     t[1] += t[0] >> 32; t[0] &= 0xffffffff;
16895     t[2] += t[1] >> 32; t[1] &= 0xffffffff;
16896     t[3] += t[2] >> 32; t[2] &= 0xffffffff;
16897     t[4] += t[3] >> 32; t[3] &= 0xffffffff;
16898     t[5] += t[4] >> 32; t[4] &= 0xffffffff;
16899     t[6] += t[5] >> 32; t[5] &= 0xffffffff;
16900     t[7] += t[6] >> 32; t[6] &= 0xffffffff;
16901     r[0] = t[0];
16902     r[1] = t[1];
16903     r[2] = t[2];
16904     r[3] = t[3];
16905     r[4] = t[4];
16906     r[5] = t[5];
16907     r[6] = t[6];
16908     r[7] = t[7];
16909 
16910     return MP_OKAY;
16911 }
16912 
16913 /* Convert an mp_int to an array of sp_digit.
16914  *
16915  * r  A single precision integer.
16916  * a  A multi-precision integer.
16917  */
16918 static void sp_256_from_mp(sp_digit* r, int max, mp_int* a)
16919 {
16920 #if DIGIT_BIT == 32
16921     int j;
16922 
16923     XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
16924 
16925     for (j = a->used; j < max; j++)
16926         r[j] = 0;
16927 #elif DIGIT_BIT > 32
16928     int i, j = 0, s = 0;
16929 
16930     r[0] = 0;
16931     for (i = 0; i < a->used && j < max; i++) {
16932         r[j] |= a->dp[i] << s;
16933         r[j] &= 0xffffffff;
16934         s = 32 - s;
16935         if (j + 1 >= max)
16936             break;
16937         r[++j] = a->dp[i] >> s;
16938         while (s + 32 <= DIGIT_BIT) {
16939             s += 32;
16940             r[j] &= 0xffffffff;
16941             if (j + 1 >= max)
16942                 break;
16943             if (s < DIGIT_BIT)
16944                 r[++j] = a->dp[i] >> s;
16945             else
16946                 r[++j] = 0;
16947         }
16948         s = DIGIT_BIT - s;
16949     }
16950 
16951     for (j++; j < max; j++)
16952         r[j] = 0;
16953 #else
16954     int i, j = 0, s = 0;
16955 
16956     r[0] = 0;
16957     for (i = 0; i < a->used && j < max; i++) {
16958         r[j] |= ((sp_digit)a->dp[i]) << s;
16959         if (s + DIGIT_BIT >= 32) {
16960             r[j] &= 0xffffffff;
16961             if (j + 1 >= max)
16962                 break;
16963             s = 32 - s;
16964             if (s == DIGIT_BIT) {
16965                 r[++j] = 0;
16966                 s = 0;
16967             }
16968             else {
16969                 r[++j] = a->dp[i] >> s;
16970                 s = DIGIT_BIT - s;
16971             }
16972         }
16973         else
16974             s += DIGIT_BIT;
16975     }
16976 
16977     for (j++; j < max; j++)
16978         r[j] = 0;
16979 #endif
16980 }
16981 
16982 /* Convert a point of type ecc_point to type sp_point.
16983  *
16984  * p   Point of type sp_point (result).
16985  * pm  Point of type ecc_point.
16986  */
16987 static void sp_256_point_from_ecc_point_8(sp_point* p, ecc_point* pm)
16988 {
16989     XMEMSET(p->x, 0, sizeof(p->x));
16990     XMEMSET(p->y, 0, sizeof(p->y));
16991     XMEMSET(p->z, 0, sizeof(p->z));
16992     sp_256_from_mp(p->x, 8, pm->x);
16993     sp_256_from_mp(p->y, 8, pm->y);
16994     sp_256_from_mp(p->z, 8, pm->z);
16995     p->infinity = 0;
16996 }
16997 
16998 /* Convert an array of sp_digit to an mp_int.
16999  *
17000  * a  A single precision integer.
17001  * r  A multi-precision integer.
17002  */
17003 static int sp_256_to_mp(sp_digit* a, mp_int* r)
17004 {
17005     int err;
17006 
17007     err = mp_grow(r, (256 + DIGIT_BIT - 1) / DIGIT_BIT);
17008     if (err == MP_OKAY) {
17009 #if DIGIT_BIT == 32
17010         XMEMCPY(r->dp, a, sizeof(sp_digit) * 8);
17011         r->used = 8;
17012         mp_clamp(r);
17013 #elif DIGIT_BIT < 32
17014         int i, j = 0, s = 0;
17015 
17016         r->dp[0] = 0;
17017         for (i = 0; i < 8; i++) {
17018             r->dp[j] |= a[i] << s;
17019             r->dp[j] &= (1l << DIGIT_BIT) - 1;
17020             s = DIGIT_BIT - s;
17021             r->dp[++j] = a[i] >> s;
17022             while (s + DIGIT_BIT <= 32) {
17023                 s += DIGIT_BIT;
17024                 r->dp[j] &= (1l << DIGIT_BIT) - 1;
17025                 r->dp[++j] = a[i] >> s;
17026             }
17027             s = 32 - s;
17028         }
17029         r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
17030         mp_clamp(r);
17031 #else
17032         int i, j = 0, s = 0;
17033 
17034         r->dp[0] = 0;
17035         for (i = 0; i < 8; i++) {
17036             r->dp[j] |= ((mp_digit)a[i]) << s;
17037             if (s + 32 >= DIGIT_BIT) {
17038     #if DIGIT_BIT < 32
17039                 r->dp[j] &= (1l << DIGIT_BIT) - 1;
17040     #endif
17041                 s = DIGIT_BIT - s;
17042                 r->dp[++j] = a[i] >> s;
17043                 s = 32 - s;
17044             }
17045             else
17046                 s += 32;
17047         }
17048         r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
17049         mp_clamp(r);
17050 #endif
17051     }
17052 
17053     return err;
17054 }
17055 
17056 /* Convert a point of type sp_point to type ecc_point.
17057  *
17058  * p   Point of type sp_point.
17059  * pm  Point of type ecc_point (result).
17060  * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
17061  * MP_OKAY.
17062  */
17063 static int sp_256_point_to_ecc_point_8(sp_point* p, ecc_point* pm)
17064 {
17065     int err;
17066 
17067     err = sp_256_to_mp(p->x, pm->x);
17068     if (err == MP_OKAY)
17069         err = sp_256_to_mp(p->y, pm->y);
17070     if (err == MP_OKAY)
17071         err = sp_256_to_mp(p->z, pm->z);
17072 
17073     return err;
17074 }
17075 
17076 /* Compare a with b in constant time.
17077  *
17078  * a  A single precision integer.
17079  * b  A single precision integer.
17080  * return -ve, 0 or +ve if a is less than, equal to or greater than b
17081  * respectively.
17082  */
17083 static int32_t sp_256_cmp_8(sp_digit* a, sp_digit* b)
17084 {
17085     sp_digit r = -1;
17086     sp_digit one = 1;
17087 
17088 #ifdef WOLFSSL_SP_SMALL
17089     __asm__ __volatile__ (
17090         "mov    r7, #0\n\t"
17091         "mov    r3, #-1\n\t"
17092         "mov    r6, #28\n\t"
17093         "1:\n\t"
17094         "ldr    r4, [%[a], r6]\n\t"
17095         "ldr    r5, [%[b], r6]\n\t"
17096         "and    r4, r4, r3\n\t"
17097         "and    r5, r5, r3\n\t"
17098         "subs   r4, r4, r5\n\t"
17099         "movhi  %[r], %[one]\n\t"
17100         "movlo  %[r], r3\n\t"
17101         "movne  r3, r7\n\t"
17102         "sub    r6, r6, #4\n\t"
17103         "bcc    1b\n\t"
17104         "eor    %[r], %[r], r3\n\t"
17105         : [r] "+r" (r)
17106         : [a] "r" (a), [b] "r" (b), [one] "r" (one)
17107         : "r2", "r3", "r4", "r5", "r6", "r7"
17108     );
17109 #else
17110     __asm__ __volatile__ (
17111         "mov    r7, #0\n\t"
17112         "mov    r3, #-1\n\t"
17113         "ldr        r4, [%[a], #28]\n\t"
17114         "ldr        r5, [%[b], #28]\n\t"
17115         "and        r4, r4, r3\n\t"
17116         "and        r5, r5, r3\n\t"
17117         "subs   r4, r4, r5\n\t"
17118         "movhi  %[r], %[one]\n\t"
17119         "movlo  %[r], r3\n\t"
17120         "movne  r3, r7\n\t"
17121         "ldr        r4, [%[a], #24]\n\t"
17122         "ldr        r5, [%[b], #24]\n\t"
17123         "and        r4, r4, r3\n\t"
17124         "and        r5, r5, r3\n\t"
17125         "subs   r4, r4, r5\n\t"
17126         "movhi  %[r], %[one]\n\t"
17127         "movlo  %[r], r3\n\t"
17128         "movne  r3, r7\n\t"
17129         "ldr        r4, [%[a], #20]\n\t"
17130         "ldr        r5, [%[b], #20]\n\t"
17131         "and        r4, r4, r3\n\t"
17132         "and        r5, r5, r3\n\t"
17133         "subs   r4, r4, r5\n\t"
17134         "movhi  %[r], %[one]\n\t"
17135         "movlo  %[r], r3\n\t"
17136         "movne  r3, r7\n\t"
17137         "ldr        r4, [%[a], #16]\n\t"
17138         "ldr        r5, [%[b], #16]\n\t"
17139         "and        r4, r4, r3\n\t"
17140         "and        r5, r5, r3\n\t"
17141         "subs   r4, r4, r5\n\t"
17142         "movhi  %[r], %[one]\n\t"
17143         "movlo  %[r], r3\n\t"
17144         "movne  r3, r7\n\t"
17145         "ldr        r4, [%[a], #12]\n\t"
17146         "ldr        r5, [%[b], #12]\n\t"
17147         "and        r4, r4, r3\n\t"
17148         "and        r5, r5, r3\n\t"
17149         "subs   r4, r4, r5\n\t"
17150         "movhi  %[r], %[one]\n\t"
17151         "movlo  %[r], r3\n\t"
17152         "movne  r3, r7\n\t"
17153         "ldr        r4, [%[a], #8]\n\t"
17154         "ldr        r5, [%[b], #8]\n\t"
17155         "and        r4, r4, r3\n\t"
17156         "and        r5, r5, r3\n\t"
17157         "subs   r4, r4, r5\n\t"
17158         "movhi  %[r], %[one]\n\t"
17159         "movlo  %[r], r3\n\t"
17160         "movne  r3, r7\n\t"
17161         "ldr        r4, [%[a], #4]\n\t"
17162         "ldr        r5, [%[b], #4]\n\t"
17163         "and        r4, r4, r3\n\t"
17164         "and        r5, r5, r3\n\t"
17165         "subs   r4, r4, r5\n\t"
17166         "movhi  %[r], %[one]\n\t"
17167         "movlo  %[r], r3\n\t"
17168         "movne  r3, r7\n\t"
17169         "ldr        r4, [%[a], #0]\n\t"
17170         "ldr        r5, [%[b], #0]\n\t"
17171         "and        r4, r4, r3\n\t"
17172         "and        r5, r5, r3\n\t"
17173         "subs   r4, r4, r5\n\t"
17174         "movhi  %[r], %[one]\n\t"
17175         "movlo  %[r], r3\n\t"
17176         "movne  r3, r7\n\t"
17177         "eor    %[r], %[r], r3\n\t"
17178         : [r] "+r" (r)
17179         : [a] "r" (a), [b] "r" (b), [one] "r" (one)
17180         : "r2", "r3", "r4", "r5", "r6", "r7"
17181     );
17182 #endif
17183 
17184     return r;
17185 }
17186 
/* Normalize the values in each word to 32 bits.
 *
 * Expands to nothing in this implementation.
 *
 * a  Array of sp_digit to normalize.
 */
#define sp_256_norm_8(a)
17192 
17193 /* Conditionally subtract b from a using the mask m.
17194  * m is -1 to subtract and 0 when not copying.
17195  *
17196  * r  A single precision number representing condition subtract result.
17197  * a  A single precision number to subtract from.
17198  * b  A single precision number to subtract.
17199  * m  Mask value to apply.
17200  */
17201 static sp_digit sp_256_cond_sub_8(sp_digit* r, sp_digit* a, sp_digit* b,
17202         sp_digit m)
17203 {
17204     sp_digit c = 0;
17205 
17206 #ifdef WOLFSSL_SP_SMALL
17207     __asm__ __volatile__ (
17208         "mov    r9, #0\n\t"
17209         "mov    r8, #0\n\t"
17210         "1:\n\t"
17211         "subs   %[c], r9, %[c]\n\t"
17212         "ldr    r4, [%[a], r8]\n\t"
17213         "ldr    r5, [%[b], r8]\n\t"
17214         "and    r5, r5, %[m]\n\t"
17215         "sbcs   r4, r4, r5\n\t"
17216         "sbc    %[c], r9, r9\n\t"
17217         "str    r4, [%[r], r8]\n\t"
17218         "add    r8, r8, #4\n\t"
17219         "cmp    r8, #32\n\t"
17220         "blt    1b\n\t"
17221         : [c] "+r" (c)
17222         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
17223         : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
17224     );
17225 #else
17226     __asm__ __volatile__ (
17227 
17228         "mov    r9, #0\n\t"
17229         "ldr    r4, [%[a], #0]\n\t"
17230         "ldr    r6, [%[a], #4]\n\t"
17231         "ldr    r5, [%[b], #0]\n\t"
17232         "ldr    r7, [%[b], #4]\n\t"
17233         "and    r5, r5, %[m]\n\t"
17234         "and    r7, r7, %[m]\n\t"
17235         "subs   r4, r4, r5\n\t"
17236         "sbcs   r6, r6, r7\n\t"
17237         "str    r4, [%[r], #0]\n\t"
17238         "str    r6, [%[r], #4]\n\t"
17239         "ldr    r4, [%[a], #8]\n\t"
17240         "ldr    r6, [%[a], #12]\n\t"
17241         "ldr    r5, [%[b], #8]\n\t"
17242         "ldr    r7, [%[b], #12]\n\t"
17243         "and    r5, r5, %[m]\n\t"
17244         "and    r7, r7, %[m]\n\t"
17245         "sbcs   r4, r4, r5\n\t"
17246         "sbcs   r6, r6, r7\n\t"
17247         "str    r4, [%[r], #8]\n\t"
17248         "str    r6, [%[r], #12]\n\t"
17249         "ldr    r4, [%[a], #16]\n\t"
17250         "ldr    r6, [%[a], #20]\n\t"
17251         "ldr    r5, [%[b], #16]\n\t"
17252         "ldr    r7, [%[b], #20]\n\t"
17253         "and    r5, r5, %[m]\n\t"
17254         "and    r7, r7, %[m]\n\t"
17255         "sbcs   r4, r4, r5\n\t"
17256         "sbcs   r6, r6, r7\n\t"
17257         "str    r4, [%[r], #16]\n\t"
17258         "str    r6, [%[r], #20]\n\t"
17259         "ldr    r4, [%[a], #24]\n\t"
17260         "ldr    r6, [%[a], #28]\n\t"
17261         "ldr    r5, [%[b], #24]\n\t"
17262         "ldr    r7, [%[b], #28]\n\t"
17263         "and    r5, r5, %[m]\n\t"
17264         "and    r7, r7, %[m]\n\t"
17265         "sbcs   r4, r4, r5\n\t"
17266         "sbcs   r6, r6, r7\n\t"
17267         "str    r4, [%[r], #24]\n\t"
17268         "str    r6, [%[r], #28]\n\t"
17269         "sbc    %[c], r9, r9\n\t"
17270         : [c] "+r" (c)
17271         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
17272         : "memory", "r4", "r6", "r5", "r7", "r8", "r9"
17273     );
17274 #endif /* WOLFSSL_SP_SMALL */
17275 
17276     return c;
17277 }
17278 
17279 /* Reduce the number back to 256 bits using Montgomery reduction.
17280  *
17281  * a   A single precision number to reduce in place.
17282  * m   The single precision number representing the modulus.
17283  * mp  The digit representing the negative inverse of m mod 2^n.
17284  */
17285 SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, sp_digit* m,
17286         sp_digit mp)
17287 {
17288     sp_digit ca = 0;
17289 
17290     __asm__ __volatile__ (
17291         "# i = 0\n\t"
17292         "mov    r12, #0\n\t"
17293         "ldr    r10, [%[a], #0]\n\t"
17294         "ldr    r14, [%[a], #4]\n\t"
17295         "\n1:\n\t"
17296         "# mu = a[i] * mp\n\t"
17297         "mul    r8, %[mp], r10\n\t"
17298         "# a[i+0] += m[0] * mu\n\t"
17299         "ldr    r7, [%[m], #0]\n\t"
17300         "ldr    r9, [%[a], #0]\n\t"
17301         "umull  r6, r7, r8, r7\n\t"
17302         "adds   r10, r10, r6\n\t"
17303         "adc    r5, r7, #0\n\t"
17304         "# a[i+1] += m[1] * mu\n\t"
17305         "ldr    r7, [%[m], #4]\n\t"
17306         "ldr    r9, [%[a], #4]\n\t"
17307         "umull  r6, r7, r8, r7\n\t"
17308         "adds   r10, r14, r6\n\t"
17309         "adc    r4, r7, #0\n\t"
17310         "adds   r10, r10, r5\n\t"
17311         "adc    r4, r4, #0\n\t"
17312         "# a[i+2] += m[2] * mu\n\t"
17313         "ldr    r7, [%[m], #8]\n\t"
17314         "ldr    r14, [%[a], #8]\n\t"
17315         "umull  r6, r7, r8, r7\n\t"
17316         "adds   r14, r14, r6\n\t"
17317         "adc    r5, r7, #0\n\t"
17318         "adds   r14, r14, r4\n\t"
17319         "adc    r5, r5, #0\n\t"
17320         "# a[i+3] += m[3] * mu\n\t"
17321         "ldr    r7, [%[m], #12]\n\t"
17322         "ldr    r9, [%[a], #12]\n\t"
17323         "umull  r6, r7, r8, r7\n\t"
17324         "adds   r9, r9, r6\n\t"
17325         "adc    r4, r7, #0\n\t"
17326         "adds   r9, r9, r5\n\t"
17327         "str    r9, [%[a], #12]\n\t"
17328         "adc    r4, r4, #0\n\t"
17329         "# a[i+4] += m[4] * mu\n\t"
17330         "ldr    r7, [%[m], #16]\n\t"
17331         "ldr    r9, [%[a], #16]\n\t"
17332         "umull  r6, r7, r8, r7\n\t"
17333         "adds   r9, r9, r6\n\t"
17334         "adc    r5, r7, #0\n\t"
17335         "adds   r9, r9, r4\n\t"
17336         "str    r9, [%[a], #16]\n\t"
17337         "adc    r5, r5, #0\n\t"
17338         "# a[i+5] += m[5] * mu\n\t"
17339         "ldr    r7, [%[m], #20]\n\t"
17340         "ldr    r9, [%[a], #20]\n\t"
17341         "umull  r6, r7, r8, r7\n\t"
17342         "adds   r9, r9, r6\n\t"
17343         "adc    r4, r7, #0\n\t"
17344         "adds   r9, r9, r5\n\t"
17345         "str    r9, [%[a], #20]\n\t"
17346         "adc    r4, r4, #0\n\t"
17347         "# a[i+6] += m[6] * mu\n\t"
17348         "ldr    r7, [%[m], #24]\n\t"
17349         "ldr    r9, [%[a], #24]\n\t"
17350         "umull  r6, r7, r8, r7\n\t"
17351         "adds   r9, r9, r6\n\t"
17352         "adc    r5, r7, #0\n\t"
17353         "adds   r9, r9, r4\n\t"
17354         "str    r9, [%[a], #24]\n\t"
17355         "adc    r5, r5, #0\n\t"
17356         "# a[i+7] += m[7] * mu\n\t"
17357         "ldr    r7, [%[m], #28]\n\t"
17358         "ldr   r9, [%[a], #28]\n\t"
17359         "umull  r6, r7, r8, r7\n\t"
17360         "adds   r5, r5, r6\n\t"
17361         "adcs   r7, r7, %[ca]\n\t"
17362         "mov    %[ca], #0\n\t"
17363         "adc    %[ca], %[ca], %[ca]\n\t"
17364         "adds   r9, r9, r5\n\t"
17365         "str    r9, [%[a], #28]\n\t"
17366         "ldr    r9, [%[a], #32]\n\t"
17367         "adcs   r9, r9, r7\n\t"
17368         "str    r9, [%[a], #32]\n\t"
17369         "adc    %[ca], %[ca], #0\n\t"
17370         "# i += 1\n\t"
17371         "add    %[a], %[a], #4\n\t"
17372         "add    r12, r12, #4\n\t"
17373         "cmp    r12, #32\n\t"
17374         "blt    1b\n\t"
17375         "str    r10, [%[a], #0]\n\t"
17376         "str    r14, [%[a], #4]\n\t"
17377         : [ca] "+r" (ca), [a] "+r" (a)
17378         : [m] "r" (m), [mp] "r" (mp)
17379         : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
17380     );
17381 
17382     sp_256_cond_sub_8(a - 8, a, m, (sp_digit)0 - ca);
17383 }
17384 
17385 /* Multiply two Montogmery form numbers mod the modulus (prime).
17386  * (r = a * b mod m)
17387  *
17388  * r   Result of multiplication.
17389  * a   First number to multiply in Montogmery form.
17390  * b   Second number to multiply in Montogmery form.
17391  * m   Modulus (prime).
17392  * mp  Montogmery mulitplier.
17393  */
17394 SP_NOINLINE static void sp_256_mont_mul_8(sp_digit* r, sp_digit* a, sp_digit* b,
17395         sp_digit* m, sp_digit mp)
17396 {
17397     sp_digit tmp[9];
17398 
17399     (void)mp;
17400     (void)m;
17401 
17402     __asm__ __volatile__ (
17403         "mov    r5, #0\n\t"
17404         "#  A[0] * B[0]\n\t"
17405         "ldr    r6, [%[a], #0]\n\t"
17406         "ldr    r7, [%[b], #0]\n\t"
17407         "umull  r8, r9, r6, r7\n\t"
17408         "str    r8, [%[tmp], #0]\n\t"
17409         "#  A[0] * B[1]\n\t"
17410         "ldr    r6, [%[a], #0]\n\t"
17411         "ldr    r7, [%[b], #4]\n\t"
17412         "umull  r3, r4, r6, r7\n\t"
17413         "adds   r9, r3, r9\n\t"
17414         "adc    r10, r4, #0\n\t"
17415         "#  A[1] * B[0]\n\t"
17416         "ldr    r6, [%[a], #4]\n\t"
17417         "ldr    r7, [%[b], #0]\n\t"
17418         "umull  r3, r4, r6, r7\n\t"
17419         "adds   r9, r3, r9\n\t"
17420         "adcs   r10, r4, r10\n\t"
17421         "adc    r14, r5, #0\n\t"
17422         "str    r9, [%[tmp], #4]\n\t"
17423         "#  A[0] * B[2]\n\t"
17424         "ldr    r6, [%[a], #0]\n\t"
17425         "ldr    r7, [%[b], #8]\n\t"
17426         "umull  r3, r4, r6, r7\n\t"
17427         "adds   r10, r3, r10\n\t"
17428         "adc    r14, r4, r14\n\t"
17429         "#  A[1] * B[1]\n\t"
17430         "ldr    r6, [%[a], #4]\n\t"
17431         "ldr    r7, [%[b], #4]\n\t"
17432         "umull  r3, r4, r6, r7\n\t"
17433         "adds   r10, r3, r10\n\t"
17434         "adcs   r14, r4, r14\n\t"
17435         "adc    r8, r5, #0\n\t"
17436         "#  A[2] * B[0]\n\t"
17437         "ldr    r6, [%[a], #8]\n\t"
17438         "ldr    r7, [%[b], #0]\n\t"
17439         "umull  r3, r4, r6, r7\n\t"
17440         "adds   r10, r3, r10\n\t"
17441         "adcs   r14, r4, r14\n\t"
17442         "adc    r8, r5, r8\n\t"
17443         "str    r10, [%[tmp], #8]\n\t"
17444         "#  A[0] * B[3]\n\t"
17445         "ldr    r6, [%[a], #0]\n\t"
17446         "ldr    r7, [%[b], #12]\n\t"
17447         "umull  r3, r4, r6, r7\n\t"
17448         "adds   r14, r3, r14\n\t"
17449         "adcs   r8, r4, r8\n\t"
17450         "adc    r9, r5, #0\n\t"
17451         "#  A[1] * B[2]\n\t"
17452         "ldr    r6, [%[a], #4]\n\t"
17453         "ldr    r7, [%[b], #8]\n\t"
17454         "umull  r3, r4, r6, r7\n\t"
17455         "adds   r14, r3, r14\n\t"
17456         "adcs   r8, r4, r8\n\t"
17457         "adc    r9, r5, r9\n\t"
17458         "#  A[2] * B[1]\n\t"
17459         "ldr    r6, [%[a], #8]\n\t"
17460         "ldr    r7, [%[b], #4]\n\t"
17461         "umull  r3, r4, r6, r7\n\t"
17462         "adds   r14, r3, r14\n\t"
17463         "adcs   r8, r4, r8\n\t"
17464         "adc    r9, r5, r9\n\t"
17465         "#  A[3] * B[0]\n\t"
17466         "ldr    r6, [%[a], #12]\n\t"
17467         "ldr    r7, [%[b], #0]\n\t"
17468         "umull  r3, r4, r6, r7\n\t"
17469         "adds   r14, r3, r14\n\t"
17470         "adcs   r8, r4, r8\n\t"
17471         "adc    r9, r5, r9\n\t"
17472         "str    r14, [%[tmp], #12]\n\t"
17473         "#  A[0] * B[4]\n\t"
17474         "ldr    r6, [%[a], #0]\n\t"
17475         "ldr    r7, [%[b], #16]\n\t"
17476         "umull  r3, r4, r6, r7\n\t"
17477         "adds   r8, r3, r8\n\t"
17478         "adcs   r9, r4, r9\n\t"
17479         "adc    r10, r5, #0\n\t"
17480         "#  A[1] * B[3]\n\t"
17481         "ldr    r6, [%[a], #4]\n\t"
17482         "ldr    r7, [%[b], #12]\n\t"
17483         "umull  r3, r4, r6, r7\n\t"
17484         "adds   r8, r3, r8\n\t"
17485         "adcs   r9, r4, r9\n\t"
17486         "adc    r10, r5, r10\n\t"
17487         "#  A[2] * B[2]\n\t"
17488         "ldr    r6, [%[a], #8]\n\t"
17489         "ldr    r7, [%[b], #8]\n\t"
17490         "umull  r3, r4, r6, r7\n\t"
17491         "adds   r8, r3, r8\n\t"
17492         "adcs   r9, r4, r9\n\t"
17493         "adc    r10, r5, r10\n\t"
17494         "#  A[3] * B[1]\n\t"
17495         "ldr    r6, [%[a], #12]\n\t"
17496         "ldr    r7, [%[b], #4]\n\t"
17497         "umull  r3, r4, r6, r7\n\t"
17498         "adds   r8, r3, r8\n\t"
17499         "adcs   r9, r4, r9\n\t"
17500         "adc    r10, r5, r10\n\t"
17501         "#  A[4] * B[0]\n\t"
17502         "ldr    r6, [%[a], #16]\n\t"
17503         "ldr    r7, [%[b], #0]\n\t"
17504         "umull  r3, r4, r6, r7\n\t"
17505         "adds   r8, r3, r8\n\t"
17506         "adcs   r9, r4, r9\n\t"
17507         "adc    r10, r5, r10\n\t"
17508         "str    r8, [%[tmp], #16]\n\t"
17509         "#  A[0] * B[5]\n\t"
17510         "ldr    r6, [%[a], #0]\n\t"
17511         "ldr    r7, [%[b], #20]\n\t"
17512         "umull  r3, r4, r6, r7\n\t"
17513         "adds   r9, r3, r9\n\t"
17514         "adcs   r10, r4, r10\n\t"
17515         "adc    r14, r5, #0\n\t"
17516         "#  A[1] * B[4]\n\t"
17517         "ldr    r6, [%[a], #4]\n\t"
17518         "ldr    r7, [%[b], #16]\n\t"
17519         "umull  r3, r4, r6, r7\n\t"
17520         "adds   r9, r3, r9\n\t"
17521         "adcs   r10, r4, r10\n\t"
17522         "adc    r14, r5, r14\n\t"
17523         "#  A[2] * B[3]\n\t"
17524         "ldr    r6, [%[a], #8]\n\t"
17525         "ldr    r7, [%[b], #12]\n\t"
17526         "umull  r3, r4, r6, r7\n\t"
17527         "adds   r9, r3, r9\n\t"
17528         "adcs   r10, r4, r10\n\t"
17529         "adc    r14, r5, r14\n\t"
17530         "#  A[3] * B[2]\n\t"
17531         "ldr    r6, [%[a], #12]\n\t"
17532         "ldr    r7, [%[b], #8]\n\t"
17533         "umull  r3, r4, r6, r7\n\t"
17534         "adds   r9, r3, r9\n\t"
17535         "adcs   r10, r4, r10\n\t"
17536         "adc    r14, r5, r14\n\t"
17537         "#  A[4] * B[1]\n\t"
17538         "ldr    r6, [%[a], #16]\n\t"
17539         "ldr    r7, [%[b], #4]\n\t"
17540         "umull  r3, r4, r6, r7\n\t"
17541         "adds   r9, r3, r9\n\t"
17542         "adcs   r10, r4, r10\n\t"
17543         "adc    r14, r5, r14\n\t"
17544         "#  A[5] * B[0]\n\t"
17545         "ldr    r6, [%[a], #20]\n\t"
17546         "ldr    r7, [%[b], #0]\n\t"
17547         "umull  r3, r4, r6, r7\n\t"
17548         "adds   r9, r3, r9\n\t"
17549         "adcs   r10, r4, r10\n\t"
17550         "adc    r14, r5, r14\n\t"
17551         "str    r9, [%[tmp], #20]\n\t"
17552         "#  A[0] * B[6]\n\t"
17553         "ldr    r6, [%[a], #0]\n\t"
17554         "ldr    r7, [%[b], #24]\n\t"
17555         "umull  r3, r4, r6, r7\n\t"
17556         "adds   r10, r3, r10\n\t"
17557         "adcs   r14, r4, r14\n\t"
17558         "adc    r8, r5, #0\n\t"
17559         "#  A[1] * B[5]\n\t"
17560         "ldr    r6, [%[a], #4]\n\t"
17561         "ldr    r7, [%[b], #20]\n\t"
17562         "umull  r3, r4, r6, r7\n\t"
17563         "adds   r10, r3, r10\n\t"
17564         "adcs   r14, r4, r14\n\t"
17565         "adc    r8, r5, r8\n\t"
17566         "#  A[2] * B[4]\n\t"
17567         "ldr    r6, [%[a], #8]\n\t"
17568         "ldr    r7, [%[b], #16]\n\t"
17569         "umull  r3, r4, r6, r7\n\t"
17570         "adds   r10, r3, r10\n\t"
17571         "adcs   r14, r4, r14\n\t"
17572         "adc    r8, r5, r8\n\t"
17573         "#  A[3] * B[3]\n\t"
17574         "ldr    r6, [%[a], #12]\n\t"
17575         "ldr    r7, [%[b], #12]\n\t"
17576         "umull  r3, r4, r6, r7\n\t"
17577         "adds   r10, r3, r10\n\t"
17578         "adcs   r14, r4, r14\n\t"
17579         "adc    r8, r5, r8\n\t"
17580         "#  A[4] * B[2]\n\t"
17581         "ldr    r6, [%[a], #16]\n\t"
17582         "ldr    r7, [%[b], #8]\n\t"
17583         "umull  r3, r4, r6, r7\n\t"
17584         "adds   r10, r3, r10\n\t"
17585         "adcs   r14, r4, r14\n\t"
17586         "adc    r8, r5, r8\n\t"
17587         "#  A[5] * B[1]\n\t"
17588         "ldr    r6, [%[a], #20]\n\t"
17589         "ldr    r7, [%[b], #4]\n\t"
17590         "umull  r3, r4, r6, r7\n\t"
17591         "adds   r10, r3, r10\n\t"
17592         "adcs   r14, r4, r14\n\t"
17593         "adc    r8, r5, r8\n\t"
17594         "#  A[6] * B[0]\n\t"
17595         "ldr    r6, [%[a], #24]\n\t"
17596         "ldr    r7, [%[b], #0]\n\t"
17597         "umull  r3, r4, r6, r7\n\t"
17598         "adds   r10, r3, r10\n\t"
17599         "adcs   r14, r4, r14\n\t"
17600         "adc    r8, r5, r8\n\t"
17601         "str    r10, [%[tmp], #24]\n\t"
17602         "#  A[0] * B[7]\n\t"
17603         "ldr    r6, [%[a], #0]\n\t"
17604         "ldr    r7, [%[b], #28]\n\t"
17605         "umull  r3, r4, r6, r7\n\t"
17606         "adds   r14, r3, r14\n\t"
17607         "adcs   r8, r4, r8\n\t"
17608         "adc    r9, r5, #0\n\t"
17609         "#  A[1] * B[6]\n\t"
17610         "ldr    r6, [%[a], #4]\n\t"
17611         "ldr    r7, [%[b], #24]\n\t"
17612         "umull  r3, r4, r6, r7\n\t"
17613         "adds   r14, r3, r14\n\t"
17614         "adcs   r8, r4, r8\n\t"
17615         "adc    r9, r5, r9\n\t"
17616         "#  A[2] * B[5]\n\t"
17617         "ldr    r6, [%[a], #8]\n\t"
17618         "ldr    r7, [%[b], #20]\n\t"
17619         "umull  r3, r4, r6, r7\n\t"
17620         "adds   r14, r3, r14\n\t"
17621         "adcs   r8, r4, r8\n\t"
17622         "adc    r9, r5, r9\n\t"
17623         "#  A[3] * B[4]\n\t"
17624         "ldr    r6, [%[a], #12]\n\t"
17625         "ldr    r7, [%[b], #16]\n\t"
17626         "umull  r3, r4, r6, r7\n\t"
17627         "adds   r14, r3, r14\n\t"
17628         "adcs   r8, r4, r8\n\t"
17629         "adc    r9, r5, r9\n\t"
17630         "#  A[4] * B[3]\n\t"
17631         "ldr    r6, [%[a], #16]\n\t"
17632         "ldr    r7, [%[b], #12]\n\t"
17633         "umull  r3, r4, r6, r7\n\t"
17634         "adds   r14, r3, r14\n\t"
17635         "adcs   r8, r4, r8\n\t"
17636         "adc    r9, r5, r9\n\t"
17637         "#  A[5] * B[2]\n\t"
17638         "ldr    r6, [%[a], #20]\n\t"
17639         "ldr    r7, [%[b], #8]\n\t"
17640         "umull  r3, r4, r6, r7\n\t"
17641         "adds   r14, r3, r14\n\t"
17642         "adcs   r8, r4, r8\n\t"
17643         "adc    r9, r5, r9\n\t"
17644         "#  A[6] * B[1]\n\t"
17645         "ldr    r6, [%[a], #24]\n\t"
17646         "ldr    r7, [%[b], #4]\n\t"
17647         "umull  r3, r4, r6, r7\n\t"
17648         "adds   r14, r3, r14\n\t"
17649         "adcs   r8, r4, r8\n\t"
17650         "adc    r9, r5, r9\n\t"
17651         "#  A[7] * B[0]\n\t"
17652         "ldr    r6, [%[a], #28]\n\t"
17653         "ldr    r7, [%[b], #0]\n\t"
17654         "umull  r3, r4, r6, r7\n\t"
17655         "adds   r14, r3, r14\n\t"
17656         "adcs   r8, r4, r8\n\t"
17657         "adc    r9, r5, r9\n\t"
17658         "str    r14, [%[tmp], #28]\n\t"
17659         "#  A[1] * B[7]\n\t"
17660         "ldr    r6, [%[a], #4]\n\t"
17661         "ldr    r7, [%[b], #28]\n\t"
17662         "umull  r3, r4, r6, r7\n\t"
17663         "adds   r8, r3, r8\n\t"
17664         "adcs   r9, r4, r9\n\t"
17665         "adc    r10, r5, #0\n\t"
17666         "#  A[2] * B[6]\n\t"
17667         "ldr    r6, [%[a], #8]\n\t"
17668         "ldr    r7, [%[b], #24]\n\t"
17669         "umull  r3, r4, r6, r7\n\t"
17670         "adds   r8, r3, r8\n\t"
17671         "adcs   r9, r4, r9\n\t"
17672         "adc    r10, r5, r10\n\t"
17673         "#  A[3] * B[5]\n\t"
17674         "ldr    r6, [%[a], #12]\n\t"
17675         "ldr    r7, [%[b], #20]\n\t"
17676         "umull  r3, r4, r6, r7\n\t"
17677         "adds   r8, r3, r8\n\t"
17678         "adcs   r9, r4, r9\n\t"
17679         "adc    r10, r5, r10\n\t"
17680         "#  A[4] * B[4]\n\t"
17681         "ldr    r6, [%[a], #16]\n\t"
17682         "ldr    r7, [%[b], #16]\n\t"
17683         "umull  r3, r4, r6, r7\n\t"
17684         "adds   r8, r3, r8\n\t"
17685         "adcs   r9, r4, r9\n\t"
17686         "adc    r10, r5, r10\n\t"
17687         "#  A[5] * B[3]\n\t"
17688         "ldr    r6, [%[a], #20]\n\t"
17689         "ldr    r7, [%[b], #12]\n\t"
17690         "umull  r3, r4, r6, r7\n\t"
17691         "adds   r8, r3, r8\n\t"
17692         "adcs   r9, r4, r9\n\t"
17693         "adc    r10, r5, r10\n\t"
17694         "#  A[6] * B[2]\n\t"
17695         "ldr    r6, [%[a], #24]\n\t"
17696         "ldr    r7, [%[b], #8]\n\t"
17697         "umull  r3, r4, r6, r7\n\t"
17698         "adds   r8, r3, r8\n\t"
17699         "adcs   r9, r4, r9\n\t"
17700         "adc    r10, r5, r10\n\t"
17701         "#  A[7] * B[1]\n\t"
17702         "ldr    r6, [%[a], #28]\n\t"
17703         "ldr    r7, [%[b], #4]\n\t"
17704         "umull  r3, r4, r6, r7\n\t"
17705         "adds   r8, r3, r8\n\t"
17706         "adcs   r9, r4, r9\n\t"
17707         "adc    r10, r5, r10\n\t"
17708         "str    r8, [%[r], #0]\n\t"
17709         "#  A[2] * B[7]\n\t"
17710         "ldr    r6, [%[a], #8]\n\t"
17711         "ldr    r7, [%[b], #28]\n\t"
17712         "umull  r3, r4, r6, r7\n\t"
17713         "adds   r9, r3, r9\n\t"
17714         "adcs   r10, r4, r10\n\t"
17715         "adc    r14, r5, #0\n\t"
17716         "#  A[3] * B[6]\n\t"
17717         "ldr    r6, [%[a], #12]\n\t"
17718         "ldr    r7, [%[b], #24]\n\t"
17719         "umull  r3, r4, r6, r7\n\t"
17720         "adds   r9, r3, r9\n\t"
17721         "adcs   r10, r4, r10\n\t"
17722         "adc    r14, r5, r14\n\t"
17723         "#  A[4] * B[5]\n\t"
17724         "ldr    r6, [%[a], #16]\n\t"
17725         "ldr    r7, [%[b], #20]\n\t"
17726         "umull  r3, r4, r6, r7\n\t"
17727         "adds   r9, r3, r9\n\t"
17728         "adcs   r10, r4, r10\n\t"
17729         "adc    r14, r5, r14\n\t"
17730         "#  A[5] * B[4]\n\t"
17731         "ldr    r6, [%[a], #20]\n\t"
17732         "ldr    r7, [%[b], #16]\n\t"
17733         "umull  r3, r4, r6, r7\n\t"
17734         "adds   r9, r3, r9\n\t"
17735         "adcs   r10, r4, r10\n\t"
17736         "adc    r14, r5, r14\n\t"
17737         "#  A[6] * B[3]\n\t"
17738         "ldr    r6, [%[a], #24]\n\t"
17739         "ldr    r7, [%[b], #12]\n\t"
17740         "umull  r3, r4, r6, r7\n\t"
17741         "adds   r9, r3, r9\n\t"
17742         "adcs   r10, r4, r10\n\t"
17743         "adc    r14, r5, r14\n\t"
17744         "#  A[7] * B[2]\n\t"
17745         "ldr    r6, [%[a], #28]\n\t"
17746         "ldr    r7, [%[b], #8]\n\t"
17747         "umull  r3, r4, r6, r7\n\t"
17748         "adds   r9, r3, r9\n\t"
17749         "adcs   r10, r4, r10\n\t"
17750         "adc    r14, r5, r14\n\t"
17751         "str    r9, [%[r], #4]\n\t"
17752         "#  A[3] * B[7]\n\t"
17753         "ldr    r6, [%[a], #12]\n\t"
17754         "ldr    r7, [%[b], #28]\n\t"
17755         "umull  r3, r4, r6, r7\n\t"
17756         "adds   r10, r3, r10\n\t"
17757         "adcs   r14, r4, r14\n\t"
17758         "adc    r8, r5, #0\n\t"
17759         "#  A[4] * B[6]\n\t"
17760         "ldr    r6, [%[a], #16]\n\t"
17761         "ldr    r7, [%[b], #24]\n\t"
17762         "umull  r3, r4, r6, r7\n\t"
17763         "adds   r10, r3, r10\n\t"
17764         "adcs   r14, r4, r14\n\t"
17765         "adc    r8, r5, r8\n\t"
17766         "#  A[5] * B[5]\n\t"
17767         "ldr    r6, [%[a], #20]\n\t"
17768         "ldr    r7, [%[b], #20]\n\t"
17769         "umull  r3, r4, r6, r7\n\t"
17770         "adds   r10, r3, r10\n\t"
17771         "adcs   r14, r4, r14\n\t"
17772         "adc    r8, r5, r8\n\t"
17773         "#  A[6] * B[4]\n\t"
17774         "ldr    r6, [%[a], #24]\n\t"
17775         "ldr    r7, [%[b], #16]\n\t"
17776         "umull  r3, r4, r6, r7\n\t"
17777         "adds   r10, r3, r10\n\t"
17778         "adcs   r14, r4, r14\n\t"
17779         "adc    r8, r5, r8\n\t"
17780         "#  A[7] * B[3]\n\t"
17781         "ldr    r6, [%[a], #28]\n\t"
17782         "ldr    r7, [%[b], #12]\n\t"
17783         "umull  r3, r4, r6, r7\n\t"
17784         "adds   r10, r3, r10\n\t"
17785         "adcs   r14, r4, r14\n\t"
17786         "adc    r8, r5, r8\n\t"
17787         "str    r10, [%[r], #8]\n\t"
17788         "#  A[4] * B[7]\n\t"
17789         "ldr    r6, [%[a], #16]\n\t"
17790         "ldr    r7, [%[b], #28]\n\t"
17791         "umull  r3, r4, r6, r7\n\t"
17792         "adds   r14, r3, r14\n\t"
17793         "adcs   r8, r4, r8\n\t"
17794         "adc    r9, r5, #0\n\t"
17795         "#  A[5] * B[6]\n\t"
17796         "ldr    r6, [%[a], #20]\n\t"
17797         "ldr    r7, [%[b], #24]\n\t"
17798         "umull  r3, r4, r6, r7\n\t"
17799         "adds   r14, r3, r14\n\t"
17800         "adcs   r8, r4, r8\n\t"
17801         "adc    r9, r5, r9\n\t"
17802         "#  A[6] * B[5]\n\t"
17803         "ldr    r6, [%[a], #24]\n\t"
17804         "ldr    r7, [%[b], #20]\n\t"
17805         "umull  r3, r4, r6, r7\n\t"
17806         "adds   r14, r3, r14\n\t"
17807         "adcs   r8, r4, r8\n\t"
17808         "adc    r9, r5, r9\n\t"
17809         "#  A[7] * B[4]\n\t"
17810         "ldr    r6, [%[a], #28]\n\t"
17811         "ldr    r7, [%[b], #16]\n\t"
17812         "umull  r3, r4, r6, r7\n\t"
17813         "adds   r14, r3, r14\n\t"
17814         "adcs   r8, r4, r8\n\t"
17815         "adc    r9, r5, r9\n\t"
17816         "str    r14, [%[r], #12]\n\t"
17817         "#  A[5] * B[7]\n\t"
17818         "ldr    r6, [%[a], #20]\n\t"
17819         "ldr    r7, [%[b], #28]\n\t"
17820         "umull  r3, r4, r6, r7\n\t"
17821         "adds   r8, r3, r8\n\t"
17822         "adcs   r9, r4, r9\n\t"
17823         "adc    r10, r5, #0\n\t"
17824         "#  A[6] * B[6]\n\t"
17825         "ldr    r6, [%[a], #24]\n\t"
17826         "ldr    r7, [%[b], #24]\n\t"
17827         "umull  r3, r4, r6, r7\n\t"
17828         "adds   r8, r3, r8\n\t"
17829         "adcs   r9, r4, r9\n\t"
17830         "adc    r10, r5, r10\n\t"
17831         "#  A[7] * B[5]\n\t"
17832         "ldr    r6, [%[a], #28]\n\t"
17833         "ldr    r7, [%[b], #20]\n\t"
17834         "umull  r3, r4, r6, r7\n\t"
17835         "adds   r8, r3, r8\n\t"
17836         "adcs   r9, r4, r9\n\t"
17837         "adc    r10, r5, r10\n\t"
17838         "#  A[6] * B[7]\n\t"
17839         "ldr    r6, [%[a], #24]\n\t"
17840         "ldr    r7, [%[b], #28]\n\t"
17841         "umull  r3, r4, r6, r7\n\t"
17842         "adds   r9, r3, r9\n\t"
17843         "adcs   r10, r4, r10\n\t"
17844         "adc    r14, r5, #0\n\t"
17845         "#  A[7] * B[6]\n\t"
17846         "ldr    r6, [%[a], #28]\n\t"
17847         "ldr    r7, [%[b], #24]\n\t"
17848         "umull  r3, r4, r6, r7\n\t"
17849         "adds   r9, r3, r9\n\t"
17850         "adcs   r10, r4, r10\n\t"
17851         "adc    r14, r5, r14\n\t"
17852         "#  A[7] * B[7]\n\t"
17853         "ldr    r6, [%[a], #28]\n\t"
17854         "ldr    r7, [%[b], #28]\n\t"
17855         "umull  r3, r4, r6, r7\n\t"
17856         "adds   r10, r3, r10\n\t"
17857         "adc    r14, r4, r14\n\t"
17858         "str    r8, [%[r], #16]\n\t"
17859         "str    r9, [%[r], #20]\n\t"
17860         "str    r10, [%[r], #24]\n\t"
17861         "str    r14, [%[r], #28]\n\t"
17862         "# Start Reduction\n\t"
17863         "ldr    r4, [%[tmp], #0]\n\t"
17864         "ldr    r5, [%[tmp], #4]\n\t"
17865         "ldr    r6, [%[tmp], #8]\n\t"
17866         "ldr    r7, [%[tmp], #12]\n\t"
17867         "ldr    r8, [%[tmp], #16]\n\t"
17868         "ldr    r9, [%[tmp], #20]\n\t"
17869         "ldr    r10, [%[tmp], #24]\n\t"
17870         "ldr    r14, [%[tmp], #28]\n\t"
17871         "# mu = a[0]-a[7] + a[0]-a[4] << 96 + (a[0]-a[1] * 2) << 192\n\t"
17872         "#    - a[0] << 224\n\t"
17873         "#   + (a[0]-a[1] * 2) << (6 * 32)\n\t"
17874         "adds   r10, r10, r4\n\t"
17875         "adc    r14, r14, r5\n\t"
17876         "adds   r10, r10, r4\n\t"
17877         "adc    r14, r14, r5\n\t"
17878         "#   - a[0] << (7 * 32)\n\t"
17879         "sub    r14, r14, r4\n\t"
17880         "#   + a[0]-a[4] << (3 * 32)\n\t"
17881         "mov    %[a], r7\n\t"
17882         "mov    %[b], r8\n\t"
17883         "adds   r7, r7, r4\n\t"
17884         "adcs   r8, r8, r5\n\t"
17885         "adcs   r9, r9, r6\n\t"
17886         "adcs   r10, r10, %[a]\n\t"
17887         "adc    r14, r14, %[b]\n\t"
17888         "str    r4, [%[tmp], #0]\n\t"
17889         "str    r5, [%[tmp], #4]\n\t"
17890         "str    r6, [%[tmp], #8]\n\t"
17891         "str    r7, [%[tmp], #12]\n\t"
17892         "str    r8, [%[tmp], #16]\n\t"
17893         "str    r9, [%[tmp], #20]\n\t"
17894         "# a += mu * m\n\t"
17895         "#   += mu * ((1 << 256) - (1 << 224) + (1 << 192) + (1 << 96) - 1)\n\t"
17896         "mov    %[a], #0\n\t"
17897         "# a[6] +=        t[0] + t[3]\n\t"
17898         "ldr    r3, [%[tmp], #24]\n\t"
17899         "adds   r3, r3, r4\n\t"
17900         "adc    %[b], %[a], #0\n\t"
17901         "adds   r3, r3, r7\n\t"
17902         "adc    %[b], %[b], #0\n\t"
17903         "str    r10, [%[tmp], #24]\n\t"
17904         "# a[7] +=        t[1] + t[4]\n\t"
17905         "ldr    r3, [%[tmp], #28]\n\t"
17906         "adds   r3, r3, %[b]\n\t"
17907         "adc    %[b], %[a], #0\n\t"
17908         "adds   r3, r3, r5\n\t"
17909         "adc    %[b], %[b], #0\n\t"
17910         "adds   r3, r3, r8\n\t"
17911         "adc    %[b], %[b], #0\n\t"
17912         "str    r14, [%[tmp], #28]\n\t"
17913         "str    r3, [%[tmp], #32]\n\t"
17914         "# a[8] += t[0] + t[2] + t[5]\n\t"
17915         "ldr    r3, [%[r], #0]\n\t"
17916         "adds   r3, r3, %[b]\n\t"
17917         "adc    %[b], %[a], #0\n\t"
17918         "adds   r3, r3, r4\n\t"
17919         "adc    %[b], %[b], #0\n\t"
17920         "adds   r3, r3, r6\n\t"
17921         "adc    %[b], %[b], #0\n\t"
17922         "adds   r3, r3, r9\n\t"
17923         "adc    %[b], %[b], #0\n\t"
17924         "str    r3, [%[r], #0]\n\t"
17925         "# a[9]  += t[1] + t[3] + t[6]\n\t"
17926         "# a[10] += t[2] + t[4] + t[7]\n\t"
17927         "ldr    r3, [%[r], #4]\n\t"
17928         "ldr    r4, [%[r], #8]\n\t"
17929         "adds   r3, r3, %[b]\n\t"
17930         "adcs   r4, r4, #0\n\t"
17931         "adc    %[b], %[a], #0\n\t"
17932         "adds   r3, r3, r5\n\t"
17933         "adcs   r4, r4, r6\n\t"
17934         "adc    %[b], %[b], #0\n\t"
17935         "adds   r3, r3, r7\n\t"
17936         "adcs   r4, r4, r8\n\t"
17937         "adc    %[b], %[b], #0\n\t"
17938         "adds   r3, r3, r10\n\t"
17939         "adcs   r4, r4, r14\n\t"
17940         "adc    %[b], %[b], #0\n\t"
17941         "str    r3, [%[r], #4]\n\t"
17942         "str    r4, [%[r], #8]\n\t"
17943         "# a[11] += t[3] + t[5]\n\t"
17944         "# a[12] += t[4] + t[6]\n\t"
17945         "# a[13] += t[5] + t[7]\n\t"
17946         "# a[14] += t[6]\n\t"
17947         "ldr    r3, [%[r], #12]\n\t"
17948         "ldr    r4, [%[r], #16]\n\t"
17949         "ldr    r5, [%[r], #20]\n\t"
17950         "ldr    r6, [%[r], #24]\n\t"
17951         "adds   r3, r3, %[b]\n\t"
17952         "adcs   r4, r4, #0\n\t"
17953         "adcs   r5, r5, #0\n\t"
17954         "adcs   r6, r6, #0\n\t"
17955         "adc    %[b], %[a], #0\n\t"
17956         "adds   r3, r3, r7\n\t"
17957         "adcs   r4, r4, r8\n\t"
17958         "adcs   r5, r5, r9\n\t"
17959         "adcs   r6, r6, r10\n\t"
17960         "adc    %[b], %[b], #0\n\t"
17961         "adds   r3, r3, r9\n\t"
17962         "adcs   r4, r4, r10\n\t"
17963         "adcs   r5, r5, r14\n\t"
17964         "adcs   r6, r6, #0\n\t"
17965         "adc    %[b], %[b], #0\n\t"
17966         "str    r3, [%[r], #12]\n\t"
17967         "str    r4, [%[r], #16]\n\t"
17968         "str    r5, [%[r], #20]\n\t"
17969         "str    r6, [%[r], #24]\n\t"
17970         "# a[15] += t[7]\n\t"
17971         "ldr    r3, [%[r], #28]\n\t"
17972         "adds   r3, r3, %[b]\n\t"
17973         "adc    %[b], %[a], #0\n\t"
17974         "adds   r3, r3, r14\n\t"
17975         "adc    %[b], %[b], #0\n\t"
17976         "str    r3, [%[r], #28]\n\t"
17977         "ldr    r3, [%[tmp], #32]\n\t"
17978         "ldr    r4, [%[r], #0]\n\t"
17979         "ldr    r5, [%[r], #4]\n\t"
17980         "ldr    r6, [%[r], #8]\n\t"
17981         "ldr    r8, [%[tmp], #0]\n\t"
17982         "ldr    r9, [%[tmp], #4]\n\t"
17983         "ldr    r10, [%[tmp], #8]\n\t"
17984         "ldr    r14, [%[tmp], #12]\n\t"
17985         "subs   r3, r3, r8\n\t"
17986         "sbcs   r4, r4, r9\n\t"
17987         "sbcs   r5, r5, r10\n\t"
17988         "sbcs   r6, r6, r14\n\t"
17989         "str    r4, [%[r], #0]\n\t"
17990         "str    r5, [%[r], #4]\n\t"
17991         "str    r6, [%[r], #8]\n\t"
17992         "ldr    r3, [%[r], #12]\n\t"
17993         "ldr    r4, [%[r], #16]\n\t"
17994         "ldr    r5, [%[r], #20]\n\t"
17995         "ldr    r6, [%[r], #24]\n\t"
17996         "ldr    r7, [%[r], #28]\n\t"
17997         "ldr    r8, [%[tmp], #16]\n\t"
17998         "ldr    r9, [%[tmp], #20]\n\t"
17999         "ldr    r10, [%[tmp], #24]\n\t"
18000         "ldr    r14, [%[tmp], #28]\n\t"
18001         "sbcs   r3, r3, r8\n\t"
18002         "sbcs   r4, r4, r9\n\t"
18003         "sbcs   r5, r5, r10\n\t"
18004         "sbcs   r6, r6, r14\n\t"
18005         "sbc    r7, r7, #0\n\t"
18006         "str    r3, [%[r], #12]\n\t"
18007         "str    r4, [%[r], #16]\n\t"
18008         "str    r5, [%[r], #20]\n\t"
18009         "str    r6, [%[r], #24]\n\t"
18010         "str    r7, [%[r], #28]\n\t"
18011         "# mask m and sub from result if overflow\n\t"
18012         "sub    %[b], %[a], %[b]\n\t"
18013         "and    %[a], %[b], #1\n\t"
18014         "ldr    r3, [%[r], #0]\n\t"
18015         "ldr    r4, [%[r], #4]\n\t"
18016         "ldr    r5, [%[r], #8]\n\t"
18017         "ldr    r6, [%[r], #12]\n\t"
18018         "ldr    r7, [%[r], #16]\n\t"
18019         "ldr    r8, [%[r], #20]\n\t"
18020         "ldr    r9, [%[r], #24]\n\t"
18021         "ldr    r10, [%[r], #28]\n\t"
18022         "subs   r3, r3, %[b]\n\t"
18023         "sbcs   r4, r4, %[b]\n\t"
18024         "sbcs   r5, r5, %[b]\n\t"
18025         "sbcs   r6, r6, #0\n\t"
18026         "sbcs   r7, r7, #0\n\t"
18027         "sbcs   r8, r8, #0\n\t"
18028         "sbcs   r9, r9, %[a]\n\t"
18029         "sbc    r10, r10, %[b]\n\t"
18030         "str    r3, [%[r], #0]\n\t"
18031         "str    r4, [%[r], #4]\n\t"
18032         "str    r5, [%[r], #8]\n\t"
18033         "str    r6, [%[r], #12]\n\t"
18034         "str    r7, [%[r], #16]\n\t"
18035         "str    r8, [%[r], #20]\n\t"
18036         "str    r9, [%[r], #24]\n\t"
18037         "str    r10, [%[r], #28]\n\t"
18038         : [a] "+r" (a), [b] "+r" (b)
18039         : [r] "r" (r), [tmp] "r" (tmp)
18040         : "memory", "r8", "r9", "r10", "r14", "r3", "r4", "r5", "r6", "r7"
18041     );
18042 }
18043 
18044 /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
18045  *
18046  * r   Result of squaring.
18047  * a   Number to square in Montgomery form.
18048  * m   Modulus (prime).
18049  * mp  Montgomery multiplier.
18050  */
18051 SP_NOINLINE static void sp_256_mont_sqr_8(sp_digit* r, sp_digit* a, sp_digit* m,
18052         sp_digit mp)
18053 {
18054     sp_digit tmp[16];
18055 
18056     (void)mp;
18057     (void)m;
18058 
18059     __asm__ __volatile__ (
18060         "mov    r5, #0\n\t"
18061         "#  A[0] * A[1]\n\t"
18062         "ldr    r6, [%[a], #0]\n\t"
18063         "ldr    r7, [%[a], #4]\n\t"
18064         "umull  r9, r10, r6, r7\n\t"
18065         "str    r9, [%[tmp], #4]\n\t"
18066         "#  A[0] * A[2]\n\t"
18067         "ldr    r6, [%[a], #0]\n\t"
18068         "ldr    r7, [%[a], #8]\n\t"
18069         "umull  r3, r4, r6, r7\n\t"
18070         "adds   r10, r3, r10\n\t"
18071         "adc    r14, r4, #0\n\t"
18072         "str    r10, [%[tmp], #8]\n\t"
18073         "#  A[0] * A[3]\n\t"
18074         "ldr    r6, [%[a], #0]\n\t"
18075         "ldr    r7, [%[a], #12]\n\t"
18076         "umull  r3, r4, r6, r7\n\t"
18077         "adds   r14, r3, r14\n\t"
18078         "adc    r8, r4, #0\n\t"
18079         "#  A[1] * A[2]\n\t"
18080         "ldr    r6, [%[a], #4]\n\t"
18081         "ldr    r7, [%[a], #8]\n\t"
18082         "umull  r3, r4, r6, r7\n\t"
18083         "adds   r14, r3, r14\n\t"
18084         "adcs   r8, r4, r8\n\t"
18085         "adc    r9, r5, #0\n\t"
18086         "str    r14, [%[tmp], #12]\n\t"
18087         "#  A[0] * A[4]\n\t"
18088         "ldr    r6, [%[a], #0]\n\t"
18089         "ldr    r7, [%[a], #16]\n\t"
18090         "umull  r3, r4, r6, r7\n\t"
18091         "adds   r8, r3, r8\n\t"
18092         "adc    r9, r4, r9\n\t"
18093         "#  A[1] * A[3]\n\t"
18094         "ldr    r6, [%[a], #4]\n\t"
18095         "ldr    r7, [%[a], #12]\n\t"
18096         "umull  r3, r4, r6, r7\n\t"
18097         "adds   r8, r3, r8\n\t"
18098         "adcs   r9, r4, r9\n\t"
18099         "adc    r10, r5, #0\n\t"
18100         "str    r8, [%[tmp], #16]\n\t"
18101         "#  A[0] * A[5]\n\t"
18102         "ldr    r6, [%[a], #0]\n\t"
18103         "ldr    r7, [%[a], #20]\n\t"
18104         "umull  r3, r4, r6, r7\n\t"
18105         "adds   r9, r3, r9\n\t"
18106         "adc    r10, r4, r10\n\t"
18107         "#  A[1] * A[4]\n\t"
18108         "ldr    r6, [%[a], #4]\n\t"
18109         "ldr    r7, [%[a], #16]\n\t"
18110         "umull  r3, r4, r6, r7\n\t"
18111         "adds   r9, r3, r9\n\t"
18112         "adcs   r10, r4, r10\n\t"
18113         "adc    r14, r5, #0\n\t"
18114         "#  A[2] * A[3]\n\t"
18115         "ldr    r6, [%[a], #8]\n\t"
18116         "ldr    r7, [%[a], #12]\n\t"
18117         "umull  r3, r4, r6, r7\n\t"
18118         "adds   r9, r3, r9\n\t"
18119         "adcs   r10, r4, r10\n\t"
18120         "adc    r14, r5, r14\n\t"
18121         "str    r9, [%[tmp], #20]\n\t"
18122         "#  A[0] * A[6]\n\t"
18123         "ldr    r6, [%[a], #0]\n\t"
18124         "ldr    r7, [%[a], #24]\n\t"
18125         "umull  r3, r4, r6, r7\n\t"
18126         "adds   r10, r3, r10\n\t"
18127         "adcs   r14, r4, r14\n\t"
18128         "adc    r8, r5, #0\n\t"
18129         "#  A[1] * A[5]\n\t"
18130         "ldr    r6, [%[a], #4]\n\t"
18131         "ldr    r7, [%[a], #20]\n\t"
18132         "umull  r3, r4, r6, r7\n\t"
18133         "adds   r10, r3, r10\n\t"
18134         "adcs   r14, r4, r14\n\t"
18135         "adc    r8, r5, r8\n\t"
18136         "#  A[2] * A[4]\n\t"
18137         "ldr    r6, [%[a], #8]\n\t"
18138         "ldr    r7, [%[a], #16]\n\t"
18139         "umull  r3, r4, r6, r7\n\t"
18140         "adds   r10, r3, r10\n\t"
18141         "adcs   r14, r4, r14\n\t"
18142         "adc    r8, r5, r8\n\t"
18143         "str    r10, [%[tmp], #24]\n\t"
18144         "#  A[0] * A[7]\n\t"
18145         "ldr    r6, [%[a], #0]\n\t"
18146         "ldr    r7, [%[a], #28]\n\t"
18147         "umull  r3, r4, r6, r7\n\t"
18148         "adds   r14, r3, r14\n\t"
18149         "adcs   r8, r4, r8\n\t"
18150         "adc    r9, r5, #0\n\t"
18151         "#  A[1] * A[6]\n\t"
18152         "ldr    r6, [%[a], #4]\n\t"
18153         "ldr    r7, [%[a], #24]\n\t"
18154         "umull  r3, r4, r6, r7\n\t"
18155         "adds   r14, r3, r14\n\t"
18156         "adcs   r8, r4, r8\n\t"
18157         "adc    r9, r5, r9\n\t"
18158         "#  A[2] * A[5]\n\t"
18159         "ldr    r6, [%[a], #8]\n\t"
18160         "ldr    r7, [%[a], #20]\n\t"
18161         "umull  r3, r4, r6, r7\n\t"
18162         "adds   r14, r3, r14\n\t"
18163         "adcs   r8, r4, r8\n\t"
18164         "adc    r9, r5, r9\n\t"
18165         "#  A[3] * A[4]\n\t"
18166         "ldr    r6, [%[a], #12]\n\t"
18167         "ldr    r7, [%[a], #16]\n\t"
18168         "umull  r3, r4, r6, r7\n\t"
18169         "adds   r14, r3, r14\n\t"
18170         "adcs   r8, r4, r8\n\t"
18171         "adc    r9, r5, r9\n\t"
18172         "str    r14, [%[tmp], #28]\n\t"
18173         "#  A[1] * A[7]\n\t"
18174         "ldr    r6, [%[a], #4]\n\t"
18175         "ldr    r7, [%[a], #28]\n\t"
18176         "umull  r3, r4, r6, r7\n\t"
18177         "adds   r8, r3, r8\n\t"
18178         "adcs   r9, r4, r9\n\t"
18179         "adc    r10, r5, #0\n\t"
18180         "#  A[2] * A[6]\n\t"
18181         "ldr    r6, [%[a], #8]\n\t"
18182         "ldr    r7, [%[a], #24]\n\t"
18183         "umull  r3, r4, r6, r7\n\t"
18184         "adds   r8, r3, r8\n\t"
18185         "adcs   r9, r4, r9\n\t"
18186         "adc    r10, r5, r10\n\t"
18187         "#  A[3] * A[5]\n\t"
18188         "ldr    r6, [%[a], #12]\n\t"
18189         "ldr    r7, [%[a], #20]\n\t"
18190         "umull  r3, r4, r6, r7\n\t"
18191         "adds   r8, r3, r8\n\t"
18192         "adcs   r9, r4, r9\n\t"
18193         "adc    r10, r5, r10\n\t"
18194         "str    r8, [%[tmp], #32]\n\t"
18195         "#  A[2] * A[7]\n\t"
18196         "ldr    r6, [%[a], #8]\n\t"
18197         "ldr    r7, [%[a], #28]\n\t"
18198         "umull  r3, r4, r6, r7\n\t"
18199         "adds   r9, r3, r9\n\t"
18200         "adcs   r10, r4, r10\n\t"
18201         "adc    r14, r5, #0\n\t"
18202         "#  A[3] * A[6]\n\t"
18203         "ldr    r6, [%[a], #12]\n\t"
18204         "ldr    r7, [%[a], #24]\n\t"
18205         "umull  r3, r4, r6, r7\n\t"
18206         "adds   r9, r3, r9\n\t"
18207         "adcs   r10, r4, r10\n\t"
18208         "adc    r14, r5, r14\n\t"
18209         "#  A[4] * A[5]\n\t"
18210         "ldr    r6, [%[a], #16]\n\t"
18211         "ldr    r7, [%[a], #20]\n\t"
18212         "umull  r3, r4, r6, r7\n\t"
18213         "adds   r9, r3, r9\n\t"
18214         "adcs   r10, r4, r10\n\t"
18215         "adc    r14, r5, r14\n\t"
18216         "str    r9, [%[tmp], #36]\n\t"
18217         "#  A[3] * A[7]\n\t"
18218         "ldr    r6, [%[a], #12]\n\t"
18219         "ldr    r7, [%[a], #28]\n\t"
18220         "umull  r3, r4, r6, r7\n\t"
18221         "adds   r10, r3, r10\n\t"
18222         "adcs   r14, r4, r14\n\t"
18223         "adc    r8, r5, #0\n\t"
18224         "#  A[4] * A[6]\n\t"
18225         "ldr    r6, [%[a], #16]\n\t"
18226         "ldr    r7, [%[a], #24]\n\t"
18227         "umull  r3, r4, r6, r7\n\t"
18228         "adds   r10, r3, r10\n\t"
18229         "adcs   r14, r4, r14\n\t"
18230         "adc    r8, r5, r8\n\t"
18231         "str    r10, [%[tmp], #40]\n\t"
18232         "#  A[4] * A[7]\n\t"
18233         "ldr    r6, [%[a], #16]\n\t"
18234         "ldr    r7, [%[a], #28]\n\t"
18235         "umull  r3, r4, r6, r7\n\t"
18236         "adds   r14, r3, r14\n\t"
18237         "adcs   r8, r4, r8\n\t"
18238         "adc    r9, r5, #0\n\t"
18239         "#  A[5] * A[6]\n\t"
18240         "ldr    r6, [%[a], #20]\n\t"
18241         "ldr    r7, [%[a], #24]\n\t"
18242         "umull  r3, r4, r6, r7\n\t"
18243         "adds   r14, r3, r14\n\t"
18244         "adcs   r8, r4, r8\n\t"
18245         "adc    r9, r5, r9\n\t"
18246         "str    r14, [%[tmp], #44]\n\t"
18247         "#  A[5] * A[7]\n\t"
18248         "ldr    r6, [%[a], #20]\n\t"
18249         "ldr    r7, [%[a], #28]\n\t"
18250         "umull  r3, r4, r6, r7\n\t"
18251         "adds   r8, r3, r8\n\t"
18252         "adcs   r9, r4, r9\n\t"
18253         "adc    r10, r5, #0\n\t"
18254         "str    r8, [%[tmp], #48]\n\t"
18255         "#  A[6] * A[7]\n\t"
18256         "ldr    r6, [%[a], #24]\n\t"
18257         "ldr    r7, [%[a], #28]\n\t"
18258         "umull  r3, r4, r6, r7\n\t"
18259         "adds   r9, r3, r9\n\t"
18260         "adc    r10, r4, r10\n\t"
18261         "str    r9, [%[tmp], #52]\n\t"
18262         "str    r10, [%[tmp], #56]\n\t"
18263         "# Double\n\t"
18264         "ldr    r4, [%[tmp], #4]\n\t"
18265         "ldr    r6, [%[tmp], #8]\n\t"
18266         "ldr    r7, [%[tmp], #12]\n\t"
18267         "ldr    r8, [%[tmp], #16]\n\t"
18268         "ldr    r9, [%[tmp], #20]\n\t"
18269         "ldr    r10, [%[tmp], #24]\n\t"
18270         "ldr    r14, [%[tmp], #28]\n\t"
18271         "ldr    r12, [%[tmp], #32]\n\t"
18272         "ldr    r3, [%[tmp], #36]\n\t"
18273         "adds   r4, r4, r4\n\t"
18274         "adcs   r6, r6, r6\n\t"
18275         "adcs   r7, r7, r7\n\t"
18276         "adcs   r8, r8, r8\n\t"
18277         "adcs   r9, r9, r9\n\t"
18278         "adcs   r10, r10, r10\n\t"
18279         "adcs   r14, r14, r14\n\t"
18280         "adcs   r12, r12, r12\n\t"
18281         "adcs   r3, r3, r3\n\t"
18282         "str    r4, [%[tmp], #4]\n\t"
18283         "str    r6, [%[tmp], #8]\n\t"
18284         "str    r7, [%[tmp], #12]\n\t"
18285         "str    r8, [%[tmp], #16]\n\t"
18286         "str    r9, [%[tmp], #20]\n\t"
18287         "str    r10, [%[tmp], #24]\n\t"
18288         "str    r14, [%[tmp], #28]\n\t"
18289         "str    r12, [%[tmp], #32]\n\t"
18290         "str    r3, [%[tmp], #36]\n\t"
18291         "ldr    r4, [%[tmp], #40]\n\t"
18292         "ldr    r6, [%[tmp], #44]\n\t"
18293         "ldr    r7, [%[tmp], #48]\n\t"
18294         "ldr    r8, [%[tmp], #52]\n\t"
18295         "ldr    r9, [%[tmp], #56]\n\t"
18296         "adcs   r4, r4, r4\n\t"
18297         "adcs   r6, r6, r6\n\t"
18298         "adcs   r7, r7, r7\n\t"
18299         "adcs   r8, r8, r8\n\t"
18300         "adcs   r9, r9, r9\n\t"
18301         "str    r4, [%[tmp], #40]\n\t"
18302         "str    r6, [%[tmp], #44]\n\t"
18303         "str    r7, [%[tmp], #48]\n\t"
18304         "str    r8, [%[tmp], #52]\n\t"
18305         "str    r9, [%[tmp], #56]\n\t"
18306         "adc    r10, r5, #0\n\t"
18307         "str    r10, [%[tmp], #60]\n\t"
18308         "ldr    r4, [%[tmp], #4]\n\t"
18309         "ldr    r5, [%[tmp], #8]\n\t"
18310         "ldr    r12, [%[tmp], #12]\n\t"
18311         "#  A[0] * A[0]\n\t"
18312         "ldr    r6, [%[a], #0]\n\t"
18313         "umull  r8, r9, r6, r6\n\t"
18314         "#  A[1] * A[1]\n\t"
18315         "ldr    r6, [%[a], #4]\n\t"
18316         "umull  r10, r14, r6, r6\n\t"
18317         "adds   r9, r9, r4\n\t"
18318         "adcs   r10, r10, r5\n\t"
18319         "adcs   r14, r14, r12\n\t"
18320         "str    r8, [%[tmp], #0]\n\t"
18321         "str    r9, [%[tmp], #4]\n\t"
18322         "str    r10, [%[tmp], #8]\n\t"
18323         "str    r14, [%[tmp], #12]\n\t"
18324         "ldr    r3, [%[tmp], #16]\n\t"
18325         "ldr    r4, [%[tmp], #20]\n\t"
18326         "ldr    r5, [%[tmp], #24]\n\t"
18327         "ldr    r12, [%[tmp], #28]\n\t"
18328         "#  A[2] * A[2]\n\t"
18329         "ldr    r6, [%[a], #8]\n\t"
18330         "umull  r8, r9, r6, r6\n\t"
18331         "#  A[3] * A[3]\n\t"
18332         "ldr    r6, [%[a], #12]\n\t"
18333         "umull  r10, r14, r6, r6\n\t"
18334         "adcs   r8, r8, r3\n\t"
18335         "adcs   r9, r9, r4\n\t"
18336         "adcs   r10, r10, r5\n\t"
18337         "adcs   r14, r14, r12\n\t"
18338         "str    r8, [%[tmp], #16]\n\t"
18339         "str    r9, [%[tmp], #20]\n\t"
18340         "str    r10, [%[tmp], #24]\n\t"
18341         "str    r14, [%[tmp], #28]\n\t"
18342         "ldr    r3, [%[tmp], #32]\n\t"
18343         "ldr    r4, [%[tmp], #36]\n\t"
18344         "ldr    r5, [%[tmp], #40]\n\t"
18345         "ldr    r12, [%[tmp], #44]\n\t"
18346         "#  A[4] * A[4]\n\t"
18347         "ldr    r6, [%[a], #16]\n\t"
18348         "umull  r8, r9, r6, r6\n\t"
18349         "#  A[5] * A[5]\n\t"
18350         "ldr    r6, [%[a], #20]\n\t"
18351         "umull  r10, r14, r6, r6\n\t"
18352         "adcs   r8, r8, r3\n\t"
18353         "adcs   r9, r9, r4\n\t"
18354         "adcs   r10, r10, r5\n\t"
18355         "adcs   r14, r14, r12\n\t"
18356         "str    r8, [%[r], #0]\n\t"
18357         "str    r9, [%[r], #4]\n\t"
18358         "str    r10, [%[r], #8]\n\t"
18359         "str    r14, [%[r], #12]\n\t"
18360         "ldr    r3, [%[tmp], #48]\n\t"
18361         "ldr    r4, [%[tmp], #52]\n\t"
18362         "ldr    r5, [%[tmp], #56]\n\t"
18363         "ldr    r12, [%[tmp], #60]\n\t"
18364         "#  A[6] * A[6]\n\t"
18365         "ldr    r6, [%[a], #24]\n\t"
18366         "umull  r8, r9, r6, r6\n\t"
18367         "#  A[7] * A[7]\n\t"
18368         "ldr    r6, [%[a], #28]\n\t"
18369         "umull  r10, r14, r6, r6\n\t"
18370         "adcs   r8, r8, r3\n\t"
18371         "adcs   r9, r9, r4\n\t"
18372         "adcs   r10, r10, r5\n\t"
18373         "adc    r14, r14, r12\n\t"
18374         "str    r8, [%[r], #16]\n\t"
18375         "str    r9, [%[r], #20]\n\t"
18376         "str    r10, [%[r], #24]\n\t"
18377         "str    r14, [%[r], #28]\n\t"
18378         "# Start Reduction\n\t"
18379         "ldr    r4, [%[tmp], #0]\n\t"
18380         "ldr    r5, [%[tmp], #4]\n\t"
18381         "ldr    r6, [%[tmp], #8]\n\t"
18382         "ldr    r7, [%[tmp], #12]\n\t"
18383         "ldr    r8, [%[tmp], #16]\n\t"
18384         "ldr    r9, [%[tmp], #20]\n\t"
18385         "ldr    r10, [%[tmp], #24]\n\t"
18386         "ldr    r14, [%[tmp], #28]\n\t"
18387         "# mu = a[0]-a[7] + a[0]-a[4] << 96 + (a[0]-a[1] * 2) << 192\n\t"
18388         "#    - a[0] << 224\n\t"
18389         "#   + (a[0]-a[1] * 2) << (6 * 32)\n\t"
18390         "adds   r10, r10, r4\n\t"
18391         "adc    r14, r14, r5\n\t"
18392         "adds   r10, r10, r4\n\t"
18393         "adc    r14, r14, r5\n\t"
18394         "#   - a[0] << (7 * 32)\n\t"
18395         "sub    r14, r14, r4\n\t"
18396         "#   + a[0]-a[4] << (3 * 32)\n\t"
18397         "mov    %[a], r7\n\t"
18398         "mov    r12, r8\n\t"
18399         "adds   r7, r7, r4\n\t"
18400         "adcs   r8, r8, r5\n\t"
18401         "adcs   r9, r9, r6\n\t"
18402         "adcs   r10, r10, %[a]\n\t"
18403         "adc    r14, r14, r12\n\t"
18404         "str    r4, [%[tmp], #0]\n\t"
18405         "str    r5, [%[tmp], #4]\n\t"
18406         "str    r6, [%[tmp], #8]\n\t"
18407         "str    r7, [%[tmp], #12]\n\t"
18408         "str    r8, [%[tmp], #16]\n\t"
18409         "str    r9, [%[tmp], #20]\n\t"
18410         "# a += mu * m\n\t"
18411         "#   += mu * ((1 << 256) - (1 << 224) + (1 << 192) + (1 << 96) - 1)\n\t"
18412         "mov    %[a], #0\n\t"
18413         "# a[6] +=        t[0] + t[3]\n\t"
18414         "ldr    r3, [%[tmp], #24]\n\t"
18415         "adds   r3, r3, r4\n\t"
18416         "adc    r12, %[a], #0\n\t"
18417         "adds   r3, r3, r7\n\t"
18418         "adc    r12, r12, #0\n\t"
18419         "str    r10, [%[tmp], #24]\n\t"
18420         "# a[7] +=        t[1] + t[4]\n\t"
18421         "ldr    r3, [%[tmp], #28]\n\t"
18422         "adds   r3, r3, r12\n\t"
18423         "adc    r12, %[a], #0\n\t"
18424         "adds   r3, r3, r5\n\t"
18425         "adc    r12, r12, #0\n\t"
18426         "adds   r3, r3, r8\n\t"
18427         "adc    r12, r12, #0\n\t"
18428         "str    r14, [%[tmp], #28]\n\t"
18429         "str    r3, [%[tmp], #32]\n\t"
18430         "# a[8] += t[0] + t[2] + t[5]\n\t"
18431         "ldr    r3, [%[r], #0]\n\t"
18432         "adds   r3, r3, r12\n\t"
18433         "adc    r12, %[a], #0\n\t"
18434         "adds   r3, r3, r4\n\t"
18435         "adc    r12, r12, #0\n\t"
18436         "adds   r3, r3, r6\n\t"
18437         "adc    r12, r12, #0\n\t"
18438         "adds   r3, r3, r9\n\t"
18439         "adc    r12, r12, #0\n\t"
18440         "str    r3, [%[r], #0]\n\t"
18441         "# a[9]  += t[1] + t[3] + t[6]\n\t"
18442         "# a[10] += t[2] + t[4] + t[7]\n\t"
18443         "ldr    r3, [%[r], #4]\n\t"
18444         "ldr    r4, [%[r], #8]\n\t"
18445         "adds   r3, r3, r12\n\t"
18446         "adcs   r4, r4, #0\n\t"
18447         "adc    r12, %[a], #0\n\t"
18448         "adds   r3, r3, r5\n\t"
18449         "adcs   r4, r4, r6\n\t"
18450         "adc    r12, r12, #0\n\t"
18451         "adds   r3, r3, r7\n\t"
18452         "adcs   r4, r4, r8\n\t"
18453         "adc    r12, r12, #0\n\t"
18454         "adds   r3, r3, r10\n\t"
18455         "adcs   r4, r4, r14\n\t"
18456         "adc    r12, r12, #0\n\t"
18457         "str    r3, [%[r], #4]\n\t"
18458         "str    r4, [%[r], #8]\n\t"
18459         "# a[11] += t[3] + t[5]\n\t"
18460         "# a[12] += t[4] + t[6]\n\t"
18461         "# a[13] += t[5] + t[7]\n\t"
18462         "# a[14] += t[6]\n\t"
18463         "ldr    r3, [%[r], #12]\n\t"
18464         "ldr    r4, [%[r], #16]\n\t"
18465         "ldr    r5, [%[r], #20]\n\t"
18466         "ldr    r6, [%[r], #24]\n\t"
18467         "adds   r3, r3, r12\n\t"
18468         "adcs   r4, r4, #0\n\t"
18469         "adcs   r5, r5, #0\n\t"
18470         "adcs   r6, r6, #0\n\t"
18471         "adc    r12, %[a], #0\n\t"
18472         "adds   r3, r3, r7\n\t"
18473         "adcs   r4, r4, r8\n\t"
18474         "adcs   r5, r5, r9\n\t"
18475         "adcs   r6, r6, r10\n\t"
18476         "adc    r12, r12, #0\n\t"
18477         "adds   r3, r3, r9\n\t"
18478         "adcs   r4, r4, r10\n\t"
18479         "adcs   r5, r5, r14\n\t"
18480         "adcs   r6, r6, #0\n\t"
18481         "adc    r12, r12, #0\n\t"
18482         "str    r3, [%[r], #12]\n\t"
18483         "str    r4, [%[r], #16]\n\t"
18484         "str    r5, [%[r], #20]\n\t"
18485         "str    r6, [%[r], #24]\n\t"
18486         "# a[15] += t[7]\n\t"
18487         "ldr    r3, [%[r], #28]\n\t"
18488         "adds   r3, r3, r12\n\t"
18489         "adc    r12, %[a], #0\n\t"
18490         "adds   r3, r3, r14\n\t"
18491         "adc    r12, r12, #0\n\t"
18492         "str    r3, [%[r], #28]\n\t"
18493         "ldr    r3, [%[tmp], #32]\n\t"
18494         "ldr    r4, [%[r], #0]\n\t"
18495         "ldr    r5, [%[r], #4]\n\t"
18496         "ldr    r6, [%[r], #8]\n\t"
18497         "ldr    r8, [%[tmp], #0]\n\t"
18498         "ldr    r9, [%[tmp], #4]\n\t"
18499         "ldr    r10, [%[tmp], #8]\n\t"
18500         "ldr    r14, [%[tmp], #12]\n\t"
18501         "subs   r3, r3, r8\n\t"
18502         "sbcs   r4, r4, r9\n\t"
18503         "sbcs   r5, r5, r10\n\t"
18504         "sbcs   r6, r6, r14\n\t"
18505         "str    r4, [%[r], #0]\n\t"
18506         "str    r5, [%[r], #4]\n\t"
18507         "str    r6, [%[r], #8]\n\t"
18508         "ldr    r3, [%[r], #12]\n\t"
18509         "ldr    r4, [%[r], #16]\n\t"
18510         "ldr    r5, [%[r], #20]\n\t"
18511         "ldr    r6, [%[r], #24]\n\t"
18512         "ldr    r7, [%[r], #28]\n\t"
18513         "ldr    r8, [%[tmp], #16]\n\t"
18514         "ldr    r9, [%[tmp], #20]\n\t"
18515         "ldr    r10, [%[tmp], #24]\n\t"
18516         "ldr    r14, [%[tmp], #28]\n\t"
18517         "sbcs   r3, r3, r8\n\t"
18518         "sbcs   r4, r4, r9\n\t"
18519         "sbcs   r5, r5, r10\n\t"
18520         "sbcs   r6, r6, r14\n\t"
18521         "sbc    r7, r7, #0\n\t"
18522         "str    r3, [%[r], #12]\n\t"
18523         "str    r4, [%[r], #16]\n\t"
18524         "str    r5, [%[r], #20]\n\t"
18525         "str    r6, [%[r], #24]\n\t"
18526         "str    r7, [%[r], #28]\n\t"
18527         "# mask m and sub from result if overflow\n\t"
18528         "sub    r12, %[a], r12\n\t"
18529         "and    %[a], r12, #1\n\t"
18530         "ldr    r3, [%[r], #0]\n\t"
18531         "ldr    r4, [%[r], #4]\n\t"
18532         "ldr    r5, [%[r], #8]\n\t"
18533         "ldr    r6, [%[r], #12]\n\t"
18534         "ldr    r7, [%[r], #16]\n\t"
18535         "ldr    r8, [%[r], #20]\n\t"
18536         "ldr    r9, [%[r], #24]\n\t"
18537         "ldr    r10, [%[r], #28]\n\t"
18538         "subs   r3, r3, r12\n\t"
18539         "sbcs   r4, r4, r12\n\t"
18540         "sbcs   r5, r5, r12\n\t"
18541         "sbcs   r6, r6, #0\n\t"
18542         "sbcs   r7, r7, #0\n\t"
18543         "sbcs   r8, r8, #0\n\t"
18544         "sbcs   r9, r9, %[a]\n\t"
18545         "sbc    r10, r10, r12\n\t"
18546         "str    r3, [%[r], #0]\n\t"
18547         "str    r4, [%[r], #4]\n\t"
18548         "str    r5, [%[r], #8]\n\t"
18549         "str    r6, [%[r], #12]\n\t"
18550         "str    r7, [%[r], #16]\n\t"
18551         "str    r8, [%[r], #20]\n\t"
18552         "str    r9, [%[r], #24]\n\t"
18553         "str    r10, [%[r], #28]\n\t"
18554         : [a] "+r" (a)
18555         : [r] "r" (r), [tmp] "r" (tmp)
18556         : "memory", "r8", "r9", "r10", "r14", "r3", "r4", "r5", "r6", "r7", "r12"
18557     );
18558 }
18559 
18560 #ifndef WOLFSSL_SP_SMALL
18561 /* Square the Montgomery form number a number of times. (r = a ^ n mod m)
18562  *
18563  * r   Result of squaring.
18564  * a   Number to square in Montogmery form.
18565  * n   Number of times to square.
18566  * m   Modulus (prime).
18567  * mp  Montogmery mulitplier.
18568  */
18569 static void sp_256_mont_sqr_n_8(sp_digit* r, sp_digit* a, int n,
18570         sp_digit* m, sp_digit mp)
18571 {
18572     sp_256_mont_sqr_8(r, a, m, mp);
18573     for (; n > 1; n--)
18574         sp_256_mont_sqr_8(r, r, m, mp);
18575 }
18576 
18577 #else
/* The P256 modulus minus 2, stored least significant 32-bit word first.
 * sp_256_mont_inv_8 reads it bit by bit as the exponent for inversion. */
static const uint32_t p256_mod_2[8] = {
    0xfffffffd, 0xffffffff, 0xffffffff, 0x00000000,
    0x00000000, 0x00000000, 0x00000001, 0xffffffff
};
18583 #endif /* !WOLFSSL_SP_SMALL */
18584 
18585 /* Invert the number, in Montgomery form, modulo the modulus (prime) of the
18586  * P256 curve. (r = 1 / a mod m)
18587  *
18588  * r   Inverse result.
18589  * a   Number to invert.
18590  * td  Temporary data.
18591  */
18592 static void sp_256_mont_inv_8(sp_digit* r, sp_digit* a, sp_digit* td)
18593 {
18594 #ifdef WOLFSSL_SP_SMALL
18595     sp_digit* t = td;
18596     int i;
18597 
18598     XMEMCPY(t, a, sizeof(sp_digit) * 8);
18599     for (i=254; i>=0; i--) {
18600         sp_256_mont_sqr_8(t, t, p256_mod, p256_mp_mod);
18601         if (p256_mod_2[i / 32] & ((sp_digit)1 << (i % 32)))
18602             sp_256_mont_mul_8(t, t, a, p256_mod, p256_mp_mod);
18603     }
18604     XMEMCPY(r, t, sizeof(sp_digit) * 8);
18605 #else
18606     sp_digit* t = td;
18607     sp_digit* t2 = td + 2 * 8;
18608     sp_digit* t3 = td + 4 * 8;
18609 
18610     /* t = a^2 */
18611     sp_256_mont_sqr_8(t, a, p256_mod, p256_mp_mod);
18612     /* t = a^3 = t * a */
18613     sp_256_mont_mul_8(t, t, a, p256_mod, p256_mp_mod);
18614     /* t2= a^c = t ^ 2 ^ 2 */
18615     sp_256_mont_sqr_n_8(t2, t, 2, p256_mod, p256_mp_mod);
18616     /* t3= a^d = t2 * a */
18617     sp_256_mont_mul_8(t3, t2, a, p256_mod, p256_mp_mod);
18618     /* t = a^f = t2 * t */
18619     sp_256_mont_mul_8(t, t2, t, p256_mod, p256_mp_mod);
18620     /* t2= a^f0 = t ^ 2 ^ 4 */
18621     sp_256_mont_sqr_n_8(t2, t, 4, p256_mod, p256_mp_mod);
18622     /* t3= a^fd = t2 * t3 */
18623     sp_256_mont_mul_8(t3, t2, t3, p256_mod, p256_mp_mod);
18624     /* t = a^ff = t2 * t */
18625     sp_256_mont_mul_8(t, t2, t, p256_mod, p256_mp_mod);
18626     /* t2= a^ff00 = t ^ 2 ^ 8 */
18627     sp_256_mont_sqr_n_8(t2, t, 8, p256_mod, p256_mp_mod);
18628     /* t3= a^fffd = t2 * t3 */
18629     sp_256_mont_mul_8(t3, t2, t3, p256_mod, p256_mp_mod);
18630     /* t = a^ffff = t2 * t */
18631     sp_256_mont_mul_8(t, t2, t, p256_mod, p256_mp_mod);
18632     /* t2= a^ffff0000 = t ^ 2 ^ 16 */
18633     sp_256_mont_sqr_n_8(t2, t, 16, p256_mod, p256_mp_mod);
18634     /* t3= a^fffffffd = t2 * t3 */
18635     sp_256_mont_mul_8(t3, t2, t3, p256_mod, p256_mp_mod);
18636     /* t = a^ffffffff = t2 * t */
18637     sp_256_mont_mul_8(t, t2, t, p256_mod, p256_mp_mod);
18638     /* t = a^ffffffff00000000 = t ^ 2 ^ 32  */
18639     sp_256_mont_sqr_n_8(t2, t, 32, p256_mod, p256_mp_mod);
18640     /* t2= a^ffffffffffffffff = t2 * t */
18641     sp_256_mont_mul_8(t, t2, t, p256_mod, p256_mp_mod);
18642     /* t2= a^ffffffff00000001 = t2 * a */
18643     sp_256_mont_mul_8(t2, t2, a, p256_mod, p256_mp_mod);
18644     /* t2= a^ffffffff000000010000000000000000000000000000000000000000
18645      *   = t2 ^ 2 ^ 160 */
18646     sp_256_mont_sqr_n_8(t2, t2, 160, p256_mod, p256_mp_mod);
18647     /* t2= a^ffffffff00000001000000000000000000000000ffffffffffffffff
18648      *   = t2 * t */
18649     sp_256_mont_mul_8(t2, t2, t, p256_mod, p256_mp_mod);
18650     /* t2= a^ffffffff00000001000000000000000000000000ffffffffffffffff00000000
18651      *   = t2 ^ 2 ^ 32 */
18652     sp_256_mont_sqr_n_8(t2, t2, 32, p256_mod, p256_mp_mod);
18653     /* r = a^ffffffff00000001000000000000000000000000fffffffffffffffffffffffd
18654      *   = t2 * t3 */
18655     sp_256_mont_mul_8(r, t2, t3, p256_mod, p256_mp_mod);
18656 #endif /* WOLFSSL_SP_SMALL */
18657 }
18658 
18659 /* Map the Montgomery form projective co-ordinate point to an affine point.
18660  *
18661  * r  Resulting affine co-ordinate point.
18662  * p  Montgomery form projective co-ordinate point.
18663  * t  Temporary ordinate data.
18664  */
18665 static void sp_256_map_8(sp_point* r, sp_point* p, sp_digit* t)
18666 {
18667     sp_digit* t1 = t;
18668     sp_digit* t2 = t + 2*8;
18669     int32_t n;
18670 
18671     sp_256_mont_inv_8(t1, p->z, t + 2*8);
18672 
18673     sp_256_mont_sqr_8(t2, t1, p256_mod, p256_mp_mod);
18674     sp_256_mont_mul_8(t1, t2, t1, p256_mod, p256_mp_mod);
18675 
18676     /* x /= z^2 */
18677     sp_256_mont_mul_8(r->x, p->x, t2, p256_mod, p256_mp_mod);
18678     XMEMSET(r->x + 8, 0, sizeof(r->x) / 2);
18679     sp_256_mont_reduce_8(r->x, p256_mod, p256_mp_mod);
18680     /* Reduce x to less than modulus */
18681     n = sp_256_cmp_8(r->x, p256_mod);
18682     sp_256_cond_sub_8(r->x, r->x, p256_mod, 0 - (n >= 0));
18683     sp_256_norm_8(r->x);
18684 
18685     /* y /= z^3 */
18686     sp_256_mont_mul_8(r->y, p->y, t1, p256_mod, p256_mp_mod);
18687     XMEMSET(r->y + 8, 0, sizeof(r->y) / 2);
18688     sp_256_mont_reduce_8(r->y, p256_mod, p256_mp_mod);
18689     /* Reduce y to less than modulus */
18690     n = sp_256_cmp_8(r->y, p256_mod);
18691     sp_256_cond_sub_8(r->y, r->y, p256_mod, 0 - (n >= 0));
18692     sp_256_norm_8(r->y);
18693 
18694     XMEMSET(r->z, 0, sizeof(r->z));
18695     r->z[0] = 1;
18696 
18697 }
18698 
18699 #ifdef WOLFSSL_SP_SMALL
18700 /* Add b to a into r. (r = a + b)
18701  *
18702  * r  A single precision integer.
18703  * a  A single precision integer.
18704  * b  A single precision integer.
18705  */
18706 static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a,
18707         const sp_digit* b)
18708 {
18709     sp_digit c = 0;
18710 
18711     __asm__ __volatile__ (
18712         "add    r12, %[a], #32\n\t"
18713         "\n1:\n\t"
18714         "adds   %[c], %[c], #-1\n\t"
18715         "ldr    r4, [%[a]], #4\n\t"
18716         "ldr    r5, [%[a]], #4\n\t"
18717         "ldr    r6, [%[a]], #4\n\t"
18718         "ldr    r7, [%[a]], #4\n\t"
18719         "ldr    r8, [%[b]], #4\n\t"
18720         "ldr    r9, [%[b]], #4\n\t"
18721         "ldr    r10, [%[b]], #4\n\t"
18722         "ldr    r14, [%[b]], #4\n\t"
18723         "adcs   r4, r4, r8\n\t"
18724         "adcs   r5, r5, r9\n\t"
18725         "adcs   r6, r6, r10\n\t"
18726         "adcs   r7, r7, r14\n\t"
18727         "str    r4, [%[r]], #4\n\t"
18728         "str    r5, [%[r]], #4\n\t"
18729         "str    r6, [%[r]], #4\n\t"
18730         "str    r7, [%[r]], #4\n\t"
18731         "mov    r4, #0\n\t"
18732         "adc    %[c], r4, #0\n\t"
18733         "cmp    %[a], r12\n\t"
18734         "bne    1b\n\t"
18735         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
18736         :
18737         : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
18738     );
18739 
18740     return c;
18741 }
18742 
18743 #else
18744 /* Add b to a into r. (r = a + b)
18745  *
18746  * r  A single precision integer.
18747  * a  A single precision integer.
18748  * b  A single precision integer.
18749  */
18750 static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a,
18751         const sp_digit* b)
18752 {
18753     sp_digit c = 0;
18754 
18755     __asm__ __volatile__ (
18756         "mov    r12, #0\n\t"
18757         "ldr    r4, [%[a], #0]\n\t"
18758         "ldr    r5, [%[a], #4]\n\t"
18759         "ldr    r6, [%[a], #8]\n\t"
18760         "ldr    r7, [%[a], #12]\n\t"
18761         "ldr    r8, [%[b], #0]\n\t"
18762         "ldr    r9, [%[b], #4]\n\t"
18763         "ldr    r10, [%[b], #8]\n\t"
18764         "ldr    r14, [%[b], #12]\n\t"
18765         "adds   r4, r4, r8\n\t"
18766         "adcs   r5, r5, r9\n\t"
18767         "adcs   r6, r6, r10\n\t"
18768         "adcs   r7, r7, r14\n\t"
18769         "str    r4, [%[r], #0]\n\t"
18770         "str    r5, [%[r], #4]\n\t"
18771         "str    r6, [%[r], #8]\n\t"
18772         "str    r7, [%[r], #12]\n\t"
18773         "ldr    r4, [%[a], #16]\n\t"
18774         "ldr    r5, [%[a], #20]\n\t"
18775         "ldr    r6, [%[a], #24]\n\t"
18776         "ldr    r7, [%[a], #28]\n\t"
18777         "ldr    r8, [%[b], #16]\n\t"
18778         "ldr    r9, [%[b], #20]\n\t"
18779         "ldr    r10, [%[b], #24]\n\t"
18780         "ldr    r14, [%[b], #28]\n\t"
18781         "adcs   r4, r4, r8\n\t"
18782         "adcs   r5, r5, r9\n\t"
18783         "adcs   r6, r6, r10\n\t"
18784         "adcs   r7, r7, r14\n\t"
18785         "str    r4, [%[r], #16]\n\t"
18786         "str    r5, [%[r], #20]\n\t"
18787         "str    r6, [%[r], #24]\n\t"
18788         "str    r7, [%[r], #28]\n\t"
18789         "adc    %[c], r12, r12\n\t"
18790         : [c] "+r" (c)
18791         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
18792         : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
18793     );
18794 
18795     return c;
18796 }
18797 
18798 #endif /* WOLFSSL_SP_SMALL */
18799 /* Add two Montgomery form numbers (r = a + b % m).
18800  *
18801  * r   Result of addition.
18802  * a   First number to add in Montogmery form.
18803  * b   Second number to add in Montogmery form.
18804  * m   Modulus (prime).
18805  */
18806 static void sp_256_mont_add_8(sp_digit* r, sp_digit* a, sp_digit* b,
18807         sp_digit* m)
18808 {
18809     (void)m;
18810 
18811     __asm__ __volatile__ (
18812         "mov    r12, #0\n\t"
18813         "ldr    r4, [%[a],#0]\n\t"
18814         "ldr    r5, [%[a],#4]\n\t"
18815         "ldr    r6, [%[a],#8]\n\t"
18816         "ldr    r7, [%[a],#12]\n\t"
18817         "ldr    r8, [%[b],#0]\n\t"
18818         "ldr    r9, [%[b],#4]\n\t"
18819         "ldr    r10, [%[b],#8]\n\t"
18820         "ldr    r14, [%[b],#12]\n\t"
18821         "adds   r4, r4, r8\n\t"
18822         "adcs   r5, r5, r9\n\t"
18823         "adcs   r6, r6, r10\n\t"
18824         "adcs   r7, r7, r14\n\t"
18825         "str    r4, [%[r],#0]\n\t"
18826         "str    r5, [%[r],#4]\n\t"
18827         "str    r6, [%[r],#8]\n\t"
18828         "str    r7, [%[r],#12]\n\t"
18829         "ldr    r4, [%[a],#16]\n\t"
18830         "ldr    r5, [%[a],#20]\n\t"
18831         "ldr    r6, [%[a],#24]\n\t"
18832         "ldr    r7, [%[a],#28]\n\t"
18833         "ldr    r8, [%[b],#16]\n\t"
18834         "ldr    r9, [%[b],#20]\n\t"
18835         "ldr    r10, [%[b],#24]\n\t"
18836         "ldr    r14, [%[b],#28]\n\t"
18837         "adcs   r4, r4, r8\n\t"
18838         "adcs   r5, r5, r9\n\t"
18839         "adcs   r6, r6, r10\n\t"
18840         "adcs   r7, r7, r14\n\t"
18841         "adc    r3, r12, #0\n\t"
18842         "sub    r3, r12, r3\n\t"
18843         "and    r12, r3, #1\n\t"
18844         "ldr    r8, [%[r],#0]\n\t"
18845         "ldr    r9, [%[r],#4]\n\t"
18846         "ldr    r10, [%[r],#8]\n\t"
18847         "ldr    r14, [%[r],#12]\n\t"
18848         "subs   r8, r8, r3\n\t"
18849         "sbcs   r9, r9, r3\n\t"
18850         "sbcs   r10, r10, r3\n\t"
18851         "sbcs   r14, r14, #0\n\t"
18852         "sbcs   r4, r4, #0\n\t"
18853         "sbcs   r5, r5, #0\n\t"
18854         "sbcs   r6, r6, r12\n\t"
18855         "sbc    r7, r7, r3\n\t"
18856         "str    r8, [%[r],#0]\n\t"
18857         "str    r9, [%[r],#4]\n\t"
18858         "str    r10, [%[r],#8]\n\t"
18859         "str    r14, [%[r],#12]\n\t"
18860         "str    r4, [%[r],#16]\n\t"
18861         "str    r5, [%[r],#20]\n\t"
18862         "str    r6, [%[r],#24]\n\t"
18863         "str    r7, [%[r],#28]\n\t"
18864         :
18865         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
18866         : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r3", "r12"
18867     );
18868 }
18869 
18870 /* Double a Montgomery form number (r = a + a % m).
18871  *
18872  * r   Result of doubling.
18873  * a   Number to double in Montogmery form.
18874  * m   Modulus (prime).
18875  */
18876 static void sp_256_mont_dbl_8(sp_digit* r, sp_digit* a, sp_digit* m)
18877 {
18878     (void)m;
18879 
18880     __asm__ __volatile__ (
18881         "mov    r12, #0\n\t"
18882         "ldr    r4, [%[a],#0]\n\t"
18883         "ldr    r5, [%[a],#4]\n\t"
18884         "ldr    r6, [%[a],#8]\n\t"
18885         "ldr    r7, [%[a],#12]\n\t"
18886         "ldr    r8, [%[a],#16]\n\t"
18887         "ldr    r9, [%[a],#20]\n\t"
18888         "ldr    r10, [%[a],#24]\n\t"
18889         "ldr    r14, [%[a],#28]\n\t"
18890         "adds   r4, r4, r4\n\t"
18891         "adcs   r5, r5, r5\n\t"
18892         "adcs   r6, r6, r6\n\t"
18893         "adcs   r7, r7, r7\n\t"
18894         "adcs   r8, r8, r8\n\t"
18895         "adcs   r9, r9, r9\n\t"
18896         "adcs   r10, r10, r10\n\t"
18897         "adcs   r14, r14, r14\n\t"
18898         "adc    r3, r12, #0\n\t"
18899         "sub    r3, r12, r3\n\t"
18900         "and    r12, r3, #1\n\t"
18901         "subs   r4, r4, r3\n\t"
18902         "sbcs   r5, r5, r3\n\t"
18903         "sbcs   r6, r6, r3\n\t"
18904         "sbcs   r7, r7, #0\n\t"
18905         "sbcs   r8, r8, #0\n\t"
18906         "sbcs   r9, r9, #0\n\t"
18907         "sbcs   r10, r10, r12\n\t"
18908         "sbc    r14, r14, r3\n\t"
18909         "str    r4, [%[r],#0]\n\t"
18910         "str    r5, [%[r],#4]\n\t"
18911         "str    r6, [%[r],#8]\n\t"
18912         "str    r7, [%[r],#12]\n\t"
18913         "str    r8, [%[r],#16]\n\t"
18914         "str    r9, [%[r],#20]\n\t"
18915         "str    r10, [%[r],#24]\n\t"
18916         "str    r14, [%[r],#28]\n\t"
18917         :
18918         : [r] "r" (r), [a] "r" (a)
18919         : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r3", "r12"
18920     );
18921 }
18922 
18923 /* Triple a Montgomery form number (r = a + a + a % m).
18924  *
18925  * r   Result of Tripling.
18926  * a   Number to triple in Montogmery form.
18927  * m   Modulus (prime).
18928  */
18929 static void sp_256_mont_tpl_8(sp_digit* r, sp_digit* a, sp_digit* m)
18930 {
18931     (void)m;
18932 
18933     __asm__ __volatile__ (
18934         "mov    r12, #0\n\t"
18935         "ldr    r4, [%[a],#0]\n\t"
18936         "ldr    r5, [%[a],#4]\n\t"
18937         "ldr    r6, [%[a],#8]\n\t"
18938         "ldr    r7, [%[a],#12]\n\t"
18939         "ldr    r8, [%[a],#16]\n\t"
18940         "ldr    r9, [%[a],#20]\n\t"
18941         "ldr    r10, [%[a],#24]\n\t"
18942         "ldr    r14, [%[a],#28]\n\t"
18943         "adds   r4, r4, r4\n\t"
18944         "adcs   r5, r5, r5\n\t"
18945         "adcs   r6, r6, r6\n\t"
18946         "adcs   r7, r7, r7\n\t"
18947         "adcs   r8, r8, r8\n\t"
18948         "adcs   r9, r9, r9\n\t"
18949         "adcs   r10, r10, r10\n\t"
18950         "adcs   r14, r14, r14\n\t"
18951         "adc    r3, r12, #0\n\t"
18952         "sub    r3, r12, r3\n\t"
18953         "and    r12, r3, #1\n\t"
18954         "subs   r4, r4, r3\n\t"
18955         "sbcs   r5, r5, r3\n\t"
18956         "sbcs   r6, r6, r3\n\t"
18957         "sbcs   r7, r7, #0\n\t"
18958         "sbcs   r8, r8, #0\n\t"
18959         "sbcs   r9, r9, #0\n\t"
18960         "sbcs   r10, r10, r12\n\t"
18961         "sbc    r14, r14, r3\n\t"
18962         "str    r8, [%[r],#16]\n\t"
18963         "str    r9, [%[r],#20]\n\t"
18964         "str    r10, [%[r],#24]\n\t"
18965         "str    r14, [%[r],#28]\n\t"
18966         "mov    r12, #0\n\t"
18967         "ldr    r8, [%[a],#0]\n\t"
18968         "ldr    r9, [%[a],#4]\n\t"
18969         "ldr    r10, [%[a],#8]\n\t"
18970         "ldr    r14, [%[a],#12]\n\t"
18971         "adds   r8, r8, r4\n\t"
18972         "adcs   r9, r9, r5\n\t"
18973         "adcs   r10, r10, r6\n\t"
18974         "adcs   r14, r14, r7\n\t"
18975         "str    r8, [%[r],#0]\n\t"
18976         "str    r9, [%[r],#4]\n\t"
18977         "str    r10, [%[r],#8]\n\t"
18978         "str    r14, [%[r],#12]\n\t"
18979         "ldr    r8, [%[a],#16]\n\t"
18980         "ldr    r9, [%[a],#20]\n\t"
18981         "ldr    r10, [%[a],#24]\n\t"
18982         "ldr    r14, [%[a],#28]\n\t"
18983         "ldr    r4, [%[r],#16]\n\t"
18984         "ldr    r5, [%[r],#20]\n\t"
18985         "ldr    r6, [%[r],#24]\n\t"
18986         "ldr    r7, [%[r],#28]\n\t"
18987         "adcs   r8, r8, r4\n\t"
18988         "adcs   r9, r9, r5\n\t"
18989         "adcs   r10, r10, r6\n\t"
18990         "adcs   r14, r14, r7\n\t"
18991         "adc    r3, r12, #0\n\t"
18992         "sub    r3, r12, r3\n\t"
18993         "and    r12, r3, #1\n\t"
18994         "ldr    r4, [%[r],#0]\n\t"
18995         "ldr    r5, [%[r],#4]\n\t"
18996         "ldr    r6, [%[r],#8]\n\t"
18997         "ldr    r7, [%[r],#12]\n\t"
18998         "subs   r4, r4, r3\n\t"
18999         "sbcs   r5, r5, r3\n\t"
19000         "sbcs   r6, r6, r3\n\t"
19001         "sbcs   r7, r7, #0\n\t"
19002         "sbcs   r8, r8, #0\n\t"
19003         "sbcs   r9, r9, #0\n\t"
19004         "sbcs   r10, r10, r12\n\t"
19005         "sbc    r14, r14, r3\n\t"
19006         "str    r4, [%[r],#0]\n\t"
19007         "str    r5, [%[r],#4]\n\t"
19008         "str    r6, [%[r],#8]\n\t"
19009         "str    r7, [%[r],#12]\n\t"
19010         "str    r8, [%[r],#16]\n\t"
19011         "str    r9, [%[r],#20]\n\t"
19012         "str    r10, [%[r],#24]\n\t"
19013         "str    r14, [%[r],#28]\n\t"
19014         :
19015         : [r] "r" (r), [a] "r" (a)
19016         : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r3", "r12"
19017     );
19018 }
19019 
19020 /* Subtract two Montgomery form numbers (r = a - b % m).
19021  *
19022  * r   Result of subtration.
19023  * a   Number to subtract from in Montogmery form.
19024  * b   Number to subtract with in Montogmery form.
19025  * m   Modulus (prime).
19026  */
19027 static void sp_256_mont_sub_8(sp_digit* r, sp_digit* a, sp_digit* b,
19028         sp_digit* m)
19029 {
19030     (void)m;
19031 
19032     __asm__ __volatile__ (
19033         "mov    r12, #0\n\t"
19034         "ldr    r4, [%[a],#0]\n\t"
19035         "ldr    r5, [%[a],#4]\n\t"
19036         "ldr    r6, [%[a],#8]\n\t"
19037         "ldr    r7, [%[a],#12]\n\t"
19038         "ldr    r8, [%[b],#0]\n\t"
19039         "ldr    r9, [%[b],#4]\n\t"
19040         "ldr    r10, [%[b],#8]\n\t"
19041         "ldr    r14, [%[b],#12]\n\t"
19042         "subs   r4, r4, r8\n\t"
19043         "sbcs   r5, r5, r9\n\t"
19044         "sbcs   r6, r6, r10\n\t"
19045         "sbcs   r7, r7, r14\n\t"
19046         "str    r4, [%[r],#0]\n\t"
19047         "str    r5, [%[r],#4]\n\t"
19048         "str    r6, [%[r],#8]\n\t"
19049         "str    r7, [%[r],#12]\n\t"
19050         "ldr    r4, [%[a],#16]\n\t"
19051         "ldr    r5, [%[a],#20]\n\t"
19052         "ldr    r6, [%[a],#24]\n\t"
19053         "ldr    r7, [%[a],#28]\n\t"
19054         "ldr    r8, [%[b],#16]\n\t"
19055         "ldr    r9, [%[b],#20]\n\t"
19056         "ldr    r10, [%[b],#24]\n\t"
19057         "ldr    r14, [%[b],#28]\n\t"
19058         "sbcs   r4, r4, r8\n\t"
19059         "sbcs   r5, r5, r9\n\t"
19060         "sbcs   r6, r6, r10\n\t"
19061         "sbcs   r7, r7, r14\n\t"
19062         "sbc    r3, r12, #0\n\t"
19063         "and    r12, r3, #1\n\t"
19064         "ldr    r8, [%[r],#0]\n\t"
19065         "ldr    r9, [%[r],#4]\n\t"
19066         "ldr    r10, [%[r],#8]\n\t"
19067         "ldr    r14, [%[r],#12]\n\t"
19068         "adds   r8, r8, r3\n\t"
19069         "adcs   r9, r9, r3\n\t"
19070         "adcs   r10, r10, r3\n\t"
19071         "adcs   r14, r14, #0\n\t"
19072         "adcs   r4, r4, #0\n\t"
19073         "adcs   r5, r5, #0\n\t"
19074         "adcs   r6, r6, r12\n\t"
19075         "adc    r7, r7, r3\n\t"
19076         "str    r8, [%[r],#0]\n\t"
19077         "str    r9, [%[r],#4]\n\t"
19078         "str    r10, [%[r],#8]\n\t"
19079         "str    r14, [%[r],#12]\n\t"
19080         "str    r4, [%[r],#16]\n\t"
19081         "str    r5, [%[r],#20]\n\t"
19082         "str    r6, [%[r],#24]\n\t"
19083         "str    r7, [%[r],#28]\n\t"
19084         :
19085         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
19086         : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r3", "r12"
19087     );
19088 }
19089 
19090 /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
19091  *
19092  * r  Result of division by 2.
19093  * a  Number to divide.
19094  * m  Modulus (prime).
19095  */
19096 static void sp_256_div2_8(sp_digit* r, sp_digit* a, sp_digit* m)
19097 {
19098     __asm__ __volatile__ (
19099         "mov    r10, #0\n\t"
19100         "ldr    r3, [%[a], #0]\n\t"
19101         "ldr    r4, [%[a], #4]\n\t"
19102         "ldr    r5, [%[a], #8]\n\t"
19103         "ldr    r6, [%[a], #12]\n\t"
19104         "and    r9, r3, #1\n\t"
19105         "sub    r7, r10, r9\n\t"
19106         "and    r8, r7, #1\n\t"
19107         "adds   r3, r3, r7\n\t"
19108         "adcs   r4, r4, r7\n\t"
19109         "adcs   r5, r5, r7\n\t"
19110         "adcs   r6, r6, r10\n\t"
19111         "str    r3, [%[r], #0]\n\t"
19112         "str    r4, [%[r], #4]\n\t"
19113         "str    r5, [%[r], #8]\n\t"
19114         "str    r6, [%[r], #12]\n\t"
19115         "ldr    r3, [%[a], #16]\n\t"
19116         "ldr    r4, [%[a], #20]\n\t"
19117         "ldr    r5, [%[a], #24]\n\t"
19118         "ldr    r6, [%[a], #28]\n\t"
19119         "adcs   r3, r3, r10\n\t"
19120         "adcs   r4, r4, r10\n\t"
19121         "adcs   r5, r5, r8\n\t"
19122         "adcs   r6, r6, r7\n\t"
19123         "adc    r9, r10, r10\n\t"
19124         "lsr    r7, r3, #1\n\t"
19125         "and    r3, r3, #1\n\t"
19126         "lsr    r8, r4, #1\n\t"
19127         "lsr    r10, r5, #1\n\t"
19128         "lsr    r14, r6, #1\n\t"
19129         "orr    r7, r7, r4, lsl #31\n\t"
19130         "orr    r8, r8, r5, lsl #31\n\t"
19131         "orr    r10, r10, r6, lsl #31\n\t"
19132         "orr    r14, r14, r9, lsl #31\n\t"
19133         "mov    r9, r3\n\t"
19134         "str    r7, [%[r], #16]\n\t"
19135         "str    r8, [%[r], #20]\n\t"
19136         "str    r10, [%[r], #24]\n\t"
19137         "str    r14, [%[r], #28]\n\t"
19138         "ldr    r3, [%[r], #0]\n\t"
19139         "ldr    r4, [%[r], #4]\n\t"
19140         "ldr    r5, [%[r], #8]\n\t"
19141         "ldr    r6, [%[r], #12]\n\t"
19142         "lsr    r7, r3, #1\n\t"
19143         "lsr    r8, r4, #1\n\t"
19144         "lsr    r10, r5, #1\n\t"
19145         "lsr    r14, r6, #1\n\t"
19146         "orr    r7, r7, r4, lsl #31\n\t"
19147         "orr    r8, r8, r5, lsl #31\n\t"
19148         "orr    r10, r10, r6, lsl #31\n\t"
19149         "orr    r14, r14, r9, lsl #31\n\t"
19150         "str    r7, [%[r], #0]\n\t"
19151         "str    r8, [%[r], #4]\n\t"
19152         "str    r10, [%[r], #8]\n\t"
19153         "str    r14, [%[r], #12]\n\t"
19154         :
19155         : [r] "r" (r), [a] "r" (a), [m] "r" (m)
19156         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r10", "r14", "r9"
19157     );
19158 
19159 }
19160 
19161 /* Double the Montgomery form projective point p.
19162  *
19163  * r  Result of doubling point.
19164  * p  Point to double.
19165  * t  Temporary ordinate data.
19166  */
19167 static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p, sp_digit* t)
19168 {
19169     sp_point *rp[2];
19170     sp_point tp;
19171     sp_digit* t1 = t;
19172     sp_digit* t2 = t + 2*8;
19173     sp_digit* x;
19174     sp_digit* y;
19175     sp_digit* z;
19176     int i;
19177 
19178     /* When infinity don't double point passed in - constant time. */
19179     rp[0] = r;
19180     rp[1] = &tp;
19181     x = rp[p->infinity]->x;
19182     y = rp[p->infinity]->y;
19183     z = rp[p->infinity]->z;
19184     /* Put point to double into result - good for infinty. */
19185     if (r != p) {
19186         for (i=0; i<8; i++)
19187             r->x[i] = p->x[i];
19188         for (i=0; i<8; i++)
19189             r->y[i] = p->y[i];
19190         for (i=0; i<8; i++)
19191             r->z[i] = p->z[i];
19192         r->infinity = p->infinity;
19193     }
19194 
19195     /* T1 = Z * Z */
19196     sp_256_mont_sqr_8(t1, z, p256_mod, p256_mp_mod);
19197     /* Z = Y * Z */
19198     sp_256_mont_mul_8(z, y, z, p256_mod, p256_mp_mod);
19199     /* Z = 2Z */
19200     sp_256_mont_dbl_8(z, z, p256_mod);
19201     /* T2 = X - T1 */
19202     sp_256_mont_sub_8(t2, x, t1, p256_mod);
19203     /* T1 = X + T1 */
19204     sp_256_mont_add_8(t1, x, t1, p256_mod);
19205     /* T2 = T1 * T2 */
19206     sp_256_mont_mul_8(t2, t1, t2, p256_mod, p256_mp_mod);
19207     /* T1 = 3T2 */
19208     sp_256_mont_tpl_8(t1, t2, p256_mod);
19209     /* Y = 2Y */
19210     sp_256_mont_dbl_8(y, y, p256_mod);
19211     /* Y = Y * Y */
19212     sp_256_mont_sqr_8(y, y, p256_mod, p256_mp_mod);
19213     /* T2 = Y * Y */
19214     sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod);
19215     /* T2 = T2/2 */
19216     sp_256_div2_8(t2, t2, p256_mod);
19217     /* Y = Y * X */
19218     sp_256_mont_mul_8(y, y, x, p256_mod, p256_mp_mod);
19219     /* X = T1 * T1 */
19220     sp_256_mont_mul_8(x, t1, t1, p256_mod, p256_mp_mod);
19221     /* X = X - Y */
19222     sp_256_mont_sub_8(x, x, y, p256_mod);
19223     /* X = X - Y */
19224     sp_256_mont_sub_8(x, x, y, p256_mod);
19225     /* Y = Y - X */
19226     sp_256_mont_sub_8(y, y, x, p256_mod);
19227     /* Y = Y * T1 */
19228     sp_256_mont_mul_8(y, y, t1, p256_mod, p256_mp_mod);
19229     /* Y = Y - T2 */
19230     sp_256_mont_sub_8(y, y, t2, p256_mod);
19231 
19232 }
19233 
19234 #ifdef WOLFSSL_SP_SMALL
19235 /* Sub b from a into r. (r = a - b)
19236  *
19237  * r  A single precision integer.
19238  * a  A single precision integer.
19239  * b  A single precision integer.
19240  */
19241 static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a,
19242         const sp_digit* b)
19243 {
19244     sp_digit c = 0;
19245 
19246     __asm__ __volatile__ (
19247         "add    r12, %[a], #32\n\t"
19248         "\n1:\n\t"
19249         "rsbs   %[c], %[c], #0\n\t"
19250         "ldr    r4, [%[a]], #4\n\t"
19251         "ldr    r5, [%[a]], #4\n\t"
19252         "ldr    r6, [%[a]], #4\n\t"
19253         "ldr    r7, [%[a]], #4\n\t"
19254         "ldr    r8, [%[b]], #4\n\t"
19255         "ldr    r9, [%[b]], #4\n\t"
19256         "ldr    r10, [%[b]], #4\n\t"
19257         "ldr    r14, [%[b]], #4\n\t"
19258         "sbcs   r4, r4, r8\n\t"
19259         "sbcs   r5, r5, r9\n\t"
19260         "sbcs   r6, r6, r10\n\t"
19261         "sbcs   r7, r7, r14\n\t"
19262         "str    r4, [%[r]], #4\n\t"
19263         "str    r5, [%[r]], #4\n\t"
19264         "str    r6, [%[r]], #4\n\t"
19265         "str    r7, [%[r]], #4\n\t"
19266         "sbc    %[c], r4, r4\n\t"
19267         "cmp    %[a], r12\n\t"
19268         "bne    1b\n\t"
19269         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
19270         :
19271         : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
19272     );
19273 
19274     return c;
19275 }
19276 
19277 #else
19278 /* Sub b from a into r. (r = a - b)
19279  *
19280  * r  A single precision integer.
19281  * a  A single precision integer.
19282  * b  A single precision integer.
19283  */
19284 static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a,
19285         const sp_digit* b)
19286 {
19287     sp_digit c = 0;
19288 
19289     __asm__ __volatile__ (
19290         "ldr    r3, [%[a], #0]\n\t"
19291         "ldr    r4, [%[a], #4]\n\t"
19292         "ldr    r5, [%[a], #8]\n\t"
19293         "ldr    r6, [%[a], #12]\n\t"
19294         "ldr    r7, [%[b], #0]\n\t"
19295         "ldr    r8, [%[b], #4]\n\t"
19296         "ldr    r9, [%[b], #8]\n\t"
19297         "ldr    r10, [%[b], #12]\n\t"
19298         "subs   r3, r3, r7\n\t"
19299         "sbcs   r4, r4, r8\n\t"
19300         "sbcs   r5, r5, r9\n\t"
19301         "sbcs   r6, r6, r10\n\t"
19302         "str    r3, [%[r], #0]\n\t"
19303         "str    r4, [%[r], #4]\n\t"
19304         "str    r5, [%[r], #8]\n\t"
19305         "str    r6, [%[r], #12]\n\t"
19306         "ldr    r3, [%[a], #16]\n\t"
19307         "ldr    r4, [%[a], #20]\n\t"
19308         "ldr    r5, [%[a], #24]\n\t"
19309         "ldr    r6, [%[a], #28]\n\t"
19310         "ldr    r7, [%[b], #16]\n\t"
19311         "ldr    r8, [%[b], #20]\n\t"
19312         "ldr    r9, [%[b], #24]\n\t"
19313         "ldr    r10, [%[b], #28]\n\t"
19314         "sbcs   r3, r3, r7\n\t"
19315         "sbcs   r4, r4, r8\n\t"
19316         "sbcs   r5, r5, r9\n\t"
19317         "sbcs   r6, r6, r10\n\t"
19318         "str    r3, [%[r], #16]\n\t"
19319         "str    r4, [%[r], #20]\n\t"
19320         "str    r5, [%[r], #24]\n\t"
19321         "str    r6, [%[r], #28]\n\t"
19322         "sbc    %[c], %[c], #0\n\t"
19323         : [c] "+r" (c)
19324         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
19325         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
19326     );
19327 
19328     return c;
19329 }
19330 
19331 #endif /* WOLFSSL_SP_SMALL */
19332 /* Compare two numbers to determine if they are equal.
19333  * Constant time implementation.
19334  *
19335  * a  First number to compare.
19336  * b  Second number to compare.
19337  * returns 1 when equal and 0 otherwise.
19338  */
19339 static int sp_256_cmp_equal_8(const sp_digit* a, const sp_digit* b)
19340 {
19341     return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) |
19342             (a[4] ^ b[4]) | (a[5] ^ b[5]) | (a[6] ^ b[6]) | (a[7] ^ b[7])) == 0;
19343 }
19344 
19345 /* Add two Montgomery form projective points.
19346  *
19347  * r  Result of addition.
19348  * p  Frist point to add.
19349  * q  Second point to add.
19350  * t  Temporary ordinate data.
19351  */
19352 static void sp_256_proj_point_add_8(sp_point* r, sp_point* p, sp_point* q,
19353         sp_digit* t)
19354 {
19355     sp_point *ap[2];
19356     sp_point *rp[2];
19357     sp_point tp;
19358     sp_digit* t1 = t;
19359     sp_digit* t2 = t + 2*8;
19360     sp_digit* t3 = t + 4*8;
19361     sp_digit* t4 = t + 6*8;
19362     sp_digit* t5 = t + 8*8;
19363     sp_digit* x;
19364     sp_digit* y;
19365     sp_digit* z;
19366     int i;
19367 
19368     /* Ensure only the first point is the same as the result. */
19369     if (q == r) {
19370         sp_point* a = p;
19371         p = q;
19372         q = a;
19373     }
19374 
19375     /* Check double */
19376     sp_256_sub_8(t1, p256_mod, q->y);
19377     sp_256_norm_8(t1);
19378     if (sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) &
19379         (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) {
19380         sp_256_proj_point_dbl_8(r, p, t);
19381     }
19382     else {
19383         rp[0] = r;
19384         rp[1] = &tp;
19385         XMEMSET(&tp, 0, sizeof(tp));
19386         x = rp[p->infinity | q->infinity]->x;
19387         y = rp[p->infinity | q->infinity]->y;
19388         z = rp[p->infinity | q->infinity]->z;
19389 
19390         ap[0] = p;
19391         ap[1] = q;
19392         for (i=0; i<8; i++)
19393             r->x[i] = ap[p->infinity]->x[i];
19394         for (i=0; i<8; i++)
19395             r->y[i] = ap[p->infinity]->y[i];
19396         for (i=0; i<8; i++)
19397             r->z[i] = ap[p->infinity]->z[i];
19398         r->infinity = ap[p->infinity]->infinity;
19399 
19400         /* U1 = X1*Z2^2 */
19401         sp_256_mont_sqr_8(t1, q->z, p256_mod, p256_mp_mod);
19402         sp_256_mont_mul_8(t3, t1, q->z, p256_mod, p256_mp_mod);
19403         sp_256_mont_mul_8(t1, t1, x, p256_mod, p256_mp_mod);
19404         /* U2 = X2*Z1^2 */
19405         sp_256_mont_sqr_8(t2, z, p256_mod, p256_mp_mod);
19406         sp_256_mont_mul_8(t4, t2, z, p256_mod, p256_mp_mod);
19407         sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod);
19408         /* S1 = Y1*Z2^3 */
19409         sp_256_mont_mul_8(t3, t3, y, p256_mod, p256_mp_mod);
19410         /* S2 = Y2*Z1^3 */
19411         sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod);
19412         /* H = U2 - U1 */
19413         sp_256_mont_sub_8(t2, t2, t1, p256_mod);
19414         /* R = S2 - S1 */
19415         sp_256_mont_sub_8(t4, t4, t3, p256_mod);
19416         /* Z3 = H*Z1*Z2 */
19417         sp_256_mont_mul_8(z, z, q->z, p256_mod, p256_mp_mod);
19418         sp_256_mont_mul_8(z, z, t2, p256_mod, p256_mp_mod);
19419         /* X3 = R^2 - H^3 - 2*U1*H^2 */
19420         sp_256_mont_sqr_8(x, t4, p256_mod, p256_mp_mod);
19421         sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod);
19422         sp_256_mont_mul_8(y, t1, t5, p256_mod, p256_mp_mod);
19423         sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod);
19424         sp_256_mont_sub_8(x, x, t5, p256_mod);
19425         sp_256_mont_dbl_8(t1, y, p256_mod);
19426         sp_256_mont_sub_8(x, x, t1, p256_mod);
19427         /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
19428         sp_256_mont_sub_8(y, y, x, p256_mod);
19429         sp_256_mont_mul_8(y, y, t4, p256_mod, p256_mp_mod);
19430         sp_256_mont_mul_8(t5, t5, t3, p256_mod, p256_mp_mod);
19431         sp_256_mont_sub_8(y, y, t5, p256_mod);
19432     }
19433 }
19434 
19435 /* Multiply the point by the scalar and return the result.
19436  * If map is true then convert result to affine co-ordinates.
19437  *
19438  * r     Resulting point.
19439  * g     Point to multiply.
19440  * k     Scalar to multiply by.
19441  * map   Indicates whether to convert result to affine.
19442  * heap  Heap to use for allocation.
19443  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
19444  */
19445 static int sp_256_ecc_mulmod_fast_8(sp_point* r, sp_point* g, sp_digit* k,
19446         int map, void* heap)
19447 {
19448 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
19449     sp_point td[16];
19450     sp_point rtd;
19451     sp_digit tmpd[2 * 8 * 5];
19452 #endif
19453     sp_point* t;
19454     sp_point* rt;
19455     sp_digit* tmp;
19456     sp_digit n;
19457     int i;
19458     int c, y;
19459     int err;
19460 
19461     (void)heap;
19462 
19463     err = sp_ecc_point_new(heap, rtd, rt);
19464 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
19465     t = (sp_point*)XMALLOC(sizeof(sp_point) * 16, heap, DYNAMIC_TYPE_ECC);
19466     if (t == NULL)
19467         err = MEMORY_E;
19468     tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
19469                              DYNAMIC_TYPE_ECC);
19470     if (tmp == NULL)
19471         err = MEMORY_E;
19472 #else
19473     t = td;
19474     tmp = tmpd;
19475 #endif
19476 
19477     if (err == MP_OKAY) {
19478         /* t[0] = {0, 0, 1} * norm */
19479         XMEMSET(&t[0], 0, sizeof(t[0]));
19480         t[0].infinity = 1;
19481         /* t[1] = {g->x, g->y, g->z} * norm */
19482         sp_256_mod_mul_norm_8(t[1].x, g->x, p256_mod);
19483         sp_256_mod_mul_norm_8(t[1].y, g->y, p256_mod);
19484         sp_256_mod_mul_norm_8(t[1].z, g->z, p256_mod);
19485         t[1].infinity = 0;
19486         sp_256_proj_point_dbl_8(&t[ 2], &t[ 1], tmp);
19487         t[ 2].infinity = 0;
19488         sp_256_proj_point_add_8(&t[ 3], &t[ 2], &t[ 1], tmp);
19489         t[ 3].infinity = 0;
19490         sp_256_proj_point_dbl_8(&t[ 4], &t[ 2], tmp);
19491         t[ 4].infinity = 0;
19492         sp_256_proj_point_add_8(&t[ 5], &t[ 3], &t[ 2], tmp);
19493         t[ 5].infinity = 0;
19494         sp_256_proj_point_dbl_8(&t[ 6], &t[ 3], tmp);
19495         t[ 6].infinity = 0;
19496         sp_256_proj_point_add_8(&t[ 7], &t[ 4], &t[ 3], tmp);
19497         t[ 7].infinity = 0;
19498         sp_256_proj_point_dbl_8(&t[ 8], &t[ 4], tmp);
19499         t[ 8].infinity = 0;
19500         sp_256_proj_point_add_8(&t[ 9], &t[ 5], &t[ 4], tmp);
19501         t[ 9].infinity = 0;
19502         sp_256_proj_point_dbl_8(&t[10], &t[ 5], tmp);
19503         t[10].infinity = 0;
19504         sp_256_proj_point_add_8(&t[11], &t[ 6], &t[ 5], tmp);
19505         t[11].infinity = 0;
19506         sp_256_proj_point_dbl_8(&t[12], &t[ 6], tmp);
19507         t[12].infinity = 0;
19508         sp_256_proj_point_add_8(&t[13], &t[ 7], &t[ 6], tmp);
19509         t[13].infinity = 0;
19510         sp_256_proj_point_dbl_8(&t[14], &t[ 7], tmp);
19511         t[14].infinity = 0;
19512         sp_256_proj_point_add_8(&t[15], &t[ 8], &t[ 7], tmp);
19513         t[15].infinity = 0;
19514 
19515         i = 6;
19516         n = k[i+1] << 0;
19517         c = 28;
19518         y = n >> 28;
19519         XMEMCPY(rt, &t[y], sizeof(sp_point));
19520         n <<= 4;
19521         for (; i>=0 || c>=4; ) {
19522             if (c < 4) {
19523                 n |= k[i--] << (0 - c);
19524                 c += 32;
19525             }
19526             y = (n >> 28) & 0xf;
19527             n <<= 4;
19528             c -= 4;
19529 
19530             sp_256_proj_point_dbl_8(rt, rt, tmp);
19531             sp_256_proj_point_dbl_8(rt, rt, tmp);
19532             sp_256_proj_point_dbl_8(rt, rt, tmp);
19533             sp_256_proj_point_dbl_8(rt, rt, tmp);
19534 
19535             sp_256_proj_point_add_8(rt, rt, &t[y], tmp);
19536         }
19537 
19538         if (map)
19539             sp_256_map_8(r, rt, tmp);
19540         else
19541             XMEMCPY(r, rt, sizeof(sp_point));
19542     }
19543 
19544 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
19545     if (tmp != NULL) {
19546         XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 8 * 5);
19547         XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
19548     }
19549     if (t != NULL) {
19550         XMEMSET(t, 0, sizeof(sp_point) * 16);
19551         XFREE(t, heap, DYNAMIC_TYPE_ECC);
19552     }
19553 #else
19554     ForceZero(tmpd, sizeof(tmpd));
19555     ForceZero(td, sizeof(td));
19556 #endif
19557     sp_ecc_point_free(rt, 1, heap);
19558 
19559     return err;
19560 }
19561 
19562 /* A table entry for pre-computed points. */
19563 typedef struct sp_table_entry {
19564     sp_digit x[8];
19565     sp_digit y[8];
19566     byte infinity;
19567 } sp_table_entry;
19568 
19569 #ifdef FP_ECC
19570 /* Double the Montgomery form projective point p a number of times.
19571  *
19572  * r  Result of repeated doubling of point.
19573  * p  Point to double.
19574  * n  Number of times to double
19575  * t  Temporary ordinate data.
19576  */
19577 static void sp_256_proj_point_dbl_n_8(sp_point* r, sp_point* p, int n,
19578         sp_digit* t)
19579 {
19580     sp_point *rp[2];
19581     sp_point tp;
19582     sp_digit* w = t;
19583     sp_digit* a = t + 2*8;
19584     sp_digit* b = t + 4*8;
19585     sp_digit* t1 = t + 6*8;
19586     sp_digit* t2 = t + 8*8;
19587     sp_digit* x;
19588     sp_digit* y;
19589     sp_digit* z;
19590     int i;
19591 
19592     rp[0] = r;
19593     rp[1] = &tp;
19594     x = rp[p->infinity]->x;
19595     y = rp[p->infinity]->y;
19596     z = rp[p->infinity]->z;
19597     if (r != p) {
19598         for (i=0; i<8; i++)
19599             r->x[i] = p->x[i];
19600         for (i=0; i<8; i++)
19601             r->y[i] = p->y[i];
19602         for (i=0; i<8; i++)
19603             r->z[i] = p->z[i];
19604         r->infinity = p->infinity;
19605     }
19606 
19607     /* Y = 2*Y */
19608     sp_256_mont_dbl_8(y, y, p256_mod);
19609     /* W = Z^4 */
19610     sp_256_mont_sqr_8(w, z, p256_mod, p256_mp_mod);
19611     sp_256_mont_sqr_8(w, w, p256_mod, p256_mp_mod);
19612     while (n--) {
19613         /* A = 3*(X^2 - W) */
19614         sp_256_mont_sqr_8(t1, x, p256_mod, p256_mp_mod);
19615         sp_256_mont_sub_8(t1, t1, w, p256_mod);
19616         sp_256_mont_tpl_8(a, t1, p256_mod);
19617         /* B = X*Y^2 */
19618         sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod);
19619         sp_256_mont_mul_8(b, t2, x, p256_mod, p256_mp_mod);
19620         /* X = A^2 - 2B */
19621         sp_256_mont_sqr_8(x, a, p256_mod, p256_mp_mod);
19622         sp_256_mont_dbl_8(t1, b, p256_mod);
19623         sp_256_mont_sub_8(x, x, t1, p256_mod);
19624         /* Z = Z*Y */
19625         sp_256_mont_mul_8(z, z, y, p256_mod, p256_mp_mod);
19626         /* t2 = Y^4 */
19627         sp_256_mont_sqr_8(t2, t2, p256_mod, p256_mp_mod);
19628         if (n) {
19629             /* W = W*Y^4 */
19630             sp_256_mont_mul_8(w, w, t2, p256_mod, p256_mp_mod);
19631         }
19632         /* y = 2*A*(B - X) - Y^4 */
19633         sp_256_mont_sub_8(y, b, x, p256_mod);
19634         sp_256_mont_mul_8(y, y, a, p256_mod, p256_mp_mod);
19635         sp_256_mont_dbl_8(y, y, p256_mod);
19636         sp_256_mont_sub_8(y, y, t2, p256_mod);
19637     }
19638     /* Y = Y/2 */
19639     sp_256_div2_8(y, y, p256_mod);
19640 }
19641 
19642 #endif /* FP_ECC */
19643 /* Add two Montgomery form projective points. The second point has a q value of
19644  * one.
19645  * Only the first point can be the same pointer as the result point.
19646  *
19647  * r  Result of addition.
19648  * p  Frist point to add.
19649  * q  Second point to add.
19650  * t  Temporary ordinate data.
19651  */
19652 static void sp_256_proj_point_add_qz1_8(sp_point* r, sp_point* p,
19653         sp_point* q, sp_digit* t)
19654 {
19655     sp_point *ap[2];
19656     sp_point *rp[2];
19657     sp_point tp;
19658     sp_digit* t1 = t;
19659     sp_digit* t2 = t + 2*8;
19660     sp_digit* t3 = t + 4*8;
19661     sp_digit* t4 = t + 6*8;
19662     sp_digit* t5 = t + 8*8;
19663     sp_digit* x;
19664     sp_digit* y;
19665     sp_digit* z;
19666     int i;
19667 
19668     /* Check double */
19669     sp_256_sub_8(t1, p256_mod, q->y);
19670     sp_256_norm_8(t1);
19671     if (sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) &
19672         (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) {
19673         sp_256_proj_point_dbl_8(r, p, t);
19674     }
19675     else {
19676         rp[0] = r;
19677         rp[1] = &tp;
19678         XMEMSET(&tp, 0, sizeof(tp));
19679         x = rp[p->infinity | q->infinity]->x;
19680         y = rp[p->infinity | q->infinity]->y;
19681         z = rp[p->infinity | q->infinity]->z;
19682 
19683         ap[0] = p;
19684         ap[1] = q;
19685         for (i=0; i<8; i++)
19686             r->x[i] = ap[p->infinity]->x[i];
19687         for (i=0; i<8; i++)
19688             r->y[i] = ap[p->infinity]->y[i];
19689         for (i=0; i<8; i++)
19690             r->z[i] = ap[p->infinity]->z[i];
19691         r->infinity = ap[p->infinity]->infinity;
19692 
19693         /* U2 = X2*Z1^2 */
19694         sp_256_mont_sqr_8(t2, z, p256_mod, p256_mp_mod);
19695         sp_256_mont_mul_8(t4, t2, z, p256_mod, p256_mp_mod);
19696         sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod);
19697         /* S2 = Y2*Z1^3 */
19698         sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod);
19699         /* H = U2 - X1 */
19700         sp_256_mont_sub_8(t2, t2, x, p256_mod);
19701         /* R = S2 - Y1 */
19702         sp_256_mont_sub_8(t4, t4, y, p256_mod);
19703         /* Z3 = H*Z1 */
19704         sp_256_mont_mul_8(z, z, t2, p256_mod, p256_mp_mod);
19705         /* X3 = R^2 - H^3 - 2*X1*H^2 */
19706         sp_256_mont_sqr_8(t1, t4, p256_mod, p256_mp_mod);
19707         sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod);
19708         sp_256_mont_mul_8(t3, x, t5, p256_mod, p256_mp_mod);
19709         sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod);
19710         sp_256_mont_sub_8(x, t1, t5, p256_mod);
19711         sp_256_mont_dbl_8(t1, t3, p256_mod);
19712         sp_256_mont_sub_8(x, x, t1, p256_mod);
19713         /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
19714         sp_256_mont_sub_8(t3, t3, x, p256_mod);
19715         sp_256_mont_mul_8(t3, t3, t4, p256_mod, p256_mp_mod);
19716         sp_256_mont_mul_8(t5, t5, y, p256_mod, p256_mp_mod);
19717         sp_256_mont_sub_8(y, t3, t5, p256_mod);
19718     }
19719 }
19720 
19721 #ifdef WOLFSSL_SP_SMALL
19722 #ifdef FP_ECC
19723 /* Convert the projective point to affine.
19724  * Ordinates are in Montgomery form.
19725  *
19726  * a  Point to convert.
19727  * t  Temprorary data.
19728  */
19729 static void sp_256_proj_to_affine_8(sp_point* a, sp_digit* t)
19730 {
19731     sp_digit* t1 = t;
19732     sp_digit* t2 = t + 2 * 8;
19733     sp_digit* tmp = t + 4 * 8;
19734 
19735     sp_256_mont_inv_8(t1, a->z, tmp);
19736 
19737     sp_256_mont_sqr_8(t2, t1, p256_mod, p256_mp_mod);
19738     sp_256_mont_mul_8(t1, t2, t1, p256_mod, p256_mp_mod);
19739 
19740     sp_256_mont_mul_8(a->x, a->x, t2, p256_mod, p256_mp_mod);
19741     sp_256_mont_mul_8(a->y, a->y, t1, p256_mod, p256_mp_mod);
19742     XMEMCPY(a->z, p256_norm_mod, sizeof(p256_norm_mod));
19743 }
19744 
19745 /* Generate the pre-computed table of points for the base point.
19746  *
19747  * a      The base point.
19748  * table  Place to store generated point data.
19749  * tmp    Temprorary data.
19750  * heap  Heap to use for allocation.
19751  */
19752 static int sp_256_gen_stripe_table_8(sp_point* a,
19753         sp_table_entry* table, sp_digit* tmp, void* heap)
19754 {
19755 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
19756     sp_point td, s1d, s2d;
19757 #endif
19758     sp_point* t;
19759     sp_point* s1 = NULL;
19760     sp_point* s2 = NULL;
19761     int i, j;
19762     int err;
19763 
19764     (void)heap;
19765 
19766     err = sp_ecc_point_new(heap, td, t);
19767     if (err == MP_OKAY)
19768         err = sp_ecc_point_new(heap, s1d, s1);
19769     if (err == MP_OKAY)
19770         err = sp_ecc_point_new(heap, s2d, s2);
19771 
19772     if (err == MP_OKAY)
19773         err = sp_256_mod_mul_norm_8(t->x, a->x, p256_mod);
19774     if (err == MP_OKAY)
19775         err = sp_256_mod_mul_norm_8(t->y, a->y, p256_mod);
19776     if (err == MP_OKAY)
19777         err = sp_256_mod_mul_norm_8(t->z, a->z, p256_mod);
19778     if (err == MP_OKAY) {
19779         t->infinity = 0;
19780         sp_256_proj_to_affine_8(t, tmp);
19781 
19782         XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
19783         s1->infinity = 0;
19784         XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
19785         s2->infinity = 0;
19786 
19787         /* table[0] = {0, 0, infinity} */
19788         XMEMSET(&table[0], 0, sizeof(sp_table_entry));
19789         table[0].infinity = 1;
19790         /* table[1] = Affine version of 'a' in Montgomery form */
19791         XMEMCPY(table[1].x, t->x, sizeof(table->x));
19792         XMEMCPY(table[1].y, t->y, sizeof(table->y));
19793         table[1].infinity = 0;
19794 
19795         for (i=1; i<4; i++) {
19796             sp_256_proj_point_dbl_n_8(t, t, 64, tmp);
19797             sp_256_proj_to_affine_8(t, tmp);
19798             XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
19799             XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
19800             table[1<<i].infinity = 0;
19801         }
19802 
19803         for (i=1; i<4; i++) {
19804             XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
19805             XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
19806             for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
19807                 XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
19808                 XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
19809                 sp_256_proj_point_add_qz1_8(t, s1, s2, tmp);
19810                 sp_256_proj_to_affine_8(t, tmp);
19811                 XMEMCPY(table[j].x, t->x, sizeof(table->x));
19812                 XMEMCPY(table[j].y, t->y, sizeof(table->y));
19813                 table[j].infinity = 0;
19814             }
19815         }
19816     }
19817 
19818     sp_ecc_point_free(s2, 0, heap);
19819     sp_ecc_point_free(s1, 0, heap);
19820     sp_ecc_point_free( t, 0, heap);
19821 
19822     return err;
19823 }
19824 
19825 #endif /* FP_ECC */
19826 /* Multiply the point by the scalar and return the result.
19827  * If map is true then convert result to affine co-ordinates.
19828  *
19829  * r     Resulting point.
19830  * k     Scalar to multiply by.
19831  * map   Indicates whether to convert result to affine.
19832  * heap  Heap to use for allocation.
19833  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
19834  */
19835 static int sp_256_ecc_mulmod_stripe_8(sp_point* r, sp_point* g,
19836         sp_table_entry* table, sp_digit* k, int map, void* heap)
19837 {
19838 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
19839     sp_point rtd;
19840     sp_point pd;
19841     sp_digit td[2 * 8 * 5];
19842 #endif
19843     sp_point* rt;
19844     sp_point* p = NULL;
19845     sp_digit* t;
19846     int i, j;
19847     int y, x;
19848     int err;
19849 
19850     (void)g;
19851     (void)heap;
19852 
19853     err = sp_ecc_point_new(heap, rtd, rt);
19854     if (err == MP_OKAY)
19855         err = sp_ecc_point_new(heap, pd, p);
19856 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
19857     t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
19858                            DYNAMIC_TYPE_ECC);
19859     if (t == NULL)
19860         err = MEMORY_E;
19861 #else
19862     t = td;
19863 #endif
19864 
19865     if (err == MP_OKAY) {
19866         XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
19867         XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
19868 
19869         y = 0;
19870         for (j=0,x=63; j<4; j++,x+=32)
19871             y |= ((k[x / 32] >> (x % 32)) & 1) << j;
19872         XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
19873         XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
19874         rt->infinity = table[y].infinity;
19875         for (i=62; i>=0; i--) {
19876             y = 0;
19877             for (j=0,x=i; j<4; j++,x+=64)
19878                 y |= ((k[x / 32] >> (x % 32)) & 1) << j;
19879 
19880             sp_256_proj_point_dbl_8(rt, rt, t);
19881             XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
19882             XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
19883             p->infinity = table[y].infinity;
19884             sp_256_proj_point_add_qz1_8(rt, rt, p, t);
19885         }
19886 
19887         if (map)
19888             sp_256_map_8(r, rt, t);
19889         else
19890             XMEMCPY(r, rt, sizeof(sp_point));
19891     }
19892 
19893 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
19894     if (t != NULL)
19895         XFREE(t, heap, DYNAMIC_TYPE_ECC);
19896 #endif
19897     sp_ecc_point_free(p, 0, heap);
19898     sp_ecc_point_free(rt, 0, heap);
19899 
19900     return err;
19901 }
19902 
19903 #ifdef FP_ECC
19904 #ifndef FP_ENTRIES
19905     #define FP_ENTRIES 16
19906 #endif
19907 
19908 typedef struct sp_cache_t {
19909     sp_digit x[8];
19910     sp_digit y[8];
19911     sp_table_entry table[16];
19912     uint32_t cnt;
19913     int set;
19914 } sp_cache_t;
19915 
19916 static THREAD_LS_T sp_cache_t sp_cache[FP_ENTRIES];
19917 static THREAD_LS_T int sp_cache_last = -1;
19918 static THREAD_LS_T int sp_cache_inited = 0;
19919 
19920 #ifndef HAVE_THREAD_LS
19921     static volatile int initCacheMutex = 0;
19922     static wolfSSL_Mutex sp_cache_lock;
19923 #endif
19924 
19925 static void sp_ecc_get_cache(sp_point* g, sp_cache_t** cache)
19926 {
19927     int i, j;
19928     uint32_t least;
19929 
19930     if (sp_cache_inited == 0) {
19931         for (i=0; i<FP_ENTRIES; i++) {
19932             sp_cache[i].set = 0;
19933         }
19934         sp_cache_inited = 1;
19935     }
19936 
19937     /* Compare point with those in cache. */
19938     for (i=0; i<FP_ENTRIES; i++) {
19939         if (!sp_cache[i].set)
19940             continue;
19941 
19942         if (sp_256_cmp_equal_8(g->x, sp_cache[i].x) & 
19943                            sp_256_cmp_equal_8(g->y, sp_cache[i].y)) {
19944             sp_cache[i].cnt++;
19945             break;
19946         }
19947     }
19948 
19949     /* No match. */
19950     if (i == FP_ENTRIES) {
19951         /* Find empty entry. */
19952         i = (sp_cache_last + 1) % FP_ENTRIES;
19953         for (; i != sp_cache_last; i=(i+1)%FP_ENTRIES) {
19954             if (!sp_cache[i].set) {
19955                 break;
19956             }
19957         }
19958 
19959         /* Evict least used. */
19960         if (i == sp_cache_last) {
19961             least = sp_cache[0].cnt;
19962             for (j=1; j<FP_ENTRIES; j++) {
19963                 if (sp_cache[j].cnt < least) {
19964                     i = j;
19965                     least = sp_cache[i].cnt;
19966                 }
19967             }
19968         }
19969 
19970         XMEMCPY(sp_cache[i].x, g->x, sizeof(sp_cache[i].x));
19971         XMEMCPY(sp_cache[i].y, g->y, sizeof(sp_cache[i].y));
19972         sp_cache[i].set = 1;
19973         sp_cache[i].cnt = 1;
19974     }
19975 
19976     *cache = &sp_cache[i];
19977     sp_cache_last = i;
19978 }
19979 #endif /* FP_ECC */
19980 
19981 /* Multiply the base point of P256 by the scalar and return the result.
19982  * If map is true then convert result to affine co-ordinates.
19983  *
19984  * r     Resulting point.
19985  * g     Point to multiply.
19986  * k     Scalar to multiply by.
19987  * map   Indicates whether to convert result to affine.
19988  * heap  Heap to use for allocation.
19989  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
19990  */
19991 static int sp_256_ecc_mulmod_8(sp_point* r, sp_point* g, sp_digit* k,
19992         int map, void* heap)
19993 {
19994 #ifndef FP_ECC
19995     return sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
19996 #else
19997     sp_digit tmp[2 * 8 * 5];
19998     sp_cache_t* cache;
19999     int err = MP_OKAY;
20000 
20001 #ifndef HAVE_THREAD_LS
20002     if (initCacheMutex == 0) {
20003          wc_InitMutex(&sp_cache_lock);
20004          initCacheMutex = 1;
20005     }
20006     if (wc_LockMutex(&sp_cache_lock) != 0)
20007        err = BAD_MUTEX_E;
20008 #endif /* HAVE_THREAD_LS */
20009 
20010     if (err == MP_OKAY) {
20011         sp_ecc_get_cache(g, &cache);
20012         if (cache->cnt == 2)
20013             sp_256_gen_stripe_table_8(g, cache->table, tmp, heap);
20014 
20015 #ifndef HAVE_THREAD_LS
20016         wc_UnLockMutex(&sp_cache_lock);
20017 #endif /* HAVE_THREAD_LS */
20018 
20019         if (cache->cnt < 2) {
20020             err = sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
20021         }
20022         else {
20023             err = sp_256_ecc_mulmod_stripe_8(r, g, cache->table, k,
20024                     map, heap);
20025         }
20026     }
20027 
20028     return err;
20029 #endif
20030 }
20031 
20032 #else
20033 #ifdef FP_ECC
20034 /* Generate the pre-computed table of points for the base point.
20035  *
20036  * a      The base point.
20037  * table  Place to store generated point data.
20038  * tmp    Temprorary data.
20039  * heap  Heap to use for allocation.
20040  */
20041 static int sp_256_gen_stripe_table_8(sp_point* a,
20042         sp_table_entry* table, sp_digit* tmp, void* heap)
20043 {
20044 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
20045     sp_point td, s1d, s2d;
20046 #endif
20047     sp_point* t;
20048     sp_point* s1 = NULL;
20049     sp_point* s2 = NULL;
20050     int i, j;
20051     int err;
20052 
20053     (void)heap;
20054 
20055     err = sp_ecc_point_new(heap, td, t);
20056     if (err == MP_OKAY)
20057         err = sp_ecc_point_new(heap, s1d, s1);
20058     if (err == MP_OKAY)
20059         err = sp_ecc_point_new(heap, s2d, s2);
20060 
20061     if (err == MP_OKAY)
20062         err = sp_256_mod_mul_norm_8(t->x, a->x, p256_mod);
20063     if (err == MP_OKAY)
20064         err = sp_256_mod_mul_norm_8(t->y, a->y, p256_mod);
20065     if (err == MP_OKAY)
20066         err = sp_256_mod_mul_norm_8(t->z, a->z, p256_mod);
20067     if (err == MP_OKAY) {
20068         t->infinity = 0;
20069         sp_256_proj_to_affine_8(t, tmp);
20070 
20071         XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
20072         s1->infinity = 0;
20073         XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
20074         s2->infinity = 0;
20075 
20076         /* table[0] = {0, 0, infinity} */
20077         XMEMSET(&table[0], 0, sizeof(sp_table_entry));
20078         table[0].infinity = 1;
20079         /* table[1] = Affine version of 'a' in Montgomery form */
20080         XMEMCPY(table[1].x, t->x, sizeof(table->x));
20081         XMEMCPY(table[1].y, t->y, sizeof(table->y));
20082         table[1].infinity = 0;
20083 
20084         for (i=1; i<8; i++) {
20085             sp_256_proj_point_dbl_n_8(t, t, 32, tmp);
20086             sp_256_proj_to_affine_8(t, tmp);
20087             XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
20088             XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
20089             table[1<<i].infinity = 0;
20090         }
20091 
20092         for (i=1; i<8; i++) {
20093             XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
20094             XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
20095             for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
20096                 XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
20097                 XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
20098                 sp_256_proj_point_add_qz1_8(t, s1, s2, tmp);
20099                 sp_256_proj_to_affine_8(t, tmp);
20100                 XMEMCPY(table[j].x, t->x, sizeof(table->x));
20101                 XMEMCPY(table[j].y, t->y, sizeof(table->y));
20102                 table[j].infinity = 0;
20103             }
20104         }
20105     }
20106 
20107     sp_ecc_point_free(s2, 0, heap);
20108     sp_ecc_point_free(s1, 0, heap);
20109     sp_ecc_point_free( t, 0, heap);
20110 
20111     return err;
20112 }
20113 
20114 #endif /* FP_ECC */
20115 /* Multiply the point by the scalar and return the result.
20116  * If map is true then convert result to affine co-ordinates.
20117  *
20118  * r     Resulting point.
20119  * k     Scalar to multiply by.
20120  * map   Indicates whether to convert result to affine.
20121  * heap  Heap to use for allocation.
20122  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
20123  */
20124 static int sp_256_ecc_mulmod_stripe_8(sp_point* r, sp_point* g,
20125         sp_table_entry* table, sp_digit* k, int map, void* heap)
20126 {
20127 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
20128     sp_point rtd;
20129     sp_point pd;
20130     sp_digit td[2 * 8 * 5];
20131 #endif
20132     sp_point* rt;
20133     sp_point* p = NULL;
20134     sp_digit* t;
20135     int i, j;
20136     int y, x;
20137     int err;
20138 
20139     (void)g;
20140     (void)heap;
20141 
20142     err = sp_ecc_point_new(heap, rtd, rt);
20143     if (err == MP_OKAY)
20144         err = sp_ecc_point_new(heap, pd, p);
20145 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
20146     t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
20147                            DYNAMIC_TYPE_ECC);
20148     if (t == NULL)
20149         err = MEMORY_E;
20150 #else
20151     t = td;
20152 #endif
20153 
20154     if (err == MP_OKAY) {
20155         XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
20156         XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
20157 
20158         y = 0;
20159         for (j=0,x=31; j<8; j++,x+=32)
20160             y |= ((k[x / 32] >> (x % 32)) & 1) << j;
20161         XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
20162         XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
20163         rt->infinity = table[y].infinity;
20164         for (i=30; i>=0; i--) {
20165             y = 0;
20166             for (j=0,x=i; j<8; j++,x+=32)
20167                 y |= ((k[x / 32] >> (x % 32)) & 1) << j;
20168 
20169             sp_256_proj_point_dbl_8(rt, rt, t);
20170             XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
20171             XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
20172             p->infinity = table[y].infinity;
20173             sp_256_proj_point_add_qz1_8(rt, rt, p, t);
20174         }
20175 
20176         if (map)
20177             sp_256_map_8(r, rt, t);
20178         else
20179             XMEMCPY(r, rt, sizeof(sp_point));
20180     }
20181 
20182 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
20183     if (t != NULL)
20184         XFREE(t, heap, DYNAMIC_TYPE_ECC);
20185 #endif
20186     sp_ecc_point_free(p, 0, heap);
20187     sp_ecc_point_free(rt, 0, heap);
20188 
20189     return err;
20190 }
20191 
#ifdef FP_ECC
/* Number of points that can be cached at once. */
#ifndef FP_ENTRIES
    #define FP_ENTRIES 16
#endif

/* One cached point together with its pre-computed table. */
typedef struct sp_cache_t {
    sp_digit x[8];              /* X ordinate of the cached point.          */
    sp_digit y[8];              /* Y ordinate of the cached point.          */
    sp_table_entry table[256];  /* Pre-computed table for the point.        */
    uint32_t cnt;               /* Times the point has been looked up.      */
    int set;                    /* Non-zero when this slot holds a point.   */
} sp_cache_t;

/* The cache, the index of the slot returned by the last lookup (-1 before
 * the first lookup) and a flag recording that the slots have been reset.
 */
static THREAD_LS_T sp_cache_t sp_cache[FP_ENTRIES];
static THREAD_LS_T int sp_cache_last = -1;
static THREAD_LS_T int sp_cache_inited = 0;

#ifndef HAVE_THREAD_LS
    /* Lock protecting the cache and a flag recording it was initialised. */
    static volatile int initCacheMutex = 0;
    static wolfSSL_Mutex sp_cache_lock;
#endif

/* Find the cache slot for the point, claiming a slot when it is not cached.
 *
 * A matching slot has its use count incremented. Otherwise an unused slot is
 * taken or, when every slot is in use, the slot with the lowest use count is
 * evicted (lowest index wins ties). A newly claimed slot starts with a use
 * count of 1; its table is NOT generated here.
 *
 * This function does no locking itself; the callers visible in this file
 * hold sp_cache_lock around the call when HAVE_THREAD_LS is not defined.
 *
 * g      Point to look up (x and y ordinates are compared).
 * cache  Receives a pointer to the slot for the point.
 */
static void sp_ecc_get_cache(sp_point* g, sp_cache_t** cache)
{
    int i, j;
    uint32_t least;

    if (sp_cache_inited == 0) {
        for (i=0; i<FP_ENTRIES; i++) {
            sp_cache[i].set = 0;
        }
        sp_cache_inited = 1;
    }

    /* Compare point with those in cache. */
    for (i=0; i<FP_ENTRIES; i++) {
        if (!sp_cache[i].set)
            continue;

        /* Bitwise AND so both ordinates are always compared. */
        if (sp_256_cmp_equal_8(g->x, sp_cache[i].x) &
                           sp_256_cmp_equal_8(g->y, sp_cache[i].y)) {
            sp_cache[i].cnt++;
            break;
        }
    }

    /* No match. */
    if (i == FP_ENTRIES) {
        /* Find empty entry, starting just after the last one used. */
        i = (sp_cache_last + 1) % FP_ENTRIES;
        for (; i != sp_cache_last; i=(i+1)%FP_ENTRIES) {
            if (!sp_cache[i].set) {
                break;
            }
        }

        /* Evict least used. */
        if (i == sp_cache_last) {
            /* Start from slot 0 so it is the victim when nothing is lower;
             * otherwise the most recently used slot would be evicted. */
            i = 0;
            least = sp_cache[0].cnt;
            for (j=1; j<FP_ENTRIES; j++) {
                if (sp_cache[j].cnt < least) {
                    i = j;
                    least = sp_cache[i].cnt;
                }
            }
        }

        XMEMCPY(sp_cache[i].x, g->x, sizeof(sp_cache[i].x));
        XMEMCPY(sp_cache[i].y, g->y, sizeof(sp_cache[i].y));
        sp_cache[i].set = 1;
        sp_cache[i].cnt = 1;
    }

    *cache = &sp_cache[i];
    sp_cache_last = i;
}
#endif /* FP_ECC */
20269 
20270 /* Multiply the base point of P256 by the scalar and return the result.
20271  * If map is true then convert result to affine co-ordinates.
20272  *
20273  * r     Resulting point.
20274  * g     Point to multiply.
20275  * k     Scalar to multiply by.
20276  * map   Indicates whether to convert result to affine.
20277  * heap  Heap to use for allocation.
20278  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
20279  */
20280 static int sp_256_ecc_mulmod_8(sp_point* r, sp_point* g, sp_digit* k,
20281         int map, void* heap)
20282 {
20283 #ifndef FP_ECC
20284     return sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
20285 #else
20286     sp_digit tmp[2 * 8 * 5];
20287     sp_cache_t* cache;
20288     int err = MP_OKAY;
20289 
20290 #ifndef HAVE_THREAD_LS
20291     if (initCacheMutex == 0) {
20292          wc_InitMutex(&sp_cache_lock);
20293          initCacheMutex = 1;
20294     }
20295     if (wc_LockMutex(&sp_cache_lock) != 0)
20296        err = BAD_MUTEX_E;
20297 #endif /* HAVE_THREAD_LS */
20298 
20299     if (err == MP_OKAY) {
20300         sp_ecc_get_cache(g, &cache);
20301         if (cache->cnt == 2)
20302             sp_256_gen_stripe_table_8(g, cache->table, tmp, heap);
20303 
20304 #ifndef HAVE_THREAD_LS
20305         wc_UnLockMutex(&sp_cache_lock);
20306 #endif /* HAVE_THREAD_LS */
20307 
20308         if (cache->cnt < 2) {
20309             err = sp_256_ecc_mulmod_fast_8(r, g, k, map, heap);
20310         }
20311         else {
20312             err = sp_256_ecc_mulmod_stripe_8(r, g, cache->table, k,
20313                     map, heap);
20314         }
20315     }
20316 
20317     return err;
20318 #endif
20319 }
20320 
20321 #endif /* WOLFSSL_SP_SMALL */
20322 /* Multiply the point by the scalar and return the result.
20323  * If map is true then convert result to affine co-ordinates.
20324  *
20325  * km    Scalar to multiply by.
20326  * p     Point to multiply.
20327  * r     Resulting point.
20328  * map   Indicates whether to convert result to affine.
20329  * heap  Heap to use for allocation.
20330  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
20331  */
20332 int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map,
20333         void* heap)
20334 {
20335 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
20336     sp_point p;
20337     sp_digit kd[8];
20338 #endif
20339     sp_point* point;
20340     sp_digit* k = NULL;
20341     int err = MP_OKAY;
20342 #ifdef HAVE_INTEL_AVX2
20343     word32 cpuid_flags = cpuid_get_flags();
20344 #endif
20345 
20346     err = sp_ecc_point_new(heap, p, point);
20347 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
20348     if (err == MP_OKAY) {
20349         k = XMALLOC(sizeof(sp_digit) * 8, heap, DYNAMIC_TYPE_ECC);
20350         if (k == NULL)
20351             err = MEMORY_E;
20352     }
20353 #else
20354     k = kd;
20355 #endif
20356     if (err == MP_OKAY) {
20357         sp_256_from_mp(k, 8, km);
20358         sp_256_point_from_ecc_point_8(point, gm);
20359 
20360 #ifdef HAVE_INTEL_AVX2
20361         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
20362             err = sp_256_ecc_mulmod_avx2_8(point, point, k, map, heap);
20363         else
20364 #endif
20365             err = sp_256_ecc_mulmod_8(point, point, k, map, heap);
20366     }
20367     if (err == MP_OKAY)
20368         err = sp_256_point_to_ecc_point_8(point, r);
20369 
20370 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
20371     if (k != NULL)
20372         XFREE(k, heap, DYNAMIC_TYPE_ECC);
20373 #endif
20374     sp_ecc_point_free(point, 0, heap);
20375 
20376     return err;
20377 }
20378 
20379 #ifdef WOLFSSL_SP_SMALL
20380 static sp_table_entry p256_table[16] = {
20381     /* 0 */
20382     { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
20383       { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
20384       1 },
20385     /* 1 */
20386     { { 0x18a9143c,0x79e730d4,0x5fedb601,0x75ba95fc,0x77622510,0x79fb732b,
20387         0xa53755c6,0x18905f76 },
20388       { 0xce95560a,0xddf25357,0xba19e45c,0x8b4ab8e4,0xdd21f325,0xd2e88688,
20389         0x25885d85,0x8571ff18 },
20390       0 },
20391     /* 2 */
20392     { { 0x16a0d2bb,0x4f922fc5,0x1a623499,0x0d5cc16c,0x57c62c8b,0x9241cf3a,
20393         0xfd1b667f,0x2f5e6961 },
20394       { 0xf5a01797,0x5c15c70b,0x60956192,0x3d20b44d,0x071fdb52,0x04911b37,
20395         0x8d6f0f7b,0xf648f916 },
20396       0 },
20397     /* 3 */
20398     { { 0xe137bbbc,0x9e566847,0x8a6a0bec,0xe434469e,0x79d73463,0xb1c42761,
20399         0x133d0015,0x5abe0285 },
20400       { 0xc04c7dab,0x92aa837c,0x43260c07,0x573d9f4c,0x78e6cc37,0x0c931562,
20401         0x6b6f7383,0x94bb725b },
20402       0 },
20403     /* 4 */
20404     { { 0xbfe20925,0x62a8c244,0x8fdce867,0x91c19ac3,0xdd387063,0x5a96a5d5,
20405         0x21d324f6,0x61d587d4 },
20406       { 0xa37173ea,0xe87673a2,0x53778b65,0x23848008,0x05bab43e,0x10f8441e,
20407         0x4621efbe,0xfa11fe12 },
20408       0 },
20409     /* 5 */
20410     { { 0x2cb19ffd,0x1c891f2b,0xb1923c23,0x01ba8d5b,0x8ac5ca8e,0xb6d03d67,
20411         0x1f13bedc,0x586eb04c },
20412       { 0x27e8ed09,0x0c35c6e5,0x1819ede2,0x1e81a33c,0x56c652fa,0x278fd6c0,
20413         0x70864f11,0x19d5ac08 },
20414       0 },
20415     /* 6 */
20416     { { 0xd2b533d5,0x62577734,0xa1bdddc0,0x673b8af6,0xa79ec293,0x577e7c9a,
20417         0xc3b266b1,0xbb6de651 },
20418       { 0xb65259b3,0xe7e9303a,0xd03a7480,0xd6a0afd3,0x9b3cfc27,0xc5ac83d1,
20419         0x5d18b99b,0x60b4619a },
20420       0 },
20421     /* 7 */
20422     { { 0x1ae5aa1c,0xbd6a38e1,0x49e73658,0xb8b7652b,0xee5f87ed,0x0b130014,
20423         0xaeebffcd,0x9d0f27b2 },
20424       { 0x7a730a55,0xca924631,0xddbbc83a,0x9c955b2f,0xac019a71,0x07c1dfe0,
20425         0x356ec48d,0x244a566d },
20426       0 },
20427     /* 8 */
20428     { { 0xf4f8b16a,0x56f8410e,0xc47b266a,0x97241afe,0x6d9c87c1,0x0a406b8e,
20429         0xcd42ab1b,0x803f3e02 },
20430       { 0x04dbec69,0x7f0309a8,0x3bbad05f,0xa83b85f7,0xad8e197f,0xc6097273,
20431         0x5067adc1,0xc097440e },
20432       0 },
20433     /* 9 */
20434     { { 0xc379ab34,0x846a56f2,0x841df8d1,0xa8ee068b,0x176c68ef,0x20314459,
20435         0x915f1f30,0xf1af32d5 },
20436       { 0x5d75bd50,0x99c37531,0xf72f67bc,0x837cffba,0x48d7723f,0x0613a418,
20437         0xe2d41c8b,0x23d0f130 },
20438       0 },
20439     /* 10 */
20440     { { 0xd5be5a2b,0xed93e225,0x5934f3c6,0x6fe79983,0x22626ffc,0x43140926,
20441         0x7990216a,0x50bbb4d9 },
20442       { 0xe57ec63e,0x378191c6,0x181dcdb2,0x65422c40,0x0236e0f6,0x41a8099b,
20443         0x01fe49c3,0x2b100118 },
20444       0 },
20445     /* 11 */
20446     { { 0x9b391593,0xfc68b5c5,0x598270fc,0xc385f5a2,0xd19adcbb,0x7144f3aa,
20447         0x83fbae0c,0xdd558999 },
20448       { 0x74b82ff4,0x93b88b8e,0x71e734c9,0xd2e03c40,0x43c0322a,0x9a7a9eaf,
20449         0x149d6041,0xe6e4c551 },
20450       0 },
20451     /* 12 */
20452     { { 0x80ec21fe,0x5fe14bfe,0xc255be82,0xf6ce116a,0x2f4a5d67,0x98bc5a07,
20453         0xdb7e63af,0xfad27148 },
20454       { 0x29ab05b3,0x90c0b6ac,0x4e251ae6,0x37a9a83c,0xc2aade7d,0x0a7dc875,
20455         0x9f0e1a84,0x77387de3 },
20456       0 },
20457     /* 13 */
20458     { { 0xa56c0dd7,0x1e9ecc49,0x46086c74,0xa5cffcd8,0xf505aece,0x8f7a1408,
20459         0xbef0c47e,0xb37b85c0 },
20460       { 0xcc0e6a8f,0x3596b6e4,0x6b388f23,0xfd6d4bbf,0xc39cef4e,0xaba453fa,
20461         0xf9f628d5,0x9c135ac8 },
20462       0 },
20463     /* 14 */
20464     { { 0x95c8f8be,0x0a1c7294,0x3bf362bf,0x2961c480,0xdf63d4ac,0x9e418403,
20465         0x91ece900,0xc109f9cb },
20466       { 0x58945705,0xc2d095d0,0xddeb85c0,0xb9083d96,0x7a40449b,0x84692b8d,
20467         0x2eee1ee1,0x9bc3344f },
20468       0 },
20469     /* 15 */
20470     { { 0x42913074,0x0d5ae356,0x48a542b1,0x55491b27,0xb310732a,0x469ca665,
20471         0x5f1a4cc1,0x29591d52 },
20472       { 0xb84f983f,0xe76f5b6b,0x9f5f84e1,0xbe7eef41,0x80baa189,0x1200d496,
20473         0x18ef332c,0x6376551f },
20474       0 },
20475 };
20476 
20477 /* Multiply the base point of P256 by the scalar and return the result.
20478  * If map is true then convert result to affine co-ordinates.
20479  *
20480  * r     Resulting point.
20481  * k     Scalar to multiply by.
20482  * map   Indicates whether to convert result to affine.
20483  * heap  Heap to use for allocation.
20484  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
20485  */
20486 static int sp_256_ecc_mulmod_base_8(sp_point* r, sp_digit* k,
20487         int map, void* heap)
20488 {
20489     return sp_256_ecc_mulmod_stripe_8(r, &p256_base, p256_table,
20490                                       k, map, heap);
20491 }
20492 
20493 #else
20494 static sp_table_entry p256_table[256] = {
20495     /* 0 */
20496     { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
20497       { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
20498       1 },
20499     /* 1 */
20500     { { 0x18a9143c,0x79e730d4,0x5fedb601,0x75ba95fc,0x77622510,0x79fb732b,
20501         0xa53755c6,0x18905f76 },
20502       { 0xce95560a,0xddf25357,0xba19e45c,0x8b4ab8e4,0xdd21f325,0xd2e88688,
20503         0x25885d85,0x8571ff18 },
20504       0 },
20505     /* 2 */
20506     { { 0x4147519a,0x20288602,0x26b372f0,0xd0981eac,0xa785ebc8,0xa9d4a7ca,
20507         0xdbdf58e9,0xd953c50d },
20508       { 0xfd590f8f,0x9d6361cc,0x44e6c917,0x72e9626b,0x22eb64cf,0x7fd96110,
20509         0x9eb288f3,0x863ebb7e },
20510       0 },
20511     /* 3 */
20512     { { 0x5cdb6485,0x7856b623,0x2f0a2f97,0x808f0ea2,0x4f7e300b,0x3e68d954,
20513         0xb5ff80a0,0x00076055 },
20514       { 0x838d2010,0x7634eb9b,0x3243708a,0x54014fbb,0x842a6606,0xe0e47d39,
20515         0x34373ee0,0x83087761 },
20516       0 },
20517     /* 4 */
20518     { { 0x16a0d2bb,0x4f922fc5,0x1a623499,0x0d5cc16c,0x57c62c8b,0x9241cf3a,
20519         0xfd1b667f,0x2f5e6961 },
20520       { 0xf5a01797,0x5c15c70b,0x60956192,0x3d20b44d,0x071fdb52,0x04911b37,
20521         0x8d6f0f7b,0xf648f916 },
20522       0 },
20523     /* 5 */
20524     { { 0xe137bbbc,0x9e566847,0x8a6a0bec,0xe434469e,0x79d73463,0xb1c42761,
20525         0x133d0015,0x5abe0285 },
20526       { 0xc04c7dab,0x92aa837c,0x43260c07,0x573d9f4c,0x78e6cc37,0x0c931562,
20527         0x6b6f7383,0x94bb725b },
20528       0 },
20529     /* 6 */
20530     { { 0x720f141c,0xbbf9b48f,0x2df5bc74,0x6199b3cd,0x411045c4,0xdc3f6129,
20531         0x2f7dc4ef,0xcdd6bbcb },
20532       { 0xeaf436fd,0xcca6700b,0xb99326be,0x6f647f6d,0x014f2522,0x0c0fa792,
20533         0x4bdae5f6,0xa361bebd },
20534       0 },
20535     /* 7 */
20536     { { 0x597c13c7,0x28aa2558,0x50b7c3e1,0xc38d635f,0xf3c09d1d,0x07039aec,
20537         0xc4b5292c,0xba12ca09 },
20538       { 0x59f91dfd,0x9e408fa4,0xceea07fb,0x3af43b66,0x9d780b29,0x1eceb089,
20539         0x701fef4b,0x53ebb99d },
20540       0 },
20541     /* 8 */
20542     { { 0xb0e63d34,0x4fe7ee31,0xa9e54fab,0xf4600572,0xd5e7b5a4,0xc0493334,
20543         0x06d54831,0x8589fb92 },
20544       { 0x6583553a,0xaa70f5cc,0xe25649e5,0x0879094a,0x10044652,0xcc904507,
20545         0x02541c4f,0xebb0696d },
20546       0 },
20547     /* 9 */
20548     { { 0xac1647c5,0x4616ca15,0xc4cf5799,0xb8127d47,0x764dfbac,0xdc666aa3,
20549         0xd1b27da3,0xeb2820cb },
20550       { 0x6a87e008,0x9406f8d8,0x922378f3,0xd87dfa9d,0x80ccecb2,0x56ed2e42,
20551         0x55a7da1d,0x1f28289b },
20552       0 },
20553     /* 10 */
20554     { { 0x3b89da99,0xabbaa0c0,0xb8284022,0xa6f2d79e,0xb81c05e8,0x27847862,
20555         0x05e54d63,0x337a4b59 },
20556       { 0x21f7794a,0x3c67500d,0x7d6d7f61,0x207005b7,0x04cfd6e8,0x0a5a3781,
20557         0xf4c2fbd6,0x0d65e0d5 },
20558       0 },
20559     /* 11 */
20560     { { 0xb5275d38,0xd9d09bbe,0x0be0a358,0x4268a745,0x973eb265,0xf0762ff4,
20561         0x52f4a232,0xc23da242 },
20562       { 0x0b94520c,0x5da1b84f,0xb05bd78e,0x09666763,0x94d29ea1,0x3a4dcb86,
20563         0xc790cff1,0x19de3b8c },
20564       0 },
20565     /* 12 */
20566     { { 0x26c5fe04,0x183a716c,0x3bba1bdb,0x3b28de0b,0xa4cb712c,0x7432c586,
20567         0x91fccbfd,0xe34dcbd4 },
20568       { 0xaaa58403,0xb408d46b,0x82e97a53,0x9a697486,0x36aaa8af,0x9e390127,
20569         0x7b4e0f7f,0xe7641f44 },
20570       0 },
20571     /* 13 */
20572     { { 0xdf64ba59,0x7d753941,0x0b0242fc,0xd33f10ec,0xa1581859,0x4f06dfc6,
20573         0x052a57bf,0x4a12df57 },
20574       { 0x9439dbd0,0xbfa6338f,0xbde53e1f,0xd3c24bd4,0x21f1b314,0xfd5e4ffa,
20575         0xbb5bea46,0x6af5aa93 },
20576       0 },
20577     /* 14 */
20578     { { 0x10c91999,0xda10b699,0x2a580491,0x0a24b440,0xb8cc2090,0x3e0094b4,
20579         0x66a44013,0x5fe3475a },
20580       { 0xf93e7b4b,0xb0f8cabd,0x7c23f91a,0x292b501a,0xcd1e6263,0x42e889ae,
20581         0xecfea916,0xb544e308 },
20582       0 },
20583     /* 15 */
20584     { { 0x16ddfdce,0x6478c6e9,0xf89179e6,0x2c329166,0x4d4e67e1,0x4e8d6e76,
20585         0xa6b0c20b,0xe0b6b2bd },
20586       { 0xbb7efb57,0x0d312df2,0x790c4007,0x1aac0dde,0x679bc944,0xf90336ad,
20587         0x25a63774,0x71c023de },
20588       0 },
20589     /* 16 */
20590     { { 0xbfe20925,0x62a8c244,0x8fdce867,0x91c19ac3,0xdd387063,0x5a96a5d5,
20591         0x21d324f6,0x61d587d4 },
20592       { 0xa37173ea,0xe87673a2,0x53778b65,0x23848008,0x05bab43e,0x10f8441e,
20593         0x4621efbe,0xfa11fe12 },
20594       0 },
20595     /* 17 */
20596     { { 0x2cb19ffd,0x1c891f2b,0xb1923c23,0x01ba8d5b,0x8ac5ca8e,0xb6d03d67,
20597         0x1f13bedc,0x586eb04c },
20598       { 0x27e8ed09,0x0c35c6e5,0x1819ede2,0x1e81a33c,0x56c652fa,0x278fd6c0,
20599         0x70864f11,0x19d5ac08 },
20600       0 },
20601     /* 18 */
20602     { { 0x309a4e1f,0x1e99f581,0xe9270074,0xab7de71b,0xefd28d20,0x26a5ef0b,
20603         0x7f9c563f,0xe7c0073f },
20604       { 0x0ef59f76,0x1f6d663a,0x20fcb050,0x669b3b54,0x7a6602d4,0xc08c1f7a,
20605         0xc65b3c0a,0xe08504fe },
20606       0 },
20607     /* 19 */
20608     { { 0xa031b3ca,0xf098f68d,0xe6da6d66,0x6d1cab9e,0x94f246e8,0x5bfd81fa,
20609         0x5b0996b4,0x78f01882 },
20610       { 0x3a25787f,0xb7eefde4,0x1dccac9b,0x8016f80d,0xb35bfc36,0x0cea4877,
20611         0x7e94747a,0x43a773b8 },
20612       0 },
20613     /* 20 */
20614     { { 0xd2b533d5,0x62577734,0xa1bdddc0,0x673b8af6,0xa79ec293,0x577e7c9a,
20615         0xc3b266b1,0xbb6de651 },
20616       { 0xb65259b3,0xe7e9303a,0xd03a7480,0xd6a0afd3,0x9b3cfc27,0xc5ac83d1,
20617         0x5d18b99b,0x60b4619a },
20618       0 },
20619     /* 21 */
20620     { { 0x1ae5aa1c,0xbd6a38e1,0x49e73658,0xb8b7652b,0xee5f87ed,0x0b130014,
20621         0xaeebffcd,0x9d0f27b2 },
20622       { 0x7a730a55,0xca924631,0xddbbc83a,0x9c955b2f,0xac019a71,0x07c1dfe0,
20623         0x356ec48d,0x244a566d },
20624       0 },
20625     /* 22 */
20626     { { 0xeacf1f96,0x6db0394a,0x024c271c,0x9f2122a9,0x82cbd3b9,0x2626ac1b,
20627         0x3581ef69,0x45e58c87 },
20628       { 0xa38f9dbc,0xd3ff479d,0xe888a040,0xa8aaf146,0x46e0bed7,0x945adfb2,
20629         0xc1e4b7a4,0xc040e21c },
20630       0 },
20631     /* 23 */
20632     { { 0x6f8117b6,0x847af000,0x73a35433,0x651969ff,0x1d9475eb,0x482b3576,
20633         0x682c6ec7,0x1cdf5c97 },
20634       { 0x11f04839,0x7db775b4,0x48de1698,0x7dbeacf4,0xb70b3219,0xb2921dd1,
20635         0xa92dff3d,0x046755f8 },
20636       0 },
20637     /* 24 */
20638     { { 0xbce8ffcd,0xcc8ac5d2,0x2fe61a82,0x0d53c48b,0x7202d6c7,0xf6f16172,
20639         0x3b83a5f3,0x046e5e11 },
20640       { 0xd8007f01,0xe7b8ff64,0x5af43183,0x7fb1ef12,0x35e1a03c,0x045c5ea6,
20641         0x303d005b,0x6e0106c3 },
20642       0 },
20643     /* 25 */
20644     { { 0x88dd73b1,0x48c73584,0x995ed0d9,0x7670708f,0xc56a2ab7,0x38385ea8,
20645         0xe901cf1f,0x442594ed },
20646       { 0x12d4b65b,0xf8faa2c9,0x96c90c37,0x94c2343b,0x5e978d1f,0xd326e4a1,
20647         0x4c2ee68e,0xa796fa51 },
20648       0 },
20649     /* 26 */
20650     { { 0x823addd7,0x359fb604,0xe56693b3,0x9e2a6183,0x3cbf3c80,0xf885b78e,
20651         0xc69766e9,0xe4ad2da9 },
20652       { 0x8e048a61,0x357f7f42,0xc092d9a0,0x082d198c,0xc03ed8ef,0xfc3a1af4,
20653         0xc37b5143,0xc5e94046 },
20654       0 },
20655     /* 27 */
20656     { { 0x2be75f9e,0x476a538c,0xcb123a78,0x6fd1a9e8,0xb109c04b,0xd85e4df0,
20657         0xdb464747,0x63283daf },
20658       { 0xbaf2df15,0xce728cf7,0x0ad9a7f4,0xe592c455,0xe834bcc3,0xfab226ad,
20659         0x1981a938,0x68bd19ab },
20660       0 },
20661     /* 28 */
20662     { { 0x1887d659,0xc08ead51,0xb359305a,0x3374d5f4,0xcfe74fe3,0x96986981,
20663         0x3c6fdfd6,0x495292f5 },
20664       { 0x1acec896,0x4a878c9e,0xec5b4484,0xd964b210,0x664d60a7,0x6696f7e2,
20665         0x26036837,0x0ec7530d },
20666       0 },
20667     /* 29 */
20668     { { 0xad2687bb,0x2da13a05,0xf32e21fa,0xa1f83b6a,0x1dd4607b,0x390f5ef5,
20669         0x64863f0b,0x0f6207a6 },
20670       { 0x0f138233,0xbd67e3bb,0x272aa718,0xdd66b96c,0x26ec88ae,0x8ed00407,
20671         0x08ed6dcf,0xff0db072 },
20672       0 },
20673     /* 30 */
20674     { { 0x4c95d553,0x749fa101,0x5d680a8a,0xa44052fd,0xff3b566f,0x183b4317,
20675         0x88740ea3,0x313b513c },
20676       { 0x08d11549,0xb402e2ac,0xb4dee21c,0x071ee10b,0x47f2320e,0x26b987dd,
20677         0x86f19f81,0x2d3abcf9 },
20678       0 },
20679     /* 31 */
20680     { { 0x815581a2,0x4c288501,0x632211af,0x9a0a6d56,0x0cab2e99,0x19ba7a0f,
20681         0xded98cdf,0xc036fa10 },
20682       { 0xc1fbd009,0x29ae08ba,0x06d15816,0x0b68b190,0x9b9e0d8f,0xc2eb3277,
20683         0xb6d40194,0xa6b2a2c4 },
20684       0 },
20685     /* 32 */
20686     { { 0x6d3549cf,0xd433e50f,0xfacd665e,0x6f33696f,0xce11fcb4,0x695bfdac,
20687         0xaf7c9860,0x810ee252 },
20688       { 0x7159bb2c,0x65450fe1,0x758b357b,0xf7dfbebe,0xd69fea72,0x2b057e74,
20689         0x92731745,0xd485717a },
20690       0 },
20691     /* 33 */
20692     { { 0xf0cb5a98,0x11741a8a,0x1f3110bf,0xd3da8f93,0xab382adf,0x1994e2cb,
20693         0x2f9a604e,0x6a6045a7 },
20694       { 0xa2b2411d,0x170c0d3f,0x510e96e0,0xbe0eb83e,0x8865b3cc,0x3bcc9f73,
20695         0xf9e15790,0xd3e45cfa },
20696       0 },
20697     /* 34 */
20698     { { 0xe83f7669,0xce1f69bb,0x72877d6b,0x09f8ae82,0x3244278d,0x9548ae54,
20699         0xe3c2c19c,0x207755de },
20700       { 0x6fef1945,0x87bd61d9,0xb12d28c3,0x18813cef,0x72df64aa,0x9fbcd1d6,
20701         0x7154b00d,0x48dc5ee5 },
20702       0 },
20703     /* 35 */
20704     { { 0xf7e5a199,0x123790bf,0x989ccbb7,0xe0efb8cf,0x0a519c79,0xc27a2bfe,
20705         0xdff6f445,0xf2fb0aed },
20706       { 0xf0b5025f,0x41c09575,0x40fa9f22,0x550543d7,0x380bfbd0,0x8fa3c8ad,
20707         0xdb28d525,0xa13e9015 },
20708       0 },
20709     /* 36 */
20710     { { 0xa2b65cbc,0xf9f7a350,0x2a464226,0x0b04b972,0xe23f07a1,0x265ce241,
20711         0x1497526f,0x2bf0d6b0 },
20712       { 0x4b216fb7,0xd3d4dd3f,0xfbdda26a,0xf7d7b867,0x6708505c,0xaeb7b83f,
20713         0x162fe89f,0x42a94a5a },
20714       0 },
20715     /* 37 */
20716     { { 0xeaadf191,0x5846ad0b,0x25a268d7,0x0f8a4890,0x494dc1f6,0xe8603050,
20717         0xc65ede3d,0x2c2dd969 },
20718       { 0x93849c17,0x6d02171d,0x1da250dd,0x460488ba,0x3c3a5485,0x4810c706,
20719         0x42c56dbc,0xf437fa1f },
20720       0 },
20721     /* 38 */
20722     { { 0x4a0f7dab,0x6aa0d714,0x1776e9ac,0x0f049793,0xf5f39786,0x52c0a050,
20723         0x54707aa8,0xaaf45b33 },
20724       { 0xc18d364a,0x85e37c33,0x3e497165,0xd40b9b06,0x15ec5444,0xf4171681,
20725         0xf4f272bc,0xcdf6310d },
20726       0 },
20727     /* 39 */
20728     { { 0x8ea8b7ef,0x7473c623,0x85bc2287,0x08e93518,0x2bda8e34,0x41956772,
20729         0xda9e2ff2,0xf0d008ba },
20730       { 0x2414d3b1,0x2912671d,0xb019ea76,0xb3754985,0x453bcbdb,0x5c61b96d,
20731         0xca887b8b,0x5bd5c2f5 },
20732       0 },
20733     /* 40 */
20734     { { 0xf49a3154,0xef0f469e,0x6e2b2e9a,0x3e85a595,0xaa924a9c,0x45aaec1e,
20735         0xa09e4719,0xaa12dfc8 },
20736       { 0x4df69f1d,0x26f27227,0xa2ff5e73,0xe0e4c82c,0xb7a9dd44,0xb9d8ce73,
20737         0xe48ca901,0x6c036e73 },
20738       0 },
20739     /* 41 */
20740     { { 0x0f6e3138,0x5cfae12a,0x25ad345a,0x6966ef00,0x45672bc5,0x8993c64b,
20741         0x96afbe24,0x292ff658 },
20742       { 0x5e213402,0xd5250d44,0x4392c9fe,0xf6580e27,0xda1c72e8,0x097b397f,
20743         0x311b7276,0x644e0c90 },
20744       0 },
20745     /* 42 */
20746     { { 0xa47153f0,0xe1e421e1,0x920418c9,0xb86c3b79,0x705d7672,0x93bdce87,
20747         0xcab79a77,0xf25ae793 },
20748       { 0x6d869d0c,0x1f3194a3,0x4986c264,0x9d55c882,0x096e945e,0x49fb5ea3,
20749         0x13db0a3e,0x39b8e653 },
20750       0 },
20751     /* 43 */
20752     { { 0xb6fd2e59,0x37754200,0x9255c98f,0x35e2c066,0x0e2a5739,0xd9dab21a,
20753         0x0f19db06,0x39122f2f },
20754       { 0x03cad53c,0xcfbce1e0,0xe65c17e3,0x225b2c0f,0x9aa13877,0x72baf1d2,
20755         0xce80ff8d,0x8de80af8 },
20756       0 },
20757     /* 44 */
20758     { { 0x207bbb76,0xafbea8d9,0x21782758,0x921c7e7c,0x1c0436b1,0xdfa2b74b,
20759         0x2e368c04,0x87194906 },
20760       { 0xa3993df5,0xb5f928bb,0xf3b3d26a,0x639d75b5,0x85b55050,0x011aa78a,
20761         0x5b74fde1,0xfc315e6a },
20762       0 },
20763     /* 45 */
20764     { { 0xe8d6ecfa,0x561fd41a,0x1aec7f86,0x5f8c44f6,0x4924741d,0x98452a7b,
20765         0xee389088,0xe6d4a7ad },
20766       { 0x4593c75d,0x60552ed1,0xdd271162,0x70a70da4,0x7ba2c7db,0xd2aede93,
20767         0x9be2ae57,0x35dfaf9a },
20768       0 },
20769     /* 46 */
20770     { { 0xaa736636,0x6b956fcd,0xae2cab7e,0x09f51d97,0x0f349966,0xfb10bf41,
20771         0x1c830d2b,0x1da5c7d7 },
20772       { 0x3cce6825,0x5c41e483,0xf9573c3b,0x15ad118f,0xf23036b8,0xa28552c7,
20773         0xdbf4b9d6,0x7077c0fd },
20774       0 },
20775     /* 47 */
20776     { { 0x46b9661c,0xbf63ff8d,0x0d2cfd71,0xa1dfd36b,0xa847f8f7,0x0373e140,
20777         0xe50efe44,0x53a8632e },
20778       { 0x696d8051,0x0976ff68,0xc74f468a,0xdaec0c95,0x5e4e26bd,0x62994dc3,
20779         0x34e1fcc1,0x028ca76d },
20780       0 },
20781     /* 48 */
20782     { { 0xfc9877ee,0xd11d47dc,0x801d0002,0xc8b36210,0x54c260b6,0xd002c117,
20783         0x6962f046,0x04c17cd8 },
20784       { 0xb0daddf5,0x6d9bd094,0x24ce55c0,0xbea23575,0x72da03b5,0x663356e6,
20785         0xfed97474,0xf7ba4de9 },
20786       0 },
20787     /* 49 */
20788     { { 0xebe1263f,0xd0dbfa34,0x71ae7ce6,0x55763735,0x82a6f523,0xd2440553,
20789         0x52131c41,0xe31f9600 },
20790       { 0xea6b6ec6,0xd1bb9216,0x73c2fc44,0x37a1d12e,0x89d0a294,0xc10e7eac,
20791         0xce34d47b,0xaa3a6259 },
20792       0 },
20793     /* 50 */
20794     { { 0x36f3dcd3,0xfbcf9df5,0xd2bf7360,0x6ceded50,0xdf504f5b,0x491710fa,
20795         0x7e79daee,0x2398dd62 },
20796       { 0x6d09569e,0xcf4705a3,0x5149f769,0xea0619bb,0x35f6034c,0xff9c0377,
20797         0x1c046210,0x5717f5b2 },
20798       0 },
20799     /* 51 */
20800     { { 0x21dd895e,0x9fe229c9,0x40c28451,0x8e518500,0x1d637ecd,0xfa13d239,
20801         0x0e3c28de,0x660a2c56 },
20802       { 0xd67fcbd0,0x9cca88ae,0x0ea9f096,0xc8472478,0x72e92b4d,0x32b2f481,
20803         0x4f522453,0x624ee54c },
20804       0 },
20805     /* 52 */
20806     { { 0xd897eccc,0x09549ce4,0x3f9880aa,0x4d49d1d9,0x043a7c20,0x723c2423,
20807         0x92bdfbc0,0x4f392afb },
20808       { 0x7de44fd9,0x6969f8fa,0x57b32156,0xb66cfbe4,0x368ebc3c,0xdb2fa803,
20809         0xccdb399c,0x8a3e7977 },
20810       0 },
20811     /* 53 */
20812     { { 0x06c4b125,0xdde1881f,0xf6e3ca8c,0xae34e300,0x5c7a13e9,0xef6999de,
20813         0x70c24404,0x3888d023 },
20814       { 0x44f91081,0x76280356,0x5f015504,0x3d9fcf61,0x632cd36e,0x1827edc8,
20815         0x18102336,0xa5e62e47 },
20816       0 },
20817     /* 54 */
20818     { { 0x2facd6c8,0x1a825ee3,0x54bcbc66,0x699c6354,0x98df9931,0x0ce3edf7,
20819         0x466a5adc,0x2c4768e6 },
20820       { 0x90a64bc9,0xb346ff8c,0xe4779f5c,0x630a6020,0xbc05e884,0xd949d064,
20821         0xf9e652a0,0x7b5e6441 },
20822       0 },
20823     /* 55 */
20824     { { 0x1d28444a,0x2169422c,0xbe136a39,0xe996c5d8,0xfb0c7fce,0x2387afe5,
20825         0x0c8d744a,0xb8af73cb },
20826       { 0x338b86fd,0x5fde83aa,0xa58a5cff,0xfee3f158,0x20ac9433,0xc9ee8f6f,
20827         0x7f3f0895,0xa036395f },
20828       0 },
20829     /* 56 */
20830     { { 0xa10f7770,0x8c73c6bb,0xa12a0e24,0xa6f16d81,0x51bc2b9f,0x100df682,
20831         0x875fb533,0x4be36b01 },
20832       { 0x9fb56dbb,0x9226086e,0x07e7a4f8,0x306fef8b,0x66d52f20,0xeeaccc05,
20833         0x1bdc00c0,0x8cbc9a87 },
20834       0 },
20835     /* 57 */
20836     { { 0xc0dac4ab,0xe131895c,0x712ff112,0xa874a440,0x6a1cee57,0x6332ae7c,
20837         0x0c0835f8,0x44e7553e },
20838       { 0x7734002d,0x6d503fff,0x0b34425c,0x9d35cb8b,0x0e8738b5,0x95f70276,
20839         0x5eb8fc18,0x470a683a },
20840       0 },
20841     /* 58 */
20842     { { 0x90513482,0x81b761dc,0x01e9276a,0x0287202a,0x0ce73083,0xcda441ee,
20843         0xc63dc6ef,0x16410690 },
20844       { 0x6d06a2ed,0xf5034a06,0x189b100b,0xdd4d7745,0xab8218c9,0xd914ae72,
20845         0x7abcbb4f,0xd73479fd },
20846       0 },
20847     /* 59 */
20848     { { 0x5ad4c6e5,0x7edefb16,0x5b06d04d,0x262cf08f,0x8575cb14,0x12ed5bb1,
20849         0x0771666b,0x816469e3 },
20850       { 0x561e291e,0xd7ab9d79,0xc1de1661,0xeb9daf22,0x135e0513,0xf49827eb,
20851         0xf0dd3f9c,0x0a36dd23 },
20852       0 },
20853     /* 60 */
20854     { { 0x41d5533c,0x098d32c7,0x8684628f,0x7c5f5a9e,0xe349bd11,0x39a228ad,
20855         0xfdbab118,0xe331dfd6 },
20856       { 0x6bcc6ed8,0x5100ab68,0xef7a260e,0x7160c3bd,0xbce850d7,0x9063d9a7,
20857         0x492e3389,0xd3b4782a },
20858       0 },
20859     /* 61 */
20860     { { 0xf3821f90,0xa149b6e8,0x66eb7aad,0x92edd9ed,0x1a013116,0x0bb66953,
20861         0x4c86a5bd,0x7281275a },
20862       { 0xd3ff47e5,0x503858f7,0x61016441,0x5e1616bc,0x7dfd9bb1,0x62b0f11a,
20863         0xce145059,0x2c062e7e },
20864       0 },
20865     /* 62 */
20866     { { 0x0159ac2e,0xa76f996f,0xcbdb2713,0x281e7736,0x08e46047,0x2ad6d288,
20867         0x2c4e7ef1,0x282a35f9 },
20868       { 0xc0ce5cd2,0x9c354b1e,0x1379c229,0xcf99efc9,0x3e82c11e,0x992caf38,
20869         0x554d2abd,0xc71cd513 },
20870       0 },
20871     /* 63 */
20872     { { 0x09b578f4,0x4885de9c,0xe3affa7a,0x1884e258,0x59182f1f,0x8f76b1b7,
20873         0xcf47f3a3,0xc50f6740 },
20874       { 0x374b68ea,0xa9c4adf3,0x69965fe2,0xa406f323,0x85a53050,0x2f86a222,
20875         0x212958dc,0xb9ecb3a7 },
20876       0 },
20877     /* 64 */
20878     { { 0xf4f8b16a,0x56f8410e,0xc47b266a,0x97241afe,0x6d9c87c1,0x0a406b8e,
20879         0xcd42ab1b,0x803f3e02 },
20880       { 0x04dbec69,0x7f0309a8,0x3bbad05f,0xa83b85f7,0xad8e197f,0xc6097273,
20881         0x5067adc1,0xc097440e },
20882       0 },
20883     /* 65 */
20884     { { 0xc379ab34,0x846a56f2,0x841df8d1,0xa8ee068b,0x176c68ef,0x20314459,
20885         0x915f1f30,0xf1af32d5 },
20886       { 0x5d75bd50,0x99c37531,0xf72f67bc,0x837cffba,0x48d7723f,0x0613a418,
20887         0xe2d41c8b,0x23d0f130 },
20888       0 },
20889     /* 66 */
20890     { { 0xf41500d9,0x857ab6ed,0xfcbeada8,0x0d890ae5,0x89725951,0x52fe8648,
20891         0xc0a3fadd,0xb0288dd6 },
20892       { 0x650bcb08,0x85320f30,0x695d6e16,0x71af6313,0xb989aa76,0x31f520a7,
20893         0xf408c8d2,0xffd3724f },
20894       0 },
20895     /* 67 */
20896     { { 0xb458e6cb,0x53968e64,0x317a5d28,0x992dad20,0x7aa75f56,0x3814ae0b,
20897         0xd78c26df,0xf5590f4a },
20898       { 0xcf0ba55a,0x0fc24bd3,0x0c778bae,0x0fc4724a,0x683b674a,0x1ce9864f,
20899         0xf6f74a20,0x18d6da54 },
20900       0 },
20901     /* 68 */
20902     { { 0xd5be5a2b,0xed93e225,0x5934f3c6,0x6fe79983,0x22626ffc,0x43140926,
20903         0x7990216a,0x50bbb4d9 },
20904       { 0xe57ec63e,0x378191c6,0x181dcdb2,0x65422c40,0x0236e0f6,0x41a8099b,
20905         0x01fe49c3,0x2b100118 },
20906       0 },
20907     /* 69 */
20908     { { 0x9b391593,0xfc68b5c5,0x598270fc,0xc385f5a2,0xd19adcbb,0x7144f3aa,
20909         0x83fbae0c,0xdd558999 },
20910       { 0x74b82ff4,0x93b88b8e,0x71e734c9,0xd2e03c40,0x43c0322a,0x9a7a9eaf,
20911         0x149d6041,0xe6e4c551 },
20912       0 },
20913     /* 70 */
20914     { { 0x1e9af288,0x55f655bb,0xf7ada931,0x647e1a64,0xcb2820e5,0x43697e4b,
20915         0x07ed56ff,0x51e00db1 },
20916       { 0x771c327e,0x43d169b8,0x4a96c2ad,0x29cdb20b,0x3deb4779,0xc07d51f5,
20917         0x49829177,0xe22f4241 },
20918       0 },
20919     /* 71 */
20920     { { 0x635f1abb,0xcd45e8f4,0x68538874,0x7edc0cb5,0xb5a8034d,0xc9472c1f,
20921         0x52dc48c9,0xf709373d },
20922       { 0xa8af30d6,0x401966bb,0xf137b69c,0x95bf5f4a,0x9361c47e,0x3966162a,
20923         0xe7275b11,0xbd52d288 },
20924       0 },
20925     /* 72 */
20926     { { 0x9c5fa877,0xab155c7a,0x7d3a3d48,0x17dad672,0x73d189d8,0x43f43f9e,
20927         0xc8aa77a6,0xa0d0f8e4 },
20928       { 0xcc94f92d,0x0bbeafd8,0x0c4ddb3a,0xd818c8be,0xb82eba14,0x22cc65f8,
20929         0x946d6a00,0xa56c78c7 },
20930       0 },
20931     /* 73 */
20932     { { 0x0dd09529,0x2962391b,0x3daddfcf,0x803e0ea6,0x5b5bf481,0x2c77351f,
20933         0x731a367a,0xd8befdf8 },
20934       { 0xfc0157f4,0xab919d42,0xfec8e650,0xf51caed7,0x02d48b0a,0xcdf9cb40,
20935         0xce9f6478,0x854a68a5 },
20936       0 },
20937     /* 74 */
20938     { { 0x63506ea5,0xdc35f67b,0xa4fe0d66,0x9286c489,0xfe95cd4d,0x3f101d3b,
20939         0x98846a95,0x5cacea0b },
20940       { 0x9ceac44d,0xa90df60c,0x354d1c3a,0x3db29af4,0xad5dbabe,0x08dd3de8,
20941         0x35e4efa9,0xe4982d12 },
20942       0 },
20943     /* 75 */
20944     { { 0xc34cd55e,0x23104a22,0x2680d132,0x58695bb3,0x1fa1d943,0xfb345afa,
20945         0x16b20499,0x8046b7f6 },
20946       { 0x38e7d098,0xb533581e,0xf46f0b70,0xd7f61e8d,0x44cb78c4,0x30dea9ea,
20947         0x9082af55,0xeb17ca7b },
20948       0 },
20949     /* 76 */
20950     { { 0x76a145b9,0x1751b598,0xc1bc71ec,0xa5cf6b0f,0x392715bb,0xd3e03565,
20951         0xfab5e131,0x097b00ba },
20952       { 0x565f69e1,0xaa66c8e9,0xb5be5199,0x77e8f75a,0xda4fd984,0x6033ba11,
20953         0xafdbcc9e,0xf95c747b },
20954       0 },
20955     /* 77 */
20956     { { 0xbebae45e,0x558f01d3,0xc4bc6955,0xa8ebe9f0,0xdbc64fc6,0xaeb705b1,
20957         0x566ed837,0x3512601e },
20958       { 0xfa1161cd,0x9336f1e1,0x4c65ef87,0x328ab8d5,0x724f21e5,0x4757eee2,
20959         0x6068ab6b,0x0ef97123 },
20960       0 },
20961     /* 78 */
20962     { { 0x54ca4226,0x02598cf7,0xf8642c8e,0x5eede138,0x468e1790,0x48963f74,
20963         0x3b4fbc95,0xfc16d933 },
20964       { 0xe7c800ca,0xbe96fb31,0x2678adaa,0x13806331,0x6ff3e8b5,0x3d624497,
20965         0xb95d7a17,0x14ca4af1 },
20966       0 },
20967     /* 79 */
20968     { { 0xbd2f81d5,0x7a4771ba,0x01f7d196,0x1a5f9d69,0xcad9c907,0xd898bef7,
20969         0xf59c231d,0x4057b063 },
20970       { 0x89c05c0a,0xbffd82fe,0x1dc0df85,0xe4911c6f,0xa35a16db,0x3befccae,
20971         0xf1330b13,0x1c3b5d64 },
20972       0 },
20973     /* 80 */
20974     { { 0x80ec21fe,0x5fe14bfe,0xc255be82,0xf6ce116a,0x2f4a5d67,0x98bc5a07,
20975         0xdb7e63af,0xfad27148 },
20976       { 0x29ab05b3,0x90c0b6ac,0x4e251ae6,0x37a9a83c,0xc2aade7d,0x0a7dc875,
20977         0x9f0e1a84,0x77387de3 },
20978       0 },
20979     /* 81 */
20980     { { 0xa56c0dd7,0x1e9ecc49,0x46086c74,0xa5cffcd8,0xf505aece,0x8f7a1408,
20981         0xbef0c47e,0xb37b85c0 },
20982       { 0xcc0e6a8f,0x3596b6e4,0x6b388f23,0xfd6d4bbf,0xc39cef4e,0xaba453fa,
20983         0xf9f628d5,0x9c135ac8 },
20984       0 },
20985     /* 82 */
20986     { { 0x84e35743,0x32aa3202,0x85a3cdef,0x320d6ab1,0x1df19819,0xb821b176,
20987         0xc433851f,0x5721361f },
20988       { 0x71fc9168,0x1f0db36a,0x5e5c403c,0x5f98ba73,0x37bcd8f5,0xf64ca87e,
20989         0xe6bb11bd,0xdcbac3c9 },
20990       0 },
20991     /* 83 */
20992     { { 0x4518cbe2,0xf01d9968,0x9c9eb04e,0xd242fc18,0xe47feebf,0x727663c7,
20993         0x2d626862,0xb8c1c89e },
20994       { 0xc8e1d569,0x51a58bdd,0xb7d88cd0,0x563809c8,0xf11f31eb,0x26c27fd9,
20995         0x2f9422d4,0x5d23bbda },
20996       0 },
20997     /* 84 */
20998     { { 0x95c8f8be,0x0a1c7294,0x3bf362bf,0x2961c480,0xdf63d4ac,0x9e418403,
20999         0x91ece900,0xc109f9cb },
21000       { 0x58945705,0xc2d095d0,0xddeb85c0,0xb9083d96,0x7a40449b,0x84692b8d,
21001         0x2eee1ee1,0x9bc3344f },
21002       0 },
21003     /* 85 */
21004     { { 0x42913074,0x0d5ae356,0x48a542b1,0x55491b27,0xb310732a,0x469ca665,
21005         0x5f1a4cc1,0x29591d52 },
21006       { 0xb84f983f,0xe76f5b6b,0x9f5f84e1,0xbe7eef41,0x80baa189,0x1200d496,
21007         0x18ef332c,0x6376551f },
21008       0 },
21009     /* 86 */
21010     { { 0x562976cc,0xbda5f14e,0x0ef12c38,0x22bca3e6,0x6cca9852,0xbbfa3064,
21011         0x08e2987a,0xbdb79dc8 },
21012       { 0xcb06a772,0xfd2cb5c9,0xfe536dce,0x38f475aa,0x7c2b5db8,0xc2a3e022,
21013         0xadd3c14a,0x8ee86001 },
21014       0 },
21015     /* 87 */
21016     { { 0xa4ade873,0xcbe96981,0xc4fba48c,0x7ee9aa4d,0x5a054ba5,0x2cee2899,
21017         0x6f77aa4b,0x92e51d7a },
21018       { 0x7190a34d,0x948bafa8,0xf6bd1ed1,0xd698f75b,0x0caf1144,0xd00ee6e3,
21019         0x0a56aaaa,0x5182f86f },
21020       0 },
21021     /* 88 */
21022     { { 0x7a4cc99c,0xfba6212c,0x3e6d9ca1,0xff609b68,0x5ac98c5a,0x5dbb27cb,
21023         0x4073a6f2,0x91dcab5d },
21024       { 0x5f575a70,0x01b6cc3d,0x6f8d87fa,0x0cb36139,0x89981736,0x165d4e8c,
21025         0x97974f2b,0x17a0cedb },
21026       0 },
21027     /* 89 */
21028     { { 0x076c8d3a,0x38861e2a,0x210f924b,0x701aad39,0x13a835d9,0x94d0eae4,
21029         0x7f4cdf41,0x2e8ce36c },
21030       { 0x037a862b,0x91273dab,0x60e4c8fa,0x01ba9bb7,0x33baf2dd,0xf9645388,
21031         0x34f668f3,0xf4ccc6cb },
21032       0 },
21033     /* 90 */
21034     { { 0xf1f79687,0x44ef525c,0x92efa815,0x7c595495,0xa5c78d29,0xe1231741,
21035         0x9a0df3c9,0xac0db488 },
21036       { 0xdf01747f,0x86bfc711,0xef17df13,0x592b9358,0x5ccb6bb5,0xe5880e4f,
21037         0x94c974a2,0x95a64a61 },
21038       0 },
21039     /* 91 */
21040     { { 0xc15a4c93,0x72c1efda,0x82585141,0x40269b73,0x16cb0bad,0x6a8dfb1c,
21041         0x29210677,0x231e54ba },
21042       { 0x8ae6d2dc,0xa70df917,0x39112918,0x4d6aa63f,0x5e5b7223,0xf627726b,
21043         0xd8a731e1,0xab0be032 },
21044       0 },
21045     /* 92 */
21046     { { 0x8d131f2d,0x097ad0e9,0x3b04f101,0x637f09e3,0xd5e9a748,0x1ac86196,
21047         0x2cf6a679,0xf1bcc880 },
21048       { 0xe8daacb4,0x25c69140,0x60f65009,0x3c4e4055,0x477937a6,0x591cc8fc,
21049         0x5aebb271,0x85169469 },
21050       0 },
21051     /* 93 */
21052     { { 0xf1dcf593,0xde35c143,0xb018be3b,0x78202b29,0x9bdd9d3d,0xe9cdadc2,
21053         0xdaad55d8,0x8f67d9d2 },
21054       { 0x7481ea5f,0x84111656,0xe34c590c,0xe7d2dde9,0x05053fa8,0xffdd43f4,
21055         0xc0728b5d,0xf84572b9 },
21056       0 },
21057     /* 94 */
21058     { { 0x97af71c9,0x5e1a7a71,0x7a736565,0xa1449444,0x0e1d5063,0xa1b4ae07,
21059         0x616b2c19,0xedee2710 },
21060       { 0x11734121,0xb2f034f5,0x4a25e9f0,0x1cac6e55,0xa40c2ecf,0x8dc148f3,
21061         0x44ebd7f4,0x9fd27e9b },
21062       0 },
21063     /* 95 */
21064     { { 0xf6e2cb16,0x3cc7658a,0xfe5919b6,0xe3eb7d2c,0x168d5583,0x5a8c5816,
21065         0x958ff387,0xa40c2fb6 },
21066       { 0xfedcc158,0x8c9ec560,0x55f23056,0x7ad804c6,0x9a307e12,0xd9396704,
21067         0x7dc6decf,0x99bc9bb8 },
21068       0 },
21069     /* 96 */
21070     { { 0x927dafc6,0x84a9521d,0x5c09cd19,0x52c1fb69,0xf9366dde,0x9d9581a0,
21071         0xa16d7e64,0x9abe210b },
21072       { 0x48915220,0x480af84a,0x4dd816c6,0xfa73176a,0x1681ca5a,0xc7d53987,
21073         0x87f344b0,0x7881c257 },
21074       0 },
21075     /* 97 */
21076     { { 0xe0bcf3ff,0x93399b51,0x127f74f6,0x0d02cbc5,0xdd01d968,0x8fb465a2,
21077         0xa30e8940,0x15e6e319 },
21078       { 0x3e0e05f4,0x646d6e0d,0x43588404,0xfad7bddc,0xc4f850d3,0xbe61c7d1,
21079         0x191172ce,0x0e55facf },
21080       0 },
21081     /* 98 */
21082     { { 0xf8787564,0x7e9d9806,0x31e85ce6,0x1a331721,0xb819e8d6,0x6b0158ca,
21083         0x6fe96577,0xd73d0976 },
21084       { 0x1eb7206e,0x42483425,0xc618bb42,0xa519290f,0x5e30a520,0x5dcbb859,
21085         0x8f15a50b,0x9250a374 },
21086       0 },
21087     /* 99 */
21088     { { 0xbe577410,0xcaff08f8,0x5077a8c6,0xfd408a03,0xec0a63a4,0xf1f63289,
21089         0xc1cc8c0b,0x77414082 },
21090       { 0xeb0991cd,0x05a40fa6,0x49fdc296,0xc1ca0866,0xb324fd40,0x3a68a3c7,
21091         0x12eb20b9,0x8cb04f4d },
21092       0 },
21093     /* 100 */
21094     { { 0x6906171c,0xb1c2d055,0xb0240c3f,0x9073e9cd,0xd8906841,0xdb8e6b4f,
21095         0x47123b51,0xe4e429ef },
21096       { 0x38ec36f4,0x0b8dd53c,0xff4b6a27,0xf9d2dc01,0x879a9a48,0x5d066e07,
21097         0x3c6e6552,0x37bca2ff },
21098       0 },
21099     /* 101 */
21100     { { 0xdf562470,0x4cd2e3c7,0xc0964ac9,0x44f272a2,0x80c793be,0x7c6d5df9,
21101         0x3002b22a,0x59913edc },
21102       { 0x5750592a,0x7a139a83,0xe783de02,0x99e01d80,0xea05d64f,0xcf8c0375,
21103         0xb013e226,0x43786e4a },
21104       0 },
21105     /* 102 */
21106     { { 0x9e56b5a6,0xff32b0ed,0xd9fc68f9,0x0750d9a6,0x597846a7,0xec15e845,
21107         0xb7e79e7a,0x8638ca98 },
21108       { 0x0afc24b2,0x2f5ae096,0x4dace8f2,0x05398eaf,0xaecba78f,0x3b765dd0,
21109         0x7b3aa6f0,0x1ecdd36a },
21110       0 },
21111     /* 103 */
21112     { { 0x6c5ff2f3,0x5d3acd62,0x2873a978,0xa2d516c0,0xd2110d54,0xad94c9fa,
21113         0xd459f32d,0xd85d0f85 },
21114       { 0x10b11da3,0x9f700b8d,0xa78318c4,0xd2c22c30,0x9208decd,0x556988f4,
21115         0xb4ed3c62,0xa04f19c3 },
21116       0 },
21117     /* 104 */
21118     { { 0xed7f93bd,0x087924c8,0x392f51f6,0xcb64ac5d,0x821b71af,0x7cae330a,
21119         0x5c0950b0,0x92b2eeea },
21120       { 0x85b6e235,0x85ac4c94,0x2936c0f0,0xab2ca4a9,0xe0508891,0x80faa6b3,
21121         0x5834276c,0x1ee78221 },
21122       0 },
21123     /* 105 */
21124     { { 0xe63e79f7,0xa60a2e00,0xf399d906,0xf590e7b2,0x6607c09d,0x9021054a,
21125         0x57a6e150,0xf3f2ced8 },
21126       { 0xf10d9b55,0x200510f3,0xd8642648,0x9d2fcfac,0xe8bd0e7c,0xe5631aa7,
21127         0x3da3e210,0x0f56a454 },
21128       0 },
21129     /* 106 */
21130     { { 0x1043e0df,0x5b21bffa,0x9c007e6d,0x6c74b6cc,0xd4a8517a,0x1a656ec0,
21131         0x1969e263,0xbd8f1741 },
21132       { 0xbeb7494a,0x8a9bbb86,0x45f3b838,0x1567d46f,0xa4e5a79a,0xdf7a12a7,
21133         0x30ccfa09,0x2d1a1c35 },
21134       0 },
21135     /* 107 */
21136     { { 0x506508da,0x192e3813,0xa1d795a7,0x336180c4,0x7a9944b3,0xcddb5949,
21137         0xb91fba46,0xa107a65e },
21138       { 0x0f94d639,0xe6d1d1c5,0x8a58b7d7,0x8b4af375,0xbd37ca1c,0x1a7c5584,
21139         0xf87a9af2,0x183d760a },
21140       0 },
21141     /* 108 */
21142     { { 0x0dde59a4,0x29d69711,0x0e8bef87,0xf1ad8d07,0x4f2ebe78,0x229b4963,
21143         0xc269d754,0x1d44179d },
21144       { 0x8390d30e,0xb32dc0cf,0x0de8110c,0x0a3b2753,0x2bc0339a,0x31af1dc5,
21145         0x9606d262,0x771f9cc2 },
21146       0 },
21147     /* 109 */
21148     { { 0x85040739,0x99993e77,0x8026a939,0x44539db9,0xf5f8fc26,0xcf40f6f2,
21149         0x0362718e,0x64427a31 },
21150       { 0x85428aa8,0x4f4f2d87,0xebfb49a8,0x7b7adc3f,0xf23d01ac,0x201b2c6d,
21151         0x6ae90d6d,0x49d9b749 },
21152       0 },
21153     /* 110 */
21154     { { 0x435d1099,0xcc78d8bc,0x8e8d1a08,0x2adbcd4e,0x2cb68a41,0x02c2e2a0,
21155         0x3f605445,0x9037d81b },
21156       { 0x074c7b61,0x7cdbac27,0x57bfd72e,0xfe2031ab,0x596d5352,0x61ccec96,
21157         0x7cc0639c,0x08c3de6a },
21158       0 },
21159     /* 111 */
21160     { { 0xf6d552ab,0x20fdd020,0x05cd81f1,0x56baff98,0x91351291,0x06fb7c3e,
21161         0x45796b2f,0xc6909442 },
21162       { 0x41231bd1,0x17b3ae9c,0x5cc58205,0x1eac6e87,0xf9d6a122,0x208837ab,
21163         0xcafe3ac0,0x3fa3db02 },
21164       0 },
21165     /* 112 */
21166     { { 0x05058880,0xd75a3e65,0x643943f2,0x7da365ef,0xfab24925,0x4147861c,
21167         0xfdb808ff,0xc5c4bdb0 },
21168       { 0xb272b56b,0x73513e34,0x11b9043a,0xc8327e95,0xf8844969,0xfd8ce37d,
21169         0x46c2b6b5,0x2d56db94 },
21170       0 },
21171     /* 113 */
21172     { { 0xff46ac6b,0x2461782f,0x07a2e425,0xd19f7926,0x09a48de1,0xfafea3c4,
21173         0xe503ba42,0x0f56bd9d },
21174       { 0x345cda49,0x137d4ed1,0x816f299d,0x821158fc,0xaeb43402,0xe7c6a54a,
21175         0x1173b5f1,0x4003bb9d },
21176       0 },
21177     /* 114 */
21178     { { 0xa0803387,0x3b8e8189,0x39cbd404,0xece115f5,0xd2877f21,0x4297208d,
21179         0xa07f2f9e,0x53765522 },
21180       { 0xa8a4182d,0xa4980a21,0x3219df79,0xa2bbd07a,0x1a19a2d4,0x674d0a2e,
21181         0x6c5d4549,0x7a056f58 },
21182       0 },
21183     /* 115 */
21184     { { 0x9d8a2a47,0x646b2558,0xc3df2773,0x5b582948,0xabf0d539,0x51ec000e,
21185         0x7a1a2675,0x77d482f1 },
21186       { 0x87853948,0xb8a1bd95,0x6cfbffee,0xa6f817bd,0x80681e47,0xab6ec057,
21187         0x2b38b0e4,0x4115012b },
21188       0 },
21189     /* 116 */
21190     { { 0x6de28ced,0x3c73f0f4,0x9b13ec47,0x1d5da760,0x6e5c6392,0x61b8ce9e,
21191         0xfbea0946,0xcdf04572 },
21192       { 0x6c53c3b0,0x1cb3c58b,0x447b843c,0x97fe3c10,0x2cb9780e,0xfb2b8ae1,
21193         0x97383109,0xee703dda },
21194       0 },
21195     /* 117 */
21196     { { 0xff57e43a,0x34515140,0xb1b811b8,0xd44660d3,0x8f42b986,0x2b3b5dff,
21197         0xa162ce21,0x2a0ad89d },
21198       { 0x6bc277ba,0x64e4a694,0xc141c276,0xc788c954,0xcabf6274,0x141aa64c,
21199         0xac2b4659,0xd62d0b67 },
21200       0 },
21201     /* 118 */
21202     { { 0x2c054ac4,0x39c5d87b,0xf27df788,0x57005859,0xb18128d6,0xedf7cbf3,
21203         0x991c2426,0xb39a23f2 },
21204       { 0xf0b16ae5,0x95284a15,0xa136f51b,0x0c6a05b1,0xf2700783,0x1d63c137,
21205         0xc0674cc5,0x04ed0092 },
21206       0 },
21207     /* 119 */
21208     { { 0x9ae90393,0x1f4185d1,0x4a3d64e6,0x3047b429,0x9854fc14,0xae0001a6,
21209         0x0177c387,0xa0a91fc1 },
21210       { 0xae2c831e,0xff0a3f01,0x2b727e16,0xbb76ae82,0x5a3075b4,0x8f12c8a1,
21211         0x9ed20c41,0x084cf988 },
21212       0 },
21213     /* 120 */
21214     { { 0xfca6becf,0xd98509de,0x7dffb328,0x2fceae80,0x4778e8b9,0x5d8a15c4,
21215         0x73abf77e,0xd57955b2 },
21216       { 0x31b5d4f1,0x210da79e,0x3cfa7a1c,0xaa52f04b,0xdc27c20b,0xd4d12089,
21217         0x02d141f1,0x8e14ea42 },
21218       0 },
21219     /* 121 */
21220     { { 0xf2897042,0xeed50345,0x43402c4a,0x8d05331f,0xc8bdfb21,0xc8d9c194,
21221         0x2aa4d158,0x597e1a37 },
21222       { 0xcf0bd68c,0x0327ec1a,0xab024945,0x6d4be0dc,0xc9fe3e84,0x5b9c8d7a,
21223         0x199b4dea,0xca3f0236 },
21224       0 },
21225     /* 122 */
21226     { { 0x6170bd20,0x592a10b5,0x6d3f5de7,0x0ea897f1,0x44b2ade2,0xa3363ff1,
21227         0x309c07e4,0xbde7fd7e },
21228       { 0xb8f5432c,0x516bb6d2,0xe043444b,0x210dc1cb,0xf8f95b5a,0x3db01e6f,
21229         0x0a7dd198,0xb623ad0e },
21230       0 },
21231     /* 123 */
21232     { { 0x60c7b65b,0xa75bd675,0x23a4a289,0xab8c5590,0xd7b26795,0xf8220fd0,
21233         0x58ec137b,0xd6aa2e46 },
21234       { 0x5138bb85,0x10abc00b,0xd833a95c,0x8c31d121,0x1702a32e,0xb24ff00b,
21235         0x2dcc513a,0x111662e0 },
21236       0 },
21237     /* 124 */
21238     { { 0xefb42b87,0x78114015,0x1b6c4dff,0xbd9f5d70,0xa7d7c129,0x66ecccd7,
21239         0x94b750f8,0xdb3ee1cb },
21240       { 0xf34837cf,0xb26f3db0,0xb9578d4f,0xe7eed18b,0x7c56657d,0x5d2cdf93,
21241         0x52206a59,0x886a6442 },
21242       0 },
21243     /* 125 */
21244     { { 0x65b569ea,0x3c234cfb,0xf72119c1,0x20011141,0xa15a619e,0x8badc85d,
21245         0x018a17bc,0xa70cf4eb },
21246       { 0x8c4a6a65,0x224f97ae,0x0134378f,0x36e5cf27,0x4f7e0960,0xbe3a609e,
21247         0xd1747b77,0xaa4772ab },
21248       0 },
21249     /* 126 */
21250     { { 0x7aa60cc0,0x67676131,0x0368115f,0xc7916361,0xbbc1bb5a,0xded98bb4,
21251         0x30faf974,0x611a6ddc },
21252       { 0xc15ee47a,0x30e78cbc,0x4e0d96a5,0x2e896282,0x3dd9ed88,0x36f35adf,
21253         0x16429c88,0x5cfffaf8 },
21254       0 },
21255     /* 127 */
21256     { { 0x9b7a99cd,0xc0d54cff,0x843c45a1,0x7bf3b99d,0x62c739e1,0x038a908f,
21257         0x7dc1994c,0x6e5a6b23 },
21258       { 0x0ba5db77,0xef8b454e,0xacf60d63,0xb7b8807f,0x76608378,0xe591c0c6,
21259         0x242dabcc,0x481a238d },
21260       0 },
21261     /* 128 */
21262     { { 0x35d0b34a,0xe3417bc0,0x8327c0a7,0x440b386b,0xac0362d1,0x8fb7262d,
21263         0xe0cdf943,0x2c41114c },
21264       { 0xad95a0b1,0x2ba5cef1,0x67d54362,0xc09b37a8,0x01e486c9,0x26d6cdd2,
21265         0x42ff9297,0x20477abf },
21266       0 },
21267     /* 129 */
21268     { { 0x18d65dbf,0x2f75173c,0x339edad8,0x77bf940e,0xdcf1001c,0x7022d26b,
21269         0xc77396b6,0xac66409a },
21270       { 0xc6261cc3,0x8b0bb36f,0x190e7e90,0x213f7bc9,0xa45e6c10,0x6541ceba,
21271         0xcc122f85,0xce8e6975 },
21272       0 },
21273     /* 130 */
21274     { { 0xbc0a67d2,0x0f121b41,0x444d248a,0x62d4760a,0x659b4737,0x0e044f1d,
21275         0x250bb4a8,0x08fde365 },
21276       { 0x848bf287,0xaceec3da,0xd3369d6e,0xc2a62182,0x92449482,0x3582dfdc,
21277         0x565d6cd7,0x2f7e2fd2 },
21278       0 },
21279     /* 131 */
21280     { { 0xc3770fa7,0xae4b92db,0x379043f9,0x095e8d5c,0x17761171,0x54f34e9d,
21281         0x907702ae,0xc65be92e },
21282       { 0xf6fd0a40,0x2758a303,0xbcce784b,0xe7d822e3,0x4f9767bf,0x7ae4f585,
21283         0xd1193b3a,0x4bff8e47 },
21284       0 },
21285     /* 132 */
21286     { { 0x00ff1480,0xcd41d21f,0x0754db16,0x2ab8fb7d,0xbbe0f3ea,0xac81d2ef,
21287         0x5772967d,0x3e4e4ae6 },
21288       { 0x3c5303e6,0x7e18f36d,0x92262397,0x3bd9994b,0x1324c3c0,0x9ed70e26,
21289         0x58ec6028,0x5388aefd },
21290       0 },
21291     /* 133 */
21292     { { 0x5e5d7713,0xad1317eb,0x75de49da,0x09b985ee,0xc74fb261,0x32f5bc4f,
21293         0x4f75be0e,0x5cf908d1 },
21294       { 0x8e657b12,0x76043510,0xb96ed9e6,0xbfd421a5,0x8970ccc2,0x0e29f51f,
21295         0x60f00ce2,0xa698ba40 },
21296       0 },
21297     /* 134 */
21298     { { 0xef748fec,0x73db1686,0x7e9d2cf9,0xe6e755a2,0xce265eff,0x630b6544,
21299         0x7aebad8d,0xb142ef8a },
21300       { 0x17d5770a,0xad31af9f,0x2cb3412f,0x66af3b67,0xdf3359de,0x6bd60d1b,
21301         0x58515075,0xd1896a96 },
21302       0 },
21303     /* 135 */
21304     { { 0x33c41c08,0xec5957ab,0x5468e2e1,0x87de94ac,0xac472f6c,0x18816b73,
21305         0x7981da39,0x267b0e0b },
21306       { 0x8e62b988,0x6e554e5d,0x116d21e7,0xd8ddc755,0x3d2a6f99,0x4610faf0,
21307         0xa1119393,0xb54e287a },
21308       0 },
21309     /* 136 */
21310     { { 0x178a876b,0x0a0122b5,0x085104b4,0x51ff96ff,0x14f29f76,0x050b31ab,
21311         0x5f87d4e6,0x84abb28b },
21312       { 0x8270790a,0xd5ed439f,0x85e3f46b,0x2d6cb59d,0x6c1e2212,0x75f55c1b,
21313         0x17655640,0xe5436f67 },
21314       0 },
21315     /* 137 */
21316     { { 0x2286e8d5,0x53f9025e,0x864453be,0x353c95b4,0xe408e3a0,0xd832f5bd,
21317         0x5b9ce99e,0x0404f68b },
21318       { 0xa781e8e5,0xcad33bde,0x163c2f5b,0x3cdf5018,0x0119caa3,0x57576960,
21319         0x0ac1c701,0x3a4263df },
21320       0 },
21321     /* 138 */
21322     { { 0x9aeb596d,0xc2965ecc,0x023c92b4,0x01ea03e7,0x2e013961,0x4704b4b6,
21323         0x905ea367,0x0ca8fd3f },
21324       { 0x551b2b61,0x92523a42,0x390fcd06,0x1eb7a89c,0x0392a63e,0xe7f1d2be,
21325         0x4ddb0c33,0x96dca264 },
21326       0 },
21327     /* 139 */
21328     { { 0x387510af,0x203bb43a,0xa9a36a01,0x846feaa8,0x2f950378,0xd23a5770,
21329         0x3aad59dc,0x4363e212 },
21330       { 0x40246a47,0xca43a1c7,0xe55dd24d,0xb362b8d2,0x5d8faf96,0xf9b08604,
21331         0xd8bb98c4,0x840e115c },
21332       0 },
21333     /* 140 */
21334     { { 0x1023e8a7,0xf12205e2,0xd8dc7a0b,0xc808a8cd,0x163a5ddf,0xe292a272,
21335         0x30ded6d4,0x5e0d6abd },
21336       { 0x7cfc0f64,0x07a721c2,0x0e55ed88,0x42eec01d,0x1d1f9db2,0x26a7bef9,
21337         0x2945a25a,0x7dea48f4 },
21338       0 },
21339     /* 141 */
21340     { { 0xe5060a81,0xabdf6f1c,0xf8f95615,0xe79f9c72,0x06ac268b,0xcfd36c54,
21341         0xebfd16d1,0xabc2a2be },
21342       { 0xd3e2eac7,0x8ac66f91,0xd2dd0466,0x6f10ba63,0x0282d31b,0x6790e377,
21343         0x6c7eefc1,0x4ea35394 },
21344       0 },
21345     /* 142 */
21346     { { 0x5266309d,0xed8a2f8d,0x81945a3e,0x0a51c6c0,0x578c5dc1,0xcecaf45a,
21347         0x1c94ffc3,0x3a76e689 },
21348       { 0x7d7b0d0f,0x9aace8a4,0x8f584a5f,0x963ace96,0x4e697fbe,0x51a30c72,
21349         0x465e6464,0x8212a10a },
21350       0 },
21351     /* 143 */
21352     { { 0xcfab8caa,0xef7c61c3,0x0e142390,0x18eb8e84,0x7e9733ca,0xcd1dff67,
21353         0x599cb164,0xaa7cab71 },
21354       { 0xbc837bd1,0x02fc9273,0xc36af5d7,0xc06407d0,0xf423da49,0x17621292,
21355         0xfe0617c3,0x40e38073 },
21356       0 },
21357     /* 144 */
21358     { { 0xa7bf9b7c,0xf4f80824,0x3fbe30d0,0x365d2320,0x97cf9ce3,0xbfbe5320,
21359         0xb3055526,0xe3604700 },
21360       { 0x6cc6c2c7,0x4dcb9911,0xba4cbee6,0x72683708,0x637ad9ec,0xdcded434,
21361         0xa3dee15f,0x6542d677 },
21362       0 },
21363     /* 145 */
21364     { { 0x7b6c377a,0x3f32b6d0,0x903448be,0x6cb03847,0x20da8af7,0xd6fdd3a8,
21365         0x09bb6f21,0xa6534aee },
21366       { 0x1035facf,0x30a1780d,0x9dcb47e6,0x35e55a33,0xc447f393,0x6ea50fe1,
21367         0xdc9aef22,0xf3cb672f },
21368       0 },
21369     /* 146 */
21370     { { 0x3b55fd83,0xeb3719fe,0x875ddd10,0xe0d7a46c,0x05cea784,0x33ac9fa9,
21371         0xaae870e7,0x7cafaa2e },
21372       { 0x1d53b338,0x9b814d04,0xef87e6c6,0xe0acc0a0,0x11672b0f,0xfb93d108,
21373         0xb9bd522e,0x0aab13c1 },
21374       0 },
21375     /* 147 */
21376     { { 0xd2681297,0xddcce278,0xb509546a,0xcb350eb1,0x7661aaf2,0x2dc43173,
21377         0x847012e9,0x4b91a602 },
21378       { 0x72f8ddcf,0xdcff1095,0x9a911af4,0x08ebf61e,0xc372430e,0x48f4360a,
21379         0x72321cab,0x49534c53 },
21380       0 },
21381     /* 148 */
21382     { { 0xf07b7e9d,0x83df7d71,0x13cd516f,0xa478efa3,0x6c047ee3,0x78ef264b,
21383         0xd65ac5ee,0xcaf46c4f },
21384       { 0x92aa8266,0xa04d0c77,0x913684bb,0xedf45466,0xae4b16b0,0x56e65168,
21385         0x04c6770f,0x14ce9e57 },
21386       0 },
21387     /* 149 */
21388     { { 0x965e8f91,0x99445e3e,0xcb0f2492,0xd3aca1ba,0x90c8a0a0,0xd31cc70f,
21389         0x3e4c9a71,0x1bb708a5 },
21390       { 0x558bdd7a,0xd5ca9e69,0x018a26b1,0x734a0508,0x4c9cf1ec,0xb093aa71,
21391         0xda300102,0xf9d126f2 },
21392       0 },
21393     /* 150 */
21394     { { 0xaff9563e,0x749bca7a,0xb49914a0,0xdd077afe,0xbf5f1671,0xe27a0311,
21395         0x729ecc69,0x807afcb9 },
21396       { 0xc9b08b77,0x7f8a9337,0x443c7e38,0x86c3a785,0x476fd8ba,0x85fafa59,
21397         0x6568cd8c,0x751adcd1 },
21398       0 },
21399     /* 151 */
21400     { { 0x10715c0d,0x8aea38b4,0x8f7697f7,0xd113ea71,0x93fbf06d,0x665eab14,
21401         0x2537743f,0x29ec4468 },
21402       { 0xb50bebbc,0x3d94719c,0xe4505422,0x399ee5bf,0x8d2dedb1,0x90cd5b3a,
21403         0x92a4077d,0xff9370e3 },
21404       0 },
21405     /* 152 */
21406     { { 0xc6b75b65,0x59a2d69b,0x266651c5,0x4188f8d5,0x3de9d7d2,0x28a9f33e,
21407         0xa2a9d01a,0x9776478b },
21408       { 0x929af2c7,0x8852622d,0x4e690923,0x334f5d6d,0xa89a51e9,0xce6cc7e5,
21409         0xac2f82fa,0x74a6313f },
21410       0 },
21411     /* 153 */
21412     { { 0xb75f079c,0xb2f4dfdd,0x18e36fbb,0x85b07c95,0xe7cd36dd,0x1b6cfcf0,
21413         0x0ff4863d,0xab75be15 },
21414       { 0x173fc9b7,0x81b367c0,0xd2594fd0,0xb90a7420,0xc4091236,0x15fdbf03,
21415         0x0b4459f6,0x4ebeac2e },
21416       0 },
21417     /* 154 */
21418     { { 0x5c9f2c53,0xeb6c5fe7,0x8eae9411,0xd2522011,0xf95ac5d8,0xc8887633,
21419         0x2c1baffc,0xdf99887b },
21420       { 0x850aaecb,0xbb78eed2,0x01d6a272,0x9d49181b,0xb1cdbcac,0x978dd511,
21421         0x779f4058,0x27b040a7 },
21422       0 },
21423     /* 155 */
21424     { { 0xf73b2eb2,0x90405db7,0x8e1b2118,0xe0df8508,0x5962327e,0x501b7152,
21425         0xe4cfa3f5,0xb393dd37 },
21426       { 0x3fd75165,0xa1230e7b,0xbcd33554,0xd66344c2,0x0f7b5022,0x6c36f1be,
21427         0xd0463419,0x09588c12 },
21428       0 },
21429     /* 156 */
21430     { { 0x02601c3b,0xe086093f,0xcf5c335f,0xfb0252f8,0x894aff28,0x955cf280,
21431         0xdb9f648b,0x81c879a9 },
21432       { 0xc6f56c51,0x040e687c,0x3f17618c,0xfed47169,0x9059353b,0x44f88a41,
21433         0x5fc11bc4,0xfa0d48f5 },
21434       0 },
21435     /* 157 */
21436     { { 0xe1608e4d,0xbc6e1c9d,0x3582822c,0x010dda11,0x157ec2d7,0xf6b7ddc1,
21437         0xb6a367d6,0x8ea0e156 },
21438       { 0x2383b3b4,0xa354e02f,0x3f01f53c,0x69966b94,0x2de03ca5,0x4ff6632b,
21439         0xfa00b5ac,0x3f5ab924 },
21440       0 },
21441     /* 158 */
21442     { { 0x59739efb,0x337bb0d9,0xe7ebec0d,0xc751b0f4,0x411a67d1,0x2da52dd6,
21443         0x2b74256e,0x8bc76887 },
21444       { 0x82d3d253,0xa5be3b72,0xf58d779f,0xa9f679a1,0xe16767bb,0xa1cac168,
21445         0x60fcf34f,0xb386f190 },
21446       0 },
21447     /* 159 */
21448     { { 0x2fedcfc2,0x31f3c135,0x62f8af0d,0x5396bf62,0xe57288c2,0x9a02b4ea,
21449         0x1b069c4d,0x4cb460f7 },
21450       { 0x5b8095ea,0xae67b4d3,0x6fc07603,0x92bbf859,0xb614a165,0xe1475f66,
21451         0x95ef5223,0x52c0d508 },
21452       0 },
21453     /* 160 */
21454     { { 0x15339848,0x231c210e,0x70778c8d,0xe87a28e8,0x6956e170,0x9d1de661,
21455         0x2bb09c0b,0x4ac3c938 },
21456       { 0x6998987d,0x19be0551,0xae09f4d6,0x8b2376c4,0x1a3f933d,0x1de0b765,
21457         0xe39705f4,0x380d94c7 },
21458       0 },
21459     /* 161 */
21460     { { 0x81542e75,0x01a355aa,0xee01b9b7,0x96c724a1,0x624d7087,0x6b3a2977,
21461         0xde2637af,0x2ce3e171 },
21462       { 0xf5d5bc1a,0xcfefeb49,0x2777e2b5,0xa655607e,0x9513756c,0x4feaac2f,
21463         0x0b624e4d,0x2e6cd852 },
21464       0 },
21465     /* 162 */
21466     { { 0x8c31c31d,0x3685954b,0x5bf21a0c,0x68533d00,0x75c79ec9,0x0bd7626e,
21467         0x42c69d54,0xca177547 },
21468       { 0xf6d2dbb2,0xcc6edaff,0x174a9d18,0xfd0d8cbd,0xaa4578e8,0x875e8793,
21469         0x9cab2ce6,0xa976a713 },
21470       0 },
21471     /* 163 */
21472     { { 0x93fb353d,0x0a651f1b,0x57fcfa72,0xd75cab8b,0x31b15281,0xaa88cfa7,
21473         0x0a1f4999,0x8720a717 },
21474       { 0x693e1b90,0x8c3e8d37,0x16f6dfc3,0xd345dc0b,0xb52a8742,0x8ea8d00a,
21475         0xc769893c,0x9719ef29 },
21476       0 },
21477     /* 164 */
21478     { { 0x58e35909,0x820eed8d,0x33ddc116,0x9366d8dc,0x6e205026,0xd7f999d0,
21479         0xe15704c1,0xa5072976 },
21480       { 0xc4e70b2e,0x002a37ea,0x6890aa8a,0x84dcf657,0x645b2a5c,0xcd71bf18,
21481         0xf7b77725,0x99389c9d },
21482       0 },
21483     /* 165 */
21484     { { 0x7ada7a4b,0x238c08f2,0xfd389366,0x3abe9d03,0x766f512c,0x6b672e89,
21485         0x202c82e4,0xa88806aa },
21486       { 0xd380184e,0x6602044a,0x126a8b85,0xa8cb78c4,0xad844f17,0x79d670c0,
21487         0x4738dcfe,0x0043bffb },
21488       0 },
21489     /* 166 */
21490     { { 0x36d5192e,0x8d59b5dc,0x4590b2af,0xacf885d3,0x11601781,0x83566d0a,
21491         0xba6c4866,0x52f3ef01 },
21492       { 0x0edcb64d,0x3986732a,0x8068379f,0x0a482c23,0x7040f309,0x16cbe5fa,
21493         0x9ef27e75,0x3296bd89 },
21494       0 },
21495     /* 167 */
21496     { { 0x454d81d7,0x476aba89,0x51eb9b3c,0x9eade7ef,0x81c57986,0x619a21cd,
21497         0xaee571e9,0x3b90febf },
21498       { 0x5496f7cb,0x9393023e,0x7fb51bc4,0x55be41d8,0x99beb5ce,0x03f1dd48,
21499         0x9f810b18,0x6e88069d },
21500       0 },
21501     /* 168 */
21502     { { 0xb43ea1db,0xce37ab11,0x5259d292,0x0a7ff1a9,0x8f84f186,0x851b0221,
21503         0xdefaad13,0xa7222bea },
21504       { 0x2b0a9144,0xa2ac78ec,0xf2fa59c5,0x5a024051,0x6147ce38,0x91d1eca5,
21505         0xbc2ac690,0xbe94d523 },
21506       0 },
21507     /* 169 */
21508     { { 0x0b226ce7,0x72f4945e,0x967e8b70,0xb8afd747,0x85a6c63e,0xedea46f1,
21509         0x9be8c766,0x7782defe },
21510       { 0x3db38626,0x760d2aa4,0x76f67ad1,0x460ae787,0x54499cdb,0x341b86fc,
21511         0xa2892e4b,0x03838567 },
21512       0 },
21513     /* 170 */
21514     { { 0x79ec1a0f,0x2d8daefd,0xceb39c97,0x3bbcd6fd,0x58f61a95,0xf5575ffc,
21515         0xadf7b420,0xdbd986c4 },
21516       { 0x15f39eb7,0x81aa8814,0xb98d976c,0x6ee2fcf5,0xcf2f717d,0x5465475d,
21517         0x6860bbd0,0x8e24d3c4 },
21518       0 },
21519     /* 171 */
21520     { { 0x9a587390,0x749d8e54,0x0cbec588,0x12bb194f,0xb25983c6,0x46e07da4,
21521         0x407bafc8,0x541a99c4 },
21522       { 0x624c8842,0xdb241692,0xd86c05ff,0x6044c12a,0x4f7fcf62,0xc59d14b4,
21523         0xf57d35d1,0xc0092c49 },
21524       0 },
21525     /* 172 */
21526     { { 0xdf2e61ef,0xd3cc75c3,0x2e1b35ca,0x7e8841c8,0x909f29f4,0xc62d30d1,
21527         0x7286944d,0x75e40634 },
21528       { 0xbbc237d0,0xe7d41fc5,0xec4f01c9,0xc9537bf0,0x282bd534,0x91c51a16,
21529         0xc7848586,0x5b7cb658 },
21530       0 },
21531     /* 173 */
21532     { { 0x8a28ead1,0x964a7084,0xfd3b47f6,0x802dc508,0x767e5b39,0x9ae4bfd1,
21533         0x8df097a1,0x7ae13eba },
21534       { 0xeadd384e,0xfd216ef8,0xb6b2ff06,0x0361a2d9,0x4bcdb5f3,0x204b9878,
21535         0xe2a8e3fd,0x787d8074 },
21536       0 },
21537     /* 174 */
21538     { { 0x757fbb1c,0xc5e25d6b,0xca201deb,0xe47bddb2,0x6d2233ff,0x4a55e9a3,
21539         0x9ef28484,0x5c222819 },
21540       { 0x88315250,0x773d4a85,0x827097c1,0x21b21a2b,0xdef5d33f,0xab7c4ea1,
21541         0xbaf0f2b0,0xe45d37ab },
21542       0 },
21543     /* 175 */
21544     { { 0x28511c8a,0xd2df1e34,0xbdca6cd3,0xebb229c8,0x627c39a7,0x578a71a7,
21545         0x84dfb9d3,0xed7bc122 },
21546       { 0x93dea561,0xcf22a6df,0xd48f0ed1,0x5443f18d,0x5bad23e8,0xd8b86140,
21547         0x45ca6d27,0xaac97cc9 },
21548       0 },
21549     /* 176 */
21550     { { 0xa16bd00a,0xeb54ea74,0xf5c0bcc1,0xd839e9ad,0x1f9bfc06,0x092bb7f1,
21551         0x1163dc4e,0x318f97b3 },
21552       { 0xc30d7138,0xecc0c5be,0xabc30220,0x44e8df23,0xb0223606,0x2bb7972f,
21553         0x9a84ff4d,0xfa41faa1 },
21554       0 },
21555     /* 177 */
21556     { { 0xa6642269,0x4402d974,0x9bb783bd,0xc81814ce,0x7941e60b,0x398d38e4,
21557         0x1d26e9e2,0x38bb6b2c },
21558       { 0x6a577f87,0xc64e4a25,0xdc11fe1c,0x8b52d253,0x62280728,0xff336abf,
21559         0xce7601a5,0x94dd0905 },
21560       0 },
21561     /* 178 */
21562     { { 0xde93f92a,0x156cf7dc,0x89b5f315,0xa01333cb,0xc995e750,0x02404df9,
21563         0xd25c2ae9,0x92077867 },
21564       { 0x0bf39d44,0xe2471e01,0x96bb53d7,0x5f2c9020,0x5c9c3d8f,0x4c44b7b3,
21565         0xd29beb51,0x81e8428b },
21566       0 },
21567     /* 179 */
21568     { { 0xc477199f,0x6dd9c2ba,0x6b5ecdd9,0x8cb8eeee,0xee40fd0e,0x8af7db3f,
21569         0xdbbfa4b1,0x1b94ab62 },
21570       { 0xce47f143,0x44f0d8b3,0x63f46163,0x51e623fc,0xcc599383,0xf18f270f,
21571         0x055590ee,0x06a38e28 },
21572       0 },
21573     /* 180 */
21574     { { 0xb3355b49,0x2e5b0139,0xb4ebf99b,0x20e26560,0xd269f3dc,0xc08ffa6b,
21575         0x83d9d4f8,0xa7b36c20 },
21576       { 0x1b3e8830,0x64d15c3a,0xa89f9c0b,0xd5fceae1,0xe2d16930,0xcfeee4a2,
21577         0xa2822a20,0xbe54c6b4 },
21578       0 },
21579     /* 181 */
21580     { { 0x8d91167c,0xd6cdb3df,0xe7a6625e,0x517c3f79,0x346ac7f4,0x7105648f,
21581         0xeae022bb,0xbf30a5ab },
21582       { 0x93828a68,0x8e7785be,0x7f3ef036,0x5161c332,0x592146b2,0xe11b5feb,
21583         0x2732d13a,0xd1c820de },
21584       0 },
21585     /* 182 */
21586     { { 0x9038b363,0x043e1347,0x6b05e519,0x58c11f54,0x6026cad1,0x4fe57abe,
21587         0x68a18da3,0xb7d17bed },
21588       { 0xe29c2559,0x44ca5891,0x5bfffd84,0x4f7a0376,0x74e46948,0x498de4af,
21589         0x6412cc64,0x3997fd5e },
21590       0 },
21591     /* 183 */
21592     { { 0x8bd61507,0xf2074682,0x34a64d2a,0x29e132d5,0x8a8a15e3,0xffeddfb0,
21593         0x3c6c13e8,0x0eeb8929 },
21594       { 0xa7e259f8,0xe9b69a3e,0xd13e7e67,0xce1db7e6,0xad1fa685,0x277318f6,
21595         0xc922b6ef,0x228916f8 },
21596       0 },
21597     /* 184 */
21598     { { 0x0a12ab5b,0x959ae25b,0x957bc136,0xcc11171f,0xd16e2b0c,0x8058429e,
21599         0x6e93097e,0xec05ad1d },
21600       { 0xac3f3708,0x157ba5be,0x30b59d77,0x31baf935,0x118234e5,0x47b55237,
21601         0x7ff11b37,0x7d314156 },
21602       0 },
21603     /* 185 */
21604     { { 0xf6dfefab,0x7bd9c05c,0xdcb37707,0xbe2f2268,0x3a38bb95,0xe53ead97,
21605         0x9bc1d7a3,0xe9ce66fc },
21606       { 0x6f6a02a1,0x75aa1576,0x60e600ed,0x38c087df,0x68cdc1b9,0xf8947f34,
21607         0x72280651,0xd9650b01 },
21608       0 },
21609     /* 186 */
21610     { { 0x5a057e60,0x504b4c4a,0x8def25e4,0xcbccc3be,0x17c1ccbd,0xa6353208,
21611         0x804eb7a2,0x14d6699a },
21612       { 0xdb1f411a,0x2c8a8415,0xf80d769c,0x09fbaf0b,0x1c2f77ad,0xb4deef90,
21613         0x0d43598a,0x6f4c6841 },
21614       0 },
21615     /* 187 */
21616     { { 0x96c24a96,0x8726df4e,0xfcbd99a3,0x534dbc85,0x8b2ae30a,0x3c466ef2,
21617         0x61189abb,0x4c4350fd },
21618       { 0xf855b8da,0x2967f716,0x463c38a1,0x41a42394,0xeae93343,0xc37e1413,
21619         0x5a3118b5,0xa726d242 },
21620       0 },
21621     /* 188 */
21622     { { 0x948c1086,0xdae6b3ee,0xcbd3a2e1,0xf1de503d,0x03d022f3,0x3f35ed3f,
21623         0xcc6cf392,0x13639e82 },
21624       { 0xcdafaa86,0x9ac938fb,0x2654a258,0xf45bc5fb,0x45051329,0x1963b26e,
21625         0xc1a335a3,0xca9365e1 },
21626       0 },
21627     /* 189 */
21628     { { 0x4c3b2d20,0x3615ac75,0x904e241b,0x742a5417,0xcc9d071d,0xb08521c4,
21629         0x970b72a5,0x9ce29c34 },
21630       { 0x6d3e0ad6,0x8cc81f73,0xf2f8434c,0x8060da9e,0x6ce862d9,0x35ed1d1a,
21631         0xab42af98,0x48c4abd7 },
21632       0 },
21633     /* 190 */
21634     { { 0x40c7485a,0xd221b0cc,0xe5274dbf,0xead455bb,0x9263d2e8,0x493c7698,
21635         0xf67b33cb,0x78017c32 },
21636       { 0x930cb5ee,0xb9d35769,0x0c408ed2,0xc0d14e94,0x272f1a4d,0xf8b7bf55,
21637         0xde5c1c04,0x53cd0454 },
21638       0 },
21639     /* 191 */
21640     { { 0x5d28ccac,0xbcd585fa,0x005b746e,0x5f823e56,0xcd0123aa,0x7c79f0a1,
21641         0xd3d7fa8f,0xeea465c1 },
21642       { 0x0551803b,0x7810659f,0x7ce6af70,0x6c0b599f,0x29288e70,0x4195a770,
21643         0x7ae69193,0x1b6e42a4 },
21644       0 },
21645     /* 192 */
21646     { { 0xf67d04c3,0x2e80937c,0x89eeb811,0x1e312be2,0x92594d60,0x56b5d887,
21647         0x187fbd3d,0x0224da14 },
21648       { 0x0c5fe36f,0x87abb863,0x4ef51f5f,0x580f3c60,0xb3b429ec,0x964fb1bf,
21649         0x42bfff33,0x60838ef0 },
21650       0 },
21651     /* 193 */
21652     { { 0x7e0bbe99,0x432cb2f2,0x04aa39ee,0x7bda44f3,0x9fa93903,0x5f497c7a,
21653         0x2d331643,0x636eb202 },
21654       { 0x93ae00aa,0xfcfd0e61,0x31ae6d2f,0x875a00fe,0x9f93901c,0xf43658a2,
21655         0x39218bac,0x8844eeb6 },
21656       0 },
21657     /* 194 */
21658     { { 0x6b3bae58,0x114171d2,0x17e39f3e,0x7db3df71,0x81a8eada,0xcd37bc7f,
21659         0x51fb789e,0x27ba83dc },
21660       { 0xfbf54de5,0xa7df439f,0xb5fe1a71,0x7277030b,0xdb297a48,0x42ee8e35,
21661         0x87f3a4ab,0xadb62d34 },
21662       0 },
21663     /* 195 */
21664     { { 0xa175df2a,0x9b1168a2,0x618c32e9,0x082aa04f,0x146b0916,0xc9e4f2e7,
21665         0x75e7c8b2,0xb990fd76 },
21666       { 0x4df37313,0x0829d96b,0xd0b40789,0x1c205579,0x78087711,0x66c9ae4a,
21667         0x4d10d18d,0x81707ef9 },
21668       0 },
21669     /* 196 */
21670     { { 0x03d6ff96,0x97d7cab2,0x0d843360,0x5b851bfc,0xd042db4b,0x268823c4,
21671         0xd5a8aa5c,0x3792daea },
21672       { 0x941afa0b,0x52818865,0x42d83671,0xf3e9e741,0x5be4e0a7,0x17c82527,
21673         0x94b001ba,0x5abd635e },
21674       0 },
21675     /* 197 */
21676     { { 0x0ac4927c,0x727fa84e,0xa7c8cf23,0xe3886035,0x4adca0df,0xa4bcd5ea,
21677         0x846ab610,0x5995bf21 },
21678       { 0x829dfa33,0xe90f860b,0x958fc18b,0xcaafe2ae,0x78630366,0x9b3baf44,
21679         0xd483411e,0x44c32ca2 },
21680       0 },
21681     /* 198 */
21682     { { 0xe40ed80c,0xa74a97f1,0x31d2ca82,0x5f938cb1,0x7c2d6ad9,0x53f2124b,
21683         0x8082a54c,0x1f2162fb },
21684       { 0x720b173e,0x7e467cc5,0x085f12f9,0x40e8a666,0x4c9d65dc,0x8cebc20e,
21685         0xc3e907c9,0x8f1d402b },
21686       0 },
21687     /* 199 */
21688     { { 0xfbc4058a,0x4f592f9c,0x292f5670,0xb15e14b6,0xbc1d8c57,0xc55cfe37,
21689         0x926edbf9,0xb1980f43 },
21690       { 0x32c76b09,0x98c33e09,0x33b07f78,0x1df5279d,0x863bb461,0x6f08ead4,
21691         0x37448e45,0x2828ad9b },
21692       0 },
21693     /* 200 */
21694     { { 0xc4cf4ac5,0x696722c4,0xdde64afb,0xf5ac1a3f,0xe0890832,0x0551baa2,
21695         0x5a14b390,0x4973f127 },
21696       { 0x322eac5d,0xe59d8335,0x0bd9b568,0x5e07eef5,0xa2588393,0xab36720f,
21697         0xdb168ac7,0x6dac8ed0 },
21698       0 },
21699     /* 201 */
21700     { { 0xeda835ef,0xf7b545ae,0x1d10ed51,0x4aa113d2,0x13741b09,0x035a65e0,
21701         0x20b9de4c,0x4b23ef59 },
21702       { 0x3c4c7341,0xe82bb680,0x3f58bc37,0xd457706d,0xa51e3ee8,0x73527863,
21703         0xddf49a4e,0x4dd71534 },
21704       0 },
21705     /* 202 */
21706     { { 0x95476cd9,0xbf944672,0xe31a725b,0x648d072f,0xfc4b67e0,0x1441c8b8,
21707         0x2f4a4dbb,0xfd317000 },
21708       { 0x8995d0e1,0x1cb43ff4,0x0ef729aa,0x76e695d1,0x41798982,0xe0d5f976,
21709         0x9569f365,0x14fac58c },
21710       0 },
21711     /* 203 */
21712     { { 0xf312ae18,0xad9a0065,0xfcc93fc9,0x51958dc0,0x8a7d2846,0xd9a14240,
21713         0x36abda50,0xed7c7651 },
21714       { 0x25d4abbc,0x46270f1a,0xf1a113ea,0x9b5dd8f3,0x5b51952f,0xc609b075,
21715         0x4d2e9f53,0xfefcb7f7 },
21716       0 },
21717     /* 204 */
21718     { { 0xba119185,0xbd09497a,0xaac45ba4,0xd54e8c30,0xaa521179,0x492479de,
21719         0x87e0d80b,0x1801a57e },
21720       { 0xfcafffb0,0x073d3f8d,0xae255240,0x6cf33c0b,0x5b5fdfbc,0x781d763b,
21721         0x1ead1064,0x9f8fc11e },
21722       0 },
21723     /* 205 */
21724     { { 0x5e69544c,0x1583a171,0xf04b7813,0x0eaf8567,0x278a4c32,0x1e22a8fd,
21725         0x3d3a69a9,0xa9d3809d },
21726       { 0x59a2da3b,0x936c2c2c,0x1895c847,0x38ccbcf6,0x63d50869,0x5e65244e,
21727         0xe1178ef7,0x3006b9ae },
21728       0 },
21729     /* 206 */
21730     { { 0xc9eead28,0x0bb1f2b0,0x89f4dfbc,0x7eef635d,0xb2ce8939,0x074757fd,
21731         0x45f8f761,0x0ab85fd7 },
21732       { 0x3e5b4549,0xecda7c93,0x97922f21,0x4be2bb5c,0xb43b8040,0x261a1274,
21733         0x11e942c2,0xb122d675 },
21734       0 },
21735     /* 207 */
21736     { { 0x66a5ae7a,0x3be607be,0x76adcbe3,0x01e703fa,0x4eb6e5c5,0xaf904301,
21737         0x097dbaec,0x9f599dc1 },
21738       { 0x0ff250ed,0x6d75b718,0x349a20dc,0x8eb91574,0x10b227a3,0x425605a4,
21739         0x8a294b78,0x7d5528e0 },
21740       0 },
21741     /* 208 */
21742     { { 0x20c26def,0xf0f58f66,0x582b2d1e,0x025585ea,0x01ce3881,0xfbe7d79b,
21743         0x303f1730,0x28ccea01 },
21744       { 0x79644ba5,0xd1dabcd1,0x06fff0b8,0x1fc643e8,0x66b3e17b,0xa60a76fc,
21745         0xa1d013bf,0xc18baf48 },
21746       0 },
21747     /* 209 */
21748     { { 0x5dc4216d,0x34e638c8,0x206142ac,0x00c01067,0x95f5064a,0xd453a171,
21749         0xb7a9596b,0x9def809d },
21750       { 0x67ab8d2c,0x41e8642e,0x6237a2b6,0xb4240433,0x64c4218b,0x7d506a6d,
21751         0x68808ce5,0x0357f8b0 },
21752       0 },
21753     /* 210 */
21754     { { 0x4cd2cc88,0x8e9dbe64,0xf0b8f39d,0xcc61c28d,0xcd30a0c8,0x4a309874,
21755         0x1b489887,0xe4a01add },
21756       { 0xf57cd8f9,0x2ed1eeac,0xbd594c48,0x1b767d3e,0x7bd2f787,0xa7295c71,
21757         0xce10cc30,0x466d7d79 },
21758       0 },
21759     /* 211 */
21760     { { 0x9dada2c7,0x47d31892,0x8f9aa27d,0x4fa0a6c3,0x820a59e1,0x90e4fd28,
21761         0x451ead1a,0xc672a522 },
21762       { 0x5d86b655,0x30607cc8,0xf9ad4af1,0xf0235d3b,0x571172a6,0x99a08680,
21763         0xf2a67513,0x5e3d64fa },
21764       0 },
21765     /* 212 */
21766     { { 0x9b3b4416,0xaa6410c7,0xeab26d99,0xcd8fcf85,0xdb656a74,0x5ebff74a,
21767         0xeb8e42fc,0x6c8a7a95 },
21768       { 0xb02a63bd,0x10c60ba7,0x8b8f0047,0x6b2f2303,0x312d90b0,0x8c6c3738,
21769         0xad82ca91,0x348ae422 },
21770       0 },
21771     /* 213 */
21772     { { 0x5ccda2fb,0x7f474663,0x8e0726d2,0x22accaa1,0x492b1f20,0x85adf782,
21773         0xd9ef2d2e,0xc1074de0 },
21774       { 0xae9a65b3,0xfcf3ce44,0x05d7151b,0xfd71e4ac,0xce6a9788,0xd4711f50,
21775         0xc9e54ffc,0xfbadfbdb },
21776       0 },
21777     /* 214 */
21778     { { 0x20a99363,0x1713f1cd,0x6cf22775,0xb915658f,0x24d359b2,0x968175cd,
21779         0x83716fcd,0xb7f976b4 },
21780       { 0x5d6dbf74,0x5758e24d,0x71c3af36,0x8d23bafd,0x0243dfe3,0x48f47760,
21781         0xcafcc805,0xf4d41b2e },
21782       0 },
21783     /* 215 */
21784     { { 0xfdabd48d,0x51f1cf28,0x32c078a4,0xce81be36,0x117146e9,0x6ace2974,
21785         0xe0160f10,0x180824ea },
21786       { 0x66e58358,0x0387698b,0xce6ca358,0x63568752,0x5e41e6c5,0x82380e34,
21787         0x83cf6d25,0x67e5f639 },
21788       0 },
21789     /* 216 */
21790     { { 0xcf4899ef,0xf89ccb8d,0x9ebb44c0,0x949015f0,0xb2598ec9,0x546f9276,
21791         0x04c11fc6,0x9fef789a },
21792       { 0x53d2a071,0x6d367ecf,0xa4519b09,0xb10e1a7f,0x611e2eef,0xca6b3fb0,
21793         0xa99c4e20,0xbc80c181 },
21794       0 },
21795     /* 217 */
21796     { { 0xe5eb82e6,0x972536f8,0xf56cb920,0x1a484fc7,0x50b5da5e,0xc78e2171,
21797         0x9f8cdf10,0x49270e62 },
21798       { 0xea6b50ad,0x1a39b7bb,0xa2388ffc,0x9a0284c1,0x8107197b,0x5403eb17,
21799         0x61372f7f,0xd2ee52f9 },
21800       0 },
21801     /* 218 */
21802     { { 0x88e0362a,0xd37cd285,0x8fa5d94d,0x442fa8a7,0xa434a526,0xaff836e5,
21803         0xe5abb733,0xdfb478be },
21804       { 0x673eede6,0xa91f1ce7,0x2b5b2f04,0xa5390ad4,0x5530da2f,0x5e66f7bf,
21805         0x08df473a,0xd9a140b4 },
21806       0 },
21807     /* 219 */
21808     { { 0x6e8ea498,0x0e0221b5,0x3563ee09,0x62347829,0x335d2ade,0xe06b8391,
21809         0x623f4b1a,0x760c058d },
21810       { 0xc198aa79,0x0b89b58c,0xf07aba7f,0xf74890d2,0xfde2556a,0x4e204110,
21811         0x8f190409,0x7141982d },
21812       0 },
21813     /* 220 */
21814     { { 0x4d4b0f45,0x6f0a0e33,0x392a94e1,0xd9280b38,0xb3c61d5e,0x3af324c6,
21815         0x89d54e47,0x3af9d1ce },
21816       { 0x20930371,0xfd8f7981,0x21c17097,0xeda2664c,0xdc42309b,0x0e9545dc,
21817         0x73957dd6,0xb1f815c3 },
21818       0 },
21819     /* 221 */
21820     { { 0x89fec44a,0x84faa78e,0x3caa4caf,0xc8c2ae47,0xc1b6a624,0x691c807d,
21821         0x1543f052,0xa41aed14 },
21822       { 0x7d5ffe04,0x42435399,0x625b6e20,0x8bacb2df,0x87817775,0x85d660be,
21823         0x86fb60ef,0xd6e9c1dd },
21824       0 },
21825     /* 222 */
21826     { { 0xc6853264,0x3aa2e97e,0xe2304a0b,0x771533b7,0xb8eae9be,0x1b912bb7,
21827         0xae9bf8c2,0x9c9c6e10 },
21828       { 0xe030b74c,0xa2309a59,0x6a631e90,0x4ed7494d,0xa49b79f2,0x89f44b23,
21829         0x40fa61b6,0x566bd596 },
21830       0 },
21831     /* 223 */
21832     { { 0xc18061f3,0x066c0118,0x7c83fc70,0x190b25d3,0x27273245,0xf05fc8e0,
21833         0xf525345e,0xcf2c7390 },
21834       { 0x10eb30cf,0xa09bceb4,0x0d77703a,0xcfd2ebba,0x150ff255,0xe842c43a,
21835         0x8aa20979,0x02f51755 },
21836       0 },
21837     /* 224 */
21838     { { 0xaddb7d07,0x396ef794,0x24455500,0x0b4fc742,0xc78aa3ce,0xfaff8eac,
21839         0xe8d4d97d,0x14e9ada5 },
21840       { 0x2f7079e2,0xdaa480a1,0xe4b0800e,0x45baa3cd,0x7838157d,0x01765e2d,
21841         0x8e9d9ae8,0xa0ad4fab },
21842       0 },
21843     /* 225 */
21844     { { 0x4a653618,0x0bfb7621,0x31eaaa5f,0x1872813c,0x44949d5e,0x1553e737,
21845         0x6e56ed1e,0xbcd530b8 },
21846       { 0x32e9c47b,0x169be853,0xb50059ab,0xdc2776fe,0x192bfbb4,0xcdba9761,
21847         0x6979341d,0x909283cf },
21848       0 },
21849     /* 226 */
21850     { { 0x76e81a13,0x67b00324,0x62171239,0x9bee1a99,0xd32e19d6,0x08ed361b,
21851         0xace1549a,0x35eeb7c9 },
21852       { 0x7e4e5bdc,0x1280ae5a,0xb6ceec6e,0x2dcd2cd3,0x6e266bc1,0x52e4224c,
21853         0x448ae864,0x9a8b2cf4 },
21854       0 },
21855     /* 227 */
21856     { { 0x09d03b59,0xf6471bf2,0xb65af2ab,0xc90e62a3,0xebd5eec9,0xff7ff168,
21857         0xd4491379,0x6bdb60f4 },
21858       { 0x8a55bc30,0xdadafebc,0x10097fe0,0xc79ead16,0x4c1e3bdd,0x42e19741,
21859         0x94ba08a9,0x01ec3cfd },
21860       0 },
21861     /* 228 */
21862     { { 0xdc9485c2,0xba6277eb,0x22fb10c7,0x48cc9a79,0x70a28d8a,0x4f61d60f,
21863         0x475464f6,0xd1acb1c0 },
21864       { 0x26f36612,0xd26902b1,0xe0618d8b,0x59c3a44e,0x308357ee,0x4df8a813,
21865         0x405626c2,0x7dcd079d },
21866       0 },
21867     /* 229 */
21868     { { 0xf05a4b48,0x5ce7d4d3,0x37230772,0xadcd2952,0x812a915a,0xd18f7971,
21869         0x377d19b8,0x0bf53589 },
21870       { 0x6c68ea73,0x35ecd95a,0x823a584d,0xc7f3bbca,0xf473a723,0x9fb674c6,
21871         0xe16686fc,0xd28be4d9 },
21872       0 },
21873     /* 230 */
21874     { { 0x38fa8e4b,0x5d2b9906,0x893fd8fc,0x559f186e,0x436fb6fc,0x3a6de2aa,
21875         0x510f88ce,0xd76007aa },
21876       { 0x523a4988,0x2d10aab6,0x74dd0273,0xb455cf44,0xa3407278,0x7f467082,
21877         0xb303bb01,0xf2b52f68 },
21878       0 },
21879     /* 231 */
21880     { { 0x9835b4ca,0x0d57eafa,0xbb669cbc,0x2d2232fc,0xc6643198,0x8eeeb680,
21881         0xcc5aed3a,0xd8dbe98e },
21882       { 0xc5a02709,0xcba9be3f,0xf5ba1fa8,0x30be68e5,0xf10ea852,0xfebd43cd,
21883         0xee559705,0xe01593a3 },
21884       0 },
21885     /* 232 */
21886     { { 0xea75a0a6,0xd3e5af50,0x57858033,0x512226ac,0xd0176406,0x6fe6d50f,
21887         0xaeb8ef06,0xafec07b1 },
21888       { 0x80bb0a31,0x7fb99567,0x37309aae,0x6f1af3cc,0x01abf389,0x9153a15a,
21889         0x6e2dbfdd,0xa71b9354 },
21890       0 },
21891     /* 233 */
21892     { { 0x18f593d2,0xbf8e12e0,0xa078122b,0xd1a90428,0x0ba4f2ad,0x150505db,
21893         0x628523d9,0x53a2005c },
21894       { 0xe7f2b935,0x07c8b639,0xc182961a,0x2bff975a,0x7518ca2c,0x86bceea7,
21895         0x3d588e3d,0xbf47d19b },
21896       0 },
21897     /* 234 */
21898     { { 0xdd7665d5,0x672967a7,0x2f2f4de5,0x4e303057,0x80d4903f,0x144005ae,
21899         0x39c9a1b6,0x001c2c7f },
21900       { 0x69efc6d6,0x143a8014,0x7bc7a724,0xc810bdaa,0xa78150a4,0x5f65670b,
21901         0x86ffb99b,0xfdadf8e7 },
21902       0 },
21903     /* 235 */
21904     { { 0xffc00785,0xfd38cb88,0x3b48eb67,0x77fa7591,0xbf368fbc,0x0454d055,
21905         0x5aa43c94,0x3a838e4d },
21906       { 0x3e97bb9a,0x56166329,0x441d94d9,0x9eb93363,0x0adb2a83,0x515591a6,
21907         0x873e1da3,0x3cdb8257 },
21908       0 },
21909     /* 236 */
21910     { { 0x7de77eab,0x137140a9,0x41648109,0xf7e1c50d,0xceb1d0df,0x762dcad2,
21911         0xf1f57fba,0x5a60cc89 },
21912       { 0x40d45673,0x80b36382,0x5913c655,0x1b82be19,0xdd64b741,0x057284b8,
21913         0xdbfd8fc0,0x922ff56f },
21914       0 },
21915     /* 237 */
21916     { { 0xc9a129a1,0x1b265dee,0xcc284e04,0xa5b1ce57,0xcebfbe3c,0x04380c46,
21917         0xf6c5cd62,0x72919a7d },
21918       { 0x8fb90f9a,0x298f453a,0x88e4031b,0xd719c00b,0x796f1856,0xe32c0e77,
21919         0x3624089a,0x5e791780 },
21920       0 },
21921     /* 238 */
21922     { { 0x7f63cdfb,0x5c16ec55,0xf1cae4fd,0x8e6a3571,0x560597ca,0xfce26bea,
21923         0xe24c2fab,0x4e0a5371 },
21924       { 0xa5765357,0x276a40d3,0x0d73a2b4,0x3c89af44,0x41d11a32,0xb8f370ae,
21925         0xd56604ee,0xf5ff7818 },
21926       0 },
21927     /* 239 */
21928     { { 0x1a09df21,0xfbf3e3fe,0xe66e8e47,0x26d5d28e,0x29c89015,0x2096bd0a,
21929         0x533f5e64,0xe41df0e9 },
21930       { 0xb3ba9e3f,0x305fda40,0x2604d895,0xf2340ceb,0x7f0367c7,0x0866e192,
21931         0xac4f155f,0x8edd7d6e },
21932       0 },
21933     /* 240 */
21934     { { 0x0bfc8ff3,0xc9a1dc0e,0xe936f42f,0x14efd82b,0xcca381ef,0x67016f7c,
21935         0xed8aee96,0x1432c1ca },
21936       { 0x70b23c26,0xec684829,0x0735b273,0xa64fe873,0xeaef0f5a,0xe389f6e5,
21937         0x5ac8d2c6,0xcaef480b },
21938       0 },
21939     /* 241 */
21940     { { 0x75315922,0x5245c978,0x3063cca5,0xd8295171,0xb64ef2cb,0xf3ce60d0,
21941         0x8efae236,0xd0ba177e },
21942       { 0xb1b3af60,0x53a9ae8f,0x3d2da20e,0x1a796ae5,0xdf9eef28,0x01d63605,
21943         0x1c54ae16,0xf31c957c },
21944       0 },
21945     /* 242 */
21946     { { 0x49cc4597,0xc0f58d52,0xbae0a028,0xdc5015b0,0x734a814a,0xefc5fc55,
21947         0x96e17c3a,0x013404cb },
21948       { 0xc9a824bf,0xb29e2585,0x001eaed7,0xd593185e,0x61ef68ac,0x8d6ee682,
21949         0x91933e6c,0x6f377c4b },
21950       0 },
21951     /* 243 */
21952     { { 0xa8333fd2,0x9f93bad1,0x5a2a95b8,0xa8930202,0xeaf75ace,0x211e5037,
21953         0xd2d09506,0x6dba3e4e },
21954       { 0xd04399cd,0xa48ef98c,0xe6b73ade,0x1811c66e,0xc17ecaf3,0x72f60752,
21955         0x3becf4a7,0xf13cf342 },
21956       0 },
21957     /* 244 */
21958     { { 0xa919e2eb,0xceeb9ec0,0xf62c0f68,0x83a9a195,0x7aba2299,0xcfba3bb6,
21959         0x274bbad3,0xc83fa9a9 },
21960       { 0x62fa1ce0,0x0d7d1b0b,0x3418efbf,0xe58b60f5,0x52706f04,0xbfa8ef9e,
21961         0x5d702683,0xb49d70f4 },
21962       0 },
21963     /* 245 */
21964     { { 0xfad5513b,0x914c7510,0xb1751e2d,0x05f32eec,0xd9fb9d59,0x6d850418,
21965         0x0c30f1cf,0x59cfadbb },
21966       { 0x55cb7fd6,0xe167ac23,0x820426a3,0x249367b8,0x90a78864,0xeaeec58c,
21967         0x354a4b67,0x5babf362 },
21968       0 },
21969     /* 246 */
21970     { { 0xee424865,0x37c981d1,0xf2e5577f,0x8b002878,0xb9e0c058,0x702970f1,
21971         0x9026c8f0,0x6188c6a7 },
21972       { 0xd0f244da,0x06f9a19b,0xfb080873,0x1ecced5c,0x9f213637,0x35470f9b,
21973         0xdf50b9d9,0x993fe475 },
21974       0 },
21975     /* 247 */
21976     { { 0x9b2c3609,0x68e31cdf,0x2c46d4ea,0x84eb19c0,0x9a775101,0x7ac9ec1a,
21977         0x4c80616b,0x81f76466 },
21978       { 0x75fbe978,0x1d7c2a5a,0xf183b356,0x6743fed3,0x501dd2bf,0x838d1f04,
21979         0x5fe9060d,0x564a812a },
21980       0 },
21981     /* 248 */
21982     { { 0xfa817d1d,0x7a5a64f4,0xbea82e0f,0x55f96844,0xcd57f9aa,0xb5ff5a0f,
21983         0x00e51d6c,0x226bf3cf },
21984       { 0x2f2833cf,0xd6d1a9f9,0x4f4f89a8,0x20a0a35a,0x8f3f7f77,0x11536c49,
21985         0xff257836,0x68779f47 },
21986       0 },
21987     /* 249 */
21988     { { 0x73043d08,0x79b0c1c1,0x1fc020fa,0xa5446774,0x9a6d26d0,0xd3767e28,
21989         0xeb092e0b,0x97bcb0d1 },
21990       { 0xf32ed3c3,0x2ab6eaa8,0xb281bc48,0xc8a4f151,0xbfa178f3,0x4d1bf4f3,
21991         0x0a784655,0xa872ffe8 },
21992       0 },
21993     /* 250 */
21994     { { 0xa32b2086,0xb1ab7935,0x8160f486,0xe1eb710e,0x3b6ae6be,0x9bd0cd91,
21995         0xb732a36a,0x02812bfc },
21996       { 0xcf605318,0xa63fd7ca,0xfdfd6d1d,0x646e5d50,0x2102d619,0xa1d68398,
21997         0xfe5396af,0x07391cc9 },
21998       0 },
21999     /* 251 */
22000     { { 0x8b80d02b,0xc50157f0,0x62877f7f,0x6b8333d1,0x78d542ae,0x7aca1af8,
22001         0x7e6d2a08,0x355d2adc },
22002       { 0x287386e1,0xb41f335a,0xf8e43275,0xfd272a94,0xe79989ea,0x286ca2cd,
22003         0x7c2a3a79,0x3dc2b1e3 },
22004       0 },
22005     /* 252 */
22006     { { 0x04581352,0xd689d21c,0x376782be,0x0a00c825,0x9fed701f,0x203bd590,
22007         0x3ccd846b,0xc4786910 },
22008       { 0x24c768ed,0x5dba7708,0x6841f657,0x72feea02,0x6accce0e,0x73313ed5,
22009         0xd5bb4d32,0xccc42968 },
22010       0 },
22011     /* 253 */
22012     { { 0x3d7620b9,0x94e50de1,0x5992a56a,0xd89a5c8a,0x675487c9,0xdc007640,
22013         0xaa4871cf,0xe147eb42 },
22014       { 0xacf3ae46,0x274ab4ee,0x50350fbe,0xfd4936fb,0x48c840ea,0xdf2afe47,
22015         0x080e96e3,0x239ac047 },
22016       0 },
22017     /* 254 */
22018     { { 0x2bfee8d4,0x481d1f35,0xfa7b0fec,0xce80b5cf,0x2ce9af3c,0x105c4c9e,
22019         0xf5f7e59d,0xc55fa1a3 },
22020       { 0x8257c227,0x3186f14e,0x342be00b,0xc5b1653f,0xaa904fb2,0x09afc998,
22021         0xd4f4b699,0x094cd99c },
22022       0 },
22023     /* 255 */
22024     { { 0xd703beba,0x8a981c84,0x32ceb291,0x8631d150,0xe3bd49ec,0xa445f2c9,
22025         0x42abad33,0xb90a30b6 },
22026       { 0xb4a5abf9,0xb465404f,0x75db7603,0x004750c3,0xca35d89f,0x6f9a42cc,
22027         0x1b7924f7,0x019f8b9a },
22028       0 },
22029 };
22030 
22031 /* Multiply the base point of P256 by the scalar and return the result.
22032  * If map is true then convert result to affine co-ordinates.
22033  *
22034  * r     Resulting point.
22035  * k     Scalar to multiply by.
22036  * map   Indicates whether to convert result to affine.
22037  * heap  Heap to use for allocation.
22038  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
22039  */
22040 static int sp_256_ecc_mulmod_base_8(sp_point* r, sp_digit* k,
22041         int map, void* heap)
22042 {
22043     return sp_256_ecc_mulmod_stripe_8(r, &p256_base, p256_table,
22044                                       k, map, heap);
22045 }
22046 
22047 #endif
22048 
22049 /* Multiply the base point of P256 by the scalar and return the result.
22050  * If map is true then convert result to affine co-ordinates.
22051  *
22052  * km    Scalar to multiply by.
22053  * r     Resulting point.
22054  * map   Indicates whether to convert result to affine.
22055  * heap  Heap to use for allocation.
22056  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
22057  */
22058 int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
22059 {
22060 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
22061     sp_point p;
22062     sp_digit kd[8];
22063 #endif
22064     sp_point* point;
22065     sp_digit* k = NULL;
22066     int err = MP_OKAY;
22067 #ifdef HAVE_INTEL_AVX2
22068     word32 cpuid_flags = cpuid_get_flags();
22069 #endif
22070 
22071     err = sp_ecc_point_new(heap, p, point);
22072 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
22073     if (err == MP_OKAY) {
22074         k = XMALLOC(sizeof(sp_digit) * 8, heap, DYNAMIC_TYPE_ECC);
22075         if (k == NULL)
22076             err = MEMORY_E;
22077     }
22078 #else
22079     k = kd;
22080 #endif
22081     if (err == MP_OKAY) {
22082         sp_256_from_mp(k, 8, km);
22083 
22084 #ifdef HAVE_INTEL_AVX2
22085         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
22086             err = sp_256_ecc_mulmod_base_avx2_8(point, k, map, heap);
22087         else
22088 #endif
22089             err = sp_256_ecc_mulmod_base_8(point, k, map, heap);
22090     }
22091     if (err == MP_OKAY)
22092         err = sp_256_point_to_ecc_point_8(point, r);
22093 
22094 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
22095     if (k != NULL)
22096         XFREE(k, heap, DYNAMIC_TYPE_ECC);
22097 #endif
22098     sp_ecc_point_free(point, 0, heap);
22099 
22100     return err;
22101 }
22102 
22103 #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN)
22104 /* Returns 1 if the number of zero.
22105  * Implementation is constant time.
22106  *
22107  * a  Number to check.
22108  * returns 1 if the number is zero and 0 otherwise.
22109  */
22110 static int sp_256_iszero_8(const sp_digit* a)
22111 {
22112     return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7]) == 0;
22113 }
22114 
22115 #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN */
22116 /* Add 1 to a. (a = a + 1)
22117  *
22118  * r  A single precision integer.
22119  * a  A single precision integer.
22120  */
22121 static void sp_256_add_one_8(sp_digit* a)
22122 {
22123     __asm__ __volatile__ (
22124         "ldr    r1, [%[a], #0]\n\t"
22125         "ldr    r2, [%[a], #4]\n\t"
22126         "ldr    r3, [%[a], #8]\n\t"
22127         "ldr    r4, [%[a], #12]\n\t"
22128         "adds   r1, r1, #1\n\t"
22129         "adcs   r2, r2, #0\n\t"
22130         "adcs   r3, r3, #0\n\t"
22131         "adcs   r4, r4, #0\n\t"
22132         "str    r1, [%[a], #0]\n\t"
22133         "str    r2, [%[a], #4]\n\t"
22134         "str    r3, [%[a], #8]\n\t"
22135         "str    r4, [%[a], #12]\n\t"
22136         "ldr    r1, [%[a], #16]\n\t"
22137         "ldr    r2, [%[a], #20]\n\t"
22138         "ldr    r3, [%[a], #24]\n\t"
22139         "ldr    r4, [%[a], #28]\n\t"
22140         "adcs   r1, r1, #0\n\t"
22141         "adcs   r2, r2, #0\n\t"
22142         "adcs   r3, r3, #0\n\t"
22143         "adcs   r4, r4, #0\n\t"
22144         "str    r1, [%[a], #16]\n\t"
22145         "str    r2, [%[a], #20]\n\t"
22146         "str    r3, [%[a], #24]\n\t"
22147         "str    r4, [%[a], #28]\n\t"
22148         :
22149         : [a] "r" (a)
22150         : "memory", "r1", "r2", "r3", "r4"
22151     );
22152 }
22153 
22154 /* Read big endian unsigned byte aray into r.
22155  *
22156  * r  A single precision integer.
22157  * a  Byte array.
22158  * n  Number of bytes in array to read.
22159  */
22160 static void sp_256_from_bin(sp_digit* r, int max, const byte* a, int n)
22161 {
22162     int i, j = 0, s = 0;
22163 
22164     r[0] = 0;
22165     for (i = n-1; i >= 0; i--) {
22166         r[j] |= ((sp_digit)a[i]) << s;
22167         if (s >= 24) {
22168             r[j] &= 0xffffffff;
22169             s = 32 - s;
22170             if (j + 1 >= max)
22171                 break;
22172             r[++j] = a[i] >> s;
22173             s = 8 - s;
22174         }
22175         else
22176             s += 8;
22177     }
22178 
22179     for (j++; j < max; j++)
22180         r[j] = 0;
22181 }
22182 
22183 /* Generates a scalar that is in the range 1..order-1.
22184  *
22185  * rng  Random number generator.
22186  * k    Scalar value.
22187  * returns RNG failures, MEMORY_E when memory allocation fails and
22188  * MP_OKAY on success.
22189  */
22190 static int sp_256_ecc_gen_k_8(WC_RNG* rng, sp_digit* k)
22191 {
22192     int err;
22193     byte buf[32];
22194 
22195     do {
22196         err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
22197         if (err == 0) {
22198             sp_256_from_bin(k, 8, buf, sizeof(buf));
22199             if (sp_256_cmp_8(k, p256_order2) < 0) {
22200                 sp_256_add_one_8(k);
22201                 break;
22202             }
22203         }
22204     }
22205     while (err == 0);
22206 
22207     return err;
22208 }
22209 
22210 /* Makes a random EC key pair.
22211  *
22212  * rng   Random number generator.
22213  * priv  Generated private value.
22214  * pub   Generated public point.
22215  * heap  Heap to use for allocation.
22216  * returns ECC_INF_E when the point does not have the correct order, RNG
22217  * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
22218  */
22219 int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
22220 {
22221 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
22222     sp_point p;
22223     sp_digit kd[8];
22224 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
22225     sp_point inf;
22226 #endif
22227 #endif
22228     sp_point* point;
22229     sp_digit* k = NULL;
22230 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
22231     sp_point* infinity;
22232 #endif
22233     int err;
22234 #ifdef HAVE_INTEL_AVX2
22235     word32 cpuid_flags = cpuid_get_flags();
22236 #endif
22237 
22238     (void)heap;
22239 
22240     err = sp_ecc_point_new(heap, p, point);
22241 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
22242     if (err == MP_OKAY)
22243         err = sp_ecc_point_new(heap, inf, infinity);
22244 #endif
22245 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
22246     if (err == MP_OKAY) {
22247         k = XMALLOC(sizeof(sp_digit) * 8, heap, DYNAMIC_TYPE_ECC);
22248         if (k == NULL)
22249             err = MEMORY_E;
22250     }
22251 #else
22252     k = kd;
22253 #endif
22254 
22255     if (err == MP_OKAY)
22256         err = sp_256_ecc_gen_k_8(rng, k);
22257     if (err == MP_OKAY) {
22258 #ifdef HAVE_INTEL_AVX2
22259         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
22260             err = sp_256_ecc_mulmod_base_avx2_8(point, k, 1, NULL);
22261         else
22262 #endif
22263             err = sp_256_ecc_mulmod_base_8(point, k, 1, NULL);
22264     }
22265 
22266 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
22267     if (err == MP_OKAY) {
22268 #ifdef HAVE_INTEL_AVX2
22269         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
22270             err = sp_256_ecc_mulmod_avx2_8(infinity, point, p256_order, 1,
22271                                                                           NULL);
22272         }
22273         else
22274 #endif
22275             err = sp_256_ecc_mulmod_8(infinity, point, p256_order, 1, NULL);
22276     }
22277     if (err == MP_OKAY) {
22278         if (!sp_256_iszero_8(point->x) || !sp_256_iszero_8(point->y))
22279             err = ECC_INF_E;
22280     }
22281 #endif
22282 
22283     if (err == MP_OKAY)
22284         err = sp_256_to_mp(k, priv);
22285     if (err == MP_OKAY)
22286         err = sp_256_point_to_ecc_point_8(point, pub);
22287 
22288 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
22289     if (k != NULL)
22290         XFREE(k, heap, DYNAMIC_TYPE_ECC);
22291 #endif
22292 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
22293     sp_ecc_point_free(infinity, 1, heap);
22294 #endif
22295     sp_ecc_point_free(point, 1, heap);
22296 
22297     return err;
22298 }
22299 
22300 #ifdef HAVE_ECC_DHE
22301 /* Write r as big endian to byte aray.
22302  * Fixed length number of bytes written: 32
22303  *
22304  * r  A single precision integer.
22305  * a  Byte array.
22306  */
22307 static void sp_256_to_bin(sp_digit* r, byte* a)
22308 {
22309     int i, j, s = 0, b;
22310 
22311     j = 256 / 8 - 1;
22312     a[j] = 0;
22313     for (i=0; i<8 && j>=0; i++) {
22314         b = 0;
22315         a[j--] |= r[i] << s; b += 8 - s;
22316         if (j < 0)
22317             break;
22318         while (b < 32) {
22319             a[j--] = r[i] >> b; b += 8;
22320             if (j < 0)
22321                 break;
22322         }
22323         s = 8 - (b - 32);
22324         if (j >= 0)
22325             a[j] = 0;
22326         if (s != 0)
22327             j++;
22328     }
22329 }
22330 
22331 /* Multiply the point by the scalar and serialize the X ordinate.
22332  * The number is 0 padded to maximum size on output.
22333  *
22334  * priv    Scalar to multiply the point by.
22335  * pub     Point to multiply.
22336  * out     Buffer to hold X ordinate.
22337  * outLen  On entry, size of the buffer in bytes.
22338  *         On exit, length of data in buffer in bytes.
22339  * heap    Heap to use for allocation.
22340  * returns BUFFER_E if the buffer is to small for output size,
22341  * MEMORY_E when memory allocation fails and MP_OKAY on success.
22342  */
22343 int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out,
22344                           word32* outLen, void* heap)
22345 {
22346 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
22347     sp_point p;
22348     sp_digit kd[8];
22349 #endif
22350     sp_point* point = NULL;
22351     sp_digit* k = NULL;
22352     int err = MP_OKAY;
22353 #ifdef HAVE_INTEL_AVX2
22354     word32 cpuid_flags = cpuid_get_flags();
22355 #endif
22356 
22357     if (*outLen < 32)
22358         err = BUFFER_E;
22359 
22360     if (err == MP_OKAY)
22361         err = sp_ecc_point_new(heap, p, point);
22362 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
22363     if (err == MP_OKAY) {
22364         k = XMALLOC(sizeof(sp_digit) * 8, heap, DYNAMIC_TYPE_ECC);
22365         if (k == NULL)
22366             err = MEMORY_E;
22367     }
22368 #else
22369     k = kd;
22370 #endif
22371 
22372     if (err == MP_OKAY) {
22373         sp_256_from_mp(k, 8, priv);
22374         sp_256_point_from_ecc_point_8(point, pub);
22375 #ifdef HAVE_INTEL_AVX2
22376         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
22377             err = sp_256_ecc_mulmod_avx2_8(point, point, k, 1, heap);
22378         else
22379 #endif
22380             err = sp_256_ecc_mulmod_8(point, point, k, 1, heap);
22381     }
22382     if (err == MP_OKAY) {
22383         sp_256_to_bin(point->x, out);
22384         *outLen = 32;
22385     }
22386 
22387 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
22388     if (k != NULL)
22389         XFREE(k, heap, DYNAMIC_TYPE_ECC);
22390 #endif
22391     sp_ecc_point_free(point, 0, heap);
22392 
22393     return err;
22394 }
22395 #endif /* HAVE_ECC_DHE */
22396 
22397 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
22398 #ifdef WOLFSSL_SP_SMALL
22399 /* Multiply a and b into r. (r = a * b)
 *
 * Looped (WOLFSSL_SP_SMALL) variant. The product is produced one result
 * word at a time: for each result word the partial products of the words of
 * a and b whose byte offsets add up to the offset of that result word are
 * summed into a three word accumulator.
 * All 16 result words are written to a local buffer and copied to r at the
 * end, so r must have room for 16 digits.
22400  *
22401  * r  A single precision integer.
22402  * a  A single precision integer.
22403  * b  A single precision integer.
22404  */
22405 static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
22406 {
    /* Holds the full product; the asm operand named r is bound to this
     * buffer, not to the caller's r (presumably so r may alias a or b). */
22407     sp_digit tmp[16];
22408 
22409     __asm__ __volatile__ (
        /* r5 = byte offset of the result word being computed.
         * r8:r7:r6 = three word accumulator (r6 is the low word). */
22410         "mov    r5, #0\n\t"
22411         "mov    r6, #0\n\t"
22412         "mov    r7, #0\n\t"
22413         "mov    r8, #0\n\t"
22414         "\n1:\n\t"
        /* r3 = starting byte offset into a: r5 - 28 when r5 >= 28, else 0.
         * r4 = matching byte offset into b, so that r3 + r4 == r5. */
22415         "subs   r3, r5, #28\n\t"
22416         "movcc  r3, #0\n\t"
22417         "sub    r4, r5, r3\n\t"
22418         "\n2:\n\t"
        /* Add the 64-bit product of the two loaded words (r10:r9) into the
         * accumulator. */
22419         "ldr    r14, [%[a], r3]\n\t"
22420         "ldr    r12, [%[b], r4]\n\t"
22421         "umull  r9, r10, r14, r12\n\t"
22422         "adds   r6, r6, r9\n\t"
22423         "adcs   r7, r7, r10\n\t"
22424         "adc    r8, r8, #0\n\t"
        /* Step to the next pair; stop once the offset into a reaches 32
         * (past its last word) or exceeds the result offset. */
22425         "add    r3, r3, #4\n\t"
22426         "sub    r4, r4, #4\n\t"
22427         "cmp    r3, #32\n\t"
22428         "beq    3f\n\t"
22429         "cmp    r3, r5\n\t"
22430         "ble    2b\n\t"
22431         "\n3:\n\t"
        /* Store the finished result word and shift the accumulator down by
         * one word. */
22432         "str    r6, [%[r], r5]\n\t"
22433         "mov    r6, r7\n\t"
22434         "mov    r7, r8\n\t"
22435         "mov    r8, #0\n\t"
22436         "add    r5, r5, #4\n\t"
22437         "cmp    r5, #56\n\t"
22438         "ble    1b\n\t"
        /* r5 is 60 here: what is left in the accumulator is the top word. */
22439         "str    r6, [%[r], r5]\n\t"
22440         :
22441         : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
22442         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
22443     );
22444 
    /* Copy all 16 words of the product out to the caller's buffer. */
22445     XMEMCPY(r, tmp, sizeof(tmp));
22446 }
22447 
22448 #else
22449 /* Multiply a and b into r. (r = a * b)
 *
 * Fully unrolled variant. Result words are produced in order from least to
 * most significant; for each one, every product A[i] * B[j] with i + j equal
 * to the index of the result word is added into a three word accumulator
 * held in r3, r4 and r5, whose roles rotate from one result word to the next.
 * The low 8 result words are stored to a local buffer and the high 8 are
 * stored directly to r; the local buffer is copied into the low half of r at
 * the end. r must have room for 16 digits.
22450  *
22451  * r  A single precision integer.
22452  * a  A single precision integer.
22453  * b  A single precision integer.
22454  */
22455 static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
22456 {
    /* Low half of the product; written to r only after the asm has finished
     * reading a and b (presumably so r may alias a or b). */
22457     sp_digit tmp[8];
22458 
22459     __asm__ __volatile__ (
        /* r10 stays zero; it is used to fold the carry flag into the top
         * accumulator word. */
22460         "mov    r10, #0\n\t"
22461         "#  A[0] * B[0]\n\t"
22462         "ldr    r8, [%[a], #0]\n\t"
22463         "ldr    r9, [%[b], #0]\n\t"
22464         "umull  r3, r4, r8, r9\n\t"
22465         "mov    r5, #0\n\t"
22466         "str    r3, [%[tmp]]\n\t"
22467         "#  A[0] * B[1]\n\t"
22468         "ldr    r8, [%[a], #0]\n\t"
22469         "ldr    r9, [%[b], #4]\n\t"
22470         "umull  r6, r7, r8, r9\n\t"
22471         "adds   r4, r4, r6\n\t"
22472         "adcs   r5, r5, r7\n\t"
22473         "adc    r3, r10, r10\n\t"
22474         "#  A[1] * B[0]\n\t"
22475         "ldr    r8, [%[a], #4]\n\t"
22476         "ldr    r9, [%[b], #0]\n\t"
22477         "umull  r6, r7, r8, r9\n\t"
22478         "adds   r4, r4, r6\n\t"
22479         "adcs   r5, r5, r7\n\t"
22480         "adc    r3, r3, r10\n\t"
22481         "str    r4, [%[tmp], #4]\n\t"
22482         "#  A[0] * B[2]\n\t"
22483         "ldr    r8, [%[a], #0]\n\t"
22484         "ldr    r9, [%[b], #8]\n\t"
22485         "umull  r6, r7, r8, r9\n\t"
22486         "adds   r5, r5, r6\n\t"
22487         "adcs   r3, r3, r7\n\t"
22488         "adc    r4, r10, r10\n\t"
22489         "#  A[1] * B[1]\n\t"
22490         "ldr    r8, [%[a], #4]\n\t"
22491         "ldr    r9, [%[b], #4]\n\t"
22492         "umull  r6, r7, r8, r9\n\t"
22493         "adds   r5, r5, r6\n\t"
22494         "adcs   r3, r3, r7\n\t"
22495         "adc    r4, r4, r10\n\t"
22496         "#  A[2] * B[0]\n\t"
22497         "ldr    r8, [%[a], #8]\n\t"
22498         "ldr    r9, [%[b], #0]\n\t"
22499         "umull  r6, r7, r8, r9\n\t"
22500         "adds   r5, r5, r6\n\t"
22501         "adcs   r3, r3, r7\n\t"
22502         "adc    r4, r4, r10\n\t"
22503         "str    r5, [%[tmp], #8]\n\t"
22504         "#  A[0] * B[3]\n\t"
22505         "ldr    r8, [%[a], #0]\n\t"
22506         "ldr    r9, [%[b], #12]\n\t"
22507         "umull  r6, r7, r8, r9\n\t"
22508         "adds   r3, r3, r6\n\t"
22509         "adcs   r4, r4, r7\n\t"
22510         "adc    r5, r10, r10\n\t"
22511         "#  A[1] * B[2]\n\t"
22512         "ldr    r8, [%[a], #4]\n\t"
22513         "ldr    r9, [%[b], #8]\n\t"
22514         "umull  r6, r7, r8, r9\n\t"
22515         "adds   r3, r3, r6\n\t"
22516         "adcs   r4, r4, r7\n\t"
22517         "adc    r5, r5, r10\n\t"
22518         "#  A[2] * B[1]\n\t"
22519         "ldr    r8, [%[a], #8]\n\t"
22520         "ldr    r9, [%[b], #4]\n\t"
22521         "umull  r6, r7, r8, r9\n\t"
22522         "adds   r3, r3, r6\n\t"
22523         "adcs   r4, r4, r7\n\t"
22524         "adc    r5, r5, r10\n\t"
22525         "#  A[3] * B[0]\n\t"
22526         "ldr    r8, [%[a], #12]\n\t"
22527         "ldr    r9, [%[b], #0]\n\t"
22528         "umull  r6, r7, r8, r9\n\t"
22529         "adds   r3, r3, r6\n\t"
22530         "adcs   r4, r4, r7\n\t"
22531         "adc    r5, r5, r10\n\t"
22532         "str    r3, [%[tmp], #12]\n\t"
22533         "#  A[0] * B[4]\n\t"
22534         "ldr    r8, [%[a], #0]\n\t"
22535         "ldr    r9, [%[b], #16]\n\t"
22536         "umull  r6, r7, r8, r9\n\t"
22537         "adds   r4, r4, r6\n\t"
22538         "adcs   r5, r5, r7\n\t"
22539         "adc    r3, r10, r10\n\t"
22540         "#  A[1] * B[3]\n\t"
22541         "ldr    r8, [%[a], #4]\n\t"
22542         "ldr    r9, [%[b], #12]\n\t"
22543         "umull  r6, r7, r8, r9\n\t"
22544         "adds   r4, r4, r6\n\t"
22545         "adcs   r5, r5, r7\n\t"
22546         "adc    r3, r3, r10\n\t"
22547         "#  A[2] * B[2]\n\t"
22548         "ldr    r8, [%[a], #8]\n\t"
22549         "ldr    r9, [%[b], #8]\n\t"
22550         "umull  r6, r7, r8, r9\n\t"
22551         "adds   r4, r4, r6\n\t"
22552         "adcs   r5, r5, r7\n\t"
22553         "adc    r3, r3, r10\n\t"
22554         "#  A[3] * B[1]\n\t"
22555         "ldr    r8, [%[a], #12]\n\t"
22556         "ldr    r9, [%[b], #4]\n\t"
22557         "umull  r6, r7, r8, r9\n\t"
22558         "adds   r4, r4, r6\n\t"
22559         "adcs   r5, r5, r7\n\t"
22560         "adc    r3, r3, r10\n\t"
22561         "#  A[4] * B[0]\n\t"
22562         "ldr    r8, [%[a], #16]\n\t"
22563         "ldr    r9, [%[b], #0]\n\t"
22564         "umull  r6, r7, r8, r9\n\t"
22565         "adds   r4, r4, r6\n\t"
22566         "adcs   r5, r5, r7\n\t"
22567         "adc    r3, r3, r10\n\t"
22568         "str    r4, [%[tmp], #16]\n\t"
22569         "#  A[0] * B[5]\n\t"
22570         "ldr    r8, [%[a], #0]\n\t"
22571         "ldr    r9, [%[b], #20]\n\t"
22572         "umull  r6, r7, r8, r9\n\t"
22573         "adds   r5, r5, r6\n\t"
22574         "adcs   r3, r3, r7\n\t"
22575         "adc    r4, r10, r10\n\t"
22576         "#  A[1] * B[4]\n\t"
22577         "ldr    r8, [%[a], #4]\n\t"
22578         "ldr    r9, [%[b], #16]\n\t"
22579         "umull  r6, r7, r8, r9\n\t"
22580         "adds   r5, r5, r6\n\t"
22581         "adcs   r3, r3, r7\n\t"
22582         "adc    r4, r4, r10\n\t"
22583         "#  A[2] * B[3]\n\t"
22584         "ldr    r8, [%[a], #8]\n\t"
22585         "ldr    r9, [%[b], #12]\n\t"
22586         "umull  r6, r7, r8, r9\n\t"
22587         "adds   r5, r5, r6\n\t"
22588         "adcs   r3, r3, r7\n\t"
22589         "adc    r4, r4, r10\n\t"
22590         "#  A[3] * B[2]\n\t"
22591         "ldr    r8, [%[a], #12]\n\t"
22592         "ldr    r9, [%[b], #8]\n\t"
22593         "umull  r6, r7, r8, r9\n\t"
22594         "adds   r5, r5, r6\n\t"
22595         "adcs   r3, r3, r7\n\t"
22596         "adc    r4, r4, r10\n\t"
22597         "#  A[4] * B[1]\n\t"
22598         "ldr    r8, [%[a], #16]\n\t"
22599         "ldr    r9, [%[b], #4]\n\t"
22600         "umull  r6, r7, r8, r9\n\t"
22601         "adds   r5, r5, r6\n\t"
22602         "adcs   r3, r3, r7\n\t"
22603         "adc    r4, r4, r10\n\t"
22604         "#  A[5] * B[0]\n\t"
22605         "ldr    r8, [%[a], #20]\n\t"
22606         "ldr    r9, [%[b], #0]\n\t"
22607         "umull  r6, r7, r8, r9\n\t"
22608         "adds   r5, r5, r6\n\t"
22609         "adcs   r3, r3, r7\n\t"
22610         "adc    r4, r4, r10\n\t"
22611         "str    r5, [%[tmp], #20]\n\t"
22612         "#  A[0] * B[6]\n\t"
22613         "ldr    r8, [%[a], #0]\n\t"
22614         "ldr    r9, [%[b], #24]\n\t"
22615         "umull  r6, r7, r8, r9\n\t"
22616         "adds   r3, r3, r6\n\t"
22617         "adcs   r4, r4, r7\n\t"
22618         "adc    r5, r10, r10\n\t"
22619         "#  A[1] * B[5]\n\t"
22620         "ldr    r8, [%[a], #4]\n\t"
22621         "ldr    r9, [%[b], #20]\n\t"
22622         "umull  r6, r7, r8, r9\n\t"
22623         "adds   r3, r3, r6\n\t"
22624         "adcs   r4, r4, r7\n\t"
22625         "adc    r5, r5, r10\n\t"
22626         "#  A[2] * B[4]\n\t"
22627         "ldr    r8, [%[a], #8]\n\t"
22628         "ldr    r9, [%[b], #16]\n\t"
22629         "umull  r6, r7, r8, r9\n\t"
22630         "adds   r3, r3, r6\n\t"
22631         "adcs   r4, r4, r7\n\t"
22632         "adc    r5, r5, r10\n\t"
22633         "#  A[3] * B[3]\n\t"
22634         "ldr    r8, [%[a], #12]\n\t"
22635         "ldr    r9, [%[b], #12]\n\t"
22636         "umull  r6, r7, r8, r9\n\t"
22637         "adds   r3, r3, r6\n\t"
22638         "adcs   r4, r4, r7\n\t"
22639         "adc    r5, r5, r10\n\t"
22640         "#  A[4] * B[2]\n\t"
22641         "ldr    r8, [%[a], #16]\n\t"
22642         "ldr    r9, [%[b], #8]\n\t"
22643         "umull  r6, r7, r8, r9\n\t"
22644         "adds   r3, r3, r6\n\t"
22645         "adcs   r4, r4, r7\n\t"
22646         "adc    r5, r5, r10\n\t"
22647         "#  A[5] * B[1]\n\t"
22648         "ldr    r8, [%[a], #20]\n\t"
22649         "ldr    r9, [%[b], #4]\n\t"
22650         "umull  r6, r7, r8, r9\n\t"
22651         "adds   r3, r3, r6\n\t"
22652         "adcs   r4, r4, r7\n\t"
22653         "adc    r5, r5, r10\n\t"
22654         "#  A[6] * B[0]\n\t"
22655         "ldr    r8, [%[a], #24]\n\t"
22656         "ldr    r9, [%[b], #0]\n\t"
22657         "umull  r6, r7, r8, r9\n\t"
22658         "adds   r3, r3, r6\n\t"
22659         "adcs   r4, r4, r7\n\t"
22660         "adc    r5, r5, r10\n\t"
22661         "str    r3, [%[tmp], #24]\n\t"
22662         "#  A[0] * B[7]\n\t"
22663         "ldr    r8, [%[a], #0]\n\t"
22664         "ldr    r9, [%[b], #28]\n\t"
22665         "umull  r6, r7, r8, r9\n\t"
22666         "adds   r4, r4, r6\n\t"
22667         "adcs   r5, r5, r7\n\t"
22668         "adc    r3, r10, r10\n\t"
22669         "#  A[1] * B[6]\n\t"
22670         "ldr    r8, [%[a], #4]\n\t"
22671         "ldr    r9, [%[b], #24]\n\t"
22672         "umull  r6, r7, r8, r9\n\t"
22673         "adds   r4, r4, r6\n\t"
22674         "adcs   r5, r5, r7\n\t"
22675         "adc    r3, r3, r10\n\t"
22676         "#  A[2] * B[5]\n\t"
22677         "ldr    r8, [%[a], #8]\n\t"
22678         "ldr    r9, [%[b], #20]\n\t"
22679         "umull  r6, r7, r8, r9\n\t"
22680         "adds   r4, r4, r6\n\t"
22681         "adcs   r5, r5, r7\n\t"
22682         "adc    r3, r3, r10\n\t"
22683         "#  A[3] * B[4]\n\t"
22684         "ldr    r8, [%[a], #12]\n\t"
22685         "ldr    r9, [%[b], #16]\n\t"
22686         "umull  r6, r7, r8, r9\n\t"
22687         "adds   r4, r4, r6\n\t"
22688         "adcs   r5, r5, r7\n\t"
22689         "adc    r3, r3, r10\n\t"
22690         "#  A[4] * B[3]\n\t"
22691         "ldr    r8, [%[a], #16]\n\t"
22692         "ldr    r9, [%[b], #12]\n\t"
22693         "umull  r6, r7, r8, r9\n\t"
22694         "adds   r4, r4, r6\n\t"
22695         "adcs   r5, r5, r7\n\t"
22696         "adc    r3, r3, r10\n\t"
22697         "#  A[5] * B[2]\n\t"
22698         "ldr    r8, [%[a], #20]\n\t"
22699         "ldr    r9, [%[b], #8]\n\t"
22700         "umull  r6, r7, r8, r9\n\t"
22701         "adds   r4, r4, r6\n\t"
22702         "adcs   r5, r5, r7\n\t"
22703         "adc    r3, r3, r10\n\t"
22704         "#  A[6] * B[1]\n\t"
22705         "ldr    r8, [%[a], #24]\n\t"
22706         "ldr    r9, [%[b], #4]\n\t"
22707         "umull  r6, r7, r8, r9\n\t"
22708         "adds   r4, r4, r6\n\t"
22709         "adcs   r5, r5, r7\n\t"
22710         "adc    r3, r3, r10\n\t"
22711         "#  A[7] * B[0]\n\t"
22712         "ldr    r8, [%[a], #28]\n\t"
22713         "ldr    r9, [%[b], #0]\n\t"
22714         "umull  r6, r7, r8, r9\n\t"
22715         "adds   r4, r4, r6\n\t"
22716         "adcs   r5, r5, r7\n\t"
22717         "adc    r3, r3, r10\n\t"
22718         "str    r4, [%[tmp], #28]\n\t"
22719         "#  A[1] * B[7]\n\t"
22720         "ldr    r8, [%[a], #4]\n\t"
22721         "ldr    r9, [%[b], #28]\n\t"
22722         "umull  r6, r7, r8, r9\n\t"
22723         "adds   r5, r5, r6\n\t"
22724         "adcs   r3, r3, r7\n\t"
22725         "adc    r4, r10, r10\n\t"
22726         "#  A[2] * B[6]\n\t"
22727         "ldr    r8, [%[a], #8]\n\t"
22728         "ldr    r9, [%[b], #24]\n\t"
22729         "umull  r6, r7, r8, r9\n\t"
22730         "adds   r5, r5, r6\n\t"
22731         "adcs   r3, r3, r7\n\t"
22732         "adc    r4, r4, r10\n\t"
22733         "#  A[3] * B[5]\n\t"
22734         "ldr    r8, [%[a], #12]\n\t"
22735         "ldr    r9, [%[b], #20]\n\t"
22736         "umull  r6, r7, r8, r9\n\t"
22737         "adds   r5, r5, r6\n\t"
22738         "adcs   r3, r3, r7\n\t"
22739         "adc    r4, r4, r10\n\t"
22740         "#  A[4] * B[4]\n\t"
22741         "ldr    r8, [%[a], #16]\n\t"
22742         "ldr    r9, [%[b], #16]\n\t"
22743         "umull  r6, r7, r8, r9\n\t"
22744         "adds   r5, r5, r6\n\t"
22745         "adcs   r3, r3, r7\n\t"
22746         "adc    r4, r4, r10\n\t"
22747         "#  A[5] * B[3]\n\t"
22748         "ldr    r8, [%[a], #20]\n\t"
22749         "ldr    r9, [%[b], #12]\n\t"
22750         "umull  r6, r7, r8, r9\n\t"
22751         "adds   r5, r5, r6\n\t"
22752         "adcs   r3, r3, r7\n\t"
22753         "adc    r4, r4, r10\n\t"
22754         "#  A[6] * B[2]\n\t"
22755         "ldr    r8, [%[a], #24]\n\t"
22756         "ldr    r9, [%[b], #8]\n\t"
22757         "umull  r6, r7, r8, r9\n\t"
22758         "adds   r5, r5, r6\n\t"
22759         "adcs   r3, r3, r7\n\t"
22760         "adc    r4, r4, r10\n\t"
22761         "#  A[7] * B[1]\n\t"
22762         "ldr    r8, [%[a], #28]\n\t"
22763         "ldr    r9, [%[b], #4]\n\t"
22764         "umull  r6, r7, r8, r9\n\t"
22765         "adds   r5, r5, r6\n\t"
22766         "adcs   r3, r3, r7\n\t"
22767         "adc    r4, r4, r10\n\t"
        /* From result word 8 upwards the words are stored directly into r
         * rather than into the local buffer. */
22768         "str    r5, [%[r], #32]\n\t"
22769         "#  A[2] * B[7]\n\t"
22770         "ldr    r8, [%[a], #8]\n\t"
22771         "ldr    r9, [%[b], #28]\n\t"
22772         "umull  r6, r7, r8, r9\n\t"
22773         "adds   r3, r3, r6\n\t"
22774         "adcs   r4, r4, r7\n\t"
22775         "adc    r5, r10, r10\n\t"
22776         "#  A[3] * B[6]\n\t"
22777         "ldr    r8, [%[a], #12]\n\t"
22778         "ldr    r9, [%[b], #24]\n\t"
22779         "umull  r6, r7, r8, r9\n\t"
22780         "adds   r3, r3, r6\n\t"
22781         "adcs   r4, r4, r7\n\t"
22782         "adc    r5, r5, r10\n\t"
22783         "#  A[4] * B[5]\n\t"
22784         "ldr    r8, [%[a], #16]\n\t"
22785         "ldr    r9, [%[b], #20]\n\t"
22786         "umull  r6, r7, r8, r9\n\t"
22787         "adds   r3, r3, r6\n\t"
22788         "adcs   r4, r4, r7\n\t"
22789         "adc    r5, r5, r10\n\t"
22790         "#  A[5] * B[4]\n\t"
22791         "ldr    r8, [%[a], #20]\n\t"
22792         "ldr    r9, [%[b], #16]\n\t"
22793         "umull  r6, r7, r8, r9\n\t"
22794         "adds   r3, r3, r6\n\t"
22795         "adcs   r4, r4, r7\n\t"
22796         "adc    r5, r5, r10\n\t"
22797         "#  A[6] * B[3]\n\t"
22798         "ldr    r8, [%[a], #24]\n\t"
22799         "ldr    r9, [%[b], #12]\n\t"
22800         "umull  r6, r7, r8, r9\n\t"
22801         "adds   r3, r3, r6\n\t"
22802         "adcs   r4, r4, r7\n\t"
22803         "adc    r5, r5, r10\n\t"
22804         "#  A[7] * B[2]\n\t"
22805         "ldr    r8, [%[a], #28]\n\t"
22806         "ldr    r9, [%[b], #8]\n\t"
22807         "umull  r6, r7, r8, r9\n\t"
22808         "adds   r3, r3, r6\n\t"
22809         "adcs   r4, r4, r7\n\t"
22810         "adc    r5, r5, r10\n\t"
22811         "str    r3, [%[r], #36]\n\t"
22812         "#  A[3] * B[7]\n\t"
22813         "ldr    r8, [%[a], #12]\n\t"
22814         "ldr    r9, [%[b], #28]\n\t"
22815         "umull  r6, r7, r8, r9\n\t"
22816         "adds   r4, r4, r6\n\t"
22817         "adcs   r5, r5, r7\n\t"
22818         "adc    r3, r10, r10\n\t"
22819         "#  A[4] * B[6]\n\t"
22820         "ldr    r8, [%[a], #16]\n\t"
22821         "ldr    r9, [%[b], #24]\n\t"
22822         "umull  r6, r7, r8, r9\n\t"
22823         "adds   r4, r4, r6\n\t"
22824         "adcs   r5, r5, r7\n\t"
22825         "adc    r3, r3, r10\n\t"
22826         "#  A[5] * B[5]\n\t"
22827         "ldr    r8, [%[a], #20]\n\t"
22828         "ldr    r9, [%[b], #20]\n\t"
22829         "umull  r6, r7, r8, r9\n\t"
22830         "adds   r4, r4, r6\n\t"
22831         "adcs   r5, r5, r7\n\t"
22832         "adc    r3, r3, r10\n\t"
22833         "#  A[6] * B[4]\n\t"
22834         "ldr    r8, [%[a], #24]\n\t"
22835         "ldr    r9, [%[b], #16]\n\t"
22836         "umull  r6, r7, r8, r9\n\t"
22837         "adds   r4, r4, r6\n\t"
22838         "adcs   r5, r5, r7\n\t"
22839         "adc    r3, r3, r10\n\t"
22840         "#  A[7] * B[3]\n\t"
22841         "ldr    r8, [%[a], #28]\n\t"
22842         "ldr    r9, [%[b], #12]\n\t"
22843         "umull  r6, r7, r8, r9\n\t"
22844         "adds   r4, r4, r6\n\t"
22845         "adcs   r5, r5, r7\n\t"
22846         "adc    r3, r3, r10\n\t"
22847         "str    r4, [%[r], #40]\n\t"
22848         "#  A[4] * B[7]\n\t"
22849         "ldr    r8, [%[a], #16]\n\t"
22850         "ldr    r9, [%[b], #28]\n\t"
22851         "umull  r6, r7, r8, r9\n\t"
22852         "adds   r5, r5, r6\n\t"
22853         "adcs   r3, r3, r7\n\t"
22854         "adc    r4, r10, r10\n\t"
22855         "#  A[5] * B[6]\n\t"
22856         "ldr    r8, [%[a], #20]\n\t"
22857         "ldr    r9, [%[b], #24]\n\t"
22858         "umull  r6, r7, r8, r9\n\t"
22859         "adds   r5, r5, r6\n\t"
22860         "adcs   r3, r3, r7\n\t"
22861         "adc    r4, r4, r10\n\t"
22862         "#  A[6] * B[5]\n\t"
22863         "ldr    r8, [%[a], #24]\n\t"
22864         "ldr    r9, [%[b], #20]\n\t"
22865         "umull  r6, r7, r8, r9\n\t"
22866         "adds   r5, r5, r6\n\t"
22867         "adcs   r3, r3, r7\n\t"
22868         "adc    r4, r4, r10\n\t"
22869         "#  A[7] * B[4]\n\t"
22870         "ldr    r8, [%[a], #28]\n\t"
22871         "ldr    r9, [%[b], #16]\n\t"
22872         "umull  r6, r7, r8, r9\n\t"
22873         "adds   r5, r5, r6\n\t"
22874         "adcs   r3, r3, r7\n\t"
22875         "adc    r4, r4, r10\n\t"
22876         "str    r5, [%[r], #44]\n\t"
22877         "#  A[5] * B[7]\n\t"
22878         "ldr    r8, [%[a], #20]\n\t"
22879         "ldr    r9, [%[b], #28]\n\t"
22880         "umull  r6, r7, r8, r9\n\t"
22881         "adds   r3, r3, r6\n\t"
22882         "adcs   r4, r4, r7\n\t"
22883         "adc    r5, r10, r10\n\t"
22884         "#  A[6] * B[6]\n\t"
22885         "ldr    r8, [%[a], #24]\n\t"
22886         "ldr    r9, [%[b], #24]\n\t"
22887         "umull  r6, r7, r8, r9\n\t"
22888         "adds   r3, r3, r6\n\t"
22889         "adcs   r4, r4, r7\n\t"
22890         "adc    r5, r5, r10\n\t"
22891         "#  A[7] * B[5]\n\t"
22892         "ldr    r8, [%[a], #28]\n\t"
22893         "ldr    r9, [%[b], #20]\n\t"
22894         "umull  r6, r7, r8, r9\n\t"
22895         "adds   r3, r3, r6\n\t"
22896         "adcs   r4, r4, r7\n\t"
22897         "adc    r5, r5, r10\n\t"
22898         "str    r3, [%[r], #48]\n\t"
22899         "#  A[6] * B[7]\n\t"
22900         "ldr    r8, [%[a], #24]\n\t"
22901         "ldr    r9, [%[b], #28]\n\t"
22902         "umull  r6, r7, r8, r9\n\t"
22903         "adds   r4, r4, r6\n\t"
22904         "adcs   r5, r5, r7\n\t"
22905         "adc    r3, r10, r10\n\t"
22906         "#  A[7] * B[6]\n\t"
22907         "ldr    r8, [%[a], #28]\n\t"
22908         "ldr    r9, [%[b], #24]\n\t"
22909         "umull  r6, r7, r8, r9\n\t"
22910         "adds   r4, r4, r6\n\t"
22911         "adcs   r5, r5, r7\n\t"
22912         "adc    r3, r3, r10\n\t"
22913         "str    r4, [%[r], #52]\n\t"
22914         "#  A[7] * B[7]\n\t"
22915         "ldr    r8, [%[a], #28]\n\t"
22916         "ldr    r9, [%[b], #28]\n\t"
22917         "umull  r6, r7, r8, r9\n\t"
22918         "adds   r5, r5, r6\n\t"
22919         "adc    r3, r3, r7\n\t"
        /* The last two words of the product. */
22920         "str    r5, [%[r], #56]\n\t"
22921         "str    r3, [%[r], #60]\n\t"
22922         :
22923         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp)
22924         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
22925     );
22926 
    /* Fill in the low 8 words of r from the local buffer. */
22927     XMEMCPY(r, tmp, sizeof(tmp));
22928 }
22929 
22930 #endif /* WOLFSSL_SP_SMALL */
22931 #ifdef HAVE_INTEL_AVX2
22932 #endif /* HAVE_INTEL_AVX2 */
22933 #endif
22934 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
22935 #ifdef WOLFSSL_SP_SMALL
22936 /* Sub b from a into a. (a -= b)
 *
 * Looped (WOLFSSL_SP_SMALL) variant: four words are subtracted per loop
 * iteration and the borrow is carried between iterations in c.
22937  *
22938  * a  A single precision integer and result.
22939  * b  A single precision integer.
 * returns 0 when the subtraction did not borrow out of the top word and a
 * value with all bits set when it did.
22940  */
22941 static sp_digit sp_256_sub_in_place_8(sp_digit* a, const sp_digit* b)
22942 {
    /* Borrow state: 0 for no borrow, all bits set for a borrow. */
22943     sp_digit c = 0;
22944 
22945     __asm__ __volatile__ (
        /* r14 = constant zero, r12 = address just past the 8 words of a. */
22946         "mov    r14, #0\n\t"
22947         "add    r12, %[a], #32\n\t"
22948         "\n1:\n\t"
        /* Rebuild the carry flag from c: 0 - c borrows exactly when c is
         * non-zero. */
22949         "subs   %[c], r14, %[c]\n\t"
22950         "ldr    r3, [%[a]]\n\t"
22951         "ldr    r4, [%[a], #4]\n\t"
22952         "ldr    r5, [%[a], #8]\n\t"
22953         "ldr    r6, [%[a], #12]\n\t"
        /* Post-indexed loads step the local copy of b forward. */
22954         "ldr    r7, [%[b]], #4\n\t"
22955         "ldr    r8, [%[b]], #4\n\t"
22956         "ldr    r9, [%[b]], #4\n\t"
22957         "ldr    r10, [%[b]], #4\n\t"
22958         "sbcs   r3, r3, r7\n\t"
22959         "sbcs   r4, r4, r8\n\t"
22960         "sbcs   r5, r5, r9\n\t"
22961         "sbcs   r6, r6, r10\n\t"
        /* Post-indexed stores step the local copy of a forward. */
22962         "str    r3, [%[a]], #4\n\t"
22963         "str    r4, [%[a]], #4\n\t"
22964         "str    r5, [%[a]], #4\n\t"
22965         "str    r6, [%[a]], #4\n\t"
        /* Save the borrow in c (0 or all bits set) because the compare
         * below overwrites the flags. */
22966         "sbc    %[c], r14, r14\n\t"
22967         "cmp    %[a], r12\n\t"
22968         "bne    1b\n\t"
22969         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
22970         :
22971         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14"
22972     );
22973 
22974     return c;
22975 }
22976 
22977 #else
22978 /* Sub b from a into a. (a -= b)
 *
 * Unrolled variant: the 8 words are subtracted in two groups of four with
 * the borrow kept in the carry flag between the groups.
22979  *
22980  * a  A single precision integer and result.
22981  * b  A single precision integer.
 * returns 0 when the subtraction did not borrow out of the top word and a
 * value with all bits set when it did.
22982  */
22983 static sp_digit sp_256_sub_in_place_8(sp_digit* a, const sp_digit* b)
22984 {
22985     sp_digit c = 0;
22986 
22987     __asm__ __volatile__ (
        /* Words 0 to 3: the first subtract starts the borrow chain. */
22988         "ldr    r2, [%[a], #0]\n\t"
22989         "ldr    r3, [%[a], #4]\n\t"
22990         "ldr    r4, [%[a], #8]\n\t"
22991         "ldr    r5, [%[a], #12]\n\t"
22992         "ldr    r6, [%[b], #0]\n\t"
22993         "ldr    r7, [%[b], #4]\n\t"
22994         "ldr    r8, [%[b], #8]\n\t"
22995         "ldr    r9, [%[b], #12]\n\t"
22996         "subs   r2, r2, r6\n\t"
22997         "sbcs   r3, r3, r7\n\t"
22998         "sbcs   r4, r4, r8\n\t"
22999         "sbcs   r5, r5, r9\n\t"
23000         "str    r2, [%[a], #0]\n\t"
23001         "str    r3, [%[a], #4]\n\t"
23002         "str    r4, [%[a], #8]\n\t"
23003         "str    r5, [%[a], #12]\n\t"
        /* Words 4 to 7: the loads and stores in between do not change the
         * flags, so the borrow chain continues. */
23004         "ldr    r2, [%[a], #16]\n\t"
23005         "ldr    r3, [%[a], #20]\n\t"
23006         "ldr    r4, [%[a], #24]\n\t"
23007         "ldr    r5, [%[a], #28]\n\t"
23008         "ldr    r6, [%[b], #16]\n\t"
23009         "ldr    r7, [%[b], #20]\n\t"
23010         "ldr    r8, [%[b], #24]\n\t"
23011         "ldr    r9, [%[b], #28]\n\t"
23012         "sbcs   r2, r2, r6\n\t"
23013         "sbcs   r3, r3, r7\n\t"
23014         "sbcs   r4, r4, r8\n\t"
23015         "sbcs   r5, r5, r9\n\t"
23016         "str    r2, [%[a], #16]\n\t"
23017         "str    r3, [%[a], #20]\n\t"
23018         "str    r4, [%[a], #24]\n\t"
23019         "str    r5, [%[a], #28]\n\t"
        /* r9 - r9 - borrow: c becomes 0, or all bits set on a borrow. */
23020         "sbc    %[c], r9, r9\n\t"
23021         : [c] "+r" (c)
23022         : [a] "r" (a), [b] "r" (b)
23023         : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
23024     );
23025 
23026     return c;
23027 }
23028 
23029 #endif /* WOLFSSL_SP_SMALL */
23030 /* Mul a by digit b into r. (r = a * b)
 *
 * Each of the 8 words of a is multiplied by b and accumulated with the
 * carry from the previous word. Nine words are written, so r must have room
 * for 9 digits.
 * WOLFSSL_SP_SMALL selects a looped implementation; otherwise the eight
 * steps are unrolled.
23031  *
23032  * r  A single precision integer.
23033  * a  A single precision integer.
23034  * b  A single precision digit.
23035  */
23036 static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a,
23037         const sp_digit b)
23038 {
23039 #ifdef WOLFSSL_SP_SMALL
23040     __asm__ __volatile__ (
        /* r10 stays zero. r3, r4 and r5 form the accumulator, with r3 the
         * word that is stored next. */
23041         "mov    r10, #0\n\t"
23042         "# A[0] * B\n\t"
23043         "ldr    r8, [%[a]]\n\t"
23044         "umull  r5, r3, %[b], r8\n\t"
23045         "mov    r4, #0\n\t"
23046         "str    r5, [%[r]]\n\t"
23047         "mov    r5, #0\n\t"
        /* r9 = byte offset of the current word; the loop covers words 1 to
         * 7. */
23048         "mov    r9, #4\n\t"
23049         "1:\n\t"
23050         "ldr    r8, [%[a], r9]\n\t"
23051         "umull  r6, r7, %[b], r8\n\t"
23052         "adds   r3, r3, r6\n\t"
23053         "adcs   r4, r4, r7\n\t"
23054         "adc    r5, r10, r10\n\t"
23055         "str    r3, [%[r], r9]\n\t"
        /* Shift the accumulator down by one word. */
23056         "mov    r3, r4\n\t"
23057         "mov    r4, r5\n\t"
23058         "mov    r5, #0\n\t"
23059         "add    r9, r9, #4\n\t"
23060         "cmp    r9, #32\n\t"
23061         "blt    1b\n\t"
        /* Ninth word: the carry left over after the last product. */
23062         "str    r3, [%[r], #32]\n\t"
23063         :
23064         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
23065         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
23066     );
23067 #else
23068     __asm__ __volatile__ (
        /* r10 stays zero. r3, r4 and r5 rotate as the accumulator: the low
         * word is stored after each product and reused as the new top word.
         * r9 appears in the clobber list but is not used by this variant. */
23069         "mov    r10, #0\n\t"
23070         "# A[0] * B\n\t"
23071         "ldr    r8, [%[a]]\n\t"
23072         "umull  r3, r4, %[b], r8\n\t"
23073         "mov    r5, #0\n\t"
23074         "str    r3, [%[r]]\n\t"
23075         "# A[1] * B\n\t"
23076         "ldr    r8, [%[a], #4]\n\t"
23077         "mov    r3, #0\n\t"
23078         "umull  r6, r7, %[b], r8\n\t"
23079         "adds   r4, r4, r6\n\t"
23080         "adcs   r5, r5, r7\n\t"
23081         "adc    r3, r10, r10\n\t"
23082         "str    r4, [%[r], #4]\n\t"
23083         "# A[2] * B\n\t"
23084         "ldr    r8, [%[a], #8]\n\t"
23085         "mov    r4, #0\n\t"
23086         "umull  r6, r7, %[b], r8\n\t"
23087         "adds   r5, r5, r6\n\t"
23088         "adcs   r3, r3, r7\n\t"
23089         "adc    r4, r10, r10\n\t"
23090         "str    r5, [%[r], #8]\n\t"
23091         "# A[3] * B\n\t"
23092         "ldr    r8, [%[a], #12]\n\t"
23093         "mov    r5, #0\n\t"
23094         "umull  r6, r7, %[b], r8\n\t"
23095         "adds   r3, r3, r6\n\t"
23096         "adcs   r4, r4, r7\n\t"
23097         "adc    r5, r10, r10\n\t"
23098         "str    r3, [%[r], #12]\n\t"
23099         "# A[4] * B\n\t"
23100         "ldr    r8, [%[a], #16]\n\t"
23101         "mov    r3, #0\n\t"
23102         "umull  r6, r7, %[b], r8\n\t"
23103         "adds   r4, r4, r6\n\t"
23104         "adcs   r5, r5, r7\n\t"
23105         "adc    r3, r10, r10\n\t"
23106         "str    r4, [%[r], #16]\n\t"
23107         "# A[5] * B\n\t"
23108         "ldr    r8, [%[a], #20]\n\t"
23109         "mov    r4, #0\n\t"
23110         "umull  r6, r7, %[b], r8\n\t"
23111         "adds   r5, r5, r6\n\t"
23112         "adcs   r3, r3, r7\n\t"
23113         "adc    r4, r10, r10\n\t"
23114         "str    r5, [%[r], #20]\n\t"
23115         "# A[6] * B\n\t"
23116         "ldr    r8, [%[a], #24]\n\t"
23117         "mov    r5, #0\n\t"
23118         "umull  r6, r7, %[b], r8\n\t"
23119         "adds   r3, r3, r6\n\t"
23120         "adcs   r4, r4, r7\n\t"
23121         "adc    r5, r10, r10\n\t"
23122         "str    r3, [%[r], #24]\n\t"
23123         "# A[7] * B\n\t"
23124         "ldr    r8, [%[a], #28]\n\t"
23125         "umull  r6, r7, %[b], r8\n\t"
23126         "adds   r4, r4, r6\n\t"
23127         "adc    r5, r5, r7\n\t"
        /* Word 7 and the ninth (top) word of the result. */
23128         "str    r4, [%[r], #28]\n\t"
23129         "str    r5, [%[r], #32]\n\t"
23130         :
23131         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
23132         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
23133     );
23134 #endif
23135 }
23136 
23137 /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
23138  *
23139  * d1   The high order half of the number to divide.
23140  * d0   The low order half of the number to divide.
23141  * div  The dividend.
23142  * returns the result of the division.
23143  *
23144  * Note that this is an approximate div. It may give an answer 1 larger.
23145  */
23146 static sp_digit div_256_word_8(sp_digit d1, sp_digit d0, sp_digit div)
23147 {
23148     sp_digit r = 0;
23149 
23150     __asm__ __volatile__ (
23151         "lsr    r5, %[div], #1\n\t"
23152         "add    r5, r5, #1\n\t"
23153         "mov    r6, %[d0]\n\t"
23154         "mov    r7, %[d1]\n\t"
23155         "# Do top 32\n\t"
23156         "subs   r8, r5, r7\n\t"
23157         "sbc    r8, r8, r8\n\t"
23158         "add    %[r], %[r], %[r]\n\t"
23159         "sub    %[r], %[r], r8\n\t"
23160         "and    r8, r8, r5\n\t"
23161         "subs   r7, r7, r8\n\t"
23162         "# Next 30 bits\n\t"
23163         "mov    r4, #29\n\t"
23164         "1:\n\t"
23165         "movs   r6, r6, lsl #1\n\t"
23166         "adc    r7, r7, r7\n\t"
23167         "subs   r8, r5, r7\n\t"
23168         "sbc    r8, r8, r8\n\t"
23169         "add    %[r], %[r], %[r]\n\t"
23170         "sub    %[r], %[r], r8\n\t"
23171         "and    r8, r8, r5\n\t"
23172         "subs   r7, r7, r8\n\t"
23173         "subs   r4, r4, #1\n\t"
23174         "bpl    1b\n\t"
23175         "add    %[r], %[r], %[r]\n\t"
23176         "add    %[r], %[r], #1\n\t"
23177         "umull  r4, r5, %[r], %[div]\n\t"
23178         "subs   r4, %[d0], r4\n\t"
23179         "sbc    r5, %[d1], r5\n\t"
23180         "add    %[r], %[r], r5\n\t"
23181         "umull  r4, r5, %[r], %[div]\n\t"
23182         "subs   r4, %[d0], r4\n\t"
23183         "sbc    r5, %[d1], r5\n\t"
23184         "add    %[r], %[r], r5\n\t"
23185         "subs   r8, %[div], r4\n\t"
23186         "sbc    r8, r8, r8\n\t"
23187         "sub    %[r], %[r], r8\n\t"
23188         : [r] "+r" (r)
23189         : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
23190         : "r4", "r5", "r6", "r7", "r8"
23191     );
23192     return r;
23193 }
23194 
23195 /* AND m into each word of a and store in r.
23196  *
23197  * r  A single precision integer.
23198  * a  A single precision integer.
23199  * m  Mask to AND against each digit.
23200  */
23201 static void sp_256_mask_8(sp_digit* r, sp_digit* a, sp_digit m)
23202 {
23203 #ifdef WOLFSSL_SP_SMALL
23204     int i;
23205 
23206     for (i=0; i<8; i++)
23207         r[i] = a[i] & m;
23208 #else
23209     r[0] = a[0] & m;
23210     r[1] = a[1] & m;
23211     r[2] = a[2] & m;
23212     r[3] = a[3] & m;
23213     r[4] = a[4] & m;
23214     r[5] = a[5] & m;
23215     r[6] = a[6] & m;
23216     r[7] = a[7] & m;
23217 #endif
23218 }
23219 
23220 /* Divide d in a and put remainder into r (m*d + r = a)
23221  * m is not calculated as it is not needed at this time.
23222  *
23223  * a  Nmber to be divided.
23224  * d  Number to divide with.
23225  * m  Multiplier result.
23226  * r  Remainder from the division.
23227  * returns MP_OKAY indicating success.
23228  */
23229 static WC_INLINE int sp_256_div_8(sp_digit* a, sp_digit* d, sp_digit* m,
23230         sp_digit* r)
23231 {
23232     sp_digit t1[16], t2[9];
23233     sp_digit div, r1;
23234     int i;
23235 
23236     (void)m;
23237 
23238     div = d[7];
23239     XMEMCPY(t1, a, sizeof(*t1) * 2 * 8);
23240     for (i=7; i>=0; i--) {
23241         r1 = div_256_word_8(t1[8 + i], t1[8 + i - 1], div);
23242 
23243         sp_256_mul_d_8(t2, d, r1);
23244         t1[8 + i] += sp_256_sub_in_place_8(&t1[i], t2);
23245         t1[8 + i] -= t2[8];
23246         sp_256_mask_8(t2, d, t1[8 + i]);
23247         t1[8 + i] += sp_256_add_8(&t1[i], &t1[i], t2);
23248         sp_256_mask_8(t2, d, t1[8 + i]);
23249         t1[8 + i] += sp_256_add_8(&t1[i], &t1[i], t2);
23250     }
23251 
23252     r1 = sp_256_cmp_8(t1, d) >= 0;
23253     sp_256_cond_sub_8(r, t1, t2, (sp_digit)0 - r1);
23254 
23255     return MP_OKAY;
23256 }
23257 
/* Reduce a modulo m into r. (r = a mod m)
 *
 * Thin wrapper around sp_256_div_8(): a is the number to divide, m is the
 * divisor and NULL is passed for the multiplier result, which
 * sp_256_div_8() ignores. sp_256_div_8() reads 2 * 8 words from a.
 *
 * r  A single precision number that is the reduced result.
 * a  A single precision number that is to be reduced.
 * m  A single precision number that is the modulus to reduce with.
 * returns MP_OKAY indicating success.
 */
static WC_INLINE int sp_256_mod_8(sp_digit* r, sp_digit* a, sp_digit* m)
{
    return sp_256_div_8(a, m, NULL, r);
}
23269 
23270 #endif
23271 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
23272 #ifdef WOLFSSL_SP_SMALL
23273 /* Square a and put result in r. (r = a * a)
23274  *
23275  * r  A single precision integer.
23276  * a  A single precision integer.
23277  */
23278 static void sp_256_sqr_8(sp_digit* r, const sp_digit* a)
23279 {
23280     sp_digit tmp[16];
23281 
23282     __asm__ __volatile__ (
23283         "mov    r12, #0\n\t"
23284         "mov    r6, #0\n\t"
23285         "mov    r7, #0\n\t"
23286         "mov    r8, #0\n\t"
23287         "mov    r5, #0\n\t"
23288         "\n1:\n\t"
23289         "subs   r3, r5, #28\n\t"
23290         "movcc  r3, r12\n\t"
23291         "sub    r4, r5, r3\n\t"
23292         "\n2:\n\t"
23293         "cmp    r4, r3\n\t"
23294         "beq    4f\n\t"
23295         "ldr    r14, [%[a], r3]\n\t"
23296         "ldr    r9, [%[a], r4]\n\t"
23297         "umull  r9, r10, r14, r9\n\t"
23298         "adds   r6, r6, r9\n\t"
23299         "adcs   r7, r7, r10\n\t"
23300         "adc    r8, r8, r12\n\t"
23301         "adds   r6, r6, r9\n\t"
23302         "adcs   r7, r7, r10\n\t"
23303         "adc    r8, r8, r12\n\t"
23304         "bal    5f\n\t"
23305         "\n4:\n\t"
23306         "ldr    r14, [%[a], r3]\n\t"
23307         "umull  r9, r10, r14, r14\n\t"
23308         "adds   r6, r6, r9\n\t"
23309         "adcs   r7, r7, r10\n\t"
23310         "adc    r8, r8, r12\n\t"
23311         "\n5:\n\t"
23312         "add    r3, r3, #4\n\t"
23313         "sub    r4, r4, #4\n\t"
23314         "cmp    r3, #32\n\t"
23315         "beq    3f\n\t"
23316         "cmp    r3, r4\n\t"
23317         "bgt    3f\n\t"
23318         "cmp    r3, r5\n\t"
23319         "ble    2b\n\t"
23320         "\n3:\n\t"
23321         "str    r6, [%[r], r5]\n\t"
23322         "mov    r6, r7\n\t"
23323         "mov    r7, r8\n\t"
23324         "mov    r8, #0\n\t"
23325         "add    r5, r5, #4\n\t"
23326         "cmp    r5, #56\n\t"
23327         "ble    1b\n\t"
23328         "str    r6, [%[r], r5]\n\t"
23329         :
23330         : [r] "r" (tmp), [a] "r" (a)
23331         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12"
23332     );
23333 
23334     XMEMCPY(r, tmp, sizeof(tmp));
23335 }
23336 
23337 #else
23338 /* Square a and put result in r. (r = a * a)
23339  *
23340  * r  A single precision integer.
23341  * a  A single precision integer.
23342  */
23343 static void sp_256_sqr_8(sp_digit* r, const sp_digit* a)
23344 {
23345     sp_digit tmp[8];
23346 
23347     __asm__ __volatile__ (
23348         "mov    r14, #0\n\t"
23349         "#  A[0] * A[0]\n\t"
23350         "ldr    r10, [%[a], #0]\n\t"
23351         "umull  r8, r3, r10, r10\n\t"
23352         "mov    r4, #0\n\t"
23353         "str    r8, [%[tmp]]\n\t"
23354         "#  A[0] * A[1]\n\t"
23355         "ldr    r10, [%[a], #4]\n\t"
23356         "ldr    r8, [%[a], #0]\n\t"
23357         "umull  r8, r9, r10, r8\n\t"
23358         "adds   r3, r3, r8\n\t"
23359         "adcs   r4, r4, r9\n\t"
23360         "adc    r2, r14, r14\n\t"
23361         "adds   r3, r3, r8\n\t"
23362         "adcs   r4, r4, r9\n\t"
23363         "adc    r2, r2, r14\n\t"
23364         "str    r3, [%[tmp], #4]\n\t"
23365         "#  A[0] * A[2]\n\t"
23366         "ldr    r10, [%[a], #8]\n\t"
23367         "ldr    r8, [%[a], #0]\n\t"
23368         "umull  r8, r9, r10, r8\n\t"
23369         "adds   r4, r4, r8\n\t"
23370         "adcs   r2, r2, r9\n\t"
23371         "adc    r3, r14, r14\n\t"
23372         "adds   r4, r4, r8\n\t"
23373         "adcs   r2, r2, r9\n\t"
23374         "adc    r3, r3, r14\n\t"
23375         "#  A[1] * A[1]\n\t"
23376         "ldr    r10, [%[a], #4]\n\t"
23377         "umull  r8, r9, r10, r10\n\t"
23378         "adds   r4, r4, r8\n\t"
23379         "adcs   r2, r2, r9\n\t"
23380         "adc    r3, r3, r14\n\t"
23381         "str    r4, [%[tmp], #8]\n\t"
23382         "#  A[0] * A[3]\n\t"
23383         "ldr    r10, [%[a], #12]\n\t"
23384         "ldr    r8, [%[a], #0]\n\t"
23385         "umull  r8, r9, r10, r8\n\t"
23386         "adds   r2, r2, r8\n\t"
23387         "adcs   r3, r3, r9\n\t"
23388         "adc    r4, r14, r14\n\t"
23389         "adds   r2, r2, r8\n\t"
23390         "adcs   r3, r3, r9\n\t"
23391         "adc    r4, r4, r14\n\t"
23392         "#  A[1] * A[2]\n\t"
23393         "ldr    r10, [%[a], #8]\n\t"
23394         "ldr    r8, [%[a], #4]\n\t"
23395         "umull  r8, r9, r10, r8\n\t"
23396         "adds   r2, r2, r8\n\t"
23397         "adcs   r3, r3, r9\n\t"
23398         "adc    r4, r4, r14\n\t"
23399         "adds   r2, r2, r8\n\t"
23400         "adcs   r3, r3, r9\n\t"
23401         "adc    r4, r4, r14\n\t"
23402         "str    r2, [%[tmp], #12]\n\t"
23403         "#  A[0] * A[4]\n\t"
23404         "ldr    r10, [%[a], #16]\n\t"
23405         "ldr    r8, [%[a], #0]\n\t"
23406         "umull  r8, r9, r10, r8\n\t"
23407         "adds   r3, r3, r8\n\t"
23408         "adcs   r4, r4, r9\n\t"
23409         "adc    r2, r14, r14\n\t"
23410         "adds   r3, r3, r8\n\t"
23411         "adcs   r4, r4, r9\n\t"
23412         "adc    r2, r2, r14\n\t"
23413         "#  A[1] * A[3]\n\t"
23414         "ldr    r10, [%[a], #12]\n\t"
23415         "ldr    r8, [%[a], #4]\n\t"
23416         "umull  r8, r9, r10, r8\n\t"
23417         "adds   r3, r3, r8\n\t"
23418         "adcs   r4, r4, r9\n\t"
23419         "adc    r2, r2, r14\n\t"
23420         "adds   r3, r3, r8\n\t"
23421         "adcs   r4, r4, r9\n\t"
23422         "adc    r2, r2, r14\n\t"
23423         "#  A[2] * A[2]\n\t"
23424         "ldr    r10, [%[a], #8]\n\t"
23425         "umull  r8, r9, r10, r10\n\t"
23426         "adds   r3, r3, r8\n\t"
23427         "adcs   r4, r4, r9\n\t"
23428         "adc    r2, r2, r14\n\t"
23429         "str    r3, [%[tmp], #16]\n\t"
23430         "#  A[0] * A[5]\n\t"
23431         "ldr    r10, [%[a], #20]\n\t"
23432         "ldr    r8, [%[a], #0]\n\t"
23433         "umull  r5, r6, r10, r8\n\t"
23434         "mov    r3, #0\n\t"
23435         "mov    r7, #0\n\t"
23436         "#  A[1] * A[4]\n\t"
23437         "ldr    r10, [%[a], #16]\n\t"
23438         "ldr    r8, [%[a], #4]\n\t"
23439         "umull  r8, r9, r10, r8\n\t"
23440         "adds   r5, r5, r8\n\t"
23441         "adcs   r6, r6, r9\n\t"
23442         "adc    r7, r7, r14\n\t"
23443         "#  A[2] * A[3]\n\t"
23444         "ldr    r10, [%[a], #12]\n\t"
23445         "ldr    r8, [%[a], #8]\n\t"
23446         "umull  r8, r9, r10, r8\n\t"
23447         "adds   r5, r5, r8\n\t"
23448         "adcs   r6, r6, r9\n\t"
23449         "adc    r7, r7, r14\n\t"
23450         "adds   r5, r5, r5\n\t"
23451         "adcs   r6, r6, r6\n\t"
23452         "adc    r7, r7, r7\n\t"
23453         "adds   r4, r4, r5\n\t"
23454         "adcs   r2, r2, r6\n\t"
23455         "adc    r3, r3, r7\n\t"
23456         "str    r4, [%[tmp], #20]\n\t"
23457         "#  A[0] * A[6]\n\t"
23458         "ldr    r10, [%[a], #24]\n\t"
23459         "ldr    r8, [%[a], #0]\n\t"
23460         "umull  r5, r6, r10, r8\n\t"
23461         "mov    r4, #0\n\t"
23462         "mov    r7, #0\n\t"
23463         "#  A[1] * A[5]\n\t"
23464         "ldr    r10, [%[a], #20]\n\t"
23465         "ldr    r8, [%[a], #4]\n\t"
23466         "umull  r8, r9, r10, r8\n\t"
23467         "adds   r5, r5, r8\n\t"
23468         "adcs   r6, r6, r9\n\t"
23469         "adc    r7, r7, r14\n\t"
23470         "#  A[2] * A[4]\n\t"
23471         "ldr    r10, [%[a], #16]\n\t"
23472         "ldr    r8, [%[a], #8]\n\t"
23473         "umull  r8, r9, r10, r8\n\t"
23474         "adds   r5, r5, r8\n\t"
23475         "adcs   r6, r6, r9\n\t"
23476         "adc    r7, r7, r14\n\t"
23477         "#  A[3] * A[3]\n\t"
23478         "ldr    r10, [%[a], #12]\n\t"
23479         "umull  r8, r9, r10, r10\n\t"
23480         "adds   r5, r5, r5\n\t"
23481         "adcs   r6, r6, r6\n\t"
23482         "adc    r7, r7, r7\n\t"
23483         "adds   r5, r5, r8\n\t"
23484         "adcs   r6, r6, r9\n\t"
23485         "adc    r7, r7, r14\n\t"
23486         "adds   r2, r2, r5\n\t"
23487         "adcs   r3, r3, r6\n\t"
23488         "adc    r4, r4, r7\n\t"
23489         "str    r2, [%[tmp], #24]\n\t"
23490         "#  A[0] * A[7]\n\t"
23491         "ldr    r10, [%[a], #28]\n\t"
23492         "ldr    r8, [%[a], #0]\n\t"
23493         "umull  r5, r6, r10, r8\n\t"
23494         "mov    r2, #0\n\t"
23495         "mov    r7, #0\n\t"
23496         "#  A[1] * A[6]\n\t"
23497         "ldr    r10, [%[a], #24]\n\t"
23498         "ldr    r8, [%[a], #4]\n\t"
23499         "umull  r8, r9, r10, r8\n\t"
23500         "adds   r5, r5, r8\n\t"
23501         "adcs   r6, r6, r9\n\t"
23502         "adc    r7, r7, r14\n\t"
23503         "#  A[2] * A[5]\n\t"
23504         "ldr    r10, [%[a], #20]\n\t"
23505         "ldr    r8, [%[a], #8]\n\t"
23506         "umull  r8, r9, r10, r8\n\t"
23507         "adds   r5, r5, r8\n\t"
23508         "adcs   r6, r6, r9\n\t"
23509         "adc    r7, r7, r14\n\t"
23510         "#  A[3] * A[4]\n\t"
23511         "ldr    r10, [%[a], #16]\n\t"
23512         "ldr    r8, [%[a], #12]\n\t"
23513         "umull  r8, r9, r10, r8\n\t"
23514         "adds   r5, r5, r8\n\t"
23515         "adcs   r6, r6, r9\n\t"
23516         "adc    r7, r7, r14\n\t"
23517         "adds   r5, r5, r5\n\t"
23518         "adcs   r6, r6, r6\n\t"
23519         "adc    r7, r7, r7\n\t"
23520         "adds   r3, r3, r5\n\t"
23521         "adcs   r4, r4, r6\n\t"
23522         "adc    r2, r2, r7\n\t"
23523         "str    r3, [%[tmp], #28]\n\t"
23524         "#  A[1] * A[7]\n\t"
23525         "ldr    r10, [%[a], #28]\n\t"
23526         "ldr    r8, [%[a], #4]\n\t"
23527         "umull  r5, r6, r10, r8\n\t"
23528         "mov    r3, #0\n\t"
23529         "mov    r7, #0\n\t"
23530         "#  A[2] * A[6]\n\t"
23531         "ldr    r10, [%[a], #24]\n\t"
23532         "ldr    r8, [%[a], #8]\n\t"
23533         "umull  r8, r9, r10, r8\n\t"
23534         "adds   r5, r5, r8\n\t"
23535         "adcs   r6, r6, r9\n\t"
23536         "adc    r7, r7, r14\n\t"
23537         "#  A[3] * A[5]\n\t"
23538         "ldr    r10, [%[a], #20]\n\t"
23539         "ldr    r8, [%[a], #12]\n\t"
23540         "umull  r8, r9, r10, r8\n\t"
23541         "adds   r5, r5, r8\n\t"
23542         "adcs   r6, r6, r9\n\t"
23543         "adc    r7, r7, r14\n\t"
23544         "#  A[4] * A[4]\n\t"
23545         "ldr    r10, [%[a], #16]\n\t"
23546         "umull  r8, r9, r10, r10\n\t"
23547         "adds   r5, r5, r5\n\t"
23548         "adcs   r6, r6, r6\n\t"
23549         "adc    r7, r7, r7\n\t"
23550         "adds   r5, r5, r8\n\t"
23551         "adcs   r6, r6, r9\n\t"
23552         "adc    r7, r7, r14\n\t"
23553         "adds   r4, r4, r5\n\t"
23554         "adcs   r2, r2, r6\n\t"
23555         "adc    r3, r3, r7\n\t"
23556         "str    r4, [%[r], #32]\n\t"
23557         "#  A[2] * A[7]\n\t"
23558         "ldr    r10, [%[a], #28]\n\t"
23559         "ldr    r8, [%[a], #8]\n\t"
23560         "umull  r5, r6, r10, r8\n\t"
23561         "mov    r4, #0\n\t"
23562         "mov    r7, #0\n\t"
23563         "#  A[3] * A[6]\n\t"
23564         "ldr    r10, [%[a], #24]\n\t"
23565         "ldr    r8, [%[a], #12]\n\t"
23566         "umull  r8, r9, r10, r8\n\t"
23567         "adds   r5, r5, r8\n\t"
23568         "adcs   r6, r6, r9\n\t"
23569         "adc    r7, r7, r14\n\t"
23570         "#  A[4] * A[5]\n\t"
23571         "ldr    r10, [%[a], #20]\n\t"
23572         "ldr    r8, [%[a], #16]\n\t"
23573         "umull  r8, r9, r10, r8\n\t"
23574         "adds   r5, r5, r8\n\t"
23575         "adcs   r6, r6, r9\n\t"
23576         "adc    r7, r7, r14\n\t"
23577         "adds   r5, r5, r5\n\t"
23578         "adcs   r6, r6, r6\n\t"
23579         "adc    r7, r7, r7\n\t"
23580         "adds   r2, r2, r5\n\t"
23581         "adcs   r3, r3, r6\n\t"
23582         "adc    r4, r4, r7\n\t"
23583         "str    r2, [%[r], #36]\n\t"
23584         "#  A[3] * A[7]\n\t"
23585         "ldr    r10, [%[a], #28]\n\t"
23586         "ldr    r8, [%[a], #12]\n\t"
23587         "umull  r8, r9, r10, r8\n\t"
23588         "adds   r3, r3, r8\n\t"
23589         "adcs   r4, r4, r9\n\t"
23590         "adc    r2, r14, r14\n\t"
23591         "adds   r3, r3, r8\n\t"
23592         "adcs   r4, r4, r9\n\t"
23593         "adc    r2, r2, r14\n\t"
23594         "#  A[4] * A[6]\n\t"
23595         "ldr    r10, [%[a], #24]\n\t"
23596         "ldr    r8, [%[a], #16]\n\t"
23597         "umull  r8, r9, r10, r8\n\t"
23598         "adds   r3, r3, r8\n\t"
23599         "adcs   r4, r4, r9\n\t"
23600         "adc    r2, r2, r14\n\t"
23601         "adds   r3, r3, r8\n\t"
23602         "adcs   r4, r4, r9\n\t"
23603         "adc    r2, r2, r14\n\t"
23604         "#  A[5] * A[5]\n\t"
23605         "ldr    r10, [%[a], #20]\n\t"
23606         "umull  r8, r9, r10, r10\n\t"
23607         "adds   r3, r3, r8\n\t"
23608         "adcs   r4, r4, r9\n\t"
23609         "adc    r2, r2, r14\n\t"
23610         "str    r3, [%[r], #40]\n\t"
23611         "#  A[4] * A[7]\n\t"
23612         "ldr    r10, [%[a], #28]\n\t"
23613         "ldr    r8, [%[a], #16]\n\t"
23614         "umull  r8, r9, r10, r8\n\t"
23615         "adds   r4, r4, r8\n\t"
23616         "adcs   r2, r2, r9\n\t"
23617         "adc    r3, r14, r14\n\t"
23618         "adds   r4, r4, r8\n\t"
23619         "adcs   r2, r2, r9\n\t"
23620         "adc    r3, r3, r14\n\t"
23621         "#  A[5] * A[6]\n\t"
23622         "ldr    r10, [%[a], #24]\n\t"
23623         "ldr    r8, [%[a], #20]\n\t"
23624         "umull  r8, r9, r10, r8\n\t"
23625         "adds   r4, r4, r8\n\t"
23626         "adcs   r2, r2, r9\n\t"
23627         "adc    r3, r3, r14\n\t"
23628         "adds   r4, r4, r8\n\t"
23629         "adcs   r2, r2, r9\n\t"
23630         "adc    r3, r3, r14\n\t"
23631         "str    r4, [%[r], #44]\n\t"
23632         "#  A[5] * A[7]\n\t"
23633         "ldr    r10, [%[a], #28]\n\t"
23634         "ldr    r8, [%[a], #20]\n\t"
23635         "umull  r8, r9, r10, r8\n\t"
23636         "adds   r2, r2, r8\n\t"
23637         "adcs   r3, r3, r9\n\t"
23638         "adc    r4, r14, r14\n\t"
23639         "adds   r2, r2, r8\n\t"
23640         "adcs   r3, r3, r9\n\t"
23641         "adc    r4, r4, r14\n\t"
23642         "#  A[6] * A[6]\n\t"
23643         "ldr    r10, [%[a], #24]\n\t"
23644         "umull  r8, r9, r10, r10\n\t"
23645         "adds   r2, r2, r8\n\t"
23646         "adcs   r3, r3, r9\n\t"
23647         "adc    r4, r4, r14\n\t"
23648         "str    r2, [%[r], #48]\n\t"
23649         "#  A[6] * A[7]\n\t"
23650         "ldr    r10, [%[a], #28]\n\t"
23651         "ldr    r8, [%[a], #24]\n\t"
23652         "umull  r8, r9, r10, r8\n\t"
23653         "adds   r3, r3, r8\n\t"
23654         "adcs   r4, r4, r9\n\t"
23655         "adc    r2, r14, r14\n\t"
23656         "adds   r3, r3, r8\n\t"
23657         "adcs   r4, r4, r9\n\t"
23658         "adc    r2, r2, r14\n\t"
23659         "str    r3, [%[r], #52]\n\t"
23660         "#  A[7] * A[7]\n\t"
23661         "ldr    r10, [%[a], #28]\n\t"
23662         "umull  r8, r9, r10, r10\n\t"
23663         "adds   r4, r4, r8\n\t"
23664         "adc    r2, r2, r9\n\t"
23665         "str    r4, [%[r], #56]\n\t"
23666         "str    r2, [%[r], #60]\n\t"
23667         :
23668         : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp)
23669         : "memory", "r2", "r3", "r4", "r8", "r9", "r10", "r8", "r5", "r6", "r7", "r14"
23670     );
23671 
23672     XMEMCPY(r, tmp, sizeof(tmp));
23673 }
23674 
23675 #endif /* WOLFSSL_SP_SMALL */
#ifdef WOLFSSL_SP_SMALL
/* Order of the P256 curve minus 2, least significant 32-bit word first. */
static const uint32_t p256_order_2[8] = {
    0xfc63254f, 0xf3b9cac2, 0xa7179e84, 0xbce6faad,
    0xffffffff, 0xffffffff, 0x00000000, 0xffffffff
};
#else
/* Low 128 bits of the order of the P256 curve minus 2, least significant
 * 32-bit word first. */
static const uint32_t p256_order_low[4] = {
    0xfc63254f, 0xf3b9cac2, 0xa7179e84, 0xbce6faad
};
#endif /* WOLFSSL_SP_SMALL */
23688 
/* Multiply two numbers mod the order of P256 curve. (r = a * b mod order)
 *
 * The product is formed in r by sp_256_mul_8() and then reduced in place by
 * sp_256_mont_reduce_8() using p256_order and p256_mp_order (both defined
 * elsewhere in this file; presumably the curve order and its Montgomery
 * multiplier).
 *
 * r  Result of the multiplication.
 * a  First operand of the multiplication.
 * b  Second operand of the multiplication.
 */
static void sp_256_mont_mul_order_8(sp_digit* r, sp_digit* a, sp_digit* b)
{
    sp_256_mul_8(r, a, b);
    sp_256_mont_reduce_8(r, p256_order, p256_mp_order);
}
23700 
/* Square number mod the order of P256 curve. (r = a * a mod order)
 *
 * The square is formed in r by sp_256_sqr_8(), which writes sixteen words,
 * and then reduced in place by sp_256_mont_reduce_8() using p256_order and
 * p256_mp_order (both defined elsewhere in this file).
 *
 * r  Result of the squaring.
 * a  Number to square.
 */
static void sp_256_mont_sqr_order_8(sp_digit* r, sp_digit* a)
{
    sp_256_sqr_8(r, a);
    sp_256_mont_reduce_8(r, p256_order, p256_mp_order);
}
23711 
23712 #ifndef WOLFSSL_SP_SMALL
23713 /* Square number mod the order of P256 curve a number of times.
23714  * (r = a ^ n mod order)
23715  *
23716  * r  Result of the squaring.
23717  * a  Number to square.
23718  */
23719 static void sp_256_mont_sqr_n_order_8(sp_digit* r, sp_digit* a, int n)
23720 {
23721     int i;
23722 
23723     sp_256_mont_sqr_order_8(r, a);
23724     for (i=1; i<n; i++)
23725         sp_256_mont_sqr_order_8(r, r);
23726 }
23727 #endif /* !WOLFSSL_SP_SMALL */
23728 
/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
 * (r = 1 / a mod order)
 *
 * The inverse is computed by exponentiation with sp_256_mont_sqr_order_8()
 * and sp_256_mont_mul_order_8(). The small build scans all bits of
 * p256_order_2 (order - 2); the other build uses a fixed addition chain for
 * the high half of the exponent and scans p256_order_low for the low half.
 *
 * r   Inverse result.
 * a   Number to invert.
 * td  Temporary data. The non-small build uses td, td + 2 * 8 and
 *     td + 4 * 8 as separate work areas, each of which receives a double
 *     width (2 * 8 word) result.
 */
static void sp_256_mont_inv_order_8(sp_digit* r, sp_digit* a,
        sp_digit* td)
{
#ifdef WOLFSSL_SP_SMALL
    sp_digit* t = td;
    int i;

    /* Starting from t = a accounts for bit 255 of the exponent (the top
     * word of p256_order_2 is 0xffffffff); bits 254..0 are then processed
     * from most to least significant with square-and-multiply. */
    XMEMCPY(t, a, sizeof(sp_digit) * 8);
    for (i=254; i>=0; i--) {
        sp_256_mont_sqr_order_8(t, t);
        if (p256_order_2[i / 32] & ((sp_digit)1 << (i % 32)))
            sp_256_mont_mul_order_8(t, t, a);
    }
    XMEMCPY(r, t, sizeof(sp_digit) * 8);
#else
    sp_digit* t = td;
    sp_digit* t2 = td + 2 * 8;
    sp_digit* t3 = td + 4 * 8;
    int i;

    /* t = a^2 */
    sp_256_mont_sqr_order_8(t, a);
    /* t = a^3 = t * a */
    sp_256_mont_mul_order_8(t, t, a);
    /* t2= a^c = t ^ 2 ^ 2 */
    sp_256_mont_sqr_n_order_8(t2, t, 2);
    /* t3= a^f = t2 * t */
    sp_256_mont_mul_order_8(t3, t2, t);
    /* t2= a^f0 = t3 ^ 2 ^ 4 */
    sp_256_mont_sqr_n_order_8(t2, t3, 4);
    /* t = a^ff = t2 * t3 */
    sp_256_mont_mul_order_8(t, t2, t3);
    /* t2= a^ff00 = t ^ 2 ^ 8 */
    sp_256_mont_sqr_n_order_8(t2, t, 8);
    /* t = a^ffff = t2 * t */
    sp_256_mont_mul_order_8(t, t2, t);
    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
    sp_256_mont_sqr_n_order_8(t2, t, 16);
    /* t = a^ffffffff = t2 * t */
    sp_256_mont_mul_order_8(t, t2, t);
    /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64  */
    sp_256_mont_sqr_n_order_8(t2, t, 64);
    /* t2= a^ffffffff00000000ffffffff = t2 * t */
    sp_256_mont_mul_order_8(t2, t2, t);
    /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32  */
    sp_256_mont_sqr_n_order_8(t2, t2, 32);
    /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
    sp_256_mont_mul_order_8(t2, t2, t);
    /* From here t3 = a^f is kept to append an all-ones nibble with four
     * squarings and a single multiplication. */
    /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
    for (i=127; i>=112; i--) {
        sp_256_mont_sqr_order_8(t2, t2);
        if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
            sp_256_mont_mul_order_8(t2, t2, a);
    }
    /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
    sp_256_mont_sqr_n_order_8(t2, t2, 4);
    sp_256_mont_mul_order_8(t2, t2, t3);
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
    for (i=107; i>=64; i--) {
        sp_256_mont_sqr_order_8(t2, t2);
        if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
            sp_256_mont_mul_order_8(t2, t2, a);
    }
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
    sp_256_mont_sqr_n_order_8(t2, t2, 4);
    sp_256_mont_mul_order_8(t2, t2, t3);
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
    for (i=59; i>=32; i--) {
        sp_256_mont_sqr_order_8(t2, t2);
        if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
            sp_256_mont_mul_order_8(t2, t2, a);
    }
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
    sp_256_mont_sqr_n_order_8(t2, t2, 4);
    sp_256_mont_mul_order_8(t2, t2, t3);
    /* This loop covers bits 27..0, so afterwards the exponent is already
     * the full order - 2:
     * t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
    for (i=27; i>=0; i--) {
        sp_256_mont_sqr_order_8(t2, t2);
        if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
            sp_256_mont_mul_order_8(t2, t2, a);
    }
    /* The last two steps append one more all-ones nibble, giving the
     * exponent 16 * (order - 2) + 15 = 16 * (order - 1) - 1. That is still
     * the inverse of a as long as a^(order - 1) = 1, which holds for
     * non-zero a when the order is prime.
     * NOTE(review): stopping the loop above at bit 4 would reach order - 2
     * exactly with fewer operations - confirm before changing. */
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f0 */
    sp_256_mont_sqr_n_order_8(t2, t2, 4);
    /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254ff */
    sp_256_mont_mul_order_8(r, t2, t3);
#endif /* WOLFSSL_SP_SMALL */
}
23823 
23824 #ifdef HAVE_INTEL_AVX2
/* Multiply two numbers mod the order of P256 curve. (r = a * b mod order)
 *
 * Variant compiled only when HAVE_INTEL_AVX2 is defined. The product is
 * formed in r by sp_256_mul_avx2_8() and then reduced in place by
 * sp_256_mont_reduce_avx2_8() using p256_order and p256_mp_order (all
 * defined outside this part of the file).
 *
 * r  Result of the multiplication.
 * a  First operand of the multiplication.
 * b  Second operand of the multiplication.
 */
static void sp_256_mont_mul_order_avx2_8(sp_digit* r, sp_digit* a, sp_digit* b)
{
    sp_256_mul_avx2_8(r, a, b);
    sp_256_mont_reduce_avx2_8(r, p256_order, p256_mp_order);
}
23836 
/* Square number mod the order of P256 curve. (r = a * a mod order)
 *
 * Variant compiled only when HAVE_INTEL_AVX2 is defined. The square is
 * formed in r by sp_256_sqr_avx2_8() and then reduced in place by
 * sp_256_mont_reduce_avx2_8() using p256_order and p256_mp_order (all
 * defined outside this part of the file).
 *
 * r  Result of the squaring.
 * a  Number to square.
 */
static void sp_256_mont_sqr_order_avx2_8(sp_digit* r, sp_digit* a)
{
    sp_256_sqr_avx2_8(r, a);
    sp_256_mont_reduce_avx2_8(r, p256_order, p256_mp_order);
}
23847 
23848 #ifndef WOLFSSL_SP_SMALL
23849 /* Square number mod the order of P256 curve a number of times.
23850  * (r = a ^ n mod order)
23851  *
23852  * r  Result of the squaring.
23853  * a  Number to square.
23854  */
23855 static void sp_256_mont_sqr_n_order_avx2_8(sp_digit* r, sp_digit* a, int n)
23856 {
23857     int i;
23858 
23859     sp_256_mont_sqr_order_avx2_8(r, a);
23860     for (i=1; i<n; i++)
23861         sp_256_mont_sqr_order_avx2_8(r, r);
23862 }
23863 #endif /* !WOLFSSL_SP_SMALL */
23864 
/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
 * (r = 1 / a mod order)
 *
 * Variant compiled only when HAVE_INTEL_AVX2 is defined. The inverse is
 * computed by exponentiation with sp_256_mont_sqr_order_avx2_8() and
 * sp_256_mont_mul_order_avx2_8(). The small build scans all bits of
 * p256_order_2 (order - 2); the other build uses a fixed addition chain for
 * the high half of the exponent and scans p256_order_low for the low half.
 *
 * r   Inverse result.
 * a   Number to invert.
 * td  Temporary data. The non-small build uses td, td + 2 * 8 and
 *     td + 4 * 8 as separate work areas.
 */
static void sp_256_mont_inv_order_avx2_8(sp_digit* r, sp_digit* a,
        sp_digit* td)
{
#ifdef WOLFSSL_SP_SMALL
    sp_digit* t = td;
    int i;

    /* Starting from t = a accounts for bit 255 of the exponent (the top
     * word of p256_order_2 is 0xffffffff); bits 254..0 are then processed
     * from most to least significant with square-and-multiply. */
    XMEMCPY(t, a, sizeof(sp_digit) * 8);
    for (i=254; i>=0; i--) {
        sp_256_mont_sqr_order_avx2_8(t, t);
        if (p256_order_2[i / 32] & ((sp_digit)1 << (i % 32)))
            sp_256_mont_mul_order_avx2_8(t, t, a);
    }
    XMEMCPY(r, t, sizeof(sp_digit) * 8);
#else
    sp_digit* t = td;
    sp_digit* t2 = td + 2 * 8;
    sp_digit* t3 = td + 4 * 8;
    int i;

    /* t = a^2 */
    sp_256_mont_sqr_order_avx2_8(t, a);
    /* t = a^3 = t * a */
    sp_256_mont_mul_order_avx2_8(t, t, a);
    /* t2= a^c = t ^ 2 ^ 2 */
    sp_256_mont_sqr_n_order_avx2_8(t2, t, 2);
    /* t3= a^f = t2 * t */
    sp_256_mont_mul_order_avx2_8(t3, t2, t);
    /* t2= a^f0 = t3 ^ 2 ^ 4 */
    sp_256_mont_sqr_n_order_avx2_8(t2, t3, 4);
    /* t = a^ff = t2 * t3 */
    sp_256_mont_mul_order_avx2_8(t, t2, t3);
    /* t2= a^ff00 = t ^ 2 ^ 8 */
    sp_256_mont_sqr_n_order_avx2_8(t2, t, 8);
    /* t = a^ffff = t2 * t */
    sp_256_mont_mul_order_avx2_8(t, t2, t);
    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
    sp_256_mont_sqr_n_order_avx2_8(t2, t, 16);
    /* t = a^ffffffff = t2 * t */
    sp_256_mont_mul_order_avx2_8(t, t2, t);
    /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64  */
    sp_256_mont_sqr_n_order_avx2_8(t2, t, 64);
    /* t2= a^ffffffff00000000ffffffff = t2 * t */
    sp_256_mont_mul_order_avx2_8(t2, t2, t);
    /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32  */
    sp_256_mont_sqr_n_order_avx2_8(t2, t2, 32);
    /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
    sp_256_mont_mul_order_avx2_8(t2, t2, t);
    /* From here t3 = a^f is kept to append an all-ones nibble with four
     * squarings and a single multiplication. */
    /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
    for (i=127; i>=112; i--) {
        sp_256_mont_sqr_order_avx2_8(t2, t2);
        if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
            sp_256_mont_mul_order_avx2_8(t2, t2, a);
    }
    /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
    sp_256_mont_sqr_n_order_avx2_8(t2, t2, 4);
    sp_256_mont_mul_order_avx2_8(t2, t2, t3);
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
    for (i=107; i>=64; i--) {
        sp_256_mont_sqr_order_avx2_8(t2, t2);
        if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
            sp_256_mont_mul_order_avx2_8(t2, t2, a);
    }
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
    sp_256_mont_sqr_n_order_avx2_8(t2, t2, 4);
    sp_256_mont_mul_order_avx2_8(t2, t2, t3);
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
    for (i=59; i>=32; i--) {
        sp_256_mont_sqr_order_avx2_8(t2, t2);
        if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
            sp_256_mont_mul_order_avx2_8(t2, t2, a);
    }
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
    sp_256_mont_sqr_n_order_avx2_8(t2, t2, 4);
    sp_256_mont_mul_order_avx2_8(t2, t2, t3);
    /* This loop covers bits 27..0, so afterwards the exponent is already
     * the full order - 2:
     * t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
    for (i=27; i>=0; i--) {
        sp_256_mont_sqr_order_avx2_8(t2, t2);
        if (p256_order_low[i / 32] & ((sp_digit)1 << (i % 32)))
            sp_256_mont_mul_order_avx2_8(t2, t2, a);
    }
    /* The last two steps append one more all-ones nibble, giving the
     * exponent 16 * (order - 2) + 15 = 16 * (order - 1) - 1. That is still
     * the inverse of a as long as a^(order - 1) = 1, which holds for
     * non-zero a when the order is prime.
     * NOTE(review): stopping the loop above at bit 4 would reach order - 2
     * exactly with fewer operations - confirm before changing. */
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f0 */
    sp_256_mont_sqr_n_order_avx2_8(t2, t2, 4);
    /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254ff */
    sp_256_mont_mul_order_avx2_8(r, t2, t3);
#endif /* WOLFSSL_SP_SMALL */
}
23959 
23960 #endif /* HAVE_INTEL_AVX2 */
23961 #endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
23962 #ifdef HAVE_ECC_SIGN
23963 #ifndef SP_ECC_MAX_SIG_GEN
23964 #define SP_ECC_MAX_SIG_GEN  64
23965 #endif
23966 
23967 /* Sign the hash using the private key.
23968  *   e = [hash, 256 bits] from binary
23969  *   r = (k.G)->x mod order
23970  *   s = (r * x + e) / k mod order
23971  * The hash is truncated to the first 256 bits.
23972  *
23973  * hash     Hash to sign.
23974  * hashLen  Length of the hash data.
23975  * rng      Random number generator.
23976  * priv     Private part of key - scalar.
23977  * rm       First part of result as an mp_int.
23978  * sm       Sirst part of result as an mp_int.
23979  * heap     Heap to use for allocation.
23980  * returns RNG failures, MEMORY_E when memory allocation fails and
23981  * MP_OKAY on success.
23982  */
23983 int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
23984                     mp_int* rm, mp_int* sm, void* heap)
23985 {
23986 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
23987     sp_digit* d;
23988 #else
23989     sp_digit ed[2*8];
23990     sp_digit xd[2*8];
23991     sp_digit kd[2*8];
23992     sp_digit rd[2*8];
23993     sp_digit td[3 * 2*8];
23994     sp_point p;
23995 #endif
23996     sp_digit* e = NULL;
23997     sp_digit* x = NULL;
23998     sp_digit* k = NULL;
23999     sp_digit* r = NULL;
24000     sp_digit* tmp = NULL;
24001     sp_point* point = NULL;
24002     sp_digit carry;
24003     sp_digit* s;
24004     sp_digit* kInv;
24005     int err = MP_OKAY;
24006     int32_t c;
24007     int i;
24008 #ifdef HAVE_INTEL_AVX2
24009     word32 cpuid_flags = cpuid_get_flags();
24010 #endif
24011 
24012     (void)heap;
24013 
24014     err = sp_ecc_point_new(heap, p, point);
24015 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
24016     if (err == MP_OKAY) {
24017         d = XMALLOC(sizeof(sp_digit) * 7 * 2 * 8, heap, DYNAMIC_TYPE_ECC);
24018         if (d != NULL) {
24019             e = d + 0 * 8;
24020             x = d + 2 * 8;
24021             k = d + 4 * 8;
24022             r = d + 6 * 8;
24023             tmp = d + 8 * 8;
24024         }
24025         else
24026             err = MEMORY_E;
24027     }
24028 #else
24029     e = ed;
24030     x = xd;
24031     k = kd;
24032     r = rd;
24033     tmp = td;
24034 #endif
24035     s = e;
24036     kInv = k;
24037 
24038     if (err == MP_OKAY) {
24039         if (hashLen > 32)
24040             hashLen = 32;
24041 
24042         sp_256_from_bin(e, 8, hash, hashLen);
24043         sp_256_from_mp(x, 8, priv);
24044     }
24045 
24046     for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
24047         /* New random point. */
24048         err = sp_256_ecc_gen_k_8(rng, k);
24049         if (err == MP_OKAY) {
24050 #ifdef HAVE_INTEL_AVX2
24051             if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
24052                 err = sp_256_ecc_mulmod_base_avx2_8(point, k, 1, heap);
24053             else
24054 #endif
24055                 err = sp_256_ecc_mulmod_base_8(point, k, 1, NULL);
24056         }
24057 
24058         if (err == MP_OKAY) {
24059             /* r = point->x mod order */
24060             XMEMCPY(r, point->x, sizeof(sp_digit) * 8);
24061             sp_256_norm_8(r);
24062             c = sp_256_cmp_8(r, p256_order);
24063             sp_256_cond_sub_8(r, r, p256_order, 0 - (c >= 0));
24064             sp_256_norm_8(r);
24065 
24066             /* Conv k to Montgomery form (mod order) */
24067 #ifdef HAVE_INTEL_AVX2
24068             if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
24069                 sp_256_mul_avx2_8(k, k, p256_norm_order);
24070             else
24071 #endif
24072                 sp_256_mul_8(k, k, p256_norm_order);
24073             err = sp_256_mod_8(k, k, p256_order);
24074         }
24075         if (err == MP_OKAY) {
24076             sp_256_norm_8(k);
24077             /* kInv = 1/k mod order */
24078 #ifdef HAVE_INTEL_AVX2
24079             if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
24080                 sp_256_mont_inv_order_avx2_8(kInv, k, tmp);
24081             else
24082 #endif
24083                 sp_256_mont_inv_order_8(kInv, k, tmp);
24084             sp_256_norm_8(kInv);
24085 
24086             /* s = r * x + e */
24087 #ifdef HAVE_INTEL_AVX2
24088             if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
24089                 sp_256_mul_avx2_8(x, x, r);
24090             else
24091 #endif
24092                 sp_256_mul_8(x, x, r);
24093             err = sp_256_mod_8(x, x, p256_order);
24094         }
24095         if (err == MP_OKAY) {
24096             sp_256_norm_8(x);
24097             carry = sp_256_add_8(s, e, x);
24098             sp_256_cond_sub_8(s, s, p256_order, 0 - carry);
24099             sp_256_norm_8(s);
24100             c = sp_256_cmp_8(s, p256_order);
24101             sp_256_cond_sub_8(s, s, p256_order, 0 - (c >= 0));
24102             sp_256_norm_8(s);
24103 
24104             /* s = s * k^-1 mod order */
24105 #ifdef HAVE_INTEL_AVX2
24106             if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
24107                 sp_256_mont_mul_order_avx2_8(s, s, kInv);
24108             else
24109 #endif
24110                 sp_256_mont_mul_order_8(s, s, kInv);
24111             sp_256_norm_8(s);
24112 
24113             /* Check that signature is usable. */
24114             if (!sp_256_iszero_8(s))
24115                 break;
24116         }
24117     }
24118 
24119     if (i == 0)
24120         err = RNG_FAILURE_E;
24121 
24122     if (err == MP_OKAY)
24123         err = sp_256_to_mp(r, rm);
24124     if (err == MP_OKAY)
24125         err = sp_256_to_mp(s, sm);
24126 
24127 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
24128     if (d != NULL) {
24129         XMEMSET(d, 0, sizeof(sp_digit) * 8 * 8);
24130         XFREE(d, heap, DYNAMIC_TYPE_ECC);
24131     }
24132 #else
24133     XMEMSET(e, 0, sizeof(sp_digit) * 2 * 8);
24134     XMEMSET(x, 0, sizeof(sp_digit) * 2 * 8);
24135     XMEMSET(k, 0, sizeof(sp_digit) * 2 * 8);
24136     XMEMSET(r, 0, sizeof(sp_digit) * 2 * 8);
24137     XMEMSET(r, 0, sizeof(sp_digit) * 2 * 8);
24138     XMEMSET(tmp, 0, sizeof(sp_digit) * 3 * 2*8);
24139 #endif
24140     sp_ecc_point_free(point, 1, heap);
24141 
24142     return err;
24143 }
24144 #endif /* HAVE_ECC_SIGN */
24145 
24146 #ifdef HAVE_ECC_VERIFY
24147 /* Verify the signature values with the hash and public key.
24148  *   e = Truncate(hash, 256)
24149  *   u1 = e/s mod order
24150  *   u2 = r/s mod order
24151  *   r == (u1.G + u2.Q)->x mod order
24152  * Optimization: Leave point in projective form.
24153  *   (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
24154  *   (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
24155  * The hash is truncated to the first 256 bits.
24156  *
24157  * hash     Hash to sign.
24158  * hashLen  Length of the hash data.
24159  * rng      Random number generator.
24160  * priv     Private part of key - scalar.
24161  * rm       First part of result as an mp_int.
24162  * sm       Sirst part of result as an mp_int.
24163  * heap     Heap to use for allocation.
24164  * returns RNG failures, MEMORY_E when memory allocation fails and
24165  * MP_OKAY on success.
24166  */
24167 int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX,
24168     mp_int* pY, mp_int* pZ, mp_int* r, mp_int* sm, int* res, void* heap)
24169 {
24170 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
24171     sp_digit* d = NULL;
24172 #else
24173     sp_digit u1d[2*8];
24174     sp_digit u2d[2*8];
24175     sp_digit sd[2*8];
24176     sp_digit tmpd[2*8 * 5];
24177     sp_point p1d;
24178     sp_point p2d;
24179 #endif
24180     sp_digit* u1;
24181     sp_digit* u2;
24182     sp_digit* s;
24183     sp_digit* tmp;
24184     sp_point* p1;
24185     sp_point* p2 = NULL;
24186     sp_digit carry;
24187     int32_t c;
24188     int err;
24189 #ifdef HAVE_INTEL_AVX2
24190     word32 cpuid_flags = cpuid_get_flags();
24191 #endif
24192 
24193     err = sp_ecc_point_new(heap, p1d, p1);
24194     if (err == MP_OKAY)
24195         err = sp_ecc_point_new(heap, p2d, p2);
24196 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
24197     if (err == MP_OKAY) {
24198         d = XMALLOC(sizeof(sp_digit) * 16 * 8, heap, DYNAMIC_TYPE_ECC);
24199         if (d != NULL) {
24200             u1  = d + 0 * 8;
24201             u2  = d + 2 * 8;
24202             s   = d + 4 * 8;
24203             tmp = d + 6 * 8;
24204         }
24205         else
24206             err = MEMORY_E;
24207     }
24208 #else
24209     u1 = u1d;
24210     u2 = u2d;
24211     s  = sd;
24212     tmp = tmpd;
24213 #endif
24214 
24215     if (err == MP_OKAY) {
24216         if (hashLen > 32)
24217             hashLen = 32;
24218 
24219         sp_256_from_bin(u1, 8, hash, hashLen);
24220         sp_256_from_mp(u2, 8, r);
24221         sp_256_from_mp(s, 8, sm);
24222         sp_256_from_mp(p2->x, 8, pX);
24223         sp_256_from_mp(p2->y, 8, pY);
24224         sp_256_from_mp(p2->z, 8, pZ);
24225 
24226 #ifdef HAVE_INTEL_AVX2
24227         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
24228             sp_256_mul_avx2_8(s, s, p256_norm_order);
24229         else
24230 #endif
24231             sp_256_mul_8(s, s, p256_norm_order);
24232         err = sp_256_mod_8(s, s, p256_order);
24233     }
24234     if (err == MP_OKAY) {
24235         sp_256_norm_8(s);
24236 #ifdef HAVE_INTEL_AVX2
24237         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
24238             sp_256_mont_inv_order_avx2_8(s, s, tmp);
24239             sp_256_mont_mul_order_avx2_8(u1, u1, s);
24240             sp_256_mont_mul_order_avx2_8(u2, u2, s);
24241         }
24242         else
24243 #endif
24244         {
24245             sp_256_mont_inv_order_8(s, s, tmp);
24246             sp_256_mont_mul_order_8(u1, u1, s);
24247             sp_256_mont_mul_order_8(u2, u2, s);
24248         }
24249 
24250 #ifdef HAVE_INTEL_AVX2
24251         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
24252             err = sp_256_ecc_mulmod_base_avx2_8(p1, u1, 0, heap);
24253         else
24254 #endif
24255             err = sp_256_ecc_mulmod_base_8(p1, u1, 0, heap);
24256     }
24257     if (err == MP_OKAY) {
24258 #ifdef HAVE_INTEL_AVX2
24259         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
24260             err = sp_256_ecc_mulmod_avx2_8(p2, p2, u2, 0, heap);
24261         else
24262 #endif
24263             err = sp_256_ecc_mulmod_8(p2, p2, u2, 0, heap);
24264     }
24265 
24266     if (err == MP_OKAY) {
24267 #ifdef HAVE_INTEL_AVX2
24268         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
24269             sp_256_proj_point_add_avx2_8(p1, p1, p2, tmp);
24270         else
24271 #endif
24272             sp_256_proj_point_add_8(p1, p1, p2, tmp);
24273 
24274         /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
24275         /* Reload r and convert to Montgomery form. */
24276         sp_256_from_mp(u2, 8, r);
24277         err = sp_256_mod_mul_norm_8(u2, u2, p256_mod);
24278     }
24279 
24280     if (err == MP_OKAY) {
24281         /* u1 = r.z'.z' mod prime */
24282         sp_256_mont_sqr_8(p1->z, p1->z, p256_mod, p256_mp_mod);
24283         sp_256_mont_mul_8(u1, u2, p1->z, p256_mod, p256_mp_mod);
24284         *res = sp_256_cmp_8(p1->x, u1) == 0;
24285         if (*res == 0) {
24286             /* Reload r and add order. */
24287             sp_256_from_mp(u2, 8, r);
24288             carry = sp_256_add_8(u2, u2, p256_order);
24289             /* Carry means result is greater than mod and is not valid. */
24290             if (!carry) {
24291                 sp_256_norm_8(u2);
24292 
24293                 /* Compare with mod and if greater or equal then not valid. */
24294                 c = sp_256_cmp_8(u2, p256_mod);
24295                 if (c < 0) {
24296                     /* Convert to Montogomery form */
24297                     err = sp_256_mod_mul_norm_8(u2, u2, p256_mod);
24298                     if (err == MP_OKAY) {
24299                         /* u1 = (r + 1*order).z'.z' mod prime */
24300                         sp_256_mont_mul_8(u1, u2, p1->z, p256_mod,
24301                                                                   p256_mp_mod);
24302                         *res = sp_256_cmp_8(p1->x, u2) == 0;
24303                     }
24304                 }
24305             }
24306         }
24307     }
24308 
24309 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
24310     if (d != NULL)
24311         XFREE(d, heap, DYNAMIC_TYPE_ECC);
24312 #endif
24313     sp_ecc_point_free(p1, 0, heap);
24314     sp_ecc_point_free(p2, 0, heap);
24315 
24316     return err;
24317 }
24318 #endif /* HAVE_ECC_VERIFY */
24319 
24320 #ifdef HAVE_ECC_CHECK_KEY
24321 /* Check that the x and y oridinates are a valid point on the curve.
24322  *
24323  * point  EC point.
24324  * heap   Heap to use if dynamically allocating.
24325  * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
24326  * not on the curve and MP_OKAY otherwise.
24327  */
24328 static int sp_256_ecc_is_point_8(sp_point* point, void* heap)
24329 {
24330 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
24331     sp_digit* d = NULL;
24332 #else
24333     sp_digit t1d[2*8];
24334     sp_digit t2d[2*8];
24335 #endif
24336     sp_digit* t1;
24337     sp_digit* t2;
24338     int err = MP_OKAY;
24339 
24340 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
24341     d = XMALLOC(sizeof(sp_digit) * 8 * 4, heap, DYNAMIC_TYPE_ECC);
24342     if (d != NULL) {
24343         t1 = d + 0 * 8;
24344         t2 = d + 2 * 8;
24345     }
24346     else
24347         err = MEMORY_E;
24348 #else
24349     (void)heap;
24350 
24351     t1 = t1d;
24352     t2 = t2d;
24353 #endif
24354 
24355     if (err == MP_OKAY) {
24356         sp_256_sqr_8(t1, point->y);
24357         sp_256_mod_8(t1, t1, p256_mod);
24358         sp_256_sqr_8(t2, point->x);
24359         sp_256_mod_8(t2, t2, p256_mod);
24360         sp_256_mul_8(t2, t2, point->x);
24361         sp_256_mod_8(t2, t2, p256_mod);
24362     sp_256_sub_8(t2, p256_mod, t2);
24363         sp_256_mont_add_8(t1, t1, t2, p256_mod);
24364 
24365         sp_256_mont_add_8(t1, t1, point->x, p256_mod);
24366         sp_256_mont_add_8(t1, t1, point->x, p256_mod);
24367         sp_256_mont_add_8(t1, t1, point->x, p256_mod);
24368 
24369         if (sp_256_cmp_8(t1, p256_b) != 0)
24370             err = MP_VAL;
24371     }
24372 
24373 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
24374     if (d != NULL)
24375         XFREE(d, heap, DYNAMIC_TYPE_ECC);
24376 #endif
24377 
24378     return err;
24379 }
24380 
24381 /* Check that the x and y oridinates are a valid point on the curve.
24382  *
24383  * pX  X ordinate of EC point.
24384  * pY  Y ordinate of EC point.
24385  * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
24386  * not on the curve and MP_OKAY otherwise.
24387  */
24388 int sp_ecc_is_point_256(mp_int* pX, mp_int* pY)
24389 {
24390 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
24391     sp_point pubd;
24392 #endif
24393     sp_point* pub;
24394     byte one[1] = { 1 };
24395     int err;
24396 
24397     err = sp_ecc_point_new(NULL, pubd, pub);
24398     if (err == MP_OKAY) {
24399         sp_256_from_mp(pub->x, 8, pX);
24400         sp_256_from_mp(pub->y, 8, pY);
24401         sp_256_from_bin(pub->z, 8, one, sizeof(one));
24402 
24403         err = sp_256_ecc_is_point_8(pub, NULL);
24404     }
24405 
24406     sp_ecc_point_free(pub, 0, NULL);
24407 
24408     return err;
24409 }
24410 
24411 /* Check that the private scalar generates the EC point (px, py), the point is
24412  * on the curve and the point has the correct order.
24413  *
24414  * pX     X ordinate of EC point.
24415  * pY     Y ordinate of EC point.
24416  * privm  Private scalar that generates EC point.
24417  * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
24418  * not on the curve, ECC_INF_E if the point does not have the correct order,
24419  * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
24420  * MP_OKAY otherwise.
24421  */
24422 int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap)
24423 {
24424 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
24425     sp_digit privd[8];
24426     sp_point pubd;
24427     sp_point pd;
24428 #endif
24429     sp_digit* priv = NULL;
24430     sp_point* pub;
24431     sp_point* p = NULL;
24432     byte one[1] = { 1 };
24433     int err;
24434 #ifdef HAVE_INTEL_AVX2
24435     word32 cpuid_flags = cpuid_get_flags();
24436 #endif
24437 
24438     err = sp_ecc_point_new(heap, pubd, pub);
24439     if (err == MP_OKAY)
24440         err = sp_ecc_point_new(heap, pd, p);
24441 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
24442     if (err == MP_OKAY) {
24443         priv = XMALLOC(sizeof(sp_digit) * 8, heap, DYNAMIC_TYPE_ECC);
24444         if (priv == NULL)
24445             err = MEMORY_E;
24446     }
24447 #else
24448     priv = privd;
24449 #endif
24450 
24451     if (err == MP_OKAY) {
24452         sp_256_from_mp(pub->x, 8, pX);
24453         sp_256_from_mp(pub->y, 8, pY);
24454         sp_256_from_bin(pub->z, 8, one, sizeof(one));
24455         sp_256_from_mp(priv, 8, privm);
24456 
24457         /* Check point at infinitiy. */
24458         if (sp_256_iszero_8(pub->x) &&
24459             sp_256_iszero_8(pub->y))
24460             err = ECC_INF_E;
24461     }
24462 
24463     if (err == MP_OKAY) {
24464         /* Check range of X and Y */
24465         if (sp_256_cmp_8(pub->x, p256_mod) >= 0 ||
24466             sp_256_cmp_8(pub->y, p256_mod) >= 0)
24467             err = ECC_OUT_OF_RANGE_E;
24468     }
24469 
24470     if (err == MP_OKAY) {
24471         /* Check point is on curve */
24472         err = sp_256_ecc_is_point_8(pub, heap);
24473     }
24474 
24475     if (err == MP_OKAY) {
24476         /* Point * order = infinity */
24477 #ifdef HAVE_INTEL_AVX2
24478         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
24479             err = sp_256_ecc_mulmod_avx2_8(p, pub, p256_order, 1, heap);
24480         else
24481 #endif
24482             err = sp_256_ecc_mulmod_8(p, pub, p256_order, 1, heap);
24483     }
24484     if (err == MP_OKAY) {
24485         /* Check result is infinity */
24486         if (!sp_256_iszero_8(p->x) ||
24487             !sp_256_iszero_8(p->y)) {
24488             err = ECC_INF_E;
24489         }
24490     }
24491 
24492     if (err == MP_OKAY) {
24493         /* Base * private = point */
24494 #ifdef HAVE_INTEL_AVX2
24495         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
24496             err = sp_256_ecc_mulmod_base_avx2_8(p, priv, 1, heap);
24497         else
24498 #endif
24499             err = sp_256_ecc_mulmod_base_8(p, priv, 1, heap);
24500     }
24501     if (err == MP_OKAY) {
24502         /* Check result is public key */
24503         if (sp_256_cmp_8(p->x, pub->x) != 0 ||
24504             sp_256_cmp_8(p->y, pub->y) != 0) {
24505             err = ECC_PRIV_KEY_E;
24506         }
24507     }
24508 
24509 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
24510     if (priv != NULL)
24511         XFREE(priv, heap, DYNAMIC_TYPE_ECC);
24512 #endif
24513     sp_ecc_point_free(p, 0, heap);
24514     sp_ecc_point_free(pub, 0, heap);
24515 
24516     return err;
24517 }
24518 #endif
24519 #ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
24520 /* Add two projective EC points together.
24521  * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
24522  *
24523  * pX   First EC point's X ordinate.
24524  * pY   First EC point's Y ordinate.
24525  * pZ   First EC point's Z ordinate.
24526  * qX   Second EC point's X ordinate.
24527  * qY   Second EC point's Y ordinate.
24528  * qZ   Second EC point's Z ordinate.
24529  * rX   Resultant EC point's X ordinate.
24530  * rY   Resultant EC point's Y ordinate.
24531  * rZ   Resultant EC point's Z ordinate.
24532  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
24533  */
24534 int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
24535                               mp_int* qX, mp_int* qY, mp_int* qZ,
24536                               mp_int* rX, mp_int* rY, mp_int* rZ)
24537 {
24538 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
24539     sp_digit tmpd[2 * 8 * 5];
24540     sp_point pd;
24541     sp_point qd;
24542 #endif
24543     sp_digit* tmp;
24544     sp_point* p;
24545     sp_point* q = NULL;
24546     int err;
24547 #ifdef HAVE_INTEL_AVX2
24548     word32 cpuid_flags = cpuid_get_flags();
24549 #endif
24550 
24551     err = sp_ecc_point_new(NULL, pd, p);
24552     if (err == MP_OKAY)
24553         err = sp_ecc_point_new(NULL, qd, q);
24554 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
24555     if (err == MP_OKAY) {
24556         tmp = XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, NULL, DYNAMIC_TYPE_ECC);
24557         if (tmp == NULL)
24558             err = MEMORY_E;
24559     }
24560 #else
24561     tmp = tmpd;
24562 #endif
24563 
24564     if (err == MP_OKAY) {
24565         sp_256_from_mp(p->x, 8, pX);
24566         sp_256_from_mp(p->y, 8, pY);
24567         sp_256_from_mp(p->z, 8, pZ);
24568         sp_256_from_mp(q->x, 8, qX);
24569         sp_256_from_mp(q->y, 8, qY);
24570         sp_256_from_mp(q->z, 8, qZ);
24571 
24572 #ifdef HAVE_INTEL_AVX2
24573         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
24574             sp_256_proj_point_add_avx2_8(p, p, q, tmp);
24575         else
24576 #endif
24577             sp_256_proj_point_add_8(p, p, q, tmp);
24578     }
24579 
24580     if (err == MP_OKAY)
24581         err = sp_256_to_mp(p->x, rX);
24582     if (err == MP_OKAY)
24583         err = sp_256_to_mp(p->y, rY);
24584     if (err == MP_OKAY)
24585         err = sp_256_to_mp(p->z, rZ);
24586 
24587 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
24588     if (tmp != NULL)
24589         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
24590 #endif
24591     sp_ecc_point_free(q, 0, NULL);
24592     sp_ecc_point_free(p, 0, NULL);
24593 
24594     return err;
24595 }
24596 
24597 /* Double a projective EC point.
24598  * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
24599  *
24600  * pX   EC point's X ordinate.
24601  * pY   EC point's Y ordinate.
24602  * pZ   EC point's Z ordinate.
24603  * rX   Resultant EC point's X ordinate.
24604  * rY   Resultant EC point's Y ordinate.
24605  * rZ   Resultant EC point's Z ordinate.
24606  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
24607  */
24608 int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
24609                               mp_int* rX, mp_int* rY, mp_int* rZ)
24610 {
24611 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
24612     sp_digit tmpd[2 * 8 * 2];
24613     sp_point pd;
24614 #endif
24615     sp_digit* tmp;
24616     sp_point* p;
24617     int err;
24618 #ifdef HAVE_INTEL_AVX2
24619     word32 cpuid_flags = cpuid_get_flags();
24620 #endif
24621 
24622     err = sp_ecc_point_new(NULL, pd, p);
24623 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
24624     if (err == MP_OKAY) {
24625         tmp = XMALLOC(sizeof(sp_digit) * 2 * 8 * 2, NULL, DYNAMIC_TYPE_ECC);
24626         if (tmp == NULL)
24627             err = MEMORY_E;
24628     }
24629 #else
24630     tmp = tmpd;
24631 #endif
24632 
24633     if (err == MP_OKAY) {
24634         sp_256_from_mp(p->x, 8, pX);
24635         sp_256_from_mp(p->y, 8, pY);
24636         sp_256_from_mp(p->z, 8, pZ);
24637 
24638 #ifdef HAVE_INTEL_AVX2
24639         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
24640             sp_256_proj_point_dbl_avx2_8(p, p, tmp);
24641         else
24642 #endif
24643             sp_256_proj_point_dbl_8(p, p, tmp);
24644     }
24645 
24646     if (err == MP_OKAY)
24647         err = sp_256_to_mp(p->x, rX);
24648     if (err == MP_OKAY)
24649         err = sp_256_to_mp(p->y, rY);
24650     if (err == MP_OKAY)
24651         err = sp_256_to_mp(p->z, rZ);
24652 
24653 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
24654     if (tmp != NULL)
24655         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
24656 #endif
24657     sp_ecc_point_free(p, 0, NULL);
24658 
24659     return err;
24660 }
24661 
24662 /* Map a projective EC point to affine in place.
24663  * pZ will be one.
24664  *
24665  * pX   EC point's X ordinate.
24666  * pY   EC point's Y ordinate.
24667  * pZ   EC point's Z ordinate.
24668  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
24669  */
24670 int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ)
24671 {
24672 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
24673     sp_digit tmpd[2 * 8 * 4];
24674     sp_point pd;
24675 #endif
24676     sp_digit* tmp;
24677     sp_point* p;
24678     int err;
24679 
24680     err = sp_ecc_point_new(NULL, pd, p);
24681 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
24682     if (err == MP_OKAY) {
24683         tmp = XMALLOC(sizeof(sp_digit) * 2 * 8 * 4, NULL, DYNAMIC_TYPE_ECC);
24684         if (tmp == NULL)
24685             err = MEMORY_E;
24686     }
24687 #else
24688     tmp = tmpd;
24689 #endif
24690     if (err == MP_OKAY) {
24691         sp_256_from_mp(p->x, 8, pX);
24692         sp_256_from_mp(p->y, 8, pY);
24693         sp_256_from_mp(p->z, 8, pZ);
24694 
24695         sp_256_map_8(p, p, tmp);
24696     }
24697 
24698     if (err == MP_OKAY)
24699         err = sp_256_to_mp(p->x, pX);
24700     if (err == MP_OKAY)
24701         err = sp_256_to_mp(p->y, pY);
24702     if (err == MP_OKAY)
24703         err = sp_256_to_mp(p->z, pZ);
24704 
24705 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
24706     if (tmp != NULL)
24707         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
24708 #endif
24709     sp_ecc_point_free(p, 0, NULL);
24710 
24711     return err;
24712 }
24713 #endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
24714 #ifdef HAVE_COMP_KEY
24715 /* Find the square root of a number mod the prime of the curve.
24716  *
24717  * y  The number to operate on and the result.
24718  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
24719  */
24720 static int sp_256_mont_sqrt_8(sp_digit* y)
24721 {
24722 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
24723     sp_digit* d;
24724 #else
24725     sp_digit t1d[2 * 8];
24726     sp_digit t2d[2 * 8];
24727 #endif
24728     sp_digit* t1;
24729     sp_digit* t2;
24730     int err = MP_OKAY;
24731 #ifdef HAVE_INTEL_AVX2
24732     word32 cpuid_flags = cpuid_get_flags();
24733 #endif
24734 
24735 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
24736     d = XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC);
24737     if (d != NULL) {
24738         t1 = d + 0 * 8;
24739         t2 = d + 2 * 8;
24740     }
24741     else
24742         err = MEMORY_E;
24743 #else
24744     t1 = t1d;
24745     t2 = t2d;
24746 #endif
24747 
24748     if (err == MP_OKAY) {
24749 #ifdef HAVE_INTEL_AVX2
24750         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
24751             /* t2 = y ^ 0x2 */
24752             sp_256_mont_sqr_avx2_8(t2, y, p256_mod, p256_mp_mod);
24753             /* t1 = y ^ 0x3 */
24754             sp_256_mont_mul_avx2_8(t1, t2, y, p256_mod, p256_mp_mod);
24755             /* t2 = y ^ 0xc */
24756             sp_256_mont_sqr_n_avx2_8(t2, t1, 2, p256_mod, p256_mp_mod);
24757             /* t1 = y ^ 0xf */
24758             sp_256_mont_mul_avx2_8(t1, t1, t2, p256_mod, p256_mp_mod);
24759             /* t2 = y ^ 0xf0 */
24760             sp_256_mont_sqr_n_avx2_8(t2, t1, 4, p256_mod, p256_mp_mod);
24761             /* t1 = y ^ 0xff */
24762             sp_256_mont_mul_avx2_8(t1, t1, t2, p256_mod, p256_mp_mod);
24763             /* t2 = y ^ 0xff00 */
24764             sp_256_mont_sqr_n_avx2_8(t2, t1, 8, p256_mod, p256_mp_mod);
24765             /* t1 = y ^ 0xffff */
24766             sp_256_mont_mul_avx2_8(t1, t1, t2, p256_mod, p256_mp_mod);
24767             /* t2 = y ^ 0xffff0000 */
24768             sp_256_mont_sqr_n_avx2_8(t2, t1, 16, p256_mod, p256_mp_mod);
24769             /* t1 = y ^ 0xffffffff */
24770             sp_256_mont_mul_avx2_8(t1, t1, t2, p256_mod, p256_mp_mod);
24771             /* t1 = y ^ 0xffffffff00000000 */
24772             sp_256_mont_sqr_n_avx2_8(t1, t1, 32, p256_mod, p256_mp_mod);
24773             /* t1 = y ^ 0xffffffff00000001 */
24774             sp_256_mont_mul_avx2_8(t1, t1, y, p256_mod, p256_mp_mod);
24775             /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
24776             sp_256_mont_sqr_n_avx2_8(t1, t1, 96, p256_mod, p256_mp_mod);
24777             /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
24778             sp_256_mont_mul_avx2_8(t1, t1, y, p256_mod, p256_mp_mod);
24779             sp_256_mont_sqr_n_avx2_8(y, t1, 94, p256_mod, p256_mp_mod);
24780         }
24781         else
24782 #endif
24783         {
24784             /* t2 = y ^ 0x2 */
24785             sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod);
24786             /* t1 = y ^ 0x3 */
24787             sp_256_mont_mul_8(t1, t2, y, p256_mod, p256_mp_mod);
24788             /* t2 = y ^ 0xc */
24789             sp_256_mont_sqr_n_8(t2, t1, 2, p256_mod, p256_mp_mod);
24790             /* t1 = y ^ 0xf */
24791             sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
24792             /* t2 = y ^ 0xf0 */
24793             sp_256_mont_sqr_n_8(t2, t1, 4, p256_mod, p256_mp_mod);
24794             /* t1 = y ^ 0xff */
24795             sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
24796             /* t2 = y ^ 0xff00 */
24797             sp_256_mont_sqr_n_8(t2, t1, 8, p256_mod, p256_mp_mod);
24798             /* t1 = y ^ 0xffff */
24799             sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
24800             /* t2 = y ^ 0xffff0000 */
24801             sp_256_mont_sqr_n_8(t2, t1, 16, p256_mod, p256_mp_mod);
24802             /* t1 = y ^ 0xffffffff */
24803             sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
24804             /* t1 = y ^ 0xffffffff00000000 */
24805             sp_256_mont_sqr_n_8(t1, t1, 32, p256_mod, p256_mp_mod);
24806             /* t1 = y ^ 0xffffffff00000001 */
24807             sp_256_mont_mul_8(t1, t1, y, p256_mod, p256_mp_mod);
24808             /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
24809             sp_256_mont_sqr_n_8(t1, t1, 96, p256_mod, p256_mp_mod);
24810             /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
24811             sp_256_mont_mul_8(t1, t1, y, p256_mod, p256_mp_mod);
24812             sp_256_mont_sqr_n_8(y, t1, 94, p256_mod, p256_mp_mod);
24813         }
24814     }
24815 
24816 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
24817     if (d != NULL)
24818         XFREE(d, NULL, DYNAMIC_TYPE_ECC);
24819 #endif
24820 
24821     return err;
24822 }
24823 
24824 /* Uncompress the point given the X ordinate.
24825  *
24826  * xm    X ordinate.
24827  * odd   Whether the Y ordinate is odd.
24828  * ym    Calculated Y ordinate.
24829  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
24830  */
24831 int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
24832 {
24833 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
24834     sp_digit* d;
24835 #else
24836     sp_digit xd[2 * 8];
24837     sp_digit yd[2 * 8];
24838 #endif
24839     sp_digit* x;
24840     sp_digit* y;
24841     int err = MP_OKAY;
24842 #ifdef HAVE_INTEL_AVX2
24843     word32 cpuid_flags = cpuid_get_flags();
24844 #endif
24845 
24846 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
24847     d = XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC);
24848     if (d != NULL) {
24849         x = d + 0 * 8;
24850         y = d + 2 * 8;
24851     }
24852     else
24853         err = MEMORY_E;
24854 #else
24855     x = xd;
24856     y = yd;
24857 #endif
24858 
24859     if (err == MP_OKAY) {
24860         sp_256_from_mp(x, 8, xm);
24861 
24862         err = sp_256_mod_mul_norm_8(x, x, p256_mod);
24863     }
24864 
24865     if (err == MP_OKAY) {
24866         /* y = x^3 */
24867 #ifdef HAVE_INTEL_AVX2
24868         if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) {
24869             sp_256_mont_sqr_avx2_8(y, x, p256_mod, p256_mp_mod);
24870             sp_256_mont_mul_avx2_8(y, y, x, p256_mod, p256_mp_mod);
24871         }
24872         else
24873 #endif
24874         {
24875             sp_256_mont_sqr_8(y, x, p256_mod, p256_mp_mod);
24876             sp_256_mont_mul_8(y, y, x, p256_mod, p256_mp_mod);
24877         }
24878         /* y = x^3 - 3x */
24879         sp_256_mont_sub_8(y, y, x, p256_mod);
24880         sp_256_mont_sub_8(y, y, x, p256_mod);
24881         sp_256_mont_sub_8(y, y, x, p256_mod);
24882         /* y = x^3 - 3x + b */
24883         err = sp_256_mod_mul_norm_8(x, p256_b, p256_mod);
24884     }
24885     if (err == MP_OKAY) {
24886         sp_256_mont_add_8(y, y, x, p256_mod);
24887         /* y = sqrt(x^3 - 3x + b) */
24888         err = sp_256_mont_sqrt_8(y);
24889     }
24890     if (err == MP_OKAY) {
24891         XMEMSET(y + 8, 0, 8 * sizeof(sp_digit));
24892         sp_256_mont_reduce_8(y, p256_mod, p256_mp_mod);
24893         if (((y[0] ^ odd) & 1) != 0)
24894             sp_256_mont_sub_8(y, p256_mod, y, p256_mod);
24895 
24896         err = sp_256_to_mp(y, ym);
24897     }
24898 
24899 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
24900     if (d != NULL)
24901         XFREE(d, NULL, DYNAMIC_TYPE_ECC);
24902 #endif
24903 
24904     return err;
24905 }
24906 #endif
24907 #endif /* WOLFSSL_SP_NO_256 */
24908 #endif /* WOLFSSL_HAVE_SP_ECC */
24909 #endif /* WOLFSSL_SP_ARM32_ASM */
24910 #endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_ECC */
24911