Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository Zip archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of mbedtls by
bn_mul.h
00001 /** 00002 * \file bn_mul.h 00003 * 00004 * \brief Multi-precision integer library 00005 * 00006 * Copyright (C) 2006-2015, ARM Limited, All Rights Reserved 00007 * SPDX-License-Identifier: Apache-2.0 00008 * 00009 * Licensed under the Apache License, Version 2.0 (the "License"); you may 00010 * not use this file except in compliance with the License. 00011 * You may obtain a copy of the License at 00012 * 00013 * http://www.apache.org/licenses/LICENSE-2.0 00014 * 00015 * Unless required by applicable law or agreed to in writing, software 00016 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 00017 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00018 * See the License for the specific language governing permissions and 00019 * limitations under the License. 00020 * 00021 * This file is part of mbed TLS (https://tls.mbed.org) 00022 */ 00023 /* 00024 * Multiply source vector [s] with b, add result 00025 * to destination vector [d] and set carry c. 00026 * 00027 * Currently supports: 00028 * 00029 * . IA-32 (386+) . AMD64 / EM64T 00030 * . IA-32 (SSE2) . Motorola 68000 00031 * . PowerPC, 32-bit . MicroBlaze 00032 * . PowerPC, 64-bit . TriCore 00033 * . SPARC v8 . ARM v3+ 00034 * . Alpha . MIPS32 00035 * . C, longlong . 
C, generic 00036 */ 00037 #ifndef MBEDTLS_BN_MUL_H 00038 #define MBEDTLS_BN_MUL_H 00039 00040 #include "bignum.h" 00041 00042 #if defined(MBEDTLS_HAVE_ASM) 00043 00044 #ifndef asm 00045 #define asm __asm 00046 #endif 00047 00048 /* armcc5 --gnu defines __GNUC__ but doesn't support GNU's extended asm */ 00049 #if defined(__GNUC__) && \ 00050 ( !defined(__ARMCC_VERSION) || __ARMCC_VERSION >= 6000000 ) 00051 #if defined(__i386__) 00052 00053 #define MULADDC_INIT \ 00054 asm( \ 00055 "movl %%ebx, %0 \n\t" \ 00056 "movl %5, %%esi \n\t" \ 00057 "movl %6, %%edi \n\t" \ 00058 "movl %7, %%ecx \n\t" \ 00059 "movl %8, %%ebx \n\t" 00060 00061 #define MULADDC_CORE \ 00062 "lodsl \n\t" \ 00063 "mull %%ebx \n\t" \ 00064 "addl %%ecx, %%eax \n\t" \ 00065 "adcl $0, %%edx \n\t" \ 00066 "addl (%%edi), %%eax \n\t" \ 00067 "adcl $0, %%edx \n\t" \ 00068 "movl %%edx, %%ecx \n\t" \ 00069 "stosl \n\t" 00070 00071 #if defined(MBEDTLS_HAVE_SSE2) 00072 00073 #define MULADDC_HUIT \ 00074 "movd %%ecx, %%mm1 \n\t" \ 00075 "movd %%ebx, %%mm0 \n\t" \ 00076 "movd (%%edi), %%mm3 \n\t" \ 00077 "paddq %%mm3, %%mm1 \n\t" \ 00078 "movd (%%esi), %%mm2 \n\t" \ 00079 "pmuludq %%mm0, %%mm2 \n\t" \ 00080 "movd 4(%%esi), %%mm4 \n\t" \ 00081 "pmuludq %%mm0, %%mm4 \n\t" \ 00082 "movd 8(%%esi), %%mm6 \n\t" \ 00083 "pmuludq %%mm0, %%mm6 \n\t" \ 00084 "movd 12(%%esi), %%mm7 \n\t" \ 00085 "pmuludq %%mm0, %%mm7 \n\t" \ 00086 "paddq %%mm2, %%mm1 \n\t" \ 00087 "movd 4(%%edi), %%mm3 \n\t" \ 00088 "paddq %%mm4, %%mm3 \n\t" \ 00089 "movd 8(%%edi), %%mm5 \n\t" \ 00090 "paddq %%mm6, %%mm5 \n\t" \ 00091 "movd 12(%%edi), %%mm4 \n\t" \ 00092 "paddq %%mm4, %%mm7 \n\t" \ 00093 "movd %%mm1, (%%edi) \n\t" \ 00094 "movd 16(%%esi), %%mm2 \n\t" \ 00095 "pmuludq %%mm0, %%mm2 \n\t" \ 00096 "psrlq $32, %%mm1 \n\t" \ 00097 "movd 20(%%esi), %%mm4 \n\t" \ 00098 "pmuludq %%mm0, %%mm4 \n\t" \ 00099 "paddq %%mm3, %%mm1 \n\t" \ 00100 "movd 24(%%esi), %%mm6 \n\t" \ 00101 "pmuludq %%mm0, %%mm6 \n\t" \ 00102 "movd %%mm1, 4(%%edi) \n\t" \ 00103 
"psrlq $32, %%mm1 \n\t" \ 00104 "movd 28(%%esi), %%mm3 \n\t" \ 00105 "pmuludq %%mm0, %%mm3 \n\t" \ 00106 "paddq %%mm5, %%mm1 \n\t" \ 00107 "movd 16(%%edi), %%mm5 \n\t" \ 00108 "paddq %%mm5, %%mm2 \n\t" \ 00109 "movd %%mm1, 8(%%edi) \n\t" \ 00110 "psrlq $32, %%mm1 \n\t" \ 00111 "paddq %%mm7, %%mm1 \n\t" \ 00112 "movd 20(%%edi), %%mm5 \n\t" \ 00113 "paddq %%mm5, %%mm4 \n\t" \ 00114 "movd %%mm1, 12(%%edi) \n\t" \ 00115 "psrlq $32, %%mm1 \n\t" \ 00116 "paddq %%mm2, %%mm1 \n\t" \ 00117 "movd 24(%%edi), %%mm5 \n\t" \ 00118 "paddq %%mm5, %%mm6 \n\t" \ 00119 "movd %%mm1, 16(%%edi) \n\t" \ 00120 "psrlq $32, %%mm1 \n\t" \ 00121 "paddq %%mm4, %%mm1 \n\t" \ 00122 "movd 28(%%edi), %%mm5 \n\t" \ 00123 "paddq %%mm5, %%mm3 \n\t" \ 00124 "movd %%mm1, 20(%%edi) \n\t" \ 00125 "psrlq $32, %%mm1 \n\t" \ 00126 "paddq %%mm6, %%mm1 \n\t" \ 00127 "movd %%mm1, 24(%%edi) \n\t" \ 00128 "psrlq $32, %%mm1 \n\t" \ 00129 "paddq %%mm3, %%mm1 \n\t" \ 00130 "movd %%mm1, 28(%%edi) \n\t" \ 00131 "addl $32, %%edi \n\t" \ 00132 "addl $32, %%esi \n\t" \ 00133 "psrlq $32, %%mm1 \n\t" \ 00134 "movd %%mm1, %%ecx \n\t" 00135 00136 #define MULADDC_STOP \ 00137 "emms \n\t" \ 00138 "movl %4, %%ebx \n\t" \ 00139 "movl %%ecx, %1 \n\t" \ 00140 "movl %%edi, %2 \n\t" \ 00141 "movl %%esi, %3 \n\t" \ 00142 : "=m" (t), "=m" (c), "=m" (d), "=m" (s) \ 00143 : "m" (t), "m" (s), "m" (d), "m" (c), "m" (b) \ 00144 : "eax", "ecx", "edx", "esi", "edi" \ 00145 ); 00146 00147 #else 00148 00149 #define MULADDC_STOP \ 00150 "movl %4, %%ebx \n\t" \ 00151 "movl %%ecx, %1 \n\t" \ 00152 "movl %%edi, %2 \n\t" \ 00153 "movl %%esi, %3 \n\t" \ 00154 : "=m" (t), "=m" (c), "=m" (d), "=m" (s) \ 00155 : "m" (t), "m" (s), "m" (d), "m" (c), "m" (b) \ 00156 : "eax", "ecx", "edx", "esi", "edi" \ 00157 ); 00158 #endif /* SSE2 */ 00159 #endif /* i386 */ 00160 00161 #if defined(__amd64__) || defined (__x86_64__) 00162 00163 #define MULADDC_INIT \ 00164 asm( \ 00165 "xorq %%r8, %%r8 \n\t" 00166 00167 #define MULADDC_CORE \ 00168 "movq (%%rsi), %%rax 
\n\t" \ 00169 "mulq %%rbx \n\t" \ 00170 "addq $8, %%rsi \n\t" \ 00171 "addq %%rcx, %%rax \n\t" \ 00172 "movq %%r8, %%rcx \n\t" \ 00173 "adcq $0, %%rdx \n\t" \ 00174 "nop \n\t" \ 00175 "addq %%rax, (%%rdi) \n\t" \ 00176 "adcq %%rdx, %%rcx \n\t" \ 00177 "addq $8, %%rdi \n\t" 00178 00179 #define MULADDC_STOP \ 00180 : "+c" (c), "+D" (d), "+S" (s) \ 00181 : "b" (b) \ 00182 : "rax", "rdx", "r8" \ 00183 ); 00184 00185 #endif /* AMD64 */ 00186 00187 #if defined(__mc68020__) || defined(__mcpu32__) 00188 00189 #define MULADDC_INIT \ 00190 asm( \ 00191 "movl %3, %%a2 \n\t" \ 00192 "movl %4, %%a3 \n\t" \ 00193 "movl %5, %%d3 \n\t" \ 00194 "movl %6, %%d2 \n\t" \ 00195 "moveq #0, %%d0 \n\t" 00196 00197 #define MULADDC_CORE \ 00198 "movel %%a2@+, %%d1 \n\t" \ 00199 "mulul %%d2, %%d4:%%d1 \n\t" \ 00200 "addl %%d3, %%d1 \n\t" \ 00201 "addxl %%d0, %%d4 \n\t" \ 00202 "moveq #0, %%d3 \n\t" \ 00203 "addl %%d1, %%a3@+ \n\t" \ 00204 "addxl %%d4, %%d3 \n\t" 00205 00206 #define MULADDC_STOP \ 00207 "movl %%d3, %0 \n\t" \ 00208 "movl %%a3, %1 \n\t" \ 00209 "movl %%a2, %2 \n\t" \ 00210 : "=m" (c), "=m" (d), "=m" (s) \ 00211 : "m" (s), "m" (d), "m" (c), "m" (b) \ 00212 : "d0", "d1", "d2", "d3", "d4", "a2", "a3" \ 00213 ); 00214 00215 #define MULADDC_HUIT \ 00216 "movel %%a2@+, %%d1 \n\t" \ 00217 "mulul %%d2, %%d4:%%d1 \n\t" \ 00218 "addxl %%d3, %%d1 \n\t" \ 00219 "addxl %%d0, %%d4 \n\t" \ 00220 "addl %%d1, %%a3@+ \n\t" \ 00221 "movel %%a2@+, %%d1 \n\t" \ 00222 "mulul %%d2, %%d3:%%d1 \n\t" \ 00223 "addxl %%d4, %%d1 \n\t" \ 00224 "addxl %%d0, %%d3 \n\t" \ 00225 "addl %%d1, %%a3@+ \n\t" \ 00226 "movel %%a2@+, %%d1 \n\t" \ 00227 "mulul %%d2, %%d4:%%d1 \n\t" \ 00228 "addxl %%d3, %%d1 \n\t" \ 00229 "addxl %%d0, %%d4 \n\t" \ 00230 "addl %%d1, %%a3@+ \n\t" \ 00231 "movel %%a2@+, %%d1 \n\t" \ 00232 "mulul %%d2, %%d3:%%d1 \n\t" \ 00233 "addxl %%d4, %%d1 \n\t" \ 00234 "addxl %%d0, %%d3 \n\t" \ 00235 "addl %%d1, %%a3@+ \n\t" \ 00236 "movel %%a2@+, %%d1 \n\t" \ 00237 "mulul %%d2, %%d4:%%d1 \n\t" \ 00238 
"addxl %%d3, %%d1 \n\t" \ 00239 "addxl %%d0, %%d4 \n\t" \ 00240 "addl %%d1, %%a3@+ \n\t" \ 00241 "movel %%a2@+, %%d1 \n\t" \ 00242 "mulul %%d2, %%d3:%%d1 \n\t" \ 00243 "addxl %%d4, %%d1 \n\t" \ 00244 "addxl %%d0, %%d3 \n\t" \ 00245 "addl %%d1, %%a3@+ \n\t" \ 00246 "movel %%a2@+, %%d1 \n\t" \ 00247 "mulul %%d2, %%d4:%%d1 \n\t" \ 00248 "addxl %%d3, %%d1 \n\t" \ 00249 "addxl %%d0, %%d4 \n\t" \ 00250 "addl %%d1, %%a3@+ \n\t" \ 00251 "movel %%a2@+, %%d1 \n\t" \ 00252 "mulul %%d2, %%d3:%%d1 \n\t" \ 00253 "addxl %%d4, %%d1 \n\t" \ 00254 "addxl %%d0, %%d3 \n\t" \ 00255 "addl %%d1, %%a3@+ \n\t" \ 00256 "addxl %%d0, %%d3 \n\t" 00257 00258 #endif /* MC68000 */ 00259 00260 #if defined(__powerpc64__) || defined(__ppc64__) 00261 00262 #if defined(__MACH__) && defined(__APPLE__) 00263 00264 #define MULADDC_INIT \ 00265 asm( \ 00266 "ld r3, %3 \n\t" \ 00267 "ld r4, %4 \n\t" \ 00268 "ld r5, %5 \n\t" \ 00269 "ld r6, %6 \n\t" \ 00270 "addi r3, r3, -8 \n\t" \ 00271 "addi r4, r4, -8 \n\t" \ 00272 "addic r5, r5, 0 \n\t" 00273 00274 #define MULADDC_CORE \ 00275 "ldu r7, 8(r3) \n\t" \ 00276 "mulld r8, r7, r6 \n\t" \ 00277 "mulhdu r9, r7, r6 \n\t" \ 00278 "adde r8, r8, r5 \n\t" \ 00279 "ld r7, 8(r4) \n\t" \ 00280 "addze r5, r9 \n\t" \ 00281 "addc r8, r8, r7 \n\t" \ 00282 "stdu r8, 8(r4) \n\t" 00283 00284 #define MULADDC_STOP \ 00285 "addze r5, r5 \n\t" \ 00286 "addi r4, r4, 8 \n\t" \ 00287 "addi r3, r3, 8 \n\t" \ 00288 "std r5, %0 \n\t" \ 00289 "std r4, %1 \n\t" \ 00290 "std r3, %2 \n\t" \ 00291 : "=m" (c), "=m" (d), "=m" (s) \ 00292 : "m" (s), "m" (d), "m" (c), "m" (b) \ 00293 : "r3", "r4", "r5", "r6", "r7", "r8", "r9" \ 00294 ); 00295 00296 00297 #else /* __MACH__ && __APPLE__ */ 00298 00299 #define MULADDC_INIT \ 00300 asm( \ 00301 "ld %%r3, %3 \n\t" \ 00302 "ld %%r4, %4 \n\t" \ 00303 "ld %%r5, %5 \n\t" \ 00304 "ld %%r6, %6 \n\t" \ 00305 "addi %%r3, %%r3, -8 \n\t" \ 00306 "addi %%r4, %%r4, -8 \n\t" \ 00307 "addic %%r5, %%r5, 0 \n\t" 00308 00309 #define MULADDC_CORE \ 00310 "ldu %%r7, 
8(%%r3) \n\t" \ 00311 "mulld %%r8, %%r7, %%r6 \n\t" \ 00312 "mulhdu %%r9, %%r7, %%r6 \n\t" \ 00313 "adde %%r8, %%r8, %%r5 \n\t" \ 00314 "ld %%r7, 8(%%r4) \n\t" \ 00315 "addze %%r5, %%r9 \n\t" \ 00316 "addc %%r8, %%r8, %%r7 \n\t" \ 00317 "stdu %%r8, 8(%%r4) \n\t" 00318 00319 #define MULADDC_STOP \ 00320 "addze %%r5, %%r5 \n\t" \ 00321 "addi %%r4, %%r4, 8 \n\t" \ 00322 "addi %%r3, %%r3, 8 \n\t" \ 00323 "std %%r5, %0 \n\t" \ 00324 "std %%r4, %1 \n\t" \ 00325 "std %%r3, %2 \n\t" \ 00326 : "=m" (c), "=m" (d), "=m" (s) \ 00327 : "m" (s), "m" (d), "m" (c), "m" (b) \ 00328 : "r3", "r4", "r5", "r6", "r7", "r8", "r9" \ 00329 ); 00330 00331 #endif /* __MACH__ && __APPLE__ */ 00332 00333 #elif defined(__powerpc__) || defined(__ppc__) /* end PPC64/begin PPC32 */ 00334 00335 #if defined(__MACH__) && defined(__APPLE__) 00336 00337 #define MULADDC_INIT \ 00338 asm( \ 00339 "lwz r3, %3 \n\t" \ 00340 "lwz r4, %4 \n\t" \ 00341 "lwz r5, %5 \n\t" \ 00342 "lwz r6, %6 \n\t" \ 00343 "addi r3, r3, -4 \n\t" \ 00344 "addi r4, r4, -4 \n\t" \ 00345 "addic r5, r5, 0 \n\t" 00346 00347 #define MULADDC_CORE \ 00348 "lwzu r7, 4(r3) \n\t" \ 00349 "mullw r8, r7, r6 \n\t" \ 00350 "mulhwu r9, r7, r6 \n\t" \ 00351 "adde r8, r8, r5 \n\t" \ 00352 "lwz r7, 4(r4) \n\t" \ 00353 "addze r5, r9 \n\t" \ 00354 "addc r8, r8, r7 \n\t" \ 00355 "stwu r8, 4(r4) \n\t" 00356 00357 #define MULADDC_STOP \ 00358 "addze r5, r5 \n\t" \ 00359 "addi r4, r4, 4 \n\t" \ 00360 "addi r3, r3, 4 \n\t" \ 00361 "stw r5, %0 \n\t" \ 00362 "stw r4, %1 \n\t" \ 00363 "stw r3, %2 \n\t" \ 00364 : "=m" (c), "=m" (d), "=m" (s) \ 00365 : "m" (s), "m" (d), "m" (c), "m" (b) \ 00366 : "r3", "r4", "r5", "r6", "r7", "r8", "r9" \ 00367 ); 00368 00369 #else /* __MACH__ && __APPLE__ */ 00370 00371 #define MULADDC_INIT \ 00372 asm( \ 00373 "lwz %%r3, %3 \n\t" \ 00374 "lwz %%r4, %4 \n\t" \ 00375 "lwz %%r5, %5 \n\t" \ 00376 "lwz %%r6, %6 \n\t" \ 00377 "addi %%r3, %%r3, -4 \n\t" \ 00378 "addi %%r4, %%r4, -4 \n\t" \ 00379 "addic %%r5, %%r5, 0 \n\t" 00380 
00381 #define MULADDC_CORE \ 00382 "lwzu %%r7, 4(%%r3) \n\t" \ 00383 "mullw %%r8, %%r7, %%r6 \n\t" \ 00384 "mulhwu %%r9, %%r7, %%r6 \n\t" \ 00385 "adde %%r8, %%r8, %%r5 \n\t" \ 00386 "lwz %%r7, 4(%%r4) \n\t" \ 00387 "addze %%r5, %%r9 \n\t" \ 00388 "addc %%r8, %%r8, %%r7 \n\t" \ 00389 "stwu %%r8, 4(%%r4) \n\t" 00390 00391 #define MULADDC_STOP \ 00392 "addze %%r5, %%r5 \n\t" \ 00393 "addi %%r4, %%r4, 4 \n\t" \ 00394 "addi %%r3, %%r3, 4 \n\t" \ 00395 "stw %%r5, %0 \n\t" \ 00396 "stw %%r4, %1 \n\t" \ 00397 "stw %%r3, %2 \n\t" \ 00398 : "=m" (c), "=m" (d), "=m" (s) \ 00399 : "m" (s), "m" (d), "m" (c), "m" (b) \ 00400 : "r3", "r4", "r5", "r6", "r7", "r8", "r9" \ 00401 ); 00402 00403 #endif /* __MACH__ && __APPLE__ */ 00404 00405 #endif /* PPC32 */ 00406 00407 /* 00408 * The Sparc(64) assembly is reported to be broken. 00409 * Disable it for now, until we're able to fix it. 00410 */ 00411 #if 0 && defined(__sparc__) 00412 #if defined(__sparc64__) 00413 00414 #define MULADDC_INIT \ 00415 asm( \ 00416 "ldx %3, %%o0 \n\t" \ 00417 "ldx %4, %%o1 \n\t" \ 00418 "ld %5, %%o2 \n\t" \ 00419 "ld %6, %%o3 \n\t" 00420 00421 #define MULADDC_CORE \ 00422 "ld [%%o0], %%o4 \n\t" \ 00423 "inc 4, %%o0 \n\t" \ 00424 "ld [%%o1], %%o5 \n\t" \ 00425 "umul %%o3, %%o4, %%o4 \n\t" \ 00426 "addcc %%o4, %%o2, %%o4 \n\t" \ 00427 "rd %%y, %%g1 \n\t" \ 00428 "addx %%g1, 0, %%g1 \n\t" \ 00429 "addcc %%o4, %%o5, %%o4 \n\t" \ 00430 "st %%o4, [%%o1] \n\t" \ 00431 "addx %%g1, 0, %%o2 \n\t" \ 00432 "inc 4, %%o1 \n\t" 00433 00434 #define MULADDC_STOP \ 00435 "st %%o2, %0 \n\t" \ 00436 "stx %%o1, %1 \n\t" \ 00437 "stx %%o0, %2 \n\t" \ 00438 : "=m" (c), "=m" (d), "=m" (s) \ 00439 : "m" (s), "m" (d), "m" (c), "m" (b) \ 00440 : "g1", "o0", "o1", "o2", "o3", "o4", \ 00441 "o5" \ 00442 ); 00443 00444 #else /* __sparc64__ */ 00445 00446 #define MULADDC_INIT \ 00447 asm( \ 00448 "ld %3, %%o0 \n\t" \ 00449 "ld %4, %%o1 \n\t" \ 00450 "ld %5, %%o2 \n\t" \ 00451 "ld %6, %%o3 \n\t" 00452 00453 #define MULADDC_CORE \ 00454 
"ld [%%o0], %%o4 \n\t" \ 00455 "inc 4, %%o0 \n\t" \ 00456 "ld [%%o1], %%o5 \n\t" \ 00457 "umul %%o3, %%o4, %%o4 \n\t" \ 00458 "addcc %%o4, %%o2, %%o4 \n\t" \ 00459 "rd %%y, %%g1 \n\t" \ 00460 "addx %%g1, 0, %%g1 \n\t" \ 00461 "addcc %%o4, %%o5, %%o4 \n\t" \ 00462 "st %%o4, [%%o1] \n\t" \ 00463 "addx %%g1, 0, %%o2 \n\t" \ 00464 "inc 4, %%o1 \n\t" 00465 00466 #define MULADDC_STOP \ 00467 "st %%o2, %0 \n\t" \ 00468 "st %%o1, %1 \n\t" \ 00469 "st %%o0, %2 \n\t" \ 00470 : "=m" (c), "=m" (d), "=m" (s) \ 00471 : "m" (s), "m" (d), "m" (c), "m" (b) \ 00472 : "g1", "o0", "o1", "o2", "o3", "o4", \ 00473 "o5" \ 00474 ); 00475 00476 #endif /* __sparc64__ */ 00477 #endif /* __sparc__ */ 00478 00479 #if defined(__microblaze__) || defined(microblaze) 00480 00481 #define MULADDC_INIT \ 00482 asm( \ 00483 "lwi r3, %3 \n\t" \ 00484 "lwi r4, %4 \n\t" \ 00485 "lwi r5, %5 \n\t" \ 00486 "lwi r6, %6 \n\t" \ 00487 "andi r7, r6, 0xffff \n\t" \ 00488 "bsrli r6, r6, 16 \n\t" 00489 00490 #define MULADDC_CORE \ 00491 "lhui r8, r3, 0 \n\t" \ 00492 "addi r3, r3, 2 \n\t" \ 00493 "lhui r9, r3, 0 \n\t" \ 00494 "addi r3, r3, 2 \n\t" \ 00495 "mul r10, r9, r6 \n\t" \ 00496 "mul r11, r8, r7 \n\t" \ 00497 "mul r12, r9, r7 \n\t" \ 00498 "mul r13, r8, r6 \n\t" \ 00499 "bsrli r8, r10, 16 \n\t" \ 00500 "bsrli r9, r11, 16 \n\t" \ 00501 "add r13, r13, r8 \n\t" \ 00502 "add r13, r13, r9 \n\t" \ 00503 "bslli r10, r10, 16 \n\t" \ 00504 "bslli r11, r11, 16 \n\t" \ 00505 "add r12, r12, r10 \n\t" \ 00506 "addc r13, r13, r0 \n\t" \ 00507 "add r12, r12, r11 \n\t" \ 00508 "addc r13, r13, r0 \n\t" \ 00509 "lwi r10, r4, 0 \n\t" \ 00510 "add r12, r12, r10 \n\t" \ 00511 "addc r13, r13, r0 \n\t" \ 00512 "add r12, r12, r5 \n\t" \ 00513 "addc r5, r13, r0 \n\t" \ 00514 "swi r12, r4, 0 \n\t" \ 00515 "addi r4, r4, 4 \n\t" 00516 00517 #define MULADDC_STOP \ 00518 "swi r5, %0 \n\t" \ 00519 "swi r4, %1 \n\t" \ 00520 "swi r3, %2 \n\t" \ 00521 : "=m" (c), "=m" (d), "=m" (s) \ 00522 : "m" (s), "m" (d), "m" (c), "m" (b) \ 00523 : "r3", 
"r4" "r5", "r6", "r7", "r8", \ 00524 "r9", "r10", "r11", "r12", "r13" \ 00525 ); 00526 00527 #endif /* MicroBlaze */ 00528 00529 #if defined(__tricore__) 00530 00531 #define MULADDC_INIT \ 00532 asm( \ 00533 "ld.a %%a2, %3 \n\t" \ 00534 "ld.a %%a3, %4 \n\t" \ 00535 "ld.w %%d4, %5 \n\t" \ 00536 "ld.w %%d1, %6 \n\t" \ 00537 "xor %%d5, %%d5 \n\t" 00538 00539 #define MULADDC_CORE \ 00540 "ld.w %%d0, [%%a2+] \n\t" \ 00541 "madd.u %%e2, %%e4, %%d0, %%d1 \n\t" \ 00542 "ld.w %%d0, [%%a3] \n\t" \ 00543 "addx %%d2, %%d2, %%d0 \n\t" \ 00544 "addc %%d3, %%d3, 0 \n\t" \ 00545 "mov %%d4, %%d3 \n\t" \ 00546 "st.w [%%a3+], %%d2 \n\t" 00547 00548 #define MULADDC_STOP \ 00549 "st.w %0, %%d4 \n\t" \ 00550 "st.a %1, %%a3 \n\t" \ 00551 "st.a %2, %%a2 \n\t" \ 00552 : "=m" (c), "=m" (d), "=m" (s) \ 00553 : "m" (s), "m" (d), "m" (c), "m" (b) \ 00554 : "d0", "d1", "e2", "d4", "a2", "a3" \ 00555 ); 00556 00557 #endif /* TriCore */ 00558 00559 /* 00560 * gcc -O0 by default uses r7 for the frame pointer, so it complains about our 00561 * use of r7 below, unless -fomit-frame-pointer is passed. Unfortunately, 00562 * passing that option is not easy when building with yotta. 00563 * 00564 * On the other hand, -fomit-frame-pointer is implied by any -Ox options with 00565 * x !=0, which we can detect using __OPTIMIZE__ (which is also defined by 00566 * clang and armcc5 under the same conditions). 00567 * 00568 * So, only use the optimized assembly below for optimized build, which avoids 00569 * the build error and is pretty reasonable anyway. 
00570 */ 00571 #if defined(__GNUC__) && !defined(__OPTIMIZE__) 00572 #define MULADDC_CANNOT_USE_R7 00573 #endif 00574 00575 #if defined(__arm__) && !defined(MULADDC_CANNOT_USE_R7) 00576 00577 #if defined(__thumb__) && !defined(__thumb2__) 00578 00579 #define MULADDC_INIT \ 00580 asm( \ 00581 "ldr r0, %3 \n\t" \ 00582 "ldr r1, %4 \n\t" \ 00583 "ldr r2, %5 \n\t" \ 00584 "ldr r3, %6 \n\t" \ 00585 "lsr r7, r3, #16 \n\t" \ 00586 "mov r9, r7 \n\t" \ 00587 "lsl r7, r3, #16 \n\t" \ 00588 "lsr r7, r7, #16 \n\t" \ 00589 "mov r8, r7 \n\t" 00590 00591 #define MULADDC_CORE \ 00592 "ldmia r0!, {r6} \n\t" \ 00593 "lsr r7, r6, #16 \n\t" \ 00594 "lsl r6, r6, #16 \n\t" \ 00595 "lsr r6, r6, #16 \n\t" \ 00596 "mov r4, r8 \n\t" \ 00597 "mul r4, r6 \n\t" \ 00598 "mov r3, r9 \n\t" \ 00599 "mul r6, r3 \n\t" \ 00600 "mov r5, r9 \n\t" \ 00601 "mul r5, r7 \n\t" \ 00602 "mov r3, r8 \n\t" \ 00603 "mul r7, r3 \n\t" \ 00604 "lsr r3, r6, #16 \n\t" \ 00605 "add r5, r5, r3 \n\t" \ 00606 "lsr r3, r7, #16 \n\t" \ 00607 "add r5, r5, r3 \n\t" \ 00608 "add r4, r4, r2 \n\t" \ 00609 "mov r2, #0 \n\t" \ 00610 "adc r5, r2 \n\t" \ 00611 "lsl r3, r6, #16 \n\t" \ 00612 "add r4, r4, r3 \n\t" \ 00613 "adc r5, r2 \n\t" \ 00614 "lsl r3, r7, #16 \n\t" \ 00615 "add r4, r4, r3 \n\t" \ 00616 "adc r5, r2 \n\t" \ 00617 "ldr r3, [r1] \n\t" \ 00618 "add r4, r4, r3 \n\t" \ 00619 "adc r2, r5 \n\t" \ 00620 "stmia r1!, {r4} \n\t" 00621 00622 #define MULADDC_STOP \ 00623 "str r2, %0 \n\t" \ 00624 "str r1, %1 \n\t" \ 00625 "str r0, %2 \n\t" \ 00626 : "=m" (c), "=m" (d), "=m" (s) \ 00627 : "m" (s), "m" (d), "m" (c), "m" (b) \ 00628 : "r0", "r1", "r2", "r3", "r4", "r5", \ 00629 "r6", "r7", "r8", "r9", "cc" \ 00630 ); 00631 00632 #else 00633 00634 #define MULADDC_INIT \ 00635 asm( \ 00636 "ldr r0, %3 \n\t" \ 00637 "ldr r1, %4 \n\t" \ 00638 "ldr r2, %5 \n\t" \ 00639 "ldr r3, %6 \n\t" 00640 00641 #define MULADDC_CORE \ 00642 "ldr r4, [r0], #4 \n\t" \ 00643 "mov r5, #0 \n\t" \ 00644 "ldr r6, [r1] \n\t" \ 00645 "umlal r2, r5, r3, r4 
\n\t" \ 00646 "adds r7, r6, r2 \n\t" \ 00647 "adc r2, r5, #0 \n\t" \ 00648 "str r7, [r1], #4 \n\t" 00649 00650 #define MULADDC_STOP \ 00651 "str r2, %0 \n\t" \ 00652 "str r1, %1 \n\t" \ 00653 "str r0, %2 \n\t" \ 00654 : "=m" (c), "=m" (d), "=m" (s) \ 00655 : "m" (s), "m" (d), "m" (c), "m" (b) \ 00656 : "r0", "r1", "r2", "r3", "r4", "r5", \ 00657 "r6", "r7", "cc" \ 00658 ); 00659 00660 #endif /* Thumb */ 00661 00662 #endif /* ARMv3 */ 00663 00664 #if defined(__alpha__) 00665 00666 #define MULADDC_INIT \ 00667 asm( \ 00668 "ldq $1, %3 \n\t" \ 00669 "ldq $2, %4 \n\t" \ 00670 "ldq $3, %5 \n\t" \ 00671 "ldq $4, %6 \n\t" 00672 00673 #define MULADDC_CORE \ 00674 "ldq $6, 0($1) \n\t" \ 00675 "addq $1, 8, $1 \n\t" \ 00676 "mulq $6, $4, $7 \n\t" \ 00677 "umulh $6, $4, $6 \n\t" \ 00678 "addq $7, $3, $7 \n\t" \ 00679 "cmpult $7, $3, $3 \n\t" \ 00680 "ldq $5, 0($2) \n\t" \ 00681 "addq $7, $5, $7 \n\t" \ 00682 "cmpult $7, $5, $5 \n\t" \ 00683 "stq $7, 0($2) \n\t" \ 00684 "addq $2, 8, $2 \n\t" \ 00685 "addq $6, $3, $3 \n\t" \ 00686 "addq $5, $3, $3 \n\t" 00687 00688 #define MULADDC_STOP \ 00689 "stq $3, %0 \n\t" \ 00690 "stq $2, %1 \n\t" \ 00691 "stq $1, %2 \n\t" \ 00692 : "=m" (c), "=m" (d), "=m" (s) \ 00693 : "m" (s), "m" (d), "m" (c), "m" (b) \ 00694 : "$1", "$2", "$3", "$4", "$5", "$6", "$7" \ 00695 ); 00696 #endif /* Alpha */ 00697 00698 #if defined(__mips__) && !defined(__mips64) 00699 00700 #define MULADDC_INIT \ 00701 asm( \ 00702 "lw $10, %3 \n\t" \ 00703 "lw $11, %4 \n\t" \ 00704 "lw $12, %5 \n\t" \ 00705 "lw $13, %6 \n\t" 00706 00707 #define MULADDC_CORE \ 00708 "lw $14, 0($10) \n\t" \ 00709 "multu $13, $14 \n\t" \ 00710 "addi $10, $10, 4 \n\t" \ 00711 "mflo $14 \n\t" \ 00712 "mfhi $9 \n\t" \ 00713 "addu $14, $12, $14 \n\t" \ 00714 "lw $15, 0($11) \n\t" \ 00715 "sltu $12, $14, $12 \n\t" \ 00716 "addu $15, $14, $15 \n\t" \ 00717 "sltu $14, $15, $14 \n\t" \ 00718 "addu $12, $12, $9 \n\t" \ 00719 "sw $15, 0($11) \n\t" \ 00720 "addu $12, $12, $14 \n\t" \ 00721 "addi $11, 
$11, 4 \n\t" 00722 00723 #define MULADDC_STOP \ 00724 "sw $12, %0 \n\t" \ 00725 "sw $11, %1 \n\t" \ 00726 "sw $10, %2 \n\t" \ 00727 : "=m" (c), "=m" (d), "=m" (s) \ 00728 : "m" (s), "m" (d), "m" (c), "m" (b) \ 00729 : "$9", "$10", "$11", "$12", "$13", "$14", "$15" \ 00730 ); 00731 00732 #endif /* MIPS */ 00733 #endif /* GNUC */ 00734 00735 #if (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__) 00736 00737 #define MULADDC_INIT \ 00738 __asm mov esi, s \ 00739 __asm mov edi, d \ 00740 __asm mov ecx, c \ 00741 __asm mov ebx, b 00742 00743 #define MULADDC_CORE \ 00744 __asm lodsd \ 00745 __asm mul ebx \ 00746 __asm add eax, ecx \ 00747 __asm adc edx, 0 \ 00748 __asm add eax, [edi] \ 00749 __asm adc edx, 0 \ 00750 __asm mov ecx, edx \ 00751 __asm stosd 00752 00753 #if defined(MBEDTLS_HAVE_SSE2) 00754 00755 #define EMIT __asm _emit 00756 00757 #define MULADDC_HUIT \ 00758 EMIT 0x0F EMIT 0x6E EMIT 0xC9 \ 00759 EMIT 0x0F EMIT 0x6E EMIT 0xC3 \ 00760 EMIT 0x0F EMIT 0x6E EMIT 0x1F \ 00761 EMIT 0x0F EMIT 0xD4 EMIT 0xCB \ 00762 EMIT 0x0F EMIT 0x6E EMIT 0x16 \ 00763 EMIT 0x0F EMIT 0xF4 EMIT 0xD0 \ 00764 EMIT 0x0F EMIT 0x6E EMIT 0x66 EMIT 0x04 \ 00765 EMIT 0x0F EMIT 0xF4 EMIT 0xE0 \ 00766 EMIT 0x0F EMIT 0x6E EMIT 0x76 EMIT 0x08 \ 00767 EMIT 0x0F EMIT 0xF4 EMIT 0xF0 \ 00768 EMIT 0x0F EMIT 0x6E EMIT 0x7E EMIT 0x0C \ 00769 EMIT 0x0F EMIT 0xF4 EMIT 0xF8 \ 00770 EMIT 0x0F EMIT 0xD4 EMIT 0xCA \ 00771 EMIT 0x0F EMIT 0x6E EMIT 0x5F EMIT 0x04 \ 00772 EMIT 0x0F EMIT 0xD4 EMIT 0xDC \ 00773 EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x08 \ 00774 EMIT 0x0F EMIT 0xD4 EMIT 0xEE \ 00775 EMIT 0x0F EMIT 0x6E EMIT 0x67 EMIT 0x0C \ 00776 EMIT 0x0F EMIT 0xD4 EMIT 0xFC \ 00777 EMIT 0x0F EMIT 0x7E EMIT 0x0F \ 00778 EMIT 0x0F EMIT 0x6E EMIT 0x56 EMIT 0x10 \ 00779 EMIT 0x0F EMIT 0xF4 EMIT 0xD0 \ 00780 EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \ 00781 EMIT 0x0F EMIT 0x6E EMIT 0x66 EMIT 0x14 \ 00782 EMIT 0x0F EMIT 0xF4 EMIT 0xE0 \ 00783 EMIT 0x0F EMIT 0xD4 EMIT 0xCB \ 00784 EMIT 0x0F EMIT 0x6E EMIT 0x76 
EMIT 0x18 \ 00785 EMIT 0x0F EMIT 0xF4 EMIT 0xF0 \ 00786 EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x04 \ 00787 EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \ 00788 EMIT 0x0F EMIT 0x6E EMIT 0x5E EMIT 0x1C \ 00789 EMIT 0x0F EMIT 0xF4 EMIT 0xD8 \ 00790 EMIT 0x0F EMIT 0xD4 EMIT 0xCD \ 00791 EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x10 \ 00792 EMIT 0x0F EMIT 0xD4 EMIT 0xD5 \ 00793 EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x08 \ 00794 EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \ 00795 EMIT 0x0F EMIT 0xD4 EMIT 0xCF \ 00796 EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x14 \ 00797 EMIT 0x0F EMIT 0xD4 EMIT 0xE5 \ 00798 EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x0C \ 00799 EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \ 00800 EMIT 0x0F EMIT 0xD4 EMIT 0xCA \ 00801 EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x18 \ 00802 EMIT 0x0F EMIT 0xD4 EMIT 0xF5 \ 00803 EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x10 \ 00804 EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \ 00805 EMIT 0x0F EMIT 0xD4 EMIT 0xCC \ 00806 EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x1C \ 00807 EMIT 0x0F EMIT 0xD4 EMIT 0xDD \ 00808 EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x14 \ 00809 EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \ 00810 EMIT 0x0F EMIT 0xD4 EMIT 0xCE \ 00811 EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x18 \ 00812 EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \ 00813 EMIT 0x0F EMIT 0xD4 EMIT 0xCB \ 00814 EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x1C \ 00815 EMIT 0x83 EMIT 0xC7 EMIT 0x20 \ 00816 EMIT 0x83 EMIT 0xC6 EMIT 0x20 \ 00817 EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \ 00818 EMIT 0x0F EMIT 0x7E EMIT 0xC9 00819 00820 #define MULADDC_STOP \ 00821 EMIT 0x0F EMIT 0x77 \ 00822 __asm mov c, ecx \ 00823 __asm mov d, edi \ 00824 __asm mov s, esi \ 00825 00826 #else 00827 00828 #define MULADDC_STOP \ 00829 __asm mov c, ecx \ 00830 __asm mov d, edi \ 00831 __asm mov s, esi \ 00832 00833 #endif /* SSE2 */ 00834 #endif /* MSVC */ 00835 00836 #endif /* MBEDTLS_HAVE_ASM */ 00837 00838 #if !defined(MULADDC_CORE) 00839 #if defined(MBEDTLS_HAVE_UDBL) 00840 00841 #define MULADDC_INIT \ 00842 { \ 00843 mbedtls_t_udbl r; 
\ 00844 mbedtls_mpi_uint r0, r1; 00845 00846 #define MULADDC_CORE \ 00847 r = *(s++) * (mbedtls_t_udbl) b; \ 00848 r0 = (mbedtls_mpi_uint) r; \ 00849 r1 = (mbedtls_mpi_uint)( r >> biL ); \ 00850 r0 += c; r1 += (r0 < c); \ 00851 r0 += *d; r1 += (r0 < *d); \ 00852 c = r1; *(d++) = r0; 00853 00854 #define MULADDC_STOP \ 00855 } 00856 00857 #else 00858 #define MULADDC_INIT \ 00859 { \ 00860 mbedtls_mpi_uint s0, s1, b0, b1; \ 00861 mbedtls_mpi_uint r0, r1, rx, ry; \ 00862 b0 = ( b << biH ) >> biH; \ 00863 b1 = ( b >> biH ); 00864 00865 #define MULADDC_CORE \ 00866 s0 = ( *s << biH ) >> biH; \ 00867 s1 = ( *s >> biH ); s++; \ 00868 rx = s0 * b1; r0 = s0 * b0; \ 00869 ry = s1 * b0; r1 = s1 * b1; \ 00870 r1 += ( rx >> biH ); \ 00871 r1 += ( ry >> biH ); \ 00872 rx <<= biH; ry <<= biH; \ 00873 r0 += rx; r1 += (r0 < rx); \ 00874 r0 += ry; r1 += (r0 < ry); \ 00875 r0 += c; r1 += (r0 < c); \ 00876 r0 += *d; r1 += (r0 < *d); \ 00877 c = r1; *(d++) = r0; 00878 00879 #define MULADDC_STOP \ 00880 } 00881 00882 #endif /* C (generic) */ 00883 #endif /* C (longlong) */ 00884 00885 #endif /* bn_mul.h */
Generated on Tue Jul 12 2022 17:25:41 by
