Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository Zip archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of OmniWheels by
bn_mul.h
00001 /** 00002 * \file bn_mul.h 00003 * 00004 * \brief Multi-precision integer library 00005 */ 00006 /* 00007 * Copyright (C) 2006-2015, ARM Limited, All Rights Reserved 00008 * SPDX-License-Identifier: Apache-2.0 00009 * 00010 * Licensed under the Apache License, Version 2.0 (the "License"); you may 00011 * not use this file except in compliance with the License. 00012 * You may obtain a copy of the License at 00013 * 00014 * http://www.apache.org/licenses/LICENSE-2.0 00015 * 00016 * Unless required by applicable law or agreed to in writing, software 00017 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 00018 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00019 * See the License for the specific language governing permissions and 00020 * limitations under the License. 00021 * 00022 * This file is part of mbed TLS (https://tls.mbed.org) 00023 */ 00024 /* 00025 * Multiply source vector [s] with b, add result 00026 * to destination vector [d] and set carry c. 00027 * 00028 * Currently supports: 00029 * 00030 * . IA-32 (386+) . AMD64 / EM64T 00031 * . IA-32 (SSE2) . Motorola 68000 00032 * . PowerPC, 32-bit . MicroBlaze 00033 * . PowerPC, 64-bit . TriCore 00034 * . SPARC v8 . ARM v3+ 00035 * . Alpha . MIPS32 00036 * . C, longlong . 
C, generic 00037 */ 00038 #ifndef MBEDTLS_BN_MUL_H 00039 #define MBEDTLS_BN_MUL_H 00040 00041 #include "bignum.h" 00042 00043 #if defined(MBEDTLS_HAVE_ASM) 00044 00045 #ifndef asm 00046 #define asm __asm 00047 #endif 00048 00049 /* armcc5 --gnu defines __GNUC__ but doesn't support GNU's extended asm */ 00050 #if defined(__GNUC__) && \ 00051 ( !defined(__ARMCC_VERSION) || __ARMCC_VERSION >= 6000000 ) 00052 #if defined(__i386__) 00053 00054 #define MULADDC_INIT \ 00055 asm( \ 00056 "movl %%ebx, %0 \n\t" \ 00057 "movl %5, %%esi \n\t" \ 00058 "movl %6, %%edi \n\t" \ 00059 "movl %7, %%ecx \n\t" \ 00060 "movl %8, %%ebx \n\t" 00061 00062 #define MULADDC_CORE \ 00063 "lodsl \n\t" \ 00064 "mull %%ebx \n\t" \ 00065 "addl %%ecx, %%eax \n\t" \ 00066 "adcl $0, %%edx \n\t" \ 00067 "addl (%%edi), %%eax \n\t" \ 00068 "adcl $0, %%edx \n\t" \ 00069 "movl %%edx, %%ecx \n\t" \ 00070 "stosl \n\t" 00071 00072 #if defined(MBEDTLS_HAVE_SSE2) 00073 00074 #define MULADDC_HUIT \ 00075 "movd %%ecx, %%mm1 \n\t" \ 00076 "movd %%ebx, %%mm0 \n\t" \ 00077 "movd (%%edi), %%mm3 \n\t" \ 00078 "paddq %%mm3, %%mm1 \n\t" \ 00079 "movd (%%esi), %%mm2 \n\t" \ 00080 "pmuludq %%mm0, %%mm2 \n\t" \ 00081 "movd 4(%%esi), %%mm4 \n\t" \ 00082 "pmuludq %%mm0, %%mm4 \n\t" \ 00083 "movd 8(%%esi), %%mm6 \n\t" \ 00084 "pmuludq %%mm0, %%mm6 \n\t" \ 00085 "movd 12(%%esi), %%mm7 \n\t" \ 00086 "pmuludq %%mm0, %%mm7 \n\t" \ 00087 "paddq %%mm2, %%mm1 \n\t" \ 00088 "movd 4(%%edi), %%mm3 \n\t" \ 00089 "paddq %%mm4, %%mm3 \n\t" \ 00090 "movd 8(%%edi), %%mm5 \n\t" \ 00091 "paddq %%mm6, %%mm5 \n\t" \ 00092 "movd 12(%%edi), %%mm4 \n\t" \ 00093 "paddq %%mm4, %%mm7 \n\t" \ 00094 "movd %%mm1, (%%edi) \n\t" \ 00095 "movd 16(%%esi), %%mm2 \n\t" \ 00096 "pmuludq %%mm0, %%mm2 \n\t" \ 00097 "psrlq $32, %%mm1 \n\t" \ 00098 "movd 20(%%esi), %%mm4 \n\t" \ 00099 "pmuludq %%mm0, %%mm4 \n\t" \ 00100 "paddq %%mm3, %%mm1 \n\t" \ 00101 "movd 24(%%esi), %%mm6 \n\t" \ 00102 "pmuludq %%mm0, %%mm6 \n\t" \ 00103 "movd %%mm1, 4(%%edi) \n\t" \ 00104 
"psrlq $32, %%mm1 \n\t" \ 00105 "movd 28(%%esi), %%mm3 \n\t" \ 00106 "pmuludq %%mm0, %%mm3 \n\t" \ 00107 "paddq %%mm5, %%mm1 \n\t" \ 00108 "movd 16(%%edi), %%mm5 \n\t" \ 00109 "paddq %%mm5, %%mm2 \n\t" \ 00110 "movd %%mm1, 8(%%edi) \n\t" \ 00111 "psrlq $32, %%mm1 \n\t" \ 00112 "paddq %%mm7, %%mm1 \n\t" \ 00113 "movd 20(%%edi), %%mm5 \n\t" \ 00114 "paddq %%mm5, %%mm4 \n\t" \ 00115 "movd %%mm1, 12(%%edi) \n\t" \ 00116 "psrlq $32, %%mm1 \n\t" \ 00117 "paddq %%mm2, %%mm1 \n\t" \ 00118 "movd 24(%%edi), %%mm5 \n\t" \ 00119 "paddq %%mm5, %%mm6 \n\t" \ 00120 "movd %%mm1, 16(%%edi) \n\t" \ 00121 "psrlq $32, %%mm1 \n\t" \ 00122 "paddq %%mm4, %%mm1 \n\t" \ 00123 "movd 28(%%edi), %%mm5 \n\t" \ 00124 "paddq %%mm5, %%mm3 \n\t" \ 00125 "movd %%mm1, 20(%%edi) \n\t" \ 00126 "psrlq $32, %%mm1 \n\t" \ 00127 "paddq %%mm6, %%mm1 \n\t" \ 00128 "movd %%mm1, 24(%%edi) \n\t" \ 00129 "psrlq $32, %%mm1 \n\t" \ 00130 "paddq %%mm3, %%mm1 \n\t" \ 00131 "movd %%mm1, 28(%%edi) \n\t" \ 00132 "addl $32, %%edi \n\t" \ 00133 "addl $32, %%esi \n\t" \ 00134 "psrlq $32, %%mm1 \n\t" \ 00135 "movd %%mm1, %%ecx \n\t" 00136 00137 #define MULADDC_STOP \ 00138 "emms \n\t" \ 00139 "movl %4, %%ebx \n\t" \ 00140 "movl %%ecx, %1 \n\t" \ 00141 "movl %%edi, %2 \n\t" \ 00142 "movl %%esi, %3 \n\t" \ 00143 : "=m" (t), "=m" (c), "=m" (d), "=m" (s) \ 00144 : "m" (t), "m" (s), "m" (d), "m" (c), "m" (b) \ 00145 : "eax", "ecx", "edx", "esi", "edi" \ 00146 ); 00147 00148 #else 00149 00150 #define MULADDC_STOP \ 00151 "movl %4, %%ebx \n\t" \ 00152 "movl %%ecx, %1 \n\t" \ 00153 "movl %%edi, %2 \n\t" \ 00154 "movl %%esi, %3 \n\t" \ 00155 : "=m" (t), "=m" (c), "=m" (d), "=m" (s) \ 00156 : "m" (t), "m" (s), "m" (d), "m" (c), "m" (b) \ 00157 : "eax", "ecx", "edx", "esi", "edi" \ 00158 ); 00159 #endif /* SSE2 */ 00160 #endif /* i386 */ 00161 00162 #if defined(__amd64__) || defined (__x86_64__) 00163 00164 #define MULADDC_INIT \ 00165 asm( \ 00166 "xorq %%r8, %%r8 \n\t" 00167 00168 #define MULADDC_CORE \ 00169 "movq (%%rsi), %%rax 
\n\t" \ 00170 "mulq %%rbx \n\t" \ 00171 "addq $8, %%rsi \n\t" \ 00172 "addq %%rcx, %%rax \n\t" \ 00173 "movq %%r8, %%rcx \n\t" \ 00174 "adcq $0, %%rdx \n\t" \ 00175 "nop \n\t" \ 00176 "addq %%rax, (%%rdi) \n\t" \ 00177 "adcq %%rdx, %%rcx \n\t" \ 00178 "addq $8, %%rdi \n\t" 00179 00180 #define MULADDC_STOP \ 00181 : "+c" (c), "+D" (d), "+S" (s) \ 00182 : "b" (b) \ 00183 : "rax", "rdx", "r8" \ 00184 ); 00185 00186 #endif /* AMD64 */ 00187 00188 #if defined(__mc68020__) || defined(__mcpu32__) 00189 00190 #define MULADDC_INIT \ 00191 asm( \ 00192 "movl %3, %%a2 \n\t" \ 00193 "movl %4, %%a3 \n\t" \ 00194 "movl %5, %%d3 \n\t" \ 00195 "movl %6, %%d2 \n\t" \ 00196 "moveq #0, %%d0 \n\t" 00197 00198 #define MULADDC_CORE \ 00199 "movel %%a2@+, %%d1 \n\t" \ 00200 "mulul %%d2, %%d4:%%d1 \n\t" \ 00201 "addl %%d3, %%d1 \n\t" \ 00202 "addxl %%d0, %%d4 \n\t" \ 00203 "moveq #0, %%d3 \n\t" \ 00204 "addl %%d1, %%a3@+ \n\t" \ 00205 "addxl %%d4, %%d3 \n\t" 00206 00207 #define MULADDC_STOP \ 00208 "movl %%d3, %0 \n\t" \ 00209 "movl %%a3, %1 \n\t" \ 00210 "movl %%a2, %2 \n\t" \ 00211 : "=m" (c), "=m" (d), "=m" (s) \ 00212 : "m" (s), "m" (d), "m" (c), "m" (b) \ 00213 : "d0", "d1", "d2", "d3", "d4", "a2", "a3" \ 00214 ); 00215 00216 #define MULADDC_HUIT \ 00217 "movel %%a2@+, %%d1 \n\t" \ 00218 "mulul %%d2, %%d4:%%d1 \n\t" \ 00219 "addxl %%d3, %%d1 \n\t" \ 00220 "addxl %%d0, %%d4 \n\t" \ 00221 "addl %%d1, %%a3@+ \n\t" \ 00222 "movel %%a2@+, %%d1 \n\t" \ 00223 "mulul %%d2, %%d3:%%d1 \n\t" \ 00224 "addxl %%d4, %%d1 \n\t" \ 00225 "addxl %%d0, %%d3 \n\t" \ 00226 "addl %%d1, %%a3@+ \n\t" \ 00227 "movel %%a2@+, %%d1 \n\t" \ 00228 "mulul %%d2, %%d4:%%d1 \n\t" \ 00229 "addxl %%d3, %%d1 \n\t" \ 00230 "addxl %%d0, %%d4 \n\t" \ 00231 "addl %%d1, %%a3@+ \n\t" \ 00232 "movel %%a2@+, %%d1 \n\t" \ 00233 "mulul %%d2, %%d3:%%d1 \n\t" \ 00234 "addxl %%d4, %%d1 \n\t" \ 00235 "addxl %%d0, %%d3 \n\t" \ 00236 "addl %%d1, %%a3@+ \n\t" \ 00237 "movel %%a2@+, %%d1 \n\t" \ 00238 "mulul %%d2, %%d4:%%d1 \n\t" \ 00239 
"addxl %%d3, %%d1 \n\t" \ 00240 "addxl %%d0, %%d4 \n\t" \ 00241 "addl %%d1, %%a3@+ \n\t" \ 00242 "movel %%a2@+, %%d1 \n\t" \ 00243 "mulul %%d2, %%d3:%%d1 \n\t" \ 00244 "addxl %%d4, %%d1 \n\t" \ 00245 "addxl %%d0, %%d3 \n\t" \ 00246 "addl %%d1, %%a3@+ \n\t" \ 00247 "movel %%a2@+, %%d1 \n\t" \ 00248 "mulul %%d2, %%d4:%%d1 \n\t" \ 00249 "addxl %%d3, %%d1 \n\t" \ 00250 "addxl %%d0, %%d4 \n\t" \ 00251 "addl %%d1, %%a3@+ \n\t" \ 00252 "movel %%a2@+, %%d1 \n\t" \ 00253 "mulul %%d2, %%d3:%%d1 \n\t" \ 00254 "addxl %%d4, %%d1 \n\t" \ 00255 "addxl %%d0, %%d3 \n\t" \ 00256 "addl %%d1, %%a3@+ \n\t" \ 00257 "addxl %%d0, %%d3 \n\t" 00258 00259 #endif /* MC68000 */ 00260 00261 #if defined(__powerpc64__) || defined(__ppc64__) 00262 00263 #if defined(__MACH__) && defined(__APPLE__) 00264 00265 #define MULADDC_INIT \ 00266 asm( \ 00267 "ld r3, %3 \n\t" \ 00268 "ld r4, %4 \n\t" \ 00269 "ld r5, %5 \n\t" \ 00270 "ld r6, %6 \n\t" \ 00271 "addi r3, r3, -8 \n\t" \ 00272 "addi r4, r4, -8 \n\t" \ 00273 "addic r5, r5, 0 \n\t" 00274 00275 #define MULADDC_CORE \ 00276 "ldu r7, 8(r3) \n\t" \ 00277 "mulld r8, r7, r6 \n\t" \ 00278 "mulhdu r9, r7, r6 \n\t" \ 00279 "adde r8, r8, r5 \n\t" \ 00280 "ld r7, 8(r4) \n\t" \ 00281 "addze r5, r9 \n\t" \ 00282 "addc r8, r8, r7 \n\t" \ 00283 "stdu r8, 8(r4) \n\t" 00284 00285 #define MULADDC_STOP \ 00286 "addze r5, r5 \n\t" \ 00287 "addi r4, r4, 8 \n\t" \ 00288 "addi r3, r3, 8 \n\t" \ 00289 "std r5, %0 \n\t" \ 00290 "std r4, %1 \n\t" \ 00291 "std r3, %2 \n\t" \ 00292 : "=m" (c), "=m" (d), "=m" (s) \ 00293 : "m" (s), "m" (d), "m" (c), "m" (b) \ 00294 : "r3", "r4", "r5", "r6", "r7", "r8", "r9" \ 00295 ); 00296 00297 00298 #else /* __MACH__ && __APPLE__ */ 00299 00300 #define MULADDC_INIT \ 00301 asm( \ 00302 "ld %%r3, %3 \n\t" \ 00303 "ld %%r4, %4 \n\t" \ 00304 "ld %%r5, %5 \n\t" \ 00305 "ld %%r6, %6 \n\t" \ 00306 "addi %%r3, %%r3, -8 \n\t" \ 00307 "addi %%r4, %%r4, -8 \n\t" \ 00308 "addic %%r5, %%r5, 0 \n\t" 00309 00310 #define MULADDC_CORE \ 00311 "ldu %%r7, 
8(%%r3) \n\t" \ 00312 "mulld %%r8, %%r7, %%r6 \n\t" \ 00313 "mulhdu %%r9, %%r7, %%r6 \n\t" \ 00314 "adde %%r8, %%r8, %%r5 \n\t" \ 00315 "ld %%r7, 8(%%r4) \n\t" \ 00316 "addze %%r5, %%r9 \n\t" \ 00317 "addc %%r8, %%r8, %%r7 \n\t" \ 00318 "stdu %%r8, 8(%%r4) \n\t" 00319 00320 #define MULADDC_STOP \ 00321 "addze %%r5, %%r5 \n\t" \ 00322 "addi %%r4, %%r4, 8 \n\t" \ 00323 "addi %%r3, %%r3, 8 \n\t" \ 00324 "std %%r5, %0 \n\t" \ 00325 "std %%r4, %1 \n\t" \ 00326 "std %%r3, %2 \n\t" \ 00327 : "=m" (c), "=m" (d), "=m" (s) \ 00328 : "m" (s), "m" (d), "m" (c), "m" (b) \ 00329 : "r3", "r4", "r5", "r6", "r7", "r8", "r9" \ 00330 ); 00331 00332 #endif /* __MACH__ && __APPLE__ */ 00333 00334 #elif defined(__powerpc__) || defined(__ppc__) /* end PPC64/begin PPC32 */ 00335 00336 #if defined(__MACH__) && defined(__APPLE__) 00337 00338 #define MULADDC_INIT \ 00339 asm( \ 00340 "lwz r3, %3 \n\t" \ 00341 "lwz r4, %4 \n\t" \ 00342 "lwz r5, %5 \n\t" \ 00343 "lwz r6, %6 \n\t" \ 00344 "addi r3, r3, -4 \n\t" \ 00345 "addi r4, r4, -4 \n\t" \ 00346 "addic r5, r5, 0 \n\t" 00347 00348 #define MULADDC_CORE \ 00349 "lwzu r7, 4(r3) \n\t" \ 00350 "mullw r8, r7, r6 \n\t" \ 00351 "mulhwu r9, r7, r6 \n\t" \ 00352 "adde r8, r8, r5 \n\t" \ 00353 "lwz r7, 4(r4) \n\t" \ 00354 "addze r5, r9 \n\t" \ 00355 "addc r8, r8, r7 \n\t" \ 00356 "stwu r8, 4(r4) \n\t" 00357 00358 #define MULADDC_STOP \ 00359 "addze r5, r5 \n\t" \ 00360 "addi r4, r4, 4 \n\t" \ 00361 "addi r3, r3, 4 \n\t" \ 00362 "stw r5, %0 \n\t" \ 00363 "stw r4, %1 \n\t" \ 00364 "stw r3, %2 \n\t" \ 00365 : "=m" (c), "=m" (d), "=m" (s) \ 00366 : "m" (s), "m" (d), "m" (c), "m" (b) \ 00367 : "r3", "r4", "r5", "r6", "r7", "r8", "r9" \ 00368 ); 00369 00370 #else /* __MACH__ && __APPLE__ */ 00371 00372 #define MULADDC_INIT \ 00373 asm( \ 00374 "lwz %%r3, %3 \n\t" \ 00375 "lwz %%r4, %4 \n\t" \ 00376 "lwz %%r5, %5 \n\t" \ 00377 "lwz %%r6, %6 \n\t" \ 00378 "addi %%r3, %%r3, -4 \n\t" \ 00379 "addi %%r4, %%r4, -4 \n\t" \ 00380 "addic %%r5, %%r5, 0 \n\t" 00381 
00382 #define MULADDC_CORE \ 00383 "lwzu %%r7, 4(%%r3) \n\t" \ 00384 "mullw %%r8, %%r7, %%r6 \n\t" \ 00385 "mulhwu %%r9, %%r7, %%r6 \n\t" \ 00386 "adde %%r8, %%r8, %%r5 \n\t" \ 00387 "lwz %%r7, 4(%%r4) \n\t" \ 00388 "addze %%r5, %%r9 \n\t" \ 00389 "addc %%r8, %%r8, %%r7 \n\t" \ 00390 "stwu %%r8, 4(%%r4) \n\t" 00391 00392 #define MULADDC_STOP \ 00393 "addze %%r5, %%r5 \n\t" \ 00394 "addi %%r4, %%r4, 4 \n\t" \ 00395 "addi %%r3, %%r3, 4 \n\t" \ 00396 "stw %%r5, %0 \n\t" \ 00397 "stw %%r4, %1 \n\t" \ 00398 "stw %%r3, %2 \n\t" \ 00399 : "=m" (c), "=m" (d), "=m" (s) \ 00400 : "m" (s), "m" (d), "m" (c), "m" (b) \ 00401 : "r3", "r4", "r5", "r6", "r7", "r8", "r9" \ 00402 ); 00403 00404 #endif /* __MACH__ && __APPLE__ */ 00405 00406 #endif /* PPC32 */ 00407 00408 /* 00409 * The Sparc(64) assembly is reported to be broken. 00410 * Disable it for now, until we're able to fix it. 00411 */ 00412 #if 0 && defined(__sparc__) 00413 #if defined(__sparc64__) 00414 00415 #define MULADDC_INIT \ 00416 asm( \ 00417 "ldx %3, %%o0 \n\t" \ 00418 "ldx %4, %%o1 \n\t" \ 00419 "ld %5, %%o2 \n\t" \ 00420 "ld %6, %%o3 \n\t" 00421 00422 #define MULADDC_CORE \ 00423 "ld [%%o0], %%o4 \n\t" \ 00424 "inc 4, %%o0 \n\t" \ 00425 "ld [%%o1], %%o5 \n\t" \ 00426 "umul %%o3, %%o4, %%o4 \n\t" \ 00427 "addcc %%o4, %%o2, %%o4 \n\t" \ 00428 "rd %%y, %%g1 \n\t" \ 00429 "addx %%g1, 0, %%g1 \n\t" \ 00430 "addcc %%o4, %%o5, %%o4 \n\t" \ 00431 "st %%o4, [%%o1] \n\t" \ 00432 "addx %%g1, 0, %%o2 \n\t" \ 00433 "inc 4, %%o1 \n\t" 00434 00435 #define MULADDC_STOP \ 00436 "st %%o2, %0 \n\t" \ 00437 "stx %%o1, %1 \n\t" \ 00438 "stx %%o0, %2 \n\t" \ 00439 : "=m" (c), "=m" (d), "=m" (s) \ 00440 : "m" (s), "m" (d), "m" (c), "m" (b) \ 00441 : "g1", "o0", "o1", "o2", "o3", "o4", \ 00442 "o5" \ 00443 ); 00444 00445 #else /* __sparc64__ */ 00446 00447 #define MULADDC_INIT \ 00448 asm( \ 00449 "ld %3, %%o0 \n\t" \ 00450 "ld %4, %%o1 \n\t" \ 00451 "ld %5, %%o2 \n\t" \ 00452 "ld %6, %%o3 \n\t" 00453 00454 #define MULADDC_CORE \ 00455 
"ld [%%o0], %%o4 \n\t" \ 00456 "inc 4, %%o0 \n\t" \ 00457 "ld [%%o1], %%o5 \n\t" \ 00458 "umul %%o3, %%o4, %%o4 \n\t" \ 00459 "addcc %%o4, %%o2, %%o4 \n\t" \ 00460 "rd %%y, %%g1 \n\t" \ 00461 "addx %%g1, 0, %%g1 \n\t" \ 00462 "addcc %%o4, %%o5, %%o4 \n\t" \ 00463 "st %%o4, [%%o1] \n\t" \ 00464 "addx %%g1, 0, %%o2 \n\t" \ 00465 "inc 4, %%o1 \n\t" 00466 00467 #define MULADDC_STOP \ 00468 "st %%o2, %0 \n\t" \ 00469 "st %%o1, %1 \n\t" \ 00470 "st %%o0, %2 \n\t" \ 00471 : "=m" (c), "=m" (d), "=m" (s) \ 00472 : "m" (s), "m" (d), "m" (c), "m" (b) \ 00473 : "g1", "o0", "o1", "o2", "o3", "o4", \ 00474 "o5" \ 00475 ); 00476 00477 #endif /* __sparc64__ */ 00478 #endif /* __sparc__ */ 00479 00480 #if defined(__microblaze__) || defined(microblaze) 00481 00482 #define MULADDC_INIT \ 00483 asm( \ 00484 "lwi r3, %3 \n\t" \ 00485 "lwi r4, %4 \n\t" \ 00486 "lwi r5, %5 \n\t" \ 00487 "lwi r6, %6 \n\t" \ 00488 "andi r7, r6, 0xffff \n\t" \ 00489 "bsrli r6, r6, 16 \n\t" 00490 00491 #define MULADDC_CORE \ 00492 "lhui r8, r3, 0 \n\t" \ 00493 "addi r3, r3, 2 \n\t" \ 00494 "lhui r9, r3, 0 \n\t" \ 00495 "addi r3, r3, 2 \n\t" \ 00496 "mul r10, r9, r6 \n\t" \ 00497 "mul r11, r8, r7 \n\t" \ 00498 "mul r12, r9, r7 \n\t" \ 00499 "mul r13, r8, r6 \n\t" \ 00500 "bsrli r8, r10, 16 \n\t" \ 00501 "bsrli r9, r11, 16 \n\t" \ 00502 "add r13, r13, r8 \n\t" \ 00503 "add r13, r13, r9 \n\t" \ 00504 "bslli r10, r10, 16 \n\t" \ 00505 "bslli r11, r11, 16 \n\t" \ 00506 "add r12, r12, r10 \n\t" \ 00507 "addc r13, r13, r0 \n\t" \ 00508 "add r12, r12, r11 \n\t" \ 00509 "addc r13, r13, r0 \n\t" \ 00510 "lwi r10, r4, 0 \n\t" \ 00511 "add r12, r12, r10 \n\t" \ 00512 "addc r13, r13, r0 \n\t" \ 00513 "add r12, r12, r5 \n\t" \ 00514 "addc r5, r13, r0 \n\t" \ 00515 "swi r12, r4, 0 \n\t" \ 00516 "addi r4, r4, 4 \n\t" 00517 00518 #define MULADDC_STOP \ 00519 "swi r5, %0 \n\t" \ 00520 "swi r4, %1 \n\t" \ 00521 "swi r3, %2 \n\t" \ 00522 : "=m" (c), "=m" (d), "=m" (s) \ 00523 : "m" (s), "m" (d), "m" (c), "m" (b) \ 00524 : "r3", 
"r4" "r5", "r6", "r7", "r8", \ 00525 "r9", "r10", "r11", "r12", "r13" \ 00526 ); 00527 00528 #endif /* MicroBlaze */ 00529 00530 #if defined(__tricore__) 00531 00532 #define MULADDC_INIT \ 00533 asm( \ 00534 "ld.a %%a2, %3 \n\t" \ 00535 "ld.a %%a3, %4 \n\t" \ 00536 "ld.w %%d4, %5 \n\t" \ 00537 "ld.w %%d1, %6 \n\t" \ 00538 "xor %%d5, %%d5 \n\t" 00539 00540 #define MULADDC_CORE \ 00541 "ld.w %%d0, [%%a2+] \n\t" \ 00542 "madd.u %%e2, %%e4, %%d0, %%d1 \n\t" \ 00543 "ld.w %%d0, [%%a3] \n\t" \ 00544 "addx %%d2, %%d2, %%d0 \n\t" \ 00545 "addc %%d3, %%d3, 0 \n\t" \ 00546 "mov %%d4, %%d3 \n\t" \ 00547 "st.w [%%a3+], %%d2 \n\t" 00548 00549 #define MULADDC_STOP \ 00550 "st.w %0, %%d4 \n\t" \ 00551 "st.a %1, %%a3 \n\t" \ 00552 "st.a %2, %%a2 \n\t" \ 00553 : "=m" (c), "=m" (d), "=m" (s) \ 00554 : "m" (s), "m" (d), "m" (c), "m" (b) \ 00555 : "d0", "d1", "e2", "d4", "a2", "a3" \ 00556 ); 00557 00558 #endif /* TriCore */ 00559 00560 /* 00561 * gcc -O0 by default uses r7 for the frame pointer, so it complains about our 00562 * use of r7 below, unless -fomit-frame-pointer is passed. Unfortunately, 00563 * passing that option is not easy when building with yotta. 00564 * 00565 * On the other hand, -fomit-frame-pointer is implied by any -Ox options with 00566 * x !=0, which we can detect using __OPTIMIZE__ (which is also defined by 00567 * clang and armcc5 under the same conditions). 00568 * 00569 * So, only use the optimized assembly below for optimized build, which avoids 00570 * the build error and is pretty reasonable anyway. 
00571 */ 00572 #if defined(__GNUC__) && !defined(__OPTIMIZE__) 00573 #define MULADDC_CANNOT_USE_R7 00574 #endif 00575 00576 #if defined(__arm__) && !defined(MULADDC_CANNOT_USE_R7) 00577 00578 #if defined(__thumb__) && !defined(__thumb2__) 00579 00580 #define MULADDC_INIT \ 00581 asm( \ 00582 "ldr r0, %3 \n\t" \ 00583 "ldr r1, %4 \n\t" \ 00584 "ldr r2, %5 \n\t" \ 00585 "ldr r3, %6 \n\t" \ 00586 "lsr r7, r3, #16 \n\t" \ 00587 "mov r9, r7 \n\t" \ 00588 "lsl r7, r3, #16 \n\t" \ 00589 "lsr r7, r7, #16 \n\t" \ 00590 "mov r8, r7 \n\t" 00591 00592 #define MULADDC_CORE \ 00593 "ldmia r0!, {r6} \n\t" \ 00594 "lsr r7, r6, #16 \n\t" \ 00595 "lsl r6, r6, #16 \n\t" \ 00596 "lsr r6, r6, #16 \n\t" \ 00597 "mov r4, r8 \n\t" \ 00598 "mul r4, r6 \n\t" \ 00599 "mov r3, r9 \n\t" \ 00600 "mul r6, r3 \n\t" \ 00601 "mov r5, r9 \n\t" \ 00602 "mul r5, r7 \n\t" \ 00603 "mov r3, r8 \n\t" \ 00604 "mul r7, r3 \n\t" \ 00605 "lsr r3, r6, #16 \n\t" \ 00606 "add r5, r5, r3 \n\t" \ 00607 "lsr r3, r7, #16 \n\t" \ 00608 "add r5, r5, r3 \n\t" \ 00609 "add r4, r4, r2 \n\t" \ 00610 "mov r2, #0 \n\t" \ 00611 "adc r5, r2 \n\t" \ 00612 "lsl r3, r6, #16 \n\t" \ 00613 "add r4, r4, r3 \n\t" \ 00614 "adc r5, r2 \n\t" \ 00615 "lsl r3, r7, #16 \n\t" \ 00616 "add r4, r4, r3 \n\t" \ 00617 "adc r5, r2 \n\t" \ 00618 "ldr r3, [r1] \n\t" \ 00619 "add r4, r4, r3 \n\t" \ 00620 "adc r2, r5 \n\t" \ 00621 "stmia r1!, {r4} \n\t" 00622 00623 #define MULADDC_STOP \ 00624 "str r2, %0 \n\t" \ 00625 "str r1, %1 \n\t" \ 00626 "str r0, %2 \n\t" \ 00627 : "=m" (c), "=m" (d), "=m" (s) \ 00628 : "m" (s), "m" (d), "m" (c), "m" (b) \ 00629 : "r0", "r1", "r2", "r3", "r4", "r5", \ 00630 "r6", "r7", "r8", "r9", "cc" \ 00631 ); 00632 00633 #else 00634 00635 #define MULADDC_INIT \ 00636 asm( \ 00637 "ldr r0, %3 \n\t" \ 00638 "ldr r1, %4 \n\t" \ 00639 "ldr r2, %5 \n\t" \ 00640 "ldr r3, %6 \n\t" 00641 00642 #define MULADDC_CORE \ 00643 "ldr r4, [r0], #4 \n\t" \ 00644 "mov r5, #0 \n\t" \ 00645 "ldr r6, [r1] \n\t" \ 00646 "umlal r2, r5, r3, r4 
\n\t" \ 00647 "adds r7, r6, r2 \n\t" \ 00648 "adc r2, r5, #0 \n\t" \ 00649 "str r7, [r1], #4 \n\t" 00650 00651 #define MULADDC_STOP \ 00652 "str r2, %0 \n\t" \ 00653 "str r1, %1 \n\t" \ 00654 "str r0, %2 \n\t" \ 00655 : "=m" (c), "=m" (d), "=m" (s) \ 00656 : "m" (s), "m" (d), "m" (c), "m" (b) \ 00657 : "r0", "r1", "r2", "r3", "r4", "r5", \ 00658 "r6", "r7", "cc" \ 00659 ); 00660 00661 #endif /* Thumb */ 00662 00663 #endif /* ARMv3 */ 00664 00665 #if defined(__alpha__) 00666 00667 #define MULADDC_INIT \ 00668 asm( \ 00669 "ldq $1, %3 \n\t" \ 00670 "ldq $2, %4 \n\t" \ 00671 "ldq $3, %5 \n\t" \ 00672 "ldq $4, %6 \n\t" 00673 00674 #define MULADDC_CORE \ 00675 "ldq $6, 0($1) \n\t" \ 00676 "addq $1, 8, $1 \n\t" \ 00677 "mulq $6, $4, $7 \n\t" \ 00678 "umulh $6, $4, $6 \n\t" \ 00679 "addq $7, $3, $7 \n\t" \ 00680 "cmpult $7, $3, $3 \n\t" \ 00681 "ldq $5, 0($2) \n\t" \ 00682 "addq $7, $5, $7 \n\t" \ 00683 "cmpult $7, $5, $5 \n\t" \ 00684 "stq $7, 0($2) \n\t" \ 00685 "addq $2, 8, $2 \n\t" \ 00686 "addq $6, $3, $3 \n\t" \ 00687 "addq $5, $3, $3 \n\t" 00688 00689 #define MULADDC_STOP \ 00690 "stq $3, %0 \n\t" \ 00691 "stq $2, %1 \n\t" \ 00692 "stq $1, %2 \n\t" \ 00693 : "=m" (c), "=m" (d), "=m" (s) \ 00694 : "m" (s), "m" (d), "m" (c), "m" (b) \ 00695 : "$1", "$2", "$3", "$4", "$5", "$6", "$7" \ 00696 ); 00697 #endif /* Alpha */ 00698 00699 #if defined(__mips__) && !defined(__mips64) 00700 00701 #define MULADDC_INIT \ 00702 asm( \ 00703 "lw $10, %3 \n\t" \ 00704 "lw $11, %4 \n\t" \ 00705 "lw $12, %5 \n\t" \ 00706 "lw $13, %6 \n\t" 00707 00708 #define MULADDC_CORE \ 00709 "lw $14, 0($10) \n\t" \ 00710 "multu $13, $14 \n\t" \ 00711 "addi $10, $10, 4 \n\t" \ 00712 "mflo $14 \n\t" \ 00713 "mfhi $9 \n\t" \ 00714 "addu $14, $12, $14 \n\t" \ 00715 "lw $15, 0($11) \n\t" \ 00716 "sltu $12, $14, $12 \n\t" \ 00717 "addu $15, $14, $15 \n\t" \ 00718 "sltu $14, $15, $14 \n\t" \ 00719 "addu $12, $12, $9 \n\t" \ 00720 "sw $15, 0($11) \n\t" \ 00721 "addu $12, $12, $14 \n\t" \ 00722 "addi $11, 
$11, 4 \n\t" 00723 00724 #define MULADDC_STOP \ 00725 "sw $12, %0 \n\t" \ 00726 "sw $11, %1 \n\t" \ 00727 "sw $10, %2 \n\t" \ 00728 : "=m" (c), "=m" (d), "=m" (s) \ 00729 : "m" (s), "m" (d), "m" (c), "m" (b) \ 00730 : "$9", "$10", "$11", "$12", "$13", "$14", "$15" \ 00731 ); 00732 00733 #endif /* MIPS */ 00734 #endif /* GNUC */ 00735 00736 #if (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__) 00737 00738 #define MULADDC_INIT \ 00739 __asm mov esi, s \ 00740 __asm mov edi, d \ 00741 __asm mov ecx, c \ 00742 __asm mov ebx, b 00743 00744 #define MULADDC_CORE \ 00745 __asm lodsd \ 00746 __asm mul ebx \ 00747 __asm add eax, ecx \ 00748 __asm adc edx, 0 \ 00749 __asm add eax, [edi] \ 00750 __asm adc edx, 0 \ 00751 __asm mov ecx, edx \ 00752 __asm stosd 00753 00754 #if defined(MBEDTLS_HAVE_SSE2) 00755 00756 #define EMIT __asm _emit 00757 00758 #define MULADDC_HUIT \ 00759 EMIT 0x0F EMIT 0x6E EMIT 0xC9 \ 00760 EMIT 0x0F EMIT 0x6E EMIT 0xC3 \ 00761 EMIT 0x0F EMIT 0x6E EMIT 0x1F \ 00762 EMIT 0x0F EMIT 0xD4 EMIT 0xCB \ 00763 EMIT 0x0F EMIT 0x6E EMIT 0x16 \ 00764 EMIT 0x0F EMIT 0xF4 EMIT 0xD0 \ 00765 EMIT 0x0F EMIT 0x6E EMIT 0x66 EMIT 0x04 \ 00766 EMIT 0x0F EMIT 0xF4 EMIT 0xE0 \ 00767 EMIT 0x0F EMIT 0x6E EMIT 0x76 EMIT 0x08 \ 00768 EMIT 0x0F EMIT 0xF4 EMIT 0xF0 \ 00769 EMIT 0x0F EMIT 0x6E EMIT 0x7E EMIT 0x0C \ 00770 EMIT 0x0F EMIT 0xF4 EMIT 0xF8 \ 00771 EMIT 0x0F EMIT 0xD4 EMIT 0xCA \ 00772 EMIT 0x0F EMIT 0x6E EMIT 0x5F EMIT 0x04 \ 00773 EMIT 0x0F EMIT 0xD4 EMIT 0xDC \ 00774 EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x08 \ 00775 EMIT 0x0F EMIT 0xD4 EMIT 0xEE \ 00776 EMIT 0x0F EMIT 0x6E EMIT 0x67 EMIT 0x0C \ 00777 EMIT 0x0F EMIT 0xD4 EMIT 0xFC \ 00778 EMIT 0x0F EMIT 0x7E EMIT 0x0F \ 00779 EMIT 0x0F EMIT 0x6E EMIT 0x56 EMIT 0x10 \ 00780 EMIT 0x0F EMIT 0xF4 EMIT 0xD0 \ 00781 EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \ 00782 EMIT 0x0F EMIT 0x6E EMIT 0x66 EMIT 0x14 \ 00783 EMIT 0x0F EMIT 0xF4 EMIT 0xE0 \ 00784 EMIT 0x0F EMIT 0xD4 EMIT 0xCB \ 00785 EMIT 0x0F EMIT 0x6E EMIT 0x76 
EMIT 0x18 \ 00786 EMIT 0x0F EMIT 0xF4 EMIT 0xF0 \ 00787 EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x04 \ 00788 EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \ 00789 EMIT 0x0F EMIT 0x6E EMIT 0x5E EMIT 0x1C \ 00790 EMIT 0x0F EMIT 0xF4 EMIT 0xD8 \ 00791 EMIT 0x0F EMIT 0xD4 EMIT 0xCD \ 00792 EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x10 \ 00793 EMIT 0x0F EMIT 0xD4 EMIT 0xD5 \ 00794 EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x08 \ 00795 EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \ 00796 EMIT 0x0F EMIT 0xD4 EMIT 0xCF \ 00797 EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x14 \ 00798 EMIT 0x0F EMIT 0xD4 EMIT 0xE5 \ 00799 EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x0C \ 00800 EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \ 00801 EMIT 0x0F EMIT 0xD4 EMIT 0xCA \ 00802 EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x18 \ 00803 EMIT 0x0F EMIT 0xD4 EMIT 0xF5 \ 00804 EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x10 \ 00805 EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \ 00806 EMIT 0x0F EMIT 0xD4 EMIT 0xCC \ 00807 EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x1C \ 00808 EMIT 0x0F EMIT 0xD4 EMIT 0xDD \ 00809 EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x14 \ 00810 EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \ 00811 EMIT 0x0F EMIT 0xD4 EMIT 0xCE \ 00812 EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x18 \ 00813 EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \ 00814 EMIT 0x0F EMIT 0xD4 EMIT 0xCB \ 00815 EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x1C \ 00816 EMIT 0x83 EMIT 0xC7 EMIT 0x20 \ 00817 EMIT 0x83 EMIT 0xC6 EMIT 0x20 \ 00818 EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \ 00819 EMIT 0x0F EMIT 0x7E EMIT 0xC9 00820 00821 #define MULADDC_STOP \ 00822 EMIT 0x0F EMIT 0x77 \ 00823 __asm mov c, ecx \ 00824 __asm mov d, edi \ 00825 __asm mov s, esi \ 00826 00827 #else 00828 00829 #define MULADDC_STOP \ 00830 __asm mov c, ecx \ 00831 __asm mov d, edi \ 00832 __asm mov s, esi \ 00833 00834 #endif /* SSE2 */ 00835 #endif /* MSVC */ 00836 00837 #endif /* MBEDTLS_HAVE_ASM */ 00838 00839 #if !defined(MULADDC_CORE) 00840 #if defined(MBEDTLS_HAVE_UDBL) 00841 00842 #define MULADDC_INIT \ 00843 { \ 00844 mbedtls_t_udbl r; 
\ 00845 mbedtls_mpi_uint r0, r1; 00846 00847 #define MULADDC_CORE \ 00848 r = *(s++) * (mbedtls_t_udbl) b; \ 00849 r0 = (mbedtls_mpi_uint) r; \ 00850 r1 = (mbedtls_mpi_uint)( r >> biL ); \ 00851 r0 += c; r1 += (r0 < c); \ 00852 r0 += *d; r1 += (r0 < *d); \ 00853 c = r1; *(d++) = r0; 00854 00855 #define MULADDC_STOP \ 00856 } 00857 00858 #else 00859 #define MULADDC_INIT \ 00860 { \ 00861 mbedtls_mpi_uint s0, s1, b0, b1; \ 00862 mbedtls_mpi_uint r0, r1, rx, ry; \ 00863 b0 = ( b << biH ) >> biH; \ 00864 b1 = ( b >> biH ); 00865 00866 #define MULADDC_CORE \ 00867 s0 = ( *s << biH ) >> biH; \ 00868 s1 = ( *s >> biH ); s++; \ 00869 rx = s0 * b1; r0 = s0 * b0; \ 00870 ry = s1 * b0; r1 = s1 * b1; \ 00871 r1 += ( rx >> biH ); \ 00872 r1 += ( ry >> biH ); \ 00873 rx <<= biH; ry <<= biH; \ 00874 r0 += rx; r1 += (r0 < rx); \ 00875 r0 += ry; r1 += (r0 < ry); \ 00876 r0 += c; r1 += (r0 < c); \ 00877 r0 += *d; r1 += (r0 < *d); \ 00878 c = r1; *(d++) = r0; 00879 00880 #define MULADDC_STOP \ 00881 } 00882 00883 #endif /* C (generic) */ 00884 #endif /* C (longlong) */ 00885 00886 #endif /* bn_mul.h */
Generated on Fri Jul 22 2022 04:53:45 by
1.7.2
