Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository Zip archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
wolfcrypt/src/sha512.c@15:117db924cf7c, 2018-08-18 (annotated)
- Committer:
- wolfSSL
- Date:
- Sat Aug 18 22:20:43 2018 +0000
- Revision:
- 15:117db924cf7c
wolfSSL 3.15.3
Who changed what in which revision?
| User | Revision | Line number | New contents of line |
|---|---|---|---|
| wolfSSL | 15:117db924cf7c | 1 | /* sha512.c |
| wolfSSL | 15:117db924cf7c | 2 | * |
| wolfSSL | 15:117db924cf7c | 3 | * Copyright (C) 2006-2017 wolfSSL Inc. |
| wolfSSL | 15:117db924cf7c | 4 | * |
| wolfSSL | 15:117db924cf7c | 5 | * This file is part of wolfSSL. |
| wolfSSL | 15:117db924cf7c | 6 | * |
| wolfSSL | 15:117db924cf7c | 7 | * wolfSSL is free software; you can redistribute it and/or modify |
| wolfSSL | 15:117db924cf7c | 8 | * it under the terms of the GNU General Public License as published by |
| wolfSSL | 15:117db924cf7c | 9 | * the Free Software Foundation; either version 2 of the License, or |
| wolfSSL | 15:117db924cf7c | 10 | * (at your option) any later version. |
| wolfSSL | 15:117db924cf7c | 11 | * |
| wolfSSL | 15:117db924cf7c | 12 | * wolfSSL is distributed in the hope that it will be useful, |
| wolfSSL | 15:117db924cf7c | 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| wolfSSL | 15:117db924cf7c | 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| wolfSSL | 15:117db924cf7c | 15 | * GNU General Public License for more details. |
| wolfSSL | 15:117db924cf7c | 16 | * |
| wolfSSL | 15:117db924cf7c | 17 | * You should have received a copy of the GNU General Public License |
| wolfSSL | 15:117db924cf7c | 18 | * along with this program; if not, write to the Free Software |
| wolfSSL | 15:117db924cf7c | 19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA |
| wolfSSL | 15:117db924cf7c | 20 | */ |
| wolfSSL | 15:117db924cf7c | 21 | |
| wolfSSL | 15:117db924cf7c | 22 | |
| wolfSSL | 15:117db924cf7c | 23 | #ifdef HAVE_CONFIG_H |
| wolfSSL | 15:117db924cf7c | 24 | #include <config.h> |
| wolfSSL | 15:117db924cf7c | 25 | #endif |
| wolfSSL | 15:117db924cf7c | 26 | |
| wolfSSL | 15:117db924cf7c | 27 | #include <wolfssl/wolfcrypt/settings.h> |
| wolfSSL | 15:117db924cf7c | 28 | |
| wolfSSL | 15:117db924cf7c | 29 | #if defined(WOLFSSL_SHA512) || defined(WOLFSSL_SHA384) |
| wolfSSL | 15:117db924cf7c | 30 | |
| wolfSSL | 15:117db924cf7c | 31 | #if defined(HAVE_FIPS) && \ |
| wolfSSL | 15:117db924cf7c | 32 | defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2) |
| wolfSSL | 15:117db924cf7c | 33 | |
| wolfSSL | 15:117db924cf7c | 34 | /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */ |
| wolfSSL | 15:117db924cf7c | 35 | #define FIPS_NO_WRAPPERS |
| wolfSSL | 15:117db924cf7c | 36 | |
| wolfSSL | 15:117db924cf7c | 37 | #ifdef USE_WINDOWS_API |
| wolfSSL | 15:117db924cf7c | 38 | #pragma code_seg(".fipsA$k") |
| wolfSSL | 15:117db924cf7c | 39 | #pragma const_seg(".fipsB$k") |
| wolfSSL | 15:117db924cf7c | 40 | #endif |
| wolfSSL | 15:117db924cf7c | 41 | #endif |
| wolfSSL | 15:117db924cf7c | 42 | |
| wolfSSL | 15:117db924cf7c | 43 | #include <wolfssl/wolfcrypt/sha512.h> |
| wolfSSL | 15:117db924cf7c | 44 | #include <wolfssl/wolfcrypt/error-crypt.h> |
| wolfSSL | 15:117db924cf7c | 45 | #include <wolfssl/wolfcrypt/cpuid.h> |
| wolfSSL | 15:117db924cf7c | 46 | |
| wolfSSL | 15:117db924cf7c | 47 | /* deprecated USE_SLOW_SHA2 (replaced with USE_SLOW_SHA512) */ |
| wolfSSL | 15:117db924cf7c | 48 | #if defined(USE_SLOW_SHA2) && !defined(USE_SLOW_SHA512) |
| wolfSSL | 15:117db924cf7c | 49 | #define USE_SLOW_SHA512 |
| wolfSSL | 15:117db924cf7c | 50 | #endif |
| wolfSSL | 15:117db924cf7c | 51 | |
| wolfSSL | 15:117db924cf7c | 52 | /* fips wrapper calls, user can call direct */ |
| wolfSSL | 15:117db924cf7c | 53 | #if defined(HAVE_FIPS) && \ |
| wolfSSL | 15:117db924cf7c | 54 | (!defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION < 2)) |
| wolfSSL | 15:117db924cf7c | 55 | |
| wolfSSL | 15:117db924cf7c | 56 | #ifdef WOLFSSL_SHA512 |
| wolfSSL | 15:117db924cf7c | 57 | |
| wolfSSL | 15:117db924cf7c | 58 | int wc_InitSha512(wc_Sha512* sha) /* FIPS wrapper: forwards a non-NULL context to InitSha512_fips() */ |
| wolfSSL | 15:117db924cf7c | 59 | { |
| wolfSSL | 15:117db924cf7c | 60 | if (sha != NULL) { |
| wolfSSL | 15:117db924cf7c | 61 | return InitSha512_fips(sha); |
| wolfSSL | 15:117db924cf7c | 62 | } |
| wolfSSL | 15:117db924cf7c | 63 | /* no context supplied: report the bad argument without calling the FIPS routine */ |
| wolfSSL | 15:117db924cf7c | 64 | return BAD_FUNC_ARG; |
| wolfSSL | 15:117db924cf7c | 65 | } |
| wolfSSL | 15:117db924cf7c | 66 | int wc_InitSha512_ex(wc_Sha512* sha, void* heap, int devId) /* FIPS wrapper: heap and devId are accepted but ignored */ |
| wolfSSL | 15:117db924cf7c | 67 | { |
| wolfSSL | 15:117db924cf7c | 68 | (void)devId; |
| wolfSSL | 15:117db924cf7c | 69 | (void)heap; |
| wolfSSL | 15:117db924cf7c | 70 | if (sha != NULL) { |
| wolfSSL | 15:117db924cf7c | 71 | return InitSha512_fips(sha); |
| wolfSSL | 15:117db924cf7c | 72 | } |
| wolfSSL | 15:117db924cf7c | 73 | return BAD_FUNC_ARG; /* NULL context */ |
| wolfSSL | 15:117db924cf7c | 74 | } |
| wolfSSL | 15:117db924cf7c | 75 | int wc_Sha512Update(wc_Sha512* sha, const byte* data, word32 len) /* FIPS wrapper: validates arguments, then calls Sha512Update_fips() */ |
| wolfSSL | 15:117db924cf7c | 76 | { |
| wolfSSL | 15:117db924cf7c | 77 | if (sha != NULL && (data != NULL || len == 0)) { |
| wolfSSL | 15:117db924cf7c | 78 | return Sha512Update_fips(sha, data, len); |
| wolfSSL | 15:117db924cf7c | 79 | } |
| wolfSSL | 15:117db924cf7c | 80 | /* NULL context, or NULL data with a non-zero length */ |
| wolfSSL | 15:117db924cf7c | 81 | return BAD_FUNC_ARG; |
| wolfSSL | 15:117db924cf7c | 82 | } |
| wolfSSL | 15:117db924cf7c | 83 | int wc_Sha512Final(wc_Sha512* sha, byte* out) /* FIPS wrapper: validates arguments, then calls Sha512Final_fips() */ |
| wolfSSL | 15:117db924cf7c | 84 | { |
| wolfSSL | 15:117db924cf7c | 85 | if (sha != NULL && out != NULL) { |
| wolfSSL | 15:117db924cf7c | 86 | return Sha512Final_fips(sha, out); |
| wolfSSL | 15:117db924cf7c | 87 | } |
| wolfSSL | 15:117db924cf7c | 88 | /* either the context or the output buffer is missing */ |
| wolfSSL | 15:117db924cf7c | 89 | return BAD_FUNC_ARG; |
| wolfSSL | 15:117db924cf7c | 90 | } |
| wolfSSL | 15:117db924cf7c | 91 | void wc_Sha512Free(wc_Sha512* sha) |
| wolfSSL | 15:117db924cf7c | 92 | { |
| wolfSSL | 15:117db924cf7c | 93 | /* Freeing is not supported in FIPS builds, so this is a no-op */ |
| wolfSSL | 15:117db924cf7c | 94 | (void)sha; |
| wolfSSL | 15:117db924cf7c | 95 | } |
| wolfSSL | 15:117db924cf7c | 96 | #endif |
| wolfSSL | 15:117db924cf7c | 97 | |
| wolfSSL | 15:117db924cf7c | 98 | #if defined(WOLFSSL_SHA384) || defined(HAVE_AESGCM) |
| wolfSSL | 15:117db924cf7c | 99 | int wc_InitSha384(wc_Sha384* sha) /* FIPS wrapper: forwards a non-NULL context to InitSha384_fips() */ |
| wolfSSL | 15:117db924cf7c | 100 | { |
| wolfSSL | 15:117db924cf7c | 101 | if (sha != NULL) { |
| wolfSSL | 15:117db924cf7c | 102 | return InitSha384_fips(sha); |
| wolfSSL | 15:117db924cf7c | 103 | } |
| wolfSSL | 15:117db924cf7c | 104 | return BAD_FUNC_ARG; /* NULL context */ |
| wolfSSL | 15:117db924cf7c | 105 | } |
| wolfSSL | 15:117db924cf7c | 106 | int wc_InitSha384_ex(wc_Sha384* sha, void* heap, int devId) /* FIPS wrapper: heap and devId are accepted but ignored */ |
| wolfSSL | 15:117db924cf7c | 107 | { |
| wolfSSL | 15:117db924cf7c | 108 | (void)devId; |
| wolfSSL | 15:117db924cf7c | 109 | (void)heap; |
| wolfSSL | 15:117db924cf7c | 110 | if (sha != NULL) { |
| wolfSSL | 15:117db924cf7c | 111 | return InitSha384_fips(sha); |
| wolfSSL | 15:117db924cf7c | 112 | } |
| wolfSSL | 15:117db924cf7c | 113 | return BAD_FUNC_ARG; /* NULL context */ |
| wolfSSL | 15:117db924cf7c | 114 | } |
| wolfSSL | 15:117db924cf7c | 115 | int wc_Sha384Update(wc_Sha384* sha, const byte* data, word32 len) /* FIPS wrapper: validates arguments, then calls Sha384Update_fips() */ |
| wolfSSL | 15:117db924cf7c | 116 | { |
| wolfSSL | 15:117db924cf7c | 117 | if (sha != NULL && (data != NULL || len == 0)) { |
| wolfSSL | 15:117db924cf7c | 118 | return Sha384Update_fips(sha, data, len); |
| wolfSSL | 15:117db924cf7c | 119 | } |
| wolfSSL | 15:117db924cf7c | 120 | return BAD_FUNC_ARG; /* NULL context, or NULL data with a non-zero length */ |
| wolfSSL | 15:117db924cf7c | 121 | } |
| wolfSSL | 15:117db924cf7c | 122 | int wc_Sha384Final(wc_Sha384* sha, byte* out) /* FIPS wrapper: validates arguments, then calls Sha384Final_fips() */ |
| wolfSSL | 15:117db924cf7c | 123 | { |
| wolfSSL | 15:117db924cf7c | 124 | if (sha != NULL && out != NULL) { |
| wolfSSL | 15:117db924cf7c | 125 | return Sha384Final_fips(sha, out); |
| wolfSSL | 15:117db924cf7c | 126 | } |
| wolfSSL | 15:117db924cf7c | 127 | return BAD_FUNC_ARG; /* either the context or the output buffer is missing */ |
| wolfSSL | 15:117db924cf7c | 128 | } |
| wolfSSL | 15:117db924cf7c | 129 | void wc_Sha384Free(wc_Sha384* sha) |
| wolfSSL | 15:117db924cf7c | 130 | { |
| wolfSSL | 15:117db924cf7c | 131 | /* Freeing is not supported in FIPS builds, so this is a no-op */ |
| wolfSSL | 15:117db924cf7c | 132 | (void)sha; |
| wolfSSL | 15:117db924cf7c | 133 | } |
| wolfSSL | 15:117db924cf7c | 134 | #endif /* WOLFSSL_SHA384 || HAVE_AESGCM */ |
| wolfSSL | 15:117db924cf7c | 135 | |
| wolfSSL | 15:117db924cf7c | 136 | #else /* else build without fips, or for FIPS v2 */ |
| wolfSSL | 15:117db924cf7c | 137 | |
| wolfSSL | 15:117db924cf7c | 138 | #include <wolfssl/wolfcrypt/logging.h> |
| wolfSSL | 15:117db924cf7c | 139 | |
| wolfSSL | 15:117db924cf7c | 140 | #ifdef NO_INLINE |
| wolfSSL | 15:117db924cf7c | 141 | #include <wolfssl/wolfcrypt/misc.h> |
| wolfSSL | 15:117db924cf7c | 142 | #else |
| wolfSSL | 15:117db924cf7c | 143 | #define WOLFSSL_MISC_INCLUDED |
| wolfSSL | 15:117db924cf7c | 144 | #include <wolfcrypt/src/misc.c> |
| wolfSSL | 15:117db924cf7c | 145 | #endif |
| wolfSSL | 15:117db924cf7c | 146 | |
| wolfSSL | 15:117db924cf7c | 147 | |
| wolfSSL | 15:117db924cf7c | 148 | #if defined(USE_INTEL_SPEEDUP) |
| wolfSSL | 15:117db924cf7c | 149 | #define HAVE_INTEL_AVX1 |
| wolfSSL | 15:117db924cf7c | 150 | |
| wolfSSL | 15:117db924cf7c | 151 | #if defined(__GNUC__) && ((__GNUC__ < 4) || \ |
| wolfSSL | 15:117db924cf7c | 152 | (__GNUC__ == 4 && __GNUC_MINOR__ <= 8)) |
| wolfSSL | 15:117db924cf7c | 153 | #define NO_AVX2_SUPPORT |
| wolfSSL | 15:117db924cf7c | 154 | #endif |
| wolfSSL | 15:117db924cf7c | 155 | #if defined(__clang__) && ((__clang_major__ < 3) || \ |
| wolfSSL | 15:117db924cf7c | 156 | (__clang_major__ == 3 && __clang_minor__ <= 5)) |
| wolfSSL | 15:117db924cf7c | 157 | #define NO_AVX2_SUPPORT |
| wolfSSL | 15:117db924cf7c | 158 | #elif defined(__clang__) && defined(NO_AVX2_SUPPORT) |
| wolfSSL | 15:117db924cf7c | 159 | #undef NO_AVX2_SUPPORT |
| wolfSSL | 15:117db924cf7c | 160 | #endif |
| wolfSSL | 15:117db924cf7c | 161 | |
| wolfSSL | 15:117db924cf7c | 162 | #define HAVE_INTEL_AVX1 |
| wolfSSL | 15:117db924cf7c | 163 | #ifndef NO_AVX2_SUPPORT |
| wolfSSL | 15:117db924cf7c | 164 | #define HAVE_INTEL_AVX2 |
| wolfSSL | 15:117db924cf7c | 165 | #endif |
| wolfSSL | 15:117db924cf7c | 166 | #endif |
| wolfSSL | 15:117db924cf7c | 167 | |
| wolfSSL | 15:117db924cf7c | 168 | #if defined(HAVE_INTEL_AVX1) |
| wolfSSL | 15:117db924cf7c | 169 | /* #define DEBUG_XMM */ |
| wolfSSL | 15:117db924cf7c | 170 | #endif |
| wolfSSL | 15:117db924cf7c | 171 | |
| wolfSSL | 15:117db924cf7c | 172 | #if defined(HAVE_INTEL_AVX2) |
| wolfSSL | 15:117db924cf7c | 173 | #define HAVE_INTEL_RORX |
| wolfSSL | 15:117db924cf7c | 174 | /* #define DEBUG_YMM */ |
| wolfSSL | 15:117db924cf7c | 175 | #endif |
| wolfSSL | 15:117db924cf7c | 176 | |
| wolfSSL | 15:117db924cf7c | 177 | #if defined(HAVE_BYTEREVERSE64) && \ |
| wolfSSL | 15:117db924cf7c | 178 | !defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2) |
| wolfSSL | 15:117db924cf7c | 179 | #define ByteReverseWords64(out, in, size) ByteReverseWords64_1(out, size) /* in-place only: `in` is ignored and `out` is byte-swapped where it lies */ |
| wolfSSL | 15:117db924cf7c | 180 | #define ByteReverseWords64_1(buf, size) /* bswapq on each 64-bit word of buf; size is a byte count; arguments are parenthesised so expressions such as a + b expand correctly */ \ |
| wolfSSL | 15:117db924cf7c | 181 | { unsigned int i ;\ |
| wolfSSL | 15:117db924cf7c | 182 | for(i=0; i< (size)/sizeof(word64); i++){\ |
| wolfSSL | 15:117db924cf7c | 183 | __asm__ volatile("bswapq %0":"+r"((buf)[i])::) ;\ |
| wolfSSL | 15:117db924cf7c | 184 | }\ |
| wolfSSL | 15:117db924cf7c | 185 | } |
| wolfSSL | 15:117db924cf7c | 186 | #endif |
| wolfSSL | 15:117db924cf7c | 187 | |
| wolfSSL | 15:117db924cf7c | 188 | #if defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_HASH) |
| wolfSSL | 15:117db924cf7c | 189 | /* functions defined in wolfcrypt/src/port/caam/caam_sha.c */ |
| wolfSSL | 15:117db924cf7c | 190 | #else |
| wolfSSL | 15:117db924cf7c | 191 | |
| wolfSSL | 15:117db924cf7c | 192 | #ifdef WOLFSSL_SHA512 |
| wolfSSL | 15:117db924cf7c | 193 | |
| wolfSSL | 15:117db924cf7c | 194 | static int InitSha512(wc_Sha512* sha512) /* reset a context to the SHA-512 starting state; returns 0, or BAD_FUNC_ARG for NULL */ |
| wolfSSL | 15:117db924cf7c | 195 | { |
| wolfSSL | 15:117db924cf7c | 196 | if (!sha512) |
| wolfSSL | 15:117db924cf7c | 197 | return BAD_FUNC_ARG; |
| wolfSSL | 15:117db924cf7c | 198 | /* nothing buffered and nothing counted yet */ |
| wolfSSL | 15:117db924cf7c | 199 | sha512->hiLen = 0; |
| wolfSSL | 15:117db924cf7c | 200 | sha512->loLen = 0; |
| wolfSSL | 15:117db924cf7c | 201 | sha512->buffLen = 0; |
| wolfSSL | 15:117db924cf7c | 202 | /* load the eight 64-bit initial digest words */ |
| wolfSSL | 15:117db924cf7c | 203 | sha512->digest[0] = W64LIT(0x6a09e667f3bcc908); |
| wolfSSL | 15:117db924cf7c | 204 | sha512->digest[1] = W64LIT(0xbb67ae8584caa73b); |
| wolfSSL | 15:117db924cf7c | 205 | sha512->digest[2] = W64LIT(0x3c6ef372fe94f82b); |
| wolfSSL | 15:117db924cf7c | 206 | sha512->digest[3] = W64LIT(0xa54ff53a5f1d36f1); |
| wolfSSL | 15:117db924cf7c | 207 | sha512->digest[4] = W64LIT(0x510e527fade682d1); |
| wolfSSL | 15:117db924cf7c | 208 | sha512->digest[5] = W64LIT(0x9b05688c2b3e6c1f); |
| wolfSSL | 15:117db924cf7c | 209 | sha512->digest[6] = W64LIT(0x1f83d9abfb41bd6b); |
| wolfSSL | 15:117db924cf7c | 210 | sha512->digest[7] = W64LIT(0x5be0cd19137e2179); |
| wolfSSL | 15:117db924cf7c | 211 | |
| wolfSSL | 15:117db924cf7c | 212 | return 0; |
| wolfSSL | 15:117db924cf7c | 213 | } |
| wolfSSL | 15:117db924cf7c | 214 | |
| wolfSSL | 15:117db924cf7c | 215 | #endif /* WOLFSSL_SHA512 */ |
| wolfSSL | 15:117db924cf7c | 216 | |
| wolfSSL | 15:117db924cf7c | 217 | /* Hardware Acceleration */ |
| wolfSSL | 15:117db924cf7c | 218 | #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) |
| wolfSSL | 15:117db924cf7c | 219 | |
| wolfSSL | 15:117db924cf7c | 220 | #ifdef WOLFSSL_SHA512 |
| wolfSSL | 15:117db924cf7c | 221 | |
| wolfSSL | 15:117db924cf7c | 222 | /***** |
| wolfSSL | 15:117db924cf7c | 223 | Intel AVX1/AVX2 Macro Control Structure |
| wolfSSL | 15:117db924cf7c | 224 | |
| wolfSSL | 15:117db924cf7c | 225 | #if defined(USE_INTEL_SPEEDUP) |
| wolfSSL | 15:117db924cf7c | 226 | #define HAVE_INTEL_AVX1 |
| wolfSSL | 15:117db924cf7c | 227 | #define HAVE_INTEL_AVX2 |
| wolfSSL | 15:117db924cf7c | 228 | #endif |
| wolfSSL | 15:117db924cf7c | 229 | |
| wolfSSL | 15:117db924cf7c | 230 | int InitSha512(wc_Sha512* sha512) { |
| wolfSSL | 15:117db924cf7c | 231 | Save/Recover XMM, YMM |
| wolfSSL | 15:117db924cf7c | 232 | ... |
| wolfSSL | 15:117db924cf7c | 233 | |
| wolfSSL | 15:117db924cf7c | 234 | Check Intel AVX cpuid flags |
| wolfSSL | 15:117db924cf7c | 235 | } |
| wolfSSL | 15:117db924cf7c | 236 | |
| wolfSSL | 15:117db924cf7c | 237 | #if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2) |
| wolfSSL | 15:117db924cf7c | 238 | Transform_Sha512_AVX1(); # Function prototype |
| wolfSSL | 15:117db924cf7c | 239 | Transform_Sha512_AVX2(); # |
| wolfSSL | 15:117db924cf7c | 240 | #endif |
| wolfSSL | 15:117db924cf7c | 241 | |
| wolfSSL | 15:117db924cf7c | 242 | _Transform_Sha512() { # Native Transform Function body |
| wolfSSL | 15:117db924cf7c | 243 | |
| wolfSSL | 15:117db924cf7c | 244 | } |
| wolfSSL | 15:117db924cf7c | 245 | |
| wolfSSL | 15:117db924cf7c | 246 | int Sha512Update() { |
| wolfSSL | 15:117db924cf7c | 247 | Save/Recover XMM, YMM |
| wolfSSL | 15:117db924cf7c | 248 | ... |
| wolfSSL | 15:117db924cf7c | 249 | } |
| wolfSSL | 15:117db924cf7c | 250 | |
| wolfSSL | 15:117db924cf7c | 251 | int Sha512Final() { |
| wolfSSL | 15:117db924cf7c | 252 | Save/Recover XMM, YMM |
| wolfSSL | 15:117db924cf7c | 253 | ... |
| wolfSSL | 15:117db924cf7c | 254 | } |
| wolfSSL | 15:117db924cf7c | 255 | |
| wolfSSL | 15:117db924cf7c | 256 | |
| wolfSSL | 15:117db924cf7c | 257 | #if defined(HAVE_INTEL_AVX1) |
| wolfSSL | 15:117db924cf7c | 258 | |
| wolfSSL | 15:117db924cf7c | 259 | XMM Instructions/INLINE asm Definitions |
| wolfSSL | 15:117db924cf7c | 260 | |
| wolfSSL | 15:117db924cf7c | 261 | #endif |
| wolfSSL | 15:117db924cf7c | 262 | |
| wolfSSL | 15:117db924cf7c | 263 | #if defined(HAVE_INTEL_AVX2) |
| wolfSSL | 15:117db924cf7c | 264 | |
| wolfSSL | 15:117db924cf7c | 265 | YMM Instructions/INLINE asm Definitions |
| wolfSSL | 15:117db924cf7c | 266 | |
| wolfSSL | 15:117db924cf7c | 267 | #endif |
| wolfSSL | 15:117db924cf7c | 268 | |
| wolfSSL | 15:117db924cf7c | 269 | #if defined(HAVE_INTEL_AVX1) |
| wolfSSL | 15:117db924cf7c | 270 | |
| wolfSSL | 15:117db924cf7c | 271 | int Transform_Sha512_AVX1() { |
| wolfSSL | 15:117db924cf7c | 272 | Stitched Message Sched/Round |
| wolfSSL | 15:117db924cf7c | 273 | } |
| wolfSSL | 15:117db924cf7c | 274 | |
| wolfSSL | 15:117db924cf7c | 275 | #endif |
| wolfSSL | 15:117db924cf7c | 276 | |
| wolfSSL | 15:117db924cf7c | 277 | #if defined(HAVE_INTEL_AVX2) |
| wolfSSL | 15:117db924cf7c | 278 | |
| wolfSSL | 15:117db924cf7c | 279 | int Transform_Sha512_AVX2() { |
| wolfSSL | 15:117db924cf7c | 280 | Stitched Message Sched/Round |
| wolfSSL | 15:117db924cf7c | 281 | } |
| wolfSSL | 15:117db924cf7c | 282 | #endif |
| wolfSSL | 15:117db924cf7c | 283 | |
| wolfSSL | 15:117db924cf7c | 284 | */ |
| wolfSSL | 15:117db924cf7c | 285 | |
| wolfSSL | 15:117db924cf7c | 286 | |
| wolfSSL | 15:117db924cf7c | 287 | /* Each platform needs to query cpuid to see if AVX1/AVX2 (and BMI2) are |
| wolfSSL | 15:117db924cf7c | 288 | * supported. Also, let's set up a macro for proper linkage w/o ABI conflicts |
| wolfSSL | 15:117db924cf7c | 289 | */ |
| wolfSSL | 15:117db924cf7c | 290 | |
| wolfSSL | 15:117db924cf7c | 291 | #if defined(HAVE_INTEL_AVX1) /* forward declarations of the AVX1 transforms; bodies are not in this part of the file */ |
| wolfSSL | 15:117db924cf7c | 292 | static int Transform_Sha512_AVX1(wc_Sha512 *sha512); |
| wolfSSL | 15:117db924cf7c | 293 | static int Transform_Sha512_AVX1_Len(wc_Sha512 *sha512, word32 len); |
| wolfSSL | 15:117db924cf7c | 294 | #endif |
| wolfSSL | 15:117db924cf7c | 295 | #if defined(HAVE_INTEL_AVX2) /* forward declarations of the AVX2 transforms and, with RORX, the RORX flavours */ |
| wolfSSL | 15:117db924cf7c | 296 | static int Transform_Sha512_AVX2(wc_Sha512 *sha512); |
| wolfSSL | 15:117db924cf7c | 297 | static int Transform_Sha512_AVX2_Len(wc_Sha512 *sha512, word32 len); |
| wolfSSL | 15:117db924cf7c | 298 | #if defined(HAVE_INTEL_RORX) |
| wolfSSL | 15:117db924cf7c | 299 | static int Transform_Sha512_AVX1_RORX(wc_Sha512 *sha512); |
| wolfSSL | 15:117db924cf7c | 300 | static int Transform_Sha512_AVX1_RORX_Len(wc_Sha512 *sha512, |
| wolfSSL | 15:117db924cf7c | 301 | word32 len); |
| wolfSSL | 15:117db924cf7c | 302 | static int Transform_Sha512_AVX2_RORX(wc_Sha512 *sha512); |
| wolfSSL | 15:117db924cf7c | 303 | static int Transform_Sha512_AVX2_RORX_Len(wc_Sha512 *sha512, |
| wolfSSL | 15:117db924cf7c | 304 | word32 len); |
| wolfSSL | 15:117db924cf7c | 305 | #endif |
| wolfSSL | 15:117db924cf7c | 306 | #endif |
| wolfSSL | 15:117db924cf7c | 307 | static int _Transform_Sha512(wc_Sha512 *sha512); /* portable C transform, defined further down */ |
| wolfSSL | 15:117db924cf7c | 308 | static int (*Transform_Sha512_p)(wc_Sha512* sha512) = _Transform_Sha512; /* active transform; the C version until Sha512_SetTransform() picks another */ |
| wolfSSL | 15:117db924cf7c | 309 | static int (*Transform_Sha512_Len_p)(wc_Sha512* sha512, word32 len) = NULL; /* stays NULL unless Sha512_SetTransform() selects an AVX implementation */ |
| wolfSSL | 15:117db924cf7c | 310 | static int transform_check = 0; /* set to 1 once Sha512_SetTransform() has made its choice */ |
| wolfSSL | 15:117db924cf7c | 311 | static int intel_flags; /* value returned by cpuid_get_flags(), stored by Sha512_SetTransform() */ |
| wolfSSL | 15:117db924cf7c | 312 | #define Transform_Sha512(sha512) (*Transform_Sha512_p)(sha512) /* call through the selected pointer */ |
| wolfSSL | 15:117db924cf7c | 313 | #define Transform_Sha512_Len(sha512, len) /* NOTE(review): dereferences Transform_Sha512_Len_p, which may still be NULL - confirm callers check first */ \ |
| wolfSSL | 15:117db924cf7c | 314 | (*Transform_Sha512_Len_p)(sha512, len) |
| wolfSSL | 15:117db924cf7c | 315 | |
| wolfSSL | 15:117db924cf7c | 316 | static void Sha512_SetTransform(void) /* choose the transform function pointers from the CPU feature flags; only the first call does any work */ |
| wolfSSL | 15:117db924cf7c | 317 | { |
| wolfSSL | 15:117db924cf7c | 318 | if (transform_check) /* a previous call already made the choice */ |
| wolfSSL | 15:117db924cf7c | 319 | return; |
| wolfSSL | 15:117db924cf7c | 320 | |
| wolfSSL | 15:117db924cf7c | 321 | intel_flags = cpuid_get_flags(); |
| wolfSSL | 15:117db924cf7c | 322 | |
| wolfSSL | 15:117db924cf7c | 323 | #if defined(HAVE_INTEL_AVX2) |
| wolfSSL | 15:117db924cf7c | 324 | if (IS_INTEL_AVX2(intel_flags)) { |
| wolfSSL | 15:117db924cf7c | 325 | #ifdef HAVE_INTEL_RORX |
| wolfSSL | 15:117db924cf7c | 326 | if (IS_INTEL_BMI2(intel_flags)) { /* AVX2 and BMI2 reported: use the RORX variant */ |
| wolfSSL | 15:117db924cf7c | 327 | Transform_Sha512_p = Transform_Sha512_AVX2_RORX; |
| wolfSSL | 15:117db924cf7c | 328 | Transform_Sha512_Len_p = Transform_Sha512_AVX2_RORX_Len; |
| wolfSSL | 15:117db924cf7c | 329 | } |
| wolfSSL | 15:117db924cf7c | 330 | else |
| wolfSSL | 15:117db924cf7c | 331 | #endif |
| wolfSSL | 15:117db924cf7c | 332 | if (1) { /* always true, so plain AVX2 is used whenever the RORX branch above is not */ |
| wolfSSL | 15:117db924cf7c | 333 | Transform_Sha512_p = Transform_Sha512_AVX2; |
| wolfSSL | 15:117db924cf7c | 334 | Transform_Sha512_Len_p = Transform_Sha512_AVX2_Len; |
| wolfSSL | 15:117db924cf7c | 335 | } |
| wolfSSL | 15:117db924cf7c | 336 | #ifdef HAVE_INTEL_RORX |
| wolfSSL | 15:117db924cf7c | 337 | else { /* NOTE(review): unreachable because of the `if (1)` above; AVX1_RORX is never selected */ |
| wolfSSL | 15:117db924cf7c | 338 | Transform_Sha512_p = Transform_Sha512_AVX1_RORX; |
| wolfSSL | 15:117db924cf7c | 339 | Transform_Sha512_Len_p = Transform_Sha512_AVX1_RORX_Len; |
| wolfSSL | 15:117db924cf7c | 340 | } |
| wolfSSL | 15:117db924cf7c | 341 | #endif |
| wolfSSL | 15:117db924cf7c | 342 | } |
| wolfSSL | 15:117db924cf7c | 343 | else |
| wolfSSL | 15:117db924cf7c | 344 | #endif |
| wolfSSL | 15:117db924cf7c | 345 | #if defined(HAVE_INTEL_AVX1) |
| wolfSSL | 15:117db924cf7c | 346 | if (IS_INTEL_AVX1(intel_flags)) { |
| wolfSSL | 15:117db924cf7c | 347 | Transform_Sha512_p = Transform_Sha512_AVX1; |
| wolfSSL | 15:117db924cf7c | 348 | Transform_Sha512_Len_p = Transform_Sha512_AVX1_Len; |
| wolfSSL | 15:117db924cf7c | 349 | } |
| wolfSSL | 15:117db924cf7c | 350 | else |
| wolfSSL | 15:117db924cf7c | 351 | #endif |
| wolfSSL | 15:117db924cf7c | 352 | Transform_Sha512_p = _Transform_Sha512; /* no AVX reported: portable C transform; Transform_Sha512_Len_p is not modified here */ |
| wolfSSL | 15:117db924cf7c | 353 | |
| wolfSSL | 15:117db924cf7c | 354 | transform_check = 1; /* later calls return immediately */ |
| wolfSSL | 15:117db924cf7c | 355 | } |
| wolfSSL | 15:117db924cf7c | 356 | |
| wolfSSL | 15:117db924cf7c | 357 | int wc_InitSha512_ex(wc_Sha512* sha512, void* heap, int devId) |
| wolfSSL | 15:117db924cf7c | 358 | { |
| wolfSSL | 15:117db924cf7c | 359 | int ret = InitSha512(sha512); |
| wolfSSL | 15:117db924cf7c | 360 | |
| wolfSSL | 15:117db924cf7c | 361 | (void)heap; |
| wolfSSL | 15:117db924cf7c | 362 | (void)devId; |
| wolfSSL | 15:117db924cf7c | 363 | |
| wolfSSL | 15:117db924cf7c | 364 | Sha512_SetTransform(); |
| wolfSSL | 15:117db924cf7c | 365 | |
| wolfSSL | 15:117db924cf7c | 366 | return ret; |
| wolfSSL | 15:117db924cf7c | 367 | } |
| wolfSSL | 15:117db924cf7c | 368 | |
| wolfSSL | 15:117db924cf7c | 369 | #endif /* WOLFSSL_SHA512 */ |
| wolfSSL | 15:117db924cf7c | 370 | |
| wolfSSL | 15:117db924cf7c | 371 | #else |
| wolfSSL | 15:117db924cf7c | 372 | #define Transform_Sha512(sha512) _Transform_Sha512(sha512) |
| wolfSSL | 15:117db924cf7c | 373 | |
| wolfSSL | 15:117db924cf7c | 374 | #ifdef WOLFSSL_SHA512 |
| wolfSSL | 15:117db924cf7c | 375 | |
| wolfSSL | 15:117db924cf7c | 376 | int wc_InitSha512_ex(wc_Sha512* sha512, void* heap, int devId) /* non-accelerated build: init the context and store the heap hint; returns 0, BAD_FUNC_ARG, or the async init result */ |
| wolfSSL | 15:117db924cf7c | 377 | { |
| wolfSSL | 15:117db924cf7c | 378 | int ret = 0; |
| wolfSSL | 15:117db924cf7c | 379 | |
| wolfSSL | 15:117db924cf7c | 380 | if (sha512 == NULL) |
| wolfSSL | 15:117db924cf7c | 381 | return BAD_FUNC_ARG; |
| wolfSSL | 15:117db924cf7c | 382 | |
| wolfSSL | 15:117db924cf7c | 383 | sha512->heap = heap; /* kept in the context; passed to wolfAsync_DevCtxInit() below in async builds */ |
| wolfSSL | 15:117db924cf7c | 384 | |
| wolfSSL | 15:117db924cf7c | 385 | ret = InitSha512(sha512); /* sets the initial digest words and zeroes the length counters */ |
| wolfSSL | 15:117db924cf7c | 386 | if (ret != 0) |
| wolfSSL | 15:117db924cf7c | 387 | return ret; |
| wolfSSL | 15:117db924cf7c | 388 | |
| wolfSSL | 15:117db924cf7c | 389 | #ifdef WOLFSSL_SMALL_STACK_CACHE |
| wolfSSL | 15:117db924cf7c | 390 | sha512->W = NULL; /* _Transform_Sha512() allocates W on first use when it is NULL */ |
| wolfSSL | 15:117db924cf7c | 391 | #endif |
| wolfSSL | 15:117db924cf7c | 392 | |
| wolfSSL | 15:117db924cf7c | 393 | #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA512) |
| wolfSSL | 15:117db924cf7c | 394 | ret = wolfAsync_DevCtxInit(&sha512->asyncDev, |
| wolfSSL | 15:117db924cf7c | 395 | WOLFSSL_ASYNC_MARKER_SHA512, sha512->heap, devId); |
| wolfSSL | 15:117db924cf7c | 396 | #else |
| wolfSSL | 15:117db924cf7c | 397 | (void)devId; /* only consumed by the async path */ |
| wolfSSL | 15:117db924cf7c | 398 | #endif /* WOLFSSL_ASYNC_CRYPT */ |
| wolfSSL | 15:117db924cf7c | 399 | |
| wolfSSL | 15:117db924cf7c | 400 | return ret; |
| wolfSSL | 15:117db924cf7c | 401 | } |
| wolfSSL | 15:117db924cf7c | 402 | |
| wolfSSL | 15:117db924cf7c | 403 | #endif /* WOLFSSL_SHA512 */ |
| wolfSSL | 15:117db924cf7c | 404 | |
| wolfSSL | 15:117db924cf7c | 405 | #endif /* Hardware Acceleration */ |
| wolfSSL | 15:117db924cf7c | 406 | |
| wolfSSL | 15:117db924cf7c | 407 | static const word64 K512[80] = { /* 80 64-bit round constants; _Transform_Sha512() reads them as K[i+j], one per round */ |
| wolfSSL | 15:117db924cf7c | 408 | W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd), |
| wolfSSL | 15:117db924cf7c | 409 | W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc), |
| wolfSSL | 15:117db924cf7c | 410 | W64LIT(0x3956c25bf348b538), W64LIT(0x59f111f1b605d019), |
| wolfSSL | 15:117db924cf7c | 411 | W64LIT(0x923f82a4af194f9b), W64LIT(0xab1c5ed5da6d8118), |
| wolfSSL | 15:117db924cf7c | 412 | W64LIT(0xd807aa98a3030242), W64LIT(0x12835b0145706fbe), |
| wolfSSL | 15:117db924cf7c | 413 | W64LIT(0x243185be4ee4b28c), W64LIT(0x550c7dc3d5ffb4e2), |
| wolfSSL | 15:117db924cf7c | 414 | W64LIT(0x72be5d74f27b896f), W64LIT(0x80deb1fe3b1696b1), |
| wolfSSL | 15:117db924cf7c | 415 | W64LIT(0x9bdc06a725c71235), W64LIT(0xc19bf174cf692694), |
| wolfSSL | 15:117db924cf7c | 416 | W64LIT(0xe49b69c19ef14ad2), W64LIT(0xefbe4786384f25e3), |
| wolfSSL | 15:117db924cf7c | 417 | W64LIT(0x0fc19dc68b8cd5b5), W64LIT(0x240ca1cc77ac9c65), |
| wolfSSL | 15:117db924cf7c | 418 | W64LIT(0x2de92c6f592b0275), W64LIT(0x4a7484aa6ea6e483), |
| wolfSSL | 15:117db924cf7c | 419 | W64LIT(0x5cb0a9dcbd41fbd4), W64LIT(0x76f988da831153b5), |
| wolfSSL | 15:117db924cf7c | 420 | W64LIT(0x983e5152ee66dfab), W64LIT(0xa831c66d2db43210), |
| wolfSSL | 15:117db924cf7c | 421 | W64LIT(0xb00327c898fb213f), W64LIT(0xbf597fc7beef0ee4), |
| wolfSSL | 15:117db924cf7c | 422 | W64LIT(0xc6e00bf33da88fc2), W64LIT(0xd5a79147930aa725), |
| wolfSSL | 15:117db924cf7c | 423 | W64LIT(0x06ca6351e003826f), W64LIT(0x142929670a0e6e70), |
| wolfSSL | 15:117db924cf7c | 424 | W64LIT(0x27b70a8546d22ffc), W64LIT(0x2e1b21385c26c926), |
| wolfSSL | 15:117db924cf7c | 425 | W64LIT(0x4d2c6dfc5ac42aed), W64LIT(0x53380d139d95b3df), |
| wolfSSL | 15:117db924cf7c | 426 | W64LIT(0x650a73548baf63de), W64LIT(0x766a0abb3c77b2a8), |
| wolfSSL | 15:117db924cf7c | 427 | W64LIT(0x81c2c92e47edaee6), W64LIT(0x92722c851482353b), |
| wolfSSL | 15:117db924cf7c | 428 | W64LIT(0xa2bfe8a14cf10364), W64LIT(0xa81a664bbc423001), |
| wolfSSL | 15:117db924cf7c | 429 | W64LIT(0xc24b8b70d0f89791), W64LIT(0xc76c51a30654be30), |
| wolfSSL | 15:117db924cf7c | 430 | W64LIT(0xd192e819d6ef5218), W64LIT(0xd69906245565a910), |
| wolfSSL | 15:117db924cf7c | 431 | W64LIT(0xf40e35855771202a), W64LIT(0x106aa07032bbd1b8), |
| wolfSSL | 15:117db924cf7c | 432 | W64LIT(0x19a4c116b8d2d0c8), W64LIT(0x1e376c085141ab53), |
| wolfSSL | 15:117db924cf7c | 433 | W64LIT(0x2748774cdf8eeb99), W64LIT(0x34b0bcb5e19b48a8), |
| wolfSSL | 15:117db924cf7c | 434 | W64LIT(0x391c0cb3c5c95a63), W64LIT(0x4ed8aa4ae3418acb), |
| wolfSSL | 15:117db924cf7c | 435 | W64LIT(0x5b9cca4f7763e373), W64LIT(0x682e6ff3d6b2b8a3), |
| wolfSSL | 15:117db924cf7c | 436 | W64LIT(0x748f82ee5defb2fc), W64LIT(0x78a5636f43172f60), |
| wolfSSL | 15:117db924cf7c | 437 | W64LIT(0x84c87814a1f0ab72), W64LIT(0x8cc702081a6439ec), |
| wolfSSL | 15:117db924cf7c | 438 | W64LIT(0x90befffa23631e28), W64LIT(0xa4506cebde82bde9), |
| wolfSSL | 15:117db924cf7c | 439 | W64LIT(0xbef9a3f7b2c67915), W64LIT(0xc67178f2e372532b), |
| wolfSSL | 15:117db924cf7c | 440 | W64LIT(0xca273eceea26619c), W64LIT(0xd186b8c721c0c207), |
| wolfSSL | 15:117db924cf7c | 441 | W64LIT(0xeada7dd6cde0eb1e), W64LIT(0xf57d4f7fee6ed178), |
| wolfSSL | 15:117db924cf7c | 442 | W64LIT(0x06f067aa72176fba), W64LIT(0x0a637dc5a2c898a6), |
| wolfSSL | 15:117db924cf7c | 443 | W64LIT(0x113f9804bef90dae), W64LIT(0x1b710b35131c471b), |
| wolfSSL | 15:117db924cf7c | 444 | W64LIT(0x28db77f523047d84), W64LIT(0x32caab7b40c72493), |
| wolfSSL | 15:117db924cf7c | 445 | W64LIT(0x3c9ebe0a15c9bebc), W64LIT(0x431d67c49c100d4c), |
| wolfSSL | 15:117db924cf7c | 446 | W64LIT(0x4cc5d4becb3e42b6), W64LIT(0x597f299cfc657e2a), |
| wolfSSL | 15:117db924cf7c | 447 | W64LIT(0x5fcb6fab3ad6faec), W64LIT(0x6c44198c4a475817) |
| wolfSSL | 15:117db924cf7c | 448 | }; |
| wolfSSL | 15:117db924cf7c | 449 | |
| wolfSSL | 15:117db924cf7c | 450 | #define blk0(i) (W[i] = sha512->buffer[i]) /* used by R() when j == 0: copy word i of the block buffer into W */ |
| wolfSSL | 15:117db924cf7c | 451 | /* blk2: used by R() when j != 0; updates W[i & 15] in place, treating W as a 16-entry circular buffer */ |
| wolfSSL | 15:117db924cf7c | 452 | #define blk2(i) (\ |
| wolfSSL | 15:117db924cf7c | 453 | W[ i & 15] += \ |
| wolfSSL | 15:117db924cf7c | 454 | s1(W[(i-2) & 15])+ \ |
| wolfSSL | 15:117db924cf7c | 455 | W[(i-7) & 15] + \ |
| wolfSSL | 15:117db924cf7c | 456 | s0(W[(i-15) & 15]) \ |
| wolfSSL | 15:117db924cf7c | 457 | ) |
| wolfSSL | 15:117db924cf7c | 458 | |
| wolfSSL | 15:117db924cf7c | 459 | #define Ch(x,y,z) (z ^ (x & (y ^ z))) |
| wolfSSL | 15:117db924cf7c | 460 | #define Maj(x,y,z) ((x & y) | (z & (x | y))) |
| wolfSSL | 15:117db924cf7c | 461 | /* a(i)..h(i): the eight working words in T[]; the index rotates with i so no values are moved between rounds */ |
| wolfSSL | 15:117db924cf7c | 462 | #define a(i) T[(0-i) & 7] |
| wolfSSL | 15:117db924cf7c | 463 | #define b(i) T[(1-i) & 7] |
| wolfSSL | 15:117db924cf7c | 464 | #define c(i) T[(2-i) & 7] |
| wolfSSL | 15:117db924cf7c | 465 | #define d(i) T[(3-i) & 7] |
| wolfSSL | 15:117db924cf7c | 466 | #define e(i) T[(4-i) & 7] |
| wolfSSL | 15:117db924cf7c | 467 | #define f(i) T[(5-i) & 7] |
| wolfSSL | 15:117db924cf7c | 468 | #define g(i) T[(6-i) & 7] |
| wolfSSL | 15:117db924cf7c | 469 | #define h(i) T[(7-i) & 7] |
| wolfSSL | 15:117db924cf7c | 470 | /* S0/S1 and s0/s1: rotate/shift mixing functions; the macro arguments are not parenthesised, so pass simple operands only */ |
| wolfSSL | 15:117db924cf7c | 471 | #define S0(x) (rotrFixed64(x,28) ^ rotrFixed64(x,34) ^ rotrFixed64(x,39)) |
| wolfSSL | 15:117db924cf7c | 472 | #define S1(x) (rotrFixed64(x,14) ^ rotrFixed64(x,18) ^ rotrFixed64(x,41)) |
| wolfSSL | 15:117db924cf7c | 473 | #define s0(x) (rotrFixed64(x,1) ^ rotrFixed64(x,8) ^ (x>>7)) |
| wolfSSL | 15:117db924cf7c | 474 | #define s1(x) (rotrFixed64(x,19) ^ rotrFixed64(x,61) ^ (x>>6)) |
| wolfSSL | 15:117db924cf7c | 475 | /* R(i): one round; needs K, W, T, j and sha512 in scope where it is expanded; it expands to three statements, so only use it inside braces */ |
| wolfSSL | 15:117db924cf7c | 476 | #define R(i) \ |
| wolfSSL | 15:117db924cf7c | 477 | h(i) += S1(e(i)) + Ch(e(i),f(i),g(i)) + K[i+j] + (j ? blk2(i) : blk0(i)); \ |
| wolfSSL | 15:117db924cf7c | 478 | d(i) += h(i); \ |
| wolfSSL | 15:117db924cf7c | 479 | h(i) += S0(a(i)) + Maj(a(i),b(i),c(i)) |
| wolfSSL | 15:117db924cf7c | 480 | |
/* Portable C compression step: runs 80 rounds and adds the result into
 * sha512->digest.
 *
 * The round macro R and the working-variable macros a()..h() are expanded
 * inside this function and refer to the locals K, j, T and W by name, so those
 * identifiers must not be renamed. The message block itself is presumably read
 * through blk0/blk2, which are defined outside this view.
 *
 * sha512  hash state whose digest is updated
 * returns 0 on success, MEMORY_E if the schedule buffer W cannot be allocated
 */
static int _Transform_Sha512(wc_Sha512* sha512)
{
    const word64* K = K512;   /* round-constant table, indexed by R as K[i+j] */
    word32 j;                 /* round-group offset: 0, 16, 32, 48, 64 */
    word64 T[8];              /* working copy of the eight digest words */

#ifdef WOLFSSL_SMALL_STACK_CACHE
    /* W lives on the heap and is cached in the context between calls; it is
     * released in wc_Sha512Free. */
    word64* W = sha512->W;
    if (W == NULL) {
        W = (word64*) XMALLOC(sizeof(word64) * 16, NULL,
                              DYNAMIC_TYPE_TMP_BUFFER);
        if (W == NULL)
            return MEMORY_E;
        sha512->W = W;
    }
#elif defined(WOLFSSL_SMALL_STACK)
    /* W is allocated per call and freed before returning. */
    word64* W;
    W = (word64*) XMALLOC(sizeof(word64) * 16, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    if (W == NULL)
        return MEMORY_E;
#else
    word64 W[16];
#endif

    /* Copy digest to working vars */
    XMEMCPY(T, sha512->digest, sizeof(T));

#ifdef USE_SLOW_SHA512
    /* over twice as small, but 50% slower */
    /* 80 operations, not unrolled */
    for (j = 0; j < 80; j += 16) {
        int m;
        for (m = 0; m < 16; m++) { /* braces needed here for macros {} */
            R(m);
        }
    }
#else
    /* 80 operations, partially loop unrolled */
    for (j = 0; j < 80; j += 16) {
        R( 0); R( 1); R( 2); R( 3);
        R( 4); R( 5); R( 6); R( 7);
        R( 8); R( 9); R(10); R(11);
        R(12); R(13); R(14); R(15);
    }
#endif /* USE_SLOW_SHA512 */

    /* Add the working vars back into digest */
    sha512->digest[0] += a(0);
    sha512->digest[1] += b(0);
    sha512->digest[2] += c(0);
    sha512->digest[3] += d(0);
    sha512->digest[4] += e(0);
    sha512->digest[5] += f(0);
    sha512->digest[6] += g(0);
    sha512->digest[7] += h(0);

    /* Wipe variables so intermediate state is not left behind in memory */
    ForceZero(W, sizeof(word64) * 16);
    ForceZero(T, sizeof(T));

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SMALL_STACK_CACHE)
    XFREE(W, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif

    return 0;
}
| wolfSSL | 15:117db924cf7c | 547 | |
| wolfSSL | 15:117db924cf7c | 548 | |
| wolfSSL | 15:117db924cf7c | 549 | static WC_INLINE void AddLength(wc_Sha512* sha512, word32 len) |
| wolfSSL | 15:117db924cf7c | 550 | { |
| wolfSSL | 15:117db924cf7c | 551 | word64 tmp = sha512->loLen; |
| wolfSSL | 15:117db924cf7c | 552 | if ( (sha512->loLen += len) < tmp) |
| wolfSSL | 15:117db924cf7c | 553 | sha512->hiLen++; /* carry low to high */ |
| wolfSSL | 15:117db924cf7c | 554 | } |
| wolfSSL | 15:117db924cf7c | 555 | |
| wolfSSL | 15:117db924cf7c | 556 | static WC_INLINE int Sha512Update(wc_Sha512* sha512, const byte* data, word32 len) |
| wolfSSL | 15:117db924cf7c | 557 | { |
| wolfSSL | 15:117db924cf7c | 558 | int ret = 0; |
| wolfSSL | 15:117db924cf7c | 559 | /* do block size increments */ |
| wolfSSL | 15:117db924cf7c | 560 | byte* local = (byte*)sha512->buffer; |
| wolfSSL | 15:117db924cf7c | 561 | |
| wolfSSL | 15:117db924cf7c | 562 | /* check that internal buffLen is valid */ |
| wolfSSL | 15:117db924cf7c | 563 | if (sha512->buffLen >= WC_SHA512_BLOCK_SIZE) |
| wolfSSL | 15:117db924cf7c | 564 | return BUFFER_E; |
| wolfSSL | 15:117db924cf7c | 565 | |
| wolfSSL | 15:117db924cf7c | 566 | if (sha512->buffLen > 0) { |
| wolfSSL | 15:117db924cf7c | 567 | word32 add = min(len, WC_SHA512_BLOCK_SIZE - sha512->buffLen); |
| wolfSSL | 15:117db924cf7c | 568 | if (add > 0) { |
| wolfSSL | 15:117db924cf7c | 569 | XMEMCPY(&local[sha512->buffLen], data, add); |
| wolfSSL | 15:117db924cf7c | 570 | |
| wolfSSL | 15:117db924cf7c | 571 | sha512->buffLen += add; |
| wolfSSL | 15:117db924cf7c | 572 | data += add; |
| wolfSSL | 15:117db924cf7c | 573 | len -= add; |
| wolfSSL | 15:117db924cf7c | 574 | } |
| wolfSSL | 15:117db924cf7c | 575 | |
| wolfSSL | 15:117db924cf7c | 576 | if (sha512->buffLen == WC_SHA512_BLOCK_SIZE) { |
| wolfSSL | 15:117db924cf7c | 577 | #if defined(LITTLE_ENDIAN_ORDER) |
| wolfSSL | 15:117db924cf7c | 578 | #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) |
| wolfSSL | 15:117db924cf7c | 579 | if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) |
| wolfSSL | 15:117db924cf7c | 580 | #endif |
| wolfSSL | 15:117db924cf7c | 581 | { |
| wolfSSL | 15:117db924cf7c | 582 | ByteReverseWords64(sha512->buffer, sha512->buffer, |
| wolfSSL | 15:117db924cf7c | 583 | WC_SHA512_BLOCK_SIZE); |
| wolfSSL | 15:117db924cf7c | 584 | } |
| wolfSSL | 15:117db924cf7c | 585 | #endif |
| wolfSSL | 15:117db924cf7c | 586 | ret = Transform_Sha512(sha512); |
| wolfSSL | 15:117db924cf7c | 587 | if (ret == 0) { |
| wolfSSL | 15:117db924cf7c | 588 | AddLength(sha512, WC_SHA512_BLOCK_SIZE); |
| wolfSSL | 15:117db924cf7c | 589 | sha512->buffLen = 0; |
| wolfSSL | 15:117db924cf7c | 590 | } |
| wolfSSL | 15:117db924cf7c | 591 | else |
| wolfSSL | 15:117db924cf7c | 592 | len = 0; |
| wolfSSL | 15:117db924cf7c | 593 | } |
| wolfSSL | 15:117db924cf7c | 594 | } |
| wolfSSL | 15:117db924cf7c | 595 | |
| wolfSSL | 15:117db924cf7c | 596 | #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) |
| wolfSSL | 15:117db924cf7c | 597 | if (Transform_Sha512_Len_p != NULL) { |
| wolfSSL | 15:117db924cf7c | 598 | word32 blocksLen = len & ~(WC_SHA512_BLOCK_SIZE-1); |
| wolfSSL | 15:117db924cf7c | 599 | |
| wolfSSL | 15:117db924cf7c | 600 | if (blocksLen > 0) { |
| wolfSSL | 15:117db924cf7c | 601 | AddLength(sha512, blocksLen); |
| wolfSSL | 15:117db924cf7c | 602 | sha512->data = data; |
| wolfSSL | 15:117db924cf7c | 603 | /* Byte reversal performed in function if required. */ |
| wolfSSL | 15:117db924cf7c | 604 | Transform_Sha512_Len(sha512, blocksLen); |
| wolfSSL | 15:117db924cf7c | 605 | data += blocksLen; |
| wolfSSL | 15:117db924cf7c | 606 | len -= blocksLen; |
| wolfSSL | 15:117db924cf7c | 607 | } |
| wolfSSL | 15:117db924cf7c | 608 | } |
| wolfSSL | 15:117db924cf7c | 609 | else |
| wolfSSL | 15:117db924cf7c | 610 | #endif |
| wolfSSL | 15:117db924cf7c | 611 | #if !defined(LITTLE_ENDIAN_ORDER) || defined(FREESCALE_MMCAU_SHA) || \ |
| wolfSSL | 15:117db924cf7c | 612 | defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) |
| wolfSSL | 15:117db924cf7c | 613 | { |
| wolfSSL | 15:117db924cf7c | 614 | word32 blocksLen = len & ~(WC_SHA512_BLOCK_SIZE-1); |
| wolfSSL | 15:117db924cf7c | 615 | |
| wolfSSL | 15:117db924cf7c | 616 | AddLength(sha512, blocksLen); |
| wolfSSL | 15:117db924cf7c | 617 | while (len >= WC_SHA512_BLOCK_SIZE) { |
| wolfSSL | 15:117db924cf7c | 618 | XMEMCPY(local, data, WC_SHA512_BLOCK_SIZE); |
| wolfSSL | 15:117db924cf7c | 619 | |
| wolfSSL | 15:117db924cf7c | 620 | data += WC_SHA512_BLOCK_SIZE; |
| wolfSSL | 15:117db924cf7c | 621 | len -= WC_SHA512_BLOCK_SIZE; |
| wolfSSL | 15:117db924cf7c | 622 | |
| wolfSSL | 15:117db924cf7c | 623 | /* Byte reversal performed in function if required. */ |
| wolfSSL | 15:117db924cf7c | 624 | ret = Transform_Sha512(sha512); |
| wolfSSL | 15:117db924cf7c | 625 | if (ret != 0) |
| wolfSSL | 15:117db924cf7c | 626 | break; |
| wolfSSL | 15:117db924cf7c | 627 | } |
| wolfSSL | 15:117db924cf7c | 628 | } |
| wolfSSL | 15:117db924cf7c | 629 | #else |
| wolfSSL | 15:117db924cf7c | 630 | { |
| wolfSSL | 15:117db924cf7c | 631 | word32 blocksLen = len & ~(WC_SHA512_BLOCK_SIZE-1); |
| wolfSSL | 15:117db924cf7c | 632 | |
| wolfSSL | 15:117db924cf7c | 633 | AddLength(sha512, blocksLen); |
| wolfSSL | 15:117db924cf7c | 634 | while (len >= WC_SHA512_BLOCK_SIZE) { |
| wolfSSL | 15:117db924cf7c | 635 | XMEMCPY(local, data, WC_SHA512_BLOCK_SIZE); |
| wolfSSL | 15:117db924cf7c | 636 | |
| wolfSSL | 15:117db924cf7c | 637 | data += WC_SHA512_BLOCK_SIZE; |
| wolfSSL | 15:117db924cf7c | 638 | len -= WC_SHA512_BLOCK_SIZE; |
| wolfSSL | 15:117db924cf7c | 639 | |
| wolfSSL | 15:117db924cf7c | 640 | ByteReverseWords64(sha512->buffer, sha512->buffer, |
| wolfSSL | 15:117db924cf7c | 641 | WC_SHA512_BLOCK_SIZE); |
| wolfSSL | 15:117db924cf7c | 642 | ret = Transform_Sha512(sha512); |
| wolfSSL | 15:117db924cf7c | 643 | if (ret != 0) |
| wolfSSL | 15:117db924cf7c | 644 | break; |
| wolfSSL | 15:117db924cf7c | 645 | } |
| wolfSSL | 15:117db924cf7c | 646 | } |
| wolfSSL | 15:117db924cf7c | 647 | #endif |
| wolfSSL | 15:117db924cf7c | 648 | |
| wolfSSL | 15:117db924cf7c | 649 | if (len > 0) { |
| wolfSSL | 15:117db924cf7c | 650 | XMEMCPY(local, data, len); |
| wolfSSL | 15:117db924cf7c | 651 | sha512->buffLen = len; |
| wolfSSL | 15:117db924cf7c | 652 | } |
| wolfSSL | 15:117db924cf7c | 653 | |
| wolfSSL | 15:117db924cf7c | 654 | return ret; |
| wolfSSL | 15:117db924cf7c | 655 | } |
| wolfSSL | 15:117db924cf7c | 656 | |
| wolfSSL | 15:117db924cf7c | 657 | #ifdef WOLFSSL_SHA512 |
| wolfSSL | 15:117db924cf7c | 658 | |
| wolfSSL | 15:117db924cf7c | 659 | int wc_Sha512Update(wc_Sha512* sha512, const byte* data, word32 len) |
| wolfSSL | 15:117db924cf7c | 660 | { |
| wolfSSL | 15:117db924cf7c | 661 | if (sha512 == NULL || (data == NULL && len > 0)) { |
| wolfSSL | 15:117db924cf7c | 662 | return BAD_FUNC_ARG; |
| wolfSSL | 15:117db924cf7c | 663 | } |
| wolfSSL | 15:117db924cf7c | 664 | |
| wolfSSL | 15:117db924cf7c | 665 | #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA512) |
| wolfSSL | 15:117db924cf7c | 666 | if (sha512->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA512) { |
| wolfSSL | 15:117db924cf7c | 667 | #if defined(HAVE_INTEL_QA) |
| wolfSSL | 15:117db924cf7c | 668 | return IntelQaSymSha512(&sha512->asyncDev, NULL, data, len); |
| wolfSSL | 15:117db924cf7c | 669 | #endif |
| wolfSSL | 15:117db924cf7c | 670 | } |
| wolfSSL | 15:117db924cf7c | 671 | #endif /* WOLFSSL_ASYNC_CRYPT */ |
| wolfSSL | 15:117db924cf7c | 672 | |
| wolfSSL | 15:117db924cf7c | 673 | return Sha512Update(sha512, data, len); |
| wolfSSL | 15:117db924cf7c | 674 | } |
| wolfSSL | 15:117db924cf7c | 675 | |
| wolfSSL | 15:117db924cf7c | 676 | #endif /* WOLFSSL_SHA512 */ |
| wolfSSL | 15:117db924cf7c | 677 | |
| wolfSSL | 15:117db924cf7c | 678 | #endif /* WOLFSSL_IMX6_CAAM */ |
| wolfSSL | 15:117db924cf7c | 679 | |
| wolfSSL | 15:117db924cf7c | 680 | static WC_INLINE int Sha512Final(wc_Sha512* sha512) |
| wolfSSL | 15:117db924cf7c | 681 | { |
| wolfSSL | 15:117db924cf7c | 682 | byte* local = (byte*)sha512->buffer; |
| wolfSSL | 15:117db924cf7c | 683 | int ret; |
| wolfSSL | 15:117db924cf7c | 684 | |
| wolfSSL | 15:117db924cf7c | 685 | if (sha512 == NULL) { |
| wolfSSL | 15:117db924cf7c | 686 | return BAD_FUNC_ARG; |
| wolfSSL | 15:117db924cf7c | 687 | } |
| wolfSSL | 15:117db924cf7c | 688 | |
| wolfSSL | 15:117db924cf7c | 689 | AddLength(sha512, sha512->buffLen); /* before adding pads */ |
| wolfSSL | 15:117db924cf7c | 690 | |
| wolfSSL | 15:117db924cf7c | 691 | local[sha512->buffLen++] = 0x80; /* add 1 */ |
| wolfSSL | 15:117db924cf7c | 692 | |
| wolfSSL | 15:117db924cf7c | 693 | /* pad with zeros */ |
| wolfSSL | 15:117db924cf7c | 694 | if (sha512->buffLen > WC_SHA512_PAD_SIZE) { |
| wolfSSL | 15:117db924cf7c | 695 | XMEMSET(&local[sha512->buffLen], 0, WC_SHA512_BLOCK_SIZE - sha512->buffLen); |
| wolfSSL | 15:117db924cf7c | 696 | sha512->buffLen += WC_SHA512_BLOCK_SIZE - sha512->buffLen; |
| wolfSSL | 15:117db924cf7c | 697 | #if defined(LITTLE_ENDIAN_ORDER) |
| wolfSSL | 15:117db924cf7c | 698 | #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) |
| wolfSSL | 15:117db924cf7c | 699 | if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) |
| wolfSSL | 15:117db924cf7c | 700 | #endif |
| wolfSSL | 15:117db924cf7c | 701 | { |
| wolfSSL | 15:117db924cf7c | 702 | ByteReverseWords64(sha512->buffer,sha512->buffer, |
| wolfSSL | 15:117db924cf7c | 703 | WC_SHA512_BLOCK_SIZE); |
| wolfSSL | 15:117db924cf7c | 704 | } |
| wolfSSL | 15:117db924cf7c | 705 | #endif /* LITTLE_ENDIAN_ORDER */ |
| wolfSSL | 15:117db924cf7c | 706 | ret = Transform_Sha512(sha512); |
| wolfSSL | 15:117db924cf7c | 707 | if (ret != 0) |
| wolfSSL | 15:117db924cf7c | 708 | return ret; |
| wolfSSL | 15:117db924cf7c | 709 | |
| wolfSSL | 15:117db924cf7c | 710 | sha512->buffLen = 0; |
| wolfSSL | 15:117db924cf7c | 711 | } |
| wolfSSL | 15:117db924cf7c | 712 | XMEMSET(&local[sha512->buffLen], 0, WC_SHA512_PAD_SIZE - sha512->buffLen); |
| wolfSSL | 15:117db924cf7c | 713 | |
| wolfSSL | 15:117db924cf7c | 714 | /* put lengths in bits */ |
| wolfSSL | 15:117db924cf7c | 715 | sha512->hiLen = (sha512->loLen >> (8 * sizeof(sha512->loLen) - 3)) + |
| wolfSSL | 15:117db924cf7c | 716 | (sha512->hiLen << 3); |
| wolfSSL | 15:117db924cf7c | 717 | sha512->loLen = sha512->loLen << 3; |
| wolfSSL | 15:117db924cf7c | 718 | |
| wolfSSL | 15:117db924cf7c | 719 | /* store lengths */ |
| wolfSSL | 15:117db924cf7c | 720 | #if defined(LITTLE_ENDIAN_ORDER) |
| wolfSSL | 15:117db924cf7c | 721 | #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) |
| wolfSSL | 15:117db924cf7c | 722 | if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) |
| wolfSSL | 15:117db924cf7c | 723 | #endif |
| wolfSSL | 15:117db924cf7c | 724 | ByteReverseWords64(sha512->buffer, sha512->buffer, WC_SHA512_PAD_SIZE); |
| wolfSSL | 15:117db924cf7c | 725 | #endif |
| wolfSSL | 15:117db924cf7c | 726 | /* ! length ordering dependent on digest endian type ! */ |
| wolfSSL | 15:117db924cf7c | 727 | |
| wolfSSL | 15:117db924cf7c | 728 | sha512->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 2] = sha512->hiLen; |
| wolfSSL | 15:117db924cf7c | 729 | sha512->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 1] = sha512->loLen; |
| wolfSSL | 15:117db924cf7c | 730 | #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) |
| wolfSSL | 15:117db924cf7c | 731 | if (IS_INTEL_AVX1(intel_flags) || IS_INTEL_AVX2(intel_flags)) |
| wolfSSL | 15:117db924cf7c | 732 | ByteReverseWords64(&(sha512->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 2]), |
| wolfSSL | 15:117db924cf7c | 733 | &(sha512->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 2]), |
| wolfSSL | 15:117db924cf7c | 734 | WC_SHA512_BLOCK_SIZE - WC_SHA512_PAD_SIZE); |
| wolfSSL | 15:117db924cf7c | 735 | #endif |
| wolfSSL | 15:117db924cf7c | 736 | ret = Transform_Sha512(sha512); |
| wolfSSL | 15:117db924cf7c | 737 | if (ret != 0) |
| wolfSSL | 15:117db924cf7c | 738 | return ret; |
| wolfSSL | 15:117db924cf7c | 739 | |
| wolfSSL | 15:117db924cf7c | 740 | #ifdef LITTLE_ENDIAN_ORDER |
| wolfSSL | 15:117db924cf7c | 741 | ByteReverseWords64(sha512->digest, sha512->digest, WC_SHA512_DIGEST_SIZE); |
| wolfSSL | 15:117db924cf7c | 742 | #endif |
| wolfSSL | 15:117db924cf7c | 743 | |
| wolfSSL | 15:117db924cf7c | 744 | return 0; |
| wolfSSL | 15:117db924cf7c | 745 | } |
| wolfSSL | 15:117db924cf7c | 746 | |
| wolfSSL | 15:117db924cf7c | 747 | #ifdef WOLFSSL_SHA512 |
| wolfSSL | 15:117db924cf7c | 748 | |
| wolfSSL | 15:117db924cf7c | 749 | int wc_Sha512FinalRaw(wc_Sha512* sha512, byte* hash) |
| wolfSSL | 15:117db924cf7c | 750 | { |
| wolfSSL | 15:117db924cf7c | 751 | #ifdef LITTLE_ENDIAN_ORDER |
| wolfSSL | 15:117db924cf7c | 752 | word64 digest[WC_SHA512_DIGEST_SIZE / sizeof(word64)]; |
| wolfSSL | 15:117db924cf7c | 753 | #endif |
| wolfSSL | 15:117db924cf7c | 754 | |
| wolfSSL | 15:117db924cf7c | 755 | if (sha512 == NULL || hash == NULL) { |
| wolfSSL | 15:117db924cf7c | 756 | return BAD_FUNC_ARG; |
| wolfSSL | 15:117db924cf7c | 757 | } |
| wolfSSL | 15:117db924cf7c | 758 | |
| wolfSSL | 15:117db924cf7c | 759 | #ifdef LITTLE_ENDIAN_ORDER |
| wolfSSL | 15:117db924cf7c | 760 | ByteReverseWords64((word64*)digest, (word64*)sha512->digest, |
| wolfSSL | 15:117db924cf7c | 761 | WC_SHA512_DIGEST_SIZE); |
| wolfSSL | 15:117db924cf7c | 762 | XMEMCPY(hash, digest, WC_SHA512_DIGEST_SIZE); |
| wolfSSL | 15:117db924cf7c | 763 | #else |
| wolfSSL | 15:117db924cf7c | 764 | XMEMCPY(hash, sha512->digest, WC_SHA512_DIGEST_SIZE); |
| wolfSSL | 15:117db924cf7c | 765 | #endif |
| wolfSSL | 15:117db924cf7c | 766 | |
| wolfSSL | 15:117db924cf7c | 767 | return 0; |
| wolfSSL | 15:117db924cf7c | 768 | } |
| wolfSSL | 15:117db924cf7c | 769 | |
| wolfSSL | 15:117db924cf7c | 770 | int wc_Sha512Final(wc_Sha512* sha512, byte* hash) |
| wolfSSL | 15:117db924cf7c | 771 | { |
| wolfSSL | 15:117db924cf7c | 772 | int ret; |
| wolfSSL | 15:117db924cf7c | 773 | |
| wolfSSL | 15:117db924cf7c | 774 | if (sha512 == NULL || hash == NULL) { |
| wolfSSL | 15:117db924cf7c | 775 | return BAD_FUNC_ARG; |
| wolfSSL | 15:117db924cf7c | 776 | } |
| wolfSSL | 15:117db924cf7c | 777 | |
| wolfSSL | 15:117db924cf7c | 778 | #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA512) |
| wolfSSL | 15:117db924cf7c | 779 | if (sha512->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA512) { |
| wolfSSL | 15:117db924cf7c | 780 | #if defined(HAVE_INTEL_QA) |
| wolfSSL | 15:117db924cf7c | 781 | return IntelQaSymSha512(&sha512->asyncDev, hash, NULL, |
| wolfSSL | 15:117db924cf7c | 782 | WC_SHA512_DIGEST_SIZE); |
| wolfSSL | 15:117db924cf7c | 783 | #endif |
| wolfSSL | 15:117db924cf7c | 784 | } |
| wolfSSL | 15:117db924cf7c | 785 | #endif /* WOLFSSL_ASYNC_CRYPT */ |
| wolfSSL | 15:117db924cf7c | 786 | |
| wolfSSL | 15:117db924cf7c | 787 | ret = Sha512Final(sha512); |
| wolfSSL | 15:117db924cf7c | 788 | if (ret != 0) |
| wolfSSL | 15:117db924cf7c | 789 | return ret; |
| wolfSSL | 15:117db924cf7c | 790 | |
| wolfSSL | 15:117db924cf7c | 791 | XMEMCPY(hash, sha512->digest, WC_SHA512_DIGEST_SIZE); |
| wolfSSL | 15:117db924cf7c | 792 | |
| wolfSSL | 15:117db924cf7c | 793 | return InitSha512(sha512); /* reset state */ |
| wolfSSL | 15:117db924cf7c | 794 | } |
| wolfSSL | 15:117db924cf7c | 795 | |
| wolfSSL | 15:117db924cf7c | 796 | |
| wolfSSL | 15:117db924cf7c | 797 | int wc_InitSha512(wc_Sha512* sha512) |
| wolfSSL | 15:117db924cf7c | 798 | { |
| wolfSSL | 15:117db924cf7c | 799 | return wc_InitSha512_ex(sha512, NULL, INVALID_DEVID); |
| wolfSSL | 15:117db924cf7c | 800 | } |
| wolfSSL | 15:117db924cf7c | 801 | |
| wolfSSL | 15:117db924cf7c | 802 | void wc_Sha512Free(wc_Sha512* sha512) |
| wolfSSL | 15:117db924cf7c | 803 | { |
| wolfSSL | 15:117db924cf7c | 804 | if (sha512 == NULL) |
| wolfSSL | 15:117db924cf7c | 805 | return; |
| wolfSSL | 15:117db924cf7c | 806 | |
| wolfSSL | 15:117db924cf7c | 807 | #ifdef WOLFSSL_SMALL_STACK_CACHE |
| wolfSSL | 15:117db924cf7c | 808 | if (sha512->W != NULL) { |
| wolfSSL | 15:117db924cf7c | 809 | XFREE(sha512->W, NULL, DYNAMIC_TYPE_TMP_BUFFER); |
| wolfSSL | 15:117db924cf7c | 810 | sha512->W = NULL; |
| wolfSSL | 15:117db924cf7c | 811 | } |
| wolfSSL | 15:117db924cf7c | 812 | #endif |
| wolfSSL | 15:117db924cf7c | 813 | |
| wolfSSL | 15:117db924cf7c | 814 | #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA512) |
| wolfSSL | 15:117db924cf7c | 815 | wolfAsync_DevCtxFree(&sha512->asyncDev, WOLFSSL_ASYNC_MARKER_SHA512); |
| wolfSSL | 15:117db924cf7c | 816 | #endif /* WOLFSSL_ASYNC_CRYPT */ |
| wolfSSL | 15:117db924cf7c | 817 | } |
| wolfSSL | 15:117db924cf7c | 818 | |
| wolfSSL | 15:117db924cf7c | 819 | |
| wolfSSL | 15:117db924cf7c | 820 | #if defined(HAVE_INTEL_AVX1) |
| wolfSSL | 15:117db924cf7c | 821 | |
/* Two 64-bit words whose bytes count 00..07 and 08..0f.
 * NOTE(review): presumably a byte-shuffle control mask that reverses the bytes
 * of each 64-bit lane (big-endian load) - its use is outside this view. */
static word64 mBYTE_FLIP_MASK[] = { 0x0001020304050607, 0x08090a0b0c0d0e0f };
| wolfSSL | 15:117db924cf7c | 823 | |
/* XMM register aliases for the AVX1 code. Going by the names, W_0..W_14 hold
 * the sixteen message-schedule words two per register - TODO confirm against
 * the code that uses them. */
#define W_0     xmm0
#define W_2     xmm1
#define W_4     xmm2
#define W_6     xmm3
#define W_8     xmm4
#define W_10    xmm5
#define W_12    xmm6
#define W_14    xmm7

/* NOTE(review): presumably the W[-15] / W[-7] operands of the schedule update
 * and the byte-flip mask register - confirm against the users. */
#define W_M15   xmm12
#define W_M7    xmm13
#define MASK    xmm14

/* scratch vector registers */
#define XTMP1   xmm8
#define XTMP2   xmm9
#define XTMP3   xmm10
#define XTMP4   xmm11

/* Comma-separated register-name strings for all sixteen XMM registers;
 * presumably used as an inline-asm clobber list. */
#define XMM_REGS                                                        \
    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",     \
    "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
| wolfSSL | 15:117db924cf7c | 845 | |
/* Builders for single AVX instructions as inline-asm string fragments (AT&T
 * operand order, destination last). Each comes as a pair: the outer macro
 * passes its arguments through so that register aliases such as W_0 are
 * macro-expanded first, and the underscore-prefixed inner macro then
 * stringifies them with #. */

/* vpalignr $bits, src2, src1, dest */
#define _VPALIGNR(dest, src1, src2, bits)                               \
    "vpalignr $" #bits ", %%" #src2 ", %%" #src1 ", %%" #dest "\n\t"
#define VPALIGNR(dest, src1, src2, bits)                                \
    _VPALIGNR(dest, src1, src2, bits)

/* vpsrlq: logical right shift of each 64-bit lane by bits */
#define _V_SHIFT_R(dest, src, bits)                                     \
    "vpsrlq $" #bits ", %%" #src ", %%" #dest "\n\t"
#define V_SHIFT_R(dest, src, bits)                                      \
    _V_SHIFT_R(dest, src, bits)

/* vpsllq: logical left shift of each 64-bit lane by bits */
#define _V_SHIFT_L(dest, src, bits)                                     \
    "vpsllq $" #bits ", %%" #src ", %%" #dest "\n\t"
#define V_SHIFT_L(dest, src, bits)                                      \
    _V_SHIFT_L(dest, src, bits)

/* vpaddq: 64-bit lane-wise add */
#define _V_ADD(dest, src1, src2)                                        \
    "vpaddq %%" #src1 ", %%" #src2 ", %%" #dest "\n\t"
#define V_ADD(dest, src1, src2)                                         \
    _V_ADD(dest, src1, src2)

/* vpxor: bitwise XOR */
#define _V_XOR(dest, src1, src2)                                        \
    "vpxor %%" #src1 ", %%" #src2 ", %%" #dest "\n\t"
#define V_XOR(dest, src1, src2)                                         \
    _V_XOR(dest, src1, src2)

/* vpor: bitwise OR */
#define _V_OR(dest, src1, src2)                                         \
    "vpor %%" #src1 ", %%" #src2 ", %%" #dest "\n\t"
#define V_OR(dest, src1, src2)                                          \
    _V_OR(dest, src1, src2)
| wolfSSL | 15:117db924cf7c | 875 | |
/* General-purpose registers assigned to the eight working variables a..h.
 * These are bare tokens (not strings) because the RND_* macros stringify
 * them with #. */
#define RA  %%r8
#define RB  %%r9
#define RC  %%r10
#define RD  %%r11
#define RE  %%r12
#define RF  %%r13
#define RG  %%r14
#define RH  %%r15

/* Register-name strings for r8..r15; presumably an inline-asm clobber list. */
#define STATE_REGS "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"

/* Scratch registers used inside the RND_* round macros, given as string
 * fragments so they can be pasted directly into the asm text. */
#define L1  "%%rax"
#define L2  "%%rcx"
#define L3  "%%rdx"
#define L4  "%%rbx"
/* Base register for the W_X[i] loads in the round macros (the stack pointer) */
#define WX  "%%rsp"

/* Register-name strings for the scratch registers; presumably an inline-asm
 * clobber list. */
#define WORK_REGS "rax", "rbx", "rcx", "rdx"
| wolfSSL | 15:117db924cf7c | 894 | |
/* RND_0_1 .. RND_0_12: one SHA-512 round split into twelve small asm
 * fragments, presumably so that callers can interleave them with other
 * instructions.
 *
 * Register roles as established by the per-instruction comments below:
 *   on entry  L1 = e, L4 = b ^ c
 *   L1 builds Sigma1(e) = e>>>14 ^ e>>>18 ^ e>>>41
 *   L2 builds Ch(e,f,g) = ((f ^ g) & e) ^ g, then Sigma0(a)
 *             = a>>>28 ^ a>>>34 ^ a>>>39
 *   L3 = a ^ b
 *   L4 becomes Maj(a,b,c) = ((a ^ b) & (b ^ c)) ^ b
 *   on exit   L1 = d (updated), L3 = a ^ b
 * The RND_1_* macros are the same round with the roles of L3 and L4 swapped.
 */
#define RND_0_1(a,b,c,d,e,f,g,h,i)                           \
    /* L1 = e >>> 23 */                                      \
    "rorq $23, " L1 "\n\t"                                   \

#define RND_0_2(a,b,c,d,e,f,g,h,i)                           \
    /* L3 = a */                                             \
    "movq "#a", " L3 "\n\t"                                  \
    /* L2 = f */                                             \
    "movq "#f", " L2 "\n\t"                                  \
    /* h += W_X[i] */                                        \
    "addq ("#i")*8(" WX "), "#h"\n\t"                        \
    /* L2 = f ^ g */                                         \
    "xorq "#g", " L2 "\n\t"                                  \

/* First half of RND_0_2 (register copies only) */
#define RND_0_2_A(a,b,c,d,e,f,g,h,i)                         \
    /* L3 = a */                                             \
    "movq "#a", " L3 "\n\t"                                  \
    /* L2 = f */                                             \
    "movq "#f", " L2 "\n\t"                                  \

/* Second half of RND_0_2 (W_X[i] load and f ^ g) */
#define RND_0_2_B(a,b,c,d,e,f,g,h,i)                         \
    /* h += W_X[i] */                                        \
    "addq ("#i")*8(" WX "), "#h"\n\t"                        \
    /* L2 = f ^ g */                                         \
    "xorq "#g", " L2 "\n\t"                                  \

#define RND_0_3(a,b,c,d,e,f,g,h,i)                           \
    /* L1 = (e >>> 23) ^ e */                                \
    "xorq "#e", " L1 "\n\t"                                  \
    /* L2 = (f ^ g) & e */                                   \
    "andq "#e", " L2 "\n\t"                                  \

#define RND_0_4(a,b,c,d,e,f,g,h,i)                           \
    /* L1 = ((e >>> 23) ^ e) >>> 4 */                        \
    "rorq $4, " L1 "\n\t"                                    \
    /* L2 = ((f ^ g) & e) ^ g */                             \
    "xorq "#g", " L2 "\n\t"                                  \

#define RND_0_5(a,b,c,d,e,f,g,h,i)                           \
    /* L1 = (((e >>> 23) ^ e) >>> 4) ^ e */                  \
    "xorq "#e", " L1 "\n\t"                                  \
    /* h += Ch(e,f,g) */                                     \
    "addq " L2 ", "#h"\n\t"                                  \

#define RND_0_6(a,b,c,d,e,f,g,h,i)                           \
    /* L1 = ((((e >>> 23) ^ e) >>> 4) ^ e) >>> 14 */         \
    "rorq $14, " L1 "\n\t"                                   \
    /* L3 = a ^ b */                                         \
    "xorq "#b", " L3 "\n\t"                                  \

#define RND_0_7(a,b,c,d,e,f,g,h,i)                           \
    /* h += Sigma1(e) */                                     \
    "addq " L1 ", "#h"\n\t"                                  \
    /* L2 = a */                                             \
    "movq "#a", " L2 "\n\t"                                  \

#define RND_0_8(a,b,c,d,e,f,g,h,i)                           \
    /* L4 = (a ^ b) & (b ^ c) */                             \
    "andq " L3 ", " L4 "\n\t"                                \
    /* L2 = a >>> 5 */                                       \
    "rorq $5, " L2 "\n\t"                                    \

#define RND_0_9(a,b,c,d,e,f,g,h,i)                           \
    /* L2 = (a >>> 5) ^ a */                                 \
    "xorq "#a", " L2 "\n\t"                                  \
    /* L4 = ((a ^ b) & (b ^ c)) ^ b = Maj(a,b,c) */          \
    "xorq "#b", " L4 "\n\t"                                  \

#define RND_0_10(a,b,c,d,e,f,g,h,i)                          \
    /* L2 = ((a >>> 5) ^ a) >>> 6 */                         \
    "rorq $6, " L2 "\n\t"                                    \
    /* d += h */                                             \
    "addq "#h", "#d"\n\t"                                    \

#define RND_0_11(a,b,c,d,e,f,g,h,i)                          \
    /* L2 = (((a >>> 5) ^ a) >>> 6) ^ a */                   \
    "xorq "#a", " L2 "\n\t"                                  \
    /* h += Maj(a,b,c) (held in L4) */                       \
    "addq " L4 ", "#h"\n\t"                                  \

#define RND_0_12(a,b,c,d,e,f,g,h,i)                          \
    /* L2 = ((((a >>> 5) ^ a) >>> 6) ^ a) >>> 28 */          \
    "rorq $28, " L2 "\n\t"                                   \
    /* d (= e next RND) */                                   \
    "movq "#d", " L1 "\n\t"                                  \
    /* h += Sigma0(a) (held in L2) */                        \
    "addq " L2 ", "#h"\n\t"                                  \

| wolfSSL | 15:117db924cf7c | 982 | |
| wolfSSL | 15:117db924cf7c | 983 | #define RND_1_1(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 984 | /* L1 = e >>> 23 */ \ |
| wolfSSL | 15:117db924cf7c | 985 | "rorq $23, " L1 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 986 | |
| wolfSSL | 15:117db924cf7c | 987 | #define RND_1_2(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 988 | /* L4 = a */ \ |
| wolfSSL | 15:117db924cf7c | 989 | "movq "#a", " L4 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 990 | /* L2 = f */ \ |
| wolfSSL | 15:117db924cf7c | 991 | "movq "#f", " L2 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 992 | /* h += W_X[i] */ \ |
| wolfSSL | 15:117db924cf7c | 993 | "addq ("#i")*8(" WX "), "#h"\n\t" \ |
| wolfSSL | 15:117db924cf7c | 994 | /* L2 = f ^ g */ \ |
| wolfSSL | 15:117db924cf7c | 995 | "xorq "#g", " L2 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 996 | |
| wolfSSL | 15:117db924cf7c | 997 | #define RND_1_2_A(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 998 | /* L4 = a */ \ |
| wolfSSL | 15:117db924cf7c | 999 | "movq "#a", " L4 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1000 | /* L2 = f */ \ |
| wolfSSL | 15:117db924cf7c | 1001 | "movq "#f", " L2 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1002 | |
| wolfSSL | 15:117db924cf7c | 1003 | #define RND_1_2_B(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1004 | /* h += W_X[i] */ \ |
| wolfSSL | 15:117db924cf7c | 1005 | "addq ("#i")*8(" WX "), "#h"\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1006 | /* L2 = f ^ g */ \ |
| wolfSSL | 15:117db924cf7c | 1007 | "xorq "#g", " L2 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1008 | |
| wolfSSL | 15:117db924cf7c | 1009 | #define RND_1_3(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1010 | /* L1 = (e >>> 23) ^ e */ \ |
| wolfSSL | 15:117db924cf7c | 1011 | "xorq "#e", " L1 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1012 | /* L2 = (f ^ g) & e */ \ |
| wolfSSL | 15:117db924cf7c | 1013 | "andq "#e", " L2 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1014 | |
| wolfSSL | 15:117db924cf7c | 1015 | #define RND_1_4(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1016 | /* ((e >>> 23) ^ e) >>> 4 */ \ |
| wolfSSL | 15:117db924cf7c | 1017 | "rorq $4, " L1 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1018 | /* ((f ^ g) & e) ^ g */ \ |
| wolfSSL | 15:117db924cf7c | 1019 | "xorq "#g", " L2 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1020 | |
| wolfSSL | 15:117db924cf7c | 1021 | #define RND_1_5(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1022 | /* (((e >>> 23) ^ e) >>> 4) ^ e */ \ |
| wolfSSL | 15:117db924cf7c | 1023 | "xorq "#e", " L1 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1024 | /* h += Ch(e,f,g) */ \ |
| wolfSSL | 15:117db924cf7c | 1025 | "addq " L2 ", "#h"\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1026 | |
| wolfSSL | 15:117db924cf7c | 1027 | #define RND_1_6(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1028 | /* Step 6: L1 = ((((e >>> 23) ^ e) >>> 4) ^ e) >>> 14 = (e>>>41) ^ (e>>>18) ^ (e>>>14) = Sigma1(e) */ \ |
| wolfSSL | 15:117db924cf7c | 1029 | "rorq $14, " L1 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1030 | /* L4 = a ^ b (L4 must already hold a) */ \ |
| wolfSSL | 15:117db924cf7c | 1031 | "xorq "#b", " L4 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1032 | |
| wolfSSL | 15:117db924cf7c | 1033 | #define RND_1_7(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1034 | /* Step 7: h += Sigma1(e), which L1 holds after step 6 */ \ |
| wolfSSL | 15:117db924cf7c | 1035 | "addq " L1 ", "#h"\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1036 | /* L2 = a (start of the Sigma0(a) computation) */ \ |
| wolfSSL | 15:117db924cf7c | 1037 | "movq "#a", " L2 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1038 | |
| wolfSSL | 15:117db924cf7c | 1039 | #define RND_1_8(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1040 | /* Step 8: L3 = (a ^ b) & (b ^ c); L4 holds a ^ b, L3 is expected to hold b ^ c on entry */ \ |
| wolfSSL | 15:117db924cf7c | 1041 | "andq " L4 ", " L3 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1042 | /* L2 = a >>> 5 */ \ |
| wolfSSL | 15:117db924cf7c | 1043 | "rorq $5, " L2 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1044 | |
| wolfSSL | 15:117db924cf7c | 1045 | #define RND_1_9(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1046 | /* Step 9: L2 = (a >>> 5) ^ a */ \ |
| wolfSSL | 15:117db924cf7c | 1047 | "xorq "#a", " L2 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1048 | /* L3 = ((a ^ b) & (b ^ c)) ^ b = Maj(a,b,c) */ \ |
| wolfSSL | 15:117db924cf7c | 1049 | "xorq "#b", " L3 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1050 | |
| wolfSSL | 15:117db924cf7c | 1051 | #define RND_1_10(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1052 | /* Step 10: L2 = ((a >>> 5) ^ a) >>> 6 */ \ |
| wolfSSL | 15:117db924cf7c | 1053 | "rorq $6, " L2 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1054 | /* d += h, done before Maj(a,b,c) and Sigma0(a) are added to h in steps 11 and 12 */ \ |
| wolfSSL | 15:117db924cf7c | 1055 | "addq "#h", "#d"\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1056 | |
| wolfSSL | 15:117db924cf7c | 1057 | #define RND_1_11(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1058 | /* Step 11: L2 = (((a >>> 5) ^ a) >>> 6) ^ a */ \ |
| wolfSSL | 15:117db924cf7c | 1059 | "xorq "#a", " L2 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1060 | /* h += Maj(a,b,c) (L3 holds Maj after step 9; Sigma0 is added in step 12) */ \ |
| wolfSSL | 15:117db924cf7c | 1061 | "addq " L3 ", "#h"\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1062 | |
| wolfSSL | 15:117db924cf7c | 1063 | #define RND_1_12(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1064 | /* Step 12: L2 = ((((a >>> 5) ^ a) >>> 6) ^ a) >>> 28 = (a>>>39) ^ (a>>>34) ^ (a>>>28) = Sigma0(a) */ \ |
| wolfSSL | 15:117db924cf7c | 1065 | "rorq $28, " L2 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1066 | /* L1 = d (= e of next RND) */ \ |
| wolfSSL | 15:117db924cf7c | 1067 | "movq "#d", " L1 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1068 | /* h += Sigma0(a) (L2 holds Sigma0; Maj was added in step 11) */ \ |
| wolfSSL | 15:117db924cf7c | 1069 | "addq " L2 ", "#h"\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1070 | |
| wolfSSL | 15:117db924cf7c | 1071 | |
| wolfSSL | 15:117db924cf7c | 1072 | #define MsgSched2(W_0,W_2,W_4,W_6,W_8,W_10,W_12,W_14,a,b,c,d,e,f,g,h,i) /* Two rounds (RND_0_* with index i, RND_1_* with index i+1 and registers rotated by one) interleaved with the vector update of W_0. Going by the V_* helper names: W_0 += W_M7 + (ror1 ^ ror8 ^ shr7)(W_M15) + (ror19 ^ ror61 ^ shr6)(W_14). W_4, W_6 and W_12 are accepted but unused. */ \ |
| wolfSSL | 15:117db924cf7c | 1073 | RND_0_1(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1074 | VPALIGNR(W_M15, W_2, W_0, 8) \ |
| wolfSSL | 15:117db924cf7c | 1075 | VPALIGNR(W_M7, W_10, W_8, 8) \ |
| wolfSSL | 15:117db924cf7c | 1076 | RND_0_2(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1077 | V_SHIFT_R(XTMP1, W_M15, 1) \ |
| wolfSSL | 15:117db924cf7c | 1078 | V_SHIFT_L(XTMP2, W_M15, 63) \ |
| wolfSSL | 15:117db924cf7c | 1079 | RND_0_3(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1080 | RND_0_4(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1081 | V_SHIFT_R(XTMP3, W_M15, 8) \ |
| wolfSSL | 15:117db924cf7c | 1082 | V_SHIFT_L(XTMP4, W_M15, 56) \ |
| wolfSSL | 15:117db924cf7c | 1083 | RND_0_5(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1084 | RND_0_6(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1085 | V_OR(XTMP1, XTMP2, XTMP1) \ |
| wolfSSL | 15:117db924cf7c | 1086 | V_OR(XTMP3, XTMP4, XTMP3) \ |
| wolfSSL | 15:117db924cf7c | 1087 | RND_0_7(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1088 | RND_0_8(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1089 | V_SHIFT_R(XTMP4, W_M15, 7) \ |
| wolfSSL | 15:117db924cf7c | 1090 | V_XOR(XTMP1, XTMP3, XTMP1) \ |
| wolfSSL | 15:117db924cf7c | 1091 | RND_0_9(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1092 | RND_0_10(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1093 | V_XOR(XTMP1, XTMP4, XTMP1) \ |
| wolfSSL | 15:117db924cf7c | 1094 | V_ADD(W_0, W_0, W_M7) \ |
| wolfSSL | 15:117db924cf7c | 1095 | RND_0_11(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1096 | RND_0_12(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1097 | RND_1_1(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1098 | V_ADD(W_0, W_0, XTMP1) \ |
| wolfSSL | 15:117db924cf7c | 1099 | RND_1_2(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1100 | V_SHIFT_R(XTMP1, W_14, 19) \ |
| wolfSSL | 15:117db924cf7c | 1101 | V_SHIFT_L(XTMP2, W_14, 45) \ |
| wolfSSL | 15:117db924cf7c | 1102 | RND_1_3(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1103 | RND_1_4(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1104 | V_SHIFT_R(XTMP3, W_14, 61) \ |
| wolfSSL | 15:117db924cf7c | 1105 | V_SHIFT_L(XTMP4, W_14, 3) \ |
| wolfSSL | 15:117db924cf7c | 1106 | RND_1_5(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1107 | RND_1_6(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1108 | RND_1_7(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1109 | V_OR(XTMP1, XTMP2, XTMP1) \ |
| wolfSSL | 15:117db924cf7c | 1110 | V_OR(XTMP3, XTMP4, XTMP3) \ |
| wolfSSL | 15:117db924cf7c | 1111 | RND_1_8(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1112 | RND_1_9(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1113 | V_XOR(XTMP1, XTMP3, XTMP1) \ |
| wolfSSL | 15:117db924cf7c | 1114 | V_SHIFT_R(XTMP4, W_14, 6) \ |
| wolfSSL | 15:117db924cf7c | 1115 | RND_1_10(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1116 | RND_1_11(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1117 | V_XOR(XTMP1, XTMP4, XTMP1) \ |
| wolfSSL | 15:117db924cf7c | 1118 | RND_1_12(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1119 | V_ADD(W_0, W_0, XTMP1) \ |
| wolfSSL | 15:117db924cf7c | 1120 | |
| wolfSSL | 15:117db924cf7c | 1121 | #define RND_ALL_2(a, b, c, d, e, f, g, h, i) /* Two complete rounds with no message scheduling: the twelve RND_0_* steps with index i, then the twelve RND_1_* steps with index i+1 and the state registers rotated by one (h,a,b,...,g). */ \ |
| wolfSSL | 15:117db924cf7c | 1122 | RND_0_1 (a, b, c, d, e, f, g, h, i ) \ |
| wolfSSL | 15:117db924cf7c | 1123 | RND_0_2 (a, b, c, d, e, f, g, h, i ) \ |
| wolfSSL | 15:117db924cf7c | 1124 | RND_0_3 (a, b, c, d, e, f, g, h, i ) \ |
| wolfSSL | 15:117db924cf7c | 1125 | RND_0_4 (a, b, c, d, e, f, g, h, i ) \ |
| wolfSSL | 15:117db924cf7c | 1126 | RND_0_5 (a, b, c, d, e, f, g, h, i ) \ |
| wolfSSL | 15:117db924cf7c | 1127 | RND_0_6 (a, b, c, d, e, f, g, h, i ) \ |
| wolfSSL | 15:117db924cf7c | 1128 | RND_0_7 (a, b, c, d, e, f, g, h, i ) \ |
| wolfSSL | 15:117db924cf7c | 1129 | RND_0_8 (a, b, c, d, e, f, g, h, i ) \ |
| wolfSSL | 15:117db924cf7c | 1130 | RND_0_9 (a, b, c, d, e, f, g, h, i ) \ |
| wolfSSL | 15:117db924cf7c | 1131 | RND_0_10(a, b, c, d, e, f, g, h, i ) \ |
| wolfSSL | 15:117db924cf7c | 1132 | RND_0_11(a, b, c, d, e, f, g, h, i ) \ |
| wolfSSL | 15:117db924cf7c | 1133 | RND_0_12(a, b, c, d, e, f, g, h, i ) \ |
| wolfSSL | 15:117db924cf7c | 1134 | RND_1_1 (h, a, b, c, d, e, f, g, i+1) \ |
| wolfSSL | 15:117db924cf7c | 1135 | RND_1_2 (h, a, b, c, d, e, f, g, i+1) \ |
| wolfSSL | 15:117db924cf7c | 1136 | RND_1_3 (h, a, b, c, d, e, f, g, i+1) \ |
| wolfSSL | 15:117db924cf7c | 1137 | RND_1_4 (h, a, b, c, d, e, f, g, i+1) \ |
| wolfSSL | 15:117db924cf7c | 1138 | RND_1_5 (h, a, b, c, d, e, f, g, i+1) \ |
| wolfSSL | 15:117db924cf7c | 1139 | RND_1_6 (h, a, b, c, d, e, f, g, i+1) \ |
| wolfSSL | 15:117db924cf7c | 1140 | RND_1_7 (h, a, b, c, d, e, f, g, i+1) \ |
| wolfSSL | 15:117db924cf7c | 1141 | RND_1_8 (h, a, b, c, d, e, f, g, i+1) \ |
| wolfSSL | 15:117db924cf7c | 1142 | RND_1_9 (h, a, b, c, d, e, f, g, i+1) \ |
| wolfSSL | 15:117db924cf7c | 1143 | RND_1_10(h, a, b, c, d, e, f, g, i+1) \ |
| wolfSSL | 15:117db924cf7c | 1144 | RND_1_11(h, a, b, c, d, e, f, g, i+1) \ |
| wolfSSL | 15:117db924cf7c | 1145 | RND_1_12(h, a, b, c, d, e, f, g, i+1) |
| wolfSSL | 15:117db924cf7c | 1146 | |
| wolfSSL | 15:117db924cf7c | 1147 | |
| wolfSSL | 15:117db924cf7c | 1148 | #if defined(HAVE_INTEL_RORX) |
| wolfSSL | 15:117db924cf7c | 1149 | |
| wolfSSL | 15:117db924cf7c | 1150 | #define RND_RORX_0_1(a, b, c, d, e, f, g, h, i) \ |
| wolfSSL | 15:117db924cf7c | 1151 | /* First round of a pair, step 1: L1 = e>>>14 */ \ |
| wolfSSL | 15:117db924cf7c | 1152 | "rorxq $14, "#e", " L1 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1153 | /* L2 = e>>>18 */ \ |
| wolfSSL | 15:117db924cf7c | 1154 | "rorxq $18, "#e", " L2 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1155 | /* Prev RND: h += Maj(a,b,c) (the previous round's h is this round's a; L3 holds that Maj) */ \ |
| wolfSSL | 15:117db924cf7c | 1156 | "addq " L3 ", "#a"\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1157 | |
| wolfSSL | 15:117db924cf7c | 1158 | #define RND_RORX_0_2(a, b, c, d, e, f, g, h, i) \ |
| wolfSSL | 15:117db924cf7c | 1159 | /* Step 2: h += w_k, the 64-bit word at i*8(WX) */ \ |
| wolfSSL | 15:117db924cf7c | 1160 | "addq ("#i")*8(" WX "), "#h"\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1161 | /* L3 = f (L3 is free again once step 1 has consumed the previous Maj) */ \ |
| wolfSSL | 15:117db924cf7c | 1162 | "movq "#f", " L3 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1163 | /* L2 = (e>>>14) ^ (e>>>18) */ \ |
| wolfSSL | 15:117db924cf7c | 1164 | "xorq " L1 ", " L2 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1165 | |
| wolfSSL | 15:117db924cf7c | 1166 | #define RND_RORX_0_3(a, b, c, d, e, f, g, h, i) \ |
| wolfSSL | 15:117db924cf7c | 1167 | /* Step 3: L3 = f ^ g */ \ |
| wolfSSL | 15:117db924cf7c | 1168 | "xorq "#g", " L3 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1169 | /* L1 = e>>>41 */ \ |
| wolfSSL | 15:117db924cf7c | 1170 | "rorxq $41, "#e", " L1 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1171 | /* L1 = Sigma1(e) = (e>>>14) ^ (e>>>18) ^ (e>>>41) */ \ |
| wolfSSL | 15:117db924cf7c | 1172 | "xorq " L2 ", " L1 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1173 | |
| wolfSSL | 15:117db924cf7c | 1174 | #define RND_RORX_0_4(a, b, c, d, e, f, g, h, i) \ |
| wolfSSL | 15:117db924cf7c | 1175 | /* Step 4: L3 = (f ^ g) & e */ \ |
| wolfSSL | 15:117db924cf7c | 1176 | "andq "#e", " L3 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1177 | /* h += Sigma1(e) */ \ |
| wolfSSL | 15:117db924cf7c | 1178 | "addq " L1 ", "#h"\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1179 | /* L1 = a>>>28 (L1 is reused for the Sigma0(a) computation) */ \ |
| wolfSSL | 15:117db924cf7c | 1180 | "rorxq $28, "#a", " L1 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1181 | |
| wolfSSL | 15:117db924cf7c | 1182 | #define RND_RORX_0_5(a, b, c, d, e, f, g, h, i) \ |
| wolfSSL | 15:117db924cf7c | 1183 | /* Step 5: L2 = a>>>34 */ \ |
| wolfSSL | 15:117db924cf7c | 1184 | "rorxq $34, "#a", " L2 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1185 | /* L3 = Ch(e,f,g) = ((f ^ g) & e) ^ g */ \ |
| wolfSSL | 15:117db924cf7c | 1186 | "xorq "#g", " L3 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1187 | /* L2 = (a>>>28) ^ (a>>>34) */ \ |
| wolfSSL | 15:117db924cf7c | 1188 | "xorq " L1 ", " L2 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1189 | |
| wolfSSL | 15:117db924cf7c | 1190 | #define RND_RORX_0_6(a, b, c, d, e, f, g, h, i) \ |
| wolfSSL | 15:117db924cf7c | 1191 | /* Step 6: L1 = a>>>39 */ \ |
| wolfSSL | 15:117db924cf7c | 1192 | "rorxq $39, "#a", " L1 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1193 | /* h += Ch(e,f,g) */ \ |
| wolfSSL | 15:117db924cf7c | 1194 | "addq " L3 ", "#h"\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1195 | /* L1 = Sigma0(a) = (a>>>28) ^ (a>>>34) ^ (a>>>39) */ \ |
| wolfSSL | 15:117db924cf7c | 1196 | "xorq " L2 ", " L1 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1197 | |
| wolfSSL | 15:117db924cf7c | 1198 | #define RND_RORX_0_7(a, b, c, d, e, f, g, h, i) \ |
| wolfSSL | 15:117db924cf7c | 1199 | /* Step 7: L3 = b */ \ |
| wolfSSL | 15:117db924cf7c | 1200 | "movq "#b", " L3 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1201 | /* d += h + w_k + Sigma1(e) + Ch(e,f,g) (h has accumulated those terms by now) */ \ |
| wolfSSL | 15:117db924cf7c | 1202 | "addq "#h", "#d"\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1203 | /* L3 = a ^ b (this is b ^ c as seen by the following round) */ \ |
| wolfSSL | 15:117db924cf7c | 1204 | "xorq "#a", " L3 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1205 | |
| wolfSSL | 15:117db924cf7c | 1206 | #define RND_RORX_0_8(a, b, c, d, e, f, g, h, i) \ |
| wolfSSL | 15:117db924cf7c | 1207 | /* Step 8: L4 = (a ^ b) & (b ^ c); L3 holds a ^ b, L4 is expected to hold b ^ c on entry */ \ |
| wolfSSL | 15:117db924cf7c | 1208 | "andq " L3 ", " L4 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1209 | /* h += Sigma0(a) */ \ |
| wolfSSL | 15:117db924cf7c | 1210 | "addq " L1 ", "#h"\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1211 | /* L4 = Maj(a,b,c); it is added to this round's h by step 1 of the next round */ \ |
| wolfSSL | 15:117db924cf7c | 1212 | "xorq "#b", " L4 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1213 | |
| wolfSSL | 15:117db924cf7c | 1214 | #define RND_RORX_1_1(a, b, c, d, e, f, g, h, i) \ |
| wolfSSL | 15:117db924cf7c | 1215 | /* Second round of a pair (L3 and L4 swap roles relative to RND_RORX_0_*), step 1: L1 = e>>>14 */ \ |
| wolfSSL | 15:117db924cf7c | 1216 | "rorxq $14, "#e", " L1 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1217 | /* L2 = e>>>18 */ \ |
| wolfSSL | 15:117db924cf7c | 1218 | "rorxq $18, "#e", " L2 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1219 | /* Prev RND: h += Maj(a,b,c) (the previous round's h is this round's a; L4 holds that Maj) */ \ |
| wolfSSL | 15:117db924cf7c | 1220 | "addq " L4 ", "#a"\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1221 | |
| wolfSSL | 15:117db924cf7c | 1222 | #define RND_RORX_1_2(a, b, c, d, e, f, g, h, i) \ |
| wolfSSL | 15:117db924cf7c | 1223 | /* Step 2: h += w_k, the 64-bit word at i*8(WX) */ \ |
| wolfSSL | 15:117db924cf7c | 1224 | "addq ("#i")*8(" WX "), "#h"\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1225 | /* L4 = f (L4 is free again once step 1 has consumed the previous Maj) */ \ |
| wolfSSL | 15:117db924cf7c | 1226 | "movq "#f", " L4 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1227 | /* L2 = (e>>>14) ^ (e>>>18) */ \ |
| wolfSSL | 15:117db924cf7c | 1228 | "xorq " L1 ", " L2 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1229 | |
| wolfSSL | 15:117db924cf7c | 1230 | #define RND_RORX_1_3(a, b, c, d, e, f, g, h, i) \ |
| wolfSSL | 15:117db924cf7c | 1231 | /* Step 3: L4 = f ^ g */ \ |
| wolfSSL | 15:117db924cf7c | 1232 | "xorq "#g", " L4 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1233 | /* L1 = e>>>41 */ \ |
| wolfSSL | 15:117db924cf7c | 1234 | "rorxq $41, "#e", " L1 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1235 | /* L1 = Sigma1(e) = (e>>>14) ^ (e>>>18) ^ (e>>>41) */ \ |
| wolfSSL | 15:117db924cf7c | 1236 | "xorq " L2 ", " L1 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1237 | |
| wolfSSL | 15:117db924cf7c | 1238 | #define RND_RORX_1_4(a, b, c, d, e, f, g, h, i) \ |
| wolfSSL | 15:117db924cf7c | 1239 | /* Step 4: L4 = (f ^ g) & e */ \ |
| wolfSSL | 15:117db924cf7c | 1240 | "andq "#e", " L4 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1241 | /* h += Sigma1(e) */ \ |
| wolfSSL | 15:117db924cf7c | 1242 | "addq " L1 ", "#h"\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1243 | /* L1 = a>>>28 (L1 is reused for the Sigma0(a) computation) */ \ |
| wolfSSL | 15:117db924cf7c | 1244 | "rorxq $28, "#a", " L1 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1245 | |
| wolfSSL | 15:117db924cf7c | 1246 | #define RND_RORX_1_5(a, b, c, d, e, f, g, h, i) \ |
| wolfSSL | 15:117db924cf7c | 1247 | /* Step 5: L2 = a>>>34 */ \ |
| wolfSSL | 15:117db924cf7c | 1248 | "rorxq $34, "#a", " L2 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1249 | /* L4 = Ch(e,f,g) = ((f ^ g) & e) ^ g */ \ |
| wolfSSL | 15:117db924cf7c | 1250 | "xorq "#g", " L4 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1251 | /* L2 = (a>>>28) ^ (a>>>34) */ \ |
| wolfSSL | 15:117db924cf7c | 1252 | "xorq " L1 ", " L2 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1253 | |
| wolfSSL | 15:117db924cf7c | 1254 | #define RND_RORX_1_6(a, b, c, d, e, f, g, h, i) \ |
| wolfSSL | 15:117db924cf7c | 1255 | /* Step 6: L1 = a>>>39 */ \ |
| wolfSSL | 15:117db924cf7c | 1256 | "rorxq $39, "#a", " L1 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1257 | /* h += Ch(e,f,g) */ \ |
| wolfSSL | 15:117db924cf7c | 1258 | "addq " L4 ", "#h"\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1259 | /* L1 = Sigma0(a) = (a>>>28) ^ (a>>>34) ^ (a>>>39) */ \ |
| wolfSSL | 15:117db924cf7c | 1260 | "xorq " L2 ", " L1 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1261 | |
| wolfSSL | 15:117db924cf7c | 1262 | #define RND_RORX_1_7(a, b, c, d, e, f, g, h, i) \ |
| wolfSSL | 15:117db924cf7c | 1263 | /* Step 7: L4 = b */ \ |
| wolfSSL | 15:117db924cf7c | 1264 | "movq "#b", " L4 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1265 | /* d += h + w_k + Sigma1(e) + Ch(e,f,g) (h has accumulated those terms by now) */ \ |
| wolfSSL | 15:117db924cf7c | 1266 | "addq "#h", "#d"\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1267 | /* L4 = a ^ b (this is b ^ c as seen by the following round) */ \ |
| wolfSSL | 15:117db924cf7c | 1268 | "xorq "#a", " L4 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1269 | |
| wolfSSL | 15:117db924cf7c | 1270 | #define RND_RORX_1_8(a, b, c, d, e, f, g, h, i) \ |
| wolfSSL | 15:117db924cf7c | 1271 | /* Step 8: L3 = (a ^ b) & (b ^ c); L4 holds a ^ b, L3 is expected to hold b ^ c on entry */ \ |
| wolfSSL | 15:117db924cf7c | 1272 | "andq " L4 ", " L3 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1273 | /* h += Sigma0(a) */ \ |
| wolfSSL | 15:117db924cf7c | 1274 | "addq " L1 ", "#h"\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1275 | /* L3 = Maj(a,b,c); it is added to this round's h by step 1 of the next round */ \ |
| wolfSSL | 15:117db924cf7c | 1276 | "xorq "#b", " L3 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1277 | |
| wolfSSL | 15:117db924cf7c | 1278 | #define RND_RORX_ALL_2(a, b, c, d, e, f, g, h, i) /* Two complete RORX rounds with no message scheduling: the eight RND_RORX_0_* steps with index i+0, then the eight RND_RORX_1_* steps with index i+1 and the state registers rotated by one. The Maj term of the second round is left pending in L3 for the next round's step 1. */ \ |
| wolfSSL | 15:117db924cf7c | 1279 | RND_RORX_0_1(a, b, c, d, e, f, g, h, i+0) \ |
| wolfSSL | 15:117db924cf7c | 1280 | RND_RORX_0_2(a, b, c, d, e, f, g, h, i+0) \ |
| wolfSSL | 15:117db924cf7c | 1281 | RND_RORX_0_3(a, b, c, d, e, f, g, h, i+0) \ |
| wolfSSL | 15:117db924cf7c | 1282 | RND_RORX_0_4(a, b, c, d, e, f, g, h, i+0) \ |
| wolfSSL | 15:117db924cf7c | 1283 | RND_RORX_0_5(a, b, c, d, e, f, g, h, i+0) \ |
| wolfSSL | 15:117db924cf7c | 1284 | RND_RORX_0_6(a, b, c, d, e, f, g, h, i+0) \ |
| wolfSSL | 15:117db924cf7c | 1285 | RND_RORX_0_7(a, b, c, d, e, f, g, h, i+0) \ |
| wolfSSL | 15:117db924cf7c | 1286 | RND_RORX_0_8(a, b, c, d, e, f, g, h, i+0) \ |
| wolfSSL | 15:117db924cf7c | 1287 | RND_RORX_1_1(h, a, b, c, d, e, f, g, i+1) \ |
| wolfSSL | 15:117db924cf7c | 1288 | RND_RORX_1_2(h, a, b, c, d, e, f, g, i+1) \ |
| wolfSSL | 15:117db924cf7c | 1289 | RND_RORX_1_3(h, a, b, c, d, e, f, g, i+1) \ |
| wolfSSL | 15:117db924cf7c | 1290 | RND_RORX_1_4(h, a, b, c, d, e, f, g, i+1) \ |
| wolfSSL | 15:117db924cf7c | 1291 | RND_RORX_1_5(h, a, b, c, d, e, f, g, i+1) \ |
| wolfSSL | 15:117db924cf7c | 1292 | RND_RORX_1_6(h, a, b, c, d, e, f, g, i+1) \ |
| wolfSSL | 15:117db924cf7c | 1293 | RND_RORX_1_7(h, a, b, c, d, e, f, g, i+1) \ |
| wolfSSL | 15:117db924cf7c | 1294 | RND_RORX_1_8(h, a, b, c, d, e, f, g, i+1) \ |
| wolfSSL | 15:117db924cf7c | 1295 | |
| wolfSSL | 15:117db924cf7c | 1296 | #define RND_RORX_ALL_4(a, b, c, d, e, f, g, h, i) /* Four rounds: RND_RORX_ALL_2 at index i+0, then again at index i+2 with the state registers rotated by two (g,h,a,...,f). */ \ |
| wolfSSL | 15:117db924cf7c | 1297 | RND_RORX_ALL_2(a, b, c, d, e, f, g, h, i+0) \ |
| wolfSSL | 15:117db924cf7c | 1298 | RND_RORX_ALL_2(g, h, a, b, c, d, e, f, i+2) |
| wolfSSL | 15:117db924cf7c | 1299 | |
| wolfSSL | 15:117db924cf7c | 1300 | #define MsgSched_RORX(W_0,W_2,W_4,W_6,W_8,W_10,W_12,W_14,a,b,c,d,e,f,g,h,i) /* RORX variant of MsgSched2: two rounds (RND_RORX_0_* with index i, RND_RORX_1_* with index i+1 and registers rotated by one) interleaved with the same vector update of W_0. Going by the V_* helper names: W_0 += W_M7 + (ror1 ^ ror8 ^ shr7)(W_M15) + (ror19 ^ ror61 ^ shr6)(W_14). */ \ |
| wolfSSL | 15:117db924cf7c | 1301 | RND_RORX_0_1(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1302 | VPALIGNR(W_M15, W_2, W_0, 8) \ |
| wolfSSL | 15:117db924cf7c | 1303 | VPALIGNR(W_M7, W_10, W_8, 8) \ |
| wolfSSL | 15:117db924cf7c | 1304 | RND_RORX_0_2(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1305 | V_SHIFT_R(XTMP1, W_M15, 1) \ |
| wolfSSL | 15:117db924cf7c | 1306 | V_SHIFT_L(XTMP2, W_M15, 63) \ |
| wolfSSL | 15:117db924cf7c | 1307 | RND_RORX_0_3(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1308 | V_SHIFT_R(XTMP3, W_M15, 8) \ |
| wolfSSL | 15:117db924cf7c | 1309 | V_SHIFT_L(XTMP4, W_M15, 56) \ |
| wolfSSL | 15:117db924cf7c | 1310 | RND_RORX_0_4(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1311 | V_OR(XTMP1, XTMP2, XTMP1) \ |
| wolfSSL | 15:117db924cf7c | 1312 | V_OR(XTMP3, XTMP4, XTMP3) \ |
| wolfSSL | 15:117db924cf7c | 1313 | RND_RORX_0_5(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1314 | V_SHIFT_R(XTMP4, W_M15, 7) \ |
| wolfSSL | 15:117db924cf7c | 1315 | V_XOR(XTMP1, XTMP3, XTMP1) \ |
| wolfSSL | 15:117db924cf7c | 1316 | RND_RORX_0_6(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1317 | V_XOR(XTMP1, XTMP4, XTMP1) \ |
| wolfSSL | 15:117db924cf7c | 1318 | V_ADD(W_0, W_0, W_M7) \ |
| wolfSSL | 15:117db924cf7c | 1319 | RND_RORX_0_7(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1320 | RND_RORX_0_8(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1321 | V_ADD(W_0, W_0, XTMP1) \ |
| wolfSSL | 15:117db924cf7c | 1322 | RND_RORX_1_1(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1323 | V_SHIFT_R(XTMP1, W_14, 19) \ |
| wolfSSL | 15:117db924cf7c | 1324 | V_SHIFT_L(XTMP2, W_14, 45) \ |
| wolfSSL | 15:117db924cf7c | 1325 | RND_RORX_1_2(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1326 | V_SHIFT_R(XTMP3, W_14, 61) \ |
| wolfSSL | 15:117db924cf7c | 1327 | V_SHIFT_L(XTMP4, W_14, 3) \ |
| wolfSSL | 15:117db924cf7c | 1328 | RND_RORX_1_3(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1329 | V_OR(XTMP1, XTMP2, XTMP1) \ |
| wolfSSL | 15:117db924cf7c | 1330 | V_OR(XTMP3, XTMP4, XTMP3) \ |
| wolfSSL | 15:117db924cf7c | 1331 | RND_RORX_1_4(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1332 | RND_RORX_1_5(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1333 | V_XOR(XTMP1, XTMP3, XTMP1) \ |
| wolfSSL | 15:117db924cf7c | 1334 | V_SHIFT_R(XTMP4, W_14, 6) \ |
| wolfSSL | 15:117db924cf7c | 1335 | RND_RORX_1_6(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1336 | RND_RORX_1_7(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1337 | V_XOR(XTMP1, XTMP4, XTMP1) \ |
| wolfSSL | 15:117db924cf7c | 1338 | RND_RORX_1_8(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1339 | V_ADD(W_0, W_0, XTMP1) \ |
| wolfSSL | 15:117db924cf7c | 1340 | |
| wolfSSL | 15:117db924cf7c | 1341 | #endif |
| wolfSSL | 15:117db924cf7c | 1342 | |
| wolfSSL | 15:117db924cf7c | 1343 | #define _INIT_MASK(mask) /* Emit an unaligned vector load of the %[mask] asm operand into the register whose name is the stringified argument. */ \ |
| wolfSSL | 15:117db924cf7c | 1344 | "vmovdqu %[mask], %%" #mask "\n\t" |
| wolfSSL | 15:117db924cf7c | 1345 | #define INIT_MASK(mask) /* Indirection so that a macro argument is expanded before _INIT_MASK stringifies it with #. */ \ |
| wolfSSL | 15:117db924cf7c | 1346 | _INIT_MASK(mask) |
| wolfSSL | 15:117db924cf7c | 1347 | |
| wolfSSL | 15:117db924cf7c | 1348 | #define _LOAD_W_2(i1, i2, xmm1, xmm2, mask, reg) /* Load the 16-byte chunks at i1*16(reg) and i2*16(reg) into xmm1 and xmm2, then shuffle the bytes of each in place with vpshufb using mask. */ \ |
| wolfSSL | 15:117db924cf7c | 1349 | "vmovdqu " #i1 "*16(%%" #reg "), %%" #xmm1 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1350 | "vmovdqu " #i2 "*16(%%" #reg "), %%" #xmm2 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1351 | "vpshufb %%" #mask ", %%" #xmm1 ", %%" #xmm1 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1352 | "vpshufb %%" #mask ", %%" #xmm2 ", %%" #xmm2 "\n\t" |
| wolfSSL | 15:117db924cf7c | 1353 | #define LOAD_W_2(i1, i2, xmm1, xmm2, mask, reg) /* Indirection so that macro arguments are expanded before _LOAD_W_2 stringifies them with #. */ \ |
| wolfSSL | 15:117db924cf7c | 1354 | _LOAD_W_2(i1, i2, xmm1, xmm2, mask, reg) |
| wolfSSL | 15:117db924cf7c | 1355 | |
| wolfSSL | 15:117db924cf7c | 1356 | #define LOAD_W(mask, reg) \ |
| wolfSSL | 15:117db924cf7c | 1357 | /* W[0..15] = buffer[0..15]: eight 16-byte loads from reg (two 64-bit words each) into W_0..W_14, byte-shuffled with mask. NOTE(review): the original comment also said "X0..3(xmm4..7)", which does not match the eight registers loaded here. */ \ |
| wolfSSL | 15:117db924cf7c | 1358 | LOAD_W_2(0, 1, W_0 , W_2 , mask, reg) \ |
| wolfSSL | 15:117db924cf7c | 1359 | LOAD_W_2(2, 3, W_4 , W_6 , mask, reg) \ |
| wolfSSL | 15:117db924cf7c | 1360 | LOAD_W_2(4, 5, W_8 , W_10, mask, reg) \ |
| wolfSSL | 15:117db924cf7c | 1361 | LOAD_W_2(6, 7, W_12, W_14, mask, reg) |
| wolfSSL | 15:117db924cf7c | 1362 | |
| wolfSSL | 15:117db924cf7c | 1363 | #define _SET_W_X_2(xmm0, xmm1, reg, i) /* Add the 32 bytes of constants at i(reg) to xmm0 and xmm1 as packed 64-bit lanes and store the two sums at i+0(WX) and i+16(WX). Uses xmm8 and xmm9 as scratch. */ \ |
| wolfSSL | 15:117db924cf7c | 1364 | "vpaddq " #i "+ 0(%%" #reg "), %%" #xmm0 ", %%xmm8\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1365 | "vpaddq " #i "+16(%%" #reg "), %%" #xmm1 ", %%xmm9\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1366 | "vmovdqu %%xmm8, " #i "+ 0(" WX ")\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1367 | "vmovdqu %%xmm9, " #i "+16(" WX ")\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1368 | |
| wolfSSL | 15:117db924cf7c | 1369 | #define SET_W_X_2(xmm0, xmm1, reg, i) /* Indirection so that macro arguments are expanded before _SET_W_X_2 stringifies them with #. */ \ |
| wolfSSL | 15:117db924cf7c | 1370 | _SET_W_X_2(xmm0, xmm1, reg, i) |
| wolfSSL | 15:117db924cf7c | 1371 | |
| wolfSSL | 15:117db924cf7c | 1372 | #define SET_W_X(reg) /* Fill the 128 bytes at WX with W_0..W_14 plus the 128 bytes of constants at reg; the round macros then read single words from i*8(WX). */ \ |
| wolfSSL | 15:117db924cf7c | 1373 | SET_W_X_2(W_0 , W_2 , reg, 0) \ |
| wolfSSL | 15:117db924cf7c | 1374 | SET_W_X_2(W_4 , W_6 , reg, 32) \ |
| wolfSSL | 15:117db924cf7c | 1375 | SET_W_X_2(W_8 , W_10, reg, 64) \ |
| wolfSSL | 15:117db924cf7c | 1376 | SET_W_X_2(W_12, W_14, reg, 96) |
| wolfSSL | 15:117db924cf7c | 1377 | |
| wolfSSL | 15:117db924cf7c | 1378 | #define LOAD_DIGEST() /* Load the eight 64-bit words at offsets 0..56 of the %[sha512] operand into r8..r15. */ \ |
| wolfSSL | 15:117db924cf7c | 1379 | "movq (%[sha512]), %%r8 \n\t" \ |
| wolfSSL | 15:117db924cf7c | 1380 | "movq 8(%[sha512]), %%r9 \n\t" \ |
| wolfSSL | 15:117db924cf7c | 1381 | "movq 16(%[sha512]), %%r10\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1382 | "movq 24(%[sha512]), %%r11\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1383 | "movq 32(%[sha512]), %%r12\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1384 | "movq 40(%[sha512]), %%r13\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1385 | "movq 48(%[sha512]), %%r14\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1386 | "movq 56(%[sha512]), %%r15\n\t" |
| wolfSSL | 15:117db924cf7c | 1387 | |
| wolfSSL | 15:117db924cf7c | 1388 | #define STORE_ADD_DIGEST() /* Add r8..r15 into the eight 64-bit words at offsets 0..56 of %[sha512]; memory is updated, the registers are left unchanged. */ \ |
| wolfSSL | 15:117db924cf7c | 1389 | "addq %%r8, (%[sha512])\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1390 | "addq %%r9, 8(%[sha512])\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1391 | "addq %%r10, 16(%[sha512])\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1392 | "addq %%r11, 24(%[sha512])\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1393 | "addq %%r12, 32(%[sha512])\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1394 | "addq %%r13, 40(%[sha512])\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1395 | "addq %%r14, 48(%[sha512])\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1396 | "addq %%r15, 56(%[sha512])\n\t" |
| wolfSSL | 15:117db924cf7c | 1397 | |
| wolfSSL | 15:117db924cf7c | 1398 | #define ADD_DIGEST() /* Add the eight 64-bit words at offsets 0..56 of %[sha512] into r8..r15; the registers are updated, memory is left unchanged. */ \ |
| wolfSSL | 15:117db924cf7c | 1399 | "addq (%[sha512]), %%r8 \n\t" \ |
| wolfSSL | 15:117db924cf7c | 1400 | "addq 8(%[sha512]), %%r9 \n\t" \ |
| wolfSSL | 15:117db924cf7c | 1401 | "addq 16(%[sha512]), %%r10\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1402 | "addq 24(%[sha512]), %%r11\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1403 | "addq 32(%[sha512]), %%r12\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1404 | "addq 40(%[sha512]), %%r13\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1405 | "addq 48(%[sha512]), %%r14\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1406 | "addq 56(%[sha512]), %%r15\n\t" |
| wolfSSL | 15:117db924cf7c | 1407 | |
| wolfSSL | 15:117db924cf7c | 1408 | #define STORE_DIGEST() /* Store r8..r15 to the eight 64-bit words at offsets 0..56 of %[sha512], overwriting them. */ \ |
| wolfSSL | 15:117db924cf7c | 1409 | "movq %%r8, (%[sha512])\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1410 | "movq %%r9, 8(%[sha512])\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1411 | "movq %%r10, 16(%[sha512])\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1412 | "movq %%r11, 24(%[sha512])\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1413 | "movq %%r12, 32(%[sha512])\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1414 | "movq %%r13, 40(%[sha512])\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1415 | "movq %%r14, 48(%[sha512])\n\t" \ |
| wolfSSL | 15:117db924cf7c | 1416 | "movq %%r15, 56(%[sha512])\n\t" |
| wolfSSL | 15:117db924cf7c | 1417 | |
| wolfSSL | 15:117db924cf7c | 1418 | #endif /* HAVE_INTEL_AVX1 */ |
| wolfSSL | 15:117db924cf7c | 1419 | |
| wolfSSL | 15:117db924cf7c | 1420 | |
| wolfSSL | 15:117db924cf7c | 1421 | /*** Transform Body ***/ |
| wolfSSL | 15:117db924cf7c | 1422 | #if defined(HAVE_INTEL_AVX1) |
| wolfSSL | 15:117db924cf7c | 1423 | static int Transform_Sha512_AVX1(wc_Sha512* sha512) /* Compress the 128-byte block stored at offset 64 of *sha512 into the eight 64-bit digest words at offset 0, entirely in inline assembly: 4 x 16 rounds with message scheduling, then 16 rounds without. Always returns 0. */ |
| wolfSSL | 15:117db924cf7c | 1424 | { |
| wolfSSL | 15:117db924cf7c | 1425 | __asm__ __volatile__ ( |
| wolfSSL | 15:117db924cf7c | 1426 | |
| wolfSSL | 15:117db924cf7c | 1427 | /* Reserve 136 bytes of stack: 16 Ws (128 bytes) plus the loop counter kept at 16*8(WX). rax = address of the block at sha512+64. */ |
| wolfSSL | 15:117db924cf7c | 1428 | "subq $136, %%rsp\n\t" |
| wolfSSL | 15:117db924cf7c | 1429 | "leaq 64(%[sha512]), %%rax\n\t" |
| wolfSSL | 15:117db924cf7c | 1430 | |
| wolfSSL | 15:117db924cf7c | 1431 | INIT_MASK(MASK) |
| wolfSSL | 15:117db924cf7c | 1432 | LOAD_DIGEST() |
| wolfSSL | 15:117db924cf7c | 1433 | |
| wolfSSL | 15:117db924cf7c | 1434 | LOAD_W(MASK, rax) |
| wolfSSL | 15:117db924cf7c | 1435 | |
| wolfSSL | 15:117db924cf7c | 1436 | "movl $4, 16*8(" WX ")\n\t" |
| wolfSSL | 15:117db924cf7c | 1437 | "leaq %[K512], %%rsi\n\t" |
| wolfSSL | 15:117db924cf7c | 1438 | /* L4 = b */ |
| wolfSSL | 15:117db924cf7c | 1439 | "movq %%r9, " L4 "\n\t" |
| wolfSSL | 15:117db924cf7c | 1440 | /* L1 = e (first use of L1 overwrites the block address if L1 is rax - TODO confirm against the L1 definition) */ |
| wolfSSL | 15:117db924cf7c | 1441 | "movq %%r12, " L1 "\n\t" |
| wolfSSL | 15:117db924cf7c | 1442 | /* L4 = b ^ c, the value the first round expects to find there */ |
| wolfSSL | 15:117db924cf7c | 1443 | "xorq %%r10, " L4 "\n\t" |
| wolfSSL | 15:117db924cf7c | 1444 | |
| wolfSSL | 15:117db924cf7c | 1445 | "# Start of 16 rounds\n" |
| wolfSSL | 15:117db924cf7c | 1446 | "1:\n\t" |
| wolfSSL | 15:117db924cf7c | 1447 | |
| wolfSSL | 15:117db924cf7c | 1448 | SET_W_X(rsi) |
| wolfSSL | 15:117db924cf7c | 1449 | |
| wolfSSL | 15:117db924cf7c | 1450 | "addq $128, %%rsi\n\t" |
| wolfSSL | 15:117db924cf7c | 1451 | |
| wolfSSL | 15:117db924cf7c | 1452 | MsgSched2(W_0,W_2,W_4,W_6,W_8,W_10,W_12,W_14,RA,RB,RC,RD,RE,RF,RG,RH, 0) |
| wolfSSL | 15:117db924cf7c | 1453 | MsgSched2(W_2,W_4,W_6,W_8,W_10,W_12,W_14,W_0,RG,RH,RA,RB,RC,RD,RE,RF, 2) |
| wolfSSL | 15:117db924cf7c | 1454 | MsgSched2(W_4,W_6,W_8,W_10,W_12,W_14,W_0,W_2,RE,RF,RG,RH,RA,RB,RC,RD, 4) |
| wolfSSL | 15:117db924cf7c | 1455 | MsgSched2(W_6,W_8,W_10,W_12,W_14,W_0,W_2,W_4,RC,RD,RE,RF,RG,RH,RA,RB, 6) |
| wolfSSL | 15:117db924cf7c | 1456 | MsgSched2(W_8,W_10,W_12,W_14,W_0,W_2,W_4,W_6,RA,RB,RC,RD,RE,RF,RG,RH, 8) |
| wolfSSL | 15:117db924cf7c | 1457 | MsgSched2(W_10,W_12,W_14,W_0,W_2,W_4,W_6,W_8,RG,RH,RA,RB,RC,RD,RE,RF,10) |
| wolfSSL | 15:117db924cf7c | 1458 | MsgSched2(W_12,W_14,W_0,W_2,W_4,W_6,W_8,W_10,RE,RF,RG,RH,RA,RB,RC,RD,12) |
| wolfSSL | 15:117db924cf7c | 1459 | MsgSched2(W_14,W_0,W_2,W_4,W_6,W_8,W_10,W_12,RC,RD,RE,RF,RG,RH,RA,RB,14) |
| wolfSSL | 15:117db924cf7c | 1460 | |
| wolfSSL | 15:117db924cf7c | 1461 | "subl $1, 16*8(" WX ")\n\t" |
| wolfSSL | 15:117db924cf7c | 1462 | "jne 1b\n\t" |
| wolfSSL | 15:117db924cf7c | 1463 | |
| wolfSSL | 15:117db924cf7c | 1464 | SET_W_X(rsi) |
| wolfSSL | 15:117db924cf7c | 1465 | |
| wolfSSL | 15:117db924cf7c | 1466 | RND_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH, 0) |
| wolfSSL | 15:117db924cf7c | 1467 | RND_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF, 2) |
| wolfSSL | 15:117db924cf7c | 1468 | RND_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD, 4) |
| wolfSSL | 15:117db924cf7c | 1469 | RND_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB, 6) |
| wolfSSL | 15:117db924cf7c | 1470 | |
| wolfSSL | 15:117db924cf7c | 1471 | RND_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH, 8) |
| wolfSSL | 15:117db924cf7c | 1472 | RND_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF,10) |
| wolfSSL | 15:117db924cf7c | 1473 | RND_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD,12) |
| wolfSSL | 15:117db924cf7c | 1474 | RND_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB,14) |
| wolfSSL | 15:117db924cf7c | 1475 | |
| wolfSSL | 15:117db924cf7c | 1476 | STORE_ADD_DIGEST() |
| wolfSSL | 15:117db924cf7c | 1477 | |
| wolfSSL | 15:117db924cf7c | 1478 | "addq $136, %%rsp\n\t" |
| wolfSSL | 15:117db924cf7c | 1479 | |
| wolfSSL | 15:117db924cf7c | 1480 | : |
| wolfSSL | 15:117db924cf7c | 1481 | : [mask] "m" (mBYTE_FLIP_MASK), |
| wolfSSL | 15:117db924cf7c | 1482 | [sha512] "r" (sha512), |
| wolfSSL | 15:117db924cf7c | 1483 | [K512] "m" (K512) |
| wolfSSL | 15:117db924cf7c | 1484 | : WORK_REGS, STATE_REGS, XMM_REGS, "memory", "rsi" |
| wolfSSL | 15:117db924cf7c | 1485 | ); |
| wolfSSL | 15:117db924cf7c | 1486 | |
| wolfSSL | 15:117db924cf7c | 1487 | return 0; |
| wolfSSL | 15:117db924cf7c | 1488 | } |
| wolfSSL | 15:117db924cf7c | 1489 | |
| wolfSSL | 15:117db924cf7c | 1490 | static int Transform_Sha512_AVX1_Len(wc_Sha512* sha512, word32 len) |
| wolfSSL | 15:117db924cf7c | 1491 | { |
| wolfSSL | 15:117db924cf7c | 1492 | __asm__ __volatile__ ( |
| wolfSSL | 15:117db924cf7c | 1493 | /* Multi-block AVX1 transform: consumes the input 128 bytes at a time (loop at label 2) until len reaches 0. Always returns 0. */ |
| wolfSSL | 15:117db924cf7c | 1494 | "movq 224(%[sha512]), %%rsi\n\t" /* rsi = input pointer read from byte offset 224 of *sha512 (hard-coded offset; presumably the data field - TODO confirm against wc_Sha512) */ |
| wolfSSL | 15:117db924cf7c | 1495 | "leaq %[K512], %%rdx\n\t" /* rdx = address of the K512 table */ |
| wolfSSL | 15:117db924cf7c | 1496 | |
| wolfSSL | 15:117db924cf7c | 1497 | INIT_MASK(MASK) |
| wolfSSL | 15:117db924cf7c | 1498 | LOAD_DIGEST() |
| wolfSSL | 15:117db924cf7c | 1499 | /* The block loop is do-while shaped: it always runs once and exits only when len hits exactly 0, so len must be a non-zero multiple of 128. */ |
| wolfSSL | 15:117db924cf7c | 1500 | "# Start of processing a block\n" |
| wolfSSL | 15:117db924cf7c | 1501 | "2:\n\t" |
| wolfSSL | 15:117db924cf7c | 1502 | |
| wolfSSL | 15:117db924cf7c | 1503 | /* Stack scratch: 16 Ws plus loop counter (at 16*8) and saved K512 pointer (at 17*8). len goes into -4(%rsp). |
| wolfSSL | 15:117db924cf7c | 1504 | * Debug needs more stack space. */ |
| wolfSSL | 15:117db924cf7c | 1505 | "subq $256, %%rsp\n\t" |
| wolfSSL | 15:117db924cf7c | 1506 | |
| wolfSSL | 15:117db924cf7c | 1507 | LOAD_W(MASK, rsi) |
| wolfSSL | 15:117db924cf7c | 1508 | /* Counter for the round loop at label 1: 4 passes. */ |
| wolfSSL | 15:117db924cf7c | 1509 | "movl $4, 16*8(" WX ")\n\t" |
| wolfSSL | 15:117db924cf7c | 1510 | /* b */ |
| wolfSSL | 15:117db924cf7c | 1511 | "movq %%r9, " L4 "\n\t" |
| wolfSSL | 15:117db924cf7c | 1512 | /* e */ |
| wolfSSL | 15:117db924cf7c | 1513 | "movq %%r12, " L1 "\n\t" |
| wolfSSL | 15:117db924cf7c | 1514 | /* b ^ c */ |
| wolfSSL | 15:117db924cf7c | 1515 | "xorq %%r10, " L4 "\n\t" |
| wolfSSL | 15:117db924cf7c | 1516 | |
| wolfSSL | 15:117db924cf7c | 1517 | SET_W_X(rdx) |
| wolfSSL | 15:117db924cf7c | 1518 | |
| wolfSSL | 15:117db924cf7c | 1519 | "# Start of 16 rounds\n" |
| wolfSSL | 15:117db924cf7c | 1520 | "1:\n\t" |
| wolfSSL | 15:117db924cf7c | 1521 | /* Step rdx to the next 16 K512 entries (128 bytes) and save it on the stack; it is reloaded after the MsgSched2 sequence (presumably because rdx is used as scratch there - TODO confirm). */ |
| wolfSSL | 15:117db924cf7c | 1522 | "addq $128, %%rdx\n\t" |
| wolfSSL | 15:117db924cf7c | 1523 | "movq %%rdx, 17*8(%%rsp)\n\t" |
| wolfSSL | 15:117db924cf7c | 1524 | |
| wolfSSL | 15:117db924cf7c | 1525 | MsgSched2(W_0,W_2,W_4,W_6,W_8,W_10,W_12,W_14,RA,RB,RC,RD,RE,RF,RG,RH, 0) |
| wolfSSL | 15:117db924cf7c | 1526 | MsgSched2(W_2,W_4,W_6,W_8,W_10,W_12,W_14,W_0,RG,RH,RA,RB,RC,RD,RE,RF, 2) |
| wolfSSL | 15:117db924cf7c | 1527 | MsgSched2(W_4,W_6,W_8,W_10,W_12,W_14,W_0,W_2,RE,RF,RG,RH,RA,RB,RC,RD, 4) |
| wolfSSL | 15:117db924cf7c | 1528 | MsgSched2(W_6,W_8,W_10,W_12,W_14,W_0,W_2,W_4,RC,RD,RE,RF,RG,RH,RA,RB, 6) |
| wolfSSL | 15:117db924cf7c | 1529 | MsgSched2(W_8,W_10,W_12,W_14,W_0,W_2,W_4,W_6,RA,RB,RC,RD,RE,RF,RG,RH, 8) |
| wolfSSL | 15:117db924cf7c | 1530 | MsgSched2(W_10,W_12,W_14,W_0,W_2,W_4,W_6,W_8,RG,RH,RA,RB,RC,RD,RE,RF,10) |
| wolfSSL | 15:117db924cf7c | 1531 | MsgSched2(W_12,W_14,W_0,W_2,W_4,W_6,W_8,W_10,RE,RF,RG,RH,RA,RB,RC,RD,12) |
| wolfSSL | 15:117db924cf7c | 1532 | MsgSched2(W_14,W_0,W_2,W_4,W_6,W_8,W_10,W_12,RC,RD,RE,RF,RG,RH,RA,RB,14) |
| wolfSSL | 15:117db924cf7c | 1533 | |
| wolfSSL | 15:117db924cf7c | 1534 | "movq 17*8(%%rsp), %%rdx\n\t" |
| wolfSSL | 15:117db924cf7c | 1535 | |
| wolfSSL | 15:117db924cf7c | 1536 | SET_W_X(rdx) |
| wolfSSL | 15:117db924cf7c | 1537 | |
| wolfSSL | 15:117db924cf7c | 1538 | "subl $1, 16*8(" WX ")\n\t" |
| wolfSSL | 15:117db924cf7c | 1539 | "jne 1b\n\t" |
| wolfSSL | 15:117db924cf7c | 1540 | /* Remaining rounds use RND_ALL_2 only (no MsgSched2 interleave). */ |
| wolfSSL | 15:117db924cf7c | 1541 | RND_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH, 0) |
| wolfSSL | 15:117db924cf7c | 1542 | RND_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF, 2) |
| wolfSSL | 15:117db924cf7c | 1543 | RND_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD, 4) |
| wolfSSL | 15:117db924cf7c | 1544 | RND_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB, 6) |
| wolfSSL | 15:117db924cf7c | 1545 | |
| wolfSSL | 15:117db924cf7c | 1546 | RND_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH, 8) |
| wolfSSL | 15:117db924cf7c | 1547 | RND_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF,10) |
| wolfSSL | 15:117db924cf7c | 1548 | RND_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD,12) |
| wolfSSL | 15:117db924cf7c | 1549 | RND_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB,14) |
| wolfSSL | 15:117db924cf7c | 1550 | |
| wolfSSL | 15:117db924cf7c | 1551 | ADD_DIGEST() |
| wolfSSL | 15:117db924cf7c | 1552 | |
| wolfSSL | 15:117db924cf7c | 1553 | "addq $256, %%rsp\n\t" /* release the scratch area before %[len] is referenced */ |
| wolfSSL | 15:117db924cf7c | 1554 | "leaq %[K512], %%rdx\n\t" /* rewind rdx to the start of K512 for the next block */ |
| wolfSSL | 15:117db924cf7c | 1555 | "addq $128, %%rsi\n\t" /* advance the input pointer by one block */ |
| wolfSSL | 15:117db924cf7c | 1556 | "subl $128, %[len]\n\t" |
| wolfSSL | 15:117db924cf7c | 1557 | /* The jnz below consumes the flags set by the subl above, so STORE_DIGEST() must leave the flags untouched (its definition is outside this view - TODO confirm). */ |
| wolfSSL | 15:117db924cf7c | 1558 | STORE_DIGEST() |
| wolfSSL | 15:117db924cf7c | 1559 | |
| wolfSSL | 15:117db924cf7c | 1560 | "jnz 2b\n\t" |
| wolfSSL | 15:117db924cf7c | 1561 | /* NOTE(review): %[len] is written by the subl above but is declared below as an input-only "m" operand; GCC documents that input operands must not be modified - consider making it a "+m" output. */ |
| wolfSSL | 15:117db924cf7c | 1562 | : |
| wolfSSL | 15:117db924cf7c | 1563 | : [mask] "m" (mBYTE_FLIP_MASK), |
| wolfSSL | 15:117db924cf7c | 1564 | [len] "m" (len), |
| wolfSSL | 15:117db924cf7c | 1565 | [sha512] "r" (sha512), |
| wolfSSL | 15:117db924cf7c | 1566 | [K512] "m" (K512) |
| wolfSSL | 15:117db924cf7c | 1567 | : WORK_REGS, STATE_REGS, XMM_REGS, "memory", "rsi" |
| wolfSSL | 15:117db924cf7c | 1568 | ); |
| wolfSSL | 15:117db924cf7c | 1569 | |
| wolfSSL | 15:117db924cf7c | 1570 | return 0; |
| wolfSSL | 15:117db924cf7c | 1571 | } |
| wolfSSL | 15:117db924cf7c | 1572 | #endif /* HAVE_INTEL_AVX1 */ |
| wolfSSL | 15:117db924cf7c | 1573 | |
| wolfSSL | 15:117db924cf7c | 1574 | #if defined(HAVE_INTEL_AVX2) && defined(HAVE_INTEL_RORX) |
| wolfSSL | 15:117db924cf7c | 1575 | static int Transform_Sha512_AVX1_RORX(wc_Sha512* sha512) |
| wolfSSL | 15:117db924cf7c | 1576 | { |
| wolfSSL | 15:117db924cf7c | 1577 | __asm__ __volatile__ ( |
| wolfSSL | 15:117db924cf7c | 1578 | /* Single-block AVX1/RORX transform: message words are loaded from byte offset 64 of *sha512 and the digest is written back via STORE_ADD_DIGEST(). Always returns 0. */ |
| wolfSSL | 15:117db924cf7c | 1579 | /* Stack scratch: 16 Ws plus loop counter (at 16*8) and a K512 slot; the K512 pointer stays in rsi in this function. */ |
| wolfSSL | 15:117db924cf7c | 1580 | "subq $144, %%rsp\n\t" |
| wolfSSL | 15:117db924cf7c | 1581 | "leaq 64(%[sha512]), %%rax\n\t" /* rax = address of byte offset 64 of *sha512 (presumably the internal block buffer - TODO confirm against wc_Sha512) */ |
| wolfSSL | 15:117db924cf7c | 1582 | |
| wolfSSL | 15:117db924cf7c | 1583 | INIT_MASK(MASK) |
| wolfSSL | 15:117db924cf7c | 1584 | LOAD_DIGEST() |
| wolfSSL | 15:117db924cf7c | 1585 | |
| wolfSSL | 15:117db924cf7c | 1586 | LOAD_W(MASK, rax) |
| wolfSSL | 15:117db924cf7c | 1587 | /* Counter for the round loop at label 1: 4 passes. */ |
| wolfSSL | 15:117db924cf7c | 1588 | "movl $4, 16*8(" WX ")\n\t" |
| wolfSSL | 15:117db924cf7c | 1589 | "leaq %[K512], %%rsi\n\t" /* rsi = address of the K512 table */ |
| wolfSSL | 15:117db924cf7c | 1590 | /* L4 = b */ |
| wolfSSL | 15:117db924cf7c | 1591 | "movq %%r9, " L4 "\n\t" |
| wolfSSL | 15:117db924cf7c | 1592 | /* L3 = 0 (add to prev h) */ |
| wolfSSL | 15:117db924cf7c | 1593 | "xorq " L3 ", " L3 "\n\t" |
| wolfSSL | 15:117db924cf7c | 1594 | /* L4 = b ^ c */ |
| wolfSSL | 15:117db924cf7c | 1595 | "xorq %%r10, " L4 "\n\t" |
| wolfSSL | 15:117db924cf7c | 1596 | |
| wolfSSL | 15:117db924cf7c | 1597 | SET_W_X(rsi) |
| wolfSSL | 15:117db924cf7c | 1598 | |
| wolfSSL | 15:117db924cf7c | 1599 | "# Start of 16 rounds\n" |
| wolfSSL | 15:117db924cf7c | 1600 | "1:\n\t" |
| wolfSSL | 15:117db924cf7c | 1601 | /* Step rsi to the next 16 K512 entries (128 bytes). */ |
| wolfSSL | 15:117db924cf7c | 1602 | "addq $128, %%rsi\n\t" |
| wolfSSL | 15:117db924cf7c | 1603 | |
| wolfSSL | 15:117db924cf7c | 1604 | MsgSched_RORX(W_0,W_2,W_4,W_6,W_8,W_10,W_12,W_14,RA,RB,RC,RD,RE,RF,RG,RH, 0) |
| wolfSSL | 15:117db924cf7c | 1605 | MsgSched_RORX(W_2,W_4,W_6,W_8,W_10,W_12,W_14,W_0,RG,RH,RA,RB,RC,RD,RE,RF, 2) |
| wolfSSL | 15:117db924cf7c | 1606 | MsgSched_RORX(W_4,W_6,W_8,W_10,W_12,W_14,W_0,W_2,RE,RF,RG,RH,RA,RB,RC,RD, 4) |
| wolfSSL | 15:117db924cf7c | 1607 | MsgSched_RORX(W_6,W_8,W_10,W_12,W_14,W_0,W_2,W_4,RC,RD,RE,RF,RG,RH,RA,RB, 6) |
| wolfSSL | 15:117db924cf7c | 1608 | MsgSched_RORX(W_8,W_10,W_12,W_14,W_0,W_2,W_4,W_6,RA,RB,RC,RD,RE,RF,RG,RH, 8) |
| wolfSSL | 15:117db924cf7c | 1609 | MsgSched_RORX(W_10,W_12,W_14,W_0,W_2,W_4,W_6,W_8,RG,RH,RA,RB,RC,RD,RE,RF,10) |
| wolfSSL | 15:117db924cf7c | 1610 | MsgSched_RORX(W_12,W_14,W_0,W_2,W_4,W_6,W_8,W_10,RE,RF,RG,RH,RA,RB,RC,RD,12) |
| wolfSSL | 15:117db924cf7c | 1611 | MsgSched_RORX(W_14,W_0,W_2,W_4,W_6,W_8,W_10,W_12,RC,RD,RE,RF,RG,RH,RA,RB,14) |
| wolfSSL | 15:117db924cf7c | 1612 | |
| wolfSSL | 15:117db924cf7c | 1613 | SET_W_X(rsi) |
| wolfSSL | 15:117db924cf7c | 1614 | |
| wolfSSL | 15:117db924cf7c | 1615 | "subl $1, 16*8(" WX ")\n\t" |
| wolfSSL | 15:117db924cf7c | 1616 | "jne 1b\n\t" |
| wolfSSL | 15:117db924cf7c | 1617 | /* Remaining rounds use RND_RORX_ALL_2 only (no MsgSched_RORX interleave). */ |
| wolfSSL | 15:117db924cf7c | 1618 | RND_RORX_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH, 0) |
| wolfSSL | 15:117db924cf7c | 1619 | RND_RORX_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF, 2) |
| wolfSSL | 15:117db924cf7c | 1620 | RND_RORX_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD, 4) |
| wolfSSL | 15:117db924cf7c | 1621 | RND_RORX_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB, 6) |
| wolfSSL | 15:117db924cf7c | 1622 | |
| wolfSSL | 15:117db924cf7c | 1623 | RND_RORX_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH, 8) |
| wolfSSL | 15:117db924cf7c | 1624 | RND_RORX_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF,10) |
| wolfSSL | 15:117db924cf7c | 1625 | RND_RORX_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD,12) |
| wolfSSL | 15:117db924cf7c | 1626 | RND_RORX_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB,14) |
| wolfSSL | 15:117db924cf7c | 1627 | |
| wolfSSL | 15:117db924cf7c | 1628 | /* Prev RND: h += Maj(a,b,c) */ |
| wolfSSL | 15:117db924cf7c | 1629 | "addq " L3 ", %%r8\n\t" |
| wolfSSL | 15:117db924cf7c | 1630 | "addq $144, %%rsp\n\t" /* release the scratch area reserved at the top */ |
| wolfSSL | 15:117db924cf7c | 1631 | |
| wolfSSL | 15:117db924cf7c | 1632 | STORE_ADD_DIGEST() |
| wolfSSL | 15:117db924cf7c | 1633 | |
| wolfSSL | 15:117db924cf7c | 1634 | : |
| wolfSSL | 15:117db924cf7c | 1635 | : [mask] "m" (mBYTE_FLIP_MASK), |
| wolfSSL | 15:117db924cf7c | 1636 | [sha512] "r" (sha512), |
| wolfSSL | 15:117db924cf7c | 1637 | [K512] "m" (K512) |
| wolfSSL | 15:117db924cf7c | 1638 | : WORK_REGS, STATE_REGS, XMM_REGS, "memory", "rsi" |
| wolfSSL | 15:117db924cf7c | 1639 | ); |
| wolfSSL | 15:117db924cf7c | 1640 | |
| wolfSSL | 15:117db924cf7c | 1641 | return 0; |
| wolfSSL | 15:117db924cf7c | 1642 | } |
| wolfSSL | 15:117db924cf7c | 1643 | |
| wolfSSL | 15:117db924cf7c | 1644 | static int Transform_Sha512_AVX1_RORX_Len(wc_Sha512* sha512, word32 len) |
| wolfSSL | 15:117db924cf7c | 1645 | { |
| wolfSSL | 15:117db924cf7c | 1646 | __asm__ __volatile__ ( |
| wolfSSL | 15:117db924cf7c | 1647 | /* Multi-block AVX1/RORX transform: consumes the input 128 bytes at a time (loop at label 2) until len reaches 0. Always returns 0. */ |
| wolfSSL | 15:117db924cf7c | 1648 | "movq 224(%[sha512]), %%rsi\n\t" /* rsi = input pointer read from byte offset 224 of *sha512 (hard-coded offset; presumably the data field - TODO confirm against wc_Sha512) */ |
| wolfSSL | 15:117db924cf7c | 1649 | "leaq %[K512], %%rcx\n\t" /* rcx = address of the K512 table */ |
| wolfSSL | 15:117db924cf7c | 1650 | |
| wolfSSL | 15:117db924cf7c | 1651 | INIT_MASK(MASK) |
| wolfSSL | 15:117db924cf7c | 1652 | LOAD_DIGEST() |
| wolfSSL | 15:117db924cf7c | 1653 | /* The block loop is do-while shaped: it always runs once and exits only when len hits exactly 0, so len must be a non-zero multiple of 128. */ |
| wolfSSL | 15:117db924cf7c | 1654 | "# Start of processing a block\n" |
| wolfSSL | 15:117db924cf7c | 1655 | "2:\n\t" |
| wolfSSL | 15:117db924cf7c | 1656 | |
| wolfSSL | 15:117db924cf7c | 1657 | /* Stack scratch: 16 Ws plus loop counter (at 16*8) and saved K512 pointer (at 17*8). len goes into -4(%rsp). |
| wolfSSL | 15:117db924cf7c | 1658 | * Debug needs more stack space. */ |
| wolfSSL | 15:117db924cf7c | 1659 | "subq $256, %%rsp\n\t" |
| wolfSSL | 15:117db924cf7c | 1660 | |
| wolfSSL | 15:117db924cf7c | 1661 | LOAD_W(MASK, rsi) |
| wolfSSL | 15:117db924cf7c | 1662 | /* Counter for the round loop at label 1: 4 passes. */ |
| wolfSSL | 15:117db924cf7c | 1663 | "movl $4, 16*8(" WX ")\n\t" |
| wolfSSL | 15:117db924cf7c | 1664 | /* L4 = b */ |
| wolfSSL | 15:117db924cf7c | 1665 | "movq %%r9, " L4 "\n\t" |
| wolfSSL | 15:117db924cf7c | 1666 | /* L3 = 0 (add to prev h) */ |
| wolfSSL | 15:117db924cf7c | 1667 | "xorq " L3 ", " L3 "\n\t" |
| wolfSSL | 15:117db924cf7c | 1668 | /* L4 = b ^ c */ |
| wolfSSL | 15:117db924cf7c | 1669 | "xorq %%r10, " L4 "\n\t" |
| wolfSSL | 15:117db924cf7c | 1670 | |
| wolfSSL | 15:117db924cf7c | 1671 | SET_W_X(rcx) |
| wolfSSL | 15:117db924cf7c | 1672 | |
| wolfSSL | 15:117db924cf7c | 1673 | "# Start of 16 rounds\n" |
| wolfSSL | 15:117db924cf7c | 1674 | "1:\n\t" |
| wolfSSL | 15:117db924cf7c | 1675 | /* Step rcx to the next 16 K512 entries (128 bytes) and save it on the stack; it is reloaded after the MsgSched_RORX sequence (presumably because rcx is used as scratch there - TODO confirm). */ |
| wolfSSL | 15:117db924cf7c | 1676 | "addq $128, %%rcx\n\t" |
| wolfSSL | 15:117db924cf7c | 1677 | "movq %%rcx, 17*8(%%rsp)\n\t" |
| wolfSSL | 15:117db924cf7c | 1678 | |
| wolfSSL | 15:117db924cf7c | 1679 | MsgSched_RORX(W_0,W_2,W_4,W_6,W_8,W_10,W_12,W_14,RA,RB,RC,RD,RE,RF,RG,RH, 0) |
| wolfSSL | 15:117db924cf7c | 1680 | MsgSched_RORX(W_2,W_4,W_6,W_8,W_10,W_12,W_14,W_0,RG,RH,RA,RB,RC,RD,RE,RF, 2) |
| wolfSSL | 15:117db924cf7c | 1681 | MsgSched_RORX(W_4,W_6,W_8,W_10,W_12,W_14,W_0,W_2,RE,RF,RG,RH,RA,RB,RC,RD, 4) |
| wolfSSL | 15:117db924cf7c | 1682 | MsgSched_RORX(W_6,W_8,W_10,W_12,W_14,W_0,W_2,W_4,RC,RD,RE,RF,RG,RH,RA,RB, 6) |
| wolfSSL | 15:117db924cf7c | 1683 | MsgSched_RORX(W_8,W_10,W_12,W_14,W_0,W_2,W_4,W_6,RA,RB,RC,RD,RE,RF,RG,RH, 8) |
| wolfSSL | 15:117db924cf7c | 1684 | MsgSched_RORX(W_10,W_12,W_14,W_0,W_2,W_4,W_6,W_8,RG,RH,RA,RB,RC,RD,RE,RF,10) |
| wolfSSL | 15:117db924cf7c | 1685 | MsgSched_RORX(W_12,W_14,W_0,W_2,W_4,W_6,W_8,W_10,RE,RF,RG,RH,RA,RB,RC,RD,12) |
| wolfSSL | 15:117db924cf7c | 1686 | MsgSched_RORX(W_14,W_0,W_2,W_4,W_6,W_8,W_10,W_12,RC,RD,RE,RF,RG,RH,RA,RB,14) |
| wolfSSL | 15:117db924cf7c | 1687 | |
| wolfSSL | 15:117db924cf7c | 1688 | "movq 17*8(%%rsp), %%rcx\n\t" |
| wolfSSL | 15:117db924cf7c | 1689 | |
| wolfSSL | 15:117db924cf7c | 1690 | SET_W_X(rcx) |
| wolfSSL | 15:117db924cf7c | 1691 | |
| wolfSSL | 15:117db924cf7c | 1692 | "subl $1, 16*8(" WX ")\n\t" |
| wolfSSL | 15:117db924cf7c | 1693 | "jne 1b\n\t" |
| wolfSSL | 15:117db924cf7c | 1694 | /* NOTE(review): SET_W_X(rcx) was already issued just before the subl/jne with the same rcx, and the single-block RORX function has no such repeat; this one looks redundant - confirm against the SET_W_X definition. */ |
| wolfSSL | 15:117db924cf7c | 1695 | SET_W_X(rcx) |
| wolfSSL | 15:117db924cf7c | 1696 | |
| wolfSSL | 15:117db924cf7c | 1697 | RND_RORX_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH, 0) |
| wolfSSL | 15:117db924cf7c | 1698 | RND_RORX_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF, 2) |
| wolfSSL | 15:117db924cf7c | 1699 | RND_RORX_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD, 4) |
| wolfSSL | 15:117db924cf7c | 1700 | RND_RORX_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB, 6) |
| wolfSSL | 15:117db924cf7c | 1701 | |
| wolfSSL | 15:117db924cf7c | 1702 | RND_RORX_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH, 8) |
| wolfSSL | 15:117db924cf7c | 1703 | RND_RORX_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF,10) |
| wolfSSL | 15:117db924cf7c | 1704 | RND_RORX_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD,12) |
| wolfSSL | 15:117db924cf7c | 1705 | RND_RORX_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB,14) |
| wolfSSL | 15:117db924cf7c | 1706 | |
| wolfSSL | 15:117db924cf7c | 1707 | /* Prev RND: h += Maj(a,b,c) */ |
| wolfSSL | 15:117db924cf7c | 1708 | "addq " L3 ", %%r8\n\t" |
| wolfSSL | 15:117db924cf7c | 1709 | "addq $256, %%rsp\n\t" /* release the scratch area before %[len] is referenced */ |
| wolfSSL | 15:117db924cf7c | 1710 | |
| wolfSSL | 15:117db924cf7c | 1711 | ADD_DIGEST() |
| wolfSSL | 15:117db924cf7c | 1712 | |
| wolfSSL | 15:117db924cf7c | 1713 | "leaq %[K512], %%rcx\n\t" /* rewind rcx to the start of K512 for the next block */ |
| wolfSSL | 15:117db924cf7c | 1714 | "addq $128, %%rsi\n\t" /* advance the input pointer by one block */ |
| wolfSSL | 15:117db924cf7c | 1715 | "subl $128, %[len]\n\t" |
| wolfSSL | 15:117db924cf7c | 1716 | /* The jnz below consumes the flags set by the subl above, so STORE_DIGEST() must leave the flags untouched (its definition is outside this view - TODO confirm). */ |
| wolfSSL | 15:117db924cf7c | 1717 | STORE_DIGEST() |
| wolfSSL | 15:117db924cf7c | 1718 | |
| wolfSSL | 15:117db924cf7c | 1719 | "jnz 2b\n\t" |
| wolfSSL | 15:117db924cf7c | 1720 | /* NOTE(review): %[len] is written by the subl above but is declared below as an input-only "m" operand; GCC documents that input operands must not be modified - consider making it a "+m" output. */ |
| wolfSSL | 15:117db924cf7c | 1721 | : |
| wolfSSL | 15:117db924cf7c | 1722 | : [mask] "m" (mBYTE_FLIP_MASK), |
| wolfSSL | 15:117db924cf7c | 1723 | [len] "m" (len), |
| wolfSSL | 15:117db924cf7c | 1724 | [sha512] "r" (sha512), |
| wolfSSL | 15:117db924cf7c | 1725 | [K512] "m" (K512) |
| wolfSSL | 15:117db924cf7c | 1726 | : WORK_REGS, STATE_REGS, XMM_REGS, "memory", "rsi" |
| wolfSSL | 15:117db924cf7c | 1727 | ); |
| wolfSSL | 15:117db924cf7c | 1728 | |
| wolfSSL | 15:117db924cf7c | 1729 | return 0; |
| wolfSSL | 15:117db924cf7c | 1730 | } |
| wolfSSL | 15:117db924cf7c | 1731 | #endif /* HAVE_INTEL_AVX2 && HAVE_INTEL_RORX */ |
| wolfSSL | 15:117db924cf7c | 1732 | |
| wolfSSL | 15:117db924cf7c | 1733 | #if defined(HAVE_INTEL_AVX2) |
| wolfSSL | 15:117db924cf7c | 1734 | static const unsigned long mBYTE_FLIP_MASK_Y[] = /* the same two-word pattern stored twice (4 x 64 bits); presumably a byte-reversal shuffle mask for both halves of a ymm register - TODO confirm at its use site */ |
| wolfSSL | 15:117db924cf7c | 1735 | { 0x0001020304050607, 0x08090a0b0c0d0e0f, |
| wolfSSL | 15:117db924cf7c | 1736 | 0x0001020304050607, 0x08090a0b0c0d0e0f }; /* NOTE(review): the constants need 64 bits, so this relies on unsigned long being 64-bit (LP64 targets) */ |
| wolfSSL | 15:117db924cf7c | 1737 | /* ymm registers passed to the MsgSched4_AVX2-style macros as the message-schedule vectors (the names suggest four 64-bit words each, starting at W[n]). */ |
| wolfSSL | 15:117db924cf7c | 1738 | #define W_Y_0 ymm0 |
| wolfSSL | 15:117db924cf7c | 1739 | #define W_Y_4 ymm1 |
| wolfSSL | 15:117db924cf7c | 1740 | #define W_Y_8 ymm2 |
| wolfSSL | 15:117db924cf7c | 1741 | #define W_Y_12 ymm3 |
| wolfSSL | 15:117db924cf7c | 1742 | /* Generic aliases for xmm0-xmm9 and ymm0-ymm7; Y0..Y3 name the same registers as W_Y_0..W_Y_12 above. */ |
| wolfSSL | 15:117db924cf7c | 1743 | #define X0 xmm0 |
| wolfSSL | 15:117db924cf7c | 1744 | #define X1 xmm1 |
| wolfSSL | 15:117db924cf7c | 1745 | #define X2 xmm2 |
| wolfSSL | 15:117db924cf7c | 1746 | #define X3 xmm3 |
| wolfSSL | 15:117db924cf7c | 1747 | #define X4 xmm4 |
| wolfSSL | 15:117db924cf7c | 1748 | #define X5 xmm5 |
| wolfSSL | 15:117db924cf7c | 1749 | #define X6 xmm6 |
| wolfSSL | 15:117db924cf7c | 1750 | #define X7 xmm7 |
| wolfSSL | 15:117db924cf7c | 1751 | #define X8 xmm8 |
| wolfSSL | 15:117db924cf7c | 1752 | #define X9 xmm9 |
| wolfSSL | 15:117db924cf7c | 1753 | #define Y0 ymm0 |
| wolfSSL | 15:117db924cf7c | 1754 | #define Y1 ymm1 |
| wolfSSL | 15:117db924cf7c | 1755 | #define Y2 ymm2 |
| wolfSSL | 15:117db924cf7c | 1756 | #define Y3 ymm3 |
| wolfSSL | 15:117db924cf7c | 1757 | #define Y4 ymm4 |
| wolfSSL | 15:117db924cf7c | 1758 | #define Y5 ymm5 |
| wolfSSL | 15:117db924cf7c | 1759 | #define Y6 ymm6 |
| wolfSSL | 15:117db924cf7c | 1760 | #define Y7 ymm7 |
| wolfSSL | 15:117db924cf7c | 1761 | /* Fixed registers the MsgSched*_AVX2 macros write for the W[-15], W[-7] and W[-2] vectors; MASK_Y is ymm15 (its use is outside this view). */ |
| wolfSSL | 15:117db924cf7c | 1762 | #define W_Y_M15 ymm12 |
| wolfSSL | 15:117db924cf7c | 1763 | #define W_Y_M7 ymm13 |
| wolfSSL | 15:117db924cf7c | 1764 | #define W_Y_M2 ymm14 |
| wolfSSL | 15:117db924cf7c | 1765 | #define MASK_Y ymm15 |
| wolfSSL | 15:117db924cf7c | 1766 | /* Scratch registers the MsgSched*_AVX2 macros use for shift/or/xor intermediates. */ |
| wolfSSL | 15:117db924cf7c | 1767 | #define YTMP1 ymm8 |
| wolfSSL | 15:117db924cf7c | 1768 | #define YTMP2 ymm9 |
| wolfSSL | 15:117db924cf7c | 1769 | #define YTMP3 ymm10 |
| wolfSSL | 15:117db924cf7c | 1770 | #define YTMP4 ymm11 |
| wolfSSL | 15:117db924cf7c | 1771 | /* Clobber-list fragment naming all sixteen ymm registers (ymm0-ymm15), intended for asm statements built from the AVX2 macros. */ |
| wolfSSL | 15:117db924cf7c | 1772 | #define YMM_REGS \ |
| wolfSSL | 15:117db924cf7c | 1773 | "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", \ |
| wolfSSL | 15:117db924cf7c | 1774 | "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15" |
| wolfSSL | 15:117db924cf7c | 1775 | /* Emits "vperm2I128 $sel, %src2, %src1, %dest" (AT&T order: destination last). The outer macro expands its arguments before the inner one stringifies them. NOTE(review): mnemonic is spelled with a capital I - relies on the assembler accepting that. */ |
| wolfSSL | 15:117db924cf7c | 1776 | #define _VPERM2I128(dest, src1, src2, sel) \ |
| wolfSSL | 15:117db924cf7c | 1777 | "vperm2I128 $" #sel ", %%" #src2 ", %%" #src1 ", %%" #dest "\n\t" |
| wolfSSL | 15:117db924cf7c | 1778 | #define VPERM2I128(dest, src1, src2, sel) \ |
| wolfSSL | 15:117db924cf7c | 1779 | _VPERM2I128(dest, src1, src2, sel) |
| wolfSSL | 15:117db924cf7c | 1780 | /* Emits "vpermq $sel, %src, %dest". The outer macro expands its arguments (e.g. register aliases) before the inner one stringifies them. */ |
| wolfSSL | 15:117db924cf7c | 1781 | #define _VPERMQ(dest, src, sel) \ |
| wolfSSL | 15:117db924cf7c | 1782 | "vpermq $" #sel ", %%" #src ", %%" #dest "\n\t" |
| wolfSSL | 15:117db924cf7c | 1783 | #define VPERMQ(dest, src, sel) \ |
| wolfSSL | 15:117db924cf7c | 1784 | _VPERMQ(dest, src, sel) |
| wolfSSL | 15:117db924cf7c | 1785 | /* Emits "vpblendd $sel, %src2, %src1, %dest". The outer macro expands its arguments before the inner one stringifies them. */ |
| wolfSSL | 15:117db924cf7c | 1786 | #define _VPBLENDD(dest, src1, src2, sel) \ |
| wolfSSL | 15:117db924cf7c | 1787 | "vpblendd $" #sel ", %%" #src2 ", %%" #src1 ", %%" #dest "\n\t" |
| wolfSSL | 15:117db924cf7c | 1788 | #define VPBLENDD(dest, src1, src2, sel) \ |
| wolfSSL | 15:117db924cf7c | 1789 | _VPBLENDD(dest, src1, src2, sel) |
| wolfSSL | 15:117db924cf7c | 1790 | /* Emits "vpaddq i*8(%addr), %src1, %dest": the memory operand is at byte offset i*8 from register addr. The outer macro expands its arguments before the inner one stringifies them. */ |
| wolfSSL | 15:117db924cf7c | 1791 | #define _V_ADD_I(dest, src1, addr, i) \ |
| wolfSSL | 15:117db924cf7c | 1792 | "vpaddq "#i"*8(%%" #addr "), %%" #src1 ", %%" #dest "\n\t" |
| wolfSSL | 15:117db924cf7c | 1793 | #define V_ADD_I(dest, src1, addr, i) \ |
| wolfSSL | 15:117db924cf7c | 1794 | _V_ADD_I(dest, src1, addr, i) |
| wolfSSL | 15:117db924cf7c | 1795 | /* Emits "vmovdqu %src, i*8(%addr)": stores register src to memory at byte offset i*8 from register addr. The outer macro expands its arguments before the inner one stringifies them. */ |
| wolfSSL | 15:117db924cf7c | 1796 | #define _VMOVDQU_I(addr, i, src) \ |
| wolfSSL | 15:117db924cf7c | 1797 | "vmovdqu %%" #src ", " #i "*8(%%" #addr ")\n\t" |
| wolfSSL | 15:117db924cf7c | 1798 | #define VMOVDQU_I(addr, i, src) \ |
| wolfSSL | 15:117db924cf7c | 1799 | _VMOVDQU_I(addr, i, src) |
| wolfSSL | 15:117db924cf7c | 1800 | /* Interleaves four rounds (i .. i+3, via the RND_0_x / RND_1_x pieces defined elsewhere) with the vector computation of the next four message words into the register passed as W_Y_0. The first four parameters shadow the same-named register aliases; W_Y_M15, W_Y_M7, W_Y_M2 and YTMP1-4 are the fixed registers. */ |
| wolfSSL | 15:117db924cf7c | 1801 | #define MsgSched4_AVX2(W_Y_0,W_Y_4,W_Y_8,W_Y_12,a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1802 | RND_0_1(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1803 | /* W[-13]..W[-15], W[-12] */ \ |
| wolfSSL | 15:117db924cf7c | 1804 | VPBLENDD(W_Y_M15, W_Y_0, W_Y_4, 0x03) \ |
| wolfSSL | 15:117db924cf7c | 1805 | /* W[-5]..W[-7], W[-4] */ \ |
| wolfSSL | 15:117db924cf7c | 1806 | VPBLENDD(W_Y_M7, W_Y_8, W_Y_12, 0x03) \ |
| wolfSSL | 15:117db924cf7c | 1807 | RND_0_2(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1808 | RND_0_3(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1809 | /* W_Y_M15 = W[-12]..W[-15] */ \ |
| wolfSSL | 15:117db924cf7c | 1810 | VPERMQ(W_Y_M15, W_Y_M15, 0x39) \ |
| wolfSSL | 15:117db924cf7c | 1811 | RND_0_4(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1812 | /* W_Y_M7 = W[-4]..W[-7] */ \ |
| wolfSSL | 15:117db924cf7c | 1813 | VPERMQ(W_Y_M7, W_Y_M7, 0x39) \ |
| wolfSSL | 15:117db924cf7c | 1814 | RND_0_5(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1815 | RND_0_6(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1816 | /* W[-15] >> 1 */ \ |
| wolfSSL | 15:117db924cf7c | 1817 | V_SHIFT_R(YTMP1, W_Y_M15, 1) \ |
| wolfSSL | 15:117db924cf7c | 1818 | RND_0_7(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1819 | /* W[-15] << 63 */ \ |
| wolfSSL | 15:117db924cf7c | 1820 | V_SHIFT_L(YTMP2, W_Y_M15, 63) \ |
| wolfSSL | 15:117db924cf7c | 1821 | RND_0_8(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1822 | /* W[-15] >> 8 */ \ |
| wolfSSL | 15:117db924cf7c | 1823 | V_SHIFT_R(YTMP3, W_Y_M15, 8) \ |
| wolfSSL | 15:117db924cf7c | 1824 | RND_0_9(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1825 | /* W[-15] << 56 */ \ |
| wolfSSL | 15:117db924cf7c | 1826 | V_SHIFT_L(YTMP4, W_Y_M15, 56) \ |
| wolfSSL | 15:117db924cf7c | 1827 | RND_0_10(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1828 | /* W[-15] >>> 1 (rotate right, built by OR-ing the two shifts above) */ \ |
| wolfSSL | 15:117db924cf7c | 1829 | V_OR(YTMP1, YTMP2, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 1830 | RND_0_11(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1831 | /* W[-15] >>> 8 (rotate right, built by OR-ing the two shifts above) */ \ |
| wolfSSL | 15:117db924cf7c | 1832 | V_OR(YTMP3, YTMP4, YTMP3) \ |
| wolfSSL | 15:117db924cf7c | 1833 | RND_0_12(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1834 | RND_1_1(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1835 | /* W[-15] >> 7 */ \ |
| wolfSSL | 15:117db924cf7c | 1836 | V_SHIFT_R(YTMP4, W_Y_M15, 7) \ |
| wolfSSL | 15:117db924cf7c | 1837 | RND_1_2_A(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1838 | /* (W[-15] >>> 1) ^ (W[-15] >>> 8) */ \ |
| wolfSSL | 15:117db924cf7c | 1839 | V_XOR(YTMP1, YTMP3, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 1840 | RND_1_2_B(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1841 | /* s0(W[-15]) = (W[-15] >>> 1) ^ (W[-15] >>> 8) ^ (W[-15] >> 7) */ \ |
| wolfSSL | 15:117db924cf7c | 1842 | V_XOR(YTMP1, YTMP4, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 1843 | RND_1_3(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1844 | /* W[0] = W[-16] + W[-7] */ \ |
| wolfSSL | 15:117db924cf7c | 1845 | V_ADD(W_Y_0, W_Y_0, W_Y_M7) \ |
| wolfSSL | 15:117db924cf7c | 1846 | RND_1_4(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1847 | /* W[0] = W[-16] + W[-7] + s0(W[-15]) */ \ |
| wolfSSL | 15:117db924cf7c | 1848 | V_ADD(W_Y_0, W_Y_0, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 1849 | RND_1_5(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1850 | /* 0, 0, W[-1], W[-2] (listed high to low: the upper 128 bits are zeroed) */ \ |
| wolfSSL | 15:117db924cf7c | 1851 | VPERM2I128(W_Y_M2, W_Y_12, W_Y_12, 0x81) \ |
| wolfSSL | 15:117db924cf7c | 1852 | RND_1_6(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1853 | RND_1_7(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1854 | RND_1_8(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1855 | /* W[-2] >> 19 */ \ |
| wolfSSL | 15:117db924cf7c | 1856 | V_SHIFT_R(YTMP1, W_Y_M2, 19) \ |
| wolfSSL | 15:117db924cf7c | 1857 | RND_1_9(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1858 | /* W[-2] << 45 */ \ |
| wolfSSL | 15:117db924cf7c | 1859 | V_SHIFT_L(YTMP2, W_Y_M2, 45) \ |
| wolfSSL | 15:117db924cf7c | 1860 | RND_1_10(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1861 | /* W[-2] >> 61 */ \ |
| wolfSSL | 15:117db924cf7c | 1862 | V_SHIFT_R(YTMP3, W_Y_M2, 61) \ |
| wolfSSL | 15:117db924cf7c | 1863 | RND_1_11(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1864 | /* W[-2] << 3 */ \ |
| wolfSSL | 15:117db924cf7c | 1865 | V_SHIFT_L(YTMP4, W_Y_M2, 3) \ |
| wolfSSL | 15:117db924cf7c | 1866 | RND_1_12(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1867 | RND_0_1(g,h,a,b,c,d,e,f,i+2) \ |
| wolfSSL | 15:117db924cf7c | 1868 | /* W[-2] >>> 19 */ \ |
| wolfSSL | 15:117db924cf7c | 1869 | V_OR(YTMP1, YTMP2, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 1870 | RND_0_2(g,h,a,b,c,d,e,f,i+2) \ |
| wolfSSL | 15:117db924cf7c | 1871 | /* W[-2] >>> 61 */ \ |
| wolfSSL | 15:117db924cf7c | 1872 | V_OR(YTMP3, YTMP4, YTMP3) \ |
| wolfSSL | 15:117db924cf7c | 1873 | RND_0_3(g,h,a,b,c,d,e,f,i+2) \ |
| wolfSSL | 15:117db924cf7c | 1874 | /* (W[-2] >>> 19) ^ (W[-2] >>> 61) */ \ |
| wolfSSL | 15:117db924cf7c | 1875 | V_XOR(YTMP1, YTMP3, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 1876 | RND_0_4(g,h,a,b,c,d,e,f,i+2) \ |
| wolfSSL | 15:117db924cf7c | 1877 | /* W[-2] >> 6 */ \ |
| wolfSSL | 15:117db924cf7c | 1878 | V_SHIFT_R(YTMP4, W_Y_M2, 6) \ |
| wolfSSL | 15:117db924cf7c | 1879 | RND_0_5(g,h,a,b,c,d,e,f,i+2) \ |
| wolfSSL | 15:117db924cf7c | 1880 | /* s1(W[-2]) = (W[-2] >>> 19) ^ (W[-2] >>> 61) ^ (W[-2] >> 6) */ \ |
| wolfSSL | 15:117db924cf7c | 1881 | V_XOR(YTMP1, YTMP4, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 1882 | RND_0_6(g,h,a,b,c,d,e,f,i+2) \ |
| wolfSSL | 15:117db924cf7c | 1883 | /* W[0] = W[-16] + W[-7] + s0(W[-15]) + s1(W[-2]) -- completes the low two words; the upper half of W_Y_M2 was zero in this pass */ \ |
| wolfSSL | 15:117db924cf7c | 1884 | V_ADD(W_Y_0, W_Y_0, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 1885 | RND_0_7(g,h,a,b,c,d,e,f,i+2) \ |
| wolfSSL | 15:117db924cf7c | 1886 | RND_0_8(g,h,a,b,c,d,e,f,i+2) \ |
| wolfSSL | 15:117db924cf7c | 1887 | /* W[1], W[0], 0, 0 (high to low: the two new words moved to the upper half to serve as W[-2] for the remaining two words) */ \ |
| wolfSSL | 15:117db924cf7c | 1888 | VPERM2I128(W_Y_M2, W_Y_0, W_Y_0, 0x08) \ |
| wolfSSL | 15:117db924cf7c | 1889 | RND_0_9(g,h,a,b,c,d,e,f,i+2) \ |
| wolfSSL | 15:117db924cf7c | 1890 | RND_0_10(g,h,a,b,c,d,e,f,i+2) \ |
| wolfSSL | 15:117db924cf7c | 1891 | /* W[-2] >> 19 */ \ |
| wolfSSL | 15:117db924cf7c | 1892 | V_SHIFT_R(YTMP1, W_Y_M2, 19) \ |
| wolfSSL | 15:117db924cf7c | 1893 | RND_0_11(g,h,a,b,c,d,e,f,i+2) \ |
| wolfSSL | 15:117db924cf7c | 1894 | /* W[-2] << 45 */ \ |
| wolfSSL | 15:117db924cf7c | 1895 | V_SHIFT_L(YTMP2, W_Y_M2, 45) \ |
| wolfSSL | 15:117db924cf7c | 1896 | RND_0_12(g,h,a,b,c,d,e,f,i+2) \ |
| wolfSSL | 15:117db924cf7c | 1897 | RND_1_1(f,g,h,a,b,c,d,e,i+3) \ |
| wolfSSL | 15:117db924cf7c | 1898 | /* W[-2] >> 61 */ \ |
| wolfSSL | 15:117db924cf7c | 1899 | V_SHIFT_R(YTMP3, W_Y_M2, 61) \ |
| wolfSSL | 15:117db924cf7c | 1900 | RND_1_2(f,g,h,a,b,c,d,e,i+3) \ |
| wolfSSL | 15:117db924cf7c | 1901 | /* W[-2] << 3 */ \ |
| wolfSSL | 15:117db924cf7c | 1902 | V_SHIFT_L(YTMP4, W_Y_M2, 3) \ |
| wolfSSL | 15:117db924cf7c | 1903 | RND_1_3(f,g,h,a,b,c,d,e,i+3) \ |
| wolfSSL | 15:117db924cf7c | 1904 | /* W[-2] >>> 19 */ \ |
| wolfSSL | 15:117db924cf7c | 1905 | V_OR(YTMP1, YTMP2, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 1906 | RND_1_4(f,g,h,a,b,c,d,e,i+3) \ |
| wolfSSL | 15:117db924cf7c | 1907 | /* W[-2] >>> 61 */ \ |
| wolfSSL | 15:117db924cf7c | 1908 | V_OR(YTMP3, YTMP4, YTMP3) \ |
| wolfSSL | 15:117db924cf7c | 1909 | RND_1_5(f,g,h,a,b,c,d,e,i+3) \ |
| wolfSSL | 15:117db924cf7c | 1910 | /* (W[-2] >>> 19) ^ (W[-2] >>> 61) */ \ |
| wolfSSL | 15:117db924cf7c | 1911 | V_XOR(YTMP1, YTMP3, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 1912 | RND_1_6(f,g,h,a,b,c,d,e,i+3) \ |
| wolfSSL | 15:117db924cf7c | 1913 | /* W[-2] >> 6 */ \ |
| wolfSSL | 15:117db924cf7c | 1914 | V_SHIFT_R(YTMP4, W_Y_M2, 6) \ |
| wolfSSL | 15:117db924cf7c | 1915 | RND_1_7(f,g,h,a,b,c,d,e,i+3) \ |
| wolfSSL | 15:117db924cf7c | 1916 | /* (W[-2] >>> 19) ^ (W[-2] >>> 61) ^ (W[-2] >> 6) */ \ |
| wolfSSL | 15:117db924cf7c | 1917 | V_XOR(YTMP1, YTMP4, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 1918 | RND_1_8(f,g,h,a,b,c,d,e,i+3) \ |
| wolfSSL | 15:117db924cf7c | 1919 | /* W[0] = W[-16] + W[-7] + s0(W[-15]) + s1(W[-2]) -- completes the high two words; the lower half of W_Y_M2 was zero in this pass */ \ |
| wolfSSL | 15:117db924cf7c | 1920 | V_ADD(W_Y_0, W_Y_0, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 1921 | RND_1_9(f,g,h,a,b,c,d,e,i+3) \ |
| wolfSSL | 15:117db924cf7c | 1922 | RND_1_10(f,g,h,a,b,c,d,e,i+3) \ |
| wolfSSL | 15:117db924cf7c | 1923 | RND_1_11(f,g,h,a,b,c,d,e,i+3) \ |
| wolfSSL | 15:117db924cf7c | 1924 | RND_1_12(f,g,h,a,b,c,d,e,i+3) \ |
| wolfSSL | 15:117db924cf7c | 1925 | |
| wolfSSL | 15:117db924cf7c | 1926 | #define MsgSched2_AVX2(W_0,W_2,W_4,W_6,W_8,W_10,W_12,W_14,a,b,c,d,e,f,g,h,i) /* two rounds (i, i+1) interleaved with W_0 = W_0 + W_Y_M7 + s0(W_Y_M15) + s1(W_14); uses the fixed W_Y_M15, W_Y_M7 and YTMP1-4 registers */ \ |
| wolfSSL | 15:117db924cf7c | 1927 | RND_0_1(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1928 | VPALIGNR(W_Y_M15, W_2, W_0, 8) /* built from W_2 and W_0 with an 8-byte alignment shift (presumably the W[-15] vector, by analogy with MsgSched4_AVX2) */ \ |
| wolfSSL | 15:117db924cf7c | 1929 | VPALIGNR(W_Y_M7, W_10, W_8, 8) /* same construction from W_10 and W_8 (presumably the W[-7] vector) */ \ |
| wolfSSL | 15:117db924cf7c | 1930 | RND_0_2(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1931 | V_SHIFT_R(YTMP1, W_Y_M15, 1) \ |
| wolfSSL | 15:117db924cf7c | 1932 | V_SHIFT_L(YTMP2, W_Y_M15, 63) \ |
| wolfSSL | 15:117db924cf7c | 1933 | RND_0_3(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1934 | RND_0_4(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1935 | V_SHIFT_R(YTMP3, W_Y_M15, 8) \ |
| wolfSSL | 15:117db924cf7c | 1936 | V_SHIFT_L(YTMP4, W_Y_M15, 56) \ |
| wolfSSL | 15:117db924cf7c | 1937 | RND_0_5(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1938 | RND_0_6(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1939 | V_OR(YTMP1, YTMP2, YTMP1) /* rotate right by 1 */ \ |
| wolfSSL | 15:117db924cf7c | 1940 | V_OR(YTMP3, YTMP4, YTMP3) /* rotate right by 8 */ \ |
| wolfSSL | 15:117db924cf7c | 1941 | RND_0_7(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1942 | RND_0_8(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1943 | V_SHIFT_R(YTMP4, W_Y_M15, 7) \ |
| wolfSSL | 15:117db924cf7c | 1944 | V_XOR(YTMP1, YTMP3, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 1945 | RND_0_9(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1946 | RND_0_10(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1947 | V_XOR(YTMP1, YTMP4, YTMP1) /* YTMP1 = s0: (>>> 1) ^ (>>> 8) ^ (>> 7) of W_Y_M15 */ \ |
| wolfSSL | 15:117db924cf7c | 1948 | V_ADD(W_0, W_0, W_Y_M7) \ |
| wolfSSL | 15:117db924cf7c | 1949 | RND_0_11(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1950 | RND_0_12(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1951 | RND_1_1(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1952 | V_ADD(W_0, W_0, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 1953 | RND_1_2(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1954 | V_SHIFT_R(YTMP1, W_14, 19) \ |
| wolfSSL | 15:117db924cf7c | 1955 | V_SHIFT_L(YTMP2, W_14, 45) \ |
| wolfSSL | 15:117db924cf7c | 1956 | RND_1_3(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1957 | RND_1_4(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1958 | V_SHIFT_R(YTMP3, W_14, 61) \ |
| wolfSSL | 15:117db924cf7c | 1959 | V_SHIFT_L(YTMP4, W_14, 3) \ |
| wolfSSL | 15:117db924cf7c | 1960 | RND_1_5(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1961 | RND_1_6(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1962 | RND_1_7(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1963 | V_OR(YTMP1, YTMP2, YTMP1) /* rotate right by 19 */ \ |
| wolfSSL | 15:117db924cf7c | 1964 | V_OR(YTMP3, YTMP4, YTMP3) /* rotate right by 61 */ \ |
| wolfSSL | 15:117db924cf7c | 1965 | RND_1_8(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1966 | RND_1_9(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1967 | V_XOR(YTMP1, YTMP3, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 1968 | V_SHIFT_R(YTMP4, W_14, 6) \ |
| wolfSSL | 15:117db924cf7c | 1969 | RND_1_10(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1970 | RND_1_11(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1971 | V_XOR(YTMP1, YTMP4, YTMP1) /* YTMP1 = s1: (>>> 19) ^ (>>> 61) ^ (>> 6) of W_14 */ \ |
| wolfSSL | 15:117db924cf7c | 1972 | RND_1_12(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 1973 | V_ADD(W_0, W_0, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 1974 | /* Four rounds (i..i+3, working variables rotated by one per round) interleaved with the AVX2 message schedule for the next four words: W_Y_0 is updated in place to W[-16] + W[-7] + s0(W[-15]) + s1(W[-2]). The closing V_ADD_I / VMOVDQU_I pair presumably adds the constants at offset i from rsi and stores the sum at offset i from rsp (both helpers are defined outside this view). */ |
| wolfSSL | 15:117db924cf7c | 1975 | #define MsgSched4_AVX2_RORX_SET(W_Y_0,W_Y_4,W_Y_8,W_Y_12,a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1976 | RND_RORX_0_1(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1977 | /* W[-13]..W[-15], W[-12] */ \ |
| wolfSSL | 15:117db924cf7c | 1978 | VPBLENDD(W_Y_M15, W_Y_0, W_Y_4, 0x03) \ |
| wolfSSL | 15:117db924cf7c | 1979 | /* W[-5]..W[-7], W[-4] */ \ |
| wolfSSL | 15:117db924cf7c | 1980 | VPBLENDD(W_Y_M7, W_Y_8, W_Y_12, 0x03) \ |
| wolfSSL | 15:117db924cf7c | 1981 | RND_RORX_0_2(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1982 | /* W_Y_M15 = W[-12]..W[-15] */ \ |
| wolfSSL | 15:117db924cf7c | 1983 | VPERMQ(W_Y_M15, W_Y_M15, 0x39) \ |
| wolfSSL | 15:117db924cf7c | 1984 | RND_RORX_0_3(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1985 | /* W_Y_M7 = W[-4]..W[-7] */ \ |
| wolfSSL | 15:117db924cf7c | 1986 | VPERMQ(W_Y_M7, W_Y_M7, 0x39) \ |
| wolfSSL | 15:117db924cf7c | 1987 | RND_RORX_0_4(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1988 | /* W[-15] >> 1 */ \ |
| wolfSSL | 15:117db924cf7c | 1989 | V_SHIFT_R(YTMP1, W_Y_M15, 1) \ |
| wolfSSL | 15:117db924cf7c | 1990 | /* W[-15] << 63 */ \ |
| wolfSSL | 15:117db924cf7c | 1991 | V_SHIFT_L(YTMP2, W_Y_M15, 63) \ |
| wolfSSL | 15:117db924cf7c | 1992 | RND_RORX_0_5(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 1993 | /* W[-15] >> 8 */ \ |
| wolfSSL | 15:117db924cf7c | 1994 | V_SHIFT_R(YTMP3, W_Y_M15, 8) \ |
| wolfSSL | 15:117db924cf7c | 1995 | /* W[-15] << 56 */ \ |
| wolfSSL | 15:117db924cf7c | 1996 | V_SHIFT_L(YTMP4, W_Y_M15, 56) \ |
| wolfSSL | 15:117db924cf7c | 1997 | /* W[-15] >>> 1 */ \ |
| wolfSSL | 15:117db924cf7c | 1998 | V_OR(YTMP1, YTMP2, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 1999 | /* W[-15] >>> 8 */ \ |
| wolfSSL | 15:117db924cf7c | 2000 | V_OR(YTMP3, YTMP4, YTMP3) \ |
| wolfSSL | 15:117db924cf7c | 2001 | RND_RORX_0_6(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 2002 | /* W[-15] >> 7 */ \ |
| wolfSSL | 15:117db924cf7c | 2003 | V_SHIFT_R(YTMP4, W_Y_M15, 7) \ |
| wolfSSL | 15:117db924cf7c | 2004 | RND_RORX_0_7(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 2005 | /* 0, 0, W[-1], W[-2]: s1 inputs for the two low words only; the two high words need W[0] and W[1], which do not exist yet */ \ |
| wolfSSL | 15:117db924cf7c | 2006 | VPERM2I128(W_Y_M2, W_Y_12, W_Y_12, 0x81) \ |
| wolfSSL | 15:117db924cf7c | 2007 | RND_RORX_0_8(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 2008 | RND_RORX_1_1(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 2009 | /* (W[-15] >>> 1) ^ (W[-15] >>> 8) */ \ |
| wolfSSL | 15:117db924cf7c | 2010 | V_XOR(YTMP1, YTMP3, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 2011 | RND_RORX_1_2(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 2012 | /* (W[-15] >>> 1) ^ (W[-15] >>> 8) ^ (W[-15] >> 7) */ \ |
| wolfSSL | 15:117db924cf7c | 2013 | V_XOR(YTMP1, YTMP4, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 2014 | RND_RORX_1_3(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 2015 | /* W[0] = W[-16] + W[-7] */ \ |
| wolfSSL | 15:117db924cf7c | 2016 | V_ADD(W_Y_0, W_Y_0, W_Y_M7) \ |
| wolfSSL | 15:117db924cf7c | 2017 | /* W[0] = W[-16] + W[-7] + s0(W[-15]) */ \ |
| wolfSSL | 15:117db924cf7c | 2018 | V_ADD(W_Y_0, W_Y_0, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 2019 | RND_RORX_1_4(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 2020 | /* W[-2] >> 19 */ \ |
| wolfSSL | 15:117db924cf7c | 2021 | V_SHIFT_R(YTMP1, W_Y_M2, 19) \ |
| wolfSSL | 15:117db924cf7c | 2022 | /* W[-2] << 45 */ \ |
| wolfSSL | 15:117db924cf7c | 2023 | V_SHIFT_L(YTMP2, W_Y_M2, 45) \ |
| wolfSSL | 15:117db924cf7c | 2024 | RND_RORX_1_5(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 2025 | /* W[-2] >> 61 */ \ |
| wolfSSL | 15:117db924cf7c | 2026 | V_SHIFT_R(YTMP3, W_Y_M2, 61) \ |
| wolfSSL | 15:117db924cf7c | 2027 | /* W[-2] << 3 */ \ |
| wolfSSL | 15:117db924cf7c | 2028 | V_SHIFT_L(YTMP4, W_Y_M2, 3) \ |
| wolfSSL | 15:117db924cf7c | 2029 | /* W[-2] >>> 19 */ \ |
| wolfSSL | 15:117db924cf7c | 2030 | V_OR(YTMP1, YTMP2, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 2031 | RND_RORX_1_6(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 2032 | /* W[-2] >>> 61 */ \ |
| wolfSSL | 15:117db924cf7c | 2033 | V_OR(YTMP3, YTMP4, YTMP3) \ |
| wolfSSL | 15:117db924cf7c | 2034 | RND_RORX_1_7(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 2035 | /* (W[-2] >>> 19) ^ (W[-2] >>> 61) */ \ |
| wolfSSL | 15:117db924cf7c | 2036 | V_XOR(YTMP1, YTMP3, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 2037 | RND_RORX_1_8(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 2038 | /* W[-2] >> 6 */ \ |
| wolfSSL | 15:117db924cf7c | 2039 | V_SHIFT_R(YTMP4, W_Y_M2, 6) \ |
| wolfSSL | 15:117db924cf7c | 2040 | RND_RORX_0_1(g,h,a,b,c,d,e,f,i+2) \ |
| wolfSSL | 15:117db924cf7c | 2041 | /* (W[-2] >>> 19) ^ (W[-2] >>> 61) ^ (W[-2] >> 6) */ \ |
| wolfSSL | 15:117db924cf7c | 2042 | V_XOR(YTMP1, YTMP4, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 2043 | RND_RORX_0_2(g,h,a,b,c,d,e,f,i+2) \ |
| wolfSSL | 15:117db924cf7c | 2044 | /* W[0] = W[-16] + W[-7] + s0(W[-15]) + s1(W[-2]): the two low words are now complete */ \ |
| wolfSSL | 15:117db924cf7c | 2045 | V_ADD(W_Y_0, W_Y_0, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 2046 | RND_RORX_0_3(g,h,a,b,c,d,e,f,i+2) \ |
| wolfSSL | 15:117db924cf7c | 2047 | /* W[1], W[0], 0, 0: the two words just completed become the s1 inputs of the two high words */ \ |
| wolfSSL | 15:117db924cf7c | 2048 | VPERM2I128(W_Y_M2, W_Y_0, W_Y_0, 0x08) \ |
| wolfSSL | 15:117db924cf7c | 2049 | RND_RORX_0_4(g,h,a,b,c,d,e,f,i+2) \ |
| wolfSSL | 15:117db924cf7c | 2050 | RND_RORX_0_5(g,h,a,b,c,d,e,f,i+2) \ |
| wolfSSL | 15:117db924cf7c | 2051 | /* W[-2] >> 19 */ \ |
| wolfSSL | 15:117db924cf7c | 2052 | V_SHIFT_R(YTMP1, W_Y_M2, 19) \ |
| wolfSSL | 15:117db924cf7c | 2053 | /* W[-2] << 45 */ \ |
| wolfSSL | 15:117db924cf7c | 2054 | V_SHIFT_L(YTMP2, W_Y_M2, 45) \ |
| wolfSSL | 15:117db924cf7c | 2055 | RND_RORX_0_6(g,h,a,b,c,d,e,f,i+2) \ |
| wolfSSL | 15:117db924cf7c | 2056 | /* W[-2] >> 61 */ \ |
| wolfSSL | 15:117db924cf7c | 2057 | V_SHIFT_R(YTMP3, W_Y_M2, 61) \ |
| wolfSSL | 15:117db924cf7c | 2058 | /* W[-2] << 3 */ \ |
| wolfSSL | 15:117db924cf7c | 2059 | V_SHIFT_L(YTMP4, W_Y_M2, 3) \ |
| wolfSSL | 15:117db924cf7c | 2060 | /* W[-2] >>> 19 */ \ |
| wolfSSL | 15:117db924cf7c | 2061 | V_OR(YTMP1, YTMP2, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 2062 | RND_RORX_0_7(g,h,a,b,c,d,e,f,i+2) \ |
| wolfSSL | 15:117db924cf7c | 2063 | /* W[-2] >>> 61 */ \ |
| wolfSSL | 15:117db924cf7c | 2064 | V_OR(YTMP3, YTMP4, YTMP3) \ |
| wolfSSL | 15:117db924cf7c | 2065 | RND_RORX_0_8(g,h,a,b,c,d,e,f,i+2) \ |
| wolfSSL | 15:117db924cf7c | 2066 | /* (W[-2] >>> 19) ^ (W[-2] >>> 61) */ \ |
| wolfSSL | 15:117db924cf7c | 2067 | V_XOR(YTMP1, YTMP3, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 2068 | RND_RORX_1_1(f,g,h,a,b,c,d,e,i+3) \ |
| wolfSSL | 15:117db924cf7c | 2069 | /* W[-2] >> 6 */ \ |
| wolfSSL | 15:117db924cf7c | 2070 | V_SHIFT_R(YTMP4, W_Y_M2, 6) \ |
| wolfSSL | 15:117db924cf7c | 2071 | RND_RORX_1_2(f,g,h,a,b,c,d,e,i+3) \ |
| wolfSSL | 15:117db924cf7c | 2072 | RND_RORX_1_3(f,g,h,a,b,c,d,e,i+3) \ |
| wolfSSL | 15:117db924cf7c | 2073 | /* (W[-2] >>> 19) ^ (W[-2] >>> 61) ^ (W[-2] >> 6) */ \ |
| wolfSSL | 15:117db924cf7c | 2074 | V_XOR(YTMP1, YTMP4, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 2075 | RND_RORX_1_4(f,g,h,a,b,c,d,e,i+3) \ |
| wolfSSL | 15:117db924cf7c | 2076 | RND_RORX_1_5(f,g,h,a,b,c,d,e,i+3) \ |
| wolfSSL | 15:117db924cf7c | 2077 | /* W[0] = W[-16] + W[-7] + s0(W[-15]) + s1(W[-2]): the two high words are now complete as well */ \ |
| wolfSSL | 15:117db924cf7c | 2078 | V_ADD(W_Y_0, W_Y_0, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 2079 | RND_RORX_1_6(f,g,h,a,b,c,d,e,i+3) \ |
| wolfSSL | 15:117db924cf7c | 2080 | V_ADD_I(YTMP1, W_Y_0, rsi, i) \ |
| wolfSSL | 15:117db924cf7c | 2081 | RND_RORX_1_7(f,g,h,a,b,c,d,e,i+3) \ |
| wolfSSL | 15:117db924cf7c | 2082 | RND_RORX_1_8(f,g,h,a,b,c,d,e,i+3) \ |
| wolfSSL | 15:117db924cf7c | 2083 | VMOVDQU_I(rsp, i, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 2084 | /* Two rounds (i, then i+1 with the working variables rotated by one) interleaved with one schedule step: W_0 becomes W_0 + W_Y_M7 + s0(W_Y_M15) + s1(W_14), with s0(x) = (x >>> 1) ^ (x >>> 8) ^ (x >> 7) and s1(x) = (x >>> 19) ^ (x >>> 61) ^ (x >> 6), each rotate built from a right shift, a left shift and an OR. W_Y_M15 and W_Y_M7 are the VPALIGNR results of (W_2, W_0) and (W_10, W_8). W_4, W_6 and W_12 are accepted but never referenced in the body. */ |
| wolfSSL | 15:117db924cf7c | 2085 | #define MsgSched2_AVX2_RORX(W_0,W_2,W_4,W_6,W_8,W_10,W_12,W_14,a,b,c,d,e, \ |
| wolfSSL | 15:117db924cf7c | 2086 | f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 2087 | RND_RORX_0_1(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 2088 | VPALIGNR(W_Y_M15, W_2, W_0, 8) \ |
| wolfSSL | 15:117db924cf7c | 2089 | VPALIGNR(W_Y_M7, W_10, W_8, 8) \ |
| wolfSSL | 15:117db924cf7c | 2090 | RND_RORX_0_2(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 2091 | V_SHIFT_R(YTMP1, W_Y_M15, 1) \ |
| wolfSSL | 15:117db924cf7c | 2092 | V_SHIFT_L(YTMP2, W_Y_M15, 63) \ |
| wolfSSL | 15:117db924cf7c | 2093 | RND_RORX_0_3(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 2094 | V_SHIFT_R(YTMP3, W_Y_M15, 8) \ |
| wolfSSL | 15:117db924cf7c | 2095 | V_SHIFT_L(YTMP4, W_Y_M15, 56) \ |
| wolfSSL | 15:117db924cf7c | 2096 | RND_RORX_0_4(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 2097 | V_OR(YTMP1, YTMP2, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 2098 | V_OR(YTMP3, YTMP4, YTMP3) \ |
| wolfSSL | 15:117db924cf7c | 2099 | RND_RORX_0_5(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 2100 | V_SHIFT_R(YTMP4, W_Y_M15, 7) \ |
| wolfSSL | 15:117db924cf7c | 2101 | V_XOR(YTMP1, YTMP3, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 2102 | RND_RORX_0_6(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 2103 | V_XOR(YTMP1, YTMP4, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 2104 | V_ADD(W_0, W_0, W_Y_M7) \ |
| wolfSSL | 15:117db924cf7c | 2105 | RND_RORX_0_7(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 2106 | RND_RORX_0_8(a,b,c,d,e,f,g,h,i) \ |
| wolfSSL | 15:117db924cf7c | 2107 | V_ADD(W_0, W_0, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 2108 | RND_RORX_1_1(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 2109 | V_SHIFT_R(YTMP1, W_14, 19) \ |
| wolfSSL | 15:117db924cf7c | 2110 | V_SHIFT_L(YTMP2, W_14, 45) \ |
| wolfSSL | 15:117db924cf7c | 2111 | RND_RORX_1_2(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 2112 | V_SHIFT_R(YTMP3, W_14, 61) \ |
| wolfSSL | 15:117db924cf7c | 2113 | V_SHIFT_L(YTMP4, W_14, 3) \ |
| wolfSSL | 15:117db924cf7c | 2114 | RND_RORX_1_3(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 2115 | V_OR(YTMP1, YTMP2, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 2116 | V_OR(YTMP3, YTMP4, YTMP3) \ |
| wolfSSL | 15:117db924cf7c | 2117 | RND_RORX_1_4(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 2118 | RND_RORX_1_5(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 2119 | V_XOR(YTMP1, YTMP3, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 2120 | V_SHIFT_R(YTMP4, W_14, 6) \ |
| wolfSSL | 15:117db924cf7c | 2121 | RND_RORX_1_6(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 2122 | RND_RORX_1_7(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 2123 | V_XOR(YTMP1, YTMP4, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 2124 | RND_RORX_1_8(h,a,b,c,d,e,f,g,i+1) \ |
| wolfSSL | 15:117db924cf7c | 2125 | V_ADD(W_0, W_0, YTMP1) \ |
| wolfSSL | 15:117db924cf7c | 2126 | |
| wolfSSL | 15:117db924cf7c | 2127 | /* Emits a vmovdqu that loads the %[mask] asm operand into the register named by mask. The extra macro level lets mask be macro-expanded before # stringizes it. NOTE(review): the transforms visible further down call INIT_MASK, not INIT_MASK_Y - confirm whether this macro is still used. */ |
| wolfSSL | 15:117db924cf7c | 2128 | #define _INIT_MASK_Y(mask) \ |
| wolfSSL | 15:117db924cf7c | 2129 | "vmovdqu %[mask], %%"#mask"\n\t" |
| wolfSSL | 15:117db924cf7c | 2130 | #define INIT_MASK_Y(mask) \ |
| wolfSSL | 15:117db924cf7c | 2131 | _INIT_MASK_Y(mask) |
| wolfSSL | 15:117db924cf7c | 2132 | |
| wolfSSL | 15:117db924cf7c | 2133 | /* Load 64 bytes starting at offset i from reg into two YMM registers, then byte-shuffle each register with mask (endian swap). */ |
| wolfSSL | 15:117db924cf7c | 2134 | #define _LOAD_BLOCK_W_Y_2(mask, ymm0, ymm1, reg, i) \ |
| wolfSSL | 15:117db924cf7c | 2135 | /* one invocation covers 8 of the 16 buffer words; LOAD_BLOCK_W_Y invokes it twice to fill all four registers */ \ |
| wolfSSL | 15:117db924cf7c | 2136 | "vmovdqu " #i "+ 0(%%" #reg "), %%" #ymm0 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 2137 | "vmovdqu " #i "+32(%%" #reg "), %%" #ymm1 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 2138 | "vpshufb %%" #mask ", %%" #ymm0 ", %%" #ymm0 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 2139 | "vpshufb %%" #mask ", %%" #ymm1 ", %%" #ymm1 "\n\t" |
| wolfSSL | 15:117db924cf7c | 2140 | /* Expansion wrapper: arguments that are themselves macros (presumably register aliases such as W_Y_0 and MASK_Y) are expanded here before the inner macro stringizes them. */ |
| wolfSSL | 15:117db924cf7c | 2141 | #define LOAD_BLOCK_W_Y_2(mask, ymm1, ymm2, reg, i) \ |
| wolfSSL | 15:117db924cf7c | 2142 | _LOAD_BLOCK_W_Y_2(mask, ymm1, ymm2, reg, i) |
| wolfSSL | 15:117db924cf7c | 2143 | /* Load the 128 bytes at reg into W_Y_0, W_Y_4, W_Y_8 and W_Y_12, each byte-shuffled with mask. */ |
| wolfSSL | 15:117db924cf7c | 2144 | #define LOAD_BLOCK_W_Y(mask, reg) \ |
| wolfSSL | 15:117db924cf7c | 2145 | LOAD_BLOCK_W_Y_2(mask, W_Y_0, W_Y_4 , reg, 0) \ |
| wolfSSL | 15:117db924cf7c | 2146 | LOAD_BLOCK_W_Y_2(mask, W_Y_8, W_Y_12, reg, 64) |
| wolfSSL | 15:117db924cf7c | 2147 | /* Adds the 64-bit words at offsets i+0 and i+32 from reg to ymm0 and ymm1, leaving the sums in ymm2 and ymm3, and stores those sums at the same offsets from WX. ymm0 and ymm1 themselves are not modified. */ |
| wolfSSL | 15:117db924cf7c | 2148 | #define _SET_W_Y_2(ymm0, ymm1, ymm2, ymm3, reg, i) \ |
| wolfSSL | 15:117db924cf7c | 2149 | "vpaddq " #i "+ 0(%%" #reg "), %%" #ymm0 ", %%" #ymm2 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 2150 | "vpaddq " #i "+32(%%" #reg "), %%" #ymm1 ", %%" #ymm3 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 2151 | "vmovdqu %%" #ymm2 ", " #i "+ 0(" WX ")\n\t" \ |
| wolfSSL | 15:117db924cf7c | 2152 | "vmovdqu %%" #ymm3 ", " #i "+32(" WX ")\n\t" |
| wolfSSL | 15:117db924cf7c | 2153 | /* Expansion wrapper: macro arguments are expanded here before _SET_W_Y_2 stringizes them. */ |
| wolfSSL | 15:117db924cf7c | 2154 | #define SET_W_Y_2(ymm0, ymm1, ymm2, ymm3, reg, i) \ |
| wolfSSL | 15:117db924cf7c | 2155 | _SET_W_Y_2(ymm0, ymm1, ymm2, ymm3, reg, i) |
| wolfSSL | 15:117db924cf7c | 2156 | /* For one block: adds the 128 bytes at reg (the round constants when reg points into the K table) to W_Y_0..W_Y_12 and writes the sums to the 128 bytes at WX, using YTMP1 and YTMP2 as scratch. */ |
| wolfSSL | 15:117db924cf7c | 2157 | #define SET_BLOCK_W_Y(reg) \ |
| wolfSSL | 15:117db924cf7c | 2158 | SET_W_Y_2(W_Y_0, W_Y_4 , YTMP1, YTMP2, reg, 0) \ |
| wolfSSL | 15:117db924cf7c | 2159 | SET_W_Y_2(W_Y_8, W_Y_12, YTMP1, YTMP2, reg, 64) |
| wolfSSL | 15:117db924cf7c | 2160 | |
| wolfSSL | 15:117db924cf7c | 2161 | /* Load 32 bytes from each of two blocks that lie 128 bytes apart (offsets i and i+128 from reg) and byte-shuffle with mask: the first block is loaded into X0 and X1, the second into X8 and X9 and then inserted as the upper 128 bits of Y0 and Y1. NOTE(review): relies on X0 / X1 being the low halves of Y0 / Y1 - confirm against the register aliases defined outside this view. */ |
| wolfSSL | 15:117db924cf7c | 2162 | #define _LOAD_BLOCK2_W_Y_2(mask, Y0, Y1, X0, X1, X8, X9, reg, i) \ |
| wolfSSL | 15:117db924cf7c | 2163 | "vmovdqu " #i "+ 0(%%" #reg "), %%" #X0 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 2164 | "vmovdqu " #i "+ 16(%%" #reg "), %%" #X1 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 2165 | "vmovdqu " #i "+128(%%" #reg "), %%" #X8 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 2166 | "vmovdqu " #i "+144(%%" #reg "), %%" #X9 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 2167 | "vinserti128 $1, %%" #X8 ", %%" #Y0 ", %%" #Y0 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 2168 | "vinserti128 $1, %%" #X9 ", %%" #Y1 ", %%" #Y1 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 2169 | "vpshufb %%" #mask ", %%" #Y0 ", %%" #Y0 "\n\t" \ |
| wolfSSL | 15:117db924cf7c | 2170 | "vpshufb %%" #mask ", %%" #Y1 ", %%" #Y1 "\n\t" |
| wolfSSL | 15:117db924cf7c | 2171 | /* Expansion wrapper: macro arguments are expanded here before the inner macro stringizes them. */ |
| wolfSSL | 15:117db924cf7c | 2172 | #define LOAD_BLOCK2_W_Y_2(mask, Y0, Y1, X0, X1, X8, X9, reg, i) \ |
| wolfSSL | 15:117db924cf7c | 2173 | _LOAD_BLOCK2_W_Y_2(mask, Y0, Y1, X0, X1, X8, X9, reg, i) |
| wolfSSL | 15:117db924cf7c | 2174 | /* Load two consecutive 128-byte blocks at reg into Y0..Y7 (first block in the low halves, second block in the high halves); X8 and X9 are reused as scratch for every register pair. */ |
| wolfSSL | 15:117db924cf7c | 2175 | #define LOAD_BLOCK2_W_Y(mask, reg) \ |
| wolfSSL | 15:117db924cf7c | 2176 | LOAD_BLOCK2_W_Y_2(mask, Y0, Y1, X0, X1, X8, X9, reg, 0) \ |
| wolfSSL | 15:117db924cf7c | 2177 | LOAD_BLOCK2_W_Y_2(mask, Y2, Y3, X2, X3, X8, X9, reg, 32) \ |
| wolfSSL | 15:117db924cf7c | 2178 | LOAD_BLOCK2_W_Y_2(mask, Y4, Y5, X4, X5, X8, X9, reg, 64) \ |
| wolfSSL | 15:117db924cf7c | 2179 | LOAD_BLOCK2_W_Y_2(mask, Y6, Y7, X6, X7, X8, X9, reg, 96) \ |
| wolfSSL | 15:117db924cf7c | 2180 | /* Two-block counterpart of SET_BLOCK_W_Y: adds the 256 bytes at reg to Y0..Y7 and writes the sums to the 256 bytes at WX, using YTMP1 and YTMP2 as scratch. */ |
| wolfSSL | 15:117db924cf7c | 2181 | #define SET_BLOCK2_W_Y(reg) \ |
| wolfSSL | 15:117db924cf7c | 2182 | SET_W_Y_2(Y0, Y1, YTMP1, YTMP2, reg, 0) \ |
| wolfSSL | 15:117db924cf7c | 2183 | SET_W_Y_2(Y2, Y3, YTMP1, YTMP2, reg, 64) \ |
| wolfSSL | 15:117db924cf7c | 2184 | SET_W_Y_2(Y4, Y5, YTMP1, YTMP2, reg, 128) \ |
| wolfSSL | 15:117db924cf7c | 2185 | SET_W_Y_2(Y6, Y7, YTMP1, YTMP2, reg, 192) |
| wolfSSL | 15:117db924cf7c | 2186 | /* SHA-512 round constants laid out for the two-block AVX2 code: every pair of constants is stored twice in a row (80 constants, 160 entries), so a 32-byte read yields the same pair for both 128-bit halves of a YMM register. */ |
| wolfSSL | 15:117db924cf7c | 2187 | static const word64 K512_AVX2[160] = { |
| wolfSSL | 15:117db924cf7c | 2188 | W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd), |
| wolfSSL | 15:117db924cf7c | 2189 | W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd), |
| wolfSSL | 15:117db924cf7c | 2190 | W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc), |
| wolfSSL | 15:117db924cf7c | 2191 | W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc), |
| wolfSSL | 15:117db924cf7c | 2192 | W64LIT(0x3956c25bf348b538), W64LIT(0x59f111f1b605d019), |
| wolfSSL | 15:117db924cf7c | 2193 | W64LIT(0x3956c25bf348b538), W64LIT(0x59f111f1b605d019), |
| wolfSSL | 15:117db924cf7c | 2194 | W64LIT(0x923f82a4af194f9b), W64LIT(0xab1c5ed5da6d8118), |
| wolfSSL | 15:117db924cf7c | 2195 | W64LIT(0x923f82a4af194f9b), W64LIT(0xab1c5ed5da6d8118), |
| wolfSSL | 15:117db924cf7c | 2196 | W64LIT(0xd807aa98a3030242), W64LIT(0x12835b0145706fbe), |
| wolfSSL | 15:117db924cf7c | 2197 | W64LIT(0xd807aa98a3030242), W64LIT(0x12835b0145706fbe), |
| wolfSSL | 15:117db924cf7c | 2198 | W64LIT(0x243185be4ee4b28c), W64LIT(0x550c7dc3d5ffb4e2), |
| wolfSSL | 15:117db924cf7c | 2199 | W64LIT(0x243185be4ee4b28c), W64LIT(0x550c7dc3d5ffb4e2), |
| wolfSSL | 15:117db924cf7c | 2200 | W64LIT(0x72be5d74f27b896f), W64LIT(0x80deb1fe3b1696b1), |
| wolfSSL | 15:117db924cf7c | 2201 | W64LIT(0x72be5d74f27b896f), W64LIT(0x80deb1fe3b1696b1), |
| wolfSSL | 15:117db924cf7c | 2202 | W64LIT(0x9bdc06a725c71235), W64LIT(0xc19bf174cf692694), |
| wolfSSL | 15:117db924cf7c | 2203 | W64LIT(0x9bdc06a725c71235), W64LIT(0xc19bf174cf692694), |
| wolfSSL | 15:117db924cf7c | 2204 | W64LIT(0xe49b69c19ef14ad2), W64LIT(0xefbe4786384f25e3), |
| wolfSSL | 15:117db924cf7c | 2205 | W64LIT(0xe49b69c19ef14ad2), W64LIT(0xefbe4786384f25e3), |
| wolfSSL | 15:117db924cf7c | 2206 | W64LIT(0x0fc19dc68b8cd5b5), W64LIT(0x240ca1cc77ac9c65), |
| wolfSSL | 15:117db924cf7c | 2207 | W64LIT(0x0fc19dc68b8cd5b5), W64LIT(0x240ca1cc77ac9c65), |
| wolfSSL | 15:117db924cf7c | 2208 | W64LIT(0x2de92c6f592b0275), W64LIT(0x4a7484aa6ea6e483), |
| wolfSSL | 15:117db924cf7c | 2209 | W64LIT(0x2de92c6f592b0275), W64LIT(0x4a7484aa6ea6e483), |
| wolfSSL | 15:117db924cf7c | 2210 | W64LIT(0x5cb0a9dcbd41fbd4), W64LIT(0x76f988da831153b5), |
| wolfSSL | 15:117db924cf7c | 2211 | W64LIT(0x5cb0a9dcbd41fbd4), W64LIT(0x76f988da831153b5), |
| wolfSSL | 15:117db924cf7c | 2212 | W64LIT(0x983e5152ee66dfab), W64LIT(0xa831c66d2db43210), |
| wolfSSL | 15:117db924cf7c | 2213 | W64LIT(0x983e5152ee66dfab), W64LIT(0xa831c66d2db43210), |
| wolfSSL | 15:117db924cf7c | 2214 | W64LIT(0xb00327c898fb213f), W64LIT(0xbf597fc7beef0ee4), |
| wolfSSL | 15:117db924cf7c | 2215 | W64LIT(0xb00327c898fb213f), W64LIT(0xbf597fc7beef0ee4), |
| wolfSSL | 15:117db924cf7c | 2216 | W64LIT(0xc6e00bf33da88fc2), W64LIT(0xd5a79147930aa725), |
| wolfSSL | 15:117db924cf7c | 2217 | W64LIT(0xc6e00bf33da88fc2), W64LIT(0xd5a79147930aa725), |
| wolfSSL | 15:117db924cf7c | 2218 | W64LIT(0x06ca6351e003826f), W64LIT(0x142929670a0e6e70), |
| wolfSSL | 15:117db924cf7c | 2219 | W64LIT(0x06ca6351e003826f), W64LIT(0x142929670a0e6e70), |
| wolfSSL | 15:117db924cf7c | 2220 | W64LIT(0x27b70a8546d22ffc), W64LIT(0x2e1b21385c26c926), |
| wolfSSL | 15:117db924cf7c | 2221 | W64LIT(0x27b70a8546d22ffc), W64LIT(0x2e1b21385c26c926), |
| wolfSSL | 15:117db924cf7c | 2222 | W64LIT(0x4d2c6dfc5ac42aed), W64LIT(0x53380d139d95b3df), |
| wolfSSL | 15:117db924cf7c | 2223 | W64LIT(0x4d2c6dfc5ac42aed), W64LIT(0x53380d139d95b3df), |
| wolfSSL | 15:117db924cf7c | 2224 | W64LIT(0x650a73548baf63de), W64LIT(0x766a0abb3c77b2a8), |
| wolfSSL | 15:117db924cf7c | 2225 | W64LIT(0x650a73548baf63de), W64LIT(0x766a0abb3c77b2a8), |
| wolfSSL | 15:117db924cf7c | 2226 | W64LIT(0x81c2c92e47edaee6), W64LIT(0x92722c851482353b), |
| wolfSSL | 15:117db924cf7c | 2227 | W64LIT(0x81c2c92e47edaee6), W64LIT(0x92722c851482353b), |
| wolfSSL | 15:117db924cf7c | 2228 | W64LIT(0xa2bfe8a14cf10364), W64LIT(0xa81a664bbc423001), |
| wolfSSL | 15:117db924cf7c | 2229 | W64LIT(0xa2bfe8a14cf10364), W64LIT(0xa81a664bbc423001), |
| wolfSSL | 15:117db924cf7c | 2230 | W64LIT(0xc24b8b70d0f89791), W64LIT(0xc76c51a30654be30), |
| wolfSSL | 15:117db924cf7c | 2231 | W64LIT(0xc24b8b70d0f89791), W64LIT(0xc76c51a30654be30), |
| wolfSSL | 15:117db924cf7c | 2232 | W64LIT(0xd192e819d6ef5218), W64LIT(0xd69906245565a910), |
| wolfSSL | 15:117db924cf7c | 2233 | W64LIT(0xd192e819d6ef5218), W64LIT(0xd69906245565a910), |
| wolfSSL | 15:117db924cf7c | 2234 | W64LIT(0xf40e35855771202a), W64LIT(0x106aa07032bbd1b8), |
| wolfSSL | 15:117db924cf7c | 2235 | W64LIT(0xf40e35855771202a), W64LIT(0x106aa07032bbd1b8), |
| wolfSSL | 15:117db924cf7c | 2236 | W64LIT(0x19a4c116b8d2d0c8), W64LIT(0x1e376c085141ab53), |
| wolfSSL | 15:117db924cf7c | 2237 | W64LIT(0x19a4c116b8d2d0c8), W64LIT(0x1e376c085141ab53), |
| wolfSSL | 15:117db924cf7c | 2238 | W64LIT(0x2748774cdf8eeb99), W64LIT(0x34b0bcb5e19b48a8), |
| wolfSSL | 15:117db924cf7c | 2239 | W64LIT(0x2748774cdf8eeb99), W64LIT(0x34b0bcb5e19b48a8), |
| wolfSSL | 15:117db924cf7c | 2240 | W64LIT(0x391c0cb3c5c95a63), W64LIT(0x4ed8aa4ae3418acb), |
| wolfSSL | 15:117db924cf7c | 2241 | W64LIT(0x391c0cb3c5c95a63), W64LIT(0x4ed8aa4ae3418acb), |
| wolfSSL | 15:117db924cf7c | 2242 | W64LIT(0x5b9cca4f7763e373), W64LIT(0x682e6ff3d6b2b8a3), |
| wolfSSL | 15:117db924cf7c | 2243 | W64LIT(0x5b9cca4f7763e373), W64LIT(0x682e6ff3d6b2b8a3), |
| wolfSSL | 15:117db924cf7c | 2244 | W64LIT(0x748f82ee5defb2fc), W64LIT(0x78a5636f43172f60), |
| wolfSSL | 15:117db924cf7c | 2245 | W64LIT(0x748f82ee5defb2fc), W64LIT(0x78a5636f43172f60), |
| wolfSSL | 15:117db924cf7c | 2246 | W64LIT(0x84c87814a1f0ab72), W64LIT(0x8cc702081a6439ec), |
| wolfSSL | 15:117db924cf7c | 2247 | W64LIT(0x84c87814a1f0ab72), W64LIT(0x8cc702081a6439ec), |
| wolfSSL | 15:117db924cf7c | 2248 | W64LIT(0x90befffa23631e28), W64LIT(0xa4506cebde82bde9), |
| wolfSSL | 15:117db924cf7c | 2249 | W64LIT(0x90befffa23631e28), W64LIT(0xa4506cebde82bde9), |
| wolfSSL | 15:117db924cf7c | 2250 | W64LIT(0xbef9a3f7b2c67915), W64LIT(0xc67178f2e372532b), |
| wolfSSL | 15:117db924cf7c | 2251 | W64LIT(0xbef9a3f7b2c67915), W64LIT(0xc67178f2e372532b), |
| wolfSSL | 15:117db924cf7c | 2252 | W64LIT(0xca273eceea26619c), W64LIT(0xd186b8c721c0c207), |
| wolfSSL | 15:117db924cf7c | 2253 | W64LIT(0xca273eceea26619c), W64LIT(0xd186b8c721c0c207), |
| wolfSSL | 15:117db924cf7c | 2254 | W64LIT(0xeada7dd6cde0eb1e), W64LIT(0xf57d4f7fee6ed178), |
| wolfSSL | 15:117db924cf7c | 2255 | W64LIT(0xeada7dd6cde0eb1e), W64LIT(0xf57d4f7fee6ed178), |
| wolfSSL | 15:117db924cf7c | 2256 | W64LIT(0x06f067aa72176fba), W64LIT(0x0a637dc5a2c898a6), |
| wolfSSL | 15:117db924cf7c | 2257 | W64LIT(0x06f067aa72176fba), W64LIT(0x0a637dc5a2c898a6), |
| wolfSSL | 15:117db924cf7c | 2258 | W64LIT(0x113f9804bef90dae), W64LIT(0x1b710b35131c471b), |
| wolfSSL | 15:117db924cf7c | 2259 | W64LIT(0x113f9804bef90dae), W64LIT(0x1b710b35131c471b), |
| wolfSSL | 15:117db924cf7c | 2260 | W64LIT(0x28db77f523047d84), W64LIT(0x32caab7b40c72493), |
| wolfSSL | 15:117db924cf7c | 2261 | W64LIT(0x28db77f523047d84), W64LIT(0x32caab7b40c72493), |
| wolfSSL | 15:117db924cf7c | 2262 | W64LIT(0x3c9ebe0a15c9bebc), W64LIT(0x431d67c49c100d4c), |
| wolfSSL | 15:117db924cf7c | 2263 | W64LIT(0x3c9ebe0a15c9bebc), W64LIT(0x431d67c49c100d4c), |
| wolfSSL | 15:117db924cf7c | 2264 | W64LIT(0x4cc5d4becb3e42b6), W64LIT(0x597f299cfc657e2a), |
| wolfSSL | 15:117db924cf7c | 2265 | W64LIT(0x4cc5d4becb3e42b6), W64LIT(0x597f299cfc657e2a), |
| wolfSSL | 15:117db924cf7c | 2266 | W64LIT(0x5fcb6fab3ad6faec), W64LIT(0x6c44198c4a475817), |
| wolfSSL | 15:117db924cf7c | 2267 | W64LIT(0x5fcb6fab3ad6faec), W64LIT(0x6c44198c4a475817) |
| wolfSSL | 15:117db924cf7c | 2268 | }; |
| wolfSSL | 15:117db924cf7c | 2269 | static const word64* K512_AVX2_END = &K512_AVX2[128]; /* 1024 bytes past the start of the table; Transform_Sha512_AVX2_Len leaves its scheduling loop when rsi reaches this address */ |
| wolfSSL | 15:117db924cf7c | 2270 | /* Compresses one block with AVX2: loads the 128 bytes at offset 64 of *sha512 (presumably sha512->buffer - TODO confirm) byte-swapped into W_Y_0..W_Y_12, runs 4 loop passes of 16 rounds each with message scheduling followed by 16 rounds without it, then applies STORE_ADD_DIGEST. Always returns 0. NOTE(review): rsp is moved inside the asm without the compiler knowing - confirm this is safe for the build (red zone, rsp-relative operands). */ |
| wolfSSL | 15:117db924cf7c | 2271 | static int Transform_Sha512_AVX2(wc_Sha512* sha512) |
| wolfSSL | 15:117db924cf7c | 2272 | { |
| wolfSSL | 15:117db924cf7c | 2273 | __asm__ __volatile__ ( |
| wolfSSL | 15:117db924cf7c | 2274 | |
| wolfSSL | 15:117db924cf7c | 2275 | /* Reserve 136 bytes: 16 W + K words (128 bytes) plus 8 bytes for the pass counter kept at 16*8(WX). */ |
| wolfSSL | 15:117db924cf7c | 2276 | "subq $136, %%rsp\n\t" |
| wolfSSL | 15:117db924cf7c | 2277 | "leaq 64(%[sha512]), %%rax\n\t" |
| wolfSSL | 15:117db924cf7c | 2278 | |
| wolfSSL | 15:117db924cf7c | 2279 | INIT_MASK(MASK_Y) |
| wolfSSL | 15:117db924cf7c | 2280 | LOAD_DIGEST() |
| wolfSSL | 15:117db924cf7c | 2281 | |
| wolfSSL | 15:117db924cf7c | 2282 | LOAD_BLOCK_W_Y(MASK_Y, rax) |
| wolfSSL | 15:117db924cf7c | 2283 | /* 4 passes through the loop at label 1; rsi walks the constant table */ |
| wolfSSL | 15:117db924cf7c | 2284 | "movl $4, 16*8(" WX ")\n\t" |
| wolfSSL | 15:117db924cf7c | 2285 | "leaq %[K512], %%rsi\n\t" |
| wolfSSL | 15:117db924cf7c | 2286 | /* L4 = b */ |
| wolfSSL | 15:117db924cf7c | 2287 | "movq %%r9, " L4 "\n\t" |
| wolfSSL | 15:117db924cf7c | 2288 | /* L1 = e */ |
| wolfSSL | 15:117db924cf7c | 2289 | "movq %%r12, " L1 "\n\t" |
| wolfSSL | 15:117db924cf7c | 2290 | /* L4 = b ^ c */ |
| wolfSSL | 15:117db924cf7c | 2291 | "xorq %%r10, " L4 "\n\t" |
| wolfSSL | 15:117db924cf7c | 2292 | /* store W + K for the first 16 rounds */ |
| wolfSSL | 15:117db924cf7c | 2293 | SET_BLOCK_W_Y(rsi) |
| wolfSSL | 15:117db924cf7c | 2294 | |
| wolfSSL | 15:117db924cf7c | 2295 | "# Start of 16 rounds\n" |
| wolfSSL | 15:117db924cf7c | 2296 | "1:\n\t" |
| wolfSSL | 15:117db924cf7c | 2297 | /* advance rsi by 128 bytes, i.e. to the next 16 constants */ |
| wolfSSL | 15:117db924cf7c | 2298 | "addq $128, %%rsi\n\t" |
| wolfSSL | 15:117db924cf7c | 2299 | |
| wolfSSL | 15:117db924cf7c | 2300 | MsgSched4_AVX2(W_Y_0,W_Y_4,W_Y_8,W_Y_12,RA,RB,RC,RD,RE,RF,RG,RH, 0) |
| wolfSSL | 15:117db924cf7c | 2301 | MsgSched4_AVX2(W_Y_4,W_Y_8,W_Y_12,W_Y_0,RE,RF,RG,RH,RA,RB,RC,RD, 4) |
| wolfSSL | 15:117db924cf7c | 2302 | MsgSched4_AVX2(W_Y_8,W_Y_12,W_Y_0,W_Y_4,RA,RB,RC,RD,RE,RF,RG,RH, 8) |
| wolfSSL | 15:117db924cf7c | 2303 | MsgSched4_AVX2(W_Y_12,W_Y_0,W_Y_4,W_Y_8,RE,RF,RG,RH,RA,RB,RC,RD,12) |
| wolfSSL | 15:117db924cf7c | 2304 | /* store W + K for the 16 rounds of the next pass (or of the final unscheduled rounds) */ |
| wolfSSL | 15:117db924cf7c | 2305 | SET_BLOCK_W_Y(rsi) |
| wolfSSL | 15:117db924cf7c | 2306 | |
| wolfSSL | 15:117db924cf7c | 2307 | "subl $1, 16*8(" WX ")\n\t" |
| wolfSSL | 15:117db924cf7c | 2308 | "jne 1b\n\t" |
| wolfSSL | 15:117db924cf7c | 2309 | /* final 16 rounds: no further schedule words are needed (RND_ALL_2 presumably runs rounds i and i+1) */ |
| wolfSSL | 15:117db924cf7c | 2310 | RND_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH, 0) |
| wolfSSL | 15:117db924cf7c | 2311 | RND_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF, 2) |
| wolfSSL | 15:117db924cf7c | 2312 | RND_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD, 4) |
| wolfSSL | 15:117db924cf7c | 2313 | RND_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB, 6) |
| wolfSSL | 15:117db924cf7c | 2314 | |
| wolfSSL | 15:117db924cf7c | 2315 | RND_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH, 8) |
| wolfSSL | 15:117db924cf7c | 2316 | RND_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF,10) |
| wolfSSL | 15:117db924cf7c | 2317 | RND_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD,12) |
| wolfSSL | 15:117db924cf7c | 2318 | RND_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB,14) |
| wolfSSL | 15:117db924cf7c | 2319 | |
| wolfSSL | 15:117db924cf7c | 2320 | STORE_ADD_DIGEST() |
| wolfSSL | 15:117db924cf7c | 2321 | /* release the 136 bytes reserved on entry */ |
| wolfSSL | 15:117db924cf7c | 2322 | "addq $136, %%rsp\n\t" |
| wolfSSL | 15:117db924cf7c | 2323 | |
| wolfSSL | 15:117db924cf7c | 2324 | : |
| wolfSSL | 15:117db924cf7c | 2325 | : [mask] "m" (mBYTE_FLIP_MASK_Y), |
| wolfSSL | 15:117db924cf7c | 2326 | [sha512] "r" (sha512), |
| wolfSSL | 15:117db924cf7c | 2327 | [K512] "m" (K512) |
| wolfSSL | 15:117db924cf7c | 2328 | : WORK_REGS, STATE_REGS, YMM_REGS, "memory", "rsi" |
| wolfSSL | 15:117db924cf7c | 2329 | ); |
| wolfSSL | 15:117db924cf7c | 2330 | |
| wolfSSL | 15:117db924cf7c | 2331 | return 0; |
| wolfSSL | 15:117db924cf7c | 2332 | } |
| wolfSSL | 15:117db924cf7c | 2333 | /* Hashes len bytes starting at sha512->data. When the WC_SHA512_BLOCK_SIZE bit of len is set, one block is first copied into sha512->buffer and run through Transform_Sha512_AVX2 (its return value is ignored); whatever remains is consumed two blocks (256 bytes) per pass by the inline asm below. Always returns 0. */ |
| wolfSSL | 15:117db924cf7c | 2334 | static int Transform_Sha512_AVX2_Len(wc_Sha512* sha512, word32 len) |
| wolfSSL | 15:117db924cf7c | 2335 | { |
| wolfSSL | 15:117db924cf7c | 2336 | if ((len & WC_SHA512_BLOCK_SIZE) != 0) { |
| wolfSSL | 15:117db924cf7c | 2337 | XMEMCPY(sha512->buffer, sha512->data, WC_SHA512_BLOCK_SIZE); |
| wolfSSL | 15:117db924cf7c | 2338 | Transform_Sha512_AVX2(sha512); |
| wolfSSL | 15:117db924cf7c | 2339 | sha512->data += WC_SHA512_BLOCK_SIZE; |
| wolfSSL | 15:117db924cf7c | 2340 | len -= WC_SHA512_BLOCK_SIZE; |
| wolfSSL | 15:117db924cf7c | 2341 | if (len == 0) |
| wolfSSL | 15:117db924cf7c | 2342 | return 0; |
| wolfSSL | 15:117db924cf7c | 2343 | } |
| wolfSSL | 15:117db924cf7c | 2344 | /* The asm loop tests len only after a complete pass, so len must be a non-zero multiple of 256 at this point - NOTE(review): confirm that callers guarantee this. */ |
| wolfSSL | 15:117db924cf7c | 2345 | __asm__ __volatile__ ( |
| wolfSSL | 15:117db924cf7c | 2346 | /* rcx = pointer stored at offset 224 of *sha512 (presumably sha512->data - TODO confirm the offset) */ |
| wolfSSL | 15:117db924cf7c | 2347 | "movq 224(%[sha512]), %%rcx\n\t" |
| wolfSSL | 15:117db924cf7c | 2348 | |
| wolfSSL | 15:117db924cf7c | 2349 | INIT_MASK(MASK_Y) |
| wolfSSL | 15:117db924cf7c | 2350 | LOAD_DIGEST() |
| wolfSSL | 15:117db924cf7c | 2351 | |
| wolfSSL | 15:117db924cf7c | 2352 | "# Start of processing two blocks\n" |
| wolfSSL | 15:117db924cf7c | 2353 | "2:\n\t" |
| wolfSSL | 15:117db924cf7c | 2354 | /* Stack area of 1344 bytes: 5 * 256 bytes hold W + K for 80 rounds of both blocks; the purpose of the remaining 64 bytes is not evident from this view. */ |
| wolfSSL | 15:117db924cf7c | 2355 | "subq $1344, %%rsp\n\t" |
| wolfSSL | 15:117db924cf7c | 2356 | "leaq %[K512], %%rsi\n\t" |
| wolfSSL | 15:117db924cf7c | 2357 | |
| wolfSSL | 15:117db924cf7c | 2358 | /* L4 = b */ |
| wolfSSL | 15:117db924cf7c | 2359 | "movq %%r9, " L4 "\n\t" |
| wolfSSL | 15:117db924cf7c | 2360 | /* L1 = e */ |
| wolfSSL | 15:117db924cf7c | 2361 | "movq %%r12, " L1 "\n\t" |
| wolfSSL | 15:117db924cf7c | 2362 | |
| wolfSSL | 15:117db924cf7c | 2363 | LOAD_BLOCK2_W_Y(MASK_Y, rcx) |
| wolfSSL | 15:117db924cf7c | 2364 | |
| wolfSSL | 15:117db924cf7c | 2365 | /* L4 = b ^ c */ |
| wolfSSL | 15:117db924cf7c | 2366 | "xorq %%r10, " L4 "\n\t" |
| wolfSSL | 15:117db924cf7c | 2367 | "\n" |
| wolfSSL | 15:117db924cf7c | 2368 | "1:\n\t" |
| wolfSSL | 15:117db924cf7c | 2369 | SET_BLOCK2_W_Y(rsi) |
| wolfSSL | 15:117db924cf7c | 2370 | MsgSched2_AVX2(Y0,Y1,Y2,Y3,Y4,Y5,Y6,Y7,RA,RB,RC,RD,RE,RF,RG,RH, 0) |
| wolfSSL | 15:117db924cf7c | 2371 | MsgSched2_AVX2(Y1,Y2,Y3,Y4,Y5,Y6,Y7,Y0,RG,RH,RA,RB,RC,RD,RE,RF, 4) |
| wolfSSL | 15:117db924cf7c | 2372 | MsgSched2_AVX2(Y2,Y3,Y4,Y5,Y6,Y7,Y0,Y1,RE,RF,RG,RH,RA,RB,RC,RD, 8) |
| wolfSSL | 15:117db924cf7c | 2373 | MsgSched2_AVX2(Y3,Y4,Y5,Y6,Y7,Y0,Y1,Y2,RC,RD,RE,RF,RG,RH,RA,RB,12) |
| wolfSSL | 15:117db924cf7c | 2374 | MsgSched2_AVX2(Y4,Y5,Y6,Y7,Y0,Y1,Y2,Y3,RA,RB,RC,RD,RE,RF,RG,RH,16) |
| wolfSSL | 15:117db924cf7c | 2375 | MsgSched2_AVX2(Y5,Y6,Y7,Y0,Y1,Y2,Y3,Y4,RG,RH,RA,RB,RC,RD,RE,RF,20) |
| wolfSSL | 15:117db924cf7c | 2376 | MsgSched2_AVX2(Y6,Y7,Y0,Y1,Y2,Y3,Y4,Y5,RE,RF,RG,RH,RA,RB,RC,RD,24) |
| wolfSSL | 15:117db924cf7c | 2377 | MsgSched2_AVX2(Y7,Y0,Y1,Y2,Y3,Y4,Y5,Y6,RC,RD,RE,RF,RG,RH,RA,RB,28) |
| wolfSSL | 15:117db924cf7c | 2378 | "addq $256, %%rsi\n\t" /* next 32 entries of the duplicated constant table */ |
| wolfSSL | 15:117db924cf7c | 2379 | "addq $256, %%rsp\n\t" /* keep this pass's W + K on the stack; the second block's rounds read them later */ |
| wolfSSL | 15:117db924cf7c | 2380 | "cmpq %[K512_END], %%rsi\n\t" |
| wolfSSL | 15:117db924cf7c | 2381 | "jne 1b\n\t" |
| wolfSSL | 15:117db924cf7c | 2382 | /* Last 16 rounds of the first block: W + K for both blocks are stored, no further scheduling is needed. */ |
| wolfSSL | 15:117db924cf7c | 2383 | SET_BLOCK2_W_Y(rsi) |
| wolfSSL | 15:117db924cf7c | 2384 | RND_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH, 0) |
| wolfSSL | 15:117db924cf7c | 2385 | RND_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF, 4) |
| wolfSSL | 15:117db924cf7c | 2386 | RND_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD, 8) |
| wolfSSL | 15:117db924cf7c | 2387 | RND_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB,12) |
| wolfSSL | 15:117db924cf7c | 2388 | |
| wolfSSL | 15:117db924cf7c | 2389 | RND_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH,16) |
| wolfSSL | 15:117db924cf7c | 2390 | RND_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF,20) |
| wolfSSL | 15:117db924cf7c | 2391 | RND_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD,24) |
| wolfSSL | 15:117db924cf7c | 2392 | RND_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB,28) |
| wolfSSL | 15:117db924cf7c | 2393 | "subq $1024, %%rsp\n\t" /* rewind over the four 256-byte steps taken in loop 1, back to the first stored W + K */ |
| wolfSSL | 15:117db924cf7c | 2394 | /* First block finished: fold it into the digest, then reseed L4 and L1 for the second block. */ |
| wolfSSL | 15:117db924cf7c | 2395 | ADD_DIGEST() |
| wolfSSL | 15:117db924cf7c | 2396 | STORE_DIGEST() |
| wolfSSL | 15:117db924cf7c | 2397 | |
| wolfSSL | 15:117db924cf7c | 2398 | /* L4 = b */ |
| wolfSSL | 15:117db924cf7c | 2399 | "movq %%r9, " L4 "\n\t" |
| wolfSSL | 15:117db924cf7c | 2400 | /* L1 = e */ |
| wolfSSL | 15:117db924cf7c | 2401 | "movq %%r12, " L1 "\n\t" |
| wolfSSL | 15:117db924cf7c | 2402 | /* L4 = b ^ c */ |
| wolfSSL | 15:117db924cf7c | 2403 | "xorq %%r10, " L4 "\n\t" |
| wolfSSL | 15:117db924cf7c | 2404 | /* Second block: 5 passes of 16 rounds over the stored W + K; the round indices 2, 6, ... presumably select the second block's slots of each 32-byte group. */ |
| wolfSSL | 15:117db924cf7c | 2405 | "movq $5, %%rsi\n\t" |
| wolfSSL | 15:117db924cf7c | 2406 | "\n" |
| wolfSSL | 15:117db924cf7c | 2407 | "3:\n\t" |
| wolfSSL | 15:117db924cf7c | 2408 | RND_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH, 2) |
| wolfSSL | 15:117db924cf7c | 2409 | RND_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF, 6) |
| wolfSSL | 15:117db924cf7c | 2410 | RND_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD,10) |
| wolfSSL | 15:117db924cf7c | 2411 | RND_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB,14) |
| wolfSSL | 15:117db924cf7c | 2412 | |
| wolfSSL | 15:117db924cf7c | 2413 | RND_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH,18) |
| wolfSSL | 15:117db924cf7c | 2414 | RND_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF,22) |
| wolfSSL | 15:117db924cf7c | 2415 | RND_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD,26) |
| wolfSSL | 15:117db924cf7c | 2416 | RND_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB,30) |
| wolfSSL | 15:117db924cf7c | 2417 | "addq $256, %%rsp\n\t" |
| wolfSSL | 15:117db924cf7c | 2418 | "subq $1, %%rsi\n\t" |
| wolfSSL | 15:117db924cf7c | 2419 | "jnz 3b\n\t" |
| wolfSSL | 15:117db924cf7c | 2420 | |
| wolfSSL | 15:117db924cf7c | 2421 | ADD_DIGEST() |
| wolfSSL | 15:117db924cf7c | 2422 | /* Advance the stored data pointer by two blocks. The addq $64 completes the release of the stack area (5 * 256 + 64 = 1344), so rsp is back at its value from before the subq at label 2 when %[len], a memory operand that may be addressed relative to rsp, is touched. */ |
| wolfSSL | 15:117db924cf7c | 2423 | "movq 224(%[sha512]), %%rcx\n\t" |
| wolfSSL | 15:117db924cf7c | 2424 | "addq $64, %%rsp\n\t" |
| wolfSSL | 15:117db924cf7c | 2425 | "addq $256, %%rcx\n\t" |
| wolfSSL | 15:117db924cf7c | 2426 | "subl $256, %[len]\n\t" |
| wolfSSL | 15:117db924cf7c | 2427 | "movq %%rcx, 224(%[sha512])\n\t" |
| wolfSSL | 15:117db924cf7c | 2428 | /* Nothing from here to the jnz may change the flags: the branch tests the result of the subl on len (STORE_DIGEST presumably consists of moves only - confirm). */ |
| wolfSSL | 15:117db924cf7c | 2429 | STORE_DIGEST() |
| wolfSSL | 15:117db924cf7c | 2430 | |
| wolfSSL | 15:117db924cf7c | 2431 | "jnz 2b\n\t" |
| wolfSSL | 15:117db924cf7c | 2432 | /* NOTE(review): len is declared as an input operand although the asm decrements it; the GCC documentation says input-only operands must not be modified. */ |
| wolfSSL | 15:117db924cf7c | 2433 | : |
| wolfSSL | 15:117db924cf7c | 2434 | : [mask] "m" (mBYTE_FLIP_MASK_Y), |
| wolfSSL | 15:117db924cf7c | 2435 | [len] "m" (len), |
| wolfSSL | 15:117db924cf7c | 2436 | [sha512] "r" (sha512), |
| wolfSSL | 15:117db924cf7c | 2437 | [K512] "m" (K512_AVX2), |
| wolfSSL | 15:117db924cf7c | 2438 | [K512_END] "m" (K512_AVX2_END) |
| wolfSSL | 15:117db924cf7c | 2439 | : WORK_REGS, STATE_REGS, YMM_REGS, "memory", "rsi" |
| wolfSSL | 15:117db924cf7c | 2440 | ); |
| wolfSSL | 15:117db924cf7c | 2441 | |
| wolfSSL | 15:117db924cf7c | 2442 | return 0; |
| wolfSSL | 15:117db924cf7c | 2443 | } |
| wolfSSL | 15:117db924cf7c | 2444 | |
| wolfSSL | 15:117db924cf7c | 2445 | #ifdef HAVE_INTEL_RORX |
/* Run the SHA-512 compression function over a single block using the
 * AVX2 + RORX inline-assembly round macros.
 *
 * The input block is read from offset 64 of *sha512 (presumably
 * sha512->buffer, directly after the 64-byte digest - confirm against the
 * wc_Sha512 layout) and the result is folded back into the digest by
 * STORE_ADD_DIGEST().
 *
 * The asm temporarily lowers %rsp by 136 bytes for scratch space and
 * restores it before the digest is stored.
 *
 * sha512  SHA-512 context holding the digest and the block to process.
 * returns 0 always.
 */
static int Transform_Sha512_AVX2_RORX(wc_Sha512* sha512)
{
    __asm__ __volatile__ (

        /* 16 Ws plus loop counter. */
        /* 16 * 8 bytes of W values + 8 bytes holding the counter = 136. */
        "subq $136, %%rsp\n\t"
        /* L2 = address of the input block inside the context. */
        "leaq 64(%[sha512]), " L2 "\n\t"

        INIT_MASK(MASK_Y)
        LOAD_DIGEST()

        LOAD_BLOCK_W_Y(MASK_Y, rcx)

        /* Loop counter (4 passes) lives in the slot after the 16 Ws. */
        "movl $4, 16*8(" WX ")\n\t"
        /* rsi walks the round-constant table. */
        "leaq %[K512], %%rsi\n\t"
        /* b */
        "movq %%r9, " L4 "\n\t"
        /* L3 = 0 (add to prev h) */
        "xorq " L3 ", " L3 "\n\t"
        /* b ^ c */
        "xorq %%r10, " L4 "\n\t"

        SET_BLOCK_W_Y(rsi)

        "# Start of 16 rounds\n"
        "1:\n\t"

        /* Advance to the next 16 round constants (16 * 8 bytes). */
        "addq $128, %%rsi\n\t"

        /* Rounds interleaved with message scheduling; the working
         * variables rotate by four positions between invocations. */
        MsgSched4_AVX2_RORX_SET(W_Y_0,W_Y_4,W_Y_8,W_Y_12,RA,RB,RC,RD,RE,RF,RG,RH, 0)
        MsgSched4_AVX2_RORX_SET(W_Y_4,W_Y_8,W_Y_12,W_Y_0,RE,RF,RG,RH,RA,RB,RC,RD, 4)
        MsgSched4_AVX2_RORX_SET(W_Y_8,W_Y_12,W_Y_0,W_Y_4,RA,RB,RC,RD,RE,RF,RG,RH, 8)
        MsgSched4_AVX2_RORX_SET(W_Y_12,W_Y_0,W_Y_4,W_Y_8,RE,RF,RG,RH,RA,RB,RC,RD,12)

        /* Decrement the on-stack counter and repeat until it hits zero. */
        "subl $1, 16*8(%%rsp)\n\t"
        "jnz 1b\n\t"

        /* Final group of rounds: no further message scheduling needed
         * (presumably the last 16 of 80 rounds - see the macro
         * definitions earlier in the file). */
        RND_RORX_ALL_4(RA,RB,RC,RD,RE,RF,RG,RH, 0)
        RND_RORX_ALL_4(RE,RF,RG,RH,RA,RB,RC,RD, 4)
        RND_RORX_ALL_4(RA,RB,RC,RD,RE,RF,RG,RH, 8)
        RND_RORX_ALL_4(RE,RF,RG,RH,RA,RB,RC,RD,12)
        /* Prev RND: h += Maj(a,b,c) */
        "addq " L3 ", %%r8\n\t"
        /* Release the scratch area reserved at the top. */
        "addq $136, %%rsp\n\t"

        STORE_ADD_DIGEST()

        :
        : [mask] "m" (mBYTE_FLIP_MASK_Y),
          [sha512] "r" (sha512),
          [K512] "m" (K512)
        : WORK_REGS, STATE_REGS, YMM_REGS, "memory", "rsi"
    );

    return 0;
}
| wolfSSL | 15:117db924cf7c | 2502 | |
/* Hash len bytes of whole SHA-512 blocks read from sha512->data using the
 * AVX2 + RORX code, processing two blocks per pass of the main asm loop.
 *
 * If len has the WC_SHA512_BLOCK_SIZE bit set, one block is first copied
 * into sha512->buffer and run through Transform_Sha512_AVX2_RORX() so the
 * remaining length can be consumed in 256-byte steps.
 *
 * The asm loop exits only when len reaches exactly zero, so len is
 * presumably always a non-zero multiple of WC_SHA512_BLOCK_SIZE - confirm
 * against the caller.
 *
 * sha512  SHA-512 context; the digest is updated and the data pointer is
 *         advanced past the bytes consumed.
 * len     Number of bytes to process.
 * returns 0 always.
 */
static int Transform_Sha512_AVX2_RORX_Len(wc_Sha512* sha512, word32 len)
{
    /* Odd number of blocks: peel one off with the single-block routine. */
    if ((len & WC_SHA512_BLOCK_SIZE) != 0) {
        XMEMCPY(sha512->buffer, sha512->data, WC_SHA512_BLOCK_SIZE);
        Transform_Sha512_AVX2_RORX(sha512);
        sha512->data += WC_SHA512_BLOCK_SIZE;
        len -= WC_SHA512_BLOCK_SIZE;
        if (len == 0)
            return 0;
    }

    __asm__ __volatile__ (

        /* rax = value stored at offset 224 of the context (presumably
         * sha512->data, the input pointer - confirm against the struct). */
        "movq 224(%[sha512]), %%rax\n\t"

        INIT_MASK(MASK_Y)
        LOAD_DIGEST()

        "# Start of processing two blocks\n"
        "2:\n\t"

        /* Scratch area for this pair of blocks; released piecewise below. */
        "subq $1344, %%rsp\n\t"
        /* rsi walks the round-constant table up to K512_END. */
        "leaq %[K512], %%rsi\n\t"

        /* L4 = b */
        "movq %%r9, " L4 "\n\t"
        /* L3 = 0 (add to prev h) */
        "xorq " L3 ", " L3 "\n\t"

        LOAD_BLOCK2_W_Y(MASK_Y, rax)

        /* L4 = b ^ c */
        "xorq %%r10, " L4 "\n\t"
        "\n"
        "1:\n\t"
        SET_BLOCK2_W_Y(rsi)
        MsgSched2_AVX2_RORX(Y0,Y1,Y2,Y3,Y4,Y5,Y6,Y7,RA,RB,RC,RD,RE,RF,RG,RH, 0)
        MsgSched2_AVX2_RORX(Y1,Y2,Y3,Y4,Y5,Y6,Y7,Y0,RG,RH,RA,RB,RC,RD,RE,RF, 4)
        MsgSched2_AVX2_RORX(Y2,Y3,Y4,Y5,Y6,Y7,Y0,Y1,RE,RF,RG,RH,RA,RB,RC,RD, 8)
        MsgSched2_AVX2_RORX(Y3,Y4,Y5,Y6,Y7,Y0,Y1,Y2,RC,RD,RE,RF,RG,RH,RA,RB,12)
        MsgSched2_AVX2_RORX(Y4,Y5,Y6,Y7,Y0,Y1,Y2,Y3,RA,RB,RC,RD,RE,RF,RG,RH,16)
        MsgSched2_AVX2_RORX(Y5,Y6,Y7,Y0,Y1,Y2,Y3,Y4,RG,RH,RA,RB,RC,RD,RE,RF,20)
        MsgSched2_AVX2_RORX(Y6,Y7,Y0,Y1,Y2,Y3,Y4,Y5,RE,RF,RG,RH,RA,RB,RC,RD,24)
        MsgSched2_AVX2_RORX(Y7,Y0,Y1,Y2,Y3,Y4,Y5,Y6,RC,RD,RE,RF,RG,RH,RA,RB,28)
        /* Step both the constant pointer and the stack window by 256. */
        "addq $256, %%rsi\n\t"
        "addq $256, %%rsp\n\t"
        /* Loop until rsi equals the K512_END operand. */
        "cmpq %[K512_END], %%rsi\n\t"
        "jne 1b\n\t"

        /* Last group of rounds for the first block: no scheduling. */
        SET_BLOCK2_W_Y(rsi)
        RND_RORX_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH, 0)
        RND_RORX_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF, 4)
        RND_RORX_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD, 8)
        RND_RORX_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB,12)

        RND_RORX_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH,16)
        RND_RORX_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF,20)
        RND_RORX_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD,24)
        RND_RORX_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB,28)
        /* Apply the deferred addition held in L3 to r8. */
        "addq " L3 ", %%r8\n\t"
        /* Rewind the stack window to the start of the scratch area
         * (undoes the 256-byte steps taken in loop 1, assuming it ran
         * four times). */
        "subq $1024, %%rsp\n\t"

        /* First block done: fold into the digest and save it. */
        ADD_DIGEST()
        STORE_DIGEST()

        /* L4 = b */
        "movq %%r9, " L4 "\n\t"
        /* L3 = 0 (add to prev h) */
        "xorq " L3 ", " L3 "\n\t"
        /* L4 = b ^ c */
        "xorq %%r10, " L4 "\n\t"

        /* Second pass over the scratch area: rsi is now a plain counter
         * of 5 iterations, and the round offsets are shifted by 2
         * (presumably selecting the second block's interleaved values). */
        "movq $5, %%rsi\n\t"
        "\n"
        "3:\n\t"
        RND_RORX_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH, 2)
        RND_RORX_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF, 6)
        RND_RORX_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD,10)
        RND_RORX_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB,14)

        RND_RORX_ALL_2(RA,RB,RC,RD,RE,RF,RG,RH,18)
        RND_RORX_ALL_2(RG,RH,RA,RB,RC,RD,RE,RF,22)
        RND_RORX_ALL_2(RE,RF,RG,RH,RA,RB,RC,RD,26)
        RND_RORX_ALL_2(RC,RD,RE,RF,RG,RH,RA,RB,30)
        /* 5 iterations * 256 = 1280 bytes of the scratch area released. */
        "addq $256, %%rsp\n\t"
        "subq $1, %%rsi\n\t"
        "jnz 3b\n\t"

        /* Apply the deferred addition held in L3 to r8. */
        "addq " L3 ", %%r8\n\t"

        ADD_DIGEST()

        /* Reload the input pointer; rax was handed to LOAD_BLOCK2_W_Y. */
        "movq 224(%[sha512]), %%rax\n\t"
        /* Release the remaining 64 bytes (1280 + 64 = 1344). */
        "addq $64, %%rsp\n\t"
        /* Two blocks consumed: advance the pointer, shrink the length. */
        "addq $256, %%rax\n\t"
        /* Sets ZF for the jnz below; the instructions in between are
         * expected not to modify flags (movq does not; STORE_DIGEST is
         * presumably moves only - confirm). */
        "subl $256, %[len]\n\t"
        "movq %%rax, 224(%[sha512])\n\t"

        STORE_DIGEST()

        /* More input left: process the next pair of blocks. */
        "jnz 2b\n\t"

        :
        : [mask] "m" (mBYTE_FLIP_MASK_Y),
          [len] "m" (len),
          [sha512] "r" (sha512),
          [K512] "m" (K512_AVX2),
          [K512_END] "m" (K512_AVX2_END)
        : WORK_REGS, STATE_REGS, YMM_REGS, "memory", "rsi"
    );

    return 0;
}
| wolfSSL | 15:117db924cf7c | 2616 | #endif /* HAVE_INTEL_RORX */ |
| wolfSSL | 15:117db924cf7c | 2617 | #endif /* HAVE_INTEL_AVX2 */ |
| wolfSSL | 15:117db924cf7c | 2618 | |
| wolfSSL | 15:117db924cf7c | 2619 | #endif /* WOLFSSL_SHA512 */ |
| wolfSSL | 15:117db924cf7c | 2620 | |
| wolfSSL | 15:117db924cf7c | 2621 | |
| wolfSSL | 15:117db924cf7c | 2622 | /* -------------------------------------------------------------------------- */ |
| wolfSSL | 15:117db924cf7c | 2623 | /* SHA384 */ |
| wolfSSL | 15:117db924cf7c | 2624 | /* -------------------------------------------------------------------------- */ |
| wolfSSL | 15:117db924cf7c | 2625 | #ifdef WOLFSSL_SHA384 |
| wolfSSL | 15:117db924cf7c | 2626 | |
| wolfSSL | 15:117db924cf7c | 2627 | #if defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_HASH) |
| wolfSSL | 15:117db924cf7c | 2628 | /* functions defined in wolfcrypt/src/port/caam/caam_sha.c */ |
| wolfSSL | 15:117db924cf7c | 2629 | #else |
| wolfSSL | 15:117db924cf7c | 2630 | |
| wolfSSL | 15:117db924cf7c | 2631 | static int InitSha384(wc_Sha384* sha384) |
| wolfSSL | 15:117db924cf7c | 2632 | { |
| wolfSSL | 15:117db924cf7c | 2633 | if (sha384 == NULL) { |
| wolfSSL | 15:117db924cf7c | 2634 | return BAD_FUNC_ARG; |
| wolfSSL | 15:117db924cf7c | 2635 | } |
| wolfSSL | 15:117db924cf7c | 2636 | |
| wolfSSL | 15:117db924cf7c | 2637 | sha384->digest[0] = W64LIT(0xcbbb9d5dc1059ed8); |
| wolfSSL | 15:117db924cf7c | 2638 | sha384->digest[1] = W64LIT(0x629a292a367cd507); |
| wolfSSL | 15:117db924cf7c | 2639 | sha384->digest[2] = W64LIT(0x9159015a3070dd17); |
| wolfSSL | 15:117db924cf7c | 2640 | sha384->digest[3] = W64LIT(0x152fecd8f70e5939); |
| wolfSSL | 15:117db924cf7c | 2641 | sha384->digest[4] = W64LIT(0x67332667ffc00b31); |
| wolfSSL | 15:117db924cf7c | 2642 | sha384->digest[5] = W64LIT(0x8eb44a8768581511); |
| wolfSSL | 15:117db924cf7c | 2643 | sha384->digest[6] = W64LIT(0xdb0c2e0d64f98fa7); |
| wolfSSL | 15:117db924cf7c | 2644 | sha384->digest[7] = W64LIT(0x47b5481dbefa4fa4); |
| wolfSSL | 15:117db924cf7c | 2645 | |
| wolfSSL | 15:117db924cf7c | 2646 | sha384->buffLen = 0; |
| wolfSSL | 15:117db924cf7c | 2647 | sha384->loLen = 0; |
| wolfSSL | 15:117db924cf7c | 2648 | sha384->hiLen = 0; |
| wolfSSL | 15:117db924cf7c | 2649 | |
| wolfSSL | 15:117db924cf7c | 2650 | return 0; |
| wolfSSL | 15:117db924cf7c | 2651 | } |
| wolfSSL | 15:117db924cf7c | 2652 | |
| wolfSSL | 15:117db924cf7c | 2653 | int wc_Sha384Update(wc_Sha384* sha384, const byte* data, word32 len) |
| wolfSSL | 15:117db924cf7c | 2654 | { |
| wolfSSL | 15:117db924cf7c | 2655 | if (sha384 == NULL || (data == NULL && len > 0)) { |
| wolfSSL | 15:117db924cf7c | 2656 | return BAD_FUNC_ARG; |
| wolfSSL | 15:117db924cf7c | 2657 | } |
| wolfSSL | 15:117db924cf7c | 2658 | |
| wolfSSL | 15:117db924cf7c | 2659 | #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA384) |
| wolfSSL | 15:117db924cf7c | 2660 | if (sha384->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA384) { |
| wolfSSL | 15:117db924cf7c | 2661 | #if defined(HAVE_INTEL_QA) |
| wolfSSL | 15:117db924cf7c | 2662 | return IntelQaSymSha384(&sha384->asyncDev, NULL, data, len); |
| wolfSSL | 15:117db924cf7c | 2663 | #endif |
| wolfSSL | 15:117db924cf7c | 2664 | } |
| wolfSSL | 15:117db924cf7c | 2665 | #endif /* WOLFSSL_ASYNC_CRYPT */ |
| wolfSSL | 15:117db924cf7c | 2666 | |
| wolfSSL | 15:117db924cf7c | 2667 | return Sha512Update((wc_Sha512*)sha384, data, len); |
| wolfSSL | 15:117db924cf7c | 2668 | } |
| wolfSSL | 15:117db924cf7c | 2669 | |
| wolfSSL | 15:117db924cf7c | 2670 | |
| wolfSSL | 15:117db924cf7c | 2671 | int wc_Sha384FinalRaw(wc_Sha384* sha384, byte* hash) |
| wolfSSL | 15:117db924cf7c | 2672 | { |
| wolfSSL | 15:117db924cf7c | 2673 | #ifdef LITTLE_ENDIAN_ORDER |
| wolfSSL | 15:117db924cf7c | 2674 | word64 digest[WC_SHA384_DIGEST_SIZE / sizeof(word64)]; |
| wolfSSL | 15:117db924cf7c | 2675 | #endif |
| wolfSSL | 15:117db924cf7c | 2676 | |
| wolfSSL | 15:117db924cf7c | 2677 | if (sha384 == NULL || hash == NULL) { |
| wolfSSL | 15:117db924cf7c | 2678 | return BAD_FUNC_ARG; |
| wolfSSL | 15:117db924cf7c | 2679 | } |
| wolfSSL | 15:117db924cf7c | 2680 | |
| wolfSSL | 15:117db924cf7c | 2681 | #ifdef LITTLE_ENDIAN_ORDER |
| wolfSSL | 15:117db924cf7c | 2682 | ByteReverseWords64((word64*)digest, (word64*)sha384->digest, |
| wolfSSL | 15:117db924cf7c | 2683 | WC_SHA384_DIGEST_SIZE); |
| wolfSSL | 15:117db924cf7c | 2684 | XMEMCPY(hash, digest, WC_SHA384_DIGEST_SIZE); |
| wolfSSL | 15:117db924cf7c | 2685 | #else |
| wolfSSL | 15:117db924cf7c | 2686 | XMEMCPY(hash, sha384->digest, WC_SHA384_DIGEST_SIZE); |
| wolfSSL | 15:117db924cf7c | 2687 | #endif |
| wolfSSL | 15:117db924cf7c | 2688 | |
| wolfSSL | 15:117db924cf7c | 2689 | return 0; |
| wolfSSL | 15:117db924cf7c | 2690 | } |
| wolfSSL | 15:117db924cf7c | 2691 | |
| wolfSSL | 15:117db924cf7c | 2692 | int wc_Sha384Final(wc_Sha384* sha384, byte* hash) |
| wolfSSL | 15:117db924cf7c | 2693 | { |
| wolfSSL | 15:117db924cf7c | 2694 | int ret; |
| wolfSSL | 15:117db924cf7c | 2695 | |
| wolfSSL | 15:117db924cf7c | 2696 | if (sha384 == NULL || hash == NULL) { |
| wolfSSL | 15:117db924cf7c | 2697 | return BAD_FUNC_ARG; |
| wolfSSL | 15:117db924cf7c | 2698 | } |
| wolfSSL | 15:117db924cf7c | 2699 | |
| wolfSSL | 15:117db924cf7c | 2700 | #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA384) |
| wolfSSL | 15:117db924cf7c | 2701 | if (sha384->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA384) { |
| wolfSSL | 15:117db924cf7c | 2702 | #if defined(HAVE_INTEL_QA) |
| wolfSSL | 15:117db924cf7c | 2703 | return IntelQaSymSha384(&sha384->asyncDev, hash, NULL, |
| wolfSSL | 15:117db924cf7c | 2704 | WC_SHA384_DIGEST_SIZE); |
| wolfSSL | 15:117db924cf7c | 2705 | #endif |
| wolfSSL | 15:117db924cf7c | 2706 | } |
| wolfSSL | 15:117db924cf7c | 2707 | #endif /* WOLFSSL_ASYNC_CRYPT */ |
| wolfSSL | 15:117db924cf7c | 2708 | |
| wolfSSL | 15:117db924cf7c | 2709 | ret = Sha512Final((wc_Sha512*)sha384); |
| wolfSSL | 15:117db924cf7c | 2710 | if (ret != 0) |
| wolfSSL | 15:117db924cf7c | 2711 | return ret; |
| wolfSSL | 15:117db924cf7c | 2712 | |
| wolfSSL | 15:117db924cf7c | 2713 | XMEMCPY(hash, sha384->digest, WC_SHA384_DIGEST_SIZE); |
| wolfSSL | 15:117db924cf7c | 2714 | |
| wolfSSL | 15:117db924cf7c | 2715 | return InitSha384(sha384); /* reset state */ |
| wolfSSL | 15:117db924cf7c | 2716 | } |
| wolfSSL | 15:117db924cf7c | 2717 | |
| wolfSSL | 15:117db924cf7c | 2718 | |
| wolfSSL | 15:117db924cf7c | 2719 | /* Hardware Acceleration */ |
| wolfSSL | 15:117db924cf7c | 2720 | #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) |
| wolfSSL | 15:117db924cf7c | 2721 | int wc_InitSha384_ex(wc_Sha384* sha384, void* heap, int devId) |
| wolfSSL | 15:117db924cf7c | 2722 | { |
| wolfSSL | 15:117db924cf7c | 2723 | int ret = InitSha384(sha384); |
| wolfSSL | 15:117db924cf7c | 2724 | |
| wolfSSL | 15:117db924cf7c | 2725 | (void)heap; |
| wolfSSL | 15:117db924cf7c | 2726 | (void)devId; |
| wolfSSL | 15:117db924cf7c | 2727 | |
| wolfSSL | 15:117db924cf7c | 2728 | Sha512_SetTransform(); |
| wolfSSL | 15:117db924cf7c | 2729 | |
| wolfSSL | 15:117db924cf7c | 2730 | return ret; |
| wolfSSL | 15:117db924cf7c | 2731 | } |
| wolfSSL | 15:117db924cf7c | 2732 | #else |
| wolfSSL | 15:117db924cf7c | 2733 | int wc_InitSha384_ex(wc_Sha384* sha384, void* heap, int devId) |
| wolfSSL | 15:117db924cf7c | 2734 | { |
| wolfSSL | 15:117db924cf7c | 2735 | int ret; |
| wolfSSL | 15:117db924cf7c | 2736 | |
| wolfSSL | 15:117db924cf7c | 2737 | if (sha384 == NULL) { |
| wolfSSL | 15:117db924cf7c | 2738 | return BAD_FUNC_ARG; |
| wolfSSL | 15:117db924cf7c | 2739 | } |
| wolfSSL | 15:117db924cf7c | 2740 | |
| wolfSSL | 15:117db924cf7c | 2741 | sha384->heap = heap; |
| wolfSSL | 15:117db924cf7c | 2742 | ret = InitSha384(sha384); |
| wolfSSL | 15:117db924cf7c | 2743 | if (ret != 0) |
| wolfSSL | 15:117db924cf7c | 2744 | return ret; |
| wolfSSL | 15:117db924cf7c | 2745 | |
| wolfSSL | 15:117db924cf7c | 2746 | #ifdef WOLFSSL_SMALL_STACK_CACHE |
| wolfSSL | 15:117db924cf7c | 2747 | sha384->W = NULL; |
| wolfSSL | 15:117db924cf7c | 2748 | #endif |
| wolfSSL | 15:117db924cf7c | 2749 | |
| wolfSSL | 15:117db924cf7c | 2750 | #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA384) |
| wolfSSL | 15:117db924cf7c | 2751 | ret = wolfAsync_DevCtxInit(&sha384->asyncDev, WOLFSSL_ASYNC_MARKER_SHA384, |
| wolfSSL | 15:117db924cf7c | 2752 | sha384->heap, devId); |
| wolfSSL | 15:117db924cf7c | 2753 | #else |
| wolfSSL | 15:117db924cf7c | 2754 | (void)devId; |
| wolfSSL | 15:117db924cf7c | 2755 | #endif /* WOLFSSL_ASYNC_CRYPT */ |
| wolfSSL | 15:117db924cf7c | 2756 | |
| wolfSSL | 15:117db924cf7c | 2757 | return ret; |
| wolfSSL | 15:117db924cf7c | 2758 | } |
| wolfSSL | 15:117db924cf7c | 2759 | #endif |
| wolfSSL | 15:117db924cf7c | 2760 | #endif /* WOLFSSL_IMX6_CAAM */ |
| wolfSSL | 15:117db924cf7c | 2761 | |
| wolfSSL | 15:117db924cf7c | 2762 | int wc_InitSha384(wc_Sha384* sha384) |
| wolfSSL | 15:117db924cf7c | 2763 | { |
| wolfSSL | 15:117db924cf7c | 2764 | return wc_InitSha384_ex(sha384, NULL, INVALID_DEVID); |
| wolfSSL | 15:117db924cf7c | 2765 | } |
| wolfSSL | 15:117db924cf7c | 2766 | |
| wolfSSL | 15:117db924cf7c | 2767 | void wc_Sha384Free(wc_Sha384* sha384) |
| wolfSSL | 15:117db924cf7c | 2768 | { |
| wolfSSL | 15:117db924cf7c | 2769 | if (sha384 == NULL) |
| wolfSSL | 15:117db924cf7c | 2770 | return; |
| wolfSSL | 15:117db924cf7c | 2771 | |
| wolfSSL | 15:117db924cf7c | 2772 | #ifdef WOLFSSL_SMALL_STACK_CACHE |
| wolfSSL | 15:117db924cf7c | 2773 | if (sha384->W != NULL) { |
| wolfSSL | 15:117db924cf7c | 2774 | XFREE(sha384->W, NULL, DYNAMIC_TYPE_TMP_BUFFER); |
| wolfSSL | 15:117db924cf7c | 2775 | sha384->W = NULL; |
| wolfSSL | 15:117db924cf7c | 2776 | } |
| wolfSSL | 15:117db924cf7c | 2777 | #endif |
| wolfSSL | 15:117db924cf7c | 2778 | |
| wolfSSL | 15:117db924cf7c | 2779 | #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA384) |
| wolfSSL | 15:117db924cf7c | 2780 | wolfAsync_DevCtxFree(&sha384->asyncDev, WOLFSSL_ASYNC_MARKER_SHA384); |
| wolfSSL | 15:117db924cf7c | 2781 | #endif /* WOLFSSL_ASYNC_CRYPT */ |
| wolfSSL | 15:117db924cf7c | 2782 | } |
| wolfSSL | 15:117db924cf7c | 2783 | |
| wolfSSL | 15:117db924cf7c | 2784 | #endif /* WOLFSSL_SHA384 */ |
| wolfSSL | 15:117db924cf7c | 2785 | |
| wolfSSL | 15:117db924cf7c | 2786 | #endif /* HAVE_FIPS */ |
| wolfSSL | 15:117db924cf7c | 2787 | |
| wolfSSL | 15:117db924cf7c | 2788 | #ifdef WOLFSSL_SHA512 |
| wolfSSL | 15:117db924cf7c | 2789 | |
| wolfSSL | 15:117db924cf7c | 2790 | int wc_Sha512GetHash(wc_Sha512* sha512, byte* hash) |
| wolfSSL | 15:117db924cf7c | 2791 | { |
| wolfSSL | 15:117db924cf7c | 2792 | int ret; |
| wolfSSL | 15:117db924cf7c | 2793 | wc_Sha512 tmpSha512; |
| wolfSSL | 15:117db924cf7c | 2794 | |
| wolfSSL | 15:117db924cf7c | 2795 | if (sha512 == NULL || hash == NULL) |
| wolfSSL | 15:117db924cf7c | 2796 | return BAD_FUNC_ARG; |
| wolfSSL | 15:117db924cf7c | 2797 | |
| wolfSSL | 15:117db924cf7c | 2798 | ret = wc_Sha512Copy(sha512, &tmpSha512); |
| wolfSSL | 15:117db924cf7c | 2799 | if (ret == 0) { |
| wolfSSL | 15:117db924cf7c | 2800 | ret = wc_Sha512Final(&tmpSha512, hash); |
| wolfSSL | 15:117db924cf7c | 2801 | wc_Sha512Free(&tmpSha512); |
| wolfSSL | 15:117db924cf7c | 2802 | } |
| wolfSSL | 15:117db924cf7c | 2803 | return ret; |
| wolfSSL | 15:117db924cf7c | 2804 | } |
| wolfSSL | 15:117db924cf7c | 2805 | |
| wolfSSL | 15:117db924cf7c | 2806 | int wc_Sha512Copy(wc_Sha512* src, wc_Sha512* dst) |
| wolfSSL | 15:117db924cf7c | 2807 | { |
| wolfSSL | 15:117db924cf7c | 2808 | int ret = 0; |
| wolfSSL | 15:117db924cf7c | 2809 | |
| wolfSSL | 15:117db924cf7c | 2810 | if (src == NULL || dst == NULL) |
| wolfSSL | 15:117db924cf7c | 2811 | return BAD_FUNC_ARG; |
| wolfSSL | 15:117db924cf7c | 2812 | |
| wolfSSL | 15:117db924cf7c | 2813 | XMEMCPY(dst, src, sizeof(wc_Sha512)); |
| wolfSSL | 15:117db924cf7c | 2814 | #ifdef WOLFSSL_SMALL_STACK_CACHE |
| wolfSSL | 15:117db924cf7c | 2815 | dst->W = NULL; |
| wolfSSL | 15:117db924cf7c | 2816 | #endif |
| wolfSSL | 15:117db924cf7c | 2817 | |
| wolfSSL | 15:117db924cf7c | 2818 | #ifdef WOLFSSL_ASYNC_CRYPT |
| wolfSSL | 15:117db924cf7c | 2819 | ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev); |
| wolfSSL | 15:117db924cf7c | 2820 | #endif |
| wolfSSL | 15:117db924cf7c | 2821 | |
| wolfSSL | 15:117db924cf7c | 2822 | return ret; |
| wolfSSL | 15:117db924cf7c | 2823 | } |
| wolfSSL | 15:117db924cf7c | 2824 | |
| wolfSSL | 15:117db924cf7c | 2825 | #endif /* WOLFSSL_SHA512 */ |
| wolfSSL | 15:117db924cf7c | 2826 | |
| wolfSSL | 15:117db924cf7c | 2827 | #ifdef WOLFSSL_SHA384 |
| wolfSSL | 15:117db924cf7c | 2828 | |
| wolfSSL | 15:117db924cf7c | 2829 | int wc_Sha384GetHash(wc_Sha384* sha384, byte* hash) |
| wolfSSL | 15:117db924cf7c | 2830 | { |
| wolfSSL | 15:117db924cf7c | 2831 | int ret; |
| wolfSSL | 15:117db924cf7c | 2832 | wc_Sha384 tmpSha384; |
| wolfSSL | 15:117db924cf7c | 2833 | |
| wolfSSL | 15:117db924cf7c | 2834 | if (sha384 == NULL || hash == NULL) |
| wolfSSL | 15:117db924cf7c | 2835 | return BAD_FUNC_ARG; |
| wolfSSL | 15:117db924cf7c | 2836 | |
| wolfSSL | 15:117db924cf7c | 2837 | ret = wc_Sha384Copy(sha384, &tmpSha384); |
| wolfSSL | 15:117db924cf7c | 2838 | if (ret == 0) { |
| wolfSSL | 15:117db924cf7c | 2839 | ret = wc_Sha384Final(&tmpSha384, hash); |
| wolfSSL | 15:117db924cf7c | 2840 | wc_Sha384Free(&tmpSha384); |
| wolfSSL | 15:117db924cf7c | 2841 | } |
| wolfSSL | 15:117db924cf7c | 2842 | return ret; |
| wolfSSL | 15:117db924cf7c | 2843 | } |
| wolfSSL | 15:117db924cf7c | 2844 | int wc_Sha384Copy(wc_Sha384* src, wc_Sha384* dst) |
| wolfSSL | 15:117db924cf7c | 2845 | { |
| wolfSSL | 15:117db924cf7c | 2846 | int ret = 0; |
| wolfSSL | 15:117db924cf7c | 2847 | |
| wolfSSL | 15:117db924cf7c | 2848 | if (src == NULL || dst == NULL) |
| wolfSSL | 15:117db924cf7c | 2849 | return BAD_FUNC_ARG; |
| wolfSSL | 15:117db924cf7c | 2850 | |
| wolfSSL | 15:117db924cf7c | 2851 | XMEMCPY(dst, src, sizeof(wc_Sha384)); |
| wolfSSL | 15:117db924cf7c | 2852 | #ifdef WOLFSSL_SMALL_STACK_CACHE |
| wolfSSL | 15:117db924cf7c | 2853 | dst->W = NULL; |
| wolfSSL | 15:117db924cf7c | 2854 | #endif |
| wolfSSL | 15:117db924cf7c | 2855 | |
| wolfSSL | 15:117db924cf7c | 2856 | #ifdef WOLFSSL_ASYNC_CRYPT |
| wolfSSL | 15:117db924cf7c | 2857 | ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev); |
| wolfSSL | 15:117db924cf7c | 2858 | #endif |
| wolfSSL | 15:117db924cf7c | 2859 | |
| wolfSSL | 15:117db924cf7c | 2860 | return ret; |
| wolfSSL | 15:117db924cf7c | 2861 | } |
| wolfSSL | 15:117db924cf7c | 2862 | |
| wolfSSL | 15:117db924cf7c | 2863 | #endif /* WOLFSSL_SHA384 */ |
| wolfSSL | 15:117db924cf7c | 2864 | |
| wolfSSL | 15:117db924cf7c | 2865 | #endif /* WOLFSSL_SHA512 || WOLFSSL_SHA384 */ |
| wolfSSL | 15:117db924cf7c | 2866 |