Xuyi Wang / wolfSSL

Dependents:   OS

Committer:
wolfSSL
Date:
Sat Aug 18 22:20:43 2018 +0000
Revision:
15:117db924cf7c
wolfSSL 3.15.3

Who changed what in which revision?

User | Revision | Line number | New contents of line
wolfSSL 15:117db924cf7c 1 /* sha256.c
wolfSSL 15:117db924cf7c 2 *
wolfSSL 15:117db924cf7c 3 * Copyright (C) 2006-2017 wolfSSL Inc.
wolfSSL 15:117db924cf7c 4 *
wolfSSL 15:117db924cf7c 5 * This file is part of wolfSSL.
wolfSSL 15:117db924cf7c 6 *
wolfSSL 15:117db924cf7c 7 * wolfSSL is free software; you can redistribute it and/or modify
wolfSSL 15:117db924cf7c 8 * it under the terms of the GNU General Public License as published by
wolfSSL 15:117db924cf7c 9 * the Free Software Foundation; either version 2 of the License, or
wolfSSL 15:117db924cf7c 10 * (at your option) any later version.
wolfSSL 15:117db924cf7c 11 *
wolfSSL 15:117db924cf7c 12 * wolfSSL is distributed in the hope that it will be useful,
wolfSSL 15:117db924cf7c 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
wolfSSL 15:117db924cf7c 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
wolfSSL 15:117db924cf7c 15 * GNU General Public License for more details.
wolfSSL 15:117db924cf7c 16 *
wolfSSL 15:117db924cf7c 17 * You should have received a copy of the GNU General Public License
wolfSSL 15:117db924cf7c 18 * along with this program; if not, write to the Free Software
wolfSSL 15:117db924cf7c 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
wolfSSL 15:117db924cf7c 20 */
wolfSSL 15:117db924cf7c 21
wolfSSL 15:117db924cf7c 22
wolfSSL 15:117db924cf7c 23 /* code submitted by raphael.huck@efixo.com */
wolfSSL 15:117db924cf7c 24
wolfSSL 15:117db924cf7c 25 #ifdef HAVE_CONFIG_H
wolfSSL 15:117db924cf7c 26 #include <config.h>
wolfSSL 15:117db924cf7c 27 #endif
wolfSSL 15:117db924cf7c 28
wolfSSL 15:117db924cf7c 29 #include <wolfssl/wolfcrypt/settings.h>
wolfSSL 15:117db924cf7c 30
wolfSSL 15:117db924cf7c 31 #if !defined(NO_SHA256) && !defined(WOLFSSL_ARMASM)
wolfSSL 15:117db924cf7c 32
wolfSSL 15:117db924cf7c 33 #if defined(HAVE_FIPS) && \
wolfSSL 15:117db924cf7c 34 defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
wolfSSL 15:117db924cf7c 35
wolfSSL 15:117db924cf7c 36 /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
wolfSSL 15:117db924cf7c 37 #define FIPS_NO_WRAPPERS
wolfSSL 15:117db924cf7c 38
wolfSSL 15:117db924cf7c 39 #ifdef USE_WINDOWS_API
wolfSSL 15:117db924cf7c 40 #pragma code_seg(".fipsA$d")
wolfSSL 15:117db924cf7c 41 #pragma const_seg(".fipsB$d")
wolfSSL 15:117db924cf7c 42 #endif
wolfSSL 15:117db924cf7c 43 #endif
wolfSSL 15:117db924cf7c 44
wolfSSL 15:117db924cf7c 45 #include <wolfssl/wolfcrypt/sha256.h>
wolfSSL 15:117db924cf7c 46 #include <wolfssl/wolfcrypt/error-crypt.h>
wolfSSL 15:117db924cf7c 47 #include <wolfssl/wolfcrypt/cpuid.h>
wolfSSL 15:117db924cf7c 48
wolfSSL 15:117db924cf7c 49 /* fips wrapper calls, user can call direct */
wolfSSL 15:117db924cf7c 50 #if defined(HAVE_FIPS) && \
wolfSSL 15:117db924cf7c 51 (!defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION < 2))
wolfSSL 15:117db924cf7c 52
/* FIPS (pre-v2) wrapper: initialise a SHA-256 context.
 * Returns BAD_FUNC_ARG when sha is NULL; otherwise returns whatever
 * InitSha256_fips() returns. */
wolfSSL 15:117db924cf7c 53 int wc_InitSha256(wc_Sha256* sha)
wolfSSL 15:117db924cf7c 54 {
wolfSSL 15:117db924cf7c 55 if (sha == NULL) {
wolfSSL 15:117db924cf7c 56 return BAD_FUNC_ARG;
wolfSSL 15:117db924cf7c 57 }
wolfSSL 15:117db924cf7c 58 return InitSha256_fips(sha);
wolfSSL 15:117db924cf7c 59 }
/* FIPS (pre-v2) wrapper with the extended signature.
 * heap and devId are accepted but ignored in this build; behaviour is
 * otherwise the same as wc_InitSha256(): BAD_FUNC_ARG for a NULL context,
 * else the result of InitSha256_fips(). */
wolfSSL 15:117db924cf7c 60 int wc_InitSha256_ex(wc_Sha256* sha, void* heap, int devId)
wolfSSL 15:117db924cf7c 61 {
wolfSSL 15:117db924cf7c 62 (void)heap;
wolfSSL 15:117db924cf7c 63 (void)devId;
wolfSSL 15:117db924cf7c 64 if (sha == NULL) {
wolfSSL 15:117db924cf7c 65 return BAD_FUNC_ARG;
wolfSSL 15:117db924cf7c 66 }
wolfSSL 15:117db924cf7c 67 return InitSha256_fips(sha);
wolfSSL 15:117db924cf7c 68 }
/* FIPS (pre-v2) wrapper: feed len bytes at data into the hash.
 * Returns BAD_FUNC_ARG when sha is NULL or when data is NULL with a
 * non-zero len; returns 0 without touching the context when data is NULL
 * and len is 0; otherwise returns the result of Sha256Update_fips(). */
wolfSSL 15:117db924cf7c 69 int wc_Sha256Update(wc_Sha256* sha, const byte* data, word32 len)
wolfSSL 15:117db924cf7c 70 {
wolfSSL 15:117db924cf7c 71 if (sha == NULL || (data == NULL && len > 0)) {
wolfSSL 15:117db924cf7c 72 return BAD_FUNC_ARG;
wolfSSL 15:117db924cf7c 73 }
wolfSSL 15:117db924cf7c 74
wolfSSL 15:117db924cf7c 75 if (data == NULL && len == 0) {
wolfSSL 15:117db924cf7c 76 /* valid, but do nothing */
wolfSSL 15:117db924cf7c 77 return 0;
wolfSSL 15:117db924cf7c 78 }
wolfSSL 15:117db924cf7c 79
wolfSSL 15:117db924cf7c 80 return Sha256Update_fips(sha, data, len);
wolfSSL 15:117db924cf7c 81 }
/* FIPS (pre-v2) wrapper: finish the hash and write the digest to out.
 * Returns BAD_FUNC_ARG when either pointer is NULL; otherwise returns the
 * result of Sha256Final_fips(). */
wolfSSL 15:117db924cf7c 82 int wc_Sha256Final(wc_Sha256* sha, byte* out)
wolfSSL 15:117db924cf7c 83 {
wolfSSL 15:117db924cf7c 84 if (sha == NULL || out == NULL) {
wolfSSL 15:117db924cf7c 85 return BAD_FUNC_ARG;
wolfSSL 15:117db924cf7c 86 }
wolfSSL 15:117db924cf7c 87 return Sha256Final_fips(sha, out);
wolfSSL 15:117db924cf7c 88 }
/* FIPS (pre-v2) wrapper: intentionally a no-op; the argument is only
 * referenced to silence unused-parameter warnings. */
wolfSSL 15:117db924cf7c 89 void wc_Sha256Free(wc_Sha256* sha)
wolfSSL 15:117db924cf7c 90 {
wolfSSL 15:117db924cf7c 91 (void)sha;
wolfSSL 15:117db924cf7c 92 /* Not supported in FIPS */
wolfSSL 15:117db924cf7c 93 }
wolfSSL 15:117db924cf7c 94
wolfSSL 15:117db924cf7c 95 #else /* else build without fips, or for FIPS v2 */
wolfSSL 15:117db924cf7c 96
wolfSSL 15:117db924cf7c 97
wolfSSL 15:117db924cf7c 98 #if defined(WOLFSSL_TI_HASH)
wolfSSL 15:117db924cf7c 99 /* #include <wolfcrypt/src/port/ti/ti-hash.c> included by wc_port.c */
wolfSSL 15:117db924cf7c 100 #else
wolfSSL 15:117db924cf7c 101
wolfSSL 15:117db924cf7c 102 #include <wolfssl/wolfcrypt/logging.h>
wolfSSL 15:117db924cf7c 103
wolfSSL 15:117db924cf7c 104 #ifdef NO_INLINE
wolfSSL 15:117db924cf7c 105 #include <wolfssl/wolfcrypt/misc.h>
wolfSSL 15:117db924cf7c 106 #else
wolfSSL 15:117db924cf7c 107 #define WOLFSSL_MISC_INCLUDED
wolfSSL 15:117db924cf7c 108 #include <wolfcrypt/src/misc.c>
wolfSSL 15:117db924cf7c 109 #endif
wolfSSL 15:117db924cf7c 110
wolfSSL 15:117db924cf7c 111
wolfSSL 15:117db924cf7c 112 #if defined(USE_INTEL_SPEEDUP)
wolfSSL 15:117db924cf7c 113 #define HAVE_INTEL_AVX1
wolfSSL 15:117db924cf7c 114
wolfSSL 15:117db924cf7c 115 #if defined(__GNUC__) && ((__GNUC__ < 4) || \
wolfSSL 15:117db924cf7c 116 (__GNUC__ == 4 && __GNUC_MINOR__ <= 8))
wolfSSL 15:117db924cf7c 117 #define NO_AVX2_SUPPORT
wolfSSL 15:117db924cf7c 118 #endif
wolfSSL 15:117db924cf7c 119 #if defined(__clang__) && ((__clang_major__ < 3) || \
wolfSSL 15:117db924cf7c 120 (__clang_major__ == 3 && __clang_minor__ <= 5))
wolfSSL 15:117db924cf7c 121 #define NO_AVX2_SUPPORT
wolfSSL 15:117db924cf7c 122 #elif defined(__clang__) && defined(NO_AVX2_SUPPORT)
wolfSSL 15:117db924cf7c 123 #undef NO_AVX2_SUPPORT
wolfSSL 15:117db924cf7c 124 #endif
wolfSSL 15:117db924cf7c 125
wolfSSL 15:117db924cf7c 126 #define HAVE_INTEL_AVX1
wolfSSL 15:117db924cf7c 127 #ifndef NO_AVX2_SUPPORT
wolfSSL 15:117db924cf7c 128 #define HAVE_INTEL_AVX2
wolfSSL 15:117db924cf7c 129 #endif
wolfSSL 15:117db924cf7c 130 #endif /* USE_INTEL_SPEEDUP */
wolfSSL 15:117db924cf7c 131
wolfSSL 15:117db924cf7c 132 #if defined(HAVE_INTEL_AVX2)
wolfSSL 15:117db924cf7c 133 #define HAVE_INTEL_RORX
wolfSSL 15:117db924cf7c 134 #endif
wolfSSL 15:117db924cf7c 135
wolfSSL 15:117db924cf7c 136
wolfSSL 15:117db924cf7c 137 #if !defined(WOLFSSL_PIC32MZ_HASH) && !defined(STM32_HASH_SHA2) && \
wolfSSL 15:117db924cf7c 138 (!defined(WOLFSSL_IMX6_CAAM) || defined(NO_IMX6_CAAM_HASH))
/* Reset a SHA-256 context to its starting state.
 * Clears sha256->digest, loads the eight SHA-256 initial hash words
 * (FIPS 180-4 H(0)) into digest[0..7], and zeroes buffLen, loLen and hiLen.
 * Returns BAD_FUNC_ARG when sha256 is NULL, otherwise 0. */
wolfSSL 15:117db924cf7c 139 static int InitSha256(wc_Sha256* sha256)
wolfSSL 15:117db924cf7c 140 {
wolfSSL 15:117db924cf7c 141 int ret = 0;
wolfSSL 15:117db924cf7c 142
wolfSSL 15:117db924cf7c 143 if (sha256 == NULL)
wolfSSL 15:117db924cf7c 144 return BAD_FUNC_ARG;
wolfSSL 15:117db924cf7c 145
wolfSSL 15:117db924cf7c 146 XMEMSET(sha256->digest, 0, sizeof(sha256->digest));
wolfSSL 15:117db924cf7c 147 sha256->digest[0] = 0x6A09E667L;
wolfSSL 15:117db924cf7c 148 sha256->digest[1] = 0xBB67AE85L;
wolfSSL 15:117db924cf7c 149 sha256->digest[2] = 0x3C6EF372L;
wolfSSL 15:117db924cf7c 150 sha256->digest[3] = 0xA54FF53AL;
wolfSSL 15:117db924cf7c 151 sha256->digest[4] = 0x510E527FL;
wolfSSL 15:117db924cf7c 152 sha256->digest[5] = 0x9B05688CL;
wolfSSL 15:117db924cf7c 153 sha256->digest[6] = 0x1F83D9ABL;
wolfSSL 15:117db924cf7c 154 sha256->digest[7] = 0x5BE0CD19L;
wolfSSL 15:117db924cf7c 155
/* nothing buffered and no input counted yet */
wolfSSL 15:117db924cf7c 156 sha256->buffLen = 0;
wolfSSL 15:117db924cf7c 157 sha256->loLen = 0;
wolfSSL 15:117db924cf7c 158 sha256->hiLen = 0;
wolfSSL 15:117db924cf7c 159
wolfSSL 15:117db924cf7c 160 return ret;
wolfSSL 15:117db924cf7c 161 }
wolfSSL 15:117db924cf7c 162 #endif
wolfSSL 15:117db924cf7c 163
wolfSSL 15:117db924cf7c 164
wolfSSL 15:117db924cf7c 165 /* Hardware Acceleration */
wolfSSL 15:117db924cf7c 166 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
wolfSSL 15:117db924cf7c 167
wolfSSL 15:117db924cf7c 168 /* in case intel instructions aren't available, plus we need the K[] global */
wolfSSL 15:117db924cf7c 169 #define NEED_SOFT_SHA256
wolfSSL 15:117db924cf7c 170
wolfSSL 15:117db924cf7c 171 /*****
wolfSSL 15:117db924cf7c 172 Intel AVX1/AVX2 Macro Control Structure
wolfSSL 15:117db924cf7c 173
wolfSSL 15:117db924cf7c 174 #define HAVE_INTEL_AVX1
wolfSSL 15:117db924cf7c 175 #define HAVE_INTEL_AVX2
wolfSSL 15:117db924cf7c 176
wolfSSL 15:117db924cf7c 177 #define HAVE_INTEL_RORX
wolfSSL 15:117db924cf7c 178
wolfSSL 15:117db924cf7c 179
wolfSSL 15:117db924cf7c 180 int InitSha256(wc_Sha256* sha256) {
wolfSSL 15:117db924cf7c 181 Save/Recover XMM, YMM
wolfSSL 15:117db924cf7c 182 ...
wolfSSL 15:117db924cf7c 183 }
wolfSSL 15:117db924cf7c 184
wolfSSL 15:117db924cf7c 185 #if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
wolfSSL 15:117db924cf7c 186 Transform_Sha256(); Function prototype
wolfSSL 15:117db924cf7c 187 #else
wolfSSL 15:117db924cf7c 188 Transform_Sha256() { }
wolfSSL 15:117db924cf7c 189 int Sha256Final() {
wolfSSL 15:117db924cf7c 190 Save/Recover XMM, YMM
wolfSSL 15:117db924cf7c 191 ...
wolfSSL 15:117db924cf7c 192 }
wolfSSL 15:117db924cf7c 193 #endif
wolfSSL 15:117db924cf7c 194
wolfSSL 15:117db924cf7c 195 #if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
wolfSSL 15:117db924cf7c 196 #if defined(HAVE_INTEL_RORX)
wolfSSL 15:117db924cf7c 197 #define RND with rorx instruction
wolfSSL 15:117db924cf7c 198 #else
wolfSSL 15:117db924cf7c 199 #define RND
wolfSSL 15:117db924cf7c 200 #endif
wolfSSL 15:117db924cf7c 201 #endif
wolfSSL 15:117db924cf7c 202
wolfSSL 15:117db924cf7c 203 #if defined(HAVE_INTEL_AVX1)
wolfSSL 15:117db924cf7c 204
wolfSSL 15:117db924cf7c 205 #define XMM Instructions/inline asm
wolfSSL 15:117db924cf7c 206
wolfSSL 15:117db924cf7c 207 int Transform_Sha256() {
wolfSSL 15:117db924cf7c 208 Stitched Message Sched/Round
wolfSSL 15:117db924cf7c 209 }
wolfSSL 15:117db924cf7c 210
wolfSSL 15:117db924cf7c 211 #elif defined(HAVE_INTEL_AVX2)
wolfSSL 15:117db924cf7c 212
wolfSSL 15:117db924cf7c 213 #define YMM Instructions/inline asm
wolfSSL 15:117db924cf7c 214
wolfSSL 15:117db924cf7c 215 int Transform_Sha256() {
wolfSSL 15:117db924cf7c 216 More granular Stitched Message Sched/Round
wolfSSL 15:117db924cf7c 217 }
wolfSSL 15:117db924cf7c 218
wolfSSL 15:117db924cf7c 219 #endif
wolfSSL 15:117db924cf7c 220
wolfSSL 15:117db924cf7c 221 */
wolfSSL 15:117db924cf7c 222
wolfSSL 15:117db924cf7c 223 /* Each platform needs to query info type 1 from cpuid to see if aesni is
wolfSSL 15:117db924cf7c 224 * supported. Also, let's setup a macro for proper linkage w/o ABI conflicts
wolfSSL 15:117db924cf7c 225 */
wolfSSL 15:117db924cf7c 226
wolfSSL 15:117db924cf7c 227 /* #if defined(HAVE_INTEL_AVX1/2) at the tail of sha256 */
/* Forward declarations of every block-transform variant that
 * Sha256_SetTransform() may select, followed by the file-scope dispatch
 * state it fills in. The plain (non-_Len) form takes only the context; the
 * _Len form additionally takes a length. */
wolfSSL 15:117db924cf7c 228 static int Transform_Sha256(wc_Sha256* sha256);
wolfSSL 15:117db924cf7c 229 #if defined(HAVE_INTEL_AVX1)
wolfSSL 15:117db924cf7c 230 static int Transform_Sha256_AVX1(wc_Sha256 *sha256);
wolfSSL 15:117db924cf7c 231 static int Transform_Sha256_AVX1_Len(wc_Sha256* sha256, word32 len);
wolfSSL 15:117db924cf7c 232 #endif
wolfSSL 15:117db924cf7c 233 #if defined(HAVE_INTEL_AVX2)
wolfSSL 15:117db924cf7c 234 static int Transform_Sha256_AVX2(wc_Sha256 *sha256);
wolfSSL 15:117db924cf7c 235 static int Transform_Sha256_AVX2_Len(wc_Sha256* sha256, word32 len);
wolfSSL 15:117db924cf7c 236 #ifdef HAVE_INTEL_RORX
wolfSSL 15:117db924cf7c 237 static int Transform_Sha256_AVX1_RORX(wc_Sha256 *sha256);
wolfSSL 15:117db924cf7c 238 static int Transform_Sha256_AVX1_RORX_Len(wc_Sha256* sha256, word32 len);
wolfSSL 15:117db924cf7c 239 static int Transform_Sha256_AVX2_RORX(wc_Sha256 *sha256);
wolfSSL 15:117db924cf7c 240 static int Transform_Sha256_AVX2_RORX_Len(wc_Sha256* sha256, word32 len);
wolfSSL 15:117db924cf7c 241 #endif
wolfSSL 15:117db924cf7c 242 #endif
/* Single-block transform chosen by Sha256_SetTransform(). */
wolfSSL 15:117db924cf7c 243 static int (*Transform_Sha256_p)(wc_Sha256* sha256);
wolfSSL 15:117db924cf7c 244 /* = _Transform_Sha256 */
/* Length-taking transform chosen by Sha256_SetTransform(); it is set to NULL
 * when the software Transform_Sha256 is selected. */
wolfSSL 15:117db924cf7c 245 static int (*Transform_Sha256_Len_p)(wc_Sha256* sha256, word32 len);
wolfSSL 15:117db924cf7c 246 /* = NULL */
/* Non-zero once Sha256_SetTransform() has made its selection. */
wolfSSL 15:117db924cf7c 247 static int transform_check = 0;
/* Value returned by cpuid_get_flags(), stored by Sha256_SetTransform(). */
wolfSSL 15:117db924cf7c 248 static word32 intel_flags;
/* Both macros call through the pointers above without checking them;
 * XTRANSFORM_LEN must only be used when Transform_Sha256_Len_p is non-NULL. */
wolfSSL 15:117db924cf7c 249 #define XTRANSFORM(S) (*Transform_Sha256_p)((S))
wolfSSL 15:117db924cf7c 250 #define XTRANSFORM_LEN(S, L) (*Transform_Sha256_Len_p)((S),(L))
wolfSSL 15:117db924cf7c 251
/* Pick the transform implementation for this CPU and record it in
 * Transform_Sha256_p / Transform_Sha256_Len_p.
 *
 * Does nothing if transform_check is already set. Otherwise stores
 * cpuid_get_flags() in intel_flags and selects, in order of preference:
 *   - AVX2 + BMI2 (when HAVE_INTEL_RORX is built in): the AVX2_RORX pair
 *   - AVX2:                                           the AVX2 pair
 *   - AVX1:                                           the AVX1 pair
 *   - otherwise: the software Transform_Sha256, with the _Len pointer NULL
 * and finally sets transform_check to 1.
 *
 * NOTE(review): the file-scope statics are read and written here with no
 * synchronisation inside this function. */
wolfSSL 15:117db924cf7c 252 static void Sha256_SetTransform(void)
wolfSSL 15:117db924cf7c 253 {
wolfSSL 15:117db924cf7c 254
wolfSSL 15:117db924cf7c 255 if (transform_check)
wolfSSL 15:117db924cf7c 256 return;
wolfSSL 15:117db924cf7c 257
wolfSSL 15:117db924cf7c 258 intel_flags = cpuid_get_flags();
wolfSSL 15:117db924cf7c 259
wolfSSL 15:117db924cf7c 260 #ifdef HAVE_INTEL_AVX2
wolfSSL 15:117db924cf7c 261 if (IS_INTEL_AVX2(intel_flags)) {
wolfSSL 15:117db924cf7c 262 #ifdef HAVE_INTEL_RORX
wolfSSL 15:117db924cf7c 263 if (IS_INTEL_BMI2(intel_flags)) {
wolfSSL 15:117db924cf7c 264 Transform_Sha256_p = Transform_Sha256_AVX2_RORX;
wolfSSL 15:117db924cf7c 265 Transform_Sha256_Len_p = Transform_Sha256_AVX2_RORX_Len;
wolfSSL 15:117db924cf7c 266 }
wolfSSL 15:117db924cf7c 267 else
wolfSSL 15:117db924cf7c 268 #endif
/* constant-true condition: the AVX2 pair is always taken here, so the
 * AVX1_RORX else-branch below can never execute as written */
wolfSSL 15:117db924cf7c 269 if (1)
wolfSSL 15:117db924cf7c 270 {
wolfSSL 15:117db924cf7c 271 Transform_Sha256_p = Transform_Sha256_AVX2;
wolfSSL 15:117db924cf7c 272 Transform_Sha256_Len_p = Transform_Sha256_AVX2_Len;
wolfSSL 15:117db924cf7c 273 }
wolfSSL 15:117db924cf7c 274 #ifdef HAVE_INTEL_RORX
wolfSSL 15:117db924cf7c 275 else {
wolfSSL 15:117db924cf7c 276 Transform_Sha256_p = Transform_Sha256_AVX1_RORX;
wolfSSL 15:117db924cf7c 277 Transform_Sha256_Len_p = Transform_Sha256_AVX1_RORX_Len;
wolfSSL 15:117db924cf7c 278 }
wolfSSL 15:117db924cf7c 279 #endif
wolfSSL 15:117db924cf7c 280 }
wolfSSL 15:117db924cf7c 281 else
wolfSSL 15:117db924cf7c 282 #endif
wolfSSL 15:117db924cf7c 283 #ifdef HAVE_INTEL_AVX1
wolfSSL 15:117db924cf7c 284 if (IS_INTEL_AVX1(intel_flags)) {
wolfSSL 15:117db924cf7c 285 Transform_Sha256_p = Transform_Sha256_AVX1;
wolfSSL 15:117db924cf7c 286 Transform_Sha256_Len_p = Transform_Sha256_AVX1_Len;
wolfSSL 15:117db924cf7c 287 }
wolfSSL 15:117db924cf7c 288 else
wolfSSL 15:117db924cf7c 289 #endif
/* software fallback: no length-taking variant is available */
wolfSSL 15:117db924cf7c 290 {
wolfSSL 15:117db924cf7c 291 Transform_Sha256_p = Transform_Sha256;
wolfSSL 15:117db924cf7c 292 Transform_Sha256_Len_p = NULL;
wolfSSL 15:117db924cf7c 293 }
wolfSSL 15:117db924cf7c 294
wolfSSL 15:117db924cf7c 295 transform_check = 1;
wolfSSL 15:117db924cf7c 296 }
wolfSSL 15:117db924cf7c 297
/* Intel AVX1/AVX2 build of wc_InitSha256_ex.
 * Stores heap in the context, resets it with InitSha256(), then makes sure
 * the transform dispatch pointers are set via Sha256_SetTransform().
 * When WOLFSSL_ASYNC_CRYPT and WC_ASYNC_ENABLE_SHA256 are both defined the
 * return value is that of wolfAsync_DevCtxInit(); otherwise devId is unused.
 * Returns BAD_FUNC_ARG for a NULL context, a non-zero InitSha256() result
 * if that fails, else 0 (or the async init result). */
wolfSSL 15:117db924cf7c 298 int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId)
wolfSSL 15:117db924cf7c 299 {
wolfSSL 15:117db924cf7c 300 int ret = 0;
wolfSSL 15:117db924cf7c 301 if (sha256 == NULL)
wolfSSL 15:117db924cf7c 302 return BAD_FUNC_ARG;
wolfSSL 15:117db924cf7c 303
wolfSSL 15:117db924cf7c 304 sha256->heap = heap;
wolfSSL 15:117db924cf7c 305
wolfSSL 15:117db924cf7c 306 ret = InitSha256(sha256);
wolfSSL 15:117db924cf7c 307 if (ret != 0)
wolfSSL 15:117db924cf7c 308 return ret;
wolfSSL 15:117db924cf7c 309
wolfSSL 15:117db924cf7c 310 /* choose best Transform function under this runtime environment */
wolfSSL 15:117db924cf7c 311 Sha256_SetTransform();
wolfSSL 15:117db924cf7c 312
wolfSSL 15:117db924cf7c 313 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
wolfSSL 15:117db924cf7c 314 ret = wolfAsync_DevCtxInit(&sha256->asyncDev,
wolfSSL 15:117db924cf7c 315 WOLFSSL_ASYNC_MARKER_SHA256, sha256->heap, devId);
wolfSSL 15:117db924cf7c 316 #else
wolfSSL 15:117db924cf7c 317 (void)devId;
wolfSSL 15:117db924cf7c 318 #endif /* WOLFSSL_ASYNC_CRYPT */
wolfSSL 15:117db924cf7c 319
wolfSSL 15:117db924cf7c 320 return ret;
wolfSSL 15:117db924cf7c 321 }
wolfSSL 15:117db924cf7c 322
wolfSSL 15:117db924cf7c 323 #elif defined(FREESCALE_LTC_SHA)
/* FREESCALE_LTC_SHA build of wc_InitSha256_ex.
 * heap and devId are ignored. Initialises the LTC hash context embedded in
 * sha256 for SHA-256 via LTC_HASH_Init(); the result of that call, if any,
 * is not inspected.
 * Returns BAD_FUNC_ARG when sha256 is NULL, otherwise 0. */
wolfSSL 15:117db924cf7c 324 int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId)
wolfSSL 15:117db924cf7c 325 {
    /* added in review: reject NULL before &sha256->ctx is formed, matching
     * the other wc_InitSha256_ex variants in this file */
    if (sha256 == NULL)
        return BAD_FUNC_ARG;

wolfSSL 15:117db924cf7c 326 (void)heap;
wolfSSL 15:117db924cf7c 327 (void)devId;
wolfSSL 15:117db924cf7c 328
wolfSSL 15:117db924cf7c 329 LTC_HASH_Init(LTC_BASE, &sha256->ctx, kLTC_Sha256, NULL, 0);
wolfSSL 15:117db924cf7c 330
wolfSSL 15:117db924cf7c 331 return 0;
wolfSSL 15:117db924cf7c 332 }
wolfSSL 15:117db924cf7c 333
wolfSSL 15:117db924cf7c 334 #elif defined(FREESCALE_MMCAU_SHA)
wolfSSL 15:117db924cf7c 335
wolfSSL 15:117db924cf7c 336 #ifdef FREESCALE_MMCAU_CLASSIC_SHA
wolfSSL 15:117db924cf7c 337 #include "cau_api.h"
wolfSSL 15:117db924cf7c 338 #else
wolfSSL 15:117db924cf7c 339 #include "fsl_mmcau.h"
wolfSSL 15:117db924cf7c 340 #endif
wolfSSL 15:117db924cf7c 341
/* MMCAU build: route the generic transform hooks straight to the functions
 * of the same name. NOTE(review): no Transform_Sha256_Len definition for
 * this build is visible in this part of the file - confirm that
 * XTRANSFORM_LEN is never expanded here. */
wolfSSL 15:117db924cf7c 342 #define XTRANSFORM(S) Transform_Sha256((S))
wolfSSL 15:117db924cf7c 343 #define XTRANSFORM_LEN(S,L) Transform_Sha256_Len((S),(L))
wolfSSL 15:117db924cf7c 344
/* FREESCALE_MMCAU_SHA build of wc_InitSha256_ex.
 * heap and devId are ignored. Takes the crypto hardware mutex, lets the
 * MMCAU library write its initial SHA-256 state into sha256->digest,
 * releases the mutex, then zeroes buffLen, loLen and hiLen.
 * Returns BAD_FUNC_ARG when sha256 is NULL, the non-zero result of
 * wolfSSL_CryptHwMutexLock() when the lock cannot be taken (context left
 * untouched), otherwise 0. */
wolfSSL 15:117db924cf7c 345 int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId)
wolfSSL 15:117db924cf7c 346 {
wolfSSL 15:117db924cf7c 347 int ret = 0;
wolfSSL 15:117db924cf7c 348
    /* added in review: reject NULL before the mutex is taken and before
     * sha256->digest is touched, matching the other wc_InitSha256_ex
     * variants in this file */
    if (sha256 == NULL)
        return BAD_FUNC_ARG;

wolfSSL 15:117db924cf7c 349 (void)heap;
wolfSSL 15:117db924cf7c 350 (void)devId;
wolfSSL 15:117db924cf7c 351
wolfSSL 15:117db924cf7c 352 ret = wolfSSL_CryptHwMutexLock();
wolfSSL 15:117db924cf7c 353 if (ret != 0) {
wolfSSL 15:117db924cf7c 354 return ret;
wolfSSL 15:117db924cf7c 355 }
wolfSSL 15:117db924cf7c 356 #ifdef FREESCALE_MMCAU_CLASSIC_SHA
wolfSSL 15:117db924cf7c 357 cau_sha256_initialize_output(sha256->digest);
wolfSSL 15:117db924cf7c 358 #else
wolfSSL 15:117db924cf7c 359 MMCAU_SHA256_InitializeOutput((uint32_t*)sha256->digest);
wolfSSL 15:117db924cf7c 360 #endif
wolfSSL 15:117db924cf7c 361 wolfSSL_CryptHwMutexUnLock();
wolfSSL 15:117db924cf7c 362
wolfSSL 15:117db924cf7c 363 sha256->buffLen = 0;
wolfSSL 15:117db924cf7c 364 sha256->loLen = 0;
wolfSSL 15:117db924cf7c 365 sha256->hiLen = 0;
wolfSSL 15:117db924cf7c 366
wolfSSL 15:117db924cf7c 367 return ret;
wolfSSL 15:117db924cf7c 368 }
wolfSSL 15:117db924cf7c 369
/* MMCAU build of the block transform.
 * With the crypto hardware mutex held, passes sha256->buffer and
 * sha256->digest to the MMCAU hash-N routine with a count argument of 1
 * (presumably one block - confirm against the MMCAU API), then releases the
 * mutex. If the lock cannot be taken the hardware call is skipped.
 * Returns the result of wolfSSL_CryptHwMutexLock(). */
wolfSSL 15:117db924cf7c 370 static int Transform_Sha256(wc_Sha256* sha256)
wolfSSL 15:117db924cf7c 371 {
wolfSSL 15:117db924cf7c 372 int ret = wolfSSL_CryptHwMutexLock();
wolfSSL 15:117db924cf7c 373 if (ret == 0) {
wolfSSL 15:117db924cf7c 374 #ifdef FREESCALE_MMCAU_CLASSIC_SHA
wolfSSL 15:117db924cf7c 375 cau_sha256_hash_n((byte*)sha256->buffer, 1, sha256->digest);
wolfSSL 15:117db924cf7c 376 #else
wolfSSL 15:117db924cf7c 377 MMCAU_SHA256_HashN((byte*)sha256->buffer, 1, sha256->digest);
wolfSSL 15:117db924cf7c 378 #endif
wolfSSL 15:117db924cf7c 379 wolfSSL_CryptHwMutexUnLock();
wolfSSL 15:117db924cf7c 380 }
wolfSSL 15:117db924cf7c 381 return ret;
wolfSSL 15:117db924cf7c 382 }
wolfSSL 15:117db924cf7c 383
wolfSSL 15:117db924cf7c 384 #elif defined(WOLFSSL_PIC32MZ_HASH)
wolfSSL 15:117db924cf7c 385 #include <wolfssl/wolfcrypt/port/pic32/pic32mz-crypt.h>
wolfSSL 15:117db924cf7c 386
wolfSSL 15:117db924cf7c 387 #elif defined(STM32_HASH_SHA2)
wolfSSL 15:117db924cf7c 388
wolfSSL 15:117db924cf7c 389 /* Supports CubeMX HAL or Standard Peripheral Library */
wolfSSL 15:117db924cf7c 390
/* STM32_HASH_SHA2 build of wc_InitSha256_ex.
 * heap and devId are ignored; the STM32 hash context embedded in sha256 is
 * initialised with wc_Stm32_Hash_Init().
 * Returns BAD_FUNC_ARG when sha256 is NULL, otherwise 0. */
wolfSSL 15:117db924cf7c 391 int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId)
wolfSSL 15:117db924cf7c 392 {
wolfSSL 15:117db924cf7c 393 if (sha256 == NULL)
wolfSSL 15:117db924cf7c 394 return BAD_FUNC_ARG;
wolfSSL 15:117db924cf7c 395
wolfSSL 15:117db924cf7c 396 (void)devId;
wolfSSL 15:117db924cf7c 397 (void)heap;
wolfSSL 15:117db924cf7c 398
wolfSSL 15:117db924cf7c 399 wc_Stm32_Hash_Init(&sha256->stmCtx);
wolfSSL 15:117db924cf7c 400 return 0;
wolfSSL 15:117db924cf7c 401 }
wolfSSL 15:117db924cf7c 402
/* STM32_HASH_SHA2 build of wc_Sha256Update.
 * Validates the arguments, then calls wc_Stm32_Hash_Update() for SHA-256
 * while holding the crypto hardware mutex.
 * Returns BAD_FUNC_ARG when sha256 is NULL or data is NULL with a non-zero
 * len; the mutex-lock error if the lock cannot be taken; otherwise the
 * result of wc_Stm32_Hash_Update().
 * NOTE(review): unlike the FIPS wrapper in this file, a NULL data with
 * len == 0 is not short-circuited here and is forwarded to the helper. */
wolfSSL 15:117db924cf7c 403 int wc_Sha256Update(wc_Sha256* sha256, const byte* data, word32 len)
wolfSSL 15:117db924cf7c 404 {
wolfSSL 15:117db924cf7c 405 int ret = 0;
wolfSSL 15:117db924cf7c 406
wolfSSL 15:117db924cf7c 407 if (sha256 == NULL || (data == NULL && len > 0)) {
wolfSSL 15:117db924cf7c 408 return BAD_FUNC_ARG;
wolfSSL 15:117db924cf7c 409 }
wolfSSL 15:117db924cf7c 410
wolfSSL 15:117db924cf7c 411 ret = wolfSSL_CryptHwMutexLock();
wolfSSL 15:117db924cf7c 412 if (ret == 0) {
wolfSSL 15:117db924cf7c 413 ret = wc_Stm32_Hash_Update(&sha256->stmCtx,
wolfSSL 15:117db924cf7c 414 HASH_AlgoSelection_SHA256, data, len);
wolfSSL 15:117db924cf7c 415 wolfSSL_CryptHwMutexUnLock();
wolfSSL 15:117db924cf7c 416 }
wolfSSL 15:117db924cf7c 417 return ret;
wolfSSL 15:117db924cf7c 418 }
wolfSSL 15:117db924cf7c 419
/* STM32_HASH_SHA2 build of wc_Sha256Final.
 * With the crypto hardware mutex held, asks wc_Stm32_Hash_Final() to write
 * WC_SHA256_DIGEST_SIZE bytes of digest to hash. The context is then
 * re-initialised with wc_InitSha256() whether or not the finalisation
 * succeeded; the result of that re-initialisation is discarded.
 * Returns BAD_FUNC_ARG when either pointer is NULL; the mutex-lock error if
 * the lock cannot be taken; otherwise the result of wc_Stm32_Hash_Final(). */
wolfSSL 15:117db924cf7c 420 int wc_Sha256Final(wc_Sha256* sha256, byte* hash)
wolfSSL 15:117db924cf7c 421 {
wolfSSL 15:117db924cf7c 422 int ret = 0;
wolfSSL 15:117db924cf7c 423
wolfSSL 15:117db924cf7c 424 if (sha256 == NULL || hash == NULL) {
wolfSSL 15:117db924cf7c 425 return BAD_FUNC_ARG;
wolfSSL 15:117db924cf7c 426 }
wolfSSL 15:117db924cf7c 427
wolfSSL 15:117db924cf7c 428 ret = wolfSSL_CryptHwMutexLock();
wolfSSL 15:117db924cf7c 429 if (ret == 0) {
wolfSSL 15:117db924cf7c 430 ret = wc_Stm32_Hash_Final(&sha256->stmCtx,
wolfSSL 15:117db924cf7c 431 HASH_AlgoSelection_SHA256, hash, WC_SHA256_DIGEST_SIZE);
wolfSSL 15:117db924cf7c 432 wolfSSL_CryptHwMutexUnLock();
wolfSSL 15:117db924cf7c 433 }
wolfSSL 15:117db924cf7c 434
wolfSSL 15:117db924cf7c 435 (void)wc_InitSha256(sha256); /* reset state */
wolfSSL 15:117db924cf7c 436
wolfSSL 15:117db924cf7c 437 return ret;
wolfSSL 15:117db924cf7c 438 }
wolfSSL 15:117db924cf7c 439
wolfSSL 15:117db924cf7c 440 #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_HASH)
wolfSSL 15:117db924cf7c 441 /* functions defined in wolfcrypt/src/port/caam/caam_sha256.c */
wolfSSL 15:117db924cf7c 442 #else
wolfSSL 15:117db924cf7c 443 #define NEED_SOFT_SHA256
wolfSSL 15:117db924cf7c 444
/* Software-only build of wc_InitSha256_ex.
 * Stores heap in the context and resets it with InitSha256(). With
 * WOLFSSL_SMALL_STACK_CACHE the cached schedule pointer sha256->W is set to
 * NULL so Transform_Sha256() allocates it on first use. When
 * WOLFSSL_ASYNC_CRYPT and WC_ASYNC_ENABLE_SHA256 are both defined the
 * return value is that of wolfAsync_DevCtxInit(); otherwise devId is unused.
 * Returns BAD_FUNC_ARG for a NULL context, a non-zero InitSha256() result
 * if that fails, else 0 (or the async init result). */
wolfSSL 15:117db924cf7c 445 int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId)
wolfSSL 15:117db924cf7c 446 {
wolfSSL 15:117db924cf7c 447 int ret = 0;
wolfSSL 15:117db924cf7c 448 if (sha256 == NULL)
wolfSSL 15:117db924cf7c 449 return BAD_FUNC_ARG;
wolfSSL 15:117db924cf7c 450
wolfSSL 15:117db924cf7c 451 sha256->heap = heap;
wolfSSL 15:117db924cf7c 452
wolfSSL 15:117db924cf7c 453 ret = InitSha256(sha256);
wolfSSL 15:117db924cf7c 454 if (ret != 0)
wolfSSL 15:117db924cf7c 455 return ret;
wolfSSL 15:117db924cf7c 456
wolfSSL 15:117db924cf7c 457 #ifdef WOLFSSL_SMALL_STACK_CACHE
wolfSSL 15:117db924cf7c 458 sha256->W = NULL;
wolfSSL 15:117db924cf7c 459 #endif
wolfSSL 15:117db924cf7c 460
wolfSSL 15:117db924cf7c 461 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
wolfSSL 15:117db924cf7c 462 ret = wolfAsync_DevCtxInit(&sha256->asyncDev,
wolfSSL 15:117db924cf7c 463 WOLFSSL_ASYNC_MARKER_SHA256, sha256->heap, devId);
wolfSSL 15:117db924cf7c 464 #else
wolfSSL 15:117db924cf7c 465 (void)devId;
wolfSSL 15:117db924cf7c 466 #endif /* WOLFSSL_ASYNC_CRYPT */
wolfSSL 15:117db924cf7c 467
wolfSSL 15:117db924cf7c 468 return ret;
wolfSSL 15:117db924cf7c 469 }
wolfSSL 15:117db924cf7c 470 #endif /* End Hardware Acceleration */
wolfSSL 15:117db924cf7c 471
wolfSSL 15:117db924cf7c 472 #ifdef NEED_SOFT_SHA256
wolfSSL 15:117db924cf7c 473
/* The 64 SHA-256 round constants (FIPS 180-4, section 4.2.2), one per
 * round, indexed as K[i+j] by the RND() macro. Declared ALIGN32. */
wolfSSL 15:117db924cf7c 474 static const ALIGN32 word32 K[64] = {
wolfSSL 15:117db924cf7c 475 0x428A2F98L, 0x71374491L, 0xB5C0FBCFL, 0xE9B5DBA5L, 0x3956C25BL,
wolfSSL 15:117db924cf7c 476 0x59F111F1L, 0x923F82A4L, 0xAB1C5ED5L, 0xD807AA98L, 0x12835B01L,
wolfSSL 15:117db924cf7c 477 0x243185BEL, 0x550C7DC3L, 0x72BE5D74L, 0x80DEB1FEL, 0x9BDC06A7L,
wolfSSL 15:117db924cf7c 478 0xC19BF174L, 0xE49B69C1L, 0xEFBE4786L, 0x0FC19DC6L, 0x240CA1CCL,
wolfSSL 15:117db924cf7c 479 0x2DE92C6FL, 0x4A7484AAL, 0x5CB0A9DCL, 0x76F988DAL, 0x983E5152L,
wolfSSL 15:117db924cf7c 480 0xA831C66DL, 0xB00327C8L, 0xBF597FC7L, 0xC6E00BF3L, 0xD5A79147L,
wolfSSL 15:117db924cf7c 481 0x06CA6351L, 0x14292967L, 0x27B70A85L, 0x2E1B2138L, 0x4D2C6DFCL,
wolfSSL 15:117db924cf7c 482 0x53380D13L, 0x650A7354L, 0x766A0ABBL, 0x81C2C92EL, 0x92722C85L,
wolfSSL 15:117db924cf7c 483 0xA2BFE8A1L, 0xA81A664BL, 0xC24B8B70L, 0xC76C51A3L, 0xD192E819L,
wolfSSL 15:117db924cf7c 484 0xD6990624L, 0xF40E3585L, 0x106AA070L, 0x19A4C116L, 0x1E376C08L,
wolfSSL 15:117db924cf7c 485 0x2748774CL, 0x34B0BCB5L, 0x391C0CB3L, 0x4ED8AA4AL, 0x5B9CCA4FL,
wolfSSL 15:117db924cf7c 486 0x682E6FF3L, 0x748F82EEL, 0x78A5636FL, 0x84C87814L, 0x8CC70208L,
wolfSSL 15:117db924cf7c 487 0x90BEFFFAL, 0xA4506CEBL, 0xBEF9A3F7L, 0xC67178F2L
wolfSSL 15:117db924cf7c 488 };
wolfSSL 15:117db924cf7c 489
/* Bitwise helpers for the software transform.
 * Ch selects y where x has a 1 bit and z where it has a 0 bit; Maj yields
 * the majority bit of x, y, z. Both are reduced-operation forms of the
 * FIPS 180-4 definitions. R is a logical right shift of the low 32 bits. */
wolfSSL 15:117db924cf7c 490 #define Ch(x,y,z) ((z) ^ ((x) & ((y) ^ (z))))
wolfSSL 15:117db924cf7c 491 #define Maj(x,y,z) ((((x) | (y)) & (z)) | ((x) & (y)))
wolfSSL 15:117db924cf7c 492 #define R(x, n) (((x) & 0xFFFFFFFFU) >> (n))
wolfSSL 15:117db924cf7c 493
/* S forwards to rotrFixed() (defined outside this file section; presumably
 * a 32-bit rotate right). The rotate/shift amounts below match the
 * FIPS 180-4 Sigma0/Sigma1 (rounds) and sigma0/sigma1 (message schedule,
 * named Gamma0/Gamma1 here) functions. */
wolfSSL 15:117db924cf7c 494 #define S(x, n) rotrFixed(x, n)
wolfSSL 15:117db924cf7c 495 #define Sigma0(x) (S(x, 2) ^ S(x, 13) ^ S(x, 22))
wolfSSL 15:117db924cf7c 496 #define Sigma1(x) (S(x, 6) ^ S(x, 11) ^ S(x, 25))
wolfSSL 15:117db924cf7c 497 #define Gamma0(x) (S(x, 7) ^ S(x, 18) ^ R(x, 3))
wolfSSL 15:117db924cf7c 498 #define Gamma1(x) (S(x, 17) ^ S(x, 19) ^ R(x, 10))
wolfSSL 15:117db924cf7c 499
/* Working variables a..h live in the local array S[8]. Indexing with
 * (k - i) & 7 shifts which slot plays which role by one position per round,
 * so no values have to be moved between rounds. Note that S is both this
 * array name and the function-like macro above; the macro only expands when
 * followed by '('. */
wolfSSL 15:117db924cf7c 500 #define a(i) S[(0-i) & 7]
wolfSSL 15:117db924cf7c 501 #define b(i) S[(1-i) & 7]
wolfSSL 15:117db924cf7c 502 #define c(i) S[(2-i) & 7]
wolfSSL 15:117db924cf7c 503 #define d(i) S[(3-i) & 7]
wolfSSL 15:117db924cf7c 504 #define e(i) S[(4-i) & 7]
wolfSSL 15:117db924cf7c 505 #define f(i) S[(5-i) & 7]
wolfSSL 15:117db924cf7c 506 #define g(i) S[(6-i) & 7]
wolfSSL 15:117db924cf7c 507 #define h(i) S[(7-i) & 7]
wolfSSL 15:117db924cf7c 508
/* One SHA-256 round for offset j within the current group of eight.
 * Relies on i, t0, t1, S, W and K being in scope at the expansion site.
 * Expands to several statements (not wrapped in do/while), so it must be
 * used inside braces when it is the body of a loop or conditional. */
wolfSSL 15:117db924cf7c 509 #define RND(j) \
wolfSSL 15:117db924cf7c 510 t0 = h(j) + Sigma1(e(j)) + Ch(e(j), f(j), g(j)) + K[i+j] + W[i+j]; \
wolfSSL 15:117db924cf7c 511 t1 = Sigma0(a(j)) + Maj(a(j), b(j), c(j)); \
wolfSSL 15:117db924cf7c 512 d(j) += t0; \
wolfSSL 15:117db924cf7c 513 h(j) = t0 + t1
wolfSSL 15:117db924cf7c 514
/* Default transform hooks, used only when no earlier section of the file
 * defined XTRANSFORM. NOTE(review): no Transform_Sha256_Len definition is
 * visible in this part of the file - confirm XTRANSFORM_LEN is not expanded
 * in builds that reach these defaults. */
wolfSSL 15:117db924cf7c 515 #ifndef XTRANSFORM
wolfSSL 15:117db924cf7c 516 #define XTRANSFORM(S) Transform_Sha256((S))
wolfSSL 15:117db924cf7c 517 #define XTRANSFORM_LEN(S,L) Transform_Sha256_Len((S),(L))
wolfSSL 15:117db924cf7c 518 #endif
wolfSSL 15:117db924cf7c 519
/* Software SHA-256 block transform.
 * Reads the 16 words in sha256->buffer, expands them into the message
 * schedule W, runs the rounds over a local copy S of sha256->digest, and
 * adds S back into sha256->digest. sha256->buffer is not modified.
 *
 * Storage for W depends on the build:
 *   - WOLFSSL_SMALL_STACK_CACHE: taken from sha256->W, allocated on first
 *     use and kept in the context (it is not freed in this function);
 *   - WOLFSSL_SMALL_STACK: allocated on entry and freed before returning;
 *   - otherwise: a local array.
 *
 * Returns MEMORY_E if an allocation fails (digest left unchanged),
 * otherwise 0.
 *
 * NOTE(review): WC_SHA256_BLOCK_SIZE is used here as the number of schedule
 * words and rounds; this relies on it being equal to the size of K[] (64) -
 * confirm against the header.
 * NOTE(review): both allocations pass a NULL heap hint rather than
 * sha256->heap, and the cached allocation is tagged DYNAMIC_TYPE_RNG. */
wolfSSL 15:117db924cf7c 520 static int Transform_Sha256(wc_Sha256* sha256)
wolfSSL 15:117db924cf7c 521 {
wolfSSL 15:117db924cf7c 522 word32 S[8], t0, t1;
wolfSSL 15:117db924cf7c 523 int i;
wolfSSL 15:117db924cf7c 524
wolfSSL 15:117db924cf7c 525 #ifdef WOLFSSL_SMALL_STACK_CACHE
wolfSSL 15:117db924cf7c 526 word32* W = sha256->W;
wolfSSL 15:117db924cf7c 527 if (W == NULL) {
wolfSSL 15:117db924cf7c 528 W = (word32*)XMALLOC(sizeof(word32) * WC_SHA256_BLOCK_SIZE, NULL,
wolfSSL 15:117db924cf7c 529 DYNAMIC_TYPE_RNG);
wolfSSL 15:117db924cf7c 530 if (W == NULL)
wolfSSL 15:117db924cf7c 531 return MEMORY_E;
wolfSSL 15:117db924cf7c 532 sha256->W = W;
wolfSSL 15:117db924cf7c 533 }
wolfSSL 15:117db924cf7c 534 #elif defined(WOLFSSL_SMALL_STACK)
wolfSSL 15:117db924cf7c 535 word32* W;
wolfSSL 15:117db924cf7c 536 W = (word32*)XMALLOC(sizeof(word32) * WC_SHA256_BLOCK_SIZE, NULL,
wolfSSL 15:117db924cf7c 537 DYNAMIC_TYPE_TMP_BUFFER);
wolfSSL 15:117db924cf7c 538 if (W == NULL)
wolfSSL 15:117db924cf7c 539 return MEMORY_E;
wolfSSL 15:117db924cf7c 540 #else
wolfSSL 15:117db924cf7c 541 word32 W[WC_SHA256_BLOCK_SIZE];
wolfSSL 15:117db924cf7c 542 #endif
wolfSSL 15:117db924cf7c 543
wolfSSL 15:117db924cf7c 544 /* Copy context->state[] to working vars */
wolfSSL 15:117db924cf7c 545 for (i = 0; i < 8; i++)
wolfSSL 15:117db924cf7c 546 S[i] = sha256->digest[i];
wolfSSL 15:117db924cf7c 547
/* first 16 schedule words are the input block itself */
wolfSSL 15:117db924cf7c 548 for (i = 0; i < 16; i++)
wolfSSL 15:117db924cf7c 549 W[i] = sha256->buffer[i];
wolfSSL 15:117db924cf7c 550
/* remaining schedule words are derived from earlier ones */
wolfSSL 15:117db924cf7c 551 for (i = 16; i < WC_SHA256_BLOCK_SIZE; i++)
wolfSSL 15:117db924cf7c 552 W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16];
wolfSSL 15:117db924cf7c 553
wolfSSL 15:117db924cf7c 554 #ifdef USE_SLOW_SHA256
wolfSSL 15:117db924cf7c 555 /* not unrolled - ~2k smaller and ~25% slower */
wolfSSL 15:117db924cf7c 556 for (i = 0; i < WC_SHA256_BLOCK_SIZE; i += 8) {
wolfSSL 15:117db924cf7c 557 int j;
wolfSSL 15:117db924cf7c 558 for (j = 0; j < 8; j++) { /* braces needed here for macros {} */
wolfSSL 15:117db924cf7c 559 RND(j);
wolfSSL 15:117db924cf7c 560 }
wolfSSL 15:117db924cf7c 561 }
wolfSSL 15:117db924cf7c 562 #else
wolfSSL 15:117db924cf7c 563 /* partially loop unrolled */
wolfSSL 15:117db924cf7c 564 for (i = 0; i < WC_SHA256_BLOCK_SIZE; i += 8) {
wolfSSL 15:117db924cf7c 565 RND(0); RND(1); RND(2); RND(3);
wolfSSL 15:117db924cf7c 566 RND(4); RND(5); RND(6); RND(7);
wolfSSL 15:117db924cf7c 567 }
wolfSSL 15:117db924cf7c 568 #endif /* USE_SLOW_SHA256 */
wolfSSL 15:117db924cf7c 569
wolfSSL 15:117db924cf7c 570 /* Add the working vars back into digest state[] */
wolfSSL 15:117db924cf7c 571 for (i = 0; i < 8; i++) {
wolfSSL 15:117db924cf7c 572 sha256->digest[i] += S[i];
wolfSSL 15:117db924cf7c 573 }
wolfSSL 15:117db924cf7c 574
/* only the per-call allocation is released; the cached buffer stays in
 * the context */
wolfSSL 15:117db924cf7c 575 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SMALL_STACK_CACHE)
wolfSSL 15:117db924cf7c 576 XFREE(W, NULL, DYNAMIC_TYPE_TMP_BUFFER);
wolfSSL 15:117db924cf7c 577 #endif
wolfSSL 15:117db924cf7c 578 return 0;
wolfSSL 15:117db924cf7c 579 }
wolfSSL 15:117db924cf7c 580 #endif
wolfSSL 15:117db924cf7c 581 /* End wc_ software implementation */
wolfSSL 15:117db924cf7c 582
wolfSSL 15:117db924cf7c 583
wolfSSL 15:117db924cf7c 584 #ifdef XTRANSFORM
wolfSSL 15:117db924cf7c 585
/* Add len to the running input length kept in the context as a low word
 * (loLen) and a high word (hiLen). If the addition wraps loLen (the new
 * value is smaller than the old one), hiLen is incremented by one. */
wolfSSL 15:117db924cf7c 586 static WC_INLINE void AddLength(wc_Sha256* sha256, word32 len)
wolfSSL 15:117db924cf7c 587 {
wolfSSL 15:117db924cf7c 588 word32 tmp = sha256->loLen;
wolfSSL 15:117db924cf7c 589 if ((sha256->loLen += len) < tmp)
wolfSSL 15:117db924cf7c 590 sha256->hiLen++; /* carry low to high */
wolfSSL 15:117db924cf7c 591 }
wolfSSL 15:117db924cf7c 592
wolfSSL 15:117db924cf7c 593 static WC_INLINE int Sha256Update(wc_Sha256* sha256, const byte* data, word32 len)
wolfSSL 15:117db924cf7c 594 {
wolfSSL 15:117db924cf7c 595 int ret = 0;
wolfSSL 15:117db924cf7c 596 byte* local;
wolfSSL 15:117db924cf7c 597
wolfSSL 15:117db924cf7c 598 if (sha256 == NULL || (data == NULL && len > 0)) {
wolfSSL 15:117db924cf7c 599 return BAD_FUNC_ARG;
wolfSSL 15:117db924cf7c 600 }
wolfSSL 15:117db924cf7c 601
wolfSSL 15:117db924cf7c 602 if (data == NULL && len == 0) {
wolfSSL 15:117db924cf7c 603 /* valid, but do nothing */
wolfSSL 15:117db924cf7c 604 return 0;
wolfSSL 15:117db924cf7c 605 }
wolfSSL 15:117db924cf7c 606
wolfSSL 15:117db924cf7c 607 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
wolfSSL 15:117db924cf7c 608 if (sha256->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA256) {
wolfSSL 15:117db924cf7c 609 #if defined(HAVE_INTEL_QA)
wolfSSL 15:117db924cf7c 610 return IntelQaSymSha256(&sha256->asyncDev, NULL, data, len);
wolfSSL 15:117db924cf7c 611 #endif
wolfSSL 15:117db924cf7c 612 }
wolfSSL 15:117db924cf7c 613 #endif /* WOLFSSL_ASYNC_CRYPT */
wolfSSL 15:117db924cf7c 614
wolfSSL 15:117db924cf7c 615 /* do block size increments */
wolfSSL 15:117db924cf7c 616 local = (byte*)sha256->buffer;
wolfSSL 15:117db924cf7c 617
wolfSSL 15:117db924cf7c 618 /* check that internal buffLen is valid */
wolfSSL 15:117db924cf7c 619 if (sha256->buffLen >= WC_SHA256_BLOCK_SIZE)
wolfSSL 15:117db924cf7c 620 return BUFFER_E;
wolfSSL 15:117db924cf7c 621
wolfSSL 15:117db924cf7c 622 if (sha256->buffLen > 0) {
wolfSSL 15:117db924cf7c 623 word32 add = min(len, WC_SHA256_BLOCK_SIZE - sha256->buffLen);
wolfSSL 15:117db924cf7c 624 XMEMCPY(&local[sha256->buffLen], data, add);
wolfSSL 15:117db924cf7c 625
wolfSSL 15:117db924cf7c 626 sha256->buffLen += add;
wolfSSL 15:117db924cf7c 627 data += add;
wolfSSL 15:117db924cf7c 628 len -= add;
wolfSSL 15:117db924cf7c 629
wolfSSL 15:117db924cf7c 630 if (sha256->buffLen == WC_SHA256_BLOCK_SIZE) {
wolfSSL 15:117db924cf7c 631 #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
wolfSSL 15:117db924cf7c 632 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
wolfSSL 15:117db924cf7c 633 if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
wolfSSL 15:117db924cf7c 634 #endif
wolfSSL 15:117db924cf7c 635 {
wolfSSL 15:117db924cf7c 636 ByteReverseWords(sha256->buffer, sha256->buffer,
wolfSSL 15:117db924cf7c 637 WC_SHA256_BLOCK_SIZE);
wolfSSL 15:117db924cf7c 638 }
wolfSSL 15:117db924cf7c 639 #endif
wolfSSL 15:117db924cf7c 640 ret = XTRANSFORM(sha256);
wolfSSL 15:117db924cf7c 641 if (ret == 0) {
wolfSSL 15:117db924cf7c 642 AddLength(sha256, WC_SHA256_BLOCK_SIZE);
wolfSSL 15:117db924cf7c 643 sha256->buffLen = 0;
wolfSSL 15:117db924cf7c 644 }
wolfSSL 15:117db924cf7c 645 else
wolfSSL 15:117db924cf7c 646 len = 0;
wolfSSL 15:117db924cf7c 647 }
wolfSSL 15:117db924cf7c 648 }
wolfSSL 15:117db924cf7c 649
wolfSSL 15:117db924cf7c 650 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
wolfSSL 15:117db924cf7c 651 if (Transform_Sha256_Len_p != NULL) {
wolfSSL 15:117db924cf7c 652 word32 blocksLen = len & ~(WC_SHA256_BLOCK_SIZE-1);
wolfSSL 15:117db924cf7c 653
wolfSSL 15:117db924cf7c 654 if (blocksLen > 0) {
wolfSSL 15:117db924cf7c 655 AddLength(sha256, blocksLen);
wolfSSL 15:117db924cf7c 656 sha256->data = data;
wolfSSL 15:117db924cf7c 657 /* Byte reversal performed in function if required. */
wolfSSL 15:117db924cf7c 658 XTRANSFORM_LEN(sha256, blocksLen);
wolfSSL 15:117db924cf7c 659 data += blocksLen;
wolfSSL 15:117db924cf7c 660 len -= blocksLen;
wolfSSL 15:117db924cf7c 661 }
wolfSSL 15:117db924cf7c 662 }
wolfSSL 15:117db924cf7c 663 else
wolfSSL 15:117db924cf7c 664 #endif
wolfSSL 15:117db924cf7c 665 #if !defined(LITTLE_ENDIAN_ORDER) || defined(FREESCALE_MMCAU_SHA) || \
wolfSSL 15:117db924cf7c 666 defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
wolfSSL 15:117db924cf7c 667 {
wolfSSL 15:117db924cf7c 668 word32 blocksLen = len & ~(WC_SHA256_BLOCK_SIZE-1);
wolfSSL 15:117db924cf7c 669
wolfSSL 15:117db924cf7c 670 AddLength(sha256, blocksLen);
wolfSSL 15:117db924cf7c 671 while (len >= WC_SHA256_BLOCK_SIZE) {
wolfSSL 15:117db924cf7c 672 XMEMCPY(local, data, WC_SHA256_BLOCK_SIZE);
wolfSSL 15:117db924cf7c 673
wolfSSL 15:117db924cf7c 674 data += WC_SHA256_BLOCK_SIZE;
wolfSSL 15:117db924cf7c 675 len -= WC_SHA256_BLOCK_SIZE;
wolfSSL 15:117db924cf7c 676
wolfSSL 15:117db924cf7c 677 /* Byte reversal performed in function if required. */
wolfSSL 15:117db924cf7c 678 ret = XTRANSFORM(sha256);
wolfSSL 15:117db924cf7c 679 if (ret != 0)
wolfSSL 15:117db924cf7c 680 break;
wolfSSL 15:117db924cf7c 681 }
wolfSSL 15:117db924cf7c 682 }
wolfSSL 15:117db924cf7c 683 #else
wolfSSL 15:117db924cf7c 684 {
wolfSSL 15:117db924cf7c 685 word32 blocksLen = len & ~(WC_SHA256_BLOCK_SIZE-1);
wolfSSL 15:117db924cf7c 686
wolfSSL 15:117db924cf7c 687 AddLength(sha256, blocksLen);
wolfSSL 15:117db924cf7c 688 while (len >= WC_SHA256_BLOCK_SIZE) {
wolfSSL 15:117db924cf7c 689 XMEMCPY(local, data, WC_SHA256_BLOCK_SIZE);
wolfSSL 15:117db924cf7c 690
wolfSSL 15:117db924cf7c 691 data += WC_SHA256_BLOCK_SIZE;
wolfSSL 15:117db924cf7c 692 len -= WC_SHA256_BLOCK_SIZE;
wolfSSL 15:117db924cf7c 693
wolfSSL 15:117db924cf7c 694 ByteReverseWords(sha256->buffer, sha256->buffer,
wolfSSL 15:117db924cf7c 695 WC_SHA256_BLOCK_SIZE);
wolfSSL 15:117db924cf7c 696 ret = XTRANSFORM(sha256);
wolfSSL 15:117db924cf7c 697 if (ret != 0)
wolfSSL 15:117db924cf7c 698 break;
wolfSSL 15:117db924cf7c 699 }
wolfSSL 15:117db924cf7c 700 }
wolfSSL 15:117db924cf7c 701 #endif
wolfSSL 15:117db924cf7c 702
wolfSSL 15:117db924cf7c 703 if (len > 0) {
wolfSSL 15:117db924cf7c 704 XMEMCPY(local, data, len);
wolfSSL 15:117db924cf7c 705 sha256->buffLen = len;
wolfSSL 15:117db924cf7c 706 }
wolfSSL 15:117db924cf7c 707
wolfSSL 15:117db924cf7c 708 return ret;
wolfSSL 15:117db924cf7c 709 }
wolfSSL 15:117db924cf7c 710
wolfSSL 15:117db924cf7c 711 int wc_Sha256Update(wc_Sha256* sha256, const byte* data, word32 len)
wolfSSL 15:117db924cf7c 712 {
wolfSSL 15:117db924cf7c 713 return Sha256Update(sha256, data, len);
wolfSSL 15:117db924cf7c 714 }
wolfSSL 15:117db924cf7c 715
wolfSSL 15:117db924cf7c 716 static WC_INLINE int Sha256Final(wc_Sha256* sha256)
wolfSSL 15:117db924cf7c 717 {
wolfSSL 15:117db924cf7c 718
wolfSSL 15:117db924cf7c 719 int ret;
wolfSSL 15:117db924cf7c 720 byte* local = (byte*)sha256->buffer;
wolfSSL 15:117db924cf7c 721
wolfSSL 15:117db924cf7c 722 if (sha256 == NULL) {
wolfSSL 15:117db924cf7c 723 return BAD_FUNC_ARG;
wolfSSL 15:117db924cf7c 724 }
wolfSSL 15:117db924cf7c 725
wolfSSL 15:117db924cf7c 726 AddLength(sha256, sha256->buffLen); /* before adding pads */
wolfSSL 15:117db924cf7c 727 local[sha256->buffLen++] = 0x80; /* add 1 */
wolfSSL 15:117db924cf7c 728
wolfSSL 15:117db924cf7c 729 /* pad with zeros */
wolfSSL 15:117db924cf7c 730 if (sha256->buffLen > WC_SHA256_PAD_SIZE) {
wolfSSL 15:117db924cf7c 731 XMEMSET(&local[sha256->buffLen], 0,
wolfSSL 15:117db924cf7c 732 WC_SHA256_BLOCK_SIZE - sha256->buffLen);
wolfSSL 15:117db924cf7c 733 sha256->buffLen += WC_SHA256_BLOCK_SIZE - sha256->buffLen;
wolfSSL 15:117db924cf7c 734
wolfSSL 15:117db924cf7c 735 {
wolfSSL 15:117db924cf7c 736 #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
wolfSSL 15:117db924cf7c 737 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
wolfSSL 15:117db924cf7c 738 if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
wolfSSL 15:117db924cf7c 739 #endif
wolfSSL 15:117db924cf7c 740 {
wolfSSL 15:117db924cf7c 741 ByteReverseWords(sha256->buffer, sha256->buffer,
wolfSSL 15:117db924cf7c 742 WC_SHA256_BLOCK_SIZE);
wolfSSL 15:117db924cf7c 743 }
wolfSSL 15:117db924cf7c 744 #endif
wolfSSL 15:117db924cf7c 745 }
wolfSSL 15:117db924cf7c 746
wolfSSL 15:117db924cf7c 747 ret = XTRANSFORM(sha256);
wolfSSL 15:117db924cf7c 748 if (ret != 0)
wolfSSL 15:117db924cf7c 749 return ret;
wolfSSL 15:117db924cf7c 750
wolfSSL 15:117db924cf7c 751 sha256->buffLen = 0;
wolfSSL 15:117db924cf7c 752 }
wolfSSL 15:117db924cf7c 753 XMEMSET(&local[sha256->buffLen], 0, WC_SHA256_PAD_SIZE - sha256->buffLen);
wolfSSL 15:117db924cf7c 754
wolfSSL 15:117db924cf7c 755 /* put lengths in bits */
wolfSSL 15:117db924cf7c 756 sha256->hiLen = (sha256->loLen >> (8 * sizeof(sha256->loLen) - 3)) +
wolfSSL 15:117db924cf7c 757 (sha256->hiLen << 3);
wolfSSL 15:117db924cf7c 758 sha256->loLen = sha256->loLen << 3;
wolfSSL 15:117db924cf7c 759
wolfSSL 15:117db924cf7c 760 /* store lengths */
wolfSSL 15:117db924cf7c 761 #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
wolfSSL 15:117db924cf7c 762 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
wolfSSL 15:117db924cf7c 763 if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
wolfSSL 15:117db924cf7c 764 #endif
wolfSSL 15:117db924cf7c 765 {
wolfSSL 15:117db924cf7c 766 ByteReverseWords(sha256->buffer, sha256->buffer,
wolfSSL 15:117db924cf7c 767 WC_SHA256_BLOCK_SIZE);
wolfSSL 15:117db924cf7c 768 }
wolfSSL 15:117db924cf7c 769 #endif
wolfSSL 15:117db924cf7c 770 /* ! length ordering dependent on digest endian type ! */
wolfSSL 15:117db924cf7c 771 XMEMCPY(&local[WC_SHA256_PAD_SIZE], &sha256->hiLen, sizeof(word32));
wolfSSL 15:117db924cf7c 772 XMEMCPY(&local[WC_SHA256_PAD_SIZE + sizeof(word32)], &sha256->loLen,
wolfSSL 15:117db924cf7c 773 sizeof(word32));
wolfSSL 15:117db924cf7c 774
wolfSSL 15:117db924cf7c 775 #if defined(FREESCALE_MMCAU_SHA) || defined(HAVE_INTEL_AVX1) || \
wolfSSL 15:117db924cf7c 776 defined(HAVE_INTEL_AVX2)
wolfSSL 15:117db924cf7c 777 /* Kinetis requires only these bytes reversed */
wolfSSL 15:117db924cf7c 778 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
wolfSSL 15:117db924cf7c 779 if (IS_INTEL_AVX1(intel_flags) || IS_INTEL_AVX2(intel_flags))
wolfSSL 15:117db924cf7c 780 #endif
wolfSSL 15:117db924cf7c 781 {
wolfSSL 15:117db924cf7c 782 ByteReverseWords(
wolfSSL 15:117db924cf7c 783 &sha256->buffer[WC_SHA256_PAD_SIZE / sizeof(word32)],
wolfSSL 15:117db924cf7c 784 &sha256->buffer[WC_SHA256_PAD_SIZE / sizeof(word32)],
wolfSSL 15:117db924cf7c 785 2 * sizeof(word32));
wolfSSL 15:117db924cf7c 786 }
wolfSSL 15:117db924cf7c 787 #endif
wolfSSL 15:117db924cf7c 788
wolfSSL 15:117db924cf7c 789 return XTRANSFORM(sha256);
wolfSSL 15:117db924cf7c 790 }
wolfSSL 15:117db924cf7c 791
wolfSSL 15:117db924cf7c 792 int wc_Sha256FinalRaw(wc_Sha256* sha256, byte* hash)
wolfSSL 15:117db924cf7c 793 {
wolfSSL 15:117db924cf7c 794 #ifdef LITTLE_ENDIAN_ORDER
wolfSSL 15:117db924cf7c 795 word32 digest[WC_SHA256_DIGEST_SIZE / sizeof(word32)];
wolfSSL 15:117db924cf7c 796 #endif
wolfSSL 15:117db924cf7c 797
wolfSSL 15:117db924cf7c 798 if (sha256 == NULL || hash == NULL) {
wolfSSL 15:117db924cf7c 799 return BAD_FUNC_ARG;
wolfSSL 15:117db924cf7c 800 }
wolfSSL 15:117db924cf7c 801
wolfSSL 15:117db924cf7c 802 #ifdef LITTLE_ENDIAN_ORDER
wolfSSL 15:117db924cf7c 803 ByteReverseWords((word32*)digest, (word32*)sha256->digest,
wolfSSL 15:117db924cf7c 804 WC_SHA256_DIGEST_SIZE);
wolfSSL 15:117db924cf7c 805 XMEMCPY(hash, digest, WC_SHA256_DIGEST_SIZE);
wolfSSL 15:117db924cf7c 806 #else
wolfSSL 15:117db924cf7c 807 XMEMCPY(hash, sha256->digest, WC_SHA256_DIGEST_SIZE);
wolfSSL 15:117db924cf7c 808 #endif
wolfSSL 15:117db924cf7c 809
wolfSSL 15:117db924cf7c 810 return 0;
wolfSSL 15:117db924cf7c 811 }
wolfSSL 15:117db924cf7c 812
wolfSSL 15:117db924cf7c 813 int wc_Sha256Final(wc_Sha256* sha256, byte* hash)
wolfSSL 15:117db924cf7c 814 {
wolfSSL 15:117db924cf7c 815 int ret;
wolfSSL 15:117db924cf7c 816
wolfSSL 15:117db924cf7c 817 if (sha256 == NULL || hash == NULL) {
wolfSSL 15:117db924cf7c 818 return BAD_FUNC_ARG;
wolfSSL 15:117db924cf7c 819 }
wolfSSL 15:117db924cf7c 820
wolfSSL 15:117db924cf7c 821 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
wolfSSL 15:117db924cf7c 822 if (sha256->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA256) {
wolfSSL 15:117db924cf7c 823 #if defined(HAVE_INTEL_QA)
wolfSSL 15:117db924cf7c 824 return IntelQaSymSha256(&sha256->asyncDev, hash, NULL,
wolfSSL 15:117db924cf7c 825 WC_SHA256_DIGEST_SIZE);
wolfSSL 15:117db924cf7c 826 #endif
wolfSSL 15:117db924cf7c 827 }
wolfSSL 15:117db924cf7c 828 #endif /* WOLFSSL_ASYNC_CRYPT */
wolfSSL 15:117db924cf7c 829
wolfSSL 15:117db924cf7c 830 ret = Sha256Final(sha256);
wolfSSL 15:117db924cf7c 831 if (ret != 0)
wolfSSL 15:117db924cf7c 832 return ret;
wolfSSL 15:117db924cf7c 833
wolfSSL 15:117db924cf7c 834 #if defined(LITTLE_ENDIAN_ORDER)
wolfSSL 15:117db924cf7c 835 ByteReverseWords(sha256->digest, sha256->digest, WC_SHA256_DIGEST_SIZE);
wolfSSL 15:117db924cf7c 836 #endif
wolfSSL 15:117db924cf7c 837 XMEMCPY(hash, sha256->digest, WC_SHA256_DIGEST_SIZE);
wolfSSL 15:117db924cf7c 838
wolfSSL 15:117db924cf7c 839 return InitSha256(sha256); /* reset state */
wolfSSL 15:117db924cf7c 840 }
wolfSSL 15:117db924cf7c 841
wolfSSL 15:117db924cf7c 842 #endif /* XTRANSFORM */
wolfSSL 15:117db924cf7c 843
wolfSSL 15:117db924cf7c 844
wolfSSL 15:117db924cf7c 845 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
wolfSSL 15:117db924cf7c 846
/* Inline-assembly string fragments that transfer the eight 32-bit words at
 * byte offsets 0..28 from the %[sha256] operand to or from r8d..r15d.
 * NOTE(review): presumably offsets 0..28 are the digest words of the hash
 * context - confirm against the wc_Sha256 layout. */
/* _LOAD_DIGEST: registers = memory words. */
wolfSSL 15:117db924cf7c 847 #define _LOAD_DIGEST() \
wolfSSL 15:117db924cf7c 848 "movl (%[sha256]), %%r8d \n\t" \
wolfSSL 15:117db924cf7c 849 "movl 4(%[sha256]), %%r9d \n\t" \
wolfSSL 15:117db924cf7c 850 "movl 8(%[sha256]), %%r10d\n\t" \
wolfSSL 15:117db924cf7c 851 "movl 12(%[sha256]), %%r11d\n\t" \
wolfSSL 15:117db924cf7c 852 "movl 16(%[sha256]), %%r12d\n\t" \
wolfSSL 15:117db924cf7c 853 "movl 20(%[sha256]), %%r13d\n\t" \
wolfSSL 15:117db924cf7c 854 "movl 24(%[sha256]), %%r14d\n\t" \
wolfSSL 15:117db924cf7c 855 "movl 28(%[sha256]), %%r15d\n\t"
wolfSSL 15:117db924cf7c 856
/* _STORE_ADD_DIGEST: memory words += registers (registers unchanged). */
wolfSSL 15:117db924cf7c 857 #define _STORE_ADD_DIGEST() \
wolfSSL 15:117db924cf7c 858 "addl %%r8d , (%[sha256])\n\t" \
wolfSSL 15:117db924cf7c 859 "addl %%r9d , 4(%[sha256])\n\t" \
wolfSSL 15:117db924cf7c 860 "addl %%r10d, 8(%[sha256])\n\t" \
wolfSSL 15:117db924cf7c 861 "addl %%r11d, 12(%[sha256])\n\t" \
wolfSSL 15:117db924cf7c 862 "addl %%r12d, 16(%[sha256])\n\t" \
wolfSSL 15:117db924cf7c 863 "addl %%r13d, 20(%[sha256])\n\t" \
wolfSSL 15:117db924cf7c 864 "addl %%r14d, 24(%[sha256])\n\t" \
wolfSSL 15:117db924cf7c 865 "addl %%r15d, 28(%[sha256])\n\t"
wolfSSL 15:117db924cf7c 866
/* _ADD_DIGEST: registers += memory words (memory unchanged). */
wolfSSL 15:117db924cf7c 867 #define _ADD_DIGEST() \
wolfSSL 15:117db924cf7c 868 "addl (%[sha256]), %%r8d \n\t" \
wolfSSL 15:117db924cf7c 869 "addl 4(%[sha256]), %%r9d \n\t" \
wolfSSL 15:117db924cf7c 870 "addl 8(%[sha256]), %%r10d\n\t" \
wolfSSL 15:117db924cf7c 871 "addl 12(%[sha256]), %%r11d\n\t" \
wolfSSL 15:117db924cf7c 872 "addl 16(%[sha256]), %%r12d\n\t" \
wolfSSL 15:117db924cf7c 873 "addl 20(%[sha256]), %%r13d\n\t" \
wolfSSL 15:117db924cf7c 874 "addl 24(%[sha256]), %%r14d\n\t" \
wolfSSL 15:117db924cf7c 875 "addl 28(%[sha256]), %%r15d\n\t"
wolfSSL 15:117db924cf7c 876
/* _STORE_DIGEST: memory words = registers. */
wolfSSL 15:117db924cf7c 877 #define _STORE_DIGEST() \
wolfSSL 15:117db924cf7c 878 "movl %%r8d , (%[sha256])\n\t" \
wolfSSL 15:117db924cf7c 879 "movl %%r9d , 4(%[sha256])\n\t" \
wolfSSL 15:117db924cf7c 880 "movl %%r10d, 8(%[sha256])\n\t" \
wolfSSL 15:117db924cf7c 881 "movl %%r11d, 12(%[sha256])\n\t" \
wolfSSL 15:117db924cf7c 882 "movl %%r12d, 16(%[sha256])\n\t" \
wolfSSL 15:117db924cf7c 883 "movl %%r13d, 20(%[sha256])\n\t" \
wolfSSL 15:117db924cf7c 884 "movl %%r14d, 24(%[sha256])\n\t" \
wolfSSL 15:117db924cf7c 885 "movl %%r15d, 28(%[sha256])\n\t"
wolfSSL 15:117db924cf7c 886
/* Un-prefixed names that simply expand to the underscore versions above. */
wolfSSL 15:117db924cf7c 887 #define LOAD_DIGEST() \
wolfSSL 15:117db924cf7c 888 _LOAD_DIGEST()
wolfSSL 15:117db924cf7c 889
wolfSSL 15:117db924cf7c 890 #define STORE_ADD_DIGEST() \
wolfSSL 15:117db924cf7c 891 _STORE_ADD_DIGEST()
wolfSSL 15:117db924cf7c 892
wolfSSL 15:117db924cf7c 893 #define ADD_DIGEST() \
wolfSSL 15:117db924cf7c 894 _ADD_DIGEST()
wolfSSL 15:117db924cf7c 895
wolfSSL 15:117db924cf7c 896 #define STORE_DIGEST() \
wolfSSL 15:117db924cf7c 897 _STORE_DIGEST()
wolfSSL 15:117db924cf7c 898
wolfSSL 15:117db924cf7c 899
/* S_0..S_7: the 32-bit registers r8d..r15d, i.e. the same registers the
 * digest load/store macros move the eight state words through. */
wolfSSL 15:117db924cf7c 900 #define S_0 %r8d
wolfSSL 15:117db924cf7c 901 #define S_1 %r9d
wolfSSL 15:117db924cf7c 902 #define S_2 %r10d
wolfSSL 15:117db924cf7c 903 #define S_3 %r11d
wolfSSL 15:117db924cf7c 904 #define S_4 %r12d
wolfSSL 15:117db924cf7c 905 #define S_5 %r13d
wolfSSL 15:117db924cf7c 906 #define S_6 %r14d
wolfSSL 15:117db924cf7c 907 #define S_7 %r15d
wolfSSL 15:117db924cf7c 908
/* L1..L4: 32-bit scratch registers used by the round macros.
 * WK: base register of the w_k values; round macros read (i)*4(WK), so the
 * values are addressed relative to the stack pointer. */
wolfSSL 15:117db924cf7c 909 #define L1 "%%edx"
wolfSSL 15:117db924cf7c 910 #define L2 "%%ecx"
wolfSSL 15:117db924cf7c 911 #define L3 "%%eax"
wolfSSL 15:117db924cf7c 912 #define L4 "%%ebx"
wolfSSL 15:117db924cf7c 913 #define WK "%%rsp"
wolfSSL 15:117db924cf7c 914
/* Register-name lists matching the scratch, state and xmm0..xmm13
 * registers. NOTE(review): presumably used as asm clobber lists - the use
 * site is not visible in this part of the file. */
wolfSSL 15:117db924cf7c 915 #define WORK_REGS "eax", "ebx", "ecx", "edx"
wolfSSL 15:117db924cf7c 916 #define STATE_REGS "r8","r9","r10","r11","r12","r13","r14","r15"
wolfSSL 15:117db924cf7c 917 #define XMM_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", \
wolfSSL 15:117db924cf7c 918 "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13"
wolfSSL 15:117db924cf7c 919
wolfSSL 15:117db924cf7c 920 #if defined(HAVE_INTEL_RORX)
/* One SHA-256 round, rorx flavour, "variant 0", split into eight asm string
 * fragments (_1 .. _8) that must be emitted in order.
 * Arguments a..h name the state registers for this round; i is the index
 * of the w_k word read from (i)*4(WK).
 * Register roles in this variant: L3 carries f, f^g, Ch(e,f,g), then a^b;
 * L4 ends up holding Maj(a,b,c).
 * On entry the fragments expect L1 = e>>>6 (left by step _8 of the
 * preceding round) and L4 = b^c.
 * NOTE(review): presumably the round is split so the fragments can be
 * interleaved with other instructions - the use site is not visible here. */
wolfSSL 15:117db924cf7c 921 #define RND_STEP_RORX_0_1(a, b, c, d, e, f, g, h, i) \
wolfSSL 15:117db924cf7c 922 /* L3 = f */ \
wolfSSL 15:117db924cf7c 923 "movl %" #f ", " L3 "\n\t" \
wolfSSL 15:117db924cf7c 924 /* L2 = e>>>11 */ \
wolfSSL 15:117db924cf7c 925 "rorx $11, %" #e ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 926 /* h += w_k */ \
wolfSSL 15:117db924cf7c 927 "addl (" #i ")*4(" WK "), %" #h "\n\t" \
wolfSSL 15:117db924cf7c 928
wolfSSL 15:117db924cf7c 929 #define RND_STEP_RORX_0_2(a, b, c, d, e, f, g, h, i) \
wolfSSL 15:117db924cf7c 930 /* L2 = (e>>>6) ^ (e>>>11) */ \
wolfSSL 15:117db924cf7c 931 "xorl " L1 ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 932 /* L3 = f ^ g */ \
wolfSSL 15:117db924cf7c 933 "xorl %" #g ", " L3 "\n\t" \
wolfSSL 15:117db924cf7c 934 /* L1 = e>>>25 */ \
wolfSSL 15:117db924cf7c 935 "rorx $25, %" #e ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 936
wolfSSL 15:117db924cf7c 937 #define RND_STEP_RORX_0_3(a, b, c, d, e, f, g, h, i) \
wolfSSL 15:117db924cf7c 938 /* L3 = (f ^ g) & e */ \
wolfSSL 15:117db924cf7c 939 "andl %" #e ", " L3 "\n\t" \
wolfSSL 15:117db924cf7c 940 /* L1 = Sigma1(e) */ \
wolfSSL 15:117db924cf7c 941 "xorl " L2 ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 942 /* L2 = a>>>13 */ \
wolfSSL 15:117db924cf7c 943 "rorx $13, %" #a ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 944
wolfSSL 15:117db924cf7c 945 #define RND_STEP_RORX_0_4(a, b, c, d, e, f, g, h, i) \
wolfSSL 15:117db924cf7c 946 /* h += Sigma1(e) */ \
wolfSSL 15:117db924cf7c 947 "addl " L1 ", %" #h "\n\t" \
wolfSSL 15:117db924cf7c 948 /* L1 = a>>>2 */ \
wolfSSL 15:117db924cf7c 949 "rorx $2, %" #a ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 950 /* L3 = Ch(e,f,g) */ \
wolfSSL 15:117db924cf7c 951 "xorl %" #g ", " L3 "\n\t" \
wolfSSL 15:117db924cf7c 952
wolfSSL 15:117db924cf7c 953 #define RND_STEP_RORX_0_5(a, b, c, d, e, f, g, h, i) \
wolfSSL 15:117db924cf7c 954 /* L2 = (a>>>2) ^ (a>>>13) */ \
wolfSSL 15:117db924cf7c 955 "xorl " L1 ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 956 /* L1 = a>>>22 */ \
wolfSSL 15:117db924cf7c 957 "rorx $22, %" #a ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 958 /* h += Ch(e,f,g) */ \
wolfSSL 15:117db924cf7c 959 "addl " L3 ", %" #h "\n\t" \
wolfSSL 15:117db924cf7c 960
wolfSSL 15:117db924cf7c 961 #define RND_STEP_RORX_0_6(a, b, c, d, e, f, g, h, i) \
wolfSSL 15:117db924cf7c 962 /* L1 = Sigma0(a) */ \
wolfSSL 15:117db924cf7c 963 "xorl " L2 ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 964 /* L3 = b */ \
wolfSSL 15:117db924cf7c 965 "movl %" #b ", " L3 "\n\t" \
wolfSSL 15:117db924cf7c 966 /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */ \
wolfSSL 15:117db924cf7c 967 "addl %" #h ", %" #d "\n\t" \
wolfSSL 15:117db924cf7c 968
wolfSSL 15:117db924cf7c 969 #define RND_STEP_RORX_0_7(a, b, c, d, e, f, g, h, i) \
wolfSSL 15:117db924cf7c 970 /* L3 = a ^ b */ \
wolfSSL 15:117db924cf7c 971 "xorl %" #a ", " L3 "\n\t" \
wolfSSL 15:117db924cf7c 972 /* h += Sigma0(a) */ \
wolfSSL 15:117db924cf7c 973 "addl " L1 ", %" #h "\n\t" \
wolfSSL 15:117db924cf7c 974 /* L4 = (a ^ b) & (b ^ c) */ \
wolfSSL 15:117db924cf7c 975 "andl " L3 ", " L4 "\n\t" \
wolfSSL 15:117db924cf7c 976
wolfSSL 15:117db924cf7c 977 #define RND_STEP_RORX_0_8(a, b, c, d, e, f, g, h, i) \
wolfSSL 15:117db924cf7c 978 /* L4 = Maj(a,b,c) */ \
wolfSSL 15:117db924cf7c 979 "xorl %" #b ", " L4 "\n\t" \
wolfSSL 15:117db924cf7c 980 /* L1 = d>>>6 (= e>>>6 next RND) */ \
wolfSSL 15:117db924cf7c 981 "rorx $6, %" #d ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 982 /* h += Maj(a,b,c) */ \
wolfSSL 15:117db924cf7c 983 "addl " L4 ", %" #h "\n\t" \
wolfSSL 15:117db924cf7c 984
/* One SHA-256 round, rorx flavour, "variant 1": the same eight-fragment
 * sequence as RND_STEP_RORX_0_1..8 but with the roles of L3 and L4
 * exchanged. Here L4 carries f, f^g, Ch(e,f,g), then a^b, and L3 ends up
 * holding Maj(a,b,c).
 * On entry the fragments expect L1 = e>>>6 (left by step _8 of the
 * preceding round) and L3 = b^c.
 * Arguments a..h name the state registers for this round; i is the index
 * of the w_k word read from (i)*4(WK). */
wolfSSL 15:117db924cf7c 985 #define RND_STEP_RORX_1_1(a, b, c, d, e, f, g, h, i) \
wolfSSL 15:117db924cf7c 986 /* L4 = f */ \
wolfSSL 15:117db924cf7c 987 "movl %" #f ", " L4 "\n\t" \
wolfSSL 15:117db924cf7c 988 /* L2 = e>>>11 */ \
wolfSSL 15:117db924cf7c 989 "rorx $11, %" #e ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 990 /* h += w_k */ \
wolfSSL 15:117db924cf7c 991 "addl (" #i ")*4(" WK "), %" #h "\n\t" \
wolfSSL 15:117db924cf7c 992
wolfSSL 15:117db924cf7c 993 #define RND_STEP_RORX_1_2(a, b, c, d, e, f, g, h, i) \
wolfSSL 15:117db924cf7c 994 /* L2 = (e>>>6) ^ (e>>>11) */ \
wolfSSL 15:117db924cf7c 995 "xorl " L1 ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 996 /* L4 = f ^ g */ \
wolfSSL 15:117db924cf7c 997 "xorl %" #g ", " L4 "\n\t" \
wolfSSL 15:117db924cf7c 998 /* L1 = e>>>25 */ \
wolfSSL 15:117db924cf7c 999 "rorx $25, %" #e ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1000
wolfSSL 15:117db924cf7c 1001 #define RND_STEP_RORX_1_3(a, b, c, d, e, f, g, h, i) \
wolfSSL 15:117db924cf7c 1002 /* L4 = (f ^ g) & e */ \
wolfSSL 15:117db924cf7c 1003 "andl %" #e ", " L4 "\n\t" \
wolfSSL 15:117db924cf7c 1004 /* L1 = Sigma1(e) */ \
wolfSSL 15:117db924cf7c 1005 "xorl " L2 ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1006 /* L2 = a>>>13 */ \
wolfSSL 15:117db924cf7c 1007 "rorx $13, %" #a ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1008
wolfSSL 15:117db924cf7c 1009 #define RND_STEP_RORX_1_4(a, b, c, d, e, f, g, h, i) \
wolfSSL 15:117db924cf7c 1010 /* h += Sigma1(e) */ \
wolfSSL 15:117db924cf7c 1011 "addl " L1 ", %" #h "\n\t" \
wolfSSL 15:117db924cf7c 1012 /* L1 = a>>>2 */ \
wolfSSL 15:117db924cf7c 1013 "rorx $2, %" #a ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1014 /* L4 = Ch(e,f,g) */ \
wolfSSL 15:117db924cf7c 1015 "xorl %" #g ", " L4 "\n\t" \
wolfSSL 15:117db924cf7c 1016
wolfSSL 15:117db924cf7c 1017 #define RND_STEP_RORX_1_5(a, b, c, d, e, f, g, h, i) \
wolfSSL 15:117db924cf7c 1018 /* L2 = (a>>>2) ^ (a>>>13) */ \
wolfSSL 15:117db924cf7c 1019 "xorl " L1 ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1020 /* L1 = a>>>22 */ \
wolfSSL 15:117db924cf7c 1021 "rorx $22, %" #a ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1022 /* h += Ch(e,f,g) */ \
wolfSSL 15:117db924cf7c 1023 "addl " L4 ", %" #h "\n\t" \
wolfSSL 15:117db924cf7c 1024
wolfSSL 15:117db924cf7c 1025 #define RND_STEP_RORX_1_6(a, b, c, d, e, f, g, h, i) \
wolfSSL 15:117db924cf7c 1026 /* L1 = Sigma0(a) */ \
wolfSSL 15:117db924cf7c 1027 "xorl " L2 ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1028 /* L4 = b */ \
wolfSSL 15:117db924cf7c 1029 "movl %" #b ", " L4 "\n\t" \
wolfSSL 15:117db924cf7c 1030 /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */ \
wolfSSL 15:117db924cf7c 1031 "addl %" #h ", %" #d "\n\t" \
wolfSSL 15:117db924cf7c 1032
wolfSSL 15:117db924cf7c 1033 #define RND_STEP_RORX_1_7(a, b, c, d, e, f, g, h, i) \
wolfSSL 15:117db924cf7c 1034 /* L4 = a ^ b */ \
wolfSSL 15:117db924cf7c 1035 "xorl %" #a ", " L4 "\n\t" \
wolfSSL 15:117db924cf7c 1036 /* h += Sigma0(a) */ \
wolfSSL 15:117db924cf7c 1037 "addl " L1 ", %" #h "\n\t" \
wolfSSL 15:117db924cf7c 1038 /* L3 = (a ^ b) & (b ^ c) */ \
wolfSSL 15:117db924cf7c 1039 "andl " L4 ", " L3 "\n\t" \
wolfSSL 15:117db924cf7c 1040
wolfSSL 15:117db924cf7c 1041 #define RND_STEP_RORX_1_8(a, b, c, d, e, f, g, h, i) \
wolfSSL 15:117db924cf7c 1042 /* L3 = Maj(a,b,c) */ \
wolfSSL 15:117db924cf7c 1043 "xorl %" #b ", " L3 "\n\t" \
wolfSSL 15:117db924cf7c 1044 /* L1 = d>>>6 (= e>>>6 next RND) */ \
wolfSSL 15:117db924cf7c 1045 "rorx $6, %" #d ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1046 /* h += Maj(a,b,c) */ \
wolfSSL 15:117db924cf7c 1047 "addl " L3 ", %" #h "\n\t" \
wolfSSL 15:117db924cf7c 1048
/* A complete SHA-256 round (rorx flavour, "variant 0") as one asm string.
 * Unlike the RND_STEP_RORX_0_* fragments it computes e>>>6 itself, and it
 * defers the final "h += Maj(a,b,c)": the Maj value is left in L4 and is
 * added by the next round. Correspondingly, this round starts by adding L3
 * (the previous round's Maj) into a, which is the previous round's h.
 * Expects L4 = b^c on entry.
 * Arguments a..h name the state registers; i indexes w_k at (i)*4(WK). */
wolfSSL 15:117db924cf7c 1049 #define _RND_RORX_X_0(a, b, c, d, e, f, g, h, i) \
wolfSSL 15:117db924cf7c 1050 /* L1 = e>>>6 */ \
wolfSSL 15:117db924cf7c 1051 "rorx $6, %" #e ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1052 /* L2 = e>>>11 */ \
wolfSSL 15:117db924cf7c 1053 "rorx $11, %" #e ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1054 /* Prev RND: h += Maj(a,b,c) */ \
wolfSSL 15:117db924cf7c 1055 "addl " L3 ", %" #a "\n\t" \
wolfSSL 15:117db924cf7c 1056 /* h += w_k */ \
wolfSSL 15:117db924cf7c 1057 "addl (" #i ")*4(" WK "), %" #h "\n\t" \
wolfSSL 15:117db924cf7c 1058 /* L3 = f */ \
wolfSSL 15:117db924cf7c 1059 "movl %" #f ", " L3 "\n\t" \
wolfSSL 15:117db924cf7c 1060 /* L2 = (e>>>6) ^ (e>>>11) */ \
wolfSSL 15:117db924cf7c 1061 "xorl " L1 ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1062 /* L3 = f ^ g */ \
wolfSSL 15:117db924cf7c 1063 "xorl %" #g ", " L3 "\n\t" \
wolfSSL 15:117db924cf7c 1064 /* L1 = e>>>25 */ \
wolfSSL 15:117db924cf7c 1065 "rorx $25, %" #e ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1066 /* L1 = Sigma1(e) */ \
wolfSSL 15:117db924cf7c 1067 "xorl " L2 ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1068 /* L3 = (f ^ g) & e */ \
wolfSSL 15:117db924cf7c 1069 "andl %" #e ", " L3 "\n\t" \
wolfSSL 15:117db924cf7c 1070 /* h += Sigma1(e) */ \
wolfSSL 15:117db924cf7c 1071 "addl " L1 ", %" #h "\n\t" \
wolfSSL 15:117db924cf7c 1072 /* L1 = a>>>2 */ \
wolfSSL 15:117db924cf7c 1073 "rorx $2, %" #a ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1074 /* L2 = a>>>13 */ \
wolfSSL 15:117db924cf7c 1075 "rorx $13, %" #a ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1076 /* L3 = Ch(e,f,g) */ \
wolfSSL 15:117db924cf7c 1077 "xorl %" #g ", " L3 "\n\t" \
wolfSSL 15:117db924cf7c 1078 /* L2 = (a>>>2) ^ (a>>>13) */ \
wolfSSL 15:117db924cf7c 1079 "xorl " L1 ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1080 /* L1 = a>>>22 */ \
wolfSSL 15:117db924cf7c 1081 "rorx $22, %" #a ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1082 /* h += Ch(e,f,g) */ \
wolfSSL 15:117db924cf7c 1083 "addl " L3 ", %" #h "\n\t" \
wolfSSL 15:117db924cf7c 1084 /* L1 = Sigma0(a) */ \
wolfSSL 15:117db924cf7c 1085 "xorl " L2 ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1086 /* L3 = b */ \
wolfSSL 15:117db924cf7c 1087 "movl %" #b ", " L3 "\n\t" \
wolfSSL 15:117db924cf7c 1088 /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */ \
wolfSSL 15:117db924cf7c 1089 "addl %" #h ", %" #d "\n\t" \
wolfSSL 15:117db924cf7c 1090 /* L3 = a ^ b */ \
wolfSSL 15:117db924cf7c 1091 "xorl %" #a ", " L3 "\n\t" \
wolfSSL 15:117db924cf7c 1092 /* L4 = (a ^ b) & (b ^ c) */ \
wolfSSL 15:117db924cf7c 1093 "andl " L3 ", " L4 "\n\t" \
wolfSSL 15:117db924cf7c 1094 /* h += Sigma0(a) */ \
wolfSSL 15:117db924cf7c 1095 "addl " L1 ", %" #h "\n\t" \
wolfSSL 15:117db924cf7c 1096 /* L4 = Maj(a,b,c) */ \
wolfSSL 15:117db924cf7c 1097 "xorl %" #b ", " L4 "\n\t" \
wolfSSL 15:117db924cf7c 1098
/* A complete SHA-256 round (rorx flavour, "variant 1") as one asm string:
 * the same sequence as _RND_RORX_X_0 with the roles of L3 and L4 exchanged.
 * It starts by adding L4 (the previous round's Maj) into a, which is the
 * previous round's h, and leaves this round's Maj(a,b,c) in L3 for the
 * next round to add. Expects L3 = b^c on entry.
 * Arguments a..h name the state registers; i indexes w_k at (i)*4(WK). */
wolfSSL 15:117db924cf7c 1099 #define _RND_RORX_X_1(a, b, c, d, e, f, g, h, i) \
wolfSSL 15:117db924cf7c 1100 /* L1 = e>>>6 */ \
wolfSSL 15:117db924cf7c 1101 "rorx $6, %" #e ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1102 /* L2 = e>>>11 */ \
wolfSSL 15:117db924cf7c 1103 "rorx $11, %" #e ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1104 /* Prev RND: h += Maj(a,b,c) */ \
wolfSSL 15:117db924cf7c 1105 "addl " L4 ", %" #a "\n\t" \
wolfSSL 15:117db924cf7c 1106 /* h += w_k */ \
wolfSSL 15:117db924cf7c 1107 "addl (" #i ")*4(" WK "), %" #h "\n\t" \
wolfSSL 15:117db924cf7c 1108 /* L4 = f */ \
wolfSSL 15:117db924cf7c 1109 "movl %" #f ", " L4 "\n\t" \
wolfSSL 15:117db924cf7c 1110 /* L2 = (e>>>6) ^ (e>>>11) */ \
wolfSSL 15:117db924cf7c 1111 "xorl " L1 ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1112 /* L4 = f ^ g */ \
wolfSSL 15:117db924cf7c 1113 "xorl %" #g ", " L4 "\n\t" \
wolfSSL 15:117db924cf7c 1114 /* L1 = e>>>25 */ \
wolfSSL 15:117db924cf7c 1115 "rorx $25, %" #e ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1116 /* L1 = Sigma1(e) */ \
wolfSSL 15:117db924cf7c 1117 "xorl " L2 ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1118 /* L4 = (f ^ g) & e */ \
wolfSSL 15:117db924cf7c 1119 "andl %" #e ", " L4 "\n\t" \
wolfSSL 15:117db924cf7c 1120 /* h += Sigma1(e) */ \
wolfSSL 15:117db924cf7c 1121 "addl " L1 ", %" #h "\n\t" \
wolfSSL 15:117db924cf7c 1122 /* L1 = a>>>2 */ \
wolfSSL 15:117db924cf7c 1123 "rorx $2, %" #a ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1124 /* L2 = a>>>13 */ \
wolfSSL 15:117db924cf7c 1125 "rorx $13, %" #a ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1126 /* L4 = Ch(e,f,g) */ \
wolfSSL 15:117db924cf7c 1127 "xorl %" #g ", " L4 "\n\t" \
wolfSSL 15:117db924cf7c 1128 /* L2 = (a>>>2) ^ (a>>>13) */ \
wolfSSL 15:117db924cf7c 1129 "xorl " L1 ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1130 /* L1 = a>>>22 */ \
wolfSSL 15:117db924cf7c 1131 "rorx $22, %" #a ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1132 /* h += Ch(e,f,g) */ \
wolfSSL 15:117db924cf7c 1133 "addl " L4 ", %" #h "\n\t" \
wolfSSL 15:117db924cf7c 1134 /* L1 = Sigma0(a) */ \
wolfSSL 15:117db924cf7c 1135 "xorl " L2 ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1136 /* L4 = b */ \
wolfSSL 15:117db924cf7c 1137 "movl %" #b ", " L4 "\n\t" \
wolfSSL 15:117db924cf7c 1138 /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */ \
wolfSSL 15:117db924cf7c 1139 "addl %" #h ", %" #d "\n\t" \
wolfSSL 15:117db924cf7c 1140 /* L4 = a ^ b */ \
wolfSSL 15:117db924cf7c 1141 "xorl %" #a ", " L4 "\n\t" \
wolfSSL 15:117db924cf7c 1142 /* L3 = (a ^ b) & (b ^ c) */ \
wolfSSL 15:117db924cf7c 1143 "andl " L4 ", " L3 "\n\t" \
wolfSSL 15:117db924cf7c 1144 /* h += Sigma0(a) */ \
wolfSSL 15:117db924cf7c 1145 "addl " L1 ", %" #h "\n\t" \
wolfSSL 15:117db924cf7c 1146 /* L3 = Maj(a,b,c) */ \
wolfSSL 15:117db924cf7c 1147 "xorl %" #b ", " L3 "\n\t" \
wolfSSL 15:117db924cf7c 1148
wolfSSL 15:117db924cf7c 1149
/* Forwarding wrappers: the extra macro level lets macro arguments (for
 * example S_0..S_7) be expanded before the underscore versions stringize
 * them with #. */
wolfSSL 15:117db924cf7c 1150 #define RND_RORX_X_0(a,b,c,d,e,f,g,h,i) \
wolfSSL 15:117db924cf7c 1151 _RND_RORX_X_0(a,b,c,d,e,f,g,h,i)
wolfSSL 15:117db924cf7c 1152 #define RND_RORX_X_1(a,b,c,d,e,f,g,h,i) \
wolfSSL 15:117db924cf7c 1153 _RND_RORX_X_1(a,b,c,d,e,f,g,h,i)
wolfSSL 15:117db924cf7c 1154
/* Four consecutive rounds for w_k indices i..i+3. The variants alternate
 * 0,1,0,1 and the register arguments rotate by one position per round, so
 * each round's Maj register is the one the following round adds back. */
wolfSSL 15:117db924cf7c 1155 #define RND_RORX_X4(a,b,c,d,e,f,g,h,i) \
wolfSSL 15:117db924cf7c 1156 RND_RORX_X_0(a,b,c,d,e,f,g,h,i+0) \
wolfSSL 15:117db924cf7c 1157 RND_RORX_X_1(h,a,b,c,d,e,f,g,i+1) \
wolfSSL 15:117db924cf7c 1158 RND_RORX_X_0(g,h,a,b,c,d,e,f,i+2) \
wolfSSL 15:117db924cf7c 1159 RND_RORX_X_1(f,g,h,a,b,c,d,e,i+3)
wolfSSL 15:117db924cf7c 1160
wolfSSL 15:117db924cf7c 1161 #endif /* HAVE_INTEL_RORX */
wolfSSL 15:117db924cf7c 1162
wolfSSL 15:117db924cf7c 1163 #define RND_STEP_0_1(a,b,c,d,e,f,g,h,i) \
wolfSSL 15:117db924cf7c 1164 /* L1 = e>>>14 */ \
wolfSSL 15:117db924cf7c 1165 "rorl $14, " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1166
wolfSSL 15:117db924cf7c 1167 #define RND_STEP_0_2(a,b,c,d,e,f,g,h,i) \
wolfSSL 15:117db924cf7c 1168 /* L3 = b */ \
wolfSSL 15:117db924cf7c 1169 "movl %" #b ", " L3 "\n\t" \
wolfSSL 15:117db924cf7c 1170 /* L2 = f */ \
wolfSSL 15:117db924cf7c 1171 "movl %" #f ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1172 /* h += w_k */ \
wolfSSL 15:117db924cf7c 1173 "addl (" #i ")*4(" WK "), %" #h "\n\t" \
wolfSSL 15:117db924cf7c 1174 /* L2 = f ^ g */ \
wolfSSL 15:117db924cf7c 1175 "xorl %" #g ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1176
wolfSSL 15:117db924cf7c 1177 #define RND_STEP_0_3(a,b,c,d,e,f,g,h,i) \
wolfSSL 15:117db924cf7c 1178 /* L1 = (e>>>14) ^ e */ \
wolfSSL 15:117db924cf7c 1179 "xorl %" #e ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1180 /* L2 = (f ^ g) & e */ \
wolfSSL 15:117db924cf7c 1181 "andl %" #e ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1182
wolfSSL 15:117db924cf7c 1183 #define RND_STEP_0_4(a,b,c,d,e,f,g,h,i) \
wolfSSL 15:117db924cf7c 1184 /* L1 = ((e>>>14) ^ e) >>> 5 */ \
wolfSSL 15:117db924cf7c 1185 "rorl $5, " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1186 /* L2 = Ch(e,f,g) */ \
wolfSSL 15:117db924cf7c 1187 "xorl %" #g ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1188 /* L1 = (((e>>>14) ^ e) >>> 5) ^ e */ \
wolfSSL 15:117db924cf7c 1189 "xorl %" #e ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1190 /* h += Ch(e,f,g) */ \
wolfSSL 15:117db924cf7c 1191 "addl " L2 ", %" #h "\n\t" \
wolfSSL 15:117db924cf7c 1192
wolfSSL 15:117db924cf7c 1193 #define RND_STEP_0_5(a,b,c,d,e,f,g,h,i) \
wolfSSL 15:117db924cf7c 1194 /* L1 = ((((e>>>14) ^ e) >>> 5) ^ e) >>> 6 */ \
wolfSSL 15:117db924cf7c 1195 "rorl $6, " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1196 /* L3 = a ^ b (= b ^ c of next RND) */ \
wolfSSL 15:117db924cf7c 1197 "xorl %" #a ", " L3 "\n\t" \
wolfSSL 15:117db924cf7c 1198 /* h = h + w_k + Sigma1(e) */ \
wolfSSL 15:117db924cf7c 1199 "addl " L1 ", %" #h "\n\t" \
wolfSSL 15:117db924cf7c 1200 /* L2 = a */ \
wolfSSL 15:117db924cf7c 1201 "movl %" #a ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1202
wolfSSL 15:117db924cf7c 1203 #define RND_STEP_0_6(a,b,c,d,e,f,g,h,i) \
wolfSSL 15:117db924cf7c 1204 /* L3 = (a ^ b) & (b ^ c) */ \
wolfSSL 15:117db924cf7c 1205 "andl " L3 ", " L4 "\n\t" \
wolfSSL 15:117db924cf7c 1206 /* L2 = a>>>9 */ \
wolfSSL 15:117db924cf7c 1207 "rorl $9, " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1208 /* L2 = (a>>>9) ^ a */ \
wolfSSL 15:117db924cf7c 1209 "xorl %" #a ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1210 /* L1 = Maj(a,b,c) */ \
wolfSSL 15:117db924cf7c 1211 "xorl %" #b ", " L4 "\n\t" \
wolfSSL 15:117db924cf7c 1212
/* Round step 7 of 8, "_0" register layout: adds the partial sum in h to d
 * (before Maj is added, so d receives only the T1 terms), then adds
 * Maj(a,b,c) from L4 to h while continuing the Sigma0(a) chain in L2. */
wolfSSL 15:117db924cf7c 1213 #define RND_STEP_0_7(a,b,c,d,e,f,g,h,i) \
wolfSSL 15:117db924cf7c 1214 /* L2 = ((a>>>9) ^ a) >>> 11 */ \
wolfSSL 15:117db924cf7c 1215 "rorl $11, " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1216 /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */ \
wolfSSL 15:117db924cf7c 1217 "addl %" #h ", %" #d "\n\t" \
wolfSSL 15:117db924cf7c 1218 /* L2 = (((a>>>9) ^ a) >>> 11) ^ a */ \
wolfSSL 15:117db924cf7c 1219 "xorl %" #a ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1220 /* h = h + w_k + Sigma1(e) + Ch(e,f,g) + Maj(a,b,c) */ \
wolfSSL 15:117db924cf7c 1221 "addl " L4 ", %" #h "\n\t" \
wolfSSL 15:117db924cf7c 1222
/* Round step 8 of 8, "_0" register layout: finishes Sigma0(a) in L2 and adds
 * it to h, completing the round, and preloads L1 with d, which is e of the
 * following round. */
wolfSSL 15:117db924cf7c 1223 #define RND_STEP_0_8(a,b,c,d,e,f,g,h,i) \
wolfSSL 15:117db924cf7c 1224 /* L2 = ((((a>>>9) ^ a) >>> 11) ^ a) >>> 2 = Sigma0(a) */ \
wolfSSL 15:117db924cf7c 1225 "rorl $2, " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1226 /* L1 = d (e of next RND) */ \
wolfSSL 15:117db924cf7c 1227 "movl %" #d ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1228 /* h = h + w_k + Sigma1(e) + Sigma0(a) + Ch(e,f,g) + Maj(a,b,c) */ \
wolfSSL 15:117db924cf7c 1229 "addl " L2 ", %" #h "\n\t" \
wolfSSL 15:117db924cf7c 1230
/* Round step 1 of 8, "_1" register layout (rounds i+1 and i+3 of a group of
 * four): starts the Sigma1(e) rotation chain. L1 is expected to already hold
 * e, as left there by the last step of the previous round. */
wolfSSL 15:117db924cf7c 1231 #define RND_STEP_1_1(a,b,c,d,e,f,g,h,i) \
wolfSSL 15:117db924cf7c 1232 /* L1 = e>>>14 */ \
wolfSSL 15:117db924cf7c 1233 "rorl $14, " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1234
/* Round step 2 of 8, "_1" register layout: saves b in L4 (L3 still holds
 * b ^ c from the previous round), adds the precomputed word i of the WK
 * area to h, and starts Ch(e,f,g) in L2 with f ^ g. */
wolfSSL 15:117db924cf7c 1235 #define RND_STEP_1_2(a,b,c,d,e,f,g,h,i) \
wolfSSL 15:117db924cf7c 1236 /* L4 = b */ \
wolfSSL 15:117db924cf7c 1237 "movl %" #b ", " L4 "\n\t" \
wolfSSL 15:117db924cf7c 1238 /* L2 = f */ \
wolfSSL 15:117db924cf7c 1239 "movl %" #f ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1240 /* h += w_k */ \
wolfSSL 15:117db924cf7c 1241 "addl (" #i ")*4(" WK "), %" #h "\n\t" \
wolfSSL 15:117db924cf7c 1242 /* L2 = f ^ g */ \
wolfSSL 15:117db924cf7c 1243 "xorl %" #g ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1244
/* Round step 3 of 8, "_1" register layout: continues the Sigma1(e) chain in
 * L1 and the Ch(e,f,g) computation in L2. */
wolfSSL 15:117db924cf7c 1245 #define RND_STEP_1_3(a,b,c,d,e,f,g,h,i) \
wolfSSL 15:117db924cf7c 1246 /* L1 = (e>>>14) ^ e */ \
wolfSSL 15:117db924cf7c 1247 "xorl %" #e ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1248 /* L2 = (f ^ g) & e */ \
wolfSSL 15:117db924cf7c 1249 "andl %" #e ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1250
/* Round step 4 of 8, "_1" register layout: completes
 * Ch(e,f,g) = ((f ^ g) & e) ^ g in L2 and adds it to h, while advancing the
 * Sigma1(e) rotation chain in L1. */
wolfSSL 15:117db924cf7c 1251 #define RND_STEP_1_4(a,b,c,d,e,f,g,h,i) \
wolfSSL 15:117db924cf7c 1252 /* L1 = ((e>>>14) ^ e) >>> 5 */ \
wolfSSL 15:117db924cf7c 1253 "rorl $5, " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1254 /* L2 = Ch(e,f,g) */ \
wolfSSL 15:117db924cf7c 1255 "xorl %" #g ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1256 /* L1 = (((e>>>14) ^ e) >>> 5) ^ e */ \
wolfSSL 15:117db924cf7c 1257 "xorl %" #e ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1258 /* h += Ch(e,f,g) */ \
wolfSSL 15:117db924cf7c 1259 "addl " L2 ", %" #h "\n\t" \
wolfSSL 15:117db924cf7c 1260
/* Round step 5 of 8, "_1" register layout: finishes the Sigma1(e) rotation
 * chain in L1 and adds it to h, turns L4 into a ^ b, and copies a into L2 to
 * start the Sigma0(a) chain. */
wolfSSL 15:117db924cf7c 1261 #define RND_STEP_1_5(a,b,c,d,e,f,g,h,i) \
wolfSSL 15:117db924cf7c 1262 /* L1 = ((((e>>>14) ^ e) >>> 5) ^ e) >>> 6 = Sigma1(e) */ \
wolfSSL 15:117db924cf7c 1263 "rorl $6, " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1264 /* L4 = a ^ b (= b ^ c of next RND) */ \
wolfSSL 15:117db924cf7c 1265 "xorl %" #a ", " L4 "\n\t" \
wolfSSL 15:117db924cf7c 1266 /* h = h + w_k + Ch(e,f,g) + Sigma1(e) */ \
wolfSSL 15:117db924cf7c 1267 "addl " L1 ", %" #h "\n\t" \
wolfSSL 15:117db924cf7c 1268 /* L2 = a */ \
wolfSSL 15:117db924cf7c 1269 "movl %" #a ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1270
/* Round step 6 of 8, "_1" register layout: builds Maj(a,b,c) in L3 using
 * Maj(a,b,c) = ((a ^ b) & (b ^ c)) ^ b, where L3 enters holding b ^ c and
 * L4 holds a ^ b; also advances the Sigma0(a) rotation chain in L2. */
wolfSSL 15:117db924cf7c 1271 #define RND_STEP_1_6(a,b,c,d,e,f,g,h,i) \
wolfSSL 15:117db924cf7c 1272 /* L3 = (a ^ b) & (b ^ c) */ \
wolfSSL 15:117db924cf7c 1273 "andl " L4 ", " L3 "\n\t" \
wolfSSL 15:117db924cf7c 1274 /* L2 = a>>>9 */ \
wolfSSL 15:117db924cf7c 1275 "rorl $9, " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1276 /* L2 = (a>>>9) ^ a */ \
wolfSSL 15:117db924cf7c 1277 "xorl %" #a ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1278 /* L3 = Maj(a,b,c) */ \
wolfSSL 15:117db924cf7c 1279 "xorl %" #b ", " L3 "\n\t" \
wolfSSL 15:117db924cf7c 1280
/* Round step 7 of 8, "_1" register layout: adds the partial sum in h to d
 * (before Maj is added, so d receives only the T1 terms), then adds
 * Maj(a,b,c) from L3 to h while continuing the Sigma0(a) chain in L2. */
wolfSSL 15:117db924cf7c 1281 #define RND_STEP_1_7(a,b,c,d,e,f,g,h,i) \
wolfSSL 15:117db924cf7c 1282 /* L2 = ((a>>>9) ^ a) >>> 11 */ \
wolfSSL 15:117db924cf7c 1283 "rorl $11, " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1284 /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */ \
wolfSSL 15:117db924cf7c 1285 "addl %" #h ", %" #d "\n\t" \
wolfSSL 15:117db924cf7c 1286 /* L2 = (((a>>>9) ^ a) >>> 11) ^ a */ \
wolfSSL 15:117db924cf7c 1287 "xorl %" #a ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1288 /* h = h + w_k + Sigma1(e) + Ch(e,f,g) + Maj(a,b,c) */ \
wolfSSL 15:117db924cf7c 1289 "addl " L3 ", %" #h "\n\t" \
wolfSSL 15:117db924cf7c 1290
/* Round step 8 of 8, "_1" register layout: finishes Sigma0(a) in L2 and adds
 * it to h, completing the round, and preloads L1 with d, which is e of the
 * following round. */
wolfSSL 15:117db924cf7c 1291 #define RND_STEP_1_8(a,b,c,d,e,f,g,h,i) \
wolfSSL 15:117db924cf7c 1292 /* L2 = ((((a>>>9) ^ a) >>> 11) ^ a) >>> 2 = Sigma0(a) */ \
wolfSSL 15:117db924cf7c 1293 "rorl $2, " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1294 /* L1 = d (e of next RND) */ \
wolfSSL 15:117db924cf7c 1295 "movl %" #d ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1296 /* h = h + w_k + Sigma1(e) + Sigma0(a) + Ch(e,f,g) + Maj(a,b,c) */ \
wolfSSL 15:117db924cf7c 1297 "addl " L2 ", %" #h "\n\t" \
wolfSSL 15:117db924cf7c 1298
/* One complete SHA-256 round in the "_0" register layout, with no message
 * scheduling interleaved. On entry L1 must hold e and L4 must hold b ^ c;
 * on exit L1 holds d (e of the next round) and L3 holds a ^ b (b ^ c of the
 * next round), which is what the "_1" layout expects. Word i of the WK area
 * supplies the precomputed W + K value. L2 is scratch. */
wolfSSL 15:117db924cf7c 1299 #define _RND_ALL_0(a,b,c,d,e,f,g,h,i) \
wolfSSL 15:117db924cf7c 1300 /* h += w_k */ \
wolfSSL 15:117db924cf7c 1301 "addl (" #i ")*4(" WK "), %" #h "\n\t" \
wolfSSL 15:117db924cf7c 1302 /* L2 = f */ \
wolfSSL 15:117db924cf7c 1303 "movl %" #f ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1304 /* L3 = b */ \
wolfSSL 15:117db924cf7c 1305 "movl %" #b ", " L3 "\n\t" \
wolfSSL 15:117db924cf7c 1306 /* L2 = f ^ g */ \
wolfSSL 15:117db924cf7c 1307 "xorl %" #g ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1308 /* L1 = e>>>14 */ \
wolfSSL 15:117db924cf7c 1309 "rorl $14, " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1310 /* L2 = (f ^ g) & e */ \
wolfSSL 15:117db924cf7c 1311 "andl %" #e ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1312 /* L1 = (e>>>14) ^ e */ \
wolfSSL 15:117db924cf7c 1313 "xorl %" #e ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1314 /* L2 = Ch(e,f,g) */ \
wolfSSL 15:117db924cf7c 1315 "xorl %" #g ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1316 /* L1 = ((e>>>14) ^ e) >>> 5 */ \
wolfSSL 15:117db924cf7c 1317 "rorl $5, " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1318 /* h += Ch(e,f,g) */ \
wolfSSL 15:117db924cf7c 1319 "addl " L2 ", %" #h "\n\t" \
wolfSSL 15:117db924cf7c 1320 /* L1 = (((e>>>14) ^ e) >>> 5) ^ e */ \
wolfSSL 15:117db924cf7c 1321 "xorl %" #e ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1322 /* L3 = a ^ b */ \
wolfSSL 15:117db924cf7c 1323 "xorl %" #a ", " L3 "\n\t" \
wolfSSL 15:117db924cf7c 1324 /* L1 = ((((e>>>14) ^ e) >>> 5) ^ e) >>> 6 = Sigma1(e) */ \
wolfSSL 15:117db924cf7c 1325 "rorl $6, " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1326 /* L2 = a */ \
wolfSSL 15:117db924cf7c 1327 "movl %" #a ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1328 /* h = h + w_k + Ch(e,f,g) + Sigma1(e) */ \
wolfSSL 15:117db924cf7c 1329 "addl " L1 ", %" #h "\n\t" \
wolfSSL 15:117db924cf7c 1330 /* L2 = a>>>9 */ \
wolfSSL 15:117db924cf7c 1331 "rorl $9, " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1332 /* L4 = (a ^ b) & (b ^ c) */ \
wolfSSL 15:117db924cf7c 1333 "andl " L3 ", " L4 "\n\t" \
wolfSSL 15:117db924cf7c 1334 /* L2 = (a>>>9) ^ a */ \
wolfSSL 15:117db924cf7c 1335 "xorl %" #a ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1336 /* L4 = Maj(a,b,c) */ \
wolfSSL 15:117db924cf7c 1337 "xorl %" #b ", " L4 "\n\t" \
wolfSSL 15:117db924cf7c 1338 /* L2 = ((a>>>9) ^ a) >>> 11 */ \
wolfSSL 15:117db924cf7c 1339 "rorl $11, " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1340 /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */ \
wolfSSL 15:117db924cf7c 1341 "addl %" #h ", %" #d "\n\t" \
wolfSSL 15:117db924cf7c 1342 /* L2 = (((a>>>9) ^ a) >>> 11) ^ a */ \
wolfSSL 15:117db924cf7c 1343 "xorl %" #a ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1344 /* h = h + w_k + Sigma1(e) + Ch(e,f,g) + Maj(a,b,c) */ \
wolfSSL 15:117db924cf7c 1345 "addl " L4 ", %" #h "\n\t" \
wolfSSL 15:117db924cf7c 1346 /* L2 = ((((a>>>9) ^ a) >>> 11) ^ a) >>> 2 = Sigma0(a) */ \
wolfSSL 15:117db924cf7c 1347 "rorl $2, " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1348 /* L1 = d (e of next RND) */ \
wolfSSL 15:117db924cf7c 1349 "movl %" #d ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1350 /* h = h + w_k + Sigma1(e) + Sigma0(a) + Ch(e,f,g) + Maj(a,b,c) */ \
wolfSSL 15:117db924cf7c 1351 "addl " L2 ", %" #h "\n\t" \
wolfSSL 15:117db924cf7c 1352
/* One complete SHA-256 round in the "_1" register layout, with no message
 * scheduling interleaved. Identical to _RND_ALL_0 except that the roles of
 * L3 and L4 are swapped: on entry L1 must hold e and L3 must hold b ^ c;
 * on exit L1 holds d (e of the next round) and L4 holds a ^ b (b ^ c of the
 * next round). Word i of the WK area supplies the precomputed W + K value.
 * L2 is scratch. */
wolfSSL 15:117db924cf7c 1353 #define _RND_ALL_1(a,b,c,d,e,f,g,h,i) \
wolfSSL 15:117db924cf7c 1354 /* h += w_k */ \
wolfSSL 15:117db924cf7c 1355 "addl (" #i ")*4(" WK "), %" #h "\n\t" \
wolfSSL 15:117db924cf7c 1356 /* L2 = f */ \
wolfSSL 15:117db924cf7c 1357 "movl %" #f ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1358 /* L4 = b */ \
wolfSSL 15:117db924cf7c 1359 "movl %" #b ", " L4 "\n\t" \
wolfSSL 15:117db924cf7c 1360 /* L2 = f ^ g */ \
wolfSSL 15:117db924cf7c 1361 "xorl %" #g ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1362 /* L1 = e>>>14 */ \
wolfSSL 15:117db924cf7c 1363 "rorl $14, " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1364 /* L2 = (f ^ g) & e */ \
wolfSSL 15:117db924cf7c 1365 "andl %" #e ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1366 /* L1 = (e>>>14) ^ e */ \
wolfSSL 15:117db924cf7c 1367 "xorl %" #e ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1368 /* L2 = Ch(e,f,g) */ \
wolfSSL 15:117db924cf7c 1369 "xorl %" #g ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1370 /* L1 = ((e>>>14) ^ e) >>> 5 */ \
wolfSSL 15:117db924cf7c 1371 "rorl $5, " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1372 /* h += Ch(e,f,g) */ \
wolfSSL 15:117db924cf7c 1373 "addl " L2 ", %" #h "\n\t" \
wolfSSL 15:117db924cf7c 1374 /* L1 = (((e>>>14) ^ e) >>> 5) ^ e */ \
wolfSSL 15:117db924cf7c 1375 "xorl %" #e ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1376 /* L4 = a ^ b */ \
wolfSSL 15:117db924cf7c 1377 "xorl %" #a ", " L4 "\n\t" \
wolfSSL 15:117db924cf7c 1378 /* L1 = ((((e>>>14) ^ e) >>> 5) ^ e) >>> 6 = Sigma1(e) */ \
wolfSSL 15:117db924cf7c 1379 "rorl $6, " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1380 /* L2 = a */ \
wolfSSL 15:117db924cf7c 1381 "movl %" #a ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1382 /* h = h + w_k + Ch(e,f,g) + Sigma1(e) */ \
wolfSSL 15:117db924cf7c 1383 "addl " L1 ", %" #h "\n\t" \
wolfSSL 15:117db924cf7c 1384 /* L2 = a>>>9 */ \
wolfSSL 15:117db924cf7c 1385 "rorl $9, " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1386 /* L3 = (a ^ b) & (b ^ c) */ \
wolfSSL 15:117db924cf7c 1387 "andl " L4 ", " L3 "\n\t" \
wolfSSL 15:117db924cf7c 1388 /* L2 = (a>>>9) ^ a */ \
wolfSSL 15:117db924cf7c 1389 "xorl %" #a", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1390 /* L3 = Maj(a,b,c) */ \
wolfSSL 15:117db924cf7c 1391 "xorl %" #b ", " L3 "\n\t" \
wolfSSL 15:117db924cf7c 1392 /* L2 = ((a>>>9) ^ a) >>> 11 */ \
wolfSSL 15:117db924cf7c 1393 "rorl $11, " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1394 /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */ \
wolfSSL 15:117db924cf7c 1395 "addl %" #h ", %" #d "\n\t" \
wolfSSL 15:117db924cf7c 1396 /* L2 = (((a>>>9) ^ a) >>> 11) ^ a */ \
wolfSSL 15:117db924cf7c 1397 "xorl %" #a ", " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1398 /* h = h + w_k + Sigma1(e) + Ch(e,f,g) + Maj(a,b,c) */ \
wolfSSL 15:117db924cf7c 1399 "addl " L3 ", %" #h "\n\t" \
wolfSSL 15:117db924cf7c 1400 /* L2 = ((((a>>>9) ^ a) >>> 11) ^ a) >>> 2 = Sigma0(a) */ \
wolfSSL 15:117db924cf7c 1401 "rorl $2, " L2 "\n\t" \
wolfSSL 15:117db924cf7c 1402 /* L1 = d (e of next RND) */ \
wolfSSL 15:117db924cf7c 1403 "movl %" #d ", " L1 "\n\t" \
wolfSSL 15:117db924cf7c 1404 /* h = h + w_k + Sigma1(e) + Sigma0(a) + Ch(e,f,g) + Maj(a,b,c) */ \
wolfSSL 15:117db924cf7c 1405 "addl " L2 ", %" #h "\n\t" \
wolfSSL 15:117db924cf7c 1406
wolfSSL 15:117db924cf7c 1407
/* Expansion wrappers for the full-round macros. The extra level lets macro
 * arguments be expanded by the preprocessor before the underscore-prefixed
 * macro stringifies them with '#'. */
wolfSSL 15:117db924cf7c 1408 #define RND_ALL_0(a, b, c, d, e, f, g, h, i) \
wolfSSL 15:117db924cf7c 1409 _RND_ALL_0(a, b, c, d, e, f, g, h, i)
wolfSSL 15:117db924cf7c 1410 #define RND_ALL_1(a, b, c, d, e, f, g, h, i) \
wolfSSL 15:117db924cf7c 1411 _RND_ALL_1(a, b, c, d, e, f, g, h, i)
wolfSSL 15:117db924cf7c 1412
/* Four consecutive SHA-256 rounds using WK words i .. i+3. Instead of moving
 * values between registers, each round is invoked with the working-variable
 * names rotated by one position; the "_0" and "_1" layouts alternate so the
 * a ^ b value left by one round is reused as b ^ c by the next. */
wolfSSL 15:117db924cf7c 1413 #define RND_ALL_4(a, b, c, d, e, f, g, h, i) \
wolfSSL 15:117db924cf7c 1414 RND_ALL_0(a, b, c, d, e, f, g, h, i+0) \
wolfSSL 15:117db924cf7c 1415 RND_ALL_1(h, a, b, c, d, e, f, g, i+1) \
wolfSSL 15:117db924cf7c 1416 RND_ALL_0(g, h, a, b, c, d, e, f, i+2) \
wolfSSL 15:117db924cf7c 1417 RND_ALL_1(f, g, h, a, b, c, d, e, i+3)
wolfSSL 15:117db924cf7c 1418
wolfSSL 15:117db924cf7c 1419 #endif /* defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) */
wolfSSL 15:117db924cf7c 1420
wolfSSL 15:117db924cf7c 1421 #if defined(HAVE_INTEL_AVX1) /* inline Assembler for Intel AVX1 instructions */
wolfSSL 15:117db924cf7c 1422
/* String builders for the AVX instructions used by the message schedule.
 * Arguments are given destination first (op1 = op2 OP op3, or op1 = op2 OP
 * immediate) and are emitted in AT&T order, i.e. reversed with the
 * destination last. Each instruction has an underscore-prefixed form that
 * stringifies its operands and a plain-named wrapper that lets register
 * alias macros be expanded before stringification. */
wolfSSL 15:117db924cf7c 1423 #define _VPALIGNR(op1, op2, op3, op4) \
wolfSSL 15:117db924cf7c 1424 "vpalignr $" #op4", %" #op3", %" #op2", %" #op1"\n\t"
wolfSSL 15:117db924cf7c 1425 #define VPALIGNR(op1, op2, op3, op4) \
wolfSSL 15:117db924cf7c 1426 _VPALIGNR(op1, op2, op3, op4)
wolfSSL 15:117db924cf7c 1427 #define _VPADDD(op1, op2, op3) \
wolfSSL 15:117db924cf7c 1428 "vpaddd %" #op3", %" #op2", %" #op1"\n\t"
wolfSSL 15:117db924cf7c 1429 #define VPADDD(op1, op2, op3) \
wolfSSL 15:117db924cf7c 1430 _VPADDD(op1, op2, op3)
wolfSSL 15:117db924cf7c 1431 #define _VPSRLD(op1, op2, op3) \
wolfSSL 15:117db924cf7c 1432 "vpsrld $" #op3", %" #op2", %" #op1"\n\t"
wolfSSL 15:117db924cf7c 1433 #define VPSRLD(op1, op2, op3) \
wolfSSL 15:117db924cf7c 1434 _VPSRLD(op1, op2, op3)
wolfSSL 15:117db924cf7c 1435 #define _VPSRLQ(op1, op2, op3) \
wolfSSL 15:117db924cf7c 1436 "vpsrlq $" #op3", %" #op2", %" #op1"\n\t"
wolfSSL 15:117db924cf7c 1437 #define VPSRLQ(op1,op2,op3) \
wolfSSL 15:117db924cf7c 1438 _VPSRLQ(op1,op2,op3)
wolfSSL 15:117db924cf7c 1439 #define _VPSLLD(op1,op2,op3) \
wolfSSL 15:117db924cf7c 1440 "vpslld $" #op3", %" #op2", %" #op1"\n\t"
wolfSSL 15:117db924cf7c 1441 #define VPSLLD(op1,op2,op3) \
wolfSSL 15:117db924cf7c 1442 _VPSLLD(op1,op2,op3)
wolfSSL 15:117db924cf7c 1443 #define _VPOR(op1,op2,op3) \
wolfSSL 15:117db924cf7c 1444 "vpor %" #op3", %" #op2", %" #op1"\n\t"
wolfSSL 15:117db924cf7c 1445 #define VPOR(op1,op2,op3) \
wolfSSL 15:117db924cf7c 1446 _VPOR(op1,op2,op3)
wolfSSL 15:117db924cf7c 1447 #define _VPXOR(op1,op2,op3) \
wolfSSL 15:117db924cf7c 1448 "vpxor %" #op3", %" #op2", %" #op1"\n\t"
wolfSSL 15:117db924cf7c 1449 #define VPXOR(op1,op2,op3) \
wolfSSL 15:117db924cf7c 1450 _VPXOR(op1,op2,op3)
wolfSSL 15:117db924cf7c 1451 #define _VPSHUFD(op1,op2,op3) \
wolfSSL 15:117db924cf7c 1452 "vpshufd $" #op3", %" #op2", %" #op1"\n\t"
wolfSSL 15:117db924cf7c 1453 #define VPSHUFD(op1,op2,op3) \
wolfSSL 15:117db924cf7c 1454 _VPSHUFD(op1,op2,op3)
wolfSSL 15:117db924cf7c 1455 #define _VPSHUFB(op1,op2,op3) \
wolfSSL 15:117db924cf7c 1456 "vpshufb %" #op3", %" #op2", %" #op1"\n\t"
wolfSSL 15:117db924cf7c 1457 #define VPSHUFB(op1,op2,op3) \
wolfSSL 15:117db924cf7c 1458 _VPSHUFB(op1,op2,op3)
wolfSSL 15:117db924cf7c 1459 #define _VPSLLDQ(op1,op2,op3) \
wolfSSL 15:117db924cf7c 1460 "vpslldq $" #op3", %" #op2", %" #op1"\n\t"
wolfSSL 15:117db924cf7c 1461 #define VPSLLDQ(op1,op2,op3) \
wolfSSL 15:117db924cf7c 1462 _VPSLLDQ(op1,op2,op3)
wolfSSL 15:117db924cf7c 1463
/* Runs four SHA-256 rounds (WK words _i .. _i+3) on working variables a..h
 * and, interleaved between the scalar round steps, computes the next four
 * message schedule words with AVX instructions:
 *     W[t] = W[t-16] + W[t-7] + s0(W[t-15]) + s1(W[t-2])
 * X0..X3 hold the sixteen most recent schedule words with the oldest four in
 * X0; the four new words overwrite X0. The first two new words are produced
 * first because the last two depend on them through W[t-2]. XTMP0..XTMP5 are
 * scratch; SHUF_00BA and SHUF_DC00 must hold the matching shuffle masks. */
wolfSSL 15:117db924cf7c 1464 #define MsgSched(X0,X1,X2,X3,a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 1465 RND_STEP_0_1(a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 1466 VPALIGNR (XTMP1, X1, X0, 4) /* XTMP1 = W[-15] */ \
wolfSSL 15:117db924cf7c 1467 VPALIGNR (XTMP0, X3, X2, 4) /* XTMP0 = W[-7] */ \
wolfSSL 15:117db924cf7c 1468 RND_STEP_0_2(a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 1469 RND_STEP_0_3(a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 1470 VPSRLD (XTMP2, XTMP1, 7) /* XTMP2 = W[-15] >> 7 */ \
wolfSSL 15:117db924cf7c 1471 VPSLLD (XTMP3, XTMP1, 25) /* XTMP3 = W[-15] << (32-7) */ \
wolfSSL 15:117db924cf7c 1472 RND_STEP_0_4(a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 1473 RND_STEP_0_5(a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 1474 VPSRLD (XTMP4, XTMP1, 18) /* XTMP4 = W[-15] >> 18 */ \
wolfSSL 15:117db924cf7c 1475 VPSLLD (XTMP5, XTMP1, 14) /* XTMP5 = W[-15] << (32-18) */ \
wolfSSL 15:117db924cf7c 1476 RND_STEP_0_6(a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 1477 RND_STEP_0_7(a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 1478 VPOR (XTMP2, XTMP3, XTMP2) /* XTMP2 = W[-15] >>> 7 */ \
wolfSSL 15:117db924cf7c 1479 VPOR (XTMP4, XTMP5, XTMP4) /* XTMP4 = W[-15] >>> 18 */ \
wolfSSL 15:117db924cf7c 1480 RND_STEP_0_8(a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 1481 RND_STEP_1_1(h,a,b,c,d,e,f,g,_i+1) \
wolfSSL 15:117db924cf7c 1482 RND_STEP_1_2(h,a,b,c,d,e,f,g,_i+1) \
wolfSSL 15:117db924cf7c 1483 VPSRLD (XTMP5, XTMP1, 3) /* XTMP5 = W[-15] >> 3 */ \
wolfSSL 15:117db924cf7c 1484 VPXOR (XTMP2, XTMP4, XTMP2) \
wolfSSL 15:117db924cf7c 1485 /* XTMP2 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR 18 */ \
wolfSSL 15:117db924cf7c 1486 RND_STEP_1_3(h,a,b,c,d,e,f,g,_i+1) \
wolfSSL 15:117db924cf7c 1487 RND_STEP_1_4(h,a,b,c,d,e,f,g,_i+1) \
wolfSSL 15:117db924cf7c 1488 VPXOR (XTMP1, XTMP5, XTMP2) /* XTMP1 = s0 */ \
wolfSSL 15:117db924cf7c 1489 VPSHUFD (XTMP2, X3, 0b11111010) /* XTMP2 = W[-2] {BBAA}*/ \
wolfSSL 15:117db924cf7c 1490 RND_STEP_1_5(h,a,b,c,d,e,f,g,_i+1) \
wolfSSL 15:117db924cf7c 1491 RND_STEP_1_6(h,a,b,c,d,e,f,g,_i+1) \
wolfSSL 15:117db924cf7c 1492 VPSRLD (XTMP4, XTMP2, 10) /* XTMP4 = W[-2] >> 10 {BBAA} */ \
wolfSSL 15:117db924cf7c 1493 VPSRLQ (XTMP3, XTMP2, 19) /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */ \
wolfSSL 15:117db924cf7c 1494 RND_STEP_1_7(h,a,b,c,d,e,f,g,_i+1) \
wolfSSL 15:117db924cf7c 1495 RND_STEP_1_8(h,a,b,c,d,e,f,g,_i+1) \
wolfSSL 15:117db924cf7c 1496 RND_STEP_0_1(g,h,a,b,c,d,e,f,_i+2) \
wolfSSL 15:117db924cf7c 1497 VPSRLQ (XTMP2, XTMP2, 17) /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */ \
wolfSSL 15:117db924cf7c 1498 VPADDD (XTMP0, XTMP0, X0) \
wolfSSL 15:117db924cf7c 1499 RND_STEP_0_2(g,h,a,b,c,d,e,f,_i+2) \
wolfSSL 15:117db924cf7c 1500 RND_STEP_0_3(g,h,a,b,c,d,e,f,_i+2) \
wolfSSL 15:117db924cf7c 1501 RND_STEP_0_4(g,h,a,b,c,d,e,f,_i+2) \
wolfSSL 15:117db924cf7c 1502 VPXOR (XTMP2, XTMP3, XTMP2) \
wolfSSL 15:117db924cf7c 1503 VPADDD (XTMP0, XTMP0, XTMP1) /* XTMP0 = W[-16] + W[-7] + s0 */ \
wolfSSL 15:117db924cf7c 1504 RND_STEP_0_5(g,h,a,b,c,d,e,f,_i+2) \
wolfSSL 15:117db924cf7c 1505 VPXOR (XTMP4, XTMP4, XTMP2) /* XTMP4 = s1 {xBxA} */ \
wolfSSL 15:117db924cf7c 1506 RND_STEP_0_6(g,h,a,b,c,d,e,f,_i+2) \
wolfSSL 15:117db924cf7c 1507 VPSHUFB (XTMP4, XTMP4, SHUF_00BA) /* XTMP4 = s1 {00BA} */ \
wolfSSL 15:117db924cf7c 1508 RND_STEP_0_7(g,h,a,b,c,d,e,f,_i+2) \
wolfSSL 15:117db924cf7c 1509 VPADDD (XTMP0, XTMP0, XTMP4) /* XTMP0 = {..., ..., W[1], W[0]} */ \
wolfSSL 15:117db924cf7c 1510 RND_STEP_0_8(g,h,a,b,c,d,e,f,_i+2) \
wolfSSL 15:117db924cf7c 1511 RND_STEP_1_1(f,g,h,a,b,c,d,e,_i+3) \
wolfSSL 15:117db924cf7c 1512 VPSHUFD (XTMP2, XTMP0, 0b01010000) /* XTMP2 = W[-2] {DDCC} */ \
wolfSSL 15:117db924cf7c 1513 RND_STEP_1_2(f,g,h,a,b,c,d,e,_i+3) \
wolfSSL 15:117db924cf7c 1514 VPSRLQ (XTMP4, XTMP2, 17) /* XTMP4 = W[-2] MY_ROR 17 {xDxC} */ \
wolfSSL 15:117db924cf7c 1515 VPSRLQ (XTMP3, XTMP2, 19) /* XTMP3 = W[-2] MY_ROR 19 {xDxC} */ \
wolfSSL 15:117db924cf7c 1516 RND_STEP_1_3(f,g,h,a,b,c,d,e,_i+3) \
wolfSSL 15:117db924cf7c 1517 RND_STEP_1_4(f,g,h,a,b,c,d,e,_i+3) \
wolfSSL 15:117db924cf7c 1518 VPSRLD (XTMP5, XTMP2, 10) /* XTMP5 = W[-2] >> 10 {DDCC} */ \
wolfSSL 15:117db924cf7c 1519 VPXOR (XTMP4, XTMP3, XTMP4) \
wolfSSL 15:117db924cf7c 1520 RND_STEP_1_5(f,g,h,a,b,c,d,e,_i+3) \
wolfSSL 15:117db924cf7c 1521 RND_STEP_1_6(f,g,h,a,b,c,d,e,_i+3) \
wolfSSL 15:117db924cf7c 1522 VPXOR (XTMP5, XTMP4, XTMP5) /* XTMP5 = s1 {xDxC} */ \
wolfSSL 15:117db924cf7c 1523 RND_STEP_1_7(f,g,h,a,b,c,d,e,_i+3) \
wolfSSL 15:117db924cf7c 1524 VPSHUFB (XTMP5, XTMP5, SHUF_DC00) /* XTMP5 = s1 {DC00} */ \
wolfSSL 15:117db924cf7c 1525 RND_STEP_1_8(f,g,h,a,b,c,d,e,_i+3) \
wolfSSL 15:117db924cf7c 1526 VPADDD (X0, XTMP5, XTMP0) /* X0 = {W[3], W[2], W[1], W[0]} */
wolfSSL 15:117db924cf7c 1527
wolfSSL 15:117db924cf7c 1528 #if defined(HAVE_INTEL_RORX)
wolfSSL 15:117db924cf7c 1529
/* Same job as MsgSched, but built on the RND_STEP_RORX_* round steps: runs
 * four rounds (WK words _i .. _i+3) on a..h while computing the next four
 * message schedule words
 *     W[t] = W[t-16] + W[t-7] + s0(W[t-15]) + s1(W[t-2])
 * into X0. The vector instructions are scheduled differently from MsgSched
 * and s0 is accumulated in XTMP3 rather than XTMP2. XTMP0..XTMP5 are scratch;
 * SHUF_00BA and SHUF_DC00 must hold the matching shuffle masks. */
wolfSSL 15:117db924cf7c 1530 #define MsgSched_RORX(X0,X1,X2,X3,a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 1531 RND_STEP_RORX_0_1(a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 1532 VPALIGNR (XTMP0, X3, X2, 4) \
wolfSSL 15:117db924cf7c 1533 VPALIGNR (XTMP1, X1, X0, 4) /* XTMP1 = W[-15] */ \
wolfSSL 15:117db924cf7c 1534 RND_STEP_RORX_0_2(a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 1535 RND_STEP_RORX_0_3(a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 1536 VPSRLD (XTMP2, XTMP1, 7) \
wolfSSL 15:117db924cf7c 1537 VPSLLD (XTMP3, XTMP1, 25) /* VPSLLD (XTMP3, XTMP1, (32-7)) */ \
wolfSSL 15:117db924cf7c 1538 RND_STEP_RORX_0_4(a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 1539 RND_STEP_RORX_0_5(a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 1540 VPSRLD (XTMP4, XTMP1, 3) /* XTMP4 = W[-15] >> 3 */ \
wolfSSL 15:117db924cf7c 1541 VPOR (XTMP3, XTMP3, XTMP2) /* XTMP3 = W[-15] MY_ROR 7 */ \
wolfSSL 15:117db924cf7c 1542 RND_STEP_RORX_0_6(a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 1543 RND_STEP_RORX_0_7(a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 1544 RND_STEP_RORX_0_8(a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 1545 \
wolfSSL 15:117db924cf7c 1546 RND_STEP_RORX_1_1(h,a,b,c,d,e,f,g,_i+1) \
wolfSSL 15:117db924cf7c 1547 VPSRLD (XTMP2, XTMP1,18) \
wolfSSL 15:117db924cf7c 1548 RND_STEP_RORX_1_2(h,a,b,c,d,e,f,g,_i+1) \
wolfSSL 15:117db924cf7c 1549 VPSLLD (XTMP1, XTMP1, 14) /* VPSLLD (XTMP1, XTMP1, (32-18)) */ \
wolfSSL 15:117db924cf7c 1550 RND_STEP_RORX_1_3(h,a,b,c,d,e,f,g,_i+1) \
wolfSSL 15:117db924cf7c 1551 VPXOR (XTMP3, XTMP3, XTMP1) \
wolfSSL 15:117db924cf7c 1552 RND_STEP_RORX_1_4(h,a,b,c,d,e,f,g,_i+1) \
wolfSSL 15:117db924cf7c 1553 VPXOR (XTMP3, XTMP3, XTMP2) \
wolfSSL 15:117db924cf7c 1554 /* XTMP3 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR 18 */ \
wolfSSL 15:117db924cf7c 1555 RND_STEP_RORX_1_5(h,a,b,c,d,e,f,g,_i+1) \
wolfSSL 15:117db924cf7c 1556 VPSHUFD (XTMP2, X3, 0b11111010) /* XTMP2 = W[-2] {BBAA}*/ \
wolfSSL 15:117db924cf7c 1557 RND_STEP_RORX_1_6(h,a,b,c,d,e,f,g,_i+1) \
wolfSSL 15:117db924cf7c 1558 VPXOR (XTMP1, XTMP3, XTMP4) /* XTMP1 = s0 */ \
wolfSSL 15:117db924cf7c 1559 RND_STEP_RORX_1_7(h,a,b,c,d,e,f,g,_i+1) \
wolfSSL 15:117db924cf7c 1560 VPSRLD (XTMP4, XTMP2, 10) /* XTMP4 = W[-2] >> 10 {BBAA} */ \
wolfSSL 15:117db924cf7c 1561 RND_STEP_RORX_1_8(h,a,b,c,d,e,f,g,_i+1) \
wolfSSL 15:117db924cf7c 1562 \
wolfSSL 15:117db924cf7c 1563 RND_STEP_RORX_0_1(g,h,a,b,c,d,e,f,_i+2) \
wolfSSL 15:117db924cf7c 1564 VPSRLQ (XTMP3, XTMP2, 19) /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */ \
wolfSSL 15:117db924cf7c 1565 RND_STEP_RORX_0_2(g,h,a,b,c,d,e,f,_i+2) \
wolfSSL 15:117db924cf7c 1566 VPSRLQ (XTMP2, XTMP2, 17) /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */ \
wolfSSL 15:117db924cf7c 1567 VPADDD (XTMP0, XTMP0, X0) \
wolfSSL 15:117db924cf7c 1568 RND_STEP_RORX_0_3(g,h,a,b,c,d,e,f,_i+2) \
wolfSSL 15:117db924cf7c 1569 VPADDD (XTMP0, XTMP0, XTMP1) /* XTMP0 = W[-16] + W[-7] + s0 */ \
wolfSSL 15:117db924cf7c 1570 RND_STEP_RORX_0_4(g,h,a,b,c,d,e,f,_i+2) \
wolfSSL 15:117db924cf7c 1571 VPXOR (XTMP2, XTMP2, XTMP3) \
wolfSSL 15:117db924cf7c 1572 RND_STEP_RORX_0_5(g,h,a,b,c,d,e,f,_i+2) \
wolfSSL 15:117db924cf7c 1573 VPXOR (XTMP4, XTMP4, XTMP2) /* XTMP4 = s1 {xBxA} */ \
wolfSSL 15:117db924cf7c 1574 RND_STEP_RORX_0_6(g,h,a,b,c,d,e,f,_i+2) \
wolfSSL 15:117db924cf7c 1575 VPSHUFB (XTMP4, XTMP4, SHUF_00BA) /* XTMP4 = s1 {00BA} */ \
wolfSSL 15:117db924cf7c 1576 RND_STEP_RORX_0_7(g,h,a,b,c,d,e,f,_i+2) \
wolfSSL 15:117db924cf7c 1577 VPADDD (XTMP0, XTMP0, XTMP4) /* XTMP0 = {..., ..., W[1], W[0]} */ \
wolfSSL 15:117db924cf7c 1578 RND_STEP_RORX_0_8(g,h,a,b,c,d,e,f,_i+2) \
wolfSSL 15:117db924cf7c 1579 \
wolfSSL 15:117db924cf7c 1580 RND_STEP_RORX_1_1(f,g,h,a,b,c,d,e,_i+3) \
wolfSSL 15:117db924cf7c 1581 VPSHUFD (XTMP2, XTMP0, 0b01010000) /* XTMP2 = W[-2] {DDCC} */ \
wolfSSL 15:117db924cf7c 1582 RND_STEP_RORX_1_2(f,g,h,a,b,c,d,e,_i+3) \
wolfSSL 15:117db924cf7c 1583 VPSRLD (XTMP5, XTMP2, 10) /* XTMP5 = W[-2] >> 10 {DDCC} */ \
wolfSSL 15:117db924cf7c 1584 RND_STEP_RORX_1_3(f,g,h,a,b,c,d,e,_i+3) \
wolfSSL 15:117db924cf7c 1585 VPSRLQ (XTMP3, XTMP2, 19) /* XTMP3 = W[-2] MY_ROR 19 {xDxC} */ \
wolfSSL 15:117db924cf7c 1586 RND_STEP_RORX_1_4(f,g,h,a,b,c,d,e,_i+3) \
wolfSSL 15:117db924cf7c 1587 VPSRLQ (XTMP2, XTMP2, 17) /* XTMP2 = W[-2] MY_ROR 17 {xDxC} */ \
wolfSSL 15:117db924cf7c 1588 RND_STEP_RORX_1_5(f,g,h,a,b,c,d,e,_i+3) \
wolfSSL 15:117db924cf7c 1589 VPXOR (XTMP2, XTMP2, XTMP3) \
wolfSSL 15:117db924cf7c 1590 RND_STEP_RORX_1_6(f,g,h,a,b,c,d,e,_i+3) \
wolfSSL 15:117db924cf7c 1591 VPXOR (XTMP5, XTMP5, XTMP2) /* XTMP5 = s1 {xDxC} */ \
wolfSSL 15:117db924cf7c 1592 RND_STEP_RORX_1_7(f,g,h,a,b,c,d,e,_i+3) \
wolfSSL 15:117db924cf7c 1593 VPSHUFB (XTMP5, XTMP5, SHUF_DC00) /* XTMP5 = s1 {DC00} */ \
wolfSSL 15:117db924cf7c 1594 RND_STEP_RORX_1_8(f,g,h,a,b,c,d,e,_i+3) \
wolfSSL 15:117db924cf7c 1595 VPADDD (X0, XTMP5, XTMP0) /* X0 = {W[3], W[2], W[1], W[0]} */
wolfSSL 15:117db924cf7c 1596
wolfSSL 15:117db924cf7c 1597 #endif /* HAVE_INTEL_RORX */
wolfSSL 15:117db924cf7c 1598
wolfSSL 15:117db924cf7c 1599
/* Loads one 64-byte message block from the address in %rax into X0..X3 with
 * unaligned loads and applies the BYTE_FLIP_MASK shuffle to each register.
 * Despite the name, no round constant is added here; that is done by
 * SET_W_K_XFER_4. The caller must have set %rax and loaded BYTE_FLIP_MASK. */
wolfSSL 15:117db924cf7c 1600 #define _W_K_from_buff(X0, X1, X2, X3, BYTE_FLIP_MASK) \
wolfSSL 15:117db924cf7c 1601 "# X0, X1, X2, X3 = W[0..15]\n\t" \
wolfSSL 15:117db924cf7c 1602 "vmovdqu (%%rax), %" #X0 "\n\t" \
wolfSSL 15:117db924cf7c 1603 "vmovdqu 16(%%rax), %" #X1 "\n\t" \
wolfSSL 15:117db924cf7c 1604 VPSHUFB(X0, X0, BYTE_FLIP_MASK) \
wolfSSL 15:117db924cf7c 1605 VPSHUFB(X1, X1, BYTE_FLIP_MASK) \
wolfSSL 15:117db924cf7c 1606 "vmovdqu 32(%%rax), %" #X2 "\n\t" \
wolfSSL 15:117db924cf7c 1607 "vmovdqu 48(%%rax), %" #X3 "\n\t" \
wolfSSL 15:117db924cf7c 1608 VPSHUFB(X2, X2, BYTE_FLIP_MASK) \
wolfSSL 15:117db924cf7c 1609 VPSHUFB(X3, X3, BYTE_FLIP_MASK)
wolfSSL 15:117db924cf7c 1610
/* Wrapper that expands the register alias macros before stringification. */
wolfSSL 15:117db924cf7c 1611 #define W_K_from_buff(X0, X1, X2, X3, BYTE_FLIP_MASK) \
wolfSSL 15:117db924cf7c 1612 _W_K_from_buff(X0, X1, X2, X3, BYTE_FLIP_MASK)
wolfSSL 15:117db924cf7c 1613
wolfSSL 15:117db924cf7c 1614
/* Precomputes W + K for sixteen rounds: adds K[i .. i+15] (byte offset i*4
 * into the K operand) to the schedule words in xmm0..xmm3 and stores the
 * 64-byte result in the WK area, from which the round macros read w_k.
 * The registers are hard-coded: xmm0..xmm3 are the X0..X3 aliases, and
 * xmm4..xmm7 (the XTMP0..XTMP3 aliases) are overwritten as scratch. */
wolfSSL 15:117db924cf7c 1615 #define _SET_W_K_XFER_4(i) \
wolfSSL 15:117db924cf7c 1616 "vpaddd (" #i "*4)+ 0+%[K], %%xmm0, %%xmm4\n\t" \
wolfSSL 15:117db924cf7c 1617 "vpaddd (" #i "*4)+16+%[K], %%xmm1, %%xmm5\n\t" \
wolfSSL 15:117db924cf7c 1618 "vmovdqu %%xmm4, (" WK ")\n\t" \
wolfSSL 15:117db924cf7c 1619 "vmovdqu %%xmm5, 16(" WK ")\n\t" \
wolfSSL 15:117db924cf7c 1620 "vpaddd (" #i "*4)+32+%[K], %%xmm2, %%xmm6\n\t" \
wolfSSL 15:117db924cf7c 1621 "vpaddd (" #i "*4)+48+%[K], %%xmm3, %%xmm7\n\t" \
wolfSSL 15:117db924cf7c 1622 "vmovdqu %%xmm6, 32(" WK ")\n\t" \
wolfSSL 15:117db924cf7c 1623 "vmovdqu %%xmm7, 48(" WK ")\n\t"
wolfSSL 15:117db924cf7c 1624
/* Wrapper that lets the argument be macro-expanded before stringification. */
wolfSSL 15:117db924cf7c 1625 #define SET_W_K_XFER_4(i) \
wolfSSL 15:117db924cf7c 1626 _SET_W_K_XFER_4(i)
wolfSSL 15:117db924cf7c 1627
wolfSSL 15:117db924cf7c 1628
/* Byte-shuffle control tables for vpshufb, loaded into registers by
 * Init_Masks. A control byte of 0xFF zeroes the destination byte; any other
 * value here selects that source byte.
 * mSHUF_00BA packs source dwords 0 and 2 into the low half, zeroing the rest.
 * mSHUF_DC00 packs source dwords 0 and 2 into the high half, zeroing the rest.
 * mBYTE_FLIP_MASK reverses the byte order inside each 32-bit word. */
wolfSSL 15:117db924cf7c 1629 static const ALIGN32 word64 mSHUF_00BA[] =
wolfSSL 15:117db924cf7c 1630 { 0x0b0a090803020100, 0xFFFFFFFFFFFFFFFF }; /* shuffle xBxA -> 00BA */
wolfSSL 15:117db924cf7c 1631 static const ALIGN32 word64 mSHUF_DC00[] =
wolfSSL 15:117db924cf7c 1632 { 0xFFFFFFFFFFFFFFFF, 0x0b0a090803020100 }; /* shuffle xDxC -> DC00 */
wolfSSL 15:117db924cf7c 1633 static const ALIGN32 word64 mBYTE_FLIP_MASK[] =
wolfSSL 15:117db924cf7c 1634 { 0x0405060700010203, 0x0c0d0e0f08090a0b }; /* per-word byte swap */
wolfSSL 15:117db924cf7c 1635
/* Loads the three shuffle tables into the given registers with aligned
 * moves. The enclosing asm statement must supply memory operands named
 * FLIP, SHUF00BA and SHUFDC00. */
wolfSSL 15:117db924cf7c 1636 #define _Init_Masks(mask1, mask2, mask3) \
wolfSSL 15:117db924cf7c 1637 "vmovdqa %[FLIP], %" #mask1 "\n\t" \
wolfSSL 15:117db924cf7c 1638 "vmovdqa %[SHUF00BA], %" #mask2 "\n\t" \
wolfSSL 15:117db924cf7c 1639 "vmovdqa %[SHUFDC00], %" #mask3 "\n\t"
wolfSSL 15:117db924cf7c 1640
/* Wrapper that expands the register alias macros before stringification. */
wolfSSL 15:117db924cf7c 1641 #define Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00) \
wolfSSL 15:117db924cf7c 1642 _Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00)
wolfSSL 15:117db924cf7c 1643
/* XMM register assignment for the AVX1 code. Each alias already carries one
 * '%'; the stringifying macros prepend the second one needed inside an
 * extended asm template. */
/* X0..X3: the sixteen most recent message schedule words. */
wolfSSL 15:117db924cf7c 1644 #define X0 %xmm0
wolfSSL 15:117db924cf7c 1645 #define X1 %xmm1
wolfSSL 15:117db924cf7c 1646 #define X2 %xmm2
wolfSSL 15:117db924cf7c 1647 #define X3 %xmm3
wolfSSL 15:117db924cf7c 1648
/* Scratch registers for the message schedule computation. */
wolfSSL 15:117db924cf7c 1649 #define XTMP0 %xmm4
wolfSSL 15:117db924cf7c 1650 #define XTMP1 %xmm5
wolfSSL 15:117db924cf7c 1651 #define XTMP2 %xmm6
wolfSSL 15:117db924cf7c 1652 #define XTMP3 %xmm7
wolfSSL 15:117db924cf7c 1653 #define XTMP4 %xmm8
wolfSSL 15:117db924cf7c 1654 #define XTMP5 %xmm9
wolfSSL 15:117db924cf7c 1655 #define XFER %xmm10
wolfSSL 15:117db924cf7c 1656
/* Registers holding the shuffle control masks loaded by Init_Masks. */
wolfSSL 15:117db924cf7c 1657 #define SHUF_00BA %xmm11 /* shuffle xBxA -> 00BA */
wolfSSL 15:117db924cf7c 1658 #define SHUF_DC00 %xmm12 /* shuffle xDxC -> DC00 */
wolfSSL 15:117db924cf7c 1659 #define BYTE_FLIP_MASK %xmm13
wolfSSL 15:117db924cf7c 1660
wolfSSL 15:117db924cf7c 1661
/* Processes a single 64-byte block with the AVX1 implementation.
 *
 * The block is read from offset 32 of *sha256 (presumably the buffer that
 * follows the eight 32-bit digest words; confirm against the wc_Sha256
 * layout) and the digest held in *sha256 is updated by STORE_ADD_DIGEST().
 * The whole transform is one inline-assembly statement that lowers the
 * stack pointer by 64 bytes while it runs and restores it before returning.
 *
 * sha256  SHA-256 state to update.
 * returns 0 always.
 */
wolfSSL 15:117db924cf7c 1662 SHA256_NOINLINE static int Transform_Sha256_AVX1(wc_Sha256* sha256)
wolfSSL 15:117db924cf7c 1663 {
wolfSSL 15:117db924cf7c 1664 __asm__ __volatile__ (
wolfSSL 15:117db924cf7c 1665
/* Reserve 64 bytes of stack (presumably the WK area; WK is defined elsewhere). */
wolfSSL 15:117db924cf7c 1666 "subq $64, %%rsp\n\t"
wolfSSL 15:117db924cf7c 1667
/* rax = address of the block to process, as expected by W_K_from_buff. */
wolfSSL 15:117db924cf7c 1668 "leaq 32(%[sha256]), %%rax\n\t"
wolfSSL 15:117db924cf7c 1669 Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00)
wolfSSL 15:117db924cf7c 1670 LOAD_DIGEST()
wolfSSL 15:117db924cf7c 1671
wolfSSL 15:117db924cf7c 1672 W_K_from_buff(X0, X1, X2, X3, BYTE_FLIP_MASK)
wolfSSL 15:117db924cf7c 1673
/* Seed the round macros: L4 = r9d ^ r10d and L1 = r12d. The first round
 * expects L4 = b ^ c and L1 = e, so these are presumably the registers
 * behind S_1, S_2 and S_4 (defined elsewhere; confirm). */
wolfSSL 15:117db924cf7c 1674 "movl %%r9d, " L4 "\n\t"
wolfSSL 15:117db924cf7c 1675 "movl %%r12d, " L1 "\n\t"
wolfSSL 15:117db924cf7c 1676 "xorl %%r10d, " L4 "\n\t"
wolfSSL 15:117db924cf7c 1677
/* Rounds 0-15, computing W[16..31] on the way. */
wolfSSL 15:117db924cf7c 1678 SET_W_K_XFER_4(0)
wolfSSL 15:117db924cf7c 1679 MsgSched(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
wolfSSL 15:117db924cf7c 1680 MsgSched(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4)
wolfSSL 15:117db924cf7c 1681 MsgSched(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8)
wolfSSL 15:117db924cf7c 1682 MsgSched(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
wolfSSL 15:117db924cf7c 1683
/* Rounds 16-31, computing W[32..47] on the way. */
wolfSSL 15:117db924cf7c 1684 SET_W_K_XFER_4(16)
wolfSSL 15:117db924cf7c 1685 MsgSched(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
wolfSSL 15:117db924cf7c 1686 MsgSched(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4)
wolfSSL 15:117db924cf7c 1687 MsgSched(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8)
wolfSSL 15:117db924cf7c 1688 MsgSched(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
wolfSSL 15:117db924cf7c 1689
/* Rounds 32-47, computing W[48..63] on the way. */
wolfSSL 15:117db924cf7c 1690 SET_W_K_XFER_4(32)
wolfSSL 15:117db924cf7c 1691 MsgSched(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
wolfSSL 15:117db924cf7c 1692 MsgSched(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4)
wolfSSL 15:117db924cf7c 1693 MsgSched(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8)
wolfSSL 15:117db924cf7c 1694 MsgSched(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
wolfSSL 15:117db924cf7c 1695
/* Rounds 48-63: no further schedule words are needed, so plain rounds. */
wolfSSL 15:117db924cf7c 1696 SET_W_K_XFER_4(48)
wolfSSL 15:117db924cf7c 1697 RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
wolfSSL 15:117db924cf7c 1698 RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4)
wolfSSL 15:117db924cf7c 1699 RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8)
wolfSSL 15:117db924cf7c 1700 RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
wolfSSL 15:117db924cf7c 1701
wolfSSL 15:117db924cf7c 1702 STORE_ADD_DIGEST()
wolfSSL 15:117db924cf7c 1703
/* Release the 64 bytes reserved at the top of the asm block. */
wolfSSL 15:117db924cf7c 1704 "addq $64, %%rsp\n\t"
wolfSSL 15:117db924cf7c 1705
wolfSSL 15:117db924cf7c 1706 :
wolfSSL 15:117db924cf7c 1707 : [FLIP] "m" (mBYTE_FLIP_MASK[0]),
wolfSSL 15:117db924cf7c 1708 [SHUF00BA] "m" (mSHUF_00BA[0]),
wolfSSL 15:117db924cf7c 1709 [SHUFDC00] "m" (mSHUF_DC00[0]),
wolfSSL 15:117db924cf7c 1710 [sha256] "r" (sha256),
wolfSSL 15:117db924cf7c 1711 [K] "m" (K)
wolfSSL 15:117db924cf7c 1712 : WORK_REGS, STATE_REGS, XMM_REGS, "memory"
wolfSSL 15:117db924cf7c 1713 );
wolfSSL 15:117db924cf7c 1714
wolfSSL 15:117db924cf7c 1715 return 0;
wolfSSL 15:117db924cf7c 1716 }
wolfSSL 15:117db924cf7c 1717
/* Hash a run of 64-byte blocks using the AVX1 inline-assembly rounds.
 *
 * The input pointer is read from offset 120 of *sha256 (presumably
 * sha256->data - confirm against the wc_Sha256 layout). Each pass of the loop
 * consumes one block, advances the pointer by 64, writes it back to offset 120
 * and subtracts 64 from len.
 *
 * sha256  Hash object holding the digest state and the input pointer.
 * len     Number of bytes to hash. The loop only exits when the subtraction
 *         reaches exactly zero, so this is expected to be a non-zero multiple
 *         of 64.
 * returns 0 always.
 */
wolfSSL 15:117db924cf7c 1718 SHA256_NOINLINE static int Transform_Sha256_AVX1_Len(wc_Sha256* sha256,
wolfSSL 15:117db924cf7c 1719 word32 len)
wolfSSL 15:117db924cf7c 1720 {
wolfSSL 15:117db924cf7c 1721 __asm__ __volatile__ (
wolfSSL 15:117db924cf7c 1722
/* Reserve 64 bytes of stack scratch space; released again before exit. */
wolfSSL 15:117db924cf7c 1723 "subq $64, %%rsp\n\t"
/* rax = current input pointer, loaded from offset 120 of the hash object. */
wolfSSL 15:117db924cf7c 1724 "movq 120(%[sha256]), %%rax\n\t"
wolfSSL 15:117db924cf7c 1725
wolfSSL 15:117db924cf7c 1726 Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00)
wolfSSL 15:117db924cf7c 1727 LOAD_DIGEST()
wolfSSL 15:117db924cf7c 1728
wolfSSL 15:117db924cf7c 1729 "# Start of loop processing a block\n"
wolfSSL 15:117db924cf7c 1730 "1:\n\t"
wolfSSL 15:117db924cf7c 1731
wolfSSL 15:117db924cf7c 1732 W_K_from_buff(X0, X1, X2, X3, BYTE_FLIP_MASK)
wolfSSL 15:117db924cf7c 1733
wolfSSL 15:117db924cf7c 1734 "movl %%r9d, " L4 "\n\t"
wolfSSL 15:117db924cf7c 1735 "movl %%r12d, " L1 "\n\t"
wolfSSL 15:117db924cf7c 1736 "xorl %%r10d, " L4 "\n\t"
wolfSSL 15:117db924cf7c 1737
/* Three groups of 16 rounds that also extend the message schedule. The S_x
 * argument order alternates between calls because each call performs four
 * rounds. */
wolfSSL 15:117db924cf7c 1738 SET_W_K_XFER_4(0)
wolfSSL 15:117db924cf7c 1739 MsgSched(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
wolfSSL 15:117db924cf7c 1740 MsgSched(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4)
wolfSSL 15:117db924cf7c 1741 MsgSched(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8)
wolfSSL 15:117db924cf7c 1742 MsgSched(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
wolfSSL 15:117db924cf7c 1743
wolfSSL 15:117db924cf7c 1744 SET_W_K_XFER_4(16)
wolfSSL 15:117db924cf7c 1745 MsgSched(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
wolfSSL 15:117db924cf7c 1746 MsgSched(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4)
wolfSSL 15:117db924cf7c 1747 MsgSched(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8)
wolfSSL 15:117db924cf7c 1748 MsgSched(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
wolfSSL 15:117db924cf7c 1749
wolfSSL 15:117db924cf7c 1750 SET_W_K_XFER_4(32)
wolfSSL 15:117db924cf7c 1751 MsgSched(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
wolfSSL 15:117db924cf7c 1752 MsgSched(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4)
wolfSSL 15:117db924cf7c 1753 MsgSched(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8)
wolfSSL 15:117db924cf7c 1754 MsgSched(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
wolfSSL 15:117db924cf7c 1755
/* Final 16 rounds: no further message scheduling is interleaved. */
wolfSSL 15:117db924cf7c 1756 SET_W_K_XFER_4(48)
wolfSSL 15:117db924cf7c 1757 RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
wolfSSL 15:117db924cf7c 1758 RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4)
wolfSSL 15:117db924cf7c 1759 RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8)
wolfSSL 15:117db924cf7c 1760 RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
/* Reload the input pointer from the hash object before advancing it. */
wolfSSL 15:117db924cf7c 1761 "movq 120(%[sha256]), %%rax\n\t"
wolfSSL 15:117db924cf7c 1762
wolfSSL 15:117db924cf7c 1763 ADD_DIGEST()
wolfSSL 15:117db924cf7c 1764
/* Step to the next block. The "jnz" below relies on the flags set by this
 * "subl"; STORE_DIGEST and the "movq" in between presumably leave the flags
 * untouched - confirm against the STORE_DIGEST definition. */
wolfSSL 15:117db924cf7c 1765 "addq $64, %%rax\n\t"
wolfSSL 15:117db924cf7c 1766 "subl $64, %[len]\n\t"
wolfSSL 15:117db924cf7c 1767
wolfSSL 15:117db924cf7c 1768 STORE_DIGEST()
wolfSSL 15:117db924cf7c 1769
wolfSSL 15:117db924cf7c 1770 "movq %%rax, 120(%[sha256])\n\t"
wolfSSL 15:117db924cf7c 1771 "jnz 1b\n\t"
wolfSSL 15:117db924cf7c 1772
wolfSSL 15:117db924cf7c 1773 "addq $64, %%rsp\n\t"
wolfSSL 15:117db924cf7c 1774
/* NOTE(review): len is listed as an input-only operand but is modified by the
 * "subl" above - confirm this is acceptable for the compilers in use. */
wolfSSL 15:117db924cf7c 1775 :
wolfSSL 15:117db924cf7c 1776 : [FLIP] "m" (mBYTE_FLIP_MASK[0]),
wolfSSL 15:117db924cf7c 1777 [SHUF00BA] "m" (mSHUF_00BA[0]),
wolfSSL 15:117db924cf7c 1778 [SHUFDC00] "m" (mSHUF_DC00[0]),
wolfSSL 15:117db924cf7c 1779 [sha256] "r" (sha256),
wolfSSL 15:117db924cf7c 1780 [len] "r" (len),
wolfSSL 15:117db924cf7c 1781 [K] "m" (K)
wolfSSL 15:117db924cf7c 1782 : WORK_REGS, STATE_REGS, XMM_REGS, "memory"
wolfSSL 15:117db924cf7c 1783 );
wolfSSL 15:117db924cf7c 1784
wolfSSL 15:117db924cf7c 1785 return 0;
wolfSSL 15:117db924cf7c 1786 }
wolfSSL 15:117db924cf7c 1787 #endif /* HAVE_INTEL_AVX1 */
wolfSSL 15:117db924cf7c 1788
wolfSSL 15:117db924cf7c 1789 #if defined(HAVE_INTEL_AVX2) && defined(HAVE_INTEL_RORX)
/* Hash a single 64-byte block using the AVX1 message schedule together with
 * the RORX-based round macros.
 *
 * The block is read from offset 32 of *sha256 (presumably sha256->buffer -
 * confirm against the wc_Sha256 layout) and the result is folded into the
 * digest by STORE_ADD_DIGEST.
 *
 * sha256  Hash object holding the digest state and the block to process.
 * returns 0 always.
 */
wolfSSL 15:117db924cf7c 1790 SHA256_NOINLINE static int Transform_Sha256_AVX1_RORX(wc_Sha256* sha256)
wolfSSL 15:117db924cf7c 1791 {
wolfSSL 15:117db924cf7c 1792 __asm__ __volatile__ (
wolfSSL 15:117db924cf7c 1793
/* Reserve 64 bytes of stack scratch space; released again before exit. */
wolfSSL 15:117db924cf7c 1794 "subq $64, %%rsp\n\t"
wolfSSL 15:117db924cf7c 1795
wolfSSL 15:117db924cf7c 1796 Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00)
/* rax = address of the block to hash (offset 32 inside the hash object). */
wolfSSL 15:117db924cf7c 1797 "leaq 32(%[sha256]), %%rax\n\t"
wolfSSL 15:117db924cf7c 1798 W_K_from_buff(X0, X1, X2, X3, BYTE_FLIP_MASK)
wolfSSL 15:117db924cf7c 1799
wolfSSL 15:117db924cf7c 1800 LOAD_DIGEST()
wolfSSL 15:117db924cf7c 1801
/* Three groups of 16 rounds that also extend the message schedule. The S_x
 * argument order alternates between calls because each call performs four
 * rounds. */
wolfSSL 15:117db924cf7c 1802 SET_W_K_XFER_4(0)
wolfSSL 15:117db924cf7c 1803 "movl %%r9d, " L4 "\n\t"
wolfSSL 15:117db924cf7c 1804 "rorx $6, %%r12d, " L1 "\n\t"
wolfSSL 15:117db924cf7c 1805 "xorl %%r10d, " L4 "\n\t"
wolfSSL 15:117db924cf7c 1806 MsgSched_RORX(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
wolfSSL 15:117db924cf7c 1807 MsgSched_RORX(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4)
wolfSSL 15:117db924cf7c 1808 MsgSched_RORX(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8)
wolfSSL 15:117db924cf7c 1809 MsgSched_RORX(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
wolfSSL 15:117db924cf7c 1810
wolfSSL 15:117db924cf7c 1811 SET_W_K_XFER_4(16)
wolfSSL 15:117db924cf7c 1812 MsgSched_RORX(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
wolfSSL 15:117db924cf7c 1813 MsgSched_RORX(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4)
wolfSSL 15:117db924cf7c 1814 MsgSched_RORX(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8)
wolfSSL 15:117db924cf7c 1815 MsgSched_RORX(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
wolfSSL 15:117db924cf7c 1816
wolfSSL 15:117db924cf7c 1817 SET_W_K_XFER_4(32)
wolfSSL 15:117db924cf7c 1818 MsgSched_RORX(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
wolfSSL 15:117db924cf7c 1819 MsgSched_RORX(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4)
wolfSSL 15:117db924cf7c 1820 MsgSched_RORX(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8)
wolfSSL 15:117db924cf7c 1821 MsgSched_RORX(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
wolfSSL 15:117db924cf7c 1822
/* Final 16 rounds: no further message scheduling is interleaved. */
wolfSSL 15:117db924cf7c 1823 SET_W_K_XFER_4(48)
wolfSSL 15:117db924cf7c 1824 "xorl " L3 ", " L3 "\n\t"
wolfSSL 15:117db924cf7c 1825 RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
wolfSSL 15:117db924cf7c 1826 RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4)
wolfSSL 15:117db924cf7c 1827 RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8)
wolfSSL 15:117db924cf7c 1828 RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
wolfSSL 15:117db924cf7c 1829 /* Prev RND: h += Maj(a,b,c) */
wolfSSL 15:117db924cf7c 1830 "addl " L3 ", %%r8d\n\t"
wolfSSL 15:117db924cf7c 1831
wolfSSL 15:117db924cf7c 1832 STORE_ADD_DIGEST()
wolfSSL 15:117db924cf7c 1833
wolfSSL 15:117db924cf7c 1834 "addq $64, %%rsp\n\t"
wolfSSL 15:117db924cf7c 1835
wolfSSL 15:117db924cf7c 1836 :
wolfSSL 15:117db924cf7c 1837 : [FLIP] "m" (mBYTE_FLIP_MASK[0]),
wolfSSL 15:117db924cf7c 1838 [SHUF00BA] "m" (mSHUF_00BA[0]),
wolfSSL 15:117db924cf7c 1839 [SHUFDC00] "m" (mSHUF_DC00[0]),
wolfSSL 15:117db924cf7c 1840 [sha256] "r" (sha256),
wolfSSL 15:117db924cf7c 1841 [K] "m" (K)
wolfSSL 15:117db924cf7c 1842 : WORK_REGS, STATE_REGS, XMM_REGS, "memory"
wolfSSL 15:117db924cf7c 1843 );
wolfSSL 15:117db924cf7c 1844
wolfSSL 15:117db924cf7c 1845 return 0;
wolfSSL 15:117db924cf7c 1846 }
wolfSSL 15:117db924cf7c 1847
wolfSSL 15:117db924cf7c 1848 SHA256_NOINLINE static int Transform_Sha256_AVX1_RORX_Len(wc_Sha256* sha256,
wolfSSL 15:117db924cf7c 1849 word32 len)
wolfSSL 15:117db924cf7c 1850 {
wolfSSL 15:117db924cf7c 1851 __asm__ __volatile__ (
wolfSSL 15:117db924cf7c 1852
wolfSSL 15:117db924cf7c 1853 "subq $64, %%rsp\n\t"
wolfSSL 15:117db924cf7c 1854 "movq 120(%[sha256]), %%rax\n\t"
wolfSSL 15:117db924cf7c 1855
wolfSSL 15:117db924cf7c 1856 Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00)
wolfSSL 15:117db924cf7c 1857 LOAD_DIGEST()
wolfSSL 15:117db924cf7c 1858
wolfSSL 15:117db924cf7c 1859 "# Start of loop processing a block\n"
wolfSSL 15:117db924cf7c 1860 "1:\n\t"
wolfSSL 15:117db924cf7c 1861
wolfSSL 15:117db924cf7c 1862 W_K_from_buff(X0, X1, X2, X3, BYTE_FLIP_MASK)
wolfSSL 15:117db924cf7c 1863
wolfSSL 15:117db924cf7c 1864 SET_W_K_XFER_4(0)
wolfSSL 15:117db924cf7c 1865 "movl %%r9d, " L4 "\n\t"
wolfSSL 15:117db924cf7c 1866 "rorx $6, %%r12d, " L1 "\n\t"
wolfSSL 15:117db924cf7c 1867 "xorl %%r10d, " L4 "\n\t"
wolfSSL 15:117db924cf7c 1868 MsgSched_RORX(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
wolfSSL 15:117db924cf7c 1869 MsgSched_RORX(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4)
wolfSSL 15:117db924cf7c 1870 MsgSched_RORX(X2, X3, X0, X1, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 8)
wolfSSL 15:117db924cf7c 1871 MsgSched_RORX(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
wolfSSL 15:117db924cf7c 1872
wolfSSL 15:117db924cf7c 1873 SET_W_K_XFER_4(16)
wolfSSL 15:117db924cf7c 1874 MsgSched_RORX(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
wolfSSL 15:117db924cf7c 1875 MsgSched_RORX(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4)
wolfSSL 15:117db924cf7c 1876 MsgSched_RORX(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8)
wolfSSL 15:117db924cf7c 1877 MsgSched_RORX(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
wolfSSL 15:117db924cf7c 1878
wolfSSL 15:117db924cf7c 1879 SET_W_K_XFER_4(32)
wolfSSL 15:117db924cf7c 1880 MsgSched_RORX(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
wolfSSL 15:117db924cf7c 1881 MsgSched_RORX(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4)
wolfSSL 15:117db924cf7c 1882 MsgSched_RORX(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8)
wolfSSL 15:117db924cf7c 1883 MsgSched_RORX(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
wolfSSL 15:117db924cf7c 1884
wolfSSL 15:117db924cf7c 1885 SET_W_K_XFER_4(48)
wolfSSL 15:117db924cf7c 1886 "xorl " L3 ", " L3 "\n\t"
wolfSSL 15:117db924cf7c 1887 "xorl " L2 ", " L2 "\n\t"
wolfSSL 15:117db924cf7c 1888 RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
wolfSSL 15:117db924cf7c 1889 RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4)
wolfSSL 15:117db924cf7c 1890 RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8)
wolfSSL 15:117db924cf7c 1891 RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
wolfSSL 15:117db924cf7c 1892 /* Prev RND: h += Maj(a,b,c) */
wolfSSL 15:117db924cf7c 1893 "addl " L3 ", %%r8d\n\t"
wolfSSL 15:117db924cf7c 1894 "movq 120(%[sha256]), %%rax\n\t"
wolfSSL 15:117db924cf7c 1895
wolfSSL 15:117db924cf7c 1896 ADD_DIGEST()
wolfSSL 15:117db924cf7c 1897
wolfSSL 15:117db924cf7c 1898 "addq $64, %%rax\n\t"
wolfSSL 15:117db924cf7c 1899 "subl $64, %[len]\n\t"
wolfSSL 15:117db924cf7c 1900
wolfSSL 15:117db924cf7c 1901 STORE_DIGEST()
wolfSSL 15:117db924cf7c 1902
wolfSSL 15:117db924cf7c 1903 "movq %%rax, 120(%[sha256])\n\t"
wolfSSL 15:117db924cf7c 1904 "jnz 1b\n\t"
wolfSSL 15:117db924cf7c 1905
wolfSSL 15:117db924cf7c 1906 "addq $64, %%rsp\n\t"
wolfSSL 15:117db924cf7c 1907
wolfSSL 15:117db924cf7c 1908 :
wolfSSL 15:117db924cf7c 1909 : [FLIP] "m" (mBYTE_FLIP_MASK[0]),
wolfSSL 15:117db924cf7c 1910 [SHUF00BA] "m" (mSHUF_00BA[0]),
wolfSSL 15:117db924cf7c 1911 [SHUFDC00] "m" (mSHUF_DC00[0]),
wolfSSL 15:117db924cf7c 1912 [sha256] "r" (sha256),
wolfSSL 15:117db924cf7c 1913 [len] "r" (len),
wolfSSL 15:117db924cf7c 1914 [K] "m" (K)
wolfSSL 15:117db924cf7c 1915 : WORK_REGS, STATE_REGS, XMM_REGS, "memory"
wolfSSL 15:117db924cf7c 1916 );
wolfSSL 15:117db924cf7c 1917
wolfSSL 15:117db924cf7c 1918 return 0;
wolfSSL 15:117db924cf7c 1919 }
wolfSSL 15:117db924cf7c 1920 #endif /* HAVE_INTEL_AVX2 && HAVE_INTEL_RORX */
wolfSSL 15:117db924cf7c 1921
wolfSSL 15:117db924cf7c 1922
wolfSSL 15:117db924cf7c 1923 #if defined(HAVE_INTEL_AVX2)
/* Register assignment for the AVX2 code paths. */
/* Message schedule words: the MsgSched_Y* macros are invoked with these as
 * their Y0..Y3 arguments. */
wolfSSL 15:117db924cf7c 1924 #define Y0 %ymm0
wolfSSL 15:117db924cf7c 1925 #define Y1 %ymm1
wolfSSL 15:117db924cf7c 1926 #define Y2 %ymm2
wolfSSL 15:117db924cf7c 1927 #define Y3 %ymm3
wolfSSL 15:117db924cf7c 1928
/* Temporaries used while computing the message schedule. */
wolfSSL 15:117db924cf7c 1929 #define YTMP0 %ymm4
wolfSSL 15:117db924cf7c 1930 #define YTMP1 %ymm5
wolfSSL 15:117db924cf7c 1931 #define YTMP2 %ymm6
wolfSSL 15:117db924cf7c 1932 #define YTMP3 %ymm7
wolfSSL 15:117db924cf7c 1933 #define YTMP4 %ymm8
wolfSSL 15:117db924cf7c 1934 #define YTMP5 %ymm9
wolfSSL 15:117db924cf7c 1935 #define YXFER %ymm10
wolfSSL 15:117db924cf7c 1936
/* Constant shuffle masks, loaded once by INIT_MASKS_Y. */
wolfSSL 15:117db924cf7c 1937 #define SHUF_Y_00BA %ymm11 /* shuffle xBxA -> 00BA */
wolfSSL 15:117db924cf7c 1938 #define SHUF_Y_DC00 %ymm12 /* shuffle xDxC -> DC00 */
wolfSSL 15:117db924cf7c 1939 #define BYTE_FLIP_Y_MASK %ymm13
wolfSSL 15:117db924cf7c 1940
/* Clobber list naming every ymm register assigned above (ymm0 - ymm13). */
wolfSSL 15:117db924cf7c 1941 #define YMM_REGS "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", \
wolfSSL 15:117db924cf7c 1942 "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13"
wolfSSL 15:117db924cf7c 1943
/* Compute the next message schedule words into Y0 while performing four
 * rounds (_i to _i+3), with the vector instructions interleaved between the
 * RND_STEP_* pieces of the rounds.
 *
 * Y0..Y3   Registers holding the current schedule words; Y0 is overwritten
 *          with the newly computed words.
 * a..h     Working variable registers for round _i; the macro rotates them
 *          itself for rounds _i+1 to _i+3.
 * _i       Index of the first of the four rounds.
 *
 * Uses YTMP0 - YTMP5 as scratch and SHUF_Y_00BA / SHUF_Y_DC00 as constants.
 */
wolfSSL 15:117db924cf7c 1944 #define MsgSched_Y(Y0,Y1,Y2,Y3,a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 1945 RND_STEP_0_1(a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 1946 VPALIGNR (YTMP1, Y1, Y0, 4) /* YTMP1 = W[-15] */ \
wolfSSL 15:117db924cf7c 1947 VPALIGNR (YTMP0, Y3, Y2, 4) /* YTMP0 = W[-7] */ \
wolfSSL 15:117db924cf7c 1948 RND_STEP_0_2(a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 1949 RND_STEP_0_3(a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 1950 VPSRLD (YTMP2, YTMP1, 7) /* YTMP2 = W[-15] >> 7 */ \
wolfSSL 15:117db924cf7c 1951 VPSLLD (YTMP3, YTMP1, 25) /* YTMP3 = W[-15] << (32-7) */ \
wolfSSL 15:117db924cf7c 1952 RND_STEP_0_4(a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 1953 RND_STEP_0_5(a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 1954 VPSRLD (YTMP4, YTMP1, 18) /* YTMP4 = W[-15] >> 18 */ \
wolfSSL 15:117db924cf7c 1955 VPSLLD (YTMP5, YTMP1, 14) /* YTMP5 = W[-15] << (32-18) */ \
wolfSSL 15:117db924cf7c 1956 RND_STEP_0_6(a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 1957 RND_STEP_0_7(a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 1958 VPOR (YTMP2, YTMP3, YTMP2) /* YTMP2 = W[-15] >>> 7 */ \
wolfSSL 15:117db924cf7c 1959 VPOR (YTMP4, YTMP5, YTMP4) /* YTMP4 = W[-15] >>> 18 */ \
wolfSSL 15:117db924cf7c 1960 RND_STEP_0_8(a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 1961 RND_STEP_1_1(h,a,b,c,d,e,f,g,_i+1) \
wolfSSL 15:117db924cf7c 1962 RND_STEP_1_2(h,a,b,c,d,e,f,g,_i+1) \
wolfSSL 15:117db924cf7c 1963 VPSRLD (YTMP5, YTMP1, 3) /* YTMP5 = W[-15] >> 3 */ \
wolfSSL 15:117db924cf7c 1964 VPXOR (YTMP2, YTMP4, YTMP2) /* YTMP2 = W[-15] >>> 7 ^ W[-15] >>> 18 */ \
wolfSSL 15:117db924cf7c 1965 RND_STEP_1_3(h,a,b,c,d,e,f,g,_i+1) \
wolfSSL 15:117db924cf7c 1966 RND_STEP_1_4(h,a,b,c,d,e,f,g,_i+1) \
wolfSSL 15:117db924cf7c 1967 VPXOR (YTMP1, YTMP5, YTMP2) /* YTMP1 = s0 */ \
wolfSSL 15:117db924cf7c 1968 VPSHUFD (YTMP2, Y3, 0b11111010) /* YTMP2 = W[-2] {BBAA}*/ \
wolfSSL 15:117db924cf7c 1969 RND_STEP_1_5(h,a,b,c,d,e,f,g,_i+1) \
wolfSSL 15:117db924cf7c 1970 RND_STEP_1_6(h,a,b,c,d,e,f,g,_i+1) \
wolfSSL 15:117db924cf7c 1971 VPSRLD (YTMP4, YTMP2, 10) /* YTMP4 = W[-2] >> 10 {BBAA} */ \
wolfSSL 15:117db924cf7c 1972 VPSRLQ (YTMP3, YTMP2, 19) /* YTMP3 = W[-2] MY_ROR 19 {xBxA} */ \
wolfSSL 15:117db924cf7c 1973 RND_STEP_1_7(h,a,b,c,d,e,f,g,_i+1) \
wolfSSL 15:117db924cf7c 1974 RND_STEP_1_8(h,a,b,c,d,e,f,g,_i+1) \
wolfSSL 15:117db924cf7c 1975 RND_STEP_0_1(g,h,a,b,c,d,e,f,_i+2) \
wolfSSL 15:117db924cf7c 1976 VPSRLQ (YTMP2, YTMP2, 17) /* YTMP2 = W[-2] MY_ROR 17 {xBxA} */ \
wolfSSL 15:117db924cf7c 1977 VPADDD (YTMP0, YTMP0, Y0) \
wolfSSL 15:117db924cf7c 1978 RND_STEP_0_2(g,h,a,b,c,d,e,f,_i+2) \
wolfSSL 15:117db924cf7c 1979 RND_STEP_0_3(g,h,a,b,c,d,e,f,_i+2) \
wolfSSL 15:117db924cf7c 1980 RND_STEP_0_4(g,h,a,b,c,d,e,f,_i+2) \
wolfSSL 15:117db924cf7c 1981 VPXOR (YTMP2, YTMP3, YTMP2) \
wolfSSL 15:117db924cf7c 1982 VPADDD (YTMP0, YTMP0, YTMP1) /* YTMP0 = W[-16] + W[-7] + s0 */ \
wolfSSL 15:117db924cf7c 1983 RND_STEP_0_5(g,h,a,b,c,d,e,f,_i+2) \
wolfSSL 15:117db924cf7c 1984 VPXOR (YTMP4, YTMP4, YTMP2) /* YTMP4 = s1 {xBxA} */ \
wolfSSL 15:117db924cf7c 1985 RND_STEP_0_6(g,h,a,b,c,d,e,f,_i+2) \
wolfSSL 15:117db924cf7c 1986 VPSHUFB (YTMP4, YTMP4, SHUF_Y_00BA) /* YTMP4 = s1 {00BA} */ \
wolfSSL 15:117db924cf7c 1987 RND_STEP_0_7(g,h,a,b,c,d,e,f,_i+2) \
wolfSSL 15:117db924cf7c 1988 VPADDD (YTMP0, YTMP0, YTMP4) /* YTMP0 = {..., ..., W[1], W[0]} */ \
wolfSSL 15:117db924cf7c 1989 RND_STEP_0_8(g,h,a,b,c,d,e,f,_i+2) \
wolfSSL 15:117db924cf7c 1990 RND_STEP_1_1(f,g,h,a,b,c,d,e,_i+3) \
wolfSSL 15:117db924cf7c 1991 VPSHUFD (YTMP2, YTMP0, 0b01010000) /* YTMP2 = W[-2] {DDCC} */ \
wolfSSL 15:117db924cf7c 1992 RND_STEP_1_2(f,g,h,a,b,c,d,e,_i+3) \
wolfSSL 15:117db924cf7c 1993 VPSRLQ (YTMP4, YTMP2, 17) /* YTMP4 = W[-2] MY_ROR 17 {xDxC} */ \
wolfSSL 15:117db924cf7c 1994 VPSRLQ (YTMP3, YTMP2, 19) /* YTMP3 = W[-2] MY_ROR 19 {xDxC} */ \
wolfSSL 15:117db924cf7c 1995 RND_STEP_1_3(f,g,h,a,b,c,d,e,_i+3) \
wolfSSL 15:117db924cf7c 1996 RND_STEP_1_4(f,g,h,a,b,c,d,e,_i+3) \
wolfSSL 15:117db924cf7c 1997 VPSRLD (YTMP5, YTMP2, 10) /* YTMP5 = W[-2] >> 10 {DDCC} */ \
wolfSSL 15:117db924cf7c 1998 VPXOR (YTMP4, YTMP3, YTMP4) \
wolfSSL 15:117db924cf7c 1999 RND_STEP_1_5(f,g,h,a,b,c,d,e,_i+3) \
wolfSSL 15:117db924cf7c 2000 RND_STEP_1_6(f,g,h,a,b,c,d,e,_i+3) \
wolfSSL 15:117db924cf7c 2001 VPXOR (YTMP5, YTMP4, YTMP5) /* YTMP5 = s1 {xDxC} */ \
wolfSSL 15:117db924cf7c 2002 RND_STEP_1_7(f,g,h,a,b,c,d,e,_i+3) \
wolfSSL 15:117db924cf7c 2003 VPSHUFB (YTMP5, YTMP5, SHUF_Y_DC00) /* YTMP5 = s1 {DC00} */ \
wolfSSL 15:117db924cf7c 2004 RND_STEP_1_8(f,g,h,a,b,c,d,e,_i+3) \
wolfSSL 15:117db924cf7c 2005 VPADDD (Y0, YTMP5, YTMP0) /* Y0 = {W[3], W[2], W[1], W[0]} */
wolfSSL 15:117db924cf7c 2006
wolfSSL 15:117db924cf7c 2007 #if defined(HAVE_INTEL_RORX)
wolfSSL 15:117db924cf7c 2008
/* RORX variant of MsgSched_Y: compute the next message schedule words into Y0
 * while performing four rounds (_i to _i+3) built from the RND_STEP_RORX_*
 * pieces, with one vector instruction placed after most round steps.
 *
 * Y0..Y3   Registers holding the current schedule words; Y0 is overwritten
 *          with the newly computed words.
 * a..h     Working variable registers for round _i; the macro rotates them
 *          itself for rounds _i+1 to _i+3.
 * _i       Index of the first of the four rounds.
 *
 * Uses YTMP0 - YTMP5 as scratch and SHUF_Y_00BA / SHUF_Y_DC00 as constants.
 */
wolfSSL 15:117db924cf7c 2009 #define MsgSched_Y_RORX(Y0,Y1,Y2,Y3,a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 2010 RND_STEP_RORX_0_1(a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 2011 VPALIGNR (YTMP1, Y1, Y0, 4) /* YTMP1 = W[-15] */ \
wolfSSL 15:117db924cf7c 2012 RND_STEP_RORX_0_2(a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 2013 VPALIGNR (YTMP0, Y3, Y2, 4) /* YTMP0 = W[-7] */ \
wolfSSL 15:117db924cf7c 2014 RND_STEP_RORX_0_3(a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 2015 VPSRLD (YTMP2, YTMP1, 7) /* YTMP2 = W[-15] >> 7 */ \
wolfSSL 15:117db924cf7c 2016 RND_STEP_RORX_0_4(a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 2017 VPSLLD (YTMP3, YTMP1, 25) /* YTMP3 = W[-15] << (32-7) */ \
wolfSSL 15:117db924cf7c 2018 RND_STEP_RORX_0_5(a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 2019 VPSRLD (YTMP4, YTMP1, 18) /* YTMP4 = W[-15] >> 18 */ \
wolfSSL 15:117db924cf7c 2020 RND_STEP_RORX_0_6(a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 2021 VPSLLD (YTMP5, YTMP1, 14) /* YTMP5 = W[-15] << (32-18) */ \
wolfSSL 15:117db924cf7c 2022 RND_STEP_RORX_0_7(a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 2023 VPOR (YTMP2, YTMP2, YTMP3) /* YTMP2 = W[-15] >>> 7 */ \
wolfSSL 15:117db924cf7c 2024 RND_STEP_RORX_0_8(a,b,c,d,e,f,g,h,_i) \
wolfSSL 15:117db924cf7c 2025 VPOR (YTMP4, YTMP4, YTMP5) /* YTMP4 = W[-15] >>> 18 */ \
wolfSSL 15:117db924cf7c 2026 RND_STEP_RORX_1_1(h,a,b,c,d,e,f,g,_i+1) \
wolfSSL 15:117db924cf7c 2027 VPSRLD (YTMP5, YTMP1, 3) /* YTMP5 = W[-15] >> 3 */ \
wolfSSL 15:117db924cf7c 2028 RND_STEP_RORX_1_2(h,a,b,c,d,e,f,g,_i+1) \
wolfSSL 15:117db924cf7c 2029 VPXOR (YTMP2, YTMP2, YTMP4) /* YTMP2 = W[-15] >>> 7 ^ W[-15] >>> 18 */ \
wolfSSL 15:117db924cf7c 2030 RND_STEP_RORX_1_3(h,a,b,c,d,e,f,g,_i+1) \
wolfSSL 15:117db924cf7c 2031 VPSHUFD (YTMP3, Y3, 0b11111010) /* YTMP3 = W[-2] {BBAA}*/ \
wolfSSL 15:117db924cf7c 2032 RND_STEP_RORX_1_4(h,a,b,c,d,e,f,g,_i+1) \
wolfSSL 15:117db924cf7c 2033 VPXOR (YTMP1, YTMP5, YTMP2) /* YTMP1 = s0 */ \
wolfSSL 15:117db924cf7c 2034 RND_STEP_RORX_1_5(h,a,b,c,d,e,f,g,_i+1) \
wolfSSL 15:117db924cf7c 2035 VPSRLD (YTMP4, YTMP3, 10) /* YTMP4 = W[-2] >> 10 {BBAA} */ \
wolfSSL 15:117db924cf7c 2036 RND_STEP_RORX_1_6(h,a,b,c,d,e,f,g,_i+1) \
wolfSSL 15:117db924cf7c 2037 VPSRLQ (YTMP2, YTMP3, 19) /* YTMP2 = W[-2] MY_ROR 19 {xBxA} */ \
wolfSSL 15:117db924cf7c 2038 RND_STEP_RORX_1_7(h,a,b,c,d,e,f,g,_i+1) \
wolfSSL 15:117db924cf7c 2039 VPSRLQ (YTMP3, YTMP3, 17) /* YTMP3 = W[-2] MY_ROR 17 {xBxA} */ \
wolfSSL 15:117db924cf7c 2040 RND_STEP_RORX_1_8(h,a,b,c,d,e,f,g,_i+1) \
wolfSSL 15:117db924cf7c 2041 VPADDD (YTMP0, YTMP0, Y0) \
wolfSSL 15:117db924cf7c 2042 RND_STEP_RORX_0_1(g,h,a,b,c,d,e,f,_i+2) \
wolfSSL 15:117db924cf7c 2043 VPXOR (YTMP2, YTMP2, YTMP3) \
wolfSSL 15:117db924cf7c 2044 RND_STEP_RORX_0_2(g,h,a,b,c,d,e,f,_i+2) \
wolfSSL 15:117db924cf7c 2045 VPXOR (YTMP4, YTMP4, YTMP2) /* YTMP4 = s1 {xBxA} */ \
wolfSSL 15:117db924cf7c 2046 RND_STEP_RORX_0_3(g,h,a,b,c,d,e,f,_i+2) \
wolfSSL 15:117db924cf7c 2047 VPADDD (YTMP0, YTMP0, YTMP1) /* YTMP0 = W[-16] + W[-7] + s0 */ \
wolfSSL 15:117db924cf7c 2048 RND_STEP_RORX_0_4(g,h,a,b,c,d,e,f,_i+2) \
wolfSSL 15:117db924cf7c 2049 VPSHUFB (YTMP4, YTMP4, SHUF_Y_00BA) /* YTMP4 = s1 {00BA} */ \
wolfSSL 15:117db924cf7c 2050 RND_STEP_RORX_0_5(g,h,a,b,c,d,e,f,_i+2) \
wolfSSL 15:117db924cf7c 2051 VPADDD (YTMP0, YTMP0, YTMP4) /* YTMP0 = {..., ..., W[1], W[0]} */ \
wolfSSL 15:117db924cf7c 2052 RND_STEP_RORX_0_6(g,h,a,b,c,d,e,f,_i+2) \
wolfSSL 15:117db924cf7c 2053 VPSHUFD (YTMP2, YTMP0, 0b01010000) /* YTMP2 = W[-2] {DDCC} */ \
wolfSSL 15:117db924cf7c 2054 RND_STEP_RORX_0_7(g,h,a,b,c,d,e,f,_i+2) \
wolfSSL 15:117db924cf7c 2055 RND_STEP_RORX_0_8(g,h,a,b,c,d,e,f,_i+2) \
wolfSSL 15:117db924cf7c 2056 VPSRLQ (YTMP4, YTMP2, 17) /* YTMP4 = W[-2] MY_ROR 17 {xDxC} */ \
wolfSSL 15:117db924cf7c 2057 RND_STEP_RORX_1_1(f,g,h,a,b,c,d,e,_i+3) \
wolfSSL 15:117db924cf7c 2058 VPSRLQ (YTMP3, YTMP2, 19) /* YTMP3 = W[-2] MY_ROR 19 {xDxC} */ \
wolfSSL 15:117db924cf7c 2059 RND_STEP_RORX_1_2(f,g,h,a,b,c,d,e,_i+3) \
wolfSSL 15:117db924cf7c 2060 VPSRLD (YTMP5, YTMP2, 10) /* YTMP5 = W[-2] >> 10 {DDCC} */ \
wolfSSL 15:117db924cf7c 2061 RND_STEP_RORX_1_3(f,g,h,a,b,c,d,e,_i+3) \
wolfSSL 15:117db924cf7c 2062 VPXOR (YTMP4, YTMP4, YTMP3) \
wolfSSL 15:117db924cf7c 2063 RND_STEP_RORX_1_4(f,g,h,a,b,c,d,e,_i+3) \
wolfSSL 15:117db924cf7c 2064 VPXOR (YTMP5, YTMP5, YTMP4) /* YTMP5 = s1 {xDxC} */ \
wolfSSL 15:117db924cf7c 2065 RND_STEP_RORX_1_5(f,g,h,a,b,c,d,e,_i+3) \
wolfSSL 15:117db924cf7c 2066 RND_STEP_RORX_1_6(f,g,h,a,b,c,d,e,_i+3) \
wolfSSL 15:117db924cf7c 2067 VPSHUFB (YTMP5, YTMP5, SHUF_Y_DC00) /* YTMP5 = s1 {DC00} */ \
wolfSSL 15:117db924cf7c 2068 RND_STEP_RORX_1_7(f,g,h,a,b,c,d,e,_i+3) \
wolfSSL 15:117db924cf7c 2069 RND_STEP_RORX_1_8(f,g,h,a,b,c,d,e,_i+3) \
wolfSSL 15:117db924cf7c 2070 VPADDD (Y0, YTMP5, YTMP0) /* Y0 = {W[3], W[2], W[1], W[0]} */ \
wolfSSL 15:117db924cf7c 2071
wolfSSL 15:117db924cf7c 2072 #endif /* HAVE_INTEL_RORX */
wolfSSL 15:117db924cf7c 2073
/* Emit a "vinserti128" instruction string. The arguments are given destination
 * first (op1) and are written out in reverse, so the immediate (op4) comes
 * first and op1 last in the generated text. */
wolfSSL 15:117db924cf7c 2074 #define _VINSERTI128(op1,op2,op3,op4) \
wolfSSL 15:117db924cf7c 2075 "vinserti128 $" #op4 ", %" #op3 ", %" #op2 ", %" #op1 "\n\t"
/* Extra level of expansion so that macro arguments (e.g. Y0, XTMP0) are
 * replaced by their register names before being stringified. */
wolfSSL 15:117db924cf7c 2076 #define VINSERTI128(op1,op2,op3,op4) \
wolfSSL 15:117db924cf7c 2077 _VINSERTI128(op1,op2,op3,op4)
wolfSSL 15:117db924cf7c 2078
wolfSSL 15:117db924cf7c 2079
/* Load one 64-byte block from the address in register reg into xmm0 - xmm3
 * (unaligned loads) and apply VPSHUFB with BYTE_FLIP_MASK to X0 - X3
 * (presumably the aliases of xmm0 - xmm3 - confirm against their definitions).
 *
 * BYTE_FLIP_MASK  Register holding the byte shuffle mask.
 * reg             Name of the general purpose register holding the address,
 *                 without the % prefix (e.g. rax).
 */
wolfSSL 15:117db924cf7c 2080 #define _LOAD_W_K_LOW(BYTE_FLIP_MASK, reg) \
wolfSSL 15:117db924cf7c 2081 "# X0, X1, X2, X3 = W[0..15]\n\t" \
wolfSSL 15:117db924cf7c 2082 "vmovdqu (%%" #reg "), %%xmm0\n\t" \
wolfSSL 15:117db924cf7c 2083 "vmovdqu 16(%%" #reg "), %%xmm1\n\t" \
wolfSSL 15:117db924cf7c 2084 VPSHUFB(X0, X0, BYTE_FLIP_MASK) \
wolfSSL 15:117db924cf7c 2085 VPSHUFB(X1, X1, BYTE_FLIP_MASK) \
wolfSSL 15:117db924cf7c 2086 "vmovdqu 32(%%" #reg "), %%xmm2\n\t" \
wolfSSL 15:117db924cf7c 2087 "vmovdqu 48(%%" #reg "), %%xmm3\n\t" \
wolfSSL 15:117db924cf7c 2088 VPSHUFB(X2, X2, BYTE_FLIP_MASK) \
wolfSSL 15:117db924cf7c 2089 VPSHUFB(X3, X3, BYTE_FLIP_MASK)
wolfSSL 15:117db924cf7c 2090
/* Extra level of expansion so that the arguments are macro-expanded before
 * _LOAD_W_K_LOW uses them. */
wolfSSL 15:117db924cf7c 2091 #define LOAD_W_K_LOW(BYTE_FLIP_MASK, reg) \
wolfSSL 15:117db924cf7c 2092 _LOAD_W_K_LOW(BYTE_FLIP_MASK, reg)
wolfSSL 15:117db924cf7c 2093
wolfSSL 15:117db924cf7c 2094
/* Load two consecutive 64-byte blocks from the address in register reg.
 *
 * Bytes 0-63 go to xmm0 - xmm3 and bytes 64-127 to xmm4 - xmm7. VINSERTI128
 * with immediate 1 then merges XTMP0 - XTMP3 (presumably the aliases of
 * xmm4 - xmm7 - confirm against their definitions) into Y0 - Y3, and VPSHUFB
 * with BYTE_FLIP_Y_MASK is applied to each of Y0 - Y3.
 *
 * BYTE_FLIP_Y_MASK  Register holding the byte shuffle mask.
 * reg               Name of the general purpose register holding the address,
 *                   without the % prefix (e.g. rax).
 */
wolfSSL 15:117db924cf7c 2095 #define _LOAD_W_K(BYTE_FLIP_Y_MASK, reg) \
wolfSSL 15:117db924cf7c 2096 "# X0, X1, X2, X3 = W[0..15]\n\t" \
wolfSSL 15:117db924cf7c 2097 "vmovdqu (%%" #reg "), %%xmm0\n\t" \
wolfSSL 15:117db924cf7c 2098 "vmovdqu 16(%%" #reg "), %%xmm1\n\t" \
wolfSSL 15:117db924cf7c 2099 "vmovdqu 64(%%" #reg "), %%xmm4\n\t" \
wolfSSL 15:117db924cf7c 2100 "vmovdqu 80(%%" #reg "), %%xmm5\n\t" \
wolfSSL 15:117db924cf7c 2101 VINSERTI128(Y0, Y0, XTMP0, 1) \
wolfSSL 15:117db924cf7c 2102 VINSERTI128(Y1, Y1, XTMP1, 1) \
wolfSSL 15:117db924cf7c 2103 VPSHUFB(Y0, Y0, BYTE_FLIP_Y_MASK) \
wolfSSL 15:117db924cf7c 2104 VPSHUFB(Y1, Y1, BYTE_FLIP_Y_MASK) \
wolfSSL 15:117db924cf7c 2105 "vmovdqu 32(%%" #reg "), %%xmm2\n\t" \
wolfSSL 15:117db924cf7c 2106 "vmovdqu 48(%%" #reg "), %%xmm3\n\t" \
wolfSSL 15:117db924cf7c 2107 "vmovdqu 96(%%" #reg "), %%xmm6\n\t" \
wolfSSL 15:117db924cf7c 2108 "vmovdqu 112(%%" #reg "), %%xmm7\n\t" \
wolfSSL 15:117db924cf7c 2109 VINSERTI128(Y2, Y2, XTMP2, 1) \
wolfSSL 15:117db924cf7c 2110 VINSERTI128(Y3, Y3, XTMP3, 1) \
wolfSSL 15:117db924cf7c 2111 VPSHUFB(Y2, Y2, BYTE_FLIP_Y_MASK) \
wolfSSL 15:117db924cf7c 2112 VPSHUFB(Y3, Y3, BYTE_FLIP_Y_MASK)
wolfSSL 15:117db924cf7c 2113
/* Extra level of expansion so that the arguments are macro-expanded before
 * _LOAD_W_K uses them. */
wolfSSL 15:117db924cf7c 2114 #define LOAD_W_K(BYTE_FLIP_Y_MASK, reg) \
wolfSSL 15:117db924cf7c 2115 _LOAD_W_K(BYTE_FLIP_Y_MASK, reg)
wolfSSL 15:117db924cf7c 2116
wolfSSL 15:117db924cf7c 2117
/* Add 128 bytes of constants, starting at byte offset i*8 of the [K] operand,
 * to ymm0 - ymm3 and store the four sums at the same byte offsets relative to
 * WK. ymm4 and ymm5 are used as scratch; ymm0 - ymm3 are left unchanged.
 *
 * i  Scaled by 8 to give the byte offset into both [K] and the WK area.
 */
wolfSSL 15:117db924cf7c 2118 #define _SET_W_Y_4(i) \
wolfSSL 15:117db924cf7c 2119 "vpaddd (" #i "*8)+ 0+%[K], %%ymm0, %%ymm4\n\t" \
wolfSSL 15:117db924cf7c 2120 "vpaddd (" #i "*8)+32+%[K], %%ymm1, %%ymm5\n\t" \
wolfSSL 15:117db924cf7c 2121 "vmovdqu %%ymm4, (" #i "*8)+ 0(" WK ")\n\t" \
wolfSSL 15:117db924cf7c 2122 "vmovdqu %%ymm5, (" #i "*8)+32(" WK ")\n\t" \
wolfSSL 15:117db924cf7c 2123 "vpaddd (" #i "*8)+64+%[K], %%ymm2, %%ymm4\n\t" \
wolfSSL 15:117db924cf7c 2124 "vpaddd (" #i "*8)+96+%[K], %%ymm3, %%ymm5\n\t" \
wolfSSL 15:117db924cf7c 2125 "vmovdqu %%ymm4, (" #i "*8)+64(" WK ")\n\t" \
wolfSSL 15:117db924cf7c 2126 "vmovdqu %%ymm5, (" #i "*8)+96(" WK ")\n\t"
wolfSSL 15:117db924cf7c 2127
/* Extra level of expansion so that the argument is macro-expanded before
 * _SET_W_Y_4 stringifies it. */
wolfSSL 15:117db924cf7c 2128 #define SET_W_Y_4(i) \
wolfSSL 15:117db924cf7c 2129 _SET_W_Y_4(i)
wolfSSL 15:117db924cf7c 2130
wolfSSL 15:117db924cf7c 2131
/* 256-bit shuffle masks for the AVX2 code. Each table holds one 128-bit
 * pattern written out twice, so both halves of a ymm register receive the
 * same mask. */
wolfSSL 15:117db924cf7c 2132 static const ALIGN32 word64 mSHUF_Y_00BA[] =
wolfSSL 15:117db924cf7c 2133 { 0x0b0a090803020100, 0xFFFFFFFFFFFFFFFF,
wolfSSL 15:117db924cf7c 2134 0x0b0a090803020100, 0xFFFFFFFFFFFFFFFF }; /* shuffle xBxA -> 00BA */
wolfSSL 15:117db924cf7c 2135 static const ALIGN32 word64 mSHUF_Y_DC00[] =
wolfSSL 15:117db924cf7c 2136 { 0xFFFFFFFFFFFFFFFF, 0x0b0a090803020100,
wolfSSL 15:117db924cf7c 2137 0xFFFFFFFFFFFFFFFF, 0x0b0a090803020100 }; /* shuffle xDxC -> DC00 */
/* Byte indices 3,2,1,0 / 7,6,5,4 / ...: reverses the byte order inside every
 * 32-bit word when used as a byte shuffle control. */
wolfSSL 15:117db924cf7c 2138 static const ALIGN32 word64 mBYTE_FLIP_Y_MASK[] =
wolfSSL 15:117db924cf7c 2139 { 0x0405060700010203, 0x0c0d0e0f08090a0b,
wolfSSL 15:117db924cf7c 2140 0x0405060700010203, 0x0c0d0e0f08090a0b };
wolfSSL 15:117db924cf7c 2141
/* Load the three constant masks from the [FLIP], [SHUF00BA] and [SHUFDC00]
 * asm operands into the given registers. "vmovdqa" is an aligned load, so the
 * operands must be suitably aligned for the register width used.
 *
 * BYTE_FLIP_MASK  Register to receive [FLIP].
 * SHUF_00BA       Register to receive [SHUF00BA].
 * SHUF_DC00       Register to receive [SHUFDC00].
 */
wolfSSL 15:117db924cf7c 2142 #define _INIT_MASKS_Y(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00) \
wolfSSL 15:117db924cf7c 2143 "vmovdqa %[FLIP], %" #BYTE_FLIP_MASK "\n\t" \
wolfSSL 15:117db924cf7c 2144 "vmovdqa %[SHUF00BA], %" #SHUF_00BA "\n\t" \
wolfSSL 15:117db924cf7c 2145 "vmovdqa %[SHUFDC00], %" #SHUF_DC00 "\n\t"
wolfSSL 15:117db924cf7c 2146
/* Extra level of expansion so that register macros passed as arguments are
 * replaced by their register names before being stringified. */
wolfSSL 15:117db924cf7c 2147 #define INIT_MASKS_Y(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00) \
wolfSSL 15:117db924cf7c 2148 _INIT_MASKS_Y(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00)
wolfSSL 15:117db924cf7c 2149
/* The 64 SHA-256 round constants laid out for 256-bit loads: every group of
 * four constants is stored twice in a row, so each 32-byte slice holds the
 * same four constants in its low and high 128 bits (64 * 2 = 128 entries). */
wolfSSL 15:117db924cf7c 2150 static const ALIGN32 word32 K256[128] = {
wolfSSL 15:117db924cf7c 2151 0x428A2F98L, 0x71374491L, 0xB5C0FBCFL, 0xE9B5DBA5L,
wolfSSL 15:117db924cf7c 2152 0x428A2F98L, 0x71374491L, 0xB5C0FBCFL, 0xE9B5DBA5L,
wolfSSL 15:117db924cf7c 2153 0x3956C25BL, 0x59F111F1L, 0x923F82A4L, 0xAB1C5ED5L,
wolfSSL 15:117db924cf7c 2154 0x3956C25BL, 0x59F111F1L, 0x923F82A4L, 0xAB1C5ED5L,
wolfSSL 15:117db924cf7c 2155 0xD807AA98L, 0x12835B01L, 0x243185BEL, 0x550C7DC3L,
wolfSSL 15:117db924cf7c 2156 0xD807AA98L, 0x12835B01L, 0x243185BEL, 0x550C7DC3L,
wolfSSL 15:117db924cf7c 2157 0x72BE5D74L, 0x80DEB1FEL, 0x9BDC06A7L, 0xC19BF174L,
wolfSSL 15:117db924cf7c 2158 0x72BE5D74L, 0x80DEB1FEL, 0x9BDC06A7L, 0xC19BF174L,
wolfSSL 15:117db924cf7c 2159 0xE49B69C1L, 0xEFBE4786L, 0x0FC19DC6L, 0x240CA1CCL,
wolfSSL 15:117db924cf7c 2160 0xE49B69C1L, 0xEFBE4786L, 0x0FC19DC6L, 0x240CA1CCL,
wolfSSL 15:117db924cf7c 2161 0x2DE92C6FL, 0x4A7484AAL, 0x5CB0A9DCL, 0x76F988DAL,
wolfSSL 15:117db924cf7c 2162 0x2DE92C6FL, 0x4A7484AAL, 0x5CB0A9DCL, 0x76F988DAL,
wolfSSL 15:117db924cf7c 2163 0x983E5152L, 0xA831C66DL, 0xB00327C8L, 0xBF597FC7L,
wolfSSL 15:117db924cf7c 2164 0x983E5152L, 0xA831C66DL, 0xB00327C8L, 0xBF597FC7L,
wolfSSL 15:117db924cf7c 2165 0xC6E00BF3L, 0xD5A79147L, 0x06CA6351L, 0x14292967L,
wolfSSL 15:117db924cf7c 2166 0xC6E00BF3L, 0xD5A79147L, 0x06CA6351L, 0x14292967L,
wolfSSL 15:117db924cf7c 2167 0x27B70A85L, 0x2E1B2138L, 0x4D2C6DFCL, 0x53380D13L,
wolfSSL 15:117db924cf7c 2168 0x27B70A85L, 0x2E1B2138L, 0x4D2C6DFCL, 0x53380D13L,
wolfSSL 15:117db924cf7c 2169 0x650A7354L, 0x766A0ABBL, 0x81C2C92EL, 0x92722C85L,
wolfSSL 15:117db924cf7c 2170 0x650A7354L, 0x766A0ABBL, 0x81C2C92EL, 0x92722C85L,
wolfSSL 15:117db924cf7c 2171 0xA2BFE8A1L, 0xA81A664BL, 0xC24B8B70L, 0xC76C51A3L,
wolfSSL 15:117db924cf7c 2172 0xA2BFE8A1L, 0xA81A664BL, 0xC24B8B70L, 0xC76C51A3L,
wolfSSL 15:117db924cf7c 2173 0xD192E819L, 0xD6990624L, 0xF40E3585L, 0x106AA070L,
wolfSSL 15:117db924cf7c 2174 0xD192E819L, 0xD6990624L, 0xF40E3585L, 0x106AA070L,
wolfSSL 15:117db924cf7c 2175 0x19A4C116L, 0x1E376C08L, 0x2748774CL, 0x34B0BCB5L,
wolfSSL 15:117db924cf7c 2176 0x19A4C116L, 0x1E376C08L, 0x2748774CL, 0x34B0BCB5L,
wolfSSL 15:117db924cf7c 2177 0x391C0CB3L, 0x4ED8AA4AL, 0x5B9CCA4FL, 0x682E6FF3L,
wolfSSL 15:117db924cf7c 2178 0x391C0CB3L, 0x4ED8AA4AL, 0x5B9CCA4FL, 0x682E6FF3L,
wolfSSL 15:117db924cf7c 2179 0x748F82EEL, 0x78A5636FL, 0x84C87814L, 0x8CC70208L,
wolfSSL 15:117db924cf7c 2180 0x748F82EEL, 0x78A5636FL, 0x84C87814L, 0x8CC70208L,
wolfSSL 15:117db924cf7c 2181 0x90BEFFFAL, 0xA4506CEBL, 0xBEF9A3F7L, 0xC67178F2L,
wolfSSL 15:117db924cf7c 2182 0x90BEFFFAL, 0xA4506CEBL, 0xBEF9A3F7L, 0xC67178F2L
wolfSSL 15:117db924cf7c 2183 };
wolfSSL 15:117db924cf7c 2184
/* Hash a single 64-byte block using the AVX2 message schedule macros.
 *
 * The block is read from offset 32 of *sha256 (presumably sha256->buffer -
 * confirm against the wc_Sha256 layout) and the result is folded into the
 * digest by STORE_ADD_DIGEST. Round constants come from the K256 table.
 *
 * sha256  Hash object holding the digest state and the block to process.
 * returns 0 always.
 */
wolfSSL 15:117db924cf7c 2185 SHA256_NOINLINE static int Transform_Sha256_AVX2(wc_Sha256* sha256)
wolfSSL 15:117db924cf7c 2186 {
wolfSSL 15:117db924cf7c 2187 __asm__ __volatile__ (
wolfSSL 15:117db924cf7c 2188
/* Reserve 512 bytes of stack scratch space; released again before exit. */
wolfSSL 15:117db924cf7c 2189 "subq $512, %%rsp\n\t"
/* rax = address of the block to hash (offset 32 inside the hash object). */
wolfSSL 15:117db924cf7c 2190 "leaq 32(%[sha256]), %%rax\n\t"
wolfSSL 15:117db924cf7c 2191
/* Only one block is loaded here (LOAD_W_K_LOW fills xmm registers), so the
 * byte flip mask is the BYTE_FLIP_MASK register fed from mBYTE_FLIP_MASK. */
wolfSSL 15:117db924cf7c 2192 INIT_MASKS_Y(BYTE_FLIP_MASK, SHUF_Y_00BA, SHUF_Y_DC00)
wolfSSL 15:117db924cf7c 2193 LOAD_DIGEST()
wolfSSL 15:117db924cf7c 2194
wolfSSL 15:117db924cf7c 2195 LOAD_W_K_LOW(BYTE_FLIP_MASK, rax)
wolfSSL 15:117db924cf7c 2196
wolfSSL 15:117db924cf7c 2197 "movl %%r9d, " L4 "\n\t"
wolfSSL 15:117db924cf7c 2198 "movl %%r12d, " L1 "\n\t"
wolfSSL 15:117db924cf7c 2199 "xorl %%r10d, " L4 "\n\t"
wolfSSL 15:117db924cf7c 2200
/* Three groups of 16 rounds that also extend the message schedule. The S_x
 * argument order alternates between calls because each call performs four
 * rounds; the last argument advances by 8 per call. */
wolfSSL 15:117db924cf7c 2201 SET_W_Y_4(0)
wolfSSL 15:117db924cf7c 2202 MsgSched_Y(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
wolfSSL 15:117db924cf7c 2203 MsgSched_Y(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 8)
wolfSSL 15:117db924cf7c 2204 MsgSched_Y(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 16)
wolfSSL 15:117db924cf7c 2205 MsgSched_Y(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 24)
wolfSSL 15:117db924cf7c 2206
wolfSSL 15:117db924cf7c 2207 SET_W_Y_4(16)
wolfSSL 15:117db924cf7c 2208 MsgSched_Y(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 32)
wolfSSL 15:117db924cf7c 2209 MsgSched_Y(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 40)
wolfSSL 15:117db924cf7c 2210 MsgSched_Y(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 48)
wolfSSL 15:117db924cf7c 2211 MsgSched_Y(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 56)
wolfSSL 15:117db924cf7c 2212
wolfSSL 15:117db924cf7c 2213 SET_W_Y_4(32)
wolfSSL 15:117db924cf7c 2214 MsgSched_Y(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 64)
wolfSSL 15:117db924cf7c 2215 MsgSched_Y(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 72)
wolfSSL 15:117db924cf7c 2216 MsgSched_Y(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 80)
wolfSSL 15:117db924cf7c 2217 MsgSched_Y(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 88)
wolfSSL 15:117db924cf7c 2218
/* Final 16 rounds: no further message scheduling is interleaved. */
wolfSSL 15:117db924cf7c 2219 SET_W_Y_4(48)
wolfSSL 15:117db924cf7c 2220 RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 96)
wolfSSL 15:117db924cf7c 2221 RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 104)
wolfSSL 15:117db924cf7c 2222 RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 112)
wolfSSL 15:117db924cf7c 2223 RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 120)
wolfSSL 15:117db924cf7c 2224
wolfSSL 15:117db924cf7c 2225 STORE_ADD_DIGEST()
wolfSSL 15:117db924cf7c 2226
wolfSSL 15:117db924cf7c 2227 "addq $512, %%rsp\n\t"
wolfSSL 15:117db924cf7c 2228
wolfSSL 15:117db924cf7c 2229 :
wolfSSL 15:117db924cf7c 2230 : [FLIP] "m" (mBYTE_FLIP_MASK[0]),
wolfSSL 15:117db924cf7c 2231 [SHUF00BA] "m" (mSHUF_Y_00BA[0]),
wolfSSL 15:117db924cf7c 2232 [SHUFDC00] "m" (mSHUF_Y_DC00[0]),
wolfSSL 15:117db924cf7c 2233 [sha256] "r" (sha256),
wolfSSL 15:117db924cf7c 2234 [K] "m" (K256)
wolfSSL 15:117db924cf7c 2235 : WORK_REGS, STATE_REGS, YMM_REGS, "memory"
wolfSSL 15:117db924cf7c 2236 );
wolfSSL 15:117db924cf7c 2237
wolfSSL 15:117db924cf7c 2238 return 0;
wolfSSL 15:117db924cf7c 2239 }
wolfSSL 15:117db924cf7c 2240
wolfSSL 15:117db924cf7c 2241 SHA256_NOINLINE static int Transform_Sha256_AVX2_Len(wc_Sha256* sha256,
wolfSSL 15:117db924cf7c 2242 word32 len)
wolfSSL 15:117db924cf7c 2243 {
wolfSSL 15:117db924cf7c 2244 if ((len & WC_SHA256_BLOCK_SIZE) != 0) {
wolfSSL 15:117db924cf7c 2245 XMEMCPY(sha256->buffer, sha256->data, WC_SHA256_BLOCK_SIZE);
wolfSSL 15:117db924cf7c 2246 Transform_Sha256_AVX2(sha256);
wolfSSL 15:117db924cf7c 2247 sha256->data += WC_SHA256_BLOCK_SIZE;
wolfSSL 15:117db924cf7c 2248 len -= WC_SHA256_BLOCK_SIZE;
wolfSSL 15:117db924cf7c 2249 if (len == 0)
wolfSSL 15:117db924cf7c 2250 return 0;
wolfSSL 15:117db924cf7c 2251 }
wolfSSL 15:117db924cf7c 2252
wolfSSL 15:117db924cf7c 2253 __asm__ __volatile__ (
wolfSSL 15:117db924cf7c 2254
wolfSSL 15:117db924cf7c 2255 "subq $512, %%rsp\n\t"
wolfSSL 15:117db924cf7c 2256 "movq 120(%[sha256]), %%rax\n\t"
wolfSSL 15:117db924cf7c 2257
wolfSSL 15:117db924cf7c 2258 INIT_MASKS_Y(BYTE_FLIP_Y_MASK, SHUF_Y_00BA, SHUF_Y_DC00)
wolfSSL 15:117db924cf7c 2259 LOAD_DIGEST()
wolfSSL 15:117db924cf7c 2260
wolfSSL 15:117db924cf7c 2261 "# Start of loop processing two blocks\n"
wolfSSL 15:117db924cf7c 2262 "1:\n\t"
wolfSSL 15:117db924cf7c 2263
wolfSSL 15:117db924cf7c 2264 LOAD_W_K(BYTE_FLIP_Y_MASK, rax)
wolfSSL 15:117db924cf7c 2265
wolfSSL 15:117db924cf7c 2266 "movl %%r9d, " L4 "\n\t"
wolfSSL 15:117db924cf7c 2267 "movl %%r12d, " L1 "\n\t"
wolfSSL 15:117db924cf7c 2268 "xorl %%r10d, " L4 "\n\t"
wolfSSL 15:117db924cf7c 2269
wolfSSL 15:117db924cf7c 2270 SET_W_Y_4(0)
wolfSSL 15:117db924cf7c 2271 MsgSched_Y(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
wolfSSL 15:117db924cf7c 2272 MsgSched_Y(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 8)
wolfSSL 15:117db924cf7c 2273 MsgSched_Y(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 16)
wolfSSL 15:117db924cf7c 2274 MsgSched_Y(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 24)
wolfSSL 15:117db924cf7c 2275
wolfSSL 15:117db924cf7c 2276 SET_W_Y_4(16)
wolfSSL 15:117db924cf7c 2277 MsgSched_Y(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 32)
wolfSSL 15:117db924cf7c 2278 MsgSched_Y(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 40)
wolfSSL 15:117db924cf7c 2279 MsgSched_Y(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 48)
wolfSSL 15:117db924cf7c 2280 MsgSched_Y(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 56)
wolfSSL 15:117db924cf7c 2281
wolfSSL 15:117db924cf7c 2282 SET_W_Y_4(32)
wolfSSL 15:117db924cf7c 2283 MsgSched_Y(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 64)
wolfSSL 15:117db924cf7c 2284 MsgSched_Y(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 72)
wolfSSL 15:117db924cf7c 2285 MsgSched_Y(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 80)
wolfSSL 15:117db924cf7c 2286 MsgSched_Y(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 88)
wolfSSL 15:117db924cf7c 2287
wolfSSL 15:117db924cf7c 2288 SET_W_Y_4(48)
wolfSSL 15:117db924cf7c 2289 RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 96)
wolfSSL 15:117db924cf7c 2290 RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 104)
wolfSSL 15:117db924cf7c 2291 RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 112)
wolfSSL 15:117db924cf7c 2292 RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 120)
wolfSSL 15:117db924cf7c 2293
wolfSSL 15:117db924cf7c 2294 ADD_DIGEST()
wolfSSL 15:117db924cf7c 2295 STORE_DIGEST()
wolfSSL 15:117db924cf7c 2296
wolfSSL 15:117db924cf7c 2297 "movl %%r9d, " L4 "\n\t"
wolfSSL 15:117db924cf7c 2298 "movl %%r12d, " L1 "\n\t"
wolfSSL 15:117db924cf7c 2299 "xorl %%r10d, " L4 "\n\t"
wolfSSL 15:117db924cf7c 2300
wolfSSL 15:117db924cf7c 2301 RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 4)
wolfSSL 15:117db924cf7c 2302 RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
wolfSSL 15:117db924cf7c 2303 RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 20)
wolfSSL 15:117db924cf7c 2304 RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 28)
wolfSSL 15:117db924cf7c 2305 RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 36)
wolfSSL 15:117db924cf7c 2306 RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 44)
wolfSSL 15:117db924cf7c 2307 RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 52)
wolfSSL 15:117db924cf7c 2308 RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 60)
wolfSSL 15:117db924cf7c 2309 RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 68)
wolfSSL 15:117db924cf7c 2310 RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 76)
wolfSSL 15:117db924cf7c 2311 RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 84)
wolfSSL 15:117db924cf7c 2312 RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 92)
wolfSSL 15:117db924cf7c 2313 RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 100)
wolfSSL 15:117db924cf7c 2314 RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 108)
wolfSSL 15:117db924cf7c 2315 RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 116)
wolfSSL 15:117db924cf7c 2316 RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 124)
wolfSSL 15:117db924cf7c 2317
wolfSSL 15:117db924cf7c 2318 ADD_DIGEST()
wolfSSL 15:117db924cf7c 2319
wolfSSL 15:117db924cf7c 2320 "movq 120(%[sha256]), %%rax\n\t"
wolfSSL 15:117db924cf7c 2321 "addq $128, %%rax\n\t"
wolfSSL 15:117db924cf7c 2322 "subl $128, %[len]\n\t"
wolfSSL 15:117db924cf7c 2323
wolfSSL 15:117db924cf7c 2324 STORE_DIGEST()
wolfSSL 15:117db924cf7c 2325
wolfSSL 15:117db924cf7c 2326 "movq %%rax, 120(%[sha256])\n\t"
wolfSSL 15:117db924cf7c 2327 "jnz 1b\n\t"
wolfSSL 15:117db924cf7c 2328
wolfSSL 15:117db924cf7c 2329 "addq $512, %%rsp\n\t"
wolfSSL 15:117db924cf7c 2330
wolfSSL 15:117db924cf7c 2331 :
wolfSSL 15:117db924cf7c 2332 : [FLIP] "m" (mBYTE_FLIP_Y_MASK[0]),
wolfSSL 15:117db924cf7c 2333 [SHUF00BA] "m" (mSHUF_Y_00BA[0]),
wolfSSL 15:117db924cf7c 2334 [SHUFDC00] "m" (mSHUF_Y_DC00[0]),
wolfSSL 15:117db924cf7c 2335 [sha256] "r" (sha256),
wolfSSL 15:117db924cf7c 2336 [len] "r" (len),
wolfSSL 15:117db924cf7c 2337 [K] "m" (K256)
wolfSSL 15:117db924cf7c 2338 : WORK_REGS, STATE_REGS, YMM_REGS, "memory"
wolfSSL 15:117db924cf7c 2339 );
wolfSSL 15:117db924cf7c 2340
wolfSSL 15:117db924cf7c 2341 return 0;
wolfSSL 15:117db924cf7c 2342 }
wolfSSL 15:117db924cf7c 2343
wolfSSL 15:117db924cf7c 2344 #if defined(HAVE_INTEL_RORX)
/* Transform_Sha256_AVX2_RORX
 *
 * Single-block compression step built from the AVX2 message-schedule macros
 * (MsgSched_Y_RORX) and the RORX round macros (RND_RORX_X4).
 *
 * sha256  hash context. The digest is loaded from it (LOAD_DIGEST) and the
 *         result is added back and stored (STORE_ADD_DIGEST). The block data
 *         is read from offset 32 of the context -- presumably sha256->buffer;
 *         confirm against the wc_Sha256 layout.
 *
 * Always returns 0. The asm statement temporarily moves %rsp down by 512
 * bytes and restores it before finishing.
 */
wolfSSL 15:117db924cf7c 2345 SHA256_NOINLINE static int Transform_Sha256_AVX2_RORX(wc_Sha256* sha256)
wolfSSL 15:117db924cf7c 2346 {
wolfSSL 15:117db924cf7c 2347 __asm__ __volatile__ (
wolfSSL 15:117db924cf7c 2348
/* Reserve 512 bytes of stack scratch space (released at the end). */
wolfSSL 15:117db924cf7c 2349 "subq $512, %%rsp\n\t"
/* rax = address of the input block inside the context (offset 32). */
wolfSSL 15:117db924cf7c 2350 "leaq 32(%[sha256]), %%rax\n\t"
wolfSSL 15:117db924cf7c 2351
wolfSSL 15:117db924cf7c 2352 INIT_MASKS_Y(BYTE_FLIP_MASK, SHUF_Y_00BA, SHUF_Y_DC00)
wolfSSL 15:117db924cf7c 2353 LOAD_W_K_LOW(BYTE_FLIP_MASK, rax)
wolfSSL 15:117db924cf7c 2354
wolfSSL 15:117db924cf7c 2355 LOAD_DIGEST()
wolfSSL 15:117db924cf7c 2356
/* Prime the temporaries the first round macro expects: L4 = r9 ^ r10 and
 * L1 = r12 rotated right by 6. (Which working variables these registers
 * hold is defined by macros outside this function.) */
wolfSSL 15:117db924cf7c 2357 "movl %%r9d, " L4 "\n\t"
wolfSSL 15:117db924cf7c 2358 "rorx $6, %%r12d, " L1 "\n\t"
wolfSSL 15:117db924cf7c 2359 "xorl %%r10d, " L4 "\n\t"
wolfSSL 15:117db924cf7c 2360
/* Three groups of four message-schedule + round steps (offsets 0..88). */
wolfSSL 15:117db924cf7c 2361 SET_W_Y_4(0)
wolfSSL 15:117db924cf7c 2362 MsgSched_Y_RORX(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
wolfSSL 15:117db924cf7c 2363 MsgSched_Y_RORX(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 8)
wolfSSL 15:117db924cf7c 2364 MsgSched_Y_RORX(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 16)
wolfSSL 15:117db924cf7c 2365 MsgSched_Y_RORX(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 24)
wolfSSL 15:117db924cf7c 2366
wolfSSL 15:117db924cf7c 2367 SET_W_Y_4(16)
wolfSSL 15:117db924cf7c 2368 MsgSched_Y_RORX(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 32)
wolfSSL 15:117db924cf7c 2369 MsgSched_Y_RORX(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 40)
wolfSSL 15:117db924cf7c 2370 MsgSched_Y_RORX(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 48)
wolfSSL 15:117db924cf7c 2371 MsgSched_Y_RORX(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 56)
wolfSSL 15:117db924cf7c 2372
wolfSSL 15:117db924cf7c 2373 SET_W_Y_4(32)
wolfSSL 15:117db924cf7c 2374 MsgSched_Y_RORX(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 64)
wolfSSL 15:117db924cf7c 2375 MsgSched_Y_RORX(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 72)
wolfSSL 15:117db924cf7c 2376 MsgSched_Y_RORX(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 80)
wolfSSL 15:117db924cf7c 2377 MsgSched_Y_RORX(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 88)
wolfSSL 15:117db924cf7c 2378
/* Last group: round macros only (offsets 96..120). L3 and L2 are zeroed
 * before entering the RND_RORX_X4 sequence. */
wolfSSL 15:117db924cf7c 2379 SET_W_Y_4(48)
wolfSSL 15:117db924cf7c 2380 "xorl " L3 ", " L3 "\n\t"
wolfSSL 15:117db924cf7c 2381 "xorl " L2 ", " L2 "\n\t"
wolfSSL 15:117db924cf7c 2382 RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 96)
wolfSSL 15:117db924cf7c 2383 RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 104)
wolfSSL 15:117db924cf7c 2384 RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 112)
wolfSSL 15:117db924cf7c 2385 RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 120)
/* The round macros defer the final addition by one round; apply the
 * pending value left in L3 to r8 here. */
wolfSSL 15:117db924cf7c 2386 /* Prev RND: h += Maj(a,b,c) */
wolfSSL 15:117db924cf7c 2387 "addl " L3 ", %%r8d\n\t"
wolfSSL 15:117db924cf7c 2388
wolfSSL 15:117db924cf7c 2389 STORE_ADD_DIGEST()
wolfSSL 15:117db924cf7c 2390
/* Release the stack scratch space reserved at the top. */
wolfSSL 15:117db924cf7c 2391 "addq $512, %%rsp\n\t"
wolfSSL 15:117db924cf7c 2392
/* No outputs; the context is updated through memory (hence the "memory"
 * clobber). */
wolfSSL 15:117db924cf7c 2393 :
wolfSSL 15:117db924cf7c 2394 : [FLIP] "m" (mBYTE_FLIP_MASK[0]),
wolfSSL 15:117db924cf7c 2395 [SHUF00BA] "m" (mSHUF_Y_00BA[0]),
wolfSSL 15:117db924cf7c 2396 [SHUFDC00] "m" (mSHUF_Y_DC00[0]),
wolfSSL 15:117db924cf7c 2397 [sha256] "r" (sha256),
wolfSSL 15:117db924cf7c 2398 [K] "m" (K256)
wolfSSL 15:117db924cf7c 2399 : WORK_REGS, STATE_REGS, YMM_REGS, "memory"
wolfSSL 15:117db924cf7c 2400 );
wolfSSL 15:117db924cf7c 2401
wolfSSL 15:117db924cf7c 2402 return 0;
wolfSSL 15:117db924cf7c 2403 }
wolfSSL 15:117db924cf7c 2404
wolfSSL 15:117db924cf7c 2405 SHA256_NOINLINE static int Transform_Sha256_AVX2_RORX_Len(wc_Sha256* sha256,
wolfSSL 15:117db924cf7c 2406 word32 len)
wolfSSL 15:117db924cf7c 2407 {
wolfSSL 15:117db924cf7c 2408 if ((len & WC_SHA256_BLOCK_SIZE) != 0) {
wolfSSL 15:117db924cf7c 2409 XMEMCPY(sha256->buffer, sha256->data, WC_SHA256_BLOCK_SIZE);
wolfSSL 15:117db924cf7c 2410 Transform_Sha256_AVX2_RORX(sha256);
wolfSSL 15:117db924cf7c 2411 sha256->data += WC_SHA256_BLOCK_SIZE;
wolfSSL 15:117db924cf7c 2412 len -= WC_SHA256_BLOCK_SIZE;
wolfSSL 15:117db924cf7c 2413 if (len == 0)
wolfSSL 15:117db924cf7c 2414 return 0;
wolfSSL 15:117db924cf7c 2415 }
wolfSSL 15:117db924cf7c 2416
wolfSSL 15:117db924cf7c 2417 __asm__ __volatile__ (
wolfSSL 15:117db924cf7c 2418
wolfSSL 15:117db924cf7c 2419 "subq $512, %%rsp\n\t"
wolfSSL 15:117db924cf7c 2420 "movq 120(%[sha256]), %%rax\n\t"
wolfSSL 15:117db924cf7c 2421
wolfSSL 15:117db924cf7c 2422 INIT_MASKS_Y(BYTE_FLIP_Y_MASK, SHUF_Y_00BA, SHUF_Y_DC00)
wolfSSL 15:117db924cf7c 2423 LOAD_DIGEST()
wolfSSL 15:117db924cf7c 2424
wolfSSL 15:117db924cf7c 2425 "# Start of loop processing two blocks\n"
wolfSSL 15:117db924cf7c 2426 "1:\n\t"
wolfSSL 15:117db924cf7c 2427
wolfSSL 15:117db924cf7c 2428 LOAD_W_K(BYTE_FLIP_Y_MASK, rax)
wolfSSL 15:117db924cf7c 2429
wolfSSL 15:117db924cf7c 2430 "movl %%r9d, " L4 "\n\t"
wolfSSL 15:117db924cf7c 2431 "rorx $6, %%r12d, " L1 "\n\t"
wolfSSL 15:117db924cf7c 2432 "xorl %%r10d, " L4 "\n\t"
wolfSSL 15:117db924cf7c 2433
wolfSSL 15:117db924cf7c 2434 SET_W_Y_4(0)
wolfSSL 15:117db924cf7c 2435 MsgSched_Y_RORX(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
wolfSSL 15:117db924cf7c 2436 MsgSched_Y_RORX(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 8)
wolfSSL 15:117db924cf7c 2437 MsgSched_Y_RORX(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 16)
wolfSSL 15:117db924cf7c 2438 MsgSched_Y_RORX(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 24)
wolfSSL 15:117db924cf7c 2439
wolfSSL 15:117db924cf7c 2440 SET_W_Y_4(16)
wolfSSL 15:117db924cf7c 2441 MsgSched_Y_RORX(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 32)
wolfSSL 15:117db924cf7c 2442 MsgSched_Y_RORX(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 40)
wolfSSL 15:117db924cf7c 2443 MsgSched_Y_RORX(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 48)
wolfSSL 15:117db924cf7c 2444 MsgSched_Y_RORX(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 56)
wolfSSL 15:117db924cf7c 2445
wolfSSL 15:117db924cf7c 2446 SET_W_Y_4(32)
wolfSSL 15:117db924cf7c 2447 MsgSched_Y_RORX(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 64)
wolfSSL 15:117db924cf7c 2448 MsgSched_Y_RORX(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 72)
wolfSSL 15:117db924cf7c 2449 MsgSched_Y_RORX(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 80)
wolfSSL 15:117db924cf7c 2450 MsgSched_Y_RORX(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 88)
wolfSSL 15:117db924cf7c 2451
wolfSSL 15:117db924cf7c 2452 SET_W_Y_4(48)
wolfSSL 15:117db924cf7c 2453 "xorl " L3 ", " L3 "\n\t"
wolfSSL 15:117db924cf7c 2454 "xorl " L2 ", " L2 "\n\t"
wolfSSL 15:117db924cf7c 2455 RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 96)
wolfSSL 15:117db924cf7c 2456 RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 104)
wolfSSL 15:117db924cf7c 2457 RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 112)
wolfSSL 15:117db924cf7c 2458 RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 120)
wolfSSL 15:117db924cf7c 2459 /* Prev RND: h += Maj(a,b,c) */
wolfSSL 15:117db924cf7c 2460 "addl " L3 ", %%r8d\n\t"
wolfSSL 15:117db924cf7c 2461 "xorl " L2 ", " L2 "\n\t"
wolfSSL 15:117db924cf7c 2462
wolfSSL 15:117db924cf7c 2463 ADD_DIGEST()
wolfSSL 15:117db924cf7c 2464 STORE_DIGEST()
wolfSSL 15:117db924cf7c 2465
wolfSSL 15:117db924cf7c 2466 "movl %%r9d, " L4 "\n\t"
wolfSSL 15:117db924cf7c 2467 "xorl " L3 ", " L3 "\n\t"
wolfSSL 15:117db924cf7c 2468 "xorl %%r10d, " L4 "\n\t"
wolfSSL 15:117db924cf7c 2469
wolfSSL 15:117db924cf7c 2470 RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 4)
wolfSSL 15:117db924cf7c 2471 RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
wolfSSL 15:117db924cf7c 2472 RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 20)
wolfSSL 15:117db924cf7c 2473 RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 28)
wolfSSL 15:117db924cf7c 2474 RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 36)
wolfSSL 15:117db924cf7c 2475 RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 44)
wolfSSL 15:117db924cf7c 2476 RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 52)
wolfSSL 15:117db924cf7c 2477 RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 60)
wolfSSL 15:117db924cf7c 2478 RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 68)
wolfSSL 15:117db924cf7c 2479 RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 76)
wolfSSL 15:117db924cf7c 2480 RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 84)
wolfSSL 15:117db924cf7c 2481 RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 92)
wolfSSL 15:117db924cf7c 2482 RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 100)
wolfSSL 15:117db924cf7c 2483 RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 108)
wolfSSL 15:117db924cf7c 2484 RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 116)
wolfSSL 15:117db924cf7c 2485 RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 124)
wolfSSL 15:117db924cf7c 2486 /* Prev RND: h += Maj(a,b,c) */
wolfSSL 15:117db924cf7c 2487 "addl " L3 ", %%r8d\n\t"
wolfSSL 15:117db924cf7c 2488 "movq 120(%[sha256]), %%rax\n\t"
wolfSSL 15:117db924cf7c 2489
wolfSSL 15:117db924cf7c 2490 ADD_DIGEST()
wolfSSL 15:117db924cf7c 2491
wolfSSL 15:117db924cf7c 2492 "addq $128, %%rax\n\t"
wolfSSL 15:117db924cf7c 2493 "subl $128, %[len]\n\t"
wolfSSL 15:117db924cf7c 2494
wolfSSL 15:117db924cf7c 2495 STORE_DIGEST()
wolfSSL 15:117db924cf7c 2496
wolfSSL 15:117db924cf7c 2497 "movq %%rax, 120(%[sha256])\n\t"
wolfSSL 15:117db924cf7c 2498 "jnz 1b\n\t"
wolfSSL 15:117db924cf7c 2499
wolfSSL 15:117db924cf7c 2500 "addq $512, %%rsp\n\t"
wolfSSL 15:117db924cf7c 2501
wolfSSL 15:117db924cf7c 2502 :
wolfSSL 15:117db924cf7c 2503 : [FLIP] "m" (mBYTE_FLIP_Y_MASK[0]),
wolfSSL 15:117db924cf7c 2504 [SHUF00BA] "m" (mSHUF_Y_00BA[0]),
wolfSSL 15:117db924cf7c 2505 [SHUFDC00] "m" (mSHUF_Y_DC00[0]),
wolfSSL 15:117db924cf7c 2506 [sha256] "r" (sha256),
wolfSSL 15:117db924cf7c 2507 [len] "r" (len),
wolfSSL 15:117db924cf7c 2508 [K] "m" (K256)
wolfSSL 15:117db924cf7c 2509 : WORK_REGS, STATE_REGS, YMM_REGS, "memory"
wolfSSL 15:117db924cf7c 2510 );
wolfSSL 15:117db924cf7c 2511
wolfSSL 15:117db924cf7c 2512 return 0;
wolfSSL 15:117db924cf7c 2513 }
wolfSSL 15:117db924cf7c 2514 #endif /* HAVE_INTEL_RORX */
wolfSSL 15:117db924cf7c 2515 #endif /* HAVE_INTEL_AVX2 */
wolfSSL 15:117db924cf7c 2516
wolfSSL 15:117db924cf7c 2517
wolfSSL 15:117db924cf7c 2518 #ifdef WOLFSSL_SHA224
wolfSSL 15:117db924cf7c 2519
wolfSSL 15:117db924cf7c 2520 #ifdef STM32_HASH_SHA2
wolfSSL 15:117db924cf7c 2521
wolfSSL 15:117db924cf7c 2522 /* Supports CubeMX HAL or Standard Peripheral Library */
wolfSSL 15:117db924cf7c 2523
wolfSSL 15:117db924cf7c 2524 int wc_InitSha224_ex(wc_Sha224* sha224, void* heap, int devId)
wolfSSL 15:117db924cf7c 2525 {
wolfSSL 15:117db924cf7c 2526 if (sha224 == NULL)
wolfSSL 15:117db924cf7c 2527 return BAD_FUNC_ARG;
wolfSSL 15:117db924cf7c 2528
wolfSSL 15:117db924cf7c 2529 (void)devId;
wolfSSL 15:117db924cf7c 2530 (void)heap;
wolfSSL 15:117db924cf7c 2531
wolfSSL 15:117db924cf7c 2532 wc_Stm32_Hash_Init(&sha224->stmCtx);
wolfSSL 15:117db924cf7c 2533 return 0;
wolfSSL 15:117db924cf7c 2534 }
wolfSSL 15:117db924cf7c 2535
wolfSSL 15:117db924cf7c 2536 int wc_Sha224Update(wc_Sha224* sha224, const byte* data, word32 len)
wolfSSL 15:117db924cf7c 2537 {
wolfSSL 15:117db924cf7c 2538 int ret = 0;
wolfSSL 15:117db924cf7c 2539
wolfSSL 15:117db924cf7c 2540 if (sha224 == NULL || (data == NULL && len > 0)) {
wolfSSL 15:117db924cf7c 2541 return BAD_FUNC_ARG;
wolfSSL 15:117db924cf7c 2542 }
wolfSSL 15:117db924cf7c 2543
wolfSSL 15:117db924cf7c 2544 ret = wolfSSL_CryptHwMutexLock();
wolfSSL 15:117db924cf7c 2545 if (ret == 0) {
wolfSSL 15:117db924cf7c 2546 ret = wc_Stm32_Hash_Update(&sha224->stmCtx,
wolfSSL 15:117db924cf7c 2547 HASH_AlgoSelection_SHA224, data, len);
wolfSSL 15:117db924cf7c 2548 wolfSSL_CryptHwMutexUnLock();
wolfSSL 15:117db924cf7c 2549 }
wolfSSL 15:117db924cf7c 2550 return ret;
wolfSSL 15:117db924cf7c 2551 }
wolfSSL 15:117db924cf7c 2552
wolfSSL 15:117db924cf7c 2553 int wc_Sha224Final(wc_Sha224* sha224, byte* hash)
wolfSSL 15:117db924cf7c 2554 {
wolfSSL 15:117db924cf7c 2555 int ret = 0;
wolfSSL 15:117db924cf7c 2556
wolfSSL 15:117db924cf7c 2557 if (sha224 == NULL || hash == NULL) {
wolfSSL 15:117db924cf7c 2558 return BAD_FUNC_ARG;
wolfSSL 15:117db924cf7c 2559 }
wolfSSL 15:117db924cf7c 2560
wolfSSL 15:117db924cf7c 2561 ret = wolfSSL_CryptHwMutexLock();
wolfSSL 15:117db924cf7c 2562 if (ret == 0) {
wolfSSL 15:117db924cf7c 2563 ret = wc_Stm32_Hash_Final(&sha224->stmCtx,
wolfSSL 15:117db924cf7c 2564 HASH_AlgoSelection_SHA224, hash, WC_SHA224_DIGEST_SIZE);
wolfSSL 15:117db924cf7c 2565 wolfSSL_CryptHwMutexUnLock();
wolfSSL 15:117db924cf7c 2566 }
wolfSSL 15:117db924cf7c 2567
wolfSSL 15:117db924cf7c 2568 (void)wc_InitSha224(sha224); /* reset state */
wolfSSL 15:117db924cf7c 2569
wolfSSL 15:117db924cf7c 2570 return ret;
wolfSSL 15:117db924cf7c 2571 }
wolfSSL 15:117db924cf7c 2572
wolfSSL 15:117db924cf7c 2573 #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_HASH)
wolfSSL 15:117db924cf7c 2574 /* functions defined in wolfcrypt/src/port/caam/caam_sha256.c */
wolfSSL 15:117db924cf7c 2575 #else
wolfSSL 15:117db924cf7c 2576
wolfSSL 15:117db924cf7c 2577 #define NEED_SOFT_SHA224
wolfSSL 15:117db924cf7c 2578
wolfSSL 15:117db924cf7c 2579
wolfSSL 15:117db924cf7c 2580 static int InitSha224(wc_Sha224* sha224)
wolfSSL 15:117db924cf7c 2581 {
wolfSSL 15:117db924cf7c 2582 int ret = 0;
wolfSSL 15:117db924cf7c 2583
wolfSSL 15:117db924cf7c 2584 if (sha224 == NULL) {
wolfSSL 15:117db924cf7c 2585 return BAD_FUNC_ARG;
wolfSSL 15:117db924cf7c 2586 }
wolfSSL 15:117db924cf7c 2587
wolfSSL 15:117db924cf7c 2588 sha224->digest[0] = 0xc1059ed8;
wolfSSL 15:117db924cf7c 2589 sha224->digest[1] = 0x367cd507;
wolfSSL 15:117db924cf7c 2590 sha224->digest[2] = 0x3070dd17;
wolfSSL 15:117db924cf7c 2591 sha224->digest[3] = 0xf70e5939;
wolfSSL 15:117db924cf7c 2592 sha224->digest[4] = 0xffc00b31;
wolfSSL 15:117db924cf7c 2593 sha224->digest[5] = 0x68581511;
wolfSSL 15:117db924cf7c 2594 sha224->digest[6] = 0x64f98fa7;
wolfSSL 15:117db924cf7c 2595 sha224->digest[7] = 0xbefa4fa4;
wolfSSL 15:117db924cf7c 2596
wolfSSL 15:117db924cf7c 2597 sha224->buffLen = 0;
wolfSSL 15:117db924cf7c 2598 sha224->loLen = 0;
wolfSSL 15:117db924cf7c 2599 sha224->hiLen = 0;
wolfSSL 15:117db924cf7c 2600
wolfSSL 15:117db924cf7c 2601 #if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
wolfSSL 15:117db924cf7c 2602 /* choose best Transform function under this runtime environment */
wolfSSL 15:117db924cf7c 2603 Sha256_SetTransform();
wolfSSL 15:117db924cf7c 2604 #endif
wolfSSL 15:117db924cf7c 2605
wolfSSL 15:117db924cf7c 2606 return ret;
wolfSSL 15:117db924cf7c 2607 }
wolfSSL 15:117db924cf7c 2608
wolfSSL 15:117db924cf7c 2609 #endif
wolfSSL 15:117db924cf7c 2610
wolfSSL 15:117db924cf7c 2611 #ifdef NEED_SOFT_SHA224
wolfSSL 15:117db924cf7c 2612 int wc_InitSha224_ex(wc_Sha224* sha224, void* heap, int devId)
wolfSSL 15:117db924cf7c 2613 {
wolfSSL 15:117db924cf7c 2614 int ret = 0;
wolfSSL 15:117db924cf7c 2615
wolfSSL 15:117db924cf7c 2616 if (sha224 == NULL)
wolfSSL 15:117db924cf7c 2617 return BAD_FUNC_ARG;
wolfSSL 15:117db924cf7c 2618
wolfSSL 15:117db924cf7c 2619 sha224->heap = heap;
wolfSSL 15:117db924cf7c 2620
wolfSSL 15:117db924cf7c 2621 ret = InitSha224(sha224);
wolfSSL 15:117db924cf7c 2622 if (ret != 0)
wolfSSL 15:117db924cf7c 2623 return ret;
wolfSSL 15:117db924cf7c 2624
wolfSSL 15:117db924cf7c 2625 #ifdef WOLFSSL_SMALL_STACK_CACHE
wolfSSL 15:117db924cf7c 2626 sha224->W = NULL;
wolfSSL 15:117db924cf7c 2627 #endif
wolfSSL 15:117db924cf7c 2628
wolfSSL 15:117db924cf7c 2629 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA224)
wolfSSL 15:117db924cf7c 2630 ret = wolfAsync_DevCtxInit(&sha224->asyncDev,
wolfSSL 15:117db924cf7c 2631 WOLFSSL_ASYNC_MARKER_SHA224, sha224->heap, devId);
wolfSSL 15:117db924cf7c 2632 #else
wolfSSL 15:117db924cf7c 2633 (void)devId;
wolfSSL 15:117db924cf7c 2634 #endif /* WOLFSSL_ASYNC_CRYPT */
wolfSSL 15:117db924cf7c 2635
wolfSSL 15:117db924cf7c 2636 return ret;
wolfSSL 15:117db924cf7c 2637 }
wolfSSL 15:117db924cf7c 2638
wolfSSL 15:117db924cf7c 2639 int wc_Sha224Update(wc_Sha224* sha224, const byte* data, word32 len)
wolfSSL 15:117db924cf7c 2640 {
wolfSSL 15:117db924cf7c 2641 int ret;
wolfSSL 15:117db924cf7c 2642
wolfSSL 15:117db924cf7c 2643 if (sha224 == NULL || (data == NULL && len > 0)) {
wolfSSL 15:117db924cf7c 2644 return BAD_FUNC_ARG;
wolfSSL 15:117db924cf7c 2645 }
wolfSSL 15:117db924cf7c 2646
wolfSSL 15:117db924cf7c 2647 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA224)
wolfSSL 15:117db924cf7c 2648 if (sha224->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA224) {
wolfSSL 15:117db924cf7c 2649 #if defined(HAVE_INTEL_QA)
wolfSSL 15:117db924cf7c 2650 return IntelQaSymSha224(&sha224->asyncDev, NULL, data, len);
wolfSSL 15:117db924cf7c 2651 #endif
wolfSSL 15:117db924cf7c 2652 }
wolfSSL 15:117db924cf7c 2653 #endif /* WOLFSSL_ASYNC_CRYPT */
wolfSSL 15:117db924cf7c 2654
wolfSSL 15:117db924cf7c 2655 ret = Sha256Update((wc_Sha256*)sha224, data, len);
wolfSSL 15:117db924cf7c 2656
wolfSSL 15:117db924cf7c 2657 return ret;
wolfSSL 15:117db924cf7c 2658 }
wolfSSL 15:117db924cf7c 2659
wolfSSL 15:117db924cf7c 2660 int wc_Sha224Final(wc_Sha224* sha224, byte* hash)
wolfSSL 15:117db924cf7c 2661 {
wolfSSL 15:117db924cf7c 2662 int ret;
wolfSSL 15:117db924cf7c 2663
wolfSSL 15:117db924cf7c 2664 if (sha224 == NULL || hash == NULL) {
wolfSSL 15:117db924cf7c 2665 return BAD_FUNC_ARG;
wolfSSL 15:117db924cf7c 2666 }
wolfSSL 15:117db924cf7c 2667
wolfSSL 15:117db924cf7c 2668 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA224)
wolfSSL 15:117db924cf7c 2669 if (sha224->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA224) {
wolfSSL 15:117db924cf7c 2670 #if defined(HAVE_INTEL_QA)
wolfSSL 15:117db924cf7c 2671 return IntelQaSymSha224(&sha224->asyncDev, hash, NULL,
wolfSSL 15:117db924cf7c 2672 WC_SHA224_DIGEST_SIZE);
wolfSSL 15:117db924cf7c 2673 #endif
wolfSSL 15:117db924cf7c 2674 }
wolfSSL 15:117db924cf7c 2675 #endif /* WOLFSSL_ASYNC_CRYPT */
wolfSSL 15:117db924cf7c 2676
wolfSSL 15:117db924cf7c 2677 ret = Sha256Final((wc_Sha256*)sha224);
wolfSSL 15:117db924cf7c 2678 if (ret != 0)
wolfSSL 15:117db924cf7c 2679 return ret;
wolfSSL 15:117db924cf7c 2680
wolfSSL 15:117db924cf7c 2681 #if defined(LITTLE_ENDIAN_ORDER)
wolfSSL 15:117db924cf7c 2682 ByteReverseWords(sha224->digest, sha224->digest, WC_SHA224_DIGEST_SIZE);
wolfSSL 15:117db924cf7c 2683 #endif
wolfSSL 15:117db924cf7c 2684 XMEMCPY(hash, sha224->digest, WC_SHA224_DIGEST_SIZE);
wolfSSL 15:117db924cf7c 2685
wolfSSL 15:117db924cf7c 2686 return InitSha224(sha224); /* reset state */
wolfSSL 15:117db924cf7c 2687 }
wolfSSL 15:117db924cf7c 2688 #endif /* end of SHA224 software implementation */
wolfSSL 15:117db924cf7c 2689
wolfSSL 15:117db924cf7c 2690 int wc_InitSha224(wc_Sha224* sha224)
wolfSSL 15:117db924cf7c 2691 {
wolfSSL 15:117db924cf7c 2692 return wc_InitSha224_ex(sha224, NULL, INVALID_DEVID);
wolfSSL 15:117db924cf7c 2693 }
wolfSSL 15:117db924cf7c 2694
wolfSSL 15:117db924cf7c 2695 void wc_Sha224Free(wc_Sha224* sha224)
wolfSSL 15:117db924cf7c 2696 {
wolfSSL 15:117db924cf7c 2697 if (sha224 == NULL)
wolfSSL 15:117db924cf7c 2698 return;
wolfSSL 15:117db924cf7c 2699
wolfSSL 15:117db924cf7c 2700 #ifdef WOLFSSL_SMALL_STACK_CACHE
wolfSSL 15:117db924cf7c 2701 if (sha224->W != NULL) {
wolfSSL 15:117db924cf7c 2702 XFREE(sha224->W, NULL, DYNAMIC_TYPE_RNG);
wolfSSL 15:117db924cf7c 2703 sha224->W = NULL;
wolfSSL 15:117db924cf7c 2704 }
wolfSSL 15:117db924cf7c 2705 #endif
wolfSSL 15:117db924cf7c 2706
wolfSSL 15:117db924cf7c 2707 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA224)
wolfSSL 15:117db924cf7c 2708 wolfAsync_DevCtxFree(&sha224->asyncDev, WOLFSSL_ASYNC_MARKER_SHA224);
wolfSSL 15:117db924cf7c 2709 #endif /* WOLFSSL_ASYNC_CRYPT */
wolfSSL 15:117db924cf7c 2710 }
wolfSSL 15:117db924cf7c 2711 #endif /* WOLFSSL_SHA224 */
wolfSSL 15:117db924cf7c 2712
wolfSSL 15:117db924cf7c 2713
wolfSSL 15:117db924cf7c 2714 int wc_InitSha256(wc_Sha256* sha256)
wolfSSL 15:117db924cf7c 2715 {
wolfSSL 15:117db924cf7c 2716 return wc_InitSha256_ex(sha256, NULL, INVALID_DEVID);
wolfSSL 15:117db924cf7c 2717 }
wolfSSL 15:117db924cf7c 2718
wolfSSL 15:117db924cf7c 2719 void wc_Sha256Free(wc_Sha256* sha256)
wolfSSL 15:117db924cf7c 2720 {
wolfSSL 15:117db924cf7c 2721 if (sha256 == NULL)
wolfSSL 15:117db924cf7c 2722 return;
wolfSSL 15:117db924cf7c 2723
wolfSSL 15:117db924cf7c 2724 #ifdef WOLFSSL_SMALL_STACK_CACHE
wolfSSL 15:117db924cf7c 2725 if (sha256->W != NULL) {
wolfSSL 15:117db924cf7c 2726 XFREE(sha256->W, NULL, DYNAMIC_TYPE_RNG);
wolfSSL 15:117db924cf7c 2727 sha256->W = NULL;
wolfSSL 15:117db924cf7c 2728 }
wolfSSL 15:117db924cf7c 2729 #endif
wolfSSL 15:117db924cf7c 2730
wolfSSL 15:117db924cf7c 2731 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
wolfSSL 15:117db924cf7c 2732 wolfAsync_DevCtxFree(&sha256->asyncDev, WOLFSSL_ASYNC_MARKER_SHA256);
wolfSSL 15:117db924cf7c 2733 #endif /* WOLFSSL_ASYNC_CRYPT */
wolfSSL 15:117db924cf7c 2734 }
wolfSSL 15:117db924cf7c 2735
wolfSSL 15:117db924cf7c 2736 #endif /* !WOLFSSL_TI_HASH */
wolfSSL 15:117db924cf7c 2737 #endif /* HAVE_FIPS */
wolfSSL 15:117db924cf7c 2738
wolfSSL 15:117db924cf7c 2739
wolfSSL 15:117db924cf7c 2740 #ifndef WOLFSSL_TI_HASH
wolfSSL 15:117db924cf7c 2741 #ifdef WOLFSSL_SHA224
wolfSSL 15:117db924cf7c 2742 int wc_Sha224GetHash(wc_Sha224* sha224, byte* hash)
wolfSSL 15:117db924cf7c 2743 {
wolfSSL 15:117db924cf7c 2744 int ret;
wolfSSL 15:117db924cf7c 2745 wc_Sha224 tmpSha224;
wolfSSL 15:117db924cf7c 2746
wolfSSL 15:117db924cf7c 2747 if (sha224 == NULL || hash == NULL)
wolfSSL 15:117db924cf7c 2748 return BAD_FUNC_ARG;
wolfSSL 15:117db924cf7c 2749
wolfSSL 15:117db924cf7c 2750 ret = wc_Sha224Copy(sha224, &tmpSha224);
wolfSSL 15:117db924cf7c 2751 if (ret == 0) {
wolfSSL 15:117db924cf7c 2752 ret = wc_Sha224Final(&tmpSha224, hash);
wolfSSL 15:117db924cf7c 2753 wc_Sha224Free(&tmpSha224);
wolfSSL 15:117db924cf7c 2754 }
wolfSSL 15:117db924cf7c 2755 return ret;
wolfSSL 15:117db924cf7c 2756 }
wolfSSL 15:117db924cf7c 2757 int wc_Sha224Copy(wc_Sha224* src, wc_Sha224* dst)
wolfSSL 15:117db924cf7c 2758 {
wolfSSL 15:117db924cf7c 2759 int ret = 0;
wolfSSL 15:117db924cf7c 2760
wolfSSL 15:117db924cf7c 2761 if (src == NULL || dst == NULL)
wolfSSL 15:117db924cf7c 2762 return BAD_FUNC_ARG;
wolfSSL 15:117db924cf7c 2763
wolfSSL 15:117db924cf7c 2764 XMEMCPY(dst, src, sizeof(wc_Sha224));
wolfSSL 15:117db924cf7c 2765 #ifdef WOLFSSL_SMALL_STACK_CACHE
wolfSSL 15:117db924cf7c 2766 dst->W = NULL;
wolfSSL 15:117db924cf7c 2767 #endif
wolfSSL 15:117db924cf7c 2768
wolfSSL 15:117db924cf7c 2769 #ifdef WOLFSSL_ASYNC_CRYPT
wolfSSL 15:117db924cf7c 2770 ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev);
wolfSSL 15:117db924cf7c 2771 #endif
wolfSSL 15:117db924cf7c 2772
wolfSSL 15:117db924cf7c 2773 return ret;
wolfSSL 15:117db924cf7c 2774 }
wolfSSL 15:117db924cf7c 2775 #endif /* WOLFSSL_SHA224 */
wolfSSL 15:117db924cf7c 2776
wolfSSL 15:117db924cf7c 2777 int wc_Sha256GetHash(wc_Sha256* sha256, byte* hash)
wolfSSL 15:117db924cf7c 2778 {
wolfSSL 15:117db924cf7c 2779 int ret;
wolfSSL 15:117db924cf7c 2780 wc_Sha256 tmpSha256;
wolfSSL 15:117db924cf7c 2781
wolfSSL 15:117db924cf7c 2782 if (sha256 == NULL || hash == NULL)
wolfSSL 15:117db924cf7c 2783 return BAD_FUNC_ARG;
wolfSSL 15:117db924cf7c 2784
wolfSSL 15:117db924cf7c 2785 ret = wc_Sha256Copy(sha256, &tmpSha256);
wolfSSL 15:117db924cf7c 2786 if (ret == 0) {
wolfSSL 15:117db924cf7c 2787 ret = wc_Sha256Final(&tmpSha256, hash);
wolfSSL 15:117db924cf7c 2788 wc_Sha256Free(&tmpSha256);
wolfSSL 15:117db924cf7c 2789 }
wolfSSL 15:117db924cf7c 2790 return ret;
wolfSSL 15:117db924cf7c 2791 }
wolfSSL 15:117db924cf7c 2792 int wc_Sha256Copy(wc_Sha256* src, wc_Sha256* dst)
wolfSSL 15:117db924cf7c 2793 {
wolfSSL 15:117db924cf7c 2794 int ret = 0;
wolfSSL 15:117db924cf7c 2795
wolfSSL 15:117db924cf7c 2796 if (src == NULL || dst == NULL)
wolfSSL 15:117db924cf7c 2797 return BAD_FUNC_ARG;
wolfSSL 15:117db924cf7c 2798
wolfSSL 15:117db924cf7c 2799 XMEMCPY(dst, src, sizeof(wc_Sha256));
wolfSSL 15:117db924cf7c 2800 #ifdef WOLFSSL_SMALL_STACK_CACHE
wolfSSL 15:117db924cf7c 2801 dst->W = NULL;
wolfSSL 15:117db924cf7c 2802 #endif
wolfSSL 15:117db924cf7c 2803
wolfSSL 15:117db924cf7c 2804 #ifdef WOLFSSL_ASYNC_CRYPT
wolfSSL 15:117db924cf7c 2805 ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev);
wolfSSL 15:117db924cf7c 2806 #endif
wolfSSL 15:117db924cf7c 2807 #ifdef WOLFSSL_PIC32MZ_HASH
wolfSSL 15:117db924cf7c 2808 ret = wc_Pic32HashCopy(&src->cache, &dst->cache);
wolfSSL 15:117db924cf7c 2809 #endif
wolfSSL 15:117db924cf7c 2810
wolfSSL 15:117db924cf7c 2811 return ret;
wolfSSL 15:117db924cf7c 2812 }
wolfSSL 15:117db924cf7c 2813 #endif /* !WOLFSSL_TI_HASH */
wolfSSL 15:117db924cf7c 2814
wolfSSL 15:117db924cf7c 2815 #endif /* NO_SHA256 */
wolfSSL 15:117db924cf7c 2816