wolfSSL SSL/TLS library, support up to TLS1.3

Dependents:   CyaSSL-Twitter-OAuth4Tw Example-client-tls-cert TwitterReader TweetTest ... more

Committer:
wolfSSL
Date:
Tue Aug 22 10:48:22 2017 +0000
Revision:
13:f67a6c6013ca
wolfSSL3.12.0 with TLS1.3

Who changed what in which revision?

User | Revision | Line number | New contents of line
wolfSSL 13:f67a6c6013ca 1 /* sha256.c
wolfSSL 13:f67a6c6013ca 2 *
wolfSSL 13:f67a6c6013ca 3 * Copyright (C) 2006-2016 wolfSSL Inc.
wolfSSL 13:f67a6c6013ca 4 *
wolfSSL 13:f67a6c6013ca 5 * This file is part of wolfSSL.
wolfSSL 13:f67a6c6013ca 6 *
wolfSSL 13:f67a6c6013ca 7 * wolfSSL is free software; you can redistribute it and/or modify
wolfSSL 13:f67a6c6013ca 8 * it under the terms of the GNU General Public License as published by
wolfSSL 13:f67a6c6013ca 9 * the Free Software Foundation; either version 2 of the License, or
wolfSSL 13:f67a6c6013ca 10 * (at your option) any later version.
wolfSSL 13:f67a6c6013ca 11 *
wolfSSL 13:f67a6c6013ca 12 * wolfSSL is distributed in the hope that it will be useful,
wolfSSL 13:f67a6c6013ca 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
wolfSSL 13:f67a6c6013ca 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
wolfSSL 13:f67a6c6013ca 15 * GNU General Public License for more details.
wolfSSL 13:f67a6c6013ca 16 *
wolfSSL 13:f67a6c6013ca 17 * You should have received a copy of the GNU General Public License
wolfSSL 13:f67a6c6013ca 18 * along with this program; if not, write to the Free Software
wolfSSL 13:f67a6c6013ca 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
wolfSSL 13:f67a6c6013ca 20 */
wolfSSL 13:f67a6c6013ca 21
wolfSSL 13:f67a6c6013ca 22
wolfSSL 13:f67a6c6013ca 23 /* code submitted by raphael.huck@efixo.com */
wolfSSL 13:f67a6c6013ca 24
wolfSSL 13:f67a6c6013ca 25 #ifdef HAVE_CONFIG_H
wolfSSL 13:f67a6c6013ca 26 #include <config.h>
wolfSSL 13:f67a6c6013ca 27 #endif
wolfSSL 13:f67a6c6013ca 28
wolfSSL 13:f67a6c6013ca 29 #include <wolfssl/wolfcrypt/settings.h>
wolfSSL 13:f67a6c6013ca 30
wolfSSL 13:f67a6c6013ca 31 #if !defined(NO_SHA256)
wolfSSL 13:f67a6c6013ca 32
wolfSSL 13:f67a6c6013ca 33 #include <wolfssl/wolfcrypt/sha256.h>
wolfSSL 13:f67a6c6013ca 34 #include <wolfssl/wolfcrypt/error-crypt.h>
wolfSSL 13:f67a6c6013ca 35 #include <wolfssl/wolfcrypt/cpuid.h>
wolfSSL 13:f67a6c6013ca 36
wolfSSL 13:f67a6c6013ca 37 /* fips wrapper calls, user can call direct */
wolfSSL 13:f67a6c6013ca 38 #ifdef HAVE_FIPS
wolfSSL 13:f67a6c6013ca 39
wolfSSL 13:f67a6c6013ca 40 int wc_InitSha256(Sha256* sha)
wolfSSL 13:f67a6c6013ca 41 {
wolfSSL 13:f67a6c6013ca 42 if (sha == NULL) {
wolfSSL 13:f67a6c6013ca 43 return BAD_FUNC_ARG;
wolfSSL 13:f67a6c6013ca 44 }
wolfSSL 13:f67a6c6013ca 45 return InitSha256_fips(sha);
wolfSSL 13:f67a6c6013ca 46 }
wolfSSL 13:f67a6c6013ca 47 int wc_InitSha256_ex(Sha256* sha, void* heap, int devId)
wolfSSL 13:f67a6c6013ca 48 {
wolfSSL 13:f67a6c6013ca 49 (void)heap;
wolfSSL 13:f67a6c6013ca 50 (void)devId;
wolfSSL 13:f67a6c6013ca 51 if (sha == NULL) {
wolfSSL 13:f67a6c6013ca 52 return BAD_FUNC_ARG;
wolfSSL 13:f67a6c6013ca 53 }
wolfSSL 13:f67a6c6013ca 54 return InitSha256_fips(sha);
wolfSSL 13:f67a6c6013ca 55 }
wolfSSL 13:f67a6c6013ca 56 int wc_Sha256Update(Sha256* sha, const byte* data, word32 len)
wolfSSL 13:f67a6c6013ca 57 {
wolfSSL 13:f67a6c6013ca 58 if (sha == NULL || (data == NULL && len > 0)) {
wolfSSL 13:f67a6c6013ca 59 return BAD_FUNC_ARG;
wolfSSL 13:f67a6c6013ca 60 }
wolfSSL 13:f67a6c6013ca 61 return Sha256Update_fips(sha, data, len);
wolfSSL 13:f67a6c6013ca 62 }
wolfSSL 13:f67a6c6013ca 63 int wc_Sha256Final(Sha256* sha, byte* out)
wolfSSL 13:f67a6c6013ca 64 {
wolfSSL 13:f67a6c6013ca 65 if (sha == NULL || out == NULL) {
wolfSSL 13:f67a6c6013ca 66 return BAD_FUNC_ARG;
wolfSSL 13:f67a6c6013ca 67 }
wolfSSL 13:f67a6c6013ca 68 return Sha256Final_fips(sha, out);
wolfSSL 13:f67a6c6013ca 69 }
wolfSSL 13:f67a6c6013ca 70 void wc_Sha256Free(Sha256* sha)
wolfSSL 13:f67a6c6013ca 71 {
wolfSSL 13:f67a6c6013ca 72 (void)sha;
wolfSSL 13:f67a6c6013ca 73 /* Not supported in FIPS */
wolfSSL 13:f67a6c6013ca 74 }
wolfSSL 13:f67a6c6013ca 75
wolfSSL 13:f67a6c6013ca 76 #else /* else build without fips */
wolfSSL 13:f67a6c6013ca 77
wolfSSL 13:f67a6c6013ca 78
wolfSSL 13:f67a6c6013ca 79 #if defined(WOLFSSL_TI_HASH)
wolfSSL 13:f67a6c6013ca 80 /* #include <wolfcrypt/src/port/ti/ti-hash.c> included by wc_port.c */
wolfSSL 13:f67a6c6013ca 81 #else
wolfSSL 13:f67a6c6013ca 82
wolfSSL 13:f67a6c6013ca 83 #include <wolfssl/wolfcrypt/logging.h>
wolfSSL 13:f67a6c6013ca 84
wolfSSL 13:f67a6c6013ca 85 #ifdef NO_INLINE
wolfSSL 13:f67a6c6013ca 86 #include <wolfssl/wolfcrypt/misc.h>
wolfSSL 13:f67a6c6013ca 87 #else
wolfSSL 13:f67a6c6013ca 88 #define WOLFSSL_MISC_INCLUDED
wolfSSL 13:f67a6c6013ca 89 #include <wolfcrypt/src/misc.c>
wolfSSL 13:f67a6c6013ca 90 #endif
wolfSSL 13:f67a6c6013ca 91
wolfSSL 13:f67a6c6013ca 92
wolfSSL 13:f67a6c6013ca 93 #if defined(USE_INTEL_SPEEDUP)
wolfSSL 13:f67a6c6013ca 94 #define HAVE_INTEL_AVX1
wolfSSL 13:f67a6c6013ca 95 #define HAVE_INTEL_AVX2
wolfSSL 13:f67a6c6013ca 96 #endif /* USE_INTEL_SPEEDUP */
wolfSSL 13:f67a6c6013ca 97
wolfSSL 13:f67a6c6013ca 98 #if defined(HAVE_INTEL_AVX2)
wolfSSL 13:f67a6c6013ca 99 #define HAVE_INTEL_RORX
wolfSSL 13:f67a6c6013ca 100 #endif
wolfSSL 13:f67a6c6013ca 101
wolfSSL 13:f67a6c6013ca 102
wolfSSL 13:f67a6c6013ca 103 #ifndef WOLFSSL_PIC32MZ_HASH
wolfSSL 13:f67a6c6013ca 104 static int InitSha256(Sha256* sha256)
wolfSSL 13:f67a6c6013ca 105 {
wolfSSL 13:f67a6c6013ca 106 int ret = 0;
wolfSSL 13:f67a6c6013ca 107
wolfSSL 13:f67a6c6013ca 108 if (sha256 == NULL)
wolfSSL 13:f67a6c6013ca 109 return BAD_FUNC_ARG;
wolfSSL 13:f67a6c6013ca 110
wolfSSL 13:f67a6c6013ca 111 XMEMSET(sha256->digest, 0, sizeof(sha256->digest));
wolfSSL 13:f67a6c6013ca 112 sha256->digest[0] = 0x6A09E667L;
wolfSSL 13:f67a6c6013ca 113 sha256->digest[1] = 0xBB67AE85L;
wolfSSL 13:f67a6c6013ca 114 sha256->digest[2] = 0x3C6EF372L;
wolfSSL 13:f67a6c6013ca 115 sha256->digest[3] = 0xA54FF53AL;
wolfSSL 13:f67a6c6013ca 116 sha256->digest[4] = 0x510E527FL;
wolfSSL 13:f67a6c6013ca 117 sha256->digest[5] = 0x9B05688CL;
wolfSSL 13:f67a6c6013ca 118 sha256->digest[6] = 0x1F83D9ABL;
wolfSSL 13:f67a6c6013ca 119 sha256->digest[7] = 0x5BE0CD19L;
wolfSSL 13:f67a6c6013ca 120
wolfSSL 13:f67a6c6013ca 121 sha256->buffLen = 0;
wolfSSL 13:f67a6c6013ca 122 sha256->loLen = 0;
wolfSSL 13:f67a6c6013ca 123 sha256->hiLen = 0;
wolfSSL 13:f67a6c6013ca 124
wolfSSL 13:f67a6c6013ca 125 return ret;
wolfSSL 13:f67a6c6013ca 126 }
wolfSSL 13:f67a6c6013ca 127 #endif
wolfSSL 13:f67a6c6013ca 128
wolfSSL 13:f67a6c6013ca 129
wolfSSL 13:f67a6c6013ca 130 /* Hardware Acceleration */
wolfSSL 13:f67a6c6013ca 131 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
wolfSSL 13:f67a6c6013ca 132
wolfSSL 13:f67a6c6013ca 133 /* in case intel instructions aren't available, plus we need the K[] global */
wolfSSL 13:f67a6c6013ca 134 #define NEED_SOFT_SHA256
wolfSSL 13:f67a6c6013ca 135
wolfSSL 13:f67a6c6013ca 136 /*****
wolfSSL 13:f67a6c6013ca 137 Intel AVX1/AVX2 Macro Control Structure
wolfSSL 13:f67a6c6013ca 138
wolfSSL 13:f67a6c6013ca 139 #define HAVE_INTEL_AVX1
wolfSSL 13:f67a6c6013ca 140 #define HAVE_INTEL_AVX2
wolfSSL 13:f67a6c6013ca 141
wolfSSL 13:f67a6c6013ca 142 #define HAVE_INTEL_RORX
wolfSSL 13:f67a6c6013ca 143
wolfSSL 13:f67a6c6013ca 144
wolfSSL 13:f67a6c6013ca 145 int InitSha256(Sha256* sha256) {
wolfSSL 13:f67a6c6013ca 146 Save/Recover XMM, YMM
wolfSSL 13:f67a6c6013ca 147 ...
wolfSSL 13:f67a6c6013ca 148 }
wolfSSL 13:f67a6c6013ca 149
wolfSSL 13:f67a6c6013ca 150 #if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
wolfSSL 13:f67a6c6013ca 151 Transform(); Function prototype
wolfSSL 13:f67a6c6013ca 152 #else
wolfSSL 13:f67a6c6013ca 153 Transform() { }
wolfSSL 13:f67a6c6013ca 154 int Sha256Final() {
wolfSSL 13:f67a6c6013ca 155 Save/Recover XMM, YMM
wolfSSL 13:f67a6c6013ca 156 ...
wolfSSL 13:f67a6c6013ca 157 }
wolfSSL 13:f67a6c6013ca 158 #endif
wolfSSL 13:f67a6c6013ca 159
wolfSSL 13:f67a6c6013ca 160 #if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
wolfSSL 13:f67a6c6013ca 161 #if defined(HAVE_INTEL_RORX)
wolfSSL 13:f67a6c6013ca 162 #define RND with rorx instruction
wolfSSL 13:f67a6c6013ca 163 #else
wolfSSL 13:f67a6c6013ca 164 #define RND
wolfSSL 13:f67a6c6013ca 165 #endif
wolfSSL 13:f67a6c6013ca 166 #endif
wolfSSL 13:f67a6c6013ca 167
wolfSSL 13:f67a6c6013ca 168 #if defined(HAVE_INTEL_AVX1)
wolfSSL 13:f67a6c6013ca 169
wolfSSL 13:f67a6c6013ca 170 #define XMM Instructions/inline asm
wolfSSL 13:f67a6c6013ca 171
wolfSSL 13:f67a6c6013ca 172 int Transform() {
wolfSSL 13:f67a6c6013ca 173 Stitched Message Sched/Round
wolfSSL 13:f67a6c6013ca 174 }
wolfSSL 13:f67a6c6013ca 175
wolfSSL 13:f67a6c6013ca 176 #elif defined(HAVE_INTEL_AVX2)
wolfSSL 13:f67a6c6013ca 177
wolfSSL 13:f67a6c6013ca 178 #define YMM Instructions/inline asm
wolfSSL 13:f67a6c6013ca 179
wolfSSL 13:f67a6c6013ca 180 int Transform() {
wolfSSL 13:f67a6c6013ca 181 More granular Stitched Message Sched/Round
wolfSSL 13:f67a6c6013ca 182 }
wolfSSL 13:f67a6c6013ca 183
wolfSSL 13:f67a6c6013ca 184 #endif
wolfSSL 13:f67a6c6013ca 185
wolfSSL 13:f67a6c6013ca 186 */
wolfSSL 13:f67a6c6013ca 187
wolfSSL 13:f67a6c6013ca 188 /* Each platform needs to query cpuid to see if AVX1/AVX2 (and BMI2 for
wolfSSL 13:f67a6c6013ca 189 * rorx) are supported. Also, let's setup a macro for proper linkage w/o
wolfSSL 13:f67a6c6013ca 190 * ABI conflicts */
wolfSSL 13:f67a6c6013ca 191
wolfSSL 13:f67a6c6013ca 192 /* #if defined(HAVE_INTEL_AVX1/2) at the tail of sha256 */
/* Prototypes for the block-transform variants. Each takes the context and
 * returns an int status; the accelerated variants exist only when the
 * matching HAVE_INTEL_* macro is defined. */
wolfSSL 13:f67a6c6013ca 193 static int Transform(Sha256* sha256);
wolfSSL 13:f67a6c6013ca 194 #if defined(HAVE_INTEL_AVX1)
wolfSSL 13:f67a6c6013ca 195 static int Transform_AVX1(Sha256 *sha256);
wolfSSL 13:f67a6c6013ca 196 #endif
wolfSSL 13:f67a6c6013ca 197 #if defined(HAVE_INTEL_AVX2)
wolfSSL 13:f67a6c6013ca 198 static int Transform_AVX2(Sha256 *sha256);
wolfSSL 13:f67a6c6013ca 199 static int Transform_AVX1_RORX(Sha256 *sha256);
wolfSSL 13:f67a6c6013ca 200 #endif
/* Transform variant selected at run time; assigned by Sha256_SetTransform(). */
wolfSSL 13:f67a6c6013ca 201 static int (*Transform_p)(Sha256* sha256) /* = _Transform */;
/* Non-zero once Sha256_SetTransform() has made its selection. */
wolfSSL 13:f67a6c6013ca 202 static int transform_check = 0;
/* Value returned by cpuid_get_flags(), cached by Sha256_SetTransform(). */
wolfSSL 13:f67a6c6013ca 203 static word32 intel_flags;
/* Dispatch through the selected variant; the buffer argument B is unused. */
wolfSSL 13:f67a6c6013ca 204 #define XTRANSFORM(S, B) (*Transform_p)((S))
wolfSSL 13:f67a6c6013ca 205
wolfSSL 13:f67a6c6013ca 206 static void Sha256_SetTransform(void)
wolfSSL 13:f67a6c6013ca 207 {
wolfSSL 13:f67a6c6013ca 208
wolfSSL 13:f67a6c6013ca 209 if (transform_check)
wolfSSL 13:f67a6c6013ca 210 return;
wolfSSL 13:f67a6c6013ca 211
wolfSSL 13:f67a6c6013ca 212 intel_flags = cpuid_get_flags();
wolfSSL 13:f67a6c6013ca 213
wolfSSL 13:f67a6c6013ca 214 #if defined(HAVE_INTEL_AVX2)
wolfSSL 13:f67a6c6013ca 215 if (IS_INTEL_AVX2(intel_flags) && IS_INTEL_BMI2(intel_flags)) {
wolfSSL 13:f67a6c6013ca 216 if (1)
wolfSSL 13:f67a6c6013ca 217 Transform_p = Transform_AVX1_RORX;
wolfSSL 13:f67a6c6013ca 218 else
wolfSSL 13:f67a6c6013ca 219 Transform_p = Transform_AVX2;
wolfSSL 13:f67a6c6013ca 220 }
wolfSSL 13:f67a6c6013ca 221 else
wolfSSL 13:f67a6c6013ca 222 #endif
wolfSSL 13:f67a6c6013ca 223 #if defined(HAVE_INTEL_AVX1)
wolfSSL 13:f67a6c6013ca 224 if (1) {
wolfSSL 13:f67a6c6013ca 225 Transform_p = ((IS_INTEL_AVX1(intel_flags)) ? Transform_AVX1 :
wolfSSL 13:f67a6c6013ca 226 Transform);
wolfSSL 13:f67a6c6013ca 227 }
wolfSSL 13:f67a6c6013ca 228 else
wolfSSL 13:f67a6c6013ca 229 #endif
wolfSSL 13:f67a6c6013ca 230 Transform_p = Transform;
wolfSSL 13:f67a6c6013ca 231
wolfSSL 13:f67a6c6013ca 232 transform_check = 1;
wolfSSL 13:f67a6c6013ca 233 }
wolfSSL 13:f67a6c6013ca 234
wolfSSL 13:f67a6c6013ca 235 /* Dummy for saving MM_REGs on behalf of Transform: an asm statement whose
 * only instruction is "or r8d, r8d" and whose clobber list names the vector
 * registers. When only HAVE_INTEL_AVX2 is defined the list is ymm4-ymm15;
 * whenever HAVE_INTEL_AVX1 is defined (including together with AVX2) the
 * list is xmm0-xmm15. */
wolfSSL 13:f67a6c6013ca 236 #if defined(HAVE_INTEL_AVX2) && !defined(HAVE_INTEL_AVX1)
wolfSSL 13:f67a6c6013ca 237 #define SAVE_XMM_YMM __asm__ volatile("or %%r8d, %%r8d":::\
wolfSSL 13:f67a6c6013ca 238 "%ymm4","%ymm5","%ymm6","%ymm7","%ymm8","%ymm9","%ymm10","%ymm11","%ymm12","%ymm13","%ymm14","%ymm15")
wolfSSL 13:f67a6c6013ca 239 #elif defined(HAVE_INTEL_AVX1)
wolfSSL 13:f67a6c6013ca 240 #define SAVE_XMM_YMM __asm__ volatile("or %%r8d, %%r8d":::\
wolfSSL 13:f67a6c6013ca 241 "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10",\
wolfSSL 13:f67a6c6013ca 242 "xmm11","xmm12","xmm13","xmm14","xmm15")
wolfSSL 13:f67a6c6013ca 243 #endif
wolfSSL 13:f67a6c6013ca 244
wolfSSL 13:f67a6c6013ca 245 int wc_InitSha256_ex(Sha256* sha256, void* heap, int devId)
wolfSSL 13:f67a6c6013ca 246 {
wolfSSL 13:f67a6c6013ca 247 int ret = 0;
wolfSSL 13:f67a6c6013ca 248 if (sha256 == NULL)
wolfSSL 13:f67a6c6013ca 249 return BAD_FUNC_ARG;
wolfSSL 13:f67a6c6013ca 250
wolfSSL 13:f67a6c6013ca 251 sha256->heap = heap;
wolfSSL 13:f67a6c6013ca 252
wolfSSL 13:f67a6c6013ca 253 ret = InitSha256(sha256);
wolfSSL 13:f67a6c6013ca 254 if (ret != 0)
wolfSSL 13:f67a6c6013ca 255 return ret;
wolfSSL 13:f67a6c6013ca 256
wolfSSL 13:f67a6c6013ca 257 /* choose best Transform function under this runtime environment */
wolfSSL 13:f67a6c6013ca 258 Sha256_SetTransform();
wolfSSL 13:f67a6c6013ca 259
wolfSSL 13:f67a6c6013ca 260 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
wolfSSL 13:f67a6c6013ca 261 ret = wolfAsync_DevCtxInit(&sha256->asyncDev,
wolfSSL 13:f67a6c6013ca 262 WOLFSSL_ASYNC_MARKER_SHA256, sha256->heap, devId);
wolfSSL 13:f67a6c6013ca 263 #else
wolfSSL 13:f67a6c6013ca 264 (void)devId;
wolfSSL 13:f67a6c6013ca 265 #endif /* WOLFSSL_ASYNC_CRYPT */
wolfSSL 13:f67a6c6013ca 266
wolfSSL 13:f67a6c6013ca 267 return ret;
wolfSSL 13:f67a6c6013ca 268 }
wolfSSL 13:f67a6c6013ca 269
wolfSSL 13:f67a6c6013ca 270 #elif defined(FREESCALE_LTC_SHA)
wolfSSL 13:f67a6c6013ca 271 int wc_InitSha256_ex(Sha256* sha256, void* heap, int devId)
wolfSSL 13:f67a6c6013ca 272 {
wolfSSL 13:f67a6c6013ca 273 (void)heap;
wolfSSL 13:f67a6c6013ca 274 (void)devId;
wolfSSL 13:f67a6c6013ca 275
wolfSSL 13:f67a6c6013ca 276 LTC_HASH_Init(LTC_BASE, &sha256->ctx, kLTC_Sha256, NULL, 0);
wolfSSL 13:f67a6c6013ca 277
wolfSSL 13:f67a6c6013ca 278 return 0;
wolfSSL 13:f67a6c6013ca 279 }
wolfSSL 13:f67a6c6013ca 280
wolfSSL 13:f67a6c6013ca 281 #elif defined(FREESCALE_MMCAU_SHA)
wolfSSL 13:f67a6c6013ca 282
wolfSSL 13:f67a6c6013ca 283 #ifdef FREESCALE_MMCAU_CLASSIC_SHA
wolfSSL 13:f67a6c6013ca 284 #include "cau_api.h"
wolfSSL 13:f67a6c6013ca 285 #else
wolfSSL 13:f67a6c6013ca 286 #include "fsl_mmcau.h"
wolfSSL 13:f67a6c6013ca 287 #endif
wolfSSL 13:f67a6c6013ca 288
wolfSSL 13:f67a6c6013ca 289 #define XTRANSFORM(S, B) Transform((S), (B))
wolfSSL 13:f67a6c6013ca 290
wolfSSL 13:f67a6c6013ca 291 int wc_InitSha256_ex(Sha256* sha256, void* heap, int devId)
wolfSSL 13:f67a6c6013ca 292 {
wolfSSL 13:f67a6c6013ca 293 int ret = 0;
wolfSSL 13:f67a6c6013ca 294
wolfSSL 13:f67a6c6013ca 295 (void)heap;
wolfSSL 13:f67a6c6013ca 296 (void)devId;
wolfSSL 13:f67a6c6013ca 297
wolfSSL 13:f67a6c6013ca 298 ret = wolfSSL_CryptHwMutexLock();
wolfSSL 13:f67a6c6013ca 299 if (ret != 0) {
wolfSSL 13:f67a6c6013ca 300 return ret;
wolfSSL 13:f67a6c6013ca 301 }
wolfSSL 13:f67a6c6013ca 302 #ifdef FREESCALE_MMCAU_CLASSIC_SHA
wolfSSL 13:f67a6c6013ca 303 cau_sha256_initialize_output(sha256->digest);
wolfSSL 13:f67a6c6013ca 304 #else
wolfSSL 13:f67a6c6013ca 305 MMCAU_SHA256_InitializeOutput((uint32_t*)sha256->digest);
wolfSSL 13:f67a6c6013ca 306 #endif
wolfSSL 13:f67a6c6013ca 307 wolfSSL_CryptHwMutexUnLock();
wolfSSL 13:f67a6c6013ca 308
wolfSSL 13:f67a6c6013ca 309 sha256->buffLen = 0;
wolfSSL 13:f67a6c6013ca 310 sha256->loLen = 0;
wolfSSL 13:f67a6c6013ca 311 sha256->hiLen = 0;
wolfSSL 13:f67a6c6013ca 312
wolfSSL 13:f67a6c6013ca 313 return ret;
wolfSSL 13:f67a6c6013ca 314 }
wolfSSL 13:f67a6c6013ca 315
wolfSSL 13:f67a6c6013ca 316 static int Transform(Sha256* sha256, byte* buf)
wolfSSL 13:f67a6c6013ca 317 {
wolfSSL 13:f67a6c6013ca 318 int ret = wolfSSL_CryptHwMutexLock();
wolfSSL 13:f67a6c6013ca 319 if (ret == 0) {
wolfSSL 13:f67a6c6013ca 320 #ifdef FREESCALE_MMCAU_CLASSIC_SHA
wolfSSL 13:f67a6c6013ca 321 cau_sha256_hash_n(buf, 1, sha256->digest);
wolfSSL 13:f67a6c6013ca 322 #else
wolfSSL 13:f67a6c6013ca 323 MMCAU_SHA256_HashN(buf, 1, sha256->digest);
wolfSSL 13:f67a6c6013ca 324 #endif
wolfSSL 13:f67a6c6013ca 325 wolfSSL_CryptHwMutexUnLock();
wolfSSL 13:f67a6c6013ca 326 }
wolfSSL 13:f67a6c6013ca 327 return ret;
wolfSSL 13:f67a6c6013ca 328 }
wolfSSL 13:f67a6c6013ca 329
wolfSSL 13:f67a6c6013ca 330 #elif defined(WOLFSSL_PIC32MZ_HASH)
wolfSSL 13:f67a6c6013ca 331 #include <wolfssl/wolfcrypt/port/pic32/pic32mz-crypt.h>
wolfSSL 13:f67a6c6013ca 332
wolfSSL 13:f67a6c6013ca 333 #else
wolfSSL 13:f67a6c6013ca 334 #define NEED_SOFT_SHA256
wolfSSL 13:f67a6c6013ca 335
wolfSSL 13:f67a6c6013ca 336 int wc_InitSha256_ex(Sha256* sha256, void* heap, int devId)
wolfSSL 13:f67a6c6013ca 337 {
wolfSSL 13:f67a6c6013ca 338 int ret = 0;
wolfSSL 13:f67a6c6013ca 339 if (sha256 == NULL)
wolfSSL 13:f67a6c6013ca 340 return BAD_FUNC_ARG;
wolfSSL 13:f67a6c6013ca 341
wolfSSL 13:f67a6c6013ca 342 sha256->heap = heap;
wolfSSL 13:f67a6c6013ca 343
wolfSSL 13:f67a6c6013ca 344 ret = InitSha256(sha256);
wolfSSL 13:f67a6c6013ca 345 if (ret != 0)
wolfSSL 13:f67a6c6013ca 346 return ret;
wolfSSL 13:f67a6c6013ca 347
wolfSSL 13:f67a6c6013ca 348 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
wolfSSL 13:f67a6c6013ca 349 ret = wolfAsync_DevCtxInit(&sha256->asyncDev,
wolfSSL 13:f67a6c6013ca 350 WOLFSSL_ASYNC_MARKER_SHA256, sha256->heap, devId);
wolfSSL 13:f67a6c6013ca 351 #else
wolfSSL 13:f67a6c6013ca 352 (void)devId;
wolfSSL 13:f67a6c6013ca 353 #endif /* WOLFSSL_ASYNC_CRYPT */
wolfSSL 13:f67a6c6013ca 354
wolfSSL 13:f67a6c6013ca 355 return ret;
wolfSSL 13:f67a6c6013ca 356 }
wolfSSL 13:f67a6c6013ca 357 #endif /* End Hardware Acceleration */
wolfSSL 13:f67a6c6013ca 358
wolfSSL 13:f67a6c6013ca 359 #ifndef SAVE_XMM_YMM
wolfSSL 13:f67a6c6013ca 360 #define SAVE_XMM_YMM
wolfSSL 13:f67a6c6013ca 361 #endif
wolfSSL 13:f67a6c6013ca 362
wolfSSL 13:f67a6c6013ca 363 #ifdef NEED_SOFT_SHA256
wolfSSL 13:f67a6c6013ca 364
/* The 64 SHA-256 round constants (FIPS 180-4), indexed by round number in
 * the RND macro. */
wolfSSL 13:f67a6c6013ca 365 static const ALIGN32 word32 K[64] = {
wolfSSL 13:f67a6c6013ca 366 0x428A2F98L, 0x71374491L, 0xB5C0FBCFL, 0xE9B5DBA5L, 0x3956C25BL,
wolfSSL 13:f67a6c6013ca 367 0x59F111F1L, 0x923F82A4L, 0xAB1C5ED5L, 0xD807AA98L, 0x12835B01L,
wolfSSL 13:f67a6c6013ca 368 0x243185BEL, 0x550C7DC3L, 0x72BE5D74L, 0x80DEB1FEL, 0x9BDC06A7L,
wolfSSL 13:f67a6c6013ca 369 0xC19BF174L, 0xE49B69C1L, 0xEFBE4786L, 0x0FC19DC6L, 0x240CA1CCL,
wolfSSL 13:f67a6c6013ca 370 0x2DE92C6FL, 0x4A7484AAL, 0x5CB0A9DCL, 0x76F988DAL, 0x983E5152L,
wolfSSL 13:f67a6c6013ca 371 0xA831C66DL, 0xB00327C8L, 0xBF597FC7L, 0xC6E00BF3L, 0xD5A79147L,
wolfSSL 13:f67a6c6013ca 372 0x06CA6351L, 0x14292967L, 0x27B70A85L, 0x2E1B2138L, 0x4D2C6DFCL,
wolfSSL 13:f67a6c6013ca 373 0x53380D13L, 0x650A7354L, 0x766A0ABBL, 0x81C2C92EL, 0x92722C85L,
wolfSSL 13:f67a6c6013ca 374 0xA2BFE8A1L, 0xA81A664BL, 0xC24B8B70L, 0xC76C51A3L, 0xD192E819L,
wolfSSL 13:f67a6c6013ca 375 0xD6990624L, 0xF40E3585L, 0x106AA070L, 0x19A4C116L, 0x1E376C08L,
wolfSSL 13:f67a6c6013ca 376 0x2748774CL, 0x34B0BCB5L, 0x391C0CB3L, 0x4ED8AA4AL, 0x5B9CCA4FL,
wolfSSL 13:f67a6c6013ca 377 0x682E6FF3L, 0x748F82EEL, 0x78A5636FL, 0x84C87814L, 0x8CC70208L,
wolfSSL 13:f67a6c6013ca 378 0x90BEFFFAL, 0xA4506CEBL, 0xBEF9A3F7L, 0xC67178F2L
wolfSSL 13:f67a6c6013ca 379 };
wolfSSL 13:f67a6c6013ca 380
/* Bitwise choose: for each bit, y where x is 1 and z where x is 0. */
wolfSSL 13:f67a6c6013ca 381 #define Ch(x,y,z) ((z) ^ ((x) & ((y) ^ (z))))
/* Bitwise majority of x, y and z. */
wolfSSL 13:f67a6c6013ca 382 #define Maj(x,y,z) ((((x) | (y)) & (z)) | ((x) & (y)))
/* Logical right shift of the low 32 bits of x by n. */
wolfSSL 13:f67a6c6013ca 383 #define R(x, n) (((x) & 0xFFFFFFFFU) >> (n))
wolfSSL 13:f67a6c6013ca 384
/* Right rotation by n, delegated to rotrFixed(). */
wolfSSL 13:f67a6c6013ca 385 #define S(x, n) rotrFixed(x, n)
/* Sigma0/Sigma1 are used in the round function (RND); Gamma0/Gamma1 are used
 * when expanding the message schedule in Transform(). */
wolfSSL 13:f67a6c6013ca 386 #define Sigma0(x) (S(x, 2) ^ S(x, 13) ^ S(x, 22))
wolfSSL 13:f67a6c6013ca 387 #define Sigma1(x) (S(x, 6) ^ S(x, 11) ^ S(x, 25))
wolfSSL 13:f67a6c6013ca 388 #define Gamma0(x) (S(x, 7) ^ S(x, 18) ^ R(x, 3))
wolfSSL 13:f67a6c6013ca 389 #define Gamma1(x) (S(x, 17) ^ S(x, 19) ^ R(x, 10))
wolfSSL 13:f67a6c6013ca 390
/* One round, index i. Expects word32 scratch variables t0 and t1 plus the
 * arrays K[] and W[] to be visible at the point of use; only d and h are
 * written. Expands to several statements, so it must not be used as the
 * unbraced body of an if/for. */
wolfSSL 13:f67a6c6013ca 391 #define RND(a,b,c,d,e,f,g,h,i) \
wolfSSL 13:f67a6c6013ca 392 t0 = (h) + Sigma1((e)) + Ch((e), (f), (g)) + K[(i)] + W[(i)]; \
wolfSSL 13:f67a6c6013ca 393 t1 = Sigma0((a)) + Maj((a), (b), (c)); \
wolfSSL 13:f67a6c6013ca 394 (d) += t0; \
wolfSSL 13:f67a6c6013ca 395 (h) = t0 + t1;
wolfSSL 13:f67a6c6013ca 396
/* Default dispatch when no hardware path defined XTRANSFORM earlier: call
 * the software Transform() and ignore the buffer argument B. */
wolfSSL 13:f67a6c6013ca 397 #ifndef XTRANSFORM
wolfSSL 13:f67a6c6013ca 398 #define XTRANSFORM(S, B) Transform((S))
wolfSSL 13:f67a6c6013ca 399 #endif
wolfSSL 13:f67a6c6013ca 400
wolfSSL 13:f67a6c6013ca 401 static int Transform(Sha256* sha256)
wolfSSL 13:f67a6c6013ca 402 {
wolfSSL 13:f67a6c6013ca 403 word32 S[8], t0, t1;
wolfSSL 13:f67a6c6013ca 404 int i;
wolfSSL 13:f67a6c6013ca 405
wolfSSL 13:f67a6c6013ca 406 #ifdef WOLFSSL_SMALL_STACK
wolfSSL 13:f67a6c6013ca 407 word32* W;
wolfSSL 13:f67a6c6013ca 408
wolfSSL 13:f67a6c6013ca 409 W = (word32*)XMALLOC(sizeof(word32) * SHA256_BLOCK_SIZE, NULL,
wolfSSL 13:f67a6c6013ca 410 DYNAMIC_TYPE_TMP_BUFFER);
wolfSSL 13:f67a6c6013ca 411 if (W == NULL)
wolfSSL 13:f67a6c6013ca 412 return MEMORY_E;
wolfSSL 13:f67a6c6013ca 413 #else
wolfSSL 13:f67a6c6013ca 414 word32 W[SHA256_BLOCK_SIZE];
wolfSSL 13:f67a6c6013ca 415 #endif
wolfSSL 13:f67a6c6013ca 416
wolfSSL 13:f67a6c6013ca 417 /* Copy context->state[] to working vars */
wolfSSL 13:f67a6c6013ca 418 for (i = 0; i < 8; i++)
wolfSSL 13:f67a6c6013ca 419 S[i] = sha256->digest[i];
wolfSSL 13:f67a6c6013ca 420
wolfSSL 13:f67a6c6013ca 421 for (i = 0; i < 16; i++)
wolfSSL 13:f67a6c6013ca 422 W[i] = sha256->buffer[i];
wolfSSL 13:f67a6c6013ca 423
wolfSSL 13:f67a6c6013ca 424 for (i = 16; i < SHA256_BLOCK_SIZE; i++)
wolfSSL 13:f67a6c6013ca 425 W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16];
wolfSSL 13:f67a6c6013ca 426
wolfSSL 13:f67a6c6013ca 427 for (i = 0; i < SHA256_BLOCK_SIZE; i += 8) {
wolfSSL 13:f67a6c6013ca 428 RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i+0);
wolfSSL 13:f67a6c6013ca 429 RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],i+1);
wolfSSL 13:f67a6c6013ca 430 RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],i+2);
wolfSSL 13:f67a6c6013ca 431 RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],i+3);
wolfSSL 13:f67a6c6013ca 432 RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],i+4);
wolfSSL 13:f67a6c6013ca 433 RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],i+5);
wolfSSL 13:f67a6c6013ca 434 RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],i+6);
wolfSSL 13:f67a6c6013ca 435 RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],i+7);
wolfSSL 13:f67a6c6013ca 436 }
wolfSSL 13:f67a6c6013ca 437
wolfSSL 13:f67a6c6013ca 438 /* Add the working vars back into digest state[] */
wolfSSL 13:f67a6c6013ca 439 for (i = 0; i < 8; i++) {
wolfSSL 13:f67a6c6013ca 440 sha256->digest[i] += S[i];
wolfSSL 13:f67a6c6013ca 441 }
wolfSSL 13:f67a6c6013ca 442
wolfSSL 13:f67a6c6013ca 443 #ifdef WOLFSSL_SMALL_STACK
wolfSSL 13:f67a6c6013ca 444 XFREE(W, NULL, DYNAMIC_TYPE_TMP_BUFFER);
wolfSSL 13:f67a6c6013ca 445 #endif
wolfSSL 13:f67a6c6013ca 446
wolfSSL 13:f67a6c6013ca 447 return 0;
wolfSSL 13:f67a6c6013ca 448 }
wolfSSL 13:f67a6c6013ca 449 #endif
wolfSSL 13:f67a6c6013ca 450 /* End wc_ software implementation */
wolfSSL 13:f67a6c6013ca 451
wolfSSL 13:f67a6c6013ca 452
wolfSSL 13:f67a6c6013ca 453 #ifdef XTRANSFORM
wolfSSL 13:f67a6c6013ca 454
wolfSSL 13:f67a6c6013ca 455 static INLINE void AddLength(Sha256* sha256, word32 len)
wolfSSL 13:f67a6c6013ca 456 {
wolfSSL 13:f67a6c6013ca 457 word32 tmp = sha256->loLen;
wolfSSL 13:f67a6c6013ca 458 if ( (sha256->loLen += len) < tmp)
wolfSSL 13:f67a6c6013ca 459 sha256->hiLen++; /* carry low to high */
wolfSSL 13:f67a6c6013ca 460 }
wolfSSL 13:f67a6c6013ca 461
wolfSSL 13:f67a6c6013ca 462 static INLINE int Sha256Update(Sha256* sha256, const byte* data, word32 len)
wolfSSL 13:f67a6c6013ca 463 {
wolfSSL 13:f67a6c6013ca 464 int ret = 0;
wolfSSL 13:f67a6c6013ca 465 byte* local;
wolfSSL 13:f67a6c6013ca 466
wolfSSL 13:f67a6c6013ca 467 if (sha256 == NULL || (data == NULL && len > 0)) {
wolfSSL 13:f67a6c6013ca 468 return BAD_FUNC_ARG;
wolfSSL 13:f67a6c6013ca 469 }
wolfSSL 13:f67a6c6013ca 470
wolfSSL 13:f67a6c6013ca 471 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
wolfSSL 13:f67a6c6013ca 472 if (sha256->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA256) {
wolfSSL 13:f67a6c6013ca 473 #if defined(HAVE_INTEL_QA)
wolfSSL 13:f67a6c6013ca 474 return IntelQaSymSha256(&sha256->asyncDev, NULL, data, len);
wolfSSL 13:f67a6c6013ca 475 #endif
wolfSSL 13:f67a6c6013ca 476 }
wolfSSL 13:f67a6c6013ca 477 #endif /* WOLFSSL_ASYNC_CRYPT */
wolfSSL 13:f67a6c6013ca 478
wolfSSL 13:f67a6c6013ca 479 /* do block size increments */
wolfSSL 13:f67a6c6013ca 480 local = (byte*)sha256->buffer;
wolfSSL 13:f67a6c6013ca 481
wolfSSL 13:f67a6c6013ca 482 /* check that internal buffLen is valid */
wolfSSL 13:f67a6c6013ca 483 if (sha256->buffLen >= SHA256_BLOCK_SIZE)
wolfSSL 13:f67a6c6013ca 484 return BUFFER_E;
wolfSSL 13:f67a6c6013ca 485
wolfSSL 13:f67a6c6013ca 486 SAVE_XMM_YMM; /* for Intel AVX */
wolfSSL 13:f67a6c6013ca 487
wolfSSL 13:f67a6c6013ca 488 while (len) {
wolfSSL 13:f67a6c6013ca 489 word32 add = min(len, SHA256_BLOCK_SIZE - sha256->buffLen);
wolfSSL 13:f67a6c6013ca 490 XMEMCPY(&local[sha256->buffLen], data, add);
wolfSSL 13:f67a6c6013ca 491
wolfSSL 13:f67a6c6013ca 492 sha256->buffLen += add;
wolfSSL 13:f67a6c6013ca 493 data += add;
wolfSSL 13:f67a6c6013ca 494 len -= add;
wolfSSL 13:f67a6c6013ca 495
wolfSSL 13:f67a6c6013ca 496 if (sha256->buffLen == SHA256_BLOCK_SIZE) {
wolfSSL 13:f67a6c6013ca 497 #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
wolfSSL 13:f67a6c6013ca 498 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
wolfSSL 13:f67a6c6013ca 499 if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
wolfSSL 13:f67a6c6013ca 500 #endif
wolfSSL 13:f67a6c6013ca 501 {
wolfSSL 13:f67a6c6013ca 502 ByteReverseWords(sha256->buffer, sha256->buffer,
wolfSSL 13:f67a6c6013ca 503 SHA256_BLOCK_SIZE);
wolfSSL 13:f67a6c6013ca 504 }
wolfSSL 13:f67a6c6013ca 505 #endif
wolfSSL 13:f67a6c6013ca 506 ret = XTRANSFORM(sha256, local);
wolfSSL 13:f67a6c6013ca 507 if (ret != 0) {
wolfSSL 13:f67a6c6013ca 508 break;
wolfSSL 13:f67a6c6013ca 509 }
wolfSSL 13:f67a6c6013ca 510
wolfSSL 13:f67a6c6013ca 511 AddLength(sha256, SHA256_BLOCK_SIZE);
wolfSSL 13:f67a6c6013ca 512 sha256->buffLen = 0;
wolfSSL 13:f67a6c6013ca 513 }
wolfSSL 13:f67a6c6013ca 514 }
wolfSSL 13:f67a6c6013ca 515
wolfSSL 13:f67a6c6013ca 516 return ret;
wolfSSL 13:f67a6c6013ca 517 }
wolfSSL 13:f67a6c6013ca 518
wolfSSL 13:f67a6c6013ca 519 int wc_Sha256Update(Sha256* sha256, const byte* data, word32 len)
wolfSSL 13:f67a6c6013ca 520 {
wolfSSL 13:f67a6c6013ca 521 return Sha256Update(sha256, data, len);
wolfSSL 13:f67a6c6013ca 522 }
wolfSSL 13:f67a6c6013ca 523
wolfSSL 13:f67a6c6013ca 524 static INLINE int Sha256Final(Sha256* sha256)
wolfSSL 13:f67a6c6013ca 525 {
wolfSSL 13:f67a6c6013ca 526
wolfSSL 13:f67a6c6013ca 527 int ret;
wolfSSL 13:f67a6c6013ca 528 byte* local = (byte*)sha256->buffer;
wolfSSL 13:f67a6c6013ca 529
wolfSSL 13:f67a6c6013ca 530 if (sha256 == NULL) {
wolfSSL 13:f67a6c6013ca 531 return BAD_FUNC_ARG;
wolfSSL 13:f67a6c6013ca 532 }
wolfSSL 13:f67a6c6013ca 533
wolfSSL 13:f67a6c6013ca 534 SAVE_XMM_YMM; /* for Intel AVX */
wolfSSL 13:f67a6c6013ca 535
wolfSSL 13:f67a6c6013ca 536 AddLength(sha256, sha256->buffLen); /* before adding pads */
wolfSSL 13:f67a6c6013ca 537 local[sha256->buffLen++] = 0x80; /* add 1 */
wolfSSL 13:f67a6c6013ca 538
wolfSSL 13:f67a6c6013ca 539 /* pad with zeros */
wolfSSL 13:f67a6c6013ca 540 if (sha256->buffLen > SHA256_PAD_SIZE) {
wolfSSL 13:f67a6c6013ca 541 XMEMSET(&local[sha256->buffLen], 0,
wolfSSL 13:f67a6c6013ca 542 SHA256_BLOCK_SIZE - sha256->buffLen);
wolfSSL 13:f67a6c6013ca 543 sha256->buffLen += SHA256_BLOCK_SIZE - sha256->buffLen;
wolfSSL 13:f67a6c6013ca 544
wolfSSL 13:f67a6c6013ca 545 {
wolfSSL 13:f67a6c6013ca 546 #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
wolfSSL 13:f67a6c6013ca 547 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
wolfSSL 13:f67a6c6013ca 548 if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
wolfSSL 13:f67a6c6013ca 549 #endif
wolfSSL 13:f67a6c6013ca 550 {
wolfSSL 13:f67a6c6013ca 551 ByteReverseWords(sha256->buffer, sha256->buffer,
wolfSSL 13:f67a6c6013ca 552 SHA256_BLOCK_SIZE);
wolfSSL 13:f67a6c6013ca 553 }
wolfSSL 13:f67a6c6013ca 554 #endif
wolfSSL 13:f67a6c6013ca 555 }
wolfSSL 13:f67a6c6013ca 556
wolfSSL 13:f67a6c6013ca 557 ret = XTRANSFORM(sha256, local);
wolfSSL 13:f67a6c6013ca 558 if (ret != 0)
wolfSSL 13:f67a6c6013ca 559 return ret;
wolfSSL 13:f67a6c6013ca 560
wolfSSL 13:f67a6c6013ca 561 sha256->buffLen = 0;
wolfSSL 13:f67a6c6013ca 562 }
wolfSSL 13:f67a6c6013ca 563 XMEMSET(&local[sha256->buffLen], 0, SHA256_PAD_SIZE - sha256->buffLen);
wolfSSL 13:f67a6c6013ca 564
wolfSSL 13:f67a6c6013ca 565 /* put lengths in bits */
wolfSSL 13:f67a6c6013ca 566 sha256->hiLen = (sha256->loLen >> (8 * sizeof(sha256->loLen) - 3)) +
wolfSSL 13:f67a6c6013ca 567 (sha256->hiLen << 3);
wolfSSL 13:f67a6c6013ca 568 sha256->loLen = sha256->loLen << 3;
wolfSSL 13:f67a6c6013ca 569
wolfSSL 13:f67a6c6013ca 570 /* store lengths */
wolfSSL 13:f67a6c6013ca 571 #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
wolfSSL 13:f67a6c6013ca 572 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
wolfSSL 13:f67a6c6013ca 573 if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
wolfSSL 13:f67a6c6013ca 574 #endif
wolfSSL 13:f67a6c6013ca 575 {
wolfSSL 13:f67a6c6013ca 576 ByteReverseWords(sha256->buffer, sha256->buffer,
wolfSSL 13:f67a6c6013ca 577 SHA256_BLOCK_SIZE);
wolfSSL 13:f67a6c6013ca 578 }
wolfSSL 13:f67a6c6013ca 579 #endif
wolfSSL 13:f67a6c6013ca 580 /* ! length ordering dependent on digest endian type ! */
wolfSSL 13:f67a6c6013ca 581 XMEMCPY(&local[SHA256_PAD_SIZE], &sha256->hiLen, sizeof(word32));
wolfSSL 13:f67a6c6013ca 582 XMEMCPY(&local[SHA256_PAD_SIZE + sizeof(word32)], &sha256->loLen,
wolfSSL 13:f67a6c6013ca 583 sizeof(word32));
wolfSSL 13:f67a6c6013ca 584
wolfSSL 13:f67a6c6013ca 585 #if defined(FREESCALE_MMCAU_SHA) || defined(HAVE_INTEL_AVX1) || \
wolfSSL 13:f67a6c6013ca 586 defined(HAVE_INTEL_AVX2)
wolfSSL 13:f67a6c6013ca 587 /* Kinetis requires only these bytes reversed */
wolfSSL 13:f67a6c6013ca 588 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
wolfSSL 13:f67a6c6013ca 589 if (IS_INTEL_AVX1(intel_flags) || IS_INTEL_AVX2(intel_flags))
wolfSSL 13:f67a6c6013ca 590 #endif
wolfSSL 13:f67a6c6013ca 591 {
wolfSSL 13:f67a6c6013ca 592 ByteReverseWords(
wolfSSL 13:f67a6c6013ca 593 &sha256->buffer[SHA256_PAD_SIZE / sizeof(word32)],
wolfSSL 13:f67a6c6013ca 594 &sha256->buffer[SHA256_PAD_SIZE / sizeof(word32)],
wolfSSL 13:f67a6c6013ca 595 2 * sizeof(word32));
wolfSSL 13:f67a6c6013ca 596 }
wolfSSL 13:f67a6c6013ca 597 #endif
wolfSSL 13:f67a6c6013ca 598
wolfSSL 13:f67a6c6013ca 599 return XTRANSFORM(sha256, local);
wolfSSL 13:f67a6c6013ca 600 }
wolfSSL 13:f67a6c6013ca 601
wolfSSL 13:f67a6c6013ca 602 int wc_Sha256Final(Sha256* sha256, byte* hash)
wolfSSL 13:f67a6c6013ca 603 {
wolfSSL 13:f67a6c6013ca 604 int ret;
wolfSSL 13:f67a6c6013ca 605
wolfSSL 13:f67a6c6013ca 606 if (sha256 == NULL || hash == NULL) {
wolfSSL 13:f67a6c6013ca 607 return BAD_FUNC_ARG;
wolfSSL 13:f67a6c6013ca 608 }
wolfSSL 13:f67a6c6013ca 609
wolfSSL 13:f67a6c6013ca 610 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
wolfSSL 13:f67a6c6013ca 611 if (sha256->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA256) {
wolfSSL 13:f67a6c6013ca 612 #if defined(HAVE_INTEL_QA)
wolfSSL 13:f67a6c6013ca 613 return IntelQaSymSha256(&sha256->asyncDev, hash, NULL,
wolfSSL 13:f67a6c6013ca 614 SHA256_DIGEST_SIZE);
wolfSSL 13:f67a6c6013ca 615 #endif
wolfSSL 13:f67a6c6013ca 616 }
wolfSSL 13:f67a6c6013ca 617 #endif /* WOLFSSL_ASYNC_CRYPT */
wolfSSL 13:f67a6c6013ca 618
wolfSSL 13:f67a6c6013ca 619 ret = Sha256Final(sha256);
wolfSSL 13:f67a6c6013ca 620 if (ret != 0)
wolfSSL 13:f67a6c6013ca 621 return ret;
wolfSSL 13:f67a6c6013ca 622
wolfSSL 13:f67a6c6013ca 623 #if defined(LITTLE_ENDIAN_ORDER)
wolfSSL 13:f67a6c6013ca 624 ByteReverseWords(sha256->digest, sha256->digest, SHA256_DIGEST_SIZE);
wolfSSL 13:f67a6c6013ca 625 #endif
wolfSSL 13:f67a6c6013ca 626 XMEMCPY(hash, sha256->digest, SHA256_DIGEST_SIZE);
wolfSSL 13:f67a6c6013ca 627
wolfSSL 13:f67a6c6013ca 628 return InitSha256(sha256); /* reset state */
wolfSSL 13:f67a6c6013ca 629 }
wolfSSL 13:f67a6c6013ca 630
wolfSSL 13:f67a6c6013ca 631 #endif /* XTRANSFORM */
wolfSSL 13:f67a6c6013ca 632
wolfSSL 13:f67a6c6013ca 633
wolfSSL 13:f67a6c6013ca 634 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
wolfSSL 13:f67a6c6013ca 635
/* Copy the eight 32-bit words of sha256->digest into the general purpose
 * registers named by the arguments, one movl per word, going through the
 * local temporary d. A variable named sha256 must be in scope where this
 * expands. Each statement lists SSE_REGs as clobbered.
 * NOTE(review): the loaded values have to stay in those registers across
 * separate asm statements; nothing visible here reserves them for that
 * purpose - confirm against the supported compilers. */
wolfSSL 13:f67a6c6013ca 636 #define _DigestToReg(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )\
wolfSSL 13:f67a6c6013ca 637 { word32 d;\
wolfSSL 13:f67a6c6013ca 638 d = sha256->digest[0]; __asm__ volatile("movl %0, %"#S_0::"r"(d):SSE_REGs);\
wolfSSL 13:f67a6c6013ca 639 d = sha256->digest[1]; __asm__ volatile("movl %0, %"#S_1::"r"(d):SSE_REGs);\
wolfSSL 13:f67a6c6013ca 640 d = sha256->digest[2]; __asm__ volatile("movl %0, %"#S_2::"r"(d):SSE_REGs);\
wolfSSL 13:f67a6c6013ca 641 d = sha256->digest[3]; __asm__ volatile("movl %0, %"#S_3::"r"(d):SSE_REGs);\
wolfSSL 13:f67a6c6013ca 642 d = sha256->digest[4]; __asm__ volatile("movl %0, %"#S_4::"r"(d):SSE_REGs);\
wolfSSL 13:f67a6c6013ca 643 d = sha256->digest[5]; __asm__ volatile("movl %0, %"#S_5::"r"(d):SSE_REGs);\
wolfSSL 13:f67a6c6013ca 644 d = sha256->digest[6]; __asm__ volatile("movl %0, %"#S_6::"r"(d):SSE_REGs);\
wolfSSL 13:f67a6c6013ca 645 d = sha256->digest[7]; __asm__ volatile("movl %0, %"#S_7::"r"(d):SSE_REGs);\
wolfSSL 13:f67a6c6013ca 646 }
wolfSSL 13:f67a6c6013ca 647
/* Read each of the eight named registers into the temporary d and ADD it
 * (+=) to the matching word of sha256->digest; the digest is accumulated,
 * not overwritten. */
wolfSSL 13:f67a6c6013ca 648 #define _RegToDigest(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )\
wolfSSL 13:f67a6c6013ca 649 { word32 d; \
wolfSSL 13:f67a6c6013ca 650 __asm__ volatile("movl %"#S_0", %0":"=r"(d)::SSE_REGs); sha256->digest[0] += d;\
wolfSSL 13:f67a6c6013ca 651 __asm__ volatile("movl %"#S_1", %0":"=r"(d)::SSE_REGs); sha256->digest[1] += d;\
wolfSSL 13:f67a6c6013ca 652 __asm__ volatile("movl %"#S_2", %0":"=r"(d)::SSE_REGs); sha256->digest[2] += d;\
wolfSSL 13:f67a6c6013ca 653 __asm__ volatile("movl %"#S_3", %0":"=r"(d)::SSE_REGs); sha256->digest[3] += d;\
wolfSSL 13:f67a6c6013ca 654 __asm__ volatile("movl %"#S_4", %0":"=r"(d)::SSE_REGs); sha256->digest[4] += d;\
wolfSSL 13:f67a6c6013ca 655 __asm__ volatile("movl %"#S_5", %0":"=r"(d)::SSE_REGs); sha256->digest[5] += d;\
wolfSSL 13:f67a6c6013ca 656 __asm__ volatile("movl %"#S_6", %0":"=r"(d)::SSE_REGs); sha256->digest[6] += d;\
wolfSSL 13:f67a6c6013ca 657 __asm__ volatile("movl %"#S_7", %0":"=r"(d)::SSE_REGs); sha256->digest[7] += d;\
wolfSSL 13:f67a6c6013ca 658 }
wolfSSL 13:f67a6c6013ca 659
wolfSSL 13:f67a6c6013ca 660
/* Forwarding wrappers: the extra macro level lets arguments that are
 * themselves macros (the S_n register names) be expanded before the inner
 * macro applies the stringizing operator to them. */
wolfSSL 13:f67a6c6013ca 661 #define DigestToReg(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )\
wolfSSL 13:f67a6c6013ca 662 _DigestToReg(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )
wolfSSL 13:f67a6c6013ca 663
wolfSSL 13:f67a6c6013ca 664 #define RegToDigest(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )\
wolfSSL 13:f67a6c6013ca 665 _RegToDigest(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )
wolfSSL 13:f67a6c6013ca 666
wolfSSL 13:f67a6c6013ca 667
/* 32-bit general purpose registers that carry the eight working variables
 * while the round macros run. Each name already contains one percent sign;
 * the asm templates prepend a second one, so extended asm sees an escaped
 * literal register name. */
wolfSSL 13:f67a6c6013ca 668 #define S_0 %r15d
wolfSSL 13:f67a6c6013ca 669 #define S_1 %r10d
wolfSSL 13:f67a6c6013ca 670 #define S_2 %r11d
wolfSSL 13:f67a6c6013ca 671 #define S_3 %r12d
wolfSSL 13:f67a6c6013ca 672 #define S_4 %r13d
wolfSSL 13:f67a6c6013ca 673 #define S_5 %r14d
wolfSSL 13:f67a6c6013ca 674 #define S_6 %ebx
wolfSSL 13:f67a6c6013ca 675 #define S_7 %r9d
wolfSSL 13:f67a6c6013ca 676
/* Clobber list appended to the scalar asm statements. Despite the name it
 * lists general purpose registers only: the scratch registers edi, ecx, esi,
 * edx, r8 and the registers used for S_0..S_7 above. */
wolfSSL 13:f67a6c6013ca 677 #define SSE_REGs "%edi", "%ecx", "%esi", "%edx", "%ebx","%r8","%r9","%r10","%r11","%r12","%r13","%r14","%r15"
wolfSSL 13:f67a6c6013ca 678
/* One SHA-256 round split into eight fragments, using the rorx instruction
 * for the rotations. Arguments a..h are the registers holding the working
 * variables for this round and i is the index into W_K (a word32 array that
 * must be in scope at the expansion site). edx, edi, esi and r8d are used as
 * scratch and carry intermediate values from one fragment to the next, so
 * the fragments must be expanded in order 1..8.
 * Fragment 8 uses two basic asm statements (no operand or clobber lists),
 * which is why the register names there are emitted with a single percent
 * sign. */
wolfSSL 13:f67a6c6013ca 679 #if defined(HAVE_INTEL_RORX)
wolfSSL 13:f67a6c6013ca 680 #define RND_STEP_RORX_1(a,b,c,d,e,f,g,h,i)\
wolfSSL 13:f67a6c6013ca 681 __asm__ volatile("rorx $6, %"#e", %%edx\n\t":::"%edx",SSE_REGs); /* edx = e ror 6 */\
wolfSSL 13:f67a6c6013ca 682
wolfSSL 13:f67a6c6013ca 683 #define RND_STEP_RORX_2(a,b,c,d,e,f,g,h,i)\
wolfSSL 13:f67a6c6013ca 684 __asm__ volatile("rorx $11, %"#e",%%edi\n\t":::"%edi",SSE_REGs); /* edi = e ror 11 */\
wolfSSL 13:f67a6c6013ca 685 __asm__ volatile("xorl %%edx, %%edi\n\t":::"%edx","%edi",SSE_REGs); /* edi = (e ror 11) ^ (e ror 6) */\
wolfSSL 13:f67a6c6013ca 686 __asm__ volatile("rorx $25, %"#e", %%edx\n\t":::"%edx",SSE_REGs); /* edx = e ror 25 */\
wolfSSL 13:f67a6c6013ca 687
wolfSSL 13:f67a6c6013ca 688 #define RND_STEP_RORX_3(a,b,c,d,e,f,g,h,i)\
wolfSSL 13:f67a6c6013ca 689 __asm__ volatile("movl %"#f", %%esi\n\t":::"%esi",SSE_REGs); /* esi = f */\
wolfSSL 13:f67a6c6013ca 690 __asm__ volatile("xorl %"#g", %%esi\n\t":::"%esi",SSE_REGs); /* esi = f ^ g */\
wolfSSL 13:f67a6c6013ca 691 __asm__ volatile("xorl %%edi, %%edx\n\t":::"%edi","%edx",SSE_REGs); /* edx = Sigma1(e) */\
wolfSSL 13:f67a6c6013ca 692 __asm__ volatile("andl %"#e", %%esi\n\t":::"%esi",SSE_REGs); /* esi = (f ^ g) & e */\
wolfSSL 13:f67a6c6013ca 693 __asm__ volatile("xorl %"#g", %%esi\n\t":::"%esi",SSE_REGs); /* esi = Ch(e,f,g) */\
wolfSSL 13:f67a6c6013ca 694
wolfSSL 13:f67a6c6013ca 695 #define RND_STEP_RORX_4(a,b,c,d,e,f,g,h,i)\
wolfSSL 13:f67a6c6013ca 696 /*__asm__ volatile("movl %0, %%edx\n\t"::"m"(w_k):"%edx");*/\
wolfSSL 13:f67a6c6013ca 697 __asm__ volatile("addl %0, %"#h"\n\t"::"r"(W_K[i]):SSE_REGs); /* h += w_k */\
wolfSSL 13:f67a6c6013ca 698 __asm__ volatile("addl %%edx, %"#h"\n\t":::"%edx",SSE_REGs); /* h = h + w_k + Sigma1(e) */\
wolfSSL 13:f67a6c6013ca 699 __asm__ volatile("rorx $2, %"#a", %%r8d\n\t":::"%r8",SSE_REGs); /* r8d = a ror 2 */\
wolfSSL 13:f67a6c6013ca 700 __asm__ volatile("rorx $13, %"#a", %%edi\n\t":::"%edi",SSE_REGs);/* edi = a ror 13 */\
wolfSSL 13:f67a6c6013ca 701
wolfSSL 13:f67a6c6013ca 702 #define RND_STEP_RORX_5(a,b,c,d,e,f,g,h,i)\
wolfSSL 13:f67a6c6013ca 703 __asm__ volatile("rorx $22, %"#a", %%edx\n\t":::"%edx",SSE_REGs); /* edx = a ror 22 */\
wolfSSL 13:f67a6c6013ca 704 __asm__ volatile("xorl %%r8d, %%edi\n\t":::"%edi","%r8",SSE_REGs);/* edi = (a ror 2) ^ (a ror 13) */\
wolfSSL 13:f67a6c6013ca 705 __asm__ volatile("xorl %%edi, %%edx\n\t":::"%edi","%edx",SSE_REGs); /* edx = Sigma0(a) */\
wolfSSL 13:f67a6c6013ca 706
wolfSSL 13:f67a6c6013ca 707 #define RND_STEP_RORX_6(a,b,c,d,e,f,g,h,i)\
wolfSSL 13:f67a6c6013ca 708 __asm__ volatile("movl %"#b", %%edi\n\t":::"%edi",SSE_REGs); /* edi = b */\
wolfSSL 13:f67a6c6013ca 709 __asm__ volatile("orl %"#a", %%edi\n\t":::"%edi",SSE_REGs); /* edi = a | b */\
wolfSSL 13:f67a6c6013ca 710 __asm__ volatile("andl %"#c", %%edi\n\t":::"%edi",SSE_REGs); /* edi = (a | b) & c*/\
wolfSSL 13:f67a6c6013ca 711 __asm__ volatile("movl %"#b", %%r8d\n\t":::"%r8",SSE_REGs); /* r8d = b */\
wolfSSL 13:f67a6c6013ca 712
wolfSSL 13:f67a6c6013ca 713 #define RND_STEP_RORX_7(a,b,c,d,e,f,g,h,i)\
wolfSSL 13:f67a6c6013ca 714 __asm__ volatile("addl %%esi, %"#h"\n\t":::"%esi",SSE_REGs); /* h += Ch(e,f,g) */\
wolfSSL 13:f67a6c6013ca 715 __asm__ volatile("andl %"#a", %%r8d\n\t":::"%r8",SSE_REGs); /* r8d = b & a */\
wolfSSL 13:f67a6c6013ca 716 __asm__ volatile("orl %%edi, %%r8d\n\t":::"%edi","%r8",SSE_REGs); /* r8d = Maj(a,b,c) */\
wolfSSL 13:f67a6c6013ca 717
wolfSSL 13:f67a6c6013ca 718 #define RND_STEP_RORX_8(a,b,c,d,e,f,g,h,i)\
wolfSSL 13:f67a6c6013ca 719 __asm__ volatile("addl "#h", "#d"\n\t"); /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */\
wolfSSL 13:f67a6c6013ca 720 __asm__ volatile("addl %"#h", %%r8d\n\t":::"%r8",SSE_REGs); \
wolfSSL 13:f67a6c6013ca 721 __asm__ volatile("addl %%edx, %%r8d\n\t":::"%edx","%r8",SSE_REGs); \
wolfSSL 13:f67a6c6013ca 722 __asm__ volatile("movl %r8d, "#h"\n\t");
wolfSSL 13:f67a6c6013ca 723 #endif /* HAVE_INTEL_RORX */
wolfSSL 13:f67a6c6013ca 724
/* One SHA-256 round split into eight fragments, using movl + roll for the
 * rotations (a left rotate by 32-n is the right rotate by n that the
 * comments refer to). Arguments a..h are the registers holding the working
 * variables for this round and i is the index into W_K (a word32 array that
 * must be in scope at the expansion site). edx, edi, esi and r8d are used as
 * scratch and carry intermediate values from one fragment to the next, so
 * the fragments must be expanded in order 1..8. */
wolfSSL 13:f67a6c6013ca 725 #define RND_STEP_1(a,b,c,d,e,f,g,h,i)\
wolfSSL 13:f67a6c6013ca 726 __asm__ volatile("movl %"#e", %%edx\n\t":::"%edx",SSE_REGs);\
wolfSSL 13:f67a6c6013ca 727 __asm__ volatile("roll $26, %%edx\n\t":::"%edx",SSE_REGs); /* edx = e ror 6 */\
wolfSSL 13:f67a6c6013ca 728 __asm__ volatile("movl %"#e", %%edi\n\t":::"%edi",SSE_REGs);\
wolfSSL 13:f67a6c6013ca 729
wolfSSL 13:f67a6c6013ca 730 #define RND_STEP_2(a,b,c,d,e,f,g,h,i)\
wolfSSL 13:f67a6c6013ca 731 __asm__ volatile("roll $21, %%edi\n\t":::"%edi",SSE_REGs); /* edi = e ror 11 */\
wolfSSL 13:f67a6c6013ca 732 __asm__ volatile("xorl %%edx, %%edi\n\t":::"%edx","%edi",SSE_REGs); /* edi = (e ror 11) ^ (e ror 6) */\
wolfSSL 13:f67a6c6013ca 733 __asm__ volatile("movl %"#e", %%edx\n\t":::"%edx",SSE_REGs); /* edx = e */\
wolfSSL 13:f67a6c6013ca 734 __asm__ volatile("roll $7, %%edx\n\t":::"%edx",SSE_REGs); /* edx = e ror 25 */\
wolfSSL 13:f67a6c6013ca 735
wolfSSL 13:f67a6c6013ca 736 #define RND_STEP_3(a,b,c,d,e,f,g,h,i)\
wolfSSL 13:f67a6c6013ca 737 __asm__ volatile("movl %"#f", %%esi\n\t":::"%esi",SSE_REGs); /* esi = f */\
wolfSSL 13:f67a6c6013ca 738 __asm__ volatile("xorl %"#g", %%esi\n\t":::"%esi",SSE_REGs); /* esi = f ^ g */\
wolfSSL 13:f67a6c6013ca 739 __asm__ volatile("xorl %%edi, %%edx\n\t":::"%edi","%edx",SSE_REGs); /* edx = Sigma1(e) */\
wolfSSL 13:f67a6c6013ca 740 __asm__ volatile("andl %"#e", %%esi\n\t":::"%esi",SSE_REGs); /* esi = (f ^ g) & e */\
wolfSSL 13:f67a6c6013ca 741 __asm__ volatile("xorl %"#g", %%esi\n\t":::"%esi",SSE_REGs); /* esi = Ch(e,f,g) */\
wolfSSL 13:f67a6c6013ca 742
wolfSSL 13:f67a6c6013ca 743 #define RND_STEP_4(a,b,c,d,e,f,g,h,i)\
wolfSSL 13:f67a6c6013ca 744 __asm__ volatile("addl %0, %"#h"\n\t"::"r"(W_K[i]):SSE_REGs); /* h += w_k */\
wolfSSL 13:f67a6c6013ca 745 __asm__ volatile("addl %%edx, %"#h"\n\t":::"%edx",SSE_REGs); /* h = h + w_k + Sigma1(e) */\
wolfSSL 13:f67a6c6013ca 746 __asm__ volatile("movl %"#a", %%r8d\n\t":::"%r8",SSE_REGs); /* r8d = a */\
wolfSSL 13:f67a6c6013ca 747 __asm__ volatile("roll $30, %%r8d\n\t":::"%r8",SSE_REGs); /* r8d = a ror 2 */\
wolfSSL 13:f67a6c6013ca 748 __asm__ volatile("movl %"#a", %%edi\n\t":::"%edi",SSE_REGs); /* edi = a */\
wolfSSL 13:f67a6c6013ca 749 __asm__ volatile("roll $19, %%edi\n\t":::"%edi",SSE_REGs); /* edi = a ror 13 */\
wolfSSL 13:f67a6c6013ca 750 __asm__ volatile("movl %"#a", %%edx\n\t":::"%edx",SSE_REGs); /* edx = a */\
wolfSSL 13:f67a6c6013ca 751
wolfSSL 13:f67a6c6013ca 752 #define RND_STEP_5(a,b,c,d,e,f,g,h,i)\
wolfSSL 13:f67a6c6013ca 753 __asm__ volatile("roll $10, %%edx\n\t":::"%edx",SSE_REGs); /* edx = a ror 22 */\
wolfSSL 13:f67a6c6013ca 754 __asm__ volatile("xorl %%r8d, %%edi\n\t":::"%edi","%r8",SSE_REGs); /* edi = (a ror 2) ^ (a ror 13) */\
wolfSSL 13:f67a6c6013ca 755 __asm__ volatile("xorl %%edi, %%edx\n\t":::"%edi","%edx",SSE_REGs);/* edx = Sigma0(a) */\
wolfSSL 13:f67a6c6013ca 756
wolfSSL 13:f67a6c6013ca 757 #define RND_STEP_6(a,b,c,d,e,f,g,h,i)\
wolfSSL 13:f67a6c6013ca 758 __asm__ volatile("movl %"#b", %%edi\n\t":::"%edi",SSE_REGs); /* edi = b */\
wolfSSL 13:f67a6c6013ca 759 __asm__ volatile("orl %"#a", %%edi\n\t":::"%edi",SSE_REGs); /* edi = a | b */\
wolfSSL 13:f67a6c6013ca 760 __asm__ volatile("andl %"#c", %%edi\n\t":::"%edi",SSE_REGs); /* edi = (a | b) & c */\
wolfSSL 13:f67a6c6013ca 761 __asm__ volatile("movl %"#b", %%r8d\n\t":::"%r8",SSE_REGs); /* r8d = b */\
wolfSSL 13:f67a6c6013ca 762
wolfSSL 13:f67a6c6013ca 763 #define RND_STEP_7(a,b,c,d,e,f,g,h,i)\
wolfSSL 13:f67a6c6013ca 764 __asm__ volatile("addl %%esi, %"#h"\n\t":::"%esi",SSE_REGs); /* h += Ch(e,f,g) */\
wolfSSL 13:f67a6c6013ca 765 __asm__ volatile("andl %"#a", %%r8d\n\t":::"%r8",SSE_REGs); /* r8d = b & a */\
wolfSSL 13:f67a6c6013ca 766 __asm__ volatile("orl %%edi, %%r8d\n\t":::"%edi","%r8",SSE_REGs); /* r8d = Maj(a,b,c) */\
wolfSSL 13:f67a6c6013ca 767
wolfSSL 13:f67a6c6013ca 768 #define RND_STEP_8(a,b,c,d,e,f,g,h,i)\
wolfSSL 13:f67a6c6013ca 769 __asm__ volatile("addl "#h", "#d"\n\t"); /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */\
wolfSSL 13:f67a6c6013ca 770 __asm__ volatile("addl %"#h", %%r8d\n\t":::"%r8",SSE_REGs); \
wolfSSL 13:f67a6c6013ca 771 /* r8d = h + w_k + Sigma1(e) + Ch(e,f,g) + Maj(a,b,c) */\
wolfSSL 13:f67a6c6013ca 772 __asm__ volatile("addl %%edx, %%r8d\n\t":::"%edx","%r8",SSE_REGs);\
wolfSSL 13:f67a6c6013ca 773 /* r8d = h + w_k + Sigma1(e) + Sigma0(a) + Ch(e,f,g) + Maj(a,b,c) */\
wolfSSL 13:f67a6c6013ca 774 __asm__ volatile("movl %%r8d, %"#h"\n\t":::"%r8", SSE_REGs); \
wolfSSL 13:f67a6c6013ca 775 /* h = h + w_k + Sigma1(e) + Sigma0(a) + Ch(e,f,g) + Maj(a,b,c) */ \
wolfSSL 13:f67a6c6013ca 776
/* One complete SHA-256 round: the eight RND_STEP_n fragments in order.
 * a..h name the registers holding the working variables, i indexes W_K. */
#define RND_X(a,b,c,d,e,f,g,h,i) \
    RND_STEP_1(a,b,c,d,e,f,g,h,i); \
    RND_STEP_2(a,b,c,d,e,f,g,h,i); \
    RND_STEP_3(a,b,c,d,e,f,g,h,i); \
    RND_STEP_4(a,b,c,d,e,f,g,h,i); \
    RND_STEP_5(a,b,c,d,e,f,g,h,i); \
    RND_STEP_6(a,b,c,d,e,f,g,h,i); \
    RND_STEP_7(a,b,c,d,e,f,g,h,i); \
    RND_STEP_8(a,b,c,d,e,f,g,h,i);

/* RND_n runs a full round with the register roles rotated so that S_n takes
 * the role of the first working variable. The registers themselves never
 * move; only the argument order passed to RND_X changes.
 * (These eight macros used to be defined a second time, verbatim, further
 * down; the redundant copy has been dropped.) */
#define RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i);
#define RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_7,S_0,S_1,S_2,S_3,S_4,S_5,S_6,_i);
#define RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_6,S_7,S_0,S_1,S_2,S_3,S_4,S_5,_i);
#define RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_5,S_6,S_7,S_0,S_1,S_2,S_3,S_4,_i);
#define RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,_i);
#define RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_3,S_4,S_5,S_6,S_7,S_0,S_1,S_2,_i);
#define RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_2,S_3,S_4,S_5,S_6,S_7,S_0,S_1,_i);
#define RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_1,S_2,S_3,S_4,S_5,S_6,S_7,S_0,_i);


/* A round cut into three braced pieces (fragments 1-3, 4-6 and 7-8) so that
 * other work can be placed between the pieces of one round. */
#define RND_1_3(a,b,c,d,e,f,g,h,i) {\
    RND_STEP_1(a,b,c,d,e,f,g,h,i); \
    RND_STEP_2(a,b,c,d,e,f,g,h,i); \
    RND_STEP_3(a,b,c,d,e,f,g,h,i); \
}

#define RND_4_6(a,b,c,d,e,f,g,h,i) {\
    RND_STEP_4(a,b,c,d,e,f,g,h,i); \
    RND_STEP_5(a,b,c,d,e,f,g,h,i); \
    RND_STEP_6(a,b,c,d,e,f,g,h,i); \
}

#define RND_7_8(a,b,c,d,e,f,g,h,i) {\
    RND_STEP_7(a,b,c,d,e,f,g,h,i); \
    RND_STEP_8(a,b,c,d,e,f,g,h,i); \
}


/* RND_n_0: fragments 1-3 of the round whose first working variable is S_n. */
#define RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_1_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i);
#define RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_1_3(S_7,S_0,S_1,S_2,S_3,S_4,S_5,S_6,_i);
#define RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_1_3(S_6,S_7,S_0,S_1,S_2,S_3,S_4,S_5,_i);
#define RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_1_3(S_5,S_6,S_7,S_0,S_1,S_2,S_3,S_4,_i);
#define RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_1_3(S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,_i);
#define RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_1_3(S_3,S_4,S_5,S_6,S_7,S_0,S_1,S_2,_i);
#define RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_1_3(S_2,S_3,S_4,S_5,S_6,S_7,S_0,S_1,_i);
#define RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_1_3(S_1,S_2,S_3,S_4,S_5,S_6,S_7,S_0,_i);

/* RND_n_1: fragments 4-6 of the same round. */
#define RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_4_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i);
#define RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_4_6(S_7,S_0,S_1,S_2,S_3,S_4,S_5,S_6,_i);
#define RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_4_6(S_6,S_7,S_0,S_1,S_2,S_3,S_4,S_5,_i);
#define RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_4_6(S_5,S_6,S_7,S_0,S_1,S_2,S_3,S_4,_i);
#define RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_4_6(S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,_i);
#define RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_4_6(S_3,S_4,S_5,S_6,S_7,S_0,S_1,S_2,_i);
#define RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_4_6(S_2,S_3,S_4,S_5,S_6,S_7,S_0,S_1,_i);
#define RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_4_6(S_1,S_2,S_3,S_4,S_5,S_6,S_7,S_0,_i);

/* RND_n_2: fragments 7-8 of the same round. */
#define RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_7_8(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i);
#define RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_7_8(S_7,S_0,S_1,S_2,S_3,S_4,S_5,S_6,_i);
#define RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_7_8(S_6,S_7,S_0,S_1,S_2,S_3,S_4,S_5,_i);
#define RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_7_8(S_5,S_6,S_7,S_0,S_1,S_2,S_3,S_4,_i);
#define RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_7_8(S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,_i);
#define RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_7_8(S_3,S_4,S_5,S_6,S_7,S_0,S_1,S_2,_i);
#define RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_7_8(S_2,S_3,S_4,S_5,S_6,S_7,S_0,S_1,_i);
#define RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_7_8(S_1,S_2,S_3,S_4,S_5,S_6,S_7,S_0,_i);
wolfSSL 13:f67a6c6013ca 850
/* Counted loop built from two asm statements.
 *
 * FOR stores the literal init into cnt and emits the label named by loop.
 * END adds inc to cnt, compares it with max and jumps back to the label
 * while cnt <= max. Every statement between the two runs once per pass.
 * max is unused by FOR and init is unused by END; both macros take the same
 * argument list so a pair can be written symmetrically.
 *
 * cnt must be a 32-bit integer lvalue living in memory. FOR only writes it
 * (output operand "=m") and END reads and updates it (read-write operand
 * "+m"); END also changes the flags, hence the "cc" clobber. The operands
 * were previously declared as a plain input in FOR and as write-only in END,
 * which misdescribed both statements to the compiler.
 *
 * NOTE(review): the label is emitted as plain assembler text, so each loop
 * name may appear only once per object file, and the compiler is not told
 * that the enclosed statements repeat - confirm at each use site.
 *
 * As before, END carries its own trailing semicolon and FOR does not. */
#define FOR(cnt, init, max, inc, loop) \
    __asm__ volatile("movl $"#init", %0\n\t"#loop":" : "=m"(cnt))
#define END(cnt, init, max, inc, loop) \
    __asm__ volatile("addl $"#inc", %0\n\tcmpl $"#max", %0\n\tjle "#loop"\n\t" : "+m"(cnt) : : "cc");
wolfSSL 13:f67a6c6013ca 855
wolfSSL 13:f67a6c6013ca 856 #endif /* defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) */
wolfSSL 13:f67a6c6013ca 857
wolfSSL 13:f67a6c6013ca 858 #if defined(HAVE_INTEL_AVX1) /* inline assembler for Intel AVX1 instructions */
wolfSSL 13:f67a6c6013ca 859
/* Thin wrappers that emit a single AVX instruction each. Arguments are given
 * destination first (op1), then the sources, then the immediate where there
 * is one; the template writes them out in the reverse order. Register
 * arguments are macros that already contain one percent sign, so the
 * template adds the second one needed in extended asm. XMM_REGs supplies the
 * clobber list. */
wolfSSL 13:f67a6c6013ca 860 #define VPALIGNR(op1,op2,op3,op4) __asm__ volatile("vpalignr $"#op4", %"#op3", %"#op2", %"#op1:::XMM_REGs)
wolfSSL 13:f67a6c6013ca 861 #define VPADDD(op1,op2,op3) __asm__ volatile("vpaddd %"#op3", %"#op2", %"#op1:::XMM_REGs)
wolfSSL 13:f67a6c6013ca 862 #define VPSRLD(op1,op2,op3) __asm__ volatile("vpsrld $"#op3", %"#op2", %"#op1:::XMM_REGs)
wolfSSL 13:f67a6c6013ca 863 #define VPSRLQ(op1,op2,op3) __asm__ volatile("vpsrlq $"#op3", %"#op2", %"#op1:::XMM_REGs)
wolfSSL 13:f67a6c6013ca 864 #define VPSLLD(op1,op2,op3) __asm__ volatile("vpslld $"#op3", %"#op2", %"#op1:::XMM_REGs)
wolfSSL 13:f67a6c6013ca 865 #define VPOR(op1,op2,op3) __asm__ volatile("vpor %"#op3", %"#op2", %"#op1:::XMM_REGs)
wolfSSL 13:f67a6c6013ca 866 #define VPXOR(op1,op2,op3) __asm__ volatile("vpxor %"#op3", %"#op2", %"#op1:::XMM_REGs)
wolfSSL 13:f67a6c6013ca 867 #define VPSHUFD(op1,op2,op3) __asm__ volatile("vpshufd $"#op3", %"#op2", %"#op1:::XMM_REGs)
wolfSSL 13:f67a6c6013ca 868 #define VPSHUFB(op1,op2,op3) __asm__ volatile("vpshufb %"#op3", %"#op2", %"#op1:::XMM_REGs)
wolfSSL 13:f67a6c6013ca 869
/* Run four SHA-256 rounds (W_K indices _i .. _i+3) and, interleaved with
 * them one vector instruction at a time, compute the next four message
 * schedule words into X0.
 *
 * X0..X3        vector registers holding the sixteen most recent schedule
 *               words; X0 is overwritten with the four new ones
 * XTMP0..XTMP5  vector scratch registers
 * XFER          accepted but not referenced in this macro
 * SHUF_00BA, SHUF_DC00  registers holding the two vpshufb masks
 * a..h          registers holding the working variables; each successive
 *               round is issued with the argument list rotated by one
 *               (a..h, then h,a..g, then g,h,a..f, then f,g,h,a..e)
 *
 * The scalar fragments use the roll-based RND_STEP_n macros. */
wolfSSL 13:f67a6c6013ca 870 #define MessageSched(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER, SHUF_00BA, SHUF_DC00,\
wolfSSL 13:f67a6c6013ca 871 a,b,c,d,e,f,g,h,_i)\
wolfSSL 13:f67a6c6013ca 872 RND_STEP_1(a,b,c,d,e,f,g,h,_i);\
wolfSSL 13:f67a6c6013ca 873 VPALIGNR (XTMP0, X3, X2, 4); /* XTMP0 = W[-7] */\
wolfSSL 13:f67a6c6013ca 874 RND_STEP_2(a,b,c,d,e,f,g,h,_i);\
wolfSSL 13:f67a6c6013ca 875 VPADDD (XTMP0, XTMP0, X0); /* XTMP0 = W[-7] + W[-16] */\
wolfSSL 13:f67a6c6013ca 876 RND_STEP_3(a,b,c,d,e,f,g,h,_i);\
wolfSSL 13:f67a6c6013ca 877 VPALIGNR (XTMP1, X1, X0, 4); /* XTMP1 = W[-15] */\
wolfSSL 13:f67a6c6013ca 878 RND_STEP_4(a,b,c,d,e,f,g,h,_i);\
wolfSSL 13:f67a6c6013ca 879 VPSRLD (XTMP2, XTMP1, 7);\
wolfSSL 13:f67a6c6013ca 880 RND_STEP_5(a,b,c,d,e,f,g,h,_i);\
wolfSSL 13:f67a6c6013ca 881 VPSLLD (XTMP3, XTMP1, 25); /* VPSLLD (XTMP3, XTMP1, (32-7)) */\
wolfSSL 13:f67a6c6013ca 882 RND_STEP_6(a,b,c,d,e,f,g,h,_i);\
wolfSSL 13:f67a6c6013ca 883 VPOR (XTMP3, XTMP3, XTMP2); /* XTMP3 = W[-15] MY_ROR 7 */\
wolfSSL 13:f67a6c6013ca 884 RND_STEP_7(a,b,c,d,e,f,g,h,_i);\
wolfSSL 13:f67a6c6013ca 885 VPSRLD (XTMP2, XTMP1,18);\
wolfSSL 13:f67a6c6013ca 886 RND_STEP_8(a,b,c,d,e,f,g,h,_i);\
wolfSSL 13:f67a6c6013ca 887 \
wolfSSL 13:f67a6c6013ca 888 RND_STEP_1(h,a,b,c,d,e,f,g,_i+1);\
wolfSSL 13:f67a6c6013ca 889 VPSRLD (XTMP4, XTMP1, 3); /* XTMP4 = W[-15] >> 3 */\
wolfSSL 13:f67a6c6013ca 890 RND_STEP_2(h,a,b,c,d,e,f,g,_i+1);\
wolfSSL 13:f67a6c6013ca 891 VPSLLD (XTMP1, XTMP1, 14); /* VPSLLD (XTMP1, XTMP1, (32-18)) */\
wolfSSL 13:f67a6c6013ca 892 RND_STEP_3(h,a,b,c,d,e,f,g,_i+1);\
wolfSSL 13:f67a6c6013ca 893 VPXOR (XTMP3, XTMP3, XTMP1);\
wolfSSL 13:f67a6c6013ca 894 RND_STEP_4(h,a,b,c,d,e,f,g,_i+1);\
wolfSSL 13:f67a6c6013ca 895 VPXOR (XTMP3, XTMP3, XTMP2); /* XTMP3 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR 18 */\
wolfSSL 13:f67a6c6013ca 896 RND_STEP_5(h,a,b,c,d,e,f,g,_i+1);\
wolfSSL 13:f67a6c6013ca 897 VPXOR (XTMP1, XTMP3, XTMP4); /* XTMP1 = s0 */\
wolfSSL 13:f67a6c6013ca 898 RND_STEP_6(h,a,b,c,d,e,f,g,_i+1);\
wolfSSL 13:f67a6c6013ca 899 VPSHUFD(XTMP2, X3, 0b11111010); /* XTMP2 = W[-2] {BBAA}*/\
wolfSSL 13:f67a6c6013ca 900 RND_STEP_7(h,a,b,c,d,e,f,g,_i+1);\
wolfSSL 13:f67a6c6013ca 901 VPADDD (XTMP0, XTMP0, XTMP1); /* XTMP0 = W[-16] + W[-7] + s0 */\
wolfSSL 13:f67a6c6013ca 902 RND_STEP_8(h,a,b,c,d,e,f,g,_i+1);\
wolfSSL 13:f67a6c6013ca 903 \
wolfSSL 13:f67a6c6013ca 904 RND_STEP_1(g,h,a,b,c,d,e,f,_i+2);\
wolfSSL 13:f67a6c6013ca 905 VPSRLD (XTMP4, XTMP2, 10); /* XTMP4 = W[-2] >> 10 {BBAA} */\
wolfSSL 13:f67a6c6013ca 906 RND_STEP_2(g,h,a,b,c,d,e,f,_i+2);\
wolfSSL 13:f67a6c6013ca 907 VPSRLQ (XTMP3, XTMP2, 19); /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */\
wolfSSL 13:f67a6c6013ca 908 RND_STEP_3(g,h,a,b,c,d,e,f,_i+2);\
wolfSSL 13:f67a6c6013ca 909 VPSRLQ (XTMP2, XTMP2, 17); /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */\
wolfSSL 13:f67a6c6013ca 910 RND_STEP_4(g,h,a,b,c,d,e,f,_i+2);\
wolfSSL 13:f67a6c6013ca 911 VPXOR (XTMP2, XTMP2, XTMP3);\
wolfSSL 13:f67a6c6013ca 912 RND_STEP_5(g,h,a,b,c,d,e,f,_i+2);\
wolfSSL 13:f67a6c6013ca 913 VPXOR (XTMP4, XTMP4, XTMP2); /* XTMP4 = s1 {xBxA} */\
wolfSSL 13:f67a6c6013ca 914 RND_STEP_6(g,h,a,b,c,d,e,f,_i+2);\
wolfSSL 13:f67a6c6013ca 915 VPSHUFB (XTMP4, XTMP4, SHUF_00BA); /* XTMP4 = s1 {00BA} */\
wolfSSL 13:f67a6c6013ca 916 RND_STEP_7(g,h,a,b,c,d,e,f,_i+2);\
wolfSSL 13:f67a6c6013ca 917 VPADDD (XTMP0, XTMP0, XTMP4); /* XTMP0 = {..., ..., W[1], W[0]} */\
wolfSSL 13:f67a6c6013ca 918 RND_STEP_8(g,h,a,b,c,d,e,f,_i+2);\
wolfSSL 13:f67a6c6013ca 919 \
wolfSSL 13:f67a6c6013ca 920 RND_STEP_1(f,g,h,a,b,c,d,e,_i+3);\
wolfSSL 13:f67a6c6013ca 921 VPSHUFD (XTMP2, XTMP0, 0b01010000); /* XTMP2 = W[-2] {DDCC} */\
wolfSSL 13:f67a6c6013ca 922 RND_STEP_2(f,g,h,a,b,c,d,e,_i+3);\
wolfSSL 13:f67a6c6013ca 923 VPSRLD (XTMP5, XTMP2, 10); /* XTMP5 = W[-2] >> 10 {DDCC} */\
wolfSSL 13:f67a6c6013ca 924 RND_STEP_3(f,g,h,a,b,c,d,e,_i+3);\
wolfSSL 13:f67a6c6013ca 925 VPSRLQ (XTMP3, XTMP2, 19); /* XTMP3 = W[-2] MY_ROR 19 {xDxC} */\
wolfSSL 13:f67a6c6013ca 926 RND_STEP_4(f,g,h,a,b,c,d,e,_i+3);\
wolfSSL 13:f67a6c6013ca 927 VPSRLQ (XTMP2, XTMP2, 17); /* XTMP2 = W[-2] MY_ROR 17 {xDxC} */\
wolfSSL 13:f67a6c6013ca 928 RND_STEP_5(f,g,h,a,b,c,d,e,_i+3);\
wolfSSL 13:f67a6c6013ca 929 VPXOR (XTMP2, XTMP2, XTMP3);\
wolfSSL 13:f67a6c6013ca 930 RND_STEP_6(f,g,h,a,b,c,d,e,_i+3);\
wolfSSL 13:f67a6c6013ca 931 VPXOR (XTMP5, XTMP5, XTMP2); /* XTMP5 = s1 {xDxC} */\
wolfSSL 13:f67a6c6013ca 932 RND_STEP_7(f,g,h,a,b,c,d,e,_i+3);\
wolfSSL 13:f67a6c6013ca 933 VPSHUFB (XTMP5, XTMP5, SHUF_DC00); /* XTMP5 = s1 {DC00} */\
wolfSSL 13:f67a6c6013ca 934 RND_STEP_8(f,g,h,a,b,c,d,e,_i+3);\
wolfSSL 13:f67a6c6013ca 935 VPADDD (X0, XTMP5, XTMP0); /* X0 = {W[3], W[2], W[1], W[0]} */\
wolfSSL 13:f67a6c6013ca 936
wolfSSL 13:f67a6c6013ca 937 #if defined(HAVE_INTEL_RORX)
wolfSSL 13:f67a6c6013ca 938
/* Same interleaving of four rounds with the computation of the next four
 * schedule words as MessageSched, with identical arguments and identical
 * vector instructions; the only difference is that the scalar fragments are
 * the rorx-based RND_STEP_RORX_n macros. XFER is accepted but not
 * referenced here either. */
wolfSSL 13:f67a6c6013ca 939 #define MessageSched_RORX(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, \
wolfSSL 13:f67a6c6013ca 940 XFER, SHUF_00BA, SHUF_DC00,a,b,c,d,e,f,g,h,_i)\
wolfSSL 13:f67a6c6013ca 941 RND_STEP_RORX_1(a,b,c,d,e,f,g,h,_i);\
wolfSSL 13:f67a6c6013ca 942 VPALIGNR (XTMP0, X3, X2, 4); /* XTMP0 = W[-7] */\
wolfSSL 13:f67a6c6013ca 943 RND_STEP_RORX_2(a,b,c,d,e,f,g,h,_i);\
wolfSSL 13:f67a6c6013ca 944 VPADDD (XTMP0, XTMP0, X0); /* XTMP0 = W[-7] + W[-16] */\
wolfSSL 13:f67a6c6013ca 945 RND_STEP_RORX_3(a,b,c,d,e,f,g,h,_i);\
wolfSSL 13:f67a6c6013ca 946 VPALIGNR (XTMP1, X1, X0, 4); /* XTMP1 = W[-15] */\
wolfSSL 13:f67a6c6013ca 947 RND_STEP_RORX_4(a,b,c,d,e,f,g,h,_i);\
wolfSSL 13:f67a6c6013ca 948 VPSRLD (XTMP2, XTMP1, 7);\
wolfSSL 13:f67a6c6013ca 949 RND_STEP_RORX_5(a,b,c,d,e,f,g,h,_i);\
wolfSSL 13:f67a6c6013ca 950 VPSLLD (XTMP3, XTMP1, 25); /* VPSLLD (XTMP3, XTMP1, (32-7)) */\
wolfSSL 13:f67a6c6013ca 951 RND_STEP_RORX_6(a,b,c,d,e,f,g,h,_i);\
wolfSSL 13:f67a6c6013ca 952 VPOR (XTMP3, XTMP3, XTMP2); /* XTMP3 = W[-15] MY_ROR 7 */\
wolfSSL 13:f67a6c6013ca 953 RND_STEP_RORX_7(a,b,c,d,e,f,g,h,_i);\
wolfSSL 13:f67a6c6013ca 954 VPSRLD (XTMP2, XTMP1,18);\
wolfSSL 13:f67a6c6013ca 955 RND_STEP_RORX_8(a,b,c,d,e,f,g,h,_i);\
wolfSSL 13:f67a6c6013ca 956 \
wolfSSL 13:f67a6c6013ca 957 RND_STEP_RORX_1(h,a,b,c,d,e,f,g,_i+1);\
wolfSSL 13:f67a6c6013ca 958 VPSRLD (XTMP4, XTMP1, 3); /* XTMP4 = W[-15] >> 3 */\
wolfSSL 13:f67a6c6013ca 959 RND_STEP_RORX_2(h,a,b,c,d,e,f,g,_i+1);\
wolfSSL 13:f67a6c6013ca 960 VPSLLD (XTMP1, XTMP1, 14); /* VPSLLD (XTMP1, XTMP1, (32-18)) */\
wolfSSL 13:f67a6c6013ca 961 RND_STEP_RORX_3(h,a,b,c,d,e,f,g,_i+1);\
wolfSSL 13:f67a6c6013ca 962 VPXOR (XTMP3, XTMP3, XTMP1);\
wolfSSL 13:f67a6c6013ca 963 RND_STEP_RORX_4(h,a,b,c,d,e,f,g,_i+1);\
wolfSSL 13:f67a6c6013ca 964 VPXOR (XTMP3, XTMP3, XTMP2); /* XTMP3 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR 18 */\
wolfSSL 13:f67a6c6013ca 965 RND_STEP_RORX_5(h,a,b,c,d,e,f,g,_i+1);\
wolfSSL 13:f67a6c6013ca 966 VPXOR (XTMP1, XTMP3, XTMP4); /* XTMP1 = s0 */\
wolfSSL 13:f67a6c6013ca 967 RND_STEP_RORX_6(h,a,b,c,d,e,f,g,_i+1);\
wolfSSL 13:f67a6c6013ca 968 VPSHUFD(XTMP2, X3, 0b11111010); /* XTMP2 = W[-2] {BBAA}*/\
wolfSSL 13:f67a6c6013ca 969 RND_STEP_RORX_7(h,a,b,c,d,e,f,g,_i+1);\
wolfSSL 13:f67a6c6013ca 970 VPADDD (XTMP0, XTMP0, XTMP1); /* XTMP0 = W[-16] + W[-7] + s0 */\
wolfSSL 13:f67a6c6013ca 971 RND_STEP_RORX_8(h,a,b,c,d,e,f,g,_i+1);\
wolfSSL 13:f67a6c6013ca 972 \
wolfSSL 13:f67a6c6013ca 973 RND_STEP_RORX_1(g,h,a,b,c,d,e,f,_i+2);\
wolfSSL 13:f67a6c6013ca 974 VPSRLD (XTMP4, XTMP2, 10); /* XTMP4 = W[-2] >> 10 {BBAA} */\
wolfSSL 13:f67a6c6013ca 975 RND_STEP_RORX_2(g,h,a,b,c,d,e,f,_i+2);\
wolfSSL 13:f67a6c6013ca 976 VPSRLQ (XTMP3, XTMP2, 19); /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */\
wolfSSL 13:f67a6c6013ca 977 RND_STEP_RORX_3(g,h,a,b,c,d,e,f,_i+2);\
wolfSSL 13:f67a6c6013ca 978 VPSRLQ (XTMP2, XTMP2, 17); /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */\
wolfSSL 13:f67a6c6013ca 979 RND_STEP_RORX_4(g,h,a,b,c,d,e,f,_i+2);\
wolfSSL 13:f67a6c6013ca 980 VPXOR (XTMP2, XTMP2, XTMP3);\
wolfSSL 13:f67a6c6013ca 981 RND_STEP_RORX_5(g,h,a,b,c,d,e,f,_i+2);\
wolfSSL 13:f67a6c6013ca 982 VPXOR (XTMP4, XTMP4, XTMP2); /* XTMP4 = s1 {xBxA} */\
wolfSSL 13:f67a6c6013ca 983 RND_STEP_RORX_6(g,h,a,b,c,d,e,f,_i+2);\
wolfSSL 13:f67a6c6013ca 984 VPSHUFB (XTMP4, XTMP4, SHUF_00BA); /* XTMP4 = s1 {00BA} */\
wolfSSL 13:f67a6c6013ca 985 RND_STEP_RORX_7(g,h,a,b,c,d,e,f,_i+2);\
wolfSSL 13:f67a6c6013ca 986 VPADDD (XTMP0, XTMP0, XTMP4); /* XTMP0 = {..., ..., W[1], W[0]} */\
wolfSSL 13:f67a6c6013ca 987 RND_STEP_RORX_8(g,h,a,b,c,d,e,f,_i+2);\
wolfSSL 13:f67a6c6013ca 988 \
wolfSSL 13:f67a6c6013ca 989 RND_STEP_RORX_1(f,g,h,a,b,c,d,e,_i+3);\
wolfSSL 13:f67a6c6013ca 990 VPSHUFD (XTMP2, XTMP0, 0b01010000); /* XTMP2 = W[-2] {DDCC} */\
wolfSSL 13:f67a6c6013ca 991 RND_STEP_RORX_2(f,g,h,a,b,c,d,e,_i+3);\
wolfSSL 13:f67a6c6013ca 992 VPSRLD (XTMP5, XTMP2, 10); /* XTMP5 = W[-2] >> 10 {DDCC} */\
wolfSSL 13:f67a6c6013ca 993 RND_STEP_RORX_3(f,g,h,a,b,c,d,e,_i+3);\
wolfSSL 13:f67a6c6013ca 994 VPSRLQ (XTMP3, XTMP2, 19); /* XTMP3 = W[-2] MY_ROR 19 {xDxC} */\
wolfSSL 13:f67a6c6013ca 995 RND_STEP_RORX_4(f,g,h,a,b,c,d,e,_i+3);\
wolfSSL 13:f67a6c6013ca 996 VPSRLQ (XTMP2, XTMP2, 17); /* XTMP2 = W[-2] MY_ROR 17 {xDxC} */\
wolfSSL 13:f67a6c6013ca 997 RND_STEP_RORX_5(f,g,h,a,b,c,d,e,_i+3);\
wolfSSL 13:f67a6c6013ca 998 VPXOR (XTMP2, XTMP2, XTMP3);\
wolfSSL 13:f67a6c6013ca 999 RND_STEP_RORX_6(f,g,h,a,b,c,d,e,_i+3);\
wolfSSL 13:f67a6c6013ca 1000 VPXOR (XTMP5, XTMP5, XTMP2); /* XTMP5 = s1 {xDxC} */\
wolfSSL 13:f67a6c6013ca 1001 RND_STEP_RORX_7(f,g,h,a,b,c,d,e,_i+3);\
wolfSSL 13:f67a6c6013ca 1002 VPSHUFB (XTMP5, XTMP5, SHUF_DC00); /* XTMP5 = s1 {DC00} */\
wolfSSL 13:f67a6c6013ca 1003 RND_STEP_RORX_8(f,g,h,a,b,c,d,e,_i+3);\
wolfSSL 13:f67a6c6013ca 1004 VPADDD (X0, XTMP5, XTMP0); /* X0 = {W[3], W[2], W[1], W[0]} */\
wolfSSL 13:f67a6c6013ca 1005
wolfSSL 13:f67a6c6013ca 1006 #endif /* HAVE_INTEL_RORX */
wolfSSL 13:f67a6c6013ca 1007
wolfSSL 13:f67a6c6013ca 1008
/* Load the 16-word input block from sha256->buffer into xmm4..xmm7, four
 * words per register (starting at buffer[0], [4], [8] and [12]), and shuffle
 * the bytes of each register with the mask held in xmm13. xmm13 is the
 * register named BYTE_FLIP_MASK elsewhere in this file and must have been
 * loaded before this macro runs. A variable named sha256 must be in scope. */
wolfSSL 13:f67a6c6013ca 1009 #define W_K_from_buff\
wolfSSL 13:f67a6c6013ca 1010 __asm__ volatile("vmovdqu %0, %%xmm4\n\t"\
wolfSSL 13:f67a6c6013ca 1011 "vpshufb %%xmm13, %%xmm4, %%xmm4\n\t"\
wolfSSL 13:f67a6c6013ca 1012 :: "m"(sha256->buffer[0]):"%xmm4");\
wolfSSL 13:f67a6c6013ca 1013 __asm__ volatile("vmovdqu %0, %%xmm5\n\t"\
wolfSSL 13:f67a6c6013ca 1014 "vpshufb %%xmm13, %%xmm5, %%xmm5\n\t"\
wolfSSL 13:f67a6c6013ca 1015 ::"m"(sha256->buffer[4]):"%xmm5");\
wolfSSL 13:f67a6c6013ca 1016 __asm__ volatile("vmovdqu %0, %%xmm6\n\t"\
wolfSSL 13:f67a6c6013ca 1017 "vpshufb %%xmm13, %%xmm6, %%xmm6\n\t"\
wolfSSL 13:f67a6c6013ca 1018 ::"m"(sha256->buffer[8]):"%xmm6");\
wolfSSL 13:f67a6c6013ca 1019 __asm__ volatile("vmovdqu %0, %%xmm7\n\t"\
wolfSSL 13:f67a6c6013ca 1020 "vpshufb %%xmm13, %%xmm7, %%xmm7\n\t"\
wolfSSL 13:f67a6c6013ca 1021 ::"m"(sha256->buffer[12]):"%xmm7");\
wolfSSL 13:f67a6c6013ca 1022
/* Add the round constants starting at K[i] to the vector register reg and
 * store the sums to W_K[i] onwards, using xmm9 as the intermediate. Arrays
 * named K and W_K must be in scope at the expansion site.
 * NOTE(review): the store is an aligned vmovdqa, so W_K[i] presumably has to
 * sit on a 16-byte boundary (i a multiple of 4 with an aligned W_K), and the
 * memory operands name one element although the instructions move a whole
 * vector - confirm. xmm9 is also the register used as XTMP5. */
wolfSSL 13:f67a6c6013ca 1023 #define _SET_W_K_XFER(reg, i)\
wolfSSL 13:f67a6c6013ca 1024 __asm__ volatile("vpaddd %0, %"#reg", %%xmm9"::"m"(K[i]):XMM_REGs);\
wolfSSL 13:f67a6c6013ca 1025 __asm__ volatile("vmovdqa %%xmm9, %0":"=m"(W_K[i])::XMM_REGs);
wolfSSL 13:f67a6c6013ca 1026
/* Forwarding wrapper so that a macro passed as reg is expanded before the
 * inner macro stringizes it. */
wolfSSL 13:f67a6c6013ca 1027 #define SET_W_K_XFER(reg, i) _SET_W_K_XFER(reg, i)
wolfSSL 13:f67a6c6013ca 1028
wolfSSL 13:f67a6c6013ca 1029 static const ALIGN32 word64 mSHUF_00BA[] = { 0x0b0a090803020100, 0xFFFFFFFFFFFFFFFF }; /* shuffle xBxA -> 00BA */
wolfSSL 13:f67a6c6013ca 1030 static const ALIGN32 word64 mSHUF_DC00[] = { 0xFFFFFFFFFFFFFFFF, 0x0b0a090803020100 }; /* shuffle xDxC -> DC00 */
wolfSSL 13:f67a6c6013ca 1031 static const ALIGN32 word64 mBYTE_FLIP_MASK[] = { 0x0405060700010203, 0x0c0d0e0f08090a0b };
wolfSSL 13:f67a6c6013ca 1032
wolfSSL 13:f67a6c6013ca 1033
/* Load the three constant shuffle masks into the vector registers named by
 * the arguments: mask1 receives mBYTE_FLIP_MASK, mask2 receives mSHUF_00BA
 * and mask3 receives mSHUF_DC00. The loads are unaligned (vmovdqu) and the
 * statements declare no clobbers. */
wolfSSL 13:f67a6c6013ca 1034 #define _Init_Masks(mask1, mask2, mask3)\
wolfSSL 13:f67a6c6013ca 1035 __asm__ volatile("vmovdqu %0, %"#mask1 ::"m"(mBYTE_FLIP_MASK[0]));\
wolfSSL 13:f67a6c6013ca 1036 __asm__ volatile("vmovdqu %0, %"#mask2 ::"m"(mSHUF_00BA[0]));\
wolfSSL 13:f67a6c6013ca 1037 __asm__ volatile("vmovdqu %0, %"#mask3 ::"m"(mSHUF_DC00[0]));
wolfSSL 13:f67a6c6013ca 1038
/* Forwarding wrapper so that register-name macros passed as arguments are
 * expanded before the inner macro stringizes them. */
wolfSSL 13:f67a6c6013ca 1039 #define Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00)\
wolfSSL 13:f67a6c6013ca 1040 _Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00)
wolfSSL 13:f67a6c6013ca 1041
/* XMM register assignment for the AVX1 transforms. Each alias includes the
 * leading '%' because the asm macros paste it after a literal '%'. */
/* X0..X3: the four registers filled by W_K_from_buff. */
wolfSSL 13:f67a6c6013ca 1042 #define X0 %xmm4
wolfSSL 13:f67a6c6013ca 1043 #define X1 %xmm5
wolfSSL 13:f67a6c6013ca 1044 #define X2 %xmm6
wolfSSL 13:f67a6c6013ca 1045 #define X3 %xmm7
wolfSSL 13:f67a6c6013ca 1046 #define X_ X0
wolfSSL 13:f67a6c6013ca 1047
/* Scratch registers handed to MessageSched. XTMP5 is xmm9, which
 * _SET_W_K_XFER also overwrites. */
wolfSSL 13:f67a6c6013ca 1048 #define XTMP0 %xmm0
wolfSSL 13:f67a6c6013ca 1049 #define XTMP1 %xmm1
wolfSSL 13:f67a6c6013ca 1050 #define XTMP2 %xmm2
wolfSSL 13:f67a6c6013ca 1051 #define XTMP3 %xmm3
wolfSSL 13:f67a6c6013ca 1052 #define XTMP4 %xmm8
wolfSSL 13:f67a6c6013ca 1053 #define XTMP5 %xmm9
wolfSSL 13:f67a6c6013ca 1054 #define XFER %xmm10
wolfSSL 13:f67a6c6013ca 1055
/* Registers that hold the shuffle masks loaded by Init_Masks. */
wolfSSL 13:f67a6c6013ca 1056 #define SHUF_00BA %xmm11 /* shuffle xBxA -> 00BA */
wolfSSL 13:f67a6c6013ca 1057 #define SHUF_DC00 %xmm12 /* shuffle xDxC -> DC00 */
wolfSSL 13:f67a6c6013ca 1058 #define BYTE_FLIP_MASK %xmm13
wolfSSL 13:f67a6c6013ca 1059
/* Clobber list used by the XMM asm macros. It is deliberately empty: the
 * register names on the next line are inside a comment. */
wolfSSL 13:f67a6c6013ca 1060 #define XMM_REGs /* Registers are saved in Sha256Update/Final */
wolfSSL 13:f67a6c6013ca 1061 /*"xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13" */
wolfSSL 13:f67a6c6013ca 1062
/* Process the 64-byte block held in sha256->buffer with the AVX1 macros.
 *
 * Init_Masks loads the three shuffle masks and W_K_from_buff loads the
 * byte-shuffled block into X0..X3. For t = 0, 4, ..., 44 SET_W_K_XFER stores
 * the current X register plus K[t..t+3] into W_K[t..t+3], and MessageSched
 * is then invoked for that group. From one group to the next the X
 * registers are passed rotated by one position and the S_0..S_3 / S_4..S_7
 * halves are passed swapped. W_K[48..63] are finally filled from X0..X3 and
 * the sixteen RND_n invocations for 48..63 follow.
 *
 * MessageSched, RND_n, DigestToReg, S_n, K and the RegToDigest definition in
 * effect at this point all live outside this excerpt. Presumably
 * DigestToReg loads sha256->digest into S_0..S_7, MessageSched interleaves
 * four rounds with the computation of the next four schedule words, and
 * RegToDigest folds S_0..S_7 back into sha256->digest -- confirm against
 * those definitions.
 *
 * sha256: hash state whose buffer holds the block to process.
 * Returns 0 on every path. */
wolfSSL 13:f67a6c6013ca 1063 static int Transform_AVX1(Sha256* sha256)
wolfSSL 13:f67a6c6013ca 1064 {
wolfSSL 13:f67a6c6013ca 1065 ALIGN32 word32 W_K[64]; /* temp for W+K */
wolfSSL 13:f67a6c6013ca 1066
wolfSSL 13:f67a6c6013ca 1067 Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00);
wolfSSL 13:f67a6c6013ca 1068 W_K_from_buff; /* X0, X1, X2, X3 = W[0..15]; */
wolfSSL 13:f67a6c6013ca 1069
wolfSSL 13:f67a6c6013ca 1070 DigestToReg(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7);
wolfSSL 13:f67a6c6013ca 1071
wolfSSL 13:f67a6c6013ca 1072 SET_W_K_XFER(X0, 0);
wolfSSL 13:f67a6c6013ca 1073
wolfSSL 13:f67a6c6013ca 1074 MessageSched(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
wolfSSL 13:f67a6c6013ca 1075 SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,0);
wolfSSL 13:f67a6c6013ca 1076 SET_W_K_XFER(X1, 4);
wolfSSL 13:f67a6c6013ca 1077 MessageSched(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
wolfSSL 13:f67a6c6013ca 1078 SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,4);
wolfSSL 13:f67a6c6013ca 1079 SET_W_K_XFER(X2, 8);
wolfSSL 13:f67a6c6013ca 1080 MessageSched(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
wolfSSL 13:f67a6c6013ca 1081 SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8);
wolfSSL 13:f67a6c6013ca 1082 SET_W_K_XFER(X3, 12);
wolfSSL 13:f67a6c6013ca 1083 MessageSched(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
wolfSSL 13:f67a6c6013ca 1084 SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,12);
wolfSSL 13:f67a6c6013ca 1085 SET_W_K_XFER(X0, 16);
wolfSSL 13:f67a6c6013ca 1086 MessageSched(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
wolfSSL 13:f67a6c6013ca 1087 SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16);
wolfSSL 13:f67a6c6013ca 1088 SET_W_K_XFER(X1, 20);
wolfSSL 13:f67a6c6013ca 1089 MessageSched(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
wolfSSL 13:f67a6c6013ca 1090 SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,20);
wolfSSL 13:f67a6c6013ca 1091 SET_W_K_XFER(X2, 24);
wolfSSL 13:f67a6c6013ca 1092 MessageSched(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
wolfSSL 13:f67a6c6013ca 1093 SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24);
wolfSSL 13:f67a6c6013ca 1094 SET_W_K_XFER(X3, 28);
wolfSSL 13:f67a6c6013ca 1095 MessageSched(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
wolfSSL 13:f67a6c6013ca 1096 SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,28);
wolfSSL 13:f67a6c6013ca 1097 SET_W_K_XFER(X0, 32);
wolfSSL 13:f67a6c6013ca 1098 MessageSched(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
wolfSSL 13:f67a6c6013ca 1099 SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32);
wolfSSL 13:f67a6c6013ca 1100 SET_W_K_XFER(X1, 36);
wolfSSL 13:f67a6c6013ca 1101 MessageSched(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
wolfSSL 13:f67a6c6013ca 1102 SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,36);
wolfSSL 13:f67a6c6013ca 1103 SET_W_K_XFER(X2, 40);
wolfSSL 13:f67a6c6013ca 1104 MessageSched(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
wolfSSL 13:f67a6c6013ca 1105 SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40);
wolfSSL 13:f67a6c6013ca 1106 SET_W_K_XFER(X3, 44);
wolfSSL 13:f67a6c6013ca 1107 MessageSched(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
wolfSSL 13:f67a6c6013ca 1108 SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,44);
wolfSSL 13:f67a6c6013ca 1109
/* Last sixteen W+K values: stored from X0..X3 as they are now, with no
 * further MessageSched invocation. */
wolfSSL 13:f67a6c6013ca 1110 SET_W_K_XFER(X0, 48);
wolfSSL 13:f67a6c6013ca 1111 SET_W_K_XFER(X1, 52);
wolfSSL 13:f67a6c6013ca 1112 SET_W_K_XFER(X2, 56);
wolfSSL 13:f67a6c6013ca 1113 SET_W_K_XFER(X3, 60);
wolfSSL 13:f67a6c6013ca 1114
/* Indices 48..63; the macro suffix counts down 0,7,6,...,1 while the
 * S_n argument order stays fixed. */
wolfSSL 13:f67a6c6013ca 1115 RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48);
wolfSSL 13:f67a6c6013ca 1116 RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49);
wolfSSL 13:f67a6c6013ca 1117 RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50);
wolfSSL 13:f67a6c6013ca 1118 RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51);
wolfSSL 13:f67a6c6013ca 1119
wolfSSL 13:f67a6c6013ca 1120 RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52);
wolfSSL 13:f67a6c6013ca 1121 RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53);
wolfSSL 13:f67a6c6013ca 1122 RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54);
wolfSSL 13:f67a6c6013ca 1123 RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55);
wolfSSL 13:f67a6c6013ca 1124
wolfSSL 13:f67a6c6013ca 1125 RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,56);
wolfSSL 13:f67a6c6013ca 1126 RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,57);
wolfSSL 13:f67a6c6013ca 1127 RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,58);
wolfSSL 13:f67a6c6013ca 1128 RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,59);
wolfSSL 13:f67a6c6013ca 1129
wolfSSL 13:f67a6c6013ca 1130 RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,60);
wolfSSL 13:f67a6c6013ca 1131 RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,61);
wolfSSL 13:f67a6c6013ca 1132 RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,62);
wolfSSL 13:f67a6c6013ca 1133 RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,63);
wolfSSL 13:f67a6c6013ca 1134
wolfSSL 13:f67a6c6013ca 1135 RegToDigest(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7);
wolfSSL 13:f67a6c6013ca 1136
wolfSSL 13:f67a6c6013ca 1137 return 0;
wolfSSL 13:f67a6c6013ca 1138 }
wolfSSL 13:f67a6c6013ca 1139
wolfSSL 13:f67a6c6013ca 1140 #if defined(HAVE_INTEL_RORX)
/* Same sequence as Transform_AVX1, except that the twelve schedule groups
 * for t = 0, 4, ..., 44 are invoked through MessageSched_RORX instead of
 * MessageSched. Only compiled when HAVE_INTEL_RORX is defined.
 *
 * The masks are loaded, the byte-shuffled block goes into X0..X3, each
 * group stores X + K[t..t+3] into W_K[t..t+3] before its MessageSched_RORX
 * invocation, W_K[48..63] are filled from X0..X3, and the RND_n invocations
 * for 48..63 follow before RegToDigest.
 *
 * MessageSched_RORX, RND_n, DigestToReg, S_n, K and the RegToDigest
 * definition in effect here are outside this excerpt; presumably the RORX
 * variant performs its rotates with the BMI2 rorx instruction -- confirm
 * against its definition.
 *
 * sha256: hash state whose buffer holds the block to process.
 * Returns 0 on every path. */
wolfSSL 13:f67a6c6013ca 1141 static int Transform_AVX1_RORX(Sha256* sha256)
wolfSSL 13:f67a6c6013ca 1142 {
wolfSSL 13:f67a6c6013ca 1143 ALIGN32 word32 W_K[64]; /* temp for W+K */
wolfSSL 13:f67a6c6013ca 1144
wolfSSL 13:f67a6c6013ca 1145 Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00);
wolfSSL 13:f67a6c6013ca 1146 W_K_from_buff; /* X0, X1, X2, X3 = W[0..15]; */
wolfSSL 13:f67a6c6013ca 1147
wolfSSL 13:f67a6c6013ca 1148 DigestToReg(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7);
wolfSSL 13:f67a6c6013ca 1149 SET_W_K_XFER(X0, 0);
wolfSSL 13:f67a6c6013ca 1150 MessageSched_RORX(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
wolfSSL 13:f67a6c6013ca 1151 XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,0);
wolfSSL 13:f67a6c6013ca 1152 SET_W_K_XFER(X1, 4);
wolfSSL 13:f67a6c6013ca 1153 MessageSched_RORX(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
wolfSSL 13:f67a6c6013ca 1154 XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,4);
wolfSSL 13:f67a6c6013ca 1155 SET_W_K_XFER(X2, 8);
wolfSSL 13:f67a6c6013ca 1156 MessageSched_RORX(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
wolfSSL 13:f67a6c6013ca 1157 XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8);
wolfSSL 13:f67a6c6013ca 1158 SET_W_K_XFER(X3, 12);
wolfSSL 13:f67a6c6013ca 1159 MessageSched_RORX(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
wolfSSL 13:f67a6c6013ca 1160 XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,12);
wolfSSL 13:f67a6c6013ca 1161 SET_W_K_XFER(X0, 16);
wolfSSL 13:f67a6c6013ca 1162 MessageSched_RORX(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
wolfSSL 13:f67a6c6013ca 1163 XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16);
wolfSSL 13:f67a6c6013ca 1164 SET_W_K_XFER(X1, 20);
wolfSSL 13:f67a6c6013ca 1165 MessageSched_RORX(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
wolfSSL 13:f67a6c6013ca 1166 XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,20);
wolfSSL 13:f67a6c6013ca 1167 SET_W_K_XFER(X2, 24);
wolfSSL 13:f67a6c6013ca 1168 MessageSched_RORX(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
wolfSSL 13:f67a6c6013ca 1169 XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24);
wolfSSL 13:f67a6c6013ca 1170 SET_W_K_XFER(X3, 28);
wolfSSL 13:f67a6c6013ca 1171 MessageSched_RORX(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
wolfSSL 13:f67a6c6013ca 1172 XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,28);
wolfSSL 13:f67a6c6013ca 1173 SET_W_K_XFER(X0, 32);
wolfSSL 13:f67a6c6013ca 1174 MessageSched_RORX(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
wolfSSL 13:f67a6c6013ca 1175 XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32);
wolfSSL 13:f67a6c6013ca 1176 SET_W_K_XFER(X1, 36);
wolfSSL 13:f67a6c6013ca 1177 MessageSched_RORX(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
wolfSSL 13:f67a6c6013ca 1178 XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,36);
wolfSSL 13:f67a6c6013ca 1179 SET_W_K_XFER(X2, 40);
wolfSSL 13:f67a6c6013ca 1180 MessageSched_RORX(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
wolfSSL 13:f67a6c6013ca 1181 XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40);
wolfSSL 13:f67a6c6013ca 1182 SET_W_K_XFER(X3, 44);
wolfSSL 13:f67a6c6013ca 1183 MessageSched_RORX(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
wolfSSL 13:f67a6c6013ca 1184 XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,44);
wolfSSL 13:f67a6c6013ca 1185
/* Last sixteen W+K values: stored from X0..X3 as they are now, with no
 * further MessageSched_RORX invocation. */
wolfSSL 13:f67a6c6013ca 1186 SET_W_K_XFER(X0, 48);
wolfSSL 13:f67a6c6013ca 1187 SET_W_K_XFER(X1, 52);
wolfSSL 13:f67a6c6013ca 1188 SET_W_K_XFER(X2, 56);
wolfSSL 13:f67a6c6013ca 1189 SET_W_K_XFER(X3, 60);
wolfSSL 13:f67a6c6013ca 1190
/* Indices 48..63; the macro suffix counts down 0,7,6,...,1 while the
 * S_n argument order stays fixed. */
wolfSSL 13:f67a6c6013ca 1191 RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48);
wolfSSL 13:f67a6c6013ca 1192 RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49);
wolfSSL 13:f67a6c6013ca 1193 RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50);
wolfSSL 13:f67a6c6013ca 1194 RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51);
wolfSSL 13:f67a6c6013ca 1195
wolfSSL 13:f67a6c6013ca 1196 RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52);
wolfSSL 13:f67a6c6013ca 1197 RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53);
wolfSSL 13:f67a6c6013ca 1198 RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54);
wolfSSL 13:f67a6c6013ca 1199 RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55);
wolfSSL 13:f67a6c6013ca 1200
wolfSSL 13:f67a6c6013ca 1201 RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,56);
wolfSSL 13:f67a6c6013ca 1202 RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,57);
wolfSSL 13:f67a6c6013ca 1203 RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,58);
wolfSSL 13:f67a6c6013ca 1204 RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,59);
wolfSSL 13:f67a6c6013ca 1205
wolfSSL 13:f67a6c6013ca 1206 RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,60);
wolfSSL 13:f67a6c6013ca 1207 RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,61);
wolfSSL 13:f67a6c6013ca 1208 RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,62);
wolfSSL 13:f67a6c6013ca 1209 RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,63);
wolfSSL 13:f67a6c6013ca 1210
wolfSSL 13:f67a6c6013ca 1211 RegToDigest(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7);
wolfSSL 13:f67a6c6013ca 1212
wolfSSL 13:f67a6c6013ca 1213 return 0;
wolfSSL 13:f67a6c6013ca 1214 }
wolfSSL 13:f67a6c6013ca 1215 #endif /* HAVE_INTEL_RORX */
wolfSSL 13:f67a6c6013ca 1216 #endif /* HAVE_INTEL_AVX1 */
wolfSSL 13:f67a6c6013ca 1217
wolfSSL 13:f67a6c6013ca 1218
wolfSSL 13:f67a6c6013ca 1219 #if defined(HAVE_INTEL_AVX2)
wolfSSL 13:f67a6c6013ca 1220
/* AVX2 building blocks, one asm statement each (three instructions for
 * _S_TEMP). Register arguments are bare names such as ymm8; they are
 * stringized into the instruction text behind a "%%". Pass aliases such as
 * W_I through the unprefixed wrappers so they expand before stringizing.
 * YMM_REGs is the clobber list; it is defined empty further down.
 *
 *   _MOVE_to_REG(ymm, mem)   unaligned load (vmovdqu) from mem into ymm
 *   _MOVE_to_MEM(mem, ymm)   unaligned store (vmovdqu) of ymm to mem
 *   _BYTE_SWAP(ymm, map)     ymm = vpshufb of ymm, control bytes read at map
 *   _MOVE_128(ymm0, ymm1, ymm2, map)
 *                            vperm2i128 $map, ymm2, ymm1 -> ymm0
 *   _MOVE_BYTE(ymm0, ymm1, map)
 *                            ymm0 = vpshufb of ymm1, control bytes at map
 *   _S_TEMP(dest, src, bits, temp)
 *                            dest = (src >> bits) | (src << (32 - bits)) in
 *                            every 32-bit lane, i.e. a rotate right by
 *                            bits; temp is overwritten
 *   _AVX2_R(dest, src, bits) dest = src >> bits (logical) in every lane
 *   _XOR, _OR, _ADD(dest, src1, src2)
 *                            dest = src2 ^ src1, src2 | src1, or the
 *                            32-bit lane-wise sum src2 + src1
 *   _ADD_MEM(dest, src1, mem)
 *                            dest = src1 + the vector at mem, 32-bit lanes
 *   _BLEND(map, dest, src1, src2)
 *                            vpblendd: 32-bit lanes whose bit is set in map
 *                            are taken from src1, the rest from src2
 */
wolfSSL 13:f67a6c6013ca 1221 #define _MOVE_to_REG(ymm, mem) __asm__ volatile("vmovdqu %0, %%"#ymm" ":: "m"(mem):YMM_REGs);
wolfSSL 13:f67a6c6013ca 1222 #define _MOVE_to_MEM(mem, ymm) __asm__ volatile("vmovdqu %%"#ymm", %0" : "=m"(mem)::YMM_REGs);
wolfSSL 13:f67a6c6013ca 1223 #define _BYTE_SWAP(ymm, map) __asm__ volatile("vpshufb %0, %%"#ymm", %%"#ymm"\n\t"\
wolfSSL 13:f67a6c6013ca 1224 :: "m"(map):YMM_REGs);
wolfSSL 13:f67a6c6013ca 1225 #define _MOVE_128(ymm0, ymm1, ymm2, map) __asm__ volatile("vperm2i128 $"#map", %%"\
wolfSSL 13:f67a6c6013ca 1226 #ymm2", %%"#ymm1", %%"#ymm0" ":::YMM_REGs);
wolfSSL 13:f67a6c6013ca 1227 #define _MOVE_BYTE(ymm0, ymm1, map) __asm__ volatile("vpshufb %0, %%"#ymm1", %%"\
wolfSSL 13:f67a6c6013ca 1228 #ymm0"\n\t":: "m"(map):YMM_REGs);
wolfSSL 13:f67a6c6013ca 1229 #define _S_TEMP(dest, src, bits, temp) __asm__ volatile("vpsrld $"#bits", %%"\
wolfSSL 13:f67a6c6013ca 1230 #src", %%"#dest"\n\tvpslld $32-"#bits", %%"#src", %%"#temp"\n\tvpor %%"\
wolfSSL 13:f67a6c6013ca 1231 #temp",%%"#dest", %%"#dest" ":::YMM_REGs);
wolfSSL 13:f67a6c6013ca 1232 #define _AVX2_R(dest, src, bits) __asm__ volatile("vpsrld $"#bits", %%"\
wolfSSL 13:f67a6c6013ca 1233 #src", %%"#dest" ":::YMM_REGs);
wolfSSL 13:f67a6c6013ca 1234 #define _XOR(dest, src1, src2) __asm__ volatile("vpxor %%"#src1", %%"\
wolfSSL 13:f67a6c6013ca 1235 #src2", %%"#dest" ":::YMM_REGs);
wolfSSL 13:f67a6c6013ca 1236 #define _OR(dest, src1, src2) __asm__ volatile("vpor %%"#src1", %%"\
wolfSSL 13:f67a6c6013ca 1237 #src2", %%"#dest" ":::YMM_REGs);
wolfSSL 13:f67a6c6013ca 1238 #define _ADD(dest, src1, src2) __asm__ volatile("vpaddd %%"#src1", %%"\
wolfSSL 13:f67a6c6013ca 1239 #src2", %%"#dest" ":::YMM_REGs);
wolfSSL 13:f67a6c6013ca 1240 #define _ADD_MEM(dest, src1, mem) __asm__ volatile("vpaddd %0, %%"#src1", %%"\
wolfSSL 13:f67a6c6013ca 1241 #dest" "::"m"(mem):YMM_REGs);
wolfSSL 13:f67a6c6013ca 1242 #define _BLEND(map, dest, src1, src2) __asm__ volatile("vpblendd $"#map", %%"\
wolfSSL 13:f67a6c6013ca 1243 #src1", %%"#src2", %%"#dest" ":::YMM_REGs);
wolfSSL 13:f67a6c6013ca 1244
/* _EXTRACT_XMM_n(xmm, mem): copy one 32-bit lane of xmm into the output
 * operand with vpextrd. The operand is called mem but carries the "=r"
 * constraint, so the compiler supplies a general-purpose register for it.
 * _0.._3 take lanes 0..3. _EXTRACT_XMM_4 first swaps the two 128-bit halves
 * of ymm in place (vperm2i128 $0x1) and then takes lane 0 of xmm.
 * _5.._7 take lanes 1..3 -- presumably they are meant to follow _4 on the
 * same register so that they read what was the upper half; confirm at the
 * call sites. */
wolfSSL 13:f67a6c6013ca 1245 #define _EXTRACT_XMM_0(xmm, mem) __asm__ volatile("vpextrd $0, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs);
wolfSSL 13:f67a6c6013ca 1246 #define _EXTRACT_XMM_1(xmm, mem) __asm__ volatile("vpextrd $1, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs);
wolfSSL 13:f67a6c6013ca 1247 #define _EXTRACT_XMM_2(xmm, mem) __asm__ volatile("vpextrd $2, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs);
wolfSSL 13:f67a6c6013ca 1248 #define _EXTRACT_XMM_3(xmm, mem) __asm__ volatile("vpextrd $3, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs);
wolfSSL 13:f67a6c6013ca 1249 #define _EXTRACT_XMM_4(ymm, xmm, mem)\
wolfSSL 13:f67a6c6013ca 1250 __asm__ volatile("vperm2i128 $0x1, %%"#ymm", %%"#ymm", %%"#ymm" ":::YMM_REGs);\
wolfSSL 13:f67a6c6013ca 1251 __asm__ volatile("vpextrd $0, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs);
wolfSSL 13:f67a6c6013ca 1252 #define _EXTRACT_XMM_5(xmm, mem) __asm__ volatile("vpextrd $1, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs);
wolfSSL 13:f67a6c6013ca 1253 #define _EXTRACT_XMM_6(xmm, mem) __asm__ volatile("vpextrd $2, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs);
wolfSSL 13:f67a6c6013ca 1254 #define _EXTRACT_XMM_7(xmm, mem) __asm__ volatile("vpextrd $3, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs);
wolfSSL 13:f67a6c6013ca 1255
/* Swap the low and high 128-bit halves of ymm in place. SWAP_YMM_HL is the
 * expanding wrapper to use with register aliases. */
wolfSSL 13:f67a6c6013ca 1256 #define _SWAP_YMM_HL(ymm) __asm__ volatile("vperm2i128 $0x1, %%"#ymm", %%"#ymm", %%"#ymm" ":::YMM_REGs);
wolfSSL 13:f67a6c6013ca 1257 #define SWAP_YMM_HL(ymm) _SWAP_YMM_HL(ymm)
wolfSSL 13:f67a6c6013ca 1258
/* Expanding wrappers: arguments such as W_I or YMM_TEMP0 are replaced by
 * their register names here, before the underscore-prefixed macros
 * stringize them. */
wolfSSL 13:f67a6c6013ca 1259 #define MOVE_to_REG(ymm, mem) _MOVE_to_REG(ymm, mem)
wolfSSL 13:f67a6c6013ca 1260 #define MOVE_to_MEM(mem, ymm) _MOVE_to_MEM(mem, ymm)
wolfSSL 13:f67a6c6013ca 1261 #define BYTE_SWAP(ymm, map) _BYTE_SWAP(ymm, map)
wolfSSL 13:f67a6c6013ca 1262 #define MOVE_128(ymm0, ymm1, ymm2, map) _MOVE_128(ymm0, ymm1, ymm2, map)
wolfSSL 13:f67a6c6013ca 1263 #define MOVE_BYTE(ymm0, ymm1, map) _MOVE_BYTE(ymm0, ymm1, map)
wolfSSL 13:f67a6c6013ca 1264 #define XOR(dest, src1, src2) _XOR(dest, src1, src2)
wolfSSL 13:f67a6c6013ca 1265 #define OR(dest, src1, src2) _OR(dest, src1, src2)
wolfSSL 13:f67a6c6013ca 1266 #define ADD(dest, src1, src2) _ADD(dest, src1, src2)
wolfSSL 13:f67a6c6013ca 1267 #define ADD_MEM(dest, src1, mem) _ADD_MEM(dest, src1, mem)
wolfSSL 13:f67a6c6013ca 1268 #define BLEND(map, dest, src1, src2) _BLEND(map, dest, src1, src2)
wolfSSL 13:f67a6c6013ca 1269
/* AVX2_S: rotate every 32-bit lane of src right by bits into dest, using
 * the S_TEMP register as scratch. AVX2_R: logical right shift by bits.
 * S_TMP already ends in ';', so AVX2_S(...); yields an empty statement. */
wolfSSL 13:f67a6c6013ca 1270 #define S_TMP(dest, src, bits, temp) _S_TEMP(dest, src, bits, temp);
wolfSSL 13:f67a6c6013ca 1271 #define AVX2_S(dest, src, bits) S_TMP(dest, src, bits, S_TEMP)
wolfSSL 13:f67a6c6013ca 1272 #define AVX2_R(dest, src, bits) _AVX2_R(dest, src, bits)
wolfSSL 13:f67a6c6013ca 1273
/* GAMMA0: dest = rotr(src,7) ^ rotr(src,18) ^ (src >> 3) per 32-bit lane
 * (the SHA-256 small sigma0). G_TEMP is overwritten. dest must not be the
 * same register as src, because src is read again after dest is written.
 * GAMMA0_1 followed by GAMMA0_2 performs the same steps in two parts so
 * that other code can be placed between them. */
wolfSSL 13:f67a6c6013ca 1274 #define GAMMA0(dest, src) AVX2_S(dest, src, 7); AVX2_S(G_TEMP, src, 18); \
wolfSSL 13:f67a6c6013ca 1275 XOR(dest, G_TEMP, dest); AVX2_R(G_TEMP, src, 3); XOR(dest, G_TEMP, dest);
wolfSSL 13:f67a6c6013ca 1276 #define GAMMA0_1(dest, src) AVX2_S(dest, src, 7); AVX2_S(G_TEMP, src, 18);
wolfSSL 13:f67a6c6013ca 1277 #define GAMMA0_2(dest, src) XOR(dest, G_TEMP, dest); AVX2_R(G_TEMP, src, 3); \
wolfSSL 13:f67a6c6013ca 1278 XOR(dest, G_TEMP, dest);
wolfSSL 13:f67a6c6013ca 1279
/* GAMMA1: dest = rotr(src,17) ^ rotr(src,19) ^ (src >> 10) per 32-bit lane
 * (the SHA-256 small sigma1). Same scratch use, same dest != src
 * restriction and same two-part split as GAMMA0. */
wolfSSL 13:f67a6c6013ca 1280 #define GAMMA1(dest, src) AVX2_S(dest, src, 17); AVX2_S(G_TEMP, src, 19); \
wolfSSL 13:f67a6c6013ca 1281 XOR(dest, G_TEMP, dest); AVX2_R(G_TEMP, src, 10); XOR(dest, G_TEMP, dest);
wolfSSL 13:f67a6c6013ca 1282 #define GAMMA1_1(dest, src) AVX2_S(dest, src, 17); AVX2_S(G_TEMP, src, 19);
wolfSSL 13:f67a6c6013ca 1283 #define GAMMA1_2(dest, src) XOR(dest, G_TEMP, dest); AVX2_R(G_TEMP, src, 10); \
wolfSSL 13:f67a6c6013ca 1284 XOR(dest, G_TEMP, dest);
wolfSSL 13:f67a6c6013ca 1285
/* Copy freshly computed words from W_I into the not-yet-filled lanes of
 * W_I_2; YMM_TEMP0 is overwritten. With 32-bit lanes numbered 0..7:
 *   FEEDBACK1: W_I_2[2..3] = W_I[0..1]
 *   FEEDBACK2: W_I_2[4..5] = W_I[2..3]  (MOVE_128 with 0x08 first puts the
 *              low half of W_I into the high half of YMM_TEMP0 and zeroes
 *              its low half)
 *   FEEDBACK3: W_I_2[6..7] = W_I[4..5]
 * All other lanes of W_I_2 are left unchanged by the blend. */
wolfSSL 13:f67a6c6013ca 1286 #define FEEDBACK1_to_W_I_2 MOVE_BYTE(YMM_TEMP0, W_I, mMAP1toW_I_2[0]); \
wolfSSL 13:f67a6c6013ca 1287 BLEND(0x0c, W_I_2, YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1288 #define FEEDBACK2_to_W_I_2 MOVE_128(YMM_TEMP0, W_I, W_I, 0x08); \
wolfSSL 13:f67a6c6013ca 1289 MOVE_BYTE(YMM_TEMP0, YMM_TEMP0, mMAP2toW_I_2[0]); BLEND(0x30, W_I_2, YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1290 #define FEEDBACK3_to_W_I_2 MOVE_BYTE(YMM_TEMP0, W_I, mMAP3toW_I_2[0]); \
wolfSSL 13:f67a6c6013ca 1291 BLEND(0xc0, W_I_2, YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1292
/* W_I_7[7] = W_I[0]; the other lanes of W_I_7 are left unchanged.
 * YMM_TEMP0 is overwritten. */
wolfSSL 13:f67a6c6013ca 1293 #define FEEDBACK_to_W_I_7 MOVE_128(YMM_TEMP0, W_I, W_I, 0x08);\
wolfSSL 13:f67a6c6013ca 1294 MOVE_BYTE(YMM_TEMP0, YMM_TEMP0, mMAPtoW_I_7[0]); BLEND(0x80, W_I_7, YMM_TEMP0, W_I_7);
wolfSSL 13:f67a6c6013ca 1295
/* NOTE(review): no macro named voitle is defined in the visible source, so
 * this #undef looks like a no-op leftover (possibly a misspelling) --
 * confirm before removing it. */
wolfSSL 13:f67a6c6013ca 1296 #undef voitle
wolfSSL 13:f67a6c6013ca 1297
/* YMM register assignment for Transform_AVX2. These aliases are bare names
 * (no '%'); the AVX2 asm macros add "%%" themselves.
 * W_I_16, W_I_15, W_I_7, W_I_2 and W_I hold the message-schedule windows
 * that the transform loads at those offsets; G_TEMP is the scratch of
 * GAMMA0/GAMMA1 and S_TEMP the scratch of AVX2_S.
 * YMM_TEMP0 and W_K_TEMP are both ymm15 (xmm15 for the x-suffixed names),
 * so they cannot hold different values at the same time. */
wolfSSL 13:f67a6c6013ca 1298 #define W_I_16 ymm8
wolfSSL 13:f67a6c6013ca 1299 #define W_I_15 ymm9
wolfSSL 13:f67a6c6013ca 1300 #define W_I_7 ymm10
wolfSSL 13:f67a6c6013ca 1301 #define W_I_2 ymm11
wolfSSL 13:f67a6c6013ca 1302 #define W_I ymm12
wolfSSL 13:f67a6c6013ca 1303 #define G_TEMP ymm13
wolfSSL 13:f67a6c6013ca 1304 #define S_TEMP ymm14
wolfSSL 13:f67a6c6013ca 1305 #define YMM_TEMP0 ymm15
wolfSSL 13:f67a6c6013ca 1306 #define YMM_TEMP0x xmm15
wolfSSL 13:f67a6c6013ca 1307 #define W_I_TEMP ymm7
wolfSSL 13:f67a6c6013ca 1308 #define W_K_TEMP ymm15
wolfSSL 13:f67a6c6013ca 1309 #define W_K_TEMPx xmm15
wolfSSL 13:f67a6c6013ca 1310
/* Clobber list used by the YMM asm macros. It is deliberately empty: the
 * register names on the next line are inside a comment. */
wolfSSL 13:f67a6c6013ca 1311 #define YMM_REGs /* Registers are saved in Sha256Update/Final */
wolfSSL 13:f67a6c6013ca 1312 /* "%ymm7","%ymm8","%ymm9","%ymm10","%ymm11","%ymm12","%ymm13","%ymm14","%ymm15"*/
wolfSSL 13:f67a6c6013ca 1313
wolfSSL 13:f67a6c6013ca 1314
/* Register shuffles that re-derive the schedule windows from one another.
 * Arguments are bare register names; 32-bit lanes are numbered 0..7 across
 * the 256-bit register. Every macro below ends in a continuation backslash
 * followed by a blank line; that blank line terminates the macro and must
 * stay blank. */
/* MOVE_15_to_16, in order:
 *   1. swap the 128-bit halves of w_i_15 in place;
 *   2. w_i_16 = w_i_7 with lane 3 replaced by lane 3 of that swapped w_i_15;
 *   3. w_i_15 = w_i_7 with its halves swapped (w_i_15 is overwritten);
 *   4. lane 7 of w_i_16 is replaced by lane 7 of the new w_i_15;
 *   5. vpshufd $0x93 on w_i_16: inside each 128-bit half, lane n moves to
 *      lane n+1 and lane 3 wraps around to lane 0. */
wolfSSL 13:f67a6c6013ca 1315 #define MOVE_15_to_16(w_i_16, w_i_15, w_i_7)\
wolfSSL 13:f67a6c6013ca 1316 __asm__ volatile("vperm2i128 $0x01, %%"#w_i_15", %%"#w_i_15", %%"#w_i_15" ":::YMM_REGs);\
wolfSSL 13:f67a6c6013ca 1317 __asm__ volatile("vpblendd $0x08, %%"#w_i_15", %%"#w_i_7", %%"#w_i_16" ":::YMM_REGs);\
wolfSSL 13:f67a6c6013ca 1318 __asm__ volatile("vperm2i128 $0x01, %%"#w_i_7", %%"#w_i_7", %%"#w_i_15" ":::YMM_REGs);\
wolfSSL 13:f67a6c6013ca 1319 __asm__ volatile("vpblendd $0x80, %%"#w_i_15", %%"#w_i_16", %%"#w_i_16" ":::YMM_REGs);\
wolfSSL 13:f67a6c6013ca 1320 __asm__ volatile("vpshufd $0x93, %%"#w_i_16", %%"#w_i_16" ":::YMM_REGs);\
wolfSSL 13:f67a6c6013ca 1321
/* MOVE_7_to_15: plain register copy, w_i_15 = w_i_7. */
wolfSSL 13:f67a6c6013ca 1322 #define MOVE_7_to_15(w_i_15, w_i_7)\
wolfSSL 13:f67a6c6013ca 1323 __asm__ volatile("vmovdqu %%"#w_i_7", %%"#w_i_15" ":::YMM_REGs);\
wolfSSL 13:f67a6c6013ca 1324
/* MOVE_I_to_7: w_i_7 = w_i with lane 0 replaced by lane 0 of the
 * half-swapped w_i (i.e. by w_i lane 4), then vpshufd $0x39: inside each
 * 128-bit half, lane n+1 moves to lane n and lane 0 wraps to lane 3. */
wolfSSL 13:f67a6c6013ca 1325 #define MOVE_I_to_7(w_i_7, w_i)\
wolfSSL 13:f67a6c6013ca 1326 __asm__ volatile("vperm2i128 $0x01, %%"#w_i", %%"#w_i", %%"#w_i_7" ":::YMM_REGs);\
wolfSSL 13:f67a6c6013ca 1327 __asm__ volatile("vpblendd $0x01, %%"#w_i_7", %%"#w_i", %%"#w_i_7" ":::YMM_REGs);\
wolfSSL 13:f67a6c6013ca 1328 __asm__ volatile("vpshufd $0x39, %%"#w_i_7", %%"#w_i_7" ":::YMM_REGs);\
wolfSSL 13:f67a6c6013ca 1329
/* MOVE_I_to_2: w_i_2 = w_i with its halves swapped, then vpshufd $0x0e:
 * inside each 128-bit half, lanes 0 and 1 receive lanes 2 and 3, and lanes
 * 2 and 3 both receive lane 0. */
wolfSSL 13:f67a6c6013ca 1330 #define MOVE_I_to_2(w_i_2, w_i)\
wolfSSL 13:f67a6c6013ca 1331 __asm__ volatile("vperm2i128 $0x01, %%"#w_i", %%"#w_i", %%"#w_i_2" ":::YMM_REGs);\
wolfSSL 13:f67a6c6013ca 1332 __asm__ volatile("vpshufd $0x0e, %%"#w_i_2", %%"#w_i_2" ":::YMM_REGs);\
wolfSSL 13:f67a6c6013ca 1333
/* ROTATE_W: run the four moves above in the order 15->16, 7->15, I->7,
 * I->2. The order matters: each step reads a register that a later step
 * overwrites. */
wolfSSL 13:f67a6c6013ca 1334 #define ROTATE_W(w_i_16, w_i_15, w_i_7, w_i_2, w_i)\
wolfSSL 13:f67a6c6013ca 1335 MOVE_15_to_16(w_i_16, w_i_15, w_i_7); \
wolfSSL 13:f67a6c6013ca 1336 MOVE_7_to_15(w_i_15, w_i_7); \
wolfSSL 13:f67a6c6013ca 1337 MOVE_I_to_7(w_i_7, w_i); \
wolfSSL 13:f67a6c6013ca 1338 MOVE_I_to_2(w_i_2, w_i);\
wolfSSL 13:f67a6c6013ca 1339
/* _RegToDigest: for n = 0..7, copy register S_n into the temporary d with
 * movl and add d to sha256->digest[n].
 * Each S_n is stringized after a literal '%', so it is expected to be a
 * register name that already starts with '%' (the S_n definitions are
 * outside this excerpt -- confirm). SSE_REGs, the clobber list, is also
 * defined outside this excerpt.
 * A variable named sha256 must be in scope where the macro is expanded. */
wolfSSL 13:f67a6c6013ca 1340 #define _RegToDigest(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )\
wolfSSL 13:f67a6c6013ca 1341 { word32 d;\
wolfSSL 13:f67a6c6013ca 1342 __asm__ volatile("movl %"#S_0", %0":"=r"(d)::SSE_REGs);\
wolfSSL 13:f67a6c6013ca 1343 sha256->digest[0] += d;\
wolfSSL 13:f67a6c6013ca 1344 __asm__ volatile("movl %"#S_1", %0":"=r"(d)::SSE_REGs);\
wolfSSL 13:f67a6c6013ca 1345 sha256->digest[1] += d;\
wolfSSL 13:f67a6c6013ca 1346 __asm__ volatile("movl %"#S_2", %0":"=r"(d)::SSE_REGs);\
wolfSSL 13:f67a6c6013ca 1347 sha256->digest[2] += d;\
wolfSSL 13:f67a6c6013ca 1348 __asm__ volatile("movl %"#S_3", %0":"=r"(d)::SSE_REGs);\
wolfSSL 13:f67a6c6013ca 1349 sha256->digest[3] += d;\
wolfSSL 13:f67a6c6013ca 1350 __asm__ volatile("movl %"#S_4", %0":"=r"(d)::SSE_REGs);\
wolfSSL 13:f67a6c6013ca 1351 sha256->digest[4] += d;\
wolfSSL 13:f67a6c6013ca 1352 __asm__ volatile("movl %"#S_5", %0":"=r"(d)::SSE_REGs);\
wolfSSL 13:f67a6c6013ca 1353 sha256->digest[5] += d;\
wolfSSL 13:f67a6c6013ca 1354 __asm__ volatile("movl %"#S_6", %0":"=r"(d)::SSE_REGs);\
wolfSSL 13:f67a6c6013ca 1355 sha256->digest[6] += d;\
wolfSSL 13:f67a6c6013ca 1356 __asm__ volatile("movl %"#S_7", %0":"=r"(d)::SSE_REGs);\
wolfSSL 13:f67a6c6013ca 1357 sha256->digest[7] += d;\
wolfSSL 13:f67a6c6013ca 1358 }
wolfSSL 13:f67a6c6013ca 1359
/* _DumpS: debugging aid. Copies registers S_0..S_7 into d[0..7] with movl,
 * prints the eight values as 8-digit hex on one line with printf, then
 * moves d[0..7] back into S_0..S_7 -- presumably because the printf call
 * may overwrite those registers.
 * As in _RegToDigest, each S_n is stringized after a literal '%'. SSE_REGs
 * is defined outside this excerpt. */
wolfSSL 13:f67a6c6013ca 1360 #define _DumpS(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )\
wolfSSL 13:f67a6c6013ca 1361 { word32 d[8];\
wolfSSL 13:f67a6c6013ca 1362 __asm__ volatile("movl %"#S_0", %0":"=r"(d[0])::SSE_REGs);\
wolfSSL 13:f67a6c6013ca 1363 __asm__ volatile("movl %"#S_1", %0":"=r"(d[1])::SSE_REGs);\
wolfSSL 13:f67a6c6013ca 1364 __asm__ volatile("movl %"#S_2", %0":"=r"(d[2])::SSE_REGs);\
wolfSSL 13:f67a6c6013ca 1365 __asm__ volatile("movl %"#S_3", %0":"=r"(d[3])::SSE_REGs);\
wolfSSL 13:f67a6c6013ca 1366 __asm__ volatile("movl %"#S_4", %0":"=r"(d[4])::SSE_REGs);\
wolfSSL 13:f67a6c6013ca 1367 __asm__ volatile("movl %"#S_5", %0":"=r"(d[5])::SSE_REGs);\
wolfSSL 13:f67a6c6013ca 1368 __asm__ volatile("movl %"#S_6", %0":"=r"(d[6])::SSE_REGs);\
wolfSSL 13:f67a6c6013ca 1369 __asm__ volatile("movl %"#S_7", %0":"=r"(d[7])::SSE_REGs);\
wolfSSL 13:f67a6c6013ca 1370 printf("S[0..7]=%08x,%08x,%08x,%08x,%08x,%08x,%08x,%08x\n", d[0],d[1],d[2],d[3],d[4],d[5],d[6],d[7]);\
wolfSSL 13:f67a6c6013ca 1371 __asm__ volatile("movl %0, %"#S_0::"r"(d[0]):SSE_REGs);\
wolfSSL 13:f67a6c6013ca 1372 __asm__ volatile("movl %0, %"#S_1::"r"(d[1]):SSE_REGs);\
wolfSSL 13:f67a6c6013ca 1373 __asm__ volatile("movl %0, %"#S_2::"r"(d[2]):SSE_REGs);\
wolfSSL 13:f67a6c6013ca 1374 __asm__ volatile("movl %0, %"#S_3::"r"(d[3]):SSE_REGs);\
wolfSSL 13:f67a6c6013ca 1375 __asm__ volatile("movl %0, %"#S_4::"r"(d[4]):SSE_REGs);\
wolfSSL 13:f67a6c6013ca 1376 __asm__ volatile("movl %0, %"#S_5::"r"(d[5]):SSE_REGs);\
wolfSSL 13:f67a6c6013ca 1377 __asm__ volatile("movl %0, %"#S_6::"r"(d[6]):SSE_REGs);\
wolfSSL 13:f67a6c6013ca 1378 __asm__ volatile("movl %0, %"#S_7::"r"(d[7]):SSE_REGs);\
wolfSSL 13:f67a6c6013ca 1379 }
wolfSSL 13:f67a6c6013ca 1380
wolfSSL 13:f67a6c6013ca 1381
/* Expanding wrappers: the S_n arguments are macro-expanded at this level
 * before the underscore-prefixed macros stringize them.
 * _DigestToReg is defined outside this excerpt. */
wolfSSL 13:f67a6c6013ca 1382 #define DigestToReg(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )\
wolfSSL 13:f67a6c6013ca 1383 _DigestToReg(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )
wolfSSL 13:f67a6c6013ca 1384
wolfSSL 13:f67a6c6013ca 1385 #define RegToDigest(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )\
wolfSSL 13:f67a6c6013ca 1386 _RegToDigest(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )
wolfSSL 13:f67a6c6013ca 1387
/* Debug wrapper; note that it is spelled DumS (no 'p') while it forwards
 * to _DumpS. */
wolfSSL 13:f67a6c6013ca 1388 #define DumS(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )\
wolfSSL 13:f67a6c6013ca 1389 _DumpS(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )
wolfSSL 13:f67a6c6013ca 1390
wolfSSL 13:f67a6c6013ca 1391
/* 32-byte vpshufb control tables for Transform_AVX2. vpshufb works on each
 * 128-bit half separately, and a control byte with its top bit set (0x80)
 * yields a zero byte.
 *
 * The element type is word64 rather than unsigned long, matching the AVX1
 * masks: every initialiser is a 64-bit value and every table is read as 32
 * bytes, which unsigned long only provides where long is 64 bits wide.
 *
 * Byte-flip masks reverse the byte order inside each 32-bit word they keep:
 *   _16 and _15 keep all eight words;
 *   _7 keeps words 0..6 and zeroes word 7;
 *   _2 keeps words 0..1 and zeroes words 2..7.
 * The zeroed words are the ones that lie beyond the 16-word block when
 * Transform_AVX2 loads from buffer[16-7] and buffer[16-2]. */
wolfSSL 13:f67a6c6013ca 1392 /* Byte swap masks; words past the end of the block are filled with zeros. */
wolfSSL 13:f67a6c6013ca 1393 static const word64 mBYTE_FLIP_MASK_16[] =
wolfSSL 13:f67a6c6013ca 1394 { 0x0405060700010203, 0x0c0d0e0f08090a0b, 0x0405060700010203, 0x0c0d0e0f08090a0b };
wolfSSL 13:f67a6c6013ca 1395 static const word64 mBYTE_FLIP_MASK_15[] =
wolfSSL 13:f67a6c6013ca 1396 { 0x0405060700010203, 0x0c0d0e0f08090a0b, 0x0405060700010203, 0x0c0d0e0f08090a0b };
wolfSSL 13:f67a6c6013ca 1397 static const word64 mBYTE_FLIP_MASK_7 [] =
wolfSSL 13:f67a6c6013ca 1398 { 0x0405060700010203, 0x0c0d0e0f08090a0b, 0x0405060700010203, 0x8080808008090a0b };
wolfSSL 13:f67a6c6013ca 1399 static const word64 mBYTE_FLIP_MASK_2 [] =
wolfSSL 13:f67a6c6013ca 1400 { 0x0405060700010203, 0x8080808080808080, 0x8080808080808080, 0x8080808080808080 };
wolfSSL 13:f67a6c6013ca 1401
/* Placement maps used by the FEEDBACK macros; every byte not listed is
 * zeroed. With 32-bit words numbered 0..7 across the register:
 *   mMAPtoW_I_7   word 7    <- word 4
 *   mMAP1toW_I_2  words 2,3 <- words 0,1
 *   mMAP2toW_I_2  words 4,5 <- words 6,7
 *   mMAP3toW_I_2  words 6,7 <- words 4,5 */
wolfSSL 13:f67a6c6013ca 1402 static const word64 mMAPtoW_I_7[] =
wolfSSL 13:f67a6c6013ca 1403 { 0x8080808080808080, 0x8080808080808080, 0x8080808080808080, 0x0302010080808080 };
wolfSSL 13:f67a6c6013ca 1404 static const word64 mMAP1toW_I_2[] =
wolfSSL 13:f67a6c6013ca 1405 { 0x8080808080808080, 0x0706050403020100, 0x8080808080808080, 0x8080808080808080 };
wolfSSL 13:f67a6c6013ca 1406 static const word64 mMAP2toW_I_2[] =
wolfSSL 13:f67a6c6013ca 1407 { 0x8080808080808080, 0x8080808080808080, 0x0f0e0d0c0b0a0908, 0x8080808080808080 };
wolfSSL 13:f67a6c6013ca 1408 static const word64 mMAP3toW_I_2[] =
wolfSSL 13:f67a6c6013ca 1409 { 0x8080808080808080, 0x8080808080808080, 0x8080808080808080, 0x0706050403020100 };
wolfSSL 13:f67a6c6013ca 1410
wolfSSL 13:f67a6c6013ca 1411 static int Transform_AVX2(Sha256* sha256)
wolfSSL 13:f67a6c6013ca 1412 {
wolfSSL 13:f67a6c6013ca 1413 #ifdef WOLFSSL_SMALL_STACK
wolfSSL 13:f67a6c6013ca 1414 word32* W_K;
wolfSSL 13:f67a6c6013ca 1415 W_K = (word32*) XMALLOC(sizeof(word32) * 64, NULL, DYNAMIC_TYPE_TMP_BUFFER);
wolfSSL 13:f67a6c6013ca 1416 if (W_K == NULL)
wolfSSL 13:f67a6c6013ca 1417 return MEMORY_E;
wolfSSL 13:f67a6c6013ca 1418 #else
wolfSSL 13:f67a6c6013ca 1419 word32 W_K[64];
wolfSSL 13:f67a6c6013ca 1420 #endif
wolfSSL 13:f67a6c6013ca 1421
wolfSSL 13:f67a6c6013ca 1422 MOVE_to_REG(W_I_16, sha256->buffer[0]); BYTE_SWAP(W_I_16, mBYTE_FLIP_MASK_16[0]);
wolfSSL 13:f67a6c6013ca 1423 MOVE_to_REG(W_I_15, sha256->buffer[1]); BYTE_SWAP(W_I_15, mBYTE_FLIP_MASK_15[0]);
wolfSSL 13:f67a6c6013ca 1424 MOVE_to_REG(W_I, sha256->buffer[8]); BYTE_SWAP(W_I, mBYTE_FLIP_MASK_16[0]);
wolfSSL 13:f67a6c6013ca 1425 MOVE_to_REG(W_I_7, sha256->buffer[16-7]); BYTE_SWAP(W_I_7, mBYTE_FLIP_MASK_7[0]);
wolfSSL 13:f67a6c6013ca 1426 MOVE_to_REG(W_I_2, sha256->buffer[16-2]); BYTE_SWAP(W_I_2, mBYTE_FLIP_MASK_2[0]);
wolfSSL 13:f67a6c6013ca 1427
wolfSSL 13:f67a6c6013ca 1428 DigestToReg(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7);
wolfSSL 13:f67a6c6013ca 1429
wolfSSL 13:f67a6c6013ca 1430 ADD_MEM(W_K_TEMP, W_I_16, K[0]);
wolfSSL 13:f67a6c6013ca 1431 MOVE_to_MEM(W_K[0], W_K_TEMP);
wolfSSL 13:f67a6c6013ca 1432
wolfSSL 13:f67a6c6013ca 1433 RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,0);
wolfSSL 13:f67a6c6013ca 1434 RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,1);
wolfSSL 13:f67a6c6013ca 1435 RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,2);
wolfSSL 13:f67a6c6013ca 1436 RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,3);
wolfSSL 13:f67a6c6013ca 1437 RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,4);
wolfSSL 13:f67a6c6013ca 1438 RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,5);
wolfSSL 13:f67a6c6013ca 1439 RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,6);
wolfSSL 13:f67a6c6013ca 1440 RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,7);
wolfSSL 13:f67a6c6013ca 1441
wolfSSL 13:f67a6c6013ca 1442 ADD_MEM(YMM_TEMP0, W_I, K[8]);
wolfSSL 13:f67a6c6013ca 1443 MOVE_to_MEM(W_K[8], YMM_TEMP0);
wolfSSL 13:f67a6c6013ca 1444
wolfSSL 13:f67a6c6013ca 1445 /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15] + W[i-16]) */
wolfSSL 13:f67a6c6013ca 1446 RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8);
wolfSSL 13:f67a6c6013ca 1447 GAMMA0_1(W_I_TEMP, W_I_15);
wolfSSL 13:f67a6c6013ca 1448 RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8);
wolfSSL 13:f67a6c6013ca 1449 GAMMA0_2(W_I_TEMP, W_I_15);
wolfSSL 13:f67a6c6013ca 1450 RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8);
wolfSSL 13:f67a6c6013ca 1451 ADD(W_I_TEMP, W_I_16, W_I_TEMP);/* for saving W_I before adding incomplete W_I_7 */
wolfSSL 13:f67a6c6013ca 1452 RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,9);
wolfSSL 13:f67a6c6013ca 1453 ADD(W_I, W_I_7, W_I_TEMP);
wolfSSL 13:f67a6c6013ca 1454 RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,9);
wolfSSL 13:f67a6c6013ca 1455 GAMMA1_1(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1456 RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,9);
wolfSSL 13:f67a6c6013ca 1457 GAMMA1_2(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1458 RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,10);
wolfSSL 13:f67a6c6013ca 1459 ADD(W_I, W_I, YMM_TEMP0);/* now W[16..17] are completed */
wolfSSL 13:f67a6c6013ca 1460 RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,10);
wolfSSL 13:f67a6c6013ca 1461 FEEDBACK1_to_W_I_2;
wolfSSL 13:f67a6c6013ca 1462 RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,10);
wolfSSL 13:f67a6c6013ca 1463 FEEDBACK_to_W_I_7;
wolfSSL 13:f67a6c6013ca 1464 RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,11);
wolfSSL 13:f67a6c6013ca 1465 ADD(W_I_TEMP, W_I_7, W_I_TEMP);
wolfSSL 13:f67a6c6013ca 1466 RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,11);
wolfSSL 13:f67a6c6013ca 1467 GAMMA1_1(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1468 RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,11);
wolfSSL 13:f67a6c6013ca 1469 GAMMA1_2(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1470 RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,12);
wolfSSL 13:f67a6c6013ca 1471 ADD(W_I, W_I_TEMP, YMM_TEMP0);/* now W[16..19] are completed */
wolfSSL 13:f67a6c6013ca 1472 RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,12);
wolfSSL 13:f67a6c6013ca 1473 FEEDBACK2_to_W_I_2;
wolfSSL 13:f67a6c6013ca 1474 RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,12);
wolfSSL 13:f67a6c6013ca 1475 GAMMA1_1(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1476 RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,13);
wolfSSL 13:f67a6c6013ca 1477 GAMMA1_2(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1478 RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,13);
wolfSSL 13:f67a6c6013ca 1479 ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..21] are completed */
wolfSSL 13:f67a6c6013ca 1480 RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,13);
wolfSSL 13:f67a6c6013ca 1481 FEEDBACK3_to_W_I_2;
wolfSSL 13:f67a6c6013ca 1482 RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,14);
wolfSSL 13:f67a6c6013ca 1483 GAMMA1(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1484 RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,14);
wolfSSL 13:f67a6c6013ca 1485 RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,14);
wolfSSL 13:f67a6c6013ca 1486 ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..23] are completed */
wolfSSL 13:f67a6c6013ca 1487 RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,15);
wolfSSL 13:f67a6c6013ca 1488
wolfSSL 13:f67a6c6013ca 1489 MOVE_to_REG(YMM_TEMP0, K[16]);
wolfSSL 13:f67a6c6013ca 1490 RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,15);
wolfSSL 13:f67a6c6013ca 1491 ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I);
wolfSSL 13:f67a6c6013ca 1492 RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,15);
wolfSSL 13:f67a6c6013ca 1493 ADD(YMM_TEMP0, YMM_TEMP0, W_I);
wolfSSL 13:f67a6c6013ca 1494 MOVE_to_MEM(W_K[16], YMM_TEMP0);
wolfSSL 13:f67a6c6013ca 1495
wolfSSL 13:f67a6c6013ca 1496 /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16] */
wolfSSL 13:f67a6c6013ca 1497 RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16);
wolfSSL 13:f67a6c6013ca 1498 GAMMA0_1(W_I_TEMP, W_I_15);
wolfSSL 13:f67a6c6013ca 1499 RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16);
wolfSSL 13:f67a6c6013ca 1500 GAMMA0_2(W_I_TEMP, W_I_15);
wolfSSL 13:f67a6c6013ca 1501 RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16);
wolfSSL 13:f67a6c6013ca 1502 ADD(W_I_TEMP, W_I_16, W_I_TEMP);/* for saving W_I before adding incomplete W_I_7 */
wolfSSL 13:f67a6c6013ca 1503 RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,17);
wolfSSL 13:f67a6c6013ca 1504 ADD(W_I, W_I_7, W_I_TEMP);
wolfSSL 13:f67a6c6013ca 1505 RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,17);
wolfSSL 13:f67a6c6013ca 1506 GAMMA1_1(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1507 RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,17);
wolfSSL 13:f67a6c6013ca 1508 GAMMA1_2(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1509 RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,18);
wolfSSL 13:f67a6c6013ca 1510 ADD(W_I, W_I, YMM_TEMP0);/* now W[16..17] are completed */
wolfSSL 13:f67a6c6013ca 1511 RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,18);
wolfSSL 13:f67a6c6013ca 1512 FEEDBACK1_to_W_I_2;
wolfSSL 13:f67a6c6013ca 1513 RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,18);
wolfSSL 13:f67a6c6013ca 1514 FEEDBACK_to_W_I_7;
wolfSSL 13:f67a6c6013ca 1515 RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,19);
wolfSSL 13:f67a6c6013ca 1516 ADD(W_I_TEMP, W_I_7, W_I_TEMP);
wolfSSL 13:f67a6c6013ca 1517 RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,19);
wolfSSL 13:f67a6c6013ca 1518 GAMMA1(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1519 RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,19);
wolfSSL 13:f67a6c6013ca 1520 GAMMA1_2(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1521 RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,20);
wolfSSL 13:f67a6c6013ca 1522 ADD(W_I, W_I_TEMP, YMM_TEMP0);/* now W[16..19] are completed */
wolfSSL 13:f67a6c6013ca 1523 RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,20);
wolfSSL 13:f67a6c6013ca 1524 FEEDBACK2_to_W_I_2;
wolfSSL 13:f67a6c6013ca 1525 RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,20);
wolfSSL 13:f67a6c6013ca 1526 GAMMA1_1(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1527 RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,21);
wolfSSL 13:f67a6c6013ca 1528 GAMMA1_2(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1529 RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,21);
wolfSSL 13:f67a6c6013ca 1530 ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..21] are completed */
wolfSSL 13:f67a6c6013ca 1531 RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,21);
wolfSSL 13:f67a6c6013ca 1532 FEEDBACK3_to_W_I_2;
wolfSSL 13:f67a6c6013ca 1533 RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,22);
wolfSSL 13:f67a6c6013ca 1534 GAMMA1_1(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1535 RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,22);
wolfSSL 13:f67a6c6013ca 1536 GAMMA1_2(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1537 RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,22);
wolfSSL 13:f67a6c6013ca 1538 ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..23] are completed */
wolfSSL 13:f67a6c6013ca 1539 RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,23);
wolfSSL 13:f67a6c6013ca 1540
wolfSSL 13:f67a6c6013ca 1541 MOVE_to_REG(YMM_TEMP0, K[24]);
wolfSSL 13:f67a6c6013ca 1542 RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,23);
wolfSSL 13:f67a6c6013ca 1543 ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I);
wolfSSL 13:f67a6c6013ca 1544 RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,23);
wolfSSL 13:f67a6c6013ca 1545 ADD(YMM_TEMP0, YMM_TEMP0, W_I);
wolfSSL 13:f67a6c6013ca 1546 MOVE_to_MEM(W_K[24], YMM_TEMP0);
wolfSSL 13:f67a6c6013ca 1547
wolfSSL 13:f67a6c6013ca 1548 /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16] */
wolfSSL 13:f67a6c6013ca 1549 RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24);
wolfSSL 13:f67a6c6013ca 1550 GAMMA0_1(W_I_TEMP, W_I_15);
wolfSSL 13:f67a6c6013ca 1551 RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24);
wolfSSL 13:f67a6c6013ca 1552 GAMMA0_2(W_I_TEMP, W_I_15);
wolfSSL 13:f67a6c6013ca 1553 RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24);
wolfSSL 13:f67a6c6013ca 1554 ADD(W_I_TEMP, W_I_16, W_I_TEMP);/* for saving W_I before adding incomplete W_I_7 */
wolfSSL 13:f67a6c6013ca 1555 RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,25);
wolfSSL 13:f67a6c6013ca 1556 ADD(W_I, W_I_7, W_I_TEMP);
wolfSSL 13:f67a6c6013ca 1557 RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,25);
wolfSSL 13:f67a6c6013ca 1558 GAMMA1_1(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1559 RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,25);
wolfSSL 13:f67a6c6013ca 1560 GAMMA1_2(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1561 RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,26);
wolfSSL 13:f67a6c6013ca 1562 ADD(W_I, W_I, YMM_TEMP0);/* now W[16..17] are completed */
wolfSSL 13:f67a6c6013ca 1563 RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,26);
wolfSSL 13:f67a6c6013ca 1564 FEEDBACK1_to_W_I_2;
wolfSSL 13:f67a6c6013ca 1565 RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,26);
wolfSSL 13:f67a6c6013ca 1566 FEEDBACK_to_W_I_7;
wolfSSL 13:f67a6c6013ca 1567 RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,27);
wolfSSL 13:f67a6c6013ca 1568 ADD(W_I_TEMP, W_I_7, W_I_TEMP);
wolfSSL 13:f67a6c6013ca 1569 RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,27);
wolfSSL 13:f67a6c6013ca 1570 GAMMA1_1(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1571 RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,27);
wolfSSL 13:f67a6c6013ca 1572 GAMMA1_2(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1573 RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,28);
wolfSSL 13:f67a6c6013ca 1574 ADD(W_I, W_I_TEMP, YMM_TEMP0);/* now W[16..19] are completed */
wolfSSL 13:f67a6c6013ca 1575 RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,28);
wolfSSL 13:f67a6c6013ca 1576 FEEDBACK2_to_W_I_2;
wolfSSL 13:f67a6c6013ca 1577 RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,28);
wolfSSL 13:f67a6c6013ca 1578 GAMMA1_1(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1579 RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,29);
wolfSSL 13:f67a6c6013ca 1580 GAMMA1_2(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1581 RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,29);
wolfSSL 13:f67a6c6013ca 1582 ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..21] are completed */
wolfSSL 13:f67a6c6013ca 1583 RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,29);
wolfSSL 13:f67a6c6013ca 1584 FEEDBACK3_to_W_I_2;
wolfSSL 13:f67a6c6013ca 1585 RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,30);
wolfSSL 13:f67a6c6013ca 1586 GAMMA1(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1587 RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,30);
wolfSSL 13:f67a6c6013ca 1588 RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,30);
wolfSSL 13:f67a6c6013ca 1589 ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..23] are completed */
wolfSSL 13:f67a6c6013ca 1590 RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,31);
wolfSSL 13:f67a6c6013ca 1591
wolfSSL 13:f67a6c6013ca 1592 MOVE_to_REG(YMM_TEMP0, K[32]);
wolfSSL 13:f67a6c6013ca 1593 RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,31);
wolfSSL 13:f67a6c6013ca 1594 ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I);
wolfSSL 13:f67a6c6013ca 1595 RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,31);
wolfSSL 13:f67a6c6013ca 1596 ADD(YMM_TEMP0, YMM_TEMP0, W_I);
wolfSSL 13:f67a6c6013ca 1597 MOVE_to_MEM(W_K[32], YMM_TEMP0);
wolfSSL 13:f67a6c6013ca 1598
wolfSSL 13:f67a6c6013ca 1599
wolfSSL 13:f67a6c6013ca 1600 /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16] */
wolfSSL 13:f67a6c6013ca 1601 RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32);
wolfSSL 13:f67a6c6013ca 1602 GAMMA0_1(W_I_TEMP, W_I_15);
wolfSSL 13:f67a6c6013ca 1603 RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32);
wolfSSL 13:f67a6c6013ca 1604 GAMMA0_2(W_I_TEMP, W_I_15);
wolfSSL 13:f67a6c6013ca 1605 RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32);
wolfSSL 13:f67a6c6013ca 1606 ADD(W_I_TEMP, W_I_16, W_I_TEMP);/* for saving W_I before adding incomplete W_I_7 */
wolfSSL 13:f67a6c6013ca 1607 RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,33);
wolfSSL 13:f67a6c6013ca 1608 ADD(W_I, W_I_7, W_I_TEMP);
wolfSSL 13:f67a6c6013ca 1609 RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,33);
wolfSSL 13:f67a6c6013ca 1610 GAMMA1_1(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1611 RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,33);
wolfSSL 13:f67a6c6013ca 1612 GAMMA1_2(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1613 RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,34);
wolfSSL 13:f67a6c6013ca 1614 ADD(W_I, W_I, YMM_TEMP0);/* now W[16..17] are completed */
wolfSSL 13:f67a6c6013ca 1615 RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,34);
wolfSSL 13:f67a6c6013ca 1616 FEEDBACK1_to_W_I_2;
wolfSSL 13:f67a6c6013ca 1617 RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,34);
wolfSSL 13:f67a6c6013ca 1618 FEEDBACK_to_W_I_7;
wolfSSL 13:f67a6c6013ca 1619 RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,35);
wolfSSL 13:f67a6c6013ca 1620 ADD(W_I_TEMP, W_I_7, W_I_TEMP);
wolfSSL 13:f67a6c6013ca 1621 RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,35);
wolfSSL 13:f67a6c6013ca 1622 GAMMA1_1(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1623 RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,35);
wolfSSL 13:f67a6c6013ca 1624 GAMMA1_2(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1625 RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,36);
wolfSSL 13:f67a6c6013ca 1626 ADD(W_I, W_I_TEMP, YMM_TEMP0);/* now W[16..19] are completed */
wolfSSL 13:f67a6c6013ca 1627 RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,36);
wolfSSL 13:f67a6c6013ca 1628 FEEDBACK2_to_W_I_2;
wolfSSL 13:f67a6c6013ca 1629 RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,36);
wolfSSL 13:f67a6c6013ca 1630 GAMMA1_1(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1631 RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,37);
wolfSSL 13:f67a6c6013ca 1632 GAMMA1_2(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1633 RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,37);
wolfSSL 13:f67a6c6013ca 1634 ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..21] are completed */
wolfSSL 13:f67a6c6013ca 1635 RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,37);
wolfSSL 13:f67a6c6013ca 1636 FEEDBACK3_to_W_I_2;
wolfSSL 13:f67a6c6013ca 1637 RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,38);
wolfSSL 13:f67a6c6013ca 1638 GAMMA1_1(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1639 RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,38);
wolfSSL 13:f67a6c6013ca 1640 GAMMA1_2(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1641 RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,38);
wolfSSL 13:f67a6c6013ca 1642 ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..23] are completed */
wolfSSL 13:f67a6c6013ca 1643 RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,39);
wolfSSL 13:f67a6c6013ca 1644
wolfSSL 13:f67a6c6013ca 1645 MOVE_to_REG(YMM_TEMP0, K[40]);
wolfSSL 13:f67a6c6013ca 1646 RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,39);
wolfSSL 13:f67a6c6013ca 1647 ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I);
wolfSSL 13:f67a6c6013ca 1648 RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,39);
wolfSSL 13:f67a6c6013ca 1649 ADD(YMM_TEMP0, YMM_TEMP0, W_I);
wolfSSL 13:f67a6c6013ca 1650 MOVE_to_MEM(W_K[40], YMM_TEMP0);
wolfSSL 13:f67a6c6013ca 1651
wolfSSL 13:f67a6c6013ca 1652 /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16] */
wolfSSL 13:f67a6c6013ca 1653 RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40);
wolfSSL 13:f67a6c6013ca 1654 GAMMA0_1(W_I_TEMP, W_I_15);
wolfSSL 13:f67a6c6013ca 1655 RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40);
wolfSSL 13:f67a6c6013ca 1656 GAMMA0_2(W_I_TEMP, W_I_15);
wolfSSL 13:f67a6c6013ca 1657 RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40);
wolfSSL 13:f67a6c6013ca 1658 ADD(W_I_TEMP, W_I_16, W_I_TEMP);/* for saving W_I before adding incomplete W_I_7 */
wolfSSL 13:f67a6c6013ca 1659 RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,41);
wolfSSL 13:f67a6c6013ca 1660 ADD(W_I, W_I_7, W_I_TEMP);
wolfSSL 13:f67a6c6013ca 1661 RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,41);
wolfSSL 13:f67a6c6013ca 1662 GAMMA1_1(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1663 RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,41);
wolfSSL 13:f67a6c6013ca 1664 GAMMA1_2(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1665 RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,42);
wolfSSL 13:f67a6c6013ca 1666 ADD(W_I, W_I, YMM_TEMP0);/* now W[16..17] are completed */
wolfSSL 13:f67a6c6013ca 1667 RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,42);
wolfSSL 13:f67a6c6013ca 1668 FEEDBACK1_to_W_I_2;
wolfSSL 13:f67a6c6013ca 1669 RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,42);
wolfSSL 13:f67a6c6013ca 1670 FEEDBACK_to_W_I_7;
wolfSSL 13:f67a6c6013ca 1671 RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,43);
wolfSSL 13:f67a6c6013ca 1672 ADD(W_I_TEMP, W_I_7, W_I_TEMP);
wolfSSL 13:f67a6c6013ca 1673 RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,43);
wolfSSL 13:f67a6c6013ca 1674 GAMMA1_1(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1675 RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,43);
wolfSSL 13:f67a6c6013ca 1676 GAMMA1_2(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1677 RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,44);
wolfSSL 13:f67a6c6013ca 1678 ADD(W_I, W_I_TEMP, YMM_TEMP0);/* now W[16..19] are completed */
wolfSSL 13:f67a6c6013ca 1679 RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,44);
wolfSSL 13:f67a6c6013ca 1680 FEEDBACK2_to_W_I_2;
wolfSSL 13:f67a6c6013ca 1681 RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,44);
wolfSSL 13:f67a6c6013ca 1682 GAMMA1_1(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1683 RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,45);
wolfSSL 13:f67a6c6013ca 1684 GAMMA1_2(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1685 RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,45);
wolfSSL 13:f67a6c6013ca 1686 ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..21] are completed */
wolfSSL 13:f67a6c6013ca 1687 RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,45);
wolfSSL 13:f67a6c6013ca 1688 FEEDBACK3_to_W_I_2;
wolfSSL 13:f67a6c6013ca 1689 RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,46);
wolfSSL 13:f67a6c6013ca 1690 GAMMA1_1(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1691 RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,46);
wolfSSL 13:f67a6c6013ca 1692 GAMMA1_2(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1693 RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,46);
wolfSSL 13:f67a6c6013ca 1694 ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..23] are completed */
wolfSSL 13:f67a6c6013ca 1695 RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,47);
wolfSSL 13:f67a6c6013ca 1696
wolfSSL 13:f67a6c6013ca 1697 MOVE_to_REG(YMM_TEMP0, K[48]);
wolfSSL 13:f67a6c6013ca 1698 RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,47);
wolfSSL 13:f67a6c6013ca 1699 ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I);
wolfSSL 13:f67a6c6013ca 1700 RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,47);
wolfSSL 13:f67a6c6013ca 1701 ADD(YMM_TEMP0, YMM_TEMP0, W_I);
wolfSSL 13:f67a6c6013ca 1702 MOVE_to_MEM(W_K[48], YMM_TEMP0);
wolfSSL 13:f67a6c6013ca 1703
wolfSSL 13:f67a6c6013ca 1704 /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16] */
wolfSSL 13:f67a6c6013ca 1705 RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48);
wolfSSL 13:f67a6c6013ca 1706 GAMMA0_1(W_I_TEMP, W_I_15);
wolfSSL 13:f67a6c6013ca 1707 RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48);
wolfSSL 13:f67a6c6013ca 1708 GAMMA0_2(W_I_TEMP, W_I_15);
wolfSSL 13:f67a6c6013ca 1709 RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48);
wolfSSL 13:f67a6c6013ca 1710 ADD(W_I_TEMP, W_I_16, W_I_TEMP);/* for saving W_I before adding incomplete W_I_7 */
wolfSSL 13:f67a6c6013ca 1711 RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49);
wolfSSL 13:f67a6c6013ca 1712 ADD(W_I, W_I_7, W_I_TEMP);
wolfSSL 13:f67a6c6013ca 1713 RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49);
wolfSSL 13:f67a6c6013ca 1714 GAMMA1_1(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1715 RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49);
wolfSSL 13:f67a6c6013ca 1716 GAMMA1_2(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1717 RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50);
wolfSSL 13:f67a6c6013ca 1718 ADD(W_I, W_I, YMM_TEMP0);/* now W[16..17] are completed */
wolfSSL 13:f67a6c6013ca 1719 RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50);
wolfSSL 13:f67a6c6013ca 1720 FEEDBACK1_to_W_I_2;
wolfSSL 13:f67a6c6013ca 1721 RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50);
wolfSSL 13:f67a6c6013ca 1722 FEEDBACK_to_W_I_7;
wolfSSL 13:f67a6c6013ca 1723 RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51);
wolfSSL 13:f67a6c6013ca 1724 ADD(W_I_TEMP, W_I_7, W_I_TEMP);
wolfSSL 13:f67a6c6013ca 1725 RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51);
wolfSSL 13:f67a6c6013ca 1726 GAMMA1_1(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1727 RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51);
wolfSSL 13:f67a6c6013ca 1728 GAMMA1_2(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1729 RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52);
wolfSSL 13:f67a6c6013ca 1730 ADD(W_I, W_I_TEMP, YMM_TEMP0);/* now W[16..19] are completed */
wolfSSL 13:f67a6c6013ca 1731 RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52);
wolfSSL 13:f67a6c6013ca 1732 FEEDBACK2_to_W_I_2;
wolfSSL 13:f67a6c6013ca 1733 RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52);
wolfSSL 13:f67a6c6013ca 1734 GAMMA1_1(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1735 RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53);
wolfSSL 13:f67a6c6013ca 1736 GAMMA1_2(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1737 RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53);
wolfSSL 13:f67a6c6013ca 1738 ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..21] are completed */
wolfSSL 13:f67a6c6013ca 1739 RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53);
wolfSSL 13:f67a6c6013ca 1740 FEEDBACK3_to_W_I_2;
wolfSSL 13:f67a6c6013ca 1741 RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54);
wolfSSL 13:f67a6c6013ca 1742 GAMMA1_1(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1743 RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54);
wolfSSL 13:f67a6c6013ca 1744 GAMMA1_2(YMM_TEMP0, W_I_2);
wolfSSL 13:f67a6c6013ca 1745 RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54);
wolfSSL 13:f67a6c6013ca 1746 ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..23] are completed */
wolfSSL 13:f67a6c6013ca 1747 RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55);
wolfSSL 13:f67a6c6013ca 1748
wolfSSL 13:f67a6c6013ca 1749 MOVE_to_REG(YMM_TEMP0, K[56]);
wolfSSL 13:f67a6c6013ca 1750 RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55);
wolfSSL 13:f67a6c6013ca 1751 ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I);
wolfSSL 13:f67a6c6013ca 1752 RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55);
wolfSSL 13:f67a6c6013ca 1753 ADD(YMM_TEMP0, YMM_TEMP0, W_I);
wolfSSL 13:f67a6c6013ca 1754 MOVE_to_MEM(W_K[56], YMM_TEMP0);
wolfSSL 13:f67a6c6013ca 1755
wolfSSL 13:f67a6c6013ca 1756 RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,56);
wolfSSL 13:f67a6c6013ca 1757 RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,57);
wolfSSL 13:f67a6c6013ca 1758 RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,58);
wolfSSL 13:f67a6c6013ca 1759 RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,59);
wolfSSL 13:f67a6c6013ca 1760
wolfSSL 13:f67a6c6013ca 1761 RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,60);
wolfSSL 13:f67a6c6013ca 1762 RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,61);
wolfSSL 13:f67a6c6013ca 1763 RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,62);
wolfSSL 13:f67a6c6013ca 1764 RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,63);
wolfSSL 13:f67a6c6013ca 1765
wolfSSL 13:f67a6c6013ca 1766 RegToDigest(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7);
wolfSSL 13:f67a6c6013ca 1767
wolfSSL 13:f67a6c6013ca 1768 #ifdef WOLFSSL_SMALL_STACK
wolfSSL 13:f67a6c6013ca 1769 XFREE(W_K, NULL, DYNAMIC_TYPE_TMP_BUFFER);
wolfSSL 13:f67a6c6013ca 1770 #endif
wolfSSL 13:f67a6c6013ca 1771
wolfSSL 13:f67a6c6013ca 1772 return 0;
wolfSSL 13:f67a6c6013ca 1773 }
wolfSSL 13:f67a6c6013ca 1774
wolfSSL 13:f67a6c6013ca 1775 #endif /* HAVE_INTEL_AVX2 */
wolfSSL 13:f67a6c6013ca 1776
wolfSSL 13:f67a6c6013ca 1777
wolfSSL 13:f67a6c6013ca 1778 #ifdef WOLFSSL_SHA224
wolfSSL 13:f67a6c6013ca 1779 static int InitSha224(Sha224* sha224)
wolfSSL 13:f67a6c6013ca 1780 {
wolfSSL 13:f67a6c6013ca 1781
wolfSSL 13:f67a6c6013ca 1782 int ret = 0;
wolfSSL 13:f67a6c6013ca 1783
wolfSSL 13:f67a6c6013ca 1784 if (sha224 == NULL) {
wolfSSL 13:f67a6c6013ca 1785 return BAD_FUNC_ARG;
wolfSSL 13:f67a6c6013ca 1786 }
wolfSSL 13:f67a6c6013ca 1787
wolfSSL 13:f67a6c6013ca 1788 sha224->digest[0] = 0xc1059ed8;
wolfSSL 13:f67a6c6013ca 1789 sha224->digest[1] = 0x367cd507;
wolfSSL 13:f67a6c6013ca 1790 sha224->digest[2] = 0x3070dd17;
wolfSSL 13:f67a6c6013ca 1791 sha224->digest[3] = 0xf70e5939;
wolfSSL 13:f67a6c6013ca 1792 sha224->digest[4] = 0xffc00b31;
wolfSSL 13:f67a6c6013ca 1793 sha224->digest[5] = 0x68581511;
wolfSSL 13:f67a6c6013ca 1794 sha224->digest[6] = 0x64f98fa7;
wolfSSL 13:f67a6c6013ca 1795 sha224->digest[7] = 0xbefa4fa4;
wolfSSL 13:f67a6c6013ca 1796
wolfSSL 13:f67a6c6013ca 1797 sha224->buffLen = 0;
wolfSSL 13:f67a6c6013ca 1798 sha224->loLen = 0;
wolfSSL 13:f67a6c6013ca 1799 sha224->hiLen = 0;
wolfSSL 13:f67a6c6013ca 1800
wolfSSL 13:f67a6c6013ca 1801 #if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
wolfSSL 13:f67a6c6013ca 1802 /* choose best Transform function under this runtime environment */
wolfSSL 13:f67a6c6013ca 1803 Sha256_SetTransform();
wolfSSL 13:f67a6c6013ca 1804 #endif
wolfSSL 13:f67a6c6013ca 1805
wolfSSL 13:f67a6c6013ca 1806 return ret;
wolfSSL 13:f67a6c6013ca 1807 }
wolfSSL 13:f67a6c6013ca 1808
wolfSSL 13:f67a6c6013ca 1809 int wc_InitSha224_ex(Sha224* sha224, void* heap, int devId)
wolfSSL 13:f67a6c6013ca 1810 {
wolfSSL 13:f67a6c6013ca 1811 int ret = 0;
wolfSSL 13:f67a6c6013ca 1812
wolfSSL 13:f67a6c6013ca 1813 if (sha224 == NULL)
wolfSSL 13:f67a6c6013ca 1814 return BAD_FUNC_ARG;
wolfSSL 13:f67a6c6013ca 1815
wolfSSL 13:f67a6c6013ca 1816 sha224->heap = heap;
wolfSSL 13:f67a6c6013ca 1817
wolfSSL 13:f67a6c6013ca 1818 ret = InitSha224(sha224);
wolfSSL 13:f67a6c6013ca 1819 if (ret != 0)
wolfSSL 13:f67a6c6013ca 1820 return ret;
wolfSSL 13:f67a6c6013ca 1821
wolfSSL 13:f67a6c6013ca 1822 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA224)
wolfSSL 13:f67a6c6013ca 1823 ret = wolfAsync_DevCtxInit(&sha224->asyncDev,
wolfSSL 13:f67a6c6013ca 1824 WOLFSSL_ASYNC_MARKER_SHA224, sha224->heap, devId);
wolfSSL 13:f67a6c6013ca 1825 #else
wolfSSL 13:f67a6c6013ca 1826 (void)devId;
wolfSSL 13:f67a6c6013ca 1827 #endif /* WOLFSSL_ASYNC_CRYPT */
wolfSSL 13:f67a6c6013ca 1828
wolfSSL 13:f67a6c6013ca 1829 return ret;
wolfSSL 13:f67a6c6013ca 1830 }
wolfSSL 13:f67a6c6013ca 1831
wolfSSL 13:f67a6c6013ca 1832 int wc_InitSha224(Sha224* sha224)
wolfSSL 13:f67a6c6013ca 1833 {
wolfSSL 13:f67a6c6013ca 1834 return wc_InitSha224_ex(sha224, NULL, INVALID_DEVID);
wolfSSL 13:f67a6c6013ca 1835 }
wolfSSL 13:f67a6c6013ca 1836
wolfSSL 13:f67a6c6013ca 1837 int wc_Sha224Update(Sha224* sha224, const byte* data, word32 len)
wolfSSL 13:f67a6c6013ca 1838 {
wolfSSL 13:f67a6c6013ca 1839 int ret;
wolfSSL 13:f67a6c6013ca 1840
wolfSSL 13:f67a6c6013ca 1841 if (sha224 == NULL || (data == NULL && len > 0)) {
wolfSSL 13:f67a6c6013ca 1842 return BAD_FUNC_ARG;
wolfSSL 13:f67a6c6013ca 1843 }
wolfSSL 13:f67a6c6013ca 1844
wolfSSL 13:f67a6c6013ca 1845 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA224)
wolfSSL 13:f67a6c6013ca 1846 if (sha224->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA224) {
wolfSSL 13:f67a6c6013ca 1847 #if defined(HAVE_INTEL_QA)
wolfSSL 13:f67a6c6013ca 1848 return IntelQaSymSha224(&sha224->asyncDev, NULL, data, len);
wolfSSL 13:f67a6c6013ca 1849 #endif
wolfSSL 13:f67a6c6013ca 1850 }
wolfSSL 13:f67a6c6013ca 1851 #endif /* WOLFSSL_ASYNC_CRYPT */
wolfSSL 13:f67a6c6013ca 1852
wolfSSL 13:f67a6c6013ca 1853 ret = Sha256Update((Sha256 *)sha224, data, len);
wolfSSL 13:f67a6c6013ca 1854
wolfSSL 13:f67a6c6013ca 1855 return ret;
wolfSSL 13:f67a6c6013ca 1856 }
wolfSSL 13:f67a6c6013ca 1857
wolfSSL 13:f67a6c6013ca 1858 int wc_Sha224Final(Sha224* sha224, byte* hash)
wolfSSL 13:f67a6c6013ca 1859 {
wolfSSL 13:f67a6c6013ca 1860 int ret;
wolfSSL 13:f67a6c6013ca 1861
wolfSSL 13:f67a6c6013ca 1862 if (sha224 == NULL || hash == NULL) {
wolfSSL 13:f67a6c6013ca 1863 return BAD_FUNC_ARG;
wolfSSL 13:f67a6c6013ca 1864 }
wolfSSL 13:f67a6c6013ca 1865
wolfSSL 13:f67a6c6013ca 1866 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA224)
wolfSSL 13:f67a6c6013ca 1867 if (sha224->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA224) {
wolfSSL 13:f67a6c6013ca 1868 #if defined(HAVE_INTEL_QA)
wolfSSL 13:f67a6c6013ca 1869 return IntelQaSymSha224(&sha224->asyncDev, hash, NULL,
wolfSSL 13:f67a6c6013ca 1870 SHA224_DIGEST_SIZE);
wolfSSL 13:f67a6c6013ca 1871 #endif
wolfSSL 13:f67a6c6013ca 1872 }
wolfSSL 13:f67a6c6013ca 1873 #endif /* WOLFSSL_ASYNC_CRYPT */
wolfSSL 13:f67a6c6013ca 1874
wolfSSL 13:f67a6c6013ca 1875 ret = Sha256Final((Sha256*)sha224);
wolfSSL 13:f67a6c6013ca 1876 if (ret != 0)
wolfSSL 13:f67a6c6013ca 1877 return ret;
wolfSSL 13:f67a6c6013ca 1878
wolfSSL 13:f67a6c6013ca 1879 #if defined(LITTLE_ENDIAN_ORDER)
wolfSSL 13:f67a6c6013ca 1880 ByteReverseWords(sha224->digest, sha224->digest, SHA224_DIGEST_SIZE);
wolfSSL 13:f67a6c6013ca 1881 #endif
wolfSSL 13:f67a6c6013ca 1882 XMEMCPY(hash, sha224->digest, SHA224_DIGEST_SIZE);
wolfSSL 13:f67a6c6013ca 1883
wolfSSL 13:f67a6c6013ca 1884 return InitSha224(sha224); /* reset state */
wolfSSL 13:f67a6c6013ca 1885 }
wolfSSL 13:f67a6c6013ca 1886
wolfSSL 13:f67a6c6013ca 1887 void wc_Sha224Free(Sha224* sha224)
wolfSSL 13:f67a6c6013ca 1888 {
wolfSSL 13:f67a6c6013ca 1889 if (sha224 == NULL)
wolfSSL 13:f67a6c6013ca 1890 return;
wolfSSL 13:f67a6c6013ca 1891
wolfSSL 13:f67a6c6013ca 1892 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA224)
wolfSSL 13:f67a6c6013ca 1893 wolfAsync_DevCtxFree(&sha224->asyncDev, WOLFSSL_ASYNC_MARKER_SHA224);
wolfSSL 13:f67a6c6013ca 1894 #endif /* WOLFSSL_ASYNC_CRYPT */
wolfSSL 13:f67a6c6013ca 1895 }
wolfSSL 13:f67a6c6013ca 1896
wolfSSL 13:f67a6c6013ca 1897 #endif /* WOLFSSL_SHA224 */
wolfSSL 13:f67a6c6013ca 1898
wolfSSL 13:f67a6c6013ca 1899
wolfSSL 13:f67a6c6013ca 1900 int wc_InitSha256(Sha256* sha256)
wolfSSL 13:f67a6c6013ca 1901 {
wolfSSL 13:f67a6c6013ca 1902 return wc_InitSha256_ex(sha256, NULL, INVALID_DEVID);
wolfSSL 13:f67a6c6013ca 1903 }
wolfSSL 13:f67a6c6013ca 1904
wolfSSL 13:f67a6c6013ca 1905 void wc_Sha256Free(Sha256* sha256)
wolfSSL 13:f67a6c6013ca 1906 {
wolfSSL 13:f67a6c6013ca 1907 if (sha256 == NULL)
wolfSSL 13:f67a6c6013ca 1908 return;
wolfSSL 13:f67a6c6013ca 1909
wolfSSL 13:f67a6c6013ca 1910 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
wolfSSL 13:f67a6c6013ca 1911 wolfAsync_DevCtxFree(&sha256->asyncDev, WOLFSSL_ASYNC_MARKER_SHA256);
wolfSSL 13:f67a6c6013ca 1912 #endif /* WOLFSSL_ASYNC_CRYPT */
wolfSSL 13:f67a6c6013ca 1913 }
wolfSSL 13:f67a6c6013ca 1914
wolfSSL 13:f67a6c6013ca 1915 #endif /* !WOLFSSL_TI_HASH */
wolfSSL 13:f67a6c6013ca 1916 #endif /* HAVE_FIPS */
wolfSSL 13:f67a6c6013ca 1917
wolfSSL 13:f67a6c6013ca 1918
wolfSSL 13:f67a6c6013ca 1919 #ifndef WOLFSSL_TI_HASH
wolfSSL 13:f67a6c6013ca 1920 #ifdef WOLFSSL_SHA224
wolfSSL 13:f67a6c6013ca 1921 int wc_Sha224GetHash(Sha224* sha224, byte* hash)
wolfSSL 13:f67a6c6013ca 1922 {
wolfSSL 13:f67a6c6013ca 1923 int ret;
wolfSSL 13:f67a6c6013ca 1924 Sha224 tmpSha224;
wolfSSL 13:f67a6c6013ca 1925
wolfSSL 13:f67a6c6013ca 1926 if (sha224 == NULL || hash == NULL)
wolfSSL 13:f67a6c6013ca 1927 return BAD_FUNC_ARG;
wolfSSL 13:f67a6c6013ca 1928
wolfSSL 13:f67a6c6013ca 1929 ret = wc_Sha224Copy(sha224, &tmpSha224);
wolfSSL 13:f67a6c6013ca 1930 if (ret == 0) {
wolfSSL 13:f67a6c6013ca 1931 ret = wc_Sha224Final(&tmpSha224, hash);
wolfSSL 13:f67a6c6013ca 1932 }
wolfSSL 13:f67a6c6013ca 1933 return ret;
wolfSSL 13:f67a6c6013ca 1934 }
wolfSSL 13:f67a6c6013ca 1935 int wc_Sha224Copy(Sha224* src, Sha224* dst)
wolfSSL 13:f67a6c6013ca 1936 {
wolfSSL 13:f67a6c6013ca 1937 int ret = 0;
wolfSSL 13:f67a6c6013ca 1938
wolfSSL 13:f67a6c6013ca 1939 if (src == NULL || dst == NULL)
wolfSSL 13:f67a6c6013ca 1940 return BAD_FUNC_ARG;
wolfSSL 13:f67a6c6013ca 1941
wolfSSL 13:f67a6c6013ca 1942 XMEMCPY(dst, src, sizeof(Sha224));
wolfSSL 13:f67a6c6013ca 1943
wolfSSL 13:f67a6c6013ca 1944 #ifdef WOLFSSL_ASYNC_CRYPT
wolfSSL 13:f67a6c6013ca 1945 ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev);
wolfSSL 13:f67a6c6013ca 1946 #endif
wolfSSL 13:f67a6c6013ca 1947
wolfSSL 13:f67a6c6013ca 1948 return ret;
wolfSSL 13:f67a6c6013ca 1949 }
wolfSSL 13:f67a6c6013ca 1950 #endif /* WOLFSSL_SHA224 */
wolfSSL 13:f67a6c6013ca 1951
wolfSSL 13:f67a6c6013ca 1952 int wc_Sha256GetHash(Sha256* sha256, byte* hash)
wolfSSL 13:f67a6c6013ca 1953 {
wolfSSL 13:f67a6c6013ca 1954 int ret;
wolfSSL 13:f67a6c6013ca 1955 Sha256 tmpSha256;
wolfSSL 13:f67a6c6013ca 1956
wolfSSL 13:f67a6c6013ca 1957 if (sha256 == NULL || hash == NULL)
wolfSSL 13:f67a6c6013ca 1958 return BAD_FUNC_ARG;
wolfSSL 13:f67a6c6013ca 1959
wolfSSL 13:f67a6c6013ca 1960 ret = wc_Sha256Copy(sha256, &tmpSha256);
wolfSSL 13:f67a6c6013ca 1961 if (ret == 0) {
wolfSSL 13:f67a6c6013ca 1962 ret = wc_Sha256Final(&tmpSha256, hash);
wolfSSL 13:f67a6c6013ca 1963 }
wolfSSL 13:f67a6c6013ca 1964 return ret;
wolfSSL 13:f67a6c6013ca 1965 }
wolfSSL 13:f67a6c6013ca 1966 int wc_Sha256Copy(Sha256* src, Sha256* dst)
wolfSSL 13:f67a6c6013ca 1967 {
wolfSSL 13:f67a6c6013ca 1968 int ret = 0;
wolfSSL 13:f67a6c6013ca 1969
wolfSSL 13:f67a6c6013ca 1970 if (src == NULL || dst == NULL)
wolfSSL 13:f67a6c6013ca 1971 return BAD_FUNC_ARG;
wolfSSL 13:f67a6c6013ca 1972
wolfSSL 13:f67a6c6013ca 1973 XMEMCPY(dst, src, sizeof(Sha256));
wolfSSL 13:f67a6c6013ca 1974
wolfSSL 13:f67a6c6013ca 1975 #ifdef WOLFSSL_ASYNC_CRYPT
wolfSSL 13:f67a6c6013ca 1976 ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev);
wolfSSL 13:f67a6c6013ca 1977 #endif
wolfSSL 13:f67a6c6013ca 1978 #ifdef WOLFSSL_PIC32MZ_HASH
wolfSSL 13:f67a6c6013ca 1979 ret = wc_Pic32HashCopy(&src->cache, &dst->cache);
wolfSSL 13:f67a6c6013ca 1980 #endif
wolfSSL 13:f67a6c6013ca 1981
wolfSSL 13:f67a6c6013ca 1982 return ret;
wolfSSL 13:f67a6c6013ca 1983 }
wolfSSL 13:f67a6c6013ca 1984 #endif /* !WOLFSSL_TI_HASH */
wolfSSL 13:f67a6c6013ca 1985
wolfSSL 13:f67a6c6013ca 1986 #endif /* NO_SHA256 */
wolfSSL 13:f67a6c6013ca 1987