Xuyi Wang / wolfcrypt

Dependents: OS

Committer: sPymbed
Date:      Mon Nov 25 14:23:49 2019 +0000
Revision:  1:e4ea39eba2fb
Parent:    0:1387ff3eed4a
Message:   improved

Who changed what in which revision?

User    Revision        Line    New contents of line
sPymbed 0:1387ff3eed4a 1 /* sha256.c
sPymbed 0:1387ff3eed4a 2 *
sPymbed 0:1387ff3eed4a 3 * Copyright (C) 2006-2017 wolfSSL Inc.
sPymbed 0:1387ff3eed4a 4 *
sPymbed 0:1387ff3eed4a 5 * This file is part of wolfSSL.
sPymbed 0:1387ff3eed4a 6 *
sPymbed 0:1387ff3eed4a 7 * wolfSSL is free software; you can redistribute it and/or modify
sPymbed 0:1387ff3eed4a 8 * it under the terms of the GNU General Public License as published by
sPymbed 0:1387ff3eed4a 9 * the Free Software Foundation; either version 2 of the License, or
sPymbed 0:1387ff3eed4a 10 * (at your option) any later version.
sPymbed 0:1387ff3eed4a 11 *
sPymbed 0:1387ff3eed4a 12 * wolfSSL is distributed in the hope that it will be useful,
sPymbed 0:1387ff3eed4a 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
sPymbed 0:1387ff3eed4a 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
sPymbed 0:1387ff3eed4a 15 * GNU General Public License for more details.
sPymbed 0:1387ff3eed4a 16 *
sPymbed 0:1387ff3eed4a 17 * You should have received a copy of the GNU General Public License
sPymbed 0:1387ff3eed4a 18 * along with this program; if not, write to the Free Software
sPymbed 0:1387ff3eed4a 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
sPymbed 0:1387ff3eed4a 20 */
sPymbed 0:1387ff3eed4a 21
sPymbed 0:1387ff3eed4a 22
sPymbed 0:1387ff3eed4a 23 /* code submitted by raphael.huck@efixo.com */
sPymbed 0:1387ff3eed4a 24
sPymbed 0:1387ff3eed4a 25 #ifdef HAVE_CONFIG_H
sPymbed 0:1387ff3eed4a 26 #include <config.h>
sPymbed 0:1387ff3eed4a 27 #endif
sPymbed 0:1387ff3eed4a 28
sPymbed 0:1387ff3eed4a 29 #include <wolfcrypt/settings.h>
sPymbed 0:1387ff3eed4a 30
sPymbed 0:1387ff3eed4a 31 #if !defined(NO_SHA256) && !defined(WOLFSSL_ARMASM)
sPymbed 0:1387ff3eed4a 32
sPymbed 0:1387ff3eed4a 33 #if defined(HAVE_FIPS) && \
sPymbed 0:1387ff3eed4a 34 defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
sPymbed 0:1387ff3eed4a 35
sPymbed 0:1387ff3eed4a 36 /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
sPymbed 0:1387ff3eed4a 37 #define FIPS_NO_WRAPPERS
sPymbed 0:1387ff3eed4a 38
sPymbed 0:1387ff3eed4a 39 #ifdef USE_WINDOWS_API
sPymbed 0:1387ff3eed4a 40 #pragma code_seg(".fipsA$d")
sPymbed 0:1387ff3eed4a 41 #pragma const_seg(".fipsB$d")
sPymbed 0:1387ff3eed4a 42 #endif
sPymbed 0:1387ff3eed4a 43 #endif
sPymbed 0:1387ff3eed4a 44
sPymbed 0:1387ff3eed4a 45 #include <wolfcrypt/sha256.h>
sPymbed 0:1387ff3eed4a 46 #include <wolfcrypt/error-crypt.h>
sPymbed 0:1387ff3eed4a 47 #include <wolfcrypt/cpuid.h>
sPymbed 0:1387ff3eed4a 48
sPymbed 0:1387ff3eed4a 49 /* fips wrapper calls, user can call direct */
sPymbed 0:1387ff3eed4a 50 #if defined(HAVE_FIPS) && \
sPymbed 0:1387ff3eed4a 51 (!defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION < 2))
sPymbed 0:1387ff3eed4a 52
sPymbed 0:1387ff3eed4a 53 int wc_InitSha256(wc_Sha256* sha)
sPymbed 0:1387ff3eed4a 54 {
sPymbed 0:1387ff3eed4a 55 if (sha == NULL) {
sPymbed 0:1387ff3eed4a 56 return BAD_FUNC_ARG;
sPymbed 0:1387ff3eed4a 57 }
sPymbed 0:1387ff3eed4a 58 return InitSha256_fips(sha);
sPymbed 0:1387ff3eed4a 59 }
sPymbed 0:1387ff3eed4a 60 int wc_InitSha256_ex(wc_Sha256* sha, void* heap, int devId)
sPymbed 0:1387ff3eed4a 61 {
sPymbed 0:1387ff3eed4a 62 (void)heap;
sPymbed 0:1387ff3eed4a 63 (void)devId;
sPymbed 0:1387ff3eed4a 64 if (sha == NULL) {
sPymbed 0:1387ff3eed4a 65 return BAD_FUNC_ARG;
sPymbed 0:1387ff3eed4a 66 }
sPymbed 0:1387ff3eed4a 67 return InitSha256_fips(sha);
sPymbed 0:1387ff3eed4a 68 }
sPymbed 0:1387ff3eed4a 69 int wc_Sha256Update(wc_Sha256* sha, const byte* data, word32 len)
sPymbed 0:1387ff3eed4a 70 {
sPymbed 0:1387ff3eed4a 71 if (sha == NULL || (data == NULL && len > 0)) {
sPymbed 0:1387ff3eed4a 72 return BAD_FUNC_ARG;
sPymbed 0:1387ff3eed4a 73 }
sPymbed 0:1387ff3eed4a 74
sPymbed 0:1387ff3eed4a 75 if (data == NULL && len == 0) {
sPymbed 0:1387ff3eed4a 76 /* valid, but do nothing */
sPymbed 0:1387ff3eed4a 77 return 0;
sPymbed 0:1387ff3eed4a 78 }
sPymbed 0:1387ff3eed4a 79
sPymbed 0:1387ff3eed4a 80 return Sha256Update_fips(sha, data, len);
sPymbed 0:1387ff3eed4a 81 }
sPymbed 0:1387ff3eed4a 82 int wc_Sha256Final(wc_Sha256* sha, byte* out)
sPymbed 0:1387ff3eed4a 83 {
sPymbed 0:1387ff3eed4a 84 if (sha == NULL || out == NULL) {
sPymbed 0:1387ff3eed4a 85 return BAD_FUNC_ARG;
sPymbed 0:1387ff3eed4a 86 }
sPymbed 0:1387ff3eed4a 87 return Sha256Final_fips(sha, out);
sPymbed 0:1387ff3eed4a 88 }
sPymbed 0:1387ff3eed4a 89 void wc_Sha256Free(wc_Sha256* sha)
sPymbed 0:1387ff3eed4a 90 {
sPymbed 0:1387ff3eed4a 91 (void)sha;
sPymbed 0:1387ff3eed4a 92 /* Not supported in FIPS */
sPymbed 0:1387ff3eed4a 93 }
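/* Illustrative sketch (hypothetical helper, compiled out under #if 0):
 * the typical caller flow through the wrappers above. */
#if 0
static int Sha256HashBuffer(const byte* data, word32 len,
                            byte out[WC_SHA256_DIGEST_SIZE])
{
    wc_Sha256 sha;
    int ret = wc_InitSha256(&sha);
    if (ret == 0)
        ret = wc_Sha256Update(&sha, data, len);
    if (ret == 0)
        ret = wc_Sha256Final(&sha, out); /* writes the 32-byte digest */
    wc_Sha256Free(&sha);
    return ret;
}
#endif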
sPymbed 0:1387ff3eed4a 94
sPymbed 0:1387ff3eed4a 95 #else /* else build without fips, or for FIPS v2 */
sPymbed 0:1387ff3eed4a 96
sPymbed 0:1387ff3eed4a 97
sPymbed 0:1387ff3eed4a 98 #if defined(WOLFSSL_TI_HASH)
sPymbed 0:1387ff3eed4a 99 /* #include <wolfcrypt/src/port/ti/ti-hash.c> included by wc_port.c */
sPymbed 0:1387ff3eed4a 100 #else
sPymbed 0:1387ff3eed4a 101
sPymbed 0:1387ff3eed4a 102 #include <wolfcrypt/logging.h>
sPymbed 0:1387ff3eed4a 103
sPymbed 0:1387ff3eed4a 104 #ifdef NO_INLINE
sPymbed 0:1387ff3eed4a 105 #include <wolfcrypt/misc.h>
sPymbed 0:1387ff3eed4a 106 #else
sPymbed 0:1387ff3eed4a 107 #define WOLFSSL_MISC_INCLUDED
sPymbed 0:1387ff3eed4a 108 #include <wolfcrypt/src/misc.c>
sPymbed 0:1387ff3eed4a 109 #endif
sPymbed 0:1387ff3eed4a 110
sPymbed 0:1387ff3eed4a 111
sPymbed 0:1387ff3eed4a 112 #if defined(USE_INTEL_SPEEDUP)
sPymbed 0:1387ff3eed4a 113 #define HAVE_INTEL_AVX1
sPymbed 0:1387ff3eed4a 114
sPymbed 0:1387ff3eed4a 115 #if defined(__GNUC__) && ((__GNUC__ < 4) || \
sPymbed 0:1387ff3eed4a 116 (__GNUC__ == 4 && __GNUC_MINOR__ <= 8))
sPymbed 0:1387ff3eed4a 117 #define NO_AVX2_SUPPORT
sPymbed 0:1387ff3eed4a 118 #endif
sPymbed 0:1387ff3eed4a 119 #if defined(__clang__) && ((__clang_major__ < 3) || \
sPymbed 0:1387ff3eed4a 120 (__clang_major__ == 3 && __clang_minor__ <= 5))
sPymbed 0:1387ff3eed4a 121 #define NO_AVX2_SUPPORT
sPymbed 0:1387ff3eed4a 122 #elif defined(__clang__) && defined(NO_AVX2_SUPPORT)
sPymbed 0:1387ff3eed4a 123 #undef NO_AVX2_SUPPORT
sPymbed 0:1387ff3eed4a 124 #endif
sPymbed 0:1387ff3eed4a 125
sPymbed 0:1387ff3eed4a 126 #define HAVE_INTEL_AVX1
sPymbed 0:1387ff3eed4a 127 #ifndef NO_AVX2_SUPPORT
sPymbed 0:1387ff3eed4a 128 #define HAVE_INTEL_AVX2
sPymbed 0:1387ff3eed4a 129 #endif
sPymbed 0:1387ff3eed4a 130 #endif /* USE_INTEL_SPEEDUP */
sPymbed 0:1387ff3eed4a 131
sPymbed 0:1387ff3eed4a 132 #if defined(HAVE_INTEL_AVX2)
sPymbed 0:1387ff3eed4a 133 #define HAVE_INTEL_RORX
sPymbed 0:1387ff3eed4a 134 #endif
sPymbed 0:1387ff3eed4a 135
sPymbed 0:1387ff3eed4a 136
sPymbed 0:1387ff3eed4a 137 #if !defined(WOLFSSL_PIC32MZ_HASH) && !defined(STM32_HASH_SHA2) && \
sPymbed 0:1387ff3eed4a 138 (!defined(WOLFSSL_IMX6_CAAM) || defined(NO_IMX6_CAAM_HASH))
sPymbed 0:1387ff3eed4a 139 static int InitSha256(wc_Sha256* sha256)
sPymbed 0:1387ff3eed4a 140 {
sPymbed 0:1387ff3eed4a 141 int ret = 0;
sPymbed 0:1387ff3eed4a 142
sPymbed 0:1387ff3eed4a 143 if (sha256 == NULL)
sPymbed 0:1387ff3eed4a 144 return BAD_FUNC_ARG;
sPymbed 0:1387ff3eed4a 145
sPymbed 0:1387ff3eed4a 146 XMEMSET(sha256->digest, 0, sizeof(sha256->digest));
sPymbed 0:1387ff3eed4a 147 sha256->digest[0] = 0x6A09E667L;
sPymbed 0:1387ff3eed4a 148 sha256->digest[1] = 0xBB67AE85L;
sPymbed 0:1387ff3eed4a 149 sha256->digest[2] = 0x3C6EF372L;
sPymbed 0:1387ff3eed4a 150 sha256->digest[3] = 0xA54FF53AL;
sPymbed 0:1387ff3eed4a 151 sha256->digest[4] = 0x510E527FL;
sPymbed 0:1387ff3eed4a 152 sha256->digest[5] = 0x9B05688CL;
sPymbed 0:1387ff3eed4a 153 sha256->digest[6] = 0x1F83D9ABL;
sPymbed 0:1387ff3eed4a 154 sha256->digest[7] = 0x5BE0CD19L;
sPymbed 0:1387ff3eed4a 155
sPymbed 0:1387ff3eed4a 156 sha256->buffLen = 0;
sPymbed 0:1387ff3eed4a 157 sha256->loLen = 0;
sPymbed 0:1387ff3eed4a 158 sha256->hiLen = 0;
sPymbed 0:1387ff3eed4a 159
sPymbed 0:1387ff3eed4a 160 return ret;
sPymbed 0:1387ff3eed4a 161 }
sPymbed 0:1387ff3eed4a 162 #endif
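/* The eight initial digest words above are the first 32 bits of the
 * fractional parts of the square roots of the first eight primes
 * (2 through 19), per FIPS 180-4 section 5.3.3. */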
sPymbed 0:1387ff3eed4a 163
sPymbed 0:1387ff3eed4a 164
sPymbed 0:1387ff3eed4a 165 /* Hardware Acceleration */
sPymbed 0:1387ff3eed4a 166 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
sPymbed 0:1387ff3eed4a 167
sPymbed 0:1387ff3eed4a 168 /* in case intel instructions aren't available, plus we need the K[] global */
sPymbed 0:1387ff3eed4a 169 #define NEED_SOFT_SHA256
sPymbed 0:1387ff3eed4a 170
sPymbed 0:1387ff3eed4a 171 /*****
sPymbed 0:1387ff3eed4a 172 Intel AVX1/AVX2 Macro Control Structure
sPymbed 0:1387ff3eed4a 173
sPymbed 0:1387ff3eed4a 174 #define HAVE_INTEL_AVX1
sPymbed 0:1387ff3eed4a 175 #define HAVE_INTEL_AVX2
sPymbed 0:1387ff3eed4a 176
sPymbed 0:1387ff3eed4a 177 #define HAVE_INTEL_RORX
sPymbed 0:1387ff3eed4a 178
sPymbed 0:1387ff3eed4a 179
sPymbed 0:1387ff3eed4a 180 int InitSha256(wc_Sha256* sha256) {
sPymbed 0:1387ff3eed4a 181 Save/Recover XMM, YMM
sPymbed 0:1387ff3eed4a 182 ...
sPymbed 0:1387ff3eed4a 183 }
sPymbed 0:1387ff3eed4a 184
sPymbed 0:1387ff3eed4a 185 #if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
sPymbed 0:1387ff3eed4a 186 Transform_Sha256(); Function prototype
sPymbed 0:1387ff3eed4a 187 #else
sPymbed 0:1387ff3eed4a 188 Transform_Sha256() { }
sPymbed 0:1387ff3eed4a 189 int Sha256Final() {
sPymbed 0:1387ff3eed4a 190 Save/Recover XMM, YMM
sPymbed 0:1387ff3eed4a 191 ...
sPymbed 0:1387ff3eed4a 192 }
sPymbed 0:1387ff3eed4a 193 #endif
sPymbed 0:1387ff3eed4a 194
sPymbed 0:1387ff3eed4a 195 #if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
sPymbed 0:1387ff3eed4a 196 #if defined(HAVE_INTEL_RORX)
sPymbed 0:1387ff3eed4a 197 #define RND with rorx instruction
sPymbed 0:1387ff3eed4a 198 #else
sPymbed 0:1387ff3eed4a 199 #define RND
sPymbed 0:1387ff3eed4a 200 #endif
sPymbed 0:1387ff3eed4a 201 #endif
sPymbed 0:1387ff3eed4a 202
sPymbed 0:1387ff3eed4a 203 #if defined(HAVE_INTEL_AVX1)
sPymbed 0:1387ff3eed4a 204
sPymbed 0:1387ff3eed4a 205 #define XMM Instructions/inline asm
sPymbed 0:1387ff3eed4a 206
sPymbed 0:1387ff3eed4a 207 int Transform_Sha256() {
sPymbed 0:1387ff3eed4a 208 Stitched Message Sched/Round
sPymbed 0:1387ff3eed4a 209 }
sPymbed 0:1387ff3eed4a 210
sPymbed 0:1387ff3eed4a 211 #elif defined(HAVE_INTEL_AVX2)
sPymbed 0:1387ff3eed4a 212
sPymbed 0:1387ff3eed4a 213 #define YMM Instructions/inline asm
sPymbed 0:1387ff3eed4a 214
sPymbed 0:1387ff3eed4a 215 int Transform_Sha256() {
sPymbed 0:1387ff3eed4a 216 More granular Stitched Message Sched/Round
sPymbed 0:1387ff3eed4a 217 }
sPymbed 0:1387ff3eed4a 218
sPymbed 0:1387ff3eed4a 219 #endif
sPymbed 0:1387ff3eed4a 220
sPymbed 0:1387ff3eed4a 221 */
sPymbed 0:1387ff3eed4a 222
sPymbed 0:1387ff3eed4a 223 /* Each platform needs to query info type 1 from cpuid to see if AVX1/AVX2 is
sPymbed 0:1387ff3eed4a 224 * supported. Also, let's set up a macro for proper linkage w/o ABI conflicts
sPymbed 0:1387ff3eed4a 225 */
sPymbed 0:1387ff3eed4a 226
sPymbed 0:1387ff3eed4a 227 /* #if defined(HAVE_INTEL_AVX1/2) at the tail of sha256 */
sPymbed 0:1387ff3eed4a 228 static int Transform_Sha256(wc_Sha256* sha256);
sPymbed 0:1387ff3eed4a 229 #if defined(HAVE_INTEL_AVX1)
sPymbed 0:1387ff3eed4a 230 static int Transform_Sha256_AVX1(wc_Sha256 *sha256);
sPymbed 0:1387ff3eed4a 231 static int Transform_Sha256_AVX1_Len(wc_Sha256* sha256, word32 len);
sPymbed 0:1387ff3eed4a 232 #endif
sPymbed 0:1387ff3eed4a 233 #if defined(HAVE_INTEL_AVX2)
sPymbed 0:1387ff3eed4a 234 static int Transform_Sha256_AVX2(wc_Sha256 *sha256);
sPymbed 0:1387ff3eed4a 235 static int Transform_Sha256_AVX2_Len(wc_Sha256* sha256, word32 len);
sPymbed 0:1387ff3eed4a 236 #ifdef HAVE_INTEL_RORX
sPymbed 0:1387ff3eed4a 237 static int Transform_Sha256_AVX1_RORX(wc_Sha256 *sha256);
sPymbed 0:1387ff3eed4a 238 static int Transform_Sha256_AVX1_RORX_Len(wc_Sha256* sha256, word32 len);
sPymbed 0:1387ff3eed4a 239 static int Transform_Sha256_AVX2_RORX(wc_Sha256 *sha256);
sPymbed 0:1387ff3eed4a 240 static int Transform_Sha256_AVX2_RORX_Len(wc_Sha256* sha256, word32 len);
sPymbed 0:1387ff3eed4a 241 #endif
sPymbed 0:1387ff3eed4a 242 #endif
sPymbed 0:1387ff3eed4a 243 static int (*Transform_Sha256_p)(wc_Sha256* sha256);
sPymbed 0:1387ff3eed4a 244 /* = Transform_Sha256 */
sPymbed 0:1387ff3eed4a 245 static int (*Transform_Sha256_Len_p)(wc_Sha256* sha256, word32 len);
sPymbed 0:1387ff3eed4a 246 /* = NULL */
sPymbed 0:1387ff3eed4a 247 static int transform_check = 0;
sPymbed 0:1387ff3eed4a 248 static word32 intel_flags;
sPymbed 0:1387ff3eed4a 249 #define XTRANSFORM(S) (*Transform_Sha256_p)((S))
sPymbed 0:1387ff3eed4a 250 #define XTRANSFORM_LEN(S, L) (*Transform_Sha256_Len_p)((S),(L))
sPymbed 0:1387ff3eed4a 251
sPymbed 0:1387ff3eed4a 252 static void Sha256_SetTransform(void)
sPymbed 0:1387ff3eed4a 253 {
sPymbed 0:1387ff3eed4a 254
sPymbed 0:1387ff3eed4a 255 if (transform_check)
sPymbed 0:1387ff3eed4a 256 return;
sPymbed 0:1387ff3eed4a 257
sPymbed 0:1387ff3eed4a 258 intel_flags = cpuid_get_flags();
sPymbed 0:1387ff3eed4a 259
sPymbed 0:1387ff3eed4a 260 #ifdef HAVE_INTEL_AVX2
sPymbed 0:1387ff3eed4a 261 if (IS_INTEL_AVX2(intel_flags)) {
sPymbed 0:1387ff3eed4a 262 #ifdef HAVE_INTEL_RORX
sPymbed 0:1387ff3eed4a 263 if (IS_INTEL_BMI2(intel_flags)) {
sPymbed 0:1387ff3eed4a 264 Transform_Sha256_p = Transform_Sha256_AVX2_RORX;
sPymbed 0:1387ff3eed4a 265 Transform_Sha256_Len_p = Transform_Sha256_AVX2_RORX_Len;
sPymbed 0:1387ff3eed4a 266 }
sPymbed 0:1387ff3eed4a 267 else
sPymbed 0:1387ff3eed4a 268 #endif
sPymbed 0:1387ff3eed4a 269 if (1) /* always true; completes the dangling else when RORX is compiled in */
sPymbed 0:1387ff3eed4a 270 {
sPymbed 0:1387ff3eed4a 271 Transform_Sha256_p = Transform_Sha256_AVX2;
sPymbed 0:1387ff3eed4a 272 Transform_Sha256_Len_p = Transform_Sha256_AVX2_Len;
sPymbed 0:1387ff3eed4a 273 }
sPymbed 0:1387ff3eed4a 274 #ifdef HAVE_INTEL_RORX
sPymbed 0:1387ff3eed4a 275 else {
sPymbed 0:1387ff3eed4a 276 Transform_Sha256_p = Transform_Sha256_AVX1_RORX;
sPymbed 0:1387ff3eed4a 277 Transform_Sha256_Len_p = Transform_Sha256_AVX1_RORX_Len;
sPymbed 0:1387ff3eed4a 278 }
sPymbed 0:1387ff3eed4a 279 #endif
sPymbed 0:1387ff3eed4a 280 }
sPymbed 0:1387ff3eed4a 281 else
sPymbed 0:1387ff3eed4a 282 #endif
sPymbed 0:1387ff3eed4a 283 #ifdef HAVE_INTEL_AVX1
sPymbed 0:1387ff3eed4a 284 if (IS_INTEL_AVX1(intel_flags)) {
sPymbed 0:1387ff3eed4a 285 Transform_Sha256_p = Transform_Sha256_AVX1;
sPymbed 0:1387ff3eed4a 286 Transform_Sha256_Len_p = Transform_Sha256_AVX1_Len;
sPymbed 0:1387ff3eed4a 287 }
sPymbed 0:1387ff3eed4a 288 else
sPymbed 0:1387ff3eed4a 289 #endif
sPymbed 0:1387ff3eed4a 290 {
sPymbed 0:1387ff3eed4a 291 Transform_Sha256_p = Transform_Sha256;
sPymbed 0:1387ff3eed4a 292 Transform_Sha256_Len_p = NULL;
sPymbed 0:1387ff3eed4a 293 }
sPymbed 0:1387ff3eed4a 294
sPymbed 0:1387ff3eed4a 295 transform_check = 1;
sPymbed 0:1387ff3eed4a 296 }
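/* Dispatch order chosen above: AVX2 with BMI2 -> the AVX2_RORX variants,
 * other AVX2 -> plain AVX2, AVX1 -> AVX1, otherwise the portable C
 * Transform_Sha256 (which has no multi-block Len variant). The one-block
 * and multi-block pointers are always set together so XTRANSFORM and
 * XTRANSFORM_LEN stay consistent. */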
sPymbed 0:1387ff3eed4a 297
sPymbed 0:1387ff3eed4a 298 int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId)
sPymbed 0:1387ff3eed4a 299 {
sPymbed 0:1387ff3eed4a 300 int ret = 0;
sPymbed 0:1387ff3eed4a 301 if (sha256 == NULL)
sPymbed 0:1387ff3eed4a 302 return BAD_FUNC_ARG;
sPymbed 0:1387ff3eed4a 303
sPymbed 0:1387ff3eed4a 304 sha256->heap = heap;
sPymbed 0:1387ff3eed4a 305
sPymbed 0:1387ff3eed4a 306 ret = InitSha256(sha256);
sPymbed 0:1387ff3eed4a 307 if (ret != 0)
sPymbed 0:1387ff3eed4a 308 return ret;
sPymbed 0:1387ff3eed4a 309
sPymbed 0:1387ff3eed4a 310 /* choose best Transform function under this runtime environment */
sPymbed 0:1387ff3eed4a 311 Sha256_SetTransform();
sPymbed 0:1387ff3eed4a 312
sPymbed 0:1387ff3eed4a 313 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
sPymbed 0:1387ff3eed4a 314 ret = wolfAsync_DevCtxInit(&sha256->asyncDev,
sPymbed 0:1387ff3eed4a 315 WOLFSSL_ASYNC_MARKER_SHA256, sha256->heap, devId);
sPymbed 0:1387ff3eed4a 316 #else
sPymbed 0:1387ff3eed4a 317 (void)devId;
sPymbed 0:1387ff3eed4a 318 #endif /* WOLFSSL_ASYNC_CRYPT */
sPymbed 0:1387ff3eed4a 319
sPymbed 0:1387ff3eed4a 320 return ret;
sPymbed 0:1387ff3eed4a 321 }
sPymbed 0:1387ff3eed4a 322
sPymbed 0:1387ff3eed4a 323 #elif defined(FREESCALE_LTC_SHA)
sPymbed 0:1387ff3eed4a 324 int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId)
sPymbed 0:1387ff3eed4a 325 {
sPymbed 0:1387ff3eed4a 326 (void)heap;
sPymbed 0:1387ff3eed4a 327 (void)devId;
sPymbed 0:1387ff3eed4a 328
sPymbed 0:1387ff3eed4a 329 LTC_HASH_Init(LTC_BASE, &sha256->ctx, kLTC_Sha256, NULL, 0);
sPymbed 0:1387ff3eed4a 330
sPymbed 0:1387ff3eed4a 331 return 0;
sPymbed 0:1387ff3eed4a 332 }
sPymbed 0:1387ff3eed4a 333
sPymbed 0:1387ff3eed4a 334 #elif defined(FREESCALE_MMCAU_SHA)
sPymbed 0:1387ff3eed4a 335
sPymbed 0:1387ff3eed4a 336 #ifdef FREESCALE_MMCAU_CLASSIC_SHA
sPymbed 0:1387ff3eed4a 337 #include "cau_api.h"
sPymbed 0:1387ff3eed4a 338 #else
sPymbed 0:1387ff3eed4a 339 #include "fsl_mmcau.h"
sPymbed 0:1387ff3eed4a 340 #endif
sPymbed 0:1387ff3eed4a 341
sPymbed 0:1387ff3eed4a 342 #define XTRANSFORM(S) Transform_Sha256((S))
sPymbed 0:1387ff3eed4a 343 #define XTRANSFORM_LEN(S,L) Transform_Sha256_Len((S),(L))
sPymbed 0:1387ff3eed4a 344
sPymbed 0:1387ff3eed4a 345 int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId)
sPymbed 0:1387ff3eed4a 346 {
sPymbed 0:1387ff3eed4a 347 int ret = 0;
sPymbed 0:1387ff3eed4a 348
sPymbed 0:1387ff3eed4a 349 (void)heap;
sPymbed 0:1387ff3eed4a 350 (void)devId;
sPymbed 0:1387ff3eed4a 351
sPymbed 0:1387ff3eed4a 352 ret = wolfSSL_CryptHwMutexLock();
sPymbed 0:1387ff3eed4a 353 if (ret != 0) {
sPymbed 0:1387ff3eed4a 354 return ret;
sPymbed 0:1387ff3eed4a 355 }
sPymbed 0:1387ff3eed4a 356 #ifdef FREESCALE_MMCAU_CLASSIC_SHA
sPymbed 0:1387ff3eed4a 357 cau_sha256_initialize_output(sha256->digest);
sPymbed 0:1387ff3eed4a 358 #else
sPymbed 0:1387ff3eed4a 359 MMCAU_SHA256_InitializeOutput((uint32_t*)sha256->digest);
sPymbed 0:1387ff3eed4a 360 #endif
sPymbed 0:1387ff3eed4a 361 wolfSSL_CryptHwMutexUnLock();
sPymbed 0:1387ff3eed4a 362
sPymbed 0:1387ff3eed4a 363 sha256->buffLen = 0;
sPymbed 0:1387ff3eed4a 364 sha256->loLen = 0;
sPymbed 0:1387ff3eed4a 365 sha256->hiLen = 0;
sPymbed 0:1387ff3eed4a 366
sPymbed 0:1387ff3eed4a 367 return ret;
sPymbed 0:1387ff3eed4a 368 }
sPymbed 0:1387ff3eed4a 369
sPymbed 0:1387ff3eed4a 370 static int Transform_Sha256(wc_Sha256* sha256)
sPymbed 0:1387ff3eed4a 371 {
sPymbed 0:1387ff3eed4a 372 int ret = wolfSSL_CryptHwMutexLock();
sPymbed 0:1387ff3eed4a 373 if (ret == 0) {
sPymbed 0:1387ff3eed4a 374 #ifdef FREESCALE_MMCAU_CLASSIC_SHA
sPymbed 0:1387ff3eed4a 375 cau_sha256_hash_n((byte*)sha256->buffer, 1, sha256->digest);
sPymbed 0:1387ff3eed4a 376 #else
sPymbed 0:1387ff3eed4a 377 MMCAU_SHA256_HashN((byte*)sha256->buffer, 1, sha256->digest);
sPymbed 0:1387ff3eed4a 378 #endif
sPymbed 0:1387ff3eed4a 379 wolfSSL_CryptHwMutexUnLock();
sPymbed 0:1387ff3eed4a 380 }
sPymbed 0:1387ff3eed4a 381 return ret;
sPymbed 0:1387ff3eed4a 382 }
sPymbed 0:1387ff3eed4a 383
sPymbed 0:1387ff3eed4a 384 #elif defined(WOLFSSL_PIC32MZ_HASH)
sPymbed 0:1387ff3eed4a 385 #include <wolfcrypt/port/pic32/pic32mz-crypt.h>
sPymbed 0:1387ff3eed4a 386
sPymbed 0:1387ff3eed4a 387 #elif defined(STM32_HASH_SHA2)
sPymbed 0:1387ff3eed4a 388
sPymbed 0:1387ff3eed4a 389 /* Supports CubeMX HAL or Standard Peripheral Library */
sPymbed 0:1387ff3eed4a 390
sPymbed 0:1387ff3eed4a 391 int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId)
sPymbed 0:1387ff3eed4a 392 {
sPymbed 0:1387ff3eed4a 393 if (sha256 == NULL)
sPymbed 0:1387ff3eed4a 394 return BAD_FUNC_ARG;
sPymbed 0:1387ff3eed4a 395
sPymbed 0:1387ff3eed4a 396 (void)devId;
sPymbed 0:1387ff3eed4a 397 (void)heap;
sPymbed 0:1387ff3eed4a 398
sPymbed 0:1387ff3eed4a 399 wc_Stm32_Hash_Init(&sha256->stmCtx);
sPymbed 0:1387ff3eed4a 400 return 0;
sPymbed 0:1387ff3eed4a 401 }
sPymbed 0:1387ff3eed4a 402
sPymbed 0:1387ff3eed4a 403 int wc_Sha256Update(wc_Sha256* sha256, const byte* data, word32 len)
sPymbed 0:1387ff3eed4a 404 {
sPymbed 0:1387ff3eed4a 405 int ret = 0;
sPymbed 0:1387ff3eed4a 406
sPymbed 0:1387ff3eed4a 407 if (sha256 == NULL || (data == NULL && len > 0)) {
sPymbed 0:1387ff3eed4a 408 return BAD_FUNC_ARG;
sPymbed 0:1387ff3eed4a 409 }
sPymbed 0:1387ff3eed4a 410
sPymbed 0:1387ff3eed4a 411 ret = wolfSSL_CryptHwMutexLock();
sPymbed 0:1387ff3eed4a 412 if (ret == 0) {
sPymbed 0:1387ff3eed4a 413 ret = wc_Stm32_Hash_Update(&sha256->stmCtx,
sPymbed 0:1387ff3eed4a 414 HASH_AlgoSelection_SHA256, data, len);
sPymbed 0:1387ff3eed4a 415 wolfSSL_CryptHwMutexUnLock();
sPymbed 0:1387ff3eed4a 416 }
sPymbed 0:1387ff3eed4a 417 return ret;
sPymbed 0:1387ff3eed4a 418 }
sPymbed 0:1387ff3eed4a 419
sPymbed 0:1387ff3eed4a 420 int wc_Sha256Final(wc_Sha256* sha256, byte* hash)
sPymbed 0:1387ff3eed4a 421 {
sPymbed 0:1387ff3eed4a 422 int ret = 0;
sPymbed 0:1387ff3eed4a 423
sPymbed 0:1387ff3eed4a 424 if (sha256 == NULL || hash == NULL) {
sPymbed 0:1387ff3eed4a 425 return BAD_FUNC_ARG;
sPymbed 0:1387ff3eed4a 426 }
sPymbed 0:1387ff3eed4a 427
sPymbed 0:1387ff3eed4a 428 ret = wolfSSL_CryptHwMutexLock();
sPymbed 0:1387ff3eed4a 429 if (ret == 0) {
sPymbed 0:1387ff3eed4a 430 ret = wc_Stm32_Hash_Final(&sha256->stmCtx,
sPymbed 0:1387ff3eed4a 431 HASH_AlgoSelection_SHA256, hash, WC_SHA256_DIGEST_SIZE);
sPymbed 0:1387ff3eed4a 432 wolfSSL_CryptHwMutexUnLock();
sPymbed 0:1387ff3eed4a 433 }
sPymbed 0:1387ff3eed4a 434
sPymbed 0:1387ff3eed4a 435 (void)wc_InitSha256(sha256); /* reset state */
sPymbed 0:1387ff3eed4a 436
sPymbed 0:1387ff3eed4a 437 return ret;
sPymbed 0:1387ff3eed4a 438 }
sPymbed 0:1387ff3eed4a 439
sPymbed 0:1387ff3eed4a 440 #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_HASH)
sPymbed 0:1387ff3eed4a 441 /* functions defined in wolfcrypt/src/port/caam/caam_sha256.c */
sPymbed 0:1387ff3eed4a 442 #else
sPymbed 0:1387ff3eed4a 443 #define NEED_SOFT_SHA256
sPymbed 0:1387ff3eed4a 444
sPymbed 0:1387ff3eed4a 445 int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId)
sPymbed 0:1387ff3eed4a 446 {
sPymbed 0:1387ff3eed4a 447 int ret = 0;
sPymbed 0:1387ff3eed4a 448 if (sha256 == NULL)
sPymbed 0:1387ff3eed4a 449 return BAD_FUNC_ARG;
sPymbed 0:1387ff3eed4a 450
sPymbed 0:1387ff3eed4a 451 sha256->heap = heap;
sPymbed 0:1387ff3eed4a 452
sPymbed 0:1387ff3eed4a 453 ret = InitSha256(sha256);
sPymbed 0:1387ff3eed4a 454 if (ret != 0)
sPymbed 0:1387ff3eed4a 455 return ret;
sPymbed 0:1387ff3eed4a 456
sPymbed 0:1387ff3eed4a 457 #ifdef WOLFSSL_SMALL_STACK_CACHE
sPymbed 0:1387ff3eed4a 458 sha256->W = NULL;
sPymbed 0:1387ff3eed4a 459 #endif
sPymbed 0:1387ff3eed4a 460
sPymbed 0:1387ff3eed4a 461 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
sPymbed 0:1387ff3eed4a 462 ret = wolfAsync_DevCtxInit(&sha256->asyncDev,
sPymbed 0:1387ff3eed4a 463 WOLFSSL_ASYNC_MARKER_SHA256, sha256->heap, devId);
sPymbed 0:1387ff3eed4a 464 #else
sPymbed 0:1387ff3eed4a 465 (void)devId;
sPymbed 0:1387ff3eed4a 466 #endif /* WOLFSSL_ASYNC_CRYPT */
sPymbed 0:1387ff3eed4a 467
sPymbed 0:1387ff3eed4a 468 return ret;
sPymbed 0:1387ff3eed4a 469 }
sPymbed 0:1387ff3eed4a 470 #endif /* End Hardware Acceleration */
sPymbed 0:1387ff3eed4a 471
sPymbed 0:1387ff3eed4a 472 #ifdef NEED_SOFT_SHA256
sPymbed 0:1387ff3eed4a 473
sPymbed 0:1387ff3eed4a 474 static const ALIGN32 word32 K[64] = {
sPymbed 0:1387ff3eed4a 475 0x428A2F98L, 0x71374491L, 0xB5C0FBCFL, 0xE9B5DBA5L, 0x3956C25BL,
sPymbed 0:1387ff3eed4a 476 0x59F111F1L, 0x923F82A4L, 0xAB1C5ED5L, 0xD807AA98L, 0x12835B01L,
sPymbed 0:1387ff3eed4a 477 0x243185BEL, 0x550C7DC3L, 0x72BE5D74L, 0x80DEB1FEL, 0x9BDC06A7L,
sPymbed 0:1387ff3eed4a 478 0xC19BF174L, 0xE49B69C1L, 0xEFBE4786L, 0x0FC19DC6L, 0x240CA1CCL,
sPymbed 0:1387ff3eed4a 479 0x2DE92C6FL, 0x4A7484AAL, 0x5CB0A9DCL, 0x76F988DAL, 0x983E5152L,
sPymbed 0:1387ff3eed4a 480 0xA831C66DL, 0xB00327C8L, 0xBF597FC7L, 0xC6E00BF3L, 0xD5A79147L,
sPymbed 0:1387ff3eed4a 481 0x06CA6351L, 0x14292967L, 0x27B70A85L, 0x2E1B2138L, 0x4D2C6DFCL,
sPymbed 0:1387ff3eed4a 482 0x53380D13L, 0x650A7354L, 0x766A0ABBL, 0x81C2C92EL, 0x92722C85L,
sPymbed 0:1387ff3eed4a 483 0xA2BFE8A1L, 0xA81A664BL, 0xC24B8B70L, 0xC76C51A3L, 0xD192E819L,
sPymbed 0:1387ff3eed4a 484 0xD6990624L, 0xF40E3585L, 0x106AA070L, 0x19A4C116L, 0x1E376C08L,
sPymbed 0:1387ff3eed4a 485 0x2748774CL, 0x34B0BCB5L, 0x391C0CB3L, 0x4ED8AA4AL, 0x5B9CCA4FL,
sPymbed 0:1387ff3eed4a 486 0x682E6FF3L, 0x748F82EEL, 0x78A5636FL, 0x84C87814L, 0x8CC70208L,
sPymbed 0:1387ff3eed4a 487 0x90BEFFFAL, 0xA4506CEBL, 0xBEF9A3F7L, 0xC67178F2L
sPymbed 0:1387ff3eed4a 488 };
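/* K[t] is the first 32 bits of the fractional part of the cube root of
 * the t-th prime (2, 3, 5, ...), per FIPS 180-4 section 4.2.2. */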
sPymbed 0:1387ff3eed4a 489
sPymbed 0:1387ff3eed4a 490 #define Ch(x,y,z) ((z) ^ ((x) & ((y) ^ (z))))
sPymbed 0:1387ff3eed4a 491 #define Maj(x,y,z) ((((x) | (y)) & (z)) | ((x) & (y)))
sPymbed 0:1387ff3eed4a 492 #define R(x, n) (((x) & 0xFFFFFFFFU) >> (n))
sPymbed 0:1387ff3eed4a 493
sPymbed 0:1387ff3eed4a 494 #define S(x, n) rotrFixed(x, n)
sPymbed 0:1387ff3eed4a 495 #define Sigma0(x) (S(x, 2) ^ S(x, 13) ^ S(x, 22))
sPymbed 0:1387ff3eed4a 496 #define Sigma1(x) (S(x, 6) ^ S(x, 11) ^ S(x, 25))
sPymbed 0:1387ff3eed4a 497 #define Gamma0(x) (S(x, 7) ^ S(x, 18) ^ R(x, 3))
sPymbed 0:1387ff3eed4a 498 #define Gamma1(x) (S(x, 17) ^ S(x, 19) ^ R(x, 10))
sPymbed 0:1387ff3eed4a 499
sPymbed 0:1387ff3eed4a 500 #define a(i) S[(0-i) & 7]
sPymbed 0:1387ff3eed4a 501 #define b(i) S[(1-i) & 7]
sPymbed 0:1387ff3eed4a 502 #define c(i) S[(2-i) & 7]
sPymbed 0:1387ff3eed4a 503 #define d(i) S[(3-i) & 7]
sPymbed 0:1387ff3eed4a 504 #define e(i) S[(4-i) & 7]
sPymbed 0:1387ff3eed4a 505 #define f(i) S[(5-i) & 7]
sPymbed 0:1387ff3eed4a 506 #define g(i) S[(6-i) & 7]
sPymbed 0:1387ff3eed4a 507 #define h(i) S[(7-i) & 7]
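/* The a(i)..h(i) macros index S[] with a rotating offset, so stepping the
 * round index renames the eight working words in place instead of copying
 * them: e.g. h(0) and a(1) are the same slot, S[7]. */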
sPymbed 0:1387ff3eed4a 508
sPymbed 0:1387ff3eed4a 509 #define RND(j) \
sPymbed 0:1387ff3eed4a 510 t0 = h(j) + Sigma1(e(j)) + Ch(e(j), f(j), g(j)) + K[i+j] + W[i+j]; \
sPymbed 0:1387ff3eed4a 511 t1 = Sigma0(a(j)) + Maj(a(j), b(j), c(j)); \
sPymbed 0:1387ff3eed4a 512 d(j) += t0; \
sPymbed 0:1387ff3eed4a 513 h(j) = t0 + t1
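/* RND(j) is one FIPS 180-4 compression round written with two temporaries:
 *   t0 = h + Sigma1(e) + Ch(e,f,g) + K[t] + W[t]   (T1 in the standard)
 *   t1 = Sigma0(a) + Maj(a,b,c)                    (T2 in the standard)
 *   d += t0; h = t0 + t1; the remaining words rotate via a(i)..h(i). */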
sPymbed 0:1387ff3eed4a 514
sPymbed 0:1387ff3eed4a 515 #ifndef XTRANSFORM
sPymbed 0:1387ff3eed4a 516 #define XTRANSFORM(S) Transform_Sha256((S))
sPymbed 0:1387ff3eed4a 517 #define XTRANSFORM_LEN(S,L) Transform_Sha256_Len((S),(L))
sPymbed 0:1387ff3eed4a 518 #endif
sPymbed 0:1387ff3eed4a 519
sPymbed 0:1387ff3eed4a 520 static int Transform_Sha256(wc_Sha256* sha256)
sPymbed 0:1387ff3eed4a 521 {
sPymbed 0:1387ff3eed4a 522 word32 S[8], t0, t1;
sPymbed 0:1387ff3eed4a 523 int i;
sPymbed 0:1387ff3eed4a 524
sPymbed 0:1387ff3eed4a 525 #ifdef WOLFSSL_SMALL_STACK_CACHE
sPymbed 0:1387ff3eed4a 526 word32* W = sha256->W;
sPymbed 0:1387ff3eed4a 527 if (W == NULL) {
sPymbed 0:1387ff3eed4a 528 W = (word32*)XMALLOC(sizeof(word32) * WC_SHA256_BLOCK_SIZE, NULL,
sPymbed 0:1387ff3eed4a 529 DYNAMIC_TYPE_RNG);
sPymbed 0:1387ff3eed4a 530 if (W == NULL)
sPymbed 0:1387ff3eed4a 531 return MEMORY_E;
sPymbed 0:1387ff3eed4a 532 sha256->W = W;
sPymbed 0:1387ff3eed4a 533 }
sPymbed 0:1387ff3eed4a 534 #elif defined(WOLFSSL_SMALL_STACK)
sPymbed 0:1387ff3eed4a 535 word32* W;
sPymbed 0:1387ff3eed4a 536 W = (word32*)XMALLOC(sizeof(word32) * WC_SHA256_BLOCK_SIZE, NULL,
sPymbed 0:1387ff3eed4a 537 DYNAMIC_TYPE_TMP_BUFFER);
sPymbed 0:1387ff3eed4a 538 if (W == NULL)
sPymbed 0:1387ff3eed4a 539 return MEMORY_E;
sPymbed 0:1387ff3eed4a 540 #else
sPymbed 0:1387ff3eed4a 541 word32 W[WC_SHA256_BLOCK_SIZE];
sPymbed 0:1387ff3eed4a 542 #endif
sPymbed 0:1387ff3eed4a 543
sPymbed 0:1387ff3eed4a 544 /* Copy context->state[] to working vars */
sPymbed 0:1387ff3eed4a 545 for (i = 0; i < 8; i++)
sPymbed 0:1387ff3eed4a 546 S[i] = sha256->digest[i];
sPymbed 0:1387ff3eed4a 547
sPymbed 0:1387ff3eed4a 548 for (i = 0; i < 16; i++)
sPymbed 0:1387ff3eed4a 549 W[i] = sha256->buffer[i];
sPymbed 0:1387ff3eed4a 550
sPymbed 0:1387ff3eed4a 551 for (i = 16; i < WC_SHA256_BLOCK_SIZE; i++)
sPymbed 0:1387ff3eed4a 552 W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16];
sPymbed 0:1387ff3eed4a 553
sPymbed 0:1387ff3eed4a 554 #ifdef USE_SLOW_SHA256
sPymbed 0:1387ff3eed4a 555 /* not unrolled - ~2k smaller and ~25% slower */
sPymbed 0:1387ff3eed4a 556 for (i = 0; i < WC_SHA256_BLOCK_SIZE; i += 8) {
sPymbed 0:1387ff3eed4a 557 int j;
sPymbed 0:1387ff3eed4a 558 for (j = 0; j < 8; j++) { /* braces needed here for macros {} */
sPymbed 0:1387ff3eed4a 559 RND(j);
sPymbed 0:1387ff3eed4a 560 }
sPymbed 0:1387ff3eed4a 561 }
sPymbed 0:1387ff3eed4a 562 #else
sPymbed 0:1387ff3eed4a 563 /* partially loop unrolled */
sPymbed 0:1387ff3eed4a 564 for (i = 0; i < WC_SHA256_BLOCK_SIZE; i += 8) {
sPymbed 0:1387ff3eed4a 565 RND(0); RND(1); RND(2); RND(3);
sPymbed 0:1387ff3eed4a 566 RND(4); RND(5); RND(6); RND(7);
sPymbed 0:1387ff3eed4a 567 }
sPymbed 0:1387ff3eed4a 568 #endif /* USE_SLOW_SHA256 */
sPymbed 0:1387ff3eed4a 569
sPymbed 0:1387ff3eed4a 570 /* Add the working vars back into digest state[] */
sPymbed 0:1387ff3eed4a 571 for (i = 0; i < 8; i++) {
sPymbed 0:1387ff3eed4a 572 sha256->digest[i] += S[i];
sPymbed 0:1387ff3eed4a 573 }
sPymbed 0:1387ff3eed4a 574
sPymbed 0:1387ff3eed4a 575 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SMALL_STACK_CACHE)
sPymbed 0:1387ff3eed4a 576 XFREE(W, NULL, DYNAMIC_TYPE_TMP_BUFFER);
sPymbed 0:1387ff3eed4a 577 #endif
sPymbed 0:1387ff3eed4a 578 return 0;
sPymbed 0:1387ff3eed4a 579 }
sPymbed 0:1387ff3eed4a 580 #endif
sPymbed 0:1387ff3eed4a 581 /* End wc_ software implementation */
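/* Known-answer sketch (hypothetical helper, compiled out under #if 0):
 * FIPS 180-4 gives SHA-256("abc") =
 * ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad. */
#if 0
static int Sha256SelfTest(void)
{
    static const byte abc[3] = { 'a', 'b', 'c' };
    static const byte expect[WC_SHA256_DIGEST_SIZE] = {
        0xba, 0x78, 0x16, 0xbf, 0x8f, 0x01, 0xcf, 0xea,
        0x41, 0x41, 0x40, 0xde, 0x5d, 0xae, 0x22, 0x23,
        0xb0, 0x03, 0x61, 0xa3, 0x96, 0x17, 0x7a, 0x9c,
        0xb4, 0x10, 0xff, 0x61, 0xf2, 0x00, 0x15, 0xad
    };
    byte out[WC_SHA256_DIGEST_SIZE];
    wc_Sha256 sha;
    int ret = wc_InitSha256(&sha);
    if (ret == 0)
        ret = wc_Sha256Update(&sha, abc, sizeof(abc));
    if (ret == 0)
        ret = wc_Sha256Final(&sha, out);
    if (ret == 0 && XMEMCMP(out, expect, sizeof(expect)) != 0)
        ret = -1; /* digest mismatch */
    return ret;
}
#endif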
sPymbed 0:1387ff3eed4a 582
sPymbed 0:1387ff3eed4a 583
sPymbed 0:1387ff3eed4a 584 #ifdef XTRANSFORM
sPymbed 0:1387ff3eed4a 585
sPymbed 0:1387ff3eed4a 586 static WC_INLINE void AddLength(wc_Sha256* sha256, word32 len)
sPymbed 0:1387ff3eed4a 587 {
sPymbed 0:1387ff3eed4a 588 word32 tmp = sha256->loLen;
sPymbed 0:1387ff3eed4a 589 if ((sha256->loLen += len) < tmp)
sPymbed 0:1387ff3eed4a 590 sha256->hiLen++; /* carry low to high */
sPymbed 0:1387ff3eed4a 591 }
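/* Example: with loLen == 0xFFFFFFFC and len == 8 the sum wraps to 4,
 * which is less than the saved value, so hiLen takes the carry; hiLen and
 * loLen together hold the 64-bit running byte count. */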
sPymbed 0:1387ff3eed4a 592
sPymbed 0:1387ff3eed4a 593 static WC_INLINE int Sha256Update(wc_Sha256* sha256, const byte* data, word32 len)
sPymbed 0:1387ff3eed4a 594 {
sPymbed 0:1387ff3eed4a 595 int ret = 0;
sPymbed 0:1387ff3eed4a 596 byte* local;
sPymbed 0:1387ff3eed4a 597
sPymbed 0:1387ff3eed4a 598 if (sha256 == NULL || (data == NULL && len > 0)) {
sPymbed 0:1387ff3eed4a 599 return BAD_FUNC_ARG;
sPymbed 0:1387ff3eed4a 600 }
sPymbed 0:1387ff3eed4a 601
sPymbed 0:1387ff3eed4a 602 if (data == NULL && len == 0) {
sPymbed 0:1387ff3eed4a 603 /* valid, but do nothing */
sPymbed 0:1387ff3eed4a 604 return 0;
sPymbed 0:1387ff3eed4a 605 }
sPymbed 0:1387ff3eed4a 606
sPymbed 0:1387ff3eed4a 607 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
sPymbed 0:1387ff3eed4a 608 if (sha256->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA256) {
sPymbed 0:1387ff3eed4a 609 #if defined(HAVE_INTEL_QA)
sPymbed 0:1387ff3eed4a 610 return IntelQaSymSha256(&sha256->asyncDev, NULL, data, len);
sPymbed 0:1387ff3eed4a 611 #endif
sPymbed 0:1387ff3eed4a 612 }
sPymbed 0:1387ff3eed4a 613 #endif /* WOLFSSL_ASYNC_CRYPT */
sPymbed 0:1387ff3eed4a 614
sPymbed 0:1387ff3eed4a 615 /* do block size increments */
sPymbed 0:1387ff3eed4a 616 local = (byte*)sha256->buffer;
sPymbed 0:1387ff3eed4a 617
sPymbed 0:1387ff3eed4a 618 /* check that internal buffLen is valid */
sPymbed 0:1387ff3eed4a 619 if (sha256->buffLen >= WC_SHA256_BLOCK_SIZE)
sPymbed 0:1387ff3eed4a 620 return BUFFER_E;
sPymbed 0:1387ff3eed4a 621
sPymbed 0:1387ff3eed4a 622 if (sha256->buffLen > 0) {
sPymbed 0:1387ff3eed4a 623 word32 add = min(len, WC_SHA256_BLOCK_SIZE - sha256->buffLen);
sPymbed 0:1387ff3eed4a 624 XMEMCPY(&local[sha256->buffLen], data, add);
sPymbed 0:1387ff3eed4a 625
sPymbed 0:1387ff3eed4a 626 sha256->buffLen += add;
sPymbed 0:1387ff3eed4a 627 data += add;
sPymbed 0:1387ff3eed4a 628 len -= add;
sPymbed 0:1387ff3eed4a 629
sPymbed 0:1387ff3eed4a 630 if (sha256->buffLen == WC_SHA256_BLOCK_SIZE) {
sPymbed 0:1387ff3eed4a 631 #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
sPymbed 0:1387ff3eed4a 632 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
sPymbed 0:1387ff3eed4a 633 if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
sPymbed 0:1387ff3eed4a 634 #endif
sPymbed 0:1387ff3eed4a 635 {
sPymbed 0:1387ff3eed4a 636 ByteReverseWords(sha256->buffer, sha256->buffer,
sPymbed 0:1387ff3eed4a 637 WC_SHA256_BLOCK_SIZE);
sPymbed 0:1387ff3eed4a 638 }
sPymbed 0:1387ff3eed4a 639 #endif
sPymbed 0:1387ff3eed4a 640 ret = XTRANSFORM(sha256);
sPymbed 0:1387ff3eed4a 641 if (ret == 0) {
sPymbed 0:1387ff3eed4a 642 AddLength(sha256, WC_SHA256_BLOCK_SIZE);
sPymbed 0:1387ff3eed4a 643 sha256->buffLen = 0;
sPymbed 0:1387ff3eed4a 644 }
sPymbed 0:1387ff3eed4a 645 else
sPymbed 0:1387ff3eed4a 646 len = 0;
sPymbed 0:1387ff3eed4a 647 }
sPymbed 0:1387ff3eed4a 648 }
sPymbed 0:1387ff3eed4a 649
sPymbed 0:1387ff3eed4a 650 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
sPymbed 0:1387ff3eed4a 651 if (Transform_Sha256_Len_p != NULL) {
sPymbed 0:1387ff3eed4a 652 word32 blocksLen = len & ~(WC_SHA256_BLOCK_SIZE-1);
sPymbed 0:1387ff3eed4a 653
sPymbed 0:1387ff3eed4a 654 if (blocksLen > 0) {
sPymbed 0:1387ff3eed4a 655 AddLength(sha256, blocksLen);
sPymbed 0:1387ff3eed4a 656 sha256->data = data;
sPymbed 0:1387ff3eed4a 657 /* Byte reversal performed in function if required. */
sPymbed 0:1387ff3eed4a 658 XTRANSFORM_LEN(sha256, blocksLen);
sPymbed 0:1387ff3eed4a 659 data += blocksLen;
sPymbed 0:1387ff3eed4a 660 len -= blocksLen;
sPymbed 0:1387ff3eed4a 661 }
sPymbed 0:1387ff3eed4a 662 }
sPymbed 0:1387ff3eed4a 663 else
sPymbed 0:1387ff3eed4a 664 #endif
sPymbed 0:1387ff3eed4a 665 #if !defined(LITTLE_ENDIAN_ORDER) || defined(FREESCALE_MMCAU_SHA) || \
sPymbed 0:1387ff3eed4a 666 defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
sPymbed 0:1387ff3eed4a 667 {
sPymbed 0:1387ff3eed4a 668 word32 blocksLen = len & ~(WC_SHA256_BLOCK_SIZE-1);
sPymbed 0:1387ff3eed4a 669
sPymbed 0:1387ff3eed4a 670 AddLength(sha256, blocksLen);
sPymbed 0:1387ff3eed4a 671 while (len >= WC_SHA256_BLOCK_SIZE) {
sPymbed 0:1387ff3eed4a 672 XMEMCPY(local, data, WC_SHA256_BLOCK_SIZE);
sPymbed 0:1387ff3eed4a 673
sPymbed 0:1387ff3eed4a 674 data += WC_SHA256_BLOCK_SIZE;
sPymbed 0:1387ff3eed4a 675 len -= WC_SHA256_BLOCK_SIZE;
sPymbed 0:1387ff3eed4a 676
sPymbed 0:1387ff3eed4a 677 /* Byte reversal performed in function if required. */
sPymbed 0:1387ff3eed4a 678 ret = XTRANSFORM(sha256);
sPymbed 0:1387ff3eed4a 679 if (ret != 0)
sPymbed 0:1387ff3eed4a 680 break;
sPymbed 0:1387ff3eed4a 681 }
sPymbed 0:1387ff3eed4a 682 }
sPymbed 0:1387ff3eed4a 683 #else
sPymbed 0:1387ff3eed4a 684 {
sPymbed 0:1387ff3eed4a 685 word32 blocksLen = len & ~(WC_SHA256_BLOCK_SIZE-1);
sPymbed 0:1387ff3eed4a 686
sPymbed 0:1387ff3eed4a 687 AddLength(sha256, blocksLen);
sPymbed 0:1387ff3eed4a 688 while (len >= WC_SHA256_BLOCK_SIZE) {
sPymbed 0:1387ff3eed4a 689 XMEMCPY(local, data, WC_SHA256_BLOCK_SIZE);
sPymbed 0:1387ff3eed4a 690
sPymbed 0:1387ff3eed4a 691 data += WC_SHA256_BLOCK_SIZE;
sPymbed 0:1387ff3eed4a 692 len -= WC_SHA256_BLOCK_SIZE;
sPymbed 0:1387ff3eed4a 693
sPymbed 0:1387ff3eed4a 694 ByteReverseWords(sha256->buffer, sha256->buffer,
sPymbed 0:1387ff3eed4a 695 WC_SHA256_BLOCK_SIZE);
sPymbed 0:1387ff3eed4a 696 ret = XTRANSFORM(sha256);
sPymbed 0:1387ff3eed4a 697 if (ret != 0)
sPymbed 0:1387ff3eed4a 698 break;
sPymbed 0:1387ff3eed4a 699 }
sPymbed 0:1387ff3eed4a 700 }
sPymbed 0:1387ff3eed4a 701 #endif
sPymbed 0:1387ff3eed4a 702
sPymbed 0:1387ff3eed4a 703 if (len > 0) {
sPymbed 0:1387ff3eed4a 704 XMEMCPY(local, data, len);
sPymbed 0:1387ff3eed4a 705 sha256->buffLen = len;
sPymbed 0:1387ff3eed4a 706 }
sPymbed 0:1387ff3eed4a 707
sPymbed 0:1387ff3eed4a 708 return ret;
sPymbed 0:1387ff3eed4a 709 }
sPymbed 0:1387ff3eed4a 710
sPymbed 0:1387ff3eed4a 711 int wc_Sha256Update(wc_Sha256* sha256, const byte* data, word32 len)
sPymbed 0:1387ff3eed4a 712 {
sPymbed 0:1387ff3eed4a 713 return Sha256Update(sha256, data, len);
sPymbed 0:1387ff3eed4a 714 }
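/* Updates may be split arbitrarily: wc_Sha256Update(&sha, (byte*)"ab", 2)
 * followed by wc_Sha256Update(&sha, (byte*)"c", 1) gives the same digest
 * as a single 3-byte update, since partial input is buffered in
 * sha256->buffer until a full WC_SHA256_BLOCK_SIZE block is available. */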
sPymbed 0:1387ff3eed4a 715
sPymbed 0:1387ff3eed4a 716 static WC_INLINE int Sha256Final(wc_Sha256* sha256)
sPymbed 0:1387ff3eed4a 717 {
sPymbed 0:1387ff3eed4a 718
sPymbed 0:1387ff3eed4a 719 int ret;
sPymbed 0:1387ff3eed4a 720 byte* local = (byte*)sha256->buffer;
sPymbed 0:1387ff3eed4a 721
sPymbed 0:1387ff3eed4a 722 if (sha256 == NULL) {
sPymbed 0:1387ff3eed4a 723 return BAD_FUNC_ARG;
sPymbed 0:1387ff3eed4a 724 }
sPymbed 0:1387ff3eed4a 725
sPymbed 0:1387ff3eed4a 726 AddLength(sha256, sha256->buffLen); /* before adding pads */
sPymbed 0:1387ff3eed4a 727 local[sha256->buffLen++] = 0x80; /* append the mandatory 1 bit (0x80 byte) */
sPymbed 0:1387ff3eed4a 728
sPymbed 0:1387ff3eed4a 729 /* pad with zeros */
sPymbed 0:1387ff3eed4a 730 if (sha256->buffLen > WC_SHA256_PAD_SIZE) {
sPymbed 0:1387ff3eed4a 731 XMEMSET(&local[sha256->buffLen], 0,
sPymbed 0:1387ff3eed4a 732 WC_SHA256_BLOCK_SIZE - sha256->buffLen);
sPymbed 0:1387ff3eed4a 733 sha256->buffLen += WC_SHA256_BLOCK_SIZE - sha256->buffLen;
sPymbed 0:1387ff3eed4a 734
sPymbed 0:1387ff3eed4a 735 {
sPymbed 0:1387ff3eed4a 736 #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
sPymbed 0:1387ff3eed4a 737 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
sPymbed 0:1387ff3eed4a 738 if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
sPymbed 0:1387ff3eed4a 739 #endif
sPymbed 0:1387ff3eed4a 740 {
sPymbed 0:1387ff3eed4a 741 ByteReverseWords(sha256->buffer, sha256->buffer,
sPymbed 0:1387ff3eed4a 742 WC_SHA256_BLOCK_SIZE);
sPymbed 0:1387ff3eed4a 743 }
sPymbed 0:1387ff3eed4a 744 #endif
sPymbed 0:1387ff3eed4a 745 }
sPymbed 0:1387ff3eed4a 746
sPymbed 0:1387ff3eed4a 747 ret = XTRANSFORM(sha256);
sPymbed 0:1387ff3eed4a 748 if (ret != 0)
sPymbed 0:1387ff3eed4a 749 return ret;
sPymbed 0:1387ff3eed4a 750
sPymbed 0:1387ff3eed4a 751 sha256->buffLen = 0;
sPymbed 0:1387ff3eed4a 752 }
sPymbed 0:1387ff3eed4a 753 XMEMSET(&local[sha256->buffLen], 0, WC_SHA256_PAD_SIZE - sha256->buffLen);
sPymbed 0:1387ff3eed4a 754
sPymbed 0:1387ff3eed4a 755 /* put lengths in bits */
sPymbed 0:1387ff3eed4a 756 sha256->hiLen = (sha256->loLen >> (8 * sizeof(sha256->loLen) - 3)) +
sPymbed 0:1387ff3eed4a 757 (sha256->hiLen << 3);
sPymbed 0:1387ff3eed4a 758 sha256->loLen = sha256->loLen << 3;
sPymbed 0:1387ff3eed4a 759
sPymbed 0:1387ff3eed4a 760 /* store lengths */
sPymbed 0:1387ff3eed4a 761 #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
sPymbed 0:1387ff3eed4a 762 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
sPymbed 0:1387ff3eed4a 763 if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
sPymbed 0:1387ff3eed4a 764 #endif
sPymbed 0:1387ff3eed4a 765 {
sPymbed 0:1387ff3eed4a 766 ByteReverseWords(sha256->buffer, sha256->buffer,
sPymbed 0:1387ff3eed4a 767 WC_SHA256_BLOCK_SIZE);
sPymbed 0:1387ff3eed4a 768 }
sPymbed 0:1387ff3eed4a 769 #endif
sPymbed 0:1387ff3eed4a 770 /* ! length ordering dependent on digest endian type ! */
sPymbed 0:1387ff3eed4a 771 XMEMCPY(&local[WC_SHA256_PAD_SIZE], &sha256->hiLen, sizeof(word32));
sPymbed 0:1387ff3eed4a 772 XMEMCPY(&local[WC_SHA256_PAD_SIZE + sizeof(word32)], &sha256->loLen,
sPymbed 0:1387ff3eed4a 773 sizeof(word32));
sPymbed 0:1387ff3eed4a 774
sPymbed 0:1387ff3eed4a 775 #if defined(FREESCALE_MMCAU_SHA) || defined(HAVE_INTEL_AVX1) || \
sPymbed 0:1387ff3eed4a 776 defined(HAVE_INTEL_AVX2)
sPymbed 0:1387ff3eed4a 777 /* Kinetis requires only these bytes reversed */
sPymbed 0:1387ff3eed4a 778 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
sPymbed 0:1387ff3eed4a 779 if (IS_INTEL_AVX1(intel_flags) || IS_INTEL_AVX2(intel_flags))
sPymbed 0:1387ff3eed4a 780 #endif
sPymbed 0:1387ff3eed4a 781 {
sPymbed 0:1387ff3eed4a 782 ByteReverseWords(
sPymbed 0:1387ff3eed4a 783 &sha256->buffer[WC_SHA256_PAD_SIZE / sizeof(word32)],
sPymbed 0:1387ff3eed4a 784 &sha256->buffer[WC_SHA256_PAD_SIZE / sizeof(word32)],
sPymbed 0:1387ff3eed4a 785 2 * sizeof(word32));
sPymbed 0:1387ff3eed4a 786 }
sPymbed 0:1387ff3eed4a 787 #endif
sPymbed 0:1387ff3eed4a 788
sPymbed 0:1387ff3eed4a 789 return XTRANSFORM(sha256);
sPymbed 0:1387ff3eed4a 790 }
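/* Final block layout produced above, per FIPS 180-4:
 *
 *   | message tail | 0x80 | 0x00 ... 0x00 | 64-bit bit count, big-endian |
 *                                           (last 8 bytes of the block)
 *
 * When fewer than 8 length bytes fit (buffLen > WC_SHA256_PAD_SIZE == 56),
 * an extra padding-only block is transformed first. */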
sPymbed 0:1387ff3eed4a 791
sPymbed 0:1387ff3eed4a 792 int wc_Sha256FinalRaw(wc_Sha256* sha256, byte* hash)
sPymbed 0:1387ff3eed4a 793 {
sPymbed 0:1387ff3eed4a 794 #ifdef LITTLE_ENDIAN_ORDER
sPymbed 0:1387ff3eed4a 795 word32 digest[WC_SHA256_DIGEST_SIZE / sizeof(word32)];
sPymbed 0:1387ff3eed4a 796 #endif
sPymbed 0:1387ff3eed4a 797
sPymbed 0:1387ff3eed4a 798 if (sha256 == NULL || hash == NULL) {
sPymbed 0:1387ff3eed4a 799 return BAD_FUNC_ARG;
sPymbed 0:1387ff3eed4a 800 }
sPymbed 0:1387ff3eed4a 801
sPymbed 0:1387ff3eed4a 802 #ifdef LITTLE_ENDIAN_ORDER
sPymbed 0:1387ff3eed4a 803 ByteReverseWords((word32*)digest, (word32*)sha256->digest,
sPymbed 0:1387ff3eed4a 804 WC_SHA256_DIGEST_SIZE);
sPymbed 0:1387ff3eed4a 805 XMEMCPY(hash, digest, WC_SHA256_DIGEST_SIZE);
sPymbed 0:1387ff3eed4a 806 #else
sPymbed 0:1387ff3eed4a 807 XMEMCPY(hash, sha256->digest, WC_SHA256_DIGEST_SIZE);
sPymbed 0:1387ff3eed4a 808 #endif
sPymbed 0:1387ff3eed4a 809
sPymbed 0:1387ff3eed4a 810 return 0;
sPymbed 0:1387ff3eed4a 811 }
sPymbed 0:1387ff3eed4a 812
sPymbed 0:1387ff3eed4a 813 int wc_Sha256Final(wc_Sha256* sha256, byte* hash)
sPymbed 0:1387ff3eed4a 814 {
sPymbed 0:1387ff3eed4a 815 int ret;
sPymbed 0:1387ff3eed4a 816
sPymbed 0:1387ff3eed4a 817 if (sha256 == NULL || hash == NULL) {
sPymbed 0:1387ff3eed4a 818 return BAD_FUNC_ARG;
sPymbed 0:1387ff3eed4a 819 }
sPymbed 0:1387ff3eed4a 820
sPymbed 0:1387ff3eed4a 821 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
sPymbed 0:1387ff3eed4a 822 if (sha256->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA256) {
sPymbed 0:1387ff3eed4a 823 #if defined(HAVE_INTEL_QA)
sPymbed 0:1387ff3eed4a 824 return IntelQaSymSha256(&sha256->asyncDev, hash, NULL,
sPymbed 0:1387ff3eed4a 825 WC_SHA256_DIGEST_SIZE);
sPymbed 0:1387ff3eed4a 826 #endif
sPymbed 0:1387ff3eed4a 827 }
sPymbed 0:1387ff3eed4a 828 #endif /* WOLFSSL_ASYNC_CRYPT */
sPymbed 0:1387ff3eed4a 829
sPymbed 0:1387ff3eed4a 830 ret = Sha256Final(sha256);
sPymbed 0:1387ff3eed4a 831 if (ret != 0)
sPymbed 0:1387ff3eed4a 832 return ret;
sPymbed 0:1387ff3eed4a 833
sPymbed 0:1387ff3eed4a 834 #if defined(LITTLE_ENDIAN_ORDER)
sPymbed 0:1387ff3eed4a 835 ByteReverseWords(sha256->digest, sha256->digest, WC_SHA256_DIGEST_SIZE);
sPymbed 0:1387ff3eed4a 836 #endif
sPymbed 0:1387ff3eed4a 837 XMEMCPY(hash, sha256->digest, WC_SHA256_DIGEST_SIZE);
sPymbed 0:1387ff3eed4a 838
sPymbed 0:1387ff3eed4a 839 return InitSha256(sha256); /* reset state */
sPymbed 0:1387ff3eed4a 840 }
sPymbed 0:1387ff3eed4a 841
sPymbed 0:1387ff3eed4a 842 #endif /* XTRANSFORM */
sPymbed 0:1387ff3eed4a 843
sPymbed 0:1387ff3eed4a 844
sPymbed 0:1387ff3eed4a 845 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
sPymbed 0:1387ff3eed4a 846
sPymbed 0:1387ff3eed4a 847 #define _LOAD_DIGEST() \
sPymbed 0:1387ff3eed4a 848 "movl (%[sha256]), %%r8d \n\t" \
sPymbed 0:1387ff3eed4a 849 "movl 4(%[sha256]), %%r9d \n\t" \
sPymbed 0:1387ff3eed4a 850 "movl 8(%[sha256]), %%r10d\n\t" \
sPymbed 0:1387ff3eed4a 851 "movl 12(%[sha256]), %%r11d\n\t" \
sPymbed 0:1387ff3eed4a 852 "movl 16(%[sha256]), %%r12d\n\t" \
sPymbed 0:1387ff3eed4a 853 "movl 20(%[sha256]), %%r13d\n\t" \
sPymbed 0:1387ff3eed4a 854 "movl 24(%[sha256]), %%r14d\n\t" \
sPymbed 0:1387ff3eed4a 855 "movl 28(%[sha256]), %%r15d\n\t"
sPymbed 0:1387ff3eed4a 856
sPymbed 0:1387ff3eed4a 857 #define _STORE_ADD_DIGEST() \
sPymbed 0:1387ff3eed4a 858 "addl %%r8d , (%[sha256])\n\t" \
sPymbed 0:1387ff3eed4a 859 "addl %%r9d , 4(%[sha256])\n\t" \
sPymbed 0:1387ff3eed4a 860 "addl %%r10d, 8(%[sha256])\n\t" \
sPymbed 0:1387ff3eed4a 861 "addl %%r11d, 12(%[sha256])\n\t" \
sPymbed 0:1387ff3eed4a 862 "addl %%r12d, 16(%[sha256])\n\t" \
sPymbed 0:1387ff3eed4a 863 "addl %%r13d, 20(%[sha256])\n\t" \
sPymbed 0:1387ff3eed4a 864 "addl %%r14d, 24(%[sha256])\n\t" \
sPymbed 0:1387ff3eed4a 865 "addl %%r15d, 28(%[sha256])\n\t"
sPymbed 0:1387ff3eed4a 866
sPymbed 0:1387ff3eed4a 867 #define _ADD_DIGEST() \
sPymbed 0:1387ff3eed4a 868 "addl (%[sha256]), %%r8d \n\t" \
sPymbed 0:1387ff3eed4a 869 "addl 4(%[sha256]), %%r9d \n\t" \
sPymbed 0:1387ff3eed4a 870 "addl 8(%[sha256]), %%r10d\n\t" \
sPymbed 0:1387ff3eed4a 871 "addl 12(%[sha256]), %%r11d\n\t" \
sPymbed 0:1387ff3eed4a 872 "addl 16(%[sha256]), %%r12d\n\t" \
sPymbed 0:1387ff3eed4a 873 "addl 20(%[sha256]), %%r13d\n\t" \
sPymbed 0:1387ff3eed4a 874 "addl 24(%[sha256]), %%r14d\n\t" \
sPymbed 0:1387ff3eed4a 875 "addl 28(%[sha256]), %%r15d\n\t"
sPymbed 0:1387ff3eed4a 876
sPymbed 0:1387ff3eed4a 877 #define _STORE_DIGEST() \
sPymbed 0:1387ff3eed4a 878 "movl %%r8d , (%[sha256])\n\t" \
sPymbed 0:1387ff3eed4a 879 "movl %%r9d , 4(%[sha256])\n\t" \
sPymbed 0:1387ff3eed4a 880 "movl %%r10d, 8(%[sha256])\n\t" \
sPymbed 0:1387ff3eed4a 881 "movl %%r11d, 12(%[sha256])\n\t" \
sPymbed 0:1387ff3eed4a 882 "movl %%r12d, 16(%[sha256])\n\t" \
sPymbed 0:1387ff3eed4a 883 "movl %%r13d, 20(%[sha256])\n\t" \
sPymbed 0:1387ff3eed4a 884 "movl %%r14d, 24(%[sha256])\n\t" \
sPymbed 0:1387ff3eed4a 885 "movl %%r15d, 28(%[sha256])\n\t"
sPymbed 0:1387ff3eed4a 886
sPymbed 0:1387ff3eed4a 887 #define LOAD_DIGEST() \
sPymbed 0:1387ff3eed4a 888 _LOAD_DIGEST()
sPymbed 0:1387ff3eed4a 889
sPymbed 0:1387ff3eed4a 890 #define STORE_ADD_DIGEST() \
sPymbed 0:1387ff3eed4a 891 _STORE_ADD_DIGEST()
sPymbed 0:1387ff3eed4a 892
sPymbed 0:1387ff3eed4a 893 #define ADD_DIGEST() \
sPymbed 0:1387ff3eed4a 894 _ADD_DIGEST()
sPymbed 0:1387ff3eed4a 895
sPymbed 0:1387ff3eed4a 896 #define STORE_DIGEST() \
sPymbed 0:1387ff3eed4a 897 _STORE_DIGEST()
sPymbed 0:1387ff3eed4a 898
sPymbed 0:1387ff3eed4a 899
sPymbed 0:1387ff3eed4a 900 #define S_0 %r8d
sPymbed 0:1387ff3eed4a 901 #define S_1 %r9d
sPymbed 0:1387ff3eed4a 902 #define S_2 %r10d
sPymbed 0:1387ff3eed4a 903 #define S_3 %r11d
sPymbed 0:1387ff3eed4a 904 #define S_4 %r12d
sPymbed 0:1387ff3eed4a 905 #define S_5 %r13d
sPymbed 0:1387ff3eed4a 906 #define S_6 %r14d
sPymbed 0:1387ff3eed4a 907 #define S_7 %r15d
sPymbed 0:1387ff3eed4a 908
sPymbed 0:1387ff3eed4a 909 #define L1 "%%edx"
sPymbed 0:1387ff3eed4a 910 #define L2 "%%ecx"
sPymbed 0:1387ff3eed4a 911 #define L3 "%%eax"
sPymbed 0:1387ff3eed4a 912 #define L4 "%%ebx"
sPymbed 0:1387ff3eed4a 913 #define WK "%%rsp"
sPymbed 0:1387ff3eed4a 914
sPymbed 0:1387ff3eed4a 915 #define WORK_REGS "eax", "ebx", "ecx", "edx"
sPymbed 0:1387ff3eed4a 916 #define STATE_REGS "r8","r9","r10","r11","r12","r13","r14","r15"
sPymbed 0:1387ff3eed4a 917 #define XMM_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", \
sPymbed 0:1387ff3eed4a 918 "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13"
sPymbed 0:1387ff3eed4a 919
sPymbed 0:1387ff3eed4a 920 #if defined(HAVE_INTEL_RORX)
sPymbed 0:1387ff3eed4a 921 #define RND_STEP_RORX_0_1(a, b, c, d, e, f, g, h, i) \
sPymbed 0:1387ff3eed4a 922 /* L3 = f */ \
sPymbed 0:1387ff3eed4a 923 "movl %" #f ", " L3 "\n\t" \
sPymbed 0:1387ff3eed4a 924 /* L2 = e>>>11 */ \
sPymbed 0:1387ff3eed4a 925 "rorx $11, %" #e ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 926 /* h += w_k */ \
sPymbed 0:1387ff3eed4a 927 "addl (" #i ")*4(" WK "), %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 928
sPymbed 0:1387ff3eed4a 929 #define RND_STEP_RORX_0_2(a, b, c, d, e, f, g, h, i) \
sPymbed 0:1387ff3eed4a 930 /* L2 = (e>>>6) ^ (e>>>11) */ \
sPymbed 0:1387ff3eed4a 931 "xorl " L1 ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 932 /* L3 = f ^ g */ \
sPymbed 0:1387ff3eed4a 933 "xorl %" #g ", " L3 "\n\t" \
sPymbed 0:1387ff3eed4a 934 /* L1 = e>>>25 */ \
sPymbed 0:1387ff3eed4a 935 "rorx $25, %" #e ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 936
sPymbed 0:1387ff3eed4a 937 #define RND_STEP_RORX_0_3(a, b, c, d, e, f, g, h, i) \
sPymbed 0:1387ff3eed4a 938 /* L3 = (f ^ g) & e */ \
sPymbed 0:1387ff3eed4a 939 "andl %" #e ", " L3 "\n\t" \
sPymbed 0:1387ff3eed4a 940 /* L1 = Sigma1(e) */ \
sPymbed 0:1387ff3eed4a 941 "xorl " L2 ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 942 /* L2 = a>>>13 */ \
sPymbed 0:1387ff3eed4a 943 "rorx $13, %" #a ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 944
sPymbed 0:1387ff3eed4a 945 #define RND_STEP_RORX_0_4(a, b, c, d, e, f, g, h, i) \
sPymbed 0:1387ff3eed4a 946 /* h += Sigma1(e) */ \
sPymbed 0:1387ff3eed4a 947 "addl " L1 ", %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 948 /* L1 = a>>>2 */ \
sPymbed 0:1387ff3eed4a 949 "rorx $2, %" #a ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 950 /* L3 = Ch(e,f,g) */ \
sPymbed 0:1387ff3eed4a 951 "xorl %" #g ", " L3 "\n\t" \
sPymbed 0:1387ff3eed4a 952
sPymbed 0:1387ff3eed4a 953 #define RND_STEP_RORX_0_5(a, b, c, d, e, f, g, h, i) \
sPymbed 0:1387ff3eed4a 954 /* L2 = (a>>>2) ^ (a>>>13) */ \
sPymbed 0:1387ff3eed4a 955 "xorl " L1 ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 956 /* L1 = a>>>22 */ \
sPymbed 0:1387ff3eed4a 957 "rorx $22, %" #a ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 958 /* h += Ch(e,f,g) */ \
sPymbed 0:1387ff3eed4a 959 "addl " L3 ", %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 960
sPymbed 0:1387ff3eed4a 961 #define RND_STEP_RORX_0_6(a, b, c, d, e, f, g, h, i) \
sPymbed 0:1387ff3eed4a 962 /* L1 = Sigma0(a) */ \
sPymbed 0:1387ff3eed4a 963 "xorl " L2 ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 964 /* L3 = b */ \
sPymbed 0:1387ff3eed4a 965 "movl %" #b ", " L3 "\n\t" \
sPymbed 0:1387ff3eed4a 966 /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */ \
sPymbed 0:1387ff3eed4a 967 "addl %" #h ", %" #d "\n\t" \
sPymbed 0:1387ff3eed4a 968
sPymbed 0:1387ff3eed4a 969 #define RND_STEP_RORX_0_7(a, b, c, d, e, f, g, h, i) \
sPymbed 0:1387ff3eed4a 970 /* L3 = a ^ b */ \
sPymbed 0:1387ff3eed4a 971 "xorl %" #a ", " L3 "\n\t" \
sPymbed 0:1387ff3eed4a 972 /* h += Sigma0(a) */ \
sPymbed 0:1387ff3eed4a 973 "addl " L1 ", %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 974 /* L4 = (a ^ b) & (b ^ c) */ \
sPymbed 0:1387ff3eed4a 975 "andl " L3 ", " L4 "\n\t" \
sPymbed 0:1387ff3eed4a 976
sPymbed 0:1387ff3eed4a 977 #define RND_STEP_RORX_0_8(a, b, c, d, e, f, g, h, i) \
sPymbed 0:1387ff3eed4a 978 /* L4 = Maj(a,b,c) */ \
sPymbed 0:1387ff3eed4a 979 "xorl %" #b ", " L4 "\n\t" \
sPymbed 0:1387ff3eed4a 980 /* L1 = d>>>6 (= e>>>6 next RND) */ \
sPymbed 0:1387ff3eed4a 981 "rorx $6, %" #d ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 982 /* h += Maj(a,b,c) */ \
sPymbed 0:1387ff3eed4a 983 "addl " L4 ", %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 984
sPymbed 0:1387ff3eed4a 985 #define RND_STEP_RORX_1_1(a, b, c, d, e, f, g, h, i) \
sPymbed 0:1387ff3eed4a 986 /* L4 = f */ \
sPymbed 0:1387ff3eed4a 987 "movl %" #f ", " L4 "\n\t" \
sPymbed 0:1387ff3eed4a 988 /* L2 = e>>>11 */ \
sPymbed 0:1387ff3eed4a 989 "rorx $11, %" #e ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 990 /* h += w_k */ \
sPymbed 0:1387ff3eed4a 991 "addl (" #i ")*4(" WK "), %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 992
sPymbed 0:1387ff3eed4a 993 #define RND_STEP_RORX_1_2(a, b, c, d, e, f, g, h, i) \
sPymbed 0:1387ff3eed4a 994 /* L2 = (e>>>6) ^ (e>>>11) */ \
sPymbed 0:1387ff3eed4a 995 "xorl " L1 ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 996 /* L4 = f ^ g */ \
sPymbed 0:1387ff3eed4a 997 "xorl %" #g ", " L4 "\n\t" \
sPymbed 0:1387ff3eed4a 998 /* L1 = e>>>25 */ \
sPymbed 0:1387ff3eed4a 999 "rorx $25, %" #e ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1000
sPymbed 0:1387ff3eed4a 1001 #define RND_STEP_RORX_1_3(a, b, c, d, e, f, g, h, i) \
sPymbed 0:1387ff3eed4a 1002 /* L4 = (f ^ g) & e */ \
sPymbed 0:1387ff3eed4a 1003 "andl %" #e ", " L4 "\n\t" \
sPymbed 0:1387ff3eed4a 1004 /* L1 = Sigma1(e) */ \
sPymbed 0:1387ff3eed4a 1005 "xorl " L2 ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1006 /* L2 = a>>>13 */ \
sPymbed 0:1387ff3eed4a 1007 "rorx $13, %" #a ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1008
sPymbed 0:1387ff3eed4a 1009 #define RND_STEP_RORX_1_4(a, b, c, d, e, f, g, h, i) \
sPymbed 0:1387ff3eed4a 1010 /* h += Sigma1(e) */ \
sPymbed 0:1387ff3eed4a 1011 "addl " L1 ", %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 1012 /* L1 = a>>>2 */ \
sPymbed 0:1387ff3eed4a 1013 "rorx $2, %" #a ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1014 /* L4 = Ch(e,f,g) */ \
sPymbed 0:1387ff3eed4a 1015 "xorl %" #g ", " L4 "\n\t" \
sPymbed 0:1387ff3eed4a 1016
sPymbed 0:1387ff3eed4a 1017 #define RND_STEP_RORX_1_5(a, b, c, d, e, f, g, h, i) \
sPymbed 0:1387ff3eed4a 1018 /* L2 = (a>>>2) ^ (a>>>13) */ \
sPymbed 0:1387ff3eed4a 1019 "xorl " L1 ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1020 /* L1 = a>>>22 */ \
sPymbed 0:1387ff3eed4a 1021 "rorx $22, %" #a ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1022 /* h += Ch(e,f,g) */ \
sPymbed 0:1387ff3eed4a 1023 "addl " L4 ", %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 1024
sPymbed 0:1387ff3eed4a 1025 #define RND_STEP_RORX_1_6(a, b, c, d, e, f, g, h, i) \
sPymbed 0:1387ff3eed4a 1026 /* L1 = Sigma0(a) */ \
sPymbed 0:1387ff3eed4a 1027 "xorl " L2 ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1028 /* L4 = b */ \
sPymbed 0:1387ff3eed4a 1029 "movl %" #b ", " L4 "\n\t" \
sPymbed 0:1387ff3eed4a 1030 /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */ \
sPymbed 0:1387ff3eed4a 1031 "addl %" #h ", %" #d "\n\t" \
sPymbed 0:1387ff3eed4a 1032
sPymbed 0:1387ff3eed4a 1033 #define RND_STEP_RORX_1_7(a, b, c, d, e, f, g, h, i) \
sPymbed 0:1387ff3eed4a 1034 /* L4 = a ^ b */ \
sPymbed 0:1387ff3eed4a 1035 "xorl %" #a ", " L4 "\n\t" \
sPymbed 0:1387ff3eed4a 1036 /* h += Sigma0(a) */ \
sPymbed 0:1387ff3eed4a 1037 "addl " L1 ", %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 1038 /* L3 = (a ^ b) & (b ^ c) */ \
sPymbed 0:1387ff3eed4a 1039 "andl " L4 ", " L3 "\n\t" \
sPymbed 0:1387ff3eed4a 1040
sPymbed 0:1387ff3eed4a 1041 #define RND_STEP_RORX_1_8(a, b, c, d, e, f, g, h, i) \
sPymbed 0:1387ff3eed4a 1042 /* L3 = Maj(a,b,c) */ \
sPymbed 0:1387ff3eed4a 1043 "xorl %" #b ", " L3 "\n\t" \
sPymbed 0:1387ff3eed4a 1044 /* L1 = d>>>6 (= e>>>6 next RND) */ \
sPymbed 0:1387ff3eed4a 1045 "rorx $6, %" #d ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1046 /* h += Maj(a,b,c) */ \
sPymbed 0:1387ff3eed4a 1047 "addl " L3 ", %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 1048
sPymbed 0:1387ff3eed4a 1049 #define _RND_RORX_X_0(a, b, c, d, e, f, g, h, i) \
sPymbed 0:1387ff3eed4a 1050 /* L1 = e>>>6 */ \
sPymbed 0:1387ff3eed4a 1051 "rorx $6, %" #e ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1052 /* L2 = e>>>11 */ \
sPymbed 0:1387ff3eed4a 1053 "rorx $11, %" #e ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1054 /* Prev RND: h += Maj(a,b,c) */ \
sPymbed 0:1387ff3eed4a 1055 "addl " L3 ", %" #a "\n\t" \
sPymbed 0:1387ff3eed4a 1056 /* h += w_k */ \
sPymbed 0:1387ff3eed4a 1057 "addl (" #i ")*4(" WK "), %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 1058 /* L3 = f */ \
sPymbed 0:1387ff3eed4a 1059 "movl %" #f ", " L3 "\n\t" \
sPymbed 0:1387ff3eed4a 1060 /* L2 = (e>>>6) ^ (e>>>11) */ \
sPymbed 0:1387ff3eed4a 1061 "xorl " L1 ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1062 /* L3 = f ^ g */ \
sPymbed 0:1387ff3eed4a 1063 "xorl %" #g ", " L3 "\n\t" \
sPymbed 0:1387ff3eed4a 1064 /* L1 = e>>>25 */ \
sPymbed 0:1387ff3eed4a 1065 "rorx $25, %" #e ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1066 /* L1 = Sigma1(e) */ \
sPymbed 0:1387ff3eed4a 1067 "xorl " L2 ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1068 /* L3 = (f ^ g) & e */ \
sPymbed 0:1387ff3eed4a 1069 "andl %" #e ", " L3 "\n\t" \
sPymbed 0:1387ff3eed4a 1070 /* h += Sigma1(e) */ \
sPymbed 0:1387ff3eed4a 1071 "addl " L1 ", %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 1072 /* L1 = a>>>2 */ \
sPymbed 0:1387ff3eed4a 1073 "rorx $2, %" #a ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1074 /* L2 = a>>>13 */ \
sPymbed 0:1387ff3eed4a 1075 "rorx $13, %" #a ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1076 /* L3 = Ch(e,f,g) */ \
sPymbed 0:1387ff3eed4a 1077 "xorl %" #g ", " L3 "\n\t" \
sPymbed 0:1387ff3eed4a 1078 /* L2 = (a>>>2) ^ (a>>>13) */ \
sPymbed 0:1387ff3eed4a 1079 "xorl " L1 ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1080 /* L1 = a>>>22 */ \
sPymbed 0:1387ff3eed4a 1081 "rorx $22, %" #a ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1082 /* h += Ch(e,f,g) */ \
sPymbed 0:1387ff3eed4a 1083 "addl " L3 ", %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 1084 /* L1 = Sigma0(a) */ \
sPymbed 0:1387ff3eed4a 1085 "xorl " L2 ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1086 /* L3 = b */ \
sPymbed 0:1387ff3eed4a 1087 "movl %" #b ", " L3 "\n\t" \
sPymbed 0:1387ff3eed4a 1088 /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */ \
sPymbed 0:1387ff3eed4a 1089 "addl %" #h ", %" #d "\n\t" \
sPymbed 0:1387ff3eed4a 1090 /* L3 = a ^ b */ \
sPymbed 0:1387ff3eed4a 1091 "xorl %" #a ", " L3 "\n\t" \
sPymbed 0:1387ff3eed4a 1092 /* L4 = (a ^ b) & (b ^ c) */ \
sPymbed 0:1387ff3eed4a 1093 "andl " L3 ", " L4 "\n\t" \
sPymbed 0:1387ff3eed4a 1094 /* h += Sigma0(a) */ \
sPymbed 0:1387ff3eed4a 1095 "addl " L1 ", %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 1096 /* L4 = Maj(a,b,c) */ \
sPymbed 0:1387ff3eed4a 1097 "xorl %" #b ", " L4 "\n\t" \
sPymbed 0:1387ff3eed4a 1098
sPymbed 0:1387ff3eed4a 1099 #define _RND_RORX_X_1(a, b, c, d, e, f, g, h, i) \
sPymbed 0:1387ff3eed4a 1100 /* L1 = e>>>6 */ \
sPymbed 0:1387ff3eed4a 1101 "rorx $6, %" #e ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1102 /* L2 = e>>>11 */ \
sPymbed 0:1387ff3eed4a 1103 "rorx $11, %" #e ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1104 /* Prev RND: h += Maj(a,b,c) */ \
sPymbed 0:1387ff3eed4a 1105 "addl " L4 ", %" #a "\n\t" \
sPymbed 0:1387ff3eed4a 1106 /* h += w_k */ \
sPymbed 0:1387ff3eed4a 1107 "addl (" #i ")*4(" WK "), %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 1108 /* L4 = f */ \
sPymbed 0:1387ff3eed4a 1109 "movl %" #f ", " L4 "\n\t" \
sPymbed 0:1387ff3eed4a 1110 /* L2 = (e>>>6) ^ (e>>>11) */ \
sPymbed 0:1387ff3eed4a 1111 "xorl " L1 ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1112 /* L4 = f ^ g */ \
sPymbed 0:1387ff3eed4a 1113 "xorl %" #g ", " L4 "\n\t" \
sPymbed 0:1387ff3eed4a 1114 /* L1 = e>>>25 */ \
sPymbed 0:1387ff3eed4a 1115 "rorx $25, %" #e ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1116 /* L1 = Sigma1(e) */ \
sPymbed 0:1387ff3eed4a 1117 "xorl " L2 ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1118 /* L4 = (f ^ g) & e */ \
sPymbed 0:1387ff3eed4a 1119 "andl %" #e ", " L4 "\n\t" \
sPymbed 0:1387ff3eed4a 1120 /* h += Sigma1(e) */ \
sPymbed 0:1387ff3eed4a 1121 "addl " L1 ", %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 1122 /* L1 = a>>>2 */ \
sPymbed 0:1387ff3eed4a 1123 "rorx $2, %" #a ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1124 /* L2 = a>>>13 */ \
sPymbed 0:1387ff3eed4a 1125 "rorx $13, %" #a ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1126 /* L4 = Ch(e,f,g) */ \
sPymbed 0:1387ff3eed4a 1127 "xorl %" #g ", " L4 "\n\t" \
sPymbed 0:1387ff3eed4a 1128 /* L2 = (a>>>2) ^ (a>>>13) */ \
sPymbed 0:1387ff3eed4a 1129 "xorl " L1 ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1130 /* L1 = a>>>22 */ \
sPymbed 0:1387ff3eed4a 1131 "rorx $22, %" #a ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1132 /* h += Ch(e,f,g) */ \
sPymbed 0:1387ff3eed4a 1133 "addl " L4 ", %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 1134 /* L1 = Sigma0(a) */ \
sPymbed 0:1387ff3eed4a 1135 "xorl " L2 ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1136 /* L4 = b */ \
sPymbed 0:1387ff3eed4a 1137 "movl %" #b ", " L4 "\n\t" \
sPymbed 0:1387ff3eed4a 1138 /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */ \
sPymbed 0:1387ff3eed4a 1139 "addl %" #h ", %" #d "\n\t" \
sPymbed 0:1387ff3eed4a 1140 /* L4 = a ^ b */ \
sPymbed 0:1387ff3eed4a 1141 "xorl %" #a ", " L4 "\n\t" \
sPymbed 0:1387ff3eed4a 1142 /* L3 = (a ^ b) & (b ^ c) */ \
sPymbed 0:1387ff3eed4a 1143 "andl " L4 ", " L3 "\n\t" \
sPymbed 0:1387ff3eed4a 1144 /* h += Sigma0(a) */ \
sPymbed 0:1387ff3eed4a 1145 "addl " L1 ", %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 1146 /* L3 = Maj(a,b,c) */ \
sPymbed 0:1387ff3eed4a 1147 "xorl %" #b ", " L3 "\n\t" \
sPymbed 0:1387ff3eed4a 1148
sPymbed 0:1387ff3eed4a 1149
sPymbed 0:1387ff3eed4a 1150 #define RND_RORX_X_0(a,b,c,d,e,f,g,h,i) \
sPymbed 0:1387ff3eed4a 1151 _RND_RORX_X_0(a,b,c,d,e,f,g,h,i)
sPymbed 0:1387ff3eed4a 1152 #define RND_RORX_X_1(a,b,c,d,e,f,g,h,i) \
sPymbed 0:1387ff3eed4a 1153 _RND_RORX_X_1(a,b,c,d,e,f,g,h,i)
sPymbed 0:1387ff3eed4a 1154
sPymbed 0:1387ff3eed4a 1155 #define RND_RORX_X4(a,b,c,d,e,f,g,h,i) \
sPymbed 0:1387ff3eed4a 1156 RND_RORX_X_0(a,b,c,d,e,f,g,h,i+0) \
sPymbed 0:1387ff3eed4a 1157 RND_RORX_X_1(h,a,b,c,d,e,f,g,i+1) \
sPymbed 0:1387ff3eed4a 1158 RND_RORX_X_0(g,h,a,b,c,d,e,f,i+2) \
sPymbed 0:1387ff3eed4a 1159 RND_RORX_X_1(f,g,h,a,b,c,d,e,i+3)
sPymbed 0:1387ff3eed4a 1160
sPymbed 0:1387ff3eed4a 1161 #endif /* HAVE_INTEL_RORX */
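The RND_RORX macros defer each round's Maj(a,b,c): it is built in the scratch register during round i but only added to `a` at the top of round i+1 (the "Prev RND" adds), so the caller zeroes the scratch register before the first round and flushes the last Maj afterwards. They also reuse the (a ^ b) of one round as the (b ^ c) of the next, which relies on the identity below. A minimal stand-alone reference sketch (not part of the build, not wolfSSL API):

/* Hedged reference sketch: Maj(a,b,c) == ((a ^ b) & (b ^ c)) ^ b, so the
 * (a ^ b) computed in round i serves as the (b ^ c) of round i+1. */
#include <stdint.h>
#include <stdio.h>

static uint32_t maj_direct(uint32_t a, uint32_t b, uint32_t c)
{
    return (a & b) ^ (a & c) ^ (b & c);
}

static uint32_t maj_xor_form(uint32_t a, uint32_t b, uint32_t c)
{
    return ((a ^ b) & (b ^ c)) ^ b;
}

int main(void)
{
    /* SHA-256 initial a, b, c; both forms print the same value */
    uint32_t a = 0x6a09e667, b = 0xbb67ae85, c = 0x3c6ef372;
    printf("%08x %08x\n", maj_direct(a, b, c), maj_xor_form(a, b, c));
    return 0;
}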
sPymbed 0:1387ff3eed4a 1162
sPymbed 0:1387ff3eed4a 1163 #define RND_STEP_0_1(a,b,c,d,e,f,g,h,i) \
sPymbed 0:1387ff3eed4a 1164 /* L1 = e>>>14 */ \
sPymbed 0:1387ff3eed4a 1165 "rorl $14, " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1166
sPymbed 0:1387ff3eed4a 1167 #define RND_STEP_0_2(a,b,c,d,e,f,g,h,i) \
sPymbed 0:1387ff3eed4a 1168 /* L3 = b */ \
sPymbed 0:1387ff3eed4a 1169 "movl %" #b ", " L3 "\n\t" \
sPymbed 0:1387ff3eed4a 1170 /* L2 = f */ \
sPymbed 0:1387ff3eed4a 1171 "movl %" #f ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1172 /* h += w_k */ \
sPymbed 0:1387ff3eed4a 1173 "addl (" #i ")*4(" WK "), %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 1174 /* L2 = f ^ g */ \
sPymbed 0:1387ff3eed4a 1175 "xorl %" #g ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1176
sPymbed 0:1387ff3eed4a 1177 #define RND_STEP_0_3(a,b,c,d,e,f,g,h,i) \
sPymbed 0:1387ff3eed4a 1178 /* L1 = (e>>>14) ^ e */ \
sPymbed 0:1387ff3eed4a 1179 "xorl %" #e ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1180 /* L2 = (f ^ g) & e */ \
sPymbed 0:1387ff3eed4a 1181 "andl %" #e ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1182
sPymbed 0:1387ff3eed4a 1183 #define RND_STEP_0_4(a,b,c,d,e,f,g,h,i) \
sPymbed 0:1387ff3eed4a 1184 /* L1 = ((e>>>14) ^ e) >>> 5 */ \
sPymbed 0:1387ff3eed4a 1185 "rorl $5, " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1186 /* L2 = Ch(e,f,g) */ \
sPymbed 0:1387ff3eed4a 1187 "xorl %" #g ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1188 /* L1 = (((e>>>14) ^ e) >>> 5) ^ e */ \
sPymbed 0:1387ff3eed4a 1189 "xorl %" #e ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1190 /* h += Ch(e,f,g) */ \
sPymbed 0:1387ff3eed4a 1191 "addl " L2 ", %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 1192
sPymbed 0:1387ff3eed4a 1193 #define RND_STEP_0_5(a,b,c,d,e,f,g,h,i) \
sPymbed 0:1387ff3eed4a 1194 /* L1 = ((((e>>>14) ^ e) >>> 5) ^ e) >>> 6 */ \
sPymbed 0:1387ff3eed4a 1195 "rorl $6, " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1196 /* L3 = a ^ b (= b ^ c of next RND) */ \
sPymbed 0:1387ff3eed4a 1197 "xorl %" #a ", " L3 "\n\t" \
sPymbed 0:1387ff3eed4a 1198 /* h = h + w_k + Sigma1(e) */ \
sPymbed 0:1387ff3eed4a 1199 "addl " L1 ", %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 1200 /* L2 = a */ \
sPymbed 0:1387ff3eed4a 1201 "movl %" #a ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1202
sPymbed 0:1387ff3eed4a 1203 #define RND_STEP_0_6(a,b,c,d,e,f,g,h,i) \
sPymbed 0:1387ff3eed4a 1204 /* L4 = (a ^ b) & (b ^ c) */ \
sPymbed 0:1387ff3eed4a 1205 "andl " L3 ", " L4 "\n\t" \
sPymbed 0:1387ff3eed4a 1206 /* L2 = a>>>9 */ \
sPymbed 0:1387ff3eed4a 1207 "rorl $9, " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1208 /* L2 = (a>>>9) ^ a */ \
sPymbed 0:1387ff3eed4a 1209 "xorl %" #a ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1210 /* L4 = Maj(a,b,c) */ \
sPymbed 0:1387ff3eed4a 1211 "xorl %" #b ", " L4 "\n\t" \
sPymbed 0:1387ff3eed4a 1212
sPymbed 0:1387ff3eed4a 1213 #define RND_STEP_0_7(a,b,c,d,e,f,g,h,i) \
sPymbed 0:1387ff3eed4a 1214 /* L2 = ((a>>>9) ^ a) >>> 11 */ \
sPymbed 0:1387ff3eed4a 1215 "rorl $11, " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1216 /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */ \
sPymbed 0:1387ff3eed4a 1217 "addl %" #h ", %" #d "\n\t" \
sPymbed 0:1387ff3eed4a 1218 /* L2 = (((a>>>9) ^ a) >>> 11) ^ a */ \
sPymbed 0:1387ff3eed4a 1219 "xorl %" #a ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1220 /* h = h + w_k + Sigma1(e) + Ch(e,f,g) + Maj(a,b,c) */ \
sPymbed 0:1387ff3eed4a 1221 "addl " L4 ", %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 1222
sPymbed 0:1387ff3eed4a 1223 #define RND_STEP_0_8(a,b,c,d,e,f,g,h,i) \
sPymbed 0:1387ff3eed4a 1224 /* L2 = ((((a>>>9) ^ a) >>> 11) ^ a) >>> 2 */ \
sPymbed 0:1387ff3eed4a 1225 "rorl $2, " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1226 /* L1 = d (e of next RND) */ \
sPymbed 0:1387ff3eed4a 1227 "movl %" #d ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1228 /* h = h + w_k + Sigma1(e) + Sigma0(a) + Ch(e,f,g) + Maj(a,b,c) */ \
sPymbed 0:1387ff3eed4a 1229 "addl " L2 ", %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 1230
sPymbed 0:1387ff3eed4a 1231 #define RND_STEP_1_1(a,b,c,d,e,f,g,h,i) \
sPymbed 0:1387ff3eed4a 1232 /* L1 = e>>>14 */ \
sPymbed 0:1387ff3eed4a 1233 "rorl $14, " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1234
sPymbed 0:1387ff3eed4a 1235 #define RND_STEP_1_2(a,b,c,d,e,f,g,h,i) \
sPymbed 0:1387ff3eed4a 1236 /* L4 = b */ \
sPymbed 0:1387ff3eed4a 1237 "movl %" #b ", " L4 "\n\t" \
sPymbed 0:1387ff3eed4a 1238 /* L2 = f */ \
sPymbed 0:1387ff3eed4a 1239 "movl %" #f ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1240 /* h += w_k */ \
sPymbed 0:1387ff3eed4a 1241 "addl (" #i ")*4(" WK "), %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 1242 /* L2 = f ^ g */ \
sPymbed 0:1387ff3eed4a 1243 "xorl %" #g ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1244
sPymbed 0:1387ff3eed4a 1245 #define RND_STEP_1_3(a,b,c,d,e,f,g,h,i) \
sPymbed 0:1387ff3eed4a 1246 /* L1 = (e>>>14) ^ e */ \
sPymbed 0:1387ff3eed4a 1247 "xorl %" #e ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1248 /* L2 = (f ^ g) & e */ \
sPymbed 0:1387ff3eed4a 1249 "andl %" #e ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1250
sPymbed 0:1387ff3eed4a 1251 #define RND_STEP_1_4(a,b,c,d,e,f,g,h,i) \
sPymbed 0:1387ff3eed4a 1252 /* L1 = ((e>>>14) ^ e) >>> 5 */ \
sPymbed 0:1387ff3eed4a 1253 "rorl $5, " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1254 /* L2 = Ch(e,f,g) */ \
sPymbed 0:1387ff3eed4a 1255 "xorl %" #g ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1256 /* L1 = (((e>>>14) ^ e) >>> 5) ^ e */ \
sPymbed 0:1387ff3eed4a 1257 "xorl %" #e ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1258 /* h += Ch(e,f,g) */ \
sPymbed 0:1387ff3eed4a 1259 "addl " L2 ", %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 1260
sPymbed 0:1387ff3eed4a 1261 #define RND_STEP_1_5(a,b,c,d,e,f,g,h,i) \
sPymbed 0:1387ff3eed4a 1262 /* L1 = ((((e>>>14) ^ e) >>> 5) ^ e) >>> 6 */ \
sPymbed 0:1387ff3eed4a 1263 "rorl $6, " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1264 /* L4 = a ^ b (= b ^ c of next RND) */ \
sPymbed 0:1387ff3eed4a 1265 "xorl %" #a ", " L4 "\n\t" \
sPymbed 0:1387ff3eed4a 1266 /* h = h + w_k + Sigma1(e) */ \
sPymbed 0:1387ff3eed4a 1267 "addl " L1 ", %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 1268 /* L2 = a */ \
sPymbed 0:1387ff3eed4a 1269 "movl %" #a ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1270
sPymbed 0:1387ff3eed4a 1271 #define RND_STEP_1_6(a,b,c,d,e,f,g,h,i) \
sPymbed 0:1387ff3eed4a 1272 /* L3 = (a ^ b) & (b ^ c) */ \
sPymbed 0:1387ff3eed4a 1273 "andl " L4 ", " L3 "\n\t" \
sPymbed 0:1387ff3eed4a 1274 /* L2 = a>>>9 */ \
sPymbed 0:1387ff3eed4a 1275 "rorl $9, " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1276 /* L2 = (a>>>9) ^ a */ \
sPymbed 0:1387ff3eed4a 1277 "xorl %" #a ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1278 /* L3 = Maj(a,b,c) */ \
sPymbed 0:1387ff3eed4a 1279 "xorl %" #b ", " L3 "\n\t" \
sPymbed 0:1387ff3eed4a 1280
sPymbed 0:1387ff3eed4a 1281 #define RND_STEP_1_7(a,b,c,d,e,f,g,h,i) \
sPymbed 0:1387ff3eed4a 1282 /* L2 = ((a>>>9) ^ a) >>> 11 */ \
sPymbed 0:1387ff3eed4a 1283 "rorl $11, " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1284 /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */ \
sPymbed 0:1387ff3eed4a 1285 "addl %" #h ", %" #d "\n\t" \
sPymbed 0:1387ff3eed4a 1286 /* L2 = (((a>>>9) ^ a) >>> 11) ^ a */ \
sPymbed 0:1387ff3eed4a 1287 "xorl %" #a ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1288 /* h = h + w_k + Sigma1(e) + Ch(e,f,g) + Maj(a,b,c) */ \
sPymbed 0:1387ff3eed4a 1289 "addl " L3 ", %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 1290
sPymbed 0:1387ff3eed4a 1291 #define RND_STEP_1_8(a,b,c,d,e,f,g,h,i) \
sPymbed 0:1387ff3eed4a 1292 /* L2 = ((((a>>>9) ^ a) >>> 11) ^ a) >>> 2 */ \
sPymbed 0:1387ff3eed4a 1293 "rorl $2, " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1294 /* L1 = d (e of next RND) */ \
sPymbed 0:1387ff3eed4a 1295 "movl %" #d ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1296 /* h = h + w_k + Sigma1(e) + Sigma0(a) + Ch(e,f,g) + Maj(a,b,c) */ \
sPymbed 0:1387ff3eed4a 1297 "addl " L2 ", %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 1298
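The rorl-based RND_STEP chain evaluates Sigma1(e) in a single scratch register: because rotation distributes over XOR, ((((e>>>14) ^ e) >>> 5) ^ e) >>> 6 equals (e>>>6) ^ (e>>>11) ^ (e>>>25), and the 9/11/2 chain yields Sigma0(a) the same way. A stand-alone sketch of the equivalence (reference only, not part of the build):

/* Hedged reference sketch of the single-register Sigma1 chain. */
#include <stdint.h>
#include <stdio.h>

static uint32_t ror32(uint32_t x, unsigned n)
{
    return (x >> n) | (x << (32 - n));
}

static uint32_t sigma1_direct(uint32_t e)
{
    return ror32(e, 6) ^ ror32(e, 11) ^ ror32(e, 25);
}

static uint32_t sigma1_chained(uint32_t e)
{
    /* mirrors: rorl $14 ; xorl e ; rorl $5 ; xorl e ; rorl $6 */
    return ror32(ror32(ror32(e, 14) ^ e, 5) ^ e, 6);
}

int main(void)
{
    uint32_t e = 0x510e527f; /* SHA-256 initial e */
    printf("%08x %08x\n", sigma1_direct(e), sigma1_chained(e));
    return 0;
}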
sPymbed 0:1387ff3eed4a 1299 #define _RND_ALL_0(a,b,c,d,e,f,g,h,i) \
sPymbed 0:1387ff3eed4a 1300 /* h += w_k */ \
sPymbed 0:1387ff3eed4a 1301 "addl (" #i ")*4(" WK "), %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 1302 /* L2 = f */ \
sPymbed 0:1387ff3eed4a 1303 "movl %" #f ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1304 /* L3 = b */ \
sPymbed 0:1387ff3eed4a 1305 "movl %" #b ", " L3 "\n\t" \
sPymbed 0:1387ff3eed4a 1306 /* L2 = f ^ g */ \
sPymbed 0:1387ff3eed4a 1307 "xorl %" #g ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1308 /* L1 = e>>>14 */ \
sPymbed 0:1387ff3eed4a 1309 "rorl $14, " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1310 /* L2 = (f ^ g) & e */ \
sPymbed 0:1387ff3eed4a 1311 "andl %" #e ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1312 /* L1 = (e>>>14) ^ e */ \
sPymbed 0:1387ff3eed4a 1313 "xorl %" #e ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1314 /* L2 = Ch(e,f,g) */ \
sPymbed 0:1387ff3eed4a 1315 "xorl %" #g ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1316 /* L1 = ((e>>>14) ^ e) >>> 5 */ \
sPymbed 0:1387ff3eed4a 1317 "rorl $5, " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1318 /* h += Ch(e,f,g) */ \
sPymbed 0:1387ff3eed4a 1319 "addl " L2 ", %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 1320 /* L1 = (((e>>>14) ^ e) >>> 5) ^ e */ \
sPymbed 0:1387ff3eed4a 1321 "xorl %" #e ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1322 /* L3 = a ^ b */ \
sPymbed 0:1387ff3eed4a 1323 "xorl %" #a ", " L3 "\n\t" \
sPymbed 0:1387ff3eed4a 1324 /* L1 = ((((e>>>14) ^ e) >>> 5) ^ e) >>> 6 */ \
sPymbed 0:1387ff3eed4a 1325 "rorl $6, " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1326 /* L2 = a */ \
sPymbed 0:1387ff3eed4a 1327 "movl %" #a ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1328 /* h = h + w_k + Sigma1(e) */ \
sPymbed 0:1387ff3eed4a 1329 "addl " L1 ", %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 1330 /* L2 = a>>>9 */ \
sPymbed 0:1387ff3eed4a 1331 "rorl $9, " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1332 /* L4 = (a ^ b) & (b ^ c) */ \
sPymbed 0:1387ff3eed4a 1333 "andl " L3 ", " L4 "\n\t" \
sPymbed 0:1387ff3eed4a 1334 /* L2 = (a>>>9) ^ a */ \
sPymbed 0:1387ff3eed4a 1335 "xorl %" #a ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1336 /* L4 = Maj(a,b,c) */ \
sPymbed 0:1387ff3eed4a 1337 "xorl %" #b ", " L4 "\n\t" \
sPymbed 0:1387ff3eed4a 1338 /* L2 = ((a>>>9) ^ a) >>> 11 */ \
sPymbed 0:1387ff3eed4a 1339 "rorl $11, " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1340 /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */ \
sPymbed 0:1387ff3eed4a 1341 "addl %" #h ", %" #d "\n\t" \
sPymbed 0:1387ff3eed4a 1342 /* L2 = (((a>>>9) ^ a) >>> 11) ^ a */ \
sPymbed 0:1387ff3eed4a 1343 "xorl %" #a ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1344 /* h = h + w_k + Sigma1(e) + Ch(e,f,g) + Maj(a,b,c) */ \
sPymbed 0:1387ff3eed4a 1345 "addl " L4 ", %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 1346 /* L2 = ((((a>>>9) ^ a) >>> 11) ^ a) >>> 2 */ \
sPymbed 0:1387ff3eed4a 1347 "rorl $2, " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1348 /* L1 = d (e of next RND) */ \
sPymbed 0:1387ff3eed4a 1349 "movl %" #d ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1350 /* h = h + w_k + Sigma1(e) + Sigma0(a) + Ch(e,f,g) + Maj(a,b,c) */ \
sPymbed 0:1387ff3eed4a 1351 "addl " L2 ", %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 1352
sPymbed 0:1387ff3eed4a 1353 #define _RND_ALL_1(a,b,c,d,e,f,g,h,i) \
sPymbed 0:1387ff3eed4a 1354 /* h += w_k */ \
sPymbed 0:1387ff3eed4a 1355 "addl (" #i ")*4(" WK "), %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 1356 /* L2 = f */ \
sPymbed 0:1387ff3eed4a 1357 "movl %" #f ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1358 /* L4 = b */ \
sPymbed 0:1387ff3eed4a 1359 "movl %" #b ", " L4 "\n\t" \
sPymbed 0:1387ff3eed4a 1360 /* L2 = f ^ g */ \
sPymbed 0:1387ff3eed4a 1361 "xorl %" #g ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1362 /* L1 = e>>>14 */ \
sPymbed 0:1387ff3eed4a 1363 "rorl $14, " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1364 /* L2 = (f ^ g) & e */ \
sPymbed 0:1387ff3eed4a 1365 "andl %" #e ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1366 /* L1 = (e>>>14) ^ e */ \
sPymbed 0:1387ff3eed4a 1367 "xorl %" #e ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1368 /* L2 = Ch(e,f,g) */ \
sPymbed 0:1387ff3eed4a 1369 "xorl %" #g ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1370 /* L1 = ((e>>>14) ^ e) >>> 5 */ \
sPymbed 0:1387ff3eed4a 1371 "rorl $5, " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1372 /* h += Ch(e,f,g) */ \
sPymbed 0:1387ff3eed4a 1373 "addl " L2 ", %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 1374 /* L1 = (((e>>>14) ^ e) >>> 5) ^ e */ \
sPymbed 0:1387ff3eed4a 1375 "xorl %" #e ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1376 /* L4 = a ^ b */ \
sPymbed 0:1387ff3eed4a 1377 "xorl %" #a ", " L4 "\n\t" \
sPymbed 0:1387ff3eed4a 1378 /* L1 = ((((e>>>14) ^ e) >>> 5) ^ e) >>> 6 */ \
sPymbed 0:1387ff3eed4a 1379 "rorl $6, " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1380 /* L2 = a */ \
sPymbed 0:1387ff3eed4a 1381 "movl %" #a ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1382 /* h = h + w_k + Sigma1(e) */ \
sPymbed 0:1387ff3eed4a 1383 "addl " L1 ", %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 1384 /* L2 = a>>>9 */ \
sPymbed 0:1387ff3eed4a 1385 "rorl $9, " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1386 /* L3 = (a ^ b) & (b ^ c) */ \
sPymbed 0:1387ff3eed4a 1387 "andl " L4 ", " L3 "\n\t" \
sPymbed 0:1387ff3eed4a 1388 /* L2 = (a>>>9) ^ a */ \
sPymbed 0:1387ff3eed4a 1389 "xorl %" #a ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1390 /* L3 = Maj(a,b,c) */ \
sPymbed 0:1387ff3eed4a 1391 "xorl %" #b ", " L3 "\n\t" \
sPymbed 0:1387ff3eed4a 1392 /* L2 = ((a>>>9) ^ a) >>> 11 */ \
sPymbed 0:1387ff3eed4a 1393 "rorl $11, " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1394 /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */ \
sPymbed 0:1387ff3eed4a 1395 "addl %" #h ", %" #d "\n\t" \
sPymbed 0:1387ff3eed4a 1396 /* L2 = (((a>>>9) ^ a) >>> 11) ^ a */ \
sPymbed 0:1387ff3eed4a 1397 "xorl %" #a ", " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1398 /* h = h + w_k + Sigma1(e) + Ch(e,f,g) + Maj(a,b,c) */ \
sPymbed 0:1387ff3eed4a 1399 "addl " L3 ", %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 1400 /* L2 = ((((a>>>9) ^ a) >>> 11) ^ a) >>> 2 */ \
sPymbed 0:1387ff3eed4a 1401 "rorl $2, " L2 "\n\t" \
sPymbed 0:1387ff3eed4a 1402 /* L1 = d (e of next RND) */ \
sPymbed 0:1387ff3eed4a 1403 "movl %" #d ", " L1 "\n\t" \
sPymbed 0:1387ff3eed4a 1404 /* h = h + w_k + Sigma1(e) + Sigma0(a) + Ch(e,f,g) + Maj(a,b,c) */ \
sPymbed 0:1387ff3eed4a 1405 "addl " L2 ", %" #h "\n\t" \
sPymbed 0:1387ff3eed4a 1406
sPymbed 0:1387ff3eed4a 1407
sPymbed 0:1387ff3eed4a 1408 #define RND_ALL_0(a, b, c, d, e, f, g, h, i) \
sPymbed 0:1387ff3eed4a 1409 _RND_ALL_0(a, b, c, d, e, f, g, h, i)
sPymbed 0:1387ff3eed4a 1410 #define RND_ALL_1(a, b, c, d, e, f, g, h, i) \
sPymbed 0:1387ff3eed4a 1411 _RND_ALL_1(a, b, c, d, e, f, g, h, i)
sPymbed 0:1387ff3eed4a 1412
sPymbed 0:1387ff3eed4a 1413 #define RND_ALL_4(a, b, c, d, e, f, g, h, i) \
sPymbed 0:1387ff3eed4a 1414 RND_ALL_0(a, b, c, d, e, f, g, h, i+0) \
sPymbed 0:1387ff3eed4a 1415 RND_ALL_1(h, a, b, c, d, e, f, g, i+1) \
sPymbed 0:1387ff3eed4a 1416 RND_ALL_0(g, h, a, b, c, d, e, f, i+2) \
sPymbed 0:1387ff3eed4a 1417 RND_ALL_1(f, g, h, a, b, c, d, e, i+3)
sPymbed 0:1387ff3eed4a 1418
sPymbed 0:1387ff3eed4a 1419 #endif /* defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) */
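RND_ALL_4 never moves state between registers: each of its four invocations rotates the (a..h) argument list one position, so the register that played `h` in one round plays `a` in the next, and after eight rounds the assignment is back where it started. A hedged C sketch of the same role rotation (round_fn and four_rounds are hypothetical names; the round body follows the formulas in the macro comments):

/* Hedged reference sketch, not the wolfSSL API. */
#include <stdint.h>

static uint32_t ror32(uint32_t x, unsigned n)
{
    return (x >> n) | (x << (32 - n));
}

/* One SHA-256 round, using the same Ch/Maj xor forms as the macros. */
static void round_fn(uint32_t a, uint32_t b, uint32_t c, uint32_t* d,
                     uint32_t e, uint32_t f, uint32_t g, uint32_t* h,
                     uint32_t wk)
{
    uint32_t t1 = *h + (ror32(e, 6) ^ ror32(e, 11) ^ ror32(e, 25))
                     + (((f ^ g) & e) ^ g) + wk;   /* Ch(e,f,g) */
    uint32_t t2 = (ror32(a, 2) ^ ror32(a, 13) ^ ror32(a, 22))
                     + (((a ^ b) & (b ^ c)) ^ b);  /* Maj(a,b,c) */
    *d += t1;
    *h  = t1 + t2;
}

static void four_rounds(uint32_t s[8], const uint32_t wk[4])
{
    /* same argument rotation as RND_ALL_4(a,b,c,d,e,f,g,h,i) */
    round_fn(s[0], s[1], s[2], &s[3], s[4], s[5], s[6], &s[7], wk[0]);
    round_fn(s[7], s[0], s[1], &s[2], s[3], s[4], s[5], &s[6], wk[1]);
    round_fn(s[6], s[7], s[0], &s[1], s[2], s[3], s[4], &s[5], wk[2]);
    round_fn(s[5], s[6], s[7], &s[0], s[1], s[2], s[3], &s[4], wk[3]);
}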
sPymbed 0:1387ff3eed4a 1420
sPymbed 0:1387ff3eed4a 1421 #if defined(HAVE_INTEL_AVX1) /* inline assembler for Intel AVX1 instructions */
sPymbed 0:1387ff3eed4a 1422
sPymbed 0:1387ff3eed4a 1423 #define _VPALIGNR(op1, op2, op3, op4) \
sPymbed 0:1387ff3eed4a 1424 "vpalignr $" #op4", %" #op3", %" #op2", %" #op1"\n\t"
sPymbed 0:1387ff3eed4a 1425 #define VPALIGNR(op1, op2, op3, op4) \
sPymbed 0:1387ff3eed4a 1426 _VPALIGNR(op1, op2, op3, op4)
sPymbed 0:1387ff3eed4a 1427 #define _VPADDD(op1, op2, op3) \
sPymbed 0:1387ff3eed4a 1428 "vpaddd %" #op3", %" #op2", %" #op1"\n\t"
sPymbed 0:1387ff3eed4a 1429 #define VPADDD(op1, op2, op3) \
sPymbed 0:1387ff3eed4a 1430 _VPADDD(op1, op2, op3)
sPymbed 0:1387ff3eed4a 1431 #define _VPSRLD(op1, op2, op3) \
sPymbed 0:1387ff3eed4a 1432 "vpsrld $" #op3", %" #op2", %" #op1"\n\t"
sPymbed 0:1387ff3eed4a 1433 #define VPSRLD(op1, op2, op3) \
sPymbed 0:1387ff3eed4a 1434 _VPSRLD(op1, op2, op3)
sPymbed 0:1387ff3eed4a 1435 #define _VPSRLQ(op1, op2, op3) \
sPymbed 0:1387ff3eed4a 1436 "vpsrlq $" #op3", %" #op2", %" #op1"\n\t"
sPymbed 0:1387ff3eed4a 1437 #define VPSRLQ(op1,op2,op3) \
sPymbed 0:1387ff3eed4a 1438 _VPSRLQ(op1,op2,op3)
sPymbed 0:1387ff3eed4a 1439 #define _VPSLLD(op1,op2,op3) \
sPymbed 0:1387ff3eed4a 1440 "vpslld $" #op3", %" #op2", %" #op1"\n\t"
sPymbed 0:1387ff3eed4a 1441 #define VPSLLD(op1,op2,op3) \
sPymbed 0:1387ff3eed4a 1442 _VPSLLD(op1,op2,op3)
sPymbed 0:1387ff3eed4a 1443 #define _VPOR(op1,op2,op3) \
sPymbed 0:1387ff3eed4a 1444 "vpor %" #op3", %" #op2", %" #op1"\n\t"
sPymbed 0:1387ff3eed4a 1445 #define VPOR(op1,op2,op3) \
sPymbed 0:1387ff3eed4a 1446 _VPOR(op1,op2,op3)
sPymbed 0:1387ff3eed4a 1447 #define _VPXOR(op1,op2,op3) \
sPymbed 0:1387ff3eed4a 1448 "vpxor %" #op3", %" #op2", %" #op1"\n\t"
sPymbed 0:1387ff3eed4a 1449 #define VPXOR(op1,op2,op3) \
sPymbed 0:1387ff3eed4a 1450 _VPXOR(op1,op2,op3)
sPymbed 0:1387ff3eed4a 1451 #define _VPSHUFD(op1,op2,op3) \
sPymbed 0:1387ff3eed4a 1452 "vpshufd $" #op3", %" #op2", %" #op1"\n\t"
sPymbed 0:1387ff3eed4a 1453 #define VPSHUFD(op1,op2,op3) \
sPymbed 0:1387ff3eed4a 1454 _VPSHUFD(op1,op2,op3)
sPymbed 0:1387ff3eed4a 1455 #define _VPSHUFB(op1,op2,op3) \
sPymbed 0:1387ff3eed4a 1456 "vpshufb %" #op3", %" #op2", %" #op1"\n\t"
sPymbed 0:1387ff3eed4a 1457 #define VPSHUFB(op1,op2,op3) \
sPymbed 0:1387ff3eed4a 1458 _VPSHUFB(op1,op2,op3)
sPymbed 0:1387ff3eed4a 1459 #define _VPSLLDQ(op1,op2,op3) \
sPymbed 0:1387ff3eed4a 1460 "vpslldq $" #op3", %" #op2", %" #op1"\n\t"
sPymbed 0:1387ff3eed4a 1461 #define VPSLLDQ(op1,op2,op3) \
sPymbed 0:1387ff3eed4a 1462 _VPSLLDQ(op1,op2,op3)
sPymbed 0:1387ff3eed4a 1463
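Each instruction wrapper above is defined in two levels because `#op` stringizes its argument exactly as written: without the indirection, VPADDD(XTMP0, ...) would paste the text "XTMP0" into the asm template instead of "%xmm4". A minimal stand-alone illustration of the mechanism (macro names here are hypothetical):

/* Hedged sketch of double-expansion before stringization. */
#include <stdio.h>

#define XT %xmm4

#define STR_ONE_LEVEL(x)  #x
#define _STR(x)           #x
#define STR_TWO_LEVEL(x)  _STR(x)  /* argument expands before # applies */

int main(void)
{
    printf("%s\n", STR_ONE_LEVEL(XT)); /* prints "XT" */
    printf("%s\n", STR_TWO_LEVEL(XT)); /* prints "%xmm4" */
    return 0;
}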
sPymbed 0:1387ff3eed4a 1464 #define MsgSched(X0,X1,X2,X3,a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 1465 RND_STEP_0_1(a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 1466 VPALIGNR (XTMP1, X1, X0, 4) /* XTMP1 = W[-15] */ \
sPymbed 0:1387ff3eed4a 1467 VPALIGNR (XTMP0, X3, X2, 4) /* XTMP0 = W[-7] */ \
sPymbed 0:1387ff3eed4a 1468 RND_STEP_0_2(a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 1469 RND_STEP_0_3(a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 1470 VPSRLD (XTMP2, XTMP1, 7) /* XTMP2 = W[-15] >> 7 */ \
sPymbed 0:1387ff3eed4a 1471 VPSLLD (XTMP3, XTMP1, 25) /* XTMP3 = W[-15] << (32-7) */ \
sPymbed 0:1387ff3eed4a 1472 RND_STEP_0_4(a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 1473 RND_STEP_0_5(a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 1474 VPSRLD (XTMP4, XTMP1, 18) /* XTMP4 = W[-15] >> 18 */ \
sPymbed 0:1387ff3eed4a 1475 VPSLLD (XTMP5, XTMP1, 14) /* XTMP5 = W[-15] << (32-18) */ \
sPymbed 0:1387ff3eed4a 1476 RND_STEP_0_6(a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 1477 RND_STEP_0_7(a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 1478 VPOR (XTMP2, XTMP3, XTMP2) /* XTMP2 = W[-15] >>> 7 */ \
sPymbed 0:1387ff3eed4a 1479 VPOR (XTMP4, XTMP5, XTMP4) /* XTMP4 = W[-15] >>> 18 */ \
sPymbed 0:1387ff3eed4a 1480 RND_STEP_0_8(a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 1481 RND_STEP_1_1(h,a,b,c,d,e,f,g,_i+1) \
sPymbed 0:1387ff3eed4a 1482 RND_STEP_1_2(h,a,b,c,d,e,f,g,_i+1) \
sPymbed 0:1387ff3eed4a 1483 VPSRLD (XTMP5, XTMP1, 3) /* XTMP5 = W[-15] >> 3 */ \
sPymbed 0:1387ff3eed4a 1484 VPXOR (XTMP2, XTMP4, XTMP2) \
sPymbed 0:1387ff3eed4a 1485 /* XTMP2 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR 18 */ \
sPymbed 0:1387ff3eed4a 1486 RND_STEP_1_3(h,a,b,c,d,e,f,g,_i+1) \
sPymbed 0:1387ff3eed4a 1487 RND_STEP_1_4(h,a,b,c,d,e,f,g,_i+1) \
sPymbed 0:1387ff3eed4a 1488 VPXOR (XTMP1, XTMP5, XTMP2) /* XTMP1 = s0 */ \
sPymbed 0:1387ff3eed4a 1489 VPSHUFD (XTMP2, X3, 0b11111010) /* XTMP2 = W[-2] {BBAA}*/ \
sPymbed 0:1387ff3eed4a 1490 RND_STEP_1_5(h,a,b,c,d,e,f,g,_i+1) \
sPymbed 0:1387ff3eed4a 1491 RND_STEP_1_6(h,a,b,c,d,e,f,g,_i+1) \
sPymbed 0:1387ff3eed4a 1492 VPSRLD (XTMP4, XTMP2, 10) /* XTMP4 = W[-2] >> 10 {BBAA} */ \
sPymbed 0:1387ff3eed4a 1493 VPSRLQ (XTMP3, XTMP2, 19) /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */ \
sPymbed 0:1387ff3eed4a 1494 RND_STEP_1_7(h,a,b,c,d,e,f,g,_i+1) \
sPymbed 0:1387ff3eed4a 1495 RND_STEP_1_8(h,a,b,c,d,e,f,g,_i+1) \
sPymbed 0:1387ff3eed4a 1496 RND_STEP_0_1(g,h,a,b,c,d,e,f,_i+2) \
sPymbed 0:1387ff3eed4a 1497 VPSRLQ (XTMP2, XTMP2, 17) /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */ \
sPymbed 0:1387ff3eed4a 1498 VPADDD (XTMP0, XTMP0, X0) \
sPymbed 0:1387ff3eed4a 1499 RND_STEP_0_2(g,h,a,b,c,d,e,f,_i+2) \
sPymbed 0:1387ff3eed4a 1500 RND_STEP_0_3(g,h,a,b,c,d,e,f,_i+2) \
sPymbed 0:1387ff3eed4a 1501 RND_STEP_0_4(g,h,a,b,c,d,e,f,_i+2) \
sPymbed 0:1387ff3eed4a 1502 VPXOR (XTMP2, XTMP3, XTMP2) \
sPymbed 0:1387ff3eed4a 1503 VPADDD (XTMP0, XTMP0, XTMP1) /* XTMP0 = W[-16] + W[-7] + s0 */ \
sPymbed 0:1387ff3eed4a 1504 RND_STEP_0_5(g,h,a,b,c,d,e,f,_i+2) \
sPymbed 0:1387ff3eed4a 1505 VPXOR (XTMP4, XTMP4, XTMP2) /* XTMP4 = s1 {xBxA} */ \
sPymbed 0:1387ff3eed4a 1506 RND_STEP_0_6(g,h,a,b,c,d,e,f,_i+2) \
sPymbed 0:1387ff3eed4a 1507 VPSHUFB (XTMP4, XTMP4, SHUF_00BA) /* XTMP4 = s1 {00BA} */ \
sPymbed 0:1387ff3eed4a 1508 RND_STEP_0_7(g,h,a,b,c,d,e,f,_i+2) \
sPymbed 0:1387ff3eed4a 1509 VPADDD (XTMP0, XTMP0, XTMP4) /* XTMP0 = {..., ..., W[1], W[0]} */ \
sPymbed 0:1387ff3eed4a 1510 RND_STEP_0_8(g,h,a,b,c,d,e,f,_i+2) \
sPymbed 0:1387ff3eed4a 1511 RND_STEP_1_1(f,g,h,a,b,c,d,e,_i+3) \
sPymbed 0:1387ff3eed4a 1512 VPSHUFD (XTMP2, XTMP0, 0b01010000) /* XTMP2 = W[-2] {DDCC} */ \
sPymbed 0:1387ff3eed4a 1513 RND_STEP_1_2(f,g,h,a,b,c,d,e,_i+3) \
sPymbed 0:1387ff3eed4a 1514 VPSRLQ (XTMP4, XTMP2, 17) /* XTMP4 = W[-2] MY_ROR 17 {xDxC} */ \
sPymbed 0:1387ff3eed4a 1515 VPSRLQ (XTMP3, XTMP2, 19) /* XTMP3 = W[-2] MY_ROR 19 {xDxC} */ \
sPymbed 0:1387ff3eed4a 1516 RND_STEP_1_3(f,g,h,a,b,c,d,e,_i+3) \
sPymbed 0:1387ff3eed4a 1517 RND_STEP_1_4(f,g,h,a,b,c,d,e,_i+3) \
sPymbed 0:1387ff3eed4a 1518 VPSRLD (XTMP5, XTMP2, 10) /* XTMP5 = W[-2] >> 10 {DDCC} */ \
sPymbed 0:1387ff3eed4a 1519 VPXOR (XTMP4, XTMP3, XTMP4) \
sPymbed 0:1387ff3eed4a 1520 RND_STEP_1_5(f,g,h,a,b,c,d,e,_i+3) \
sPymbed 0:1387ff3eed4a 1521 RND_STEP_1_6(f,g,h,a,b,c,d,e,_i+3) \
sPymbed 0:1387ff3eed4a 1522 VPXOR (XTMP5, XTMP4, XTMP5) /* XTMP5 = s1 {xDxC} */ \
sPymbed 0:1387ff3eed4a 1523 RND_STEP_1_7(f,g,h,a,b,c,d,e,_i+3) \
sPymbed 0:1387ff3eed4a 1524 VPSHUFB (XTMP5, XTMP5, SHUF_DC00) /* XTMP5 = s1 {DC00} */ \
sPymbed 0:1387ff3eed4a 1525 RND_STEP_1_8(f,g,h,a,b,c,d,e,_i+3) \
sPymbed 0:1387ff3eed4a 1526 VPADDD (X0, XTMP5, XTMP0) /* X0 = {W[3], W[2], W[1], W[0]} */
sPymbed 0:1387ff3eed4a 1527
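MsgSched evaluates four steps of the SHA-256 message expansion per call, splitting W[-2] into the {BBAA} and {DDCC} halves so the 17- and 19-bit rotates of sigma-1 can be emulated with 64-bit vpsrlq shifts. The scalar recurrence it vectorizes, as a stand-alone reference sketch (not the wolfSSL API):

/* Hedged scalar reference of the message schedule:
 *   s0 = (w[i-15] >>> 7) ^ (w[i-15] >>> 18) ^ (w[i-15] >> 3)
 *   s1 = (w[i-2] >>> 17) ^ (w[i-2] >>> 19) ^ (w[i-2] >> 10)
 *   w[i] = w[i-16] + s0 + w[i-7] + s1                       */
#include <stdint.h>

static uint32_t ror32(uint32_t x, unsigned n)
{
    return (x >> n) | (x << (32 - n));
}

static void msg_sched(uint32_t w[64])
{
    int i;
    for (i = 16; i < 64; i++) {
        uint32_t s0 = ror32(w[i-15], 7) ^ ror32(w[i-15], 18) ^ (w[i-15] >> 3);
        uint32_t s1 = ror32(w[i-2], 17) ^ ror32(w[i-2], 19) ^ (w[i-2] >> 10);
        w[i] = w[i-16] + s0 + w[i-7] + s1;
    }
}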
sPymbed 0:1387ff3eed4a 1528 #if defined(HAVE_INTEL_RORX)
sPymbed 0:1387ff3eed4a 1529
sPymbed 0:1387ff3eed4a 1530 #define MsgSched_RORX(X0,X1,X2,X3,a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 1531 RND_STEP_RORX_0_1(a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 1532 VPALIGNR (XTMP0, X3, X2, 4) \
sPymbed 0:1387ff3eed4a 1533 VPALIGNR (XTMP1, X1, X0, 4) /* XTMP1 = W[-15] */ \
sPymbed 0:1387ff3eed4a 1534 RND_STEP_RORX_0_2(a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 1535 RND_STEP_RORX_0_3(a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 1536 VPSRLD (XTMP2, XTMP1, 7) \
sPymbed 0:1387ff3eed4a 1537 VPSLLD (XTMP3, XTMP1, 25) /* VPSLLD (XTMP3, XTMP1, (32-7)) */ \
sPymbed 0:1387ff3eed4a 1538 RND_STEP_RORX_0_4(a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 1539 RND_STEP_RORX_0_5(a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 1540 VPSRLD (XTMP4, XTMP1, 3) /* XTMP4 = W[-15] >> 3 */ \
sPymbed 0:1387ff3eed4a 1541 VPOR (XTMP3, XTMP3, XTMP2) /* XTMP3 = W[-15] MY_ROR 7 */ \
sPymbed 0:1387ff3eed4a 1542 RND_STEP_RORX_0_6(a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 1543 RND_STEP_RORX_0_7(a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 1544 RND_STEP_RORX_0_8(a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 1545 \
sPymbed 0:1387ff3eed4a 1546 RND_STEP_RORX_1_1(h,a,b,c,d,e,f,g,_i+1) \
sPymbed 0:1387ff3eed4a 1547 VPSRLD (XTMP2, XTMP1,18) \
sPymbed 0:1387ff3eed4a 1548 RND_STEP_RORX_1_2(h,a,b,c,d,e,f,g,_i+1) \
sPymbed 0:1387ff3eed4a 1549 VPSLLD (XTMP1, XTMP1, 14) /* VPSLLD (XTMP1, XTMP1, (32-18)) */ \
sPymbed 0:1387ff3eed4a 1550 RND_STEP_RORX_1_3(h,a,b,c,d,e,f,g,_i+1) \
sPymbed 0:1387ff3eed4a 1551 VPXOR (XTMP3, XTMP3, XTMP1) \
sPymbed 0:1387ff3eed4a 1552 RND_STEP_RORX_1_4(h,a,b,c,d,e,f,g,_i+1) \
sPymbed 0:1387ff3eed4a 1553 VPXOR (XTMP3, XTMP3, XTMP2) \
sPymbed 0:1387ff3eed4a 1554 /* XTMP3 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR 18 */ \
sPymbed 0:1387ff3eed4a 1555 RND_STEP_RORX_1_5(h,a,b,c,d,e,f,g,_i+1) \
sPymbed 0:1387ff3eed4a 1556 VPSHUFD (XTMP2, X3, 0b11111010) /* XTMP2 = W[-2] {BBAA}*/ \
sPymbed 0:1387ff3eed4a 1557 RND_STEP_RORX_1_6(h,a,b,c,d,e,f,g,_i+1) \
sPymbed 0:1387ff3eed4a 1558 VPXOR (XTMP1, XTMP3, XTMP4) /* XTMP1 = s0 */ \
sPymbed 0:1387ff3eed4a 1559 RND_STEP_RORX_1_7(h,a,b,c,d,e,f,g,_i+1) \
sPymbed 0:1387ff3eed4a 1560 VPSRLD (XTMP4, XTMP2, 10) /* XTMP4 = W[-2] >> 10 {BBAA} */ \
sPymbed 0:1387ff3eed4a 1561 RND_STEP_RORX_1_8(h,a,b,c,d,e,f,g,_i+1) \
sPymbed 0:1387ff3eed4a 1562 \
sPymbed 0:1387ff3eed4a 1563 RND_STEP_RORX_0_1(g,h,a,b,c,d,e,f,_i+2) \
sPymbed 0:1387ff3eed4a 1564 VPSRLQ (XTMP3, XTMP2, 19) /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */ \
sPymbed 0:1387ff3eed4a 1565 RND_STEP_RORX_0_2(g,h,a,b,c,d,e,f,_i+2) \
sPymbed 0:1387ff3eed4a 1566 VPSRLQ (XTMP2, XTMP2, 17) /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */ \
sPymbed 0:1387ff3eed4a 1567 VPADDD (XTMP0, XTMP0, X0) \
sPymbed 0:1387ff3eed4a 1568 RND_STEP_RORX_0_3(g,h,a,b,c,d,e,f,_i+2) \
sPymbed 0:1387ff3eed4a 1569 VPADDD (XTMP0, XTMP0, XTMP1) /* XTMP0 = W[-16] + W[-7] + s0 */ \
sPymbed 0:1387ff3eed4a 1570 RND_STEP_RORX_0_4(g,h,a,b,c,d,e,f,_i+2) \
sPymbed 0:1387ff3eed4a 1571 VPXOR (XTMP2, XTMP2, XTMP3) \
sPymbed 0:1387ff3eed4a 1572 RND_STEP_RORX_0_5(g,h,a,b,c,d,e,f,_i+2) \
sPymbed 0:1387ff3eed4a 1573 VPXOR (XTMP4, XTMP4, XTMP2) /* XTMP4 = s1 {xBxA} */ \
sPymbed 0:1387ff3eed4a 1574 RND_STEP_RORX_0_6(g,h,a,b,c,d,e,f,_i+2) \
sPymbed 0:1387ff3eed4a 1575 VPSHUFB (XTMP4, XTMP4, SHUF_00BA) /* XTMP4 = s1 {00BA} */ \
sPymbed 0:1387ff3eed4a 1576 RND_STEP_RORX_0_7(g,h,a,b,c,d,e,f,_i+2) \
sPymbed 0:1387ff3eed4a 1577 VPADDD (XTMP0, XTMP0, XTMP4) /* XTMP0 = {..., ..., W[1], W[0]} */ \
sPymbed 0:1387ff3eed4a 1578 RND_STEP_RORX_0_8(g,h,a,b,c,d,e,f,_i+2) \
sPymbed 0:1387ff3eed4a 1579 \
sPymbed 0:1387ff3eed4a 1580 RND_STEP_RORX_1_1(f,g,h,a,b,c,d,e,_i+3) \
sPymbed 0:1387ff3eed4a 1581 VPSHUFD (XTMP2, XTMP0, 0b01010000) /* XTMP2 = W[-2] {DDCC} */ \
sPymbed 0:1387ff3eed4a 1582 RND_STEP_RORX_1_2(f,g,h,a,b,c,d,e,_i+3) \
sPymbed 0:1387ff3eed4a 1583 VPSRLD (XTMP5, XTMP2, 10) /* XTMP5 = W[-2] >> 10 {DDCC} */ \
sPymbed 0:1387ff3eed4a 1584 RND_STEP_RORX_1_3(f,g,h,a,b,c,d,e,_i+3) \
sPymbed 0:1387ff3eed4a 1585 VPSRLQ (XTMP3, XTMP2, 19) /* XTMP3 = W[-2] MY_ROR 19 {xDxC} */ \
sPymbed 0:1387ff3eed4a 1586 RND_STEP_RORX_1_4(f,g,h,a,b,c,d,e,_i+3) \
sPymbed 0:1387ff3eed4a 1587 VPSRLQ (XTMP2, XTMP2, 17) /* XTMP2 = W[-2] MY_ROR 17 {xDxC} */ \
sPymbed 0:1387ff3eed4a 1588 RND_STEP_RORX_1_5(f,g,h,a,b,c,d,e,_i+3) \
sPymbed 0:1387ff3eed4a 1589 VPXOR (XTMP2, XTMP2, XTMP3) \
sPymbed 0:1387ff3eed4a 1590 RND_STEP_RORX_1_6(f,g,h,a,b,c,d,e,_i+3) \
sPymbed 0:1387ff3eed4a 1591 VPXOR (XTMP5, XTMP5, XTMP2) /* XTMP5 = s1 {xDxC} */ \
sPymbed 0:1387ff3eed4a 1592 RND_STEP_RORX_1_7(f,g,h,a,b,c,d,e,_i+3) \
sPymbed 0:1387ff3eed4a 1593 VPSHUFB (XTMP5, XTMP5, SHUF_DC00) /* XTMP5 = s1 {DC00} */ \
sPymbed 0:1387ff3eed4a 1594 RND_STEP_RORX_1_8(f,g,h,a,b,c,d,e,_i+3) \
sPymbed 0:1387ff3eed4a 1595 VPADDD (X0, XTMP5, XTMP0) /* X0 = {W[3], W[2], W[1], W[0]} */
sPymbed 0:1387ff3eed4a 1596
sPymbed 0:1387ff3eed4a 1597 #endif /* HAVE_INTEL_RORX */
sPymbed 0:1387ff3eed4a 1598
sPymbed 0:1387ff3eed4a 1599
sPymbed 0:1387ff3eed4a 1600 #define _W_K_from_buff(X0, X1, X2, X3, BYTE_FLIP_MASK) \
sPymbed 0:1387ff3eed4a 1601 "# X0, X1, X2, X3 = W[0..15]\n\t" \
sPymbed 0:1387ff3eed4a 1602 "vmovdqu (%%rax), %" #X0 "\n\t" \
sPymbed 0:1387ff3eed4a 1603 "vmovdqu 16(%%rax), %" #X1 "\n\t" \
sPymbed 0:1387ff3eed4a 1604 VPSHUFB(X0, X0, BYTE_FLIP_MASK) \
sPymbed 0:1387ff3eed4a 1605 VPSHUFB(X1, X1, BYTE_FLIP_MASK) \
sPymbed 0:1387ff3eed4a 1606 "vmovdqu 32(%%rax), %" #X2 "\n\t" \
sPymbed 0:1387ff3eed4a 1607 "vmovdqu 48(%%rax), %" #X3 "\n\t" \
sPymbed 0:1387ff3eed4a 1608 VPSHUFB(X2, X2, BYTE_FLIP_MASK) \
sPymbed 0:1387ff3eed4a 1609 VPSHUFB(X3, X3, BYTE_FLIP_MASK)
sPymbed 0:1387ff3eed4a 1610
sPymbed 0:1387ff3eed4a 1611 #define W_K_from_buff(X0, X1, X2, X3, BYTE_FLIP_MASK) \
sPymbed 0:1387ff3eed4a 1612 _W_K_from_buff(X0, X1, X2, X3, BYTE_FLIP_MASK)
sPymbed 0:1387ff3eed4a 1613
sPymbed 0:1387ff3eed4a 1614
sPymbed 0:1387ff3eed4a 1615 #define _SET_W_K_XFER_4(i) \
sPymbed 0:1387ff3eed4a 1616 "vpaddd (" #i "*4)+ 0+%[K], %%xmm0, %%xmm4\n\t" \
sPymbed 0:1387ff3eed4a 1617 "vpaddd (" #i "*4)+16+%[K], %%xmm1, %%xmm5\n\t" \
sPymbed 0:1387ff3eed4a 1618 "vmovdqu %%xmm4, (" WK ")\n\t" \
sPymbed 0:1387ff3eed4a 1619 "vmovdqu %%xmm5, 16(" WK ")\n\t" \
sPymbed 0:1387ff3eed4a 1620 "vpaddd (" #i "*4)+32+%[K], %%xmm2, %%xmm6\n\t" \
sPymbed 0:1387ff3eed4a 1621 "vpaddd (" #i "*4)+48+%[K], %%xmm3, %%xmm7\n\t" \
sPymbed 0:1387ff3eed4a 1622 "vmovdqu %%xmm6, 32(" WK ")\n\t" \
sPymbed 0:1387ff3eed4a 1623 "vmovdqu %%xmm7, 48(" WK ")\n\t"
sPymbed 0:1387ff3eed4a 1624
sPymbed 0:1387ff3eed4a 1625 #define SET_W_K_XFER_4(i) \
sPymbed 0:1387ff3eed4a 1626 _SET_W_K_XFER_4(i)
sPymbed 0:1387ff3eed4a 1627
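SET_W_K_XFER_4(i) adds the round constants K[i..i+15] to the sixteen current schedule words held in xmm0-xmm3 and spills the sums to the 64-byte stack area WK points at, so each round's "h += w_k" is a single memory-operand addl. A scalar equivalent, as a hedged sketch (function and parameter names are hypothetical):

/* Hedged scalar equivalent of one SET_W_K_XFER_4(i) call. */
#include <stdint.h>

static void set_w_k_xfer(uint32_t wk[16], const uint32_t w[16],
                         const uint32_t k[16])
{
    int j;
    for (j = 0; j < 16; j++)
        wk[j] = w[j] + k[j];  /* four words per vpaddd in the asm */
}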
sPymbed 0:1387ff3eed4a 1628
sPymbed 0:1387ff3eed4a 1629 static const ALIGN32 word64 mSHUF_00BA[] =
sPymbed 0:1387ff3eed4a 1630 { 0x0b0a090803020100, 0xFFFFFFFFFFFFFFFF }; /* shuffle xBxA -> 00BA */
sPymbed 0:1387ff3eed4a 1631 static const ALIGN32 word64 mSHUF_DC00[] =
sPymbed 0:1387ff3eed4a 1632 { 0xFFFFFFFFFFFFFFFF, 0x0b0a090803020100 }; /* shuffle xDxC -> DC00 */
sPymbed 0:1387ff3eed4a 1633 static const ALIGN32 word64 mBYTE_FLIP_MASK[] =
sPymbed 0:1387ff3eed4a 1634 { 0x0405060700010203, 0x0c0d0e0f08090a0b };
sPymbed 0:1387ff3eed4a 1635
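mBYTE_FLIP_MASK is a vpshufb control: read as bytes it is 03 02 01 00 07 06 05 04 ..., so each 32-bit lane has its bytes reversed, turning the big-endian message words into host order on a little-endian CPU. mSHUF_00BA and mSHUF_DC00 likewise use 0xFF indices (which vpshufb zeroes) to compact the two valid 32-bit sigma-1 results into the low or high half. A scalar sketch of the byte flip (reference only):

/* Hedged scalar sketch: what VPSHUFB with BYTE_FLIP_MASK achieves, i.e. a
 * big-endian load of the sixteen message words. */
#include <stdint.h>

static void w_from_buff(uint32_t w[16], const uint8_t buf[64])
{
    int j;
    for (j = 0; j < 16; j++) {
        w[j] = ((uint32_t)buf[4*j + 0] << 24) |
               ((uint32_t)buf[4*j + 1] << 16) |
               ((uint32_t)buf[4*j + 2] <<  8) |
                (uint32_t)buf[4*j + 3];
    }
}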
sPymbed 0:1387ff3eed4a 1636 #define _Init_Masks(mask1, mask2, mask3) \
sPymbed 0:1387ff3eed4a 1637 "vmovdqa %[FLIP], %" #mask1 "\n\t" \
sPymbed 0:1387ff3eed4a 1638 "vmovdqa %[SHUF00BA], %" #mask2 "\n\t" \
sPymbed 0:1387ff3eed4a 1639 "vmovdqa %[SHUFDC00], %" #mask3 "\n\t"
sPymbed 0:1387ff3eed4a 1640
sPymbed 0:1387ff3eed4a 1641 #define Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00) \
sPymbed 0:1387ff3eed4a 1642 _Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00)
sPymbed 0:1387ff3eed4a 1643
sPymbed 0:1387ff3eed4a 1644 #define X0 %xmm0
sPymbed 0:1387ff3eed4a 1645 #define X1 %xmm1
sPymbed 0:1387ff3eed4a 1646 #define X2 %xmm2
sPymbed 0:1387ff3eed4a 1647 #define X3 %xmm3
sPymbed 0:1387ff3eed4a 1648
sPymbed 0:1387ff3eed4a 1649 #define XTMP0 %xmm4
sPymbed 0:1387ff3eed4a 1650 #define XTMP1 %xmm5
sPymbed 0:1387ff3eed4a 1651 #define XTMP2 %xmm6
sPymbed 0:1387ff3eed4a 1652 #define XTMP3 %xmm7
sPymbed 0:1387ff3eed4a 1653 #define XTMP4 %xmm8
sPymbed 0:1387ff3eed4a 1654 #define XTMP5 %xmm9
sPymbed 0:1387ff3eed4a 1655 #define XFER %xmm10
sPymbed 0:1387ff3eed4a 1656
sPymbed 0:1387ff3eed4a 1657 #define SHUF_00BA %xmm11 /* shuffle xBxA -> 00BA */
sPymbed 0:1387ff3eed4a 1658 #define SHUF_DC00 %xmm12 /* shuffle xDxC -> DC00 */
sPymbed 0:1387ff3eed4a 1659 #define BYTE_FLIP_MASK %xmm13
sPymbed 0:1387ff3eed4a 1660
sPymbed 0:1387ff3eed4a 1661
sPymbed 0:1387ff3eed4a 1662 SHA256_NOINLINE static int Transform_Sha256_AVX1(wc_Sha256* sha256)
sPymbed 0:1387ff3eed4a 1663 {
sPymbed 0:1387ff3eed4a 1664 __asm__ __volatile__ (
sPymbed 0:1387ff3eed4a 1665
sPymbed 0:1387ff3eed4a 1666 "subq $64, %%rsp\n\t"
sPymbed 0:1387ff3eed4a 1667
sPymbed 0:1387ff3eed4a 1668 "leaq 32(%[sha256]), %%rax\n\t"
sPymbed 0:1387ff3eed4a 1669 Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00)
sPymbed 0:1387ff3eed4a 1670 LOAD_DIGEST()
sPymbed 0:1387ff3eed4a 1671
sPymbed 0:1387ff3eed4a 1672 W_K_from_buff(X0, X1, X2, X3, BYTE_FLIP_MASK)
sPymbed 0:1387ff3eed4a 1673
sPymbed 0:1387ff3eed4a 1674 "movl %%r9d, " L4 "\n\t"
sPymbed 0:1387ff3eed4a 1675 "movl %%r12d, " L1 "\n\t"
sPymbed 0:1387ff3eed4a 1676 "xorl %%r10d, " L4 "\n\t"
sPymbed 0:1387ff3eed4a 1677
sPymbed 0:1387ff3eed4a 1678 SET_W_K_XFER_4(0)
sPymbed 0:1387ff3eed4a 1679 MsgSched(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
sPymbed 0:1387ff3eed4a 1680 MsgSched(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4)
sPymbed 0:1387ff3eed4a 1681 MsgSched(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8)
sPymbed 0:1387ff3eed4a 1682 MsgSched(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
sPymbed 0:1387ff3eed4a 1683
sPymbed 0:1387ff3eed4a 1684 SET_W_K_XFER_4(16)
sPymbed 0:1387ff3eed4a 1685 MsgSched(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
sPymbed 0:1387ff3eed4a 1686 MsgSched(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4)
sPymbed 0:1387ff3eed4a 1687 MsgSched(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8)
sPymbed 0:1387ff3eed4a 1688 MsgSched(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
sPymbed 0:1387ff3eed4a 1689
sPymbed 0:1387ff3eed4a 1690 SET_W_K_XFER_4(32)
sPymbed 0:1387ff3eed4a 1691 MsgSched(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
sPymbed 0:1387ff3eed4a 1692 MsgSched(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4)
sPymbed 0:1387ff3eed4a 1693 MsgSched(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8)
sPymbed 0:1387ff3eed4a 1694 MsgSched(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
sPymbed 0:1387ff3eed4a 1695
sPymbed 0:1387ff3eed4a 1696 SET_W_K_XFER_4(48)
sPymbed 0:1387ff3eed4a 1697 RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
sPymbed 0:1387ff3eed4a 1698 RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4)
sPymbed 0:1387ff3eed4a 1699 RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8)
sPymbed 0:1387ff3eed4a 1700 RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
sPymbed 0:1387ff3eed4a 1701
sPymbed 0:1387ff3eed4a 1702 STORE_ADD_DIGEST()
sPymbed 0:1387ff3eed4a 1703
sPymbed 0:1387ff3eed4a 1704 "addq $64, %%rsp\n\t"
sPymbed 0:1387ff3eed4a 1705
sPymbed 0:1387ff3eed4a 1706 :
sPymbed 0:1387ff3eed4a 1707 : [FLIP] "m" (mBYTE_FLIP_MASK[0]),
sPymbed 0:1387ff3eed4a 1708 [SHUF00BA] "m" (mSHUF_00BA[0]),
sPymbed 0:1387ff3eed4a 1709 [SHUFDC00] "m" (mSHUF_DC00[0]),
sPymbed 0:1387ff3eed4a 1710 [sha256] "r" (sha256),
sPymbed 0:1387ff3eed4a 1711 [K] "m" (K)
sPymbed 0:1387ff3eed4a 1712 : WORK_REGS, STATE_REGS, XMM_REGS, "memory"
sPymbed 0:1387ff3eed4a 1713 );
sPymbed 0:1387ff3eed4a 1714
sPymbed 0:1387ff3eed4a 1715 return 0;
sPymbed 0:1387ff3eed4a 1716 }
sPymbed 0:1387ff3eed4a 1717
sPymbed 0:1387ff3eed4a 1718 SHA256_NOINLINE static int Transform_Sha256_AVX1_Len(wc_Sha256* sha256,
sPymbed 0:1387ff3eed4a 1719 word32 len)
sPymbed 0:1387ff3eed4a 1720 {
sPymbed 0:1387ff3eed4a 1721 __asm__ __volatile__ (
sPymbed 0:1387ff3eed4a 1722
sPymbed 0:1387ff3eed4a 1723 "subq $64, %%rsp\n\t"
sPymbed 0:1387ff3eed4a 1724 "movq 120(%[sha256]), %%rax\n\t"
sPymbed 0:1387ff3eed4a 1725
sPymbed 0:1387ff3eed4a 1726 Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00)
sPymbed 0:1387ff3eed4a 1727 LOAD_DIGEST()
sPymbed 0:1387ff3eed4a 1728
sPymbed 0:1387ff3eed4a 1729 "# Start of loop processing a block\n"
sPymbed 0:1387ff3eed4a 1730 "1:\n\t"
sPymbed 0:1387ff3eed4a 1731
sPymbed 0:1387ff3eed4a 1732 W_K_from_buff(X0, X1, X2, X3, BYTE_FLIP_MASK)
sPymbed 0:1387ff3eed4a 1733
sPymbed 0:1387ff3eed4a 1734 "movl %%r9d, " L4 "\n\t"
sPymbed 0:1387ff3eed4a 1735 "movl %%r12d, " L1 "\n\t"
sPymbed 0:1387ff3eed4a 1736 "xorl %%r10d, " L4 "\n\t"
sPymbed 0:1387ff3eed4a 1737
sPymbed 0:1387ff3eed4a 1738 SET_W_K_XFER_4(0)
sPymbed 0:1387ff3eed4a 1739 MsgSched(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
sPymbed 0:1387ff3eed4a 1740 MsgSched(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4)
sPymbed 0:1387ff3eed4a 1741 MsgSched(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8)
sPymbed 0:1387ff3eed4a 1742 MsgSched(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
sPymbed 0:1387ff3eed4a 1743
sPymbed 0:1387ff3eed4a 1744 SET_W_K_XFER_4(16)
sPymbed 0:1387ff3eed4a 1745 MsgSched(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
sPymbed 0:1387ff3eed4a 1746 MsgSched(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4)
sPymbed 0:1387ff3eed4a 1747 MsgSched(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8)
sPymbed 0:1387ff3eed4a 1748 MsgSched(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
sPymbed 0:1387ff3eed4a 1749
sPymbed 0:1387ff3eed4a 1750 SET_W_K_XFER_4(32)
sPymbed 0:1387ff3eed4a 1751 MsgSched(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
sPymbed 0:1387ff3eed4a 1752 MsgSched(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4)
sPymbed 0:1387ff3eed4a 1753 MsgSched(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8)
sPymbed 0:1387ff3eed4a 1754 MsgSched(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
sPymbed 0:1387ff3eed4a 1755
sPymbed 0:1387ff3eed4a 1756 SET_W_K_XFER_4(48)
sPymbed 0:1387ff3eed4a 1757 RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
sPymbed 0:1387ff3eed4a 1758 RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4)
sPymbed 0:1387ff3eed4a 1759 RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8)
sPymbed 0:1387ff3eed4a 1760 RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
sPymbed 0:1387ff3eed4a 1761 "movq 120(%[sha256]), %%rax\n\t"
sPymbed 0:1387ff3eed4a 1762
sPymbed 0:1387ff3eed4a 1763 ADD_DIGEST()
sPymbed 0:1387ff3eed4a 1764
sPymbed 0:1387ff3eed4a 1765 "addq $64, %%rax\n\t"
sPymbed 0:1387ff3eed4a 1766 "subl $64, %[len]\n\t"
sPymbed 0:1387ff3eed4a 1767
sPymbed 0:1387ff3eed4a 1768 STORE_DIGEST()
sPymbed 0:1387ff3eed4a 1769
sPymbed 0:1387ff3eed4a 1770 "movq %%rax, 120(%[sha256])\n\t"
sPymbed 0:1387ff3eed4a 1771 "jnz 1b\n\t"
sPymbed 0:1387ff3eed4a 1772
sPymbed 0:1387ff3eed4a 1773 "addq $64, %%rsp\n\t"
sPymbed 0:1387ff3eed4a 1774
sPymbed 0:1387ff3eed4a 1775 :
sPymbed 0:1387ff3eed4a 1776 : [FLIP] "m" (mBYTE_FLIP_MASK[0]),
sPymbed 0:1387ff3eed4a 1777 [SHUF00BA] "m" (mSHUF_00BA[0]),
sPymbed 0:1387ff3eed4a 1778 [SHUFDC00] "m" (mSHUF_DC00[0]),
sPymbed 0:1387ff3eed4a 1779 [sha256] "r" (sha256),
sPymbed 0:1387ff3eed4a 1780 [len] "r" (len),
sPymbed 0:1387ff3eed4a 1781 [K] "m" (K)
sPymbed 0:1387ff3eed4a 1782 : WORK_REGS, STATE_REGS, XMM_REGS, "memory"
sPymbed 0:1387ff3eed4a 1783 );
sPymbed 0:1387ff3eed4a 1784
sPymbed 0:1387ff3eed4a 1785 return 0;
sPymbed 0:1387ff3eed4a 1786 }
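Both _Len variants read the input pointer from byte offset 120 of the wc_Sha256 structure (a layout assumption baked into the `movq 120(%[sha256])` loads), advance it 64 bytes per iteration, and loop on the flags left by `subl $64, %[len]`, so the length must be a non-zero multiple of the block size. A hedged sketch of the contract a caller has to respect (do_blocks is a hypothetical helper, not wolfSSL API; assumes this file's headers):

/* Hedged usage sketch: the asm loop only handles whole 64-byte blocks. */
static int do_blocks(wc_Sha256* sha256, word32 len)
{
    if (len == 0 || (len % WC_SHA256_BLOCK_SIZE) != 0)
        return BAD_FUNC_ARG;
    return Transform_Sha256_AVX1_Len(sha256, len);
}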
sPymbed 0:1387ff3eed4a 1787 #endif /* HAVE_INTEL_AVX1 */
sPymbed 0:1387ff3eed4a 1788
sPymbed 0:1387ff3eed4a 1789 #if defined(HAVE_INTEL_AVX2) && defined(HAVE_INTEL_RORX)
sPymbed 0:1387ff3eed4a 1790 SHA256_NOINLINE static int Transform_Sha256_AVX1_RORX(wc_Sha256* sha256)
sPymbed 0:1387ff3eed4a 1791 {
sPymbed 0:1387ff3eed4a 1792 __asm__ __volatile__ (
sPymbed 0:1387ff3eed4a 1793
sPymbed 0:1387ff3eed4a 1794 "subq $64, %%rsp\n\t"
sPymbed 0:1387ff3eed4a 1795
sPymbed 0:1387ff3eed4a 1796 Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00)
sPymbed 0:1387ff3eed4a 1797 "leaq 32(%[sha256]), %%rax\n\t"
sPymbed 0:1387ff3eed4a 1798 W_K_from_buff(X0, X1, X2, X3, BYTE_FLIP_MASK)
sPymbed 0:1387ff3eed4a 1799
sPymbed 0:1387ff3eed4a 1800 LOAD_DIGEST()
sPymbed 0:1387ff3eed4a 1801
sPymbed 0:1387ff3eed4a 1802 SET_W_K_XFER_4(0)
sPymbed 0:1387ff3eed4a 1803 "movl %%r9d, " L4 "\n\t"
sPymbed 0:1387ff3eed4a 1804 "rorx $6, %%r12d, " L1 "\n\t"
sPymbed 0:1387ff3eed4a 1805 "xorl %%r10d, " L4 "\n\t"
sPymbed 0:1387ff3eed4a 1806 MsgSched_RORX(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
sPymbed 0:1387ff3eed4a 1807 MsgSched_RORX(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4)
sPymbed 0:1387ff3eed4a 1808 MsgSched_RORX(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8)
sPymbed 0:1387ff3eed4a 1809 MsgSched_RORX(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
sPymbed 0:1387ff3eed4a 1810
sPymbed 0:1387ff3eed4a 1811 SET_W_K_XFER_4(16)
sPymbed 0:1387ff3eed4a 1812 MsgSched_RORX(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
sPymbed 0:1387ff3eed4a 1813 MsgSched_RORX(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4)
sPymbed 0:1387ff3eed4a 1814 MsgSched_RORX(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8)
sPymbed 0:1387ff3eed4a 1815 MsgSched_RORX(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
sPymbed 0:1387ff3eed4a 1816
sPymbed 0:1387ff3eed4a 1817 SET_W_K_XFER_4(32)
sPymbed 0:1387ff3eed4a 1818 MsgSched_RORX(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
sPymbed 0:1387ff3eed4a 1819 MsgSched_RORX(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4)
sPymbed 0:1387ff3eed4a 1820 MsgSched_RORX(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8)
sPymbed 0:1387ff3eed4a 1821 MsgSched_RORX(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
sPymbed 0:1387ff3eed4a 1822
sPymbed 0:1387ff3eed4a 1823 SET_W_K_XFER_4(48)
sPymbed 0:1387ff3eed4a 1824 "xorl " L3 ", " L3 "\n\t"
sPymbed 0:1387ff3eed4a 1825 RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
sPymbed 0:1387ff3eed4a 1826 RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4)
sPymbed 0:1387ff3eed4a 1827 RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8)
sPymbed 0:1387ff3eed4a 1828 RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
sPymbed 0:1387ff3eed4a 1829 /* Prev RND: h += Maj(a,b,c) */
sPymbed 0:1387ff3eed4a 1830 "addl " L3 ", %%r8d\n\t"
sPymbed 0:1387ff3eed4a 1831
sPymbed 0:1387ff3eed4a 1832 STORE_ADD_DIGEST()
sPymbed 0:1387ff3eed4a 1833
sPymbed 0:1387ff3eed4a 1834 "addq $64, %%rsp\n\t"
sPymbed 0:1387ff3eed4a 1835
sPymbed 0:1387ff3eed4a 1836 :
sPymbed 0:1387ff3eed4a 1837 : [FLIP] "m" (mBYTE_FLIP_MASK[0]),
sPymbed 0:1387ff3eed4a 1838 [SHUF00BA] "m" (mSHUF_00BA[0]),
sPymbed 0:1387ff3eed4a 1839 [SHUFDC00] "m" (mSHUF_DC00[0]),
sPymbed 0:1387ff3eed4a 1840 [sha256] "r" (sha256),
sPymbed 0:1387ff3eed4a 1841 [K] "m" (K)
sPymbed 0:1387ff3eed4a 1842 : WORK_REGS, STATE_REGS, XMM_REGS, "memory"
sPymbed 0:1387ff3eed4a 1843 );
sPymbed 0:1387ff3eed4a 1844
sPymbed 0:1387ff3eed4a 1845 return 0;
sPymbed 0:1387ff3eed4a 1846 }
sPymbed 0:1387ff3eed4a 1847
sPymbed 0:1387ff3eed4a 1848 SHA256_NOINLINE static int Transform_Sha256_AVX1_RORX_Len(wc_Sha256* sha256,
sPymbed 0:1387ff3eed4a 1849 word32 len)
sPymbed 0:1387ff3eed4a 1850 {
sPymbed 0:1387ff3eed4a 1851 __asm__ __volatile__ (
sPymbed 0:1387ff3eed4a 1852
sPymbed 0:1387ff3eed4a 1853 "subq $64, %%rsp\n\t"
sPymbed 0:1387ff3eed4a 1854 "movq 120(%[sha256]), %%rax\n\t"
sPymbed 0:1387ff3eed4a 1855
sPymbed 0:1387ff3eed4a 1856 Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00)
sPymbed 0:1387ff3eed4a 1857 LOAD_DIGEST()
sPymbed 0:1387ff3eed4a 1858
sPymbed 0:1387ff3eed4a 1859 "# Start of loop processing a block\n"
sPymbed 0:1387ff3eed4a 1860 "1:\n\t"
sPymbed 0:1387ff3eed4a 1861
sPymbed 0:1387ff3eed4a 1862 W_K_from_buff(X0, X1, X2, X3, BYTE_FLIP_MASK)
sPymbed 0:1387ff3eed4a 1863
sPymbed 0:1387ff3eed4a 1864 SET_W_K_XFER_4(0)
sPymbed 0:1387ff3eed4a 1865 "movl %%r9d, " L4 "\n\t"
sPymbed 0:1387ff3eed4a 1866 "rorx $6, %%r12d, " L1 "\n\t"
sPymbed 0:1387ff3eed4a 1867 "xorl %%r10d, " L4 "\n\t"
sPymbed 0:1387ff3eed4a 1868 MsgSched_RORX(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
sPymbed 0:1387ff3eed4a 1869 MsgSched_RORX(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4)
sPymbed 0:1387ff3eed4a 1870 MsgSched_RORX(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8)
sPymbed 0:1387ff3eed4a 1871 MsgSched_RORX(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
sPymbed 0:1387ff3eed4a 1872
sPymbed 0:1387ff3eed4a 1873 SET_W_K_XFER_4(16)
sPymbed 0:1387ff3eed4a 1874 MsgSched_RORX(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
sPymbed 0:1387ff3eed4a 1875 MsgSched_RORX(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4)
sPymbed 0:1387ff3eed4a 1876 MsgSched_RORX(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8)
sPymbed 0:1387ff3eed4a 1877 MsgSched_RORX(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
sPymbed 0:1387ff3eed4a 1878
sPymbed 0:1387ff3eed4a 1879 SET_W_K_XFER_4(32)
sPymbed 0:1387ff3eed4a 1880 MsgSched_RORX(X0, X1, X2, X3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
sPymbed 0:1387ff3eed4a 1881 MsgSched_RORX(X1, X2, X3, X0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4)
sPymbed 0:1387ff3eed4a 1882 MsgSched_RORX(X2, X3, X0, X1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8)
sPymbed 0:1387ff3eed4a 1883 MsgSched_RORX(X3, X0, X1, X2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
sPymbed 0:1387ff3eed4a 1884
sPymbed 0:1387ff3eed4a 1885 SET_W_K_XFER_4(48)
sPymbed 0:1387ff3eed4a 1886 "xorl " L3 ", " L3 "\n\t"
sPymbed 0:1387ff3eed4a 1887 "xorl " L2 ", " L2 "\n\t"
sPymbed 0:1387ff3eed4a 1888 RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
sPymbed 0:1387ff3eed4a 1889 RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 4)
sPymbed 0:1387ff3eed4a 1890 RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 8)
sPymbed 0:1387ff3eed4a 1891 RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
sPymbed 0:1387ff3eed4a 1892 /* Prev RND: h += Maj(a,b,c) */
sPymbed 0:1387ff3eed4a 1893 "addl " L3 ", %%r8d\n\t"
sPymbed 0:1387ff3eed4a 1894 "movq 120(%[sha256]), %%rax\n\t"
sPymbed 0:1387ff3eed4a 1895
sPymbed 0:1387ff3eed4a 1896 ADD_DIGEST()
sPymbed 0:1387ff3eed4a 1897
sPymbed 0:1387ff3eed4a 1898 "addq $64, %%rax\n\t"
sPymbed 0:1387ff3eed4a 1899 "subl $64, %[len]\n\t"
sPymbed 0:1387ff3eed4a 1900
sPymbed 0:1387ff3eed4a 1901 STORE_DIGEST()
sPymbed 0:1387ff3eed4a 1902
sPymbed 0:1387ff3eed4a 1903 "movq %%rax, 120(%[sha256])\n\t"
sPymbed 0:1387ff3eed4a 1904 "jnz 1b\n\t"
sPymbed 0:1387ff3eed4a 1905
sPymbed 0:1387ff3eed4a 1906 "addq $64, %%rsp\n\t"
sPymbed 0:1387ff3eed4a 1907
sPymbed 0:1387ff3eed4a 1908 :
sPymbed 0:1387ff3eed4a 1909 : [FLIP] "m" (mBYTE_FLIP_MASK[0]),
sPymbed 0:1387ff3eed4a 1910 [SHUF00BA] "m" (mSHUF_00BA[0]),
sPymbed 0:1387ff3eed4a 1911 [SHUFDC00] "m" (mSHUF_DC00[0]),
sPymbed 0:1387ff3eed4a 1912 [sha256] "r" (sha256),
sPymbed 0:1387ff3eed4a 1913 [len] "r" (len),
sPymbed 0:1387ff3eed4a 1914 [K] "m" (K)
sPymbed 0:1387ff3eed4a 1915 : WORK_REGS, STATE_REGS, XMM_REGS, "memory"
sPymbed 0:1387ff3eed4a 1916 );
sPymbed 0:1387ff3eed4a 1917
sPymbed 0:1387ff3eed4a 1918 return 0;
sPymbed 0:1387ff3eed4a 1919 }
sPymbed 0:1387ff3eed4a 1920 #endif /* HAVE_INTEL_AVX2 && HAVE_INTEL_RORX */
sPymbed 0:1387ff3eed4a 1921
sPymbed 0:1387ff3eed4a 1922
sPymbed 0:1387ff3eed4a 1923 #if defined(HAVE_INTEL_AVX2)
sPymbed 0:1387ff3eed4a 1924 #define Y0 %ymm0
sPymbed 0:1387ff3eed4a 1925 #define Y1 %ymm1
sPymbed 0:1387ff3eed4a 1926 #define Y2 %ymm2
sPymbed 0:1387ff3eed4a 1927 #define Y3 %ymm3
sPymbed 0:1387ff3eed4a 1928
sPymbed 0:1387ff3eed4a 1929 #define YTMP0 %ymm4
sPymbed 0:1387ff3eed4a 1930 #define YTMP1 %ymm5
sPymbed 0:1387ff3eed4a 1931 #define YTMP2 %ymm6
sPymbed 0:1387ff3eed4a 1932 #define YTMP3 %ymm7
sPymbed 0:1387ff3eed4a 1933 #define YTMP4 %ymm8
sPymbed 0:1387ff3eed4a 1934 #define YTMP5 %ymm9
sPymbed 0:1387ff3eed4a 1935 #define YXFER %ymm10
sPymbed 0:1387ff3eed4a 1936
sPymbed 0:1387ff3eed4a 1937 #define SHUF_Y_00BA %ymm11 /* shuffle xBxA -> 00BA */
sPymbed 0:1387ff3eed4a 1938 #define SHUF_Y_DC00 %ymm12 /* shuffle xDxC -> DC00 */
sPymbed 0:1387ff3eed4a 1939 #define BYTE_FLIP_Y_MASK %ymm13
sPymbed 0:1387ff3eed4a 1940
sPymbed 0:1387ff3eed4a 1941 #define YMM_REGS "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", \
sPymbed 0:1387ff3eed4a 1942 "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13"
sPymbed 0:1387ff3eed4a 1943
sPymbed 0:1387ff3eed4a 1944 #define MsgSched_Y(Y0,Y1,Y2,Y3,a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 1945 RND_STEP_0_1(a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 1946 VPALIGNR (YTMP1, Y1, Y0, 4) /* YTMP1 = W[-15] */ \
sPymbed 0:1387ff3eed4a 1947 VPALIGNR (YTMP0, Y3, Y2, 4) /* YTMP0 = W[-7] */ \
sPymbed 0:1387ff3eed4a 1948 RND_STEP_0_2(a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 1949 RND_STEP_0_3(a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 1950 VPSRLD (YTMP2, YTMP1, 7) /* YTMP2 = W[-15] >> 7 */ \
sPymbed 0:1387ff3eed4a 1951 VPSLLD (YTMP3, YTMP1, 25) /* YTMP3 = W[-15] << (32-7) */ \
sPymbed 0:1387ff3eed4a 1952 RND_STEP_0_4(a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 1953 RND_STEP_0_5(a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 1954 VPSRLD (YTMP4, YTMP1, 18) /* YTMP4 = W[-15] >> 18 */ \
sPymbed 0:1387ff3eed4a 1955 VPSLLD (YTMP5, YTMP1, 14) /* YTMP5 = W[-15] << (32-18) */ \
sPymbed 0:1387ff3eed4a 1956 RND_STEP_0_6(a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 1957 RND_STEP_0_7(a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 1958 VPOR (YTMP2, YTMP3, YTMP2) /* YTMP2 = W[-15] >>> 7 */ \
sPymbed 0:1387ff3eed4a 1959 VPOR (YTMP4, YTMP5, YTMP4) /* YTMP4 = W[-15] >>> 18 */ \
sPymbed 0:1387ff3eed4a 1960 RND_STEP_0_8(a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 1961 RND_STEP_1_1(h,a,b,c,d,e,f,g,_i+1) \
sPymbed 0:1387ff3eed4a 1962 RND_STEP_1_2(h,a,b,c,d,e,f,g,_i+1) \
sPymbed 0:1387ff3eed4a 1963 VPSRLD (YTMP5, YTMP1, 3) /* YTMP5 = W[-15] >> 3 */ \
sPymbed 0:1387ff3eed4a 1964 VPXOR (YTMP2, YTMP4, YTMP2) /* YTMP2 = W[-15] >>> 7 ^ W[-15] >>> 18 */ \
sPymbed 0:1387ff3eed4a 1965 RND_STEP_1_3(h,a,b,c,d,e,f,g,_i+1) \
sPymbed 0:1387ff3eed4a 1966 RND_STEP_1_4(h,a,b,c,d,e,f,g,_i+1) \
sPymbed 0:1387ff3eed4a 1967 VPXOR (YTMP1, YTMP5, YTMP2) /* YTMP1 = s0 */ \
sPymbed 0:1387ff3eed4a 1968 VPSHUFD (YTMP2, Y3, 0b11111010) /* YTMP2 = W[-2] {BBAA}*/ \
sPymbed 0:1387ff3eed4a 1969 RND_STEP_1_5(h,a,b,c,d,e,f,g,_i+1) \
sPymbed 0:1387ff3eed4a 1970 RND_STEP_1_6(h,a,b,c,d,e,f,g,_i+1) \
sPymbed 0:1387ff3eed4a 1971 VPSRLD (YTMP4, YTMP2, 10) /* YTMP4 = W[-2] >> 10 {BBAA} */ \
sPymbed 0:1387ff3eed4a 1972 VPSRLQ (YTMP3, YTMP2, 19) /* YTMP3 = W[-2] MY_ROR 19 {xBxA} */ \
sPymbed 0:1387ff3eed4a 1973 RND_STEP_1_7(h,a,b,c,d,e,f,g,_i+1) \
sPymbed 0:1387ff3eed4a 1974 RND_STEP_1_8(h,a,b,c,d,e,f,g,_i+1) \
sPymbed 0:1387ff3eed4a 1975 RND_STEP_0_1(g,h,a,b,c,d,e,f,_i+2) \
sPymbed 0:1387ff3eed4a 1976 VPSRLQ (YTMP2, YTMP2, 17) /* YTMP2 = W[-2] MY_ROR 17 {xBxA} */ \
sPymbed 0:1387ff3eed4a 1977 VPADDD (YTMP0, YTMP0, Y0) \
sPymbed 0:1387ff3eed4a 1978 RND_STEP_0_2(g,h,a,b,c,d,e,f,_i+2) \
sPymbed 0:1387ff3eed4a 1979 RND_STEP_0_3(g,h,a,b,c,d,e,f,_i+2) \
sPymbed 0:1387ff3eed4a 1980 RND_STEP_0_4(g,h,a,b,c,d,e,f,_i+2) \
sPymbed 0:1387ff3eed4a 1981 VPXOR (YTMP2, YTMP3, YTMP2) \
sPymbed 0:1387ff3eed4a 1982 VPADDD (YTMP0, YTMP0, YTMP1) /* YTMP0 = W[-16] + W[-7] + s0 */ \
sPymbed 0:1387ff3eed4a 1983 RND_STEP_0_5(g,h,a,b,c,d,e,f,_i+2) \
sPymbed 0:1387ff3eed4a 1984 VPXOR (YTMP4, YTMP4, YTMP2) /* YTMP4 = s1 {xBxA} */ \
sPymbed 0:1387ff3eed4a 1985 RND_STEP_0_6(g,h,a,b,c,d,e,f,_i+2) \
sPymbed 0:1387ff3eed4a 1986 VPSHUFB (YTMP4, YTMP4, SHUF_Y_00BA) /* YTMP4 = s1 {00BA} */ \
sPymbed 0:1387ff3eed4a 1987 RND_STEP_0_7(g,h,a,b,c,d,e,f,_i+2) \
sPymbed 0:1387ff3eed4a 1988 VPADDD (YTMP0, YTMP0, YTMP4) /* YTMP0 = {..., ..., W[1], W[0]} */ \
sPymbed 0:1387ff3eed4a 1989 RND_STEP_0_8(g,h,a,b,c,d,e,f,_i+2) \
sPymbed 0:1387ff3eed4a 1990 RND_STEP_1_1(f,g,h,a,b,c,d,e,_i+3) \
sPymbed 0:1387ff3eed4a 1991 VPSHUFD (YTMP2, YTMP0, 0b01010000) /* YTMP2 = W[-2] {DDCC} */ \
sPymbed 0:1387ff3eed4a 1992 RND_STEP_1_2(f,g,h,a,b,c,d,e,_i+3) \
sPymbed 0:1387ff3eed4a 1993 VPSRLQ (YTMP4, YTMP2, 17) /* YTMP4 = W[-2] MY_ROR 17 {xDxC} */ \
sPymbed 0:1387ff3eed4a 1994 VPSRLQ (YTMP3, YTMP2, 19) /* YTMP3 = W[-2] MY_ROR 19 {xDxC} */ \
sPymbed 0:1387ff3eed4a 1995 RND_STEP_1_3(f,g,h,a,b,c,d,e,_i+3) \
sPymbed 0:1387ff3eed4a 1996 RND_STEP_1_4(f,g,h,a,b,c,d,e,_i+3) \
sPymbed 0:1387ff3eed4a 1997 VPSRLD (YTMP5, YTMP2, 10) /* YTMP5 = W[-2] >> 10 {DDCC} */ \
sPymbed 0:1387ff3eed4a 1998 VPXOR (YTMP4, YTMP3, YTMP4) \
sPymbed 0:1387ff3eed4a 1999 RND_STEP_1_5(f,g,h,a,b,c,d,e,_i+3) \
sPymbed 0:1387ff3eed4a 2000 RND_STEP_1_6(f,g,h,a,b,c,d,e,_i+3) \
sPymbed 0:1387ff3eed4a 2001 VPXOR (YTMP5, YTMP4, YTMP5) /* YTMP5 = s1 {xDxC} */ \
sPymbed 0:1387ff3eed4a 2002 RND_STEP_1_7(f,g,h,a,b,c,d,e,_i+3) \
sPymbed 0:1387ff3eed4a 2003 VPSHUFB (YTMP5, YTMP5, SHUF_Y_DC00) /* YTMP5 = s1 {DC00} */ \
sPymbed 0:1387ff3eed4a 2004 RND_STEP_1_8(f,g,h,a,b,c,d,e,_i+3) \
sPymbed 0:1387ff3eed4a 2005 VPADDD (Y0, YTMP5, YTMP0) /* Y0 = {W[3], W[2], W[1], W[0]} */
sPymbed 0:1387ff3eed4a 2006
sPymbed 0:1387ff3eed4a 2007 #if defined(HAVE_INTEL_RORX)
sPymbed 0:1387ff3eed4a 2008
sPymbed 0:1387ff3eed4a 2009 #define MsgSched_Y_RORX(Y0,Y1,Y2,Y3,a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 2010 RND_STEP_RORX_0_1(a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 2011 VPALIGNR (YTMP1, Y1, Y0, 4) /* YTMP1 = W[-15] */ \
sPymbed 0:1387ff3eed4a 2012 RND_STEP_RORX_0_2(a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 2013 VPALIGNR (YTMP0, Y3, Y2, 4) /* YTMP0 = W[-7] */ \
sPymbed 0:1387ff3eed4a 2014 RND_STEP_RORX_0_3(a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 2015 VPSRLD (YTMP2, YTMP1, 7) /* YTMP2 = W[-15] >> 7 */ \
sPymbed 0:1387ff3eed4a 2016 RND_STEP_RORX_0_4(a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 2017 VPSLLD (YTMP3, YTMP1, 25) /* YTMP3 = W[-15] << (32-7) */ \
sPymbed 0:1387ff3eed4a 2018 RND_STEP_RORX_0_5(a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 2019 VPSRLD (YTMP4, YTMP1, 18) /* YTMP4 = W[-15] >> 18 */ \
sPymbed 0:1387ff3eed4a 2020 RND_STEP_RORX_0_6(a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 2021 VPSLLD (YTMP5, YTMP1, 14) /* YTMP5 = W[-15] << (32-18) */ \
sPymbed 0:1387ff3eed4a 2022 RND_STEP_RORX_0_7(a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 2023 VPOR (YTMP2, YTMP2, YTMP3) /* YTMP2 = W[-15] >>> 7 */ \
sPymbed 0:1387ff3eed4a 2024 RND_STEP_RORX_0_8(a,b,c,d,e,f,g,h,_i) \
sPymbed 0:1387ff3eed4a 2025 VPOR (YTMP4, YTMP4, YTMP5) /* YTMP4 = W[-15] >>> 18 */ \
sPymbed 0:1387ff3eed4a 2026 RND_STEP_RORX_1_1(h,a,b,c,d,e,f,g,_i+1) \
sPymbed 0:1387ff3eed4a 2027 VPSRLD (YTMP5, YTMP1, 3) /* YTMP5 = W[-15] >> 3 */ \
sPymbed 0:1387ff3eed4a 2028 RND_STEP_RORX_1_2(h,a,b,c,d,e,f,g,_i+1) \
sPymbed 0:1387ff3eed4a 2029 VPXOR (YTMP2, YTMP2, YTMP4) /* YTMP2 = W[-15] >>> 7 ^ W[-15] >>> 18 */ \
sPymbed 0:1387ff3eed4a 2030 RND_STEP_RORX_1_3(h,a,b,c,d,e,f,g,_i+1) \
sPymbed 0:1387ff3eed4a 2031 VPSHUFD (YTMP3, Y3, 0b11111010) /* YTMP3 = W[-2] {BBAA} */ \
sPymbed 0:1387ff3eed4a 2032 RND_STEP_RORX_1_4(h,a,b,c,d,e,f,g,_i+1) \
sPymbed 0:1387ff3eed4a 2033 VPXOR (YTMP1, YTMP5, YTMP2) /* YTMP1 = s0 */ \
sPymbed 0:1387ff3eed4a 2034 RND_STEP_RORX_1_5(h,a,b,c,d,e,f,g,_i+1) \
sPymbed 0:1387ff3eed4a 2035 VPSRLD (YTMP4, YTMP3, 10) /* YTMP4 = W[-2] >> 10 {BBAA} */ \
sPymbed 0:1387ff3eed4a 2036 RND_STEP_RORX_1_6(h,a,b,c,d,e,f,g,_i+1) \
sPymbed 0:1387ff3eed4a 2037 VPSRLQ (YTMP2, YTMP3, 19) /* YTMP2 = W[-2] MY_ROR 19 {xBxA} */ \
sPymbed 0:1387ff3eed4a 2038 RND_STEP_RORX_1_7(h,a,b,c,d,e,f,g,_i+1) \
sPymbed 0:1387ff3eed4a 2039 VPSRLQ (YTMP3, YTMP3, 17) /* YTMP3 = W[-2] MY_ROR 17 {xBxA} */ \
sPymbed 0:1387ff3eed4a 2040 RND_STEP_RORX_1_8(h,a,b,c,d,e,f,g,_i+1) \
sPymbed 0:1387ff3eed4a 2041 VPADDD (YTMP0, YTMP0, Y0) \
sPymbed 0:1387ff3eed4a 2042 RND_STEP_RORX_0_1(g,h,a,b,c,d,e,f,_i+2) \
sPymbed 0:1387ff3eed4a 2043 VPXOR (YTMP2, YTMP2, YTMP3) \
sPymbed 0:1387ff3eed4a 2044 RND_STEP_RORX_0_2(g,h,a,b,c,d,e,f,_i+2) \
sPymbed 0:1387ff3eed4a 2045 VPXOR (YTMP4, YTMP4, YTMP2) /* YTMP4 = s1 {xBxA} */ \
sPymbed 0:1387ff3eed4a 2046 RND_STEP_RORX_0_3(g,h,a,b,c,d,e,f,_i+2) \
sPymbed 0:1387ff3eed4a 2047 VPADDD (YTMP0, YTMP0, YTMP1) /* YTMP0 = W[-16] + W[-7] + s0 */ \
sPymbed 0:1387ff3eed4a 2048 RND_STEP_RORX_0_4(g,h,a,b,c,d,e,f,_i+2) \
sPymbed 0:1387ff3eed4a 2049 VPSHUFB (YTMP4, YTMP4, SHUF_Y_00BA) /* YTMP4 = s1 {00BA} */ \
sPymbed 0:1387ff3eed4a 2050 RND_STEP_RORX_0_5(g,h,a,b,c,d,e,f,_i+2) \
sPymbed 0:1387ff3eed4a 2051 VPADDD (YTMP0, YTMP0, YTMP4) /* YTMP0 = {..., ..., W[1], W[0]} */ \
sPymbed 0:1387ff3eed4a 2052 RND_STEP_RORX_0_6(g,h,a,b,c,d,e,f,_i+2) \
sPymbed 0:1387ff3eed4a 2053 VPSHUFD (YTMP2, YTMP0, 0b01010000) /* YTMP2 = W[-2] {DDCC} */ \
sPymbed 0:1387ff3eed4a 2054 RND_STEP_RORX_0_7(g,h,a,b,c,d,e,f,_i+2) \
sPymbed 0:1387ff3eed4a 2055 RND_STEP_RORX_0_8(g,h,a,b,c,d,e,f,_i+2) \
sPymbed 0:1387ff3eed4a 2056 VPSRLQ (YTMP4, YTMP2, 17) /* YTMP4 = W[-2] MY_ROR 17 {xDxC} */ \
sPymbed 0:1387ff3eed4a 2057 RND_STEP_RORX_1_1(f,g,h,a,b,c,d,e,_i+3) \
sPymbed 0:1387ff3eed4a 2058 VPSRLQ (YTMP3, YTMP2, 19) /* YTMP3 = W[-2] MY_ROR 19 {xDxC} */ \
sPymbed 0:1387ff3eed4a 2059 RND_STEP_RORX_1_2(f,g,h,a,b,c,d,e,_i+3) \
sPymbed 0:1387ff3eed4a 2060 VPSRLD (YTMP5, YTMP2, 10) /* YTMP5 = W[-2] >> 10 {DDCC} */ \
sPymbed 0:1387ff3eed4a 2061 RND_STEP_RORX_1_3(f,g,h,a,b,c,d,e,_i+3) \
sPymbed 0:1387ff3eed4a 2062 VPXOR (YTMP4, YTMP4, YTMP3) \
sPymbed 0:1387ff3eed4a 2063 RND_STEP_RORX_1_4(f,g,h,a,b,c,d,e,_i+3) \
sPymbed 0:1387ff3eed4a 2064 VPXOR (YTMP5, YTMP5, YTMP4) /* YTMP5 = s1 {xDxC} */ \
sPymbed 0:1387ff3eed4a 2065 RND_STEP_RORX_1_5(f,g,h,a,b,c,d,e,_i+3) \
sPymbed 0:1387ff3eed4a 2066 RND_STEP_RORX_1_6(f,g,h,a,b,c,d,e,_i+3) \
sPymbed 0:1387ff3eed4a 2067 VPSHUFB (YTMP5, YTMP5, SHUF_Y_DC00) /* YTMP5 = s1 {DC00} */ \
sPymbed 0:1387ff3eed4a 2068 RND_STEP_RORX_1_7(f,g,h,a,b,c,d,e,_i+3) \
sPymbed 0:1387ff3eed4a 2069 RND_STEP_RORX_1_8(f,g,h,a,b,c,d,e,_i+3) \
sPymbed 0:1387ff3eed4a 2070 VPADDD (Y0, YTMP5, YTMP0) /* Y0 = {W[3], W[2], W[1], W[0]} */ \
sPymbed 0:1387ff3eed4a 2071
sPymbed 0:1387ff3eed4a 2072 #endif /* HAVE_INTEL_RORX */
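/* Editor's note: a hedged scalar sketch (not part of the build) of the
 * message schedule that MsgSched_Y/MsgSched_Y_RORX compute four words at a
 * time per 128-bit lane; ScalarRotr32/ScalarMsgSched are illustrative names.
 */
#if 0
static word32 ScalarRotr32(word32 x, int n)
{
    return (x >> n) | (x << (32 - n));
}

static void ScalarMsgSched(word32* W) /* W[0..63]; W[0..15] already loaded */
{
    int i;
    for (i = 16; i < 64; i++) {
        word32 s0 = ScalarRotr32(W[i-15], 7) ^ ScalarRotr32(W[i-15], 18) ^
                    (W[i-15] >> 3);
        word32 s1 = ScalarRotr32(W[i-2], 17) ^ ScalarRotr32(W[i-2], 19) ^
                    (W[i-2] >> 10);
        W[i] = W[i-16] + s0 + W[i-7] + s1;
    }
}
#endif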
sPymbed 0:1387ff3eed4a 2073
sPymbed 0:1387ff3eed4a 2074 #define _VINSERTI128(op1,op2,op3,op4) \
sPymbed 0:1387ff3eed4a 2075 "vinserti128 $" #op4 ", %" #op3 ", %" #op2 ", %" #op1 "\n\t"
sPymbed 0:1387ff3eed4a 2076 #define VINSERTI128(op1,op2,op3,op4) \
sPymbed 0:1387ff3eed4a 2077 _VINSERTI128(op1,op2,op3,op4)
sPymbed 0:1387ff3eed4a 2078
sPymbed 0:1387ff3eed4a 2079
sPymbed 0:1387ff3eed4a 2080 #define _LOAD_W_K_LOW(BYTE_FLIP_MASK, reg) \
sPymbed 0:1387ff3eed4a 2081 "# X0, X1, X2, X3 = W[0..15]\n\t" \
sPymbed 0:1387ff3eed4a 2082 "vmovdqu (%%" #reg "), %%xmm0\n\t" \
sPymbed 0:1387ff3eed4a 2083 "vmovdqu 16(%%" #reg "), %%xmm1\n\t" \
sPymbed 0:1387ff3eed4a 2084 VPSHUFB(X0, X0, BYTE_FLIP_MASK) \
sPymbed 0:1387ff3eed4a 2085 VPSHUFB(X1, X1, BYTE_FLIP_MASK) \
sPymbed 0:1387ff3eed4a 2086 "vmovdqu 32(%%" #reg "), %%xmm2\n\t" \
sPymbed 0:1387ff3eed4a 2087 "vmovdqu 48(%%" #reg "), %%xmm3\n\t" \
sPymbed 0:1387ff3eed4a 2088 VPSHUFB(X2, X2, BYTE_FLIP_MASK) \
sPymbed 0:1387ff3eed4a 2089 VPSHUFB(X3, X3, BYTE_FLIP_MASK)
sPymbed 0:1387ff3eed4a 2090
sPymbed 0:1387ff3eed4a 2091 #define LOAD_W_K_LOW(BYTE_FLIP_MASK, reg) \
sPymbed 0:1387ff3eed4a 2092 _LOAD_W_K_LOW(BYTE_FLIP_MASK, reg)
sPymbed 0:1387ff3eed4a 2093
sPymbed 0:1387ff3eed4a 2094
sPymbed 0:1387ff3eed4a 2095 #define _LOAD_W_K(BYTE_FLIP_Y_MASK, reg) \
sPymbed 0:1387ff3eed4a 2096 "# Y0, Y1, Y2, Y3 = W[0..15] of two blocks\n\t" \
sPymbed 0:1387ff3eed4a 2097 "vmovdqu (%%" #reg "), %%xmm0\n\t" \
sPymbed 0:1387ff3eed4a 2098 "vmovdqu 16(%%" #reg "), %%xmm1\n\t" \
sPymbed 0:1387ff3eed4a 2099 "vmovdqu 64(%%" #reg "), %%xmm4\n\t" \
sPymbed 0:1387ff3eed4a 2100 "vmovdqu 80(%%" #reg "), %%xmm5\n\t" \
sPymbed 0:1387ff3eed4a 2101 VINSERTI128(Y0, Y0, XTMP0, 1) \
sPymbed 0:1387ff3eed4a 2102 VINSERTI128(Y1, Y1, XTMP1, 1) \
sPymbed 0:1387ff3eed4a 2103 VPSHUFB(Y0, Y0, BYTE_FLIP_Y_MASK) \
sPymbed 0:1387ff3eed4a 2104 VPSHUFB(Y1, Y1, BYTE_FLIP_Y_MASK) \
sPymbed 0:1387ff3eed4a 2105 "vmovdqu 32(%%" #reg "), %%xmm2\n\t" \
sPymbed 0:1387ff3eed4a 2106 "vmovdqu 48(%%" #reg "), %%xmm3\n\t" \
sPymbed 0:1387ff3eed4a 2107 "vmovdqu 96(%%" #reg "), %%xmm6\n\t" \
sPymbed 0:1387ff3eed4a 2108 "vmovdqu 112(%%" #reg "), %%xmm7\n\t" \
sPymbed 0:1387ff3eed4a 2109 VINSERTI128(Y2, Y2, XTMP2, 1) \
sPymbed 0:1387ff3eed4a 2110 VINSERTI128(Y3, Y3, XTMP3, 1) \
sPymbed 0:1387ff3eed4a 2111 VPSHUFB(Y2, Y2, BYTE_FLIP_Y_MASK) \
sPymbed 0:1387ff3eed4a 2112 VPSHUFB(Y3, Y3, BYTE_FLIP_Y_MASK)
sPymbed 0:1387ff3eed4a 2113
sPymbed 0:1387ff3eed4a 2114 #define LOAD_W_K(BYTE_FLIP_Y_MASK, reg) \
sPymbed 0:1387ff3eed4a 2115 _LOAD_W_K(BYTE_FLIP_Y_MASK, reg)
sPymbed 0:1387ff3eed4a 2116
sPymbed 0:1387ff3eed4a 2117
sPymbed 0:1387ff3eed4a 2118 #define _SET_W_Y_4(i) \
sPymbed 0:1387ff3eed4a 2119 "vpaddd (" #i "*8)+ 0+%[K], %%ymm0, %%ymm4\n\t" \
sPymbed 0:1387ff3eed4a 2120 "vpaddd (" #i "*8)+32+%[K], %%ymm1, %%ymm5\n\t" \
sPymbed 0:1387ff3eed4a 2121 "vmovdqu %%ymm4, (" #i "*8)+ 0(" WK ")\n\t" \
sPymbed 0:1387ff3eed4a 2122 "vmovdqu %%ymm5, (" #i "*8)+32(" WK ")\n\t" \
sPymbed 0:1387ff3eed4a 2123 "vpaddd (" #i "*8)+64+%[K], %%ymm2, %%ymm4\n\t" \
sPymbed 0:1387ff3eed4a 2124 "vpaddd (" #i "*8)+96+%[K], %%ymm3, %%ymm5\n\t" \
sPymbed 0:1387ff3eed4a 2125 "vmovdqu %%ymm4, (" #i "*8)+64(" WK ")\n\t" \
sPymbed 0:1387ff3eed4a 2126 "vmovdqu %%ymm5, (" #i "*8)+96(" WK ")\n\t"
sPymbed 0:1387ff3eed4a 2127
sPymbed 0:1387ff3eed4a 2128 #define SET_W_Y_4(i) \
sPymbed 0:1387ff3eed4a 2129 _SET_W_Y_4(i)
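/* SET_W_Y_4(i) pre-adds the lane-duplicated constants K[i..i+15] to the
 * scheduled words in ymm0..ymm3 and spills the sums to the stack area
 * addressed by WK, so each round below needs only one memory-source add. */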
sPymbed 0:1387ff3eed4a 2130
sPymbed 0:1387ff3eed4a 2131
sPymbed 0:1387ff3eed4a 2132 static const ALIGN32 word64 mSHUF_Y_00BA[] =
sPymbed 0:1387ff3eed4a 2133 { 0x0b0a090803020100, 0xFFFFFFFFFFFFFFFF,
sPymbed 0:1387ff3eed4a 2134 0x0b0a090803020100, 0xFFFFFFFFFFFFFFFF }; /* shuffle xBxA -> 00BA */
sPymbed 0:1387ff3eed4a 2135 static const ALIGN32 word64 mSHUF_Y_DC00[] =
sPymbed 0:1387ff3eed4a 2136 { 0xFFFFFFFFFFFFFFFF, 0x0b0a090803020100,
sPymbed 0:1387ff3eed4a 2137 0xFFFFFFFFFFFFFFFF, 0x0b0a090803020100 }; /* shuffle xDxC -> DC00 */
sPymbed 0:1387ff3eed4a 2138 static const ALIGN32 word64 mBYTE_FLIP_Y_MASK[] =
sPymbed 0:1387ff3eed4a 2139 { 0x0405060700010203, 0x0c0d0e0f08090a0b,
sPymbed 0:1387ff3eed4a 2140 0x0405060700010203, 0x0c0d0e0f08090a0b };
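/* vpshufb zeroes a destination byte whenever the corresponding mask byte has
 * its top bit set, so the 0xFF...FF qwords above blank half of each 128-bit
 * lane while 0x0b0a090803020100 moves the wanted dwords into place.
 * mBYTE_FLIP_Y_MASK byte-swaps each 32-bit word, converting the big-endian
 * message words to host order. */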
sPymbed 0:1387ff3eed4a 2141
sPymbed 0:1387ff3eed4a 2142 #define _INIT_MASKS_Y(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00) \
sPymbed 0:1387ff3eed4a 2143 "vmovdqa %[FLIP], %" #BYTE_FLIP_MASK "\n\t" \
sPymbed 0:1387ff3eed4a 2144 "vmovdqa %[SHUF00BA], %" #SHUF_00BA "\n\t" \
sPymbed 0:1387ff3eed4a 2145 "vmovdqa %[SHUFDC00], %" #SHUF_DC00 "\n\t"
sPymbed 0:1387ff3eed4a 2146
sPymbed 0:1387ff3eed4a 2147 #define INIT_MASKS_Y(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00) \
sPymbed 0:1387ff3eed4a 2148 _INIT_MASKS_Y(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00)
sPymbed 0:1387ff3eed4a 2149
sPymbed 0:1387ff3eed4a 2150 static const ALIGN32 word32 K256[128] = {
sPymbed 0:1387ff3eed4a 2151 0x428A2F98L, 0x71374491L, 0xB5C0FBCFL, 0xE9B5DBA5L,
sPymbed 0:1387ff3eed4a 2152 0x428A2F98L, 0x71374491L, 0xB5C0FBCFL, 0xE9B5DBA5L,
sPymbed 0:1387ff3eed4a 2153 0x3956C25BL, 0x59F111F1L, 0x923F82A4L, 0xAB1C5ED5L,
sPymbed 0:1387ff3eed4a 2154 0x3956C25BL, 0x59F111F1L, 0x923F82A4L, 0xAB1C5ED5L,
sPymbed 0:1387ff3eed4a 2155 0xD807AA98L, 0x12835B01L, 0x243185BEL, 0x550C7DC3L,
sPymbed 0:1387ff3eed4a 2156 0xD807AA98L, 0x12835B01L, 0x243185BEL, 0x550C7DC3L,
sPymbed 0:1387ff3eed4a 2157 0x72BE5D74L, 0x80DEB1FEL, 0x9BDC06A7L, 0xC19BF174L,
sPymbed 0:1387ff3eed4a 2158 0x72BE5D74L, 0x80DEB1FEL, 0x9BDC06A7L, 0xC19BF174L,
sPymbed 0:1387ff3eed4a 2159 0xE49B69C1L, 0xEFBE4786L, 0x0FC19DC6L, 0x240CA1CCL,
sPymbed 0:1387ff3eed4a 2160 0xE49B69C1L, 0xEFBE4786L, 0x0FC19DC6L, 0x240CA1CCL,
sPymbed 0:1387ff3eed4a 2161 0x2DE92C6FL, 0x4A7484AAL, 0x5CB0A9DCL, 0x76F988DAL,
sPymbed 0:1387ff3eed4a 2162 0x2DE92C6FL, 0x4A7484AAL, 0x5CB0A9DCL, 0x76F988DAL,
sPymbed 0:1387ff3eed4a 2163 0x983E5152L, 0xA831C66DL, 0xB00327C8L, 0xBF597FC7L,
sPymbed 0:1387ff3eed4a 2164 0x983E5152L, 0xA831C66DL, 0xB00327C8L, 0xBF597FC7L,
sPymbed 0:1387ff3eed4a 2165 0xC6E00BF3L, 0xD5A79147L, 0x06CA6351L, 0x14292967L,
sPymbed 0:1387ff3eed4a 2166 0xC6E00BF3L, 0xD5A79147L, 0x06CA6351L, 0x14292967L,
sPymbed 0:1387ff3eed4a 2167 0x27B70A85L, 0x2E1B2138L, 0x4D2C6DFCL, 0x53380D13L,
sPymbed 0:1387ff3eed4a 2168 0x27B70A85L, 0x2E1B2138L, 0x4D2C6DFCL, 0x53380D13L,
sPymbed 0:1387ff3eed4a 2169 0x650A7354L, 0x766A0ABBL, 0x81C2C92EL, 0x92722C85L,
sPymbed 0:1387ff3eed4a 2170 0x650A7354L, 0x766A0ABBL, 0x81C2C92EL, 0x92722C85L,
sPymbed 0:1387ff3eed4a 2171 0xA2BFE8A1L, 0xA81A664BL, 0xC24B8B70L, 0xC76C51A3L,
sPymbed 0:1387ff3eed4a 2172 0xA2BFE8A1L, 0xA81A664BL, 0xC24B8B70L, 0xC76C51A3L,
sPymbed 0:1387ff3eed4a 2173 0xD192E819L, 0xD6990624L, 0xF40E3585L, 0x106AA070L,
sPymbed 0:1387ff3eed4a 2174 0xD192E819L, 0xD6990624L, 0xF40E3585L, 0x106AA070L,
sPymbed 0:1387ff3eed4a 2175 0x19A4C116L, 0x1E376C08L, 0x2748774CL, 0x34B0BCB5L,
sPymbed 0:1387ff3eed4a 2176 0x19A4C116L, 0x1E376C08L, 0x2748774CL, 0x34B0BCB5L,
sPymbed 0:1387ff3eed4a 2177 0x391C0CB3L, 0x4ED8AA4AL, 0x5B9CCA4FL, 0x682E6FF3L,
sPymbed 0:1387ff3eed4a 2178 0x391C0CB3L, 0x4ED8AA4AL, 0x5B9CCA4FL, 0x682E6FF3L,
sPymbed 0:1387ff3eed4a 2179 0x748F82EEL, 0x78A5636FL, 0x84C87814L, 0x8CC70208L,
sPymbed 0:1387ff3eed4a 2180 0x748F82EEL, 0x78A5636FL, 0x84C87814L, 0x8CC70208L,
sPymbed 0:1387ff3eed4a 2181 0x90BEFFFAL, 0xA4506CEBL, 0xBEF9A3F7L, 0xC67178F2L,
sPymbed 0:1387ff3eed4a 2182 0x90BEFFFAL, 0xA4506CEBL, 0xBEF9A3F7L, 0xC67178F2L
sPymbed 0:1387ff3eed4a 2183 };
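/* Each run of four constants appears twice so that a single 256-bit vpaddd
 * adds the same K values to the corresponding words of both message blocks
 * (one block per 128-bit lane). */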
sPymbed 0:1387ff3eed4a 2184
sPymbed 0:1387ff3eed4a 2185 SHA256_NOINLINE static int Transform_Sha256_AVX2(wc_Sha256* sha256)
sPymbed 0:1387ff3eed4a 2186 {
sPymbed 0:1387ff3eed4a 2187 __asm__ __volatile__ (
sPymbed 0:1387ff3eed4a 2188
sPymbed 0:1387ff3eed4a 2189 "subq $512, %%rsp\n\t"
sPymbed 0:1387ff3eed4a 2190 "leaq 32(%[sha256]), %%rax\n\t"
sPymbed 0:1387ff3eed4a 2191
sPymbed 0:1387ff3eed4a 2192 INIT_MASKS_Y(BYTE_FLIP_MASK, SHUF_Y_00BA, SHUF_Y_DC00)
sPymbed 0:1387ff3eed4a 2193 LOAD_DIGEST()
sPymbed 0:1387ff3eed4a 2194
sPymbed 0:1387ff3eed4a 2195 LOAD_W_K_LOW(BYTE_FLIP_MASK, rax)
sPymbed 0:1387ff3eed4a 2196
sPymbed 0:1387ff3eed4a 2197 "movl %%r9d, " L4 "\n\t"
sPymbed 0:1387ff3eed4a 2198 "movl %%r12d, " L1 "\n\t"
sPymbed 0:1387ff3eed4a 2199 "xorl %%r10d, " L4 "\n\t"
sPymbed 0:1387ff3eed4a 2200
sPymbed 0:1387ff3eed4a 2201 SET_W_Y_4(0)
sPymbed 0:1387ff3eed4a 2202 MsgSched_Y(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
sPymbed 0:1387ff3eed4a 2203 MsgSched_Y(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 8)
sPymbed 0:1387ff3eed4a 2204 MsgSched_Y(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 16)
sPymbed 0:1387ff3eed4a 2205 MsgSched_Y(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 24)
sPymbed 0:1387ff3eed4a 2206
sPymbed 0:1387ff3eed4a 2207 SET_W_Y_4(16)
sPymbed 0:1387ff3eed4a 2208 MsgSched_Y(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 32)
sPymbed 0:1387ff3eed4a 2209 MsgSched_Y(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 40)
sPymbed 0:1387ff3eed4a 2210 MsgSched_Y(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 48)
sPymbed 0:1387ff3eed4a 2211 MsgSched_Y(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 56)
sPymbed 0:1387ff3eed4a 2212
sPymbed 0:1387ff3eed4a 2213 SET_W_Y_4(32)
sPymbed 0:1387ff3eed4a 2214 MsgSched_Y(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 64)
sPymbed 0:1387ff3eed4a 2215 MsgSched_Y(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 72)
sPymbed 0:1387ff3eed4a 2216 MsgSched_Y(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 80)
sPymbed 0:1387ff3eed4a 2217 MsgSched_Y(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 88)
sPymbed 0:1387ff3eed4a 2218
sPymbed 0:1387ff3eed4a 2219 SET_W_Y_4(48)
sPymbed 0:1387ff3eed4a 2220 RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 96)
sPymbed 0:1387ff3eed4a 2221 RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 104)
sPymbed 0:1387ff3eed4a 2222 RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 112)
sPymbed 0:1387ff3eed4a 2223 RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 120)
sPymbed 0:1387ff3eed4a 2224
sPymbed 0:1387ff3eed4a 2225 STORE_ADD_DIGEST()
sPymbed 0:1387ff3eed4a 2226
sPymbed 0:1387ff3eed4a 2227 "addq $512, %%rsp\n\t"
sPymbed 0:1387ff3eed4a 2228
sPymbed 0:1387ff3eed4a 2229 :
sPymbed 0:1387ff3eed4a 2230 : [FLIP] "m" (mBYTE_FLIP_MASK[0]),
sPymbed 0:1387ff3eed4a 2231 [SHUF00BA] "m" (mSHUF_Y_00BA[0]),
sPymbed 0:1387ff3eed4a 2232 [SHUFDC00] "m" (mSHUF_Y_DC00[0]),
sPymbed 0:1387ff3eed4a 2233 [sha256] "r" (sha256),
sPymbed 0:1387ff3eed4a 2234 [K] "m" (K256)
sPymbed 0:1387ff3eed4a 2235 : WORK_REGS, STATE_REGS, YMM_REGS, "memory"
sPymbed 0:1387ff3eed4a 2236 );
sPymbed 0:1387ff3eed4a 2237
sPymbed 0:1387ff3eed4a 2238 return 0;
sPymbed 0:1387ff3eed4a 2239 }
sPymbed 0:1387ff3eed4a 2240
sPymbed 0:1387ff3eed4a 2241 SHA256_NOINLINE static int Transform_Sha256_AVX2_Len(wc_Sha256* sha256,
sPymbed 0:1387ff3eed4a 2242 word32 len)
sPymbed 0:1387ff3eed4a 2243 {
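    /* The vector loop below consumes two 64-byte blocks per iteration, so
     * when an odd number of blocks remains (len & 64), hash one block first
     * with the single-block transform. */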
sPymbed 0:1387ff3eed4a 2244 if ((len & WC_SHA256_BLOCK_SIZE) != 0) {
sPymbed 0:1387ff3eed4a 2245 XMEMCPY(sha256->buffer, sha256->data, WC_SHA256_BLOCK_SIZE);
sPymbed 0:1387ff3eed4a 2246 Transform_Sha256_AVX2(sha256);
sPymbed 0:1387ff3eed4a 2247 sha256->data += WC_SHA256_BLOCK_SIZE;
sPymbed 0:1387ff3eed4a 2248 len -= WC_SHA256_BLOCK_SIZE;
sPymbed 0:1387ff3eed4a 2249 if (len == 0)
sPymbed 0:1387ff3eed4a 2250 return 0;
sPymbed 0:1387ff3eed4a 2251 }
sPymbed 0:1387ff3eed4a 2252
sPymbed 0:1387ff3eed4a 2253 __asm__ __volatile__ (
sPymbed 0:1387ff3eed4a 2254
sPymbed 0:1387ff3eed4a 2255 "subq $512, %%rsp\n\t"
sPymbed 0:1387ff3eed4a 2256 "movq 120(%[sha256]), %%rax\n\t"
sPymbed 0:1387ff3eed4a 2257
sPymbed 0:1387ff3eed4a 2258 INIT_MASKS_Y(BYTE_FLIP_Y_MASK, SHUF_Y_00BA, SHUF_Y_DC00)
sPymbed 0:1387ff3eed4a 2259 LOAD_DIGEST()
sPymbed 0:1387ff3eed4a 2260
sPymbed 0:1387ff3eed4a 2261 "# Start of loop processing two blocks\n"
sPymbed 0:1387ff3eed4a 2262 "1:\n\t"
sPymbed 0:1387ff3eed4a 2263
sPymbed 0:1387ff3eed4a 2264 LOAD_W_K(BYTE_FLIP_Y_MASK, rax)
sPymbed 0:1387ff3eed4a 2265
sPymbed 0:1387ff3eed4a 2266 "movl %%r9d, " L4 "\n\t"
sPymbed 0:1387ff3eed4a 2267 "movl %%r12d, " L1 "\n\t"
sPymbed 0:1387ff3eed4a 2268 "xorl %%r10d, " L4 "\n\t"
sPymbed 0:1387ff3eed4a 2269
sPymbed 0:1387ff3eed4a 2270 SET_W_Y_4(0)
sPymbed 0:1387ff3eed4a 2271 MsgSched_Y(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
sPymbed 0:1387ff3eed4a 2272 MsgSched_Y(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 8)
sPymbed 0:1387ff3eed4a 2273 MsgSched_Y(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 16)
sPymbed 0:1387ff3eed4a 2274 MsgSched_Y(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 24)
sPymbed 0:1387ff3eed4a 2275
sPymbed 0:1387ff3eed4a 2276 SET_W_Y_4(16)
sPymbed 0:1387ff3eed4a 2277 MsgSched_Y(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 32)
sPymbed 0:1387ff3eed4a 2278 MsgSched_Y(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 40)
sPymbed 0:1387ff3eed4a 2279 MsgSched_Y(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 48)
sPymbed 0:1387ff3eed4a 2280 MsgSched_Y(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 56)
sPymbed 0:1387ff3eed4a 2281
sPymbed 0:1387ff3eed4a 2282 SET_W_Y_4(32)
sPymbed 0:1387ff3eed4a 2283 MsgSched_Y(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 64)
sPymbed 0:1387ff3eed4a 2284 MsgSched_Y(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 72)
sPymbed 0:1387ff3eed4a 2285 MsgSched_Y(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 80)
sPymbed 0:1387ff3eed4a 2286 MsgSched_Y(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 88)
sPymbed 0:1387ff3eed4a 2287
sPymbed 0:1387ff3eed4a 2288 SET_W_Y_4(48)
sPymbed 0:1387ff3eed4a 2289 RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 96)
sPymbed 0:1387ff3eed4a 2290 RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 104)
sPymbed 0:1387ff3eed4a 2291 RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 112)
sPymbed 0:1387ff3eed4a 2292 RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 120)
sPymbed 0:1387ff3eed4a 2293
sPymbed 0:1387ff3eed4a 2294 ADD_DIGEST()
sPymbed 0:1387ff3eed4a 2295 STORE_DIGEST()
sPymbed 0:1387ff3eed4a 2296
sPymbed 0:1387ff3eed4a 2297 "movl %%r9d, " L4 "\n\t"
sPymbed 0:1387ff3eed4a 2298 "movl %%r12d, " L1 "\n\t"
sPymbed 0:1387ff3eed4a 2299 "xorl %%r10d, " L4 "\n\t"
sPymbed 0:1387ff3eed4a 2300
sPymbed 0:1387ff3eed4a 2301 RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 4)
sPymbed 0:1387ff3eed4a 2302 RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
sPymbed 0:1387ff3eed4a 2303 RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 20)
sPymbed 0:1387ff3eed4a 2304 RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 28)
sPymbed 0:1387ff3eed4a 2305 RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 36)
sPymbed 0:1387ff3eed4a 2306 RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 44)
sPymbed 0:1387ff3eed4a 2307 RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 52)
sPymbed 0:1387ff3eed4a 2308 RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 60)
sPymbed 0:1387ff3eed4a 2309 RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 68)
sPymbed 0:1387ff3eed4a 2310 RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 76)
sPymbed 0:1387ff3eed4a 2311 RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 84)
sPymbed 0:1387ff3eed4a 2312 RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 92)
sPymbed 0:1387ff3eed4a 2313 RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 100)
sPymbed 0:1387ff3eed4a 2314 RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 108)
sPymbed 0:1387ff3eed4a 2315 RND_ALL_4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 116)
sPymbed 0:1387ff3eed4a 2316 RND_ALL_4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 124)
sPymbed 0:1387ff3eed4a 2317
sPymbed 0:1387ff3eed4a 2318 ADD_DIGEST()
sPymbed 0:1387ff3eed4a 2319
sPymbed 0:1387ff3eed4a 2320 "movq 120(%[sha256]), %%rax\n\t"
sPymbed 0:1387ff3eed4a 2321 "addq $128, %%rax\n\t"
sPymbed 0:1387ff3eed4a 2322 "subl $128, %[len]\n\t"
sPymbed 0:1387ff3eed4a 2323
sPymbed 0:1387ff3eed4a 2324 STORE_DIGEST()
sPymbed 0:1387ff3eed4a 2325
sPymbed 0:1387ff3eed4a 2326 "movq %%rax, 120(%[sha256])\n\t"
sPymbed 0:1387ff3eed4a 2327 "jnz 1b\n\t"
sPymbed 0:1387ff3eed4a 2328
sPymbed 0:1387ff3eed4a 2329 "addq $512, %%rsp\n\t"
sPymbed 0:1387ff3eed4a 2330
sPymbed 0:1387ff3eed4a 2331 :
sPymbed 0:1387ff3eed4a 2332 : [FLIP] "m" (mBYTE_FLIP_Y_MASK[0]),
sPymbed 0:1387ff3eed4a 2333 [SHUF00BA] "m" (mSHUF_Y_00BA[0]),
sPymbed 0:1387ff3eed4a 2334 [SHUFDC00] "m" (mSHUF_Y_DC00[0]),
sPymbed 0:1387ff3eed4a 2335 [sha256] "r" (sha256),
sPymbed 0:1387ff3eed4a 2336 [len] "r" (len),
sPymbed 0:1387ff3eed4a 2337 [K] "m" (K256)
sPymbed 0:1387ff3eed4a 2338 : WORK_REGS, STATE_REGS, YMM_REGS, "memory"
sPymbed 0:1387ff3eed4a 2339 );
sPymbed 0:1387ff3eed4a 2340
sPymbed 0:1387ff3eed4a 2341 return 0;
sPymbed 0:1387ff3eed4a 2342 }
sPymbed 0:1387ff3eed4a 2343
sPymbed 0:1387ff3eed4a 2344 #if defined(HAVE_INTEL_RORX)
sPymbed 0:1387ff3eed4a 2345 SHA256_NOINLINE static int Transform_Sha256_AVX2_RORX(wc_Sha256* sha256)
sPymbed 0:1387ff3eed4a 2346 {
sPymbed 0:1387ff3eed4a 2347 __asm__ __volatile__ (
sPymbed 0:1387ff3eed4a 2348
sPymbed 0:1387ff3eed4a 2349 "subq $512, %%rsp\n\t"
sPymbed 0:1387ff3eed4a 2350 "leaq 32(%[sha256]), %%rax\n\t"
sPymbed 0:1387ff3eed4a 2351
sPymbed 0:1387ff3eed4a 2352 INIT_MASKS_Y(BYTE_FLIP_MASK, SHUF_Y_00BA, SHUF_Y_DC00)
sPymbed 0:1387ff3eed4a 2353 LOAD_W_K_LOW(BYTE_FLIP_MASK, rax)
sPymbed 0:1387ff3eed4a 2354
sPymbed 0:1387ff3eed4a 2355 LOAD_DIGEST()
sPymbed 0:1387ff3eed4a 2356
sPymbed 0:1387ff3eed4a 2357 "movl %%r9d, " L4 "\n\t"
sPymbed 0:1387ff3eed4a 2358 "rorx $6, %%r12d, " L1 "\n\t"
sPymbed 0:1387ff3eed4a 2359 "xorl %%r10d, " L4 "\n\t"
sPymbed 0:1387ff3eed4a 2360
sPymbed 0:1387ff3eed4a 2361 SET_W_Y_4(0)
sPymbed 0:1387ff3eed4a 2362 MsgSched_Y_RORX(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
sPymbed 0:1387ff3eed4a 2363 MsgSched_Y_RORX(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 8)
sPymbed 0:1387ff3eed4a 2364 MsgSched_Y_RORX(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 16)
sPymbed 0:1387ff3eed4a 2365 MsgSched_Y_RORX(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 24)
sPymbed 0:1387ff3eed4a 2366
sPymbed 0:1387ff3eed4a 2367 SET_W_Y_4(16)
sPymbed 0:1387ff3eed4a 2368 MsgSched_Y_RORX(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 32)
sPymbed 0:1387ff3eed4a 2369 MsgSched_Y_RORX(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 40)
sPymbed 0:1387ff3eed4a 2370 MsgSched_Y_RORX(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 48)
sPymbed 0:1387ff3eed4a 2371 MsgSched_Y_RORX(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 56)
sPymbed 0:1387ff3eed4a 2372
sPymbed 0:1387ff3eed4a 2373 SET_W_Y_4(32)
sPymbed 0:1387ff3eed4a 2374 MsgSched_Y_RORX(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 64)
sPymbed 0:1387ff3eed4a 2375 MsgSched_Y_RORX(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 72)
sPymbed 0:1387ff3eed4a 2376 MsgSched_Y_RORX(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 80)
sPymbed 0:1387ff3eed4a 2377 MsgSched_Y_RORX(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 88)
sPymbed 0:1387ff3eed4a 2378
sPymbed 0:1387ff3eed4a 2379 SET_W_Y_4(48)
sPymbed 0:1387ff3eed4a 2380 "xorl " L3 ", " L3 "\n\t"
sPymbed 0:1387ff3eed4a 2381 "xorl " L2 ", " L2 "\n\t"
sPymbed 0:1387ff3eed4a 2382 RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 96)
sPymbed 0:1387ff3eed4a 2383 RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 104)
sPymbed 0:1387ff3eed4a 2384 RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 112)
sPymbed 0:1387ff3eed4a 2385 RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 120)
sPymbed 0:1387ff3eed4a 2386 /* Prev RND: h += Maj(a,b,c) */
sPymbed 0:1387ff3eed4a 2387 "addl " L3 ", %%r8d\n\t"
sPymbed 0:1387ff3eed4a 2388
sPymbed 0:1387ff3eed4a 2389 STORE_ADD_DIGEST()
sPymbed 0:1387ff3eed4a 2390
sPymbed 0:1387ff3eed4a 2391 "addq $512, %%rsp\n\t"
sPymbed 0:1387ff3eed4a 2392
sPymbed 0:1387ff3eed4a 2393 :
sPymbed 0:1387ff3eed4a 2394 : [FLIP] "m" (mBYTE_FLIP_MASK[0]),
sPymbed 0:1387ff3eed4a 2395 [SHUF00BA] "m" (mSHUF_Y_00BA[0]),
sPymbed 0:1387ff3eed4a 2396 [SHUFDC00] "m" (mSHUF_Y_DC00[0]),
sPymbed 0:1387ff3eed4a 2397 [sha256] "r" (sha256),
sPymbed 0:1387ff3eed4a 2398 [K] "m" (K256)
sPymbed 0:1387ff3eed4a 2399 : WORK_REGS, STATE_REGS, YMM_REGS, "memory"
sPymbed 0:1387ff3eed4a 2400 );
sPymbed 0:1387ff3eed4a 2401
sPymbed 0:1387ff3eed4a 2402 return 0;
sPymbed 0:1387ff3eed4a 2403 }
sPymbed 0:1387ff3eed4a 2404
sPymbed 0:1387ff3eed4a 2405 SHA256_NOINLINE static int Transform_Sha256_AVX2_RORX_Len(wc_Sha256* sha256,
sPymbed 0:1387ff3eed4a 2406 word32 len)
sPymbed 0:1387ff3eed4a 2407 {
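    /* As above: hash a leading single block when the block count is odd so
     * the two-block loop below always consumes pairs. */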
sPymbed 0:1387ff3eed4a 2408 if ((len & WC_SHA256_BLOCK_SIZE) != 0) {
sPymbed 0:1387ff3eed4a 2409 XMEMCPY(sha256->buffer, sha256->data, WC_SHA256_BLOCK_SIZE);
sPymbed 0:1387ff3eed4a 2410 Transform_Sha256_AVX2_RORX(sha256);
sPymbed 0:1387ff3eed4a 2411 sha256->data += WC_SHA256_BLOCK_SIZE;
sPymbed 0:1387ff3eed4a 2412 len -= WC_SHA256_BLOCK_SIZE;
sPymbed 0:1387ff3eed4a 2413 if (len == 0)
sPymbed 0:1387ff3eed4a 2414 return 0;
sPymbed 0:1387ff3eed4a 2415 }
sPymbed 0:1387ff3eed4a 2416
sPymbed 0:1387ff3eed4a 2417 __asm__ __volatile__ (
sPymbed 0:1387ff3eed4a 2418
sPymbed 0:1387ff3eed4a 2419 "subq $512, %%rsp\n\t"
sPymbed 0:1387ff3eed4a 2420 "movq 120(%[sha256]), %%rax\n\t"
sPymbed 0:1387ff3eed4a 2421
sPymbed 0:1387ff3eed4a 2422 INIT_MASKS_Y(BYTE_FLIP_Y_MASK, SHUF_Y_00BA, SHUF_Y_DC00)
sPymbed 0:1387ff3eed4a 2423 LOAD_DIGEST()
sPymbed 0:1387ff3eed4a 2424
sPymbed 0:1387ff3eed4a 2425 "# Start of loop processing two blocks\n"
sPymbed 0:1387ff3eed4a 2426 "1:\n\t"
sPymbed 0:1387ff3eed4a 2427
sPymbed 0:1387ff3eed4a 2428 LOAD_W_K(BYTE_FLIP_Y_MASK, rax)
sPymbed 0:1387ff3eed4a 2429
sPymbed 0:1387ff3eed4a 2430 "movl %%r9d, " L4 "\n\t"
sPymbed 0:1387ff3eed4a 2431 "rorx $6, %%r12d, " L1 "\n\t"
sPymbed 0:1387ff3eed4a 2432 "xorl %%r10d, " L4 "\n\t"
sPymbed 0:1387ff3eed4a 2433
sPymbed 0:1387ff3eed4a 2434 SET_W_Y_4(0)
sPymbed 0:1387ff3eed4a 2435 MsgSched_Y_RORX(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 0)
sPymbed 0:1387ff3eed4a 2436 MsgSched_Y_RORX(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 8)
sPymbed 0:1387ff3eed4a 2437 MsgSched_Y_RORX(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 16)
sPymbed 0:1387ff3eed4a 2438 MsgSched_Y_RORX(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 24)
sPymbed 0:1387ff3eed4a 2439
sPymbed 0:1387ff3eed4a 2440 SET_W_Y_4(16)
sPymbed 0:1387ff3eed4a 2441 MsgSched_Y_RORX(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 32)
sPymbed 0:1387ff3eed4a 2442 MsgSched_Y_RORX(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 40)
sPymbed 0:1387ff3eed4a 2443 MsgSched_Y_RORX(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 48)
sPymbed 0:1387ff3eed4a 2444 MsgSched_Y_RORX(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 56)
sPymbed 0:1387ff3eed4a 2445
sPymbed 0:1387ff3eed4a 2446 SET_W_Y_4(32)
sPymbed 0:1387ff3eed4a 2447 MsgSched_Y_RORX(Y0, Y1, Y2, Y3, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 64)
sPymbed 0:1387ff3eed4a 2448 MsgSched_Y_RORX(Y1, Y2, Y3, Y0, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 72)
sPymbed 0:1387ff3eed4a 2449 MsgSched_Y_RORX(Y2, Y3, Y0, Y1, S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 80)
sPymbed 0:1387ff3eed4a 2450 MsgSched_Y_RORX(Y3, Y0, Y1, Y2, S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 88)
sPymbed 0:1387ff3eed4a 2451
sPymbed 0:1387ff3eed4a 2452 SET_W_Y_4(48)
sPymbed 0:1387ff3eed4a 2453 "xorl " L3 ", " L3 "\n\t"
sPymbed 0:1387ff3eed4a 2454 "xorl " L2 ", " L2 "\n\t"
sPymbed 0:1387ff3eed4a 2455 RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 96)
sPymbed 0:1387ff3eed4a 2456 RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 104)
sPymbed 0:1387ff3eed4a 2457 RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 112)
sPymbed 0:1387ff3eed4a 2458 RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 120)
sPymbed 0:1387ff3eed4a 2459 /* Prev RND: h += Maj(a,b,c) */
sPymbed 0:1387ff3eed4a 2460 "addl " L3 ", %%r8d\n\t"
sPymbed 0:1387ff3eed4a 2461 "xorl " L2 ", " L2 "\n\t"
sPymbed 0:1387ff3eed4a 2462
sPymbed 0:1387ff3eed4a 2463 ADD_DIGEST()
sPymbed 0:1387ff3eed4a 2464 STORE_DIGEST()
sPymbed 0:1387ff3eed4a 2465
sPymbed 0:1387ff3eed4a 2466 "movl %%r9d, " L4 "\n\t"
sPymbed 0:1387ff3eed4a 2467 "xorl " L3 ", " L3 "\n\t"
sPymbed 0:1387ff3eed4a 2468 "xorl %%r10d, " L4 "\n\t"
sPymbed 0:1387ff3eed4a 2469
sPymbed 0:1387ff3eed4a 2470 RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 4)
sPymbed 0:1387ff3eed4a 2471 RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 12)
sPymbed 0:1387ff3eed4a 2472 RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 20)
sPymbed 0:1387ff3eed4a 2473 RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 28)
sPymbed 0:1387ff3eed4a 2474 RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 36)
sPymbed 0:1387ff3eed4a 2475 RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 44)
sPymbed 0:1387ff3eed4a 2476 RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 52)
sPymbed 0:1387ff3eed4a 2477 RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 60)
sPymbed 0:1387ff3eed4a 2478 RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 68)
sPymbed 0:1387ff3eed4a 2479 RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 76)
sPymbed 0:1387ff3eed4a 2480 RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 84)
sPymbed 0:1387ff3eed4a 2481 RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 92)
sPymbed 0:1387ff3eed4a 2482 RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 100)
sPymbed 0:1387ff3eed4a 2483 RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 108)
sPymbed 0:1387ff3eed4a 2484 RND_RORX_X4(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, 116)
sPymbed 0:1387ff3eed4a 2485 RND_RORX_X4(S_4, S_5, S_6, S_7, S_0, S_1, S_2, S_3, 124)
sPymbed 0:1387ff3eed4a 2486 /* Prev RND: h += Maj(a,b,c) */
sPymbed 0:1387ff3eed4a 2487 "addl " L3 ", %%r8d\n\t"
sPymbed 0:1387ff3eed4a 2488 "movq 120(%[sha256]), %%rax\n\t"
sPymbed 0:1387ff3eed4a 2489
sPymbed 0:1387ff3eed4a 2490 ADD_DIGEST()
sPymbed 0:1387ff3eed4a 2491
sPymbed 0:1387ff3eed4a 2492 "addq $128, %%rax\n\t"
sPymbed 0:1387ff3eed4a 2493 "subl $128, %[len]\n\t"
sPymbed 0:1387ff3eed4a 2494
sPymbed 0:1387ff3eed4a 2495 STORE_DIGEST()
sPymbed 0:1387ff3eed4a 2496
sPymbed 0:1387ff3eed4a 2497 "movq %%rax, 120(%[sha256])\n\t"
sPymbed 0:1387ff3eed4a 2498 "jnz 1b\n\t"
sPymbed 0:1387ff3eed4a 2499
sPymbed 0:1387ff3eed4a 2500 "addq $512, %%rsp\n\t"
sPymbed 0:1387ff3eed4a 2501
sPymbed 0:1387ff3eed4a 2502 :
sPymbed 0:1387ff3eed4a 2503 : [FLIP] "m" (mBYTE_FLIP_Y_MASK[0]),
sPymbed 0:1387ff3eed4a 2504 [SHUF00BA] "m" (mSHUF_Y_00BA[0]),
sPymbed 0:1387ff3eed4a 2505 [SHUFDC00] "m" (mSHUF_Y_DC00[0]),
sPymbed 0:1387ff3eed4a 2506 [sha256] "r" (sha256),
sPymbed 0:1387ff3eed4a 2507 [len] "r" (len),
sPymbed 0:1387ff3eed4a 2508 [K] "m" (K256)
sPymbed 0:1387ff3eed4a 2509 : WORK_REGS, STATE_REGS, YMM_REGS, "memory"
sPymbed 0:1387ff3eed4a 2510 );
sPymbed 0:1387ff3eed4a 2511
sPymbed 0:1387ff3eed4a 2512 return 0;
sPymbed 0:1387ff3eed4a 2513 }
sPymbed 0:1387ff3eed4a 2514 #endif /* HAVE_INTEL_RORX */
sPymbed 0:1387ff3eed4a 2515 #endif /* HAVE_INTEL_AVX2 */
sPymbed 0:1387ff3eed4a 2516
sPymbed 0:1387ff3eed4a 2517
sPymbed 0:1387ff3eed4a 2518 #ifdef WOLFSSL_SHA224
sPymbed 0:1387ff3eed4a 2519
sPymbed 0:1387ff3eed4a 2520 #ifdef STM32_HASH_SHA2
sPymbed 0:1387ff3eed4a 2521
sPymbed 0:1387ff3eed4a 2522 /* Supports CubeMX HAL or Standard Peripheral Library */
sPymbed 0:1387ff3eed4a 2523
sPymbed 0:1387ff3eed4a 2524 int wc_InitSha224_ex(wc_Sha224* sha224, void* heap, int devId)
sPymbed 0:1387ff3eed4a 2525 {
sPymbed 0:1387ff3eed4a 2526 if (sha224 == NULL)
sPymbed 0:1387ff3eed4a 2527 return BAD_FUNC_ARG;
sPymbed 0:1387ff3eed4a 2528
sPymbed 0:1387ff3eed4a 2529 (void)devId;
sPymbed 0:1387ff3eed4a 2530 (void)heap;
sPymbed 0:1387ff3eed4a 2531
sPymbed 0:1387ff3eed4a 2532 wc_Stm32_Hash_Init(&sha224->stmCtx);
sPymbed 0:1387ff3eed4a 2533 return 0;
sPymbed 0:1387ff3eed4a 2534 }
sPymbed 0:1387ff3eed4a 2535
sPymbed 0:1387ff3eed4a 2536 int wc_Sha224Update(wc_Sha224* sha224, const byte* data, word32 len)
sPymbed 0:1387ff3eed4a 2537 {
sPymbed 0:1387ff3eed4a 2538 int ret = 0;
sPymbed 0:1387ff3eed4a 2539
sPymbed 0:1387ff3eed4a 2540 if (sha224 == NULL || (data == NULL && len > 0)) {
sPymbed 0:1387ff3eed4a 2541 return BAD_FUNC_ARG;
sPymbed 0:1387ff3eed4a 2542 }
sPymbed 0:1387ff3eed4a 2543
sPymbed 0:1387ff3eed4a 2544 ret = wolfSSL_CryptHwMutexLock();
sPymbed 0:1387ff3eed4a 2545 if (ret == 0) {
sPymbed 0:1387ff3eed4a 2546 ret = wc_Stm32_Hash_Update(&sha224->stmCtx,
sPymbed 0:1387ff3eed4a 2547 HASH_AlgoSelection_SHA224, data, len);
sPymbed 0:1387ff3eed4a 2548 wolfSSL_CryptHwMutexUnLock();
sPymbed 0:1387ff3eed4a 2549 }
sPymbed 0:1387ff3eed4a 2550 return ret;
sPymbed 0:1387ff3eed4a 2551 }
sPymbed 0:1387ff3eed4a 2552
sPymbed 0:1387ff3eed4a 2553 int wc_Sha224Final(wc_Sha224* sha224, byte* hash)
sPymbed 0:1387ff3eed4a 2554 {
sPymbed 0:1387ff3eed4a 2555 int ret = 0;
sPymbed 0:1387ff3eed4a 2556
sPymbed 0:1387ff3eed4a 2557 if (sha224 == NULL || hash == NULL) {
sPymbed 0:1387ff3eed4a 2558 return BAD_FUNC_ARG;
sPymbed 0:1387ff3eed4a 2559 }
sPymbed 0:1387ff3eed4a 2560
sPymbed 0:1387ff3eed4a 2561 ret = wolfSSL_CryptHwMutexLock();
sPymbed 0:1387ff3eed4a 2562 if (ret == 0) {
sPymbed 0:1387ff3eed4a 2563 ret = wc_Stm32_Hash_Final(&sha224->stmCtx,
sPymbed 0:1387ff3eed4a 2564 HASH_AlgoSelection_SHA224, hash, WC_SHA224_DIGEST_SIZE);
sPymbed 0:1387ff3eed4a 2565 wolfSSL_CryptHwMutexUnLock();
sPymbed 0:1387ff3eed4a 2566 }
sPymbed 0:1387ff3eed4a 2567
sPymbed 0:1387ff3eed4a 2568 (void)wc_InitSha224(sha224); /* reset state */
sPymbed 0:1387ff3eed4a 2569
sPymbed 0:1387ff3eed4a 2570 return ret;
sPymbed 0:1387ff3eed4a 2571 }
sPymbed 0:1387ff3eed4a 2572
sPymbed 0:1387ff3eed4a 2573 #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_HASH)
sPymbed 0:1387ff3eed4a 2574 /* functions defined in wolfcrypt/src/port/caam/caam_sha256.c */
sPymbed 0:1387ff3eed4a 2575 #else
sPymbed 0:1387ff3eed4a 2576
sPymbed 0:1387ff3eed4a 2577 #define NEED_SOFT_SHA224
sPymbed 0:1387ff3eed4a 2578
sPymbed 0:1387ff3eed4a 2579
sPymbed 0:1387ff3eed4a 2580 static int InitSha224(wc_Sha224* sha224)
sPymbed 0:1387ff3eed4a 2581 {
sPymbed 0:1387ff3eed4a 2582 int ret = 0;
sPymbed 0:1387ff3eed4a 2583
sPymbed 0:1387ff3eed4a 2584 if (sha224 == NULL) {
sPymbed 0:1387ff3eed4a 2585 return BAD_FUNC_ARG;
sPymbed 0:1387ff3eed4a 2586 }
sPymbed 0:1387ff3eed4a 2587
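    /* SHA-224 initial hash value (FIPS 180-4, section 5.3.2) */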
sPymbed 0:1387ff3eed4a 2588 sha224->digest[0] = 0xc1059ed8;
sPymbed 0:1387ff3eed4a 2589 sha224->digest[1] = 0x367cd507;
sPymbed 0:1387ff3eed4a 2590 sha224->digest[2] = 0x3070dd17;
sPymbed 0:1387ff3eed4a 2591 sha224->digest[3] = 0xf70e5939;
sPymbed 0:1387ff3eed4a 2592 sha224->digest[4] = 0xffc00b31;
sPymbed 0:1387ff3eed4a 2593 sha224->digest[5] = 0x68581511;
sPymbed 0:1387ff3eed4a 2594 sha224->digest[6] = 0x64f98fa7;
sPymbed 0:1387ff3eed4a 2595 sha224->digest[7] = 0xbefa4fa4;
sPymbed 0:1387ff3eed4a 2596
sPymbed 0:1387ff3eed4a 2597 sha224->buffLen = 0;
sPymbed 0:1387ff3eed4a 2598 sha224->loLen = 0;
sPymbed 0:1387ff3eed4a 2599 sha224->hiLen = 0;
sPymbed 0:1387ff3eed4a 2600
sPymbed 0:1387ff3eed4a 2601 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
sPymbed 0:1387ff3eed4a 2602 /* choose the best Transform function for this runtime CPU */
sPymbed 0:1387ff3eed4a 2603 Sha256_SetTransform();
sPymbed 0:1387ff3eed4a 2604 #endif
sPymbed 0:1387ff3eed4a 2605
sPymbed 0:1387ff3eed4a 2606 return ret;
sPymbed 0:1387ff3eed4a 2607 }
sPymbed 0:1387ff3eed4a 2608
sPymbed 0:1387ff3eed4a 2609 #endif
sPymbed 0:1387ff3eed4a 2610
sPymbed 0:1387ff3eed4a 2611 #ifdef NEED_SOFT_SHA224
sPymbed 0:1387ff3eed4a 2612 int wc_InitSha224_ex(wc_Sha224* sha224, void* heap, int devId)
sPymbed 0:1387ff3eed4a 2613 {
sPymbed 0:1387ff3eed4a 2614 int ret = 0;
sPymbed 0:1387ff3eed4a 2615
sPymbed 0:1387ff3eed4a 2616 if (sha224 == NULL)
sPymbed 0:1387ff3eed4a 2617 return BAD_FUNC_ARG;
sPymbed 0:1387ff3eed4a 2618
sPymbed 0:1387ff3eed4a 2619 sha224->heap = heap;
sPymbed 0:1387ff3eed4a 2620
sPymbed 0:1387ff3eed4a 2621 ret = InitSha224(sha224);
sPymbed 0:1387ff3eed4a 2622 if (ret != 0)
sPymbed 0:1387ff3eed4a 2623 return ret;
sPymbed 0:1387ff3eed4a 2624
sPymbed 0:1387ff3eed4a 2625 #ifdef WOLFSSL_SMALL_STACK_CACHE
sPymbed 0:1387ff3eed4a 2626 sha224->W = NULL;
sPymbed 0:1387ff3eed4a 2627 #endif
sPymbed 0:1387ff3eed4a 2628
sPymbed 0:1387ff3eed4a 2629 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA224)
sPymbed 0:1387ff3eed4a 2630 ret = wolfAsync_DevCtxInit(&sha224->asyncDev,
sPymbed 0:1387ff3eed4a 2631 WOLFSSL_ASYNC_MARKER_SHA224, sha224->heap, devId);
sPymbed 0:1387ff3eed4a 2632 #else
sPymbed 0:1387ff3eed4a 2633 (void)devId;
sPymbed 0:1387ff3eed4a 2634 #endif /* WOLFSSL_ASYNC_CRYPT */
sPymbed 0:1387ff3eed4a 2635
sPymbed 0:1387ff3eed4a 2636 return ret;
sPymbed 0:1387ff3eed4a 2637 }
sPymbed 0:1387ff3eed4a 2638
sPymbed 0:1387ff3eed4a 2639 int wc_Sha224Update(wc_Sha224* sha224, const byte* data, word32 len)
sPymbed 0:1387ff3eed4a 2640 {
sPymbed 0:1387ff3eed4a 2641 int ret;
sPymbed 0:1387ff3eed4a 2642
sPymbed 0:1387ff3eed4a 2643 if (sha224 == NULL || (data == NULL && len > 0)) {
sPymbed 0:1387ff3eed4a 2644 return BAD_FUNC_ARG;
sPymbed 0:1387ff3eed4a 2645 }
sPymbed 0:1387ff3eed4a 2646
sPymbed 0:1387ff3eed4a 2647 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA224)
sPymbed 0:1387ff3eed4a 2648 if (sha224->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA224) {
sPymbed 0:1387ff3eed4a 2649 #if defined(HAVE_INTEL_QA)
sPymbed 0:1387ff3eed4a 2650 return IntelQaSymSha224(&sha224->asyncDev, NULL, data, len);
sPymbed 0:1387ff3eed4a 2651 #endif
sPymbed 0:1387ff3eed4a 2652 }
sPymbed 0:1387ff3eed4a 2653 #endif /* WOLFSSL_ASYNC_CRYPT */
sPymbed 0:1387ff3eed4a 2654
sPymbed 0:1387ff3eed4a 2655 ret = Sha256Update((wc_Sha256*)sha224, data, len);
sPymbed 0:1387ff3eed4a 2656
sPymbed 0:1387ff3eed4a 2657 return ret;
sPymbed 0:1387ff3eed4a 2658 }
sPymbed 0:1387ff3eed4a 2659
sPymbed 0:1387ff3eed4a 2660 int wc_Sha224Final(wc_Sha224* sha224, byte* hash)
sPymbed 0:1387ff3eed4a 2661 {
sPymbed 0:1387ff3eed4a 2662 int ret;
sPymbed 0:1387ff3eed4a 2663
sPymbed 0:1387ff3eed4a 2664 if (sha224 == NULL || hash == NULL) {
sPymbed 0:1387ff3eed4a 2665 return BAD_FUNC_ARG;
sPymbed 0:1387ff3eed4a 2666 }
sPymbed 0:1387ff3eed4a 2667
sPymbed 0:1387ff3eed4a 2668 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA224)
sPymbed 0:1387ff3eed4a 2669 if (sha224->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA224) {
sPymbed 0:1387ff3eed4a 2670 #if defined(HAVE_INTEL_QA)
sPymbed 0:1387ff3eed4a 2671 return IntelQaSymSha224(&sha224->asyncDev, hash, NULL,
sPymbed 0:1387ff3eed4a 2672 WC_SHA224_DIGEST_SIZE);
sPymbed 0:1387ff3eed4a 2673 #endif
sPymbed 0:1387ff3eed4a 2674 }
sPymbed 0:1387ff3eed4a 2675 #endif /* WOLFSSL_ASYNC_CRYPT */
sPymbed 0:1387ff3eed4a 2676
sPymbed 0:1387ff3eed4a 2677 ret = Sha256Final((wc_Sha256*)sha224);
sPymbed 0:1387ff3eed4a 2678 if (ret != 0)
sPymbed 0:1387ff3eed4a 2679 return ret;
sPymbed 0:1387ff3eed4a 2680
sPymbed 0:1387ff3eed4a 2681 #if defined(LITTLE_ENDIAN_ORDER)
sPymbed 0:1387ff3eed4a 2682 ByteReverseWords(sha224->digest, sha224->digest, WC_SHA224_DIGEST_SIZE);
sPymbed 0:1387ff3eed4a 2683 #endif
sPymbed 0:1387ff3eed4a 2684 XMEMCPY(hash, sha224->digest, WC_SHA224_DIGEST_SIZE);
sPymbed 0:1387ff3eed4a 2685
sPymbed 0:1387ff3eed4a 2686 return InitSha224(sha224); /* reset state */
sPymbed 0:1387ff3eed4a 2687 }
sPymbed 0:1387ff3eed4a 2688 #endif /* end of SHA224 software implementation */
sPymbed 0:1387ff3eed4a 2689
sPymbed 0:1387ff3eed4a 2690 int wc_InitSha224(wc_Sha224* sha224)
sPymbed 0:1387ff3eed4a 2691 {
sPymbed 0:1387ff3eed4a 2692 return wc_InitSha224_ex(sha224, NULL, INVALID_DEVID);
sPymbed 0:1387ff3eed4a 2693 }
sPymbed 0:1387ff3eed4a 2694
sPymbed 0:1387ff3eed4a 2695 void wc_Sha224Free(wc_Sha224* sha224)
sPymbed 0:1387ff3eed4a 2696 {
sPymbed 0:1387ff3eed4a 2697 if (sha224 == NULL)
sPymbed 0:1387ff3eed4a 2698 return;
sPymbed 0:1387ff3eed4a 2699
sPymbed 0:1387ff3eed4a 2700 #ifdef WOLFSSL_SMALL_STACK_CACHE
sPymbed 0:1387ff3eed4a 2701 if (sha224->W != NULL) {
sPymbed 0:1387ff3eed4a 2702 XFREE(sha224->W, NULL, DYNAMIC_TYPE_RNG);
sPymbed 0:1387ff3eed4a 2703 sha224->W = NULL;
sPymbed 0:1387ff3eed4a 2704 }
sPymbed 0:1387ff3eed4a 2705 #endif
sPymbed 0:1387ff3eed4a 2706
sPymbed 0:1387ff3eed4a 2707 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA224)
sPymbed 0:1387ff3eed4a 2708 wolfAsync_DevCtxFree(&sha224->asyncDev, WOLFSSL_ASYNC_MARKER_SHA224);
sPymbed 0:1387ff3eed4a 2709 #endif /* WOLFSSL_ASYNC_CRYPT */
sPymbed 0:1387ff3eed4a 2710 }
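
/* Editor's note: a minimal, hedged usage sketch of the one-shot SHA-224
 * flow, kept out of the build; HashMessageSha224 is an illustrative name
 * and error handling is abbreviated. */
#if 0
static int HashMessageSha224(const byte* msg, word32 msgLen,
                             byte digest[WC_SHA224_DIGEST_SIZE])
{
    wc_Sha224 sha224;
    int ret = wc_InitSha224(&sha224);          /* default heap and devId */
    if (ret == 0)
        ret = wc_Sha224Update(&sha224, msg, msgLen);
    if (ret == 0)
        ret = wc_Sha224Final(&sha224, digest); /* also resets the state */
    wc_Sha224Free(&sha224);
    return ret;
}
#endif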
sPymbed 0:1387ff3eed4a 2711 #endif /* WOLFSSL_SHA224 */
sPymbed 0:1387ff3eed4a 2712
sPymbed 0:1387ff3eed4a 2713
sPymbed 0:1387ff3eed4a 2714 int wc_InitSha256(wc_Sha256* sha256)
sPymbed 0:1387ff3eed4a 2715 {
sPymbed 0:1387ff3eed4a 2716 return wc_InitSha256_ex(sha256, NULL, INVALID_DEVID);
sPymbed 0:1387ff3eed4a 2717 }
sPymbed 0:1387ff3eed4a 2718
sPymbed 0:1387ff3eed4a 2719 void wc_Sha256Free(wc_Sha256* sha256)
sPymbed 0:1387ff3eed4a 2720 {
sPymbed 0:1387ff3eed4a 2721 if (sha256 == NULL)
sPymbed 0:1387ff3eed4a 2722 return;
sPymbed 0:1387ff3eed4a 2723
sPymbed 0:1387ff3eed4a 2724 #ifdef WOLFSSL_SMALL_STACK_CACHE
sPymbed 0:1387ff3eed4a 2725 if (sha256->W != NULL) {
sPymbed 0:1387ff3eed4a 2726 XFREE(sha256->W, NULL, DYNAMIC_TYPE_RNG);
sPymbed 0:1387ff3eed4a 2727 sha256->W = NULL;
sPymbed 0:1387ff3eed4a 2728 }
sPymbed 0:1387ff3eed4a 2729 #endif
sPymbed 0:1387ff3eed4a 2730
sPymbed 0:1387ff3eed4a 2731 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
sPymbed 0:1387ff3eed4a 2732 wolfAsync_DevCtxFree(&sha256->asyncDev, WOLFSSL_ASYNC_MARKER_SHA256);
sPymbed 0:1387ff3eed4a 2733 #endif /* WOLFSSL_ASYNC_CRYPT */
sPymbed 0:1387ff3eed4a 2734 }
sPymbed 0:1387ff3eed4a 2735
sPymbed 0:1387ff3eed4a 2736 #endif /* !WOLFSSL_TI_HASH */
sPymbed 0:1387ff3eed4a 2737 #endif /* HAVE_FIPS */
sPymbed 0:1387ff3eed4a 2738
sPymbed 0:1387ff3eed4a 2739
sPymbed 0:1387ff3eed4a 2740 #ifndef WOLFSSL_TI_HASH
sPymbed 0:1387ff3eed4a 2741 #ifdef WOLFSSL_SHA224
sPymbed 0:1387ff3eed4a 2742 int wc_Sha224GetHash(wc_Sha224* sha224, byte* hash)
sPymbed 0:1387ff3eed4a 2743 {
sPymbed 0:1387ff3eed4a 2744 int ret;
sPymbed 0:1387ff3eed4a 2745 wc_Sha224 tmpSha224;
sPymbed 0:1387ff3eed4a 2746
sPymbed 0:1387ff3eed4a 2747 if (sha224 == NULL || hash == NULL)
sPymbed 0:1387ff3eed4a 2748 return BAD_FUNC_ARG;
sPymbed 0:1387ff3eed4a 2749
sPymbed 0:1387ff3eed4a 2750 ret = wc_Sha224Copy(sha224, &tmpSha224);
sPymbed 0:1387ff3eed4a 2751 if (ret == 0) {
sPymbed 0:1387ff3eed4a 2752 ret = wc_Sha224Final(&tmpSha224, hash);
sPymbed 0:1387ff3eed4a 2753 wc_Sha224Free(&tmpSha224);
sPymbed 0:1387ff3eed4a 2754 }
sPymbed 0:1387ff3eed4a 2755 return ret;
sPymbed 0:1387ff3eed4a 2756 }
sPymbed 0:1387ff3eed4a 2757 int wc_Sha224Copy(wc_Sha224* src, wc_Sha224* dst)
sPymbed 0:1387ff3eed4a 2758 {
sPymbed 0:1387ff3eed4a 2759 int ret = 0;
sPymbed 0:1387ff3eed4a 2760
sPymbed 0:1387ff3eed4a 2761 if (src == NULL || dst == NULL)
sPymbed 0:1387ff3eed4a 2762 return BAD_FUNC_ARG;
sPymbed 0:1387ff3eed4a 2763
sPymbed 0:1387ff3eed4a 2764 XMEMCPY(dst, src, sizeof(wc_Sha224));
sPymbed 0:1387ff3eed4a 2765 #ifdef WOLFSSL_SMALL_STACK_CACHE
sPymbed 0:1387ff3eed4a 2766 dst->W = NULL;
sPymbed 0:1387ff3eed4a 2767 #endif
sPymbed 0:1387ff3eed4a 2768
sPymbed 0:1387ff3eed4a 2769 #ifdef WOLFSSL_ASYNC_CRYPT
sPymbed 0:1387ff3eed4a 2770 ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev);
sPymbed 0:1387ff3eed4a 2771 #endif
sPymbed 0:1387ff3eed4a 2772
sPymbed 0:1387ff3eed4a 2773 return ret;
sPymbed 0:1387ff3eed4a 2774 }
sPymbed 0:1387ff3eed4a 2775 #endif /* WOLFSSL_SHA224 */
sPymbed 0:1387ff3eed4a 2776
sPymbed 0:1387ff3eed4a 2777 int wc_Sha256GetHash(wc_Sha256* sha256, byte* hash)
sPymbed 0:1387ff3eed4a 2778 {
sPymbed 0:1387ff3eed4a 2779 int ret;
sPymbed 0:1387ff3eed4a 2780 wc_Sha256 tmpSha256;
sPymbed 0:1387ff3eed4a 2781
sPymbed 0:1387ff3eed4a 2782 if (sha256 == NULL || hash == NULL)
sPymbed 0:1387ff3eed4a 2783 return BAD_FUNC_ARG;
sPymbed 0:1387ff3eed4a 2784
sPymbed 0:1387ff3eed4a 2785 ret = wc_Sha256Copy(sha256, &tmpSha256);
sPymbed 0:1387ff3eed4a 2786 if (ret == 0) {
sPymbed 0:1387ff3eed4a 2787 ret = wc_Sha256Final(&tmpSha256, hash);
sPymbed 0:1387ff3eed4a 2788 wc_Sha256Free(&tmpSha256);
sPymbed 0:1387ff3eed4a 2789 }
sPymbed 0:1387ff3eed4a 2790 return ret;
sPymbed 0:1387ff3eed4a 2791 }
sPymbed 0:1387ff3eed4a 2792 int wc_Sha256Copy(wc_Sha256* src, wc_Sha256* dst)
sPymbed 0:1387ff3eed4a 2793 {
sPymbed 0:1387ff3eed4a 2794 int ret = 0;
sPymbed 0:1387ff3eed4a 2795
sPymbed 0:1387ff3eed4a 2796 if (src == NULL || dst == NULL)
sPymbed 0:1387ff3eed4a 2797 return BAD_FUNC_ARG;
sPymbed 0:1387ff3eed4a 2798
sPymbed 0:1387ff3eed4a 2799 XMEMCPY(dst, src, sizeof(wc_Sha256));
sPymbed 0:1387ff3eed4a 2800 #ifdef WOLFSSL_SMALL_STACK_CACHE
sPymbed 0:1387ff3eed4a 2801 dst->W = NULL;
sPymbed 0:1387ff3eed4a 2802 #endif
sPymbed 0:1387ff3eed4a 2803
sPymbed 0:1387ff3eed4a 2804 #ifdef WOLFSSL_ASYNC_CRYPT
sPymbed 0:1387ff3eed4a 2805 ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev);
sPymbed 0:1387ff3eed4a 2806 #endif
sPymbed 0:1387ff3eed4a 2807 #ifdef WOLFSSL_PIC32MZ_HASH
sPymbed 0:1387ff3eed4a 2808 ret = wc_Pic32HashCopy(&src->cache, &dst->cache);
sPymbed 0:1387ff3eed4a 2809 #endif
sPymbed 0:1387ff3eed4a 2810
sPymbed 0:1387ff3eed4a 2811 return ret;
sPymbed 0:1387ff3eed4a 2812 }
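
/* Editor's note: wc_Sha256GetHash yields an intermediate digest without
 * disturbing the running state because it finalizes a copy. A hedged,
 * non-compiled sketch of that use (error checks omitted): */
#if 0
    wc_Sha256 sha;
    byte mid[WC_SHA256_DIGEST_SIZE], fin[WC_SHA256_DIGEST_SIZE];
    wc_InitSha256(&sha);
    wc_Sha256Update(&sha, (const byte*)"abc", 3);
    wc_Sha256GetHash(&sha, mid);   /* digest of "abc"; state untouched */
    wc_Sha256Update(&sha, (const byte*)"def", 3);
    wc_Sha256Final(&sha, fin);     /* digest of "abcdef" */
    wc_Sha256Free(&sha);
#endif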
sPymbed 0:1387ff3eed4a 2813 #endif /* !WOLFSSL_TI_HASH */
sPymbed 0:1387ff3eed4a 2814
sPymbed 0:1387ff3eed4a 2815 #endif /* NO_SHA256 */
sPymbed 0:1387ff3eed4a 2816