wolfSSL SSL/TLS library, support up to TLS1.3

Dependents:   CyaSSL-Twitter-OAuth4Tw Example-client-tls-cert TwitterReader TweetTest ... more

Committer:
wolfSSL
Date:
Tue May 02 08:44:47 2017 +0000
Revision:
7:481bce714567
wolfSSL3.10.2

Who changed what in which revision?

User | Revision | Line number | New contents of line
wolfSSL 7:481bce714567 1 /* sha256.c
wolfSSL 7:481bce714567 2 *
wolfSSL 7:481bce714567 3 * Copyright (C) 2006-2016 wolfSSL Inc.
wolfSSL 7:481bce714567 4 *
wolfSSL 7:481bce714567 5 * This file is part of wolfSSL.
wolfSSL 7:481bce714567 6 *
wolfSSL 7:481bce714567 7 * wolfSSL is free software; you can redistribute it and/or modify
wolfSSL 7:481bce714567 8 * it under the terms of the GNU General Public License as published by
wolfSSL 7:481bce714567 9 * the Free Software Foundation; either version 2 of the License, or
wolfSSL 7:481bce714567 10 * (at your option) any later version.
wolfSSL 7:481bce714567 11 *
wolfSSL 7:481bce714567 12 * wolfSSL is distributed in the hope that it will be useful,
wolfSSL 7:481bce714567 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
wolfSSL 7:481bce714567 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
wolfSSL 7:481bce714567 15 * GNU General Public License for more details.
wolfSSL 7:481bce714567 16 *
wolfSSL 7:481bce714567 17 * You should have received a copy of the GNU General Public License
wolfSSL 7:481bce714567 18 * along with this program; if not, write to the Free Software
wolfSSL 7:481bce714567 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
wolfSSL 7:481bce714567 20 */
wolfSSL 7:481bce714567 21
wolfSSL 7:481bce714567 22
wolfSSL 7:481bce714567 23 /* code submitted by raphael.huck@efixo.com */
wolfSSL 7:481bce714567 24
wolfSSL 7:481bce714567 25 #ifdef HAVE_CONFIG_H
wolfSSL 7:481bce714567 26 #include <config.h>
wolfSSL 7:481bce714567 27 #endif
wolfSSL 7:481bce714567 28
wolfSSL 7:481bce714567 29 #include <wolfssl/wolfcrypt/settings.h>
wolfSSL 7:481bce714567 30 #include <wolfssl/wolfcrypt/sha256.h>
wolfSSL 7:481bce714567 31
wolfSSL 7:481bce714567 32 #if !defined(NO_SHA256)
wolfSSL 7:481bce714567 33 #ifdef HAVE_FIPS
wolfSSL 7:481bce714567 34
wolfSSL 7:481bce714567 35 int wc_InitSha256(Sha256* sha)
wolfSSL 7:481bce714567 36 {
wolfSSL 7:481bce714567 37 return InitSha256_fips(sha);
wolfSSL 7:481bce714567 38 }
wolfSSL 7:481bce714567 39
wolfSSL 7:481bce714567 40
wolfSSL 7:481bce714567 41 int wc_Sha256Update(Sha256* sha, const byte* data, word32 len)
wolfSSL 7:481bce714567 42 {
wolfSSL 7:481bce714567 43 return Sha256Update_fips(sha, data, len);
wolfSSL 7:481bce714567 44 }
wolfSSL 7:481bce714567 45
wolfSSL 7:481bce714567 46
wolfSSL 7:481bce714567 47 int wc_Sha256Final(Sha256* sha, byte* out)
wolfSSL 7:481bce714567 48 {
wolfSSL 7:481bce714567 49 return Sha256Final_fips(sha, out);
wolfSSL 7:481bce714567 50 }
wolfSSL 7:481bce714567 51
wolfSSL 7:481bce714567 52 #else /* else build without fips */
wolfSSL 7:481bce714567 53
wolfSSL 7:481bce714567 54 #if !defined(NO_SHA256) && defined(WOLFSSL_TI_HASH)
wolfSSL 7:481bce714567 55 /* #include <wolfcrypt/src/port/ti/ti-hash.c> included by wc_port.c */
wolfSSL 7:481bce714567 56 #else
wolfSSL 7:481bce714567 57
wolfSSL 7:481bce714567 58 #if !defined (ALIGN32)
wolfSSL 7:481bce714567 59 #if defined (__GNUC__)
wolfSSL 7:481bce714567 60 #define ALIGN32 __attribute__ ( (aligned (32)))
wolfSSL 7:481bce714567 61 #elif defined(_MSC_VER)
wolfSSL 7:481bce714567 62 /* disable align warning, we want alignment ! */
wolfSSL 7:481bce714567 63 #pragma warning(disable: 4324)
wolfSSL 7:481bce714567 64 #define ALIGN32 __declspec (align (32))
wolfSSL 7:481bce714567 65 #else
wolfSSL 7:481bce714567 66 #define ALIGN32
wolfSSL 7:481bce714567 67 #endif
wolfSSL 7:481bce714567 68 #endif
wolfSSL 7:481bce714567 69
wolfSSL 7:481bce714567 70 #ifdef WOLFSSL_PIC32MZ_HASH
wolfSSL 7:481bce714567 71 #define wc_InitSha256 wc_InitSha256_sw
wolfSSL 7:481bce714567 72 #define wc_Sha256Update wc_Sha256Update_sw
wolfSSL 7:481bce714567 73 #define wc_Sha256Final wc_Sha256Final_sw
wolfSSL 7:481bce714567 74 #endif
wolfSSL 7:481bce714567 75
wolfSSL 7:481bce714567 76 #ifdef HAVE_FIPS
wolfSSL 7:481bce714567 77 /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
wolfSSL 7:481bce714567 78 #define FIPS_NO_WRAPPERS
wolfSSL 7:481bce714567 79 #endif
wolfSSL 7:481bce714567 80
wolfSSL 7:481bce714567 81 #if defined(USE_INTEL_SPEEDUP)
wolfSSL 7:481bce714567 82 #define HAVE_INTEL_AVX1
wolfSSL 7:481bce714567 83 #define HAVE_INTEL_AVX2
wolfSSL 7:481bce714567 84 #endif
wolfSSL 7:481bce714567 85
wolfSSL 7:481bce714567 86 #if defined(HAVE_INTEL_AVX2)
wolfSSL 7:481bce714567 87 #define HAVE_INTEL_RORX
wolfSSL 7:481bce714567 88 #endif
wolfSSL 7:481bce714567 89
wolfSSL 7:481bce714567 90
wolfSSL 7:481bce714567 91 /*****
wolfSSL 7:481bce714567 92 Intel AVX1/AVX2 Macro Control Structure
wolfSSL 7:481bce714567 93
wolfSSL 7:481bce714567 94 #define HAVE_INTEL_AVX1
wolfSSL 7:481bce714567 95 #define HAVE_INTEL_AVX2
wolfSSL 7:481bce714567 96
wolfSSL 7:481bce714567 97 #define HAVE_INTEL_RORX
wolfSSL 7:481bce714567 98
wolfSSL 7:481bce714567 99
wolfSSL 7:481bce714567 100 int InitSha256(Sha256* sha256) {
wolfSSL 7:481bce714567 101 Save/Recover XMM, YMM
wolfSSL 7:481bce714567 102 ...
wolfSSL 7:481bce714567 103 }
wolfSSL 7:481bce714567 104
wolfSSL 7:481bce714567 105 #if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
wolfSSL 7:481bce714567 106 Transform() ; Function prototype
wolfSSL 7:481bce714567 107 #else
wolfSSL 7:481bce714567 108 Transform() { }
wolfSSL 7:481bce714567 109 int Sha256Final() {
wolfSSL 7:481bce714567 110 Save/Recover XMM, YMM
wolfSSL 7:481bce714567 111 ...
wolfSSL 7:481bce714567 112 }
wolfSSL 7:481bce714567 113 #endif
wolfSSL 7:481bce714567 114
wolfSSL 7:481bce714567 115 #if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
wolfSSL 7:481bce714567 116 #if defined(HAVE_INTEL_RORX)
wolfSSL 7:481bce714567 117 #define RND with rorx instruction
wolfSSL 7:481bce714567 118 #else
wolfSSL 7:481bce714567 119 #define RND
wolfSSL 7:481bce714567 120 #endif
wolfSSL 7:481bce714567 121 #endif
wolfSSL 7:481bce714567 122
wolfSSL 7:481bce714567 123 #if defined(HAVE_INTEL_AVX1)
wolfSSL 7:481bce714567 124
wolfSSL 7:481bce714567 125 #define XMM Instructions/inline asm
wolfSSL 7:481bce714567 126
wolfSSL 7:481bce714567 127 int Transform() {
wolfSSL 7:481bce714567 128 Stitched Message Sched/Round
wolfSSL 7:481bce714567 129 }
wolfSSL 7:481bce714567 130
wolfSSL 7:481bce714567 131 #elif defined(HAVE_INTEL_AVX2)
wolfSSL 7:481bce714567 132
wolfSSL 7:481bce714567 133 #define YMM Instructions/inline asm
wolfSSL 7:481bce714567 134
wolfSSL 7:481bce714567 135 int Transform() {
wolfSSL 7:481bce714567 136 More granular Stitched Message Sched/Round
wolfSSL 7:481bce714567 137 }
wolfSSL 7:481bce714567 138
wolfSSL 7:481bce714567 139 */
wolfSSL 7:481bce714567 140
wolfSSL 7:481bce714567 141
wolfSSL 7:481bce714567 142 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
wolfSSL 7:481bce714567 143
wolfSSL 7:481bce714567 144 /* Each platform needs to query cpuid (leaves 1 and 7) to see if AVX1, AVX2
wolfSSL 7:481bce714567 145 * and BMI2 are supported. Also, let's setup a macro for proper linkage w/o
wolfSSL 7:481bce714567 146 * ABI conflicts */
wolfSSL 7:481bce714567 147
#ifndef _MSC_VER
    /* GCC-style compilers: execute CPUID with EAX = leaf and ECX = sub and
     * store EAX, EBX, ECX, EDX into reg[0..3]. */
    #define cpuid(reg, leaf, sub)\
            __asm__ __volatile__ ("cpuid":\
             "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3]) :\
             "a" (leaf), "c"(sub));

    #define XASM_LINK(f) asm(f)
#else

    #include <intrin.h>
    /* MSVC: every call site uses cpuid(reg, leaf, sub), so the macro must take
     * three arguments.  __cpuidex() accepts the sub-leaf (ECX) as well, which
     * the two-argument __cpuid() cannot pass. */
    #define cpuid(a,b,c) __cpuidex((int*)a,b,c)

    #define XASM_LINK(f)

#endif /* _MSC_VER */
wolfSSL 7:481bce714567 163
wolfSSL 7:481bce714567 164 #define EAX 0
wolfSSL 7:481bce714567 165 #define EBX 1
wolfSSL 7:481bce714567 166 #define ECX 2
wolfSSL 7:481bce714567 167 #define EDX 3
wolfSSL 7:481bce714567 168
wolfSSL 7:481bce714567 169 #define CPUID_AVX1 0x1
wolfSSL 7:481bce714567 170 #define CPUID_AVX2 0x2
wolfSSL 7:481bce714567 171 #define CPUID_RDRAND 0x4
wolfSSL 7:481bce714567 172 #define CPUID_RDSEED 0x8
wolfSSL 7:481bce714567 173 #define CPUID_BMI2 0x10 /* MULX, RORX */
wolfSSL 7:481bce714567 174
wolfSSL 7:481bce714567 175 #define IS_INTEL_AVX1 (cpuid_flags&CPUID_AVX1)
wolfSSL 7:481bce714567 176 #define IS_INTEL_AVX2 (cpuid_flags&CPUID_AVX2)
wolfSSL 7:481bce714567 177 #define IS_INTEL_BMI2 (cpuid_flags&CPUID_BMI2)
wolfSSL 7:481bce714567 178 #define IS_INTEL_RDRAND (cpuid_flags&CPUID_RDRAND)
wolfSSL 7:481bce714567 179 #define IS_INTEL_RDSEED (cpuid_flags&CPUID_RDSEED)
wolfSSL 7:481bce714567 180
wolfSSL 7:481bce714567 181 static word32 cpuid_check = 0 ;
wolfSSL 7:481bce714567 182 static word32 cpuid_flags = 0 ;
wolfSSL 7:481bce714567 183
wolfSSL 7:481bce714567 184 static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit) {
wolfSSL 7:481bce714567 185 int got_intel_cpu=0;
wolfSSL 7:481bce714567 186 unsigned int reg[5];
wolfSSL 7:481bce714567 187
wolfSSL 7:481bce714567 188 reg[4] = '\0' ;
wolfSSL 7:481bce714567 189 cpuid(reg, 0, 0);
wolfSSL 7:481bce714567 190 if(XMEMCMP((char *)&(reg[EBX]), "Genu", 4) == 0 &&
wolfSSL 7:481bce714567 191 XMEMCMP((char *)&(reg[EDX]), "ineI", 4) == 0 &&
wolfSSL 7:481bce714567 192 XMEMCMP((char *)&(reg[ECX]), "ntel", 4) == 0) {
wolfSSL 7:481bce714567 193 got_intel_cpu = 1;
wolfSSL 7:481bce714567 194 }
wolfSSL 7:481bce714567 195 if (got_intel_cpu) {
wolfSSL 7:481bce714567 196 cpuid(reg, leaf, sub);
wolfSSL 7:481bce714567 197 return((reg[num]>>bit)&0x1) ;
wolfSSL 7:481bce714567 198 }
wolfSSL 7:481bce714567 199 return 0 ;
wolfSSL 7:481bce714567 200 }
wolfSSL 7:481bce714567 201
wolfSSL 7:481bce714567 202 static int set_cpuid_flags(void) {
wolfSSL 7:481bce714567 203 if(cpuid_check==0) {
wolfSSL 7:481bce714567 204 if(cpuid_flag(1, 0, ECX, 28)){ cpuid_flags |= CPUID_AVX1 ;}
wolfSSL 7:481bce714567 205 if(cpuid_flag(7, 0, EBX, 5)){ cpuid_flags |= CPUID_AVX2 ; }
wolfSSL 7:481bce714567 206 if(cpuid_flag(7, 0, EBX, 8)) { cpuid_flags |= CPUID_BMI2 ; }
wolfSSL 7:481bce714567 207 if(cpuid_flag(1, 0, ECX, 30)){ cpuid_flags |= CPUID_RDRAND ; }
wolfSSL 7:481bce714567 208 if(cpuid_flag(7, 0, EBX, 18)){ cpuid_flags |= CPUID_RDSEED ; }
wolfSSL 7:481bce714567 209 cpuid_check = 1 ;
wolfSSL 7:481bce714567 210 return 0 ;
wolfSSL 7:481bce714567 211 }
wolfSSL 7:481bce714567 212 return 1 ;
wolfSSL 7:481bce714567 213 }
wolfSSL 7:481bce714567 214
wolfSSL 7:481bce714567 215
wolfSSL 7:481bce714567 216 /* #if defined(HAVE_INTEL_AVX1/2) at the tail of sha256 */
wolfSSL 7:481bce714567 217 static int Transform(Sha256* sha256);
wolfSSL 7:481bce714567 218
wolfSSL 7:481bce714567 219 #if defined(HAVE_INTEL_AVX1)
wolfSSL 7:481bce714567 220 static int Transform_AVX1(Sha256 *sha256) ;
wolfSSL 7:481bce714567 221 #endif
wolfSSL 7:481bce714567 222 #if defined(HAVE_INTEL_AVX2)
wolfSSL 7:481bce714567 223 static int Transform_AVX2(Sha256 *sha256) ;
wolfSSL 7:481bce714567 224 static int Transform_AVX1_RORX(Sha256 *sha256) ;
wolfSSL 7:481bce714567 225 #endif
wolfSSL 7:481bce714567 226
wolfSSL 7:481bce714567 227 static int (*Transform_p)(Sha256* sha256) /* = _Transform */;
wolfSSL 7:481bce714567 228
wolfSSL 7:481bce714567 229 #define XTRANSFORM(sha256, B) (*Transform_p)(sha256)
wolfSSL 7:481bce714567 230
wolfSSL 7:481bce714567 231 static void set_Transform(void) {
wolfSSL 7:481bce714567 232 if(set_cpuid_flags())return ;
wolfSSL 7:481bce714567 233
wolfSSL 7:481bce714567 234 #if defined(HAVE_INTEL_AVX2)
wolfSSL 7:481bce714567 235 if(IS_INTEL_AVX2 && IS_INTEL_BMI2){
wolfSSL 7:481bce714567 236 Transform_p = Transform_AVX1_RORX; return ;
wolfSSL 7:481bce714567 237 Transform_p = Transform_AVX2 ;
wolfSSL 7:481bce714567 238 /* for avoiding warning,"not used" */
wolfSSL 7:481bce714567 239 }
wolfSSL 7:481bce714567 240 #endif
wolfSSL 7:481bce714567 241 #if defined(HAVE_INTEL_AVX1)
wolfSSL 7:481bce714567 242 Transform_p = ((IS_INTEL_AVX1) ? Transform_AVX1 : Transform) ; return ;
wolfSSL 7:481bce714567 243 #endif
wolfSSL 7:481bce714567 244 Transform_p = Transform ; return ;
wolfSSL 7:481bce714567 245 }
wolfSSL 7:481bce714567 246
wolfSSL 7:481bce714567 247 #else
wolfSSL 7:481bce714567 248 #if defined(FREESCALE_MMCAU_SHA)
wolfSSL 7:481bce714567 249 #define XTRANSFORM(sha256, B) Transform(sha256, B)
wolfSSL 7:481bce714567 250 #else
wolfSSL 7:481bce714567 251 #define XTRANSFORM(sha256, B) Transform(sha256)
wolfSSL 7:481bce714567 252 #endif
wolfSSL 7:481bce714567 253 #endif
wolfSSL 7:481bce714567 254
wolfSSL 7:481bce714567 255 /* Dummy for saving MM_REGs on behalf of Transform */
wolfSSL 7:481bce714567 256 #if defined(HAVE_INTEL_AVX2)&& !defined(HAVE_INTEL_AVX1)
wolfSSL 7:481bce714567 257 #define SAVE_XMM_YMM __asm__ volatile("or %%r8d, %%r8d":::\
wolfSSL 7:481bce714567 258 "%ymm4","%ymm5","%ymm6","%ymm7","%ymm8","%ymm9","%ymm10","%ymm11","%ymm12","%ymm13","%ymm14","%ymm15")
wolfSSL 7:481bce714567 259 #elif defined(HAVE_INTEL_AVX1)
wolfSSL 7:481bce714567 260 #define SAVE_XMM_YMM __asm__ volatile("or %%r8d, %%r8d":::\
wolfSSL 7:481bce714567 261 "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10",\
wolfSSL 7:481bce714567 262 "xmm11","xmm12","xmm13","xmm14","xmm15")
wolfSSL 7:481bce714567 263 #else
wolfSSL 7:481bce714567 264 #define SAVE_XMM_YMM
wolfSSL 7:481bce714567 265 #endif
wolfSSL 7:481bce714567 266
wolfSSL 7:481bce714567 267 #ifdef WOLFSSL_PIC32MZ_HASH
wolfSSL 7:481bce714567 268 #define InitSha256 InitSha256_sw
wolfSSL 7:481bce714567 269 #define Sha256Update Sha256Update_sw
wolfSSL 7:481bce714567 270 #define Sha256Final Sha256Final_sw
wolfSSL 7:481bce714567 271 #endif
wolfSSL 7:481bce714567 272
wolfSSL 7:481bce714567 273 #include <wolfssl/wolfcrypt/logging.h>
wolfSSL 7:481bce714567 274 #include <wolfssl/wolfcrypt/error-crypt.h>
wolfSSL 7:481bce714567 275
wolfSSL 7:481bce714567 276 #ifdef NO_INLINE
wolfSSL 7:481bce714567 277 #include <wolfssl/wolfcrypt/misc.h>
wolfSSL 7:481bce714567 278 #else
wolfSSL 7:481bce714567 279 #define WOLFSSL_MISC_INCLUDED
wolfSSL 7:481bce714567 280 #include <wolfcrypt/src/misc.c>
wolfSSL 7:481bce714567 281 #endif
wolfSSL 7:481bce714567 282
wolfSSL 7:481bce714567 283 #ifdef FREESCALE_MMCAU_SHA
wolfSSL 7:481bce714567 284 #include "fsl_mmcau.h"
wolfSSL 7:481bce714567 285 #endif
wolfSSL 7:481bce714567 286
wolfSSL 7:481bce714567 287
wolfSSL 7:481bce714567 288 #ifdef FREESCALE_LTC_SHA
wolfSSL 7:481bce714567 289 int wc_InitSha256(Sha256* sha256)
wolfSSL 7:481bce714567 290 {
wolfSSL 7:481bce714567 291 LTC_HASH_Init(LTC_BASE, &sha256->ctx, kLTC_Sha256, NULL, 0);
wolfSSL 7:481bce714567 292 return 0;
wolfSSL 7:481bce714567 293 }
wolfSSL 7:481bce714567 294 #else
wolfSSL 7:481bce714567 295 int wc_InitSha256(Sha256* sha256)
wolfSSL 7:481bce714567 296 {
wolfSSL 7:481bce714567 297 int ret = 0;
wolfSSL 7:481bce714567 298 #ifdef FREESCALE_MMCAU_SHA
wolfSSL 7:481bce714567 299 ret = wolfSSL_CryptHwMutexLock();
wolfSSL 7:481bce714567 300 if(ret != 0) {
wolfSSL 7:481bce714567 301 return ret;
wolfSSL 7:481bce714567 302 }
wolfSSL 7:481bce714567 303 MMCAU_SHA256_InitializeOutput((uint32_t*)sha256->digest);
wolfSSL 7:481bce714567 304 wolfSSL_CryptHwMutexUnLock();
wolfSSL 7:481bce714567 305 #else
wolfSSL 7:481bce714567 306 sha256->digest[0] = 0x6A09E667L;
wolfSSL 7:481bce714567 307 sha256->digest[1] = 0xBB67AE85L;
wolfSSL 7:481bce714567 308 sha256->digest[2] = 0x3C6EF372L;
wolfSSL 7:481bce714567 309 sha256->digest[3] = 0xA54FF53AL;
wolfSSL 7:481bce714567 310 sha256->digest[4] = 0x510E527FL;
wolfSSL 7:481bce714567 311 sha256->digest[5] = 0x9B05688CL;
wolfSSL 7:481bce714567 312 sha256->digest[6] = 0x1F83D9ABL;
wolfSSL 7:481bce714567 313 sha256->digest[7] = 0x5BE0CD19L;
wolfSSL 7:481bce714567 314 #endif
wolfSSL 7:481bce714567 315
wolfSSL 7:481bce714567 316 sha256->buffLen = 0;
wolfSSL 7:481bce714567 317 sha256->loLen = 0;
wolfSSL 7:481bce714567 318 sha256->hiLen = 0;
wolfSSL 7:481bce714567 319
wolfSSL 7:481bce714567 320 #if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
wolfSSL 7:481bce714567 321 set_Transform() ; /* choose best Transform function under this runtime environment */
wolfSSL 7:481bce714567 322 #endif
wolfSSL 7:481bce714567 323
wolfSSL 7:481bce714567 324 return ret;
wolfSSL 7:481bce714567 325 }
wolfSSL 7:481bce714567 326 #endif /* FREESCALE_LTC_SHA */
wolfSSL 7:481bce714567 327
wolfSSL 7:481bce714567 328 #if !defined(FREESCALE_LTC_SHA)
wolfSSL 7:481bce714567 329 #if !defined(FREESCALE_MMCAU_SHA)
/* Table of 64 per-round constants: round i of RND() adds K[i] to the working
 * state.  Declared ALIGN32 (32-byte alignment where the compiler supports it).
 * NOTE(review): values look like the SHA-256 round constants of FIPS 180-4 -
 * confirm against the standard before editing. */
wolfSSL 7:481bce714567 330 static const ALIGN32 word32 K[64] = {
wolfSSL 7:481bce714567 331 0x428A2F98L, 0x71374491L, 0xB5C0FBCFL, 0xE9B5DBA5L, 0x3956C25BL,
wolfSSL 7:481bce714567 332 0x59F111F1L, 0x923F82A4L, 0xAB1C5ED5L, 0xD807AA98L, 0x12835B01L,
wolfSSL 7:481bce714567 333 0x243185BEL, 0x550C7DC3L, 0x72BE5D74L, 0x80DEB1FEL, 0x9BDC06A7L,
wolfSSL 7:481bce714567 334 0xC19BF174L, 0xE49B69C1L, 0xEFBE4786L, 0x0FC19DC6L, 0x240CA1CCL,
wolfSSL 7:481bce714567 335 0x2DE92C6FL, 0x4A7484AAL, 0x5CB0A9DCL, 0x76F988DAL, 0x983E5152L,
wolfSSL 7:481bce714567 336 0xA831C66DL, 0xB00327C8L, 0xBF597FC7L, 0xC6E00BF3L, 0xD5A79147L,
wolfSSL 7:481bce714567 337 0x06CA6351L, 0x14292967L, 0x27B70A85L, 0x2E1B2138L, 0x4D2C6DFCL,
wolfSSL 7:481bce714567 338 0x53380D13L, 0x650A7354L, 0x766A0ABBL, 0x81C2C92EL, 0x92722C85L,
wolfSSL 7:481bce714567 339 0xA2BFE8A1L, 0xA81A664BL, 0xC24B8B70L, 0xC76C51A3L, 0xD192E819L,
wolfSSL 7:481bce714567 340 0xD6990624L, 0xF40E3585L, 0x106AA070L, 0x19A4C116L, 0x1E376C08L,
wolfSSL 7:481bce714567 341 0x2748774CL, 0x34B0BCB5L, 0x391C0CB3L, 0x4ED8AA4AL, 0x5B9CCA4FL,
wolfSSL 7:481bce714567 342 0x682E6FF3L, 0x748F82EEL, 0x78A5636FL, 0x84C87814L, 0x8CC70208L,
wolfSSL 7:481bce714567 343 0x90BEFFFAL, 0xA4506CEBL, 0xBEF9A3F7L, 0xC67178F2L
wolfSSL 7:481bce714567 344 };
wolfSSL 7:481bce714567 345
wolfSSL 7:481bce714567 346 #endif
wolfSSL 7:481bce714567 347
wolfSSL 7:481bce714567 348 #if defined(FREESCALE_MMCAU_SHA)
wolfSSL 7:481bce714567 349
wolfSSL 7:481bce714567 350 static int Transform(Sha256* sha256, byte* buf)
wolfSSL 7:481bce714567 351 {
wolfSSL 7:481bce714567 352 int ret = wolfSSL_CryptHwMutexLock();
wolfSSL 7:481bce714567 353 if(ret == 0) {
wolfSSL 7:481bce714567 354 MMCAU_SHA256_HashN(buf, 1, (uint32_t*)sha256->digest);
wolfSSL 7:481bce714567 355 wolfSSL_CryptHwMutexUnLock();
wolfSSL 7:481bce714567 356 }
wolfSSL 7:481bce714567 357 return ret;
wolfSSL 7:481bce714567 358 }
wolfSSL 7:481bce714567 359
wolfSSL 7:481bce714567 360 #endif /* FREESCALE_MMCAU_SHA */
wolfSSL 7:481bce714567 361
/* Bitwise helpers for the round function.
 * Ch  : for every bit, takes y where x is 1 and z where x is 0.
 * Maj : for every bit, the majority value of x, y and z.
 * R   : logical right shift by n of the low 32 bits of x. */
wolfSSL 7:481bce714567 362 #define Ch(x,y,z) ((z) ^ ((x) & ((y) ^ (z))))
wolfSSL 7:481bce714567 363 #define Maj(x,y,z) ((((x) | (y)) & (z)) | ((x) & (y)))
wolfSSL 7:481bce714567 364 #define R(x, n) (((x)&0xFFFFFFFFU)>>(n))
wolfSSL 7:481bce714567 365
/* S forwards to rotrFixed(), which is defined outside this file section
 * (presumably a 32-bit rotate right - confirm in misc.c).  Sigma0/Sigma1 and
 * Gamma0/Gamma1 XOR three S/R results with fixed amounts. */
wolfSSL 7:481bce714567 366 #define S(x, n) rotrFixed(x, n)
wolfSSL 7:481bce714567 367 #define Sigma0(x) (S(x, 2) ^ S(x, 13) ^ S(x, 22))
wolfSSL 7:481bce714567 368 #define Sigma1(x) (S(x, 6) ^ S(x, 11) ^ S(x, 25))
wolfSSL 7:481bce714567 369 #define Gamma0(x) (S(x, 7) ^ S(x, 18) ^ R(x, 3))
wolfSSL 7:481bce714567 370 #define Gamma1(x) (S(x, 17) ^ S(x, 19) ^ R(x, 10))
wolfSSL 7:481bce714567 371
/* One round.  Relies on t0, t1, K and W being in scope at the expansion
 * site; writes only (d) and (h).  Expands to several statements and already
 * ends in ';', so it must not be used as the unbraced body of an if/else. */
wolfSSL 7:481bce714567 372 #define RND(a,b,c,d,e,f,g,h,i) \
wolfSSL 7:481bce714567 373 t0 = (h) + Sigma1((e)) + Ch((e), (f), (g)) + K[(i)] + W[(i)]; \
wolfSSL 7:481bce714567 374 t1 = Sigma0((a)) + Maj((a), (b), (c)); \
wolfSSL 7:481bce714567 375 (d) += t0; \
wolfSSL 7:481bce714567 376 (h) = t0 + t1;
wolfSSL 7:481bce714567 377
wolfSSL 7:481bce714567 378 #if !defined(FREESCALE_MMCAU_SHA)
wolfSSL 7:481bce714567 379 static int Transform(Sha256* sha256)
wolfSSL 7:481bce714567 380 {
wolfSSL 7:481bce714567 381 word32 S[8], t0, t1;
wolfSSL 7:481bce714567 382 int i;
wolfSSL 7:481bce714567 383
wolfSSL 7:481bce714567 384 #ifdef WOLFSSL_SMALL_STACK
wolfSSL 7:481bce714567 385 word32* W;
wolfSSL 7:481bce714567 386
wolfSSL 7:481bce714567 387 W = (word32*) XMALLOC(sizeof(word32) * 64, NULL, DYNAMIC_TYPE_TMP_BUFFER);
wolfSSL 7:481bce714567 388 if (W == NULL)
wolfSSL 7:481bce714567 389 return MEMORY_E;
wolfSSL 7:481bce714567 390 #else
wolfSSL 7:481bce714567 391 word32 W[64];
wolfSSL 7:481bce714567 392 #endif
wolfSSL 7:481bce714567 393
wolfSSL 7:481bce714567 394 /* Copy context->state[] to working vars */
wolfSSL 7:481bce714567 395 for (i = 0; i < 8; i++)
wolfSSL 7:481bce714567 396 S[i] = sha256->digest[i];
wolfSSL 7:481bce714567 397
wolfSSL 7:481bce714567 398 for (i = 0; i < 16; i++)
wolfSSL 7:481bce714567 399 W[i] = sha256->buffer[i];
wolfSSL 7:481bce714567 400
wolfSSL 7:481bce714567 401 for (i = 16; i < 64; i++)
wolfSSL 7:481bce714567 402 W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16];
wolfSSL 7:481bce714567 403
wolfSSL 7:481bce714567 404 for (i = 0; i < 64; i += 8) {
wolfSSL 7:481bce714567 405 RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i+0);
wolfSSL 7:481bce714567 406 RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],i+1);
wolfSSL 7:481bce714567 407 RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],i+2);
wolfSSL 7:481bce714567 408 RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],i+3);
wolfSSL 7:481bce714567 409 RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],i+4);
wolfSSL 7:481bce714567 410 RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],i+5);
wolfSSL 7:481bce714567 411 RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],i+6);
wolfSSL 7:481bce714567 412 RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],i+7);
wolfSSL 7:481bce714567 413 }
wolfSSL 7:481bce714567 414
wolfSSL 7:481bce714567 415 /* Add the working vars back into digest state[] */
wolfSSL 7:481bce714567 416 for (i = 0; i < 8; i++) {
wolfSSL 7:481bce714567 417 sha256->digest[i] += S[i];
wolfSSL 7:481bce714567 418 }
wolfSSL 7:481bce714567 419
wolfSSL 7:481bce714567 420 #ifdef WOLFSSL_SMALL_STACK
wolfSSL 7:481bce714567 421 XFREE(W, NULL, DYNAMIC_TYPE_TMP_BUFFER);
wolfSSL 7:481bce714567 422 #endif
wolfSSL 7:481bce714567 423
wolfSSL 7:481bce714567 424 return 0;
wolfSSL 7:481bce714567 425 }
wolfSSL 7:481bce714567 426
wolfSSL 7:481bce714567 427 #endif /* #if !defined(FREESCALE_MMCAU_SHA) */
wolfSSL 7:481bce714567 428
wolfSSL 7:481bce714567 429 static INLINE void AddLength(Sha256* sha256, word32 len)
wolfSSL 7:481bce714567 430 {
wolfSSL 7:481bce714567 431 word32 tmp = sha256->loLen;
wolfSSL 7:481bce714567 432 if ( (sha256->loLen += len) < tmp)
wolfSSL 7:481bce714567 433 sha256->hiLen++; /* carry low to high */
wolfSSL 7:481bce714567 434 }
wolfSSL 7:481bce714567 435 #endif /* FREESCALE_LTC_SHA */
wolfSSL 7:481bce714567 436
wolfSSL 7:481bce714567 437 #ifdef FREESCALE_LTC_SHA
wolfSSL 7:481bce714567 438 int wc_Sha256Update(Sha256* sha256, const byte* data, word32 len)
wolfSSL 7:481bce714567 439 {
wolfSSL 7:481bce714567 440 LTC_HASH_Update(&sha256->ctx, data, len);
wolfSSL 7:481bce714567 441 return 0;
wolfSSL 7:481bce714567 442 }
wolfSSL 7:481bce714567 443 #else
wolfSSL 7:481bce714567 444 static INLINE int Sha256Update(Sha256* sha256, const byte* data, word32 len)
wolfSSL 7:481bce714567 445 {
wolfSSL 7:481bce714567 446
wolfSSL 7:481bce714567 447 /* do block size increments */
wolfSSL 7:481bce714567 448 byte* local = (byte*)sha256->buffer;
wolfSSL 7:481bce714567 449
wolfSSL 7:481bce714567 450 SAVE_XMM_YMM ; /* for Intel AVX */
wolfSSL 7:481bce714567 451
wolfSSL 7:481bce714567 452 while (len) {
wolfSSL 7:481bce714567 453 word32 add = min(len, SHA256_BLOCK_SIZE - sha256->buffLen);
wolfSSL 7:481bce714567 454 XMEMCPY(&local[sha256->buffLen], data, add);
wolfSSL 7:481bce714567 455
wolfSSL 7:481bce714567 456 sha256->buffLen += add;
wolfSSL 7:481bce714567 457 data += add;
wolfSSL 7:481bce714567 458 len -= add;
wolfSSL 7:481bce714567 459
wolfSSL 7:481bce714567 460 if (sha256->buffLen == SHA256_BLOCK_SIZE) {
wolfSSL 7:481bce714567 461 int ret;
wolfSSL 7:481bce714567 462
wolfSSL 7:481bce714567 463 #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
wolfSSL 7:481bce714567 464 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
wolfSSL 7:481bce714567 465 if(!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
wolfSSL 7:481bce714567 466 #endif
wolfSSL 7:481bce714567 467 ByteReverseWords(sha256->buffer, sha256->buffer,
wolfSSL 7:481bce714567 468 SHA256_BLOCK_SIZE);
wolfSSL 7:481bce714567 469 #endif
wolfSSL 7:481bce714567 470 ret = XTRANSFORM(sha256, local);
wolfSSL 7:481bce714567 471 if (ret != 0)
wolfSSL 7:481bce714567 472 return ret;
wolfSSL 7:481bce714567 473
wolfSSL 7:481bce714567 474 AddLength(sha256, SHA256_BLOCK_SIZE);
wolfSSL 7:481bce714567 475 sha256->buffLen = 0;
wolfSSL 7:481bce714567 476 }
wolfSSL 7:481bce714567 477 }
wolfSSL 7:481bce714567 478
wolfSSL 7:481bce714567 479 return 0;
wolfSSL 7:481bce714567 480 }
wolfSSL 7:481bce714567 481
wolfSSL 7:481bce714567 482 int wc_Sha256Update(Sha256* sha256, const byte* data, word32 len)
wolfSSL 7:481bce714567 483 {
wolfSSL 7:481bce714567 484 return Sha256Update(sha256, data, len);
wolfSSL 7:481bce714567 485 }
wolfSSL 7:481bce714567 486
wolfSSL 7:481bce714567 487 #endif /* FREESCALE_LTC_SHA */
wolfSSL 7:481bce714567 488
wolfSSL 7:481bce714567 489 #ifdef FREESCALE_LTC_SHA
wolfSSL 7:481bce714567 490 int wc_Sha256Final(Sha256* sha256, byte* hash)
wolfSSL 7:481bce714567 491 {
wolfSSL 7:481bce714567 492 uint32_t hashlen = SHA256_DIGEST_SIZE;
wolfSSL 7:481bce714567 493 LTC_HASH_Finish(&sha256->ctx, hash, &hashlen);
wolfSSL 7:481bce714567 494 return wc_InitSha256(sha256); /* reset state */
wolfSSL 7:481bce714567 495 }
wolfSSL 7:481bce714567 496 #else
/* Pad the buffered data, append the message length in bits and run the last
 * transform(s).  The digest is left in sha256->digest; the caller copies it
 * out.  Returns the value of the final XTRANSFORM call, or the non-zero
 * result of the extra transform needed when the padding spills into a second
 * block. */
wolfSSL 7:481bce714567 497 static INLINE int Sha256Final(Sha256* sha256)
wolfSSL 7:481bce714567 498 {
wolfSSL 7:481bce714567 499 byte* local = (byte*)sha256->buffer;
wolfSSL 7:481bce714567 500 int ret;
wolfSSL 7:481bce714567 501
wolfSSL 7:481bce714567 502 SAVE_XMM_YMM ; /* for Intel AVX */
wolfSSL 7:481bce714567 503
wolfSSL 7:481bce714567 504 AddLength(sha256, sha256->buffLen); /* before adding pads */
wolfSSL 7:481bce714567 505
wolfSSL 7:481bce714567 506 local[sha256->buffLen++] = 0x80; /* add 1 */
wolfSSL 7:481bce714567 507
wolfSSL 7:481bce714567 508 /* pad with zeros */
/* No room left for the 8 length bytes: zero-fill this block, hash it and
 * continue with an empty buffer. */
wolfSSL 7:481bce714567 509 if (sha256->buffLen > SHA256_PAD_SIZE) {
wolfSSL 7:481bce714567 510 XMEMSET(&local[sha256->buffLen], 0, SHA256_BLOCK_SIZE - sha256->buffLen);
wolfSSL 7:481bce714567 511 sha256->buffLen += SHA256_BLOCK_SIZE - sha256->buffLen;
wolfSSL 7:481bce714567 512
/* Word-swap the block unless an AVX transform is in use (runtime check). */
wolfSSL 7:481bce714567 513 #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
wolfSSL 7:481bce714567 514 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
wolfSSL 7:481bce714567 515 if(!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
wolfSSL 7:481bce714567 516 #endif
wolfSSL 7:481bce714567 517 ByteReverseWords(sha256->buffer, sha256->buffer, SHA256_BLOCK_SIZE);
wolfSSL 7:481bce714567 518 #endif
wolfSSL 7:481bce714567 519
wolfSSL 7:481bce714567 520 ret = XTRANSFORM(sha256, local);
wolfSSL 7:481bce714567 521 if (ret != 0)
wolfSSL 7:481bce714567 522 return ret;
wolfSSL 7:481bce714567 523
wolfSSL 7:481bce714567 524 sha256->buffLen = 0;
wolfSSL 7:481bce714567 525 }
/* Zero everything up to the position where the length is stored. */
wolfSSL 7:481bce714567 526 XMEMSET(&local[sha256->buffLen], 0, SHA256_PAD_SIZE - sha256->buffLen);
wolfSSL 7:481bce714567 527
wolfSSL 7:481bce714567 528 /* put lengths in bits */
/* Multiply the 64-bit byte count (hiLen:loLen) by 8: the top three bits of
 * loLen move into hiLen. */
wolfSSL 7:481bce714567 529 sha256->hiLen = (sha256->loLen >> (8*sizeof(sha256->loLen) - 3)) +
wolfSSL 7:481bce714567 530 (sha256->hiLen << 3);
wolfSSL 7:481bce714567 531 sha256->loLen = sha256->loLen << 3;
wolfSSL 7:481bce714567 532
wolfSSL 7:481bce714567 533 /* store lengths */
/* The data part is word-swapped first (non-AVX path); the length words are
 * copied in afterwards in native order. */
wolfSSL 7:481bce714567 534 #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
wolfSSL 7:481bce714567 535 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
wolfSSL 7:481bce714567 536 if(!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
wolfSSL 7:481bce714567 537 #endif
wolfSSL 7:481bce714567 538 ByteReverseWords(sha256->buffer, sha256->buffer, SHA256_BLOCK_SIZE);
wolfSSL 7:481bce714567 539 #endif
wolfSSL 7:481bce714567 540 /* ! length ordering dependent on digest endian type ! */
wolfSSL 7:481bce714567 541 XMEMCPY(&local[SHA256_PAD_SIZE], &sha256->hiLen, sizeof(word32));
wolfSSL 7:481bce714567 542 XMEMCPY(&local[SHA256_PAD_SIZE + sizeof(word32)], &sha256->loLen,
wolfSSL 7:481bce714567 543 sizeof(word32));
wolfSSL 7:481bce714567 544
/* On MMCAU builds, and on AVX builds when an AVX CPU was detected, the block
 * was left unswapped above, so only the two length words are byte-reversed. */
wolfSSL 7:481bce714567 545 #if defined(FREESCALE_MMCAU_SHA) || defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
wolfSSL 7:481bce714567 546 /* Kinetis requires only these bytes reversed */
wolfSSL 7:481bce714567 547 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
wolfSSL 7:481bce714567 548 if(IS_INTEL_AVX1 || IS_INTEL_AVX2)
wolfSSL 7:481bce714567 549 #endif
wolfSSL 7:481bce714567 550 ByteReverseWords(&sha256->buffer[SHA256_PAD_SIZE/sizeof(word32)],
wolfSSL 7:481bce714567 551 &sha256->buffer[SHA256_PAD_SIZE/sizeof(word32)],
wolfSSL 7:481bce714567 552 2 * sizeof(word32));
wolfSSL 7:481bce714567 553 #endif
wolfSSL 7:481bce714567 554
wolfSSL 7:481bce714567 555 return XTRANSFORM(sha256, local);
wolfSSL 7:481bce714567 556 }
wolfSSL 7:481bce714567 557
wolfSSL 7:481bce714567 558 int wc_Sha256Final(Sha256* sha256, byte* hash)
wolfSSL 7:481bce714567 559 {
wolfSSL 7:481bce714567 560 int ret;
wolfSSL 7:481bce714567 561
wolfSSL 7:481bce714567 562 ret = Sha256Final(sha256);
wolfSSL 7:481bce714567 563 if (ret != 0)
wolfSSL 7:481bce714567 564 return ret;
wolfSSL 7:481bce714567 565
wolfSSL 7:481bce714567 566 #if defined(LITTLE_ENDIAN_ORDER)
wolfSSL 7:481bce714567 567 ByteReverseWords(sha256->digest, sha256->digest, SHA256_DIGEST_SIZE);
wolfSSL 7:481bce714567 568 #endif
wolfSSL 7:481bce714567 569 XMEMCPY(hash, sha256->digest, SHA256_DIGEST_SIZE);
wolfSSL 7:481bce714567 570
wolfSSL 7:481bce714567 571 return wc_InitSha256(sha256); /* reset state */
wolfSSL 7:481bce714567 572 }
wolfSSL 7:481bce714567 573 #endif /* FREESCALE_LTC_SHA */
wolfSSL 7:481bce714567 574
wolfSSL 7:481bce714567 575
wolfSSL 7:481bce714567 576
wolfSSL 7:481bce714567 577 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
wolfSSL 7:481bce714567 578
wolfSSL 7:481bce714567 579 #define _DigestToReg(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )\
wolfSSL 7:481bce714567 580 { word32 d ;\
wolfSSL 7:481bce714567 581 d = sha256->digest[0]; __asm__ volatile("movl %0, %"#S_0::"r"(d):SSE_REGs) ;\
wolfSSL 7:481bce714567 582 d = sha256->digest[1]; __asm__ volatile("movl %0, %"#S_1::"r"(d):SSE_REGs) ;\
wolfSSL 7:481bce714567 583 d = sha256->digest[2]; __asm__ volatile("movl %0, %"#S_2::"r"(d):SSE_REGs) ;\
wolfSSL 7:481bce714567 584 d = sha256->digest[3]; __asm__ volatile("movl %0, %"#S_3::"r"(d):SSE_REGs) ;\
wolfSSL 7:481bce714567 585 d = sha256->digest[4]; __asm__ volatile("movl %0, %"#S_4::"r"(d):SSE_REGs) ;\
wolfSSL 7:481bce714567 586 d = sha256->digest[5]; __asm__ volatile("movl %0, %"#S_5::"r"(d):SSE_REGs) ;\
wolfSSL 7:481bce714567 587 d = sha256->digest[6]; __asm__ volatile("movl %0, %"#S_6::"r"(d):SSE_REGs) ;\
wolfSSL 7:481bce714567 588 d = sha256->digest[7]; __asm__ volatile("movl %0, %"#S_7::"r"(d):SSE_REGs) ;\
wolfSSL 7:481bce714567 589 }
wolfSSL 7:481bce714567 590
wolfSSL 7:481bce714567 591 #define _RegToDigest(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 ) /* read each named register and ADD it into sha256->digest[0..7] (+=, not a plain store); expects a `sha256` pointer in scope */\
wolfSSL 7:481bce714567 592 { word32 d ; \
wolfSSL 7:481bce714567 593 __asm__ volatile("movl %"#S_0", %0":"=r"(d)::SSE_REGs) ; sha256->digest[0] += d;\
wolfSSL 7:481bce714567 594 __asm__ volatile("movl %"#S_1", %0":"=r"(d)::SSE_REGs) ; sha256->digest[1] += d;\
wolfSSL 7:481bce714567 595 __asm__ volatile("movl %"#S_2", %0":"=r"(d)::SSE_REGs) ; sha256->digest[2] += d;\
wolfSSL 7:481bce714567 596 __asm__ volatile("movl %"#S_3", %0":"=r"(d)::SSE_REGs) ; sha256->digest[3] += d;\
wolfSSL 7:481bce714567 597 __asm__ volatile("movl %"#S_4", %0":"=r"(d)::SSE_REGs) ; sha256->digest[4] += d;\
wolfSSL 7:481bce714567 598 __asm__ volatile("movl %"#S_5", %0":"=r"(d)::SSE_REGs) ; sha256->digest[5] += d;\
wolfSSL 7:481bce714567 599 __asm__ volatile("movl %"#S_6", %0":"=r"(d)::SSE_REGs) ; sha256->digest[6] += d;\
wolfSSL 7:481bce714567 600 __asm__ volatile("movl %"#S_7", %0":"=r"(d)::SSE_REGs) ; sha256->digest[7] += d;\
wolfSSL 7:481bce714567 601 }
wolfSSL 7:481bce714567 602
wolfSSL 7:481bce714567 603
wolfSSL 7:481bce714567 604 #define DigestToReg(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 ) /* forwarding layer: arguments that are macros are expanded here, before _DigestToReg stringifies them with # */\
wolfSSL 7:481bce714567 605 _DigestToReg(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )
wolfSSL 7:481bce714567 606
wolfSSL 7:481bce714567 607 #define RegToDigest(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 ) /* forwarding layer for _RegToDigest, same reason: expand arguments before # is applied */\
wolfSSL 7:481bce714567 608 _RegToDigest(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )
wolfSSL 7:481bce714567 609
wolfSSL 7:481bce714567 610
wolfSSL 7:481bce714567 611
wolfSSL 7:481bce714567 612
wolfSSL 7:481bce714567 613 #define S_0 %r15d /* S_0..S_7: 32-bit registers handed to DigestToReg and the round macros as the eight state words */
wolfSSL 7:481bce714567 614 #define S_1 %r10d
wolfSSL 7:481bce714567 615 #define S_2 %r11d
wolfSSL 7:481bce714567 616 #define S_3 %r12d
wolfSSL 7:481bce714567 617 #define S_4 %r13d
wolfSSL 7:481bce714567 618 #define S_5 %r14d
wolfSSL 7:481bce714567 619 #define S_6 %ebx
wolfSSL 7:481bce714567 620 #define S_7 %r9d
wolfSSL 7:481bce714567 621
wolfSSL 7:481bce714567 622 #define SSE_REGs "%edi", "%ecx", "%esi", "%edx", "%ebx","%r8","%r9","%r10","%r11","%r12","%r13","%r14","%r15" /* clobber list appended to the asm statements of the digest/round macros; despite the name every entry is a general-purpose register */
wolfSSL 7:481bce714567 623
wolfSSL 7:481bce714567 624 #if defined(HAVE_INTEL_RORX) /* round steps that use the rorx instruction; scratch registers are edx, edi, esi and r8d */
wolfSSL 7:481bce714567 625 #define RND_STEP_RORX_1(a,b,c,d,e,f,g,h,i)\
wolfSSL 7:481bce714567 626 __asm__ volatile("rorx $6, %"#e", %%edx\n\t":::"%edx",SSE_REGs); /* edx = e ror 6 */\
wolfSSL 7:481bce714567 627
wolfSSL 7:481bce714567 628 #define RND_STEP_RORX_2(a,b,c,d,e,f,g,h,i)\
wolfSSL 7:481bce714567 629 __asm__ volatile("rorx $11, %"#e",%%edi\n\t":::"%edi",SSE_REGs); /* edi = e ror 11 */\
wolfSSL 7:481bce714567 630 __asm__ volatile("xorl %%edx, %%edi\n\t":::"%edx","%edi",SSE_REGs); /* edi = (e ror 11) ^ (e ror 6) */\
wolfSSL 7:481bce714567 631 __asm__ volatile("rorx $25, %"#e", %%edx\n\t":::"%edx",SSE_REGs); /* edx = e ror 25 */\
wolfSSL 7:481bce714567 632
wolfSSL 7:481bce714567 633 #define RND_STEP_RORX_3(a,b,c,d,e,f,g,h,i)\
wolfSSL 7:481bce714567 634 __asm__ volatile("movl %"#f", %%esi\n\t":::"%esi",SSE_REGs); /* esi = f */\
wolfSSL 7:481bce714567 635 __asm__ volatile("xorl %"#g", %%esi\n\t":::"%esi",SSE_REGs); /* esi = f ^ g */\
wolfSSL 7:481bce714567 636 __asm__ volatile("xorl %%edi, %%edx\n\t":::"%edi","%edx",SSE_REGs); /* edx = Sigma1(e) */\
wolfSSL 7:481bce714567 637 __asm__ volatile("andl %"#e", %%esi\n\t":::"%esi",SSE_REGs); /* esi = (f ^ g) & e */\
wolfSSL 7:481bce714567 638 __asm__ volatile("xorl %"#g", %%esi\n\t":::"%esi",SSE_REGs); /* esi = Ch(e,f,g) */\
wolfSSL 7:481bce714567 639
wolfSSL 7:481bce714567 640 #define RND_STEP_RORX_4(a,b,c,d,e,f,g,h,i)\
wolfSSL 7:481bce714567 641 /*__asm__ volatile("movl %0, %%edx\n\t"::"m"(w_k):"%edx");*/\
wolfSSL 7:481bce714567 642 __asm__ volatile("addl %0, %"#h"\n\t"::"r"(W_K[i]):SSE_REGs); /* h += W_K[i]; expects a W_K array in scope */\
wolfSSL 7:481bce714567 643 __asm__ volatile("addl %%edx, %"#h"\n\t":::"%edx",SSE_REGs); /* h = h + w_k + Sigma1(e) */\
wolfSSL 7:481bce714567 644 __asm__ volatile("rorx $2, %"#a", %%r8d\n\t":::"%r8",SSE_REGs); /* r8d = a ror 2 */\
wolfSSL 7:481bce714567 645 __asm__ volatile("rorx $13, %"#a", %%edi\n\t":::"%edi",SSE_REGs);/* edi = a ror 13 */\
wolfSSL 7:481bce714567 646
wolfSSL 7:481bce714567 647 #define RND_STEP_RORX_5(a,b,c,d,e,f,g,h,i)\
wolfSSL 7:481bce714567 648 __asm__ volatile("rorx $22, %"#a", %%edx\n\t":::"%edx",SSE_REGs); /* edx = a ror 22 */\
wolfSSL 7:481bce714567 649 __asm__ volatile("xorl %%r8d, %%edi\n\t":::"%edi","%r8",SSE_REGs);/* edi = (a ror 2) ^ (a ror 13) */\
wolfSSL 7:481bce714567 650 __asm__ volatile("xorl %%edi, %%edx\n\t":::"%edi","%edx",SSE_REGs); /* edx = Sigma0(a) */\
wolfSSL 7:481bce714567 651
wolfSSL 7:481bce714567 652 #define RND_STEP_RORX_6(a,b,c,d,e,f,g,h,i)\
wolfSSL 7:481bce714567 653 __asm__ volatile("movl %"#b", %%edi\n\t":::"%edi",SSE_REGs); /* edi = b */\
wolfSSL 7:481bce714567 654 __asm__ volatile("orl %"#a", %%edi\n\t":::"%edi",SSE_REGs); /* edi = a | b */\
wolfSSL 7:481bce714567 655 __asm__ volatile("andl %"#c", %%edi\n\t":::"%edi",SSE_REGs); /* edi = (a | b) & c*/\
wolfSSL 7:481bce714567 656 __asm__ volatile("movl %"#b", %%r8d\n\t":::"%r8",SSE_REGs); /* r8d = b */\
wolfSSL 7:481bce714567 657
wolfSSL 7:481bce714567 658 #define RND_STEP_RORX_7(a,b,c,d,e,f,g,h,i)\
wolfSSL 7:481bce714567 659 __asm__ volatile("addl %%esi, %"#h"\n\t":::"%esi",SSE_REGs); /* h += Ch(e,f,g) */\
wolfSSL 7:481bce714567 660 __asm__ volatile("andl %"#a", %%r8d\n\t":::"%r8",SSE_REGs); /* r8d = b & a */\
wolfSSL 7:481bce714567 661 __asm__ volatile("orl %%edi, %%r8d\n\t":::"%edi","%r8",SSE_REGs); /* r8d = Maj(a,b,c) */\
wolfSSL 7:481bce714567 662
wolfSSL 7:481bce714567 663 #define RND_STEP_RORX_8(a,b,c,d,e,f,g,h,i)\
wolfSSL 7:481bce714567 664 __asm__ volatile("addl "#h", "#d"\n\t"); /* d += h, where h now holds h + w_k + Sigma1(e) + Ch(e,f,g); basic asm, so no clobber list is given */\
wolfSSL 7:481bce714567 665 __asm__ volatile("addl %"#h", %%r8d\n\t":::"%r8",SSE_REGs); /* r8d = Maj(a,b,c) + h */ \
wolfSSL 7:481bce714567 666 __asm__ volatile("addl %%edx, %%r8d\n\t":::"%edx","%r8",SSE_REGs); /* r8d += Sigma0(a) */ \
wolfSSL 7:481bce714567 667 __asm__ volatile("movl %r8d, "#h"\n\t"); /* h = r8d; basic asm (no operand section), hence the single % */
wolfSSL 7:481bce714567 668
wolfSSL 7:481bce714567 669 #endif
wolfSSL 7:481bce714567 670
wolfSSL 7:481bce714567 671 #define RND_STEP_1(a,b,c,d,e,f,g,h,i) /* RND_STEP_1..8 together form one round; rotate-right is done as movl + roll by (32 - n) */\
wolfSSL 7:481bce714567 672 __asm__ volatile("movl %"#e", %%edx\n\t":::"%edx",SSE_REGs); /* edx = e */\
wolfSSL 7:481bce714567 673 __asm__ volatile("roll $26, %%edx\n\t":::"%edx",SSE_REGs); /* edx = e ror 6 (rol 26) */\
wolfSSL 7:481bce714567 674 __asm__ volatile("movl %"#e", %%edi\n\t":::"%edi",SSE_REGs); /* edi = e */\
wolfSSL 7:481bce714567 675
wolfSSL 7:481bce714567 676 #define RND_STEP_2(a,b,c,d,e,f,g,h,i)\
wolfSSL 7:481bce714567 677 __asm__ volatile("roll $21, %%edi\n\t":::"%edi",SSE_REGs); /* edi = e ror 11 (rol 21) */\
wolfSSL 7:481bce714567 678 __asm__ volatile("xorl %%edx, %%edi\n\t":::"%edx","%edi",SSE_REGs); /* edi = (e ror 11) ^ (e ror 6) */\
wolfSSL 7:481bce714567 679 __asm__ volatile("movl %"#e", %%edx\n\t":::"%edx",SSE_REGs); /* edx = e */\
wolfSSL 7:481bce714567 680 __asm__ volatile("roll $7, %%edx\n\t":::"%edx",SSE_REGs); /* edx = e ror 25 (rol 7) */\
wolfSSL 7:481bce714567 681
wolfSSL 7:481bce714567 682 #define RND_STEP_3(a,b,c,d,e,f,g,h,i)\
wolfSSL 7:481bce714567 683 __asm__ volatile("movl %"#f", %%esi\n\t":::"%esi",SSE_REGs); /* esi = f */\
wolfSSL 7:481bce714567 684 __asm__ volatile("xorl %"#g", %%esi\n\t":::"%esi",SSE_REGs); /* esi = f ^ g */\
wolfSSL 7:481bce714567 685 __asm__ volatile("xorl %%edi, %%edx\n\t":::"%edi","%edx",SSE_REGs); /* edx = Sigma1(e) */\
wolfSSL 7:481bce714567 686 __asm__ volatile("andl %"#e", %%esi\n\t":::"%esi",SSE_REGs); /* esi = (f ^ g) & e */\
wolfSSL 7:481bce714567 687 __asm__ volatile("xorl %"#g", %%esi\n\t":::"%esi",SSE_REGs); /* esi = Ch(e,f,g) */\
wolfSSL 7:481bce714567 688
wolfSSL 7:481bce714567 689 #define RND_STEP_4(a,b,c,d,e,f,g,h,i)\
wolfSSL 7:481bce714567 690 __asm__ volatile("addl %0, %"#h"\n\t"::"r"(W_K[i]):SSE_REGs); /* h += W_K[i]; expects a W_K array in scope */\
wolfSSL 7:481bce714567 691 __asm__ volatile("addl %%edx, %"#h"\n\t":::"%edx",SSE_REGs); /* h = h + w_k + Sigma1(e) */\
wolfSSL 7:481bce714567 692 __asm__ volatile("movl %"#a", %%r8d\n\t":::"%r8",SSE_REGs); /* r8d = a */\
wolfSSL 7:481bce714567 693 __asm__ volatile("roll $30, %%r8d\n\t":::"%r8",SSE_REGs); /* r8d = a ror 2 (rol 30) */\
wolfSSL 7:481bce714567 694 __asm__ volatile("movl %"#a", %%edi\n\t":::"%edi",SSE_REGs); /* edi = a */\
wolfSSL 7:481bce714567 695 __asm__ volatile("roll $19, %%edi\n\t":::"%edi",SSE_REGs); /* edi = a ror 13 (rol 19) */\
wolfSSL 7:481bce714567 696 __asm__ volatile("movl %"#a", %%edx\n\t":::"%edx",SSE_REGs); /* edx = a */\
wolfSSL 7:481bce714567 697
wolfSSL 7:481bce714567 698 #define RND_STEP_5(a,b,c,d,e,f,g,h,i)\
wolfSSL 7:481bce714567 699 __asm__ volatile("roll $10, %%edx\n\t":::"%edx",SSE_REGs); /* edx = a ror 22 (rol 10) */\
wolfSSL 7:481bce714567 700 __asm__ volatile("xorl %%r8d, %%edi\n\t":::"%edi","%r8",SSE_REGs); /* edi = (a ror 2) ^ (a ror 13) */\
wolfSSL 7:481bce714567 701 __asm__ volatile("xorl %%edi, %%edx\n\t":::"%edi","%edx",SSE_REGs);/* edx = Sigma0(a) */\
wolfSSL 7:481bce714567 702
wolfSSL 7:481bce714567 703 #define RND_STEP_6(a,b,c,d,e,f,g,h,i)\
wolfSSL 7:481bce714567 704 __asm__ volatile("movl %"#b", %%edi\n\t":::"%edi",SSE_REGs); /* edi = b */\
wolfSSL 7:481bce714567 705 __asm__ volatile("orl %"#a", %%edi\n\t":::"%edi",SSE_REGs); /* edi = a | b */\
wolfSSL 7:481bce714567 706 __asm__ volatile("andl %"#c", %%edi\n\t":::"%edi",SSE_REGs); /* edi = (a | b) & c */\
wolfSSL 7:481bce714567 707 __asm__ volatile("movl %"#b", %%r8d\n\t":::"%r8",SSE_REGs); /* r8d = b */\
wolfSSL 7:481bce714567 708
wolfSSL 7:481bce714567 709 #define RND_STEP_7(a,b,c,d,e,f,g,h,i)\
wolfSSL 7:481bce714567 710 __asm__ volatile("addl %%esi, %"#h"\n\t":::"%esi",SSE_REGs); /* h += Ch(e,f,g) */\
wolfSSL 7:481bce714567 711 __asm__ volatile("andl %"#a", %%r8d\n\t":::"%r8",SSE_REGs); /* r8d = b & a */\
wolfSSL 7:481bce714567 712 __asm__ volatile("orl %%edi, %%r8d\n\t":::"%edi","%r8",SSE_REGs); /* r8d = Maj(a,b,c) */\
wolfSSL 7:481bce714567 713
wolfSSL 7:481bce714567 714 #define RND_STEP_8(a,b,c,d,e,f,g,h,i)\
wolfSSL 7:481bce714567 715 __asm__ volatile("addl "#h", "#d"\n\t"); /* d += h, where h now holds h + w_k + Sigma1(e) + Ch(e,f,g); basic asm, so no clobber list is given */\
wolfSSL 7:481bce714567 716 __asm__ volatile("addl %"#h", %%r8d\n\t":::"%r8",SSE_REGs); \
wolfSSL 7:481bce714567 717 /* r8d = h + w_k + Sigma1(e) + Ch(e,f,g) + Maj(a,b,c) */\
wolfSSL 7:481bce714567 718 __asm__ volatile("addl %%edx, %%r8d\n\t":::"%edx","%r8",SSE_REGs);\
wolfSSL 7:481bce714567 719 /* r8d = h + w_k + Sigma1(e) + Sigma0(a) + Ch(e,f,g) + Maj(a,b,c) */\
wolfSSL 7:481bce714567 720 __asm__ volatile("movl %%r8d, %"#h"\n\t":::"%r8", SSE_REGs); \
wolfSSL 7:481bce714567 721 /* h = h + w_k + Sigma1(e) + Sigma0(a) + Ch(e,f,g) + Maj(a,b,c) */ \
wolfSSL 7:481bce714567 722
wolfSSL 7:481bce714567 723 #define RND_X(a,b,c,d,e,f,g,h,i) /* one complete round: the eight RND_STEP_n pieces back to back, using W_K[i] */ \
wolfSSL 7:481bce714567 724 RND_STEP_1(a,b,c,d,e,f,g,h,i); \
wolfSSL 7:481bce714567 725 RND_STEP_2(a,b,c,d,e,f,g,h,i); \
wolfSSL 7:481bce714567 726 RND_STEP_3(a,b,c,d,e,f,g,h,i); \
wolfSSL 7:481bce714567 727 RND_STEP_4(a,b,c,d,e,f,g,h,i); \
wolfSSL 7:481bce714567 728 RND_STEP_5(a,b,c,d,e,f,g,h,i); \
wolfSSL 7:481bce714567 729 RND_STEP_6(a,b,c,d,e,f,g,h,i); \
wolfSSL 7:481bce714567 730 RND_STEP_7(a,b,c,d,e,f,g,h,i); \
wolfSSL 7:481bce714567 731 RND_STEP_8(a,b,c,d,e,f,g,h,i);
wolfSSL 7:481bce714567 732
wolfSSL 7:481bce714567 733 #define RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i); /* RND_n: RND_X with the register list rotated so that S_n is passed as `a` */
wolfSSL 7:481bce714567 734 #define RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_7,S_0,S_1,S_2,S_3,S_4,S_5,S_6,_i);
wolfSSL 7:481bce714567 735 #define RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_6,S_7,S_0,S_1,S_2,S_3,S_4,S_5,_i);
wolfSSL 7:481bce714567 736 #define RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_5,S_6,S_7,S_0,S_1,S_2,S_3,S_4,_i);
wolfSSL 7:481bce714567 737 #define RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,_i);
wolfSSL 7:481bce714567 738 #define RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_3,S_4,S_5,S_6,S_7,S_0,S_1,S_2,_i);
wolfSSL 7:481bce714567 739 #define RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_2,S_3,S_4,S_5,S_6,S_7,S_0,S_1,_i);
wolfSSL 7:481bce714567 740 #define RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_1,S_2,S_3,S_4,S_5,S_6,S_7,S_0,_i);
wolfSSL 7:481bce714567 741
wolfSSL 7:481bce714567 742
wolfSSL 7:481bce714567 743 #define RND_1_3(a,b,c,d,e,f,g,h,i) { /* first third of a round: steps 1-3, wrapped in a brace block */\
wolfSSL 7:481bce714567 744 RND_STEP_1(a,b,c,d,e,f,g,h,i); \
wolfSSL 7:481bce714567 745 RND_STEP_2(a,b,c,d,e,f,g,h,i); \
wolfSSL 7:481bce714567 746 RND_STEP_3(a,b,c,d,e,f,g,h,i); \
wolfSSL 7:481bce714567 747 }
wolfSSL 7:481bce714567 748
wolfSSL 7:481bce714567 749 #define RND_4_6(a,b,c,d,e,f,g,h,i) { /* middle third of a round: steps 4-6 */\
wolfSSL 7:481bce714567 750 RND_STEP_4(a,b,c,d,e,f,g,h,i); \
wolfSSL 7:481bce714567 751 RND_STEP_5(a,b,c,d,e,f,g,h,i); \
wolfSSL 7:481bce714567 752 RND_STEP_6(a,b,c,d,e,f,g,h,i); \
wolfSSL 7:481bce714567 753 }
wolfSSL 7:481bce714567 754
wolfSSL 7:481bce714567 755 #define RND_7_8(a,b,c,d,e,f,g,h,i) { /* last third of a round: steps 7-8 */\
wolfSSL 7:481bce714567 756 RND_STEP_7(a,b,c,d,e,f,g,h,i); \
wolfSSL 7:481bce714567 757 RND_STEP_8(a,b,c,d,e,f,g,h,i); \
wolfSSL 7:481bce714567 758 }
wolfSSL 7:481bce714567 759
wolfSSL 7:481bce714567 760 #define RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i); /* NOTE(review): RND_0..RND_7 here repeat, token for token, the definitions given right after RND_X earlier in this file; the repetition looks redundant */
wolfSSL 7:481bce714567 761 #define RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_7,S_0,S_1,S_2,S_3,S_4,S_5,S_6,_i);
wolfSSL 7:481bce714567 762 #define RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_6,S_7,S_0,S_1,S_2,S_3,S_4,S_5,_i);
wolfSSL 7:481bce714567 763 #define RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_5,S_6,S_7,S_0,S_1,S_2,S_3,S_4,_i);
wolfSSL 7:481bce714567 764 #define RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,_i);
wolfSSL 7:481bce714567 765 #define RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_3,S_4,S_5,S_6,S_7,S_0,S_1,S_2,_i);
wolfSSL 7:481bce714567 766 #define RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_2,S_3,S_4,S_5,S_6,S_7,S_0,S_1,_i);
wolfSSL 7:481bce714567 767 #define RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_1,S_2,S_3,S_4,S_5,S_6,S_7,S_0,_i);
wolfSSL 7:481bce714567 768
wolfSSL 7:481bce714567 769
wolfSSL 7:481bce714567 770 #define RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_1_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i); /* RND_n_0: steps 1-3 of a round with the register list rotated so that S_n is passed as `a` */
wolfSSL 7:481bce714567 771 #define RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_1_3(S_7,S_0,S_1,S_2,S_3,S_4,S_5,S_6,_i);
wolfSSL 7:481bce714567 772 #define RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_1_3(S_6,S_7,S_0,S_1,S_2,S_3,S_4,S_5,_i);
wolfSSL 7:481bce714567 773 #define RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_1_3(S_5,S_6,S_7,S_0,S_1,S_2,S_3,S_4,_i);
wolfSSL 7:481bce714567 774 #define RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_1_3(S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,_i);
wolfSSL 7:481bce714567 775 #define RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_1_3(S_3,S_4,S_5,S_6,S_7,S_0,S_1,S_2,_i);
wolfSSL 7:481bce714567 776 #define RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_1_3(S_2,S_3,S_4,S_5,S_6,S_7,S_0,S_1,_i);
wolfSSL 7:481bce714567 777 #define RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_1_3(S_1,S_2,S_3,S_4,S_5,S_6,S_7,S_0,_i);
wolfSSL 7:481bce714567 778
wolfSSL 7:481bce714567 779 #define RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_4_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i); /* RND_n_1: steps 4-6, same register rotation */
wolfSSL 7:481bce714567 780 #define RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_4_6(S_7,S_0,S_1,S_2,S_3,S_4,S_5,S_6,_i);
wolfSSL 7:481bce714567 781 #define RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_4_6(S_6,S_7,S_0,S_1,S_2,S_3,S_4,S_5,_i);
wolfSSL 7:481bce714567 782 #define RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_4_6(S_5,S_6,S_7,S_0,S_1,S_2,S_3,S_4,_i);
wolfSSL 7:481bce714567 783 #define RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_4_6(S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,_i);
wolfSSL 7:481bce714567 784 #define RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_4_6(S_3,S_4,S_5,S_6,S_7,S_0,S_1,S_2,_i);
wolfSSL 7:481bce714567 785 #define RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_4_6(S_2,S_3,S_4,S_5,S_6,S_7,S_0,S_1,_i);
wolfSSL 7:481bce714567 786 #define RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_4_6(S_1,S_2,S_3,S_4,S_5,S_6,S_7,S_0,_i);
wolfSSL 7:481bce714567 787
wolfSSL 7:481bce714567 788 #define RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_7_8(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i); /* RND_n_2: steps 7-8, same register rotation */
wolfSSL 7:481bce714567 789 #define RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_7_8(S_7,S_0,S_1,S_2,S_3,S_4,S_5,S_6,_i);
wolfSSL 7:481bce714567 790 #define RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_7_8(S_6,S_7,S_0,S_1,S_2,S_3,S_4,S_5,_i);
wolfSSL 7:481bce714567 791 #define RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_7_8(S_5,S_6,S_7,S_0,S_1,S_2,S_3,S_4,_i);
wolfSSL 7:481bce714567 792 #define RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_7_8(S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,_i);
wolfSSL 7:481bce714567 793 #define RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_7_8(S_3,S_4,S_5,S_6,S_7,S_0,S_1,S_2,_i);
wolfSSL 7:481bce714567 794 #define RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_7_8(S_2,S_3,S_4,S_5,S_6,S_7,S_0,S_1,_i);
wolfSSL 7:481bce714567 795 #define RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_7_8(S_1,S_2,S_3,S_4,S_5,S_6,S_7,S_0,_i);
wolfSSL 7:481bce714567 796
wolfSSL 7:481bce714567 797 #define FOR(cnt, init, max, inc, loop) /* loop head: stores `init` into cnt and emits the asm label `loop:`; max and inc are not used here. NOTE(review): %0 is written by movl yet declared as an input ("m") - confirm against the GCC extended-asm constraint rules */ \
wolfSSL 7:481bce714567 798 __asm__ volatile("movl $"#init", %0\n\t"#loop":"::"m"(cnt):)
wolfSSL 7:481bce714567 799 #define END(cnt, init, max, inc, loop) /* loop tail: cnt += inc, then jumps back to `loop` while cnt <= max (jle, signed); init is not used here. NOTE(review): %0 is read and written yet declared write-only ("=m") - confirm */ \
wolfSSL 7:481bce714567 800 __asm__ volatile("addl $"#inc", %0\n\tcmpl $"#max", %0\n\tjle "#loop"\n\t":"=m"(cnt)::) ;
wolfSSL 7:481bce714567 801
wolfSSL 7:481bce714567 802 #endif /* defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) */
wolfSSL 7:481bce714567 803
wolfSSL 7:481bce714567 804 #if defined(HAVE_INTEL_AVX1) /* inline Assembler for Intel AVX1 instructions */
wolfSSL 7:481bce714567 805
wolfSSL 7:481bce714567 806 #define VPALIGNR(op1,op2,op3,op4) __asm__ volatile("vpalignr $"#op4", %"#op3", %"#op2", %"#op1:::XMM_REGs) /* wrappers take the destination first (op1); the template emits the operands in reverse, AT&T order. op4 is an immediate */
wolfSSL 7:481bce714567 807 #define VPADDD(op1,op2,op3) __asm__ volatile("vpaddd %"#op3", %"#op2", %"#op1:::XMM_REGs)
wolfSSL 7:481bce714567 808 #define VPSRLD(op1,op2,op3) __asm__ volatile("vpsrld $"#op3", %"#op2", %"#op1:::XMM_REGs) /* op3 is an immediate shift count */
wolfSSL 7:481bce714567 809 #define VPSRLQ(op1,op2,op3) __asm__ volatile("vpsrlq $"#op3", %"#op2", %"#op1:::XMM_REGs) /* op3 is an immediate shift count */
wolfSSL 7:481bce714567 810 #define VPSLLD(op1,op2,op3) __asm__ volatile("vpslld $"#op3", %"#op2", %"#op1:::XMM_REGs) /* op3 is an immediate shift count */
wolfSSL 7:481bce714567 811 #define VPOR(op1,op2,op3) __asm__ volatile("vpor %"#op3", %"#op2", %"#op1:::XMM_REGs)
wolfSSL 7:481bce714567 812 #define VPXOR(op1,op2,op3) __asm__ volatile("vpxor %"#op3", %"#op2", %"#op1:::XMM_REGs)
wolfSSL 7:481bce714567 813 #define VPSHUFD(op1,op2,op3) __asm__ volatile("vpshufd $"#op3", %"#op2", %"#op1:::XMM_REGs) /* op3 is an immediate selector */
wolfSSL 7:481bce714567 814 #define VPSHUFB(op1,op2,op3) __asm__ volatile("vpshufb %"#op3", %"#op2", %"#op1:::XMM_REGs) /* op3 is a register holding the shuffle control */
wolfSSL 7:481bce714567 815
wolfSSL 7:481bce714567 816 #define MessageSched(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER, SHUF_00BA, SHUF_DC00,\
wolfSSL 7:481bce714567 817 a,b,c,d,e,f,g,h,_i) /* interleaves four scalar rounds (W_K[_i].._i+3], register roles rotated by one per round) with the vector computation of four new schedule words, left in X0; XFER is not referenced in the body */\
wolfSSL 7:481bce714567 818 RND_STEP_1(a,b,c,d,e,f,g,h,_i);\
wolfSSL 7:481bce714567 819 VPALIGNR (XTMP0, X3, X2, 4) ; /* XTMP0 = W[-7] */\
wolfSSL 7:481bce714567 820 RND_STEP_2(a,b,c,d,e,f,g,h,_i);\
wolfSSL 7:481bce714567 821 VPADDD (XTMP0, XTMP0, X0) ; /* XTMP0 = W[-7] + W[-16] */\
wolfSSL 7:481bce714567 822 RND_STEP_3(a,b,c,d,e,f,g,h,_i);\
wolfSSL 7:481bce714567 823 VPALIGNR (XTMP1, X1, X0, 4) ; /* XTMP1 = W[-15] */\
wolfSSL 7:481bce714567 824 RND_STEP_4(a,b,c,d,e,f,g,h,_i);\
wolfSSL 7:481bce714567 825 VPSRLD (XTMP2, XTMP1, 7) ;\
wolfSSL 7:481bce714567 826 RND_STEP_5(a,b,c,d,e,f,g,h,_i);\
wolfSSL 7:481bce714567 827 VPSLLD (XTMP3, XTMP1, 25) ; /* VPSLLD (XTMP3, XTMP1, (32-7)) */\
wolfSSL 7:481bce714567 828 RND_STEP_6(a,b,c,d,e,f,g,h,_i);\
wolfSSL 7:481bce714567 829 VPOR (XTMP3, XTMP3, XTMP2) ; /* XTMP3 = W[-15] MY_ROR 7 */\
wolfSSL 7:481bce714567 830 RND_STEP_7(a,b,c,d,e,f,g,h,_i);\
wolfSSL 7:481bce714567 831 VPSRLD (XTMP2, XTMP1,18) ;\
wolfSSL 7:481bce714567 832 RND_STEP_8(a,b,c,d,e,f,g,h,_i);\
wolfSSL 7:481bce714567 833 \
wolfSSL 7:481bce714567 834 RND_STEP_1(h,a,b,c,d,e,f,g,_i+1);\
wolfSSL 7:481bce714567 835 VPSRLD (XTMP4, XTMP1, 3) ; /* XTMP4 = W[-15] >> 3 */\
wolfSSL 7:481bce714567 836 RND_STEP_2(h,a,b,c,d,e,f,g,_i+1);\
wolfSSL 7:481bce714567 837 VPSLLD (XTMP1, XTMP1, 14) ; /* VPSLLD (XTMP1, XTMP1, (32-18)) */\
wolfSSL 7:481bce714567 838 RND_STEP_3(h,a,b,c,d,e,f,g,_i+1);\
wolfSSL 7:481bce714567 839 VPXOR (XTMP3, XTMP3, XTMP1) ;\
wolfSSL 7:481bce714567 840 RND_STEP_4(h,a,b,c,d,e,f,g,_i+1);\
wolfSSL 7:481bce714567 841 VPXOR (XTMP3, XTMP3, XTMP2) ; /* XTMP3 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR 18 */\
wolfSSL 7:481bce714567 842 RND_STEP_5(h,a,b,c,d,e,f,g,_i+1);\
wolfSSL 7:481bce714567 843 VPXOR (XTMP1, XTMP3, XTMP4) ; /* XTMP1 = s0 */\
wolfSSL 7:481bce714567 844 RND_STEP_6(h,a,b,c,d,e,f,g,_i+1);\
wolfSSL 7:481bce714567 845 VPSHUFD(XTMP2, X3, 0b11111010) ; /* XTMP2 = W[-2] {BBAA}*/\
wolfSSL 7:481bce714567 846 RND_STEP_7(h,a,b,c,d,e,f,g,_i+1);\
wolfSSL 7:481bce714567 847 VPADDD (XTMP0, XTMP0, XTMP1) ; /* XTMP0 = W[-16] + W[-7] + s0 */\
wolfSSL 7:481bce714567 848 RND_STEP_8(h,a,b,c,d,e,f,g,_i+1);\
wolfSSL 7:481bce714567 849 \
wolfSSL 7:481bce714567 850 RND_STEP_1(g,h,a,b,c,d,e,f,_i+2);\
wolfSSL 7:481bce714567 851 VPSRLD (XTMP4, XTMP2, 10) ; /* XTMP4 = W[-2] >> 10 {BBAA} */\
wolfSSL 7:481bce714567 852 RND_STEP_2(g,h,a,b,c,d,e,f,_i+2);\
wolfSSL 7:481bce714567 853 VPSRLQ (XTMP3, XTMP2, 19) ; /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */\
wolfSSL 7:481bce714567 854 RND_STEP_3(g,h,a,b,c,d,e,f,_i+2);\
wolfSSL 7:481bce714567 855 VPSRLQ (XTMP2, XTMP2, 17) ; /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */\
wolfSSL 7:481bce714567 856 RND_STEP_4(g,h,a,b,c,d,e,f,_i+2);\
wolfSSL 7:481bce714567 857 VPXOR (XTMP2, XTMP2, XTMP3) ;\
wolfSSL 7:481bce714567 858 RND_STEP_5(g,h,a,b,c,d,e,f,_i+2);\
wolfSSL 7:481bce714567 859 VPXOR (XTMP4, XTMP4, XTMP2) ; /* XTMP4 = s1 {xBxA} */\
wolfSSL 7:481bce714567 860 RND_STEP_6(g,h,a,b,c,d,e,f,_i+2);\
wolfSSL 7:481bce714567 861 VPSHUFB (XTMP4, XTMP4, SHUF_00BA) ; /* XTMP4 = s1 {00BA} */\
wolfSSL 7:481bce714567 862 RND_STEP_7(g,h,a,b,c,d,e,f,_i+2);\
wolfSSL 7:481bce714567 863 VPADDD (XTMP0, XTMP0, XTMP4) ; /* XTMP0 = {..., ..., W[1], W[0]} */\
wolfSSL 7:481bce714567 864 RND_STEP_8(g,h,a,b,c,d,e,f,_i+2);\
wolfSSL 7:481bce714567 865 \
wolfSSL 7:481bce714567 866 RND_STEP_1(f,g,h,a,b,c,d,e,_i+3);\
wolfSSL 7:481bce714567 867 VPSHUFD (XTMP2, XTMP0, 0b01010000) ; /* XTMP2 = W[-2] {DDCC} */\
wolfSSL 7:481bce714567 868 RND_STEP_2(f,g,h,a,b,c,d,e,_i+3);\
wolfSSL 7:481bce714567 869 VPSRLD (XTMP5, XTMP2, 10); /* XTMP5 = W[-2] >> 10 {DDCC} */\
wolfSSL 7:481bce714567 870 RND_STEP_3(f,g,h,a,b,c,d,e,_i+3);\
wolfSSL 7:481bce714567 871 VPSRLQ (XTMP3, XTMP2, 19); /* XTMP3 = W[-2] MY_ROR 19 {xDxC} */\
wolfSSL 7:481bce714567 872 RND_STEP_4(f,g,h,a,b,c,d,e,_i+3);\
wolfSSL 7:481bce714567 873 VPSRLQ (XTMP2, XTMP2, 17) ; /* XTMP2 = W[-2] MY_ROR 17 {xDxC} */\
wolfSSL 7:481bce714567 874 RND_STEP_5(f,g,h,a,b,c,d,e,_i+3);\
wolfSSL 7:481bce714567 875 VPXOR (XTMP2, XTMP2, XTMP3) ;\
wolfSSL 7:481bce714567 876 RND_STEP_6(f,g,h,a,b,c,d,e,_i+3);\
wolfSSL 7:481bce714567 877 VPXOR (XTMP5, XTMP5, XTMP2) ; /* XTMP5 = s1 {xDxC} */\
wolfSSL 7:481bce714567 878 RND_STEP_7(f,g,h,a,b,c,d,e,_i+3);\
wolfSSL 7:481bce714567 879 VPSHUFB (XTMP5, XTMP5, SHUF_DC00) ; /* XTMP5 = s1 {DC00} */\
wolfSSL 7:481bce714567 880 RND_STEP_8(f,g,h,a,b,c,d,e,_i+3);\
wolfSSL 7:481bce714567 881 VPADDD (X0, XTMP5, XTMP0) ; /* X0 = {W[3], W[2], W[1], W[0]} */\
wolfSSL 7:481bce714567 882
wolfSSL 7:481bce714567 883 #if defined(HAVE_INTEL_RORX)
wolfSSL 7:481bce714567 884
wolfSSL 7:481bce714567 885 #define MessageSched_RORX(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, \
wolfSSL 7:481bce714567 886 XFER, SHUF_00BA, SHUF_DC00,a,b,c,d,e,f,g,h,_i) /* same interleaving as MessageSched but built from the RND_STEP_RORX_n pieces; the four new schedule words end up in X0; XFER is not referenced in the body */\
wolfSSL 7:481bce714567 887 RND_STEP_RORX_1(a,b,c,d,e,f,g,h,_i);\
wolfSSL 7:481bce714567 888 VPALIGNR (XTMP0, X3, X2, 4) ; /* XTMP0 = W[-7] */\
wolfSSL 7:481bce714567 889 RND_STEP_RORX_2(a,b,c,d,e,f,g,h,_i);\
wolfSSL 7:481bce714567 890 VPADDD (XTMP0, XTMP0, X0) ; /* XTMP0 = W[-7] + W[-16] */\
wolfSSL 7:481bce714567 891 RND_STEP_RORX_3(a,b,c,d,e,f,g,h,_i);\
wolfSSL 7:481bce714567 892 VPALIGNR (XTMP1, X1, X0, 4) ; /* XTMP1 = W[-15] */\
wolfSSL 7:481bce714567 893 RND_STEP_RORX_4(a,b,c,d,e,f,g,h,_i);\
wolfSSL 7:481bce714567 894 VPSRLD (XTMP2, XTMP1, 7) ;\
wolfSSL 7:481bce714567 895 RND_STEP_RORX_5(a,b,c,d,e,f,g,h,_i);\
wolfSSL 7:481bce714567 896 VPSLLD (XTMP3, XTMP1, 25) ; /* VPSLLD (XTMP3, XTMP1, (32-7)) */\
wolfSSL 7:481bce714567 897 RND_STEP_RORX_6(a,b,c,d,e,f,g,h,_i);\
wolfSSL 7:481bce714567 898 VPOR (XTMP3, XTMP3, XTMP2) ; /* XTMP3 = W[-15] MY_ROR 7 */\
wolfSSL 7:481bce714567 899 RND_STEP_RORX_7(a,b,c,d,e,f,g,h,_i);\
wolfSSL 7:481bce714567 900 VPSRLD (XTMP2, XTMP1,18) ;\
wolfSSL 7:481bce714567 901 RND_STEP_RORX_8(a,b,c,d,e,f,g,h,_i);\
wolfSSL 7:481bce714567 902 \
wolfSSL 7:481bce714567 903 RND_STEP_RORX_1(h,a,b,c,d,e,f,g,_i+1);\
wolfSSL 7:481bce714567 904 VPSRLD (XTMP4, XTMP1, 3) ; /* XTMP4 = W[-15] >> 3 */\
wolfSSL 7:481bce714567 905 RND_STEP_RORX_2(h,a,b,c,d,e,f,g,_i+1);\
wolfSSL 7:481bce714567 906 VPSLLD (XTMP1, XTMP1, 14) ; /* VPSLLD (XTMP1, XTMP1, (32-18)) */\
wolfSSL 7:481bce714567 907 RND_STEP_RORX_3(h,a,b,c,d,e,f,g,_i+1);\
wolfSSL 7:481bce714567 908 VPXOR (XTMP3, XTMP3, XTMP1) ;\
wolfSSL 7:481bce714567 909 RND_STEP_RORX_4(h,a,b,c,d,e,f,g,_i+1);\
wolfSSL 7:481bce714567 910 VPXOR (XTMP3, XTMP3, XTMP2) ; /* XTMP3 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR 18 */\
wolfSSL 7:481bce714567 911 RND_STEP_RORX_5(h,a,b,c,d,e,f,g,_i+1);\
wolfSSL 7:481bce714567 912 VPXOR (XTMP1, XTMP3, XTMP4) ; /* XTMP1 = s0 */\
wolfSSL 7:481bce714567 913 RND_STEP_RORX_6(h,a,b,c,d,e,f,g,_i+1);\
wolfSSL 7:481bce714567 914 VPSHUFD(XTMP2, X3, 0b11111010) ; /* XTMP2 = W[-2] {BBAA}*/\
wolfSSL 7:481bce714567 915 RND_STEP_RORX_7(h,a,b,c,d,e,f,g,_i+1);\
wolfSSL 7:481bce714567 916 VPADDD (XTMP0, XTMP0, XTMP1) ; /* XTMP0 = W[-16] + W[-7] + s0 */\
wolfSSL 7:481bce714567 917 RND_STEP_RORX_8(h,a,b,c,d,e,f,g,_i+1);\
wolfSSL 7:481bce714567 918 \
wolfSSL 7:481bce714567 919 RND_STEP_RORX_1(g,h,a,b,c,d,e,f,_i+2);\
wolfSSL 7:481bce714567 920 VPSRLD (XTMP4, XTMP2, 10) ; /* XTMP4 = W[-2] >> 10 {BBAA} */\
wolfSSL 7:481bce714567 921 RND_STEP_RORX_2(g,h,a,b,c,d,e,f,_i+2);\
wolfSSL 7:481bce714567 922 VPSRLQ (XTMP3, XTMP2, 19) ; /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */\
wolfSSL 7:481bce714567 923 RND_STEP_RORX_3(g,h,a,b,c,d,e,f,_i+2);\
wolfSSL 7:481bce714567 924 VPSRLQ (XTMP2, XTMP2, 17) ; /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */\
wolfSSL 7:481bce714567 925 RND_STEP_RORX_4(g,h,a,b,c,d,e,f,_i+2);\
wolfSSL 7:481bce714567 926 VPXOR (XTMP2, XTMP2, XTMP3) ;\
wolfSSL 7:481bce714567 927 RND_STEP_RORX_5(g,h,a,b,c,d,e,f,_i+2);\
wolfSSL 7:481bce714567 928 VPXOR (XTMP4, XTMP4, XTMP2) ; /* XTMP4 = s1 {xBxA} */\
wolfSSL 7:481bce714567 929 RND_STEP_RORX_6(g,h,a,b,c,d,e,f,_i+2);\
wolfSSL 7:481bce714567 930 VPSHUFB (XTMP4, XTMP4, SHUF_00BA) ; /* XTMP4 = s1 {00BA} */\
wolfSSL 7:481bce714567 931 RND_STEP_RORX_7(g,h,a,b,c,d,e,f,_i+2);\
wolfSSL 7:481bce714567 932 VPADDD (XTMP0, XTMP0, XTMP4) ; /* XTMP0 = {..., ..., W[1], W[0]} */\
wolfSSL 7:481bce714567 933 RND_STEP_RORX_8(g,h,a,b,c,d,e,f,_i+2);\
wolfSSL 7:481bce714567 934 \
wolfSSL 7:481bce714567 935 RND_STEP_RORX_1(f,g,h,a,b,c,d,e,_i+3);\
wolfSSL 7:481bce714567 936 VPSHUFD (XTMP2, XTMP0, 0b01010000) ; /* XTMP2 = W[-2] {DDCC} */\
wolfSSL 7:481bce714567 937 RND_STEP_RORX_2(f,g,h,a,b,c,d,e,_i+3);\
wolfSSL 7:481bce714567 938 VPSRLD (XTMP5, XTMP2, 10); /* XTMP5 = W[-2] >> 10 {DDCC} */\
wolfSSL 7:481bce714567 939 RND_STEP_RORX_3(f,g,h,a,b,c,d,e,_i+3);\
wolfSSL 7:481bce714567 940 VPSRLQ (XTMP3, XTMP2, 19); /* XTMP3 = W[-2] MY_ROR 19 {xDxC} */\
wolfSSL 7:481bce714567 941 RND_STEP_RORX_4(f,g,h,a,b,c,d,e,_i+3);\
wolfSSL 7:481bce714567 942 VPSRLQ (XTMP2, XTMP2, 17) ; /* XTMP2 = W[-2] MY_ROR 17 {xDxC} */\
wolfSSL 7:481bce714567 943 RND_STEP_RORX_5(f,g,h,a,b,c,d,e,_i+3);\
wolfSSL 7:481bce714567 944 VPXOR (XTMP2, XTMP2, XTMP3) ;\
wolfSSL 7:481bce714567 945 RND_STEP_RORX_6(f,g,h,a,b,c,d,e,_i+3);\
wolfSSL 7:481bce714567 946 VPXOR (XTMP5, XTMP5, XTMP2) ; /* XTMP5 = s1 {xDxC} */\
wolfSSL 7:481bce714567 947 RND_STEP_RORX_7(f,g,h,a,b,c,d,e,_i+3);\
wolfSSL 7:481bce714567 948 VPSHUFB (XTMP5, XTMP5, SHUF_DC00) ; /* XTMP5 = s1 {DC00} */\
wolfSSL 7:481bce714567 949 RND_STEP_RORX_8(f,g,h,a,b,c,d,e,_i+3);\
wolfSSL 7:481bce714567 950 VPADDD (X0, XTMP5, XTMP0) ; /* X0 = {W[3], W[2], W[1], W[0]} */\
wolfSSL 7:481bce714567 951
wolfSSL 7:481bce714567 952 #endif
wolfSSL 7:481bce714567 953
wolfSSL 7:481bce714567 954
wolfSSL 7:481bce714567 955 #define W_K_from_buff /* load sha256->buffer[0], [4], [8], [12] into xmm4..xmm7 (unaligned loads) and shuffle each by the control held in xmm13; register numbers are hard-coded here rather than taken from X0..X3 / BYTE_FLIP_MASK */\
wolfSSL 7:481bce714567 956 __asm__ volatile("vmovdqu %0, %%xmm4\n\t"\
wolfSSL 7:481bce714567 957 "vpshufb %%xmm13, %%xmm4, %%xmm4\n\t"\
wolfSSL 7:481bce714567 958 :: "m"(sha256->buffer[0]):"%xmm4") ;\
wolfSSL 7:481bce714567 959 __asm__ volatile("vmovdqu %0, %%xmm5\n\t"\
wolfSSL 7:481bce714567 960 "vpshufb %%xmm13, %%xmm5, %%xmm5\n\t"\
wolfSSL 7:481bce714567 961 ::"m"(sha256->buffer[4]):"%xmm5") ;\
wolfSSL 7:481bce714567 962 __asm__ volatile("vmovdqu %0, %%xmm6\n\t"\
wolfSSL 7:481bce714567 963 "vpshufb %%xmm13, %%xmm6, %%xmm6\n\t"\
wolfSSL 7:481bce714567 964 ::"m"(sha256->buffer[8]):"%xmm6") ;\
wolfSSL 7:481bce714567 965 __asm__ volatile("vmovdqu %0, %%xmm7\n\t"\
wolfSSL 7:481bce714567 966 "vpshufb %%xmm13, %%xmm7, %%xmm7\n\t"\
wolfSSL 7:481bce714567 967 ::"m"(sha256->buffer[12]):"%xmm7") ;\
wolfSSL 7:481bce714567 968
wolfSSL 7:481bce714567 969 #define _SET_W_K_XFER(reg, i) /* xmm9 = reg + the vector starting at K[i] (vpaddd), then store xmm9 to W_K[i] with an aligned vmovdqa; xmm9 is hard-coded as scratch and is the same register as XTMP5 */\
wolfSSL 7:481bce714567 970 __asm__ volatile("vpaddd %0, %"#reg", %%xmm9"::"m"(K[i]):XMM_REGs) ;\
wolfSSL 7:481bce714567 971 __asm__ volatile("vmovdqa %%xmm9, %0":"=m"(W_K[i])::XMM_REGs) ;
wolfSSL 7:481bce714567 972
wolfSSL 7:481bce714567 973 #define SET_W_K_XFER(reg, i) _SET_W_K_XFER(reg, i) /* forwarding layer: expands `reg` (e.g. X0) before _SET_W_K_XFER stringifies it */
wolfSSL 7:481bce714567 974
wolfSSL 7:481bce714567 975 static const ALIGN32 word64 mSHUF_00BA[] = { 0x0b0a090803020100, 0xFFFFFFFFFFFFFFFF } ; /* shuffle xBxA -> 00BA */
wolfSSL 7:481bce714567 976 static const ALIGN32 word64 mSHUF_DC00[] = { 0xFFFFFFFFFFFFFFFF, 0x0b0a090803020100 } ; /* shuffle xDxC -> DC00 */
wolfSSL 7:481bce714567 977 static const ALIGN32 word64 mBYTE_FLIP_MASK[] = { 0x0405060700010203, 0x0c0d0e0f08090a0b } ; /* vpshufb control that reverses the byte order inside each 32-bit word */
wolfSSL 7:481bce714567 978
wolfSSL 7:481bce714567 979
wolfSSL 7:481bce714567 980 #define _Init_Masks(mask1, mask2, mask3) /* load the three 16-byte shuffle tables into the named registers: mask1 <- mBYTE_FLIP_MASK, mask2 <- mSHUF_00BA, mask3 <- mSHUF_DC00; no clobber list is given */\
wolfSSL 7:481bce714567 981 __asm__ volatile("vmovdqu %0, %"#mask1 ::"m"(mBYTE_FLIP_MASK[0])) ;\
wolfSSL 7:481bce714567 982 __asm__ volatile("vmovdqu %0, %"#mask2 ::"m"(mSHUF_00BA[0])) ;\
wolfSSL 7:481bce714567 983 __asm__ volatile("vmovdqu %0, %"#mask3 ::"m"(mSHUF_DC00[0])) ;
wolfSSL 7:481bce714567 984
wolfSSL 7:481bce714567 985 #define Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00) /* forwarding layer: expands the register-name macros before _Init_Masks stringifies them */\
wolfSSL 7:481bce714567 986 _Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00)
wolfSSL 7:481bce714567 987
wolfSSL 7:481bce714567 988 #define X0 %xmm4 /* X0..X3: the four registers W_K_from_buff fills with the 16 message words */
wolfSSL 7:481bce714567 989 #define X1 %xmm5
wolfSSL 7:481bce714567 990 #define X2 %xmm6
wolfSSL 7:481bce714567 991 #define X3 %xmm7
wolfSSL 7:481bce714567 992 #define X_ X0
wolfSSL 7:481bce714567 993
wolfSSL 7:481bce714567 994 #define XTMP0 %xmm0 /* XTMP0..XTMP5: temporaries passed to the message-schedule macros */
wolfSSL 7:481bce714567 995 #define XTMP1 %xmm1
wolfSSL 7:481bce714567 996 #define XTMP2 %xmm2
wolfSSL 7:481bce714567 997 #define XTMP3 %xmm3
wolfSSL 7:481bce714567 998 #define XTMP4 %xmm8
wolfSSL 7:481bce714567 999 #define XTMP5 %xmm9 /* xmm9 is also the hard-coded scratch register of _SET_W_K_XFER */
wolfSSL 7:481bce714567 1000 #define XFER %xmm10
wolfSSL 7:481bce714567 1001
wolfSSL 7:481bce714567 1002 #define SHUF_00BA %xmm11 /* shuffle xBxA -> 00BA */
wolfSSL 7:481bce714567 1003 #define SHUF_DC00 %xmm12 /* shuffle xDxC -> DC00 */
wolfSSL 7:481bce714567 1004 #define BYTE_FLIP_MASK %xmm13
wolfSSL 7:481bce714567 1005
wolfSSL 7:481bce714567 1006 #define XMM_REGs /* Expands to nothing, so asm statements that use it get an empty clobber list. Registers are saved in Sha256Update/Final */
wolfSSL 7:481bce714567 1007 /*"xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13" */
wolfSSL 7:481bce714567 1008
/*
 * Transform_AVX1 - SHA-256 transform built from the AVX1 macro set.
 *
 * sha256: hash context; referenced implicitly by the macros expanded below
 *         (e.g. RegToDigest uses sha256->digest).
 *
 * The body is a fully unrolled, order-dependent sequence of inline-asm
 * macros: Init_Masks, W_K_from_buff, SET_W_K_XFER, MessageSched and RND_n
 * are all defined outside this section, so notes on them are hedged.
 * W_K is a 32-byte-aligned stack array of 64 words ("temp for W+K");
 * unlike Transform_AVX2 there is no WOLFSSL_SMALL_STACK heap variant here.
 *
 * Returns 0 unconditionally.
 */
wolfSSL 7:481bce714567 1009 static int Transform_AVX1(Sha256* sha256)
wolfSSL 7:481bce714567 1010 {
wolfSSL 7:481bce714567 1011 ALIGN32 word32 W_K[64] ; /* temp for W+K */
wolfSSL 7:481bce714567 1012
/* Load the constant masks into their registers, then the first 16 message
 * words into X0..X3 (presumably from sha256->buffer - TODO confirm). */
wolfSSL 7:481bce714567 1013 Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00) ;
wolfSSL 7:481bce714567 1014 W_K_from_buff ; /* X0, X1, X2, X3 = W[0..15] ; */
wolfSSL 7:481bce714567 1015
wolfSSL 7:481bce714567 1016 DigestToReg(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7) ;
wolfSSL 7:481bce714567 1017
/* Indices 0..44 in steps of 4: twelve SET_W_K_XFER / MessageSched pairs.
 * The X0..X3 arguments rotate by one position per step, and the S_0..S_7
 * arguments swap halves on every other call (presumably because each
 * MessageSched performs four rounds - confirm against the macro). */
wolfSSL 7:481bce714567 1018 SET_W_K_XFER(X0, 0) ;
wolfSSL 7:481bce714567 1019 MessageSched(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
wolfSSL 7:481bce714567 1020 SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,0) ;
wolfSSL 7:481bce714567 1021 SET_W_K_XFER(X1, 4) ;
wolfSSL 7:481bce714567 1022 MessageSched(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
wolfSSL 7:481bce714567 1023 SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,4) ;
wolfSSL 7:481bce714567 1024 SET_W_K_XFER(X2, 8) ;
wolfSSL 7:481bce714567 1025 MessageSched(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
wolfSSL 7:481bce714567 1026 SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8) ;
wolfSSL 7:481bce714567 1027 SET_W_K_XFER(X3, 12) ;
wolfSSL 7:481bce714567 1028 MessageSched(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
wolfSSL 7:481bce714567 1029 SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,12) ;
wolfSSL 7:481bce714567 1030 SET_W_K_XFER(X0, 16) ;
wolfSSL 7:481bce714567 1031 MessageSched(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
wolfSSL 7:481bce714567 1032 SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16) ;
wolfSSL 7:481bce714567 1033 SET_W_K_XFER(X1, 20) ;
wolfSSL 7:481bce714567 1034 MessageSched(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
wolfSSL 7:481bce714567 1035 SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,20) ;
wolfSSL 7:481bce714567 1036 SET_W_K_XFER(X2, 24) ;
wolfSSL 7:481bce714567 1037 MessageSched(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
wolfSSL 7:481bce714567 1038 SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24) ;
wolfSSL 7:481bce714567 1039 SET_W_K_XFER(X3, 28) ;
wolfSSL 7:481bce714567 1040 MessageSched(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
wolfSSL 7:481bce714567 1041 SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,28) ;
wolfSSL 7:481bce714567 1042 SET_W_K_XFER(X0, 32) ;
wolfSSL 7:481bce714567 1043 MessageSched(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
wolfSSL 7:481bce714567 1044 SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32) ;
wolfSSL 7:481bce714567 1045 SET_W_K_XFER(X1, 36) ;
wolfSSL 7:481bce714567 1046 MessageSched(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
wolfSSL 7:481bce714567 1047 SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,36) ;
wolfSSL 7:481bce714567 1048 SET_W_K_XFER(X2, 40) ;
wolfSSL 7:481bce714567 1049 MessageSched(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
wolfSSL 7:481bce714567 1050 SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40) ;
wolfSSL 7:481bce714567 1051 SET_W_K_XFER(X3, 44) ;
wolfSSL 7:481bce714567 1052 MessageSched(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
wolfSSL 7:481bce714567 1053 SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,44) ;
wolfSSL 7:481bce714567 1054
/* Indices 48..60: only SET_W_K_XFER is issued for X0..X3; no further
 * MessageSched call follows. */
wolfSSL 7:481bce714567 1055 SET_W_K_XFER(X0, 48) ;
wolfSSL 7:481bce714567 1056 SET_W_K_XFER(X1, 52) ;
wolfSSL 7:481bce714567 1057 SET_W_K_XFER(X2, 56) ;
wolfSSL 7:481bce714567 1058 SET_W_K_XFER(X3, 60) ;
wolfSSL 7:481bce714567 1059
/* Rounds 48..63 through the plain RND_n macros. The S_n argument order is
 * fixed here; the macro suffix cycles 0,7,6,5,4,3,2,1 instead (presumably
 * each RND_n applies the register rotation internally - confirm). */
wolfSSL 7:481bce714567 1060 RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48) ;
wolfSSL 7:481bce714567 1061 RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49) ;
wolfSSL 7:481bce714567 1062 RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50) ;
wolfSSL 7:481bce714567 1063 RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51) ;
wolfSSL 7:481bce714567 1064
wolfSSL 7:481bce714567 1065 RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52) ;
wolfSSL 7:481bce714567 1066 RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53) ;
wolfSSL 7:481bce714567 1067 RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54) ;
wolfSSL 7:481bce714567 1068 RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55) ;
wolfSSL 7:481bce714567 1069
wolfSSL 7:481bce714567 1070 RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,56) ;
wolfSSL 7:481bce714567 1071 RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,57) ;
wolfSSL 7:481bce714567 1072 RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,58) ;
wolfSSL 7:481bce714567 1073 RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,59) ;
wolfSSL 7:481bce714567 1074
wolfSSL 7:481bce714567 1075 RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,60) ;
wolfSSL 7:481bce714567 1076 RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,61) ;
wolfSSL 7:481bce714567 1077 RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,62) ;
wolfSSL 7:481bce714567 1078 RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,63) ;
wolfSSL 7:481bce714567 1079
/* Fold the working registers back into the context digest. */
wolfSSL 7:481bce714567 1080 RegToDigest(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7) ;
wolfSSL 7:481bce714567 1081
wolfSSL 7:481bce714567 1082
wolfSSL 7:481bce714567 1083 return 0;
wolfSSL 7:481bce714567 1084 }
wolfSSL 7:481bce714567 1085
wolfSSL 7:481bce714567 1086 #if defined(HAVE_INTEL_RORX)
/*
 * Transform_AVX1_RORX - same unrolled sequence as Transform_AVX1, except
 * that the twelve schedule steps use MessageSched_RORX instead of
 * MessageSched. Only compiled when HAVE_INTEL_RORX is defined.
 *
 * sha256: hash context; referenced implicitly by the macros expanded below.
 *
 * MessageSched_RORX, SET_W_K_XFER, Init_Masks, W_K_from_buff and RND_n are
 * defined outside this section. The statement order is significant because
 * every step is inline asm operating on fixed registers.
 *
 * Returns 0 unconditionally.
 */
wolfSSL 7:481bce714567 1087 static int Transform_AVX1_RORX(Sha256* sha256)
wolfSSL 7:481bce714567 1088 {
wolfSSL 7:481bce714567 1089 ALIGN32 word32 W_K[64] ; /* temp for W+K */
wolfSSL 7:481bce714567 1090
wolfSSL 7:481bce714567 1091 Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00) ;
wolfSSL 7:481bce714567 1092 W_K_from_buff ; /* X0, X1, X2, X3 = W[0..15] ; */
wolfSSL 7:481bce714567 1093
wolfSSL 7:481bce714567 1094 DigestToReg(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7) ;
/* Indices 0..44 in steps of 4: X0..X3 rotate by one position per step and
 * the S_0..S_7 halves are swapped on every other call. */
wolfSSL 7:481bce714567 1095 SET_W_K_XFER(X0, 0) ;
wolfSSL 7:481bce714567 1096 MessageSched_RORX(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
wolfSSL 7:481bce714567 1097 XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,0) ;
wolfSSL 7:481bce714567 1098 SET_W_K_XFER(X1, 4) ;
wolfSSL 7:481bce714567 1099 MessageSched_RORX(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
wolfSSL 7:481bce714567 1100 XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,4) ;
wolfSSL 7:481bce714567 1101 SET_W_K_XFER(X2, 8) ;
wolfSSL 7:481bce714567 1102 MessageSched_RORX(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
wolfSSL 7:481bce714567 1103 XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8) ;
wolfSSL 7:481bce714567 1104 SET_W_K_XFER(X3, 12) ;
wolfSSL 7:481bce714567 1105 MessageSched_RORX(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
wolfSSL 7:481bce714567 1106 XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,12) ;
wolfSSL 7:481bce714567 1107 SET_W_K_XFER(X0, 16) ;
wolfSSL 7:481bce714567 1108 MessageSched_RORX(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
wolfSSL 7:481bce714567 1109 XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16) ;
wolfSSL 7:481bce714567 1110 SET_W_K_XFER(X1, 20) ;
wolfSSL 7:481bce714567 1111 MessageSched_RORX(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
wolfSSL 7:481bce714567 1112 XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,20) ;
wolfSSL 7:481bce714567 1113 SET_W_K_XFER(X2, 24) ;
wolfSSL 7:481bce714567 1114 MessageSched_RORX(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
wolfSSL 7:481bce714567 1115 XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24) ;
wolfSSL 7:481bce714567 1116 SET_W_K_XFER(X3, 28) ;
wolfSSL 7:481bce714567 1117 MessageSched_RORX(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
wolfSSL 7:481bce714567 1118 XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,28) ;
wolfSSL 7:481bce714567 1119 SET_W_K_XFER(X0, 32) ;
wolfSSL 7:481bce714567 1120 MessageSched_RORX(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
wolfSSL 7:481bce714567 1121 XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32) ;
wolfSSL 7:481bce714567 1122 SET_W_K_XFER(X1, 36) ;
wolfSSL 7:481bce714567 1123 MessageSched_RORX(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
wolfSSL 7:481bce714567 1124 XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,36) ;
wolfSSL 7:481bce714567 1125 SET_W_K_XFER(X2, 40) ;
wolfSSL 7:481bce714567 1126 MessageSched_RORX(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
wolfSSL 7:481bce714567 1127 XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40) ;
wolfSSL 7:481bce714567 1128 SET_W_K_XFER(X3, 44) ;
wolfSSL 7:481bce714567 1129 MessageSched_RORX(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
wolfSSL 7:481bce714567 1130 XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,44) ;
wolfSSL 7:481bce714567 1131
/* Indices 48..60: only SET_W_K_XFER is issued; no schedule step follows. */
wolfSSL 7:481bce714567 1132 SET_W_K_XFER(X0, 48) ;
wolfSSL 7:481bce714567 1133 SET_W_K_XFER(X1, 52) ;
wolfSSL 7:481bce714567 1134 SET_W_K_XFER(X2, 56) ;
wolfSSL 7:481bce714567 1135 SET_W_K_XFER(X3, 60) ;
wolfSSL 7:481bce714567 1136
/* Rounds 48..63: same RND_n macros and ordering as in Transform_AVX1. */
wolfSSL 7:481bce714567 1137 RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48) ;
wolfSSL 7:481bce714567 1138 RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49) ;
wolfSSL 7:481bce714567 1139 RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50) ;
wolfSSL 7:481bce714567 1140 RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51) ;
wolfSSL 7:481bce714567 1141
wolfSSL 7:481bce714567 1142 RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52) ;
wolfSSL 7:481bce714567 1143 RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53) ;
wolfSSL 7:481bce714567 1144 RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54) ;
wolfSSL 7:481bce714567 1145 RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55) ;
wolfSSL 7:481bce714567 1146
wolfSSL 7:481bce714567 1147 RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,56) ;
wolfSSL 7:481bce714567 1148 RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,57) ;
wolfSSL 7:481bce714567 1149 RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,58) ;
wolfSSL 7:481bce714567 1150 RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,59) ;
wolfSSL 7:481bce714567 1151
wolfSSL 7:481bce714567 1152 RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,60) ;
wolfSSL 7:481bce714567 1153 RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,61) ;
wolfSSL 7:481bce714567 1154 RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,62) ;
wolfSSL 7:481bce714567 1155 RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,63) ;
wolfSSL 7:481bce714567 1156
/* Fold the working registers back into the context digest. */
wolfSSL 7:481bce714567 1157 RegToDigest(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7) ;
wolfSSL 7:481bce714567 1158
wolfSSL 7:481bce714567 1159
wolfSSL 7:481bce714567 1160 return 0;
wolfSSL 7:481bce714567 1161 }
wolfSSL 7:481bce714567 1162 #endif /* HAVE_INTEL_RORX */
wolfSSL 7:481bce714567 1163
wolfSSL 7:481bce714567 1164 #endif /* HAVE_INTEL_AVX1 */
wolfSSL 7:481bce714567 1165
wolfSSL 7:481bce714567 1166
wolfSSL 7:481bce714567 1167 #if defined(HAVE_INTEL_AVX2)
wolfSSL 7:481bce714567 1168
/*
 * Single-instruction AVX2 building blocks. Each macro emits one
 * `__asm__ volatile` statement (two instructions plus an OR for _S_TEMP)
 * and names its registers by stringifying the macro arguments, so the
 * compiler's register allocator is not involved. Operands are in AT&T
 * order (sources first, destination last). YMM_REGs is the clobber list;
 * it is defined further down and expands to nothing.
 *
 *   _MOVE_to_REG / _MOVE_to_MEM  unaligned 256-bit load / store (vmovdqu)
 *   _BYTE_SWAP                   in-place byte shuffle by a memory mask (vpshufb)
 *   _MOVE_128                    128-bit lane permute (vperm2i128)
 *   _MOVE_BYTE                   byte shuffle into a different register
 *   _S_TEMP                      (src >> bits) | (src << (32-bits)) per 32-bit
 *                                lane, i.e. rotate right; `temp` is scratch
 *   _AVX2_R                      logical right shift per 32-bit lane
 *   _XOR / _OR / _ADD            lane-wise xor / or / 32-bit add
 *   _ADD_MEM                     32-bit add with a memory operand
 *   _BLEND                       dword blend selected by the immediate `map`
 */
wolfSSL 7:481bce714567 1169 #define _MOVE_to_REG(ymm, mem) __asm__ volatile("vmovdqu %0, %%"#ymm" ":: "m"(mem):YMM_REGs) ;
wolfSSL 7:481bce714567 1170 #define _MOVE_to_MEM(mem, ymm) __asm__ volatile("vmovdqu %%"#ymm", %0" : "=m"(mem)::YMM_REGs) ;
wolfSSL 7:481bce714567 1171 #define _BYTE_SWAP(ymm, map) __asm__ volatile("vpshufb %0, %%"#ymm", %%"#ymm"\n\t"\
wolfSSL 7:481bce714567 1172 :: "m"(map):YMM_REGs) ;
wolfSSL 7:481bce714567 1173 #define _MOVE_128(ymm0, ymm1, ymm2, map) __asm__ volatile("vperm2i128 $"#map", %%"\
wolfSSL 7:481bce714567 1174 #ymm2", %%"#ymm1", %%"#ymm0" ":::YMM_REGs) ;
wolfSSL 7:481bce714567 1175 #define _MOVE_BYTE(ymm0, ymm1, map) __asm__ volatile("vpshufb %0, %%"#ymm1", %%"\
wolfSSL 7:481bce714567 1176 #ymm0"\n\t":: "m"(map):YMM_REGs) ;
wolfSSL 7:481bce714567 1177 #define _S_TEMP(dest, src, bits, temp) __asm__ volatile("vpsrld $"#bits", %%"\
wolfSSL 7:481bce714567 1178 #src", %%"#dest"\n\tvpslld $32-"#bits", %%"#src", %%"#temp"\n\tvpor %%"\
wolfSSL 7:481bce714567 1179 #temp",%%"#dest", %%"#dest" ":::YMM_REGs) ;
wolfSSL 7:481bce714567 1180 #define _AVX2_R(dest, src, bits) __asm__ volatile("vpsrld $"#bits", %%"\
wolfSSL 7:481bce714567 1181 #src", %%"#dest" ":::YMM_REGs) ;
wolfSSL 7:481bce714567 1182 #define _XOR(dest, src1, src2) __asm__ volatile("vpxor %%"#src1", %%"\
wolfSSL 7:481bce714567 1183 #src2", %%"#dest" ":::YMM_REGs) ;
wolfSSL 7:481bce714567 1184 #define _OR(dest, src1, src2) __asm__ volatile("vpor %%"#src1", %%"\
wolfSSL 7:481bce714567 1185 #src2", %%"#dest" ":::YMM_REGs) ;
wolfSSL 7:481bce714567 1186 #define _ADD(dest, src1, src2) __asm__ volatile("vpaddd %%"#src1", %%"\
wolfSSL 7:481bce714567 1187 #src2", %%"#dest" ":::YMM_REGs) ;
wolfSSL 7:481bce714567 1188 #define _ADD_MEM(dest, src1, mem) __asm__ volatile("vpaddd %0, %%"#src1", %%"\
wolfSSL 7:481bce714567 1189 #dest" "::"m"(mem):YMM_REGs) ;
wolfSSL 7:481bce714567 1190 #define _BLEND(map, dest, src1, src2) __asm__ volatile("vpblendd $"#map", %%"\
wolfSSL 7:481bce714567 1191 #src1", %%"#src2", %%"#dest" ":::YMM_REGs) ;
wolfSSL 7:481bce714567 1192
/*
 * Extract one 32-bit element of a vector register into a C lvalue.
 * Despite the parameter name `mem`, the output constraint is "=r", so the
 * value is delivered through a general-purpose register.
 *
 * _EXTRACT_XMM_0.._3 read dwords 0..3 of the given xmm register.
 * _EXTRACT_XMM_4 first swaps the two 128-bit halves of `ymm` in place
 * (vperm2i128 $0x1) and then reads dword 0 of `xmm`; the register is left
 * swapped afterwards. _EXTRACT_XMM_5.._7 emit the same instructions as
 * _1.._3, so they presumably rely on _EXTRACT_XMM_4 having performed the
 * swap first - confirm at the call sites.
 */
wolfSSL 7:481bce714567 1193 #define _EXTRACT_XMM_0(xmm, mem) __asm__ volatile("vpextrd $0, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs) ;
wolfSSL 7:481bce714567 1194 #define _EXTRACT_XMM_1(xmm, mem) __asm__ volatile("vpextrd $1, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs) ;
wolfSSL 7:481bce714567 1195 #define _EXTRACT_XMM_2(xmm, mem) __asm__ volatile("vpextrd $2, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs) ;
wolfSSL 7:481bce714567 1196 #define _EXTRACT_XMM_3(xmm, mem) __asm__ volatile("vpextrd $3, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs) ;
wolfSSL 7:481bce714567 1197 #define _EXTRACT_XMM_4(ymm, xmm, mem)\
wolfSSL 7:481bce714567 1198 __asm__ volatile("vperm2i128 $0x1, %%"#ymm", %%"#ymm", %%"#ymm" ":::YMM_REGs) ;\
wolfSSL 7:481bce714567 1199 __asm__ volatile("vpextrd $0, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs) ;
wolfSSL 7:481bce714567 1200 #define _EXTRACT_XMM_5(xmm, mem) __asm__ volatile("vpextrd $1, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs) ;
wolfSSL 7:481bce714567 1201 #define _EXTRACT_XMM_6(xmm, mem) __asm__ volatile("vpextrd $2, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs) ;
wolfSSL 7:481bce714567 1202 #define _EXTRACT_XMM_7(xmm, mem) __asm__ volatile("vpextrd $3, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs) ;
wolfSSL 7:481bce714567 1203
/* Swap the high and low 128-bit halves of `ymm` in place. SWAP_YMM_HL is
 * the expanding wrapper: it lets a register-alias macro be replaced by the
 * real register name before _SWAP_YMM_HL stringifies it. */
wolfSSL 7:481bce714567 1204 #define _SWAP_YMM_HL(ymm) __asm__ volatile("vperm2i128 $0x1, %%"#ymm", %%"#ymm", %%"#ymm" ":::YMM_REGs) ;
wolfSSL 7:481bce714567 1205 #define SWAP_YMM_HL(ymm) _SWAP_YMM_HL(ymm)
wolfSSL 7:481bce714567 1206
/*
 * Public spellings of the underscore-prefixed primitives. The extra macro
 * level is what makes register aliases such as W_I_16 work: arguments are
 * macro-expanded when passed through this layer, so the inner macro's `#`
 * operator stringifies the real register name (e.g. ymm8) rather than the
 * alias.
 */
wolfSSL 7:481bce714567 1207 #define MOVE_to_REG(ymm, mem) _MOVE_to_REG(ymm, mem)
wolfSSL 7:481bce714567 1208 #define MOVE_to_MEM(mem, ymm) _MOVE_to_MEM(mem, ymm)
wolfSSL 7:481bce714567 1209 #define BYTE_SWAP(ymm, map) _BYTE_SWAP(ymm, map)
wolfSSL 7:481bce714567 1210 #define MOVE_128(ymm0, ymm1, ymm2, map) _MOVE_128(ymm0, ymm1, ymm2, map)
wolfSSL 7:481bce714567 1211 #define MOVE_BYTE(ymm0, ymm1, map) _MOVE_BYTE(ymm0, ymm1, map)
wolfSSL 7:481bce714567 1212 #define XOR(dest, src1, src2) _XOR(dest, src1, src2)
wolfSSL 7:481bce714567 1213 #define OR(dest, src1, src2) _OR(dest, src1, src2)
wolfSSL 7:481bce714567 1214 #define ADD(dest, src1, src2) _ADD(dest, src1, src2)
wolfSSL 7:481bce714567 1215 #define ADD_MEM(dest, src1, mem) _ADD_MEM(dest, src1, mem)
wolfSSL 7:481bce714567 1216 #define BLEND(map, dest, src1, src2) _BLEND(map, dest, src1, src2)
wolfSSL 7:481bce714567 1217
/* AVX2_S: rotate each 32-bit lane of `src` right by `bits` into `dest`,
 * using the S_TEMP register as scratch (so S_TEMP is overwritten).
 * AVX2_R: logical right shift of each 32-bit lane.
 * Note that S_TMP's expansion already ends in an extra ';'. */
wolfSSL 7:481bce714567 1218 #define S_TMP(dest, src, bits, temp) _S_TEMP(dest, src, bits, temp);
wolfSSL 7:481bce714567 1219 #define AVX2_S(dest, src, bits) S_TMP(dest, src, bits, S_TEMP)
wolfSSL 7:481bce714567 1220 #define AVX2_R(dest, src, bits) _AVX2_R(dest, src, bits)
wolfSSL 7:481bce714567 1221
/*
 * GAMMA0(dest, src): dest = rotr(src,7) ^ rotr(src,18) ^ (src >> 3) on every
 * 32-bit lane. G_TEMP and S_TEMP are overwritten as scratch.
 * GAMMA0_1 / GAMMA0_2 are the same computation cut in two so that a caller
 * can place other statements in between; G_TEMP and `dest` must survive
 * untouched from the _1 half to the _2 half.
 */
wolfSSL 7:481bce714567 1222 #define GAMMA0(dest, src) AVX2_S(dest, src, 7); AVX2_S(G_TEMP, src, 18); \
wolfSSL 7:481bce714567 1223 XOR(dest, G_TEMP, dest) ; AVX2_R(G_TEMP, src, 3); XOR(dest, G_TEMP, dest) ;
wolfSSL 7:481bce714567 1224 #define GAMMA0_1(dest, src) AVX2_S(dest, src, 7); AVX2_S(G_TEMP, src, 18);
wolfSSL 7:481bce714567 1225 #define GAMMA0_2(dest, src) XOR(dest, G_TEMP, dest) ; AVX2_R(G_TEMP, src, 3); \
wolfSSL 7:481bce714567 1226 XOR(dest, G_TEMP, dest) ;
wolfSSL 7:481bce714567 1227
/*
 * GAMMA1(dest, src): dest = rotr(src,17) ^ rotr(src,19) ^ (src >> 10) on
 * every 32-bit lane, with the same scratch usage and the same _1 / _2 split
 * as GAMMA0.
 */
wolfSSL 7:481bce714567 1228 #define GAMMA1(dest, src) AVX2_S(dest, src, 17); AVX2_S(G_TEMP, src, 19); \
wolfSSL 7:481bce714567 1229 XOR(dest, G_TEMP, dest) ; AVX2_R(G_TEMP, src, 10); XOR(dest, G_TEMP, dest) ;
wolfSSL 7:481bce714567 1230 #define GAMMA1_1(dest, src) AVX2_S(dest, src, 17); AVX2_S(G_TEMP, src, 19);
wolfSSL 7:481bce714567 1231 #define GAMMA1_2(dest, src) XOR(dest, G_TEMP, dest) ; AVX2_R(G_TEMP, src, 10); \
wolfSSL 7:481bce714567 1232 XOR(dest, G_TEMP, dest) ;
wolfSSL 7:481bce714567 1233
/*
 * Feedback steps: copy words of the partially computed W_I vector into the
 * W_I_2 / W_I_7 operand vectors. Each step shuffles W_I (optionally after a
 * 128-bit lane move) into YMM_TEMP0 using one of the mMAP* byte maps, then
 * blends the dwords selected by the immediate into the target register:
 *   FEEDBACK1_to_W_I_2  blend mask 0x0c (dwords 2,3 of W_I_2)
 *   FEEDBACK2_to_W_I_2  blend mask 0x30 (dwords 4,5 of W_I_2)
 *   FEEDBACK3_to_W_I_2  blend mask 0xc0 (dwords 6,7 of W_I_2)
 *   FEEDBACK_to_W_I_7   blend mask 0x80 (dword 7 of W_I_7)
 * YMM_TEMP0 is overwritten by every step.
 */
wolfSSL 7:481bce714567 1234 #define FEEDBACK1_to_W_I_2 MOVE_BYTE(YMM_TEMP0, W_I, mMAP1toW_I_2[0]) ; \
wolfSSL 7:481bce714567 1235 BLEND(0x0c, W_I_2, YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1236 #define FEEDBACK2_to_W_I_2 MOVE_128(YMM_TEMP0, W_I, W_I, 0x08) ; \
wolfSSL 7:481bce714567 1237 MOVE_BYTE(YMM_TEMP0, YMM_TEMP0, mMAP2toW_I_2[0]) ; BLEND(0x30, W_I_2, YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1238 #define FEEDBACK3_to_W_I_2 MOVE_BYTE(YMM_TEMP0, W_I, mMAP3toW_I_2[0]) ; \
wolfSSL 7:481bce714567 1239 BLEND(0xc0, W_I_2, YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1240
wolfSSL 7:481bce714567 1241 #define FEEDBACK_to_W_I_7 MOVE_128(YMM_TEMP0, W_I, W_I, 0x08) ;\
wolfSSL 7:481bce714567 1242 MOVE_BYTE(YMM_TEMP0, YMM_TEMP0, mMAPtoW_I_7[0]) ; BLEND(0x80, W_I_7, YMM_TEMP0, W_I_7) ;
wolfSSL 7:481bce714567 1243
wolfSSL 7:481bce714567 1244 #undef voitle
wolfSSL 7:481bce714567 1245
/*
 * Fixed YMM register assignment for the AVX2 transform. W_I_16, W_I_15,
 * W_I_7 and W_I_2 hold the schedule operands named in the
 * "W[i] = Gamma1(W[i-2]) + W[i-7] + ..." comments of Transform_AVX2, and
 * W_I receives the newly computed words.
 * YMM_TEMP0 and W_K_TEMP are both ymm15 (likewise the xmm15 pair), so they
 * are two names for one register and cannot hold different values at once.
 */
wolfSSL 7:481bce714567 1246 #define W_I_16 ymm8
wolfSSL 7:481bce714567 1247 #define W_I_15 ymm9
wolfSSL 7:481bce714567 1248 #define W_I_7 ymm10
wolfSSL 7:481bce714567 1249 #define W_I_2 ymm11
wolfSSL 7:481bce714567 1250 #define W_I ymm12
wolfSSL 7:481bce714567 1251 #define G_TEMP ymm13
wolfSSL 7:481bce714567 1252 #define S_TEMP ymm14
wolfSSL 7:481bce714567 1253 #define YMM_TEMP0 ymm15
wolfSSL 7:481bce714567 1254 #define YMM_TEMP0x xmm15
wolfSSL 7:481bce714567 1255 #define W_I_TEMP ymm7
wolfSSL 7:481bce714567 1256 #define W_K_TEMP ymm15
wolfSSL 7:481bce714567 1257 #define W_K_TEMPx xmm15
wolfSSL 7:481bce714567 1258
/* Clobber list used by every AVX2 asm macro above. It expands to nothing,
 * so the compiler is not told that these registers are modified; the
 * register list on the following line is commented out. */
wolfSSL 7:481bce714567 1259 #define YMM_REGs /* Registers are saved in Sha256Update/Final */
wolfSSL 7:481bce714567 1260 /* "%ymm7","%ymm8","%ymm9","%ymm10","%ymm11","%ymm12","%ymm13","%ymm14","%ymm15"*/
wolfSSL 7:481bce714567 1261
wolfSSL 7:481bce714567 1262
/*
 * Helpers that re-derive the schedule operand vectors after eight new words
 * have been produced in w_i. Each expansion is a run of separate asm
 * statements; every macro ends with a line continuation that swallows the
 * empty line after it.
 *
 * MOVE_15_to_16: builds w_i_16 from w_i_15 and w_i_7 (lane swap, two dword
 * blends, then a dword rotation within each 128-bit lane). Both w_i_15 and
 * w_i_16 are overwritten along the way.
 */
wolfSSL 7:481bce714567 1263 #define MOVE_15_to_16(w_i_16, w_i_15, w_i_7)\
wolfSSL 7:481bce714567 1264 __asm__ volatile("vperm2i128 $0x01, %%"#w_i_15", %%"#w_i_15", %%"#w_i_15" ":::YMM_REGs) ;\
wolfSSL 7:481bce714567 1265 __asm__ volatile("vpblendd $0x08, %%"#w_i_15", %%"#w_i_7", %%"#w_i_16" ":::YMM_REGs) ;\
wolfSSL 7:481bce714567 1266 __asm__ volatile("vperm2i128 $0x01, %%"#w_i_7", %%"#w_i_7", %%"#w_i_15" ":::YMM_REGs) ;\
wolfSSL 7:481bce714567 1267 __asm__ volatile("vpblendd $0x80, %%"#w_i_15", %%"#w_i_16", %%"#w_i_16" ":::YMM_REGs) ;\
wolfSSL 7:481bce714567 1268 __asm__ volatile("vpshufd $0x93, %%"#w_i_16", %%"#w_i_16" ":::YMM_REGs) ;\
wolfSSL 7:481bce714567 1269
/* MOVE_7_to_15: plain register copy, w_i_15 = w_i_7. */
wolfSSL 7:481bce714567 1270 #define MOVE_7_to_15(w_i_15, w_i_7)\
wolfSSL 7:481bce714567 1271 __asm__ volatile("vmovdqu %%"#w_i_7", %%"#w_i_15" ":::YMM_REGs) ;\
wolfSSL 7:481bce714567 1272
/* MOVE_I_to_7: builds w_i_7 from w_i (lane swap, one-dword blend, dword
 * rotation within each lane). w_i itself is not written. */
wolfSSL 7:481bce714567 1273 #define MOVE_I_to_7(w_i_7, w_i)\
wolfSSL 7:481bce714567 1274 __asm__ volatile("vperm2i128 $0x01, %%"#w_i", %%"#w_i", %%"#w_i_7" ":::YMM_REGs) ;\
wolfSSL 7:481bce714567 1275 __asm__ volatile("vpblendd $0x01, %%"#w_i_7", %%"#w_i", %%"#w_i_7" ":::YMM_REGs) ;\
wolfSSL 7:481bce714567 1276 __asm__ volatile("vpshufd $0x39, %%"#w_i_7", %%"#w_i_7" ":::YMM_REGs) ;\
wolfSSL 7:481bce714567 1277
/* MOVE_I_to_2: builds w_i_2 from w_i (lane swap followed by a dword
 * shuffle within each lane). w_i itself is not written. */
wolfSSL 7:481bce714567 1278 #define MOVE_I_to_2(w_i_2, w_i)\
wolfSSL 7:481bce714567 1279 __asm__ volatile("vperm2i128 $0x01, %%"#w_i", %%"#w_i", %%"#w_i_2" ":::YMM_REGs) ;\
wolfSSL 7:481bce714567 1280 __asm__ volatile("vpshufd $0x0e, %%"#w_i_2", %%"#w_i_2" ":::YMM_REGs) ;\
wolfSSL 7:481bce714567 1281
/* ROTATE_W: runs the four moves in dependency order - w_i_16 is derived
 * before w_i_15 is replaced, and w_i_15 before w_i_7 is replaced. */
wolfSSL 7:481bce714567 1282 #define ROTATE_W(w_i_16, w_i_15, w_i_7, w_i_2, w_i)\
wolfSSL 7:481bce714567 1283 MOVE_15_to_16(w_i_16, w_i_15, w_i_7) ; \
wolfSSL 7:481bce714567 1284 MOVE_7_to_15(w_i_15, w_i_7) ; \
wolfSSL 7:481bce714567 1285 MOVE_I_to_7(w_i_7, w_i) ; \
wolfSSL 7:481bce714567 1286 MOVE_I_to_2(w_i_2, w_i) ;\
wolfSSL 7:481bce714567 1287
/*
 * _RegToDigest: add the eight working registers into the hash state.
 * For each S_n, one `movl` copies the register into the local `d`, which is
 * then added to sha256->digest[n] in C.
 *
 * Requirements at the expansion site: a variable named `sha256` must be in
 * scope. The register text is pasted after a single '%', so each S_n
 * argument presumably expands to a name that already carries its own '%'
 * prefix (definitions are outside this section - confirm). SSE_REGs is the
 * clobber list, also defined elsewhere.
 */
wolfSSL 7:481bce714567 1288 #define _RegToDigest(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )\
wolfSSL 7:481bce714567 1289 { word32 d ;\
wolfSSL 7:481bce714567 1290 __asm__ volatile("movl %"#S_0", %0":"=r"(d)::SSE_REGs) ;\
wolfSSL 7:481bce714567 1291 sha256->digest[0] += d;\
wolfSSL 7:481bce714567 1292 __asm__ volatile("movl %"#S_1", %0":"=r"(d)::SSE_REGs) ;\
wolfSSL 7:481bce714567 1293 sha256->digest[1] += d;\
wolfSSL 7:481bce714567 1294 __asm__ volatile("movl %"#S_2", %0":"=r"(d)::SSE_REGs) ;\
wolfSSL 7:481bce714567 1295 sha256->digest[2] += d;\
wolfSSL 7:481bce714567 1296 __asm__ volatile("movl %"#S_3", %0":"=r"(d)::SSE_REGs) ;\
wolfSSL 7:481bce714567 1297 sha256->digest[3] += d;\
wolfSSL 7:481bce714567 1298 __asm__ volatile("movl %"#S_4", %0":"=r"(d)::SSE_REGs) ;\
wolfSSL 7:481bce714567 1299 sha256->digest[4] += d;\
wolfSSL 7:481bce714567 1300 __asm__ volatile("movl %"#S_5", %0":"=r"(d)::SSE_REGs) ;\
wolfSSL 7:481bce714567 1301 sha256->digest[5] += d;\
wolfSSL 7:481bce714567 1302 __asm__ volatile("movl %"#S_6", %0":"=r"(d)::SSE_REGs) ;\
wolfSSL 7:481bce714567 1303 sha256->digest[6] += d;\
wolfSSL 7:481bce714567 1304 __asm__ volatile("movl %"#S_7", %0":"=r"(d)::SSE_REGs) ;\
wolfSSL 7:481bce714567 1305 sha256->digest[7] += d;\
wolfSSL 7:481bce714567 1306 }
wolfSSL 7:481bce714567 1307
/*
 * _DumpS: debugging aid that prints the eight working registers.
 * The registers are first copied into d[0..7], printed with printf as eight
 * %08x values, and then written back from d[] - presumably because the
 * printf call may overwrite them. Requires printf to be declared at the
 * expansion site; SSE_REGs is the clobber list defined elsewhere.
 */
wolfSSL 7:481bce714567 1308 #define _DumpS(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )\
wolfSSL 7:481bce714567 1309 { word32 d[8] ;\
wolfSSL 7:481bce714567 1310 __asm__ volatile("movl %"#S_0", %0":"=r"(d[0])::SSE_REGs) ;\
wolfSSL 7:481bce714567 1311 __asm__ volatile("movl %"#S_1", %0":"=r"(d[1])::SSE_REGs) ;\
wolfSSL 7:481bce714567 1312 __asm__ volatile("movl %"#S_2", %0":"=r"(d[2])::SSE_REGs) ;\
wolfSSL 7:481bce714567 1313 __asm__ volatile("movl %"#S_3", %0":"=r"(d[3])::SSE_REGs) ;\
wolfSSL 7:481bce714567 1314 __asm__ volatile("movl %"#S_4", %0":"=r"(d[4])::SSE_REGs) ;\
wolfSSL 7:481bce714567 1315 __asm__ volatile("movl %"#S_5", %0":"=r"(d[5])::SSE_REGs) ;\
wolfSSL 7:481bce714567 1316 __asm__ volatile("movl %"#S_6", %0":"=r"(d[6])::SSE_REGs) ;\
wolfSSL 7:481bce714567 1317 __asm__ volatile("movl %"#S_7", %0":"=r"(d[7])::SSE_REGs) ;\
wolfSSL 7:481bce714567 1318 printf("S[0..7]=%08x,%08x,%08x,%08x,%08x,%08x,%08x,%08x\n", d[0],d[1],d[2],d[3],d[4],d[5],d[6],d[7]);\
wolfSSL 7:481bce714567 1319 __asm__ volatile("movl %0, %"#S_0::"r"(d[0]):SSE_REGs) ;\
wolfSSL 7:481bce714567 1320 __asm__ volatile("movl %0, %"#S_1::"r"(d[1]):SSE_REGs) ;\
wolfSSL 7:481bce714567 1321 __asm__ volatile("movl %0, %"#S_2::"r"(d[2]):SSE_REGs) ;\
wolfSSL 7:481bce714567 1322 __asm__ volatile("movl %0, %"#S_3::"r"(d[3]):SSE_REGs) ;\
wolfSSL 7:481bce714567 1323 __asm__ volatile("movl %0, %"#S_4::"r"(d[4]):SSE_REGs) ;\
wolfSSL 7:481bce714567 1324 __asm__ volatile("movl %0, %"#S_5::"r"(d[5]):SSE_REGs) ;\
wolfSSL 7:481bce714567 1325 __asm__ volatile("movl %0, %"#S_6::"r"(d[6]):SSE_REGs) ;\
wolfSSL 7:481bce714567 1326 __asm__ volatile("movl %0, %"#S_7::"r"(d[7]):SSE_REGs) ;\
wolfSSL 7:481bce714567 1327 }
wolfSSL 7:481bce714567 1328
wolfSSL 7:481bce714567 1329
/* Expanding wrappers: the S_n register aliases are macro-expanded here
 * before the underscore-prefixed macros stringify them. _DigestToReg is
 * defined outside this section. */
wolfSSL 7:481bce714567 1330 #define DigestToReg(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )\
wolfSSL 7:481bce714567 1331 _DigestToReg(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )
wolfSSL 7:481bce714567 1332
wolfSSL 7:481bce714567 1333 #define RegToDigest(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )\
wolfSSL 7:481bce714567 1334 _RegToDigest(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )
wolfSSL 7:481bce714567 1335
/* NOTE(review): the wrapper is spelled "DumS" while it forwards to _DumpS;
 * this looks like a typo for "DumpS" - check for users before renaming. */
wolfSSL 7:481bce714567 1336 #define DumS(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )\
wolfSSL 7:481bce714567 1337 _DumpS(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )
wolfSSL 7:481bce714567 1338
wolfSSL 7:481bce714567 1339
/*
 * vpshufb control masks, four 64-bit entries (32 bytes) each, passed to the
 * asm macros as memory operands via their [0] element. A control byte of
 * 0x80 makes vpshufb write zero to that destination byte.
 *
 * The 0x0405060700010203 / 0x0c0d0e0f08090a0b pattern reverses the bytes of
 * each 32-bit word. mBYTE_FLIP_MASK_16 and _15 are identical and flip all
 * eight words; _7 zeroes the last word; _2 keeps only the first two words.
 *
 * NOTE(review): the entries are 64-bit constants stored as `unsigned long`,
 * which is only 64 bits wide on LP64 targets. On an ABI with a 32-bit long
 * the values would be truncated and the tables would not be 32 bytes -
 * confirm that this code is only built for LP64.
 */
wolfSSL 7:481bce714567 1340 /* Byte swap Masks to ensure that rest of the words are filled with zero's. */
wolfSSL 7:481bce714567 1341 static const unsigned long mBYTE_FLIP_MASK_16[] =
wolfSSL 7:481bce714567 1342 { 0x0405060700010203, 0x0c0d0e0f08090a0b, 0x0405060700010203, 0x0c0d0e0f08090a0b } ;
wolfSSL 7:481bce714567 1343 static const unsigned long mBYTE_FLIP_MASK_15[] =
wolfSSL 7:481bce714567 1344 { 0x0405060700010203, 0x0c0d0e0f08090a0b, 0x0405060700010203, 0x0c0d0e0f08090a0b } ;
wolfSSL 7:481bce714567 1345 static const unsigned long mBYTE_FLIP_MASK_7 [] =
wolfSSL 7:481bce714567 1346 { 0x0405060700010203, 0x0c0d0e0f08090a0b, 0x0405060700010203, 0x8080808008090a0b } ;
wolfSSL 7:481bce714567 1347 static const unsigned long mBYTE_FLIP_MASK_2 [] =
wolfSSL 7:481bce714567 1348 { 0x0405060700010203, 0x8080808080808080, 0x8080808080808080, 0x8080808080808080 } ;
wolfSSL 7:481bce714567 1349
/* Byte maps used by the FEEDBACK*_to_W_I_* macros: each one moves a few
 * source bytes into a single destination position and zeroes the rest. */
wolfSSL 7:481bce714567 1350 static const unsigned long mMAPtoW_I_7[] =
wolfSSL 7:481bce714567 1351 { 0x8080808080808080, 0x8080808080808080, 0x8080808080808080, 0x0302010080808080 } ;
wolfSSL 7:481bce714567 1352 static const unsigned long mMAP1toW_I_2[] =
wolfSSL 7:481bce714567 1353 { 0x8080808080808080, 0x0706050403020100, 0x8080808080808080, 0x8080808080808080 } ;
wolfSSL 7:481bce714567 1354 static const unsigned long mMAP2toW_I_2[] =
wolfSSL 7:481bce714567 1355 { 0x8080808080808080, 0x8080808080808080, 0x0f0e0d0c0b0a0908, 0x8080808080808080 } ;
wolfSSL 7:481bce714567 1356 static const unsigned long mMAP3toW_I_2[] =
wolfSSL 7:481bce714567 1357 { 0x8080808080808080, 0x8080808080808080, 0x8080808080808080, 0x0706050403020100 } ;
wolfSSL 7:481bce714567 1358
wolfSSL 7:481bce714567 1359 static int Transform_AVX2(Sha256* sha256)
wolfSSL 7:481bce714567 1360 {
wolfSSL 7:481bce714567 1361
wolfSSL 7:481bce714567 1362 #ifdef WOLFSSL_SMALL_STACK
wolfSSL 7:481bce714567 1363 word32* W_K;
wolfSSL 7:481bce714567 1364 W_K = (word32*) XMALLOC(sizeof(word32) * 64, NULL, DYNAMIC_TYPE_TMP_BUFFER);
wolfSSL 7:481bce714567 1365 if (W_K == NULL)
wolfSSL 7:481bce714567 1366 return MEMORY_E;
wolfSSL 7:481bce714567 1367 #else
wolfSSL 7:481bce714567 1368 word32 W_K[64] ;
wolfSSL 7:481bce714567 1369 #endif
wolfSSL 7:481bce714567 1370
wolfSSL 7:481bce714567 1371 MOVE_to_REG(W_I_16, sha256->buffer[0]); BYTE_SWAP(W_I_16, mBYTE_FLIP_MASK_16[0]) ;
wolfSSL 7:481bce714567 1372 MOVE_to_REG(W_I_15, sha256->buffer[1]); BYTE_SWAP(W_I_15, mBYTE_FLIP_MASK_15[0]) ;
wolfSSL 7:481bce714567 1373 MOVE_to_REG(W_I, sha256->buffer[8]) ; BYTE_SWAP(W_I, mBYTE_FLIP_MASK_16[0]) ;
wolfSSL 7:481bce714567 1374 MOVE_to_REG(W_I_7, sha256->buffer[16-7]) ; BYTE_SWAP(W_I_7, mBYTE_FLIP_MASK_7[0]) ;
wolfSSL 7:481bce714567 1375 MOVE_to_REG(W_I_2, sha256->buffer[16-2]) ; BYTE_SWAP(W_I_2, mBYTE_FLIP_MASK_2[0]) ;
wolfSSL 7:481bce714567 1376
wolfSSL 7:481bce714567 1377 DigestToReg(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7) ;
wolfSSL 7:481bce714567 1378
wolfSSL 7:481bce714567 1379 ADD_MEM(W_K_TEMP, W_I_16, K[0]) ;
wolfSSL 7:481bce714567 1380 MOVE_to_MEM(W_K[0], W_K_TEMP) ;
wolfSSL 7:481bce714567 1381
wolfSSL 7:481bce714567 1382 RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,0) ;
wolfSSL 7:481bce714567 1383 RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,1) ;
wolfSSL 7:481bce714567 1384 RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,2) ;
wolfSSL 7:481bce714567 1385 RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,3) ;
wolfSSL 7:481bce714567 1386 RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,4) ;
wolfSSL 7:481bce714567 1387 RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,5) ;
wolfSSL 7:481bce714567 1388 RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,6) ;
wolfSSL 7:481bce714567 1389 RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,7) ;
wolfSSL 7:481bce714567 1390
wolfSSL 7:481bce714567 1391 ADD_MEM(YMM_TEMP0, W_I, K[8]) ;
wolfSSL 7:481bce714567 1392 MOVE_to_MEM(W_K[8], YMM_TEMP0) ;
wolfSSL 7:481bce714567 1393
wolfSSL 7:481bce714567 1394 /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16] */
wolfSSL 7:481bce714567 1395 RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8) ;
wolfSSL 7:481bce714567 1396 GAMMA0_1(W_I_TEMP, W_I_15) ;
wolfSSL 7:481bce714567 1397 RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8) ;
wolfSSL 7:481bce714567 1398 GAMMA0_2(W_I_TEMP, W_I_15) ;
wolfSSL 7:481bce714567 1399 RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8) ;
wolfSSL 7:481bce714567 1400 ADD(W_I_TEMP, W_I_16, W_I_TEMP) ;/* for saving W_I before adding incomplete W_I_7 */
wolfSSL 7:481bce714567 1401 RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,9) ;
wolfSSL 7:481bce714567 1402 ADD(W_I, W_I_7, W_I_TEMP);
wolfSSL 7:481bce714567 1403 RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,9) ;
wolfSSL 7:481bce714567 1404 GAMMA1_1(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1405 RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,9) ;
wolfSSL 7:481bce714567 1406 GAMMA1_2(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1407 RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,10) ;
wolfSSL 7:481bce714567 1408 ADD(W_I, W_I, YMM_TEMP0) ;/* now W[16..17] are completed */
wolfSSL 7:481bce714567 1409 RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,10) ;
wolfSSL 7:481bce714567 1410 FEEDBACK1_to_W_I_2 ;
wolfSSL 7:481bce714567 1411 RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,10) ;
wolfSSL 7:481bce714567 1412 FEEDBACK_to_W_I_7 ;
wolfSSL 7:481bce714567 1413 RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,11) ;
wolfSSL 7:481bce714567 1414 ADD(W_I_TEMP, W_I_7, W_I_TEMP);
wolfSSL 7:481bce714567 1415 RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,11) ;
wolfSSL 7:481bce714567 1416 GAMMA1_1(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1417 RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,11) ;
wolfSSL 7:481bce714567 1418 GAMMA1_2(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1419 RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,12) ;
wolfSSL 7:481bce714567 1420 ADD(W_I, W_I_TEMP, YMM_TEMP0) ;/* now W[16..19] are completed */
wolfSSL 7:481bce714567 1421 RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,12) ;
wolfSSL 7:481bce714567 1422 FEEDBACK2_to_W_I_2 ;
wolfSSL 7:481bce714567 1423 RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,12) ;
wolfSSL 7:481bce714567 1424 GAMMA1_1(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1425 RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,13) ;
wolfSSL 7:481bce714567 1426 GAMMA1_2(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1427 RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,13) ;
wolfSSL 7:481bce714567 1428 ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..21] are completed */
wolfSSL 7:481bce714567 1429 RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,13) ;
wolfSSL 7:481bce714567 1430 FEEDBACK3_to_W_I_2 ;
wolfSSL 7:481bce714567 1431 RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,14) ;
wolfSSL 7:481bce714567 1432 GAMMA1(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1433 RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,14) ;
wolfSSL 7:481bce714567 1434 RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,14) ;
wolfSSL 7:481bce714567 1435 ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..23] are completed */
wolfSSL 7:481bce714567 1436 RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,15) ;
wolfSSL 7:481bce714567 1437
wolfSSL 7:481bce714567 1438 MOVE_to_REG(YMM_TEMP0, K[16]) ;
wolfSSL 7:481bce714567 1439 RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,15) ;
wolfSSL 7:481bce714567 1440 ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I) ;
wolfSSL 7:481bce714567 1441 RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,15) ;
wolfSSL 7:481bce714567 1442 ADD(YMM_TEMP0, YMM_TEMP0, W_I) ;
wolfSSL 7:481bce714567 1443 MOVE_to_MEM(W_K[16], YMM_TEMP0) ;
wolfSSL 7:481bce714567 1444
wolfSSL 7:481bce714567 1445 /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16] */
wolfSSL 7:481bce714567 1446 RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16) ;
wolfSSL 7:481bce714567 1447 GAMMA0_1(W_I_TEMP, W_I_15) ;
wolfSSL 7:481bce714567 1448 RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16) ;
wolfSSL 7:481bce714567 1449 GAMMA0_2(W_I_TEMP, W_I_15) ;
wolfSSL 7:481bce714567 1450 RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16) ;
wolfSSL 7:481bce714567 1451 ADD(W_I_TEMP, W_I_16, W_I_TEMP) ;/* for saving W_I before adding incomplete W_I_7 */
wolfSSL 7:481bce714567 1452 RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,17) ;
wolfSSL 7:481bce714567 1453 ADD(W_I, W_I_7, W_I_TEMP);
wolfSSL 7:481bce714567 1454 RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,17) ;
wolfSSL 7:481bce714567 1455 GAMMA1_1(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1456 RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,17) ;
wolfSSL 7:481bce714567 1457 GAMMA1_2(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1458 RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,18) ;
wolfSSL 7:481bce714567 1459 ADD(W_I, W_I, YMM_TEMP0) ;/* now W[16..17] are completed */
wolfSSL 7:481bce714567 1460 RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,18) ;
wolfSSL 7:481bce714567 1461 FEEDBACK1_to_W_I_2 ;
wolfSSL 7:481bce714567 1462 RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,18) ;
wolfSSL 7:481bce714567 1463 FEEDBACK_to_W_I_7 ;
wolfSSL 7:481bce714567 1464 RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,19) ;
wolfSSL 7:481bce714567 1465 ADD(W_I_TEMP, W_I_7, W_I_TEMP);
wolfSSL 7:481bce714567 1466 RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,19) ;
wolfSSL 7:481bce714567 1467 GAMMA1(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1468 RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,19) ;
wolfSSL 7:481bce714567 1469 GAMMA1_2(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1470 RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,20) ;
wolfSSL 7:481bce714567 1471 ADD(W_I, W_I_TEMP, YMM_TEMP0) ;/* now W[16..19] are completed */
wolfSSL 7:481bce714567 1472 RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,20) ;
wolfSSL 7:481bce714567 1473 FEEDBACK2_to_W_I_2 ;
wolfSSL 7:481bce714567 1474 RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,20) ;
wolfSSL 7:481bce714567 1475 GAMMA1_1(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1476 RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,21) ;
wolfSSL 7:481bce714567 1477 GAMMA1_2(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1478 RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,21) ;
wolfSSL 7:481bce714567 1479 ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..21] are completed */
wolfSSL 7:481bce714567 1480 RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,21) ;
wolfSSL 7:481bce714567 1481 FEEDBACK3_to_W_I_2 ;
wolfSSL 7:481bce714567 1482 RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,22) ;
wolfSSL 7:481bce714567 1483 GAMMA1_1(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1484 RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,22) ;
wolfSSL 7:481bce714567 1485 GAMMA1_2(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1486 RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,22) ;
wolfSSL 7:481bce714567 1487 ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..23] are completed */
wolfSSL 7:481bce714567 1488 RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,23) ;
wolfSSL 7:481bce714567 1489
wolfSSL 7:481bce714567 1490 MOVE_to_REG(YMM_TEMP0, K[24]) ;
wolfSSL 7:481bce714567 1491 RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,23) ;
wolfSSL 7:481bce714567 1492 ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I) ;
wolfSSL 7:481bce714567 1493 RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,23) ;
wolfSSL 7:481bce714567 1494 ADD(YMM_TEMP0, YMM_TEMP0, W_I) ;
wolfSSL 7:481bce714567 1495 MOVE_to_MEM(W_K[24], YMM_TEMP0) ;
wolfSSL 7:481bce714567 1496
wolfSSL 7:481bce714567 1497 /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16] */
wolfSSL 7:481bce714567 1498 RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24) ;
wolfSSL 7:481bce714567 1499 GAMMA0_1(W_I_TEMP, W_I_15) ;
wolfSSL 7:481bce714567 1500 RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24) ;
wolfSSL 7:481bce714567 1501 GAMMA0_2(W_I_TEMP, W_I_15) ;
wolfSSL 7:481bce714567 1502 RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24) ;
wolfSSL 7:481bce714567 1503 ADD(W_I_TEMP, W_I_16, W_I_TEMP) ;/* for saving W_I before adding incomplete W_I_7 */
wolfSSL 7:481bce714567 1504 RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,25) ;
wolfSSL 7:481bce714567 1505 ADD(W_I, W_I_7, W_I_TEMP);
wolfSSL 7:481bce714567 1506 RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,25) ;
wolfSSL 7:481bce714567 1507 GAMMA1_1(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1508 RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,25) ;
wolfSSL 7:481bce714567 1509 GAMMA1_2(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1510 RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,26) ;
wolfSSL 7:481bce714567 1511 ADD(W_I, W_I, YMM_TEMP0) ;/* now W[16..17] are completed */
wolfSSL 7:481bce714567 1512 RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,26) ;
wolfSSL 7:481bce714567 1513 FEEDBACK1_to_W_I_2 ;
wolfSSL 7:481bce714567 1514 RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,26) ;
wolfSSL 7:481bce714567 1515 FEEDBACK_to_W_I_7 ;
wolfSSL 7:481bce714567 1516 RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,27) ;
wolfSSL 7:481bce714567 1517 ADD(W_I_TEMP, W_I_7, W_I_TEMP);
wolfSSL 7:481bce714567 1518 RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,27) ;
wolfSSL 7:481bce714567 1519 GAMMA1_1(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1520 RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,27) ;
wolfSSL 7:481bce714567 1521 GAMMA1_2(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1522 RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,28) ;
wolfSSL 7:481bce714567 1523 ADD(W_I, W_I_TEMP, YMM_TEMP0) ;/* now W[16..19] are completed */
wolfSSL 7:481bce714567 1524 RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,28) ;
wolfSSL 7:481bce714567 1525 FEEDBACK2_to_W_I_2 ;
wolfSSL 7:481bce714567 1526 RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,28) ;
wolfSSL 7:481bce714567 1527 GAMMA1_1(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1528 RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,29) ;
wolfSSL 7:481bce714567 1529 GAMMA1_2(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1530 RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,29) ;
wolfSSL 7:481bce714567 1531 ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..21] are completed */
wolfSSL 7:481bce714567 1532 RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,29) ;
wolfSSL 7:481bce714567 1533 FEEDBACK3_to_W_I_2 ;
wolfSSL 7:481bce714567 1534 RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,30) ;
wolfSSL 7:481bce714567 1535 GAMMA1(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1536 RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,30) ;
wolfSSL 7:481bce714567 1537 RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,30) ;
wolfSSL 7:481bce714567 1538 ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..23] are completed */
wolfSSL 7:481bce714567 1539 RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,31) ;
wolfSSL 7:481bce714567 1540
wolfSSL 7:481bce714567 1541 MOVE_to_REG(YMM_TEMP0, K[32]) ;
wolfSSL 7:481bce714567 1542 RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,31) ;
wolfSSL 7:481bce714567 1543 ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I) ;
wolfSSL 7:481bce714567 1544 RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,31) ;
wolfSSL 7:481bce714567 1545 ADD(YMM_TEMP0, YMM_TEMP0, W_I) ;
wolfSSL 7:481bce714567 1546 MOVE_to_MEM(W_K[32], YMM_TEMP0) ;
wolfSSL 7:481bce714567 1547
wolfSSL 7:481bce714567 1548
wolfSSL 7:481bce714567 1549 /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16] */
wolfSSL 7:481bce714567 1550 RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32) ;
wolfSSL 7:481bce714567 1551 GAMMA0_1(W_I_TEMP, W_I_15) ;
wolfSSL 7:481bce714567 1552 RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32) ;
wolfSSL 7:481bce714567 1553 GAMMA0_2(W_I_TEMP, W_I_15) ;
wolfSSL 7:481bce714567 1554 RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32) ;
wolfSSL 7:481bce714567 1555 ADD(W_I_TEMP, W_I_16, W_I_TEMP) ;/* for saving W_I before adding incomplete W_I_7 */
wolfSSL 7:481bce714567 1556 RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,33) ;
wolfSSL 7:481bce714567 1557 ADD(W_I, W_I_7, W_I_TEMP);
wolfSSL 7:481bce714567 1558 RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,33) ;
wolfSSL 7:481bce714567 1559 GAMMA1_1(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1560 RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,33) ;
wolfSSL 7:481bce714567 1561 GAMMA1_2(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1562 RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,34) ;
wolfSSL 7:481bce714567 1563 ADD(W_I, W_I, YMM_TEMP0) ;/* now W[16..17] are completed */
wolfSSL 7:481bce714567 1564 RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,34) ;
wolfSSL 7:481bce714567 1565 FEEDBACK1_to_W_I_2 ;
wolfSSL 7:481bce714567 1566 RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,34) ;
wolfSSL 7:481bce714567 1567 FEEDBACK_to_W_I_7 ;
wolfSSL 7:481bce714567 1568 RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,35) ;
wolfSSL 7:481bce714567 1569 ADD(W_I_TEMP, W_I_7, W_I_TEMP);
wolfSSL 7:481bce714567 1570 RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,35) ;
wolfSSL 7:481bce714567 1571 GAMMA1_1(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1572 RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,35) ;
wolfSSL 7:481bce714567 1573 GAMMA1_2(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1574 RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,36) ;
wolfSSL 7:481bce714567 1575 ADD(W_I, W_I_TEMP, YMM_TEMP0) ;/* now W[16..19] are completed */
wolfSSL 7:481bce714567 1576 RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,36) ;
wolfSSL 7:481bce714567 1577 FEEDBACK2_to_W_I_2 ;
wolfSSL 7:481bce714567 1578 RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,36) ;
wolfSSL 7:481bce714567 1579 GAMMA1_1(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1580 RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,37) ;
wolfSSL 7:481bce714567 1581 GAMMA1_2(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1582 RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,37) ;
wolfSSL 7:481bce714567 1583 ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..21] are completed */
wolfSSL 7:481bce714567 1584 RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,37) ;
wolfSSL 7:481bce714567 1585 FEEDBACK3_to_W_I_2 ;
wolfSSL 7:481bce714567 1586 RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,38) ;
wolfSSL 7:481bce714567 1587 GAMMA1_1(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1588 RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,38) ;
wolfSSL 7:481bce714567 1589 GAMMA1_2(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1590 RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,38) ;
wolfSSL 7:481bce714567 1591 ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..23] are completed */
wolfSSL 7:481bce714567 1592 RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,39) ;
wolfSSL 7:481bce714567 1593
wolfSSL 7:481bce714567 1594 MOVE_to_REG(YMM_TEMP0, K[40]) ;
wolfSSL 7:481bce714567 1595 RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,39) ;
wolfSSL 7:481bce714567 1596 ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I) ;
wolfSSL 7:481bce714567 1597 RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,39) ;
wolfSSL 7:481bce714567 1598 ADD(YMM_TEMP0, YMM_TEMP0, W_I) ;
wolfSSL 7:481bce714567 1599 MOVE_to_MEM(W_K[40], YMM_TEMP0) ;
wolfSSL 7:481bce714567 1600
wolfSSL 7:481bce714567 1601 /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16] */
wolfSSL 7:481bce714567 1602 RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40) ;
wolfSSL 7:481bce714567 1603 GAMMA0_1(W_I_TEMP, W_I_15) ;
wolfSSL 7:481bce714567 1604 RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40) ;
wolfSSL 7:481bce714567 1605 GAMMA0_2(W_I_TEMP, W_I_15) ;
wolfSSL 7:481bce714567 1606 RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40) ;
wolfSSL 7:481bce714567 1607 ADD(W_I_TEMP, W_I_16, W_I_TEMP) ;/* for saving W_I before adding incomplete W_I_7 */
wolfSSL 7:481bce714567 1608 RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,41) ;
wolfSSL 7:481bce714567 1609 ADD(W_I, W_I_7, W_I_TEMP);
wolfSSL 7:481bce714567 1610 RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,41) ;
wolfSSL 7:481bce714567 1611 GAMMA1_1(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1612 RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,41) ;
wolfSSL 7:481bce714567 1613 GAMMA1_2(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1614 RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,42) ;
wolfSSL 7:481bce714567 1615 ADD(W_I, W_I, YMM_TEMP0) ;/* now W[16..17] are completed */
wolfSSL 7:481bce714567 1616 RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,42) ;
wolfSSL 7:481bce714567 1617 FEEDBACK1_to_W_I_2 ;
wolfSSL 7:481bce714567 1618 RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,42) ;
wolfSSL 7:481bce714567 1619 FEEDBACK_to_W_I_7 ;
wolfSSL 7:481bce714567 1620 RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,43) ;
wolfSSL 7:481bce714567 1621 ADD(W_I_TEMP, W_I_7, W_I_TEMP);
wolfSSL 7:481bce714567 1622 RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,43) ;
wolfSSL 7:481bce714567 1623 GAMMA1_1(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1624 RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,43) ;
wolfSSL 7:481bce714567 1625 GAMMA1_2(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1626 RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,44) ;
wolfSSL 7:481bce714567 1627 ADD(W_I, W_I_TEMP, YMM_TEMP0) ;/* now W[16..19] are completed */
wolfSSL 7:481bce714567 1628 RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,44) ;
wolfSSL 7:481bce714567 1629 FEEDBACK2_to_W_I_2 ;
wolfSSL 7:481bce714567 1630 RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,44) ;
wolfSSL 7:481bce714567 1631 GAMMA1_1(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1632 RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,45) ;
wolfSSL 7:481bce714567 1633 GAMMA1_2(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1634 RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,45) ;
wolfSSL 7:481bce714567 1635 ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..21] are completed */
wolfSSL 7:481bce714567 1636 RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,45) ;
wolfSSL 7:481bce714567 1637 FEEDBACK3_to_W_I_2 ;
wolfSSL 7:481bce714567 1638 RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,46) ;
wolfSSL 7:481bce714567 1639 GAMMA1_1(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1640 RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,46) ;
wolfSSL 7:481bce714567 1641 GAMMA1_2(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1642 RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,46) ;
wolfSSL 7:481bce714567 1643 ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..23] are completed */
wolfSSL 7:481bce714567 1644 RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,47) ;
wolfSSL 7:481bce714567 1645
wolfSSL 7:481bce714567 1646 MOVE_to_REG(YMM_TEMP0, K[48]) ;
wolfSSL 7:481bce714567 1647 RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,47) ;
wolfSSL 7:481bce714567 1648 ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I) ;
wolfSSL 7:481bce714567 1649 RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,47) ;
wolfSSL 7:481bce714567 1650 ADD(YMM_TEMP0, YMM_TEMP0, W_I) ;
wolfSSL 7:481bce714567 1651 MOVE_to_MEM(W_K[48], YMM_TEMP0) ;
wolfSSL 7:481bce714567 1652
wolfSSL 7:481bce714567 1653 /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16] */
wolfSSL 7:481bce714567 1654 RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48) ;
wolfSSL 7:481bce714567 1655 GAMMA0_1(W_I_TEMP, W_I_15) ;
wolfSSL 7:481bce714567 1656 RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48) ;
wolfSSL 7:481bce714567 1657 GAMMA0_2(W_I_TEMP, W_I_15) ;
wolfSSL 7:481bce714567 1658 RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48) ;
wolfSSL 7:481bce714567 1659 ADD(W_I_TEMP, W_I_16, W_I_TEMP) ;/* for saving W_I before adding incomplete W_I_7 */
wolfSSL 7:481bce714567 1660 RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49) ;
wolfSSL 7:481bce714567 1661 ADD(W_I, W_I_7, W_I_TEMP);
wolfSSL 7:481bce714567 1662 RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49) ;
wolfSSL 7:481bce714567 1663 GAMMA1_1(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1664 RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49) ;
wolfSSL 7:481bce714567 1665 GAMMA1_2(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1666 RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50) ;
wolfSSL 7:481bce714567 1667 ADD(W_I, W_I, YMM_TEMP0) ;/* now W[16..17] are completed */
wolfSSL 7:481bce714567 1668 RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50) ;
wolfSSL 7:481bce714567 1669 FEEDBACK1_to_W_I_2 ;
wolfSSL 7:481bce714567 1670 RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50) ;
wolfSSL 7:481bce714567 1671 FEEDBACK_to_W_I_7 ;
wolfSSL 7:481bce714567 1672 RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51) ;
wolfSSL 7:481bce714567 1673 ADD(W_I_TEMP, W_I_7, W_I_TEMP);
wolfSSL 7:481bce714567 1674 RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51) ;
wolfSSL 7:481bce714567 1675 GAMMA1_1(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1676 RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51) ;
wolfSSL 7:481bce714567 1677 GAMMA1_2(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1678 RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52) ;
wolfSSL 7:481bce714567 1679 ADD(W_I, W_I_TEMP, YMM_TEMP0) ;/* now W[16..19] are completed */
wolfSSL 7:481bce714567 1680 RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52) ;
wolfSSL 7:481bce714567 1681 FEEDBACK2_to_W_I_2 ;
wolfSSL 7:481bce714567 1682 RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52) ;
wolfSSL 7:481bce714567 1683 GAMMA1_1(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1684 RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53) ;
wolfSSL 7:481bce714567 1685 GAMMA1_2(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1686 RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53) ;
wolfSSL 7:481bce714567 1687 ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..21] are completed */
wolfSSL 7:481bce714567 1688 RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53) ;
wolfSSL 7:481bce714567 1689 FEEDBACK3_to_W_I_2 ;
wolfSSL 7:481bce714567 1690 RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54) ;
wolfSSL 7:481bce714567 1691 GAMMA1_1(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1692 RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54) ;
wolfSSL 7:481bce714567 1693 GAMMA1_2(YMM_TEMP0, W_I_2) ;
wolfSSL 7:481bce714567 1694 RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54) ;
wolfSSL 7:481bce714567 1695 ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..23] are completed */
wolfSSL 7:481bce714567 1696 RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55) ;
wolfSSL 7:481bce714567 1697
wolfSSL 7:481bce714567 1698 MOVE_to_REG(YMM_TEMP0, K[56]) ;
wolfSSL 7:481bce714567 1699 RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55) ;
wolfSSL 7:481bce714567 1700 ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I) ;
wolfSSL 7:481bce714567 1701 RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55) ;
wolfSSL 7:481bce714567 1702 ADD(YMM_TEMP0, YMM_TEMP0, W_I) ;
wolfSSL 7:481bce714567 1703 MOVE_to_MEM(W_K[56], YMM_TEMP0) ;
wolfSSL 7:481bce714567 1704
wolfSSL 7:481bce714567 1705 RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,56) ;
wolfSSL 7:481bce714567 1706 RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,57) ;
wolfSSL 7:481bce714567 1707 RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,58) ;
wolfSSL 7:481bce714567 1708 RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,59) ;
wolfSSL 7:481bce714567 1709
wolfSSL 7:481bce714567 1710 RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,60) ;
wolfSSL 7:481bce714567 1711 RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,61) ;
wolfSSL 7:481bce714567 1712 RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,62) ;
wolfSSL 7:481bce714567 1713 RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,63) ;
wolfSSL 7:481bce714567 1714
wolfSSL 7:481bce714567 1715 RegToDigest(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7) ;
wolfSSL 7:481bce714567 1716
wolfSSL 7:481bce714567 1717 #ifdef WOLFSSL_SMALL_STACK
wolfSSL 7:481bce714567 1718 XFREE(W_K, NULL, DYNAMIC_TYPE_TMP_BUFFER);
wolfSSL 7:481bce714567 1719 #endif
wolfSSL 7:481bce714567 1720
wolfSSL 7:481bce714567 1721 return 0;
wolfSSL 7:481bce714567 1722 }
wolfSSL 7:481bce714567 1723
wolfSSL 7:481bce714567 1724 #endif /* HAVE_INTEL_AVX2 */
wolfSSL 7:481bce714567 1725
wolfSSL 7:481bce714567 1726 #ifdef WOLFSSL_SHA224
wolfSSL 7:481bce714567 1727 int wc_InitSha224(Sha224* sha224)
wolfSSL 7:481bce714567 1728 {
wolfSSL 7:481bce714567 1729 sha224->digest[0] = 0xc1059ed8;
wolfSSL 7:481bce714567 1730 sha224->digest[1] = 0x367cd507;
wolfSSL 7:481bce714567 1731 sha224->digest[2] = 0x3070dd17;
wolfSSL 7:481bce714567 1732 sha224->digest[3] = 0xf70e5939;
wolfSSL 7:481bce714567 1733 sha224->digest[4] = 0xffc00b31;
wolfSSL 7:481bce714567 1734 sha224->digest[5] = 0x68581511;
wolfSSL 7:481bce714567 1735 sha224->digest[6] = 0x64f98fa7;
wolfSSL 7:481bce714567 1736 sha224->digest[7] = 0xbefa4fa4;
wolfSSL 7:481bce714567 1737
wolfSSL 7:481bce714567 1738 sha224->buffLen = 0;
wolfSSL 7:481bce714567 1739 sha224->loLen = 0;
wolfSSL 7:481bce714567 1740 sha224->hiLen = 0;
wolfSSL 7:481bce714567 1741
wolfSSL 7:481bce714567 1742 #if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
wolfSSL 7:481bce714567 1743 set_Transform() ;
wolfSSL 7:481bce714567 1744 #endif
wolfSSL 7:481bce714567 1745
wolfSSL 7:481bce714567 1746 return 0;
wolfSSL 7:481bce714567 1747 }
wolfSSL 7:481bce714567 1748
wolfSSL 7:481bce714567 1749 int wc_Sha224Update(Sha224* sha224, const byte* data, word32 len)
wolfSSL 7:481bce714567 1750 {
wolfSSL 7:481bce714567 1751 return Sha256Update((Sha256 *)sha224, data, len);
wolfSSL 7:481bce714567 1752 }
wolfSSL 7:481bce714567 1753
wolfSSL 7:481bce714567 1754
wolfSSL 7:481bce714567 1755 int wc_Sha224Final(Sha224* sha224, byte* hash)
wolfSSL 7:481bce714567 1756 {
wolfSSL 7:481bce714567 1757 int ret = Sha256Final((Sha256 *)sha224);
wolfSSL 7:481bce714567 1758 if (ret != 0)
wolfSSL 7:481bce714567 1759 return ret;
wolfSSL 7:481bce714567 1760
wolfSSL 7:481bce714567 1761 #if defined(LITTLE_ENDIAN_ORDER)
wolfSSL 7:481bce714567 1762 ByteReverseWords(sha224->digest, sha224->digest, SHA224_DIGEST_SIZE);
wolfSSL 7:481bce714567 1763 #endif
wolfSSL 7:481bce714567 1764 XMEMCPY(hash, sha224->digest, SHA224_DIGEST_SIZE);
wolfSSL 7:481bce714567 1765
wolfSSL 7:481bce714567 1766 return wc_InitSha224(sha224); /* reset state */
wolfSSL 7:481bce714567 1767 }
wolfSSL 7:481bce714567 1768 #endif /* WOLFSSL_SHA224 */
wolfSSL 7:481bce714567 1769
wolfSSL 7:481bce714567 1770 #endif /* HAVE_FIPS */
wolfSSL 7:481bce714567 1771
wolfSSL 7:481bce714567 1772 #endif /* WOLFSSL_TI_HASH */
wolfSSL 7:481bce714567 1773
wolfSSL 7:481bce714567 1774 #endif /* NO_SHA256 */
wolfSSL 7:481bce714567 1775
wolfSSL 7:481bce714567 1776