Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of wolfSSL by
sha512.c
00001 /* sha512.c 00002 * 00003 * Copyright (C) 2006-2016 wolfSSL Inc. 00004 * 00005 * This file is part of wolfSSL. 00006 * 00007 * wolfSSL is free software; you can redistribute it and/or modify 00008 * it under the terms of the GNU General Public License as published by 00009 * the Free Software Foundation; either version 2 of the License, or 00010 * (at your option) any later version. 00011 * 00012 * wolfSSL is distributed in the hope that it will be useful, 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 * GNU General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU General Public License 00018 * along with this program; if not, write to the Free Software 00019 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA 00020 */ 00021 00022 00023 #ifdef HAVE_CONFIG_H 00024 #include <config.h> 00025 #endif 00026 00027 #include <wolfssl/wolfcrypt/settings.h> 00028 00029 #ifdef WOLFSSL_SHA512 00030 00031 #include <wolfssl/wolfcrypt/sha512.h> 00032 #include <wolfssl/wolfcrypt/error-crypt.h> 00033 00034 /* fips wrapper calls, user can call direct */ 00035 #ifdef HAVE_FIPS 00036 int wc_InitSha512(Sha512* sha) 00037 { 00038 return InitSha512_fips(sha); 00039 } 00040 int wc_InitSha512_ex(Sha512* sha, void* heap, int devId) 00041 { 00042 (void)heap; 00043 (void)devId; 00044 return InitSha512_fips(sha); 00045 } 00046 int wc_Sha512Update(Sha512* sha, const byte* data, word32 len) 00047 { 00048 return Sha512Update_fips(sha, data, len); 00049 } 00050 int wc_Sha512Final(Sha512* sha, byte* out) 00051 { 00052 return Sha512Final_fips(sha, out); 00053 } 00054 void wc_Sha512Free(Sha512* sha) 00055 { 00056 (void)sha; 00057 /* Not supported in FIPS */ 00058 } 00059 00060 #if defined(WOLFSSL_SHA384) || defined(HAVE_AESGCM) 00061 int wc_InitSha384(Sha384* sha) 00062 { 00063 return InitSha384_fips(sha); 00064 } 00065 int wc_InitSha384_ex(Sha384* sha, void* heap, int devId) 00066 { 00067 (void)heap; 00068 (void)devId; 00069 return InitSha384_fips(sha); 00070 } 00071 int wc_Sha384Update(Sha384* sha, const byte* data, word32 len) 00072 { 00073 return Sha384Update_fips(sha, data, len); 00074 } 00075 int wc_Sha384Final(Sha384* sha, byte* out) 00076 { 00077 return Sha384Final_fips(sha, out); 00078 } 00079 void wc_Sha384Free(Sha384* sha) 00080 { 00081 (void)sha; 00082 /* Not supported in FIPS */ 00083 } 00084 #endif /* WOLFSSL_SHA384 || HAVE_AESGCM */ 00085 00086 #else /* else build without using fips */ 00087 00088 #include <wolfssl/wolfcrypt/logging.h> 00089 00090 #ifdef NO_INLINE 00091 #include <wolfssl/wolfcrypt/misc.h> 00092 #else 00093 #define WOLFSSL_MISC_INCLUDED 00094 #include <wolfcrypt/src/misc.c> 00095 #endif 00096 00097 00098 #if defined(USE_INTEL_SPEEDUP) 00099 #define HAVE_INTEL_AVX1 00100 #define HAVE_INTEL_AVX2 00101 #endif 00102 00103 #if defined(HAVE_INTEL_AVX1) 00104 /* #define DEBUG_XMM */ 00105 #endif 00106 00107 #if defined(HAVE_INTEL_AVX2) 00108 #define HAVE_INTEL_RORX 00109 /* #define DEBUG_YMM */ 00110 #endif 00111 00112 00113 #if defined(HAVE_INTEL_RORX) 00114 #define ROTR(func, bits, x) \ 00115 word64 func(word64 x) { word64 ret ;\ 00116 __asm__ ("rorx $"#bits", %1, %0\n\t":"=r"(ret):"r"(x):) ;\ 00117 return ret ;\ 00118 } 00119 00120 static INLINE ROTR(rotrFixed64_28, 28, x); 00121 static INLINE ROTR(rotrFixed64_34, 34, x); 00122 static INLINE ROTR(rotrFixed64_39, 39, x); 00123 static INLINE ROTR(rotrFixed64_14, 14, x); 00124 static INLINE ROTR(rotrFixed64_18, 18, x); 00125 static INLINE ROTR(rotrFixed64_41, 41, x); 00126 00127 #define S0_RORX(x) (rotrFixed64_28(x)^rotrFixed64_34(x)^rotrFixed64_39(x)) 00128 #define S1_RORX(x) (rotrFixed64_14(x)^rotrFixed64_18(x)^rotrFixed64_41(x)) 00129 #endif /* HAVE_INTEL_RORX */ 00130 00131 #if defined(HAVE_BYTEREVERSE64) && \ 00132 !defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2) 00133 #define ByteReverseWords64(out, in, size) ByteReverseWords64_1(out, size) 00134 #define ByteReverseWords64_1(buf, size) \ 00135 { unsigned int i ;\ 00136 for(i=0; i< size/sizeof(word64); i++){\ 00137 __asm__ volatile("bswapq %0":"+r"(buf[i])::) ;\ 00138 }\ 00139 } 00140 #endif 00141 00142 static int InitSha512(Sha512* sha512) 00143 { 00144 if (sha512 == NULL) 00145 return BAD_FUNC_ARG; 00146 00147 sha512->digest[0] = W64LIT(0x6a09e667f3bcc908); 00148 sha512->digest[1] = W64LIT(0xbb67ae8584caa73b); 00149 sha512->digest[2] = W64LIT(0x3c6ef372fe94f82b); 00150 sha512->digest[3] = W64LIT(0xa54ff53a5f1d36f1); 00151 sha512->digest[4] = W64LIT(0x510e527fade682d1); 00152 sha512->digest[5] = W64LIT(0x9b05688c2b3e6c1f); 00153 sha512->digest[6] = W64LIT(0x1f83d9abfb41bd6b); 00154 sha512->digest[7] = W64LIT(0x5be0cd19137e2179); 00155 00156 sha512->buffLen = 0; 00157 sha512->loLen = 0; 00158 sha512->hiLen = 0; 00159 00160 return 0; 00161 } 00162 00163 00164 /* Hardware Acceleration */ 00165 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) 00166 00167 /***** 00168 Intel AVX1/AVX2 Macro Control Structure 00169 00170 #if defined(HAVE_INteL_SPEEDUP) 00171 #define HAVE_INTEL_AVX1 00172 #define HAVE_INTEL_AVX2 00173 #endif 00174 00175 int InitSha512(Sha512* sha512) { 00176 Save/Recover XMM, YMM 00177 ... 00178 00179 Check Intel AVX cpuid flags 00180 } 00181 00182 #if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2) 00183 Transform_AVX1(); # Function prototype 00184 Transform_AVX2(); # 00185 #endif 00186 00187 _Transform() { # Native Transform Function body 00188 00189 } 00190 00191 int Sha512Update() { 00192 Save/Recover XMM, YMM 00193 ... 00194 } 00195 00196 int Sha512Final() { 00197 Save/Recover XMM, YMM 00198 ... 00199 } 00200 00201 00202 #if defined(HAVE_INTEL_AVX1) 00203 00204 XMM Instructions/INLINE asm Definitions 00205 00206 #endif 00207 00208 #if defined(HAVE_INTEL_AVX2) 00209 00210 YMM Instructions/INLINE asm Definitions 00211 00212 #endif 00213 00214 #if defnied(HAVE_INTEL_AVX1) 00215 00216 int Transform_AVX1() { 00217 Stitched Message Sched/Round 00218 } 00219 00220 #endif 00221 00222 #if defnied(HAVE_INTEL_AVX2) 00223 00224 int Transform_AVX2() { 00225 Stitched Message Sched/Round 00226 } 00227 #endif 00228 00229 */ 00230 00231 00232 /* Each platform needs to query info type 1 from cpuid to see if aesni is 00233 * supported. Also, let's setup a macro for proper linkage w/o ABI conflicts 00234 */ 00235 00236 #ifndef _MSC_VER 00237 #define cpuid(reg, leaf, sub)\ 00238 __asm__ __volatile__ ("cpuid":\ 00239 "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3]) :\ 00240 "a" (leaf), "c"(sub)); 00241 00242 #define XASM_LINK(f) asm(f) 00243 #else 00244 00245 #include <intrin.h> 00246 #define cpuid(a,b) __cpuid((int*)a,b) 00247 00248 #define XASM_LINK(f) 00249 #endif /* _MSC_VER */ 00250 00251 #define EAX 0 00252 #define EBX 1 00253 #define ECX 2 00254 #define EDX 3 00255 00256 #define CPUID_AVX1 0x1 00257 #define CPUID_AVX2 0x2 00258 #define CPUID_RDRAND 0x4 00259 #define CPUID_RDSEED 0x8 00260 #define CPUID_BMI2 0x10 /* MULX, RORX */ 00261 00262 #define IS_INTEL_AVX1 (cpuid_flags & CPUID_AVX1) 00263 #define IS_INTEL_AVX2 (cpuid_flags & CPUID_AVX2) 00264 #define IS_INTEL_BMI2 (cpuid_flags & CPUID_BMI2) 00265 #define IS_INTEL_RDRAND (cpuid_flags & CPUID_RDRAND) 00266 #define IS_INTEL_RDSEED (cpuid_flags & CPUID_RDSEED) 00267 00268 static word32 cpuid_check = 0; 00269 static word32 cpuid_flags = 0; 00270 00271 static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit) { 00272 int got_intel_cpu = 0; 00273 unsigned int reg[5]; 00274 00275 reg[4] = '\0'; 00276 cpuid(reg, 0, 0); 00277 if (XMEMCMP((char *)&(reg[EBX]), "Genu", 4) == 0 && 00278 XMEMCMP((char *)&(reg[EDX]), "ineI", 4) == 0 && 00279 XMEMCMP((char *)&(reg[ECX]), "ntel", 4) == 0) { 00280 got_intel_cpu = 1; 00281 } 00282 if (got_intel_cpu) { 00283 cpuid(reg, leaf, sub); 00284 return ((reg[num] >> bit) & 0x1); 00285 } 00286 return 0; 00287 } 00288 00289 00290 static int set_cpuid_flags() { 00291 if(cpuid_check ==0) { 00292 if(cpuid_flag(1, 0, ECX, 28)){ cpuid_flags |= CPUID_AVX1 ;} 00293 if(cpuid_flag(7, 0, EBX, 5)){ cpuid_flags |= CPUID_AVX2 ; } 00294 if(cpuid_flag(7, 0, EBX, 8)) { cpuid_flags |= CPUID_BMI2 ; } 00295 if(cpuid_flag(1, 0, ECX, 30)){ cpuid_flags |= CPUID_RDRAND ; } 00296 if(cpuid_flag(7, 0, EBX, 18)){ cpuid_flags |= CPUID_RDSEED ; } 00297 cpuid_check = 1 ; 00298 return 0 ; 00299 } 00300 return 1 ; 00301 } 00302 00303 00304 #if defined(HAVE_INTEL_AVX1) 00305 static int Transform_AVX1(Sha512 *sha512); 00306 #endif 00307 #if defined(HAVE_INTEL_AVX2) 00308 static int Transform_AVX2(Sha512 *sha512); 00309 #if defined(HAVE_INTEL_AVX1) && defined(HAVE_INTEL_AVX2) && defined(HAVE_INTEL_RORX) 00310 static int Transform_AVX1_RORX(Sha512 *sha512); 00311 #endif 00312 #endif 00313 static int _Transform(Sha512 *sha512); 00314 static int (*Transform_p)(Sha512* sha512) = _Transform; 00315 #define Transform(sha512) (*Transform_p)(sha512) 00316 00317 /* Dummy for saving MM_REGs on behalf of Transform */ 00318 /* #if defined(HAVE_INTEL_AVX2) 00319 #define SAVE_XMM_YMM __asm__ volatile("orq %%r8, %%r8":::\ 00320 "%ymm0","%ymm1","%ymm2","%ymm3","%ymm4","%ymm5","%ymm6","%ymm7","%ymm8","%ymm9","%ymm10","%ymm11",\ 00321 "%ymm12","%ymm13","%ymm14","%ymm15") 00322 */ 00323 #if defined(HAVE_INTEL_AVX1) 00324 #define SAVE_XMM_YMM __asm__ volatile("orq %%r8, %%r8":::\ 00325 "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15") 00326 #endif 00327 00328 00329 int wc_InitSha512_ex(Sha512* sha512, void* heap, int devId) 00330 { 00331 int ret = InitSha512(sha512); 00332 00333 (void)heap; 00334 (void)devId; 00335 00336 if (set_cpuid_flags()) 00337 return ret; 00338 00339 #if defined(HAVE_INTEL_AVX2) 00340 if (IS_INTEL_AVX2 && IS_INTEL_BMI2) { 00341 Transform_p = Transform_AVX1_RORX; return ret; 00342 Transform_p = Transform_AVX2; 00343 /* for avoiding warning,"not used" */ 00344 } 00345 #endif 00346 #if defined(HAVE_INTEL_AVX1) 00347 Transform_p = ((IS_INTEL_AVX1) ? Transform_AVX1 : _Transform); return ret; 00348 #endif 00349 Transform_p = _Transform; 00350 00351 return ret; 00352 } 00353 00354 #else 00355 #define Transform(sha512) _Transform(sha512) 00356 00357 int wc_InitSha512_ex(Sha512* sha512, void* heap, int devId) 00358 { 00359 int ret = 0; 00360 00361 if (sha512 == NULL) 00362 return BAD_FUNC_ARG; 00363 00364 sha512->heap = heap; 00365 00366 ret = InitSha512(sha512); 00367 if (ret != 0) 00368 return ret; 00369 00370 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA512) 00371 ret = wolfAsync_DevCtxInit(&sha512->asyncDev, 00372 WOLFSSL_ASYNC_MARKER_SHA512, sha512->heap, devId); 00373 #else 00374 (void)devId; 00375 #endif /* WOLFSSL_ASYNC_CRYPT */ 00376 00377 return ret; 00378 } 00379 00380 #endif /* Hardware Acceleration */ 00381 00382 #ifndef SAVE_XMM_YMM 00383 #define SAVE_XMM_YMM 00384 #endif 00385 00386 static const word64 K512[80] = { 00387 W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd), 00388 W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc), 00389 W64LIT(0x3956c25bf348b538), W64LIT(0x59f111f1b605d019), 00390 W64LIT(0x923f82a4af194f9b), W64LIT(0xab1c5ed5da6d8118), 00391 W64LIT(0xd807aa98a3030242), W64LIT(0x12835b0145706fbe), 00392 W64LIT(0x243185be4ee4b28c), W64LIT(0x550c7dc3d5ffb4e2), 00393 W64LIT(0x72be5d74f27b896f), W64LIT(0x80deb1fe3b1696b1), 00394 W64LIT(0x9bdc06a725c71235), W64LIT(0xc19bf174cf692694), 00395 W64LIT(0xe49b69c19ef14ad2), W64LIT(0xefbe4786384f25e3), 00396 W64LIT(0x0fc19dc68b8cd5b5), W64LIT(0x240ca1cc77ac9c65), 00397 W64LIT(0x2de92c6f592b0275), W64LIT(0x4a7484aa6ea6e483), 00398 W64LIT(0x5cb0a9dcbd41fbd4), W64LIT(0x76f988da831153b5), 00399 W64LIT(0x983e5152ee66dfab), W64LIT(0xa831c66d2db43210), 00400 W64LIT(0xb00327c898fb213f), W64LIT(0xbf597fc7beef0ee4), 00401 W64LIT(0xc6e00bf33da88fc2), W64LIT(0xd5a79147930aa725), 00402 W64LIT(0x06ca6351e003826f), W64LIT(0x142929670a0e6e70), 00403 W64LIT(0x27b70a8546d22ffc), W64LIT(0x2e1b21385c26c926), 00404 W64LIT(0x4d2c6dfc5ac42aed), W64LIT(0x53380d139d95b3df), 00405 W64LIT(0x650a73548baf63de), W64LIT(0x766a0abb3c77b2a8), 00406 W64LIT(0x81c2c92e47edaee6), W64LIT(0x92722c851482353b), 00407 W64LIT(0xa2bfe8a14cf10364), W64LIT(0xa81a664bbc423001), 00408 W64LIT(0xc24b8b70d0f89791), W64LIT(0xc76c51a30654be30), 00409 W64LIT(0xd192e819d6ef5218), W64LIT(0xd69906245565a910), 00410 W64LIT(0xf40e35855771202a), W64LIT(0x106aa07032bbd1b8), 00411 W64LIT(0x19a4c116b8d2d0c8), W64LIT(0x1e376c085141ab53), 00412 W64LIT(0x2748774cdf8eeb99), W64LIT(0x34b0bcb5e19b48a8), 00413 W64LIT(0x391c0cb3c5c95a63), W64LIT(0x4ed8aa4ae3418acb), 00414 W64LIT(0x5b9cca4f7763e373), W64LIT(0x682e6ff3d6b2b8a3), 00415 W64LIT(0x748f82ee5defb2fc), W64LIT(0x78a5636f43172f60), 00416 W64LIT(0x84c87814a1f0ab72), W64LIT(0x8cc702081a6439ec), 00417 W64LIT(0x90befffa23631e28), W64LIT(0xa4506cebde82bde9), 00418 W64LIT(0xbef9a3f7b2c67915), W64LIT(0xc67178f2e372532b), 00419 W64LIT(0xca273eceea26619c), W64LIT(0xd186b8c721c0c207), 00420 W64LIT(0xeada7dd6cde0eb1e), W64LIT(0xf57d4f7fee6ed178), 00421 W64LIT(0x06f067aa72176fba), W64LIT(0x0a637dc5a2c898a6), 00422 W64LIT(0x113f9804bef90dae), W64LIT(0x1b710b35131c471b), 00423 W64LIT(0x28db77f523047d84), W64LIT(0x32caab7b40c72493), 00424 W64LIT(0x3c9ebe0a15c9bebc), W64LIT(0x431d67c49c100d4c), 00425 W64LIT(0x4cc5d4becb3e42b6), W64LIT(0x597f299cfc657e2a), 00426 W64LIT(0x5fcb6fab3ad6faec), W64LIT(0x6c44198c4a475817) 00427 }; 00428 00429 00430 00431 #define blk0(i) (W[i] = sha512->buffer[i]) 00432 00433 #define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15])) 00434 00435 #define Ch(x,y,z) (z^(x&(y^z))) 00436 #define Maj(x,y,z) ((x&y)|(z&(x|y))) 00437 00438 #define a(i) T[(0-i)&7] 00439 #define b(i) T[(1-i)&7] 00440 #define c(i) T[(2-i)&7] 00441 #define d(i) T[(3-i)&7] 00442 #define e(i) T[(4-i)&7] 00443 #define f(i) T[(5-i)&7] 00444 #define g(i) T[(6-i)&7] 00445 #define h(i) T[(7-i)&7] 00446 00447 #define S0(x) (rotrFixed64(x,28)^rotrFixed64(x,34)^rotrFixed64(x,39)) 00448 #define S1(x) (rotrFixed64(x,14)^rotrFixed64(x,18)^rotrFixed64(x,41)) 00449 #define s0(x) (rotrFixed64(x,1)^rotrFixed64(x,8)^(x>>7)) 00450 #define s1(x) (rotrFixed64(x,19)^rotrFixed64(x,61)^(x>>6)) 00451 00452 #define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+K[i+j]+(j?blk2(i):blk0(i));\ 00453 d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i)) 00454 00455 static int _Transform(Sha512* sha512) 00456 { 00457 const word64* K = K512; 00458 00459 word32 j; 00460 word64 T[8]; 00461 00462 00463 #ifdef WOLFSSL_SMALL_STACK 00464 word64* W; 00465 W = (word64*) XMALLOC(sizeof(word64) * 16, NULL, DYNAMIC_TYPE_TMP_BUFFER); 00466 if (W == NULL) 00467 return MEMORY_E; 00468 #else 00469 word64 W[16]; 00470 #endif 00471 00472 /* Copy digest to working vars */ 00473 XMEMCPY(T, sha512->digest, sizeof(T)); 00474 00475 #ifdef USE_SLOW_SHA2 00476 /* over twice as small, but 50% slower */ 00477 /* 80 operations, not unrolled */ 00478 for (j = 0; j < 80; j += 16) { 00479 int m; 00480 for (m = 0; m < 16; m++) { /* braces needed here for macros {} */ 00481 R(m); 00482 } 00483 } 00484 #else 00485 /* 80 operations, partially loop unrolled */ 00486 for (j = 0; j < 80; j += 16) { 00487 R( 0); R( 1); R( 2); R( 3); 00488 R( 4); R( 5); R( 6); R( 7); 00489 R( 8); R( 9); R(10); R(11); 00490 R(12); R(13); R(14); R(15); 00491 } 00492 #endif /* USE_SLOW_SHA2 */ 00493 00494 /* Add the working vars back into digest */ 00495 00496 sha512->digest[0] += a(0); 00497 sha512->digest[1] += b(0); 00498 sha512->digest[2] += c(0); 00499 sha512->digest[3] += d(0); 00500 sha512->digest[4] += e(0); 00501 sha512->digest[5] += f(0); 00502 sha512->digest[6] += g(0); 00503 sha512->digest[7] += h(0); 00504 00505 /* Wipe variables */ 00506 ForceZero(W, sizeof(word64) * 16); 00507 ForceZero(T, sizeof(T)); 00508 00509 #ifdef WOLFSSL_SMALL_STACK 00510 XFREE(W, NULL, DYNAMIC_TYPE_TMP_BUFFER); 00511 #endif 00512 00513 return 0; 00514 } 00515 00516 00517 static INLINE void AddLength(Sha512* sha512, word32 len) 00518 { 00519 word64 tmp = sha512->loLen; 00520 if ( (sha512->loLen += len) < tmp) 00521 sha512->hiLen++; /* carry low to high */ 00522 } 00523 00524 static INLINE int Sha512Update(Sha512* sha512, const byte* data, word32 len) 00525 { 00526 int ret = 0; 00527 00528 /* do block size increments */ 00529 byte* local = (byte*)sha512->buffer; 00530 00531 /* check that internal buffLen is valid */ 00532 if (sha512->buffLen >= SHA512_BLOCK_SIZE) 00533 return BUFFER_E; 00534 00535 SAVE_XMM_YMM; /* for Intel AVX */ 00536 00537 while (len) { 00538 word32 add = min(len, SHA512_BLOCK_SIZE - sha512->buffLen); 00539 XMEMCPY(&local[sha512->buffLen], data, add); 00540 00541 sha512->buffLen += add; 00542 data += add; 00543 len -= add; 00544 00545 if (sha512->buffLen == SHA512_BLOCK_SIZE) { 00546 #if defined(LITTLE_ENDIAN_ORDER) 00547 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) 00548 if(!IS_INTEL_AVX1 && !IS_INTEL_AVX2) 00549 #endif 00550 ByteReverseWords64(sha512->buffer, sha512->buffer, 00551 SHA512_BLOCK_SIZE); 00552 #endif 00553 ret = Transform(sha512); 00554 if (ret != 0) 00555 break; 00556 00557 AddLength(sha512, SHA512_BLOCK_SIZE); 00558 sha512->buffLen = 0; 00559 } 00560 } 00561 00562 return ret; 00563 } 00564 00565 int wc_Sha512Update(Sha512* sha512, const byte* data, word32 len) 00566 { 00567 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA512) 00568 if (sha512->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA512) { 00569 #if defined(HAVE_INTEL_QA) 00570 return IntelQaSymSha512(&sha512->asyncDev, NULL, data, len); 00571 #endif 00572 } 00573 #endif /* WOLFSSL_ASYNC_CRYPT */ 00574 00575 return Sha512Update(sha512, data, len); 00576 } 00577 00578 00579 static INLINE int Sha512Final(Sha512* sha512) 00580 { 00581 byte* local = (byte*)sha512->buffer; 00582 int ret; 00583 00584 SAVE_XMM_YMM ; /* for Intel AVX */ 00585 AddLength(sha512, sha512->buffLen); /* before adding pads */ 00586 00587 local[sha512->buffLen++] = 0x80; /* add 1 */ 00588 00589 /* pad with zeros */ 00590 if (sha512->buffLen > SHA512_PAD_SIZE) { 00591 XMEMSET(&local[sha512->buffLen], 0, SHA512_BLOCK_SIZE - sha512->buffLen); 00592 sha512->buffLen += SHA512_BLOCK_SIZE - sha512->buffLen; 00593 #if defined(LITTLE_ENDIAN_ORDER) 00594 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) 00595 if (!IS_INTEL_AVX1 && !IS_INTEL_AVX2) 00596 #endif 00597 ByteReverseWords64(sha512->buffer,sha512->buffer,SHA512_BLOCK_SIZE); 00598 00599 #endif /* LITTLE_ENDIAN_ORDER */ 00600 ret = Transform(sha512); 00601 if (ret != 0) 00602 return ret; 00603 00604 sha512->buffLen = 0; 00605 } 00606 XMEMSET(&local[sha512->buffLen], 0, SHA512_PAD_SIZE - sha512->buffLen); 00607 00608 /* put lengths in bits */ 00609 sha512->hiLen = (sha512->loLen >> (8 * sizeof(sha512->loLen) - 3)) + 00610 (sha512->hiLen << 3); 00611 sha512->loLen = sha512->loLen << 3; 00612 00613 /* store lengths */ 00614 #if defined(LITTLE_ENDIAN_ORDER) 00615 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) 00616 if (!IS_INTEL_AVX1 && !IS_INTEL_AVX2) 00617 #endif 00618 ByteReverseWords64(sha512->buffer, sha512->buffer, SHA512_PAD_SIZE); 00619 #endif 00620 /* ! length ordering dependent on digest endian type ! */ 00621 00622 sha512->buffer[SHA512_BLOCK_SIZE / sizeof(word64) - 2] = sha512->hiLen; 00623 sha512->buffer[SHA512_BLOCK_SIZE / sizeof(word64) - 1] = sha512->loLen; 00624 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) 00625 if (IS_INTEL_AVX1 || IS_INTEL_AVX2) 00626 ByteReverseWords64(&(sha512->buffer[SHA512_BLOCK_SIZE / sizeof(word64) - 2]), 00627 &(sha512->buffer[SHA512_BLOCK_SIZE / sizeof(word64) - 2]), 00628 SHA512_BLOCK_SIZE - SHA512_PAD_SIZE); 00629 #endif 00630 ret = Transform(sha512); 00631 if (ret != 0) 00632 return ret; 00633 00634 #ifdef LITTLE_ENDIAN_ORDER 00635 ByteReverseWords64(sha512->digest, sha512->digest, SHA512_DIGEST_SIZE); 00636 #endif 00637 00638 return 0; 00639 } 00640 00641 int wc_Sha512Final(Sha512* sha512, byte* hash) 00642 { 00643 int ret; 00644 00645 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA512) 00646 if (sha512->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA512) { 00647 #if defined(HAVE_INTEL_QA) 00648 return IntelQaSymSha512(&sha512->asyncDev, hash, NULL, 00649 SHA512_DIGEST_SIZE); 00650 #endif 00651 } 00652 #endif /* WOLFSSL_ASYNC_CRYPT */ 00653 00654 ret = Sha512Final(sha512); 00655 if (ret != 0) 00656 return ret; 00657 00658 XMEMCPY(hash, sha512->digest, SHA512_DIGEST_SIZE); 00659 00660 return InitSha512(sha512); /* reset state */ 00661 } 00662 00663 00664 int wc_InitSha512(Sha512* sha512) 00665 { 00666 return wc_InitSha512_ex(sha512, NULL, INVALID_DEVID); 00667 } 00668 00669 void wc_Sha512Free(Sha512* sha512) 00670 { 00671 if (sha512 == NULL) 00672 return; 00673 00674 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA512) 00675 wolfAsync_DevCtxFree(&sha512->asyncDev, WOLFSSL_ASYNC_MARKER_SHA512); 00676 #endif /* WOLFSSL_ASYNC_CRYPT */ 00677 } 00678 00679 00680 #if defined(HAVE_INTEL_AVX1) 00681 00682 #define Rx_1(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+K[i+j] + W_X[i]; 00683 #define Rx_2(i) d(i)+=h(i); 00684 #define Rx_3(i) h(i)+=S0(a(i))+Maj(a(i),b(i),c(i)); 00685 00686 #if defined(HAVE_INTEL_RORX) 00687 00688 #define Rx_RORX_1(i) h(i)+=S1_RORX(e(i))+Ch(e(i),f(i),g(i))+K[i+j] + W_X[i]; 00689 #define Rx_RORX_2(i) d(i)+=h(i); 00690 #define Rx_RORX_3(i) h(i)+=S0_RORX(a(i))+Maj(a(i),b(i),c(i)); 00691 #endif /* HAVE_INTEL_RORX */ 00692 00693 #endif /* HAVE_INTEL_AVX1 */ 00694 00695 #if defined(HAVE_INTEL_AVX2) 00696 #define Ry_1(i, w) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+K[i+j] + w; 00697 #define Ry_2(i, w) d(i)+=h(i); 00698 #define Ry_3(i, w) h(i)+=S0(a(i))+Maj(a(i),b(i),c(i)); 00699 #endif /* HAVE_INTEL_AVX2 */ 00700 00701 /* INLINE Assember for Intel AVX1 instructions */ 00702 #if defined(HAVE_INTEL_AVX1) 00703 #if defined(DEBUG_XMM) 00704 #define SAVE_REG(i) __asm__ volatile("vmovdqu %%xmm"#i", %0 \n\t":"=m"(reg[i][0])::XMM_REGs); 00705 #define RECV_REG(i) __asm__ volatile("vmovdqu %0, %%xmm"#i" \n\t"::"m"(reg[i][0]):XMM_REGs); 00706 00707 #define _DUMP_REG(REG, name)\ 00708 { word64 buf[16];word64 reg[16][2];int k;\ 00709 SAVE_REG(0); SAVE_REG(1); SAVE_REG(2); SAVE_REG(3); SAVE_REG(4); \ 00710 SAVE_REG(5); SAVE_REG(6); SAVE_REG(7);SAVE_REG(8); SAVE_REG(9); SAVE_REG(10);\ 00711 SAVE_REG(11); SAVE_REG(12); SAVE_REG(13); SAVE_REG(14); SAVE_REG(15); \ 00712 __asm__ volatile("vmovdqu %%"#REG", %0 \n\t":"=m"(buf[0])::XMM_REGs);\ 00713 printf(" "#name":\t"); for(k=0; k<2; k++) printf("%016lx.", (word64)(buf[k])); printf("\n"); \ 00714 RECV_REG(0); RECV_REG(1); RECV_REG(2); RECV_REG(3); RECV_REG(4);\ 00715 RECV_REG(5); RECV_REG(6); RECV_REG(7); RECV_REG(8); RECV_REG(9);\ 00716 RECV_REG(10); RECV_REG(11); RECV_REG(12); RECV_REG(13); RECV_REG(14); RECV_REG(15);\ 00717 } 00718 00719 #define DUMP_REG(REG) _DUMP_REG(REG, #REG) 00720 #define PRINTF(fmt, ...) 00721 #else 00722 #define DUMP_REG(REG) 00723 #define PRINTF(fmt, ...) 00724 #endif /* DEBUG_XMM */ 00725 00726 #define _MOVE_to_REG(xymm, mem) __asm__ volatile("vmovdqu %0, %%"#xymm" "\ 00727 :: "m"(mem):XMM_REGs); 00728 #define _MOVE_to_MEM(mem,i, xymm) __asm__ volatile("vmovdqu %%"#xymm", %0" :\ 00729 "=m"(mem[i]),"=m"(mem[i+1]),"=m"(mem[i+2]),"=m"(mem[i+3])::XMM_REGs); 00730 #define _MOVE(dest, src) __asm__ volatile("vmovdqu %%"#src", %%"\ 00731 #dest" ":::XMM_REGs); 00732 00733 #define _S_TEMP(dest, src, bits, temp) __asm__ volatile("vpsrlq $"#bits", %%"\ 00734 #src", %%"#dest"\n\tvpsllq $64-"#bits", %%"#src", %%"#temp"\n\tvpor %%"\ 00735 #temp",%%"#dest", %%"#dest" ":::XMM_REGs); 00736 #define _AVX1_R(dest, src, bits) __asm__ volatile("vpsrlq $"#bits", %%"\ 00737 #src", %%"#dest" ":::XMM_REGs); 00738 #define _XOR(dest, src1, src2) __asm__ volatile("vpxor %%"#src1", %%"\ 00739 #src2", %%"#dest" ":::XMM_REGs); 00740 #define _OR(dest, src1, src2) __asm__ volatile("vpor %%"#src1", %%"\ 00741 #src2", %%"#dest" ":::XMM_REGs); 00742 #define _ADD(dest, src1, src2) __asm__ volatile("vpaddq %%"#src1", %%"\ 00743 #src2", %%"#dest" ":::XMM_REGs); 00744 #define _ADD_MEM(dest, src1, mem) __asm__ volatile("vpaddq %0, %%"#src1", %%"\ 00745 #dest" "::"m"(mem):XMM_REGs); 00746 00747 #define MOVE_to_REG(xymm, mem) _MOVE_to_REG(xymm, mem) 00748 #define MOVE_to_MEM(mem, i, xymm) _MOVE_to_MEM(mem, i, xymm) 00749 #define MOVE(dest, src) _MOVE(dest, src) 00750 00751 #define XOR(dest, src1, src2) _XOR(dest, src1, src2) 00752 #define OR(dest, src1, src2) _OR(dest, src1, src2) 00753 #define ADD(dest, src1, src2) _ADD(dest, src1, src2) 00754 00755 #define S_TMP(dest, src, bits, temp) _S_TEMP(dest, src, bits, temp); 00756 #define AVX1_S(dest, src, bits) S_TMP(dest, src, bits, S_TEMP) 00757 #define AVX1_R(dest, src, bits) _AVX1_R(dest, src, bits) 00758 00759 #define Init_Mask(mask) \ 00760 __asm__ volatile("vmovdqu %0, %%xmm1\n\t"::"m"(mask):"%xmm1"); 00761 00762 #define _W_from_buff1(w, buff, xmm) \ 00763 /* X0..3(xmm4..7), W[0..15] = sha512->buffer[0.15]; */\ 00764 __asm__ volatile("vmovdqu %1, %%"#xmm"\n\t"\ 00765 "vpshufb %%xmm1, %%"#xmm", %%"#xmm"\n\t"\ 00766 "vmovdqu %%"#xmm", %0"\ 00767 :"=m"(w): "m"(buff):"%xmm0"); 00768 00769 #define W_from_buff1(w, buff, xmm) _W_from_buff1(w, buff, xmm) 00770 00771 #define W_from_buff(w, buff)\ 00772 Init_Mask(mBYTE_FLIP_MASK[0]);\ 00773 W_from_buff1(w[0], buff[0], W_0);\ 00774 W_from_buff1(w[2], buff[2], W_2);\ 00775 W_from_buff1(w[4], buff[4], W_4);\ 00776 W_from_buff1(w[6], buff[6], W_6);\ 00777 W_from_buff1(w[8], buff[8], W_8);\ 00778 W_from_buff1(w[10],buff[10],W_10);\ 00779 W_from_buff1(w[12],buff[12],W_12);\ 00780 W_from_buff1(w[14],buff[14],W_14); 00781 00782 static word64 mBYTE_FLIP_MASK[] = { 0x0001020304050607, 0x08090a0b0c0d0e0f }; 00783 00784 #define W_I_15 xmm14 00785 #define W_I_7 xmm11 00786 #define W_I_2 xmm13 00787 #define W_I xmm12 00788 #define G_TEMP xmm0 00789 #define S_TEMP xmm1 00790 #define XMM_TEMP0 xmm2 00791 00792 #define W_0 xmm12 00793 #define W_2 xmm3 00794 #define W_4 xmm4 00795 #define W_6 xmm5 00796 #define W_8 xmm6 00797 #define W_10 xmm7 00798 #define W_12 xmm8 00799 #define W_14 xmm9 00800 00801 #define XMM_REGs 00802 00803 #define s0_1(dest, src) AVX1_S(dest, src, 1); 00804 #define s0_2(dest, src) AVX1_S(G_TEMP, src, 8); XOR(dest, G_TEMP, dest); 00805 #define s0_3(dest, src) AVX1_R(G_TEMP, src, 7); XOR(dest, G_TEMP, dest); 00806 00807 #define s1_1(dest, src) AVX1_S(dest, src, 19); 00808 #define s1_2(dest, src) AVX1_S(G_TEMP, src, 61); XOR(dest, G_TEMP, dest); 00809 #define s1_3(dest, src) AVX1_R(G_TEMP, src, 6); XOR(dest, G_TEMP, dest); 00810 00811 #define s0_(dest, src) s0_1(dest, src); s0_2(dest, src); s0_3(dest, src) 00812 #define s1_(dest, src) s1_1(dest, src); s1_2(dest, src); s1_3(dest, src) 00813 00814 #define Block_xx_1(i) \ 00815 MOVE_to_REG(W_I_15, W_X[(i-15)&15]);\ 00816 MOVE_to_REG(W_I_7, W_X[(i- 7)&15]);\ 00817 00818 #define Block_xx_2(i) \ 00819 MOVE_to_REG(W_I_2, W_X[(i- 2)&15]);\ 00820 MOVE_to_REG(W_I, W_X[(i)]);\ 00821 00822 #define Block_xx_3(i) \ 00823 s0_ (XMM_TEMP0, W_I_15);\ 00824 00825 #define Block_xx_4(i) \ 00826 ADD(W_I, W_I, XMM_TEMP0);\ 00827 ADD(W_I, W_I, W_I_7);\ 00828 00829 #define Block_xx_5(i) \ 00830 s1_ (XMM_TEMP0, W_I_2);\ 00831 00832 #define Block_xx_6(i) \ 00833 ADD(W_I, W_I, XMM_TEMP0);\ 00834 MOVE_to_MEM(W_X,i, W_I);\ 00835 if (i==0)\ 00836 MOVE_to_MEM(W_X,16, W_I);\ 00837 00838 #define Block_xx_7(i) \ 00839 MOVE_to_REG(W_I_15, W_X[(i-15)&15]);\ 00840 MOVE_to_REG(W_I_7, W_X[(i- 7)&15]);\ 00841 00842 #define Block_xx_8(i) \ 00843 MOVE_to_REG(W_I_2, W_X[(i- 2)&15]);\ 00844 MOVE_to_REG(W_I, W_X[(i)]);\ 00845 00846 #define Block_xx_9(i) \ 00847 s0_ (XMM_TEMP0, W_I_15);\ 00848 00849 #define Block_xx_10(i) \ 00850 ADD(W_I, W_I, XMM_TEMP0);\ 00851 ADD(W_I, W_I, W_I_7);\ 00852 00853 #define Block_xx_11(i) \ 00854 s1_ (XMM_TEMP0, W_I_2);\ 00855 00856 #define Block_xx_12(i) \ 00857 ADD(W_I, W_I, XMM_TEMP0);\ 00858 MOVE_to_MEM(W_X,i, W_I);\ 00859 if ((i)==0)\ 00860 MOVE_to_MEM(W_X,16, W_I);\ 00861 00862 static INLINE void Block_0_1(word64 *W_X) { Block_xx_1(0); } 00863 static INLINE void Block_0_2(word64 *W_X) { Block_xx_2(0); } 00864 static INLINE void Block_0_3(void) { Block_xx_3(0); } 00865 static INLINE void Block_0_4(void) { Block_xx_4(0); } 00866 static INLINE void Block_0_5(void) { Block_xx_5(0); } 00867 static INLINE void Block_0_6(word64 *W_X) { Block_xx_6(0); } 00868 static INLINE void Block_0_7(word64 *W_X) { Block_xx_7(2); } 00869 static INLINE void Block_0_8(word64 *W_X) { Block_xx_8(2); } 00870 static INLINE void Block_0_9(void) { Block_xx_9(2); } 00871 static INLINE void Block_0_10(void){ Block_xx_10(2); } 00872 static INLINE void Block_0_11(void){ Block_xx_11(2); } 00873 static INLINE void Block_0_12(word64 *W_X){ Block_xx_12(2); } 00874 00875 static INLINE void Block_4_1(word64 *W_X) { Block_xx_1(4); } 00876 static INLINE void Block_4_2(word64 *W_X) { Block_xx_2(4); } 00877 static INLINE void Block_4_3(void) { Block_xx_3(4); } 00878 static INLINE void Block_4_4(void) { Block_xx_4(4); } 00879 static INLINE void Block_4_5(void) { Block_xx_5(4); } 00880 static INLINE void Block_4_6(word64 *W_X) { Block_xx_6(4); } 00881 static INLINE void Block_4_7(word64 *W_X) { Block_xx_7(6); } 00882 static INLINE void Block_4_8(word64 *W_X) { Block_xx_8(6); } 00883 static INLINE void Block_4_9(void) { Block_xx_9(6); } 00884 static INLINE void Block_4_10(void){ Block_xx_10(6); } 00885 static INLINE void Block_4_11(void){ Block_xx_11(6); } 00886 static INLINE void Block_4_12(word64 *W_X){ Block_xx_12(6); } 00887 00888 static INLINE void Block_8_1(word64 *W_X) { Block_xx_1(8); } 00889 static INLINE void Block_8_2(word64 *W_X) { Block_xx_2(8); } 00890 static INLINE void Block_8_3(void) { Block_xx_3(8); } 00891 static INLINE void Block_8_4(void) { Block_xx_4(8); } 00892 static INLINE void Block_8_5(void) { Block_xx_5(8); } 00893 static INLINE void Block_8_6(word64 *W_X) { Block_xx_6(8); } 00894 static INLINE void Block_8_7(word64 *W_X) { Block_xx_7(10); } 00895 static INLINE void Block_8_8(word64 *W_X) { Block_xx_8(10); } 00896 static INLINE void Block_8_9(void) { Block_xx_9(10); } 00897 static INLINE void Block_8_10(void){ Block_xx_10(10); } 00898 static INLINE void Block_8_11(void){ Block_xx_11(10); } 00899 static INLINE void Block_8_12(word64 *W_X){ Block_xx_12(10); } 00900 00901 static INLINE void Block_12_1(word64 *W_X) { Block_xx_1(12); } 00902 static INLINE void Block_12_2(word64 *W_X) { Block_xx_2(12); } 00903 static INLINE void Block_12_3(void) { Block_xx_3(12); } 00904 static INLINE void Block_12_4(void) { Block_xx_4(12); } 00905 static INLINE void Block_12_5(void) { Block_xx_5(12); } 00906 static INLINE void Block_12_6(word64 *W_X) { Block_xx_6(12); } 00907 static INLINE void Block_12_7(word64 *W_X) { Block_xx_7(14); } 00908 static INLINE void Block_12_8(word64 *W_X) { Block_xx_8(14); } 00909 static INLINE void Block_12_9(void) { Block_xx_9(14); } 00910 static INLINE void Block_12_10(void){ Block_xx_10(14); } 00911 static INLINE void Block_12_11(void){ Block_xx_11(14); } 00912 static INLINE void Block_12_12(word64 *W_X){ Block_xx_12(14); } 00913 00914 #endif /* HAVE_INTEL_AVX1 */ 00915 00916 #if defined(HAVE_INTEL_AVX2) 00917 static const unsigned long mBYTE_FLIP_MASK_Y[] = 00918 { 0x0001020304050607, 0x08090a0b0c0d0e0f, 0x0001020304050607, 0x08090a0b0c0d0e0f }; 00919 00920 #define W_from_buff_Y(buff)\ 00921 { /* X0..3(ymm9..12), W_X[0..15] = sha512->buffer[0.15]; */\ 00922 __asm__ volatile("vmovdqu %0, %%ymm8\n\t"::"m"(mBYTE_FLIP_MASK_Y[0]):YMM_REGs);\ 00923 __asm__ volatile("vmovdqu %0, %%ymm12\n\t"\ 00924 "vmovdqu %1, %%ymm4\n\t"\ 00925 "vpshufb %%ymm8, %%ymm12, %%ymm12\n\t"\ 00926 "vpshufb %%ymm8, %%ymm4, %%ymm4\n\t"\ 00927 :: "m"(buff[0]), "m"(buff[4]):YMM_REGs);\ 00928 __asm__ volatile("vmovdqu %0, %%ymm5\n\t"\ 00929 "vmovdqu %1, %%ymm6\n\t"\ 00930 "vpshufb %%ymm8, %%ymm5, %%ymm5\n\t"\ 00931 "vpshufb %%ymm8, %%ymm6, %%ymm6\n\t"\ 00932 :: "m"(buff[8]), "m"(buff[12]):YMM_REGs);\ 00933 } 00934 00935 #if defined(DEBUG_YMM) 00936 #define SAVE_REG_Y(i) __asm__ volatile("vmovdqu %%ymm"#i", %0 \n\t":"=m"(reg[i-4][0])::YMM_REGs); 00937 #define RECV_REG_Y(i) __asm__ volatile("vmovdqu %0, %%ymm"#i" \n\t"::"m"(reg[i-4][0]):YMM_REGs); 00938 00939 #define _DUMP_REG_Y(REG, name)\ 00940 { word64 buf[16];word64 reg[16][2];int k;\ 00941 SAVE_REG_Y(4); SAVE_REG_Y(5); SAVE_REG_Y(6); SAVE_REG_Y(7); \ 00942 SAVE_REG_Y(8); SAVE_REG_Y(9); SAVE_REG_Y(10); SAVE_REG_Y(11); SAVE_REG_Y(12);\ 00943 SAVE_REG_Y(13); SAVE_REG_Y(14); SAVE_REG_Y(15); \ 00944 __asm__ volatile("vmovdqu %%"#REG", %0 \n\t":"=m"(buf[0])::YMM_REGs);\ 00945 printf(" "#name":\t"); for(k=0; k<4; k++) printf("%016lx.", (word64)buf[k]); printf("\n"); \ 00946 RECV_REG_Y(4); RECV_REG_Y(5); RECV_REG_Y(6); RECV_REG_Y(7); \ 00947 RECV_REG_Y(8); RECV_REG_Y(9); RECV_REG_Y(10); RECV_REG_Y(11); RECV_REG_Y(12); \ 00948 RECV_REG_Y(13); RECV_REG_Y(14); RECV_REG_Y(15);\ 00949 } 00950 00951 #define DUMP_REG_Y(REG) _DUMP_REG_Y(REG, #REG) 00952 #define DUMP_REG2_Y(REG) _DUMP_REG_Y(REG, #REG) 00953 #define PRINTF_Y(fmt, ...) 00954 #else 00955 #define DUMP_REG_Y(REG) 00956 #define DUMP_REG2_Y(REG) 00957 #define PRINTF_Y(fmt, ...) 00958 #endif /* DEBUG_YMM */ 00959 00960 #define _MOVE_to_REGy(ymm, mem) __asm__ volatile("vmovdqu %0, %%"#ymm" "\ 00961 :: "m"(mem):YMM_REGs); 00962 #define _MOVE_to_MEMy(mem,i, ymm) __asm__ volatile("vmovdqu %%"#ymm", %0" \ 00963 : "=m"(mem[i]),"=m"(mem[i+1]),"=m"(mem[i+2]),"=m"(mem[i+3])::YMM_REGs); 00964 #define _MOVE_128y(ymm0, ymm1, ymm2, map) __asm__ volatile("vperm2i128 $"\ 00965 #map", %%"#ymm2", %%"#ymm1", %%"#ymm0" ":::YMM_REGs); 00966 #define _S_TEMPy(dest, src, bits, temp) \ 00967 __asm__ volatile("vpsrlq $"#bits", %%"#src", %%"#dest"\n\tvpsllq $64-"#bits\ 00968 ", %%"#src", %%"#temp"\n\tvpor %%"#temp",%%"#dest", %%"#dest" ":::YMM_REGs); 00969 #define _AVX2_R(dest, src, bits) __asm__ volatile("vpsrlq $"#bits", %%"\ 00970 #src", %%"#dest" ":::YMM_REGs); 00971 #define _XORy(dest, src1, src2) __asm__ volatile("vpxor %%"#src1", %%"\ 00972 #src2", %%"#dest" ":::YMM_REGs); 00973 #define _ADDy(dest, src1, src2) __asm__ volatile("vpaddq %%"#src1", %%"\ 00974 #src2", %%"#dest" ":::YMM_REGs); 00975 #define _BLENDy(map, dest, src1, src2) __asm__ volatile("vpblendd $"#map", %%"\ 00976 #src1", %%"#src2", %%"#dest" ":::YMM_REGs); 00977 #define _BLENDQy(map, dest, src1, src2) __asm__ volatile("vblendpd $"#map", %%"\ 00978 #src1", %%"#src2", %%"#dest" ":::YMM_REGs); 00979 #define _PERMQy(map, dest, src) __asm__ volatile("vpermq $"#map", %%"\ 00980 #src", %%"#dest" ":::YMM_REGs); 00981 00982 #define MOVE_to_REGy(ymm, mem) _MOVE_to_REGy(ymm, mem) 00983 #define MOVE_to_MEMy(mem, i, ymm) _MOVE_to_MEMy(mem, i, ymm) 00984 00985 #define MOVE_128y(ymm0, ymm1, ymm2, map) _MOVE_128y(ymm0, ymm1, ymm2, map) 00986 #define XORy(dest, src1, src2) _XORy(dest, src1, src2) 00987 #define ADDy(dest, src1, src2) _ADDy(dest, src1, src2) 00988 #define BLENDy(map, dest, src1, src2) _BLENDy(map, dest, src1, src2) 00989 #define BLENDQy(map, dest, src1, src2) _BLENDQy(map, dest, src1, src2) 00990 #define PERMQy(map, dest, src) _PERMQy(map, dest, src) 00991 00992 00993 #define S_TMPy(dest, src, bits, temp) _S_TEMPy(dest, src, bits, temp); 00994 #define AVX2_S(dest, src, bits) S_TMPy(dest, src, bits, S_TEMPy) 00995 #define AVX2_R(dest, src, bits) _AVX2_R(dest, src, bits) 00996 00997 00998 #define FEEDBACK1_to_W_I_2(w_i_2, w_i) MOVE_128y(YMM_TEMP0, w_i, w_i, 0x08);\ 00999 BLENDy(0xf0, w_i_2, YMM_TEMP0, w_i_2); 01000 01001 #define MOVE_W_to_W_I_15(w_i_15, w_0, w_4) BLENDQy(0x1, w_i_15, w_4, w_0);\ 01002 PERMQy(0x39, w_i_15, w_i_15); 01003 #define MOVE_W_to_W_I_7(w_i_7, w_8, w_12) BLENDQy(0x1, w_i_7, w_12, w_8);\ 01004 PERMQy(0x39, w_i_7, w_i_7); 01005 #define MOVE_W_to_W_I_2(w_i_2, w_12) BLENDQy(0xc, w_i_2, w_12, w_i_2);\ 01006 PERMQy(0x0e, w_i_2, w_i_2); 01007 01008 01009 #define W_I_16y ymm8 01010 #define W_I_15y ymm9 01011 #define W_I_7y ymm10 01012 #define W_I_2y ymm11 01013 #define W_Iy ymm12 01014 #define G_TEMPy ymm13 01015 #define S_TEMPy ymm14 01016 #define YMM_TEMP0 ymm15 01017 #define YMM_TEMP0x xmm15 01018 #define W_I_TEMPy ymm7 01019 #define W_K_TEMPy ymm15 01020 #define W_K_TEMPx xmm15 01021 #define W_0y ymm12 01022 #define W_4y ymm4 01023 #define W_8y ymm5 01024 #define W_12y ymm6 01025 01026 #define YMM_REGs 01027 /* Registers are saved in Sha512Update/Final */ 01028 /* "%ymm7","%ymm8","%ymm9","%ymm10","%ymm11","%ymm12","%ymm13","%ymm14","%ymm15"*/ 01029 01030 #define MOVE_15_to_16(w_i_16, w_i_15, w_i_7)\ 01031 __asm__ volatile("vperm2i128 $0x01, %%"#w_i_15", %%"#w_i_15", %%"#w_i_15" ":::YMM_REGs);\ 01032 __asm__ volatile("vpblendd $0x08, %%"#w_i_15", %%"#w_i_7", %%"#w_i_16" ":::YMM_REGs);\ 01033 __asm__ volatile("vperm2i128 $0x01, %%"#w_i_7", %%"#w_i_7", %%"#w_i_15" ":::YMM_REGs);\ 01034 __asm__ volatile("vpblendd $0x80, %%"#w_i_15", %%"#w_i_16", %%"#w_i_16" ":::YMM_REGs);\ 01035 __asm__ volatile("vpshufd $0x93, %%"#w_i_16", %%"#w_i_16" ":::YMM_REGs);\ 01036 01037 #define MOVE_7_to_15(w_i_15, w_i_7)\ 01038 __asm__ volatile("vmovdqu %%"#w_i_7", %%"#w_i_15" ":::YMM_REGs);\ 01039 01040 #define MOVE_I_to_7(w_i_7, w_i)\ 01041 __asm__ volatile("vperm2i128 $0x01, %%"#w_i", %%"#w_i", %%"#w_i_7" ":::YMM_REGs);\ 01042 __asm__ volatile("vpblendd $0x01, %%"#w_i_7", %%"#w_i", %%"#w_i_7" ":::YMM_REGs);\ 01043 __asm__ volatile("vpshufd $0x39, %%"#w_i_7", %%"#w_i_7" ":::YMM_REGs);\ 01044 01045 #define MOVE_I_to_2(w_i_2, w_i)\ 01046 __asm__ volatile("vperm2i128 $0x01, %%"#w_i", %%"#w_i", %%"#w_i_2" ":::YMM_REGs);\ 01047 __asm__ volatile("vpshufd $0x0e, %%"#w_i_2", %%"#w_i_2" ":::YMM_REGs);\ 01048 01049 #endif /* HAVE_INTEL_AVX2 */ 01050 01051 01052 /*** Transform Body ***/ 01053 #if defined(HAVE_INTEL_AVX1) 01054 static int Transform_AVX1(Sha512* sha512) 01055 { 01056 const word64* K = K512; 01057 word64 W_X[16+4] = {0}; 01058 word32 j; 01059 word64 T[8]; 01060 01061 /* Copy digest to working vars */ 01062 XMEMCPY(T, sha512->digest, sizeof(T)); 01063 01064 W_from_buff(W_X, sha512->buffer); 01065 for (j = 0; j < 80; j += 16) { 01066 Rx_1( 0); Block_0_1(W_X); Rx_2( 0); Block_0_2(W_X); Rx_3( 0); Block_0_3(); 01067 Rx_1( 1); Block_0_4(); Rx_2( 1); Block_0_5(); Rx_3( 1); Block_0_6(W_X); 01068 Rx_1( 2); Block_0_7(W_X); Rx_2( 2); Block_0_8(W_X); Rx_3( 2); Block_0_9(); 01069 Rx_1( 3); Block_0_10();Rx_2( 3); Block_0_11();Rx_3( 3); Block_0_12(W_X); 01070 01071 Rx_1( 4); Block_4_1(W_X); Rx_2( 4); Block_4_2(W_X); Rx_3( 4); Block_4_3(); 01072 Rx_1( 5); Block_4_4(); Rx_2( 5); Block_4_5(); Rx_3( 5); Block_4_6(W_X); 01073 Rx_1( 6); Block_4_7(W_X); Rx_2( 6); Block_4_8(W_X); Rx_3( 6); Block_4_9(); 01074 Rx_1( 7); Block_4_10();Rx_2( 7); Block_4_11();Rx_3( 7); Block_4_12(W_X); 01075 01076 Rx_1( 8); Block_8_1(W_X); Rx_2( 8); Block_8_2(W_X); Rx_3( 8); Block_8_3(); 01077 Rx_1( 9); Block_8_4(); Rx_2( 9); Block_8_5(); Rx_3( 9); Block_8_6(W_X); 01078 Rx_1(10); Block_8_7(W_X); Rx_2(10); Block_8_8(W_X); Rx_3(10); Block_8_9(); 01079 Rx_1(11); Block_8_10();Rx_2(11); Block_8_11();Rx_3(11); Block_8_12(W_X); 01080 01081 Rx_1(12); Block_12_1(W_X); Rx_2(12); Block_12_2(W_X); Rx_3(12); Block_12_3(); 01082 Rx_1(13); Block_12_4(); Rx_2(13); Block_12_5(); Rx_3(13); Block_12_6(W_X); 01083 Rx_1(14); Block_12_7(W_X); Rx_2(14); Block_12_8(W_X); Rx_3(14); Block_12_9(); 01084 Rx_1(15); Block_12_10();Rx_2(15); Block_12_11();Rx_3(15); Block_12_12(W_X); 01085 } 01086 01087 /* Add the working vars back into digest */ 01088 sha512->digest[0] += a(0); 01089 sha512->digest[1] += b(0); 01090 sha512->digest[2] += c(0); 01091 sha512->digest[3] += d(0); 01092 sha512->digest[4] += e(0); 01093 sha512->digest[5] += f(0); 01094 sha512->digest[6] += g(0); 01095 sha512->digest[7] += h(0); 01096 01097 /* Wipe variables */ 01098 #if !defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2) 01099 XMEMSET(W_X, 0, sizeof(word64) * 16); 01100 #endif 01101 XMEMSET(T, 0, sizeof(T)); 01102 01103 return 0; 01104 } 01105 #endif /* HAVE_INTEL_AVX1 */ 01106 01107 #if defined(HAVE_INTEL_AVX2) && defined(HAVE_INTEL_AVX1) && defined(HAVE_INTEL_RORX) 01108 static int Transform_AVX1_RORX(Sha512* sha512) 01109 { 01110 const word64* K = K512; 01111 word64 W_X[16+4] = {0}; 01112 word32 j; 01113 word64 T[8]; 01114 01115 /* Copy digest to working vars */ 01116 XMEMCPY(T, sha512->digest, sizeof(T)); 01117 01118 W_from_buff(W_X, sha512->buffer); 01119 for (j = 0; j < 80; j += 16) { 01120 Rx_RORX_1( 0); Block_0_1(W_X); Rx_RORX_2( 0); Block_0_2(W_X); 01121 Rx_RORX_3( 0); Block_0_3(); 01122 Rx_RORX_1( 1); Block_0_4(); Rx_RORX_2( 1); Block_0_5(); 01123 Rx_RORX_3( 1); Block_0_6(W_X); 01124 Rx_RORX_1( 2); Block_0_7(W_X); Rx_RORX_2( 2); Block_0_8(W_X); 01125 Rx_RORX_3( 2); Block_0_9(); 01126 Rx_RORX_1( 3); Block_0_10();Rx_RORX_2( 3); Block_0_11(); 01127 Rx_RORX_3( 3); Block_0_12(W_X); 01128 01129 Rx_RORX_1( 4); Block_4_1(W_X); Rx_RORX_2( 4); Block_4_2(W_X); 01130 Rx_RORX_3( 4); Block_4_3(); 01131 Rx_RORX_1( 5); Block_4_4(); Rx_RORX_2( 5); Block_4_5(); 01132 Rx_RORX_3( 5); Block_4_6(W_X); 01133 Rx_RORX_1( 6); Block_4_7(W_X); Rx_RORX_2( 6); Block_4_8(W_X); 01134 Rx_RORX_3( 6); Block_4_9(); 01135 Rx_RORX_1( 7); Block_4_10();Rx_RORX_2( 7); Block_4_11(); 01136 Rx_RORX_3( 7); Block_4_12(W_X); 01137 01138 Rx_RORX_1( 8); Block_8_1(W_X); Rx_RORX_2( 8); Block_8_2(W_X); 01139 Rx_RORX_3( 8); Block_8_3(); 01140 Rx_RORX_1( 9); Block_8_4(); Rx_RORX_2( 9); Block_8_5(); 01141 Rx_RORX_3( 9); Block_8_6(W_X); 01142 Rx_RORX_1(10); Block_8_7(W_X); Rx_RORX_2(10); Block_8_8(W_X); 01143 Rx_RORX_3(10); Block_8_9(); 01144 Rx_RORX_1(11); Block_8_10();Rx_RORX_2(11); Block_8_11(); 01145 Rx_RORX_3(11); Block_8_12(W_X); 01146 01147 Rx_RORX_1(12); Block_12_1(W_X); Rx_RORX_2(12); Block_12_2(W_X); 01148 Rx_RORX_3(12); Block_12_3(); 01149 Rx_RORX_1(13); Block_12_4(); Rx_RORX_2(13); Block_12_5(); 01150 Rx_RORX_3(13); Block_12_6(W_X); 01151 Rx_RORX_1(14); Block_12_7(W_X); Rx_RORX_2(14); Block_12_8(W_X); 01152 Rx_RORX_3(14); Block_12_9(); 01153 Rx_RORX_1(15); Block_12_10();Rx_RORX_2(15); Block_12_11(); 01154 Rx_RORX_3(15); Block_12_12(W_X); 01155 } 01156 01157 /* Add the working vars back into digest */ 01158 sha512->digest[0] += a(0); 01159 sha512->digest[1] += b(0); 01160 sha512->digest[2] += c(0); 01161 sha512->digest[3] += d(0); 01162 sha512->digest[4] += e(0); 01163 sha512->digest[5] += f(0); 01164 sha512->digest[6] += g(0); 01165 sha512->digest[7] += h(0); 01166 01167 /* Wipe variables */ 01168 #if !defined(HAVE_INTEL_AVX1)&&!defined(HAVE_INTEL_AVX2) 01169 XMEMSET(W_X, 0, sizeof(word64) * 16); 01170 #endif 01171 XMEMSET(T, 0, sizeof(T)); 01172 01173 return 0; 01174 } 01175 #endif /* HAVE_INTEL_AVX2 && HAVE_INTEL_AVX1 && HAVE_INTEL_RORX */ 01176 01177 #if defined(HAVE_INTEL_AVX2) 01178 01179 #define s0_1y(dest, src) AVX2_S(dest, src, 1); 01180 #define s0_2y(dest, src) AVX2_S(G_TEMPy, src, 8); XORy(dest, G_TEMPy, dest); 01181 #define s0_3y(dest, src) AVX2_R(G_TEMPy, src, 7); XORy(dest, G_TEMPy, dest); 01182 01183 #define s1_1y(dest, src) AVX2_S(dest, src, 19); 01184 #define s1_2y(dest, src) AVX2_S(G_TEMPy, src, 61); XORy(dest, G_TEMPy, dest); 01185 #define s1_3y(dest, src) AVX2_R(G_TEMPy, src, 6); XORy(dest, G_TEMPy, dest); 01186 01187 #define s0_y(dest, src) s0_1y(dest, src); s0_2y(dest, src); s0_3y(dest, src) 01188 #define s1_y(dest, src) s1_1y(dest, src); s1_2y(dest, src); s1_3y(dest, src) 01189 01190 01191 #define Block_Y_xx_1(i, w_0, w_4, w_8, w_12)\ 01192 MOVE_W_to_W_I_15(W_I_15y, w_0, w_4);\ 01193 MOVE_W_to_W_I_7 (W_I_7y, w_8, w_12);\ 01194 MOVE_W_to_W_I_2 (W_I_2y, w_12);\ 01195 01196 #define Block_Y_xx_2(i, w_0, w_4, w_8, w_12)\ 01197 s0_1y (YMM_TEMP0, W_I_15y);\ 01198 01199 #define Block_Y_xx_3(i, w_0, w_4, w_8, w_12)\ 01200 s0_2y (YMM_TEMP0, W_I_15y);\ 01201 01202 #define Block_Y_xx_4(i, w_0, w_4, w_8, w_12)\ 01203 s0_3y (YMM_TEMP0, W_I_15y);\ 01204 01205 #define Block_Y_xx_5(i, w_0, w_4, w_8, w_12)\ 01206 ADDy(W_I_TEMPy, w_0, YMM_TEMP0);\ 01207 01208 #define Block_Y_xx_6(i, w_0, w_4, w_8, w_12)\ 01209 ADDy(W_I_TEMPy, W_I_TEMPy, W_I_7y);\ 01210 s1_1y (YMM_TEMP0, W_I_2y);\ 01211 01212 #define Block_Y_xx_7(i, w_0, w_4, w_8, w_12)\ 01213 s1_2y (YMM_TEMP0, W_I_2y);\ 01214 01215 #define Block_Y_xx_8(i, w_0, w_4, w_8, w_12)\ 01216 s1_3y (YMM_TEMP0, W_I_2y);\ 01217 ADDy(w_0, W_I_TEMPy, YMM_TEMP0);\ 01218 01219 #define Block_Y_xx_9(i, w_0, w_4, w_8, w_12)\ 01220 FEEDBACK1_to_W_I_2(W_I_2y, w_0);\ 01221 01222 #define Block_Y_xx_10(i, w_0, w_4, w_8, w_12) \ 01223 s1_1y (YMM_TEMP0, W_I_2y);\ 01224 01225 #define Block_Y_xx_11(i, w_0, w_4, w_8, w_12) \ 01226 s1_2y (YMM_TEMP0, W_I_2y);\ 01227 01228 #define Block_Y_xx_12(i, w_0, w_4, w_8, w_12)\ 01229 s1_3y (YMM_TEMP0, W_I_2y);\ 01230 ADDy(w_0, W_I_TEMPy, YMM_TEMP0);\ 01231 MOVE_to_MEMy(w,0, w_4);\ 01232 01233 01234 static INLINE void Block_Y_0_1(void) { Block_Y_xx_1(0, W_0y, W_4y, W_8y, W_12y); } 01235 static INLINE void Block_Y_0_2(void) { Block_Y_xx_2(0, W_0y, W_4y, W_8y, W_12y); } 01236 static INLINE void Block_Y_0_3(void) { Block_Y_xx_3(0, W_0y, W_4y, W_8y, W_12y); } 01237 static INLINE void Block_Y_0_4(void) { Block_Y_xx_4(0, W_0y, W_4y, W_8y, W_12y); } 01238 static INLINE void Block_Y_0_5(void) { Block_Y_xx_5(0, W_0y, W_4y, W_8y, W_12y); } 01239 static INLINE void Block_Y_0_6(void) { Block_Y_xx_6(0, W_0y, W_4y, W_8y, W_12y); } 01240 static INLINE void Block_Y_0_7(void) { Block_Y_xx_7(0, W_0y, W_4y, W_8y, W_12y); } 01241 static INLINE void Block_Y_0_8(void) { Block_Y_xx_8(0, W_0y, W_4y, W_8y, W_12y); } 01242 static INLINE void Block_Y_0_9(void) { Block_Y_xx_9(0, W_0y, W_4y, W_8y, W_12y); } 01243 static INLINE void Block_Y_0_10(void){ Block_Y_xx_10(0, W_0y, W_4y, W_8y, W_12y); } 01244 static INLINE void Block_Y_0_11(void){ Block_Y_xx_11(0, W_0y, W_4y, W_8y, W_12y); } 01245 static INLINE void Block_Y_0_12(word64 *w){ Block_Y_xx_12(0, W_0y, W_4y, W_8y, W_12y); } 01246 01247 static INLINE void Block_Y_4_1(void) { Block_Y_xx_1(4, W_4y, W_8y, W_12y, W_0y); } 01248 static INLINE void Block_Y_4_2(void) { Block_Y_xx_2(4, W_4y, W_8y, W_12y, W_0y); } 01249 static INLINE void Block_Y_4_3(void) { Block_Y_xx_3(4, W_4y, W_8y, W_12y, W_0y); } 01250 static INLINE void Block_Y_4_4(void) { Block_Y_xx_4(4, W_4y, W_8y, W_12y, W_0y); } 01251 static INLINE void Block_Y_4_5(void) { Block_Y_xx_5(4, W_4y, W_8y, W_12y, W_0y); } 01252 static INLINE void Block_Y_4_6(void) { Block_Y_xx_6(4, W_4y, W_8y, W_12y, W_0y); } 01253 static INLINE void Block_Y_4_7(void) { Block_Y_xx_7(4, W_4y, W_8y, W_12y, W_0y); } 01254 static INLINE void Block_Y_4_8(void) { Block_Y_xx_8(4, W_4y, W_8y, W_12y, W_0y); } 01255 static INLINE void Block_Y_4_9(void) { Block_Y_xx_9(4, W_4y, W_8y, W_12y, W_0y); } 01256 static INLINE void Block_Y_4_10(void) { Block_Y_xx_10(4, W_4y, W_8y, W_12y, W_0y); } 01257 static INLINE void Block_Y_4_11(void) { Block_Y_xx_11(4, W_4y, W_8y, W_12y, W_0y); } 01258 static INLINE void Block_Y_4_12(word64 *w) { Block_Y_xx_12(4, W_4y, W_8y, W_12y, W_0y); } 01259 01260 static INLINE void Block_Y_8_1(void) { Block_Y_xx_1(8, W_8y, W_12y, W_0y, W_4y); } 01261 static INLINE void Block_Y_8_2(void) { Block_Y_xx_2(8, W_8y, W_12y, W_0y, W_4y); } 01262 static INLINE void Block_Y_8_3(void) { Block_Y_xx_3(8, W_8y, W_12y, W_0y, W_4y); } 01263 static INLINE void Block_Y_8_4(void) { Block_Y_xx_4(8, W_8y, W_12y, W_0y, W_4y); } 01264 static INLINE void Block_Y_8_5(void) { Block_Y_xx_5(8, W_8y, W_12y, W_0y, W_4y); } 01265 static INLINE void Block_Y_8_6(void) { Block_Y_xx_6(8, W_8y, W_12y, W_0y, W_4y); } 01266 static INLINE void Block_Y_8_7(void) { Block_Y_xx_7(8, W_8y, W_12y, W_0y, W_4y); } 01267 static INLINE void Block_Y_8_8(void) { Block_Y_xx_8(8, W_8y, W_12y, W_0y, W_4y); } 01268 static INLINE void Block_Y_8_9(void) { Block_Y_xx_9(8, W_8y, W_12y, W_0y, W_4y); } 01269 static INLINE void Block_Y_8_10(void) { Block_Y_xx_10(8, W_8y, W_12y, W_0y, W_4y); } 01270 static INLINE void Block_Y_8_11(void) { Block_Y_xx_11(8, W_8y, W_12y, W_0y, W_4y); } 01271 static INLINE void Block_Y_8_12(word64 *w) { Block_Y_xx_12(8, W_8y, W_12y, W_0y, W_4y); } 01272 01273 static INLINE void Block_Y_12_1(void) { Block_Y_xx_1(12, W_12y, W_0y, W_4y, W_8y); } 01274 static INLINE void Block_Y_12_2(void) { Block_Y_xx_2(12, W_12y, W_0y, W_4y, W_8y); } 01275 static INLINE void Block_Y_12_3(void) { Block_Y_xx_3(12, W_12y, W_0y, W_4y, W_8y); } 01276 static INLINE void Block_Y_12_4(void) { Block_Y_xx_4(12, W_12y, W_0y, W_4y, W_8y); } 01277 static INLINE void Block_Y_12_5(void) { Block_Y_xx_5(12, W_12y, W_0y, W_4y, W_8y); } 01278 static INLINE void Block_Y_12_6(void) { Block_Y_xx_6(12, W_12y, W_0y, W_4y, W_8y); } 01279 static INLINE void Block_Y_12_7(void) { Block_Y_xx_7(12, W_12y, W_0y, W_4y, W_8y); } 01280 static INLINE void Block_Y_12_8(void) { Block_Y_xx_8(12, W_12y, W_0y, W_4y, W_8y); } 01281 static INLINE void Block_Y_12_9(void) { Block_Y_xx_9(12, W_12y, W_0y, W_4y, W_8y); } 01282 static INLINE void Block_Y_12_10(void) { Block_Y_xx_10(12, W_12y, W_0y, W_4y, W_8y); } 01283 static INLINE void Block_Y_12_11(void) { Block_Y_xx_11(12, W_12y, W_0y, W_4y, W_8y); } 01284 static INLINE void Block_Y_12_12(word64 *w) { Block_Y_xx_12(12, W_12y, W_0y, W_4y, W_8y); } 01285 01286 01287 static int Transform_AVX2(Sha512* sha512) 01288 { 01289 const word64* K = K512; 01290 word64 w[4]; 01291 word32 j; 01292 word64 T[8]; 01293 01294 /* Copy digest to working vars */ 01295 XMEMCPY(T, sha512->digest, sizeof(T)); 01296 01297 W_from_buff_Y(sha512->buffer); 01298 MOVE_to_MEMy(w,0, W_0y); 01299 for (j = 0; j < 80; j += 16) { 01300 Ry_1( 0, w[0]); Block_Y_0_1(); Ry_2( 0, w[0]); Block_Y_0_2(); 01301 Ry_3( 0, w[0]); Block_Y_0_3(); 01302 Ry_1( 1, w[1]); Block_Y_0_4(); Ry_2( 1, w[1]); Block_Y_0_5(); 01303 Ry_3( 1, w[1]); Block_Y_0_6(); 01304 Ry_1( 2, w[2]); Block_Y_0_7(); Ry_2( 2, w[2]); Block_Y_0_8(); 01305 Ry_3( 2, w[2]); Block_Y_0_9(); 01306 Ry_1( 3, w[3]); Block_Y_0_10();Ry_2( 3, w[3]); Block_Y_0_11(); 01307 Ry_3( 3, w[3]); Block_Y_0_12(w); 01308 01309 Ry_1( 4, w[0]); Block_Y_4_1(); Ry_2( 4, w[0]); Block_Y_4_2(); 01310 Ry_3( 4, w[0]); Block_Y_4_3(); 01311 Ry_1( 5, w[1]); Block_Y_4_4(); Ry_2( 5, w[1]); Block_Y_4_5(); 01312 Ry_3( 5, w[1]); Block_Y_4_6(); 01313 Ry_1( 6, w[2]); Block_Y_4_7(); Ry_2( 6, w[2]); Block_Y_4_8(); 01314 Ry_3( 6, w[2]); Block_Y_4_9(); 01315 Ry_1( 7, w[3]); Block_Y_4_10(); Ry_2( 7, w[3]);Block_Y_4_11(); 01316 Ry_3( 7, w[3]);Block_Y_4_12(w); 01317 01318 Ry_1( 8, w[0]); Block_Y_8_1(); Ry_2( 8, w[0]); Block_Y_8_2(); 01319 Ry_3( 8, w[0]); Block_Y_8_3(); 01320 Ry_1( 9, w[1]); Block_Y_8_4(); Ry_2( 9, w[1]); Block_Y_8_5(); 01321 Ry_3( 9, w[1]); Block_Y_8_6(); 01322 Ry_1(10, w[2]); Block_Y_8_7(); Ry_2(10, w[2]); Block_Y_8_8(); 01323 Ry_3(10, w[2]); Block_Y_8_9(); 01324 Ry_1(11, w[3]); Block_Y_8_10();Ry_2(11, w[3]); Block_Y_8_11(); 01325 Ry_3(11, w[3]); Block_Y_8_12(w); 01326 01327 Ry_1(12, w[0]); Block_Y_12_1(); Ry_2(12, w[0]); Block_Y_12_2(); 01328 Ry_3(12, w[0]); Block_Y_12_3(); 01329 Ry_1(13, w[1]); Block_Y_12_4(); Ry_2(13, w[1]); Block_Y_12_5(); 01330 Ry_3(13, w[1]); Block_Y_12_6(); 01331 Ry_1(14, w[2]); Block_Y_12_7(); Ry_2(14, w[2]); Block_Y_12_8(); 01332 Ry_3(14, w[2]); Block_Y_12_9(); 01333 Ry_1(15, w[3]); Block_Y_12_10();Ry_2(15, w[3]); Block_Y_12_11(); 01334 Ry_3(15, w[3]);Block_Y_12_12(w); 01335 } 01336 01337 /* Add the working vars back into digest */ 01338 sha512->digest[0] += a(0); 01339 sha512->digest[1] += b(0); 01340 sha512->digest[2] += c(0); 01341 sha512->digest[3] += d(0); 01342 sha512->digest[4] += e(0); 01343 sha512->digest[5] += f(0); 01344 sha512->digest[6] += g(0); 01345 sha512->digest[7] += h(0); 01346 01347 /* Wipe variables */ 01348 #if !defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2) 01349 XMEMSET(W, 0, sizeof(word64) * 16); 01350 #endif 01351 XMEMSET(T, 0, sizeof(T)); 01352 01353 return 0; 01354 } 01355 #endif /* HAVE_INTEL_AVX2 */ 01356 01357 01358 01359 /* -------------------------------------------------------------------------- */ 01360 /* SHA384 */ 01361 /* -------------------------------------------------------------------------- */ 01362 #ifdef WOLFSSL_SHA384 01363 static int InitSha384(Sha384* sha384) 01364 { 01365 sha384->digest[0] = W64LIT(0xcbbb9d5dc1059ed8); 01366 sha384->digest[1] = W64LIT(0x629a292a367cd507); 01367 sha384->digest[2] = W64LIT(0x9159015a3070dd17); 01368 sha384->digest[3] = W64LIT(0x152fecd8f70e5939); 01369 sha384->digest[4] = W64LIT(0x67332667ffc00b31); 01370 sha384->digest[5] = W64LIT(0x8eb44a8768581511); 01371 sha384->digest[6] = W64LIT(0xdb0c2e0d64f98fa7); 01372 sha384->digest[7] = W64LIT(0x47b5481dbefa4fa4); 01373 01374 sha384->buffLen = 0; 01375 sha384->loLen = 0; 01376 sha384->hiLen = 0; 01377 01378 return 0; 01379 } 01380 01381 int wc_Sha384Update(Sha384* sha384, const byte* data, word32 len) 01382 { 01383 01384 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA384) 01385 if (sha384->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA384) { 01386 #if defined(HAVE_INTEL_QA) 01387 return IntelQaSymSha384(&sha384->asyncDev, NULL, data, len); 01388 #endif 01389 } 01390 #endif /* WOLFSSL_ASYNC_CRYPT */ 01391 01392 return Sha512Update((Sha512*)sha384, data, len); 01393 } 01394 01395 01396 int wc_Sha384Final(Sha384* sha384, byte* hash) 01397 { 01398 int ret; 01399 01400 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA384) 01401 if (sha384->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA384) { 01402 #if defined(HAVE_INTEL_QA) 01403 return IntelQaSymSha384(&sha384->asyncDev, hash, NULL, 01404 SHA384_DIGEST_SIZE); 01405 #endif 01406 } 01407 #endif /* WOLFSSL_ASYNC_CRYPT */ 01408 01409 ret = Sha512Final((Sha512*)sha384); 01410 if (ret != 0) 01411 return ret; 01412 01413 XMEMCPY(hash, sha384->digest, SHA384_DIGEST_SIZE); 01414 01415 return InitSha384(sha384); /* reset state */ 01416 } 01417 01418 01419 int wc_InitSha384_ex(Sha384* sha384, void* heap, int devId) 01420 { 01421 int ret; 01422 01423 if (sha384 == NULL) { 01424 return BAD_FUNC_ARG; 01425 } 01426 01427 sha384->heap = heap; 01428 ret = InitSha384(sha384); 01429 if (ret != 0) 01430 return ret; 01431 01432 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA384) 01433 ret = wolfAsync_DevCtxInit(&sha384->asyncDev, WOLFSSL_ASYNC_MARKER_SHA384, 01434 sha384->heap, devId); 01435 #else 01436 (void)devId; 01437 #endif /* WOLFSSL_ASYNC_CRYPT */ 01438 01439 return ret; 01440 } 01441 01442 int wc_InitSha384(Sha384* sha384) 01443 { 01444 return wc_InitSha384_ex(sha384, NULL, INVALID_DEVID); 01445 } 01446 01447 void wc_Sha384Free(Sha384* sha384) 01448 { 01449 if (sha384 == NULL) 01450 return; 01451 01452 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA384) 01453 wolfAsync_DevCtxFree(&sha384->asyncDev, WOLFSSL_ASYNC_MARKER_SHA384); 01454 #endif /* WOLFSSL_ASYNC_CRYPT */ 01455 } 01456 01457 #endif /* WOLFSSL_SHA384 */ 01458 01459 #endif /* HAVE_FIPS */ 01460 01461 01462 int wc_Sha512GetHash(Sha512* sha512, byte* hash) 01463 { 01464 int ret; 01465 Sha512 tmpSha512; 01466 01467 if (sha512 == NULL || hash == NULL) 01468 return BAD_FUNC_ARG; 01469 01470 ret = wc_Sha512Copy(sha512, &tmpSha512); 01471 if (ret == 0) { 01472 ret = wc_Sha512Final(&tmpSha512, hash); 01473 } 01474 return ret; 01475 } 01476 01477 int wc_Sha512Copy(Sha512* src, Sha512* dst) 01478 { 01479 int ret = 0; 01480 01481 if (src == NULL || dst == NULL) 01482 return BAD_FUNC_ARG; 01483 01484 XMEMCPY(dst, src, sizeof(Sha512)); 01485 01486 #ifdef WOLFSSL_ASYNC_CRYPT 01487 ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev); 01488 #endif 01489 01490 return ret; 01491 } 01492 01493 #ifdef WOLFSSL_SHA384 01494 int wc_Sha384GetHash(Sha384* sha384, byte* hash) 01495 { 01496 int ret; 01497 Sha384 tmpSha384; 01498 01499 if (sha384 == NULL || hash == NULL) 01500 return BAD_FUNC_ARG; 01501 01502 ret = wc_Sha384Copy(sha384, &tmpSha384); 01503 if (ret == 0) { 01504 ret = wc_Sha384Final(&tmpSha384, hash); 01505 } 01506 return ret; 01507 } 01508 int wc_Sha384Copy(Sha384* src, Sha384* dst) 01509 { 01510 int ret = 0; 01511 01512 if (src == NULL || dst == NULL) 01513 return BAD_FUNC_ARG; 01514 01515 XMEMCPY(dst, src, sizeof(Sha384)); 01516 01517 #ifdef WOLFSSL_ASYNC_CRYPT 01518 ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev); 01519 #endif 01520 01521 return ret; 01522 } 01523 #endif /* WOLFSSL_SHA384 */ 01524 01525 #endif /* WOLFSSL_SHA512 */ 01526
Generated on Tue Jul 12 2022 23:30:59 by
