sha512.c
/* sha512.c
 *
 * Copyright (C) 2006-2016 wolfSSL Inc.
 *
 * This file is part of wolfSSL.
 *
 * wolfSSL is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * wolfSSL is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
 */


#ifdef HAVE_CONFIG_H
    #include <config.h>
#endif

#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/sha512.h>

#ifdef WOLFSSL_SHA512

#ifdef HAVE_FIPS
int wc_InitSha512(Sha512* sha)
{
    return InitSha512_fips(sha);
}


int wc_Sha512Update(Sha512* sha, const byte* data, word32 len)
{
    return Sha512Update_fips(sha, data, len);
}


int wc_Sha512Final(Sha512* sha, byte* out)
{
    return Sha512Final_fips(sha, out);
}


#if defined(WOLFSSL_SHA384) || defined(HAVE_AESGCM)

int wc_InitSha384(Sha384* sha)
{
    return InitSha384_fips(sha);
}


int wc_Sha384Update(Sha384* sha, const byte* data, word32 len)
{
    return Sha384Update_fips(sha, data, len);
}


int wc_Sha384Final(Sha384* sha, byte* out)
{
    return Sha384Final_fips(sha, out);
}


#endif /* WOLFSSL_SHA384 */
#else /* else build without using fips */

#include <wolfssl/wolfcrypt/logging.h>
#include <wolfssl/wolfcrypt/error-crypt.h>

#ifdef NO_INLINE
    #include <wolfssl/wolfcrypt/misc.h>
#else
    #include <wolfcrypt/src/misc.c>
#endif


#ifndef WOLFSSL_HAVE_MIN
#define WOLFSSL_HAVE_MIN

    static INLINE word32 min(word32 a, word32 b)
    {
        return a > b ? b : a;
    }

#endif /* WOLFSSL_HAVE_MIN */

#if defined(USE_INTEL_SPEEDUP)
    #define HAVE_INTEL_AVX1
    #define HAVE_INTEL_AVX2
#endif

#if defined(HAVE_INTEL_AVX1)
    /* #define DEBUG_XMM */
#endif

#if defined(HAVE_INTEL_AVX2)
    #define HAVE_INTEL_RORX
    /* #define DEBUG_YMM */
#endif

/*****
Intel AVX1/AVX2 Macro Control Structure

#if defined(HAVE_INTEL_SPEEDUP)
    #define HAVE_INTEL_AVX1
    #define HAVE_INTEL_AVX2
#endif

int InitSha512(Sha512* sha512) {
    Save/Recover XMM, YMM
    ...

    Check Intel AVX cpuid flags
}

#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
    Transform_AVX1() ;  # Function prototype
    Transform_AVX2() ;  #
#endif

_Transform() {          # Native Transform function body

}

int Sha512Update() {
    Save/Recover XMM, YMM
    ...
}

int Sha512Final() {
    Save/Recover XMM, YMM
    ...
}


#if defined(HAVE_INTEL_AVX1)

    XMM Instructions/INLINE asm Definitions

#endif

#if defined(HAVE_INTEL_AVX2)

    YMM Instructions/INLINE asm Definitions

#endif

#if defined(HAVE_INTEL_AVX1)

    int Transform_AVX1() {
        Stitched Message Sched/Round
    }

#endif

#if defined(HAVE_INTEL_AVX2)

    int Transform_AVX2() {
        Stitched Message Sched/Round
    }
#endif


*/

#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)


/* Each platform needs to query cpuid to see which of the extensions used
 * below (AVX1, AVX2, BMI2, RDRAND, RDSEED) are supported. Also, set up a
 * macro for proper linkage w/o ABI conflicts.
 */

#ifndef _MSC_VER
    #define cpuid(reg, leaf, sub)\
        __asm__ __volatile__ ("cpuid":\
            "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3]) :\
            "a" (leaf), "c"(sub));

    #define XASM_LINK(f) asm(f)
#else

    #include <intrin.h>
    #define cpuid(a,b) __cpuid((int*)a,b)

    #define XASM_LINK(f)

#endif /* _MSC_VER */

#define EAX 0
#define EBX 1
#define ECX 2
#define EDX 3

#define CPUID_AVX1   0x1
#define CPUID_AVX2   0x2
#define CPUID_RDRAND 0x4
#define CPUID_RDSEED 0x8
#define CPUID_BMI2   0x10   /* MULX, RORX */

#define IS_INTEL_AVX1   (cpuid_flags&CPUID_AVX1)
#define IS_INTEL_AVX2   (cpuid_flags&CPUID_AVX2)
#define IS_INTEL_BMI2   (cpuid_flags&CPUID_BMI2)
#define IS_INTEL_RDRAND (cpuid_flags&CPUID_RDRAND)
#define IS_INTEL_RDSEED (cpuid_flags&CPUID_RDSEED)

static word32 cpuid_check = 0 ;
static word32 cpuid_flags = 0 ;

static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit) {
    int got_intel_cpu = 0;
    unsigned int reg[5];

    reg[4] = '\0' ;
    cpuid(reg, 0, 0);
    if (memcmp((char *)&(reg[EBX]), "Genu", 4) == 0 &&
        memcmp((char *)&(reg[EDX]), "ineI", 4) == 0 &&
        memcmp((char *)&(reg[ECX]), "ntel", 4) == 0) {
        got_intel_cpu = 1;
    }
    if (got_intel_cpu) {
        cpuid(reg, leaf, sub);
        return ((reg[num] >> bit) & 0x1) ;
    }
    return 0 ;
}

#define CHECK_SHA512 0x1
#define CHECK_SHA384 0x2

static int set_cpuid_flags(int sha) {
    if ((cpuid_check & sha) == 0) {
        if (cpuid_flag(1, 0, ECX, 28)) { cpuid_flags |= CPUID_AVX1 ; }
        if (cpuid_flag(7, 0, EBX,  5)) { cpuid_flags |= CPUID_AVX2 ; }
        if (cpuid_flag(7, 0, EBX,  8)) { cpuid_flags |= CPUID_BMI2 ; }
        if (cpuid_flag(1, 0, ECX, 30)) { cpuid_flags |= CPUID_RDRAND ; }
        if (cpuid_flag(7, 0, EBX, 18)) { cpuid_flags |= CPUID_RDSEED ; }
        cpuid_check |= sha ;
        return 0 ;
    }
    return 1 ;
}
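/* Usage sketch (illustrative; not part of the original source): after a
 * single set_cpuid_flags() call the IS_INTEL_* macros are plain bit tests
 * against the cached cpuid_flags, e.g.
 *
 *   if (set_cpuid_flags(CHECK_SHA512) == 0 && IS_INTEL_AVX2 && IS_INTEL_BMI2) {
 *       ... select the AVX2 + RORX code path ...
 *   }
 *
 * Note the inverted convention: set_cpuid_flags() returns 0 when it has just
 * probed the CPU and 1 when the flags were already cached for this algorithm.
 */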
/* #if defined(HAVE_INTEL_AVX1/2) at the tail of sha512 */

#if defined(HAVE_INTEL_AVX1)
static int Transform_AVX1(Sha512 *sha512) ;
#endif

#if defined(HAVE_INTEL_AVX2)
static int Transform_AVX2(Sha512 *sha512) ;

#if defined(HAVE_INTEL_AVX1) && defined(HAVE_INTEL_AVX2) && defined(HAVE_INTEL_RORX)
static int Transform_AVX1_RORX(Sha512 *sha512) ;
#endif

#endif

static int _Transform(Sha512 *sha512) ;

static int (*Transform_p)(Sha512* sha512) = _Transform ;

#define Transform(sha512) (*Transform_p)(sha512)

static void set_Transform(void) {
    if (set_cpuid_flags(CHECK_SHA512)) return ;

#if defined(HAVE_INTEL_AVX2)
    if (IS_INTEL_AVX2 && IS_INTEL_BMI2) {
        Transform_p = Transform_AVX1_RORX; return ;
        Transform_p = Transform_AVX2 ;
        /* unreachable; referenced only to avoid a "defined but not used"
           warning for Transform_AVX2 */
    }
#endif
#if defined(HAVE_INTEL_AVX1)
    Transform_p = ((IS_INTEL_AVX1) ? Transform_AVX1 : _Transform) ; return ;
#endif
    Transform_p = _Transform ; return ;
}

#else
#define Transform(sha512) _Transform(sha512)
#endif
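/* Dispatch summary (descriptive): all callers go through the Transform()
 * macro, i.e. one indirect call through Transform_p per 128-byte block.
 * set_Transform(), invoked from wc_InitSha512() below, rebinds the pointer
 * once per init to the best implementation the CPU reports:
 *
 *   AVX2 + BMI2  -> Transform_AVX1_RORX
 *   AVX1         -> Transform_AVX1
 *   otherwise    -> _Transform (portable C)
 */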
/* Dummy instruction: clobbers the XMM registers so the compiler saves and
   restores them around Transform */
/* #if defined(HAVE_INTEL_AVX2)
#define SAVE_XMM_YMM __asm__ volatile("orq %%r8, %%r8":::\
    "%ymm0","%ymm1","%ymm2","%ymm3","%ymm4","%ymm5","%ymm6","%ymm7","%ymm8","%ymm9","%ymm10","%ymm11",\
    "%ymm12","%ymm13","%ymm14","%ymm15")
*/
#if defined(HAVE_INTEL_AVX1)
#define SAVE_XMM_YMM __asm__ volatile("orq %%r8, %%r8":::\
    "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15")
#else
#define SAVE_XMM_YMM
#endif

#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)

#include <string.h>

#endif /* defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) */


#if defined(HAVE_INTEL_RORX)
#define ROTR(func, bits, x) \
word64 func(word64 x) { word64 ret ;\
    __asm__ ("rorx $"#bits", %1, %0\n\t":"=r"(ret):"r"(x):) ;\
    return ret ;\
}

static INLINE ROTR(rotrFixed64_28, 28, x)
static INLINE ROTR(rotrFixed64_34, 34, x)
static INLINE ROTR(rotrFixed64_39, 39, x)
static INLINE ROTR(rotrFixed64_14, 14, x)
static INLINE ROTR(rotrFixed64_18, 18, x)
static INLINE ROTR(rotrFixed64_41, 41, x)

#define S0_RORX(x) (rotrFixed64_28(x)^rotrFixed64_34(x)^rotrFixed64_39(x))
#define S1_RORX(x) (rotrFixed64_14(x)^rotrFixed64_18(x)^rotrFixed64_41(x))
#endif

#if defined(HAVE_BYTEREVERSE64) && !defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2)
#define ByteReverseWords64(out, in, size) ByteReverseWords64_1(out, size)
#define ByteReverseWords64_1(buf, size)\
{ unsigned int i ;\
    for(i=0; i< size/sizeof(word64); i++){\
        __asm__ volatile("bswapq %0":"+r"(buf[i])::) ;\
    }\
}
#endif


int wc_InitSha512(Sha512* sha512)
{
    sha512->digest[0] = W64LIT(0x6a09e667f3bcc908);
    sha512->digest[1] = W64LIT(0xbb67ae8584caa73b);
    sha512->digest[2] = W64LIT(0x3c6ef372fe94f82b);
    sha512->digest[3] = W64LIT(0xa54ff53a5f1d36f1);
    sha512->digest[4] = W64LIT(0x510e527fade682d1);
    sha512->digest[5] = W64LIT(0x9b05688c2b3e6c1f);
    sha512->digest[6] = W64LIT(0x1f83d9abfb41bd6b);
    sha512->digest[7] = W64LIT(0x5be0cd19137e2179);

    sha512->buffLen = 0;
    sha512->loLen   = 0;
    sha512->hiLen   = 0;

#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
    set_Transform() ; /* choose best Transform function under this runtime environment */
#endif

    return 0 ;
}


static const word64 K512[80] = {
    W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd),
    W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc),
    W64LIT(0x3956c25bf348b538), W64LIT(0x59f111f1b605d019),
    W64LIT(0x923f82a4af194f9b), W64LIT(0xab1c5ed5da6d8118),
    W64LIT(0xd807aa98a3030242), W64LIT(0x12835b0145706fbe),
    W64LIT(0x243185be4ee4b28c), W64LIT(0x550c7dc3d5ffb4e2),
    W64LIT(0x72be5d74f27b896f), W64LIT(0x80deb1fe3b1696b1),
    W64LIT(0x9bdc06a725c71235), W64LIT(0xc19bf174cf692694),
    W64LIT(0xe49b69c19ef14ad2), W64LIT(0xefbe4786384f25e3),
    W64LIT(0x0fc19dc68b8cd5b5), W64LIT(0x240ca1cc77ac9c65),
    W64LIT(0x2de92c6f592b0275), W64LIT(0x4a7484aa6ea6e483),
    W64LIT(0x5cb0a9dcbd41fbd4), W64LIT(0x76f988da831153b5),
    W64LIT(0x983e5152ee66dfab), W64LIT(0xa831c66d2db43210),
    W64LIT(0xb00327c898fb213f), W64LIT(0xbf597fc7beef0ee4),
    W64LIT(0xc6e00bf33da88fc2), W64LIT(0xd5a79147930aa725),
    W64LIT(0x06ca6351e003826f), W64LIT(0x142929670a0e6e70),
    W64LIT(0x27b70a8546d22ffc), W64LIT(0x2e1b21385c26c926),
    W64LIT(0x4d2c6dfc5ac42aed), W64LIT(0x53380d139d95b3df),
    W64LIT(0x650a73548baf63de), W64LIT(0x766a0abb3c77b2a8),
    W64LIT(0x81c2c92e47edaee6), W64LIT(0x92722c851482353b),
    W64LIT(0xa2bfe8a14cf10364), W64LIT(0xa81a664bbc423001),
    W64LIT(0xc24b8b70d0f89791), W64LIT(0xc76c51a30654be30),
    W64LIT(0xd192e819d6ef5218), W64LIT(0xd69906245565a910),
    W64LIT(0xf40e35855771202a), W64LIT(0x106aa07032bbd1b8),
    W64LIT(0x19a4c116b8d2d0c8), W64LIT(0x1e376c085141ab53),
    W64LIT(0x2748774cdf8eeb99), W64LIT(0x34b0bcb5e19b48a8),
    W64LIT(0x391c0cb3c5c95a63), W64LIT(0x4ed8aa4ae3418acb),
    W64LIT(0x5b9cca4f7763e373), W64LIT(0x682e6ff3d6b2b8a3),
    W64LIT(0x748f82ee5defb2fc), W64LIT(0x78a5636f43172f60),
    W64LIT(0x84c87814a1f0ab72), W64LIT(0x8cc702081a6439ec),
    W64LIT(0x90befffa23631e28), W64LIT(0xa4506cebde82bde9),
    W64LIT(0xbef9a3f7b2c67915), W64LIT(0xc67178f2e372532b),
    W64LIT(0xca273eceea26619c), W64LIT(0xd186b8c721c0c207),
    W64LIT(0xeada7dd6cde0eb1e), W64LIT(0xf57d4f7fee6ed178),
    W64LIT(0x06f067aa72176fba), W64LIT(0x0a637dc5a2c898a6),
    W64LIT(0x113f9804bef90dae), W64LIT(0x1b710b35131c471b),
    W64LIT(0x28db77f523047d84), W64LIT(0x32caab7b40c72493),
    W64LIT(0x3c9ebe0a15c9bebc), W64LIT(0x431d67c49c100d4c),
    W64LIT(0x4cc5d4becb3e42b6), W64LIT(0x597f299cfc657e2a),
    W64LIT(0x5fcb6fab3ad6faec), W64LIT(0x6c44198c4a475817)
};



#define blk0(i) (W[i] = sha512->buffer[i])

#define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15]))

#define Ch(x,y,z)  (z^(x&(y^z)))
#define Maj(x,y,z) ((x&y)|(z&(x|y)))

#define a(i) T[(0-i)&7]
#define b(i) T[(1-i)&7]
#define c(i) T[(2-i)&7]
#define d(i) T[(3-i)&7]
#define e(i) T[(4-i)&7]
#define f(i) T[(5-i)&7]
#define g(i) T[(6-i)&7]
#define h(i) T[(7-i)&7]

#define S0(x) (rotrFixed64(x,28)^rotrFixed64(x,34)^rotrFixed64(x,39))
#define S1(x) (rotrFixed64(x,14)^rotrFixed64(x,18)^rotrFixed64(x,41))
#define s0(x) (rotrFixed64(x,1)^rotrFixed64(x,8)^(x>>7))
#define s1(x) (rotrFixed64(x,19)^rotrFixed64(x,61)^(x>>6))

#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+K[i+j]+(j?blk2(i):blk0(i));\
    d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))

#define blk384(i) (W[i] = sha384->buffer[i])

#define R2(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+K[i+j]+(j?blk2(i):blk384(i));\
    d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))
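/* Rotating working variables (descriptive): instead of physically shifting
 * the eight SHA-512 working variables after every round, the round index i
 * is folded into the subscript, so a(i) is T[(0-i)&7], b(i) is T[(1-i)&7],
 * and so on.  The value written into h(i) during round i is addressed as
 * a(i+1) in the next round (round 0 writes T[7], and a(1) == T[7]), so a
 * 16-round unroll needs no register moves at all.
 */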
static int _Transform(Sha512* sha512)
{
    const word64* K = K512;

    word32 j;
    word64 T[8];


#ifdef WOLFSSL_SMALL_STACK
    word64* W;
    W = (word64*) XMALLOC(sizeof(word64) * 16, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    if (W == NULL)
        return MEMORY_E;
#else
    word64 W[16];
#endif

    /* Copy digest to working vars */
    XMEMCPY(T, sha512->digest, sizeof(T));

#ifdef USE_SLOW_SHA2
    /* about half the code size, but roughly 50% slower */
    /* 80 operations, not unrolled */
    for (j = 0; j < 80; j += 16) {
        int m;
        for (m = 0; m < 16; m++) { /* braces needed: R() expands to multiple statements */
            R(m);
        }
    }
#else
    /* 80 operations, partially loop unrolled */
    for (j = 0; j < 80; j += 16) {
        R( 0); R( 1); R( 2); R( 3);
        R( 4); R( 5); R( 6); R( 7);
        R( 8); R( 9); R(10); R(11);
        R(12); R(13); R(14); R(15);
    }
#endif /* USE_SLOW_SHA2 */

    /* Add the working vars back into digest */

    sha512->digest[0] += a(0);
    sha512->digest[1] += b(0);
    sha512->digest[2] += c(0);
    sha512->digest[3] += d(0);
    sha512->digest[4] += e(0);
    sha512->digest[5] += f(0);
    sha512->digest[6] += g(0);
    sha512->digest[7] += h(0);

    /* Wipe variables */
    ForceZero(W, sizeof(word64) * 16);
    ForceZero(T, sizeof(T));

#ifdef WOLFSSL_SMALL_STACK
    XFREE(W, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif

    return 0;
}


static INLINE void AddLength(Sha512* sha512, word32 len)
{
    word32 tmp = sha512->loLen;
    if ( (sha512->loLen += len) < tmp)
        sha512->hiLen++;                       /* carry low to high */
}

int wc_Sha512Update(Sha512* sha512, const byte* data, word32 len)
{
    /* do block size increments */
    byte* local = (byte*)sha512->buffer;

    SAVE_XMM_YMM ; /* for Intel AVX */

    while (len) {
        word32 add = min(len, SHA512_BLOCK_SIZE - sha512->buffLen);
        XMEMCPY(&local[sha512->buffLen], data, add);

        sha512->buffLen += add;
        data            += add;
        len             -= add;

        if (sha512->buffLen == SHA512_BLOCK_SIZE) {
            int ret;
#if defined(LITTLE_ENDIAN_ORDER)
    #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
            if (!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
    #endif
                ByteReverseWords64(sha512->buffer, sha512->buffer,
                                   SHA512_BLOCK_SIZE);
#endif
            ret = Transform(sha512);
            if (ret != 0)
                return ret;

            AddLength(sha512, SHA512_BLOCK_SIZE);
            sha512->buffLen = 0;
        }
    }
    return 0;
}
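/* Streaming note (descriptive): wc_Sha512Update() only buffers input until a
 * full SHA512_BLOCK_SIZE (128-byte) block is available, so any split of the
 * input produces the same digest.  For a buffer msg of length msgSz:
 *
 *   wc_Sha512Update(&sha, msg, 10);
 *   wc_Sha512Update(&sha, msg + 10, msgSz - 10);
 *
 * is equivalent to a single wc_Sha512Update(&sha, msg, msgSz).
 */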
int wc_Sha512Final(Sha512* sha512, byte* hash)
{
    byte* local = (byte*)sha512->buffer;
    int ret;

    SAVE_XMM_YMM ; /* for Intel AVX */
    AddLength(sha512, sha512->buffLen);  /* before adding pads */

    local[sha512->buffLen++] = 0x80;     /* add 1 */

    /* pad with zeros */
    if (sha512->buffLen > SHA512_PAD_SIZE) {
        XMEMSET(&local[sha512->buffLen], 0, SHA512_BLOCK_SIZE - sha512->buffLen);
        sha512->buffLen += SHA512_BLOCK_SIZE - sha512->buffLen;
#if defined(LITTLE_ENDIAN_ORDER)
    #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
        if (!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
    #endif
            ByteReverseWords64(sha512->buffer, sha512->buffer, SHA512_BLOCK_SIZE);
#endif
        ret = Transform(sha512);
        if (ret != 0)
            return ret;

        sha512->buffLen = 0;
    }
    XMEMSET(&local[sha512->buffLen], 0, SHA512_PAD_SIZE - sha512->buffLen);

    /* put lengths in bits */
    sha512->hiLen = (sha512->loLen >> (8*sizeof(sha512->loLen) - 3)) +
                    (sha512->hiLen << 3);
    sha512->loLen = sha512->loLen << 3;

    /* store lengths */
#if defined(LITTLE_ENDIAN_ORDER)
    #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
    if (!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
    #endif
        ByteReverseWords64(sha512->buffer, sha512->buffer, SHA512_PAD_SIZE);
#endif
    /* ! length ordering dependent on digest endian type ! */

    sha512->buffer[SHA512_BLOCK_SIZE / sizeof(word64) - 2] = sha512->hiLen;
    sha512->buffer[SHA512_BLOCK_SIZE / sizeof(word64) - 1] = sha512->loLen;
#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
    if (IS_INTEL_AVX1 || IS_INTEL_AVX2)
        ByteReverseWords64(&(sha512->buffer[SHA512_BLOCK_SIZE / sizeof(word64) - 2]),
                           &(sha512->buffer[SHA512_BLOCK_SIZE / sizeof(word64) - 2]),
                           SHA512_BLOCK_SIZE - SHA512_PAD_SIZE);
#endif
    ret = Transform(sha512);
    if (ret != 0)
        return ret;

#ifdef LITTLE_ENDIAN_ORDER
    ByteReverseWords64(sha512->digest, sha512->digest, SHA512_DIGEST_SIZE);
#endif
    XMEMCPY(hash, sha512->digest, SHA512_DIGEST_SIZE);

    return wc_InitSha512(sha512);  /* reset state */
}
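/* Usage sketch (illustrative; not part of the original source): one-shot
 * hashing with the three calls above.  Each call returns 0 on success;
 * error checks are omitted for brevity.
 *
 *   Sha512 sha;
 *   byte   digest[SHA512_DIGEST_SIZE];
 *
 *   wc_InitSha512(&sha);
 *   wc_Sha512Update(&sha, (const byte*)"abc", 3);
 *   wc_Sha512Final(&sha, digest);
 *
 * Because wc_Sha512Final() ends by calling wc_InitSha512(), the same context
 * can be reused for the next message without another explicit init.
 */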
#if defined(HAVE_INTEL_AVX1)

#define Rx_1(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+K[i+j] + W_X[i] ;
#define Rx_2(i) d(i)+=h(i);
#define Rx_3(i) h(i)+=S0(a(i))+Maj(a(i),b(i),c(i));

#if defined(HAVE_INTEL_RORX)
#define Rx_RORX_1(i) h(i)+=S1_RORX(e(i))+Ch(e(i),f(i),g(i))+K[i+j] + W_X[i] ;
#define Rx_RORX_2(i) d(i)+=h(i);
#define Rx_RORX_3(i) h(i)+=S0_RORX(a(i))+Maj(a(i),b(i),c(i));
#endif

#endif

#if defined(HAVE_INTEL_AVX2)
#define Ry_1(i, w) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+K[i+j] + w ;
#define Ry_2(i, w) d(i)+=h(i);
#define Ry_3(i, w) h(i)+=S0(a(i))+Maj(a(i),b(i),c(i));
#endif

#if defined(HAVE_INTEL_AVX1) /* inline assembler for Intel AVX1 instructions */
#if defined(DEBUG_XMM)

#define SAVE_REG(i) __asm__ volatile("vmovdqu %%xmm"#i", %0 \n\t":"=m"(reg[i][0])::XMM_REGs);
#define RECV_REG(i) __asm__ volatile("vmovdqu %0, %%xmm"#i" \n\t"::"m"(reg[i][0]):XMM_REGs);

#define _DUMP_REG(REG, name)\
    { word64 buf[16] ; word64 reg[16][2]; int k ;\
      SAVE_REG(0);  SAVE_REG(1);  SAVE_REG(2);  SAVE_REG(3);  SAVE_REG(4); \
      SAVE_REG(5);  SAVE_REG(6);  SAVE_REG(7);  SAVE_REG(8);  SAVE_REG(9);  SAVE_REG(10);\
      SAVE_REG(11); SAVE_REG(12); SAVE_REG(13); SAVE_REG(14); SAVE_REG(15); \
      __asm__ volatile("vmovdqu %%"#REG", %0 \n\t":"=m"(buf[0])::XMM_REGs);\
      printf(" "#name":\t") ; for(k=0; k<2; k++) printf("%016lx.", (word64)(buf[k])); printf("\n") ; \
      RECV_REG(0);  RECV_REG(1);  RECV_REG(2);  RECV_REG(3);  RECV_REG(4);\
      RECV_REG(5);  RECV_REG(6);  RECV_REG(7);  RECV_REG(8);  RECV_REG(9);\
      RECV_REG(10); RECV_REG(11); RECV_REG(12); RECV_REG(13); RECV_REG(14); RECV_REG(15);\
    }

#define DUMP_REG(REG) _DUMP_REG(REG, #REG)
#define PRINTF(fmt, ...)

#else

#define DUMP_REG(REG)
#define PRINTF(fmt, ...)

#endif

#define _MOVE_to_REG(xymm, mem)       __asm__ volatile("vmovdqu %0, %%"#xymm" "\
        :: "m"(mem):XMM_REGs) ;
#define _MOVE_to_MEM(mem,i, xymm)     __asm__ volatile("vmovdqu %%"#xymm", %0" :\
        "=m"(mem[i]),"=m"(mem[i+1]),"=m"(mem[i+2]),"=m"(mem[i+3])::XMM_REGs) ;
#define _MOVE(dest, src)              __asm__ volatile("vmovdqu %%"#src", %%"\
        #dest" ":::XMM_REGs) ;

#define _S_TEMP(dest, src, bits, temp) __asm__ volatile("vpsrlq $"#bits", %%"\
        #src", %%"#dest"\n\tvpsllq $64-"#bits", %%"#src", %%"#temp"\n\tvpor %%"\
        #temp",%%"#dest", %%"#dest" ":::XMM_REGs) ;
#define _AVX1_R(dest, src, bits)      __asm__ volatile("vpsrlq $"#bits", %%"\
        #src", %%"#dest" ":::XMM_REGs) ;
#define _XOR(dest, src1, src2)        __asm__ volatile("vpxor %%"#src1", %%"\
        #src2", %%"#dest" ":::XMM_REGs) ;
#define _OR(dest, src1, src2)         __asm__ volatile("vpor %%"#src1", %%"\
        #src2", %%"#dest" ":::XMM_REGs) ;
#define _ADD(dest, src1, src2)        __asm__ volatile("vpaddq %%"#src1", %%"\
        #src2", %%"#dest" ":::XMM_REGs) ;
#define _ADD_MEM(dest, src1, mem)     __asm__ volatile("vpaddq %0, %%"#src1", %%"\
        #dest" "::"m"(mem):XMM_REGs) ;

#define MOVE_to_REG(xymm, mem)    _MOVE_to_REG(xymm, mem)
#define MOVE_to_MEM(mem, i, xymm) _MOVE_to_MEM(mem, i, xymm)
#define MOVE(dest, src)           _MOVE(dest, src)

#define XOR(dest, src1, src2)     _XOR(dest, src1, src2)
#define OR(dest, src1, src2)      _OR(dest, src1, src2)
#define ADD(dest, src1, src2)     _ADD(dest, src1, src2)

#define S_TMP(dest, src, bits, temp) _S_TEMP(dest, src, bits, temp);
#define AVX1_S(dest, src, bits)      S_TMP(dest, src, bits, S_TEMP)
#define AVX1_R(dest, src, bits)      _AVX1_R(dest, src, bits)

#define Init_Mask(mask) \
    __asm__ volatile("vmovdqu %0, %%xmm1\n\t"::"m"(mask):"%xmm1") ;

#define _W_from_buff1(w, buff, xmm) \
    /* X0..3(xmm4..7), W[0..15] = sha512->buffer[0..15]; */\
    __asm__ volatile("vmovdqu %1, %%"#xmm"\n\t"\
                     "vpshufb %%xmm1, %%"#xmm", %%"#xmm"\n\t"\
                     "vmovdqu %%"#xmm", %0"\
                     :"=m"(w): "m"(buff):"%xmm0") ;

#define W_from_buff1(w, buff, xmm) _W_from_buff1(w, buff, xmm)

#define W_from_buff(w, buff)\
    Init_Mask(mBYTE_FLIP_MASK[0]) ;\
    W_from_buff1(w[0],  buff[0],  W_0);\
    W_from_buff1(w[2],  buff[2],  W_2);\
    W_from_buff1(w[4],  buff[4],  W_4);\
    W_from_buff1(w[6],  buff[6],  W_6);\
    W_from_buff1(w[8],  buff[8],  W_8);\
    W_from_buff1(w[10], buff[10], W_10);\
    W_from_buff1(w[12], buff[12], W_12);\
    W_from_buff1(w[14], buff[14], W_14);

static word64 mBYTE_FLIP_MASK[] = { 0x0001020304050607, 0x08090a0b0c0d0e0f } ;
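/* mBYTE_FLIP_MASK (descriptive): SHA-512 consumes big-endian 64-bit words,
 * but the buffer is loaded little-endian.  The vpshufb mask
 * { 0x0001020304050607, 0x08090a0b0c0d0e0f } lists source byte indices
 * 7..0 for each destination byte within a 64-bit lane, so one shuffle
 * byte-swaps both lanes of an XMM register at once, the vector equivalent
 * of ByteReverseWords64().
 */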
#define W_I_15    xmm14
#define W_I_7     xmm11
#define W_I_2     xmm13
#define W_I       xmm12
#define G_TEMP    xmm0
#define S_TEMP    xmm1
#define XMM_TEMP0 xmm2

#define W_0  xmm12
#define W_2  xmm3
#define W_4  xmm4
#define W_6  xmm5
#define W_8  xmm6
#define W_10 xmm7
#define W_12 xmm8
#define W_14 xmm9

#define XMM_REGs

#define s0_1(dest, src) AVX1_S(dest, src, 1);
#define s0_2(dest, src) AVX1_S(G_TEMP, src, 8); XOR(dest, G_TEMP, dest) ;
#define s0_3(dest, src) AVX1_R(G_TEMP, src, 7); XOR(dest, G_TEMP, dest) ;

#define s1_1(dest, src) AVX1_S(dest, src, 19);
#define s1_2(dest, src) AVX1_S(G_TEMP, src, 61); XOR(dest, G_TEMP, dest) ;
#define s1_3(dest, src) AVX1_R(G_TEMP, src, 6);  XOR(dest, G_TEMP, dest) ;

#define s0_(dest, src) s0_1(dest, src) ; s0_2(dest, src) ; s0_3(dest, src)
#define s1_(dest, src) s1_1(dest, src) ; s1_2(dest, src) ; s1_3(dest, src)

#define Block_xx_1(i) \
    MOVE_to_REG(W_I_15, W_X[(i-15)&15]) ;\
    MOVE_to_REG(W_I_7,  W_X[(i- 7)&15]) ;\

#define Block_xx_2(i) \
    MOVE_to_REG(W_I_2,  W_X[(i- 2)&15]) ;\
    MOVE_to_REG(W_I,    W_X[(i)]) ;\

#define Block_xx_3(i) \
    s0_ (XMM_TEMP0, W_I_15) ;\

#define Block_xx_4(i) \
    ADD(W_I, W_I, XMM_TEMP0) ;\
    ADD(W_I, W_I, W_I_7) ;\

#define Block_xx_5(i) \
    s1_ (XMM_TEMP0, W_I_2) ;\

#define Block_xx_6(i) \
    ADD(W_I, W_I, XMM_TEMP0) ;\
    MOVE_to_MEM(W_X,i, W_I) ;\
    if (i==0)\
        MOVE_to_MEM(W_X,16, W_I) ;\

#define Block_xx_7(i) \
    MOVE_to_REG(W_I_15, W_X[(i-15)&15]) ;\
    MOVE_to_REG(W_I_7,  W_X[(i- 7)&15]) ;\

#define Block_xx_8(i) \
    MOVE_to_REG(W_I_2,  W_X[(i- 2)&15]) ;\
    MOVE_to_REG(W_I,    W_X[(i)]) ;\

#define Block_xx_9(i) \
    s0_ (XMM_TEMP0, W_I_15) ;\

#define Block_xx_10(i) \
    ADD(W_I, W_I, XMM_TEMP0) ;\
    ADD(W_I, W_I, W_I_7) ;\

#define Block_xx_11(i) \
    s1_ (XMM_TEMP0, W_I_2) ;\

#define Block_xx_12(i) \
    ADD(W_I, W_I, XMM_TEMP0) ;\
    MOVE_to_MEM(W_X,i, W_I) ;\
    if ((i)==0)\
        MOVE_to_MEM(W_X,16, W_I) ;\

static INLINE void Block_0_1(word64 *W_X) { Block_xx_1(0) ; }
static INLINE void Block_0_2(word64 *W_X) { Block_xx_2(0) ; }
static INLINE void Block_0_3(void) { Block_xx_3(0) ; }
static INLINE void Block_0_4(void) { Block_xx_4(0) ; }
static INLINE void Block_0_5(void) { Block_xx_5(0) ; }
static INLINE void Block_0_6(word64 *W_X) { Block_xx_6(0) ; }
static INLINE void Block_0_7(word64 *W_X) { Block_xx_7(2) ; }
static INLINE void Block_0_8(word64 *W_X) { Block_xx_8(2) ; }
static INLINE void Block_0_9(void) { Block_xx_9(2) ; }
static INLINE void Block_0_10(void){ Block_xx_10(2) ; }
static INLINE void Block_0_11(void){ Block_xx_11(2) ; }
static INLINE void Block_0_12(word64 *W_X){ Block_xx_12(2) ; }

static INLINE void Block_4_1(word64 *W_X) { Block_xx_1(4) ; }
static INLINE void Block_4_2(word64 *W_X) { Block_xx_2(4) ; }
static INLINE void Block_4_3(void) { Block_xx_3(4) ; }
static INLINE void Block_4_4(void) { Block_xx_4(4) ; }
static INLINE void Block_4_5(void) { Block_xx_5(4) ; }
static INLINE void Block_4_6(word64 *W_X) { Block_xx_6(4) ; }
static INLINE void Block_4_7(word64 *W_X) { Block_xx_7(6) ; }
static INLINE void Block_4_8(word64 *W_X) { Block_xx_8(6) ; }
static INLINE void Block_4_9(void) { Block_xx_9(6) ; }
static INLINE void Block_4_10(void){ Block_xx_10(6) ; }
static INLINE void Block_4_11(void){ Block_xx_11(6) ; }
static INLINE void Block_4_12(word64 *W_X){ Block_xx_12(6) ; }

static INLINE void Block_8_1(word64 *W_X) { Block_xx_1(8) ; }
static INLINE void Block_8_2(word64 *W_X) { Block_xx_2(8) ; }
static INLINE void Block_8_3(void) { Block_xx_3(8) ; }
static INLINE void Block_8_4(void) { Block_xx_4(8) ; }
static INLINE void Block_8_5(void) { Block_xx_5(8) ; }
static INLINE void Block_8_6(word64 *W_X) { Block_xx_6(8) ; }
static INLINE void Block_8_7(word64 *W_X) { Block_xx_7(10) ; }
static INLINE void Block_8_8(word64 *W_X) { Block_xx_8(10) ; }
static INLINE void Block_8_9(void) { Block_xx_9(10) ; }
static INLINE void Block_8_10(void){ Block_xx_10(10) ; }
static INLINE void Block_8_11(void){ Block_xx_11(10) ; }
static INLINE void Block_8_12(word64 *W_X){ Block_xx_12(10) ; }

static INLINE void Block_12_1(word64 *W_X) { Block_xx_1(12) ; }
static INLINE void Block_12_2(word64 *W_X) { Block_xx_2(12) ; }
static INLINE void Block_12_3(void) { Block_xx_3(12) ; }
static INLINE void Block_12_4(void) { Block_xx_4(12) ; }
static INLINE void Block_12_5(void) { Block_xx_5(12) ; }
static INLINE void Block_12_6(word64 *W_X) { Block_xx_6(12) ; }
static INLINE void Block_12_7(word64 *W_X) { Block_xx_7(14) ; }
static INLINE void Block_12_8(word64 *W_X) { Block_xx_8(14) ; }
static INLINE void Block_12_9(void) { Block_xx_9(14) ; }
static INLINE void Block_12_10(void){ Block_xx_10(14) ; }
static INLINE void Block_12_11(void){ Block_xx_11(14) ; }
static INLINE void Block_12_12(word64 *W_X){ Block_xx_12(14) ; }

#endif

#if defined(HAVE_INTEL_AVX2)
static const unsigned long mBYTE_FLIP_MASK_Y[] =
    { 0x0001020304050607, 0x08090a0b0c0d0e0f, 0x0001020304050607, 0x08090a0b0c0d0e0f } ;

#define W_from_buff_Y(buff)\
    { /* X0..3(ymm9..12), W_X[0..15] = sha512->buffer[0..15]; */\
    __asm__ volatile("vmovdqu %0, %%ymm8\n\t"::"m"(mBYTE_FLIP_MASK_Y[0]):YMM_REGs) ;\
    __asm__ volatile("vmovdqu %0, %%ymm12\n\t"\
                     "vmovdqu %1, %%ymm4\n\t"\
                     "vpshufb %%ymm8, %%ymm12, %%ymm12\n\t"\
                     "vpshufb %%ymm8, %%ymm4, %%ymm4\n\t"\
                     :: "m"(buff[0]), "m"(buff[4]):YMM_REGs) ;\
    __asm__ volatile("vmovdqu %0, %%ymm5\n\t"\
                     "vmovdqu %1, %%ymm6\n\t"\
                     "vpshufb %%ymm8, %%ymm5, %%ymm5\n\t"\
                     "vpshufb %%ymm8, %%ymm6, %%ymm6\n\t"\
                     :: "m"(buff[8]), "m"(buff[12]):YMM_REGs) ;\
    }

#if defined(DEBUG_YMM)

#define SAVE_REG_Y(i) __asm__ volatile("vmovdqu %%ymm"#i", %0 \n\t":"=m"(reg[i-4][0])::YMM_REGs);
#define RECV_REG_Y(i) __asm__ volatile("vmovdqu %0, %%ymm"#i" \n\t"::"m"(reg[i-4][0]):YMM_REGs);

#define _DUMP_REG_Y(REG, name)\
    { word64 buf[16] ; word64 reg[16][2]; int k ;\
      SAVE_REG_Y(4);  SAVE_REG_Y(5);  SAVE_REG_Y(6);  SAVE_REG_Y(7); \
      SAVE_REG_Y(8);  SAVE_REG_Y(9);  SAVE_REG_Y(10); SAVE_REG_Y(11); SAVE_REG_Y(12);\
      SAVE_REG_Y(13); SAVE_REG_Y(14); SAVE_REG_Y(15); \
      __asm__ volatile("vmovdqu %%"#REG", %0 \n\t":"=m"(buf[0])::YMM_REGs);\
      printf(" "#name":\t") ; for(k=0; k<4; k++) printf("%016lx.", (word64)buf[k]) ; printf("\n") ; \
      RECV_REG_Y(4);  RECV_REG_Y(5);  RECV_REG_Y(6);  RECV_REG_Y(7); \
      RECV_REG_Y(8);  RECV_REG_Y(9);  RECV_REG_Y(10); RECV_REG_Y(11); RECV_REG_Y(12); \
      RECV_REG_Y(13); RECV_REG_Y(14); RECV_REG_Y(15);\
    }

#define DUMP_REG_Y(REG)  _DUMP_REG_Y(REG, #REG)
#define DUMP_REG2_Y(REG) _DUMP_REG_Y(REG, #REG)
#define PRINTF_Y(fmt, ...)

#else

#define DUMP_REG_Y(REG)
#define DUMP_REG2_Y(REG)
#define PRINTF_Y(fmt, ...)

#endif

#define _MOVE_to_REGy(ymm, mem)           __asm__ volatile("vmovdqu %0, %%"#ymm" "\
        :: "m"(mem):YMM_REGs) ;
#define _MOVE_to_MEMy(mem,i, ymm)         __asm__ volatile("vmovdqu %%"#ymm", %0" \
        : "=m"(mem[i]),"=m"(mem[i+1]),"=m"(mem[i+2]),"=m"(mem[i+3])::YMM_REGs) ;
#define _MOVE_128y(ymm0, ymm1, ymm2, map) __asm__ volatile("vperm2i128 $"\
        #map", %%"#ymm2", %%"#ymm1", %%"#ymm0" ":::YMM_REGs) ;
#define _S_TEMPy(dest, src, bits, temp) \
        __asm__ volatile("vpsrlq $"#bits", %%"#src", %%"#dest"\n\tvpsllq $64-"#bits\
            ", %%"#src", %%"#temp"\n\tvpor %%"#temp",%%"#dest", %%"#dest" ":::YMM_REGs) ;
#define _AVX2_R(dest, src, bits)          __asm__ volatile("vpsrlq $"#bits", %%"\
        #src", %%"#dest" ":::YMM_REGs) ;
#define _XORy(dest, src1, src2)           __asm__ volatile("vpxor %%"#src1", %%"\
        #src2", %%"#dest" ":::YMM_REGs) ;
#define _ADDy(dest, src1, src2)           __asm__ volatile("vpaddq %%"#src1", %%"\
        #src2", %%"#dest" ":::YMM_REGs) ;
#define _BLENDy(map, dest, src1, src2)    __asm__ volatile("vpblendd $"#map", %%"\
        #src1", %%"#src2", %%"#dest" ":::YMM_REGs) ;
#define _BLENDQy(map, dest, src1, src2)   __asm__ volatile("vblendpd $"#map", %%"\
        #src1", %%"#src2", %%"#dest" ":::YMM_REGs) ;
#define _PERMQy(map, dest, src)           __asm__ volatile("vpermq $"#map", %%"\
        #src", %%"#dest" ":::YMM_REGs) ;

#define MOVE_to_REGy(ymm, mem)           _MOVE_to_REGy(ymm, mem)
#define MOVE_to_MEMy(mem, i, ymm)        _MOVE_to_MEMy(mem, i, ymm)

#define MOVE_128y(ymm0, ymm1, ymm2, map) _MOVE_128y(ymm0, ymm1, ymm2, map)
#define XORy(dest, src1, src2)           _XORy(dest, src1, src2)
#define ADDy(dest, src1, src2)           _ADDy(dest, src1, src2)
#define BLENDy(map, dest, src1, src2)    _BLENDy(map, dest, src1, src2)
#define BLENDQy(map, dest, src1, src2)   _BLENDQy(map, dest, src1, src2)
#define PERMQy(map, dest, src)           _PERMQy(map, dest, src)


#define S_TMPy(dest, src, bits, temp) _S_TEMPy(dest, src, bits, temp);
#define AVX2_S(dest, src, bits)       S_TMPy(dest, src, bits, S_TEMPy)
#define AVX2_R(dest, src, bits)       _AVX2_R(dest, src, bits)


#define FEEDBACK1_to_W_I_2(w_i_2, w_i) MOVE_128y(YMM_TEMP0, w_i, w_i, 0x08) ;\
                                       BLENDy(0xf0, w_i_2, YMM_TEMP0, w_i_2) ;

#define MOVE_W_to_W_I_15(w_i_15, w_0, w_4) BLENDQy(0x1, w_i_15, w_4, w_0) ;\
                                           PERMQy(0x39, w_i_15, w_i_15) ;
#define MOVE_W_to_W_I_7(w_i_7, w_8, w_12)  BLENDQy(0x1, w_i_7, w_12, w_8) ;\
                                           PERMQy(0x39, w_i_7, w_i_7) ;
#define MOVE_W_to_W_I_2(w_i_2, w_12)       BLENDQy(0xc, w_i_2, w_12, w_i_2) ;\
                                           PERMQy(0x0e, w_i_2, w_i_2) ;


#define W_I_16y    ymm8
#define W_I_15y    ymm9
#define W_I_7y     ymm10
#define W_I_2y     ymm11
#define W_Iy       ymm12
#define G_TEMPy    ymm13
#define S_TEMPy    ymm14
#define YMM_TEMP0  ymm15
#define YMM_TEMP0x xmm15
#define W_I_TEMPy  ymm7
#define W_K_TEMPy  ymm15
#define W_K_TEMPx  xmm15
#define W_0y       ymm12
#define W_4y       ymm4
#define W_8y       ymm5
#define W_12y      ymm6

#define YMM_REGs
/* Registers are saved in Sha512Update/Final */
/* "%ymm7","%ymm8","%ymm9","%ymm10","%ymm11","%ymm12","%ymm13","%ymm14","%ymm15"*/

#define MOVE_15_to_16(w_i_16, w_i_15, w_i_7)\
    __asm__ volatile("vperm2i128 $0x01, %%"#w_i_15", %%"#w_i_15", %%"#w_i_15" ":::YMM_REGs) ;\
    __asm__ volatile("vpblendd $0x08, %%"#w_i_15", %%"#w_i_7", %%"#w_i_16" ":::YMM_REGs) ;\
    __asm__ volatile("vperm2i128 $0x01, %%"#w_i_7", %%"#w_i_7", %%"#w_i_15" ":::YMM_REGs) ;\
    __asm__ volatile("vpblendd $0x80, %%"#w_i_15", %%"#w_i_16", %%"#w_i_16" ":::YMM_REGs) ;\
    __asm__ volatile("vpshufd $0x93, %%"#w_i_16", %%"#w_i_16" ":::YMM_REGs) ;\

#define MOVE_7_to_15(w_i_15, w_i_7)\
    __asm__ volatile("vmovdqu %%"#w_i_7", %%"#w_i_15" ":::YMM_REGs) ;\

#define MOVE_I_to_7(w_i_7, w_i)\
    __asm__ volatile("vperm2i128 $0x01, %%"#w_i", %%"#w_i", %%"#w_i_7" ":::YMM_REGs) ;\
    __asm__ volatile("vpblendd $0x01, %%"#w_i_7", %%"#w_i", %%"#w_i_7" ":::YMM_REGs) ;\
    __asm__ volatile("vpshufd $0x39, %%"#w_i_7", %%"#w_i_7" ":::YMM_REGs) ;\

#define MOVE_I_to_2(w_i_2, w_i)\
    __asm__ volatile("vperm2i128 $0x01, %%"#w_i", %%"#w_i", %%"#w_i_2" ":::YMM_REGs) ;\
    __asm__ volatile("vpshufd $0x0e, %%"#w_i_2", %%"#w_i_2" ":::YMM_REGs) ;\

#endif


/*** Transform Body ***/
#if defined(HAVE_INTEL_AVX1)

static int Transform_AVX1(Sha512* sha512)
{
    const word64* K = K512;
    word64 W_X[16+4];
    word32 j;
    word64 T[8];

    /* Copy digest to working vars */
    XMEMCPY(T, sha512->digest, sizeof(T));

    W_from_buff(W_X, sha512->buffer) ;
    for (j = 0; j < 80; j += 16) {
        Rx_1( 0); Block_0_1(W_X); Rx_2( 0); Block_0_2(W_X); Rx_3( 0); Block_0_3();
        Rx_1( 1); Block_0_4();    Rx_2( 1); Block_0_5();    Rx_3( 1); Block_0_6(W_X);
        Rx_1( 2); Block_0_7(W_X); Rx_2( 2); Block_0_8(W_X); Rx_3( 2); Block_0_9();
        Rx_1( 3); Block_0_10();   Rx_2( 3); Block_0_11();   Rx_3( 3); Block_0_12(W_X);

        Rx_1( 4); Block_4_1(W_X); Rx_2( 4); Block_4_2(W_X); Rx_3( 4); Block_4_3();
        Rx_1( 5); Block_4_4();    Rx_2( 5); Block_4_5();    Rx_3( 5); Block_4_6(W_X);
        Rx_1( 6); Block_4_7(W_X); Rx_2( 6); Block_4_8(W_X); Rx_3( 6); Block_4_9();
        Rx_1( 7); Block_4_10();   Rx_2( 7); Block_4_11();   Rx_3( 7); Block_4_12(W_X);

        Rx_1( 8); Block_8_1(W_X); Rx_2( 8); Block_8_2(W_X); Rx_3( 8); Block_8_3();
        Rx_1( 9); Block_8_4();    Rx_2( 9); Block_8_5();    Rx_3( 9); Block_8_6(W_X);
        Rx_1(10); Block_8_7(W_X); Rx_2(10); Block_8_8(W_X); Rx_3(10); Block_8_9();
        Rx_1(11); Block_8_10();   Rx_2(11); Block_8_11();   Rx_3(11); Block_8_12(W_X);

        Rx_1(12); Block_12_1(W_X); Rx_2(12); Block_12_2(W_X); Rx_3(12); Block_12_3();
        Rx_1(13); Block_12_4();    Rx_2(13); Block_12_5();    Rx_3(13); Block_12_6(W_X);
        Rx_1(14); Block_12_7(W_X); Rx_2(14); Block_12_8(W_X); Rx_3(14); Block_12_9();
        Rx_1(15); Block_12_10();   Rx_2(15); Block_12_11();   Rx_3(15); Block_12_12(W_X);
    }

    /* Add the working vars back into digest */

    sha512->digest[0] += a(0);
    sha512->digest[1] += b(0);
    sha512->digest[2] += c(0);
    sha512->digest[3] += d(0);
    sha512->digest[4] += e(0);
    sha512->digest[5] += f(0);
    sha512->digest[6] += g(0);
    sha512->digest[7] += h(0);

    /* Wipe variables */
#if !defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2)
    XMEMSET(W_X, 0, sizeof(word64) * 16);
#endif
    XMEMSET(T, 0, sizeof(T));

    return 0;
}

#endif
#if defined(HAVE_INTEL_AVX2) && defined(HAVE_INTEL_AVX1) && defined(HAVE_INTEL_RORX)

static int Transform_AVX1_RORX(Sha512* sha512)
{
    const word64* K = K512;
    word64 W_X[16+4];
    word32 j;
    word64 T[8];

    /* Copy digest to working vars */
    XMEMCPY(T, sha512->digest, sizeof(T));

    W_from_buff(W_X, sha512->buffer) ;
    for (j = 0; j < 80; j += 16) {
        Rx_RORX_1( 0); Block_0_1(W_X); Rx_RORX_2( 0); Block_0_2(W_X);
        Rx_RORX_3( 0); Block_0_3();
        Rx_RORX_1( 1); Block_0_4();    Rx_RORX_2( 1); Block_0_5();
        Rx_RORX_3( 1); Block_0_6(W_X);
        Rx_RORX_1( 2); Block_0_7(W_X); Rx_RORX_2( 2); Block_0_8(W_X);
        Rx_RORX_3( 2); Block_0_9();
        Rx_RORX_1( 3); Block_0_10();   Rx_RORX_2( 3); Block_0_11();
        Rx_RORX_3( 3); Block_0_12(W_X);

        Rx_RORX_1( 4); Block_4_1(W_X); Rx_RORX_2( 4); Block_4_2(W_X);
        Rx_RORX_3( 4); Block_4_3();
        Rx_RORX_1( 5); Block_4_4();    Rx_RORX_2( 5); Block_4_5();
        Rx_RORX_3( 5); Block_4_6(W_X);
        Rx_RORX_1( 6); Block_4_7(W_X); Rx_RORX_2( 6); Block_4_8(W_X);
        Rx_RORX_3( 6); Block_4_9();
        Rx_RORX_1( 7); Block_4_10();   Rx_RORX_2( 7); Block_4_11();
        Rx_RORX_3( 7); Block_4_12(W_X);

        Rx_RORX_1( 8); Block_8_1(W_X); Rx_RORX_2( 8); Block_8_2(W_X);
        Rx_RORX_3( 8); Block_8_3();
        Rx_RORX_1( 9); Block_8_4();    Rx_RORX_2( 9); Block_8_5();
        Rx_RORX_3( 9); Block_8_6(W_X);
        Rx_RORX_1(10); Block_8_7(W_X); Rx_RORX_2(10); Block_8_8(W_X);
        Rx_RORX_3(10); Block_8_9();
        Rx_RORX_1(11); Block_8_10();   Rx_RORX_2(11); Block_8_11();
        Rx_RORX_3(11); Block_8_12(W_X);

        Rx_RORX_1(12); Block_12_1(W_X); Rx_RORX_2(12); Block_12_2(W_X);
        Rx_RORX_3(12); Block_12_3();
        Rx_RORX_1(13); Block_12_4();    Rx_RORX_2(13); Block_12_5();
        Rx_RORX_3(13); Block_12_6(W_X);
        Rx_RORX_1(14); Block_12_7(W_X); Rx_RORX_2(14); Block_12_8(W_X);
        Rx_RORX_3(14); Block_12_9();
        Rx_RORX_1(15); Block_12_10();   Rx_RORX_2(15); Block_12_11();
        Rx_RORX_3(15); Block_12_12(W_X);
    }
    /* Add the working vars back into digest */

    sha512->digest[0] += a(0);
    sha512->digest[1] += b(0);
    sha512->digest[2] += c(0);
    sha512->digest[3] += d(0);
    sha512->digest[4] += e(0);
    sha512->digest[5] += f(0);
    sha512->digest[6] += g(0);
    sha512->digest[7] += h(0);

    /* Wipe variables */
#if !defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2)
    XMEMSET(W_X, 0, sizeof(word64) * 16);
#endif
    XMEMSET(T, 0, sizeof(T));

    return 0;
}
#endif

#if defined(HAVE_INTEL_AVX2)

#define s0_1y(dest, src) AVX2_S(dest, src, 1);
#define s0_2y(dest, src) AVX2_S(G_TEMPy, src, 8); XORy(dest, G_TEMPy, dest) ;
#define s0_3y(dest, src) AVX2_R(G_TEMPy, src, 7); XORy(dest, G_TEMPy, dest) ;

#define s1_1y(dest, src) AVX2_S(dest, src, 19);
#define s1_2y(dest, src) AVX2_S(G_TEMPy, src, 61); XORy(dest, G_TEMPy, dest) ;
#define s1_3y(dest, src) AVX2_R(G_TEMPy, src, 6);  XORy(dest, G_TEMPy, dest) ;

#define s0_y(dest, src) s0_1y(dest, src) ; s0_2y(dest, src) ; s0_3y(dest, src)
#define s1_y(dest, src) s1_1y(dest, src) ; s1_2y(dest, src) ; s1_3y(dest, src)

#define blk384(i) (W[i] = sha384->buffer[i])


#define Block_Y_xx_1(i, w_0, w_4, w_8, w_12)\
    MOVE_W_to_W_I_15(W_I_15y, w_0, w_4) ;\
    MOVE_W_to_W_I_7 (W_I_7y,  w_8, w_12) ;\
    MOVE_W_to_W_I_2 (W_I_2y,  w_12) ;\

#define Block_Y_xx_2(i, w_0, w_4, w_8, w_12)\
    s0_1y (YMM_TEMP0, W_I_15y) ;\

#define Block_Y_xx_3(i, w_0, w_4, w_8, w_12)\
    s0_2y (YMM_TEMP0, W_I_15y) ;\

#define Block_Y_xx_4(i, w_0, w_4, w_8, w_12)\
    s0_3y (YMM_TEMP0, W_I_15y) ;\

#define Block_Y_xx_5(i, w_0, w_4, w_8, w_12)\
    ADDy(W_I_TEMPy, w_0, YMM_TEMP0) ;\

#define Block_Y_xx_6(i, w_0, w_4, w_8, w_12)\
    ADDy(W_I_TEMPy, W_I_TEMPy, W_I_7y) ;\
    s1_1y (YMM_TEMP0, W_I_2y) ;\

#define Block_Y_xx_7(i, w_0, w_4, w_8, w_12)\
    s1_2y (YMM_TEMP0, W_I_2y) ;\

#define Block_Y_xx_8(i, w_0, w_4, w_8, w_12)\
    s1_3y (YMM_TEMP0, W_I_2y) ;\
    ADDy(w_0, W_I_TEMPy, YMM_TEMP0) ;\

#define Block_Y_xx_9(i, w_0, w_4, w_8, w_12)\
    FEEDBACK1_to_W_I_2(W_I_2y, w_0) ;\

#define Block_Y_xx_10(i, w_0, w_4, w_8, w_12) \
    s1_1y (YMM_TEMP0, W_I_2y) ;\

#define Block_Y_xx_11(i, w_0, w_4, w_8, w_12) \
    s1_2y (YMM_TEMP0, W_I_2y) ;\

#define Block_Y_xx_12(i, w_0, w_4, w_8, w_12)\
    s1_3y (YMM_TEMP0, W_I_2y) ;\
    ADDy(w_0, W_I_TEMPy, YMM_TEMP0) ;\
    MOVE_to_MEMy(w,0, w_4) ;\


static INLINE void Block_Y_0_1(void) { Block_Y_xx_1(0, W_0y, W_4y, W_8y, W_12y) ; }
static INLINE void Block_Y_0_2(void) { Block_Y_xx_2(0, W_0y, W_4y, W_8y, W_12y) ; }
static INLINE void Block_Y_0_3(void) { Block_Y_xx_3(0, W_0y, W_4y, W_8y, W_12y) ; }
static INLINE void Block_Y_0_4(void) { Block_Y_xx_4(0, W_0y, W_4y, W_8y, W_12y) ; }
static INLINE void Block_Y_0_5(void) { Block_Y_xx_5(0, W_0y, W_4y, W_8y, W_12y) ; }
static INLINE void Block_Y_0_6(void) { Block_Y_xx_6(0, W_0y, W_4y, W_8y, W_12y) ; }
static INLINE void Block_Y_0_7(void) { Block_Y_xx_7(0, W_0y, W_4y, W_8y, W_12y) ; }
static INLINE void Block_Y_0_8(void) { Block_Y_xx_8(0, W_0y, W_4y, W_8y, W_12y) ; }
static INLINE void Block_Y_0_9(void) { Block_Y_xx_9(0, W_0y, W_4y, W_8y, W_12y) ; }
static INLINE void Block_Y_0_10(void){ Block_Y_xx_10(0, W_0y, W_4y, W_8y, W_12y) ; }
static INLINE void Block_Y_0_11(void){ Block_Y_xx_11(0, W_0y, W_4y, W_8y, W_12y) ; }
static INLINE void Block_Y_0_12(word64 *w){ Block_Y_xx_12(0, W_0y, W_4y, W_8y, W_12y) ; }

static INLINE void Block_Y_4_1(void) { Block_Y_xx_1(4, W_4y, W_8y, W_12y, W_0y) ; }
static INLINE void Block_Y_4_2(void) { Block_Y_xx_2(4, W_4y, W_8y, W_12y, W_0y) ; }
static INLINE void Block_Y_4_3(void) { Block_Y_xx_3(4, W_4y, W_8y, W_12y, W_0y) ; }
static INLINE void Block_Y_4_4(void) { Block_Y_xx_4(4, W_4y, W_8y, W_12y, W_0y) ; }
static INLINE void Block_Y_4_5(void) { Block_Y_xx_5(4, W_4y, W_8y, W_12y, W_0y) ; }
static INLINE void Block_Y_4_6(void) { Block_Y_xx_6(4, W_4y, W_8y, W_12y, W_0y) ; }
static INLINE void Block_Y_4_7(void) { Block_Y_xx_7(4, W_4y, W_8y, W_12y, W_0y) ; }
static INLINE void Block_Y_4_8(void) { Block_Y_xx_8(4, W_4y, W_8y, W_12y, W_0y) ; }
static INLINE void Block_Y_4_9(void) { Block_Y_xx_9(4, W_4y, W_8y, W_12y, W_0y) ; }
static INLINE void Block_Y_4_10(void) { Block_Y_xx_10(4, W_4y, W_8y, W_12y, W_0y) ; }
static INLINE void Block_Y_4_11(void) { Block_Y_xx_11(4, W_4y, W_8y, W_12y, W_0y) ; }
static INLINE void Block_Y_4_12(word64 *w) { Block_Y_xx_12(4, W_4y, W_8y, W_12y, W_0y) ; }

static INLINE void Block_Y_8_1(void) { Block_Y_xx_1(8, W_8y, W_12y, W_0y, W_4y) ; }
static INLINE void Block_Y_8_2(void) { Block_Y_xx_2(8, W_8y, W_12y, W_0y, W_4y) ; }
static INLINE void Block_Y_8_3(void) { Block_Y_xx_3(8, W_8y, W_12y, W_0y, W_4y) ; }
static INLINE void Block_Y_8_4(void) { Block_Y_xx_4(8, W_8y, W_12y, W_0y, W_4y) ; }
static INLINE void Block_Y_8_5(void) { Block_Y_xx_5(8, W_8y, W_12y, W_0y, W_4y) ; }
static INLINE void Block_Y_8_6(void) { Block_Y_xx_6(8, W_8y, W_12y, W_0y, W_4y) ; }
static INLINE void Block_Y_8_7(void) { Block_Y_xx_7(8, W_8y, W_12y, W_0y, W_4y) ; }
static INLINE void Block_Y_8_8(void) { Block_Y_xx_8(8, W_8y, W_12y, W_0y, W_4y) ; }
static INLINE void Block_Y_8_9(void) { Block_Y_xx_9(8, W_8y, W_12y, W_0y, W_4y) ; }
static INLINE void Block_Y_8_10(void) { Block_Y_xx_10(8, W_8y, W_12y, W_0y, W_4y) ; }
static INLINE void Block_Y_8_11(void) { Block_Y_xx_11(8, W_8y, W_12y, W_0y, W_4y) ; }
static INLINE void Block_Y_8_12(word64 *w) { Block_Y_xx_12(8, W_8y, W_12y, W_0y, W_4y) ; }

static INLINE void Block_Y_12_1(void) { Block_Y_xx_1(12, W_12y, W_0y, W_4y, W_8y) ; }
static INLINE void Block_Y_12_2(void) { Block_Y_xx_2(12, W_12y, W_0y, W_4y, W_8y) ; }
static INLINE void Block_Y_12_3(void) { Block_Y_xx_3(12, W_12y, W_0y, W_4y, W_8y) ; }
static INLINE void Block_Y_12_4(void) { Block_Y_xx_4(12, W_12y, W_0y, W_4y, W_8y) ; }
static INLINE void Block_Y_12_5(void) { Block_Y_xx_5(12, W_12y, W_0y, W_4y, W_8y) ; }
static INLINE void Block_Y_12_6(void) { Block_Y_xx_6(12, W_12y, W_0y, W_4y, W_8y) ; }
static INLINE void Block_Y_12_7(void) { Block_Y_xx_7(12, W_12y, W_0y, W_4y, W_8y) ; }
static INLINE void Block_Y_12_8(void) { Block_Y_xx_8(12, W_12y, W_0y, W_4y, W_8y) ; }
static INLINE void Block_Y_12_9(void) { Block_Y_xx_9(12, W_12y, W_0y, W_4y, W_8y) ; }
static INLINE void Block_Y_12_10(void) { Block_Y_xx_10(12, W_12y, W_0y, W_4y, W_8y) ; }
static INLINE void Block_Y_12_11(void) { Block_Y_xx_11(12, W_12y, W_0y, W_4y, W_8y) ; }
static INLINE void Block_Y_12_12(word64 *w) { Block_Y_xx_12(12, W_12y, W_0y, W_4y, W_8y) ; }
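/* Four-wide schedule (descriptive): each Block_Y_xx_* sequence updates four
 * message words W[i..i+3] held in one YMM register.  W[i+2] and W[i+3]
 * depend on s1() of W[i] and W[i+1] produced in the same step, so after the
 * first s1 pass (steps 6..8) FEEDBACK1_to_W_I_2() splices the freshly
 * computed low lanes back into W_I_2y, and steps 10..12 run a second s1
 * pass over them before the result is stored.
 */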

static int Transform_AVX2(Sha512* sha512)
{
    const word64* K = K512;
    word64 w[4] ;
    word32 j /*, k*/;
    word64 T[8];

    /* Copy digest to working vars */
    XMEMCPY(T, sha512->digest, sizeof(T));

    W_from_buff_Y(sha512->buffer) ;
    MOVE_to_MEMy(w,0, W_0y) ;
    for (j = 0; j < 80; j += 16) {
        Ry_1( 0, w[0]); Block_Y_0_1();  Ry_2( 0, w[0]); Block_Y_0_2();
        Ry_3( 0, w[0]); Block_Y_0_3();
        Ry_1( 1, w[1]); Block_Y_0_4();  Ry_2( 1, w[1]); Block_Y_0_5();
        Ry_3( 1, w[1]); Block_Y_0_6();
        Ry_1( 2, w[2]); Block_Y_0_7();  Ry_2( 2, w[2]); Block_Y_0_8();
        Ry_3( 2, w[2]); Block_Y_0_9();
        Ry_1( 3, w[3]); Block_Y_0_10(); Ry_2( 3, w[3]); Block_Y_0_11();
        Ry_3( 3, w[3]); Block_Y_0_12(w);

        Ry_1( 4, w[0]); Block_Y_4_1();  Ry_2( 4, w[0]); Block_Y_4_2();
        Ry_3( 4, w[0]); Block_Y_4_3();
        Ry_1( 5, w[1]); Block_Y_4_4();  Ry_2( 5, w[1]); Block_Y_4_5();
        Ry_3( 5, w[1]); Block_Y_4_6();
        Ry_1( 6, w[2]); Block_Y_4_7();  Ry_2( 6, w[2]); Block_Y_4_8();
        Ry_3( 6, w[2]); Block_Y_4_9();
        Ry_1( 7, w[3]); Block_Y_4_10(); Ry_2( 7, w[3]); Block_Y_4_11();
        Ry_3( 7, w[3]); Block_Y_4_12(w);

        Ry_1( 8, w[0]); Block_Y_8_1();  Ry_2( 8, w[0]); Block_Y_8_2();
        Ry_3( 8, w[0]); Block_Y_8_3();
        Ry_1( 9, w[1]); Block_Y_8_4();  Ry_2( 9, w[1]); Block_Y_8_5();
        Ry_3( 9, w[1]); Block_Y_8_6();
        Ry_1(10, w[2]); Block_Y_8_7();  Ry_2(10, w[2]); Block_Y_8_8();
        Ry_3(10, w[2]); Block_Y_8_9();
        Ry_1(11, w[3]); Block_Y_8_10(); Ry_2(11, w[3]); Block_Y_8_11();
        Ry_3(11, w[3]); Block_Y_8_12(w);

        Ry_1(12, w[0]); Block_Y_12_1(); Ry_2(12, w[0]); Block_Y_12_2();
        Ry_3(12, w[0]); Block_Y_12_3();
        Ry_1(13, w[1]); Block_Y_12_4(); Ry_2(13, w[1]); Block_Y_12_5();
        Ry_3(13, w[1]); Block_Y_12_6();
        Ry_1(14, w[2]); Block_Y_12_7(); Ry_2(14, w[2]); Block_Y_12_8();
        Ry_3(14, w[2]); Block_Y_12_9();
        Ry_1(15, w[3]); Block_Y_12_10();Ry_2(15, w[3]); Block_Y_12_11();
        Ry_3(15, w[3]); Block_Y_12_12(w);
    }

    /* Add the working vars back into digest */

    sha512->digest[0] += a(0);
    sha512->digest[1] += b(0);
    sha512->digest[2] += c(0);
    sha512->digest[3] += d(0);
    sha512->digest[4] += e(0);
    sha512->digest[5] += f(0);
    sha512->digest[6] += g(0);
    sha512->digest[7] += h(0);

    /* Wipe variables */
#if !defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2)
    XMEMSET(W, 0, sizeof(word64) * 16);
#endif
    XMEMSET(T, 0, sizeof(T));

    return 0;
}

#endif


#ifdef WOLFSSL_SHA384

#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)

#if defined(HAVE_INTEL_AVX1)
static int Transform384_AVX1(Sha384 *sha384) ;
#endif
#if defined(HAVE_INTEL_AVX2)
static int Transform384_AVX2(Sha384 *sha384) ;
#endif

#if defined(HAVE_INTEL_AVX1) && defined(HAVE_INTEL_AVX2) && defined(HAVE_INTEL_RORX)
static int Transform384_AVX1_RORX(Sha384 *sha384) ;
#endif

static int _Transform384(Sha384 *sha384) ;
static int (*Transform384_p)(Sha384* sha384) = _Transform384 ;

#define Transform384(sha384) (*Transform384_p)(sha384)

static void set_Transform384(void) {
    if (set_cpuid_flags(CHECK_SHA384)) return ;

#if defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2)
    Transform384_p = ((IS_INTEL_AVX1) ? Transform384_AVX1 : _Transform384) ;
#elif defined(HAVE_INTEL_AVX2)
    #if defined(HAVE_INTEL_AVX1) && defined(HAVE_INTEL_RORX)
    if (IS_INTEL_AVX2 && IS_INTEL_BMI2) { Transform384_p = Transform384_AVX1_RORX ; return ; }
    #endif
    if (IS_INTEL_AVX2) { Transform384_p = Transform384_AVX2 ; return ; }
    #if defined(HAVE_INTEL_AVX1)
    Transform384_p = ((IS_INTEL_AVX1) ? Transform384_AVX1 : _Transform384) ;
    #endif
#else
    Transform384_p = ((IS_INTEL_AVX1) ? Transform384_AVX1 : _Transform384) ;
#endif
}

#else
#define Transform384(sha384) _Transform384(sha384)
#endif
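/* SHA-384 (descriptive): everything below reuses the SHA-512 machinery, the
 * same K512 constants, round function, and 128-byte block size.  The only
 * differences are the initial digest values set in wc_InitSha384() and the
 * truncation of the output to SHA384_DIGEST_SIZE (48) bytes in
 * wc_Sha384Final().
 */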
int wc_InitSha384(Sha384* sha384)
{
    sha384->digest[0] = W64LIT(0xcbbb9d5dc1059ed8);
    sha384->digest[1] = W64LIT(0x629a292a367cd507);
    sha384->digest[2] = W64LIT(0x9159015a3070dd17);
    sha384->digest[3] = W64LIT(0x152fecd8f70e5939);
    sha384->digest[4] = W64LIT(0x67332667ffc00b31);
    sha384->digest[5] = W64LIT(0x8eb44a8768581511);
    sha384->digest[6] = W64LIT(0xdb0c2e0d64f98fa7);
    sha384->digest[7] = W64LIT(0x47b5481dbefa4fa4);

    sha384->buffLen = 0;
    sha384->loLen   = 0;
    sha384->hiLen   = 0;

#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
    set_Transform384() ;
#endif

    return 0;
}

static int _Transform384(Sha384* sha384)
{
    const word64* K = K512;

    word32 j;
    word64 T[8];

#ifdef WOLFSSL_SMALL_STACK
    word64* W;

    W = (word64*) XMALLOC(sizeof(word64) * 16, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    if (W == NULL)
        return MEMORY_E;
#else
    word64 W[16];
#endif

    /* Copy digest to working vars */
    XMEMCPY(T, sha384->digest, sizeof(T));

#ifdef USE_SLOW_SHA2
    /* about half the code size, but roughly 50% slower */
    /* 80 operations, not unrolled */
    for (j = 0; j < 80; j += 16) {
        int m;
        for (m = 0; m < 16; m++) { /* braces needed: R2() expands to multiple statements */
            R2(m);
        }
    }
#else
    /* 80 operations, partially loop unrolled */
    for (j = 0; j < 80; j += 16) {
        R2( 0); R2( 1); R2( 2); R2( 3);
        R2( 4); R2( 5); R2( 6); R2( 7);
        R2( 8); R2( 9); R2(10); R2(11);
        R2(12); R2(13); R2(14); R2(15);
    }
#endif /* USE_SLOW_SHA2 */

    /* Add the working vars back into digest */

    sha384->digest[0] += a(0);
    sha384->digest[1] += b(0);
    sha384->digest[2] += c(0);
    sha384->digest[3] += d(0);
    sha384->digest[4] += e(0);
    sha384->digest[5] += f(0);
    sha384->digest[6] += g(0);
    sha384->digest[7] += h(0);

    /* Wipe variables */
    XMEMSET(W, 0, sizeof(word64) * 16);
    XMEMSET(T, 0, sizeof(T));

#ifdef WOLFSSL_SMALL_STACK
    XFREE(W, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif

    return 0;
}

static INLINE void AddLength384(Sha384* sha384, word32 len)
{
    word32 tmp = sha384->loLen;
    if ( (sha384->loLen += len) < tmp)
        sha384->hiLen++;                       /* carry low to high */
}

int wc_Sha384Update(Sha384* sha384, const byte* data, word32 len)
{
    /* do block size increments */
    byte* local = (byte*)sha384->buffer;

    SAVE_XMM_YMM ; /* for Intel AVX */

    while (len) {
        word32 add = min(len, SHA384_BLOCK_SIZE - sha384->buffLen);
        XMEMCPY(&local[sha384->buffLen], data, add);

        sha384->buffLen += add;
        data            += add;
        len             -= add;

        if (sha384->buffLen == SHA384_BLOCK_SIZE) {
            int ret;

#if defined(LITTLE_ENDIAN_ORDER)
    #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
            if (!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
    #endif
                ByteReverseWords64(sha384->buffer, sha384->buffer,
                                   SHA384_BLOCK_SIZE);
#endif
            ret = Transform384(sha384);
            if (ret != 0)
                return ret;

            AddLength384(sha384, SHA384_BLOCK_SIZE);
            sha384->buffLen = 0;
        }
    }
    return 0;
}


int wc_Sha384Final(Sha384* sha384, byte* hash)
{
    byte* local = (byte*)sha384->buffer;
    int ret;

    SAVE_XMM_YMM ; /* for Intel AVX */
    AddLength384(sha384, sha384->buffLen);  /* before adding pads */

    local[sha384->buffLen++] = 0x80;        /* add 1 */

    /* pad with zeros */
    if (sha384->buffLen > SHA384_PAD_SIZE) {
        XMEMSET(&local[sha384->buffLen], 0, SHA384_BLOCK_SIZE - sha384->buffLen);
        sha384->buffLen += SHA384_BLOCK_SIZE - sha384->buffLen;

#if defined(LITTLE_ENDIAN_ORDER)
    #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
        if (!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
    #endif
            ByteReverseWords64(sha384->buffer, sha384->buffer,
                               SHA384_BLOCK_SIZE);
#endif
        ret = Transform384(sha384);
        if (ret != 0)
            return ret;

        sha384->buffLen = 0;
    }
    XMEMSET(&local[sha384->buffLen], 0, SHA384_PAD_SIZE - sha384->buffLen);

    /* put lengths in bits */
    sha384->hiLen = (sha384->loLen >> (8*sizeof(sha384->loLen) - 3)) +
                    (sha384->hiLen << 3);
    sha384->loLen = sha384->loLen << 3;

    /* store lengths */
#if defined(LITTLE_ENDIAN_ORDER)
    #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
    if (!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
    #endif
        ByteReverseWords64(sha384->buffer, sha384->buffer, SHA384_BLOCK_SIZE);
#endif
    /* ! length ordering dependent on digest endian type ! */
    sha384->buffer[SHA384_BLOCK_SIZE / sizeof(word64) - 2] = sha384->hiLen;
    sha384->buffer[SHA384_BLOCK_SIZE / sizeof(word64) - 1] = sha384->loLen;
#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
    if (IS_INTEL_AVX1 || IS_INTEL_AVX2)
        ByteReverseWords64(&(sha384->buffer[SHA384_BLOCK_SIZE / sizeof(word64) - 2]),
                           &(sha384->buffer[SHA384_BLOCK_SIZE / sizeof(word64) - 2]),
                           SHA384_BLOCK_SIZE - SHA384_PAD_SIZE);
#endif
    ret = Transform384(sha384);
    if (ret != 0)
        return ret;

#ifdef LITTLE_ENDIAN_ORDER
    ByteReverseWords64(sha384->digest, sha384->digest, SHA384_DIGEST_SIZE);
#endif
    XMEMCPY(hash, sha384->digest, SHA384_DIGEST_SIZE);

    return wc_InitSha384(sha384);  /* reset state */
}
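/* Usage sketch (illustrative; not part of the original source): the SHA-384
 * calls mirror the SHA-512 API above.
 *
 *   Sha384 sha;
 *   byte   digest[SHA384_DIGEST_SIZE];
 *
 *   wc_InitSha384(&sha);
 *   wc_Sha384Update(&sha, (const byte*)"abc", 3);
 *   wc_Sha384Final(&sha, digest);
 */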
#if defined(HAVE_INTEL_AVX1)

/* AVX1 compression function: the message schedule is computed in XMM
   registers, interleaved with the round macros */
static int Transform384_AVX1(Sha384* sha384)
{
    const word64* K = K512;
    word64 W_X[16+4];
    word32 j;
    word64 T[8];

    /* Copy digest to working vars */
    XMEMCPY(T, sha384->digest, sizeof(T));
    W_from_buff(W_X, sha384->buffer);
    for (j = 0; j < 80; j += 16) {
        Rx_1( 0); Block_0_1(W_X);  Rx_2( 0); Block_0_2(W_X);  Rx_3( 0); Block_0_3();
        Rx_1( 1); Block_0_4();     Rx_2( 1); Block_0_5();     Rx_3( 1); Block_0_6(W_X);
        Rx_1( 2); Block_0_7(W_X);  Rx_2( 2); Block_0_8(W_X);  Rx_3( 2); Block_0_9();
        Rx_1( 3); Block_0_10();    Rx_2( 3); Block_0_11();    Rx_3( 3); Block_0_12(W_X);

        Rx_1( 4); Block_4_1(W_X);  Rx_2( 4); Block_4_2(W_X);  Rx_3( 4); Block_4_3();
        Rx_1( 5); Block_4_4();     Rx_2( 5); Block_4_5();     Rx_3( 5); Block_4_6(W_X);
        Rx_1( 6); Block_4_7(W_X);  Rx_2( 6); Block_4_8(W_X);  Rx_3( 6); Block_4_9();
        Rx_1( 7); Block_4_10();    Rx_2( 7); Block_4_11();    Rx_3( 7); Block_4_12(W_X);

        Rx_1( 8); Block_8_1(W_X);  Rx_2( 8); Block_8_2(W_X);  Rx_3( 8); Block_8_3();
        Rx_1( 9); Block_8_4();     Rx_2( 9); Block_8_5();     Rx_3( 9); Block_8_6(W_X);
        Rx_1(10); Block_8_7(W_X);  Rx_2(10); Block_8_8(W_X);  Rx_3(10); Block_8_9();
        Rx_1(11); Block_8_10();    Rx_2(11); Block_8_11();    Rx_3(11); Block_8_12(W_X);

        Rx_1(12); Block_12_1(W_X); Rx_2(12); Block_12_2(W_X); Rx_3(12); Block_12_3();
        Rx_1(13); Block_12_4();    Rx_2(13); Block_12_5();    Rx_3(13); Block_12_6(W_X);
        Rx_1(14); Block_12_7(W_X); Rx_2(14); Block_12_8(W_X); Rx_3(14); Block_12_9();
        Rx_1(15); Block_12_10();   Rx_2(15); Block_12_11();   Rx_3(15); Block_12_12(W_X);
    }

    /* Add the working vars back into digest */
    sha384->digest[0] += a(0);
    sha384->digest[1] += b(0);
    sha384->digest[2] += c(0);
    sha384->digest[3] += d(0);
    sha384->digest[4] += e(0);
    sha384->digest[5] += f(0);
    sha384->digest[6] += g(0);
    sha384->digest[7] += h(0);

    /* Wipe variables (the array here is W_X; this guard can never be true
       inside the enclosing HAVE_INTEL_AVX1 block, kept for symmetry with
       _Transform384) */
#if !defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2)
    XMEMSET(W_X, 0, sizeof(word64) * 16);
#endif
    XMEMSET(T, 0, sizeof(T));

    return 0;
}

#endif

#if defined(HAVE_INTEL_AVX1) && defined(HAVE_INTEL_AVX2) && defined(HAVE_INTEL_RORX)
/* AVX1 message schedule combined with BMI2 RORX rotates in the rounds */
static int Transform384_AVX1_RORX(Sha384* sha384)
{
    const word64* K = K512;
    word64 W_X[16+4];
    word32 j;
    word64 T[8];

    /* Copy digest to working vars */
    XMEMCPY(T, sha384->digest, sizeof(T));

    W_from_buff(W_X, sha384->buffer);
    for (j = 0; j < 80; j += 16) {
        Rx_RORX_1( 0); Block_0_1(W_X);  Rx_RORX_2( 0); Block_0_2(W_X);  Rx_RORX_3( 0); Block_0_3();
        Rx_RORX_1( 1); Block_0_4();     Rx_RORX_2( 1); Block_0_5();     Rx_RORX_3( 1); Block_0_6(W_X);
        Rx_RORX_1( 2); Block_0_7(W_X);  Rx_RORX_2( 2); Block_0_8(W_X);  Rx_RORX_3( 2); Block_0_9();
        Rx_RORX_1( 3); Block_0_10();    Rx_RORX_2( 3); Block_0_11();    Rx_RORX_3( 3); Block_0_12(W_X);

        Rx_RORX_1( 4); Block_4_1(W_X);  Rx_RORX_2( 4); Block_4_2(W_X);  Rx_RORX_3( 4); Block_4_3();
        Rx_RORX_1( 5); Block_4_4();     Rx_RORX_2( 5); Block_4_5();     Rx_RORX_3( 5); Block_4_6(W_X);
        Rx_RORX_1( 6); Block_4_7(W_X);  Rx_RORX_2( 6); Block_4_8(W_X);  Rx_RORX_3( 6); Block_4_9();
        Rx_RORX_1( 7); Block_4_10();    Rx_RORX_2( 7); Block_4_11();    Rx_RORX_3( 7); Block_4_12(W_X);

        Rx_RORX_1( 8); Block_8_1(W_X);  Rx_RORX_2( 8); Block_8_2(W_X);  Rx_RORX_3( 8); Block_8_3();
        Rx_RORX_1( 9); Block_8_4();     Rx_RORX_2( 9); Block_8_5();     Rx_RORX_3( 9); Block_8_6(W_X);
        Rx_RORX_1(10); Block_8_7(W_X);  Rx_RORX_2(10); Block_8_8(W_X);  Rx_RORX_3(10); Block_8_9();
        Rx_RORX_1(11); Block_8_10();    Rx_RORX_2(11); Block_8_11();    Rx_RORX_3(11); Block_8_12(W_X);

        Rx_RORX_1(12); Block_12_1(W_X); Rx_RORX_2(12); Block_12_2(W_X); Rx_RORX_3(12); Block_12_3();
        Rx_RORX_1(13); Block_12_4();    Rx_RORX_2(13); Block_12_5();    Rx_RORX_3(13); Block_12_6(W_X);
        Rx_RORX_1(14); Block_12_7(W_X); Rx_RORX_2(14); Block_12_8(W_X); Rx_RORX_3(14); Block_12_9();
        Rx_RORX_1(15); Block_12_10();   Rx_RORX_2(15); Block_12_11();   Rx_RORX_3(15); Block_12_12(W_X);
    }

    /* Add the working vars back into digest */
    sha384->digest[0] += a(0);
    sha384->digest[1] += b(0);
    sha384->digest[2] += c(0);
    sha384->digest[3] += d(0);
    sha384->digest[4] += e(0);
    sha384->digest[5] += f(0);
    sha384->digest[6] += g(0);
    sha384->digest[7] += h(0);

    /* Wipe variables (W_X here as well; unreachable under the enclosing guard) */
#if !defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2)
    XMEMSET(W_X, 0, sizeof(word64) * 16);
#endif
    XMEMSET(T, 0, sizeof(T));

    return 0;
}
#endif

#if defined(HAVE_INTEL_AVX2)

/* AVX2 compression function: the message schedule is computed four words
   at a time in YMM registers, interleaved with the round macros */
static int Transform384_AVX2(Sha384* sha384)
{
    const word64* K = K512;
    word64 w[4];
    word32 j;
    word64 T[8];

    /* Copy digest to working vars */
    XMEMCPY(T, sha384->digest, sizeof(T));

    W_from_buff_Y(sha384->buffer);

    MOVE_to_MEMy(w, 0, W_0y);
    for (j = 0; j < 80; j += 16) {
        Ry_1( 0, w[0]); Block_Y_0_1();  Ry_2( 0, w[0]); Block_Y_0_2();  Ry_3( 0, w[0]); Block_Y_0_3();
        Ry_1( 1, w[1]); Block_Y_0_4();  Ry_2( 1, w[1]); Block_Y_0_5();  Ry_3( 1, w[1]); Block_Y_0_6();
        Ry_1( 2, w[2]); Block_Y_0_7();  Ry_2( 2, w[2]); Block_Y_0_8();  Ry_3( 2, w[2]); Block_Y_0_9();
        Ry_1( 3, w[3]); Block_Y_0_10(); Ry_2( 3, w[3]); Block_Y_0_11(); Ry_3( 3, w[3]); Block_Y_0_12(w);

        Ry_1( 4, w[0]); Block_Y_4_1();  Ry_2( 4, w[0]); Block_Y_4_2();  Ry_3( 4, w[0]); Block_Y_4_3();
        Ry_1( 5, w[1]); Block_Y_4_4();  Ry_2( 5, w[1]); Block_Y_4_5();  Ry_3( 5, w[1]); Block_Y_4_6();
        Ry_1( 6, w[2]); Block_Y_4_7();  Ry_2( 6, w[2]); Block_Y_4_8();  Ry_3( 6, w[2]); Block_Y_4_9();
        Ry_1( 7, w[3]); Block_Y_4_10(); Ry_2( 7, w[3]); Block_Y_4_11(); Ry_3( 7, w[3]); Block_Y_4_12(w);

        Ry_1( 8, w[0]); Block_Y_8_1();  Ry_2( 8, w[0]); Block_Y_8_2();  Ry_3( 8, w[0]); Block_Y_8_3();
        Ry_1( 9, w[1]); Block_Y_8_4();  Ry_2( 9, w[1]); Block_Y_8_5();  Ry_3( 9, w[1]); Block_Y_8_6();
        Ry_1(10, w[2]); Block_Y_8_7();  Ry_2(10, w[2]); Block_Y_8_8();  Ry_3(10, w[2]); Block_Y_8_9();
        Ry_1(11, w[3]); Block_Y_8_10(); Ry_2(11, w[3]); Block_Y_8_11(); Ry_3(11, w[3]); Block_Y_8_12(w);

        Ry_1(12, w[0]); Block_Y_12_1();  Ry_2(12, w[0]); Block_Y_12_2();  Ry_3(12, w[0]); Block_Y_12_3();
        Ry_1(13, w[1]); Block_Y_12_4();  Ry_2(13, w[1]); Block_Y_12_5();  Ry_3(13, w[1]); Block_Y_12_6();
        Ry_1(14, w[2]); Block_Y_12_7();  Ry_2(14, w[2]); Block_Y_12_8();  Ry_3(14, w[2]); Block_Y_12_9();
        Ry_1(15, w[3]); Block_Y_12_10(); Ry_2(15, w[3]); Block_Y_12_11(); Ry_3(15, w[3]); Block_Y_12_12(w);
    }

    /* Add the working vars back into digest */
    sha384->digest[0] += a(0);
    sha384->digest[1] += b(0);
    sha384->digest[2] += c(0);
    sha384->digest[3] += d(0);
    sha384->digest[4] += e(0);
    sha384->digest[5] += f(0);
    sha384->digest[6] += g(0);
    sha384->digest[7] += h(0);

    /* Wipe variables */
    XMEMSET(T, 0, sizeof(T));

    return 0;
}

#endif

#endif /* WOLFSSL_SHA384 */

#endif /* HAVE_FIPS */

#endif /* WOLFSSL_SHA512 */
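/* Usage sketch (illustrative, not part of the original file): one-shot
 * SHA-384 of "abc" with the wc_InitSha384 / wc_Sha384Update / wc_Sha384Final
 * API defined above.  In a standalone program, include
 * <wolfssl/wolfcrypt/sha512.h>.  Kept out of the build with #if 0: */
#if 0
static int sha384_abc_example(void)
{
    Sha384 sha;
    byte   digest[SHA384_DIGEST_SIZE];
    int    ret;

    ret = wc_InitSha384(&sha);
    if (ret != 0) return ret;

    ret = wc_Sha384Update(&sha, (const byte*)"abc", 3);
    if (ret != 0) return ret;

    ret = wc_Sha384Final(&sha, digest);  /* also re-initializes the state */
    if (ret != 0) return ret;

    /* digest now holds the 48-byte FIPS 180-4 known-answer value:
     * cb00753f45a35e8bb5a03d699ac65007272c32ab0eded163
     * 1a8b605a43ff5bed8086072ba1e7cc2358baeca134c825a7 */
    return 0;
}
#endif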