Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are scheduled for permanent deletion in July 2026.
To keep a copy of this software, download the repository ZIP archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
aes.c
00001 /* aes.c 00002 * 00003 * Copyright (C) 2006-2017 wolfSSL Inc. 00004 * 00005 * This file is part of wolfSSL. 00006 * 00007 * wolfSSL is free software; you can redistribute it and/or modify 00008 * it under the terms of the GNU General Public License as published by 00009 * the Free Software Foundation; either version 2 of the License, or 00010 * (at your option) any later version. 00011 * 00012 * wolfSSL is distributed in the hope that it will be useful, 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 * GNU General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU General Public License 00018 * along with this program; if not, write to the Free Software 00019 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA 00020 */ 00021 00022 00023 #ifdef HAVE_CONFIG_H 00024 #include <config.h> 00025 #endif 00026 00027 #include <wolfcrypt/settings.h> 00028 #include <wolfcrypt/error-crypt.h> 00029 00030 #if !defined(NO_AES) 00031 00032 #if defined(HAVE_FIPS) && \ 00033 defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2) 00034 00035 /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */ 00036 #define FIPS_NO_WRAPPERS 00037 00038 #ifdef USE_WINDOWS_API 00039 #pragma code_seg(".fipsA$g") 00040 #pragma const_seg(".fipsB$g") 00041 #endif 00042 #endif 00043 00044 #include <wolfcrypt/aes.h> 00045 #include <wolfcrypt/cpuid.h> 00046 00047 00048 /* fips wrapper calls, user can call direct */ 00049 #if defined(HAVE_FIPS) && \ 00050 (!defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION < 2)) 00051 00052 int wc_AesSetKey(Aes* aes, const byte* key, word32 len, const byte* iv, 00053 int dir) 00054 { 00055 if (aes == NULL || !( (len == 16) || (len == 24) || (len == 32)) ) { 00056 return BAD_FUNC_ARG; 00057 } 00058 00059 return AesSetKey_fips(aes, key, len, iv, dir); 00060 } 00061 int wc_AesSetIV(Aes* 
aes, const byte* iv) 00062 { 00063 if (aes == NULL) { 00064 return BAD_FUNC_ARG; 00065 } 00066 00067 return AesSetIV_fips(aes, iv); 00068 } 00069 #ifdef HAVE_AES_CBC 00070 int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) 00071 { 00072 if (aes == NULL || out == NULL || in == NULL) { 00073 return BAD_FUNC_ARG; 00074 } 00075 00076 return AesCbcEncrypt_fips(aes, out, in, sz); 00077 } 00078 #ifdef HAVE_AES_DECRYPT 00079 int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) 00080 { 00081 if (aes == NULL || out == NULL || in == NULL 00082 || sz % AES_BLOCK_SIZE != 0) { 00083 return BAD_FUNC_ARG; 00084 } 00085 00086 return AesCbcDecrypt_fips(aes, out, in, sz); 00087 } 00088 #endif /* HAVE_AES_DECRYPT */ 00089 #endif /* HAVE_AES_CBC */ 00090 00091 /* AES-CTR */ 00092 #ifdef WOLFSSL_AES_COUNTER 00093 int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) 00094 { 00095 if (aes == NULL || out == NULL || in == NULL) { 00096 return BAD_FUNC_ARG; 00097 } 00098 00099 return AesCtrEncrypt(aes, out, in, sz); 00100 } 00101 #endif 00102 00103 /* AES-DIRECT */ 00104 #if defined(WOLFSSL_AES_DIRECT) 00105 void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in) 00106 { 00107 AesEncryptDirect(aes, out, in); 00108 } 00109 00110 #ifdef HAVE_AES_DECRYPT 00111 void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in) 00112 { 00113 AesDecryptDirect(aes, out, in); 00114 } 00115 #endif /* HAVE_AES_DECRYPT */ 00116 00117 int wc_AesSetKeyDirect(Aes* aes, const byte* key, word32 len, 00118 const byte* iv, int dir) 00119 { 00120 return AesSetKeyDirect(aes, key, len, iv, dir); 00121 } 00122 #endif /* WOLFSSL_AES_DIRECT */ 00123 00124 /* AES-GCM */ 00125 #ifdef HAVE_AESGCM 00126 int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) 00127 { 00128 if (aes == NULL || !( (len == 16) || (len == 24) || (len == 32)) ) { 00129 return BAD_FUNC_ARG; 00130 } 00131 00132 return AesGcmSetKey_fips(aes, key, len); 00133 } 00134 int wc_AesGcmEncrypt(Aes* 
aes, byte* out, const byte* in, word32 sz, 00135 const byte* iv, word32 ivSz, 00136 byte* authTag, word32 authTagSz, 00137 const byte* authIn, word32 authInSz) 00138 { 00139 if (aes == NULL || authTagSz > AES_BLOCK_SIZE 00140 || authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ || 00141 ivSz > AES_BLOCK_SIZE) { 00142 return BAD_FUNC_ARG; 00143 } 00144 00145 return AesGcmEncrypt_fips(aes, out, in, sz, iv, ivSz, authTag, 00146 authTagSz, authIn, authInSz); 00147 } 00148 00149 #ifdef HAVE_AES_DECRYPT 00150 int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, 00151 const byte* iv, word32 ivSz, 00152 const byte* authTag, word32 authTagSz, 00153 const byte* authIn, word32 authInSz) 00154 { 00155 if (aes == NULL || out == NULL || in == NULL || iv == NULL 00156 || authTag == NULL || authTagSz > AES_BLOCK_SIZE || 00157 ivSz > AES_BLOCK_SIZE) { 00158 return BAD_FUNC_ARG; 00159 } 00160 00161 return AesGcmDecrypt_fips(aes, out, in, sz, iv, ivSz, authTag, 00162 authTagSz, authIn, authInSz); 00163 } 00164 #endif /* HAVE_AES_DECRYPT */ 00165 00166 int wc_GmacSetKey(Gmac* gmac, const byte* key, word32 len) 00167 { 00168 if (gmac == NULL || key == NULL || !((len == 16) || 00169 (len == 24) || (len == 32)) ) { 00170 return BAD_FUNC_ARG; 00171 } 00172 00173 return GmacSetKey(gmac, key, len); 00174 } 00175 int wc_GmacUpdate(Gmac* gmac, const byte* iv, word32 ivSz, 00176 const byte* authIn, word32 authInSz, 00177 byte* authTag, word32 authTagSz) 00178 { 00179 if (gmac == NULL || authTagSz > AES_BLOCK_SIZE || 00180 authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ) { 00181 return BAD_FUNC_ARG; 00182 } 00183 00184 return GmacUpdate(gmac, iv, ivSz, authIn, authInSz, 00185 authTag, authTagSz); 00186 } 00187 #endif /* HAVE_AESGCM */ 00188 00189 /* AES-CCM */ 00190 #if defined(HAVE_AESCCM) && \ 00191 defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2) 00192 int wc_AesCcmSetKey(Aes* aes, const byte* key, word32 keySz) 00193 { 00194 return AesCcmSetKey(aes, key, keySz); 00195 } 00196 int 
wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz, 00197 const byte* nonce, word32 nonceSz, 00198 byte* authTag, word32 authTagSz, 00199 const byte* authIn, word32 authInSz) 00200 { 00201 /* sanity check on arguments */ 00202 if (aes == NULL || out == NULL || in == NULL || nonce == NULL 00203 || authTag == NULL || nonceSz < 7 || nonceSz > 13) 00204 return BAD_FUNC_ARG; 00205 00206 AesCcmEncrypt(aes, out, in, inSz, nonce, nonceSz, authTag, 00207 authTagSz, authIn, authInSz); 00208 return 0; 00209 } 00210 00211 #ifdef HAVE_AES_DECRYPT 00212 int wc_AesCcmDecrypt(Aes* aes, byte* out, 00213 const byte* in, word32 inSz, 00214 const byte* nonce, word32 nonceSz, 00215 const byte* authTag, word32 authTagSz, 00216 const byte* authIn, word32 authInSz) 00217 { 00218 00219 if (aes == NULL || out == NULL || in == NULL || nonce == NULL 00220 || authTag == NULL || nonceSz < 7 || nonceSz > 13) { 00221 return BAD_FUNC_ARG; 00222 } 00223 00224 return AesCcmDecrypt(aes, out, in, inSz, nonce, nonceSz, 00225 authTag, authTagSz, authIn, authInSz); 00226 } 00227 #endif /* HAVE_AES_DECRYPT */ 00228 #endif /* HAVE_AESCCM && HAVE_FIPS_VERSION 2 */ 00229 00230 int wc_AesInit(Aes* aes, void* h, int i) 00231 { 00232 (void)aes; 00233 (void)h; 00234 (void)i; 00235 /* FIPS doesn't support: 00236 return AesInit(aes, h, i); */ 00237 return 0; 00238 } 00239 void wc_AesFree(Aes* aes) 00240 { 00241 (void)aes; 00242 /* FIPS doesn't support: 00243 AesFree(aes); */ 00244 } 00245 00246 #else /* else build without fips, or for FIPS v2 */ 00247 00248 00249 #if defined(WOLFSSL_TI_CRYPT) 00250 #include <wolfcrypt/src/port/ti/ti-aes.c> 00251 #else 00252 00253 #include <wolfcrypt/logging.h> 00254 00255 #ifdef NO_INLINE 00256 #include <wolfcrypt/misc.h> 00257 #else 00258 #define WOLFSSL_MISC_INCLUDED 00259 #include <wolfcrypt/src/misc.c> 00260 #endif 00261 00262 #if !defined(WOLFSSL_ARMASM) 00263 00264 #ifdef WOLFSSL_IMX6_CAAM_BLOB 00265 /* case of possibly not using hardware acceleration for AES 
but using key 00266 blobs */ 00267 #include <wolfcrypt/port/caam/wolfcaam.h> 00268 #endif 00269 00270 #ifdef DEBUG_AESNI 00271 #include <stdio.h> 00272 #endif 00273 00274 #ifdef _MSC_VER 00275 /* 4127 warning constant while(1) */ 00276 #pragma warning(disable: 4127) 00277 #endif 00278 00279 00280 /* Define AES implementation includes and functions */ 00281 #if defined(STM32_CRYPTO) 00282 /* STM32F2/F4 hardware AES support for CBC, CTR modes */ 00283 00284 #ifdef WOLFSSL_STM32L4 00285 #define CRYP AES 00286 #endif 00287 00288 /* CRYPT_AES_GCM starts the IV with 2 */ 00289 #define STM32_GCM_IV_START 2 00290 00291 #if defined(WOLFSSL_AES_DIRECT) || defined(HAVE_AESGCM) || defined(HAVE_AESCCM) 00292 static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock) 00293 { 00294 int ret = 0; 00295 #ifdef WOLFSSL_STM32_CUBEMX 00296 CRYP_HandleTypeDef hcryp; 00297 00298 XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef)); 00299 switch(aes->rounds) { 00300 case 10: /* 128-bit key */ 00301 hcryp.Init.KeySize = CRYP_KEYSIZE_128B; 00302 break; 00303 #ifdef CRYP_KEYSIZE_192B 00304 case 12: /* 192-bit key */ 00305 hcryp.Init.KeySize = CRYP_KEYSIZE_192B; 00306 break; 00307 #endif 00308 case 14: /* 256-bit key */ 00309 hcryp.Init.KeySize = CRYP_KEYSIZE_256B; 00310 break; 00311 default: 00312 break; 00313 } 00314 hcryp.Instance = CRYP; 00315 hcryp.Init.DataType = CRYP_DATATYPE_8B; 00316 hcryp.Init.pKey = (uint8_t*)aes->key; 00317 00318 HAL_CRYP_Init(&hcryp); 00319 00320 if (HAL_CRYP_AESECB_Encrypt(&hcryp, (uint8_t*)inBlock, AES_BLOCK_SIZE, 00321 outBlock, STM32_HAL_TIMEOUT) != HAL_OK) { 00322 ret = WC_TIMEOUT_E; 00323 } 00324 00325 HAL_CRYP_DeInit(&hcryp); 00326 #else 00327 word32 *enc_key; 00328 CRYP_InitTypeDef AES_CRYP_InitStructure; 00329 CRYP_KeyInitTypeDef AES_CRYP_KeyInitStructure; 00330 00331 enc_key = aes->key; 00332 00333 /* crypto structure initialization */ 00334 CRYP_KeyStructInit(&AES_CRYP_KeyInitStructure); 00335 CRYP_StructInit(&AES_CRYP_InitStructure); 00336 
00337 /* reset registers to their default values */ 00338 CRYP_DeInit(); 00339 00340 /* load key into correct registers */ 00341 switch (aes->rounds) { 00342 case 10: /* 128-bit key */ 00343 AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_128b; 00344 AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[0]; 00345 AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[1]; 00346 AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[2]; 00347 AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[3]; 00348 break; 00349 00350 case 12: /* 192-bit key */ 00351 AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_192b; 00352 AES_CRYP_KeyInitStructure.CRYP_Key1Left = enc_key[0]; 00353 AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[1]; 00354 AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[2]; 00355 AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[3]; 00356 AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[4]; 00357 AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[5]; 00358 break; 00359 00360 case 14: /* 256-bit key */ 00361 AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_256b; 00362 AES_CRYP_KeyInitStructure.CRYP_Key0Left = enc_key[0]; 00363 AES_CRYP_KeyInitStructure.CRYP_Key0Right = enc_key[1]; 00364 AES_CRYP_KeyInitStructure.CRYP_Key1Left = enc_key[2]; 00365 AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[3]; 00366 AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[4]; 00367 AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[5]; 00368 AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[6]; 00369 AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[7]; 00370 break; 00371 00372 default: 00373 break; 00374 } 00375 CRYP_KeyInit(&AES_CRYP_KeyInitStructure); 00376 00377 /* set direction, mode, and datatype */ 00378 AES_CRYP_InitStructure.CRYP_AlgoDir = CRYP_AlgoDir_Encrypt; 00379 AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_ECB; 00380 AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b; 00381 CRYP_Init(&AES_CRYP_InitStructure); 00382 00383 /* enable crypto processor 
*/ 00384 CRYP_Cmd(ENABLE); 00385 00386 /* flush IN/OUT FIFOs */ 00387 CRYP_FIFOFlush(); 00388 00389 CRYP_DataIn(*(uint32_t*)&inBlock[0]); 00390 CRYP_DataIn(*(uint32_t*)&inBlock[4]); 00391 CRYP_DataIn(*(uint32_t*)&inBlock[8]); 00392 CRYP_DataIn(*(uint32_t*)&inBlock[12]); 00393 00394 /* wait until the complete message has been processed */ 00395 while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {} 00396 00397 *(uint32_t*)&outBlock[0] = CRYP_DataOut(); 00398 *(uint32_t*)&outBlock[4] = CRYP_DataOut(); 00399 *(uint32_t*)&outBlock[8] = CRYP_DataOut(); 00400 *(uint32_t*)&outBlock[12] = CRYP_DataOut(); 00401 00402 /* disable crypto processor */ 00403 CRYP_Cmd(DISABLE); 00404 #endif /* WOLFSSL_STM32_CUBEMX */ 00405 return ret; 00406 } 00407 #endif /* WOLFSSL_AES_DIRECT || HAVE_AESGCM || HAVE_AESCCM */ 00408 00409 #ifdef HAVE_AES_DECRYPT 00410 #if defined(WOLFSSL_AES_DIRECT) || defined(HAVE_AESCCM) 00411 static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock) 00412 { 00413 int ret = 0; 00414 #ifdef WOLFSSL_STM32_CUBEMX 00415 CRYP_HandleTypeDef hcryp; 00416 00417 XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef)); 00418 switch(aes->rounds) { 00419 case 10: /* 128-bit key */ 00420 hcryp.Init.KeySize = CRYP_KEYSIZE_128B; 00421 break; 00422 #ifdef CRYP_KEYSIZE_192B 00423 case 12: /* 192-bit key */ 00424 hcryp.Init.KeySize = CRYP_KEYSIZE_192B; 00425 break; 00426 #endif 00427 case 14: /* 256-bit key */ 00428 hcryp.Init.KeySize = CRYP_KEYSIZE_256B; 00429 break; 00430 default: 00431 break; 00432 } 00433 hcryp.Instance = CRYP; 00434 hcryp.Init.DataType = CRYP_DATATYPE_8B; 00435 hcryp.Init.pKey = (uint8_t*)aes->key; 00436 00437 HAL_CRYP_Init(&hcryp); 00438 00439 if (HAL_CRYP_AESECB_Decrypt(&hcryp, (uint8_t*)inBlock, AES_BLOCK_SIZE, 00440 outBlock, STM32_HAL_TIMEOUT) != HAL_OK) { 00441 ret = WC_TIMEOUT_E; 00442 } 00443 00444 HAL_CRYP_DeInit(&hcryp); 00445 #else 00446 word32 *enc_key; 00447 CRYP_InitTypeDef AES_CRYP_InitStructure; 00448 CRYP_KeyInitTypeDef 
AES_CRYP_KeyInitStructure; 00449 00450 enc_key = aes->key; 00451 00452 /* crypto structure initialization */ 00453 CRYP_KeyStructInit(&AES_CRYP_KeyInitStructure); 00454 CRYP_StructInit(&AES_CRYP_InitStructure); 00455 00456 /* reset registers to their default values */ 00457 CRYP_DeInit(); 00458 00459 /* load key into correct registers */ 00460 switch (aes->rounds) { 00461 case 10: /* 128-bit key */ 00462 AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_128b; 00463 AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[0]; 00464 AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[1]; 00465 AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[2]; 00466 AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[3]; 00467 break; 00468 00469 case 12: /* 192-bit key */ 00470 AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_192b; 00471 AES_CRYP_KeyInitStructure.CRYP_Key1Left = enc_key[0]; 00472 AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[1]; 00473 AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[2]; 00474 AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[3]; 00475 AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[4]; 00476 AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[5]; 00477 break; 00478 00479 case 14: /* 256-bit key */ 00480 AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_256b; 00481 AES_CRYP_KeyInitStructure.CRYP_Key0Left = enc_key[0]; 00482 AES_CRYP_KeyInitStructure.CRYP_Key0Right = enc_key[1]; 00483 AES_CRYP_KeyInitStructure.CRYP_Key1Left = enc_key[2]; 00484 AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[3]; 00485 AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[4]; 00486 AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[5]; 00487 AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[6]; 00488 AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[7]; 00489 break; 00490 00491 default: 00492 break; 00493 } 00494 CRYP_KeyInit(&AES_CRYP_KeyInitStructure); 00495 00496 /* set direction, key, and datatype */ 00497 AES_CRYP_InitStructure.CRYP_AlgoDir = 
CRYP_AlgoDir_Decrypt; 00498 AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_Key; 00499 AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b; 00500 CRYP_Init(&AES_CRYP_InitStructure); 00501 00502 /* enable crypto processor */ 00503 CRYP_Cmd(ENABLE); 00504 00505 /* wait until decrypt key has been intialized */ 00506 while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {} 00507 00508 /* set direction, mode, and datatype */ 00509 AES_CRYP_InitStructure.CRYP_AlgoDir = CRYP_AlgoDir_Decrypt; 00510 AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_ECB; 00511 AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b; 00512 CRYP_Init(&AES_CRYP_InitStructure); 00513 00514 /* enable crypto processor */ 00515 CRYP_Cmd(ENABLE); 00516 00517 /* flush IN/OUT FIFOs */ 00518 CRYP_FIFOFlush(); 00519 00520 CRYP_DataIn(*(uint32_t*)&inBlock[0]); 00521 CRYP_DataIn(*(uint32_t*)&inBlock[4]); 00522 CRYP_DataIn(*(uint32_t*)&inBlock[8]); 00523 CRYP_DataIn(*(uint32_t*)&inBlock[12]); 00524 00525 /* wait until the complete message has been processed */ 00526 while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {} 00527 00528 *(uint32_t*)&outBlock[0] = CRYP_DataOut(); 00529 *(uint32_t*)&outBlock[4] = CRYP_DataOut(); 00530 *(uint32_t*)&outBlock[8] = CRYP_DataOut(); 00531 *(uint32_t*)&outBlock[12] = CRYP_DataOut(); 00532 00533 /* disable crypto processor */ 00534 CRYP_Cmd(DISABLE); 00535 #endif /* WOLFSSL_STM32_CUBEMX */ 00536 return ret; 00537 } 00538 #endif /* WOLFSSL_AES_DIRECT || HAVE_AESCCM */ 00539 #endif /* HAVE_AES_DECRYPT */ 00540 00541 #elif defined(HAVE_COLDFIRE_SEC) 00542 /* Freescale Coldfire SEC support for CBC mode. 
00543 * NOTE: no support for AES-CTR/GCM/CCM/Direct */ 00544 #include <wolfssl/wolfcrypt/types.h> 00545 #include "sec.h" 00546 #include "mcf5475_sec.h" 00547 #include "mcf5475_siu.h" 00548 #elif defined(FREESCALE_LTC) 00549 #include "fsl_ltc.h" 00550 #if defined(FREESCALE_LTC_AES_GCM) 00551 #undef NEED_AES_TABLES 00552 #undef GCM_TABLE 00553 #else 00554 /* if LTC doesn't have GCM, use software with LTC AES ECB mode */ 00555 static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock) 00556 { 00557 wc_AesEncryptDirect(aes, outBlock, inBlock); 00558 return 0; 00559 } 00560 static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock) 00561 { 00562 wc_AesDecryptDirect(aes, outBlock, inBlock); 00563 return 0; 00564 } 00565 #endif 00566 #elif defined(FREESCALE_MMCAU) 00567 /* Freescale mmCAU hardware AES support for Direct, CBC, CCM, GCM modes 00568 * through the CAU/mmCAU library. Documentation located in 00569 * ColdFire/ColdFire+ CAU and Kinetis mmCAU Software Library User 00570 * Guide (See note in README). 
*/ 00571 #ifdef FREESCALE_MMCAU_CLASSIC 00572 /* MMCAU 1.4 library used with non-KSDK / classic MQX builds */ 00573 #include "cau_api.h" 00574 #else 00575 #include "fsl_mmcau.h" 00576 #endif 00577 00578 static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock) 00579 { 00580 int ret; 00581 00582 #ifdef FREESCALE_MMCAU_CLASSIC 00583 if ((wolfssl_word)outBlock % WOLFSSL_MMCAU_ALIGNMENT) { 00584 WOLFSSL_MSG("Bad cau_aes_encrypt alignment"); 00585 return BAD_ALIGN_E; 00586 } 00587 #endif 00588 00589 ret = wolfSSL_CryptHwMutexLock(); 00590 if(ret == 0) { 00591 #ifdef FREESCALE_MMCAU_CLASSIC 00592 cau_aes_encrypt(inBlock, (byte*)aes->key, aes->rounds, outBlock); 00593 #else 00594 MMCAU_AES_EncryptEcb(inBlock, (byte*)aes->key, aes->rounds, 00595 outBlock); 00596 #endif 00597 wolfSSL_CryptHwMutexUnLock(); 00598 } 00599 return ret; 00600 } 00601 #ifdef HAVE_AES_DECRYPT 00602 static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock) 00603 { 00604 int ret; 00605 00606 #ifdef FREESCALE_MMCAU_CLASSIC 00607 if ((wolfssl_word)outBlock % WOLFSSL_MMCAU_ALIGNMENT) { 00608 WOLFSSL_MSG("Bad cau_aes_decrypt alignment"); 00609 return BAD_ALIGN_E; 00610 } 00611 #endif 00612 00613 ret = wolfSSL_CryptHwMutexLock(); 00614 if(ret == 0) { 00615 #ifdef FREESCALE_MMCAU_CLASSIC 00616 cau_aes_decrypt(inBlock, (byte*)aes->key, aes->rounds, outBlock); 00617 #else 00618 MMCAU_AES_DecryptEcb(inBlock, (byte*)aes->key, aes->rounds, 00619 outBlock); 00620 #endif 00621 wolfSSL_CryptHwMutexUnLock(); 00622 } 00623 return ret; 00624 } 00625 #endif /* HAVE_AES_DECRYPT */ 00626 00627 #elif defined(WOLFSSL_PIC32MZ_CRYPT) 00628 00629 #include <wolfssl/wolfcrypt/port/pic32/pic32mz-crypt.h> 00630 00631 #if defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) 00632 static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock) 00633 { 00634 return wc_Pic32AesCrypt(aes->key, aes->keylen, NULL, 0, 00635 outBlock, inBlock, AES_BLOCK_SIZE, 00636 PIC32_ENCRYPTION, PIC32_ALGO_AES, 
PIC32_CRYPTOALGO_RECB); 00637 } 00638 #endif 00639 00640 #if defined(HAVE_AES_DECRYPT) && defined(WOLFSSL_AES_DIRECT) 00641 static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock) 00642 { 00643 return wc_Pic32AesCrypt(aes->key, aes->keylen, NULL, 0, 00644 outBlock, inBlock, AES_BLOCK_SIZE, 00645 PIC32_DECRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_RECB); 00646 } 00647 #endif 00648 00649 #elif defined(WOLFSSL_NRF51_AES) 00650 /* Use built-in AES hardware - AES 128 ECB Encrypt Only */ 00651 #include "wolfssl/wolfcrypt/port/nrf51.h" 00652 00653 static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock) 00654 { 00655 return nrf51_aes_encrypt(inBlock, (byte*)aes->key, aes->rounds, outBlock); 00656 } 00657 00658 #ifdef HAVE_AES_DECRYPT 00659 #error nRF51 AES Hardware does not support decrypt 00660 #endif /* HAVE_AES_DECRYPT */ 00661 00662 00663 #elif defined(WOLFSSL_AESNI) 00664 00665 #define NEED_AES_TABLES 00666 00667 /* Each platform needs to query info type 1 from cpuid to see if aesni is 00668 * supported. 
Also, let's setup a macro for proper linkage w/o ABI conflicts 00669 */ 00670 00671 #ifndef AESNI_ALIGN 00672 #define AESNI_ALIGN 16 00673 #endif 00674 00675 #ifndef _MSC_VER 00676 #define XASM_LINK(f) asm(f) 00677 #else 00678 #define XASM_LINK(f) 00679 #endif /* _MSC_VER */ 00680 00681 static int checkAESNI = 0; 00682 static int haveAESNI = 0; 00683 static word32 intel_flags = 0; 00684 00685 static int Check_CPU_support_AES(void) 00686 { 00687 intel_flags = cpuid_get_flags(); 00688 00689 return IS_INTEL_AESNI(intel_flags) != 0; 00690 } 00691 00692 00693 /* tell C compiler these are asm functions in case any mix up of ABI underscore 00694 prefix between clang/gcc/llvm etc */ 00695 #ifdef HAVE_AES_CBC 00696 void AES_CBC_encrypt(const unsigned char* in, unsigned char* out, 00697 unsigned char* ivec, unsigned long length, 00698 const unsigned char* KS, int nr) 00699 XASM_LINK("AES_CBC_encrypt"); 00700 00701 #ifdef HAVE_AES_DECRYPT 00702 #if defined(WOLFSSL_AESNI_BY4) 00703 void AES_CBC_decrypt_by4(const unsigned char* in, unsigned char* out, 00704 unsigned char* ivec, unsigned long length, 00705 const unsigned char* KS, int nr) 00706 XASM_LINK("AES_CBC_decrypt_by4"); 00707 #elif defined(WOLFSSL_AESNI_BY6) 00708 void AES_CBC_decrypt_by6(const unsigned char* in, unsigned char* out, 00709 unsigned char* ivec, unsigned long length, 00710 const unsigned char* KS, int nr) 00711 XASM_LINK("AES_CBC_decrypt_by6"); 00712 #else /* WOLFSSL_AESNI_BYx */ 00713 void AES_CBC_decrypt_by8(const unsigned char* in, unsigned char* out, 00714 unsigned char* ivec, unsigned long length, 00715 const unsigned char* KS, int nr) 00716 XASM_LINK("AES_CBC_decrypt_by8"); 00717 #endif /* WOLFSSL_AESNI_BYx */ 00718 #endif /* HAVE_AES_DECRYPT */ 00719 #endif /* HAVE_AES_CBC */ 00720 00721 void AES_ECB_encrypt(const unsigned char* in, unsigned char* out, 00722 unsigned long length, const unsigned char* KS, int nr) 00723 XASM_LINK("AES_ECB_encrypt"); 00724 00725 #ifdef HAVE_AES_DECRYPT 00726 void 
AES_ECB_decrypt(const unsigned char* in, unsigned char* out, 00727 unsigned long length, const unsigned char* KS, int nr) 00728 XASM_LINK("AES_ECB_decrypt"); 00729 #endif 00730 00731 void AES_128_Key_Expansion(const unsigned char* userkey, 00732 unsigned char* key_schedule) 00733 XASM_LINK("AES_128_Key_Expansion"); 00734 00735 void AES_192_Key_Expansion(const unsigned char* userkey, 00736 unsigned char* key_schedule) 00737 XASM_LINK("AES_192_Key_Expansion"); 00738 00739 void AES_256_Key_Expansion(const unsigned char* userkey, 00740 unsigned char* key_schedule) 00741 XASM_LINK("AES_256_Key_Expansion"); 00742 00743 00744 static int AES_set_encrypt_key(const unsigned char *userKey, const int bits, 00745 Aes* aes) 00746 { 00747 int ret; 00748 00749 if (!userKey || !aes) 00750 return BAD_FUNC_ARG; 00751 00752 switch (bits) { 00753 case 128: 00754 AES_128_Key_Expansion (userKey,(byte*)aes->key); aes->rounds = 10; 00755 return 0; 00756 case 192: 00757 AES_192_Key_Expansion (userKey,(byte*)aes->key); aes->rounds = 12; 00758 return 0; 00759 case 256: 00760 AES_256_Key_Expansion (userKey,(byte*)aes->key); aes->rounds = 14; 00761 return 0; 00762 default: 00763 ret = BAD_FUNC_ARG; 00764 } 00765 00766 return ret; 00767 } 00768 00769 #ifdef HAVE_AES_DECRYPT 00770 static int AES_set_decrypt_key(const unsigned char* userKey, 00771 const int bits, Aes* aes) 00772 { 00773 int nr; 00774 Aes temp_key; 00775 __m128i *Key_Schedule = (__m128i*)aes->key; 00776 __m128i *Temp_Key_Schedule = (__m128i*)temp_key.key; 00777 00778 if (!userKey || !aes) 00779 return BAD_FUNC_ARG; 00780 00781 if (AES_set_encrypt_key(userKey,bits,&temp_key) == BAD_FUNC_ARG) 00782 return BAD_FUNC_ARG; 00783 00784 nr = temp_key.rounds; 00785 aes->rounds = nr; 00786 00787 Key_Schedule[nr] = Temp_Key_Schedule[0]; 00788 Key_Schedule[nr-1] = _mm_aesimc_si128(Temp_Key_Schedule[1]); 00789 Key_Schedule[nr-2] = _mm_aesimc_si128(Temp_Key_Schedule[2]); 00790 Key_Schedule[nr-3] = _mm_aesimc_si128(Temp_Key_Schedule[3]); 00791 
Key_Schedule[nr-4] = _mm_aesimc_si128(Temp_Key_Schedule[4]); 00792 Key_Schedule[nr-5] = _mm_aesimc_si128(Temp_Key_Schedule[5]); 00793 Key_Schedule[nr-6] = _mm_aesimc_si128(Temp_Key_Schedule[6]); 00794 Key_Schedule[nr-7] = _mm_aesimc_si128(Temp_Key_Schedule[7]); 00795 Key_Schedule[nr-8] = _mm_aesimc_si128(Temp_Key_Schedule[8]); 00796 Key_Schedule[nr-9] = _mm_aesimc_si128(Temp_Key_Schedule[9]); 00797 00798 if (nr>10) { 00799 Key_Schedule[nr-10] = _mm_aesimc_si128(Temp_Key_Schedule[10]); 00800 Key_Schedule[nr-11] = _mm_aesimc_si128(Temp_Key_Schedule[11]); 00801 } 00802 00803 if (nr>12) { 00804 Key_Schedule[nr-12] = _mm_aesimc_si128(Temp_Key_Schedule[12]); 00805 Key_Schedule[nr-13] = _mm_aesimc_si128(Temp_Key_Schedule[13]); 00806 } 00807 00808 Key_Schedule[0] = Temp_Key_Schedule[nr]; 00809 00810 return 0; 00811 } 00812 #endif /* HAVE_AES_DECRYPT */ 00813 00814 #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES) 00815 static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock) 00816 { 00817 wc_AesEncryptDirect(aes, outBlock, inBlock); 00818 return 0; 00819 } 00820 #else 00821 00822 /* using wolfCrypt software AES implementation */ 00823 #define NEED_AES_TABLES 00824 #endif 00825 00826 00827 00828 #ifdef NEED_AES_TABLES 00829 00830 static const word32 rcon[] = { 00831 0x01000000, 0x02000000, 0x04000000, 0x08000000, 00832 0x10000000, 0x20000000, 0x40000000, 0x80000000, 00833 0x1B000000, 0x36000000, 00834 /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */ 00835 }; 00836 00837 static const word32 Te[4][256] = { 00838 { 00839 0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU, 00840 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U, 00841 0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU, 00842 0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU, 00843 0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U, 00844 0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU, 00845 0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU, 00846 
0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU, 00847 0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU, 00848 0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU, 00849 0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U, 00850 0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU, 00851 0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU, 00852 0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U, 00853 0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU, 00854 0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU, 00855 0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU, 00856 0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU, 00857 0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU, 00858 0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U, 00859 0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU, 00860 0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU, 00861 0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU, 00862 0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU, 00863 0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U, 00864 0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U, 00865 0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U, 00866 0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U, 00867 0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU, 00868 0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U, 00869 0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U, 00870 0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU, 00871 0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU, 00872 0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U, 00873 0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U, 00874 0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U, 00875 0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU, 00876 0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U, 00877 0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU, 00878 0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U, 00879 0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU, 00880 0x924949dbU, 0x0c06060aU, 
0x4824246cU, 0xb85c5ce4U, 00881 0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U, 00882 0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU, 00883 0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U, 00884 0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U, 00885 0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U, 00886 0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U, 00887 0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U, 00888 0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U, 00889 0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U, 00890 0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U, 00891 0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU, 00892 0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U, 00893 0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U, 00894 0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U, 00895 0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U, 00896 0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U, 00897 0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U, 00898 0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU, 00899 0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U, 00900 0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U, 00901 0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U, 00902 0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU, 00903 }, 00904 { 00905 0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU, 00906 0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U, 00907 0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU, 00908 0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U, 00909 0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU, 00910 0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U, 00911 0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU, 00912 0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U, 00913 0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U, 00914 0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU, 00915 0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U, 00916 0x93e27171U, 0x73abd8d8U, 
0x53623131U, 0x3f2a1515U, 00917 0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U, 00918 0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU, 00919 0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U, 00920 0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U, 00921 0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU, 00922 0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U, 00923 0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U, 00924 0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U, 00925 0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU, 00926 0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU, 00927 0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U, 00928 0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU, 00929 0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU, 00930 0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U, 00931 0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU, 00932 0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U, 00933 0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU, 00934 0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U, 00935 0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U, 00936 0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U, 00937 0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU, 00938 0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U, 00939 0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU, 00940 0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U, 00941 0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU, 00942 0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U, 00943 0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U, 00944 0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU, 00945 0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU, 00946 0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU, 00947 0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U, 00948 0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U, 00949 0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU, 00950 0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U, 
00951 0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU, 00952 0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U, 00953 0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU, 00954 0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U, 00955 0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU, 00956 0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU, 00957 0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U, 00958 0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU, 00959 0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U, 00960 0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU, 00961 0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U, 00962 0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U, 00963 0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U, 00964 0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU, 00965 0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU, 00966 0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U, 00967 0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU, 00968 0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U, 00969 }, 00970 { 00971 0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU, 00972 0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U, 00973 0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU, 00974 0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U, 00975 0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU, 00976 0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U, 00977 0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU, 00978 0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U, 00979 0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U, 00980 0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU, 00981 0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U, 00982 0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U, 00983 0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U, 00984 0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU, 00985 0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U, 00986 0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U, 00987 
0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU, 00988 0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U, 00989 0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U, 00990 0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U, 00991 0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU, 00992 0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU, 00993 0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U, 00994 0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU, 00995 0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU, 00996 0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U, 00997 0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU, 00998 0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U, 00999 0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU, 01000 0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U, 01001 0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U, 01002 0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U, 01003 0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU, 01004 0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U, 01005 0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU, 01006 0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U, 01007 0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU, 01008 0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U, 01009 0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U, 01010 0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU, 01011 0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU, 01012 0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU, 01013 0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U, 01014 0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U, 01015 0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU, 01016 0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U, 01017 0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU, 01018 0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U, 01019 0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU, 01020 0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U, 01021 0xe823cbe8U, 0xdd7ca1ddU, 
0x749ce874U, 0x1f213e1fU, 01022 0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU, 01023 0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U, 01024 0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU, 01025 0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U, 01026 0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU, 01027 0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U, 01028 0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U, 01029 0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U, 01030 0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU, 01031 0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU, 01032 0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U, 01033 0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU, 01034 0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U, 01035 }, 01036 { 01037 0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U, 01038 0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U, 01039 0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U, 01040 0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU, 01041 0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU, 01042 0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU, 01043 0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U, 01044 0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU, 01045 0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU, 01046 0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U, 01047 0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U, 01048 0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU, 01049 0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU, 01050 0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU, 01051 0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU, 01052 0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU, 01053 0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U, 01054 0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU, 01055 0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU, 01056 0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U, 01057 0x5353f5a6U, 0xd1d168b9U, 
0x00000000U, 0xeded2cc1U, 01058 0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U, 01059 0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U, 01060 0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U, 01061 0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU, 01062 0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U, 01063 0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU, 01064 0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU, 01065 0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U, 01066 0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U, 01067 0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U, 01068 0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU, 01069 0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U, 01070 0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU, 01071 0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU, 01072 0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U, 01073 0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U, 01074 0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU, 01075 0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U, 01076 0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU, 01077 0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U, 01078 0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U, 01079 0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U, 01080 0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U, 01081 0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU, 01082 0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U, 01083 0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU, 01084 0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U, 01085 0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU, 01086 0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U, 01087 0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU, 01088 0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU, 01089 0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU, 01090 0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU, 01091 0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U, 
01092 0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U, 01093 0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U, 01094 0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U, 01095 0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U, 01096 0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U, 01097 0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU, 01098 0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U, 01099 0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU, 01100 0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU, 01101 } 01102 }; 01103 01104 #ifdef HAVE_AES_DECRYPT 01105 static const word32 Td[4][256] = { 01106 { 01107 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U, 01108 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U, 01109 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U, 01110 0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU, 01111 0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U, 01112 0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U, 01113 0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU, 01114 0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U, 01115 0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU, 01116 0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U, 01117 0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U, 01118 0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U, 01119 0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U, 01120 0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU, 01121 0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U, 01122 0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU, 01123 0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U, 01124 0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU, 01125 0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U, 01126 0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U, 01127 0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U, 01128 0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU, 01129 0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U, 01130 0xb0e842bdU, 0x07898b88U, 
0xe7195b38U, 0x79c8eedbU, 01131 0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U, 01132 0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU, 01133 0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U, 01134 0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU, 01135 0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU, 01136 0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U, 01137 0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU, 01138 0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U, 01139 0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU, 01140 0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U, 01141 0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U, 01142 0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U, 01143 0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU, 01144 0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U, 01145 0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U, 01146 0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU, 01147 0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U, 01148 0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U, 01149 0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U, 01150 0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U, 01151 0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U, 01152 0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU, 01153 0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U, 01154 0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U, 01155 0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U, 01156 0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U, 01157 0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U, 01158 0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU, 01159 0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU, 01160 0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU, 01161 0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU, 01162 0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U, 01163 0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U, 01164 0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU, 
01165 0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU, 01166 0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U, 01167 0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU, 01168 0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U, 01169 0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U, 01170 0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U, 01171 }, 01172 { 01173 0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU, 01174 0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U, 01175 0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU, 01176 0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U, 01177 0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U, 01178 0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U, 01179 0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U, 01180 0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U, 01181 0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U, 01182 0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU, 01183 0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU, 01184 0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU, 01185 0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U, 01186 0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU, 01187 0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U, 01188 0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U, 01189 0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U, 01190 0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU, 01191 0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU, 01192 0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U, 01193 0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU, 01194 0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U, 01195 0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU, 01196 0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU, 01197 0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U, 01198 0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U, 01199 0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U, 01200 0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU, 01201 
0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U, 01202 0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU, 01203 0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U, 01204 0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U, 01205 0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U, 01206 0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU, 01207 0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U, 01208 0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U, 01209 0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U, 01210 0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U, 01211 0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U, 01212 0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U, 01213 0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU, 01214 0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU, 01215 0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U, 01216 0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU, 01217 0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U, 01218 0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU, 01219 0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU, 01220 0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U, 01221 0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU, 01222 0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U, 01223 0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U, 01224 0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U, 01225 0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U, 01226 0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U, 01227 0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U, 01228 0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U, 01229 0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU, 01230 0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U, 01231 0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U, 01232 0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU, 01233 0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U, 01234 0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U, 01235 0x7139a801U, 0xde080cb3U, 
0x9cd8b4e4U, 0x906456c1U, 01236 0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U, 01237 }, 01238 { 01239 0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U, 01240 0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U, 01241 0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U, 01242 0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U, 01243 0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU, 01244 0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U, 01245 0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U, 01246 0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U, 01247 0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U, 01248 0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU, 01249 0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U, 01250 0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U, 01251 0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU, 01252 0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U, 01253 0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U, 01254 0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U, 01255 0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U, 01256 0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U, 01257 0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U, 01258 0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU, 01259 01260 0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U, 01261 0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U, 01262 0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U, 01263 0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U, 01264 0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U, 01265 0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU, 01266 0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU, 01267 0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U, 01268 0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU, 01269 0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U, 01270 0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU, 01271 0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU, 01272 0x198557f1U, 0x074caf75U, 
0xddbbee99U, 0x60fda37fU, 01273 0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU, 01274 0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U, 01275 0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U, 01276 0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U, 01277 0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U, 01278 0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U, 01279 0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U, 01280 0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U, 01281 0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU, 01282 0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU, 01283 0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U, 01284 0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U, 01285 0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU, 01286 0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU, 01287 0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U, 01288 0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U, 01289 0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U, 01290 0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U, 01291 0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U, 01292 0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U, 01293 0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U, 01294 0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU, 01295 0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U, 01296 0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U, 01297 0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U, 01298 0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U, 01299 0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U, 01300 0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U, 01301 0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU, 01302 0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U, 01303 0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U, 01304 }, 01305 { 01306 0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU, 01307 0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU, 01308 0x30fa5520U, 0x766df6adU, 
0xcc769188U, 0x024c25f5U, 01309 0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U, 01310 0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU, 01311 0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU, 01312 0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U, 01313 0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU, 01314 0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U, 01315 0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU, 01316 0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U, 01317 0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U, 01318 0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U, 01319 0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U, 01320 0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U, 01321 0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU, 01322 0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU, 01323 0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U, 01324 0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U, 01325 0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU, 01326 0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU, 01327 0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U, 01328 0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U, 01329 0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U, 01330 0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U, 01331 0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU, 01332 0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U, 01333 0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U, 01334 0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU, 01335 0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU, 01336 0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U, 01337 0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U, 01338 0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U, 01339 0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU, 01340 0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U, 01341 0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U, 01342 0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U, 
01343 0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U, 01344 0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U, 01345 0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U, 01346 0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U, 01347 0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU, 01348 0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U, 01349 0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U, 01350 0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU, 01351 0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU, 01352 0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U, 01353 0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU, 01354 0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U, 01355 0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U, 01356 0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U, 01357 0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U, 01358 0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U, 01359 0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U, 01360 0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU, 01361 0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU, 01362 0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU, 01363 0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU, 01364 0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U, 01365 0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U, 01366 0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U, 01367 0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU, 01368 0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U, 01369 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U, 01370 } 01371 }; 01372 01373 01374 static const byte Td4[256] = 01375 { 01376 0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U, 01377 0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU, 01378 0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U, 01379 0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU, 01380 0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU, 01381 0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 
0xc3U, 0x4eU, 01382 0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U, 01383 0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U, 01384 0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U, 01385 0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U, 01386 0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU, 01387 0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U, 01388 0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU, 01389 0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U, 01390 0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U, 01391 0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU, 01392 0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU, 01393 0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U, 01394 0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U, 01395 0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU, 01396 0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U, 01397 0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU, 01398 0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U, 01399 0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U, 01400 0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U, 01401 0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU, 01402 0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU, 01403 0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU, 01404 0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U, 01405 0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U, 01406 0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U, 01407 0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU, 01408 }; 01409 #endif /* HAVE_AES_DECRYPT */ 01410 01411 #define GETBYTE(x, y) (word32)((byte)((x) >> (8 * (y)))) 01412 01413 01414 01415 #if defined(HAVE_AES_CBC) || defined(WOLFSSL_AES_DIRECT) || defined(HAVE_AESGCM) 01416 01417 #ifndef WC_CACHE_LINE_SZ 01418 #if defined(__x86_64__) || defined(_M_X64) || \ 01419 (defined(__ILP32__) && (__ILP32__ >= 1)) 01420 #define 
WC_CACHE_LINE_SZ 64
    #else
        /* default cache line size */
        #define WC_CACHE_LINE_SZ 32
    #endif
#endif


/* Load all 4 Te tables into cache by touching one word per cache line
 * (stride is WC_CACHE_LINE_SZ/4 because each entry is 4 bytes).
 * x starts at 0 and is only ever AND-ed, so the return value is always 0;
 * the loads are the point, not the arithmetic.
 * NOTE(review): presumably a cache-timing hardening measure so the first
 * table lookups below don't leak miss/hit patterns -- confirm. */
static WC_INLINE word32 PreFetchTe(void)
{
    word32 x = 0;
    int i,j;

    for (i = 0; i < 4; i++) {
        /* 256 elements, each one is 4 bytes */
        for (j = 0; j < 256; j += WC_CACHE_LINE_SZ/4) {
            x &= Te[i][j];
        }
    }
    return x;
}


/* Encrypt a single 16-byte block.
 *
 * aes      - initialized key schedule (aes->key, aes->rounds must be set)
 * inBlock  - 16 bytes of plaintext
 * outBlock - receives 16 bytes of ciphertext
 *
 * Uses the AES-NI instruction path when compiled in and detected at runtime;
 * otherwise falls back to the 4-table (Te) software implementation.
 * Returns silently (no error code) on an uninitialized/improper key.
 */
static void wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
{
    word32 s0, s1, s2, s3;
    word32 t0, t1, t2, t3;
    word32 r = aes->rounds >> 1;      /* half-rounds: valid values are 5/6/7 */
    const word32* rk = aes->key;

    /* rounds must be 10/12/14 (AES-128/192/256), i.e. r in [1,7] */
    if (r > 7 || r == 0) {
        WOLFSSL_MSG("AesEncrypt encountered improper key, set it up");
        return;  /* stop instead of segfaulting, set up your keys! */
    }

#ifdef WOLFSSL_AESNI
    if (haveAESNI && aes->use_aesni) {
        #ifdef DEBUG_AESNI
            printf("about to aes encrypt\n");
            printf("in = %p\n", inBlock);
            printf("out = %p\n", outBlock);
            printf("aes->key = %p\n", aes->key);
            printf("aes->rounds = %d\n", aes->rounds);
            printf("sz = %d\n", AES_BLOCK_SIZE);
        #endif

        /* check alignment, decrypt doesn't need alignment */
        if ((wolfssl_word)inBlock % AESNI_ALIGN) {
        #ifndef NO_WOLFSSL_ALLOC_ALIGN
            /* bounce through an aligned scratch buffer */
            byte* tmp = (byte*)XMALLOC(AES_BLOCK_SIZE + AESNI_ALIGN, aes->heap,
                                                      DYNAMIC_TYPE_TMP_BUFFER);
            byte* tmp_align;
            if (tmp == NULL) return;

            tmp_align = tmp + (AESNI_ALIGN - ((size_t)tmp % AESNI_ALIGN));

            XMEMCPY(tmp_align, inBlock, AES_BLOCK_SIZE);
            AES_ECB_encrypt(tmp_align, tmp_align, AES_BLOCK_SIZE,
                            (byte*)aes->key, aes->rounds);
            XMEMCPY(outBlock, tmp_align, AES_BLOCK_SIZE);
            XFREE(tmp, aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
            return;
        #else
            WOLFSSL_MSG("AES-ECB encrypt with bad alignment");
            return;
        #endif
        }

        AES_ECB_encrypt(inBlock, outBlock, AES_BLOCK_SIZE, (byte*)aes->key,
                        aes->rounds);

        return;
    }
    else {
        #ifdef DEBUG_AESNI
            printf("Skipping AES-NI\n");
        #endif
    }
#endif

    /*
     * map byte array block to cipher state
     * and add initial round key:
     */
    XMEMCPY(&s0, inBlock,                  sizeof(s0));
    XMEMCPY(&s1, inBlock +     sizeof(s0), sizeof(s1));
    XMEMCPY(&s2, inBlock + 2 * sizeof(s0), sizeof(s2));
    XMEMCPY(&s3, inBlock + 3 * sizeof(s0), sizeof(s3));

#ifdef LITTLE_ENDIAN_ORDER
    /* state words are processed big-endian (GETBYTE(x,3) is the MSB) */
    s0 = ByteReverseWord32(s0);
    s1 = ByteReverseWord32(s1);
    s2 = ByteReverseWord32(s2);
    s3 = ByteReverseWord32(s3);
#endif

    s0 ^= rk[0];
    s1 ^= rk[1];
    s2 ^= rk[2];
    s3 ^= rk[3];

    /* PreFetchTe() always returns 0, so this OR leaves s0 unchanged;
     * it only forces the Te tables into cache before the lookups below */
    s0 |= PreFetchTe();

    /*
     * Nr - 1 full rounds (two rounds per loop iteration):
     */
    for (;;) {
        t0 =
            Te[0][GETBYTE(s0, 3)] ^
            Te[1][GETBYTE(s1, 2)] ^
            Te[2][GETBYTE(s2, 1)] ^
            Te[3][GETBYTE(s3, 0)] ^
            rk[4];
        t1 =
            Te[0][GETBYTE(s1, 3)] ^
            Te[1][GETBYTE(s2, 2)] ^
            Te[2][GETBYTE(s3, 1)] ^
            Te[3][GETBYTE(s0, 0)] ^
            rk[5];
        t2 =
            Te[0][GETBYTE(s2, 3)] ^
            Te[1][GETBYTE(s3, 2)] ^
            Te[2][GETBYTE(s0, 1)] ^
            Te[3][GETBYTE(s1, 0)] ^
            rk[6];
        t3 =
            Te[0][GETBYTE(s3, 3)] ^
            Te[1][GETBYTE(s0, 2)] ^
            Te[2][GETBYTE(s1, 1)] ^
            Te[3][GETBYTE(s2, 0)] ^
            rk[7];

        rk += 8;
        if (--r == 0) {
            break;
        }

        s0 =
            Te[0][GETBYTE(t0, 3)] ^
            Te[1][GETBYTE(t1, 2)] ^
            Te[2][GETBYTE(t2, 1)] ^
            Te[3][GETBYTE(t3, 0)] ^
            rk[0];
        s1 =
            Te[0][GETBYTE(t1, 3)] ^
            Te[1][GETBYTE(t2, 2)] ^
            Te[2][GETBYTE(t3, 1)] ^
            Te[3][GETBYTE(t0, 0)] ^
            rk[1];
        s2 =
            Te[0][GETBYTE(t2, 3)] ^
            Te[1][GETBYTE(t3, 2)] ^
            Te[2][GETBYTE(t0, 1)] ^
            Te[3][GETBYTE(t1, 0)] ^
            rk[2];
        s3 =
            Te[0][GETBYTE(t3, 3)] ^
            Te[1][GETBYTE(t0, 2)] ^
            Te[2][GETBYTE(t1, 1)] ^
            Te[3][GETBYTE(t2, 0)] ^
            rk[3];
    }

    /*
     * apply last round (SubBytes/ShiftRows only -- byte masks pick the
     * S-box byte out of each Te entry, no MixColumns) and
     * map cipher state to byte array block:
     */
    s0 =
        (Te[2][GETBYTE(t0, 3)] & 0xff000000) ^
        (Te[3][GETBYTE(t1, 2)] & 0x00ff0000) ^
        (Te[0][GETBYTE(t2, 1)] & 0x0000ff00) ^
        (Te[1][GETBYTE(t3, 0)] & 0x000000ff) ^
        rk[0];
    s1 =
        (Te[2][GETBYTE(t1, 3)] & 0xff000000) ^
        (Te[3][GETBYTE(t2, 2)] & 0x00ff0000) ^
        (Te[0][GETBYTE(t3, 1)] & 0x0000ff00) ^
        (Te[1][GETBYTE(t0, 0)] & 0x000000ff) ^
        rk[1];
    s2 =
        (Te[2][GETBYTE(t2, 3)] & 0xff000000) ^
        (Te[3][GETBYTE(t3, 2)] & 0x00ff0000) ^
        (Te[0][GETBYTE(t0, 1)] & 0x0000ff00) ^
        (Te[1][GETBYTE(t1, 0)] & 0x000000ff) ^
        rk[2];
    s3 =
        (Te[2][GETBYTE(t3, 3)] & 0xff000000) ^
        (Te[3][GETBYTE(t0, 2)] & 0x00ff0000) ^
        (Te[0][GETBYTE(t1, 1)] & 0x0000ff00) ^
        (Te[1][GETBYTE(t2, 0)] & 0x000000ff) ^
        rk[3];

    /* write out */
#ifdef LITTLE_ENDIAN_ORDER
    s0 = ByteReverseWord32(s0);
    s1 = ByteReverseWord32(s1);
    s2 = ByteReverseWord32(s2);
    s3 = ByteReverseWord32(s3);
#endif

    XMEMCPY(outBlock,                  &s0, sizeof(s0));
    XMEMCPY(outBlock +     sizeof(s0), &s1, sizeof(s1));
    XMEMCPY(outBlock + 2 * sizeof(s0), &s2, sizeof(s2));
    XMEMCPY(outBlock + 3 * sizeof(s0), &s3, sizeof(s3));

}
#endif /* HAVE_AES_CBC || WOLFSSL_AES_DIRECT || HAVE_AESGCM */

#if defined(HAVE_AES_DECRYPT)
#if defined(HAVE_AES_CBC) || defined(WOLFSSL_AES_DIRECT)

/* Load all 4 Td tables into cache by cache line stride; like PreFetchTe()
 * this always returns 0 -- only the memory reads matter. */
static WC_INLINE word32 PreFetchTd(void)
{
    word32 x = 0;
    int i,j;

    for (i = 0; i < 4; i++) {
        /* 256 elements, each one is 4 bytes */
        for (j = 0; j < 256; j
+= WC_CACHE_LINE_SZ/4) {
            /* x stays 0: the AND is only there so the compiler cannot
               optimize away the table reads that warm the cache */
            x &= Td[i][j];
        }
    }
    return x;
}

/* load Td Table4 into cache by cache line stride */
static WC_INLINE word32 PreFetchTd4(void)
{
    word32 x = 0;
    int i;

    for (i = 0; i < 256; i += WC_CACHE_LINE_SZ) {
        x &= (word32)Td4[i];
    }
    return x;
}

/* Decrypt a single 16-byte block with the table-driven software AES,
 * or hand off to AES-NI when available.
 *
 * aes      - initialized key schedule (wc_AesSetKey with AES_DECRYPTION)
 * inBlock  - 16 bytes of ciphertext
 * outBlock - receives 16 bytes of plaintext (may alias inBlock)
 *
 * No return value: an uninitialized/corrupt round count is logged and the
 * function returns without writing outBlock.
 */
static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
{
    word32 s0, s1, s2, s3;
    word32 t0, t1, t2, t3;
    /* two rounds are processed per loop iteration, hence rounds/2 */
    word32 r = aes->rounds >> 1;

    const word32* rk = aes->key;
    /* valid key setups give rounds of 10/12/14, i.e. r in 5..7 */
    if (r > 7 || r == 0) {
        WOLFSSL_MSG("AesDecrypt encountered improper key, set it up");
        return;  /* stop instead of segfaulting, set up your keys! */
    }
#ifdef WOLFSSL_AESNI
    if (haveAESNI && aes->use_aesni) {
        #ifdef DEBUG_AESNI
            printf("about to aes decrypt\n");
            printf("in = %p\n", inBlock);
            printf("out = %p\n", outBlock);
            printf("aes->key = %p\n", aes->key);
            printf("aes->rounds = %d\n", aes->rounds);
            printf("sz = %d\n", AES_BLOCK_SIZE);
        #endif

        /* if input and output same will overwrite input iv */
        XMEMCPY(aes->tmp, inBlock, AES_BLOCK_SIZE);
        AES_ECB_decrypt(inBlock, outBlock, AES_BLOCK_SIZE, (byte*)aes->key,
                        aes->rounds);
        return;
    }
    else {
        #ifdef DEBUG_AESNI
            printf("Skipping AES-NI\n");
        #endif
    }
#endif /* WOLFSSL_AESNI */

    /*
     * map byte array block to cipher state
     * and add initial round key:
     */
    XMEMCPY(&s0, inBlock,                  sizeof(s0));
    XMEMCPY(&s1, inBlock + sizeof(s0),     sizeof(s1));
    XMEMCPY(&s2, inBlock + 2 * sizeof(s0), sizeof(s2));
    XMEMCPY(&s3, inBlock + 3 * sizeof(s0), sizeof(s3));

#ifdef LITTLE_ENDIAN_ORDER
    s0 = ByteReverseWord32(s0);
    s1 = ByteReverseWord32(s1);
    s2 = ByteReverseWord32(s2);
    s3 = ByteReverseWord32(s3);
#endif

    s0 ^= rk[0];
    s1 ^= rk[1];
    s2 ^= rk[2];
    s3 ^= rk[3];

    /* PreFetchTd returns 0; the OR warms the Td tables in cache to reduce
       cache-timing leakage of the lookups below */
    s0 |= PreFetchTd();

    /*
     * Nr - 1 full rounds:
     */

    for (;;) {
        t0 =
            Td[0][GETBYTE(s0, 3)] ^
            Td[1][GETBYTE(s3, 2)] ^
            Td[2][GETBYTE(s2, 1)] ^
            Td[3][GETBYTE(s1, 0)] ^
            rk[4];
        t1 =
            Td[0][GETBYTE(s1, 3)] ^
            Td[1][GETBYTE(s0, 2)] ^
            Td[2][GETBYTE(s3, 1)] ^
            Td[3][GETBYTE(s2, 0)] ^
            rk[5];
        t2 =
            Td[0][GETBYTE(s2, 3)] ^
            Td[1][GETBYTE(s1, 2)] ^
            Td[2][GETBYTE(s0, 1)] ^
            Td[3][GETBYTE(s3, 0)] ^
            rk[6];
        t3 =
            Td[0][GETBYTE(s3, 3)] ^
            Td[1][GETBYTE(s2, 2)] ^
            Td[2][GETBYTE(s1, 1)] ^
            Td[3][GETBYTE(s0, 0)] ^
            rk[7];

        rk += 8;
        if (--r == 0) {
            break;
        }

        s0 =
            Td[0][GETBYTE(t0, 3)] ^
            Td[1][GETBYTE(t3, 2)] ^
            Td[2][GETBYTE(t2, 1)] ^
            Td[3][GETBYTE(t1, 0)] ^
            rk[0];
        s1 =
            Td[0][GETBYTE(t1, 3)] ^
            Td[1][GETBYTE(t0, 2)] ^
            Td[2][GETBYTE(t3, 1)] ^
            Td[3][GETBYTE(t2, 0)] ^
            rk[1];
        s2 =
            Td[0][GETBYTE(t2, 3)] ^
            Td[1][GETBYTE(t1, 2)] ^
            Td[2][GETBYTE(t0, 1)] ^
            Td[3][GETBYTE(t3, 0)] ^
            rk[2];
        s3 =
            Td[0][GETBYTE(t3, 3)] ^
            Td[1][GETBYTE(t2, 2)] ^
            Td[2][GETBYTE(t1, 1)] ^
            Td[3][GETBYTE(t0, 0)] ^
            rk[3];
    }
    /*
     * apply last round and
     * map cipher state to byte array block:
     */

    /* warm the Td4 S-box table before the final-round lookups */
    t0 |= PreFetchTd4();

    s0 =
        ((word32)Td4[GETBYTE(t0, 3)] << 24) ^
        ((word32)Td4[GETBYTE(t3, 2)] << 16) ^
        ((word32)Td4[GETBYTE(t2, 1)] <<  8) ^
        ((word32)Td4[GETBYTE(t1, 0)]) ^
        rk[0];
    s1 =
        ((word32)Td4[GETBYTE(t1, 3)] << 24) ^
        ((word32)Td4[GETBYTE(t0, 2)] << 16) ^
        ((word32)Td4[GETBYTE(t3, 1)] <<  8) ^
        ((word32)Td4[GETBYTE(t2, 0)]) ^
        rk[1];
    s2 =
        ((word32)Td4[GETBYTE(t2, 3)] << 24) ^
        ((word32)Td4[GETBYTE(t1, 2)] << 16) ^
        ((word32)Td4[GETBYTE(t0, 1)] <<  8) ^
        ((word32)Td4[GETBYTE(t3, 0)]) ^
        rk[2];
    s3 =
        ((word32)Td4[GETBYTE(t3, 3)] << 24) ^
        ((word32)Td4[GETBYTE(t2, 2)] << 16) ^
        ((word32)Td4[GETBYTE(t1, 1)] <<  8) ^
        ((word32)Td4[GETBYTE(t0, 0)]) ^
        rk[3];

    /* write out */
#ifdef LITTLE_ENDIAN_ORDER
    s0 = ByteReverseWord32(s0);
    s1 = ByteReverseWord32(s1);
    s2 = ByteReverseWord32(s2);
    s3 = ByteReverseWord32(s3);
#endif

    XMEMCPY(outBlock,                  &s0, sizeof(s0));
    XMEMCPY(outBlock + sizeof(s0),     &s1, sizeof(s1));
    XMEMCPY(outBlock + 2 * sizeof(s0), &s2, sizeof(s2));
    XMEMCPY(outBlock + 3 * sizeof(s0), &s3, sizeof(s3));
}
#endif /* HAVE_AES_CBC || WOLFSSL_AES_DIRECT */
#endif /* HAVE_AES_DECRYPT */

#endif /* NEED_AES_TABLES */



/* wc_AesSetKey */
#if defined(STM32_CRYPTO)

/* STM32 hardware-crypto key setup: the raw key is stored in aes->key and
 * handed to the peripheral per call; dir is unused because the hardware
 * handles both directions from the same key. */
int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
            const byte* iv, int dir)
{
    word32 *rk = aes->key;

    (void)dir;

    if (!((keylen == 16) || (keylen == 24) || (keylen == 32)))
        return BAD_FUNC_ARG;

    aes->keylen = keylen;
    aes->rounds = keylen/4 + 6;
    XMEMCPY(rk, userKey, keylen);
#ifndef WOLFSSL_STM32_CUBEMX
    /* StdPeriph driver loads the key registers word-reversed */
    ByteReverseWords(rk, rk, keylen);
#endif
#if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER)
    aes->left = 0;
#endif

    return wc_AesSetIV(aes, iv);
}

#if defined(WOLFSSL_AES_DIRECT)
    /* direct mode uses the same hardware key setup */
    int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen,
                        const byte* iv, int dir)
    {
        return wc_AesSetKey(aes, userKey, keylen, iv, dir);
    }
#endif

#elif defined(HAVE_COLDFIRE_SEC)
#if defined (HAVE_THREADX)
	#include "memory_pools.h"
	extern TX_BYTE_POOL mp_ncached;  /* Non Cached memory pool */
#endif

#define
AES_BUFFER_SIZE (AES_BLOCK_SIZE * 64) 01871 static unsigned char *AESBuffIn = NULL; 01872 static unsigned char *AESBuffOut = NULL; 01873 static byte *secReg; 01874 static byte *secKey; 01875 static volatile SECdescriptorType *secDesc; 01876 01877 static wolfSSL_Mutex Mutex_AesSEC; 01878 01879 #define SEC_DESC_AES_CBC_ENCRYPT 0x60300010 01880 #define SEC_DESC_AES_CBC_DECRYPT 0x60200010 01881 01882 extern volatile unsigned char __MBAR[]; 01883 01884 int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, 01885 const byte* iv, int dir) 01886 { 01887 if (AESBuffIn == NULL) { 01888 #if defined (HAVE_THREADX) 01889 int s1, s2, s3, s4, s5; 01890 s5 = tx_byte_allocate(&mp_ncached,(void *)&secDesc, 01891 sizeof(SECdescriptorType), TX_NO_WAIT); 01892 s1 = tx_byte_allocate(&mp_ncached, (void *)&AESBuffIn, 01893 AES_BUFFER_SIZE, TX_NO_WAIT); 01894 s2 = tx_byte_allocate(&mp_ncached, (void *)&AESBuffOut, 01895 AES_BUFFER_SIZE, TX_NO_WAIT); 01896 s3 = tx_byte_allocate(&mp_ncached, (void *)&secKey, 01897 AES_BLOCK_SIZE*2, TX_NO_WAIT); 01898 s4 = tx_byte_allocate(&mp_ncached, (void *)&secReg, 01899 AES_BLOCK_SIZE, TX_NO_WAIT); 01900 01901 if (s1 || s2 || s3 || s4 || s5) 01902 return BAD_FUNC_ARG; 01903 #else 01904 #warning "Allocate non-Cache buffers" 01905 #endif 01906 01907 wc_InitMutex(&Mutex_AesSEC); 01908 } 01909 01910 if (!((keylen == 16) || (keylen == 24) || (keylen == 32))) 01911 return BAD_FUNC_ARG; 01912 01913 if (aes == NULL) 01914 return BAD_FUNC_ARG; 01915 01916 aes->keylen = keylen; 01917 aes->rounds = keylen/4 + 6; 01918 XMEMCPY(aes->key, userKey, keylen); 01919 01920 if (iv) 01921 XMEMCPY(aes->reg, iv, AES_BLOCK_SIZE); 01922 01923 #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) 01924 aes->left = 0; 01925 #endif 01926 01927 return 0; 01928 } 01929 #elif defined(FREESCALE_LTC) 01930 int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv, 01931 int dir) 01932 { 01933 if (!((keylen == 16) || (keylen == 24) || (keylen == 32))) 
01934 return BAD_FUNC_ARG; 01935 01936 aes->rounds = keylen/4 + 6; 01937 XMEMCPY(aes->key, userKey, keylen); 01938 01939 #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) 01940 aes->left = 0; 01941 #endif 01942 01943 return wc_AesSetIV(aes, iv); 01944 } 01945 01946 int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen, 01947 const byte* iv, int dir) 01948 { 01949 return wc_AesSetKey(aes, userKey, keylen, iv, dir); 01950 } 01951 #elif defined(FREESCALE_MMCAU) 01952 int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, 01953 const byte* iv, int dir) 01954 { 01955 int ret; 01956 byte *rk = (byte*)aes->key; 01957 01958 (void)dir; 01959 01960 if (!((keylen == 16) || (keylen == 24) || (keylen == 32))) 01961 return BAD_FUNC_ARG; 01962 01963 if (rk == NULL) 01964 return BAD_FUNC_ARG; 01965 01966 #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) 01967 aes->left = 0; 01968 #endif 01969 01970 aes->rounds = keylen/4 + 6; 01971 01972 ret = wolfSSL_CryptHwMutexLock(); 01973 if(ret == 0) { 01974 #ifdef FREESCALE_MMCAU_CLASSIC 01975 cau_aes_set_key(userKey, keylen*8, rk); 01976 #else 01977 MMCAU_AES_SetKey(userKey, keylen, rk); 01978 #endif 01979 wolfSSL_CryptHwMutexUnLock(); 01980 01981 ret = wc_AesSetIV(aes, iv); 01982 } 01983 01984 return ret; 01985 } 01986 01987 int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen, 01988 const byte* iv, int dir) 01989 { 01990 return wc_AesSetKey(aes, userKey, keylen, iv, dir); 01991 } 01992 01993 #elif defined(WOLFSSL_NRF51_AES) 01994 int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, 01995 const byte* iv, int dir) 01996 { 01997 int ret; 01998 01999 (void)dir; 02000 (void)iv; 02001 02002 if (keylen != 16) 02003 return BAD_FUNC_ARG; 02004 02005 aes->keylen = keylen; 02006 aes->rounds = keylen/4 + 6; 02007 ret = nrf51_aes_set_key(userKey); 02008 02009 #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) 02010 aes->left = 0; 02011 #endif 02012 02013 return ret; 02014 
} 02015 02016 int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen, 02017 const byte* iv, int dir) 02018 { 02019 return wc_AesSetKey(aes, userKey, keylen, iv, dir); 02020 } 02021 02022 #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES) 02023 /* implemented in wolfcrypt/src/port/caam/caam_aes.c */ 02024 02025 #else 02026 static int wc_AesSetKeyLocal(Aes* aes, const byte* userKey, word32 keylen, 02027 const byte* iv, int dir) 02028 { 02029 word32 *rk = aes->key; 02030 #ifdef NEED_AES_TABLES 02031 word32 temp; 02032 unsigned int i = 0; 02033 #endif 02034 02035 #ifdef WOLFSSL_AESNI 02036 aes->use_aesni = 0; 02037 #endif /* WOLFSSL_AESNI */ 02038 #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) 02039 aes->left = 0; 02040 #endif 02041 02042 aes->keylen = keylen; 02043 aes->rounds = (keylen/4) + 6; 02044 02045 XMEMCPY(rk, userKey, keylen); 02046 #if defined(LITTLE_ENDIAN_ORDER) && !defined(WOLFSSL_PIC32MZ_CRYPT) 02047 ByteReverseWords(rk, rk, keylen); 02048 #endif 02049 02050 #ifdef NEED_AES_TABLES 02051 02052 switch (keylen) { 02053 #if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 128 && \ 02054 defined(WOLFSSL_AES_128) 02055 case 16: 02056 while (1) 02057 { 02058 temp = rk[3]; 02059 rk[4] = rk[0] ^ 02060 (Te[2][GETBYTE(temp, 2)] & 0xff000000) ^ 02061 (Te[3][GETBYTE(temp, 1)] & 0x00ff0000) ^ 02062 (Te[0][GETBYTE(temp, 0)] & 0x0000ff00) ^ 02063 (Te[1][GETBYTE(temp, 3)] & 0x000000ff) ^ 02064 rcon[i]; 02065 rk[5] = rk[1] ^ rk[4]; 02066 rk[6] = rk[2] ^ rk[5]; 02067 rk[7] = rk[3] ^ rk[6]; 02068 if (++i == 10) 02069 break; 02070 rk += 4; 02071 } 02072 break; 02073 #endif /* 128 */ 02074 02075 #if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 192 && \ 02076 defined(WOLFSSL_AES_192) 02077 case 24: 02078 /* for (;;) here triggers a bug in VC60 SP4 w/ Pro Pack */ 02079 while (1) 02080 { 02081 temp = rk[ 5]; 02082 rk[ 6] = rk[ 0] ^ 02083 (Te[2][GETBYTE(temp, 2)] & 0xff000000) ^ 02084 (Te[3][GETBYTE(temp, 1)] & 0x00ff0000) ^ 02085 
(Te[0][GETBYTE(temp, 0)] & 0x0000ff00) ^ 02086 (Te[1][GETBYTE(temp, 3)] & 0x000000ff) ^ 02087 rcon[i]; 02088 rk[ 7] = rk[ 1] ^ rk[ 6]; 02089 rk[ 8] = rk[ 2] ^ rk[ 7]; 02090 rk[ 9] = rk[ 3] ^ rk[ 8]; 02091 if (++i == 8) 02092 break; 02093 rk[10] = rk[ 4] ^ rk[ 9]; 02094 rk[11] = rk[ 5] ^ rk[10]; 02095 rk += 6; 02096 } 02097 break; 02098 #endif /* 192 */ 02099 02100 #if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 256 && \ 02101 defined(WOLFSSL_AES_256) 02102 case 32: 02103 while (1) 02104 { 02105 temp = rk[ 7]; 02106 rk[ 8] = rk[ 0] ^ 02107 (Te[2][GETBYTE(temp, 2)] & 0xff000000) ^ 02108 (Te[3][GETBYTE(temp, 1)] & 0x00ff0000) ^ 02109 (Te[0][GETBYTE(temp, 0)] & 0x0000ff00) ^ 02110 (Te[1][GETBYTE(temp, 3)] & 0x000000ff) ^ 02111 rcon[i]; 02112 rk[ 9] = rk[ 1] ^ rk[ 8]; 02113 rk[10] = rk[ 2] ^ rk[ 9]; 02114 rk[11] = rk[ 3] ^ rk[10]; 02115 if (++i == 7) 02116 break; 02117 temp = rk[11]; 02118 rk[12] = rk[ 4] ^ 02119 (Te[2][GETBYTE(temp, 3)] & 0xff000000) ^ 02120 (Te[3][GETBYTE(temp, 2)] & 0x00ff0000) ^ 02121 (Te[0][GETBYTE(temp, 1)] & 0x0000ff00) ^ 02122 (Te[1][GETBYTE(temp, 0)] & 0x000000ff); 02123 rk[13] = rk[ 5] ^ rk[12]; 02124 rk[14] = rk[ 6] ^ rk[13]; 02125 rk[15] = rk[ 7] ^ rk[14]; 02126 02127 rk += 8; 02128 } 02129 break; 02130 #endif /* 256 */ 02131 02132 default: 02133 return BAD_FUNC_ARG; 02134 } /* switch */ 02135 02136 #ifdef HAVE_AES_DECRYPT 02137 if (dir == AES_DECRYPTION) { 02138 unsigned int j; 02139 rk = aes->key; 02140 02141 /* invert the order of the round keys: */ 02142 for (i = 0, j = 4* aes->rounds; i < j; i += 4, j -= 4) { 02143 temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp; 02144 temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp; 02145 temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp; 02146 temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp; 02147 } 02148 /* apply the inverse MixColumn transform to all round keys but the 02149 first and the last: */ 02150 for (i = 1; i < aes->rounds; i++) { 02151 rk += 4; 02152 rk[0] = 
02153 Td[0][Te[1][GETBYTE(rk[0], 3)] & 0xff] ^ 02154 Td[1][Te[1][GETBYTE(rk[0], 2)] & 0xff] ^ 02155 Td[2][Te[1][GETBYTE(rk[0], 1)] & 0xff] ^ 02156 Td[3][Te[1][GETBYTE(rk[0], 0)] & 0xff]; 02157 rk[1] = 02158 Td[0][Te[1][GETBYTE(rk[1], 3)] & 0xff] ^ 02159 Td[1][Te[1][GETBYTE(rk[1], 2)] & 0xff] ^ 02160 Td[2][Te[1][GETBYTE(rk[1], 1)] & 0xff] ^ 02161 Td[3][Te[1][GETBYTE(rk[1], 0)] & 0xff]; 02162 rk[2] = 02163 Td[0][Te[1][GETBYTE(rk[2], 3)] & 0xff] ^ 02164 Td[1][Te[1][GETBYTE(rk[2], 2)] & 0xff] ^ 02165 Td[2][Te[1][GETBYTE(rk[2], 1)] & 0xff] ^ 02166 Td[3][Te[1][GETBYTE(rk[2], 0)] & 0xff]; 02167 rk[3] = 02168 Td[0][Te[1][GETBYTE(rk[3], 3)] & 0xff] ^ 02169 Td[1][Te[1][GETBYTE(rk[3], 2)] & 0xff] ^ 02170 Td[2][Te[1][GETBYTE(rk[3], 1)] & 0xff] ^ 02171 Td[3][Te[1][GETBYTE(rk[3], 0)] & 0xff]; 02172 } 02173 } 02174 #else 02175 (void)dir; 02176 #endif /* HAVE_AES_DECRYPT */ 02177 #endif /* NEED_AES_TABLES */ 02178 02179 return wc_AesSetIV(aes, iv); 02180 } 02181 02182 int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, 02183 const byte* iv, int dir) 02184 { 02185 int ret; 02186 #if defined(AES_MAX_KEY_SIZE) 02187 const word32 max_key_len = (AES_MAX_KEY_SIZE / 8); 02188 #endif 02189 02190 #ifdef WOLFSSL_IMX6_CAAM_BLOB 02191 byte local[32]; 02192 word32 localSz = 32; 02193 02194 if (keylen == (16 + WC_CAAM_BLOB_SZ) || 02195 keylen == (24 + WC_CAAM_BLOB_SZ) || 02196 keylen == (32 + WC_CAAM_BLOB_SZ)) { 02197 if (wc_caamOpenBlob((byte*)userKey, keylen, local, &localSz) != 0) { 02198 return BAD_FUNC_ARG; 02199 } 02200 02201 /* set local values */ 02202 userKey = local; 02203 keylen = localSz; 02204 } 02205 #endif 02206 if (aes == NULL || 02207 !((keylen == 16) || (keylen == 24) || (keylen == 32))) { 02208 return BAD_FUNC_ARG; 02209 } 02210 02211 #if defined(AES_MAX_KEY_SIZE) 02212 /* Check key length */ 02213 if (keylen > max_key_len) { 02214 return BAD_FUNC_ARG; 02215 } 02216 #endif 02217 aes->keylen = keylen; 02218 aes->rounds = keylen/4 + 6; 02219 02220 #if 
defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES) 02221 if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES) { 02222 XMEMCPY(aes->asyncKey, userKey, keylen); 02223 if (iv) 02224 XMEMCPY(aes->asyncIv, iv, AES_BLOCK_SIZE); 02225 } 02226 #endif /* WOLFSSL_ASYNC_CRYPT */ 02227 02228 #ifdef WOLFSSL_AESNI 02229 if (checkAESNI == 0) { 02230 haveAESNI = Check_CPU_support_AES(); 02231 checkAESNI = 1; 02232 } 02233 if (haveAESNI) { 02234 #if defined(WOLFSSL_AES_COUNTER) || defined(WOLFSSL_AES_CFB) 02235 aes->left = 0; 02236 #endif /* WOLFSSL_AES_COUNTER */ 02237 aes->use_aesni = 1; 02238 if (iv) 02239 XMEMCPY(aes->reg, iv, AES_BLOCK_SIZE); 02240 if (dir == AES_ENCRYPTION) 02241 return AES_set_encrypt_key(userKey, keylen * 8, aes); 02242 #ifdef HAVE_AES_DECRYPT 02243 else 02244 return AES_set_decrypt_key(userKey, keylen * 8, aes); 02245 #endif 02246 } 02247 #endif /* WOLFSSL_AESNI */ 02248 02249 ret = wc_AesSetKeyLocal(aes, userKey, keylen, iv, dir); 02250 02251 #ifdef WOLFSSL_IMX6_CAAM_BLOB 02252 ForceZero(local, sizeof(local)); 02253 #endif 02254 return ret; 02255 } 02256 02257 #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) 02258 /* AES-CTR and AES-DIRECT need to use this for key setup, no aesni yet */ 02259 int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen, 02260 const byte* iv, int dir) 02261 { 02262 int ret; 02263 02264 #ifdef WOLFSSL_IMX6_CAAM_BLOB 02265 byte local[32]; 02266 word32 localSz = 32; 02267 02268 if (keylen == (16 + WC_CAAM_BLOB_SZ) || 02269 keylen == (24 + WC_CAAM_BLOB_SZ) || 02270 keylen == (32 + WC_CAAM_BLOB_SZ)) { 02271 if (wc_caamOpenBlob((byte*)userKey, keylen, local, &localSz) 02272 != 0) { 02273 return BAD_FUNC_ARG; 02274 } 02275 02276 /* set local values */ 02277 userKey = local; 02278 keylen = localSz; 02279 } 02280 #endif 02281 ret = wc_AesSetKeyLocal(aes, userKey, keylen, iv, dir); 02282 02283 #ifdef WOLFSSL_IMX6_CAAM_BLOB 02284 ForceZero(local, sizeof(local)); 02285 #endif 02286 02287 return ret; 
}
#endif /* WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */
#endif /* wc_AesSetKey block */


/* wc_AesSetIV is shared between software and hardware */
/* Store the IV into aes->reg for the next operation; a NULL iv selects the
 * all-zero IV. Returns 0 or BAD_FUNC_ARG when aes is NULL. */
int wc_AesSetIV(Aes* aes, const byte* iv)
{
    if (aes == NULL)
        return BAD_FUNC_ARG;

    if (iv)
        XMEMCPY(aes->reg, iv, AES_BLOCK_SIZE);
    else
        XMEMSET(aes->reg,  0, AES_BLOCK_SIZE);

    return 0;
}

/* AES-DIRECT */
#if defined(WOLFSSL_AES_DIRECT)
    #if defined(HAVE_COLDFIRE_SEC)
        #error "Coldfire SEC doesn't yet support AES direct"

    #elif defined(FREESCALE_LTC)
        /* Allow direct access to one block encrypt */
        void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in)
        {
            byte *key;
            uint32_t keySize;

            key = (byte*)aes->key;
            /* NOTE(review): return value of wc_AesGetKeySize is ignored
               here; keySize is unset on failure */
            wc_AesGetKeySize(aes, &keySize);

            LTC_AES_EncryptEcb(LTC_BASE, in, out, AES_BLOCK_SIZE,
                key, keySize);
        }

        /* Allow direct access to one block decrypt */
        void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in)
        {
            byte *key;
            uint32_t keySize;

            key = (byte*)aes->key;
            /* NOTE(review): return value of wc_AesGetKeySize is ignored
               here as well */
            wc_AesGetKeySize(aes, &keySize);

            /* kLTC_EncryptKey: LTC holds the encrypt-form key and derives
               the decrypt schedule internally */
            LTC_AES_DecryptEcb(LTC_BASE, in, out, AES_BLOCK_SIZE,
                key, keySize, kLTC_EncryptKey);
        }

    #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES)
        /* implemented in wolfcrypt/src/port/caam/caam_aes.c */

    #else
        /* Allow direct access to one block encrypt */
        void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in)
        {
            wc_AesEncrypt(aes, in, out);
        }
        #ifdef HAVE_AES_DECRYPT
        /* Allow direct access to one block decrypt */
        void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in)
        {
            wc_AesDecrypt(aes, in, out);
        }
        #endif /* HAVE_AES_DECRYPT */
    #endif /* AES direct block */
#endif /* WOLFSSL_AES_DIRECT */

02359 /* AES-CBC */ 02360 #ifdef HAVE_AES_CBC 02361 #if defined(STM32_CRYPTO) 02362 02363 #ifdef WOLFSSL_STM32_CUBEMX 02364 int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) 02365 { 02366 int ret = 0; 02367 word32 blocks = (sz / AES_BLOCK_SIZE); 02368 CRYP_HandleTypeDef hcryp; 02369 02370 XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef)); 02371 switch (aes->rounds) { 02372 case 10: /* 128-bit key */ 02373 hcryp.Init.KeySize = CRYP_KEYSIZE_128B; 02374 break; 02375 #ifdef CRYP_KEYSIZE_192B 02376 case 12: /* 192-bit key */ 02377 hcryp.Init.KeySize = CRYP_KEYSIZE_192B; 02378 break; 02379 #endif 02380 case 14: /* 256-bit key */ 02381 hcryp.Init.KeySize = CRYP_KEYSIZE_256B; 02382 break; 02383 default: 02384 break; 02385 } 02386 hcryp.Instance = CRYP; 02387 hcryp.Init.DataType = CRYP_DATATYPE_8B; 02388 hcryp.Init.pKey = (uint8_t*)aes->key; 02389 hcryp.Init.pInitVect = (uint8_t*)aes->reg; 02390 02391 HAL_CRYP_Init(&hcryp); 02392 02393 while (blocks--) { 02394 if (HAL_CRYP_AESCBC_Encrypt(&hcryp, (uint8_t*)in, AES_BLOCK_SIZE, 02395 out, STM32_HAL_TIMEOUT) != HAL_OK) { 02396 ret = WC_TIMEOUT_E; 02397 break; 02398 } 02399 02400 /* store iv for next call */ 02401 XMEMCPY(aes->reg, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); 02402 02403 sz -= AES_BLOCK_SIZE; 02404 in += AES_BLOCK_SIZE; 02405 out += AES_BLOCK_SIZE; 02406 } 02407 02408 HAL_CRYP_DeInit(&hcryp); 02409 02410 return ret; 02411 } 02412 #ifdef HAVE_AES_DECRYPT 02413 int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) 02414 { 02415 int ret = 0; 02416 word32 blocks = (sz / AES_BLOCK_SIZE); 02417 CRYP_HandleTypeDef hcryp; 02418 02419 XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef)); 02420 switch (aes->rounds) { 02421 case 10: /* 128-bit key */ 02422 hcryp.Init.KeySize = CRYP_KEYSIZE_128B; 02423 break; 02424 #ifdef CRYP_KEYSIZE_192B 02425 case 12: /* 192-bit key */ 02426 hcryp.Init.KeySize = CRYP_KEYSIZE_192B; 02427 break; 02428 #endif 02429 case 14: /* 256-bit key */ 02430 hcryp.Init.KeySize 
= CRYP_KEYSIZE_256B; 02431 break; 02432 default: 02433 break; 02434 } 02435 hcryp.Instance = CRYP; 02436 hcryp.Init.DataType = CRYP_DATATYPE_8B; 02437 hcryp.Init.pKey = (uint8_t*)aes->key; 02438 hcryp.Init.pInitVect = (uint8_t*)aes->reg; 02439 02440 HAL_CRYP_Init(&hcryp); 02441 02442 while (blocks--) { 02443 if (HAL_CRYP_AESCBC_Decrypt(&hcryp, (uint8_t*)in, AES_BLOCK_SIZE, 02444 out, STM32_HAL_TIMEOUT) != HAL_OK) { 02445 ret = WC_TIMEOUT_E; 02446 } 02447 02448 /* store iv for next call */ 02449 XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE); 02450 02451 in += AES_BLOCK_SIZE; 02452 out += AES_BLOCK_SIZE; 02453 } 02454 02455 HAL_CRYP_DeInit(&hcryp); 02456 02457 return ret; 02458 } 02459 #endif /* HAVE_AES_DECRYPT */ 02460 #else 02461 int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) 02462 { 02463 word32 *enc_key, *iv; 02464 word32 blocks = (sz / AES_BLOCK_SIZE); 02465 CRYP_InitTypeDef AES_CRYP_InitStructure; 02466 CRYP_KeyInitTypeDef AES_CRYP_KeyInitStructure; 02467 CRYP_IVInitTypeDef AES_CRYP_IVInitStructure; 02468 02469 enc_key = aes->key; 02470 iv = aes->reg; 02471 02472 /* crypto structure initialization */ 02473 CRYP_KeyStructInit(&AES_CRYP_KeyInitStructure); 02474 CRYP_StructInit(&AES_CRYP_InitStructure); 02475 CRYP_IVStructInit(&AES_CRYP_IVInitStructure); 02476 02477 /* reset registers to their default values */ 02478 CRYP_DeInit(); 02479 02480 /* load key into correct registers */ 02481 switch (aes->rounds) { 02482 case 10: /* 128-bit key */ 02483 AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_128b; 02484 AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[0]; 02485 AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[1]; 02486 AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[2]; 02487 AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[3]; 02488 break; 02489 02490 case 12: /* 192-bit key */ 02491 AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_192b; 02492 AES_CRYP_KeyInitStructure.CRYP_Key1Left = enc_key[0]; 02493 
AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[1]; 02494 AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[2]; 02495 AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[3]; 02496 AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[4]; 02497 AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[5]; 02498 break; 02499 02500 case 14: /* 256-bit key */ 02501 AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_256b; 02502 AES_CRYP_KeyInitStructure.CRYP_Key0Left = enc_key[0]; 02503 AES_CRYP_KeyInitStructure.CRYP_Key0Right = enc_key[1]; 02504 AES_CRYP_KeyInitStructure.CRYP_Key1Left = enc_key[2]; 02505 AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[3]; 02506 AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[4]; 02507 AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[5]; 02508 AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[6]; 02509 AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[7]; 02510 break; 02511 02512 default: 02513 break; 02514 } 02515 CRYP_KeyInit(&AES_CRYP_KeyInitStructure); 02516 02517 /* set iv */ 02518 ByteReverseWords(iv, iv, AES_BLOCK_SIZE); 02519 AES_CRYP_IVInitStructure.CRYP_IV0Left = iv[0]; 02520 AES_CRYP_IVInitStructure.CRYP_IV0Right = iv[1]; 02521 AES_CRYP_IVInitStructure.CRYP_IV1Left = iv[2]; 02522 AES_CRYP_IVInitStructure.CRYP_IV1Right = iv[3]; 02523 CRYP_IVInit(&AES_CRYP_IVInitStructure); 02524 02525 /* set direction, mode, and datatype */ 02526 AES_CRYP_InitStructure.CRYP_AlgoDir = CRYP_AlgoDir_Encrypt; 02527 AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_CBC; 02528 AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b; 02529 CRYP_Init(&AES_CRYP_InitStructure); 02530 02531 /* enable crypto processor */ 02532 CRYP_Cmd(ENABLE); 02533 02534 while (blocks--) { 02535 /* flush IN/OUT FIFOs */ 02536 CRYP_FIFOFlush(); 02537 02538 CRYP_DataIn(*(uint32_t*)&in[0]); 02539 CRYP_DataIn(*(uint32_t*)&in[4]); 02540 CRYP_DataIn(*(uint32_t*)&in[8]); 02541 CRYP_DataIn(*(uint32_t*)&in[12]); 02542 02543 /* wait until the complete message has been 
processed */ 02544 while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {} 02545 02546 *(uint32_t*)&out[0] = CRYP_DataOut(); 02547 *(uint32_t*)&out[4] = CRYP_DataOut(); 02548 *(uint32_t*)&out[8] = CRYP_DataOut(); 02549 *(uint32_t*)&out[12] = CRYP_DataOut(); 02550 02551 /* store iv for next call */ 02552 XMEMCPY(aes->reg, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); 02553 02554 sz -= AES_BLOCK_SIZE; 02555 in += AES_BLOCK_SIZE; 02556 out += AES_BLOCK_SIZE; 02557 } 02558 02559 /* disable crypto processor */ 02560 CRYP_Cmd(DISABLE); 02561 02562 return 0; 02563 } 02564 02565 #ifdef HAVE_AES_DECRYPT 02566 int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) 02567 { 02568 word32 *dec_key, *iv; 02569 word32 blocks = (sz / AES_BLOCK_SIZE); 02570 CRYP_InitTypeDef AES_CRYP_InitStructure; 02571 CRYP_KeyInitTypeDef AES_CRYP_KeyInitStructure; 02572 CRYP_IVInitTypeDef AES_CRYP_IVInitStructure; 02573 02574 dec_key = aes->key; 02575 iv = aes->reg; 02576 02577 /* crypto structure initialization */ 02578 CRYP_KeyStructInit(&AES_CRYP_KeyInitStructure); 02579 CRYP_StructInit(&AES_CRYP_InitStructure); 02580 CRYP_IVStructInit(&AES_CRYP_IVInitStructure); 02581 02582 /* if input and output same will overwrite input iv */ 02583 XMEMCPY(aes->tmp, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); 02584 02585 /* reset registers to their default values */ 02586 CRYP_DeInit(); 02587 02588 /* load key into correct registers */ 02589 switch (aes->rounds) { 02590 case 10: /* 128-bit key */ 02591 AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_128b; 02592 AES_CRYP_KeyInitStructure.CRYP_Key2Left = dec_key[0]; 02593 AES_CRYP_KeyInitStructure.CRYP_Key2Right = dec_key[1]; 02594 AES_CRYP_KeyInitStructure.CRYP_Key3Left = dec_key[2]; 02595 AES_CRYP_KeyInitStructure.CRYP_Key3Right = dec_key[3]; 02596 break; 02597 02598 case 12: /* 192-bit key */ 02599 AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_192b; 02600 AES_CRYP_KeyInitStructure.CRYP_Key1Left = dec_key[0]; 02601 
AES_CRYP_KeyInitStructure.CRYP_Key1Right = dec_key[1]; 02602 AES_CRYP_KeyInitStructure.CRYP_Key2Left = dec_key[2]; 02603 AES_CRYP_KeyInitStructure.CRYP_Key2Right = dec_key[3]; 02604 AES_CRYP_KeyInitStructure.CRYP_Key3Left = dec_key[4]; 02605 AES_CRYP_KeyInitStructure.CRYP_Key3Right = dec_key[5]; 02606 break; 02607 02608 case 14: /* 256-bit key */ 02609 AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_256b; 02610 AES_CRYP_KeyInitStructure.CRYP_Key0Left = dec_key[0]; 02611 AES_CRYP_KeyInitStructure.CRYP_Key0Right = dec_key[1]; 02612 AES_CRYP_KeyInitStructure.CRYP_Key1Left = dec_key[2]; 02613 AES_CRYP_KeyInitStructure.CRYP_Key1Right = dec_key[3]; 02614 AES_CRYP_KeyInitStructure.CRYP_Key2Left = dec_key[4]; 02615 AES_CRYP_KeyInitStructure.CRYP_Key2Right = dec_key[5]; 02616 AES_CRYP_KeyInitStructure.CRYP_Key3Left = dec_key[6]; 02617 AES_CRYP_KeyInitStructure.CRYP_Key3Right = dec_key[7]; 02618 break; 02619 02620 default: 02621 break; 02622 } 02623 02624 /* set direction, mode, and datatype for key preparation */ 02625 AES_CRYP_InitStructure.CRYP_AlgoDir = CRYP_AlgoDir_Decrypt; 02626 AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_Key; 02627 AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_32b; 02628 CRYP_Init(&AES_CRYP_InitStructure); 02629 CRYP_KeyInit(&AES_CRYP_KeyInitStructure); 02630 02631 /* enable crypto processor */ 02632 CRYP_Cmd(ENABLE); 02633 02634 /* wait until key has been prepared */ 02635 while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {} 02636 02637 /* set direction, mode, and datatype for decryption */ 02638 AES_CRYP_InitStructure.CRYP_AlgoDir = CRYP_AlgoDir_Decrypt; 02639 AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_CBC; 02640 AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b; 02641 CRYP_Init(&AES_CRYP_InitStructure); 02642 02643 /* set iv */ 02644 ByteReverseWords(iv, iv, AES_BLOCK_SIZE); 02645 02646 AES_CRYP_IVInitStructure.CRYP_IV0Left = iv[0]; 02647 AES_CRYP_IVInitStructure.CRYP_IV0Right = iv[1]; 02648 
AES_CRYP_IVInitStructure.CRYP_IV1Left = iv[2]; 02649 AES_CRYP_IVInitStructure.CRYP_IV1Right = iv[3]; 02650 CRYP_IVInit(&AES_CRYP_IVInitStructure); 02651 02652 /* enable crypto processor */ 02653 CRYP_Cmd(ENABLE); 02654 02655 while (blocks--) { 02656 /* flush IN/OUT FIFOs */ 02657 CRYP_FIFOFlush(); 02658 02659 CRYP_DataIn(*(uint32_t*)&in[0]); 02660 CRYP_DataIn(*(uint32_t*)&in[4]); 02661 CRYP_DataIn(*(uint32_t*)&in[8]); 02662 CRYP_DataIn(*(uint32_t*)&in[12]); 02663 02664 /* wait until the complete message has been processed */ 02665 while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {} 02666 02667 *(uint32_t*)&out[0] = CRYP_DataOut(); 02668 *(uint32_t*)&out[4] = CRYP_DataOut(); 02669 *(uint32_t*)&out[8] = CRYP_DataOut(); 02670 *(uint32_t*)&out[12] = CRYP_DataOut(); 02671 02672 /* store iv for next call */ 02673 XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE); 02674 02675 in += AES_BLOCK_SIZE; 02676 out += AES_BLOCK_SIZE; 02677 } 02678 02679 /* disable crypto processor */ 02680 CRYP_Cmd(DISABLE); 02681 02682 return 0; 02683 } 02684 #endif /* HAVE_AES_DECRYPT */ 02685 #endif /* WOLFSSL_STM32_CUBEMX */ 02686 02687 #elif defined(HAVE_COLDFIRE_SEC) 02688 static int wc_AesCbcCrypt(Aes* aes, byte* po, const byte* pi, word32 sz, 02689 word32 descHeader) 02690 { 02691 #ifdef DEBUG_WOLFSSL 02692 int i; int stat1, stat2; int ret; 02693 #endif 02694 02695 int size; 02696 volatile int v; 02697 02698 if ((pi == NULL) || (po == NULL)) 02699 return BAD_FUNC_ARG; /*wrong pointer*/ 02700 02701 wc_LockMutex(&Mutex_AesSEC); 02702 02703 /* Set descriptor for SEC */ 02704 secDesc->length1 = 0x0; 02705 secDesc->pointer1 = NULL; 02706 02707 secDesc->length2 = AES_BLOCK_SIZE; 02708 secDesc->pointer2 = (byte *)secReg; /* Initial Vector */ 02709 02710 switch(aes->rounds) { 02711 case 10: secDesc->length3 = 16; break; 02712 case 12: secDesc->length3 = 24; break; 02713 case 14: secDesc->length3 = 32; break; 02714 } 02715 XMEMCPY(secKey, aes->key, secDesc->length3); 02716 02717 secDesc->pointer3 = 
(byte *)secKey; 02718 secDesc->pointer4 = AESBuffIn; 02719 secDesc->pointer5 = AESBuffOut; 02720 secDesc->length6 = 0x0; 02721 secDesc->pointer6 = NULL; 02722 secDesc->length7 = 0x0; 02723 secDesc->pointer7 = NULL; 02724 secDesc->nextDescriptorPtr = NULL; 02725 02726 while (sz) { 02727 secDesc->header = descHeader; 02728 XMEMCPY(secReg, aes->reg, AES_BLOCK_SIZE); 02729 if ((sz % AES_BUFFER_SIZE) == sz) { 02730 size = sz; 02731 sz = 0; 02732 } else { 02733 size = AES_BUFFER_SIZE; 02734 sz -= AES_BUFFER_SIZE; 02735 } 02736 secDesc->length4 = size; 02737 secDesc->length5 = size; 02738 02739 XMEMCPY(AESBuffIn, pi, size); 02740 if(descHeader == SEC_DESC_AES_CBC_DECRYPT) { 02741 XMEMCPY((void*)aes->tmp, (void*)&(pi[size-AES_BLOCK_SIZE]), 02742 AES_BLOCK_SIZE); 02743 } 02744 02745 /* Point SEC to the location of the descriptor */ 02746 MCF_SEC_FR0 = (uint32)secDesc; 02747 /* Initialize SEC and wait for encryption to complete */ 02748 MCF_SEC_CCCR0 = 0x0000001a; 02749 /* poll SISR to determine when channel is complete */ 02750 v=0; 02751 02752 while ((secDesc->header>> 24) != 0xff) v++; 02753 02754 #ifdef DEBUG_WOLFSSL 02755 ret = MCF_SEC_SISRH; 02756 stat1 = MCF_SEC_AESSR; 02757 stat2 = MCF_SEC_AESISR; 02758 if (ret & 0xe0000000) { 02759 db_printf("Aes_Cbc(i=%d):ISRH=%08x, AESSR=%08x, " 02760 "AESISR=%08x\n", i, ret, stat1, stat2); 02761 } 02762 #endif 02763 02764 XMEMCPY(po, AESBuffOut, size); 02765 02766 if (descHeader == SEC_DESC_AES_CBC_ENCRYPT) { 02767 XMEMCPY((void*)aes->reg, (void*)&(po[size-AES_BLOCK_SIZE]), 02768 AES_BLOCK_SIZE); 02769 } else { 02770 XMEMCPY((void*)aes->reg, (void*)aes->tmp, AES_BLOCK_SIZE); 02771 } 02772 02773 pi += size; 02774 po += size; 02775 } 02776 02777 wc_UnLockMutex(&Mutex_AesSEC); 02778 return 0; 02779 } 02780 02781 int wc_AesCbcEncrypt(Aes* aes, byte* po, const byte* pi, word32 sz) 02782 { 02783 return (wc_AesCbcCrypt(aes, po, pi, sz, SEC_DESC_AES_CBC_ENCRYPT)); 02784 } 02785 02786 #ifdef HAVE_AES_DECRYPT 02787 int 
wc_AesCbcDecrypt(Aes* aes, byte* po, const byte* pi, word32 sz) 02788 { 02789 return (wc_AesCbcCrypt(aes, po, pi, sz, SEC_DESC_AES_CBC_DECRYPT)); 02790 } 02791 #endif /* HAVE_AES_DECRYPT */ 02792 02793 #elif defined(FREESCALE_LTC) 02794 int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) 02795 { 02796 uint32_t keySize; 02797 status_t status; 02798 byte *iv, *enc_key; 02799 word32 blocks = (sz / AES_BLOCK_SIZE); 02800 02801 iv = (byte*)aes->reg; 02802 enc_key = (byte*)aes->key; 02803 02804 status = wc_AesGetKeySize(aes, &keySize); 02805 if (status != 0) { 02806 return status; 02807 } 02808 02809 status = LTC_AES_EncryptCbc(LTC_BASE, in, out, blocks * AES_BLOCK_SIZE, 02810 iv, enc_key, keySize); 02811 return (status == kStatus_Success) ? 0 : -1; 02812 } 02813 02814 #ifdef HAVE_AES_DECRYPT 02815 int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) 02816 { 02817 uint32_t keySize; 02818 status_t status; 02819 byte* iv, *dec_key; 02820 word32 blocks = (sz / AES_BLOCK_SIZE); 02821 02822 iv = (byte*)aes->reg; 02823 dec_key = (byte*)aes->key; 02824 02825 status = wc_AesGetKeySize(aes, &keySize); 02826 if (status != 0) { 02827 return status; 02828 } 02829 02830 status = LTC_AES_DecryptCbc(LTC_BASE, in, out, blocks * AES_BLOCK_SIZE, 02831 iv, dec_key, keySize, kLTC_EncryptKey); 02832 return (status == kStatus_Success) ? 
0 : -1; 02833 } 02834 #endif /* HAVE_AES_DECRYPT */ 02835 02836 #elif defined(FREESCALE_MMCAU) 02837 int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) 02838 { 02839 int i; 02840 int offset = 0; 02841 word32 blocks = (sz / AES_BLOCK_SIZE); 02842 byte *iv; 02843 byte temp_block[AES_BLOCK_SIZE]; 02844 02845 iv = (byte*)aes->reg; 02846 02847 while (blocks--) { 02848 XMEMCPY(temp_block, in + offset, AES_BLOCK_SIZE); 02849 02850 /* XOR block with IV for CBC */ 02851 for (i = 0; i < AES_BLOCK_SIZE; i++) 02852 temp_block[i] ^= iv[i]; 02853 02854 wc_AesEncrypt(aes, temp_block, out + offset); 02855 02856 offset += AES_BLOCK_SIZE; 02857 02858 /* store IV for next block */ 02859 XMEMCPY(iv, out + offset - AES_BLOCK_SIZE, AES_BLOCK_SIZE); 02860 } 02861 02862 return 0; 02863 } 02864 #ifdef HAVE_AES_DECRYPT 02865 int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) 02866 { 02867 int i; 02868 int offset = 0; 02869 word32 blocks = (sz / AES_BLOCK_SIZE); 02870 byte* iv; 02871 byte temp_block[AES_BLOCK_SIZE]; 02872 02873 iv = (byte*)aes->reg; 02874 02875 while (blocks--) { 02876 XMEMCPY(temp_block, in + offset, AES_BLOCK_SIZE); 02877 02878 wc_AesDecrypt(aes, in + offset, out + offset); 02879 02880 /* XOR block with IV for CBC */ 02881 for (i = 0; i < AES_BLOCK_SIZE; i++) 02882 (out + offset)[i] ^= iv[i]; 02883 02884 /* store IV for next block */ 02885 XMEMCPY(iv, temp_block, AES_BLOCK_SIZE); 02886 02887 offset += AES_BLOCK_SIZE; 02888 } 02889 02890 return 0; 02891 } 02892 #endif /* HAVE_AES_DECRYPT */ 02893 02894 #elif defined(WOLFSSL_PIC32MZ_CRYPT) 02895 02896 int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) 02897 { 02898 int ret; 02899 02900 /* hardware fails on input that is not a multiple of AES block size */ 02901 if (sz % AES_BLOCK_SIZE != 0) { 02902 return BAD_FUNC_ARG; 02903 } 02904 02905 ret = wc_Pic32AesCrypt( 02906 aes->key, aes->keylen, aes->reg, AES_BLOCK_SIZE, 02907 out, in, sz, PIC32_ENCRYPTION, 02908 
PIC32_ALGO_AES, PIC32_CRYPTOALGO_RCBC); 02909 02910 /* store iv for next call */ 02911 if (ret == 0) { 02912 XMEMCPY(aes->reg, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); 02913 } 02914 02915 return ret; 02916 } 02917 #ifdef HAVE_AES_DECRYPT 02918 int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) 02919 { 02920 int ret; 02921 byte scratch[AES_BLOCK_SIZE]; 02922 02923 /* hardware fails on input that is not a multiple of AES block size */ 02924 if (sz % AES_BLOCK_SIZE != 0) { 02925 return BAD_FUNC_ARG; 02926 } 02927 XMEMCPY(scratch, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); 02928 02929 ret = wc_Pic32AesCrypt( 02930 aes->key, aes->keylen, aes->reg, AES_BLOCK_SIZE, 02931 out, in, sz, PIC32_DECRYPTION, 02932 PIC32_ALGO_AES, PIC32_CRYPTOALGO_RCBC); 02933 02934 /* store iv for next call */ 02935 if (ret == 0) { 02936 XMEMCPY((byte*)aes->reg, scratch, AES_BLOCK_SIZE); 02937 } 02938 02939 return ret; 02940 } 02941 #endif /* HAVE_AES_DECRYPT */ 02942 02943 #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES) 02944 /* implemented in wolfcrypt/src/port/caam/caam_aes.c */ 02945 02946 #else 02947 02948 int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) 02949 { 02950 word32 blocks = (sz / AES_BLOCK_SIZE); 02951 02952 if (aes == NULL || out == NULL || in == NULL) { 02953 return BAD_FUNC_ARG; 02954 } 02955 02956 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES) 02957 /* if async and byte count above threshold */ 02958 if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES && 02959 sz >= WC_ASYNC_THRESH_AES_CBC) { 02960 #if defined(HAVE_CAVIUM) 02961 return NitroxAesCbcEncrypt(aes, out, in, sz); 02962 #elif defined(HAVE_INTEL_QA) 02963 return IntelQaSymAesCbcEncrypt(&aes->asyncDev, out, in, sz, 02964 (const byte*)aes->asyncKey, aes->keylen, 02965 (const byte*)aes->asyncIv, AES_BLOCK_SIZE); 02966 #else /* WOLFSSL_ASYNC_CRYPT_TEST */ 02967 if (wc_AsyncTestInit(&aes->asyncDev, ASYNC_TEST_AES_CBC_ENCRYPT)) { 02968 WC_ASYNC_TEST* 
testDev = &aes->asyncDev.test; 02969 testDev->aes.aes = aes; 02970 testDev->aes.out = out; 02971 testDev->aes.in = in; 02972 testDev->aes.sz = sz; 02973 return WC_PENDING_E; 02974 } 02975 #endif 02976 } 02977 #endif /* WOLFSSL_ASYNC_CRYPT */ 02978 02979 #ifdef WOLFSSL_AESNI 02980 if (haveAESNI) { 02981 #ifdef DEBUG_AESNI 02982 printf("about to aes cbc encrypt\n"); 02983 printf("in = %p\n", in); 02984 printf("out = %p\n", out); 02985 printf("aes->key = %p\n", aes->key); 02986 printf("aes->reg = %p\n", aes->reg); 02987 printf("aes->rounds = %d\n", aes->rounds); 02988 printf("sz = %d\n", sz); 02989 #endif 02990 02991 /* check alignment, decrypt doesn't need alignment */ 02992 if ((wolfssl_word)in % AESNI_ALIGN) { 02993 #ifndef NO_WOLFSSL_ALLOC_ALIGN 02994 byte* tmp = (byte*)XMALLOC(sz + AES_BLOCK_SIZE + AESNI_ALIGN, 02995 aes->heap, DYNAMIC_TYPE_TMP_BUFFER); 02996 byte* tmp_align; 02997 if (tmp == NULL) return MEMORY_E; 02998 02999 tmp_align = tmp + (AESNI_ALIGN - ((size_t)tmp % AESNI_ALIGN)); 03000 XMEMCPY(tmp_align, in, sz); 03001 AES_CBC_encrypt(tmp_align, tmp_align, (byte*)aes->reg, sz, 03002 (byte*)aes->key, aes->rounds); 03003 /* store iv for next call */ 03004 XMEMCPY(aes->reg, tmp_align + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); 03005 03006 XMEMCPY(out, tmp_align, sz); 03007 XFREE(tmp, aes->heap, DYNAMIC_TYPE_TMP_BUFFER); 03008 return 0; 03009 #else 03010 WOLFSSL_MSG("AES-CBC encrypt with bad alignment"); 03011 return BAD_ALIGN_E; 03012 #endif 03013 } 03014 03015 AES_CBC_encrypt(in, out, (byte*)aes->reg, sz, (byte*)aes->key, 03016 aes->rounds); 03017 /* store iv for next call */ 03018 XMEMCPY(aes->reg, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); 03019 03020 return 0; 03021 } 03022 #endif 03023 03024 while (blocks--) { 03025 xorbuf((byte*)aes->reg, in, AES_BLOCK_SIZE); 03026 wc_AesEncrypt(aes, (byte*)aes->reg, (byte*)aes->reg); 03027 XMEMCPY(out, aes->reg, AES_BLOCK_SIZE); 03028 03029 out += AES_BLOCK_SIZE; 03030 in += AES_BLOCK_SIZE; 03031 } 03032 03033 return 0; 
03034 } 03035 03036 #ifdef HAVE_AES_DECRYPT 03037 int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) 03038 { 03039 word32 blocks; 03040 03041 if (aes == NULL || out == NULL || in == NULL 03042 || sz % AES_BLOCK_SIZE != 0) { 03043 return BAD_FUNC_ARG; 03044 } 03045 03046 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES) 03047 /* if async and byte count above threshold */ 03048 if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES && 03049 sz >= WC_ASYNC_THRESH_AES_CBC) { 03050 #if defined(HAVE_CAVIUM) 03051 return NitroxAesCbcDecrypt(aes, out, in, sz); 03052 #elif defined(HAVE_INTEL_QA) 03053 return IntelQaSymAesCbcDecrypt(&aes->asyncDev, out, in, sz, 03054 (const byte*)aes->asyncKey, aes->keylen, 03055 (const byte*)aes->asyncIv, AES_BLOCK_SIZE); 03056 #else /* WOLFSSL_ASYNC_CRYPT_TEST */ 03057 if (wc_AsyncTestInit(&aes->asyncDev, ASYNC_TEST_AES_CBC_DECRYPT)) { 03058 WC_ASYNC_TEST* testDev = &aes->asyncDev.test; 03059 testDev->aes.aes = aes; 03060 testDev->aes.out = out; 03061 testDev->aes.in = in; 03062 testDev->aes.sz = sz; 03063 return WC_PENDING_E; 03064 } 03065 #endif 03066 } 03067 #endif 03068 03069 #ifdef WOLFSSL_AESNI 03070 if (haveAESNI) { 03071 #ifdef DEBUG_AESNI 03072 printf("about to aes cbc decrypt\n"); 03073 printf("in = %p\n", in); 03074 printf("out = %p\n", out); 03075 printf("aes->key = %p\n", aes->key); 03076 printf("aes->reg = %p\n", aes->reg); 03077 printf("aes->rounds = %d\n", aes->rounds); 03078 printf("sz = %d\n", sz); 03079 #endif 03080 03081 /* if input and output same will overwrite input iv */ 03082 XMEMCPY(aes->tmp, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); 03083 #if defined(WOLFSSL_AESNI_BY4) 03084 AES_CBC_decrypt_by4(in, out, (byte*)aes->reg, sz, (byte*)aes->key, 03085 aes->rounds); 03086 #elif defined(WOLFSSL_AESNI_BY6) 03087 AES_CBC_decrypt_by6(in, out, (byte*)aes->reg, sz, (byte*)aes->key, 03088 aes->rounds); 03089 #else /* WOLFSSL_AESNI_BYx */ 03090 AES_CBC_decrypt_by8(in, out, (byte*)aes->reg, sz, 
(byte*)aes->key, 03091 aes->rounds); 03092 #endif /* WOLFSSL_AESNI_BYx */ 03093 /* store iv for next call */ 03094 XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE); 03095 return 0; 03096 } 03097 #endif 03098 03099 blocks = sz / AES_BLOCK_SIZE; 03100 while (blocks--) { 03101 XMEMCPY(aes->tmp, in, AES_BLOCK_SIZE); 03102 wc_AesDecrypt(aes, (byte*)aes->tmp, out); 03103 xorbuf(out, (byte*)aes->reg, AES_BLOCK_SIZE); 03104 XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE); 03105 03106 out += AES_BLOCK_SIZE; 03107 in += AES_BLOCK_SIZE; 03108 } 03109 03110 return 0; 03111 } 03112 #endif 03113 03114 #endif /* AES-CBC block */ 03115 #endif /* HAVE_AES_CBC */ 03116 03117 /* AES-CTR */ 03118 #if defined(WOLFSSL_AES_COUNTER) 03119 03120 #ifdef STM32_CRYPTO 03121 #define NEED_AES_CTR_SOFT 03122 #define XTRANSFORM_AESCTRBLOCK wc_AesCtrEncryptBlock 03123 03124 int wc_AesCtrEncryptBlock(Aes* aes, byte* out, const byte* in) 03125 { 03126 int ret = 0; 03127 #ifdef WOLFSSL_STM32_CUBEMX 03128 CRYP_HandleTypeDef hcryp; 03129 03130 XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef)); 03131 switch (aes->rounds) { 03132 case 10: /* 128-bit key */ 03133 hcryp.Init.KeySize = CRYP_KEYSIZE_128B; 03134 break; 03135 #ifdef CRYP_KEYSIZE_192B 03136 case 12: /* 192-bit key */ 03137 hcryp.Init.KeySize = CRYP_KEYSIZE_192B; 03138 break; 03139 #endif 03140 case 14: /* 256-bit key */ 03141 hcryp.Init.KeySize = CRYP_KEYSIZE_256B; 03142 break; 03143 default: 03144 break; 03145 } 03146 hcryp.Instance = CRYP; 03147 hcryp.Init.DataType = CRYP_DATATYPE_8B; 03148 hcryp.Init.pKey = (byte*)aes->key; 03149 hcryp.Init.pInitVect = (byte*)aes->reg; 03150 03151 HAL_CRYP_Init(&hcryp); 03152 03153 if (HAL_CRYP_AESCTR_Encrypt(&hcryp, (byte*)in, AES_BLOCK_SIZE, out, 03154 STM32_HAL_TIMEOUT) != HAL_OK) { 03155 /* failed */ 03156 ret = WC_TIMEOUT_E; 03157 } 03158 03159 HAL_CRYP_DeInit(&hcryp); 03160 03161 #else /* STD_PERI_LIB */ 03162 word32 *enc_key, *iv; 03163 CRYP_InitTypeDef AES_CRYP_InitStructure; 03164 CRYP_KeyInitTypeDef 
AES_CRYP_KeyInitStructure; 03165 CRYP_IVInitTypeDef AES_CRYP_IVInitStructure; 03166 03167 enc_key = aes->key; 03168 iv = aes->reg; 03169 03170 /* crypto structure initialization */ 03171 CRYP_KeyStructInit(&AES_CRYP_KeyInitStructure); 03172 CRYP_StructInit(&AES_CRYP_InitStructure); 03173 CRYP_IVStructInit(&AES_CRYP_IVInitStructure); 03174 03175 /* reset registers to their default values */ 03176 CRYP_DeInit(); 03177 03178 /* load key into correct registers */ 03179 switch (aes->rounds) { 03180 case 10: /* 128-bit key */ 03181 AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_128b; 03182 AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[0]; 03183 AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[1]; 03184 AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[2]; 03185 AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[3]; 03186 break; 03187 case 12: /* 192-bit key */ 03188 AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_192b; 03189 AES_CRYP_KeyInitStructure.CRYP_Key1Left = enc_key[0]; 03190 AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[1]; 03191 AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[2]; 03192 AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[3]; 03193 AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[4]; 03194 AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[5]; 03195 break; 03196 case 14: /* 256-bit key */ 03197 AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_256b; 03198 AES_CRYP_KeyInitStructure.CRYP_Key0Left = enc_key[0]; 03199 AES_CRYP_KeyInitStructure.CRYP_Key0Right = enc_key[1]; 03200 AES_CRYP_KeyInitStructure.CRYP_Key1Left = enc_key[2]; 03201 AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[3]; 03202 AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[4]; 03203 AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[5]; 03204 AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[6]; 03205 AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[7]; 03206 break; 03207 default: 03208 break; 03209 } 03210 CRYP_KeyInit(&AES_CRYP_KeyInitStructure); 03211 
03212 /* set iv */ 03213 AES_CRYP_IVInitStructure.CRYP_IV0Left = ByteReverseWord32(iv[0]); 03214 AES_CRYP_IVInitStructure.CRYP_IV0Right = ByteReverseWord32(iv[1]); 03215 AES_CRYP_IVInitStructure.CRYP_IV1Left = ByteReverseWord32(iv[2]); 03216 AES_CRYP_IVInitStructure.CRYP_IV1Right = ByteReverseWord32(iv[3]); 03217 CRYP_IVInit(&AES_CRYP_IVInitStructure); 03218 03219 /* set direction, mode, and datatype */ 03220 AES_CRYP_InitStructure.CRYP_AlgoDir = CRYP_AlgoDir_Encrypt; 03221 AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_CTR; 03222 AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b; 03223 CRYP_Init(&AES_CRYP_InitStructure); 03224 03225 /* enable crypto processor */ 03226 CRYP_Cmd(ENABLE); 03227 03228 /* flush IN/OUT FIFOs */ 03229 CRYP_FIFOFlush(); 03230 03231 CRYP_DataIn(*(uint32_t*)&in[0]); 03232 CRYP_DataIn(*(uint32_t*)&in[4]); 03233 CRYP_DataIn(*(uint32_t*)&in[8]); 03234 CRYP_DataIn(*(uint32_t*)&in[12]); 03235 03236 /* wait until the complete message has been processed */ 03237 while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {} 03238 03239 *(uint32_t*)&out[0] = CRYP_DataOut(); 03240 *(uint32_t*)&out[4] = CRYP_DataOut(); 03241 *(uint32_t*)&out[8] = CRYP_DataOut(); 03242 *(uint32_t*)&out[12] = CRYP_DataOut(); 03243 03244 /* disable crypto processor */ 03245 CRYP_Cmd(DISABLE); 03246 03247 #endif /* WOLFSSL_STM32_CUBEMX */ 03248 return ret; 03249 } 03250 03251 03252 #elif defined(WOLFSSL_PIC32MZ_CRYPT) 03253 03254 #define NEED_AES_CTR_SOFT 03255 #define XTRANSFORM_AESCTRBLOCK wc_AesCtrEncryptBlock 03256 03257 int wc_AesCtrEncryptBlock(Aes* aes, byte* out, const byte* in) 03258 { 03259 word32 tmpIv[AES_BLOCK_SIZE / sizeof(word32)]; 03260 XMEMCPY(tmpIv, aes->reg, AES_BLOCK_SIZE); 03261 return wc_Pic32AesCrypt( 03262 aes->key, aes->keylen, tmpIv, AES_BLOCK_SIZE, 03263 out, in, AES_BLOCK_SIZE, 03264 PIC32_ENCRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_RCTR); 03265 } 03266 03267 #elif defined(HAVE_COLDFIRE_SEC) 03268 #error "Coldfire SEC doesn't 
currently support AES-CTR mode" 03269 03270 #elif defined(FREESCALE_LTC) 03271 int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) 03272 { 03273 uint32_t keySize; 03274 byte *iv, *enc_key; 03275 byte* tmp; 03276 03277 if (aes == NULL || out == NULL || in == NULL) { 03278 return BAD_FUNC_ARG; 03279 } 03280 03281 /* consume any unused bytes left in aes->tmp */ 03282 tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left; 03283 while (aes->left && sz) { 03284 *(out++) = *(in++) ^ *(tmp++); 03285 aes->left--; 03286 sz--; 03287 } 03288 03289 if (sz) { 03290 iv = (byte*)aes->reg; 03291 enc_key = (byte*)aes->key; 03292 03293 wc_AesGetKeySize(aes, &keySize); 03294 03295 LTC_AES_CryptCtr(LTC_BASE, in, out, sz, 03296 iv, enc_key, keySize, (byte*)aes->tmp, 03297 (uint32_t*)&aes->left); 03298 } 03299 03300 return 0; 03301 } 03302 03303 #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES) 03304 /* implemented in wolfcrypt/src/port/caam/caam_aes.c */ 03305 03306 #else 03307 03308 /* Use software based AES counter */ 03309 #define NEED_AES_CTR_SOFT 03310 #endif 03311 03312 #ifdef NEED_AES_CTR_SOFT 03313 /* Increment AES counter */ 03314 static WC_INLINE void IncrementAesCounter(byte* inOutCtr) 03315 { 03316 /* in network byte order so start at end and work back */ 03317 int i; 03318 for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) { 03319 if (++inOutCtr[i]) /* we're done unless we overflow */ 03320 return; 03321 } 03322 } 03323 03324 int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) 03325 { 03326 byte* tmp; 03327 03328 if (aes == NULL || out == NULL || in == NULL) { 03329 return BAD_FUNC_ARG; 03330 } 03331 03332 /* consume any unused bytes left in aes->tmp */ 03333 tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left; 03334 while (aes->left && sz) { 03335 *(out++) = *(in++) ^ *(tmp++); 03336 aes->left--; 03337 sz--; 03338 } 03339 03340 /* do as many block size ops as possible */ 03341 while (sz >= AES_BLOCK_SIZE) { 03342 #ifdef 
XTRANSFORM_AESCTRBLOCK 03343 XTRANSFORM_AESCTRBLOCK(aes, out, in); 03344 #else 03345 wc_AesEncrypt(aes, (byte*)aes->reg, out); 03346 xorbuf(out, in, AES_BLOCK_SIZE); 03347 #endif 03348 IncrementAesCounter((byte*)aes->reg); 03349 03350 out += AES_BLOCK_SIZE; 03351 in += AES_BLOCK_SIZE; 03352 sz -= AES_BLOCK_SIZE; 03353 aes->left = 0; 03354 } 03355 03356 /* handle non block size remaining and store unused byte count in left */ 03357 if (sz) { 03358 wc_AesEncrypt(aes, (byte*)aes->reg, (byte*)aes->tmp); 03359 IncrementAesCounter((byte*)aes->reg); 03360 03361 aes->left = AES_BLOCK_SIZE; 03362 tmp = (byte*)aes->tmp; 03363 03364 while (sz--) { 03365 *(out++) = *(in++) ^ *(tmp++); 03366 aes->left--; 03367 } 03368 } 03369 03370 return 0; 03371 } 03372 03373 #endif /* NEED_AES_CTR_SOFT */ 03374 03375 #endif /* WOLFSSL_AES_COUNTER */ 03376 #endif /* !WOLFSSL_ARMASM */ 03377 03378 03379 /* 03380 * The IV for AES GCM and CCM, stored in struct Aes's member reg, is comprised 03381 * of two parts in order: 03382 * 1. The fixed field which may be 0 or 4 bytes long. In TLS, this is set 03383 * to the implicit IV. 03384 * 2. The explicit IV is generated by wolfCrypt. It needs to be managed 03385 * by wolfCrypt to ensure the IV is unique for each call to encrypt. 03386 * The IV may be a 96-bit random value, or the 32-bit fixed value and a 03387 * 64-bit set of 0 or random data. The final 32-bits of reg is used as a 03388 * block counter during the encryption. 
03389 */ 03390 03391 #if (defined(HAVE_AESGCM) && !defined(WC_NO_RNG)) || defined(HAVE_AESCCM) 03392 static WC_INLINE void IncCtr(byte* ctr, word32 ctrSz) 03393 { 03394 int i; 03395 for (i = ctrSz-1; i >= 0; i--) { 03396 if (++ctr[i]) 03397 break; 03398 } 03399 } 03400 #endif /* HAVE_AESGCM || HAVE_AESCCM */ 03401 03402 03403 #ifdef HAVE_AESGCM 03404 03405 #if defined(HAVE_COLDFIRE_SEC) 03406 #error "Coldfire SEC doesn't currently support AES-GCM mode" 03407 03408 #elif defined(WOLFSSL_NRF51_AES) 03409 #error "nRF51 doesn't currently support AES-GCM mode" 03410 03411 #endif 03412 03413 #ifdef WOLFSSL_ARMASM 03414 /* implementation is located in wolfcrypt/src/port/arm/armv8-aes.c */ 03415 #else /* software + AESNI implementation */ 03416 03417 #if !defined(FREESCALE_LTC_AES_GCM) 03418 static WC_INLINE void IncrementGcmCounter(byte* inOutCtr) 03419 { 03420 int i; 03421 03422 /* in network byte order so start at end and work back */ 03423 for (i = AES_BLOCK_SIZE - 1; i >= AES_BLOCK_SIZE - CTR_SZ; i--) { 03424 if (++inOutCtr[i]) /* we're done unless we overflow */ 03425 return; 03426 } 03427 } 03428 #endif /* !FREESCALE_LTC_AES_GCM */ 03429 03430 #if defined(GCM_SMALL) || defined(GCM_TABLE) 03431 03432 static WC_INLINE void FlattenSzInBits(byte* buf, word32 sz) 03433 { 03434 /* Multiply the sz by 8 */ 03435 word32 szHi = (sz >> (8*sizeof(sz) - 3)); 03436 sz <<= 3; 03437 03438 /* copy over the words of the sz into the destination buffer */ 03439 buf[0] = (szHi >> 24) & 0xff; 03440 buf[1] = (szHi >> 16) & 0xff; 03441 buf[2] = (szHi >> 8) & 0xff; 03442 buf[3] = szHi & 0xff; 03443 buf[4] = (sz >> 24) & 0xff; 03444 buf[5] = (sz >> 16) & 0xff; 03445 buf[6] = (sz >> 8) & 0xff; 03446 buf[7] = sz & 0xff; 03447 } 03448 03449 03450 static WC_INLINE void RIGHTSHIFTX(byte* x) 03451 { 03452 int i; 03453 int carryOut = 0; 03454 int carryIn = 0; 03455 int borrow = x[15] & 0x01; 03456 03457 for (i = 0; i < AES_BLOCK_SIZE; i++) { 03458 carryOut = x[i] & 0x01; 03459 x[i] = (x[i] >> 1) | 
(carryIn ? 0x80 : 0); 03460 carryIn = carryOut; 03461 } 03462 if (borrow) x[0] ^= 0xE1; 03463 } 03464 03465 #endif /* defined(GCM_SMALL) || defined(GCM_TABLE) */ 03466 03467 03468 #ifdef GCM_TABLE 03469 03470 static void GenerateM0(Aes* aes) 03471 { 03472 int i, j; 03473 byte (*m)[AES_BLOCK_SIZE] = aes->M0; 03474 03475 XMEMCPY(m[128], aes->H, AES_BLOCK_SIZE); 03476 03477 for (i = 64; i > 0; i /= 2) { 03478 XMEMCPY(m[i], m[i*2], AES_BLOCK_SIZE); 03479 RIGHTSHIFTX(m[i]); 03480 } 03481 03482 for (i = 2; i < 256; i *= 2) { 03483 for (j = 1; j < i; j++) { 03484 XMEMCPY(m[i+j], m[i], AES_BLOCK_SIZE); 03485 xorbuf(m[i+j], m[j], AES_BLOCK_SIZE); 03486 } 03487 } 03488 03489 XMEMSET(m[0], 0, AES_BLOCK_SIZE); 03490 } 03491 03492 #endif /* GCM_TABLE */ 03493 03494 03495 int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) 03496 { 03497 int ret; 03498 byte iv[AES_BLOCK_SIZE]; 03499 03500 #ifdef WOLFSSL_IMX6_CAAM_BLOB 03501 byte local[32]; 03502 word32 localSz = 32; 03503 03504 if (len == (16 + WC_CAAM_BLOB_SZ) || 03505 len == (24 + WC_CAAM_BLOB_SZ) || 03506 len == (32 + WC_CAAM_BLOB_SZ)) { 03507 if (wc_caamOpenBlob((byte*)key, len, local, &localSz) != 0) { 03508 return BAD_FUNC_ARG; 03509 } 03510 03511 /* set local values */ 03512 key = local; 03513 len = localSz; 03514 } 03515 #endif 03516 03517 if (!((len == 16) || (len == 24) || (len == 32))) 03518 return BAD_FUNC_ARG; 03519 03520 XMEMSET(iv, 0, AES_BLOCK_SIZE); 03521 ret = wc_AesSetKey(aes, key, len, iv, AES_ENCRYPTION); 03522 03523 #ifdef WOLFSSL_AESNI 03524 /* AES-NI code generates its own H value. 
*/ 03525 if (haveAESNI) 03526 return ret; 03527 #endif /* WOLFSSL_AESNI */ 03528 03529 #if !defined(FREESCALE_LTC_AES_GCM) 03530 if (ret == 0) { 03531 wc_AesEncrypt(aes, iv, aes->H); 03532 #ifdef GCM_TABLE 03533 GenerateM0(aes); 03534 #endif /* GCM_TABLE */ 03535 } 03536 #endif /* FREESCALE_LTC_AES_GCM */ 03537 03538 #if defined(WOLFSSL_XILINX_CRYPT) 03539 wc_AesGcmSetKey_ex(aes, key, len, XSECURE_CSU_AES_KEY_SRC_KUP); 03540 #endif 03541 03542 #ifdef WOLFSSL_IMX6_CAAM_BLOB 03543 ForceZero(local, sizeof(local)); 03544 #endif 03545 03546 return ret; 03547 } 03548 03549 03550 #ifdef WOLFSSL_AESNI 03551 03552 #if defined(USE_INTEL_SPEEDUP) 03553 #define HAVE_INTEL_AVX1 03554 #define HAVE_INTEL_AVX2 03555 #endif /* USE_INTEL_SPEEDUP */ 03556 03557 #ifdef _MSC_VER 03558 #define S(w,z) ((char)((unsigned long long)(w) >> (8*(7-(z))) & 0xFF)) 03559 #define M128_INIT(x,y) { S((x),7), S((x),6), S((x),5), S((x),4), \ 03560 S((x),3), S((x),2), S((x),1), S((x),0), \ 03561 S((y),7), S((y),6), S((y),5), S((y),4), \ 03562 S((y),3), S((y),2), S((y),1), S((y),0) } 03563 #else 03564 #define M128_INIT(x,y) { (x), (y) } 03565 #endif 03566 03567 static const __m128i MOD2_128 = M128_INIT(0x1, 03568 (long long int)0xc200000000000000UL); 03569 03570 03571 /* See Intel® Carry-Less Multiplication Instruction 03572 * and its Usage for Computing the GCM Mode White Paper 03573 * by Shay Gueron, Intel Mobility Group, Israel Development Center; 03574 * and Michael E. Kounavis, Intel Labs, Circuits and Systems Research */ 03575 03576 03577 /* Figure 9. 
AES-GCM – Encrypt With Single Block Ghash at a Time */ 03578 03579 static const __m128i ONE = M128_INIT(0x0, 0x1); 03580 #ifndef AES_GCM_AESNI_NO_UNROLL 03581 static const __m128i TWO = M128_INIT(0x0, 0x2); 03582 static const __m128i THREE = M128_INIT(0x0, 0x3); 03583 static const __m128i FOUR = M128_INIT(0x0, 0x4); 03584 static const __m128i FIVE = M128_INIT(0x0, 0x5); 03585 static const __m128i SIX = M128_INIT(0x0, 0x6); 03586 static const __m128i SEVEN = M128_INIT(0x0, 0x7); 03587 static const __m128i EIGHT = M128_INIT(0x0, 0x8); 03588 #endif 03589 static const __m128i BSWAP_EPI64 = M128_INIT(0x0001020304050607, 0x08090a0b0c0d0e0f); 03590 static const __m128i BSWAP_MASK = M128_INIT(0x08090a0b0c0d0e0f, 0x0001020304050607); 03591 03592 03593 #ifndef _MSC_VER 03594 03595 #define _VAR(a) "" #a "" 03596 #define VAR(a) _VAR(a) 03597 03598 #define HR %%xmm14 03599 #define XR %%xmm15 03600 #define KR %%ebx 03601 #define KR64 %%rbx 03602 #if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL) 03603 #define CTR1 128(%%rsp) 03604 #define TR 144(%%rsp) 03605 #define HTR %%rsp 03606 #define STACK_OFFSET 160 03607 #else 03608 #define CTR1 (%%rsp) 03609 #define TR 16(%%rsp) 03610 #define STACK_OFFSET 32 03611 #endif 03612 03613 #define AESENC() \ 03614 "aesenc %%xmm12, %%xmm4\n\t" \ 03615 "aesenc %%xmm12, %%xmm5\n\t" \ 03616 "aesenc %%xmm12, %%xmm6\n\t" \ 03617 "aesenc %%xmm12, %%xmm7\n\t" \ 03618 "aesenc %%xmm12, %%xmm8\n\t" \ 03619 "aesenc %%xmm12, %%xmm9\n\t" \ 03620 "aesenc %%xmm12, %%xmm10\n\t" \ 03621 "aesenc %%xmm12, %%xmm11\n\t" 03622 03623 #define AESENC_SET(o) \ 03624 "movdqa " #o "(%[KEY]), %%xmm12\n\t" \ 03625 AESENC() 03626 03627 #define AESENC_CTR() \ 03628 "movdqu " VAR(CTR1) ", %%xmm4\n\t" \ 03629 "movdqa %[BSWAP_EPI64], %%xmm1\n\t" \ 03630 "movdqu %%xmm4, %%xmm0\n\t" \ 03631 "pshufb %%xmm1, %%xmm4\n\t" \ 03632 "movdqa %%xmm0, %%xmm5\n\t" \ 03633 "paddd %[ONE], %%xmm5\n\t" \ 03634 "pshufb %%xmm1, %%xmm5\n\t" \ 03635 "movdqa %%xmm0, %%xmm6\n\t" 
\ 03636 "paddd %[TWO], %%xmm6\n\t" \ 03637 "pshufb %%xmm1, %%xmm6\n\t" \ 03638 "movdqa %%xmm0, %%xmm7\n\t" \ 03639 "paddd %[THREE], %%xmm7\n\t" \ 03640 "pshufb %%xmm1, %%xmm7\n\t" \ 03641 "movdqa %%xmm0, %%xmm8\n\t" \ 03642 "paddd %[FOUR], %%xmm8\n\t" \ 03643 "pshufb %%xmm1, %%xmm8\n\t" \ 03644 "movdqa %%xmm0, %%xmm9\n\t" \ 03645 "paddd %[FIVE], %%xmm9\n\t" \ 03646 "pshufb %%xmm1, %%xmm9\n\t" \ 03647 "movdqa %%xmm0, %%xmm10\n\t" \ 03648 "paddd %[SIX], %%xmm10\n\t" \ 03649 "pshufb %%xmm1, %%xmm10\n\t" \ 03650 "movdqa %%xmm0, %%xmm11\n\t" \ 03651 "paddd %[SEVEN], %%xmm11\n\t" \ 03652 "pshufb %%xmm1, %%xmm11\n\t" \ 03653 "paddd %[EIGHT], %%xmm0\n\t" 03654 03655 #define AESENC_XOR() \ 03656 "movdqa (%[KEY]), %%xmm12\n\t" \ 03657 "movdqu %%xmm0, " VAR(CTR1) "\n\t" \ 03658 "pxor %%xmm12, %%xmm4\n\t" \ 03659 "pxor %%xmm12, %%xmm5\n\t" \ 03660 "pxor %%xmm12, %%xmm6\n\t" \ 03661 "pxor %%xmm12, %%xmm7\n\t" \ 03662 "pxor %%xmm12, %%xmm8\n\t" \ 03663 "pxor %%xmm12, %%xmm9\n\t" \ 03664 "pxor %%xmm12, %%xmm10\n\t" \ 03665 "pxor %%xmm12, %%xmm11\n\t" 03666 03667 /* Encrypt and carry-less multiply for AVX1. 
*/ 03668 #define AESENC_PCLMUL_1(src, o1, o2, o3) \ 03669 "movdqu " #o3 "(" VAR(HTR) "), %%xmm12\n\t" \ 03670 "movdqu " #o2 "(" #src "), %%xmm0\n\t" \ 03671 "aesenc " #o1 "(%[KEY]), %%xmm4\n\t" \ 03672 "pshufb %[BSWAP_MASK], %%xmm0\n\t" \ 03673 "pxor %%xmm2, %%xmm0\n\t" \ 03674 "pshufd $0x4e, %%xmm12, %%xmm1\n\t" \ 03675 "pshufd $0x4e, %%xmm0, %%xmm14\n\t" \ 03676 "pxor %%xmm12, %%xmm1\n\t" \ 03677 "pxor %%xmm0, %%xmm14\n\t" \ 03678 "movdqa %%xmm0, %%xmm3\n\t" \ 03679 "pclmulqdq $0x11, %%xmm12, %%xmm3\n\t" \ 03680 "aesenc " #o1 "(%[KEY]), %%xmm5\n\t" \ 03681 "aesenc " #o1 "(%[KEY]), %%xmm6\n\t" \ 03682 "movdqa %%xmm0, %%xmm2\n\t" \ 03683 "pclmulqdq $0x00, %%xmm12, %%xmm2\n\t" \ 03684 "aesenc " #o1 "(%[KEY]), %%xmm7\n\t" \ 03685 "aesenc " #o1 "(%[KEY]), %%xmm8\n\t" \ 03686 "pclmulqdq $0x00, %%xmm14, %%xmm1\n\t" \ 03687 "aesenc " #o1 "(%[KEY]), %%xmm9\n\t" \ 03688 "aesenc " #o1 "(%[KEY]), %%xmm10\n\t" \ 03689 "aesenc " #o1 "(%[KEY]), %%xmm11\n\t" \ 03690 "pxor %%xmm2, %%xmm1\n\t" \ 03691 "pxor %%xmm3, %%xmm1\n\t" \ 03692 03693 #define AESENC_PCLMUL_N(src, o1, o2, o3) \ 03694 "movdqu " #o3 "(" VAR(HTR) "), %%xmm12\n\t" \ 03695 "movdqu " #o2 "(" #src" ), %%xmm0\n\t" \ 03696 "pshufd $0x4e, %%xmm12, %%xmm13\n\t" \ 03697 "pshufb %[BSWAP_MASK], %%xmm0\n\t" \ 03698 "aesenc " #o1 "(%[KEY]), %%xmm4\n\t" \ 03699 "pxor %%xmm12, %%xmm13\n\t" \ 03700 "pshufd $0x4e, %%xmm0, %%xmm14\n\t" \ 03701 "pxor %%xmm0, %%xmm14\n\t" \ 03702 "movdqa %%xmm0, %%xmm15\n\t" \ 03703 "pclmulqdq $0x11, %%xmm12, %%xmm15\n\t" \ 03704 "aesenc " #o1 "(%[KEY]), %%xmm5\n\t" \ 03705 "aesenc " #o1 "(%[KEY]), %%xmm6\n\t" \ 03706 "pclmulqdq $0x00, %%xmm0, %%xmm12\n\t" \ 03707 "aesenc " #o1 "(%[KEY]), %%xmm7\n\t" \ 03708 "aesenc " #o1 "(%[KEY]), %%xmm8\n\t" \ 03709 "pclmulqdq $0x00, %%xmm14, %%xmm13\n\t" \ 03710 "aesenc " #o1 "(%[KEY]), %%xmm9\n\t" \ 03711 "aesenc " #o1 "(%[KEY]), %%xmm10\n\t" \ 03712 "aesenc " #o1 "(%[KEY]), %%xmm11\n\t" \ 03713 "pxor %%xmm12, %%xmm1\n\t" \ 03714 "pxor %%xmm12, %%xmm2\n\t" \ 
03715 "pxor %%xmm15, %%xmm1\n\t" \ 03716 "pxor %%xmm15, %%xmm3\n\t" \ 03717 "pxor %%xmm13, %%xmm1\n\t" \ 03718 03719 #define AESENC_PCLMUL_L(o) \ 03720 "movdqa %%xmm1, %%xmm14\n\t" \ 03721 "psrldq $8, %%xmm1\n\t" \ 03722 "pslldq $8, %%xmm14\n\t" \ 03723 "aesenc " #o "(%[KEY]), %%xmm4\n\t" \ 03724 "pxor %%xmm14, %%xmm2\n\t" \ 03725 "pxor %%xmm1, %%xmm3\n\t" \ 03726 "movdqa %%xmm2, %%xmm12\n\t" \ 03727 "movdqa %%xmm2, %%xmm13\n\t" \ 03728 "movdqa %%xmm2, %%xmm14\n\t" \ 03729 "aesenc " #o "(%[KEY]), %%xmm5\n\t" \ 03730 "pslld $31, %%xmm12\n\t" \ 03731 "pslld $30, %%xmm13\n\t" \ 03732 "pslld $25, %%xmm14\n\t" \ 03733 "aesenc " #o "(%[KEY]), %%xmm6\n\t" \ 03734 "pxor %%xmm13, %%xmm12\n\t" \ 03735 "pxor %%xmm14, %%xmm12\n\t" \ 03736 "aesenc " #o "(%[KEY]), %%xmm7\n\t" \ 03737 "movdqa %%xmm12, %%xmm13\n\t" \ 03738 "pslldq $12, %%xmm12\n\t" \ 03739 "psrldq $4, %%xmm13\n\t" \ 03740 "aesenc " #o "(%[KEY]), %%xmm8\n\t" \ 03741 "pxor %%xmm12, %%xmm2\n\t" \ 03742 "movdqa %%xmm2, %%xmm14\n\t" \ 03743 "movdqa %%xmm2, %%xmm1\n\t" \ 03744 "movdqa %%xmm2, %%xmm0\n\t" \ 03745 "aesenc " #o "(%[KEY]), %%xmm9\n\t" \ 03746 "psrld $1, %%xmm14\n\t" \ 03747 "psrld $2, %%xmm1\n\t" \ 03748 "psrld $7, %%xmm0\n\t" \ 03749 "aesenc " #o "(%[KEY]), %%xmm10\n\t" \ 03750 "pxor %%xmm1, %%xmm14\n\t" \ 03751 "pxor %%xmm0, %%xmm14\n\t" \ 03752 "aesenc " #o "(%[KEY]), %%xmm11\n\t" \ 03753 "pxor %%xmm13, %%xmm14\n\t" \ 03754 "pxor %%xmm14, %%xmm2\n\t" \ 03755 "pxor %%xmm3, %%xmm2\n\t" \ 03756 03757 /* Encrypt and carry-less multiply with last key. 
 */
/* Apply the last AES round (key expected in %%xmm12) to the 8 block registers
 * xmm4..xmm11, XOR with 128 bytes of plaintext at `in`, and store the 128
 * bytes of ciphertext at `out`.  xmm0/xmm1 are used as input staging. */
#define AESENC_LAST(in, out)                       \
    "aesenclast %%xmm12, %%xmm4\n\t"               \
    "aesenclast %%xmm12, %%xmm5\n\t"               \
    "movdqu (" #in "),%%xmm0\n\t"                  \
    "movdqu 16(" #in "),%%xmm1\n\t"                \
    "pxor %%xmm0, %%xmm4\n\t"                      \
    "pxor %%xmm1, %%xmm5\n\t"                      \
    "movdqu %%xmm4, (" #out ")\n\t"                \
    "movdqu %%xmm5, 16(" #out ")\n\t"              \
    "aesenclast %%xmm12, %%xmm6\n\t"               \
    "aesenclast %%xmm12, %%xmm7\n\t"               \
    "movdqu 32(" #in "),%%xmm0\n\t"                \
    "movdqu 48(" #in "),%%xmm1\n\t"                \
    "pxor %%xmm0, %%xmm6\n\t"                      \
    "pxor %%xmm1, %%xmm7\n\t"                      \
    "movdqu %%xmm6, 32(" #out ")\n\t"              \
    "movdqu %%xmm7, 48(" #out ")\n\t"              \
    "aesenclast %%xmm12, %%xmm8\n\t"               \
    "aesenclast %%xmm12, %%xmm9\n\t"               \
    "movdqu 64(" #in "),%%xmm0\n\t"                \
    "movdqu 80(" #in "),%%xmm1\n\t"                \
    "pxor %%xmm0, %%xmm8\n\t"                      \
    "pxor %%xmm1, %%xmm9\n\t"                      \
    "movdqu %%xmm8, 64(" #out ")\n\t"              \
    "movdqu %%xmm9, 80(" #out ")\n\t"              \
    "aesenclast %%xmm12, %%xmm10\n\t"              \
    "aesenclast %%xmm12, %%xmm11\n\t"              \
    "movdqu 96(" #in "),%%xmm0\n\t"                \
    "movdqu 112(" #in "),%%xmm1\n\t"               \
    "pxor %%xmm0, %%xmm10\n\t"                     \
    "pxor %%xmm1, %%xmm11\n\t"                     \
    "movdqu %%xmm10, 96(" #out ")\n\t"             \
    "movdqu %%xmm11, 112(" #out ")\n\t"

/* Run AES rounds 1..Nr on a single block in register `r`.  %[nr] selects the
 * round count at run time: rounds 10/11 and 12/13 are skipped for 128- and
 * 192-bit keys via the two `jl %=f` branches (GCC local label).  The last
 * round key is staged in xmm5 and applied with aesenclast. */
#define _AESENC_AVX(r)                             \
    "aesenc 16(%[KEY]), " #r "\n\t"                \
    "aesenc 32(%[KEY]), " #r "\n\t"                \
    "aesenc 48(%[KEY]), " #r "\n\t"                \
    "aesenc 64(%[KEY]), " #r "\n\t"                \
    "aesenc 80(%[KEY]), " #r "\n\t"                \
    "aesenc 96(%[KEY]), " #r "\n\t"                \
    "aesenc 112(%[KEY]), " #r "\n\t"               \
    "aesenc 128(%[KEY]), " #r "\n\t"               \
    "aesenc 144(%[KEY]), " #r "\n\t"               \
    "cmpl $11, %[nr]\n\t"                          \
    "movdqa 160(%[KEY]), %%xmm5\n\t"               \
    "jl %=f\n\t"                                   \
    "aesenc %%xmm5, " #r "\n\t"                    \
    "aesenc 176(%[KEY]), " #r "\n\t"               \
    "cmpl $13, %[nr]\n\t"                          \
    "movdqa 192(%[KEY]), %%xmm5\n\t"               \
    "jl %=f\n\t"                                   \
    "aesenc %%xmm5, " #r "\n\t"                    \
    "aesenc 208(%[KEY]), " #r "\n\t"               \
    "movdqa 224(%[KEY]), %%xmm5\n\t"               \
    "%=:\n\t"                                      \
    "aesenclast %%xmm5, " #r "\n\t"
/* Indirection so that `r` is macro-expanded before stringizing. */
#define AESENC_AVX(r)                              \
    _AESENC_AVX(r)

/* Encrypt ONE 16-byte block in CTR mode: encrypt the big-endian counter
 * (CTR1), post-increment the counter, XOR the keystream with the input block,
 * store the ciphertext, and fold the byte-swapped ciphertext into the GHASH
 * accumulator XR.  CTR1/XR are VAR() aliases defined earlier in this file. */
#define AESENC_BLOCK(in, out)                      \
    "movdqu " VAR(CTR1) ", %%xmm4\n\t"             \
    "movdqu %%xmm4, %%xmm5\n\t"                    \
    "pshufb %[BSWAP_EPI64], %%xmm4\n\t"            \
    "paddd %[ONE], %%xmm5\n\t"                     \
    "pxor (%[KEY]), %%xmm4\n\t"                    \
    "movdqu %%xmm5, " VAR(CTR1) "\n\t"             \
    AESENC_AVX(%%xmm4)                             \
    "movdqu (" #in "), %%xmm5\n\t"                 \
    "pxor %%xmm5, %%xmm4\n\t"                      \
    "movdqu %%xmm4, (" #out ")\n\t"                \
    "pshufb %[BSWAP_MASK], %%xmm4\n\t"             \
    "pxor %%xmm4, " VAR(XR) "\n\t"

/* Encrypt one CTR block while simultaneously carry-less-multiplying X by H
 * and reducing modulo the GCM polynomial (MOD2_128): the four PCLMULQDQ
 * partial products and the reduction are interleaved with the AES rounds to
 * hide latency.  Result of the GF mul lands in XR; ciphertext is stored at
 * `out`.  The same runtime round-count branching as _AESENC_AVX applies. */
#define _AESENC_GFMUL(in, out, H, X)               \
    "movdqu " VAR(CTR1) ", %%xmm4\n\t"             \
    "movdqu %%xmm4, %%xmm5\n\t"                    \
    "pshufb %[BSWAP_EPI64], %%xmm4\n\t"            \
    "paddd %[ONE], %%xmm5\n\t"                     \
    "pxor (%[KEY]), %%xmm4\n\t"                    \
    "movdqu %%xmm5, " VAR(CTR1) "\n\t"             \
    "movdqa " #X ", %%xmm6\n\t"                    \
    "pclmulqdq $0x10, " #H ", %%xmm6\n\t"          \
    "aesenc 16(%[KEY]), %%xmm4\n\t"                \
    "aesenc 32(%[KEY]), %%xmm4\n\t"                \
    "movdqa " #X ", %%xmm7\n\t"                    \
    "pclmulqdq $0x01, " #H ", %%xmm7\n\t"          \
    "aesenc 48(%[KEY]), %%xmm4\n\t"                \
    "aesenc 64(%[KEY]), %%xmm4\n\t"                \
    "movdqa " #X ", %%xmm8\n\t"                    \
    "pclmulqdq $0x00, " #H ", %%xmm8\n\t"          \
    "aesenc 80(%[KEY]), %%xmm4\n\t"                \
    "movdqa " #X ", %%xmm1\n\t"                    \
    "pclmulqdq $0x11, " #H ", %%xmm1\n\t"          \
    "aesenc 96(%[KEY]), %%xmm4\n\t"                \
    "pxor %%xmm7, %%xmm6\n\t"                      \
    "movdqa %%xmm6, %%xmm2\n\t"                    \
    "psrldq $8, %%xmm6\n\t"                        \
    "pslldq $8, %%xmm2\n\t"                        \
    "aesenc 112(%[KEY]), %%xmm4\n\t"               \
    "movdqa %%xmm1, %%xmm3\n\t"                    \
    "pxor %%xmm8, %%xmm2\n\t"                      \
    "pxor %%xmm6, %%xmm3\n\t"                      \
    "movdqa %[MOD2_128], %%xmm0\n\t"               \
    "movdqa %%xmm2, %%xmm7\n\t"                    \
    "pclmulqdq $0x10, %%xmm0, %%xmm7\n\t"          \
    "aesenc 128(%[KEY]), %%xmm4\n\t"               \
    "pshufd $0x4e, %%xmm2, %%xmm6\n\t"             \
    "pxor %%xmm7, %%xmm6\n\t"                      \
    "movdqa %%xmm6, %%xmm7\n\t"                    \
    "pclmulqdq $0x10, %%xmm0, %%xmm7\n\t"          \
    "aesenc 144(%[KEY]), %%xmm4\n\t"               \
    "pshufd $0x4e, %%xmm6, " VAR(XR) "\n\t"        \
    "pxor %%xmm7, " VAR(XR) "\n\t"                 \
    "pxor %%xmm3, " VAR(XR) "\n\t"                 \
    "cmpl $11, %[nr]\n\t"                          \
    "movdqu 160(%[KEY]), %%xmm5\n\t"               \
    "jl %=f\n\t"                                   \
    "aesenc %%xmm5, %%xmm4\n\t"                    \
    "aesenc 176(%[KEY]), %%xmm4\n\t"               \
    "cmpl $13, %[nr]\n\t"                          \
    "movdqu 192(%[KEY]), %%xmm5\n\t"               \
    "jl %=f\n\t"                                   \
    "aesenc %%xmm5, %%xmm4\n\t"                    \
    "aesenc 208(%[KEY]), %%xmm4\n\t"               \
    "movdqa 224(%[KEY]), %%xmm5\n\t"               \
    "%=:\n\t"                                      \
    "aesenclast %%xmm5, %%xmm4\n\t"                \
    "movdqu (" #in "), %%xmm5\n\t"                 \
    "pxor %%xmm5, %%xmm4\n\t"                      \
    "movdqu %%xmm4, (" #out ")\n\t"
/* Indirection so arguments expand before stringizing. */
#define AESENC_GFMUL(in, out, H, X)                \
    _AESENC_GFMUL(in, out, H, X)

/* 128x128-bit carry-less multiply of `a` and `b` using the Karatsuba form
 * (three PCLMULQDQ + pshufd/pxor folds).  The 256-bit product is returned in
 * r (high 128 bits) : r2 (low 128 bits); NO polynomial reduction is done.
 * Clobbers xmm0-xmm3. */
#define _GHASH_GFMUL_AVX(r, r2, a, b)              \
    "pshufd $0x4e, "#a", %%xmm1\n\t"               \
    "pshufd $0x4e, "#b", %%xmm2\n\t"               \
    "movdqa "#b", %%xmm3\n\t"                      \
    "movdqa "#b", %%xmm0\n\t"                      \
    "pclmulqdq $0x11, "#a", %%xmm3\n\t"            \
    "pclmulqdq $0x00, "#a", %%xmm0\n\t"            \
    "pxor "#a", %%xmm1\n\t"                        \
    "pxor "#b", %%xmm2\n\t"                        \
    "pclmulqdq $0x00, %%xmm2, %%xmm1\n\t"          \
    "pxor %%xmm0, %%xmm1\n\t"                      \
    "pxor %%xmm3, %%xmm1\n\t"                      \
    "movdqa %%xmm1, %%xmm2\n\t"                    \
    "movdqa %%xmm0, "#r2"\n\t"                     \
    "movdqa %%xmm3, " #r "\n\t"                    \
    "pslldq $8, %%xmm2\n\t"                        \
    "psrldq $8, %%xmm1\n\t"                        \
    "pxor %%xmm2, "#r2"\n\t"                       \
    "pxor %%xmm1, " #r "\n\t"
#define GHASH_GFMUL_AVX(r, r2, a, b)               \
    _GHASH_GFMUL_AVX(r, r2, a, b)

/* Same Karatsuba carry-less multiply as _GHASH_GFMUL_AVX, but the 256-bit
 * product is XORed into r:r2 instead of overwriting them (used to accumulate
 * the 8 per-block products of the unrolled loop).  Clobbers xmm0-xmm3. */
#define _GHASH_GFMUL_XOR_AVX(r, r2, a, b)          \
    "pshufd $0x4e, "#a", %%xmm1\n\t"               \
    "pshufd $0x4e, "#b", %%xmm2\n\t"               \
    "movdqa "#b", %%xmm3\n\t"                      \
    "movdqa "#b", %%xmm0\n\t"                      \
    "pclmulqdq $0x11, "#a", %%xmm3\n\t"            \
    "pclmulqdq $0x00, "#a", %%xmm0\n\t"            \
    "pxor "#a", %%xmm1\n\t"                        \
    "pxor "#b", %%xmm2\n\t"                        \
    "pclmulqdq $0x00, %%xmm2, %%xmm1\n\t"          \
    "pxor %%xmm0, %%xmm1\n\t"                      \
    "pxor %%xmm3, %%xmm1\n\t"                      \
    "movdqa %%xmm1, %%xmm2\n\t"                    \
    "pxor %%xmm0, "#r2"\n\t"                       \
    "pxor %%xmm3, " #r "\n\t"                      \
    "pslldq $8, %%xmm2\n\t"                        \
    "psrldq $8, %%xmm1\n\t"                        \
    "pxor %%xmm2, "#r2"\n\t"                       \
    "pxor %%xmm1, " #r "\n\t"
#define GHASH_GFMUL_XOR_AVX(r, r2, a, b)           \
    _GHASH_GFMUL_XOR_AVX(r, r2, a, b)

/* Shift the 256-bit value r:r2 left by one bit: each 128-bit half is shifted
 * with pslld $1 and the bits dropped from every 32-bit lane (captured via
 * psrld $31) are carried across lanes and across the r2 -> r boundary with
 * pslldq/psrldq + por.  Clobbers xmm0-xmm2. */
#define GHASH_MID_AVX(r, r2)                       \
    "movdqa "#r2", %%xmm0\n\t"                     \
    "movdqa " #r ", %%xmm1\n\t"                    \
    "psrld $31, %%xmm0\n\t"                        \
    "psrld $31, %%xmm1\n\t"                        \
    "pslld $1, "#r2"\n\t"                          \
    "pslld $1, " #r "\n\t"                         \
    "movdqa %%xmm0, %%xmm2\n\t"                    \
    "pslldq $4, %%xmm0\n\t"                        \
    "psrldq $12, %%xmm2\n\t"                       \
    "pslldq $4, %%xmm1\n\t"                        \
    "por %%xmm2, " #r "\n\t"                       \
    "por %%xmm0, "#r2"\n\t"                        \
    "por %%xmm1, " #r "\n\t"

/* Carry-less multiply of `a` and `b` (Karatsuba) immediately followed by the
 * reduction modulo the GCM polynomial (the pslld 31/30/25 + psrld 1/2/7
 * folding); the reduced 128-bit result is left in r.  Unlike the pair above,
 * this uses xmm4-xmm10 as scratch, so it is safe while xmm0-xmm3 hold
 * ciphertext state. */
#define _GHASH_GFMUL_RED_AVX(r, a, b)              \
    "pshufd $0x4e, "#a", %%xmm5\n\t"               \
    "pshufd $0x4e, "#b", %%xmm6\n\t"               \
    "movdqa "#b", %%xmm7\n\t"                      \
    "movdqa "#b", %%xmm4\n\t"                      \
    "pclmulqdq $0x11, "#a", %%xmm7\n\t"            \
    "pclmulqdq $0x00, "#a", %%xmm4\n\t"            \
    "pxor "#a", %%xmm5\n\t"                        \
    "pxor "#b", %%xmm6\n\t"                        \
    "pclmulqdq $0x00, %%xmm6, %%xmm5\n\t"          \
    "pxor %%xmm4, %%xmm5\n\t"                      \
    "pxor %%xmm7, %%xmm5\n\t"                      \
    "movdqa %%xmm5, %%xmm6\n\t"                    \
    "movdqa %%xmm7, " #r "\n\t"                    \
    "pslldq $8, %%xmm6\n\t"                        \
    "psrldq $8, %%xmm5\n\t"                        \
    "pxor %%xmm6, %%xmm4\n\t"                      \
    "pxor %%xmm5, " #r "\n\t"                      \
    "movdqa %%xmm4, %%xmm8\n\t"                    \
    "movdqa %%xmm4, %%xmm9\n\t"                    \
    "movdqa %%xmm4, %%xmm10\n\t"                   \
    "pslld $31, %%xmm8\n\t"                        \
    "pslld $30, %%xmm9\n\t"                        \
    "pslld $25, %%xmm10\n\t"                       \
    "pxor %%xmm9, %%xmm8\n\t"                      \
    "pxor %%xmm10, %%xmm8\n\t"                     \
    "movdqa %%xmm8, %%xmm9\n\t"                    \
    "psrldq $4, %%xmm9\n\t"                        \
    "pslldq $12, %%xmm8\n\t"                       \
    "pxor %%xmm8, %%xmm4\n\t"                      \
    "movdqa %%xmm4, %%xmm10\n\t"                   \
    "movdqa %%xmm4, %%xmm6\n\t"                    \
    "movdqa %%xmm4, %%xmm5\n\t"                    \
    "psrld $1, %%xmm10\n\t"                        \
    "psrld $2, %%xmm6\n\t"                         \
    "psrld $7, %%xmm5\n\t"                         \
    "pxor %%xmm6, %%xmm10\n\t"                     \
    "pxor %%xmm5, %%xmm10\n\t"                     \
    "pxor %%xmm9, %%xmm10\n\t"                     \
    "pxor %%xmm4, %%xmm10\n\t"                     \
    "pxor %%xmm10, " #r "\n\t"
#define GHASH_GFMUL_RED_AVX(r, a, b)               \
    _GHASH_GFMUL_RED_AVX(r, a, b)

/* Reduce the 256-bit value r:r2 modulo the GCM polynomial: fold the low half
 * r2 with the pslld 31/30/25 + psrld 1/2/7 shift trick and XOR the folded
 * result into the high half r.  Clobbers xmm0-xmm3. */
#define GHASH_RED_AVX(r, r2)                       \
    "movdqa "#r2", %%xmm0\n\t"                     \
    "movdqa "#r2", %%xmm1\n\t"                     \
    "movdqa "#r2", %%xmm2\n\t"                     \
    "pslld $31, %%xmm0\n\t"                        \
    "pslld $30, %%xmm1\n\t"                        \
    "pslld $25, %%xmm2\n\t"                        \
    "pxor %%xmm1, %%xmm0\n\t"                      \
    "pxor %%xmm2, %%xmm0\n\t"                      \
    "movdqa %%xmm0, %%xmm1\n\t"                    \
    "psrldq $4, %%xmm1\n\t"                        \
    "pslldq $12, %%xmm0\n\t"                       \
    "pxor %%xmm0, "#r2"\n\t"                       \
    "movdqa "#r2", %%xmm2\n\t"                     \
    "movdqa "#r2", %%xmm3\n\t"                     \
    "movdqa "#r2", %%xmm0\n\t"                     \
    "psrld $1, %%xmm2\n\t"                         \
    "psrld $2, %%xmm3\n\t"                         \
    "psrld $7, %%xmm0\n\t"                         \
    "pxor %%xmm3, %%xmm2\n\t"                      \
    "pxor %%xmm0, %%xmm2\n\t"                      \
    "pxor %%xmm1, %%xmm2\n\t"                      \
    "pxor "#r2", %%xmm2\n\t"                       \
    "pxor %%xmm2, " #r "\n\t"

/* Multiply-accumulate (XOR product into r:r2) followed by reduction. */
#define GHASH_GFMUL_RED_XOR_AVX(r, r2, a, b)       \
    GHASH_GFMUL_XOR_AVX(r, r2, a, b)               \
    GHASH_RED_AVX(r, r2)

/* One complete GHASH step: multiply, 1-bit left shift, reduce. */
#define GHASH_FULL_AVX(r, r2, a, b)                \
    GHASH_GFMUL_AVX(r, r2, a, b)                   \
    GHASH_MID_AVX(r, r2)                           \
    GHASH_RED_AVX(r, r2)

/* IV setup for the common 12-byte IV case: counter xmm13 = IV || 0x00000001
 * (IV loaded from %%rax), then H = E_K(0) and T = E_K(counter) computed with
 * the two encryptions interleaved round-by-round.  Jumps to label 39 in the
 * enclosing asm when done.
 * NOTE(review): round keys 208/224 are loaded with movdqu here while all
 * earlier ones use movdqa — inconsistent but functionally harmless if the key
 * schedule is aligned; confirm against the Aes struct layout. */
#define CALC_IV_12()                               \
    "# Calculate values when IV is 12 bytes\n\t"   \
    "# Set counter based on IV\n\t"                \
    "movl $0x01000000, %%ecx\n\t"                  \
    "pinsrq $0, 0(%%rax), %%xmm13\n\t"             \
    "pinsrd $2, 8(%%rax), %%xmm13\n\t"             \
    "pinsrd $3, %%ecx, %%xmm13\n\t"                \
    "# H = Encrypt X(=0) and T = Encrypt counter\n\t" \
    "movdqu %%xmm13, %%xmm1\n\t"                   \
    "movdqa 0(%[KEY]), " VAR(HR) "\n\t"            \
    "pxor " VAR(HR) ", %%xmm1\n\t"                 \
    "movdqa 16(%[KEY]), %%xmm12\n\t"               \
    "aesenc %%xmm12, " VAR(HR) "\n\t"              \
    "aesenc %%xmm12, %%xmm1\n\t"                   \
    "movdqa 32(%[KEY]), %%xmm12\n\t"               \
    "aesenc %%xmm12, " VAR(HR) "\n\t"              \
    "aesenc %%xmm12, %%xmm1\n\t"                   \
    "movdqa 48(%[KEY]), %%xmm12\n\t"               \
    "aesenc %%xmm12, " VAR(HR) "\n\t"              \
    "aesenc %%xmm12, %%xmm1\n\t"                   \
    "movdqa 64(%[KEY]), %%xmm12\n\t"               \
    "aesenc %%xmm12, " VAR(HR) "\n\t"              \
    "aesenc %%xmm12, %%xmm1\n\t"                   \
    "movdqa 80(%[KEY]), %%xmm12\n\t"               \
    "aesenc %%xmm12, " VAR(HR) "\n\t"              \
    "aesenc %%xmm12, %%xmm1\n\t"                   \
    "movdqa 96(%[KEY]), %%xmm12\n\t"               \
    "aesenc %%xmm12, " VAR(HR) "\n\t"              \
    "aesenc %%xmm12, %%xmm1\n\t"                   \
    "movdqa 112(%[KEY]), %%xmm12\n\t"              \
    "aesenc %%xmm12, " VAR(HR) "\n\t"              \
    "aesenc %%xmm12, %%xmm1\n\t"                   \
    "movdqa 128(%[KEY]), %%xmm12\n\t"              \
    "aesenc %%xmm12, " VAR(HR) "\n\t"              \
    "aesenc %%xmm12, %%xmm1\n\t"                   \
    "movdqa 144(%[KEY]), %%xmm12\n\t"              \
    "aesenc %%xmm12, " VAR(HR) "\n\t"              \
    "aesenc %%xmm12, %%xmm1\n\t"                   \
    "cmpl $11, %[nr]\n\t"                          \
    "movdqa 160(%[KEY]), %%xmm12\n\t"              \
    "jl 31f\n\t"                                   \
    "aesenc %%xmm12, " VAR(HR) "\n\t"              \
    "aesenc %%xmm12, %%xmm1\n\t"                   \
    "movdqa 176(%[KEY]), %%xmm12\n\t"              \
    "aesenc %%xmm12, " VAR(HR) "\n\t"              \
    "aesenc %%xmm12, %%xmm1\n\t"                   \
    "cmpl $13, %[nr]\n\t"                          \
    "movdqa 192(%[KEY]), %%xmm12\n\t"              \
    "jl 31f\n\t"                                   \
    "aesenc %%xmm12, " VAR(HR) "\n\t"              \
    "aesenc %%xmm12, %%xmm1\n\t"                   \
    "movdqu 208(%[KEY]), %%xmm12\n\t"              \
    "aesenc %%xmm12, " VAR(HR) "\n\t"              \
    "aesenc %%xmm12, %%xmm1\n\t"                   \
    "movdqu 224(%[KEY]), %%xmm12\n\t"              \
    "31:\n\t"                                      \
    "aesenclast %%xmm12, " VAR(HR) "\n\t"          \
    "aesenclast %%xmm12, %%xmm1\n\t"               \
    "pshufb %[BSWAP_MASK], " VAR(HR) "\n\t"        \
    "movdqu %%xmm1, " VAR(TR) "\n\t"               \
    "jmp 39f\n\t"

/* IV setup for IV lengths other than 12 bytes: H = E_K(0); counter =
 * GHASH_H(IV || len(IV)*8); T = E_K(counter).  Full 16-byte IV chunks are
 * hashed in loop 43; a trailing partial chunk is zero-padded through a
 * 16-byte stack buffer (loop 42).  Expects the IV pointer in %%rax and its
 * byte length in %%edx on entry. */
#define CALC_IV()                                  \
    "# Calculate values when IV is not 12 bytes\n\t" \
    "# H = Encrypt X(=0)\n\t"                      \
    "movdqa 0(%[KEY]), " VAR(HR) "\n\t"            \
    AESENC_AVX(HR)                                 \
    "pshufb %[BSWAP_MASK], " VAR(HR) "\n\t"        \
    "# Calc counter\n\t"                           \
    "# Initialization vector\n\t"                  \
    "cmpl $0, %%edx\n\t"                           \
    "movq $0, %%rcx\n\t"                           \
    "je 45f\n\t"                                   \
    "cmpl $16, %%edx\n\t"                          \
    "jl 44f\n\t"                                   \
    "andl $0xfffffff0, %%edx\n\t"                  \
    "\n"                                           \
    "43:\n\t"                                      \
    "movdqu (%%rax,%%rcx,1), %%xmm4\n\t"           \
    "pshufb %[BSWAP_MASK], %%xmm4\n\t"             \
    "pxor %%xmm4, %%xmm13\n\t"                     \
    GHASH_FULL_AVX(%%xmm13, %%xmm12, %%xmm13, HR)  \
    "addl $16, %%ecx\n\t"                          \
    "cmpl %%edx, %%ecx\n\t"                        \
    "jl 43b\n\t"                                   \
    "movl %[ibytes], %%edx\n\t"                    \
    "cmpl %%edx, %%ecx\n\t"                        \
    "je 45f\n\t"                                   \
    "\n"                                           \
    "44:\n\t"                                      \
    "subq $16, %%rsp\n\t"                          \
    "pxor %%xmm4, %%xmm4\n\t"                      \
    "xorl %%ebx, %%ebx\n\t"                        \
    "movdqu %%xmm4, (%%rsp)\n\t"                   \
    "42:\n\t"                                      \
    "movzbl (%%rax,%%rcx,1), %%r13d\n\t"           \
    "movb %%r13b, (%%rsp,%%rbx,1)\n\t"             \
    "incl %%ecx\n\t"                               \
    "incl %%ebx\n\t"                               \
    "cmpl %%edx, %%ecx\n\t"                        \
    "jl 42b\n\t"                                   \
    "movdqu (%%rsp), %%xmm4\n\t"                   \
    "addq $16, %%rsp\n\t"                          \
    "pshufb %[BSWAP_MASK], %%xmm4\n\t"             \
    "pxor %%xmm4, %%xmm13\n\t"                     \
    GHASH_FULL_AVX(%%xmm13, %%xmm12, %%xmm13, HR)  \
    "\n"                                           \
    "45:\n\t"                                      \
    "# T = Encrypt counter\n\t"                    \
    "pxor %%xmm0, %%xmm0\n\t"                      \
    "shll $3, %%edx\n\t"                           \
    "pinsrq $0, %%rdx, %%xmm0\n\t"                 \
    "pxor %%xmm0, %%xmm13\n\t"                     \
    GHASH_FULL_AVX(%%xmm13, %%xmm12, %%xmm13, HR)  \
    "pshufb %[BSWAP_MASK], %%xmm13\n\t"            \
    "# Encrypt counter\n\t"                        \
    "movdqa 0(%[KEY]), %%xmm4\n\t"                 \
    "pxor %%xmm13, %%xmm4\n\t"                     \
    AESENC_AVX(%%xmm4)                             \
    "movdqu %%xmm4, " VAR(TR) "\n\t"

/* Fold the additional authenticated data (addt/abytes) into the GHASH
 * accumulator XR: full 16-byte chunks in loop 23, then any trailing partial
 * chunk zero-padded through a 16-byte stack buffer in loop 22. */
#define CALC_AAD()                                 \
    "# Additional authentication data\n\t"         \
    "movl %[abytes], %%edx\n\t"                    \
    "cmpl $0, %%edx\n\t"                           \
    "je 25f\n\t"                                   \
    "movq %[addt], %%rax\n\t"                      \
    "xorl %%ecx, %%ecx\n\t"                        \
    "cmpl $16, %%edx\n\t"                          \
    "jl 24f\n\t"                                   \
    "andl $0xfffffff0, %%edx\n\t"                  \
    "\n"                                           \
    "23:\n\t"                                      \
    "movdqu (%%rax,%%rcx,1), %%xmm4\n\t"           \
    "pshufb %[BSWAP_MASK], %%xmm4\n\t"             \
    "pxor %%xmm4, " VAR(XR) "\n\t"                 \
    GHASH_FULL_AVX(XR, %%xmm12, XR, HR)            \
    "addl $16, %%ecx\n\t"                          \
    "cmpl %%edx, %%ecx\n\t"                        \
    "jl 23b\n\t"                                   \
    "movl %[abytes], %%edx\n\t"                    \
    "cmpl %%edx, %%ecx\n\t"                        \
    "je 25f\n\t"                                   \
    "\n"                                           \
    "24:\n\t"                                      \
    "subq $16, %%rsp\n\t"                          \
    "pxor %%xmm4, %%xmm4\n\t"                      \
    "xorl %%ebx, %%ebx\n\t"                        \
    "movdqu %%xmm4, (%%rsp)\n\t"                   \
    "22:\n\t"                                      \
    "movzbl (%%rax,%%rcx,1), %%r13d\n\t"           \
    "movb %%r13b, (%%rsp,%%rbx,1)\n\t"             \
    "incl %%ecx\n\t"                               \
    "incl %%ebx\n\t"                               \
    "cmpl %%edx, %%ecx\n\t"                        \
    "jl 22b\n\t"                                   \
    "movdqu (%%rsp), %%xmm4\n\t"                   \
    "addq $16, %%rsp\n\t"                          \
    "pshufb %[BSWAP_MASK], %%xmm4\n\t"             \
    "pxor %%xmm4, " VAR(XR) "\n\t"                 \
    GHASH_FULL_AVX(XR, %%xmm12, XR, HR)            \
    "\n"                                           \
    "25:\n\t"

/* Precompute the table H^1..H^8 at HTR (16 bytes per power) for the 8-block
 * unrolled GHASH; also snapshots XR into xmm2 for the unrolled loop. */
#define CALC_HT_8_AVX()                            \
    "movdqa " VAR(XR) ", %%xmm2\n\t"               \
    "# H ^ 1\n\t"                                  \
    "movdqu " VAR(HR) ", 0(" VAR(HTR) ")\n\t"      \
    "# H ^ 2\n\t"                                  \
    GHASH_GFMUL_RED_AVX(%%xmm0, HR, HR)            \
    "movdqu %%xmm0 , 16(" VAR(HTR) ")\n\t"         \
    "# H ^ 3\n\t"                                  \
    GHASH_GFMUL_RED_AVX(%%xmm1, HR, %%xmm0)        \
    "movdqu %%xmm1 , 32(" VAR(HTR) ")\n\t"         \
    "# H ^ 4\n\t"                                  \
    GHASH_GFMUL_RED_AVX(%%xmm3, %%xmm0, %%xmm0)    \
    "movdqu %%xmm3 , 48(" VAR(HTR) ")\n\t"         \
    "# H ^ 5\n\t"                                  \
    GHASH_GFMUL_RED_AVX(%%xmm12, %%xmm0, %%xmm1)   \
    "movdqu %%xmm12, 64(" VAR(HTR) ")\n\t"         \
    "# H ^ 6\n\t"                                  \
    GHASH_GFMUL_RED_AVX(%%xmm12, %%xmm1, %%xmm1)   \
    "movdqu %%xmm12, 80(" VAR(HTR) ")\n\t"         \
    "# H ^ 7\n\t"                                  \
    GHASH_GFMUL_RED_AVX(%%xmm12, %%xmm1, %%xmm3)   \
    "movdqu %%xmm12, 96(" VAR(HTR) ")\n\t"         \
    "# H ^ 8\n\t"                                  \
    GHASH_GFMUL_RED_AVX(%%xmm12, %%xmm3, %%xmm3)   \
    "movdqu %%xmm12, 112(" VAR(HTR) ")\n\t"

/* Encrypt the next 8 CTR blocks while GHASHing the PREVIOUS 128 bytes of
 * data (src at offset o-128) against the H^8..H^1 table; all GF multiplies
 * are interleaved with the AES rounds.  KR64 is the 64-bit byte counter. */
#define AESENC_128_GHASH_AVX(src, o)               \
    "leaq (%[in]," VAR(KR64) ",1), %%rcx\n\t"      \
    "leaq (%[out]," VAR(KR64) ",1), %%rdx\n\t"     \
    /* src is either %%rcx or %%rdx */             \
    AESENC_CTR()                                   \
    AESENC_XOR()                                   \
    AESENC_PCLMUL_1(src, 16, o-128, 112)           \
    AESENC_PCLMUL_N(src, 32, o-112, 96)            \
    AESENC_PCLMUL_N(src, 48, o -96, 80)            \
    AESENC_PCLMUL_N(src, 64, o -80, 64)            \
    AESENC_PCLMUL_N(src, 80, o -64, 48)            \
    AESENC_PCLMUL_N(src, 96, o -48, 32)            \
    AESENC_PCLMUL_N(src, 112, o -32, 16)           \
    AESENC_PCLMUL_N(src, 128, o -16, 0)            \
    AESENC_PCLMUL_L(144)                           \
    "cmpl $11, %[nr]\n\t"                          \
    "movdqa 160(%[KEY]), %%xmm12\n\t"              \
    "jl 4f\n\t"                                    \
    AESENC()                                       \
    AESENC_SET(176)                                \
    "cmpl $13, %[nr]\n\t"                          \
    "movdqa 192(%[KEY]), %%xmm12\n\t"              \
    "jl 4f\n\t"                                    \
    AESENC()                                       \
    AESENC_SET(208)                                \
    "movdqa 224(%[KEY]), %%xmm12\n\t"              \
    "\n"                                           \
    "4:\n\t"                                       \
    AESENC_LAST(%%rcx, %%rdx)

/* Encryption tail for the final partial block (nbytes % 16): the keystream
 * block is written to a 16-byte stack buffer, the remaining plaintext bytes
 * are XORed in byte-by-byte (loop 51), unused buffer bytes are zeroed
 * (loop 52), and the padded CIPHERTEXT is folded into XR. */
#define AESENC_LAST15_ENC_AVX()                    \
    "movl %[nbytes], %%ecx\n\t"                    \
    "movl %%ecx, %%edx\n\t"                        \
    "andl $0x0f, %%ecx\n\t"                        \
    "jz 55f\n\t"                                   \
    "movdqu " VAR(CTR1) ", %%xmm13\n\t"            \
    "pshufb %[BSWAP_EPI64], %%xmm13\n\t"           \
    "pxor 0(%[KEY]), %%xmm13\n\t"                  \
    AESENC_AVX(%%xmm13)                            \
    "subq $16, %%rsp\n\t"                          \
    "xorl %%ecx, %%ecx\n\t"                        \
    "movdqu %%xmm13, (%%rsp)\n\t"                  \
    "\n"                                           \
    "51:\n\t"                                      \
    "movzbl (%[in]," VAR(KR64) ",1), %%r13d\n\t"   \
    "xorb (%%rsp,%%rcx,1), %%r13b\n\t"             \
    "movb %%r13b, (%[out]," VAR(KR64) ",1)\n\t"    \
    "movb %%r13b, (%%rsp,%%rcx,1)\n\t"             \
    "incl " VAR(KR) "\n\t"                         \
    "incl %%ecx\n\t"                               \
    "cmpl %%edx, " VAR(KR) "\n\t"                  \
    "jl 51b\n\t"                                   \
    "xorq %%r13, %%r13\n\t"                        \
    "cmpl $16, %%ecx\n\t"                          \
    "je 53f\n\t"                                   \
    "\n"                                           \
    "52:\n\t"                                      \
    "movb %%r13b, (%%rsp,%%rcx,1)\n\t"             \
    "incl %%ecx\n\t"                               \
    "cmpl $16, %%ecx\n\t"                          \
    "jl 52b\n\t"                                   \
    "53:\n\t"                                      \
    "movdqu (%%rsp), %%xmm13\n\t"                  \
    "addq $16, %%rsp\n\t"                          \
    "pshufb %[BSWAP_MASK], %%xmm13\n\t"            \
    "pxor %%xmm13, " VAR(XR) "\n\t"                \
    GHASH_GFMUL_RED_AVX(XR, HR, XR)                \

/* Decryption tail for the final partial block: incoming CIPHERTEXT bytes are
 * copied into a zeroed second stack slot (so GHASH sees the zero-padded
 * ciphertext), XORed with the keystream to produce plaintext, then the
 * staged ciphertext block is folded into XR. */
#define AESENC_LAST15_DEC_AVX()                    \
    "movl %[nbytes], %%ecx\n\t"                    \
    "movl %%ecx, %%edx\n\t"                        \
    "andl $0x0f, %%ecx\n\t"                        \
    "jz 55f\n\t"                                   \
    "movdqu " VAR(CTR1) ", %%xmm13\n\t"            \
    "pshufb %[BSWAP_EPI64], %%xmm13\n\t"           \
    "pxor 0(%[KEY]), %%xmm13\n\t"                  \
    AESENC_AVX(%%xmm13)                            \
    "subq $32, %%rsp\n\t"                          \
    "xorl %%ecx, %%ecx\n\t"                        \
    "movdqu %%xmm13, (%%rsp)\n\t"                  \
    "pxor %%xmm0, %%xmm0\n\t"                      \
    "movdqu %%xmm0, 16(%%rsp)\n\t"                 \
    "\n"                                           \
    "51:\n\t"                                      \
    "movzbl (%[in]," VAR(KR64) ",1), %%r13d\n\t"   \
    "movb %%r13b, 16(%%rsp,%%rcx,1)\n\t"           \
    "xorb (%%rsp,%%rcx,1), %%r13b\n\t"             \
    "movb %%r13b, (%[out]," VAR(KR64) ",1)\n\t"    \
    "incl " VAR(KR) "\n\t"                         \
    "incl %%ecx\n\t"                               \
    "cmpl %%edx, " VAR(KR) "\n\t"                  \
    "jl 51b\n\t"                                   \
    "53:\n\t"                                      \
    "movdqu 16(%%rsp), %%xmm13\n\t"                \
    "addq $32, %%rsp\n\t"                          \
    "pshufb %[BSWAP_MASK], %%xmm13\n\t"            \
    "pxor %%xmm13, " VAR(XR) "\n\t"                \
    GHASH_GFMUL_RED_AVX(XR, HR, XR)                \

/* Finish GHASH with the length block (bit lengths of ciphertext and AAD),
 * byte-swap, and XOR with T = E_K(counter0); the tag ends up in %%xmm0. */
#define CALC_TAG()                                 \
    "movl %[nbytes], %%edx\n\t"                    \
    "movl %[abytes], %%ecx\n\t"                    \
    "shlq $3, %%rdx\n\t"                           \
    "shlq $3, %%rcx\n\t"                           \
    "pinsrq $0, %%rdx, %%xmm0\n\t"                 \
    "pinsrq $1, %%rcx, %%xmm0\n\t"                 \
    "pxor %%xmm0, " VAR(XR) "\n\t"                 \
    GHASH_GFMUL_RED_AVX(XR, HR, XR)                \
    "pshufb %[BSWAP_MASK], " VAR(XR) "\n\t"        \
    "movdqu " VAR(TR) ", %%xmm0\n\t"               \
    "pxor " VAR(XR) ", %%xmm0\n\t"                 \

/* Write the tag from %%xmm0: single 16-byte store when tbytes == 16,
 * otherwise byte-by-byte via the stack (loop 73). */
#define STORE_TAG()                                \
    "cmpl $16, %[tbytes]\n\t"                      \
    "je 71f\n\t"                                   \
    "xorq %%rcx, %%rcx\n\t"                        \
    "movdqu %%xmm0, (%%rsp)\n\t"                   \
    "73:\n\t"                                      \
    "movzbl (%%rsp,%%rcx,1), %%r13d\n\t"           \
    "movb %%r13b, (%[tag],%%rcx,1)\n\t"            \
    "incl %%ecx\n\t"                               \
    "cmpl %[tbytes], %%ecx\n\t"                    \
    "jne 73b\n\t"                                  \
    "jmp 72f\n\t"                                  \
    "\n"                                           \
    "71:\n\t"                                      \
    "movdqu %%xmm0, (%[tag])\n\t"                  \
    "\n"                                           \
    "72:\n\t"

/* Compare the computed tag in %%xmm0 against %[tag] and store 1/0 to *%[res].
 * Short tags: OR-accumulate byte differences (no early exit inside the loop);
 * 16-byte tags: pcmpeqb + pmovmskb == 0xffff. */
#define CMP_TAG()                                  \
    "cmpl $16, %[tbytes]\n\t"                      \
    "je 71f\n\t"                                   \
    "subq $16, %%rsp\n\t"                          \
    "xorq %%rcx, %%rcx\n\t"                        \
    "xorq %%rax, %%rax\n\t"                        \
    "movdqu %%xmm0, (%%rsp)\n\t"                   \
    "\n"                                           \
    "73:\n\t"                                      \
    "movzbl (%%rsp,%%rcx,1), %%r13d\n\t"           \
    "xorb (%[tag],%%rcx,1), %%r13b\n\t"            \
    "orb %%r13b, %%al\n\t"                         \
    "incl %%ecx\n\t"                               \
    "cmpl %[tbytes], %%ecx\n\t"                    \
    "jne 73b\n\t"                                  \
    "cmpb $0x00, %%al\n\t"                         \
    "sete %%al\n\t"                                \
    "addq $16, %%rsp\n\t"                          \
    "xorq %%rcx, %%rcx\n\t"                        \
    "jmp 72f\n\t"                                  \
    "\n"                                           \
    "71:\n\t"                                      \
    "movdqu (%[tag]), %%xmm1\n\t"                  \
    "pcmpeqb %%xmm1, %%xmm0\n\t"                   \
    "pmovmskb %%xmm0, %%edx\n\t"                   \
    "# %%edx == 0xFFFF then return 1 else => return 0\n\t" \
    "xorl %%eax, %%eax\n\t"                        \
    "cmpl $0xffff, %%edx\n\t"                      \
    "sete %%al\n\t"                                \
    "\n"                                           \
    "72:\n\t"                                      \
    "movl %%eax, (%[res])\n\t"

/* AES-GCM encryption using AES-NI + PCLMULQDQ (SSE4 path).
 *
 * in/out  : nbytes of plaintext in, ciphertext out (may process 0 bytes)
 * addt    : abytes of additional authenticated data
 * ivec    : ibytes of IV (12-byte IV takes the fast CALC_IV_12 path)
 * tag     : tbytes of authentication tag written out via STORE_TAG
 * key/nr  : expanded AES key schedule and round count (10/12/14)
 *
 * iv and ivLen are pinned to rax/ebx because the IV macros address them as
 * %%rax/%%ebx directly.  When the unroll macros are enabled, 128-byte chunks
 * are processed with the 8-block interleaved AES+GHASH loop (labels 1/3/2),
 * then 16-byte blocks (12/13), then the sub-16-byte tail (AESENC_LAST15).
 * CTR1/HR/TR/XR/HTR/KR(64)/STACK_OFFSET are VAR() aliases defined earlier in
 * this file (outside this view).  Note rbx/rax are modified by the IV/AAD
 * macros but are bound as inputs rather than listed as clobbers. */
static void AES_GCM_encrypt(const unsigned char *in, unsigned char *out,
                            const unsigned char* addt,
                            const unsigned char* ivec, unsigned char *tag,
                            unsigned int nbytes, unsigned int abytes,
                            unsigned int ibytes, unsigned int tbytes,
                            const unsigned char* key, int nr)
{
    register const unsigned char* iv asm("rax") = ivec;
    register unsigned int ivLen asm("ebx") = ibytes;

    __asm__ __volatile__ (
        /* Reserve scratch stack space used by STORE_TAG and the partial-block
         * staging buffers. */
        "subq $" VAR(STACK_OFFSET) ", %%rsp\n\t"
        /* Counter is xmm13 */
        "pxor %%xmm13, %%xmm13\n\t"
        "pxor " VAR(XR) ", " VAR(XR) "\n\t"
        "movl %[ibytes], %%edx\n\t"
        "cmpl $12, %%edx\n\t"
        "jne 35f\n\t"
        CALC_IV_12()     /* fast path: jumps to 39 when done */
        "\n"
        "35:\n\t"
        CALC_IV()        /* general IV path */
        "\n"
        "39:\n\t"

        CALC_AAD()       /* fold AAD into XR */

        "# Calculate counter and H\n\t"
        /* Counter is byte-swapped and incremented to CTR1; H is doubled in
         * GF(2^128) (shift left 1 with conditional MOD2_128 fold). */
        "pshufb %[BSWAP_EPI64], %%xmm13\n\t"
        "movdqa " VAR(HR) ", %%xmm5\n\t"
        "paddd %[ONE], %%xmm13\n\t"
        "movdqa " VAR(HR) ", %%xmm4\n\t"
        "movdqu %%xmm13, " VAR(CTR1) "\n\t"
        "psrlq $63, %%xmm5\n\t"
        "psllq $1, %%xmm4\n\t"
        "pslldq $8, %%xmm5\n\t"
        "por %%xmm5, %%xmm4\n\t"
        "pshufd $0xff, " VAR(HR) ", " VAR(HR) "\n\t"
        "psrad $31, " VAR(HR) "\n\t"
        "pand %[MOD2_128], " VAR(HR) "\n\t"
        "pxor %%xmm4, " VAR(HR) "\n\t"

        "xorl " VAR(KR) ", " VAR(KR) "\n\t"

#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL)
        /* 8-block (128-byte) unrolled path. */
        "cmpl $128, %[nbytes]\n\t"
        "movl %[nbytes], %%r13d\n\t"
        "jl 5f\n\t"
        "andl $0xffffff80, %%r13d\n\t"

        CALC_HT_8_AVX()  /* build H^1..H^8 table */

        "# First 128 bytes of input\n\t"
        /* First chunk is encrypt-only; its GHASH happens in the next loop
         * iteration (or in the wind-down below). */
        AESENC_CTR()
        AESENC_XOR()
        AESENC_SET(16)
        AESENC_SET(32)
        AESENC_SET(48)
        AESENC_SET(64)
        AESENC_SET(80)
        AESENC_SET(96)
        AESENC_SET(112)
        AESENC_SET(128)
        AESENC_SET(144)
        "cmpl $11, %[nr]\n\t"
        "movdqa 160(%[KEY]), %%xmm12\n\t"
        "jl 1f\n\t"
        AESENC()
        AESENC_SET(176)
        "cmpl $13, %[nr]\n\t"
        "movdqa 192(%[KEY]), %%xmm12\n\t"
        "jl 1f\n\t"
        AESENC()
        AESENC_SET(208)
        "movdqa 224(%[KEY]), %%xmm12\n\t"
        "\n"
        "1:\n\t"
        AESENC_LAST(%[in], %[out])

        "cmpl $128, %%r13d\n\t"
        "movl $128, " VAR(KR) "\n\t"
        "jle 2f\n\t"

        "# More 128 bytes of input\n\t"
        "\n"
        "3:\n\t"
        /* Encrypt next 128 bytes while hashing the previous 128 bytes of
         * ciphertext (src = %%rdx, the output pointer). */
        AESENC_128_GHASH_AVX(%%rdx, 0)
        "addl $128, " VAR(KR) "\n\t"
        "cmpl %%r13d, " VAR(KR) "\n\t"
        "jl 3b\n\t"
        "\n"
        "2:\n\t"
        /* Wind-down: GHASH the final 128 bytes of ciphertext still held in
         * xmm4..xmm11 against H^8..H^1 and reduce into XR. */
        "movdqa %[BSWAP_MASK], %%xmm13\n\t"
        "pshufb %%xmm13, %%xmm4\n\t"
        "pshufb %%xmm13, %%xmm5\n\t"
        "pshufb %%xmm13, %%xmm6\n\t"
        "pshufb %%xmm13, %%xmm7\n\t"
        "pxor %%xmm2, %%xmm4\n\t"
        "pshufb %%xmm13, %%xmm8\n\t"
        "pshufb %%xmm13, %%xmm9\n\t"
        "pshufb %%xmm13, %%xmm10\n\t"
        "pshufb %%xmm13, %%xmm11\n\t"

        "movdqu 112(" VAR(HTR) "), %%xmm12\n\t"
        GHASH_GFMUL_AVX(XR, %%xmm13, %%xmm4, %%xmm12)
        "movdqu 96(" VAR(HTR) "), %%xmm12\n\t"
        GHASH_GFMUL_XOR_AVX(XR, %%xmm13, %%xmm5, %%xmm12)
        "movdqu 80(" VAR(HTR) "), %%xmm12\n\t"
        GHASH_GFMUL_XOR_AVX(XR, %%xmm13, %%xmm6, %%xmm12)
        "movdqu 64(" VAR(HTR) "), %%xmm12\n\t"
        GHASH_GFMUL_XOR_AVX(XR, %%xmm13, %%xmm7, %%xmm12)
        "movdqu 48(" VAR(HTR) "), %%xmm12\n\t"
        GHASH_GFMUL_XOR_AVX(XR, %%xmm13, %%xmm8, %%xmm12)
        "movdqu 32(" VAR(HTR) "), %%xmm12\n\t"
        GHASH_GFMUL_XOR_AVX(XR, %%xmm13, %%xmm9, %%xmm12)
        "movdqu 16(" VAR(HTR) "), %%xmm12\n\t"
        GHASH_GFMUL_XOR_AVX(XR, %%xmm13, %%xmm10, %%xmm12)
        "movdqu (" VAR(HTR) "), %%xmm12\n\t"
        GHASH_GFMUL_RED_XOR_AVX(XR, %%xmm13, %%xmm11, %%xmm12)

        /* Restore H^1 into HR for the per-block path below. */
        "movdqu 0(" VAR(HTR) "), " VAR(HR) "\n\t"
        "\n"
        "5:\n\t"
        "movl %[nbytes], %%edx\n\t"
        "cmpl %%edx, " VAR(KR) "\n\t"
        "jge 55f\n\t"
#endif

        /* Remaining whole 16-byte blocks. */
        "movl %[nbytes], %%r13d\n\t"
        "andl $0xfffffff0, %%r13d\n\t"
        "cmpl %%r13d, " VAR(KR) "\n\t"
        "jge 14f\n\t"

        /* First block encrypt-only; subsequent blocks interleave the GHASH
         * of the previous ciphertext block with the encryption. */
        "leaq (%[in]," VAR(KR64) ",1), %%rcx\n\t"
        "leaq (%[out]," VAR(KR64) ",1), %%rdx\n\t"
        AESENC_BLOCK(%%rcx, %%rdx)
        "addl $16, " VAR(KR) "\n\t"
        "cmpl %%r13d, " VAR(KR) "\n\t"
        "jge 13f\n\t"
        "\n"
        "12:\n\t"
        "leaq (%[in]," VAR(KR64) ",1), %%rcx\n\t"
        "leaq (%[out]," VAR(KR64) ",1), %%rdx\n\t"
        AESENC_GFMUL(%%rcx, %%rdx, HR, XR)
        "pshufb %[BSWAP_MASK], %%xmm4\n\t"
        "pxor %%xmm4, " VAR(XR) "\n\t"
        "addl $16, " VAR(KR) "\n\t"
        "cmpl %%r13d, " VAR(KR) "\n\t"
        "jl 12b\n\t"
        "\n"
        "13:\n\t"
        /* Flush the last pending ciphertext block into the hash. */
        GHASH_GFMUL_RED_AVX(XR, HR, XR)
        "\n"
        "14:\n\t"

        AESENC_LAST15_ENC_AVX()  /* sub-16-byte tail */
        "\n"
        "55:\n\t"

        CALC_TAG()
        STORE_TAG()
        "addq $" VAR(STACK_OFFSET) ", %%rsp\n\t"

        :
        : [KEY] "r" (key),
          [in] "r" (in), [out] "r" (out), [nr] "r" (nr),
          [nbytes] "r" (nbytes), [abytes] "r" (abytes), [addt] "r" (addt),
          [ivec] "r" (iv), [ibytes] "r" (ivLen), [tbytes] "r" (tbytes),
          [tag] "r" (tag),
          [BSWAP_MASK] "m" (BSWAP_MASK),
          [BSWAP_EPI64] "m" (BSWAP_EPI64),
          [ONE] "m" (ONE),
#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL)
          [TWO] "m" (TWO), [THREE] "m" (THREE), [FOUR] "m" (FOUR),
          [FIVE] "m" (FIVE), [SIX] "m" (SIX), [SEVEN] "m" (SEVEN),
          [EIGHT] "m" (EIGHT),
#endif
          [MOD2_128] "m" (MOD2_128)
        : "xmm15", "xmm14", "xmm13", "xmm12",
          "xmm0", "xmm1", "xmm2", "xmm3", "memory",
          "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11",
          "rcx", "rdx", "r13"
    );
}

#ifdef HAVE_INTEL_AVX1
/* Encrypt with key in xmm12.
*/ 04569 #define VAESENC() \ 04570 "vaesenc %%xmm12, %%xmm4, %%xmm4\n\t" \ 04571 "vaesenc %%xmm12, %%xmm5, %%xmm5\n\t" \ 04572 "vaesenc %%xmm12, %%xmm6, %%xmm6\n\t" \ 04573 "vaesenc %%xmm12, %%xmm7, %%xmm7\n\t" \ 04574 "vaesenc %%xmm12, %%xmm8, %%xmm8\n\t" \ 04575 "vaesenc %%xmm12, %%xmm9, %%xmm9\n\t" \ 04576 "vaesenc %%xmm12, %%xmm10, %%xmm10\n\t" \ 04577 "vaesenc %%xmm12, %%xmm11, %%xmm11\n\t" 04578 04579 #define VAESENC_SET(o) \ 04580 "vmovdqa "#o"(%[KEY]), %%xmm12\n\t" \ 04581 VAESENC() 04582 04583 #define VAESENC_CTR() \ 04584 "vmovdqu " VAR(CTR1) ", %%xmm0\n\t" \ 04585 "vmovdqa %[BSWAP_EPI64], %%xmm1\n\t" \ 04586 "vpshufb %%xmm1, %%xmm0, %%xmm4\n\t" \ 04587 "vpaddd %[ONE], %%xmm0, %%xmm5\n\t" \ 04588 "vpshufb %%xmm1, %%xmm5, %%xmm5\n\t" \ 04589 "vpaddd %[TWO], %%xmm0, %%xmm6\n\t" \ 04590 "vpshufb %%xmm1, %%xmm6, %%xmm6\n\t" \ 04591 "vpaddd %[THREE], %%xmm0, %%xmm7\n\t" \ 04592 "vpshufb %%xmm1, %%xmm7, %%xmm7\n\t" \ 04593 "vpaddd %[FOUR], %%xmm0, %%xmm8\n\t" \ 04594 "vpshufb %%xmm1, %%xmm8, %%xmm8\n\t" \ 04595 "vpaddd %[FIVE], %%xmm0, %%xmm9\n\t" \ 04596 "vpshufb %%xmm1, %%xmm9, %%xmm9\n\t" \ 04597 "vpaddd %[SIX], %%xmm0, %%xmm10\n\t" \ 04598 "vpshufb %%xmm1, %%xmm10, %%xmm10\n\t" \ 04599 "vpaddd %[SEVEN], %%xmm0, %%xmm11\n\t" \ 04600 "vpshufb %%xmm1, %%xmm11, %%xmm11\n\t" \ 04601 "vpaddd %[EIGHT], %%xmm0, %%xmm0\n\t" 04602 04603 #define VAESENC_XOR() \ 04604 "vmovdqa (%[KEY]), %%xmm12\n\t" \ 04605 "vmovdqu %%xmm0, " VAR(CTR1) "\n\t" \ 04606 "vpxor %%xmm12, %%xmm4, %%xmm4\n\t" \ 04607 "vpxor %%xmm12, %%xmm5, %%xmm5\n\t" \ 04608 "vpxor %%xmm12, %%xmm6, %%xmm6\n\t" \ 04609 "vpxor %%xmm12, %%xmm7, %%xmm7\n\t" \ 04610 "vpxor %%xmm12, %%xmm8, %%xmm8\n\t" \ 04611 "vpxor %%xmm12, %%xmm9, %%xmm9\n\t" \ 04612 "vpxor %%xmm12, %%xmm10, %%xmm10\n\t" \ 04613 "vpxor %%xmm12, %%xmm11, %%xmm11\n\t" 04614 04615 #define VAESENC_128() \ 04616 VAESENC_CTR() \ 04617 VAESENC_XOR() \ 04618 VAESENC_SET(16) \ 04619 VAESENC_SET(32) \ 04620 VAESENC_SET(48) \ 04621 VAESENC_SET(64) \ 
04622 VAESENC_SET(80) \ 04623 VAESENC_SET(96) \ 04624 VAESENC_SET(112) \ 04625 VAESENC_SET(128) \ 04626 VAESENC_SET(144) \ 04627 "cmpl $11, %[nr]\n\t" \ 04628 "vmovdqa 160(%[KEY]), %%xmm12\n\t" \ 04629 "jl 1f\n\t" \ 04630 VAESENC() \ 04631 VAESENC_SET(176) \ 04632 "cmpl $13, %[nr]\n\t" \ 04633 "vmovdqa 192(%[KEY]), %%xmm12\n\t" \ 04634 "jl 1f\n\t" \ 04635 VAESENC() \ 04636 VAESENC_SET(208) \ 04637 "vmovdqa 224(%[KEY]), %%xmm12\n\t" \ 04638 "\n" \ 04639 "1:\n\t" \ 04640 VAESENC_LAST(%[in], %[out]) 04641 04642 /* Encrypt and carry-less multiply for AVX1. */ 04643 #define VAESENC_PCLMUL_1(src, o1, o2, o3) \ 04644 "vmovdqu " #o3 "(" VAR(HTR) "), %%xmm12\n\t" \ 04645 "vmovdqu " #o2 "(" #src "), %%xmm0\n\t" \ 04646 "vaesenc " #o1 "(%[KEY]), %%xmm4, %%xmm4\n\t" \ 04647 "vpshufb %[BSWAP_MASK], %%xmm0, %%xmm0\n\t" \ 04648 "vpxor %%xmm2, %%xmm0, %%xmm0\n\t" \ 04649 "vpshufd $0x4e, %%xmm12, %%xmm1\n\t" \ 04650 "vpshufd $0x4e, %%xmm0, %%xmm14\n\t" \ 04651 "vpxor %%xmm12, %%xmm1, %%xmm1\n\t" \ 04652 "vpxor %%xmm0, %%xmm14, %%xmm14\n\t" \ 04653 "vpclmulqdq $0x11, %%xmm12, %%xmm0, %%xmm3\n\t" \ 04654 "vaesenc " #o1 "(%[KEY]), %%xmm5, %%xmm5\n\t" \ 04655 "vaesenc " #o1 "(%[KEY]), %%xmm6, %%xmm6\n\t" \ 04656 "vpclmulqdq $0x00, %%xmm12, %%xmm0, %%xmm2\n\t" \ 04657 "vaesenc " #o1 "(%[KEY]), %%xmm7, %%xmm7\n\t" \ 04658 "vaesenc " #o1 "(%[KEY]), %%xmm8, %%xmm8\n\t" \ 04659 "vpclmulqdq $0x00, %%xmm14, %%xmm1, %%xmm1\n\t" \ 04660 "vaesenc " #o1 "(%[KEY]), %%xmm9, %%xmm9\n\t" \ 04661 "vaesenc " #o1 "(%[KEY]), %%xmm10, %%xmm10\n\t" \ 04662 "vaesenc " #o1 "(%[KEY]), %%xmm11, %%xmm11\n\t" \ 04663 "vpxor %%xmm2, %%xmm1, %%xmm1\n\t" \ 04664 "vpxor %%xmm3, %%xmm1, %%xmm1\n\t" \ 04665 04666 #define VAESENC_PCLMUL_N(src, o1, o2, o3) \ 04667 "vmovdqu " #o3 "(" VAR(HTR) "), %%xmm12\n\t" \ 04668 "vmovdqu " #o2 "(" #src "), %%xmm0\n\t" \ 04669 "vpshufd $0x4e, %%xmm12, %%xmm13\n\t" \ 04670 "vpshufb %[BSWAP_MASK], %%xmm0, %%xmm0\n\t" \ 04671 "vaesenc " #o1 "(%[KEY]), %%xmm4, %%xmm4\n\t" \ 04672 "vpxor 
%%xmm12, %%xmm13, %%xmm13\n\t" \ 04673 "vpshufd $0x4e, %%xmm0, %%xmm14\n\t" \ 04674 "vpxor %%xmm0, %%xmm14, %%xmm14\n\t" \ 04675 "vpclmulqdq $0x11, %%xmm12, %%xmm0, %%xmm15\n\t" \ 04676 "vaesenc " #o1 "(%[KEY]), %%xmm5, %%xmm5\n\t" \ 04677 "vaesenc " #o1 "(%[KEY]), %%xmm6, %%xmm6\n\t" \ 04678 "vpclmulqdq $0x00, %%xmm12, %%xmm0, %%xmm12\n\t" \ 04679 "vaesenc " #o1 "(%[KEY]), %%xmm7, %%xmm7\n\t" \ 04680 "vaesenc " #o1 "(%[KEY]), %%xmm8, %%xmm8\n\t" \ 04681 "vpclmulqdq $0x00, %%xmm14, %%xmm13, %%xmm13\n\t" \ 04682 "vaesenc " #o1 "(%[KEY]), %%xmm9, %%xmm9\n\t" \ 04683 "vaesenc " #o1 "(%[KEY]), %%xmm10, %%xmm10\n\t" \ 04684 "vaesenc " #o1 "(%[KEY]), %%xmm11, %%xmm11\n\t" \ 04685 "vpxor %%xmm12, %%xmm1, %%xmm1\n\t" \ 04686 "vpxor %%xmm12, %%xmm2, %%xmm2\n\t" \ 04687 "vpxor %%xmm15, %%xmm1, %%xmm1\n\t" \ 04688 "vpxor %%xmm15, %%xmm3, %%xmm3\n\t" \ 04689 "vpxor %%xmm13, %%xmm1, %%xmm1\n\t" \ 04690 04691 #define VAESENC_PCLMUL_L(o) \ 04692 "vpslldq $8, %%xmm1, %%xmm14\n\t" \ 04693 "vpsrldq $8, %%xmm1, %%xmm1\n\t" \ 04694 "vaesenc "#o"(%[KEY]), %%xmm4, %%xmm4\n\t" \ 04695 "vpxor %%xmm14, %%xmm2, %%xmm2\n\t" \ 04696 "vpxor %%xmm1, %%xmm3, %%xmm3\n\t" \ 04697 "vaesenc "#o"(%[KEY]), %%xmm5, %%xmm5\n\t" \ 04698 "vpslld $31, %%xmm2, %%xmm12\n\t" \ 04699 "vpslld $30, %%xmm2, %%xmm13\n\t" \ 04700 "vpslld $25, %%xmm2, %%xmm14\n\t" \ 04701 "vaesenc "#o"(%[KEY]), %%xmm6, %%xmm6\n\t" \ 04702 "vpxor %%xmm13, %%xmm12, %%xmm12\n\t" \ 04703 "vpxor %%xmm14, %%xmm12, %%xmm12\n\t" \ 04704 "vaesenc "#o"(%[KEY]), %%xmm7, %%xmm7\n\t" \ 04705 "vpsrldq $4, %%xmm12, %%xmm13\n\t" \ 04706 "vpslldq $12, %%xmm12, %%xmm12\n\t" \ 04707 "vaesenc "#o"(%[KEY]), %%xmm8, %%xmm8\n\t" \ 04708 "vpxor %%xmm12, %%xmm2, %%xmm2\n\t" \ 04709 "vpsrld $1, %%xmm2, %%xmm14\n\t" \ 04710 "vaesenc "#o"(%[KEY]), %%xmm9, %%xmm9\n\t" \ 04711 "vpsrld $2, %%xmm2, %%xmm1\n\t" \ 04712 "vpsrld $7, %%xmm2, %%xmm0\n\t" \ 04713 "vaesenc "#o"(%[KEY]), %%xmm10, %%xmm10\n\t" \ 04714 "vpxor %%xmm1, %%xmm14, %%xmm14\n\t" \ 04715 "vpxor 
%%xmm0, %%xmm14, %%xmm14\n\t" \ 04716 "vaesenc "#o"(%[KEY]), %%xmm11, %%xmm11\n\t" \ 04717 "vpxor %%xmm13, %%xmm14, %%xmm14\n\t" \ 04718 "vpxor %%xmm14, %%xmm2, %%xmm2\n\t" \ 04719 "vpxor %%xmm3, %%xmm2, %%xmm2\n\t" \ 04720 04721 04722 /* Encrypt and carry-less multiply with last key. */ 04723 #define VAESENC_LAST(in, out) \ 04724 "vaesenclast %%xmm12, %%xmm4, %%xmm4\n\t" \ 04725 "vaesenclast %%xmm12, %%xmm5, %%xmm5\n\t" \ 04726 "vmovdqu (" #in "), %%xmm0\n\t" \ 04727 "vmovdqu 16(" #in "), %%xmm1\n\t" \ 04728 "vpxor %%xmm0, %%xmm4, %%xmm4\n\t" \ 04729 "vpxor %%xmm1, %%xmm5, %%xmm5\n\t" \ 04730 "vmovdqu %%xmm4, (" #out ")\n\t" \ 04731 "vmovdqu %%xmm5, 16(" #out ")\n\t" \ 04732 "vaesenclast %%xmm12, %%xmm6, %%xmm6\n\t" \ 04733 "vaesenclast %%xmm12, %%xmm7, %%xmm7\n\t" \ 04734 "vmovdqu 32(" #in "), %%xmm0\n\t" \ 04735 "vmovdqu 48(" #in "), %%xmm1\n\t" \ 04736 "vpxor %%xmm0, %%xmm6, %%xmm6\n\t" \ 04737 "vpxor %%xmm1, %%xmm7, %%xmm7\n\t" \ 04738 "vmovdqu %%xmm6, 32(" #out ")\n\t" \ 04739 "vmovdqu %%xmm7, 48(" #out ")\n\t" \ 04740 "vaesenclast %%xmm12, %%xmm8, %%xmm8\n\t" \ 04741 "vaesenclast %%xmm12, %%xmm9, %%xmm9\n\t" \ 04742 "vmovdqu 64(" #in "), %%xmm0\n\t" \ 04743 "vmovdqu 80(" #in "), %%xmm1\n\t" \ 04744 "vpxor %%xmm0, %%xmm8, %%xmm8\n\t" \ 04745 "vpxor %%xmm1, %%xmm9, %%xmm9\n\t" \ 04746 "vmovdqu %%xmm8, 64(" #out ")\n\t" \ 04747 "vmovdqu %%xmm9, 80(" #out ")\n\t" \ 04748 "vaesenclast %%xmm12, %%xmm10, %%xmm10\n\t" \ 04749 "vaesenclast %%xmm12, %%xmm11, %%xmm11\n\t" \ 04750 "vmovdqu 96(" #in "), %%xmm0\n\t" \ 04751 "vmovdqu 112(" #in "), %%xmm1\n\t" \ 04752 "vpxor %%xmm0, %%xmm10, %%xmm10\n\t" \ 04753 "vpxor %%xmm1, %%xmm11, %%xmm11\n\t" \ 04754 "vmovdqu %%xmm10, 96(" #out ")\n\t" \ 04755 "vmovdqu %%xmm11, 112(" #out ")\n\t" 04756 04757 #define VAESENC_BLOCK() \ 04758 "vmovdqu " VAR(CTR1) ", %%xmm5\n\t" \ 04759 "vpshufb %[BSWAP_EPI64], %%xmm5, %%xmm4\n\t" \ 04760 "vpaddd %[ONE], %%xmm5, %%xmm5\n\t" \ 04761 "vmovdqu %%xmm5, " VAR(CTR1) "\n\t" \ 04762 "vpxor 
(%[KEY]), %%xmm4, %%xmm4\n\t" \ 04763 "vaesenc 16(%[KEY]), %%xmm4, %%xmm4\n\t" \ 04764 "vaesenc 32(%[KEY]), %%xmm4, %%xmm4\n\t" \ 04765 "vaesenc 48(%[KEY]), %%xmm4, %%xmm4\n\t" \ 04766 "vaesenc 64(%[KEY]), %%xmm4, %%xmm4\n\t" \ 04767 "vaesenc 80(%[KEY]), %%xmm4, %%xmm4\n\t" \ 04768 "vaesenc 96(%[KEY]), %%xmm4, %%xmm4\n\t" \ 04769 "vaesenc 112(%[KEY]), %%xmm4, %%xmm4\n\t" \ 04770 "vaesenc 128(%[KEY]), %%xmm4, %%xmm4\n\t" \ 04771 "vaesenc 144(%[KEY]), %%xmm4, %%xmm4\n\t" \ 04772 "cmpl $11, %[nr]\n\t" \ 04773 "vmovdqa 160(%[KEY]), %%xmm5\n\t" \ 04774 "jl %=f\n\t" \ 04775 "vaesenc %%xmm5, %%xmm4, %%xmm4\n\t" \ 04776 "vaesenc 176(%[KEY]), %%xmm4, %%xmm4\n\t" \ 04777 "cmpl $13, %[nr]\n\t" \ 04778 "vmovdqa 192(%[KEY]), %%xmm5\n\t" \ 04779 "jl %=f\n\t" \ 04780 "vaesenc %%xmm5, %%xmm4, %%xmm4\n\t" \ 04781 "vaesenc 208(%[KEY]), %%xmm4, %%xmm4\n\t" \ 04782 "vmovdqa 224(%[KEY]), %%xmm5\n\t" \ 04783 "%=:\n\t" \ 04784 "vaesenclast %%xmm5, %%xmm4, %%xmm4\n\t" \ 04785 "vmovdqu (%[in]," VAR(KR64) ",1), %%xmm5\n\t" \ 04786 "vpxor %%xmm5, %%xmm4, %%xmm4\n\t" \ 04787 "vmovdqu %%xmm4, (%[out]," VAR(KR64) ",1)\n\t" \ 04788 "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ 04789 "vpxor %%xmm4, " VAR(XR) ", " VAR(XR) "\n\t" 04790 04791 #define _VAESENC_GFMUL(in, H, X) \ 04792 "vmovdqu " VAR(CTR1) ", %%xmm5\n\t" \ 04793 "vpshufb %[BSWAP_EPI64], %%xmm5, %%xmm4\n\t" \ 04794 "vpaddd %[ONE], %%xmm5, %%xmm5\n\t" \ 04795 "vmovdqu %%xmm5, " VAR(CTR1) "\n\t" \ 04796 "vpxor (%[KEY]), %%xmm4, %%xmm4\n\t" \ 04797 "vpclmulqdq $0x10, " #H ", " #X ", %%xmm6\n\t" \ 04798 "vaesenc 16(%[KEY]), %%xmm4, %%xmm4\n\t" \ 04799 "vaesenc 32(%[KEY]), %%xmm4, %%xmm4\n\t" \ 04800 "vpclmulqdq $0x01, " #H ", " #X ", %%xmm7\n\t" \ 04801 "vaesenc 48(%[KEY]), %%xmm4, %%xmm4\n\t" \ 04802 "vaesenc 64(%[KEY]), %%xmm4, %%xmm4\n\t" \ 04803 "vpclmulqdq $0x00, " #H ", " #X ", %%xmm8\n\t" \ 04804 "vaesenc 80(%[KEY]), %%xmm4, %%xmm4\n\t" \ 04805 "vpclmulqdq $0x11, " #H ", " #X ", %%xmm1\n\t" \ 04806 "vaesenc 96(%[KEY]), %%xmm4, 
%%xmm4\n\t" \ 04807 "vpxor %%xmm7, %%xmm6, %%xmm6\n\t" \ 04808 "vpslldq $8, %%xmm6, %%xmm2\n\t" \ 04809 "vpsrldq $8, %%xmm6, %%xmm6\n\t" \ 04810 "vaesenc 112(%[KEY]), %%xmm4, %%xmm4\n\t" \ 04811 "vpxor %%xmm8, %%xmm2, %%xmm2\n\t" \ 04812 "vpxor %%xmm6, %%xmm1, %%xmm3\n\t" \ 04813 "vmovdqa %[MOD2_128], %%xmm0\n\t" \ 04814 "vpclmulqdq $0x10, %%xmm0, %%xmm2, %%xmm7\n\t" \ 04815 "vaesenc 128(%[KEY]), %%xmm4, %%xmm4\n\t" \ 04816 "vpshufd $0x4e, %%xmm2, %%xmm6\n\t" \ 04817 "vpxor %%xmm7, %%xmm6, %%xmm6\n\t" \ 04818 "vpclmulqdq $0x10, %%xmm0, %%xmm6, %%xmm7\n\t" \ 04819 "vaesenc 144(%[KEY]), %%xmm4, %%xmm4\n\t" \ 04820 "vpshufd $0x4e, %%xmm6, %%xmm6\n\t" \ 04821 "vpxor %%xmm7, %%xmm6, %%xmm6\n\t" \ 04822 "vpxor %%xmm3, %%xmm6, " VAR(XR) "\n\t" \ 04823 "cmpl $11, %[nr]\n\t" \ 04824 "vmovdqa 160(%[KEY]), %%xmm5\n\t" \ 04825 "jl 1f\n\t" \ 04826 "vaesenc %%xmm5, %%xmm4, %%xmm4\n\t" \ 04827 "vaesenc 176(%[KEY]), %%xmm4, %%xmm4\n\t" \ 04828 "cmpl $13, %[nr]\n\t" \ 04829 "vmovdqa 192(%[KEY]), %%xmm5\n\t" \ 04830 "jl 1f\n\t" \ 04831 "vaesenc %%xmm5, %%xmm4, %%xmm4\n\t" \ 04832 "vaesenc 208(%[KEY]), %%xmm4, %%xmm4\n\t" \ 04833 "vmovdqa 224(%[KEY]), %%xmm5\n\t" \ 04834 "1:\n\t" \ 04835 "vaesenclast %%xmm5, %%xmm4, %%xmm4\n\t" \ 04836 "vmovdqu " #in ", %%xmm0\n\t" \ 04837 "vpxor %%xmm0, %%xmm4, %%xmm4\n\t" \ 04838 "vmovdqu %%xmm4, (%[out]," VAR(KR64) ",1)\n\t" 04839 #define VAESENC_GFMUL(in, H, X) \ 04840 _VAESENC_GFMUL(in, H, X) 04841 04842 04843 #define _GHASH_GFMUL_AVX1(r, r2, a, b) \ 04844 "vpshufd $0x4e, "#a", %%xmm1\n\t" \ 04845 "vpshufd $0x4e, "#b", %%xmm2\n\t" \ 04846 "vpclmulqdq $0x11, "#a", "#b", %%xmm3\n\t" \ 04847 "vpclmulqdq $0x00, "#a", "#b", %%xmm0\n\t" \ 04848 "vpxor "#a", %%xmm1, %%xmm1\n\t" \ 04849 "vpxor "#b", %%xmm2, %%xmm2\n\t" \ 04850 "vpclmulqdq $0x00, %%xmm2, %%xmm1, %%xmm1\n\t" \ 04851 "vpxor %%xmm0, %%xmm1, %%xmm1\n\t" \ 04852 "vpxor %%xmm3, %%xmm1, %%xmm1\n\t" \ 04853 "vmovdqa %%xmm0, "#r2"\n\t" \ 04854 "vmovdqa %%xmm3, " #r "\n\t" \ 04855 "vpslldq $8, 
%%xmm1, %%xmm2\n\t" \ 04856 "vpsrldq $8, %%xmm1, %%xmm1\n\t" \ 04857 "vpxor %%xmm2, "#r2", "#r2"\n\t" \ 04858 "vpxor %%xmm1, " #r ", " #r "\n\t" 04859 #define GHASH_GFMUL_AVX1(r, r2, a, b) \ 04860 _GHASH_GFMUL_AVX1(r, r2, a, b) 04861 04862 #define _GHASH_GFMUL_XOR_AVX1(r, r2, a, b) \ 04863 "vpshufd $0x4e, "#a", %%xmm1\n\t" \ 04864 "vpshufd $0x4e, "#b", %%xmm2\n\t" \ 04865 "vpclmulqdq $0x11, "#a", "#b", %%xmm3\n\t" \ 04866 "vpclmulqdq $0x00, "#a", "#b", %%xmm0\n\t" \ 04867 "vpxor "#a", %%xmm1, %%xmm1\n\t" \ 04868 "vpxor "#b", %%xmm2, %%xmm2\n\t" \ 04869 "vpclmulqdq $0x00, %%xmm2, %%xmm1, %%xmm1\n\t" \ 04870 "vpxor %%xmm0, %%xmm1, %%xmm1\n\t" \ 04871 "vpxor %%xmm3, %%xmm1, %%xmm1\n\t" \ 04872 "vpxor %%xmm0, "#r2", "#r2"\n\t" \ 04873 "vpxor %%xmm3, " #r ", " #r "\n\t" \ 04874 "vpslldq $8, %%xmm1, %%xmm2\n\t" \ 04875 "vpsrldq $8, %%xmm1, %%xmm1\n\t" \ 04876 "vpxor %%xmm2, "#r2", "#r2"\n\t" \ 04877 "vpxor %%xmm1, " #r ", " #r "\n\t" 04878 #define GHASH_GFMUL_XOR_AVX1(r, r2, a, b) \ 04879 _GHASH_GFMUL_XOR_AVX1(r, r2, a, b) 04880 04881 #define GHASH_MID_AVX1(r, r2) \ 04882 "vpsrld $31, "#r2", %%xmm0\n\t" \ 04883 "vpsrld $31, " #r ", %%xmm1\n\t" \ 04884 "vpslld $1, "#r2", "#r2"\n\t" \ 04885 "vpslld $1, " #r ", " #r "\n\t" \ 04886 "vpsrldq $12, %%xmm0, %%xmm2\n\t" \ 04887 "vpslldq $4, %%xmm0, %%xmm0\n\t" \ 04888 "vpslldq $4, %%xmm1, %%xmm1\n\t" \ 04889 "vpor %%xmm2, " #r ", " #r "\n\t" \ 04890 "vpor %%xmm0, "#r2", "#r2"\n\t" \ 04891 "vpor %%xmm1, " #r ", " #r "\n\t" 04892 04893 #define _GHASH_GFMUL_RED_AVX1(r, a, b) \ 04894 "vpshufd $0x4e, "#a", %%xmm5\n\t" \ 04895 "vpshufd $0x4e, "#b", %%xmm6\n\t" \ 04896 "vpclmulqdq $0x11, "#a", "#b", %%xmm7\n\t" \ 04897 "vpclmulqdq $0x00, "#a", "#b", %%xmm4\n\t" \ 04898 "vpxor "#a", %%xmm5, %%xmm5\n\t" \ 04899 "vpxor "#b", %%xmm6, %%xmm6\n\t" \ 04900 "vpclmulqdq $0x00, %%xmm6, %%xmm5, %%xmm5\n\t" \ 04901 "vpxor %%xmm4, %%xmm5, %%xmm5\n\t" \ 04902 "vpxor %%xmm7, %%xmm5, %%xmm5\n\t" \ 04903 "vpslldq $8, %%xmm5, %%xmm6\n\t" \ 04904 "vpsrldq 
$8, %%xmm5, %%xmm5\n\t" \ 04905 "vpxor %%xmm6, %%xmm4, %%xmm4\n\t" \ 04906 "vpxor %%xmm5, %%xmm7, " #r "\n\t" \ 04907 "vpslld $31, %%xmm4, %%xmm8\n\t" \ 04908 "vpslld $30, %%xmm4, %%xmm9\n\t" \ 04909 "vpslld $25, %%xmm4, %%xmm10\n\t" \ 04910 "vpxor %%xmm9, %%xmm8, %%xmm8\n\t" \ 04911 "vpxor %%xmm10, %%xmm8, %%xmm8\n\t" \ 04912 "vpsrldq $4, %%xmm8, %%xmm9\n\t" \ 04913 "vpslldq $12, %%xmm8, %%xmm8\n\t" \ 04914 "vpxor %%xmm8, %%xmm4, %%xmm4\n\t" \ 04915 "vpsrld $1, %%xmm4, %%xmm10\n\t" \ 04916 "vpsrld $2, %%xmm4, %%xmm6\n\t" \ 04917 "vpsrld $7, %%xmm4, %%xmm5\n\t" \ 04918 "vpxor %%xmm6, %%xmm10, %%xmm10\n\t" \ 04919 "vpxor %%xmm5, %%xmm10, %%xmm10\n\t" \ 04920 "vpxor %%xmm9, %%xmm10, %%xmm10\n\t" \ 04921 "vpxor %%xmm4, %%xmm10, %%xmm10\n\t" \ 04922 "vpxor %%xmm10, " #r ", " #r "\n\t" 04923 #define GHASH_GFMUL_RED_AVX1(r, a, b) \ 04924 _GHASH_GFMUL_RED_AVX1(r, a, b) 04925 04926 #define _GHASH_GFSQR_RED_AVX1(r, a) \ 04927 "vpclmulqdq $0x00, "#a", "#a", %%xmm4\n\t" \ 04928 "vpclmulqdq $0x11, "#a", "#a", " #r "\n\t" \ 04929 "vpslld $31, %%xmm4, %%xmm8\n\t" \ 04930 "vpslld $30, %%xmm4, %%xmm9\n\t" \ 04931 "vpslld $25, %%xmm4, %%xmm10\n\t" \ 04932 "vpxor %%xmm9, %%xmm8, %%xmm8\n\t" \ 04933 "vpxor %%xmm10, %%xmm8, %%xmm8\n\t" \ 04934 "vpsrldq $4, %%xmm8, %%xmm9\n\t" \ 04935 "vpslldq $12, %%xmm8, %%xmm8\n\t" \ 04936 "vpxor %%xmm8, %%xmm4, %%xmm4\n\t" \ 04937 "vpsrld $1, %%xmm4, %%xmm10\n\t" \ 04938 "vpsrld $2, %%xmm4, %%xmm6\n\t" \ 04939 "vpsrld $7, %%xmm4, %%xmm5\n\t" \ 04940 "vpxor %%xmm6, %%xmm10, %%xmm10\n\t" \ 04941 "vpxor %%xmm5, %%xmm10, %%xmm10\n\t" \ 04942 "vpxor %%xmm9, %%xmm10, %%xmm10\n\t" \ 04943 "vpxor %%xmm4, %%xmm10, %%xmm10\n\t" \ 04944 "vpxor %%xmm10, " #r ", " #r "\n\t" 04945 #define GHASH_GFSQR_RED_AVX1(r, a) \ 04946 _GHASH_GFSQR_RED_AVX1(r, a) 04947 04948 #define GHASH_RED_AVX1(r, r2) \ 04949 "vpslld $31, "#r2", %%xmm0\n\t" \ 04950 "vpslld $30, "#r2", %%xmm1\n\t" \ 04951 "vpslld $25, "#r2", %%xmm2\n\t" \ 04952 "vpxor %%xmm1, %%xmm0, %%xmm0\n\t" \ 04953 
"vpxor %%xmm2, %%xmm0, %%xmm0\n\t" \ 04954 "vmovdqa %%xmm0, %%xmm1\n\t" \ 04955 "vpsrldq $4, %%xmm1, %%xmm1\n\t" \ 04956 "vpslldq $12, %%xmm0, %%xmm0\n\t" \ 04957 "vpxor %%xmm0, "#r2", "#r2"\n\t" \ 04958 "vpsrld $1, "#r2", %%xmm2\n\t" \ 04959 "vpsrld $2, "#r2", %%xmm3\n\t" \ 04960 "vpsrld $7, "#r2", %%xmm0\n\t" \ 04961 "vpxor %%xmm3, %%xmm2, %%xmm2\n\t" \ 04962 "vpxor %%xmm0, %%xmm2, %%xmm2\n\t" \ 04963 "vpxor %%xmm1, %%xmm2, %%xmm2\n\t" \ 04964 "vpxor "#r2", %%xmm2, %%xmm2\n\t" \ 04965 "vpxor %%xmm2, " #r ", " #r "\n\t" 04966 04967 #define GHASH_GFMUL_RED_XOR_AVX1(r, r2, a, b) \ 04968 GHASH_GFMUL_XOR_AVX1(r, r2, a, b) \ 04969 GHASH_RED_AVX1(r, r2) 04970 04971 #define GHASH_FULL_AVX1(r, r2, a, b) \ 04972 GHASH_GFMUL_AVX1(r, r2, a, b) \ 04973 GHASH_MID_AVX1(r, r2) \ 04974 GHASH_RED_AVX1(r, r2) 04975 04976 #define CALC_IV_12_AVX1() \ 04977 "# Calculate values when IV is 12 bytes\n\t" \ 04978 "# Set counter based on IV\n\t" \ 04979 "movl $0x01000000, %%ecx\n\t" \ 04980 "vpinsrq $0, 0(%%rax), %%xmm13, %%xmm13\n\t" \ 04981 "vpinsrd $2, 8(%%rax), %%xmm13, %%xmm13\n\t" \ 04982 "vpinsrd $3, %%ecx, %%xmm13, %%xmm13\n\t" \ 04983 "# H = Encrypt X(=0) and T = Encrypt counter\n\t" \ 04984 "vmovdqa 0(%[KEY]), " VAR(HR) "\n\t" \ 04985 "vpxor " VAR(HR) ", %%xmm13, %%xmm1\n\t" \ 04986 "vmovdqa 16(%[KEY]), %%xmm12\n\t" \ 04987 "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ 04988 "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ 04989 "vmovdqa 32(%[KEY]), %%xmm12\n\t" \ 04990 "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ 04991 "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ 04992 "vmovdqa 48(%[KEY]), %%xmm12\n\t" \ 04993 "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ 04994 "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ 04995 "vmovdqa 64(%[KEY]), %%xmm12\n\t" \ 04996 "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ 04997 "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ 04998 "vmovdqa 80(%[KEY]), %%xmm12\n\t" \ 04999 "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ 05000 "vaesenc %%xmm12, %%xmm1, 
%%xmm1\n\t" \ 05001 "vmovdqa 96(%[KEY]), %%xmm12\n\t" \ 05002 "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ 05003 "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ 05004 "vmovdqa 112(%[KEY]), %%xmm12\n\t" \ 05005 "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ 05006 "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ 05007 "vmovdqa 128(%[KEY]), %%xmm12\n\t" \ 05008 "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ 05009 "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ 05010 "vmovdqa 144(%[KEY]), %%xmm12\n\t" \ 05011 "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ 05012 "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ 05013 "cmpl $11, %[nr]\n\t" \ 05014 "vmovdqa 160(%[KEY]), %%xmm12\n\t" \ 05015 "jl 31f\n\t" \ 05016 "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ 05017 "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ 05018 "vmovdqa 176(%[KEY]), %%xmm12\n\t" \ 05019 "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ 05020 "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ 05021 "cmpl $13, %[nr]\n\t" \ 05022 "vmovdqa 192(%[KEY]), %%xmm12\n\t" \ 05023 "jl 31f\n\t" \ 05024 "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ 05025 "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ 05026 "vmovdqa 208(%[KEY]), %%xmm12\n\t" \ 05027 "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ 05028 "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ 05029 "vmovdqu 224(%[KEY]), %%xmm12\n\t" \ 05030 "31:\n\t" \ 05031 "vaesenclast %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ 05032 "vaesenclast %%xmm12, %%xmm1, %%xmm1\n\t" \ 05033 "vpshufb %[BSWAP_MASK], " VAR(HR) ", " VAR(HR) "\n\t" \ 05034 "vmovdqu %%xmm1, " VAR(TR) "\n\t" \ 05035 "jmp 39f\n\t" 05036 05037 #define CALC_IV_AVX1() \ 05038 "# Calculate values when IV is not 12 bytes\n\t" \ 05039 "# H = Encrypt X(=0)\n\t" \ 05040 "vmovdqa 0(%[KEY]), " VAR(HR) "\n\t" \ 05041 VAESENC_AVX(HR) \ 05042 "vpshufb %[BSWAP_MASK], " VAR(HR) ", " VAR(HR) "\n\t" \ 05043 "# Calc counter\n\t" \ 05044 "# Initialization vector\n\t" \ 05045 "cmpl $0, %%edx\n\t" \ 05046 "movq $0, %%rcx\n\t" \ 05047 "je 45f\n\t" \ 05048 "cmpl $16, 
%%edx\n\t" \ 05049 "jl 44f\n\t" \ 05050 "andl $0xfffffff0, %%edx\n\t" \ 05051 "\n" \ 05052 "43:\n\t" \ 05053 "vmovdqu (%%rax,%%rcx,1), %%xmm4\n\t" \ 05054 "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ 05055 "vpxor %%xmm4, %%xmm13, %%xmm13\n\t" \ 05056 GHASH_FULL_AVX1(%%xmm13, %%xmm12, %%xmm13, HR) \ 05057 "addl $16, %%ecx\n\t" \ 05058 "cmpl %%edx, %%ecx\n\t" \ 05059 "jl 43b\n\t" \ 05060 "movl %[ibytes], %%edx\n\t" \ 05061 "cmpl %%edx, %%ecx\n\t" \ 05062 "je 45f\n\t" \ 05063 "\n" \ 05064 "44:\n\t" \ 05065 "subq $16, %%rsp\n\t" \ 05066 "vpxor %%xmm4, %%xmm4, %%xmm4\n\t" \ 05067 "xorl %%ebx, %%ebx\n\t" \ 05068 "vmovdqu %%xmm4, (%%rsp)\n\t" \ 05069 "42:\n\t" \ 05070 "movzbl (%%rax,%%rcx,1), %%r13d\n\t" \ 05071 "movb %%r13b, (%%rsp,%%rbx,1)\n\t" \ 05072 "incl %%ecx\n\t" \ 05073 "incl %%ebx\n\t" \ 05074 "cmpl %%edx, %%ecx\n\t" \ 05075 "jl 42b\n\t" \ 05076 "vmovdqu (%%rsp), %%xmm4\n\t" \ 05077 "addq $16, %%rsp\n\t" \ 05078 "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ 05079 "vpxor %%xmm4, %%xmm13, %%xmm13\n\t" \ 05080 GHASH_FULL_AVX1(%%xmm13, %%xmm12, %%xmm13, HR) \ 05081 "\n" \ 05082 "45:\n\t" \ 05083 "# T = Encrypt counter\n\t" \ 05084 "vpxor %%xmm0, %%xmm0, %%xmm0\n\t" \ 05085 "shll $3, %%edx\n\t" \ 05086 "vpinsrq $0, %%rdx, %%xmm0, %%xmm0\n\t" \ 05087 "vpxor %%xmm0, %%xmm13, %%xmm13\n\t" \ 05088 GHASH_FULL_AVX1(%%xmm13, %%xmm12, %%xmm13, HR) \ 05089 "vpshufb %[BSWAP_MASK], %%xmm13, %%xmm13\n\t" \ 05090 "# Encrypt counter\n\t" \ 05091 "vmovdqa 0(%[KEY]), %%xmm4\n\t" \ 05092 "vpxor %%xmm13, %%xmm4, %%xmm4\n\t" \ 05093 VAESENC_AVX(%%xmm4) \ 05094 "vmovdqu %%xmm4, " VAR(TR) "\n\t" 05095 05096 #define CALC_AAD_AVX1() \ 05097 "# Additional authentication data\n\t" \ 05098 "movl %[abytes], %%edx\n\t" \ 05099 "cmpl $0, %%edx\n\t" \ 05100 "je 25f\n\t" \ 05101 "movq %[addt], %%rax\n\t" \ 05102 "xorl %%ecx, %%ecx\n\t" \ 05103 "cmpl $16, %%edx\n\t" \ 05104 "jl 24f\n\t" \ 05105 "andl $0xfffffff0, %%edx\n\t" \ 05106 "\n" \ 05107 "23:\n\t" \ 05108 "vmovdqu (%%rax,%%rcx,1), 
%%xmm4\n\t" \ 05109 "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ 05110 "vpxor %%xmm4, " VAR(XR) ", " VAR(XR) "\n\t" \ 05111 GHASH_FULL_AVX1(XR, %%xmm12, XR, HR) \ 05112 "addl $16, %%ecx\n\t" \ 05113 "cmpl %%edx, %%ecx\n\t" \ 05114 "jl 23b\n\t" \ 05115 "movl %[abytes], %%edx\n\t" \ 05116 "cmpl %%edx, %%ecx\n\t" \ 05117 "je 25f\n\t" \ 05118 "\n" \ 05119 "24:\n\t" \ 05120 "subq $16, %%rsp\n\t" \ 05121 "vpxor %%xmm4, %%xmm4, %%xmm4\n\t" \ 05122 "xorl %%ebx, %%ebx\n\t" \ 05123 "vmovdqu %%xmm4, (%%rsp)\n\t" \ 05124 "22:\n\t" \ 05125 "movzbl (%%rax,%%rcx,1), %%r13d\n\t" \ 05126 "movb %%r13b, (%%rsp,%%rbx,1)\n\t" \ 05127 "incl %%ecx\n\t" \ 05128 "incl %%ebx\n\t" \ 05129 "cmpl %%edx, %%ecx\n\t" \ 05130 "jl 22b\n\t" \ 05131 "vmovdqu (%%rsp), %%xmm4\n\t" \ 05132 "addq $16, %%rsp\n\t" \ 05133 "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ 05134 "vpxor %%xmm4, " VAR(XR) ", " VAR(XR) "\n\t" \ 05135 GHASH_FULL_AVX1(XR, %%xmm12, XR, HR) \ 05136 "\n" \ 05137 "25:\n\t" 05138 05139 #define CALC_HT_8_AVX1() \ 05140 "vmovdqa " VAR(XR) ", %%xmm2\n\t" \ 05141 "# H ^ 1\n\t" \ 05142 "vmovdqu " VAR(HR) ", 0(" VAR(HTR) ")\n\t" \ 05143 "# H ^ 2\n\t" \ 05144 GHASH_GFSQR_RED_AVX1(%%xmm0, HR) \ 05145 "vmovdqu %%xmm0 , 16(" VAR(HTR) ")\n\t" \ 05146 "# H ^ 3\n\t" \ 05147 GHASH_GFMUL_RED_AVX1(%%xmm1, HR, %%xmm0) \ 05148 "vmovdqu %%xmm1 , 32(" VAR(HTR) ")\n\t" \ 05149 "# H ^ 4\n\t" \ 05150 GHASH_GFSQR_RED_AVX1(%%xmm3, %%xmm0) \ 05151 "vmovdqu %%xmm3 , 48(" VAR(HTR) ")\n\t" \ 05152 "# H ^ 5\n\t" \ 05153 GHASH_GFMUL_RED_AVX1(%%xmm12, %%xmm0, %%xmm1) \ 05154 "vmovdqu %%xmm12, 64(" VAR(HTR) ")\n\t" \ 05155 "# H ^ 6\n\t" \ 05156 GHASH_GFSQR_RED_AVX1(%%xmm12, %%xmm1) \ 05157 "vmovdqu %%xmm12, 80(" VAR(HTR) ")\n\t" \ 05158 "# H ^ 7\n\t" \ 05159 GHASH_GFMUL_RED_AVX1(%%xmm12, %%xmm1, %%xmm3) \ 05160 "vmovdqu %%xmm12, 96(" VAR(HTR) ")\n\t" \ 05161 "# H ^ 8\n\t" \ 05162 GHASH_GFSQR_RED_AVX1(%%xmm12, %%xmm3) \ 05163 "vmovdqu %%xmm12, 112(" VAR(HTR) ")\n\t" 05164 05165 #define VAESENC_128_GHASH_AVX1(src, o) \ 
"leaq (%[in]," VAR(KR64) ",1), %%rcx\n\t" \
    "leaq (%[out]," VAR(KR64) ",1), %%rdx\n\t" \
    /* src is either %%rcx or %%rdx */ \
    VAESENC_CTR() \
    VAESENC_XOR() \
    VAESENC_PCLMUL_1(src, 16, (o-128), 112) \
    VAESENC_PCLMUL_N(src, 32, (o-112), 96) \
    VAESENC_PCLMUL_N(src, 48, (o- 96), 80) \
    VAESENC_PCLMUL_N(src, 64, (o- 80), 64) \
    VAESENC_PCLMUL_N(src, 80, (o- 64), 48) \
    VAESENC_PCLMUL_N(src, 96, (o- 48), 32) \
    VAESENC_PCLMUL_N(src, 112, (o- 32), 16) \
    VAESENC_PCLMUL_N(src, 128, (o- 16), 0) \
    VAESENC_PCLMUL_L(144) \
    "cmpl $11, %[nr]\n\t" \
    "vmovdqa 160(%[KEY]), %%xmm12\n\t" \
    "jl 4f\n\t" \
    VAESENC() \
    VAESENC_SET(176) \
    "cmpl $13, %[nr]\n\t" \
    "vmovdqa 192(%[KEY]), %%xmm12\n\t" \
    "jl 4f\n\t" \
    VAESENC() \
    VAESENC_SET(208) \
    "vmovdqa 224(%[KEY]), %%xmm12\n\t" \
    "\n" \
    "4:\n\t" \
    VAESENC_LAST(%%rcx, %%rdx)

/* Run AES rounds 1..Nr-1 plus the final round over register r.
 * NOTE(review): round 0 (XOR with 0(%[KEY])) is NOT done here - every
 * caller performs the initial vpxor itself before invoking this. Handles
 * 10/12/14 rounds via the %[nr] comparisons. */
#define _VAESENC_AVX(r) \
    "vaesenc 16(%[KEY]), " #r ", " #r "\n\t" \
    "vaesenc 32(%[KEY]), " #r ", " #r "\n\t" \
    "vaesenc 48(%[KEY]), " #r ", " #r "\n\t" \
    "vaesenc 64(%[KEY]), " #r ", " #r "\n\t" \
    "vaesenc 80(%[KEY]), " #r ", " #r "\n\t" \
    "vaesenc 96(%[KEY]), " #r ", " #r "\n\t" \
    "vaesenc 112(%[KEY]), " #r ", " #r "\n\t" \
    "vaesenc 128(%[KEY]), " #r ", " #r "\n\t" \
    "vaesenc 144(%[KEY]), " #r ", " #r "\n\t" \
    "cmpl $11, %[nr]\n\t" \
    "vmovdqa 160(%[KEY]), %%xmm5\n\t" \
    "jl %=f\n\t" \
    "vaesenc %%xmm5, " #r ", " #r "\n\t" \
    "vaesenc 176(%[KEY]), " #r ", " #r "\n\t" \
    "cmpl $13, %[nr]\n\t" \
    "vmovdqa 192(%[KEY]), %%xmm5\n\t" \
    "jl %=f\n\t" \
    "vaesenc %%xmm5, " #r ", " #r "\n\t" \
    "vaesenc 208(%[KEY]), " #r ", " #r "\n\t" \
    "vmovdqa 224(%[KEY]), %%xmm5\n\t" \
    "%=:\n\t" \
    "vaesenclast %%xmm5, " #r ", " #r "\n\t"
#define VAESENC_AVX(r) \
    _VAESENC_AVX(r)

/* Encrypt the trailing partial block (nbytes % 16) in CTR mode, byte by
 * byte via a 16-byte stack scratch buffer, then zero-pad the scratch and
 * GHASH the resulting ciphertext block into XR. */
#define AESENC_LAST15_ENC_AVX1() \
    "movl %[nbytes], %%ecx\n\t" \
    "movl %%ecx, %%edx\n\t" \
    "andl $0x0f, %%ecx\n\t" \
    "jz 55f\n\t" \
    "vmovdqu " VAR(CTR1) ", %%xmm13\n\t" \
    "vpshufb %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t" \
    "vpxor 0(%[KEY]), %%xmm13, %%xmm13\n\t" \
    VAESENC_AVX(%%xmm13) \
    "subq $16, %%rsp\n\t" \
    "xorl %%ecx, %%ecx\n\t" \
    "vmovdqu %%xmm13, (%%rsp)\n\t" \
    "\n" \
    "51:\n\t" \
    "movzbl (%[in]," VAR(KR64) ",1), %%r13d\n\t" \
    "xorb (%%rsp,%%rcx,1), %%r13b\n\t" \
    "movb %%r13b, (%[out]," VAR(KR64) ",1)\n\t" \
    "movb %%r13b, (%%rsp,%%rcx,1)\n\t" \
    "incl " VAR(KR) "\n\t" \
    "incl %%ecx\n\t" \
    "cmpl %%edx, " VAR(KR) "\n\t" \
    "jl 51b\n\t" \
    "xorq %%r13, %%r13\n\t" \
    "cmpl $16, %%ecx\n\t" \
    "je 53f\n\t" \
    "\n" \
    "52:\n\t" \
    "movb %%r13b, (%%rsp,%%rcx,1)\n\t" \
    "incl %%ecx\n\t" \
    "cmpl $16, %%ecx\n\t" \
    "jl 52b\n\t" \
    "53:\n\t" \
    "vmovdqu (%%rsp), %%xmm13\n\t" \
    "addq $16, %%rsp\n\t" \
    "vpshufb %[BSWAP_MASK], %%xmm13, %%xmm13\n\t" \
    "vpxor %%xmm13, " VAR(XR) ", " VAR(XR) "\n\t" \
    GHASH_GFMUL_RED_AVX1(XR, HR, XR) \

/* Decrypt the trailing partial block. The input CIPHERTEXT bytes are
 * collected into a zero-initialized buffer at 16(%%rsp) so the GHASH (done
 * at label 53) authenticates ciphertext, while the keystream at (%%rsp) is
 * XORed to produce the plaintext output. */
#define AESENC_LAST15_DEC_AVX1() \
    "movl %[nbytes], %%ecx\n\t" \
    "movl %%ecx, %%edx\n\t" \
    "andl $0x0f, %%ecx\n\t" \
    "jz 55f\n\t" \
    "vmovdqu " VAR(CTR1) ", %%xmm13\n\t" \
    "vpshufb %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t" \
    "vpxor 0(%[KEY]), %%xmm13, %%xmm13\n\t" \
    VAESENC_AVX(%%xmm13) \
    "subq $32, %%rsp\n\t" \
    "xorl %%ecx, %%ecx\n\t" \
    "vmovdqu %%xmm13, (%%rsp)\n\t" \
    "vpxor %%xmm0, %%xmm0, %%xmm0\n\t" \
    "vmovdqu %%xmm0, 16(%%rsp)\n\t" \
    "\n" \
    "51:\n\t" \
    "movzbl (%[in]," VAR(KR64) ",1), %%r13d\n\t" \
    "movb %%r13b, 16(%%rsp,%%rcx,1)\n\t" \
    "xorb (%%rsp,%%rcx,1), %%r13b\n\t" \
    "movb %%r13b, (%[out]," VAR(KR64) ",1)\n\t" \
"incl " VAR(KR) "\n\t" \
    "incl %%ecx\n\t" \
    "cmpl %%edx, " VAR(KR) "\n\t" \
    "jl 51b\n\t" \
    "53:\n\t" \
    "vmovdqu 16(%%rsp), %%xmm13\n\t" \
    "addq $32, %%rsp\n\t" \
    "vpshufb %[BSWAP_MASK], %%xmm13, %%xmm13\n\t" \
    "vpxor %%xmm13, " VAR(XR) ", " VAR(XR) "\n\t" \
    GHASH_GFMUL_RED_AVX1(XR, HR, XR) \

/* Finish the tag: GHASH the bit lengths of the plaintext and AAD into XR,
 * byte-swap, then XOR with T (the encrypted initial counter). Result is
 * left in xmm0. */
#define CALC_TAG_AVX1() \
    "movl %[nbytes], %%edx\n\t" \
    "movl %[abytes], %%ecx\n\t" \
    "shlq $3, %%rdx\n\t" \
    "shlq $3, %%rcx\n\t" \
    "vpinsrq $0, %%rdx, %%xmm0, %%xmm0\n\t" \
    "vpinsrq $1, %%rcx, %%xmm0, %%xmm0\n\t" \
    "vpxor %%xmm0, " VAR(XR) ", " VAR(XR) "\n\t" \
    GHASH_GFMUL_RED_AVX1(XR, HR, XR) \
    "vpshufb %[BSWAP_MASK], " VAR(XR) ", " VAR(XR) "\n\t" \
    "vpxor " VAR(TR) ", " VAR(XR) ", %%xmm0\n\t" \

/* Store the tag (in xmm0) to %[tag]: one 16-byte store when tbytes == 16,
 * otherwise byte-by-byte through the stack. */
#define STORE_TAG_AVX() \
    "cmpl $16, %[tbytes]\n\t" \
    "je 71f\n\t" \
    "xorq %%rcx, %%rcx\n\t" \
    "vmovdqu %%xmm0, (%%rsp)\n\t" \
    "73:\n\t" \
    "movzbl (%%rsp,%%rcx,1), %%r13d\n\t" \
    "movb %%r13b, (%[tag],%%rcx,1)\n\t" \
    "incl %%ecx\n\t" \
    "cmpl %[tbytes], %%ecx\n\t" \
    "jne 73b\n\t" \
    "jmp 72f\n\t" \
    "\n" \
    "71:\n\t" \
    "vmovdqu %%xmm0, (%[tag])\n\t" \
    "\n" \
    "72:\n\t"

/* Compare the computed tag (xmm0) against %[tag]; writes 1 to *%[res] on
 * match, 0 otherwise. The partial-tag path ORs byte differences into %%al
 * so the result does not depend on which byte differs. */
#define CMP_TAG_AVX() \
    "cmpl $16, %[tbytes]\n\t" \
    "je 71f\n\t" \
    "subq $16, %%rsp\n\t" \
    "xorq %%rcx, %%rcx\n\t" \
    "xorq %%rax, %%rax\n\t" \
    "vmovdqu %%xmm0, (%%rsp)\n\t" \
    "\n" \
    "73:\n\t" \
    "movzbl (%%rsp,%%rcx,1), %%r13d\n\t" \
    "xorb (%[tag],%%rcx,1), %%r13b\n\t" \
    "orb %%r13b, %%al\n\t" \
    "incl %%ecx\n\t" \
    "cmpl %[tbytes], %%ecx\n\t" \
    "jne 73b\n\t" \
    "cmpb $0x00, %%al\n\t" \
    "sete %%al\n\t" \
    "addq $16, %%rsp\n\t" \
    "jmp 72f\n\t" \
    "\n" \
    "71:\n\t" \
    "vmovdqu (%[tag]), %%xmm1\n\t" \
    "vpcmpeqb %%xmm1, %%xmm0, %%xmm0\n\t" \
    "vpmovmskb %%xmm0, %%edx\n\t" \
    "# %%edx == 0xFFFF then return 1 else => return 0\n\t" \
    "xorl %%eax, %%eax\n\t" \
    "cmpl $0xffff, %%edx\n\t" \
    "sete %%al\n\t" \
    "\n" \
    "72:\n\t" \
    "movl %%eax, (%[res])\n\t"

/* AES-GCM encrypt using AVX1 + AES-NI + PCLMULQDQ.
 *
 * in      plaintext input, nbytes long
 * out     ciphertext output, nbytes long
 * addt    additional authenticated data, abytes long
 * ivec    initialization vector, ibytes long
 * tag     authentication tag output, tbytes written
 * key     expanded AES key schedule
 * nr      number of AES rounds (10, 12 or 14)
 *
 * iv/ivLen are pinned to rax/ebx because the IV-processing macros address
 * them through those registers directly. */
static void AES_GCM_encrypt_avx1(const unsigned char *in, unsigned char *out,
                                 const unsigned char* addt,
                                 const unsigned char* ivec, unsigned char *tag,
                                 unsigned int nbytes, unsigned int abytes,
                                 unsigned int ibytes, unsigned int tbytes,
                                 const unsigned char* key, int nr)
{
    register const unsigned char* iv asm("rax") = ivec;
    register unsigned int ivLen asm("ebx") = ibytes;

    __asm__ __volatile__ (
        "subq $" VAR(STACK_OFFSET) ", %%rsp\n\t"
        /* Counter is xmm13 */
        "vpxor %%xmm13, %%xmm13, %%xmm13\n\t"
        "vpxor " VAR(XR) ", " VAR(XR) ", " VAR(XR) "\n\t"
        /* Choose the 12-byte-IV fast path or the general GHASH-the-IV path. */
        "movl %[ibytes], %%edx\n\t"
        "cmpl $12, %%edx\n\t"
        "jne 35f\n\t"
        CALC_IV_12_AVX1()
        "\n"
        "35:\n\t"
        CALC_IV_AVX1()
        "\n"
        "39:\n\t"

        CALC_AAD_AVX1()

        "# Calculate counter and H\n\t"
        /* H = H * x in GF(2^128) (doubling with conditional reduction). */
        "vpsrlq $63, " VAR(HR) ", %%xmm5\n\t"
        "vpsllq $1, " VAR(HR) ", %%xmm4\n\t"
        "vpslldq $8, %%xmm5, %%xmm5\n\t"
        "vpor %%xmm5, %%xmm4, %%xmm4\n\t"
        "vpshufd $0xff, " VAR(HR) ", " VAR(HR) "\n\t"
        "vpsrad $31, " VAR(HR) ", " VAR(HR) "\n\t"
        "vpshufb %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t"
        "vpand %[MOD2_128], " VAR(HR) ", " VAR(HR) "\n\t"
        "vpaddd %[ONE], %%xmm13, %%xmm13\n\t"
        "vpxor %%xmm4, " VAR(HR) ", " VAR(HR) "\n\t"
        "vmovdqu %%xmm13, " VAR(CTR1) "\n\t"

        "xorl " VAR(KR) ", " VAR(KR) "\n\t"

#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL)
        /* 8-blocks-at-a-time unrolled path for inputs of 128+ bytes. */
        "cmpl $128, %[nbytes]\n\t"
        "movl %[nbytes], %%r13d\n\t"
        "jl 5f\n\t"
        "andl $0xffffff80, %%r13d\n\t"

        CALC_HT_8_AVX1()

        "# First 128 bytes of input\n\t"
        VAESENC_128()
        "cmpl $128, %%r13d\n\t"
        "movl $128, " VAR(KR) "\n\t"
        "jle 2f\n\t"

        "# More 128 bytes of input\n\t"
        "\n"
        "3:\n\t"
        VAESENC_128_GHASH_AVX1(%%rdx, 0)
        "addl $128, " VAR(KR) "\n\t"
        "cmpl %%r13d, " VAR(KR) "\n\t"
        "jl 3b\n\t"
        "\n"
        "2:\n\t"
        /* GHASH the final 8 ciphertext blocks (xmm4..xmm11) against the
         * pre-computed powers H^8..H^1 from the table at HTR. */
        "vmovdqa %[BSWAP_MASK], %%xmm13\n\t"
        "vpshufb %%xmm13, %%xmm4, %%xmm4\n\t"
        "vpshufb %%xmm13, %%xmm5, %%xmm5\n\t"
        "vpshufb %%xmm13, %%xmm6, %%xmm6\n\t"
        "vpshufb %%xmm13, %%xmm7, %%xmm7\n\t"
        "vpxor %%xmm2, %%xmm4, %%xmm4\n\t"
        "vpshufb %%xmm13, %%xmm8, %%xmm8\n\t"
        "vpshufb %%xmm13, %%xmm9, %%xmm9\n\t"
        "vpshufb %%xmm13, %%xmm10, %%xmm10\n\t"
        "vpshufb %%xmm13, %%xmm11, %%xmm11\n\t"

        "vmovdqu (" VAR(HTR) "), %%xmm12\n\t"
        "vmovdqu 16(" VAR(HTR) "), %%xmm14\n\t"
        GHASH_GFMUL_AVX1(XR, %%xmm13, %%xmm11, %%xmm12)
        GHASH_GFMUL_XOR_AVX1(XR, %%xmm13, %%xmm10, %%xmm14)
        "vmovdqu 32(" VAR(HTR) "), %%xmm12\n\t"
        "vmovdqu 48(" VAR(HTR) "), %%xmm14\n\t"
        GHASH_GFMUL_XOR_AVX1(XR, %%xmm13, %%xmm9, %%xmm12)
        GHASH_GFMUL_XOR_AVX1(XR, %%xmm13, %%xmm8, %%xmm14)
        "vmovdqu 64(" VAR(HTR) "), %%xmm12\n\t"
        "vmovdqu 80(" VAR(HTR) "), %%xmm14\n\t"
        GHASH_GFMUL_XOR_AVX1(XR, %%xmm13, %%xmm7, %%xmm12)
        GHASH_GFMUL_XOR_AVX1(XR, %%xmm13, %%xmm6, %%xmm14)
        "vmovdqu 96(" VAR(HTR) "), %%xmm12\n\t"
        "vmovdqu 112(" VAR(HTR) "), %%xmm14\n\t"
        GHASH_GFMUL_XOR_AVX1(XR, %%xmm13, %%xmm5, %%xmm12)
        GHASH_GFMUL_RED_XOR_AVX1(XR, %%xmm13, %%xmm4, %%xmm14)

        "vmovdqu 0(" VAR(HTR) "), " VAR(HR) "\n\t"
        "\n"
        "5:\n\t"
        "movl %[nbytes], %%edx\n\t"
        "cmpl %%edx, " VAR(KR) "\n\t"
        "jge 55f\n\t"
#endif

        /* Remaining whole 16-byte blocks, one at a time. */
        "movl %[nbytes], %%r13d\n\t"
        "andl $0xfffffff0, %%r13d\n\t"
        "cmpl %%r13d, " VAR(KR) "\n\t"
        "jge 14f\n\t"

        VAESENC_BLOCK()
        "addl $16, " VAR(KR) "\n\t"
        "cmpl %%r13d, " VAR(KR) "\n\t"
        "jge 13f\n\t"
        "\n"
        "12:\n\t"
        "vmovdqu (%[in]," VAR(KR64) ",1), %%xmm9\n\t"
        VAESENC_GFMUL(%%xmm9, HR, XR)
        "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t"
        "addl $16, " VAR(KR) "\n\t"
        "vpxor %%xmm4, " VAR(XR) ", " VAR(XR) "\n\t"
        "cmpl %%r13d, " VAR(KR) "\n\t"
        "jl 12b\n\t"
        "\n"
        "13:\n\t"
        GHASH_GFMUL_RED_AVX1(XR, HR, XR)
        "\n"
        "14:\n\t"

        /* Trailing partial block, then tag computation and store. */
        AESENC_LAST15_ENC_AVX1()
        "\n"
        "55:\n\t"

        CALC_TAG_AVX1()
        STORE_TAG_AVX()
        "addq $" VAR(STACK_OFFSET) ", %%rsp\n\t"
        "vzeroupper\n\t"

        :
        : [KEY] "r" (key),
          [in] "r" (in), [out] "r" (out), [nr] "r" (nr),
          [nbytes] "r" (nbytes), [abytes] "r" (abytes), [addt] "r" (addt),
          [ivec] "r" (iv), [ibytes] "r" (ivLen), [tbytes] "r" (tbytes),
          [tag] "r" (tag),
          [BSWAP_MASK] "m" (BSWAP_MASK),
          [BSWAP_EPI64] "m" (BSWAP_EPI64),
          [ONE] "m" (ONE),
#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL)
          [TWO] "m" (TWO), [THREE] "m" (THREE), [FOUR] "m" (FOUR),
          [FIVE] "m" (FIVE), [SIX] "m" (SIX), [SEVEN] "m" (SEVEN),
          [EIGHT] "m" (EIGHT),
#endif
          [MOD2_128] "m" (MOD2_128)
        : "xmm15", "xmm14", "xmm13", "xmm12",
          "xmm0", "xmm1", "xmm2", "xmm3", "memory",
          "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11",
          "rcx", "rdx", "r13"
    );
}

#ifdef HAVE_INTEL_AVX2
/* AVX2 variants: interleave one AES round across 8 blocks with the
 * carry-less multiply (GHASH) of a previous ciphertext block.
 */
#define VAESENC_PCLMUL_AVX2_1(src, o1, o2, o3) \
    "vmovdqu " #o2 "(" #src "), %%xmm12\n\t" \
    "vmovdqa " #o1 "(%[KEY]), %%xmm0\n\t" \
    "vpshufb %[BSWAP_MASK], %%xmm12, %%xmm12\n\t" \
    "vmovdqu " #o3 "(" VAR(HTR) "), %%xmm13\n\t" \
    "vpxor %%xmm2, %%xmm12, %%xmm12\n\t" \
    "vpclmulqdq $0x10, %%xmm13, %%xmm12, %%xmm1\n\t" \
    "vpclmulqdq $0x01, %%xmm13, %%xmm12, %%xmm14\n\t" \
    "vpclmulqdq $0x00, %%xmm13, %%xmm12, %%xmm2\n\t" \
    "vpclmulqdq $0x11, %%xmm13, %%xmm12, %%xmm3\n\t" \
    "vaesenc %%xmm0, %%xmm4, %%xmm4\n\t" \
    "vaesenc %%xmm0, %%xmm5, %%xmm5\n\t" \
    "vaesenc %%xmm0, %%xmm6, %%xmm6\n\t" \
    "vaesenc %%xmm0, %%xmm7, %%xmm7\n\t" \
    "vaesenc %%xmm0, %%xmm8, %%xmm8\n\t" \
    "vaesenc %%xmm0, %%xmm9, %%xmm9\n\t" \
    "vaesenc %%xmm0, %%xmm10, %%xmm10\n\t" \
    "vaesenc %%xmm0, %%xmm11, %%xmm11\n\t" \

/* Second step: multiply the next ciphertext block by its H power and
 * accumulate the partial products into xmm1/xmm3. */
#define VAESENC_PCLMUL_AVX2_2(src, o1, o2, o3) \
    "vmovdqu " #o2 "(" #src "), %%xmm12\n\t" \
    "vmovdqu " #o3 "(" VAR(HTR) "), %%xmm0\n\t" \
    "vpshufb %[BSWAP_MASK], %%xmm12, %%xmm12\n\t" \
    "vpxor %%xmm14, %%xmm1, %%xmm1\n\t" \
    "vpclmulqdq $0x10, %%xmm0, %%xmm12, %%xmm13\n\t" \
    "vpclmulqdq $0x01, %%xmm0, %%xmm12, %%xmm14\n\t" \
    "vpclmulqdq $0x00, %%xmm0, %%xmm12, %%xmm15\n\t" \
    "vpclmulqdq $0x11, %%xmm0, %%xmm12, %%xmm12\n\t" \
    "vmovdqa " #o1 "(%[KEY]), %%xmm0\n\t" \
    "vpxor %%xmm13, %%xmm1, %%xmm1\n\t" \
    "vpxor %%xmm12, %%xmm3, %%xmm3\n\t" \
    "vaesenc %%xmm0, %%xmm4, %%xmm4\n\t" \
    "vaesenc %%xmm0, %%xmm5, %%xmm5\n\t" \
    "vaesenc %%xmm0, %%xmm6, %%xmm6\n\t" \
    "vaesenc %%xmm0, %%xmm7, %%xmm7\n\t" \
    "vaesenc %%xmm0, %%xmm8, %%xmm8\n\t" \
    "vaesenc %%xmm0, %%xmm9, %%xmm9\n\t" \
    "vaesenc %%xmm0, %%xmm10, %%xmm10\n\t" \
    "vaesenc %%xmm0, %%xmm11, %%xmm11\n\t" \

/* Middle steps: same as _2 but also folds xmm15 into xmm2. */
#define VAESENC_PCLMUL_AVX2_N(src, o1, o2, o3) \
    "vmovdqu " #o2 "(" #src "), %%xmm12\n\t" \
"vmovdqu " #o3 "(" VAR(HTR) "), %%xmm0\n\t" \
    "vpshufb %[BSWAP_MASK], %%xmm12, %%xmm12\n\t" \
    "vpxor %%xmm14, %%xmm1, %%xmm1\n\t" \
    "vpxor %%xmm15, %%xmm2, %%xmm2\n\t" \
    "vpclmulqdq $0x10, %%xmm0, %%xmm12, %%xmm13\n\t" \
    "vpclmulqdq $0x01, %%xmm0, %%xmm12, %%xmm14\n\t" \
    "vpclmulqdq $0x00, %%xmm0, %%xmm12, %%xmm15\n\t" \
    "vpclmulqdq $0x11, %%xmm0, %%xmm12, %%xmm12\n\t" \
    "vmovdqa " #o1 "(%[KEY]), %%xmm0\n\t" \
    "vpxor %%xmm13, %%xmm1, %%xmm1\n\t" \
    "vpxor %%xmm12, %%xmm3, %%xmm3\n\t" \
    "vaesenc %%xmm0, %%xmm4, %%xmm4\n\t" \
    "vaesenc %%xmm0, %%xmm5, %%xmm5\n\t" \
    "vaesenc %%xmm0, %%xmm6, %%xmm6\n\t" \
    "vaesenc %%xmm0, %%xmm7, %%xmm7\n\t" \
    "vaesenc %%xmm0, %%xmm8, %%xmm8\n\t" \
    "vaesenc %%xmm0, %%xmm9, %%xmm9\n\t" \
    "vaesenc %%xmm0, %%xmm10, %%xmm10\n\t" \
    "vaesenc %%xmm0, %%xmm11, %%xmm11\n\t" \

/* Last interleaved AES-round + GHASH-reduction step of the 8-block unrolled
 * loop: folds xmm1/xmm2/xmm3 (accumulated 256-bit GHASH product) down to
 * 128 bits via two MOD2_128 reductions while running round key "o" through
 * the 8 counter blocks (xmm4..xmm11). */
#define VAESENC_PCLMUL_AVX2_L(o) \
    "vpxor %%xmm14, %%xmm1, %%xmm1\n\t" \
    "vpxor %%xmm15, %%xmm2, %%xmm2\n\t" \
    "vpslldq $8, %%xmm1, %%xmm12\n\t" \
    "vpsrldq $8, %%xmm1, %%xmm1\n\t" \
    "vmovdqa "#o"(%[KEY]), %%xmm15\n\t" \
    "vmovdqa %[MOD2_128], %%xmm0\n\t" \
    "vaesenc %%xmm15, %%xmm4, %%xmm4\n\t" \
    "vpxor %%xmm12, %%xmm2, %%xmm2\n\t" \
    "vpxor %%xmm1, %%xmm3, %%xmm3\n\t" \
    "vpclmulqdq $0x10, %%xmm0, %%xmm2, %%xmm14\n\t" \
    "vaesenc %%xmm15, %%xmm5, %%xmm5\n\t" \
    "vaesenc %%xmm15, %%xmm6, %%xmm6\n\t" \
    "vaesenc %%xmm15, %%xmm7, %%xmm7\n\t" \
    "vpshufd $0x4e, %%xmm2, %%xmm2\n\t" \
    "vpxor %%xmm14, %%xmm2, %%xmm2\n\t" \
    "vpclmulqdq $0x10, %%xmm0, %%xmm2, %%xmm14\n\t" \
    "vaesenc %%xmm15, %%xmm8, %%xmm8\n\t" \
    "vaesenc %%xmm15, %%xmm9, %%xmm9\n\t" \
    "vaesenc %%xmm15, %%xmm10, %%xmm10\n\t" \
    "vpshufd $0x4e, %%xmm2, %%xmm2\n\t" \
    "vpxor %%xmm14, %%xmm2, %%xmm2\n\t" \
    "vpxor %%xmm3, %%xmm2, %%xmm2\n\t" \
    "vaesenc %%xmm15, %%xmm11, %%xmm11\n\t"

/* Encrypt one 16-byte block in CTR mode and fold the ciphertext into the
 * running GHASH value XR.  Handles 10/12/14 AES rounds by branching on
 * %[nr] ($11 / $13) to local "%=" labels.  Counter lives at CTR1 on the
 * stack; data is addressed as in/out + KR64. */
#define VAESENC_BLOCK_AVX2() \
    "vmovdqu " VAR(CTR1) ", %%xmm5\n\t" \
    "vpshufb %[BSWAP_EPI64], %%xmm5, %%xmm4\n\t" \
    "vpaddd %[ONE], %%xmm5, %%xmm5\n\t" \
    "vmovdqu %%xmm5, " VAR(CTR1) "\n\t" \
    "vpxor (%[KEY]), %%xmm4, %%xmm4\n\t" \
    "vaesenc 16(%[KEY]), %%xmm4, %%xmm4\n\t" \
    "vaesenc 32(%[KEY]), %%xmm4, %%xmm4\n\t" \
    "vaesenc 48(%[KEY]), %%xmm4, %%xmm4\n\t" \
    "vaesenc 64(%[KEY]), %%xmm4, %%xmm4\n\t" \
    "vaesenc 80(%[KEY]), %%xmm4, %%xmm4\n\t" \
    "vaesenc 96(%[KEY]), %%xmm4, %%xmm4\n\t" \
    "vaesenc 112(%[KEY]), %%xmm4, %%xmm4\n\t" \
    "vaesenc 128(%[KEY]), %%xmm4, %%xmm4\n\t" \
    "vaesenc 144(%[KEY]), %%xmm4, %%xmm4\n\t" \
    "cmpl $11, %[nr]\n\t" \
    "vmovdqa 160(%[KEY]), %%xmm5\n\t" \
    "jl %=f\n\t" \
    "vaesenc %%xmm5, %%xmm4, %%xmm4\n\t" \
    "vaesenc 176(%[KEY]), %%xmm4, %%xmm4\n\t" \
    "cmpl $13, %[nr]\n\t" \
    "vmovdqa 192(%[KEY]), %%xmm5\n\t" \
    "jl %=f\n\t" \
    "vaesenc %%xmm5, %%xmm4, %%xmm4\n\t" \
    "vaesenc 208(%[KEY]), %%xmm4, %%xmm4\n\t" \
    "vmovdqa 224(%[KEY]), %%xmm5\n\t" \
    "%=:\n\t" \
    "vaesenclast %%xmm5, %%xmm4, %%xmm4\n\t" \
    "vmovdqu (%[in]," VAR(KR64) ",1), %%xmm5\n\t" \
    "vpxor %%xmm5, %%xmm4, %%xmm4\n\t" \
    "vmovdqu %%xmm4, (%[out]," VAR(KR64) ",1)\n\t" \
    "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \
    "vpxor %%xmm4, " VAR(XR) ", " VAR(XR) "\n\t"

/* Karatsuba multiplication - slower
 * H01 = H[1] ^ H[0] (top and bottom 64-bits XORed)
 */
#define _VAESENC_GFMUL_AVX2(in, H, X, ctr1, H01) \
    "vpxor (%[KEY]), %%xmm4, %%xmm4\n\t" \
    "vaesenc 16(%[KEY]), %%xmm4, %%xmm4\n\t" \
    "vaesenc 32(%[KEY]), %%xmm4, %%xmm4\n\t" \
    "vaesenc 48(%[KEY]), %%xmm4, %%xmm4\n\t" \
    "vaesenc 64(%[KEY]), %%xmm4, %%xmm4\n\t" \
    "vaesenc 80(%[KEY]), %%xmm4, %%xmm4\n\t" \
    "vaesenc 96(%[KEY]), %%xmm4, %%xmm4\n\t" \
    "vaesenc 112(%[KEY]), %%xmm4, %%xmm4\n\t" \
    "vaesenc 128(%[KEY]), %%xmm4, %%xmm4\n\t" \
    "vaesenc 144(%[KEY]), %%xmm4, %%xmm4\n\t" \
    "cmpl $11, %[nr]\n\t" \
    "vmovdqa 160(%[KEY]), %%xmm5\n\t" \
    "jl %=f\n\t" \
    "vaesenc %%xmm5, %%xmm4, %%xmm4\n\t" \
    "vaesenc 176(%[KEY]), %%xmm4, %%xmm4\n\t" \
    "cmpl $13, %[nr]\n\t" \
    "vmovdqa 192(%[KEY]), %%xmm5\n\t" \
    "jl %=f\n\t" \
    "vaesenc %%xmm5, %%xmm4, %%xmm4\n\t" \
    "vaesenc 208(%[KEY]), %%xmm4, %%xmm4\n\t" \
    "vmovdqa 224(%[KEY]), %%xmm5\n\t" \
    "%=:\n\t" \
    "vaesenclast %%xmm5, %%xmm4, %%xmm4\n\t" \
    "vmovdqu " #in ", %%xmm0\n\t" \
    "vpxor %%xmm0, %%xmm4, %%xmm4\n\t" \
    \
    "vpsrldq $8, " #X ", %%xmm2\n\t" \
    "vpxor " #X ", %%xmm2, %%xmm2\n\t" \
    "vpclmulqdq $0x00, " #H ", " #X ", %%xmm5\n\t" \
    "vpclmulqdq $0x11, " #H ", " #X ", %%xmm8\n\t" \
    "vpclmulqdq $0x00, "#H01", %%xmm2, %%xmm7\n\t" \
    "vpxor %%xmm5, %%xmm7, %%xmm7\n\t" \
    "vpxor %%xmm8, %%xmm7, %%xmm7\n\t" \
    "vpslldq $8, %%xmm7, %%xmm6\n\t" \
    "vpsrldq $8, %%xmm7, %%xmm7\n\t" \
    "vpxor %%xmm7, %%xmm8, %%xmm8\n\t" \
    "vpxor %%xmm5, %%xmm6, %%xmm6\n\t" \
    \
    "vpclmulqdq $0x10, %[MOD2_128], %%xmm6, %%xmm5\n\t" \
    "vpshufd $0x4e, %%xmm6, %%xmm6\n\t" \
    "vpxor %%xmm5, %%xmm6, %%xmm6\n\t" \
    "vpclmulqdq $0x10, %[MOD2_128], %%xmm6, %%xmm5\n\t" \
    "vpshufd $0x4e, %%xmm6, %%xmm6\n\t" \
    "vpxor %%xmm8, %%xmm6, %%xmm6\n\t" \
    "vpxor %%xmm5, %%xmm6, " VAR(XR) "\n\t"
/* NOTE(review): this wrapper passes 4 arguments to the 5-parameter
 * _VAESENC_GFMUL_AVX2 (H01 is missing) and would not expand if used.
 * No use of VAESENC_GFMUL_AVX2 is visible in this file region — presumably
 * dead code retained for reference; confirm against the rest of the file. */
#define VAESENC_GFMUL_AVX2(in, H, X, ctr1) \
    _VAESENC_GFMUL_AVX2(in, H, X, ctr1)

/* Single-block AES-CTR encrypt interleaved with a full GHASH multiply of X
 * by H using the school-book (SB) method plus two-step MOD2_128 reduction.
 * Result GHASH value lands in XR; keystream-XORed block stays in xmm4. */
#define _VAESENC_GFMUL_SB_AVX2(in, H, X, ctr1) \
    "vpclmulqdq $0x10, " #H ", " #X ", %%xmm7\n\t" \
    "vpclmulqdq $0x01, " #H ", " #X ", %%xmm6\n\t" \
    "vpclmulqdq $0x00, " #H ", " #X ", %%xmm5\n\t" \
    "vpclmulqdq $0x11, " #H ", " #X ", %%xmm8\n\t" \
    "vpxor (%[KEY]), %%xmm4, %%xmm4\n\t" \
    "vaesenc 16(%[KEY]), %%xmm4, %%xmm4\n\t" \
    "vpxor %%xmm6, %%xmm7, %%xmm7\n\t" \
    "vpslldq $8, %%xmm7, %%xmm6\n\t" \
    "vpsrldq $8, %%xmm7, %%xmm7\n\t" \
    "vaesenc 32(%[KEY]), %%xmm4, %%xmm4\n\t" \
    "vpxor %%xmm5, %%xmm6, %%xmm6\n\t" \
    "vpclmulqdq $0x10, %[MOD2_128], %%xmm6, %%xmm5\n\t" \
    "vaesenc 48(%[KEY]), %%xmm4, %%xmm4\n\t" \
    "vaesenc 64(%[KEY]), %%xmm4, %%xmm4\n\t" \
    "vaesenc 80(%[KEY]), %%xmm4, %%xmm4\n\t" \
    "vpshufd $0x4e, %%xmm6, %%xmm6\n\t" \
    "vpxor %%xmm5, %%xmm6, %%xmm6\n\t" \
    "vpclmulqdq $0x10, %[MOD2_128], %%xmm6, %%xmm5\n\t" \
    "vaesenc 96(%[KEY]), %%xmm4, %%xmm4\n\t" \
    "vaesenc 112(%[KEY]), %%xmm4, %%xmm4\n\t" \
    "vaesenc 128(%[KEY]), %%xmm4, %%xmm4\n\t" \
    "vpshufd $0x4e, %%xmm6, %%xmm6\n\t" \
    "vaesenc 144(%[KEY]), %%xmm4, %%xmm4\n\t" \
    "vpxor %%xmm7, %%xmm8, %%xmm8\n\t" \
    "vpxor %%xmm8, %%xmm6, %%xmm6\n\t" \
    "cmpl $11, %[nr]\n\t" \
    "vmovdqa 160(%[KEY]), %%xmm3\n\t" \
    "jl %=f\n\t" \
    "vaesenc %%xmm3, %%xmm4, %%xmm4\n\t" \
    "vaesenc 176(%[KEY]), %%xmm4, %%xmm4\n\t" \
    "cmpl $13, %[nr]\n\t" \
    "vmovdqa 192(%[KEY]), %%xmm3\n\t" \
    "jl %=f\n\t" \
    "vaesenc %%xmm3, %%xmm4, %%xmm4\n\t" \
    "vaesenc 208(%[KEY]), %%xmm4, %%xmm4\n\t" \
    "vmovdqa 224(%[KEY]), %%xmm3\n\t" \
    "%=:\n\t" \
    "vaesenclast %%xmm3, %%xmm4, %%xmm4\n\t" \
    "vpxor %%xmm5, %%xmm6, " VAR(XR) "\n\t" \
    "vmovdqu " #in ", %%xmm5\n\t" \
    "vpxor %%xmm5, %%xmm4, %%xmm4\n\t"
#define VAESENC_GFMUL_SB_AVX2(in, H, X, ctr1) \
    _VAESENC_GFMUL_SB_AVX2(in, H, X, ctr1)


/* 128x128 carry-less multiply a*b -> 256-bit result split into r (high) and
 * r2 (low); no reduction. */
#define _GHASH_GFMUL_AVX2(r, r2, a, b) \
    "vpclmulqdq $0x10, "#a", "#b", %%xmm2\n\t" \
    "vpclmulqdq $0x01, "#a", "#b", %%xmm1\n\t" \
    "vpclmulqdq $0x00, "#a", "#b", %%xmm0\n\t" \
    "vpclmulqdq $0x11, "#a", "#b", %%xmm3\n\t" \
    "vpxor %%xmm1, %%xmm2, %%xmm2\n\t" \
    "vpslldq $8, %%xmm2, %%xmm1\n\t" \
    "vpsrldq $8, %%xmm2, %%xmm2\n\t" \
    "vpxor %%xmm1, %%xmm0, "#r2"\n\t" \
    "vpxor %%xmm2, %%xmm3, " #r "\n\t"
#define GHASH_GFMUL_AVX2(r, r2, a, b) \
    _GHASH_GFMUL_AVX2(r, r2, a, b)

/* Shift the 256-bit value (r:r2) left by one bit across the two registers. */
#define GHASH_MID_AVX2(r, r2) \
    "vpsrld $31, "#r2", %%xmm0\n\t" \
    "vpsrld $31, " #r ", %%xmm1\n\t" \
    "vpslld $1, "#r2", "#r2"\n\t" \
    "vpslld $1, " #r ", " #r "\n\t" \
    "vpsrldq $12, %%xmm0, %%xmm2\n\t" \
    "vpslldq $4, %%xmm0, %%xmm0\n\t" \
    "vpslldq $4, %%xmm1, %%xmm1\n\t" \
    "vpor %%xmm2, " #r ", " #r "\n\t" \
    "vpor %%xmm0, "#r2", "#r2"\n\t" \
    "vpor %%xmm1, " #r ", " #r "\n\t"

/* GF(2^128) multiply a*b with full MOD2_128 reduction; 128-bit result in r. */
#define _GHASH_GFMUL_RED_AVX2(r, a, b) \
    "vpclmulqdq $0x10, "#a", "#b", %%xmm7\n\t" \
    "vpclmulqdq $0x01, "#a", "#b", %%xmm6\n\t" \
    "vpclmulqdq $0x00, "#a", "#b", %%xmm5\n\t" \
    "vpxor %%xmm6, %%xmm7, %%xmm7\n\t" \
    "vpslldq $8, %%xmm7, %%xmm6\n\t" \
    "vpsrldq $8, %%xmm7, %%xmm7\n\t" \
    "vpxor %%xmm5, %%xmm6, %%xmm6\n\t" \
    "vpclmulqdq $0x11, "#a", "#b", %%xmm8\n\t" \
    "vpclmulqdq $0x10, %[MOD2_128], %%xmm6, %%xmm5\n\t" \
    "vpshufd $0x4e, %%xmm6, %%xmm6\n\t" \
    "vpxor %%xmm5, %%xmm6, %%xmm6\n\t" \
    "vpclmulqdq $0x10, %[MOD2_128], %%xmm6, %%xmm5\n\t" \
    "vpshufd $0x4e, %%xmm6, %%xmm6\n\t" \
    "vpxor %%xmm7, %%xmm8, %%xmm8\n\t" \
    "vpxor %%xmm8, %%xmm6, %%xmm6\n\t" \
    "vpxor %%xmm5, %%xmm6, " #r "\n\t"
#define GHASH_GFMUL_RED_AVX2(r, a, b) \
    _GHASH_GFMUL_RED_AVX2(r, a, b)

/* GF(2^128) square of a with reduction by mod128 (register holding the
 * reduction polynomial); result in r.  Squaring needs only two CLMULs
 * because the cross terms cancel in GF(2). */
#define _GHASH_GFSQR_RED2_AVX2(r, a, mod128) \
    "vpclmulqdq $0x00, "#a", "#a", %%xmm6\n\t" \
    "vpclmulqdq $0x11, "#a", "#a", %%xmm8\n\t" \
    "vpclmulqdq $0x10, "#mod128", %%xmm6, %%xmm5\n\t" \
    "vpshufd $0x4e, %%xmm6, %%xmm6\n\t" \
    "vpxor %%xmm5, %%xmm6, %%xmm6\n\t" \
    "vpclmulqdq $0x10, "#mod128", %%xmm6, %%xmm5\n\t" \
    "vpshufd $0x4e, %%xmm6, %%xmm6\n\t" \
    "vpxor %%xmm5, %%xmm6, %%xmm6\n\t" \
    "vpxor %%xmm6, %%xmm8, " #r "\n\t"
#define GHASH_GFSQR_RED2_AVX2(r, a, mod128) \
    _GHASH_GFSQR_RED2_AVX2(r, a, mod128)

/* Compute both a*b (reduced, into rm) and b^2 (reduced, into rs) with the
 * two CLMUL streams interleaved for throughput. */
#define _GHASH_GFMUL_SQR_RED2_AVX2(rm, rs, a, b, mod128) \
    "vpclmulqdq $0x10, "#a", "#b", %%xmm7\n\t" \
    "vpclmulqdq $0x01, "#a", "#b", %%xmm6\n\t" \
    "vpclmulqdq $0x00, "#a", "#b", %%xmm5\n\t" \
    "vpclmulqdq $0x11, "#a", "#b", %%xmm8\n\t" \
    "vpclmulqdq $0x00, "#b", "#b", %%xmm9\n\t" \
    "vpclmulqdq $0x11, "#b", "#b", %%xmm10\n\t" \
    "vpxor %%xmm6, %%xmm7, %%xmm7\n\t" \
    "vpslldq $8, %%xmm7, %%xmm6\n\t" \
    "vpsrldq $8, %%xmm7, %%xmm7\n\t" \
    "vpxor %%xmm5, %%xmm6, %%xmm6\n\t" \
    "vpclmulqdq $0x10, "#mod128", %%xmm9, %%xmm4\n\t" \
    "vpclmulqdq $0x10, "#mod128", %%xmm6, %%xmm5\n\t" \
    "vpshufd $0x4e, %%xmm6, %%xmm6\n\t" \
    "vpshufd $0x4e, %%xmm9, %%xmm9\n\t" \
    "vpxor %%xmm5, %%xmm6, %%xmm6\n\t" \
    "vpxor %%xmm4, %%xmm9, %%xmm9\n\t" \
    "vpclmulqdq $0x10, "#mod128", %%xmm6, %%xmm5\n\t" \
    "vpclmulqdq $0x10, "#mod128", %%xmm9, %%xmm4\n\t" \
    "vpshufd $0x4e, %%xmm6, %%xmm6\n\t" \
    "vpshufd $0x4e, %%xmm9, %%xmm9\n\t" \
    "vpxor %%xmm7, %%xmm8, %%xmm8\n\t" \
    "vpxor %%xmm4, %%xmm9, %%xmm9\n\t" \
    "vpxor %%xmm8, %%xmm6, %%xmm6\n\t" \
    "vpxor %%xmm10, %%xmm9, "#rs"\n\t" \
    "vpxor %%xmm5, %%xmm6, "#rm"\n\t"
#define GHASH_GFMUL_SQR_RED2_AVX2(rm, rs, a, b, mod128) \
    _GHASH_GFMUL_SQR_RED2_AVX2(rm, rs, a, b, mod128)

/* Build the table of H^1..H^8 at HTR (16 bytes each) for the 8-block
 * unrolled GHASH; computed pairwise as (mul, square) from lower powers. */
#define CALC_HT_8_AVX2() \
    "vmovdqa %[MOD2_128], %%xmm11\n\t" \
    "vmovdqa " VAR(XR) ", %%xmm2\n\t" \
    "# H ^ 1 and H ^ 2\n\t" \
    GHASH_GFSQR_RED2_AVX2(%%xmm0, HR, %%xmm11) \
    "vmovdqu " VAR(HR) ", 0(" VAR(HTR) ")\n\t" \
    "vmovdqu %%xmm0 , 16(" VAR(HTR) ")\n\t" \
    "# H ^ 3 and H ^ 4\n\t" \
    GHASH_GFMUL_SQR_RED2_AVX2(%%xmm1, %%xmm3, HR, %%xmm0, %%xmm11) \
    "vmovdqu %%xmm1 , 32(" VAR(HTR) ")\n\t" \
    "vmovdqu %%xmm3 , 48(" VAR(HTR) ")\n\t" \
    "# H ^ 5 and H ^ 6\n\t" \
    GHASH_GFMUL_SQR_RED2_AVX2(%%xmm12, %%xmm0, %%xmm0, %%xmm1, %%xmm11) \
    "vmovdqu %%xmm12, 64(" VAR(HTR) ")\n\t" \
    "vmovdqu %%xmm0 , 80(" VAR(HTR) ")\n\t" \
    "# H ^ 7 and H ^ 8\n\t" \
    GHASH_GFMUL_SQR_RED2_AVX2(%%xmm12, %%xmm0, %%xmm1, %%xmm3, %%xmm11) \
    "vmovdqu %%xmm12, 96(" VAR(HTR) ")\n\t" \
    "vmovdqu %%xmm0 , 112(" VAR(HTR) ")\n\t"

/* Reduce the 256-bit value (r:r2) modulo the GHASH polynomial into r. */
#define _GHASH_RED_AVX2(r, r2) \
    "vmovdqa %[MOD2_128], %%xmm2\n\t" \
    "vpclmulqdq $0x10, %%xmm2, "#r2", %%xmm0\n\t" \
    "vpshufd $0x4e, "#r2", %%xmm1\n\t" \
    "vpxor %%xmm0, %%xmm1, %%xmm1\n\t" \
    "vpclmulqdq $0x10, %%xmm2, %%xmm1, %%xmm0\n\t" \
    "vpshufd $0x4e, %%xmm1, %%xmm1\n\t" \
    "vpxor %%xmm0, %%xmm1, %%xmm1\n\t" \
    "vpxor %%xmm1, " #r ", " #r "\n\t"
#define GHASH_RED_AVX2(r, r2) \
    _GHASH_RED_AVX2(r, r2)

/* Full GHASH multiply: multiply, bit-shift correction, then reduction. */
#define GHASH_FULL_AVX2(r, r2, a, b) \
    GHASH_GFMUL_AVX2(r, r2, a, b) \
    GHASH_MID_AVX2(r, r2) \
    GHASH_RED_AVX2(r, r2)

/* First multiply of an 8-term GHASH sum: initializes the three accumulator
 * registers (r = high, r2 = low, r3 = middle cross terms). */
#define _GFMUL_3V_AVX2(r, r2, r3, a, b) \
    "vpclmulqdq $0x10, "#a", "#b", "#r3"\n\t" \
    "vpclmulqdq $0x01, "#a", "#b", %%xmm1\n\t" \
    "vpclmulqdq $0x00, "#a", "#b", "#r2"\n\t" \
    "vpclmulqdq $0x11, "#a", "#b", " #r "\n\t" \
    "vpxor %%xmm1, "#r3", "#r3"\n\t"
#define GFMUL_3V_AVX2(r, r2, r3, a, b) \
    _GFMUL_3V_AVX2(r, r2, r3, a, b)

/* Subsequent multiplies of an 8-term GHASH sum: XOR the partial products
 * into the three accumulators set up by GFMUL_3V_AVX2. */
#define _GFMUL_XOR_3V_AVX2(r, r2, r3, a, b) \
    "vpclmulqdq $0x10, "#a", "#b", %%xmm2\n\t" \
    "vpclmulqdq $0x01, "#a", "#b", %%xmm1\n\t" \
    "vpclmulqdq $0x00, "#a", "#b", %%xmm0\n\t" \
    "vpclmulqdq $0x11, "#a", "#b", %%xmm3\n\t" \
    "vpxor %%xmm1, %%xmm2, %%xmm2\n\t" \
    "vpxor %%xmm3, " #r ", " #r "\n\t" \
    "vpxor %%xmm2, "#r3", "#r3"\n\t" \
    "vpxor %%xmm0, "#r2", "#r2"\n\t"
#define GFMUL_XOR_3V_AVX2(r, r2, r3, a, b) \
    _GFMUL_XOR_3V_AVX2(r, r2, r3, a, b)

/* GHASH of 8 blocks (xmm4..xmm11) against H^8..H^1 from the HTR table,
 * accumulated into XR with a single final reduction. */
#define GHASH_GFMUL_RED_8_AVX2() \
    "vmovdqu (" VAR(HTR) "), %%xmm12\n\t" \
    GFMUL_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm11, %%xmm12) \
    "vmovdqu 16(" VAR(HTR) "), %%xmm12\n\t" \
    GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm10, %%xmm12) \
    "vmovdqu 32(" VAR(HTR) "), %%xmm11\n\t" \
    "vmovdqu 48(" VAR(HTR) "), %%xmm12\n\t" \
    GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm9, %%xmm11) \
    GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm8, %%xmm12) \
    "vmovdqu 64(" VAR(HTR) "), %%xmm11\n\t" \
    "vmovdqu 80(" VAR(HTR) "), %%xmm12\n\t" \
    GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm7, %%xmm11) \
    GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm6, %%xmm12) \
    "vmovdqu 96(" VAR(HTR) "), %%xmm11\n\t" \
    "vmovdqu 112(" VAR(HTR) "), %%xmm12\n\t" \
    GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm5, %%xmm11) \
    GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm4, %%xmm12) \
    "vpslldq $8, %%xmm14, %%xmm12\n\t" \
    "vpsrldq $8, %%xmm14, %%xmm14\n\t" \
    "vpxor %%xmm12, %%xmm13, %%xmm13\n\t" \
    "vpxor %%xmm14, " VAR(XR) ", " VAR(XR) "\n\t" \
    GHASH_RED_AVX2(XR, %%xmm13)

/* IV setup for the common 12-byte IV case: counter = IV || 0x00000001,
 * then computes H = E_K(0) and the pre-counter block T = E_K(counter),
 * sharing round keys between the two encryptions.  IV pointer is in rax. */
#define CALC_IV_12_AVX2() \
    "# Calculate values when IV is 12 bytes\n\t" \
    "# Set counter based on IV\n\t" \
    "movl $0x01000000, %%ecx\n\t" \
    "vpinsrq $0, 0(%%rax), %%xmm13, %%xmm13\n\t" \
    "vpinsrd $2, 8(%%rax), %%xmm13, %%xmm13\n\t" \
    "vpinsrd $3, %%ecx, %%xmm13, %%xmm13\n\t" \
    "# H = Encrypt X(=0) and T = Encrypt counter\n\t" \
    "vmovdqa 0(%[KEY]), " VAR(HR) "\n\t" \
    "vmovdqa 16(%[KEY]), %%xmm12\n\t" \
    "vpxor " VAR(HR) ", %%xmm13, %%xmm1\n\t" \
    "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \
    "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \
    "vmovdqa 32(%[KEY]), %%xmm0\n\t" \
    "vmovdqa 48(%[KEY]), %%xmm12\n\t" \
    "vaesenc %%xmm0, " VAR(HR) ", " VAR(HR) "\n\t" \
    "vaesenc %%xmm0, %%xmm1, %%xmm1\n\t" \
    "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \
    "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \
    "vmovdqa 64(%[KEY]), %%xmm0\n\t" \
    "vmovdqa 80(%[KEY]), %%xmm12\n\t" \
    "vaesenc %%xmm0, " VAR(HR) ", " VAR(HR) "\n\t" \
    "vaesenc %%xmm0, %%xmm1, %%xmm1\n\t" \
    "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \
    "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \
    "vmovdqa 96(%[KEY]), %%xmm0\n\t" \
    "vmovdqa 112(%[KEY]), %%xmm12\n\t" \
    "vaesenc %%xmm0, " VAR(HR) ", " VAR(HR) "\n\t" \
    "vaesenc %%xmm0, %%xmm1, %%xmm1\n\t" \
    "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \
    "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \
    "vmovdqa 128(%[KEY]), %%xmm0\n\t" \
    "vmovdqa 144(%[KEY]), %%xmm12\n\t" \
    "vaesenc %%xmm0, " VAR(HR) ", " VAR(HR) "\n\t" \
    "vaesenc %%xmm0, %%xmm1, %%xmm1\n\t" \
    "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \
    "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \
    "cmpl $11, %[nr]\n\t" \
    "vmovdqa 160(%[KEY]), %%xmm0\n\t" \
    "jl 31f\n\t" \
    "vmovdqa 176(%[KEY]), %%xmm12\n\t" \
    "vaesenc %%xmm0, " VAR(HR) ", " VAR(HR) "\n\t" \
    "vaesenc %%xmm0, %%xmm1, %%xmm1\n\t" \
    "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \
    "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \
    "cmpl $13, %[nr]\n\t" \
    "vmovdqa 192(%[KEY]), %%xmm0\n\t" \
    "jl 31f\n\t" \
    "vmovdqa 208(%[KEY]), %%xmm12\n\t" \
    "vaesenc %%xmm0, " VAR(HR) ", " VAR(HR) "\n\t" \
    "vaesenc %%xmm0, %%xmm1, %%xmm1\n\t" \
    "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \
    "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \
    "vmovdqu 224(%[KEY]), %%xmm0\n\t" \
    "31:\n\t" \
    "vaesenclast %%xmm0, " VAR(HR) ", " VAR(HR) "\n\t" \
    "vaesenclast %%xmm0, %%xmm1, %%xmm1\n\t" \
    "vpshufb %[BSWAP_MASK], " VAR(HR) ", " VAR(HR) "\n\t" \
    "vmovdqu %%xmm1, " VAR(TR) "\n\t" \

/* IV setup for a non-12-byte IV: H = E_K(0), then counter = GHASH(IV)
 * per SP 800-38D (16-byte chunks at label 43, partial tail at 44 via a
 * zero-padded stack buffer, length block at 45), then T = E_K(counter). */
#define CALC_IV_AVX2() \
    "# Calculate values when IV is not 12 bytes\n\t" \
    "# H = Encrypt X(=0)\n\t" \
    "vmovdqa 0(%[KEY]), " VAR(HR) "\n\t" \
    VAESENC_AVX(HR) \
    "vpshufb %[BSWAP_MASK], " VAR(HR) ", " VAR(HR) "\n\t" \
    "# Calc counter\n\t" \
    "# Initialization vector\n\t" \
    "cmpl $0, %%edx\n\t" \
    "movq $0, %%rcx\n\t" \
    "je 45f\n\t" \
    "cmpl $16, %%edx\n\t" \
    "jl 44f\n\t" \
    "andl $0xfffffff0, %%edx\n\t" \
    "\n" \
    "43:\n\t" \
    "vmovdqu (%%rax,%%rcx,1), %%xmm4\n\t" \
    "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \
    "vpxor %%xmm4, %%xmm13, %%xmm13\n\t" \
    GHASH_FULL_AVX2(%%xmm13, %%xmm12, %%xmm13, HR) \
    "addl $16, %%ecx\n\t" \
    "cmpl %%edx, %%ecx\n\t" \
    "jl 43b\n\t" \
    "movl %[ibytes], %%edx\n\t" \
    "cmpl %%edx, %%ecx\n\t" \
    "je 45f\n\t" \
    "\n" \
    "44:\n\t" \
    "subq $16, %%rsp\n\t" \
    "vpxor %%xmm4, %%xmm4, %%xmm4\n\t" \
    "xorl %%ebx, %%ebx\n\t" \
    "vmovdqu %%xmm4, (%%rsp)\n\t" \
    "42:\n\t" \
    "movzbl (%%rax,%%rcx,1), %%r13d\n\t" \
    "movb %%r13b, (%%rsp,%%rbx,1)\n\t" \
    "incl %%ecx\n\t" \
    "incl %%ebx\n\t" \
    "cmpl %%edx, %%ecx\n\t" \
    "jl 42b\n\t" \
    "vmovdqu (%%rsp), %%xmm4\n\t" \
    "addq $16, %%rsp\n\t" \
    "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \
    "vpxor %%xmm4, %%xmm13, %%xmm13\n\t" \
    GHASH_FULL_AVX2(%%xmm13, %%xmm12, %%xmm13, HR) \
    "\n" \
    "45:\n\t" \
    "# T = Encrypt counter\n\t" \
    "vpxor %%xmm0, %%xmm0, %%xmm0\n\t" \
    "shll $3, %%edx\n\t" \
    "vpinsrq $0, %%rdx, %%xmm0, %%xmm0\n\t" \
    "vpxor %%xmm0, %%xmm13, %%xmm13\n\t" \
    GHASH_FULL_AVX2(%%xmm13, %%xmm12, %%xmm13, HR) \
    "vpshufb %[BSWAP_MASK], %%xmm13, %%xmm13\n\t" \
    "# Encrypt counter\n\t" \
    "vmovdqa 0(%[KEY]), %%xmm4\n\t" \
    "vpxor %%xmm13, %%xmm4, %%xmm4\n\t" \
    VAESENC_AVX(%%xmm4) \
    "vmovdqu %%xmm4, " VAR(TR) "\n\t"

/* Fold the AAD into XR: 16-byte chunks at label 23, zero-padded partial
 * tail via a stack buffer at 24/22, done at 25. */
#define CALC_AAD_AVX2() \
    "# Additional authentication data\n\t" \
    "movl %[abytes], %%edx\n\t" \
    "cmpl $0, %%edx\n\t" \
    "je 25f\n\t" \
    "movq %[addt], %%rax\n\t" \
    "xorl %%ecx, %%ecx\n\t" \
    "cmpl $16, %%edx\n\t" \
    "jl 24f\n\t" \
    "andl $0xfffffff0, %%edx\n\t" \
    "\n" \
    "23:\n\t" \
    "vmovdqu (%%rax,%%rcx,1), %%xmm4\n\t" \
    "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \
    "vpxor %%xmm4, " VAR(XR) ", " VAR(XR) "\n\t" \
    GHASH_FULL_AVX2(XR, %%xmm12, XR, HR) \
    "addl $16, %%ecx\n\t" \
    "cmpl %%edx, %%ecx\n\t" \
    "jl 23b\n\t" \
    "movl %[abytes], %%edx\n\t" \
    "cmpl %%edx, %%ecx\n\t" \
    "je 25f\n\t" \
    "\n" \
    "24:\n\t" \
    "subq $16, %%rsp\n\t" \
    "vpxor %%xmm4, %%xmm4, %%xmm4\n\t" \
    "xorl %%ebx, %%ebx\n\t" \
    "vmovdqu %%xmm4, (%%rsp)\n\t" \
    "22:\n\t" \
    "movzbl (%%rax,%%rcx,1), %%r13d\n\t" \
    "movb %%r13b, (%%rsp,%%rbx,1)\n\t" \
    "incl %%ecx\n\t" \
    "incl %%ebx\n\t" \
    "cmpl %%edx, %%ecx\n\t" \
    "jl 22b\n\t" \
    "vmovdqu (%%rsp), %%xmm4\n\t" \
    "addq $16, %%rsp\n\t" \
    "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \
    "vpxor %%xmm4, " VAR(XR) ", " VAR(XR) "\n\t" \
    GHASH_FULL_AVX2(XR, %%xmm12, XR, HR) \
    "\n" \
    "25:\n\t"

/* Process 128 bytes (8 blocks): AES-CTR rounds interleaved with GHASH of
 * the previous 8 blocks.  src selects ciphertext source (%%rcx = input for
 * decrypt, %%rdx = output for encrypt); o is the data offset bias. */
#define VAESENC_128_GHASH_AVX2(src, o) \
    "leaq (%[in]," VAR(KR64) ",1), %%rcx\n\t" \
    "leaq (%[out]," VAR(KR64) ",1), %%rdx\n\t" \
    /* src is either %%rcx or %%rdx */ \
    VAESENC_CTR() \
    VAESENC_XOR() \
    VAESENC_PCLMUL_AVX2_1(src, 16, (o-128), 112) \
    VAESENC_PCLMUL_AVX2_2(src, 32, (o-112), 96) \
    VAESENC_PCLMUL_AVX2_N(src, 48, (o- 96), 80) \
    VAESENC_PCLMUL_AVX2_N(src, 64, (o- 80), 64) \
    VAESENC_PCLMUL_AVX2_N(src, 80, (o- 64), 48) \
    VAESENC_PCLMUL_AVX2_N(src, 96, (o- 48), 32) \
    VAESENC_PCLMUL_AVX2_N(src, 112, (o- 32), 16) \
    VAESENC_PCLMUL_AVX2_N(src, 128, (o- 16), 0) \
    VAESENC_PCLMUL_AVX2_L(144) \
    "cmpl $11, %[nr]\n\t" \
    "vmovdqa 160(%[KEY]), %%xmm12\n\t" \
    "jl 4f\n\t" \
    VAESENC() \
    VAESENC_SET(176) \
    "cmpl $13, %[nr]\n\t" \
    "vmovdqa 192(%[KEY]), %%xmm12\n\t" \
    "jl 4f\n\t" \
    VAESENC() \
    VAESENC_SET(208) \
    "vmovdqa 224(%[KEY]), %%xmm12\n\t" \
    "\n" \
    "4:\n\t" \
    VAESENC_LAST(%%rcx, %%rdx)

/* Encrypt-side handling of a final partial block (< 16 bytes): encrypt one
 * counter block, XOR byte-by-byte via a stack buffer, zero-pad the buffer,
 * then fold the padded ciphertext into XR. */
#define AESENC_LAST15_ENC_AVX2() \
    "movl %[nbytes], %%ecx\n\t" \
    "movl %%ecx, %%edx\n\t" \
    "andl $0x0f, %%ecx\n\t" \
    "jz 55f\n\t" \
    "vmovdqu " VAR(CTR1) ", %%xmm13\n\t" \
    "vpshufb %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t" \
    "vpxor 0(%[KEY]), %%xmm13, %%xmm13\n\t" \
    VAESENC_AVX(%%xmm13) \
    "subq $16, %%rsp\n\t" \
    "xorl %%ecx, %%ecx\n\t" \
    "vmovdqu %%xmm13, (%%rsp)\n\t" \
    "\n" \
    "51:\n\t" \
    "movzbl (%[in]," VAR(KR64) ",1), %%r13d\n\t" \
    "xorb (%%rsp,%%rcx,1), %%r13b\n\t" \
    "movb %%r13b, (%[out]," VAR(KR64) ",1)\n\t" \
    "movb %%r13b, (%%rsp,%%rcx,1)\n\t" \
    "incl " VAR(KR) "\n\t" \
    "incl %%ecx\n\t" \
    "cmpl %%edx, " VAR(KR) "\n\t" \
    "jl 51b\n\t" \
    "xorq %%r13, %%r13\n\t" \
    "cmpl $16, %%ecx\n\t" \
    "je 53f\n\t" \
    "\n" \
    "52:\n\t" \
    "movb %%r13b, (%%rsp,%%rcx,1)\n\t" \
    "incl %%ecx\n\t" \
    "cmpl $16, %%ecx\n\t" \
    "jl 52b\n\t" \
    "53:\n\t" \
    "vmovdqu (%%rsp), %%xmm13\n\t" \
    "addq $16, %%rsp\n\t" \
    "vpshufb %[BSWAP_MASK], %%xmm13, %%xmm13\n\t" \
    "vpxor %%xmm13, " VAR(XR) ", " VAR(XR) "\n\t" \
    GHASH_GFMUL_RED_AVX2(XR, HR, XR) \

/* Decrypt-side handling of a final partial block: uses a second (already
 * zeroed) 16-byte stack slot to capture the raw ciphertext bytes so the
 * GHASH input is ciphertext, not plaintext. */
#define AESENC_LAST15_DEC_AVX2() \
    "movl %[nbytes], %%ecx\n\t" \
    "movl %%ecx, %%edx\n\t" \
    "andl $0x0f, %%ecx\n\t" \
    "jz 55f\n\t" \
    "vmovdqu " VAR(CTR1) ", %%xmm13\n\t" \
    "vpshufb %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t" \
    "vpxor 0(%[KEY]), %%xmm13, %%xmm13\n\t" \
    VAESENC_AVX(%%xmm13) \
    "subq $32, %%rsp\n\t" \
    "xorl %%ecx, %%ecx\n\t" \
    "vmovdqu %%xmm13, (%%rsp)\n\t" \
    "vpxor %%xmm0, %%xmm0, %%xmm0\n\t" \
    "vmovdqu %%xmm0, 16(%%rsp)\n\t" \
    "\n" \
    "51:\n\t" \
    "movzbl (%[in]," VAR(KR64) ",1), %%r13d\n\t" \
    "movb %%r13b, 16(%%rsp,%%rcx,1)\n\t" \
    "xorb (%%rsp,%%rcx,1), %%r13b\n\t" \
    "movb %%r13b, (%[out]," VAR(KR64) ",1)\n\t" \
    "incl " VAR(KR) "\n\t" \
    "incl %%ecx\n\t" \
    "cmpl %%edx, " VAR(KR) "\n\t" \
    "jl 51b\n\t" \
    "53:\n\t" \
    "vmovdqu 16(%%rsp), %%xmm13\n\t" \
    "addq $32, %%rsp\n\t" \
    "vpshufb %[BSWAP_MASK], %%xmm13, %%xmm13\n\t" \
    "vpxor %%xmm13, " VAR(XR) ", " VAR(XR) "\n\t" \
    GHASH_GFMUL_RED_AVX2(XR, HR, XR) \

/* Finish GHASH with the lengths block (bit lengths of ciphertext and AAD),
 * then XOR with the pre-counter block T to form the tag in xmm0. */
#define CALC_TAG_AVX2() \
    "movl %[nbytes], %%edx\n\t" \
    "movl %[abytes], %%ecx\n\t" \
    "shlq $3, %%rdx\n\t" \
    "shlq $3, %%rcx\n\t" \
    "vpinsrq $0, %%rdx, %%xmm0, %%xmm0\n\t" \
    "vpinsrq $1, %%rcx, %%xmm0, %%xmm0\n\t" \
    "vpxor %%xmm0, " VAR(XR) ", " VAR(XR) "\n\t" \
    GHASH_GFMUL_RED_AVX2(XR, HR, XR) \
    "vpshufb %[BSWAP_MASK], " VAR(XR) ", " VAR(XR) "\n\t" \
    "vpxor " VAR(TR) ", " VAR(XR) ", %%xmm0\n\t" \


/* AES-GCM encrypt using AVX2 + AES-NI + PCLMULQDQ.
 * in/out: nbytes of plaintext/ciphertext; addt: abytes of AAD;
 * ivec: ibytes of IV; tag: tbytes written on exit; key/nr: expanded AES
 * round keys and round count (10/12/14).
 * rax and ebx are pinned via register-asm locals for the IV pointer and
 * length, which is why they are absent from the clobber list. */
static void AES_GCM_encrypt_avx2(const unsigned char *in, unsigned char *out,
                                 const unsigned char* addt,
                                 const unsigned char* ivec, unsigned char *tag,
                                 unsigned int nbytes, unsigned int abytes,
                                 unsigned int ibytes, unsigned int tbytes,
                                 const unsigned char* key, int nr)
{
    register const unsigned char* iv asm("rax") = ivec;
    register unsigned int ivLen asm("ebx") = ibytes;

    __asm__ __volatile__ (
        "subq $" VAR(STACK_OFFSET) ", %%rsp\n\t"
        /* Counter is xmm13 */
        "vpxor %%xmm13, %%xmm13, %%xmm13\n\t"
        "vpxor " VAR(XR) ", " VAR(XR) ", " VAR(XR) "\n\t"
        "movl %[ibytes], %%edx\n\t"
        "cmpl $12, %%edx\n\t"
        "jne 35f\n\t"
        CALC_IV_12_AVX2()
        "jmp 39f\n\t"
        "\n"
        "35:\n\t"
        CALC_IV_AVX2()
        "\n"
        "39:\n\t"

        CALC_AAD_AVX2()

        /* H is doubled (multiplied by x in GF(2^128)) and the counter is
         * byte-swapped and incremented for the first data block. */
        "# Calculate counter and H\n\t"
        "vpsrlq $63, " VAR(HR) ", %%xmm5\n\t"
        "vpsllq $1, " VAR(HR) ", %%xmm4\n\t"
        "vpslldq $8, %%xmm5, %%xmm5\n\t"
        "vpor %%xmm5, %%xmm4, %%xmm4\n\t"
        "vpshufd $0xff, " VAR(HR) ", " VAR(HR) "\n\t"
        "vpsrad $31, " VAR(HR) ", " VAR(HR) "\n\t"
        "vpshufb %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t"
        "vpand %[MOD2_128], " VAR(HR) ", " VAR(HR) "\n\t"
        "vpaddd %[ONE], %%xmm13, %%xmm13\n\t"
        "vpxor %%xmm4, " VAR(HR) ", " VAR(HR) "\n\t"
        "vmovdqu %%xmm13, " VAR(CTR1) "\n\t"

        "xorl " VAR(KR) ", " VAR(KR) "\n\t"

#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX2_NO_UNROLL)
        /* 8-block (128-byte) unrolled path. */
        "cmpl $128, %[nbytes]\n\t"
        "movl %[nbytes], %%r13d\n\t"
        "jl 5f\n\t"
        "andl $0xffffff80, %%r13d\n\t"

        CALC_HT_8_AVX2()

        "# First 128 bytes of input\n\t"
        VAESENC_128()

        "cmpl $128, %%r13d\n\t"
        "movl $128, " VAR(KR) "\n\t"
        "jle 2f\n\t"

        "# More 128 bytes of input\n\t"
        "\n"
        "3:\n\t"
        VAESENC_128_GHASH_AVX2(%%rdx, 0)
        "addl $128, " VAR(KR) "\n\t"
        "cmpl %%r13d, " VAR(KR) "\n\t"
        "jl 3b\n\t"
        "\n"
        "2:\n\t"
        /* GHASH the final 8 ciphertext blocks left in xmm4..xmm11. */
        "vmovdqa %[BSWAP_MASK], %%xmm13\n\t"
        "vpshufb %%xmm13, %%xmm4, %%xmm4\n\t"
        "vpshufb %%xmm13, %%xmm5, %%xmm5\n\t"
        "vpshufb %%xmm13, %%xmm6, %%xmm6\n\t"
        "vpshufb %%xmm13, %%xmm7, %%xmm7\n\t"
        "vpshufb %%xmm13, %%xmm8, %%xmm8\n\t"
        "vpshufb %%xmm13, %%xmm9, %%xmm9\n\t"
        "vpshufb %%xmm13, %%xmm10, %%xmm10\n\t"
        "vpshufb %%xmm13, %%xmm11, %%xmm11\n\t"
        "vpxor %%xmm2, %%xmm4, %%xmm4\n\t"

        GHASH_GFMUL_RED_8_AVX2()

        "vmovdqu 0(" VAR(HTR) "), " VAR(HR) "\n\t"
        "\n"
        "5:\n\t"
        "movl %[nbytes], %%edx\n\t"
        "cmpl %%edx, " VAR(KR) "\n\t"
        "jge 55f\n\t"
#endif

        /* Remaining whole 16-byte blocks, one at a time. */
        "movl %[nbytes], %%r13d\n\t"
        "andl $0xfffffff0, %%r13d\n\t"
        "cmpl %%r13d, " VAR(KR) "\n\t"
        "jge 14f\n\t"

        VAESENC_BLOCK_AVX2()
        "addl $16, " VAR(KR) "\n\t"
        "cmpl %%r13d, " VAR(KR) "\n\t"
        "jge 13f\n\t"
        "vmovdqa %[MOD2_128], %%xmm0\n\t"
        "\n"
        "12:\n\t"
        "vmovdqu (%[in]," VAR(KR64) ",1), %%xmm9\n\t"
        "vmovdqu " VAR(CTR1) ", %%xmm5\n\t"
        "vpshufb %[BSWAP_EPI64], %%xmm5, %%xmm4\n\t"
        "vpaddd %[ONE], %%xmm5, %%xmm5\n\t"
        "vmovdqu %%xmm5, " VAR(CTR1) "\n\t"
        VAESENC_GFMUL_SB_AVX2(%%xmm9, HR, XR, CTR1)
        "vmovdqu %%xmm4, (%[out]," VAR(KR64) ",1)\n\t"
        "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t"
        "addl $16, " VAR(KR) "\n\t"
        "vpxor %%xmm4, " VAR(XR) ", " VAR(XR) "\n\t"
        "cmpl %%r13d, " VAR(KR) "\n\t"
        "jl 12b\n\t"
        "\n"
        "13:\n\t"
        GHASH_GFMUL_RED_AVX2(XR, HR, XR)
        "\n"
        "14:\n\t"

        AESENC_LAST15_ENC_AVX2()
        "\n"
        "55:\n\t"

        CALC_TAG_AVX2()
        STORE_TAG_AVX()
        "addq $" VAR(STACK_OFFSET) ", %%rsp\n\t"
        "vzeroupper\n\t"

        :
        : [KEY] "r" (key),
          [in] "r" (in), [out] "r" (out), [nr] "r" (nr),
          [nbytes] "r" (nbytes), [abytes] "r" (abytes), [addt] "r" (addt),
          [ivec] "r" (iv), [ibytes] "r" (ivLen), [tbytes] "r" (tbytes),
          [tag] "r" (tag),
          [BSWAP_MASK] "m" (BSWAP_MASK),
          [BSWAP_EPI64] "m" (BSWAP_EPI64),
          [ONE] "m" (ONE),
#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX2_NO_UNROLL)
          [TWO] "m" (TWO), [THREE] "m" (THREE), [FOUR] "m" (FOUR),
          [FIVE] "m" (FIVE), [SIX] "m" (SIX), [SEVEN] "m" (SEVEN),
          [EIGHT] "m" (EIGHT),
#endif
          [MOD2_128] "m" (MOD2_128)
        : "xmm15", "xmm14", "xmm13", "xmm12",
          "xmm0", "xmm1", "xmm2", "xmm3", "memory",
          "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11",
          "rcx", "rdx", "r13"
    );
}
#endif /* HAVE_INTEL_AVX2 */
#endif /* HAVE_INTEL_AVX1 */

#ifdef HAVE_AES_DECRYPT
/* Figure 10. AES-GCM – Decrypt With Single Block Ghash at a Time */

/* AES-GCM decrypt using plain AES-NI (SSE) instructions.
 * Same data parameters as the encrypt path plus tag/tbytes to verify;
 * *res is set by CMP_TAG() (constant-time tag compare, macro defined
 * earlier in this file).  rax/ebx/edx are pinned via register-asm locals
 * for iv, ivLen and tagLen; rdx is saved/restored around the asm body. */
static void AES_GCM_decrypt(const unsigned char *in, unsigned char *out,
                            const unsigned char* addt,
                            const unsigned char* ivec, const unsigned char *tag,
                            int nbytes, int abytes, int ibytes, int tbytes,
                            const unsigned char* key, int nr, int* res)
{
    register const unsigned char* iv asm("rax") = ivec;
    register int ivLen asm("ebx") = ibytes;
    register int tagLen asm("edx") = tbytes;

    __asm__ __volatile__ (
        "pushq %%rdx\n\t"
        "subq $" VAR(STACK_OFFSET) ", %%rsp\n\t"
        /* Counter is xmm13 */
        "pxor %%xmm13, %%xmm13\n\t"
        "pxor %%xmm15, %%xmm15\n\t"
        "movl %[ibytes], %%edx\n\t"
        "cmpl $12, %%edx\n\t"
        "jne 35f\n\t"
        CALC_IV_12()
        /* NOTE(review): unlike the AVX2 paths there is no explicit
         * "jmp 39f" here — presumably CALC_IV_12() ends with that jump
         * itself (macro not visible in this region); confirm. */
        "\n"
        "35:\n\t"
        CALC_IV()
        "\n"
        "39:\n\t"

        CALC_AAD()

        "# Calculate counter and H\n\t"
        "pshufb %[BSWAP_EPI64], %%xmm13\n\t"
        "movdqa " VAR(HR) ", %%xmm5\n\t"
        "paddd %[ONE], %%xmm13\n\t"
        "movdqa " VAR(HR) ", %%xmm4\n\t"
        "movdqu %%xmm13, " VAR(CTR1) "\n\t"
        "psrlq $63, %%xmm5\n\t"
        "psllq $1, %%xmm4\n\t"
        "pslldq $8, %%xmm5\n\t"
        "por %%xmm5, %%xmm4\n\t"
        "pshufd $0xff, " VAR(HR) ", " VAR(HR) "\n\t"
        "psrad $31, " VAR(HR) "\n\t"
        "pand %[MOD2_128], " VAR(HR) "\n\t"
        "pxor %%xmm4, " VAR(HR) "\n\t"

        "xorl " VAR(KR) ", " VAR(KR) "\n\t"

#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL)
        "cmpl $128, %[nbytes]\n\t"
        "jl 5f\n\t"

        CALC_HT_8_AVX()

        "movl %[nbytes], %%r13d\n\t"
        "andl $0xffffff80, %%r13d\n\t"
        "\n"
        "2:\n\t"
        /* Decrypt GHASHes the INPUT (ciphertext), so src is %%rcx. */
        AESENC_128_GHASH_AVX(%%rcx, 128)
        "addl $128, " VAR(KR) "\n\t"
        "cmpl %%r13d, " VAR(KR) "\n\t"
        "jl 2b\n\t"

        "movdqa %%xmm2, " VAR(XR) "\n\t"
        "movdqu (%%rsp), " VAR(HR) "\n\t"
        "5:\n\t"
        "movl %[nbytes], %%edx\n\t"
        "cmpl %%edx, " VAR(KR) "\n\t"
        "jge 55f\n\t"
#endif
        "movl %[nbytes], %%r13d\n\t"
        "andl $0xfffffff0, %%r13d\n\t"
        "cmpl %%r13d, " VAR(KR) "\n\t"
        "jge 13f\n\t"

        "\n"
        "12:\n\t"
        "leaq (%[in]," VAR(KR64) ",1), %%rcx\n\t"
        "leaq (%[out]," VAR(KR64) ",1), %%rdx\n\t"
        "movdqu (%%rcx), %%xmm1\n\t"
        "movdqa " VAR(HR) ", %%xmm0\n\t"
        "pshufb %[BSWAP_MASK], %%xmm1\n\t"
        "pxor " VAR(XR) ", %%xmm1\n\t"
        AESENC_GFMUL(%%rcx, %%rdx, %%xmm0, %%xmm1)
        "addl $16, " VAR(KR) "\n\t"
        "cmpl %%r13d, " VAR(KR) "\n\t"
        "jl 12b\n\t"
        "\n"
        "13:\n\t"

        AESENC_LAST15_DEC_AVX()
        "\n"
        "55:\n\t"

        CALC_TAG()
        "addq $" VAR(STACK_OFFSET) ", %%rsp\n\t"
        "popq %%rdx\n\t"
        CMP_TAG()

        :
        : [KEY] "r" (key),
          [in] "r" (in), [out] "r" (out), [nr] "r" (nr),
          [nbytes] "r" (nbytes), [abytes] "r" (abytes), [addt] "r" (addt),
          [ivec] "r" (iv), [ibytes] "r" (ivLen), [tbytes] "r" (tagLen),
          [tag] "r" (tag), [res] "r" (res),
          [BSWAP_MASK] "m" (BSWAP_MASK),
          [BSWAP_EPI64] "m" (BSWAP_EPI64),
          [ONE] "m" (ONE),
#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL)
          [TWO] "m" (TWO), [THREE] "m" (THREE), [FOUR] "m" (FOUR),
          [FIVE] "m" (FIVE), [SIX] "m" (SIX), [SEVEN] "m" (SEVEN),
          [EIGHT] "m" (EIGHT),
#endif
          [MOD2_128] "m" (MOD2_128)
        : "xmm15", "xmm14", "xmm13", "xmm12",
          "xmm0", "xmm1", "xmm2", "xmm3", "memory",
          "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11",
          "rcx", "r13"
    );
}

#ifdef HAVE_INTEL_AVX1
/* AES-GCM decrypt using AVX1 (VEX-encoded) AES-NI + PCLMULQDQ.
 * Same contract as AES_GCM_decrypt above; tag verdict written to *res via
 * CMP_TAG_AVX(). */
static void AES_GCM_decrypt_avx1(const unsigned char *in, unsigned char *out,
                                 const unsigned char* addt,
                                 const unsigned char* ivec,
                                 const unsigned char *tag, int nbytes,
                                 int abytes, int ibytes, int tbytes,
                                 const unsigned char* key, int nr, int* res)
{
    register const unsigned char* iv asm("rax") = ivec;
    register int ivLen asm("ebx") = ibytes;
    register int tagLen asm("edx") = tbytes;

    __asm__ __volatile__ (
        "pushq %%rdx\n\t"
        "subq $" VAR(STACK_OFFSET) ", %%rsp\n\t"
        /* Counter is xmm13 */
        "vpxor %%xmm13, %%xmm13, %%xmm13\n\t"
        "vpxor %%xmm15, %%xmm15, %%xmm15\n\t"
        "movl %[ibytes], %%edx\n\t"
        "cmpl $12, %%edx\n\t"
        "jne 35f\n\t"
        CALC_IV_12_AVX1()
        /* NOTE(review): no explicit "jmp 39f" here either — presumably
         * inside CALC_IV_12_AVX1(); confirm against the macro. */
        "\n"
        "35:\n\t"
        CALC_IV_AVX1()
        "\n"
        "39:\n\t"

        CALC_AAD_AVX1()

        "# Calculate counter and H\n\t"
        "vpsrlq $63, " VAR(HR) ", %%xmm5\n\t"
        "vpsllq $1, " VAR(HR) ", %%xmm4\n\t"
        "vpslldq $8, %%xmm5, %%xmm5\n\t"
        "vpor %%xmm5, %%xmm4, %%xmm4\n\t"
        "vpshufd $0xff, " VAR(HR) ", " VAR(HR) "\n\t"
        "vpsrad $31, " VAR(HR) ", " VAR(HR) "\n\t"
        "vpshufb %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t"
        "vpand %[MOD2_128], " VAR(HR) ", " VAR(HR) "\n\t"
        "vpaddd %[ONE], %%xmm13, %%xmm13\n\t"
        "vpxor %%xmm4, " VAR(HR) ", " VAR(HR) "\n\t"
        "vmovdqu %%xmm13, " VAR(CTR1) "\n\t"

        "xorl " VAR(KR) ", " VAR(KR) "\n\t"

#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL)
        "cmpl $128, %[nbytes]\n\t"
        "jl 5f\n\t"

        CALC_HT_8_AVX1()

        "movl %[nbytes], %%r13d\n\t"
        "andl $0xffffff80, %%r13d\n\t"
        "\n"
        "2:\n\t"
        VAESENC_128_GHASH_AVX1(%%rcx, 128)
        "addl $128, " VAR(KR) "\n\t"
        "cmpl %%r13d, " VAR(KR) "\n\t"
        "jl 2b\n\t"

        "vmovdqa %%xmm2, " VAR(XR) "\n\t"
        "vmovdqu (%%rsp), " VAR(HR) "\n\t"
        "5:\n\t"
        "movl %[nbytes], %%edx\n\t"
        "cmpl %%edx, " VAR(KR) "\n\t"
        "jge 55f\n\t"
#endif
        "movl %[nbytes], %%r13d\n\t"
        "andl $0xfffffff0, %%r13d\n\t"
        "cmpl %%r13d, " VAR(KR) "\n\t"
        "jge 13f\n\t"

        "\n"
        "12:\n\t"
        "vmovdqu (%[in]," VAR(KR64) ",1), %%xmm9\n\t"
        "vmovdqa " VAR(HR) ", %%xmm0\n\t"
        "vpshufb %[BSWAP_MASK], %%xmm9, %%xmm1\n\t"
        "vpxor " VAR(XR) ", %%xmm1, %%xmm1\n\t"
        VAESENC_GFMUL(%%xmm9, %%xmm0, %%xmm1)
        "addl $16, " VAR(KR) "\n\t"
        "cmpl %%r13d, " VAR(KR) "\n\t"
        "jl 12b\n\t"
        "\n"
        "13:\n\t"

        AESENC_LAST15_DEC_AVX1()
        "\n"
        "55:\n\t"

        CALC_TAG_AVX1()
        "addq $" VAR(STACK_OFFSET) ", %%rsp\n\t"
        "popq %%rdx\n\t"
        CMP_TAG_AVX()
        "vzeroupper\n\t"

        :
        : [KEY] "r" (key),
          [in] "r" (in), [out] "r" (out), [nr] "r" (nr),
          [nbytes] "r" (nbytes), [abytes] "r" (abytes), [addt] "r" (addt),
          [ivec] "r" (iv), [ibytes] "r" (ivLen), [tbytes] "r" (tagLen),
          [tag] "r" (tag), [res] "r" (res),
          [BSWAP_MASK] "m" (BSWAP_MASK),
          [BSWAP_EPI64] "m" (BSWAP_EPI64),
          [ONE] "m" (ONE),
#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL)
          [TWO] "m" (TWO), [THREE] "m" (THREE), [FOUR] "m" (FOUR),
          [FIVE] "m" (FIVE), [SIX] "m" (SIX), [SEVEN] "m" (SEVEN),
          [EIGHT] "m" (EIGHT),
#endif
          [MOD2_128] "m" (MOD2_128)
        : "xmm15", "xmm14", "xmm13", "xmm12",
          "xmm0", "xmm1", "xmm2", "xmm3", "memory",
          "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11",
          "rcx", "r13"
    );
}

#ifdef HAVE_INTEL_AVX2
/* AES-GCM decrypt using AVX2 + AES-NI + PCLMULQDQ.
 * Same contract as AES_GCM_decrypt; tag verdict written to *res via
 * CMP_TAG_AVX(). */
static void AES_GCM_decrypt_avx2(const unsigned char *in, unsigned char *out,
                                 const unsigned char* addt,
                                 const unsigned char* ivec,
                                 const unsigned char *tag, int nbytes,
                                 int abytes, int ibytes, int tbytes,
                                 const unsigned char* key, int nr, int* res)
{
    register const unsigned char* iv asm("rax") = ivec;
    register int ivLen asm("ebx") = ibytes;
    register int tagLen asm("edx") = tbytes;

    __asm__ __volatile__ (
        "pushq %%rdx\n\t"
        "subq $" VAR(STACK_OFFSET) ", %%rsp\n\t"
        /* Counter is xmm13 */
        "vpxor %%xmm13, %%xmm13, %%xmm13\n\t"
        "vpxor %%xmm15, %%xmm15, %%xmm15\n\t"
        "movl %[ibytes], %%edx\n\t"
        "cmpl $12, %%edx\n\t"
        "jne 35f\n\t"
        CALC_IV_12_AVX2()
        "jmp 39f\n\t"
        "\n"
        "35:\n\t"
        CALC_IV_AVX2()
        "\n"
        "39:\n\t"

        CALC_AAD_AVX2()

        "# Calculate counter and H\n\t"
        "vpsrlq $63, " VAR(HR) ", %%xmm5\n\t"
        "vpsllq $1, " VAR(HR) ", %%xmm4\n\t"
        "vpslldq $8, %%xmm5, %%xmm5\n\t"
        "vpor %%xmm5, %%xmm4, %%xmm4\n\t"
        "vpshufd $0xff, " VAR(HR) ", " VAR(HR) "\n\t"
        "vpsrad $31, " VAR(HR) ", " VAR(HR) "\n\t"
        "vpshufb %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t"
        "vpand %[MOD2_128], " VAR(HR) ", " VAR(HR) "\n\t"
        "vpaddd %[ONE], %%xmm13, %%xmm13\n\t"
        "vpxor %%xmm4, " VAR(HR) ", " VAR(HR) "\n\t"
        "vmovdqu %%xmm13, " VAR(CTR1) "\n\t"

        "xorl " VAR(KR) ", " VAR(KR) "\n\t"

#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX2_NO_UNROLL)
        "cmpl $128, %[nbytes]\n\t"
        "jl 5f\n\t"

        CALC_HT_8_AVX2()

        "movl %[nbytes], %%r13d\n\t"
        "andl $0xffffff80, %%r13d\n\t"
        "\n"
        "2:\n\t"
        VAESENC_128_GHASH_AVX2(%%rcx, 128)
        "addl $128, " VAR(KR) "\n\t"
        "cmpl %%r13d, " VAR(KR) "\n\t"
        "jl 2b\n\t"

        "vmovdqa %%xmm2, " VAR(XR) "\n\t"
        "vmovdqu (%%rsp), " VAR(HR) "\n\t"
        "5:\n\t"
        "movl %[nbytes], %%edx\n\t"
        "cmpl %%edx, " VAR(KR) "\n\t"
        "jge 55f\n\t"
#endif
        "movl %[nbytes], %%r13d\n\t"
        "andl $0xfffffff0, %%r13d\n\t"
        "cmpl %%r13d, " VAR(KR) "\n\t"
        "jge 13f\n\t"

        "vmovdqa %[MOD2_128], %%xmm0\n\t"
        "\n"
        "12:\n\t"
        /* GHASH uses the ciphertext (xmm9) before decryption. */
        "vmovdqu (%[in]," VAR(KR64) ",1), %%xmm9\n\t"
        "vmovdqu " VAR(CTR1) ", %%xmm5\n\t"
        "vpshufb %[BSWAP_MASK], %%xmm9, %%xmm1\n\t"
        "vpshufb %[BSWAP_EPI64], %%xmm5, %%xmm4\n\t"
        "vpaddd %[ONE], %%xmm5, %%xmm5\n\t"
        "vpxor " VAR(XR) ", %%xmm1, %%xmm1\n\t"
        "vmovdqu %%xmm5, " VAR(CTR1) "\n\t"
        VAESENC_GFMUL_SB_AVX2(%%xmm9, HR, %%xmm1, CTR1)
        "vmovdqu %%xmm4, (%[out]," VAR(KR64) ",1)\n\t"
        "addl $16, " VAR(KR) "\n\t"
        "cmpl %%r13d, " VAR(KR) "\n\t"
        "jl 12b\n\t"
        "\n"
        "13:\n\t"

        AESENC_LAST15_DEC_AVX2()
        "\n"
        "55:\n\t"

        CALC_TAG_AVX2()
        "addq $" VAR(STACK_OFFSET) ", %%rsp\n\t"
        "popq %%rdx\n\t"
        CMP_TAG_AVX()
        "vzeroupper\n\t"

        :
        : [KEY] "r" (key),
          [in] "r" (in), [out] "r" (out), [nr] "r" (nr),
          [nbytes] "r" (nbytes), [abytes] "r" (abytes), [addt] "r" (addt),
          [ivec] "r" (iv), [ibytes] "r" (ivLen), [tbytes] "r" (tagLen),
          [tag] "r" (tag), [res] "r" (res),
          [BSWAP_MASK] "m" (BSWAP_MASK),
          [BSWAP_EPI64] "m" (BSWAP_EPI64),
          [ONE] "m" (ONE),
#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX2_NO_UNROLL)
          [TWO] "m" (TWO), [THREE] "m" (THREE), [FOUR] "m" (FOUR),
          [FIVE] "m" (FIVE), [SIX] "m" (SIX), [SEVEN] "m" (SEVEN),
          [EIGHT] "m" (EIGHT),
#endif
          [MOD2_128] "m" (MOD2_128)
        : "xmm15", "xmm14", "xmm13", "xmm12",
          "xmm0", "xmm1", "xmm2", "xmm3", "memory",
          "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11",
          "rcx", "r13"
    );
}
#endif /* HAVE_INTEL_AVX2 */
#endif /* HAVE_INTEL_AVX1 */
#endif /* HAVE_AES_DECRYPT */

#else /* _MSC_VER */
/* The following are for MSC based builds which do not allow
 * inline assembly. Intrinsic functions are used instead.
*/

/* Calculate the GHASH key (H), the pre-counter block (Y) and the first
 * encrypted counter (T = E(K, Y0)) for the common 12-byte IV case.
 * Per NIST SP 800-38D, a 12-byte IV forms Y0 = IV || 0^31 || 1 directly,
 * so no GHASH of the IV is required.  E(K, 0^128) (for H) and E(K, Y0)
 * (for T) are computed in parallel through the AES rounds.
 * Relies on caller-scope locals: tmp1, tmp2, lastKey.
 * nr is the AES round count: 10/12/14 for AES-128/192/256. */
#define aes_gcm_calc_iv_12(KEY, ivec, nr, H, Y, T)                       \
do                                                                       \
{                                                                        \
    word32 iv12[4];                                                      \
    /* Byte-copy the IV: avoids the unaligned, strict-aliasing-unsafe */ \
    /* word32 loads of casting ivec directly to word32*. */              \
    XMEMCPY(iv12, ivec, 12);                                             \
    iv12[3] = 0x01000000;                                                \
    Y = _mm_loadu_si128((__m128i*)iv12);                                 \
                                                                         \
    /* (Compute E[ZERO, KS] and E[Y0, KS] together */                    \
    tmp1 = _mm_load_si128(&KEY[0]);                                      \
    tmp2 = _mm_xor_si128(Y, KEY[0]);                                     \
    tmp1 = _mm_aesenc_si128(tmp1, KEY[1]);                               \
    tmp2 = _mm_aesenc_si128(tmp2, KEY[1]);                               \
    tmp1 = _mm_aesenc_si128(tmp1, KEY[2]);                               \
    tmp2 = _mm_aesenc_si128(tmp2, KEY[2]);                               \
    tmp1 = _mm_aesenc_si128(tmp1, KEY[3]);                               \
    tmp2 = _mm_aesenc_si128(tmp2, KEY[3]);                               \
    tmp1 = _mm_aesenc_si128(tmp1, KEY[4]);                               \
    tmp2 = _mm_aesenc_si128(tmp2, KEY[4]);                               \
    tmp1 = _mm_aesenc_si128(tmp1, KEY[5]);                               \
    tmp2 = _mm_aesenc_si128(tmp2, KEY[5]);                               \
    tmp1 = _mm_aesenc_si128(tmp1, KEY[6]);                               \
    tmp2 = _mm_aesenc_si128(tmp2, KEY[6]);                               \
    tmp1 = _mm_aesenc_si128(tmp1, KEY[7]);                               \
    tmp2 = _mm_aesenc_si128(tmp2, KEY[7]);                               \
    tmp1 = _mm_aesenc_si128(tmp1, KEY[8]);                               \
    tmp2 = _mm_aesenc_si128(tmp2, KEY[8]);                               \
    tmp1 = _mm_aesenc_si128(tmp1, KEY[9]);                               \
    tmp2 = _mm_aesenc_si128(tmp2, KEY[9]);                               \
    lastKey = KEY[10];                                                   \
    if (nr > 10) {                                                       \
        tmp1 = _mm_aesenc_si128(tmp1, lastKey);                          \
        tmp2 = _mm_aesenc_si128(tmp2, lastKey);                          \
        tmp1 = _mm_aesenc_si128(tmp1, KEY[11]);                          \
        tmp2 = _mm_aesenc_si128(tmp2, KEY[11]);                          \
        lastKey = KEY[12];                                               \
        if (nr > 12) {                                                   \
            tmp1 = _mm_aesenc_si128(tmp1, lastKey);                      \
            tmp2 = _mm_aesenc_si128(tmp2, lastKey);                      \
            tmp1 = _mm_aesenc_si128(tmp1, KEY[13]);                      \
            tmp2 = _mm_aesenc_si128(tmp2, KEY[13]);                      \
            lastKey = KEY[14];                                           \
        }                                                                \
    }                                                                    \
    H = _mm_aesenclast_si128(tmp1, lastKey);                             \
    T = _mm_aesenclast_si128(tmp2, lastKey);                             \
    H = _mm_shuffle_epi8(H, BSWAP_MASK);                                 \
}                                                                        \
while (0)
/* Calculate the GHASH key (H), the pre-counter block (Y) and the first
 * encrypted counter (T = E(K, Y0)) when the IV is NOT 12 bytes long.
 * Per NIST SP 800-38D, the IV is GHASHed (full blocks, a zero-padded
 * partial block, then a length block) to produce Y0.
 * Relies on caller-scope locals: i, j, last_block, tmp1, lastKey
 * (callers zero-initialize last_block at declaration).
 * nr is the AES round count: 10/12/14 for AES-128/192/256. */
#define aes_gcm_calc_iv(KEY, ivec, ibytes, nr, H, Y, T)                  \
do                                                                       \
{                                                                        \
    /* Stage any trailing partial IV block in (zero-padded) last_block */\
    if (ibytes % 16) {                                                   \
        i = ibytes / 16;                                                 \
        for (j=0; j < (int)(ibytes%16); j++)                             \
            ((unsigned char*)&last_block)[j] = ivec[i*16+j];             \
    }                                                                    \
    /* H = E(K, 0^128): the zero block XOR KEY[0] is just KEY[0] */      \
    tmp1 = _mm_load_si128(&KEY[0]);                                      \
    tmp1 = _mm_aesenc_si128(tmp1, KEY[1]);                               \
    tmp1 = _mm_aesenc_si128(tmp1, KEY[2]);                               \
    tmp1 = _mm_aesenc_si128(tmp1, KEY[3]);                               \
    tmp1 = _mm_aesenc_si128(tmp1, KEY[4]);                               \
    tmp1 = _mm_aesenc_si128(tmp1, KEY[5]);                               \
    tmp1 = _mm_aesenc_si128(tmp1, KEY[6]);                               \
    tmp1 = _mm_aesenc_si128(tmp1, KEY[7]);                               \
    tmp1 = _mm_aesenc_si128(tmp1, KEY[8]);                               \
    tmp1 = _mm_aesenc_si128(tmp1, KEY[9]);                               \
    lastKey = KEY[10];                                                   \
    if (nr > 10) {                                                       \
        tmp1 = _mm_aesenc_si128(tmp1, lastKey);                          \
        tmp1 = _mm_aesenc_si128(tmp1, KEY[11]);                          \
        lastKey = KEY[12];                                               \
        if (nr > 12) {                                                   \
            tmp1 = _mm_aesenc_si128(tmp1, lastKey);                      \
            tmp1 = _mm_aesenc_si128(tmp1, KEY[13]);                      \
            lastKey = KEY[14];                                           \
        }                                                                \
    }                                                                    \
    H = _mm_aesenclast_si128(tmp1, lastKey);                             \
    H = _mm_shuffle_epi8(H, BSWAP_MASK);                                 \
    /* GHASH the IV: full blocks, then the padded partial block */       \
    Y = _mm_setzero_si128();                                             \
    for (i=0; i < (int)(ibytes/16); i++) {                               \
        tmp1 = _mm_loadu_si128(&((__m128i*)ivec)[i]);                    \
        tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);                       \
        Y = _mm_xor_si128(Y, tmp1);                                      \
        Y = gfmul_sw(Y, H);                                              \
    }                                                                    \
    if (ibytes % 16) {                                                   \
        tmp1 = last_block;                                               \
        tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);                       \
        Y = _mm_xor_si128(Y, tmp1);                                      \
        Y = gfmul_sw(Y, H);                                              \
    }                                                                    \
    /* Fold in the IV bit length (low qword; high qword zeroed) */       \
    tmp1 = _mm_insert_epi64(tmp1, ibytes*8, 0);                          \
    tmp1 = _mm_insert_epi64(tmp1, 0, 1);                                 \
    Y = _mm_xor_si128(Y, tmp1);                                          \
    Y = gfmul_sw(Y, H);                                                  \
    Y = _mm_shuffle_epi8(Y, BSWAP_MASK); /* Compute E(K, Y0) */          \
    tmp1 = _mm_xor_si128(Y, KEY[0]);                                     \
    tmp1 = _mm_aesenc_si128(tmp1, KEY[1]);                               \
    tmp1 = _mm_aesenc_si128(tmp1, KEY[2]);                               \
    tmp1 = _mm_aesenc_si128(tmp1, KEY[3]);                               \
    tmp1 = _mm_aesenc_si128(tmp1, KEY[4]);                               \
    tmp1 = _mm_aesenc_si128(tmp1, KEY[5]);                               \
    tmp1 = _mm_aesenc_si128(tmp1, KEY[6]);                               \
    tmp1 = _mm_aesenc_si128(tmp1, KEY[7]);                               \
    tmp1 = _mm_aesenc_si128(tmp1, KEY[8]);                               \
    tmp1 = _mm_aesenc_si128(tmp1, KEY[9]);                               \
    lastKey = KEY[10];                                                   \
    if (nr > 10) {                                                       \
        tmp1 = _mm_aesenc_si128(tmp1, lastKey);                          \
        tmp1 = _mm_aesenc_si128(tmp1, KEY[11]);                          \
        lastKey = KEY[12];                                               \
        if (nr > 12) {                                                   \
            tmp1 = _mm_aesenc_si128(tmp1, lastKey);                      \
            tmp1 = _mm_aesenc_si128(tmp1, KEY[13]);                      \
            lastKey = KEY[14];                                           \
        }                                                                \
    }                                                                    \
    T = _mm_aesenclast_si128(tmp1, lastKey);                             \
}                                                                        \
while (0)

/* Run one AES round (round key KEY[j]) over the 8 pipelined states. */
#define AES_ENC_8(j)                       \
    tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); \
    tmp2 = _mm_aesenc_si128(tmp2, KEY[j]); \
    tmp3 = _mm_aesenc_si128(tmp3, KEY[j]); \
    tmp4 = _mm_aesenc_si128(tmp4, KEY[j]); \
    tmp5 = _mm_aesenc_si128(tmp5, KEY[j]); \
    tmp6 = _mm_aesenc_si128(tmp6, KEY[j]); \
    tmp7 = _mm_aesenc_si128(tmp7, KEY[j]); \
    tmp8 = _mm_aesenc_si128(tmp8, KEY[j]);

/* Final AES round for the 8 pipelined states, then XOR each keystream
 * block with the next 8 input blocks and store the 8 output blocks.
 * Relies on caller-scope locals: i, in, out, lastKey, tmp1..tmp8. */
#define AES_ENC_LAST_8()                                                   \
    tmp1 =_mm_aesenclast_si128(tmp1, lastKey);                             \
    tmp2 =_mm_aesenclast_si128(tmp2, lastKey);                             \
    tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[i*8+0]));   \
    tmp2 = _mm_xor_si128(tmp2, _mm_loadu_si128(&((__m128i*)in)[i*8+1]));   \
    _mm_storeu_si128(&((__m128i*)out)[i*8+0], tmp1);                       \
    _mm_storeu_si128(&((__m128i*)out)[i*8+1], tmp2);                       \
    tmp3 =_mm_aesenclast_si128(tmp3, lastKey);                             \
    tmp4 =_mm_aesenclast_si128(tmp4, lastKey);                             \
    tmp3 = _mm_xor_si128(tmp3, _mm_loadu_si128(&((__m128i*)in)[i*8+2]));   \
    tmp4 = _mm_xor_si128(tmp4, _mm_loadu_si128(&((__m128i*)in)[i*8+3]));   \
    _mm_storeu_si128(&((__m128i*)out)[i*8+2], tmp3);                       \
    _mm_storeu_si128(&((__m128i*)out)[i*8+3], tmp4);                       \
    tmp5 =_mm_aesenclast_si128(tmp5, lastKey);                             \
    tmp6 =_mm_aesenclast_si128(tmp6, lastKey);                             \
    tmp5 = _mm_xor_si128(tmp5, _mm_loadu_si128(&((__m128i*)in)[i*8+4]));   \
    tmp6 = _mm_xor_si128(tmp6, _mm_loadu_si128(&((__m128i*)in)[i*8+5]));   \
    _mm_storeu_si128(&((__m128i*)out)[i*8+4], tmp5);                       \
    _mm_storeu_si128(&((__m128i*)out)[i*8+5], tmp6);                       \
    tmp7 =_mm_aesenclast_si128(tmp7, lastKey);                             \
    tmp8 =_mm_aesenclast_si128(tmp8, lastKey);                             \
    tmp7 = _mm_xor_si128(tmp7, _mm_loadu_si128(&((__m128i*)in)[i*8+6]));   \
    tmp8 = _mm_xor_si128(tmp8, _mm_loadu_si128(&((__m128i*)in)[i*8+7]));   \
    _mm_storeu_si128(&((__m128i*)out)[i*8+6], tmp7);                       \
    _mm_storeu_si128(&((__m128i*)out)[i*8+7], tmp8);


/* GF(2^128) multiply of a and b for GHASH: Karatsuba carry-less multiply
 * (three PCLMULQDQ), a one-bit left shift of the 256-bit product, then
 * reduction modulo the GCM polynomial. */
static __m128i gfmul_sw(__m128i a, __m128i b)
{
    __m128i r, t1, t2, t3, t4, t5, t6, t7;
    /* Karatsuba: lo = a0*b0 (t1), hi = a1*b1 (t4), mid via (a0^a1)*(b0^b1) */
    t2 = _mm_shuffle_epi32(b, 78);
    t3 = _mm_shuffle_epi32(a, 78);
    t2 = _mm_xor_si128(t2, b);
    t3 = _mm_xor_si128(t3, a);
    t4 = _mm_clmulepi64_si128(b, a, 0x11);
    t1 = _mm_clmulepi64_si128(b, a, 0x00);
    t2 = _mm_clmulepi64_si128(t2, t3, 0x00);
    t2 = _mm_xor_si128(t2, t1);
    t2 = _mm_xor_si128(t2, t4);
    t3 = _mm_slli_si128(t2, 8);
    t2 = _mm_srli_si128(t2, 8);
    t1 = _mm_xor_si128(t1, t3);
    t4 = _mm_xor_si128(t4, t2);

    /* Shift the 256-bit product (t4:t1) left by one bit, carrying
     * across the 32-bit lanes and the t1 -> t4 boundary */
    t5 = _mm_srli_epi32(t1, 31);
    t6 = _mm_srli_epi32(t4, 31);
    t1 = _mm_slli_epi32(t1, 1);
    t4 = _mm_slli_epi32(t4, 1);
    t7 = _mm_srli_si128(t5, 12);
    t5 = _mm_slli_si128(t5, 4);
    t6 = _mm_slli_si128(t6, 4);
    t4 = _mm_or_si128(t4, t7);
    t1 = _mm_or_si128(t1, t5);
    t4 = _mm_or_si128(t4, t6);

    /* Reduce modulo x^128 + x^7 + x^2 + x + 1 (shifts by 31/30/25
     * and 1/2/7 implement the two reduction passes) */
    t5 = _mm_slli_epi32(t1, 31);
    t6 = _mm_slli_epi32(t1, 30);
    t7 = _mm_slli_epi32(t1, 25);
    t5 = _mm_xor_si128(t5, t6);
    t5 = _mm_xor_si128(t5, t7);

    t6 = _mm_srli_si128(t5, 4);
    t5 = _mm_slli_si128(t5, 12);
    t1 = _mm_xor_si128(t1, t5);
    t7 = _mm_srli_epi32(t1, 1);
    t3 = _mm_srli_epi32(t1, 2);
    t2 = _mm_srli_epi32(t1, 7);

    t7 = _mm_xor_si128(t7, t3);
    t7 = _mm_xor_si128(t7, t2);
    t7 = _mm_xor_si128(t7, t6);
    t7 = _mm_xor_si128(t7, t1);
    r = _mm_xor_si128(t4, t7);

    return r;
}

/* Carry-less multiply a*b only (no reduction, no bit shift): XORs the
 * unreduced 256-bit product into *r0 (low 128) and *r1 (high 128) so
 * several products can be accumulated before one ghash_red() call. */
static void gfmul_only(__m128i a, __m128i b, __m128i* r0, __m128i* r1)
{
    __m128i t1, t2, t3, t4;

    /* 128 x 128 Carryless Multiply */
    t2 = _mm_shuffle_epi32(b, 78);
    t3 = _mm_shuffle_epi32(a, 78);
    t2 = _mm_xor_si128(t2, b);
    t3 = _mm_xor_si128(t3, a);
    t4 = _mm_clmulepi64_si128(b, a, 0x11);
    t1 = _mm_clmulepi64_si128(b, a, 0x00);
    t2 = _mm_clmulepi64_si128(t2, t3, 0x00);
    t2 = _mm_xor_si128(t2, t1);
    t2 = _mm_xor_si128(t2, t4);
    t3 = _mm_slli_si128(t2, 8);
    t2 = _mm_srli_si128(t2, 8);
    t1 = _mm_xor_si128(t1, t3);
    t4 = _mm_xor_si128(t4, t2);
    *r0 = _mm_xor_si128(t1, *r0);
    *r1 = _mm_xor_si128(t4, *r1);
}

/* Multiply a by x in GF(2^128): shift left one bit (carry across the
 * 64-bit halves) and conditionally XOR in the reduction constant when
 * the top bit shifts out (branch-free via arithmetic-shift mask). */
static __m128i gfmul_shl1(__m128i a)
{
    __m128i t1 = a, t2;
    t2 = _mm_srli_epi64(t1, 63);
    t1 = _mm_slli_epi64(t1, 1);
    t2 = _mm_slli_si128(t2, 8);
    t1 = _mm_or_si128(t1, t2);
    /* if (a[1] >> 63) t1 = _mm_xor_si128(t1, MOD2_128); */
    a = _mm_shuffle_epi32(a, 0xff);
    a = _mm_srai_epi32(a, 31);
    a = _mm_and_si128(a, MOD2_128);
    t1 = _mm_xor_si128(t1, a);
    return t1;
}

/* Reduce the unreduced 256-bit product (r0 = low, r1 = high) modulo the
 * GCM polynomial; pairs with the products accumulated by gfmul_only(). */
static __m128i ghash_red(__m128i r0, __m128i r1)
{
    __m128i t2, t3;
    __m128i t5, t6, t7;

    t5 = _mm_slli_epi32(r0, 31);
    t6 = _mm_slli_epi32(r0, 30);
    t7 = _mm_slli_epi32(r0, 25);
    t5 = _mm_xor_si128(t5, t6);
    t5 = _mm_xor_si128(t5, t7);

    t6 = _mm_srli_si128(t5, 4);
    t5 = _mm_slli_si128(t5, 12);
    r0 = _mm_xor_si128(r0, t5);
    t7 = _mm_srli_epi32(r0, 1);
    t3 = _mm_srli_epi32(r0, 2);
    t2 = _mm_srli_epi32(r0, 7);

    t7 = _mm_xor_si128(t7, t3);
    t7 = _mm_xor_si128(t7, t2);
    t7 = _mm_xor_si128(t7, t6);
    t7 = _mm_xor_si128(t7, r0);
    return _mm_xor_si128(r1, t7);
}

/* Full GF(2^128) multiply: one carry-less multiply then one reduction. */
static __m128i gfmul_shifted(__m128i a, __m128i b)
{
    __m128i t0 = _mm_setzero_si128(), t1 = _mm_setzero_si128();
    gfmul_only(a, b, &t0, &t1);
    return ghash_red(t0, t1);
}

#ifndef AES_GCM_AESNI_NO_UNROLL
/* GHASH 8 blocks at once: accumulate a1*b8 ^ a2*b7 ^ ... ^ a8*b1 with
 * gfmul_only(), then perform a single reduction. */
static __m128i gfmul8(__m128i a1, __m128i a2, __m128i a3, __m128i a4,
                      __m128i a5, __m128i a6, __m128i a7, __m128i a8,
                      __m128i b1, __m128i b2, __m128i b3, __m128i b4,
                      __m128i b5, __m128i b6, __m128i b7, __m128i b8)
{
    __m128i t0 = _mm_setzero_si128(), t1 = _mm_setzero_si128();
    gfmul_only(a1, b8, &t0, &t1);
    gfmul_only(a2, b7, &t0, &t1);
    gfmul_only(a3, b6, &t0, &t1);
    gfmul_only(a4, b5, &t0, &t1);
    gfmul_only(a5, b4, &t0, &t1);
    gfmul_only(a6, b3, &t0, &t1);
    gfmul_only(a7, b2, &t0, &t1);
    gfmul_only(a8, b1, &t0, &t1);
    return ghash_red(t0, t1);
}
#endif


/* AES-GCM encryption using AES-NI and CLMUL intrinsics (build path for
 * compilers without inline assembly).
 *
 * in/out : nbytes of plaintext in, ciphertext out (may be 0 bytes)
 * addt   : abytes of additional authenticated data
 * ivec   : ibytes of IV (12-byte IVs take the fast aes_gcm_calc_iv_12 path)
 * tag    : receives tbytes of authentication tag
 * key/nr : expanded AES round keys and round count (10/12/14)
 *
 * The unrolled path encrypts 8 blocks per iteration; GHASH of the
 * ciphertext runs one iteration behind (it reads the previous 8 blocks
 * back from out) so the AES rounds and CLMULs interleave.
 *
 * NOTE(review): the length block uses nbytes*8 and abytes*8 computed in
 * 32-bit unsigned arithmetic, which wraps for inputs of 512MB or more —
 * confirm callers bound the sizes before this is reached. */
static void AES_GCM_encrypt(const unsigned char *in,
                            unsigned char *out,
                            const unsigned char* addt,
                            const unsigned char* ivec,
                            unsigned char *tag, unsigned int nbytes,
                            unsigned int abytes, unsigned int ibytes,
                            unsigned int tbytes,
                            const unsigned char* key, int nr)
{
    int i, j ,k;
    __m128i ctr1;
    __m128i H, Y, T;
    __m128i X = _mm_setzero_si128();
    __m128i *KEY = (__m128i*)key, lastKey;
    __m128i last_block = _mm_setzero_si128();
    __m128i tmp1, tmp2;
#ifndef AES_GCM_AESNI_NO_UNROLL
    __m128i HT[8];
    __m128i r0, r1;
    __m128i XV;
    __m128i tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
#endif

    /* Derive H, Y0 and T = E(K, Y0) from the IV. */
    if (ibytes == 12)
        aes_gcm_calc_iv_12(KEY, ivec, nr, H, Y, T);
    else
        aes_gcm_calc_iv(KEY, ivec, ibytes, nr, H, Y, T);

    /* GHASH the additional authenticated data into X. */
    for (i=0; i < (int)(abytes/16); i++) {
        tmp1 = _mm_loadu_si128(&((__m128i*)addt)[i]);
        tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
        X = _mm_xor_si128(X, tmp1);
        X = gfmul_sw(X, H);
    }
    if (abytes%16) {
        last_block = _mm_setzero_si128();
        for (j=0; j < (int)(abytes%16); j++)
            ((unsigned char*)&last_block)[j] = addt[i*16+j];
        tmp1 = last_block;
        tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
        X = _mm_xor_si128(X, tmp1);
        X = gfmul_sw(X, H);
    }
    /* First data counter is Y0 + 1; pre-shift H for the shifted gfmuls. */
    tmp1 = _mm_shuffle_epi8(Y, BSWAP_EPI64);
    ctr1 = _mm_add_epi32(tmp1, ONE);
    H = gfmul_shl1(H);

#ifndef AES_GCM_AESNI_NO_UNROLL
    i = 0;
    if (nbytes >= 16*8) {
        /* Table of H^1..H^8 for 8-block GHASH. */
        HT[0] = H;
        HT[1] = gfmul_shifted(H, H);
        HT[2] = gfmul_shifted(H, HT[1]);
        HT[3] = gfmul_shifted(HT[1], HT[1]);
        HT[4] = gfmul_shifted(HT[1], HT[2]);
        HT[5] = gfmul_shifted(HT[2], HT[2]);
        HT[6] = gfmul_shifted(HT[2], HT[3]);
        HT[7] = gfmul_shifted(HT[3], HT[3]);

        /* First 8 blocks: encrypt only (their GHASH happens in the next
         * iteration, or in the gfmul8 flush below). */
        tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
        tmp2 = _mm_add_epi32(ctr1, ONE);
        tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_EPI64);
        tmp3 = _mm_add_epi32(ctr1, TWO);
        tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_EPI64);
        tmp4 = _mm_add_epi32(ctr1, THREE);
        tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_EPI64);
        tmp5 = _mm_add_epi32(ctr1, FOUR);
        tmp5 = _mm_shuffle_epi8(tmp5, BSWAP_EPI64);
        tmp6 = _mm_add_epi32(ctr1, FIVE);
        tmp6 = _mm_shuffle_epi8(tmp6, BSWAP_EPI64);
        tmp7 = _mm_add_epi32(ctr1, SIX);
        tmp7 = _mm_shuffle_epi8(tmp7, BSWAP_EPI64);
        tmp8 = _mm_add_epi32(ctr1, SEVEN);
        tmp8 = _mm_shuffle_epi8(tmp8, BSWAP_EPI64);
        ctr1 = _mm_add_epi32(ctr1, EIGHT);
        tmp1 =_mm_xor_si128(tmp1, KEY[0]);
        tmp2 =_mm_xor_si128(tmp2, KEY[0]);
        tmp3 =_mm_xor_si128(tmp3, KEY[0]);
        tmp4 =_mm_xor_si128(tmp4, KEY[0]);
        tmp5 =_mm_xor_si128(tmp5, KEY[0]);
        tmp6 =_mm_xor_si128(tmp6, KEY[0]);
        tmp7 =_mm_xor_si128(tmp7, KEY[0]);
        tmp8 =_mm_xor_si128(tmp8, KEY[0]);
        AES_ENC_8(1);
        AES_ENC_8(2);
        AES_ENC_8(3);
        AES_ENC_8(4);
        AES_ENC_8(5);
        AES_ENC_8(6);
        AES_ENC_8(7);
        AES_ENC_8(8);
        AES_ENC_8(9);
        lastKey = KEY[10];
        if (nr > 10) {
            AES_ENC_8(10);
            AES_ENC_8(11);
            lastKey = KEY[12];
            if (nr > 12) {
                AES_ENC_8(12);
                AES_ENC_8(13);
                lastKey = KEY[14];
            }
        }
        AES_ENC_LAST_8();

        /* Steady state: encrypt 8 blocks while GHASHing the previous 8
         * ciphertext blocks, interleaving CLMULs between AES rounds. */
        for (i=1; i < (int)(nbytes/16/8); i++) {
            r0 = _mm_setzero_si128();
            r1 = _mm_setzero_si128();
            tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
            tmp2 = _mm_add_epi32(ctr1, ONE);
            tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_EPI64);
            tmp3 = _mm_add_epi32(ctr1, TWO);
            tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_EPI64);
            tmp4 = _mm_add_epi32(ctr1, THREE);
            tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_EPI64);
            tmp5 = _mm_add_epi32(ctr1, FOUR);
            tmp5 = _mm_shuffle_epi8(tmp5, BSWAP_EPI64);
            tmp6 = _mm_add_epi32(ctr1, FIVE);
            tmp6 = _mm_shuffle_epi8(tmp6, BSWAP_EPI64);
            tmp7 = _mm_add_epi32(ctr1, SIX);
            tmp7 = _mm_shuffle_epi8(tmp7, BSWAP_EPI64);
            tmp8 = _mm_add_epi32(ctr1, SEVEN);
            tmp8 = _mm_shuffle_epi8(tmp8, BSWAP_EPI64);
            ctr1 = _mm_add_epi32(ctr1, EIGHT);
            tmp1 =_mm_xor_si128(tmp1, KEY[0]);
            tmp2 =_mm_xor_si128(tmp2, KEY[0]);
            tmp3 =_mm_xor_si128(tmp3, KEY[0]);
            tmp4 =_mm_xor_si128(tmp4, KEY[0]);
            tmp5 =_mm_xor_si128(tmp5, KEY[0]);
            tmp6 =_mm_xor_si128(tmp6, KEY[0]);
            tmp7 =_mm_xor_si128(tmp7, KEY[0]);
            tmp8 =_mm_xor_si128(tmp8, KEY[0]);
            /* 128 x 128 Carryless Multiply */
            XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+0]);
            XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
            XV = _mm_xor_si128(XV, X);
            gfmul_only(XV, HT[7], &r0, &r1);
            tmp1 = _mm_aesenc_si128(tmp1, KEY[1]);
            tmp2 = _mm_aesenc_si128(tmp2, KEY[1]);
            tmp3 = _mm_aesenc_si128(tmp3, KEY[1]);
            tmp4 = _mm_aesenc_si128(tmp4, KEY[1]);
            tmp5 = _mm_aesenc_si128(tmp5, KEY[1]);
            tmp6 = _mm_aesenc_si128(tmp6, KEY[1]);
            tmp7 = _mm_aesenc_si128(tmp7, KEY[1]);
            tmp8 = _mm_aesenc_si128(tmp8, KEY[1]);
            /* 128 x 128 Carryless Multiply */
            XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+1]);
            XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
            gfmul_only(XV, HT[6], &r0, &r1);
            tmp1 = _mm_aesenc_si128(tmp1, KEY[2]);
            tmp2 = _mm_aesenc_si128(tmp2, KEY[2]);
            tmp3 = _mm_aesenc_si128(tmp3, KEY[2]);
            tmp4 = _mm_aesenc_si128(tmp4, KEY[2]);
            tmp5 = _mm_aesenc_si128(tmp5, KEY[2]);
            tmp6 = _mm_aesenc_si128(tmp6, KEY[2]);
            tmp7 = _mm_aesenc_si128(tmp7, KEY[2]);
            tmp8 = _mm_aesenc_si128(tmp8, KEY[2]);
            /* 128 x 128 Carryless Multiply */
            XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+2]);
            XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
            gfmul_only(XV, HT[5], &r0, &r1);
            tmp1 = _mm_aesenc_si128(tmp1, KEY[3]);
            tmp2 = _mm_aesenc_si128(tmp2, KEY[3]);
            tmp3 = _mm_aesenc_si128(tmp3, KEY[3]);
            tmp4 = _mm_aesenc_si128(tmp4, KEY[3]);
            tmp5 = _mm_aesenc_si128(tmp5, KEY[3]);
            tmp6 = _mm_aesenc_si128(tmp6, KEY[3]);
            tmp7 = _mm_aesenc_si128(tmp7, KEY[3]);
            tmp8 = _mm_aesenc_si128(tmp8, KEY[3]);
            /* 128 x 128 Carryless Multiply */
            XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+3]);
            XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
            gfmul_only(XV, HT[4], &r0, &r1);
            tmp1 = _mm_aesenc_si128(tmp1, KEY[4]);
            tmp2 = _mm_aesenc_si128(tmp2, KEY[4]);
            tmp3 = _mm_aesenc_si128(tmp3, KEY[4]);
            tmp4 = _mm_aesenc_si128(tmp4, KEY[4]);
            tmp5 = _mm_aesenc_si128(tmp5, KEY[4]);
            tmp6 = _mm_aesenc_si128(tmp6, KEY[4]);
            tmp7 = _mm_aesenc_si128(tmp7, KEY[4]);
            tmp8 = _mm_aesenc_si128(tmp8, KEY[4]);
            /* 128 x 128 Carryless Multiply */
            XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+4]);
            XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
            gfmul_only(XV, HT[3], &r0, &r1);
            tmp1 = _mm_aesenc_si128(tmp1, KEY[5]);
            tmp2 = _mm_aesenc_si128(tmp2, KEY[5]);
            tmp3 = _mm_aesenc_si128(tmp3, KEY[5]);
            tmp4 = _mm_aesenc_si128(tmp4, KEY[5]);
            tmp5 = _mm_aesenc_si128(tmp5, KEY[5]);
            tmp6 = _mm_aesenc_si128(tmp6, KEY[5]);
            tmp7 = _mm_aesenc_si128(tmp7, KEY[5]);
            tmp8 = _mm_aesenc_si128(tmp8, KEY[5]);
            /* 128 x 128 Carryless Multiply */
            XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+5]);
            XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
            gfmul_only(XV, HT[2], &r0, &r1);
            tmp1 = _mm_aesenc_si128(tmp1, KEY[6]);
            tmp2 = _mm_aesenc_si128(tmp2, KEY[6]);
            tmp3 = _mm_aesenc_si128(tmp3, KEY[6]);
            tmp4 = _mm_aesenc_si128(tmp4, KEY[6]);
            tmp5 = _mm_aesenc_si128(tmp5, KEY[6]);
            tmp6 = _mm_aesenc_si128(tmp6, KEY[6]);
            tmp7 = _mm_aesenc_si128(tmp7, KEY[6]);
            tmp8 = _mm_aesenc_si128(tmp8, KEY[6]);
            /* 128 x 128 Carryless Multiply */
            XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+6]);
            XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
            gfmul_only(XV, HT[1], &r0, &r1);
            tmp1 = _mm_aesenc_si128(tmp1, KEY[7]);
            tmp2 = _mm_aesenc_si128(tmp2, KEY[7]);
            tmp3 = _mm_aesenc_si128(tmp3, KEY[7]);
            tmp4 = _mm_aesenc_si128(tmp4, KEY[7]);
            tmp5 = _mm_aesenc_si128(tmp5, KEY[7]);
            tmp6 = _mm_aesenc_si128(tmp6, KEY[7]);
            tmp7 = _mm_aesenc_si128(tmp7, KEY[7]);
            tmp8 = _mm_aesenc_si128(tmp8, KEY[7]);
            /* 128 x 128 Carryless Multiply */
            XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+7]);
            XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
            gfmul_only(XV, HT[0], &r0, &r1);
            tmp1 = _mm_aesenc_si128(tmp1, KEY[8]);
            tmp2 = _mm_aesenc_si128(tmp2, KEY[8]);
            tmp3 = _mm_aesenc_si128(tmp3, KEY[8]);
            tmp4 = _mm_aesenc_si128(tmp4, KEY[8]);
            tmp5 = _mm_aesenc_si128(tmp5, KEY[8]);
            tmp6 = _mm_aesenc_si128(tmp6, KEY[8]);
            tmp7 = _mm_aesenc_si128(tmp7, KEY[8]);
            tmp8 = _mm_aesenc_si128(tmp8, KEY[8]);
            /* Reduction */
            X = ghash_red(r0, r1);
            tmp1 = _mm_aesenc_si128(tmp1, KEY[9]);
            tmp2 = _mm_aesenc_si128(tmp2, KEY[9]);
            tmp3 = _mm_aesenc_si128(tmp3, KEY[9]);
            tmp4 = _mm_aesenc_si128(tmp4, KEY[9]);
            tmp5 = _mm_aesenc_si128(tmp5, KEY[9]);
            tmp6 = _mm_aesenc_si128(tmp6, KEY[9]);
            tmp7 = _mm_aesenc_si128(tmp7, KEY[9]);
            tmp8 = _mm_aesenc_si128(tmp8, KEY[9]);
            lastKey = KEY[10];
            if (nr > 10) {
                tmp1 = _mm_aesenc_si128(tmp1, KEY[10]);
                tmp2 = _mm_aesenc_si128(tmp2, KEY[10]);
                tmp3 = _mm_aesenc_si128(tmp3, KEY[10]);
                tmp4 = _mm_aesenc_si128(tmp4, KEY[10]);
                tmp5 = _mm_aesenc_si128(tmp5, KEY[10]);
                tmp6 = _mm_aesenc_si128(tmp6, KEY[10]);
                tmp7 = _mm_aesenc_si128(tmp7, KEY[10]);
                tmp8 = _mm_aesenc_si128(tmp8, KEY[10]);
                tmp1 = _mm_aesenc_si128(tmp1, KEY[11]);
                tmp2 = _mm_aesenc_si128(tmp2, KEY[11]);
                tmp3 = _mm_aesenc_si128(tmp3, KEY[11]);
                tmp4 = _mm_aesenc_si128(tmp4, KEY[11]);
                tmp5 = _mm_aesenc_si128(tmp5, KEY[11]);
                tmp6 = _mm_aesenc_si128(tmp6, KEY[11]);
                tmp7 = _mm_aesenc_si128(tmp7, KEY[11]);
                tmp8 = _mm_aesenc_si128(tmp8, KEY[11]);
                lastKey = KEY[12];
                if (nr > 12) {
                    tmp1 = _mm_aesenc_si128(tmp1, KEY[12]);
                    tmp2 = _mm_aesenc_si128(tmp2, KEY[12]);
                    tmp3 = _mm_aesenc_si128(tmp3, KEY[12]);
                    tmp4 = _mm_aesenc_si128(tmp4, KEY[12]);
                    tmp5 = _mm_aesenc_si128(tmp5, KEY[12]);
                    tmp6 = _mm_aesenc_si128(tmp6, KEY[12]);
                    tmp7 = _mm_aesenc_si128(tmp7, KEY[12]);
                    tmp8 = _mm_aesenc_si128(tmp8, KEY[12]);
                    tmp1 = _mm_aesenc_si128(tmp1, KEY[13]);
                    tmp2 = _mm_aesenc_si128(tmp2, KEY[13]);
                    tmp3 = _mm_aesenc_si128(tmp3, KEY[13]);
                    tmp4 = _mm_aesenc_si128(tmp4, KEY[13]);
                    tmp5 = _mm_aesenc_si128(tmp5, KEY[13]);
                    tmp6 = _mm_aesenc_si128(tmp6, KEY[13]);
                    tmp7 = _mm_aesenc_si128(tmp7, KEY[13]);
                    tmp8 = _mm_aesenc_si128(tmp8, KEY[13]);
                    lastKey = KEY[14];
                }
            }
            AES_ENC_LAST_8();
        }

        /* Flush: GHASH the final 8 ciphertext blocks (still held in
         * tmp1..tmp8) with one gfmul8 call. */
        tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
        tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_MASK);
        tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_MASK);
        tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_MASK);
        tmp5 = _mm_shuffle_epi8(tmp5, BSWAP_MASK);
        tmp6 = _mm_shuffle_epi8(tmp6, BSWAP_MASK);
        tmp7 = _mm_shuffle_epi8(tmp7, BSWAP_MASK);
        tmp8 = _mm_shuffle_epi8(tmp8, BSWAP_MASK);
        tmp1 = _mm_xor_si128(X, tmp1);
        X = gfmul8(tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8,
                   HT[0], HT[1], HT[2], HT[3], HT[4], HT[5], HT[6], HT[7]);
    }
    /* Remaining whole blocks, one at a time. */
    for (k = i*8; k < (int)(nbytes/16); k++) {
        tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
        ctr1 = _mm_add_epi32(ctr1, ONE);
        tmp1 = _mm_xor_si128(tmp1, KEY[0]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[1]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[2]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[3]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[4]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[5]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[6]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[7]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[8]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[9]);
        lastKey = KEY[10];
        if (nr > 10) {
            tmp1 = _mm_aesenc_si128(tmp1, lastKey);
            tmp1 = _mm_aesenc_si128(tmp1, KEY[11]);
            lastKey = KEY[12];
            if (nr > 12) {
                tmp1 = _mm_aesenc_si128(tmp1, lastKey);
                tmp1 = _mm_aesenc_si128(tmp1, KEY[13]);
                lastKey = KEY[14];
            }
        }
        tmp1 = _mm_aesenclast_si128(tmp1, lastKey);
        tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[k]));
        _mm_storeu_si128(&((__m128i*)out)[k], tmp1);
        tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
        X =_mm_xor_si128(X, tmp1);
        X = gfmul_shifted(X, H);
    }
#else /* AES_GCM_AESNI_NO_UNROLL */
    /* First block: encrypt and XOR into X only; its gfmul is deferred so
     * the multiply can overlap the next block's AES rounds. */
    for (k = 0; k < (int)(nbytes/16) && k < 1; k++) {
        tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
        ctr1 = _mm_add_epi32(ctr1, ONE);
        tmp1 = _mm_xor_si128(tmp1, KEY[0]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[1]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[2]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[3]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[4]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[5]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[6]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[7]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[8]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[9]);
        lastKey = KEY[10];
        if (nr > 10) {
            tmp1 = _mm_aesenc_si128(tmp1, lastKey);
            tmp1 = _mm_aesenc_si128(tmp1, KEY[11]);
            lastKey = KEY[12];
            if (nr > 12) {
                tmp1 = _mm_aesenc_si128(tmp1, lastKey);
                tmp1 = _mm_aesenc_si128(tmp1, KEY[13]);
                lastKey = KEY[14];
            }
        }
        tmp1 = _mm_aesenclast_si128(tmp1, lastKey);
        tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[k]));
        _mm_storeu_si128(&((__m128i*)out)[k], tmp1);
        tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
        X =_mm_xor_si128(X, tmp1);
    }
    /* Remaining blocks: the previous block's gfmul is interleaved with
     * this block's AES rounds. */
    for (; k < (int)(nbytes/16); k++) {
        tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
        ctr1 = _mm_add_epi32(ctr1, ONE);
        tmp1 = _mm_xor_si128(tmp1, KEY[0]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[1]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[2]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[3]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[4]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[5]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[6]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[7]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[8]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[9]);
        X = gfmul_shifted(X, H);
        lastKey = KEY[10];
        if (nr > 10) {
            tmp1 = _mm_aesenc_si128(tmp1, lastKey);
            tmp1 = _mm_aesenc_si128(tmp1, KEY[11]);
            lastKey = KEY[12];
            if (nr > 12) {
                tmp1 = _mm_aesenc_si128(tmp1, lastKey);
                tmp1 = _mm_aesenc_si128(tmp1, KEY[13]);
                lastKey = KEY[14];
            }
        }
        tmp1 = _mm_aesenclast_si128(tmp1, lastKey);
        tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[k]));
        _mm_storeu_si128(&((__m128i*)out)[k], tmp1);
        tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
        X =_mm_xor_si128(X, tmp1);
    }
    /* Complete the deferred gfmul for the last block. */
    if (k > 0) {
        X = gfmul_shifted(X, H);
    }
#endif /* AES_GCM_AESNI_NO_UNROLL */

    /* If one partial block remains */
    if (nbytes % 16) {
        tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
        tmp1 = _mm_xor_si128(tmp1, KEY[0]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[1]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[2]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[3]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[4]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[5]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[6]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[7]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[8]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[9]);
        lastKey = KEY[10];
        if (nr > 10) {
            tmp1 = _mm_aesenc_si128(tmp1, lastKey);
            tmp1 = _mm_aesenc_si128(tmp1, KEY[11]);
            lastKey = KEY[12];
            if (nr > 12) {
                tmp1 = _mm_aesenc_si128(tmp1, lastKey);
                tmp1 = _mm_aesenc_si128(tmp1, KEY[13]);
                lastKey = KEY[14];
            }
        }
        tmp1 = _mm_aesenclast_si128(tmp1, lastKey);
        /* XOR keystream with the plaintext bytes; the untouched high
         * bytes of last_block cancel to zero, giving GHASH padding. */
        last_block = tmp1;
        for (j=0; j < (int)(nbytes%16); j++)
            ((unsigned char*)&last_block)[j] = in[k*16+j];
        tmp1 = _mm_xor_si128(tmp1, last_block);
        last_block = tmp1;
        for (j=0; j < (int)(nbytes%16); j++)
            out[k*16+j] = ((unsigned char*)&last_block)[j];
        tmp1 = last_block;
        tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
        X =_mm_xor_si128(X, tmp1);
        X = gfmul_shifted(X, H);
    }
    /* Length block: bit lengths of ciphertext (low) and AAD (high). */
    tmp1 = _mm_insert_epi64(tmp1, nbytes*8, 0);
    tmp1 = _mm_insert_epi64(tmp1, abytes*8, 1);
    X = _mm_xor_si128(X, tmp1);
    X = gfmul_shifted(X, H);
    X = _mm_shuffle_epi8(X, BSWAP_MASK);
    /* Tag = GHASH result XOR E(K, Y0); copy out tbytes of it. */
    T = _mm_xor_si128(X, T);
    /*_mm_storeu_si128((__m128i*)tag, T);*/
    XMEMCPY(tag, &T, tbytes);
}

#ifdef HAVE_AES_DECRYPT

static void
AES_GCM_decrypt(const unsigned char *in, 07434 unsigned char *out, 07435 const unsigned char* addt, 07436 const unsigned char* ivec, 07437 const unsigned char *tag, int nbytes, int abytes, 07438 int ibytes, word32 tbytes, const unsigned char* key, 07439 int nr, int* res) 07440 { 07441 int i, j ,k; 07442 __m128i H, Y, T; 07443 __m128i *KEY = (__m128i*)key, lastKey; 07444 __m128i ctr1; 07445 __m128i last_block = _mm_setzero_si128(); 07446 __m128i X = _mm_setzero_si128(); 07447 __m128i tmp1, tmp2, XV; 07448 #ifndef AES_GCM_AESNI_NO_UNROLL 07449 __m128i HT[8]; 07450 __m128i r0, r1; 07451 __m128i tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; 07452 #endif /* AES_GCM_AESNI_NO_UNROLL */ 07453 07454 if (ibytes == 12) 07455 aes_gcm_calc_iv_12(KEY, ivec, nr, H, Y, T); 07456 else 07457 aes_gcm_calc_iv(KEY, ivec, ibytes, nr, H, Y, T); 07458 07459 for (i=0; i<abytes/16; i++) { 07460 tmp1 = _mm_loadu_si128(&((__m128i*)addt)[i]); 07461 tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); 07462 X = _mm_xor_si128(X, tmp1); 07463 X = gfmul_sw(X, H); 07464 } 07465 if (abytes%16) { 07466 last_block = _mm_setzero_si128(); 07467 for (j=0; j<abytes%16; j++) 07468 ((unsigned char*)&last_block)[j] = addt[i*16+j]; 07469 tmp1 = last_block; 07470 tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); 07471 X = _mm_xor_si128(X, tmp1); 07472 X = gfmul_sw(X, H); 07473 } 07474 07475 tmp1 = _mm_shuffle_epi8(Y, BSWAP_EPI64); 07476 ctr1 = _mm_add_epi32(tmp1, ONE); 07477 H = gfmul_shl1(H); 07478 i = 0; 07479 07480 #ifndef AES_GCM_AESNI_NO_UNROLL 07481 07482 if (0 < nbytes/16/8) { 07483 HT[0] = H; 07484 HT[1] = gfmul_shifted(H, H); 07485 HT[2] = gfmul_shifted(H, HT[1]); 07486 HT[3] = gfmul_shifted(HT[1], HT[1]); 07487 HT[4] = gfmul_shifted(HT[1], HT[2]); 07488 HT[5] = gfmul_shifted(HT[2], HT[2]); 07489 HT[6] = gfmul_shifted(HT[2], HT[3]); 07490 HT[7] = gfmul_shifted(HT[3], HT[3]); 07491 07492 for (; i < nbytes/16/8; i++) { 07493 r0 = _mm_setzero_si128(); 07494 r1 = _mm_setzero_si128(); 07495 07496 tmp1 = _mm_shuffle_epi8(ctr1, 
BSWAP_EPI64); 07497 tmp2 = _mm_add_epi32(ctr1, ONE); 07498 tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_EPI64); 07499 tmp3 = _mm_add_epi32(ctr1, TWO); 07500 tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_EPI64); 07501 tmp4 = _mm_add_epi32(ctr1, THREE); 07502 tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_EPI64); 07503 tmp5 = _mm_add_epi32(ctr1, FOUR); 07504 tmp5 = _mm_shuffle_epi8(tmp5, BSWAP_EPI64); 07505 tmp6 = _mm_add_epi32(ctr1, FIVE); 07506 tmp6 = _mm_shuffle_epi8(tmp6, BSWAP_EPI64); 07507 tmp7 = _mm_add_epi32(ctr1, SIX); 07508 tmp7 = _mm_shuffle_epi8(tmp7, BSWAP_EPI64); 07509 tmp8 = _mm_add_epi32(ctr1, SEVEN); 07510 tmp8 = _mm_shuffle_epi8(tmp8, BSWAP_EPI64); 07511 ctr1 = _mm_add_epi32(ctr1, EIGHT); 07512 tmp1 =_mm_xor_si128(tmp1, KEY[0]); 07513 tmp2 =_mm_xor_si128(tmp2, KEY[0]); 07514 tmp3 =_mm_xor_si128(tmp3, KEY[0]); 07515 tmp4 =_mm_xor_si128(tmp4, KEY[0]); 07516 tmp5 =_mm_xor_si128(tmp5, KEY[0]); 07517 tmp6 =_mm_xor_si128(tmp6, KEY[0]); 07518 tmp7 =_mm_xor_si128(tmp7, KEY[0]); 07519 tmp8 =_mm_xor_si128(tmp8, KEY[0]); 07520 /* 128 x 128 Carryless Multiply */ 07521 XV = _mm_loadu_si128(&((__m128i*)in)[i*8+0]); 07522 XV = _mm_shuffle_epi8(XV, BSWAP_MASK); 07523 XV = _mm_xor_si128(XV, X); 07524 gfmul_only(XV, HT[7], &r0, &r1); 07525 tmp1 = _mm_aesenc_si128(tmp1, KEY[1]); 07526 tmp2 = _mm_aesenc_si128(tmp2, KEY[1]); 07527 tmp3 = _mm_aesenc_si128(tmp3, KEY[1]); 07528 tmp4 = _mm_aesenc_si128(tmp4, KEY[1]); 07529 tmp5 = _mm_aesenc_si128(tmp5, KEY[1]); 07530 tmp6 = _mm_aesenc_si128(tmp6, KEY[1]); 07531 tmp7 = _mm_aesenc_si128(tmp7, KEY[1]); 07532 tmp8 = _mm_aesenc_si128(tmp8, KEY[1]); 07533 /* 128 x 128 Carryless Multiply */ 07534 XV = _mm_loadu_si128(&((__m128i*)in)[i*8+1]); 07535 XV = _mm_shuffle_epi8(XV, BSWAP_MASK); 07536 gfmul_only(XV, HT[6], &r0, &r1); 07537 tmp1 = _mm_aesenc_si128(tmp1, KEY[2]); 07538 tmp2 = _mm_aesenc_si128(tmp2, KEY[2]); 07539 tmp3 = _mm_aesenc_si128(tmp3, KEY[2]); 07540 tmp4 = _mm_aesenc_si128(tmp4, KEY[2]); 07541 tmp5 = _mm_aesenc_si128(tmp5, KEY[2]); 07542 tmp6 = 
_mm_aesenc_si128(tmp6, KEY[2]); 07543 tmp7 = _mm_aesenc_si128(tmp7, KEY[2]); 07544 tmp8 = _mm_aesenc_si128(tmp8, KEY[2]); 07545 /* 128 x 128 Carryless Multiply */ 07546 XV = _mm_loadu_si128(&((__m128i*)in)[i*8+2]); 07547 XV = _mm_shuffle_epi8(XV, BSWAP_MASK); 07548 gfmul_only(XV, HT[5], &r0, &r1); 07549 tmp1 = _mm_aesenc_si128(tmp1, KEY[3]); 07550 tmp2 = _mm_aesenc_si128(tmp2, KEY[3]); 07551 tmp3 = _mm_aesenc_si128(tmp3, KEY[3]); 07552 tmp4 = _mm_aesenc_si128(tmp4, KEY[3]); 07553 tmp5 = _mm_aesenc_si128(tmp5, KEY[3]); 07554 tmp6 = _mm_aesenc_si128(tmp6, KEY[3]); 07555 tmp7 = _mm_aesenc_si128(tmp7, KEY[3]); 07556 tmp8 = _mm_aesenc_si128(tmp8, KEY[3]); 07557 /* 128 x 128 Carryless Multiply */ 07558 XV = _mm_loadu_si128(&((__m128i*)in)[i*8+3]); 07559 XV = _mm_shuffle_epi8(XV, BSWAP_MASK); 07560 gfmul_only(XV, HT[4], &r0, &r1); 07561 tmp1 = _mm_aesenc_si128(tmp1, KEY[4]); 07562 tmp2 = _mm_aesenc_si128(tmp2, KEY[4]); 07563 tmp3 = _mm_aesenc_si128(tmp3, KEY[4]); 07564 tmp4 = _mm_aesenc_si128(tmp4, KEY[4]); 07565 tmp5 = _mm_aesenc_si128(tmp5, KEY[4]); 07566 tmp6 = _mm_aesenc_si128(tmp6, KEY[4]); 07567 tmp7 = _mm_aesenc_si128(tmp7, KEY[4]); 07568 tmp8 = _mm_aesenc_si128(tmp8, KEY[4]); 07569 /* 128 x 128 Carryless Multiply */ 07570 XV = _mm_loadu_si128(&((__m128i*)in)[i*8+4]); 07571 XV = _mm_shuffle_epi8(XV, BSWAP_MASK); 07572 gfmul_only(XV, HT[3], &r0, &r1); 07573 tmp1 = _mm_aesenc_si128(tmp1, KEY[5]); 07574 tmp2 = _mm_aesenc_si128(tmp2, KEY[5]); 07575 tmp3 = _mm_aesenc_si128(tmp3, KEY[5]); 07576 tmp4 = _mm_aesenc_si128(tmp4, KEY[5]); 07577 tmp5 = _mm_aesenc_si128(tmp5, KEY[5]); 07578 tmp6 = _mm_aesenc_si128(tmp6, KEY[5]); 07579 tmp7 = _mm_aesenc_si128(tmp7, KEY[5]); 07580 tmp8 = _mm_aesenc_si128(tmp8, KEY[5]); 07581 /* 128 x 128 Carryless Multiply */ 07582 XV = _mm_loadu_si128(&((__m128i*)in)[i*8+5]); 07583 XV = _mm_shuffle_epi8(XV, BSWAP_MASK); 07584 gfmul_only(XV, HT[2], &r0, &r1); 07585 tmp1 = _mm_aesenc_si128(tmp1, KEY[6]); 07586 tmp2 = _mm_aesenc_si128(tmp2, KEY[6]); 
07587 tmp3 = _mm_aesenc_si128(tmp3, KEY[6]); 07588 tmp4 = _mm_aesenc_si128(tmp4, KEY[6]); 07589 tmp5 = _mm_aesenc_si128(tmp5, KEY[6]); 07590 tmp6 = _mm_aesenc_si128(tmp6, KEY[6]); 07591 tmp7 = _mm_aesenc_si128(tmp7, KEY[6]); 07592 tmp8 = _mm_aesenc_si128(tmp8, KEY[6]); 07593 /* 128 x 128 Carryless Multiply */ 07594 XV = _mm_loadu_si128(&((__m128i*)in)[i*8+6]); 07595 XV = _mm_shuffle_epi8(XV, BSWAP_MASK); 07596 gfmul_only(XV, HT[1], &r0, &r1); 07597 tmp1 = _mm_aesenc_si128(tmp1, KEY[7]); 07598 tmp2 = _mm_aesenc_si128(tmp2, KEY[7]); 07599 tmp3 = _mm_aesenc_si128(tmp3, KEY[7]); 07600 tmp4 = _mm_aesenc_si128(tmp4, KEY[7]); 07601 tmp5 = _mm_aesenc_si128(tmp5, KEY[7]); 07602 tmp6 = _mm_aesenc_si128(tmp6, KEY[7]); 07603 tmp7 = _mm_aesenc_si128(tmp7, KEY[7]); 07604 tmp8 = _mm_aesenc_si128(tmp8, KEY[7]); 07605 /* 128 x 128 Carryless Multiply */ 07606 XV = _mm_loadu_si128(&((__m128i*)in)[i*8+7]); 07607 XV = _mm_shuffle_epi8(XV, BSWAP_MASK); 07608 gfmul_only(XV, HT[0], &r0, &r1); 07609 tmp1 = _mm_aesenc_si128(tmp1, KEY[8]); 07610 tmp2 = _mm_aesenc_si128(tmp2, KEY[8]); 07611 tmp3 = _mm_aesenc_si128(tmp3, KEY[8]); 07612 tmp4 = _mm_aesenc_si128(tmp4, KEY[8]); 07613 tmp5 = _mm_aesenc_si128(tmp5, KEY[8]); 07614 tmp6 = _mm_aesenc_si128(tmp6, KEY[8]); 07615 tmp7 = _mm_aesenc_si128(tmp7, KEY[8]); 07616 tmp8 = _mm_aesenc_si128(tmp8, KEY[8]); 07617 /* Reduction */ 07618 X = ghash_red(r0, r1); 07619 tmp1 = _mm_aesenc_si128(tmp1, KEY[9]); 07620 tmp2 = _mm_aesenc_si128(tmp2, KEY[9]); 07621 tmp3 = _mm_aesenc_si128(tmp3, KEY[9]); 07622 tmp4 = _mm_aesenc_si128(tmp4, KEY[9]); 07623 tmp5 = _mm_aesenc_si128(tmp5, KEY[9]); 07624 tmp6 = _mm_aesenc_si128(tmp6, KEY[9]); 07625 tmp7 = _mm_aesenc_si128(tmp7, KEY[9]); 07626 tmp8 = _mm_aesenc_si128(tmp8, KEY[9]); 07627 lastKey = KEY[10]; 07628 if (nr > 10) { 07629 tmp1 = _mm_aesenc_si128(tmp1, KEY[10]); 07630 tmp2 = _mm_aesenc_si128(tmp2, KEY[10]); 07631 tmp3 = _mm_aesenc_si128(tmp3, KEY[10]); 07632 tmp4 = _mm_aesenc_si128(tmp4, KEY[10]); 07633 tmp5 = 
_mm_aesenc_si128(tmp5, KEY[10]); 07634 tmp6 = _mm_aesenc_si128(tmp6, KEY[10]); 07635 tmp7 = _mm_aesenc_si128(tmp7, KEY[10]); 07636 tmp8 = _mm_aesenc_si128(tmp8, KEY[10]); 07637 tmp1 = _mm_aesenc_si128(tmp1, KEY[11]); 07638 tmp2 = _mm_aesenc_si128(tmp2, KEY[11]); 07639 tmp3 = _mm_aesenc_si128(tmp3, KEY[11]); 07640 tmp4 = _mm_aesenc_si128(tmp4, KEY[11]); 07641 tmp5 = _mm_aesenc_si128(tmp5, KEY[11]); 07642 tmp6 = _mm_aesenc_si128(tmp6, KEY[11]); 07643 tmp7 = _mm_aesenc_si128(tmp7, KEY[11]); 07644 tmp8 = _mm_aesenc_si128(tmp8, KEY[11]); 07645 lastKey = KEY[12]; 07646 if (nr > 12) { 07647 tmp1 = _mm_aesenc_si128(tmp1, KEY[12]); 07648 tmp2 = _mm_aesenc_si128(tmp2, KEY[12]); 07649 tmp3 = _mm_aesenc_si128(tmp3, KEY[12]); 07650 tmp4 = _mm_aesenc_si128(tmp4, KEY[12]); 07651 tmp5 = _mm_aesenc_si128(tmp5, KEY[12]); 07652 tmp6 = _mm_aesenc_si128(tmp6, KEY[12]); 07653 tmp7 = _mm_aesenc_si128(tmp7, KEY[12]); 07654 tmp8 = _mm_aesenc_si128(tmp8, KEY[12]); 07655 tmp1 = _mm_aesenc_si128(tmp1, KEY[13]); 07656 tmp2 = _mm_aesenc_si128(tmp2, KEY[13]); 07657 tmp3 = _mm_aesenc_si128(tmp3, KEY[13]); 07658 tmp4 = _mm_aesenc_si128(tmp4, KEY[13]); 07659 tmp5 = _mm_aesenc_si128(tmp5, KEY[13]); 07660 tmp6 = _mm_aesenc_si128(tmp6, KEY[13]); 07661 tmp7 = _mm_aesenc_si128(tmp7, KEY[13]); 07662 tmp8 = _mm_aesenc_si128(tmp8, KEY[13]); 07663 lastKey = KEY[14]; 07664 } 07665 } 07666 AES_ENC_LAST_8(); 07667 } 07668 } 07669 07670 #endif /* AES_GCM_AESNI_NO_UNROLL */ 07671 07672 for (k = i*8; k < nbytes/16; k++) { 07673 tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); 07674 ctr1 = _mm_add_epi32(ctr1, ONE); 07675 tmp1 = _mm_xor_si128(tmp1, KEY[0]); 07676 tmp1 = _mm_aesenc_si128(tmp1, KEY[1]); 07677 tmp1 = _mm_aesenc_si128(tmp1, KEY[2]); 07678 tmp1 = _mm_aesenc_si128(tmp1, KEY[3]); 07679 tmp1 = _mm_aesenc_si128(tmp1, KEY[4]); 07680 tmp1 = _mm_aesenc_si128(tmp1, KEY[5]); 07681 tmp1 = _mm_aesenc_si128(tmp1, KEY[6]); 07682 tmp1 = _mm_aesenc_si128(tmp1, KEY[7]); 07683 tmp1 = _mm_aesenc_si128(tmp1, KEY[8]); 07684 
tmp1 = _mm_aesenc_si128(tmp1, KEY[9]); 07685 /* 128 x 128 Carryless Multiply */ 07686 XV = _mm_loadu_si128(&((__m128i*)in)[k]); 07687 XV = _mm_shuffle_epi8(XV, BSWAP_MASK); 07688 XV = _mm_xor_si128(XV, X); 07689 X = gfmul_shifted(XV, H); 07690 lastKey = KEY[10]; 07691 if (nr > 10) { 07692 tmp1 = _mm_aesenc_si128(tmp1, lastKey); 07693 tmp1 = _mm_aesenc_si128(tmp1, KEY[11]); 07694 lastKey = KEY[12]; 07695 if (nr > 12) { 07696 tmp1 = _mm_aesenc_si128(tmp1, lastKey); 07697 tmp1 = _mm_aesenc_si128(tmp1, KEY[13]); 07698 lastKey = KEY[14]; 07699 } 07700 } 07701 tmp1 = _mm_aesenclast_si128(tmp1, lastKey); 07702 tmp2 = _mm_loadu_si128(&((__m128i*)in)[k]); 07703 tmp1 = _mm_xor_si128(tmp1, tmp2); 07704 _mm_storeu_si128(&((__m128i*)out)[k], tmp1); 07705 } 07706 07707 /* If one partial block remains */ 07708 if (nbytes % 16) { 07709 tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); 07710 tmp1 = _mm_xor_si128(tmp1, KEY[0]); 07711 tmp1 = _mm_aesenc_si128(tmp1, KEY[1]); 07712 tmp1 = _mm_aesenc_si128(tmp1, KEY[2]); 07713 tmp1 = _mm_aesenc_si128(tmp1, KEY[3]); 07714 tmp1 = _mm_aesenc_si128(tmp1, KEY[4]); 07715 tmp1 = _mm_aesenc_si128(tmp1, KEY[5]); 07716 tmp1 = _mm_aesenc_si128(tmp1, KEY[6]); 07717 tmp1 = _mm_aesenc_si128(tmp1, KEY[7]); 07718 tmp1 = _mm_aesenc_si128(tmp1, KEY[8]); 07719 tmp1 = _mm_aesenc_si128(tmp1, KEY[9]); 07720 lastKey = KEY[10]; 07721 if (nr > 10) { 07722 tmp1 = _mm_aesenc_si128(tmp1, lastKey); 07723 tmp1 = _mm_aesenc_si128(tmp1, KEY[11]); 07724 lastKey = KEY[12]; 07725 if (nr > 12) { 07726 tmp1 = _mm_aesenc_si128(tmp1, lastKey); 07727 tmp1 = _mm_aesenc_si128(tmp1, KEY[13]); 07728 lastKey = KEY[14]; 07729 } 07730 } 07731 tmp1 = _mm_aesenclast_si128(tmp1, lastKey); 07732 last_block = _mm_setzero_si128(); 07733 for (j=0; j < nbytes%16; j++) 07734 ((unsigned char*)&last_block)[j] = in[k*16+j]; 07735 XV = last_block; 07736 tmp1 = _mm_xor_si128(tmp1, last_block); 07737 last_block = tmp1; 07738 for (j=0; j < nbytes%16; j++) 07739 out[k*16+j] = ((unsigned 
char*)&last_block)[j]; 07740 XV = _mm_shuffle_epi8(XV, BSWAP_MASK); 07741 XV = _mm_xor_si128(XV, X); 07742 X = gfmul_shifted(XV, H); 07743 } 07744 07745 tmp1 = _mm_insert_epi64(tmp1, nbytes*8, 0); 07746 tmp1 = _mm_insert_epi64(tmp1, abytes*8, 1); 07747 /* 128 x 128 Carryless Multiply */ 07748 X = _mm_xor_si128(X, tmp1); 07749 X = gfmul_shifted(X, H); 07750 X = _mm_shuffle_epi8(X, BSWAP_MASK); 07751 T = _mm_xor_si128(X, T); 07752 07753 /* if (0xffff != 07754 _mm_movemask_epi8(_mm_cmpeq_epi8(T, _mm_loadu_si128((__m128i*)tag)))) */ 07755 if (XMEMCMP(tag, &T, tbytes) != 0) 07756 *res = 0; /* in case the authentication failed */ 07757 else 07758 *res = 1; /* when successful returns 1 */ 07759 } 07760 07761 #endif /* HAVE_AES_DECRYPT */ 07762 #endif /* _MSC_VER */ 07763 #endif /* WOLFSSL_AESNI */ 07764 07765 07766 #if defined(GCM_SMALL) 07767 static void GMULT(byte* X, byte* Y) 07768 { 07769 byte Z[AES_BLOCK_SIZE]; 07770 byte V[AES_BLOCK_SIZE]; 07771 int i, j; 07772 07773 XMEMSET(Z, 0, AES_BLOCK_SIZE); 07774 XMEMCPY(V, X, AES_BLOCK_SIZE); 07775 for (i = 0; i < AES_BLOCK_SIZE; i++) 07776 { 07777 byte y = Y[i]; 07778 for (j = 0; j < 8; j++) 07779 { 07780 if (y & 0x80) { 07781 xorbuf(Z, V, AES_BLOCK_SIZE); 07782 } 07783 07784 RIGHTSHIFTX(V); 07785 y = y << 1; 07786 } 07787 } 07788 XMEMCPY(X, Z, AES_BLOCK_SIZE); 07789 } 07790 07791 07792 void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c, 07793 word32 cSz, byte* s, word32 sSz) 07794 { 07795 byte x[AES_BLOCK_SIZE]; 07796 byte scratch[AES_BLOCK_SIZE]; 07797 word32 blocks, partial; 07798 byte* h = aes->H; 07799 07800 XMEMSET(x, 0, AES_BLOCK_SIZE); 07801 07802 /* Hash in A, the Additional Authentication Data */ 07803 if (aSz != 0 && a != NULL) { 07804 blocks = aSz / AES_BLOCK_SIZE; 07805 partial = aSz % AES_BLOCK_SIZE; 07806 while (blocks--) { 07807 xorbuf(x, a, AES_BLOCK_SIZE); 07808 GMULT(x, h); 07809 a += AES_BLOCK_SIZE; 07810 } 07811 if (partial != 0) { 07812 XMEMSET(scratch, 0, AES_BLOCK_SIZE); 07813 
XMEMCPY(scratch, a, partial); 07814 xorbuf(x, scratch, AES_BLOCK_SIZE); 07815 GMULT(x, h); 07816 } 07817 } 07818 07819 /* Hash in C, the Ciphertext */ 07820 if (cSz != 0 && c != NULL) { 07821 blocks = cSz / AES_BLOCK_SIZE; 07822 partial = cSz % AES_BLOCK_SIZE; 07823 while (blocks--) { 07824 xorbuf(x, c, AES_BLOCK_SIZE); 07825 GMULT(x, h); 07826 c += AES_BLOCK_SIZE; 07827 } 07828 if (partial != 0) { 07829 XMEMSET(scratch, 0, AES_BLOCK_SIZE); 07830 XMEMCPY(scratch, c, partial); 07831 xorbuf(x, scratch, AES_BLOCK_SIZE); 07832 GMULT(x, h); 07833 } 07834 } 07835 07836 /* Hash in the lengths of A and C in bits */ 07837 FlattenSzInBits(&scratch[0], aSz); 07838 FlattenSzInBits(&scratch[8], cSz); 07839 xorbuf(x, scratch, AES_BLOCK_SIZE); 07840 GMULT(x, h); 07841 07842 /* Copy the result into s. */ 07843 XMEMCPY(s, x, sSz); 07844 } 07845 07846 /* end GCM_SMALL */ 07847 #elif defined(GCM_TABLE) 07848 07849 static const byte R[256][2] = { 07850 {0x00, 0x00}, {0x01, 0xc2}, {0x03, 0x84}, {0x02, 0x46}, 07851 {0x07, 0x08}, {0x06, 0xca}, {0x04, 0x8c}, {0x05, 0x4e}, 07852 {0x0e, 0x10}, {0x0f, 0xd2}, {0x0d, 0x94}, {0x0c, 0x56}, 07853 {0x09, 0x18}, {0x08, 0xda}, {0x0a, 0x9c}, {0x0b, 0x5e}, 07854 {0x1c, 0x20}, {0x1d, 0xe2}, {0x1f, 0xa4}, {0x1e, 0x66}, 07855 {0x1b, 0x28}, {0x1a, 0xea}, {0x18, 0xac}, {0x19, 0x6e}, 07856 {0x12, 0x30}, {0x13, 0xf2}, {0x11, 0xb4}, {0x10, 0x76}, 07857 {0x15, 0x38}, {0x14, 0xfa}, {0x16, 0xbc}, {0x17, 0x7e}, 07858 {0x38, 0x40}, {0x39, 0x82}, {0x3b, 0xc4}, {0x3a, 0x06}, 07859 {0x3f, 0x48}, {0x3e, 0x8a}, {0x3c, 0xcc}, {0x3d, 0x0e}, 07860 {0x36, 0x50}, {0x37, 0x92}, {0x35, 0xd4}, {0x34, 0x16}, 07861 {0x31, 0x58}, {0x30, 0x9a}, {0x32, 0xdc}, {0x33, 0x1e}, 07862 {0x24, 0x60}, {0x25, 0xa2}, {0x27, 0xe4}, {0x26, 0x26}, 07863 {0x23, 0x68}, {0x22, 0xaa}, {0x20, 0xec}, {0x21, 0x2e}, 07864 {0x2a, 0x70}, {0x2b, 0xb2}, {0x29, 0xf4}, {0x28, 0x36}, 07865 {0x2d, 0x78}, {0x2c, 0xba}, {0x2e, 0xfc}, {0x2f, 0x3e}, 07866 {0x70, 0x80}, {0x71, 0x42}, {0x73, 0x04}, {0x72, 0xc6}, 
07867 {0x77, 0x88}, {0x76, 0x4a}, {0x74, 0x0c}, {0x75, 0xce}, 07868 {0x7e, 0x90}, {0x7f, 0x52}, {0x7d, 0x14}, {0x7c, 0xd6}, 07869 {0x79, 0x98}, {0x78, 0x5a}, {0x7a, 0x1c}, {0x7b, 0xde}, 07870 {0x6c, 0xa0}, {0x6d, 0x62}, {0x6f, 0x24}, {0x6e, 0xe6}, 07871 {0x6b, 0xa8}, {0x6a, 0x6a}, {0x68, 0x2c}, {0x69, 0xee}, 07872 {0x62, 0xb0}, {0x63, 0x72}, {0x61, 0x34}, {0x60, 0xf6}, 07873 {0x65, 0xb8}, {0x64, 0x7a}, {0x66, 0x3c}, {0x67, 0xfe}, 07874 {0x48, 0xc0}, {0x49, 0x02}, {0x4b, 0x44}, {0x4a, 0x86}, 07875 {0x4f, 0xc8}, {0x4e, 0x0a}, {0x4c, 0x4c}, {0x4d, 0x8e}, 07876 {0x46, 0xd0}, {0x47, 0x12}, {0x45, 0x54}, {0x44, 0x96}, 07877 {0x41, 0xd8}, {0x40, 0x1a}, {0x42, 0x5c}, {0x43, 0x9e}, 07878 {0x54, 0xe0}, {0x55, 0x22}, {0x57, 0x64}, {0x56, 0xa6}, 07879 {0x53, 0xe8}, {0x52, 0x2a}, {0x50, 0x6c}, {0x51, 0xae}, 07880 {0x5a, 0xf0}, {0x5b, 0x32}, {0x59, 0x74}, {0x58, 0xb6}, 07881 {0x5d, 0xf8}, {0x5c, 0x3a}, {0x5e, 0x7c}, {0x5f, 0xbe}, 07882 {0xe1, 0x00}, {0xe0, 0xc2}, {0xe2, 0x84}, {0xe3, 0x46}, 07883 {0xe6, 0x08}, {0xe7, 0xca}, {0xe5, 0x8c}, {0xe4, 0x4e}, 07884 {0xef, 0x10}, {0xee, 0xd2}, {0xec, 0x94}, {0xed, 0x56}, 07885 {0xe8, 0x18}, {0xe9, 0xda}, {0xeb, 0x9c}, {0xea, 0x5e}, 07886 {0xfd, 0x20}, {0xfc, 0xe2}, {0xfe, 0xa4}, {0xff, 0x66}, 07887 {0xfa, 0x28}, {0xfb, 0xea}, {0xf9, 0xac}, {0xf8, 0x6e}, 07888 {0xf3, 0x30}, {0xf2, 0xf2}, {0xf0, 0xb4}, {0xf1, 0x76}, 07889 {0xf4, 0x38}, {0xf5, 0xfa}, {0xf7, 0xbc}, {0xf6, 0x7e}, 07890 {0xd9, 0x40}, {0xd8, 0x82}, {0xda, 0xc4}, {0xdb, 0x06}, 07891 {0xde, 0x48}, {0xdf, 0x8a}, {0xdd, 0xcc}, {0xdc, 0x0e}, 07892 {0xd7, 0x50}, {0xd6, 0x92}, {0xd4, 0xd4}, {0xd5, 0x16}, 07893 {0xd0, 0x58}, {0xd1, 0x9a}, {0xd3, 0xdc}, {0xd2, 0x1e}, 07894 {0xc5, 0x60}, {0xc4, 0xa2}, {0xc6, 0xe4}, {0xc7, 0x26}, 07895 {0xc2, 0x68}, {0xc3, 0xaa}, {0xc1, 0xec}, {0xc0, 0x2e}, 07896 {0xcb, 0x70}, {0xca, 0xb2}, {0xc8, 0xf4}, {0xc9, 0x36}, 07897 {0xcc, 0x78}, {0xcd, 0xba}, {0xcf, 0xfc}, {0xce, 0x3e}, 07898 {0x91, 0x80}, {0x90, 0x42}, {0x92, 0x04}, {0x93, 0xc6}, 07899 {0x96, 
0x88}, {0x97, 0x4a}, {0x95, 0x0c}, {0x94, 0xce}, 07900 {0x9f, 0x90}, {0x9e, 0x52}, {0x9c, 0x14}, {0x9d, 0xd6}, 07901 {0x98, 0x98}, {0x99, 0x5a}, {0x9b, 0x1c}, {0x9a, 0xde}, 07902 {0x8d, 0xa0}, {0x8c, 0x62}, {0x8e, 0x24}, {0x8f, 0xe6}, 07903 {0x8a, 0xa8}, {0x8b, 0x6a}, {0x89, 0x2c}, {0x88, 0xee}, 07904 {0x83, 0xb0}, {0x82, 0x72}, {0x80, 0x34}, {0x81, 0xf6}, 07905 {0x84, 0xb8}, {0x85, 0x7a}, {0x87, 0x3c}, {0x86, 0xfe}, 07906 {0xa9, 0xc0}, {0xa8, 0x02}, {0xaa, 0x44}, {0xab, 0x86}, 07907 {0xae, 0xc8}, {0xaf, 0x0a}, {0xad, 0x4c}, {0xac, 0x8e}, 07908 {0xa7, 0xd0}, {0xa6, 0x12}, {0xa4, 0x54}, {0xa5, 0x96}, 07909 {0xa0, 0xd8}, {0xa1, 0x1a}, {0xa3, 0x5c}, {0xa2, 0x9e}, 07910 {0xb5, 0xe0}, {0xb4, 0x22}, {0xb6, 0x64}, {0xb7, 0xa6}, 07911 {0xb2, 0xe8}, {0xb3, 0x2a}, {0xb1, 0x6c}, {0xb0, 0xae}, 07912 {0xbb, 0xf0}, {0xba, 0x32}, {0xb8, 0x74}, {0xb9, 0xb6}, 07913 {0xbc, 0xf8}, {0xbd, 0x3a}, {0xbf, 0x7c}, {0xbe, 0xbe} }; 07914 07915 07916 static void GMULT(byte *x, byte m[256][AES_BLOCK_SIZE]) 07917 { 07918 int i, j; 07919 byte Z[AES_BLOCK_SIZE]; 07920 byte a; 07921 07922 XMEMSET(Z, 0, sizeof(Z)); 07923 07924 for (i = 15; i > 0; i--) { 07925 xorbuf(Z, m[x[i]], AES_BLOCK_SIZE); 07926 a = Z[15]; 07927 07928 for (j = 15; j > 0; j--) { 07929 Z[j] = Z[j-1]; 07930 } 07931 07932 Z[0] = R[a][0]; 07933 Z[1] ^= R[a][1]; 07934 } 07935 xorbuf(Z, m[x[0]], AES_BLOCK_SIZE); 07936 07937 XMEMCPY(x, Z, AES_BLOCK_SIZE); 07938 } 07939 07940 07941 void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c, 07942 word32 cSz, byte* s, word32 sSz) 07943 { 07944 byte x[AES_BLOCK_SIZE]; 07945 byte scratch[AES_BLOCK_SIZE]; 07946 word32 blocks, partial; 07947 07948 XMEMSET(x, 0, AES_BLOCK_SIZE); 07949 07950 /* Hash in A, the Additional Authentication Data */ 07951 if (aSz != 0 && a != NULL) { 07952 blocks = aSz / AES_BLOCK_SIZE; 07953 partial = aSz % AES_BLOCK_SIZE; 07954 while (blocks--) { 07955 xorbuf(x, a, AES_BLOCK_SIZE); 07956 GMULT(x, aes->M0); 07957 a += AES_BLOCK_SIZE; 07958 } 07959 if (partial != 
0) { 07960 XMEMSET(scratch, 0, AES_BLOCK_SIZE); 07961 XMEMCPY(scratch, a, partial); 07962 xorbuf(x, scratch, AES_BLOCK_SIZE); 07963 GMULT(x, aes->M0); 07964 } 07965 } 07966 07967 /* Hash in C, the Ciphertext */ 07968 if (cSz != 0 && c != NULL) { 07969 blocks = cSz / AES_BLOCK_SIZE; 07970 partial = cSz % AES_BLOCK_SIZE; 07971 while (blocks--) { 07972 xorbuf(x, c, AES_BLOCK_SIZE); 07973 GMULT(x, aes->M0); 07974 c += AES_BLOCK_SIZE; 07975 } 07976 if (partial != 0) { 07977 XMEMSET(scratch, 0, AES_BLOCK_SIZE); 07978 XMEMCPY(scratch, c, partial); 07979 xorbuf(x, scratch, AES_BLOCK_SIZE); 07980 GMULT(x, aes->M0); 07981 } 07982 } 07983 07984 /* Hash in the lengths of A and C in bits */ 07985 FlattenSzInBits(&scratch[0], aSz); 07986 FlattenSzInBits(&scratch[8], cSz); 07987 xorbuf(x, scratch, AES_BLOCK_SIZE); 07988 GMULT(x, aes->M0); 07989 07990 /* Copy the result into s. */ 07991 XMEMCPY(s, x, sSz); 07992 } 07993 07994 /* end GCM_TABLE */ 07995 #elif defined(WORD64_AVAILABLE) && !defined(GCM_WORD32) 07996 07997 #if !defined(FREESCALE_LTC_AES_GCM) 07998 static void GMULT(word64* X, word64* Y) 07999 { 08000 word64 Z[2] = {0,0}; 08001 word64 V[2]; 08002 int i, j; 08003 V[0] = X[0]; V[1] = X[1]; 08004 08005 for (i = 0; i < 2; i++) 08006 { 08007 word64 y = Y[i]; 08008 for (j = 0; j < 64; j++) 08009 { 08010 if (y & 0x8000000000000000ULL) { 08011 Z[0] ^= V[0]; 08012 Z[1] ^= V[1]; 08013 } 08014 08015 if (V[1] & 0x0000000000000001) { 08016 V[1] >>= 1; 08017 V[1] |= ((V[0] & 0x0000000000000001) ? 08018 0x8000000000000000ULL : 0); 08019 V[0] >>= 1; 08020 V[0] ^= 0xE100000000000000ULL; 08021 } 08022 else { 08023 V[1] >>= 1; 08024 V[1] |= ((V[0] & 0x0000000000000001) ? 
08025 0x8000000000000000ULL : 0); 08026 V[0] >>= 1; 08027 } 08028 y <<= 1; 08029 } 08030 } 08031 X[0] = Z[0]; 08032 X[1] = Z[1]; 08033 } 08034 08035 08036 void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c, 08037 word32 cSz, byte* s, word32 sSz) 08038 { 08039 word64 x[2] = {0,0}; 08040 word32 blocks, partial; 08041 word64 bigH[2]; 08042 08043 XMEMCPY(bigH, aes->H, AES_BLOCK_SIZE); 08044 #ifdef LITTLE_ENDIAN_ORDER 08045 ByteReverseWords64(bigH, bigH, AES_BLOCK_SIZE); 08046 #endif 08047 08048 /* Hash in A, the Additional Authentication Data */ 08049 if (aSz != 0 && a != NULL) { 08050 word64 bigA[2]; 08051 blocks = aSz / AES_BLOCK_SIZE; 08052 partial = aSz % AES_BLOCK_SIZE; 08053 while (blocks--) { 08054 XMEMCPY(bigA, a, AES_BLOCK_SIZE); 08055 #ifdef LITTLE_ENDIAN_ORDER 08056 ByteReverseWords64(bigA, bigA, AES_BLOCK_SIZE); 08057 #endif 08058 x[0] ^= bigA[0]; 08059 x[1] ^= bigA[1]; 08060 GMULT(x, bigH); 08061 a += AES_BLOCK_SIZE; 08062 } 08063 if (partial != 0) { 08064 XMEMSET(bigA, 0, AES_BLOCK_SIZE); 08065 XMEMCPY(bigA, a, partial); 08066 #ifdef LITTLE_ENDIAN_ORDER 08067 ByteReverseWords64(bigA, bigA, AES_BLOCK_SIZE); 08068 #endif 08069 x[0] ^= bigA[0]; 08070 x[1] ^= bigA[1]; 08071 GMULT(x, bigH); 08072 } 08073 } 08074 08075 /* Hash in C, the Ciphertext */ 08076 if (cSz != 0 && c != NULL) { 08077 word64 bigC[2]; 08078 blocks = cSz / AES_BLOCK_SIZE; 08079 partial = cSz % AES_BLOCK_SIZE; 08080 while (blocks--) { 08081 XMEMCPY(bigC, c, AES_BLOCK_SIZE); 08082 #ifdef LITTLE_ENDIAN_ORDER 08083 ByteReverseWords64(bigC, bigC, AES_BLOCK_SIZE); 08084 #endif 08085 x[0] ^= bigC[0]; 08086 x[1] ^= bigC[1]; 08087 GMULT(x, bigH); 08088 c += AES_BLOCK_SIZE; 08089 } 08090 if (partial != 0) { 08091 XMEMSET(bigC, 0, AES_BLOCK_SIZE); 08092 XMEMCPY(bigC, c, partial); 08093 #ifdef LITTLE_ENDIAN_ORDER 08094 ByteReverseWords64(bigC, bigC, AES_BLOCK_SIZE); 08095 #endif 08096 x[0] ^= bigC[0]; 08097 x[1] ^= bigC[1]; 08098 GMULT(x, bigH); 08099 } 08100 } 08101 08102 /* Hash in the 
lengths in bits of A and C */ 08103 { 08104 word64 len[2]; 08105 len[0] = aSz; len[1] = cSz; 08106 08107 /* Lengths are in bytes. Convert to bits. */ 08108 len[0] *= 8; 08109 len[1] *= 8; 08110 08111 x[0] ^= len[0]; 08112 x[1] ^= len[1]; 08113 GMULT(x, bigH); 08114 } 08115 #ifdef LITTLE_ENDIAN_ORDER 08116 ByteReverseWords64(x, x, AES_BLOCK_SIZE); 08117 #endif 08118 XMEMCPY(s, x, sSz); 08119 } 08120 #endif /* !FREESCALE_LTC_AES_GCM */ 08121 08122 /* end defined(WORD64_AVAILABLE) && !defined(GCM_WORD32) */ 08123 #else /* GCM_WORD32 */ 08124 08125 static void GMULT(word32* X, word32* Y) 08126 { 08127 word32 Z[4] = {0,0,0,0}; 08128 word32 V[4]; 08129 int i, j; 08130 08131 V[0] = X[0]; V[1] = X[1]; V[2] = X[2]; V[3] = X[3]; 08132 08133 for (i = 0; i < 4; i++) 08134 { 08135 word32 y = Y[i]; 08136 for (j = 0; j < 32; j++) 08137 { 08138 if (y & 0x80000000) { 08139 Z[0] ^= V[0]; 08140 Z[1] ^= V[1]; 08141 Z[2] ^= V[2]; 08142 Z[3] ^= V[3]; 08143 } 08144 08145 if (V[3] & 0x00000001) { 08146 V[3] >>= 1; 08147 V[3] |= ((V[2] & 0x00000001) ? 0x80000000 : 0); 08148 V[2] >>= 1; 08149 V[2] |= ((V[1] & 0x00000001) ? 0x80000000 : 0); 08150 V[1] >>= 1; 08151 V[1] |= ((V[0] & 0x00000001) ? 0x80000000 : 0); 08152 V[0] >>= 1; 08153 V[0] ^= 0xE1000000; 08154 } else { 08155 V[3] >>= 1; 08156 V[3] |= ((V[2] & 0x00000001) ? 0x80000000 : 0); 08157 V[2] >>= 1; 08158 V[2] |= ((V[1] & 0x00000001) ? 0x80000000 : 0); 08159 V[1] >>= 1; 08160 V[1] |= ((V[0] & 0x00000001) ? 
0x80000000 : 0); 08161 V[0] >>= 1; 08162 } 08163 y <<= 1; 08164 } 08165 } 08166 X[0] = Z[0]; 08167 X[1] = Z[1]; 08168 X[2] = Z[2]; 08169 X[3] = Z[3]; 08170 } 08171 08172 08173 void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c, 08174 word32 cSz, byte* s, word32 sSz) 08175 { 08176 word32 x[4] = {0,0,0,0}; 08177 word32 blocks, partial; 08178 word32 bigH[4]; 08179 08180 XMEMCPY(bigH, aes->H, AES_BLOCK_SIZE); 08181 #ifdef LITTLE_ENDIAN_ORDER 08182 ByteReverseWords(bigH, bigH, AES_BLOCK_SIZE); 08183 #endif 08184 08185 /* Hash in A, the Additional Authentication Data */ 08186 if (aSz != 0 && a != NULL) { 08187 word32 bigA[4]; 08188 blocks = aSz / AES_BLOCK_SIZE; 08189 partial = aSz % AES_BLOCK_SIZE; 08190 while (blocks--) { 08191 XMEMCPY(bigA, a, AES_BLOCK_SIZE); 08192 #ifdef LITTLE_ENDIAN_ORDER 08193 ByteReverseWords(bigA, bigA, AES_BLOCK_SIZE); 08194 #endif 08195 x[0] ^= bigA[0]; 08196 x[1] ^= bigA[1]; 08197 x[2] ^= bigA[2]; 08198 x[3] ^= bigA[3]; 08199 GMULT(x, bigH); 08200 a += AES_BLOCK_SIZE; 08201 } 08202 if (partial != 0) { 08203 XMEMSET(bigA, 0, AES_BLOCK_SIZE); 08204 XMEMCPY(bigA, a, partial); 08205 #ifdef LITTLE_ENDIAN_ORDER 08206 ByteReverseWords(bigA, bigA, AES_BLOCK_SIZE); 08207 #endif 08208 x[0] ^= bigA[0]; 08209 x[1] ^= bigA[1]; 08210 x[2] ^= bigA[2]; 08211 x[3] ^= bigA[3]; 08212 GMULT(x, bigH); 08213 } 08214 } 08215 08216 /* Hash in C, the Ciphertext */ 08217 if (cSz != 0 && c != NULL) { 08218 word32 bigC[4]; 08219 blocks = cSz / AES_BLOCK_SIZE; 08220 partial = cSz % AES_BLOCK_SIZE; 08221 while (blocks--) { 08222 XMEMCPY(bigC, c, AES_BLOCK_SIZE); 08223 #ifdef LITTLE_ENDIAN_ORDER 08224 ByteReverseWords(bigC, bigC, AES_BLOCK_SIZE); 08225 #endif 08226 x[0] ^= bigC[0]; 08227 x[1] ^= bigC[1]; 08228 x[2] ^= bigC[2]; 08229 x[3] ^= bigC[3]; 08230 GMULT(x, bigH); 08231 c += AES_BLOCK_SIZE; 08232 } 08233 if (partial != 0) { 08234 XMEMSET(bigC, 0, AES_BLOCK_SIZE); 08235 XMEMCPY(bigC, c, partial); 08236 #ifdef LITTLE_ENDIAN_ORDER 08237 
ByteReverseWords(bigC, bigC, AES_BLOCK_SIZE); 08238 #endif 08239 x[0] ^= bigC[0]; 08240 x[1] ^= bigC[1]; 08241 x[2] ^= bigC[2]; 08242 x[3] ^= bigC[3]; 08243 GMULT(x, bigH); 08244 } 08245 } 08246 08247 /* Hash in the lengths in bits of A and C */ 08248 { 08249 word32 len[4]; 08250 08251 /* Lengths are in bytes. Convert to bits. */ 08252 len[0] = (aSz >> (8*sizeof(aSz) - 3)); 08253 len[1] = aSz << 3; 08254 len[2] = (cSz >> (8*sizeof(cSz) - 3)); 08255 len[3] = cSz << 3; 08256 08257 x[0] ^= len[0]; 08258 x[1] ^= len[1]; 08259 x[2] ^= len[2]; 08260 x[3] ^= len[3]; 08261 GMULT(x, bigH); 08262 } 08263 #ifdef LITTLE_ENDIAN_ORDER 08264 ByteReverseWords(x, x, AES_BLOCK_SIZE); 08265 #endif 08266 XMEMCPY(s, x, sSz); 08267 } 08268 08269 #endif /* end GCM_WORD32 */ 08270 08271 08272 #if !defined(WOLFSSL_XILINX_CRYPT) 08273 #ifdef FREESCALE_LTC_AES_GCM 08274 int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, 08275 const byte* iv, word32 ivSz, 08276 byte* authTag, word32 authTagSz, 08277 const byte* authIn, word32 authInSz) 08278 { 08279 status_t status; 08280 word32 keySize; 08281 08282 /* argument checks */ 08283 if (aes == NULL || authTagSz > AES_BLOCK_SIZE) { 08284 return BAD_FUNC_ARG; 08285 } 08286 08287 if (authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ) { 08288 WOLFSSL_MSG("GcmEncrypt authTagSz too small error"); 08289 return BAD_FUNC_ARG; 08290 } 08291 08292 status = wc_AesGetKeySize(aes, &keySize); 08293 if (status) 08294 return status; 08295 08296 status = LTC_AES_EncryptTagGcm(LTC_BASE, in, out, sz, iv, ivSz, 08297 authIn, authInSz, (byte*)aes->key, keySize, authTag, authTagSz); 08298 08299 return (status == kStatus_Success) ? 
0 : AES_GCM_AUTH_E; 08300 } 08301 #else 08302 #if defined(STM32_CRYPTO) && (defined(WOLFSSL_STM32F4) || \ 08303 defined(WOLFSSL_STM32F7) || \ 08304 defined(WOLFSSL_STM32L4)) 08305 08306 static WC_INLINE int wc_AesGcmEncrypt_STM32(Aes* aes, byte* out, const byte* in, 08307 word32 sz, const byte* iv, word32 ivSz, 08308 byte* authTag, word32 authTagSz, 08309 const byte* authIn, word32 authInSz) 08310 { 08311 int ret; 08312 word32 keySize; 08313 byte initialCounter[AES_BLOCK_SIZE]; 08314 #ifdef WOLFSSL_STM32_CUBEMX 08315 CRYP_HandleTypeDef hcryp; 08316 #else 08317 byte keyCopy[AES_BLOCK_SIZE * 2]; 08318 #endif /* WOLFSSL_STM32_CUBEMX */ 08319 int status = 0; 08320 byte* authInPadded = NULL; 08321 byte tag[AES_BLOCK_SIZE]; 08322 int authPadSz; 08323 08324 ret = wc_AesGetKeySize(aes, &keySize); 08325 if (ret != 0) 08326 return ret; 08327 08328 XMEMSET(initialCounter, 0, AES_BLOCK_SIZE); 08329 XMEMCPY(initialCounter, iv, ivSz); 08330 initialCounter[AES_BLOCK_SIZE - 1] = STM32_GCM_IV_START; 08331 08332 /* pad authIn if it is not a block multiple */ 08333 if ((authInSz % AES_BLOCK_SIZE) != 0) { 08334 authPadSz = ((authInSz / AES_BLOCK_SIZE) + 1) * AES_BLOCK_SIZE; 08335 /* Need to pad the AAD to a full block with zeros. 
*/ 08336 authInPadded = XMALLOC(authPadSz, aes->heap, DYNAMIC_TYPE_TMP_BUFFER); 08337 if (authInPadded == NULL) { 08338 return MEMORY_E; 08339 } 08340 XMEMSET(authInPadded, 0, authPadSz); 08341 XMEMCPY(authInPadded, authIn, authInSz); 08342 } else { 08343 authPadSz = authInSz; 08344 authInPadded = (byte*)authIn; 08345 } 08346 08347 08348 #ifdef WOLFSSL_STM32_CUBEMX 08349 XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef)); 08350 switch (keySize) { 08351 case 16: /* 128-bit key */ 08352 hcryp.Init.KeySize = CRYP_KEYSIZE_128B; 08353 break; 08354 #ifdef CRYP_KEYSIZE_192B 08355 case 24: /* 192-bit key */ 08356 hcryp.Init.KeySize = CRYP_KEYSIZE_192B; 08357 break; 08358 #endif 08359 case 32: /* 256-bit key */ 08360 hcryp.Init.KeySize = CRYP_KEYSIZE_256B; 08361 break; 08362 default: 08363 break; 08364 } 08365 hcryp.Instance = CRYP; 08366 hcryp.Init.DataType = CRYP_DATATYPE_8B; 08367 hcryp.Init.pKey = (byte*)aes->key; 08368 hcryp.Init.pInitVect = initialCounter; 08369 hcryp.Init.Header = authInPadded; 08370 hcryp.Init.HeaderSize = authInSz; 08371 08372 #ifdef WOLFSSL_STM32L4 08373 /* Set the CRYP parameters */ 08374 hcryp.Init.ChainingMode = CRYP_CHAINMODE_AES_GCM_GMAC; 08375 hcryp.Init.OperatingMode = CRYP_ALGOMODE_ENCRYPT; 08376 hcryp.Init.GCMCMACPhase = CRYP_INIT_PHASE; 08377 HAL_CRYP_Init(&hcryp); 08378 08379 /* GCM init phase */ 08380 status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, 0, NULL, STM32_HAL_TIMEOUT); 08381 if (status == HAL_OK) { 08382 /* GCM header phase */ 08383 hcryp.Init.GCMCMACPhase = CRYP_HEADER_PHASE; 08384 status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, 0, NULL, STM32_HAL_TIMEOUT); 08385 if (status == HAL_OK) { 08386 /* GCM payload phase */ 08387 hcryp.Init.GCMCMACPhase = CRYP_PAYLOAD_PHASE; 08388 status = HAL_CRYPEx_AES_Auth(&hcryp, (byte*)in, sz, out, STM32_HAL_TIMEOUT); 08389 if (status == HAL_OK) { 08390 /* GCM final phase */ 08391 hcryp.Init.GCMCMACPhase = CRYP_FINAL_PHASE; 08392 status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, sz, tag, STM32_HAL_TIMEOUT); 08393 } 
08394 } 08395 } 08396 #else 08397 HAL_CRYP_Init(&hcryp); 08398 08399 status = HAL_CRYPEx_AESGCM_Encrypt(&hcryp, (byte*)in, sz, 08400 out, STM32_HAL_TIMEOUT); 08401 /* Compute the authTag */ 08402 if (status == HAL_OK) { 08403 status = HAL_CRYPEx_AESGCM_Finish(&hcryp, sz, tag, STM32_HAL_TIMEOUT); 08404 } 08405 #endif 08406 08407 if (status != HAL_OK) 08408 ret = AES_GCM_AUTH_E; 08409 HAL_CRYP_DeInit(&hcryp); 08410 #else 08411 ByteReverseWords((word32*)keyCopy, (word32*)aes->key, keySize); 08412 status = CRYP_AES_GCM(MODE_ENCRYPT, (uint8_t*)initialCounter, 08413 (uint8_t*)keyCopy, keySize * 8, 08414 (uint8_t*)in, sz, 08415 (uint8_t*)authInPadded,authInSz, 08416 (uint8_t*)out, tag); 08417 if (status != SUCCESS) 08418 ret = AES_GCM_AUTH_E; 08419 #endif /* WOLFSSL_STM32_CUBEMX */ 08420 08421 /* authTag may be shorter than AES_BLOCK_SZ, store separately */ 08422 if (ret == 0) 08423 XMEMCPY(authTag, tag, authTagSz); 08424 08425 /* We only allocate extra memory if authInPadded is not a multiple of AES_BLOCK_SZ */ 08426 if (authInPadded != NULL && authInSz != authPadSz) { 08427 XFREE(authInPadded, aes->heap, DYNAMIC_TYPE_TMP_BUFFER); 08428 } 08429 08430 return ret; 08431 } 08432 #endif /* STM32_CRYPTO */ 08433 08434 #ifdef WOLFSSL_AESNI 08435 int AES_GCM_encrypt_C(Aes* aes, byte* out, const byte* in, word32 sz, 08436 const byte* iv, word32 ivSz, 08437 byte* authTag, word32 authTagSz, 08438 const byte* authIn, word32 authInSz); 08439 #else 08440 static 08441 #endif 08442 int AES_GCM_encrypt_C(Aes* aes, byte* out, const byte* in, word32 sz, 08443 const byte* iv, word32 ivSz, 08444 byte* authTag, word32 authTagSz, 08445 const byte* authIn, word32 authInSz) 08446 { 08447 int ret = 0; 08448 word32 blocks = sz / AES_BLOCK_SIZE; 08449 word32 partial = sz % AES_BLOCK_SIZE; 08450 const byte* p = in; 08451 byte* c = out; 08452 byte counter[AES_BLOCK_SIZE]; 08453 byte initialCounter[AES_BLOCK_SIZE]; 08454 byte *ctr; 08455 byte scratch[AES_BLOCK_SIZE]; 08456 08457 ctr = counter; 08458 
    /* --- remainder of software AES-GCM encrypt (AES_GCM_encrypt_C) --- */
    /* Build the pre-counter block J0: a 12-byte (96-bit) IV is used directly
     * with a 32-bit block counter of 1; any other IV length is compressed
     * into J0 with GHASH (SP 800-38D). */
    XMEMSET(initialCounter, 0, AES_BLOCK_SIZE);
    if (ivSz == GCM_NONCE_MID_SZ) {
        XMEMCPY(initialCounter, iv, ivSz);
        initialCounter[AES_BLOCK_SIZE - 1] = 1;
    }
    else {
        GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE);
    }
    XMEMCPY(ctr, initialCounter, AES_BLOCK_SIZE);

#ifdef WOLFSSL_PIC32MZ_CRYPT
    if (blocks) {
        /* use intitial IV for PIC32 HW, but don't use it below */
        XMEMCPY(aes->reg, ctr, AES_BLOCK_SIZE);

        ret = wc_Pic32AesCrypt(
            aes->key, aes->keylen, aes->reg, AES_BLOCK_SIZE,
            out, in, (blocks * AES_BLOCK_SIZE),
            PIC32_ENCRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_AES_GCM);
        if (ret != 0)
            return ret;
    }
    /* process remainder using partial handling */
#endif

#if defined(HAVE_AES_ECB) && !defined(WOLFSSL_PIC32MZ_CRYPT)
    /* some hardware acceleration can gain performance from doing AES encryption
     * of the whole buffer at once */
    if (c != p) { /* can not handle inline encryption */
        /* First pass writes the successive counter blocks into the output
         * buffer; one bulk ECB encrypt then turns them into keystream. */
        while (blocks--) {
            IncrementGcmCounter(ctr);
            XMEMCPY(c, ctr, AES_BLOCK_SIZE);
            c += AES_BLOCK_SIZE;
        }

        /* reset number of blocks and then do encryption */
        blocks = sz / AES_BLOCK_SIZE;
        wc_AesEcbEncrypt(aes, out, out, AES_BLOCK_SIZE * blocks);
        xorbuf(out, p, AES_BLOCK_SIZE * blocks);
        p += AES_BLOCK_SIZE * blocks;
    }
    else
#endif /* HAVE_AES_ECB */

    /* block-at-a-time CTR encryption (also used for in-place operation) */
    while (blocks--) {
        IncrementGcmCounter(ctr);
    #ifndef WOLFSSL_PIC32MZ_CRYPT
        wc_AesEncrypt(aes, ctr, scratch);
        xorbuf(scratch, p, AES_BLOCK_SIZE);
        XMEMCPY(c, scratch, AES_BLOCK_SIZE);
    #endif
        p += AES_BLOCK_SIZE;
        c += AES_BLOCK_SIZE;
    }

    /* trailing partial block: one more counter encryption, use only the
     * first 'partial' keystream bytes */
    if (partial != 0) {
        IncrementGcmCounter(ctr);
        wc_AesEncrypt(aes, ctr, scratch);
        xorbuf(scratch, p, partial);
        XMEMCPY(c, scratch, partial);
    }

    /* tag = GHASH(AAD, ciphertext) XOR E(K, J0) */
    GHASH(aes, authIn, authInSz, out, sz, authTag, authTagSz);
    wc_AesEncrypt(aes, initialCounter, scratch);
    xorbuf(authTag, scratch, authTagSz);

    return ret;
}

/* AES-GCM encrypt dispatcher: validates arguments then routes to STM32 HW,
 * async HW (Cavium/QAT/test), AES-NI (AVX2/AVX1/SSE), or the portable C
 * implementation above. */
int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
                     const byte* iv, word32 ivSz,
                     byte* authTag, word32 authTagSz,
                     const byte* authIn, word32 authInSz)
{
    /* argument checks */
    if (aes == NULL || authTagSz > AES_BLOCK_SIZE) {
        return BAD_FUNC_ARG;
    }

    if (authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ) {
        WOLFSSL_MSG("GcmEncrypt authTagSz too small error");
        return BAD_FUNC_ARG;
    }

#if defined(STM32_CRYPTO) && (defined(WOLFSSL_STM32F4) || \
                              defined(WOLFSSL_STM32F7) || \
                              defined(WOLFSSL_STM32L4))

    /* additional argument checks - STM32 HW only supports 12 byte IV */
    if (ivSz != GCM_NONCE_MID_SZ) {
        return BAD_FUNC_ARG;
    }

    /* STM32 HW AES-GCM requires / assumes inputs are a multiple of block size.
     * We can avoid this by zero padding (authIn) AAD, but zero-padded plaintext
     * will be encrypted and output incorrectly, causing a bad authTag.
     * We will use HW accelerated AES-GCM if plain%AES_BLOCK_SZ==0.
     * Otherwise, we will use accelerated AES_CTR for encrypt, and then
     * perform GHASH in software.
     * See NIST SP 800-38D */

    /* Plain text is a multiple of block size, so use HW-Accelerated AES_GCM */
    if (sz % AES_BLOCK_SIZE == 0) {
        return wc_AesGcmEncrypt_STM32(aes, out, in, sz, iv, ivSz,
                                      authTag, authTagSz, authIn, authInSz);
    }
#endif

#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
    /* if async and byte count above threshold */
    /* only 12-byte IV is supported in HW */
    if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES &&
            sz >= WC_ASYNC_THRESH_AES_GCM && ivSz == GCM_NONCE_MID_SZ) {
    #if defined(HAVE_CAVIUM)
        #ifdef HAVE_CAVIUM_V
        if (authInSz == 20) { /* Nitrox V GCM is only working with 20 byte AAD */
            return NitroxAesGcmEncrypt(aes, out, in, sz,
                (const byte*)aes->asyncKey, aes->keylen, iv, ivSz,
                authTag, authTagSz, authIn, authInSz);
        }
        #endif
    #elif defined(HAVE_INTEL_QA)
        return IntelQaSymAesGcmEncrypt(&aes->asyncDev, out, in, sz,
            (const byte*)aes->asyncKey, aes->keylen, iv, ivSz,
            authTag, authTagSz, authIn, authInSz);
    #else /* WOLFSSL_ASYNC_CRYPT_TEST */
        if (wc_AsyncTestInit(&aes->asyncDev, ASYNC_TEST_AES_GCM_ENCRYPT)) {
            WC_ASYNC_TEST* testDev = &aes->asyncDev.test;
            testDev->aes.aes = aes;
            testDev->aes.out = out;
            testDev->aes.in = in;
            testDev->aes.sz = sz;
            testDev->aes.iv = iv;
            testDev->aes.ivSz = ivSz;
            testDev->aes.authTag = authTag;
            testDev->aes.authTagSz = authTagSz;
            testDev->aes.authIn = authIn;
            testDev->aes.authInSz = authInSz;
            return WC_PENDING_E;
        }
    #endif
    }
#endif /* WOLFSSL_ASYNC_CRYPT */

    /* Software AES-GCM */

#ifdef WOLFSSL_AESNI
    #ifdef HAVE_INTEL_AVX2
    if (IS_INTEL_AVX2(intel_flags)) {
        AES_GCM_encrypt_avx2(in, out, authIn, iv, authTag, sz, authInSz, ivSz,
                             authTagSz, (const byte*)aes->key, aes->rounds);
        return 0;
    }
    else
    #endif
    #ifdef HAVE_INTEL_AVX1
    if (IS_INTEL_AVX1(intel_flags)) {
        AES_GCM_encrypt_avx1(in, out, authIn, iv, authTag, sz, authInSz, ivSz,
                             authTagSz, (const byte*)aes->key, aes->rounds);
        return 0;
    }
    else
    #endif
    if (haveAESNI) {
        AES_GCM_encrypt(in, out, authIn, iv, authTag, sz, authInSz, ivSz,
                        authTagSz, (const byte*)aes->key, aes->rounds);
        return 0;
    }
    else
#endif
    {
        return AES_GCM_encrypt_C(aes, out, in, sz, iv, ivSz, authTag, authTagSz,
                                 authIn, authInSz);
    }
}
#endif


#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AESGCM_DECRYPT)
#ifdef FREESCALE_LTC_AES_GCM
/* AES-GCM decrypt using the Freescale/NXP LTC engine; the hardware performs
 * the tag comparison itself. */
int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
                     const byte* iv, word32 ivSz,
                     const byte* authTag, word32 authTagSz,
                     const byte* authIn, word32 authInSz)
{
    int ret;
    word32 keySize;
    status_t status;

    /* argument checks */
    if (aes == NULL || out == NULL || in == NULL || iv == NULL ||
        authTag == NULL || authTagSz > AES_BLOCK_SIZE) {
        return BAD_FUNC_ARG;
    }

    ret = wc_AesGetKeySize(aes, &keySize);
    if (ret != 0) {
        return ret;
    }

    status = LTC_AES_DecryptTagGcm(LTC_BASE, in, out, sz, iv, ivSz,
        authIn, authInSz, (byte*)aes->key, keySize, authTag, authTagSz);

    /* map HW status: success -> 0, anything else -> authentication failure */
    return (status == kStatus_Success) ?
0 : AES_GCM_AUTH_E; 08662 } 08663 #elif defined(STM32_CRYPTO) && (defined(WOLFSSL_STM32F4) || \ 08664 defined(WOLFSSL_STM32F7) || \ 08665 defined(WOLFSSL_STM32L4)) 08666 int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, 08667 const byte* iv, word32 ivSz, 08668 const byte* authTag, word32 authTagSz, 08669 const byte* authIn, word32 authInSz) 08670 { 08671 int ret; 08672 word32 keySize; 08673 #ifdef WOLFSSL_STM32_CUBEMX 08674 CRYP_HandleTypeDef hcryp; 08675 #else 08676 byte keyCopy[AES_BLOCK_SIZE * 2]; 08677 #endif /* WOLFSSL_STM32_CUBEMX */ 08678 int status; 08679 int inPadSz, authPadSz; 08680 byte tag[AES_BLOCK_SIZE]; 08681 byte *inPadded = NULL; 08682 byte *authInPadded = NULL; 08683 byte initialCounter[AES_BLOCK_SIZE]; 08684 08685 /* argument checks */ 08686 if (aes == NULL || out == NULL || in == NULL || iv == NULL || 08687 authTag == NULL || authTagSz > AES_BLOCK_SIZE) { 08688 return BAD_FUNC_ARG; 08689 } 08690 08691 ret = wc_AesGetKeySize(aes, &keySize); 08692 if (ret != 0) { 08693 return ret; 08694 } 08695 08696 /* additional argument checks - STM32 HW only supports 12 byte IV */ 08697 if (ivSz != GCM_NONCE_MID_SZ) { 08698 return BAD_FUNC_ARG; 08699 } 08700 08701 XMEMSET(initialCounter, 0, AES_BLOCK_SIZE); 08702 XMEMCPY(initialCounter, iv, ivSz); 08703 initialCounter[AES_BLOCK_SIZE - 1] = STM32_GCM_IV_START; 08704 08705 /* Need to pad the AAD and input cipher text to a full block size since 08706 * CRYP_AES_GCM will assume these are a multiple of AES_BLOCK_SIZE. 08707 * It is okay to pad with zeros because GCM does this before GHASH already. 
08708 * See NIST SP 800-38D */ 08709 08710 if ((sz % AES_BLOCK_SIZE) > 0) { 08711 inPadSz = ((sz / AES_BLOCK_SIZE) + 1) * AES_BLOCK_SIZE; 08712 inPadded = XMALLOC(inPadSz, aes->heap, DYNAMIC_TYPE_TMP_BUFFER); 08713 if (inPadded == NULL) { 08714 return MEMORY_E; 08715 } 08716 XMEMSET(inPadded, 0, inPadSz); 08717 XMEMCPY(inPadded, in, sz); 08718 } else { 08719 inPadSz = sz; 08720 inPadded = (byte*)in; 08721 } 08722 08723 if ((authInSz % AES_BLOCK_SIZE) > 0) { 08724 authPadSz = ((authInSz / AES_BLOCK_SIZE) + 1) * AES_BLOCK_SIZE; 08725 authInPadded = XMALLOC(authPadSz, aes->heap, DYNAMIC_TYPE_TMP_BUFFER); 08726 if (authInPadded == NULL) { 08727 if (inPadded != NULL && inPadSz != sz) 08728 XFREE(inPadded , aes->heap, DYNAMIC_TYPE_TMP_BUFFER); 08729 return MEMORY_E; 08730 } 08731 XMEMSET(authInPadded, 0, authPadSz); 08732 XMEMCPY(authInPadded, authIn, authInSz); 08733 } else { 08734 authPadSz = authInSz; 08735 authInPadded = (byte*)authIn; 08736 } 08737 08738 #ifdef WOLFSSL_STM32_CUBEMX 08739 XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef)); 08740 switch(keySize) { 08741 case 16: /* 128-bit key */ 08742 hcryp.Init.KeySize = CRYP_KEYSIZE_128B; 08743 break; 08744 #ifdef CRYP_KEYSIZE_192B 08745 case 24: /* 192-bit key */ 08746 hcryp.Init.KeySize = CRYP_KEYSIZE_192B; 08747 break; 08748 #endif 08749 case 32: /* 256-bit key */ 08750 hcryp.Init.KeySize = CRYP_KEYSIZE_256B; 08751 break; 08752 default: 08753 break; 08754 } 08755 hcryp.Instance = CRYP; 08756 hcryp.Init.DataType = CRYP_DATATYPE_8B; 08757 hcryp.Init.pKey = (byte*)aes->key; 08758 hcryp.Init.pInitVect = initialCounter; 08759 hcryp.Init.Header = authInPadded; 08760 hcryp.Init.HeaderSize = authInSz; 08761 08762 #ifdef WOLFSSL_STM32L4 08763 /* Set the CRYP parameters */ 08764 hcryp.Init.ChainingMode = CRYP_CHAINMODE_AES_GCM_GMAC; 08765 hcryp.Init.OperatingMode = CRYP_ALGOMODE_DECRYPT; 08766 hcryp.Init.GCMCMACPhase = CRYP_INIT_PHASE; 08767 HAL_CRYP_Init(&hcryp); 08768 08769 /* GCM init phase */ 08770 status = 
HAL_CRYPEx_AES_Auth(&hcryp, NULL, 0, NULL, STM32_HAL_TIMEOUT); 08771 if (status == HAL_OK) { 08772 /* GCM header phase */ 08773 hcryp.Init.GCMCMACPhase = CRYP_HEADER_PHASE; 08774 status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, 0, NULL, STM32_HAL_TIMEOUT); 08775 if (status == HAL_OK) { 08776 /* GCM payload phase */ 08777 hcryp.Init.GCMCMACPhase = CRYP_PAYLOAD_PHASE; 08778 status = HAL_CRYPEx_AES_Auth(&hcryp, (byte*)inPadded, sz, inPadded, 08779 STM32_HAL_TIMEOUT); 08780 if (status == HAL_OK) { 08781 /* GCM final phase */ 08782 hcryp.Init.GCMCMACPhase = CRYP_FINAL_PHASE; 08783 status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, sz, tag, 08784 STM32_HAL_TIMEOUT); 08785 } 08786 } 08787 } 08788 #else 08789 HAL_CRYP_Init(&hcryp); 08790 /* Use inPadded for output buffer instead of 08791 * out so that we don't overflow our size. */ 08792 status = HAL_CRYPEx_AESGCM_Decrypt(&hcryp, (byte*)inPadded, 08793 sz, inPadded, STM32_HAL_TIMEOUT); 08794 /* Compute the authTag */ 08795 if (status == HAL_OK) { 08796 status = HAL_CRYPEx_AESGCM_Finish(&hcryp, sz, tag, STM32_HAL_TIMEOUT); 08797 } 08798 #endif 08799 08800 if (status != HAL_OK) 08801 ret = AES_GCM_AUTH_E; 08802 08803 HAL_CRYP_DeInit(&hcryp); 08804 #else 08805 ByteReverseWords((word32*)keyCopy, (word32*)aes->key, keySize); 08806 08807 /* Input size and auth size need to be the actual sizes, even though 08808 * they are not block aligned, because this length (in bits) is used 08809 * in the final GHASH. Use inPadded for output buffer instead of 08810 * out so that we don't overflow our size. */ 08811 status = CRYP_AES_GCM(MODE_DECRYPT, (uint8_t*)initialCounter, 08812 (uint8_t*)keyCopy, keySize * 8, 08813 (uint8_t*)inPadded, sz, 08814 (uint8_t*)authInPadded,authInSz, 08815 (uint8_t*)inPadded, tag); 08816 if (status != SUCCESS) 08817 ret = AES_GCM_AUTH_E; 08818 #endif /* WOLFSSL_STM32_CUBEMX */ 08819 08820 if (ret == 0 && ConstantCompare(authTag, tag, authTagSz) == 0) { 08821 /* Only keep the decrypted data if authTag success. 
*/ 08822 XMEMCPY(out, inPadded, sz); 08823 ret = 0; /* success */ 08824 } 08825 08826 /* only allocate padding buffers if the inputs are not a multiple of block sz */ 08827 if (inPadded != NULL && inPadSz != sz) 08828 XFREE(inPadded , aes->heap, DYNAMIC_TYPE_TMP_BUFFER); 08829 if (authInPadded != NULL && authPadSz != authInSz) 08830 XFREE(authInPadded, aes->heap, DYNAMIC_TYPE_TMP_BUFFER); 08831 08832 return ret; 08833 } 08834 #else 08835 #ifdef WOLFSSL_AESNI 08836 int AES_GCM_decrypt_C(Aes* aes, byte* out, const byte* in, word32 sz, 08837 const byte* iv, word32 ivSz, 08838 const byte* authTag, word32 authTagSz, 08839 const byte* authIn, word32 authInSz); 08840 #else 08841 static 08842 #endif 08843 int AES_GCM_decrypt_C(Aes* aes, byte* out, const byte* in, word32 sz, 08844 const byte* iv, word32 ivSz, 08845 const byte* authTag, word32 authTagSz, 08846 const byte* authIn, word32 authInSz) 08847 { 08848 int ret = 0; 08849 word32 blocks = sz / AES_BLOCK_SIZE; 08850 word32 partial = sz % AES_BLOCK_SIZE; 08851 const byte* c = in; 08852 byte* p = out; 08853 byte counter[AES_BLOCK_SIZE]; 08854 byte initialCounter[AES_BLOCK_SIZE]; 08855 byte *ctr; 08856 byte scratch[AES_BLOCK_SIZE]; 08857 byte Tprime[AES_BLOCK_SIZE]; 08858 byte EKY0[AES_BLOCK_SIZE]; 08859 ctr = counter; 08860 08861 XMEMSET(initialCounter, 0, AES_BLOCK_SIZE); 08862 if (ivSz == GCM_NONCE_MID_SZ) { 08863 XMEMCPY(initialCounter, iv, ivSz); 08864 initialCounter[AES_BLOCK_SIZE - 1] = 1; 08865 } 08866 else { 08867 GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE); 08868 } 08869 XMEMCPY(ctr, initialCounter, AES_BLOCK_SIZE); 08870 08871 /* Calc the authTag again using the received auth data and the cipher text */ 08872 GHASH(aes, authIn, authInSz, in, sz, Tprime, sizeof(Tprime)); 08873 wc_AesEncrypt(aes, ctr, EKY0); 08874 xorbuf(Tprime, EKY0, sizeof(Tprime)); 08875 08876 if (ConstantCompare(authTag, Tprime, authTagSz) != 0) { 08877 return AES_GCM_AUTH_E; 08878 } 08879 08880 #ifdef WOLFSSL_PIC32MZ_CRYPT 
    if (blocks) {
        /* use intitial IV for PIC32 HW, but don't use it below */
        XMEMCPY(aes->reg, ctr, AES_BLOCK_SIZE);

        ret = wc_Pic32AesCrypt(
            aes->key, aes->keylen, aes->reg, AES_BLOCK_SIZE,
            out, in, (blocks * AES_BLOCK_SIZE),
            PIC32_DECRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_AES_GCM);
        if (ret != 0)
            return ret;
    }
    /* process remainder using partial handling */
#endif

#if defined(HAVE_AES_ECB) && !defined(WOLFSSL_PIC32MZ_CRYPT)
    /* some hardware acceleration can gain performance from doing AES encryption
     * of the whole buffer at once */
    if (c != p) { /* can not handle inline decryption */
        /* stage the counter blocks in 'out', then bulk-encrypt to keystream */
        while (blocks--) {
            IncrementGcmCounter(ctr);
            XMEMCPY(p, ctr, AES_BLOCK_SIZE);
            p += AES_BLOCK_SIZE;
        }

        /* reset number of blocks and then do encryption */
        blocks = sz / AES_BLOCK_SIZE;
        wc_AesEcbEncrypt(aes, out, out, AES_BLOCK_SIZE * blocks);
        xorbuf(out, c, AES_BLOCK_SIZE * blocks);
        c += AES_BLOCK_SIZE * blocks;
    }
    else
#endif /* HAVE_AES_ECB */
    /* block-at-a-time CTR decryption (also used for in-place operation) */
    while (blocks--) {
        IncrementGcmCounter(ctr);
    #ifndef WOLFSSL_PIC32MZ_CRYPT
        wc_AesEncrypt(aes, ctr, scratch);
        xorbuf(scratch, c, AES_BLOCK_SIZE);
        XMEMCPY(p, scratch, AES_BLOCK_SIZE);
    #endif
        p += AES_BLOCK_SIZE;
        c += AES_BLOCK_SIZE;
    }

    /* trailing partial block */
    if (partial != 0) {
        IncrementGcmCounter(ctr);
        wc_AesEncrypt(aes, ctr, scratch);
        xorbuf(scratch, c, partial);
        XMEMCPY(p, scratch, partial);
    }

    return ret;
}

/* AES-GCM decrypt dispatcher: validates arguments then routes to async HW,
 * AES-NI (AVX2/AVX1/SSE), or the portable C implementation above. */
int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
                     const byte* iv, word32 ivSz,
                     const byte* authTag, word32 authTagSz,
                     const byte* authIn, word32 authInSz)
{
#ifdef WOLFSSL_AESNI
    int res;    /* AES-NI asm reports tag validity here (0 = bad tag) */
#endif

    /* argument checks */
    /* If the sz is non-zero, both in and out must be set. If sz is 0,
     * in and out are don't cares, as this is is the GMAC case. */
    if (aes == NULL || iv == NULL || (sz != 0 && (in == NULL || out == NULL)) ||
        authTag == NULL || authTagSz > AES_BLOCK_SIZE || authTagSz == 0) {

        return BAD_FUNC_ARG;
    }

#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
    /* if async and byte count above threshold */
    /* only 12-byte IV is supported in HW */
    if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES &&
            sz >= WC_ASYNC_THRESH_AES_GCM && ivSz == GCM_NONCE_MID_SZ) {
    #if defined(HAVE_CAVIUM)
        #ifdef HAVE_CAVIUM_V
        if (authInSz == 20) { /* Nitrox V GCM is only working with 20 byte AAD */
            return NitroxAesGcmDecrypt(aes, out, in, sz,
                (const byte*)aes->asyncKey, aes->keylen, iv, ivSz,
                authTag, authTagSz, authIn, authInSz);
        }
        #endif
    #elif defined(HAVE_INTEL_QA)
        return IntelQaSymAesGcmDecrypt(&aes->asyncDev, out, in, sz,
            (const byte*)aes->asyncKey, aes->keylen, iv, ivSz,
            authTag, authTagSz, authIn, authInSz);
    #else /* WOLFSSL_ASYNC_CRYPT_TEST */
        if (wc_AsyncTestInit(&aes->asyncDev, ASYNC_TEST_AES_GCM_DECRYPT)) {
            WC_ASYNC_TEST* testDev = &aes->asyncDev.test;
            testDev->aes.aes = aes;
            testDev->aes.out = out;
            testDev->aes.in = in;
            testDev->aes.sz = sz;
            testDev->aes.iv = iv;
            testDev->aes.ivSz = ivSz;
            testDev->aes.authTag = (byte*)authTag;
            testDev->aes.authTagSz = authTagSz;
            testDev->aes.authIn = authIn;
            testDev->aes.authInSz = authInSz;
            return WC_PENDING_E;
        }
    #endif
    }
#endif /* WOLFSSL_ASYNC_CRYPT */

    /* software AES GCM */

#ifdef WOLFSSL_AESNI
    #ifdef HAVE_INTEL_AVX2
    if (IS_INTEL_AVX2(intel_flags)) {
        AES_GCM_decrypt_avx2(in, out, authIn, iv, authTag, sz, authInSz, ivSz,
                             authTagSz, (byte*)aes->key, aes->rounds, &res);
        if (res == 0)
            return AES_GCM_AUTH_E;
        return 0;
    }
    else
    #endif
    #ifdef HAVE_INTEL_AVX1
    if (IS_INTEL_AVX1(intel_flags)) {
        AES_GCM_decrypt_avx1(in, out, authIn, iv, authTag, sz, authInSz, ivSz,
                             authTagSz, (byte*)aes->key, aes->rounds, &res);
        if (res == 0)
            return AES_GCM_AUTH_E;
        return 0;
    }
    else
    #endif
    if (haveAESNI) {
        AES_GCM_decrypt(in, out, authIn, iv, authTag, sz, authInSz, ivSz,
                        authTagSz, (byte*)aes->key, aes->rounds, &res);
        if (res == 0)
            return AES_GCM_AUTH_E;
        return 0;
    }
    else
#endif
    {
        return AES_GCM_decrypt_C(aes, out, in, sz, iv, ivSz, authTag, authTagSz,
                                 authIn, authInSz);
    }
}
#endif
#endif /* HAVE_AES_DECRYPT || HAVE_AESGCM_DECRYPT */
#endif /* (WOLFSSL_XILINX_CRYPT) */
#endif /* end of block for AESGCM implementation selection */


/* Common to all, abstract functions that build off of lower level AESGCM
 * functions */
#ifndef WC_NO_RNG

/* Store an externally-supplied GCM nonce in the context and reset the
 * invocation counter used by wc_AesGcmEncrypt_ex. */
int wc_AesGcmSetExtIV(Aes* aes, const byte* iv, word32 ivSz)
{
    int ret = 0;

    if (aes == NULL || iv == NULL ||
        (ivSz != GCM_NONCE_MIN_SZ && ivSz != GCM_NONCE_MID_SZ &&
         ivSz != GCM_NONCE_MAX_SZ)) {

        ret = BAD_FUNC_ARG;
    }

    if (ret == 0) {
        XMEMCPY((byte*)aes->reg, iv, ivSz);

        /* If the IV is 96, allow for a 2^64 invocation counter.
         * For any other size for the nonce, limit the invocation
         * counter to 32-bits. (SP 800-38D 8.3) */
        aes->invokeCtr[0] = 0;
        aes->invokeCtr[1] = (ivSz == GCM_NONCE_MID_SZ) ?
            0 : 0xFFFFFFFF;
        aes->nonceSz = ivSz;
    }

    return ret;
}


/* Generate and store a GCM nonce: optional fixed prefix (ivFixed) followed
 * by random bytes from rng; total length must be a valid GCM nonce size. */
int wc_AesGcmSetIV(Aes* aes, word32 ivSz,
                   const byte* ivFixed, word32 ivFixedSz,
                   WC_RNG* rng)
{
    int ret = 0;

    if (aes == NULL || rng == NULL ||
        (ivSz != GCM_NONCE_MIN_SZ && ivSz != GCM_NONCE_MID_SZ &&
         ivSz != GCM_NONCE_MAX_SZ) ||
        (ivFixed == NULL && ivFixedSz != 0) ||
        (ivFixed != NULL && ivFixedSz != AES_IV_FIXED_SZ)) {

        ret = BAD_FUNC_ARG;
    }

    if (ret == 0) {
        byte* iv = (byte*)aes->reg;

        if (ivFixedSz)
            XMEMCPY(iv, ivFixed, ivFixedSz);

        /* fill the remainder of the nonce with random bytes */
        ret = wc_RNG_GenerateBlock(rng, iv + ivFixedSz, ivSz - ivFixedSz);
    }

    if (ret == 0) {
        /* If the IV is 96, allow for a 2^64 invocation counter.
         * For any other size for the nonce, limit the invocation
         * counter to 32-bits. (SP 800-38D 8.3) */
        aes->invokeCtr[0] = 0;
        aes->invokeCtr[1] = (ivSz == GCM_NONCE_MID_SZ) ?
            0 : 0xFFFFFFFF;
        aes->nonceSz = ivSz;
    }

    return ret;
}


/* Encrypt using the internally-managed nonce: copies the nonce used into
 * ivOut, enforces the invocation-counter limit, and increments the stored
 * nonce afterward. */
int wc_AesGcmEncrypt_ex(Aes* aes, byte* out, const byte* in, word32 sz,
                        byte* ivOut, word32 ivOutSz,
                        byte* authTag, word32 authTagSz,
                        const byte* authIn, word32 authInSz)
{
    int ret = 0;

    if (aes == NULL || (sz != 0 && (in == NULL || out == NULL)) ||
        ivOut == NULL || ivOutSz != aes->nonceSz ||
        (authIn == NULL && authInSz != 0)) {

        ret = BAD_FUNC_ARG;
    }

    if (ret == 0) {
        /* 64-bit increment of the invocation counter; wrap to zero means
         * the nonce space for this key is exhausted */
        aes->invokeCtr[0]++;
        if (aes->invokeCtr[0] == 0) {
            aes->invokeCtr[1]++;
            if (aes->invokeCtr[1] == 0)
                ret = AES_GCM_OVERFLOW_E;
        }
    }

    if (ret == 0) {
        XMEMCPY(ivOut, aes->reg, ivOutSz);
        ret = wc_AesGcmEncrypt(aes, out, in, sz,
                               (byte*)aes->reg, ivOutSz,
                               authTag, authTagSz,
                               authIn, authInSz);
        IncCtr((byte*)aes->reg, ivOutSz);
    }

    return ret;
}

/* One-shot GMAC tag generation over authIn; the nonce is generated from
 * rng and written to iv (ivSz bytes).  Uses GCM with no plaintext. */
int wc_Gmac(const byte* key, word32 keySz, byte* iv, word32 ivSz,
            const byte* authIn, word32 authInSz,
            byte* authTag, word32 authTagSz, WC_RNG* rng)
{
    Aes aes;
    int ret = 0;

    if (key == NULL || iv == NULL || (authIn == NULL && authInSz != 0) ||
        authTag == NULL || authTagSz == 0 || rng == NULL) {

        ret = BAD_FUNC_ARG;
    }

    if (ret == 0)
        ret = wc_AesGcmSetKey(&aes, key, keySz);
    if (ret == 0)
        ret = wc_AesGcmSetIV(&aes, ivSz, NULL, 0, rng);
    if (ret == 0)
        ret = wc_AesGcmEncrypt_ex(&aes, NULL, NULL, 0, iv, ivSz,
                                  authTag, authTagSz, authIn, authInSz);
    /* scrub key material from the stack before returning */
    ForceZero(&aes, sizeof(aes));

    return ret;
}

/* One-shot GMAC verification: returns 0 when authTag matches authIn under
 * the given key/iv, an error code otherwise. */
int wc_GmacVerify(const byte* key, word32 keySz,
                  const byte* iv, word32 ivSz,
                  const byte* authIn, word32 authInSz,
                  const byte* authTag, word32 authTagSz)
{
    Aes aes;
    int ret = 0;

    if (key == NULL || iv == NULL || (authIn == NULL && authInSz != 0) ||
        authTag == NULL || authTagSz == 0 || authTagSz > AES_BLOCK_SIZE) {

        ret = BAD_FUNC_ARG;
    }

    if (ret == 0)
        ret = wc_AesGcmSetKey(&aes, key, keySz);
    if (ret == 0)
        ret = wc_AesGcmDecrypt(&aes, NULL, NULL, 0, iv, ivSz,
                               authTag, authTagSz, authIn, authInSz);
    /* scrub key material from the stack before returning */
    ForceZero(&aes, sizeof(aes));

    return ret;
}

#endif /* WC_NO_RNG */


/* Thin wrapper: set the GMAC key on the embedded Aes context. */
WOLFSSL_API int wc_GmacSetKey(Gmac* gmac, const byte* key, word32 len)
{
    if (gmac == NULL || key == NULL) {
        return BAD_FUNC_ARG;
    }
    return wc_AesGcmSetKey(&gmac->aes, key, len);
}


/* GMAC is GCM with an empty plaintext: only the AAD is authenticated. */
WOLFSSL_API int wc_GmacUpdate(Gmac* gmac, const byte* iv, word32 ivSz,
                              const byte* authIn, word32 authInSz,
                              byte* authTag, word32 authTagSz)
{
    return wc_AesGcmEncrypt(&gmac->aes, NULL, NULL, 0, iv, ivSz,
                            authTag, authTagSz, authIn, authInSz);
}

#endif /* HAVE_AESGCM */


#ifdef HAVE_AESCCM

/* CCM uses only the forward (encrypt) AES transform, so no IV/direction. */
int wc_AesCcmSetKey(Aes* aes, const byte* key, word32 keySz)
{
    if (!((keySz == 16) || (keySz == 24) || (keySz == 32)))
        return BAD_FUNC_ARG;

    return wc_AesSetKey(aes, key, keySz, NULL, AES_ENCRYPTION);
}

#ifdef WOLFSSL_ARMASM
    /* implementation located in wolfcrypt/src/port/arm/armv8-aes.c */

#elif defined(HAVE_COLDFIRE_SEC)
    #error "Coldfire SEC doesn't currently support AES-CCM mode"

#elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES)
    /* implemented in wolfcrypt/src/port/caam_aes.c */

#elif defined(FREESCALE_LTC)

/* AES-CCM encrypt using the Freescale/NXP LTC engine.
 * return 0 on success */
int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
                     const byte* nonce, word32 nonceSz,
                     byte* authTag, word32 authTagSz,
                     const byte* authIn, word32 authInSz)
{
    byte *key;
    uint32_t keySize;
    status_t status;

    /* sanity check on arguments */
    if (aes == NULL || out == NULL || in == NULL || nonce == NULL
            || authTag == NULL || nonceSz < 7 || nonceSz > 13)
        return BAD_FUNC_ARG;

    key = (byte*)aes->key;

    status = wc_AesGetKeySize(aes, &keySize);
    if (status != 0) {
        return status;
    }

    status = LTC_AES_EncryptTagCcm(LTC_BASE, in, out, inSz,
        nonce, nonceSz, authIn, authInSz, key, keySize, authTag, authTagSz);

    return (kStatus_Success == status) ? 0 : BAD_FUNC_ARG;
}

#ifdef HAVE_AES_DECRYPT
/* AES-CCM decrypt + verify using the LTC engine; the HW checks the tag. */
int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
                     const byte* nonce, word32 nonceSz,
                     const byte* authTag, word32 authTagSz,
                     const byte* authIn, word32 authInSz)
{
    byte *key;
    uint32_t keySize;
    status_t status;

    /* sanity check on arguments */
    if (aes == NULL || out == NULL || in == NULL || nonce == NULL
            || authTag == NULL || nonceSz < 7 || nonceSz > 13)
        return BAD_FUNC_ARG;

    key = (byte*)aes->key;

    status = wc_AesGetKeySize(aes, &keySize);
    if (status != 0) {
        return status;
    }

    status = LTC_AES_DecryptTagCcm(LTC_BASE, in, out, inSz,
        nonce, nonceSz, authIn, authInSz, key, keySize, authTag, authTagSz);

    if (status == kStatus_Success) {
        return 0;
    }
    else {
        /* do not release unauthenticated plaintext */
        XMEMSET(out, 0, inSz);
        return AES_CCM_AUTH_E;
    }
}
#endif /* HAVE_AES_DECRYPT */


/* software AES CCM */
#else

/* CBC-MAC absorb helper: XOR 'in' into the running MAC block 'out',
 * encrypting after each full block and once more for any remainder. */
static void roll_x(Aes* aes, const byte* in, word32 inSz, byte* out)
{
    /* process the bulk of the data */
    while (inSz >= AES_BLOCK_SIZE) {
        xorbuf(out, in, AES_BLOCK_SIZE);
        in += AES_BLOCK_SIZE;
        inSz -= AES_BLOCK_SIZE;

        wc_AesEncrypt(aes, out, out);
    }

    /* process remainder of the data */
    if (inSz > 0) {
        xorbuf(out, in, inSz);
        wc_AesEncrypt(aes, out, out);
    }
}

/* Absorb the AAD into the CBC-MAC: the AAD length is encoded first
 * (RFC 3610 / SP 800-38C a-length encoding), then the AAD bytes. */
static void roll_auth(Aes* aes, const byte* in, word32 inSz, byte* out)
{
    word32 authLenSz;
    word32 remainder;

    /* encode the length in */
    if (inSz <= 0xFEFF) {
        /* short form: 2-byte big-endian length */
        authLenSz = 2;
        out[0] ^= ((inSz & 0xFF00) >> 8);
        out[1] ^=  (inSz & 0x00FF);
    }
    else if (inSz <= 0xFFFFFFFF) {
        /* long form: 0xFFFE marker followed by 4-byte big-endian length */
        authLenSz = 6;
        out[0] ^= 0xFF; out[1] ^= 0xFE;
        out[2] ^= ((inSz & 0xFF000000) >> 24);
        out[3] ^= ((inSz & 0x00FF0000) >> 16);
        out[4] ^= ((inSz & 0x0000FF00) >>  8);
        out[5] ^=  (inSz & 0x000000FF);
    }
    /* Note, the protocol handles auth data up to 2^64, but we are
     * using 32-bit sizes right now, so the bigger data isn't handled
     * else if (inSz <= 0xFFFFFFFFFFFFFFFF) {} */
    else
        return;

    /* start fill out the rest of the first block */
    remainder = AES_BLOCK_SIZE - authLenSz;
    if (inSz >= remainder) {
        /* plenty of bulk data to fill the remainder of this block */
        xorbuf(out + authLenSz, in, remainder);
        inSz -= remainder;
        in += remainder;
    }
    else {
        /* not enough bulk data, copy what is available, and pad zero */
        xorbuf(out + authLenSz, in, inSz);
        inSz = 0;
    }
    wc_AesEncrypt(aes, out, out);

    if (inSz > 0)
        roll_x(aes, in, inSz, out);
}


/* Increment the low lenSz bytes of counter block B (big-endian). */
static WC_INLINE void AesCcmCtrInc(byte* B, word32 lenSz)
{
    word32 i;

    for (i = 0; i < lenSz; i++) {
        if (++B[AES_BLOCK_SIZE - 1 - i] != 0) return;
    }
}

/* Software AES-CCM encrypt: CBC-MAC over (B0, AAD, plaintext) then
 * CTR-mode encryption of the payload and of the tag.
 * return 0 on success */
int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
                     const byte* nonce, word32 nonceSz,
                     byte* authTag, word32 authTagSz,
                     const byte* authIn, word32 authInSz)
{
    byte A[AES_BLOCK_SIZE];     /* working/keystream block */
    byte B[AES_BLOCK_SIZE];     /* B0 / counter block */
    byte lenSz;
    word32 i;
09375 byte mask = 0xFF; 09376 const word32 wordSz = (word32)sizeof(word32); 09377 09378 /* sanity check on arguments */ 09379 if (aes == NULL || out == NULL || in == NULL || nonce == NULL 09380 || authTag == NULL || nonceSz < 7 || nonceSz > 13 || 09381 authTagSz > AES_BLOCK_SIZE) 09382 return BAD_FUNC_ARG; 09383 09384 XMEMCPY(B+1, nonce, nonceSz); 09385 lenSz = AES_BLOCK_SIZE - 1 - (byte)nonceSz; 09386 B[0] = (authInSz > 0 ? 64 : 0) 09387 + (8 * (((byte)authTagSz - 2) / 2)) 09388 + (lenSz - 1); 09389 for (i = 0; i < lenSz; i++) { 09390 if (mask && i >= wordSz) 09391 mask = 0x00; 09392 B[AES_BLOCK_SIZE - 1 - i] = (inSz >> ((8 * i) & mask)) & mask; 09393 } 09394 09395 wc_AesEncrypt(aes, B, A); 09396 09397 if (authInSz > 0) 09398 roll_auth(aes, authIn, authInSz, A); 09399 if (inSz > 0) 09400 roll_x(aes, in, inSz, A); 09401 XMEMCPY(authTag, A, authTagSz); 09402 09403 B[0] = lenSz - 1; 09404 for (i = 0; i < lenSz; i++) 09405 B[AES_BLOCK_SIZE - 1 - i] = 0; 09406 wc_AesEncrypt(aes, B, A); 09407 xorbuf(authTag, A, authTagSz); 09408 09409 B[15] = 1; 09410 while (inSz >= AES_BLOCK_SIZE) { 09411 wc_AesEncrypt(aes, B, A); 09412 xorbuf(A, in, AES_BLOCK_SIZE); 09413 XMEMCPY(out, A, AES_BLOCK_SIZE); 09414 09415 AesCcmCtrInc(B, lenSz); 09416 inSz -= AES_BLOCK_SIZE; 09417 in += AES_BLOCK_SIZE; 09418 out += AES_BLOCK_SIZE; 09419 } 09420 if (inSz > 0) { 09421 wc_AesEncrypt(aes, B, A); 09422 xorbuf(A, in, inSz); 09423 XMEMCPY(out, A, inSz); 09424 } 09425 09426 ForceZero(A, AES_BLOCK_SIZE); 09427 ForceZero(B, AES_BLOCK_SIZE); 09428 09429 return 0; 09430 } 09431 09432 #ifdef HAVE_AES_DECRYPT 09433 int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz, 09434 const byte* nonce, word32 nonceSz, 09435 const byte* authTag, word32 authTagSz, 09436 const byte* authIn, word32 authInSz) 09437 { 09438 byte A[AES_BLOCK_SIZE]; 09439 byte B[AES_BLOCK_SIZE]; 09440 byte* o; 09441 byte lenSz; 09442 word32 i, oSz; 09443 int result = 0; 09444 byte mask = 0xFF; 09445 const word32 wordSz = 
(word32)sizeof(word32); 09446 09447 /* sanity check on arguments */ 09448 if (aes == NULL || out == NULL || in == NULL || nonce == NULL 09449 || authTag == NULL || nonceSz < 7 || nonceSz > 13 || 09450 authTagSz > AES_BLOCK_SIZE) 09451 return BAD_FUNC_ARG; 09452 09453 o = out; 09454 oSz = inSz; 09455 XMEMCPY(B+1, nonce, nonceSz); 09456 lenSz = AES_BLOCK_SIZE - 1 - (byte)nonceSz; 09457 09458 B[0] = lenSz - 1; 09459 for (i = 0; i < lenSz; i++) 09460 B[AES_BLOCK_SIZE - 1 - i] = 0; 09461 B[15] = 1; 09462 09463 while (oSz >= AES_BLOCK_SIZE) { 09464 wc_AesEncrypt(aes, B, A); 09465 xorbuf(A, in, AES_BLOCK_SIZE); 09466 XMEMCPY(o, A, AES_BLOCK_SIZE); 09467 09468 AesCcmCtrInc(B, lenSz); 09469 oSz -= AES_BLOCK_SIZE; 09470 in += AES_BLOCK_SIZE; 09471 o += AES_BLOCK_SIZE; 09472 } 09473 if (inSz > 0) { 09474 wc_AesEncrypt(aes, B, A); 09475 xorbuf(A, in, oSz); 09476 XMEMCPY(o, A, oSz); 09477 } 09478 09479 for (i = 0; i < lenSz; i++) 09480 B[AES_BLOCK_SIZE - 1 - i] = 0; 09481 wc_AesEncrypt(aes, B, A); 09482 09483 o = out; 09484 oSz = inSz; 09485 09486 B[0] = (authInSz > 0 ? 64 : 0) 09487 + (8 * (((byte)authTagSz - 2) / 2)) 09488 + (lenSz - 1); 09489 for (i = 0; i < lenSz; i++) { 09490 if (mask && i >= wordSz) 09491 mask = 0x00; 09492 B[AES_BLOCK_SIZE - 1 - i] = (inSz >> ((8 * i) & mask)) & mask; 09493 } 09494 09495 wc_AesEncrypt(aes, B, A); 09496 09497 if (authInSz > 0) 09498 roll_auth(aes, authIn, authInSz, A); 09499 if (inSz > 0) 09500 roll_x(aes, o, oSz, A); 09501 09502 B[0] = lenSz - 1; 09503 for (i = 0; i < lenSz; i++) 09504 B[AES_BLOCK_SIZE - 1 - i] = 0; 09505 wc_AesEncrypt(aes, B, B); 09506 xorbuf(A, B, authTagSz); 09507 09508 if (ConstantCompare(A, authTag, authTagSz) != 0) { 09509 /* If the authTag check fails, don't keep the decrypted data. 09510 * Unfortunately, you need the decrypted data to calculate the 09511 * check value. 
*/ 09512 XMEMSET(out, 0, inSz); 09513 result = AES_CCM_AUTH_E; 09514 } 09515 09516 ForceZero(A, AES_BLOCK_SIZE); 09517 ForceZero(B, AES_BLOCK_SIZE); 09518 o = NULL; 09519 09520 return result; 09521 } 09522 09523 #endif /* HAVE_AES_DECRYPT */ 09524 #endif /* software AES CCM */ 09525 09526 /* abstract functions that call lower level AESCCM functions */ 09527 #ifndef WC_NO_RNG 09528 09529 int wc_AesCcmSetNonce(Aes* aes, const byte* nonce, word32 nonceSz) 09530 { 09531 int ret = 0; 09532 09533 if (aes == NULL || nonce == NULL || 09534 nonceSz < CCM_NONCE_MIN_SZ || nonceSz > CCM_NONCE_MAX_SZ) { 09535 09536 ret = BAD_FUNC_ARG; 09537 } 09538 09539 if (ret == 0) { 09540 XMEMCPY(aes->reg, nonce, nonceSz); 09541 aes->nonceSz = nonceSz; 09542 09543 /* Invocation counter should be 2^61 */ 09544 aes->invokeCtr[0] = 0; 09545 aes->invokeCtr[1] = 0xE0000000; 09546 } 09547 09548 return ret; 09549 } 09550 09551 09552 int wc_AesCcmEncrypt_ex(Aes* aes, byte* out, const byte* in, word32 sz, 09553 byte* ivOut, word32 ivOutSz, 09554 byte* authTag, word32 authTagSz, 09555 const byte* authIn, word32 authInSz) 09556 { 09557 int ret = 0; 09558 09559 if (aes == NULL || out == NULL || 09560 (in == NULL && sz != 0) || 09561 ivOut == NULL || 09562 (authIn == NULL && authInSz != 0) || 09563 (ivOutSz != aes->nonceSz)) { 09564 09565 ret = BAD_FUNC_ARG; 09566 } 09567 09568 if (ret == 0) { 09569 aes->invokeCtr[0]++; 09570 if (aes->invokeCtr[0] == 0) { 09571 aes->invokeCtr[1]++; 09572 if (aes->invokeCtr[1] == 0) 09573 ret = AES_CCM_OVERFLOW_E; 09574 } 09575 } 09576 09577 if (ret == 0) { 09578 ret = wc_AesCcmEncrypt(aes, out, in, sz, 09579 (byte*)aes->reg, aes->nonceSz, 09580 authTag, authTagSz, 09581 authIn, authInSz); 09582 XMEMCPY(ivOut, aes->reg, aes->nonceSz); 09583 IncCtr((byte*)aes->reg, aes->nonceSz); 09584 } 09585 09586 return ret; 09587 } 09588 09589 #endif /* WC_NO_RNG */ 09590 09591 #endif /* HAVE_AESCCM */ 09592 09593 09594 /* Initialize Aes for use with async hardware */ 09595 int 
wc_AesInit(Aes* aes, void* heap, int devId) 09596 { 09597 int ret = 0; 09598 09599 if (aes == NULL) 09600 return BAD_FUNC_ARG; 09601 09602 aes->heap = heap; 09603 09604 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES) 09605 ret = wolfAsync_DevCtxInit(&aes->asyncDev, WOLFSSL_ASYNC_MARKER_AES, 09606 aes->heap, devId); 09607 #else 09608 (void)devId; 09609 #endif /* WOLFSSL_ASYNC_CRYPT */ 09610 09611 return ret; 09612 } 09613 09614 /* Free Aes from use with async hardware */ 09615 void wc_AesFree(Aes* aes) 09616 { 09617 if (aes == NULL) 09618 return; 09619 09620 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES) 09621 wolfAsync_DevCtxFree(&aes->asyncDev, WOLFSSL_ASYNC_MARKER_AES); 09622 #endif /* WOLFSSL_ASYNC_CRYPT */ 09623 } 09624 09625 09626 int wc_AesGetKeySize(Aes* aes, word32* keySize) 09627 { 09628 int ret = 0; 09629 09630 if (aes == NULL || keySize == NULL) { 09631 return BAD_FUNC_ARG; 09632 } 09633 09634 switch (aes->rounds) { 09635 #ifdef WOLFSSL_AES_128 09636 case 10: 09637 *keySize = 16; 09638 break; 09639 #endif 09640 #ifdef WOLFSSL_AES_192 09641 case 12: 09642 *keySize = 24; 09643 break; 09644 #endif 09645 #ifdef WOLFSSL_AES_256 09646 case 14: 09647 *keySize = 32; 09648 break; 09649 #endif 09650 default: 09651 *keySize = 0; 09652 ret = BAD_FUNC_ARG; 09653 } 09654 09655 return ret; 09656 } 09657 09658 #endif /* !WOLFSSL_TI_CRYPT */ 09659 09660 #ifdef HAVE_AES_ECB 09661 #if defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES) 09662 /* implemented in wolfcrypt/src/port/caam/caam_aes.c */ 09663 #else 09664 09665 /* software implementation */ 09666 int wc_AesEcbEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) 09667 { 09668 word32 blocks = sz / AES_BLOCK_SIZE; 09669 09670 if ((in == NULL) || (out == NULL) || (aes == NULL)) 09671 return BAD_FUNC_ARG; 09672 while (blocks>0) { 09673 wc_AesEncryptDirect(aes, out, in); 09674 out += AES_BLOCK_SIZE; 09675 in += AES_BLOCK_SIZE; 09676 sz -= AES_BLOCK_SIZE; 09677 blocks--; 09678 } 
09679 return 0; 09680 } 09681 09682 09683 int wc_AesEcbDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) 09684 { 09685 word32 blocks = sz / AES_BLOCK_SIZE; 09686 09687 if ((in == NULL) || (out == NULL) || (aes == NULL)) 09688 return BAD_FUNC_ARG; 09689 while (blocks>0) { 09690 wc_AesDecryptDirect(aes, out, in); 09691 out += AES_BLOCK_SIZE; 09692 in += AES_BLOCK_SIZE; 09693 sz -= AES_BLOCK_SIZE; 09694 blocks--; 09695 } 09696 return 0; 09697 } 09698 #endif 09699 #endif /* HAVE_AES_ECB */ 09700 09701 #ifdef WOLFSSL_AES_CFB 09702 /* CFB 128 09703 * 09704 * aes structure holding key to use for encryption 09705 * out buffer to hold result of encryption (must be at least as large as input 09706 * buffer) 09707 * in buffer to encrypt 09708 * sz size of input buffer 09709 * 09710 * returns 0 on success and negative error values on failure 09711 */ 09712 int wc_AesCfbEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) 09713 { 09714 byte* tmp = NULL; 09715 byte* reg = NULL; 09716 09717 WOLFSSL_ENTER("wc_AesCfbEncrypt"); 09718 09719 if (aes == NULL || out == NULL || in == NULL) { 09720 return BAD_FUNC_ARG; 09721 } 09722 09723 if (aes->left && sz) { 09724 reg = (byte*)aes->reg + AES_BLOCK_SIZE - aes->left; 09725 } 09726 09727 /* consume any unused bytes left in aes->tmp */ 09728 tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left; 09729 while (aes->left && sz) { 09730 *(out++) = *(reg++) = *(in++) ^ *(tmp++); 09731 aes->left--; 09732 sz--; 09733 } 09734 09735 while (sz >= AES_BLOCK_SIZE) { 09736 wc_AesEncryptDirect(aes, out, (byte*)aes->reg); 09737 xorbuf(out, in, AES_BLOCK_SIZE); 09738 XMEMCPY(aes->reg, out, AES_BLOCK_SIZE); 09739 out += AES_BLOCK_SIZE; 09740 in += AES_BLOCK_SIZE; 09741 sz -= AES_BLOCK_SIZE; 09742 aes->left = 0; 09743 } 09744 09745 /* encrypt left over data */ 09746 if (sz) { 09747 wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg); 09748 aes->left = AES_BLOCK_SIZE; 09749 tmp = (byte*)aes->tmp; 09750 reg = (byte*)aes->reg; 09751 09752 while 
(sz--) { 09753 *(out++) = *(reg++) = *(in++) ^ *(tmp++); 09754 aes->left--; 09755 } 09756 } 09757 09758 return 0; 09759 } 09760 09761 09762 #ifdef HAVE_AES_DECRYPT 09763 /* CFB 128 09764 * 09765 * aes structure holding key to use for decryption 09766 * out buffer to hold result of decryption (must be at least as large as input 09767 * buffer) 09768 * in buffer to decrypt 09769 * sz size of input buffer 09770 * 09771 * returns 0 on success and negative error values on failure 09772 */ 09773 int wc_AesCfbDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) 09774 { 09775 byte* tmp; 09776 09777 WOLFSSL_ENTER("wc_AesCfbDecrypt"); 09778 09779 if (aes == NULL || out == NULL || in == NULL) { 09780 return BAD_FUNC_ARG; 09781 } 09782 09783 /* check if more input needs copied over to aes->reg */ 09784 if (aes->left && sz) { 09785 int size = min(aes->left, sz); 09786 XMEMCPY((byte*)aes->reg + AES_BLOCK_SIZE - aes->left, in, size); 09787 } 09788 09789 /* consume any unused bytes left in aes->tmp */ 09790 tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left; 09791 while (aes->left && sz) { 09792 *(out++) = *(in++) ^ *(tmp++); 09793 aes->left--; 09794 sz--; 09795 } 09796 09797 while (sz > AES_BLOCK_SIZE) { 09798 wc_AesEncryptDirect(aes, out, (byte*)aes->reg); 09799 xorbuf(out, in, AES_BLOCK_SIZE); 09800 XMEMCPY(aes->reg, in, AES_BLOCK_SIZE); 09801 out += AES_BLOCK_SIZE; 09802 in += AES_BLOCK_SIZE; 09803 sz -= AES_BLOCK_SIZE; 09804 aes->left = 0; 09805 } 09806 09807 /* decrypt left over data */ 09808 if (sz) { 09809 wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg); 09810 XMEMCPY(aes->reg, in, sz); 09811 aes->left = AES_BLOCK_SIZE; 09812 tmp = (byte*)aes->tmp; 09813 09814 while (sz--) { 09815 *(out++) = *(in++) ^ *(tmp++); 09816 aes->left--; 09817 } 09818 } 09819 09820 return 0; 09821 } 09822 #endif /* HAVE_AES_DECRYPT */ 09823 #endif /* WOLFSSL_AES_CFB */ 09824 09825 09826 #ifdef HAVE_AES_KEYWRAP 09827 09828 /* Initialize key wrap counter with value */ 09829 static 
WC_INLINE void InitKeyWrapCounter(byte* inOutCtr, word32 value) 09830 { 09831 int i; 09832 word32 bytes; 09833 09834 bytes = sizeof(word32); 09835 for (i = 0; i < (int)sizeof(word32); i++) { 09836 inOutCtr[i+sizeof(word32)] = (value >> ((bytes - 1) * 8)) & 0xFF; 09837 bytes--; 09838 } 09839 } 09840 09841 /* Increment key wrap counter */ 09842 static WC_INLINE void IncrementKeyWrapCounter(byte* inOutCtr) 09843 { 09844 int i; 09845 09846 /* in network byte order so start at end and work back */ 09847 for (i = KEYWRAP_BLOCK_SIZE - 1; i >= 0; i--) { 09848 if (++inOutCtr[i]) /* we're done unless we overflow */ 09849 return; 09850 } 09851 } 09852 09853 /* Decrement key wrap counter */ 09854 static WC_INLINE void DecrementKeyWrapCounter(byte* inOutCtr) 09855 { 09856 int i; 09857 09858 for (i = KEYWRAP_BLOCK_SIZE - 1; i >= 0; i--) { 09859 if (--inOutCtr[i] != 0xFF) /* we're done unless we underflow */ 09860 return; 09861 } 09862 } 09863 09864 /* perform AES key wrap (RFC3394), return out sz on success, negative on err */ 09865 int wc_AesKeyWrap(const byte* key, word32 keySz, const byte* in, word32 inSz, 09866 byte* out, word32 outSz, const byte* iv) 09867 { 09868 Aes aes; 09869 byte* r; 09870 word32 i; 09871 int ret, j; 09872 09873 byte t[KEYWRAP_BLOCK_SIZE]; 09874 byte tmp[AES_BLOCK_SIZE]; 09875 09876 /* n must be at least 2, output size is n + 8 bytes */ 09877 if (key == NULL || in == NULL || inSz < 2 || 09878 out == NULL || outSz < (inSz + KEYWRAP_BLOCK_SIZE)) 09879 return BAD_FUNC_ARG; 09880 09881 /* input must be multiple of 64-bits */ 09882 if (inSz % KEYWRAP_BLOCK_SIZE != 0) 09883 return BAD_FUNC_ARG; 09884 09885 /* user IV is optional */ 09886 if (iv == NULL) { 09887 XMEMSET(tmp, 0xA6, KEYWRAP_BLOCK_SIZE); 09888 } else { 09889 XMEMCPY(tmp, iv, KEYWRAP_BLOCK_SIZE); 09890 } 09891 09892 r = out + 8; 09893 XMEMCPY(r, in, inSz); 09894 XMEMSET(t, 0, sizeof(t)); 09895 09896 ret = wc_AesInit(&aes, NULL, INVALID_DEVID); 09897 if (ret != 0) 09898 return ret; 09899 09900 ret 
= wc_AesSetKey(&aes, key, keySz, NULL, AES_ENCRYPTION); 09901 if (ret != 0) 09902 return ret; 09903 09904 for (j = 0; j <= 5; j++) { 09905 for (i = 1; i <= inSz / KEYWRAP_BLOCK_SIZE; i++) { 09906 09907 /* load R[i] */ 09908 XMEMCPY(tmp + KEYWRAP_BLOCK_SIZE, r, KEYWRAP_BLOCK_SIZE); 09909 09910 wc_AesEncryptDirect(&aes, tmp, tmp); 09911 09912 /* calculate new A */ 09913 IncrementKeyWrapCounter(t); 09914 xorbuf(tmp, t, KEYWRAP_BLOCK_SIZE); 09915 09916 /* save R[i] */ 09917 XMEMCPY(r, tmp + KEYWRAP_BLOCK_SIZE, KEYWRAP_BLOCK_SIZE); 09918 r += KEYWRAP_BLOCK_SIZE; 09919 } 09920 r = out + KEYWRAP_BLOCK_SIZE; 09921 } 09922 09923 /* C[0] = A */ 09924 XMEMCPY(out, tmp, KEYWRAP_BLOCK_SIZE); 09925 09926 wc_AesFree(&aes); 09927 09928 return inSz + KEYWRAP_BLOCK_SIZE; 09929 } 09930 09931 int wc_AesKeyUnWrap(const byte* key, word32 keySz, const byte* in, word32 inSz, 09932 byte* out, word32 outSz, const byte* iv) 09933 { 09934 Aes aes; 09935 byte* r; 09936 word32 i, n; 09937 int ret, j; 09938 09939 byte t[KEYWRAP_BLOCK_SIZE]; 09940 byte tmp[AES_BLOCK_SIZE]; 09941 09942 const byte* expIv; 09943 const byte defaultIV[] = { 09944 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6 09945 }; 09946 09947 (void)iv; 09948 09949 if (key == NULL || in == NULL || inSz < 3 || 09950 out == NULL || outSz < (inSz - KEYWRAP_BLOCK_SIZE)) 09951 return BAD_FUNC_ARG; 09952 09953 /* input must be multiple of 64-bits */ 09954 if (inSz % KEYWRAP_BLOCK_SIZE != 0) 09955 return BAD_FUNC_ARG; 09956 09957 /* user IV optional */ 09958 if (iv != NULL) { 09959 expIv = iv; 09960 } else { 09961 expIv = defaultIV; 09962 } 09963 09964 /* A = C[0], R[i] = C[i] */ 09965 XMEMCPY(tmp, in, KEYWRAP_BLOCK_SIZE); 09966 XMEMCPY(out, in + KEYWRAP_BLOCK_SIZE, inSz - KEYWRAP_BLOCK_SIZE); 09967 XMEMSET(t, 0, sizeof(t)); 09968 09969 ret = wc_AesInit(&aes, NULL, INVALID_DEVID); 09970 if (ret != 0) 09971 return ret; 09972 09973 ret = wc_AesSetKey(&aes, key, keySz, NULL, AES_DECRYPTION); 09974 if (ret != 0) 09975 return ret; 09976 09977 
/* initialize counter to 6n */ 09978 n = (inSz - 1) / KEYWRAP_BLOCK_SIZE; 09979 InitKeyWrapCounter(t, 6 * n); 09980 09981 for (j = 5; j >= 0; j--) { 09982 for (i = n; i >= 1; i--) { 09983 09984 /* calculate A */ 09985 xorbuf(tmp, t, KEYWRAP_BLOCK_SIZE); 09986 DecrementKeyWrapCounter(t); 09987 09988 /* load R[i], starting at end of R */ 09989 r = out + ((i - 1) * KEYWRAP_BLOCK_SIZE); 09990 XMEMCPY(tmp + KEYWRAP_BLOCK_SIZE, r, KEYWRAP_BLOCK_SIZE); 09991 wc_AesDecryptDirect(&aes, tmp, tmp); 09992 09993 /* save R[i] */ 09994 XMEMCPY(r, tmp + KEYWRAP_BLOCK_SIZE, KEYWRAP_BLOCK_SIZE); 09995 } 09996 } 09997 09998 wc_AesFree(&aes); 09999 10000 /* verify IV */ 10001 if (XMEMCMP(tmp, expIv, KEYWRAP_BLOCK_SIZE) != 0) 10002 return BAD_KEYWRAP_IV_E; 10003 10004 return inSz - KEYWRAP_BLOCK_SIZE; 10005 } 10006 10007 #endif /* HAVE_AES_KEYWRAP */ 10008 10009 #ifdef WOLFSSL_AES_XTS 10010 10011 /* Galios Field to use */ 10012 #define GF_XTS 0x87 10013 10014 /* This is to help with setting keys to correct encrypt or decrypt type. 10015 * 10016 * tweak AES key for tweak in XTS 10017 * aes AES key for encrypt/decrypt process 10018 * key buffer holding aes key | tweak key 10019 * len length of key buffer in bytes. Should be twice that of key size. i.e. 10020 * 32 for a 16 byte key. 10021 * dir direction, either AES_ENCRYPTION or AES_DECRYPTION 10022 * heap heap hint to use for memory. Can be NULL 10023 * devId id to use with async crypto. Can be 0 10024 * 10025 * Note: is up to user to call wc_AesFree on tweak and aes key when done. 
10026 * 10027 * return 0 on success 10028 */ 10029 int wc_AesXtsSetKey(XtsAes* aes, const byte* key, word32 len, int dir, 10030 void* heap, int devId) 10031 { 10032 word32 keySz; 10033 int ret = 0; 10034 10035 if (aes == NULL || key == NULL) { 10036 return BAD_FUNC_ARG; 10037 } 10038 10039 if ((ret = wc_AesInit(&aes->tweak, heap, devId)) != 0) { 10040 return ret; 10041 } 10042 if ((ret = wc_AesInit(&aes->aes, heap, devId)) != 0) { 10043 return ret; 10044 } 10045 10046 keySz = len/2; 10047 if (keySz != 16 && keySz != 32) { 10048 WOLFSSL_MSG("Unsupported key size"); 10049 return WC_KEY_SIZE_E; 10050 } 10051 10052 if ((ret = wc_AesSetKey(&aes->aes, key, keySz, NULL, dir)) == 0) { 10053 ret = wc_AesSetKey(&aes->tweak, key + keySz, keySz, NULL, 10054 AES_ENCRYPTION); 10055 if (ret != 0) { 10056 wc_AesFree(&aes->aes); 10057 } 10058 } 10059 10060 return ret; 10061 } 10062 10063 10064 /* This is used to free up resources used by Aes structs 10065 * 10066 * aes AES keys to free 10067 * 10068 * return 0 on success 10069 */ 10070 int wc_AesXtsFree(XtsAes* aes) 10071 { 10072 if (aes != NULL) { 10073 wc_AesFree(&aes->aes); 10074 wc_AesFree(&aes->tweak); 10075 } 10076 10077 return 0; 10078 } 10079 10080 10081 /* Same process as wc_AesXtsEncrypt but uses a word64 type as the tweak value 10082 * instead of a byte array. This just converts the word64 to a byte array and 10083 * calls wc_AesXtsEncrypt. 
10084 * 10085 * aes AES keys to use for block encrypt/decrypt 10086 * out output buffer to hold cipher text 10087 * in input plain text buffer to encrypt 10088 * sz size of both out and in buffers 10089 * sector value to use for tweak 10090 * 10091 * returns 0 on success 10092 */ 10093 int wc_AesXtsEncryptSector(XtsAes* aes, byte* out, const byte* in, 10094 word32 sz, word64 sector) 10095 { 10096 byte* pt; 10097 byte i[AES_BLOCK_SIZE]; 10098 10099 XMEMSET(i, 0, AES_BLOCK_SIZE); 10100 #ifdef BIG_ENDIAN_ORDER 10101 sector = ByteReverseWord64(sector); 10102 #endif 10103 pt = (byte*)§or; 10104 XMEMCPY(i, pt, sizeof(word64)); 10105 10106 return wc_AesXtsEncrypt(aes, out, in, sz, (const byte*)i, AES_BLOCK_SIZE); 10107 } 10108 10109 10110 /* Same process as wc_AesXtsDecrypt but uses a word64 type as the tweak value 10111 * instead of a byte array. This just converts the word64 to a byte array. 10112 * 10113 * aes AES keys to use for block encrypt/decrypt 10114 * out output buffer to hold plain text 10115 * in input cipher text buffer to encrypt 10116 * sz size of both out and in buffers 10117 * sector value to use for tweak 10118 * 10119 * returns 0 on success 10120 */ 10121 int wc_AesXtsDecryptSector(XtsAes* aes, byte* out, const byte* in, word32 sz, 10122 word64 sector) 10123 { 10124 byte* pt; 10125 byte i[AES_BLOCK_SIZE]; 10126 10127 XMEMSET(i, 0, AES_BLOCK_SIZE); 10128 #ifdef BIG_ENDIAN_ORDER 10129 sector = ByteReverseWord64(sector); 10130 #endif 10131 pt = (byte*)§or; 10132 XMEMCPY(i, pt, sizeof(word64)); 10133 10134 return wc_AesXtsDecrypt(aes, out, in, sz, (const byte*)i, AES_BLOCK_SIZE); 10135 } 10136 10137 #ifdef HAVE_AES_ECB 10138 /* helper function for encrypting / decrypting full buffer at once */ 10139 static int _AesXtsHelper(Aes* aes, byte* out, const byte* in, word32 sz, int dir) 10140 { 10141 word32 outSz = sz; 10142 word32 totalSz = (sz / AES_BLOCK_SIZE) * AES_BLOCK_SIZE; /* total bytes */ 10143 byte* pt = out; 10144 10145 outSz -= AES_BLOCK_SIZE; 10146 
10147 while (outSz > 0) { 10148 word32 j; 10149 byte carry = 0; 10150 10151 /* multiply by shift left and propogate carry */ 10152 for (j = 0; j < AES_BLOCK_SIZE && outSz > 0; j++, outSz--) { 10153 byte tmpC; 10154 10155 tmpC = (pt[j] >> 7) & 0x01; 10156 pt[j+AES_BLOCK_SIZE] = ((pt[j] << 1) + carry) & 0xFF; 10157 carry = tmpC; 10158 } 10159 if (carry) { 10160 pt[AES_BLOCK_SIZE] ^= GF_XTS; 10161 } 10162 10163 pt += AES_BLOCK_SIZE; 10164 } 10165 10166 xorbuf(out, in, totalSz); 10167 if (dir == AES_ENCRYPTION) { 10168 return wc_AesEcbEncrypt(aes, out, out, totalSz); 10169 } 10170 else { 10171 return wc_AesEcbDecrypt(aes, out, out, totalSz); 10172 } 10173 } 10174 #endif /* HAVE_AES_ECB */ 10175 10176 10177 /* AES with XTS mode. (XTS) XEX encryption with Tweak and cipher text Stealing. 10178 * 10179 * xaes AES keys to use for block encrypt/decrypt 10180 * out output buffer to hold cipher text 10181 * in input plain text buffer to encrypt 10182 * sz size of both out and in buffers 10183 * i value to use for tweak 10184 * iSz size of i buffer, should always be AES_BLOCK_SIZE but having this input 10185 * adds a sanity check on how the user calls the function. 
10186 * 10187 * returns 0 on success 10188 */ 10189 int wc_AesXtsEncrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, 10190 const byte* i, word32 iSz) 10191 { 10192 int ret = 0; 10193 word32 blocks = (sz / AES_BLOCK_SIZE); 10194 Aes *aes, *tweak; 10195 10196 if (xaes == NULL || out == NULL || in == NULL) { 10197 return BAD_FUNC_ARG; 10198 } 10199 10200 aes = &xaes->aes; 10201 tweak = &xaes->tweak; 10202 10203 if (iSz < AES_BLOCK_SIZE) { 10204 return BAD_FUNC_ARG; 10205 } 10206 10207 if (blocks > 0) { 10208 byte tmp[AES_BLOCK_SIZE]; 10209 10210 XMEMSET(tmp, 0, AES_BLOCK_SIZE); /* set to 0's in case of improper AES 10211 * key setup passed to encrypt direct*/ 10212 10213 wc_AesEncryptDirect(tweak, tmp, i); 10214 10215 #ifdef HAVE_AES_ECB 10216 /* encrypt all of buffer at once when possible */ 10217 if (in != out) { /* can not handle inline */ 10218 XMEMCPY(out, tmp, AES_BLOCK_SIZE); 10219 if ((ret = _AesXtsHelper(aes, out, in, sz, AES_ENCRYPTION)) != 0) { 10220 return ret; 10221 } 10222 } 10223 #endif 10224 10225 while (blocks > 0) { 10226 word32 j; 10227 byte carry = 0; 10228 byte buf[AES_BLOCK_SIZE]; 10229 10230 #ifdef HAVE_AES_ECB 10231 if (in == out) { /* check for if inline */ 10232 #endif 10233 XMEMCPY(buf, in, AES_BLOCK_SIZE); 10234 xorbuf(buf, tmp, AES_BLOCK_SIZE); 10235 wc_AesEncryptDirect(aes, out, buf); 10236 #ifdef HAVE_AES_ECB 10237 } 10238 #endif 10239 xorbuf(out, tmp, AES_BLOCK_SIZE); 10240 10241 /* multiply by shift left and propogate carry */ 10242 for (j = 0; j < AES_BLOCK_SIZE; j++) { 10243 byte tmpC; 10244 10245 tmpC = (tmp[j] >> 7) & 0x01; 10246 tmp[j] = ((tmp[j] << 1) + carry) & 0xFF; 10247 carry = tmpC; 10248 } 10249 if (carry) { 10250 tmp[0] ^= GF_XTS; 10251 } 10252 10253 in += AES_BLOCK_SIZE; 10254 out += AES_BLOCK_SIZE; 10255 sz -= AES_BLOCK_SIZE; 10256 blocks--; 10257 } 10258 10259 /* stealing operation of XTS to handle left overs */ 10260 if (sz > 0) { 10261 byte buf[AES_BLOCK_SIZE]; 10262 10263 XMEMCPY(buf, out - AES_BLOCK_SIZE, 
AES_BLOCK_SIZE); 10264 if (sz >= AES_BLOCK_SIZE) { /* extra sanity check before copy */ 10265 return BUFFER_E; 10266 } 10267 XMEMCPY(out, buf, sz); 10268 XMEMCPY(buf, in, sz); 10269 10270 xorbuf(buf, tmp, AES_BLOCK_SIZE); 10271 wc_AesEncryptDirect(aes, out - AES_BLOCK_SIZE, buf); 10272 xorbuf(out - AES_BLOCK_SIZE, tmp, AES_BLOCK_SIZE); 10273 } 10274 } 10275 else { 10276 WOLFSSL_MSG("Plain text input too small for encryption"); 10277 return BAD_FUNC_ARG; 10278 } 10279 10280 return ret; 10281 } 10282 10283 10284 /* Same process as encryption but Aes key is AES_DECRYPTION type. 10285 * 10286 * xaes AES keys to use for block encrypt/decrypt 10287 * out output buffer to hold plain text 10288 * in input cipher text buffer to decrypt 10289 * sz size of both out and in buffers 10290 * i value to use for tweak 10291 * iSz size of i buffer, should always be AES_BLOCK_SIZE but having this input 10292 * adds a sanity check on how the user calls the function. 10293 * 10294 * returns 0 on success 10295 */ 10296 int wc_AesXtsDecrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, 10297 const byte* i, word32 iSz) 10298 { 10299 int ret = 0; 10300 word32 blocks = (sz / AES_BLOCK_SIZE); 10301 Aes *aes, *tweak; 10302 10303 if (xaes == NULL || out == NULL || in == NULL) { 10304 return BAD_FUNC_ARG; 10305 } 10306 10307 aes = &xaes->aes; 10308 tweak = &xaes->tweak; 10309 10310 if (iSz < AES_BLOCK_SIZE) { 10311 return BAD_FUNC_ARG; 10312 } 10313 10314 if (blocks > 0) { 10315 word32 j; 10316 byte carry = 0; 10317 byte tmp[AES_BLOCK_SIZE]; 10318 byte stl = (sz % AES_BLOCK_SIZE); 10319 10320 XMEMSET(tmp, 0, AES_BLOCK_SIZE); /* set to 0's in case of improper AES 10321 * key setup passed to decrypt direct*/ 10322 10323 wc_AesEncryptDirect(tweak, tmp, i); 10324 10325 /* if Stealing then break out of loop one block early to handle special 10326 * case */ 10327 if (stl > 0) { 10328 blocks--; 10329 } 10330 10331 #ifdef HAVE_AES_ECB 10332 /* decrypt all of buffer at once when possible */ 10333 
if (in != out) { /* can not handle inline */ 10334 XMEMCPY(out, tmp, AES_BLOCK_SIZE); 10335 if ((ret = _AesXtsHelper(aes, out, in, sz, AES_DECRYPTION)) != 0) { 10336 return ret; 10337 } 10338 } 10339 #endif 10340 10341 while (blocks > 0) { 10342 byte buf[AES_BLOCK_SIZE]; 10343 10344 #ifdef HAVE_AES_ECB 10345 if (in == out) { /* check for if inline */ 10346 #endif 10347 XMEMCPY(buf, in, AES_BLOCK_SIZE); 10348 xorbuf(buf, tmp, AES_BLOCK_SIZE); 10349 wc_AesDecryptDirect(aes, out, buf); 10350 #ifdef HAVE_AES_ECB 10351 } 10352 #endif 10353 xorbuf(out, tmp, AES_BLOCK_SIZE); 10354 10355 /* multiply by shift left and propogate carry */ 10356 for (j = 0; j < AES_BLOCK_SIZE; j++) { 10357 byte tmpC; 10358 10359 tmpC = (tmp[j] >> 7) & 0x01; 10360 tmp[j] = ((tmp[j] << 1) + carry) & 0xFF; 10361 carry = tmpC; 10362 } 10363 if (carry) { 10364 tmp[0] ^= GF_XTS; 10365 } 10366 carry = 0; 10367 10368 in += AES_BLOCK_SIZE; 10369 out += AES_BLOCK_SIZE; 10370 sz -= AES_BLOCK_SIZE; 10371 blocks--; 10372 } 10373 10374 /* stealing operation of XTS to handle left overs */ 10375 if (sz > 0) { 10376 byte buf[AES_BLOCK_SIZE]; 10377 byte tmp2[AES_BLOCK_SIZE]; 10378 10379 /* multiply by shift left and propogate carry */ 10380 for (j = 0; j < AES_BLOCK_SIZE; j++) { 10381 byte tmpC; 10382 10383 tmpC = (tmp[j] >> 7) & 0x01; 10384 tmp2[j] = ((tmp[j] << 1) + carry) & 0xFF; 10385 carry = tmpC; 10386 } 10387 if (carry) { 10388 tmp2[0] ^= GF_XTS; 10389 } 10390 10391 XMEMCPY(buf, in, AES_BLOCK_SIZE); 10392 xorbuf(buf, tmp2, AES_BLOCK_SIZE); 10393 wc_AesDecryptDirect(aes, out, buf); 10394 xorbuf(out, tmp2, AES_BLOCK_SIZE); 10395 10396 /* tmp2 holds partial | last */ 10397 XMEMCPY(tmp2, out, AES_BLOCK_SIZE); 10398 in += AES_BLOCK_SIZE; 10399 out += AES_BLOCK_SIZE; 10400 sz -= AES_BLOCK_SIZE; 10401 10402 /* Make buffer with end of cipher text | last */ 10403 XMEMCPY(buf, tmp2, AES_BLOCK_SIZE); 10404 if (sz >= AES_BLOCK_SIZE) { /* extra sanity check before copy */ 10405 return BUFFER_E; 10406 } 10407 
XMEMCPY(buf, in, sz); 10408 XMEMCPY(out, tmp2, sz); 10409 10410 xorbuf(buf, tmp, AES_BLOCK_SIZE); 10411 wc_AesDecryptDirect(aes, tmp2, buf); 10412 xorbuf(tmp2, tmp, AES_BLOCK_SIZE); 10413 XMEMCPY(out - AES_BLOCK_SIZE, tmp2, AES_BLOCK_SIZE); 10414 } 10415 } 10416 else { 10417 WOLFSSL_MSG("Plain text input too small for encryption"); 10418 return BAD_FUNC_ARG; 10419 } 10420 10421 return ret; 10422 } 10423 10424 #endif /* WOLFSSL_AES_XTS */ 10425 10426 #endif /* HAVE_FIPS */ 10427 #endif /* !NO_AES */ 10428
Generated on Tue Jul 12 2022 16:58:03 by Doxygen 1.7.2