wolfSSL SSL/TLS library, supporting up to TLS 1.3
Diff: wolfcrypt/src/aes.c
Revision: 16:8e0d178b1d1e
Parent: 15:117db924cf7c
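Among the validation tightening in this revision, the AES-GCM entry points now reject a zero-length IV (ivSz == 0) in addition to the existing AES_BLOCK_SIZE upper bound. A minimal sketch of a call that now fails fast, assuming a standard wolfSSL build with AES-GCM enabled (the key and IV values are placeholders, not real parameters):

    #include <wolfssl/options.h>   /* generated build settings; drop if not present */
    #include <wolfssl/wolfcrypt/aes.h>
    #include <wolfssl/wolfcrypt/error-crypt.h>
    #include <stdio.h>

    int main(void)
    {
        Aes  aes;
        byte key[16] = {0};               /* demo key only */
        byte iv[12]  = {0};
        byte pt[16]  = {0};
        byte ct[16], tag[AES_BLOCK_SIZE];
        int  ret;

        if (wc_AesInit(&aes, NULL, INVALID_DEVID) != 0)
            return 1;
        if (wc_AesGcmSetKey(&aes, key, sizeof(key)) != 0)
            return 1;

        /* ivSz == 0 is rejected up front after this revision */
        ret = wc_AesGcmEncrypt(&aes, ct, pt, sizeof(pt), iv, 0,
                               tag, sizeof(tag), NULL, 0);
        printf("ivSz == 0 -> %d (expect BAD_FUNC_ARG)\n", ret);

        wc_AesFree(&aes);
        return 0;
    }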
--- a/wolfcrypt/src/aes.c Sat Aug 18 22:20:43 2018 +0000 +++ b/wolfcrypt/src/aes.c Thu Jun 04 23:57:22 2020 +0000 @@ -1,6 +1,6 @@ /* aes.c * - * Copyright (C) 2006-2017 wolfSSL Inc. + * Copyright (C) 2006-2020 wolfSSL Inc. * * This file is part of wolfSSL. * @@ -29,6 +29,8 @@ #if !defined(NO_AES) +/* Tip: Locate the software cipher modes by searching for "Software AES" */ + #if defined(HAVE_FIPS) && \ defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2) @@ -44,6 +46,10 @@ #include <wolfssl/wolfcrypt/aes.h> #include <wolfssl/wolfcrypt/cpuid.h> +#ifdef WOLF_CRYPTO_CB + #include <wolfssl/wolfcrypt/cryptocb.h> +#endif + /* fips wrapper calls, user can call direct */ #if defined(HAVE_FIPS) && \ @@ -136,9 +142,9 @@ byte* authTag, word32 authTagSz, const byte* authIn, word32 authInSz) { - if (aes == NULL || authTagSz > AES_BLOCK_SIZE - || authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ || - ivSz > AES_BLOCK_SIZE) { + if (aes == NULL || authTagSz > AES_BLOCK_SIZE || + authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ || + ivSz == 0 || ivSz > AES_BLOCK_SIZE) { return BAD_FUNC_ARG; } @@ -154,7 +160,7 @@ { if (aes == NULL || out == NULL || in == NULL || iv == NULL || authTag == NULL || authTagSz > AES_BLOCK_SIZE || - ivSz > AES_BLOCK_SIZE) { + ivSz == 0 || ivSz > AES_BLOCK_SIZE) { return BAD_FUNC_ARG; } @@ -227,11 +233,14 @@ #endif /* HAVE_AES_DECRYPT */ #endif /* HAVE_AESCCM && HAVE_FIPS_VERSION 2 */ - int wc_AesInit(Aes* aes, void* h, int i) + int wc_AesInit(Aes* aes, void* h, int i) { - (void)aes; + if (aes == NULL) + return BAD_FUNC_ARG; + (void)h; (void)i; + /* FIPS doesn't support: return AesInit(aes, h, i); */ return 0; @@ -279,106 +288,64 @@ /* Define AES implementation includes and functions */ #if defined(STM32_CRYPTO) - /* STM32F2/F4 hardware AES support for CBC, CTR modes */ - - #ifdef WOLFSSL_STM32L4 - #define CRYP AES - #endif - - /* CRYPT_AES_GCM starts the IV with 2 */ - #define STM32_GCM_IV_START 2 + /* STM32F2/F4/F7/L4 hardware AES support for ECB, CBC, CTR and GCM modes */ #if defined(WOLFSSL_AES_DIRECT) || defined(HAVE_AESGCM) || defined(HAVE_AESCCM) + static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock) { int ret = 0; #ifdef WOLFSSL_STM32_CUBEMX CRYP_HandleTypeDef hcryp; - - XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef)); - switch(aes->rounds) { - case 10: /* 128-bit key */ - hcryp.Init.KeySize = CRYP_KEYSIZE_128B; - break; - #ifdef CRYP_KEYSIZE_192B - case 12: /* 192-bit key */ - hcryp.Init.KeySize = CRYP_KEYSIZE_192B; - break; - #endif - case 14: /* 256-bit key */ - hcryp.Init.KeySize = CRYP_KEYSIZE_256B; - break; - default: - break; - } - hcryp.Instance = CRYP; - hcryp.Init.DataType = CRYP_DATATYPE_8B; - hcryp.Init.pKey = (uint8_t*)aes->key; - + #else + CRYP_InitTypeDef cryptInit; + CRYP_KeyInitTypeDef keyInit; + #endif + + #ifdef WOLFSSL_STM32_CUBEMX + ret = wc_Stm32_Aes_Init(aes, &hcryp); + if (ret != 0) + return ret; + + #ifdef STM32_CRYPTO_AES_ONLY + hcryp.Init.OperatingMode = CRYP_ALGOMODE_ENCRYPT; + hcryp.Init.ChainingMode = CRYP_CHAINMODE_AES_ECB; + hcryp.Init.KeyWriteFlag = CRYP_KEY_WRITE_ENABLE; + #elif defined(STM32_HAL_V2) + hcryp.Init.Algorithm = CRYP_AES_ECB; + #endif HAL_CRYP_Init(&hcryp); - if (HAL_CRYP_AESECB_Encrypt(&hcryp, (uint8_t*)inBlock, AES_BLOCK_SIZE, - outBlock, STM32_HAL_TIMEOUT) != HAL_OK) { - ret = WC_TIMEOUT_E; - } - - HAL_CRYP_DeInit(&hcryp); + #ifdef STM32_CRYPTO_AES_ONLY + ret = HAL_CRYPEx_AES(&hcryp, (uint8_t*)inBlock, AES_BLOCK_SIZE, + outBlock, STM32_HAL_TIMEOUT); + #elif defined(STM32_HAL_V2) + ret = HAL_CRYP_Encrypt(&hcryp, (uint32_t*)inBlock, 
AES_BLOCK_SIZE, + (uint32_t*)outBlock, STM32_HAL_TIMEOUT); #else - word32 *enc_key; - CRYP_InitTypeDef AES_CRYP_InitStructure; - CRYP_KeyInitTypeDef AES_CRYP_KeyInitStructure; - - enc_key = aes->key; - - /* crypto structure initialization */ - CRYP_KeyStructInit(&AES_CRYP_KeyInitStructure); - CRYP_StructInit(&AES_CRYP_InitStructure); + ret = HAL_CRYP_AESECB_Encrypt(&hcryp, (uint8_t*)inBlock, AES_BLOCK_SIZE, + outBlock, STM32_HAL_TIMEOUT); + #endif + if (ret != HAL_OK) { + ret = WC_TIMEOUT_E; + } + HAL_CRYP_DeInit(&hcryp); + + #else /* STD_PERI_LIB */ + ret = wc_Stm32_Aes_Init(aes, &cryptInit, &keyInit); + if (ret != 0) + return ret; /* reset registers to their default values */ CRYP_DeInit(); - /* load key into correct registers */ - switch (aes->rounds) { - case 10: /* 128-bit key */ - AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_128b; - AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[0]; - AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[1]; - AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[2]; - AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[3]; - break; - - case 12: /* 192-bit key */ - AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_192b; - AES_CRYP_KeyInitStructure.CRYP_Key1Left = enc_key[0]; - AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[1]; - AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[2]; - AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[3]; - AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[4]; - AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[5]; - break; - - case 14: /* 256-bit key */ - AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_256b; - AES_CRYP_KeyInitStructure.CRYP_Key0Left = enc_key[0]; - AES_CRYP_KeyInitStructure.CRYP_Key0Right = enc_key[1]; - AES_CRYP_KeyInitStructure.CRYP_Key1Left = enc_key[2]; - AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[3]; - AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[4]; - AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[5]; - AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[6]; - AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[7]; - break; - - default: - break; - } - CRYP_KeyInit(&AES_CRYP_KeyInitStructure); - - /* set direction, mode, and datatype */ - AES_CRYP_InitStructure.CRYP_AlgoDir = CRYP_AlgoDir_Encrypt; - AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_ECB; - AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b; - CRYP_Init(&AES_CRYP_InitStructure); + /* setup key */ + CRYP_KeyInit(&keyInit); + + /* set direction and mode */ + cryptInit.CRYP_AlgoDir = CRYP_AlgoDir_Encrypt; + cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_ECB; + CRYP_Init(&cryptInit); /* enable crypto processor */ CRYP_Cmd(ENABLE); @@ -402,6 +369,7 @@ /* disable crypto processor */ CRYP_Cmd(DISABLE); #endif /* WOLFSSL_STM32_CUBEMX */ + return ret; } #endif /* WOLFSSL_AES_DIRECT || HAVE_AESGCM || HAVE_AESCCM */ @@ -413,103 +381,64 @@ int ret = 0; #ifdef WOLFSSL_STM32_CUBEMX CRYP_HandleTypeDef hcryp; - - XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef)); - switch(aes->rounds) { - case 10: /* 128-bit key */ - hcryp.Init.KeySize = CRYP_KEYSIZE_128B; - break; - #ifdef CRYP_KEYSIZE_192B - case 12: /* 192-bit key */ - hcryp.Init.KeySize = CRYP_KEYSIZE_192B; - break; - #endif - case 14: /* 256-bit key */ - hcryp.Init.KeySize = CRYP_KEYSIZE_256B; - break; - default: - break; - } - hcryp.Instance = CRYP; - hcryp.Init.DataType = CRYP_DATATYPE_8B; - hcryp.Init.pKey = (uint8_t*)aes->key; - + #else + CRYP_InitTypeDef cryptInit; + CRYP_KeyInitTypeDef keyInit; + #endif + + #ifdef WOLFSSL_STM32_CUBEMX + ret = 
wc_Stm32_Aes_Init(aes, &hcryp); + if (ret != 0) + return ret; + + #ifdef STM32_CRYPTO_AES_ONLY + hcryp.Init.OperatingMode = CRYP_ALGOMODE_DECRYPT; + hcryp.Init.ChainingMode = CRYP_CHAINMODE_AES_ECB; + hcryp.Init.KeyWriteFlag = CRYP_KEY_WRITE_ENABLE; + #elif defined(STM32_HAL_V2) + hcryp.Init.Algorithm = CRYP_AES_ECB; + #endif HAL_CRYP_Init(&hcryp); - if (HAL_CRYP_AESECB_Decrypt(&hcryp, (uint8_t*)inBlock, AES_BLOCK_SIZE, - outBlock, STM32_HAL_TIMEOUT) != HAL_OK) { - ret = WC_TIMEOUT_E; - } - - HAL_CRYP_DeInit(&hcryp); + #ifdef STM32_CRYPTO_AES_ONLY + ret = HAL_CRYPEx_AES(&hcryp, (uint8_t*)inBlock, AES_BLOCK_SIZE, + outBlock, STM32_HAL_TIMEOUT); + #elif defined(STM32_HAL_V2) + ret = HAL_CRYP_Decrypt(&hcryp, (uint32_t*)inBlock, AES_BLOCK_SIZE, + (uint32_t*)outBlock, STM32_HAL_TIMEOUT); #else - word32 *enc_key; - CRYP_InitTypeDef AES_CRYP_InitStructure; - CRYP_KeyInitTypeDef AES_CRYP_KeyInitStructure; - - enc_key = aes->key; - - /* crypto structure initialization */ - CRYP_KeyStructInit(&AES_CRYP_KeyInitStructure); - CRYP_StructInit(&AES_CRYP_InitStructure); + ret = HAL_CRYP_AESECB_Decrypt(&hcryp, (uint8_t*)inBlock, AES_BLOCK_SIZE, + outBlock, STM32_HAL_TIMEOUT); + #endif + if (ret != HAL_OK) { + ret = WC_TIMEOUT_E; + } + HAL_CRYP_DeInit(&hcryp); + + #else /* STD_PERI_LIB */ + ret = wc_Stm32_Aes_Init(aes, &cryptInit, &keyInit); + if (ret != 0) + return ret; /* reset registers to their default values */ CRYP_DeInit(); - /* load key into correct registers */ - switch (aes->rounds) { - case 10: /* 128-bit key */ - AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_128b; - AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[0]; - AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[1]; - AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[2]; - AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[3]; - break; - - case 12: /* 192-bit key */ - AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_192b; - AES_CRYP_KeyInitStructure.CRYP_Key1Left = enc_key[0]; - AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[1]; - AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[2]; - AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[3]; - AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[4]; - AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[5]; - break; - - case 14: /* 256-bit key */ - AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_256b; - AES_CRYP_KeyInitStructure.CRYP_Key0Left = enc_key[0]; - AES_CRYP_KeyInitStructure.CRYP_Key0Right = enc_key[1]; - AES_CRYP_KeyInitStructure.CRYP_Key1Left = enc_key[2]; - AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[3]; - AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[4]; - AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[5]; - AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[6]; - AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[7]; - break; - - default: - break; - } - CRYP_KeyInit(&AES_CRYP_KeyInitStructure); - - /* set direction, key, and datatype */ - AES_CRYP_InitStructure.CRYP_AlgoDir = CRYP_AlgoDir_Decrypt; - AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_Key; - AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b; - CRYP_Init(&AES_CRYP_InitStructure); + /* set direction and key */ + CRYP_KeyInit(&keyInit); + cryptInit.CRYP_AlgoDir = CRYP_AlgoDir_Decrypt; + cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_Key; + CRYP_Init(&cryptInit); /* enable crypto processor */ CRYP_Cmd(ENABLE); - /* wait until decrypt key has been intialized */ + /* wait until decrypt key has been initialized */ while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {} - /* set 
direction, mode, and datatype */ - AES_CRYP_InitStructure.CRYP_AlgoDir = CRYP_AlgoDir_Decrypt; - AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_ECB; - AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b; - CRYP_Init(&AES_CRYP_InitStructure); + /* set direction and mode */ + cryptInit.CRYP_AlgoDir = CRYP_AlgoDir_Decrypt; + cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_ECB; + CRYP_Init(&cryptInit); /* enable crypto processor */ CRYP_Cmd(ENABLE); @@ -533,6 +462,7 @@ /* disable crypto processor */ CRYP_Cmd(DISABLE); #endif /* WOLFSSL_STM32_CUBEMX */ + return ret; } #endif /* WOLFSSL_AES_DIRECT || HAVE_AESCCM */ @@ -659,6 +589,24 @@ #error nRF51 AES Hardware does not support decrypt #endif /* HAVE_AES_DECRYPT */ +#elif defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_AES) + + #include "wolfssl/wolfcrypt/port/Espressif/esp32-crypt.h" + + #if defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) + static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock) + { + return wc_esp32AesEncrypt(aes, inBlock, outBlock); + } + #endif + + #if defined(HAVE_AES_DECRYPT) && defined(WOLFSSL_AES_DIRECT) + static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock) + { + return wc_esp32AesDecrypt(aes, inBlock, outBlock); + } + #endif #elif defined(WOLFSSL_AESNI) @@ -672,10 +620,12 @@ #define AESNI_ALIGN 16 #endif - #ifndef _MSC_VER + #ifdef _MSC_VER + #define XASM_LINK(f) + #elif defined(__APPLE__) + #define XASM_LINK(f) asm("_" f) + #else #define XASM_LINK(f) asm(f) - #else - #define XASM_LINK(f) #endif /* _MSC_VER */ static int checkAESNI = 0; @@ -811,15 +761,164 @@ } #endif /* HAVE_AES_DECRYPT */ -#elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES) +#elif (defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES)) || \ + ((defined(WOLFSSL_AFALG) || defined(WOLFSSL_DEVCRYPTO_AES)) && \ + defined(HAVE_AESCCM)) static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock) { wc_AesEncryptDirect(aes, outBlock, inBlock); return 0; } + +#elif defined(WOLFSSL_AFALG) +#elif defined(WOLFSSL_DEVCRYPTO_AES) + +#elif defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_AES) + #include "hal_data.h" + + #ifndef WOLFSSL_SCE_AES256_HANDLE + #define WOLFSSL_SCE_AES256_HANDLE g_sce_aes_256 + #endif + + #ifndef WOLFSSL_SCE_AES192_HANDLE + #define WOLFSSL_SCE_AES192_HANDLE g_sce_aes_192 + #endif + + #ifndef WOLFSSL_SCE_AES128_HANDLE + #define WOLFSSL_SCE_AES128_HANDLE g_sce_aes_128 + #endif + + static int AES_ECB_encrypt(Aes* aes, const byte* inBlock, byte* outBlock, + int sz) + { + uint32_t ret; + + if (WOLFSSL_SCE_GSCE_HANDLE.p_cfg->endian_flag == + CRYPTO_WORD_ENDIAN_BIG) { + ByteReverseWords((word32*)inBlock, (word32*)inBlock, sz); + } + + switch (aes->keylen) { + #ifdef WOLFSSL_AES_128 + case AES_128_KEY_SIZE: + ret = WOLFSSL_SCE_AES128_HANDLE.p_api->encrypt( + WOLFSSL_SCE_AES128_HANDLE.p_ctrl, aes->key, + NULL, (sz / sizeof(word32)), (word32*)inBlock, + (word32*)outBlock); + break; + #endif + #ifdef WOLFSSL_AES_192 + case AES_192_KEY_SIZE: + ret = WOLFSSL_SCE_AES192_HANDLE.p_api->encrypt( + WOLFSSL_SCE_AES192_HANDLE.p_ctrl, aes->key, + NULL, (sz / sizeof(word32)), (word32*)inBlock, + (word32*)outBlock); + break; + #endif + #ifdef WOLFSSL_AES_256 + case AES_256_KEY_SIZE: + ret = WOLFSSL_SCE_AES256_HANDLE.p_api->encrypt( + WOLFSSL_SCE_AES256_HANDLE.p_ctrl, aes->key, + NULL, (sz / sizeof(word32)), (word32*)inBlock, + (word32*)outBlock); + break; + #endif + default: + WOLFSSL_MSG("Unknown key size"); + return BAD_FUNC_ARG; + } + + if (ret != 
SSP_SUCCESS) { + /* revert input */ + ByteReverseWords((word32*)inBlock, (word32*)inBlock, sz); + return WC_HW_E; + } + + if (WOLFSSL_SCE_GSCE_HANDLE.p_cfg->endian_flag == + CRYPTO_WORD_ENDIAN_BIG) { + ByteReverseWords((word32*)outBlock, (word32*)outBlock, sz); + if (inBlock != outBlock) { + /* revert input */ + ByteReverseWords((word32*)inBlock, (word32*)inBlock, sz); + } + } + return 0; + } + + #if defined(HAVE_AES_DECRYPT) + static int AES_ECB_decrypt(Aes* aes, const byte* inBlock, byte* outBlock, + int sz) + { + uint32_t ret; + + if (WOLFSSL_SCE_GSCE_HANDLE.p_cfg->endian_flag == + CRYPTO_WORD_ENDIAN_BIG) { + ByteReverseWords((word32*)inBlock, (word32*)inBlock, sz); + } + + switch (aes->keylen) { + #ifdef WOLFSSL_AES_128 + case AES_128_KEY_SIZE: + ret = WOLFSSL_SCE_AES128_HANDLE.p_api->decrypt( + WOLFSSL_SCE_AES128_HANDLE.p_ctrl, aes->key, aes->reg, + (sz / sizeof(word32)), (word32*)inBlock, + (word32*)outBlock); + break; + #endif + #ifdef WOLFSSL_AES_192 + case AES_192_KEY_SIZE: + ret = WOLFSSL_SCE_AES192_HANDLE.p_api->decrypt( + WOLFSSL_SCE_AES192_HANDLE.p_ctrl, aes->key, aes->reg, + (sz / sizeof(word32)), (word32*)inBlock, + (word32*)outBlock); + break; + #endif + #ifdef WOLFSSL_AES_256 + case AES_256_KEY_SIZE: + ret = WOLFSSL_SCE_AES256_HANDLE.p_api->decrypt( + WOLFSSL_SCE_AES256_HANDLE.p_ctrl, aes->key, aes->reg, + (sz / sizeof(word32)), (word32*)inBlock, + (word32*)outBlock); + break; + #endif + default: + WOLFSSL_MSG("Unknown key size"); + return BAD_FUNC_ARG; + } + if (ret != SSP_SUCCESS) { + return WC_HW_E; + } + + if (WOLFSSL_SCE_GSCE_HANDLE.p_cfg->endian_flag == + CRYPTO_WORD_ENDIAN_BIG) { + ByteReverseWords((word32*)outBlock, (word32*)outBlock, sz); + if (inBlock != outBlock) { + /* revert input */ + ByteReverseWords((word32*)inBlock, (word32*)inBlock, sz); + } + } + + return 0; + } + + #endif + + #if defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) + static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock) + { + return AES_ECB_encrypt(aes, inBlock, outBlock, AES_BLOCK_SIZE); + } + #endif + + #if defined(HAVE_AES_DECRYPT) && defined(WOLFSSL_AES_DIRECT) + static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock) + { + return AES_ECB_decrypt(aes, inBlock, outBlock, AES_BLOCK_SIZE); + } + #endif #else - /* using wolfCrypt software AES implementation */ + /* using wolfCrypt software implementation */ #define NEED_AES_TABLES #endif @@ -834,6 +933,7 @@ /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */ }; +#ifndef WOLFSSL_AES_SMALL_TABLES static const word32 Te[4][256] = { { 0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU, @@ -1369,8 +1469,12 @@ 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U, } }; - - +#endif /* HAVE_AES_DECRYPT */ +#endif + +#ifdef HAVE_AES_DECRYPT +#if (defined(HAVE_AES_CBC) && !defined(WOLFSSL_DEVCRYPTO_CBC)) \ + || defined(WOLFSSL_AES_DIRECT) static const byte Td4[256] = { 0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U, @@ -1406,11 +1510,67 @@ 0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U, 0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU, }; +#endif /* HAVE_AES_CBC || WOLFSSL_AES_DIRECT */ #endif /* HAVE_AES_DECRYPT */ #define GETBYTE(x, y) (word32)((byte)((x) >> (8 * (y)))) - +#ifdef WOLFSSL_AES_SMALL_TABLES +static const byte Tsbox[256] = { + 0x63U, 0x7cU, 0x77U, 0x7bU, 0xf2U, 0x6bU, 0x6fU, 0xc5U, + 0x30U, 0x01U, 0x67U, 0x2bU, 0xfeU, 0xd7U, 0xabU, 0x76U, + 0xcaU, 0x82U, 0xc9U, 0x7dU, 0xfaU, 0x59U, 0x47U, 0xf0U, + 0xadU, 0xd4U, 0xa2U, 0xafU, 0x9cU, 0xa4U, 0x72U, 0xc0U, + 0xb7U, 
0xfdU, 0x93U, 0x26U, 0x36U, 0x3fU, 0xf7U, 0xccU, + 0x34U, 0xa5U, 0xe5U, 0xf1U, 0x71U, 0xd8U, 0x31U, 0x15U, + 0x04U, 0xc7U, 0x23U, 0xc3U, 0x18U, 0x96U, 0x05U, 0x9aU, + 0x07U, 0x12U, 0x80U, 0xe2U, 0xebU, 0x27U, 0xb2U, 0x75U, + 0x09U, 0x83U, 0x2cU, 0x1aU, 0x1bU, 0x6eU, 0x5aU, 0xa0U, + 0x52U, 0x3bU, 0xd6U, 0xb3U, 0x29U, 0xe3U, 0x2fU, 0x84U, + 0x53U, 0xd1U, 0x00U, 0xedU, 0x20U, 0xfcU, 0xb1U, 0x5bU, + 0x6aU, 0xcbU, 0xbeU, 0x39U, 0x4aU, 0x4cU, 0x58U, 0xcfU, + 0xd0U, 0xefU, 0xaaU, 0xfbU, 0x43U, 0x4dU, 0x33U, 0x85U, + 0x45U, 0xf9U, 0x02U, 0x7fU, 0x50U, 0x3cU, 0x9fU, 0xa8U, + 0x51U, 0xa3U, 0x40U, 0x8fU, 0x92U, 0x9dU, 0x38U, 0xf5U, + 0xbcU, 0xb6U, 0xdaU, 0x21U, 0x10U, 0xffU, 0xf3U, 0xd2U, + 0xcdU, 0x0cU, 0x13U, 0xecU, 0x5fU, 0x97U, 0x44U, 0x17U, + 0xc4U, 0xa7U, 0x7eU, 0x3dU, 0x64U, 0x5dU, 0x19U, 0x73U, + 0x60U, 0x81U, 0x4fU, 0xdcU, 0x22U, 0x2aU, 0x90U, 0x88U, + 0x46U, 0xeeU, 0xb8U, 0x14U, 0xdeU, 0x5eU, 0x0bU, 0xdbU, + 0xe0U, 0x32U, 0x3aU, 0x0aU, 0x49U, 0x06U, 0x24U, 0x5cU, + 0xc2U, 0xd3U, 0xacU, 0x62U, 0x91U, 0x95U, 0xe4U, 0x79U, + 0xe7U, 0xc8U, 0x37U, 0x6dU, 0x8dU, 0xd5U, 0x4eU, 0xa9U, + 0x6cU, 0x56U, 0xf4U, 0xeaU, 0x65U, 0x7aU, 0xaeU, 0x08U, + 0xbaU, 0x78U, 0x25U, 0x2eU, 0x1cU, 0xa6U, 0xb4U, 0xc6U, + 0xe8U, 0xddU, 0x74U, 0x1fU, 0x4bU, 0xbdU, 0x8bU, 0x8aU, + 0x70U, 0x3eU, 0xb5U, 0x66U, 0x48U, 0x03U, 0xf6U, 0x0eU, + 0x61U, 0x35U, 0x57U, 0xb9U, 0x86U, 0xc1U, 0x1dU, 0x9eU, + 0xe1U, 0xf8U, 0x98U, 0x11U, 0x69U, 0xd9U, 0x8eU, 0x94U, + 0x9bU, 0x1eU, 0x87U, 0xe9U, 0xceU, 0x55U, 0x28U, 0xdfU, + 0x8cU, 0xa1U, 0x89U, 0x0dU, 0xbfU, 0xe6U, 0x42U, 0x68U, + 0x41U, 0x99U, 0x2dU, 0x0fU, 0xb0U, 0x54U, 0xbbU, 0x16U +}; + +#define AES_XTIME(x) ((byte)((byte)((x) << 1) ^ ((0 - ((x) >> 7)) & 0x1b))) + +static word32 col_mul(word32 t, int i2, int i3, int ia, int ib) +{ + byte t3 = GETBYTE(t, i3); + byte tm = AES_XTIME(GETBYTE(t, i2) ^ t3); + + return GETBYTE(t, ia) ^ GETBYTE(t, ib) ^ t3 ^ tm; +} + +static word32 inv_col_mul(word32 t, int i9, int ib, int id, int ie) +{ + byte t9 = GETBYTE(t, i9); + byte tb = GETBYTE(t, ib); + byte td = GETBYTE(t, id); + byte te = GETBYTE(t, ie); + byte t0 = t9 ^ tb ^ td; + return t0 ^ AES_XTIME(AES_XTIME(AES_XTIME(t0 ^ te) ^ td ^ te) ^ tb ^ te); +} +#endif #if defined(HAVE_AES_CBC) || defined(WOLFSSL_AES_DIRECT) || defined(HAVE_AESGCM) @@ -1425,6 +1585,7 @@ #endif +#ifndef WOLFSSL_AES_SMALL_TABLES /* load 4 Te Tables into cache by cache line stride */ static WC_INLINE word32 PreFetchTe(void) { @@ -1439,8 +1600,21 @@ } return x; } - - +#else +/* load sbox into cache by cache line stride */ +static WC_INLINE word32 PreFetchSBox(void) +{ + word32 x = 0; + int i; + + for (i = 0; i < 256; i += WC_CACHE_LINE_SZ/4) { + x &= Tsbox[i]; + } + return x; +} +#endif + +/* Software AES - ECB Encrypt */ static void wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock) { word32 s0, s1, s2, s3; @@ -1450,7 +1624,7 @@ if (r > 7 || r == 0) { WOLFSSL_MSG("AesEncrypt encountered improper key, set it up"); - return; /* stop instead of segfaulting, set up your keys! */ + return; /* stop instead of seg-faulting, set up your keys! 
*/ } #ifdef WOLFSSL_AESNI @@ -1475,8 +1649,8 @@ tmp_align = tmp + (AESNI_ALIGN - ((size_t)tmp % AESNI_ALIGN)); XMEMCPY(tmp_align, inBlock, AES_BLOCK_SIZE); - AES_ECB_encrypt(tmp_align, tmp_align, AES_BLOCK_SIZE, (byte*)aes->key, - aes->rounds); + AES_ECB_encrypt(tmp_align, tmp_align, AES_BLOCK_SIZE, + (byte*)aes->key, aes->rounds); XMEMCPY(outBlock, tmp_align, AES_BLOCK_SIZE); XFREE(tmp, aes->heap, DYNAMIC_TYPE_TMP_BUFFER); return; @@ -1497,6 +1671,10 @@ #endif } #endif +#if defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_AES) + AES_ECB_encrypt(aes, inBlock, outBlock, AES_BLOCK_SIZE); + return; +#endif /* * map byte array block to cipher state @@ -1514,11 +1692,13 @@ s3 = ByteReverseWord32(s3); #endif + /* AddRoundKey */ s0 ^= rk[0]; s1 ^= rk[1]; s2 ^= rk[2]; s3 ^= rk[3]; +#ifndef WOLFSSL_AES_SMALL_TABLES s0 |= PreFetchTe(); /* @@ -1527,28 +1707,28 @@ for (;;) { t0 = - Te[0][GETBYTE(s0, 3)] ^ - Te[1][GETBYTE(s1, 2)] ^ - Te[2][GETBYTE(s2, 1)] ^ - Te[3][GETBYTE(s3, 0)] ^ + Te[0][GETBYTE(s0, 3)] ^ + Te[1][GETBYTE(s1, 2)] ^ + Te[2][GETBYTE(s2, 1)] ^ + Te[3][GETBYTE(s3, 0)] ^ rk[4]; t1 = - Te[0][GETBYTE(s1, 3)] ^ - Te[1][GETBYTE(s2, 2)] ^ - Te[2][GETBYTE(s3, 1)] ^ - Te[3][GETBYTE(s0, 0)] ^ + Te[0][GETBYTE(s1, 3)] ^ + Te[1][GETBYTE(s2, 2)] ^ + Te[2][GETBYTE(s3, 1)] ^ + Te[3][GETBYTE(s0, 0)] ^ rk[5]; t2 = Te[0][GETBYTE(s2, 3)] ^ - Te[1][GETBYTE(s3, 2)] ^ - Te[2][GETBYTE(s0, 1)] ^ - Te[3][GETBYTE(s1, 0)] ^ + Te[1][GETBYTE(s3, 2)] ^ + Te[2][GETBYTE(s0, 1)] ^ + Te[3][GETBYTE(s1, 0)] ^ rk[6]; t3 = Te[0][GETBYTE(s3, 3)] ^ - Te[1][GETBYTE(s0, 2)] ^ - Te[2][GETBYTE(s1, 1)] ^ - Te[3][GETBYTE(s2, 0)] ^ + Te[1][GETBYTE(s0, 2)] ^ + Te[2][GETBYTE(s1, 1)] ^ + Te[3][GETBYTE(s2, 0)] ^ rk[7]; rk += 8; @@ -1611,6 +1791,84 @@ (Te[0][GETBYTE(t1, 1)] & 0x0000ff00) ^ (Te[1][GETBYTE(t2, 0)] & 0x000000ff) ^ rk[3]; +#else + s0 |= PreFetchSBox(); + + r *= 2; + /* Two rounds at a time */ + for (rk += 4; r > 1; r--, rk += 4) { + t0 = + ((word32)Tsbox[GETBYTE(s0, 3)] << 24) ^ + ((word32)Tsbox[GETBYTE(s1, 2)] << 16) ^ + ((word32)Tsbox[GETBYTE(s2, 1)] << 8) ^ + ((word32)Tsbox[GETBYTE(s3, 0)]); + t1 = + ((word32)Tsbox[GETBYTE(s1, 3)] << 24) ^ + ((word32)Tsbox[GETBYTE(s2, 2)] << 16) ^ + ((word32)Tsbox[GETBYTE(s3, 1)] << 8) ^ + ((word32)Tsbox[GETBYTE(s0, 0)]); + t2 = + ((word32)Tsbox[GETBYTE(s2, 3)] << 24) ^ + ((word32)Tsbox[GETBYTE(s3, 2)] << 16) ^ + ((word32)Tsbox[GETBYTE(s0, 1)] << 8) ^ + ((word32)Tsbox[GETBYTE(s1, 0)]); + t3 = + ((word32)Tsbox[GETBYTE(s3, 3)] << 24) ^ + ((word32)Tsbox[GETBYTE(s0, 2)] << 16) ^ + ((word32)Tsbox[GETBYTE(s1, 1)] << 8) ^ + ((word32)Tsbox[GETBYTE(s2, 0)]); + + s0 = + (col_mul(t0, 3, 2, 0, 1) << 24) ^ + (col_mul(t0, 2, 1, 0, 3) << 16) ^ + (col_mul(t0, 1, 0, 2, 3) << 8) ^ + (col_mul(t0, 0, 3, 2, 1) ) ^ + rk[0]; + s1 = + (col_mul(t1, 3, 2, 0, 1) << 24) ^ + (col_mul(t1, 2, 1, 0, 3) << 16) ^ + (col_mul(t1, 1, 0, 2, 3) << 8) ^ + (col_mul(t1, 0, 3, 2, 1) ) ^ + rk[1]; + s2 = + (col_mul(t2, 3, 2, 0, 1) << 24) ^ + (col_mul(t2, 2, 1, 0, 3) << 16) ^ + (col_mul(t2, 1, 0, 2, 3) << 8) ^ + (col_mul(t2, 0, 3, 2, 1) ) ^ + rk[2]; + s3 = + (col_mul(t3, 3, 2, 0, 1) << 24) ^ + (col_mul(t3, 2, 1, 0, 3) << 16) ^ + (col_mul(t3, 1, 0, 2, 3) << 8) ^ + (col_mul(t3, 0, 3, 2, 1) ) ^ + rk[3]; + } + + t0 = + ((word32)Tsbox[GETBYTE(s0, 3)] << 24) ^ + ((word32)Tsbox[GETBYTE(s1, 2)] << 16) ^ + ((word32)Tsbox[GETBYTE(s2, 1)] << 8) ^ + ((word32)Tsbox[GETBYTE(s3, 0)]); + t1 = + ((word32)Tsbox[GETBYTE(s1, 3)] << 24) ^ + ((word32)Tsbox[GETBYTE(s2, 2)] << 16) ^ + ((word32)Tsbox[GETBYTE(s3, 1)] << 8) ^ + ((word32)Tsbox[GETBYTE(s0, 0)]); + 
t2 = + ((word32)Tsbox[GETBYTE(s2, 3)] << 24) ^ + ((word32)Tsbox[GETBYTE(s3, 2)] << 16) ^ + ((word32)Tsbox[GETBYTE(s0, 1)] << 8) ^ + ((word32)Tsbox[GETBYTE(s1, 0)]); + t3 = + ((word32)Tsbox[GETBYTE(s3, 3)] << 24) ^ + ((word32)Tsbox[GETBYTE(s0, 2)] << 16) ^ + ((word32)Tsbox[GETBYTE(s1, 1)] << 8) ^ + ((word32)Tsbox[GETBYTE(s2, 0)]); + s0 = t0 ^ rk[0]; + s1 = t1 ^ rk[1]; + s2 = t2 ^ rk[2]; + s3 = t3 ^ rk[3]; +#endif /* write out */ #ifdef LITTLE_ENDIAN_ORDER @@ -1629,8 +1887,10 @@ #endif /* HAVE_AES_CBC || WOLFSSL_AES_DIRECT || HAVE_AESGCM */ #if defined(HAVE_AES_DECRYPT) -#if defined(HAVE_AES_CBC) || defined(WOLFSSL_AES_DIRECT) - +#if (defined(HAVE_AES_CBC) && !defined(WOLFSSL_DEVCRYPTO_CBC)) || \ + defined(WOLFSSL_AES_DIRECT) + +#ifndef WOLFSSL_AES_SMALL_TABLES /* load 4 Td Tables into cache by cache line stride */ static WC_INLINE word32 PreFetchTd(void) { @@ -1645,6 +1905,7 @@ } return x; } +#endif /* load Td Table4 into cache by cache line stride */ static WC_INLINE word32 PreFetchTd4(void) @@ -1658,6 +1919,7 @@ return x; } +/* Software AES - ECB Decrypt */ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock) { word32 s0, s1, s2, s3; @@ -1667,7 +1929,7 @@ const word32* rk = aes->key; if (r > 7 || r == 0) { WOLFSSL_MSG("AesDecrypt encountered improper key, set it up"); - return; /* stop instead of segfaulting, set up your keys! */ + return; /* stop instead of seg-faulting, set up your keys! */ } #ifdef WOLFSSL_AESNI if (haveAESNI && aes->use_aesni) { @@ -1681,7 +1943,8 @@ #endif /* if input and output same will overwrite input iv */ - XMEMCPY(aes->tmp, inBlock, AES_BLOCK_SIZE); + if ((const byte*)aes->tmp != inBlock) + XMEMCPY(aes->tmp, inBlock, AES_BLOCK_SIZE); AES_ECB_decrypt(inBlock, outBlock, AES_BLOCK_SIZE, (byte*)aes->key, aes->rounds); return; @@ -1692,6 +1955,9 @@ #endif } #endif /* WOLFSSL_AESNI */ +#if defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_AES) + return AES_ECB_decrypt(aes, inBlock, outBlock, AES_BLOCK_SIZE); +#endif /* * map byte array block to cipher state @@ -1714,6 +1980,7 @@ s2 ^= rk[2]; s3 ^= rk[3]; +#ifndef WOLFSSL_AES_SMALL_TABLES s0 |= PreFetchTd(); /* @@ -1807,6 +2074,83 @@ ((word32)Td4[GETBYTE(t1, 1)] << 8) ^ ((word32)Td4[GETBYTE(t0, 0)]) ^ rk[3]; +#else + s0 |= PreFetchTd4(); + + r *= 2; + for (rk += 4; r > 1; r--, rk += 4) { + t0 = + ((word32)Td4[GETBYTE(s0, 3)] << 24) ^ + ((word32)Td4[GETBYTE(s3, 2)] << 16) ^ + ((word32)Td4[GETBYTE(s2, 1)] << 8) ^ + ((word32)Td4[GETBYTE(s1, 0)]) ^ + rk[0]; + t1 = + ((word32)Td4[GETBYTE(s1, 3)] << 24) ^ + ((word32)Td4[GETBYTE(s0, 2)] << 16) ^ + ((word32)Td4[GETBYTE(s3, 1)] << 8) ^ + ((word32)Td4[GETBYTE(s2, 0)]) ^ + rk[1]; + t2 = + ((word32)Td4[GETBYTE(s2, 3)] << 24) ^ + ((word32)Td4[GETBYTE(s1, 2)] << 16) ^ + ((word32)Td4[GETBYTE(s0, 1)] << 8) ^ + ((word32)Td4[GETBYTE(s3, 0)]) ^ + rk[2]; + t3 = + ((word32)Td4[GETBYTE(s3, 3)] << 24) ^ + ((word32)Td4[GETBYTE(s2, 2)] << 16) ^ + ((word32)Td4[GETBYTE(s1, 1)] << 8) ^ + ((word32)Td4[GETBYTE(s0, 0)]) ^ + rk[3]; + + s0 = + (inv_col_mul(t0, 0, 2, 1, 3) << 24) ^ + (inv_col_mul(t0, 3, 1, 0, 2) << 16) ^ + (inv_col_mul(t0, 2, 0, 3, 1) << 8) ^ + (inv_col_mul(t0, 1, 3, 2, 0) ); + s1 = + (inv_col_mul(t1, 0, 2, 1, 3) << 24) ^ + (inv_col_mul(t1, 3, 1, 0, 2) << 16) ^ + (inv_col_mul(t1, 2, 0, 3, 1) << 8) ^ + (inv_col_mul(t1, 1, 3, 2, 0) ); + s2 = + (inv_col_mul(t2, 0, 2, 1, 3) << 24) ^ + (inv_col_mul(t2, 3, 1, 0, 2) << 16) ^ + (inv_col_mul(t2, 2, 0, 3, 1) << 8) ^ + (inv_col_mul(t2, 1, 3, 2, 0) ); + s3 = + (inv_col_mul(t3, 0, 2, 1, 3) << 24) ^ + (inv_col_mul(t3, 3, 1, 0, 2) 
<< 16) ^ + (inv_col_mul(t3, 2, 0, 3, 1) << 8) ^ + (inv_col_mul(t3, 1, 3, 2, 0) ); + } + + t0 = + ((word32)Td4[GETBYTE(s0, 3)] << 24) ^ + ((word32)Td4[GETBYTE(s3, 2)] << 16) ^ + ((word32)Td4[GETBYTE(s2, 1)] << 8) ^ + ((word32)Td4[GETBYTE(s1, 0)]); + t1 = + ((word32)Td4[GETBYTE(s1, 3)] << 24) ^ + ((word32)Td4[GETBYTE(s0, 2)] << 16) ^ + ((word32)Td4[GETBYTE(s3, 1)] << 8) ^ + ((word32)Td4[GETBYTE(s2, 0)]); + t2 = + ((word32)Td4[GETBYTE(s2, 3)] << 24) ^ + ((word32)Td4[GETBYTE(s1, 2)] << 16) ^ + ((word32)Td4[GETBYTE(s0, 1)] << 8) ^ + ((word32)Td4[GETBYTE(s3, 0)]); + t3 = + ((word32)Td4[GETBYTE(s3, 3)] << 24) ^ + ((word32)Td4[GETBYTE(s2, 2)] << 16) ^ + ((word32)Td4[GETBYTE(s1, 1)] << 8) ^ + ((word32)Td4[GETBYTE(s0, 0)]); + s0 = t0 ^ rk[0]; + s1 = t1 ^ rk[1]; + s2 = t2 ^ rk[2]; + s3 = t3 ^ rk[3]; +#endif /* write out */ #ifdef LITTLE_ENDIAN_ORDER @@ -1834,20 +2178,27 @@ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv, int dir) { - word32 *rk = aes->key; + word32 *rk; (void)dir; - if (!((keylen == 16) || (keylen == 24) || (keylen == 32))) + if (aes == NULL || (keylen != 16 && + #ifdef WOLFSSL_AES_192 + keylen != 24 && + #endif + keylen != 32)) { return BAD_FUNC_ARG; - + } + + rk = aes->key; aes->keylen = keylen; aes->rounds = keylen/4 + 6; XMEMCPY(rk, userKey, keylen); - #ifndef WOLFSSL_STM32_CUBEMX + #if !defined(WOLFSSL_STM32_CUBEMX) || defined(STM32_HAL_V2) ByteReverseWords(rk, rk, keylen); #endif - #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) + #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) || \ + defined(WOLFSSL_AES_OFB) aes->left = 0; #endif @@ -1920,7 +2271,8 @@ if (iv) XMEMCPY(aes->reg, iv, AES_BLOCK_SIZE); - #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) + #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) || \ + defined(WOLFSSL_AES_OFB) aes->left = 0; #endif @@ -1930,13 +2282,14 @@ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv, int dir) { - if (!((keylen == 16) || (keylen == 24) || (keylen == 32))) + if (aes == NULL || !((keylen == 16) || (keylen == 24) || (keylen == 32))) return BAD_FUNC_ARG; aes->rounds = keylen/4 + 6; XMEMCPY(aes->key, userKey, keylen); - #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) + #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) || \ + defined(WOLFSSL_AES_OFB) aes->left = 0; #endif @@ -1953,34 +2306,65 @@ const byte* iv, int dir) { int ret; - byte *rk = (byte*)aes->key; + byte* rk; + byte* tmpKey = (byte*)userKey; + int tmpKeyDynamic = 0; + word32 alignOffset = 0; (void)dir; if (!((keylen == 16) || (keylen == 24) || (keylen == 32))) return BAD_FUNC_ARG; - + if (aes == NULL) + return BAD_FUNC_ARG; + + rk = (byte*)aes->key; if (rk == NULL) return BAD_FUNC_ARG; - #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) + #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) || \ + defined(WOLFSSL_AES_OFB) aes->left = 0; #endif aes->rounds = keylen/4 + 6; + #ifdef FREESCALE_MMCAU_CLASSIC + if ((wolfssl_word)userKey % WOLFSSL_MMCAU_ALIGNMENT) { + #ifndef NO_WOLFSSL_ALLOC_ALIGN + byte* tmp = (byte*)XMALLOC(keylen + WOLFSSL_MMCAU_ALIGNMENT, + aes->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (tmp == NULL) { + return MEMORY_E; + } + alignOffset = WOLFSSL_MMCAU_ALIGNMENT - + ((wolfssl_word)tmp % WOLFSSL_MMCAU_ALIGNMENT); + tmpKey = tmp + alignOffset; + XMEMCPY(tmpKey, userKey, keylen); + tmpKeyDynamic = 1; + #else + WOLFSSL_MSG("Bad cau_aes_set_key alignment"); + return BAD_ALIGN_E; + #endif + } + #endif + ret = 
wolfSSL_CryptHwMutexLock(); if(ret == 0) { #ifdef FREESCALE_MMCAU_CLASSIC - cau_aes_set_key(userKey, keylen*8, rk); + cau_aes_set_key(tmpKey, keylen*8, rk); #else - MMCAU_AES_SetKey(userKey, keylen, rk); + MMCAU_AES_SetKey(tmpKey, keylen, rk); #endif wolfSSL_CryptHwMutexUnLock(); ret = wc_AesSetIV(aes, iv); } + if (tmpKeyDynamic == 1) { + XFREE(tmpKey - alignOffset, aes->heap, DYNAMIC_TYPE_TMP_BUFFER); + } + return ret; } @@ -1999,14 +2383,15 @@ (void)dir; (void)iv; - if (keylen != 16) + if (aes == NULL || keylen != 16) return BAD_FUNC_ARG; aes->keylen = keylen; aes->rounds = keylen/4 + 6; ret = nrf51_aes_set_key(userKey); - #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) + #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) || \ + defined(WOLFSSL_AES_OFB) aes->left = 0; #endif @@ -2018,11 +2403,122 @@ { return wc_AesSetKey(aes, userKey, keylen, iv, dir); } +#elif defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_AES) + + int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) + { + (void)dir; + (void)iv; + + if (aes == NULL || (keylen != 16 && keylen != 24 && keylen != 32)) { + return BAD_FUNC_ARG; + } + + aes->keylen = keylen; + aes->rounds = keylen/4 + 6; + + XMEMCPY(aes->key, userKey, keylen); + #if defined(WOLFSSL_AES_COUNTER) + aes->left = 0; + #endif + return wc_AesSetIV(aes, iv); + } + + int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) + { + return wc_AesSetKey(aes, userKey, keylen, iv, dir); + } +#elif defined(WOLFSSL_CRYPTOCELL) && defined(WOLFSSL_CRYPTOCELL_AES) + + int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv, + int dir) + { + SaSiError_t ret = SASI_OK; + SaSiAesIv_t iv_aes; + + if (aes == NULL || + (keylen != AES_128_KEY_SIZE && + keylen != AES_192_KEY_SIZE && + keylen != AES_256_KEY_SIZE)) { + return BAD_FUNC_ARG; + } + #if defined(AES_MAX_KEY_SIZE) + if (keylen > (AES_MAX_KEY_SIZE/8)) { + return BAD_FUNC_ARG; + } + #endif + if (dir != AES_ENCRYPTION && + dir != AES_DECRYPTION) { + return BAD_FUNC_ARG; + } + + if (dir == AES_ENCRYPTION) { + aes->ctx.mode = SASI_AES_ENCRYPT; + SaSi_AesInit(&aes->ctx.user_ctx, + SASI_AES_ENCRYPT, + SASI_AES_MODE_CBC, + SASI_AES_PADDING_NONE); + } + else { + aes->ctx.mode = SASI_AES_DECRYPT; + SaSi_AesInit(&aes->ctx.user_ctx, + SASI_AES_DECRYPT, + SASI_AES_MODE_CBC, + SASI_AES_PADDING_NONE); + } + + aes->keylen = keylen; + aes->rounds = keylen/4 + 6; + XMEMCPY(aes->key, userKey, keylen); + + aes->ctx.key.pKey = (uint8_t*)aes->key; + aes->ctx.key.keySize= keylen; + + ret = SaSi_AesSetKey(&aes->ctx.user_ctx, + SASI_AES_USER_KEY, + &aes->ctx.key, + sizeof(aes->ctx.key)); + if (ret != SASI_OK) { + return BAD_FUNC_ARG; + } + + ret = wc_AesSetIV(aes, iv); + + if (iv) + XMEMCPY(iv_aes, iv, AES_BLOCK_SIZE); + else + XMEMSET(iv_aes, 0, AES_BLOCK_SIZE); + + + ret = SaSi_AesSetIv(&aes->ctx.user_ctx, iv_aes); + if (ret != SASI_OK) { + return ret; + } + return ret; + } + #if defined(WOLFSSL_AES_DIRECT) + int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) + { + return wc_AesSetKey(aes, userKey, keylen, iv, dir); + } + #endif #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES) /* implemented in wolfcrypt/src/port/caam/caam_aes.c */ +#elif defined(WOLFSSL_AFALG) + /* implemented in wolfcrypt/src/port/af_alg/afalg_aes.c */ + +#elif defined(WOLFSSL_DEVCRYPTO_AES) + /* implemented in wolfcrypt/src/port/devcrypto/devcrypto_aes.c */ + #else + + 
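The FREESCALE_MMCAU_CLASSIC branch above now copies a misaligned user key into a scratch buffer before handing it to cau_aes_set_key(). A self-contained sketch of that over-allocate-and-offset idiom (DEMO_ALIGNMENT and aligned_copy() are illustrative names, not wolfSSL API):

    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>

    #define DEMO_ALIGNMENT 16   /* stand-in for WOLFSSL_MMCAU_ALIGNMENT */

    /* Copy 'src' into storage aligned to DEMO_ALIGNMENT. On success '*raw'
     * receives the pointer to hand back to free(); returns the aligned copy,
     * or NULL on allocation failure. */
    static unsigned char* aligned_copy(const unsigned char* src, size_t len,
                                       unsigned char** raw)
    {
        size_t off;
        unsigned char* tmp = (unsigned char*)malloc(len + DEMO_ALIGNMENT);

        if (tmp == NULL)
            return NULL;
        /* same arithmetic as the patch: skip past the misalignment */
        off = DEMO_ALIGNMENT - ((uintptr_t)tmp % DEMO_ALIGNMENT);
        memcpy(tmp + off, src, len);
        *raw = tmp;
        return tmp + off;
    }

Releasing the original pointer (*raw) afterwards mirrors the patch's XFREE(tmpKey - alignOffset, ...).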
/* Software AES - SetKey */ static int wc_AesSetKeyLocal(Aes* aes, const byte* userKey, word32 keylen, const byte* iv, int dir) { @@ -2035,7 +2531,8 @@ #ifdef WOLFSSL_AESNI aes->use_aesni = 0; #endif /* WOLFSSL_AESNI */ - #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) + #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) || \ + defined(WOLFSSL_AES_OFB) aes->left = 0; #endif @@ -2043,12 +2540,13 @@ aes->rounds = (keylen/4) + 6; XMEMCPY(rk, userKey, keylen); - #if defined(LITTLE_ENDIAN_ORDER) && !defined(WOLFSSL_PIC32MZ_CRYPT) + #if defined(LITTLE_ENDIAN_ORDER) && !defined(WOLFSSL_PIC32MZ_CRYPT) && \ + (!defined(WOLFSSL_ESP32WROOM32_CRYPT) || \ + defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_AES)) ByteReverseWords(rk, rk, keylen); #endif #ifdef NEED_AES_TABLES - switch (keylen) { #if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 128 && \ defined(WOLFSSL_AES_128) @@ -2057,10 +2555,17 @@ { temp = rk[3]; rk[4] = rk[0] ^ + #ifndef WOLFSSL_AES_SMALL_TABLES (Te[2][GETBYTE(temp, 2)] & 0xff000000) ^ (Te[3][GETBYTE(temp, 1)] & 0x00ff0000) ^ (Te[0][GETBYTE(temp, 0)] & 0x0000ff00) ^ (Te[1][GETBYTE(temp, 3)] & 0x000000ff) ^ + #else + ((word32)Tsbox[GETBYTE(temp, 2)] << 24) ^ + ((word32)Tsbox[GETBYTE(temp, 1)] << 16) ^ + ((word32)Tsbox[GETBYTE(temp, 0)] << 8) ^ + ((word32)Tsbox[GETBYTE(temp, 3)]) ^ + #endif rcon[i]; rk[5] = rk[1] ^ rk[4]; rk[6] = rk[2] ^ rk[5]; @@ -2080,10 +2585,17 @@ { temp = rk[ 5]; rk[ 6] = rk[ 0] ^ + #ifndef WOLFSSL_AES_SMALL_TABLES (Te[2][GETBYTE(temp, 2)] & 0xff000000) ^ (Te[3][GETBYTE(temp, 1)] & 0x00ff0000) ^ (Te[0][GETBYTE(temp, 0)] & 0x0000ff00) ^ (Te[1][GETBYTE(temp, 3)] & 0x000000ff) ^ + #else + ((word32)Tsbox[GETBYTE(temp, 2)] << 24) ^ + ((word32)Tsbox[GETBYTE(temp, 1)] << 16) ^ + ((word32)Tsbox[GETBYTE(temp, 0)] << 8) ^ + ((word32)Tsbox[GETBYTE(temp, 3)]) ^ + #endif rcon[i]; rk[ 7] = rk[ 1] ^ rk[ 6]; rk[ 8] = rk[ 2] ^ rk[ 7]; @@ -2104,10 +2616,17 @@ { temp = rk[ 7]; rk[ 8] = rk[ 0] ^ + #ifndef WOLFSSL_AES_SMALL_TABLES (Te[2][GETBYTE(temp, 2)] & 0xff000000) ^ (Te[3][GETBYTE(temp, 1)] & 0x00ff0000) ^ (Te[0][GETBYTE(temp, 0)] & 0x0000ff00) ^ (Te[1][GETBYTE(temp, 3)] & 0x000000ff) ^ + #else + ((word32)Tsbox[GETBYTE(temp, 2)] << 24) ^ + ((word32)Tsbox[GETBYTE(temp, 1)] << 16) ^ + ((word32)Tsbox[GETBYTE(temp, 0)] << 8) ^ + ((word32)Tsbox[GETBYTE(temp, 3)]) ^ + #endif rcon[i]; rk[ 9] = rk[ 1] ^ rk[ 8]; rk[10] = rk[ 2] ^ rk[ 9]; @@ -2116,10 +2635,17 @@ break; temp = rk[11]; rk[12] = rk[ 4] ^ + #ifndef WOLFSSL_AES_SMALL_TABLES (Te[2][GETBYTE(temp, 3)] & 0xff000000) ^ (Te[3][GETBYTE(temp, 2)] & 0x00ff0000) ^ (Te[0][GETBYTE(temp, 1)] & 0x0000ff00) ^ (Te[1][GETBYTE(temp, 0)] & 0x000000ff); + #else + ((word32)Tsbox[GETBYTE(temp, 3)] << 24) ^ + ((word32)Tsbox[GETBYTE(temp, 2)] << 16) ^ + ((word32)Tsbox[GETBYTE(temp, 1)] << 8) ^ + ((word32)Tsbox[GETBYTE(temp, 0)]); + #endif rk[13] = rk[ 5] ^ rk[12]; rk[14] = rk[ 6] ^ rk[13]; rk[15] = rk[ 7] ^ rk[14]; @@ -2133,7 +2659,7 @@ return BAD_FUNC_ARG; } /* switch */ - #ifdef HAVE_AES_DECRYPT + #if defined(HAVE_AES_DECRYPT) if (dir == AES_DECRYPTION) { unsigned int j; rk = aes->key; @@ -2145,6 +2671,7 @@ temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp; temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp; } + #if !defined(WOLFSSL_AES_SMALL_TABLES) /* apply the inverse MixColumn transform to all round keys but the first and the last: */ for (i = 1; i < aes->rounds; i++) { @@ -2170,12 +2697,21 @@ Td[2][Te[1][GETBYTE(rk[3], 1)] & 0xff] ^ Td[3][Te[1][GETBYTE(rk[3], 0)] & 0xff]; } + #endif } #else (void)dir; #endif /* 
HAVE_AES_DECRYPT */ + (void)temp; #endif /* NEED_AES_TABLES */ +#if defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_AES) + XMEMCPY((byte*)aes->key, userKey, keylen); + if (WOLFSSL_SCE_GSCE_HANDLE.p_cfg->endian_flag == CRYPTO_WORD_ENDIAN_BIG) { + ByteReverseWords(aes->key, aes->key, 32); + } +#endif + return wc_AesSetIV(aes, iv); } @@ -2217,13 +2753,16 @@ aes->keylen = keylen; aes->rounds = keylen/4 + 6; - #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES) - if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES) { - XMEMCPY(aes->asyncKey, userKey, keylen); - if (iv) - XMEMCPY(aes->asyncIv, iv, AES_BLOCK_SIZE); - } - #endif /* WOLFSSL_ASYNC_CRYPT */ + #if defined(WOLF_CRYPTO_CB) || (defined(WOLFSSL_DEVCRYPTO) && \ + (defined(WOLFSSL_DEVCRYPTO_AES) || defined(WOLFSSL_DEVCRYPTO_CBC))) || \ + (defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)) + #ifdef WOLF_CRYPTO_CB + if (aes->devId != INVALID_DEVID) + #endif + { + XMEMCPY(aes->devKey, userKey, keylen); + } + #endif #ifdef WOLFSSL_AESNI if (checkAESNI == 0) { @@ -2231,12 +2770,15 @@ checkAESNI = 1; } if (haveAESNI) { - #if defined(WOLFSSL_AES_COUNTER) || defined(WOLFSSL_AES_CFB) + #if defined(WOLFSSL_AES_COUNTER) || defined(WOLFSSL_AES_CFB) || \ + defined(WOLFSSL_AES_OFB) aes->left = 0; #endif /* WOLFSSL_AES_COUNTER */ aes->use_aesni = 1; if (iv) XMEMCPY(aes->reg, iv, AES_BLOCK_SIZE); + else + XMEMSET(aes->reg, 0, AES_BLOCK_SIZE); if (dir == AES_ENCRYPTION) return AES_set_encrypt_key(userKey, keylen * 8, aes); #ifdef HAVE_AES_DECRYPT @@ -2248,6 +2790,10 @@ ret = wc_AesSetKeyLocal(aes, userKey, keylen, iv, dir); + #if defined(WOLFSSL_DEVCRYPTO) && \ + (defined(WOLFSSL_DEVCRYPTO_AES) || defined(WOLFSSL_DEVCRYPTO_CBC)) + aes->ctx.cfd = -1; + #endif #ifdef WOLFSSL_IMX6_CAAM_BLOB ForceZero(local, sizeof(local)); #endif @@ -2339,19 +2885,60 @@ #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES) /* implemented in wolfcrypt/src/port/caam/caam_aes.c */ + #elif defined(WOLFSSL_AFALG) + /* implemented in wolfcrypt/src/port/af_alg/afalg_aes.c */ + + #elif defined(WOLFSSL_DEVCRYPTO_AES) + /* implemented in wolfcrypt/src/port/devcrypt/devcrypto_aes.c */ + + #elif defined(STM32_CRYPTO) + /* Allow direct access to one block encrypt */ + void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in) + { + if (wolfSSL_CryptHwMutexLock() == 0) { + wc_AesEncrypt(aes, in, out); + wolfSSL_CryptHwMutexUnLock(); + } + } + #ifdef HAVE_AES_DECRYPT + /* Allow direct access to one block decrypt */ + void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in) + { + if (wolfSSL_CryptHwMutexLock() == 0) { + wc_AesDecrypt(aes, in, out); + wolfSSL_CryptHwMutexUnLock(); + } + } + #endif /* HAVE_AES_DECRYPT */ + + #elif defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_AES) + + /* Allow direct access to one block encrypt */ + void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in) + { + wc_AesEncrypt(aes, in, out); + } + #ifdef HAVE_AES_DECRYPT + /* Allow direct access to one block decrypt */ + void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in) + { + wc_AesDecrypt(aes, in, out); + } + #endif /* HAVE_AES_DECRYPT */ #else /* Allow direct access to one block encrypt */ void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in) { wc_AesEncrypt(aes, in, out); } - #ifdef HAVE_AES_DECRYPT + #ifdef HAVE_AES_DECRYPT /* Allow direct access to one block decrypt */ void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in) { wc_AesDecrypt(aes, in, out); } - #endif /* HAVE_AES_DECRYPT */ + #endif /* 
HAVE_AES_DECRYPT */ #endif /* AES direct block */ #endif /* WOLFSSL_AES_DIRECT */ @@ -2367,32 +2954,38 @@ word32 blocks = (sz / AES_BLOCK_SIZE); CRYP_HandleTypeDef hcryp; - XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef)); - switch (aes->rounds) { - case 10: /* 128-bit key */ - hcryp.Init.KeySize = CRYP_KEYSIZE_128B; - break; - #ifdef CRYP_KEYSIZE_192B - case 12: /* 192-bit key */ - hcryp.Init.KeySize = CRYP_KEYSIZE_192B; - break; - #endif - case 14: /* 256-bit key */ - hcryp.Init.KeySize = CRYP_KEYSIZE_256B; - break; - default: - break; - } - hcryp.Instance = CRYP; - hcryp.Init.DataType = CRYP_DATATYPE_8B; - hcryp.Init.pKey = (uint8_t*)aes->key; - hcryp.Init.pInitVect = (uint8_t*)aes->reg; - + ret = wc_Stm32_Aes_Init(aes, &hcryp); + if (ret != 0) + return ret; + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + + #ifdef STM32_CRYPTO_AES_ONLY + hcryp.Init.OperatingMode = CRYP_ALGOMODE_ENCRYPT; + hcryp.Init.ChainingMode = CRYP_CHAINMODE_AES_CBC; + hcryp.Init.KeyWriteFlag = CRYP_KEY_WRITE_ENABLE; + #elif defined(STM32_HAL_V2) + hcryp.Init.Algorithm = CRYP_AES_CBC; + ByteReverseWords(aes->reg, aes->reg, AES_BLOCK_SIZE); + #endif + hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)aes->reg; HAL_CRYP_Init(&hcryp); while (blocks--) { - if (HAL_CRYP_AESCBC_Encrypt(&hcryp, (uint8_t*)in, AES_BLOCK_SIZE, - out, STM32_HAL_TIMEOUT) != HAL_OK) { + #ifdef STM32_CRYPTO_AES_ONLY + ret = HAL_CRYPEx_AES(&hcryp, (uint8_t*)in, AES_BLOCK_SIZE, + out, STM32_HAL_TIMEOUT); + #elif defined(STM32_HAL_V2) + ret = HAL_CRYP_Encrypt(&hcryp, (uint32_t*)in, AES_BLOCK_SIZE, + (uint32_t*)out, STM32_HAL_TIMEOUT); + #else + ret = HAL_CRYP_AESCBC_Encrypt(&hcryp, (uint8_t*)in, AES_BLOCK_SIZE, + out, STM32_HAL_TIMEOUT); + #endif + if (ret != HAL_OK) { ret = WC_TIMEOUT_E; break; } @@ -2407,6 +3000,8 @@ HAL_CRYP_DeInit(&hcryp); + wolfSSL_CryptHwMutexUnLock(); + return ret; } #ifdef HAVE_AES_DECRYPT @@ -2416,33 +3011,44 @@ word32 blocks = (sz / AES_BLOCK_SIZE); CRYP_HandleTypeDef hcryp; - XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef)); - switch (aes->rounds) { - case 10: /* 128-bit key */ - hcryp.Init.KeySize = CRYP_KEYSIZE_128B; - break; - #ifdef CRYP_KEYSIZE_192B - case 12: /* 192-bit key */ - hcryp.Init.KeySize = CRYP_KEYSIZE_192B; - break; - #endif - case 14: /* 256-bit key */ - hcryp.Init.KeySize = CRYP_KEYSIZE_256B; - break; - default: - break; - } - hcryp.Instance = CRYP; - hcryp.Init.DataType = CRYP_DATATYPE_8B; - hcryp.Init.pKey = (uint8_t*)aes->key; - hcryp.Init.pInitVect = (uint8_t*)aes->reg; - + ret = wc_Stm32_Aes_Init(aes, &hcryp); + if (ret != 0) + return ret; + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + + /* if input and output same will overwrite input iv */ + XMEMCPY(aes->tmp, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); + + #ifdef STM32_CRYPTO_AES_ONLY + hcryp.Init.OperatingMode = CRYP_ALGOMODE_KEYDERIVATION_DECRYPT; + hcryp.Init.ChainingMode = CRYP_CHAINMODE_AES_CBC; + hcryp.Init.KeyWriteFlag = CRYP_KEY_WRITE_ENABLE; + #elif defined(STM32_HAL_V2) + hcryp.Init.Algorithm = CRYP_AES_CBC; + ByteReverseWords(aes->reg, aes->reg, AES_BLOCK_SIZE); + #endif + + hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)aes->reg; HAL_CRYP_Init(&hcryp); while (blocks--) { - if (HAL_CRYP_AESCBC_Decrypt(&hcryp, (uint8_t*)in, AES_BLOCK_SIZE, - out, STM32_HAL_TIMEOUT) != HAL_OK) { + #ifdef STM32_CRYPTO_AES_ONLY + ret = HAL_CRYPEx_AES(&hcryp, (uint8_t*)in, AES_BLOCK_SIZE, + out, STM32_HAL_TIMEOUT); + #elif defined(STM32_HAL_V2) + ret = HAL_CRYP_Decrypt(&hcryp, (uint32_t*)in, AES_BLOCK_SIZE, + 
(uint32_t*)out, STM32_HAL_TIMEOUT); + #else + ret = HAL_CRYP_AESCBC_Decrypt(&hcryp, (uint8_t*)in, AES_BLOCK_SIZE, + out, STM32_HAL_TIMEOUT); + #endif + if (ret != HAL_OK) { ret = WC_TIMEOUT_E; + break; } /* store iv for next call */ @@ -2453,80 +3059,51 @@ } HAL_CRYP_DeInit(&hcryp); + wolfSSL_CryptHwMutexUnLock(); return ret; } #endif /* HAVE_AES_DECRYPT */ -#else + +#else /* STD_PERI_LIB */ int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) { - word32 *enc_key, *iv; + int ret; + word32 *iv; word32 blocks = (sz / AES_BLOCK_SIZE); - CRYP_InitTypeDef AES_CRYP_InitStructure; - CRYP_KeyInitTypeDef AES_CRYP_KeyInitStructure; - CRYP_IVInitTypeDef AES_CRYP_IVInitStructure; - - enc_key = aes->key; - iv = aes->reg; - - /* crypto structure initialization */ - CRYP_KeyStructInit(&AES_CRYP_KeyInitStructure); - CRYP_StructInit(&AES_CRYP_InitStructure); - CRYP_IVStructInit(&AES_CRYP_IVInitStructure); + CRYP_InitTypeDef cryptInit; + CRYP_KeyInitTypeDef keyInit; + CRYP_IVInitTypeDef ivInit; + + ret = wc_Stm32_Aes_Init(aes, &cryptInit, &keyInit); + if (ret != 0) + return ret; + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } /* reset registers to their default values */ CRYP_DeInit(); - /* load key into correct registers */ - switch (aes->rounds) { - case 10: /* 128-bit key */ - AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_128b; - AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[0]; - AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[1]; - AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[2]; - AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[3]; - break; - - case 12: /* 192-bit key */ - AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_192b; - AES_CRYP_KeyInitStructure.CRYP_Key1Left = enc_key[0]; - AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[1]; - AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[2]; - AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[3]; - AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[4]; - AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[5]; - break; - - case 14: /* 256-bit key */ - AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_256b; - AES_CRYP_KeyInitStructure.CRYP_Key0Left = enc_key[0]; - AES_CRYP_KeyInitStructure.CRYP_Key0Right = enc_key[1]; - AES_CRYP_KeyInitStructure.CRYP_Key1Left = enc_key[2]; - AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[3]; - AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[4]; - AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[5]; - AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[6]; - AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[7]; - break; - - default: - break; - } - CRYP_KeyInit(&AES_CRYP_KeyInitStructure); + /* set key */ + CRYP_KeyInit(&keyInit); /* set iv */ + iv = aes->reg; + CRYP_IVStructInit(&ivInit); ByteReverseWords(iv, iv, AES_BLOCK_SIZE); - AES_CRYP_IVInitStructure.CRYP_IV0Left = iv[0]; - AES_CRYP_IVInitStructure.CRYP_IV0Right = iv[1]; - AES_CRYP_IVInitStructure.CRYP_IV1Left = iv[2]; - AES_CRYP_IVInitStructure.CRYP_IV1Right = iv[3]; - CRYP_IVInit(&AES_CRYP_IVInitStructure); - - /* set direction, mode, and datatype */ - AES_CRYP_InitStructure.CRYP_AlgoDir = CRYP_AlgoDir_Encrypt; - AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_CBC; - AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b; - CRYP_Init(&AES_CRYP_InitStructure); + ivInit.CRYP_IV0Left = iv[0]; + ivInit.CRYP_IV0Right = iv[1]; + ivInit.CRYP_IV1Left = iv[2]; + ivInit.CRYP_IV1Right = iv[3]; + CRYP_IVInit(&ivInit); + + /* set direction and mode */ + cryptInit.CRYP_AlgoDir = 
CRYP_AlgoDir_Encrypt; + cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_CBC; + CRYP_Init(&cryptInit); /* enable crypto processor */ CRYP_Cmd(ENABLE); @@ -2558,26 +3135,29 @@ /* disable crypto processor */ CRYP_Cmd(DISABLE); - - return 0; + wolfSSL_CryptHwMutexUnLock(); + + return ret; } #ifdef HAVE_AES_DECRYPT int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) { - word32 *dec_key, *iv; + int ret; + word32 *iv; word32 blocks = (sz / AES_BLOCK_SIZE); - CRYP_InitTypeDef AES_CRYP_InitStructure; - CRYP_KeyInitTypeDef AES_CRYP_KeyInitStructure; - CRYP_IVInitTypeDef AES_CRYP_IVInitStructure; - - dec_key = aes->key; - iv = aes->reg; - - /* crypto structure initialization */ - CRYP_KeyStructInit(&AES_CRYP_KeyInitStructure); - CRYP_StructInit(&AES_CRYP_InitStructure); - CRYP_IVStructInit(&AES_CRYP_IVInitStructure); + CRYP_InitTypeDef cryptInit; + CRYP_KeyInitTypeDef keyInit; + CRYP_IVInitTypeDef ivInit; + + ret = wc_Stm32_Aes_Init(aes, &cryptInit, &keyInit); + if (ret != 0) + return ret; + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } /* if input and output same will overwrite input iv */ XMEMCPY(aes->tmp, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); @@ -2585,48 +3165,11 @@ /* reset registers to their default values */ CRYP_DeInit(); - /* load key into correct registers */ - switch (aes->rounds) { - case 10: /* 128-bit key */ - AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_128b; - AES_CRYP_KeyInitStructure.CRYP_Key2Left = dec_key[0]; - AES_CRYP_KeyInitStructure.CRYP_Key2Right = dec_key[1]; - AES_CRYP_KeyInitStructure.CRYP_Key3Left = dec_key[2]; - AES_CRYP_KeyInitStructure.CRYP_Key3Right = dec_key[3]; - break; - - case 12: /* 192-bit key */ - AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_192b; - AES_CRYP_KeyInitStructure.CRYP_Key1Left = dec_key[0]; - AES_CRYP_KeyInitStructure.CRYP_Key1Right = dec_key[1]; - AES_CRYP_KeyInitStructure.CRYP_Key2Left = dec_key[2]; - AES_CRYP_KeyInitStructure.CRYP_Key2Right = dec_key[3]; - AES_CRYP_KeyInitStructure.CRYP_Key3Left = dec_key[4]; - AES_CRYP_KeyInitStructure.CRYP_Key3Right = dec_key[5]; - break; - - case 14: /* 256-bit key */ - AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_256b; - AES_CRYP_KeyInitStructure.CRYP_Key0Left = dec_key[0]; - AES_CRYP_KeyInitStructure.CRYP_Key0Right = dec_key[1]; - AES_CRYP_KeyInitStructure.CRYP_Key1Left = dec_key[2]; - AES_CRYP_KeyInitStructure.CRYP_Key1Right = dec_key[3]; - AES_CRYP_KeyInitStructure.CRYP_Key2Left = dec_key[4]; - AES_CRYP_KeyInitStructure.CRYP_Key2Right = dec_key[5]; - AES_CRYP_KeyInitStructure.CRYP_Key3Left = dec_key[6]; - AES_CRYP_KeyInitStructure.CRYP_Key3Right = dec_key[7]; - break; - - default: - break; - } - - /* set direction, mode, and datatype for key preparation */ - AES_CRYP_InitStructure.CRYP_AlgoDir = CRYP_AlgoDir_Decrypt; - AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_Key; - AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_32b; - CRYP_Init(&AES_CRYP_InitStructure); - CRYP_KeyInit(&AES_CRYP_KeyInitStructure); + /* set direction and key */ + CRYP_KeyInit(&keyInit); + cryptInit.CRYP_AlgoDir = CRYP_AlgoDir_Decrypt; + cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_Key; + CRYP_Init(&cryptInit); /* enable crypto processor */ CRYP_Cmd(ENABLE); @@ -2634,20 +3177,20 @@ /* wait until key has been prepared */ while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {} - /* set direction, mode, and datatype for decryption */ - AES_CRYP_InitStructure.CRYP_AlgoDir = CRYP_AlgoDir_Decrypt; - AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_CBC; - 
AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b; - CRYP_Init(&AES_CRYP_InitStructure); + /* set direction and mode */ + cryptInit.CRYP_AlgoDir = CRYP_AlgoDir_Decrypt; + cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_CBC; + CRYP_Init(&cryptInit); /* set iv */ + iv = aes->reg; + CRYP_IVStructInit(&ivInit); ByteReverseWords(iv, iv, AES_BLOCK_SIZE); - - AES_CRYP_IVInitStructure.CRYP_IV0Left = iv[0]; - AES_CRYP_IVInitStructure.CRYP_IV0Right = iv[1]; - AES_CRYP_IVInitStructure.CRYP_IV1Left = iv[2]; - AES_CRYP_IVInitStructure.CRYP_IV1Right = iv[3]; - CRYP_IVInit(&AES_CRYP_IVInitStructure); + ivInit.CRYP_IV0Left = iv[0]; + ivInit.CRYP_IV0Right = iv[1]; + ivInit.CRYP_IV1Left = iv[2]; + ivInit.CRYP_IV1Right = iv[3]; + CRYP_IVInit(&ivInit); /* enable crypto processor */ CRYP_Cmd(ENABLE); @@ -2678,8 +3221,9 @@ /* disable crypto processor */ CRYP_Cmd(DISABLE); - - return 0; + wolfSSL_CryptHwMutexUnLock(); + + return ret; } #endif /* HAVE_AES_DECRYPT */ #endif /* WOLFSSL_STM32_CUBEMX */ @@ -2808,6 +3352,12 @@ status = LTC_AES_EncryptCbc(LTC_BASE, in, out, blocks * AES_BLOCK_SIZE, iv, enc_key, keySize); + + /* store iv for next call */ + if (status == kStatus_Success) { + XMEMCPY(iv, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); + } + return (status == kStatus_Success) ? 0 : -1; } @@ -2818,6 +3368,7 @@ status_t status; byte* iv, *dec_key; word32 blocks = (sz / AES_BLOCK_SIZE); + byte temp_block[AES_BLOCK_SIZE]; iv = (byte*)aes->reg; dec_key = (byte*)aes->key; @@ -2827,8 +3378,17 @@ return status; } + /* get IV for next call */ + XMEMCPY(temp_block, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); + status = LTC_AES_DecryptCbc(LTC_BASE, in, out, blocks * AES_BLOCK_SIZE, iv, dec_key, keySize, kLTC_EncryptKey); + + /* store IV for next call */ + if (status == kStatus_Success) { + XMEMCPY(iv, temp_block, AES_BLOCK_SIZE); + } + return (status == kStatus_Success) ? 
0 : -1; } #endif /* HAVE_AES_DECRYPT */ @@ -2939,12 +3499,38 @@ return ret; } #endif /* HAVE_AES_DECRYPT */ - +#elif defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_AES) + + int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + return wc_esp32AesCbcEncrypt(aes, out, in, sz); + } + int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + return wc_esp32AesCbcDecrypt(aes, out, in, sz); + } +#elif defined(WOLFSSL_CRYPTOCELL) && defined(WOLFSSL_CRYPTOCELL_AES) + int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + return SaSi_AesBlock(&aes->ctx.user_ctx, (uint8_t* )in, sz, out); + } + int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + return SaSi_AesBlock(&aes->ctx.user_ctx, (uint8_t* )in, sz, out); + } #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES) /* implemented in wolfcrypt/src/port/caam/caam_aes.c */ +#elif defined(WOLFSSL_AFALG) + /* implemented in wolfcrypt/src/port/af_alg/afalg_aes.c */ + +#elif defined(WOLFSSL_DEVCRYPTO_CBC) + /* implemented in wolfcrypt/src/port/devcrypt/devcrypto_aes.c */ + #else + /* Software AES - CBC Encrypt */ int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) { word32 blocks = (sz / AES_BLOCK_SIZE); @@ -2953,6 +3539,14 @@ return BAD_FUNC_ARG; } + #ifdef WOLF_CRYPTO_CB + if (aes->devId != INVALID_DEVID) { + int ret = wc_CryptoCb_AesCbcEncrypt(aes, out, in, sz); + if (ret != CRYPTOCB_UNAVAILABLE) + return ret; + /* fall-through when unavailable */ + } + #endif #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES) /* if async and byte count above threshold */ if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES && @@ -2961,8 +3555,8 @@ return NitroxAesCbcEncrypt(aes, out, in, sz); #elif defined(HAVE_INTEL_QA) return IntelQaSymAesCbcEncrypt(&aes->asyncDev, out, in, sz, - (const byte*)aes->asyncKey, aes->keylen, - (const byte*)aes->asyncIv, AES_BLOCK_SIZE); + (const byte*)aes->devKey, aes->keylen, + (byte*)aes->reg, AES_BLOCK_SIZE); #else /* WOLFSSL_ASYNC_CRYPT_TEST */ if (wc_AsyncTestInit(&aes->asyncDev, ASYNC_TEST_AES_CBC_ENCRYPT)) { WC_ASYNC_TEST* testDev = &aes->asyncDev.test; @@ -3034,6 +3628,7 @@ } #ifdef HAVE_AES_DECRYPT + /* Software AES - CBC Decrypt */ int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) { word32 blocks; @@ -3043,6 +3638,14 @@ return BAD_FUNC_ARG; } + #ifdef WOLF_CRYPTO_CB + if (aes->devId != INVALID_DEVID) { + int ret = wc_CryptoCb_AesCbcDecrypt(aes, out, in, sz); + if (ret != CRYPTOCB_UNAVAILABLE) + return ret; + /* fall-through when unavailable */ + } + #endif #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES) /* if async and byte count above threshold */ if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES && @@ -3051,8 +3654,8 @@ return NitroxAesCbcDecrypt(aes, out, in, sz); #elif defined(HAVE_INTEL_QA) return IntelQaSymAesCbcDecrypt(&aes->asyncDev, out, in, sz, - (const byte*)aes->asyncKey, aes->keylen, - (const byte*)aes->asyncIv, AES_BLOCK_SIZE); + (const byte*)aes->devKey, aes->keylen, + (byte*)aes->reg, AES_BLOCK_SIZE); #else /* WOLFSSL_ASYNC_CRYPT_TEST */ if (wc_AsyncTestInit(&aes->asyncDev, ASYNC_TEST_AES_CBC_DECRYPT)) { WC_ASYNC_TEST* testDev = &aes->asyncDev.test; @@ -3101,6 +3704,7 @@ XMEMCPY(aes->tmp, in, AES_BLOCK_SIZE); wc_AesDecrypt(aes, (byte*)aes->tmp, out); xorbuf(out, (byte*)aes->reg, AES_BLOCK_SIZE); + /* store iv for next call */ XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE); out += AES_BLOCK_SIZE; @@ -3126,101 +3730,83 @@ int 
ret = 0; #ifdef WOLFSSL_STM32_CUBEMX CRYP_HandleTypeDef hcryp; - - XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef)); - switch (aes->rounds) { - case 10: /* 128-bit key */ - hcryp.Init.KeySize = CRYP_KEYSIZE_128B; - break; - #ifdef CRYP_KEYSIZE_192B - case 12: /* 192-bit key */ - hcryp.Init.KeySize = CRYP_KEYSIZE_192B; - break; - #endif - case 14: /* 256-bit key */ - hcryp.Init.KeySize = CRYP_KEYSIZE_256B; - break; - default: - break; + #ifdef STM32_HAL_V2 + word32 iv[AES_BLOCK_SIZE/sizeof(word32)]; + #endif + #else + word32 *iv; + CRYP_InitTypeDef cryptInit; + CRYP_KeyInitTypeDef keyInit; + CRYP_IVInitTypeDef ivInit; + #endif + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + + #ifdef WOLFSSL_STM32_CUBEMX + ret = wc_Stm32_Aes_Init(aes, &hcryp); + if (ret != 0) { + wolfSSL_CryptHwMutexUnLock(); + return ret; } - hcryp.Instance = CRYP; - hcryp.Init.DataType = CRYP_DATATYPE_8B; - hcryp.Init.pKey = (byte*)aes->key; - hcryp.Init.pInitVect = (byte*)aes->reg; - + + #ifdef STM32_CRYPTO_AES_ONLY + hcryp.Init.OperatingMode = CRYP_ALGOMODE_ENCRYPT; + hcryp.Init.ChainingMode = CRYP_CHAINMODE_AES_CTR; + hcryp.Init.KeyWriteFlag = CRYP_KEY_WRITE_ENABLE; + hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)aes->reg; + #elif defined(STM32_HAL_V2) + hcryp.Init.Algorithm = CRYP_AES_CTR; + ByteReverseWords(iv, aes->reg, AES_BLOCK_SIZE); + hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)iv; + #else + hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)aes->reg; + #endif HAL_CRYP_Init(&hcryp); - if (HAL_CRYP_AESCTR_Encrypt(&hcryp, (byte*)in, AES_BLOCK_SIZE, out, - STM32_HAL_TIMEOUT) != HAL_OK) { - /* failed */ + #ifdef STM32_CRYPTO_AES_ONLY + ret = HAL_CRYPEx_AES(&hcryp, (byte*)in, AES_BLOCK_SIZE, + out, STM32_HAL_TIMEOUT); + #elif defined(STM32_HAL_V2) + ret = HAL_CRYP_Encrypt(&hcryp, (uint32_t*)in, AES_BLOCK_SIZE, + (uint32_t*)out, STM32_HAL_TIMEOUT); + #else + ret = HAL_CRYP_AESCTR_Encrypt(&hcryp, (byte*)in, AES_BLOCK_SIZE, + out, STM32_HAL_TIMEOUT); + #endif + if (ret != HAL_OK) { ret = WC_TIMEOUT_E; } - HAL_CRYP_DeInit(&hcryp); #else /* STD_PERI_LIB */ - word32 *enc_key, *iv; - CRYP_InitTypeDef AES_CRYP_InitStructure; - CRYP_KeyInitTypeDef AES_CRYP_KeyInitStructure; - CRYP_IVInitTypeDef AES_CRYP_IVInitStructure; - - enc_key = aes->key; - iv = aes->reg; - - /* crypto structure initialization */ - CRYP_KeyStructInit(&AES_CRYP_KeyInitStructure); - CRYP_StructInit(&AES_CRYP_InitStructure); - CRYP_IVStructInit(&AES_CRYP_IVInitStructure); + ret = wc_Stm32_Aes_Init(aes, &cryptInit, &keyInit); + if (ret != 0) { + wolfSSL_CryptHwMutexUnLock(); + return ret; + } /* reset registers to their default values */ CRYP_DeInit(); - /* load key into correct registers */ - switch (aes->rounds) { - case 10: /* 128-bit key */ - AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_128b; - AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[0]; - AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[1]; - AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[2]; - AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[3]; - break; - case 12: /* 192-bit key */ - AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_192b; - AES_CRYP_KeyInitStructure.CRYP_Key1Left = enc_key[0]; - AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[1]; - AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[2]; - AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[3]; - AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[4]; - AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[5]; - break; - case 14: /* 256-bit key */ - AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_256b; - 
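Both STM32 code paths now take the hardware mutex before touching the CRYP peripheral and release it on every exit path, so concurrent callers cannot clobber the single crypto core. The shape of that guard, as a sketch (stm32DoBlock is a hypothetical stand-in for the key/IV programming and transfer shown here):

    static int hwAesOp(Aes* aes, byte* out, const byte* in)
    {
        int ret = wolfSSL_CryptHwMutexLock();
        if (ret != 0)
            return ret;                /* lock failed: report, don't touch hw */
        ret = stm32DoBlock(aes, out, in); /* hypothetical: program key/IV, run */
        wolfSSL_CryptHwMutexUnLock();  /* always release, even on error */
        return ret;
    }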
AES_CRYP_KeyInitStructure.CRYP_Key0Left = enc_key[0]; - AES_CRYP_KeyInitStructure.CRYP_Key0Right = enc_key[1]; - AES_CRYP_KeyInitStructure.CRYP_Key1Left = enc_key[2]; - AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[3]; - AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[4]; - AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[5]; - AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[6]; - AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[7]; - break; - default: - break; - } - CRYP_KeyInit(&AES_CRYP_KeyInitStructure); + /* set key */ + CRYP_KeyInit(&keyInit); /* set iv */ - AES_CRYP_IVInitStructure.CRYP_IV0Left = ByteReverseWord32(iv[0]); - AES_CRYP_IVInitStructure.CRYP_IV0Right = ByteReverseWord32(iv[1]); - AES_CRYP_IVInitStructure.CRYP_IV1Left = ByteReverseWord32(iv[2]); - AES_CRYP_IVInitStructure.CRYP_IV1Right = ByteReverseWord32(iv[3]); - CRYP_IVInit(&AES_CRYP_IVInitStructure); - - /* set direction, mode, and datatype */ - AES_CRYP_InitStructure.CRYP_AlgoDir = CRYP_AlgoDir_Encrypt; - AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_CTR; - AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b; - CRYP_Init(&AES_CRYP_InitStructure); + iv = aes->reg; + CRYP_IVStructInit(&ivInit); + ivInit.CRYP_IV0Left = ByteReverseWord32(iv[0]); + ivInit.CRYP_IV0Right = ByteReverseWord32(iv[1]); + ivInit.CRYP_IV1Left = ByteReverseWord32(iv[2]); + ivInit.CRYP_IV1Right = ByteReverseWord32(iv[3]); + CRYP_IVInit(&ivInit); + + /* set direction and mode */ + cryptInit.CRYP_AlgoDir = CRYP_AlgoDir_Encrypt; + cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_CTR; + CRYP_Init(&cryptInit); /* enable crypto processor */ CRYP_Cmd(ENABLE); @@ -3245,6 +3831,8 @@ CRYP_Cmd(DISABLE); #endif /* WOLFSSL_STM32_CUBEMX */ + + wolfSSL_CryptHwMutexUnLock(); return ret; } @@ -3303,6 +3891,18 @@ #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES) /* implemented in wolfcrypt/src/port/caam/caam_aes.c */ + #elif defined(WOLFSSL_AFALG) + /* implemented in wolfcrypt/src/port/af_alg/afalg_aes.c */ + + #elif defined(WOLFSSL_DEVCRYPTO_AES) + /* implemented in wolfcrypt/src/port/devcrypt/devcrypto_aes.c */ + + #elif defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_AES) + /* esp32 doesn't support CTR mode by hw. 
*/ + /* use aes encryption plus sw implementation */ + #define NEED_AES_CTR_SOFT + #else /* Use software based AES counter */ @@ -3321,9 +3921,11 @@ } } + /* Software AES - CTR Encrypt */ int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) { byte* tmp; + byte scratch[AES_BLOCK_SIZE]; if (aes == NULL || out == NULL || in == NULL) { return BAD_FUNC_ARG; @@ -3342,8 +3944,9 @@ #ifdef XTRANSFORM_AESCTRBLOCK XTRANSFORM_AESCTRBLOCK(aes, out, in); #else - wc_AesEncrypt(aes, (byte*)aes->reg, out); - xorbuf(out, in, AES_BLOCK_SIZE); + wc_AesEncrypt(aes, (byte*)aes->reg, scratch); + xorbuf(scratch, in, AES_BLOCK_SIZE); + XMEMCPY(out, scratch, AES_BLOCK_SIZE); #endif IncrementAesCounter((byte*)aes->reg); @@ -3352,6 +3955,7 @@ sz -= AES_BLOCK_SIZE; aes->left = 0; } + ForceZero(scratch, AES_BLOCK_SIZE); /* handle non block size remaining and store unused byte count in left */ if (sz) { @@ -3412,6 +4016,13 @@ #ifdef WOLFSSL_ARMASM /* implementation is located in wolfcrypt/src/port/arm/armv8-aes.c */ + +#elif defined(WOLFSSL_AFALG) + /* implemented in wolfcrypt/src/port/afalg/afalg_aes.c */ + +#elif defined(WOLFSSL_DEVCRYPTO_AES) + /* implemented in wolfcrypt/src/port/devcrypt/devcrypto_aes.c */ + #else /* software + AESNI implementation */ #if !defined(FREESCALE_LTC_AES_GCM) @@ -3425,6 +4036,18 @@ return; } } +#ifdef STM32_CRYPTO_AES_GCM +static WC_INLINE void DecrementGcmCounter(byte* inOutCtr) +{ + int i; + + /* in network byte order so start at end and work back */ + for (i = AES_BLOCK_SIZE - 1; i >= AES_BLOCK_SIZE - CTR_SZ; i--) { + if (--inOutCtr[i] != 0xFF) /* we're done unless we underflow */ + return; + } +} +#endif /* STM32_CRYPTO_AES_GCM */ #endif /* !FREESCALE_LTC_AES_GCM */ #if defined(GCM_SMALL) || defined(GCM_TABLE) @@ -3491,7 +4114,7 @@ #endif /* GCM_TABLE */ - +/* Software AES - GCM SetKey */ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) { int ret; @@ -3517,6 +4140,12 @@ if (!((len == 16) || (len == 24) || (len == 32))) return BAD_FUNC_ARG; +#ifdef OPENSSL_EXTRA + if (aes != NULL) { + XMEMSET(aes->aadH, 0, sizeof(aes->aadH)); + aes->aadLen = 0; + } +#endif XMEMSET(iv, 0, AES_BLOCK_SIZE); ret = wc_AesSetKey(aes, key, len, iv, AES_ENCRYPTION); @@ -3537,6 +4166,14 @@ #if defined(WOLFSSL_XILINX_CRYPT) wc_AesGcmSetKey_ex(aes, key, len, XSECURE_CSU_AES_KEY_SRC_KUP); +#elif defined(WOLFSSL_AFALG_XILINX_AES) + wc_AesGcmSetKey_ex(aes, key, len, 0); +#endif + +#ifdef WOLF_CRYPTO_CB + if (aes->devId != INVALID_DEVID) { + XMEMCPY(aes->devKey, key, len); + } #endif #ifdef WOLFSSL_IMX6_CAAM_BLOB @@ -3554,18 +4191,68 @@ #define HAVE_INTEL_AVX2 #endif /* USE_INTEL_SPEEDUP */ -#ifdef _MSC_VER - #define S(w,z) ((char)((unsigned long long)(w) >> (8*(7-(z))) & 0xFF)) - #define M128_INIT(x,y) { S((x),7), S((x),6), S((x),5), S((x),4), \ - S((x),3), S((x),2), S((x),1), S((x),0), \ - S((y),7), S((y),6), S((y),5), S((y),4), \ - S((y),3), S((y),2), S((y),1), S((y),0) } -#else - #define M128_INIT(x,y) { (x), (y) } -#endif - -static const __m128i MOD2_128 = M128_INIT(0x1, - (long long int)0xc200000000000000UL); +#ifndef _MSC_VER + +void AES_GCM_encrypt(const unsigned char *in, unsigned char *out, + const unsigned char* addt, const unsigned char* ivec, + unsigned char *tag, unsigned int nbytes, + unsigned int abytes, unsigned int ibytes, + unsigned int tbytes, const unsigned char* key, int nr) + XASM_LINK("AES_GCM_encrypt"); +#ifdef HAVE_INTEL_AVX1 +void AES_GCM_encrypt_avx1(const unsigned char *in, unsigned char *out, + const unsigned char* addt, const unsigned char* ivec, + unsigned 
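IncrementAesCounter and the new STM32-only DecrementGcmCounter above treat the last four bytes of the block as a big-endian counter and walk backwards until a byte stops wrapping. A standalone sketch of the pair (GCM_CTR_SZ stands in for this file's CTR_SZ):

    #define GCM_CTR_SZ 4   /* assumed stand-in for CTR_SZ */

    static void incCtr(byte* ctr)   /* mirrors IncrementAesCounter */
    {
        int i;
        for (i = AES_BLOCK_SIZE - 1; i >= AES_BLOCK_SIZE - GCM_CTR_SZ; i--) {
            if (++ctr[i] != 0)      /* no carry out of this byte: done */
                return;
        }
    }

    static void decCtr(byte* ctr)   /* mirrors DecrementGcmCounter */
    {
        int i;
        for (i = AES_BLOCK_SIZE - 1; i >= AES_BLOCK_SIZE - GCM_CTR_SZ; i--) {
            if (--ctr[i] != 0xFF)   /* no borrow out of this byte: done */
                return;
        }
    }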
char *tag, unsigned int nbytes, + unsigned int abytes, unsigned int ibytes, + unsigned int tbytes, const unsigned char* key, + int nr) + XASM_LINK("AES_GCM_encrypt_avx1"); +#ifdef HAVE_INTEL_AVX2 +void AES_GCM_encrypt_avx2(const unsigned char *in, unsigned char *out, + const unsigned char* addt, const unsigned char* ivec, + unsigned char *tag, unsigned int nbytes, + unsigned int abytes, unsigned int ibytes, + unsigned int tbytes, const unsigned char* key, + int nr) + XASM_LINK("AES_GCM_encrypt_avx2"); +#endif /* HAVE_INTEL_AVX2 */ +#endif /* HAVE_INTEL_AVX1 */ + +#ifdef HAVE_AES_DECRYPT +void AES_GCM_decrypt(const unsigned char *in, unsigned char *out, + const unsigned char* addt, const unsigned char* ivec, + const unsigned char *tag, int nbytes, int abytes, + int ibytes, int tbytes, const unsigned char* key, int nr, + int* res) + XASM_LINK("AES_GCM_decrypt"); +#ifdef HAVE_INTEL_AVX1 +void AES_GCM_decrypt_avx1(const unsigned char *in, unsigned char *out, + const unsigned char* addt, const unsigned char* ivec, + const unsigned char *tag, int nbytes, int abytes, + int ibytes, int tbytes, const unsigned char* key, + int nr, int* res) + XASM_LINK("AES_GCM_decrypt_avx1"); +#ifdef HAVE_INTEL_AVX2 +void AES_GCM_decrypt_avx2(const unsigned char *in, unsigned char *out, + const unsigned char* addt, const unsigned char* ivec, + const unsigned char *tag, int nbytes, int abytes, + int ibytes, int tbytes, const unsigned char* key, + int nr, int* res) + XASM_LINK("AES_GCM_decrypt_avx2"); +#endif /* HAVE_INTEL_AVX2 */ +#endif /* HAVE_INTEL_AVX1 */ +#endif /* HAVE_AES_DECRYPT */ + +#else /* _MSC_VER */ + +#define S(w,z) ((char)((unsigned long long)(w) >> (8*(7-(z))) & 0xFF)) +#define M128_INIT(x,y) { S((x),7), S((x),6), S((x),5), S((x),4), \ + S((x),3), S((x),2), S((x),1), S((x),0), \ + S((y),7), S((y),6), S((y),5), S((y),4), \ + S((y),3), S((y),2), S((y),1), S((y),0) } + +static const __m128i MOD2_128 = + M128_INIT(0x1, (long long int)0xc200000000000000UL); /* See Intel® Carry-Less Multiplication Instruction @@ -3586,3105 +4273,12 @@ static const __m128i SEVEN = M128_INIT(0x0, 0x7); static const __m128i EIGHT = M128_INIT(0x0, 0x8); #endif -static const __m128i BSWAP_EPI64 = M128_INIT(0x0001020304050607, 0x08090a0b0c0d0e0f); -static const __m128i BSWAP_MASK = M128_INIT(0x08090a0b0c0d0e0f, 0x0001020304050607); - - -#ifndef _MSC_VER - -#define _VAR(a) "" #a "" -#define VAR(a) _VAR(a) - -#define HR %%xmm14 -#define XR %%xmm15 -#define KR %%ebx -#define KR64 %%rbx -#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL) -#define CTR1 128(%%rsp) -#define TR 144(%%rsp) -#define HTR %%rsp -#define STACK_OFFSET 160 -#else -#define CTR1 (%%rsp) -#define TR 16(%%rsp) -#define STACK_OFFSET 32 -#endif - -#define AESENC() \ - "aesenc %%xmm12, %%xmm4\n\t" \ - "aesenc %%xmm12, %%xmm5\n\t" \ - "aesenc %%xmm12, %%xmm6\n\t" \ - "aesenc %%xmm12, %%xmm7\n\t" \ - "aesenc %%xmm12, %%xmm8\n\t" \ - "aesenc %%xmm12, %%xmm9\n\t" \ - "aesenc %%xmm12, %%xmm10\n\t" \ - "aesenc %%xmm12, %%xmm11\n\t" - -#define AESENC_SET(o) \ - "movdqa " #o "(%[KEY]), %%xmm12\n\t" \ - AESENC() - -#define AESENC_CTR() \ - "movdqu " VAR(CTR1) ", %%xmm4\n\t" \ - "movdqa %[BSWAP_EPI64], %%xmm1\n\t" \ - "movdqu %%xmm4, %%xmm0\n\t" \ - "pshufb %%xmm1, %%xmm4\n\t" \ - "movdqa %%xmm0, %%xmm5\n\t" \ - "paddd %[ONE], %%xmm5\n\t" \ - "pshufb %%xmm1, %%xmm5\n\t" \ - "movdqa %%xmm0, %%xmm6\n\t" \ - "paddd %[TWO], %%xmm6\n\t" \ - "pshufb %%xmm1, %%xmm6\n\t" \ - "movdqa %%xmm0, %%xmm7\n\t" \ - "paddd %[THREE], %%xmm7\n\t" \ - "pshufb %%xmm1, 
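These prototypes replace the inline-assembly bodies removed below: the implementations now live in external assembly files, XASM_LINK pins the symbol names, and the caller picks a variant from runtime CPU flags. A sketch of a plausible dispatch site, using wolfCrypt's cpuid helpers (not the exact wolfSSL call site):

    #include <wolfssl/wolfcrypt/aes.h>
    #include <wolfssl/wolfcrypt/cpuid.h>

    static void aesGcmEncryptDispatch(Aes* aes, byte* out, const byte* in,
                                      word32 sz, const byte* iv, word32 ivSz,
                                      byte* tag, word32 tagSz,
                                      const byte* aad, word32 aadSz)
    {
        word32 intel_flags = cpuid_get_flags(); /* normally cached once */

    #ifdef HAVE_INTEL_AVX2
        if (IS_INTEL_AVX2(intel_flags))
            AES_GCM_encrypt_avx2(in, out, aad, iv, tag, sz, aadSz, ivSz,
                                 tagSz, (const byte*)aes->key, (int)aes->rounds);
        else
    #endif
    #ifdef HAVE_INTEL_AVX1
        if (IS_INTEL_AVX1(intel_flags))
            AES_GCM_encrypt_avx1(in, out, aad, iv, tag, sz, aadSz, ivSz,
                                 tagSz, (const byte*)aes->key, (int)aes->rounds);
        else
    #endif
            AES_GCM_encrypt(in, out, aad, iv, tag, sz, aadSz, ivSz,
                            tagSz, (const byte*)aes->key, (int)aes->rounds);
    }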
%%xmm7\n\t" \ - "movdqa %%xmm0, %%xmm8\n\t" \ - "paddd %[FOUR], %%xmm8\n\t" \ - "pshufb %%xmm1, %%xmm8\n\t" \ - "movdqa %%xmm0, %%xmm9\n\t" \ - "paddd %[FIVE], %%xmm9\n\t" \ - "pshufb %%xmm1, %%xmm9\n\t" \ - "movdqa %%xmm0, %%xmm10\n\t" \ - "paddd %[SIX], %%xmm10\n\t" \ - "pshufb %%xmm1, %%xmm10\n\t" \ - "movdqa %%xmm0, %%xmm11\n\t" \ - "paddd %[SEVEN], %%xmm11\n\t" \ - "pshufb %%xmm1, %%xmm11\n\t" \ - "paddd %[EIGHT], %%xmm0\n\t" - -#define AESENC_XOR() \ - "movdqa (%[KEY]), %%xmm12\n\t" \ - "movdqu %%xmm0, " VAR(CTR1) "\n\t" \ - "pxor %%xmm12, %%xmm4\n\t" \ - "pxor %%xmm12, %%xmm5\n\t" \ - "pxor %%xmm12, %%xmm6\n\t" \ - "pxor %%xmm12, %%xmm7\n\t" \ - "pxor %%xmm12, %%xmm8\n\t" \ - "pxor %%xmm12, %%xmm9\n\t" \ - "pxor %%xmm12, %%xmm10\n\t" \ - "pxor %%xmm12, %%xmm11\n\t" - -/* Encrypt and carry-less multiply for AVX1. */ -#define AESENC_PCLMUL_1(src, o1, o2, o3) \ - "movdqu " #o3 "(" VAR(HTR) "), %%xmm12\n\t" \ - "movdqu " #o2 "(" #src "), %%xmm0\n\t" \ - "aesenc " #o1 "(%[KEY]), %%xmm4\n\t" \ - "pshufb %[BSWAP_MASK], %%xmm0\n\t" \ - "pxor %%xmm2, %%xmm0\n\t" \ - "pshufd $0x4e, %%xmm12, %%xmm1\n\t" \ - "pshufd $0x4e, %%xmm0, %%xmm14\n\t" \ - "pxor %%xmm12, %%xmm1\n\t" \ - "pxor %%xmm0, %%xmm14\n\t" \ - "movdqa %%xmm0, %%xmm3\n\t" \ - "pclmulqdq $0x11, %%xmm12, %%xmm3\n\t" \ - "aesenc " #o1 "(%[KEY]), %%xmm5\n\t" \ - "aesenc " #o1 "(%[KEY]), %%xmm6\n\t" \ - "movdqa %%xmm0, %%xmm2\n\t" \ - "pclmulqdq $0x00, %%xmm12, %%xmm2\n\t" \ - "aesenc " #o1 "(%[KEY]), %%xmm7\n\t" \ - "aesenc " #o1 "(%[KEY]), %%xmm8\n\t" \ - "pclmulqdq $0x00, %%xmm14, %%xmm1\n\t" \ - "aesenc " #o1 "(%[KEY]), %%xmm9\n\t" \ - "aesenc " #o1 "(%[KEY]), %%xmm10\n\t" \ - "aesenc " #o1 "(%[KEY]), %%xmm11\n\t" \ - "pxor %%xmm2, %%xmm1\n\t" \ - "pxor %%xmm3, %%xmm1\n\t" \ - -#define AESENC_PCLMUL_N(src, o1, o2, o3) \ - "movdqu " #o3 "(" VAR(HTR) "), %%xmm12\n\t" \ - "movdqu " #o2 "(" #src" ), %%xmm0\n\t" \ - "pshufd $0x4e, %%xmm12, %%xmm13\n\t" \ - "pshufb %[BSWAP_MASK], %%xmm0\n\t" \ - "aesenc " #o1 "(%[KEY]), %%xmm4\n\t" \ - "pxor %%xmm12, %%xmm13\n\t" \ - "pshufd $0x4e, %%xmm0, %%xmm14\n\t" \ - "pxor %%xmm0, %%xmm14\n\t" \ - "movdqa %%xmm0, %%xmm15\n\t" \ - "pclmulqdq $0x11, %%xmm12, %%xmm15\n\t" \ - "aesenc " #o1 "(%[KEY]), %%xmm5\n\t" \ - "aesenc " #o1 "(%[KEY]), %%xmm6\n\t" \ - "pclmulqdq $0x00, %%xmm0, %%xmm12\n\t" \ - "aesenc " #o1 "(%[KEY]), %%xmm7\n\t" \ - "aesenc " #o1 "(%[KEY]), %%xmm8\n\t" \ - "pclmulqdq $0x00, %%xmm14, %%xmm13\n\t" \ - "aesenc " #o1 "(%[KEY]), %%xmm9\n\t" \ - "aesenc " #o1 "(%[KEY]), %%xmm10\n\t" \ - "aesenc " #o1 "(%[KEY]), %%xmm11\n\t" \ - "pxor %%xmm12, %%xmm1\n\t" \ - "pxor %%xmm12, %%xmm2\n\t" \ - "pxor %%xmm15, %%xmm1\n\t" \ - "pxor %%xmm15, %%xmm3\n\t" \ - "pxor %%xmm13, %%xmm1\n\t" \ - -#define AESENC_PCLMUL_L(o) \ - "movdqa %%xmm1, %%xmm14\n\t" \ - "psrldq $8, %%xmm1\n\t" \ - "pslldq $8, %%xmm14\n\t" \ - "aesenc " #o "(%[KEY]), %%xmm4\n\t" \ - "pxor %%xmm14, %%xmm2\n\t" \ - "pxor %%xmm1, %%xmm3\n\t" \ - "movdqa %%xmm2, %%xmm12\n\t" \ - "movdqa %%xmm2, %%xmm13\n\t" \ - "movdqa %%xmm2, %%xmm14\n\t" \ - "aesenc " #o "(%[KEY]), %%xmm5\n\t" \ - "pslld $31, %%xmm12\n\t" \ - "pslld $30, %%xmm13\n\t" \ - "pslld $25, %%xmm14\n\t" \ - "aesenc " #o "(%[KEY]), %%xmm6\n\t" \ - "pxor %%xmm13, %%xmm12\n\t" \ - "pxor %%xmm14, %%xmm12\n\t" \ - "aesenc " #o "(%[KEY]), %%xmm7\n\t" \ - "movdqa %%xmm12, %%xmm13\n\t" \ - "pslldq $12, %%xmm12\n\t" \ - "psrldq $4, %%xmm13\n\t" \ - "aesenc " #o "(%[KEY]), %%xmm8\n\t" \ - "pxor %%xmm12, %%xmm2\n\t" \ - "movdqa %%xmm2, %%xmm14\n\t" \ - "movdqa %%xmm2, %%xmm1\n\t" \ 
- "movdqa %%xmm2, %%xmm0\n\t" \ - "aesenc " #o "(%[KEY]), %%xmm9\n\t" \ - "psrld $1, %%xmm14\n\t" \ - "psrld $2, %%xmm1\n\t" \ - "psrld $7, %%xmm0\n\t" \ - "aesenc " #o "(%[KEY]), %%xmm10\n\t" \ - "pxor %%xmm1, %%xmm14\n\t" \ - "pxor %%xmm0, %%xmm14\n\t" \ - "aesenc " #o "(%[KEY]), %%xmm11\n\t" \ - "pxor %%xmm13, %%xmm14\n\t" \ - "pxor %%xmm14, %%xmm2\n\t" \ - "pxor %%xmm3, %%xmm2\n\t" \ - -/* Encrypt and carry-less multiply with last key. */ -#define AESENC_LAST(in, out) \ - "aesenclast %%xmm12, %%xmm4\n\t" \ - "aesenclast %%xmm12, %%xmm5\n\t" \ - "movdqu (" #in "),%%xmm0\n\t" \ - "movdqu 16(" #in "),%%xmm1\n\t" \ - "pxor %%xmm0, %%xmm4\n\t" \ - "pxor %%xmm1, %%xmm5\n\t" \ - "movdqu %%xmm4, (" #out ")\n\t" \ - "movdqu %%xmm5, 16(" #out ")\n\t" \ - "aesenclast %%xmm12, %%xmm6\n\t" \ - "aesenclast %%xmm12, %%xmm7\n\t" \ - "movdqu 32(" #in "),%%xmm0\n\t" \ - "movdqu 48(" #in "),%%xmm1\n\t" \ - "pxor %%xmm0, %%xmm6\n\t" \ - "pxor %%xmm1, %%xmm7\n\t" \ - "movdqu %%xmm6, 32(" #out ")\n\t" \ - "movdqu %%xmm7, 48(" #out ")\n\t" \ - "aesenclast %%xmm12, %%xmm8\n\t" \ - "aesenclast %%xmm12, %%xmm9\n\t" \ - "movdqu 64(" #in "),%%xmm0\n\t" \ - "movdqu 80(" #in "),%%xmm1\n\t" \ - "pxor %%xmm0, %%xmm8\n\t" \ - "pxor %%xmm1, %%xmm9\n\t" \ - "movdqu %%xmm8, 64(" #out ")\n\t" \ - "movdqu %%xmm9, 80(" #out ")\n\t" \ - "aesenclast %%xmm12, %%xmm10\n\t" \ - "aesenclast %%xmm12, %%xmm11\n\t" \ - "movdqu 96(" #in "),%%xmm0\n\t" \ - "movdqu 112(" #in "),%%xmm1\n\t" \ - "pxor %%xmm0, %%xmm10\n\t" \ - "pxor %%xmm1, %%xmm11\n\t" \ - "movdqu %%xmm10, 96(" #out ")\n\t" \ - "movdqu %%xmm11, 112(" #out ")\n\t" - -#define _AESENC_AVX(r) \ - "aesenc 16(%[KEY]), " #r "\n\t" \ - "aesenc 32(%[KEY]), " #r "\n\t" \ - "aesenc 48(%[KEY]), " #r "\n\t" \ - "aesenc 64(%[KEY]), " #r "\n\t" \ - "aesenc 80(%[KEY]), " #r "\n\t" \ - "aesenc 96(%[KEY]), " #r "\n\t" \ - "aesenc 112(%[KEY]), " #r "\n\t" \ - "aesenc 128(%[KEY]), " #r "\n\t" \ - "aesenc 144(%[KEY]), " #r "\n\t" \ - "cmpl $11, %[nr]\n\t" \ - "movdqa 160(%[KEY]), %%xmm5\n\t" \ - "jl %=f\n\t" \ - "aesenc %%xmm5, " #r "\n\t" \ - "aesenc 176(%[KEY]), " #r "\n\t" \ - "cmpl $13, %[nr]\n\t" \ - "movdqa 192(%[KEY]), %%xmm5\n\t" \ - "jl %=f\n\t" \ - "aesenc %%xmm5, " #r "\n\t" \ - "aesenc 208(%[KEY]), " #r "\n\t" \ - "movdqa 224(%[KEY]), %%xmm5\n\t" \ - "%=:\n\t" \ - "aesenclast %%xmm5, " #r "\n\t" -#define AESENC_AVX(r) \ - _AESENC_AVX(r) - -#define AESENC_BLOCK(in, out) \ - "movdqu " VAR(CTR1) ", %%xmm4\n\t" \ - "movdqu %%xmm4, %%xmm5\n\t" \ - "pshufb %[BSWAP_EPI64], %%xmm4\n\t" \ - "paddd %[ONE], %%xmm5\n\t" \ - "pxor (%[KEY]), %%xmm4\n\t" \ - "movdqu %%xmm5, " VAR(CTR1) "\n\t" \ - AESENC_AVX(%%xmm4) \ - "movdqu (" #in "), %%xmm5\n\t" \ - "pxor %%xmm5, %%xmm4\n\t" \ - "movdqu %%xmm4, (" #out ")\n\t" \ - "pshufb %[BSWAP_MASK], %%xmm4\n\t" \ - "pxor %%xmm4, " VAR(XR) "\n\t" - -#define _AESENC_GFMUL(in, out, H, X) \ - "movdqu " VAR(CTR1) ", %%xmm4\n\t" \ - "movdqu %%xmm4, %%xmm5\n\t" \ - "pshufb %[BSWAP_EPI64], %%xmm4\n\t" \ - "paddd %[ONE], %%xmm5\n\t" \ - "pxor (%[KEY]), %%xmm4\n\t" \ - "movdqu %%xmm5, " VAR(CTR1) "\n\t" \ - "movdqa " #X ", %%xmm6\n\t" \ - "pclmulqdq $0x10, " #H ", %%xmm6\n\t" \ - "aesenc 16(%[KEY]), %%xmm4\n\t" \ - "aesenc 32(%[KEY]), %%xmm4\n\t" \ - "movdqa " #X ", %%xmm7\n\t" \ - "pclmulqdq $0x01, " #H ", %%xmm7\n\t" \ - "aesenc 48(%[KEY]), %%xmm4\n\t" \ - "aesenc 64(%[KEY]), %%xmm4\n\t" \ - "movdqa " #X ", %%xmm8\n\t" \ - "pclmulqdq $0x00, " #H ", %%xmm8\n\t" \ - "aesenc 80(%[KEY]), %%xmm4\n\t" \ - "movdqa " #X ", %%xmm1\n\t" \ - "pclmulqdq $0x11, " #H ", 
%%xmm1\n\t" \ - "aesenc 96(%[KEY]), %%xmm4\n\t" \ - "pxor %%xmm7, %%xmm6\n\t" \ - "movdqa %%xmm6, %%xmm2\n\t" \ - "psrldq $8, %%xmm6\n\t" \ - "pslldq $8, %%xmm2\n\t" \ - "aesenc 112(%[KEY]), %%xmm4\n\t" \ - "movdqa %%xmm1, %%xmm3\n\t" \ - "pxor %%xmm8, %%xmm2\n\t" \ - "pxor %%xmm6, %%xmm3\n\t" \ - "movdqa %[MOD2_128], %%xmm0\n\t" \ - "movdqa %%xmm2, %%xmm7\n\t" \ - "pclmulqdq $0x10, %%xmm0, %%xmm7\n\t" \ - "aesenc 128(%[KEY]), %%xmm4\n\t" \ - "pshufd $0x4e, %%xmm2, %%xmm6\n\t" \ - "pxor %%xmm7, %%xmm6\n\t" \ - "movdqa %%xmm6, %%xmm7\n\t" \ - "pclmulqdq $0x10, %%xmm0, %%xmm7\n\t" \ - "aesenc 144(%[KEY]), %%xmm4\n\t" \ - "pshufd $0x4e, %%xmm6, " VAR(XR) "\n\t" \ - "pxor %%xmm7, " VAR(XR) "\n\t" \ - "pxor %%xmm3, " VAR(XR) "\n\t" \ - "cmpl $11, %[nr]\n\t" \ - "movdqu 160(%[KEY]), %%xmm5\n\t" \ - "jl %=f\n\t" \ - "aesenc %%xmm5, %%xmm4\n\t" \ - "aesenc 176(%[KEY]), %%xmm4\n\t" \ - "cmpl $13, %[nr]\n\t" \ - "movdqu 192(%[KEY]), %%xmm5\n\t" \ - "jl %=f\n\t" \ - "aesenc %%xmm5, %%xmm4\n\t" \ - "aesenc 208(%[KEY]), %%xmm4\n\t" \ - "movdqa 224(%[KEY]), %%xmm5\n\t" \ - "%=:\n\t" \ - "aesenclast %%xmm5, %%xmm4\n\t" \ - "movdqu (" #in "), %%xmm5\n\t" \ - "pxor %%xmm5, %%xmm4\n\t" \ - "movdqu %%xmm4, (" #out ")\n\t" -#define AESENC_GFMUL(in, out, H, X) \ - _AESENC_GFMUL(in, out, H, X) - -#define _GHASH_GFMUL_AVX(r, r2, a, b) \ - "pshufd $0x4e, "#a", %%xmm1\n\t" \ - "pshufd $0x4e, "#b", %%xmm2\n\t" \ - "movdqa "#b", %%xmm3\n\t" \ - "movdqa "#b", %%xmm0\n\t" \ - "pclmulqdq $0x11, "#a", %%xmm3\n\t" \ - "pclmulqdq $0x00, "#a", %%xmm0\n\t" \ - "pxor "#a", %%xmm1\n\t" \ - "pxor "#b", %%xmm2\n\t" \ - "pclmulqdq $0x00, %%xmm2, %%xmm1\n\t" \ - "pxor %%xmm0, %%xmm1\n\t" \ - "pxor %%xmm3, %%xmm1\n\t" \ - "movdqa %%xmm1, %%xmm2\n\t" \ - "movdqa %%xmm0, "#r2"\n\t" \ - "movdqa %%xmm3, " #r "\n\t" \ - "pslldq $8, %%xmm2\n\t" \ - "psrldq $8, %%xmm1\n\t" \ - "pxor %%xmm2, "#r2"\n\t" \ - "pxor %%xmm1, " #r "\n\t" -#define GHASH_GFMUL_AVX(r, r2, a, b) \ - _GHASH_GFMUL_AVX(r, r2, a, b) - -#define _GHASH_GFMUL_XOR_AVX(r, r2, a, b) \ - "pshufd $0x4e, "#a", %%xmm1\n\t" \ - "pshufd $0x4e, "#b", %%xmm2\n\t" \ - "movdqa "#b", %%xmm3\n\t" \ - "movdqa "#b", %%xmm0\n\t" \ - "pclmulqdq $0x11, "#a", %%xmm3\n\t" \ - "pclmulqdq $0x00, "#a", %%xmm0\n\t" \ - "pxor "#a", %%xmm1\n\t" \ - "pxor "#b", %%xmm2\n\t" \ - "pclmulqdq $0x00, %%xmm2, %%xmm1\n\t" \ - "pxor %%xmm0, %%xmm1\n\t" \ - "pxor %%xmm3, %%xmm1\n\t" \ - "movdqa %%xmm1, %%xmm2\n\t" \ - "pxor %%xmm0, "#r2"\n\t" \ - "pxor %%xmm3, " #r "\n\t" \ - "pslldq $8, %%xmm2\n\t" \ - "psrldq $8, %%xmm1\n\t" \ - "pxor %%xmm2, "#r2"\n\t" \ - "pxor %%xmm1, " #r "\n\t" -#define GHASH_GFMUL_XOR_AVX(r, r2, a, b) \ - _GHASH_GFMUL_XOR_AVX(r, r2, a, b) - -#define GHASH_MID_AVX(r, r2) \ - "movdqa "#r2", %%xmm0\n\t" \ - "movdqa " #r ", %%xmm1\n\t" \ - "psrld $31, %%xmm0\n\t" \ - "psrld $31, %%xmm1\n\t" \ - "pslld $1, "#r2"\n\t" \ - "pslld $1, " #r "\n\t" \ - "movdqa %%xmm0, %%xmm2\n\t" \ - "pslldq $4, %%xmm0\n\t" \ - "psrldq $12, %%xmm2\n\t" \ - "pslldq $4, %%xmm1\n\t" \ - "por %%xmm2, " #r "\n\t" \ - "por %%xmm0, "#r2"\n\t" \ - "por %%xmm1, " #r "\n\t" - -#define _GHASH_GFMUL_RED_AVX(r, a, b) \ - "pshufd $0x4e, "#a", %%xmm5\n\t" \ - "pshufd $0x4e, "#b", %%xmm6\n\t" \ - "movdqa "#b", %%xmm7\n\t" \ - "movdqa "#b", %%xmm4\n\t" \ - "pclmulqdq $0x11, "#a", %%xmm7\n\t" \ - "pclmulqdq $0x00, "#a", %%xmm4\n\t" \ - "pxor "#a", %%xmm5\n\t" \ - "pxor "#b", %%xmm6\n\t" \ - "pclmulqdq $0x00, %%xmm6, %%xmm5\n\t" \ - "pxor %%xmm4, %%xmm5\n\t" \ - "pxor %%xmm7, %%xmm5\n\t" \ - "movdqa %%xmm5, %%xmm6\n\t" \ - "movdqa 
%%xmm7, " #r "\n\t" \ - "pslldq $8, %%xmm6\n\t" \ - "psrldq $8, %%xmm5\n\t" \ - "pxor %%xmm6, %%xmm4\n\t" \ - "pxor %%xmm5, " #r "\n\t" \ - "movdqa %%xmm4, %%xmm8\n\t" \ - "movdqa %%xmm4, %%xmm9\n\t" \ - "movdqa %%xmm4, %%xmm10\n\t" \ - "pslld $31, %%xmm8\n\t" \ - "pslld $30, %%xmm9\n\t" \ - "pslld $25, %%xmm10\n\t" \ - "pxor %%xmm9, %%xmm8\n\t" \ - "pxor %%xmm10, %%xmm8\n\t" \ - "movdqa %%xmm8, %%xmm9\n\t" \ - "psrldq $4, %%xmm9\n\t" \ - "pslldq $12, %%xmm8\n\t" \ - "pxor %%xmm8, %%xmm4\n\t" \ - "movdqa %%xmm4, %%xmm10\n\t" \ - "movdqa %%xmm4, %%xmm6\n\t" \ - "movdqa %%xmm4, %%xmm5\n\t" \ - "psrld $1, %%xmm10\n\t" \ - "psrld $2, %%xmm6\n\t" \ - "psrld $7, %%xmm5\n\t" \ - "pxor %%xmm6, %%xmm10\n\t" \ - "pxor %%xmm5, %%xmm10\n\t" \ - "pxor %%xmm9, %%xmm10\n\t" \ - "pxor %%xmm4, %%xmm10\n\t" \ - "pxor %%xmm10, " #r "\n\t" -#define GHASH_GFMUL_RED_AVX(r, a, b) \ - _GHASH_GFMUL_RED_AVX(r, a, b) - -#define GHASH_RED_AVX(r, r2) \ - "movdqa "#r2", %%xmm0\n\t" \ - "movdqa "#r2", %%xmm1\n\t" \ - "movdqa "#r2", %%xmm2\n\t" \ - "pslld $31, %%xmm0\n\t" \ - "pslld $30, %%xmm1\n\t" \ - "pslld $25, %%xmm2\n\t" \ - "pxor %%xmm1, %%xmm0\n\t" \ - "pxor %%xmm2, %%xmm0\n\t" \ - "movdqa %%xmm0, %%xmm1\n\t" \ - "psrldq $4, %%xmm1\n\t" \ - "pslldq $12, %%xmm0\n\t" \ - "pxor %%xmm0, "#r2"\n\t" \ - "movdqa "#r2", %%xmm2\n\t" \ - "movdqa "#r2", %%xmm3\n\t" \ - "movdqa "#r2", %%xmm0\n\t" \ - "psrld $1, %%xmm2\n\t" \ - "psrld $2, %%xmm3\n\t" \ - "psrld $7, %%xmm0\n\t" \ - "pxor %%xmm3, %%xmm2\n\t" \ - "pxor %%xmm0, %%xmm2\n\t" \ - "pxor %%xmm1, %%xmm2\n\t" \ - "pxor "#r2", %%xmm2\n\t" \ - "pxor %%xmm2, " #r "\n\t" - -#define GHASH_GFMUL_RED_XOR_AVX(r, r2, a, b) \ - GHASH_GFMUL_XOR_AVX(r, r2, a, b) \ - GHASH_RED_AVX(r, r2) - -#define GHASH_FULL_AVX(r, r2, a, b) \ - GHASH_GFMUL_AVX(r, r2, a, b) \ - GHASH_MID_AVX(r, r2) \ - GHASH_RED_AVX(r, r2) - -#define CALC_IV_12() \ - "# Calculate values when IV is 12 bytes\n\t" \ - "# Set counter based on IV\n\t" \ - "movl $0x01000000, %%ecx\n\t" \ - "pinsrq $0, 0(%%rax), %%xmm13\n\t" \ - "pinsrd $2, 8(%%rax), %%xmm13\n\t" \ - "pinsrd $3, %%ecx, %%xmm13\n\t" \ - "# H = Encrypt X(=0) and T = Encrypt counter\n\t" \ - "movdqu %%xmm13, %%xmm1\n\t" \ - "movdqa 0(%[KEY]), " VAR(HR) "\n\t" \ - "pxor " VAR(HR) ", %%xmm1\n\t" \ - "movdqa 16(%[KEY]), %%xmm12\n\t" \ - "aesenc %%xmm12, " VAR(HR) "\n\t" \ - "aesenc %%xmm12, %%xmm1\n\t" \ - "movdqa 32(%[KEY]), %%xmm12\n\t" \ - "aesenc %%xmm12, " VAR(HR) "\n\t" \ - "aesenc %%xmm12, %%xmm1\n\t" \ - "movdqa 48(%[KEY]), %%xmm12\n\t" \ - "aesenc %%xmm12, " VAR(HR) "\n\t" \ - "aesenc %%xmm12, %%xmm1\n\t" \ - "movdqa 64(%[KEY]), %%xmm12\n\t" \ - "aesenc %%xmm12, " VAR(HR) "\n\t" \ - "aesenc %%xmm12, %%xmm1\n\t" \ - "movdqa 80(%[KEY]), %%xmm12\n\t" \ - "aesenc %%xmm12, " VAR(HR) "\n\t" \ - "aesenc %%xmm12, %%xmm1\n\t" \ - "movdqa 96(%[KEY]), %%xmm12\n\t" \ - "aesenc %%xmm12, " VAR(HR) "\n\t" \ - "aesenc %%xmm12, %%xmm1\n\t" \ - "movdqa 112(%[KEY]), %%xmm12\n\t" \ - "aesenc %%xmm12, " VAR(HR) "\n\t" \ - "aesenc %%xmm12, %%xmm1\n\t" \ - "movdqa 128(%[KEY]), %%xmm12\n\t" \ - "aesenc %%xmm12, " VAR(HR) "\n\t" \ - "aesenc %%xmm12, %%xmm1\n\t" \ - "movdqa 144(%[KEY]), %%xmm12\n\t" \ - "aesenc %%xmm12, " VAR(HR) "\n\t" \ - "aesenc %%xmm12, %%xmm1\n\t" \ - "cmpl $11, %[nr]\n\t" \ - "movdqa 160(%[KEY]), %%xmm12\n\t" \ - "jl 31f\n\t" \ - "aesenc %%xmm12, " VAR(HR) "\n\t" \ - "aesenc %%xmm12, %%xmm1\n\t" \ - "movdqa 176(%[KEY]), %%xmm12\n\t" \ - "aesenc %%xmm12, " VAR(HR) "\n\t" \ - "aesenc %%xmm12, %%xmm1\n\t" \ - "cmpl $13, %[nr]\n\t" \ - "movdqa 192(%[KEY]), 
%%xmm12\n\t" \ - "jl 31f\n\t" \ - "aesenc %%xmm12, " VAR(HR) "\n\t" \ - "aesenc %%xmm12, %%xmm1\n\t" \ - "movdqu 208(%[KEY]), %%xmm12\n\t" \ - "aesenc %%xmm12, " VAR(HR) "\n\t" \ - "aesenc %%xmm12, %%xmm1\n\t" \ - "movdqu 224(%[KEY]), %%xmm12\n\t" \ - "31:\n\t" \ - "aesenclast %%xmm12, " VAR(HR) "\n\t" \ - "aesenclast %%xmm12, %%xmm1\n\t" \ - "pshufb %[BSWAP_MASK], " VAR(HR) "\n\t" \ - "movdqu %%xmm1, " VAR(TR) "\n\t" \ - "jmp 39f\n\t" - -#define CALC_IV() \ - "# Calculate values when IV is not 12 bytes\n\t" \ - "# H = Encrypt X(=0)\n\t" \ - "movdqa 0(%[KEY]), " VAR(HR) "\n\t" \ - AESENC_AVX(HR) \ - "pshufb %[BSWAP_MASK], " VAR(HR) "\n\t" \ - "# Calc counter\n\t" \ - "# Initialization vector\n\t" \ - "cmpl $0, %%edx\n\t" \ - "movq $0, %%rcx\n\t" \ - "je 45f\n\t" \ - "cmpl $16, %%edx\n\t" \ - "jl 44f\n\t" \ - "andl $0xfffffff0, %%edx\n\t" \ - "\n" \ - "43:\n\t" \ - "movdqu (%%rax,%%rcx,1), %%xmm4\n\t" \ - "pshufb %[BSWAP_MASK], %%xmm4\n\t" \ - "pxor %%xmm4, %%xmm13\n\t" \ - GHASH_FULL_AVX(%%xmm13, %%xmm12, %%xmm13, HR) \ - "addl $16, %%ecx\n\t" \ - "cmpl %%edx, %%ecx\n\t" \ - "jl 43b\n\t" \ - "movl %[ibytes], %%edx\n\t" \ - "cmpl %%edx, %%ecx\n\t" \ - "je 45f\n\t" \ - "\n" \ - "44:\n\t" \ - "subq $16, %%rsp\n\t" \ - "pxor %%xmm4, %%xmm4\n\t" \ - "xorl %%ebx, %%ebx\n\t" \ - "movdqu %%xmm4, (%%rsp)\n\t" \ - "42:\n\t" \ - "movzbl (%%rax,%%rcx,1), %%r13d\n\t" \ - "movb %%r13b, (%%rsp,%%rbx,1)\n\t" \ - "incl %%ecx\n\t" \ - "incl %%ebx\n\t" \ - "cmpl %%edx, %%ecx\n\t" \ - "jl 42b\n\t" \ - "movdqu (%%rsp), %%xmm4\n\t" \ - "addq $16, %%rsp\n\t" \ - "pshufb %[BSWAP_MASK], %%xmm4\n\t" \ - "pxor %%xmm4, %%xmm13\n\t" \ - GHASH_FULL_AVX(%%xmm13, %%xmm12, %%xmm13, HR) \ - "\n" \ - "45:\n\t" \ - "# T = Encrypt counter\n\t" \ - "pxor %%xmm0, %%xmm0\n\t" \ - "shll $3, %%edx\n\t" \ - "pinsrq $0, %%rdx, %%xmm0\n\t" \ - "pxor %%xmm0, %%xmm13\n\t" \ - GHASH_FULL_AVX(%%xmm13, %%xmm12, %%xmm13, HR) \ - "pshufb %[BSWAP_MASK], %%xmm13\n\t" \ - "# Encrypt counter\n\t" \ - "movdqa 0(%[KEY]), %%xmm4\n\t" \ - "pxor %%xmm13, %%xmm4\n\t" \ - AESENC_AVX(%%xmm4) \ - "movdqu %%xmm4, " VAR(TR) "\n\t" - -#define CALC_AAD() \ - "# Additional authentication data\n\t" \ - "movl %[abytes], %%edx\n\t" \ - "cmpl $0, %%edx\n\t" \ - "je 25f\n\t" \ - "movq %[addt], %%rax\n\t" \ - "xorl %%ecx, %%ecx\n\t" \ - "cmpl $16, %%edx\n\t" \ - "jl 24f\n\t" \ - "andl $0xfffffff0, %%edx\n\t" \ - "\n" \ - "23:\n\t" \ - "movdqu (%%rax,%%rcx,1), %%xmm4\n\t" \ - "pshufb %[BSWAP_MASK], %%xmm4\n\t" \ - "pxor %%xmm4, " VAR(XR) "\n\t" \ - GHASH_FULL_AVX(XR, %%xmm12, XR, HR) \ - "addl $16, %%ecx\n\t" \ - "cmpl %%edx, %%ecx\n\t" \ - "jl 23b\n\t" \ - "movl %[abytes], %%edx\n\t" \ - "cmpl %%edx, %%ecx\n\t" \ - "je 25f\n\t" \ - "\n" \ - "24:\n\t" \ - "subq $16, %%rsp\n\t" \ - "pxor %%xmm4, %%xmm4\n\t" \ - "xorl %%ebx, %%ebx\n\t" \ - "movdqu %%xmm4, (%%rsp)\n\t" \ - "22:\n\t" \ - "movzbl (%%rax,%%rcx,1), %%r13d\n\t" \ - "movb %%r13b, (%%rsp,%%rbx,1)\n\t" \ - "incl %%ecx\n\t" \ - "incl %%ebx\n\t" \ - "cmpl %%edx, %%ecx\n\t" \ - "jl 22b\n\t" \ - "movdqu (%%rsp), %%xmm4\n\t" \ - "addq $16, %%rsp\n\t" \ - "pshufb %[BSWAP_MASK], %%xmm4\n\t" \ - "pxor %%xmm4, " VAR(XR) "\n\t" \ - GHASH_FULL_AVX(XR, %%xmm12, XR, HR) \ - "\n" \ - "25:\n\t" - -#define CALC_HT_8_AVX() \ - "movdqa " VAR(XR) ", %%xmm2\n\t" \ - "# H ^ 1\n\t" \ - "movdqu " VAR(HR) ", 0(" VAR(HTR) ")\n\t" \ - "# H ^ 2\n\t" \ - GHASH_GFMUL_RED_AVX(%%xmm0, HR, HR) \ - "movdqu %%xmm0 , 16(" VAR(HTR) ")\n\t" \ - "# H ^ 3\n\t" \ - GHASH_GFMUL_RED_AVX(%%xmm1, HR, %%xmm0) \ - "movdqu %%xmm1 , 32(" VAR(HTR) ")\n\t" \ 
- "# H ^ 4\n\t" \ - GHASH_GFMUL_RED_AVX(%%xmm3, %%xmm0, %%xmm0) \ - "movdqu %%xmm3 , 48(" VAR(HTR) ")\n\t" \ - "# H ^ 5\n\t" \ - GHASH_GFMUL_RED_AVX(%%xmm12, %%xmm0, %%xmm1) \ - "movdqu %%xmm12, 64(" VAR(HTR) ")\n\t" \ - "# H ^ 6\n\t" \ - GHASH_GFMUL_RED_AVX(%%xmm12, %%xmm1, %%xmm1) \ - "movdqu %%xmm12, 80(" VAR(HTR) ")\n\t" \ - "# H ^ 7\n\t" \ - GHASH_GFMUL_RED_AVX(%%xmm12, %%xmm1, %%xmm3) \ - "movdqu %%xmm12, 96(" VAR(HTR) ")\n\t" \ - "# H ^ 8\n\t" \ - GHASH_GFMUL_RED_AVX(%%xmm12, %%xmm3, %%xmm3) \ - "movdqu %%xmm12, 112(" VAR(HTR) ")\n\t" - -#define AESENC_128_GHASH_AVX(src, o) \ - "leaq (%[in]," VAR(KR64) ",1), %%rcx\n\t" \ - "leaq (%[out]," VAR(KR64) ",1), %%rdx\n\t" \ - /* src is either %%rcx or %%rdx */ \ - AESENC_CTR() \ - AESENC_XOR() \ - AESENC_PCLMUL_1(src, 16, o-128, 112) \ - AESENC_PCLMUL_N(src, 32, o-112, 96) \ - AESENC_PCLMUL_N(src, 48, o -96, 80) \ - AESENC_PCLMUL_N(src, 64, o -80, 64) \ - AESENC_PCLMUL_N(src, 80, o -64, 48) \ - AESENC_PCLMUL_N(src, 96, o -48, 32) \ - AESENC_PCLMUL_N(src, 112, o -32, 16) \ - AESENC_PCLMUL_N(src, 128, o -16, 0) \ - AESENC_PCLMUL_L(144) \ - "cmpl $11, %[nr]\n\t" \ - "movdqa 160(%[KEY]), %%xmm12\n\t" \ - "jl 4f\n\t" \ - AESENC() \ - AESENC_SET(176) \ - "cmpl $13, %[nr]\n\t" \ - "movdqa 192(%[KEY]), %%xmm12\n\t" \ - "jl 4f\n\t" \ - AESENC() \ - AESENC_SET(208) \ - "movdqa 224(%[KEY]), %%xmm12\n\t" \ - "\n" \ -"4:\n\t" \ - AESENC_LAST(%%rcx, %%rdx) - -#define AESENC_LAST15_ENC_AVX() \ - "movl %[nbytes], %%ecx\n\t" \ - "movl %%ecx, %%edx\n\t" \ - "andl $0x0f, %%ecx\n\t" \ - "jz 55f\n\t" \ - "movdqu " VAR(CTR1) ", %%xmm13\n\t" \ - "pshufb %[BSWAP_EPI64], %%xmm13\n\t" \ - "pxor 0(%[KEY]), %%xmm13\n\t" \ - AESENC_AVX(%%xmm13) \ - "subq $16, %%rsp\n\t" \ - "xorl %%ecx, %%ecx\n\t" \ - "movdqu %%xmm13, (%%rsp)\n\t" \ - "\n" \ - "51:\n\t" \ - "movzbl (%[in]," VAR(KR64) ",1), %%r13d\n\t" \ - "xorb (%%rsp,%%rcx,1), %%r13b\n\t" \ - "movb %%r13b, (%[out]," VAR(KR64) ",1)\n\t" \ - "movb %%r13b, (%%rsp,%%rcx,1)\n\t" \ - "incl " VAR(KR) "\n\t" \ - "incl %%ecx\n\t" \ - "cmpl %%edx, " VAR(KR) "\n\t" \ - "jl 51b\n\t" \ - "xorq %%r13, %%r13\n\t" \ - "cmpl $16, %%ecx\n\t" \ - "je 53f\n\t" \ - "\n" \ - "52:\n\t" \ - "movb %%r13b, (%%rsp,%%rcx,1)\n\t" \ - "incl %%ecx\n\t" \ - "cmpl $16, %%ecx\n\t" \ - "jl 52b\n\t" \ - "53:\n\t" \ - "movdqu (%%rsp), %%xmm13\n\t" \ - "addq $16, %%rsp\n\t" \ - "pshufb %[BSWAP_MASK], %%xmm13\n\t" \ - "pxor %%xmm13, " VAR(XR) "\n\t" \ - GHASH_GFMUL_RED_AVX(XR, HR, XR) \ - -#define AESENC_LAST15_DEC_AVX() \ - "movl %[nbytes], %%ecx\n\t" \ - "movl %%ecx, %%edx\n\t" \ - "andl $0x0f, %%ecx\n\t" \ - "jz 55f\n\t" \ - "movdqu " VAR(CTR1) ", %%xmm13\n\t" \ - "pshufb %[BSWAP_EPI64], %%xmm13\n\t" \ - "pxor 0(%[KEY]), %%xmm13\n\t" \ - AESENC_AVX(%%xmm13) \ - "subq $32, %%rsp\n\t" \ - "xorl %%ecx, %%ecx\n\t" \ - "movdqu %%xmm13, (%%rsp)\n\t" \ - "pxor %%xmm0, %%xmm0\n\t" \ - "movdqu %%xmm0, 16(%%rsp)\n\t" \ - "\n" \ - "51:\n\t" \ - "movzbl (%[in]," VAR(KR64) ",1), %%r13d\n\t" \ - "movb %%r13b, 16(%%rsp,%%rcx,1)\n\t" \ - "xorb (%%rsp,%%rcx,1), %%r13b\n\t" \ - "movb %%r13b, (%[out]," VAR(KR64) ",1)\n\t" \ - "incl " VAR(KR) "\n\t" \ - "incl %%ecx\n\t" \ - "cmpl %%edx, " VAR(KR) "\n\t" \ - "jl 51b\n\t" \ - "53:\n\t" \ - "movdqu 16(%%rsp), %%xmm13\n\t" \ - "addq $32, %%rsp\n\t" \ - "pshufb %[BSWAP_MASK], %%xmm13\n\t" \ - "pxor %%xmm13, " VAR(XR) "\n\t" \ - GHASH_GFMUL_RED_AVX(XR, HR, XR) \ - -#define CALC_TAG() \ - "movl %[nbytes], %%edx\n\t" \ - "movl %[abytes], %%ecx\n\t" \ - "shlq $3, %%rdx\n\t" \ - "shlq $3, %%rcx\n\t" \ - "pinsrq $0, %%rdx, 
%%xmm0\n\t" \ - "pinsrq $1, %%rcx, %%xmm0\n\t" \ - "pxor %%xmm0, " VAR(XR) "\n\t" \ - GHASH_GFMUL_RED_AVX(XR, HR, XR) \ - "pshufb %[BSWAP_MASK], " VAR(XR) "\n\t" \ - "movdqu " VAR(TR) ", %%xmm0\n\t" \ - "pxor " VAR(XR) ", %%xmm0\n\t" \ - -#define STORE_TAG() \ - "cmpl $16, %[tbytes]\n\t" \ - "je 71f\n\t" \ - "xorq %%rcx, %%rcx\n\t" \ - "movdqu %%xmm0, (%%rsp)\n\t" \ - "73:\n\t" \ - "movzbl (%%rsp,%%rcx,1), %%r13d\n\t" \ - "movb %%r13b, (%[tag],%%rcx,1)\n\t" \ - "incl %%ecx\n\t" \ - "cmpl %[tbytes], %%ecx\n\t" \ - "jne 73b\n\t" \ - "jmp 72f\n\t" \ - "\n" \ - "71:\n\t" \ - "movdqu %%xmm0, (%[tag])\n\t" \ - "\n" \ - "72:\n\t" - -#define CMP_TAG() \ - "cmpl $16, %[tbytes]\n\t" \ - "je 71f\n\t" \ - "subq $16, %%rsp\n\t" \ - "xorq %%rcx, %%rcx\n\t" \ - "xorq %%rax, %%rax\n\t" \ - "movdqu %%xmm0, (%%rsp)\n\t" \ - "\n" \ - "73:\n\t" \ - "movzbl (%%rsp,%%rcx,1), %%r13d\n\t" \ - "xorb (%[tag],%%rcx,1), %%r13b\n\t" \ - "orb %%r13b, %%al\n\t" \ - "incl %%ecx\n\t" \ - "cmpl %[tbytes], %%ecx\n\t" \ - "jne 73b\n\t" \ - "cmpb $0x00, %%al\n\t" \ - "sete %%al\n\t" \ - "addq $16, %%rsp\n\t" \ - "xorq %%rcx, %%rcx\n\t" \ - "jmp 72f\n\t" \ - "\n" \ - "71:\n\t" \ - "movdqu (%[tag]), %%xmm1\n\t" \ - "pcmpeqb %%xmm1, %%xmm0\n\t" \ - "pmovmskb %%xmm0, %%edx\n\t" \ - "# %%edx == 0xFFFF then return 1 else => return 0\n\t" \ - "xorl %%eax, %%eax\n\t" \ - "cmpl $0xffff, %%edx\n\t" \ - "sete %%al\n\t" \ - "\n" \ - "72:\n\t" \ - "movl %%eax, (%[res])\n\t" - -static void AES_GCM_encrypt(const unsigned char *in, unsigned char *out, - const unsigned char* addt, - const unsigned char* ivec, unsigned char *tag, - unsigned int nbytes, unsigned int abytes, - unsigned int ibytes, unsigned int tbytes, - const unsigned char* key, int nr) -{ - register const unsigned char* iv asm("rax") = ivec; - register unsigned int ivLen asm("ebx") = ibytes; - - __asm__ __volatile__ ( - "subq $" VAR(STACK_OFFSET) ", %%rsp\n\t" - /* Counter is xmm13 */ - "pxor %%xmm13, %%xmm13\n\t" - "pxor " VAR(XR) ", " VAR(XR) "\n\t" - "movl %[ibytes], %%edx\n\t" - "cmpl $12, %%edx\n\t" - "jne 35f\n\t" - CALC_IV_12() - "\n" - "35:\n\t" - CALC_IV() - "\n" - "39:\n\t" - - CALC_AAD() - - "# Calculate counter and H\n\t" - "pshufb %[BSWAP_EPI64], %%xmm13\n\t" - "movdqa " VAR(HR) ", %%xmm5\n\t" - "paddd %[ONE], %%xmm13\n\t" - "movdqa " VAR(HR) ", %%xmm4\n\t" - "movdqu %%xmm13, " VAR(CTR1) "\n\t" - "psrlq $63, %%xmm5\n\t" - "psllq $1, %%xmm4\n\t" - "pslldq $8, %%xmm5\n\t" - "por %%xmm5, %%xmm4\n\t" - "pshufd $0xff, " VAR(HR) ", " VAR(HR) "\n\t" - "psrad $31, " VAR(HR) "\n\t" - "pand %[MOD2_128], " VAR(HR) "\n\t" - "pxor %%xmm4, " VAR(HR) "\n\t" - - "xorl " VAR(KR) ", " VAR(KR) "\n\t" - -#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL) - "cmpl $128, %[nbytes]\n\t" - "movl %[nbytes], %%r13d\n\t" - "jl 5f\n\t" - "andl $0xffffff80, %%r13d\n\t" - - CALC_HT_8_AVX() - - "# First 128 bytes of input\n\t" - AESENC_CTR() - AESENC_XOR() - AESENC_SET(16) - AESENC_SET(32) - AESENC_SET(48) - AESENC_SET(64) - AESENC_SET(80) - AESENC_SET(96) - AESENC_SET(112) - AESENC_SET(128) - AESENC_SET(144) - "cmpl $11, %[nr]\n\t" - "movdqa 160(%[KEY]), %%xmm12\n\t" - "jl 1f\n\t" - AESENC() - AESENC_SET(176) - "cmpl $13, %[nr]\n\t" - "movdqa 192(%[KEY]), %%xmm12\n\t" - "jl 1f\n\t" - AESENC() - AESENC_SET(208) - "movdqa 224(%[KEY]), %%xmm12\n\t" - "\n" - "1:\n\t" - AESENC_LAST(%[in], %[out]) - - "cmpl $128, %%r13d\n\t" - "movl $128, " VAR(KR) "\n\t" - "jle 2f\n\t" - - "# More 128 bytes of input\n\t" - "\n" - "3:\n\t" - AESENC_128_GHASH_AVX(%%rdx, 0) - "addl $128, " VAR(KR) 
"\n\t" - "cmpl %%r13d, " VAR(KR) "\n\t" - "jl 3b\n\t" - "\n" - "2:\n\t" - "movdqa %[BSWAP_MASK], %%xmm13\n\t" - "pshufb %%xmm13, %%xmm4\n\t" - "pshufb %%xmm13, %%xmm5\n\t" - "pshufb %%xmm13, %%xmm6\n\t" - "pshufb %%xmm13, %%xmm7\n\t" - "pxor %%xmm2, %%xmm4\n\t" - "pshufb %%xmm13, %%xmm8\n\t" - "pshufb %%xmm13, %%xmm9\n\t" - "pshufb %%xmm13, %%xmm10\n\t" - "pshufb %%xmm13, %%xmm11\n\t" - - "movdqu 112(" VAR(HTR) "), %%xmm12\n\t" - GHASH_GFMUL_AVX(XR, %%xmm13, %%xmm4, %%xmm12) - "movdqu 96(" VAR(HTR) "), %%xmm12\n\t" - GHASH_GFMUL_XOR_AVX(XR, %%xmm13, %%xmm5, %%xmm12) - "movdqu 80(" VAR(HTR) "), %%xmm12\n\t" - GHASH_GFMUL_XOR_AVX(XR, %%xmm13, %%xmm6, %%xmm12) - "movdqu 64(" VAR(HTR) "), %%xmm12\n\t" - GHASH_GFMUL_XOR_AVX(XR, %%xmm13, %%xmm7, %%xmm12) - "movdqu 48(" VAR(HTR) "), %%xmm12\n\t" - GHASH_GFMUL_XOR_AVX(XR, %%xmm13, %%xmm8, %%xmm12) - "movdqu 32(" VAR(HTR) "), %%xmm12\n\t" - GHASH_GFMUL_XOR_AVX(XR, %%xmm13, %%xmm9, %%xmm12) - "movdqu 16(" VAR(HTR) "), %%xmm12\n\t" - GHASH_GFMUL_XOR_AVX(XR, %%xmm13, %%xmm10, %%xmm12) - "movdqu (" VAR(HTR) "), %%xmm12\n\t" - GHASH_GFMUL_RED_XOR_AVX(XR, %%xmm13, %%xmm11, %%xmm12) - - "movdqu 0(" VAR(HTR) "), " VAR(HR) "\n\t" - "\n" - "5:\n\t" - "movl %[nbytes], %%edx\n\t" - "cmpl %%edx, " VAR(KR) "\n\t" - "jge 55f\n\t" -#endif - - "movl %[nbytes], %%r13d\n\t" - "andl $0xfffffff0, %%r13d\n\t" - "cmpl %%r13d, " VAR(KR) "\n\t" - "jge 14f\n\t" - - "leaq (%[in]," VAR(KR64) ",1), %%rcx\n\t" - "leaq (%[out]," VAR(KR64) ",1), %%rdx\n\t" - AESENC_BLOCK(%%rcx, %%rdx) - "addl $16, " VAR(KR) "\n\t" - "cmpl %%r13d, " VAR(KR) "\n\t" - "jge 13f\n\t" - "\n" - "12:\n\t" - "leaq (%[in]," VAR(KR64) ",1), %%rcx\n\t" - "leaq (%[out]," VAR(KR64) ",1), %%rdx\n\t" - AESENC_GFMUL(%%rcx, %%rdx, HR, XR) - "pshufb %[BSWAP_MASK], %%xmm4\n\t" - "pxor %%xmm4, " VAR(XR) "\n\t" - "addl $16, " VAR(KR) "\n\t" - "cmpl %%r13d, " VAR(KR) "\n\t" - "jl 12b\n\t" - "\n" - "13:\n\t" - GHASH_GFMUL_RED_AVX(XR, HR, XR) - "\n" - "14:\n\t" - - AESENC_LAST15_ENC_AVX() - "\n" - "55:\n\t" - - CALC_TAG() - STORE_TAG() - "addq $" VAR(STACK_OFFSET) ", %%rsp\n\t" - - : - : [KEY] "r" (key), - [in] "r" (in), [out] "r" (out), [nr] "r" (nr), - [nbytes] "r" (nbytes), [abytes] "r" (abytes), [addt] "r" (addt), - [ivec] "r" (iv), [ibytes] "r" (ivLen), [tbytes] "r" (tbytes), - [tag] "r" (tag), - [BSWAP_MASK] "m" (BSWAP_MASK), - [BSWAP_EPI64] "m" (BSWAP_EPI64), - [ONE] "m" (ONE), -#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL) - [TWO] "m" (TWO), [THREE] "m" (THREE), [FOUR] "m" (FOUR), - [FIVE] "m" (FIVE), [SIX] "m" (SIX), [SEVEN] "m" (SEVEN), - [EIGHT] "m" (EIGHT), -#endif - [MOD2_128] "m" (MOD2_128) - : "xmm15", "xmm14", "xmm13", "xmm12", - "xmm0", "xmm1", "xmm2", "xmm3", "memory", - "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", - "rcx", "rdx", "r13" - ); -} - -#ifdef HAVE_INTEL_AVX1 -/* Encrypt with key in xmm12. 
*/ -#define VAESENC() \ - "vaesenc %%xmm12, %%xmm4, %%xmm4\n\t" \ - "vaesenc %%xmm12, %%xmm5, %%xmm5\n\t" \ - "vaesenc %%xmm12, %%xmm6, %%xmm6\n\t" \ - "vaesenc %%xmm12, %%xmm7, %%xmm7\n\t" \ - "vaesenc %%xmm12, %%xmm8, %%xmm8\n\t" \ - "vaesenc %%xmm12, %%xmm9, %%xmm9\n\t" \ - "vaesenc %%xmm12, %%xmm10, %%xmm10\n\t" \ - "vaesenc %%xmm12, %%xmm11, %%xmm11\n\t" - -#define VAESENC_SET(o) \ - "vmovdqa "#o"(%[KEY]), %%xmm12\n\t" \ - VAESENC() - -#define VAESENC_CTR() \ - "vmovdqu " VAR(CTR1) ", %%xmm0\n\t" \ - "vmovdqa %[BSWAP_EPI64], %%xmm1\n\t" \ - "vpshufb %%xmm1, %%xmm0, %%xmm4\n\t" \ - "vpaddd %[ONE], %%xmm0, %%xmm5\n\t" \ - "vpshufb %%xmm1, %%xmm5, %%xmm5\n\t" \ - "vpaddd %[TWO], %%xmm0, %%xmm6\n\t" \ - "vpshufb %%xmm1, %%xmm6, %%xmm6\n\t" \ - "vpaddd %[THREE], %%xmm0, %%xmm7\n\t" \ - "vpshufb %%xmm1, %%xmm7, %%xmm7\n\t" \ - "vpaddd %[FOUR], %%xmm0, %%xmm8\n\t" \ - "vpshufb %%xmm1, %%xmm8, %%xmm8\n\t" \ - "vpaddd %[FIVE], %%xmm0, %%xmm9\n\t" \ - "vpshufb %%xmm1, %%xmm9, %%xmm9\n\t" \ - "vpaddd %[SIX], %%xmm0, %%xmm10\n\t" \ - "vpshufb %%xmm1, %%xmm10, %%xmm10\n\t" \ - "vpaddd %[SEVEN], %%xmm0, %%xmm11\n\t" \ - "vpshufb %%xmm1, %%xmm11, %%xmm11\n\t" \ - "vpaddd %[EIGHT], %%xmm0, %%xmm0\n\t" - -#define VAESENC_XOR() \ - "vmovdqa (%[KEY]), %%xmm12\n\t" \ - "vmovdqu %%xmm0, " VAR(CTR1) "\n\t" \ - "vpxor %%xmm12, %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm12, %%xmm5, %%xmm5\n\t" \ - "vpxor %%xmm12, %%xmm6, %%xmm6\n\t" \ - "vpxor %%xmm12, %%xmm7, %%xmm7\n\t" \ - "vpxor %%xmm12, %%xmm8, %%xmm8\n\t" \ - "vpxor %%xmm12, %%xmm9, %%xmm9\n\t" \ - "vpxor %%xmm12, %%xmm10, %%xmm10\n\t" \ - "vpxor %%xmm12, %%xmm11, %%xmm11\n\t" - -#define VAESENC_128() \ - VAESENC_CTR() \ - VAESENC_XOR() \ - VAESENC_SET(16) \ - VAESENC_SET(32) \ - VAESENC_SET(48) \ - VAESENC_SET(64) \ - VAESENC_SET(80) \ - VAESENC_SET(96) \ - VAESENC_SET(112) \ - VAESENC_SET(128) \ - VAESENC_SET(144) \ - "cmpl $11, %[nr]\n\t" \ - "vmovdqa 160(%[KEY]), %%xmm12\n\t" \ - "jl 1f\n\t" \ - VAESENC() \ - VAESENC_SET(176) \ - "cmpl $13, %[nr]\n\t" \ - "vmovdqa 192(%[KEY]), %%xmm12\n\t" \ - "jl 1f\n\t" \ - VAESENC() \ - VAESENC_SET(208) \ - "vmovdqa 224(%[KEY]), %%xmm12\n\t" \ - "\n" \ -"1:\n\t" \ - VAESENC_LAST(%[in], %[out]) - -/* Encrypt and carry-less multiply for AVX1. 
*/ -#define VAESENC_PCLMUL_1(src, o1, o2, o3) \ - "vmovdqu " #o3 "(" VAR(HTR) "), %%xmm12\n\t" \ - "vmovdqu " #o2 "(" #src "), %%xmm0\n\t" \ - "vaesenc " #o1 "(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm0, %%xmm0\n\t" \ - "vpxor %%xmm2, %%xmm0, %%xmm0\n\t" \ - "vpshufd $0x4e, %%xmm12, %%xmm1\n\t" \ - "vpshufd $0x4e, %%xmm0, %%xmm14\n\t" \ - "vpxor %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vpxor %%xmm0, %%xmm14, %%xmm14\n\t" \ - "vpclmulqdq $0x11, %%xmm12, %%xmm0, %%xmm3\n\t" \ - "vaesenc " #o1 "(%[KEY]), %%xmm5, %%xmm5\n\t" \ - "vaesenc " #o1 "(%[KEY]), %%xmm6, %%xmm6\n\t" \ - "vpclmulqdq $0x00, %%xmm12, %%xmm0, %%xmm2\n\t" \ - "vaesenc " #o1 "(%[KEY]), %%xmm7, %%xmm7\n\t" \ - "vaesenc " #o1 "(%[KEY]), %%xmm8, %%xmm8\n\t" \ - "vpclmulqdq $0x00, %%xmm14, %%xmm1, %%xmm1\n\t" \ - "vaesenc " #o1 "(%[KEY]), %%xmm9, %%xmm9\n\t" \ - "vaesenc " #o1 "(%[KEY]), %%xmm10, %%xmm10\n\t" \ - "vaesenc " #o1 "(%[KEY]), %%xmm11, %%xmm11\n\t" \ - "vpxor %%xmm2, %%xmm1, %%xmm1\n\t" \ - "vpxor %%xmm3, %%xmm1, %%xmm1\n\t" \ - -#define VAESENC_PCLMUL_N(src, o1, o2, o3) \ - "vmovdqu " #o3 "(" VAR(HTR) "), %%xmm12\n\t" \ - "vmovdqu " #o2 "(" #src "), %%xmm0\n\t" \ - "vpshufd $0x4e, %%xmm12, %%xmm13\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm0, %%xmm0\n\t" \ - "vaesenc " #o1 "(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm12, %%xmm13, %%xmm13\n\t" \ - "vpshufd $0x4e, %%xmm0, %%xmm14\n\t" \ - "vpxor %%xmm0, %%xmm14, %%xmm14\n\t" \ - "vpclmulqdq $0x11, %%xmm12, %%xmm0, %%xmm15\n\t" \ - "vaesenc " #o1 "(%[KEY]), %%xmm5, %%xmm5\n\t" \ - "vaesenc " #o1 "(%[KEY]), %%xmm6, %%xmm6\n\t" \ - "vpclmulqdq $0x00, %%xmm12, %%xmm0, %%xmm12\n\t" \ - "vaesenc " #o1 "(%[KEY]), %%xmm7, %%xmm7\n\t" \ - "vaesenc " #o1 "(%[KEY]), %%xmm8, %%xmm8\n\t" \ - "vpclmulqdq $0x00, %%xmm14, %%xmm13, %%xmm13\n\t" \ - "vaesenc " #o1 "(%[KEY]), %%xmm9, %%xmm9\n\t" \ - "vaesenc " #o1 "(%[KEY]), %%xmm10, %%xmm10\n\t" \ - "vaesenc " #o1 "(%[KEY]), %%xmm11, %%xmm11\n\t" \ - "vpxor %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vpxor %%xmm12, %%xmm2, %%xmm2\n\t" \ - "vpxor %%xmm15, %%xmm1, %%xmm1\n\t" \ - "vpxor %%xmm15, %%xmm3, %%xmm3\n\t" \ - "vpxor %%xmm13, %%xmm1, %%xmm1\n\t" \ - -#define VAESENC_PCLMUL_L(o) \ - "vpslldq $8, %%xmm1, %%xmm14\n\t" \ - "vpsrldq $8, %%xmm1, %%xmm1\n\t" \ - "vaesenc "#o"(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm14, %%xmm2, %%xmm2\n\t" \ - "vpxor %%xmm1, %%xmm3, %%xmm3\n\t" \ - "vaesenc "#o"(%[KEY]), %%xmm5, %%xmm5\n\t" \ - "vpslld $31, %%xmm2, %%xmm12\n\t" \ - "vpslld $30, %%xmm2, %%xmm13\n\t" \ - "vpslld $25, %%xmm2, %%xmm14\n\t" \ - "vaesenc "#o"(%[KEY]), %%xmm6, %%xmm6\n\t" \ - "vpxor %%xmm13, %%xmm12, %%xmm12\n\t" \ - "vpxor %%xmm14, %%xmm12, %%xmm12\n\t" \ - "vaesenc "#o"(%[KEY]), %%xmm7, %%xmm7\n\t" \ - "vpsrldq $4, %%xmm12, %%xmm13\n\t" \ - "vpslldq $12, %%xmm12, %%xmm12\n\t" \ - "vaesenc "#o"(%[KEY]), %%xmm8, %%xmm8\n\t" \ - "vpxor %%xmm12, %%xmm2, %%xmm2\n\t" \ - "vpsrld $1, %%xmm2, %%xmm14\n\t" \ - "vaesenc "#o"(%[KEY]), %%xmm9, %%xmm9\n\t" \ - "vpsrld $2, %%xmm2, %%xmm1\n\t" \ - "vpsrld $7, %%xmm2, %%xmm0\n\t" \ - "vaesenc "#o"(%[KEY]), %%xmm10, %%xmm10\n\t" \ - "vpxor %%xmm1, %%xmm14, %%xmm14\n\t" \ - "vpxor %%xmm0, %%xmm14, %%xmm14\n\t" \ - "vaesenc "#o"(%[KEY]), %%xmm11, %%xmm11\n\t" \ - "vpxor %%xmm13, %%xmm14, %%xmm14\n\t" \ - "vpxor %%xmm14, %%xmm2, %%xmm2\n\t" \ - "vpxor %%xmm3, %%xmm2, %%xmm2\n\t" \ - - -/* Encrypt and carry-less multiply with last key. 
*/ -#define VAESENC_LAST(in, out) \ - "vaesenclast %%xmm12, %%xmm4, %%xmm4\n\t" \ - "vaesenclast %%xmm12, %%xmm5, %%xmm5\n\t" \ - "vmovdqu (" #in "), %%xmm0\n\t" \ - "vmovdqu 16(" #in "), %%xmm1\n\t" \ - "vpxor %%xmm0, %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm1, %%xmm5, %%xmm5\n\t" \ - "vmovdqu %%xmm4, (" #out ")\n\t" \ - "vmovdqu %%xmm5, 16(" #out ")\n\t" \ - "vaesenclast %%xmm12, %%xmm6, %%xmm6\n\t" \ - "vaesenclast %%xmm12, %%xmm7, %%xmm7\n\t" \ - "vmovdqu 32(" #in "), %%xmm0\n\t" \ - "vmovdqu 48(" #in "), %%xmm1\n\t" \ - "vpxor %%xmm0, %%xmm6, %%xmm6\n\t" \ - "vpxor %%xmm1, %%xmm7, %%xmm7\n\t" \ - "vmovdqu %%xmm6, 32(" #out ")\n\t" \ - "vmovdqu %%xmm7, 48(" #out ")\n\t" \ - "vaesenclast %%xmm12, %%xmm8, %%xmm8\n\t" \ - "vaesenclast %%xmm12, %%xmm9, %%xmm9\n\t" \ - "vmovdqu 64(" #in "), %%xmm0\n\t" \ - "vmovdqu 80(" #in "), %%xmm1\n\t" \ - "vpxor %%xmm0, %%xmm8, %%xmm8\n\t" \ - "vpxor %%xmm1, %%xmm9, %%xmm9\n\t" \ - "vmovdqu %%xmm8, 64(" #out ")\n\t" \ - "vmovdqu %%xmm9, 80(" #out ")\n\t" \ - "vaesenclast %%xmm12, %%xmm10, %%xmm10\n\t" \ - "vaesenclast %%xmm12, %%xmm11, %%xmm11\n\t" \ - "vmovdqu 96(" #in "), %%xmm0\n\t" \ - "vmovdqu 112(" #in "), %%xmm1\n\t" \ - "vpxor %%xmm0, %%xmm10, %%xmm10\n\t" \ - "vpxor %%xmm1, %%xmm11, %%xmm11\n\t" \ - "vmovdqu %%xmm10, 96(" #out ")\n\t" \ - "vmovdqu %%xmm11, 112(" #out ")\n\t" - -#define VAESENC_BLOCK() \ - "vmovdqu " VAR(CTR1) ", %%xmm5\n\t" \ - "vpshufb %[BSWAP_EPI64], %%xmm5, %%xmm4\n\t" \ - "vpaddd %[ONE], %%xmm5, %%xmm5\n\t" \ - "vmovdqu %%xmm5, " VAR(CTR1) "\n\t" \ - "vpxor (%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 16(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 32(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 48(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 64(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 80(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 96(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 112(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 128(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 144(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "cmpl $11, %[nr]\n\t" \ - "vmovdqa 160(%[KEY]), %%xmm5\n\t" \ - "jl %=f\n\t" \ - "vaesenc %%xmm5, %%xmm4, %%xmm4\n\t" \ - "vaesenc 176(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "cmpl $13, %[nr]\n\t" \ - "vmovdqa 192(%[KEY]), %%xmm5\n\t" \ - "jl %=f\n\t" \ - "vaesenc %%xmm5, %%xmm4, %%xmm4\n\t" \ - "vaesenc 208(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vmovdqa 224(%[KEY]), %%xmm5\n\t" \ - "%=:\n\t" \ - "vaesenclast %%xmm5, %%xmm4, %%xmm4\n\t" \ - "vmovdqu (%[in]," VAR(KR64) ",1), %%xmm5\n\t" \ - "vpxor %%xmm5, %%xmm4, %%xmm4\n\t" \ - "vmovdqu %%xmm4, (%[out]," VAR(KR64) ",1)\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm4, " VAR(XR) ", " VAR(XR) "\n\t" - -#define _VAESENC_GFMUL(in, H, X) \ - "vmovdqu " VAR(CTR1) ", %%xmm5\n\t" \ - "vpshufb %[BSWAP_EPI64], %%xmm5, %%xmm4\n\t" \ - "vpaddd %[ONE], %%xmm5, %%xmm5\n\t" \ - "vmovdqu %%xmm5, " VAR(CTR1) "\n\t" \ - "vpxor (%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vpclmulqdq $0x10, " #H ", " #X ", %%xmm6\n\t" \ - "vaesenc 16(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 32(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vpclmulqdq $0x01, " #H ", " #X ", %%xmm7\n\t" \ - "vaesenc 48(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 64(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vpclmulqdq $0x00, " #H ", " #X ", %%xmm8\n\t" \ - "vaesenc 80(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vpclmulqdq $0x11, " #H ", " #X ", %%xmm1\n\t" \ - "vaesenc 96(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm7, %%xmm6, %%xmm6\n\t" \ - "vpslldq $8, %%xmm6, %%xmm2\n\t" \ - "vpsrldq $8, %%xmm6, %%xmm6\n\t" \ - "vaesenc 112(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm8, 
%%xmm2, %%xmm2\n\t" \ - "vpxor %%xmm6, %%xmm1, %%xmm3\n\t" \ - "vmovdqa %[MOD2_128], %%xmm0\n\t" \ - "vpclmulqdq $0x10, %%xmm0, %%xmm2, %%xmm7\n\t" \ - "vaesenc 128(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vpshufd $0x4e, %%xmm2, %%xmm6\n\t" \ - "vpxor %%xmm7, %%xmm6, %%xmm6\n\t" \ - "vpclmulqdq $0x10, %%xmm0, %%xmm6, %%xmm7\n\t" \ - "vaesenc 144(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vpshufd $0x4e, %%xmm6, %%xmm6\n\t" \ - "vpxor %%xmm7, %%xmm6, %%xmm6\n\t" \ - "vpxor %%xmm3, %%xmm6, " VAR(XR) "\n\t" \ - "cmpl $11, %[nr]\n\t" \ - "vmovdqa 160(%[KEY]), %%xmm5\n\t" \ - "jl 1f\n\t" \ - "vaesenc %%xmm5, %%xmm4, %%xmm4\n\t" \ - "vaesenc 176(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "cmpl $13, %[nr]\n\t" \ - "vmovdqa 192(%[KEY]), %%xmm5\n\t" \ - "jl 1f\n\t" \ - "vaesenc %%xmm5, %%xmm4, %%xmm4\n\t" \ - "vaesenc 208(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vmovdqa 224(%[KEY]), %%xmm5\n\t" \ - "1:\n\t" \ - "vaesenclast %%xmm5, %%xmm4, %%xmm4\n\t" \ - "vmovdqu " #in ", %%xmm0\n\t" \ - "vpxor %%xmm0, %%xmm4, %%xmm4\n\t" \ - "vmovdqu %%xmm4, (%[out]," VAR(KR64) ",1)\n\t" -#define VAESENC_GFMUL(in, H, X) \ - _VAESENC_GFMUL(in, H, X) - - -#define _GHASH_GFMUL_AVX1(r, r2, a, b) \ - "vpshufd $0x4e, "#a", %%xmm1\n\t" \ - "vpshufd $0x4e, "#b", %%xmm2\n\t" \ - "vpclmulqdq $0x11, "#a", "#b", %%xmm3\n\t" \ - "vpclmulqdq $0x00, "#a", "#b", %%xmm0\n\t" \ - "vpxor "#a", %%xmm1, %%xmm1\n\t" \ - "vpxor "#b", %%xmm2, %%xmm2\n\t" \ - "vpclmulqdq $0x00, %%xmm2, %%xmm1, %%xmm1\n\t" \ - "vpxor %%xmm0, %%xmm1, %%xmm1\n\t" \ - "vpxor %%xmm3, %%xmm1, %%xmm1\n\t" \ - "vmovdqa %%xmm0, "#r2"\n\t" \ - "vmovdqa %%xmm3, " #r "\n\t" \ - "vpslldq $8, %%xmm1, %%xmm2\n\t" \ - "vpsrldq $8, %%xmm1, %%xmm1\n\t" \ - "vpxor %%xmm2, "#r2", "#r2"\n\t" \ - "vpxor %%xmm1, " #r ", " #r "\n\t" -#define GHASH_GFMUL_AVX1(r, r2, a, b) \ - _GHASH_GFMUL_AVX1(r, r2, a, b) - -#define _GHASH_GFMUL_XOR_AVX1(r, r2, a, b) \ - "vpshufd $0x4e, "#a", %%xmm1\n\t" \ - "vpshufd $0x4e, "#b", %%xmm2\n\t" \ - "vpclmulqdq $0x11, "#a", "#b", %%xmm3\n\t" \ - "vpclmulqdq $0x00, "#a", "#b", %%xmm0\n\t" \ - "vpxor "#a", %%xmm1, %%xmm1\n\t" \ - "vpxor "#b", %%xmm2, %%xmm2\n\t" \ - "vpclmulqdq $0x00, %%xmm2, %%xmm1, %%xmm1\n\t" \ - "vpxor %%xmm0, %%xmm1, %%xmm1\n\t" \ - "vpxor %%xmm3, %%xmm1, %%xmm1\n\t" \ - "vpxor %%xmm0, "#r2", "#r2"\n\t" \ - "vpxor %%xmm3, " #r ", " #r "\n\t" \ - "vpslldq $8, %%xmm1, %%xmm2\n\t" \ - "vpsrldq $8, %%xmm1, %%xmm1\n\t" \ - "vpxor %%xmm2, "#r2", "#r2"\n\t" \ - "vpxor %%xmm1, " #r ", " #r "\n\t" -#define GHASH_GFMUL_XOR_AVX1(r, r2, a, b) \ - _GHASH_GFMUL_XOR_AVX1(r, r2, a, b) - -#define GHASH_MID_AVX1(r, r2) \ - "vpsrld $31, "#r2", %%xmm0\n\t" \ - "vpsrld $31, " #r ", %%xmm1\n\t" \ - "vpslld $1, "#r2", "#r2"\n\t" \ - "vpslld $1, " #r ", " #r "\n\t" \ - "vpsrldq $12, %%xmm0, %%xmm2\n\t" \ - "vpslldq $4, %%xmm0, %%xmm0\n\t" \ - "vpslldq $4, %%xmm1, %%xmm1\n\t" \ - "vpor %%xmm2, " #r ", " #r "\n\t" \ - "vpor %%xmm0, "#r2", "#r2"\n\t" \ - "vpor %%xmm1, " #r ", " #r "\n\t" - -#define _GHASH_GFMUL_RED_AVX1(r, a, b) \ - "vpshufd $0x4e, "#a", %%xmm5\n\t" \ - "vpshufd $0x4e, "#b", %%xmm6\n\t" \ - "vpclmulqdq $0x11, "#a", "#b", %%xmm7\n\t" \ - "vpclmulqdq $0x00, "#a", "#b", %%xmm4\n\t" \ - "vpxor "#a", %%xmm5, %%xmm5\n\t" \ - "vpxor "#b", %%xmm6, %%xmm6\n\t" \ - "vpclmulqdq $0x00, %%xmm6, %%xmm5, %%xmm5\n\t" \ - "vpxor %%xmm4, %%xmm5, %%xmm5\n\t" \ - "vpxor %%xmm7, %%xmm5, %%xmm5\n\t" \ - "vpslldq $8, %%xmm5, %%xmm6\n\t" \ - "vpsrldq $8, %%xmm5, %%xmm5\n\t" \ - "vpxor %%xmm6, %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm5, %%xmm7, " #r "\n\t" \ - "vpslld $31, %%xmm4, %%xmm8\n\t" \ - 
"vpslld $30, %%xmm4, %%xmm9\n\t" \ - "vpslld $25, %%xmm4, %%xmm10\n\t" \ - "vpxor %%xmm9, %%xmm8, %%xmm8\n\t" \ - "vpxor %%xmm10, %%xmm8, %%xmm8\n\t" \ - "vpsrldq $4, %%xmm8, %%xmm9\n\t" \ - "vpslldq $12, %%xmm8, %%xmm8\n\t" \ - "vpxor %%xmm8, %%xmm4, %%xmm4\n\t" \ - "vpsrld $1, %%xmm4, %%xmm10\n\t" \ - "vpsrld $2, %%xmm4, %%xmm6\n\t" \ - "vpsrld $7, %%xmm4, %%xmm5\n\t" \ - "vpxor %%xmm6, %%xmm10, %%xmm10\n\t" \ - "vpxor %%xmm5, %%xmm10, %%xmm10\n\t" \ - "vpxor %%xmm9, %%xmm10, %%xmm10\n\t" \ - "vpxor %%xmm4, %%xmm10, %%xmm10\n\t" \ - "vpxor %%xmm10, " #r ", " #r "\n\t" -#define GHASH_GFMUL_RED_AVX1(r, a, b) \ - _GHASH_GFMUL_RED_AVX1(r, a, b) - -#define _GHASH_GFSQR_RED_AVX1(r, a) \ - "vpclmulqdq $0x00, "#a", "#a", %%xmm4\n\t" \ - "vpclmulqdq $0x11, "#a", "#a", " #r "\n\t" \ - "vpslld $31, %%xmm4, %%xmm8\n\t" \ - "vpslld $30, %%xmm4, %%xmm9\n\t" \ - "vpslld $25, %%xmm4, %%xmm10\n\t" \ - "vpxor %%xmm9, %%xmm8, %%xmm8\n\t" \ - "vpxor %%xmm10, %%xmm8, %%xmm8\n\t" \ - "vpsrldq $4, %%xmm8, %%xmm9\n\t" \ - "vpslldq $12, %%xmm8, %%xmm8\n\t" \ - "vpxor %%xmm8, %%xmm4, %%xmm4\n\t" \ - "vpsrld $1, %%xmm4, %%xmm10\n\t" \ - "vpsrld $2, %%xmm4, %%xmm6\n\t" \ - "vpsrld $7, %%xmm4, %%xmm5\n\t" \ - "vpxor %%xmm6, %%xmm10, %%xmm10\n\t" \ - "vpxor %%xmm5, %%xmm10, %%xmm10\n\t" \ - "vpxor %%xmm9, %%xmm10, %%xmm10\n\t" \ - "vpxor %%xmm4, %%xmm10, %%xmm10\n\t" \ - "vpxor %%xmm10, " #r ", " #r "\n\t" -#define GHASH_GFSQR_RED_AVX1(r, a) \ - _GHASH_GFSQR_RED_AVX1(r, a) - -#define GHASH_RED_AVX1(r, r2) \ - "vpslld $31, "#r2", %%xmm0\n\t" \ - "vpslld $30, "#r2", %%xmm1\n\t" \ - "vpslld $25, "#r2", %%xmm2\n\t" \ - "vpxor %%xmm1, %%xmm0, %%xmm0\n\t" \ - "vpxor %%xmm2, %%xmm0, %%xmm0\n\t" \ - "vmovdqa %%xmm0, %%xmm1\n\t" \ - "vpsrldq $4, %%xmm1, %%xmm1\n\t" \ - "vpslldq $12, %%xmm0, %%xmm0\n\t" \ - "vpxor %%xmm0, "#r2", "#r2"\n\t" \ - "vpsrld $1, "#r2", %%xmm2\n\t" \ - "vpsrld $2, "#r2", %%xmm3\n\t" \ - "vpsrld $7, "#r2", %%xmm0\n\t" \ - "vpxor %%xmm3, %%xmm2, %%xmm2\n\t" \ - "vpxor %%xmm0, %%xmm2, %%xmm2\n\t" \ - "vpxor %%xmm1, %%xmm2, %%xmm2\n\t" \ - "vpxor "#r2", %%xmm2, %%xmm2\n\t" \ - "vpxor %%xmm2, " #r ", " #r "\n\t" - -#define GHASH_GFMUL_RED_XOR_AVX1(r, r2, a, b) \ - GHASH_GFMUL_XOR_AVX1(r, r2, a, b) \ - GHASH_RED_AVX1(r, r2) - -#define GHASH_FULL_AVX1(r, r2, a, b) \ - GHASH_GFMUL_AVX1(r, r2, a, b) \ - GHASH_MID_AVX1(r, r2) \ - GHASH_RED_AVX1(r, r2) - -#define CALC_IV_12_AVX1() \ - "# Calculate values when IV is 12 bytes\n\t" \ - "# Set counter based on IV\n\t" \ - "movl $0x01000000, %%ecx\n\t" \ - "vpinsrq $0, 0(%%rax), %%xmm13, %%xmm13\n\t" \ - "vpinsrd $2, 8(%%rax), %%xmm13, %%xmm13\n\t" \ - "vpinsrd $3, %%ecx, %%xmm13, %%xmm13\n\t" \ - "# H = Encrypt X(=0) and T = Encrypt counter\n\t" \ - "vmovdqa 0(%[KEY]), " VAR(HR) "\n\t" \ - "vpxor " VAR(HR) ", %%xmm13, %%xmm1\n\t" \ - "vmovdqa 16(%[KEY]), %%xmm12\n\t" \ - "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vmovdqa 32(%[KEY]), %%xmm12\n\t" \ - "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vmovdqa 48(%[KEY]), %%xmm12\n\t" \ - "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vmovdqa 64(%[KEY]), %%xmm12\n\t" \ - "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vmovdqa 80(%[KEY]), %%xmm12\n\t" \ - "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vmovdqa 96(%[KEY]), %%xmm12\n\t" \ - "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" 
\ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vmovdqa 112(%[KEY]), %%xmm12\n\t" \ - "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vmovdqa 128(%[KEY]), %%xmm12\n\t" \ - "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vmovdqa 144(%[KEY]), %%xmm12\n\t" \ - "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "cmpl $11, %[nr]\n\t" \ - "vmovdqa 160(%[KEY]), %%xmm12\n\t" \ - "jl 31f\n\t" \ - "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vmovdqa 176(%[KEY]), %%xmm12\n\t" \ - "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "cmpl $13, %[nr]\n\t" \ - "vmovdqa 192(%[KEY]), %%xmm12\n\t" \ - "jl 31f\n\t" \ - "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vmovdqa 208(%[KEY]), %%xmm12\n\t" \ - "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vmovdqu 224(%[KEY]), %%xmm12\n\t" \ - "31:\n\t" \ - "vaesenclast %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ - "vaesenclast %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vpshufb %[BSWAP_MASK], " VAR(HR) ", " VAR(HR) "\n\t" \ - "vmovdqu %%xmm1, " VAR(TR) "\n\t" \ - "jmp 39f\n\t" - -#define CALC_IV_AVX1() \ - "# Calculate values when IV is not 12 bytes\n\t" \ - "# H = Encrypt X(=0)\n\t" \ - "vmovdqa 0(%[KEY]), " VAR(HR) "\n\t" \ - VAESENC_AVX(HR) \ - "vpshufb %[BSWAP_MASK], " VAR(HR) ", " VAR(HR) "\n\t" \ - "# Calc counter\n\t" \ - "# Initialization vector\n\t" \ - "cmpl $0, %%edx\n\t" \ - "movq $0, %%rcx\n\t" \ - "je 45f\n\t" \ - "cmpl $16, %%edx\n\t" \ - "jl 44f\n\t" \ - "andl $0xfffffff0, %%edx\n\t" \ - "\n" \ - "43:\n\t" \ - "vmovdqu (%%rax,%%rcx,1), %%xmm4\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm4, %%xmm13, %%xmm13\n\t" \ - GHASH_FULL_AVX1(%%xmm13, %%xmm12, %%xmm13, HR) \ - "addl $16, %%ecx\n\t" \ - "cmpl %%edx, %%ecx\n\t" \ - "jl 43b\n\t" \ - "movl %[ibytes], %%edx\n\t" \ - "cmpl %%edx, %%ecx\n\t" \ - "je 45f\n\t" \ - "\n" \ - "44:\n\t" \ - "subq $16, %%rsp\n\t" \ - "vpxor %%xmm4, %%xmm4, %%xmm4\n\t" \ - "xorl %%ebx, %%ebx\n\t" \ - "vmovdqu %%xmm4, (%%rsp)\n\t" \ - "42:\n\t" \ - "movzbl (%%rax,%%rcx,1), %%r13d\n\t" \ - "movb %%r13b, (%%rsp,%%rbx,1)\n\t" \ - "incl %%ecx\n\t" \ - "incl %%ebx\n\t" \ - "cmpl %%edx, %%ecx\n\t" \ - "jl 42b\n\t" \ - "vmovdqu (%%rsp), %%xmm4\n\t" \ - "addq $16, %%rsp\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm4, %%xmm13, %%xmm13\n\t" \ - GHASH_FULL_AVX1(%%xmm13, %%xmm12, %%xmm13, HR) \ - "\n" \ - "45:\n\t" \ - "# T = Encrypt counter\n\t" \ - "vpxor %%xmm0, %%xmm0, %%xmm0\n\t" \ - "shll $3, %%edx\n\t" \ - "vpinsrq $0, %%rdx, %%xmm0, %%xmm0\n\t" \ - "vpxor %%xmm0, %%xmm13, %%xmm13\n\t" \ - GHASH_FULL_AVX1(%%xmm13, %%xmm12, %%xmm13, HR) \ - "vpshufb %[BSWAP_MASK], %%xmm13, %%xmm13\n\t" \ - "# Encrypt counter\n\t" \ - "vmovdqa 0(%[KEY]), %%xmm4\n\t" \ - "vpxor %%xmm13, %%xmm4, %%xmm4\n\t" \ - VAESENC_AVX(%%xmm4) \ - "vmovdqu %%xmm4, " VAR(TR) "\n\t" - -#define CALC_AAD_AVX1() \ - "# Additional authentication data\n\t" \ - "movl %[abytes], %%edx\n\t" \ - "cmpl $0, %%edx\n\t" \ - "je 25f\n\t" \ - "movq %[addt], %%rax\n\t" \ - "xorl %%ecx, %%ecx\n\t" \ - "cmpl $16, %%edx\n\t" \ - "jl 24f\n\t" \ - "andl $0xfffffff0, %%edx\n\t" \ - "\n" \ - "23:\n\t" \ - "vmovdqu (%%rax,%%rcx,1), %%xmm4\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm4, " VAR(XR) ", " VAR(XR) "\n\t" 
\ - GHASH_FULL_AVX1(XR, %%xmm12, XR, HR) \ - "addl $16, %%ecx\n\t" \ - "cmpl %%edx, %%ecx\n\t" \ - "jl 23b\n\t" \ - "movl %[abytes], %%edx\n\t" \ - "cmpl %%edx, %%ecx\n\t" \ - "je 25f\n\t" \ - "\n" \ - "24:\n\t" \ - "subq $16, %%rsp\n\t" \ - "vpxor %%xmm4, %%xmm4, %%xmm4\n\t" \ - "xorl %%ebx, %%ebx\n\t" \ - "vmovdqu %%xmm4, (%%rsp)\n\t" \ - "22:\n\t" \ - "movzbl (%%rax,%%rcx,1), %%r13d\n\t" \ - "movb %%r13b, (%%rsp,%%rbx,1)\n\t" \ - "incl %%ecx\n\t" \ - "incl %%ebx\n\t" \ - "cmpl %%edx, %%ecx\n\t" \ - "jl 22b\n\t" \ - "vmovdqu (%%rsp), %%xmm4\n\t" \ - "addq $16, %%rsp\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm4, " VAR(XR) ", " VAR(XR) "\n\t" \ - GHASH_FULL_AVX1(XR, %%xmm12, XR, HR) \ - "\n" \ - "25:\n\t" - -#define CALC_HT_8_AVX1() \ - "vmovdqa " VAR(XR) ", %%xmm2\n\t" \ - "# H ^ 1\n\t" \ - "vmovdqu " VAR(HR) ", 0(" VAR(HTR) ")\n\t" \ - "# H ^ 2\n\t" \ - GHASH_GFSQR_RED_AVX1(%%xmm0, HR) \ - "vmovdqu %%xmm0 , 16(" VAR(HTR) ")\n\t" \ - "# H ^ 3\n\t" \ - GHASH_GFMUL_RED_AVX1(%%xmm1, HR, %%xmm0) \ - "vmovdqu %%xmm1 , 32(" VAR(HTR) ")\n\t" \ - "# H ^ 4\n\t" \ - GHASH_GFSQR_RED_AVX1(%%xmm3, %%xmm0) \ - "vmovdqu %%xmm3 , 48(" VAR(HTR) ")\n\t" \ - "# H ^ 5\n\t" \ - GHASH_GFMUL_RED_AVX1(%%xmm12, %%xmm0, %%xmm1) \ - "vmovdqu %%xmm12, 64(" VAR(HTR) ")\n\t" \ - "# H ^ 6\n\t" \ - GHASH_GFSQR_RED_AVX1(%%xmm12, %%xmm1) \ - "vmovdqu %%xmm12, 80(" VAR(HTR) ")\n\t" \ - "# H ^ 7\n\t" \ - GHASH_GFMUL_RED_AVX1(%%xmm12, %%xmm1, %%xmm3) \ - "vmovdqu %%xmm12, 96(" VAR(HTR) ")\n\t" \ - "# H ^ 8\n\t" \ - GHASH_GFSQR_RED_AVX1(%%xmm12, %%xmm3) \ - "vmovdqu %%xmm12, 112(" VAR(HTR) ")\n\t" - -#define VAESENC_128_GHASH_AVX1(src, o) \ - "leaq (%[in]," VAR(KR64) ",1), %%rcx\n\t" \ - "leaq (%[out]," VAR(KR64) ",1), %%rdx\n\t" \ - /* src is either %%rcx or %%rdx */ \ - VAESENC_CTR() \ - VAESENC_XOR() \ - VAESENC_PCLMUL_1(src, 16, (o-128), 112) \ - VAESENC_PCLMUL_N(src, 32, (o-112), 96) \ - VAESENC_PCLMUL_N(src, 48, (o- 96), 80) \ - VAESENC_PCLMUL_N(src, 64, (o- 80), 64) \ - VAESENC_PCLMUL_N(src, 80, (o- 64), 48) \ - VAESENC_PCLMUL_N(src, 96, (o- 48), 32) \ - VAESENC_PCLMUL_N(src, 112, (o- 32), 16) \ - VAESENC_PCLMUL_N(src, 128, (o- 16), 0) \ - VAESENC_PCLMUL_L(144) \ - "cmpl $11, %[nr]\n\t" \ - "vmovdqa 160(%[KEY]), %%xmm12\n\t" \ - "jl 4f\n\t" \ - VAESENC() \ - VAESENC_SET(176) \ - "cmpl $13, %[nr]\n\t" \ - "vmovdqa 192(%[KEY]), %%xmm12\n\t" \ - "jl 4f\n\t" \ - VAESENC() \ - VAESENC_SET(208) \ - "vmovdqa 224(%[KEY]), %%xmm12\n\t" \ - "\n" \ -"4:\n\t" \ - VAESENC_LAST(%%rcx, %%rdx) - -#define _VAESENC_AVX(r) \ - "vaesenc 16(%[KEY]), " #r ", " #r "\n\t" \ - "vaesenc 32(%[KEY]), " #r ", " #r "\n\t" \ - "vaesenc 48(%[KEY]), " #r ", " #r "\n\t" \ - "vaesenc 64(%[KEY]), " #r ", " #r "\n\t" \ - "vaesenc 80(%[KEY]), " #r ", " #r "\n\t" \ - "vaesenc 96(%[KEY]), " #r ", " #r "\n\t" \ - "vaesenc 112(%[KEY]), " #r ", " #r "\n\t" \ - "vaesenc 128(%[KEY]), " #r ", " #r "\n\t" \ - "vaesenc 144(%[KEY]), " #r ", " #r "\n\t" \ - "cmpl $11, %[nr]\n\t" \ - "vmovdqa 160(%[KEY]), %%xmm5\n\t" \ - "jl %=f\n\t" \ - "vaesenc %%xmm5, " #r ", " #r "\n\t" \ - "vaesenc 176(%[KEY]), " #r ", " #r "\n\t" \ - "cmpl $13, %[nr]\n\t" \ - "vmovdqa 192(%[KEY]), %%xmm5\n\t" \ - "jl %=f\n\t" \ - "vaesenc %%xmm5, " #r ", " #r "\n\t" \ - "vaesenc 208(%[KEY]), " #r ", " #r "\n\t" \ - "vmovdqa 224(%[KEY]), %%xmm5\n\t" \ - "%=:\n\t" \ - "vaesenclast %%xmm5, " #r ", " #r "\n\t" -#define VAESENC_AVX(r) \ - _VAESENC_AVX(r) - -#define AESENC_LAST15_ENC_AVX1() \ - "movl %[nbytes], %%ecx\n\t" \ - "movl %%ecx, %%edx\n\t" \ - "andl $0x0f, %%ecx\n\t" 
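The AESENC_LAST15 macros beginning here handle a trailing partial block: the remaining bytes are staged in a zeroed 16-byte stack buffer, XORed with one final block of keystream, and the zero-padded ciphertext block is then folded into the GHASH state. A minimal C sketch of that staging step (the GHASH fold itself is omitted and the names are illustrative, not wolfSSL APIs):

#include <stdint.h>
#include <stddef.h>
#include <string.h>

/* Encrypt a trailing partial block the way AESENC_LAST15_ENC does:
 * 'ek' is the keystream block for the final counter and rem < 16.
 * The zero-padded ciphertext block that GCM must hash is left in
 * 'padded'. */
static void ctr_last_partial(const uint8_t ek[16], const uint8_t* in,
                             uint8_t* out, size_t rem, uint8_t padded[16])
{
    size_t i;
    memset(padded, 0, 16);           /* pad bytes stay zero for GHASH */
    for (i = 0; i < rem; i++) {
        out[i]    = (uint8_t)(in[i] ^ ek[i]);
        padded[i] = out[i];
    }
}

Zero-padding matters because GHASH is defined over full 16-byte blocks; the pad bytes must be zero so they do not perturb the tag.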
\ - "jz 55f\n\t" \ - "vmovdqu " VAR(CTR1) ", %%xmm13\n\t" \ - "vpshufb %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t" \ - "vpxor 0(%[KEY]), %%xmm13, %%xmm13\n\t" \ - VAESENC_AVX(%%xmm13) \ - "subq $16, %%rsp\n\t" \ - "xorl %%ecx, %%ecx\n\t" \ - "vmovdqu %%xmm13, (%%rsp)\n\t" \ - "\n" \ - "51:\n\t" \ - "movzbl (%[in]," VAR(KR64) ",1), %%r13d\n\t" \ - "xorb (%%rsp,%%rcx,1), %%r13b\n\t" \ - "movb %%r13b, (%[out]," VAR(KR64) ",1)\n\t" \ - "movb %%r13b, (%%rsp,%%rcx,1)\n\t" \ - "incl " VAR(KR) "\n\t" \ - "incl %%ecx\n\t" \ - "cmpl %%edx, " VAR(KR) "\n\t" \ - "jl 51b\n\t" \ - "xorq %%r13, %%r13\n\t" \ - "cmpl $16, %%ecx\n\t" \ - "je 53f\n\t" \ - "\n" \ - "52:\n\t" \ - "movb %%r13b, (%%rsp,%%rcx,1)\n\t" \ - "incl %%ecx\n\t" \ - "cmpl $16, %%ecx\n\t" \ - "jl 52b\n\t" \ - "53:\n\t" \ - "vmovdqu (%%rsp), %%xmm13\n\t" \ - "addq $16, %%rsp\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm13, %%xmm13\n\t" \ - "vpxor %%xmm13, " VAR(XR) ", " VAR(XR) "\n\t" \ - GHASH_GFMUL_RED_AVX1(XR, HR, XR) \ - -#define AESENC_LAST15_DEC_AVX1() \ - "movl %[nbytes], %%ecx\n\t" \ - "movl %%ecx, %%edx\n\t" \ - "andl $0x0f, %%ecx\n\t" \ - "jz 55f\n\t" \ - "vmovdqu " VAR(CTR1) ", %%xmm13\n\t" \ - "vpshufb %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t" \ - "vpxor 0(%[KEY]), %%xmm13, %%xmm13\n\t" \ - VAESENC_AVX(%%xmm13) \ - "subq $32, %%rsp\n\t" \ - "xorl %%ecx, %%ecx\n\t" \ - "vmovdqu %%xmm13, (%%rsp)\n\t" \ - "vpxor %%xmm0, %%xmm0, %%xmm0\n\t" \ - "vmovdqu %%xmm0, 16(%%rsp)\n\t" \ - "\n" \ - "51:\n\t" \ - "movzbl (%[in]," VAR(KR64) ",1), %%r13d\n\t" \ - "movb %%r13b, 16(%%rsp,%%rcx,1)\n\t" \ - "xorb (%%rsp,%%rcx,1), %%r13b\n\t" \ - "movb %%r13b, (%[out]," VAR(KR64) ",1)\n\t" \ - "incl " VAR(KR) "\n\t" \ - "incl %%ecx\n\t" \ - "cmpl %%edx, " VAR(KR) "\n\t" \ - "jl 51b\n\t" \ - "53:\n\t" \ - "vmovdqu 16(%%rsp), %%xmm13\n\t" \ - "addq $32, %%rsp\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm13, %%xmm13\n\t" \ - "vpxor %%xmm13, " VAR(XR) ", " VAR(XR) "\n\t" \ - GHASH_GFMUL_RED_AVX1(XR, HR, XR) \ - -#define CALC_TAG_AVX1() \ - "movl %[nbytes], %%edx\n\t" \ - "movl %[abytes], %%ecx\n\t" \ - "shlq $3, %%rdx\n\t" \ - "shlq $3, %%rcx\n\t" \ - "vpinsrq $0, %%rdx, %%xmm0, %%xmm0\n\t" \ - "vpinsrq $1, %%rcx, %%xmm0, %%xmm0\n\t" \ - "vpxor %%xmm0, " VAR(XR) ", " VAR(XR) "\n\t" \ - GHASH_GFMUL_RED_AVX1(XR, HR, XR) \ - "vpshufb %[BSWAP_MASK], " VAR(XR) ", " VAR(XR) "\n\t" \ - "vpxor " VAR(TR) ", " VAR(XR) ", %%xmm0\n\t" \ - -#define STORE_TAG_AVX() \ - "cmpl $16, %[tbytes]\n\t" \ - "je 71f\n\t" \ - "xorq %%rcx, %%rcx\n\t" \ - "vmovdqu %%xmm0, (%%rsp)\n\t" \ - "73:\n\t" \ - "movzbl (%%rsp,%%rcx,1), %%r13d\n\t" \ - "movb %%r13b, (%[tag],%%rcx,1)\n\t" \ - "incl %%ecx\n\t" \ - "cmpl %[tbytes], %%ecx\n\t" \ - "jne 73b\n\t" \ - "jmp 72f\n\t" \ - "\n" \ - "71:\n\t" \ - "vmovdqu %%xmm0, (%[tag])\n\t" \ - "\n" \ - "72:\n\t" - -#define CMP_TAG_AVX() \ - "cmpl $16, %[tbytes]\n\t" \ - "je 71f\n\t" \ - "subq $16, %%rsp\n\t" \ - "xorq %%rcx, %%rcx\n\t" \ - "xorq %%rax, %%rax\n\t" \ - "vmovdqu %%xmm0, (%%rsp)\n\t" \ - "\n" \ - "73:\n\t" \ - "movzbl (%%rsp,%%rcx,1), %%r13d\n\t" \ - "xorb (%[tag],%%rcx,1), %%r13b\n\t" \ - "orb %%r13b, %%al\n\t" \ - "incl %%ecx\n\t" \ - "cmpl %[tbytes], %%ecx\n\t" \ - "jne 73b\n\t" \ - "cmpb $0x00, %%al\n\t" \ - "sete %%al\n\t" \ - "addq $16, %%rsp\n\t" \ - "jmp 72f\n\t" \ - "\n" \ - "71:\n\t" \ - "vmovdqu (%[tag]), %%xmm1\n\t" \ - "vpcmpeqb %%xmm1, %%xmm0, %%xmm0\n\t" \ - "vpmovmskb %%xmm0, %%edx\n\t" \ - "# %%edx == 0xFFFF then return 1 else => return 0\n\t" \ - "xorl %%eax, %%eax\n\t" \ - "cmpl $0xffff, %%edx\n\t" \ - "sete %%al\n\t" \ - "\n" \ - 
"72:\n\t" \ - "movl %%eax, (%[res])\n\t" - -static void AES_GCM_encrypt_avx1(const unsigned char *in, unsigned char *out, - const unsigned char* addt, - const unsigned char* ivec, unsigned char *tag, - unsigned int nbytes, unsigned int abytes, - unsigned int ibytes, unsigned int tbytes, - const unsigned char* key, int nr) -{ - register const unsigned char* iv asm("rax") = ivec; - register unsigned int ivLen asm("ebx") = ibytes; - - __asm__ __volatile__ ( - "subq $" VAR(STACK_OFFSET) ", %%rsp\n\t" - /* Counter is xmm13 */ - "vpxor %%xmm13, %%xmm13, %%xmm13\n\t" - "vpxor " VAR(XR) ", " VAR(XR) ", " VAR(XR) "\n\t" - "movl %[ibytes], %%edx\n\t" - "cmpl $12, %%edx\n\t" - "jne 35f\n\t" - CALC_IV_12_AVX1() - "\n" - "35:\n\t" - CALC_IV_AVX1() - "\n" - "39:\n\t" - - CALC_AAD_AVX1() - - "# Calculate counter and H\n\t" - "vpsrlq $63, " VAR(HR) ", %%xmm5\n\t" - "vpsllq $1, " VAR(HR) ", %%xmm4\n\t" - "vpslldq $8, %%xmm5, %%xmm5\n\t" - "vpor %%xmm5, %%xmm4, %%xmm4\n\t" - "vpshufd $0xff, " VAR(HR) ", " VAR(HR) "\n\t" - "vpsrad $31, " VAR(HR) ", " VAR(HR) "\n\t" - "vpshufb %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t" - "vpand %[MOD2_128], " VAR(HR) ", " VAR(HR) "\n\t" - "vpaddd %[ONE], %%xmm13, %%xmm13\n\t" - "vpxor %%xmm4, " VAR(HR) ", " VAR(HR) "\n\t" - "vmovdqu %%xmm13, " VAR(CTR1) "\n\t" - - "xorl " VAR(KR) ", " VAR(KR) "\n\t" - -#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL) - "cmpl $128, %[nbytes]\n\t" - "movl %[nbytes], %%r13d\n\t" - "jl 5f\n\t" - "andl $0xffffff80, %%r13d\n\t" - - CALC_HT_8_AVX1() - - "# First 128 bytes of input\n\t" - VAESENC_128() - - "cmpl $128, %%r13d\n\t" - "movl $128, " VAR(KR) "\n\t" - "jle 2f\n\t" - - "# More 128 bytes of input\n\t" - "\n" - "3:\n\t" - VAESENC_128_GHASH_AVX1(%%rdx, 0) - "addl $128, " VAR(KR) "\n\t" - "cmpl %%r13d, " VAR(KR) "\n\t" - "jl 3b\n\t" - "\n" - "2:\n\t" - "vmovdqa %[BSWAP_MASK], %%xmm13\n\t" - "vpshufb %%xmm13, %%xmm4, %%xmm4\n\t" - "vpshufb %%xmm13, %%xmm5, %%xmm5\n\t" - "vpshufb %%xmm13, %%xmm6, %%xmm6\n\t" - "vpshufb %%xmm13, %%xmm7, %%xmm7\n\t" - "vpxor %%xmm2, %%xmm4, %%xmm4\n\t" - "vpshufb %%xmm13, %%xmm8, %%xmm8\n\t" - "vpshufb %%xmm13, %%xmm9, %%xmm9\n\t" - "vpshufb %%xmm13, %%xmm10, %%xmm10\n\t" - "vpshufb %%xmm13, %%xmm11, %%xmm11\n\t" - - "vmovdqu (" VAR(HTR) "), %%xmm12\n\t" - "vmovdqu 16(" VAR(HTR) "), %%xmm14\n\t" - GHASH_GFMUL_AVX1(XR, %%xmm13, %%xmm11, %%xmm12) - GHASH_GFMUL_XOR_AVX1(XR, %%xmm13, %%xmm10, %%xmm14) - "vmovdqu 32(" VAR(HTR) "), %%xmm12\n\t" - "vmovdqu 48(" VAR(HTR) "), %%xmm14\n\t" - GHASH_GFMUL_XOR_AVX1(XR, %%xmm13, %%xmm9, %%xmm12) - GHASH_GFMUL_XOR_AVX1(XR, %%xmm13, %%xmm8, %%xmm14) - "vmovdqu 64(" VAR(HTR) "), %%xmm12\n\t" - "vmovdqu 80(" VAR(HTR) "), %%xmm14\n\t" - GHASH_GFMUL_XOR_AVX1(XR, %%xmm13, %%xmm7, %%xmm12) - GHASH_GFMUL_XOR_AVX1(XR, %%xmm13, %%xmm6, %%xmm14) - "vmovdqu 96(" VAR(HTR) "), %%xmm12\n\t" - "vmovdqu 112(" VAR(HTR) "), %%xmm14\n\t" - GHASH_GFMUL_XOR_AVX1(XR, %%xmm13, %%xmm5, %%xmm12) - GHASH_GFMUL_RED_XOR_AVX1(XR, %%xmm13, %%xmm4, %%xmm14) - - "vmovdqu 0(" VAR(HTR) "), " VAR(HR) "\n\t" - "\n" - "5:\n\t" - "movl %[nbytes], %%edx\n\t" - "cmpl %%edx, " VAR(KR) "\n\t" - "jge 55f\n\t" -#endif - - "movl %[nbytes], %%r13d\n\t" - "andl $0xfffffff0, %%r13d\n\t" - "cmpl %%r13d, " VAR(KR) "\n\t" - "jge 14f\n\t" - - VAESENC_BLOCK() - "addl $16, " VAR(KR) "\n\t" - "cmpl %%r13d, " VAR(KR) "\n\t" - "jge 13f\n\t" - "\n" - "12:\n\t" - "vmovdqu (%[in]," VAR(KR64) ",1), %%xmm9\n\t" - VAESENC_GFMUL(%%xmm9, HR, XR) - "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" - "addl $16, " VAR(KR) "\n\t" - 
"vpxor %%xmm4, " VAR(XR) ", " VAR(XR) "\n\t" - "cmpl %%r13d, " VAR(KR) "\n\t" - "jl 12b\n\t" - "\n" - "13:\n\t" - GHASH_GFMUL_RED_AVX1(XR, HR, XR) - "\n" - "14:\n\t" - - AESENC_LAST15_ENC_AVX1() - "\n" - "55:\n\t" - - CALC_TAG_AVX1() - STORE_TAG_AVX() - "addq $" VAR(STACK_OFFSET) ", %%rsp\n\t" - "vzeroupper\n\t" - - : - : [KEY] "r" (key), - [in] "r" (in), [out] "r" (out), [nr] "r" (nr), - [nbytes] "r" (nbytes), [abytes] "r" (abytes), [addt] "r" (addt), - [ivec] "r" (iv), [ibytes] "r" (ivLen), [tbytes] "r" (tbytes), - [tag] "r" (tag), - [BSWAP_MASK] "m" (BSWAP_MASK), - [BSWAP_EPI64] "m" (BSWAP_EPI64), - [ONE] "m" (ONE), -#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL) - [TWO] "m" (TWO), [THREE] "m" (THREE), [FOUR] "m" (FOUR), - [FIVE] "m" (FIVE), [SIX] "m" (SIX), [SEVEN] "m" (SEVEN), - [EIGHT] "m" (EIGHT), -#endif - [MOD2_128] "m" (MOD2_128) - : "xmm15", "xmm14", "xmm13", "xmm12", - "xmm0", "xmm1", "xmm2", "xmm3", "memory", - "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", - "rcx", "rdx", "r13" - ); -} - -#ifdef HAVE_INTEL_AVX2 -/* Encrypt and carry-less multiply for AVX2. */ -#define VAESENC_PCLMUL_AVX2_1(src, o1, o2, o3) \ - "vmovdqu " #o2 "(" #src "), %%xmm12\n\t" \ - "vmovdqa " #o1 "(%[KEY]), %%xmm0\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm12, %%xmm12\n\t" \ - "vmovdqu " #o3 "(" VAR(HTR) "), %%xmm13\n\t" \ - "vpxor %%xmm2, %%xmm12, %%xmm12\n\t" \ - "vpclmulqdq $0x10, %%xmm13, %%xmm12, %%xmm1\n\t" \ - "vpclmulqdq $0x01, %%xmm13, %%xmm12, %%xmm14\n\t" \ - "vpclmulqdq $0x00, %%xmm13, %%xmm12, %%xmm2\n\t" \ - "vpclmulqdq $0x11, %%xmm13, %%xmm12, %%xmm3\n\t" \ - "vaesenc %%xmm0, %%xmm4, %%xmm4\n\t" \ - "vaesenc %%xmm0, %%xmm5, %%xmm5\n\t" \ - "vaesenc %%xmm0, %%xmm6, %%xmm6\n\t" \ - "vaesenc %%xmm0, %%xmm7, %%xmm7\n\t" \ - "vaesenc %%xmm0, %%xmm8, %%xmm8\n\t" \ - "vaesenc %%xmm0, %%xmm9, %%xmm9\n\t" \ - "vaesenc %%xmm0, %%xmm10, %%xmm10\n\t" \ - "vaesenc %%xmm0, %%xmm11, %%xmm11\n\t" \ - -#define VAESENC_PCLMUL_AVX2_2(src, o1, o2, o3) \ - "vmovdqu " #o2 "(" #src "), %%xmm12\n\t" \ - "vmovdqu " #o3 "(" VAR(HTR) "), %%xmm0\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm12, %%xmm12\n\t" \ - "vpxor %%xmm14, %%xmm1, %%xmm1\n\t" \ - "vpclmulqdq $0x10, %%xmm0, %%xmm12, %%xmm13\n\t" \ - "vpclmulqdq $0x01, %%xmm0, %%xmm12, %%xmm14\n\t" \ - "vpclmulqdq $0x00, %%xmm0, %%xmm12, %%xmm15\n\t" \ - "vpclmulqdq $0x11, %%xmm0, %%xmm12, %%xmm12\n\t" \ - "vmovdqa " #o1 "(%[KEY]), %%xmm0\n\t" \ - "vpxor %%xmm13, %%xmm1, %%xmm1\n\t" \ - "vpxor %%xmm12, %%xmm3, %%xmm3\n\t" \ - "vaesenc %%xmm0, %%xmm4, %%xmm4\n\t" \ - "vaesenc %%xmm0, %%xmm5, %%xmm5\n\t" \ - "vaesenc %%xmm0, %%xmm6, %%xmm6\n\t" \ - "vaesenc %%xmm0, %%xmm7, %%xmm7\n\t" \ - "vaesenc %%xmm0, %%xmm8, %%xmm8\n\t" \ - "vaesenc %%xmm0, %%xmm9, %%xmm9\n\t" \ - "vaesenc %%xmm0, %%xmm10, %%xmm10\n\t" \ - "vaesenc %%xmm0, %%xmm11, %%xmm11\n\t" \ - -#define VAESENC_PCLMUL_AVX2_N(src, o1, o2, o3) \ - "vmovdqu " #o2 "(" #src "), %%xmm12\n\t" \ - "vmovdqu " #o3 "(" VAR(HTR) "), %%xmm0\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm12, %%xmm12\n\t" \ - "vpxor %%xmm14, %%xmm1, %%xmm1\n\t" \ - "vpxor %%xmm15, %%xmm2, %%xmm2\n\t" \ - "vpclmulqdq $0x10, %%xmm0, %%xmm12, %%xmm13\n\t" \ - "vpclmulqdq $0x01, %%xmm0, %%xmm12, %%xmm14\n\t" \ - "vpclmulqdq $0x00, %%xmm0, %%xmm12, %%xmm15\n\t" \ - "vpclmulqdq $0x11, %%xmm0, %%xmm12, %%xmm12\n\t" \ - "vmovdqa " #o1 "(%[KEY]), %%xmm0\n\t" \ - "vpxor %%xmm13, %%xmm1, %%xmm1\n\t" \ - "vpxor %%xmm12, %%xmm3, %%xmm3\n\t" \ - "vaesenc %%xmm0, %%xmm4, %%xmm4\n\t" \ - "vaesenc %%xmm0, %%xmm5, %%xmm5\n\t" \ - 
"vaesenc %%xmm0, %%xmm6, %%xmm6\n\t" \ - "vaesenc %%xmm0, %%xmm7, %%xmm7\n\t" \ - "vaesenc %%xmm0, %%xmm8, %%xmm8\n\t" \ - "vaesenc %%xmm0, %%xmm9, %%xmm9\n\t" \ - "vaesenc %%xmm0, %%xmm10, %%xmm10\n\t" \ - "vaesenc %%xmm0, %%xmm11, %%xmm11\n\t" \ - -#define VAESENC_PCLMUL_AVX2_L(o) \ - "vpxor %%xmm14, %%xmm1, %%xmm1\n\t" \ - "vpxor %%xmm15, %%xmm2, %%xmm2\n\t" \ - "vpslldq $8, %%xmm1, %%xmm12\n\t" \ - "vpsrldq $8, %%xmm1, %%xmm1\n\t" \ - "vmovdqa "#o"(%[KEY]), %%xmm15\n\t" \ - "vmovdqa %[MOD2_128], %%xmm0\n\t" \ - "vaesenc %%xmm15, %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm12, %%xmm2, %%xmm2\n\t" \ - "vpxor %%xmm1, %%xmm3, %%xmm3\n\t" \ - "vpclmulqdq $0x10, %%xmm0, %%xmm2, %%xmm14\n\t" \ - "vaesenc %%xmm15, %%xmm5, %%xmm5\n\t" \ - "vaesenc %%xmm15, %%xmm6, %%xmm6\n\t" \ - "vaesenc %%xmm15, %%xmm7, %%xmm7\n\t" \ - "vpshufd $0x4e, %%xmm2, %%xmm2\n\t" \ - "vpxor %%xmm14, %%xmm2, %%xmm2\n\t" \ - "vpclmulqdq $0x10, %%xmm0, %%xmm2, %%xmm14\n\t" \ - "vaesenc %%xmm15, %%xmm8, %%xmm8\n\t" \ - "vaesenc %%xmm15, %%xmm9, %%xmm9\n\t" \ - "vaesenc %%xmm15, %%xmm10, %%xmm10\n\t" \ - "vpshufd $0x4e, %%xmm2, %%xmm2\n\t" \ - "vpxor %%xmm14, %%xmm2, %%xmm2\n\t" \ - "vpxor %%xmm3, %%xmm2, %%xmm2\n\t" \ - "vaesenc %%xmm15, %%xmm11, %%xmm11\n\t" - -#define VAESENC_BLOCK_AVX2() \ - "vmovdqu " VAR(CTR1) ", %%xmm5\n\t" \ - "vpshufb %[BSWAP_EPI64], %%xmm5, %%xmm4\n\t" \ - "vpaddd %[ONE], %%xmm5, %%xmm5\n\t" \ - "vmovdqu %%xmm5, " VAR(CTR1) "\n\t" \ - "vpxor (%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 16(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 32(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 48(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 64(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 80(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 96(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 112(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 128(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 144(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "cmpl $11, %[nr]\n\t" \ - "vmovdqa 160(%[KEY]), %%xmm5\n\t" \ - "jl %=f\n\t" \ - "vaesenc %%xmm5, %%xmm4, %%xmm4\n\t" \ - "vaesenc 176(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "cmpl $13, %[nr]\n\t" \ - "vmovdqa 192(%[KEY]), %%xmm5\n\t" \ - "jl %=f\n\t" \ - "vaesenc %%xmm5, %%xmm4, %%xmm4\n\t" \ - "vaesenc 208(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vmovdqa 224(%[KEY]), %%xmm5\n\t" \ - "%=:\n\t" \ - "vaesenclast %%xmm5, %%xmm4, %%xmm4\n\t" \ - "vmovdqu (%[in]," VAR(KR64) ",1), %%xmm5\n\t" \ - "vpxor %%xmm5, %%xmm4, %%xmm4\n\t" \ - "vmovdqu %%xmm4, (%[out]," VAR(KR64) ",1)\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm4, " VAR(XR) ", " VAR(XR) "\n\t" - -/* Karatsuba multiplication - slower - * H01 = H[1] ^ H[0] (top and bottom 64-bits XORed) - */ -#define _VAESENC_GFMUL_AVX2(in, H, X, ctr1, H01) \ - "vpxor (%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 16(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 32(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 48(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 64(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 80(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 96(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 112(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 128(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 144(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "cmpl $11, %[nr]\n\t" \ - "vmovdqa 160(%[KEY]), %%xmm5\n\t" \ - "jl %=f\n\t" \ - "vaesenc %%xmm5, %%xmm4, %%xmm4\n\t" \ - "vaesenc 176(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "cmpl $13, %[nr]\n\t" \ - "vmovdqa 192(%[KEY]), %%xmm5\n\t" \ - "jl %=f\n\t" \ - "vaesenc %%xmm5, %%xmm4, %%xmm4\n\t" \ - "vaesenc 208(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vmovdqa 224(%[KEY]), %%xmm5\n\t" \ - "%=:\n\t" \ - 
"vaesenclast %%xmm5, %%xmm4, %%xmm4\n\t" \ - "vmovdqu " #in ", %%xmm0\n\t" \ - "vpxor %%xmm0, %%xmm4, %%xmm4\n\t" \ - \ - "vpsrldq $8, " #X ", %%xmm2\n\t" \ - "vpxor " #X ", %%xmm2, %%xmm2\n\t" \ - "vpclmulqdq $0x00, " #H ", " #X ", %%xmm5\n\t" \ - "vpclmulqdq $0x11, " #H ", " #X ", %%xmm8\n\t" \ - "vpclmulqdq $0x00, "#H01", %%xmm2, %%xmm7\n\t" \ - "vpxor %%xmm5, %%xmm7, %%xmm7\n\t" \ - "vpxor %%xmm8, %%xmm7, %%xmm7\n\t" \ - "vpslldq $8, %%xmm7, %%xmm6\n\t" \ - "vpsrldq $8, %%xmm7, %%xmm7\n\t" \ - "vpxor %%xmm7, %%xmm8, %%xmm8\n\t" \ - "vpxor %%xmm5, %%xmm6, %%xmm6\n\t" \ - \ - "vpclmulqdq $0x10, %[MOD2_128], %%xmm6, %%xmm5\n\t" \ - "vpshufd $0x4e, %%xmm6, %%xmm6\n\t" \ - "vpxor %%xmm5, %%xmm6, %%xmm6\n\t" \ - "vpclmulqdq $0x10, %[MOD2_128], %%xmm6, %%xmm5\n\t" \ - "vpshufd $0x4e, %%xmm6, %%xmm6\n\t" \ - "vpxor %%xmm8, %%xmm6, %%xmm6\n\t" \ - "vpxor %%xmm5, %%xmm6, " VAR(XR) "\n\t" -#define VAESENC_GFMUL_AVX2(in, H, X, ctr1) \ - _VAESENC_GFMUL_AVX2(in, H, X, ctr1) - -#define _VAESENC_GFMUL_SB_AVX2(in, H, X, ctr1) \ - "vpclmulqdq $0x10, " #H ", " #X ", %%xmm7\n\t" \ - "vpclmulqdq $0x01, " #H ", " #X ", %%xmm6\n\t" \ - "vpclmulqdq $0x00, " #H ", " #X ", %%xmm5\n\t" \ - "vpclmulqdq $0x11, " #H ", " #X ", %%xmm8\n\t" \ - "vpxor (%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 16(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm6, %%xmm7, %%xmm7\n\t" \ - "vpslldq $8, %%xmm7, %%xmm6\n\t" \ - "vpsrldq $8, %%xmm7, %%xmm7\n\t" \ - "vaesenc 32(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm5, %%xmm6, %%xmm6\n\t" \ - "vpclmulqdq $0x10, %[MOD2_128], %%xmm6, %%xmm5\n\t" \ - "vaesenc 48(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 64(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 80(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vpshufd $0x4e, %%xmm6, %%xmm6\n\t" \ - "vpxor %%xmm5, %%xmm6, %%xmm6\n\t" \ - "vpclmulqdq $0x10, %[MOD2_128], %%xmm6, %%xmm5\n\t" \ - "vaesenc 96(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 112(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vaesenc 128(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vpshufd $0x4e, %%xmm6, %%xmm6\n\t" \ - "vaesenc 144(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm7, %%xmm8, %%xmm8\n\t" \ - "vpxor %%xmm8, %%xmm6, %%xmm6\n\t" \ - "cmpl $11, %[nr]\n\t" \ - "vmovdqa 160(%[KEY]), %%xmm3\n\t" \ - "jl %=f\n\t" \ - "vaesenc %%xmm3, %%xmm4, %%xmm4\n\t" \ - "vaesenc 176(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "cmpl $13, %[nr]\n\t" \ - "vmovdqa 192(%[KEY]), %%xmm3\n\t" \ - "jl %=f\n\t" \ - "vaesenc %%xmm3, %%xmm4, %%xmm4\n\t" \ - "vaesenc 208(%[KEY]), %%xmm4, %%xmm4\n\t" \ - "vmovdqa 224(%[KEY]), %%xmm3\n\t" \ - "%=:\n\t" \ - "vaesenclast %%xmm3, %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm5, %%xmm6, " VAR(XR) "\n\t" \ - "vmovdqu " #in ", %%xmm5\n\t" \ - "vpxor %%xmm5, %%xmm4, %%xmm4\n\t" -#define VAESENC_GFMUL_SB_AVX2(in, H, X, ctr1) \ - _VAESENC_GFMUL_SB_AVX2(in, H, X, ctr1) - - -#define _GHASH_GFMUL_AVX2(r, r2, a, b) \ - "vpclmulqdq $0x10, "#a", "#b", %%xmm2\n\t" \ - "vpclmulqdq $0x01, "#a", "#b", %%xmm1\n\t" \ - "vpclmulqdq $0x00, "#a", "#b", %%xmm0\n\t" \ - "vpclmulqdq $0x11, "#a", "#b", %%xmm3\n\t" \ - "vpxor %%xmm1, %%xmm2, %%xmm2\n\t" \ - "vpslldq $8, %%xmm2, %%xmm1\n\t" \ - "vpsrldq $8, %%xmm2, %%xmm2\n\t" \ - "vpxor %%xmm1, %%xmm0, "#r2"\n\t" \ - "vpxor %%xmm2, %%xmm3, " #r "\n\t" -#define GHASH_GFMUL_AVX2(r, r2, a, b) \ - _GHASH_GFMUL_AVX2(r, r2, a, b) - -#define GHASH_MID_AVX2(r, r2) \ - "vpsrld $31, "#r2", %%xmm0\n\t" \ - "vpsrld $31, " #r ", %%xmm1\n\t" \ - "vpslld $1, "#r2", "#r2"\n\t" \ - "vpslld $1, " #r ", " #r "\n\t" \ - "vpsrldq $12, %%xmm0, %%xmm2\n\t" \ - "vpslldq $4, %%xmm0, %%xmm0\n\t" \ - "vpslldq $4, %%xmm1, 
%%xmm1\n\t" \ - "vpor %%xmm2, " #r ", " #r "\n\t" \ - "vpor %%xmm0, "#r2", "#r2"\n\t" \ - "vpor %%xmm1, " #r ", " #r "\n\t" - -#define _GHASH_GFMUL_RED_AVX2(r, a, b) \ - "vpclmulqdq $0x10, "#a", "#b", %%xmm7\n\t" \ - "vpclmulqdq $0x01, "#a", "#b", %%xmm6\n\t" \ - "vpclmulqdq $0x00, "#a", "#b", %%xmm5\n\t" \ - "vpxor %%xmm6, %%xmm7, %%xmm7\n\t" \ - "vpslldq $8, %%xmm7, %%xmm6\n\t" \ - "vpsrldq $8, %%xmm7, %%xmm7\n\t" \ - "vpxor %%xmm5, %%xmm6, %%xmm6\n\t" \ - "vpclmulqdq $0x11, "#a", "#b", %%xmm8\n\t" \ - "vpclmulqdq $0x10, %[MOD2_128], %%xmm6, %%xmm5\n\t" \ - "vpshufd $0x4e, %%xmm6, %%xmm6\n\t" \ - "vpxor %%xmm5, %%xmm6, %%xmm6\n\t" \ - "vpclmulqdq $0x10, %[MOD2_128], %%xmm6, %%xmm5\n\t" \ - "vpshufd $0x4e, %%xmm6, %%xmm6\n\t" \ - "vpxor %%xmm7, %%xmm8, %%xmm8\n\t" \ - "vpxor %%xmm8, %%xmm6, %%xmm6\n\t" \ - "vpxor %%xmm5, %%xmm6, " #r "\n\t" -#define GHASH_GFMUL_RED_AVX2(r, a, b) \ - _GHASH_GFMUL_RED_AVX2(r, a, b) - -#define _GHASH_GFSQR_RED2_AVX2(r, a, mod128) \ - "vpclmulqdq $0x00, "#a", "#a", %%xmm6\n\t" \ - "vpclmulqdq $0x11, "#a", "#a", %%xmm8\n\t" \ - "vpclmulqdq $0x10, "#mod128", %%xmm6, %%xmm5\n\t" \ - "vpshufd $0x4e, %%xmm6, %%xmm6\n\t" \ - "vpxor %%xmm5, %%xmm6, %%xmm6\n\t" \ - "vpclmulqdq $0x10, "#mod128", %%xmm6, %%xmm5\n\t" \ - "vpshufd $0x4e, %%xmm6, %%xmm6\n\t" \ - "vpxor %%xmm5, %%xmm6, %%xmm6\n\t" \ - "vpxor %%xmm6, %%xmm8, " #r "\n\t" -#define GHASH_GFSQR_RED2_AVX2(r, a, mod128) \ - _GHASH_GFSQR_RED2_AVX2(r, a, mod128) - -#define _GHASH_GFMUL_SQR_RED2_AVX2(rm, rs, a, b, mod128) \ - "vpclmulqdq $0x10, "#a", "#b", %%xmm7\n\t" \ - "vpclmulqdq $0x01, "#a", "#b", %%xmm6\n\t" \ - "vpclmulqdq $0x00, "#a", "#b", %%xmm5\n\t" \ - "vpclmulqdq $0x11, "#a", "#b", %%xmm8\n\t" \ - "vpclmulqdq $0x00, "#b", "#b", %%xmm9\n\t" \ - "vpclmulqdq $0x11, "#b", "#b", %%xmm10\n\t" \ - "vpxor %%xmm6, %%xmm7, %%xmm7\n\t" \ - "vpslldq $8, %%xmm7, %%xmm6\n\t" \ - "vpsrldq $8, %%xmm7, %%xmm7\n\t" \ - "vpxor %%xmm5, %%xmm6, %%xmm6\n\t" \ - "vpclmulqdq $0x10, "#mod128", %%xmm9, %%xmm4\n\t" \ - "vpclmulqdq $0x10, "#mod128", %%xmm6, %%xmm5\n\t" \ - "vpshufd $0x4e, %%xmm6, %%xmm6\n\t" \ - "vpshufd $0x4e, %%xmm9, %%xmm9\n\t" \ - "vpxor %%xmm5, %%xmm6, %%xmm6\n\t" \ - "vpxor %%xmm4, %%xmm9, %%xmm9\n\t" \ - "vpclmulqdq $0x10, "#mod128", %%xmm6, %%xmm5\n\t" \ - "vpclmulqdq $0x10, "#mod128", %%xmm9, %%xmm4\n\t" \ - "vpshufd $0x4e, %%xmm6, %%xmm6\n\t" \ - "vpshufd $0x4e, %%xmm9, %%xmm9\n\t" \ - "vpxor %%xmm7, %%xmm8, %%xmm8\n\t" \ - "vpxor %%xmm4, %%xmm9, %%xmm9\n\t" \ - "vpxor %%xmm8, %%xmm6, %%xmm6\n\t" \ - "vpxor %%xmm10, %%xmm9, "#rs"\n\t" \ - "vpxor %%xmm5, %%xmm6, "#rm"\n\t" -#define GHASH_GFMUL_SQR_RED2_AVX2(rm, rs, a, b, mod128) \ - _GHASH_GFMUL_SQR_RED2_AVX2(rm, rs, a, b, mod128) - -#define CALC_HT_8_AVX2() \ - "vmovdqa %[MOD2_128], %%xmm11\n\t" \ - "vmovdqa " VAR(XR) ", %%xmm2\n\t" \ - "# H ^ 1 and H ^ 2\n\t" \ - GHASH_GFSQR_RED2_AVX2(%%xmm0, HR, %%xmm11) \ - "vmovdqu " VAR(HR) ", 0(" VAR(HTR) ")\n\t" \ - "vmovdqu %%xmm0 , 16(" VAR(HTR) ")\n\t" \ - "# H ^ 3 and H ^ 4\n\t" \ - GHASH_GFMUL_SQR_RED2_AVX2(%%xmm1, %%xmm3, HR, %%xmm0, %%xmm11) \ - "vmovdqu %%xmm1 , 32(" VAR(HTR) ")\n\t" \ - "vmovdqu %%xmm3 , 48(" VAR(HTR) ")\n\t" \ - "# H ^ 5 and H ^ 6\n\t" \ - GHASH_GFMUL_SQR_RED2_AVX2(%%xmm12, %%xmm0, %%xmm0, %%xmm1, %%xmm11) \ - "vmovdqu %%xmm12, 64(" VAR(HTR) ")\n\t" \ - "vmovdqu %%xmm0 , 80(" VAR(HTR) ")\n\t" \ - "# H ^ 7 and H ^ 8\n\t" \ - GHASH_GFMUL_SQR_RED2_AVX2(%%xmm12, %%xmm0, %%xmm1, %%xmm3, %%xmm11) \ - "vmovdqu %%xmm12, 96(" VAR(HTR) ")\n\t" \ - "vmovdqu %%xmm0 , 112(" VAR(HTR) ")\n\t" - 
-#define _GHASH_RED_AVX2(r, r2) \ - "vmovdqa %[MOD2_128], %%xmm2\n\t" \ - "vpclmulqdq $0x10, %%xmm2, "#r2", %%xmm0\n\t" \ - "vpshufd $0x4e, "#r2", %%xmm1\n\t" \ - "vpxor %%xmm0, %%xmm1, %%xmm1\n\t" \ - "vpclmulqdq $0x10, %%xmm2, %%xmm1, %%xmm0\n\t" \ - "vpshufd $0x4e, %%xmm1, %%xmm1\n\t" \ - "vpxor %%xmm0, %%xmm1, %%xmm1\n\t" \ - "vpxor %%xmm1, " #r ", " #r "\n\t" -#define GHASH_RED_AVX2(r, r2) \ - _GHASH_RED_AVX2(r, r2) - -#define GHASH_FULL_AVX2(r, r2, a, b) \ - GHASH_GFMUL_AVX2(r, r2, a, b) \ - GHASH_MID_AVX2(r, r2) \ - GHASH_RED_AVX2(r, r2) - -#define _GFMUL_3V_AVX2(r, r2, r3, a, b) \ - "vpclmulqdq $0x10, "#a", "#b", "#r3"\n\t" \ - "vpclmulqdq $0x01, "#a", "#b", %%xmm1\n\t" \ - "vpclmulqdq $0x00, "#a", "#b", "#r2"\n\t" \ - "vpclmulqdq $0x11, "#a", "#b", " #r "\n\t" \ - "vpxor %%xmm1, "#r3", "#r3"\n\t" -#define GFMUL_3V_AVX2(r, r2, r3, a, b) \ - _GFMUL_3V_AVX2(r, r2, r3, a, b) - -#define _GFMUL_XOR_3V_AVX2(r, r2, r3, a, b) \ - "vpclmulqdq $0x10, "#a", "#b", %%xmm2\n\t" \ - "vpclmulqdq $0x01, "#a", "#b", %%xmm1\n\t" \ - "vpclmulqdq $0x00, "#a", "#b", %%xmm0\n\t" \ - "vpclmulqdq $0x11, "#a", "#b", %%xmm3\n\t" \ - "vpxor %%xmm1, %%xmm2, %%xmm2\n\t" \ - "vpxor %%xmm3, " #r ", " #r "\n\t" \ - "vpxor %%xmm2, "#r3", "#r3"\n\t" \ - "vpxor %%xmm0, "#r2", "#r2"\n\t" -#define GFMUL_XOR_3V_AVX2(r, r2, r3, a, b) \ - _GFMUL_XOR_3V_AVX2(r, r2, r3, a, b) - -#define GHASH_GFMUL_RED_8_AVX2() \ - "vmovdqu (" VAR(HTR) "), %%xmm12\n\t" \ - GFMUL_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm11, %%xmm12) \ - "vmovdqu 16(" VAR(HTR) "), %%xmm12\n\t" \ - GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm10, %%xmm12) \ - "vmovdqu 32(" VAR(HTR) "), %%xmm11\n\t" \ - "vmovdqu 48(" VAR(HTR) "), %%xmm12\n\t" \ - GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm9, %%xmm11) \ - GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm8, %%xmm12) \ - "vmovdqu 64(" VAR(HTR) "), %%xmm11\n\t" \ - "vmovdqu 80(" VAR(HTR) "), %%xmm12\n\t" \ - GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm7, %%xmm11) \ - GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm6, %%xmm12) \ - "vmovdqu 96(" VAR(HTR) "), %%xmm11\n\t" \ - "vmovdqu 112(" VAR(HTR) "), %%xmm12\n\t" \ - GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm5, %%xmm11) \ - GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm4, %%xmm12) \ - "vpslldq $8, %%xmm14, %%xmm12\n\t" \ - "vpsrldq $8, %%xmm14, %%xmm14\n\t" \ - "vpxor %%xmm12, %%xmm13, %%xmm13\n\t" \ - "vpxor %%xmm14, " VAR(XR) ", " VAR(XR) "\n\t" \ - GHASH_RED_AVX2(XR, %%xmm13) - -#define CALC_IV_12_AVX2() \ - "# Calculate values when IV is 12 bytes\n\t" \ - "# Set counter based on IV\n\t" \ - "movl $0x01000000, %%ecx\n\t" \ - "vpinsrq $0, 0(%%rax), %%xmm13, %%xmm13\n\t" \ - "vpinsrd $2, 8(%%rax), %%xmm13, %%xmm13\n\t" \ - "vpinsrd $3, %%ecx, %%xmm13, %%xmm13\n\t" \ - "# H = Encrypt X(=0) and T = Encrypt counter\n\t" \ - "vmovdqa 0(%[KEY]), " VAR(HR) "\n\t" \ - "vmovdqa 16(%[KEY]), %%xmm12\n\t" \ - "vpxor " VAR(HR) ", %%xmm13, %%xmm1\n\t" \ - "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vmovdqa 32(%[KEY]), %%xmm0\n\t" \ - "vmovdqa 48(%[KEY]), %%xmm12\n\t" \ - "vaesenc %%xmm0, " VAR(HR) ", " VAR(HR) "\n\t" \ - "vaesenc %%xmm0, %%xmm1, %%xmm1\n\t" \ - "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vmovdqa 64(%[KEY]), %%xmm0\n\t" \ - "vmovdqa 80(%[KEY]), %%xmm12\n\t" \ - "vaesenc %%xmm0, " VAR(HR) ", " VAR(HR) "\n\t" \ - "vaesenc %%xmm0, %%xmm1, %%xmm1\n\t" \ - "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vmovdqa 96(%[KEY]), 
%%xmm0\n\t" \ - "vmovdqa 112(%[KEY]), %%xmm12\n\t" \ - "vaesenc %%xmm0, " VAR(HR) ", " VAR(HR) "\n\t" \ - "vaesenc %%xmm0, %%xmm1, %%xmm1\n\t" \ - "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vmovdqa 128(%[KEY]), %%xmm0\n\t" \ - "vmovdqa 144(%[KEY]), %%xmm12\n\t" \ - "vaesenc %%xmm0, " VAR(HR) ", " VAR(HR) "\n\t" \ - "vaesenc %%xmm0, %%xmm1, %%xmm1\n\t" \ - "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "cmpl $11, %[nr]\n\t" \ - "vmovdqa 160(%[KEY]), %%xmm0\n\t" \ - "jl 31f\n\t" \ - "vmovdqa 176(%[KEY]), %%xmm12\n\t" \ - "vaesenc %%xmm0, " VAR(HR) ", " VAR(HR) "\n\t" \ - "vaesenc %%xmm0, %%xmm1, %%xmm1\n\t" \ - "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "cmpl $13, %[nr]\n\t" \ - "vmovdqa 192(%[KEY]), %%xmm0\n\t" \ - "jl 31f\n\t" \ - "vmovdqa 208(%[KEY]), %%xmm12\n\t" \ - "vaesenc %%xmm0, " VAR(HR) ", " VAR(HR) "\n\t" \ - "vaesenc %%xmm0, %%xmm1, %%xmm1\n\t" \ - "vaesenc %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t" \ - "vaesenc %%xmm12, %%xmm1, %%xmm1\n\t" \ - "vmovdqu 224(%[KEY]), %%xmm0\n\t" \ - "31:\n\t" \ - "vaesenclast %%xmm0, " VAR(HR) ", " VAR(HR) "\n\t" \ - "vaesenclast %%xmm0, %%xmm1, %%xmm1\n\t" \ - "vpshufb %[BSWAP_MASK], " VAR(HR) ", " VAR(HR) "\n\t" \ - "vmovdqu %%xmm1, " VAR(TR) "\n\t" \ - -#define CALC_IV_AVX2() \ - "# Calculate values when IV is not 12 bytes\n\t" \ - "# H = Encrypt X(=0)\n\t" \ - "vmovdqa 0(%[KEY]), " VAR(HR) "\n\t" \ - VAESENC_AVX(HR) \ - "vpshufb %[BSWAP_MASK], " VAR(HR) ", " VAR(HR) "\n\t" \ - "# Calc counter\n\t" \ - "# Initialization vector\n\t" \ - "cmpl $0, %%edx\n\t" \ - "movq $0, %%rcx\n\t" \ - "je 45f\n\t" \ - "cmpl $16, %%edx\n\t" \ - "jl 44f\n\t" \ - "andl $0xfffffff0, %%edx\n\t" \ - "\n" \ - "43:\n\t" \ - "vmovdqu (%%rax,%%rcx,1), %%xmm4\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm4, %%xmm13, %%xmm13\n\t" \ - GHASH_FULL_AVX2(%%xmm13, %%xmm12, %%xmm13, HR) \ - "addl $16, %%ecx\n\t" \ - "cmpl %%edx, %%ecx\n\t" \ - "jl 43b\n\t" \ - "movl %[ibytes], %%edx\n\t" \ - "cmpl %%edx, %%ecx\n\t" \ - "je 45f\n\t" \ - "\n" \ - "44:\n\t" \ - "subq $16, %%rsp\n\t" \ - "vpxor %%xmm4, %%xmm4, %%xmm4\n\t" \ - "xorl %%ebx, %%ebx\n\t" \ - "vmovdqu %%xmm4, (%%rsp)\n\t" \ - "42:\n\t" \ - "movzbl (%%rax,%%rcx,1), %%r13d\n\t" \ - "movb %%r13b, (%%rsp,%%rbx,1)\n\t" \ - "incl %%ecx\n\t" \ - "incl %%ebx\n\t" \ - "cmpl %%edx, %%ecx\n\t" \ - "jl 42b\n\t" \ - "vmovdqu (%%rsp), %%xmm4\n\t" \ - "addq $16, %%rsp\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm4, %%xmm13, %%xmm13\n\t" \ - GHASH_FULL_AVX2(%%xmm13, %%xmm12, %%xmm13, HR) \ - "\n" \ - "45:\n\t" \ - "# T = Encrypt counter\n\t" \ - "vpxor %%xmm0, %%xmm0, %%xmm0\n\t" \ - "shll $3, %%edx\n\t" \ - "vpinsrq $0, %%rdx, %%xmm0, %%xmm0\n\t" \ - "vpxor %%xmm0, %%xmm13, %%xmm13\n\t" \ - GHASH_FULL_AVX2(%%xmm13, %%xmm12, %%xmm13, HR) \ - "vpshufb %[BSWAP_MASK], %%xmm13, %%xmm13\n\t" \ - "# Encrypt counter\n\t" \ - "vmovdqa 0(%[KEY]), %%xmm4\n\t" \ - "vpxor %%xmm13, %%xmm4, %%xmm4\n\t" \ - VAESENC_AVX(%%xmm4) \ - "vmovdqu %%xmm4, " VAR(TR) "\n\t" - -#define CALC_AAD_AVX2() \ - "# Additional authentication data\n\t" \ - "movl %[abytes], %%edx\n\t" \ - "cmpl $0, %%edx\n\t" \ - "je 25f\n\t" \ - "movq %[addt], %%rax\n\t" \ - "xorl %%ecx, %%ecx\n\t" \ - "cmpl $16, %%edx\n\t" \ - "jl 24f\n\t" \ - "andl $0xfffffff0, %%edx\n\t" \ - "\n" \ - "23:\n\t" \ - "vmovdqu (%%rax,%%rcx,1), %%xmm4\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ - "vpxor 
%%xmm4, " VAR(XR) ", " VAR(XR) "\n\t" \ - GHASH_FULL_AVX2(XR, %%xmm12, XR, HR) \ - "addl $16, %%ecx\n\t" \ - "cmpl %%edx, %%ecx\n\t" \ - "jl 23b\n\t" \ - "movl %[abytes], %%edx\n\t" \ - "cmpl %%edx, %%ecx\n\t" \ - "je 25f\n\t" \ - "\n" \ - "24:\n\t" \ - "subq $16, %%rsp\n\t" \ - "vpxor %%xmm4, %%xmm4, %%xmm4\n\t" \ - "xorl %%ebx, %%ebx\n\t" \ - "vmovdqu %%xmm4, (%%rsp)\n\t" \ - "22:\n\t" \ - "movzbl (%%rax,%%rcx,1), %%r13d\n\t" \ - "movb %%r13b, (%%rsp,%%rbx,1)\n\t" \ - "incl %%ecx\n\t" \ - "incl %%ebx\n\t" \ - "cmpl %%edx, %%ecx\n\t" \ - "jl 22b\n\t" \ - "vmovdqu (%%rsp), %%xmm4\n\t" \ - "addq $16, %%rsp\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" \ - "vpxor %%xmm4, " VAR(XR) ", " VAR(XR) "\n\t" \ - GHASH_FULL_AVX2(XR, %%xmm12, XR, HR) \ - "\n" \ - "25:\n\t" - -#define VAESENC_128_GHASH_AVX2(src, o) \ - "leaq (%[in]," VAR(KR64) ",1), %%rcx\n\t" \ - "leaq (%[out]," VAR(KR64) ",1), %%rdx\n\t" \ - /* src is either %%rcx or %%rdx */ \ - VAESENC_CTR() \ - VAESENC_XOR() \ - VAESENC_PCLMUL_AVX2_1(src, 16, (o-128), 112) \ - VAESENC_PCLMUL_AVX2_2(src, 32, (o-112), 96) \ - VAESENC_PCLMUL_AVX2_N(src, 48, (o- 96), 80) \ - VAESENC_PCLMUL_AVX2_N(src, 64, (o- 80), 64) \ - VAESENC_PCLMUL_AVX2_N(src, 80, (o- 64), 48) \ - VAESENC_PCLMUL_AVX2_N(src, 96, (o- 48), 32) \ - VAESENC_PCLMUL_AVX2_N(src, 112, (o- 32), 16) \ - VAESENC_PCLMUL_AVX2_N(src, 128, (o- 16), 0) \ - VAESENC_PCLMUL_AVX2_L(144) \ - "cmpl $11, %[nr]\n\t" \ - "vmovdqa 160(%[KEY]), %%xmm12\n\t" \ - "jl 4f\n\t" \ - VAESENC() \ - VAESENC_SET(176) \ - "cmpl $13, %[nr]\n\t" \ - "vmovdqa 192(%[KEY]), %%xmm12\n\t" \ - "jl 4f\n\t" \ - VAESENC() \ - VAESENC_SET(208) \ - "vmovdqa 224(%[KEY]), %%xmm12\n\t" \ - "\n" \ -"4:\n\t" \ - VAESENC_LAST(%%rcx, %%rdx) - -#define AESENC_LAST15_ENC_AVX2() \ - "movl %[nbytes], %%ecx\n\t" \ - "movl %%ecx, %%edx\n\t" \ - "andl $0x0f, %%ecx\n\t" \ - "jz 55f\n\t" \ - "vmovdqu " VAR(CTR1) ", %%xmm13\n\t" \ - "vpshufb %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t" \ - "vpxor 0(%[KEY]), %%xmm13, %%xmm13\n\t" \ - VAESENC_AVX(%%xmm13) \ - "subq $16, %%rsp\n\t" \ - "xorl %%ecx, %%ecx\n\t" \ - "vmovdqu %%xmm13, (%%rsp)\n\t" \ - "\n" \ - "51:\n\t" \ - "movzbl (%[in]," VAR(KR64) ",1), %%r13d\n\t" \ - "xorb (%%rsp,%%rcx,1), %%r13b\n\t" \ - "movb %%r13b, (%[out]," VAR(KR64) ",1)\n\t" \ - "movb %%r13b, (%%rsp,%%rcx,1)\n\t" \ - "incl " VAR(KR) "\n\t" \ - "incl %%ecx\n\t" \ - "cmpl %%edx, " VAR(KR) "\n\t" \ - "jl 51b\n\t" \ - "xorq %%r13, %%r13\n\t" \ - "cmpl $16, %%ecx\n\t" \ - "je 53f\n\t" \ - "\n" \ - "52:\n\t" \ - "movb %%r13b, (%%rsp,%%rcx,1)\n\t" \ - "incl %%ecx\n\t" \ - "cmpl $16, %%ecx\n\t" \ - "jl 52b\n\t" \ - "53:\n\t" \ - "vmovdqu (%%rsp), %%xmm13\n\t" \ - "addq $16, %%rsp\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm13, %%xmm13\n\t" \ - "vpxor %%xmm13, " VAR(XR) ", " VAR(XR) "\n\t" \ - GHASH_GFMUL_RED_AVX2(XR, HR, XR) \ - -#define AESENC_LAST15_DEC_AVX2() \ - "movl %[nbytes], %%ecx\n\t" \ - "movl %%ecx, %%edx\n\t" \ - "andl $0x0f, %%ecx\n\t" \ - "jz 55f\n\t" \ - "vmovdqu " VAR(CTR1) ", %%xmm13\n\t" \ - "vpshufb %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t" \ - "vpxor 0(%[KEY]), %%xmm13, %%xmm13\n\t" \ - VAESENC_AVX(%%xmm13) \ - "subq $32, %%rsp\n\t" \ - "xorl %%ecx, %%ecx\n\t" \ - "vmovdqu %%xmm13, (%%rsp)\n\t" \ - "vpxor %%xmm0, %%xmm0, %%xmm0\n\t" \ - "vmovdqu %%xmm0, 16(%%rsp)\n\t" \ - "\n" \ - "51:\n\t" \ - "movzbl (%[in]," VAR(KR64) ",1), %%r13d\n\t" \ - "movb %%r13b, 16(%%rsp,%%rcx,1)\n\t" \ - "xorb (%%rsp,%%rcx,1), %%r13b\n\t" \ - "movb %%r13b, (%[out]," VAR(KR64) ",1)\n\t" \ - "incl " VAR(KR) "\n\t" \ - "incl %%ecx\n\t" \ - "cmpl 
%%edx, " VAR(KR) "\n\t" \ - "jl 51b\n\t" \ - "53:\n\t" \ - "vmovdqu 16(%%rsp), %%xmm13\n\t" \ - "addq $32, %%rsp\n\t" \ - "vpshufb %[BSWAP_MASK], %%xmm13, %%xmm13\n\t" \ - "vpxor %%xmm13, " VAR(XR) ", " VAR(XR) "\n\t" \ - GHASH_GFMUL_RED_AVX2(XR, HR, XR) \ - -#define CALC_TAG_AVX2() \ - "movl %[nbytes], %%edx\n\t" \ - "movl %[abytes], %%ecx\n\t" \ - "shlq $3, %%rdx\n\t" \ - "shlq $3, %%rcx\n\t" \ - "vpinsrq $0, %%rdx, %%xmm0, %%xmm0\n\t" \ - "vpinsrq $1, %%rcx, %%xmm0, %%xmm0\n\t" \ - "vpxor %%xmm0, " VAR(XR) ", " VAR(XR) "\n\t" \ - GHASH_GFMUL_RED_AVX2(XR, HR, XR) \ - "vpshufb %[BSWAP_MASK], " VAR(XR) ", " VAR(XR) "\n\t" \ - "vpxor " VAR(TR) ", " VAR(XR) ", %%xmm0\n\t" \ - - -static void AES_GCM_encrypt_avx2(const unsigned char *in, unsigned char *out, - const unsigned char* addt, - const unsigned char* ivec, unsigned char *tag, - unsigned int nbytes, unsigned int abytes, - unsigned int ibytes, unsigned int tbytes, - const unsigned char* key, int nr) -{ - register const unsigned char* iv asm("rax") = ivec; - register unsigned int ivLen asm("ebx") = ibytes; - - __asm__ __volatile__ ( - "subq $" VAR(STACK_OFFSET) ", %%rsp\n\t" - /* Counter is xmm13 */ - "vpxor %%xmm13, %%xmm13, %%xmm13\n\t" - "vpxor " VAR(XR) ", " VAR(XR) ", " VAR(XR) "\n\t" - "movl %[ibytes], %%edx\n\t" - "cmpl $12, %%edx\n\t" - "jne 35f\n\t" - CALC_IV_12_AVX2() - "jmp 39f\n\t" - "\n" - "35:\n\t" - CALC_IV_AVX2() - "\n" - "39:\n\t" - - CALC_AAD_AVX2() - - "# Calculate counter and H\n\t" - "vpsrlq $63, " VAR(HR) ", %%xmm5\n\t" - "vpsllq $1, " VAR(HR) ", %%xmm4\n\t" - "vpslldq $8, %%xmm5, %%xmm5\n\t" - "vpor %%xmm5, %%xmm4, %%xmm4\n\t" - "vpshufd $0xff, " VAR(HR) ", " VAR(HR) "\n\t" - "vpsrad $31, " VAR(HR) ", " VAR(HR) "\n\t" - "vpshufb %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t" - "vpand %[MOD2_128], " VAR(HR) ", " VAR(HR) "\n\t" - "vpaddd %[ONE], %%xmm13, %%xmm13\n\t" - "vpxor %%xmm4, " VAR(HR) ", " VAR(HR) "\n\t" - "vmovdqu %%xmm13, " VAR(CTR1) "\n\t" - - "xorl " VAR(KR) ", " VAR(KR) "\n\t" - -#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX2_NO_UNROLL) - "cmpl $128, %[nbytes]\n\t" - "movl %[nbytes], %%r13d\n\t" - "jl 5f\n\t" - "andl $0xffffff80, %%r13d\n\t" - - CALC_HT_8_AVX2() - - "# First 128 bytes of input\n\t" - VAESENC_128() - - "cmpl $128, %%r13d\n\t" - "movl $128, " VAR(KR) "\n\t" - "jle 2f\n\t" - - "# More 128 bytes of input\n\t" - "\n" - "3:\n\t" - VAESENC_128_GHASH_AVX2(%%rdx, 0) - "addl $128, " VAR(KR) "\n\t" - "cmpl %%r13d, " VAR(KR) "\n\t" - "jl 3b\n\t" - "\n" - "2:\n\t" - "vmovdqa %[BSWAP_MASK], %%xmm13\n\t" - "vpshufb %%xmm13, %%xmm4, %%xmm4\n\t" - "vpshufb %%xmm13, %%xmm5, %%xmm5\n\t" - "vpshufb %%xmm13, %%xmm6, %%xmm6\n\t" - "vpshufb %%xmm13, %%xmm7, %%xmm7\n\t" - "vpshufb %%xmm13, %%xmm8, %%xmm8\n\t" - "vpshufb %%xmm13, %%xmm9, %%xmm9\n\t" - "vpshufb %%xmm13, %%xmm10, %%xmm10\n\t" - "vpshufb %%xmm13, %%xmm11, %%xmm11\n\t" - "vpxor %%xmm2, %%xmm4, %%xmm4\n\t" - - GHASH_GFMUL_RED_8_AVX2() - - "vmovdqu 0(" VAR(HTR) "), " VAR(HR) "\n\t" - "\n" - "5:\n\t" - "movl %[nbytes], %%edx\n\t" - "cmpl %%edx, " VAR(KR) "\n\t" - "jge 55f\n\t" -#endif - - "movl %[nbytes], %%r13d\n\t" - "andl $0xfffffff0, %%r13d\n\t" - "cmpl %%r13d, " VAR(KR) "\n\t" - "jge 14f\n\t" - - VAESENC_BLOCK_AVX2() - "addl $16, " VAR(KR) "\n\t" - "cmpl %%r13d, " VAR(KR) "\n\t" - "jge 13f\n\t" - "vmovdqa %[MOD2_128], %%xmm0\n\t" - "\n" - "12:\n\t" - "vmovdqu (%[in]," VAR(KR64) ",1), %%xmm9\n\t" - "vmovdqu " VAR(CTR1) ", %%xmm5\n\t" - "vpshufb %[BSWAP_EPI64], %%xmm5, %%xmm4\n\t" - "vpaddd %[ONE], %%xmm5, %%xmm5\n\t" - "vmovdqu %%xmm5, " 
VAR(CTR1) "\n\t" - VAESENC_GFMUL_SB_AVX2(%%xmm9, HR, XR, CTR1) - "vmovdqu %%xmm4, (%[out]," VAR(KR64) ",1)\n\t" - "vpshufb %[BSWAP_MASK], %%xmm4, %%xmm4\n\t" - "addl $16, " VAR(KR) "\n\t" - "vpxor %%xmm4, " VAR(XR) ", " VAR(XR) "\n\t" - "cmpl %%r13d, " VAR(KR) "\n\t" - "jl 12b\n\t" - "\n" - "13:\n\t" - GHASH_GFMUL_RED_AVX2(XR, HR, XR) - "\n" - "14:\n\t" - - AESENC_LAST15_ENC_AVX2() - "\n" - "55:\n\t" - - CALC_TAG_AVX2() - STORE_TAG_AVX() - "addq $" VAR(STACK_OFFSET) ", %%rsp\n\t" - "vzeroupper\n\t" - - : - : [KEY] "r" (key), - [in] "r" (in), [out] "r" (out), [nr] "r" (nr), - [nbytes] "r" (nbytes), [abytes] "r" (abytes), [addt] "r" (addt), - [ivec] "r" (iv), [ibytes] "r" (ivLen), [tbytes] "r" (tbytes), - [tag] "r" (tag), - [BSWAP_MASK] "m" (BSWAP_MASK), - [BSWAP_EPI64] "m" (BSWAP_EPI64), - [ONE] "m" (ONE), -#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX2_NO_UNROLL) - [TWO] "m" (TWO), [THREE] "m" (THREE), [FOUR] "m" (FOUR), - [FIVE] "m" (FIVE), [SIX] "m" (SIX), [SEVEN] "m" (SEVEN), - [EIGHT] "m" (EIGHT), -#endif - [MOD2_128] "m" (MOD2_128) - : "xmm15", "xmm14", "xmm13", "xmm12", - "xmm0", "xmm1", "xmm2", "xmm3", "memory", - "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", - "rcx", "rdx", "r13" - ); -} -#endif /* HAVE_INTEL_AVX2 */ -#endif /* HAVE_INTEL_AVX1 */ - -#ifdef HAVE_AES_DECRYPT -/* Figure 10. AES-GCM – Decrypt With Single Block Ghash at a Time */ - -static void AES_GCM_decrypt(const unsigned char *in, unsigned char *out, - const unsigned char* addt, - const unsigned char* ivec, const unsigned char *tag, - int nbytes, int abytes, int ibytes, int tbytes, - const unsigned char* key, int nr, int* res) -{ - register const unsigned char* iv asm("rax") = ivec; - register int ivLen asm("ebx") = ibytes; - register int tagLen asm("edx") = tbytes; - - __asm__ __volatile__ ( - "pushq %%rdx\n\t" - "subq $" VAR(STACK_OFFSET) ", %%rsp\n\t" - /* Counter is xmm13 */ - "pxor %%xmm13, %%xmm13\n\t" - "pxor %%xmm15, %%xmm15\n\t" - "movl %[ibytes], %%edx\n\t" - "cmpl $12, %%edx\n\t" - "jne 35f\n\t" - CALC_IV_12() - "\n" - "35:\n\t" - CALC_IV() - "\n" - "39:\n\t" - - CALC_AAD() - - "# Calculate counter and H\n\t" - "pshufb %[BSWAP_EPI64], %%xmm13\n\t" - "movdqa " VAR(HR) ", %%xmm5\n\t" - "paddd %[ONE], %%xmm13\n\t" - "movdqa " VAR(HR) ", %%xmm4\n\t" - "movdqu %%xmm13, " VAR(CTR1) "\n\t" - "psrlq $63, %%xmm5\n\t" - "psllq $1, %%xmm4\n\t" - "pslldq $8, %%xmm5\n\t" - "por %%xmm5, %%xmm4\n\t" - "pshufd $0xff, " VAR(HR) ", " VAR(HR) "\n\t" - "psrad $31, " VAR(HR) "\n\t" - "pand %[MOD2_128], " VAR(HR) "\n\t" - "pxor %%xmm4, " VAR(HR) "\n\t" - - "xorl " VAR(KR) ", " VAR(KR) "\n\t" - -#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL) - "cmpl $128, %[nbytes]\n\t" - "jl 5f\n\t" - - CALC_HT_8_AVX() - - "movl %[nbytes], %%r13d\n\t" - "andl $0xffffff80, %%r13d\n\t" - "\n" - "2:\n\t" - AESENC_128_GHASH_AVX(%%rcx, 128) - "addl $128, " VAR(KR) "\n\t" - "cmpl %%r13d, " VAR(KR) "\n\t" - "jl 2b\n\t" - - "movdqa %%xmm2, " VAR(XR) "\n\t" - "movdqu (%%rsp), " VAR(HR) "\n\t" - "5:\n\t" - "movl %[nbytes], %%edx\n\t" - "cmpl %%edx, " VAR(KR) "\n\t" - "jge 55f\n\t" -#endif - "movl %[nbytes], %%r13d\n\t" - "andl $0xfffffff0, %%r13d\n\t" - "cmpl %%r13d, " VAR(KR) "\n\t" - "jge 13f\n\t" - - "\n" - "12:\n\t" - "leaq (%[in]," VAR(KR64) ",1), %%rcx\n\t" - "leaq (%[out]," VAR(KR64) ",1), %%rdx\n\t" - "movdqu (%%rcx), %%xmm1\n\t" - "movdqa " VAR(HR) ", %%xmm0\n\t" - "pshufb %[BSWAP_MASK], %%xmm1\n\t" - "pxor " VAR(XR) ", %%xmm1\n\t" - AESENC_GFMUL(%%rcx, %%rdx, %%xmm0, %%xmm1) - 
"addl $16, " VAR(KR) "\n\t" - "cmpl %%r13d, " VAR(KR) "\n\t" - "jl 12b\n\t" - "\n" - "13:\n\t" - - AESENC_LAST15_DEC_AVX() - "\n" - "55:\n\t" - - CALC_TAG() - "addq $" VAR(STACK_OFFSET) ", %%rsp\n\t" - "popq %%rdx\n\t" - CMP_TAG() - - : - : [KEY] "r" (key), - [in] "r" (in), [out] "r" (out), [nr] "r" (nr), - [nbytes] "r" (nbytes), [abytes] "r" (abytes), [addt] "r" (addt), - [ivec] "r" (iv), [ibytes] "r" (ivLen), [tbytes] "r" (tagLen), - [tag] "r" (tag), [res] "r" (res), - [BSWAP_MASK] "m" (BSWAP_MASK), - [BSWAP_EPI64] "m" (BSWAP_EPI64), - [ONE] "m" (ONE), -#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL) - [TWO] "m" (TWO), [THREE] "m" (THREE), [FOUR] "m" (FOUR), - [FIVE] "m" (FIVE), [SIX] "m" (SIX), [SEVEN] "m" (SEVEN), - [EIGHT] "m" (EIGHT), -#endif - [MOD2_128] "m" (MOD2_128) - : "xmm15", "xmm14", "xmm13", "xmm12", - "xmm0", "xmm1", "xmm2", "xmm3", "memory", - "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", - "rcx", "r13" - ); -} - -#ifdef HAVE_INTEL_AVX1 -static void AES_GCM_decrypt_avx1(const unsigned char *in, unsigned char *out, - const unsigned char* addt, - const unsigned char* ivec, - const unsigned char *tag, int nbytes, - int abytes, int ibytes, int tbytes, - const unsigned char* key, int nr, int* res) -{ - register const unsigned char* iv asm("rax") = ivec; - register int ivLen asm("ebx") = ibytes; - register int tagLen asm("edx") = tbytes; - - __asm__ __volatile__ ( - "pushq %%rdx\n\t" - "subq $" VAR(STACK_OFFSET) ", %%rsp\n\t" - /* Counter is xmm13 */ - "vpxor %%xmm13, %%xmm13, %%xmm13\n\t" - "vpxor %%xmm15, %%xmm15, %%xmm15\n\t" - "movl %[ibytes], %%edx\n\t" - "cmpl $12, %%edx\n\t" - "jne 35f\n\t" - CALC_IV_12_AVX1() - "\n" - "35:\n\t" - CALC_IV_AVX1() - "\n" - "39:\n\t" - - CALC_AAD_AVX1() - - "# Calculate counter and H\n\t" - "vpsrlq $63, " VAR(HR) ", %%xmm5\n\t" - "vpsllq $1, " VAR(HR) ", %%xmm4\n\t" - "vpslldq $8, %%xmm5, %%xmm5\n\t" - "vpor %%xmm5, %%xmm4, %%xmm4\n\t" - "vpshufd $0xff, " VAR(HR) ", " VAR(HR) "\n\t" - "vpsrad $31, " VAR(HR) ", " VAR(HR) "\n\t" - "vpshufb %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t" - "vpand %[MOD2_128], " VAR(HR) ", " VAR(HR) "\n\t" - "vpaddd %[ONE], %%xmm13, %%xmm13\n\t" - "vpxor %%xmm4, " VAR(HR) ", " VAR(HR) "\n\t" - "vmovdqu %%xmm13, " VAR(CTR1) "\n\t" - - "xorl " VAR(KR) ", " VAR(KR) "\n\t" - -#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL) - "cmpl $128, %[nbytes]\n\t" - "jl 5f\n\t" - - CALC_HT_8_AVX1() - - "movl %[nbytes], %%r13d\n\t" - "andl $0xffffff80, %%r13d\n\t" - "\n" - "2:\n\t" - VAESENC_128_GHASH_AVX1(%%rcx, 128) - "addl $128, " VAR(KR) "\n\t" - "cmpl %%r13d, " VAR(KR) "\n\t" - "jl 2b\n\t" - - "vmovdqa %%xmm2, " VAR(XR) "\n\t" - "vmovdqu (%%rsp), " VAR(HR) "\n\t" - "5:\n\t" - "movl %[nbytes], %%edx\n\t" - "cmpl %%edx, " VAR(KR) "\n\t" - "jge 55f\n\t" -#endif - "movl %[nbytes], %%r13d\n\t" - "andl $0xfffffff0, %%r13d\n\t" - "cmpl %%r13d, " VAR(KR) "\n\t" - "jge 13f\n\t" - - "\n" - "12:\n\t" - "vmovdqu (%[in]," VAR(KR64) ",1), %%xmm9\n\t" - "vmovdqa " VAR(HR) ", %%xmm0\n\t" - "vpshufb %[BSWAP_MASK], %%xmm9, %%xmm1\n\t" - "vpxor " VAR(XR) ", %%xmm1, %%xmm1\n\t" - VAESENC_GFMUL(%%xmm9, %%xmm0, %%xmm1) - "addl $16, " VAR(KR) "\n\t" - "cmpl %%r13d, " VAR(KR) "\n\t" - "jl 12b\n\t" - "\n" - "13:\n\t" - - AESENC_LAST15_DEC_AVX1() - "\n" - "55:\n\t" - - CALC_TAG_AVX1() - "addq $" VAR(STACK_OFFSET) ", %%rsp\n\t" - "popq %%rdx\n\t" - CMP_TAG_AVX() - "vzeroupper\n\t" - - : - : [KEY] "r" (key), - [in] "r" (in), [out] "r" (out), [nr] "r" (nr), - [nbytes] "r" (nbytes), [abytes] 
"r" (abytes), [addt] "r" (addt), - [ivec] "r" (iv), [ibytes] "r" (ivLen), [tbytes] "r" (tagLen), - [tag] "r" (tag), [res] "r" (res), - [BSWAP_MASK] "m" (BSWAP_MASK), - [BSWAP_EPI64] "m" (BSWAP_EPI64), - [ONE] "m" (ONE), -#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL) - [TWO] "m" (TWO), [THREE] "m" (THREE), [FOUR] "m" (FOUR), - [FIVE] "m" (FIVE), [SIX] "m" (SIX), [SEVEN] "m" (SEVEN), - [EIGHT] "m" (EIGHT), -#endif - [MOD2_128] "m" (MOD2_128) - : "xmm15", "xmm14", "xmm13", "xmm12", - "xmm0", "xmm1", "xmm2", "xmm3", "memory", - "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", - "rcx", "r13" - ); -} - -#ifdef HAVE_INTEL_AVX2 -static void AES_GCM_decrypt_avx2(const unsigned char *in, unsigned char *out, - const unsigned char* addt, - const unsigned char* ivec, - const unsigned char *tag, int nbytes, - int abytes, int ibytes, int tbytes, - const unsigned char* key, int nr, int* res) -{ - register const unsigned char* iv asm("rax") = ivec; - register int ivLen asm("ebx") = ibytes; - register int tagLen asm("edx") = tbytes; - - __asm__ __volatile__ ( - "pushq %%rdx\n\t" - "subq $" VAR(STACK_OFFSET) ", %%rsp\n\t" - /* Counter is xmm13 */ - "vpxor %%xmm13, %%xmm13, %%xmm13\n\t" - "vpxor %%xmm15, %%xmm15, %%xmm15\n\t" - "movl %[ibytes], %%edx\n\t" - "cmpl $12, %%edx\n\t" - "jne 35f\n\t" - CALC_IV_12_AVX2() - "jmp 39f\n\t" - "\n" - "35:\n\t" - CALC_IV_AVX2() - "\n" - "39:\n\t" - - CALC_AAD_AVX2() - - "# Calculate counter and H\n\t" - "vpsrlq $63, " VAR(HR) ", %%xmm5\n\t" - "vpsllq $1, " VAR(HR) ", %%xmm4\n\t" - "vpslldq $8, %%xmm5, %%xmm5\n\t" - "vpor %%xmm5, %%xmm4, %%xmm4\n\t" - "vpshufd $0xff, " VAR(HR) ", " VAR(HR) "\n\t" - "vpsrad $31, " VAR(HR) ", " VAR(HR) "\n\t" - "vpshufb %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t" - "vpand %[MOD2_128], " VAR(HR) ", " VAR(HR) "\n\t" - "vpaddd %[ONE], %%xmm13, %%xmm13\n\t" - "vpxor %%xmm4, " VAR(HR) ", " VAR(HR) "\n\t" - "vmovdqu %%xmm13, " VAR(CTR1) "\n\t" - - "xorl " VAR(KR) ", " VAR(KR) "\n\t" - -#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX2_NO_UNROLL) - "cmpl $128, %[nbytes]\n\t" - "jl 5f\n\t" - - CALC_HT_8_AVX2() - - "movl %[nbytes], %%r13d\n\t" - "andl $0xffffff80, %%r13d\n\t" - "\n" - "2:\n\t" - VAESENC_128_GHASH_AVX2(%%rcx, 128) - "addl $128, " VAR(KR) "\n\t" - "cmpl %%r13d, " VAR(KR) "\n\t" - "jl 2b\n\t" - - "vmovdqa %%xmm2, " VAR(XR) "\n\t" - "vmovdqu (%%rsp), " VAR(HR) "\n\t" - "5:\n\t" - "movl %[nbytes], %%edx\n\t" - "cmpl %%edx, " VAR(KR) "\n\t" - "jge 55f\n\t" -#endif - "movl %[nbytes], %%r13d\n\t" - "andl $0xfffffff0, %%r13d\n\t" - "cmpl %%r13d, " VAR(KR) "\n\t" - "jge 13f\n\t" - - "vmovdqa %[MOD2_128], %%xmm0\n\t" - "\n" - "12:\n\t" - "vmovdqu (%[in]," VAR(KR64) ",1), %%xmm9\n\t" - "vmovdqu " VAR(CTR1) ", %%xmm5\n\t" - "vpshufb %[BSWAP_MASK], %%xmm9, %%xmm1\n\t" - "vpshufb %[BSWAP_EPI64], %%xmm5, %%xmm4\n\t" - "vpaddd %[ONE], %%xmm5, %%xmm5\n\t" - "vpxor " VAR(XR) ", %%xmm1, %%xmm1\n\t" - "vmovdqu %%xmm5, " VAR(CTR1) "\n\t" - VAESENC_GFMUL_SB_AVX2(%%xmm9, HR, %%xmm1, CTR1) - "vmovdqu %%xmm4, (%[out]," VAR(KR64) ",1)\n\t" - "addl $16, " VAR(KR) "\n\t" - "cmpl %%r13d, " VAR(KR) "\n\t" - "jl 12b\n\t" - "\n" - "13:\n\t" - - AESENC_LAST15_DEC_AVX2() - "\n" - "55:\n\t" - - CALC_TAG_AVX2() - "addq $" VAR(STACK_OFFSET) ", %%rsp\n\t" - "popq %%rdx\n\t" - CMP_TAG_AVX() - "vzeroupper\n\t" - - : - : [KEY] "r" (key), - [in] "r" (in), [out] "r" (out), [nr] "r" (nr), - [nbytes] "r" (nbytes), [abytes] "r" (abytes), [addt] "r" (addt), - [ivec] "r" (iv), [ibytes] "r" (ivLen), [tbytes] "r" (tagLen), - [tag] 
"r" (tag), [res] "r" (res), - [BSWAP_MASK] "m" (BSWAP_MASK), - [BSWAP_EPI64] "m" (BSWAP_EPI64), - [ONE] "m" (ONE), -#if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX2_NO_UNROLL) - [TWO] "m" (TWO), [THREE] "m" (THREE), [FOUR] "m" (FOUR), - [FIVE] "m" (FIVE), [SIX] "m" (SIX), [SEVEN] "m" (SEVEN), - [EIGHT] "m" (EIGHT), -#endif - [MOD2_128] "m" (MOD2_128) - : "xmm15", "xmm14", "xmm13", "xmm12", - "xmm0", "xmm1", "xmm2", "xmm3", "memory", - "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", - "rcx", "r13" - ); -} -#endif /* HAVE_INTEL_AVX2 */ -#endif /* HAVE_INTEL_AVX1 */ -#endif /* HAVE_AES_DECRYPT */ - -#else /* _MSC_VER */ +static const __m128i BSWAP_EPI64 = + M128_INIT(0x0001020304050607, 0x08090a0b0c0d0e0f); +static const __m128i BSWAP_MASK = + M128_INIT(0x08090a0b0c0d0e0f, 0x0001020304050607); + + /* The following are for MSC based builds which do not allow * inline assembly. Intrinsic functions are used instead. */ @@ -7013,7 +4607,7 @@ __m128i tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; #endif - if (ibytes == 12) + if (ibytes == GCM_NONCE_MID_SZ) aes_gcm_calc_iv_12(KEY, ivec, nr, H, Y, T); else aes_gcm_calc_iv(KEY, ivec, ibytes, nr, H, Y, T); @@ -7451,7 +5045,7 @@ __m128i tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; #endif /* AES_GCM_AESNI_NO_UNROLL */ - if (ibytes == 12) + if (ibytes == GCM_NONCE_MID_SZ) aes_gcm_calc_iv_12(KEY, ivec, nr, H, Y, T); else aes_gcm_calc_iv(KEY, ivec, ibytes, nr, H, Y, T); @@ -8070,6 +5664,13 @@ x[1] ^= bigA[1]; GMULT(x, bigH); } +#ifdef OPENSSL_EXTRA + /* store AAD partial tag for next call */ + aes->aadH[0] = (word32)((x[0] & 0xFFFFFFFF00000000) >> 32); + aes->aadH[1] = (word32)(x[0] & 0xFFFFFFFF); + aes->aadH[2] = (word32)((x[1] & 0xFFFFFFFF00000000) >> 32); + aes->aadH[3] = (word32)(x[1] & 0xFFFFFFFF); +#endif } /* Hash in C, the Ciphertext */ @@ -8077,6 +5678,13 @@ word64 bigC[2]; blocks = cSz / AES_BLOCK_SIZE; partial = cSz % AES_BLOCK_SIZE; +#ifdef OPENSSL_EXTRA + /* Start from last AAD partial tag */ + if(aes->aadLen) { + x[0] = ((word64)aes->aadH[0]) << 32 | aes->aadH[1]; + x[1] = ((word64)aes->aadH[2]) << 32 | aes->aadH[3]; + } +#endif while (blocks--) { XMEMCPY(bigC, c, AES_BLOCK_SIZE); #ifdef LITTLE_ENDIAN_ORDER @@ -8103,7 +5711,10 @@ { word64 len[2]; len[0] = aSz; len[1] = cSz; - +#ifdef OPENSSL_EXTRA + if (aes->aadLen) + len[0] = (word64)aes->aadLen; +#endif /* Lengths are in bytes. Convert to bits. */ len[0] *= 8; len[1] *= 8; @@ -8269,7 +5880,7 @@ #endif /* end GCM_WORD32 */ -#if !defined(WOLFSSL_XILINX_CRYPT) +#if !defined(WOLFSSL_XILINX_CRYPT) && !defined(WOLFSSL_AFALG_XILINX_AES) #ifdef FREESCALE_LTC_AES_GCM int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, const byte* iv, word32 ivSz, @@ -8280,7 +5891,7 @@ word32 keySize; /* argument checks */ - if (aes == NULL || authTagSz > AES_BLOCK_SIZE) { + if (aes == NULL || authTagSz > AES_BLOCK_SIZE || ivSz == 0) { return BAD_FUNC_ARG; } @@ -8298,43 +5909,66 @@ return (status == kStatus_Success) ? 
0 : AES_GCM_AUTH_E; } + #else -#if defined(STM32_CRYPTO) && (defined(WOLFSSL_STM32F4) || \ - defined(WOLFSSL_STM32F7) || \ - defined(WOLFSSL_STM32L4)) - -static WC_INLINE int wc_AesGcmEncrypt_STM32(Aes* aes, byte* out, const byte* in, - word32 sz, const byte* iv, word32 ivSz, - byte* authTag, word32 authTagSz, - const byte* authIn, word32 authInSz) + +#ifdef STM32_CRYPTO_AES_GCM + +/* this function supports inline encrypt */ +static int wc_AesGcmEncrypt_STM32(Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) { int ret; +#ifdef WOLFSSL_STM32_CUBEMX + CRYP_HandleTypeDef hcryp; +#else + word32 keyCopy[AES_256_KEY_SIZE/sizeof(word32)]; +#endif word32 keySize; - byte initialCounter[AES_BLOCK_SIZE]; - #ifdef WOLFSSL_STM32_CUBEMX - CRYP_HandleTypeDef hcryp; - #else - byte keyCopy[AES_BLOCK_SIZE * 2]; - #endif /* WOLFSSL_STM32_CUBEMX */ - int status = 0; + int status = HAL_OK; + word32 blocks = sz / AES_BLOCK_SIZE; + word32 partial = sz % AES_BLOCK_SIZE; + byte tag[AES_BLOCK_SIZE]; + byte partialBlock[AES_BLOCK_SIZE]; + byte ctr[AES_BLOCK_SIZE]; byte* authInPadded = NULL; - byte tag[AES_BLOCK_SIZE]; int authPadSz; ret = wc_AesGetKeySize(aes, &keySize); if (ret != 0) return ret; - XMEMSET(initialCounter, 0, AES_BLOCK_SIZE); - XMEMCPY(initialCounter, iv, ivSz); - initialCounter[AES_BLOCK_SIZE - 1] = STM32_GCM_IV_START; - - /* pad authIn if it is not a block multiple */ - if ((authInSz % AES_BLOCK_SIZE) != 0) { +#ifdef WOLFSSL_STM32_CUBEMX + ret = wc_Stm32_Aes_Init(aes, &hcryp); + if (ret != 0) + return ret; +#endif + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + + XMEMSET(ctr, 0, AES_BLOCK_SIZE); + if (ivSz == GCM_NONCE_MID_SZ) { + XMEMCPY(ctr, iv, ivSz); + ctr[AES_BLOCK_SIZE - 1] = 1; + } + else { + GHASH(aes, NULL, 0, iv, ivSz, ctr, AES_BLOCK_SIZE); + } + /* Hardware requires counter + 1 */ + IncrementGcmCounter(ctr); + + if (authInSz == 0 || (authInSz % AES_BLOCK_SIZE) != 0) { + /* Need to pad the AAD to a full block with zeros. */ authPadSz = ((authInSz / AES_BLOCK_SIZE) + 1) * AES_BLOCK_SIZE; - /* Need to pad the AAD to a full block with zeros. 
*/ - authInPadded = XMALLOC(authPadSz, aes->heap, DYNAMIC_TYPE_TMP_BUFFER); + authInPadded = (byte*)XMALLOC(authPadSz, aes->heap, + DYNAMIC_TYPE_TMP_BUFFER); if (authInPadded == NULL) { + wolfSSL_CryptHwMutexUnLock(); return MEMORY_E; } XMEMSET(authInPadded, 0, authPadSz); @@ -8344,32 +5978,12 @@ authInPadded = (byte*)authIn; } - #ifdef WOLFSSL_STM32_CUBEMX - XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef)); - switch (keySize) { - case 16: /* 128-bit key */ - hcryp.Init.KeySize = CRYP_KEYSIZE_128B; - break; -#ifdef CRYP_KEYSIZE_192B - case 24: /* 192-bit key */ - hcryp.Init.KeySize = CRYP_KEYSIZE_192B; - break; -#endif - case 32: /* 256-bit key */ - hcryp.Init.KeySize = CRYP_KEYSIZE_256B; - break; - default: - break; - } - hcryp.Instance = CRYP; - hcryp.Init.DataType = CRYP_DATATYPE_8B; - hcryp.Init.pKey = (byte*)aes->key; - hcryp.Init.pInitVect = initialCounter; - hcryp.Init.Header = authInPadded; + hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)ctr; + hcryp.Init.Header = (STM_CRYPT_TYPE*)authInPadded; hcryp.Init.HeaderSize = authInSz; -#ifdef WOLFSSL_STM32L4 +#ifdef STM32_CRYPTO_AES_ONLY /* Set the CRYP parameters */ hcryp.Init.ChainingMode = CRYP_CHAINMODE_AES_GCM_GMAC; hcryp.Init.OperatingMode = CRYP_ALGOMODE_ENCRYPT; @@ -8382,24 +5996,59 @@ /* GCM header phase */ hcryp.Init.GCMCMACPhase = CRYP_HEADER_PHASE; status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, 0, NULL, STM32_HAL_TIMEOUT); - if (status == HAL_OK) { - /* GCM payload phase */ - hcryp.Init.GCMCMACPhase = CRYP_PAYLOAD_PHASE; - status = HAL_CRYPEx_AES_Auth(&hcryp, (byte*)in, sz, out, STM32_HAL_TIMEOUT); - if (status == HAL_OK) { - /* GCM final phase */ - hcryp.Init.GCMCMACPhase = CRYP_FINAL_PHASE; - status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, sz, tag, STM32_HAL_TIMEOUT); - } - } + } + if (status == HAL_OK) { + /* GCM payload phase - blocks */ + hcryp.Init.GCMCMACPhase = CRYP_PAYLOAD_PHASE; + if (blocks) { + status = HAL_CRYPEx_AES_Auth(&hcryp, (byte*)in, + (blocks * AES_BLOCK_SIZE), out, STM32_HAL_TIMEOUT); + } + } + if (status == HAL_OK && (partial != 0 || blocks == 0)) { + /* GCM payload phase - partial remainder */ + XMEMSET(partialBlock, 0, sizeof(partialBlock)); + XMEMCPY(partialBlock, in + (blocks * AES_BLOCK_SIZE), partial); + status = HAL_CRYPEx_AES_Auth(&hcryp, partialBlock, partial, + partialBlock, STM32_HAL_TIMEOUT); + XMEMCPY(out + (blocks * AES_BLOCK_SIZE), partialBlock, partial); + } + if (status == HAL_OK) { + /* GCM final phase */ + hcryp.Init.GCMCMACPhase = CRYP_FINAL_PHASE; + status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, sz, tag, STM32_HAL_TIMEOUT); + } +#elif defined(STM32_HAL_V2) + hcryp.Init.Algorithm = CRYP_AES_GCM; + ByteReverseWords((word32*)partialBlock, (word32*)ctr, AES_BLOCK_SIZE); + hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)partialBlock; + HAL_CRYP_Init(&hcryp); + + /* GCM payload phase - can handle partial blocks */ + status = HAL_CRYP_Encrypt(&hcryp, (uint32_t*)in, + (blocks * AES_BLOCK_SIZE) + partial, (uint32_t*)out, STM32_HAL_TIMEOUT); + if (status == HAL_OK) { + /* Compute the authTag */ + status = HAL_CRYPEx_AESGCM_GenerateAuthTAG(&hcryp, (uint32_t*)tag, + STM32_HAL_TIMEOUT); } #else HAL_CRYP_Init(&hcryp); - - status = HAL_CRYPEx_AESGCM_Encrypt(&hcryp, (byte*)in, sz, - out, STM32_HAL_TIMEOUT); - /* Compute the authTag */ + if (blocks) { + /* GCM payload phase - blocks */ + status = HAL_CRYPEx_AESGCM_Encrypt(&hcryp, (byte*)in, + (blocks * AES_BLOCK_SIZE), out, STM32_HAL_TIMEOUT); + } + if (status == HAL_OK && (partial != 0 || blocks == 0)) { + /* GCM payload phase - partial remainder */ + XMEMSET(partialBlock, 0, 
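/* Aside: the counter setup above follows NIST SP 800-38D's J0 definition:
 * a 12-byte nonce is used directly as IV || 0x00000001, while any other
 * nonce length is run through GHASH. The "counter + 1" the hardware
 * expects is a big-endian increment of the low 32 bits only, which is what
 * IncrementGcmCounter() does. A sketch of that increment (illustrative
 * name, not wolfSSL API):
 */
static void ctr32_inc_be(unsigned char ctr[16])
{
    int i;
    for (i = 15; i >= 12; i--) {
        if (++ctr[i] != 0)   /* stop once a byte does not wrap to zero */
            break;
    }
}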
sizeof(partialBlock)); + XMEMCPY(partialBlock, in + (blocks * AES_BLOCK_SIZE), partial); + status = HAL_CRYPEx_AESGCM_Encrypt(&hcryp, partialBlock, partial, + partialBlock, STM32_HAL_TIMEOUT); + XMEMCPY(out + (blocks * AES_BLOCK_SIZE), partialBlock, partial); + } if (status == HAL_OK) { + /* Compute the authTag */ status = HAL_CRYPEx_AESGCM_Finish(&hcryp, sz, tag, STM32_HAL_TIMEOUT); } #endif @@ -8407,29 +6056,46 @@ if (status != HAL_OK) ret = AES_GCM_AUTH_E; HAL_CRYP_DeInit(&hcryp); -#else - ByteReverseWords((word32*)keyCopy, (word32*)aes->key, keySize); - status = CRYP_AES_GCM(MODE_ENCRYPT, (uint8_t*)initialCounter, - (uint8_t*)keyCopy, keySize * 8, - (uint8_t*)in, sz, - (uint8_t*)authInPadded,authInSz, - (uint8_t*)out, tag); + +#else /* STD_PERI_LIB */ + ByteReverseWords(keyCopy, (word32*)aes->key, keySize); + status = CRYP_AES_GCM(MODE_ENCRYPT, (uint8_t*)ctr, + (uint8_t*)keyCopy, keySize * 8, + (uint8_t*)in, sz, + (uint8_t*)authInPadded, authInSz, + (uint8_t*)out, tag); if (status != SUCCESS) ret = AES_GCM_AUTH_E; #endif /* WOLFSSL_STM32_CUBEMX */ - /* authTag may be shorter than AES_BLOCK_SZ, store separately */ - if (ret == 0) - XMEMCPY(authTag, tag, authTagSz); - - /* We only allocate extra memory if authInPadded is not a multiple of AES_BLOCK_SZ */ - if (authInPadded != NULL && authInSz != authPadSz) { + if (ret == 0) { + /* return authTag */ + if (authTag) { + /* STM32 GCM won't compute Auth correctly for partial or + when IV != 12, so use software here */ + if (sz == 0 || partial != 0 || ivSz != GCM_NONCE_MID_SZ) { + DecrementGcmCounter(ctr); /* hardware requires +1, so subtract it */ + GHASH(aes, authIn, authInSz, out, sz, authTag, authTagSz); + wc_AesEncrypt(aes, ctr, tag); + xorbuf(authTag, tag, authTagSz); + } + else { + XMEMCPY(authTag, tag, authTagSz); + } + } + } + + /* Free memory if not a multiple of AES_BLOCK_SZ */ + if (authInPadded != authIn) { XFREE(authInPadded, aes->heap, DYNAMIC_TYPE_TMP_BUFFER); } + wolfSSL_CryptHwMutexUnLock(); + return ret; } -#endif /* STM32_CRYPTO */ + +#endif /* STM32_CRYPTO_AES_GCM */ #ifdef WOLFSSL_AESNI int AES_GCM_encrypt_C(Aes* aes, byte* out, const byte* in, word32 sz, @@ -8453,21 +6119,31 @@ byte initialCounter[AES_BLOCK_SIZE]; byte *ctr; byte scratch[AES_BLOCK_SIZE]; - +#ifdef OPENSSL_EXTRA + word32 aadTemp; +#endif ctr = counter; XMEMSET(initialCounter, 0, AES_BLOCK_SIZE); + XMEMSET(scratch, 0, AES_BLOCK_SIZE); if (ivSz == GCM_NONCE_MID_SZ) { XMEMCPY(initialCounter, iv, ivSz); initialCounter[AES_BLOCK_SIZE - 1] = 1; } else { +#ifdef OPENSSL_EXTRA + aadTemp = aes->aadLen; + aes->aadLen = 0; +#endif GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE); +#ifdef OPENSSL_EXTRA + aes->aadLen = aadTemp; +#endif } XMEMCPY(ctr, initialCounter, AES_BLOCK_SIZE); #ifdef WOLFSSL_PIC32MZ_CRYPT if (blocks) { - /* use intitial IV for PIC32 HW, but don't use it below */ + /* use initial IV for HW, but don't use it below */ XMEMCPY(aes->reg, ctr, AES_BLOCK_SIZE); ret = wc_Pic32AesCrypt( @@ -8483,7 +6159,7 @@ #if defined(HAVE_AES_ECB) && !defined(WOLFSSL_PIC32MZ_CRYPT) /* some hardware acceleration can gain performance from doing AES encryption * of the whole buffer at once */ - if (c != p) { /* can not handle inline encryption */ + if (c != p && blocks > 0) { /* can not handle inline encryption */ while (blocks--) { IncrementGcmCounter(ctr); XMEMCPY(c, ctr, AES_BLOCK_SIZE); @@ -8497,11 +6173,11 @@ p += AES_BLOCK_SIZE * blocks; } else -#endif /* HAVE_AES_ECB */ +#endif /* HAVE_AES_ECB && !WOLFSSL_PIC32MZ_CRYPT */ while (blocks--) { 
IncrementGcmCounter(ctr); - #ifndef WOLFSSL_PIC32MZ_CRYPT + #if !defined(WOLFSSL_PIC32MZ_CRYPT) wc_AesEncrypt(aes, ctr, scratch); xorbuf(scratch, p, AES_BLOCK_SIZE); XMEMCPY(c, scratch, AES_BLOCK_SIZE); @@ -8516,21 +6192,28 @@ xorbuf(scratch, p, partial); XMEMCPY(c, scratch, partial); } - - GHASH(aes, authIn, authInSz, out, sz, authTag, authTagSz); - wc_AesEncrypt(aes, initialCounter, scratch); - xorbuf(authTag, scratch, authTagSz); + if (authTag) { + GHASH(aes, authIn, authInSz, out, sz, authTag, authTagSz); + wc_AesEncrypt(aes, initialCounter, scratch); + xorbuf(authTag, scratch, authTagSz); +#ifdef OPENSSL_EXTRA + if (!in && !sz) + /* store AAD size for next call */ + aes->aadLen = authInSz; +#endif + } return ret; } +/* Software AES - GCM Encrypt */ int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, const byte* iv, word32 ivSz, byte* authTag, word32 authTagSz, const byte* authIn, word32 authInSz) { /* argument checks */ - if (aes == NULL || authTagSz > AES_BLOCK_SIZE) { + if (aes == NULL || authTagSz > AES_BLOCK_SIZE || ivSz == 0) { return BAD_FUNC_ARG; } @@ -8539,27 +6222,13 @@ return BAD_FUNC_ARG; } -#if defined(STM32_CRYPTO) && (defined(WOLFSSL_STM32F4) || \ - defined(WOLFSSL_STM32F7) || \ - defined(WOLFSSL_STM32L4)) - - /* additional argument checks - STM32 HW only supports 12 byte IV */ - if (ivSz != GCM_NONCE_MID_SZ) { - return BAD_FUNC_ARG; - } - - /* STM32 HW AES-GCM requires / assumes inputs are a multiple of block size. - * We can avoid this by zero padding (authIn) AAD, but zero-padded plaintext - * will be encrypted and output incorrectly, causing a bad authTag. - * We will use HW accelerated AES-GCM if plain%AES_BLOCK_SZ==0. - * Otherwise, we will use accelerated AES_CTR for encrypt, and then - * perform GHASH in software. 
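/* Aside: the software path above finishes GCM in two easily missed steps:
 * the final partial block XORs only `partial` bytes of one extra keystream
 * block, and the tag is GHASH(A, C) XORed with E_K(J0), truncated to
 * authTagSz (NIST SP 800-38D). A sketch of the partial XOR, with `ks`
 * holding the encryption of the final counter block:
 */
#include <stddef.h>

static void ctr_partial_xor(unsigned char* c, const unsigned char* p,
                            const unsigned char ks[16], size_t partial)
{
    size_t i;
    for (i = 0; i < partial; i++)
        c[i] = p[i] ^ ks[i];  /* keystream bytes beyond `partial` are unused */
}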
- * See NIST SP 800-38D */
-
- /* Plain text is a multiple of block size, so use HW-Accelerated AES_GCM */
- if (sz % AES_BLOCK_SIZE == 0) {
- return wc_AesGcmEncrypt_STM32(aes, out, in, sz, iv, ivSz,
- authTag, authTagSz, authIn, authInSz);
+#ifdef WOLF_CRYPTO_CB
+ if (aes->devId != INVALID_DEVID) {
+ int ret = wc_CryptoCb_AesGcmEncrypt(aes, out, in, sz, iv, ivSz,
+ authTag, authTagSz, authIn, authInSz);
+ if (ret != CRYPTOCB_UNAVAILABLE)
+ return ret;
+ /* fall-through when unavailable */
 }
 #endif

@@ -8572,13 +6241,13 @@
 #ifdef HAVE_CAVIUM_V
 if (authInSz == 20) { /* Nitrox V GCM is only working with 20 byte AAD */
 return NitroxAesGcmEncrypt(aes, out, in, sz,
- (const byte*)aes->asyncKey, aes->keylen, iv, ivSz,
+ (const byte*)aes->devKey, aes->keylen, iv, ivSz,
 authTag, authTagSz, authIn, authInSz);
 }
 #endif
#elif defined(HAVE_INTEL_QA)
 return IntelQaSymAesGcmEncrypt(&aes->asyncDev, out, in, sz,
- (const byte*)aes->asyncKey, aes->keylen, iv, ivSz,
+ (const byte*)aes->devKey, aes->keylen, iv, ivSz,
 authTag, authTagSz, authIn, authInSz);
#else /* WOLFSSL_ASYNC_CRYPT_TEST */
 if (wc_AsyncTestInit(&aes->asyncDev, ASYNC_TEST_AES_GCM_ENCRYPT)) {
@@ -8599,7 +6268,17 @@
 }
 #endif /* WOLFSSL_ASYNC_CRYPT */

- /* Software AES-GCM */
+#ifdef STM32_CRYPTO_AES_GCM
+ /* The STM standard peripheral library APIs don't support partial blocks */
+ #ifdef STD_PERI_LIB
+ if (partial == 0)
+ #endif
+ {
+ return wc_AesGcmEncrypt_STM32(
+ aes, out, in, sz, iv, ivSz,
+ authTag, authTagSz, authIn, authInSz);
+ }
+#endif /* STM32_CRYPTO_AES_GCM */

 #ifdef WOLFSSL_AESNI
 #ifdef HAVE_INTEL_AVX2
@@ -8633,6 +6312,8 @@
 #endif

+
+/* AES GCM Decrypt */
 #if defined(HAVE_AES_DECRYPT) || defined(HAVE_AESGCM_DECRYPT)
 #ifdef FREESCALE_LTC_AES_GCM
 int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
@@ -8645,8 +6326,12 @@
 status_t status;

 /* argument checks */
- if (aes == NULL || out == NULL || in == NULL || iv == NULL ||
- authTag == NULL || authTagSz > AES_BLOCK_SIZE) {
+ /* If the sz is non-zero, both in and out must be set. If sz is 0,
+ * in and out are don't cares, as this is the GMAC case. */
+ if (aes == NULL || iv == NULL || (sz != 0 && (in == NULL || out == NULL)) ||
+ authTag == NULL || authTagSz > AES_BLOCK_SIZE || authTagSz == 0 ||
+ ivSz == 0) {
+
 return BAD_FUNC_ARG;
 }

@@ -8660,72 +6345,66 @@
 return (status == kStatus_Success) ?
0 : AES_GCM_AUTH_E; } -#elif defined(STM32_CRYPTO) && (defined(WOLFSSL_STM32F4) || \ - defined(WOLFSSL_STM32F7) || \ - defined(WOLFSSL_STM32L4)) -int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, - const byte* iv, word32 ivSz, - const byte* authTag, word32 authTagSz, - const byte* authIn, word32 authInSz) + +#else + +#ifdef STM32_CRYPTO_AES_GCM +/* this function supports inline decrypt */ +static int wc_AesGcmDecrypt_STM32(Aes* aes, byte* out, + const byte* in, word32 sz, + const byte* iv, word32 ivSz, + const byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) { int ret; +#ifdef WOLFSSL_STM32_CUBEMX + CRYP_HandleTypeDef hcryp; +#else + word32 keyCopy[AES_256_KEY_SIZE/sizeof(word32)]; +#endif word32 keySize; - #ifdef WOLFSSL_STM32_CUBEMX - CRYP_HandleTypeDef hcryp; - #else - byte keyCopy[AES_BLOCK_SIZE * 2]; - #endif /* WOLFSSL_STM32_CUBEMX */ - int status; - int inPadSz, authPadSz; + int status = HAL_OK; + word32 blocks = sz / AES_BLOCK_SIZE; + word32 partial = sz % AES_BLOCK_SIZE; byte tag[AES_BLOCK_SIZE]; - byte *inPadded = NULL; - byte *authInPadded = NULL; - byte initialCounter[AES_BLOCK_SIZE]; - - /* argument checks */ - if (aes == NULL || out == NULL || in == NULL || iv == NULL || - authTag == NULL || authTagSz > AES_BLOCK_SIZE) { - return BAD_FUNC_ARG; - } + byte partialBlock[AES_BLOCK_SIZE]; + byte ctr[AES_BLOCK_SIZE]; + byte* authInPadded = NULL; + int authPadSz; ret = wc_AesGetKeySize(aes, &keySize); + if (ret != 0) + return ret; + +#ifdef WOLFSSL_STM32_CUBEMX + ret = wc_Stm32_Aes_Init(aes, &hcryp); + if (ret != 0) + return ret; +#endif + + ret = wolfSSL_CryptHwMutexLock(); if (ret != 0) { return ret; } - /* additional argument checks - STM32 HW only supports 12 byte IV */ - if (ivSz != GCM_NONCE_MID_SZ) { - return BAD_FUNC_ARG; - } - - XMEMSET(initialCounter, 0, AES_BLOCK_SIZE); - XMEMCPY(initialCounter, iv, ivSz); - initialCounter[AES_BLOCK_SIZE - 1] = STM32_GCM_IV_START; - - /* Need to pad the AAD and input cipher text to a full block size since - * CRYP_AES_GCM will assume these are a multiple of AES_BLOCK_SIZE. - * It is okay to pad with zeros because GCM does this before GHASH already. - * See NIST SP 800-38D */ - - if ((sz % AES_BLOCK_SIZE) > 0) { - inPadSz = ((sz / AES_BLOCK_SIZE) + 1) * AES_BLOCK_SIZE; - inPadded = XMALLOC(inPadSz, aes->heap, DYNAMIC_TYPE_TMP_BUFFER); - if (inPadded == NULL) { - return MEMORY_E; - } - XMEMSET(inPadded, 0, inPadSz); - XMEMCPY(inPadded, in, sz); - } else { - inPadSz = sz; - inPadded = (byte*)in; - } - - if ((authInSz % AES_BLOCK_SIZE) > 0) { + XMEMSET(ctr, 0, AES_BLOCK_SIZE); + if (ivSz == GCM_NONCE_MID_SZ) { + XMEMCPY(ctr, iv, ivSz); + ctr[AES_BLOCK_SIZE - 1] = 1; + } + else { + GHASH(aes, NULL, 0, iv, ivSz, ctr, AES_BLOCK_SIZE); + } + /* Hardware requires counter + 1 */ + IncrementGcmCounter(ctr); + + if (authInSz == 0 || (authInSz % AES_BLOCK_SIZE) != 0) { + /* Need to pad the AAD to a full block with zeros. 
*/ authPadSz = ((authInSz / AES_BLOCK_SIZE) + 1) * AES_BLOCK_SIZE; - authInPadded = XMALLOC(authPadSz, aes->heap, DYNAMIC_TYPE_TMP_BUFFER); + authInPadded = (byte*)XMALLOC(authPadSz, aes->heap, + DYNAMIC_TYPE_TMP_BUFFER); if (authInPadded == NULL) { - if (inPadded != NULL && inPadSz != sz) - XFREE(inPadded , aes->heap, DYNAMIC_TYPE_TMP_BUFFER); + wolfSSL_CryptHwMutexUnLock(); return MEMORY_E; } XMEMSET(authInPadded, 0, authPadSz); @@ -8736,30 +6415,11 @@ } #ifdef WOLFSSL_STM32_CUBEMX - XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef)); - switch(keySize) { - case 16: /* 128-bit key */ - hcryp.Init.KeySize = CRYP_KEYSIZE_128B; - break; -#ifdef CRYP_KEYSIZE_192B - case 24: /* 192-bit key */ - hcryp.Init.KeySize = CRYP_KEYSIZE_192B; - break; -#endif - case 32: /* 256-bit key */ - hcryp.Init.KeySize = CRYP_KEYSIZE_256B; - break; - default: - break; - } - hcryp.Instance = CRYP; - hcryp.Init.DataType = CRYP_DATATYPE_8B; - hcryp.Init.pKey = (byte*)aes->key; - hcryp.Init.pInitVect = initialCounter; - hcryp.Init.Header = authInPadded; + hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)ctr; + hcryp.Init.Header = (STM_CRYPT_TYPE*)authInPadded; hcryp.Init.HeaderSize = authInSz; -#ifdef WOLFSSL_STM32L4 +#ifdef STM32_CRYPTO_AES_ONLY /* Set the CRYP parameters */ hcryp.Init.ChainingMode = CRYP_CHAINMODE_AES_GCM_GMAC; hcryp.Init.OperatingMode = CRYP_ALGOMODE_DECRYPT; @@ -8770,29 +6430,61 @@ status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, 0, NULL, STM32_HAL_TIMEOUT); if (status == HAL_OK) { /* GCM header phase */ - hcryp.Init.GCMCMACPhase = CRYP_HEADER_PHASE; + hcryp.Init.GCMCMACPhase = CRYP_HEADER_PHASE; status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, 0, NULL, STM32_HAL_TIMEOUT); - if (status == HAL_OK) { - /* GCM payload phase */ - hcryp.Init.GCMCMACPhase = CRYP_PAYLOAD_PHASE; - status = HAL_CRYPEx_AES_Auth(&hcryp, (byte*)inPadded, sz, inPadded, - STM32_HAL_TIMEOUT); - if (status == HAL_OK) { - /* GCM final phase */ - hcryp.Init.GCMCMACPhase = CRYP_FINAL_PHASE; - status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, sz, tag, - STM32_HAL_TIMEOUT); - } - } + } + if (status == HAL_OK) { + /* GCM payload phase - blocks */ + hcryp.Init.GCMCMACPhase = CRYP_PAYLOAD_PHASE; + if (blocks) { + status = HAL_CRYPEx_AES_Auth(&hcryp, (byte*)in, + (blocks * AES_BLOCK_SIZE), out, STM32_HAL_TIMEOUT); + } + } + if (status == HAL_OK && (partial != 0 || blocks == 0)) { + /* GCM payload phase - partial remainder */ + XMEMSET(partialBlock, 0, sizeof(partialBlock)); + XMEMCPY(partialBlock, in + (blocks * AES_BLOCK_SIZE), partial); + status = HAL_CRYPEx_AES_Auth(&hcryp, partialBlock, partial, + partialBlock, STM32_HAL_TIMEOUT); + XMEMCPY(out + (blocks * AES_BLOCK_SIZE), partialBlock, partial); + } + if (status == HAL_OK) { + /* GCM final phase */ + hcryp.Init.GCMCMACPhase = CRYP_FINAL_PHASE; + status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, sz, tag, STM32_HAL_TIMEOUT); + } +#elif defined(STM32_HAL_V2) + hcryp.Init.Algorithm = CRYP_AES_GCM; + ByteReverseWords((word32*)partialBlock, (word32*)ctr, AES_BLOCK_SIZE); + hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)partialBlock; + HAL_CRYP_Init(&hcryp); + + /* GCM payload phase - can handle partial blocks */ + status = HAL_CRYP_Decrypt(&hcryp, (uint32_t*)in, + (blocks * AES_BLOCK_SIZE) + partial, (uint32_t*)out, STM32_HAL_TIMEOUT); + if (status == HAL_OK) { + /* Compute the authTag */ + status = HAL_CRYPEx_AESGCM_GenerateAuthTAG(&hcryp, (uint32_t*)tag, + STM32_HAL_TIMEOUT); } #else HAL_CRYP_Init(&hcryp); - /* Use inPadded for output buffer instead of - * out so that we don't overflow our size. 
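/* Aside: the padded-size rule above always rounds the AAD up to whole
 * 16-byte blocks, and deliberately gives an empty AAD one full zero block,
 * because the hardware consumes complete blocks only. A sketch of the same
 * computation (illustrative helper, block size assumed to be 16):
 */
#include <stddef.h>

static size_t aad_padded_size(size_t authInSz)
{
    const size_t block = 16;  /* AES_BLOCK_SIZE */
    if (authInSz == 0 || (authInSz % block) != 0)
        return ((authInSz / block) + 1) * block;
    return authInSz;          /* already block aligned, used as-is */
}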
*/ - status = HAL_CRYPEx_AESGCM_Decrypt(&hcryp, (byte*)inPadded, - sz, inPadded, STM32_HAL_TIMEOUT); - /* Compute the authTag */ + if (blocks) { + /* GCM payload phase - blocks */ + status = HAL_CRYPEx_AESGCM_Decrypt(&hcryp, (byte*)in, + (blocks * AES_BLOCK_SIZE), out, STM32_HAL_TIMEOUT); + } + if (status == HAL_OK && (partial != 0 || blocks == 0)) { + /* GCM payload phase - partial remainder */ + XMEMSET(partialBlock, 0, sizeof(partialBlock)); + XMEMCPY(partialBlock, in + (blocks * AES_BLOCK_SIZE), partial); + status = HAL_CRYPEx_AESGCM_Decrypt(&hcryp, partialBlock, partial, + partialBlock, STM32_HAL_TIMEOUT); + XMEMCPY(out + (blocks * AES_BLOCK_SIZE), partialBlock, partial); + } if (status == HAL_OK) { + /* Compute the authTag */ status = HAL_CRYPEx_AESGCM_Finish(&hcryp, sz, tag, STM32_HAL_TIMEOUT); } #endif @@ -8801,37 +6493,46 @@ ret = AES_GCM_AUTH_E; HAL_CRYP_DeInit(&hcryp); -#else - ByteReverseWords((word32*)keyCopy, (word32*)aes->key, keySize); + +#else /* STD_PERI_LIB */ + ByteReverseWords(keyCopy, (word32*)aes->key, aes->keylen); /* Input size and auth size need to be the actual sizes, even though * they are not block aligned, because this length (in bits) is used - * in the final GHASH. Use inPadded for output buffer instead of - * out so that we don't overflow our size. */ - status = CRYP_AES_GCM(MODE_DECRYPT, (uint8_t*)initialCounter, - (uint8_t*)keyCopy, keySize * 8, - (uint8_t*)inPadded, sz, - (uint8_t*)authInPadded,authInSz, - (uint8_t*)inPadded, tag); + * in the final GHASH. */ + status = CRYP_AES_GCM(MODE_DECRYPT, (uint8_t*)ctr, + (uint8_t*)keyCopy, keySize * 8, + (uint8_t*)in, sz, + (uint8_t*)authInPadded, authInSz, + (uint8_t*)out, tag); if (status != SUCCESS) ret = AES_GCM_AUTH_E; #endif /* WOLFSSL_STM32_CUBEMX */ - if (ret == 0 && ConstantCompare(authTag, tag, authTagSz) == 0) { - /* Only keep the decrypted data if authTag success. 
*/ - XMEMCPY(out, inPadded, sz); - ret = 0; /* success */ - } - - /* only allocate padding buffers if the inputs are not a multiple of block sz */ - if (inPadded != NULL && inPadSz != sz) - XFREE(inPadded , aes->heap, DYNAMIC_TYPE_TMP_BUFFER); - if (authInPadded != NULL && authPadSz != authInSz) + /* STM32 GCM hardware only supports IV of 12 bytes, so use software for auth */ + if (sz == 0 || ivSz != GCM_NONCE_MID_SZ) { + DecrementGcmCounter(ctr); /* hardware requires +1, so subtract it */ + GHASH(aes, authIn, authInSz, in, sz, tag, sizeof(tag)); + wc_AesEncrypt(aes, ctr, partialBlock); + xorbuf(tag, partialBlock, sizeof(tag)); + } + + if (ConstantCompare(authTag, tag, authTagSz) != 0) { + ret = AES_GCM_AUTH_E; + } + + /* Free memory if not a multiple of AES_BLOCK_SZ */ + if (authInPadded != authIn) { XFREE(authInPadded, aes->heap, DYNAMIC_TYPE_TMP_BUFFER); + } + + wolfSSL_CryptHwMutexUnLock(); return ret; } -#else + +#endif /* STM32_CRYPTO_AES_GCM */ + #ifdef WOLFSSL_AESNI int AES_GCM_decrypt_C(Aes* aes, byte* out, const byte* in, word32 sz, const byte* iv, word32 ivSz, @@ -8856,15 +6557,24 @@ byte scratch[AES_BLOCK_SIZE]; byte Tprime[AES_BLOCK_SIZE]; byte EKY0[AES_BLOCK_SIZE]; +#ifdef OPENSSL_EXTRA + word32 aadTemp; +#endif ctr = counter; - XMEMSET(initialCounter, 0, AES_BLOCK_SIZE); if (ivSz == GCM_NONCE_MID_SZ) { XMEMCPY(initialCounter, iv, ivSz); initialCounter[AES_BLOCK_SIZE - 1] = 1; } else { +#ifdef OPENSSL_EXTRA + aadTemp = aes->aadLen; + aes->aadLen = 0; +#endif GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE); +#ifdef OPENSSL_EXTRA + aes->aadLen = aadTemp; +#endif } XMEMCPY(ctr, initialCounter, AES_BLOCK_SIZE); @@ -8873,13 +6583,20 @@ wc_AesEncrypt(aes, ctr, EKY0); xorbuf(Tprime, EKY0, sizeof(Tprime)); +#ifdef OPENSSL_EXTRA + if (!out) { + /* authenticated, non-confidential data */ + /* store AAD size for next call */ + aes->aadLen = authInSz; + } +#endif if (ConstantCompare(authTag, Tprime, authTagSz) != 0) { return AES_GCM_AUTH_E; } -#ifdef WOLFSSL_PIC32MZ_CRYPT +#if defined(WOLFSSL_PIC32MZ_CRYPT) if (blocks) { - /* use intitial IV for PIC32 HW, but don't use it below */ + /* use initial IV for HW, but don't use it below */ XMEMCPY(aes->reg, ctr, AES_BLOCK_SIZE); ret = wc_Pic32AesCrypt( @@ -8895,7 +6612,7 @@ #if defined(HAVE_AES_ECB) && !defined(WOLFSSL_PIC32MZ_CRYPT) /* some hardware acceleration can gain performance from doing AES encryption * of the whole buffer at once */ - if (c != p) { /* can not handle inline decryption */ + if (c != p && blocks > 0) { /* can not handle inline decryption */ while (blocks--) { IncrementGcmCounter(ctr); XMEMCPY(p, ctr, AES_BLOCK_SIZE); @@ -8904,15 +6621,16 @@ /* reset number of blocks and then do encryption */ blocks = sz / AES_BLOCK_SIZE; + wc_AesEcbEncrypt(aes, out, out, AES_BLOCK_SIZE * blocks); xorbuf(out, c, AES_BLOCK_SIZE * blocks); c += AES_BLOCK_SIZE * blocks; } else -#endif /* HAVE_AES_ECB */ +#endif /* HAVE_AES_ECB && !PIC32MZ */ while (blocks--) { IncrementGcmCounter(ctr); - #ifndef WOLFSSL_PIC32MZ_CRYPT + #if !defined(WOLFSSL_PIC32MZ_CRYPT) wc_AesEncrypt(aes, ctr, scratch); xorbuf(scratch, c, AES_BLOCK_SIZE); XMEMCPY(p, scratch, AES_BLOCK_SIZE); @@ -8931,24 +6649,36 @@ return ret; } +/* Software AES - GCM Decrypt */ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, const byte* iv, word32 ivSz, const byte* authTag, word32 authTagSz, const byte* authIn, word32 authInSz) { #ifdef WOLFSSL_AESNI - int res; + int res = AES_GCM_AUTH_E; #endif /* argument checks */ /* If the sz is non-zero, both in and 
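/* Aside: the tag check above goes through ConstantCompare() rather than
 * memcmp() so that verification time does not leak the position of the
 * first mismatching byte. A typical constant-time comparison looks like
 * this (illustrative, not the wolfSSL implementation):
 */
#include <stddef.h>

static int const_time_neq(const unsigned char* a, const unsigned char* b,
                          size_t len)
{
    unsigned char diff = 0;
    size_t i;
    for (i = 0; i < len; i++)
        diff |= (unsigned char)(a[i] ^ b[i]);  /* accumulate every mismatch */
    return diff != 0;                          /* 0 only when fully equal */
}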
out must be set. If sz is 0,
 * in and out are don't cares, as this is the GMAC case. */
 if (aes == NULL || iv == NULL || (sz != 0 && (in == NULL || out == NULL)) ||
- authTag == NULL || authTagSz > AES_BLOCK_SIZE || authTagSz == 0) {
+ authTag == NULL || authTagSz > AES_BLOCK_SIZE || authTagSz == 0 ||
+ ivSz == 0) {
 return BAD_FUNC_ARG;
 }

+#ifdef WOLF_CRYPTO_CB
+ if (aes->devId != INVALID_DEVID) {
+ int ret = wc_CryptoCb_AesGcmDecrypt(aes, out, in, sz, iv, ivSz,
+ authTag, authTagSz, authIn, authInSz);
+ if (ret != CRYPTOCB_UNAVAILABLE)
+ return ret;
+ /* fall-through when unavailable */
+ }
+#endif
+
 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
 /* if async and byte count above threshold */
 /* only 12-byte IV is supported in HW */
@@ -8958,13 +6688,13 @@
 #ifdef HAVE_CAVIUM_V
 if (authInSz == 20) { /* Nitrox V GCM is only working with 20 byte AAD */
 return NitroxAesGcmDecrypt(aes, out, in, sz,
- (const byte*)aes->asyncKey, aes->keylen, iv, ivSz,
+ (const byte*)aes->devKey, aes->keylen, iv, ivSz,
 authTag, authTagSz, authIn, authInSz);
 }
 #endif
#elif defined(HAVE_INTEL_QA)
 return IntelQaSymAesGcmDecrypt(&aes->asyncDev, out, in, sz,
- (const byte*)aes->asyncKey, aes->keylen, iv, ivSz,
+ (const byte*)aes->devKey, aes->keylen, iv, ivSz,
 authTag, authTagSz, authIn, authInSz);
#else /* WOLFSSL_ASYNC_CRYPT_TEST */
 if (wc_AsyncTestInit(&aes->asyncDev, ASYNC_TEST_AES_GCM_DECRYPT)) {
@@ -8985,7 +6715,17 @@
 }
 #endif /* WOLFSSL_ASYNC_CRYPT */

- /* software AES GCM */
+#ifdef STM32_CRYPTO_AES_GCM
+ /* The STM standard peripheral library APIs don't support partial blocks */
+ #ifdef STD_PERI_LIB
+ if (partial == 0)
+ #endif
+ {
+ return wc_AesGcmDecrypt_STM32(
+ aes, out, in, sz, iv, ivSz,
+ authTag, authTagSz, authIn, authInSz);
+ }
+#endif /* STM32_CRYPTO_AES_GCM */

 #ifdef WOLFSSL_AESNI
 #ifdef HAVE_INTEL_AVX2
@@ -9024,7 +6764,7 @@
 }
 #endif
 #endif /* HAVE_AES_DECRYPT || HAVE_AESGCM_DECRYPT */
-#endif /* (WOLFSSL_XILINX_CRYPT) */
+#endif /* WOLFSSL_XILINX_CRYPT */

 #endif /* end of block for AESGCM implementation selection */

@@ -9124,7 +6864,8 @@
 (byte*)aes->reg, ivOutSz,
 authTag, authTagSz,
 authIn, authInSz);
- IncCtr((byte*)aes->reg, ivOutSz);
+ if (ret == 0)
+ IncCtr((byte*)aes->reg, ivOutSz);
 }

 return ret;
@@ -9135,21 +6876,24 @@
 byte* authTag, word32 authTagSz, WC_RNG* rng)
 {
 Aes aes;
- int ret = 0;
+ int ret;

 if (key == NULL || iv == NULL || (authIn == NULL && authInSz != 0) ||
 authTag == NULL || authTagSz == 0 || rng == NULL) {
- ret = BAD_FUNC_ARG;
- }
-
- if (ret == 0)
+ return BAD_FUNC_ARG;
+ }
+
+ ret = wc_AesInit(&aes, NULL, INVALID_DEVID);
+ if (ret == 0) {
 ret = wc_AesGcmSetKey(&aes, key, keySz);
- if (ret == 0)
- ret = wc_AesGcmSetIV(&aes, ivSz, NULL, 0, rng);
- if (ret == 0)
- ret = wc_AesGcmEncrypt_ex(&aes, NULL, NULL, 0, iv, ivSz,
+ if (ret == 0)
+ ret = wc_AesGcmSetIV(&aes, ivSz, NULL, 0, rng);
+ if (ret == 0)
+ ret = wc_AesGcmEncrypt_ex(&aes, NULL, NULL, 0, iv, ivSz,
 authTag, authTagSz, authIn, authInSz);
+ wc_AesFree(&aes);
+ }

 ForceZero(&aes, sizeof(aes));

 return ret;
@@ -9160,22 +6904,36 @@
 const byte* authIn, word32 authInSz,
 const byte* authTag, word32 authTagSz)
 {
+ int ret;
+#ifndef NO_AES_DECRYPT
 Aes aes;
- int ret = 0;

 if (key == NULL || iv == NULL || (authIn == NULL && authInSz != 0) ||
 authTag == NULL || authTagSz == 0 || authTagSz > AES_BLOCK_SIZE) {
- ret = BAD_FUNC_ARG;
- }
-
- if (ret == 0)
+ return BAD_FUNC_ARG;
+ }
+
+ ret = wc_AesInit(&aes, NULL, INVALID_DEVID);
+ if (ret == 0) {
 ret = wc_AesGcmSetKey(&aes, key, keySz);
- if (ret == 0)
- ret =
wc_AesGcmDecrypt(&aes, NULL, NULL, 0, iv, ivSz, + if (ret == 0) + ret = wc_AesGcmDecrypt(&aes, NULL, NULL, 0, iv, ivSz, authTag, authTagSz, authIn, authInSz); + wc_AesFree(&aes); + } ForceZero(&aes, sizeof(aes)); - +#else + (void)key; + (void)keySz; + (void)iv; + (void)ivSz; + (void)authIn; + (void)authInSz; + (void)authTag; + (void)authTagSz; + ret = NOT_COMPILED_IN; +#endif return ret; } @@ -9286,10 +7044,9 @@ } #endif /* HAVE_AES_DECRYPT */ - -/* software AES CCM */ #else +/* Software CCM */ static void roll_x(Aes* aes, const byte* in, word32 inSz, byte* out) { /* process the bulk of the data */ @@ -9362,14 +7119,60 @@ } } +#ifdef WOLFSSL_AESNI +static WC_INLINE void AesCcmCtrIncSet4(byte* B, word32 lenSz) +{ + word32 i; + + /* B+1 = B */ + XMEMCPY(B + AES_BLOCK_SIZE * 1, B, AES_BLOCK_SIZE); + /* B+2,B+3 = B,B+1 */ + XMEMCPY(B + AES_BLOCK_SIZE * 2, B, AES_BLOCK_SIZE * 2); + + for (i = 0; i < lenSz; i++) { + if (++B[AES_BLOCK_SIZE * 1 - 1 - i] != 0) break; + } + B[AES_BLOCK_SIZE * 2 - 1] += 2; + if (B[AES_BLOCK_SIZE * 2 - 1] < 2) { + for (i = 1; i < lenSz; i++) { + if (++B[AES_BLOCK_SIZE * 2 - 1 - i] != 0) break; + } + } + B[AES_BLOCK_SIZE * 3 - 1] += 3; + if (B[AES_BLOCK_SIZE * 3 - 1] < 3) { + for (i = 1; i < lenSz; i++) { + if (++B[AES_BLOCK_SIZE * 3 - 1 - i] != 0) break; + } + } +} + +static WC_INLINE void AesCcmCtrInc4(byte* B, word32 lenSz) +{ + word32 i; + + B[AES_BLOCK_SIZE - 1] += 4; + if (B[AES_BLOCK_SIZE - 1] < 4) { + for (i = 1; i < lenSz; i++) { + if (++B[AES_BLOCK_SIZE - 1 - i] != 0) break; + } + } +} +#endif + +/* Software AES - CCM Encrypt */ /* return 0 on success */ int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz, const byte* nonce, word32 nonceSz, byte* authTag, word32 authTagSz, const byte* authIn, word32 authInSz) { +#ifndef WOLFSSL_AESNI byte A[AES_BLOCK_SIZE]; byte B[AES_BLOCK_SIZE]; +#else + ALIGN128 byte A[AES_BLOCK_SIZE * 4]; + ALIGN128 byte B[AES_BLOCK_SIZE * 4]; +#endif byte lenSz; word32 i; byte mask = 0xFF; @@ -9381,6 +7184,7 @@ authTagSz > AES_BLOCK_SIZE) return BAD_FUNC_ARG; + XMEMSET(A, 0, sizeof(A)); XMEMCPY(B+1, nonce, nonceSz); lenSz = AES_BLOCK_SIZE - 1 - (byte)nonceSz; B[0] = (authInSz > 0 ? 
64 : 0) @@ -9407,6 +7211,26 @@ xorbuf(authTag, A, authTagSz); B[15] = 1; +#ifdef WOLFSSL_AESNI + if (haveAESNI && aes->use_aesni) { + while (inSz >= AES_BLOCK_SIZE * 4) { + AesCcmCtrIncSet4(B, lenSz); + + AES_ECB_encrypt(B, A, AES_BLOCK_SIZE * 4, (byte*)aes->key, + aes->rounds); + xorbuf(A, in, AES_BLOCK_SIZE * 4); + XMEMCPY(out, A, AES_BLOCK_SIZE * 4); + + inSz -= AES_BLOCK_SIZE * 4; + in += AES_BLOCK_SIZE * 4; + out += AES_BLOCK_SIZE * 4; + + if (inSz < AES_BLOCK_SIZE * 4) { + AesCcmCtrInc4(B, lenSz); + } + } + } +#endif while (inSz >= AES_BLOCK_SIZE) { wc_AesEncrypt(aes, B, A); xorbuf(A, in, AES_BLOCK_SIZE); @@ -9430,13 +7254,19 @@ } #ifdef HAVE_AES_DECRYPT +/* Software AES - CCM Decrypt */ int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz, const byte* nonce, word32 nonceSz, const byte* authTag, word32 authTagSz, const byte* authIn, word32 authInSz) { +#ifndef WOLFSSL_AESNI byte A[AES_BLOCK_SIZE]; byte B[AES_BLOCK_SIZE]; +#else + ALIGN128 byte B[AES_BLOCK_SIZE * 4]; + ALIGN128 byte A[AES_BLOCK_SIZE * 4]; +#endif byte* o; byte lenSz; word32 i, oSz; @@ -9460,6 +7290,26 @@ B[AES_BLOCK_SIZE - 1 - i] = 0; B[15] = 1; +#ifdef WOLFSSL_AESNI + if (haveAESNI && aes->use_aesni) { + while (oSz >= AES_BLOCK_SIZE * 4) { + AesCcmCtrIncSet4(B, lenSz); + + AES_ECB_encrypt(B, A, AES_BLOCK_SIZE * 4, (byte*)aes->key, + aes->rounds); + xorbuf(A, in, AES_BLOCK_SIZE * 4); + XMEMCPY(o, A, AES_BLOCK_SIZE * 4); + + oSz -= AES_BLOCK_SIZE * 4; + in += AES_BLOCK_SIZE * 4; + o += AES_BLOCK_SIZE * 4; + + if (oSz < AES_BLOCK_SIZE * 4) { + AesCcmCtrInc4(B, lenSz); + } + } + } +#endif while (oSz >= AES_BLOCK_SIZE) { wc_AesEncrypt(aes, B, A); xorbuf(A, in, AES_BLOCK_SIZE); @@ -9521,7 +7371,7 @@ } #endif /* HAVE_AES_DECRYPT */ -#endif /* software AES CCM */ +#endif /* software CCM */ /* abstract functions that call lower level AESCCM functions */ #ifndef WC_NO_RNG @@ -9579,8 +7429,10 @@ (byte*)aes->reg, aes->nonceSz, authTag, authTagSz, authIn, authInSz); - XMEMCPY(ivOut, aes->reg, aes->nonceSz); - IncCtr((byte*)aes->reg, aes->nonceSz); + if (ret == 0) { + XMEMCPY(ivOut, aes->reg, aes->nonceSz); + IncCtr((byte*)aes->reg, aes->nonceSz); + } } return ret; @@ -9601,15 +7453,57 @@ aes->heap = heap; +#ifdef WOLF_CRYPTO_CB + aes->devId = devId; + aes->devCtx = NULL; +#else + (void)devId; +#endif #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES) ret = wolfAsync_DevCtxInit(&aes->asyncDev, WOLFSSL_ASYNC_MARKER_AES, aes->heap, devId); -#else - (void)devId; #endif /* WOLFSSL_ASYNC_CRYPT */ +#ifdef WOLFSSL_AFALG + aes->alFd = -1; + aes->rdFd = -1; +#endif +#if defined(WOLFSSL_DEVCRYPTO) && \ + (defined(WOLFSSL_DEVCRYPTO_AES) || defined(WOLFSSL_DEVCRYPTO_CBC)) + aes->ctx.cfd = -1; +#endif +#if defined(WOLFSSL_CRYPTOCELL) && defined(WOLFSSL_CRYPTOCELL_AES) + XMEMSET(&aes->ctx, 0, sizeof(aes->ctx)); +#endif +#ifdef HAVE_AESGCM +#ifdef OPENSSL_EXTRA + XMEMSET(aes->aadH, 0, sizeof(aes->aadH)); + aes->aadLen = 0; +#endif +#endif + return ret; +} + +#ifdef HAVE_PKCS11 +int wc_AesInit_Id(Aes* aes, unsigned char* id, int len, void* heap, int devId) +{ + int ret = 0; + + if (aes == NULL) + ret = BAD_FUNC_ARG; + if (ret == 0 && (len < 0 || len > AES_MAX_ID_LEN)) + ret = BUFFER_E; + + if (ret == 0) + ret = wc_AesInit(aes, heap, devId); + if (ret == 0) { + XMEMCPY(aes->id, id, len); + aes->idLen = len; + } + return ret; } +#endif /* Free Aes from use with async hardware */ void wc_AesFree(Aes* aes) @@ -9620,6 +7514,23 @@ #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES) 
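/* Aside: the AESNI CCM loops above advance the big-endian counter held in
 * the low lenSz bytes of B by four blocks at a time, after materializing
 * B, B+1, B+2, B+3 for a single four-block ECB pass. The carry handling in
 * AesCcmCtrInc4() reduces to this pattern (illustrative helper):
 */
static void ccm_ctr_add(unsigned char B[16], unsigned int lenSz,
                        unsigned char n)
{
    unsigned int i;

    B[15] += n;
    if (B[15] < n) {             /* the low byte wrapped: propagate carry */
        for (i = 1; i < lenSz; i++) {
            if (++B[15 - i] != 0)
                break;
        }
    }
}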
wolfAsync_DevCtxFree(&aes->asyncDev, WOLFSSL_ASYNC_MARKER_AES); #endif /* WOLFSSL_ASYNC_CRYPT */ +#if defined(WOLFSSL_AFALG) || defined(WOLFSSL_AFALG_XILINX_AES) + if (aes->rdFd > 0) { /* negative is error case */ + close(aes->rdFd); + } + if (aes->alFd > 0) { + close(aes->alFd); + } +#endif /* WOLFSSL_AFALG */ +#if defined(WOLFSSL_DEVCRYPTO) && \ + (defined(WOLFSSL_DEVCRYPTO_AES) || defined(WOLFSSL_DEVCRYPTO_CBC)) + wc_DevCryptoFree(&aes->ctx); +#endif +#if defined(WOLF_CRYPTO_CB) || (defined(WOLFSSL_DEVCRYPTO) && \ + (defined(WOLFSSL_DEVCRYPTO_AES) || defined(WOLFSSL_DEVCRYPTO_CBC))) || \ + (defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)) + ForceZero((byte*)aes->devKey, AES_MAX_KEY_SIZE/WOLFSSL_BIT_SIZE); +#endif } @@ -9630,23 +7541,26 @@ if (aes == NULL || keySize == NULL) { return BAD_FUNC_ARG; } - +#if defined(WOLFSSL_CRYPTOCELL) && defined(WOLFSSL_CRYPTOCELL_AES) + *keySize = aes->ctx.key.keySize; + return ret; +#endif switch (aes->rounds) { - #ifdef WOLFSSL_AES_128 +#ifdef WOLFSSL_AES_128 case 10: *keySize = 16; break; - #endif - #ifdef WOLFSSL_AES_192 +#endif +#ifdef WOLFSSL_AES_192 case 12: *keySize = 24; break; - #endif - #ifdef WOLFSSL_AES_256 +#endif +#ifdef WOLFSSL_AES_256 case 14: *keySize = 32; break; - #endif +#endif default: *keySize = 0; ret = BAD_FUNC_ARG; @@ -9660,9 +7574,36 @@ #ifdef HAVE_AES_ECB #if defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES) /* implemented in wolfcrypt/src/port/caam/caam_aes.c */ + +#elif defined(WOLFSSL_AFALG) + /* implemented in wolfcrypt/src/port/af_alg/afalg_aes.c */ + +#elif defined(WOLFSSL_DEVCRYPTO_AES) + /* implemented in wolfcrypt/src/port/devcrypt/devcrypto_aes.c */ + +#elif defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_AES) + +/* Software AES - ECB */ +int wc_AesEcbEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + if ((in == NULL) || (out == NULL) || (aes == NULL)) + return BAD_FUNC_ARG; + + return AES_ECB_encrypt(aes, in, out, sz); +} + + +int wc_AesEcbDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + if ((in == NULL) || (out == NULL) || (aes == NULL)) + return BAD_FUNC_ARG; + + return AES_ECB_decrypt(aes, in, out, sz); +} + #else -/* software implementation */ +/* Software AES - ECB */ int wc_AesEcbEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) { word32 blocks = sz / AES_BLOCK_SIZE; @@ -9698,44 +7639,66 @@ #endif #endif /* HAVE_AES_ECB */ -#ifdef WOLFSSL_AES_CFB -/* CFB 128 +#if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_OFB) +/* Feedback AES mode * * aes structure holding key to use for encryption * out buffer to hold result of encryption (must be at least as large as input * buffer) * in buffer to encrypt * sz size of input buffer + * mode flag to specify AES mode * * returns 0 on success and negative error values on failure */ -int wc_AesCfbEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) +/* Software AES - CFB Encrypt */ +static int wc_AesFeedbackEncrypt(Aes* aes, byte* out, const byte* in, + word32 sz, byte mode) { byte* tmp = NULL; +#ifdef WOLFSSL_AES_CFB byte* reg = NULL; - - WOLFSSL_ENTER("wc_AesCfbEncrypt"); +#endif if (aes == NULL || out == NULL || in == NULL) { return BAD_FUNC_ARG; } +#ifdef WOLFSSL_AES_CFB if (aes->left && sz) { reg = (byte*)aes->reg + AES_BLOCK_SIZE - aes->left; } +#endif /* consume any unused bytes left in aes->tmp */ tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left; while (aes->left && sz) { - *(out++) = *(reg++) = *(in++) ^ *(tmp++); + *(out) = *(in++) ^ *(tmp++); + #ifdef WOLFSSL_AES_CFB + if (mode == AES_CFB_MODE) { + *(reg++) = 
*out; + } + #endif + out++; aes->left--; sz--; } while (sz >= AES_BLOCK_SIZE) { - wc_AesEncryptDirect(aes, out, (byte*)aes->reg); - xorbuf(out, in, AES_BLOCK_SIZE); - XMEMCPY(aes->reg, out, AES_BLOCK_SIZE); + /* Using aes->tmp here for inline case i.e. in=out */ + wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg); + #ifdef WOLFSSL_AES_OFB + if (mode == AES_OFB_MODE) { + XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE); + } + #endif + xorbuf((byte*)aes->tmp, in, AES_BLOCK_SIZE); + #ifdef WOLFSSL_AES_CFB + if (mode == AES_CFB_MODE) { + XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE); + } + #endif + XMEMCPY(out, aes->tmp, AES_BLOCK_SIZE); out += AES_BLOCK_SIZE; in += AES_BLOCK_SIZE; sz -= AES_BLOCK_SIZE; @@ -9747,10 +7710,23 @@ wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg); aes->left = AES_BLOCK_SIZE; tmp = (byte*)aes->tmp; + #ifdef WOLFSSL_AES_OFB + if (mode == AES_OFB_MODE) { + XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE); + } + #endif + #ifdef WOLFSSL_AES_CFB reg = (byte*)aes->reg; + #endif while (sz--) { - *(out++) = *(reg++) = *(in++) ^ *(tmp++); + *(out) = *(in++) ^ *(tmp++); + #ifdef WOLFSSL_AES_CFB + if (mode == AES_CFB_MODE) { + *(reg++) = *out; + } + #endif + out++; aes->left--; } } @@ -9770,21 +7746,23 @@ * * returns 0 on success and negative error values on failure */ -int wc_AesCfbDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) +/* Software AES - CFB Decrypt */ +static int wc_AesFeedbackDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, + byte mode) { byte* tmp; - WOLFSSL_ENTER("wc_AesCfbDecrypt"); - if (aes == NULL || out == NULL || in == NULL) { return BAD_FUNC_ARG; } + #ifdef WOLFSSL_AES_CFB /* check if more input needs copied over to aes->reg */ - if (aes->left && sz) { + if (aes->left && sz && mode == AES_CFB_MODE) { int size = min(aes->left, sz); XMEMCPY((byte*)aes->reg + AES_BLOCK_SIZE - aes->left, in, size); } + #endif /* consume any unused bytes left in aes->tmp */ tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left; @@ -9795,9 +7773,20 @@ } while (sz > AES_BLOCK_SIZE) { - wc_AesEncryptDirect(aes, out, (byte*)aes->reg); - xorbuf(out, in, AES_BLOCK_SIZE); - XMEMCPY(aes->reg, in, AES_BLOCK_SIZE); + /* Using aes->tmp here for inline case i.e. 
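/* Aside: wc_AesFeedbackEncrypt() above folds CFB and OFB into one routine;
 * the only difference between the modes is what is fed back into the shift
 * register. A one-block sketch of both, where block_encrypt() is a
 * hypothetical stand-in for wc_AesEncryptDirect():
 */
#include <string.h>

extern void block_encrypt(unsigned char out[16],
                          const unsigned char in[16]); /* hypothetical E_K */

static void feedback_block(int ofb, unsigned char reg[16],
                           const unsigned char p[16], unsigned char c[16])
{
    unsigned char ks[16];
    int i;

    block_encrypt(ks, reg);      /* keystream = E_K(register) */
    if (ofb)
        memcpy(reg, ks, 16);     /* OFB: feed the keystream back */
    for (i = 0; i < 16; i++)
        c[i] = p[i] ^ ks[i];
    if (!ofb)
        memcpy(reg, c, 16);      /* CFB: feed the ciphertext back */
}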
in=out */ + wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg); + #ifdef WOLFSSL_AES_OFB + if (mode == AES_OFB_MODE) { + XMEMCPY((byte*)aes->reg, (byte*)aes->tmp, AES_BLOCK_SIZE); + } + #endif + xorbuf((byte*)aes->tmp, in, AES_BLOCK_SIZE); + #ifdef WOLFSSL_AES_CFB + if (mode == AES_CFB_MODE) { + XMEMCPY(aes->reg, in, AES_BLOCK_SIZE); + } + #endif + XMEMCPY(out, (byte*)aes->tmp, AES_BLOCK_SIZE); out += AES_BLOCK_SIZE; in += AES_BLOCK_SIZE; sz -= AES_BLOCK_SIZE; @@ -9807,7 +7796,17 @@ /* decrypt left over data */ if (sz) { wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg); - XMEMCPY(aes->reg, in, sz); + #ifdef WOLFSSL_AES_CFB + if (mode == AES_CFB_MODE) { + XMEMCPY(aes->reg, in, sz); + } + #endif + #ifdef WOLFSSL_AES_OFB + if (mode == AES_OFB_MODE) { + XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE); + } + #endif + aes->left = AES_BLOCK_SIZE; tmp = (byte*)aes->tmp; @@ -9822,6 +7821,282 @@ #endif /* HAVE_AES_DECRYPT */ #endif /* WOLFSSL_AES_CFB */ +#ifdef WOLFSSL_AES_CFB +/* CFB 128 + * + * aes structure holding key to use for encryption + * out buffer to hold result of encryption (must be at least as large as input + * buffer) + * in buffer to encrypt + * sz size of input buffer + * + * returns 0 on success and negative error values on failure + */ +/* Software AES - CFB Encrypt */ +int wc_AesCfbEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + return wc_AesFeedbackEncrypt(aes, out, in, sz, AES_CFB_MODE); +} + + +#ifdef HAVE_AES_DECRYPT +/* CFB 128 + * + * aes structure holding key to use for decryption + * out buffer to hold result of decryption (must be at least as large as input + * buffer) + * in buffer to decrypt + * sz size of input buffer + * + * returns 0 on success and negative error values on failure + */ +/* Software AES - CFB Decrypt */ +int wc_AesCfbDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + return wc_AesFeedbackDecrypt(aes, out, in, sz, AES_CFB_MODE); +} +#endif /* HAVE_AES_DECRYPT */ + + +/* shift the whole AES_BLOCK_SIZE array left by 8 or 1 bits */ +static void shiftLeftArray(byte* ary, byte shift) +{ + int i; + + if (shift == WOLFSSL_BIT_SIZE) { + /* shifting over by 8 bits */ + for (i = 0; i < AES_BLOCK_SIZE - 1; i++) { + ary[i] = ary[i+1]; + } + ary[i] = 0; + } + else { + byte carry = 0; + + /* shifting over by 7 or less bits */ + for (i = 0; i < AES_BLOCK_SIZE - 1; i++) { + carry = ary[i+1] & (0XFF << (WOLFSSL_BIT_SIZE - shift)); + carry >>= (WOLFSSL_BIT_SIZE - shift); + ary[i] = (ary[i] << shift) + carry; + } + ary[i] = ary[i] << shift; + } +} + + +/* returns 0 on success and negative values on failure */ +static int wc_AesFeedbackCFB8(Aes* aes, byte* out, const byte* in, + word32 sz, byte dir) +{ + byte *pt; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + if (sz == 0) { + return 0; + } + + while (sz > 0) { + wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg); + if (dir == AES_DECRYPTION) { + pt = (byte*)aes->reg; + + /* LSB + CAT */ + shiftLeftArray(pt, WOLFSSL_BIT_SIZE); + pt[AES_BLOCK_SIZE - 1] = in[0]; + } + + /* MSB + XOR */ + out[0] = aes->tmp[0] ^ in[0]; + if (dir == AES_ENCRYPTION) { + pt = (byte*)aes->reg; + + /* LSB + CAT */ + shiftLeftArray(pt, WOLFSSL_BIT_SIZE); + pt[AES_BLOCK_SIZE - 1] = out[0]; + } + + out += 1; + in += 1; + sz -= 1; + } + + return 0; +} + + +/* returns 0 on success and negative values on failure */ +static int wc_AesFeedbackCFB1(Aes* aes, byte* out, const byte* in, + word32 sz, byte dir) +{ + byte tmp; + byte cur = 0; /* hold current work in order to 
handle inline in=out */
+ byte* pt;
+ int bit = 7;
+
+ if (aes == NULL || out == NULL || in == NULL) {
+ return BAD_FUNC_ARG;
+ }
+
+ if (sz == 0) {
+ return 0;
+ }
+
+ while (sz > 0) {
+ wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg);
+ if (dir == AES_DECRYPTION) {
+ pt = (byte*)aes->reg;
+
+ /* LSB + CAT */
+ tmp = (0X01 << bit) & in[0];
+ tmp = tmp >> bit;
+ tmp &= 0x01;
+ shiftLeftArray((byte*)aes->reg, 1);
+ pt[AES_BLOCK_SIZE - 1] |= tmp;
+ }
+
+ /* MSB + XOR */
+ tmp = (0X01 << bit) & in[0];
+ pt = (byte*)aes->tmp;
+ tmp = (pt[0] >> 7) ^ (tmp >> bit);
+ tmp &= 0x01;
+ cur |= (tmp << bit);
+
+
+ if (dir == AES_ENCRYPTION) {
+ pt = (byte*)aes->reg;
+
+ /* LSB + CAT */
+ shiftLeftArray((byte*)aes->reg, 1);
+ pt[AES_BLOCK_SIZE - 1] |= tmp;
+ }
+
+ bit--;
+ if (bit < 0) {
+ out[0] = cur;
+ out += 1;
+ in += 1;
+ sz -= 1;
+ bit = 7;
+ cur = 0;
+ }
+ else {
+ sz -= 1;
+ }
+ }
+
+ if (bit > 0 && bit < 7) {
+ out[0] = cur;
+ }
+
+ return 0;
+}
+
+
+/* CFB 1
+ *
+ * aes structure holding key to use for encryption
+ * out buffer to hold result of encryption (must be at least as large as input
+ * buffer)
+ * in buffer to encrypt (packed to left, i.e. 101 is 0xA0)
+ * sz size of input buffer in bits (0x1 would be size of 1 and 0xFF size of 8)
+ *
+ * returns 0 on success and negative values on failure
+ */
+int wc_AesCfb1Encrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+{
+ return wc_AesFeedbackCFB1(aes, out, in, sz, AES_ENCRYPTION);
+}
+
+
+/* CFB 8
+ *
+ * aes structure holding key to use for encryption
+ * out buffer to hold result of encryption (must be at least as large as input
+ * buffer)
+ * in buffer to encrypt
+ * sz size of input buffer
+ *
+ * returns 0 on success and negative values on failure
+ */
+int wc_AesCfb8Encrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+{
+ return wc_AesFeedbackCFB8(aes, out, in, sz, AES_ENCRYPTION);
+}
+#ifdef HAVE_AES_DECRYPT
+
+/* CFB 1
+ *
+ * aes structure holding key to use for decryption
+ * out buffer to hold result of decryption (must be at least as large as input
+ * buffer)
+ * in buffer to decrypt
+ * sz size of input buffer in bits (0x1 would be size of 1 and 0xFF size of 8)
+ *
+ * returns 0 on success and negative values on failure
+ */
+int wc_AesCfb1Decrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+{
+ return wc_AesFeedbackCFB1(aes, out, in, sz, AES_DECRYPTION);
+}
+
+
+/* CFB 8
+ *
+ * aes structure holding key to use for decryption
+ * out buffer to hold result of decryption (must be at least as large as input
+ * buffer)
+ * in buffer to decrypt
+ * sz size of input buffer
+ *
+ * returns 0 on success and negative values on failure
+ */
+int wc_AesCfb8Decrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+{
+ return wc_AesFeedbackCFB8(aes, out, in, sz, AES_DECRYPTION);
+}
+#endif /* HAVE_AES_DECRYPT */
+#endif /* WOLFSSL_AES_CFB */
+
+#ifdef WOLFSSL_AES_OFB
+/* OFB
+ *
+ * aes structure holding key to use for encryption
+ * out buffer to hold result of encryption (must be at least as large as input
+ * buffer)
+ * in buffer to encrypt
+ * sz size of input buffer
+ *
+ * returns 0 on success and negative error values on failure
+ */
+/* Software AES - OFB Encrypt */
+int wc_AesOfbEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+{
+ return wc_AesFeedbackEncrypt(aes, out, in, sz, AES_OFB_MODE);
+}
+
+
+#ifdef HAVE_AES_DECRYPT
+/* OFB
+ *
+ * aes structure holding key to use for decryption
+ * out buffer to hold result of decryption (must be at least as large as input
+ * buffer)
+ * in buffer to
decrypt + * sz size of input buffer + * + * returns 0 on success and negative error values on failure + */ +/* Software AES - OFB Decrypt */ +int wc_AesOfbDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + return wc_AesFeedbackDecrypt(aes, out, in, sz, AES_OFB_MODE); +} +#endif /* HAVE_AES_DECRYPT */ +#endif /* WOLFSSL_AES_OFB */ + #ifdef HAVE_AES_KEYWRAP @@ -10148,7 +8423,7 @@ word32 j; byte carry = 0; - /* multiply by shift left and propogate carry */ + /* multiply by shift left and propagate carry */ for (j = 0; j < AES_BLOCK_SIZE && outSz > 0; j++, outSz--) { byte tmpC; @@ -10186,6 +8461,7 @@ * * returns 0 on success */ +/* Software AES - XTS Encrypt */ int wc_AesXtsEncrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, const byte* i, word32 iSz) { @@ -10238,7 +8514,7 @@ #endif xorbuf(out, tmp, AES_BLOCK_SIZE); - /* multiply by shift left and propogate carry */ + /* multiply by shift left and propagate carry */ for (j = 0; j < AES_BLOCK_SIZE; j++) { byte tmpC; @@ -10293,6 +8569,7 @@ * * returns 0 on success */ +/* Software AES - XTS Decrypt */ int wc_AesXtsDecrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, const byte* i, word32 iSz) { @@ -10352,7 +8629,7 @@ #endif xorbuf(out, tmp, AES_BLOCK_SIZE); - /* multiply by shift left and propogate carry */ + /* multiply by shift left and propagate carry */ for (j = 0; j < AES_BLOCK_SIZE; j++) { byte tmpC; @@ -10376,7 +8653,7 @@ byte buf[AES_BLOCK_SIZE]; byte tmp2[AES_BLOCK_SIZE]; - /* multiply by shift left and propogate carry */ + /* multiply by shift left and propagate carry */ for (j = 0; j < AES_BLOCK_SIZE; j++) { byte tmpC;
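/* Aside: the "multiply by shift left and propagate carry" loops above are
 * the XTS tweak update: the 128-bit tweak is multiplied by x in GF(2^128),
 * with byte 0 as the least significant byte, and reduced by 0x87 when a
 * bit carries out (IEEE P1619). A compact version of the same step
 * (illustrative helper name):
 */
static void xts_tweak_double(unsigned char t[16])
{
    unsigned char carry = 0;
    int j;

    for (j = 0; j < 16; j++) {
        unsigned char hi = (unsigned char)(t[j] >> 7);
        t[j] = (unsigned char)((t[j] << 1) | carry);
        carry = hi;
    }
    if (carry)
        t[0] ^= 0x87;  /* reduce by the XTS polynomial x^128+x^7+x^2+x+1 */
}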