Xuyi Wang / wolfcrypt

Dependents:   OS

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers aes.c Source File

aes.c

00001 /* aes.c
00002  *
00003  * Copyright (C) 2006-2017 wolfSSL Inc.
00004  *
00005  * This file is part of wolfSSL.
00006  *
00007  * wolfSSL is free software; you can redistribute it and/or modify
00008  * it under the terms of the GNU General Public License as published by
00009  * the Free Software Foundation; either version 2 of the License, or
00010  * (at your option) any later version.
00011  *
00012  * wolfSSL is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
00020  */
00021 
00022 
00023 #ifdef HAVE_CONFIG_H
00024     #include <config.h>
00025 #endif
00026 
00027 #include <wolfcrypt/settings.h>
00028 #include <wolfcrypt/error-crypt.h>
00029 
00030 #if !defined(NO_AES)
00031 
00032 #if defined(HAVE_FIPS) && \
00033     defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
00034 
00035     /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
00036     #define FIPS_NO_WRAPPERS
00037 
00038     #ifdef USE_WINDOWS_API
00039         #pragma code_seg(".fipsA$g")
00040         #pragma const_seg(".fipsB$g")
00041     #endif
00042 #endif
00043 
00044 #include <wolfcrypt/aes.h>
00045 #include <wolfcrypt/cpuid.h>
00046 
00047 
00048 /* fips wrapper calls, user can call direct */
00049 #if defined(HAVE_FIPS) && \
00050     (!defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION < 2))
00051 
00052     int wc_AesSetKey(Aes* aes, const byte* key, word32 len, const byte* iv,
00053                               int dir)
00054     {
00055         if (aes == NULL ||  !( (len == 16) || (len == 24) || (len == 32)) ) {
00056             return BAD_FUNC_ARG;
00057         }
00058 
00059         return AesSetKey_fips(aes, key, len, iv, dir);
00060     }
00061     int wc_AesSetIV(Aes* aes, const byte* iv)
00062     {
00063         if (aes == NULL) {
00064             return BAD_FUNC_ARG;
00065         }
00066 
00067         return AesSetIV_fips(aes, iv);
00068     }
00069     #ifdef HAVE_AES_CBC
00070         int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
00071         {
00072             if (aes == NULL || out == NULL || in == NULL) {
00073                 return BAD_FUNC_ARG;
00074             }
00075 
00076             return AesCbcEncrypt_fips(aes, out, in, sz);
00077         }
00078         #ifdef HAVE_AES_DECRYPT
00079             int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
00080             {
00081                 if (aes == NULL || out == NULL || in == NULL
00082                                             || sz % AES_BLOCK_SIZE != 0) {
00083                     return BAD_FUNC_ARG;
00084                 }
00085 
00086                 return AesCbcDecrypt_fips(aes, out, in, sz);
00087             }
00088         #endif /* HAVE_AES_DECRYPT */
00089     #endif /* HAVE_AES_CBC */
00090 
00091     /* AES-CTR */
00092     #ifdef WOLFSSL_AES_COUNTER
00093         int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
00094         {
00095             if (aes == NULL || out == NULL || in == NULL) {
00096                 return BAD_FUNC_ARG;
00097             }
00098 
00099             return AesCtrEncrypt(aes, out, in, sz);
00100         }
00101     #endif
00102 
00103     /* AES-DIRECT */
00104     #if defined(WOLFSSL_AES_DIRECT)
        /* FIPS wrapper: encrypt a single 16-byte block in ECB fashion.
         * No argument checking is done here — callers must pass valid,
         * block-sized buffers. */
        void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in)
        {
            AesEncryptDirect(aes, out, in);
        }
00109 
00110         #ifdef HAVE_AES_DECRYPT
            /* FIPS wrapper: decrypt a single 16-byte block in ECB fashion.
             * No argument checking is done here. */
            void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in)
            {
                AesDecryptDirect(aes, out, in);
            }
00115         #endif /* HAVE_AES_DECRYPT */
00116 
        /* FIPS wrapper: set key for direct (single-block) use. Validation is
         * left to the underlying FIPS implementation. */
        int wc_AesSetKeyDirect(Aes* aes, const byte* key, word32 len,
                                        const byte* iv, int dir)
        {
            return AesSetKeyDirect(aes, key, len, iv, dir);
        }
00122     #endif /* WOLFSSL_AES_DIRECT */
00123 
00124     /* AES-GCM */
00125     #ifdef HAVE_AESGCM
00126         int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
00127         {
00128             if (aes == NULL || !( (len == 16) || (len == 24) || (len == 32)) ) {
00129                 return BAD_FUNC_ARG;
00130             }
00131 
00132             return AesGcmSetKey_fips(aes, key, len);
00133         }
00134         int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
00135                                       const byte* iv, word32 ivSz,
00136                                       byte* authTag, word32 authTagSz,
00137                                       const byte* authIn, word32 authInSz)
00138         {
00139             if (aes == NULL || authTagSz > AES_BLOCK_SIZE
00140                                     || authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ ||
00141                                     ivSz > AES_BLOCK_SIZE) {
00142                 return BAD_FUNC_ARG;
00143             }
00144 
00145             return AesGcmEncrypt_fips(aes, out, in, sz, iv, ivSz, authTag,
00146                 authTagSz, authIn, authInSz);
00147         }
00148 
00149         #ifdef HAVE_AES_DECRYPT
00150             int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
00151                                           const byte* iv, word32 ivSz,
00152                                           const byte* authTag, word32 authTagSz,
00153                                           const byte* authIn, word32 authInSz)
00154             {
00155                 if (aes == NULL || out == NULL || in == NULL || iv == NULL
00156                         || authTag == NULL || authTagSz > AES_BLOCK_SIZE ||
00157                         ivSz > AES_BLOCK_SIZE) {
00158                     return BAD_FUNC_ARG;
00159                 }
00160 
00161                 return AesGcmDecrypt_fips(aes, out, in, sz, iv, ivSz, authTag,
00162                     authTagSz, authIn, authInSz);
00163             }
00164         #endif /* HAVE_AES_DECRYPT */
00165 
00166         int wc_GmacSetKey(Gmac* gmac, const byte* key, word32 len)
00167         {
00168             if (gmac == NULL || key == NULL || !((len == 16) ||
00169                                 (len == 24) || (len == 32)) ) {
00170                 return BAD_FUNC_ARG;
00171             }
00172 
00173             return GmacSetKey(gmac, key, len);
00174         }
00175         int wc_GmacUpdate(Gmac* gmac, const byte* iv, word32 ivSz,
00176                                       const byte* authIn, word32 authInSz,
00177                                       byte* authTag, word32 authTagSz)
00178         {
00179             if (gmac == NULL || authTagSz > AES_BLOCK_SIZE ||
00180                                authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ) {
00181                 return BAD_FUNC_ARG;
00182             }
00183 
00184             return GmacUpdate(gmac, iv, ivSz, authIn, authInSz,
00185                               authTag, authTagSz);
00186         }
00187     #endif /* HAVE_AESGCM */
00188 
00189     /* AES-CCM */
00190     #if defined(HAVE_AESCCM) && \
00191         defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2)
        /* FIPS wrapper: set the AES-CCM key. Validation is left to the
         * underlying implementation. */
        int wc_AesCcmSetKey(Aes* aes, const byte* key, word32 keySz)
        {
            return AesCcmSetKey(aes, key, keySz);
        }
00196         int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
00197                                       const byte* nonce, word32 nonceSz,
00198                                       byte* authTag, word32 authTagSz,
00199                                       const byte* authIn, word32 authInSz)
00200         {
00201             /* sanity check on arguments */
00202             if (aes == NULL || out == NULL || in == NULL || nonce == NULL
00203                     || authTag == NULL || nonceSz < 7 || nonceSz > 13)
00204                 return BAD_FUNC_ARG;
00205 
00206             AesCcmEncrypt(aes, out, in, inSz, nonce, nonceSz, authTag,
00207                 authTagSz, authIn, authInSz);
00208             return 0;
00209         }
00210 
00211         #ifdef HAVE_AES_DECRYPT
00212             int  wc_AesCcmDecrypt(Aes* aes, byte* out,
00213                 const byte* in, word32 inSz,
00214                 const byte* nonce, word32 nonceSz,
00215                 const byte* authTag, word32 authTagSz,
00216                 const byte* authIn, word32 authInSz)
00217             {
00218 
00219                 if (aes == NULL || out == NULL || in == NULL || nonce == NULL
00220                     || authTag == NULL || nonceSz < 7 || nonceSz > 13) {
00221                         return BAD_FUNC_ARG;
00222                 }
00223 
00224                 return AesCcmDecrypt(aes, out, in, inSz, nonce, nonceSz,
00225                     authTag, authTagSz, authIn, authInSz);
00226             }
00227         #endif /* HAVE_AES_DECRYPT */
00228     #endif /* HAVE_AESCCM && HAVE_FIPS_VERSION 2 */
00229 
    /* FIPS wrapper: AES context init. The v1 FIPS module has no AesInit, so
     * this is a deliberate no-op that always reports success; the parameters
     * are consumed only to silence unused-argument warnings. */
    int  wc_AesInit(Aes* aes, void* h, int i)
    {
        (void)aes;
        (void)h;
        (void)i;
        /* FIPS doesn't support:
            return AesInit(aes, h, i); */
        return 0;
    }
    /* FIPS wrapper: AES context teardown. The v1 FIPS module has no AesFree,
     * so this is a deliberate no-op. */
    void wc_AesFree(Aes* aes)
    {
        (void)aes;
        /* FIPS doesn't support:
            AesFree(aes); */
    }
00245 
00246 #else /* else build without fips, or for FIPS v2 */
00247 
00248 
00249 #if defined(WOLFSSL_TI_CRYPT)
00250     #include <wolfcrypt/src/port/ti/ti-aes.c>
00251 #else
00252 
00253 #include <wolfcrypt/logging.h>
00254 
00255 #ifdef NO_INLINE
00256     #include <wolfcrypt/misc.h>
00257 #else
00258     #define WOLFSSL_MISC_INCLUDED
00259     #include <wolfcrypt/src/misc.c>
00260 #endif
00261 
00262 #if !defined(WOLFSSL_ARMASM)
00263 
00264 #ifdef WOLFSSL_IMX6_CAAM_BLOB
00265     /* case of possibly not using hardware acceleration for AES but using key
00266        blobs */
00267     #include <wolfcrypt/port/caam/wolfcaam.h>
00268 #endif
00269 
00270 #ifdef DEBUG_AESNI
00271     #include <stdio.h>
00272 #endif
00273 
00274 #ifdef _MSC_VER
00275     /* 4127 warning constant while(1)  */
00276     #pragma warning(disable: 4127)
00277 #endif
00278 
00279 
00280 /* Define AES implementation includes and functions */
00281 #if defined(STM32_CRYPTO)
00282      /* STM32F2/F4 hardware AES support for CBC, CTR modes */
00283 
00284     #ifdef WOLFSSL_STM32L4
00285         #define CRYP AES
00286     #endif
00287 
00288     /* CRYPT_AES_GCM starts the IV with 2 */
00289     #define STM32_GCM_IV_START 2
00290 
00291 #if defined(WOLFSSL_AES_DIRECT) || defined(HAVE_AESGCM) || defined(HAVE_AESCCM)
    /* Encrypt one 16-byte block in ECB mode using the STM32 hardware CRYP
     * peripheral. Returns 0 on success, WC_TIMEOUT_E if the HAL reports a
     * timeout (CubeMX path only; the StdPeriph path busy-waits and always
     * returns 0). aes->rounds selects the hardware key size (10/12/14 ->
     * 128/192/256-bit). */
    static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
    {
        int ret = 0;
    #ifdef WOLFSSL_STM32_CUBEMX
        CRYP_HandleTypeDef hcryp;

        XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef));
        /* map AES round count to HAL key-size setting */
        switch(aes->rounds) {
            case 10: /* 128-bit key */
                hcryp.Init.KeySize = CRYP_KEYSIZE_128B;
                break;
    #ifdef CRYP_KEYSIZE_192B
            case 12: /* 192-bit key */
                hcryp.Init.KeySize = CRYP_KEYSIZE_192B;
                break;
    #endif
            case 14: /* 256-bit key */
                hcryp.Init.KeySize = CRYP_KEYSIZE_256B;
                break;
            default:
                /* unexpected round count: KeySize stays zeroed; HAL call
                 * proceeds with that default */
                break;
        }
        hcryp.Instance = CRYP;
        hcryp.Init.DataType = CRYP_DATATYPE_8B;
        hcryp.Init.pKey = (uint8_t*)aes->key;

        HAL_CRYP_Init(&hcryp);

        if (HAL_CRYP_AESECB_Encrypt(&hcryp, (uint8_t*)inBlock, AES_BLOCK_SIZE,
                                    outBlock, STM32_HAL_TIMEOUT) != HAL_OK) {
            ret = WC_TIMEOUT_E;
        }

        HAL_CRYP_DeInit(&hcryp);
    #else
        /* Standard Peripheral Library path: program the CRYP registers
         * directly. NOTE(review): the 32-bit FIFO accesses below cast byte
         * pointers to uint32_t* — assumes 4-byte-aligned blocks; confirm
         * callers guarantee alignment. */
        word32 *enc_key;
        CRYP_InitTypeDef AES_CRYP_InitStructure;
        CRYP_KeyInitTypeDef AES_CRYP_KeyInitStructure;

        enc_key = aes->key;

        /* crypto structure initialization */
        CRYP_KeyStructInit(&AES_CRYP_KeyInitStructure);
        CRYP_StructInit(&AES_CRYP_InitStructure);

        /* reset registers to their default values */
        CRYP_DeInit();

        /* load key into correct registers; smaller keys occupy the
         * high-numbered key register pairs */
        switch (aes->rounds) {
            case 10: /* 128-bit key */
                AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_128b;
                AES_CRYP_KeyInitStructure.CRYP_Key2Left  = enc_key[0];
                AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[1];
                AES_CRYP_KeyInitStructure.CRYP_Key3Left  = enc_key[2];
                AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[3];
                break;

            case 12: /* 192-bit key */
                AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_192b;
                AES_CRYP_KeyInitStructure.CRYP_Key1Left  = enc_key[0];
                AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[1];
                AES_CRYP_KeyInitStructure.CRYP_Key2Left  = enc_key[2];
                AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[3];
                AES_CRYP_KeyInitStructure.CRYP_Key3Left  = enc_key[4];
                AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[5];
                break;

            case 14: /* 256-bit key */
                AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_256b;
                AES_CRYP_KeyInitStructure.CRYP_Key0Left  = enc_key[0];
                AES_CRYP_KeyInitStructure.CRYP_Key0Right = enc_key[1];
                AES_CRYP_KeyInitStructure.CRYP_Key1Left  = enc_key[2];
                AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[3];
                AES_CRYP_KeyInitStructure.CRYP_Key2Left  = enc_key[4];
                AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[5];
                AES_CRYP_KeyInitStructure.CRYP_Key3Left  = enc_key[6];
                AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[7];
                break;

            default:
                break;
        }
        CRYP_KeyInit(&AES_CRYP_KeyInitStructure);

        /* set direction, mode, and datatype */
        AES_CRYP_InitStructure.CRYP_AlgoDir  = CRYP_AlgoDir_Encrypt;
        AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_ECB;
        AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b;
        CRYP_Init(&AES_CRYP_InitStructure);

        /* enable crypto processor */
        CRYP_Cmd(ENABLE);

        /* flush IN/OUT FIFOs */
        CRYP_FIFOFlush();

        /* feed one 16-byte block into the IN FIFO, a word at a time */
        CRYP_DataIn(*(uint32_t*)&inBlock[0]);
        CRYP_DataIn(*(uint32_t*)&inBlock[4]);
        CRYP_DataIn(*(uint32_t*)&inBlock[8]);
        CRYP_DataIn(*(uint32_t*)&inBlock[12]);

        /* wait until the complete message has been processed */
        while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {}

        /* drain the OUT FIFO into the output block */
        *(uint32_t*)&outBlock[0]  = CRYP_DataOut();
        *(uint32_t*)&outBlock[4]  = CRYP_DataOut();
        *(uint32_t*)&outBlock[8]  = CRYP_DataOut();
        *(uint32_t*)&outBlock[12] = CRYP_DataOut();

        /* disable crypto processor */
        CRYP_Cmd(DISABLE);
    #endif /* WOLFSSL_STM32_CUBEMX */
        return ret;
    }
00407 #endif /* WOLFSSL_AES_DIRECT || HAVE_AESGCM || HAVE_AESCCM */
00408 
00409 #ifdef HAVE_AES_DECRYPT
00410     #if defined(WOLFSSL_AES_DIRECT) || defined(HAVE_AESCCM)
    /* Decrypt one 16-byte block in ECB mode using the STM32 hardware CRYP
     * peripheral. Mirrors wc_AesEncrypt above, except the StdPeriph path must
     * first run the hardware key-schedule preparation (AES_Key mode) before
     * switching to ECB decrypt. Returns 0 on success, WC_TIMEOUT_E on HAL
     * timeout (CubeMX path only). */
    static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
    {
        int ret = 0;
    #ifdef WOLFSSL_STM32_CUBEMX
        CRYP_HandleTypeDef hcryp;

        XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef));
        /* map AES round count to HAL key-size setting */
        switch(aes->rounds) {
            case 10: /* 128-bit key */
                hcryp.Init.KeySize = CRYP_KEYSIZE_128B;
                break;
    #ifdef CRYP_KEYSIZE_192B
            case 12: /* 192-bit key */
                hcryp.Init.KeySize = CRYP_KEYSIZE_192B;
                break;
    #endif
            case 14: /* 256-bit key */
                hcryp.Init.KeySize = CRYP_KEYSIZE_256B;
                break;
            default:
                break;
        }
        hcryp.Instance = CRYP;
        hcryp.Init.DataType = CRYP_DATATYPE_8B;
        hcryp.Init.pKey = (uint8_t*)aes->key;

        HAL_CRYP_Init(&hcryp);

        if (HAL_CRYP_AESECB_Decrypt(&hcryp, (uint8_t*)inBlock, AES_BLOCK_SIZE,
                                       outBlock, STM32_HAL_TIMEOUT) != HAL_OK) {
            ret = WC_TIMEOUT_E;
        }

        HAL_CRYP_DeInit(&hcryp);
    #else
        /* Standard Peripheral Library path. NOTE(review): 32-bit FIFO
         * accesses cast byte pointers to uint32_t* — assumes 4-byte-aligned
         * blocks; confirm callers guarantee alignment. */
        word32 *enc_key;
        CRYP_InitTypeDef AES_CRYP_InitStructure;
        CRYP_KeyInitTypeDef AES_CRYP_KeyInitStructure;

        enc_key = aes->key;

        /* crypto structure initialization */
        CRYP_KeyStructInit(&AES_CRYP_KeyInitStructure);
        CRYP_StructInit(&AES_CRYP_InitStructure);

        /* reset registers to their default values */
        CRYP_DeInit();

        /* load key into correct registers; smaller keys occupy the
         * high-numbered key register pairs */
        switch (aes->rounds) {
            case 10: /* 128-bit key */
                AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_128b;
                AES_CRYP_KeyInitStructure.CRYP_Key2Left  = enc_key[0];
                AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[1];
                AES_CRYP_KeyInitStructure.CRYP_Key3Left  = enc_key[2];
                AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[3];
                break;

            case 12: /* 192-bit key */
                AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_192b;
                AES_CRYP_KeyInitStructure.CRYP_Key1Left  = enc_key[0];
                AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[1];
                AES_CRYP_KeyInitStructure.CRYP_Key2Left  = enc_key[2];
                AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[3];
                AES_CRYP_KeyInitStructure.CRYP_Key3Left  = enc_key[4];
                AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[5];
                break;

            case 14: /* 256-bit key */
                AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_256b;
                AES_CRYP_KeyInitStructure.CRYP_Key0Left  = enc_key[0];
                AES_CRYP_KeyInitStructure.CRYP_Key0Right = enc_key[1];
                AES_CRYP_KeyInitStructure.CRYP_Key1Left  = enc_key[2];
                AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[3];
                AES_CRYP_KeyInitStructure.CRYP_Key2Left  = enc_key[4];
                AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[5];
                AES_CRYP_KeyInitStructure.CRYP_Key3Left  = enc_key[6];
                AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[7];
                break;

            default:
                break;
        }
        CRYP_KeyInit(&AES_CRYP_KeyInitStructure);

        /* phase 1: run the key schedule in hardware (AES_Key mode) to derive
         * the decryption key from the encryption key */
        AES_CRYP_InitStructure.CRYP_AlgoDir  = CRYP_AlgoDir_Decrypt;
        AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_Key;
        AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b;
        CRYP_Init(&AES_CRYP_InitStructure);

        /* enable crypto processor */
        CRYP_Cmd(ENABLE);

        /* wait until decrypt key has been initialized */
        while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {}

        /* phase 2: switch to ECB decrypt with the prepared key */
        AES_CRYP_InitStructure.CRYP_AlgoDir  = CRYP_AlgoDir_Decrypt;
        AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_ECB;
        AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b;
        CRYP_Init(&AES_CRYP_InitStructure);

        /* enable crypto processor */
        CRYP_Cmd(ENABLE);

        /* flush IN/OUT FIFOs */
        CRYP_FIFOFlush();

        /* feed one 16-byte block into the IN FIFO, a word at a time */
        CRYP_DataIn(*(uint32_t*)&inBlock[0]);
        CRYP_DataIn(*(uint32_t*)&inBlock[4]);
        CRYP_DataIn(*(uint32_t*)&inBlock[8]);
        CRYP_DataIn(*(uint32_t*)&inBlock[12]);

        /* wait until the complete message has been processed */
        while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {}

        /* drain the OUT FIFO into the output block */
        *(uint32_t*)&outBlock[0]  = CRYP_DataOut();
        *(uint32_t*)&outBlock[4]  = CRYP_DataOut();
        *(uint32_t*)&outBlock[8]  = CRYP_DataOut();
        *(uint32_t*)&outBlock[12] = CRYP_DataOut();

        /* disable crypto processor */
        CRYP_Cmd(DISABLE);
    #endif /* WOLFSSL_STM32_CUBEMX */
        return ret;
    }
00538     #endif /* WOLFSSL_AES_DIRECT || HAVE_AESCCM */
00539 #endif /* HAVE_AES_DECRYPT */
00540 
00541 #elif defined(HAVE_COLDFIRE_SEC)
00542     /* Freescale Coldfire SEC support for CBC mode.
00543      * NOTE: no support for AES-CTR/GCM/CCM/Direct */
00544     #include <wolfssl/wolfcrypt/types.h>
00545     #include "sec.h"
00546     #include "mcf5475_sec.h"
00547     #include "mcf5475_siu.h"
00548 #elif defined(FREESCALE_LTC)
00549     #include "fsl_ltc.h"
00550     #if defined(FREESCALE_LTC_AES_GCM)
00551         #undef NEED_AES_TABLES
00552         #undef GCM_TABLE
00553     #else
00554         /* if LTC doesn't have GCM, use software with LTC AES ECB mode */
        /* Single-block ECB encrypt shim: routes through wc_AesEncryptDirect
         * (which uses the LTC AES ECB engine) and always reports success. */
        static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
        {
            wc_AesEncryptDirect(aes, outBlock, inBlock);
            return 0;
        }
        /* Single-block ECB decrypt shim: routes through wc_AesDecryptDirect
         * (which uses the LTC AES ECB engine) and always reports success. */
        static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
        {
            wc_AesDecryptDirect(aes, outBlock, inBlock);
            return 0;
        }
00565     #endif
00566 #elif defined(FREESCALE_MMCAU)
00567     /* Freescale mmCAU hardware AES support for Direct, CBC, CCM, GCM modes
00568      * through the CAU/mmCAU library. Documentation located in
00569      * ColdFire/ColdFire+ CAU and Kinetis mmCAU Software Library User
00570      * Guide (See note in README). */
00571     #ifdef FREESCALE_MMCAU_CLASSIC
00572         /* MMCAU 1.4 library used with non-KSDK / classic MQX builds */
00573         #include "cau_api.h"
00574     #else
00575         #include "fsl_mmcau.h"
00576     #endif
00577 
    /* Encrypt one 16-byte block via the Freescale mmCAU coprocessor.
     * Serializes hardware access with the crypto-HW mutex. Returns 0 on
     * success, BAD_ALIGN_E when the classic library's output-alignment
     * requirement is violated, or the mutex-lock error code. */
    static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
    {
        int ret;

    #ifdef FREESCALE_MMCAU_CLASSIC
        /* classic CAU library requires aligned output buffers */
        if ((wolfssl_word)outBlock % WOLFSSL_MMCAU_ALIGNMENT) {
            WOLFSSL_MSG("Bad cau_aes_encrypt alignment");
            return BAD_ALIGN_E;
        }
    #endif

        ret = wolfSSL_CryptHwMutexLock();
        if(ret == 0) {
        #ifdef FREESCALE_MMCAU_CLASSIC
            cau_aes_encrypt(inBlock, (byte*)aes->key, aes->rounds, outBlock);
        #else
            MMCAU_AES_EncryptEcb(inBlock, (byte*)aes->key, aes->rounds,
                                 outBlock);
        #endif
            wolfSSL_CryptHwMutexUnLock();
        }
        return ret;
    }
00601     #ifdef HAVE_AES_DECRYPT
    /* Decrypt one 16-byte block via the Freescale mmCAU coprocessor.
     * Mirrors wc_AesEncrypt above: mutex-guarded hardware call, with an
     * alignment check on the classic-library path. */
    static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
    {
        int ret;

    #ifdef FREESCALE_MMCAU_CLASSIC
        /* classic CAU library requires aligned output buffers */
        if ((wolfssl_word)outBlock % WOLFSSL_MMCAU_ALIGNMENT) {
            WOLFSSL_MSG("Bad cau_aes_decrypt alignment");
            return BAD_ALIGN_E;
        }
    #endif

        ret = wolfSSL_CryptHwMutexLock();
        if(ret == 0) {
        #ifdef FREESCALE_MMCAU_CLASSIC
            cau_aes_decrypt(inBlock, (byte*)aes->key, aes->rounds, outBlock);
        #else
            MMCAU_AES_DecryptEcb(inBlock, (byte*)aes->key, aes->rounds,
                                 outBlock);
        #endif
            wolfSSL_CryptHwMutexUnLock();
        }
        return ret;
    }
00625     #endif /* HAVE_AES_DECRYPT */
00626 
00627 #elif defined(WOLFSSL_PIC32MZ_CRYPT)
00628 
00629     #include <wolfssl/wolfcrypt/port/pic32/pic32mz-crypt.h>
00630 
00631     #if defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT)
    /* Encrypt one 16-byte block with the PIC32MZ crypto engine in raw ECB
     * (RECB) mode; no IV is used. Returns the engine's status code. */
    static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
    {
        return wc_Pic32AesCrypt(aes->key, aes->keylen, NULL, 0,
            outBlock, inBlock, AES_BLOCK_SIZE,
            PIC32_ENCRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_RECB);
    }
00638     #endif
00639 
00640     #if defined(HAVE_AES_DECRYPT) && defined(WOLFSSL_AES_DIRECT)
    /* Decrypt one 16-byte block with the PIC32MZ crypto engine in raw ECB
     * (RECB) mode; no IV is used. Returns the engine's status code. */
    static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
    {
        return wc_Pic32AesCrypt(aes->key, aes->keylen, NULL, 0,
            outBlock, inBlock, AES_BLOCK_SIZE,
            PIC32_DECRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_RECB);
    }
00647     #endif
00648 
00649 #elif defined(WOLFSSL_NRF51_AES)
00650     /* Use built-in AES hardware - AES 128 ECB Encrypt Only */
00651     #include "wolfssl/wolfcrypt/port/nrf51.h"
00652 
    /* Encrypt one 16-byte block using the nRF51 hardware AES (ECB, AES-128
     * encrypt only — decrypt is rejected at compile time below). */
    static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
    {
        return nrf51_aes_encrypt(inBlock, (byte*)aes->key, aes->rounds, outBlock);
    }
00657 
00658     #ifdef HAVE_AES_DECRYPT
00659         #error nRF51 AES Hardware does not support decrypt
00660     #endif /* HAVE_AES_DECRYPT */
00661 
00662 
00663 #elif defined(WOLFSSL_AESNI)
00664 
00665     #define NEED_AES_TABLES
00666 
00667     /* Each platform needs to query info type 1 from cpuid to see if aesni is
00668      * supported. Also, let's setup a macro for proper linkage w/o ABI conflicts
00669      */
00670 
00671     #ifndef AESNI_ALIGN
00672         #define AESNI_ALIGN 16
00673     #endif
00674 
00675     #ifndef _MSC_VER
00676         #define XASM_LINK(f) asm(f)
00677     #else
00678         #define XASM_LINK(f)
00679     #endif /* _MSC_VER */
00680 
00681     static int checkAESNI = 0;
00682     static int haveAESNI  = 0;
00683     static word32 intel_flags = 0;
00684 
00685     static int Check_CPU_support_AES(void)
00686     {
00687         intel_flags = cpuid_get_flags();
00688 
00689         return IS_INTEL_AESNI(intel_flags) != 0;
00690     }
00691 
00692 
00693     /* tell C compiler these are asm functions in case any mix up of ABI underscore
00694        prefix between clang/gcc/llvm etc */
00695     #ifdef HAVE_AES_CBC
00696         void AES_CBC_encrypt(const unsigned char* in, unsigned char* out,
00697                              unsigned char* ivec, unsigned long length,
00698                              const unsigned char* KS, int nr)
00699                              XASM_LINK("AES_CBC_encrypt");
00700 
00701         #ifdef HAVE_AES_DECRYPT
00702             #if defined(WOLFSSL_AESNI_BY4)
00703                 void AES_CBC_decrypt_by4(const unsigned char* in, unsigned char* out,
00704                                          unsigned char* ivec, unsigned long length,
00705                                          const unsigned char* KS, int nr)
00706                                          XASM_LINK("AES_CBC_decrypt_by4");
00707             #elif defined(WOLFSSL_AESNI_BY6)
00708                 void AES_CBC_decrypt_by6(const unsigned char* in, unsigned char* out,
00709                                          unsigned char* ivec, unsigned long length,
00710                                          const unsigned char* KS, int nr)
00711                                          XASM_LINK("AES_CBC_decrypt_by6");
00712             #else /* WOLFSSL_AESNI_BYx */
00713                 void AES_CBC_decrypt_by8(const unsigned char* in, unsigned char* out,
00714                                          unsigned char* ivec, unsigned long length,
00715                                          const unsigned char* KS, int nr)
00716                                          XASM_LINK("AES_CBC_decrypt_by8");
00717             #endif /* WOLFSSL_AESNI_BYx */
00718         #endif /* HAVE_AES_DECRYPT */
00719     #endif /* HAVE_AES_CBC */
00720 
00721     void AES_ECB_encrypt(const unsigned char* in, unsigned char* out,
00722                          unsigned long length, const unsigned char* KS, int nr)
00723                          XASM_LINK("AES_ECB_encrypt");
00724 
00725     #ifdef HAVE_AES_DECRYPT
00726         void AES_ECB_decrypt(const unsigned char* in, unsigned char* out,
00727                              unsigned long length, const unsigned char* KS, int nr)
00728                              XASM_LINK("AES_ECB_decrypt");
00729     #endif
00730 
00731     void AES_128_Key_Expansion(const unsigned char* userkey,
00732                                unsigned char* key_schedule)
00733                                XASM_LINK("AES_128_Key_Expansion");
00734 
00735     void AES_192_Key_Expansion(const unsigned char* userkey,
00736                                unsigned char* key_schedule)
00737                                XASM_LINK("AES_192_Key_Expansion");
00738 
00739     void AES_256_Key_Expansion(const unsigned char* userkey,
00740                                unsigned char* key_schedule)
00741                                XASM_LINK("AES_256_Key_Expansion");
00742 
00743 
00744     static int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
00745                                    Aes* aes)
00746     {
00747         int ret;
00748 
00749         if (!userKey || !aes)
00750             return BAD_FUNC_ARG;
00751 
00752         switch (bits) {
00753             case 128:
00754                AES_128_Key_Expansion (userKey,(byte*)aes->key); aes->rounds = 10;
00755                return 0;
00756             case 192:
00757                AES_192_Key_Expansion (userKey,(byte*)aes->key); aes->rounds = 12;
00758                return 0;
00759             case 256:
00760                AES_256_Key_Expansion (userKey,(byte*)aes->key); aes->rounds = 14;
00761                return 0;
00762             default:
00763                 ret = BAD_FUNC_ARG;
00764         }
00765 
00766         return ret;
00767     }
00768 
00769     #ifdef HAVE_AES_DECRYPT
        /* Build an AES-NI decryption key schedule: expand the encryption
         * schedule into a temporary, then reverse its order applying the
         * InvMixColumns transform (_mm_aesimc_si128) to the interior round
         * keys, as required by the AESDEC instruction (equivalent-inverse
         * cipher form). The first and last round keys are copied untouched.
         * Returns 0 on success or BAD_FUNC_ARG.
         * NOTE(review): temp_key (a stack copy of the full key schedule) is
         * not wiped before return — consider zeroizing; confirm project
         * policy. */
        static int AES_set_decrypt_key(const unsigned char* userKey,
                                                    const int bits, Aes* aes)
        {
            int nr;
            Aes temp_key;
            __m128i *Key_Schedule = (__m128i*)aes->key;
            __m128i *Temp_Key_Schedule = (__m128i*)temp_key.key;

            if (!userKey || !aes)
                return BAD_FUNC_ARG;

            if (AES_set_encrypt_key(userKey,bits,&temp_key) == BAD_FUNC_ARG)
                return BAD_FUNC_ARG;

            nr = temp_key.rounds;
            aes->rounds = nr;

            /* last round key is the plain first encryption round key */
            Key_Schedule[nr] = Temp_Key_Schedule[0];
            /* interior round keys get InvMixColumns applied, in reverse order;
             * the first 9 are common to all key sizes (nr >= 10) */
            Key_Schedule[nr-1] = _mm_aesimc_si128(Temp_Key_Schedule[1]);
            Key_Schedule[nr-2] = _mm_aesimc_si128(Temp_Key_Schedule[2]);
            Key_Schedule[nr-3] = _mm_aesimc_si128(Temp_Key_Schedule[3]);
            Key_Schedule[nr-4] = _mm_aesimc_si128(Temp_Key_Schedule[4]);
            Key_Schedule[nr-5] = _mm_aesimc_si128(Temp_Key_Schedule[5]);
            Key_Schedule[nr-6] = _mm_aesimc_si128(Temp_Key_Schedule[6]);
            Key_Schedule[nr-7] = _mm_aesimc_si128(Temp_Key_Schedule[7]);
            Key_Schedule[nr-8] = _mm_aesimc_si128(Temp_Key_Schedule[8]);
            Key_Schedule[nr-9] = _mm_aesimc_si128(Temp_Key_Schedule[9]);

            /* extra rounds for 192-bit (nr == 12) and 256-bit (nr == 14) */
            if (nr>10) {
                Key_Schedule[nr-10] = _mm_aesimc_si128(Temp_Key_Schedule[10]);
                Key_Schedule[nr-11] = _mm_aesimc_si128(Temp_Key_Schedule[11]);
            }

            if (nr>12) {
                Key_Schedule[nr-12] = _mm_aesimc_si128(Temp_Key_Schedule[12]);
                Key_Schedule[nr-13] = _mm_aesimc_si128(Temp_Key_Schedule[13]);
            }

            /* first round key is the plain last encryption round key */
            Key_Schedule[0] = Temp_Key_Schedule[nr];

            return 0;
        }
00813 
00814 #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES)
        /* Single-block AES encrypt for the i.MX6 CAAM build: delegates to
         * wc_AesEncryptDirect(), which takes (aes, out, in) -- so outBlock is
         * intentionally passed before inBlock.  Always returns 0; any error
         * reporting from the CAAM path is not surfaced here. */
        static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
        {
            wc_AesEncryptDirect(aes, outBlock, inBlock);
            return 0;
        }
00820 #else
00821 
00822     /* using wolfCrypt software AES implementation */
00823     #define NEED_AES_TABLES
00824 #endif
00825 
00826 
00827 
00828 #ifdef NEED_AES_TABLES
00829 
/* AES key-schedule round constants, stored in the high byte of each word:
 * successive values are 0x01 doubled in GF(2^8) (0x1B, 0x36 after the
 * reduction wraps).  Indexed by round during key expansion. */
static const word32 rcon[] = {
    0x01000000, 0x02000000, 0x04000000, 0x08000000,
    0x10000000, 0x20000000, 0x40000000, 0x80000000,
    0x1B000000, 0x36000000,
    /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
};
00836 
/* Encryption T-tables for the software AES path.  Each of the four 256-entry
 * tables maps one state byte to a 32-bit round contribution; by inspection
 * Te[1], Te[2] and Te[3] are byte-rotations of Te[0], so one table serves
 * each byte position of a column.  These appear to be the classic combined
 * SubBytes+MixColumns tables from the Rijndael reference code (see FIPS-197)
 * -- entries must not be altered. */
static const word32 Te[4][256] = {
/* Te[0] */
{
    0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
    0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
    0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
    0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
    0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
    0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
    0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
    0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
    0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
    0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
    0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
    0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
    0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
    0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
    0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
    0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
    0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
    0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
    0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
    0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
    0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
    0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
    0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
    0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
    0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
    0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
    0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
    0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
    0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
    0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
    0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
    0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
    0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
    0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
    0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
    0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
    0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
    0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
    0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
    0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
    0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
    0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
    0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
    0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
    0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
    0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
    0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
    0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
    0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
    0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
    0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
    0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
    0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
    0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
    0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
    0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
    0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
    0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
    0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
    0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
    0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
    0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
    0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
    0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
},
/* Te[1]: Te[0] rotated right by one byte */
{
    0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
    0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
    0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
    0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
    0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
    0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
    0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
    0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
    0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
    0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
    0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
    0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
    0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
    0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
    0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
    0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
    0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
    0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
    0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
    0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
    0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
    0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
    0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
    0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
    0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
    0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
    0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
    0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
    0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
    0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
    0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
    0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
    0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
    0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
    0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
    0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
    0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
    0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
    0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
    0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
    0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
    0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
    0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
    0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
    0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
    0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
    0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
    0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
    0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
    0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
    0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
    0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
    0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
    0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
    0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
    0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
    0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
    0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
    0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
    0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
    0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
    0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
    0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
    0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
},
/* Te[2]: Te[0] rotated right by two bytes */
{
    0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
    0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
    0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
    0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
    0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
    0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
    0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
    0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
    0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
    0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
    0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
    0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
    0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
    0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
    0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
    0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
    0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
    0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
    0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
    0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
    0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
    0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
    0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
    0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
    0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
    0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
    0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
    0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
    0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
    0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
    0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
    0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
    0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
    0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
    0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
    0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
    0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
    0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
    0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
    0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
    0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
    0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
    0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
    0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
    0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
    0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
    0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
    0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
    0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
    0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
    0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
    0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
    0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
    0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
    0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
    0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
    0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
    0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
    0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
    0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
    0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
    0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
    0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
    0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
},
/* Te[3]: Te[0] rotated right by three bytes */
{
    0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
    0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
    0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
    0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
    0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
    0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
    0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
    0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
    0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
    0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
    0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
    0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
    0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
    0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
    0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
    0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
    0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
    0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
    0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
    0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
    0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
    0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
    0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
    0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
    0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
    0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
    0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
    0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
    0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
    0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
    0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
    0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
    0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
    0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
    0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
    0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
    0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
    0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
    0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
    0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
    0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
    0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
    0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
    0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
    0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
    0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
    0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
    0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
    0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
    0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
    0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
    0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
    0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
    0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
    0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
    0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
    0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
    0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
    0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
    0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
    0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
    0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
    0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
    0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
}
};
01103 
01104 #ifdef HAVE_AES_DECRYPT
01105 static const word32 Td[4][256] = {
01106 {
01107     0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
01108     0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
01109     0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
01110     0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
01111     0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
01112     0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
01113     0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
01114     0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
01115     0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
01116     0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
01117     0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
01118     0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
01119     0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
01120     0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
01121     0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
01122     0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
01123     0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
01124     0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
01125     0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
01126     0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
01127     0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
01128     0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
01129     0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
01130     0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
01131     0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
01132     0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
01133     0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
01134     0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
01135     0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
01136     0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
01137     0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
01138     0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
01139     0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
01140     0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
01141     0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
01142     0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
01143     0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
01144     0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
01145     0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
01146     0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
01147     0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
01148     0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
01149     0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
01150     0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
01151     0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
01152     0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
01153     0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
01154     0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
01155     0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
01156     0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
01157     0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
01158     0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
01159     0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
01160     0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
01161     0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
01162     0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
01163     0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
01164     0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
01165     0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
01166     0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
01167     0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
01168     0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
01169     0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
01170     0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
01171 },
01172 {
01173     0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
01174     0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
01175     0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
01176     0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
01177     0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
01178     0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
01179     0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
01180     0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
01181     0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
01182     0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
01183     0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
01184     0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
01185     0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
01186     0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
01187     0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
01188     0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
01189     0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
01190     0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
01191     0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
01192     0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
01193     0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
01194     0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
01195     0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
01196     0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
01197     0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
01198     0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
01199     0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
01200     0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
01201     0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
01202     0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
01203     0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
01204     0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
01205     0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
01206     0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
01207     0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
01208     0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
01209     0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
01210     0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
01211     0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
01212     0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
01213     0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
01214     0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
01215     0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
01216     0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
01217     0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
01218     0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
01219     0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
01220     0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
01221     0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
01222     0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
01223     0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
01224     0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
01225     0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
01226     0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
01227     0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
01228     0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
01229     0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
01230     0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
01231     0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
01232     0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
01233     0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
01234     0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
01235     0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
01236     0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
01237 },
01238 {
01239     0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
01240     0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
01241     0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
01242     0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
01243     0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
01244     0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
01245     0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
01246     0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
01247     0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
01248     0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
01249     0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
01250     0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
01251     0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
01252     0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
01253     0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
01254     0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
01255     0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
01256     0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
01257     0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
01258     0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
01259 
01260     0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
01261     0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
01262     0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
01263     0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
01264     0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
01265     0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
01266     0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
01267     0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
01268     0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
01269     0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
01270     0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
01271     0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
01272     0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
01273     0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
01274     0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
01275     0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
01276     0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
01277     0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
01278     0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
01279     0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
01280     0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
01281     0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
01282     0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
01283     0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
01284     0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
01285     0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
01286     0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
01287     0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
01288     0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
01289     0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
01290     0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
01291     0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
01292     0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
01293     0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
01294     0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
01295     0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
01296     0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
01297     0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
01298     0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
01299     0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
01300     0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
01301     0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
01302     0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
01303     0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
01304 },
01305 {
01306     0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
01307     0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
01308     0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
01309     0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
01310     0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
01311     0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
01312     0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
01313     0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
01314     0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
01315     0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
01316     0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
01317     0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
01318     0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
01319     0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
01320     0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
01321     0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
01322     0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
01323     0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
01324     0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
01325     0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
01326     0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
01327     0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
01328     0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
01329     0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
01330     0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
01331     0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
01332     0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
01333     0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
01334     0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
01335     0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
01336     0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
01337     0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
01338     0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
01339     0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
01340     0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
01341     0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
01342     0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
01343     0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
01344     0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
01345     0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
01346     0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
01347     0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
01348     0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
01349     0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
01350     0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
01351     0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
01352     0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
01353     0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
01354     0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
01355     0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
01356     0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
01357     0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
01358     0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
01359     0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
01360     0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
01361     0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
01362     0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
01363     0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
01364     0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
01365     0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
01366     0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
01367     0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
01368     0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
01369     0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
01370 }
01371 };
01372 
01373 
/* Td4: byte-wide inverse S-box (InvSubBytes) lookup table.
 * Used by the final round of wc_AesDecrypt() and prefetched into cache
 * by PreFetchTd4() as a cache-timing mitigation. */
static const byte Td4[256] =
{
    0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
    0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU,
    0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U,
    0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU,
    0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU,
    0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU,
    0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U,
    0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U,
    0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U,
    0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U,
    0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU,
    0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U,
    0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU,
    0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U,
    0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U,
    0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU,
    0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU,
    0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U,
    0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U,
    0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU,
    0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U,
    0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU,
    0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U,
    0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U,
    0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U,
    0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU,
    0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU,
    0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU,
    0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U,
    0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U,
    0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
    0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU,
};
01409 #endif /* HAVE_AES_DECRYPT */
01410 
/* Extract byte y of word32 x as a word32; y == 0 selects the least
 * significant byte, y == 3 the most significant. */
#define GETBYTE(x, y) (word32)((byte)((x) >> (8 * (y))))
01412 
01413 
01414 
01415 #if defined(HAVE_AES_CBC) || defined(WOLFSSL_AES_DIRECT) || defined(HAVE_AESGCM)
01416 
01417 #ifndef WC_CACHE_LINE_SZ
01418     #if defined(__x86_64__) || defined(_M_X64) || \
01419        (defined(__ILP32__) && (__ILP32__ >= 1))
01420         #define WC_CACHE_LINE_SZ 64
01421     #else
01422         /* default cache line size */
01423         #define WC_CACHE_LINE_SZ 32
01424     #endif
01425 #endif
01426 
01427 
01428 /* load 4 Te Tables into cache by cache line stride */
01429 static WC_INLINE word32 PreFetchTe(void)
01430 {
01431     word32 x = 0;
01432     int i,j;
01433 
01434     for (i = 0; i < 4; i++) {
01435         /* 256 elements, each one is 4 bytes */
01436         for (j = 0; j < 256; j += WC_CACHE_LINE_SZ/4) {
01437             x &= Te[i][j];
01438         }
01439     }
01440     return x;
01441 }
01442 
01443 
/* Encrypt one 16-byte block in place of the software (Te table based) AES
 * implementation, or via AES-NI when compiled in and detected at runtime.
 *
 * aes      - Aes context whose expanded encryption round keys (aes->key)
 *            and round count (aes->rounds) were set by wc_AesSetKey
 * inBlock  - 16 bytes of plaintext input
 * outBlock - receives 16 bytes of ciphertext (may alias inBlock)
 *
 * No return value: on an invalid round count the function logs a message
 * and returns without writing outBlock. */
static void wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
{
    word32 s0, s1, s2, s3;
    word32 t0, t1, t2, t3;
    word32 r = aes->rounds >> 1;  /* number of double-rounds in main loop */
    const word32* rk = aes->key;

    /* valid AES round counts are 10/12/14, so r must fall in 5..7 */
    if (r > 7 || r == 0) {
        WOLFSSL_MSG("AesEncrypt encountered improper key, set it up");
        return;  /* stop instead of segfaulting, set up your keys! */
    }

#ifdef WOLFSSL_AESNI
    if (haveAESNI && aes->use_aesni) {
        #ifdef DEBUG_AESNI
            printf("about to aes encrypt\n");
            printf("in  = %p\n", inBlock);
            printf("out = %p\n", outBlock);
            printf("aes->key = %p\n", aes->key);
            printf("aes->rounds = %d\n", aes->rounds);
            printf("sz = %d\n", AES_BLOCK_SIZE);
        #endif

        /* check alignment, decrypt doesn't need alignment */
        if ((wolfssl_word)inBlock % AESNI_ALIGN) {
        #ifndef NO_WOLFSSL_ALLOC_ALIGN
            /* bounce the input through an AESNI_ALIGN-aligned scratch buffer */
            byte* tmp = (byte*)XMALLOC(AES_BLOCK_SIZE + AESNI_ALIGN, aes->heap,
                                                      DYNAMIC_TYPE_TMP_BUFFER);
            byte* tmp_align;
            if (tmp == NULL) return;

            tmp_align = tmp + (AESNI_ALIGN - ((size_t)tmp % AESNI_ALIGN));

            XMEMCPY(tmp_align, inBlock, AES_BLOCK_SIZE);
            AES_ECB_encrypt(tmp_align, tmp_align, AES_BLOCK_SIZE, (byte*)aes->key,
                            aes->rounds);
            XMEMCPY(outBlock, tmp_align, AES_BLOCK_SIZE);
            XFREE(tmp, aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
            return;
        #else
            WOLFSSL_MSG("AES-ECB encrypt with bad alignment");
            return;
        #endif
        }

        AES_ECB_encrypt(inBlock, outBlock, AES_BLOCK_SIZE, (byte*)aes->key,
                        aes->rounds);

        return;
    }
    else {
        #ifdef DEBUG_AESNI
            printf("Skipping AES-NI\n");
        #endif
    }
#endif

    /*
     * map byte array block to cipher state
     * and add initial round key:
     */
    XMEMCPY(&s0, inBlock,                  sizeof(s0));
    XMEMCPY(&s1, inBlock + sizeof(s0),     sizeof(s1));
    XMEMCPY(&s2, inBlock + 2 * sizeof(s0), sizeof(s2));
    XMEMCPY(&s3, inBlock + 3 * sizeof(s0), sizeof(s3));

#ifdef LITTLE_ENDIAN_ORDER
    /* state words are processed big-endian (byte 3 = first input byte) */
    s0 = ByteReverseWord32(s0);
    s1 = ByteReverseWord32(s1);
    s2 = ByteReverseWord32(s2);
    s3 = ByteReverseWord32(s3);
#endif

    s0 ^= rk[0];
    s1 ^= rk[1];
    s2 ^= rk[2];
    s3 ^= rk[3];

    /* PreFetchTe() always yields 0 (it only ANDs into a zero accumulator),
     * so this OR warms the Te tables in cache without changing s0 */
    s0 |= PreFetchTe();

    /*
     * Nr - 1 full rounds:
     */

    for (;;) {
        t0 =
            Te[0][GETBYTE(s0, 3)]  ^
            Te[1][GETBYTE(s1, 2)]  ^
            Te[2][GETBYTE(s2, 1)]  ^
            Te[3][GETBYTE(s3, 0)]  ^
            rk[4];
        t1 =
            Te[0][GETBYTE(s1, 3)]  ^
            Te[1][GETBYTE(s2, 2)]  ^
            Te[2][GETBYTE(s3, 1)]  ^
            Te[3][GETBYTE(s0, 0)]  ^
            rk[5];
        t2 =
            Te[0][GETBYTE(s2, 3)] ^
            Te[1][GETBYTE(s3, 2)]  ^
            Te[2][GETBYTE(s0, 1)]  ^
            Te[3][GETBYTE(s1, 0)]  ^
            rk[6];
        t3 =
            Te[0][GETBYTE(s3, 3)] ^
            Te[1][GETBYTE(s0, 2)]  ^
            Te[2][GETBYTE(s1, 1)]  ^
            Te[3][GETBYTE(s2, 0)]  ^
            rk[7];

        rk += 8;
        if (--r == 0) {
            break;
        }

        s0 =
            Te[0][GETBYTE(t0, 3)] ^
            Te[1][GETBYTE(t1, 2)] ^
            Te[2][GETBYTE(t2, 1)] ^
            Te[3][GETBYTE(t3, 0)] ^
            rk[0];
        s1 =
            Te[0][GETBYTE(t1, 3)] ^
            Te[1][GETBYTE(t2, 2)] ^
            Te[2][GETBYTE(t3, 1)] ^
            Te[3][GETBYTE(t0, 0)] ^
            rk[1];
        s2 =
            Te[0][GETBYTE(t2, 3)] ^
            Te[1][GETBYTE(t3, 2)] ^
            Te[2][GETBYTE(t0, 1)] ^
            Te[3][GETBYTE(t1, 0)] ^
            rk[2];
        s3 =
            Te[0][GETBYTE(t3, 3)] ^
            Te[1][GETBYTE(t0, 2)] ^
            Te[2][GETBYTE(t1, 1)] ^
            Te[3][GETBYTE(t2, 0)] ^
            rk[3];
    }

    /*
     * apply last round and
     * map cipher state to byte array block:
     * (final round has no MixColumns; masked Te lookups give SubBytes only)
     */

    s0 =
        (Te[2][GETBYTE(t0, 3)] & 0xff000000) ^
        (Te[3][GETBYTE(t1, 2)] & 0x00ff0000) ^
        (Te[0][GETBYTE(t2, 1)] & 0x0000ff00) ^
        (Te[1][GETBYTE(t3, 0)] & 0x000000ff) ^
        rk[0];
    s1 =
        (Te[2][GETBYTE(t1, 3)] & 0xff000000) ^
        (Te[3][GETBYTE(t2, 2)] & 0x00ff0000) ^
        (Te[0][GETBYTE(t3, 1)] & 0x0000ff00) ^
        (Te[1][GETBYTE(t0, 0)] & 0x000000ff) ^
        rk[1];
    s2 =
        (Te[2][GETBYTE(t2, 3)] & 0xff000000) ^
        (Te[3][GETBYTE(t3, 2)] & 0x00ff0000) ^
        (Te[0][GETBYTE(t0, 1)] & 0x0000ff00) ^
        (Te[1][GETBYTE(t1, 0)] & 0x000000ff) ^
        rk[2];
    s3 =
        (Te[2][GETBYTE(t3, 3)] & 0xff000000) ^
        (Te[3][GETBYTE(t0, 2)] & 0x00ff0000) ^
        (Te[0][GETBYTE(t1, 1)] & 0x0000ff00) ^
        (Te[1][GETBYTE(t2, 0)] & 0x000000ff) ^
        rk[3];

    /* write out */
#ifdef LITTLE_ENDIAN_ORDER
    s0 = ByteReverseWord32(s0);
    s1 = ByteReverseWord32(s1);
    s2 = ByteReverseWord32(s2);
    s3 = ByteReverseWord32(s3);
#endif

    XMEMCPY(outBlock,                  &s0, sizeof(s0));
    XMEMCPY(outBlock + sizeof(s0),     &s1, sizeof(s1));
    XMEMCPY(outBlock + 2 * sizeof(s0), &s2, sizeof(s2));
    XMEMCPY(outBlock + 3 * sizeof(s0), &s3, sizeof(s3));

}
01629 #endif /* HAVE_AES_CBC || WOLFSSL_AES_DIRECT || HAVE_AESGCM */
01630 
01631 #if defined(HAVE_AES_DECRYPT)
01632 #if defined(HAVE_AES_CBC) || defined(WOLFSSL_AES_DIRECT)
01633 
01634 /* load 4 Td Tables into cache by cache line stride */
01635 static WC_INLINE word32 PreFetchTd(void)
01636 {
01637     word32 x = 0;
01638     int i,j;
01639 
01640     for (i = 0; i < 4; i++) {
01641         /* 256 elements, each one is 4 bytes */
01642         for (j = 0; j < 256; j += WC_CACHE_LINE_SZ/4) {
01643             x &= Td[i][j];
01644         }
01645     }
01646     return x;
01647 }
01648 
01649 /* load Td Table4 into cache by cache line stride */
01650 static WC_INLINE word32 PreFetchTd4(void)
01651 {
01652     word32 x = 0;
01653     int i;
01654 
01655     for (i = 0; i < 256; i += WC_CACHE_LINE_SZ) {
01656         x &= (word32)Td4[i];
01657     }
01658     return x;
01659 }
01660 
/* Decrypt one 16-byte block with the software (Td table based) AES
 * implementation, or via AES-NI when compiled in and detected at runtime.
 *
 * aes      - Aes context whose round keys (aes->key, already inverted for
 *            decryption by the key setup) and round count are initialized
 * inBlock  - 16 bytes of ciphertext input
 * outBlock - receives 16 bytes of plaintext (may alias inBlock)
 *
 * No return value: on an invalid round count the function logs a message
 * and returns without writing outBlock. */
static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
{
    word32 s0, s1, s2, s3;
    word32 t0, t1, t2, t3;
    word32 r = aes->rounds >> 1;  /* number of double-rounds in main loop */

    const word32* rk = aes->key;
    /* valid AES round counts are 10/12/14, so r must fall in 5..7 */
    if (r > 7 || r == 0) {
        WOLFSSL_MSG("AesDecrypt encountered improper key, set it up");
        return;  /* stop instead of segfaulting, set up your keys! */
    }
#ifdef WOLFSSL_AESNI
    if (haveAESNI && aes->use_aesni) {
        #ifdef DEBUG_AESNI
            printf("about to aes decrypt\n");
            printf("in  = %p\n", inBlock);
            printf("out = %p\n", outBlock);
            printf("aes->key = %p\n", aes->key);
            printf("aes->rounds = %d\n", aes->rounds);
            printf("sz = %d\n", AES_BLOCK_SIZE);
        #endif

        /* if input and output same will overwrite input iv */
        XMEMCPY(aes->tmp, inBlock, AES_BLOCK_SIZE);
        AES_ECB_decrypt(inBlock, outBlock, AES_BLOCK_SIZE, (byte*)aes->key,
                        aes->rounds);
        return;
    }
    else {
        #ifdef DEBUG_AESNI
            printf("Skipping AES-NI\n");
        #endif
    }
#endif /* WOLFSSL_AESNI */

    /*
     * map byte array block to cipher state
     * and add initial round key:
     */
    XMEMCPY(&s0, inBlock,                  sizeof(s0));
    XMEMCPY(&s1, inBlock + sizeof(s0),     sizeof(s1));
    XMEMCPY(&s2, inBlock + 2 * sizeof(s0), sizeof(s2));
    XMEMCPY(&s3, inBlock + 3 * sizeof(s0), sizeof(s3));

#ifdef LITTLE_ENDIAN_ORDER
    /* state words are processed big-endian (byte 3 = first input byte) */
    s0 = ByteReverseWord32(s0);
    s1 = ByteReverseWord32(s1);
    s2 = ByteReverseWord32(s2);
    s3 = ByteReverseWord32(s3);
#endif

    s0 ^= rk[0];
    s1 ^= rk[1];
    s2 ^= rk[2];
    s3 ^= rk[3];

    /* PreFetchTd() always yields 0 (it only ANDs into a zero accumulator),
     * so this OR warms the Td tables in cache without changing s0 */
    s0 |= PreFetchTd();

    /*
     * Nr - 1 full rounds:
     */

    for (;;) {
        t0 =
            Td[0][GETBYTE(s0, 3)] ^
            Td[1][GETBYTE(s3, 2)] ^
            Td[2][GETBYTE(s2, 1)] ^
            Td[3][GETBYTE(s1, 0)] ^
            rk[4];
        t1 =
            Td[0][GETBYTE(s1, 3)] ^
            Td[1][GETBYTE(s0, 2)] ^
            Td[2][GETBYTE(s3, 1)] ^
            Td[3][GETBYTE(s2, 0)] ^
            rk[5];
        t2 =
            Td[0][GETBYTE(s2, 3)] ^
            Td[1][GETBYTE(s1, 2)] ^
            Td[2][GETBYTE(s0, 1)] ^
            Td[3][GETBYTE(s3, 0)] ^
            rk[6];
        t3 =
            Td[0][GETBYTE(s3, 3)] ^
            Td[1][GETBYTE(s2, 2)] ^
            Td[2][GETBYTE(s1, 1)] ^
            Td[3][GETBYTE(s0, 0)] ^
            rk[7];

        rk += 8;
        if (--r == 0) {
            break;
        }

        s0 =
            Td[0][GETBYTE(t0, 3)] ^
            Td[1][GETBYTE(t3, 2)] ^
            Td[2][GETBYTE(t2, 1)] ^
            Td[3][GETBYTE(t1, 0)] ^
            rk[0];
        s1 =
            Td[0][GETBYTE(t1, 3)] ^
            Td[1][GETBYTE(t0, 2)] ^
            Td[2][GETBYTE(t3, 1)] ^
            Td[3][GETBYTE(t2, 0)] ^
            rk[1];
        s2 =
            Td[0][GETBYTE(t2, 3)] ^
            Td[1][GETBYTE(t1, 2)] ^
            Td[2][GETBYTE(t0, 1)] ^
            Td[3][GETBYTE(t3, 0)] ^
            rk[2];
        s3 =
            Td[0][GETBYTE(t3, 3)] ^
            Td[1][GETBYTE(t2, 2)] ^
            Td[2][GETBYTE(t1, 1)] ^
            Td[3][GETBYTE(t0, 0)] ^
            rk[3];
    }
    /*
     * apply last round and
     * map cipher state to byte array block:
     * (final round uses the byte-wide Td4 inverse S-box, no InvMixColumns)
     */

    /* PreFetchTd4() always yields 0; OR warms Td4 without changing t0 */
    t0 |= PreFetchTd4();

    s0 =
        ((word32)Td4[GETBYTE(t0, 3)] << 24) ^
        ((word32)Td4[GETBYTE(t3, 2)] << 16) ^
        ((word32)Td4[GETBYTE(t2, 1)] <<  8) ^
        ((word32)Td4[GETBYTE(t1, 0)]) ^
        rk[0];
    s1 =
        ((word32)Td4[GETBYTE(t1, 3)] << 24) ^
        ((word32)Td4[GETBYTE(t0, 2)] << 16) ^
        ((word32)Td4[GETBYTE(t3, 1)] <<  8) ^
        ((word32)Td4[GETBYTE(t2, 0)]) ^
        rk[1];
    s2 =
        ((word32)Td4[GETBYTE(t2, 3)] << 24) ^
        ((word32)Td4[GETBYTE(t1, 2)] << 16) ^
        ((word32)Td4[GETBYTE(t0, 1)] <<  8) ^
        ((word32)Td4[GETBYTE(t3, 0)]) ^
        rk[2];
    s3 =
        ((word32)Td4[GETBYTE(t3, 3)] << 24) ^
        ((word32)Td4[GETBYTE(t2, 2)] << 16) ^
        ((word32)Td4[GETBYTE(t1, 1)] <<  8) ^
        ((word32)Td4[GETBYTE(t0, 0)]) ^
        rk[3];

    /* write out */
#ifdef LITTLE_ENDIAN_ORDER
    s0 = ByteReverseWord32(s0);
    s1 = ByteReverseWord32(s1);
    s2 = ByteReverseWord32(s2);
    s3 = ByteReverseWord32(s3);
#endif

    XMEMCPY(outBlock,                  &s0, sizeof(s0));
    XMEMCPY(outBlock + sizeof(s0),     &s1, sizeof(s1));
    XMEMCPY(outBlock + 2 * sizeof(s0), &s2, sizeof(s2));
    XMEMCPY(outBlock + 3 * sizeof(s0), &s3, sizeof(s3));
}
01824 #endif /* HAVE_AES_CBC || WOLFSSL_AES_DIRECT */
01825 #endif /* HAVE_AES_DECRYPT */
01826 
01827 #endif /* NEED_AES_TABLES */
01828 
01829 
01830 
01831 /* wc_AesSetKey */
01832 #if defined(STM32_CRYPTO)
01833 
01834     int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
01835             const byte* iv, int dir)
01836     {
01837         word32 *rk = aes->key;
01838 
01839         (void)dir;
01840 
01841         if (!((keylen == 16) || (keylen == 24) || (keylen == 32)))
01842             return BAD_FUNC_ARG;
01843 
01844         aes->keylen = keylen;
01845         aes->rounds = keylen/4 + 6;
01846         XMEMCPY(rk, userKey, keylen);
01847     #ifndef WOLFSSL_STM32_CUBEMX
01848         ByteReverseWords(rk, rk, keylen);
01849     #endif
01850     #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER)
01851         aes->left = 0;
01852     #endif
01853 
01854         return wc_AesSetIV(aes, iv);
01855     }
01856     #if defined(WOLFSSL_AES_DIRECT)
        /* Direct (single-block ECB) API uses the same key schedule as the
         * standard setter on the STM32 hardware port. */
        int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen,
                            const byte* iv, int dir)
        {
            return wc_AesSetKey(aes, userKey, keylen, iv, dir);
        }
01862     #endif
01863 
01864 #elif defined(HAVE_COLDFIRE_SEC)
01865     #if defined (HAVE_THREADX)
01866         #include "memory_pools.h"
01867         extern TX_BYTE_POOL mp_ncached;  /* Non Cached memory pool */
01868     #endif
01869 
01870     #define AES_BUFFER_SIZE (AES_BLOCK_SIZE * 64)
01871     static unsigned char *AESBuffIn = NULL;
01872     static unsigned char *AESBuffOut = NULL;
01873     static byte *secReg;
01874     static byte *secKey;
01875     static volatile SECdescriptorType *secDesc;
01876 
01877     static wolfSSL_Mutex Mutex_AesSEC;
01878 
01879     #define SEC_DESC_AES_CBC_ENCRYPT 0x60300010
01880     #define SEC_DESC_AES_CBC_DECRYPT 0x60200010
01881 
01882     extern volatile unsigned char __MBAR[];
01883 
    /* Set the AES key for the ColdFire SEC hardware port.
     *
     * On first call, lazily allocates the SEC descriptor and the non-cached
     * DMA in/out/key/IV buffers from the ThreadX byte pool and initializes
     * the shared mutex.
     * NOTE(review): this lazy init is not protected against two threads
     * entering concurrently on first use, and the wc_InitMutex() return
     * value is ignored -- confirm callers serialize first-time setup.
     *
     * Returns 0 on success, BAD_FUNC_ARG on bad arguments or allocation
     * failure. */
    int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
        const byte* iv, int dir)
    {
        if (AESBuffIn == NULL) {
        #if defined (HAVE_THREADX)
            int s1, s2, s3, s4, s5;
            s5 = tx_byte_allocate(&mp_ncached,(void *)&secDesc,
                                  sizeof(SECdescriptorType), TX_NO_WAIT);
            s1 = tx_byte_allocate(&mp_ncached, (void *)&AESBuffIn,
                                  AES_BUFFER_SIZE, TX_NO_WAIT);
            s2 = tx_byte_allocate(&mp_ncached, (void *)&AESBuffOut,
                                  AES_BUFFER_SIZE, TX_NO_WAIT);
            s3 = tx_byte_allocate(&mp_ncached, (void *)&secKey,
                                  AES_BLOCK_SIZE*2, TX_NO_WAIT);
            s4 = tx_byte_allocate(&mp_ncached, (void *)&secReg,
                                  AES_BLOCK_SIZE, TX_NO_WAIT);

            /* any non-zero status means an allocation failed */
            if (s1 || s2 || s3 || s4 || s5)
                return BAD_FUNC_ARG;
        #else
            #warning "Allocate non-Cache buffers"
        #endif

            wc_InitMutex(&Mutex_AesSEC);
        }

        if (!((keylen == 16) || (keylen == 24) || (keylen == 32)))
            return BAD_FUNC_ARG;

        if (aes == NULL)
            return BAD_FUNC_ARG;

        aes->keylen = keylen;
        aes->rounds = keylen/4 + 6;  /* 10/12/14 rounds for 128/192/256-bit */
        XMEMCPY(aes->key, userKey, keylen);

        if (iv)
            XMEMCPY(aes->reg, iv, AES_BLOCK_SIZE);

    #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER)
        aes->left = 0;
    #endif

        return 0;
    }
01929 #elif defined(FREESCALE_LTC)
01930     int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv,
01931                   int dir)
01932     {
01933         if (!((keylen == 16) || (keylen == 24) || (keylen == 32)))
01934             return BAD_FUNC_ARG;
01935 
01936         aes->rounds = keylen/4 + 6;
01937         XMEMCPY(aes->key, userKey, keylen);
01938 
01939     #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER)
01940         aes->left = 0;
01941     #endif
01942 
01943         return wc_AesSetIV(aes, iv);
01944     }
01945 
    /* Direct (single-block ECB) API reuses the standard key setup on the
     * Freescale LTC hardware port. */
    int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen,
                        const byte* iv, int dir)
    {
        return wc_AesSetKey(aes, userKey, keylen, iv, dir);
    }
01951 #elif defined(FREESCALE_MMCAU)
01952     int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
01953         const byte* iv, int dir)
01954     {
01955         int ret;
01956         byte *rk = (byte*)aes->key;
01957 
01958         (void)dir;
01959 
01960         if (!((keylen == 16) || (keylen == 24) || (keylen == 32)))
01961             return BAD_FUNC_ARG;
01962 
01963         if (rk == NULL)
01964             return BAD_FUNC_ARG;
01965 
01966     #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER)
01967         aes->left = 0;
01968     #endif
01969 
01970         aes->rounds = keylen/4 + 6;
01971 
01972         ret = wolfSSL_CryptHwMutexLock();
01973         if(ret == 0) {
01974         #ifdef FREESCALE_MMCAU_CLASSIC
01975             cau_aes_set_key(userKey, keylen*8, rk);
01976         #else
01977             MMCAU_AES_SetKey(userKey, keylen, rk);
01978         #endif
01979             wolfSSL_CryptHwMutexUnLock();
01980 
01981             ret = wc_AesSetIV(aes, iv);
01982         }
01983 
01984         return ret;
01985     }
01986 
    /* Direct (single-block ECB) API reuses the standard key setup on the
     * Freescale MMCAU hardware port. */
    int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen,
                        const byte* iv, int dir)
    {
        return wc_AesSetKey(aes, userKey, keylen, iv, dir);
    }
01992 
01993 #elif defined(WOLFSSL_NRF51_AES)
01994     int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
01995         const byte* iv, int dir)
01996     {
01997         int ret;
01998 
01999         (void)dir;
02000         (void)iv;
02001 
02002         if (keylen != 16)
02003             return BAD_FUNC_ARG;
02004 
02005         aes->keylen = keylen;
02006         aes->rounds = keylen/4 + 6;
02007         ret = nrf51_aes_set_key(userKey);
02008 
02009     #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER)
02010         aes->left = 0;
02011     #endif
02012 
02013         return ret;
02014     }
02015 
    /* Direct (single-block ECB) API reuses the standard key setup on the
     * nRF51 hardware port. */
    int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen,
                        const byte* iv, int dir)
    {
        return wc_AesSetKey(aes, userKey, keylen, iv, dir);
    }
02021 
02022 #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES)
02023       /* implemented in wolfcrypt/src/port/caam/caam_aes.c */
02024 
02025 #else
    /* Expand userKey into the round-key schedule in aes->key (software
     * path), then set the IV.  Called by the generic wc_AesSetKey after
     * argument validation.
     *
     * aes     - context to fill; keylen/rounds are recorded here
     * userKey - raw key bytes (16, 24, or 32 — caller validated)
     * keylen  - key length in bytes
     * iv      - optional IV forwarded to wc_AesSetIV
     * dir     - AES_ENCRYPTION or AES_DECRYPTION; for decryption the
     *           schedule is inverted and InvMixColumns-transformed so
     *           wc_AesDecrypt can use the equivalent inverse cipher
     *
     * Returns wc_AesSetIV's result, or BAD_FUNC_ARG for an unsupported
     * key length. */
    static int wc_AesSetKeyLocal(Aes* aes, const byte* userKey, word32 keylen,
                const byte* iv, int dir)
    {
        word32 *rk = aes->key;
    #ifdef NEED_AES_TABLES
        word32 temp;
        unsigned int i = 0;
    #endif

        #ifdef WOLFSSL_AESNI
            aes->use_aesni = 0;
        #endif /* WOLFSSL_AESNI */
        #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER)
            aes->left = 0;
        #endif

        aes->keylen = keylen;
        aes->rounds = (keylen/4) + 6;  /* 10/12/14 for 128/192/256-bit */

        XMEMCPY(rk, userKey, keylen);
    #if defined(LITTLE_ENDIAN_ORDER) && !defined(WOLFSSL_PIC32MZ_CRYPT)
        /* key schedule math below works on big-endian words */
        ByteReverseWords(rk, rk, keylen);
    #endif

#ifdef NEED_AES_TABLES

        /* Rijndael key expansion; the masked Te lookups below implement
         * SubWord (S-box on each byte) without a separate S-box table */
        switch (keylen) {
    #if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 128 && \
            defined(WOLFSSL_AES_128)
        case 16:
            while (1)
            {
                temp  = rk[3];
                rk[4] = rk[0] ^
                    (Te[2][GETBYTE(temp, 2)] & 0xff000000) ^
                    (Te[3][GETBYTE(temp, 1)] & 0x00ff0000) ^
                    (Te[0][GETBYTE(temp, 0)] & 0x0000ff00) ^
                    (Te[1][GETBYTE(temp, 3)] & 0x000000ff) ^
                    rcon[i];
                rk[5] = rk[1] ^ rk[4];
                rk[6] = rk[2] ^ rk[5];
                rk[7] = rk[3] ^ rk[6];
                if (++i == 10)
                    break;
                rk += 4;
            }
            break;
    #endif /* 128 */

    #if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 192 && \
            defined(WOLFSSL_AES_192)
        case 24:
            /* for (;;) here triggers a bug in VC60 SP4 w/ Pro Pack */
            while (1)
            {
                temp = rk[ 5];
                rk[ 6] = rk[ 0] ^
                    (Te[2][GETBYTE(temp, 2)] & 0xff000000) ^
                    (Te[3][GETBYTE(temp, 1)] & 0x00ff0000) ^
                    (Te[0][GETBYTE(temp, 0)] & 0x0000ff00) ^
                    (Te[1][GETBYTE(temp, 3)] & 0x000000ff) ^
                    rcon[i];
                rk[ 7] = rk[ 1] ^ rk[ 6];
                rk[ 8] = rk[ 2] ^ rk[ 7];
                rk[ 9] = rk[ 3] ^ rk[ 8];
                if (++i == 8)
                    break;
                rk[10] = rk[ 4] ^ rk[ 9];
                rk[11] = rk[ 5] ^ rk[10];
                rk += 6;
            }
            break;
    #endif /* 192 */

    #if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 256 && \
            defined(WOLFSSL_AES_256)
        case 32:
            while (1)
            {
                temp = rk[ 7];
                rk[ 8] = rk[ 0] ^
                    (Te[2][GETBYTE(temp, 2)] & 0xff000000) ^
                    (Te[3][GETBYTE(temp, 1)] & 0x00ff0000) ^
                    (Te[0][GETBYTE(temp, 0)] & 0x0000ff00) ^
                    (Te[1][GETBYTE(temp, 3)] & 0x000000ff) ^
                    rcon[i];
                rk[ 9] = rk[ 1] ^ rk[ 8];
                rk[10] = rk[ 2] ^ rk[ 9];
                rk[11] = rk[ 3] ^ rk[10];
                if (++i == 7)
                    break;
                /* AES-256 applies SubWord (no rotate) mid-schedule too */
                temp = rk[11];
                rk[12] = rk[ 4] ^
                    (Te[2][GETBYTE(temp, 3)] & 0xff000000) ^
                    (Te[3][GETBYTE(temp, 2)] & 0x00ff0000) ^
                    (Te[0][GETBYTE(temp, 1)] & 0x0000ff00) ^
                    (Te[1][GETBYTE(temp, 0)] & 0x000000ff);
                rk[13] = rk[ 5] ^ rk[12];
                rk[14] = rk[ 6] ^ rk[13];
                rk[15] = rk[ 7] ^ rk[14];

                rk += 8;
            }
            break;
    #endif /* 256 */

        default:
            return BAD_FUNC_ARG;
        } /* switch */

    #ifdef HAVE_AES_DECRYPT
        if (dir == AES_DECRYPTION) {
            unsigned int j;
            rk = aes->key;

            /* invert the order of the round keys: */
            for (i = 0, j = 4* aes->rounds; i < j; i += 4, j -= 4) {
                temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
                temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
                temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
                temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
            }
            /* apply the inverse MixColumn transform to all round keys but the
               first and the last: (Te[1][..] & 0xff yields the S-box byte,
               which Td[..][..] then maps through InvMixColumns) */
            for (i = 1; i < aes->rounds; i++) {
                rk += 4;
                rk[0] =
                    Td[0][Te[1][GETBYTE(rk[0], 3)] & 0xff] ^
                    Td[1][Te[1][GETBYTE(rk[0], 2)] & 0xff] ^
                    Td[2][Te[1][GETBYTE(rk[0], 1)] & 0xff] ^
                    Td[3][Te[1][GETBYTE(rk[0], 0)] & 0xff];
                rk[1] =
                    Td[0][Te[1][GETBYTE(rk[1], 3)] & 0xff] ^
                    Td[1][Te[1][GETBYTE(rk[1], 2)] & 0xff] ^
                    Td[2][Te[1][GETBYTE(rk[1], 1)] & 0xff] ^
                    Td[3][Te[1][GETBYTE(rk[1], 0)] & 0xff];
                rk[2] =
                    Td[0][Te[1][GETBYTE(rk[2], 3)] & 0xff] ^
                    Td[1][Te[1][GETBYTE(rk[2], 2)] & 0xff] ^
                    Td[2][Te[1][GETBYTE(rk[2], 1)] & 0xff] ^
                    Td[3][Te[1][GETBYTE(rk[2], 0)] & 0xff];
                rk[3] =
                    Td[0][Te[1][GETBYTE(rk[3], 3)] & 0xff] ^
                    Td[1][Te[1][GETBYTE(rk[3], 2)] & 0xff] ^
                    Td[2][Te[1][GETBYTE(rk[3], 1)] & 0xff] ^
                    Td[3][Te[1][GETBYTE(rk[3], 0)] & 0xff];
            }
        }
    #else
        (void)dir;
    #endif /* HAVE_AES_DECRYPT */
#endif /* NEED_AES_TABLES */

        return wc_AesSetIV(aes, iv);
    }
02181 
02182     int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
02183         const byte* iv, int dir)
02184     {
02185         int ret;
02186     #if defined(AES_MAX_KEY_SIZE)
02187         const word32 max_key_len = (AES_MAX_KEY_SIZE / 8);
02188     #endif
02189 
02190     #ifdef WOLFSSL_IMX6_CAAM_BLOB
02191         byte   local[32];
02192         word32 localSz = 32;
02193 
02194         if (keylen == (16 + WC_CAAM_BLOB_SZ) ||
02195                 keylen == (24 + WC_CAAM_BLOB_SZ) ||
02196                 keylen == (32 + WC_CAAM_BLOB_SZ)) {
02197             if (wc_caamOpenBlob((byte*)userKey, keylen, local, &localSz) != 0) {
02198                 return BAD_FUNC_ARG;
02199             }
02200 
02201             /* set local values */
02202             userKey = local;
02203             keylen = localSz;
02204         }
02205     #endif
02206         if (aes == NULL ||
02207                 !((keylen == 16) || (keylen == 24) || (keylen == 32))) {
02208             return BAD_FUNC_ARG;
02209         }
02210 
02211     #if defined(AES_MAX_KEY_SIZE)
02212         /* Check key length */
02213         if (keylen > max_key_len) {
02214             return BAD_FUNC_ARG;
02215         }
02216     #endif
02217         aes->keylen = keylen;
02218         aes->rounds = keylen/4 + 6;
02219 
02220     #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
02221         if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES) {
02222             XMEMCPY(aes->asyncKey, userKey, keylen);
02223             if (iv)
02224                 XMEMCPY(aes->asyncIv, iv, AES_BLOCK_SIZE);
02225         }
02226     #endif /* WOLFSSL_ASYNC_CRYPT */
02227 
02228     #ifdef WOLFSSL_AESNI
02229         if (checkAESNI == 0) {
02230             haveAESNI  = Check_CPU_support_AES();
02231             checkAESNI = 1;
02232         }
02233         if (haveAESNI) {
02234             #if defined(WOLFSSL_AES_COUNTER) || defined(WOLFSSL_AES_CFB)
02235                 aes->left = 0;
02236             #endif /* WOLFSSL_AES_COUNTER */
02237             aes->use_aesni = 1;
02238             if (iv)
02239                 XMEMCPY(aes->reg, iv, AES_BLOCK_SIZE);
02240             if (dir == AES_ENCRYPTION)
02241                 return AES_set_encrypt_key(userKey, keylen * 8, aes);
02242         #ifdef HAVE_AES_DECRYPT
02243             else
02244                 return AES_set_decrypt_key(userKey, keylen * 8, aes);
02245         #endif
02246         }
02247     #endif /* WOLFSSL_AESNI */
02248 
02249         ret = wc_AesSetKeyLocal(aes, userKey, keylen, iv, dir);
02250 
02251     #ifdef WOLFSSL_IMX6_CAAM_BLOB
02252         ForceZero(local, sizeof(local));
02253     #endif
02254         return ret;
02255     }
02256 
02257     #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
02258         /* AES-CTR and AES-DIRECT need to use this for key setup, no aesni yet */
02259         int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen,
02260                             const byte* iv, int dir)
02261         {
02262             int ret;
02263 
02264         #ifdef WOLFSSL_IMX6_CAAM_BLOB
02265             byte   local[32];
02266             word32 localSz = 32;
02267 
02268             if (keylen == (16 + WC_CAAM_BLOB_SZ) ||
02269              keylen == (24 + WC_CAAM_BLOB_SZ) ||
02270              keylen == (32 + WC_CAAM_BLOB_SZ)) {
02271                 if (wc_caamOpenBlob((byte*)userKey, keylen, local, &localSz)
02272                         != 0) {
02273                     return BAD_FUNC_ARG;
02274                 }
02275 
02276                 /* set local values */
02277                 userKey = local;
02278                 keylen = localSz;
02279             }
02280         #endif
02281             ret = wc_AesSetKeyLocal(aes, userKey, keylen, iv, dir);
02282 
02283         #ifdef WOLFSSL_IMX6_CAAM_BLOB
02284             ForceZero(local, sizeof(local));
02285         #endif
02286 
02287             return ret;
02288         }
02289     #endif /* WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */
02290 #endif /* wc_AesSetKey block */
02291 
02292 
02293 /* wc_AesSetIV is shared between software and hardware */
02294 int wc_AesSetIV(Aes* aes, const byte* iv)
02295 {
02296     if (aes == NULL)
02297         return BAD_FUNC_ARG;
02298 
02299     if (iv)
02300         XMEMCPY(aes->reg, iv, AES_BLOCK_SIZE);
02301     else
02302         XMEMSET(aes->reg,  0, AES_BLOCK_SIZE);
02303 
02304     return 0;
02305 }
02306 
02307 /* AES-DIRECT */
02308 #if defined(WOLFSSL_AES_DIRECT)
02309     #if defined(HAVE_COLDFIRE_SEC)
02310         #error "Coldfire SEC doesn't yet support AES direct"
02311 
02312     #elif defined(FREESCALE_LTC)
        /* Allow direct access to one block encrypt */
        /* Encrypts exactly one AES block (AES_BLOCK_SIZE bytes) in ECB mode
         * on the Freescale LTC hardware engine.
         * NOTE(review): the return of wc_AesGetKeySize is not checked; this
         * function is void so an error could not be propagated anyway. */
        void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in)
        {
            byte *key;
            uint32_t keySize;

            key = (byte*)aes->key;
            wc_AesGetKeySize(aes, &keySize);   /* key size in bytes */

            LTC_AES_EncryptEcb(LTC_BASE, in, out, AES_BLOCK_SIZE,
                key, keySize);
        }
02325 
        /* Allow direct access to one block decrypt */
        /* Decrypts exactly one AES block (AES_BLOCK_SIZE bytes) in ECB mode
         * on the Freescale LTC hardware engine.
         * NOTE(review): kLTC_EncryptKey presumably tells the driver the key
         * is stored in encrypt-schedule form — confirm against the LTC
         * driver documentation. The wc_AesGetKeySize return is unchecked
         * (void function, no way to propagate). */
        void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in)
        {
            byte *key;
            uint32_t keySize;

            key = (byte*)aes->key;
            wc_AesGetKeySize(aes, &keySize);   /* key size in bytes */

            LTC_AES_DecryptEcb(LTC_BASE, in, out, AES_BLOCK_SIZE,
                key, keySize, kLTC_EncryptKey);
        }
02338 
02339     #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES)
02340         /* implemented in wolfcrypt/src/port/caam/caam_aes.c */
02341 
02342     #else
02343         /* Allow direct access to one block encrypt */
02344         void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in)
02345         {
02346             wc_AesEncrypt(aes, in, out);
02347         }
02348     #ifdef HAVE_AES_DECRYPT
02349         /* Allow direct access to one block decrypt */
02350         void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in)
02351         {
02352             wc_AesDecrypt(aes, in, out);
02353         }
02354     #endif /* HAVE_AES_DECRYPT */
02355     #endif /* AES direct block */
02356 #endif /* WOLFSSL_AES_DIRECT */
02357 
02358 
02359 /* AES-CBC */
02360 #ifdef HAVE_AES_CBC
02361 #if defined(STM32_CRYPTO)
02362 
02363 #ifdef WOLFSSL_STM32_CUBEMX
02364     int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
02365     {
02366         int ret = 0;
02367         word32 blocks = (sz / AES_BLOCK_SIZE);
02368         CRYP_HandleTypeDef hcryp;
02369 
02370         XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef));
02371         switch (aes->rounds) {
02372             case 10: /* 128-bit key */
02373                 hcryp.Init.KeySize = CRYP_KEYSIZE_128B;
02374                 break;
02375     #ifdef CRYP_KEYSIZE_192B
02376             case 12: /* 192-bit key */
02377                 hcryp.Init.KeySize = CRYP_KEYSIZE_192B;
02378                 break;
02379     #endif
02380             case 14: /* 256-bit key */
02381                 hcryp.Init.KeySize = CRYP_KEYSIZE_256B;
02382                 break;
02383             default:
02384                 break;
02385         }
02386         hcryp.Instance = CRYP;
02387         hcryp.Init.DataType = CRYP_DATATYPE_8B;
02388         hcryp.Init.pKey = (uint8_t*)aes->key;
02389         hcryp.Init.pInitVect = (uint8_t*)aes->reg;
02390 
02391         HAL_CRYP_Init(&hcryp);
02392 
02393         while (blocks--) {
02394             if (HAL_CRYP_AESCBC_Encrypt(&hcryp, (uint8_t*)in, AES_BLOCK_SIZE,
02395                                            out, STM32_HAL_TIMEOUT) != HAL_OK) {
02396                 ret = WC_TIMEOUT_E;
02397                 break;
02398             }
02399 
02400             /* store iv for next call */
02401             XMEMCPY(aes->reg, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
02402 
02403             sz  -= AES_BLOCK_SIZE;
02404             in  += AES_BLOCK_SIZE;
02405             out += AES_BLOCK_SIZE;
02406         }
02407 
02408         HAL_CRYP_DeInit(&hcryp);
02409 
02410         return ret;
02411     }
02412     #ifdef HAVE_AES_DECRYPT
02413     int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
02414     {
02415         int ret = 0;
02416         word32 blocks = (sz / AES_BLOCK_SIZE);
02417         CRYP_HandleTypeDef hcryp;
02418 
02419         XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef));
02420         switch (aes->rounds) {
02421             case 10: /* 128-bit key */
02422                 hcryp.Init.KeySize = CRYP_KEYSIZE_128B;
02423                 break;
02424     #ifdef CRYP_KEYSIZE_192B
02425             case 12: /* 192-bit key */
02426                 hcryp.Init.KeySize = CRYP_KEYSIZE_192B;
02427                 break;
02428     #endif
02429             case 14: /* 256-bit key */
02430                 hcryp.Init.KeySize = CRYP_KEYSIZE_256B;
02431                 break;
02432             default:
02433                 break;
02434         }
02435         hcryp.Instance = CRYP;
02436         hcryp.Init.DataType = CRYP_DATATYPE_8B;
02437         hcryp.Init.pKey = (uint8_t*)aes->key;
02438         hcryp.Init.pInitVect = (uint8_t*)aes->reg;
02439 
02440         HAL_CRYP_Init(&hcryp);
02441 
02442         while (blocks--) {
02443             if (HAL_CRYP_AESCBC_Decrypt(&hcryp, (uint8_t*)in, AES_BLOCK_SIZE,
02444                                            out, STM32_HAL_TIMEOUT) != HAL_OK) {
02445                 ret = WC_TIMEOUT_E;
02446             }
02447 
02448             /* store iv for next call */
02449             XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE);
02450 
02451             in  += AES_BLOCK_SIZE;
02452             out += AES_BLOCK_SIZE;
02453         }
02454 
02455         HAL_CRYP_DeInit(&hcryp);
02456 
02457         return ret;
02458     }
02459     #endif /* HAVE_AES_DECRYPT */
02460 #else
02461     int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
02462     {
02463         word32 *enc_key, *iv;
02464         word32 blocks = (sz / AES_BLOCK_SIZE);
02465         CRYP_InitTypeDef AES_CRYP_InitStructure;
02466         CRYP_KeyInitTypeDef AES_CRYP_KeyInitStructure;
02467         CRYP_IVInitTypeDef AES_CRYP_IVInitStructure;
02468 
02469         enc_key = aes->key;
02470         iv = aes->reg;
02471 
02472         /* crypto structure initialization */
02473         CRYP_KeyStructInit(&AES_CRYP_KeyInitStructure);
02474         CRYP_StructInit(&AES_CRYP_InitStructure);
02475         CRYP_IVStructInit(&AES_CRYP_IVInitStructure);
02476 
02477         /* reset registers to their default values */
02478         CRYP_DeInit();
02479 
02480         /* load key into correct registers */
02481         switch (aes->rounds) {
02482             case 10: /* 128-bit key */
02483                 AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_128b;
02484                 AES_CRYP_KeyInitStructure.CRYP_Key2Left  = enc_key[0];
02485                 AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[1];
02486                 AES_CRYP_KeyInitStructure.CRYP_Key3Left  = enc_key[2];
02487                 AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[3];
02488                 break;
02489 
02490             case 12: /* 192-bit key */
02491                 AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_192b;
02492                 AES_CRYP_KeyInitStructure.CRYP_Key1Left  = enc_key[0];
02493                 AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[1];
02494                 AES_CRYP_KeyInitStructure.CRYP_Key2Left  = enc_key[2];
02495                 AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[3];
02496                 AES_CRYP_KeyInitStructure.CRYP_Key3Left  = enc_key[4];
02497                 AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[5];
02498                 break;
02499 
02500             case 14: /* 256-bit key */
02501                 AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_256b;
02502                 AES_CRYP_KeyInitStructure.CRYP_Key0Left  = enc_key[0];
02503                 AES_CRYP_KeyInitStructure.CRYP_Key0Right = enc_key[1];
02504                 AES_CRYP_KeyInitStructure.CRYP_Key1Left  = enc_key[2];
02505                 AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[3];
02506                 AES_CRYP_KeyInitStructure.CRYP_Key2Left  = enc_key[4];
02507                 AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[5];
02508                 AES_CRYP_KeyInitStructure.CRYP_Key3Left  = enc_key[6];
02509                 AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[7];
02510                 break;
02511 
02512             default:
02513                 break;
02514         }
02515         CRYP_KeyInit(&AES_CRYP_KeyInitStructure);
02516 
02517         /* set iv */
02518         ByteReverseWords(iv, iv, AES_BLOCK_SIZE);
02519         AES_CRYP_IVInitStructure.CRYP_IV0Left  = iv[0];
02520         AES_CRYP_IVInitStructure.CRYP_IV0Right = iv[1];
02521         AES_CRYP_IVInitStructure.CRYP_IV1Left  = iv[2];
02522         AES_CRYP_IVInitStructure.CRYP_IV1Right = iv[3];
02523         CRYP_IVInit(&AES_CRYP_IVInitStructure);
02524 
02525         /* set direction, mode, and datatype */
02526         AES_CRYP_InitStructure.CRYP_AlgoDir  = CRYP_AlgoDir_Encrypt;
02527         AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_CBC;
02528         AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b;
02529         CRYP_Init(&AES_CRYP_InitStructure);
02530 
02531         /* enable crypto processor */
02532         CRYP_Cmd(ENABLE);
02533 
02534         while (blocks--) {
02535             /* flush IN/OUT FIFOs */
02536             CRYP_FIFOFlush();
02537 
02538             CRYP_DataIn(*(uint32_t*)&in[0]);
02539             CRYP_DataIn(*(uint32_t*)&in[4]);
02540             CRYP_DataIn(*(uint32_t*)&in[8]);
02541             CRYP_DataIn(*(uint32_t*)&in[12]);
02542 
02543             /* wait until the complete message has been processed */
02544             while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {}
02545 
02546             *(uint32_t*)&out[0]  = CRYP_DataOut();
02547             *(uint32_t*)&out[4]  = CRYP_DataOut();
02548             *(uint32_t*)&out[8]  = CRYP_DataOut();
02549             *(uint32_t*)&out[12] = CRYP_DataOut();
02550 
02551             /* store iv for next call */
02552             XMEMCPY(aes->reg, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
02553 
02554             sz  -= AES_BLOCK_SIZE;
02555             in  += AES_BLOCK_SIZE;
02556             out += AES_BLOCK_SIZE;
02557         }
02558 
02559         /* disable crypto processor */
02560         CRYP_Cmd(DISABLE);
02561 
02562         return 0;
02563     }
02564 
02565     #ifdef HAVE_AES_DECRYPT
    /* AES-CBC decrypt via the STM32 Standard Peripheral Library CRYP engine.
     * Two hardware phases: first the engine derives the decryption key
     * schedule (CRYP_AlgoMode_AES_Key), then it is re-initialized for CBC
     * decryption. sz is expected to be a multiple of AES_BLOCK_SIZE. */
    int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
    {
        word32 *dec_key, *iv;
        word32 blocks = (sz / AES_BLOCK_SIZE);
        CRYP_InitTypeDef AES_CRYP_InitStructure;
        CRYP_KeyInitTypeDef AES_CRYP_KeyInitStructure;
        CRYP_IVInitTypeDef AES_CRYP_IVInitStructure;

        dec_key = aes->key;
        iv = aes->reg;

        /* crypto structure initialization */
        CRYP_KeyStructInit(&AES_CRYP_KeyInitStructure);
        CRYP_StructInit(&AES_CRYP_InitStructure);
        CRYP_IVStructInit(&AES_CRYP_IVInitStructure);

        /* if input and output same will overwrite input iv;
         * save the last ciphertext block now — it is the next call's IV */
        XMEMCPY(aes->tmp, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);

        /* reset registers to their default values */
        CRYP_DeInit();

        /* load key into correct registers (register slots depend on size) */
        switch (aes->rounds) {
            case 10: /* 128-bit key */
                AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_128b;
                AES_CRYP_KeyInitStructure.CRYP_Key2Left  = dec_key[0];
                AES_CRYP_KeyInitStructure.CRYP_Key2Right = dec_key[1];
                AES_CRYP_KeyInitStructure.CRYP_Key3Left  = dec_key[2];
                AES_CRYP_KeyInitStructure.CRYP_Key3Right = dec_key[3];
                break;

            case 12: /* 192-bit key */
                AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_192b;
                AES_CRYP_KeyInitStructure.CRYP_Key1Left  = dec_key[0];
                AES_CRYP_KeyInitStructure.CRYP_Key1Right = dec_key[1];
                AES_CRYP_KeyInitStructure.CRYP_Key2Left  = dec_key[2];
                AES_CRYP_KeyInitStructure.CRYP_Key2Right = dec_key[3];
                AES_CRYP_KeyInitStructure.CRYP_Key3Left  = dec_key[4];
                AES_CRYP_KeyInitStructure.CRYP_Key3Right = dec_key[5];
                break;

            case 14: /* 256-bit key */
                AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_256b;
                AES_CRYP_KeyInitStructure.CRYP_Key0Left  = dec_key[0];
                AES_CRYP_KeyInitStructure.CRYP_Key0Right = dec_key[1];
                AES_CRYP_KeyInitStructure.CRYP_Key1Left  = dec_key[2];
                AES_CRYP_KeyInitStructure.CRYP_Key1Right = dec_key[3];
                AES_CRYP_KeyInitStructure.CRYP_Key2Left  = dec_key[4];
                AES_CRYP_KeyInitStructure.CRYP_Key2Right = dec_key[5];
                AES_CRYP_KeyInitStructure.CRYP_Key3Left  = dec_key[6];
                AES_CRYP_KeyInitStructure.CRYP_Key3Right = dec_key[7];
                break;

            default:
                break;
        }

        /* set direction, mode, and datatype for key preparation */
        AES_CRYP_InitStructure.CRYP_AlgoDir  = CRYP_AlgoDir_Decrypt;
        AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_Key;
        AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_32b;
        CRYP_Init(&AES_CRYP_InitStructure);
        CRYP_KeyInit(&AES_CRYP_KeyInitStructure);

        /* enable crypto processor */
        CRYP_Cmd(ENABLE);

        /* wait until key has been prepared */
        while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {}

        /* set direction, mode, and datatype for decryption */
        AES_CRYP_InitStructure.CRYP_AlgoDir  = CRYP_AlgoDir_Decrypt;
        AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_CBC;
        AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b;
        CRYP_Init(&AES_CRYP_InitStructure);

        /* set iv (engine expects word-reversed byte order; note this
         * mutates aes->reg in place) */
        ByteReverseWords(iv, iv, AES_BLOCK_SIZE);

        AES_CRYP_IVInitStructure.CRYP_IV0Left  = iv[0];
        AES_CRYP_IVInitStructure.CRYP_IV0Right = iv[1];
        AES_CRYP_IVInitStructure.CRYP_IV1Left  = iv[2];
        AES_CRYP_IVInitStructure.CRYP_IV1Right = iv[3];
        CRYP_IVInit(&AES_CRYP_IVInitStructure);

        /* enable crypto processor */
        CRYP_Cmd(ENABLE);

        while (blocks--) {
            /* flush IN/OUT FIFOs */
            CRYP_FIFOFlush();

            CRYP_DataIn(*(uint32_t*)&in[0]);
            CRYP_DataIn(*(uint32_t*)&in[4]);
            CRYP_DataIn(*(uint32_t*)&in[8]);
            CRYP_DataIn(*(uint32_t*)&in[12]);

            /* wait until the complete message has been processed */
            while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {}

            *(uint32_t*)&out[0]  = CRYP_DataOut();
            *(uint32_t*)&out[4]  = CRYP_DataOut();
            *(uint32_t*)&out[8]  = CRYP_DataOut();
            *(uint32_t*)&out[12] = CRYP_DataOut();

            /* store iv for next call (saved last ciphertext block) */
            XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE);

            in  += AES_BLOCK_SIZE;
            out += AES_BLOCK_SIZE;
        }

        /* disable crypto processor */
        CRYP_Cmd(DISABLE);

        return 0;
    }
02684     #endif /* HAVE_AES_DECRYPT */
02685 #endif /* WOLFSSL_STM32_CUBEMX */
02686 
02687 #elif defined(HAVE_COLDFIRE_SEC)
02688     static int wc_AesCbcCrypt(Aes* aes, byte* po, const byte* pi, word32 sz,
02689         word32 descHeader)
02690     {
02691         #ifdef DEBUG_WOLFSSL
02692             int i; int stat1, stat2; int ret;
02693         #endif
02694 
02695         int size;
02696         volatile int v;
02697 
02698         if ((pi == NULL) || (po == NULL))
02699             return BAD_FUNC_ARG;    /*wrong pointer*/
02700 
02701         wc_LockMutex(&Mutex_AesSEC);
02702 
02703         /* Set descriptor for SEC */
02704         secDesc->length1 = 0x0;
02705         secDesc->pointer1 = NULL;
02706 
02707         secDesc->length2 = AES_BLOCK_SIZE;
02708         secDesc->pointer2 = (byte *)secReg; /* Initial Vector */
02709 
02710         switch(aes->rounds) {
02711             case 10: secDesc->length3 = 16; break;
02712             case 12: secDesc->length3 = 24; break;
02713             case 14: secDesc->length3 = 32; break;
02714         }
02715         XMEMCPY(secKey, aes->key, secDesc->length3);
02716 
02717         secDesc->pointer3 = (byte *)secKey;
02718         secDesc->pointer4 = AESBuffIn;
02719         secDesc->pointer5 = AESBuffOut;
02720         secDesc->length6 = 0x0;
02721         secDesc->pointer6 = NULL;
02722         secDesc->length7 = 0x0;
02723         secDesc->pointer7 = NULL;
02724         secDesc->nextDescriptorPtr = NULL;
02725 
02726         while (sz) {
02727             secDesc->header = descHeader;
02728             XMEMCPY(secReg, aes->reg, AES_BLOCK_SIZE);
02729             if ((sz % AES_BUFFER_SIZE) == sz) {
02730                 size = sz;
02731                 sz = 0;
02732             } else {
02733                 size = AES_BUFFER_SIZE;
02734                 sz -= AES_BUFFER_SIZE;
02735             }
02736             secDesc->length4 = size;
02737             secDesc->length5 = size;
02738 
02739             XMEMCPY(AESBuffIn, pi, size);
02740             if(descHeader == SEC_DESC_AES_CBC_DECRYPT) {
02741                 XMEMCPY((void*)aes->tmp, (void*)&(pi[size-AES_BLOCK_SIZE]),
02742                         AES_BLOCK_SIZE);
02743             }
02744 
02745             /* Point SEC to the location of the descriptor */
02746             MCF_SEC_FR0 = (uint32)secDesc;
02747             /* Initialize SEC and wait for encryption to complete */
02748             MCF_SEC_CCCR0 = 0x0000001a;
02749             /* poll SISR to determine when channel is complete */
02750             v=0;
02751 
02752             while ((secDesc->header>> 24) != 0xff) v++;
02753 
02754             #ifdef DEBUG_WOLFSSL
02755                 ret = MCF_SEC_SISRH;
02756                 stat1 = MCF_SEC_AESSR;
02757                 stat2 = MCF_SEC_AESISR;
02758                 if (ret & 0xe0000000) {
02759                     db_printf("Aes_Cbc(i=%d):ISRH=%08x, AESSR=%08x, "
02760                               "AESISR=%08x\n", i, ret, stat1, stat2);
02761                 }
02762             #endif
02763 
02764             XMEMCPY(po, AESBuffOut, size);
02765 
02766             if (descHeader == SEC_DESC_AES_CBC_ENCRYPT) {
02767                 XMEMCPY((void*)aes->reg, (void*)&(po[size-AES_BLOCK_SIZE]),
02768                         AES_BLOCK_SIZE);
02769             } else {
02770                 XMEMCPY((void*)aes->reg, (void*)aes->tmp, AES_BLOCK_SIZE);
02771             }
02772 
02773             pi += size;
02774             po += size;
02775         }
02776 
02777         wc_UnLockMutex(&Mutex_AesSEC);
02778         return 0;
02779     }
02780 
02781     int wc_AesCbcEncrypt(Aes* aes, byte* po, const byte* pi, word32 sz)
02782     {
02783         return (wc_AesCbcCrypt(aes, po, pi, sz, SEC_DESC_AES_CBC_ENCRYPT));
02784     }
02785 
02786     #ifdef HAVE_AES_DECRYPT
02787     int wc_AesCbcDecrypt(Aes* aes, byte* po, const byte* pi, word32 sz)
02788     {
02789         return (wc_AesCbcCrypt(aes, po, pi, sz, SEC_DESC_AES_CBC_DECRYPT));
02790     }
02791     #endif /* HAVE_AES_DECRYPT */
02792 
02793 #elif defined(FREESCALE_LTC)
02794     int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
02795     {
02796         uint32_t keySize;
02797         status_t status;
02798         byte *iv, *enc_key;
02799         word32 blocks = (sz / AES_BLOCK_SIZE);
02800 
02801         iv      = (byte*)aes->reg;
02802         enc_key = (byte*)aes->key;
02803 
02804         status = wc_AesGetKeySize(aes, &keySize);
02805         if (status != 0) {
02806             return status;
02807         }
02808 
02809         status = LTC_AES_EncryptCbc(LTC_BASE, in, out, blocks * AES_BLOCK_SIZE,
02810             iv, enc_key, keySize);
02811         return (status == kStatus_Success) ? 0 : -1;
02812     }
02813 
02814     #ifdef HAVE_AES_DECRYPT
02815     int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
02816     {
02817         uint32_t keySize;
02818         status_t status;
02819         byte* iv, *dec_key;
02820         word32 blocks = (sz / AES_BLOCK_SIZE);
02821 
02822         iv      = (byte*)aes->reg;
02823         dec_key = (byte*)aes->key;
02824 
02825         status = wc_AesGetKeySize(aes, &keySize);
02826         if (status != 0) {
02827             return status;
02828         }
02829 
02830         status = LTC_AES_DecryptCbc(LTC_BASE, in, out, blocks * AES_BLOCK_SIZE,
02831             iv, dec_key, keySize, kLTC_EncryptKey);
02832         return (status == kStatus_Success) ? 0 : -1;
02833     }
02834     #endif /* HAVE_AES_DECRYPT */
02835 
02836 #elif defined(FREESCALE_MMCAU)
02837     int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
02838     {
02839         int i;
02840         int offset = 0;
02841         word32 blocks = (sz / AES_BLOCK_SIZE);
02842         byte *iv;
02843         byte temp_block[AES_BLOCK_SIZE];
02844 
02845         iv      = (byte*)aes->reg;
02846 
02847         while (blocks--) {
02848             XMEMCPY(temp_block, in + offset, AES_BLOCK_SIZE);
02849 
02850             /* XOR block with IV for CBC */
02851             for (i = 0; i < AES_BLOCK_SIZE; i++)
02852                 temp_block[i] ^= iv[i];
02853 
02854             wc_AesEncrypt(aes, temp_block, out + offset);
02855 
02856             offset += AES_BLOCK_SIZE;
02857 
02858             /* store IV for next block */
02859             XMEMCPY(iv, out + offset - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
02860         }
02861 
02862         return 0;
02863     }
02864     #ifdef HAVE_AES_DECRYPT
02865     int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
02866     {
02867         int i;
02868         int offset = 0;
02869         word32 blocks = (sz / AES_BLOCK_SIZE);
02870         byte* iv;
02871         byte temp_block[AES_BLOCK_SIZE];
02872 
02873         iv      = (byte*)aes->reg;
02874 
02875         while (blocks--) {
02876             XMEMCPY(temp_block, in + offset, AES_BLOCK_SIZE);
02877 
02878             wc_AesDecrypt(aes, in + offset, out + offset);
02879 
02880             /* XOR block with IV for CBC */
02881             for (i = 0; i < AES_BLOCK_SIZE; i++)
02882                 (out + offset)[i] ^= iv[i];
02883 
02884             /* store IV for next block */
02885             XMEMCPY(iv, temp_block, AES_BLOCK_SIZE);
02886 
02887             offset += AES_BLOCK_SIZE;
02888         }
02889 
02890         return 0;
02891     }
02892     #endif /* HAVE_AES_DECRYPT */
02893 
02894 #elif defined(WOLFSSL_PIC32MZ_CRYPT)
02895 
02896     int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
02897     {
02898         int ret;
02899 
02900         /* hardware fails on input that is not a multiple of AES block size */
02901         if (sz % AES_BLOCK_SIZE != 0) {
02902             return BAD_FUNC_ARG;
02903         }
02904 
02905         ret = wc_Pic32AesCrypt(
02906             aes->key, aes->keylen, aes->reg, AES_BLOCK_SIZE,
02907             out, in, sz, PIC32_ENCRYPTION,
02908             PIC32_ALGO_AES, PIC32_CRYPTOALGO_RCBC);
02909 
02910         /* store iv for next call */
02911         if (ret == 0) {
02912             XMEMCPY(aes->reg, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
02913         }
02914 
02915         return ret;
02916     }
02917     #ifdef HAVE_AES_DECRYPT
02918     int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
02919     {
02920         int ret;
02921         byte scratch[AES_BLOCK_SIZE];
02922 
02923         /* hardware fails on input that is not a multiple of AES block size */
02924         if (sz % AES_BLOCK_SIZE != 0) {
02925             return BAD_FUNC_ARG;
02926         }
02927         XMEMCPY(scratch, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
02928 
02929         ret = wc_Pic32AesCrypt(
02930             aes->key, aes->keylen, aes->reg, AES_BLOCK_SIZE,
02931             out, in, sz, PIC32_DECRYPTION,
02932             PIC32_ALGO_AES, PIC32_CRYPTOALGO_RCBC);
02933 
02934         /* store iv for next call */
02935         if (ret == 0) {
02936             XMEMCPY((byte*)aes->reg, scratch, AES_BLOCK_SIZE);
02937         }
02938 
02939         return ret;
02940     }
02941     #endif /* HAVE_AES_DECRYPT */
02942 
02943 #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES)
02944       /* implemented in wolfcrypt/src/port/caam/caam_aes.c */
02945 
02946 #else
02947 
    /* Generic AES-CBC encrypt: dispatches to async hardware, AES-NI, or
     * the software loop. sz is expected to be a multiple of
     * AES_BLOCK_SIZE (only whole blocks are processed by the software
     * path). aes->reg holds the IV and ends up holding the last
     * ciphertext block for chaining across calls. */
    int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
    {
        word32 blocks = (sz / AES_BLOCK_SIZE);

        if (aes == NULL || out == NULL || in == NULL) {
            return BAD_FUNC_ARG;
        }

    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
        /* if async and byte count above threshold */
        if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES &&
                                                sz >= WC_ASYNC_THRESH_AES_CBC) {
        #if defined(HAVE_CAVIUM)
            return NitroxAesCbcEncrypt(aes, out, in, sz);
        #elif defined(HAVE_INTEL_QA)
            return IntelQaSymAesCbcEncrypt(&aes->asyncDev, out, in, sz,
                (const byte*)aes->asyncKey, aes->keylen,
                (const byte*)aes->asyncIv, AES_BLOCK_SIZE);
        #else /* WOLFSSL_ASYNC_CRYPT_TEST */
            /* test mode: queue the request and signal pending */
            if (wc_AsyncTestInit(&aes->asyncDev, ASYNC_TEST_AES_CBC_ENCRYPT)) {
                WC_ASYNC_TEST* testDev = &aes->asyncDev.test;
                testDev->aes.aes = aes;
                testDev->aes.out = out;
                testDev->aes.in = in;
                testDev->aes.sz = sz;
                return WC_PENDING_E;
            }
        #endif
        }
    #endif /* WOLFSSL_ASYNC_CRYPT */

    #ifdef WOLFSSL_AESNI
        if (haveAESNI) {
            #ifdef DEBUG_AESNI
                printf("about to aes cbc encrypt\n");
                printf("in  = %p\n", in);
                printf("out = %p\n", out);
                printf("aes->key = %p\n", aes->key);
                printf("aes->reg = %p\n", aes->reg);
                printf("aes->rounds = %d\n", aes->rounds);
                printf("sz = %d\n", sz);
            #endif

            /* check alignment, decrypt doesn't need alignment */
            if ((wolfssl_word)in % AESNI_ALIGN) {
            #ifndef NO_WOLFSSL_ALLOC_ALIGN
                /* over-allocate and round up to an AESNI_ALIGN boundary */
                byte* tmp = (byte*)XMALLOC(sz + AES_BLOCK_SIZE + AESNI_ALIGN,
                                            aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
                byte* tmp_align;
                if (tmp == NULL) return MEMORY_E;

                tmp_align = tmp + (AESNI_ALIGN - ((size_t)tmp % AESNI_ALIGN));
                XMEMCPY(tmp_align, in, sz);
                AES_CBC_encrypt(tmp_align, tmp_align, (byte*)aes->reg, sz,
                                                  (byte*)aes->key, aes->rounds);
                /* store iv for next call */
                XMEMCPY(aes->reg, tmp_align + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);

                XMEMCPY(out, tmp_align, sz);
                XFREE(tmp, aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
                return 0;
            #else
                WOLFSSL_MSG("AES-CBC encrypt with bad alignment");
                return BAD_ALIGN_E;
            #endif
            }

            AES_CBC_encrypt(in, out, (byte*)aes->reg, sz, (byte*)aes->key,
                            aes->rounds);
            /* store iv for next call */
            XMEMCPY(aes->reg, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);

            return 0;
        }
    #endif

        /* software CBC: chain through aes->reg so it always holds the
         * newest ciphertext block */
        while (blocks--) {
            xorbuf((byte*)aes->reg, in, AES_BLOCK_SIZE);
            wc_AesEncrypt(aes, (byte*)aes->reg, (byte*)aes->reg);
            XMEMCPY(out, aes->reg, AES_BLOCK_SIZE);

            out += AES_BLOCK_SIZE;
            in  += AES_BLOCK_SIZE;
        }

        return 0;
    }
03035 
03036     #ifdef HAVE_AES_DECRYPT
03037     int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
03038     {
03039         word32 blocks;
03040 
03041         if (aes == NULL || out == NULL || in == NULL
03042                                        || sz % AES_BLOCK_SIZE != 0) {
03043             return BAD_FUNC_ARG;
03044         }
03045 
03046     #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
03047         /* if async and byte count above threshold */
03048         if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES &&
03049                                                 sz >= WC_ASYNC_THRESH_AES_CBC) {
03050         #if defined(HAVE_CAVIUM)
03051             return NitroxAesCbcDecrypt(aes, out, in, sz);
03052         #elif defined(HAVE_INTEL_QA)
03053             return IntelQaSymAesCbcDecrypt(&aes->asyncDev, out, in, sz,
03054                 (const byte*)aes->asyncKey, aes->keylen,
03055                 (const byte*)aes->asyncIv, AES_BLOCK_SIZE);
03056         #else /* WOLFSSL_ASYNC_CRYPT_TEST */
03057             if (wc_AsyncTestInit(&aes->asyncDev, ASYNC_TEST_AES_CBC_DECRYPT)) {
03058                 WC_ASYNC_TEST* testDev = &aes->asyncDev.test;
03059                 testDev->aes.aes = aes;
03060                 testDev->aes.out = out;
03061                 testDev->aes.in = in;
03062                 testDev->aes.sz = sz;
03063                 return WC_PENDING_E;
03064             }
03065         #endif
03066         }
03067     #endif
03068 
03069     #ifdef WOLFSSL_AESNI
03070         if (haveAESNI) {
03071             #ifdef DEBUG_AESNI
03072                 printf("about to aes cbc decrypt\n");
03073                 printf("in  = %p\n", in);
03074                 printf("out = %p\n", out);
03075                 printf("aes->key = %p\n", aes->key);
03076                 printf("aes->reg = %p\n", aes->reg);
03077                 printf("aes->rounds = %d\n", aes->rounds);
03078                 printf("sz = %d\n", sz);
03079             #endif
03080 
03081             /* if input and output same will overwrite input iv */
03082             XMEMCPY(aes->tmp, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
03083             #if defined(WOLFSSL_AESNI_BY4)
03084             AES_CBC_decrypt_by4(in, out, (byte*)aes->reg, sz, (byte*)aes->key,
03085                             aes->rounds);
03086             #elif defined(WOLFSSL_AESNI_BY6)
03087             AES_CBC_decrypt_by6(in, out, (byte*)aes->reg, sz, (byte*)aes->key,
03088                             aes->rounds);
03089             #else /* WOLFSSL_AESNI_BYx */
03090             AES_CBC_decrypt_by8(in, out, (byte*)aes->reg, sz, (byte*)aes->key,
03091                             aes->rounds);
03092             #endif /* WOLFSSL_AESNI_BYx */
03093             /* store iv for next call */
03094             XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE);
03095             return 0;
03096         }
03097     #endif
03098 
03099         blocks = sz / AES_BLOCK_SIZE;
03100         while (blocks--) {
03101             XMEMCPY(aes->tmp, in, AES_BLOCK_SIZE);
03102             wc_AesDecrypt(aes, (byte*)aes->tmp, out);
03103             xorbuf(out, (byte*)aes->reg, AES_BLOCK_SIZE);
03104             XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE);
03105 
03106             out += AES_BLOCK_SIZE;
03107             in  += AES_BLOCK_SIZE;
03108         }
03109 
03110         return 0;
03111     }
03112     #endif
03113 
03114 #endif /* AES-CBC block */
03115 #endif /* HAVE_AES_CBC */
03116 
03117 /* AES-CTR */
03118 #if defined(WOLFSSL_AES_COUNTER)
03119 
03120     #ifdef STM32_CRYPTO
03121         #define NEED_AES_CTR_SOFT
03122         #define XTRANSFORM_AESCTRBLOCK wc_AesCtrEncryptBlock
03123 
        /* Encrypt one AES_BLOCK_SIZE counter block with the STM32 CRYP
         * peripheral. The counter lives in aes->reg and is handed to the
         * hardware as the "IV"; the soft CTR driver increments it between
         * blocks.
         *
         * returns 0 on success, WC_TIMEOUT_E if the CubeMX HAL call fails.
         */
        int wc_AesCtrEncryptBlock(Aes* aes, byte* out, const byte* in)
        {
            int ret = 0;
        #ifdef WOLFSSL_STM32_CUBEMX
            CRYP_HandleTypeDef hcryp;

            XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef));
            /* map the AES round count back to the hardware key-size setting */
            switch (aes->rounds) {
                case 10: /* 128-bit key */
                    hcryp.Init.KeySize = CRYP_KEYSIZE_128B;
                    break;
    #ifdef CRYP_KEYSIZE_192B
                case 12: /* 192-bit key */
                    hcryp.Init.KeySize = CRYP_KEYSIZE_192B;
                    break;
    #endif
                case 14: /* 256-bit key */
                    hcryp.Init.KeySize = CRYP_KEYSIZE_256B;
                    break;
                default:
                    /* NOTE(review): unexpected round count leaves KeySize at
                     * its zeroed value -- confirm the HAL rejects that */
                    break;
            }
            hcryp.Instance = CRYP;
            hcryp.Init.DataType = CRYP_DATATYPE_8B;
            hcryp.Init.pKey = (byte*)aes->key;
            hcryp.Init.pInitVect = (byte*)aes->reg;

            /* NOTE(review): HAL_CRYP_Init status is not checked */
            HAL_CRYP_Init(&hcryp);

            if (HAL_CRYP_AESCTR_Encrypt(&hcryp, (byte*)in, AES_BLOCK_SIZE, out,
                                                STM32_HAL_TIMEOUT) != HAL_OK) {
                /* failed */
                ret = WC_TIMEOUT_E;
            }

            HAL_CRYP_DeInit(&hcryp);

        #else /* STD_PERI_LIB */
            word32 *enc_key, *iv;
            CRYP_InitTypeDef AES_CRYP_InitStructure;
            CRYP_KeyInitTypeDef AES_CRYP_KeyInitStructure;
            CRYP_IVInitTypeDef AES_CRYP_IVInitStructure;

            enc_key = aes->key;
            iv = aes->reg;

            /* crypto structure initialization */
            CRYP_KeyStructInit(&AES_CRYP_KeyInitStructure);
            CRYP_StructInit(&AES_CRYP_InitStructure);
            CRYP_IVStructInit(&AES_CRYP_IVInitStructure);

            /* reset registers to their default values */
            CRYP_DeInit();

            /* load key into correct registers; shorter keys occupy only the
             * upper key register pairs per the CRYP register layout */
            switch (aes->rounds) {
                case 10: /* 128-bit key */
                    AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_128b;
                    AES_CRYP_KeyInitStructure.CRYP_Key2Left  = enc_key[0];
                    AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[1];
                    AES_CRYP_KeyInitStructure.CRYP_Key3Left  = enc_key[2];
                    AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[3];
                    break;
                case 12: /* 192-bit key */
                    AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_192b;
                    AES_CRYP_KeyInitStructure.CRYP_Key1Left  = enc_key[0];
                    AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[1];
                    AES_CRYP_KeyInitStructure.CRYP_Key2Left  = enc_key[2];
                    AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[3];
                    AES_CRYP_KeyInitStructure.CRYP_Key3Left  = enc_key[4];
                    AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[5];
                    break;
                case 14: /* 256-bit key */
                    AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_256b;
                    AES_CRYP_KeyInitStructure.CRYP_Key0Left  = enc_key[0];
                    AES_CRYP_KeyInitStructure.CRYP_Key0Right = enc_key[1];
                    AES_CRYP_KeyInitStructure.CRYP_Key1Left  = enc_key[2];
                    AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[3];
                    AES_CRYP_KeyInitStructure.CRYP_Key2Left  = enc_key[4];
                    AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[5];
                    AES_CRYP_KeyInitStructure.CRYP_Key3Left  = enc_key[6];
                    AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[7];
                    break;
                default:
                    break;
            }
            CRYP_KeyInit(&AES_CRYP_KeyInitStructure);

            /* set iv */
            AES_CRYP_IVInitStructure.CRYP_IV0Left  = ByteReverseWord32(iv[0]);
            AES_CRYP_IVInitStructure.CRYP_IV0Right = ByteReverseWord32(iv[1]);
            AES_CRYP_IVInitStructure.CRYP_IV1Left  = ByteReverseWord32(iv[2]);
            AES_CRYP_IVInitStructure.CRYP_IV1Right = ByteReverseWord32(iv[3]);
            CRYP_IVInit(&AES_CRYP_IVInitStructure);

            /* set direction, mode, and datatype */
            AES_CRYP_InitStructure.CRYP_AlgoDir  = CRYP_AlgoDir_Encrypt;
            AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_CTR;
            AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b;
            CRYP_Init(&AES_CRYP_InitStructure);

            /* enable crypto processor */
            CRYP_Cmd(ENABLE);

            /* flush IN/OUT FIFOs */
            CRYP_FIFOFlush();

            /* feed one 16-byte block into the IN FIFO
             * NOTE(review): word accesses assume in/out are 4-byte aligned --
             * confirm callers guarantee this */
            CRYP_DataIn(*(uint32_t*)&in[0]);
            CRYP_DataIn(*(uint32_t*)&in[4]);
            CRYP_DataIn(*(uint32_t*)&in[8]);
            CRYP_DataIn(*(uint32_t*)&in[12]);

            /* wait until the complete message has been processed */
            while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {}

            *(uint32_t*)&out[0]  = CRYP_DataOut();
            *(uint32_t*)&out[4]  = CRYP_DataOut();
            *(uint32_t*)&out[8]  = CRYP_DataOut();
            *(uint32_t*)&out[12] = CRYP_DataOut();

            /* disable crypto processor */
            CRYP_Cmd(DISABLE);

        #endif /* WOLFSSL_STM32_CUBEMX */
            return ret;
        }
03250 
03251 
03252     #elif defined(WOLFSSL_PIC32MZ_CRYPT)
03253 
03254         #define NEED_AES_CTR_SOFT
03255         #define XTRANSFORM_AESCTRBLOCK wc_AesCtrEncryptBlock
03256 
03257         int wc_AesCtrEncryptBlock(Aes* aes, byte* out, const byte* in)
03258         {
03259             word32 tmpIv[AES_BLOCK_SIZE / sizeof(word32)];
03260             XMEMCPY(tmpIv, aes->reg, AES_BLOCK_SIZE);
03261             return wc_Pic32AesCrypt(
03262                 aes->key, aes->keylen, tmpIv, AES_BLOCK_SIZE,
03263                 out, in, AES_BLOCK_SIZE,
03264                 PIC32_ENCRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_RCTR);
03265         }
03266 
03267     #elif defined(HAVE_COLDFIRE_SEC)
03268         #error "Coldfire SEC doesn't currently support AES-CTR mode"
03269 
03270     #elif defined(FREESCALE_LTC)
        /* AES-CTR encrypt (CTR decryption is identical) using the Freescale
         * LTC peripheral.
         *
         * aes  initialized Aes structure; aes->reg is the counter, aes->tmp /
         *      aes->left track key-stream bytes left over from a prior call
         * out  destination buffer
         * in   source buffer, sz bytes
         *
         * returns 0 on success, BAD_FUNC_ARG on NULL argument.
         */
        int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
        {
            uint32_t keySize;
            byte *iv, *enc_key;
            byte* tmp;

            if (aes == NULL || out == NULL || in == NULL) {
                return BAD_FUNC_ARG;
            }

            /* consume any unused bytes left in aes->tmp */
            tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left;
            while (aes->left && sz) {
                *(out++) = *(in++) ^ *(tmp++);
                aes->left--;
                sz--;
            }

            if (sz) {
                iv      = (byte*)aes->reg;
                enc_key = (byte*)aes->key;

                /* NOTE(review): return value ignored; keySize would be
                 * unset if the Aes key were invalid -- confirm */
                wc_AesGetKeySize(aes, &keySize);

                /* presumably the driver advances iv and deposits unused
                 * key-stream bytes into aes->tmp / aes->left -- verify
                 * against the LTC driver documentation */
                LTC_AES_CryptCtr(LTC_BASE, in, out, sz,
                    iv, enc_key, keySize, (byte*)aes->tmp,
                    (uint32_t*)&aes->left);
            }

            return 0;
        }
03302 
03303     #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES)
03304         /* implemented in wolfcrypt/src/port/caam/caam_aes.c */
03305 
03306     #else
03307 
03308         /* Use software based AES counter */
03309         #define NEED_AES_CTR_SOFT
03310     #endif
03311 
03312     #ifdef NEED_AES_CTR_SOFT
03313         /* Increment AES counter */
03314         static WC_INLINE void IncrementAesCounter(byte* inOutCtr)
03315         {
03316             /* in network byte order so start at end and work back */
03317             int i;
03318             for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) {
03319                 if (++inOutCtr[i])  /* we're done unless we overflow */
03320                     return;
03321             }
03322         }
03323 
03324         int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
03325         {
03326             byte* tmp;
03327 
03328             if (aes == NULL || out == NULL || in == NULL) {
03329                 return BAD_FUNC_ARG;
03330             }
03331 
03332             /* consume any unused bytes left in aes->tmp */
03333             tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left;
03334             while (aes->left && sz) {
03335                *(out++) = *(in++) ^ *(tmp++);
03336                aes->left--;
03337                sz--;
03338             }
03339 
03340             /* do as many block size ops as possible */
03341             while (sz >= AES_BLOCK_SIZE) {
03342             #ifdef XTRANSFORM_AESCTRBLOCK
03343                 XTRANSFORM_AESCTRBLOCK(aes, out, in);
03344             #else
03345                 wc_AesEncrypt(aes, (byte*)aes->reg, out);
03346                 xorbuf(out, in, AES_BLOCK_SIZE);
03347             #endif
03348                 IncrementAesCounter((byte*)aes->reg);
03349 
03350                 out += AES_BLOCK_SIZE;
03351                 in  += AES_BLOCK_SIZE;
03352                 sz  -= AES_BLOCK_SIZE;
03353                 aes->left = 0;
03354             }
03355 
03356             /* handle non block size remaining and store unused byte count in left */
03357             if (sz) {
03358                 wc_AesEncrypt(aes, (byte*)aes->reg, (byte*)aes->tmp);
03359                 IncrementAesCounter((byte*)aes->reg);
03360 
03361                 aes->left = AES_BLOCK_SIZE;
03362                 tmp = (byte*)aes->tmp;
03363 
03364                 while (sz--) {
03365                     *(out++) = *(in++) ^ *(tmp++);
03366                     aes->left--;
03367                 }
03368             }
03369 
03370             return 0;
03371         }
03372 
03373     #endif /* NEED_AES_CTR_SOFT */
03374 
03375 #endif /* WOLFSSL_AES_COUNTER */
03376 #endif /* !WOLFSSL_ARMASM */
03377 
03378 
03379 /*
03380  * The IV for AES GCM and CCM, stored in struct Aes's member reg, is comprised
03381  * of two parts in order:
03382  *   1. The fixed field which may be 0 or 4 bytes long. In TLS, this is set
03383  *      to the implicit IV.
03384  *   2. The explicit IV is generated by wolfCrypt. It needs to be managed
03385  *      by wolfCrypt to ensure the IV is unique for each call to encrypt.
03386  * The IV may be a 96-bit random value, or the 32-bit fixed value and a
03387  * 64-bit set of 0 or random data. The final 32-bits of reg is used as a
03388  * block counter during the encryption.
03389  */
03390 
03391 #if (defined(HAVE_AESGCM) && !defined(WC_NO_RNG)) || defined(HAVE_AESCCM)
03392 static WC_INLINE void IncCtr(byte* ctr, word32 ctrSz)
03393 {
03394     int i;
03395     for (i = ctrSz-1; i >= 0; i--) {
03396         if (++ctr[i])
03397             break;
03398     }
03399 }
03400 #endif /* HAVE_AESGCM || HAVE_AESCCM */
03401 
03402 
03403 #ifdef HAVE_AESGCM
03404 
03405 #if defined(HAVE_COLDFIRE_SEC)
03406     #error "Coldfire SEC doesn't currently support AES-GCM mode"
03407 
03408 #elif defined(WOLFSSL_NRF51_AES)
03409     #error "nRF51 doesn't currently support AES-GCM mode"
03410 
03411 #endif
03412 
03413 #ifdef WOLFSSL_ARMASM
03414     /* implementation is located in wolfcrypt/src/port/arm/armv8-aes.c */
03415 #else /* software + AESNI implementation */
03416 
03417 #if !defined(FREESCALE_LTC_AES_GCM)
03418 static WC_INLINE void IncrementGcmCounter(byte* inOutCtr)
03419 {
03420     int i;
03421 
03422     /* in network byte order so start at end and work back */
03423     for (i = AES_BLOCK_SIZE - 1; i >= AES_BLOCK_SIZE - CTR_SZ; i--) {
03424         if (++inOutCtr[i])  /* we're done unless we overflow */
03425             return;
03426     }
03427 }
03428 #endif /* !FREESCALE_LTC_AES_GCM */
03429 
03430 #if defined(GCM_SMALL) || defined(GCM_TABLE)
03431 
03432 static WC_INLINE void FlattenSzInBits(byte* buf, word32 sz)
03433 {
03434     /* Multiply the sz by 8 */
03435     word32 szHi = (sz >> (8*sizeof(sz) - 3));
03436     sz <<= 3;
03437 
03438     /* copy over the words of the sz into the destination buffer */
03439     buf[0] = (szHi >> 24) & 0xff;
03440     buf[1] = (szHi >> 16) & 0xff;
03441     buf[2] = (szHi >>  8) & 0xff;
03442     buf[3] = szHi & 0xff;
03443     buf[4] = (sz >> 24) & 0xff;
03444     buf[5] = (sz >> 16) & 0xff;
03445     buf[6] = (sz >>  8) & 0xff;
03446     buf[7] = sz & 0xff;
03447 }
03448 
03449 
03450 static WC_INLINE void RIGHTSHIFTX(byte* x)
03451 {
03452     int i;
03453     int carryOut = 0;
03454     int carryIn = 0;
03455     int borrow = x[15] & 0x01;
03456 
03457     for (i = 0; i < AES_BLOCK_SIZE; i++) {
03458         carryOut = x[i] & 0x01;
03459         x[i] = (x[i] >> 1) | (carryIn ? 0x80 : 0);
03460         carryIn = carryOut;
03461     }
03462     if (borrow) x[0] ^= 0xE1;
03463 }
03464 
03465 #endif /* defined(GCM_SMALL) || defined(GCM_TABLE) */
03466 
03467 
03468 #ifdef GCM_TABLE
03469 
03470 static void GenerateM0(Aes* aes)
03471 {
03472     int i, j;
03473     byte (*m)[AES_BLOCK_SIZE] = aes->M0;
03474 
03475     XMEMCPY(m[128], aes->H, AES_BLOCK_SIZE);
03476 
03477     for (i = 64; i > 0; i /= 2) {
03478         XMEMCPY(m[i], m[i*2], AES_BLOCK_SIZE);
03479         RIGHTSHIFTX(m[i]);
03480     }
03481 
03482     for (i = 2; i < 256; i *= 2) {
03483         for (j = 1; j < i; j++) {
03484             XMEMCPY(m[i+j], m[i], AES_BLOCK_SIZE);
03485             xorbuf(m[i+j], m[j], AES_BLOCK_SIZE);
03486         }
03487     }
03488 
03489     XMEMSET(m[0], 0, AES_BLOCK_SIZE);
03490 }
03491 
03492 #endif /* GCM_TABLE */
03493 
03494 
/* Set the AES-GCM key and derive the GHASH hash subkey H = E_K(0^128).
 *
 * aes  Aes structure to configure
 * key  raw key material (or a CAAM blob when built with
 *      WOLFSSL_IMX6_CAAM_BLOB)
 * len  key length: must end up 16, 24 or 32 bytes
 *
 * returns 0 on success, BAD_FUNC_ARG on bad length or blob open failure,
 * otherwise the wc_AesSetKey() error code.
 */
int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
{
    int  ret;
    byte iv[AES_BLOCK_SIZE];

    #ifdef WOLFSSL_IMX6_CAAM_BLOB
        byte   local[32];
        word32 localSz = 32;

        /* a blob is WC_CAAM_BLOB_SZ bytes larger than the key it wraps;
         * unwrap it into local[] and use that as the key */
        if (len == (16 + WC_CAAM_BLOB_SZ) ||
          len == (24 + WC_CAAM_BLOB_SZ) ||
          len == (32 + WC_CAAM_BLOB_SZ)) {
            if (wc_caamOpenBlob((byte*)key, len, local, &localSz) != 0) {
                 return BAD_FUNC_ARG;
            }

            /* set local values */
            key = local;
            len = localSz;
        }
    #endif

    if (!((len == 16) || (len == 24) || (len == 32)))
        return BAD_FUNC_ARG;

    /* key schedule with a zero IV; the zero block is reused below for H */
    XMEMSET(iv, 0, AES_BLOCK_SIZE);
    ret = wc_AesSetKey(aes, key, len, iv, AES_ENCRYPTION);

    #ifdef WOLFSSL_AESNI
        /* AES-NI code generates its own H value. */
        if (haveAESNI)
            return ret;
    #endif /* WOLFSSL_AESNI */

#if !defined(FREESCALE_LTC_AES_GCM)
    if (ret == 0) {
        /* H = E_K(0^128), the GHASH subkey */
        wc_AesEncrypt(aes, iv, aes->H);
    #ifdef GCM_TABLE
        GenerateM0(aes);
    #endif /* GCM_TABLE */
    }
#endif /* FREESCALE_LTC_AES_GCM */

#if defined(WOLFSSL_XILINX_CRYPT)
    /* NOTE(review): return value of wc_AesGcmSetKey_ex() is discarded --
     * confirm a hardware key-load failure should not be reported */
    wc_AesGcmSetKey_ex(aes, key, len, XSECURE_CSU_AES_KEY_SRC_KUP);
#endif

#ifdef WOLFSSL_IMX6_CAAM_BLOB
    /* scrub the unwrapped key copy */
    ForceZero(local, sizeof(local));
#endif

    return ret;
}
03548 
03549 
#ifdef WOLFSSL_AESNI

#if defined(USE_INTEL_SPEEDUP)
    #define HAVE_INTEL_AVX1
    #define HAVE_INTEL_AVX2
#endif /* USE_INTEL_SPEEDUP */

/* MSVC cannot initialize an __m128i from two 64-bit halves directly, so
 * expand the constant byte by byte; GCC/Clang take the two words as-is. */
#ifdef _MSC_VER
    #define S(w,z) ((char)((unsigned long long)(w) >> (8*(7-(z))) & 0xFF))
    #define M128_INIT(x,y) { S((x),7), S((x),6), S((x),5), S((x),4), \
                             S((x),3), S((x),2), S((x),1), S((x),0), \
                             S((y),7), S((y),6), S((y),5), S((y),4), \
                             S((y),3), S((y),2), S((y),1), S((y),0) }
#else
    #define M128_INIT(x,y) { (x), (y) }
#endif

/* modulus constant used when folding the 256-bit GHASH product */
static const __m128i MOD2_128 = M128_INIT(0x1,
                                           (long long int)0xc200000000000000UL);


/* See Intel® Carry-Less Multiplication Instruction
 * and its Usage for Computing the GCM Mode White Paper
 * by Shay Gueron, Intel Mobility Group, Israel Development Center;
 * and Michael E. Kounavis, Intel Labs, Circuits and Systems Research */


/* Figure 9. AES-GCM – Encrypt With Single Block Ghash at a Time */

/* counter-increment constants; the unrolled loop steps 8 blocks at once */
static const __m128i ONE   = M128_INIT(0x0, 0x1);
#ifndef AES_GCM_AESNI_NO_UNROLL
static const __m128i TWO   = M128_INIT(0x0, 0x2);
static const __m128i THREE = M128_INIT(0x0, 0x3);
static const __m128i FOUR  = M128_INIT(0x0, 0x4);
static const __m128i FIVE  = M128_INIT(0x0, 0x5);
static const __m128i SIX   = M128_INIT(0x0, 0x6);
static const __m128i SEVEN = M128_INIT(0x0, 0x7);
static const __m128i EIGHT = M128_INIT(0x0, 0x8);
#endif
/* pshufb masks: BSWAP_EPI64 byte-reverses each 64-bit half,
 * BSWAP_MASK byte-reverses the whole 16-byte block */
static const __m128i BSWAP_EPI64 = M128_INIT(0x0001020304050607, 0x08090a0b0c0d0e0f);
static const __m128i BSWAP_MASK  = M128_INIT(0x08090a0b0c0d0e0f, 0x0001020304050607);
03591 
03592 
03593 #ifndef _MSC_VER
03594 
03595 #define _VAR(a) "" #a ""
03596 #define VAR(a) _VAR(a)
03597 
03598 #define HR     %%xmm14
03599 #define XR     %%xmm15
03600 #define KR     %%ebx
03601 #define KR64   %%rbx
03602 #if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL)
03603 #define CTR1   128(%%rsp)
03604 #define TR     144(%%rsp)
03605 #define HTR    %%rsp
03606 #define STACK_OFFSET    160
03607 #else
03608 #define CTR1   (%%rsp)
03609 #define TR     16(%%rsp)
03610 #define STACK_OFFSET    32
03611 #endif
03612 
03613 #define AESENC()                      \
03614     "aesenc %%xmm12, %%xmm4\n\t"  \
03615     "aesenc %%xmm12, %%xmm5\n\t"  \
03616     "aesenc %%xmm12, %%xmm6\n\t"  \
03617     "aesenc %%xmm12, %%xmm7\n\t"  \
03618     "aesenc %%xmm12, %%xmm8\n\t"  \
03619     "aesenc %%xmm12, %%xmm9\n\t"  \
03620     "aesenc %%xmm12, %%xmm10\n\t" \
03621     "aesenc %%xmm12, %%xmm11\n\t"
03622 
03623 #define AESENC_SET(o)                        \
03624     "movdqa " #o "(%[KEY]), %%xmm12\n\t" \
03625     AESENC()
03626 
03627 #define AESENC_CTR()                        \
03628     "movdqu " VAR(CTR1) ", %%xmm4\n\t"  \
03629     "movdqa %[BSWAP_EPI64], %%xmm1\n\t" \
03630     "movdqu %%xmm4, %%xmm0\n\t"         \
03631     "pshufb %%xmm1, %%xmm4\n\t"         \
03632     "movdqa %%xmm0, %%xmm5\n\t"         \
03633     "paddd  %[ONE], %%xmm5\n\t"         \
03634     "pshufb %%xmm1, %%xmm5\n\t"         \
03635     "movdqa %%xmm0, %%xmm6\n\t"         \
03636     "paddd  %[TWO], %%xmm6\n\t"         \
03637     "pshufb %%xmm1, %%xmm6\n\t"         \
03638     "movdqa %%xmm0, %%xmm7\n\t"         \
03639     "paddd  %[THREE], %%xmm7\n\t"       \
03640     "pshufb %%xmm1, %%xmm7\n\t"         \
03641     "movdqa %%xmm0, %%xmm8\n\t"         \
03642     "paddd  %[FOUR], %%xmm8\n\t"        \
03643     "pshufb %%xmm1, %%xmm8\n\t"         \
03644     "movdqa %%xmm0, %%xmm9\n\t"         \
03645     "paddd  %[FIVE], %%xmm9\n\t"        \
03646     "pshufb %%xmm1, %%xmm9\n\t"         \
03647     "movdqa %%xmm0, %%xmm10\n\t"        \
03648     "paddd  %[SIX], %%xmm10\n\t"        \
03649     "pshufb %%xmm1, %%xmm10\n\t"        \
03650     "movdqa %%xmm0, %%xmm11\n\t"        \
03651     "paddd  %[SEVEN], %%xmm11\n\t"      \
03652     "pshufb %%xmm1, %%xmm11\n\t"        \
03653     "paddd  %[EIGHT], %%xmm0\n\t"
03654 
03655 #define AESENC_XOR()                       \
03656     "movdqa (%[KEY]), %%xmm12\n\t"     \
03657     "movdqu %%xmm0, " VAR(CTR1) "\n\t" \
03658     "pxor   %%xmm12, %%xmm4\n\t"       \
03659     "pxor   %%xmm12, %%xmm5\n\t"       \
03660     "pxor   %%xmm12, %%xmm6\n\t"       \
03661     "pxor   %%xmm12, %%xmm7\n\t"       \
03662     "pxor   %%xmm12, %%xmm8\n\t"       \
03663     "pxor   %%xmm12, %%xmm9\n\t"       \
03664     "pxor   %%xmm12, %%xmm10\n\t"      \
03665     "pxor   %%xmm12, %%xmm11\n\t"
03666 
03667 /* Encrypt and carry-less multiply for AVX1. */
03668 #define AESENC_PCLMUL_1(src, o1, o2, o3)            \
03669     "movdqu " #o3 "(" VAR(HTR) "), %%xmm12\n\t" \
03670     "movdqu " #o2 "(" #src "), %%xmm0\n\t"      \
03671     "aesenc " #o1 "(%[KEY]), %%xmm4\n\t"        \
03672     "pshufb %[BSWAP_MASK], %%xmm0\n\t"          \
03673     "pxor   %%xmm2, %%xmm0\n\t"                 \
03674     "pshufd $0x4e, %%xmm12, %%xmm1\n\t"         \
03675     "pshufd $0x4e, %%xmm0, %%xmm14\n\t"         \
03676     "pxor   %%xmm12, %%xmm1\n\t"                \
03677     "pxor   %%xmm0, %%xmm14\n\t"                \
03678     "movdqa %%xmm0, %%xmm3\n\t"                 \
03679     "pclmulqdq  $0x11, %%xmm12, %%xmm3\n\t"         \
03680     "aesenc " #o1 "(%[KEY]), %%xmm5\n\t"        \
03681     "aesenc " #o1 "(%[KEY]), %%xmm6\n\t"        \
03682     "movdqa %%xmm0, %%xmm2\n\t"                 \
03683     "pclmulqdq  $0x00, %%xmm12, %%xmm2\n\t"         \
03684     "aesenc " #o1 "(%[KEY]), %%xmm7\n\t"        \
03685     "aesenc " #o1 "(%[KEY]), %%xmm8\n\t"        \
03686     "pclmulqdq  $0x00, %%xmm14, %%xmm1\n\t"         \
03687     "aesenc " #o1 "(%[KEY]), %%xmm9\n\t"        \
03688     "aesenc " #o1 "(%[KEY]), %%xmm10\n\t"       \
03689     "aesenc " #o1 "(%[KEY]), %%xmm11\n\t"       \
03690     "pxor      %%xmm2, %%xmm1\n\t"                  \
03691     "pxor      %%xmm3, %%xmm1\n\t"                  \
03692 
03693 #define AESENC_PCLMUL_N(src, o1, o2, o3)            \
03694     "movdqu " #o3 "(" VAR(HTR) "), %%xmm12\n\t" \
03695     "movdqu " #o2 "(" #src" ), %%xmm0\n\t"      \
03696     "pshufd $0x4e, %%xmm12, %%xmm13\n\t"        \
03697     "pshufb %[BSWAP_MASK], %%xmm0\n\t"          \
03698     "aesenc " #o1 "(%[KEY]), %%xmm4\n\t"        \
03699     "pxor   %%xmm12, %%xmm13\n\t"               \
03700     "pshufd $0x4e, %%xmm0, %%xmm14\n\t"         \
03701     "pxor   %%xmm0, %%xmm14\n\t"                \
03702     "movdqa %%xmm0, %%xmm15\n\t"                \
03703     "pclmulqdq  $0x11, %%xmm12, %%xmm15\n\t"        \
03704     "aesenc " #o1 "(%[KEY]), %%xmm5\n\t"        \
03705     "aesenc " #o1 "(%[KEY]), %%xmm6\n\t"        \
03706     "pclmulqdq  $0x00, %%xmm0, %%xmm12\n\t"         \
03707     "aesenc " #o1 "(%[KEY]), %%xmm7\n\t"        \
03708     "aesenc " #o1 "(%[KEY]), %%xmm8\n\t"        \
03709     "pclmulqdq  $0x00, %%xmm14, %%xmm13\n\t"        \
03710     "aesenc " #o1 "(%[KEY]), %%xmm9\n\t"        \
03711     "aesenc " #o1 "(%[KEY]), %%xmm10\n\t"       \
03712     "aesenc " #o1 "(%[KEY]), %%xmm11\n\t"       \
03713     "pxor      %%xmm12, %%xmm1\n\t"                 \
03714     "pxor      %%xmm12, %%xmm2\n\t"                 \
03715     "pxor      %%xmm15, %%xmm1\n\t"                 \
03716     "pxor      %%xmm15, %%xmm3\n\t"                 \
03717     "pxor      %%xmm13, %%xmm1\n\t"                 \
03718 
03719 #define AESENC_PCLMUL_L(o)                   \
03720     "movdqa %%xmm1, %%xmm14\n\t"         \
03721     "psrldq $8, %%xmm1\n\t"              \
03722     "pslldq $8, %%xmm14\n\t"             \
03723     "aesenc " #o "(%[KEY]), %%xmm4\n\t"  \
03724     "pxor      %%xmm14, %%xmm2\n\t"          \
03725     "pxor      %%xmm1, %%xmm3\n\t"           \
03726     "movdqa %%xmm2, %%xmm12\n\t"         \
03727     "movdqa %%xmm2, %%xmm13\n\t"         \
03728     "movdqa %%xmm2, %%xmm14\n\t"         \
03729     "aesenc " #o "(%[KEY]), %%xmm5\n\t"  \
03730     "pslld  $31, %%xmm12\n\t"            \
03731     "pslld  $30, %%xmm13\n\t"            \
03732     "pslld  $25, %%xmm14\n\t"            \
03733     "aesenc " #o "(%[KEY]), %%xmm6\n\t"  \
03734     "pxor   %%xmm13, %%xmm12\n\t"        \
03735     "pxor   %%xmm14, %%xmm12\n\t"        \
03736     "aesenc " #o "(%[KEY]), %%xmm7\n\t"  \
03737     "movdqa %%xmm12, %%xmm13\n\t"        \
03738     "pslldq $12, %%xmm12\n\t"            \
03739     "psrldq $4, %%xmm13\n\t"             \
03740     "aesenc " #o "(%[KEY]), %%xmm8\n\t"  \
03741     "pxor   %%xmm12, %%xmm2\n\t"         \
03742     "movdqa %%xmm2, %%xmm14\n\t"         \
03743     "movdqa %%xmm2, %%xmm1\n\t"          \
03744     "movdqa %%xmm2, %%xmm0\n\t"          \
03745     "aesenc " #o "(%[KEY]), %%xmm9\n\t"  \
03746     "psrld  $1, %%xmm14\n\t"             \
03747     "psrld  $2, %%xmm1\n\t"              \
03748     "psrld  $7, %%xmm0\n\t"              \
03749     "aesenc " #o "(%[KEY]), %%xmm10\n\t" \
03750     "pxor   %%xmm1, %%xmm14\n\t"         \
03751     "pxor   %%xmm0, %%xmm14\n\t"         \
03752     "aesenc " #o "(%[KEY]), %%xmm11\n\t" \
03753     "pxor   %%xmm13, %%xmm14\n\t"        \
03754     "pxor   %%xmm14, %%xmm2\n\t"         \
03755     "pxor   %%xmm3, %%xmm2\n\t"          \
03756 
03757 /* Encrypt and carry-less multiply with last key. */
/* Final AES round for the 8-block unrolled path: applies AESENCLAST with the
 * last round key (held in xmm12) to the eight state registers xmm4..xmm11,
 * XORs each result with the corresponding 16-byte input block at offsets
 * 0,16,...,112 from `in`, and stores the eight output blocks to `out`.
 * xmm0/xmm1 are used as load scratch. AES rounds and stores are interleaved
 * to hide latency.
 */
03758 #define AESENC_LAST(in, out)                \
03759     "aesenclast %%xmm12, %%xmm4\n\t"        \
03760     "aesenclast %%xmm12, %%xmm5\n\t"        \
03761     "movdqu    (" #in "),%%xmm0\n\t"    \
03762     "movdqu  16(" #in "),%%xmm1\n\t"    \
03763     "pxor   %%xmm0, %%xmm4\n\t"         \
03764     "pxor   %%xmm1, %%xmm5\n\t"         \
03765     "movdqu %%xmm4,    (" #out ")\n\t"  \
03766     "movdqu %%xmm5,  16(" #out ")\n\t"  \
03767     "aesenclast %%xmm12, %%xmm6\n\t"        \
03768     "aesenclast %%xmm12, %%xmm7\n\t"        \
03769     "movdqu  32(" #in "),%%xmm0\n\t"    \
03770     "movdqu  48(" #in "),%%xmm1\n\t"    \
03771     "pxor   %%xmm0, %%xmm6\n\t"         \
03772     "pxor   %%xmm1, %%xmm7\n\t"         \
03773     "movdqu %%xmm6,  32(" #out ")\n\t"  \
03774     "movdqu %%xmm7,  48(" #out ")\n\t"  \
03775     "aesenclast %%xmm12, %%xmm8\n\t"        \
03776     "aesenclast %%xmm12, %%xmm9\n\t"        \
03777     "movdqu  64(" #in "),%%xmm0\n\t"    \
03778     "movdqu  80(" #in "),%%xmm1\n\t"    \
03779     "pxor   %%xmm0, %%xmm8\n\t"         \
03780     "pxor   %%xmm1, %%xmm9\n\t"         \
03781     "movdqu %%xmm8,  64(" #out ")\n\t"  \
03782     "movdqu %%xmm9,  80(" #out ")\n\t"  \
03783     "aesenclast %%xmm12, %%xmm10\n\t"       \
03784     "aesenclast %%xmm12, %%xmm11\n\t"       \
03785     "movdqu  96(" #in "),%%xmm0\n\t"    \
03786     "movdqu 112(" #in "),%%xmm1\n\t"    \
03787     "pxor   %%xmm0, %%xmm10\n\t"        \
03788     "pxor   %%xmm1, %%xmm11\n\t"        \
03789     "movdqu %%xmm10,  96(" #out ")\n\t" \
03790     "movdqu %%xmm11, 112(" #out ")\n\t"
03791 
/* Full AES encryption of one register `r` (key whitening done by caller):
 * unconditionally runs rounds 1-9 with round keys at 16..144(%[KEY]), then
 * uses %[nr] to select the key schedule length: if nr >= 12 two more rounds
 * (160/176) are run, and if nr >= 14 another two (192/208); the final round
 * key (160, 192 or 224) is staged in xmm5 and applied with AESENCLAST.
 * `%=` expands to a per-asm-statement number, giving a GAS numeric local
 * label so the two `jl %=f` branches target the same `%=:` label.
 * Clobbers xmm5 and the flags.
 * AESENC_AVX is the usual one-level wrapper so a macro argument is expanded
 * before stringification inside _AESENC_AVX.
 */
03792 #define _AESENC_AVX(r)                    \
03793     "aesenc 16(%[KEY]), " #r "\n\t"   \
03794     "aesenc 32(%[KEY]), " #r "\n\t"   \
03795     "aesenc 48(%[KEY]), " #r "\n\t"   \
03796     "aesenc 64(%[KEY]), " #r "\n\t"   \
03797     "aesenc 80(%[KEY]), " #r "\n\t"   \
03798     "aesenc 96(%[KEY]), " #r "\n\t"   \
03799     "aesenc 112(%[KEY]), " #r "\n\t"  \
03800     "aesenc 128(%[KEY]), " #r "\n\t"  \
03801     "aesenc 144(%[KEY]), " #r "\n\t"  \
03802     "cmpl   $11, %[nr]\n\t"           \
03803     "movdqa 160(%[KEY]), %%xmm5\n\t"  \
03804     "jl     %=f\n\t"                  \
03805     "aesenc %%xmm5, " #r "\n\t"       \
03806     "aesenc 176(%[KEY]), " #r "\n\t"  \
03807     "cmpl   $13, %[nr]\n\t"           \
03808     "movdqa 192(%[KEY]), %%xmm5\n\t"  \
03809     "jl     %=f\n\t"                  \
03810     "aesenc %%xmm5, " #r "\n\t"       \
03811     "aesenc 208(%[KEY]), " #r "\n\t"  \
03812     "movdqa 224(%[KEY]), %%xmm5\n\t"  \
03813     "%=:\n\t"                             \
03814     "aesenclast %%xmm5, " #r "\n\t"
03815 #define AESENC_AVX(r)                     \
03816         _AESENC_AVX(r)
03817 
/* Encrypt one 16-byte block in CTR mode and fold it into GHASH:
 * loads the counter from CTR1, byte-swaps it into big-endian block form
 * (BSWAP_EPI64), increments the stored counter (paddd ONE) and writes it
 * back, whitens with round key 0, runs the full AES via AESENC_AVX, XORs
 * the keystream with the plaintext block at `in` and stores ciphertext at
 * `out`, then byte-swaps the ciphertext and XORs it into the GHASH
 * accumulator XR. Clobbers xmm4/xmm5.
 */
03818 #define AESENC_BLOCK(in, out)               \
03819     "movdqu " VAR(CTR1) ", %%xmm4\n\t"  \
03820     "movdqu %%xmm4, %%xmm5\n\t"         \
03821     "pshufb %[BSWAP_EPI64], %%xmm4\n\t" \
03822     "paddd  %[ONE], %%xmm5\n\t"         \
03823     "pxor   (%[KEY]), %%xmm4\n\t"       \
03824     "movdqu %%xmm5, " VAR(CTR1) "\n\t"  \
03825     AESENC_AVX(%%xmm4)                      \
03826     "movdqu (" #in "), %%xmm5\n\t"      \
03827     "pxor   %%xmm5, %%xmm4\n\t"         \
03828     "movdqu %%xmm4, (" #out ")\n\t"     \
03829     "pshufb %[BSWAP_MASK], %%xmm4\n\t"  \
03830     "pxor   %%xmm4, " VAR(XR) "\n\t"
03831 
/* Fused single-block CTR encryption and GHASH multiply: while one counter
 * block is pushed through the AES rounds (xmm4), the 128x128 carry-less
 * product X*H is computed with four PCLMULQDQ operations (selectors 0x00,
 * 0x01, 0x10, 0x11) and reduced modulo the GCM polynomial via the
 * MOD2_128 constant; the reduced result lands in XR. Handles AES-192/256
 * extra rounds via %[nr] exactly like _AESENC_AVX (numeric local label
 * from %=). Finally XORs the keystream with the block at `in` and stores
 * to `out`. Clobbers xmm0-xmm8 and flags; updates CTR1.
 */
03832 #define _AESENC_GFMUL(in, out, H, X)            \
03833     "movdqu " VAR(CTR1) ", %%xmm4\n\t"      \
03834     "movdqu %%xmm4, %%xmm5\n\t"             \
03835     "pshufb %[BSWAP_EPI64], %%xmm4\n\t"     \
03836     "paddd  %[ONE], %%xmm5\n\t"             \
03837     "pxor   (%[KEY]), %%xmm4\n\t"           \
03838     "movdqu %%xmm5, " VAR(CTR1) "\n\t"      \
03839     "movdqa " #X ", %%xmm6\n\t"             \
03840     "pclmulqdq  $0x10, " #H ", %%xmm6\n\t"      \
03841     "aesenc 16(%[KEY]), %%xmm4\n\t"         \
03842     "aesenc 32(%[KEY]), %%xmm4\n\t"         \
03843     "movdqa " #X ", %%xmm7\n\t"             \
03844     "pclmulqdq  $0x01, " #H ", %%xmm7\n\t"      \
03845     "aesenc 48(%[KEY]), %%xmm4\n\t"         \
03846     "aesenc 64(%[KEY]), %%xmm4\n\t"         \
03847     "movdqa " #X ", %%xmm8\n\t"             \
03848     "pclmulqdq  $0x00, " #H ", %%xmm8\n\t"      \
03849     "aesenc 80(%[KEY]), %%xmm4\n\t"         \
03850     "movdqa " #X ", %%xmm1\n\t"             \
03851     "pclmulqdq  $0x11, " #H ", %%xmm1\n\t"      \
03852     "aesenc 96(%[KEY]), %%xmm4\n\t"         \
03853     "pxor   %%xmm7, %%xmm6\n\t"             \
03854     "movdqa %%xmm6, %%xmm2\n\t"             \
03855     "psrldq $8, %%xmm6\n\t"                 \
03856     "pslldq $8, %%xmm2\n\t"                 \
03857     "aesenc 112(%[KEY]), %%xmm4\n\t"        \
03858     "movdqa %%xmm1, %%xmm3\n\t"             \
03859     "pxor   %%xmm8, %%xmm2\n\t"             \
03860     "pxor   %%xmm6, %%xmm3\n\t"             \
03861     "movdqa %[MOD2_128], %%xmm0\n\t"        \
03862     "movdqa %%xmm2, %%xmm7\n\t"             \
03863     "pclmulqdq  $0x10, %%xmm0, %%xmm7\n\t"      \
03864     "aesenc 128(%[KEY]), %%xmm4\n\t"        \
03865     "pshufd $0x4e, %%xmm2, %%xmm6\n\t"      \
03866     "pxor   %%xmm7, %%xmm6\n\t"             \
03867     "movdqa %%xmm6, %%xmm7\n\t"             \
03868     "pclmulqdq  $0x10, %%xmm0, %%xmm7\n\t"      \
03869     "aesenc 144(%[KEY]), %%xmm4\n\t"        \
03870     "pshufd $0x4e, %%xmm6, " VAR(XR) "\n\t" \
03871     "pxor   %%xmm7, " VAR(XR) "\n\t"        \
03872     "pxor   %%xmm3, " VAR(XR) "\n\t"        \
03873     "cmpl   $11, %[nr]\n\t"                 \
03874     "movdqu 160(%[KEY]), %%xmm5\n\t"        \
03875     "jl     %=f\n\t"                        \
03876     "aesenc %%xmm5, %%xmm4\n\t"             \
03877     "aesenc 176(%[KEY]), %%xmm4\n\t"        \
03878     "cmpl   $13, %[nr]\n\t"                 \
03879     "movdqu 192(%[KEY]), %%xmm5\n\t"        \
03880     "jl     %=f\n\t"                        \
03881     "aesenc %%xmm5, %%xmm4\n\t"             \
03882     "aesenc 208(%[KEY]), %%xmm4\n\t"        \
03883     "movdqa 224(%[KEY]), %%xmm5\n\t"        \
03884     "%=:\n\t"                                   \
03885     "aesenclast %%xmm5, %%xmm4\n\t"             \
03886     "movdqu (" #in "), %%xmm5\n\t"          \
03887     "pxor   %%xmm5, %%xmm4\n\t"             \
03888     "movdqu %%xmm4, (" #out ")\n\t"
03889 #define AESENC_GFMUL(in, out, H, X)             \
03890        _AESENC_GFMUL(in, out, H, X)
03891 
/* 128x128-bit carry-less multiply a * b using the Karatsuba trick:
 * three PCLMULQDQ (hi*hi, lo*lo, and (a_hi^a_lo)*(b_hi^b_lo) built via the
 * pshufd/pxor pairs) produce a 256-bit product. The high 128 bits are
 * written to `r` and the low 128 bits to `r2` (no reduction performed).
 * Clobbers xmm0-xmm3.
 */
03892 #define _GHASH_GFMUL_AVX(r, r2, a, b)      \
03893     "pshufd $0x4e, "#a", %%xmm1\n\t"   \
03894     "pshufd $0x4e, "#b", %%xmm2\n\t"   \
03895     "movdqa "#b", %%xmm3\n\t"          \
03896     "movdqa "#b", %%xmm0\n\t"          \
03897     "pclmulqdq  $0x11, "#a", %%xmm3\n\t"   \
03898     "pclmulqdq  $0x00, "#a", %%xmm0\n\t"   \
03899     "pxor   "#a", %%xmm1\n\t"          \
03900     "pxor   "#b", %%xmm2\n\t"          \
03901     "pclmulqdq  $0x00, %%xmm2, %%xmm1\n\t" \
03902     "pxor   %%xmm0, %%xmm1\n\t"        \
03903     "pxor   %%xmm3, %%xmm1\n\t"        \
03904     "movdqa %%xmm1, %%xmm2\n\t"        \
03905     "movdqa %%xmm0, "#r2"\n\t"         \
03906     "movdqa %%xmm3, " #r "\n\t"        \
03907     "pslldq $8, %%xmm2\n\t"            \
03908     "psrldq $8, %%xmm1\n\t"            \
03909     "pxor   %%xmm2, "#r2"\n\t"         \
03910     "pxor   %%xmm1, " #r "\n\t"
03911 #define GHASH_GFMUL_AVX(r, r2, a, b)       \
03912        _GHASH_GFMUL_AVX(r, r2, a, b)
03913 
/* Same Karatsuba carry-less multiply as _GHASH_GFMUL_AVX, but the 256-bit
 * product is XORed into the existing r (high half) : r2 (low half)
 * accumulator instead of overwriting it — used to accumulate several
 * block products before a single reduction. Clobbers xmm0-xmm3.
 */
03914 #define _GHASH_GFMUL_XOR_AVX(r, r2, a, b)  \
03915     "pshufd $0x4e, "#a", %%xmm1\n\t"   \
03916     "pshufd $0x4e, "#b", %%xmm2\n\t"   \
03917     "movdqa "#b", %%xmm3\n\t"          \
03918     "movdqa "#b", %%xmm0\n\t"          \
03919     "pclmulqdq  $0x11, "#a", %%xmm3\n\t"   \
03920     "pclmulqdq  $0x00, "#a", %%xmm0\n\t"   \
03921     "pxor   "#a", %%xmm1\n\t"          \
03922     "pxor   "#b", %%xmm2\n\t"          \
03923     "pclmulqdq  $0x00, %%xmm2, %%xmm1\n\t" \
03924     "pxor   %%xmm0, %%xmm1\n\t"        \
03925     "pxor   %%xmm3, %%xmm1\n\t"        \
03926     "movdqa %%xmm1, %%xmm2\n\t"        \
03927     "pxor   %%xmm0, "#r2"\n\t"         \
03928     "pxor   %%xmm3, " #r "\n\t"        \
03929     "pslldq $8, %%xmm2\n\t"            \
03930     "psrldq $8, %%xmm1\n\t"            \
03931     "pxor   %%xmm2, "#r2"\n\t"         \
03932     "pxor   %%xmm1, " #r "\n\t"
03933 #define GHASH_GFMUL_XOR_AVX(r, r2, a, b)   \
03934        _GHASH_GFMUL_XOR_AVX(r, r2, a, b)
03935 
/* Shift the 256-bit value r:r2 (r = high, r2 = low) left by one bit:
 * psrld $31 captures the bit leaving each 32-bit lane, pslld $1 shifts the
 * lanes, pslldq $4 realigns the carries into the next lane, and psrldq $12
 * moves the carry out of r2's top lane into r's bottom lane. Used between
 * the multiply and reduction steps of GHASH (bit-order adjustment).
 * Clobbers xmm0-xmm2.
 */
03936 #define GHASH_MID_AVX(r, r2)        \
03937     "movdqa "#r2", %%xmm0\n\t"  \
03938     "movdqa " #r ", %%xmm1\n\t" \
03939     "psrld  $31, %%xmm0\n\t"    \
03940     "psrld  $31, %%xmm1\n\t"    \
03941     "pslld  $1, "#r2"\n\t"      \
03942     "pslld  $1, " #r "\n\t"     \
03943     "movdqa %%xmm0, %%xmm2\n\t" \
03944     "pslldq $4, %%xmm0\n\t"     \
03945     "psrldq $12, %%xmm2\n\t"    \
03946     "pslldq $4, %%xmm1\n\t"     \
03947     "por    %%xmm2, " #r "\n\t" \
03948     "por    %%xmm0, "#r2"\n\t"  \
03949     "por    %%xmm1, " #r "\n\t"
03950 
/* Carry-less multiply a * b (Karatsuba, three PCLMULQDQ) immediately
 * followed by reduction modulo the GHASH polynomial using the shift
 * method: the pslld $31/$30/$25 and psrld $1/$2/$7 sequences implement
 * multiplication by x^128 + x^7 + x^2 + x + 1 on the low half. The fully
 * reduced 128-bit result is left in `r`. Clobbers xmm4-xmm10.
 */
03951 #define _GHASH_GFMUL_RED_AVX(r, a, b)      \
03952     "pshufd $0x4e, "#a", %%xmm5\n\t"   \
03953     "pshufd $0x4e, "#b", %%xmm6\n\t"   \
03954     "movdqa "#b", %%xmm7\n\t"          \
03955     "movdqa "#b", %%xmm4\n\t"          \
03956     "pclmulqdq  $0x11, "#a", %%xmm7\n\t"   \
03957     "pclmulqdq  $0x00, "#a", %%xmm4\n\t"   \
03958     "pxor   "#a", %%xmm5\n\t"          \
03959     "pxor   "#b", %%xmm6\n\t"          \
03960     "pclmulqdq  $0x00, %%xmm6, %%xmm5\n\t" \
03961     "pxor   %%xmm4, %%xmm5\n\t"        \
03962     "pxor   %%xmm7, %%xmm5\n\t"        \
03963     "movdqa %%xmm5, %%xmm6\n\t"        \
03964     "movdqa %%xmm7, " #r "\n\t"        \
03965     "pslldq $8, %%xmm6\n\t"            \
03966     "psrldq $8, %%xmm5\n\t"            \
03967     "pxor   %%xmm6, %%xmm4\n\t"        \
03968     "pxor   %%xmm5, " #r "\n\t"        \
03969     "movdqa %%xmm4, %%xmm8\n\t"        \
03970     "movdqa %%xmm4, %%xmm9\n\t"        \
03971     "movdqa %%xmm4, %%xmm10\n\t"       \
03972     "pslld  $31, %%xmm8\n\t"           \
03973     "pslld  $30, %%xmm9\n\t"           \
03974     "pslld  $25, %%xmm10\n\t"          \
03975     "pxor   %%xmm9, %%xmm8\n\t"        \
03976     "pxor   %%xmm10, %%xmm8\n\t"       \
03977     "movdqa %%xmm8, %%xmm9\n\t"        \
03978     "psrldq $4, %%xmm9\n\t"            \
03979     "pslldq $12, %%xmm8\n\t"           \
03980     "pxor   %%xmm8, %%xmm4\n\t"        \
03981     "movdqa %%xmm4, %%xmm10\n\t"       \
03982     "movdqa %%xmm4, %%xmm6\n\t"        \
03983     "movdqa %%xmm4, %%xmm5\n\t"        \
03984     "psrld  $1, %%xmm10\n\t"           \
03985     "psrld  $2, %%xmm6\n\t"            \
03986     "psrld  $7, %%xmm5\n\t"            \
03987     "pxor   %%xmm6, %%xmm10\n\t"       \
03988     "pxor   %%xmm5, %%xmm10\n\t"       \
03989     "pxor   %%xmm9, %%xmm10\n\t"       \
03990     "pxor   %%xmm4, %%xmm10\n\t"       \
03991     "pxor   %%xmm10, " #r "\n\t"
03992 #define GHASH_GFMUL_RED_AVX(r, a, b)       \
03993        _GHASH_GFMUL_RED_AVX(r, a, b)
03994 
/* Reduce the 256-bit value r:r2 (r = high half, r2 = low half) modulo the
 * GHASH polynomial and fold it into `r`, using the same shift-based
 * reduction (pslld 31/30/25 then psrld 1/2/7) as _GHASH_GFMUL_RED_AVX.
 * Clobbers xmm0-xmm3 and destroys r2.
 */
03995 #define GHASH_RED_AVX(r, r2)           \
03996     "movdqa "#r2", %%xmm0\n\t"     \
03997     "movdqa "#r2", %%xmm1\n\t"     \
03998     "movdqa "#r2", %%xmm2\n\t"     \
03999     "pslld  $31, %%xmm0\n\t"       \
04000     "pslld  $30, %%xmm1\n\t"       \
04001     "pslld  $25, %%xmm2\n\t"       \
04002     "pxor   %%xmm1, %%xmm0\n\t"    \
04003     "pxor   %%xmm2, %%xmm0\n\t"    \
04004     "movdqa %%xmm0, %%xmm1\n\t"    \
04005     "psrldq $4, %%xmm1\n\t"        \
04006     "pslldq $12, %%xmm0\n\t"       \
04007     "pxor   %%xmm0, "#r2"\n\t"     \
04008     "movdqa "#r2", %%xmm2\n\t"     \
04009     "movdqa "#r2", %%xmm3\n\t"     \
04010     "movdqa "#r2", %%xmm0\n\t"     \
04011     "psrld  $1, %%xmm2\n\t"        \
04012     "psrld  $2, %%xmm3\n\t"        \
04013     "psrld  $7, %%xmm0\n\t"        \
04014     "pxor   %%xmm3, %%xmm2\n\t"    \
04015     "pxor   %%xmm0, %%xmm2\n\t"    \
04016     "pxor   %%xmm1, %%xmm2\n\t"    \
04017     "pxor   "#r2", %%xmm2\n\t"     \
04018     "pxor   %%xmm2, " #r "\n\t"
04019 
/* Multiply-accumulate with reduction: XOR the product a*b into the
 * r:r2 accumulator, then reduce into r. */
04020 #define GHASH_GFMUL_RED_XOR_AVX(r, r2, a, b) \
04021     GHASH_GFMUL_XOR_AVX(r, r2, a, b)         \
04022     GHASH_RED_AVX(r, r2)
04023 
/* One complete GHASH step: carry-less multiply a*b into r:r2, shift the
 * 256-bit product left one bit (GHASH_MID_AVX), then reduce into r. */
04024 #define GHASH_FULL_AVX(r, r2, a, b) \
04025     GHASH_GFMUL_AVX(r, r2, a, b)    \
04026     GHASH_MID_AVX(r, r2)            \
04027     GHASH_RED_AVX(r, r2)
04028 
/* GCM setup fast path for the common 12-byte IV: the initial counter block
 * in xmm13 is IV || 0x00000001 (0x01000000 inserted little-endian into the
 * top dword gives big-endian 1). H = E_K(0) and T = E_K(counter0) are then
 * computed in parallel, sharing each round-key load in xmm12; the %[nr]
 * compares select 10, 12 or 14 rounds. H is byte-swapped into GHASH order
 * (HR), T is saved to TR, and control jumps to local label 39 (the common
 * continuation after IV setup), skipping the general CALC_IV() path.
 * Expects the IV pointer in %%rax. Clobbers xmm1, xmm12, ecx, flags.
 */
04029 #define CALC_IV_12() \
04030     "# Calculate values when IV is 12 bytes\n\t"      \
04031     "# Set counter based on IV\n\t"                   \
04032     "movl   $0x01000000, %%ecx\n\t"               \
04033     "pinsrq $0, 0(%%rax), %%xmm13\n\t"            \
04034     "pinsrd $2, 8(%%rax), %%xmm13\n\t"            \
04035     "pinsrd $3, %%ecx, %%xmm13\n\t"               \
04036     "# H = Encrypt X(=0) and T = Encrypt counter\n\t" \
04037     "movdqu %%xmm13, %%xmm1\n\t"                  \
04038     "movdqa   0(%[KEY]), " VAR(HR) "\n\t"         \
04039     "pxor   " VAR(HR) ", %%xmm1\n\t"              \
04040     "movdqa  16(%[KEY]), %%xmm12\n\t"             \
04041     "aesenc %%xmm12, " VAR(HR) "\n\t"             \
04042     "aesenc %%xmm12, %%xmm1\n\t"                  \
04043     "movdqa  32(%[KEY]), %%xmm12\n\t"             \
04044     "aesenc %%xmm12, " VAR(HR) "\n\t"             \
04045     "aesenc %%xmm12, %%xmm1\n\t"                  \
04046     "movdqa  48(%[KEY]), %%xmm12\n\t"             \
04047     "aesenc %%xmm12, " VAR(HR) "\n\t"             \
04048     "aesenc %%xmm12, %%xmm1\n\t"                  \
04049     "movdqa  64(%[KEY]), %%xmm12\n\t"             \
04050     "aesenc %%xmm12, " VAR(HR) "\n\t"             \
04051     "aesenc %%xmm12, %%xmm1\n\t"                  \
04052     "movdqa  80(%[KEY]), %%xmm12\n\t"             \
04053     "aesenc %%xmm12, " VAR(HR) "\n\t"             \
04054     "aesenc %%xmm12, %%xmm1\n\t"                  \
04055     "movdqa  96(%[KEY]), %%xmm12\n\t"             \
04056     "aesenc %%xmm12, " VAR(HR) "\n\t"             \
04057     "aesenc %%xmm12, %%xmm1\n\t"                  \
04058     "movdqa 112(%[KEY]), %%xmm12\n\t"             \
04059     "aesenc %%xmm12, " VAR(HR) "\n\t"             \
04060     "aesenc %%xmm12, %%xmm1\n\t"                  \
04061     "movdqa 128(%[KEY]), %%xmm12\n\t"             \
04062     "aesenc %%xmm12, " VAR(HR) "\n\t"             \
04063     "aesenc %%xmm12, %%xmm1\n\t"                  \
04064     "movdqa 144(%[KEY]), %%xmm12\n\t"             \
04065     "aesenc %%xmm12, " VAR(HR) "\n\t"             \
04066     "aesenc %%xmm12, %%xmm1\n\t"                  \
04067     "cmpl   $11, %[nr]\n\t"                       \
04068     "movdqa 160(%[KEY]), %%xmm12\n\t"             \
04069     "jl 31f\n\t"                                      \
04070     "aesenc %%xmm12, " VAR(HR) "\n\t"             \
04071     "aesenc %%xmm12, %%xmm1\n\t"                  \
04072     "movdqa 176(%[KEY]), %%xmm12\n\t"             \
04073     "aesenc %%xmm12, " VAR(HR) "\n\t"             \
04074     "aesenc %%xmm12, %%xmm1\n\t"                  \
04075     "cmpl   $13, %[nr]\n\t"                       \
04076     "movdqa 192(%[KEY]), %%xmm12\n\t"             \
04077     "jl 31f\n\t"                                      \
04078     "aesenc %%xmm12, " VAR(HR) "\n\t"             \
04079     "aesenc %%xmm12, %%xmm1\n\t"                  \
04080     "movdqu 208(%[KEY]), %%xmm12\n\t"             \
04081     "aesenc %%xmm12, " VAR(HR) "\n\t"             \
04082     "aesenc %%xmm12, %%xmm1\n\t"                  \
04083     "movdqu 224(%[KEY]), %%xmm12\n\t"             \
04084     "31:\n\t"                                         \
04085     "aesenclast %%xmm12, " VAR(HR) "\n\t"             \
04086     "aesenclast %%xmm12, %%xmm1\n\t"                  \
04087     "pshufb %[BSWAP_MASK], " VAR(HR) "\n\t"       \
04088     "movdqu %%xmm1, " VAR(TR) "\n\t"              \
04089     "jmp    39f\n\t"
04090 
/* GCM setup for an IV that is not 12 bytes: H = E_K(0) first, then the IV
 * is GHASHed into xmm13 — full 16-byte blocks in loop 43, a partial tail
 * via a zero-filled 16-byte stack buffer (labels 44/42, copied byte by
 * byte through r13b) — then the IV bit length (edx << 3) is appended and
 * GHASHed (label 45). The byte-swapped result becomes the initial counter,
 * and T = E_K(counter) is stored to TR. Expects the IV pointer in %%rax
 * and IV length in %%edx. Clobbers xmm0/xmm4/xmm12, rcx, rbx, r13, flags,
 * and temporarily moves %%rsp.
 */
04091 #define CALC_IV()                                    \
04092     "# Calculate values when IV is not 12 bytes\n\t" \
04093     "# H = Encrypt X(=0)\n\t"                        \
04094     "movdqa 0(%[KEY]), " VAR(HR) "\n\t"          \
04095     AESENC_AVX(HR)                                   \
04096     "pshufb %[BSWAP_MASK], " VAR(HR) "\n\t"      \
04097     "# Calc counter\n\t"                             \
04098     "# Initialization vector\n\t"                    \
04099     "cmpl   $0, %%edx\n\t"                       \
04100     "movq   $0, %%rcx\n\t"                       \
04101     "je 45f\n\t"                                     \
04102     "cmpl   $16, %%edx\n\t"                      \
04103     "jl 44f\n\t"                                     \
04104     "andl   $0xfffffff0, %%edx\n\t"              \
04105     "\n"                                             \
04106     "43:\n\t"                                        \
04107     "movdqu (%%rax,%%rcx,1), %%xmm4\n\t"         \
04108     "pshufb %[BSWAP_MASK], %%xmm4\n\t"           \
04109     "pxor   %%xmm4, %%xmm13\n\t"                 \
04110     GHASH_FULL_AVX(%%xmm13, %%xmm12, %%xmm13, HR)    \
04111     "addl   $16, %%ecx\n\t"                      \
04112     "cmpl   %%edx, %%ecx\n\t"                    \
04113     "jl 43b\n\t"                                     \
04114     "movl   %[ibytes], %%edx\n\t"                \
04115     "cmpl   %%edx, %%ecx\n\t"                    \
04116     "je 45f\n\t"                                     \
04117     "\n"                                             \
04118     "44:\n\t"                                        \
04119     "subq   $16, %%rsp\n\t"                      \
04120     "pxor   %%xmm4, %%xmm4\n\t"                  \
04121     "xorl   %%ebx, %%ebx\n\t"                    \
04122     "movdqu %%xmm4, (%%rsp)\n\t"                 \
04123     "42:\n\t"                                        \
04124     "movzbl (%%rax,%%rcx,1), %%r13d\n\t"         \
04125     "movb   %%r13b, (%%rsp,%%rbx,1)\n\t"         \
04126     "incl   %%ecx\n\t"                           \
04127     "incl   %%ebx\n\t"                           \
04128     "cmpl   %%edx, %%ecx\n\t"                    \
04129     "jl 42b\n\t"                                     \
04130     "movdqu (%%rsp), %%xmm4\n\t"                 \
04131     "addq   $16, %%rsp\n\t"                      \
04132     "pshufb %[BSWAP_MASK], %%xmm4\n\t"           \
04133     "pxor   %%xmm4, %%xmm13\n\t"                 \
04134     GHASH_FULL_AVX(%%xmm13, %%xmm12, %%xmm13, HR)    \
04135     "\n"                                             \
04136     "45:\n\t"                                        \
04137     "# T = Encrypt counter\n\t"                      \
04138     "pxor   %%xmm0, %%xmm0\n\t"                  \
04139     "shll   $3, %%edx\n\t"                       \
04140     "pinsrq $0, %%rdx, %%xmm0\n\t"               \
04141     "pxor   %%xmm0, %%xmm13\n\t"                 \
04142     GHASH_FULL_AVX(%%xmm13, %%xmm12, %%xmm13, HR)    \
04143     "pshufb %[BSWAP_MASK], %%xmm13\n\t"          \
04144     "#   Encrypt counter\n\t"                        \
04145     "movdqa 0(%[KEY]), %%xmm4\n\t"               \
04146     "pxor   %%xmm13, %%xmm4\n\t"                 \
04147     AESENC_AVX(%%xmm4)                               \
04148     "movdqu %%xmm4, " VAR(TR) "\n\t"
04149 
/* GHASH the additional authenticated data into the accumulator XR:
 * skip entirely when abytes == 0 (label 25); process full 16-byte blocks
 * in loop 23; a trailing partial block is copied byte-wise into a
 * zero-filled 16-byte stack buffer (labels 24/22) before the final GHASH.
 * Reads %[addt]/%[abytes]; clobbers xmm4/xmm12, rax, rbx, rcx, rdx, r13,
 * flags, and temporarily moves %%rsp.
 */
04150 #define CALC_AAD()                           \
04151     "# Additional authentication data\n\t"   \
04152     "movl   %[abytes], %%edx\n\t"        \
04153     "cmpl   $0, %%edx\n\t"               \
04154     "je     25f\n\t"                     \
04155     "movq   %[addt], %%rax\n\t"          \
04156     "xorl   %%ecx, %%ecx\n\t"            \
04157     "cmpl   $16, %%edx\n\t"              \
04158     "jl     24f\n\t"                     \
04159     "andl   $0xfffffff0, %%edx\n\t"      \
04160     "\n"                                     \
04161     "23:\n\t"                                \
04162     "movdqu (%%rax,%%rcx,1), %%xmm4\n\t" \
04163     "pshufb %[BSWAP_MASK], %%xmm4\n\t"   \
04164     "pxor   %%xmm4, " VAR(XR) "\n\t"     \
04165     GHASH_FULL_AVX(XR, %%xmm12, XR, HR)      \
04166     "addl   $16, %%ecx\n\t"              \
04167     "cmpl   %%edx, %%ecx\n\t"            \
04168     "jl     23b\n\t"                     \
04169     "movl   %[abytes], %%edx\n\t"        \
04170     "cmpl   %%edx, %%ecx\n\t"            \
04171     "je     25f\n\t"                     \
04172     "\n"                                     \
04173     "24:\n\t"                                \
04174     "subq   $16, %%rsp\n\t"              \
04175     "pxor   %%xmm4, %%xmm4\n\t"          \
04176     "xorl   %%ebx, %%ebx\n\t"            \
04177     "movdqu %%xmm4, (%%rsp)\n\t"         \
04178     "22:\n\t"                                \
04179     "movzbl (%%rax,%%rcx,1), %%r13d\n\t" \
04180     "movb   %%r13b, (%%rsp,%%rbx,1)\n\t" \
04181     "incl   %%ecx\n\t"                   \
04182     "incl   %%ebx\n\t"                   \
04183     "cmpl   %%edx, %%ecx\n\t"            \
04184     "jl     22b\n\t"                     \
04185     "movdqu (%%rsp), %%xmm4\n\t"         \
04186     "addq   $16, %%rsp\n\t"              \
04187     "pshufb %[BSWAP_MASK], %%xmm4\n\t"   \
04188     "pxor   %%xmm4, " VAR(XR) "\n\t"     \
04189     GHASH_FULL_AVX(XR, %%xmm12, XR, HR)      \
04190     "\n"                                     \
04191     "25:\n\t"
04192 
/* Precompute the table of hash-key powers H^1..H^8 used by the 8-block
 * unrolled GHASH path, storing one 16-byte power per slot at offsets
 * 0..112 from HTR. XR is parked in xmm2 first since the multiplies
 * clobber xmm registers. Each power is built from previous ones via
 * GHASH_GFMUL_RED_AVX (e.g. H^4 = H^2 * H^2, H^8 = H^4 * H^4).
 */
04193 #define CALC_HT_8_AVX()                            \
04194     "movdqa " VAR(XR) ", %%xmm2\n\t"           \
04195     "# H ^ 1\n\t"                                  \
04196     "movdqu " VAR(HR) ", 0(" VAR(HTR) ")\n\t"  \
04197     "# H ^ 2\n\t"                                  \
04198     GHASH_GFMUL_RED_AVX(%%xmm0, HR, HR)            \
04199     "movdqu %%xmm0 ,  16(" VAR(HTR) ")\n\t"    \
04200     "# H ^ 3\n\t"                                  \
04201     GHASH_GFMUL_RED_AVX(%%xmm1, HR, %%xmm0)        \
04202     "movdqu %%xmm1 ,  32(" VAR(HTR) ")\n\t"    \
04203     "# H ^ 4\n\t"                                  \
04204     GHASH_GFMUL_RED_AVX(%%xmm3, %%xmm0, %%xmm0)    \
04205     "movdqu %%xmm3 ,  48(" VAR(HTR) ")\n\t"    \
04206     "# H ^ 5\n\t"                                  \
04207     GHASH_GFMUL_RED_AVX(%%xmm12, %%xmm0, %%xmm1)   \
04208     "movdqu %%xmm12,  64(" VAR(HTR) ")\n\t"    \
04209     "# H ^ 6\n\t"                                  \
04210     GHASH_GFMUL_RED_AVX(%%xmm12, %%xmm1, %%xmm1)   \
04211     "movdqu %%xmm12,  80(" VAR(HTR) ")\n\t"    \
04212     "# H ^ 7\n\t"                                  \
04213     GHASH_GFMUL_RED_AVX(%%xmm12, %%xmm1, %%xmm3)   \
04214     "movdqu %%xmm12,  96(" VAR(HTR) ")\n\t"    \
04215     "# H ^ 8\n\t"                                  \
04216     GHASH_GFMUL_RED_AVX(%%xmm12, %%xmm3, %%xmm3)   \
04217     "movdqu %%xmm12, 112(" VAR(HTR) ")\n\t"
04218 
/* Main 128-byte loop body: encrypt the next 8 counter blocks while
 * GHASHing the previous 8 ciphertext blocks (read at offsets o-128..o-16
 * from `src`, which the caller passes as %%rcx for decrypt input or
 * %%rdx for encrypt output), fusing the AES rounds with the PCLMUL work
 * via the AESENC_PCLMUL_1/_N/_L macros (defined earlier in the file —
 * pairing each with the H^8..H^1 table slots 112..0). Extra rounds for
 * AES-192/256 are selected by %[nr] (local label 4), and AESENC_LAST
 * performs the final round plus load/XOR/store of all 8 blocks.
 * Sets rcx = in + KR64 and rdx = out + KR64 first.
 */
04219 #define AESENC_128_GHASH_AVX(src, o)                 \
04220     "leaq   (%[in]," VAR(KR64) ",1), %%rcx\n\t"  \
04221     "leaq   (%[out]," VAR(KR64) ",1), %%rdx\n\t" \
04222     /* src is either %%rcx or %%rdx */               \
04223     AESENC_CTR()                                     \
04224     AESENC_XOR()                                     \
04225     AESENC_PCLMUL_1(src,  16, o-128, 112)            \
04226     AESENC_PCLMUL_N(src,  32, o-112,  96)            \
04227     AESENC_PCLMUL_N(src,  48, o -96,  80)            \
04228     AESENC_PCLMUL_N(src,  64, o -80,  64)            \
04229     AESENC_PCLMUL_N(src,  80, o -64,  48)            \
04230     AESENC_PCLMUL_N(src,  96, o -48,  32)            \
04231     AESENC_PCLMUL_N(src, 112, o -32,  16)            \
04232     AESENC_PCLMUL_N(src, 128, o -16,   0)            \
04233     AESENC_PCLMUL_L(144)                             \
04234     "cmpl   $11, %[nr]\n\t"                      \
04235     "movdqa 160(%[KEY]), %%xmm12\n\t"            \
04236     "jl     4f\n\t"                              \
04237     AESENC()                                         \
04238     AESENC_SET(176)                                  \
04239     "cmpl   $13, %[nr]\n\t"                      \
04240     "movdqa 192(%[KEY]), %%xmm12\n\t"            \
04241     "jl     4f\n\t"                              \
04242     AESENC()                                         \
04243     AESENC_SET(208)                                  \
04244     "movdqa 224(%[KEY]), %%xmm12\n\t"            \
04245     "\n"                                             \
04246 "4:\n\t"                                             \
04247     AESENC_LAST(%%rcx, %%rdx)
04248 
/* Encrypt-side handling of a final partial block (nbytes % 16 != 0; jumps
 * to label 55 when there is none): one keystream block is generated from
 * the current counter, spilled to a 16-byte stack buffer, and the
 * remaining input bytes are XORed byte-by-byte through r13b (loop 51),
 * writing ciphertext both to `out` and back into the buffer. The unused
 * buffer tail is then zeroed (loop 52) so the padded ciphertext block can
 * be byte-swapped and GHASHed into XR. Clobbers xmm13, rcx, rdx, r13,
 * flags; advances KR; temporarily moves %%rsp.
 */
04249 #define AESENC_LAST15_ENC_AVX()                       \
04250     "movl   %[nbytes], %%ecx\n\t"                 \
04251     "movl   %%ecx, %%edx\n\t"                     \
04252     "andl   $0x0f, %%ecx\n\t"                     \
04253     "jz     55f\n\t"                              \
04254     "movdqu " VAR(CTR1) ", %%xmm13\n\t"           \
04255     "pshufb %[BSWAP_EPI64], %%xmm13\n\t"          \
04256     "pxor   0(%[KEY]), %%xmm13\n\t"               \
04257     AESENC_AVX(%%xmm13)                               \
04258     "subq   $16, %%rsp\n\t"                       \
04259     "xorl   %%ecx, %%ecx\n\t"                     \
04260     "movdqu %%xmm13, (%%rsp)\n\t"                 \
04261     "\n"                                              \
04262     "51:\n\t"                                         \
04263     "movzbl (%[in]," VAR(KR64) ",1), %%r13d\n\t"  \
04264     "xorb   (%%rsp,%%rcx,1), %%r13b\n\t"          \
04265     "movb   %%r13b, (%[out]," VAR(KR64) ",1)\n\t" \
04266     "movb   %%r13b, (%%rsp,%%rcx,1)\n\t"          \
04267     "incl   " VAR(KR) "\n\t"                      \
04268     "incl   %%ecx\n\t"                            \
04269     "cmpl   %%edx, " VAR(KR) "\n\t"               \
04270     "jl     51b\n\t"                              \
04271     "xorq   %%r13, %%r13\n\t"                     \
04272     "cmpl   $16, %%ecx\n\t"                       \
04273     "je     53f\n\t"                              \
04274     "\n"                                              \
04275     "52:\n\t"                                         \
04276     "movb   %%r13b, (%%rsp,%%rcx,1)\n\t"          \
04277     "incl   %%ecx\n\t"                            \
04278     "cmpl   $16, %%ecx\n\t"                       \
04279     "jl     52b\n\t"                              \
04280     "53:\n\t"                                         \
04281     "movdqu (%%rsp), %%xmm13\n\t"                 \
04282     "addq   $16, %%rsp\n\t"                       \
04283     "pshufb %[BSWAP_MASK], %%xmm13\n\t"           \
04284     "pxor   %%xmm13, " VAR(XR) "\n\t"             \
04285     GHASH_GFMUL_RED_AVX(XR, HR, XR)                   \
04286 
04286 
/* Decrypt-side handling of a final partial block: mirrors
 * AESENC_LAST15_ENC_AVX but uses a 32-byte stack area — keystream at
 * (%%rsp), a zeroed block at 16(%%rsp) that collects the raw CIPHERTEXT
 * bytes (copied before the XOR in loop 51, since GHASH must see
 * ciphertext, not plaintext). The zero-initialized second block means no
 * explicit tail-padding loop is needed. The padded ciphertext block is
 * then byte-swapped and GHASHed into XR. Clobbers xmm0/xmm13, rcx, rdx,
 * r13, flags; advances KR; temporarily moves %%rsp.
 */
04287 #define AESENC_LAST15_DEC_AVX()                       \
04288     "movl   %[nbytes], %%ecx\n\t"                 \
04289     "movl   %%ecx, %%edx\n\t"                     \
04290     "andl   $0x0f, %%ecx\n\t"                     \
04291     "jz     55f\n\t"                              \
04292     "movdqu " VAR(CTR1) ", %%xmm13\n\t"           \
04293     "pshufb %[BSWAP_EPI64], %%xmm13\n\t"          \
04294     "pxor   0(%[KEY]), %%xmm13\n\t"               \
04295     AESENC_AVX(%%xmm13)                               \
04296     "subq   $32, %%rsp\n\t"                       \
04297     "xorl   %%ecx, %%ecx\n\t"                     \
04298     "movdqu %%xmm13, (%%rsp)\n\t"                 \
04299     "pxor   %%xmm0, %%xmm0\n\t"                   \
04300     "movdqu %%xmm0, 16(%%rsp)\n\t"                \
04301     "\n"                                              \
04302     "51:\n\t"                                         \
04303     "movzbl (%[in]," VAR(KR64) ",1), %%r13d\n\t"  \
04304     "movb   %%r13b, 16(%%rsp,%%rcx,1)\n\t"        \
04305     "xorb   (%%rsp,%%rcx,1), %%r13b\n\t"          \
04306     "movb   %%r13b, (%[out]," VAR(KR64) ",1)\n\t" \
04307     "incl   " VAR(KR) "\n\t"                      \
04308     "incl   %%ecx\n\t"                            \
04309     "cmpl   %%edx, " VAR(KR) "\n\t"               \
04310     "jl     51b\n\t"                              \
04311     "53:\n\t"                                         \
04312     "movdqu 16(%%rsp), %%xmm13\n\t"               \
04313     "addq   $32, %%rsp\n\t"                       \
04314     "pshufb %[BSWAP_MASK], %%xmm13\n\t"           \
04315     "pxor   %%xmm13, " VAR(XR) "\n\t"             \
04316     GHASH_GFMUL_RED_AVX(XR, HR, XR)                   \
04317 
04317 
/* Compute the GCM authentication tag: build the lengths block (ciphertext
 * bit length in the low qword, AAD bit length in the high qword), XOR it
 * into the GHASH accumulator, perform the final GHASH multiply, byte-swap
 * back to memory order, and XOR with the encrypted initial counter T —
 * leaving the tag in xmm0. Clobbers rcx, rdx, flags.
 */
04318 #define CALC_TAG()                              \
04319     "movl   %[nbytes], %%edx\n\t"           \
04320     "movl   %[abytes], %%ecx\n\t"           \
04321     "shlq   $3, %%rdx\n\t"                  \
04322     "shlq   $3, %%rcx\n\t"                  \
04323     "pinsrq $0, %%rdx, %%xmm0\n\t"          \
04324     "pinsrq $1, %%rcx, %%xmm0\n\t"          \
04325     "pxor   %%xmm0, " VAR(XR) "\n\t"        \
04326     GHASH_GFMUL_RED_AVX(XR, HR, XR)             \
04327     "pshufb %[BSWAP_MASK], " VAR(XR) "\n\t" \
04328     "movdqu " VAR(TR) ", %%xmm0\n\t"        \
04329     "pxor   " VAR(XR) ", %%xmm0\n\t"        \
04330 
04330 
/* Write the tag in xmm0 to the caller's buffer: a single 16-byte movdqu
 * when tbytes == 16 (label 71); otherwise spill the tag to the stack and
 * copy tbytes bytes individually through r13b (loop 73). Clobbers rcx,
 * r13, flags; writes scratch below the pre-adjusted %%rsp.
 */
04331 #define STORE_TAG()                           \
04332     "cmpl   $16, %[tbytes]\n\t"           \
04333     "je     71f\n\t"                      \
04334     "xorq   %%rcx, %%rcx\n\t"             \
04335     "movdqu %%xmm0, (%%rsp)\n\t"          \
04336     "73:\n\t"                                 \
04337     "movzbl (%%rsp,%%rcx,1), %%r13d\n\t"  \
04338     "movb   %%r13b, (%[tag],%%rcx,1)\n\t" \
04339     "incl   %%ecx\n\t"                    \
04340     "cmpl   %[tbytes], %%ecx\n\t"         \
04341     "jne    73b\n\t"                      \
04342     "jmp    72f\n\t"                      \
04343     "\n"                                      \
04344     "71:\n\t"                                 \
04345     "movdqu %%xmm0, (%[tag])\n\t"         \
04346     "\n"                                      \
04347     "72:\n\t"
04348 
/* Compare the computed tag (xmm0) against the caller-supplied tag and
 * write 1 (match) or 0 (mismatch) to *%[res]. For a full 16-byte tag,
 * PCMPEQB + PMOVMSKB gives 0xFFFF on equality (label 71). For a shorter
 * tag the bytes are compared in a loop (73) that ORs the XOR of each byte
 * pair into %%al, so the result does not depend on WHERE a mismatch
 * occurs — only whether one exists (timing-neutral accumulation across
 * the full tbytes). Clobbers rax, rcx, rdx, r13, flags; uses 16 bytes of
 * stack scratch for the partial-tag path.
 */
04349 #define CMP_TAG()                                          \
04350     "cmpl   $16, %[tbytes]\n\t"                        \
04351     "je     71f\n\t"                                   \
04352     "subq   $16, %%rsp\n\t"                            \
04353     "xorq   %%rcx, %%rcx\n\t"                          \
04354     "xorq   %%rax, %%rax\n\t"                          \
04355     "movdqu %%xmm0, (%%rsp)\n\t"                       \
04356     "\n"                                                   \
04357     "73:\n\t"                                              \
04358     "movzbl (%%rsp,%%rcx,1), %%r13d\n\t"               \
04359     "xorb   (%[tag],%%rcx,1), %%r13b\n\t"              \
04360     "orb    %%r13b, %%al\n\t"                          \
04361     "incl   %%ecx\n\t"                                 \
04362     "cmpl   %[tbytes], %%ecx\n\t"                      \
04363     "jne    73b\n\t"                                   \
04364     "cmpb   $0x00, %%al\n\t"                           \
04365     "sete   %%al\n\t"                                  \
04366     "addq   $16, %%rsp\n\t"                            \
04367     "xorq   %%rcx, %%rcx\n\t"                          \
04368     "jmp    72f\n\t"                                   \
04369     "\n"                                                   \
04370     "71:\n\t"                                              \
04371     "movdqu (%[tag]), %%xmm1\n\t"                      \
04372     "pcmpeqb    %%xmm1, %%xmm0\n\t"                        \
04373     "pmovmskb   %%xmm0, %%edx\n\t"                         \
04374     "# %%edx == 0xFFFF then return 1 else => return 0\n\t" \
04375     "xorl   %%eax, %%eax\n\t"                          \
04376     "cmpl   $0xffff, %%edx\n\t"                        \
04377     "sete   %%al\n\t"                                  \
04378     "\n"                                                   \
04379     "72:\n\t"                                              \
04380     "movl   %%eax, (%[res])\n\t"
04381 
/* AES-GCM encryption implemented with AES-NI/SSE inline assembly.
 *
 * in     - plaintext to encrypt (nbytes long)
 * out    - output buffer for the ciphertext (nbytes long)
 * addt   - additional authenticated data (abytes long)
 * ivec   - initialization vector (ibytes long)
 * tag    - output buffer for the authentication tag (tbytes long)
 * nbytes - number of plaintext bytes
 * abytes - number of additional authenticated data bytes
 * ibytes - number of IV bytes
 * tbytes - number of tag bytes to produce
 * key    - expanded AES round-key schedule
 * nr     - number of AES rounds (10/12/14 for 128/192/256-bit keys)
 *
 * NOTE(review): relies on asm helper macros defined earlier in this file
 * (VAR, CALC_IV_12, CALC_IV, CALC_AAD, CALC_HT_8_AVX, AESENC*, CALC_TAG,
 * STORE_TAG and the HR/XR/CTR1/KR register-name macros).
 */
04382 static void AES_GCM_encrypt(const unsigned char *in, unsigned char *out,
04383                             const unsigned char* addt,
04384                             const unsigned char* ivec, unsigned char *tag,
04385                             unsigned int nbytes, unsigned int abytes,
04386                             unsigned int ibytes, unsigned int tbytes,
04387                             const unsigned char* key, int nr)
04388 {
    /* Pin the IV pointer and its length to fixed registers (rax/ebx)
     * that the asm helper macros reference directly. */
04389     register const unsigned char* iv asm("rax") = ivec;
04390     register unsigned int ivLen asm("ebx") = ibytes;
04391 
04392     __asm__ __volatile__ (
04393         "subq   $" VAR(STACK_OFFSET) ", %%rsp\n\t"
04394         /* Counter is xmm13 */
04395         "pxor   %%xmm13, %%xmm13\n\t"
04396         "pxor   " VAR(XR) ", " VAR(XR) "\n\t"
        /* 12-byte IV takes the fast path (counter = IV || 0x00000001);
         * any other IV length is GHASHed into the counter at label 35. */
04397         "movl   %[ibytes], %%edx\n\t"
04398         "cmpl   $12, %%edx\n\t"
04399         "jne    35f\n\t"
04400         CALC_IV_12()
04401         "\n"
04402         "35:\n\t"
04403         CALC_IV()
04404         "\n"
04405         "39:\n\t"
04406 
        /* Fold the additional authenticated data into the GHASH state. */
04407         CALC_AAD()
04408 
04409         "# Calculate counter and H\n\t"
04410         "pshufb %[BSWAP_EPI64], %%xmm13\n\t"
04411         "movdqa " VAR(HR) ", %%xmm5\n\t"
04412         "paddd  %[ONE], %%xmm13\n\t"
04413         "movdqa " VAR(HR) ", %%xmm4\n\t"
04414         "movdqu %%xmm13, " VAR(CTR1) "\n\t"
04415         "psrlq  $63, %%xmm5\n\t"
04416         "psllq  $1, %%xmm4\n\t"
04417         "pslldq $8, %%xmm5\n\t"
04418         "por    %%xmm5, %%xmm4\n\t"
04419         "pshufd $0xff, " VAR(HR) ", " VAR(HR) "\n\t"
04420         "psrad  $31, " VAR(HR) "\n\t"
04421         "pand   %[MOD2_128], " VAR(HR) "\n\t"
04422         "pxor   %%xmm4, " VAR(HR) "\n\t"
04423 
        /* KR holds the count of input bytes processed so far. */
04424         "xorl   " VAR(KR) ", " VAR(KR) "\n\t"
04425 
04426 #if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL)
        /* Unrolled path: process 8 blocks (128 bytes) per iteration. */
04427         "cmpl   $128, %[nbytes]\n\t"
04428         "movl   %[nbytes], %%r13d\n\t"
04429         "jl 5f\n\t"
04430         "andl   $0xffffff80, %%r13d\n\t"
04431 
04432         CALC_HT_8_AVX()
04433 
04434         "# First 128 bytes of input\n\t"
04435         AESENC_CTR()
04436         AESENC_XOR()
04437         AESENC_SET(16)
04438         AESENC_SET(32)
04439         AESENC_SET(48)
04440         AESENC_SET(64)
04441         AESENC_SET(80)
04442         AESENC_SET(96)
04443         AESENC_SET(112)
04444         AESENC_SET(128)
04445         AESENC_SET(144)
        /* Extra rounds for AES-192 (nr >= 12) and AES-256 (nr >= 14). */
04446         "cmpl   $11, %[nr]\n\t"
04447         "movdqa 160(%[KEY]), %%xmm12\n\t"
04448         "jl 1f\n\t"
04449         AESENC()
04450         AESENC_SET(176)
04451         "cmpl   $13, %[nr]\n\t"
04452         "movdqa 192(%[KEY]), %%xmm12\n\t"
04453         "jl 1f\n\t"
04454         AESENC()
04455         AESENC_SET(208)
04456         "movdqa 224(%[KEY]), %%xmm12\n\t"
04457         "\n"
04458     "1:\n\t"
04459         AESENC_LAST(%[in], %[out])
04460 
04461         "cmpl   $128, %%r13d\n\t"
04462         "movl   $128, " VAR(KR) "\n\t"
04463         "jle    2f\n\t"
04464 
04465         "# More 128 bytes of input\n\t"
04466         "\n"
04467     "3:\n\t"
04468         AESENC_128_GHASH_AVX(%%rdx, 0)
04469         "addl   $128, " VAR(KR) "\n\t"
04470         "cmpl   %%r13d, " VAR(KR) "\n\t"
04471         "jl 3b\n\t"
04472         "\n"
04473     "2:\n\t"
        /* GHASH the last 8 ciphertext blocks using the precomputed
         * powers of H in the HTR table. */
04474         "movdqa %[BSWAP_MASK], %%xmm13\n\t"
04475         "pshufb %%xmm13, %%xmm4\n\t"
04476         "pshufb %%xmm13, %%xmm5\n\t"
04477         "pshufb %%xmm13, %%xmm6\n\t"
04478         "pshufb %%xmm13, %%xmm7\n\t"
04479         "pxor   %%xmm2, %%xmm4\n\t"
04480         "pshufb %%xmm13, %%xmm8\n\t"
04481         "pshufb %%xmm13, %%xmm9\n\t"
04482         "pshufb %%xmm13, %%xmm10\n\t"
04483         "pshufb %%xmm13, %%xmm11\n\t"
04484 
04485         "movdqu 112(" VAR(HTR) "), %%xmm12\n\t"
04486         GHASH_GFMUL_AVX(XR, %%xmm13, %%xmm4, %%xmm12)
04487         "movdqu  96(" VAR(HTR) "), %%xmm12\n\t"
04488         GHASH_GFMUL_XOR_AVX(XR, %%xmm13, %%xmm5, %%xmm12)
04489         "movdqu  80(" VAR(HTR) "), %%xmm12\n\t"
04490         GHASH_GFMUL_XOR_AVX(XR, %%xmm13, %%xmm6, %%xmm12)
04491         "movdqu  64(" VAR(HTR) "), %%xmm12\n\t"
04492         GHASH_GFMUL_XOR_AVX(XR, %%xmm13, %%xmm7, %%xmm12)
04493         "movdqu  48(" VAR(HTR) "), %%xmm12\n\t"
04494         GHASH_GFMUL_XOR_AVX(XR, %%xmm13, %%xmm8, %%xmm12)
04495         "movdqu  32(" VAR(HTR) "), %%xmm12\n\t"
04496         GHASH_GFMUL_XOR_AVX(XR, %%xmm13, %%xmm9, %%xmm12)
04497         "movdqu  16(" VAR(HTR) "), %%xmm12\n\t"
04498         GHASH_GFMUL_XOR_AVX(XR, %%xmm13, %%xmm10, %%xmm12)
04499         "movdqu    (" VAR(HTR) "), %%xmm12\n\t"
04500         GHASH_GFMUL_RED_XOR_AVX(XR, %%xmm13, %%xmm11, %%xmm12)
04501 
04502         "movdqu 0(" VAR(HTR) "), " VAR(HR) "\n\t"
04503         "\n"
04504     "5:\n\t"
04505         "movl   %[nbytes], %%edx\n\t"
04506         "cmpl   %%edx, " VAR(KR) "\n\t"
04507         "jge    55f\n\t"
04508 #endif
04509 
        /* Tail: encrypt/GHASH the remaining whole 16-byte blocks one at
         * a time. */
04510         "movl   %[nbytes], %%r13d\n\t"
04511         "andl   $0xfffffff0, %%r13d\n\t"
04512         "cmpl   %%r13d, " VAR(KR) "\n\t"
04513         "jge    14f\n\t"
04514 
04515         "leaq   (%[in]," VAR(KR64) ",1), %%rcx\n\t"
04516         "leaq   (%[out]," VAR(KR64) ",1), %%rdx\n\t"
04517         AESENC_BLOCK(%%rcx, %%rdx)
04518         "addl   $16, " VAR(KR) "\n\t"
04519         "cmpl   %%r13d, " VAR(KR) "\n\t"
04520         "jge    13f\n\t"
04521         "\n"
04522         "12:\n\t"
04523         "leaq   (%[in]," VAR(KR64) ",1), %%rcx\n\t"
04524         "leaq   (%[out]," VAR(KR64) ",1), %%rdx\n\t"
04525         AESENC_GFMUL(%%rcx, %%rdx, HR, XR)
04526         "pshufb %[BSWAP_MASK], %%xmm4\n\t"
04527         "pxor   %%xmm4, " VAR(XR) "\n\t"
04528         "addl   $16, " VAR(KR) "\n\t"
04529         "cmpl   %%r13d, " VAR(KR) "\n\t"
04530         "jl 12b\n\t"
04531         "\n"
04532         "13:\n\t"
04533         GHASH_GFMUL_RED_AVX(XR, HR, XR)
04534         "\n"
04535         "14:\n\t"
04536 
        /* Encrypt any final partial block (up to 15 bytes). */
04537         AESENC_LAST15_ENC_AVX()
04538         "\n"
04539         "55:\n\t"
04540 
        /* Compute the authentication tag and write tbytes of it out. */
04541         CALC_TAG()
04542         STORE_TAG()
04543         "addq   $" VAR(STACK_OFFSET) ", %%rsp\n\t"
04544 
04545         :
04546         : [KEY] "r" (key),
04547           [in] "r" (in), [out] "r" (out), [nr] "r" (nr),
04548           [nbytes] "r" (nbytes), [abytes] "r" (abytes), [addt] "r" (addt),
04549           [ivec] "r" (iv), [ibytes] "r" (ivLen), [tbytes] "r" (tbytes),
04550           [tag] "r" (tag),
04551           [BSWAP_MASK] "m" (BSWAP_MASK),
04552           [BSWAP_EPI64] "m" (BSWAP_EPI64),
04553           [ONE] "m" (ONE),
04554 #if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL)
04555           [TWO] "m" (TWO), [THREE] "m" (THREE), [FOUR] "m" (FOUR),
04556           [FIVE] "m" (FIVE), [SIX] "m" (SIX), [SEVEN] "m" (SEVEN),
04557           [EIGHT] "m" (EIGHT),
04558 #endif
04559           [MOD2_128] "m" (MOD2_128)
04560         : "xmm15", "xmm14", "xmm13", "xmm12",
04561           "xmm0", "xmm1", "xmm2", "xmm3", "memory",
04562           "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11",
04563           "rcx", "rdx", "r13"
04564     );
04565 }
04566 
04567 #ifdef HAVE_INTEL_AVX1
04568 /* Encrypt with key in xmm12. */
/* AVX1: apply one AES round (round key in xmm12) to the eight counter
 * blocks held in xmm4-xmm11. */
04569 #define VAESENC()                              \
04570     "vaesenc    %%xmm12, %%xmm4, %%xmm4\n\t"   \
04571     "vaesenc    %%xmm12, %%xmm5, %%xmm5\n\t"   \
04572     "vaesenc    %%xmm12, %%xmm6, %%xmm6\n\t"   \
04573     "vaesenc    %%xmm12, %%xmm7, %%xmm7\n\t"   \
04574     "vaesenc    %%xmm12, %%xmm8, %%xmm8\n\t"   \
04575     "vaesenc    %%xmm12, %%xmm9, %%xmm9\n\t"   \
04576     "vaesenc    %%xmm12, %%xmm10, %%xmm10\n\t" \
04577     "vaesenc    %%xmm12, %%xmm11, %%xmm11\n\t"
04578 
/* Load the round key at byte offset o of the key schedule into xmm12 and
 * apply that round to xmm4-xmm11. */
04579 #define VAESENC_SET(o)                         \
04580     "vmovdqa    "#o"(%[KEY]), %%xmm12\n\t"     \
04581     VAESENC()
04582 
/* Build eight successive counter blocks in xmm4-xmm11 from the saved
 * counter CTR1 (adding 1..7), byte-swapping each into big-endian block
 * order, and advance xmm0 by 8 for the next batch. */
04583 #define VAESENC_CTR()                          \
04584     "vmovdqu    " VAR(CTR1) ", %%xmm0\n\t"     \
04585     "vmovdqa    %[BSWAP_EPI64], %%xmm1\n\t"    \
04586     "vpshufb    %%xmm1, %%xmm0, %%xmm4\n\t"    \
04587     "vpaddd %[ONE], %%xmm0, %%xmm5\n\t"    \
04588     "vpshufb    %%xmm1, %%xmm5, %%xmm5\n\t"    \
04589     "vpaddd %[TWO], %%xmm0, %%xmm6\n\t"    \
04590     "vpshufb    %%xmm1, %%xmm6, %%xmm6\n\t"    \
04591     "vpaddd %[THREE], %%xmm0, %%xmm7\n\t"  \
04592     "vpshufb    %%xmm1, %%xmm7, %%xmm7\n\t"    \
04593     "vpaddd %[FOUR], %%xmm0, %%xmm8\n\t"   \
04594     "vpshufb    %%xmm1, %%xmm8, %%xmm8\n\t"    \
04595     "vpaddd %[FIVE], %%xmm0, %%xmm9\n\t"   \
04596     "vpshufb    %%xmm1, %%xmm9, %%xmm9\n\t"    \
04597     "vpaddd %[SIX], %%xmm0, %%xmm10\n\t"   \
04598     "vpshufb    %%xmm1, %%xmm10, %%xmm10\n\t"  \
04599     "vpaddd %[SEVEN], %%xmm0, %%xmm11\n\t" \
04600     "vpshufb    %%xmm1, %%xmm11, %%xmm11\n\t"  \
04601     "vpaddd %[EIGHT], %%xmm0, %%xmm0\n\t"
04602 
/* AddRoundKey with round key 0 for the eight counter blocks, and save the
 * advanced counter (xmm0) back to CTR1. */
04603 #define VAESENC_XOR()                          \
04604     "vmovdqa    (%[KEY]), %%xmm12\n\t"         \
04605     "vmovdqu    %%xmm0, " VAR(CTR1) "\n\t"     \
04606     "vpxor  %%xmm12, %%xmm4, %%xmm4\n\t"   \
04607     "vpxor  %%xmm12, %%xmm5, %%xmm5\n\t"   \
04608     "vpxor  %%xmm12, %%xmm6, %%xmm6\n\t"   \
04609     "vpxor  %%xmm12, %%xmm7, %%xmm7\n\t"   \
04610     "vpxor  %%xmm12, %%xmm8, %%xmm8\n\t"   \
04611     "vpxor  %%xmm12, %%xmm9, %%xmm9\n\t"   \
04612     "vpxor  %%xmm12, %%xmm10, %%xmm10\n\t" \
04613     "vpxor  %%xmm12, %%xmm11, %%xmm11\n\t"
04614 
/* Encrypt 128 bytes (8 blocks) of input in counter mode: generate the
 * counter blocks, run all AES rounds (extra rounds taken for nr >= 12 and
 * nr >= 14), then XOR with input and store to output.  Uses local asm
 * label "1" for the round-count branches. */
04615 #define VAESENC_128()                     \
04616     VAESENC_CTR()                         \
04617     VAESENC_XOR()                         \
04618     VAESENC_SET(16)                       \
04619     VAESENC_SET(32)                       \
04620     VAESENC_SET(48)                       \
04621     VAESENC_SET(64)                       \
04622     VAESENC_SET(80)                       \
04623     VAESENC_SET(96)                       \
04624     VAESENC_SET(112)                      \
04625     VAESENC_SET(128)                      \
04626     VAESENC_SET(144)                      \
04627     "cmpl   $11, %[nr]\n\t"           \
04628     "vmovdqa    160(%[KEY]), %%xmm12\n\t" \
04629     "jl 1f\n\t"                           \
04630     VAESENC()                             \
04631     VAESENC_SET(176)                      \
04632     "cmpl   $13, %[nr]\n\t"           \
04633     "vmovdqa    192(%[KEY]), %%xmm12\n\t" \
04634     "jl 1f\n\t"                           \
04635     VAESENC()                             \
04636     VAESENC_SET(208)                      \
04637     "vmovdqa    224(%[KEY]), %%xmm12\n\t" \
04638     "\n"                                  \
04639 "1:\n\t"                                  \
04640     VAESENC_LAST(%[in], %[out])
04641 
04642 /* Encrypt and carry-less multiply for AVX1. */
/* First step of the interleaved encrypt+GHASH: apply one AES round (round
 * key at offset o1) to xmm4-xmm11 while starting the Karatsuba carry-less
 * multiply of the ciphertext block at src+o2 (folded with the running
 * hash in xmm2) by the power of H at HTR+o3.  Partial products are left
 * in xmm1 (middle), xmm2 (low) and xmm3 (high). */
04643 #define VAESENC_PCLMUL_1(src, o1, o2, o3)              \
04644     "vmovdqu    " #o3 "(" VAR(HTR) "), %%xmm12\n\t"    \
04645     "vmovdqu    " #o2 "(" #src "), %%xmm0\n\t"         \
04646     "vaesenc    " #o1 "(%[KEY]), %%xmm4, %%xmm4\n\t"   \
04647     "vpshufb    %[BSWAP_MASK], %%xmm0, %%xmm0\n\t"     \
04648     "vpxor  %%xmm2, %%xmm0, %%xmm0\n\t"            \
04649     "vpshufd    $0x4e, %%xmm12, %%xmm1\n\t"            \
04650     "vpshufd    $0x4e, %%xmm0, %%xmm14\n\t"            \
04651     "vpxor  %%xmm12, %%xmm1, %%xmm1\n\t"           \
04652     "vpxor  %%xmm0, %%xmm14, %%xmm14\n\t"          \
04653     "vpclmulqdq $0x11, %%xmm12, %%xmm0, %%xmm3\n\t"    \
04654     "vaesenc    " #o1 "(%[KEY]), %%xmm5, %%xmm5\n\t"   \
04655     "vaesenc    " #o1 "(%[KEY]), %%xmm6, %%xmm6\n\t"   \
04656     "vpclmulqdq $0x00, %%xmm12, %%xmm0, %%xmm2\n\t"    \
04657     "vaesenc    " #o1 "(%[KEY]), %%xmm7, %%xmm7\n\t"   \
04658     "vaesenc    " #o1 "(%[KEY]), %%xmm8, %%xmm8\n\t"   \
04659     "vpclmulqdq $0x00, %%xmm14, %%xmm1, %%xmm1\n\t"    \
04660     "vaesenc    " #o1 "(%[KEY]), %%xmm9, %%xmm9\n\t"   \
04661     "vaesenc    " #o1 "(%[KEY]), %%xmm10, %%xmm10\n\t" \
04662     "vaesenc    " #o1 "(%[KEY]), %%xmm11, %%xmm11\n\t" \
04663     "vpxor      %%xmm2, %%xmm1, %%xmm1\n\t"            \
04664     "vpxor      %%xmm3, %%xmm1, %%xmm1\n\t"            \
04665 
/* Middle step of the interleaved encrypt+GHASH: apply one AES round
 * (round key at offset o1) to xmm4-xmm11 while carry-less multiplying
 * the ciphertext block at src+o2 by the power of H at HTR+o3, XORing the
 * partial products into the accumulators xmm1/xmm2/xmm3. */
04666 #define VAESENC_PCLMUL_N(src, o1, o2, o3)               \
04667     "vmovdqu    " #o3 "(" VAR(HTR) "), %%xmm12\n\t"     \
04668     "vmovdqu    " #o2 "(" #src "), %%xmm0\n\t"          \
04669     "vpshufd    $0x4e, %%xmm12, %%xmm13\n\t"            \
04670     "vpshufb    %[BSWAP_MASK], %%xmm0, %%xmm0\n\t"      \
04671     "vaesenc    " #o1 "(%[KEY]), %%xmm4, %%xmm4\n\t"    \
04672     "vpxor  %%xmm12, %%xmm13, %%xmm13\n\t"          \
04673     "vpshufd    $0x4e, %%xmm0, %%xmm14\n\t"             \
04674     "vpxor  %%xmm0, %%xmm14, %%xmm14\n\t"           \
04675     "vpclmulqdq $0x11, %%xmm12, %%xmm0, %%xmm15\n\t"    \
04676     "vaesenc    " #o1 "(%[KEY]), %%xmm5, %%xmm5\n\t"    \
04677     "vaesenc    " #o1 "(%[KEY]), %%xmm6, %%xmm6\n\t"    \
04678     "vpclmulqdq $0x00, %%xmm12, %%xmm0, %%xmm12\n\t"    \
04679     "vaesenc    " #o1 "(%[KEY]), %%xmm7, %%xmm7\n\t"    \
04680     "vaesenc    " #o1 "(%[KEY]), %%xmm8, %%xmm8\n\t"    \
04681     "vpclmulqdq $0x00, %%xmm14, %%xmm13, %%xmm13\n\t"   \
04682     "vaesenc    " #o1 "(%[KEY]), %%xmm9, %%xmm9\n\t"    \
04683     "vaesenc    " #o1 "(%[KEY]), %%xmm10, %%xmm10\n\t"  \
04684     "vaesenc    " #o1 "(%[KEY]), %%xmm11, %%xmm11\n\t"  \
04685     "vpxor      %%xmm12, %%xmm1, %%xmm1\n\t"            \
04686     "vpxor      %%xmm12, %%xmm2, %%xmm2\n\t"            \
04687     "vpxor      %%xmm15, %%xmm1, %%xmm1\n\t"            \
04688     "vpxor      %%xmm15, %%xmm3, %%xmm3\n\t"            \
04689     "vpxor      %%xmm13, %%xmm1, %%xmm1\n\t"            \
04690 
/* Final step of the interleaved encrypt+GHASH: apply one AES round (round
 * key at offset o) to xmm4-xmm11 while combining the Karatsuba partial
 * products (xmm1/xmm2/xmm3) and reducing the 256-bit result modulo the
 * GCM polynomial, leaving the new hash state in xmm2. */
04691 #define VAESENC_PCLMUL_L(o)                         \
04692     "vpslldq    $8, %%xmm1, %%xmm14\n\t"            \
04693     "vpsrldq    $8, %%xmm1, %%xmm1\n\t"             \
04694     "vaesenc    "#o"(%[KEY]), %%xmm4, %%xmm4\n\t"   \
04695     "vpxor      %%xmm14, %%xmm2, %%xmm2\n\t"        \
04696     "vpxor      %%xmm1, %%xmm3, %%xmm3\n\t"         \
04697     "vaesenc    "#o"(%[KEY]), %%xmm5, %%xmm5\n\t"   \
04698     "vpslld $31, %%xmm2, %%xmm12\n\t"           \
04699     "vpslld $30, %%xmm2, %%xmm13\n\t"           \
04700     "vpslld $25, %%xmm2, %%xmm14\n\t"           \
04701     "vaesenc    "#o"(%[KEY]), %%xmm6, %%xmm6\n\t"   \
04702     "vpxor  %%xmm13, %%xmm12, %%xmm12\n\t"      \
04703     "vpxor  %%xmm14, %%xmm12, %%xmm12\n\t"      \
04704     "vaesenc    "#o"(%[KEY]), %%xmm7, %%xmm7\n\t"   \
04705     "vpsrldq    $4, %%xmm12, %%xmm13\n\t"           \
04706     "vpslldq    $12, %%xmm12, %%xmm12\n\t"          \
04707     "vaesenc    "#o"(%[KEY]), %%xmm8, %%xmm8\n\t"   \
04708     "vpxor  %%xmm12, %%xmm2, %%xmm2\n\t"        \
04709     "vpsrld $1, %%xmm2, %%xmm14\n\t"            \
04710     "vaesenc    "#o"(%[KEY]), %%xmm9, %%xmm9\n\t"   \
04711     "vpsrld $2, %%xmm2, %%xmm1\n\t"             \
04712     "vpsrld $7, %%xmm2, %%xmm0\n\t"             \
04713     "vaesenc    "#o"(%[KEY]), %%xmm10, %%xmm10\n\t" \
04714     "vpxor  %%xmm1, %%xmm14, %%xmm14\n\t"       \
04715     "vpxor  %%xmm0, %%xmm14, %%xmm14\n\t"       \
04716     "vaesenc    "#o"(%[KEY]), %%xmm11, %%xmm11\n\t" \
04717     "vpxor  %%xmm13, %%xmm14, %%xmm14\n\t"      \
04718     "vpxor  %%xmm14, %%xmm2, %%xmm2\n\t"        \
04719     "vpxor  %%xmm3, %%xmm2, %%xmm2\n\t"         \
04720 
04721 
04722 /* Encrypt and carry-less multiply with last key. */
/* Apply the final AES round (last round key in xmm12) to the eight blocks
 * in xmm4-xmm11, XOR with the 128 bytes at "in" and store the resulting
 * ciphertext to "out".  (No carry-less multiply occurs in this macro.) */
04723 #define VAESENC_LAST(in, out)                          \
04724     "vaesenclast    %%xmm12, %%xmm4, %%xmm4\n\t"   \
04725     "vaesenclast    %%xmm12, %%xmm5, %%xmm5\n\t"   \
04726     "vmovdqu           (" #in "), %%xmm0\n\t"      \
04727     "vmovdqu         16(" #in "), %%xmm1\n\t"      \
04728     "vpxor      %%xmm0, %%xmm4, %%xmm4\n\t"    \
04729     "vpxor      %%xmm1, %%xmm5, %%xmm5\n\t"    \
04730     "vmovdqu        %%xmm4,    (" #out ")\n\t"     \
04731     "vmovdqu        %%xmm5,  16(" #out ")\n\t"     \
04732     "vaesenclast    %%xmm12, %%xmm6, %%xmm6\n\t"   \
04733     "vaesenclast    %%xmm12, %%xmm7, %%xmm7\n\t"   \
04734     "vmovdqu         32(" #in "), %%xmm0\n\t"      \
04735     "vmovdqu         48(" #in "), %%xmm1\n\t"      \
04736     "vpxor      %%xmm0, %%xmm6, %%xmm6\n\t"    \
04737     "vpxor      %%xmm1, %%xmm7, %%xmm7\n\t"    \
04738     "vmovdqu        %%xmm6,  32(" #out ")\n\t"     \
04739     "vmovdqu        %%xmm7,  48(" #out ")\n\t"     \
04740     "vaesenclast    %%xmm12, %%xmm8, %%xmm8\n\t"   \
04741     "vaesenclast    %%xmm12, %%xmm9, %%xmm9\n\t"   \
04742     "vmovdqu         64(" #in "), %%xmm0\n\t"      \
04743     "vmovdqu         80(" #in "), %%xmm1\n\t"      \
04744     "vpxor      %%xmm0, %%xmm8, %%xmm8\n\t"    \
04745     "vpxor      %%xmm1, %%xmm9, %%xmm9\n\t"    \
04746     "vmovdqu        %%xmm8,  64(" #out ")\n\t"     \
04747     "vmovdqu        %%xmm9,  80(" #out ")\n\t"     \
04748     "vaesenclast    %%xmm12, %%xmm10, %%xmm10\n\t" \
04749     "vaesenclast    %%xmm12, %%xmm11, %%xmm11\n\t" \
04750     "vmovdqu         96(" #in "), %%xmm0\n\t"      \
04751     "vmovdqu        112(" #in "), %%xmm1\n\t"      \
04752     "vpxor      %%xmm0, %%xmm10, %%xmm10\n\t"  \
04753     "vpxor      %%xmm1, %%xmm11, %%xmm11\n\t"  \
04754     "vmovdqu        %%xmm10,  96(" #out ")\n\t"    \
04755     "vmovdqu        %%xmm11, 112(" #out ")\n\t"
04756 
/* Encrypt a single counter block at in+KR64 to out+KR64, incrementing the
 * saved counter CTR1 and XORing the byte-swapped ciphertext into the
 * GHASH accumulator XR.  Extra rounds are taken for nr >= 12 / nr >= 14;
 * "%=" expands to a label unique to each asm instantiation. */
04757 #define VAESENC_BLOCK()                                       \
04758     "vmovdqu        " VAR(CTR1) ", %%xmm5\n\t"            \
04759     "vpshufb        %[BSWAP_EPI64], %%xmm5, %%xmm4\n\t"   \
04760     "vpaddd     %[ONE], %%xmm5, %%xmm5\n\t"           \
04761     "vmovdqu        %%xmm5, " VAR(CTR1) "\n\t"            \
04762     "vpxor      (%[KEY]), %%xmm4, %%xmm4\n\t"         \
04763     "vaesenc        16(%[KEY]), %%xmm4, %%xmm4\n\t"       \
04764     "vaesenc        32(%[KEY]), %%xmm4, %%xmm4\n\t"       \
04765     "vaesenc        48(%[KEY]), %%xmm4, %%xmm4\n\t"       \
04766     "vaesenc        64(%[KEY]), %%xmm4, %%xmm4\n\t"       \
04767     "vaesenc        80(%[KEY]), %%xmm4, %%xmm4\n\t"       \
04768     "vaesenc        96(%[KEY]), %%xmm4, %%xmm4\n\t"       \
04769     "vaesenc        112(%[KEY]), %%xmm4, %%xmm4\n\t"      \
04770     "vaesenc        128(%[KEY]), %%xmm4, %%xmm4\n\t"      \
04771     "vaesenc        144(%[KEY]), %%xmm4, %%xmm4\n\t"      \
04772     "cmpl       $11, %[nr]\n\t"                       \
04773     "vmovdqa        160(%[KEY]), %%xmm5\n\t"              \
04774     "jl         %=f\n\t"                              \
04775     "vaesenc        %%xmm5, %%xmm4, %%xmm4\n\t"           \
04776     "vaesenc        176(%[KEY]), %%xmm4, %%xmm4\n\t"      \
04777     "cmpl       $13, %[nr]\n\t"                       \
04778     "vmovdqa        192(%[KEY]), %%xmm5\n\t"              \
04779     "jl         %=f\n\t"                              \
04780     "vaesenc        %%xmm5, %%xmm4, %%xmm4\n\t"           \
04781     "vaesenc        208(%[KEY]), %%xmm4, %%xmm4\n\t"      \
04782     "vmovdqa        224(%[KEY]), %%xmm5\n\t"              \
04783     "%=:\n\t"                                                 \
04784     "vaesenclast    %%xmm5, %%xmm4, %%xmm4\n\t"           \
04785     "vmovdqu        (%[in]," VAR(KR64) ",1), %%xmm5\n\t"  \
04786     "vpxor      %%xmm5, %%xmm4, %%xmm4\n\t"           \
04787     "vmovdqu        %%xmm4, (%[out]," VAR(KR64) ",1)\n\t" \
04788     "vpshufb        %[BSWAP_MASK], %%xmm4, %%xmm4\n\t"    \
04789     "vpxor      %%xmm4, " VAR(XR) ", " VAR(XR) "\n\t"
04790 
/* Encrypt one counter block (ciphertext written to out+KR64) while, in
 * parallel, carry-less multiplying X by H and reducing modulo the GCM
 * polynomial into XR.  The AES rounds and PCLMULQDQ operations are
 * interleaved for instruction-level parallelism.  Extra rounds are taken
 * for nr >= 12 / nr >= 14 via local asm label "1". */
04791 #define _VAESENC_GFMUL(in, H, X)                              \
04792     "vmovdqu        " VAR(CTR1) ", %%xmm5\n\t"            \
04793     "vpshufb        %[BSWAP_EPI64], %%xmm5, %%xmm4\n\t"   \
04794     "vpaddd     %[ONE], %%xmm5, %%xmm5\n\t"           \
04795     "vmovdqu        %%xmm5, " VAR(CTR1) "\n\t"            \
04796     "vpxor      (%[KEY]), %%xmm4, %%xmm4\n\t"         \
04797     "vpclmulqdq     $0x10, " #H ", " #X ", %%xmm6\n\t"    \
04798     "vaesenc        16(%[KEY]), %%xmm4, %%xmm4\n\t"       \
04799     "vaesenc        32(%[KEY]), %%xmm4, %%xmm4\n\t"       \
04800     "vpclmulqdq     $0x01, " #H ", " #X ", %%xmm7\n\t"    \
04801     "vaesenc        48(%[KEY]), %%xmm4, %%xmm4\n\t"       \
04802     "vaesenc        64(%[KEY]), %%xmm4, %%xmm4\n\t"       \
04803     "vpclmulqdq     $0x00, " #H ", " #X ", %%xmm8\n\t"    \
04804     "vaesenc        80(%[KEY]), %%xmm4, %%xmm4\n\t"       \
04805     "vpclmulqdq     $0x11, " #H ", " #X ", %%xmm1\n\t"    \
04806     "vaesenc        96(%[KEY]), %%xmm4, %%xmm4\n\t"       \
04807     "vpxor      %%xmm7, %%xmm6, %%xmm6\n\t"           \
04808     "vpslldq        $8, %%xmm6, %%xmm2\n\t"               \
04809     "vpsrldq        $8, %%xmm6, %%xmm6\n\t"               \
04810     "vaesenc        112(%[KEY]), %%xmm4, %%xmm4\n\t"      \
04811     "vpxor      %%xmm8, %%xmm2, %%xmm2\n\t"           \
04812     "vpxor      %%xmm6, %%xmm1, %%xmm3\n\t"           \
04813     "vmovdqa        %[MOD2_128], %%xmm0\n\t"              \
04814     "vpclmulqdq     $0x10, %%xmm0, %%xmm2, %%xmm7\n\t"    \
04815     "vaesenc        128(%[KEY]), %%xmm4, %%xmm4\n\t"      \
04816     "vpshufd        $0x4e, %%xmm2, %%xmm6\n\t"            \
04817     "vpxor      %%xmm7, %%xmm6, %%xmm6\n\t"           \
04818     "vpclmulqdq     $0x10, %%xmm0, %%xmm6, %%xmm7\n\t"    \
04819     "vaesenc        144(%[KEY]), %%xmm4, %%xmm4\n\t"      \
04820     "vpshufd        $0x4e, %%xmm6, %%xmm6\n\t"            \
04821     "vpxor      %%xmm7, %%xmm6, %%xmm6\n\t"           \
04822     "vpxor      %%xmm3, %%xmm6, " VAR(XR) "\n\t"      \
04823     "cmpl       $11, %[nr]\n\t"                       \
04824     "vmovdqa        160(%[KEY]), %%xmm5\n\t"              \
04825     "jl         1f\n\t"                               \
04826     "vaesenc        %%xmm5, %%xmm4, %%xmm4\n\t"           \
04827     "vaesenc        176(%[KEY]), %%xmm4, %%xmm4\n\t"      \
04828     "cmpl       $13, %[nr]\n\t"                       \
04829     "vmovdqa        192(%[KEY]), %%xmm5\n\t"              \
04830     "jl         1f\n\t"                               \
04831     "vaesenc        %%xmm5, %%xmm4, %%xmm4\n\t"           \
04832     "vaesenc        208(%[KEY]), %%xmm4, %%xmm4\n\t"      \
04833     "vmovdqa        224(%[KEY]), %%xmm5\n\t"              \
04834     "1:\n\t"                                                  \
04835     "vaesenclast    %%xmm5, %%xmm4, %%xmm4\n\t"           \
04836     "vmovdqu        " #in ", %%xmm0\n\t"                  \
04837     "vpxor      %%xmm0, %%xmm4, %%xmm4\n\t"           \
04838     "vmovdqu        %%xmm4, (%[out]," VAR(KR64) ",1)\n\t"
/* Argument-expanding wrapper so macro parameters are substituted before
 * stringization in _VAESENC_GFMUL. */
04839 #define VAESENC_GFMUL(in, H, X)                               \
04840        _VAESENC_GFMUL(in, H, X)
04841 
04842 
/* Karatsuba 128x128-bit carry-less multiply: (r2:r) = a * b as a 256-bit
 * product, without reduction.  Clobbers xmm0-xmm3. */
04843 #define _GHASH_GFMUL_AVX1(r, r2, a, b)             \
04844     "vpshufd    $0x4e, "#a", %%xmm1\n\t"           \
04845     "vpshufd    $0x4e, "#b", %%xmm2\n\t"           \
04846     "vpclmulqdq $0x11, "#a", "#b", %%xmm3\n\t"     \
04847     "vpclmulqdq $0x00, "#a", "#b", %%xmm0\n\t"     \
04848     "vpxor  "#a", %%xmm1, %%xmm1\n\t"          \
04849     "vpxor  "#b", %%xmm2, %%xmm2\n\t"          \
04850     "vpclmulqdq $0x00, %%xmm2, %%xmm1, %%xmm1\n\t" \
04851     "vpxor  %%xmm0, %%xmm1, %%xmm1\n\t"        \
04852     "vpxor  %%xmm3, %%xmm1, %%xmm1\n\t"        \
04853     "vmovdqa    %%xmm0, "#r2"\n\t"                 \
04854     "vmovdqa    %%xmm3, " #r "\n\t"                \
04855     "vpslldq    $8, %%xmm1, %%xmm2\n\t"            \
04856     "vpsrldq    $8, %%xmm1, %%xmm1\n\t"            \
04857     "vpxor  %%xmm2, "#r2", "#r2"\n\t"          \
04858     "vpxor  %%xmm1, " #r ", " #r "\n\t"
/* Argument-expanding wrapper for _GHASH_GFMUL_AVX1. */
04859 #define GHASH_GFMUL_AVX1(r, r2, a, b)              \
04860        _GHASH_GFMUL_AVX1(r, r2, a, b)
04861 
/* Like GHASH_GFMUL_AVX1, but XORs the 256-bit product a * b into the
 * existing accumulator (r2:r) instead of overwriting it.  Clobbers
 * xmm0-xmm3. */
04862 #define _GHASH_GFMUL_XOR_AVX1(r, r2, a, b)         \
04863     "vpshufd    $0x4e, "#a", %%xmm1\n\t"           \
04864     "vpshufd    $0x4e, "#b", %%xmm2\n\t"           \
04865     "vpclmulqdq $0x11, "#a", "#b", %%xmm3\n\t"     \
04866     "vpclmulqdq $0x00, "#a", "#b", %%xmm0\n\t"     \
04867     "vpxor  "#a", %%xmm1, %%xmm1\n\t"          \
04868     "vpxor  "#b", %%xmm2, %%xmm2\n\t"          \
04869     "vpclmulqdq $0x00, %%xmm2, %%xmm1, %%xmm1\n\t" \
04870     "vpxor  %%xmm0, %%xmm1, %%xmm1\n\t"        \
04871     "vpxor  %%xmm3, %%xmm1, %%xmm1\n\t"        \
04872     "vpxor  %%xmm0, "#r2", "#r2"\n\t"          \
04873     "vpxor  %%xmm3, " #r ", " #r "\n\t"        \
04874     "vpslldq    $8, %%xmm1, %%xmm2\n\t"            \
04875     "vpsrldq    $8, %%xmm1, %%xmm1\n\t"            \
04876     "vpxor  %%xmm2, "#r2", "#r2"\n\t"          \
04877     "vpxor  %%xmm1, " #r ", " #r "\n\t"
/* Argument-expanding wrapper for _GHASH_GFMUL_XOR_AVX1. */
04878 #define GHASH_GFMUL_XOR_AVX1(r, r2, a, b)          \
04879        _GHASH_GFMUL_XOR_AVX1(r, r2, a, b)
04880 
/* Shift the 256-bit value held in (r2:r) left by one bit, carrying bits
 * across the 32-bit lanes and from r2 into r.  Clobbers xmm0-xmm2. */
04881 #define GHASH_MID_AVX1(r, r2)               \
04882     "vpsrld $31, "#r2", %%xmm0\n\t"     \
04883     "vpsrld $31, " #r ", %%xmm1\n\t"    \
04884     "vpslld $1, "#r2", "#r2"\n\t"       \
04885     "vpslld $1, " #r ", " #r "\n\t"     \
04886     "vpsrldq    $12, %%xmm0, %%xmm2\n\t"    \
04887     "vpslldq    $4, %%xmm0, %%xmm0\n\t"     \
04888     "vpslldq    $4, %%xmm1, %%xmm1\n\t"     \
04889     "vpor   %%xmm2, " #r ", " #r "\n\t" \
04890     "vpor   %%xmm0, "#r2", "#r2"\n\t"   \
04891     "vpor   %%xmm1, " #r ", " #r "\n\t"
04892 
/* r = (a * b) reduced modulo the GCM polynomial: Karatsuba carry-less
 * multiply followed by the shift-based reduction.  Clobbers
 * xmm4-xmm10. */
04893 #define _GHASH_GFMUL_RED_AVX1(r, a, b)             \
04894     "vpshufd    $0x4e, "#a", %%xmm5\n\t"           \
04895     "vpshufd    $0x4e, "#b", %%xmm6\n\t"           \
04896     "vpclmulqdq $0x11, "#a", "#b", %%xmm7\n\t"     \
04897     "vpclmulqdq $0x00, "#a", "#b", %%xmm4\n\t"     \
04898     "vpxor  "#a", %%xmm5, %%xmm5\n\t"          \
04899     "vpxor  "#b", %%xmm6, %%xmm6\n\t"          \
04900     "vpclmulqdq $0x00, %%xmm6, %%xmm5, %%xmm5\n\t" \
04901     "vpxor  %%xmm4, %%xmm5, %%xmm5\n\t"        \
04902     "vpxor  %%xmm7, %%xmm5, %%xmm5\n\t"        \
04903     "vpslldq    $8, %%xmm5, %%xmm6\n\t"            \
04904     "vpsrldq    $8, %%xmm5, %%xmm5\n\t"            \
04905     "vpxor  %%xmm6, %%xmm4, %%xmm4\n\t"        \
04906     "vpxor  %%xmm5, %%xmm7, " #r "\n\t"        \
04907     "vpslld $31, %%xmm4, %%xmm8\n\t"           \
04908     "vpslld $30, %%xmm4, %%xmm9\n\t"           \
04909     "vpslld $25, %%xmm4, %%xmm10\n\t"          \
04910     "vpxor  %%xmm9, %%xmm8, %%xmm8\n\t"        \
04911     "vpxor  %%xmm10, %%xmm8, %%xmm8\n\t"       \
04912     "vpsrldq    $4, %%xmm8, %%xmm9\n\t"            \
04913     "vpslldq    $12, %%xmm8, %%xmm8\n\t"           \
04914     "vpxor  %%xmm8, %%xmm4, %%xmm4\n\t"        \
04915     "vpsrld $1, %%xmm4, %%xmm10\n\t"           \
04916     "vpsrld $2, %%xmm4, %%xmm6\n\t"            \
04917     "vpsrld $7, %%xmm4, %%xmm5\n\t"            \
04918     "vpxor  %%xmm6, %%xmm10, %%xmm10\n\t"      \
04919     "vpxor  %%xmm5, %%xmm10, %%xmm10\n\t"      \
04920     "vpxor  %%xmm9, %%xmm10, %%xmm10\n\t"      \
04921     "vpxor  %%xmm4, %%xmm10, %%xmm10\n\t"      \
04922     "vpxor  %%xmm10, " #r ", " #r "\n\t"
/* Argument-expanding wrapper for _GHASH_GFMUL_RED_AVX1. */
04923 #define GHASH_GFMUL_RED_AVX1(r, a, b)              \
04924        _GHASH_GFMUL_RED_AVX1(r, a, b)
04925 
/* r = a squared, reduced modulo the GCM polynomial.  Squaring needs no
 * Karatsuba middle term (cross terms cancel in GF(2)), so only the two
 * vpclmulqdq products are formed before reduction.  Clobbers xmm4-xmm6,
 * xmm8-xmm10 and xmm5. */
04926 #define _GHASH_GFSQR_RED_AVX1(r, a)                \
04927     "vpclmulqdq $0x00, "#a", "#a", %%xmm4\n\t"     \
04928     "vpclmulqdq $0x11, "#a", "#a", " #r "\n\t"     \
04929     "vpslld $31, %%xmm4, %%xmm8\n\t"           \
04930     "vpslld $30, %%xmm4, %%xmm9\n\t"           \
04931     "vpslld $25, %%xmm4, %%xmm10\n\t"          \
04932     "vpxor  %%xmm9, %%xmm8, %%xmm8\n\t"        \
04933     "vpxor  %%xmm10, %%xmm8, %%xmm8\n\t"       \
04934     "vpsrldq    $4, %%xmm8, %%xmm9\n\t"            \
04935     "vpslldq    $12, %%xmm8, %%xmm8\n\t"           \
04936     "vpxor  %%xmm8, %%xmm4, %%xmm4\n\t"        \
04937     "vpsrld $1, %%xmm4, %%xmm10\n\t"           \
04938     "vpsrld $2, %%xmm4, %%xmm6\n\t"            \
04939     "vpsrld $7, %%xmm4, %%xmm5\n\t"            \
04940     "vpxor  %%xmm6, %%xmm10, %%xmm10\n\t"      \
04941     "vpxor  %%xmm5, %%xmm10, %%xmm10\n\t"      \
04942     "vpxor  %%xmm9, %%xmm10, %%xmm10\n\t"      \
04943     "vpxor  %%xmm4, %%xmm10, %%xmm10\n\t"      \
04944     "vpxor  %%xmm10, " #r ", " #r "\n\t"
/* Argument-expanding wrapper for _GHASH_GFSQR_RED_AVX1. */
04945 #define GHASH_GFSQR_RED_AVX1(r, a)                 \
04946        _GHASH_GFSQR_RED_AVX1(r, a)
04947 
/* Reduce the 256-bit value (r2:r) modulo the GCM polynomial, folding the
 * low half r2 into the high half so the result lands in r.  Clobbers
 * xmm0-xmm3. */
04948 #define GHASH_RED_AVX1(r, r2)                \
04949     "vpslld $31, "#r2", %%xmm0\n\t"      \
04950     "vpslld $30, "#r2", %%xmm1\n\t"      \
04951     "vpslld $25, "#r2", %%xmm2\n\t"      \
04952     "vpxor  %%xmm1, %%xmm0, %%xmm0\n\t"  \
04953     "vpxor  %%xmm2, %%xmm0, %%xmm0\n\t"  \
04954     "vmovdqa    %%xmm0, %%xmm1\n\t"          \
04955     "vpsrldq    $4, %%xmm1, %%xmm1\n\t"      \
04956     "vpslldq    $12, %%xmm0, %%xmm0\n\t"     \
04957     "vpxor  %%xmm0, "#r2", "#r2"\n\t"    \
04958     "vpsrld $1, "#r2", %%xmm2\n\t"       \
04959     "vpsrld $2, "#r2", %%xmm3\n\t"       \
04960     "vpsrld $7, "#r2", %%xmm0\n\t"       \
04961     "vpxor  %%xmm3, %%xmm2, %%xmm2\n\t"  \
04962     "vpxor  %%xmm0, %%xmm2, %%xmm2\n\t"  \
04963     "vpxor  %%xmm1, %%xmm2, %%xmm2\n\t"  \
04964     "vpxor  "#r2", %%xmm2, %%xmm2\n\t"   \
04965     "vpxor  %%xmm2, " #r ", " #r "\n\t"
04966 
/* XOR the product a * b into the accumulator (r2:r), then reduce modulo
 * the GCM polynomial into r. */
04967 #define GHASH_GFMUL_RED_XOR_AVX1(r, r2, a, b) \
04968     GHASH_GFMUL_XOR_AVX1(r, r2, a, b)         \
04969     GHASH_RED_AVX1(r, r2)
04970 
/* Complete GHASH multiply: r = (a * b) << 1 reduced modulo the GCM
 * polynomial (multiply, one-bit left shift, then reduction). */
04971 #define GHASH_FULL_AVX1(r, r2, a, b) \
04972     GHASH_GFMUL_AVX1(r, r2, a, b)    \
04973     GHASH_MID_AVX1(r, r2)            \
04974     GHASH_RED_AVX1(r, r2)
04975 
/* Fast path for a 12-byte IV (AVX1): build the initial counter as
 * IV || 0x00000001 in xmm13, then encrypt the all-zero block (producing
 * the hash key H in HR) and the counter block (producing the tag mask,
 * stored to TR) in parallel through all AES rounds.  Extra rounds are
 * taken for nr >= 12 / nr >= 14 via local label 31; jumps to label 39
 * (shared with the non-12-byte IV path) when done.  Assumes the IV
 * pointer is in rax (pinned by the caller). */
04976 #define CALC_IV_12_AVX1()                                            \
04977     "# Calculate values when IV is 12 bytes\n\t"                     \
04978     "# Set counter based on IV\n\t"                                  \
04979     "movl       $0x01000000, %%ecx\n\t"                      \
04980     "vpinsrq        $0, 0(%%rax), %%xmm13, %%xmm13\n\t"          \
04981     "vpinsrd        $2, 8(%%rax), %%xmm13, %%xmm13\n\t"          \
04982     "vpinsrd        $3, %%ecx, %%xmm13, %%xmm13\n\t"             \
04983     "# H = Encrypt X(=0) and T = Encrypt counter\n\t"                \
04984     "vmovdqa          0(%[KEY]), " VAR(HR) "\n\t"                \
04985     "vpxor      " VAR(HR) ", %%xmm13, %%xmm1\n\t"            \
04986     "vmovdqa         16(%[KEY]), %%xmm12\n\t"                    \
04987     "vaesenc        %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
04988     "vaesenc        %%xmm12, %%xmm1, %%xmm1\n\t"                 \
04989     "vmovdqa         32(%[KEY]), %%xmm12\n\t"                    \
04990     "vaesenc        %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
04991     "vaesenc        %%xmm12, %%xmm1, %%xmm1\n\t"                 \
04992     "vmovdqa         48(%[KEY]), %%xmm12\n\t"                    \
04993     "vaesenc        %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
04994     "vaesenc        %%xmm12, %%xmm1, %%xmm1\n\t"                 \
04995     "vmovdqa         64(%[KEY]), %%xmm12\n\t"                    \
04996     "vaesenc        %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
04997     "vaesenc        %%xmm12, %%xmm1, %%xmm1\n\t"                 \
04998     "vmovdqa         80(%[KEY]), %%xmm12\n\t"                    \
04999     "vaesenc        %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
05000     "vaesenc        %%xmm12, %%xmm1, %%xmm1\n\t"                 \
05001     "vmovdqa         96(%[KEY]), %%xmm12\n\t"                    \
05002     "vaesenc        %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
05003     "vaesenc        %%xmm12, %%xmm1, %%xmm1\n\t"                 \
05004     "vmovdqa        112(%[KEY]), %%xmm12\n\t"                    \
05005     "vaesenc        %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
05006     "vaesenc        %%xmm12, %%xmm1, %%xmm1\n\t"                 \
05007     "vmovdqa        128(%[KEY]), %%xmm12\n\t"                    \
05008     "vaesenc        %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
05009     "vaesenc        %%xmm12, %%xmm1, %%xmm1\n\t"                 \
05010     "vmovdqa        144(%[KEY]), %%xmm12\n\t"                    \
05011     "vaesenc        %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
05012     "vaesenc        %%xmm12, %%xmm1, %%xmm1\n\t"                 \
05013     "cmpl       $11, %[nr]\n\t"                              \
05014     "vmovdqa        160(%[KEY]), %%xmm12\n\t"                    \
05015     "jl 31f\n\t"                                                     \
05016     "vaesenc        %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
05017     "vaesenc        %%xmm12, %%xmm1, %%xmm1\n\t"                 \
05018     "vmovdqa        176(%[KEY]), %%xmm12\n\t"                    \
05019     "vaesenc        %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
05020     "vaesenc        %%xmm12, %%xmm1, %%xmm1\n\t"                 \
05021     "cmpl       $13, %[nr]\n\t"                              \
05022     "vmovdqa        192(%[KEY]), %%xmm12\n\t"                    \
05023     "jl 31f\n\t"                                                     \
05024     "vaesenc        %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
05025     "vaesenc        %%xmm12, %%xmm1, %%xmm1\n\t"                 \
05026     "vmovdqa        208(%[KEY]), %%xmm12\n\t"                    \
05027     "vaesenc        %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
05028     "vaesenc        %%xmm12, %%xmm1, %%xmm1\n\t"                 \
05029     "vmovdqu        224(%[KEY]), %%xmm12\n\t"                    \
05030     "31:\n\t"                                                        \
05031     "vaesenclast    %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
05032     "vaesenclast    %%xmm12, %%xmm1, %%xmm1\n\t"                 \
05033     "vpshufb        %[BSWAP_MASK], " VAR(HR) ", " VAR(HR) "\n\t" \
05034     "vmovdqu        %%xmm1, " VAR(TR) "\n\t"                     \
05035     "jmp        39f\n\t"
05036 
/* CALC_IV_AVX1(): IV processing for IV lengths other than 12 bytes
 * (the GHASH-based pre-counter derivation of AES-GCM).
 * Computes the hash key H = AES-Enc_K(0^128) into HR (loading key[0] is
 * the round-0 XOR of a zero block; VAESENC_AVX supplies the remaining
 * rounds), then GHASHes the IV: full 16-byte blocks in the loop at label
 * 43, a trailing partial block via a zero-padded 16-byte stack buffer
 * (labels 44/42).  At label 45 the IV bit length (edx << 3) is folded in
 * with one more GHASH step, the result is byte-swapped to form the
 * pre-counter block, and that block is AES-encrypted to give the tag
 * mask T stored at VAR(TR).
 * NOTE(review): relies on rax = IV pointer and edx = IV byte length on
 * entry (bound by the enclosing function) -- confirm against call site.
 * Clobbers rbx/rcx/rdx, r13, xmm0, xmm4, xmm12, xmm13. */
05037 #define CALC_IV_AVX1()                                       \
05038     "# Calculate values when IV is not 12 bytes\n\t"         \
05039     "# H = Encrypt X(=0)\n\t"                                \
05040     "vmovdqa    0(%[KEY]), " VAR(HR) "\n\t"                  \
05041     VAESENC_AVX(HR)                                          \
05042     "vpshufb    %[BSWAP_MASK], " VAR(HR) ", " VAR(HR) "\n\t" \
05043     "# Calc counter\n\t"                                     \
05044     "# Initialization vector\n\t"                            \
05045     "cmpl   $0, %%edx\n\t"                               \
05046     "movq   $0, %%rcx\n\t"                               \
05047     "je 45f\n\t"                                             \
05048     "cmpl   $16, %%edx\n\t"                              \
05049     "jl 44f\n\t"                                             \
05050     "andl   $0xfffffff0, %%edx\n\t"                      \
05051     "\n"                                                     \
05052     "43:\n\t"                                                \
05053     "vmovdqu    (%%rax,%%rcx,1), %%xmm4\n\t"                 \
05054     "vpshufb    %[BSWAP_MASK], %%xmm4, %%xmm4\n\t"           \
05055     "vpxor  %%xmm4, %%xmm13, %%xmm13\n\t"                \
05056     GHASH_FULL_AVX1(%%xmm13, %%xmm12, %%xmm13, HR)           \
05057     "addl   $16, %%ecx\n\t"                              \
05058     "cmpl   %%edx, %%ecx\n\t"                            \
05059     "jl 43b\n\t"                                             \
05060     "movl   %[ibytes], %%edx\n\t"                        \
05061     "cmpl   %%edx, %%ecx\n\t"                            \
05062     "je 45f\n\t"                                             \
05063     "\n"                                                     \
05064     "44:\n\t"                                                \
05065     "subq   $16, %%rsp\n\t"                              \
05066     "vpxor  %%xmm4, %%xmm4, %%xmm4\n\t"                  \
05067     "xorl   %%ebx, %%ebx\n\t"                            \
05068     "vmovdqu    %%xmm4, (%%rsp)\n\t"                         \
05069     "42:\n\t"                                                \
05070     "movzbl (%%rax,%%rcx,1), %%r13d\n\t"                 \
05071     "movb   %%r13b, (%%rsp,%%rbx,1)\n\t"                 \
05072     "incl   %%ecx\n\t"                                   \
05073     "incl   %%ebx\n\t"                                   \
05074     "cmpl   %%edx, %%ecx\n\t"                            \
05075     "jl 42b\n\t"                                             \
05076     "vmovdqu    (%%rsp), %%xmm4\n\t"                         \
05077     "addq   $16, %%rsp\n\t"                              \
05078     "vpshufb    %[BSWAP_MASK], %%xmm4, %%xmm4\n\t"           \
05079     "vpxor  %%xmm4, %%xmm13, %%xmm13\n\t"                \
05080     GHASH_FULL_AVX1(%%xmm13, %%xmm12, %%xmm13, HR)           \
05081     "\n"                                                     \
05082     "45:\n\t"                                                \
05083     "# T = Encrypt counter\n\t"                              \
05084     "vpxor  %%xmm0, %%xmm0, %%xmm0\n\t"                  \
05085     "shll   $3, %%edx\n\t"                               \
05086     "vpinsrq    $0, %%rdx, %%xmm0, %%xmm0\n\t"               \
05087     "vpxor  %%xmm0, %%xmm13, %%xmm13\n\t"                \
05088     GHASH_FULL_AVX1(%%xmm13, %%xmm12, %%xmm13, HR)           \
05089     "vpshufb    %[BSWAP_MASK], %%xmm13, %%xmm13\n\t"         \
05090     "#   Encrypt counter\n\t"                                \
05091     "vmovdqa    0(%[KEY]), %%xmm4\n\t"                       \
05092     "vpxor  %%xmm13, %%xmm4, %%xmm4\n\t"                 \
05093     VAESENC_AVX(%%xmm4)                                      \
05094     "vmovdqu    %%xmm4, " VAR(TR) "\n\t"
05095 
/* CALC_AAD_AVX1(): fold the additional authenticated data (%[addt],
 * %[abytes] bytes) into the running GHASH accumulator XR.  Full 16-byte
 * blocks are hashed in the loop at label 23; a trailing partial block is
 * zero-padded in a temporary 16-byte stack buffer (labels 24/22) before
 * being hashed.  Label 25 is the common exit (also taken when abytes is
 * 0).  Clobbers rax/rbx/rcx/rdx, r13, xmm4 and xmm12 (scratch for
 * GHASH_FULL_AVX1). */
05096 #define CALC_AAD_AVX1()                                \
05097     "# Additional authentication data\n\t"             \
05098     "movl   %[abytes], %%edx\n\t"                  \
05099     "cmpl   $0, %%edx\n\t"                         \
05100     "je     25f\n\t"                               \
05101     "movq   %[addt], %%rax\n\t"                    \
05102     "xorl   %%ecx, %%ecx\n\t"                      \
05103     "cmpl   $16, %%edx\n\t"                        \
05104     "jl     24f\n\t"                               \
05105     "andl   $0xfffffff0, %%edx\n\t"                \
05106     "\n"                                               \
05107     "23:\n\t"                                          \
05108     "vmovdqu    (%%rax,%%rcx,1), %%xmm4\n\t"           \
05109     "vpshufb    %[BSWAP_MASK], %%xmm4, %%xmm4\n\t"     \
05110     "vpxor  %%xmm4, " VAR(XR) ", " VAR(XR) "\n\t"  \
05111     GHASH_FULL_AVX1(XR, %%xmm12, XR, HR)               \
05112     "addl   $16, %%ecx\n\t"                        \
05113     "cmpl   %%edx, %%ecx\n\t"                      \
05114     "jl     23b\n\t"                               \
05115     "movl   %[abytes], %%edx\n\t"                  \
05116     "cmpl   %%edx, %%ecx\n\t"                      \
05117     "je     25f\n\t"                               \
05118     "\n"                                               \
05119     "24:\n\t"                                          \
05120     "subq   $16, %%rsp\n\t"                        \
05121     "vpxor  %%xmm4, %%xmm4, %%xmm4\n\t"            \
05122     "xorl   %%ebx, %%ebx\n\t"                      \
05123     "vmovdqu    %%xmm4, (%%rsp)\n\t"                   \
05124     "22:\n\t"                                          \
05125     "movzbl (%%rax,%%rcx,1), %%r13d\n\t"           \
05126     "movb   %%r13b, (%%rsp,%%rbx,1)\n\t"           \
05127     "incl   %%ecx\n\t"                             \
05128     "incl   %%ebx\n\t"                             \
05129     "cmpl   %%edx, %%ecx\n\t"                      \
05130     "jl     22b\n\t"                               \
05131     "vmovdqu    (%%rsp), %%xmm4\n\t"                   \
05132     "addq   $16, %%rsp\n\t"                        \
05133     "vpshufb    %[BSWAP_MASK], %%xmm4, %%xmm4\n\t"     \
05134     "vpxor  %%xmm4, " VAR(XR) ", " VAR(XR) "\n\t"  \
05135     GHASH_FULL_AVX1(XR, %%xmm12, XR, HR)               \
05136     "\n"                                               \
05137     "25:\n\t"
05138 
/* CALC_HT_8_AVX1(): precompute the GHASH key powers H^1..H^8 and store
 * them at HTR+0 .. HTR+112 for the 8-blocks-per-iteration unrolled path.
 * Even powers come from GF(2^128) squarings (GHASH_GFSQR_RED_AVX1: H^2
 * from H, H^4 from H^2, H^6 from H^3, H^8 from H^4); odd powers from
 * multiplies of lower powers (H^3 = H*H^2, H^5 = H^2*H^3, H^7 = H^3*H^4).
 * The current GHASH accumulator XR is parked in xmm2 first, since xmm0,
 * xmm1, xmm3 and xmm12 are used to stage intermediate powers. */
05139 #define CALC_HT_8_AVX1()                          \
05140     "vmovdqa    " VAR(XR) ", %%xmm2\n\t"          \
05141     "# H ^ 1\n\t"                                 \
05142     "vmovdqu    " VAR(HR) ", 0(" VAR(HTR) ")\n\t" \
05143     "# H ^ 2\n\t"                                 \
05144     GHASH_GFSQR_RED_AVX1(%%xmm0, HR)              \
05145     "vmovdqu    %%xmm0 ,  16(" VAR(HTR) ")\n\t"   \
05146     "# H ^ 3\n\t"                                 \
05147     GHASH_GFMUL_RED_AVX1(%%xmm1, HR, %%xmm0)      \
05148     "vmovdqu    %%xmm1 ,  32(" VAR(HTR) ")\n\t"   \
05149     "# H ^ 4\n\t"                                 \
05150     GHASH_GFSQR_RED_AVX1(%%xmm3, %%xmm0)          \
05151     "vmovdqu    %%xmm3 ,  48(" VAR(HTR) ")\n\t"   \
05152     "# H ^ 5\n\t"                                 \
05153     GHASH_GFMUL_RED_AVX1(%%xmm12, %%xmm0, %%xmm1) \
05154     "vmovdqu    %%xmm12,  64(" VAR(HTR) ")\n\t"   \
05155     "# H ^ 6\n\t"                                 \
05156     GHASH_GFSQR_RED_AVX1(%%xmm12, %%xmm1)         \
05157     "vmovdqu    %%xmm12,  80(" VAR(HTR) ")\n\t"   \
05158     "# H ^ 7\n\t"                                 \
05159     GHASH_GFMUL_RED_AVX1(%%xmm12, %%xmm1, %%xmm3) \
05160     "vmovdqu    %%xmm12,  96(" VAR(HTR) ")\n\t"   \
05161     "# H ^ 8\n\t"                                 \
05162     GHASH_GFSQR_RED_AVX1(%%xmm12, %%xmm3)         \
05163     "vmovdqu    %%xmm12, 112(" VAR(HTR) ")\n\t"
05164 
/* VAESENC_128_GHASH_AVX1(src, o): one iteration of the unrolled main
 * loop -- AES-CTR encrypt the next 8 counter blocks while GHASHing the
 * previous 128 bytes of ciphertext, interleaving a PCLMUL step with each
 * AES round to hide latency.  'src' selects which stream is hashed: the
 * encrypt path passes %%rdx (output/ciphertext pointer set up by the
 * second leaq); a decrypt caller would pass %%rcx (input).  The hashed
 * blocks are read at offsets (o-128)..(o-16) from src and multiplied by
 * the key powers at HTR+112 down to HTR+0 (highest power for the oldest
 * block).  The cmpl $11/$13 tests on %[nr] add the extra rounds for
 * AES-192/AES-256; label 4 runs the shared AESENCLAST/store step. */
05165 #define VAESENC_128_GHASH_AVX1(src, o)               \
05166     "leaq   (%[in]," VAR(KR64) ",1), %%rcx\n\t"  \
05167     "leaq   (%[out]," VAR(KR64) ",1), %%rdx\n\t" \
05168     /* src is either %%rcx or %%rdx */             \
05169     VAESENC_CTR()                                  \
05170     VAESENC_XOR()                                  \
05171     VAESENC_PCLMUL_1(src,  16, (o-128), 112)       \
05172     VAESENC_PCLMUL_N(src,  32, (o-112),  96)       \
05173     VAESENC_PCLMUL_N(src,  48, (o- 96),  80)       \
05174     VAESENC_PCLMUL_N(src,  64, (o- 80),  64)       \
05175     VAESENC_PCLMUL_N(src,  80, (o- 64),  48)       \
05176     VAESENC_PCLMUL_N(src,  96, (o- 48),  32)       \
05177     VAESENC_PCLMUL_N(src, 112, (o- 32),  16)       \
05178     VAESENC_PCLMUL_N(src, 128, (o- 16),   0)       \
05179     VAESENC_PCLMUL_L(144)                          \
05180     "cmpl   $11, %[nr]\n\t"                    \
05181     "vmovdqa    160(%[KEY]), %%xmm12\n\t"          \
05182     "jl     4f\n\t"                            \
05183     VAESENC()                                      \
05184     VAESENC_SET(176)                               \
05185     "cmpl   $13, %[nr]\n\t"                    \
05186     "vmovdqa    192(%[KEY]), %%xmm12\n\t"          \
05187     "jl     4f\n\t"                            \
05188     VAESENC()                                      \
05189     VAESENC_SET(208)                               \
05190     "vmovdqa    224(%[KEY]), %%xmm12\n\t"          \
05191     "\n"                                           \
05192 "4:\n\t"                                           \
05193     VAESENC_LAST(%%rcx, %%rdx)
05194 
/* _VAESENC_AVX(r): apply AES rounds 1..Nr-1 and the final AESENCLAST to
 * the single register r.  The round-0 key XOR must already have been
 * applied by the caller.  The cmpl $11/$13 tests on %[nr] skip the extra
 * rounds for 128/192-bit keys; %= expands to a number unique to the
 * enclosing asm statement, and because it is used as a GNU numeric local
 * label ("%=:" / "jl %=f") the macro can be expanded several times
 * within one asm block without label clashes.  Clobbers xmm5. */
05195 #define _VAESENC_AVX(r)                                  \
05196     "vaesenc        16(%[KEY]), " #r ", " #r "\n\t"  \
05197     "vaesenc        32(%[KEY]), " #r ", " #r "\n\t"  \
05198     "vaesenc        48(%[KEY]), " #r ", " #r "\n\t"  \
05199     "vaesenc        64(%[KEY]), " #r ", " #r "\n\t"  \
05200     "vaesenc        80(%[KEY]), " #r ", " #r "\n\t"  \
05201     "vaesenc        96(%[KEY]), " #r ", " #r "\n\t"  \
05202     "vaesenc        112(%[KEY]), " #r ", " #r "\n\t" \
05203     "vaesenc        128(%[KEY]), " #r ", " #r "\n\t" \
05204     "vaesenc        144(%[KEY]), " #r ", " #r "\n\t" \
05205     "cmpl       $11, %[nr]\n\t"                  \
05206     "vmovdqa        160(%[KEY]), %%xmm5\n\t"         \
05207     "jl         %=f\n\t"                         \
05208     "vaesenc        %%xmm5, " #r ", " #r "\n\t"      \
05209     "vaesenc        176(%[KEY]), " #r ", " #r "\n\t" \
05210     "cmpl       $13, %[nr]\n\t"                  \
05211     "vmovdqa        192(%[KEY]), %%xmm5\n\t"         \
05212     "jl         %=f\n\t"                         \
05213     "vaesenc        %%xmm5, " #r ", " #r "\n\t"      \
05214     "vaesenc        208(%[KEY]), " #r ", " #r "\n\t" \
05215     "vmovdqa        224(%[KEY]), %%xmm5\n\t"         \
05216     "%=:\n\t"                                            \
05217     "vaesenclast    %%xmm5, " #r ", " #r "\n\t"
/* VAESENC_AVX(r): indirection layer so that macro arguments (e.g. a
 * register-name macro) are fully expanded before _VAESENC_AVX
 * stringizes them with #r. */
05218 #define VAESENC_AVX(r)                                   \
05219         _VAESENC_AVX(r)
05220 
/* AESENC_LAST15_ENC_AVX1(): encrypt the final partial block (1-15
 * bytes); jumps straight to label 55 when nbytes is a multiple of 16.
 * Generates one more keystream block from the counter at CTR1, then in
 * loop 51 XORs input bytes with the keystream through a 16-byte stack
 * buffer, writing ciphertext both to out+KR64 and back into the buffer.
 * Loop 52 zero-pads the unused tail of the buffer so that, at label 53,
 * the buffer holds the zero-padded ciphertext block, which is folded
 * into the GHASH accumulator XR.  Clobbers rcx/rdx, r13, xmm13. */
05221 #define AESENC_LAST15_ENC_AVX1()                        \
05222     "movl   %[nbytes], %%ecx\n\t"                   \
05223     "movl   %%ecx, %%edx\n\t"                       \
05224     "andl   $0x0f, %%ecx\n\t"                       \
05225     "jz     55f\n\t"                                \
05226     "vmovdqu    " VAR(CTR1) ", %%xmm13\n\t"             \
05227     "vpshufb    %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t"   \
05228     "vpxor  0(%[KEY]), %%xmm13, %%xmm13\n\t"        \
05229     VAESENC_AVX(%%xmm13)                                \
05230     "subq   $16, %%rsp\n\t"                         \
05231     "xorl   %%ecx, %%ecx\n\t"                       \
05232     "vmovdqu    %%xmm13, (%%rsp)\n\t"                   \
05233     "\n"                                                \
05234     "51:\n\t"                                           \
05235     "movzbl (%[in]," VAR(KR64) ",1), %%r13d\n\t"    \
05236     "xorb   (%%rsp,%%rcx,1), %%r13b\n\t"            \
05237     "movb   %%r13b, (%[out]," VAR(KR64) ",1)\n\t"   \
05238     "movb   %%r13b, (%%rsp,%%rcx,1)\n\t"            \
05239     "incl   " VAR(KR) "\n\t"                        \
05240     "incl   %%ecx\n\t"                              \
05241     "cmpl   %%edx, " VAR(KR) "\n\t"                 \
05242     "jl     51b\n\t"                                \
05243     "xorq   %%r13, %%r13\n\t"                       \
05244     "cmpl   $16, %%ecx\n\t"                         \
05245     "je     53f\n\t"                                \
05246     "\n"                                                \
05247     "52:\n\t"                                           \
05248     "movb   %%r13b, (%%rsp,%%rcx,1)\n\t"            \
05249     "incl   %%ecx\n\t"                              \
05250     "cmpl   $16, %%ecx\n\t"                         \
05251     "jl     52b\n\t"                                \
05252     "53:\n\t"                                           \
05253     "vmovdqu    (%%rsp), %%xmm13\n\t"                   \
05254     "addq   $16, %%rsp\n\t"                         \
05255     "vpshufb    %[BSWAP_MASK], %%xmm13, %%xmm13\n\t"    \
05256     "vpxor  %%xmm13, " VAR(XR) ", " VAR(XR) "\n\t"  \
05257     GHASH_GFMUL_RED_AVX1(XR, HR, XR)                    \
05258 
/* AESENC_LAST15_DEC_AVX1(): decrypt the final partial block (1-15
 * bytes); jumps to label 55 when nbytes is a multiple of 16.  Unlike the
 * ENC variant, GHASH must cover the received ciphertext, so 32 stack
 * bytes are used: the keystream block at (%%rsp) and a zero-initialised
 * slot at 16(%%rsp) that collects the raw ciphertext bytes in loop 51
 * before they are XORed with the keystream to produce plaintext.  The
 * zero-padded ciphertext block is then folded into the GHASH accumulator
 * XR.  Clobbers rcx/rdx, r13, xmm0, xmm13. */
05259 #define AESENC_LAST15_DEC_AVX1()                        \
05260     "movl   %[nbytes], %%ecx\n\t"                   \
05261     "movl   %%ecx, %%edx\n\t"                       \
05262     "andl   $0x0f, %%ecx\n\t"                       \
05263     "jz     55f\n\t"                                \
05264     "vmovdqu    " VAR(CTR1) ", %%xmm13\n\t"             \
05265     "vpshufb    %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t"   \
05266     "vpxor  0(%[KEY]), %%xmm13, %%xmm13\n\t"        \
05267     VAESENC_AVX(%%xmm13)                                \
05268     "subq   $32, %%rsp\n\t"                         \
05269     "xorl   %%ecx, %%ecx\n\t"                       \
05270     "vmovdqu    %%xmm13, (%%rsp)\n\t"                   \
05271     "vpxor  %%xmm0, %%xmm0, %%xmm0\n\t"             \
05272     "vmovdqu    %%xmm0, 16(%%rsp)\n\t"                  \
05273     "\n"                                                \
05274     "51:\n\t"                                           \
05275     "movzbl (%[in]," VAR(KR64) ",1), %%r13d\n\t"    \
05276     "movb   %%r13b, 16(%%rsp,%%rcx,1)\n\t"          \
05277     "xorb   (%%rsp,%%rcx,1), %%r13b\n\t"            \
05278     "movb   %%r13b, (%[out]," VAR(KR64) ",1)\n\t"   \
05279     "incl   " VAR(KR) "\n\t"                        \
05280     "incl   %%ecx\n\t"                              \
05281     "cmpl   %%edx, " VAR(KR) "\n\t"                 \
05282     "jl     51b\n\t"                                \
05283     "53:\n\t"                                           \
05284     "vmovdqu    16(%%rsp), %%xmm13\n\t"                 \
05285     "addq   $32, %%rsp\n\t"                         \
05286     "vpshufb    %[BSWAP_MASK], %%xmm13, %%xmm13\n\t"    \
05287     "vpxor  %%xmm13, " VAR(XR) ", " VAR(XR) "\n\t"  \
05288     GHASH_GFMUL_RED_AVX1(XR, HR, XR)                    \
05289 
/* CALC_TAG_AVX1(): finish GHASH and form the authentication tag in
 * xmm0.  Builds the lengths block with 8*nbytes in qword 0 and 8*abytes
 * in qword 1 of xmm0 (both vpinsrq inserts together define every bit),
 * XORs it into the accumulator XR, performs the final GF multiply by H,
 * byte-swaps the result, and XORs in the encrypted counter T from
 * VAR(TR).  Clobbers rcx/rdx. */
05290 #define CALC_TAG_AVX1()                                      \
05291     "movl   %[nbytes], %%edx\n\t"                        \
05292     "movl   %[abytes], %%ecx\n\t"                        \
05293     "shlq   $3, %%rdx\n\t"                               \
05294     "shlq   $3, %%rcx\n\t"                               \
05295     "vpinsrq    $0, %%rdx, %%xmm0, %%xmm0\n\t"               \
05296     "vpinsrq    $1, %%rcx, %%xmm0, %%xmm0\n\t"               \
05297     "vpxor  %%xmm0, " VAR(XR) ", " VAR(XR) "\n\t"        \
05298     GHASH_GFMUL_RED_AVX1(XR, HR, XR)                         \
05299     "vpshufb    %[BSWAP_MASK], " VAR(XR) ", " VAR(XR) "\n\t" \
05300     "vpxor  " VAR(TR) ", " VAR(XR) ", %%xmm0\n\t"        \
05301 
/* STORE_TAG_AVX(): write the computed tag (in xmm0) to %[tag].  A full
 * 16-byte tag is stored with a single vmovdqu (label 71); shorter tags
 * are staged on the stack and copied byte-by-byte in loop 73 until
 * %[tbytes] bytes are written.  Clobbers rcx, r13. */
05302 #define STORE_TAG_AVX()                       \
05303     "cmpl   $16, %[tbytes]\n\t"           \
05304     "je     71f\n\t"                      \
05305     "xorq   %%rcx, %%rcx\n\t"             \
05306     "vmovdqu    %%xmm0, (%%rsp)\n\t"          \
05307     "73:\n\t"                                 \
05308     "movzbl (%%rsp,%%rcx,1), %%r13d\n\t"  \
05309     "movb   %%r13b, (%[tag],%%rcx,1)\n\t" \
05310     "incl   %%ecx\n\t"                    \
05311     "cmpl   %[tbytes], %%ecx\n\t"         \
05312     "jne    73b\n\t"                      \
05313     "jmp    72f\n\t"                      \
05314     "\n"                                      \
05315     "71:\n\t"                                 \
05316     "vmovdqu    %%xmm0, (%[tag])\n\t"         \
05317     "\n"                                      \
05318     "72:\n\t"
05319 
/* CMP_TAG_AVX(): compare the computed tag (xmm0) with the received tag
 * at %[tag] and store 1 (match) or 0 (mismatch) to *%[res].
 * - Partial tags (< 16 bytes): byte-by-byte XOR with the differences
 *   OR-accumulated into %%al, so the loop never exits early on a
 *   mismatch (no data-dependent branch on tag content).
 * - Full 16-byte tags (label 71): VPCMPEQB + VPMOVMSKB; mask 0xffff
 *   means all bytes equal.
 * Clobbers rax/rcx/rdx, r13, xmm1. */
05320 #define CMP_TAG_AVX()                                      \
05321     "cmpl   $16, %[tbytes]\n\t"                        \
05322     "je     71f\n\t"                                   \
05323     "subq   $16, %%rsp\n\t"                            \
05324     "xorq   %%rcx, %%rcx\n\t"                          \
05325     "xorq   %%rax, %%rax\n\t"                          \
05326     "vmovdqu    %%xmm0, (%%rsp)\n\t"                       \
05327     "\n"                                                   \
05328     "73:\n\t"                                              \
05329     "movzbl (%%rsp,%%rcx,1), %%r13d\n\t"               \
05330     "xorb   (%[tag],%%rcx,1), %%r13b\n\t"              \
05331     "orb    %%r13b, %%al\n\t"                          \
05332     "incl   %%ecx\n\t"                                 \
05333     "cmpl   %[tbytes], %%ecx\n\t"                      \
05334     "jne    73b\n\t"                                   \
05335     "cmpb   $0x00, %%al\n\t"                           \
05336     "sete   %%al\n\t"                                  \
05337     "addq   $16, %%rsp\n\t"                            \
05338     "jmp    72f\n\t"                                   \
05339     "\n"                                                   \
05340     "71:\n\t"                                              \
05341     "vmovdqu    (%[tag]), %%xmm1\n\t"                      \
05342     "vpcmpeqb   %%xmm1, %%xmm0, %%xmm0\n\t"                \
05343     "vpmovmskb  %%xmm0, %%edx\n\t"                         \
05344     "# %%edx == 0xFFFF then return 1 else => return 0\n\t" \
05345     "xorl   %%eax, %%eax\n\t"                          \
05346     "cmpl   $0xffff, %%edx\n\t"                        \
05347     "sete   %%al\n\t"                                  \
05348     "\n"                                                   \
05349     "72:\n\t"                                              \
05350     "movl   %%eax, (%[res])\n\t"
05351 
/* AES_GCM_encrypt_avx1() - AES-GCM encryption using AES-NI + AVX1.
 *
 * in     plaintext to encrypt (nbytes bytes)
 * out    ciphertext output buffer (nbytes bytes)
 * addt   additional authenticated data (abytes bytes)
 * ivec   initialization vector (ibytes bytes; exactly 12 bytes takes the
 *        CALC_IV_12_AVX1 fast path, any other length is GHASHed by
 *        CALC_IV_AVX1)
 * tag    output authentication tag (tbytes bytes)
 * key    expanded AES key schedule; nr = number of AES rounds (10/12/14)
 *
 * Flow inside the single asm statement: reserve STACK_OFFSET stack
 * bytes, derive counter/T from the IV, hash the AAD, finalize H, then
 * encrypt the bulk data -- 128 bytes per iteration with interleaved
 * GHASH (unless the *_NO_UNROLL macros are defined), then single
 * 16-byte blocks, then a final partial block -- and finally compute and
 * store the tag.
 * NOTE(review): the asm body uses rax and rbx directly, which is why iv
 * and ivLen are pinned to those registers as inputs; ebx is overwritten
 * inside the asm (e.g. "xorl %%ebx, %%ebx" in the partial-block loops)
 * -- verify input-operand clobbering remains safe with the compilers in
 * use. */
05352 static void AES_GCM_encrypt_avx1(const unsigned char *in, unsigned char *out,
05353                                  const unsigned char* addt,
05354                                  const unsigned char* ivec, unsigned char *tag,
05355                                  unsigned int nbytes, unsigned int abytes,
05356                                  unsigned int ibytes, unsigned int tbytes,
05357                                  const unsigned char* key, int nr)
05358 {
05359     register const unsigned char* iv asm("rax") = ivec;
05360     register unsigned int ivLen asm("ebx") = ibytes;
05361 
05362     __asm__ __volatile__ (
05363         "subq       $" VAR(STACK_OFFSET) ", %%rsp\n\t"
05364         /* Counter is xmm13 */
05365         "vpxor      %%xmm13, %%xmm13, %%xmm13\n\t"
05366         "vpxor      " VAR(XR) ", " VAR(XR) ", " VAR(XR) "\n\t"
05367         "movl       %[ibytes], %%edx\n\t"
05368         "cmpl       $12, %%edx\n\t"
05369         "jne        35f\n\t"
05370         CALC_IV_12_AVX1()
05371         "\n"
05372         "35:\n\t"
05373         CALC_IV_AVX1()
05374         "\n"
05375         "39:\n\t"
05376 
05377         CALC_AAD_AVX1()
05378 
        /* H is multiplied by x in GF(2^128) (shift + conditional XOR of
         * the reduction constant from MOD2_128); counter is byte-swapped
         * and incremented, then saved to the CTR1 stack slot. */
05379         "# Calculate counter and H\n\t"
05380         "vpsrlq     $63, " VAR(HR) ", %%xmm5\n\t"
05381         "vpsllq     $1, " VAR(HR) ", %%xmm4\n\t"
05382         "vpslldq    $8, %%xmm5, %%xmm5\n\t"
05383         "vpor       %%xmm5, %%xmm4, %%xmm4\n\t"
05384         "vpshufd    $0xff, " VAR(HR) ", " VAR(HR) "\n\t"
05385         "vpsrad     $31, " VAR(HR) ", " VAR(HR) "\n\t"
05386         "vpshufb    %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t"
05387         "vpand      %[MOD2_128], " VAR(HR) ", " VAR(HR) "\n\t"
05388         "vpaddd     %[ONE], %%xmm13, %%xmm13\n\t"
05389         "vpxor      %%xmm4, " VAR(HR) ", " VAR(HR) "\n\t"
05390         "vmovdqu    %%xmm13, " VAR(CTR1) "\n\t"
05391 
05392         "xorl       " VAR(KR) ", " VAR(KR) "\n\t"
05393 
        /* 8-block (128-byte) unrolled path: r13d holds the 128-byte
         * aligned byte count, KR the bytes processed so far. */
05394 #if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL)
05395         "cmpl   $128, %[nbytes]\n\t"
05396         "movl   %[nbytes], %%r13d\n\t"
05397         "jl 5f\n\t"
05398         "andl   $0xffffff80, %%r13d\n\t"
05399 
05400         CALC_HT_8_AVX1()
05401 
05402         "# First 128 bytes of input\n\t"
05403         VAESENC_128()
05404 
05405         "cmpl   $128, %%r13d\n\t"
05406         "movl   $128, " VAR(KR) "\n\t"
05407         "jle    2f\n\t"
05408 
05409         "# More 128 bytes of input\n\t"
05410         "\n"
05411     "3:\n\t"
05412         VAESENC_128_GHASH_AVX1(%%rdx, 0)
05413         "addl   $128, " VAR(KR) "\n\t"
05414         "cmpl   %%r13d, " VAR(KR) "\n\t"
05415         "jl 3b\n\t"
05416         "\n"
        /* Label 2: GHASH the last 8 ciphertext blocks (still in
         * xmm4-xmm11) against H^8..H^1 from the table at HTR. */
05417     "2:\n\t"
05418         "vmovdqa    %[BSWAP_MASK], %%xmm13\n\t"
05419         "vpshufb    %%xmm13, %%xmm4, %%xmm4\n\t"
05420         "vpshufb    %%xmm13, %%xmm5, %%xmm5\n\t"
05421         "vpshufb    %%xmm13, %%xmm6, %%xmm6\n\t"
05422         "vpshufb    %%xmm13, %%xmm7, %%xmm7\n\t"
05423         "vpxor      %%xmm2, %%xmm4, %%xmm4\n\t"
05424         "vpshufb    %%xmm13, %%xmm8, %%xmm8\n\t"
05425         "vpshufb    %%xmm13, %%xmm9, %%xmm9\n\t"
05426         "vpshufb    %%xmm13, %%xmm10, %%xmm10\n\t"
05427         "vpshufb    %%xmm13, %%xmm11, %%xmm11\n\t"
05428 
05429         "vmovdqu       (" VAR(HTR) "), %%xmm12\n\t"
05430         "vmovdqu     16(" VAR(HTR) "), %%xmm14\n\t"
05431         GHASH_GFMUL_AVX1(XR, %%xmm13, %%xmm11, %%xmm12)
05432         GHASH_GFMUL_XOR_AVX1(XR, %%xmm13, %%xmm10, %%xmm14)
05433         "vmovdqu     32(" VAR(HTR) "), %%xmm12\n\t"
05434         "vmovdqu     48(" VAR(HTR) "), %%xmm14\n\t"
05435         GHASH_GFMUL_XOR_AVX1(XR, %%xmm13, %%xmm9, %%xmm12)
05436         GHASH_GFMUL_XOR_AVX1(XR, %%xmm13, %%xmm8, %%xmm14)
05437         "vmovdqu     64(" VAR(HTR) "), %%xmm12\n\t"
05438         "vmovdqu     80(" VAR(HTR) "), %%xmm14\n\t"
05439         GHASH_GFMUL_XOR_AVX1(XR, %%xmm13, %%xmm7, %%xmm12)
05440         GHASH_GFMUL_XOR_AVX1(XR, %%xmm13, %%xmm6, %%xmm14)
05441         "vmovdqu     96(" VAR(HTR) "), %%xmm12\n\t"
05442         "vmovdqu    112(" VAR(HTR) "), %%xmm14\n\t"
05443         GHASH_GFMUL_XOR_AVX1(XR, %%xmm13, %%xmm5, %%xmm12)
05444         GHASH_GFMUL_RED_XOR_AVX1(XR, %%xmm13, %%xmm4, %%xmm14)
05445 
05446         "vmovdqu    0(" VAR(HTR) "), " VAR(HR) "\n\t"
05447         "\n"
05448     "5:\n\t"
05449         "movl       %[nbytes], %%edx\n\t"
05450         "cmpl       %%edx, " VAR(KR) "\n\t"
05451         "jge        55f\n\t"
05452 #endif
05453 
        /* Remaining full 16-byte blocks, one at a time (labels 12/13),
         * then the final partial block and the tag. */
05454         "movl       %[nbytes], %%r13d\n\t"
05455         "andl       $0xfffffff0, %%r13d\n\t"
05456         "cmpl       %%r13d, " VAR(KR) "\n\t"
05457         "jge        14f\n\t"
05458 
05459         VAESENC_BLOCK()
05460         "addl       $16, " VAR(KR) "\n\t"
05461         "cmpl       %%r13d, " VAR(KR) "\n\t"
05462         "jge        13f\n\t"
05463         "\n"
05464         "12:\n\t"
05465         "vmovdqu    (%[in]," VAR(KR64) ",1), %%xmm9\n\t"
05466         VAESENC_GFMUL(%%xmm9, HR, XR)
05467         "vpshufb    %[BSWAP_MASK], %%xmm4, %%xmm4\n\t"
05468         "addl       $16, " VAR(KR) "\n\t"
05469         "vpxor      %%xmm4, " VAR(XR) ", " VAR(XR) "\n\t"
05470         "cmpl       %%r13d, " VAR(KR) "\n\t"
05471         "jl     12b\n\t"
05472         "\n"
05473         "13:\n\t"
05474         GHASH_GFMUL_RED_AVX1(XR, HR, XR)
05475         "\n"
05476         "14:\n\t"
05477 
05478         AESENC_LAST15_ENC_AVX1()
05479         "\n"
05480         "55:\n\t"
05481 
05482         CALC_TAG_AVX1()
05483         STORE_TAG_AVX()
05484         "addq       $" VAR(STACK_OFFSET) ", %%rsp\n\t"
05485         "vzeroupper\n\t"
05486 
        /* No output operands; the "memory" clobber covers the stores to
         * out and tag.  rax/rbx are used via the register variables. */
05487         :
05488         : [KEY] "r" (key),
05489           [in] "r" (in), [out] "r" (out), [nr] "r" (nr),
05490           [nbytes] "r" (nbytes), [abytes] "r" (abytes), [addt] "r" (addt),
05491           [ivec] "r" (iv), [ibytes] "r" (ivLen), [tbytes] "r" (tbytes),
05492           [tag] "r" (tag),
05493           [BSWAP_MASK] "m" (BSWAP_MASK),
05494           [BSWAP_EPI64] "m" (BSWAP_EPI64),
05495           [ONE] "m" (ONE),
05496 #if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL)
05497           [TWO] "m" (TWO), [THREE] "m" (THREE), [FOUR] "m" (FOUR),
05498           [FIVE] "m" (FIVE), [SIX] "m" (SIX), [SEVEN] "m" (SEVEN),
05499           [EIGHT] "m" (EIGHT),
05500 #endif
05501           [MOD2_128] "m" (MOD2_128)
05502         : "xmm15", "xmm14", "xmm13", "xmm12",
05503           "xmm0", "xmm1", "xmm2", "xmm3", "memory",
05504           "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11",
05505           "rcx", "rdx", "r13"
05506     );
05507 }
05508 
05509 #ifdef HAVE_INTEL_AVX2
05510 /* Encrypt and carry-less multiply for AVX2. */
/* VAESENC_PCLMUL_AVX2_1(src, o1, o2, o3): first fused AES-round/GHASH
 * step of the AVX2 8-block loop.  Loads the ciphertext block at src+o2,
 * byte-reflects it, XORs in the running GHASH value (xmm2), starts the
 * four 64x64 carry-less products against the key power at HTR+o3
 * (results in xmm1/xmm14/xmm2/xmm3), and applies the round key at
 * KEY+o1 to all eight AES states xmm4-xmm11. */
05511 #define VAESENC_PCLMUL_AVX2_1(src, o1, o2, o3)        \
05512     "vmovdqu    " #o2 "(" #src "), %%xmm12\n\t"       \
05513     "vmovdqa    " #o1 "(%[KEY]), %%xmm0\n\t"          \
05514     "vpshufb    %[BSWAP_MASK], %%xmm12, %%xmm12\n\t"  \
05515     "vmovdqu    " #o3 "(" VAR(HTR) "), %%xmm13\n\t"   \
05516     "vpxor  %%xmm2, %%xmm12, %%xmm12\n\t"         \
05517     "vpclmulqdq $0x10, %%xmm13, %%xmm12, %%xmm1\n\t"  \
05518     "vpclmulqdq $0x01, %%xmm13, %%xmm12, %%xmm14\n\t" \
05519     "vpclmulqdq $0x00, %%xmm13, %%xmm12, %%xmm2\n\t"  \
05520     "vpclmulqdq $0x11, %%xmm13, %%xmm12, %%xmm3\n\t"  \
05521     "vaesenc    %%xmm0, %%xmm4, %%xmm4\n\t"           \
05522     "vaesenc    %%xmm0, %%xmm5, %%xmm5\n\t"           \
05523     "vaesenc    %%xmm0, %%xmm6, %%xmm6\n\t"           \
05524     "vaesenc    %%xmm0, %%xmm7, %%xmm7\n\t"           \
05525     "vaesenc    %%xmm0, %%xmm8, %%xmm8\n\t"           \
05526     "vaesenc    %%xmm0, %%xmm9, %%xmm9\n\t"           \
05527     "vaesenc    %%xmm0, %%xmm10, %%xmm10\n\t"         \
05528     "vaesenc    %%xmm0, %%xmm11, %%xmm11\n\t"         \
05529 
/* VAESENC_PCLMUL_AVX2_2(src, o1, o2, o3): second fused step.  Folds the
 * pending middle product (xmm14) into xmm1, multiplies the next
 * ciphertext block (src+o2) by the key power at HTR+o3 -- new middle
 * products into xmm13/xmm14, low into xmm15, high into xmm12 --
 * accumulates xmm13 into xmm1 and xmm12 into xmm3, then applies the
 * round key at KEY+o1 to all eight AES states.  xmm15 is left for the
 * following _N step to accumulate. */
05530 #define VAESENC_PCLMUL_AVX2_2(src, o1, o2, o3)        \
05531     "vmovdqu    " #o2 "(" #src "), %%xmm12\n\t"       \
05532     "vmovdqu    " #o3 "(" VAR(HTR) "), %%xmm0\n\t"    \
05533     "vpshufb    %[BSWAP_MASK], %%xmm12, %%xmm12\n\t"  \
05534     "vpxor  %%xmm14, %%xmm1, %%xmm1\n\t"          \
05535     "vpclmulqdq $0x10, %%xmm0, %%xmm12, %%xmm13\n\t"  \
05536     "vpclmulqdq $0x01, %%xmm0, %%xmm12, %%xmm14\n\t"  \
05537     "vpclmulqdq $0x00, %%xmm0, %%xmm12, %%xmm15\n\t"  \
05538     "vpclmulqdq $0x11, %%xmm0, %%xmm12, %%xmm12\n\t"  \
05539     "vmovdqa    " #o1 "(%[KEY]), %%xmm0\n\t"          \
05540     "vpxor  %%xmm13, %%xmm1, %%xmm1\n\t"          \
05541     "vpxor  %%xmm12, %%xmm3, %%xmm3\n\t"          \
05542     "vaesenc    %%xmm0, %%xmm4, %%xmm4\n\t"           \
05543     "vaesenc    %%xmm0, %%xmm5, %%xmm5\n\t"           \
05544     "vaesenc    %%xmm0, %%xmm6, %%xmm6\n\t"           \
05545     "vaesenc    %%xmm0, %%xmm7, %%xmm7\n\t"           \
05546     "vaesenc    %%xmm0, %%xmm8, %%xmm8\n\t"           \
05547     "vaesenc    %%xmm0, %%xmm9, %%xmm9\n\t"           \
05548     "vaesenc    %%xmm0, %%xmm10, %%xmm10\n\t"         \
05549     "vaesenc    %%xmm0, %%xmm11, %%xmm11\n\t"         \
05550 
/* VAESENC_PCLMUL_AVX2_N(src, o1, o2, o3): middle fused steps (3..8).
 * Like _2, but additionally folds the previous step's pending low
 * product (xmm15) into the low accumulator xmm2 before computing the
 * products for the block at src+o2 against the key power at HTR+o3,
 * then applies the round key at KEY+o1 to all eight AES states. */
05551 #define VAESENC_PCLMUL_AVX2_N(src, o1, o2, o3)        \
05552     "vmovdqu    " #o2 "(" #src "), %%xmm12\n\t"       \
05553     "vmovdqu    " #o3 "(" VAR(HTR) "), %%xmm0\n\t"    \
05554     "vpshufb    %[BSWAP_MASK], %%xmm12, %%xmm12\n\t"  \
05555     "vpxor  %%xmm14, %%xmm1, %%xmm1\n\t"          \
05556     "vpxor  %%xmm15, %%xmm2, %%xmm2\n\t"          \
05557     "vpclmulqdq $0x10, %%xmm0, %%xmm12, %%xmm13\n\t"  \
05558     "vpclmulqdq $0x01, %%xmm0, %%xmm12, %%xmm14\n\t"  \
05559     "vpclmulqdq $0x00, %%xmm0, %%xmm12, %%xmm15\n\t"  \
05560     "vpclmulqdq $0x11, %%xmm0, %%xmm12, %%xmm12\n\t"  \
05561     "vmovdqa    " #o1 "(%[KEY]), %%xmm0\n\t"          \
05562     "vpxor  %%xmm13, %%xmm1, %%xmm1\n\t"          \
05563     "vpxor  %%xmm12, %%xmm3, %%xmm3\n\t"          \
05564     "vaesenc    %%xmm0, %%xmm4, %%xmm4\n\t"           \
05565     "vaesenc    %%xmm0, %%xmm5, %%xmm5\n\t"           \
05566     "vaesenc    %%xmm0, %%xmm6, %%xmm6\n\t"           \
05567     "vaesenc    %%xmm0, %%xmm7, %%xmm7\n\t"           \
05568     "vaesenc    %%xmm0, %%xmm8, %%xmm8\n\t"           \
05569     "vaesenc    %%xmm0, %%xmm9, %%xmm9\n\t"           \
05570     "vaesenc    %%xmm0, %%xmm10, %%xmm10\n\t"         \
05571     "vaesenc    %%xmm0, %%xmm11, %%xmm11\n\t"         \
05572 
/* VAESENC_PCLMUL_AVX2_L(o): last fused step.  Folds the remaining
 * pending products (xmm14 into xmm1, xmm15 into xmm2), recombines the
 * 256-bit product held across xmm1/xmm2/xmm3 (vpslldq/vpsrldq split of
 * the middle half), then performs the two-stage reduction modulo the
 * GHASH polynomial using %[MOD2_128] (two vpclmulqdq $0x10 + vpshufd
 * $0x4e rounds), interleaved with applying the round key at KEY+o to
 * all eight AES states.  The reduced GHASH value is left in xmm2. */
05573 #define VAESENC_PCLMUL_AVX2_L(o)                      \
05574     "vpxor  %%xmm14, %%xmm1, %%xmm1\n\t"          \
05575     "vpxor  %%xmm15, %%xmm2, %%xmm2\n\t"          \
05576     "vpslldq    $8, %%xmm1, %%xmm12\n\t"              \
05577     "vpsrldq    $8, %%xmm1, %%xmm1\n\t"               \
05578     "vmovdqa    "#o"(%[KEY]), %%xmm15\n\t"            \
05579     "vmovdqa    %[MOD2_128], %%xmm0\n\t"              \
05580     "vaesenc    %%xmm15, %%xmm4, %%xmm4\n\t"          \
05581     "vpxor  %%xmm12, %%xmm2, %%xmm2\n\t"          \
05582     "vpxor  %%xmm1, %%xmm3, %%xmm3\n\t"           \
05583     "vpclmulqdq $0x10, %%xmm0, %%xmm2, %%xmm14\n\t"   \
05584     "vaesenc    %%xmm15, %%xmm5, %%xmm5\n\t"          \
05585     "vaesenc    %%xmm15, %%xmm6, %%xmm6\n\t"          \
05586     "vaesenc    %%xmm15, %%xmm7, %%xmm7\n\t"          \
05587     "vpshufd    $0x4e, %%xmm2, %%xmm2\n\t"            \
05588     "vpxor  %%xmm14, %%xmm2, %%xmm2\n\t"          \
05589     "vpclmulqdq $0x10, %%xmm0, %%xmm2, %%xmm14\n\t"   \
05590     "vaesenc    %%xmm15, %%xmm8, %%xmm8\n\t"          \
05591     "vaesenc    %%xmm15, %%xmm9, %%xmm9\n\t"          \
05592     "vaesenc    %%xmm15, %%xmm10, %%xmm10\n\t"        \
05593     "vpshufd    $0x4e, %%xmm2, %%xmm2\n\t"            \
05594     "vpxor  %%xmm14, %%xmm2, %%xmm2\n\t"          \
05595     "vpxor  %%xmm3, %%xmm2, %%xmm2\n\t"           \
05596     "vaesenc    %%xmm15, %%xmm11, %%xmm11\n\t"
05597 
/* VAESENC_BLOCK_AVX2(): CTR-encrypt one 16-byte block and fold the
 * ciphertext into the GHASH accumulator XR.  Reads and increments the
 * counter at the CTR1 stack slot, runs the full AES round sequence
 * (extra rounds for nr >= 11 / >= 13 selected via the %= numeric local
 * label -- see _VAESENC_AVX), XORs the keystream with the input at
 * in+KR64, stores the result to out+KR64, then byte-reflects it and
 * XORs it into XR.  Clobbers xmm4/xmm5. */
05598 #define VAESENC_BLOCK_AVX2()                                  \
05599     "vmovdqu        " VAR(CTR1) ", %%xmm5\n\t"            \
05600     "vpshufb        %[BSWAP_EPI64], %%xmm5, %%xmm4\n\t"   \
05601     "vpaddd     %[ONE], %%xmm5, %%xmm5\n\t"           \
05602     "vmovdqu        %%xmm5, " VAR(CTR1) "\n\t"            \
05603     "vpxor         (%[KEY]), %%xmm4, %%xmm4\n\t"      \
05604     "vaesenc         16(%[KEY]), %%xmm4, %%xmm4\n\t"      \
05605     "vaesenc         32(%[KEY]), %%xmm4, %%xmm4\n\t"      \
05606     "vaesenc         48(%[KEY]), %%xmm4, %%xmm4\n\t"      \
05607     "vaesenc         64(%[KEY]), %%xmm4, %%xmm4\n\t"      \
05608     "vaesenc         80(%[KEY]), %%xmm4, %%xmm4\n\t"      \
05609     "vaesenc         96(%[KEY]), %%xmm4, %%xmm4\n\t"      \
05610     "vaesenc        112(%[KEY]), %%xmm4, %%xmm4\n\t"      \
05611     "vaesenc        128(%[KEY]), %%xmm4, %%xmm4\n\t"      \
05612     "vaesenc        144(%[KEY]), %%xmm4, %%xmm4\n\t"      \
05613     "cmpl       $11, %[nr]\n\t"                       \
05614     "vmovdqa        160(%[KEY]), %%xmm5\n\t"              \
05615     "jl         %=f\n\t"                              \
05616     "vaesenc        %%xmm5, %%xmm4, %%xmm4\n\t"           \
05617     "vaesenc        176(%[KEY]), %%xmm4, %%xmm4\n\t"      \
05618     "cmpl       $13, %[nr]\n\t"                       \
05619     "vmovdqa        192(%[KEY]), %%xmm5\n\t"              \
05620     "jl         %=f\n\t"                              \
05621     "vaesenc        %%xmm5, %%xmm4, %%xmm4\n\t"           \
05622     "vaesenc        208(%[KEY]), %%xmm4, %%xmm4\n\t"      \
05623     "vmovdqa        224(%[KEY]), %%xmm5\n\t"              \
05624     "%=:\n\t"                                                 \
05625     "vaesenclast    %%xmm5, %%xmm4, %%xmm4\n\t"           \
05626     "vmovdqu        (%[in]," VAR(KR64) ",1), %%xmm5\n\t"  \
05627     "vpxor      %%xmm5, %%xmm4, %%xmm4\n\t"           \
05628     "vmovdqu        %%xmm4, (%[out]," VAR(KR64) ",1)\n\t" \
05629     "vpshufb        %[BSWAP_MASK], %%xmm4, %%xmm4\n\t"    \
05630     "vpxor      %%xmm4, " VAR(XR) ", " VAR(XR) "\n\t"
05631 
/* AES-encrypt the counter block held in xmm4 (full key schedule, with the
 * same nr-based round-count branches as VAESENC_BLOCK_AVX2), XOR with the
 * input block `in`, and in the same macro multiply X by H in GF(2^128)
 * using Karatsuba multiplication, reducing by MOD2_128 into XR.
 * H01 = H[1] ^ H[0] (top and bottom 64-bits XORed) -- the extra operand
 * required for the Karatsuba middle partial product.
 *
 * NOTE(review): the inner macro declares five parameters (.., H01) but
 * the VAESENC_GFMUL_AVX2 wrapper below forwards only four, so any use of
 * the wrapper would fail to preprocess; `ctr1` is also never referenced
 * in the body.  Presumably this slower Karatsuba variant is dead code
 * kept for reference (the schoolbook _SB_ variant below is used instead)
 * -- confirm against callers before removing or fixing the wrapper. */
05632 /* Karatsuba multiplication - slower
05633  * H01 = H[1] ^ H[0] (top and bottom 64-bits XORed)
05634  */
05635 #define _VAESENC_GFMUL_AVX2(in, H, X, ctr1, H01)            \
05636     "vpxor         (%[KEY]), %%xmm4, %%xmm4\n\t"    \
05637     "vaesenc         16(%[KEY]), %%xmm4, %%xmm4\n\t"    \
05638     "vaesenc         32(%[KEY]), %%xmm4, %%xmm4\n\t"    \
05639     "vaesenc         48(%[KEY]), %%xmm4, %%xmm4\n\t"    \
05640     "vaesenc         64(%[KEY]), %%xmm4, %%xmm4\n\t"    \
05641     "vaesenc         80(%[KEY]), %%xmm4, %%xmm4\n\t"    \
05642     "vaesenc         96(%[KEY]), %%xmm4, %%xmm4\n\t"    \
05643     "vaesenc        112(%[KEY]), %%xmm4, %%xmm4\n\t"    \
05644     "vaesenc        128(%[KEY]), %%xmm4, %%xmm4\n\t"    \
05645     "vaesenc        144(%[KEY]), %%xmm4, %%xmm4\n\t"    \
05646     "cmpl       $11, %[nr]\n\t"                     \
05647     "vmovdqa        160(%[KEY]), %%xmm5\n\t"            \
05648     "jl         %=f\n\t"                            \
05649     "vaesenc        %%xmm5, %%xmm4, %%xmm4\n\t"         \
05650     "vaesenc        176(%[KEY]), %%xmm4, %%xmm4\n\t"    \
05651     "cmpl       $13, %[nr]\n\t"                     \
05652     "vmovdqa        192(%[KEY]), %%xmm5\n\t"            \
05653     "jl         %=f\n\t"                            \
05654     "vaesenc        %%xmm5, %%xmm4, %%xmm4\n\t"         \
05655     "vaesenc        208(%[KEY]), %%xmm4, %%xmm4\n\t"    \
05656     "vmovdqa        224(%[KEY]), %%xmm5\n\t"            \
05657     "%=:\n\t"                                               \
05658     "vaesenclast    %%xmm5, %%xmm4, %%xmm4\n\t"         \
05659     "vmovdqu        " #in ", %%xmm0\n\t"                \
05660     "vpxor      %%xmm0, %%xmm4, %%xmm4\n\t"         \
05661                                                             \
05662     "vpsrldq    $8, " #X ", %%xmm2\n\t"                     \
05663     "vpxor  " #X ", %%xmm2, %%xmm2\n\t"                 \
05664     "vpclmulqdq $0x00, " #H ", " #X ", %%xmm5\n\t"          \
05665     "vpclmulqdq $0x11, " #H ", " #X ", %%xmm8\n\t"          \
05666     "vpclmulqdq $0x00, "#H01", %%xmm2, %%xmm7\n\t"          \
05667     "vpxor  %%xmm5, %%xmm7, %%xmm7\n\t"                 \
05668     "vpxor  %%xmm8, %%xmm7, %%xmm7\n\t"                 \
05669     "vpslldq    $8, %%xmm7, %%xmm6\n\t"                     \
05670     "vpsrldq    $8, %%xmm7, %%xmm7\n\t"                     \
05671     "vpxor  %%xmm7, %%xmm8, %%xmm8\n\t"                 \
05672     "vpxor  %%xmm5, %%xmm6, %%xmm6\n\t"                 \
05673                                                             \
05674     "vpclmulqdq $0x10, %[MOD2_128], %%xmm6, %%xmm5\n\t"     \
05675     "vpshufd    $0x4e, %%xmm6, %%xmm6\n\t"                  \
05676     "vpxor  %%xmm5, %%xmm6, %%xmm6\n\t"                 \
05677     "vpclmulqdq $0x10, %[MOD2_128], %%xmm6, %%xmm5\n\t"     \
05678     "vpshufd    $0x4e, %%xmm6, %%xmm6\n\t"                  \
05679     "vpxor  %%xmm8, %%xmm6, %%xmm6\n\t"                 \
05680     "vpxor  %%xmm5, %%xmm6, " VAR(XR) "\n\t"
05681 #define VAESENC_GFMUL_AVX2(in, H, X, ctr1)                  \
05682        _VAESENC_GFMUL_AVX2(in, H, X, ctr1)
05683 
/* Schoolbook (SB) variant of the combined AES-encrypt + GHASH multiply:
 * computes X * H in GF(2^128) with four VPCLMULQDQ partial products and
 * the two-fold MOD2_128 reduction into XR, interleaving the carry-less
 * multiply instructions with the AES rounds applied to the counter block
 * in xmm4 (same nr-based round-count branching as above).  Afterwards
 * XORs the encrypted counter with the input block `in` to produce the
 * output block in xmm4.  `ctr1` is accepted but not referenced in the
 * body. */
05684 #define _VAESENC_GFMUL_SB_AVX2(in, H, X, ctr1)              \
05685     "vpclmulqdq $0x10, " #H ", " #X ", %%xmm7\n\t"          \
05686     "vpclmulqdq $0x01, " #H ", " #X ", %%xmm6\n\t"          \
05687     "vpclmulqdq $0x00, " #H ", " #X ", %%xmm5\n\t"          \
05688     "vpclmulqdq $0x11, " #H ", " #X ", %%xmm8\n\t"          \
05689     "vpxor         (%[KEY]), %%xmm4, %%xmm4\n\t"    \
05690     "vaesenc         16(%[KEY]), %%xmm4, %%xmm4\n\t"    \
05691     "vpxor  %%xmm6, %%xmm7, %%xmm7\n\t"                 \
05692     "vpslldq    $8, %%xmm7, %%xmm6\n\t"                     \
05693     "vpsrldq    $8, %%xmm7, %%xmm7\n\t"                     \
05694     "vaesenc         32(%[KEY]), %%xmm4, %%xmm4\n\t"    \
05695     "vpxor  %%xmm5, %%xmm6, %%xmm6\n\t"                 \
05696     "vpclmulqdq $0x10, %[MOD2_128], %%xmm6, %%xmm5\n\t"     \
05697     "vaesenc         48(%[KEY]), %%xmm4, %%xmm4\n\t"    \
05698     "vaesenc         64(%[KEY]), %%xmm4, %%xmm4\n\t"    \
05699     "vaesenc         80(%[KEY]), %%xmm4, %%xmm4\n\t"    \
05700     "vpshufd    $0x4e, %%xmm6, %%xmm6\n\t"                  \
05701     "vpxor  %%xmm5, %%xmm6, %%xmm6\n\t"                 \
05702     "vpclmulqdq $0x10, %[MOD2_128], %%xmm6, %%xmm5\n\t"     \
05703     "vaesenc         96(%[KEY]), %%xmm4, %%xmm4\n\t"    \
05704     "vaesenc        112(%[KEY]), %%xmm4, %%xmm4\n\t"    \
05705     "vaesenc        128(%[KEY]), %%xmm4, %%xmm4\n\t"    \
05706     "vpshufd    $0x4e, %%xmm6, %%xmm6\n\t"                  \
05707     "vaesenc        144(%[KEY]), %%xmm4, %%xmm4\n\t"    \
05708     "vpxor  %%xmm7, %%xmm8, %%xmm8\n\t"                 \
05709     "vpxor  %%xmm8, %%xmm6, %%xmm6\n\t"                 \
05710     "cmpl       $11, %[nr]\n\t"                     \
05711     "vmovdqa        160(%[KEY]), %%xmm3\n\t"            \
05712     "jl         %=f\n\t"                            \
05713     "vaesenc        %%xmm3, %%xmm4, %%xmm4\n\t"         \
05714     "vaesenc        176(%[KEY]), %%xmm4, %%xmm4\n\t"    \
05715     "cmpl       $13, %[nr]\n\t"                     \
05716     "vmovdqa        192(%[KEY]), %%xmm3\n\t"            \
05717     "jl         %=f\n\t"                            \
05718     "vaesenc        %%xmm3, %%xmm4, %%xmm4\n\t"         \
05719     "vaesenc        208(%[KEY]), %%xmm4, %%xmm4\n\t"    \
05720     "vmovdqa        224(%[KEY]), %%xmm3\n\t"            \
05721     "%=:\n\t"                                               \
05722     "vaesenclast    %%xmm3, %%xmm4, %%xmm4\n\t"         \
05723     "vpxor  %%xmm5, %%xmm6, " VAR(XR) "\n\t"            \
05724     "vmovdqu        " #in ", %%xmm5\n\t"                \
05725     "vpxor      %%xmm5, %%xmm4, %%xmm4\n\t"
05726 #define VAESENC_GFMUL_SB_AVX2(in, H, X, ctr1)               \
05727        _VAESENC_GFMUL_SB_AVX2(in, H, X, ctr1)
05728 
05729 
/* Carry-less 128x128-bit multiply a * b (schoolbook, four VPCLMULQDQ
 * partial products) producing an unreduced 256-bit result split across
 * two registers: low 128 bits in r2, high 128 bits in r.
 * Clobbers xmm0-xmm3.  No polynomial reduction is performed here; see
 * GHASH_RED_AVX2 / GHASH_MID_AVX2. */
05730 #define _GHASH_GFMUL_AVX2(r, r2, a, b)         \
05731     "vpclmulqdq $0x10, "#a", "#b", %%xmm2\n\t" \
05732     "vpclmulqdq $0x01, "#a", "#b", %%xmm1\n\t" \
05733     "vpclmulqdq $0x00, "#a", "#b", %%xmm0\n\t" \
05734     "vpclmulqdq $0x11, "#a", "#b", %%xmm3\n\t" \
05735     "vpxor  %%xmm1, %%xmm2, %%xmm2\n\t"    \
05736     "vpslldq    $8, %%xmm2, %%xmm1\n\t"        \
05737     "vpsrldq    $8, %%xmm2, %%xmm2\n\t"        \
05738     "vpxor  %%xmm1, %%xmm0, "#r2"\n\t"     \
05739     "vpxor  %%xmm2, %%xmm3, " #r "\n\t"
05740 #define GHASH_GFMUL_AVX2(r, r2, a, b)          \
05741        _GHASH_GFMUL_AVX2(r, r2, a, b)
05742 
/* Shift the 256-bit value held in r (high) : r2 (low) left by one bit.
 * VPSLLD shifts each 32-bit lane; the VPSRLD/VPSLLDQ/VPSRLDQ sequence
 * extracts the per-lane carry bits and ORs them into the next lane,
 * including the carry from the top of r2 into the bottom of r.
 * Clobbers xmm0-xmm2. */
05743 #define GHASH_MID_AVX2(r, r2)               \
05744     "vpsrld $31, "#r2", %%xmm0\n\t"     \
05745     "vpsrld $31, " #r ", %%xmm1\n\t"    \
05746     "vpslld $1, "#r2", "#r2"\n\t"       \
05747     "vpslld $1, " #r ", " #r "\n\t"     \
05748     "vpsrldq    $12, %%xmm0, %%xmm2\n\t"    \
05749     "vpslldq    $4, %%xmm0, %%xmm0\n\t"     \
05750     "vpslldq    $4, %%xmm1, %%xmm1\n\t"     \
05751     "vpor   %%xmm2, " #r ", " #r "\n\t" \
05752     "vpor   %%xmm0, "#r2", "#r2"\n\t"   \
05753     "vpor   %%xmm1, " #r ", " #r "\n\t"
05754 
/* Carry-less multiply a * b and reduce the 256-bit product modulo the
 * GCM polynomial (two folds with MOD2_128 plus VPSHUFD half-swaps),
 * leaving the reduced 128-bit result in r.  Clobbers xmm5-xmm8. */
05755 #define _GHASH_GFMUL_RED_AVX2(r, a, b)                  \
05756     "vpclmulqdq $0x10, "#a", "#b", %%xmm7\n\t"          \
05757     "vpclmulqdq $0x01, "#a", "#b", %%xmm6\n\t"          \
05758     "vpclmulqdq $0x00, "#a", "#b", %%xmm5\n\t"          \
05759     "vpxor  %%xmm6, %%xmm7, %%xmm7\n\t"             \
05760     "vpslldq    $8, %%xmm7, %%xmm6\n\t"                 \
05761     "vpsrldq    $8, %%xmm7, %%xmm7\n\t"                 \
05762     "vpxor  %%xmm5, %%xmm6, %%xmm6\n\t"             \
05763     "vpclmulqdq $0x11, "#a", "#b", %%xmm8\n\t"          \
05764     "vpclmulqdq $0x10, %[MOD2_128], %%xmm6, %%xmm5\n\t" \
05765     "vpshufd    $0x4e, %%xmm6, %%xmm6\n\t"              \
05766     "vpxor  %%xmm5, %%xmm6, %%xmm6\n\t"             \
05767     "vpclmulqdq $0x10, %[MOD2_128], %%xmm6, %%xmm5\n\t" \
05768     "vpshufd    $0x4e, %%xmm6, %%xmm6\n\t"              \
05769     "vpxor  %%xmm7, %%xmm8, %%xmm8\n\t"             \
05770     "vpxor  %%xmm8, %%xmm6, %%xmm6\n\t"             \
05771     "vpxor  %%xmm5, %%xmm6, " #r "\n\t"
05772 #define GHASH_GFMUL_RED_AVX2(r, a, b)                   \
05773        _GHASH_GFMUL_RED_AVX2(r, a, b)
05774 
/* Square `a` in GF(2^128) and reduce, result in r.  Squaring needs only
 * the 0x00 and 0x11 partial products (the cross terms cancel over
 * GF(2)), so this is cheaper than a general multiply.  `mod128` is a
 * register already loaded with the MOD2_128 reduction constant.
 * Clobbers xmm5, xmm6, xmm8. */
05775 #define _GHASH_GFSQR_RED2_AVX2(r, a, mod128)            \
05776     "vpclmulqdq $0x00, "#a", "#a", %%xmm6\n\t"          \
05777     "vpclmulqdq $0x11, "#a", "#a", %%xmm8\n\t"          \
05778     "vpclmulqdq $0x10, "#mod128", %%xmm6, %%xmm5\n\t"   \
05779     "vpshufd    $0x4e, %%xmm6, %%xmm6\n\t"              \
05780     "vpxor  %%xmm5, %%xmm6, %%xmm6\n\t"             \
05781     "vpclmulqdq $0x10, "#mod128", %%xmm6, %%xmm5\n\t"   \
05782     "vpshufd    $0x4e, %%xmm6, %%xmm6\n\t"              \
05783     "vpxor  %%xmm5, %%xmm6, %%xmm6\n\t"             \
05784     "vpxor  %%xmm6, %%xmm8, " #r "\n\t"
05785 #define GHASH_GFSQR_RED2_AVX2(r, a, mod128)             \
05786        _GHASH_GFSQR_RED2_AVX2(r, a, mod128)
05787 
/* Combined multiply-and-square with reduction: computes both
 *   rm = a * b  (reduced)   and   rs = b * b  (reduced)
 * in GF(2^128), interleaving the two independent instruction streams so
 * they overlap in the pipeline.  `mod128` holds the MOD2_128 reduction
 * constant.  Used by CALC_HT_8_AVX2 to build consecutive powers of H.
 * Clobbers xmm4-xmm10. */
05788 #define _GHASH_GFMUL_SQR_RED2_AVX2(rm, rs, a, b, mod128) \
05789     "vpclmulqdq $0x10, "#a", "#b", %%xmm7\n\t"           \
05790     "vpclmulqdq $0x01, "#a", "#b", %%xmm6\n\t"           \
05791     "vpclmulqdq $0x00, "#a", "#b", %%xmm5\n\t"           \
05792     "vpclmulqdq $0x11, "#a", "#b", %%xmm8\n\t"           \
05793     "vpclmulqdq $0x00, "#b", "#b", %%xmm9\n\t"           \
05794     "vpclmulqdq $0x11, "#b", "#b", %%xmm10\n\t"          \
05795     "vpxor  %%xmm6, %%xmm7, %%xmm7\n\t"              \
05796     "vpslldq    $8, %%xmm7, %%xmm6\n\t"                  \
05797     "vpsrldq    $8, %%xmm7, %%xmm7\n\t"                  \
05798     "vpxor  %%xmm5, %%xmm6, %%xmm6\n\t"              \
05799     "vpclmulqdq $0x10, "#mod128", %%xmm9, %%xmm4\n\t"    \
05800     "vpclmulqdq $0x10, "#mod128", %%xmm6, %%xmm5\n\t"    \
05801     "vpshufd    $0x4e, %%xmm6, %%xmm6\n\t"               \
05802     "vpshufd    $0x4e, %%xmm9, %%xmm9\n\t"               \
05803     "vpxor  %%xmm5, %%xmm6, %%xmm6\n\t"              \
05804     "vpxor  %%xmm4, %%xmm9, %%xmm9\n\t"              \
05805     "vpclmulqdq $0x10, "#mod128", %%xmm6, %%xmm5\n\t"    \
05806     "vpclmulqdq $0x10, "#mod128", %%xmm9, %%xmm4\n\t"    \
05807     "vpshufd    $0x4e, %%xmm6, %%xmm6\n\t"               \
05808     "vpshufd    $0x4e, %%xmm9, %%xmm9\n\t"               \
05809     "vpxor  %%xmm7, %%xmm8, %%xmm8\n\t"              \
05810     "vpxor  %%xmm4, %%xmm9, %%xmm9\n\t"              \
05811     "vpxor  %%xmm8, %%xmm6, %%xmm6\n\t"              \
05812     "vpxor  %%xmm10, %%xmm9, "#rs"\n\t"              \
05813     "vpxor  %%xmm5, %%xmm6, "#rm"\n\t"
05814 #define GHASH_GFMUL_SQR_RED2_AVX2(rm, rs, a, b, mod128)  \
05815        _GHASH_GFMUL_SQR_RED2_AVX2(rm, rs, a, b, mod128)
05816 
/* Precompute the hash-key power table H^1..H^8 (16 bytes each, 128 bytes
 * total) at HTR, using squaring (H^2, H^4) and multiply+square pairs
 * (H^3/H^4, H^5/H^6, H^7/H^8).  The table lets the 8-block GHASH loop
 * (GHASH_GFMUL_RED_8_AVX2) process 8 blocks per reduction.  Preserves
 * the GHASH accumulator XR in xmm2; clobbers xmm0-xmm12. */
05817 #define CALC_HT_8_AVX2()                                                \
05818     "vmovdqa    %[MOD2_128], %%xmm11\n\t"                               \
05819     "vmovdqa    " VAR(XR) ", %%xmm2\n\t"                                \
05820     "# H ^ 1 and H ^ 2\n\t"                                             \
05821     GHASH_GFSQR_RED2_AVX2(%%xmm0, HR, %%xmm11)                          \
05822     "vmovdqu    " VAR(HR) ", 0(" VAR(HTR) ")\n\t"                       \
05823     "vmovdqu    %%xmm0 ,  16(" VAR(HTR) ")\n\t"                         \
05824     "# H ^ 3 and H ^ 4\n\t"                                             \
05825     GHASH_GFMUL_SQR_RED2_AVX2(%%xmm1, %%xmm3, HR, %%xmm0, %%xmm11)      \
05826     "vmovdqu    %%xmm1 ,  32(" VAR(HTR) ")\n\t"                         \
05827     "vmovdqu    %%xmm3 ,  48(" VAR(HTR) ")\n\t"                         \
05828     "# H ^ 5 and H ^ 6\n\t"                                             \
05829     GHASH_GFMUL_SQR_RED2_AVX2(%%xmm12, %%xmm0, %%xmm0, %%xmm1, %%xmm11) \
05830     "vmovdqu    %%xmm12,  64(" VAR(HTR) ")\n\t"                         \
05831     "vmovdqu    %%xmm0 ,  80(" VAR(HTR) ")\n\t"                         \
05832     "# H ^ 7 and H ^ 8\n\t"                                             \
05833     GHASH_GFMUL_SQR_RED2_AVX2(%%xmm12, %%xmm0, %%xmm1, %%xmm3, %%xmm11) \
05834     "vmovdqu    %%xmm12,  96(" VAR(HTR) ")\n\t"                         \
05835     "vmovdqu    %%xmm0 , 112(" VAR(HTR) ")\n\t"
05836 
/* Reduce the 256-bit value r (high) : r2 (low) modulo the GCM polynomial
 * into r: two folds of the low half with MOD2_128 (each a VPCLMULQDQ by
 * the reduction constant plus a 64-bit half-swap via VPSHUFD), then XOR
 * into the high half.  Clobbers xmm0-xmm2. */
05837 #define _GHASH_RED_AVX2(r, r2)                     \
05838     "vmovdqa    %[MOD2_128], %%xmm2\n\t"           \
05839     "vpclmulqdq $0x10, %%xmm2, "#r2", %%xmm0\n\t"  \
05840     "vpshufd    $0x4e, "#r2", %%xmm1\n\t"          \
05841     "vpxor  %%xmm0, %%xmm1, %%xmm1\n\t"        \
05842     "vpclmulqdq $0x10, %%xmm2, %%xmm1, %%xmm0\n\t" \
05843     "vpshufd    $0x4e, %%xmm1, %%xmm1\n\t"         \
05844     "vpxor  %%xmm0, %%xmm1, %%xmm1\n\t"        \
05845     "vpxor  %%xmm1, " #r ", " #r "\n\t"
05846 #define GHASH_RED_AVX2(r, r2)                      \
05847        _GHASH_RED_AVX2(r, r2)
05848 
/* Full GHASH step: unreduced multiply a * b into r:r2, one-bit left
 * shift of the 256-bit product (bit-order adjustment), then reduction
 * modulo the GCM polynomial into r. */
05849 #define GHASH_FULL_AVX2(r, r2, a, b) \
05850     GHASH_GFMUL_AVX2(r, r2, a, b)    \
05851     GHASH_MID_AVX2(r, r2)            \
05852     GHASH_RED_AVX2(r, r2)
05853 
/* Carry-less multiply a * b kept in a redundant three-register form:
 * r = high 128 bits, r2 = low 128 bits, r3 = combined middle terms.
 * No reduction; used to start an accumulation that GFMUL_XOR_3V_AVX2
 * extends and GHASH_RED_AVX2 later collapses.  Clobbers xmm1. */
05854 #define _GFMUL_3V_AVX2(r, r2, r3, a, b)        \
05855     "vpclmulqdq $0x10, "#a", "#b", "#r3"\n\t"  \
05856     "vpclmulqdq $0x01, "#a", "#b", %%xmm1\n\t" \
05857     "vpclmulqdq $0x00, "#a", "#b", "#r2"\n\t"  \
05858     "vpclmulqdq $0x11, "#a", "#b", " #r "\n\t" \
05859     "vpxor  %%xmm1, "#r3", "#r3"\n\t"
05860 #define GFMUL_3V_AVX2(r, r2, r3, a, b)         \
05861        _GFMUL_3V_AVX2(r, r2, r3, a, b)
05862 
/* Carry-less multiply a * b and XOR-accumulate the unreduced partial
 * products into the three-register form r (high) / r2 (low) / r3 (mid)
 * started by GFMUL_3V_AVX2.  Clobbers xmm0-xmm3. */
05863 #define _GFMUL_XOR_3V_AVX2(r, r2, r3, a, b)    \
05864     "vpclmulqdq $0x10, "#a", "#b", %%xmm2\n\t" \
05865     "vpclmulqdq $0x01, "#a", "#b", %%xmm1\n\t" \
05866     "vpclmulqdq $0x00, "#a", "#b", %%xmm0\n\t" \
05867     "vpclmulqdq $0x11, "#a", "#b", %%xmm3\n\t" \
05868     "vpxor  %%xmm1, %%xmm2, %%xmm2\n\t"    \
05869     "vpxor  %%xmm3, " #r ", " #r "\n\t"    \
05870     "vpxor  %%xmm2, "#r3", "#r3"\n\t"      \
05871     "vpxor  %%xmm0, "#r2", "#r2"\n\t"
05872 #define GFMUL_XOR_3V_AVX2(r, r2, r3, a, b)     \
05873        _GFMUL_XOR_3V_AVX2(r, r2, r3, a, b)
05874 
/* GHASH 8 blocks with one reduction: multiplies the 8 byte-swapped data
 * blocks (xmm4..xmm11, oldest paired with the highest power) against the
 * precomputed table H^1..H^8 at HTR, XOR-accumulating the unreduced
 * products in XR/xmm13/xmm14 (high/low/mid), then folds the middle terms
 * into high/low and performs a single MOD2_128 reduction into XR. */
05875 #define GHASH_GFMUL_RED_8_AVX2()                              \
05876     "vmovdqu       (" VAR(HTR) "), %%xmm12\n\t"               \
05877     GFMUL_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm11, %%xmm12)     \
05878     "vmovdqu     16(" VAR(HTR) "), %%xmm12\n\t"               \
05879     GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm10, %%xmm12) \
05880     "vmovdqu     32(" VAR(HTR) "), %%xmm11\n\t"               \
05881     "vmovdqu     48(" VAR(HTR) "), %%xmm12\n\t"               \
05882     GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm9, %%xmm11)  \
05883     GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm8, %%xmm12)  \
05884     "vmovdqu     64(" VAR(HTR) "), %%xmm11\n\t"               \
05885     "vmovdqu     80(" VAR(HTR) "), %%xmm12\n\t"               \
05886     GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm7, %%xmm11)  \
05887     GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm6, %%xmm12)  \
05888     "vmovdqu     96(" VAR(HTR) "), %%xmm11\n\t"               \
05889     "vmovdqu    112(" VAR(HTR) "), %%xmm12\n\t"               \
05890     GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm5, %%xmm11)  \
05891     GFMUL_XOR_3V_AVX2(XR, %%xmm13, %%xmm14, %%xmm4, %%xmm12)  \
05892     "vpslldq    $8, %%xmm14, %%xmm12\n\t"                     \
05893     "vpsrldq    $8, %%xmm14, %%xmm14\n\t"                     \
05894     "vpxor  %%xmm12, %%xmm13, %%xmm13\n\t"                \
05895     "vpxor  %%xmm14, " VAR(XR) ", " VAR(XR) "\n\t"        \
05896     GHASH_RED_AVX2(XR, %%xmm13)
05897 
/* GCM setup for the common 12-byte IV case.  Builds the initial counter
 * block in xmm13 from the 12 IV bytes (assumes %rax points at the IV --
 * TODO confirm against the enclosing asm's register setup) with
 * 0x01000000 (big-endian 1) in the final 32-bit word, then encrypts the
 * all-zero block (HR starts as round key 0 = 0 XOR key0) and the counter
 * block in lockstep through the key schedule, branching at round counts
 * 11/13 for the larger key sizes (labels 31f).  Leaves the hash key
 * H = Enc(0) byte-swapped in HR and T = Enc(counter) stored at TR. */
05898 #define CALC_IV_12_AVX2()                                            \
05899     "# Calculate values when IV is 12 bytes\n\t"                     \
05900     "# Set counter based on IV\n\t"                                  \
05901     "movl       $0x01000000, %%ecx\n\t"                      \
05902     "vpinsrq        $0, 0(%%rax), %%xmm13, %%xmm13\n\t"          \
05903     "vpinsrd        $2, 8(%%rax), %%xmm13, %%xmm13\n\t"          \
05904     "vpinsrd        $3, %%ecx, %%xmm13, %%xmm13\n\t"             \
05905     "# H = Encrypt X(=0) and T = Encrypt counter\n\t"                \
05906     "vmovdqa          0(%[KEY]), " VAR(HR) "\n\t"                \
05907     "vmovdqa         16(%[KEY]), %%xmm12\n\t"                    \
05908     "vpxor      " VAR(HR) ", %%xmm13, %%xmm1\n\t"            \
05909     "vaesenc        %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
05910     "vaesenc        %%xmm12, %%xmm1, %%xmm1\n\t"                 \
05911     "vmovdqa         32(%[KEY]), %%xmm0\n\t"                     \
05912     "vmovdqa         48(%[KEY]), %%xmm12\n\t"                    \
05913     "vaesenc        %%xmm0, " VAR(HR) ", " VAR(HR) "\n\t"        \
05914     "vaesenc        %%xmm0, %%xmm1, %%xmm1\n\t"                  \
05915     "vaesenc        %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
05916     "vaesenc        %%xmm12, %%xmm1, %%xmm1\n\t"                 \
05917     "vmovdqa         64(%[KEY]), %%xmm0\n\t"                     \
05918     "vmovdqa         80(%[KEY]), %%xmm12\n\t"                    \
05919     "vaesenc        %%xmm0, " VAR(HR) ", " VAR(HR) "\n\t"        \
05920     "vaesenc        %%xmm0, %%xmm1, %%xmm1\n\t"                  \
05921     "vaesenc        %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
05922     "vaesenc        %%xmm12, %%xmm1, %%xmm1\n\t"                 \
05923     "vmovdqa         96(%[KEY]), %%xmm0\n\t"                     \
05924     "vmovdqa        112(%[KEY]), %%xmm12\n\t"                    \
05925     "vaesenc        %%xmm0, " VAR(HR) ", " VAR(HR) "\n\t"        \
05926     "vaesenc        %%xmm0, %%xmm1, %%xmm1\n\t"                  \
05927     "vaesenc        %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
05928     "vaesenc        %%xmm12, %%xmm1, %%xmm1\n\t"                 \
05929     "vmovdqa        128(%[KEY]), %%xmm0\n\t"                     \
05930     "vmovdqa        144(%[KEY]), %%xmm12\n\t"                    \
05931     "vaesenc        %%xmm0, " VAR(HR) ", " VAR(HR) "\n\t"        \
05932     "vaesenc        %%xmm0, %%xmm1, %%xmm1\n\t"                  \
05933     "vaesenc        %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
05934     "vaesenc        %%xmm12, %%xmm1, %%xmm1\n\t"                 \
05935     "cmpl       $11, %[nr]\n\t"                              \
05936     "vmovdqa        160(%[KEY]), %%xmm0\n\t"                     \
05937     "jl 31f\n\t"                                                     \
05938     "vmovdqa        176(%[KEY]), %%xmm12\n\t"                    \
05939     "vaesenc        %%xmm0, " VAR(HR) ", " VAR(HR) "\n\t"        \
05940     "vaesenc        %%xmm0, %%xmm1, %%xmm1\n\t"                  \
05941     "vaesenc        %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
05942     "vaesenc        %%xmm12, %%xmm1, %%xmm1\n\t"                 \
05943     "cmpl       $13, %[nr]\n\t"                              \
05944     "vmovdqa        192(%[KEY]), %%xmm0\n\t"                     \
05945     "jl 31f\n\t"                                                     \
05946     "vmovdqa        208(%[KEY]), %%xmm12\n\t"                    \
05947     "vaesenc        %%xmm0, " VAR(HR) ", " VAR(HR) "\n\t"        \
05948     "vaesenc        %%xmm0, %%xmm1, %%xmm1\n\t"                  \
05949     "vaesenc        %%xmm12, " VAR(HR) ", " VAR(HR) "\n\t"       \
05950     "vaesenc        %%xmm12, %%xmm1, %%xmm1\n\t"                 \
05951     "vmovdqu        224(%[KEY]), %%xmm0\n\t"                     \
05952     "31:\n\t"                                                        \
05953     "vaesenclast    %%xmm0, " VAR(HR) ", " VAR(HR) "\n\t"        \
05954     "vaesenclast    %%xmm0, %%xmm1, %%xmm1\n\t"                  \
05955     "vpshufb        %[BSWAP_MASK], " VAR(HR) ", " VAR(HR) "\n\t" \
05956     "vmovdqu        %%xmm1, " VAR(TR) "\n\t"                     \
05957 
/* GCM setup for an IV that is not 12 bytes: computes the pre-counter
 * block by GHASHing the IV per the GCM spec.  Computes H = Enc(0) first,
 * then hashes the IV in 16-byte blocks (label 43); a trailing partial
 * block is zero-padded through a 16-byte stack buffer copied byte by
 * byte (labels 44/42).  Appends the IV bit length (edx << 3) and does a
 * final GHASH multiply (label 45), byte-swaps the result into xmm13, and
 * stores T = Enc(counter) at TR.  Assumes %rax = IV pointer and
 * %edx = [ibytes] on entry -- TODO confirm against the enclosing asm's
 * register setup.  Clobbers rbx, rcx, rdx, r13 and uses 16 bytes of
 * stack scratch (restored before exit). */
05958 #define CALC_IV_AVX2()                                       \
05959     "# Calculate values when IV is not 12 bytes\n\t"         \
05960     "# H = Encrypt X(=0)\n\t"                                \
05961     "vmovdqa    0(%[KEY]), " VAR(HR) "\n\t"                  \
05962     VAESENC_AVX(HR)                                          \
05963     "vpshufb    %[BSWAP_MASK], " VAR(HR) ", " VAR(HR) "\n\t" \
05964     "# Calc counter\n\t"                                     \
05965     "# Initialization vector\n\t"                            \
05966     "cmpl   $0, %%edx\n\t"                               \
05967     "movq   $0, %%rcx\n\t"                               \
05968     "je 45f\n\t"                                             \
05969     "cmpl   $16, %%edx\n\t"                              \
05970     "jl 44f\n\t"                                             \
05971     "andl   $0xfffffff0, %%edx\n\t"                      \
05972     "\n"                                                     \
05973     "43:\n\t"                                                \
05974     "vmovdqu    (%%rax,%%rcx,1), %%xmm4\n\t"                 \
05975     "vpshufb    %[BSWAP_MASK], %%xmm4, %%xmm4\n\t"           \
05976     "vpxor  %%xmm4, %%xmm13, %%xmm13\n\t"                \
05977     GHASH_FULL_AVX2(%%xmm13, %%xmm12, %%xmm13, HR)           \
05978     "addl   $16, %%ecx\n\t"                              \
05979     "cmpl   %%edx, %%ecx\n\t"                            \
05980     "jl 43b\n\t"                                             \
05981     "movl   %[ibytes], %%edx\n\t"                        \
05982     "cmpl   %%edx, %%ecx\n\t"                            \
05983     "je 45f\n\t"                                             \
05984     "\n"                                                     \
05985     "44:\n\t"                                                \
05986     "subq   $16, %%rsp\n\t"                              \
05987     "vpxor  %%xmm4, %%xmm4, %%xmm4\n\t"                  \
05988     "xorl   %%ebx, %%ebx\n\t"                            \
05989     "vmovdqu    %%xmm4, (%%rsp)\n\t"                         \
05990     "42:\n\t"                                                \
05991     "movzbl (%%rax,%%rcx,1), %%r13d\n\t"                 \
05992     "movb   %%r13b, (%%rsp,%%rbx,1)\n\t"                 \
05993     "incl   %%ecx\n\t"                                   \
05994     "incl   %%ebx\n\t"                                   \
05995     "cmpl   %%edx, %%ecx\n\t"                            \
05996     "jl 42b\n\t"                                             \
05997     "vmovdqu    (%%rsp), %%xmm4\n\t"                         \
05998     "addq   $16, %%rsp\n\t"                              \
05999     "vpshufb    %[BSWAP_MASK], %%xmm4, %%xmm4\n\t"           \
06000     "vpxor  %%xmm4, %%xmm13, %%xmm13\n\t"                \
06001     GHASH_FULL_AVX2(%%xmm13, %%xmm12, %%xmm13, HR)           \
06002     "\n"                                                     \
06003     "45:\n\t"                                                \
06004     "# T = Encrypt counter\n\t"                              \
06005     "vpxor  %%xmm0, %%xmm0, %%xmm0\n\t"                  \
06006     "shll   $3, %%edx\n\t"                               \
06007     "vpinsrq    $0, %%rdx, %%xmm0, %%xmm0\n\t"               \
06008     "vpxor  %%xmm0, %%xmm13, %%xmm13\n\t"                \
06009     GHASH_FULL_AVX2(%%xmm13, %%xmm12, %%xmm13, HR)           \
06010     "vpshufb    %[BSWAP_MASK], %%xmm13, %%xmm13\n\t"         \
06011     "#   Encrypt counter\n\t"                                \
06012     "vmovdqa    0(%[KEY]), %%xmm4\n\t"                       \
06013     "vpxor  %%xmm13, %%xmm4, %%xmm4\n\t"                 \
06014     VAESENC_AVX(%%xmm4)                                      \
06015     "vmovdqu    %%xmm4, " VAR(TR) "\n\t"
06016 
/* Fold the additional authenticated data (AAD) into the GHASH
 * accumulator XR.  Processes [addt] in 16-byte blocks (label 23); a
 * trailing partial block is zero-padded via a 16-byte stack buffer
 * filled byte by byte (labels 24/22) before the final GHASH multiply.
 * Skips everything when [abytes] is zero (label 25).  Clobbers rax, rbx,
 * rcx, rdx, r13 and 16 bytes of stack scratch (restored before exit). */
06017 #define CALC_AAD_AVX2()                                \
06018     "# Additional authentication data\n\t"             \
06019     "movl   %[abytes], %%edx\n\t"                  \
06020     "cmpl   $0, %%edx\n\t"                         \
06021     "je     25f\n\t"                               \
06022     "movq   %[addt], %%rax\n\t"                    \
06023     "xorl   %%ecx, %%ecx\n\t"                      \
06024     "cmpl   $16, %%edx\n\t"                        \
06025     "jl     24f\n\t"                               \
06026     "andl   $0xfffffff0, %%edx\n\t"                \
06027     "\n"                                               \
06028     "23:\n\t"                                          \
06029     "vmovdqu    (%%rax,%%rcx,1), %%xmm4\n\t"           \
06030     "vpshufb    %[BSWAP_MASK], %%xmm4, %%xmm4\n\t"     \
06031     "vpxor  %%xmm4, " VAR(XR) ", " VAR(XR) "\n\t"  \
06032     GHASH_FULL_AVX2(XR, %%xmm12, XR, HR)               \
06033     "addl   $16, %%ecx\n\t"                        \
06034     "cmpl   %%edx, %%ecx\n\t"                      \
06035     "jl     23b\n\t"                               \
06036     "movl   %[abytes], %%edx\n\t"                  \
06037     "cmpl   %%edx, %%ecx\n\t"                      \
06038     "je     25f\n\t"                               \
06039     "\n"                                               \
06040     "24:\n\t"                                          \
06041     "subq   $16, %%rsp\n\t"                        \
06042     "vpxor  %%xmm4, %%xmm4, %%xmm4\n\t"            \
06043     "xorl   %%ebx, %%ebx\n\t"                      \
06044     "vmovdqu    %%xmm4, (%%rsp)\n\t"                   \
06045     "22:\n\t"                                          \
06046     "movzbl (%%rax,%%rcx,1), %%r13d\n\t"           \
06047     "movb   %%r13b, (%%rsp,%%rbx,1)\n\t"           \
06048     "incl   %%ecx\n\t"                             \
06049     "incl   %%ebx\n\t"                             \
06050     "cmpl   %%edx, %%ecx\n\t"                      \
06051     "jl     22b\n\t"                               \
06052     "vmovdqu    (%%rsp), %%xmm4\n\t"                   \
06053     "addq   $16, %%rsp\n\t"                        \
06054     "vpshufb    %[BSWAP_MASK], %%xmm4, %%xmm4\n\t"     \
06055     "vpxor  %%xmm4, " VAR(XR) ", " VAR(XR) "\n\t"  \
06056     GHASH_FULL_AVX2(XR, %%xmm12, XR, HR)               \
06057     "\n"                                               \
06058     "25:\n\t"
06059 
/* Process 8 AES-CTR blocks (128 bytes) while GHASHing 8 previous blocks
 * at offset `o` from `src` (rcx = in+KR64 for decrypt, rdx = out+KR64
 * for encrypt -- the caller chooses which via the `src` argument).  The
 * _1/_2/_N/_L sub-macros interleave one VPCLMULQDQ step per AES round;
 * after the 10 fused rounds, extra rounds run for nr >= 11 / nr >= 13
 * (label 4f), then VAESENC_LAST XORs the key stream with the input and
 * stores the output. */
06060 #define VAESENC_128_GHASH_AVX2(src, o)               \
06061     "leaq   (%[in]," VAR(KR64) ",1), %%rcx\n\t"  \
06062     "leaq   (%[out]," VAR(KR64) ",1), %%rdx\n\t" \
06063     /* src is either %%rcx or %%rdx */             \
06064     VAESENC_CTR()                                  \
06065     VAESENC_XOR()                                  \
06066     VAESENC_PCLMUL_AVX2_1(src,  16, (o-128), 112)  \
06067     VAESENC_PCLMUL_AVX2_2(src,  32, (o-112),  96)  \
06068     VAESENC_PCLMUL_AVX2_N(src,  48, (o- 96),  80)  \
06069     VAESENC_PCLMUL_AVX2_N(src,  64, (o- 80),  64)  \
06070     VAESENC_PCLMUL_AVX2_N(src,  80, (o- 64),  48)  \
06071     VAESENC_PCLMUL_AVX2_N(src,  96, (o- 48),  32)  \
06072     VAESENC_PCLMUL_AVX2_N(src, 112, (o- 32),  16)  \
06073     VAESENC_PCLMUL_AVX2_N(src, 128, (o- 16),   0)  \
06074     VAESENC_PCLMUL_AVX2_L(144)                     \
06075     "cmpl   $11, %[nr]\n\t"                    \
06076     "vmovdqa    160(%[KEY]), %%xmm12\n\t"          \
06077     "jl     4f\n\t"                            \
06078     VAESENC()                                      \
06079     VAESENC_SET(176)                               \
06080     "cmpl   $13, %[nr]\n\t"                    \
06081     "vmovdqa    192(%[KEY]), %%xmm12\n\t"          \
06082     "jl     4f\n\t"                            \
06083     VAESENC()                                      \
06084     VAESENC_SET(208)                               \
06085     "vmovdqa    224(%[KEY]), %%xmm12\n\t"          \
06086     "\n"                                           \
06087 "4:\n\t"                                           \
06088     VAESENC_LAST(%%rcx, %%rdx)
06089 
/* Encrypt-side handling of a final partial block (1-15 bytes; jumps to
 * label 55f when [nbytes] is a multiple of 16).  Encrypts the current
 * counter into a 16-byte stack buffer, XORs plaintext bytes against it
 * one at a time -- writing ciphertext to out+KR64 and also back into the
 * buffer (label 51) -- zero-pads the remaining buffer bytes (labels
 * 52/53), then byte-swaps the padded ciphertext block and folds it into
 * the GHASH accumulator XR.  Clobbers rcx, rdx, r13 and 16 bytes of
 * stack scratch (restored before the GHASH step). */
06090 #define AESENC_LAST15_ENC_AVX2()                        \
06091     "movl   %[nbytes], %%ecx\n\t"                   \
06092     "movl   %%ecx, %%edx\n\t"                       \
06093     "andl   $0x0f, %%ecx\n\t"                       \
06094     "jz     55f\n\t"                                \
06095     "vmovdqu    " VAR(CTR1) ", %%xmm13\n\t"             \
06096     "vpshufb    %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t"   \
06097     "vpxor  0(%[KEY]), %%xmm13, %%xmm13\n\t"        \
06098     VAESENC_AVX(%%xmm13)                                \
06099     "subq   $16, %%rsp\n\t"                         \
06100     "xorl   %%ecx, %%ecx\n\t"                       \
06101     "vmovdqu    %%xmm13, (%%rsp)\n\t"                   \
06102     "\n"                                                \
06103     "51:\n\t"                                           \
06104     "movzbl (%[in]," VAR(KR64) ",1), %%r13d\n\t"    \
06105     "xorb   (%%rsp,%%rcx,1), %%r13b\n\t"            \
06106     "movb   %%r13b, (%[out]," VAR(KR64) ",1)\n\t"   \
06107     "movb   %%r13b, (%%rsp,%%rcx,1)\n\t"            \
06108     "incl   " VAR(KR) "\n\t"                        \
06109     "incl   %%ecx\n\t"                              \
06110     "cmpl   %%edx, " VAR(KR) "\n\t"                 \
06111     "jl     51b\n\t"                                \
06112     "xorq   %%r13, %%r13\n\t"                       \
06113     "cmpl   $16, %%ecx\n\t"                         \
06114     "je     53f\n\t"                                \
06115     "\n"                                                \
06116     "52:\n\t"                                           \
06117     "movb   %%r13b, (%%rsp,%%rcx,1)\n\t"            \
06118     "incl   %%ecx\n\t"                              \
06119     "cmpl   $16, %%ecx\n\t"                         \
06120     "jl     52b\n\t"                                \
06121     "53:\n\t"                                           \
06122     "vmovdqu    (%%rsp), %%xmm13\n\t"                   \
06123     "addq   $16, %%rsp\n\t"                         \
06124     "vpshufb    %[BSWAP_MASK], %%xmm13, %%xmm13\n\t"    \
06125     "vpxor  %%xmm13, " VAR(XR) ", " VAR(XR) "\n\t"  \
06126     GHASH_GFMUL_RED_AVX2(XR, HR, XR)                    \
06127 
06128 #define AESENC_LAST15_DEC_AVX2()                        \
06129     "movl   %[nbytes], %%ecx\n\t"                   \
06130     "movl   %%ecx, %%edx\n\t"                       \
06131     "andl   $0x0f, %%ecx\n\t"                       \
06132     "jz     55f\n\t"                                \
06133     "vmovdqu    " VAR(CTR1) ", %%xmm13\n\t"             \
06134     "vpshufb    %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t"   \
06135     "vpxor  0(%[KEY]), %%xmm13, %%xmm13\n\t"        \
06136     VAESENC_AVX(%%xmm13)                                \
06137     "subq   $32, %%rsp\n\t"                         \
06138     "xorl   %%ecx, %%ecx\n\t"                       \
06139     "vmovdqu    %%xmm13, (%%rsp)\n\t"                   \
06140     "vpxor  %%xmm0, %%xmm0, %%xmm0\n\t"             \
06141     "vmovdqu    %%xmm0, 16(%%rsp)\n\t"                  \
06142     "\n"                                                \
06143     "51:\n\t"                                           \
06144     "movzbl (%[in]," VAR(KR64) ",1), %%r13d\n\t"    \
06145     "movb   %%r13b, 16(%%rsp,%%rcx,1)\n\t"          \
06146     "xorb   (%%rsp,%%rcx,1), %%r13b\n\t"            \
06147     "movb   %%r13b, (%[out]," VAR(KR64) ",1)\n\t"   \
06148     "incl   " VAR(KR) "\n\t"                        \
06149     "incl   %%ecx\n\t"                              \
06150     "cmpl   %%edx, " VAR(KR) "\n\t"                 \
06151     "jl     51b\n\t"                                \
06152     "53:\n\t"                                           \
06153     "vmovdqu    16(%%rsp), %%xmm13\n\t"                 \
06154     "addq   $32, %%rsp\n\t"                         \
06155     "vpshufb    %[BSWAP_MASK], %%xmm13, %%xmm13\n\t"    \
06156     "vpxor  %%xmm13, " VAR(XR) ", " VAR(XR) "\n\t"  \
06157     GHASH_GFMUL_RED_AVX2(XR, HR, XR)                    \
06158 
06159 #define CALC_TAG_AVX2()                                      \
06160     "movl   %[nbytes], %%edx\n\t"                        \
06161     "movl   %[abytes], %%ecx\n\t"                        \
06162     "shlq   $3, %%rdx\n\t"                               \
06163     "shlq   $3, %%rcx\n\t"                               \
06164     "vpinsrq    $0, %%rdx, %%xmm0, %%xmm0\n\t"               \
06165     "vpinsrq    $1, %%rcx, %%xmm0, %%xmm0\n\t"               \
06166     "vpxor  %%xmm0, " VAR(XR) ", " VAR(XR) "\n\t"        \
06167     GHASH_GFMUL_RED_AVX2(XR, HR, XR)                         \
06168     "vpshufb    %[BSWAP_MASK], " VAR(XR) ", " VAR(XR) "\n\t" \
06169     "vpxor  " VAR(TR) ", " VAR(XR) ", %%xmm0\n\t"        \
06170 
06171 
06172 static void AES_GCM_encrypt_avx2(const unsigned char *in, unsigned char *out,
06173                                  const unsigned char* addt,
06174                                  const unsigned char* ivec, unsigned char *tag,
06175                                  unsigned int nbytes, unsigned int abytes,
06176                                  unsigned int ibytes, unsigned int tbytes,
06177                                  const unsigned char* key, int nr)
06178 {
06179     register const unsigned char* iv asm("rax") = ivec;
06180     register unsigned int ivLen asm("ebx") = ibytes;
06181 
06182     __asm__ __volatile__ (
06183         "subq       $" VAR(STACK_OFFSET) ", %%rsp\n\t"
06184         /* Counter is xmm13 */
06185         "vpxor      %%xmm13, %%xmm13, %%xmm13\n\t"
06186         "vpxor      " VAR(XR) ", " VAR(XR) ", " VAR(XR) "\n\t"
06187         "movl       %[ibytes], %%edx\n\t"
06188         "cmpl       $12, %%edx\n\t"
06189         "jne        35f\n\t"
06190         CALC_IV_12_AVX2()
06191         "jmp        39f\n\t"
06192         "\n"
06193         "35:\n\t"
06194         CALC_IV_AVX2()
06195         "\n"
06196         "39:\n\t"
06197 
06198         CALC_AAD_AVX2()
06199 
06200         "# Calculate counter and H\n\t"
06201         "vpsrlq     $63, " VAR(HR) ", %%xmm5\n\t"
06202         "vpsllq     $1, " VAR(HR) ", %%xmm4\n\t"
06203         "vpslldq    $8, %%xmm5, %%xmm5\n\t"
06204         "vpor       %%xmm5, %%xmm4, %%xmm4\n\t"
06205         "vpshufd    $0xff, " VAR(HR) ", " VAR(HR) "\n\t"
06206         "vpsrad     $31, " VAR(HR) ", " VAR(HR) "\n\t"
06207         "vpshufb    %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t"
06208         "vpand      %[MOD2_128], " VAR(HR) ", " VAR(HR) "\n\t"
06209         "vpaddd     %[ONE], %%xmm13, %%xmm13\n\t"
06210         "vpxor      %%xmm4, " VAR(HR) ", " VAR(HR) "\n\t"
06211         "vmovdqu    %%xmm13, " VAR(CTR1) "\n\t"
06212 
06213         "xorl       " VAR(KR) ", " VAR(KR) "\n\t"
06214 
06215 #if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX2_NO_UNROLL)
06216         "cmpl   $128, %[nbytes]\n\t"
06217         "movl   %[nbytes], %%r13d\n\t"
06218         "jl 5f\n\t"
06219         "andl   $0xffffff80, %%r13d\n\t"
06220 
06221         CALC_HT_8_AVX2()
06222 
06223         "# First 128 bytes of input\n\t"
06224         VAESENC_128()
06225 
06226         "cmpl   $128, %%r13d\n\t"
06227         "movl   $128, " VAR(KR) "\n\t"
06228         "jle    2f\n\t"
06229 
06230         "# More 128 bytes of input\n\t"
06231         "\n"
06232     "3:\n\t"
06233         VAESENC_128_GHASH_AVX2(%%rdx, 0)
06234         "addl   $128, " VAR(KR) "\n\t"
06235         "cmpl   %%r13d, " VAR(KR) "\n\t"
06236         "jl 3b\n\t"
06237         "\n"
06238     "2:\n\t"
06239         "vmovdqa    %[BSWAP_MASK], %%xmm13\n\t"
06240         "vpshufb    %%xmm13, %%xmm4, %%xmm4\n\t"
06241         "vpshufb    %%xmm13, %%xmm5, %%xmm5\n\t"
06242         "vpshufb    %%xmm13, %%xmm6, %%xmm6\n\t"
06243         "vpshufb    %%xmm13, %%xmm7, %%xmm7\n\t"
06244         "vpshufb    %%xmm13, %%xmm8, %%xmm8\n\t"
06245         "vpshufb    %%xmm13, %%xmm9, %%xmm9\n\t"
06246         "vpshufb    %%xmm13, %%xmm10, %%xmm10\n\t"
06247         "vpshufb    %%xmm13, %%xmm11, %%xmm11\n\t"
06248         "vpxor      %%xmm2, %%xmm4, %%xmm4\n\t"
06249 
06250         GHASH_GFMUL_RED_8_AVX2()
06251 
06252         "vmovdqu    0(" VAR(HTR) "), " VAR(HR) "\n\t"
06253         "\n"
06254     "5:\n\t"
06255         "movl       %[nbytes], %%edx\n\t"
06256         "cmpl       %%edx, " VAR(KR) "\n\t"
06257         "jge        55f\n\t"
06258 #endif
06259 
06260         "movl       %[nbytes], %%r13d\n\t"
06261         "andl       $0xfffffff0, %%r13d\n\t"
06262         "cmpl       %%r13d, " VAR(KR) "\n\t"
06263         "jge        14f\n\t"
06264 
06265         VAESENC_BLOCK_AVX2()
06266         "addl       $16, " VAR(KR) "\n\t"
06267         "cmpl       %%r13d, " VAR(KR) "\n\t"
06268         "jge        13f\n\t"
06269         "vmovdqa    %[MOD2_128], %%xmm0\n\t"
06270         "\n"
06271         "12:\n\t"
06272         "vmovdqu    (%[in]," VAR(KR64) ",1), %%xmm9\n\t"
06273         "vmovdqu    " VAR(CTR1) ", %%xmm5\n\t"
06274         "vpshufb    %[BSWAP_EPI64], %%xmm5, %%xmm4\n\t"
06275         "vpaddd     %[ONE], %%xmm5, %%xmm5\n\t"
06276         "vmovdqu    %%xmm5, " VAR(CTR1) "\n\t"
06277         VAESENC_GFMUL_SB_AVX2(%%xmm9, HR, XR, CTR1)
06278         "vmovdqu    %%xmm4, (%[out]," VAR(KR64) ",1)\n\t"
06279         "vpshufb    %[BSWAP_MASK], %%xmm4, %%xmm4\n\t"
06280         "addl       $16, " VAR(KR) "\n\t"
06281         "vpxor      %%xmm4, " VAR(XR) ", " VAR(XR) "\n\t"
06282         "cmpl       %%r13d, " VAR(KR) "\n\t"
06283         "jl     12b\n\t"
06284         "\n"
06285         "13:\n\t"
06286         GHASH_GFMUL_RED_AVX2(XR, HR, XR)
06287         "\n"
06288         "14:\n\t"
06289 
06290         AESENC_LAST15_ENC_AVX2()
06291         "\n"
06292         "55:\n\t"
06293 
06294         CALC_TAG_AVX2()
06295         STORE_TAG_AVX()
06296         "addq       $" VAR(STACK_OFFSET) ", %%rsp\n\t"
06297         "vzeroupper\n\t"
06298 
06299         :
06300         : [KEY] "r" (key),
06301           [in] "r" (in), [out] "r" (out), [nr] "r" (nr),
06302           [nbytes] "r" (nbytes), [abytes] "r" (abytes), [addt] "r" (addt),
06303           [ivec] "r" (iv), [ibytes] "r" (ivLen), [tbytes] "r" (tbytes),
06304           [tag] "r" (tag),
06305           [BSWAP_MASK] "m" (BSWAP_MASK),
06306           [BSWAP_EPI64] "m" (BSWAP_EPI64),
06307           [ONE] "m" (ONE),
06308 #if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX2_NO_UNROLL)
06309           [TWO] "m" (TWO), [THREE] "m" (THREE), [FOUR] "m" (FOUR),
06310           [FIVE] "m" (FIVE), [SIX] "m" (SIX), [SEVEN] "m" (SEVEN),
06311           [EIGHT] "m" (EIGHT),
06312 #endif
06313           [MOD2_128] "m" (MOD2_128)
06314         : "xmm15", "xmm14", "xmm13", "xmm12",
06315           "xmm0", "xmm1", "xmm2", "xmm3", "memory",
06316           "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11",
06317           "rcx", "rdx", "r13"
06318     );
06319 }
06320 #endif /* HAVE_INTEL_AVX2 */
06321 #endif /* HAVE_INTEL_AVX1 */
06322 
06323 #ifdef HAVE_AES_DECRYPT
06324 /* Figure 10. AES-GCM – Decrypt With Single Block Ghash at a Time */
06325 
06326 static void AES_GCM_decrypt(const unsigned char *in, unsigned char *out,
06327                             const unsigned char* addt,
06328                             const unsigned char* ivec, const unsigned char *tag,
06329                             int nbytes, int abytes, int ibytes, int tbytes,
06330                             const unsigned char* key, int nr, int* res)
06331 {
06332     register const unsigned char* iv asm("rax") = ivec;
06333     register int ivLen asm("ebx") = ibytes;
06334     register int tagLen asm("edx") = tbytes;
06335 
06336     __asm__ __volatile__ (
06337         "pushq      %%rdx\n\t"
06338         "subq       $" VAR(STACK_OFFSET) ", %%rsp\n\t"
06339         /* Counter is xmm13 */
06340         "pxor       %%xmm13, %%xmm13\n\t"
06341         "pxor       %%xmm15, %%xmm15\n\t"
06342         "movl       %[ibytes], %%edx\n\t"
06343         "cmpl       $12, %%edx\n\t"
06344         "jne        35f\n\t"
06345         CALC_IV_12()
06346         "\n"
06347         "35:\n\t"
06348         CALC_IV()
06349         "\n"
06350         "39:\n\t"
06351 
06352         CALC_AAD()
06353 
06354         "# Calculate counter and H\n\t"
06355         "pshufb     %[BSWAP_EPI64], %%xmm13\n\t"
06356         "movdqa     " VAR(HR) ", %%xmm5\n\t"
06357         "paddd      %[ONE], %%xmm13\n\t"
06358         "movdqa     " VAR(HR) ", %%xmm4\n\t"
06359         "movdqu     %%xmm13, " VAR(CTR1) "\n\t"
06360         "psrlq      $63, %%xmm5\n\t"
06361         "psllq      $1, %%xmm4\n\t"
06362         "pslldq     $8, %%xmm5\n\t"
06363         "por        %%xmm5, %%xmm4\n\t"
06364         "pshufd     $0xff, " VAR(HR) ", " VAR(HR) "\n\t"
06365         "psrad      $31, " VAR(HR) "\n\t"
06366         "pand       %[MOD2_128], " VAR(HR) "\n\t"
06367         "pxor       %%xmm4, " VAR(HR) "\n\t"
06368 
06369         "xorl       " VAR(KR) ", " VAR(KR) "\n\t"
06370 
06371 #if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL)
06372         "cmpl       $128, %[nbytes]\n\t"
06373         "jl     5f\n\t"
06374 
06375         CALC_HT_8_AVX()
06376 
06377         "movl       %[nbytes], %%r13d\n\t"
06378         "andl       $0xffffff80, %%r13d\n\t"
06379         "\n"
06380         "2:\n\t"
06381         AESENC_128_GHASH_AVX(%%rcx, 128)
06382         "addl       $128, " VAR(KR) "\n\t"
06383         "cmpl       %%r13d, " VAR(KR) "\n\t"
06384         "jl     2b\n\t"
06385 
06386         "movdqa     %%xmm2, " VAR(XR) "\n\t"
06387         "movdqu     (%%rsp), " VAR(HR) "\n\t"
06388     "5:\n\t"
06389         "movl       %[nbytes], %%edx\n\t"
06390         "cmpl       %%edx, " VAR(KR) "\n\t"
06391         "jge        55f\n\t"
06392 #endif
06393         "movl       %[nbytes], %%r13d\n\t"
06394         "andl       $0xfffffff0, %%r13d\n\t"
06395         "cmpl       %%r13d, " VAR(KR) "\n\t"
06396         "jge        13f\n\t"
06397 
06398         "\n"
06399         "12:\n\t"
06400         "leaq       (%[in]," VAR(KR64) ",1), %%rcx\n\t"
06401         "leaq       (%[out]," VAR(KR64) ",1), %%rdx\n\t"
06402         "movdqu     (%%rcx), %%xmm1\n\t"
06403         "movdqa     " VAR(HR) ", %%xmm0\n\t"
06404         "pshufb     %[BSWAP_MASK], %%xmm1\n\t"
06405         "pxor       " VAR(XR) ", %%xmm1\n\t"
06406         AESENC_GFMUL(%%rcx, %%rdx, %%xmm0, %%xmm1)
06407         "addl       $16, " VAR(KR) "\n\t"
06408         "cmpl       %%r13d, " VAR(KR) "\n\t"
06409         "jl     12b\n\t"
06410         "\n"
06411         "13:\n\t"
06412 
06413         AESENC_LAST15_DEC_AVX()
06414         "\n"
06415         "55:\n\t"
06416 
06417         CALC_TAG()
06418         "addq       $" VAR(STACK_OFFSET) ", %%rsp\n\t"
06419         "popq       %%rdx\n\t"
06420         CMP_TAG()
06421 
06422         :
06423         : [KEY] "r" (key),
06424           [in] "r" (in), [out] "r" (out), [nr] "r" (nr),
06425           [nbytes] "r" (nbytes), [abytes] "r" (abytes), [addt] "r" (addt),
06426           [ivec] "r" (iv), [ibytes] "r" (ivLen), [tbytes] "r" (tagLen),
06427           [tag] "r" (tag), [res] "r" (res),
06428           [BSWAP_MASK] "m" (BSWAP_MASK),
06429           [BSWAP_EPI64] "m" (BSWAP_EPI64),
06430           [ONE] "m" (ONE),
06431 #if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL)
06432           [TWO] "m" (TWO), [THREE] "m" (THREE), [FOUR] "m" (FOUR),
06433           [FIVE] "m" (FIVE), [SIX] "m" (SIX), [SEVEN] "m" (SEVEN),
06434           [EIGHT] "m" (EIGHT),
06435 #endif
06436           [MOD2_128] "m" (MOD2_128)
06437         : "xmm15", "xmm14", "xmm13", "xmm12",
06438           "xmm0", "xmm1", "xmm2", "xmm3", "memory",
06439           "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11",
06440           "rcx", "r13"
06441     );
06442 }
06443 
06444 #ifdef HAVE_INTEL_AVX1
06445 static void AES_GCM_decrypt_avx1(const unsigned char *in, unsigned char *out,
06446                                  const unsigned char* addt,
06447                                  const unsigned char* ivec,
06448                                  const unsigned char *tag, int nbytes,
06449                                  int abytes, int ibytes, int tbytes,
06450                                  const unsigned char* key, int nr, int* res)
06451 {
06452     register const unsigned char* iv asm("rax") = ivec;
06453     register int ivLen asm("ebx") = ibytes;
06454     register int tagLen asm("edx") = tbytes;
06455 
06456     __asm__ __volatile__ (
06457         "pushq      %%rdx\n\t"
06458         "subq       $" VAR(STACK_OFFSET) ", %%rsp\n\t"
06459         /* Counter is xmm13 */
06460         "vpxor      %%xmm13, %%xmm13, %%xmm13\n\t"
06461         "vpxor      %%xmm15, %%xmm15, %%xmm15\n\t"
06462         "movl       %[ibytes], %%edx\n\t"
06463         "cmpl       $12, %%edx\n\t"
06464         "jne        35f\n\t"
06465         CALC_IV_12_AVX1()
06466         "\n"
06467         "35:\n\t"
06468         CALC_IV_AVX1()
06469         "\n"
06470         "39:\n\t"
06471 
06472         CALC_AAD_AVX1()
06473 
06474         "# Calculate counter and H\n\t"
06475         "vpsrlq     $63, " VAR(HR) ", %%xmm5\n\t"
06476         "vpsllq     $1, " VAR(HR) ", %%xmm4\n\t"
06477         "vpslldq    $8, %%xmm5, %%xmm5\n\t"
06478         "vpor       %%xmm5, %%xmm4, %%xmm4\n\t"
06479         "vpshufd    $0xff, " VAR(HR) ", " VAR(HR) "\n\t"
06480         "vpsrad     $31, " VAR(HR) ", " VAR(HR) "\n\t"
06481         "vpshufb    %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t"
06482         "vpand      %[MOD2_128], " VAR(HR) ", " VAR(HR) "\n\t"
06483         "vpaddd     %[ONE], %%xmm13, %%xmm13\n\t"
06484         "vpxor      %%xmm4, " VAR(HR) ", " VAR(HR) "\n\t"
06485         "vmovdqu    %%xmm13, " VAR(CTR1) "\n\t"
06486 
06487         "xorl       " VAR(KR) ", " VAR(KR) "\n\t"
06488 
06489 #if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL)
06490         "cmpl       $128, %[nbytes]\n\t"
06491         "jl     5f\n\t"
06492 
06493         CALC_HT_8_AVX1()
06494 
06495         "movl       %[nbytes], %%r13d\n\t"
06496         "andl       $0xffffff80, %%r13d\n\t"
06497         "\n"
06498         "2:\n\t"
06499         VAESENC_128_GHASH_AVX1(%%rcx, 128)
06500         "addl       $128, " VAR(KR) "\n\t"
06501         "cmpl       %%r13d, " VAR(KR) "\n\t"
06502         "jl     2b\n\t"
06503 
06504         "vmovdqa    %%xmm2, " VAR(XR) "\n\t"
06505         "vmovdqu    (%%rsp), " VAR(HR) "\n\t"
06506     "5:\n\t"
06507         "movl       %[nbytes], %%edx\n\t"
06508         "cmpl       %%edx, " VAR(KR) "\n\t"
06509         "jge        55f\n\t"
06510 #endif
06511         "movl       %[nbytes], %%r13d\n\t"
06512         "andl       $0xfffffff0, %%r13d\n\t"
06513         "cmpl       %%r13d, " VAR(KR) "\n\t"
06514         "jge        13f\n\t"
06515 
06516         "\n"
06517         "12:\n\t"
06518         "vmovdqu    (%[in]," VAR(KR64) ",1), %%xmm9\n\t"
06519         "vmovdqa    " VAR(HR) ", %%xmm0\n\t"
06520         "vpshufb    %[BSWAP_MASK], %%xmm9, %%xmm1\n\t"
06521         "vpxor      " VAR(XR) ", %%xmm1, %%xmm1\n\t"
06522         VAESENC_GFMUL(%%xmm9, %%xmm0, %%xmm1)
06523         "addl       $16, " VAR(KR) "\n\t"
06524         "cmpl       %%r13d, " VAR(KR) "\n\t"
06525         "jl     12b\n\t"
06526         "\n"
06527         "13:\n\t"
06528 
06529         AESENC_LAST15_DEC_AVX1()
06530         "\n"
06531         "55:\n\t"
06532 
06533         CALC_TAG_AVX1()
06534         "addq       $" VAR(STACK_OFFSET) ", %%rsp\n\t"
06535         "popq       %%rdx\n\t"
06536         CMP_TAG_AVX()
06537         "vzeroupper\n\t"
06538 
06539         :
06540         : [KEY] "r" (key),
06541           [in] "r" (in), [out] "r" (out), [nr] "r" (nr),
06542           [nbytes] "r" (nbytes), [abytes] "r" (abytes), [addt] "r" (addt),
06543           [ivec] "r" (iv), [ibytes] "r" (ivLen), [tbytes] "r" (tagLen),
06544           [tag] "r" (tag), [res] "r" (res),
06545           [BSWAP_MASK] "m" (BSWAP_MASK),
06546           [BSWAP_EPI64] "m" (BSWAP_EPI64),
06547           [ONE] "m" (ONE),
06548 #if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX1_NO_UNROLL)
06549           [TWO] "m" (TWO), [THREE] "m" (THREE), [FOUR] "m" (FOUR),
06550           [FIVE] "m" (FIVE), [SIX] "m" (SIX), [SEVEN] "m" (SEVEN),
06551           [EIGHT] "m" (EIGHT),
06552 #endif
06553           [MOD2_128] "m" (MOD2_128)
06554         : "xmm15", "xmm14", "xmm13", "xmm12",
06555           "xmm0", "xmm1", "xmm2", "xmm3", "memory",
06556           "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11",
06557           "rcx", "r13"
06558     );
06559 }
06560 
06561 #ifdef HAVE_INTEL_AVX2
06562 static void AES_GCM_decrypt_avx2(const unsigned char *in, unsigned char *out,
06563                                  const unsigned char* addt,
06564                                  const unsigned char* ivec,
06565                                  const unsigned char *tag, int nbytes,
06566                                  int abytes, int ibytes, int tbytes,
06567                                  const unsigned char* key, int nr, int* res)
06568 {
06569     register const unsigned char* iv asm("rax") = ivec;
06570     register int ivLen asm("ebx") = ibytes;
06571     register int tagLen asm("edx") = tbytes;
06572 
06573     __asm__ __volatile__ (
06574         "pushq      %%rdx\n\t"
06575         "subq       $" VAR(STACK_OFFSET) ", %%rsp\n\t"
06576         /* Counter is xmm13 */
06577         "vpxor      %%xmm13, %%xmm13, %%xmm13\n\t"
06578         "vpxor      %%xmm15, %%xmm15, %%xmm15\n\t"
06579         "movl       %[ibytes], %%edx\n\t"
06580         "cmpl       $12, %%edx\n\t"
06581         "jne        35f\n\t"
06582         CALC_IV_12_AVX2()
06583         "jmp        39f\n\t"
06584         "\n"
06585         "35:\n\t"
06586         CALC_IV_AVX2()
06587         "\n"
06588         "39:\n\t"
06589 
06590         CALC_AAD_AVX2()
06591 
06592         "# Calculate counter and H\n\t"
06593         "vpsrlq     $63, " VAR(HR) ", %%xmm5\n\t"
06594         "vpsllq     $1, " VAR(HR) ", %%xmm4\n\t"
06595         "vpslldq    $8, %%xmm5, %%xmm5\n\t"
06596         "vpor       %%xmm5, %%xmm4, %%xmm4\n\t"
06597         "vpshufd    $0xff, " VAR(HR) ", " VAR(HR) "\n\t"
06598         "vpsrad     $31, " VAR(HR) ", " VAR(HR) "\n\t"
06599         "vpshufb    %[BSWAP_EPI64], %%xmm13, %%xmm13\n\t"
06600         "vpand      %[MOD2_128], " VAR(HR) ", " VAR(HR) "\n\t"
06601         "vpaddd     %[ONE], %%xmm13, %%xmm13\n\t"
06602         "vpxor      %%xmm4, " VAR(HR) ", " VAR(HR) "\n\t"
06603         "vmovdqu    %%xmm13, " VAR(CTR1) "\n\t"
06604 
06605         "xorl       " VAR(KR) ", " VAR(KR) "\n\t"
06606 
06607 #if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX2_NO_UNROLL)
06608         "cmpl       $128, %[nbytes]\n\t"
06609         "jl     5f\n\t"
06610 
06611         CALC_HT_8_AVX2()
06612 
06613         "movl       %[nbytes], %%r13d\n\t"
06614         "andl       $0xffffff80, %%r13d\n\t"
06615         "\n"
06616         "2:\n\t"
06617         VAESENC_128_GHASH_AVX2(%%rcx, 128)
06618         "addl       $128, " VAR(KR) "\n\t"
06619         "cmpl       %%r13d, " VAR(KR) "\n\t"
06620         "jl     2b\n\t"
06621 
06622         "vmovdqa    %%xmm2, " VAR(XR) "\n\t"
06623         "vmovdqu    (%%rsp), " VAR(HR) "\n\t"
06624     "5:\n\t"
06625         "movl       %[nbytes], %%edx\n\t"
06626         "cmpl       %%edx, " VAR(KR) "\n\t"
06627         "jge        55f\n\t"
06628 #endif
06629         "movl       %[nbytes], %%r13d\n\t"
06630         "andl       $0xfffffff0, %%r13d\n\t"
06631         "cmpl       %%r13d, " VAR(KR) "\n\t"
06632         "jge        13f\n\t"
06633 
06634         "vmovdqa    %[MOD2_128], %%xmm0\n\t"
06635         "\n"
06636         "12:\n\t"
06637         "vmovdqu    (%[in]," VAR(KR64) ",1), %%xmm9\n\t"
06638         "vmovdqu    " VAR(CTR1) ", %%xmm5\n\t"
06639         "vpshufb    %[BSWAP_MASK], %%xmm9, %%xmm1\n\t"
06640         "vpshufb    %[BSWAP_EPI64], %%xmm5, %%xmm4\n\t"
06641         "vpaddd     %[ONE], %%xmm5, %%xmm5\n\t"
06642         "vpxor      " VAR(XR) ", %%xmm1, %%xmm1\n\t"
06643         "vmovdqu    %%xmm5, " VAR(CTR1) "\n\t"
06644         VAESENC_GFMUL_SB_AVX2(%%xmm9, HR, %%xmm1, CTR1)
06645         "vmovdqu    %%xmm4, (%[out]," VAR(KR64) ",1)\n\t"
06646         "addl       $16, " VAR(KR) "\n\t"
06647         "cmpl       %%r13d, " VAR(KR) "\n\t"
06648         "jl     12b\n\t"
06649         "\n"
06650         "13:\n\t"
06651 
06652         AESENC_LAST15_DEC_AVX2()
06653         "\n"
06654         "55:\n\t"
06655 
06656         CALC_TAG_AVX2()
06657         "addq       $" VAR(STACK_OFFSET) ", %%rsp\n\t"
06658         "popq       %%rdx\n\t"
06659         CMP_TAG_AVX()
06660         "vzeroupper\n\t"
06661 
06662         :
06663         : [KEY] "r" (key),
06664           [in] "r" (in), [out] "r" (out), [nr] "r" (nr),
06665           [nbytes] "r" (nbytes), [abytes] "r" (abytes), [addt] "r" (addt),
06666           [ivec] "r" (iv), [ibytes] "r" (ivLen), [tbytes] "r" (tagLen),
06667           [tag] "r" (tag), [res] "r" (res),
06668           [BSWAP_MASK] "m" (BSWAP_MASK),
06669           [BSWAP_EPI64] "m" (BSWAP_EPI64),
06670           [ONE] "m" (ONE),
06671 #if !defined(AES_GCM_AESNI_NO_UNROLL) && !defined(AES_GCM_AVX2_NO_UNROLL)
06672           [TWO] "m" (TWO), [THREE] "m" (THREE), [FOUR] "m" (FOUR),
06673           [FIVE] "m" (FIVE), [SIX] "m" (SIX), [SEVEN] "m" (SEVEN),
06674           [EIGHT] "m" (EIGHT),
06675 #endif
06676           [MOD2_128] "m" (MOD2_128)
06677         : "xmm15", "xmm14", "xmm13", "xmm12",
06678           "xmm0", "xmm1", "xmm2", "xmm3", "memory",
06679           "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11",
06680           "rcx", "r13"
06681     );
06682 }
06683 #endif /* HAVE_INTEL_AVX2 */
06684 #endif /* HAVE_INTEL_AVX1 */
06685 #endif /* HAVE_AES_DECRYPT */
06686 
06687 #else /* _MSC_VER */
06688 /* The following are for MSC based builds which do not allow
06689  * inline assembly. Intrinsic functions are used instead. */
06690 
06691 #define aes_gcm_calc_iv_12(KEY, ivec, nr, H, Y, T)         \
06692 do                                                         \
06693 {                                                          \
06694     word32 iv12[4];                                        \
06695     iv12[0] = *(word32*)&ivec[0];                          \
06696     iv12[1] = *(word32*)&ivec[4];                          \
06697     iv12[2] = *(word32*)&ivec[8];                          \
06698     iv12[3] = 0x01000000;                                  \
06699     Y = _mm_loadu_si128((__m128i*)iv12);                   \
06700                                                            \
06701     /* (Compute E[ZERO, KS] and E[Y0, KS] together */      \
06702     tmp1 = _mm_load_si128(&KEY[0]);                        \
06703     tmp2 = _mm_xor_si128(Y, KEY[0]);                       \
06704     tmp1 = _mm_aesenc_si128(tmp1, KEY[1]);                 \
06705     tmp2 = _mm_aesenc_si128(tmp2, KEY[1]);                 \
06706     tmp1 = _mm_aesenc_si128(tmp1, KEY[2]);                 \
06707     tmp2 = _mm_aesenc_si128(tmp2, KEY[2]);                 \
06708     tmp1 = _mm_aesenc_si128(tmp1, KEY[3]);                 \
06709     tmp2 = _mm_aesenc_si128(tmp2, KEY[3]);                 \
06710     tmp1 = _mm_aesenc_si128(tmp1, KEY[4]);                 \
06711     tmp2 = _mm_aesenc_si128(tmp2, KEY[4]);                 \
06712     tmp1 = _mm_aesenc_si128(tmp1, KEY[5]);                 \
06713     tmp2 = _mm_aesenc_si128(tmp2, KEY[5]);                 \
06714     tmp1 = _mm_aesenc_si128(tmp1, KEY[6]);                 \
06715     tmp2 = _mm_aesenc_si128(tmp2, KEY[6]);                 \
06716     tmp1 = _mm_aesenc_si128(tmp1, KEY[7]);                 \
06717     tmp2 = _mm_aesenc_si128(tmp2, KEY[7]);                 \
06718     tmp1 = _mm_aesenc_si128(tmp1, KEY[8]);                 \
06719     tmp2 = _mm_aesenc_si128(tmp2, KEY[8]);                 \
06720     tmp1 = _mm_aesenc_si128(tmp1, KEY[9]);                 \
06721     tmp2 = _mm_aesenc_si128(tmp2, KEY[9]);                 \
06722     lastKey = KEY[10];                                     \
06723     if (nr > 10) {                                         \
06724         tmp1 = _mm_aesenc_si128(tmp1, lastKey);            \
06725         tmp2 = _mm_aesenc_si128(tmp2, lastKey);            \
06726         tmp1 = _mm_aesenc_si128(tmp1, KEY[11]);            \
06727         tmp2 = _mm_aesenc_si128(tmp2, KEY[11]);            \
06728         lastKey = KEY[12];                                 \
06729         if (nr > 12) {                                     \
06730             tmp1 = _mm_aesenc_si128(tmp1, lastKey);        \
06731             tmp2 = _mm_aesenc_si128(tmp2, lastKey);        \
06732             tmp1 = _mm_aesenc_si128(tmp1, KEY[13]);        \
06733             tmp2 = _mm_aesenc_si128(tmp2, KEY[13]);        \
06734             lastKey = KEY[14];                             \
06735         }                                                  \
06736     }                                                      \
06737     H = _mm_aesenclast_si128(tmp1, lastKey);               \
06738     T = _mm_aesenclast_si128(tmp2, lastKey);               \
06739     H = _mm_shuffle_epi8(H, BSWAP_MASK);                   \
06740 }                                                          \
06741 while (0)
06742 
06743 #define aes_gcm_calc_iv(KEY, ivec, ibytes, nr, H, Y, T)         \
06744 do                                                              \
06745 {                                                               \
06746     if (ibytes % 16) {                                          \
06747         i = ibytes / 16;                                        \
06748         for (j=0; j < (int)(ibytes%16); j++)                    \
06749             ((unsigned char*)&last_block)[j] = ivec[i*16+j];    \
06750     }                                                           \
06751     tmp1 = _mm_load_si128(&KEY[0]);                             \
06752     tmp1 = _mm_aesenc_si128(tmp1, KEY[1]);                      \
06753     tmp1 = _mm_aesenc_si128(tmp1, KEY[2]);                      \
06754     tmp1 = _mm_aesenc_si128(tmp1, KEY[3]);                      \
06755     tmp1 = _mm_aesenc_si128(tmp1, KEY[4]);                      \
06756     tmp1 = _mm_aesenc_si128(tmp1, KEY[5]);                      \
06757     tmp1 = _mm_aesenc_si128(tmp1, KEY[6]);                      \
06758     tmp1 = _mm_aesenc_si128(tmp1, KEY[7]);                      \
06759     tmp1 = _mm_aesenc_si128(tmp1, KEY[8]);                      \
06760     tmp1 = _mm_aesenc_si128(tmp1, KEY[9]);                      \
06761     lastKey = KEY[10];                                          \
06762     if (nr > 10) {                                              \
06763         tmp1 = _mm_aesenc_si128(tmp1, lastKey);                 \
06764         tmp1 = _mm_aesenc_si128(tmp1, KEY[11]);                 \
06765         lastKey = KEY[12];                                      \
06766         if (nr > 12) {                                          \
06767             tmp1 = _mm_aesenc_si128(tmp1, lastKey);             \
06768             tmp1 = _mm_aesenc_si128(tmp1, KEY[13]);             \
06769             lastKey = KEY[14];                                  \
06770         }                                                       \
06771     }                                                           \
06772     H = _mm_aesenclast_si128(tmp1, lastKey);                    \
06773     H = _mm_shuffle_epi8(H, BSWAP_MASK);                        \
06774     Y = _mm_setzero_si128();                                    \
06775     for (i=0; i < (int)(ibytes/16); i++) {                      \
06776         tmp1 = _mm_loadu_si128(&((__m128i*)ivec)[i]);           \
06777         tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);              \
06778         Y = _mm_xor_si128(Y, tmp1);                             \
06779         Y = gfmul_sw(Y, H);                                     \
06780     }                                                           \
06781     if (ibytes % 16) {                                          \
06782         tmp1 = last_block;                                      \
06783         tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);              \
06784         Y = _mm_xor_si128(Y, tmp1);                             \
06785         Y = gfmul_sw(Y, H);                                     \
06786     }                                                           \
06787     tmp1 = _mm_insert_epi64(tmp1, ibytes*8, 0);                 \
06788     tmp1 = _mm_insert_epi64(tmp1, 0, 1);                        \
06789     Y = _mm_xor_si128(Y, tmp1);                                 \
06790     Y = gfmul_sw(Y, H);                                         \
06791     Y = _mm_shuffle_epi8(Y, BSWAP_MASK); /* Compute E(K, Y0) */ \
06792     tmp1 = _mm_xor_si128(Y, KEY[0]);                            \
06793     tmp1 = _mm_aesenc_si128(tmp1, KEY[1]);                      \
06794     tmp1 = _mm_aesenc_si128(tmp1, KEY[2]);                      \
06795     tmp1 = _mm_aesenc_si128(tmp1, KEY[3]);                      \
06796     tmp1 = _mm_aesenc_si128(tmp1, KEY[4]);                      \
06797     tmp1 = _mm_aesenc_si128(tmp1, KEY[5]);                      \
06798     tmp1 = _mm_aesenc_si128(tmp1, KEY[6]);                      \
06799     tmp1 = _mm_aesenc_si128(tmp1, KEY[7]);                      \
06800     tmp1 = _mm_aesenc_si128(tmp1, KEY[8]);                      \
06801     tmp1 = _mm_aesenc_si128(tmp1, KEY[9]);                      \
06802     lastKey = KEY[10];                                          \
06803     if (nr > 10) {                                              \
06804         tmp1 = _mm_aesenc_si128(tmp1, lastKey);                 \
06805         tmp1 = _mm_aesenc_si128(tmp1, KEY[11]);                 \
06806         lastKey = KEY[12];                                      \
06807         if (nr > 12) {                                          \
06808             tmp1 = _mm_aesenc_si128(tmp1, lastKey);             \
06809             tmp1 = _mm_aesenc_si128(tmp1, KEY[13]);             \
06810             lastKey = KEY[14];                                  \
06811         }                                                       \
06812     }                                                           \
06813     T = _mm_aesenclast_si128(tmp1, lastKey);                    \
06814 }                                                               \
06815 while (0)
06816 
/* Apply one AES encryption round (AESENC with round key KEY[j]) to all
 * eight counter blocks tmp1..tmp8 kept in flight by the unrolled GCM
 * loops.  tmp1..tmp8 and KEY must be in scope at the expansion site. */
#define AES_ENC_8(j)                       \
    tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); \
    tmp2 = _mm_aesenc_si128(tmp2, KEY[j]); \
    tmp3 = _mm_aesenc_si128(tmp3, KEY[j]); \
    tmp4 = _mm_aesenc_si128(tmp4, KEY[j]); \
    tmp5 = _mm_aesenc_si128(tmp5, KEY[j]); \
    tmp6 = _mm_aesenc_si128(tmp6, KEY[j]); \
    tmp7 = _mm_aesenc_si128(tmp7, KEY[j]); \
    tmp8 = _mm_aesenc_si128(tmp8, KEY[j]);
06826 
/* Final AES round for the eight pipelined blocks: AESENCLAST each of
 * tmp1..tmp8 with lastKey, XOR the resulting keystream blocks with the
 * next eight 16-byte blocks of 'in', and store to 'out'.  Interleaves
 * pairs of AESENCLAST with their load/xor/store to overlap latencies.
 * Uses i, in, out, lastKey and tmp1..tmp8 from the expansion site. */
#define AES_ENC_LAST_8()                                                  \
    tmp1 =_mm_aesenclast_si128(tmp1, lastKey);                            \
    tmp2 =_mm_aesenclast_si128(tmp2, lastKey);                            \
    tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[i*8+0]));  \
    tmp2 = _mm_xor_si128(tmp2, _mm_loadu_si128(&((__m128i*)in)[i*8+1]));  \
    _mm_storeu_si128(&((__m128i*)out)[i*8+0], tmp1);                      \
    _mm_storeu_si128(&((__m128i*)out)[i*8+1], tmp2);                      \
    tmp3 =_mm_aesenclast_si128(tmp3, lastKey);                            \
    tmp4 =_mm_aesenclast_si128(tmp4, lastKey);                            \
    tmp3 = _mm_xor_si128(tmp3, _mm_loadu_si128(&((__m128i*)in)[i*8+2]));  \
    tmp4 = _mm_xor_si128(tmp4, _mm_loadu_si128(&((__m128i*)in)[i*8+3]));  \
    _mm_storeu_si128(&((__m128i*)out)[i*8+2], tmp3);                      \
    _mm_storeu_si128(&((__m128i*)out)[i*8+3], tmp4);                      \
    tmp5 =_mm_aesenclast_si128(tmp5, lastKey);                            \
    tmp6 =_mm_aesenclast_si128(tmp6, lastKey);                            \
    tmp5 = _mm_xor_si128(tmp5, _mm_loadu_si128(&((__m128i*)in)[i*8+4]));  \
    tmp6 = _mm_xor_si128(tmp6, _mm_loadu_si128(&((__m128i*)in)[i*8+5]));  \
    _mm_storeu_si128(&((__m128i*)out)[i*8+4], tmp5);                      \
    _mm_storeu_si128(&((__m128i*)out)[i*8+5], tmp6);                      \
    tmp7 =_mm_aesenclast_si128(tmp7, lastKey);                            \
    tmp8 =_mm_aesenclast_si128(tmp8, lastKey);                            \
    tmp7 = _mm_xor_si128(tmp7, _mm_loadu_si128(&((__m128i*)in)[i*8+6]));  \
    tmp8 = _mm_xor_si128(tmp8, _mm_loadu_si128(&((__m128i*)in)[i*8+7]));  \
    _mm_storeu_si128(&((__m128i*)out)[i*8+6], tmp7);                      \
    _mm_storeu_si128(&((__m128i*)out)[i*8+7], tmp8);
06852 
06853 
/* Multiply a by b in the GHASH field GF(2^128) and return the fully
 * reduced 128-bit product.  Uses PCLMULQDQ for the carryless multiply
 * and pure shift/xor ops for the modular reduction. */
static __m128i gfmul_sw(__m128i a, __m128i b)
{
    __m128i r, t1, t2, t3, t4, t5, t6, t7;
    /* 128 x 128 carryless multiply, Karatsuba style: three PCLMULQDQs
     * (hi*hi, lo*lo, and the xor-folded middle term) produce the
     * 256-bit product as t1 (low 128 bits) : t4 (high 128 bits). */
    t2 = _mm_shuffle_epi32(b, 78);
    t3 = _mm_shuffle_epi32(a, 78);
    t2 = _mm_xor_si128(t2, b);
    t3 = _mm_xor_si128(t3, a);
    t4 = _mm_clmulepi64_si128(b, a, 0x11);
    t1 = _mm_clmulepi64_si128(b, a, 0x00);
    t2 = _mm_clmulepi64_si128(t2, t3, 0x00);
    t2 = _mm_xor_si128(t2, t1);
    t2 = _mm_xor_si128(t2, t4);
    t3 = _mm_slli_si128(t2, 8);
    t2 = _mm_srli_si128(t2, 8);
    t1 = _mm_xor_si128(t1, t3);
    t4 = _mm_xor_si128(t4, t2);

    /* Shift the whole 256-bit product left by one bit, carrying across
     * the 32-bit lanes and across the t1/t4 halves. */
    t5 = _mm_srli_epi32(t1, 31);
    t6 = _mm_srli_epi32(t4, 31);
    t1 = _mm_slli_epi32(t1, 1);
    t4 = _mm_slli_epi32(t4, 1);
    t7 = _mm_srli_si128(t5, 12);
    t5 = _mm_slli_si128(t5, 4);
    t6 = _mm_slli_si128(t6, 4);
    t4 = _mm_or_si128(t4, t7);
    t1 = _mm_or_si128(t1, t5);
    t4 = _mm_or_si128(t4, t6);

    /* Reduce modulo the GHASH polynomial x^128 + x^7 + x^2 + x + 1:
     * the 31/30/25 left shifts and 1/2/7 right shifts below implement
     * the two folding steps of the reduction. */
    t5 = _mm_slli_epi32(t1, 31);
    t6 = _mm_slli_epi32(t1, 30);
    t7 = _mm_slli_epi32(t1, 25);
    t5 = _mm_xor_si128(t5, t6);
    t5 = _mm_xor_si128(t5, t7);

    t6 = _mm_srli_si128(t5, 4);
    t5 = _mm_slli_si128(t5, 12);
    t1 = _mm_xor_si128(t1, t5);
    t7 = _mm_srli_epi32(t1, 1);
    t3 = _mm_srli_epi32(t1, 2);
    t2 = _mm_srli_epi32(t1, 7);

    t7 = _mm_xor_si128(t7, t3);
    t7 = _mm_xor_si128(t7, t2);
    t7 = _mm_xor_si128(t7, t6);
    t7 = _mm_xor_si128(t7, t1);
    r = _mm_xor_si128(t4, t7);

    return r;
}
06903 
/* Carryless-multiply a by b (Karatsuba: three PCLMULQDQs) and XOR the
 * unreduced 256-bit product into the accumulator, *r0 holding the low
 * 128 bits and *r1 the high 128 bits.  Reduction is deferred to
 * ghash_red() so several products can be accumulated first. */
static void gfmul_only(__m128i a, __m128i b, __m128i* r0, __m128i* r1)
{
    __m128i lo, hi, mid, aFold, bFold;

    /* 128 x 128 Carryless Multiply */
    bFold = _mm_xor_si128(_mm_shuffle_epi32(b, 78), b);
    aFold = _mm_xor_si128(_mm_shuffle_epi32(a, 78), a);
    hi  = _mm_clmulepi64_si128(b, a, 0x11);
    lo  = _mm_clmulepi64_si128(b, a, 0x00);
    mid = _mm_clmulepi64_si128(bFold, aFold, 0x00);
    /* Karatsuba middle term: subtract (xor) the outer products. */
    mid = _mm_xor_si128(mid, lo);
    mid = _mm_xor_si128(mid, hi);
    /* Split the middle term across the low and high halves. */
    lo = _mm_xor_si128(lo, _mm_slli_si128(mid, 8));
    hi = _mm_xor_si128(hi, _mm_srli_si128(mid, 8));
    /* Accumulate into the caller's running 256-bit sum. */
    *r0 = _mm_xor_si128(lo, *r0);
    *r1 = _mm_xor_si128(hi, *r1);
}
06925 
06926 static __m128i gfmul_shl1(__m128i a)
06927 {
06928     __m128i t1 = a, t2;
06929     t2 = _mm_srli_epi64(t1, 63);
06930     t1 = _mm_slli_epi64(t1, 1);
06931     t2 = _mm_slli_si128(t2, 8);
06932     t1 = _mm_or_si128(t1, t2);
06933     /* if (a[1] >> 63) t1 = _mm_xor_si128(t1, MOD2_128); */
06934     a = _mm_shuffle_epi32(a, 0xff);
06935     a = _mm_srai_epi32(a, 31);
06936     a = _mm_and_si128(a, MOD2_128);
06937     t1 = _mm_xor_si128(t1, a);
06938     return t1;
06939 }
06940 
/* Reduce an unreduced 256-bit GHASH product (r0 = low 128 bits,
 * r1 = high 128 bits, as accumulated by gfmul_only) modulo the GHASH
 * polynomial x^128 + x^7 + x^2 + x + 1 and return the 128-bit result. */
static __m128i ghash_red(__m128i r0, __m128i r1)
{
    __m128i t2, t3;
    __m128i t5, t6, t7;

    /* First folding step: multiply the low half by the low terms of the
     * polynomial (left shifts by 31, 30, 25). */
    t5 = _mm_slli_epi32(r0, 31);
    t6 = _mm_slli_epi32(r0, 30);
    t7 = _mm_slli_epi32(r0, 25);
    t5 = _mm_xor_si128(t5, t6);
    t5 = _mm_xor_si128(t5, t7);

    /* Second folding step: fold the partial result back in with right
     * shifts by 1, 2 and 7, then combine with the high half. */
    t6 = _mm_srli_si128(t5, 4);
    t5 = _mm_slli_si128(t5, 12);
    r0 = _mm_xor_si128(r0, t5);
    t7 = _mm_srli_epi32(r0, 1);
    t3 = _mm_srli_epi32(r0, 2);
    t2 = _mm_srli_epi32(r0, 7);

    t7 = _mm_xor_si128(t7, t3);
    t7 = _mm_xor_si128(t7, t2);
    t7 = _mm_xor_si128(t7, t6);
    t7 = _mm_xor_si128(t7, r0);
    return _mm_xor_si128(r1, t7);
}
06965 
06966 static __m128i gfmul_shifted(__m128i a, __m128i b)
06967 {
06968     __m128i t0 = _mm_setzero_si128(), t1 = _mm_setzero_si128();
06969     gfmul_only(a, b, &t0, &t1);
06970     return ghash_red(t0, t1);
06971 }
06972 
06973 #ifndef AES_GCM_AESNI_NO_UNROLL
06974 static __m128i gfmul8(__m128i a1, __m128i a2, __m128i a3, __m128i a4,
06975                       __m128i a5, __m128i a6, __m128i a7, __m128i a8,
06976                       __m128i b1, __m128i b2, __m128i b3, __m128i b4,
06977                       __m128i b5, __m128i b6, __m128i b7, __m128i b8)
06978 {
06979     __m128i t0 = _mm_setzero_si128(), t1 = _mm_setzero_si128();
06980     gfmul_only(a1, b8, &t0, &t1);
06981     gfmul_only(a2, b7, &t0, &t1);
06982     gfmul_only(a3, b6, &t0, &t1);
06983     gfmul_only(a4, b5, &t0, &t1);
06984     gfmul_only(a5, b4, &t0, &t1);
06985     gfmul_only(a6, b3, &t0, &t1);
06986     gfmul_only(a7, b2, &t0, &t1);
06987     gfmul_only(a8, b1, &t0, &t1);
06988     return ghash_red(t0, t1);
06989 }
06990 #endif
06991 
06992 
06993 static void AES_GCM_encrypt(const unsigned char *in,
06994                               unsigned char *out,
06995                               const unsigned char* addt,
06996                               const unsigned char* ivec,
06997                               unsigned char *tag, unsigned int nbytes,
06998                               unsigned int abytes, unsigned int ibytes,
06999                               unsigned int tbytes,
07000                               const unsigned char* key, int nr)
07001 {
07002     int i, j ,k;
07003     __m128i ctr1;
07004     __m128i H, Y, T;
07005     __m128i X = _mm_setzero_si128();
07006     __m128i *KEY = (__m128i*)key, lastKey;
07007     __m128i last_block = _mm_setzero_si128();
07008     __m128i tmp1, tmp2;
07009 #ifndef AES_GCM_AESNI_NO_UNROLL
07010     __m128i HT[8];
07011     __m128i r0, r1;
07012     __m128i XV;
07013     __m128i tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
07014 #endif
07015 
07016     if (ibytes == 12)
07017         aes_gcm_calc_iv_12(KEY, ivec, nr, H, Y, T);
07018     else
07019         aes_gcm_calc_iv(KEY, ivec, ibytes, nr, H, Y, T);
07020 
07021     for (i=0; i < (int)(abytes/16); i++) {
07022         tmp1 = _mm_loadu_si128(&((__m128i*)addt)[i]);
07023         tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
07024         X = _mm_xor_si128(X, tmp1);
07025         X = gfmul_sw(X, H);
07026     }
07027     if (abytes%16) {
07028         last_block = _mm_setzero_si128();
07029         for (j=0; j < (int)(abytes%16); j++)
07030             ((unsigned char*)&last_block)[j] = addt[i*16+j];
07031         tmp1 = last_block;
07032         tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
07033         X = _mm_xor_si128(X, tmp1);
07034         X = gfmul_sw(X, H);
07035     }
07036     tmp1 = _mm_shuffle_epi8(Y, BSWAP_EPI64);
07037     ctr1 = _mm_add_epi32(tmp1, ONE);
07038     H = gfmul_shl1(H);
07039 
07040 #ifndef AES_GCM_AESNI_NO_UNROLL
07041     i = 0;
07042     if (nbytes >= 16*8) {
07043         HT[0] = H;
07044         HT[1] = gfmul_shifted(H, H);
07045         HT[2] = gfmul_shifted(H, HT[1]);
07046         HT[3] = gfmul_shifted(HT[1], HT[1]);
07047         HT[4] = gfmul_shifted(HT[1], HT[2]);
07048         HT[5] = gfmul_shifted(HT[2], HT[2]);
07049         HT[6] = gfmul_shifted(HT[2], HT[3]);
07050         HT[7] = gfmul_shifted(HT[3], HT[3]);
07051 
07052         tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
07053         tmp2 = _mm_add_epi32(ctr1, ONE);
07054         tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_EPI64);
07055         tmp3 = _mm_add_epi32(ctr1, TWO);
07056         tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_EPI64);
07057         tmp4 = _mm_add_epi32(ctr1, THREE);
07058         tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_EPI64);
07059         tmp5 = _mm_add_epi32(ctr1, FOUR);
07060         tmp5 = _mm_shuffle_epi8(tmp5, BSWAP_EPI64);
07061         tmp6 = _mm_add_epi32(ctr1, FIVE);
07062         tmp6 = _mm_shuffle_epi8(tmp6, BSWAP_EPI64);
07063         tmp7 = _mm_add_epi32(ctr1, SIX);
07064         tmp7 = _mm_shuffle_epi8(tmp7, BSWAP_EPI64);
07065         tmp8 = _mm_add_epi32(ctr1, SEVEN);
07066         tmp8 = _mm_shuffle_epi8(tmp8, BSWAP_EPI64);
07067         ctr1 = _mm_add_epi32(ctr1, EIGHT);
07068         tmp1 =_mm_xor_si128(tmp1, KEY[0]);
07069         tmp2 =_mm_xor_si128(tmp2, KEY[0]);
07070         tmp3 =_mm_xor_si128(tmp3, KEY[0]);
07071         tmp4 =_mm_xor_si128(tmp4, KEY[0]);
07072         tmp5 =_mm_xor_si128(tmp5, KEY[0]);
07073         tmp6 =_mm_xor_si128(tmp6, KEY[0]);
07074         tmp7 =_mm_xor_si128(tmp7, KEY[0]);
07075         tmp8 =_mm_xor_si128(tmp8, KEY[0]);
07076         AES_ENC_8(1);
07077         AES_ENC_8(2);
07078         AES_ENC_8(3);
07079         AES_ENC_8(4);
07080         AES_ENC_8(5);
07081         AES_ENC_8(6);
07082         AES_ENC_8(7);
07083         AES_ENC_8(8);
07084         AES_ENC_8(9);
07085         lastKey = KEY[10];
07086         if (nr > 10) {
07087             AES_ENC_8(10);
07088             AES_ENC_8(11);
07089             lastKey = KEY[12];
07090             if (nr > 12) {
07091                 AES_ENC_8(12);
07092                 AES_ENC_8(13);
07093                 lastKey = KEY[14];
07094             }
07095         }
07096         AES_ENC_LAST_8();
07097 
07098         for (i=1; i < (int)(nbytes/16/8); i++) {
07099                 r0 = _mm_setzero_si128();
07100                 r1 = _mm_setzero_si128();
07101             tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
07102             tmp2 = _mm_add_epi32(ctr1, ONE);
07103             tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_EPI64);
07104             tmp3 = _mm_add_epi32(ctr1, TWO);
07105             tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_EPI64);
07106             tmp4 = _mm_add_epi32(ctr1, THREE);
07107             tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_EPI64);
07108             tmp5 = _mm_add_epi32(ctr1, FOUR);
07109             tmp5 = _mm_shuffle_epi8(tmp5, BSWAP_EPI64);
07110             tmp6 = _mm_add_epi32(ctr1, FIVE);
07111             tmp6 = _mm_shuffle_epi8(tmp6, BSWAP_EPI64);
07112             tmp7 = _mm_add_epi32(ctr1, SIX);
07113             tmp7 = _mm_shuffle_epi8(tmp7, BSWAP_EPI64);
07114             tmp8 = _mm_add_epi32(ctr1, SEVEN);
07115             tmp8 = _mm_shuffle_epi8(tmp8, BSWAP_EPI64);
07116             ctr1 = _mm_add_epi32(ctr1, EIGHT);
07117             tmp1 =_mm_xor_si128(tmp1, KEY[0]);
07118             tmp2 =_mm_xor_si128(tmp2, KEY[0]);
07119             tmp3 =_mm_xor_si128(tmp3, KEY[0]);
07120             tmp4 =_mm_xor_si128(tmp4, KEY[0]);
07121             tmp5 =_mm_xor_si128(tmp5, KEY[0]);
07122             tmp6 =_mm_xor_si128(tmp6, KEY[0]);
07123             tmp7 =_mm_xor_si128(tmp7, KEY[0]);
07124             tmp8 =_mm_xor_si128(tmp8, KEY[0]);
07125                 /* 128 x 128 Carryless Multiply */
07126                 XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+0]);
07127                 XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
07128                 XV = _mm_xor_si128(XV, X);
07129                 gfmul_only(XV, HT[7], &r0, &r1);
07130             tmp1 = _mm_aesenc_si128(tmp1, KEY[1]);
07131             tmp2 = _mm_aesenc_si128(tmp2, KEY[1]);
07132             tmp3 = _mm_aesenc_si128(tmp3, KEY[1]);
07133             tmp4 = _mm_aesenc_si128(tmp4, KEY[1]);
07134             tmp5 = _mm_aesenc_si128(tmp5, KEY[1]);
07135             tmp6 = _mm_aesenc_si128(tmp6, KEY[1]);
07136             tmp7 = _mm_aesenc_si128(tmp7, KEY[1]);
07137             tmp8 = _mm_aesenc_si128(tmp8, KEY[1]);
07138                 /* 128 x 128 Carryless Multiply */
07139                 XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+1]);
07140                 XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
07141                 gfmul_only(XV, HT[6], &r0, &r1);
07142             tmp1 = _mm_aesenc_si128(tmp1, KEY[2]);
07143             tmp2 = _mm_aesenc_si128(tmp2, KEY[2]);
07144             tmp3 = _mm_aesenc_si128(tmp3, KEY[2]);
07145             tmp4 = _mm_aesenc_si128(tmp4, KEY[2]);
07146             tmp5 = _mm_aesenc_si128(tmp5, KEY[2]);
07147             tmp6 = _mm_aesenc_si128(tmp6, KEY[2]);
07148             tmp7 = _mm_aesenc_si128(tmp7, KEY[2]);
07149             tmp8 = _mm_aesenc_si128(tmp8, KEY[2]);
07150                 /* 128 x 128 Carryless Multiply */
07151                 XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+2]);
07152                 XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
07153                 gfmul_only(XV, HT[5], &r0, &r1);
07154             tmp1 = _mm_aesenc_si128(tmp1, KEY[3]);
07155             tmp2 = _mm_aesenc_si128(tmp2, KEY[3]);
07156             tmp3 = _mm_aesenc_si128(tmp3, KEY[3]);
07157             tmp4 = _mm_aesenc_si128(tmp4, KEY[3]);
07158             tmp5 = _mm_aesenc_si128(tmp5, KEY[3]);
07159             tmp6 = _mm_aesenc_si128(tmp6, KEY[3]);
07160             tmp7 = _mm_aesenc_si128(tmp7, KEY[3]);
07161             tmp8 = _mm_aesenc_si128(tmp8, KEY[3]);
07162                 /* 128 x 128 Carryless Multiply */
07163                 XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+3]);
07164                 XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
07165                 gfmul_only(XV, HT[4], &r0, &r1);
07166             tmp1 = _mm_aesenc_si128(tmp1, KEY[4]);
07167             tmp2 = _mm_aesenc_si128(tmp2, KEY[4]);
07168             tmp3 = _mm_aesenc_si128(tmp3, KEY[4]);
07169             tmp4 = _mm_aesenc_si128(tmp4, KEY[4]);
07170             tmp5 = _mm_aesenc_si128(tmp5, KEY[4]);
07171             tmp6 = _mm_aesenc_si128(tmp6, KEY[4]);
07172             tmp7 = _mm_aesenc_si128(tmp7, KEY[4]);
07173             tmp8 = _mm_aesenc_si128(tmp8, KEY[4]);
07174                 /* 128 x 128 Carryless Multiply */
07175                 XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+4]);
07176                 XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
07177                 gfmul_only(XV, HT[3], &r0, &r1);
07178             tmp1 = _mm_aesenc_si128(tmp1, KEY[5]);
07179             tmp2 = _mm_aesenc_si128(tmp2, KEY[5]);
07180             tmp3 = _mm_aesenc_si128(tmp3, KEY[5]);
07181             tmp4 = _mm_aesenc_si128(tmp4, KEY[5]);
07182             tmp5 = _mm_aesenc_si128(tmp5, KEY[5]);
07183             tmp6 = _mm_aesenc_si128(tmp6, KEY[5]);
07184             tmp7 = _mm_aesenc_si128(tmp7, KEY[5]);
07185             tmp8 = _mm_aesenc_si128(tmp8, KEY[5]);
07186                 /* 128 x 128 Carryless Multiply */
07187                 XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+5]);
07188                 XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
07189                 gfmul_only(XV, HT[2], &r0, &r1);
07190             tmp1 = _mm_aesenc_si128(tmp1, KEY[6]);
07191             tmp2 = _mm_aesenc_si128(tmp2, KEY[6]);
07192             tmp3 = _mm_aesenc_si128(tmp3, KEY[6]);
07193             tmp4 = _mm_aesenc_si128(tmp4, KEY[6]);
07194             tmp5 = _mm_aesenc_si128(tmp5, KEY[6]);
07195             tmp6 = _mm_aesenc_si128(tmp6, KEY[6]);
07196             tmp7 = _mm_aesenc_si128(tmp7, KEY[6]);
07197             tmp8 = _mm_aesenc_si128(tmp8, KEY[6]);
07198                 /* 128 x 128 Carryless Multiply */
07199                 XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+6]);
07200                 XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
07201                 gfmul_only(XV, HT[1], &r0, &r1);
07202             tmp1 = _mm_aesenc_si128(tmp1, KEY[7]);
07203             tmp2 = _mm_aesenc_si128(tmp2, KEY[7]);
07204             tmp3 = _mm_aesenc_si128(tmp3, KEY[7]);
07205             tmp4 = _mm_aesenc_si128(tmp4, KEY[7]);
07206             tmp5 = _mm_aesenc_si128(tmp5, KEY[7]);
07207             tmp6 = _mm_aesenc_si128(tmp6, KEY[7]);
07208             tmp7 = _mm_aesenc_si128(tmp7, KEY[7]);
07209             tmp8 = _mm_aesenc_si128(tmp8, KEY[7]);
07210                 /* 128 x 128 Carryless Multiply */
07211                 XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+7]);
07212                 XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
07213                 gfmul_only(XV, HT[0], &r0, &r1);
07214             tmp1 = _mm_aesenc_si128(tmp1, KEY[8]);
07215             tmp2 = _mm_aesenc_si128(tmp2, KEY[8]);
07216             tmp3 = _mm_aesenc_si128(tmp3, KEY[8]);
07217             tmp4 = _mm_aesenc_si128(tmp4, KEY[8]);
07218             tmp5 = _mm_aesenc_si128(tmp5, KEY[8]);
07219             tmp6 = _mm_aesenc_si128(tmp6, KEY[8]);
07220             tmp7 = _mm_aesenc_si128(tmp7, KEY[8]);
07221             tmp8 = _mm_aesenc_si128(tmp8, KEY[8]);
07222                 /* Reduction */
07223                 X = ghash_red(r0, r1);
07224             tmp1 = _mm_aesenc_si128(tmp1, KEY[9]);
07225             tmp2 = _mm_aesenc_si128(tmp2, KEY[9]);
07226             tmp3 = _mm_aesenc_si128(tmp3, KEY[9]);
07227             tmp4 = _mm_aesenc_si128(tmp4, KEY[9]);
07228             tmp5 = _mm_aesenc_si128(tmp5, KEY[9]);
07229             tmp6 = _mm_aesenc_si128(tmp6, KEY[9]);
07230             tmp7 = _mm_aesenc_si128(tmp7, KEY[9]);
07231             tmp8 = _mm_aesenc_si128(tmp8, KEY[9]);
07232             lastKey = KEY[10];
07233             if (nr > 10) {
07234                 tmp1 = _mm_aesenc_si128(tmp1, KEY[10]);
07235                 tmp2 = _mm_aesenc_si128(tmp2, KEY[10]);
07236                 tmp3 = _mm_aesenc_si128(tmp3, KEY[10]);
07237                 tmp4 = _mm_aesenc_si128(tmp4, KEY[10]);
07238                 tmp5 = _mm_aesenc_si128(tmp5, KEY[10]);
07239                 tmp6 = _mm_aesenc_si128(tmp6, KEY[10]);
07240                 tmp7 = _mm_aesenc_si128(tmp7, KEY[10]);
07241                 tmp8 = _mm_aesenc_si128(tmp8, KEY[10]);
07242                 tmp1 = _mm_aesenc_si128(tmp1, KEY[11]);
07243                 tmp2 = _mm_aesenc_si128(tmp2, KEY[11]);
07244                 tmp3 = _mm_aesenc_si128(tmp3, KEY[11]);
07245                 tmp4 = _mm_aesenc_si128(tmp4, KEY[11]);
07246                 tmp5 = _mm_aesenc_si128(tmp5, KEY[11]);
07247                 tmp6 = _mm_aesenc_si128(tmp6, KEY[11]);
07248                 tmp7 = _mm_aesenc_si128(tmp7, KEY[11]);
07249                 tmp8 = _mm_aesenc_si128(tmp8, KEY[11]);
07250                 lastKey = KEY[12];
07251                 if (nr > 12) {
07252                     tmp1 = _mm_aesenc_si128(tmp1, KEY[12]);
07253                     tmp2 = _mm_aesenc_si128(tmp2, KEY[12]);
07254                     tmp3 = _mm_aesenc_si128(tmp3, KEY[12]);
07255                     tmp4 = _mm_aesenc_si128(tmp4, KEY[12]);
07256                     tmp5 = _mm_aesenc_si128(tmp5, KEY[12]);
07257                     tmp6 = _mm_aesenc_si128(tmp6, KEY[12]);
07258                     tmp7 = _mm_aesenc_si128(tmp7, KEY[12]);
07259                     tmp8 = _mm_aesenc_si128(tmp8, KEY[12]);
07260                     tmp1 = _mm_aesenc_si128(tmp1, KEY[13]);
07261                     tmp2 = _mm_aesenc_si128(tmp2, KEY[13]);
07262                     tmp3 = _mm_aesenc_si128(tmp3, KEY[13]);
07263                     tmp4 = _mm_aesenc_si128(tmp4, KEY[13]);
07264                     tmp5 = _mm_aesenc_si128(tmp5, KEY[13]);
07265                     tmp6 = _mm_aesenc_si128(tmp6, KEY[13]);
07266                     tmp7 = _mm_aesenc_si128(tmp7, KEY[13]);
07267                     tmp8 = _mm_aesenc_si128(tmp8, KEY[13]);
07268                     lastKey = KEY[14];
07269                 }
07270             }
07271             AES_ENC_LAST_8();
07272         }
07273 
07274         tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
07275         tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_MASK);
07276         tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_MASK);
07277         tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_MASK);
07278         tmp5 = _mm_shuffle_epi8(tmp5, BSWAP_MASK);
07279         tmp6 = _mm_shuffle_epi8(tmp6, BSWAP_MASK);
07280         tmp7 = _mm_shuffle_epi8(tmp7, BSWAP_MASK);
07281         tmp8 = _mm_shuffle_epi8(tmp8, BSWAP_MASK);
07282         tmp1 = _mm_xor_si128(X, tmp1);
07283         X = gfmul8(tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8,
07284                    HT[0], HT[1], HT[2], HT[3], HT[4], HT[5], HT[6], HT[7]);
07285     }
07286     for (k = i*8; k < (int)(nbytes/16); k++) {
07287         tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
07288         ctr1 = _mm_add_epi32(ctr1, ONE);
07289         tmp1 = _mm_xor_si128(tmp1, KEY[0]);
07290         tmp1 = _mm_aesenc_si128(tmp1, KEY[1]);
07291         tmp1 = _mm_aesenc_si128(tmp1, KEY[2]);
07292         tmp1 = _mm_aesenc_si128(tmp1, KEY[3]);
07293         tmp1 = _mm_aesenc_si128(tmp1, KEY[4]);
07294         tmp1 = _mm_aesenc_si128(tmp1, KEY[5]);
07295         tmp1 = _mm_aesenc_si128(tmp1, KEY[6]);
07296         tmp1 = _mm_aesenc_si128(tmp1, KEY[7]);
07297         tmp1 = _mm_aesenc_si128(tmp1, KEY[8]);
07298         tmp1 = _mm_aesenc_si128(tmp1, KEY[9]);
07299         lastKey = KEY[10];
07300         if (nr > 10) {
07301             tmp1 = _mm_aesenc_si128(tmp1, lastKey);
07302             tmp1 = _mm_aesenc_si128(tmp1, KEY[11]);
07303             lastKey = KEY[12];
07304             if (nr > 12) {
07305                 tmp1 = _mm_aesenc_si128(tmp1, lastKey);
07306                 tmp1 = _mm_aesenc_si128(tmp1, KEY[13]);
07307                 lastKey = KEY[14];
07308             }
07309         }
07310         tmp1 = _mm_aesenclast_si128(tmp1, lastKey);
07311         tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[k]));
07312         _mm_storeu_si128(&((__m128i*)out)[k], tmp1);
07313         tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
07314         X =_mm_xor_si128(X, tmp1);
07315         X = gfmul_shifted(X, H);
07316     }
07317 #else /* AES_GCM_AESNI_NO_UNROLL */
07318     for (k = 0; k < (int)(nbytes/16) && k < 1; k++) {
07319         tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
07320         ctr1 = _mm_add_epi32(ctr1, ONE);
07321         tmp1 = _mm_xor_si128(tmp1, KEY[0]);
07322         tmp1 = _mm_aesenc_si128(tmp1, KEY[1]);
07323         tmp1 = _mm_aesenc_si128(tmp1, KEY[2]);
07324         tmp1 = _mm_aesenc_si128(tmp1, KEY[3]);
07325         tmp1 = _mm_aesenc_si128(tmp1, KEY[4]);
07326         tmp1 = _mm_aesenc_si128(tmp1, KEY[5]);
07327         tmp1 = _mm_aesenc_si128(tmp1, KEY[6]);
07328         tmp1 = _mm_aesenc_si128(tmp1, KEY[7]);
07329         tmp1 = _mm_aesenc_si128(tmp1, KEY[8]);
07330         tmp1 = _mm_aesenc_si128(tmp1, KEY[9]);
07331         lastKey = KEY[10];
07332         if (nr > 10) {
07333             tmp1 = _mm_aesenc_si128(tmp1, lastKey);
07334             tmp1 = _mm_aesenc_si128(tmp1, KEY[11]);
07335             lastKey = KEY[12];
07336             if (nr > 12) {
07337                 tmp1 = _mm_aesenc_si128(tmp1, lastKey);
07338                 tmp1 = _mm_aesenc_si128(tmp1, KEY[13]);
07339                 lastKey = KEY[14];
07340             }
07341         }
07342         tmp1 = _mm_aesenclast_si128(tmp1, lastKey);
07343         tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[k]));
07344         _mm_storeu_si128(&((__m128i*)out)[k], tmp1);
07345         tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
07346         X =_mm_xor_si128(X, tmp1);
07347     }
07348     for (; k < (int)(nbytes/16); k++) {
07349         tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
07350         ctr1 = _mm_add_epi32(ctr1, ONE);
07351         tmp1 = _mm_xor_si128(tmp1, KEY[0]);
07352         tmp1 = _mm_aesenc_si128(tmp1, KEY[1]);
07353         tmp1 = _mm_aesenc_si128(tmp1, KEY[2]);
07354         tmp1 = _mm_aesenc_si128(tmp1, KEY[3]);
07355         tmp1 = _mm_aesenc_si128(tmp1, KEY[4]);
07356         tmp1 = _mm_aesenc_si128(tmp1, KEY[5]);
07357         tmp1 = _mm_aesenc_si128(tmp1, KEY[6]);
07358         tmp1 = _mm_aesenc_si128(tmp1, KEY[7]);
07359         tmp1 = _mm_aesenc_si128(tmp1, KEY[8]);
07360         tmp1 = _mm_aesenc_si128(tmp1, KEY[9]);
07361         X = gfmul_shifted(X, H);
07362         lastKey = KEY[10];
07363         if (nr > 10) {
07364             tmp1 = _mm_aesenc_si128(tmp1, lastKey);
07365             tmp1 = _mm_aesenc_si128(tmp1, KEY[11]);
07366             lastKey = KEY[12];
07367             if (nr > 12) {
07368                 tmp1 = _mm_aesenc_si128(tmp1, lastKey);
07369                 tmp1 = _mm_aesenc_si128(tmp1, KEY[13]);
07370                 lastKey = KEY[14];
07371             }
07372         }
07373         tmp1 = _mm_aesenclast_si128(tmp1, lastKey);
07374         tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[k]));
07375         _mm_storeu_si128(&((__m128i*)out)[k], tmp1);
07376         tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
07377         X =_mm_xor_si128(X, tmp1);
07378     }
07379     if (k > 0) {
07380         X = gfmul_shifted(X, H);
07381     }
07382 #endif /* AES_GCM_AESNI_NO_UNROLL */
07383 
07384     /* If one partial block remains */
07385     if (nbytes % 16) {
07386         tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
07387         tmp1 = _mm_xor_si128(tmp1, KEY[0]);
07388         tmp1 = _mm_aesenc_si128(tmp1, KEY[1]);
07389         tmp1 = _mm_aesenc_si128(tmp1, KEY[2]);
07390         tmp1 = _mm_aesenc_si128(tmp1, KEY[3]);
07391         tmp1 = _mm_aesenc_si128(tmp1, KEY[4]);
07392         tmp1 = _mm_aesenc_si128(tmp1, KEY[5]);
07393         tmp1 = _mm_aesenc_si128(tmp1, KEY[6]);
07394         tmp1 = _mm_aesenc_si128(tmp1, KEY[7]);
07395         tmp1 = _mm_aesenc_si128(tmp1, KEY[8]);
07396         tmp1 = _mm_aesenc_si128(tmp1, KEY[9]);
07397         lastKey = KEY[10];
07398         if (nr > 10) {
07399             tmp1 = _mm_aesenc_si128(tmp1, lastKey);
07400             tmp1 = _mm_aesenc_si128(tmp1, KEY[11]);
07401             lastKey = KEY[12];
07402             if (nr > 12) {
07403                 tmp1 = _mm_aesenc_si128(tmp1, lastKey);
07404                 tmp1 = _mm_aesenc_si128(tmp1, KEY[13]);
07405                 lastKey = KEY[14];
07406             }
07407         }
07408         tmp1 = _mm_aesenclast_si128(tmp1, lastKey);
07409         last_block = tmp1;
07410         for (j=0; j < (int)(nbytes%16); j++)
07411             ((unsigned char*)&last_block)[j] = in[k*16+j];
07412         tmp1 = _mm_xor_si128(tmp1, last_block);
07413         last_block = tmp1;
07414         for (j=0; j < (int)(nbytes%16); j++)
07415             out[k*16+j] = ((unsigned char*)&last_block)[j];
07416         tmp1 = last_block;
07417         tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
07418         X =_mm_xor_si128(X, tmp1);
07419         X = gfmul_shifted(X, H);
07420     }
07421     tmp1 = _mm_insert_epi64(tmp1, nbytes*8, 0);
07422     tmp1 = _mm_insert_epi64(tmp1, abytes*8, 1);
07423     X = _mm_xor_si128(X, tmp1);
07424     X = gfmul_shifted(X, H);
07425     X = _mm_shuffle_epi8(X, BSWAP_MASK);
07426     T = _mm_xor_si128(X, T);
07427     /*_mm_storeu_si128((__m128i*)tag, T);*/
07428     XMEMCPY(tag, &T, tbytes);
07429 }
07430 
07431 #ifdef HAVE_AES_DECRYPT
07432 
07433 static void AES_GCM_decrypt(const unsigned char *in,
07434                            unsigned char *out,
07435                            const unsigned char* addt,
07436                            const unsigned char* ivec,
07437                            const unsigned char *tag, int nbytes, int abytes,
07438                            int ibytes, word32 tbytes, const unsigned char* key,
07439                            int nr, int* res)
07440 {
07441     int i, j ,k;
07442     __m128i H, Y, T;
07443     __m128i *KEY = (__m128i*)key, lastKey;
07444     __m128i ctr1;
07445     __m128i last_block = _mm_setzero_si128();
07446     __m128i X = _mm_setzero_si128();
07447     __m128i tmp1, tmp2, XV;
07448 #ifndef AES_GCM_AESNI_NO_UNROLL
07449     __m128i HT[8];
07450     __m128i r0, r1;
07451     __m128i tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
07452 #endif /* AES_GCM_AESNI_NO_UNROLL */
07453 
07454     if (ibytes == 12)
07455         aes_gcm_calc_iv_12(KEY, ivec, nr, H, Y, T);
07456     else
07457         aes_gcm_calc_iv(KEY, ivec, ibytes, nr, H, Y, T);
07458 
07459     for (i=0; i<abytes/16; i++) {
07460         tmp1 = _mm_loadu_si128(&((__m128i*)addt)[i]);
07461         tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
07462         X = _mm_xor_si128(X, tmp1);
07463         X = gfmul_sw(X, H);
07464     }
07465     if (abytes%16) {
07466         last_block = _mm_setzero_si128();
07467         for (j=0; j<abytes%16; j++)
07468             ((unsigned char*)&last_block)[j] = addt[i*16+j];
07469         tmp1 = last_block;
07470         tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
07471         X = _mm_xor_si128(X, tmp1);
07472         X = gfmul_sw(X, H);
07473     }
07474 
07475     tmp1 = _mm_shuffle_epi8(Y, BSWAP_EPI64);
07476     ctr1 = _mm_add_epi32(tmp1, ONE);
07477     H = gfmul_shl1(H);
07478     i = 0;
07479 
07480 #ifndef AES_GCM_AESNI_NO_UNROLL
07481 
07482     if (0 < nbytes/16/8) {
07483         HT[0] = H;
07484         HT[1] = gfmul_shifted(H, H);
07485         HT[2] = gfmul_shifted(H, HT[1]);
07486         HT[3] = gfmul_shifted(HT[1], HT[1]);
07487         HT[4] = gfmul_shifted(HT[1], HT[2]);
07488         HT[5] = gfmul_shifted(HT[2], HT[2]);
07489         HT[6] = gfmul_shifted(HT[2], HT[3]);
07490         HT[7] = gfmul_shifted(HT[3], HT[3]);
07491 
07492         for (; i < nbytes/16/8; i++) {
07493                 r0 = _mm_setzero_si128();
07494                 r1 = _mm_setzero_si128();
07495 
07496             tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
07497             tmp2 = _mm_add_epi32(ctr1, ONE);
07498             tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_EPI64);
07499             tmp3 = _mm_add_epi32(ctr1, TWO);
07500             tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_EPI64);
07501             tmp4 = _mm_add_epi32(ctr1, THREE);
07502             tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_EPI64);
07503             tmp5 = _mm_add_epi32(ctr1, FOUR);
07504             tmp5 = _mm_shuffle_epi8(tmp5, BSWAP_EPI64);
07505             tmp6 = _mm_add_epi32(ctr1, FIVE);
07506             tmp6 = _mm_shuffle_epi8(tmp6, BSWAP_EPI64);
07507             tmp7 = _mm_add_epi32(ctr1, SIX);
07508             tmp7 = _mm_shuffle_epi8(tmp7, BSWAP_EPI64);
07509             tmp8 = _mm_add_epi32(ctr1, SEVEN);
07510             tmp8 = _mm_shuffle_epi8(tmp8, BSWAP_EPI64);
07511             ctr1 = _mm_add_epi32(ctr1, EIGHT);
07512             tmp1 =_mm_xor_si128(tmp1, KEY[0]);
07513             tmp2 =_mm_xor_si128(tmp2, KEY[0]);
07514             tmp3 =_mm_xor_si128(tmp3, KEY[0]);
07515             tmp4 =_mm_xor_si128(tmp4, KEY[0]);
07516             tmp5 =_mm_xor_si128(tmp5, KEY[0]);
07517             tmp6 =_mm_xor_si128(tmp6, KEY[0]);
07518             tmp7 =_mm_xor_si128(tmp7, KEY[0]);
07519             tmp8 =_mm_xor_si128(tmp8, KEY[0]);
07520                 /* 128 x 128 Carryless Multiply */
07521                 XV = _mm_loadu_si128(&((__m128i*)in)[i*8+0]);
07522                 XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
07523                 XV = _mm_xor_si128(XV, X);
07524                 gfmul_only(XV, HT[7], &r0, &r1);
07525             tmp1 = _mm_aesenc_si128(tmp1, KEY[1]);
07526             tmp2 = _mm_aesenc_si128(tmp2, KEY[1]);
07527             tmp3 = _mm_aesenc_si128(tmp3, KEY[1]);
07528             tmp4 = _mm_aesenc_si128(tmp4, KEY[1]);
07529             tmp5 = _mm_aesenc_si128(tmp5, KEY[1]);
07530             tmp6 = _mm_aesenc_si128(tmp6, KEY[1]);
07531             tmp7 = _mm_aesenc_si128(tmp7, KEY[1]);
07532             tmp8 = _mm_aesenc_si128(tmp8, KEY[1]);
07533                 /* 128 x 128 Carryless Multiply */
07534                 XV = _mm_loadu_si128(&((__m128i*)in)[i*8+1]);
07535                 XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
07536                 gfmul_only(XV, HT[6], &r0, &r1);
07537             tmp1 = _mm_aesenc_si128(tmp1, KEY[2]);
07538             tmp2 = _mm_aesenc_si128(tmp2, KEY[2]);
07539             tmp3 = _mm_aesenc_si128(tmp3, KEY[2]);
07540             tmp4 = _mm_aesenc_si128(tmp4, KEY[2]);
07541             tmp5 = _mm_aesenc_si128(tmp5, KEY[2]);
07542             tmp6 = _mm_aesenc_si128(tmp6, KEY[2]);
07543             tmp7 = _mm_aesenc_si128(tmp7, KEY[2]);
07544             tmp8 = _mm_aesenc_si128(tmp8, KEY[2]);
07545                 /* 128 x 128 Carryless Multiply */
07546                 XV = _mm_loadu_si128(&((__m128i*)in)[i*8+2]);
07547                 XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
07548                 gfmul_only(XV, HT[5], &r0, &r1);
07549             tmp1 = _mm_aesenc_si128(tmp1, KEY[3]);
07550             tmp2 = _mm_aesenc_si128(tmp2, KEY[3]);
07551             tmp3 = _mm_aesenc_si128(tmp3, KEY[3]);
07552             tmp4 = _mm_aesenc_si128(tmp4, KEY[3]);
07553             tmp5 = _mm_aesenc_si128(tmp5, KEY[3]);
07554             tmp6 = _mm_aesenc_si128(tmp6, KEY[3]);
07555             tmp7 = _mm_aesenc_si128(tmp7, KEY[3]);
07556             tmp8 = _mm_aesenc_si128(tmp8, KEY[3]);
07557                 /* 128 x 128 Carryless Multiply */
07558                 XV = _mm_loadu_si128(&((__m128i*)in)[i*8+3]);
07559                 XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
07560                 gfmul_only(XV, HT[4], &r0, &r1);
07561             tmp1 = _mm_aesenc_si128(tmp1, KEY[4]);
07562             tmp2 = _mm_aesenc_si128(tmp2, KEY[4]);
07563             tmp3 = _mm_aesenc_si128(tmp3, KEY[4]);
07564             tmp4 = _mm_aesenc_si128(tmp4, KEY[4]);
07565             tmp5 = _mm_aesenc_si128(tmp5, KEY[4]);
07566             tmp6 = _mm_aesenc_si128(tmp6, KEY[4]);
07567             tmp7 = _mm_aesenc_si128(tmp7, KEY[4]);
07568             tmp8 = _mm_aesenc_si128(tmp8, KEY[4]);
07569                 /* 128 x 128 Carryless Multiply */
07570                 XV = _mm_loadu_si128(&((__m128i*)in)[i*8+4]);
07571                 XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
07572                 gfmul_only(XV, HT[3], &r0, &r1);
07573             tmp1 = _mm_aesenc_si128(tmp1, KEY[5]);
07574             tmp2 = _mm_aesenc_si128(tmp2, KEY[5]);
07575             tmp3 = _mm_aesenc_si128(tmp3, KEY[5]);
07576             tmp4 = _mm_aesenc_si128(tmp4, KEY[5]);
07577             tmp5 = _mm_aesenc_si128(tmp5, KEY[5]);
07578             tmp6 = _mm_aesenc_si128(tmp6, KEY[5]);
07579             tmp7 = _mm_aesenc_si128(tmp7, KEY[5]);
07580             tmp8 = _mm_aesenc_si128(tmp8, KEY[5]);
07581                 /* 128 x 128 Carryless Multiply */
07582                 XV = _mm_loadu_si128(&((__m128i*)in)[i*8+5]);
07583                 XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
07584                 gfmul_only(XV, HT[2], &r0, &r1);
07585             tmp1 = _mm_aesenc_si128(tmp1, KEY[6]);
07586             tmp2 = _mm_aesenc_si128(tmp2, KEY[6]);
07587             tmp3 = _mm_aesenc_si128(tmp3, KEY[6]);
07588             tmp4 = _mm_aesenc_si128(tmp4, KEY[6]);
07589             tmp5 = _mm_aesenc_si128(tmp5, KEY[6]);
07590             tmp6 = _mm_aesenc_si128(tmp6, KEY[6]);
07591             tmp7 = _mm_aesenc_si128(tmp7, KEY[6]);
07592             tmp8 = _mm_aesenc_si128(tmp8, KEY[6]);
07593                 /* 128 x 128 Carryless Multiply */
07594                 XV = _mm_loadu_si128(&((__m128i*)in)[i*8+6]);
07595                 XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
07596                 gfmul_only(XV, HT[1], &r0, &r1);
07597             tmp1 = _mm_aesenc_si128(tmp1, KEY[7]);
07598             tmp2 = _mm_aesenc_si128(tmp2, KEY[7]);
07599             tmp3 = _mm_aesenc_si128(tmp3, KEY[7]);
07600             tmp4 = _mm_aesenc_si128(tmp4, KEY[7]);
07601             tmp5 = _mm_aesenc_si128(tmp5, KEY[7]);
07602             tmp6 = _mm_aesenc_si128(tmp6, KEY[7]);
07603             tmp7 = _mm_aesenc_si128(tmp7, KEY[7]);
07604             tmp8 = _mm_aesenc_si128(tmp8, KEY[7]);
07605                 /* 128 x 128 Carryless Multiply */
07606                 XV = _mm_loadu_si128(&((__m128i*)in)[i*8+7]);
07607                 XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
07608                 gfmul_only(XV, HT[0], &r0, &r1);
07609             tmp1 = _mm_aesenc_si128(tmp1, KEY[8]);
07610             tmp2 = _mm_aesenc_si128(tmp2, KEY[8]);
07611             tmp3 = _mm_aesenc_si128(tmp3, KEY[8]);
07612             tmp4 = _mm_aesenc_si128(tmp4, KEY[8]);
07613             tmp5 = _mm_aesenc_si128(tmp5, KEY[8]);
07614             tmp6 = _mm_aesenc_si128(tmp6, KEY[8]);
07615             tmp7 = _mm_aesenc_si128(tmp7, KEY[8]);
07616             tmp8 = _mm_aesenc_si128(tmp8, KEY[8]);
07617                 /* Reduction */
07618                 X = ghash_red(r0, r1);
07619             tmp1 = _mm_aesenc_si128(tmp1, KEY[9]);
07620             tmp2 = _mm_aesenc_si128(tmp2, KEY[9]);
07621             tmp3 = _mm_aesenc_si128(tmp3, KEY[9]);
07622             tmp4 = _mm_aesenc_si128(tmp4, KEY[9]);
07623             tmp5 = _mm_aesenc_si128(tmp5, KEY[9]);
07624             tmp6 = _mm_aesenc_si128(tmp6, KEY[9]);
07625             tmp7 = _mm_aesenc_si128(tmp7, KEY[9]);
07626             tmp8 = _mm_aesenc_si128(tmp8, KEY[9]);
07627             lastKey = KEY[10];
07628             if (nr > 10) {
07629                 tmp1 = _mm_aesenc_si128(tmp1, KEY[10]);
07630                 tmp2 = _mm_aesenc_si128(tmp2, KEY[10]);
07631                 tmp3 = _mm_aesenc_si128(tmp3, KEY[10]);
07632                 tmp4 = _mm_aesenc_si128(tmp4, KEY[10]);
07633                 tmp5 = _mm_aesenc_si128(tmp5, KEY[10]);
07634                 tmp6 = _mm_aesenc_si128(tmp6, KEY[10]);
07635                 tmp7 = _mm_aesenc_si128(tmp7, KEY[10]);
07636                 tmp8 = _mm_aesenc_si128(tmp8, KEY[10]);
07637                 tmp1 = _mm_aesenc_si128(tmp1, KEY[11]);
07638                 tmp2 = _mm_aesenc_si128(tmp2, KEY[11]);
07639                 tmp3 = _mm_aesenc_si128(tmp3, KEY[11]);
07640                 tmp4 = _mm_aesenc_si128(tmp4, KEY[11]);
07641                 tmp5 = _mm_aesenc_si128(tmp5, KEY[11]);
07642                 tmp6 = _mm_aesenc_si128(tmp6, KEY[11]);
07643                 tmp7 = _mm_aesenc_si128(tmp7, KEY[11]);
07644                 tmp8 = _mm_aesenc_si128(tmp8, KEY[11]);
07645                 lastKey = KEY[12];
07646                 if (nr > 12) {
07647                     tmp1 = _mm_aesenc_si128(tmp1, KEY[12]);
07648                     tmp2 = _mm_aesenc_si128(tmp2, KEY[12]);
07649                     tmp3 = _mm_aesenc_si128(tmp3, KEY[12]);
07650                     tmp4 = _mm_aesenc_si128(tmp4, KEY[12]);
07651                     tmp5 = _mm_aesenc_si128(tmp5, KEY[12]);
07652                     tmp6 = _mm_aesenc_si128(tmp6, KEY[12]);
07653                     tmp7 = _mm_aesenc_si128(tmp7, KEY[12]);
07654                     tmp8 = _mm_aesenc_si128(tmp8, KEY[12]);
07655                     tmp1 = _mm_aesenc_si128(tmp1, KEY[13]);
07656                     tmp2 = _mm_aesenc_si128(tmp2, KEY[13]);
07657                     tmp3 = _mm_aesenc_si128(tmp3, KEY[13]);
07658                     tmp4 = _mm_aesenc_si128(tmp4, KEY[13]);
07659                     tmp5 = _mm_aesenc_si128(tmp5, KEY[13]);
07660                     tmp6 = _mm_aesenc_si128(tmp6, KEY[13]);
07661                     tmp7 = _mm_aesenc_si128(tmp7, KEY[13]);
07662                     tmp8 = _mm_aesenc_si128(tmp8, KEY[13]);
07663                     lastKey = KEY[14];
07664                 }
07665             }
07666             AES_ENC_LAST_8();
07667         }
07668     }
07669 
07670 #endif /* AES_GCM_AESNI_NO_UNROLL */
07671 
07672     for (k = i*8; k < nbytes/16; k++) {
07673         tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
07674         ctr1 = _mm_add_epi32(ctr1, ONE);
07675         tmp1 = _mm_xor_si128(tmp1, KEY[0]);
07676         tmp1 = _mm_aesenc_si128(tmp1, KEY[1]);
07677         tmp1 = _mm_aesenc_si128(tmp1, KEY[2]);
07678         tmp1 = _mm_aesenc_si128(tmp1, KEY[3]);
07679         tmp1 = _mm_aesenc_si128(tmp1, KEY[4]);
07680         tmp1 = _mm_aesenc_si128(tmp1, KEY[5]);
07681         tmp1 = _mm_aesenc_si128(tmp1, KEY[6]);
07682         tmp1 = _mm_aesenc_si128(tmp1, KEY[7]);
07683         tmp1 = _mm_aesenc_si128(tmp1, KEY[8]);
07684         tmp1 = _mm_aesenc_si128(tmp1, KEY[9]);
07685         /* 128 x 128 Carryless Multiply */
07686         XV = _mm_loadu_si128(&((__m128i*)in)[k]);
07687         XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
07688         XV = _mm_xor_si128(XV, X);
07689         X = gfmul_shifted(XV, H);
07690         lastKey = KEY[10];
07691         if (nr > 10) {
07692             tmp1 = _mm_aesenc_si128(tmp1, lastKey);
07693             tmp1 = _mm_aesenc_si128(tmp1, KEY[11]);
07694             lastKey = KEY[12];
07695             if (nr > 12) {
07696                 tmp1 = _mm_aesenc_si128(tmp1, lastKey);
07697                 tmp1 = _mm_aesenc_si128(tmp1, KEY[13]);
07698                 lastKey = KEY[14];
07699             }
07700         }
07701         tmp1 = _mm_aesenclast_si128(tmp1, lastKey);
07702         tmp2 = _mm_loadu_si128(&((__m128i*)in)[k]);
07703         tmp1 = _mm_xor_si128(tmp1, tmp2);
07704         _mm_storeu_si128(&((__m128i*)out)[k], tmp1);
07705     }
07706 
07707     /* If one partial block remains */
07708     if (nbytes % 16) {
07709         tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
07710         tmp1 = _mm_xor_si128(tmp1, KEY[0]);
07711         tmp1 = _mm_aesenc_si128(tmp1, KEY[1]);
07712         tmp1 = _mm_aesenc_si128(tmp1, KEY[2]);
07713         tmp1 = _mm_aesenc_si128(tmp1, KEY[3]);
07714         tmp1 = _mm_aesenc_si128(tmp1, KEY[4]);
07715         tmp1 = _mm_aesenc_si128(tmp1, KEY[5]);
07716         tmp1 = _mm_aesenc_si128(tmp1, KEY[6]);
07717         tmp1 = _mm_aesenc_si128(tmp1, KEY[7]);
07718         tmp1 = _mm_aesenc_si128(tmp1, KEY[8]);
07719         tmp1 = _mm_aesenc_si128(tmp1, KEY[9]);
07720         lastKey = KEY[10];
07721         if (nr > 10) {
07722             tmp1 = _mm_aesenc_si128(tmp1, lastKey);
07723             tmp1 = _mm_aesenc_si128(tmp1, KEY[11]);
07724             lastKey = KEY[12];
07725             if (nr > 12) {
07726                 tmp1 = _mm_aesenc_si128(tmp1, lastKey);
07727                 tmp1 = _mm_aesenc_si128(tmp1, KEY[13]);
07728                 lastKey = KEY[14];
07729             }
07730         }
07731         tmp1 = _mm_aesenclast_si128(tmp1, lastKey);
07732         last_block = _mm_setzero_si128();
07733         for (j=0; j < nbytes%16; j++)
07734             ((unsigned char*)&last_block)[j] = in[k*16+j];
07735         XV = last_block;
07736         tmp1 = _mm_xor_si128(tmp1, last_block);
07737         last_block = tmp1;
07738         for (j=0; j < nbytes%16; j++)
07739             out[k*16+j] = ((unsigned char*)&last_block)[j];
07740         XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
07741         XV = _mm_xor_si128(XV, X);
07742         X = gfmul_shifted(XV, H);
07743     }
07744 
07745     tmp1 = _mm_insert_epi64(tmp1, nbytes*8, 0);
07746     tmp1 = _mm_insert_epi64(tmp1, abytes*8, 1);
07747     /* 128 x 128 Carryless Multiply */
07748     X = _mm_xor_si128(X, tmp1);
07749     X = gfmul_shifted(X, H);
07750     X = _mm_shuffle_epi8(X, BSWAP_MASK);
07751     T = _mm_xor_si128(X, T);
07752 
07753 /*    if (0xffff !=
07754            _mm_movemask_epi8(_mm_cmpeq_epi8(T, _mm_loadu_si128((__m128i*)tag)))) */
07755     if (XMEMCMP(tag, &T, tbytes) != 0)
07756         *res = 0; /* in case the authentication failed */
07757     else
07758         *res = 1; /* when successful returns 1 */
07759 }
07760 
07761 #endif /* HAVE_AES_DECRYPT */
07762 #endif /* _MSC_VER */
07763 #endif /* WOLFSSL_AESNI */
07764 
07765 
07766 #if defined(GCM_SMALL)
07767 static void GMULT(byte* X, byte* Y)
07768 {
07769     byte Z[AES_BLOCK_SIZE];
07770     byte V[AES_BLOCK_SIZE];
07771     int i, j;
07772 
07773     XMEMSET(Z, 0, AES_BLOCK_SIZE);
07774     XMEMCPY(V, X, AES_BLOCK_SIZE);
07775     for (i = 0; i < AES_BLOCK_SIZE; i++)
07776     {
07777         byte y = Y[i];
07778         for (j = 0; j < 8; j++)
07779         {
07780             if (y & 0x80) {
07781                 xorbuf(Z, V, AES_BLOCK_SIZE);
07782             }
07783 
07784             RIGHTSHIFTX(V);
07785             y = y << 1;
07786         }
07787     }
07788     XMEMCPY(X, Z, AES_BLOCK_SIZE);
07789 }
07790 
07791 
07792 void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c,
07793     word32 cSz, byte* s, word32 sSz)
07794 {
07795     byte x[AES_BLOCK_SIZE];
07796     byte scratch[AES_BLOCK_SIZE];
07797     word32 blocks, partial;
07798     byte* h = aes->H;
07799 
07800     XMEMSET(x, 0, AES_BLOCK_SIZE);
07801 
07802     /* Hash in A, the Additional Authentication Data */
07803     if (aSz != 0 && a != NULL) {
07804         blocks = aSz / AES_BLOCK_SIZE;
07805         partial = aSz % AES_BLOCK_SIZE;
07806         while (blocks--) {
07807             xorbuf(x, a, AES_BLOCK_SIZE);
07808             GMULT(x, h);
07809             a += AES_BLOCK_SIZE;
07810         }
07811         if (partial != 0) {
07812             XMEMSET(scratch, 0, AES_BLOCK_SIZE);
07813             XMEMCPY(scratch, a, partial);
07814             xorbuf(x, scratch, AES_BLOCK_SIZE);
07815             GMULT(x, h);
07816         }
07817     }
07818 
07819     /* Hash in C, the Ciphertext */
07820     if (cSz != 0 && c != NULL) {
07821         blocks = cSz / AES_BLOCK_SIZE;
07822         partial = cSz % AES_BLOCK_SIZE;
07823         while (blocks--) {
07824             xorbuf(x, c, AES_BLOCK_SIZE);
07825             GMULT(x, h);
07826             c += AES_BLOCK_SIZE;
07827         }
07828         if (partial != 0) {
07829             XMEMSET(scratch, 0, AES_BLOCK_SIZE);
07830             XMEMCPY(scratch, c, partial);
07831             xorbuf(x, scratch, AES_BLOCK_SIZE);
07832             GMULT(x, h);
07833         }
07834     }
07835 
07836     /* Hash in the lengths of A and C in bits */
07837     FlattenSzInBits(&scratch[0], aSz);
07838     FlattenSzInBits(&scratch[8], cSz);
07839     xorbuf(x, scratch, AES_BLOCK_SIZE);
07840     GMULT(x, h);
07841 
07842     /* Copy the result into s. */
07843     XMEMCPY(s, x, sSz);
07844 }
07845 
07846 /* end GCM_SMALL */
07847 #elif defined(GCM_TABLE)
07848 
/* Reduction table for the table-driven GMULT below. When the hash state
 * is shifted right by one byte, the byte that falls off the low end must
 * be folded back into the top of the state: for shifted-out byte value b,
 * R[b][0] is written to the new top byte and R[b][1] is XORed into the
 * next byte (see GMULT's use of Z[0]/Z[1]). The values correspond to
 * multiplication by the GCM reduction polynomial (presumably precomputed
 * from 0xE1; confirm against the GCM spec if regenerating this table). */
static const byte R[256][2] = {
    {0x00, 0x00}, {0x01, 0xc2}, {0x03, 0x84}, {0x02, 0x46},
    {0x07, 0x08}, {0x06, 0xca}, {0x04, 0x8c}, {0x05, 0x4e},
    {0x0e, 0x10}, {0x0f, 0xd2}, {0x0d, 0x94}, {0x0c, 0x56},
    {0x09, 0x18}, {0x08, 0xda}, {0x0a, 0x9c}, {0x0b, 0x5e},
    {0x1c, 0x20}, {0x1d, 0xe2}, {0x1f, 0xa4}, {0x1e, 0x66},
    {0x1b, 0x28}, {0x1a, 0xea}, {0x18, 0xac}, {0x19, 0x6e},
    {0x12, 0x30}, {0x13, 0xf2}, {0x11, 0xb4}, {0x10, 0x76},
    {0x15, 0x38}, {0x14, 0xfa}, {0x16, 0xbc}, {0x17, 0x7e},
    {0x38, 0x40}, {0x39, 0x82}, {0x3b, 0xc4}, {0x3a, 0x06},
    {0x3f, 0x48}, {0x3e, 0x8a}, {0x3c, 0xcc}, {0x3d, 0x0e},
    {0x36, 0x50}, {0x37, 0x92}, {0x35, 0xd4}, {0x34, 0x16},
    {0x31, 0x58}, {0x30, 0x9a}, {0x32, 0xdc}, {0x33, 0x1e},
    {0x24, 0x60}, {0x25, 0xa2}, {0x27, 0xe4}, {0x26, 0x26},
    {0x23, 0x68}, {0x22, 0xaa}, {0x20, 0xec}, {0x21, 0x2e},
    {0x2a, 0x70}, {0x2b, 0xb2}, {0x29, 0xf4}, {0x28, 0x36},
    {0x2d, 0x78}, {0x2c, 0xba}, {0x2e, 0xfc}, {0x2f, 0x3e},
    {0x70, 0x80}, {0x71, 0x42}, {0x73, 0x04}, {0x72, 0xc6},
    {0x77, 0x88}, {0x76, 0x4a}, {0x74, 0x0c}, {0x75, 0xce},
    {0x7e, 0x90}, {0x7f, 0x52}, {0x7d, 0x14}, {0x7c, 0xd6},
    {0x79, 0x98}, {0x78, 0x5a}, {0x7a, 0x1c}, {0x7b, 0xde},
    {0x6c, 0xa0}, {0x6d, 0x62}, {0x6f, 0x24}, {0x6e, 0xe6},
    {0x6b, 0xa8}, {0x6a, 0x6a}, {0x68, 0x2c}, {0x69, 0xee},
    {0x62, 0xb0}, {0x63, 0x72}, {0x61, 0x34}, {0x60, 0xf6},
    {0x65, 0xb8}, {0x64, 0x7a}, {0x66, 0x3c}, {0x67, 0xfe},
    {0x48, 0xc0}, {0x49, 0x02}, {0x4b, 0x44}, {0x4a, 0x86},
    {0x4f, 0xc8}, {0x4e, 0x0a}, {0x4c, 0x4c}, {0x4d, 0x8e},
    {0x46, 0xd0}, {0x47, 0x12}, {0x45, 0x54}, {0x44, 0x96},
    {0x41, 0xd8}, {0x40, 0x1a}, {0x42, 0x5c}, {0x43, 0x9e},
    {0x54, 0xe0}, {0x55, 0x22}, {0x57, 0x64}, {0x56, 0xa6},
    {0x53, 0xe8}, {0x52, 0x2a}, {0x50, 0x6c}, {0x51, 0xae},
    {0x5a, 0xf0}, {0x5b, 0x32}, {0x59, 0x74}, {0x58, 0xb6},
    {0x5d, 0xf8}, {0x5c, 0x3a}, {0x5e, 0x7c}, {0x5f, 0xbe},
    {0xe1, 0x00}, {0xe0, 0xc2}, {0xe2, 0x84}, {0xe3, 0x46},
    {0xe6, 0x08}, {0xe7, 0xca}, {0xe5, 0x8c}, {0xe4, 0x4e},
    {0xef, 0x10}, {0xee, 0xd2}, {0xec, 0x94}, {0xed, 0x56},
    {0xe8, 0x18}, {0xe9, 0xda}, {0xeb, 0x9c}, {0xea, 0x5e},
    {0xfd, 0x20}, {0xfc, 0xe2}, {0xfe, 0xa4}, {0xff, 0x66},
    {0xfa, 0x28}, {0xfb, 0xea}, {0xf9, 0xac}, {0xf8, 0x6e},
    {0xf3, 0x30}, {0xf2, 0xf2}, {0xf0, 0xb4}, {0xf1, 0x76},
    {0xf4, 0x38}, {0xf5, 0xfa}, {0xf7, 0xbc}, {0xf6, 0x7e},
    {0xd9, 0x40}, {0xd8, 0x82}, {0xda, 0xc4}, {0xdb, 0x06},
    {0xde, 0x48}, {0xdf, 0x8a}, {0xdd, 0xcc}, {0xdc, 0x0e},
    {0xd7, 0x50}, {0xd6, 0x92}, {0xd4, 0xd4}, {0xd5, 0x16},
    {0xd0, 0x58}, {0xd1, 0x9a}, {0xd3, 0xdc}, {0xd2, 0x1e},
    {0xc5, 0x60}, {0xc4, 0xa2}, {0xc6, 0xe4}, {0xc7, 0x26},
    {0xc2, 0x68}, {0xc3, 0xaa}, {0xc1, 0xec}, {0xc0, 0x2e},
    {0xcb, 0x70}, {0xca, 0xb2}, {0xc8, 0xf4}, {0xc9, 0x36},
    {0xcc, 0x78}, {0xcd, 0xba}, {0xcf, 0xfc}, {0xce, 0x3e},
    {0x91, 0x80}, {0x90, 0x42}, {0x92, 0x04}, {0x93, 0xc6},
    {0x96, 0x88}, {0x97, 0x4a}, {0x95, 0x0c}, {0x94, 0xce},
    {0x9f, 0x90}, {0x9e, 0x52}, {0x9c, 0x14}, {0x9d, 0xd6},
    {0x98, 0x98}, {0x99, 0x5a}, {0x9b, 0x1c}, {0x9a, 0xde},
    {0x8d, 0xa0}, {0x8c, 0x62}, {0x8e, 0x24}, {0x8f, 0xe6},
    {0x8a, 0xa8}, {0x8b, 0x6a}, {0x89, 0x2c}, {0x88, 0xee},
    {0x83, 0xb0}, {0x82, 0x72}, {0x80, 0x34}, {0x81, 0xf6},
    {0x84, 0xb8}, {0x85, 0x7a}, {0x87, 0x3c}, {0x86, 0xfe},
    {0xa9, 0xc0}, {0xa8, 0x02}, {0xaa, 0x44}, {0xab, 0x86},
    {0xae, 0xc8}, {0xaf, 0x0a}, {0xad, 0x4c}, {0xac, 0x8e},
    {0xa7, 0xd0}, {0xa6, 0x12}, {0xa4, 0x54}, {0xa5, 0x96},
    {0xa0, 0xd8}, {0xa1, 0x1a}, {0xa3, 0x5c}, {0xa2, 0x9e},
    {0xb5, 0xe0}, {0xb4, 0x22}, {0xb6, 0x64}, {0xb7, 0xa6},
    {0xb2, 0xe8}, {0xb3, 0x2a}, {0xb1, 0x6c}, {0xb0, 0xae},
    {0xbb, 0xf0}, {0xba, 0x32}, {0xb8, 0x74}, {0xb9, 0xb6},
    {0xbc, 0xf8}, {0xbd, 0x3a}, {0xbf, 0x7c}, {0xbe, 0xbe} };
07914 
07915 
07916 static void GMULT(byte *x, byte m[256][AES_BLOCK_SIZE])
07917 {
07918     int i, j;
07919     byte Z[AES_BLOCK_SIZE];
07920     byte a;
07921 
07922     XMEMSET(Z, 0, sizeof(Z));
07923 
07924     for (i = 15; i > 0; i--) {
07925         xorbuf(Z, m[x[i]], AES_BLOCK_SIZE);
07926         a = Z[15];
07927 
07928         for (j = 15; j > 0; j--) {
07929             Z[j] = Z[j-1];
07930         }
07931 
07932         Z[0] = R[a][0];
07933         Z[1] ^= R[a][1];
07934     }
07935     xorbuf(Z, m[x[0]], AES_BLOCK_SIZE);
07936 
07937     XMEMCPY(x, Z, AES_BLOCK_SIZE);
07938 }
07939 
07940 
07941 void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c,
07942     word32 cSz, byte* s, word32 sSz)
07943 {
07944     byte x[AES_BLOCK_SIZE];
07945     byte scratch[AES_BLOCK_SIZE];
07946     word32 blocks, partial;
07947 
07948     XMEMSET(x, 0, AES_BLOCK_SIZE);
07949 
07950     /* Hash in A, the Additional Authentication Data */
07951     if (aSz != 0 && a != NULL) {
07952         blocks = aSz / AES_BLOCK_SIZE;
07953         partial = aSz % AES_BLOCK_SIZE;
07954         while (blocks--) {
07955             xorbuf(x, a, AES_BLOCK_SIZE);
07956             GMULT(x, aes->M0);
07957             a += AES_BLOCK_SIZE;
07958         }
07959         if (partial != 0) {
07960             XMEMSET(scratch, 0, AES_BLOCK_SIZE);
07961             XMEMCPY(scratch, a, partial);
07962             xorbuf(x, scratch, AES_BLOCK_SIZE);
07963             GMULT(x, aes->M0);
07964         }
07965     }
07966 
07967     /* Hash in C, the Ciphertext */
07968     if (cSz != 0 && c != NULL) {
07969         blocks = cSz / AES_BLOCK_SIZE;
07970         partial = cSz % AES_BLOCK_SIZE;
07971         while (blocks--) {
07972             xorbuf(x, c, AES_BLOCK_SIZE);
07973             GMULT(x, aes->M0);
07974             c += AES_BLOCK_SIZE;
07975         }
07976         if (partial != 0) {
07977             XMEMSET(scratch, 0, AES_BLOCK_SIZE);
07978             XMEMCPY(scratch, c, partial);
07979             xorbuf(x, scratch, AES_BLOCK_SIZE);
07980             GMULT(x, aes->M0);
07981         }
07982     }
07983 
07984     /* Hash in the lengths of A and C in bits */
07985     FlattenSzInBits(&scratch[0], aSz);
07986     FlattenSzInBits(&scratch[8], cSz);
07987     xorbuf(x, scratch, AES_BLOCK_SIZE);
07988     GMULT(x, aes->M0);
07989 
07990     /* Copy the result into s. */
07991     XMEMCPY(s, x, sSz);
07992 }
07993 
07994 /* end GCM_TABLE */
07995 #elif defined(WORD64_AVAILABLE) && !defined(GCM_WORD32)
07996 
07997 #if !defined(FREESCALE_LTC_AES_GCM)
07998 static void GMULT(word64* X, word64* Y)
07999 {
08000     word64 Z[2] = {0,0};
08001     word64 V[2];
08002     int i, j;
08003     V[0] = X[0];  V[1] = X[1];
08004 
08005     for (i = 0; i < 2; i++)
08006     {
08007         word64 y = Y[i];
08008         for (j = 0; j < 64; j++)
08009         {
08010             if (y & 0x8000000000000000ULL) {
08011                 Z[0] ^= V[0];
08012                 Z[1] ^= V[1];
08013             }
08014 
08015             if (V[1] & 0x0000000000000001) {
08016                 V[1] >>= 1;
08017                 V[1] |= ((V[0] & 0x0000000000000001) ?
08018                     0x8000000000000000ULL : 0);
08019                 V[0] >>= 1;
08020                 V[0] ^= 0xE100000000000000ULL;
08021             }
08022             else {
08023                 V[1] >>= 1;
08024                 V[1] |= ((V[0] & 0x0000000000000001) ?
08025                     0x8000000000000000ULL : 0);
08026                 V[0] >>= 1;
08027             }
08028             y <<= 1;
08029         }
08030     }
08031     X[0] = Z[0];
08032     X[1] = Z[1];
08033 }
08034 
08035 
/* GHASH (64-bit word variant): fold the AAD (a) and ciphertext (c) into
 * the GCM hash, then fold in their bit lengths, and copy sSz bytes of
 * the result into s.
 *
 * aes  context holding the hash subkey in aes->H
 * a    AAD bytes (may be NULL when aSz == 0)
 * aSz  AAD length in bytes
 * c    ciphertext bytes (may be NULL when cSz == 0)
 * cSz  ciphertext length in bytes
 * s    output buffer for the hash
 * sSz  number of hash bytes to write (expected <= AES_BLOCK_SIZE;
 *      larger would over-read the 16-byte local state)
 */
void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c,
    word32 cSz, byte* s, word32 sSz)
{
    word64 x[2] = {0,0};    /* running hash state */
    word32 blocks, partial;
    word64 bigH[2];         /* hash subkey H as two word64s */

    XMEMCPY(bigH, aes->H, AES_BLOCK_SIZE);
    #ifdef LITTLE_ENDIAN_ORDER
        /* GMULT operates on big-endian word64 values; byte-reverse on LE. */
        ByteReverseWords64(bigH, bigH, AES_BLOCK_SIZE);
    #endif

    /* Hash in A, the Additional Authentication Data */
    if (aSz != 0 && a != NULL) {
        word64 bigA[2];
        blocks = aSz / AES_BLOCK_SIZE;
        partial = aSz % AES_BLOCK_SIZE;
        while (blocks--) {
            XMEMCPY(bigA, a, AES_BLOCK_SIZE);
            #ifdef LITTLE_ENDIAN_ORDER
                ByteReverseWords64(bigA, bigA, AES_BLOCK_SIZE);
            #endif
            x[0] ^= bigA[0];
            x[1] ^= bigA[1];
            GMULT(x, bigH);
            a += AES_BLOCK_SIZE;
        }
        /* Zero-pad and fold in any final partial block. */
        if (partial != 0) {
            XMEMSET(bigA, 0, AES_BLOCK_SIZE);
            XMEMCPY(bigA, a, partial);
            #ifdef LITTLE_ENDIAN_ORDER
                ByteReverseWords64(bigA, bigA, AES_BLOCK_SIZE);
            #endif
            x[0] ^= bigA[0];
            x[1] ^= bigA[1];
            GMULT(x, bigH);
        }
    }

    /* Hash in C, the Ciphertext */
    if (cSz != 0 && c != NULL) {
        word64 bigC[2];
        blocks = cSz / AES_BLOCK_SIZE;
        partial = cSz % AES_BLOCK_SIZE;
        while (blocks--) {
            XMEMCPY(bigC, c, AES_BLOCK_SIZE);
            #ifdef LITTLE_ENDIAN_ORDER
                ByteReverseWords64(bigC, bigC, AES_BLOCK_SIZE);
            #endif
            x[0] ^= bigC[0];
            x[1] ^= bigC[1];
            GMULT(x, bigH);
            c += AES_BLOCK_SIZE;
        }
        /* Zero-pad and fold in any final partial block. */
        if (partial != 0) {
            XMEMSET(bigC, 0, AES_BLOCK_SIZE);
            XMEMCPY(bigC, c, partial);
            #ifdef LITTLE_ENDIAN_ORDER
                ByteReverseWords64(bigC, bigC, AES_BLOCK_SIZE);
            #endif
            x[0] ^= bigC[0];
            x[1] ^= bigC[1];
            GMULT(x, bigH);
        }
    }

    /* Hash in the lengths in bits of A and C */
    {
        word64 len[2];
        len[0] = aSz; len[1] = cSz;

        /* Lengths are in bytes. Convert to bits. */
        len[0] *= 8;
        len[1] *= 8;

        x[0] ^= len[0];
        x[1] ^= len[1];
        GMULT(x, bigH);
    }
    #ifdef LITTLE_ENDIAN_ORDER
        /* Convert the state back to byte order before copying out. */
        ByteReverseWords64(x, x, AES_BLOCK_SIZE);
    #endif
    XMEMCPY(s, x, sSz);
}
08120 #endif /* !FREESCALE_LTC_AES_GCM */
08121 
08122 /* end defined(WORD64_AVAILABLE) && !defined(GCM_WORD32) */
08123 #else /* GCM_WORD32 */
08124 
08125 static void GMULT(word32* X, word32* Y)
08126 {
08127     word32 Z[4] = {0,0,0,0};
08128     word32 V[4];
08129     int i, j;
08130 
08131     V[0] = X[0];  V[1] = X[1]; V[2] =  X[2]; V[3] =  X[3];
08132 
08133     for (i = 0; i < 4; i++)
08134     {
08135         word32 y = Y[i];
08136         for (j = 0; j < 32; j++)
08137         {
08138             if (y & 0x80000000) {
08139                 Z[0] ^= V[0];
08140                 Z[1] ^= V[1];
08141                 Z[2] ^= V[2];
08142                 Z[3] ^= V[3];
08143             }
08144 
08145             if (V[3] & 0x00000001) {
08146                 V[3] >>= 1;
08147                 V[3] |= ((V[2] & 0x00000001) ? 0x80000000 : 0);
08148                 V[2] >>= 1;
08149                 V[2] |= ((V[1] & 0x00000001) ? 0x80000000 : 0);
08150                 V[1] >>= 1;
08151                 V[1] |= ((V[0] & 0x00000001) ? 0x80000000 : 0);
08152                 V[0] >>= 1;
08153                 V[0] ^= 0xE1000000;
08154             } else {
08155                 V[3] >>= 1;
08156                 V[3] |= ((V[2] & 0x00000001) ? 0x80000000 : 0);
08157                 V[2] >>= 1;
08158                 V[2] |= ((V[1] & 0x00000001) ? 0x80000000 : 0);
08159                 V[1] >>= 1;
08160                 V[1] |= ((V[0] & 0x00000001) ? 0x80000000 : 0);
08161                 V[0] >>= 1;
08162             }
08163             y <<= 1;
08164         }
08165     }
08166     X[0] = Z[0];
08167     X[1] = Z[1];
08168     X[2] = Z[2];
08169     X[3] = Z[3];
08170 }
08171 
08172 
/* GHASH (32-bit word variant): fold the AAD (a) and ciphertext (c) into
 * the GCM hash, then fold in their bit lengths, and copy sSz bytes of
 * the result into s.
 *
 * aes  context holding the hash subkey in aes->H
 * a    AAD bytes (may be NULL when aSz == 0)
 * aSz  AAD length in bytes
 * c    ciphertext bytes (may be NULL when cSz == 0)
 * cSz  ciphertext length in bytes
 * s    output buffer for the hash
 * sSz  number of hash bytes to write (expected <= AES_BLOCK_SIZE;
 *      larger would over-read the 16-byte local state)
 */
void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c,
    word32 cSz, byte* s, word32 sSz)
{
    word32 x[4] = {0,0,0,0};    /* running hash state */
    word32 blocks, partial;
    word32 bigH[4];             /* hash subkey H as four word32s */

    XMEMCPY(bigH, aes->H, AES_BLOCK_SIZE);
    #ifdef LITTLE_ENDIAN_ORDER
        /* GMULT operates on big-endian word32 values; byte-reverse on LE. */
        ByteReverseWords(bigH, bigH, AES_BLOCK_SIZE);
    #endif

    /* Hash in A, the Additional Authentication Data */
    if (aSz != 0 && a != NULL) {
        word32 bigA[4];
        blocks = aSz / AES_BLOCK_SIZE;
        partial = aSz % AES_BLOCK_SIZE;
        while (blocks--) {
            XMEMCPY(bigA, a, AES_BLOCK_SIZE);
            #ifdef LITTLE_ENDIAN_ORDER
                ByteReverseWords(bigA, bigA, AES_BLOCK_SIZE);
            #endif
            x[0] ^= bigA[0];
            x[1] ^= bigA[1];
            x[2] ^= bigA[2];
            x[3] ^= bigA[3];
            GMULT(x, bigH);
            a += AES_BLOCK_SIZE;
        }
        /* Zero-pad and fold in any final partial block. */
        if (partial != 0) {
            XMEMSET(bigA, 0, AES_BLOCK_SIZE);
            XMEMCPY(bigA, a, partial);
            #ifdef LITTLE_ENDIAN_ORDER
                ByteReverseWords(bigA, bigA, AES_BLOCK_SIZE);
            #endif
            x[0] ^= bigA[0];
            x[1] ^= bigA[1];
            x[2] ^= bigA[2];
            x[3] ^= bigA[3];
            GMULT(x, bigH);
        }
    }

    /* Hash in C, the Ciphertext */
    if (cSz != 0 && c != NULL) {
        word32 bigC[4];
        blocks = cSz / AES_BLOCK_SIZE;
        partial = cSz % AES_BLOCK_SIZE;
        while (blocks--) {
            XMEMCPY(bigC, c, AES_BLOCK_SIZE);
            #ifdef LITTLE_ENDIAN_ORDER
                ByteReverseWords(bigC, bigC, AES_BLOCK_SIZE);
            #endif
            x[0] ^= bigC[0];
            x[1] ^= bigC[1];
            x[2] ^= bigC[2];
            x[3] ^= bigC[3];
            GMULT(x, bigH);
            c += AES_BLOCK_SIZE;
        }
        /* Zero-pad and fold in any final partial block. */
        if (partial != 0) {
            XMEMSET(bigC, 0, AES_BLOCK_SIZE);
            XMEMCPY(bigC, c, partial);
            #ifdef LITTLE_ENDIAN_ORDER
                ByteReverseWords(bigC, bigC, AES_BLOCK_SIZE);
            #endif
            x[0] ^= bigC[0];
            x[1] ^= bigC[1];
            x[2] ^= bigC[2];
            x[3] ^= bigC[3];
            GMULT(x, bigH);
        }
    }

    /* Hash in the lengths in bits of A and C */
    {
        word32 len[4];

        /* Lengths are in bytes. Convert to bits: the 64-bit bit count is
         * split across two word32s (high word first). */
        len[0] = (aSz >> (8*sizeof(aSz) - 3));
        len[1] = aSz << 3;
        len[2] = (cSz >> (8*sizeof(cSz) - 3));
        len[3] = cSz << 3;

        x[0] ^= len[0];
        x[1] ^= len[1];
        x[2] ^= len[2];
        x[3] ^= len[3];
        GMULT(x, bigH);
    }
    #ifdef LITTLE_ENDIAN_ORDER
        /* Convert the state back to byte order before copying out. */
        ByteReverseWords(x, x, AES_BLOCK_SIZE);
    #endif
    XMEMCPY(s, x, sSz);
}
08268 
08269 #endif /* end GCM_WORD32 */
08270 
08271 
08272 #if !defined(WOLFSSL_XILINX_CRYPT)
08273 #ifdef FREESCALE_LTC_AES_GCM
08274 int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
08275                    const byte* iv, word32 ivSz,
08276                    byte* authTag, word32 authTagSz,
08277                    const byte* authIn, word32 authInSz)
08278 {
08279     status_t status;
08280     word32 keySize;
08281 
08282     /* argument checks */
08283     if (aes == NULL || authTagSz > AES_BLOCK_SIZE) {
08284         return BAD_FUNC_ARG;
08285     }
08286 
08287     if (authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ) {
08288         WOLFSSL_MSG("GcmEncrypt authTagSz too small error");
08289         return BAD_FUNC_ARG;
08290     }
08291 
08292     status = wc_AesGetKeySize(aes, &keySize);
08293     if (status)
08294         return status;
08295 
08296     status = LTC_AES_EncryptTagGcm(LTC_BASE, in, out, sz, iv, ivSz,
08297         authIn, authInSz, (byte*)aes->key, keySize, authTag, authTagSz);
08298 
08299     return (status == kStatus_Success) ? 0 : AES_GCM_AUTH_E;
08300 }
08301 #else
08302 #if defined(STM32_CRYPTO) && (defined(WOLFSSL_STM32F4) || \
08303                               defined(WOLFSSL_STM32F7) || \
08304                               defined(WOLFSSL_STM32L4))
08305 
08306 static WC_INLINE int wc_AesGcmEncrypt_STM32(Aes* aes, byte* out, const byte* in,
08307                                          word32 sz, const byte* iv, word32 ivSz,
08308                                          byte* authTag, word32 authTagSz,
08309                                          const byte* authIn, word32 authInSz)
08310 {
08311     int ret;
08312     word32 keySize;
08313     byte initialCounter[AES_BLOCK_SIZE];
08314     #ifdef WOLFSSL_STM32_CUBEMX
08315         CRYP_HandleTypeDef hcryp;
08316     #else
08317         byte keyCopy[AES_BLOCK_SIZE * 2];
08318     #endif /* WOLFSSL_STM32_CUBEMX */
08319     int status = 0;
08320     byte* authInPadded = NULL;
08321     byte tag[AES_BLOCK_SIZE];
08322     int authPadSz;
08323 
08324     ret = wc_AesGetKeySize(aes, &keySize);
08325     if (ret != 0)
08326         return ret;
08327 
08328     XMEMSET(initialCounter, 0, AES_BLOCK_SIZE);
08329     XMEMCPY(initialCounter, iv, ivSz);
08330     initialCounter[AES_BLOCK_SIZE - 1] = STM32_GCM_IV_START;
08331 
08332     /* pad authIn if it is not a block multiple */
08333     if ((authInSz % AES_BLOCK_SIZE) != 0) {
08334         authPadSz = ((authInSz / AES_BLOCK_SIZE) + 1) * AES_BLOCK_SIZE;
08335         /* Need to pad the AAD to a full block with zeros. */
08336         authInPadded = XMALLOC(authPadSz, aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
08337         if (authInPadded == NULL) {
08338             return MEMORY_E;
08339         }
08340         XMEMSET(authInPadded, 0, authPadSz);
08341         XMEMCPY(authInPadded, authIn, authInSz);
08342     } else {
08343         authPadSz = authInSz;
08344         authInPadded = (byte*)authIn;
08345     }
08346 
08347 
08348 #ifdef WOLFSSL_STM32_CUBEMX
08349     XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef));
08350     switch (keySize) {
08351         case 16: /* 128-bit key */
08352             hcryp.Init.KeySize = CRYP_KEYSIZE_128B;
08353             break;
08354 #ifdef CRYP_KEYSIZE_192B
08355         case 24: /* 192-bit key */
08356             hcryp.Init.KeySize = CRYP_KEYSIZE_192B;
08357             break;
08358 #endif
08359         case 32: /* 256-bit key */
08360             hcryp.Init.KeySize = CRYP_KEYSIZE_256B;
08361             break;
08362         default:
08363             break;
08364     }
08365     hcryp.Instance = CRYP;
08366     hcryp.Init.DataType = CRYP_DATATYPE_8B;
08367     hcryp.Init.pKey = (byte*)aes->key;
08368     hcryp.Init.pInitVect = initialCounter;
08369     hcryp.Init.Header = authInPadded;
08370     hcryp.Init.HeaderSize = authInSz;
08371 
08372 #ifdef WOLFSSL_STM32L4
08373     /* Set the CRYP parameters */
08374     hcryp.Init.ChainingMode  = CRYP_CHAINMODE_AES_GCM_GMAC;
08375     hcryp.Init.OperatingMode = CRYP_ALGOMODE_ENCRYPT;
08376     hcryp.Init.GCMCMACPhase  = CRYP_INIT_PHASE;
08377     HAL_CRYP_Init(&hcryp);
08378 
08379     /* GCM init phase */
08380     status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, 0, NULL, STM32_HAL_TIMEOUT);
08381     if (status == HAL_OK) {
08382         /* GCM header phase */
08383         hcryp.Init.GCMCMACPhase  = CRYP_HEADER_PHASE;
08384         status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, 0, NULL, STM32_HAL_TIMEOUT);
08385         if (status == HAL_OK) {
08386             /* GCM payload phase */
08387             hcryp.Init.GCMCMACPhase  = CRYP_PAYLOAD_PHASE;
08388             status = HAL_CRYPEx_AES_Auth(&hcryp, (byte*)in, sz, out, STM32_HAL_TIMEOUT);
08389             if (status == HAL_OK) {
08390                 /* GCM final phase */
08391                 hcryp.Init.GCMCMACPhase  = CRYP_FINAL_PHASE;
08392                 status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, sz, tag, STM32_HAL_TIMEOUT);
08393             }
08394         }
08395     }
08396 #else
08397     HAL_CRYP_Init(&hcryp);
08398 
08399     status = HAL_CRYPEx_AESGCM_Encrypt(&hcryp, (byte*)in, sz,
08400                                        out, STM32_HAL_TIMEOUT);
08401     /* Compute the authTag */
08402     if (status == HAL_OK) {
08403         status = HAL_CRYPEx_AESGCM_Finish(&hcryp, sz, tag, STM32_HAL_TIMEOUT);
08404     }
08405 #endif
08406 
08407     if (status != HAL_OK)
08408         ret = AES_GCM_AUTH_E;
08409     HAL_CRYP_DeInit(&hcryp);
08410 #else
08411     ByteReverseWords((word32*)keyCopy, (word32*)aes->key, keySize);
08412     status = CRYP_AES_GCM(MODE_ENCRYPT, (uint8_t*)initialCounter,
08413                          (uint8_t*)keyCopy,     keySize * 8,
08414                          (uint8_t*)in,          sz,
08415                          (uint8_t*)authInPadded,authInSz,
08416                          (uint8_t*)out,         tag);
08417     if (status != SUCCESS)
08418         ret = AES_GCM_AUTH_E;
08419 #endif /* WOLFSSL_STM32_CUBEMX */
08420 
08421     /* authTag may be shorter than AES_BLOCK_SZ, store separately */
08422     if (ret == 0)
08423         XMEMCPY(authTag, tag, authTagSz);
08424 
08425     /* We only allocate extra memory if authInPadded is not a multiple of AES_BLOCK_SZ */
08426     if (authInPadded != NULL && authInSz != authPadSz) {
08427         XFREE(authInPadded, aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
08428     }
08429 
08430     return ret;
08431 }
08432 #endif /* STM32_CRYPTO */
08433 
08434 #ifdef WOLFSSL_AESNI
08435 int AES_GCM_encrypt_C(Aes* aes, byte* out, const byte* in, word32 sz,
08436                       const byte* iv, word32 ivSz,
08437                       byte* authTag, word32 authTagSz,
08438                       const byte* authIn, word32 authInSz);
08439 #else
08440 static
08441 #endif
08442 int AES_GCM_encrypt_C(Aes* aes, byte* out, const byte* in, word32 sz,
08443                       const byte* iv, word32 ivSz,
08444                       byte* authTag, word32 authTagSz,
08445                       const byte* authIn, word32 authInSz)
08446 {
08447     int ret = 0;
08448     word32 blocks = sz / AES_BLOCK_SIZE;
08449     word32 partial = sz % AES_BLOCK_SIZE;
08450     const byte* p = in;
08451     byte* c = out;
08452     byte counter[AES_BLOCK_SIZE];
08453     byte initialCounter[AES_BLOCK_SIZE];
08454     byte *ctr;
08455     byte scratch[AES_BLOCK_SIZE];
08456 
08457     ctr = counter;
08458     XMEMSET(initialCounter, 0, AES_BLOCK_SIZE);
08459     if (ivSz == GCM_NONCE_MID_SZ) {
08460         XMEMCPY(initialCounter, iv, ivSz);
08461         initialCounter[AES_BLOCK_SIZE - 1] = 1;
08462     }
08463     else {
08464         GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE);
08465     }
08466     XMEMCPY(ctr, initialCounter, AES_BLOCK_SIZE);
08467 
08468 #ifdef WOLFSSL_PIC32MZ_CRYPT
08469     if (blocks) {
08470         /* use intitial IV for PIC32 HW, but don't use it below */
08471         XMEMCPY(aes->reg, ctr, AES_BLOCK_SIZE);
08472 
08473         ret = wc_Pic32AesCrypt(
08474             aes->key, aes->keylen, aes->reg, AES_BLOCK_SIZE,
08475             out, in, (blocks * AES_BLOCK_SIZE),
08476             PIC32_ENCRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_AES_GCM);
08477         if (ret != 0)
08478             return ret;
08479     }
08480     /* process remainder using partial handling */
08481 #endif
08482 
08483 #if defined(HAVE_AES_ECB) && !defined(WOLFSSL_PIC32MZ_CRYPT)
08484     /* some hardware acceleration can gain performance from doing AES encryption
08485      * of the whole buffer at once */
08486     if (c != p) { /* can not handle inline encryption */
08487         while (blocks--) {
08488             IncrementGcmCounter(ctr);
08489             XMEMCPY(c, ctr, AES_BLOCK_SIZE);
08490             c += AES_BLOCK_SIZE;
08491         }
08492 
08493         /* reset number of blocks and then do encryption */
08494         blocks = sz / AES_BLOCK_SIZE;
08495         wc_AesEcbEncrypt(aes, out, out, AES_BLOCK_SIZE * blocks);
08496         xorbuf(out, p, AES_BLOCK_SIZE * blocks);
08497         p += AES_BLOCK_SIZE * blocks;
08498     }
08499     else
08500 #endif /* HAVE_AES_ECB */
08501 
08502     while (blocks--) {
08503         IncrementGcmCounter(ctr);
08504     #ifndef WOLFSSL_PIC32MZ_CRYPT
08505         wc_AesEncrypt(aes, ctr, scratch);
08506         xorbuf(scratch, p, AES_BLOCK_SIZE);
08507         XMEMCPY(c, scratch, AES_BLOCK_SIZE);
08508     #endif
08509         p += AES_BLOCK_SIZE;
08510         c += AES_BLOCK_SIZE;
08511     }
08512 
08513     if (partial != 0) {
08514         IncrementGcmCounter(ctr);
08515         wc_AesEncrypt(aes, ctr, scratch);
08516         xorbuf(scratch, p, partial);
08517         XMEMCPY(c, scratch, partial);
08518     }
08519 
08520     GHASH(aes, authIn, authInSz, out, sz, authTag, authTagSz);
08521     wc_AesEncrypt(aes, initialCounter, scratch);
08522     xorbuf(authTag, scratch, authTagSz);
08523 
08524     return ret;
08525 }
08526 
08527 int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
08528                    const byte* iv, word32 ivSz,
08529                    byte* authTag, word32 authTagSz,
08530                    const byte* authIn, word32 authInSz)
08531 {
08532     /* argument checks */
08533     if (aes == NULL || authTagSz > AES_BLOCK_SIZE) {
08534         return BAD_FUNC_ARG;
08535     }
08536 
08537     if (authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ) {
08538         WOLFSSL_MSG("GcmEncrypt authTagSz too small error");
08539         return BAD_FUNC_ARG;
08540     }
08541 
08542 #if defined(STM32_CRYPTO) && (defined(WOLFSSL_STM32F4) || \
08543                               defined(WOLFSSL_STM32F7) || \
08544                               defined(WOLFSSL_STM32L4))
08545 
08546     /* additional argument checks - STM32 HW only supports 12 byte IV */
08547     if (ivSz != GCM_NONCE_MID_SZ) {
08548         return BAD_FUNC_ARG;
08549     }
08550 
08551     /* STM32 HW AES-GCM requires / assumes inputs are a multiple of block size.
08552      * We can avoid this by zero padding (authIn) AAD, but zero-padded plaintext
08553      * will be encrypted and output incorrectly, causing a bad authTag.
08554      * We will use HW accelerated AES-GCM if plain%AES_BLOCK_SZ==0.
08555      * Otherwise, we will use accelerated AES_CTR for encrypt, and then
08556      * perform GHASH in software.
08557      * See NIST SP 800-38D */
08558 
08559     /* Plain text is a multiple of block size, so use HW-Accelerated AES_GCM */
08560     if (sz % AES_BLOCK_SIZE == 0) {
08561         return wc_AesGcmEncrypt_STM32(aes, out, in, sz, iv, ivSz,
08562                                       authTag, authTagSz, authIn, authInSz);
08563     }
08564 #endif
08565 
08566 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
08567     /* if async and byte count above threshold */
08568     /* only 12-byte IV is supported in HW */
08569     if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES &&
08570                     sz >= WC_ASYNC_THRESH_AES_GCM && ivSz == GCM_NONCE_MID_SZ) {
08571     #if defined(HAVE_CAVIUM)
08572         #ifdef HAVE_CAVIUM_V
08573         if (authInSz == 20) { /* Nitrox V GCM is only working with 20 byte AAD */
08574             return NitroxAesGcmEncrypt(aes, out, in, sz,
08575                 (const byte*)aes->asyncKey, aes->keylen, iv, ivSz,
08576                 authTag, authTagSz, authIn, authInSz);
08577         }
08578         #endif
08579     #elif defined(HAVE_INTEL_QA)
08580         return IntelQaSymAesGcmEncrypt(&aes->asyncDev, out, in, sz,
08581             (const byte*)aes->asyncKey, aes->keylen, iv, ivSz,
08582             authTag, authTagSz, authIn, authInSz);
08583     #else /* WOLFSSL_ASYNC_CRYPT_TEST */
08584         if (wc_AsyncTestInit(&aes->asyncDev, ASYNC_TEST_AES_GCM_ENCRYPT)) {
08585             WC_ASYNC_TEST* testDev = &aes->asyncDev.test;
08586             testDev->aes.aes = aes;
08587             testDev->aes.out = out;
08588             testDev->aes.in = in;
08589             testDev->aes.sz = sz;
08590             testDev->aes.iv = iv;
08591             testDev->aes.ivSz = ivSz;
08592             testDev->aes.authTag = authTag;
08593             testDev->aes.authTagSz = authTagSz;
08594             testDev->aes.authIn = authIn;
08595             testDev->aes.authInSz = authInSz;
08596             return WC_PENDING_E;
08597         }
08598     #endif
08599     }
08600 #endif /* WOLFSSL_ASYNC_CRYPT */
08601 
08602     /* Software AES-GCM */
08603 
08604 #ifdef WOLFSSL_AESNI
08605     #ifdef HAVE_INTEL_AVX2
08606     if (IS_INTEL_AVX2(intel_flags)) {
08607         AES_GCM_encrypt_avx2(in, out, authIn, iv, authTag, sz, authInSz, ivSz,
08608                                  authTagSz, (const byte*)aes->key, aes->rounds);
08609         return 0;
08610     }
08611     else
08612     #endif
08613     #ifdef HAVE_INTEL_AVX1
08614     if (IS_INTEL_AVX1(intel_flags)) {
08615         AES_GCM_encrypt_avx1(in, out, authIn, iv, authTag, sz, authInSz, ivSz,
08616                                  authTagSz, (const byte*)aes->key, aes->rounds);
08617         return 0;
08618     }
08619     else
08620     #endif
08621     if (haveAESNI) {
08622         AES_GCM_encrypt(in, out, authIn, iv, authTag, sz, authInSz, ivSz,
08623                                  authTagSz, (const byte*)aes->key, aes->rounds);
08624         return 0;
08625     }
08626     else
08627 #endif
08628     {
08629         return AES_GCM_encrypt_C(aes, out, in, sz, iv, ivSz, authTag, authTagSz,
08630                                                               authIn, authInSz);
08631     }
08632 }
08633 #endif
08634 
08635 
08636 #if defined(HAVE_AES_DECRYPT) || defined(HAVE_AESGCM_DECRYPT)
08637 #ifdef FREESCALE_LTC_AES_GCM
08638 int  wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
08639                    const byte* iv, word32 ivSz,
08640                    const byte* authTag, word32 authTagSz,
08641                    const byte* authIn, word32 authInSz)
08642 {
08643     int ret;
08644     word32 keySize;
08645     status_t status;
08646 
08647     /* argument checks */
08648     if (aes == NULL || out == NULL || in == NULL || iv == NULL ||
08649         authTag == NULL || authTagSz > AES_BLOCK_SIZE) {
08650         return BAD_FUNC_ARG;
08651     }
08652 
08653     ret = wc_AesGetKeySize(aes, &keySize);
08654     if (ret != 0) {
08655         return ret;
08656     }
08657 
08658     status = LTC_AES_DecryptTagGcm(LTC_BASE, in, out, sz, iv, ivSz,
08659         authIn, authInSz, (byte*)aes->key, keySize, authTag, authTagSz);
08660 
08661     return (status == kStatus_Success) ? 0 : AES_GCM_AUTH_E;
08662 }
08663 #elif defined(STM32_CRYPTO) && (defined(WOLFSSL_STM32F4) || \
08664                                 defined(WOLFSSL_STM32F7) || \
08665                                 defined(WOLFSSL_STM32L4))
08666 int  wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
08667                    const byte* iv, word32 ivSz,
08668                    const byte* authTag, word32 authTagSz,
08669                    const byte* authIn, word32 authInSz)
08670 {
08671     int ret;
08672     word32 keySize;
08673     #ifdef WOLFSSL_STM32_CUBEMX
08674         CRYP_HandleTypeDef hcryp;
08675     #else
08676         byte keyCopy[AES_BLOCK_SIZE * 2];
08677     #endif /* WOLFSSL_STM32_CUBEMX */
08678     int  status;
08679     int  inPadSz, authPadSz;
08680     byte tag[AES_BLOCK_SIZE];
08681     byte *inPadded = NULL;
08682     byte *authInPadded = NULL;
08683     byte initialCounter[AES_BLOCK_SIZE];
08684 
08685     /* argument checks */
08686     if (aes == NULL || out == NULL || in == NULL || iv == NULL ||
08687         authTag == NULL || authTagSz > AES_BLOCK_SIZE) {
08688         return BAD_FUNC_ARG;
08689     }
08690 
08691     ret = wc_AesGetKeySize(aes, &keySize);
08692     if (ret != 0) {
08693         return ret;
08694     }
08695 
08696     /* additional argument checks - STM32 HW only supports 12 byte IV */
08697     if (ivSz != GCM_NONCE_MID_SZ) {
08698         return BAD_FUNC_ARG;
08699     }
08700 
08701     XMEMSET(initialCounter, 0, AES_BLOCK_SIZE);
08702     XMEMCPY(initialCounter, iv, ivSz);
08703     initialCounter[AES_BLOCK_SIZE - 1] = STM32_GCM_IV_START;
08704 
08705     /* Need to pad the AAD and input cipher text to a full block size since
08706      * CRYP_AES_GCM will assume these are a multiple of AES_BLOCK_SIZE.
08707      * It is okay to pad with zeros because GCM does this before GHASH already.
08708      * See NIST SP 800-38D */
08709 
08710     if ((sz % AES_BLOCK_SIZE) > 0) {
08711         inPadSz = ((sz / AES_BLOCK_SIZE) + 1) * AES_BLOCK_SIZE;
08712         inPadded = XMALLOC(inPadSz, aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
08713         if (inPadded == NULL) {
08714             return MEMORY_E;
08715         }
08716         XMEMSET(inPadded, 0, inPadSz);
08717         XMEMCPY(inPadded, in, sz);
08718     } else {
08719         inPadSz = sz;
08720         inPadded = (byte*)in;
08721     }
08722 
08723     if ((authInSz % AES_BLOCK_SIZE) > 0) {
08724         authPadSz = ((authInSz / AES_BLOCK_SIZE) + 1) * AES_BLOCK_SIZE;
08725         authInPadded = XMALLOC(authPadSz, aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
08726         if (authInPadded == NULL) {
08727             if (inPadded != NULL && inPadSz != sz)
08728                 XFREE(inPadded , aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
08729             return MEMORY_E;
08730         }
08731         XMEMSET(authInPadded, 0, authPadSz);
08732         XMEMCPY(authInPadded, authIn, authInSz);
08733     } else {
08734         authPadSz = authInSz;
08735         authInPadded = (byte*)authIn;
08736     }
08737 
08738 #ifdef WOLFSSL_STM32_CUBEMX
08739     XMEMSET(&hcryp, 0, sizeof(CRYP_HandleTypeDef));
08740     switch(keySize) {
08741         case 16: /* 128-bit key */
08742             hcryp.Init.KeySize = CRYP_KEYSIZE_128B;
08743             break;
08744 #ifdef CRYP_KEYSIZE_192B
08745         case 24: /* 192-bit key */
08746             hcryp.Init.KeySize = CRYP_KEYSIZE_192B;
08747             break;
08748 #endif
08749         case 32: /* 256-bit key */
08750             hcryp.Init.KeySize = CRYP_KEYSIZE_256B;
08751             break;
08752         default:
08753             break;
08754     }
08755     hcryp.Instance = CRYP;
08756     hcryp.Init.DataType = CRYP_DATATYPE_8B;
08757     hcryp.Init.pKey = (byte*)aes->key;
08758     hcryp.Init.pInitVect = initialCounter;
08759     hcryp.Init.Header = authInPadded;
08760     hcryp.Init.HeaderSize = authInSz;
08761 
08762 #ifdef WOLFSSL_STM32L4
08763     /* Set the CRYP parameters */
08764     hcryp.Init.ChainingMode  = CRYP_CHAINMODE_AES_GCM_GMAC;
08765     hcryp.Init.OperatingMode = CRYP_ALGOMODE_DECRYPT;
08766     hcryp.Init.GCMCMACPhase  = CRYP_INIT_PHASE;
08767     HAL_CRYP_Init(&hcryp);
08768 
08769     /* GCM init phase */
08770     status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, 0, NULL, STM32_HAL_TIMEOUT);
08771     if (status == HAL_OK) {
08772         /* GCM header phase */
08773         hcryp.Init.GCMCMACPhase  = CRYP_HEADER_PHASE;
08774         status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, 0, NULL, STM32_HAL_TIMEOUT);
08775         if (status == HAL_OK) {
08776             /* GCM payload phase */
08777             hcryp.Init.GCMCMACPhase  = CRYP_PAYLOAD_PHASE;
08778             status = HAL_CRYPEx_AES_Auth(&hcryp, (byte*)inPadded, sz, inPadded,
08779                 STM32_HAL_TIMEOUT);
08780             if (status == HAL_OK) {
08781                 /* GCM final phase */
08782                 hcryp.Init.GCMCMACPhase  = CRYP_FINAL_PHASE;
08783                 status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, sz, tag,
08784                     STM32_HAL_TIMEOUT);
08785             }
08786         }
08787     }
08788 #else
08789     HAL_CRYP_Init(&hcryp);
08790     /* Use inPadded for output buffer instead of
08791     * out so that we don't overflow our size. */
08792     status = HAL_CRYPEx_AESGCM_Decrypt(&hcryp, (byte*)inPadded,
08793                                     sz, inPadded, STM32_HAL_TIMEOUT);
08794     /* Compute the authTag */
08795     if (status == HAL_OK) {
08796         status = HAL_CRYPEx_AESGCM_Finish(&hcryp, sz, tag, STM32_HAL_TIMEOUT);
08797     }
08798 #endif
08799 
08800     if (status != HAL_OK)
08801         ret = AES_GCM_AUTH_E;
08802 
08803     HAL_CRYP_DeInit(&hcryp);
08804 #else
08805     ByteReverseWords((word32*)keyCopy, (word32*)aes->key, keySize);
08806 
08807     /* Input size and auth size need to be the actual sizes, even though
08808      * they are not block aligned, because this length (in bits) is used
08809      * in the final GHASH. Use inPadded for output buffer instead of
08810      * out so that we don't overflow our size.                         */
08811     status = CRYP_AES_GCM(MODE_DECRYPT, (uint8_t*)initialCounter,
08812                          (uint8_t*)keyCopy,     keySize * 8,
08813                          (uint8_t*)inPadded,    sz,
08814                          (uint8_t*)authInPadded,authInSz,
08815                          (uint8_t*)inPadded,    tag);
08816     if (status != SUCCESS)
08817         ret = AES_GCM_AUTH_E;
08818 #endif /* WOLFSSL_STM32_CUBEMX */
08819 
08820     if (ret == 0 && ConstantCompare(authTag, tag, authTagSz) == 0) {
08821         /* Only keep the decrypted data if authTag success. */
08822         XMEMCPY(out, inPadded, sz);
08823         ret = 0; /* success */
08824     }
08825 
08826     /* only allocate padding buffers if the inputs are not a multiple of block sz */
08827     if (inPadded != NULL && inPadSz != sz)
08828         XFREE(inPadded , aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
08829     if (authInPadded != NULL && authPadSz != authInSz)
08830         XFREE(authInPadded, aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
08831 
08832     return ret;
08833 }
08834 #else
08835 #ifdef WOLFSSL_AESNI
08836 int AES_GCM_decrypt_C(Aes* aes, byte* out, const byte* in, word32 sz,
08837                       const byte* iv, word32 ivSz,
08838                       const byte* authTag, word32 authTagSz,
08839                       const byte* authIn, word32 authInSz);
08840 #else
08841 static
08842 #endif
08843 int AES_GCM_decrypt_C(Aes* aes, byte* out, const byte* in, word32 sz,
08844                       const byte* iv, word32 ivSz,
08845                       const byte* authTag, word32 authTagSz,
08846                       const byte* authIn, word32 authInSz)
08847 {
08848     int ret = 0;
08849     word32 blocks = sz / AES_BLOCK_SIZE;
08850     word32 partial = sz % AES_BLOCK_SIZE;
08851     const byte* c = in;
08852     byte* p = out;
08853     byte counter[AES_BLOCK_SIZE];
08854     byte initialCounter[AES_BLOCK_SIZE];
08855     byte *ctr;
08856     byte scratch[AES_BLOCK_SIZE];
08857     byte Tprime[AES_BLOCK_SIZE];
08858     byte EKY0[AES_BLOCK_SIZE];
08859     ctr = counter;
08860 
08861     XMEMSET(initialCounter, 0, AES_BLOCK_SIZE);
08862     if (ivSz == GCM_NONCE_MID_SZ) {
08863         XMEMCPY(initialCounter, iv, ivSz);
08864         initialCounter[AES_BLOCK_SIZE - 1] = 1;
08865     }
08866     else {
08867         GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE);
08868     }
08869     XMEMCPY(ctr, initialCounter, AES_BLOCK_SIZE);
08870 
08871     /* Calc the authTag again using the received auth data and the cipher text */
08872     GHASH(aes, authIn, authInSz, in, sz, Tprime, sizeof(Tprime));
08873     wc_AesEncrypt(aes, ctr, EKY0);
08874     xorbuf(Tprime, EKY0, sizeof(Tprime));
08875 
08876     if (ConstantCompare(authTag, Tprime, authTagSz) != 0) {
08877         return AES_GCM_AUTH_E;
08878     }
08879 
08880 #ifdef WOLFSSL_PIC32MZ_CRYPT
08881     if (blocks) {
08882         /* use intitial IV for PIC32 HW, but don't use it below */
08883         XMEMCPY(aes->reg, ctr, AES_BLOCK_SIZE);
08884 
08885         ret = wc_Pic32AesCrypt(
08886             aes->key, aes->keylen, aes->reg, AES_BLOCK_SIZE,
08887             out, in, (blocks * AES_BLOCK_SIZE),
08888             PIC32_DECRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_AES_GCM);
08889         if (ret != 0)
08890             return ret;
08891     }
08892     /* process remainder using partial handling */
08893 #endif
08894 
08895 #if defined(HAVE_AES_ECB) && !defined(WOLFSSL_PIC32MZ_CRYPT)
08896     /* some hardware acceleration can gain performance from doing AES encryption
08897      * of the whole buffer at once */
08898     if (c != p) { /* can not handle inline decryption */
08899         while (blocks--) {
08900             IncrementGcmCounter(ctr);
08901             XMEMCPY(p, ctr, AES_BLOCK_SIZE);
08902             p += AES_BLOCK_SIZE;
08903         }
08904 
08905         /* reset number of blocks and then do encryption */
08906         blocks = sz / AES_BLOCK_SIZE;
08907         wc_AesEcbEncrypt(aes, out, out, AES_BLOCK_SIZE * blocks);
08908         xorbuf(out, c, AES_BLOCK_SIZE * blocks);
08909         c += AES_BLOCK_SIZE * blocks;
08910     }
08911     else
08912 #endif /* HAVE_AES_ECB */
08913     while (blocks--) {
08914         IncrementGcmCounter(ctr);
08915     #ifndef WOLFSSL_PIC32MZ_CRYPT
08916         wc_AesEncrypt(aes, ctr, scratch);
08917         xorbuf(scratch, c, AES_BLOCK_SIZE);
08918         XMEMCPY(p, scratch, AES_BLOCK_SIZE);
08919     #endif
08920         p += AES_BLOCK_SIZE;
08921         c += AES_BLOCK_SIZE;
08922     }
08923 
08924     if (partial != 0) {
08925         IncrementGcmCounter(ctr);
08926         wc_AesEncrypt(aes, ctr, scratch);
08927         xorbuf(scratch, c, partial);
08928         XMEMCPY(p, scratch, partial);
08929     }
08930 
08931     return ret;
08932 }
08933 
08934 int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
08935                      const byte* iv, word32 ivSz,
08936                      const byte* authTag, word32 authTagSz,
08937                      const byte* authIn, word32 authInSz)
08938 {
08939 #ifdef WOLFSSL_AESNI
08940     int res;
08941 #endif
08942 
08943     /* argument checks */
08944     /* If the sz is non-zero, both in and out must be set. If sz is 0,
08945      * in and out are don't cares, as this is is the GMAC case. */
08946     if (aes == NULL || iv == NULL || (sz != 0 && (in == NULL || out == NULL)) ||
08947         authTag == NULL || authTagSz > AES_BLOCK_SIZE || authTagSz == 0) {
08948 
08949         return BAD_FUNC_ARG;
08950     }
08951 
08952 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
08953     /* if async and byte count above threshold */
08954     /* only 12-byte IV is supported in HW */
08955     if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES &&
08956                     sz >= WC_ASYNC_THRESH_AES_GCM && ivSz == GCM_NONCE_MID_SZ) {
08957     #if defined(HAVE_CAVIUM)
08958         #ifdef HAVE_CAVIUM_V
08959         if (authInSz == 20) { /* Nitrox V GCM is only working with 20 byte AAD */
08960             return NitroxAesGcmDecrypt(aes, out, in, sz,
08961                 (const byte*)aes->asyncKey, aes->keylen, iv, ivSz,
08962                 authTag, authTagSz, authIn, authInSz);
08963         }
08964         #endif
08965     #elif defined(HAVE_INTEL_QA)
08966         return IntelQaSymAesGcmDecrypt(&aes->asyncDev, out, in, sz,
08967             (const byte*)aes->asyncKey, aes->keylen, iv, ivSz,
08968             authTag, authTagSz, authIn, authInSz);
08969     #else /* WOLFSSL_ASYNC_CRYPT_TEST */
08970         if (wc_AsyncTestInit(&aes->asyncDev, ASYNC_TEST_AES_GCM_DECRYPT)) {
08971             WC_ASYNC_TEST* testDev = &aes->asyncDev.test;
08972             testDev->aes.aes = aes;
08973             testDev->aes.out = out;
08974             testDev->aes.in = in;
08975             testDev->aes.sz = sz;
08976             testDev->aes.iv = iv;
08977             testDev->aes.ivSz = ivSz;
08978             testDev->aes.authTag = (byte*)authTag;
08979             testDev->aes.authTagSz = authTagSz;
08980             testDev->aes.authIn = authIn;
08981             testDev->aes.authInSz = authInSz;
08982             return WC_PENDING_E;
08983         }
08984     #endif
08985     }
08986 #endif /* WOLFSSL_ASYNC_CRYPT */
08987 
08988     /* software AES GCM */
08989 
08990 #ifdef WOLFSSL_AESNI
08991     #ifdef HAVE_INTEL_AVX2
08992     if (IS_INTEL_AVX2(intel_flags)) {
08993         AES_GCM_decrypt_avx2(in, out, authIn, iv, authTag, sz, authInSz, ivSz,
08994                                  authTagSz, (byte*)aes->key, aes->rounds, &res);
08995         if (res == 0)
08996             return AES_GCM_AUTH_E;
08997         return 0;
08998     }
08999     else
09000     #endif
09001     #ifdef HAVE_INTEL_AVX1
09002     if (IS_INTEL_AVX1(intel_flags)) {
09003         AES_GCM_decrypt_avx1(in, out, authIn, iv, authTag, sz, authInSz, ivSz,
09004                                  authTagSz, (byte*)aes->key, aes->rounds, &res);
09005         if (res == 0)
09006             return AES_GCM_AUTH_E;
09007         return 0;
09008     }
09009     else
09010     #endif
09011     if (haveAESNI) {
09012         AES_GCM_decrypt(in, out, authIn, iv, authTag, sz, authInSz, ivSz,
09013                                  authTagSz, (byte*)aes->key, aes->rounds, &res);
09014         if (res == 0)
09015             return AES_GCM_AUTH_E;
09016         return 0;
09017     }
09018     else
09019 #endif
09020     {
09021         return AES_GCM_decrypt_C(aes, out, in, sz, iv, ivSz, authTag, authTagSz,
09022                                                               authIn, authInSz);
09023     }
09024 }
09025 #endif
09026 #endif /* HAVE_AES_DECRYPT || HAVE_AESGCM_DECRYPT */
09027 #endif /* (WOLFSSL_XILINX_CRYPT) */
09028 #endif /* end of block for AESGCM implementation selection */
09029 
09030 
09031 /* Common to all, abstract functions that build off of lower level AESGCM
09032  * functions */
09033 #ifndef WC_NO_RNG
09034 
09035 int wc_AesGcmSetExtIV(Aes* aes, const byte* iv, word32 ivSz)
09036 {
09037     int ret = 0;
09038 
09039     if (aes == NULL || iv == NULL ||
09040         (ivSz != GCM_NONCE_MIN_SZ && ivSz != GCM_NONCE_MID_SZ &&
09041          ivSz != GCM_NONCE_MAX_SZ)) {
09042 
09043         ret = BAD_FUNC_ARG;
09044     }
09045 
09046     if (ret == 0) {
09047         XMEMCPY((byte*)aes->reg, iv, ivSz);
09048 
09049         /* If the IV is 96, allow for a 2^64 invocation counter.
09050          * For any other size for the nonce, limit the invocation
09051          * counter to 32-bits. (SP 800-38D 8.3) */
09052         aes->invokeCtr[0] = 0;
09053         aes->invokeCtr[1] = (ivSz == GCM_NONCE_MID_SZ) ? 0 : 0xFFFFFFFF;
09054         aes->nonceSz = ivSz;
09055     }
09056 
09057     return ret;
09058 }
09059 
09060 
09061 int wc_AesGcmSetIV(Aes* aes, word32 ivSz,
09062                    const byte* ivFixed, word32 ivFixedSz,
09063                    WC_RNG* rng)
09064 {
09065     int ret = 0;
09066 
09067     if (aes == NULL || rng == NULL ||
09068         (ivSz != GCM_NONCE_MIN_SZ && ivSz != GCM_NONCE_MID_SZ &&
09069          ivSz != GCM_NONCE_MAX_SZ) ||
09070         (ivFixed == NULL && ivFixedSz != 0) ||
09071         (ivFixed != NULL && ivFixedSz != AES_IV_FIXED_SZ)) {
09072 
09073         ret = BAD_FUNC_ARG;
09074     }
09075 
09076     if (ret == 0) {
09077         byte* iv = (byte*)aes->reg;
09078 
09079         if (ivFixedSz)
09080             XMEMCPY(iv, ivFixed, ivFixedSz);
09081 
09082         ret = wc_RNG_GenerateBlock(rng, iv + ivFixedSz, ivSz - ivFixedSz);
09083     }
09084 
09085     if (ret == 0) {
09086         /* If the IV is 96, allow for a 2^64 invocation counter.
09087          * For any other size for the nonce, limit the invocation
09088          * counter to 32-bits. (SP 800-38D 8.3) */
09089         aes->invokeCtr[0] = 0;
09090         aes->invokeCtr[1] = (ivSz == GCM_NONCE_MID_SZ) ? 0 : 0xFFFFFFFF;
09091         aes->nonceSz = ivSz;
09092     }
09093 
09094     return ret;
09095 }
09096 
09097 
09098 int wc_AesGcmEncrypt_ex(Aes* aes, byte* out, const byte* in, word32 sz,
09099                         byte* ivOut, word32 ivOutSz,
09100                         byte* authTag, word32 authTagSz,
09101                         const byte* authIn, word32 authInSz)
09102 {
09103     int ret = 0;
09104 
09105     if (aes == NULL || (sz != 0 && (in == NULL || out == NULL)) ||
09106         ivOut == NULL || ivOutSz != aes->nonceSz ||
09107         (authIn == NULL && authInSz != 0)) {
09108 
09109         ret = BAD_FUNC_ARG;
09110     }
09111 
09112     if (ret == 0) {
09113         aes->invokeCtr[0]++;
09114         if (aes->invokeCtr[0] == 0) {
09115             aes->invokeCtr[1]++;
09116             if (aes->invokeCtr[1] == 0)
09117                 ret = AES_GCM_OVERFLOW_E;
09118         }
09119     }
09120 
09121     if (ret == 0) {
09122         XMEMCPY(ivOut, aes->reg, ivOutSz);
09123         ret = wc_AesGcmEncrypt(aes, out, in, sz,
09124                                (byte*)aes->reg, ivOutSz,
09125                                authTag, authTagSz,
09126                                authIn, authInSz);
09127         IncCtr((byte*)aes->reg, ivOutSz);
09128     }
09129 
09130     return ret;
09131 }
09132 
09133 int wc_Gmac(const byte* key, word32 keySz, byte* iv, word32 ivSz,
09134             const byte* authIn, word32 authInSz,
09135             byte* authTag, word32 authTagSz, WC_RNG* rng)
09136 {
09137     Aes aes;
09138     int ret = 0;
09139 
09140     if (key == NULL || iv == NULL || (authIn == NULL && authInSz != 0) ||
09141         authTag == NULL || authTagSz == 0 || rng == NULL) {
09142 
09143         ret = BAD_FUNC_ARG;
09144     }
09145 
09146     if (ret == 0)
09147         ret = wc_AesGcmSetKey(&aes, key, keySz);
09148     if (ret == 0)
09149         ret = wc_AesGcmSetIV(&aes, ivSz, NULL, 0, rng);
09150     if (ret == 0)
09151         ret = wc_AesGcmEncrypt_ex(&aes, NULL, NULL, 0, iv, ivSz,
09152                                   authTag, authTagSz, authIn, authInSz);
09153     ForceZero(&aes, sizeof(aes));
09154 
09155     return ret;
09156 }
09157 
09158 int wc_GmacVerify(const byte* key, word32 keySz,
09159                   const byte* iv, word32 ivSz,
09160                   const byte* authIn, word32 authInSz,
09161                   const byte* authTag, word32 authTagSz)
09162 {
09163     Aes aes;
09164     int ret = 0;
09165 
09166     if (key == NULL || iv == NULL || (authIn == NULL && authInSz != 0) ||
09167         authTag == NULL || authTagSz == 0 || authTagSz > AES_BLOCK_SIZE) {
09168 
09169         ret = BAD_FUNC_ARG;
09170     }
09171 
09172     if (ret == 0)
09173         ret = wc_AesGcmSetKey(&aes, key, keySz);
09174     if (ret == 0)
09175         ret = wc_AesGcmDecrypt(&aes, NULL, NULL, 0, iv, ivSz,
09176                                   authTag, authTagSz, authIn, authInSz);
09177     ForceZero(&aes, sizeof(aes));
09178 
09179     return ret;
09180 }
09181 
09182 #endif /* WC_NO_RNG */
09183 
09184 
09185 WOLFSSL_API int wc_GmacSetKey(Gmac* gmac, const byte* key, word32 len)
09186 {
09187     if (gmac == NULL || key == NULL) {
09188         return BAD_FUNC_ARG;
09189     }
09190     return wc_AesGcmSetKey(&gmac->aes, key, len);
09191 }
09192 
09193 
09194 WOLFSSL_API int wc_GmacUpdate(Gmac* gmac, const byte* iv, word32 ivSz,
09195                               const byte* authIn, word32 authInSz,
09196                               byte* authTag, word32 authTagSz)
09197 {
09198     return wc_AesGcmEncrypt(&gmac->aes, NULL, NULL, 0, iv, ivSz,
09199                                          authTag, authTagSz, authIn, authInSz);
09200 }
09201 
09202 #endif /* HAVE_AESGCM */
09203 
09204 
09205 #ifdef HAVE_AESCCM
09206 
09207 int wc_AesCcmSetKey(Aes* aes, const byte* key, word32 keySz)
09208 {
09209     if (!((keySz == 16) || (keySz == 24) || (keySz == 32)))
09210         return BAD_FUNC_ARG;
09211 
09212     return wc_AesSetKey(aes, key, keySz, NULL, AES_ENCRYPTION);
09213 }
09214 
09215 #ifdef WOLFSSL_ARMASM
09216     /* implementation located in wolfcrypt/src/port/arm/armv8-aes.c */
09217 
09218 #elif defined(HAVE_COLDFIRE_SEC)
09219     #error "Coldfire SEC doesn't currently support AES-CCM mode"
09220 
09221 #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES)
09222     /* implemented in wolfcrypt/src/port/caam_aes.c */
09223 
09224 #elif defined(FREESCALE_LTC)
09225 
09226 /* return 0 on success */
09227 int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
09228                    const byte* nonce, word32 nonceSz,
09229                    byte* authTag, word32 authTagSz,
09230                    const byte* authIn, word32 authInSz)
09231 {
09232     byte *key;
09233     uint32_t keySize;
09234     status_t status;
09235 
09236     /* sanity check on arguments */
09237     if (aes == NULL || out == NULL || in == NULL || nonce == NULL
09238             || authTag == NULL || nonceSz < 7 || nonceSz > 13)
09239         return BAD_FUNC_ARG;
09240 
09241     key = (byte*)aes->key;
09242 
09243     status = wc_AesGetKeySize(aes, &keySize);
09244     if (status != 0) {
09245         return status;
09246     }
09247 
09248     status = LTC_AES_EncryptTagCcm(LTC_BASE, in, out, inSz,
09249         nonce, nonceSz, authIn, authInSz, key, keySize, authTag, authTagSz);
09250 
09251     return (kStatus_Success == status) ? 0 : BAD_FUNC_ARG;
09252 }
09253 
#ifdef HAVE_AES_DECRYPT
/* AES-CCM decrypt using the Freescale LTC hardware engine.
 * On authentication failure the output buffer is cleared and
 * AES_CCM_AUTH_E is returned. */
int  wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
                   const byte* nonce, word32 nonceSz,
                   const byte* authTag, word32 authTagSz,
                   const byte* authIn, word32 authInSz)
{
    uint32_t keyLen;
    status_t st;

    /* sanity check on arguments; CCM nonce must be 7..13 bytes */
    if (aes == NULL || out == NULL || in == NULL || nonce == NULL
            || authTag == NULL || nonceSz < 7 || nonceSz > 13)
        return BAD_FUNC_ARG;

    st = wc_AesGetKeySize(aes, &keyLen);
    if (st != 0)
        return st;

    st = LTC_AES_DecryptTagCcm(LTC_BASE, in, out, inSz,
        nonce, nonceSz, authIn, authInSz, (byte*)aes->key, keyLen,
        authTag, authTagSz);

    if (st != kStatus_Success) {
        /* don't release unauthenticated plaintext */
        XMEMSET(out, 0, inSz);
        return AES_CCM_AUTH_E;
    }
    return 0;
}
#endif /* HAVE_AES_DECRYPT */
09288 
09289 
09290 /* software AES CCM */
09291 #else
09292 
09293 static void roll_x(Aes* aes, const byte* in, word32 inSz, byte* out)
09294 {
09295     /* process the bulk of the data */
09296     while (inSz >= AES_BLOCK_SIZE) {
09297         xorbuf(out, in, AES_BLOCK_SIZE);
09298         in += AES_BLOCK_SIZE;
09299         inSz -= AES_BLOCK_SIZE;
09300 
09301         wc_AesEncrypt(aes, out, out);
09302     }
09303 
09304     /* process remainder of the data */
09305     if (inSz > 0) {
09306         xorbuf(out, in, inSz);
09307         wc_AesEncrypt(aes, out, out);
09308     }
09309 }
09310 
/* Fold the CCM additional-authentication data (AAD) into the running
 * CBC-MAC block 'out'. Per the CCM spec, the AAD is prefixed with a
 * length encoding: 2 bytes for lengths below 0xFF00, otherwise a 6-byte
 * form with the 0xFF 0xFE marker. The encoding and the AAD are XORed
 * into 'out' (whose existing contents come from the prior MAC step),
 * then encrypted; remaining AAD is processed via roll_x. */
static void roll_auth(Aes* aes, const byte* in, word32 inSz, byte* out)
{
    word32 authLenSz;
    word32 remainder;

    /* encode the length in */
    if (inSz <= 0xFEFF) {
        /* short form: 2-byte big-endian length */
        authLenSz = 2;
        out[0] ^= ((inSz & 0xFF00) >> 8);
        out[1] ^=  (inSz & 0x00FF);
    }
    else if (inSz <= 0xFFFFFFFF) {
        /* long form: 0xFF 0xFE marker plus 4-byte big-endian length */
        authLenSz = 6;
        out[0] ^= 0xFF; out[1] ^= 0xFE;
        out[2] ^= ((inSz & 0xFF000000) >> 24);
        out[3] ^= ((inSz & 0x00FF0000) >> 16);
        out[4] ^= ((inSz & 0x0000FF00) >>  8);
        out[5] ^=  (inSz & 0x000000FF);
    }
    /* Note, the protocol handles auth data up to 2^64, but we are
     * using 32-bit sizes right now, so the bigger data isn't handled
     * else if (inSz <= 0xFFFFFFFFFFFFFFFF) {} */
    else
        return;

    /* start fill out the rest of the first block */
    remainder = AES_BLOCK_SIZE - authLenSz;
    if (inSz >= remainder) {
        /* plenty of bulk data to fill the remainder of this block */
        xorbuf(out + authLenSz, in, remainder);
        inSz -= remainder;
        in += remainder;
    }
    else {
        /* not enough bulk data, copy what is available, and pad zero */
        xorbuf(out + authLenSz, in, inSz);
        inSz = 0;
    }
    /* absorb the first (length-prefixed) AAD block into the MAC */
    wc_AesEncrypt(aes, out, out);

    /* any AAD beyond the first block continues through the plain MAC */
    if (inSz > 0)
        roll_x(aes, in, inSz, out);
}
09354 
09355 
09356 static WC_INLINE void AesCcmCtrInc(byte* B, word32 lenSz)
09357 {
09358     word32 i;
09359 
09360     for (i = 0; i < lenSz; i++) {
09361         if (++B[AES_BLOCK_SIZE - 1 - i] != 0) return;
09362     }
09363 }
09364 
09365 /* return 0 on success */
09366 int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
09367                    const byte* nonce, word32 nonceSz,
09368                    byte* authTag, word32 authTagSz,
09369                    const byte* authIn, word32 authInSz)
09370 {
09371     byte A[AES_BLOCK_SIZE];
09372     byte B[AES_BLOCK_SIZE];
09373     byte lenSz;
09374     word32 i;
09375     byte mask = 0xFF;
09376     const word32 wordSz = (word32)sizeof(word32);
09377 
09378     /* sanity check on arguments */
09379     if (aes == NULL || out == NULL || in == NULL || nonce == NULL
09380             || authTag == NULL || nonceSz < 7 || nonceSz > 13 ||
09381             authTagSz > AES_BLOCK_SIZE)
09382         return BAD_FUNC_ARG;
09383 
09384     XMEMCPY(B+1, nonce, nonceSz);
09385     lenSz = AES_BLOCK_SIZE - 1 - (byte)nonceSz;
09386     B[0] = (authInSz > 0 ? 64 : 0)
09387          + (8 * (((byte)authTagSz - 2) / 2))
09388          + (lenSz - 1);
09389     for (i = 0; i < lenSz; i++) {
09390         if (mask && i >= wordSz)
09391             mask = 0x00;
09392         B[AES_BLOCK_SIZE - 1 - i] = (inSz >> ((8 * i) & mask)) & mask;
09393     }
09394 
09395     wc_AesEncrypt(aes, B, A);
09396 
09397     if (authInSz > 0)
09398         roll_auth(aes, authIn, authInSz, A);
09399     if (inSz > 0)
09400         roll_x(aes, in, inSz, A);
09401     XMEMCPY(authTag, A, authTagSz);
09402 
09403     B[0] = lenSz - 1;
09404     for (i = 0; i < lenSz; i++)
09405         B[AES_BLOCK_SIZE - 1 - i] = 0;
09406     wc_AesEncrypt(aes, B, A);
09407     xorbuf(authTag, A, authTagSz);
09408 
09409     B[15] = 1;
09410     while (inSz >= AES_BLOCK_SIZE) {
09411         wc_AesEncrypt(aes, B, A);
09412         xorbuf(A, in, AES_BLOCK_SIZE);
09413         XMEMCPY(out, A, AES_BLOCK_SIZE);
09414 
09415         AesCcmCtrInc(B, lenSz);
09416         inSz -= AES_BLOCK_SIZE;
09417         in += AES_BLOCK_SIZE;
09418         out += AES_BLOCK_SIZE;
09419     }
09420     if (inSz > 0) {
09421         wc_AesEncrypt(aes, B, A);
09422         xorbuf(A, in, inSz);
09423         XMEMCPY(out, A, inSz);
09424     }
09425 
09426     ForceZero(A, AES_BLOCK_SIZE);
09427     ForceZero(B, AES_BLOCK_SIZE);
09428 
09429     return 0;
09430 }
09431 
#ifdef HAVE_AES_DECRYPT
/* Software AES-CCM decrypt: CTR-decrypts the ciphertext, recomputes the
 * CBC-MAC over the recovered plaintext, and compares against the supplied
 * tag in constant time. On tag mismatch the output is zeroed and
 * AES_CCM_AUTH_E is returned.
 * return 0 on success */
int  wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
                   const byte* nonce, word32 nonceSz,
                   const byte* authTag, word32 authTagSz,
                   const byte* authIn, word32 authInSz)
{
    byte A[AES_BLOCK_SIZE];   /* working block (keystream / MAC state) */
    byte B[AES_BLOCK_SIZE];   /* counter block, later B0 */
    byte* o;
    byte lenSz;               /* bytes of B used for the message length */
    word32 i, oSz;
    int result = 0;
    byte mask = 0xFF;
    const word32 wordSz = (word32)sizeof(word32);

    /* sanity check on arguments */
    if (aes == NULL || out == NULL || in == NULL || nonce == NULL
            || authTag == NULL || nonceSz < 7 || nonceSz > 13 ||
            authTagSz > AES_BLOCK_SIZE)
        return BAD_FUNC_ARG;

    o = out;
    oSz = inSz;
    XMEMCPY(B+1, nonce, nonceSz);
    lenSz = AES_BLOCK_SIZE - 1 - (byte)nonceSz;

    /* counter block starting at counter = 1 for payload decryption */
    B[0] = lenSz - 1;
    for (i = 0; i < lenSz; i++)
        B[AES_BLOCK_SIZE - 1 - i] = 0;
    B[15] = 1;

    while (oSz >= AES_BLOCK_SIZE) {
        wc_AesEncrypt(aes, B, A);
        xorbuf(A, in, AES_BLOCK_SIZE);
        XMEMCPY(o, A, AES_BLOCK_SIZE);

        AesCcmCtrInc(B, lenSz);
        oSz -= AES_BLOCK_SIZE;
        in += AES_BLOCK_SIZE;
        o += AES_BLOCK_SIZE;
    }
    /* Fix: check the remaining byte count (oSz), not the original inSz.
     * The old `inSz > 0` check performed a needless extra AES operation
     * (with zero-length xor/copy) whenever inSz was a block multiple. */
    if (oSz > 0) {
        wc_AesEncrypt(aes, B, A);
        xorbuf(A, in, oSz);
        XMEMCPY(o, A, oSz);
    }

    /* A0 counter block (counter = 0), used later to unmask the tag */
    for (i = 0; i < lenSz; i++)
        B[AES_BLOCK_SIZE - 1 - i] = 0;
    wc_AesEncrypt(aes, B, A);

    o = out;
    oSz = inSz;

    /* rebuild B0 and recompute the CBC-MAC over the decrypted data */
    B[0] = (authInSz > 0 ? 64 : 0)
         + (8 * (((byte)authTagSz - 2) / 2))
         + (lenSz - 1);
    /* write inSz big-endian; mask drops to 0 past the word size so the
     * remaining length bytes become zero */
    for (i = 0; i < lenSz; i++) {
        if (mask && i >= wordSz)
            mask = 0x00;
        B[AES_BLOCK_SIZE - 1 - i] = (inSz >> ((8 * i) & mask)) & mask;
    }

    wc_AesEncrypt(aes, B, A);

    if (authInSz > 0)
        roll_auth(aes, authIn, authInSz, A);
    if (inSz > 0)
        roll_x(aes, o, oSz, A);

    /* encrypt A0 and mask the computed MAC to form the expected tag */
    B[0] = lenSz - 1;
    for (i = 0; i < lenSz; i++)
        B[AES_BLOCK_SIZE - 1 - i] = 0;
    wc_AesEncrypt(aes, B, B);
    xorbuf(A, B, authTagSz);

    if (ConstantCompare(A, authTag, authTagSz) != 0) {
        /* If the authTag check fails, don't keep the decrypted data.
         * Unfortunately, you need the decrypted data to calculate the
         * check value. */
        XMEMSET(out, 0, inSz);
        result = AES_CCM_AUTH_E;
    }

    /* scrub intermediate key-stream / MAC state */
    ForceZero(A, AES_BLOCK_SIZE);
    ForceZero(B, AES_BLOCK_SIZE);
    o = NULL;

    return result;
}

#endif /* HAVE_AES_DECRYPT */
09524 #endif /* software AES CCM */
09525 
09526 /* abstract functions that call lower level AESCCM functions */
09527 #ifndef WC_NO_RNG
09528 
09529 int wc_AesCcmSetNonce(Aes* aes, const byte* nonce, word32 nonceSz)
09530 {
09531     int ret = 0;
09532 
09533     if (aes == NULL || nonce == NULL ||
09534         nonceSz < CCM_NONCE_MIN_SZ || nonceSz > CCM_NONCE_MAX_SZ) {
09535 
09536         ret = BAD_FUNC_ARG;
09537     }
09538 
09539     if (ret == 0) {
09540         XMEMCPY(aes->reg, nonce, nonceSz);
09541         aes->nonceSz = nonceSz;
09542 
09543         /* Invocation counter should be 2^61 */
09544         aes->invokeCtr[0] = 0;
09545         aes->invokeCtr[1] = 0xE0000000;
09546     }
09547 
09548     return ret;
09549 }
09550 
09551 
09552 int wc_AesCcmEncrypt_ex(Aes* aes, byte* out, const byte* in, word32 sz,
09553                         byte* ivOut, word32 ivOutSz,
09554                         byte* authTag, word32 authTagSz,
09555                         const byte* authIn, word32 authInSz)
09556 {
09557     int ret = 0;
09558 
09559     if (aes == NULL || out == NULL ||
09560         (in == NULL && sz != 0) ||
09561         ivOut == NULL ||
09562         (authIn == NULL && authInSz != 0) ||
09563         (ivOutSz != aes->nonceSz)) {
09564 
09565         ret = BAD_FUNC_ARG;
09566     }
09567 
09568     if (ret == 0) {
09569         aes->invokeCtr[0]++;
09570         if (aes->invokeCtr[0] == 0) {
09571             aes->invokeCtr[1]++;
09572             if (aes->invokeCtr[1] == 0)
09573                 ret = AES_CCM_OVERFLOW_E;
09574         }
09575     }
09576 
09577     if (ret == 0) {
09578         ret = wc_AesCcmEncrypt(aes, out, in, sz,
09579                                (byte*)aes->reg, aes->nonceSz,
09580                                authTag, authTagSz,
09581                                authIn, authInSz);
09582         XMEMCPY(ivOut, aes->reg, aes->nonceSz);
09583         IncCtr((byte*)aes->reg, aes->nonceSz);
09584     }
09585 
09586     return ret;
09587 }
09588 
09589 #endif /* WC_NO_RNG */
09590 
09591 #endif /* HAVE_AESCCM */
09592 
09593 
09594 /* Initialize Aes for use with async hardware */
09595 int wc_AesInit(Aes* aes, void* heap, int devId)
09596 {
09597     int ret = 0;
09598 
09599     if (aes == NULL)
09600         return BAD_FUNC_ARG;
09601 
09602     aes->heap = heap;
09603 
09604 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
09605     ret = wolfAsync_DevCtxInit(&aes->asyncDev, WOLFSSL_ASYNC_MARKER_AES,
09606                                                         aes->heap, devId);
09607 #else
09608     (void)devId;
09609 #endif /* WOLFSSL_ASYNC_CRYPT */
09610 
09611     return ret;
09612 }
09613 
09614 /* Free Aes from use with async hardware */
09615 void wc_AesFree(Aes* aes)
09616 {
09617     if (aes == NULL)
09618         return;
09619 
09620 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
09621     wolfAsync_DevCtxFree(&aes->asyncDev, WOLFSSL_ASYNC_MARKER_AES);
09622 #endif /* WOLFSSL_ASYNC_CRYPT */
09623 }
09624 
09625 
09626 int wc_AesGetKeySize(Aes* aes, word32* keySize)
09627 {
09628     int ret = 0;
09629 
09630     if (aes == NULL || keySize == NULL) {
09631         return BAD_FUNC_ARG;
09632     }
09633 
09634     switch (aes->rounds) {
09635     #ifdef WOLFSSL_AES_128
09636     case 10:
09637         *keySize = 16;
09638         break;
09639     #endif
09640     #ifdef WOLFSSL_AES_192
09641     case 12:
09642         *keySize = 24;
09643         break;
09644     #endif
09645     #ifdef WOLFSSL_AES_256
09646     case 14:
09647         *keySize = 32;
09648         break;
09649     #endif
09650     default:
09651         *keySize = 0;
09652         ret = BAD_FUNC_ARG;
09653     }
09654 
09655     return ret;
09656 }
09657 
09658 #endif /* !WOLFSSL_TI_CRYPT */
09659 
09660 #ifdef HAVE_AES_ECB
09661 #if defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES)
09662     /* implemented in wolfcrypt/src/port/caam/caam_aes.c */
09663 #else
09664 
09665 /* software implementation */
09666 int wc_AesEcbEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
09667 {
09668     word32 blocks = sz / AES_BLOCK_SIZE;
09669 
09670     if ((in == NULL) || (out == NULL) || (aes == NULL))
09671       return BAD_FUNC_ARG;
09672     while (blocks>0) {
09673       wc_AesEncryptDirect(aes, out, in);
09674       out += AES_BLOCK_SIZE;
09675       in  += AES_BLOCK_SIZE;
09676       sz  -= AES_BLOCK_SIZE;
09677       blocks--;
09678     }
09679     return 0;
09680 }
09681 
09682 
09683 int wc_AesEcbDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
09684 {
09685     word32 blocks = sz / AES_BLOCK_SIZE;
09686 
09687     if ((in == NULL) || (out == NULL) || (aes == NULL))
09688       return BAD_FUNC_ARG;
09689     while (blocks>0) {
09690       wc_AesDecryptDirect(aes, out, in);
09691       out += AES_BLOCK_SIZE;
09692       in  += AES_BLOCK_SIZE;
09693       sz  -= AES_BLOCK_SIZE;
09694       blocks--;
09695     }
09696     return 0;
09697 }
09698 #endif
09699 #endif /* HAVE_AES_ECB */
09700 
09701 #ifdef WOLFSSL_AES_CFB
09702 /* CFB 128
09703  *
09704  * aes structure holding key to use for encryption
09705  * out buffer to hold result of encryption (must be at least as large as input
09706  *     buffer)
09707  * in  buffer to encrypt
09708  * sz  size of input buffer
09709  *
09710  * returns 0 on success and negative error values on failure
09711  */
int wc_AesCfbEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
{
    byte*  tmp = NULL;   /* cursor into leftover keystream (aes->tmp) */
    byte*  reg = NULL;   /* cursor into feedback register (aes->reg) */

    WOLFSSL_ENTER("wc_AesCfbEncrypt");

    if (aes == NULL || out == NULL || in == NULL) {
        return BAD_FUNC_ARG;
    }

    /* aes->left counts unused keystream bytes carried over from a prior
     * partial-block call; position reg at the matching offset so the
     * feedback register is updated with the new ciphertext bytes */
    if (aes->left && sz) {
        reg = (byte*)aes->reg + AES_BLOCK_SIZE - aes->left;
    }

    /* consume any unused bytes left in aes->tmp */
    /* NOTE: reg is only dereferenced under the same (aes->left && sz)
     * condition that assigned it above, so it is never NULL here */
    tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left;
    while (aes->left && sz) {
        /* ciphertext byte = plaintext ^ keystream; also feeds the register */
        *(out++) = *(reg++) = *(in++) ^ *(tmp++);
        aes->left--;
        sz--;
    }

    /* full blocks: keystream = E(reg); ciphertext feeds back into reg */
    while (sz >= AES_BLOCK_SIZE) {
        wc_AesEncryptDirect(aes, out, (byte*)aes->reg);
        xorbuf(out, in, AES_BLOCK_SIZE);
        XMEMCPY(aes->reg, out, AES_BLOCK_SIZE);
        out += AES_BLOCK_SIZE;
        in  += AES_BLOCK_SIZE;
        sz  -= AES_BLOCK_SIZE;
        aes->left = 0;
    }

    /* encrypt left over data */
    if (sz) {
        /* generate a fresh keystream block into aes->tmp and remember how
         * much of it remains unused for the next call */
        wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg);
        aes->left = AES_BLOCK_SIZE;
        tmp = (byte*)aes->tmp;
        reg = (byte*)aes->reg;

        while (sz--) {
            *(out++) = *(reg++) = *(in++) ^ *(tmp++);
            aes->left--;
        }
    }

    return 0;
}
09760 
09761 
09762 #ifdef HAVE_AES_DECRYPT
09763 /* CFB 128
09764  *
09765  * aes structure holding key to use for decryption
09766  * out buffer to hold result of decryption (must be at least as large as input
09767  *     buffer)
09768  * in  buffer to decrypt
09769  * sz  size of input buffer
09770  *
09771  * returns 0 on success and negative error values on failure
09772  */
int wc_AesCfbDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
{
    byte*  tmp;   /* cursor into leftover keystream (aes->tmp) */

    WOLFSSL_ENTER("wc_AesCfbDecrypt");

    if (aes == NULL || out == NULL || in == NULL) {
        return BAD_FUNC_ARG;
    }

    /* check if more input needs copied over to aes->reg */
    /* in CFB decrypt the CIPHERTEXT feeds the register, so copy the
     * incoming ciphertext into the unused tail of aes->reg */
    if (aes->left && sz) {
        int size = min(aes->left, sz);
        XMEMCPY((byte*)aes->reg + AES_BLOCK_SIZE - aes->left, in, size);
    }

    /* consume any unused bytes left in aes->tmp */
    tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left;
    while (aes->left && sz) {
        *(out++) = *(in++) ^ *(tmp++);
        aes->left--;
        sz--;
    }

    /* full blocks: keystream = E(reg); ciphertext becomes the next reg.
     * NOTE(review): encrypt uses `sz >= AES_BLOCK_SIZE` here while this
     * uses `>`; an exact block multiple instead falls through to the
     * leftover path below, which produces the same output and equivalent
     * state (reg holds the full ciphertext block, left reaches 0). */
    while (sz > AES_BLOCK_SIZE) {
        wc_AesEncryptDirect(aes, out, (byte*)aes->reg);
        xorbuf(out, in, AES_BLOCK_SIZE);
        XMEMCPY(aes->reg, in, AES_BLOCK_SIZE);
        out += AES_BLOCK_SIZE;
        in  += AES_BLOCK_SIZE;
        sz  -= AES_BLOCK_SIZE;
        aes->left = 0;
    }

    /* decrypt left over data */
    if (sz) {
        /* fresh keystream block into aes->tmp; stash the consumed
         * ciphertext in reg for the next call's feedback */
        wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg);
        XMEMCPY(aes->reg, in, sz);
        aes->left = AES_BLOCK_SIZE;
        tmp = (byte*)aes->tmp;

        while (sz--) {
            *(out++) = *(in++) ^ *(tmp++);
            aes->left--;
        }
    }

    return 0;
}
09822 #endif /* HAVE_AES_DECRYPT */
09823 #endif /* WOLFSSL_AES_CFB */
09824 
09825 
09826 #ifdef HAVE_AES_KEYWRAP
09827 
09828 /* Initialize key wrap counter with value */
09829 static WC_INLINE void InitKeyWrapCounter(byte* inOutCtr, word32 value)
09830 {
09831     int i;
09832     word32 bytes;
09833 
09834     bytes = sizeof(word32);
09835     for (i = 0; i < (int)sizeof(word32); i++) {
09836         inOutCtr[i+sizeof(word32)] = (value >> ((bytes - 1) * 8)) & 0xFF;
09837         bytes--;
09838     }
09839 }
09840 
09841 /* Increment key wrap counter */
09842 static WC_INLINE void IncrementKeyWrapCounter(byte* inOutCtr)
09843 {
09844     int i;
09845 
09846     /* in network byte order so start at end and work back */
09847     for (i = KEYWRAP_BLOCK_SIZE - 1; i >= 0; i--) {
09848         if (++inOutCtr[i])  /* we're done unless we overflow */
09849             return;
09850     }
09851 }
09852 
09853 /* Decrement key wrap counter */
09854 static WC_INLINE void DecrementKeyWrapCounter(byte* inOutCtr)
09855 {
09856     int i;
09857 
09858     for (i = KEYWRAP_BLOCK_SIZE - 1; i >= 0; i--) {
09859         if (--inOutCtr[i] != 0xFF)  /* we're done unless we underflow */
09860             return;
09861     }
09862 }
09863 
09864 /* perform AES key wrap (RFC3394), return out sz on success, negative on err */
09865 int wc_AesKeyWrap(const byte* key, word32 keySz, const byte* in, word32 inSz,
09866                   byte* out, word32 outSz, const byte* iv)
09867 {
09868     Aes aes;
09869     byte* r;
09870     word32 i;
09871     int ret, j;
09872 
09873     byte t[KEYWRAP_BLOCK_SIZE];
09874     byte tmp[AES_BLOCK_SIZE];
09875 
09876     /* n must be at least 2, output size is n + 8 bytes */
09877     if (key == NULL || in  == NULL || inSz < 2 ||
09878         out == NULL || outSz < (inSz + KEYWRAP_BLOCK_SIZE))
09879         return BAD_FUNC_ARG;
09880 
09881     /* input must be multiple of 64-bits */
09882     if (inSz % KEYWRAP_BLOCK_SIZE != 0)
09883         return BAD_FUNC_ARG;
09884 
09885     /* user IV is optional */
09886     if (iv == NULL) {
09887         XMEMSET(tmp, 0xA6, KEYWRAP_BLOCK_SIZE);
09888     } else {
09889         XMEMCPY(tmp, iv, KEYWRAP_BLOCK_SIZE);
09890     }
09891 
09892     r = out + 8;
09893     XMEMCPY(r, in, inSz);
09894     XMEMSET(t, 0, sizeof(t));
09895 
09896     ret = wc_AesInit(&aes, NULL, INVALID_DEVID);
09897     if (ret != 0)
09898         return ret;
09899 
09900     ret = wc_AesSetKey(&aes, key, keySz, NULL, AES_ENCRYPTION);
09901     if (ret != 0)
09902         return ret;
09903 
09904     for (j = 0; j <= 5; j++) {
09905         for (i = 1; i <= inSz / KEYWRAP_BLOCK_SIZE; i++) {
09906 
09907             /* load R[i] */
09908             XMEMCPY(tmp + KEYWRAP_BLOCK_SIZE, r, KEYWRAP_BLOCK_SIZE);
09909 
09910             wc_AesEncryptDirect(&aes, tmp, tmp);
09911 
09912             /* calculate new A */
09913             IncrementKeyWrapCounter(t);
09914             xorbuf(tmp, t, KEYWRAP_BLOCK_SIZE);
09915 
09916             /* save R[i] */
09917             XMEMCPY(r, tmp + KEYWRAP_BLOCK_SIZE, KEYWRAP_BLOCK_SIZE);
09918             r += KEYWRAP_BLOCK_SIZE;
09919         }
09920         r = out + KEYWRAP_BLOCK_SIZE;
09921     }
09922 
09923     /* C[0] = A */
09924     XMEMCPY(out, tmp, KEYWRAP_BLOCK_SIZE);
09925 
09926     wc_AesFree(&aes);
09927 
09928     return inSz + KEYWRAP_BLOCK_SIZE;
09929 }
09930 
09931 int wc_AesKeyUnWrap(const byte* key, word32 keySz, const byte* in, word32 inSz,
09932                     byte* out, word32 outSz, const byte* iv)
09933 {
09934     Aes aes;
09935     byte* r;
09936     word32 i, n;
09937     int ret, j;
09938 
09939     byte t[KEYWRAP_BLOCK_SIZE];
09940     byte tmp[AES_BLOCK_SIZE];
09941 
09942     const byte* expIv;
09943     const byte defaultIV[] = {
09944         0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6
09945     };
09946 
09947     (void)iv;
09948 
09949     if (key == NULL || in == NULL || inSz < 3 ||
09950         out == NULL || outSz < (inSz - KEYWRAP_BLOCK_SIZE))
09951         return BAD_FUNC_ARG;
09952 
09953     /* input must be multiple of 64-bits */
09954     if (inSz % KEYWRAP_BLOCK_SIZE != 0)
09955         return BAD_FUNC_ARG;
09956 
09957     /* user IV optional */
09958     if (iv != NULL) {
09959         expIv = iv;
09960     } else {
09961         expIv = defaultIV;
09962     }
09963 
09964     /* A = C[0], R[i] = C[i] */
09965     XMEMCPY(tmp, in, KEYWRAP_BLOCK_SIZE);
09966     XMEMCPY(out, in + KEYWRAP_BLOCK_SIZE, inSz - KEYWRAP_BLOCK_SIZE);
09967     XMEMSET(t, 0, sizeof(t));
09968 
09969     ret = wc_AesInit(&aes, NULL, INVALID_DEVID);
09970     if (ret != 0)
09971         return ret;
09972 
09973     ret = wc_AesSetKey(&aes, key, keySz, NULL, AES_DECRYPTION);
09974     if (ret != 0)
09975         return ret;
09976 
09977     /* initialize counter to 6n */
09978     n = (inSz - 1) / KEYWRAP_BLOCK_SIZE;
09979     InitKeyWrapCounter(t, 6 * n);
09980 
09981     for (j = 5; j >= 0; j--) {
09982         for (i = n; i >= 1; i--) {
09983 
09984             /* calculate A */
09985             xorbuf(tmp, t, KEYWRAP_BLOCK_SIZE);
09986             DecrementKeyWrapCounter(t);
09987 
09988             /* load R[i], starting at end of R */
09989             r = out + ((i - 1) * KEYWRAP_BLOCK_SIZE);
09990             XMEMCPY(tmp + KEYWRAP_BLOCK_SIZE, r, KEYWRAP_BLOCK_SIZE);
09991             wc_AesDecryptDirect(&aes, tmp, tmp);
09992 
09993             /* save R[i] */
09994             XMEMCPY(r, tmp + KEYWRAP_BLOCK_SIZE, KEYWRAP_BLOCK_SIZE);
09995         }
09996     }
09997 
09998     wc_AesFree(&aes);
09999 
10000     /* verify IV */
10001     if (XMEMCMP(tmp, expIv, KEYWRAP_BLOCK_SIZE) != 0)
10002         return BAD_KEYWRAP_IV_E;
10003 
10004     return inSz - KEYWRAP_BLOCK_SIZE;
10005 }
10006 
10007 #endif /* HAVE_AES_KEYWRAP */
10008 
10009 #ifdef WOLFSSL_AES_XTS
10010 
10011 /* Galios Field to use */
10012 #define GF_XTS 0x87
10013 
10014 /* This is to help with setting keys to correct encrypt or decrypt type.
10015  *
10016  * tweak AES key for tweak in XTS
10017  * aes   AES key for encrypt/decrypt process
10018  * key   buffer holding aes key | tweak key
10019  * len   length of key buffer in bytes. Should be twice that of key size. i.e.
10020  *       32 for a 16 byte key.
10021  * dir   direction, either AES_ENCRYPTION or AES_DECRYPTION
10022  * heap  heap hint to use for memory. Can be NULL
10023  * devId id to use with async crypto. Can be 0
10024  *
10025  * Note: is up to user to call wc_AesFree on tweak and aes key when done.
10026  *
10027  * return 0 on success
10028  */
10029 int wc_AesXtsSetKey(XtsAes* aes, const byte* key, word32 len, int dir,
10030         void* heap, int devId)
10031 {
10032     word32 keySz;
10033     int    ret = 0;
10034 
10035     if (aes == NULL || key == NULL) {
10036         return BAD_FUNC_ARG;
10037     }
10038 
10039     if ((ret = wc_AesInit(&aes->tweak, heap, devId)) != 0) {
10040         return ret;
10041     }
10042     if ((ret = wc_AesInit(&aes->aes, heap, devId)) != 0) {
10043         return ret;
10044     }
10045 
10046     keySz = len/2;
10047     if (keySz != 16 && keySz != 32) {
10048         WOLFSSL_MSG("Unsupported key size");
10049         return WC_KEY_SIZE_E;
10050     }
10051 
10052     if ((ret = wc_AesSetKey(&aes->aes, key, keySz, NULL, dir)) == 0) {
10053         ret = wc_AesSetKey(&aes->tweak, key + keySz, keySz, NULL,
10054                 AES_ENCRYPTION);
10055         if (ret != 0) {
10056             wc_AesFree(&aes->aes);
10057         }
10058     }
10059 
10060     return ret;
10061 }
10062 
10063 
10064 /* This is used to free up resources used by Aes structs
10065  *
10066  * aes AES keys to free
10067  *
10068  * return 0 on success
10069  */
10070 int wc_AesXtsFree(XtsAes* aes)
10071 {
10072     if (aes != NULL) {
10073         wc_AesFree(&aes->aes);
10074         wc_AesFree(&aes->tweak);
10075     }
10076 
10077     return 0;
10078 }
10079 
10080 
10081 /* Same process as wc_AesXtsEncrypt but uses a word64 type as the tweak value
10082  * instead of a byte array. This just converts the word64 to a byte array and
10083  * calls wc_AesXtsEncrypt.
10084  *
10085  * aes    AES keys to use for block encrypt/decrypt
10086  * out    output buffer to hold cipher text
10087  * in     input plain text buffer to encrypt
10088  * sz     size of both out and in buffers
10089  * sector value to use for tweak
10090  *
10091  * returns 0 on success
10092  */
10093 int wc_AesXtsEncryptSector(XtsAes* aes, byte* out, const byte* in,
10094         word32 sz, word64 sector)
10095 {
10096     byte* pt;
10097     byte  i[AES_BLOCK_SIZE];
10098 
10099     XMEMSET(i, 0, AES_BLOCK_SIZE);
10100 #ifdef BIG_ENDIAN_ORDER
10101     sector = ByteReverseWord64(sector);
10102 #endif
10103     pt = (byte*)&sector;
10104     XMEMCPY(i, pt, sizeof(word64));
10105 
10106     return wc_AesXtsEncrypt(aes, out, in, sz, (const byte*)i, AES_BLOCK_SIZE);
10107 }
10108 
10109 
10110 /* Same process as wc_AesXtsDecrypt but uses a word64 type as the tweak value
10111  * instead of a byte array. This just converts the word64 to a byte array.
10112  *
10113  * aes    AES keys to use for block encrypt/decrypt
10114  * out    output buffer to hold plain text
10115  * in     input cipher text buffer to encrypt
10116  * sz     size of both out and in buffers
10117  * sector value to use for tweak
10118  *
10119  * returns 0 on success
10120  */
10121 int wc_AesXtsDecryptSector(XtsAes* aes, byte* out, const byte* in, word32 sz,
10122         word64 sector)
10123 {
10124     byte* pt;
10125     byte  i[AES_BLOCK_SIZE];
10126 
10127     XMEMSET(i, 0, AES_BLOCK_SIZE);
10128 #ifdef BIG_ENDIAN_ORDER
10129     sector = ByteReverseWord64(sector);
10130 #endif
10131     pt = (byte*)&sector;
10132     XMEMCPY(i, pt, sizeof(word64));
10133 
10134     return wc_AesXtsDecrypt(aes, out, in, sz, (const byte*)i, AES_BLOCK_SIZE);
10135 }
10136 
10137 #ifdef HAVE_AES_ECB
/* helper function for encrypting / decrypting full buffer at once
 *
 * Precondition: the caller has placed the encrypted tweak T(0) in the first
 * AES_BLOCK_SIZE bytes of out. This function expands that into the full
 * tweak sequence T(1)..T(n-1) in the remaining blocks of out (each block
 * derived from the previous by a GF(2^128) doubling), XORs the whole tweak
 * buffer with the input, and then runs one bulk ECB pass over it.
 *
 * Note: the caller performs the second (post-cipher) tweak XOR per block;
 * this helper only covers the pre-cipher XOR plus the block cipher itself.
 *
 * aes  AES key schedule for the data (not the tweak) cipher
 * out  output buffer, pre-seeded with T(0); must not alias in
 * in   input buffer (plain text when encrypting, cipher text when decrypting)
 * sz   total byte count; only the full-block portion is processed
 * dir  AES_ENCRYPTION or AES_DECRYPTION
 *
 * returns the result of the bulk ECB call (0 on success)
 */
static int _AesXtsHelper(Aes* aes, byte* out, const byte* in, word32 sz, int dir)
{
    word32 outSz   = sz;
    word32 totalSz = (sz / AES_BLOCK_SIZE) * AES_BLOCK_SIZE; /* total bytes */
    byte*  pt      = out;

    /* first block already holds T(0); only the remaining bytes need tweaks */
    outSz -= AES_BLOCK_SIZE;

    while (outSz > 0) {
        word32 j;
        byte carry = 0;

        /* multiply by shift left and propagate carry: derive the next tweak
         * block pt[AES_BLOCK_SIZE..] from the current one pt[0..] */
        for (j = 0; j < AES_BLOCK_SIZE && outSz > 0; j++, outSz--) {
            byte tmpC;

            tmpC   = (pt[j] >> 7) & 0x01;
            pt[j+AES_BLOCK_SIZE] = ((pt[j] << 1) + carry) & 0xFF;
            carry  = tmpC;
        }
        if (carry) {
            /* reduce by the XTS GF polynomial when the shift overflowed */
            pt[AES_BLOCK_SIZE] ^= GF_XTS;
        }

        pt += AES_BLOCK_SIZE;
    }

    /* pre-cipher XOR of every full block with its tweak, then bulk ECB */
    xorbuf(out, in, totalSz);
    if (dir == AES_ENCRYPTION) {
        return wc_AesEcbEncrypt(aes, out, out, totalSz);
    }
    else {
        return wc_AesEcbDecrypt(aes, out, out, totalSz);
    }
}
10174 #endif /* HAVE_AES_ECB */
10175 
10176 
/* AES with XTS mode. (XTS) XEX encryption with Tweak and cipher text Stealing.
 *
 * xaes  AES keys to use for block encrypt/decrypt
 * out   output buffer to hold cipher text
 * in    input plain text buffer to encrypt
 * sz    size of both out and in buffers; must be at least AES_BLOCK_SIZE
 * i     value to use for tweak
 * iSz   size of i buffer, should always be AES_BLOCK_SIZE but having this input
 *       adds a sanity check on how the user calls the function.
 *
 * returns 0 on success, negative wolfCrypt error code otherwise
 */
int wc_AesXtsEncrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz,
        const byte* i, word32 iSz)
{
    int ret = 0;
    word32 blocks = (sz / AES_BLOCK_SIZE);
    Aes *aes, *tweak;

    if (xaes == NULL || out == NULL || in == NULL) {
        return BAD_FUNC_ARG;
    }

    aes   = &xaes->aes;
    tweak = &xaes->tweak;

    /* tweak buffer must supply at least one full block */
    if (iSz < AES_BLOCK_SIZE) {
        return BAD_FUNC_ARG;
    }

    if (blocks > 0) {
        byte tmp[AES_BLOCK_SIZE]; /* running tweak value T(j) */

        XMEMSET(tmp, 0, AES_BLOCK_SIZE); /* set to 0's in case of improper AES
                                          * key setup passed to encrypt direct*/

        /* T(0) = E_K2(i): encrypt the tweak input with the tweak key */
        wc_AesEncryptDirect(tweak, tmp, i);

    #ifdef HAVE_AES_ECB
        /* encrypt all of buffer at once when possible */
        if (in != out) { /* can not handle inline */
            XMEMCPY(out, tmp, AES_BLOCK_SIZE);
            if ((ret = _AesXtsHelper(aes, out, in, sz, AES_ENCRYPTION)) != 0) {
                return ret;
            }
        }
    #endif

        /* When the ECB fast path ran, out already holds E_K1(P XOR T) per
         * block and this loop only applies the final tweak XOR; otherwise it
         * performs the full per-block XEX: C = T XOR E_K1(P XOR T). */
        while (blocks > 0) {
            word32 j;
            byte carry = 0;
            byte buf[AES_BLOCK_SIZE];

    #ifdef HAVE_AES_ECB
            if (in == out) { /* check for if inline */
    #endif
            XMEMCPY(buf, in, AES_BLOCK_SIZE);
            xorbuf(buf, tmp, AES_BLOCK_SIZE);
            wc_AesEncryptDirect(aes, out, buf);
    #ifdef HAVE_AES_ECB
            }
    #endif
            xorbuf(out, tmp, AES_BLOCK_SIZE);

            /* multiply by shift left and propagate carry: advance the
             * tweak T(j) -> T(j+1) in GF(2^128) */
            for (j = 0; j < AES_BLOCK_SIZE; j++) {
                byte tmpC;

                tmpC   = (tmp[j] >> 7) & 0x01;
                tmp[j] = ((tmp[j] << 1) + carry) & 0xFF;
                carry  = tmpC;
            }
            if (carry) {
                tmp[0] ^= GF_XTS;
            }

            in  += AES_BLOCK_SIZE;
            out += AES_BLOCK_SIZE;
            sz  -= AES_BLOCK_SIZE;
            blocks--;
        }

        /* stealing operation of XTS to handle left overs: the last partial
         * block borrows ("steals") the tail of the previous cipher block */
        if (sz > 0) {
            byte buf[AES_BLOCK_SIZE];

            XMEMCPY(buf, out - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
            if (sz >= AES_BLOCK_SIZE) { /* extra sanity check before copy */
                return BUFFER_E;
            }
            /* final partial output = leading sz bytes of previous block */
            XMEMCPY(out, buf, sz);
            /* last full block to encrypt = partial plaintext | stolen tail */
            XMEMCPY(buf, in, sz);

            xorbuf(buf, tmp, AES_BLOCK_SIZE);
            wc_AesEncryptDirect(aes, out - AES_BLOCK_SIZE, buf);
            xorbuf(out - AES_BLOCK_SIZE, tmp, AES_BLOCK_SIZE);
        }
    }
    else {
        WOLFSSL_MSG("Plain text input too small for encryption");
        return BAD_FUNC_ARG;
    }

    return ret;
}
10282 
10283 
/* Same process as encryption but Aes key is AES_DECRYPTION type.
 *
 * xaes  AES keys to use for block encrypt/decrypt
 * out   output buffer to hold plain text
 * in    input cipher text buffer to decrypt
 * sz    size of both out and in buffers; must be at least AES_BLOCK_SIZE
 * i     value to use for tweak
 * iSz   size of i buffer, should always be AES_BLOCK_SIZE but having this input
 *       adds a sanity check on how the user calls the function.
 *
 * returns 0 on success, negative wolfCrypt error code otherwise
 */
int wc_AesXtsDecrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz,
        const byte* i, word32 iSz)
{
    int ret = 0;
    word32 blocks = (sz / AES_BLOCK_SIZE);
    Aes *aes, *tweak;

    if (xaes == NULL || out == NULL || in == NULL) {
        return BAD_FUNC_ARG;
    }

    aes   = &xaes->aes;
    tweak = &xaes->tweak;

    /* tweak buffer must supply at least one full block */
    if (iSz < AES_BLOCK_SIZE) {
        return BAD_FUNC_ARG;
    }

    if (blocks > 0) {
        word32 j;
        byte carry = 0;
        byte tmp[AES_BLOCK_SIZE]; /* running tweak value T(j) */
        byte stl = (sz % AES_BLOCK_SIZE); /* leftover (stolen) byte count */

        XMEMSET(tmp, 0, AES_BLOCK_SIZE); /* set to 0's in case of improper AES
                                          * key setup passed to decrypt direct*/

        /* T(0) = E_K2(i): tweak is always produced with the ENCRYPT key */
        wc_AesEncryptDirect(tweak, tmp, i);

        /* if Stealing then break out of loop one block early to handle special
         * case */
        if (stl > 0) {
            blocks--;
        }

    #ifdef HAVE_AES_ECB
        /* decrypt all of buffer at once when possible */
        if (in != out) { /* can not handle inline */
            XMEMCPY(out, tmp, AES_BLOCK_SIZE);
            if ((ret = _AesXtsHelper(aes, out, in, sz, AES_DECRYPTION)) != 0) {
                return ret;
            }
        }
    #endif

        /* When the ECB fast path ran, out already holds D_K1(C XOR T) per
         * block and this loop only applies the final tweak XOR; otherwise it
         * performs the full per-block XEX: P = T XOR D_K1(C XOR T). */
        while (blocks > 0) {
            byte buf[AES_BLOCK_SIZE];

    #ifdef HAVE_AES_ECB
            if (in == out) { /* check for if inline */
    #endif
            XMEMCPY(buf, in, AES_BLOCK_SIZE);
            xorbuf(buf, tmp, AES_BLOCK_SIZE);
            wc_AesDecryptDirect(aes, out, buf);
    #ifdef HAVE_AES_ECB
            }
    #endif
            xorbuf(out, tmp, AES_BLOCK_SIZE);

            /* multiply by shift left and propagate carry: advance the
             * tweak T(j) -> T(j+1) in GF(2^128) */
            for (j = 0; j < AES_BLOCK_SIZE; j++) {
                byte tmpC;

                tmpC   = (tmp[j] >> 7) & 0x01;
                tmp[j] = ((tmp[j] << 1) + carry) & 0xFF;
                carry  = tmpC;
            }
            if (carry) {
                tmp[0] ^= GF_XTS;
            }
            /* reset so the stealing branch below starts with a clean carry */
            carry = 0;

            in  += AES_BLOCK_SIZE;
            out += AES_BLOCK_SIZE;
            sz  -= AES_BLOCK_SIZE;
            blocks--;
        }

        /* stealing operation of XTS to handle left overs: the last TWO
         * chunks are decrypted in swapped tweak order -- the second-to-last
         * cipher block uses T(n) while the stolen final block uses T(n-1) */
        if (sz > 0) {
            byte buf[AES_BLOCK_SIZE];
            byte tmp2[AES_BLOCK_SIZE];

            /* multiply by shift left and propagate carry: tmp2 = T(n),
             * the tweak one step ahead of tmp, needed first */
            for (j = 0; j < AES_BLOCK_SIZE; j++) {
                byte tmpC;

                tmpC   = (tmp[j] >> 7) & 0x01;
                tmp2[j] = ((tmp[j] << 1) + carry) & 0xFF;
                carry  = tmpC;
            }
            if (carry) {
                tmp2[0] ^= GF_XTS;
            }

            /* decrypt the second-to-last cipher block with T(n) */
            XMEMCPY(buf, in, AES_BLOCK_SIZE);
            xorbuf(buf, tmp2, AES_BLOCK_SIZE);
            wc_AesDecryptDirect(aes, out, buf);
            xorbuf(out, tmp2, AES_BLOCK_SIZE);

            /* tmp2 holds partial | last */
            XMEMCPY(tmp2, out, AES_BLOCK_SIZE);
            in  += AES_BLOCK_SIZE;
            out += AES_BLOCK_SIZE;
            sz  -= AES_BLOCK_SIZE;

            /* Make buffer with end of cipher text | last */
            XMEMCPY(buf, tmp2, AES_BLOCK_SIZE);
            if (sz >= AES_BLOCK_SIZE) { /* extra sanity check before copy */
                return BUFFER_E;
            }
            XMEMCPY(buf, in,   sz);
            XMEMCPY(out, tmp2, sz);

            /* decrypt the reassembled final block with T(n-1) and place it
             * one block back, before the partial tail written above */
            xorbuf(buf, tmp, AES_BLOCK_SIZE);
            wc_AesDecryptDirect(aes, tmp2, buf);
            xorbuf(tmp2, tmp, AES_BLOCK_SIZE);
            XMEMCPY(out - AES_BLOCK_SIZE, tmp2, AES_BLOCK_SIZE);
        }
    }
    else {
        /* NOTE(review): message says "encryption" but this is the decrypt
         * path -- likely a copy/paste slip in the log text */
        WOLFSSL_MSG("Plain text input too small for encryption");
        return BAD_FUNC_ARG;
    }

    return ret;
}
10423 
10424 #endif /* WOLFSSL_AES_XTS */
10425 
10426 #endif /* HAVE_FIPS */
10427 #endif /* !NO_AES */
10428