Example program to test AES-GCM functionality. Used for a workshop.

Dependencies:   mbed

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers aesni.c Source File

aesni.c

00001 /*
00002  *  AES-NI support functions
00003  *
00004  *  Copyright (C) 2013, Brainspark B.V.
00005  *
00006  *  This file is part of PolarSSL (http://www.polarssl.org)
00007  *  Lead Maintainer: Paul Bakker <polarssl_maintainer at polarssl.org>
00008  *
00009  *  All rights reserved.
00010  *
00011  *  This program is free software; you can redistribute it and/or modify
00012  *  it under the terms of the GNU General Public License as published by
00013  *  the Free Software Foundation; either version 2 of the License, or
00014  *  (at your option) any later version.
00015  *
00016  *  This program is distributed in the hope that it will be useful,
00017  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00018  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00019  *  GNU General Public License for more details.
00020  *
00021  *  You should have received a copy of the GNU General Public License along
00022  *  with this program; if not, write to the Free Software Foundation, Inc.,
00023  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
00024  */
00025 
00026 /*
00027  * [AES-WP] http://software.intel.com/en-us/articles/intel-advanced-encryption-standard-aes-instructions-set
00028  * [CLMUL-WP] http://software.intel.com/en-us/articles/intel-carry-less-multiplication-instruction-and-its-usage-for-computing-the-gcm-mode/
00029  */
00030 
00031 #if !defined(POLARSSL_CONFIG_FILE)
00032 #include "polarssl/config.h"
00033 #else
00034 #include POLARSSL_CONFIG_FILE
00035 #endif
00036 
00037 #if defined(POLARSSL_AESNI_C)
00038 
00039 #include "polarssl/aesni.h"
00040 #include <stdio.h>
00041 
00042 #if defined(POLARSSL_HAVE_X86_64)
00043 
/*
 * AES-NI support detection routine
 *
 * Executes CPUID leaf 1 on the first call, caches the resulting ECX word
 * in a function-local static and tests it against the caller's bit mask.
 *
 * what    mask of CPUID(1).ECX bits to test
 *
 * Returns 1 if at least one bit of `what` is set in the cached ECX word,
 * 0 otherwise (so a mask of 0 always yields 0).
 *
 * No locking is performed around the static cache.
 */
int aesni_supports( unsigned int what )
{
    static int done = 0;
    static unsigned int c = 0;

    if( ! done )
    {
        /*
         * Use __asm__ rather than asm: the plain keyword is a GNU extension
         * that is not recognised in strict ISO modes (-std=c99, -std=c11).
         */
        __asm__( "movl  $1, %%eax   \n" // select CPUID leaf 1
                 "cpuid             \n" // ECX is bound to c below
                 : "=c" (c)
                 :
                 : "eax", "ebx", "edx" );
        done = 1;
    }

    return( ( c & what ) != 0 );
}
00064 
/*
 * Binutils needs to be at least 2.19 to support AES-NI instructions.
 * Unfortunately, a lot of users have a lower version now (2014-04).
 * The instructions are therefore emitted as raw ".byte" sequences so that
 * "old" versions of gas can still assemble this file.
 *
 * Opcodes from the Intel architecture reference manual, vol. 3.
 * Only register operands are used, so no memory-operand prefixes are needed.
 *
 * Each instruction macro expands to a string ending in a comma; it must be
 * followed by one of the operand macros (the ModRM byte) and, for
 * AESKEYGENA and PCLMULQDQ, by ",<imm8>".
 */

/* Shared leading bytes of the two opcode maps used below */
#define AESNI_PFX_0F38  ".byte 0x66,0x0F,0x38,"
#define AESNI_PFX_0F3A  ".byte 0x66,0x0F,0x3A,"

#define AESIMC      AESNI_PFX_0F38 "0xDB,"
#define AESENC      AESNI_PFX_0F38 "0xDC,"
#define AESENCLAST  AESNI_PFX_0F38 "0xDD,"
#define AESDEC      AESNI_PFX_0F38 "0xDE,"
#define AESDECLAST  AESNI_PFX_0F38 "0xDF,"

#define PCLMULQDQ   AESNI_PFX_0F3A "0x44,"
#define AESKEYGENA  AESNI_PFX_0F3A "0xDF,"

/*
 * Operand macros, named in gas order (src_dst) as opposed to Intel order
 * (dst, src) so that they blend into the surrounding assembly code.
 * The values follow the pattern 0xC0 + 8 * dst + src.
 */
#define xmm0_xmm0   "0xC0"
#define xmm1_xmm0   "0xC1"
#define xmm0_xmm1   "0xC8"
#define xmm0_xmm2   "0xD0"
#define xmm1_xmm2   "0xD1"
#define xmm0_xmm3   "0xD8"
#define xmm0_xmm4   "0xE0"
00090 
00091 /*
00092  * AES-NI AES-ECB block en(de)cryption
00093  */
00094 int aesni_crypt_ecb( aes_context *ctx,
00095                      int mode,
00096                      const unsigned char input[16],
00097                      unsigned char output[16] )
00098 {
00099     asm( "movdqu    (%3), %%xmm0    \n" // load input
00100          "movdqu    (%1), %%xmm1    \n" // load round key 0
00101          "pxor      %%xmm1, %%xmm0  \n" // round 0
00102          "addq      $16, %1         \n" // point to next round key
00103          "subl      $1, %0          \n" // normal rounds = nr - 1
00104          "test      %2, %2          \n" // mode?
00105          "jz        2f              \n" // 0 = decrypt
00106 
00107          "1:                        \n" // encryption loop
00108          "movdqu    (%1), %%xmm1    \n" // load round key
00109          AESENC     xmm1_xmm0      "\n" // do round
00110          "addq      $16, %1         \n" // point to next round key
00111          "subl      $1, %0          \n" // loop
00112          "jnz       1b              \n"
00113          "movdqu    (%1), %%xmm1    \n" // load round key
00114          AESENCLAST xmm1_xmm0      "\n" // last round
00115          "jmp       3f              \n"
00116 
00117          "2:                        \n" // decryption loop
00118          "movdqu    (%1), %%xmm1    \n"
00119          AESDEC     xmm1_xmm0      "\n" // do round
00120          "addq      $16, %1         \n"
00121          "subl      $1, %0          \n"
00122          "jnz       2b              \n"
00123          "movdqu    (%1), %%xmm1    \n" // load round key
00124          AESDECLAST xmm1_xmm0      "\n" // last round
00125 
00126          "3:                        \n"
00127          "movdqu    %%xmm0, (%4)    \n" // export output
00128          :
00129          : "r" (ctx->nr ), "r" (ctx->rk ), "r" (mode), "r" (input), "r" (output)
00130          : "memory", "cc", "xmm0", "xmm1" );
00131 
00132 
00133     return( 0 );
00134 }
00135 
00136 /*
00137  * GCM multiplication: c = a times b in GF(2^128)
00138  * Based on [CLMUL-WP] algorithms 1 (with equation 27) and 5.
00139  */
00140 void aesni_gcm_mult( unsigned char c[16],
00141                      const unsigned char a[16],
00142                      const unsigned char b[16] )
00143 {
00144     unsigned char aa[16], bb[16], cc[16];
00145     size_t i;
00146 
00147     /* The inputs are in big-endian order, so byte-reverse them */
00148     for( i = 0; i < 16; i++ )
00149     {
00150         aa[i] = a[15 - i];
00151         bb[i] = b[15 - i];
00152     }
00153 
00154     asm( "movdqu (%0), %%xmm0               \n" // a1:a0
00155          "movdqu (%1), %%xmm1               \n" // b1:b0
00156 
00157          /*
00158           * Caryless multiplication xmm2:xmm1 = xmm0 * xmm1
00159           * using [CLMUL-WP] algorithm 1 (p. 13).
00160           */
00161          "movdqa %%xmm1, %%xmm2             \n" // copy of b1:b0
00162          "movdqa %%xmm1, %%xmm3             \n" // same
00163          "movdqa %%xmm1, %%xmm4             \n" // same
00164          PCLMULQDQ xmm0_xmm1 ",0x00         \n" // a0*b0 = c1:c0
00165          PCLMULQDQ xmm0_xmm2 ",0x11         \n" // a1*b1 = d1:d0
00166          PCLMULQDQ xmm0_xmm3 ",0x10         \n" // a0*b1 = e1:e0
00167          PCLMULQDQ xmm0_xmm4 ",0x01         \n" // a1*b0 = f1:f0
00168          "pxor %%xmm3, %%xmm4               \n" // e1+f1:e0+f0
00169          "movdqa %%xmm4, %%xmm3             \n" // same
00170          "psrldq $8, %%xmm4                 \n" // 0:e1+f1
00171          "pslldq $8, %%xmm3                 \n" // e0+f0:0
00172          "pxor %%xmm4, %%xmm2               \n" // d1:d0+e1+f1
00173          "pxor %%xmm3, %%xmm1               \n" // c1+e0+f1:c0
00174 
00175          /*
00176           * Now shift the result one bit to the left,
00177           * taking advantage of [CLMUL-WP] eq 27 (p. 20)
00178           */
00179          "movdqa %%xmm1, %%xmm3             \n" // r1:r0
00180          "movdqa %%xmm2, %%xmm4             \n" // r3:r2
00181          "psllq $1, %%xmm1                  \n" // r1<<1:r0<<1
00182          "psllq $1, %%xmm2                  \n" // r3<<1:r2<<1
00183          "psrlq $63, %%xmm3                 \n" // r1>>63:r0>>63
00184          "psrlq $63, %%xmm4                 \n" // r3>>63:r2>>63
00185          "movdqa %%xmm3, %%xmm5             \n" // r1>>63:r0>>63
00186          "pslldq $8, %%xmm3                 \n" // r0>>63:0
00187          "pslldq $8, %%xmm4                 \n" // r2>>63:0
00188          "psrldq $8, %%xmm5                 \n" // 0:r1>>63
00189          "por %%xmm3, %%xmm1                \n" // r1<<1|r0>>63:r0<<1
00190          "por %%xmm4, %%xmm2                \n" // r3<<1|r2>>62:r2<<1
00191          "por %%xmm5, %%xmm2                \n" // r3<<1|r2>>62:r2<<1|r1>>63
00192 
00193          /*
00194           * Now reduce modulo the GCM polynomial x^128 + x^7 + x^2 + x + 1
00195           * using [CLMUL-WP] algorithm 5 (p. 20).
00196           * Currently xmm2:xmm1 holds x3:x2:x1:x0 (already shifted).
00197           */
00198          /* Step 2 (1) */
00199          "movdqa %%xmm1, %%xmm3             \n" // x1:x0
00200          "movdqa %%xmm1, %%xmm4             \n" // same
00201          "movdqa %%xmm1, %%xmm5             \n" // same
00202          "psllq $63, %%xmm3                 \n" // x1<<63:x0<<63 = stuff:a
00203          "psllq $62, %%xmm4                 \n" // x1<<62:x0<<62 = stuff:b
00204          "psllq $57, %%xmm5                 \n" // x1<<57:x0<<57 = stuff:c
00205 
00206          /* Step 2 (2) */
00207          "pxor %%xmm4, %%xmm3               \n" // stuff:a+b
00208          "pxor %%xmm5, %%xmm3               \n" // stuff:a+b+c
00209          "pslldq $8, %%xmm3                 \n" // a+b+c:0
00210          "pxor %%xmm3, %%xmm1               \n" // x1+a+b+c:x0 = d:x0
00211 
00212          /* Steps 3 and 4 */
00213          "movdqa %%xmm1,%%xmm0              \n" // d:x0
00214          "movdqa %%xmm1,%%xmm4              \n" // same
00215          "movdqa %%xmm1,%%xmm5              \n" // same
00216          "psrlq $1, %%xmm0                  \n" // e1:x0>>1 = e1:e0'
00217          "psrlq $2, %%xmm4                  \n" // f1:x0>>2 = f1:f0'
00218          "psrlq $7, %%xmm5                  \n" // g1:x0>>7 = g1:g0'
00219          "pxor %%xmm4, %%xmm0               \n" // e1+f1:e0'+f0'
00220          "pxor %%xmm5, %%xmm0               \n" // e1+f1+g1:e0'+f0'+g0'
00221          // e0'+f0'+g0' is almost e0+f0+g0, except for some missing
00222          // bits carried from d. Now get those bits back in.
00223          "movdqa %%xmm1,%%xmm3              \n" // d:x0
00224          "movdqa %%xmm1,%%xmm4              \n" // same
00225          "movdqa %%xmm1,%%xmm5              \n" // same
00226          "psllq $63, %%xmm3                 \n" // d<<63:stuff
00227          "psllq $62, %%xmm4                 \n" // d<<62:stuff
00228          "psllq $57, %%xmm5                 \n" // d<<57:stuff
00229          "pxor %%xmm4, %%xmm3               \n" // d<<63+d<<62:stuff
00230          "pxor %%xmm5, %%xmm3               \n" // missing bits of d:stuff
00231          "psrldq $8, %%xmm3                 \n" // 0:missing bits of d
00232          "pxor %%xmm3, %%xmm0               \n" // e1+f1+g1:e0+f0+g0
00233          "pxor %%xmm1, %%xmm0               \n" // h1:h0
00234          "pxor %%xmm2, %%xmm0               \n" // x3+h1:x2+h0
00235 
00236          "movdqu %%xmm0, (%2)               \n" // done
00237          :
00238          : "r" (aa), "r" (bb), "r" (cc)
00239          : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" );
00240 
00241     /* Now byte-reverse the outputs */
00242     for( i = 0; i < 16; i++ )
00243         c[i] = cc[15 - i];
00244 
00245     return;
00246 }
00247 
00248 /*
00249  * Compute decryption round keys from encryption round keys
00250  */
00251 void aesni_inverse_key( unsigned char *invkey,
00252                         const unsigned char *fwdkey, int nr )
00253 {
00254     unsigned char *ik = invkey;
00255     const unsigned char *fk = fwdkey + 16 * nr;
00256 
00257     memcpy( ik, fk, 16 );
00258 
00259     for( fk -= 16, ik += 16; fk > fwdkey; fk -= 16, ik += 16 )
00260         asm( "movdqu (%0), %%xmm0       \n"
00261              AESIMC  xmm0_xmm0         "\n"
00262              "movdqu %%xmm0, (%1)       \n"
00263              :
00264              : "r" (fk), "r" (ik)
00265              : "memory", "xmm0" );
00266 
00267     memcpy( ik, fk, 16 );
00268 }
00269 
00270 /*
00271  * Key expansion, 128-bit case
00272  */
00273 static void aesni_setkey_enc_128( unsigned char *rk,
00274                                   const unsigned char *key )
00275 {
00276     asm( "movdqu (%1), %%xmm0               \n" // copy the original key
00277          "movdqu %%xmm0, (%0)               \n" // as round key 0
00278          "jmp 2f                            \n" // skip auxiliary routine
00279 
00280          /*
00281           * Finish generating the next round key.
00282           *
00283           * On entry xmm0 is r3:r2:r1:r0 and xmm1 is X:stuff:stuff:stuff
00284           * with X = rot( sub( r3 ) ) ^ RCON.
00285           *
00286           * On exit, xmm0 is r7:r6:r5:r4
00287           * with r4 = X + r0, r5 = r4 + r1, r6 = r5 + r2, r7 = r6 + r3
00288           * and those are written to the round key buffer.
00289           */
00290          "1:                                \n"
00291          "pshufd $0xff, %%xmm1, %%xmm1      \n" // X:X:X:X
00292          "pxor %%xmm0, %%xmm1               \n" // X+r3:X+r2:X+r1:r4
00293          "pslldq $4, %%xmm0                 \n" // r2:r1:r0:0
00294          "pxor %%xmm0, %%xmm1               \n" // X+r3+r2:X+r2+r1:r5:r4
00295          "pslldq $4, %%xmm0                 \n" // etc
00296          "pxor %%xmm0, %%xmm1               \n"
00297          "pslldq $4, %%xmm0                 \n"
00298          "pxor %%xmm1, %%xmm0               \n" // update xmm0 for next time!
00299          "add $16, %0                       \n" // point to next round key
00300          "movdqu %%xmm0, (%0)               \n" // write it
00301          "ret                               \n"
00302 
00303          /* Main "loop" */
00304          "2:                                \n"
00305          AESKEYGENA xmm0_xmm1 ",0x01        \ncall 1b   \n"
00306          AESKEYGENA xmm0_xmm1 ",0x02        \ncall 1b   \n"
00307          AESKEYGENA xmm0_xmm1 ",0x04        \ncall 1b   \n"
00308          AESKEYGENA xmm0_xmm1 ",0x08        \ncall 1b   \n"
00309          AESKEYGENA xmm0_xmm1 ",0x10        \ncall 1b   \n"
00310          AESKEYGENA xmm0_xmm1 ",0x20        \ncall 1b   \n"
00311          AESKEYGENA xmm0_xmm1 ",0x40        \ncall 1b   \n"
00312          AESKEYGENA xmm0_xmm1 ",0x80        \ncall 1b   \n"
00313          AESKEYGENA xmm0_xmm1 ",0x1B        \ncall 1b   \n"
00314          AESKEYGENA xmm0_xmm1 ",0x36        \ncall 1b   \n"
00315          :
00316          : "r" (rk), "r" (key)
00317          : "memory", "cc", "0" );
00318 }
00319 
00320 /*
00321  * Key expansion, 192-bit case
00322  */
00323 static void aesni_setkey_enc_192( unsigned char *rk,
00324                                   const unsigned char *key )
00325 {
00326     asm( "movdqu (%1), %%xmm0   \n" // copy original round key
00327          "movdqu %%xmm0, (%0)   \n"
00328          "add $16, %0           \n"
00329          "movq 16(%1), %%xmm1   \n"
00330          "movq %%xmm1, (%0)     \n"
00331          "add $8, %0            \n"
00332          "jmp 2f                \n" // skip auxiliary routine
00333 
00334          /*
00335           * Finish generating the next 6 quarter-keys.
00336           *
00337           * On entry xmm0 is r3:r2:r1:r0, xmm1 is stuff:stuff:r5:r4
00338           * and xmm2 is stuff:stuff:X:stuff with X = rot( sub( r3 ) ) ^ RCON.
00339           *
00340           * On exit, xmm0 is r9:r8:r7:r6 and xmm1 is stuff:stuff:r11:r10
00341           * and those are written to the round key buffer.
00342           */
00343          "1:                            \n"
00344          "pshufd $0x55, %%xmm2, %%xmm2  \n" // X:X:X:X
00345          "pxor %%xmm0, %%xmm2           \n" // X+r3:X+r2:X+r1:r4
00346          "pslldq $4, %%xmm0             \n" // etc
00347          "pxor %%xmm0, %%xmm2           \n"
00348          "pslldq $4, %%xmm0             \n"
00349          "pxor %%xmm0, %%xmm2           \n"
00350          "pslldq $4, %%xmm0             \n"
00351          "pxor %%xmm2, %%xmm0           \n" // update xmm0 = r9:r8:r7:r6
00352          "movdqu %%xmm0, (%0)           \n"
00353          "add $16, %0                   \n"
00354          "pshufd $0xff, %%xmm0, %%xmm2  \n" // r9:r9:r9:r9
00355          "pxor %%xmm1, %%xmm2           \n" // stuff:stuff:r9+r5:r10
00356          "pslldq $4, %%xmm1             \n" // r2:r1:r0:0
00357          "pxor %%xmm2, %%xmm1           \n" // update xmm1 = stuff:stuff:r11:r10
00358          "movq %%xmm1, (%0)             \n"
00359          "add $8, %0                    \n"
00360          "ret                           \n"
00361 
00362          "2:                            \n"
00363          AESKEYGENA xmm1_xmm2 ",0x01    \ncall 1b   \n"
00364          AESKEYGENA xmm1_xmm2 ",0x02    \ncall 1b   \n"
00365          AESKEYGENA xmm1_xmm2 ",0x04    \ncall 1b   \n"
00366          AESKEYGENA xmm1_xmm2 ",0x08    \ncall 1b   \n"
00367          AESKEYGENA xmm1_xmm2 ",0x10    \ncall 1b   \n"
00368          AESKEYGENA xmm1_xmm2 ",0x20    \ncall 1b   \n"
00369          AESKEYGENA xmm1_xmm2 ",0x40    \ncall 1b   \n"
00370          AESKEYGENA xmm1_xmm2 ",0x80    \ncall 1b   \n"
00371 
00372          :
00373          : "r" (rk), "r" (key)
00374          : "memory", "cc", "0" );
00375 }
00376 
00377 /*
00378  * Key expansion, 256-bit case
00379  */
00380 static void aesni_setkey_enc_256( unsigned char *rk,
00381                                   const unsigned char *key )
00382 {
00383     asm( "movdqu (%1), %%xmm0           \n"
00384          "movdqu %%xmm0, (%0)           \n"
00385          "add $16, %0                   \n"
00386          "movdqu 16(%1), %%xmm1         \n"
00387          "movdqu %%xmm1, (%0)           \n"
00388          "jmp 2f                        \n" // skip auxiliary routine
00389 
00390          /*
00391           * Finish generating the next two round keys.
00392           *
00393           * On entry xmm0 is r3:r2:r1:r0, xmm1 is r7:r6:r5:r4 and
00394           * xmm2 is X:stuff:stuff:stuff with X = rot( sub( r7 )) ^ RCON
00395           *
00396           * On exit, xmm0 is r11:r10:r9:r8 and xmm1 is r15:r14:r13:r12
00397           * and those have been written to the output buffer.
00398           */
00399          "1:                                \n"
00400          "pshufd $0xff, %%xmm2, %%xmm2      \n"
00401          "pxor %%xmm0, %%xmm2               \n"
00402          "pslldq $4, %%xmm0                 \n"
00403          "pxor %%xmm0, %%xmm2               \n"
00404          "pslldq $4, %%xmm0                 \n"
00405          "pxor %%xmm0, %%xmm2               \n"
00406          "pslldq $4, %%xmm0                 \n"
00407          "pxor %%xmm2, %%xmm0               \n"
00408          "add $16, %0                       \n"
00409          "movdqu %%xmm0, (%0)               \n"
00410 
00411          /* Set xmm2 to stuff:Y:stuff:stuff with Y = subword( r11 )
00412           * and proceed to generate next round key from there */
00413          AESKEYGENA xmm0_xmm2 ",0x00        \n"
00414          "pshufd $0xaa, %%xmm2, %%xmm2      \n"
00415          "pxor %%xmm1, %%xmm2               \n"
00416          "pslldq $4, %%xmm1                 \n"
00417          "pxor %%xmm1, %%xmm2               \n"
00418          "pslldq $4, %%xmm1                 \n"
00419          "pxor %%xmm1, %%xmm2               \n"
00420          "pslldq $4, %%xmm1                 \n"
00421          "pxor %%xmm2, %%xmm1               \n"
00422          "add $16, %0                       \n"
00423          "movdqu %%xmm1, (%0)               \n"
00424          "ret                               \n"
00425 
00426          /*
00427           * Main "loop" - Generating one more key than necessary,
00428           * see definition of aes_context.buf
00429           */
00430          "2:                                \n"
00431          AESKEYGENA xmm1_xmm2 ",0x01        \ncall 1b   \n"
00432          AESKEYGENA xmm1_xmm2 ",0x02        \ncall 1b   \n"
00433          AESKEYGENA xmm1_xmm2 ",0x04        \ncall 1b   \n"
00434          AESKEYGENA xmm1_xmm2 ",0x08        \ncall 1b   \n"
00435          AESKEYGENA xmm1_xmm2 ",0x10        \ncall 1b   \n"
00436          AESKEYGENA xmm1_xmm2 ",0x20        \ncall 1b   \n"
00437          AESKEYGENA xmm1_xmm2 ",0x40        \ncall 1b   \n"
00438          :
00439          : "r" (rk), "r" (key)
00440          : "memory", "cc", "0" );
00441 }
00442 
00443 /*
00444  * Key expansion, wrapper
00445  */
00446 int aesni_setkey_enc( unsigned char *rk,
00447                       const unsigned char *key,
00448                       size_t bits )
00449 {
00450     switch( bits )
00451     {
00452         case 128: aesni_setkey_enc_128( rk, key ); break;
00453         case 192: aesni_setkey_enc_192( rk, key ); break;
00454         case 256: aesni_setkey_enc_256( rk, key ); break;
00455         default : return( POLARSSL_ERR_AES_INVALID_KEY_LENGTH );
00456     }
00457 
00458     return( 0 );
00459 }
00460 
00461 #endif /* POLARSSL_HAVE_X86_64 */
00462 
00463 #endif /* POLARSSL_AESNI_C */
00464 
00465