wolfSSL SSL/TLS library, support up to TLS1.3

Dependents:   CyaSSL-Twitter-OAuth4Tw Example-client-tls-cert TwitterReader TweetTest ... more

Committer:
wolfSSL
Date:
Tue Aug 22 10:48:22 2017 +0000
Revision:
13:f67a6c6013ca
wolfSSL3.12.0 with TLS1.3

Who changed what in which revision?

User | Revision | Line number | New contents of line
wolfSSL 13:f67a6c6013ca 1 /* chacha.c
wolfSSL 13:f67a6c6013ca 2 *
wolfSSL 13:f67a6c6013ca 3 * Copyright (C) 2006-2016 wolfSSL Inc.
wolfSSL 13:f67a6c6013ca 4 *
wolfSSL 13:f67a6c6013ca 5 * This file is part of wolfSSL.
wolfSSL 13:f67a6c6013ca 6 *
wolfSSL 13:f67a6c6013ca 7 * wolfSSL is free software; you can redistribute it and/or modify
wolfSSL 13:f67a6c6013ca 8 * it under the terms of the GNU General Public License as published by
wolfSSL 13:f67a6c6013ca 9 * the Free Software Foundation; either version 2 of the License, or
wolfSSL 13:f67a6c6013ca 10 * (at your option) any later version.
wolfSSL 13:f67a6c6013ca 11 *
wolfSSL 13:f67a6c6013ca 12 * wolfSSL is distributed in the hope that it will be useful,
wolfSSL 13:f67a6c6013ca 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
wolfSSL 13:f67a6c6013ca 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
wolfSSL 13:f67a6c6013ca 15 * GNU General Public License for more details.
wolfSSL 13:f67a6c6013ca 16 *
wolfSSL 13:f67a6c6013ca 17 * You should have received a copy of the GNU General Public License
wolfSSL 13:f67a6c6013ca 18 * along with this program; if not, write to the Free Software
wolfSSL 13:f67a6c6013ca 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
wolfSSL 13:f67a6c6013ca 20 *
wolfSSL 13:f67a6c6013ca 21 * based from
wolfSSL 13:f67a6c6013ca 22 * chacha-ref.c version 20080118
wolfSSL 13:f67a6c6013ca 23 * D. J. Bernstein
wolfSSL 13:f67a6c6013ca 24 * Public domain.
wolfSSL 13:f67a6c6013ca 25 */
wolfSSL 13:f67a6c6013ca 26
wolfSSL 13:f67a6c6013ca 27
wolfSSL 13:f67a6c6013ca 28
wolfSSL 13:f67a6c6013ca 29 #ifdef HAVE_CONFIG_H
wolfSSL 13:f67a6c6013ca 30 #include <config.h>
wolfSSL 13:f67a6c6013ca 31 #endif
wolfSSL 13:f67a6c6013ca 32
wolfSSL 13:f67a6c6013ca 33 #include <wolfssl/wolfcrypt/settings.h>
wolfSSL 13:f67a6c6013ca 34
wolfSSL 13:f67a6c6013ca 35 #ifdef HAVE_CHACHA
wolfSSL 13:f67a6c6013ca 36
wolfSSL 13:f67a6c6013ca 37 #include <wolfssl/wolfcrypt/chacha.h>
wolfSSL 13:f67a6c6013ca 38 #include <wolfssl/wolfcrypt/error-crypt.h>
wolfSSL 13:f67a6c6013ca 39 #include <wolfssl/wolfcrypt/logging.h>
wolfSSL 13:f67a6c6013ca 40 #include <wolfssl/wolfcrypt/cpuid.h>
wolfSSL 13:f67a6c6013ca 41 #ifdef NO_INLINE
wolfSSL 13:f67a6c6013ca 42 #include <wolfssl/wolfcrypt/misc.h>
wolfSSL 13:f67a6c6013ca 43 #else
wolfSSL 13:f67a6c6013ca 44 #define WOLFSSL_MISC_INCLUDED
wolfSSL 13:f67a6c6013ca 45 #include <wolfcrypt/src/misc.c>
wolfSSL 13:f67a6c6013ca 46 #endif
wolfSSL 13:f67a6c6013ca 47
wolfSSL 13:f67a6c6013ca 48 #ifdef CHACHA_AEAD_TEST
wolfSSL 13:f67a6c6013ca 49 #include <stdio.h>
wolfSSL 13:f67a6c6013ca 50 #endif
wolfSSL 13:f67a6c6013ca 51
wolfSSL 13:f67a6c6013ca 52 #ifdef WOLFSSL_X86_64_BUILD
wolfSSL 13:f67a6c6013ca 53 #if defined(USE_INTEL_SPEEDUP) && !defined(NO_CHACHA_ASM)
wolfSSL 13:f67a6c6013ca 54 #define USE_INTEL_CHACHA_SPEEDUP
wolfSSL 13:f67a6c6013ca 55 #endif
wolfSSL 13:f67a6c6013ca 56 #endif
wolfSSL 13:f67a6c6013ca 57
wolfSSL 13:f67a6c6013ca 58 #ifdef USE_INTEL_CHACHA_SPEEDUP
wolfSSL 13:f67a6c6013ca 59 #include <emmintrin.h>
wolfSSL 13:f67a6c6013ca 60 #include <immintrin.h>
wolfSSL 13:f67a6c6013ca 61 #define HAVE_INTEL_AVX1
wolfSSL 13:f67a6c6013ca 62 #define HAVE_INTEL_AVX2
wolfSSL 13:f67a6c6013ca 63 #endif
wolfSSL 13:f67a6c6013ca 64
/* LITTLE32(x): converts between host word order and little-endian word order.
 * When BIG_ENDIAN_ORDER is defined the word is passed through
 * ByteReverseWord32(); otherwise the macro is the identity. */
wolfSSL 13:f67a6c6013ca 65 #ifdef BIG_ENDIAN_ORDER
wolfSSL 13:f67a6c6013ca 66 #define LITTLE32(x) ByteReverseWord32(x)
wolfSSL 13:f67a6c6013ca 67 #else
wolfSSL 13:f67a6c6013ca 68 #define LITTLE32(x) (x)
wolfSSL 13:f67a6c6013ca 69 #endif
wolfSSL 13:f67a6c6013ca 70
wolfSSL 13:f67a6c6013ca 71 /* Number of rounds */
wolfSSL 13:f67a6c6013ca 72 #define ROUNDS 20
wolfSSL 13:f67a6c6013ca 73
/* U32C(v): appends the unsigned suffix U to the literal v.
 * U32V(v): casts v to word32 and masks it to the low 32 bits. */
wolfSSL 13:f67a6c6013ca 74 #define U32C(v) (v##U)
wolfSSL 13:f67a6c6013ca 75 #define U32V(v) ((word32)(v) & U32C(0xFFFFFFFF))
/* U8TO32_LITTLE(p): loads one word32 through a cast of p and applies LITTLE32.
 * NOTE(review): this is a direct word load through a cast pointer, so p is
 * presumably expected to be word aligned - confirm for every caller. */
wolfSSL 13:f67a6c6013ca 76 #define U8TO32_LITTLE(p) LITTLE32(((word32*)(p))[0])
wolfSSL 13:f67a6c6013ca 77
/* Primitive operations used by QUARTERROUND: left rotate (via rotlFixed),
 * exclusive or, addition truncated to 32 bits, and increment by one. */
wolfSSL 13:f67a6c6013ca 78 #define ROTATE(v,c) rotlFixed(v, c)
wolfSSL 13:f67a6c6013ca 79 #define XOR(v,w) ((v) ^ (w))
wolfSSL 13:f67a6c6013ca 80 #define PLUS(v,w) (U32V((v) + (w)))
wolfSSL 13:f67a6c6013ca 81 #define PLUSONE(v) (PLUS((v),1))
wolfSSL 13:f67a6c6013ca 82
/* QUARTERROUND(a,b,c,d): one ChaCha quarter round on elements a, b, c and d
 * of an array that must be named x in the expanding scope. The four steps
 * use left rotations of 16, 12, 8 and 7 bits.
 *
 * The expansion is a sequence of statements that already ends in ';', so it
 * is invoked without a trailing semicolon and must not be used as the
 * unbraced body of an if/else or loop. */
wolfSSL 13:f67a6c6013ca 83 #define QUARTERROUND(a,b,c,d) \
wolfSSL 13:f67a6c6013ca 84 x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]),16); \
wolfSSL 13:f67a6c6013ca 85 x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]),12); \
wolfSSL 13:f67a6c6013ca 86 x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]), 8); \
wolfSSL 13:f67a6c6013ca 87 x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]), 7);
wolfSSL 13:f67a6c6013ca 88
wolfSSL 13:f67a6c6013ca 89
wolfSSL 13:f67a6c6013ca 90
/*
 * Four interleaved ChaCha quarter rounds, emitted as an inline-asm string
 * fragment for use inside an asm statement that provides the named operands
 * %[x], %[rotl16] and %[rotl8].
 *
 * (aN,bN,cN,dN) are the vector register operands of quarter round N, t1 is a
 * scratch register and o1 is a byte offset into the %[x] memory operand.
 * In each half, c3 is loaded from o1(%[x]) before it is used and stored back
 * to the same slot afterwards; t1 is only written after that store, which
 * lets a caller pass the same register for c3 and t1.
 *
 * Rotations by 16 and 8 bits are done with vpshufb and the %[rotl16] /
 * %[rotl8] shuffle operands. Rotations by 12 and 7 bits are built from a
 * vpsrld/vpslld pair (20/12 and 25/7) combined with vpxor through t1.
 */
wolfSSL 13:f67a6c6013ca 91 #define QUARTERROUND_INTEL_ASM(a0,b0,c0,d0, \
wolfSSL 13:f67a6c6013ca 92 a1,b1,c1,d1, \
wolfSSL 13:f67a6c6013ca 93 a2,b2,c2,d2, \
wolfSSL 13:f67a6c6013ca 94 a3,b3,c3,d3, \
wolfSSL 13:f67a6c6013ca 95 t1,o1) \
wolfSSL 13:f67a6c6013ca 96 "vpaddd "#b0", "#a0", "#a0"\n\t" \
wolfSSL 13:f67a6c6013ca 97 "vpxor "#a0", "#d0", "#d0"\n\t" \
wolfSSL 13:f67a6c6013ca 98 "vmovdqa "#o1"(%[x]), "#c3"\n\t" \
wolfSSL 13:f67a6c6013ca 99 "vpshufb %[rotl16], "#d0", "#d0"\n\t" \
wolfSSL 13:f67a6c6013ca 100 "vpaddd "#d0", "#c0", "#c0"\n\t" \
wolfSSL 13:f67a6c6013ca 101 "vpxor "#c0", "#b0", "#b0"\n\t" \
wolfSSL 13:f67a6c6013ca 102 "vpaddd "#b1", "#a1", "#a1"\n\t" \
wolfSSL 13:f67a6c6013ca 103 "vpxor "#a1", "#d1", "#d1"\n\t" \
wolfSSL 13:f67a6c6013ca 104 "vpshufb %[rotl16], "#d1", "#d1"\n\t" \
wolfSSL 13:f67a6c6013ca 105 "vpaddd "#d1", "#c1", "#c1"\n\t" \
wolfSSL 13:f67a6c6013ca 106 "vpxor "#c1", "#b1", "#b1"\n\t" \
wolfSSL 13:f67a6c6013ca 107 "vpaddd "#b2", "#a2", "#a2"\n\t" \
wolfSSL 13:f67a6c6013ca 108 "vpxor "#a2", "#d2", "#d2"\n\t" \
wolfSSL 13:f67a6c6013ca 109 "vpshufb %[rotl16], "#d2", "#d2"\n\t" \
wolfSSL 13:f67a6c6013ca 110 "vpaddd "#d2", "#c2", "#c2"\n\t" \
wolfSSL 13:f67a6c6013ca 111 "vpxor "#c2", "#b2", "#b2"\n\t" \
wolfSSL 13:f67a6c6013ca 112 "vpaddd "#b3", "#a3", "#a3"\n\t" \
wolfSSL 13:f67a6c6013ca 113 "vpxor "#a3", "#d3", "#d3"\n\t" \
wolfSSL 13:f67a6c6013ca 114 "vpshufb %[rotl16], "#d3", "#d3"\n\t" \
wolfSSL 13:f67a6c6013ca 115 "vpaddd "#d3", "#c3", "#c3"\n\t" \
wolfSSL 13:f67a6c6013ca 116 "vpxor "#c3", "#b3", "#b3"\n\t" \
wolfSSL 13:f67a6c6013ca 117 "vmovdqa "#c3", "#o1"(%[x])\n\t" \
wolfSSL 13:f67a6c6013ca 118 "vpsrld $20, "#b0", "#t1"\n\t" \
wolfSSL 13:f67a6c6013ca 119 "vpslld $12, "#b0", "#b0"\n\t" \
wolfSSL 13:f67a6c6013ca 120 "vpxor "#t1", "#b0", "#b0"\n\t" \
wolfSSL 13:f67a6c6013ca 121 "vpsrld $20, "#b1", "#t1"\n\t" \
wolfSSL 13:f67a6c6013ca 122 "vpslld $12, "#b1", "#b1"\n\t" \
wolfSSL 13:f67a6c6013ca 123 "vpxor "#t1", "#b1", "#b1"\n\t" \
wolfSSL 13:f67a6c6013ca 124 "vpsrld $20, "#b2", "#t1"\n\t" \
wolfSSL 13:f67a6c6013ca 125 "vpslld $12, "#b2", "#b2"\n\t" \
wolfSSL 13:f67a6c6013ca 126 "vpxor "#t1", "#b2", "#b2"\n\t" \
wolfSSL 13:f67a6c6013ca 127 "vpsrld $20, "#b3", "#t1"\n\t" \
wolfSSL 13:f67a6c6013ca 128 "vpslld $12, "#b3", "#b3"\n\t" \
wolfSSL 13:f67a6c6013ca 129 "vpxor "#t1", "#b3", "#b3"\n\t" \
wolfSSL 13:f67a6c6013ca 130 "vpaddd "#b0", "#a0", "#a0"\n\t" \
wolfSSL 13:f67a6c6013ca 131 "vpxor "#a0", "#d0", "#d0"\n\t" \
wolfSSL 13:f67a6c6013ca 132 "vmovdqa "#o1"(%[x]), "#c3"\n\t" \
wolfSSL 13:f67a6c6013ca 133 "vpshufb %[rotl8], "#d0", "#d0"\n\t" \
wolfSSL 13:f67a6c6013ca 134 "vpaddd "#d0", "#c0", "#c0"\n\t" \
wolfSSL 13:f67a6c6013ca 135 "vpxor "#c0", "#b0", "#b0"\n\t" \
wolfSSL 13:f67a6c6013ca 136 "vpaddd "#b1", "#a1", "#a1"\n\t" \
wolfSSL 13:f67a6c6013ca 137 "vpxor "#a1", "#d1", "#d1"\n\t" \
wolfSSL 13:f67a6c6013ca 138 "vpshufb %[rotl8], "#d1", "#d1"\n\t" \
wolfSSL 13:f67a6c6013ca 139 "vpaddd "#d1", "#c1", "#c1"\n\t" \
wolfSSL 13:f67a6c6013ca 140 "vpxor "#c1", "#b1", "#b1"\n\t" \
wolfSSL 13:f67a6c6013ca 141 "vpaddd "#b2", "#a2", "#a2"\n\t" \
wolfSSL 13:f67a6c6013ca 142 "vpxor "#a2", "#d2", "#d2"\n\t" \
wolfSSL 13:f67a6c6013ca 143 "vpshufb %[rotl8], "#d2", "#d2"\n\t" \
wolfSSL 13:f67a6c6013ca 144 "vpaddd "#d2", "#c2", "#c2"\n\t" \
wolfSSL 13:f67a6c6013ca 145 "vpxor "#c2", "#b2", "#b2"\n\t" \
wolfSSL 13:f67a6c6013ca 146 "vpaddd "#b3", "#a3", "#a3"\n\t" \
wolfSSL 13:f67a6c6013ca 147 "vpxor "#a3", "#d3", "#d3"\n\t" \
wolfSSL 13:f67a6c6013ca 148 "vpshufb %[rotl8], "#d3", "#d3"\n\t" \
wolfSSL 13:f67a6c6013ca 149 "vpaddd "#d3", "#c3", "#c3"\n\t" \
wolfSSL 13:f67a6c6013ca 150 "vpxor "#c3", "#b3", "#b3"\n\t" \
wolfSSL 13:f67a6c6013ca 151 "vmovdqa "#c3", "#o1"(%[x])\n\t" \
wolfSSL 13:f67a6c6013ca 152 "vpsrld $25, "#b0", "#t1"\n\t" \
wolfSSL 13:f67a6c6013ca 153 "vpslld $7, "#b0", "#b0"\n\t" \
wolfSSL 13:f67a6c6013ca 154 "vpxor "#t1", "#b0", "#b0"\n\t" \
wolfSSL 13:f67a6c6013ca 155 "vpsrld $25, "#b1", "#t1"\n\t" \
wolfSSL 13:f67a6c6013ca 156 "vpslld $7, "#b1", "#b1"\n\t" \
wolfSSL 13:f67a6c6013ca 157 "vpxor "#t1", "#b1", "#b1"\n\t" \
wolfSSL 13:f67a6c6013ca 158 "vpsrld $25, "#b2", "#t1"\n\t" \
wolfSSL 13:f67a6c6013ca 159 "vpslld $7, "#b2", "#b2"\n\t" \
wolfSSL 13:f67a6c6013ca 160 "vpxor "#t1", "#b2", "#b2"\n\t" \
wolfSSL 13:f67a6c6013ca 161 "vpsrld $25, "#b3", "#t1"\n\t" \
wolfSSL 13:f67a6c6013ca 162 "vpslld $7, "#b3", "#b3"\n\t" \
wolfSSL 13:f67a6c6013ca 163 "vpxor "#t1", "#b3", "#b3"\n\t"
wolfSSL 13:f67a6c6013ca 164
/*
 * Same instruction sequence as QUARTERROUND_INTEL_ASM, except that the
 * operand spilled to memory is c1 instead of c3: in each half c1 is loaded
 * from o1(%[x]) before it is used and stored back afterwards, and t1 is only
 * written after that store. A caller may therefore pass the same register
 * for c1 and t1.
 *
 * Requires the named asm operands %[x], %[rotl16] and %[rotl8]. Rotations by
 * 16 and 8 bits use vpshufb; rotations by 12 and 7 bits use a vpsrld/vpslld
 * pair combined with vpxor through t1.
 */
wolfSSL 13:f67a6c6013ca 165 #define QUARTERROUND_INTEL_ASM_2(a0,b0,c0,d0, \
wolfSSL 13:f67a6c6013ca 166 a1,b1,c1,d1, \
wolfSSL 13:f67a6c6013ca 167 a2,b2,c2,d2, \
wolfSSL 13:f67a6c6013ca 168 a3,b3,c3,d3, \
wolfSSL 13:f67a6c6013ca 169 t1,o1) \
wolfSSL 13:f67a6c6013ca 170 "vpaddd "#b0", "#a0", "#a0"\n\t" \
wolfSSL 13:f67a6c6013ca 171 "vpxor "#a0", "#d0", "#d0"\n\t" \
wolfSSL 13:f67a6c6013ca 172 "vmovdqa "#o1"(%[x]), "#c1"\n\t" \
wolfSSL 13:f67a6c6013ca 173 "vpshufb %[rotl16], "#d0", "#d0"\n\t" \
wolfSSL 13:f67a6c6013ca 174 "vpaddd "#d0", "#c0", "#c0"\n\t" \
wolfSSL 13:f67a6c6013ca 175 "vpxor "#c0", "#b0", "#b0"\n\t" \
wolfSSL 13:f67a6c6013ca 176 "vpaddd "#b1", "#a1", "#a1"\n\t" \
wolfSSL 13:f67a6c6013ca 177 "vpxor "#a1", "#d1", "#d1"\n\t" \
wolfSSL 13:f67a6c6013ca 178 "vpshufb %[rotl16], "#d1", "#d1"\n\t" \
wolfSSL 13:f67a6c6013ca 179 "vpaddd "#d1", "#c1", "#c1"\n\t" \
wolfSSL 13:f67a6c6013ca 180 "vpxor "#c1", "#b1", "#b1"\n\t" \
wolfSSL 13:f67a6c6013ca 181 "vpaddd "#b2", "#a2", "#a2"\n\t" \
wolfSSL 13:f67a6c6013ca 182 "vpxor "#a2", "#d2", "#d2"\n\t" \
wolfSSL 13:f67a6c6013ca 183 "vpshufb %[rotl16], "#d2", "#d2"\n\t" \
wolfSSL 13:f67a6c6013ca 184 "vpaddd "#d2", "#c2", "#c2"\n\t" \
wolfSSL 13:f67a6c6013ca 185 "vpxor "#c2", "#b2", "#b2"\n\t" \
wolfSSL 13:f67a6c6013ca 186 "vpaddd "#b3", "#a3", "#a3"\n\t" \
wolfSSL 13:f67a6c6013ca 187 "vpxor "#a3", "#d3", "#d3"\n\t" \
wolfSSL 13:f67a6c6013ca 188 "vpshufb %[rotl16], "#d3", "#d3"\n\t" \
wolfSSL 13:f67a6c6013ca 189 "vpaddd "#d3", "#c3", "#c3"\n\t" \
wolfSSL 13:f67a6c6013ca 190 "vpxor "#c3", "#b3", "#b3"\n\t" \
wolfSSL 13:f67a6c6013ca 191 "vmovdqa "#c1", "#o1"(%[x])\n\t" \
wolfSSL 13:f67a6c6013ca 192 "vpsrld $20, "#b0", "#t1"\n\t" \
wolfSSL 13:f67a6c6013ca 193 "vpslld $12, "#b0", "#b0"\n\t" \
wolfSSL 13:f67a6c6013ca 194 "vpxor "#t1", "#b0", "#b0"\n\t" \
wolfSSL 13:f67a6c6013ca 195 "vpsrld $20, "#b1", "#t1"\n\t" \
wolfSSL 13:f67a6c6013ca 196 "vpslld $12, "#b1", "#b1"\n\t" \
wolfSSL 13:f67a6c6013ca 197 "vpxor "#t1", "#b1", "#b1"\n\t" \
wolfSSL 13:f67a6c6013ca 198 "vpsrld $20, "#b2", "#t1"\n\t" \
wolfSSL 13:f67a6c6013ca 199 "vpslld $12, "#b2", "#b2"\n\t" \
wolfSSL 13:f67a6c6013ca 200 "vpxor "#t1", "#b2", "#b2"\n\t" \
wolfSSL 13:f67a6c6013ca 201 "vpsrld $20, "#b3", "#t1"\n\t" \
wolfSSL 13:f67a6c6013ca 202 "vpslld $12, "#b3", "#b3"\n\t" \
wolfSSL 13:f67a6c6013ca 203 "vpxor "#t1", "#b3", "#b3"\n\t" \
wolfSSL 13:f67a6c6013ca 204 "vpaddd "#b0", "#a0", "#a0"\n\t" \
wolfSSL 13:f67a6c6013ca 205 "vpxor "#a0", "#d0", "#d0"\n\t" \
wolfSSL 13:f67a6c6013ca 206 "vmovdqa "#o1"(%[x]), "#c1"\n\t" \
wolfSSL 13:f67a6c6013ca 207 "vpshufb %[rotl8], "#d0", "#d0"\n\t" \
wolfSSL 13:f67a6c6013ca 208 "vpaddd "#d0", "#c0", "#c0"\n\t" \
wolfSSL 13:f67a6c6013ca 209 "vpxor "#c0", "#b0", "#b0"\n\t" \
wolfSSL 13:f67a6c6013ca 210 "vpaddd "#b1", "#a1", "#a1"\n\t" \
wolfSSL 13:f67a6c6013ca 211 "vpxor "#a1", "#d1", "#d1"\n\t" \
wolfSSL 13:f67a6c6013ca 212 "vpshufb %[rotl8], "#d1", "#d1"\n\t" \
wolfSSL 13:f67a6c6013ca 213 "vpaddd "#d1", "#c1", "#c1"\n\t" \
wolfSSL 13:f67a6c6013ca 214 "vpxor "#c1", "#b1", "#b1"\n\t" \
wolfSSL 13:f67a6c6013ca 215 "vpaddd "#b2", "#a2", "#a2"\n\t" \
wolfSSL 13:f67a6c6013ca 216 "vpxor "#a2", "#d2", "#d2"\n\t" \
wolfSSL 13:f67a6c6013ca 217 "vpshufb %[rotl8], "#d2", "#d2"\n\t" \
wolfSSL 13:f67a6c6013ca 218 "vpaddd "#d2", "#c2", "#c2"\n\t" \
wolfSSL 13:f67a6c6013ca 219 "vpxor "#c2", "#b2", "#b2"\n\t" \
wolfSSL 13:f67a6c6013ca 220 "vpaddd "#b3", "#a3", "#a3"\n\t" \
wolfSSL 13:f67a6c6013ca 221 "vpxor "#a3", "#d3", "#d3"\n\t" \
wolfSSL 13:f67a6c6013ca 222 "vpshufb %[rotl8], "#d3", "#d3"\n\t" \
wolfSSL 13:f67a6c6013ca 223 "vpaddd "#d3", "#c3", "#c3"\n\t" \
wolfSSL 13:f67a6c6013ca 224 "vpxor "#c3", "#b3", "#b3"\n\t" \
wolfSSL 13:f67a6c6013ca 225 "vmovdqa "#c1", "#o1"(%[x])\n\t" \
wolfSSL 13:f67a6c6013ca 226 "vpsrld $25, "#b0", "#t1"\n\t" \
wolfSSL 13:f67a6c6013ca 227 "vpslld $7, "#b0", "#b0"\n\t" \
wolfSSL 13:f67a6c6013ca 228 "vpxor "#t1", "#b0", "#b0"\n\t" \
wolfSSL 13:f67a6c6013ca 229 "vpsrld $25, "#b1", "#t1"\n\t" \
wolfSSL 13:f67a6c6013ca 230 "vpslld $7, "#b1", "#b1"\n\t" \
wolfSSL 13:f67a6c6013ca 231 "vpxor "#t1", "#b1", "#b1"\n\t" \
wolfSSL 13:f67a6c6013ca 232 "vpsrld $25, "#b2", "#t1"\n\t" \
wolfSSL 13:f67a6c6013ca 233 "vpslld $7, "#b2", "#b2"\n\t" \
wolfSSL 13:f67a6c6013ca 234 "vpxor "#t1", "#b2", "#b2"\n\t" \
wolfSSL 13:f67a6c6013ca 235 "vpsrld $25, "#b3", "#t1"\n\t" \
wolfSSL 13:f67a6c6013ca 236 "vpslld $7, "#b3", "#b3"\n\t" \
wolfSSL 13:f67a6c6013ca 237 "vpxor "#t1", "#b3", "#b3"\n\t"
wolfSSL 13:f67a6c6013ca 238
wolfSSL 13:f67a6c6013ca 239
/*
 * Register bindings for the asm quarter-round macros.
 *
 * QUARTERROUND_XMM / QUARTERROUND_YMM pass registers 0-15 grouped as
 * (0,4,8,12) (1,5,9,13) (2,6,10,14) (3,7,11,15), the same index pattern as
 * the first four QUARTERROUND calls in wc_Chacha_wordtobyte.
 * The *_2 variants pass (0,5,10,15) (1,6,11,12) (2,7,8,13) (3,4,9,14), the
 * index pattern of the last four QUARTERROUND calls there.
 *
 * In every variant register 11 is passed both as the scratch operand t1 and
 * as the operand the underlying macro spills to memory (c3 in the plain
 * variants, c1 in the *_2 variants). The spill slot offset is 48 for the XMM
 * variants and 96 for the YMM variants.
 */
wolfSSL 13:f67a6c6013ca 240 #define QUARTERROUND_XMM() \
wolfSSL 13:f67a6c6013ca 241 QUARTERROUND_INTEL_ASM(%%xmm0,%%xmm4,%%xmm8,%%xmm12, \
wolfSSL 13:f67a6c6013ca 242 %%xmm1,%%xmm5,%%xmm9,%%xmm13, \
wolfSSL 13:f67a6c6013ca 243 %%xmm2,%%xmm6,%%xmm10,%%xmm14, \
wolfSSL 13:f67a6c6013ca 244 %%xmm3,%%xmm7,%%xmm11,%%xmm15, \
wolfSSL 13:f67a6c6013ca 245 %%xmm11,48)
wolfSSL 13:f67a6c6013ca 246 #define QUARTERROUND_XMM_2() \
wolfSSL 13:f67a6c6013ca 247 QUARTERROUND_INTEL_ASM_2(%%xmm0,%%xmm5,%%xmm10,%%xmm15, \
wolfSSL 13:f67a6c6013ca 248 %%xmm1,%%xmm6,%%xmm11,%%xmm12, \
wolfSSL 13:f67a6c6013ca 249 %%xmm2,%%xmm7,%%xmm8,%%xmm13, \
wolfSSL 13:f67a6c6013ca 250 %%xmm3,%%xmm4,%%xmm9,%%xmm14, \
wolfSSL 13:f67a6c6013ca 251 %%xmm11,48)
wolfSSL 13:f67a6c6013ca 252
wolfSSL 13:f67a6c6013ca 253 #define QUARTERROUND_YMM() \
wolfSSL 13:f67a6c6013ca 254 QUARTERROUND_INTEL_ASM(%%ymm0,%%ymm4,%%ymm8,%%ymm12, \
wolfSSL 13:f67a6c6013ca 255 %%ymm1,%%ymm5,%%ymm9,%%ymm13, \
wolfSSL 13:f67a6c6013ca 256 %%ymm2,%%ymm6,%%ymm10,%%ymm14, \
wolfSSL 13:f67a6c6013ca 257 %%ymm3,%%ymm7,%%ymm11,%%ymm15, \
wolfSSL 13:f67a6c6013ca 258 %%ymm11,96)
wolfSSL 13:f67a6c6013ca 259 #define QUARTERROUND_YMM_2() \
wolfSSL 13:f67a6c6013ca 260 QUARTERROUND_INTEL_ASM_2(%%ymm0,%%ymm5,%%ymm10,%%ymm15, \
wolfSSL 13:f67a6c6013ca 261 %%ymm1,%%ymm6,%%ymm11,%%ymm12, \
wolfSSL 13:f67a6c6013ca 262 %%ymm2,%%ymm7,%%ymm8,%%ymm13, \
wolfSSL 13:f67a6c6013ca 263 %%ymm3,%%ymm4,%%ymm9,%%ymm14, \
wolfSSL 13:f67a6c6013ca 264 %%ymm11,96)
wolfSSL 13:f67a6c6013ca 265
wolfSSL 13:f67a6c6013ca 266 /**
wolfSSL 13:f67a6c6013ca 267 * Set up iv(nonce). Earlier versions used 64 bits instead of 96, this version
wolfSSL 13:f67a6c6013ca 268 * uses the typical AEAD 96 bit nonce and can do record sizes of 256 GB.
wolfSSL 13:f67a6c6013ca 269 */
wolfSSL 13:f67a6c6013ca 270 int wc_Chacha_SetIV(ChaCha* ctx, const byte* inIv, word32 counter)
wolfSSL 13:f67a6c6013ca 271 {
wolfSSL 13:f67a6c6013ca 272 word32 temp[CHACHA_IV_WORDS];/* used for alignment of memory */
wolfSSL 13:f67a6c6013ca 273
wolfSSL 13:f67a6c6013ca 274 #ifdef CHACHA_AEAD_TEST
wolfSSL 13:f67a6c6013ca 275 word32 i;
wolfSSL 13:f67a6c6013ca 276 printf("NONCE : ");
wolfSSL 13:f67a6c6013ca 277 for (i = 0; i < CHACHA_IV_BYTES; i++) {
wolfSSL 13:f67a6c6013ca 278 printf("%02x", inIv[i]);
wolfSSL 13:f67a6c6013ca 279 }
wolfSSL 13:f67a6c6013ca 280 printf("\n\n");
wolfSSL 13:f67a6c6013ca 281 #endif
wolfSSL 13:f67a6c6013ca 282
wolfSSL 13:f67a6c6013ca 283 if (ctx == NULL)
wolfSSL 13:f67a6c6013ca 284 return BAD_FUNC_ARG;
wolfSSL 13:f67a6c6013ca 285
wolfSSL 13:f67a6c6013ca 286 XMEMCPY(temp, inIv, CHACHA_IV_BYTES);
wolfSSL 13:f67a6c6013ca 287
wolfSSL 13:f67a6c6013ca 288 ctx->X[CHACHA_IV_BYTES+0] = counter; /* block counter */
wolfSSL 13:f67a6c6013ca 289 ctx->X[CHACHA_IV_BYTES+1] = LITTLE32(temp[0]); /* fixed variable from nonce */
wolfSSL 13:f67a6c6013ca 290 ctx->X[CHACHA_IV_BYTES+2] = LITTLE32(temp[1]); /* counter from nonce */
wolfSSL 13:f67a6c6013ca 291 ctx->X[CHACHA_IV_BYTES+3] = LITTLE32(temp[2]); /* counter from nonce */
wolfSSL 13:f67a6c6013ca 292
wolfSSL 13:f67a6c6013ca 293 return 0;
wolfSSL 13:f67a6c6013ca 294 }
wolfSSL 13:f67a6c6013ca 295
wolfSSL 13:f67a6c6013ca 296 /* "expand 32-byte k" as four little-endian 32-bit words; used for 32-byte keys */
wolfSSL 13:f67a6c6013ca 297 static const word32 sigma[4] = {0x61707865, 0x3320646e, 0x79622d32, 0x6b206574};
wolfSSL 13:f67a6c6013ca 298 /* "expand 16-byte k" as four little-endian 32-bit words; used for 16-byte keys */
wolfSSL 13:f67a6c6013ca 299 static const word32 tau[4] = {0x61707865, 0x3120646e, 0x79622d36, 0x6b206574};
wolfSSL 13:f67a6c6013ca 300
wolfSSL 13:f67a6c6013ca 301 /**
wolfSSL 13:f67a6c6013ca 302 * Key setup. 8 word iv (nonce)
wolfSSL 13:f67a6c6013ca 303 */
wolfSSL 13:f67a6c6013ca 304 int wc_Chacha_SetKey(ChaCha* ctx, const byte* key, word32 keySz)
wolfSSL 13:f67a6c6013ca 305 {
wolfSSL 13:f67a6c6013ca 306 const word32* constants;
wolfSSL 13:f67a6c6013ca 307 const byte* k;
wolfSSL 13:f67a6c6013ca 308
wolfSSL 13:f67a6c6013ca 309 #ifdef XSTREAM_ALIGN
wolfSSL 13:f67a6c6013ca 310 word32 alignKey[8];
wolfSSL 13:f67a6c6013ca 311 #endif
wolfSSL 13:f67a6c6013ca 312
wolfSSL 13:f67a6c6013ca 313 if (ctx == NULL)
wolfSSL 13:f67a6c6013ca 314 return BAD_FUNC_ARG;
wolfSSL 13:f67a6c6013ca 315
wolfSSL 13:f67a6c6013ca 316 if (keySz != 16 && keySz != 32)
wolfSSL 13:f67a6c6013ca 317 return BAD_FUNC_ARG;
wolfSSL 13:f67a6c6013ca 318
wolfSSL 13:f67a6c6013ca 319 #ifdef XSTREAM_ALIGN
wolfSSL 13:f67a6c6013ca 320 if ((wolfssl_word)key % 4) {
wolfSSL 13:f67a6c6013ca 321 WOLFSSL_MSG("wc_ChachaSetKey unaligned key");
wolfSSL 13:f67a6c6013ca 322 XMEMCPY(alignKey, key, keySz);
wolfSSL 13:f67a6c6013ca 323 k = (byte*)alignKey;
wolfSSL 13:f67a6c6013ca 324 }
wolfSSL 13:f67a6c6013ca 325 else {
wolfSSL 13:f67a6c6013ca 326 k = key;
wolfSSL 13:f67a6c6013ca 327 }
wolfSSL 13:f67a6c6013ca 328 #else
wolfSSL 13:f67a6c6013ca 329 k = key;
wolfSSL 13:f67a6c6013ca 330 #endif /* XSTREAM_ALIGN */
wolfSSL 13:f67a6c6013ca 331
wolfSSL 13:f67a6c6013ca 332 #ifdef CHACHA_AEAD_TEST
wolfSSL 13:f67a6c6013ca 333 word32 i;
wolfSSL 13:f67a6c6013ca 334 printf("ChaCha key used :\n");
wolfSSL 13:f67a6c6013ca 335 for (i = 0; i < keySz; i++) {
wolfSSL 13:f67a6c6013ca 336 printf("%02x", key[i]);
wolfSSL 13:f67a6c6013ca 337 if ((i + 1) % 8 == 0)
wolfSSL 13:f67a6c6013ca 338 printf("\n");
wolfSSL 13:f67a6c6013ca 339 }
wolfSSL 13:f67a6c6013ca 340 printf("\n\n");
wolfSSL 13:f67a6c6013ca 341 #endif
wolfSSL 13:f67a6c6013ca 342
wolfSSL 13:f67a6c6013ca 343 ctx->X[4] = U8TO32_LITTLE(k + 0);
wolfSSL 13:f67a6c6013ca 344 ctx->X[5] = U8TO32_LITTLE(k + 4);
wolfSSL 13:f67a6c6013ca 345 ctx->X[6] = U8TO32_LITTLE(k + 8);
wolfSSL 13:f67a6c6013ca 346 ctx->X[7] = U8TO32_LITTLE(k + 12);
wolfSSL 13:f67a6c6013ca 347 if (keySz == 32) {
wolfSSL 13:f67a6c6013ca 348 k += 16;
wolfSSL 13:f67a6c6013ca 349 constants = sigma;
wolfSSL 13:f67a6c6013ca 350 }
wolfSSL 13:f67a6c6013ca 351 else {
wolfSSL 13:f67a6c6013ca 352 constants = tau;
wolfSSL 13:f67a6c6013ca 353 }
wolfSSL 13:f67a6c6013ca 354 ctx->X[ 8] = U8TO32_LITTLE(k + 0);
wolfSSL 13:f67a6c6013ca 355 ctx->X[ 9] = U8TO32_LITTLE(k + 4);
wolfSSL 13:f67a6c6013ca 356 ctx->X[10] = U8TO32_LITTLE(k + 8);
wolfSSL 13:f67a6c6013ca 357 ctx->X[11] = U8TO32_LITTLE(k + 12);
wolfSSL 13:f67a6c6013ca 358 ctx->X[ 0] = constants[0];
wolfSSL 13:f67a6c6013ca 359 ctx->X[ 1] = constants[1];
wolfSSL 13:f67a6c6013ca 360 ctx->X[ 2] = constants[2];
wolfSSL 13:f67a6c6013ca 361 ctx->X[ 3] = constants[3];
wolfSSL 13:f67a6c6013ca 362
wolfSSL 13:f67a6c6013ca 363 return 0;
wolfSSL 13:f67a6c6013ca 364 }
wolfSSL 13:f67a6c6013ca 365
wolfSSL 13:f67a6c6013ca 366 /**
wolfSSL 13:f67a6c6013ca 367 * Converts word into bytes with rotations having been done.
wolfSSL 13:f67a6c6013ca 368 */
wolfSSL 13:f67a6c6013ca 369 static INLINE void wc_Chacha_wordtobyte(word32 output[CHACHA_CHUNK_WORDS],
wolfSSL 13:f67a6c6013ca 370 const word32 input[CHACHA_CHUNK_WORDS])
wolfSSL 13:f67a6c6013ca 371 {
wolfSSL 13:f67a6c6013ca 372 word32 x[CHACHA_CHUNK_WORDS];
wolfSSL 13:f67a6c6013ca 373 word32 i;
wolfSSL 13:f67a6c6013ca 374
wolfSSL 13:f67a6c6013ca 375 for (i = 0; i < CHACHA_CHUNK_WORDS; i++) {
wolfSSL 13:f67a6c6013ca 376 x[i] = input[i];
wolfSSL 13:f67a6c6013ca 377 }
wolfSSL 13:f67a6c6013ca 378
wolfSSL 13:f67a6c6013ca 379 for (i = (ROUNDS); i > 0; i -= 2) {
wolfSSL 13:f67a6c6013ca 380 QUARTERROUND(0, 4, 8, 12)
wolfSSL 13:f67a6c6013ca 381 QUARTERROUND(1, 5, 9, 13)
wolfSSL 13:f67a6c6013ca 382 QUARTERROUND(2, 6, 10, 14)
wolfSSL 13:f67a6c6013ca 383 QUARTERROUND(3, 7, 11, 15)
wolfSSL 13:f67a6c6013ca 384 QUARTERROUND(0, 5, 10, 15)
wolfSSL 13:f67a6c6013ca 385 QUARTERROUND(1, 6, 11, 12)
wolfSSL 13:f67a6c6013ca 386 QUARTERROUND(2, 7, 8, 13)
wolfSSL 13:f67a6c6013ca 387 QUARTERROUND(3, 4, 9, 14)
wolfSSL 13:f67a6c6013ca 388 }
wolfSSL 13:f67a6c6013ca 389
wolfSSL 13:f67a6c6013ca 390 for (i = 0; i < CHACHA_CHUNK_WORDS; i++) {
wolfSSL 13:f67a6c6013ca 391 x[i] = PLUS(x[i], input[i]);
wolfSSL 13:f67a6c6013ca 392 }
wolfSSL 13:f67a6c6013ca 393
wolfSSL 13:f67a6c6013ca 394 for (i = 0; i < CHACHA_CHUNK_WORDS; i++) {
wolfSSL 13:f67a6c6013ca 395 output[i] = LITTLE32(x[i]);
wolfSSL 13:f67a6c6013ca 396 }
wolfSSL 13:f67a6c6013ca 397 }
wolfSSL 13:f67a6c6013ca 398
wolfSSL 13:f67a6c6013ca 399
wolfSSL 13:f67a6c6013ca 400 #ifdef USE_INTEL_CHACHA_SPEEDUP
wolfSSL 13:f67a6c6013ca 401
wolfSSL 13:f67a6c6013ca 402 #ifdef HAVE_INTEL_AVX1
wolfSSL 13:f67a6c6013ca 403 static void chacha_encrypt_avx(ChaCha* ctx, const byte* m, byte* c,
wolfSSL 13:f67a6c6013ca 404 word32 bytes)
wolfSSL 13:f67a6c6013ca 405 {
wolfSSL 13:f67a6c6013ca 406 ALIGN128 word32 X[4*CHACHA_CHUNK_WORDS]; /* used to make sure aligned */
wolfSSL 13:f67a6c6013ca 407 ALIGN128 word32 x[2*CHACHA_CHUNK_WORDS]; /* used to make sure aligned */
wolfSSL 13:f67a6c6013ca 408 byte* output;
wolfSSL 13:f67a6c6013ca 409 word32 i;
wolfSSL 13:f67a6c6013ca 410 word32 cnt = 0;
wolfSSL 13:f67a6c6013ca 411 static const word64 add[2] = { 0x0000000100000000UL,0x0000000300000002UL };
wolfSSL 13:f67a6c6013ca 412 static const word64 four[2] = { 0x0000000400000004UL,0x0000000400000004UL };
wolfSSL 13:f67a6c6013ca 413 static const word64 rotl8[2] =
wolfSSL 13:f67a6c6013ca 414 { 0x0605040702010003UL,0x0e0d0c0f0a09080bUL };
wolfSSL 13:f67a6c6013ca 415 static const word64 rotl16[2] =
wolfSSL 13:f67a6c6013ca 416 { 0x0504070601000302UL,0x0d0c0f0e09080b0aUL };
wolfSSL 13:f67a6c6013ca 417
wolfSSL 13:f67a6c6013ca 418 if (bytes == 0)
wolfSSL 13:f67a6c6013ca 419 return;
wolfSSL 13:f67a6c6013ca 420
wolfSSL 13:f67a6c6013ca 421 __asm__ __volatile__ (
wolfSSL 13:f67a6c6013ca 422 "movl %[bytes], %[cnt]\n\t"
wolfSSL 13:f67a6c6013ca 423 "shrl $8, %[cnt]\n\t"
wolfSSL 13:f67a6c6013ca 424 "jz L_end128\n\t"
wolfSSL 13:f67a6c6013ca 425
wolfSSL 13:f67a6c6013ca 426 "vpshufd $0, (%[key]), %%xmm0\n\t"
wolfSSL 13:f67a6c6013ca 427 "vpshufd $0, 4(%[key]), %%xmm1\n\t"
wolfSSL 13:f67a6c6013ca 428 "vpshufd $0, 8(%[key]), %%xmm2\n\t"
wolfSSL 13:f67a6c6013ca 429 "vpshufd $0, 12(%[key]), %%xmm3\n\t"
wolfSSL 13:f67a6c6013ca 430 "vpshufd $0, 16(%[key]), %%xmm4\n\t"
wolfSSL 13:f67a6c6013ca 431 "vpshufd $0, 20(%[key]), %%xmm5\n\t"
wolfSSL 13:f67a6c6013ca 432 "vpshufd $0, 24(%[key]), %%xmm6\n\t"
wolfSSL 13:f67a6c6013ca 433 "vpshufd $0, 28(%[key]), %%xmm7\n\t"
wolfSSL 13:f67a6c6013ca 434 "vpshufd $0, 32(%[key]), %%xmm8\n\t"
wolfSSL 13:f67a6c6013ca 435 "vpshufd $0, 36(%[key]), %%xmm9\n\t"
wolfSSL 13:f67a6c6013ca 436 "vpshufd $0, 40(%[key]), %%xmm10\n\t"
wolfSSL 13:f67a6c6013ca 437 "vpshufd $0, 44(%[key]), %%xmm11\n\t"
wolfSSL 13:f67a6c6013ca 438 "vpshufd $0, 48(%[key]), %%xmm12\n\t"
wolfSSL 13:f67a6c6013ca 439 "vpshufd $0, 52(%[key]), %%xmm13\n\t"
wolfSSL 13:f67a6c6013ca 440 "vpshufd $0, 56(%[key]), %%xmm14\n\t"
wolfSSL 13:f67a6c6013ca 441 "vpshufd $0, 60(%[key]), %%xmm15\n\t"
wolfSSL 13:f67a6c6013ca 442
wolfSSL 13:f67a6c6013ca 443 "vpaddd %[add], %%xmm12, %%xmm12\n\t"
wolfSSL 13:f67a6c6013ca 444
wolfSSL 13:f67a6c6013ca 445 "vmovdqa %%xmm0, (%[X])\n\t"
wolfSSL 13:f67a6c6013ca 446 "vmovdqa %%xmm1, 16(%[X])\n\t"
wolfSSL 13:f67a6c6013ca 447 "vmovdqa %%xmm2, 32(%[X])\n\t"
wolfSSL 13:f67a6c6013ca 448 "vmovdqa %%xmm3, 48(%[X])\n\t"
wolfSSL 13:f67a6c6013ca 449 "vmovdqa %%xmm4, 64(%[X])\n\t"
wolfSSL 13:f67a6c6013ca 450 "vmovdqa %%xmm5, 80(%[X])\n\t"
wolfSSL 13:f67a6c6013ca 451 "vmovdqa %%xmm6, 96(%[X])\n\t"
wolfSSL 13:f67a6c6013ca 452 "vmovdqa %%xmm7, 112(%[X])\n\t"
wolfSSL 13:f67a6c6013ca 453 "vmovdqa %%xmm8, 128(%[X])\n\t"
wolfSSL 13:f67a6c6013ca 454 "vmovdqa %%xmm9, 144(%[X])\n\t"
wolfSSL 13:f67a6c6013ca 455 "vmovdqa %%xmm10, 160(%[X])\n\t"
wolfSSL 13:f67a6c6013ca 456 "vmovdqa %%xmm11, 176(%[X])\n\t"
wolfSSL 13:f67a6c6013ca 457 "vmovdqa %%xmm12, 192(%[X])\n\t"
wolfSSL 13:f67a6c6013ca 458 "vmovdqa %%xmm13, 208(%[X])\n\t"
wolfSSL 13:f67a6c6013ca 459 "vmovdqa %%xmm14, 224(%[X])\n\t"
wolfSSL 13:f67a6c6013ca 460 "vmovdqa %%xmm15, 240(%[X])\n\t"
wolfSSL 13:f67a6c6013ca 461 "\n"
wolfSSL 13:f67a6c6013ca 462 "L_enc128_loop:\n\t"
wolfSSL 13:f67a6c6013ca 463 "vmovdqa %%xmm11, 48(%[x])\n\t"
wolfSSL 13:f67a6c6013ca 464 QUARTERROUND_XMM()
wolfSSL 13:f67a6c6013ca 465 QUARTERROUND_XMM_2()
wolfSSL 13:f67a6c6013ca 466 QUARTERROUND_XMM()
wolfSSL 13:f67a6c6013ca 467 QUARTERROUND_XMM_2()
wolfSSL 13:f67a6c6013ca 468 QUARTERROUND_XMM()
wolfSSL 13:f67a6c6013ca 469 QUARTERROUND_XMM_2()
wolfSSL 13:f67a6c6013ca 470 QUARTERROUND_XMM()
wolfSSL 13:f67a6c6013ca 471 QUARTERROUND_XMM_2()
wolfSSL 13:f67a6c6013ca 472 QUARTERROUND_XMM()
wolfSSL 13:f67a6c6013ca 473 QUARTERROUND_XMM_2()
wolfSSL 13:f67a6c6013ca 474 QUARTERROUND_XMM()
wolfSSL 13:f67a6c6013ca 475 QUARTERROUND_XMM_2()
wolfSSL 13:f67a6c6013ca 476 QUARTERROUND_XMM()
wolfSSL 13:f67a6c6013ca 477 QUARTERROUND_XMM_2()
wolfSSL 13:f67a6c6013ca 478 QUARTERROUND_XMM()
wolfSSL 13:f67a6c6013ca 479 QUARTERROUND_XMM_2()
wolfSSL 13:f67a6c6013ca 480 QUARTERROUND_XMM()
wolfSSL 13:f67a6c6013ca 481 QUARTERROUND_XMM_2()
wolfSSL 13:f67a6c6013ca 482 QUARTERROUND_XMM()
wolfSSL 13:f67a6c6013ca 483 QUARTERROUND_XMM_2()
wolfSSL 13:f67a6c6013ca 484 "vmovdqa 48(%[x]), %%xmm11\n\t"
wolfSSL 13:f67a6c6013ca 485
wolfSSL 13:f67a6c6013ca 486 "vpaddd (%[X]), %%xmm0, %%xmm0\n\t"
wolfSSL 13:f67a6c6013ca 487 "vpaddd 16(%[X]), %%xmm1, %%xmm1\n\t"
wolfSSL 13:f67a6c6013ca 488 "vpaddd 32(%[X]), %%xmm2, %%xmm2\n\t"
wolfSSL 13:f67a6c6013ca 489 "vpaddd 48(%[X]), %%xmm3, %%xmm3\n\t"
wolfSSL 13:f67a6c6013ca 490 "vpaddd 64(%[X]), %%xmm4, %%xmm4\n\t"
wolfSSL 13:f67a6c6013ca 491 "vpaddd 80(%[X]), %%xmm5, %%xmm5\n\t"
wolfSSL 13:f67a6c6013ca 492 "vpaddd 96(%[X]), %%xmm6, %%xmm6\n\t"
wolfSSL 13:f67a6c6013ca 493 "vpaddd 112(%[X]), %%xmm7, %%xmm7\n\t"
wolfSSL 13:f67a6c6013ca 494 "vpaddd 128(%[X]), %%xmm8, %%xmm8\n\t"
wolfSSL 13:f67a6c6013ca 495 "vpaddd 144(%[X]), %%xmm9, %%xmm9\n\t"
wolfSSL 13:f67a6c6013ca 496 "vpaddd 160(%[X]), %%xmm10, %%xmm10\n\t"
wolfSSL 13:f67a6c6013ca 497 "vpaddd 176(%[X]), %%xmm11, %%xmm11\n\t"
wolfSSL 13:f67a6c6013ca 498 "vpaddd 192(%[X]), %%xmm12, %%xmm12\n\t"
wolfSSL 13:f67a6c6013ca 499 "vpaddd 208(%[X]), %%xmm13, %%xmm13\n\t"
wolfSSL 13:f67a6c6013ca 500 "vpaddd 224(%[X]), %%xmm14, %%xmm14\n\t"
wolfSSL 13:f67a6c6013ca 501 "vpaddd 240(%[X]), %%xmm15, %%xmm15\n\t"
wolfSSL 13:f67a6c6013ca 502
wolfSSL 13:f67a6c6013ca 503 "vmovdqa %%xmm8, (%[x])\n\t"
wolfSSL 13:f67a6c6013ca 504 "vmovdqa %%xmm9, 16(%[x])\n\t"
wolfSSL 13:f67a6c6013ca 505 "vmovdqa %%xmm10, 32(%[x])\n\t"
wolfSSL 13:f67a6c6013ca 506 "vmovdqa %%xmm11, 48(%[x])\n\t"
wolfSSL 13:f67a6c6013ca 507 "vmovdqa %%xmm12, 64(%[x])\n\t"
wolfSSL 13:f67a6c6013ca 508 "vmovdqa %%xmm13, 80(%[x])\n\t"
wolfSSL 13:f67a6c6013ca 509 "vmovdqa %%xmm14, 96(%[x])\n\t"
wolfSSL 13:f67a6c6013ca 510 "vmovdqa %%xmm15, 112(%[x])\n\t"
wolfSSL 13:f67a6c6013ca 511
wolfSSL 13:f67a6c6013ca 512 "vpunpckldq %%xmm1, %%xmm0, %%xmm8\n\t"
wolfSSL 13:f67a6c6013ca 513 "vpunpckldq %%xmm3, %%xmm2, %%xmm9\n\t"
wolfSSL 13:f67a6c6013ca 514 "vpunpckhdq %%xmm1, %%xmm0, %%xmm12\n\t"
wolfSSL 13:f67a6c6013ca 515 "vpunpckhdq %%xmm3, %%xmm2, %%xmm13\n\t"
wolfSSL 13:f67a6c6013ca 516 "vpunpckldq %%xmm5, %%xmm4, %%xmm10\n\t"
wolfSSL 13:f67a6c6013ca 517 "vpunpckldq %%xmm7, %%xmm6, %%xmm11\n\t"
wolfSSL 13:f67a6c6013ca 518 "vpunpckhdq %%xmm5, %%xmm4, %%xmm14\n\t"
wolfSSL 13:f67a6c6013ca 519 "vpunpckhdq %%xmm7, %%xmm6, %%xmm15\n\t"
wolfSSL 13:f67a6c6013ca 520 "vpunpcklqdq %%xmm9, %%xmm8, %%xmm0\n\t"
wolfSSL 13:f67a6c6013ca 521 "vpunpcklqdq %%xmm11, %%xmm10, %%xmm1\n\t"
wolfSSL 13:f67a6c6013ca 522 "vpunpckhqdq %%xmm9, %%xmm8, %%xmm2\n\t"
wolfSSL 13:f67a6c6013ca 523 "vpunpckhqdq %%xmm11, %%xmm10, %%xmm3\n\t"
wolfSSL 13:f67a6c6013ca 524 "vpunpcklqdq %%xmm13, %%xmm12, %%xmm4\n\t"
wolfSSL 13:f67a6c6013ca 525 "vpunpcklqdq %%xmm15, %%xmm14, %%xmm5\n\t"
wolfSSL 13:f67a6c6013ca 526 "vpunpckhqdq %%xmm13, %%xmm12, %%xmm6\n\t"
wolfSSL 13:f67a6c6013ca 527 "vpunpckhqdq %%xmm15, %%xmm14, %%xmm7\n\t"
wolfSSL 13:f67a6c6013ca 528 "vmovdqu (%[in]), %%xmm8\n\t"
wolfSSL 13:f67a6c6013ca 529 "vmovdqu 16(%[in]), %%xmm9\n\t"
wolfSSL 13:f67a6c6013ca 530 "vmovdqu 64(%[in]), %%xmm10\n\t"
wolfSSL 13:f67a6c6013ca 531 "vmovdqu 80(%[in]), %%xmm11\n\t"
wolfSSL 13:f67a6c6013ca 532 "vmovdqu 128(%[in]), %%xmm12\n\t"
wolfSSL 13:f67a6c6013ca 533 "vmovdqu 144(%[in]), %%xmm13\n\t"
wolfSSL 13:f67a6c6013ca 534 "vmovdqu 192(%[in]), %%xmm14\n\t"
wolfSSL 13:f67a6c6013ca 535 "vmovdqu 208(%[in]), %%xmm15\n\t"
wolfSSL 13:f67a6c6013ca 536 "vpxor %%xmm8, %%xmm0, %%xmm0\n\t"
wolfSSL 13:f67a6c6013ca 537 "vpxor %%xmm9, %%xmm1, %%xmm1\n\t"
wolfSSL 13:f67a6c6013ca 538 "vpxor %%xmm10, %%xmm2, %%xmm2\n\t"
wolfSSL 13:f67a6c6013ca 539 "vpxor %%xmm11, %%xmm3, %%xmm3\n\t"
wolfSSL 13:f67a6c6013ca 540 "vpxor %%xmm12, %%xmm4, %%xmm4\n\t"
wolfSSL 13:f67a6c6013ca 541 "vpxor %%xmm13, %%xmm5, %%xmm5\n\t"
wolfSSL 13:f67a6c6013ca 542 "vpxor %%xmm14, %%xmm6, %%xmm6\n\t"
wolfSSL 13:f67a6c6013ca 543 "vpxor %%xmm15, %%xmm7, %%xmm7\n\t"
wolfSSL 13:f67a6c6013ca 544 "vmovdqu %%xmm0, (%[out])\n\t"
wolfSSL 13:f67a6c6013ca 545 "vmovdqu %%xmm1, 16(%[out])\n\t"
wolfSSL 13:f67a6c6013ca 546 "vmovdqu %%xmm2, 64(%[out])\n\t"
wolfSSL 13:f67a6c6013ca 547 "vmovdqu %%xmm3, 80(%[out])\n\t"
wolfSSL 13:f67a6c6013ca 548 "vmovdqu %%xmm4, 128(%[out])\n\t"
wolfSSL 13:f67a6c6013ca 549 "vmovdqu %%xmm5, 144(%[out])\n\t"
wolfSSL 13:f67a6c6013ca 550 "vmovdqu %%xmm6, 192(%[out])\n\t"
wolfSSL 13:f67a6c6013ca 551 "vmovdqu %%xmm7, 208(%[out])\n\t"
wolfSSL 13:f67a6c6013ca 552
wolfSSL 13:f67a6c6013ca 553 "vmovdqa (%[x]), %%xmm0\n\t"
wolfSSL 13:f67a6c6013ca 554 "vmovdqa 16(%[x]), %%xmm1\n\t"
wolfSSL 13:f67a6c6013ca 555 "vmovdqa 32(%[x]), %%xmm2\n\t"
wolfSSL 13:f67a6c6013ca 556 "vmovdqa 48(%[x]), %%xmm3\n\t"
wolfSSL 13:f67a6c6013ca 557 "vmovdqa 64(%[x]), %%xmm4\n\t"
wolfSSL 13:f67a6c6013ca 558 "vmovdqa 80(%[x]), %%xmm5\n\t"
wolfSSL 13:f67a6c6013ca 559 "vmovdqa 96(%[x]), %%xmm6\n\t"
wolfSSL 13:f67a6c6013ca 560 "vmovdqa 112(%[x]), %%xmm7\n\t"
wolfSSL 13:f67a6c6013ca 561
wolfSSL 13:f67a6c6013ca 562 "vpunpckldq %%xmm1, %%xmm0, %%xmm8\n\t"
wolfSSL 13:f67a6c6013ca 563 "vpunpckldq %%xmm3, %%xmm2, %%xmm9\n\t"
wolfSSL 13:f67a6c6013ca 564 "vpunpckhdq %%xmm1, %%xmm0, %%xmm12\n\t"
wolfSSL 13:f67a6c6013ca 565 "vpunpckhdq %%xmm3, %%xmm2, %%xmm13\n\t"
wolfSSL 13:f67a6c6013ca 566 "vpunpckldq %%xmm5, %%xmm4, %%xmm10\n\t"
wolfSSL 13:f67a6c6013ca 567 "vpunpckldq %%xmm7, %%xmm6, %%xmm11\n\t"
wolfSSL 13:f67a6c6013ca 568 "vpunpckhdq %%xmm5, %%xmm4, %%xmm14\n\t"
wolfSSL 13:f67a6c6013ca 569 "vpunpckhdq %%xmm7, %%xmm6, %%xmm15\n\t"
wolfSSL 13:f67a6c6013ca 570 "vpunpcklqdq %%xmm9, %%xmm8, %%xmm0\n\t"
wolfSSL 13:f67a6c6013ca 571 "vpunpcklqdq %%xmm11, %%xmm10, %%xmm1\n\t"
wolfSSL 13:f67a6c6013ca 572 "vpunpckhqdq %%xmm9, %%xmm8, %%xmm2\n\t"
wolfSSL 13:f67a6c6013ca 573 "vpunpckhqdq %%xmm11, %%xmm10, %%xmm3\n\t"
wolfSSL 13:f67a6c6013ca 574 "vpunpcklqdq %%xmm13, %%xmm12, %%xmm4\n\t"
wolfSSL 13:f67a6c6013ca 575 "vpunpcklqdq %%xmm15, %%xmm14, %%xmm5\n\t"
wolfSSL 13:f67a6c6013ca 576 "vpunpckhqdq %%xmm13, %%xmm12, %%xmm6\n\t"
wolfSSL 13:f67a6c6013ca 577 "vpunpckhqdq %%xmm15, %%xmm14, %%xmm7\n\t"
wolfSSL 13:f67a6c6013ca 578 "vmovdqu 32(%[in]), %%xmm8\n\t"
wolfSSL 13:f67a6c6013ca 579 "vmovdqu 48(%[in]), %%xmm9\n\t"
wolfSSL 13:f67a6c6013ca 580 "vmovdqu 96(%[in]), %%xmm10\n\t"
wolfSSL 13:f67a6c6013ca 581 "vmovdqu 112(%[in]), %%xmm11\n\t"
wolfSSL 13:f67a6c6013ca 582 "vmovdqu 160(%[in]), %%xmm12\n\t"
wolfSSL 13:f67a6c6013ca 583 "vmovdqu 176(%[in]), %%xmm13\n\t"
wolfSSL 13:f67a6c6013ca 584 "vmovdqu 224(%[in]), %%xmm14\n\t"
wolfSSL 13:f67a6c6013ca 585 "vmovdqu 240(%[in]), %%xmm15\n\t"
wolfSSL 13:f67a6c6013ca 586 "vpxor %%xmm8, %%xmm0, %%xmm0\n\t"
wolfSSL 13:f67a6c6013ca 587 "vpxor %%xmm9, %%xmm1, %%xmm1\n\t"
wolfSSL 13:f67a6c6013ca 588 "vpxor %%xmm10, %%xmm2, %%xmm2\n\t"
wolfSSL 13:f67a6c6013ca 589 "vpxor %%xmm11, %%xmm3, %%xmm3\n\t"
wolfSSL 13:f67a6c6013ca 590 "vpxor %%xmm12, %%xmm4, %%xmm4\n\t"
wolfSSL 13:f67a6c6013ca 591 "vpxor %%xmm13, %%xmm5, %%xmm5\n\t"
wolfSSL 13:f67a6c6013ca 592 "vpxor %%xmm14, %%xmm6, %%xmm6\n\t"
wolfSSL 13:f67a6c6013ca 593 "vpxor %%xmm15, %%xmm7, %%xmm7\n\t"
wolfSSL 13:f67a6c6013ca 594 "vmovdqu %%xmm0, 32(%[out])\n\t"
wolfSSL 13:f67a6c6013ca 595 "vmovdqu %%xmm1, 48(%[out])\n\t"
wolfSSL 13:f67a6c6013ca 596 "vmovdqu %%xmm2, 96(%[out])\n\t"
wolfSSL 13:f67a6c6013ca 597 "vmovdqu %%xmm3, 112(%[out])\n\t"
wolfSSL 13:f67a6c6013ca 598 "vmovdqu %%xmm4, 160(%[out])\n\t"
wolfSSL 13:f67a6c6013ca 599 "vmovdqu %%xmm5, 176(%[out])\n\t"
wolfSSL 13:f67a6c6013ca 600 "vmovdqu %%xmm6, 224(%[out])\n\t"
wolfSSL 13:f67a6c6013ca 601 "vmovdqu %%xmm7, 240(%[out])\n\t"
wolfSSL 13:f67a6c6013ca 602
wolfSSL 13:f67a6c6013ca 603 "vmovdqa 192(%[X]), %%xmm12\n\t"
wolfSSL 13:f67a6c6013ca 604 "add $256, %[in]\n\t"
wolfSSL 13:f67a6c6013ca 605 "add $256, %[out]\n\t"
wolfSSL 13:f67a6c6013ca 606 "vpaddd %[four], %%xmm12, %%xmm12\n\t"
wolfSSL 13:f67a6c6013ca 607 "sub $256, %[bytes]\n\t"
wolfSSL 13:f67a6c6013ca 608 "vmovdqa %%xmm12, 192(%[X])\n\t"
wolfSSL 13:f67a6c6013ca 609 "cmp $256, %[bytes]\n\t"
wolfSSL 13:f67a6c6013ca 610 "jl L_done\n\t"
wolfSSL 13:f67a6c6013ca 611
wolfSSL 13:f67a6c6013ca 612 "vmovdqa (%[X]), %%xmm0\n\t"
wolfSSL 13:f67a6c6013ca 613 "vmovdqa 16(%[X]), %%xmm1\n\t"
wolfSSL 13:f67a6c6013ca 614 "vmovdqa 32(%[X]), %%xmm2\n\t"
wolfSSL 13:f67a6c6013ca 615 "vmovdqa 48(%[X]), %%xmm3\n\t"
wolfSSL 13:f67a6c6013ca 616 "vmovdqa 64(%[X]), %%xmm4\n\t"
wolfSSL 13:f67a6c6013ca 617 "vmovdqa 80(%[X]), %%xmm5\n\t"
wolfSSL 13:f67a6c6013ca 618 "vmovdqa 96(%[X]), %%xmm6\n\t"
wolfSSL 13:f67a6c6013ca 619 "vmovdqa 112(%[X]), %%xmm7\n\t"
wolfSSL 13:f67a6c6013ca 620 "vmovdqa 128(%[X]), %%xmm8\n\t"
wolfSSL 13:f67a6c6013ca 621 "vmovdqa 144(%[X]), %%xmm9\n\t"
wolfSSL 13:f67a6c6013ca 622 "vmovdqa 160(%[X]), %%xmm10\n\t"
wolfSSL 13:f67a6c6013ca 623 "vmovdqa 176(%[X]), %%xmm11\n\t"
wolfSSL 13:f67a6c6013ca 624 "vmovdqa 192(%[X]), %%xmm12\n\t"
wolfSSL 13:f67a6c6013ca 625 "vmovdqa 208(%[X]), %%xmm13\n\t"
wolfSSL 13:f67a6c6013ca 626 "vmovdqa 224(%[X]), %%xmm14\n\t"
wolfSSL 13:f67a6c6013ca 627 "vmovdqa 240(%[X]), %%xmm15\n\t"
wolfSSL 13:f67a6c6013ca 628 "jmp L_enc128_loop\n\t"
wolfSSL 13:f67a6c6013ca 629
wolfSSL 13:f67a6c6013ca 630 "\n"
wolfSSL 13:f67a6c6013ca 631 "L_done:\n\t"
wolfSSL 13:f67a6c6013ca 632
wolfSSL 13:f67a6c6013ca 633 "shl $2, %[cnt]\n\t"
wolfSSL 13:f67a6c6013ca 634 "add 48(%[key]), %[cnt]\n\t"
wolfSSL 13:f67a6c6013ca 635 "movl %[cnt], 48(%[key])\n\t"
wolfSSL 13:f67a6c6013ca 636 "\n"
wolfSSL 13:f67a6c6013ca 637 "L_end128:"
wolfSSL 13:f67a6c6013ca 638 : [bytes] "+r" (bytes), [cnt] "+r" (cnt),
wolfSSL 13:f67a6c6013ca 639 [in] "+r" (m), [out] "+r" (c)
wolfSSL 13:f67a6c6013ca 640 : [X] "r" (X), [x] "r" (x), [key] "r" (ctx->X),
wolfSSL 13:f67a6c6013ca 641 [add] "m" (add), [four] "m" (four),
wolfSSL 13:f67a6c6013ca 642 [rotl8] "m" (rotl8), [rotl16] "m" (rotl16)
wolfSSL 13:f67a6c6013ca 643 : "xmm0", "xmm1", "xmm2", "xmm3",
wolfSSL 13:f67a6c6013ca 644 "xmm4", "xmm5", "xmm6", "xmm7",
wolfSSL 13:f67a6c6013ca 645 "xmm8", "xmm9", "xmm10", "xmm11",
wolfSSL 13:f67a6c6013ca 646 "xmm12", "xmm13", "xmm14", "xmm15", "memory"
wolfSSL 13:f67a6c6013ca 647 );
wolfSSL 13:f67a6c6013ca 648
wolfSSL 13:f67a6c6013ca 649 output = (byte*)x;
wolfSSL 13:f67a6c6013ca 650 for (; bytes > 0;) {
wolfSSL 13:f67a6c6013ca 651 wc_Chacha_wordtobyte(x, ctx->X);
wolfSSL 13:f67a6c6013ca 652 ctx->X[CHACHA_IV_BYTES] = PLUSONE(ctx->X[CHACHA_IV_BYTES]);
wolfSSL 13:f67a6c6013ca 653 if (bytes <= CHACHA_CHUNK_BYTES) {
wolfSSL 13:f67a6c6013ca 654 for (i = 0; i < bytes; ++i) {
wolfSSL 13:f67a6c6013ca 655 c[i] = m[i] ^ output[i];
wolfSSL 13:f67a6c6013ca 656 }
wolfSSL 13:f67a6c6013ca 657 return;
wolfSSL 13:f67a6c6013ca 658 }
wolfSSL 13:f67a6c6013ca 659 for (i = 0; i < CHACHA_CHUNK_BYTES; ++i) {
wolfSSL 13:f67a6c6013ca 660 c[i] = m[i] ^ output[i];
wolfSSL 13:f67a6c6013ca 661 }
wolfSSL 13:f67a6c6013ca 662 bytes -= CHACHA_CHUNK_BYTES;
wolfSSL 13:f67a6c6013ca 663 c += CHACHA_CHUNK_BYTES;
wolfSSL 13:f67a6c6013ca 664 m += CHACHA_CHUNK_BYTES;
wolfSSL 13:f67a6c6013ca 665 }
wolfSSL 13:f67a6c6013ca 666 }
wolfSSL 13:f67a6c6013ca 667 #endif /* HAVE_INTEL_AVX1 */
wolfSSL 13:f67a6c6013ca 668
wolfSSL 13:f67a6c6013ca 669 #ifdef HAVE_INTEL_AVX2
wolfSSL 13:f67a6c6013ca 670 static void chacha_encrypt_avx2(ChaCha* ctx, const byte* m, byte* c,
wolfSSL 13:f67a6c6013ca 671 word32 bytes)
wolfSSL 13:f67a6c6013ca 672 {
wolfSSL 13:f67a6c6013ca 673 ALIGN256 word32 X[8*CHACHA_CHUNK_WORDS]; /* used to make sure aligned */
wolfSSL 13:f67a6c6013ca 674 ALIGN256 word32 x[4*CHACHA_CHUNK_WORDS]; /* used to make sure aligned */
wolfSSL 13:f67a6c6013ca 675 byte* output;
wolfSSL 13:f67a6c6013ca 676 word32 i;
wolfSSL 13:f67a6c6013ca 677 word32 cnt = 0;
wolfSSL 13:f67a6c6013ca 678 static const word64 add[4] = { 0x0000000100000000UL, 0x0000000300000002UL,
wolfSSL 13:f67a6c6013ca 679 0x0000000500000004UL, 0x0000000700000006UL };
wolfSSL 13:f67a6c6013ca 680 static const word64 eight[4] =
wolfSSL 13:f67a6c6013ca 681 { 0x0000000800000008UL, 0x0000000800000008UL,
wolfSSL 13:f67a6c6013ca 682 0x0000000800000008UL, 0x0000000800000008UL };
wolfSSL 13:f67a6c6013ca 683 static const word64 rotl8[4] =
wolfSSL 13:f67a6c6013ca 684 { 0x0605040702010003UL, 0x0e0d0c0f0a09080bUL,
wolfSSL 13:f67a6c6013ca 685 0x0605040702010003UL, 0x0e0d0c0f0a09080bUL };
wolfSSL 13:f67a6c6013ca 686 static const word64 rotl16[4] =
wolfSSL 13:f67a6c6013ca 687 { 0x0504070601000302UL, 0x0d0c0f0e09080b0aUL,
wolfSSL 13:f67a6c6013ca 688 0x0504070601000302UL, 0x0d0c0f0e09080b0aUL };
wolfSSL 13:f67a6c6013ca 689
wolfSSL 13:f67a6c6013ca 690 if (bytes == 0)
wolfSSL 13:f67a6c6013ca 691 return;
wolfSSL 13:f67a6c6013ca 692
wolfSSL 13:f67a6c6013ca 693 __asm__ __volatile__ (
wolfSSL 13:f67a6c6013ca 694 "movl %[bytes], %[cnt]\n\t"
wolfSSL 13:f67a6c6013ca 695 "shrl $9, %[cnt]\n\t"
wolfSSL 13:f67a6c6013ca 696 "jz L_end256\n\t"
wolfSSL 13:f67a6c6013ca 697
wolfSSL 13:f67a6c6013ca 698 "vpbroadcastd (%[key]), %%ymm0\n\t"
wolfSSL 13:f67a6c6013ca 699 "vpbroadcastd 4(%[key]), %%ymm1\n\t"
wolfSSL 13:f67a6c6013ca 700 "vpbroadcastd 8(%[key]), %%ymm2\n\t"
wolfSSL 13:f67a6c6013ca 701 "vpbroadcastd 12(%[key]), %%ymm3\n\t"
wolfSSL 13:f67a6c6013ca 702 "vpbroadcastd 16(%[key]), %%ymm4\n\t"
wolfSSL 13:f67a6c6013ca 703 "vpbroadcastd 20(%[key]), %%ymm5\n\t"
wolfSSL 13:f67a6c6013ca 704 "vpbroadcastd 24(%[key]), %%ymm6\n\t"
wolfSSL 13:f67a6c6013ca 705 "vpbroadcastd 28(%[key]), %%ymm7\n\t"
wolfSSL 13:f67a6c6013ca 706 "vpbroadcastd 32(%[key]), %%ymm8\n\t"
wolfSSL 13:f67a6c6013ca 707 "vpbroadcastd 36(%[key]), %%ymm9\n\t"
wolfSSL 13:f67a6c6013ca 708 "vpbroadcastd 40(%[key]), %%ymm10\n\t"
wolfSSL 13:f67a6c6013ca 709 "vpbroadcastd 44(%[key]), %%ymm11\n\t"
wolfSSL 13:f67a6c6013ca 710 "vpbroadcastd 48(%[key]), %%ymm12\n\t"
wolfSSL 13:f67a6c6013ca 711 "vpbroadcastd 52(%[key]), %%ymm13\n\t"
wolfSSL 13:f67a6c6013ca 712 "vpbroadcastd 56(%[key]), %%ymm14\n\t"
wolfSSL 13:f67a6c6013ca 713 "vpbroadcastd 60(%[key]), %%ymm15\n\t"
wolfSSL 13:f67a6c6013ca 714
wolfSSL 13:f67a6c6013ca 715 "vpaddd %[add], %%ymm12, %%ymm12\n\t"
wolfSSL 13:f67a6c6013ca 716
wolfSSL 13:f67a6c6013ca 717 "vmovdqa %%ymm0, (%[X])\n\t"
wolfSSL 13:f67a6c6013ca 718 "vmovdqa %%ymm1, 32(%[X])\n\t"
wolfSSL 13:f67a6c6013ca 719 "vmovdqa %%ymm2, 64(%[X])\n\t"
wolfSSL 13:f67a6c6013ca 720 "vmovdqa %%ymm3, 96(%[X])\n\t"
wolfSSL 13:f67a6c6013ca 721 "vmovdqa %%ymm4, 128(%[X])\n\t"
wolfSSL 13:f67a6c6013ca 722 "vmovdqa %%ymm5, 160(%[X])\n\t"
wolfSSL 13:f67a6c6013ca 723 "vmovdqa %%ymm6, 192(%[X])\n\t"
wolfSSL 13:f67a6c6013ca 724 "vmovdqa %%ymm7, 224(%[X])\n\t"
wolfSSL 13:f67a6c6013ca 725 "vmovdqa %%ymm8, 256(%[X])\n\t"
wolfSSL 13:f67a6c6013ca 726 "vmovdqa %%ymm9, 288(%[X])\n\t"
wolfSSL 13:f67a6c6013ca 727 "vmovdqa %%ymm10, 320(%[X])\n\t"
wolfSSL 13:f67a6c6013ca 728 "vmovdqa %%ymm11, 352(%[X])\n\t"
wolfSSL 13:f67a6c6013ca 729 "vmovdqa %%ymm12, 384(%[X])\n\t"
wolfSSL 13:f67a6c6013ca 730 "vmovdqa %%ymm13, 416(%[X])\n\t"
wolfSSL 13:f67a6c6013ca 731 "vmovdqa %%ymm14, 448(%[X])\n\t"
wolfSSL 13:f67a6c6013ca 732 "vmovdqa %%ymm15, 480(%[X])\n\t"
wolfSSL 13:f67a6c6013ca 733 "\n"
wolfSSL 13:f67a6c6013ca 734 "L_enc256_loop:\n\t"
wolfSSL 13:f67a6c6013ca 735 "vmovdqa %%ymm11, 96(%[x])\n\t"
wolfSSL 13:f67a6c6013ca 736 QUARTERROUND_YMM()
wolfSSL 13:f67a6c6013ca 737 QUARTERROUND_YMM_2()
wolfSSL 13:f67a6c6013ca 738 QUARTERROUND_YMM()
wolfSSL 13:f67a6c6013ca 739 QUARTERROUND_YMM_2()
wolfSSL 13:f67a6c6013ca 740 QUARTERROUND_YMM()
wolfSSL 13:f67a6c6013ca 741 QUARTERROUND_YMM_2()
wolfSSL 13:f67a6c6013ca 742 QUARTERROUND_YMM()
wolfSSL 13:f67a6c6013ca 743 QUARTERROUND_YMM_2()
wolfSSL 13:f67a6c6013ca 744 QUARTERROUND_YMM()
wolfSSL 13:f67a6c6013ca 745 QUARTERROUND_YMM_2()
wolfSSL 13:f67a6c6013ca 746 QUARTERROUND_YMM()
wolfSSL 13:f67a6c6013ca 747 QUARTERROUND_YMM_2()
wolfSSL 13:f67a6c6013ca 748 QUARTERROUND_YMM()
wolfSSL 13:f67a6c6013ca 749 QUARTERROUND_YMM_2()
wolfSSL 13:f67a6c6013ca 750 QUARTERROUND_YMM()
wolfSSL 13:f67a6c6013ca 751 QUARTERROUND_YMM_2()
wolfSSL 13:f67a6c6013ca 752 QUARTERROUND_YMM()
wolfSSL 13:f67a6c6013ca 753 QUARTERROUND_YMM_2()
wolfSSL 13:f67a6c6013ca 754 QUARTERROUND_YMM()
wolfSSL 13:f67a6c6013ca 755 QUARTERROUND_YMM_2()
wolfSSL 13:f67a6c6013ca 756 "vmovdqa 96(%[x]), %%ymm11\n\t"
wolfSSL 13:f67a6c6013ca 757
wolfSSL 13:f67a6c6013ca 758 "vpaddd (%[X]), %%ymm0, %%ymm0\n\t"
wolfSSL 13:f67a6c6013ca 759 "vpaddd 32(%[X]), %%ymm1, %%ymm1\n\t"
wolfSSL 13:f67a6c6013ca 760 "vpaddd 64(%[X]), %%ymm2, %%ymm2\n\t"
wolfSSL 13:f67a6c6013ca 761 "vpaddd 96(%[X]), %%ymm3, %%ymm3\n\t"
wolfSSL 13:f67a6c6013ca 762 "vpaddd 128(%[X]), %%ymm4, %%ymm4\n\t"
wolfSSL 13:f67a6c6013ca 763 "vpaddd 160(%[X]), %%ymm5, %%ymm5\n\t"
wolfSSL 13:f67a6c6013ca 764 "vpaddd 192(%[X]), %%ymm6, %%ymm6\n\t"
wolfSSL 13:f67a6c6013ca 765 "vpaddd 224(%[X]), %%ymm7, %%ymm7\n\t"
wolfSSL 13:f67a6c6013ca 766 "vpaddd 256(%[X]), %%ymm8, %%ymm8\n\t"
wolfSSL 13:f67a6c6013ca 767 "vpaddd 288(%[X]), %%ymm9, %%ymm9\n\t"
wolfSSL 13:f67a6c6013ca 768 "vpaddd 320(%[X]), %%ymm10, %%ymm10\n\t"
wolfSSL 13:f67a6c6013ca 769 "vpaddd 352(%[X]), %%ymm11, %%ymm11\n\t"
wolfSSL 13:f67a6c6013ca 770 "vpaddd 384(%[X]), %%ymm12, %%ymm12\n\t"
wolfSSL 13:f67a6c6013ca 771 "vpaddd 416(%[X]), %%ymm13, %%ymm13\n\t"
wolfSSL 13:f67a6c6013ca 772 "vpaddd 448(%[X]), %%ymm14, %%ymm14\n\t"
wolfSSL 13:f67a6c6013ca 773 "vpaddd 480(%[X]), %%ymm15, %%ymm15\n\t"
wolfSSL 13:f67a6c6013ca 774
wolfSSL 13:f67a6c6013ca 775 "vmovdqa %%ymm8, (%[x])\n\t"
wolfSSL 13:f67a6c6013ca 776 "vmovdqa %%ymm9, 32(%[x])\n\t"
wolfSSL 13:f67a6c6013ca 777 "vmovdqa %%ymm10, 64(%[x])\n\t"
wolfSSL 13:f67a6c6013ca 778 "vmovdqa %%ymm11, 96(%[x])\n\t"
wolfSSL 13:f67a6c6013ca 779 "vmovdqa %%ymm12, 128(%[x])\n\t"
wolfSSL 13:f67a6c6013ca 780 "vmovdqa %%ymm13, 160(%[x])\n\t"
wolfSSL 13:f67a6c6013ca 781 "vmovdqa %%ymm14, 192(%[x])\n\t"
wolfSSL 13:f67a6c6013ca 782 "vmovdqa %%ymm15, 224(%[x])\n\t"
wolfSSL 13:f67a6c6013ca 783
wolfSSL 13:f67a6c6013ca 784 "vpunpckldq %%ymm1, %%ymm0, %%ymm8\n\t"
wolfSSL 13:f67a6c6013ca 785 "vpunpckldq %%ymm3, %%ymm2, %%ymm9\n\t"
wolfSSL 13:f67a6c6013ca 786 "vpunpckhdq %%ymm1, %%ymm0, %%ymm12\n\t"
wolfSSL 13:f67a6c6013ca 787 "vpunpckhdq %%ymm3, %%ymm2, %%ymm13\n\t"
wolfSSL 13:f67a6c6013ca 788 "vpunpckldq %%ymm5, %%ymm4, %%ymm10\n\t"
wolfSSL 13:f67a6c6013ca 789 "vpunpckldq %%ymm7, %%ymm6, %%ymm11\n\t"
wolfSSL 13:f67a6c6013ca 790 "vpunpckhdq %%ymm5, %%ymm4, %%ymm14\n\t"
wolfSSL 13:f67a6c6013ca 791 "vpunpckhdq %%ymm7, %%ymm6, %%ymm15\n\t"
wolfSSL 13:f67a6c6013ca 792 "vpunpcklqdq %%ymm9, %%ymm8, %%ymm0\n\t"
wolfSSL 13:f67a6c6013ca 793 "vpunpcklqdq %%ymm11, %%ymm10, %%ymm1\n\t"
wolfSSL 13:f67a6c6013ca 794 "vpunpckhqdq %%ymm9, %%ymm8, %%ymm2\n\t"
wolfSSL 13:f67a6c6013ca 795 "vpunpckhqdq %%ymm11, %%ymm10, %%ymm3\n\t"
wolfSSL 13:f67a6c6013ca 796 "vpunpcklqdq %%ymm13, %%ymm12, %%ymm4\n\t"
wolfSSL 13:f67a6c6013ca 797 "vpunpcklqdq %%ymm15, %%ymm14, %%ymm5\n\t"
wolfSSL 13:f67a6c6013ca 798 "vpunpckhqdq %%ymm13, %%ymm12, %%ymm6\n\t"
wolfSSL 13:f67a6c6013ca 799 "vpunpckhqdq %%ymm15, %%ymm14, %%ymm7\n\t"
wolfSSL 13:f67a6c6013ca 800 "vperm2i128 $0x20, %%ymm1, %%ymm0, %%ymm8\n\t"
wolfSSL 13:f67a6c6013ca 801 "vperm2i128 $0x20, %%ymm3, %%ymm2, %%ymm9\n\t"
wolfSSL 13:f67a6c6013ca 802 "vperm2i128 $0x31, %%ymm1, %%ymm0, %%ymm12\n\t"
wolfSSL 13:f67a6c6013ca 803 "vperm2i128 $0x31, %%ymm3, %%ymm2, %%ymm13\n\t"
wolfSSL 13:f67a6c6013ca 804 "vperm2i128 $0x20, %%ymm5, %%ymm4, %%ymm10\n\t"
wolfSSL 13:f67a6c6013ca 805 "vperm2i128 $0x20, %%ymm7, %%ymm6, %%ymm11\n\t"
wolfSSL 13:f67a6c6013ca 806 "vperm2i128 $0x31, %%ymm5, %%ymm4, %%ymm14\n\t"
wolfSSL 13:f67a6c6013ca 807 "vperm2i128 $0x31, %%ymm7, %%ymm6, %%ymm15\n\t"
wolfSSL 13:f67a6c6013ca 808
wolfSSL 13:f67a6c6013ca 809 "vmovdqu (%[in]), %%ymm0\n\t"
wolfSSL 13:f67a6c6013ca 810 "vmovdqu 64(%[in]), %%ymm1\n\t"
wolfSSL 13:f67a6c6013ca 811 "vmovdqu 128(%[in]), %%ymm2\n\t"
wolfSSL 13:f67a6c6013ca 812 "vmovdqu 192(%[in]), %%ymm3\n\t"
wolfSSL 13:f67a6c6013ca 813 "vmovdqu 256(%[in]), %%ymm4\n\t"
wolfSSL 13:f67a6c6013ca 814 "vmovdqu 320(%[in]), %%ymm5\n\t"
wolfSSL 13:f67a6c6013ca 815 "vmovdqu 384(%[in]), %%ymm6\n\t"
wolfSSL 13:f67a6c6013ca 816 "vmovdqu 448(%[in]), %%ymm7\n\t"
wolfSSL 13:f67a6c6013ca 817 "vpxor %%ymm0, %%ymm8, %%ymm8\n\t"
wolfSSL 13:f67a6c6013ca 818 "vpxor %%ymm1, %%ymm9, %%ymm9\n\t"
wolfSSL 13:f67a6c6013ca 819 "vpxor %%ymm2, %%ymm10, %%ymm10\n\t"
wolfSSL 13:f67a6c6013ca 820 "vpxor %%ymm3, %%ymm11, %%ymm11\n\t"
wolfSSL 13:f67a6c6013ca 821 "vpxor %%ymm4, %%ymm12, %%ymm12\n\t"
wolfSSL 13:f67a6c6013ca 822 "vpxor %%ymm5, %%ymm13, %%ymm13\n\t"
wolfSSL 13:f67a6c6013ca 823 "vpxor %%ymm6, %%ymm14, %%ymm14\n\t"
wolfSSL 13:f67a6c6013ca 824 "vpxor %%ymm7, %%ymm15, %%ymm15\n\t"
wolfSSL 13:f67a6c6013ca 825 "vmovdqu %%ymm8, (%[out])\n\t"
wolfSSL 13:f67a6c6013ca 826 "vmovdqu %%ymm9, 64(%[out])\n\t"
wolfSSL 13:f67a6c6013ca 827 "vmovdqu %%ymm10, 128(%[out])\n\t"
wolfSSL 13:f67a6c6013ca 828 "vmovdqu %%ymm11, 192(%[out])\n\t"
wolfSSL 13:f67a6c6013ca 829 "vmovdqu %%ymm12, 256(%[out])\n\t"
wolfSSL 13:f67a6c6013ca 830 "vmovdqu %%ymm13, 320(%[out])\n\t"
wolfSSL 13:f67a6c6013ca 831 "vmovdqu %%ymm14, 384(%[out])\n\t"
wolfSSL 13:f67a6c6013ca 832 "vmovdqu %%ymm15, 448(%[out])\n\t"
wolfSSL 13:f67a6c6013ca 833
wolfSSL 13:f67a6c6013ca 834 "vmovdqa (%[x]), %%ymm0\n\t"
wolfSSL 13:f67a6c6013ca 835 "vmovdqa 32(%[x]), %%ymm1\n\t"
wolfSSL 13:f67a6c6013ca 836 "vmovdqa 64(%[x]), %%ymm2\n\t"
wolfSSL 13:f67a6c6013ca 837 "vmovdqa 96(%[x]), %%ymm3\n\t"
wolfSSL 13:f67a6c6013ca 838 "vmovdqa 128(%[x]), %%ymm4\n\t"
wolfSSL 13:f67a6c6013ca 839 "vmovdqa 160(%[x]), %%ymm5\n\t"
wolfSSL 13:f67a6c6013ca 840 "vmovdqa 192(%[x]), %%ymm6\n\t"
wolfSSL 13:f67a6c6013ca 841 "vmovdqa 224(%[x]), %%ymm7\n\t"
wolfSSL 13:f67a6c6013ca 842
wolfSSL 13:f67a6c6013ca 843 "vpunpckldq %%ymm1, %%ymm0, %%ymm8\n\t"
wolfSSL 13:f67a6c6013ca 844 "vpunpckldq %%ymm3, %%ymm2, %%ymm9\n\t"
wolfSSL 13:f67a6c6013ca 845 "vpunpckhdq %%ymm1, %%ymm0, %%ymm12\n\t"
wolfSSL 13:f67a6c6013ca 846 "vpunpckhdq %%ymm3, %%ymm2, %%ymm13\n\t"
wolfSSL 13:f67a6c6013ca 847 "vpunpckldq %%ymm5, %%ymm4, %%ymm10\n\t"
wolfSSL 13:f67a6c6013ca 848 "vpunpckldq %%ymm7, %%ymm6, %%ymm11\n\t"
wolfSSL 13:f67a6c6013ca 849 "vpunpckhdq %%ymm5, %%ymm4, %%ymm14\n\t"
wolfSSL 13:f67a6c6013ca 850 "vpunpckhdq %%ymm7, %%ymm6, %%ymm15\n\t"
wolfSSL 13:f67a6c6013ca 851 "vpunpcklqdq %%ymm9, %%ymm8, %%ymm0\n\t"
wolfSSL 13:f67a6c6013ca 852 "vpunpcklqdq %%ymm11, %%ymm10, %%ymm1\n\t"
wolfSSL 13:f67a6c6013ca 853 "vpunpckhqdq %%ymm9 , %%ymm8, %%ymm2\n\t"
wolfSSL 13:f67a6c6013ca 854 "vpunpckhqdq %%ymm11, %%ymm10, %%ymm3\n\t"
wolfSSL 13:f67a6c6013ca 855 "vpunpcklqdq %%ymm13, %%ymm12, %%ymm4\n\t"
wolfSSL 13:f67a6c6013ca 856 "vpunpcklqdq %%ymm15, %%ymm14, %%ymm5\n\t"
wolfSSL 13:f67a6c6013ca 857 "vpunpckhqdq %%ymm13, %%ymm12, %%ymm6\n\t"
wolfSSL 13:f67a6c6013ca 858 "vpunpckhqdq %%ymm15, %%ymm14, %%ymm7\n\t"
wolfSSL 13:f67a6c6013ca 859 "vperm2i128 $0x20, %%ymm1, %%ymm0, %%ymm8\n\t"
wolfSSL 13:f67a6c6013ca 860 "vperm2i128 $0x20, %%ymm3, %%ymm2, %%ymm9\n\t"
wolfSSL 13:f67a6c6013ca 861 "vperm2i128 $0x31, %%ymm1, %%ymm0, %%ymm12\n\t"
wolfSSL 13:f67a6c6013ca 862 "vperm2i128 $0x31, %%ymm3, %%ymm2, %%ymm13\n\t"
wolfSSL 13:f67a6c6013ca 863 "vperm2i128 $0x20, %%ymm5, %%ymm4, %%ymm10\n\t"
wolfSSL 13:f67a6c6013ca 864 "vperm2i128 $0x20, %%ymm7, %%ymm6, %%ymm11\n\t"
wolfSSL 13:f67a6c6013ca 865 "vperm2i128 $0x31, %%ymm5, %%ymm4, %%ymm14\n\t"
wolfSSL 13:f67a6c6013ca 866 "vperm2i128 $0x31, %%ymm7, %%ymm6, %%ymm15\n\t"
wolfSSL 13:f67a6c6013ca 867
wolfSSL 13:f67a6c6013ca 868 "vmovdqu 32(%[in]), %%ymm0\n\t"
wolfSSL 13:f67a6c6013ca 869 "vmovdqu 96(%[in]), %%ymm1\n\t"
wolfSSL 13:f67a6c6013ca 870 "vmovdqu 160(%[in]), %%ymm2\n\t"
wolfSSL 13:f67a6c6013ca 871 "vmovdqu 224(%[in]), %%ymm3\n\t"
wolfSSL 13:f67a6c6013ca 872 "vmovdqu 288(%[in]), %%ymm4\n\t"
wolfSSL 13:f67a6c6013ca 873 "vmovdqu 352(%[in]), %%ymm5\n\t"
wolfSSL 13:f67a6c6013ca 874 "vmovdqu 416(%[in]), %%ymm6\n\t"
wolfSSL 13:f67a6c6013ca 875 "vmovdqu 480(%[in]), %%ymm7\n\t"
wolfSSL 13:f67a6c6013ca 876 "vpxor %%ymm0, %%ymm8, %%ymm8\n\t"
wolfSSL 13:f67a6c6013ca 877 "vpxor %%ymm1, %%ymm9, %%ymm9\n\t"
wolfSSL 13:f67a6c6013ca 878 "vpxor %%ymm2, %%ymm10, %%ymm10\n\t"
wolfSSL 13:f67a6c6013ca 879 "vpxor %%ymm3, %%ymm11, %%ymm11\n\t"
wolfSSL 13:f67a6c6013ca 880 "vpxor %%ymm4, %%ymm12, %%ymm12\n\t"
wolfSSL 13:f67a6c6013ca 881 "vpxor %%ymm5, %%ymm13, %%ymm13\n\t"
wolfSSL 13:f67a6c6013ca 882 "vpxor %%ymm6, %%ymm14, %%ymm14\n\t"
wolfSSL 13:f67a6c6013ca 883 "vpxor %%ymm7, %%ymm15, %%ymm15\n\t"
wolfSSL 13:f67a6c6013ca 884 "vmovdqu %%ymm8, 32(%[out])\n\t"
wolfSSL 13:f67a6c6013ca 885 "vmovdqu %%ymm9, 96(%[out])\n\t"
wolfSSL 13:f67a6c6013ca 886 "vmovdqu %%ymm10, 160(%[out])\n\t"
wolfSSL 13:f67a6c6013ca 887 "vmovdqu %%ymm11, 224(%[out])\n\t"
wolfSSL 13:f67a6c6013ca 888 "vmovdqu %%ymm12, 288(%[out])\n\t"
wolfSSL 13:f67a6c6013ca 889 "vmovdqu %%ymm13, 352(%[out])\n\t"
wolfSSL 13:f67a6c6013ca 890 "vmovdqu %%ymm14, 416(%[out])\n\t"
wolfSSL 13:f67a6c6013ca 891 "vmovdqu %%ymm15, 480(%[out])\n\t"
wolfSSL 13:f67a6c6013ca 892
wolfSSL 13:f67a6c6013ca 893 "vmovdqa 384(%[X]), %%ymm12\n\t"
wolfSSL 13:f67a6c6013ca 894 "add $512, %[in]\n\t"
wolfSSL 13:f67a6c6013ca 895 "add $512, %[out]\n\t"
wolfSSL 13:f67a6c6013ca 896 "vpaddd %[eight], %%ymm12, %%ymm12\n\t"
wolfSSL 13:f67a6c6013ca 897 "sub $512, %[bytes]\n\t"
wolfSSL 13:f67a6c6013ca 898 "vmovdqa %%ymm12, 384(%[X])\n\t"
wolfSSL 13:f67a6c6013ca 899 "cmp $512, %[bytes]\n\t"
wolfSSL 13:f67a6c6013ca 900 "jl L_done256\n\t"
wolfSSL 13:f67a6c6013ca 901
wolfSSL 13:f67a6c6013ca 902 "vmovdqa (%[X]), %%ymm0\n\t"
wolfSSL 13:f67a6c6013ca 903 "vmovdqa 32(%[X]), %%ymm1\n\t"
wolfSSL 13:f67a6c6013ca 904 "vmovdqa 64(%[X]), %%ymm2\n\t"
wolfSSL 13:f67a6c6013ca 905 "vmovdqa 96(%[X]), %%ymm3\n\t"
wolfSSL 13:f67a6c6013ca 906 "vmovdqa 128(%[X]), %%ymm4\n\t"
wolfSSL 13:f67a6c6013ca 907 "vmovdqa 160(%[X]), %%ymm5\n\t"
wolfSSL 13:f67a6c6013ca 908 "vmovdqa 192(%[X]), %%ymm6\n\t"
wolfSSL 13:f67a6c6013ca 909 "vmovdqa 224(%[X]), %%ymm7\n\t"
wolfSSL 13:f67a6c6013ca 910 "vmovdqa 256(%[X]), %%ymm8\n\t"
wolfSSL 13:f67a6c6013ca 911 "vmovdqa 288(%[X]), %%ymm9\n\t"
wolfSSL 13:f67a6c6013ca 912 "vmovdqa 320(%[X]), %%ymm10\n\t"
wolfSSL 13:f67a6c6013ca 913 "vmovdqa 352(%[X]), %%ymm11\n\t"
wolfSSL 13:f67a6c6013ca 914 "vmovdqa 384(%[X]), %%ymm12\n\t"
wolfSSL 13:f67a6c6013ca 915 "vmovdqa 416(%[X]), %%ymm13\n\t"
wolfSSL 13:f67a6c6013ca 916 "vmovdqa 448(%[X]), %%ymm14\n\t"
wolfSSL 13:f67a6c6013ca 917 "vmovdqa 480(%[X]), %%ymm15\n\t"
wolfSSL 13:f67a6c6013ca 918 "jmp L_enc256_loop\n\t"
wolfSSL 13:f67a6c6013ca 919 "\n"
wolfSSL 13:f67a6c6013ca 920 "L_done256:\n\t"
wolfSSL 13:f67a6c6013ca 921 "shl $3, %[cnt]\n\t"
wolfSSL 13:f67a6c6013ca 922 "add 48(%[key]), %[cnt]\n\t"
wolfSSL 13:f67a6c6013ca 923 "movl %[cnt], 48(%[key])\n\t"
wolfSSL 13:f67a6c6013ca 924 "\n"
wolfSSL 13:f67a6c6013ca 925 "L_end256:"
wolfSSL 13:f67a6c6013ca 926 : [bytes] "+r" (bytes), [cnt] "+r" (cnt),
wolfSSL 13:f67a6c6013ca 927 [in] "+r" (m), [out] "+r" (c)
wolfSSL 13:f67a6c6013ca 928 : [X] "r" (X), [x] "r" (x), [key] "r" (ctx->X),
wolfSSL 13:f67a6c6013ca 929 [add] "m" (add), [eight] "m" (eight),
wolfSSL 13:f67a6c6013ca 930 [rotl8] "m" (rotl8), [rotl16] "m" (rotl16)
wolfSSL 13:f67a6c6013ca 931 : "ymm0", "ymm1", "ymm2", "ymm3",
wolfSSL 13:f67a6c6013ca 932 "ymm4", "ymm5", "ymm6", "ymm7",
wolfSSL 13:f67a6c6013ca 933 "ymm8", "ymm9", "ymm10", "ymm11",
wolfSSL 13:f67a6c6013ca 934 "ymm12", "ymm13", "ymm14", "ymm15", "memory"
wolfSSL 13:f67a6c6013ca 935 );
wolfSSL 13:f67a6c6013ca 936
wolfSSL 13:f67a6c6013ca 937 output = (byte*)x;
wolfSSL 13:f67a6c6013ca 938 for (; bytes > 0;) {
wolfSSL 13:f67a6c6013ca 939 wc_Chacha_wordtobyte(x, ctx->X);
wolfSSL 13:f67a6c6013ca 940 ctx->X[CHACHA_IV_BYTES] = PLUSONE(ctx->X[CHACHA_IV_BYTES]);
wolfSSL 13:f67a6c6013ca 941 if (bytes <= CHACHA_CHUNK_BYTES) {
wolfSSL 13:f67a6c6013ca 942 for (i = 0; i < bytes; ++i) {
wolfSSL 13:f67a6c6013ca 943 c[i] = m[i] ^ output[i];
wolfSSL 13:f67a6c6013ca 944 }
wolfSSL 13:f67a6c6013ca 945 return;
wolfSSL 13:f67a6c6013ca 946 }
wolfSSL 13:f67a6c6013ca 947 for (i = 0; i < CHACHA_CHUNK_BYTES; ++i) {
wolfSSL 13:f67a6c6013ca 948 c[i] = m[i] ^ output[i];
wolfSSL 13:f67a6c6013ca 949 }
wolfSSL 13:f67a6c6013ca 950 bytes -= CHACHA_CHUNK_BYTES;
wolfSSL 13:f67a6c6013ca 951 c += CHACHA_CHUNK_BYTES;
wolfSSL 13:f67a6c6013ca 952 m += CHACHA_CHUNK_BYTES;
wolfSSL 13:f67a6c6013ca 953 }
wolfSSL 13:f67a6c6013ca 954 }
wolfSSL 13:f67a6c6013ca 955 #endif /* HAVE_INTEL_AVX2 */
wolfSSL 13:f67a6c6013ca 956 #endif /* USE_INTEL_CHACHA_SPEEDUP */
wolfSSL 13:f67a6c6013ca 957
wolfSSL 13:f67a6c6013ca 958 /**
wolfSSL 13:f67a6c6013ca 959 * Encrypt a stream of bytes
wolfSSL 13:f67a6c6013ca 960 */
wolfSSL 13:f67a6c6013ca 961 static void wc_Chacha_encrypt_bytes(ChaCha* ctx, const byte* m, byte* c,
wolfSSL 13:f67a6c6013ca 962 word32 bytes)
wolfSSL 13:f67a6c6013ca 963 {
wolfSSL 13:f67a6c6013ca 964 byte* output;
wolfSSL 13:f67a6c6013ca 965 word32 temp[CHACHA_CHUNK_WORDS]; /* used to make sure aligned */
wolfSSL 13:f67a6c6013ca 966 word32 i;
wolfSSL 13:f67a6c6013ca 967
wolfSSL 13:f67a6c6013ca 968 output = (byte*)temp;
wolfSSL 13:f67a6c6013ca 969
wolfSSL 13:f67a6c6013ca 970 for (; bytes > 0;) {
wolfSSL 13:f67a6c6013ca 971 wc_Chacha_wordtobyte(temp, ctx->X);
wolfSSL 13:f67a6c6013ca 972 ctx->X[CHACHA_IV_BYTES] = PLUSONE(ctx->X[CHACHA_IV_BYTES]);
wolfSSL 13:f67a6c6013ca 973 if (bytes <= CHACHA_CHUNK_BYTES) {
wolfSSL 13:f67a6c6013ca 974 for (i = 0; i < bytes; ++i) {
wolfSSL 13:f67a6c6013ca 975 c[i] = m[i] ^ output[i];
wolfSSL 13:f67a6c6013ca 976 }
wolfSSL 13:f67a6c6013ca 977 return;
wolfSSL 13:f67a6c6013ca 978 }
wolfSSL 13:f67a6c6013ca 979 for (i = 0; i < CHACHA_CHUNK_BYTES; ++i) {
wolfSSL 13:f67a6c6013ca 980 c[i] = m[i] ^ output[i];
wolfSSL 13:f67a6c6013ca 981 }
wolfSSL 13:f67a6c6013ca 982 bytes -= CHACHA_CHUNK_BYTES;
wolfSSL 13:f67a6c6013ca 983 c += CHACHA_CHUNK_BYTES;
wolfSSL 13:f67a6c6013ca 984 m += CHACHA_CHUNK_BYTES;
wolfSSL 13:f67a6c6013ca 985 }
wolfSSL 13:f67a6c6013ca 986 }
wolfSSL 13:f67a6c6013ca 987
wolfSSL 13:f67a6c6013ca 988 /**
wolfSSL 13:f67a6c6013ca 989 * API to encrypt/decrypt a message of any size.
wolfSSL 13:f67a6c6013ca 990 */
wolfSSL 13:f67a6c6013ca 991 int wc_Chacha_Process(ChaCha* ctx, byte* output, const byte* input,
wolfSSL 13:f67a6c6013ca 992 word32 msglen)
wolfSSL 13:f67a6c6013ca 993 {
wolfSSL 13:f67a6c6013ca 994 if (ctx == NULL)
wolfSSL 13:f67a6c6013ca 995 return BAD_FUNC_ARG;
wolfSSL 13:f67a6c6013ca 996
wolfSSL 13:f67a6c6013ca 997 #ifdef USE_INTEL_CHACHA_SPEEDUP
wolfSSL 13:f67a6c6013ca 998 #ifdef HAVE_INTEL_AVX2
wolfSSL 13:f67a6c6013ca 999 if (IS_INTEL_AVX2(cpuid_get_flags()))
wolfSSL 13:f67a6c6013ca 1000 chacha_encrypt_avx2(ctx, input, output, msglen);
wolfSSL 13:f67a6c6013ca 1001 else
wolfSSL 13:f67a6c6013ca 1002 #endif
wolfSSL 13:f67a6c6013ca 1003 chacha_encrypt_avx(ctx, input, output, msglen);
wolfSSL 13:f67a6c6013ca 1004 return 0;
wolfSSL 13:f67a6c6013ca 1005 #endif
wolfSSL 13:f67a6c6013ca 1006 wc_Chacha_encrypt_bytes(ctx, input, output, msglen);
wolfSSL 13:f67a6c6013ca 1007
wolfSSL 13:f67a6c6013ca 1008 return 0;
wolfSSL 13:f67a6c6013ca 1009 }
wolfSSL 13:f67a6c6013ca 1010
wolfSSL 13:f67a6c6013ca 1011 #endif /* HAVE_CHACHA*/
wolfSSL 13:f67a6c6013ca 1012
wolfSSL 13:f67a6c6013ca 1013