wolfSSL SSL/TLS library, support up to TLS1.3
Dependents: CyaSSL-Twitter-OAuth4Tw Example-client-tls-cert TwitterReader TweetTest ... more
poly1305.c
00001 /* poly1305.c 00002 * 00003 * Copyright (C) 2006-2020 wolfSSL Inc. 00004 * 00005 * This file is part of wolfSSL. 00006 * 00007 * wolfSSL is free software; you can redistribute it and/or modify 00008 * it under the terms of the GNU General Public License as published by 00009 * the Free Software Foundation; either version 2 of the License, or 00010 * (at your option) any later version. 00011 * 00012 * wolfSSL is distributed in the hope that it will be useful, 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 * GNU General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU General Public License 00018 * along with this program; if not, write to the Free Software 00019 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA 00020 */ 00021 00022 /* 00023 * Based off the public domain implementations by Andrew Moon 00024 * and Daniel J. 
Bernstein 00025 */ 00026 00027 00028 #ifdef HAVE_CONFIG_H 00029 #include <config.h> 00030 #endif 00031 00032 #include <wolfssl/wolfcrypt/settings.h> 00033 00034 #ifdef HAVE_POLY1305 00035 #include <wolfssl/wolfcrypt/poly1305.h > 00036 #include <wolfssl/wolfcrypt/error-crypt.h > 00037 #include <wolfssl/wolfcrypt/logging.h > 00038 #include <wolfssl/wolfcrypt/cpuid.h> 00039 #ifdef NO_INLINE 00040 #include <wolfssl/wolfcrypt/misc.h> 00041 #else 00042 #define WOLFSSL_MISC_INCLUDED 00043 #include <wolfcrypt/src/misc.c> 00044 #endif 00045 #ifdef CHACHA_AEAD_TEST 00046 #include <stdio.h> 00047 #endif 00048 00049 #ifdef _MSC_VER 00050 /* 4127 warning constant while(1) */ 00051 #pragma warning(disable: 4127) 00052 #endif 00053 00054 #ifdef USE_INTEL_SPEEDUP 00055 #include <emmintrin.h> 00056 #include <immintrin.h> 00057 00058 #if defined(__GNUC__) && ((__GNUC__ < 4) || \ 00059 (__GNUC__ == 4 && __GNUC_MINOR__ <= 8)) 00060 #undef NO_AVX2_SUPPORT 00061 #define NO_AVX2_SUPPORT 00062 #endif 00063 #if defined(__clang__) && ((__clang_major__ < 3) || \ 00064 (__clang_major__ == 3 && __clang_minor__ <= 5)) 00065 #define NO_AVX2_SUPPORT 00066 #elif defined(__clang__) && defined(NO_AVX2_SUPPORT) 00067 #undef NO_AVX2_SUPPORT 00068 #endif 00069 00070 #define HAVE_INTEL_AVX1 00071 #ifndef NO_AVX2_SUPPORT 00072 #define HAVE_INTEL_AVX2 00073 #endif 00074 #endif 00075 00076 #ifdef USE_INTEL_SPEEDUP 00077 static word32 intel_flags = 0; 00078 static word32 cpu_flags_set = 0; 00079 #endif 00080 00081 #if defined(USE_INTEL_SPEEDUP) || defined(POLY130564) 00082 #if defined(_MSC_VER) 00083 #define POLY1305_NOINLINE __declspec(noinline) 00084 #elif defined(__GNUC__) 00085 #define POLY1305_NOINLINE __attribute__((noinline)) 00086 #else 00087 #define POLY1305_NOINLINE 00088 #endif 00089 00090 #if defined(_MSC_VER) 00091 #include <intrin.h> 00092 00093 typedef struct word128 { 00094 word64 lo; 00095 word64 hi; 00096 } word128; 00097 00098 #define MUL(out, x, y) out.lo = _umul128((x), (y), &out.hi) 
00099 #define ADD(out, in) { word64 t = out.lo; out.lo += in.lo; \ 00100 out.hi += (out.lo < t) + in.hi; } 00101 #define ADDLO(out, in) { word64 t = out.lo; out.lo += in; \ 00102 out.hi += (out.lo < t); } 00103 #define SHR(in, shift) (__shiftright128(in.lo, in.hi, (shift))) 00104 #define LO(in) (in.lo) 00105 00106 #elif defined(__GNUC__) 00107 #if defined(__SIZEOF_INT128__) 00108 typedef unsigned __int128 word128; 00109 #else 00110 typedef unsigned word128 __attribute__((mode(TI))); 00111 #endif 00112 00113 #define MUL(out, x, y) out = ((word128)x * y) 00114 #define ADD(out, in) out += in 00115 #define ADDLO(out, in) out += in 00116 #define SHR(in, shift) (word64)(in >> (shift)) 00117 #define LO(in) (word64)(in) 00118 #endif 00119 #endif 00120 00121 #ifdef USE_INTEL_SPEEDUP 00122 #ifdef __cplusplus 00123 extern "C" { 00124 #endif 00125 00126 #ifdef HAVE_INTEL_AVX1 00127 /* Process one block (16 bytes) of data. 00128 * 00129 * ctx Poly1305 context. 00130 * m One block of message data. 00131 */ 00132 extern void poly1305_block_avx(Poly1305* ctx, const unsigned char *m); 00133 /* Process multiple blocks (n * 16 bytes) of data. 00134 * 00135 * ctx Poly1305 context. 00136 * m Blocks of message data. 00137 * bytes The number of bytes to process. 00138 */ 00139 extern void poly1305_blocks_avx(Poly1305* ctx, const unsigned char* m, 00140 size_t bytes); 00141 /* Set the key to use when processing data. 00142 * Initialize the context. 00143 * 00144 * ctx Poly1305 context. 00145 * key The key data (16 bytes). 00146 */ 00147 extern void poly1305_setkey_avx(Poly1305* ctx, const byte* key); 00148 /* Calculate the final result - authentication data. 00149 * Zeros out the private data in the context. 00150 * 00151 * ctx Poly1305 context. 00152 * mac Buffer to hold 16 bytes. 00153 */ 00154 extern void poly1305_final_avx(Poly1305* ctx, byte* mac); 00155 #endif 00156 00157 #ifdef HAVE_INTEL_AVX2 00158 /* Process multiple blocks (n * 16 bytes) of data. 
00159 * 00160 * ctx Poly1305 context. 00161 * m Blocks of message data. 00162 * bytes The number of bytes to process. 00163 */ 00164 extern void poly1305_blocks_avx2(Poly1305* ctx, const unsigned char* m, 00165 size_t bytes); 00166 /* Calculate R^1, R^2, R^3 and R^4 and store them in the context. 00167 * 00168 * ctx Poly1305 context. 00169 */ 00170 extern void poly1305_calc_powers_avx2(Poly1305* ctx); 00171 /* Set the key to use when processing data. 00172 * Initialize the context. 00173 * Calls AVX set key function as final function calls AVX code. 00174 * 00175 * ctx Poly1305 context. 00176 * key The key data (16 bytes). 00177 */ 00178 extern void poly1305_setkey_avx2(Poly1305* ctx, const byte* key); 00179 /* Calculate the final result - authentication data. 00180 * Zeros out the private data in the context. 00181 * Calls AVX final function to quickly process last blocks. 00182 * 00183 * ctx Poly1305 context. 00184 * mac Buffer to hold 16 bytes - authentication data. 00185 */ 00186 extern void poly1305_final_avx2(Poly1305* ctx, byte* mac); 00187 #endif 00188 00189 #ifdef __cplusplus 00190 } /* extern "C" */ 00191 #endif 00192 00193 #elif defined(POLY130564) 00194 #ifndef WOLFSSL_ARMASM 00195 static word64 U8TO64(const byte* p) 00196 { 00197 return 00198 (((word64)(p[0] & 0xff) ) | 00199 ((word64)(p[1] & 0xff) << 8) | 00200 ((word64)(p[2] & 0xff) << 16) | 00201 ((word64)(p[3] & 0xff) << 24) | 00202 ((word64)(p[4] & 0xff) << 32) | 00203 ((word64)(p[5] & 0xff) << 40) | 00204 ((word64)(p[6] & 0xff) << 48) | 00205 ((word64)(p[7] & 0xff) << 56)); 00206 } 00207 00208 static void U64TO8(byte* p, word64 v) { 00209 p[0] = (v ) & 0xff; 00210 p[1] = (v >> 8) & 0xff; 00211 p[2] = (v >> 16) & 0xff; 00212 p[3] = (v >> 24) & 0xff; 00213 p[4] = (v >> 32) & 0xff; 00214 p[5] = (v >> 40) & 0xff; 00215 p[6] = (v >> 48) & 0xff; 00216 p[7] = (v >> 56) & 0xff; 00217 } 00218 #endif/* WOLFSSL_ARMASM */ 00219 #else /* if not 64 bit then use 32 bit */ 00220 00221 static word32 U8TO32(const 
byte *p) 00222 { 00223 return 00224 (((word32)(p[0] & 0xff) ) | 00225 ((word32)(p[1] & 0xff) << 8) | 00226 ((word32)(p[2] & 0xff) << 16) | 00227 ((word32)(p[3] & 0xff) << 24)); 00228 } 00229 00230 static void U32TO8(byte *p, word32 v) { 00231 p[0] = (v ) & 0xff; 00232 p[1] = (v >> 8) & 0xff; 00233 p[2] = (v >> 16) & 0xff; 00234 p[3] = (v >> 24) & 0xff; 00235 } 00236 #endif 00237 00238 /* convert 32-bit unsigned to little endian 64 bit type as byte array */ 00239 static WC_INLINE void u32tole64(const word32 inLe32, byte outLe64[8]) 00240 { 00241 #ifndef WOLFSSL_X86_64_BUILD 00242 outLe64[0] = (byte)(inLe32 & 0x000000FF); 00243 outLe64[1] = (byte)((inLe32 & 0x0000FF00) >> 8); 00244 outLe64[2] = (byte)((inLe32 & 0x00FF0000) >> 16); 00245 outLe64[3] = (byte)((inLe32 & 0xFF000000) >> 24); 00246 outLe64[4] = 0; 00247 outLe64[5] = 0; 00248 outLe64[6] = 0; 00249 outLe64[7] = 0; 00250 #else 00251 *(word64*)outLe64 = inLe32; 00252 #endif 00253 } 00254 00255 00256 #if !defined(WOLFSSL_ARMASM) || !defined(__aarch64__) 00257 void poly1305_blocks(Poly1305* ctx, const unsigned char *m, 00258 size_t bytes) 00259 { 00260 #ifdef USE_INTEL_SPEEDUP 00261 /* AVX2 is handled in wc_Poly1305Update. */ 00262 poly1305_blocks_avx(ctx, m, bytes); 00263 #elif defined(POLY130564) 00264 const word64 hibit = (ctx->finished) ? 
0 : ((word64)1 << 40); /* 1 << 128 */ 00265 word64 r0,r1,r2; 00266 word64 s1,s2; 00267 word64 h0,h1,h2; 00268 word64 c; 00269 word128 d0,d1,d2,d; 00270 00271 r0 = ctx->r[0]; 00272 r1 = ctx->r[1]; 00273 r2 = ctx->r[2]; 00274 00275 h0 = ctx->h[0]; 00276 h1 = ctx->h[1]; 00277 h2 = ctx->h[2]; 00278 00279 s1 = r1 * (5 << 2); 00280 s2 = r2 * (5 << 2); 00281 00282 while (bytes >= POLY1305_BLOCK_SIZE) { 00283 word64 t0,t1; 00284 00285 /* h += m[i] */ 00286 t0 = U8TO64(&m[0]); 00287 t1 = U8TO64(&m[8]); 00288 00289 h0 += (( t0 ) & 0xfffffffffff); 00290 h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff); 00291 h2 += (((t1 >> 24) ) & 0x3ffffffffff) | hibit; 00292 00293 /* h *= r */ 00294 MUL(d0, h0, r0); MUL(d, h1, s2); ADD(d0, d); MUL(d, h2, s1); ADD(d0, d); 00295 MUL(d1, h0, r1); MUL(d, h1, r0); ADD(d1, d); MUL(d, h2, s2); ADD(d1, d); 00296 MUL(d2, h0, r2); MUL(d, h1, r1); ADD(d2, d); MUL(d, h2, r0); ADD(d2, d); 00297 00298 /* (partial) h %= p */ 00299 c = SHR(d0, 44); h0 = LO(d0) & 0xfffffffffff; 00300 ADDLO(d1, c); c = SHR(d1, 44); h1 = LO(d1) & 0xfffffffffff; 00301 ADDLO(d2, c); c = SHR(d2, 42); h2 = LO(d2) & 0x3ffffffffff; 00302 h0 += c * 5; c = (h0 >> 44); h0 = h0 & 0xfffffffffff; 00303 h1 += c; 00304 00305 m += POLY1305_BLOCK_SIZE; 00306 bytes -= POLY1305_BLOCK_SIZE; 00307 } 00308 00309 ctx->h[0] = h0; 00310 ctx->h[1] = h1; 00311 ctx->h[2] = h2; 00312 00313 #else /* if not 64 bit then use 32 bit */ 00314 const word32 hibit = (ctx->finished) ? 
0 : ((word32)1 << 24); /* 1 << 128 */ 00315 word32 r0,r1,r2,r3,r4; 00316 word32 s1,s2,s3,s4; 00317 word32 h0,h1,h2,h3,h4; 00318 word64 d0,d1,d2,d3,d4; 00319 word32 c; 00320 00321 00322 r0 = ctx->r[0]; 00323 r1 = ctx->r[1]; 00324 r2 = ctx->r[2]; 00325 r3 = ctx->r[3]; 00326 r4 = ctx->r[4]; 00327 00328 s1 = r1 * 5; 00329 s2 = r2 * 5; 00330 s3 = r3 * 5; 00331 s4 = r4 * 5; 00332 00333 h0 = ctx->h[0]; 00334 h1 = ctx->h[1]; 00335 h2 = ctx->h[2]; 00336 h3 = ctx->h[3]; 00337 h4 = ctx->h[4]; 00338 00339 while (bytes >= POLY1305_BLOCK_SIZE) { 00340 /* h += m[i] */ 00341 h0 += (U8TO32(m+ 0) ) & 0x3ffffff; 00342 h1 += (U8TO32(m+ 3) >> 2) & 0x3ffffff; 00343 h2 += (U8TO32(m+ 6) >> 4) & 0x3ffffff; 00344 h3 += (U8TO32(m+ 9) >> 6) & 0x3ffffff; 00345 h4 += (U8TO32(m+12) >> 8) | hibit; 00346 00347 /* h *= r */ 00348 d0 = ((word64)h0 * r0) + ((word64)h1 * s4) + ((word64)h2 * s3) + 00349 ((word64)h3 * s2) + ((word64)h4 * s1); 00350 d1 = ((word64)h0 * r1) + ((word64)h1 * r0) + ((word64)h2 * s4) + 00351 ((word64)h3 * s3) + ((word64)h4 * s2); 00352 d2 = ((word64)h0 * r2) + ((word64)h1 * r1) + ((word64)h2 * r0) + 00353 ((word64)h3 * s4) + ((word64)h4 * s3); 00354 d3 = ((word64)h0 * r3) + ((word64)h1 * r2) + ((word64)h2 * r1) + 00355 ((word64)h3 * r0) + ((word64)h4 * s4); 00356 d4 = ((word64)h0 * r4) + ((word64)h1 * r3) + ((word64)h2 * r2) + 00357 ((word64)h3 * r1) + ((word64)h4 * r0); 00358 00359 /* (partial) h %= p */ 00360 c = (word32)(d0 >> 26); h0 = (word32)d0 & 0x3ffffff; 00361 d1 += c; c = (word32)(d1 >> 26); h1 = (word32)d1 & 0x3ffffff; 00362 d2 += c; c = (word32)(d2 >> 26); h2 = (word32)d2 & 0x3ffffff; 00363 d3 += c; c = (word32)(d3 >> 26); h3 = (word32)d3 & 0x3ffffff; 00364 d4 += c; c = (word32)(d4 >> 26); h4 = (word32)d4 & 0x3ffffff; 00365 h0 += c * 5; c = (h0 >> 26); h0 = h0 & 0x3ffffff; 00366 h1 += c; 00367 00368 m += POLY1305_BLOCK_SIZE; 00369 bytes -= POLY1305_BLOCK_SIZE; 00370 } 00371 00372 ctx->h[0] = h0; 00373 ctx->h[1] = h1; 00374 ctx->h[2] = h2; 00375 ctx->h[3] = h3; 
00376 ctx->h[4] = h4; 00377 00378 #endif /* end of 64 bit cpu blocks or 32 bit cpu */ 00379 } 00380 00381 void poly1305_block(Poly1305* ctx, const unsigned char *m) 00382 { 00383 #ifdef USE_INTEL_SPEEDUP 00384 /* No call to poly1305_block when AVX2, AVX2 does 4 blocks at a time. */ 00385 poly1305_block_avx(ctx, m); 00386 #else 00387 poly1305_blocks(ctx, m, POLY1305_BLOCK_SIZE); 00388 #endif 00389 } 00390 #endif /* !defined(WOLFSSL_ARMASM) || !defined(__aarch64__) */ 00391 00392 #if !defined(WOLFSSL_ARMASM) || !defined(__aarch64__) 00393 int wc_Poly1305SetKey(Poly1305* ctx, const byte* key, word32 keySz) 00394 { 00395 #if defined(POLY130564) && !defined(USE_INTEL_SPEEDUP) 00396 word64 t0,t1; 00397 #endif 00398 00399 if (key == NULL) 00400 return BAD_FUNC_ARG; 00401 00402 #ifdef CHACHA_AEAD_TEST 00403 word32 k; 00404 printf("Poly key used:\n"); 00405 for (k = 0; k < keySz; k++) { 00406 printf("%02x", key[k]); 00407 if ((k+1) % 8 == 0) 00408 printf("\n"); 00409 } 00410 printf("\n"); 00411 #endif 00412 00413 if (keySz != 32 || ctx == NULL) 00414 return BAD_FUNC_ARG; 00415 00416 #ifdef USE_INTEL_SPEEDUP 00417 if (!cpu_flags_set) { 00418 intel_flags = cpuid_get_flags(); 00419 cpu_flags_set = 1; 00420 } 00421 #ifdef HAVE_INTEL_AVX2 00422 if (IS_INTEL_AVX2(intel_flags)) 00423 poly1305_setkey_avx2(ctx, key); 00424 else 00425 #endif 00426 poly1305_setkey_avx(ctx, key); 00427 #elif defined(POLY130564) 00428 00429 /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ 00430 t0 = U8TO64(key + 0); 00431 t1 = U8TO64(key + 8); 00432 00433 ctx->r[0] = ( t0 ) & 0xffc0fffffff; 00434 ctx->r[1] = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffff; 00435 ctx->r[2] = ((t1 >> 24) ) & 0x00ffffffc0f; 00436 00437 /* h (accumulator) = 0 */ 00438 ctx->h[0] = 0; 00439 ctx->h[1] = 0; 00440 ctx->h[2] = 0; 00441 00442 /* save pad for later */ 00443 ctx->pad[0] = U8TO64(key + 16); 00444 ctx->pad[1] = U8TO64(key + 24); 00445 00446 ctx->leftover = 0; 00447 ctx->finished = 0; 00448 00449 #else /* if not 64 bit then use 
32 bit */ 00450 00451 /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ 00452 ctx->r[0] = (U8TO32(key + 0) ) & 0x3ffffff; 00453 ctx->r[1] = (U8TO32(key + 3) >> 2) & 0x3ffff03; 00454 ctx->r[2] = (U8TO32(key + 6) >> 4) & 0x3ffc0ff; 00455 ctx->r[3] = (U8TO32(key + 9) >> 6) & 0x3f03fff; 00456 ctx->r[4] = (U8TO32(key + 12) >> 8) & 0x00fffff; 00457 00458 /* h = 0 */ 00459 ctx->h[0] = 0; 00460 ctx->h[1] = 0; 00461 ctx->h[2] = 0; 00462 ctx->h[3] = 0; 00463 ctx->h[4] = 0; 00464 00465 /* save pad for later */ 00466 ctx->pad[0] = U8TO32(key + 16); 00467 ctx->pad[1] = U8TO32(key + 20); 00468 ctx->pad[2] = U8TO32(key + 24); 00469 ctx->pad[3] = U8TO32(key + 28); 00470 00471 ctx->leftover = 0; 00472 ctx->finished = 0; 00473 00474 #endif 00475 00476 return 0; 00477 } 00478 00479 int wc_Poly1305Final(Poly1305* ctx, byte* mac) 00480 { 00481 #ifdef USE_INTEL_SPEEDUP 00482 #elif defined(POLY130564) 00483 00484 word64 h0,h1,h2,c; 00485 word64 g0,g1,g2; 00486 word64 t0,t1; 00487 00488 #else 00489 00490 word32 h0,h1,h2,h3,h4,c; 00491 word32 g0,g1,g2,g3,g4; 00492 word64 f; 00493 word32 mask; 00494 00495 #endif 00496 00497 if (ctx == NULL) 00498 return BAD_FUNC_ARG; 00499 00500 #ifdef USE_INTEL_SPEEDUP 00501 #ifdef HAVE_INTEL_AVX2 00502 if (IS_INTEL_AVX2(intel_flags)) 00503 poly1305_final_avx2(ctx, mac); 00504 else 00505 #endif 00506 poly1305_final_avx(ctx, mac); 00507 #elif defined(POLY130564) 00508 00509 /* process the remaining block */ 00510 if (ctx->leftover) { 00511 size_t i = ctx->leftover; 00512 ctx->buffer[i] = 1; 00513 for (i = i + 1; i < POLY1305_BLOCK_SIZE; i++) 00514 ctx->buffer[i] = 0; 00515 ctx->finished = 1; 00516 poly1305_block(ctx, ctx->buffer); 00517 } 00518 00519 /* fully carry h */ 00520 h0 = ctx->h[0]; 00521 h1 = ctx->h[1]; 00522 h2 = ctx->h[2]; 00523 00524 c = (h1 >> 44); h1 &= 0xfffffffffff; 00525 h2 += c; c = (h2 >> 42); h2 &= 0x3ffffffffff; 00526 h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff; 00527 h1 += c; c = (h1 >> 44); h1 &= 0xfffffffffff; 00528 h2 += c; c = (h2 
>> 42); h2 &= 0x3ffffffffff; 00529 h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff; 00530 h1 += c; 00531 00532 /* compute h + -p */ 00533 g0 = h0 + 5; c = (g0 >> 44); g0 &= 0xfffffffffff; 00534 g1 = h1 + c; c = (g1 >> 44); g1 &= 0xfffffffffff; 00535 g2 = h2 + c - ((word64)1 << 42); 00536 00537 /* select h if h < p, or h + -p if h >= p */ 00538 c = (g2 >> ((sizeof(word64) * 8) - 1)) - 1; 00539 g0 &= c; 00540 g1 &= c; 00541 g2 &= c; 00542 c = ~c; 00543 h0 = (h0 & c) | g0; 00544 h1 = (h1 & c) | g1; 00545 h2 = (h2 & c) | g2; 00546 00547 /* h = (h + pad) */ 00548 t0 = ctx->pad[0]; 00549 t1 = ctx->pad[1]; 00550 00551 h0 += (( t0 ) & 0xfffffffffff) ; 00552 c = (h0 >> 44); h0 &= 0xfffffffffff; 00553 h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff) + c; 00554 c = (h1 >> 44); h1 &= 0xfffffffffff; 00555 h2 += (((t1 >> 24) ) & 0x3ffffffffff) + c; 00556 h2 &= 0x3ffffffffff; 00557 00558 /* mac = h % (2^128) */ 00559 h0 = ((h0 ) | (h1 << 44)); 00560 h1 = ((h1 >> 20) | (h2 << 24)); 00561 00562 U64TO8(mac + 0, h0); 00563 U64TO8(mac + 8, h1); 00564 00565 /* zero out the state */ 00566 ctx->h[0] = 0; 00567 ctx->h[1] = 0; 00568 ctx->h[2] = 0; 00569 ctx->r[0] = 0; 00570 ctx->r[1] = 0; 00571 ctx->r[2] = 0; 00572 ctx->pad[0] = 0; 00573 ctx->pad[1] = 0; 00574 00575 #else /* if not 64 bit then use 32 bit */ 00576 00577 /* process the remaining block */ 00578 if (ctx->leftover) { 00579 size_t i = ctx->leftover; 00580 ctx->buffer[i++] = 1; 00581 for (; i < POLY1305_BLOCK_SIZE; i++) 00582 ctx->buffer[i] = 0; 00583 ctx->finished = 1; 00584 poly1305_block(ctx, ctx->buffer); 00585 } 00586 00587 /* fully carry h */ 00588 h0 = ctx->h[0]; 00589 h1 = ctx->h[1]; 00590 h2 = ctx->h[2]; 00591 h3 = ctx->h[3]; 00592 h4 = ctx->h[4]; 00593 00594 c = h1 >> 26; h1 = h1 & 0x3ffffff; 00595 h2 += c; c = h2 >> 26; h2 = h2 & 0x3ffffff; 00596 h3 += c; c = h3 >> 26; h3 = h3 & 0x3ffffff; 00597 h4 += c; c = h4 >> 26; h4 = h4 & 0x3ffffff; 00598 h0 += c * 5; c = h0 >> 26; h0 = h0 & 0x3ffffff; 00599 h1 += c; 00600 
00601 /* compute h + -p */ 00602 g0 = h0 + 5; c = g0 >> 26; g0 &= 0x3ffffff; 00603 g1 = h1 + c; c = g1 >> 26; g1 &= 0x3ffffff; 00604 g2 = h2 + c; c = g2 >> 26; g2 &= 0x3ffffff; 00605 g3 = h3 + c; c = g3 >> 26; g3 &= 0x3ffffff; 00606 g4 = h4 + c - ((word32)1 << 26); 00607 00608 /* select h if h < p, or h + -p if h >= p */ 00609 mask = ((word32)g4 >> ((sizeof(word32) * 8) - 1)) - 1; 00610 g0 &= mask; 00611 g1 &= mask; 00612 g2 &= mask; 00613 g3 &= mask; 00614 g4 &= mask; 00615 mask = ~mask; 00616 h0 = (h0 & mask) | g0; 00617 h1 = (h1 & mask) | g1; 00618 h2 = (h2 & mask) | g2; 00619 h3 = (h3 & mask) | g3; 00620 h4 = (h4 & mask) | g4; 00621 00622 /* h = h % (2^128) */ 00623 h0 = ((h0 ) | (h1 << 26)) & 0xffffffff; 00624 h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff; 00625 h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff; 00626 h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff; 00627 00628 /* mac = (h + pad) % (2^128) */ 00629 f = (word64)h0 + ctx->pad[0] ; h0 = (word32)f; 00630 f = (word64)h1 + ctx->pad[1] + (f >> 32); h1 = (word32)f; 00631 f = (word64)h2 + ctx->pad[2] + (f >> 32); h2 = (word32)f; 00632 f = (word64)h3 + ctx->pad[3] + (f >> 32); h3 = (word32)f; 00633 00634 U32TO8(mac + 0, h0); 00635 U32TO8(mac + 4, h1); 00636 U32TO8(mac + 8, h2); 00637 U32TO8(mac + 12, h3); 00638 00639 /* zero out the state */ 00640 ctx->h[0] = 0; 00641 ctx->h[1] = 0; 00642 ctx->h[2] = 0; 00643 ctx->h[3] = 0; 00644 ctx->h[4] = 0; 00645 ctx->r[0] = 0; 00646 ctx->r[1] = 0; 00647 ctx->r[2] = 0; 00648 ctx->r[3] = 0; 00649 ctx->r[4] = 0; 00650 ctx->pad[0] = 0; 00651 ctx->pad[1] = 0; 00652 ctx->pad[2] = 0; 00653 ctx->pad[3] = 0; 00654 00655 #endif 00656 00657 return 0; 00658 } 00659 #endif /* !defined(WOLFSSL_ARMASM) || !defined(__aarch64__) */ 00660 00661 00662 int wc_Poly1305Update(Poly1305* ctx, const byte* m, word32 bytes) 00663 { 00664 size_t i; 00665 00666 #ifdef CHACHA_AEAD_TEST 00667 word32 k; 00668 printf("Raw input to poly:\n"); 00669 for (k = 0; k < bytes; k++) { 00670 printf("%02x", m[k]); 00671 
if ((k+1) % 16 == 0) 00672 printf("\n"); 00673 } 00674 printf("\n"); 00675 #endif 00676 00677 if (ctx == NULL) 00678 return BAD_FUNC_ARG; 00679 00680 #ifdef USE_INTEL_SPEEDUP 00681 #ifdef HAVE_INTEL_AVX2 00682 if (IS_INTEL_AVX2(intel_flags)) { 00683 /* handle leftover */ 00684 if (ctx->leftover) { 00685 size_t want = sizeof(ctx->buffer) - ctx->leftover; 00686 if (want > bytes) 00687 want = bytes; 00688 00689 for (i = 0; i < want; i++) 00690 ctx->buffer[ctx->leftover + i] = m[i]; 00691 bytes -= (word32)want; 00692 m += want; 00693 ctx->leftover += want; 00694 if (ctx->leftover < sizeof(ctx->buffer)) 00695 return 0; 00696 00697 if (!ctx->started) 00698 poly1305_calc_powers_avx2(ctx); 00699 poly1305_blocks_avx2(ctx, ctx->buffer, sizeof(ctx->buffer)); 00700 ctx->leftover = 0; 00701 } 00702 00703 /* process full blocks */ 00704 if (bytes >= sizeof(ctx->buffer)) { 00705 size_t want = bytes & ~(sizeof(ctx->buffer) - 1); 00706 00707 if (!ctx->started) 00708 poly1305_calc_powers_avx2(ctx); 00709 poly1305_blocks_avx2(ctx, m, want); 00710 m += want; 00711 bytes -= (word32)want; 00712 } 00713 00714 /* store leftover */ 00715 if (bytes) { 00716 for (i = 0; i < bytes; i++) 00717 ctx->buffer[ctx->leftover + i] = m[i]; 00718 ctx->leftover += bytes; 00719 } 00720 } 00721 else 00722 #endif 00723 #endif 00724 { 00725 /* handle leftover */ 00726 if (ctx->leftover) { 00727 size_t want = (POLY1305_BLOCK_SIZE - ctx->leftover); 00728 if (want > bytes) 00729 want = bytes; 00730 for (i = 0; i < want; i++) 00731 ctx->buffer[ctx->leftover + i] = m[i]; 00732 bytes -= (word32)want; 00733 m += want; 00734 ctx->leftover += want; 00735 if (ctx->leftover < POLY1305_BLOCK_SIZE) 00736 return 0; 00737 poly1305_block(ctx, ctx->buffer); 00738 ctx->leftover = 0; 00739 } 00740 00741 /* process full blocks */ 00742 if (bytes >= POLY1305_BLOCK_SIZE) { 00743 size_t want = (bytes & ~(POLY1305_BLOCK_SIZE - 1)); 00744 poly1305_blocks(ctx, m, want); 00745 m += want; 00746 bytes -= (word32)want; 00747 } 00748 
00749 /* store leftover */ 00750 if (bytes) { 00751 for (i = 0; i < bytes; i++) 00752 ctx->buffer[ctx->leftover + i] = m[i]; 00753 ctx->leftover += bytes; 00754 } 00755 } 00756 00757 return 0; 00758 } 00759 00760 /* Takes a Poly1305 struct that has a key loaded and pads the provided length 00761 ctx : Initialized Poly1305 struct to use 00762 lenToPad : Current number of bytes updated that needs padding to 16 00763 */ 00764 int wc_Poly1305_Pad(Poly1305* ctx, word32 lenToPad) 00765 { 00766 int ret = 0; 00767 word32 paddingLen; 00768 byte padding[WC_POLY1305_PAD_SZ - 1]; 00769 00770 if (ctx == NULL) { 00771 return BAD_FUNC_ARG; 00772 } 00773 if (lenToPad == 0) { 00774 return 0; /* nothing needs to be done */ 00775 } 00776 00777 XMEMSET(padding, 0, sizeof(padding)); 00778 00779 /* Pad length to 16 bytes */ 00780 paddingLen = -(int)lenToPad & (WC_POLY1305_PAD_SZ - 1); 00781 if (paddingLen > 0) { 00782 ret = wc_Poly1305Update(ctx, padding, paddingLen); 00783 } 00784 return ret; 00785 } 00786 00787 /* Takes a Poly1305 struct that has a key loaded and adds the AEAD length 00788 encoding in 64-bit little endian 00789 aadSz : Size of the additional authentication data 00790 dataSz : Size of the plaintext or ciphertext 00791 */ 00792 int wc_Poly1305_EncodeSizes(Poly1305* ctx, word32 aadSz, word32 dataSz) 00793 { 00794 int ret; 00795 byte little64[16]; /* sizeof(word64) * 2 */ 00796 00797 if (ctx == NULL) { 00798 return BAD_FUNC_ARG; 00799 } 00800 00801 XMEMSET(little64, 0, sizeof(little64)); 00802 00803 /* size of additional data and input data as little endian 64 bit types */ 00804 u32tole64(aadSz, little64); 00805 u32tole64(dataSz, little64 + 8); 00806 ret = wc_Poly1305Update(ctx, little64, sizeof(little64)); 00807 00808 return ret; 00809 } 00810 00811 /* Takes in an initialized Poly1305 struct that has a key loaded and creates 00812 a MAC (tag) using recent TLS AEAD padding scheme. 
00813 ctx : Initialized Poly1305 struct to use 00814 additional : Additional data to use 00815 addSz : Size of additional buffer 00816 input : Input buffer to create tag from 00817 sz : Size of input buffer 00818 tag : Buffer to hold created tag 00819 tagSz : Size of input tag buffer (must be at least 00820 WC_POLY1305_MAC_SZ(16)) 00821 */ 00822 int wc_Poly1305_MAC(Poly1305* ctx, byte* additional, word32 addSz, 00823 byte* input, word32 sz, byte* tag, word32 tagSz) 00824 { 00825 int ret; 00826 00827 /* sanity check on arguments */ 00828 if (ctx == NULL || input == NULL || tag == NULL || 00829 tagSz < WC_POLY1305_MAC_SZ) { 00830 return BAD_FUNC_ARG; 00831 } 00832 00833 /* additional allowed to be 0 */ 00834 if (addSz > 0) { 00835 if (additional == NULL) 00836 return BAD_FUNC_ARG; 00837 00838 /* additional data plus padding */ 00839 if ((ret = wc_Poly1305Update(ctx, additional, addSz)) != 0) { 00840 return ret; 00841 } 00842 /* pad additional data */ 00843 if ((ret = wc_Poly1305_Pad(ctx, addSz)) != 0) { 00844 return ret; 00845 } 00846 } 00847 00848 /* input plus padding */ 00849 if ((ret = wc_Poly1305Update(ctx, input, sz)) != 0) { 00850 return ret; 00851 } 00852 /* pad input data */ 00853 if ((ret = wc_Poly1305_Pad(ctx, sz)) != 0) { 00854 return ret; 00855 } 00856 00857 /* encode size of AAD and input data as little endian 64 bit types */ 00858 if ((ret = wc_Poly1305_EncodeSizes(ctx, addSz, sz)) != 0) { 00859 return ret; 00860 } 00861 00862 /* Finalize the auth tag */ 00863 ret = wc_Poly1305Final(ctx, tag); 00864 00865 return ret; 00866 00867 } 00868 #endif /* HAVE_POLY1305 */ 00869
Generated on Tue Jul 12 2022 20:58:41 by Doxygen 1.7.2