A library for setting up Secure Socket Layer (SSL) connections and verifying remote hosts using certificates. Contains only the source files for the mbed platform implementation of the library.

Dependents:   HTTPClient-SSL HTTPClient-SSL HTTPClient-SSL HTTPClient-SSL

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers poly1305.c Source File

poly1305.c

00001 /* poly1305.c
00002  *
00003  * Copyright (C) 2006-2014 wolfSSL Inc.
00004  *
00005  * This file is part of CyaSSL.
00006  *
00007  * CyaSSL is free software; you can redistribute it and/or modify
00008  * it under the terms of the GNU General Public License as published by
00009  * the Free Software Foundation; either version 2 of the License, or
00010  * (at your option) any later version.
00011  *
00012  * CyaSSL is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
00020  *
00021  * Based off the public domain implementations by Andrew Moon 
00022  * and Daniel J. Bernstein
00023  */
00024 
00025 #ifdef HAVE_CONFIG_H
00026     #include <config.h>
00027 #endif
00028 
00029 #include <cyassl/ctaocrypt/settings.h>
00030 
00031 #ifdef HAVE_POLY1305
00032 #include <cyassl/ctaocrypt/poly1305.h>
00033 #include <cyassl/ctaocrypt/error-crypt.h>
00034 #include <cyassl/ctaocrypt/logging.h>
00035 #ifdef NO_INLINE
00036     #include <cyassl/ctaocrypt/misc.h>
00037 #else
00038     #include <ctaocrypt/src/misc.c>
00039 #endif
00040 #ifdef CHACHA_AEAD_TEST
00041     #include <stdio.h>
00042 #endif
00043 
00044 #ifdef _MSC_VER
00045     /* 4127 warning constant while(1)  */
00046     #pragma warning(disable: 4127)
00047 #endif
00048 
00049 #if defined(POLY130564)
00050     
/* Compiler-specific spelling of "never inline this function"; expands to
 * nothing on toolchains that are neither MSVC nor GNU-compatible. */
#if defined(_MSC_VER)
    #define POLY1305_NOINLINE __declspec(noinline)
#elif defined(__GNUC__)
    #define POLY1305_NOINLINE __attribute__((noinline))
#else
    #define POLY1305_NOINLINE
#endif

/* 128-bit helpers used by the 64-bit code path.
 *
 *   MUL(out, x, y)   out  = x * y            (64 x 64 -> 128)
 *   ADD(out, in)     out += in               (128-bit in)
 *   ADDLO(out, in)   out += in               (64-bit in)
 *   SHR(in, shift)   low 64 bits of (in >> shift)
 *   LO(in)           low 64 bits of in
 *
 * MSVC has no native 128-bit integer, so a lo/hi struct plus intrinsics is
 * used; GNU-compatible compilers use their built-in 128-bit type.
 */
#if defined(_MSC_VER)
    #include <intrin.h>

    typedef struct word128 {
        word64 lo;
        word64 hi;
    } word128;

    #define MUL(out, x, y) out.lo = _umul128((x), (y), &out.hi)
    /* multi-line macros: every line but the last must end in a backslash */
    #define ADD(out, in) { word64 t = out.lo; out.lo += in.lo; \
                           out.hi += (out.lo < t) + in.hi; }
    #define ADDLO(out, in) { word64 t = out.lo; out.lo += (in); \
                             out.hi += (out.lo < t); }
    #define SHR(in, shift) (__shiftright128(in.lo, in.hi, (shift)))
    #define LO(in) (in.lo)

#elif defined(__GNUC__)
    #if defined(__SIZEOF_INT128__)
        typedef unsigned __int128 word128;
    #else
        typedef unsigned word128 __attribute__((mode(TI)));
    #endif

    /* arguments are parenthesized so compound expressions expand safely */
    #define MUL(out, x, y) out = ((word128)(x) * (y))
    #define ADD(out, in) out += (in)
    #define ADDLO(out, in) out += (in)
    #define SHR(in, shift) (word64)((in) >> (shift))
    #define LO(in) (word64)(in)
#endif
00088     
00089     static word64 U8TO64(const byte* p) {
00090         return
00091             (((word64)(p[0] & 0xff)      ) |
00092              ((word64)(p[1] & 0xff) <<  8) |
00093              ((word64)(p[2] & 0xff) << 16) |
00094              ((word64)(p[3] & 0xff) << 24) |
00095              ((word64)(p[4] & 0xff) << 32) |
00096              ((word64)(p[5] & 0xff) << 40) |
00097              ((word64)(p[6] & 0xff) << 48) |
00098              ((word64)(p[7] & 0xff) << 56));
00099     }
00100     
00101     static void U64TO8(byte* p, word64 v) {
00102         p[0] = (v      ) & 0xff;
00103         p[1] = (v >>  8) & 0xff;
00104         p[2] = (v >> 16) & 0xff;
00105         p[3] = (v >> 24) & 0xff;
00106         p[4] = (v >> 32) & 0xff;
00107         p[5] = (v >> 40) & 0xff;
00108         p[6] = (v >> 48) & 0xff;
00109         p[7] = (v >> 56) & 0xff;
00110     }
00111 
00112 #else /* if not 64 bit then use 32 bit */
00113     
00114     static word32 U8TO32(const byte *p) {
00115         return
00116             (((word32)(p[0] & 0xff)      ) |
00117              ((word32)(p[1] & 0xff) <<  8) |
00118              ((word32)(p[2] & 0xff) << 16) |
00119              ((word32)(p[3] & 0xff) << 24));
00120     }
00121     
00122     static void U32TO8(byte *p, word32 v) {
00123         p[0] = (v      ) & 0xff;
00124         p[1] = (v >>  8) & 0xff;
00125         p[2] = (v >> 16) & 0xff;
00126         p[3] = (v >> 24) & 0xff;
00127     }
00128 #endif
00129 
00130 static void poly1305_blocks(Poly1305* ctx, const unsigned char *m,
00131                             size_t bytes) {
00132 
00133 #ifdef POLY130564
00134 
00135     const word64 hibit = (ctx->final) ? 0 : ((word64)1 << 40); /* 1 << 128 */
00136     word64 r0,r1,r2;
00137     word64 s1,s2;
00138     word64 h0,h1,h2;
00139     word64 c;
00140     word128 d0,d1,d2,d;
00141 
00142 #else
00143 
00144     const word32 hibit = (ctx->final) ? 0 : (1 << 24); /* 1 << 128 */
00145     word32 r0,r1,r2,r3,r4;
00146     word32 s1,s2,s3,s4;
00147     word32 h0,h1,h2,h3,h4;
00148     word64 d0,d1,d2,d3,d4;
00149     word32 c;
00150 
00151 #endif
00152 
00153 #ifdef POLY130564
00154 
00155     r0 = ctx->r[0];
00156     r1 = ctx->r[1];
00157     r2 = ctx->r[2];
00158 
00159     h0 = ctx->h[0];
00160     h1 = ctx->h[1];
00161     h2 = ctx->h[2];
00162 
00163     s1 = r1 * (5 << 2);
00164     s2 = r2 * (5 << 2);
00165 
00166     while (bytes >= POLY1305_BLOCK_SIZE) {
00167         word64 t0,t1;
00168 
00169         /* h += m[i] */
00170         t0 = U8TO64(&m[0]);
00171         t1 = U8TO64(&m[8]);
00172 
00173         h0 += (( t0                    ) & 0xfffffffffff);
00174         h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff);
00175         h2 += (((t1 >> 24)             ) & 0x3ffffffffff) | hibit;
00176 
00177         /* h *= r */
00178         MUL(d0, h0, r0); MUL(d, h1, s2); ADD(d0, d); MUL(d, h2, s1); ADD(d0, d);
00179         MUL(d1, h0, r1); MUL(d, h1, r0); ADD(d1, d); MUL(d, h2, s2); ADD(d1, d);
00180         MUL(d2, h0, r2); MUL(d, h1, r1); ADD(d2, d); MUL(d, h2, r0); ADD(d2, d);
00181 
00182         /* (partial) h %= p */
00183                       c = SHR(d0, 44); h0 = LO(d0) & 0xfffffffffff;
00184         ADDLO(d1, c); c = SHR(d1, 44); h1 = LO(d1) & 0xfffffffffff;
00185         ADDLO(d2, c); c = SHR(d2, 42); h2 = LO(d2) & 0x3ffffffffff;
00186         h0  += c * 5; c = (h0 >> 44);  h0 =    h0  & 0xfffffffffff;
00187         h1  += c;
00188 
00189         m += POLY1305_BLOCK_SIZE;
00190         bytes -= POLY1305_BLOCK_SIZE;
00191     }
00192 
00193     ctx->h[0] = h0;
00194     ctx->h[1] = h1;
00195     ctx->h[2] = h2;
00196 
00197 #else /* if not 64 bit then use 32 bit */
00198    
00199     r0 = ctx->r[0];
00200     r1 = ctx->r[1];
00201     r2 = ctx->r[2];
00202     r3 = ctx->r[3];
00203     r4 = ctx->r[4];
00204 
00205     s1 = r1 * 5;
00206     s2 = r2 * 5;
00207     s3 = r3 * 5;
00208     s4 = r4 * 5;
00209 
00210     h0 = ctx->h[0];
00211     h1 = ctx->h[1];
00212     h2 = ctx->h[2];
00213     h3 = ctx->h[3];
00214     h4 = ctx->h[4];
00215 
00216     while (bytes >= POLY1305_BLOCK_SIZE) {
00217         /* h += m[i] */
00218         h0 += (U8TO32(m+ 0)     ) & 0x3ffffff;
00219         h1 += (U8TO32(m+ 3) >> 2) & 0x3ffffff;
00220         h2 += (U8TO32(m+ 6) >> 4) & 0x3ffffff;
00221         h3 += (U8TO32(m+ 9) >> 6) & 0x3ffffff;
00222         h4 += (U8TO32(m+12) >> 8) | hibit;
00223 
00224         /* h *= r */
00225         d0 = ((word64)h0 * r0) + ((word64)h1 * s4) + ((word64)h2 * s3) +
00226              ((word64)h3 * s2) + ((word64)h4 * s1);
00227         d1 = ((word64)h0 * r1) + ((word64)h1 * r0) + ((word64)h2 * s4) +
00228              ((word64)h3 * s3) + ((word64)h4 * s2);
00229         d2 = ((word64)h0 * r2) + ((word64)h1 * r1) + ((word64)h2 * r0) +
00230              ((word64)h3 * s4) + ((word64)h4 * s3);
00231         d3 = ((word64)h0 * r3) + ((word64)h1 * r2) + ((word64)h2 * r1) +
00232              ((word64)h3 * r0) + ((word64)h4 * s4);
00233         d4 = ((word64)h0 * r4) + ((word64)h1 * r3) + ((word64)h2 * r2) +
00234              ((word64)h3 * r1) + ((word64)h4 * r0);
00235 
00236         /* (partial) h %= p */
00237                       c = (word32)(d0 >> 26); h0 = (word32)d0 & 0x3ffffff;
00238         d1 += c;      c = (word32)(d1 >> 26); h1 = (word32)d1 & 0x3ffffff;
00239         d2 += c;      c = (word32)(d2 >> 26); h2 = (word32)d2 & 0x3ffffff;
00240         d3 += c;      c = (word32)(d3 >> 26); h3 = (word32)d3 & 0x3ffffff;
00241         d4 += c;      c = (word32)(d4 >> 26); h4 = (word32)d4 & 0x3ffffff;
00242         h0 += c * 5;  c =  (h0 >> 26); h0 =                h0 & 0x3ffffff;
00243         h1 += c;
00244 
00245         m += POLY1305_BLOCK_SIZE;
00246         bytes -= POLY1305_BLOCK_SIZE;
00247     }
00248 
00249     ctx->h[0] = h0;
00250     ctx->h[1] = h1;
00251     ctx->h[2] = h2;
00252     ctx->h[3] = h3;
00253     ctx->h[4] = h4;
00254 
00255 #endif /* end of 64 bit cpu blocks or 32 bit cpu */
00256 }
00257 
00258 
00259 int Poly1305SetKey(Poly1305* ctx, const byte* key, word32 keySz) {
00260 
00261 #if defined(POLY130564)
00262     word64 t0,t1;
00263 #endif
00264 
00265 #ifdef CHACHA_AEAD_TEST
00266     word32 k;
00267     printf("Poly key used:\n");
00268     for (k = 0; k < keySz; k++) {
00269         printf("%02x", key[k]);
00270         if ((k+1) % 8 == 0)
00271             printf("\n");
00272     }
00273     printf("\n");
00274 #endif
00275 
00276     if (keySz != 32 || ctx == NULL)
00277         return BAD_FUNC_ARG;
00278 
00279 #if defined(POLY130564)
00280 
00281     /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
00282     t0 = U8TO64(key + 0);
00283     t1 = U8TO64(key + 8);
00284 
00285     ctx->r[0] = ( t0                    ) & 0xffc0fffffff;
00286     ctx->r[1] = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffff;
00287     ctx->r[2] = ((t1 >> 24)             ) & 0x00ffffffc0f;
00288 
00289     /* h (accumulator) = 0 */
00290     ctx->h[0] = 0;
00291     ctx->h[1] = 0;
00292     ctx->h[2] = 0;
00293 
00294     /* save pad for later */
00295     ctx->pad[0] = U8TO64(key + 16);
00296     ctx->pad[1] = U8TO64(key + 24);
00297 
00298 #else /* if not 64 bit then use 32 bit */
00299     
00300     /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
00301     ctx->r[0] = (U8TO32(key +  0)     ) & 0x3ffffff;
00302     ctx->r[1] = (U8TO32(key +  3) >> 2) & 0x3ffff03;
00303     ctx->r[2] = (U8TO32(key +  6) >> 4) & 0x3ffc0ff;
00304     ctx->r[3] = (U8TO32(key +  9) >> 6) & 0x3f03fff;
00305     ctx->r[4] = (U8TO32(key + 12) >> 8) & 0x00fffff;
00306 
00307     /* h = 0 */
00308     ctx->h[0] = 0;
00309     ctx->h[1] = 0;
00310     ctx->h[2] = 0;
00311     ctx->h[3] = 0;
00312     ctx->h[4] = 0;
00313 
00314     /* save pad for later */
00315     ctx->pad[0] = U8TO32(key + 16);
00316     ctx->pad[1] = U8TO32(key + 20);
00317     ctx->pad[2] = U8TO32(key + 24);
00318     ctx->pad[3] = U8TO32(key + 28);
00319 
00320 #endif
00321 
00322     ctx->leftover = 0;
00323     ctx->final = 0;
00324 
00325     return 0;
00326 }
00327 
00328 
00329 int Poly1305Final(Poly1305* ctx, byte* mac) {
00330 
00331 #if defined(POLY130564)
00332 
00333     word64 h0,h1,h2,c;
00334     word64 g0,g1,g2;
00335     word64 t0,t1;
00336 
00337 #else
00338 
00339     word32 h0,h1,h2,h3,h4,c;
00340     word32 g0,g1,g2,g3,g4;
00341     word64 f;
00342     word32 mask;
00343 
00344 #endif
00345 
00346     if (ctx == NULL)
00347         return BAD_FUNC_ARG;
00348 
00349 #if defined(POLY130564)
00350 
00351     /* process the remaining block */
00352     if (ctx->leftover) {
00353         size_t i = ctx->leftover;
00354         ctx->buffer[i] = 1;
00355         for (i = i + 1; i < POLY1305_BLOCK_SIZE; i++)
00356             ctx->buffer[i] = 0;
00357         ctx->final = 1;
00358         poly1305_blocks(ctx, ctx->buffer, POLY1305_BLOCK_SIZE);
00359     }
00360 
00361     /* fully carry h */
00362     h0 = ctx->h[0];
00363     h1 = ctx->h[1];
00364     h2 = ctx->h[2];
00365 
00366                  c = (h1 >> 44); h1 &= 0xfffffffffff;
00367     h2 += c;     c = (h2 >> 42); h2 &= 0x3ffffffffff;
00368     h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff;
00369     h1 += c;     c = (h1 >> 44); h1 &= 0xfffffffffff;
00370     h2 += c;     c = (h2 >> 42); h2 &= 0x3ffffffffff;
00371     h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff;
00372     h1 += c;
00373 
00374     /* compute h + -p */
00375     g0 = h0 + 5; c = (g0 >> 44); g0 &= 0xfffffffffff;
00376     g1 = h1 + c; c = (g1 >> 44); g1 &= 0xfffffffffff;
00377     g2 = h2 + c - ((word64)1 << 42);
00378 
00379     /* select h if h < p, or h + -p if h >= p */
00380     c = (g2 >> ((sizeof(word64) * 8) - 1)) - 1;
00381     g0 &= c;
00382     g1 &= c;
00383     g2 &= c;
00384     c = ~c;
00385     h0 = (h0 & c) | g0;
00386     h1 = (h1 & c) | g1;
00387     h2 = (h2 & c) | g2;
00388 
00389     /* h = (h + pad) */
00390     t0 = ctx->pad[0];
00391     t1 = ctx->pad[1];
00392 
00393     h0 += (( t0                    ) & 0xfffffffffff)    ;
00394     c = (h0 >> 44); h0 &= 0xfffffffffff;
00395     h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff) + c;
00396     c = (h1 >> 44); h1 &= 0xfffffffffff;
00397     h2 += (((t1 >> 24)             ) & 0x3ffffffffff) + c;
00398     h2 &= 0x3ffffffffff;
00399 
00400     /* mac = h % (2^128) */
00401     h0 = ((h0      ) | (h1 << 44));
00402     h1 = ((h1 >> 20) | (h2 << 24));
00403 
00404     U64TO8(mac + 0, h0);
00405     U64TO8(mac + 8, h1);
00406 
00407     /* zero out the state */
00408     ctx->h[0] = 0;
00409     ctx->h[1] = 0;
00410     ctx->h[2] = 0;
00411     ctx->r[0] = 0;
00412     ctx->r[1] = 0;
00413     ctx->r[2] = 0;
00414     ctx->pad[0] = 0;
00415     ctx->pad[1] = 0;
00416 
00417 #else /* if not 64 bit then use 32 bit */
00418     
00419     /* process the remaining block */
00420     if (ctx->leftover) {
00421         size_t i = ctx->leftover;
00422         ctx->buffer[i++] = 1;
00423         for (; i < POLY1305_BLOCK_SIZE; i++)
00424             ctx->buffer[i] = 0;
00425         ctx->final = 1;
00426         poly1305_blocks(ctx, ctx->buffer, POLY1305_BLOCK_SIZE);
00427     }
00428 
00429     /* fully carry h */
00430     h0 = ctx->h[0];
00431     h1 = ctx->h[1];
00432     h2 = ctx->h[2];
00433     h3 = ctx->h[3];
00434     h4 = ctx->h[4];
00435 
00436                  c = h1 >> 26; h1 = h1 & 0x3ffffff;
00437     h2 +=     c; c = h2 >> 26; h2 = h2 & 0x3ffffff;
00438     h3 +=     c; c = h3 >> 26; h3 = h3 & 0x3ffffff;
00439     h4 +=     c; c = h4 >> 26; h4 = h4 & 0x3ffffff;
00440     h0 += c * 5; c = h0 >> 26; h0 = h0 & 0x3ffffff;
00441     h1 +=     c;
00442 
00443     /* compute h + -p */
00444     g0 = h0 + 5; c = g0 >> 26; g0 &= 0x3ffffff;
00445     g1 = h1 + c; c = g1 >> 26; g1 &= 0x3ffffff;
00446     g2 = h2 + c; c = g2 >> 26; g2 &= 0x3ffffff;
00447     g3 = h3 + c; c = g3 >> 26; g3 &= 0x3ffffff;
00448     g4 = h4 + c - (1 << 26);
00449 
00450     /* select h if h < p, or h + -p if h >= p */
00451     mask = (g4 >> ((sizeof(word32) * 8) - 1)) - 1;
00452     g0 &= mask;
00453     g1 &= mask;
00454     g2 &= mask;
00455     g3 &= mask;
00456     g4 &= mask;
00457     mask = ~mask;
00458     h0 = (h0 & mask) | g0;
00459     h1 = (h1 & mask) | g1;
00460     h2 = (h2 & mask) | g2;
00461     h3 = (h3 & mask) | g3;
00462     h4 = (h4 & mask) | g4;
00463 
00464     /* h = h % (2^128) */
00465     h0 = ((h0      ) | (h1 << 26)) & 0xffffffff;
00466     h1 = ((h1 >>  6) | (h2 << 20)) & 0xffffffff;
00467     h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
00468     h3 = ((h3 >> 18) | (h4 <<  8)) & 0xffffffff;
00469 
00470     /* mac = (h + pad) % (2^128) */
00471     f = (word64)h0 + ctx->pad[0]            ; h0 = (word32)f;
00472     f = (word64)h1 + ctx->pad[1] + (f >> 32); h1 = (word32)f;
00473     f = (word64)h2 + ctx->pad[2] + (f >> 32); h2 = (word32)f;
00474     f = (word64)h3 + ctx->pad[3] + (f >> 32); h3 = (word32)f;
00475 
00476     U32TO8(mac + 0, h0);
00477     U32TO8(mac + 4, h1);
00478     U32TO8(mac + 8, h2);
00479     U32TO8(mac + 12, h3);
00480 
00481     /* zero out the state */
00482     ctx->h[0] = 0;
00483     ctx->h[1] = 0;
00484     ctx->h[2] = 0;
00485     ctx->h[3] = 0;
00486     ctx->h[4] = 0;
00487     ctx->r[0] = 0;
00488     ctx->r[1] = 0;
00489     ctx->r[2] = 0;
00490     ctx->r[3] = 0;
00491     ctx->r[4] = 0;
00492     ctx->pad[0] = 0;
00493     ctx->pad[1] = 0;
00494     ctx->pad[2] = 0;
00495     ctx->pad[3] = 0;
00496 
00497 #endif
00498 
00499     return 0;
00500 }
00501 
00502 
00503 int Poly1305Update(Poly1305* ctx, const byte* m, word32 bytes) {
00504 
00505     size_t i;
00506 
00507 #ifdef CHACHA_AEAD_TEST
00508     word32 k;
00509     printf("Raw input to poly:\n");
00510     for (k = 0; k < bytes; k++) {
00511         printf("%02x", m[k]);
00512         if ((k+1) % 16 == 0)
00513             printf("\n");
00514     }
00515     printf("\n");
00516 #endif
00517     
00518     if (ctx == NULL)
00519         return BAD_FUNC_ARG;
00520 
00521     /* handle leftover */
00522     if (ctx->leftover) {
00523         size_t want = (POLY1305_BLOCK_SIZE - ctx->leftover);
00524         if (want > bytes)
00525             want = bytes;
00526         for (i = 0; i < want; i++)
00527             ctx->buffer[ctx->leftover + i] = m[i];
00528         bytes -= want;
00529         m += want;
00530         ctx->leftover += want;
00531         if (ctx->leftover < POLY1305_BLOCK_SIZE)
00532             return 0;
00533         poly1305_blocks(ctx, ctx->buffer, POLY1305_BLOCK_SIZE);
00534         ctx->leftover = 0;
00535     }
00536 
00537     /* process full blocks */
00538     if (bytes >= POLY1305_BLOCK_SIZE) {
00539         size_t want = (bytes & ~(POLY1305_BLOCK_SIZE - 1));
00540         poly1305_blocks(ctx, m, want);
00541         m += want;
00542         bytes -= want;
00543     }
00544 
00545     /* store leftover */
00546     if (bytes) {
00547         for (i = 0; i < bytes; i++)
00548             ctx->buffer[ctx->leftover + i] = m[i];
00549         ctx->leftover += bytes;
00550     }
00551     return 0;
00552 }
00553 #endif /* HAVE_POLY1305 */
00554