A fine-tuned implementation of the SHA256 hashing algorithm.

Dependents:   EntropySource Wallet_v1

Revision:
2:1991439ea6b8
Parent:
0:772b6de3a841
Child:
3:f19b10394f9c
--- a/SHA256.cpp	Mon Jun 20 00:25:41 2011 +0000
+++ b/SHA256.cpp	Mon Jun 20 09:45:02 2011 +0000
@@ -2,6 +2,7 @@
 // Based on:
 //   http://en.wikipedia.org/wiki/SHA-2
 //   http://www.iwar.org.uk/comsec/resources/cipher/sha256-384-512.pdf
+//   some of the OpenSSL optimizations
 
 #include "SHA256.h"
 
@@ -15,34 +16,8 @@
 
 inline unsigned int rotate_right(unsigned int x, int shift)
 {
-    return (x >> shift) | (x << (32 - shift));
-}
-
-const unsigned int k[64] = {
-    0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5, 0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5,
-    0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3, 0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174,
-    0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC, 0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA,
-    0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7, 0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967,
-    0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13, 0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85,
-    0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3, 0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070,
-    0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5, 0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3,
-    0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208, 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2
-};
-
-inline unsigned int s0(unsigned int x) {
-    return rotate_right(x, 7) ^ rotate_right(x, 18) ^ (x >> 3);
-}
-
-inline unsigned int s1(unsigned int x) {
-    return rotate_right(x, 17) ^ rotate_right(x, 19) ^ (x >> 10);
-}
-
-inline unsigned int s2(unsigned int x) {
-    return rotate_right(x, 2) ^ rotate_right(x, 13) ^ rotate_right(x, 22);
-}
-
-inline unsigned int s3(unsigned int x) {
-    return rotate_right(x, 6) ^ rotate_right(x, 11) ^ rotate_right(x, 25);
+    // return (x >> shift) | (x << (32 - shift));
+    return __ror(x, shift);
 }
 
 void SHA256::reset()
@@ -60,11 +35,37 @@
 
 void SHA256::append(const char* data, int size)
 {
+    /*
+    unsigned int* dataw = reinterpret_cast<unsigned int*>(data);
+    
+    while(length & 0x3) {
+        if(data == end) {
+            length += size;
+            return;
+        }
+        buffer[3 + (length & (~0x3)) - (length & 0x3)] = *data;
+        ++length;
+        ++data;
+    }
+    if(length % 64 == 0) {
+        process_chunk();
+    }
+    
+    // Process words
+    int num_words = (end - data) / 4;
+    int index = (length % 64) / 4;
+    unsigned int* data_words = reinterpret_cast<unsigned int*>(data);
+    while(num_words--)
+    {
+        if(index == 16) {
+            process_chunk();
+        }
+    }
+    */
+    
     const char* end = data + size;
     char* buffer = reinterpret_cast<char*>(w);
-    
-    // TODO: operate in words
-    
+
     int index = length % 64;
     while(data != end) {
         int word_index = index / 4;
@@ -77,6 +78,7 @@
             index = 0;
         }
     }
+    
     length += size;
 }
 
@@ -88,9 +90,9 @@
     int last_block = trailing / 4;
     unsigned int bit_in_block = 0x80 << (24 - (trailing % 4) * 8); 
     w[last_block] |= bit_in_block;
-    w[last_block] &= ~(bit_in_block - 1);
     
     // Set all other bits to zero
+    w[last_block] &= ~(bit_in_block - 1);
     for(int i = last_block + 1; i < 15; ++i)
         w[i] = 0;
     
@@ -111,13 +113,48 @@
        hash[i] = byte_swap(hash[i]);
 }
 
+#define s0(x) (rotate_right(x, 7) ^ rotate_right(x, 18) ^ (x >> 3))
+#define s1(x) (rotate_right(x, 17) ^ rotate_right(x, 19) ^ (x >> 10))
+#define s2(x) (rotate_right(x, 2) ^ rotate_right(x, 13) ^ rotate_right(x, 22))
+#define s3(x) (rotate_right(x, 6) ^ rotate_right(x, 11) ^ rotate_right(x, 25))
+#define maj(a,b,c) ((a & b) ^ (a & c) ^ (b & c))
+#define ch(a,b,c) ((a & b) ^ ((~a) & c))
+
+const unsigned int k[64] = {
+    0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5, 0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5,
+    0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3, 0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174,
+    0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC, 0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA,
+    0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7, 0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967,
+    0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13, 0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85,
+    0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3, 0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070,
+    0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5, 0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3,
+    0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208, 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2
+};
+
+#define ROUND(a,b,c,d,e,f,g,h,i) \
+    h += w[i];\
+    h += *K++;\
+    h += s3(e); \
+    h += ch(e, f, g); \
+    d += h;\
+    h += s2(a);\
+    h += maj(a, b, c);
+
+#define W(n) w[(n) & 0xf]
+
+#define ROUND2(a,b,c,d,e,f,g,h,i) \
+    W(i) += s0(W(i+1)) + W(i+9) + s1(W(i+14));\
+    h += W(i); \
+    h += *K++;\
+    h += s3(e);\
+    h += ch(e, f, g); \
+    d += h;\
+    h += s2(a);\
+    h += maj(a, b, c);
+
 // Process a 512 bit chunk stored in w[0...15]
 void SHA256::process_chunk()
 {
-    // Extend the chunk to 64 x 32 bit
-    for(int i = 16; i < 64; ++i)
-        w[i] = w[i - 16] + s0(w[i - 15]) + w[i - 7] + s1(w[i - 2]);
-    
     // Initialize using current hash
     unsigned int a = hash[0];
     unsigned int b = hash[1];
@@ -129,20 +166,42 @@
     unsigned int h = hash[7];
     
     // Main loop
-    for(int i = 0; i < 64; ++i) {
-        unsigned int maj = (a & b) ^ (a & c) ^ (b & c);
-        unsigned int ch = (e & f) ^ ((~e) & g);
-        unsigned int t1 = h + s3(e) + ch + k[i] + w[i];
-        unsigned int t2 = maj + s2(a);
-        h = g;
-        g = f;
-        f = e;
-        e = d + t1;
-        d = c;
-        c = b;
-        b = a;
-        a = t1 + t2;
-    }
+    const unsigned int* K = k;
+    const unsigned int* K_end = k + 64;
+    ROUND(a,b,c,d,e,f,g,h,0);
+    ROUND(h,a,b,c,d,e,f,g,1);
+    ROUND(g,h,a,b,c,d,e,f,2);
+    ROUND(f,g,h,a,b,c,d,e,3);
+    ROUND(e,f,g,h,a,b,c,d,4);
+    ROUND(d,e,f,g,h,a,b,c,5);
+    ROUND(c,d,e,f,g,h,a,b,6);
+    ROUND(b,c,d,e,f,g,h,a,7);
+    ROUND(a,b,c,d,e,f,g,h,8);
+    ROUND(h,a,b,c,d,e,f,g,9);
+    ROUND(g,h,a,b,c,d,e,f,10);
+    ROUND(f,g,h,a,b,c,d,e,11);
+    ROUND(e,f,g,h,a,b,c,d,12);
+    ROUND(d,e,f,g,h,a,b,c,13);
+    ROUND(c,d,e,f,g,h,a,b,14);
+    ROUND(b,c,d,e,f,g,h,a,15);
+    do {
+        ROUND2(a,b,c,d,e,f,g,h,0);
+        ROUND2(h,a,b,c,d,e,f,g,1);
+        ROUND2(g,h,a,b,c,d,e,f,2);
+        ROUND2(f,g,h,a,b,c,d,e,3);
+        ROUND2(e,f,g,h,a,b,c,d,4);
+        ROUND2(d,e,f,g,h,a,b,c,5);
+        ROUND2(c,d,e,f,g,h,a,b,6);
+        ROUND2(b,c,d,e,f,g,h,a,7);
+        ROUND2(a,b,c,d,e,f,g,h,8);
+        ROUND2(h,a,b,c,d,e,f,g,9);
+        ROUND2(g,h,a,b,c,d,e,f,10);
+        ROUND2(f,g,h,a,b,c,d,e,11);
+        ROUND2(e,f,g,h,a,b,c,d,12);
+        ROUND2(d,e,f,g,h,a,b,c,13);
+        ROUND2(c,d,e,f,g,h,a,b,14);
+        ROUND2(b,c,d,e,f,g,h,a,15);
+    } while(K != K_end);
     
     // Update hash
     hash[0] += a;