sara matheu / Mbed 2 deprecated CurvasElipticas

Dependencies:   mbed CyaSSL

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers ecc.cpp Source File

ecc.cpp

#include "ecc.h"

#include <string.h>

typedef unsigned int uint;

/* Curve parameters for the curve selected at compile time via ECC_CURVE
   (suffixes _1.._5; judging by the word counts and the #if chain below,
   these are secp128r1, secp192r1, secp256r1, secp384r1 and secp256k1 —
   TODO confirm the numeric mapping against ecc.h).
   All big integers in this file are arrays of 32-bit words in
   LITTLE-ENDIAN WORD ORDER: index 0 holds the least significant word. */

/* Field primes p. */
#define Curve_P_1 {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFD}
#define Curve_P_2 {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}
#define Curve_P_3 {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0xFFFFFFFF}
#define Curve_P_4 {0xFFFFFFFF, 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \
    0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}
#define Curve_P_5 {0xFFFFFC2F, 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}

/* Curve coefficient b (note Curve_B_5 == 7, consistent with secp256k1). */
#define Curve_B_1 {0x2CEE5ED3, 0xD824993C, 0x1079F43D, 0xE87579C1}
#define Curve_B_2 {0xC146B9B1, 0xFEB8DEEC, 0x72243049, 0x0FA7E9AB, 0xE59C80E7, 0x64210519}
#define Curve_B_3 {0x27D2604B, 0x3BCE3C3E, 0xCC53B0F6, 0x651D06B0, 0x769886BC, 0xB3EBBD55, 0xAA3A93E7, 0x5AC635D8}
#define Curve_B_4 {0xD3EC2AEF, 0x2A85C8ED, 0x8A2ED19D, 0xC656398D, 0x5013875A, 0x0314088F, 0xFE814112, 0x181D9C6E, \
    0xE3F82D19, 0x988E056B, 0xE23EE7E4, 0xB3312FA7}
#define Curve_B_5 {0x00000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}

/* Base (generator) point G, as an {x, y} coordinate pair. */
#define Curve_G_1 { \
    {0xA52C5B86, 0x0C28607C, 0x8B899B2D, 0x161FF752}, \
    {0xDDED7A83, 0xC02DA292, 0x5BAFEB13, 0xCF5AC839}}

#define Curve_G_2 { \
    {0x82FF1012, 0xF4FF0AFD, 0x43A18800, 0x7CBF20EB, 0xB03090F6, 0x188DA80E}, \
    {0x1E794811, 0x73F977A1, 0x6B24CDD5, 0x631011ED, 0xFFC8DA78, 0x07192B95}}
    
#define Curve_G_3 { \
    {0xD898C296, 0xF4A13945, 0x2DEB33A0, 0x77037D81, 0x63A440F2, 0xF8BCE6E5, 0xE12C4247, 0x6B17D1F2}, \
    {0x37BF51F5, 0xCBB64068, 0x6B315ECE, 0x2BCE3357, 0x7C0F9E16, 0x8EE7EB4A, 0xFE1A7F9B, 0x4FE342E2}}

#define Curve_G_4 { \
    {0x72760AB7, 0x3A545E38, 0xBF55296C, 0x5502F25D, 0x82542A38, 0x59F741E0, 0x8BA79B98, 0x6E1D3B62, \
        0xF320AD74, 0x8EB1C71E, 0xBE8B0537, 0xAA87CA22}, \
    {0x90EA0E5F, 0x7A431D7C, 0x1D7E819D, 0x0A60B1CE, 0xB5F0B8C0, 0xE9DA3113, 0x289A147C, 0xF8F41DBD, \
        0x9292DC29, 0x5D9E98BF, 0x96262C6F, 0x3617DE4A}}

#define Curve_G_5 { \
    {0x16F81798, 0x59F2815B, 0x2DCE28D9, 0x029BFCDB, 0xCE870B07, 0x55A06295, 0xF9DCBBAC, 0x79BE667E}, \
    {0xFB10D4B8, 0x9C47D08F, 0xA6855419, 0xFD17B448, 0x0E1108A8, 0x5DA4FBFC, 0x26A3C465, 0x483ADA77}}

/* Order n of the base point G. */
#define Curve_N_1 {0x9038A115, 0x75A30D1B, 0x00000000, 0xFFFFFFFE}
#define Curve_N_2 {0xB4D22831, 0x146BC9B1, 0x99DEF836, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}
#define Curve_N_3 {0xFC632551, 0xF3B9CAC2, 0xA7179E84, 0xBCE6FAAD, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF}
#define Curve_N_4 {0xCCC52973, 0xECEC196A, 0x48B0A77A, 0x581A0DB2, 0xF4372DDF, 0xC7634D81, 0xFFFFFFFF, 0xFFFFFFFF, \
    0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}
#define Curve_N_5 {0xD0364141, 0xBFD25E8C, 0xAF48A03B, 0xBAAEDCE6, 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}

/* Parameters of the active curve, stitched together from the macros above
   (ECC_CONCAT and NUM_ECC_DIGITS come from ecc.h). */
static uint32_t curve_p[NUM_ECC_DIGITS] = ECC_CONCAT(Curve_P_, ECC_CURVE);
static uint32_t curve_b[NUM_ECC_DIGITS] = ECC_CONCAT(Curve_B_, ECC_CURVE);
static EccPoint curve_G = ECC_CONCAT(Curve_G_, ECC_CURVE);
static uint32_t curve_n[NUM_ECC_DIGITS] = ECC_CONCAT(Curve_N_, ECC_CURVE);
00055 static void vli_clear(uint32_t *p_vli)
00056 {
00057     uint i;
00058     for(i=0; i<NUM_ECC_DIGITS; ++i)
00059     {
00060         p_vli[i] = 0;
00061     }
00062 }
00063 
00064 /* Returns 1 if p_vli == 0, 0 otherwise. */
00065 static int vli_isZero(const uint32_t *p_vli)
00066 {
00067     uint i;
00068     for(i = 0; i < NUM_ECC_DIGITS; ++i)
00069     {
00070         if(p_vli[i])
00071         {
00072             return 0;
00073         }
00074     }
00075     return 1;
00076 }
00077 
/* Returns nonzero if bit p_bit of p_vli is set, zero otherwise.
   Note: the nonzero result is the masked word, not normalized to 1. */
static uint32_t vli_testBit(const uint32_t *p_vli, const unsigned int p_bit)
{
    /* Shift an unsigned 32-bit constant: the original `1 << (p_bit % 32)`
       left-shifts the signed int literal 1, which is undefined behavior
       when p_bit % 32 == 31. */
    return (p_vli[p_bit/32] & ((uint32_t)1 << (p_bit % 32)));
}
00083 
00084 /* Counts the number of 32-bit "digits" in p_vli. */
00085 static uint vli_numDigits(const uint32_t *p_vli)
00086 {
00087     int i;
00088     /* Search from the end until we find a non-zero digit.
00089        We do it in reverse because we expect that most digits will be nonzero. */
00090     for(i = NUM_ECC_DIGITS - 1; i >= 0 && p_vli[i] == 0; --i)
00091     {
00092     }
00093 
00094     return (i + 1);
00095 }
00096 
00097 /* Counts the number of bits required for p_vli. */
00098 static uint vli_numBits(const uint32_t *p_vli)
00099 {
00100     uint i;
00101     uint32_t l_digit;
00102     
00103     uint l_numDigits = vli_numDigits(p_vli);
00104     if(l_numDigits == 0)
00105     {
00106         return 0;
00107     }
00108 
00109     l_digit = p_vli[l_numDigits - 1];
00110     for(i=0; l_digit; ++i)
00111     {
00112         l_digit >>= 1;
00113     }
00114     
00115     return ((l_numDigits - 1) * 32 + i);
00116 }
00117 
00118 /* Sets p_dest = p_src. */
00119 static void vli_set(uint32_t *p_dest, const uint32_t *p_src)
00120 {
00121     uint i;
00122     for(i=0; i<NUM_ECC_DIGITS; ++i)
00123     {
00124         p_dest[i] = p_src[i];
00125     }
00126 }
00127 
00128 /* Returns sign of p_left - p_right. */
00129 static int vli_cmp(const uint32_t *p_left, const uint32_t *p_right)
00130 {
00131     int i;
00132     for(i = NUM_ECC_DIGITS-1; i >= 0; --i)
00133     {
00134         if(p_left[i] > p_right[i])
00135         {
00136             return 1;
00137         }
00138         else if(p_left[i] < p_right[i])
00139         {
00140             return -1;
00141         }
00142     }
00143     return 0;
00144 }
00145 
00146 /* Computes p_result = p_in << c, returning carry. Can modify in place (if p_result == p_in). 0 < p_shift < 32. */
00147 static uint32_t vli_lshift(uint32_t *p_result, const uint32_t *p_in, const unsigned int p_shift)
00148 {
00149     uint32_t l_carry = 0;
00150     uint i;
00151     for(i = 0; i < NUM_ECC_DIGITS; ++i)
00152     {
00153         uint32_t l_temp = p_in[i];
00154         p_result[i] = (l_temp << p_shift) | l_carry;
00155         l_carry = l_temp >> (32 - p_shift);
00156     }
00157     
00158     return l_carry;
00159 }
00160 
00161 /* Computes p_vli = p_vli >> 1. */
00162 static void vli_rshift1(uint32_t *p_vli)
00163 {
00164     uint32_t *l_end = p_vli;
00165     uint32_t l_carry = 0;
00166     
00167     p_vli += NUM_ECC_DIGITS;
00168     while(p_vli-- > l_end)
00169     {
00170         uint32_t l_temp = *p_vli;
00171         *p_vli = (l_temp >> 1) | l_carry;
00172         l_carry = l_temp << 31;
00173     }
00174 }
00175 
/* Computes p_result = p_left + p_right, returning the carry out (0 or 1).
   May operate in place (p_result aliasing p_left and/or p_right).
   On ARM/Thumb targets an inline-assembly loop is used; otherwise a
   portable C loop. */
static uint32_t vli_add(uint32_t *p_result, const uint32_t *p_left, const uint32_t *p_right)
{
#if (ECC_ASM == ecc_asm_thumb || ECC_ASM == ecc_asm_thumb2 || ECC_ASM == ecc_asm_arm)
    uint32_t l_counter = NUM_ECC_DIGITS;
    uint32_t l_carry = 0; /* carry = 0 initially */
    uint32_t l_left;
    uint32_t l_right;
    
    asm volatile (
        ".syntax unified \n\t"
        "1: \n\t"
        "ldmia %[lptr]!, {%[left]} \n\t"  /* Load left word. */
        "ldmia %[rptr]!, {%[right]} \n\t" /* Load right word. */
        "lsrs %[carry], #1 \n\t"          /* Set up carry flag (l_carry = 0 after this). */
        "adcs %[left], %[right] \n\t"     /* Add with carry. */
        "adcs %[carry], %[carry] \n\t"    /* Store carry bit in l_carry. */
        "stmia %[dptr]!, {%[left]} \n\t"  /* Store result word. */
        "subs %[ctr], #1 \n\t"            /* Decrement index. */
        "bne 1b \n\t"                     /* Loop until index == 0. */
    #if (ECC_ASM != ecc_asm_thumb2)
        ".syntax divided \n\t"
    #endif
    #if (ECC_ASM == ecc_asm_thumb)
        : [dptr] "+l" (p_result), [lptr] "+l" (p_left), [rptr] "+l" (p_right), [ctr] "+l" (l_counter), [carry] "+l" (l_carry), [left] "=l" (l_left), [right] "=l" (l_right)
    #else
        : [dptr] "+r" (p_result), [lptr] "+r" (p_left), [rptr] "+r" (p_right), [ctr] "+r" (l_counter), [carry] "+r" (l_carry), [left] "=r" (l_left), [right] "=r" (l_right)
    #endif
        :
        : "cc", "memory"
    );
    return l_carry;
    
#else

    uint32_t l_carry = 0;
    uint i;
    for(i=0; i<NUM_ECC_DIGITS; ++i)
    {
        uint32_t l_sum = p_left[i] + p_right[i] + l_carry;
        /* Carry detection: if l_sum == p_left[i], then p_right[i] + l_carry
           was a multiple of 2^32 (either 0, carry unchanged, or exactly 2^32,
           carry unchanged at 1), so the previous l_carry is still correct
           and must NOT be overwritten. Otherwise a wrap occurred iff the sum
           is smaller than the left operand. */
        if(l_sum != p_left[i])
        {
            l_carry = (l_sum < p_left[i]);
        }
        p_result[i] = l_sum;
    }
    return l_carry;
#endif
}
00225 
/* Computes p_result = p_left - p_right, returning the borrow (0 or 1).
   May operate in place. On ARM/Thumb targets an inline-assembly loop is
   used; otherwise a portable C loop. */
static uint32_t vli_sub(uint32_t *p_result, const uint32_t *p_left, const uint32_t *p_right)
{
#if (ECC_ASM == ecc_asm_thumb || ECC_ASM == ecc_asm_thumb2 || ECC_ASM == ecc_asm_arm)
    uint32_t l_counter = NUM_ECC_DIGITS;
    uint32_t l_carry = 1; /* carry = 1 initially (means don't borrow) */
    uint32_t l_left;
    uint32_t l_right;
    
    asm volatile (
        ".syntax unified \n\t"
        "1: \n\t"
        "ldmia %[lptr]!, {%[left]} \n\t"  /* Load left word. */
        "ldmia %[rptr]!, {%[right]} \n\t" /* Load right word. */
        "lsrs %[carry], #1 \n\t"          /* Set up carry flag (l_carry = 0 after this). */
        "sbcs %[left], %[right] \n\t"     /* Subtract with borrow. */
        "adcs %[carry], %[carry] \n\t"    /* Store carry bit in l_carry. */
        "stmia %[dptr]!, {%[left]} \n\t"  /* Store result word. */
        "subs %[ctr], #1 \n\t"            /* Decrement index. */
        "bne 1b \n\t"                     /* Loop until index == 0. */
    #if (ECC_ASM != ecc_asm_thumb2)
        ".syntax divided \n\t"
    #endif
    #if (ECC_ASM == ecc_asm_thumb)
        : [dptr] "+l" (p_result), [lptr] "+l" (p_left), [rptr] "+l" (p_right), [ctr] "+l" (l_counter), [carry] "+l" (l_carry), [left] "=l" (l_left), [right] "=l" (l_right)
    #else
        : [dptr] "+r" (p_result), [lptr] "+r" (p_left), [rptr] "+r" (p_right), [ctr] "+r" (l_counter), [carry] "+r" (l_carry), [left] "=r" (l_left), [right] "=r" (l_right)
    #endif
        :
        : "cc", "memory"
    );
    /* ARM's carry flag is inverted for subtraction: carry set == no borrow. */
    return !l_carry;

#else

    uint32_t l_borrow = 0;
    uint i;
    for(i=0; i<NUM_ECC_DIGITS; ++i)
    {
        uint32_t l_diff = p_left[i] - p_right[i] - l_borrow;
        /* Borrow detection mirrors the carry trick in vli_add: when
           l_diff == p_left[i], p_right[i] + l_borrow wrapped to 0 and the
           previous borrow remains valid; otherwise a borrow occurred iff
           the difference is larger than the left operand (wrap-around). */
        if(l_diff != p_left[i])
        {
            l_borrow = (l_diff > p_left[i]);
        }
        p_result[i] = l_diff;
    }
    return l_borrow;
#endif
}
00275 
/* Computes p_result = p_left * p_right.
   p_result must have room for 2 * NUM_ECC_DIGITS words. The product is
   built column by column (index k = i + j), accumulating partial products
   into a 96-bit accumulator (c0, c1, c2) / (r01, r2) so carries never
   overflow. Three variants: ARM/Thumb-2 asm with umull, Thumb-1 asm that
   synthesizes the 32x32->64 multiply from 16-bit halves, and portable C. */
static void vli_mult(uint32_t *p_result, const uint32_t *p_left, const uint32_t *p_right)
{
#if (ECC_ASM == ecc_asm_thumb2 || ECC_ASM == ecc_asm_arm)
    uint32_t c0 = 0;
    uint32_t c1 = 0;
    uint32_t c2 = 0;
    uint32_t k = 0;
    uint32_t i;
    uint32_t t0, t1;
    
    /* i and k below are byte offsets (multiples of 4), not word indices. */
    asm volatile (
        ".syntax unified \n\t"
        
        "1: \n\t" /* outer loop (k < NUM_ECC_DIGITS) */
        "movs %[i], #0 \n\t" /* i = 0 */
        "b 3f \n\t"
        
        "2: \n\t" /* outer loop (k >= NUM_ECC_DIGITS) */
        "movs %[i], %[k] \n\t"      /* i = k */
        "subs %[i], %[eccdm1] \n\t" /* i = k - (NUM_ECC_DIGITS - 1) (times 4) */
        
        "3: \n\t" /* inner loop */
        "subs %[t0], %[k], %[i] \n\t" /* t0 = k-i */
        
        "ldr %[t1], [%[right], %[t0]] \n\t" /* t1 = p_right[k-i] */
        "ldr %[t0], [%[left], %[i]] \n\t"   /* t0 = p_left[i] */
        
        "umull %[t0], %[t1], %[t0], %[t1] \n\t" /* (t0, t1) = p_left[i] * p_right[k-i] */
        
        "adds %[c0], %[t0] \n\t" /* add low word to c0 */
        "adcs %[c1], %[t1] \n\t" /* add high word to c1, including carry */
        "adcs %[c2], #0 \n\t"    /* add carry to c2 */

        "adds %[i], #4 \n\t"     /* i += 4 */
        "cmp %[i], %[eccd] \n\t" /* i < NUM_ECC_DIGITS (times 4)? */
        "bge 4f \n\t" /* if not, exit the loop */
        "cmp %[i], %[k] \n\t"    /* i <= k? */
        "ble 3b \n\t" /* if so, continue looping */
        
        "4: \n\t" /* end inner loop */
        
        "str %[c0], [%[result], %[k]] \n\t" /* p_result[k] = c0 */
        "mov %[c0], %[c1] \n\t"     /* c0 = c1 */
        "mov %[c1], %[c2] \n\t"     /* c1 = c2 */
        "movs %[c2], #0 \n\t"       /* c2 = 0 */
        "adds %[k], #4 \n\t"        /* k += 4 */
        "cmp %[k], %[eccd] \n\t"    /* k < NUM_ECC_DIGITS (times 4) ? */
        "blt 1b \n\t" /* if not, loop back, start with i = 0 */
        "cmp %[k], %[eccd2m1] \n\t" /* k < NUM_ECC_DIGITS * 2 - 1 (times 4) ? */
        "blt 2b \n\t" /* if not, loop back, start with i = (k+1) - NUM_ECC_DIGITS */
        /* end outer loop */
        
        "str %[c0], [%[result], %[k]] \n\t" /* p_result[NUM_ECC_DIGITS * 2 - 1] = c0 */
    #if (ECC_ASM != ecc_asm_thumb2)
        ".syntax divided \n\t"
    #endif
        : [c0] "+r" (c0), [c1] "+r" (c1), [c2] "+r" (c2), [k] "+r" (k), [i] "=&r" (i), [t0] "=&r" (t0), [t1] "=&r" (t1)
        : [result] "r" (p_result), [left] "r" (p_left), [right] "r" (p_right),
          [eccd] "I" (NUM_ECC_DIGITS * 4), [eccdm1] "I" ((NUM_ECC_DIGITS-1) * 4), [eccd2m1] "I" ((NUM_ECC_DIGITS * 2 - 1) * 4)
        : "cc", "memory"
    );
    
#elif (ECC_ASM == ecc_asm_thumb)

    /* NOTE(review): these initializers assign const pointers to non-const
       register variables (const is discarded); the asm only reads through
       r1/r2, but a (uint32_t *) cast would silence the warning — confirm
       against upstream. */
    register uint32_t *r0 asm("r0") = p_result;
    register uint32_t *r1 asm("r1") = p_left;
    register uint32_t *r2 asm("r2") = p_right;
    
    asm volatile (
        ".syntax unified \n\t"
        "movs r3, #0 \n\t" /* c0 = 0 */
        "movs r4, #0 \n\t" /* c1 = 0 */
        "movs r5, #0 \n\t" /* c2 = 0 */
        "movs r6, #0 \n\t" /* k = 0 */
        
        "push {r0} \n\t" /* keep p_result on the stack */
        
        "1: \n\t" /* outer loop (k < NUM_ECC_DIGITS) */
        "movs r7, #0 \n\t" /* r7 = i = 0 */
        "b 3f \n\t"
        
        "2: \n\t" /* outer loop (k >= NUM_ECC_DIGITS) */
        "movs r7, r6 \n\t"        /* r7 = k */
        "subs r7, %[eccdm1] \n\t" /* r7 = i = k - (NUM_ECC_DIGITS - 1) (times 4) */
        
        "3: \n\t" /* inner loop */
        "push {r3, r4, r5, r6} \n\t" /* push things, r3 (c0) is at the top of stack. */
        "subs r0, r6, r7 \n\t"       /* r0 = k-i */
        
        "ldr r4, [r2, r0] \n\t" /* r4 = p_right[k-i] */
        "ldr r0, [r1, r7] \n\t" /* r0 = p_left[i] */
        
        /* 32x32 -> 64 multiply via 16-bit halves (Thumb-1 has no umull):
           a = (a1<<16)+a0, b = (b1<<16)+b0. */
        "lsrs r3, r0, #16 \n\t" /* r3 = a1 */
        "uxth r0, r0 \n\t"      /* r0 = a0 */
        
        "lsrs r5, r4, #16 \n\t" /* r5 = b1 */
        "uxth r4, r4 \n\t"      /* r4 = b0 */
        
        "movs r6, r3 \n\t"     /* r6 = a1 */
        "muls r6, r5, r6 \n\t" /* r6 = a1*b1 */
        "muls r3, r4, r3 \n\t" /* r3 = b0*a1 */
        "muls r5, r0, r5 \n\t" /* r5 = a0*b1 */
        "muls r0, r4, r0 \n\t" /* r0 = a0*b0 */
        
        "movs r4, #0 \n\t"  /* r4 = 0 */
        "adds r3, r5 \n\t"  /* r3 = b0*a1 + a0*b1 */
        "adcs r4, r4 \n\t"  /* r4 = carry */
        "lsls r4, #16 \n\t" /* r4 = carry << 16 */
        "adds r6, r4 \n\t"  /* r6 = a1*b1 + carry */
        
        "lsls r4, r3, #16 \n\t" /* r4 = (b0*a1 + a0*b1) << 16 */
        "lsrs r3, #16 \n\t"     /* r3 = (b0*a1 + a0*b1) >> 16 */
        "adds r0, r4 \n\t"      /* r0 = low word = a0*b0 + ((b0*a1 + a0*b1) << 16) */
        "adcs r6, r3 \n\t"      /* r6 = high word = a1*b1 + carry + ((b0*a1 + a0*b1) >> 16) */
        
        "pop {r3, r4, r5} \n\t" /* r3 = c0, r4 = c1, r5 = c2 */
        "adds r3, r0 \n\t"      /* add low word to c0 */
        "adcs r4, r6 \n\t"      /* add high word to c1, including carry */
        "movs r0, #0 \n\t"      /* r0 = 0 (does not affect carry bit) */
        "adcs r5, r0 \n\t"      /* add carry to c2 */
        
        "pop {r6} \n\t" /* r6 = k */

        "adds r7, #4 \n\t"     /* i += 4 */
        "cmp r7, %[eccd] \n\t" /* i < NUM_ECC_DIGITS (times 4)? */
        "bge 4f \n\t" /* if not, exit the loop */
        "cmp r7, r6 \n\t"      /* i <= k? */
        "ble 3b \n\t" /* if so, continue looping */
        
        "4: \n\t" /* end inner loop */
        
        "ldr r0, [sp, #0] \n\t" /* r0 = p_result */
        
        "str r3, [r0, r6] \n\t"   /* p_result[k] = c0 */
        "mov r3, r4 \n\t"         /* c0 = c1 */
        "mov r4, r5 \n\t"         /* c1 = c2 */
        "movs r5, #0 \n\t"        /* c2 = 0 */
        "adds r6, #4 \n\t"        /* k += 4 */
        "cmp r6, %[eccd] \n\t"    /* k < NUM_ECC_DIGITS (times 4) ? */
        "blt 1b \n\t" /* if not, loop back, start with i = 0 */
        "cmp r6, %[eccd2m1] \n\t" /* k < NUM_ECC_DIGITS * 2 - 1 (times 4) ? */
        "blt 2b \n\t" /* if not, loop back, start with i = (k+1) - NUM_ECC_DIGITS */
        /* end outer loop */
        
        "str r3, [r0, r6] \n\t" /* p_result[NUM_ECC_DIGITS * 2 - 1] = c0 */
        "pop {r0} \n\t"         /* pop p_result off the stack */
        
        ".syntax divided \n\t"
        : 
        : [r0] "l" (r0), [r1] "l" (r1), [r2] "l" (r2), [eccd] "I" (NUM_ECC_DIGITS * 4), [eccdm1] "I" ((NUM_ECC_DIGITS-1) * 4), [eccd2m1] "I" ((NUM_ECC_DIGITS * 2 - 1) * 4)
        : "r3", "r4", "r5", "r6", "r7", "cc", "memory"
    );

#else

    uint64_t r01 = 0; /* low 64 bits of the column accumulator */
    uint32_t r2 = 0;  /* bits 64..95 of the column accumulator */
    
    uint i, k;
    
    /* Compute each digit of p_result in sequence, maintaining the carries. */
    for(k=0; k < NUM_ECC_DIGITS*2 - 1; ++k)
    {
        /* Column k sums p_left[i] * p_right[k-i] over all valid i. */
        uint l_min = (k < NUM_ECC_DIGITS ? 0 : (k + 1) - NUM_ECC_DIGITS);
        for(i=l_min; i<=k && i<NUM_ECC_DIGITS; ++i)
        {
            uint64_t l_product = (uint64_t)p_left[i] * p_right[k-i];
            r01 += l_product;
            r2 += (r01 < l_product); /* carry out of the 64-bit accumulator */
        }
        p_result[k] = (uint32_t)r01;
        r01 = (r01 >> 32) | (((uint64_t)r2) << 32); /* shift accumulator down one column */
        r2 = 0;
    }
    
    p_result[NUM_ECC_DIGITS*2 - 1] = (uint32_t)r01;
#endif
}
00455 
/* Computes p_result = (p_left + p_right) % p_mod.
   Assumes p_left < p_mod and p_right < p_mod, and p_result != p_mod. */
static void vli_modAdd(uint32_t *p_result, const uint32_t *p_left,
    const uint32_t *p_right, const uint32_t *p_mod)
{
    uint32_t l_overflow = vli_add(p_result, p_left, p_right);

    /* Since both inputs are < p_mod, the raw sum is < 2 * p_mod; at most one
       subtraction of p_mod is ever needed. */
    if(l_overflow != 0 || vli_cmp(p_result, p_mod) >= 0)
    {
        vli_sub(p_result, p_result, p_mod);
    }
}
00467 
/* Computes p_result = (p_left - p_right) % p_mod.
   Assumes p_left < p_mod and p_right < p_mod, and p_result != p_mod. */
static void vli_modSub(uint32_t *p_result, const uint32_t *p_left,
    const uint32_t *p_right, const uint32_t *p_mod)
{
    uint32_t l_wrapped = vli_sub(p_result, p_left, p_right);
    if(l_wrapped == 0)
    {
        return;
    }
    /* The subtraction wrapped below zero: p_result currently holds the
       2^N-complement of the true difference. Adding p_mod (letting the
       carry overflow away) yields the correct residue, since -x mod m == m - x. */
    vli_add(p_result, p_result, p_mod);
}
00480 
00481 #if (ECC_CURVE == secp128r1)
00482 
/* Computes p_result = p_product % curve_p for secp128r1.
   See algorithm 5 and 6 from http://www.isys.uni-klu.ac.at/PDF/2001-0126-MT.pdf
   Each step folds shifted copies of the high half of the 256-bit product
   (words 4..7) back into the low 128 bits, exploiting the special form of
   the prime; l_carry accumulates the word-9 overflow across the additions
   and is cleared by the final subtraction loop. */
static void vli_mmod_fast(uint32_t *p_result, const uint32_t *p_product)
{
    uint32_t l_tmp[NUM_ECC_DIGITS];
    int l_carry;
    
    vli_set(p_result, p_product);
    
    l_tmp[0] = p_product[4];
    l_tmp[1] = p_product[5];
    l_tmp[2] = p_product[6];
    l_tmp[3] = (p_product[7] & 0x00000001) | (p_product[4] << 1);
    l_carry = vli_add(p_result, p_result, l_tmp);
    
    l_tmp[0] = (p_product[4] >> 31) | (p_product[5] << 1);
    l_tmp[1] = (p_product[5] >> 31) | (p_product[6] << 1);
    l_tmp[2] = (p_product[6] >> 31) | (p_product[7] << 1);
    l_tmp[3] = (p_product[7] >> 31) | ((p_product[4] & 0x80000000) >> 30) | (p_product[5] << 2);
    l_carry += vli_add(p_result, p_result, l_tmp);
    
    l_tmp[0] = (p_product[5] >> 30) | (p_product[6] << 2);
    l_tmp[1] = (p_product[6] >> 30) | (p_product[7] << 2);
    l_tmp[2] = (p_product[7] >> 30);
    l_tmp[3] = ((p_product[5] & 0xC0000000) >> 29) | (p_product[6] << 3);
    l_carry += vli_add(p_result, p_result, l_tmp);
    
    l_tmp[0] = (p_product[6] >> 29) | (p_product[7] << 3);
    l_tmp[1] = (p_product[7] >> 29);
    l_tmp[2] = 0;
    l_tmp[3] = ((p_product[6] & 0xE0000000) >> 28) | (p_product[7] << 4);
    l_carry += vli_add(p_result, p_result, l_tmp);
    
    l_tmp[0] = (p_product[7] >> 28);
    l_tmp[1] = 0;
    l_tmp[2] = 0;
    l_tmp[3] = (p_product[7] & 0xFFFFFFFE);
    l_carry += vli_add(p_result, p_result, l_tmp);
    
    l_tmp[0] = 0;
    l_tmp[1] = 0;
    l_tmp[2] = 0;
    l_tmp[3] = ((p_product[7] & 0xF0000000) >> 27);
    l_carry += vli_add(p_result, p_result, l_tmp);
    
    /* Final reduction: subtract curve_p while there is overflow or while
       p_result >= curve_p (vli_cmp == 1 means curve_p > p_result). */
    while(l_carry || vli_cmp(curve_p, p_result) != 1)
    {
        l_carry -= vli_sub(p_result, p_result, curve_p);
    }
}
00533 
00534 #elif (ECC_CURVE == secp192r1)
00535 
/* Computes p_result = p_product % curve_p for secp192r1.
   See algorithm 5 and 6 from http://www.isys.uni-klu.ac.at/PDF/2001-0126-MT.pdf
   The prime 2^192 - 2^64 - 1 lets the high half (words 6..11) be folded
   into the low 192 bits with three shifted additions. */
static void vli_mmod_fast(uint32_t *p_result, const uint32_t *p_product)
{
    uint32_t l_tmp[NUM_ECC_DIGITS];
    int l_carry;
    
    vli_set(p_result, p_product);
    
    /* Fold in the high half at weight 2^0. */
    vli_set(l_tmp, &p_product[6]);
    l_carry = vli_add(p_result, p_result, l_tmp);
    
    /* Fold in words 6..9 at weight 2^64. */
    l_tmp[0] = l_tmp[1] = 0;
    l_tmp[2] = p_product[6];
    l_tmp[3] = p_product[7];
    l_tmp[4] = p_product[8];
    l_tmp[5] = p_product[9];
    l_carry += vli_add(p_result, p_result, l_tmp);
    
    /* Fold in words 10..11 at weights 2^0 and 2^64. */
    l_tmp[0] = l_tmp[2] = p_product[10];
    l_tmp[1] = l_tmp[3] = p_product[11];
    l_tmp[4] = l_tmp[5] = 0;
    l_carry += vli_add(p_result, p_result, l_tmp);
    
    /* Final reduction: subtract curve_p until p_result < curve_p and no
       overflow remains. */
    while(l_carry || vli_cmp(curve_p, p_result) != 1)
    {
        l_carry -= vli_sub(p_result, p_result, curve_p);
    }
}
00565 
00566 #elif (ECC_CURVE == secp256r1)
00567 
/* Computes p_result = p_product % curve_p for secp256r1,
   from http://www.nsa.gov/ia/_files/nist-routines.pdf
   The 512-bit product is reduced as t + 2*s1 + 2*s2 + s3 + s4 - d1 - d2
   - d3 - d4, where each term is a fixed rearrangement of the product's
   words 8..15. l_carry tracks the signed overflow of the running sum and
   is resolved at the end by adding or subtracting curve_p. */
static void vli_mmod_fast(uint32_t *p_result, const uint32_t *p_product)
{
    uint32_t l_tmp[NUM_ECC_DIGITS];
    int l_carry;
    
    /* t */
    vli_set(p_result, p_product);
    
    /* s1 */
    l_tmp[0] = l_tmp[1] = l_tmp[2] = 0;
    l_tmp[3] = p_product[11];
    l_tmp[4] = p_product[12];
    l_tmp[5] = p_product[13];
    l_tmp[6] = p_product[14];
    l_tmp[7] = p_product[15];
    l_carry = vli_lshift(l_tmp, l_tmp, 1); /* s1 is doubled in the formula */
    l_carry += vli_add(p_result, p_result, l_tmp);
    
    /* s2 */
    l_tmp[3] = p_product[12];
    l_tmp[4] = p_product[13];
    l_tmp[5] = p_product[14];
    l_tmp[6] = p_product[15];
    l_tmp[7] = 0;
    l_carry += vli_lshift(l_tmp, l_tmp, 1); /* s2 is doubled as well */
    l_carry += vli_add(p_result, p_result, l_tmp);
    
    /* s3 */
    l_tmp[0] = p_product[8];
    l_tmp[1] = p_product[9];
    l_tmp[2] = p_product[10];
    l_tmp[3] = l_tmp[4] = l_tmp[5] = 0;
    l_tmp[6] = p_product[14];
    l_tmp[7] = p_product[15];
    l_carry += vli_add(p_result, p_result, l_tmp);
    
    /* s4 */
    l_tmp[0] = p_product[9];
    l_tmp[1] = p_product[10];
    l_tmp[2] = p_product[11];
    l_tmp[3] = p_product[13];
    l_tmp[4] = p_product[14];
    l_tmp[5] = p_product[15];
    l_tmp[6] = p_product[13];
    l_tmp[7] = p_product[8];
    l_carry += vli_add(p_result, p_result, l_tmp);
    
    /* d1 */
    l_tmp[0] = p_product[11];
    l_tmp[1] = p_product[12];
    l_tmp[2] = p_product[13];
    l_tmp[3] = l_tmp[4] = l_tmp[5] = 0;
    l_tmp[6] = p_product[8];
    l_tmp[7] = p_product[10];
    l_carry -= vli_sub(p_result, p_result, l_tmp);
    
    /* d2 */
    l_tmp[0] = p_product[12];
    l_tmp[1] = p_product[13];
    l_tmp[2] = p_product[14];
    l_tmp[3] = p_product[15];
    l_tmp[4] = l_tmp[5] = 0;
    l_tmp[6] = p_product[9];
    l_tmp[7] = p_product[11];
    l_carry -= vli_sub(p_result, p_result, l_tmp);
    
    /* d3 */
    l_tmp[0] = p_product[13];
    l_tmp[1] = p_product[14];
    l_tmp[2] = p_product[15];
    l_tmp[3] = p_product[8];
    l_tmp[4] = p_product[9];
    l_tmp[5] = p_product[10];
    l_tmp[6] = 0;
    l_tmp[7] = p_product[12];
    l_carry -= vli_sub(p_result, p_result, l_tmp);
    
    /* d4 */
    l_tmp[0] = p_product[14];
    l_tmp[1] = p_product[15];
    l_tmp[2] = 0;
    l_tmp[3] = p_product[9];
    l_tmp[4] = p_product[10];
    l_tmp[5] = p_product[11];
    l_tmp[6] = 0;
    l_tmp[7] = p_product[13];
    l_carry -= vli_sub(p_result, p_result, l_tmp);
    
    /* Resolve the accumulated signed overflow. */
    if(l_carry < 0)
    {
        do
        {
            l_carry += vli_add(p_result, p_result, curve_p);
        } while(l_carry < 0);
    }
    else
    {
        /* Subtract curve_p until p_result < curve_p with no overflow left. */
        while(l_carry || vli_cmp(curve_p, p_result) != 1)
        {
            l_carry -= vli_sub(p_result, p_result, curve_p);
        }
    }
}
00673 
00674 #elif (ECC_CURVE == secp384r1)
00675 
/* Computes p_result = p_right * omega, where omega = 2^128 + 2^96 - 2^32 + 1
   (the low part of the secp384r1 prime's special form). p_result must hold
   NUM_ECC_DIGITS + 5 significant words (the caller zeroes 2*NUM_ECC_DIGITS). */
static void omega_mult(uint32_t *p_result, const uint32_t *p_right)
{
    /* Multiply by (2^128 + 2^96 - 2^32 + 1). */
    vli_set(p_result, p_right); /* 1 */
    p_result[3 + NUM_ECC_DIGITS] = vli_add(p_result + 3, p_result + 3, p_right); /* 2^96 + 1 */
    p_result[4 + NUM_ECC_DIGITS] = vli_add(p_result + 4, p_result + 4, p_right); /* 2^128 + 2^96 + 1 */
    if(vli_sub(p_result + 1, p_result + 1, p_right)) /* 2^128 + 2^96 - 2^32 + 1 */
    { /* Propagate borrow if necessary. */
        uint i;
        /* Decrement words above the NUM_ECC_DIGITS-word subtraction window
           until one does not wrap past zero. */
        for(i = 1 + NUM_ECC_DIGITS; ; ++i)
        {
            --p_result[i];
            if(p_result[i] != 0xffffffff)
            {
                break;
            }
        }
    }
}
00695 
00696 /* Computes p_result = p_product % curve_p
00697     see PDF "Comparing Elliptic Curve Cryptography and RSA on 8-bit CPUs"
00698     section "Curve-Specific Optimizations" */
00699 static void vli_mmod_fast(uint32_t *p_result, const uint32_t *p_product)
00700 {
00701     uint32_t l_tmp[2*NUM_ECC_DIGITS];
00702      
00703     while(!vli_isZero(p_product + NUM_ECC_DIGITS)) /* While c1 != 0 */
00704     {
00705         uint32_t l_carry = 0;
00706         uint i;
00707         
00708         vli_clear(l_tmp);
00709         vli_clear(l_tmp + NUM_ECC_DIGITS);
00710         omega_mult(l_tmp, p_product + NUM_ECC_DIGITS); /* tmp = w * c1 */
00711         vli_clear(p_product + NUM_ECC_DIGITS); /* p = c0 */
00712         
00713         /* (c1, c0) = c0 + w * c1 */
00714         for(i=0; i<NUM_ECC_DIGITS+5; ++i)
00715         {
00716             uint32_t l_sum = p_product[i] + l_tmp[i] + l_carry;
00717             if(l_sum != p_product[i])
00718             {
00719                 l_carry = (l_sum < p_product[i]);
00720             }
00721             p_product[i] = l_sum;
00722         }
00723     }
00724     
00725     while(vli_cmp(p_product, curve_p) > 0)
00726     {
00727         vli_sub(p_product, p_product, curve_p);
00728     }
00729     vli_set(p_result, p_product);
00730 }
00731 
00732 #elif (ECC_CURVE == secp256k1)
00733 
/* Computes p_result = p_right * omega, where omega = 2^32 + 977
   (2^32 + 2^9 + 2^8 + 2^7 + 2^6 + 2^4 + 1), the low part of the secp256k1
   prime's special form. p_result must hold NUM_ECC_DIGITS + 2 words. */
static void omega_mult(uint32_t *p_result, const uint32_t *p_right)
{
    /* Multiply by (2^32 + 2^9 + 2^8 + 2^7 + 2^6 + 2^4 + 1). */
    uint64_t l_mult = 0x3D1; /* everything except 2^32 */
    uint32_t l_carry = 0;
    uint i;
    /* Schoolbook multiply by the small constant 0x3D1. */
    for(i=0; i<NUM_ECC_DIGITS; ++i)
    {
        uint64_t p = l_mult * p_right[i] + l_carry;
        p_result[i] = (p & 0xffffffff);
        l_carry = p >> 32;
    }
    p_result[NUM_ECC_DIGITS] = l_carry;
    
    p_result[1 + NUM_ECC_DIGITS] = vli_add(p_result + 1, p_result + 1, p_right); /* add the 2^32 multiple */
}
00750 
00751 /* Computes p_result = p_product % curve_p  */
00752 static void vli_mmod_fast(uint32_t *p_result, const uint32_t *p_product)
00753 {
00754     uint32_t l_tmp[2*NUM_ECC_DIGITS];
00755      
00756     while(!vli_isZero(p_product + NUM_ECC_DIGITS)) /* While c1 != 0 */
00757     {
00758         uint32_t l_carry = 0;
00759         uint i;
00760         
00761         vli_clear(l_tmp);
00762         vli_clear(l_tmp + NUM_ECC_DIGITS);
00763         omega_mult(l_tmp, p_product + NUM_ECC_DIGITS); /* tmp = w * c1 */
00764         vli_clear(p_product + NUM_ECC_DIGITS); /* p = c0 */
00765         
00766         /* (c1, c0) = c0 + w * c1 */
00767         for(i=0; i<NUM_ECC_DIGITS+2; ++i)
00768         {
00769             uint32_t l_sum = p_product[i] + l_tmp[i] + l_carry;
00770             if(l_sum != p_product[i])
00771             {
00772                 l_carry = (l_sum < p_product[i]);
00773             }
00774             p_product[i] = l_sum;
00775         }
00776     }
00777     
00778     while(vli_cmp(p_product, curve_p) > 0)
00779     {
00780         vli_sub(p_product, p_product, curve_p);
00781     }
00782     vli_set(p_result, p_product);
00783 }
00784 
00785 #endif
00786 
00787 /* Computes p_result = (p_left * p_right) % curve_p. */
00788 static void vli_modMult_fast(uint32_t *p_result, const uint32_t *p_left, const uint32_t *p_right)
00789 {
00790    uint32_t l_product[2 * NUM_ECC_DIGITS];
00791    vli_mult(l_product, p_left, p_right);
00792    vli_mmod_fast(p_result, l_product);
00793 }
00794 
00795 #if ECC_SQUARE_FUNC
00796 
/* Computes p_result = p_left^2 (double-width, 2*NUM_ECC_DIGITS result).
   Schoolbook squaring, but only the "upper triangle" of partial products is
   walked: for i != k-i the product p_left[i]*p_left[k-i] occurs twice in the
   square, so it is computed once and doubled.  Three implementations are
   selected at preprocessing time:
     - ARM / Thumb-2 inline assembly (umull-based),
     - Thumb-1 inline assembly (16x16 partial products, no umull available),
     - portable C fallback using 64-bit arithmetic. */
static void vli_square(uint32_t *p_result, const uint32_t *p_left)
{
#if (ECC_ASM == ecc_asm_thumb2 || ECC_ASM == ecc_asm_arm)
    /* (c0, c1, c2) form a 96-bit column accumulator; k is the output digit
       index, kept pre-scaled by 4 because it doubles as a byte offset. */
    uint32_t c0 = 0;
    uint32_t c1 = 0;
    uint32_t c2 = 0;
    uint32_t k = 0;
    uint32_t i, tt;
    uint32_t t0, t1;
    
    asm volatile (
        ".syntax unified \n\t"
        
        "1: \n\t" /* outer loop (k < NUM_ECC_DIGITS) */
        "movs %[i], #0 \n\t" /* i = 0 */
        "b 3f \n\t"
        
        "2: \n\t" /* outer loop (k >= NUM_ECC_DIGITS) */
        "movs %[i], %[k] \n\t"      /* i = k */
        "subs %[i], %[eccdm1] \n\t" /* i = k - (NUM_ECC_DIGITS - 1) (times 4) */
        
        "3: \n\t" /* inner loop */
        "subs %[tt], %[k], %[i] \n\t" /* tt = k-i */
        
        "ldr %[t1], [%[left], %[tt]] \n\t" /* t1 = p_left[k-i] */
        "ldr %[t0], [%[left], %[i]] \n\t"  /* t0 = p_left[i] */
        
        "umull %[t0], %[t1], %[t0], %[t1] \n\t" /* (t0, t1) = p_left[i] * p_right[k-i] */
        
        "cmp %[i], %[tt] \n\t"   /* (i < k-i) ? */
        "bge 4f \n\t" /* if i >= k-i, skip */
        "lsls %[t1], #1 \n\t"    /* high word << 1 */
        "adc %[c2], #0 \n\t"     /* add carry bit to c2 */
        "lsls %[t0], #1 \n\t"       /* low word << 1 */
        "adc %[t1], #0 \n\t"     /* add carry bit to high word */
        
        "4: \n\t"

        "adds %[c0], %[t0] \n\t" /* add low word to c0 */
        "adcs %[c1], %[t1] \n\t" /* add high word to c1, including carry */
        "adc %[c2], #0 \n\t"     /* add carry to c2 */
        
        "adds %[i], #4 \n\t"          /* i += 4 */
        "cmp %[i], %[k] \n\t"         /* i <= k? */
        "bge 5f \n\t" /* if not, exit the loop */
        "subs %[tt], %[k], %[i] \n\t" /* tt = k-i */
        "cmp %[i], %[tt] \n\t"        /* i <= k-i? */
        "ble 3b \n\t" /* if so, continue looping */
        
        "5: \n\t" /* end inner loop */
        
        "str %[c0], [%[result], %[k]] \n\t" /* p_result[k] = c0 */
        "mov %[c0], %[c1] \n\t"     /* c0 = c1 */
        "mov %[c1], %[c2] \n\t"     /* c1 = c2 */
        "movs %[c2], #0 \n\t"       /* c2 = 0 */
        "adds %[k], #4 \n\t"        /* k += 4 */
        "cmp %[k], %[eccd] \n\t"    /* k < NUM_ECC_DIGITS (times 4) ? */
        "blt 1b \n\t" /* if not, loop back, start with i = 0 */
        "cmp %[k], %[eccd2m1] \n\t" /* k < NUM_ECC_DIGITS * 2 - 1 (times 4) ? */
        "blt 2b \n\t" /* if not, loop back, start with i = (k+1) - NUM_ECC_DIGITS */
        /* end outer loop */
        
        "str %[c0], [%[result], %[k]] \n\t" /* p_result[NUM_ECC_DIGITS * 2 - 1] = c0 */
    #if (ECC_ASM != ecc_asm_thumb2)
        ".syntax divided \n\t"
    #endif
        : [c0] "+r" (c0), [c1] "+r" (c1), [c2] "+r" (c2), [k] "+r" (k), [i] "=&r" (i), [tt] "=&r" (tt), [t0] "=&r" (t0), [t1] "=&r" (t1)
        : [result] "r" (p_result), [left] "r" (p_left),
          [eccd] "I" (NUM_ECC_DIGITS * 4), [eccdm1] "I" ((NUM_ECC_DIGITS-1) * 4), [eccd2m1] "I" ((NUM_ECC_DIGITS * 2 - 1) * 4)
        : "cc", "memory"
    );
    
#elif (ECC_ASM == ecc_asm_thumb)

    /* Pin the operands in r0/r1 so the hand-written Thumb-1 code below can
       reference them directly.
       NOTE(review): this assignment drops the const qualifier from p_left —
       compilers warn here; the assembly only ever reads through r1. */
    register uint32_t *r0 asm("r0") = p_result;
    register uint32_t *r1 asm("r1") = p_left;
    
    asm volatile (
        ".syntax unified \n\t"
        "movs r2, #0 \n\t" /* c0 = 0 */
        "movs r3, #0 \n\t" /* c1 = 0 */
        "movs r4, #0 \n\t" /* c2 = 0 */
        "movs r5, #0 \n\t" /* k = 0 */
        
        "push {r0} \n\t" /* keep p_result on the stack */
        
        "1: \n\t" /* outer loop (k < NUM_ECC_DIGITS) */
        "movs r6, #0 \n\t" /* r6 = i = 0 */
        "b 3f \n\t"
        
        "2: \n\t" /* outer loop (k >= NUM_ECC_DIGITS) */
        "movs r6, r5 \n\t"        /* r6 = k */
        "subs r6, %[eccdm1] \n\t" /* r6 = i = k - (NUM_ECC_DIGITS - 1) (times 4) */
        
        "3: \n\t" /* inner loop */
        "push {r2, r3, r4, r5} \n\t" /* push things, r2 (c0) is at the top of stack. */
        "subs r7, r5, r6 \n\t"       /* r7 = k-i */
        
        "ldr r3, [r1, r7] \n\t" /* r3 = p_left[k-i] */
        "ldr r0, [r1, r6] \n\t" /* r0 = p_left[i] */
        
        /* 32x32 -> 64 multiply built from four 16x16 products (no umull on Thumb-1). */
        "lsrs r2, r0, #16 \n\t" /* r2 = a1 */
        "uxth r0, r0 \n\t"      /* r0 = a0 */
        
        "lsrs r4, r3, #16 \n\t" /* r4 = b1 */
        "uxth r3, r3 \n\t"      /* r3 = b0 */
        
        "movs r5, r2 \n\t"     /* r5 = a1 */
        "muls r5, r4, r5 \n\t" /* r5 = a1*b1 */
        "muls r2, r3, r2 \n\t" /* r2 = b0*a1 */
        "muls r4, r0, r4 \n\t" /* r4 = a0*b1 */
        "muls r0, r3, r0 \n\t" /* r0 = a0*b0 */
        
        "movs r3, #0 \n\t"  /* r3 = 0 */
        "adds r2, r4 \n\t"  /* r2 = b0*a1 + a0*b1 */
        "adcs r3, r3 \n\t"  /* r3 = carry */
        "lsls r3, #16 \n\t" /* r3 = carry << 16 */
        "adds r5, r3 \n\t"  /* r5 = a1*b1 + carry */
        
        "lsls r3, r2, #16 \n\t" /* r3 = (b0*a1 + a0*b1) << 16 */
        "lsrs r2, #16 \n\t"     /* r2 = (b0*a1 + a0*b1) >> 16 */
        "adds r0, r3 \n\t"      /* r0 = low word = a0*b0 + ((b0*a1 + a0*b1) << 16) */
        "adcs r5, r2 \n\t"      /* r5 = high word = a1*b1 + carry + ((b0*a1 + a0*b1) >> 16) */
    
        "movs r3, #0 \n\t"  /* r3 = 0 */
        "cmp r6, r7 \n\t"   /* (i < k-i) ? */
        "mov r7, r3 \n\t"   /* r7 = 0 (does not affect condition)*/
        "bge 4f \n\t" /* if i >= k-i, skip */
        "lsls r5, #1 \n\t"  /* high word << 1 */
        "adcs r7, r3 \n\t"  /* r7 = carry bit for c2 */
        "lsls r0, #1 \n\t"  /* low word << 1 */
        "adcs r5, r3 \n\t"  /* add carry from shift to high word */
        
        "4: \n\t"
        "pop {r2, r3, r4} \n\t" /* r2 = c0, r3 = c1, r4 = c2 */
        "adds r2, r0 \n\t"      /* add low word to c0 */
        "adcs r3, r5 \n\t"      /* add high word to c1, including carry */
        "movs r0, #0 \n\t"      /* r0 = 0 (does not affect carry bit) */
        "adcs r4, r0 \n\t"      /* add carry to c2 */
        "adds r4, r7 \n\t"      /* add carry from doubling (if any) */
        
        "pop {r5} \n\t" /* r5 = k */
        
        "adds r6, #4 \n\t"     /* i += 4 */
        "cmp r6, r5 \n\t"      /* i <= k? */
        "bge 5f \n\t" /* if not, exit the loop */
        "subs r7, r5, r6 \n\t" /* r7 = k-i */
        "cmp r6, r7 \n\t"      /* i <= k-i? */
        "ble 3b \n\t" /* if so, continue looping */
        
        "5: \n\t" /* end inner loop */
        
        "ldr r0, [sp, #0] \n\t" /* r0 = p_result */
        
        "str r2, [r0, r5] \n\t"   /* p_result[k] = c0 */
        "mov r2, r3 \n\t"         /* c0 = c1 */
        "mov r3, r4 \n\t"         /* c1 = c2 */
        "movs r4, #0 \n\t"        /* c2 = 0 */
        "adds r5, #4 \n\t"        /* k += 4 */
        "cmp r5, %[eccd] \n\t"    /* k < NUM_ECC_DIGITS (times 4) ? */
        "blt 1b \n\t" /* if not, loop back, start with i = 0 */
        "cmp r5, %[eccd2m1] \n\t" /* k < NUM_ECC_DIGITS * 2 - 1 (times 4) ? */
        "blt 2b \n\t" /* if not, loop back, start with i = (k+1) - NUM_ECC_DIGITS */
        /* end outer loop */
        
        "str r2, [r0, r5] \n\t" /* p_result[NUM_ECC_DIGITS * 2 - 1] = c0 */
        "pop {r0} \n\t"         /* pop p_result off the stack */

        ".syntax divided \n\t"
        : [r0] "+l" (r0), [r1] "+l" (r1)
        : [eccd] "I" (NUM_ECC_DIGITS * 4), [eccdm1] "I" ((NUM_ECC_DIGITS-1) * 4), [eccd2m1] "I" ((NUM_ECC_DIGITS * 2 - 1) * 4)
        : "r2", "r3", "r4", "r5", "r6", "r7", "cc", "memory"
    );

#else

    /* Portable C fallback: r01 is a 64-bit column accumulator, r2 catches the
       bits that overflow past 64 (the doubled cross products can carry high). */
    uint64_t r01 = 0;
    uint32_t r2 = 0;
    
    uint i, k;
    for(k=0; k < NUM_ECC_DIGITS*2 - 1; ++k)
    {
        uint l_min = (k < NUM_ECC_DIGITS ? 0 : (k + 1) - NUM_ECC_DIGITS);
        for(i=l_min; i<=k && i<=k-i; ++i)
        {
            uint64_t l_product = (uint64_t)p_left[i] * p_left[k-i];
            if(i < k-i)
            {
                /* Cross product appears twice in the square: double it and
                   keep the bit shifted out of the top in r2. */
                r2 += l_product >> 63;
                l_product *= 2;
            }
            r01 += l_product;
            r2 += (r01 < l_product); /* carry out of the 64-bit accumulator */
        }
        p_result[k] = (uint32_t)r01;
        r01 = (r01 >> 32) | (((uint64_t)r2) << 32);
        r2 = 0;
    }
    
    p_result[NUM_ECC_DIGITS*2 - 1] = (uint32_t)r01;
#endif
}
01000 
01001 /* Computes p_result = p_left^2 % curve_p. */
01002 static void vli_modSquare_fast(uint32_t *p_result, const uint32_t *p_left)
01003 {
01004     uint32_t l_product[2 * NUM_ECC_DIGITS];
01005     vli_square(l_product, p_left);
01006     vli_mmod_fast(p_result, l_product);
01007 }
01008 
01009 #else /* ECC_SQUARE_FUNC */
01010 
01011 #define vli_square(result, left, size) vli_mult((result), (left), (left), (size))
01012 #define vli_modSquare_fast(result, left) vli_modMult_fast((result), (left), (left))
01013     
01014 #endif /* ECC_SQUARE_FUNC */
01015 
/* Nonzero if the VLI (little-endian digit array) is even.  The argument is
   parenthesized so expression arguments (e.g. EVEN(p + 1)) expand correctly;
   the previous definition expanded `vli[0]` unparenthesized. */
#define EVEN(vli) (!((vli)[0] & 1))
/* Computes p_result = (1 / p_input) % p_mod. All VLIs are the same size.
   Binary extended Euclidean algorithm (right-shift variant): (a, b) start as
   (p_input, p_mod) and are reduced while (u, v) track the corresponding
   modular cofactors, so a * x == u (mod p_mod) style invariants hold until
   a == b == gcd, at which point u is the inverse.
   See "From Euclid's GCD to Montgomery Multiplication to the Great Divide"
   https://labs.oracle.com/techrep/2001/smli_tr-2001-95.pdf */
static void vli_modInv(uint32_t *p_result, const uint32_t *p_input, const uint32_t *p_mod)
{
    uint32_t a[NUM_ECC_DIGITS], b[NUM_ECC_DIGITS], u[NUM_ECC_DIGITS], v[NUM_ECC_DIGITS];
    uint32_t l_carry;
    
    vli_set(a, p_input);
    vli_set(b, p_mod);
    vli_clear(u);
    u[0] = 1;
    vli_clear(v);
    
    int l_cmpResult;
    while((l_cmpResult = vli_cmp(a, b)) != 0)
    {
        l_carry = 0;
        if(EVEN(a))
        {
            /* a even: halve a and halve u (mod p_mod).  If u is odd it is
               first made even by adding p_mod; the carry out of that addition
               becomes the top bit after the shift. */
            vli_rshift1(a);
            if(!EVEN(u))
            {
                l_carry = vli_add(u, u, p_mod);
            }
            vli_rshift1(u);
            if(l_carry)
            {
                u[NUM_ECC_DIGITS-1] |= 0x80000000;
            }
        }
        else if(EVEN(b))
        {
            /* Same halving step applied to (b, v). */
            vli_rshift1(b);
            if(!EVEN(v))
            {
                l_carry = vli_add(v, v, p_mod);
            }
            vli_rshift1(v);
            if(l_carry)
            {
                v[NUM_ECC_DIGITS-1] |= 0x80000000;
            }
        }
        else if(l_cmpResult > 0)
        {
            /* Both odd, a > b: a = (a - b) / 2, u = (u - v) / 2 (mod p_mod). */
            vli_sub(a, a, b);
            vli_rshift1(a);
            if(vli_cmp(u, v) < 0)
            {
                vli_add(u, u, p_mod); /* avoid underflow in u - v */
            }
            vli_sub(u, u, v);
            if(!EVEN(u))
            {
                l_carry = vli_add(u, u, p_mod);
            }
            vli_rshift1(u);
            if(l_carry)
            {
                u[NUM_ECC_DIGITS-1] |= 0x80000000;
            }
        }
        else
        {
            /* Both odd, b > a: mirror case, b = (b - a) / 2, v = (v - u) / 2. */
            vli_sub(b, b, a);
            vli_rshift1(b);
            if(vli_cmp(v, u) < 0)
            {
                vli_add(v, v, p_mod);
            }
            vli_sub(v, v, u);
            if(!EVEN(v))
            {
                l_carry = vli_add(v, v, p_mod);
            }
            vli_rshift1(v);
            if(l_carry)
            {
                v[NUM_ECC_DIGITS-1] |= 0x80000000;
            }
        }
    }
    
    vli_set(p_result, u);
}
01103 
01104 /* ------ Point operations ------ */
01105 
01106 /* Returns 1 if p_point is the point at infinity, 0 otherwise. */
01107 static int EccPoint_isZero(const EccPoint *p_point)
01108 {
01109     return (vli_isZero(p_point->x) && vli_isZero(p_point->y));
01110 }
01111 
01112 /* Point multiplication algorithm using Montgomery's ladder with co-Z coordinates.
01113 From http://eprint.iacr.org/2011/338.pdf
01114 */
01115 
01116 /* Double in place */
01117 #if (ECC_CURVE == secp256k1)
/* In-place point doubling in Jacobian coordinates for secp256k1 (curve a = 0):
   (X1, Y1, Z1) <- 2*(X1, Y1, Z1).  The division by 2 of 3*x1^2 is done by
   adding curve_p first when the value is odd, then shifting right. */
static void EccPoint_double_jacobian(uint32_t *X1, uint32_t *Y1, uint32_t *Z1)
{
    /* t1 = X, t2 = Y, t3 = Z */
    uint32_t t4[NUM_ECC_DIGITS];
    uint32_t t5[NUM_ECC_DIGITS];
    
    if(vli_isZero(Z1))
    {
        return; /* doubling the point at infinity is a no-op */
    }
    
    vli_modSquare_fast(t5, Y1);   /* t5 = y1^2 */
    vli_modMult_fast(t4, X1, t5); /* t4 = x1*y1^2 = A */
    vli_modSquare_fast(X1, X1);   /* t1 = x1^2 */
    vli_modSquare_fast(t5, t5);   /* t5 = y1^4 */
    vli_modMult_fast(Z1, Y1, Z1); /* t3 = y1*z1 = z3 */
    
    vli_modAdd(Y1, X1, X1, curve_p); /* t2 = 2*x1^2 */
    vli_modAdd(Y1, Y1, X1, curve_p); /* t2 = 3*x1^2 */
    if(vli_testBit(Y1, 0))
    {
        /* odd: add p (making the value even) before halving; the carry out
           of the addition becomes the top bit after the shift. */
        uint32_t l_carry = vli_add(Y1, Y1, curve_p);
        vli_rshift1(Y1);
        Y1[NUM_ECC_DIGITS-1] |= l_carry << 31;
    }
    else
    {
        vli_rshift1(Y1);
    }
    /* t2 = 3/2*(x1^2) = B */
    
    vli_modSquare_fast(X1, Y1);   /* t1 = B^2 */
    vli_modSub(X1, X1, t4, curve_p); /* t1 = B^2 - A */
    vli_modSub(X1, X1, t4, curve_p); /* t1 = B^2 - 2A = x3 */
    
    vli_modSub(t4, t4, X1, curve_p); /* t4 = A - x3 */
    vli_modMult_fast(Y1, Y1, t4);    /* t2 = B * (A - x3) */
    vli_modSub(Y1, Y1, t5, curve_p); /* t2 = B * (A - x3) - y1^4 = y3 */
}
01157 #else
/* In-place point doubling in Jacobian coordinates for curves with a = -3
   (the NIST curves): (X1, Y1, Z1) <- 2*(X1, Y1, Z1).
   Uses the a = -3 identity 3*(x1^2 - z1^4) = 3*(x1 - z1^2)*(x1 + z1^2). */
static void EccPoint_double_jacobian(uint32_t *X1, uint32_t *Y1, uint32_t *Z1)
{
    /* t1 = X, t2 = Y, t3 = Z */
    uint32_t t4[NUM_ECC_DIGITS];
    uint32_t t5[NUM_ECC_DIGITS];
    
    if(vli_isZero(Z1))
    {
        return; /* doubling the point at infinity is a no-op */
    }
    
    vli_modSquare_fast(t4, Y1);   /* t4 = y1^2 */
    vli_modMult_fast(t5, X1, t4); /* t5 = x1*y1^2 = A */
    vli_modSquare_fast(t4, t4);   /* t4 = y1^4 */
    vli_modMult_fast(Y1, Y1, Z1); /* t2 = y1*z1 = z3 */
    vli_modSquare_fast(Z1, Z1);   /* t3 = z1^2 */
    
    vli_modAdd(X1, X1, Z1, curve_p); /* t1 = x1 + z1^2 */
    vli_modAdd(Z1, Z1, Z1, curve_p); /* t3 = 2*z1^2 */
    vli_modSub(Z1, X1, Z1, curve_p); /* t3 = x1 - z1^2 */
    vli_modMult_fast(X1, X1, Z1);    /* t1 = x1^2 - z1^4 */
    
    vli_modAdd(Z1, X1, X1, curve_p); /* t3 = 2*(x1^2 - z1^4) */
    vli_modAdd(X1, X1, Z1, curve_p); /* t1 = 3*(x1^2 - z1^4) */
    if(vli_testBit(X1, 0))
    {
        /* odd: add p (making the value even) before halving; the carry out
           of the addition becomes the top bit after the shift. */
        uint32_t l_carry = vli_add(X1, X1, curve_p);
        vli_rshift1(X1);
        X1[NUM_ECC_DIGITS-1] |= l_carry << 31;
    }
    else
    {
        vli_rshift1(X1);
    }
    /* t1 = 3/2*(x1^2 - z1^4) = B */
    
    vli_modSquare_fast(Z1, X1);      /* t3 = B^2 */
    vli_modSub(Z1, Z1, t5, curve_p); /* t3 = B^2 - A */
    vli_modSub(Z1, Z1, t5, curve_p); /* t3 = B^2 - 2A = x3 */
    vli_modSub(t5, t5, Z1, curve_p); /* t5 = A - x3 */
    vli_modMult_fast(X1, X1, t5);    /* t1 = B * (A - x3) */
    vli_modSub(t4, X1, t4, curve_p); /* t4 = B * (A - x3) - y1^4 = y3 */
    
    /* Shuffle results back into the caller's (X, Y, Z) registers. */
    vli_set(X1, Z1);
    vli_set(Z1, Y1);
    vli_set(Y1, t4);
}
01205 #endif
01206 
01207 /* Modify (x1, y1) => (x1 * z^2, y1 * z^3) */
01208 static void apply_z(uint32_t *X1, uint32_t *Y1, uint32_t *Z)
01209 {
01210     uint32_t t1[NUM_ECC_DIGITS];
01211 
01212     vli_modSquare_fast(t1, Z);    /* z^2 */
01213     vli_modMult_fast(X1, X1, t1); /* x1 * z^2 */
01214     vli_modMult_fast(t1, t1, Z);  /* z^3 */
01215     vli_modMult_fast(Y1, Y1, t1); /* y1 * z^3 */
01216 }
01217 
01218 /* P = (x1, y1) => 2P, (x2, y2) => P' */
01219 static void XYcZ_initial_double(uint32_t *X1, uint32_t *Y1, uint32_t *X2, uint32_t *Y2, const uint32_t *p_initialZ)
01220 {
01221     uint32_t z[NUM_ECC_DIGITS];
01222     
01223     vli_set(X2, X1);
01224     vli_set(Y2, Y1);
01225     
01226     vli_clear(z);
01227     z[0] = 1;
01228     if(p_initialZ)
01229     {
01230         vli_set(z, p_initialZ);
01231     }
01232     apply_z(X1, Y1, z);
01233     
01234     EccPoint_double_jacobian(X1, Y1, z);
01235     
01236     apply_z(X2, Y2, z);
01237 }
01238 
/* Input P = (x1, y1, Z), Q = (x2, y2, Z)
   Output P' = (x1', y1', Z3), P + Q = (x3, y3, Z3)
   or P => P', Q => P + Q
   Co-Z addition: both inputs share the same implicit Z; the updated common Z3
   is not produced here — callers track it separately. */
static void XYcZ_add(uint32_t *X1, uint32_t *Y1, uint32_t *X2, uint32_t *Y2)
{
    /* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
    uint32_t t5[NUM_ECC_DIGITS];
    
    vli_modSub(t5, X2, X1, curve_p); /* t5 = x2 - x1 */
    vli_modSquare_fast(t5, t5);      /* t5 = (x2 - x1)^2 = A */
    vli_modMult_fast(X1, X1, t5);    /* t1 = x1*A = B */
    vli_modMult_fast(X2, X2, t5);    /* t3 = x2*A = C */
    vli_modSub(Y2, Y2, Y1, curve_p); /* t4 = y2 - y1 */
    vli_modSquare_fast(t5, Y2);      /* t5 = (y2 - y1)^2 = D */
    
    vli_modSub(t5, t5, X1, curve_p); /* t5 = D - B */
    vli_modSub(t5, t5, X2, curve_p); /* t5 = D - B - C = x3 */
    vli_modSub(X2, X2, X1, curve_p); /* t3 = C - B */
    vli_modMult_fast(Y1, Y1, X2);    /* t2 = y1*(C - B) */
    vli_modSub(X2, X1, t5, curve_p); /* t3 = B - x3 */
    vli_modMult_fast(Y2, Y2, X2);    /* t4 = (y2 - y1)*(B - x3) */
    vli_modSub(Y2, Y2, Y1, curve_p); /* t4 = y3 */
    
    vli_set(X2, t5); /* move x3 into the Q slot */
}
01265 
/* Input P = (x1, y1, Z), Q = (x2, y2, Z)
   Output P + Q = (x3, y3, Z3), P - Q = (x3', y3', Z3)
   or P => P - Q, Q => P + Q
   "Conjugate" co-Z addition: produces both the sum and the difference of the
   two co-Z inputs, which is exactly what each Montgomery ladder step needs. */
static void XYcZ_addC(uint32_t *X1, uint32_t *Y1, uint32_t *X2, uint32_t *Y2)
{
    /* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
    uint32_t t5[NUM_ECC_DIGITS];
    uint32_t t6[NUM_ECC_DIGITS];
    uint32_t t7[NUM_ECC_DIGITS];
    
    vli_modSub(t5, X2, X1, curve_p); /* t5 = x2 - x1 */
    vli_modSquare_fast(t5, t5);      /* t5 = (x2 - x1)^2 = A */
    vli_modMult_fast(X1, X1, t5);    /* t1 = x1*A = B */
    vli_modMult_fast(X2, X2, t5);    /* t3 = x2*A = C */
    vli_modAdd(t5, Y2, Y1, curve_p); /* t5 = y2 + y1 */
    vli_modSub(Y2, Y2, Y1, curve_p); /* t4 = y2 - y1 */

    vli_modSub(t6, X2, X1, curve_p); /* t6 = C - B */
    vli_modMult_fast(Y1, Y1, t6);    /* t2 = y1 * (C - B) */
    vli_modAdd(t6, X1, X2, curve_p); /* t6 = B + C */
    vli_modSquare_fast(X2, Y2);      /* t3 = (y2 - y1)^2 */
    vli_modSub(X2, X2, t6, curve_p); /* t3 = x3 */
    
    vli_modSub(t7, X1, X2, curve_p); /* t7 = B - x3 */
    vli_modMult_fast(Y2, Y2, t7);    /* t4 = (y2 - y1)*(B - x3) */
    vli_modSub(Y2, Y2, Y1, curve_p); /* t4 = y3 */
    
    vli_modSquare_fast(t7, t5);      /* t7 = (y2 + y1)^2 = F */
    vli_modSub(t7, t7, t6, curve_p); /* t7 = x3' */
    vli_modSub(t6, t7, X1, curve_p); /* t6 = x3' - B */
    vli_modMult_fast(t6, t6, t5);    /* t6 = (y2 + y1)*(x3' - B) */
    vli_modSub(Y1, t6, Y1, curve_p); /* t2 = y3' */
    
    vli_set(X1, t7); /* move x3' into the P slot */
}
01302 
/* Computes p_result = p_scalar * p_point using a co-Z Montgomery ladder
   (one conjugate-add plus one add per scalar bit, independent of bit values,
   which helps against simple side-channel analysis).  p_initialZ, when
   non-NULL, seeds the Z coordinate (coordinate randomization).
   From http://eprint.iacr.org/2011/338.pdf */
static void EccPoint_mult(EccPoint *p_result, const EccPoint *p_point,
    const uint32_t *p_scalar, const uint32_t *p_initialZ)
{
    /* R0 and R1 */
    uint32_t Rx[2][NUM_ECC_DIGITS];
    uint32_t Ry[2][NUM_ECC_DIGITS];
    uint32_t z[NUM_ECC_DIGITS];
    
    int i, nb;
    
    vli_set(Rx[1], p_point->x);
    vli_set(Ry[1], p_point->y);

    XYcZ_initial_double(Rx[1], Ry[1], Rx[0], Ry[0], p_initialZ);

    /* Ladder over the scalar bits below the (implicit) top bit. */
    for(i = vli_numBits(p_scalar) - 2; i > 0; --i)
    {
        nb = !vli_testBit(p_scalar, i);
        XYcZ_addC(Rx[1-nb], Ry[1-nb], Rx[nb], Ry[nb]);
        XYcZ_add(Rx[nb], Ry[nb], Rx[1-nb], Ry[1-nb]);
    }

    nb = !vli_testBit(p_scalar, 0);
    XYcZ_addC(Rx[1-nb], Ry[1-nb], Rx[nb], Ry[nb]);
    
    /* Find final 1/Z value (recovered from the known affine base point). */
    vli_modSub(z, Rx[1], Rx[0], curve_p); /* X1 - X0 */
    vli_modMult_fast(z, z, Ry[1-nb]);     /* Yb * (X1 - X0) */
    vli_modMult_fast(z, z, p_point->x);   /* xP * Yb * (X1 - X0) */
    vli_modInv(z, z, curve_p);            /* 1 / (xP * Yb * (X1 - X0)) */
    vli_modMult_fast(z, z, p_point->y);   /* yP / (xP * Yb * (X1 - X0)) */
    vli_modMult_fast(z, z, Rx[1-nb]);     /* Xb * yP / (xP * Yb * (X1 - X0)) */
    /* End 1/Z calculation */

    XYcZ_add(Rx[nb], Ry[nb], Rx[1-nb], Ry[1-nb]);
    
    apply_z(Rx[0], Ry[0], z); /* convert the result back to affine */
    
    vli_set(p_result->x, Rx[0]);
    vli_set(p_result->y, Ry[0]);
}
01344 
01345 int ecc_make_key(EccPoint *p_publicKey, uint32_t p_privateKey[NUM_ECC_DIGITS],
01346     const uint32_t p_random[NUM_ECC_DIGITS])
01347 {
01348     /* Make sure the private key is in the range [1, n-1].
01349        For the supported curves, n is always large enough that we only need to subtract once at most. */
01350     vli_set(p_privateKey, p_random);
01351     if(vli_cmp(curve_n, p_privateKey) != 1)
01352     {
01353         vli_sub(p_privateKey, p_privateKey, curve_n);
01354     }
01355     
01356     if(vli_isZero(p_privateKey))
01357     {
01358         return 0; /* The private key cannot be 0 (mod p). */
01359     }
01360     
01361     EccPoint_mult(p_publicKey, &curve_G, p_privateKey, NULL);
01362     return 1;
01363 }
01364 
01365 #if (ECC_CURVE == secp256k1)
01366 /* Compute p_result = x^3 + b */
01367 static void curve_x_side(uint32_t p_result[NUM_ECC_DIGITS], const uint32_t x[NUM_ECC_DIGITS])
01368 {
01369     vli_modSquare_fast(p_result, x); /* r = x^2 */
01370     vli_modMult_fast(p_result, p_result, x); /* r = x^3 */
01371     vli_modAdd(p_result, p_result, curve_b, curve_p); /* r = x^3 + b */
01372 }
01373 #else
01374 /* Compute p_result = x^3 - 3x + b */
01375 static void curve_x_side(uint32_t p_result[NUM_ECC_DIGITS], const uint32_t x[NUM_ECC_DIGITS])
01376 {
01377     uint32_t _3[NUM_ECC_DIGITS] = {3}; /* -a = 3 */
01378     
01379     vli_modSquare_fast(p_result, x); /* r = x^2 */
01380     vli_modSub(p_result, p_result, _3, curve_p); /* r = x^2 - 3 */
01381     vli_modMult_fast(p_result, p_result, x); /* r = x^3 - 3x */
01382     vli_modAdd(p_result, p_result, curve_b, curve_p); /* r = x^3 - 3x + b */
01383 }
01384 #endif
01385 
01386 int ecc_valid_public_key(const EccPoint *p_publicKey)
01387 {
01388     uint32_t l_tmp1[NUM_ECC_DIGITS];
01389     uint32_t l_tmp2[NUM_ECC_DIGITS];
01390     
01391     if(EccPoint_isZero(p_publicKey))
01392     {
01393         return 0;
01394     }
01395     
01396     if(vli_cmp(curve_p, p_publicKey->x) != 1 || vli_cmp(curve_p, p_publicKey->y) != 1)
01397     {
01398         return 0;
01399     }
01400     
01401     vli_modSquare_fast(l_tmp1, p_publicKey->y); /* tmp1 = y^2 */
01402     
01403     curve_x_side(l_tmp2, p_publicKey->x); /* tmp2 = x^3 - 3x + b */
01404     
01405     /* Make sure that y^2 == x^3 + ax + b */
01406     if(vli_cmp(l_tmp1, l_tmp2) != 0)
01407     {
01408         return 0;
01409     }
01410     
01411     return 1;
01412 }
01413 
01414 int ecdh_shared_secret(uint32_t p_secret[NUM_ECC_DIGITS], const EccPoint *p_publicKey,
01415     const uint32_t p_privateKey[NUM_ECC_DIGITS], const uint32_t p_random[NUM_ECC_DIGITS])
01416 {
01417     EccPoint l_product;
01418     
01419     EccPoint_mult(&l_product, p_publicKey, p_privateKey, p_random);
01420     if(EccPoint_isZero(&l_product))
01421     {
01422         return 0;
01423     }
01424     
01425     vli_set(p_secret, l_product.x);
01426     
01427     return 1;
01428 }
01429 
01430 /* -------- ECDSA code -------- */
01431 
/* Computes p_result = (p_left * p_right) % p_mod.
   Generic (slow) reduction by repeated shift-and-subtract, used for moduli
   without a fast reduction (here: the group order n in the ECDSA code). */
static void vli_modMult(uint32_t *p_result, const uint32_t *p_left,
    const uint32_t *p_right, const uint32_t *p_mod)
{
    uint32_t l_product[2 * NUM_ECC_DIGITS];
    uint32_t l_modMultiple[2 * NUM_ECC_DIGITS];
    uint l_digitShift, l_bitShift;
    uint l_productBits;
    uint l_modBits = vli_numBits(p_mod);
    
    vli_mult(l_product, p_left, p_right);
    /* Bit length of the double-width product. */
    l_productBits = vli_numBits(l_product + NUM_ECC_DIGITS);
    if(l_productBits)
    {
        l_productBits += NUM_ECC_DIGITS * 32;
    }
    else
    {
        l_productBits = vli_numBits(l_product);
    }
    
    if(l_productBits < l_modBits)
    { /* l_product < p_mod. */
        vli_set(p_result, l_product);
        return;
    }
    
    /* Shift p_mod by (l_leftBits - l_modBits). This multiplies p_mod by the largest
       power of two possible while still resulting in a number less than p_left. */
    vli_clear(l_modMultiple);
    vli_clear(l_modMultiple + NUM_ECC_DIGITS);
    l_digitShift = (l_productBits - l_modBits) / 32;
    l_bitShift = (l_productBits - l_modBits) % 32;
    if(l_bitShift)
    {
        l_modMultiple[l_digitShift + NUM_ECC_DIGITS] = vli_lshift(l_modMultiple + l_digitShift, p_mod, l_bitShift);
    }
    else
    {
        vli_set(l_modMultiple + l_digitShift, p_mod);
    }

    /* Subtract all multiples of p_mod to get the remainder, halving the
       multiple each round (classic binary long division). */
    vli_clear(p_result);
    p_result[0] = 1; /* Use p_result as a temp var to store 1 (for subtraction) */
    while(l_productBits > NUM_ECC_DIGITS * 32 || vli_cmp(l_modMultiple, p_mod) >= 0)
    {
        int l_cmp = vli_cmp(l_modMultiple + NUM_ECC_DIGITS, l_product + NUM_ECC_DIGITS);
        if(l_cmp < 0 || (l_cmp == 0 && vli_cmp(l_modMultiple, l_product) <= 0))
        {
            /* l_modMultiple <= l_product: subtract it, propagating any borrow
               from the low half into the high half. */
            if(vli_sub(l_product, l_product, l_modMultiple))
            { /* borrow */
                vli_sub(l_product + NUM_ECC_DIGITS, l_product + NUM_ECC_DIGITS, p_result);
            }
            vli_sub(l_product + NUM_ECC_DIGITS, l_product + NUM_ECC_DIGITS, l_modMultiple + NUM_ECC_DIGITS);
        }
        /* Halve l_modMultiple (shift the double-width value right by 1). */
        uint32_t l_carry = (l_modMultiple[NUM_ECC_DIGITS] & 0x01) << 31;
        vli_rshift1(l_modMultiple + NUM_ECC_DIGITS);
        vli_rshift1(l_modMultiple);
        l_modMultiple[NUM_ECC_DIGITS-1] |= l_carry;
        
        --l_productBits;
    }
    vli_set(p_result, l_product);
}
01497 
/* Returns the larger of a and b. */
static inline unsigned int max(const unsigned int a, const unsigned int b)
{
    if(a > b)
    {
        return a;
    }
    return b;
}
01502 
/* Computes the ECDSA signature (r, s) of p_hash with p_privateKey, using the
   per-signature nonce p_random.  Returns 1 on success, 0 when p_random is
   unusable (zero, or yields r == 0) — the caller must retry with a fresh
   random number.
   NOTE(review): p_random must be secret and unique per signature; nonce reuse
   reveals the private key. */
int ecdsa_sign(uint32_t r[NUM_ECC_DIGITS], uint32_t s[NUM_ECC_DIGITS],
    const uint32_t p_privateKey[NUM_ECC_DIGITS], const uint32_t p_random[NUM_ECC_DIGITS],
    const uint32_t p_hash[NUM_ECC_DIGITS])
{
    uint32_t k[NUM_ECC_DIGITS];
    EccPoint p;
    
    if(vli_isZero(p_random))
    { /* The random number must not be 0. */
        return 0;
    }
    
    /* Reduce the nonce below n (one conditional subtraction suffices). */
    vli_set(k, p_random);
    if(vli_cmp(curve_n, k) != 1)
    {
        vli_sub(k, k, curve_n);
    }
    
    /* p = k * G */
    EccPoint_mult(&p, &curve_G, k, NULL);
    
    /* r = x1 (mod n) */
    vli_set(r, p.x);
    if(vli_cmp(curve_n, r) != 1)
    {
        vli_sub(r, r, curve_n);
    }
    if(vli_isZero(r))
    { /* If r == 0, fail (need a different random number). */
        return 0;
    }
    
    vli_modMult(s, r, p_privateKey, curve_n); /* s = r*d */
    vli_modAdd(s, p_hash, s, curve_n); /* s = e + r*d */
    vli_modInv(k, k, curve_n); /* k = 1 / k */
    vli_modMult(s, s, k, curve_n); /* s = (e + r*d) / k */
    
    return 1;
}
01542 
/* Verifies the ECDSA signature (r, s) over p_hash against p_publicKey.
   Returns 1 if valid, 0 otherwise.
   Uses Shamir's trick: u1*G + u2*Q is evaluated in a single double-and-add
   pass over the 4-entry table {O, G, Q, G+Q} indexed by the current bit pair
   of (u1, u2), instead of two separate scalar multiplications. */
int ecdsa_verify(const EccPoint *p_publicKey, const uint32_t p_hash[NUM_ECC_DIGITS],
    const uint32_t r[NUM_ECC_DIGITS], const uint32_t s[NUM_ECC_DIGITS])
{
    uint32_t u1[NUM_ECC_DIGITS], u2[NUM_ECC_DIGITS];
    uint32_t z[NUM_ECC_DIGITS];
    EccPoint l_sum;
    uint32_t rx[NUM_ECC_DIGITS];
    uint32_t ry[NUM_ECC_DIGITS];
    uint32_t tx[NUM_ECC_DIGITS];
    uint32_t ty[NUM_ECC_DIGITS];
    uint32_t tz[NUM_ECC_DIGITS];
    
    if(vli_isZero(r) || vli_isZero(s))
    { /* r, s must not be 0. */
        return 0;
    }
    
    if(vli_cmp(curve_n, r) != 1 || vli_cmp(curve_n, s) != 1)
    { /* r, s must be < n. */
        return 0;
    }

    /* Calculate u1 and u2. */
    vli_modInv(z, s, curve_n); /* Z = s^-1 */
    vli_modMult(u1, p_hash, z, curve_n); /* u1 = e/s */
    vli_modMult(u2, r, z, curve_n); /* u2 = r/s */
    
    /* Calculate l_sum = G + Q (the table entry used when both bits are set),
       via a co-Z add followed by conversion back to affine coordinates. */
    vli_set(l_sum.x, p_publicKey->x);
    vli_set(l_sum.y, p_publicKey->y);
    vli_set(tx, curve_G.x);
    vli_set(ty, curve_G.y);
    vli_modSub(z, l_sum.x, tx, curve_p); /* Z = x2 - x1 */
    XYcZ_add(tx, ty, l_sum.x, l_sum.y);
    vli_modInv(z, z, curve_p); /* Z = 1/Z */
    apply_z(l_sum.x, l_sum.y, z);
    
    /* Use Shamir's trick to calculate u1*G + u2*Q */
    const EccPoint *l_points[4] = {NULL, &curve_G, p_publicKey, &l_sum};
    uint l_numBits = max(vli_numBits(u1), vli_numBits(u2));
    
    /* Start from the table entry selected by the top bit pair. */
    const EccPoint *l_point = l_points[(!!vli_testBit(u1, l_numBits-1)) | ((!!vli_testBit(u2, l_numBits-1)) << 1)];
    vli_set(rx, l_point->x);
    vli_set(ry, l_point->y);
    vli_clear(z);
    z[0] = 1;

    int i;
    for(i = l_numBits - 2; i >= 0; --i)
    {
        EccPoint_double_jacobian(rx, ry, z);
        
        int l_index = (!!vli_testBit(u1, i)) | ((!!vli_testBit(u2, i)) << 1);
        l_point = l_points[l_index];
        if(l_point)
        {
            /* Add the selected (affine) table point; the running Z is updated
               by multiplying in the co-Z correction factor tz. */
            vli_set(tx, l_point->x);
            vli_set(ty, l_point->y);
            apply_z(tx, ty, z);
            vli_modSub(tz, rx, tx, curve_p); /* Z = x2 - x1 */
            XYcZ_add(tx, ty, rx, ry);
            vli_modMult_fast(z, z, tz);
        }
    }

    vli_modInv(z, z, curve_p); /* Z = 1/Z */
    apply_z(rx, ry, z);
    
    /* v = x1 (mod n) */
    if(vli_cmp(curve_n, rx) != 1)
    {
        vli_sub(rx, rx, curve_n);
    }

    /* Accept only if v == r. */
    return (vli_cmp(rx, r) == 0);
}
01620 
01621 void ecc_bytes2native(uint32_t p_native[NUM_ECC_DIGITS], const uint8_t p_bytes[NUM_ECC_DIGITS*4])
01622 {
01623     unsigned i;
01624     for(i=0; i<NUM_ECC_DIGITS; ++i)
01625     {
01626         uint8_t *p_digit = (uint8_t *)(p_bytes + 4 * (NUM_ECC_DIGITS - 1 - i));
01627         p_native[i] = ((uint32_t)p_digit[0] << 24) | ((uint32_t)p_digit[1] << 16) | ((uint32_t)p_digit[2] << 8) | (uint32_t)p_digit[3];
01628     }
01629 }
01630 
01631 void ecc_native2bytes(uint8_t p_bytes[NUM_ECC_DIGITS*4], const uint32_t p_native[NUM_ECC_DIGITS])
01632 {
01633     unsigned i;
01634     for(i=0; i<NUM_ECC_DIGITS; ++i)
01635     {
01636         uint8_t *p_digit = p_bytes + 4 * (NUM_ECC_DIGITS - 1 - i);
01637         p_digit[0] = p_native[i] >> 24;
01638         p_digit[1] = p_native[i] >> 16;
01639         p_digit[2] = p_native[i] >> 8;
01640         p_digit[3] = p_native[i];
01641     }
01642 }
01643 
01644 /* Compute a = sqrt(a) (mod curve_p). */
01645 static void mod_sqrt(uint32_t a[NUM_ECC_DIGITS])
01646 {
01647     unsigned i;
01648     uint32_t p1[NUM_ECC_DIGITS] = {1};
01649     uint32_t l_result[NUM_ECC_DIGITS] = {1};
01650     
01651     /* Since curve_p == 3 (mod 4) for all supported curves, we can
01652        compute sqrt(a) = a^((curve_p + 1) / 4) (mod curve_p). */
01653     vli_add(p1, curve_p, p1); /* p1 = curve_p + 1 */
01654     for(i = vli_numBits(p1) - 1; i > 1; --i)
01655     {
01656         vli_modSquare_fast(l_result, l_result);
01657         if(vli_testBit(p1, i))
01658         {
01659             vli_modMult_fast(l_result, l_result, a);
01660         }
01661     }
01662     vli_set(a, l_result);
01663 }
01664 
01665 void ecc_point_compress(uint8_t p_compressed[NUM_ECC_DIGITS*4 + 1], const EccPoint *p_point)
01666 {
01667     p_compressed[0] = 2 + (p_point->y[0] & 0x01);
01668     ecc_native2bytes(p_compressed + 1, p_point->x);
01669 }
01670 
01671 void ecc_point_decompress(EccPoint *p_point, const uint8_t p_compressed[NUM_ECC_DIGITS*4 + 1])
01672 {
01673     ecc_bytes2native(p_point->x, p_compressed + 1);
01674     curve_x_side(p_point->y, p_point->x);
01675     mod_sqrt(p_point->y);
01676     if((p_point->y[0] & 0x01) != (p_compressed[0] & 0x01))
01677     {
01678         vli_sub(p_point->y, curve_p, p_point->y);
01679     }
01680 }
01681