Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
ecc.cpp
00001 #include "ecc.h" 00002 00003 #include <string.h> 00004 00005 typedef unsigned int uint; 00006 00007 #define Curve_P_1 {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFD} 00008 #define Curve_P_2 {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF} 00009 #define Curve_P_3 {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0xFFFFFFFF} 00010 #define Curve_P_4 {0xFFFFFFFF, 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \ 00011 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF} 00012 #define Curve_P_5 {0xFFFFFC2F, 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF} 00013 00014 #define Curve_B_1 {0x2CEE5ED3, 0xD824993C, 0x1079F43D, 0xE87579C1} 00015 #define Curve_B_2 {0xC146B9B1, 0xFEB8DEEC, 0x72243049, 0x0FA7E9AB, 0xE59C80E7, 0x64210519} 00016 #define Curve_B_3 {0x27D2604B, 0x3BCE3C3E, 0xCC53B0F6, 0x651D06B0, 0x769886BC, 0xB3EBBD55, 0xAA3A93E7, 0x5AC635D8} 00017 #define Curve_B_4 {0xD3EC2AEF, 0x2A85C8ED, 0x8A2ED19D, 0xC656398D, 0x5013875A, 0x0314088F, 0xFE814112, 0x181D9C6E, \ 00018 0xE3F82D19, 0x988E056B, 0xE23EE7E4, 0xB3312FA7} 00019 #define Curve_B_5 {0x00000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000} 00020 00021 #define Curve_G_1 { \ 00022 {0xA52C5B86, 0x0C28607C, 0x8B899B2D, 0x161FF752}, \ 00023 {0xDDED7A83, 0xC02DA292, 0x5BAFEB13, 0xCF5AC839}} 00024 00025 #define Curve_G_2 { \ 00026 {0x82FF1012, 0xF4FF0AFD, 0x43A18800, 0x7CBF20EB, 0xB03090F6, 0x188DA80E}, \ 00027 {0x1E794811, 0x73F977A1, 0x6B24CDD5, 0x631011ED, 0xFFC8DA78, 0x07192B95}} 00028 00029 #define Curve_G_3 { \ 00030 {0xD898C296, 0xF4A13945, 0x2DEB33A0, 0x77037D81, 0x63A440F2, 0xF8BCE6E5, 0xE12C4247, 0x6B17D1F2}, \ 00031 {0x37BF51F5, 0xCBB64068, 0x6B315ECE, 0x2BCE3357, 0x7C0F9E16, 0x8EE7EB4A, 0xFE1A7F9B, 0x4FE342E2}} 00032 00033 #define Curve_G_4 { \ 00034 {0x72760AB7, 0x3A545E38, 0xBF55296C, 0x5502F25D, 0x82542A38, 0x59F741E0, 0x8BA79B98, 
0x6E1D3B62, \ 00035 0xF320AD74, 0x8EB1C71E, 0xBE8B0537, 0xAA87CA22}, \ 00036 {0x90EA0E5F, 0x7A431D7C, 0x1D7E819D, 0x0A60B1CE, 0xB5F0B8C0, 0xE9DA3113, 0x289A147C, 0xF8F41DBD, \ 00037 0x9292DC29, 0x5D9E98BF, 0x96262C6F, 0x3617DE4A}} 00038 00039 #define Curve_G_5 { \ 00040 {0x16F81798, 0x59F2815B, 0x2DCE28D9, 0x029BFCDB, 0xCE870B07, 0x55A06295, 0xF9DCBBAC, 0x79BE667E}, \ 00041 {0xFB10D4B8, 0x9C47D08F, 0xA6855419, 0xFD17B448, 0x0E1108A8, 0x5DA4FBFC, 0x26A3C465, 0x483ADA77}} 00042 00043 #define Curve_N_1 {0x9038A115, 0x75A30D1B, 0x00000000, 0xFFFFFFFE} 00044 #define Curve_N_2 {0xB4D22831, 0x146BC9B1, 0x99DEF836, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF} 00045 #define Curve_N_3 {0xFC632551, 0xF3B9CAC2, 0xA7179E84, 0xBCE6FAAD, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF} 00046 #define Curve_N_4 {0xCCC52973, 0xECEC196A, 0x48B0A77A, 0x581A0DB2, 0xF4372DDF, 0xC7634D81, 0xFFFFFFFF, 0xFFFFFFFF, \ 00047 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF} 00048 #define Curve_N_5 {0xD0364141, 0xBFD25E8C, 0xAF48A03B, 0xBAAEDCE6, 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF} 00049 00050 static uint32_t curve_p[NUM_ECC_DIGITS] = ECC_CONCAT(Curve_P_, ECC_CURVE); 00051 static uint32_t curve_b[NUM_ECC_DIGITS] = ECC_CONCAT(Curve_B_, ECC_CURVE); 00052 static EccPoint curve_G = ECC_CONCAT(Curve_G_, ECC_CURVE); 00053 static uint32_t curve_n[NUM_ECC_DIGITS] = ECC_CONCAT(Curve_N_, ECC_CURVE); 00054 00055 static void vli_clear(uint32_t *p_vli) 00056 { 00057 uint i; 00058 for(i=0; i<NUM_ECC_DIGITS; ++i) 00059 { 00060 p_vli[i] = 0; 00061 } 00062 } 00063 00064 /* Returns 1 if p_vli == 0, 0 otherwise. */ 00065 static int vli_isZero(const uint32_t *p_vli) 00066 { 00067 uint i; 00068 for(i = 0; i < NUM_ECC_DIGITS; ++i) 00069 { 00070 if(p_vli[i]) 00071 { 00072 return 0; 00073 } 00074 } 00075 return 1; 00076 } 00077 00078 /* Returns nonzero if bit p_bit of p_vli is set. 
*/ 00079 static uint32_t vli_testBit(const uint32_t *p_vli, const unsigned int p_bit) 00080 { 00081 return (p_vli[p_bit/32] & (1 << (p_bit % 32))); 00082 } 00083 00084 /* Counts the number of 32-bit "digits" in p_vli. */ 00085 static uint vli_numDigits(const uint32_t *p_vli) 00086 { 00087 int i; 00088 /* Search from the end until we find a non-zero digit. 00089 We do it in reverse because we expect that most digits will be nonzero. */ 00090 for(i = NUM_ECC_DIGITS - 1; i >= 0 && p_vli[i] == 0; --i) 00091 { 00092 } 00093 00094 return (i + 1); 00095 } 00096 00097 /* Counts the number of bits required for p_vli. */ 00098 static uint vli_numBits(const uint32_t *p_vli) 00099 { 00100 uint i; 00101 uint32_t l_digit; 00102 00103 uint l_numDigits = vli_numDigits(p_vli); 00104 if(l_numDigits == 0) 00105 { 00106 return 0; 00107 } 00108 00109 l_digit = p_vli[l_numDigits - 1]; 00110 for(i=0; l_digit; ++i) 00111 { 00112 l_digit >>= 1; 00113 } 00114 00115 return ((l_numDigits - 1) * 32 + i); 00116 } 00117 00118 /* Sets p_dest = p_src. */ 00119 static void vli_set(uint32_t *p_dest, const uint32_t *p_src) 00120 { 00121 uint i; 00122 for(i=0; i<NUM_ECC_DIGITS; ++i) 00123 { 00124 p_dest[i] = p_src[i]; 00125 } 00126 } 00127 00128 /* Returns sign of p_left - p_right. */ 00129 static int vli_cmp(const uint32_t *p_left, const uint32_t *p_right) 00130 { 00131 int i; 00132 for(i = NUM_ECC_DIGITS-1; i >= 0; --i) 00133 { 00134 if(p_left[i] > p_right[i]) 00135 { 00136 return 1; 00137 } 00138 else if(p_left[i] < p_right[i]) 00139 { 00140 return -1; 00141 } 00142 } 00143 return 0; 00144 } 00145 00146 /* Computes p_result = p_in << c, returning carry. Can modify in place (if p_result == p_in). 0 < p_shift < 32. 
*/ 00147 static uint32_t vli_lshift(uint32_t *p_result, const uint32_t *p_in, const unsigned int p_shift) 00148 { 00149 uint32_t l_carry = 0; 00150 uint i; 00151 for(i = 0; i < NUM_ECC_DIGITS; ++i) 00152 { 00153 uint32_t l_temp = p_in[i]; 00154 p_result[i] = (l_temp << p_shift) | l_carry; 00155 l_carry = l_temp >> (32 - p_shift); 00156 } 00157 00158 return l_carry; 00159 } 00160 00161 /* Computes p_vli = p_vli >> 1. */ 00162 static void vli_rshift1(uint32_t *p_vli) 00163 { 00164 uint32_t *l_end = p_vli; 00165 uint32_t l_carry = 0; 00166 00167 p_vli += NUM_ECC_DIGITS; 00168 while(p_vli-- > l_end) 00169 { 00170 uint32_t l_temp = *p_vli; 00171 *p_vli = (l_temp >> 1) | l_carry; 00172 l_carry = l_temp << 31; 00173 } 00174 } 00175 00176 /* Computes p_result = p_left + p_right, returning carry. Can modify in place. */ 00177 static uint32_t vli_add(uint32_t *p_result, const uint32_t *p_left, const uint32_t *p_right) 00178 { 00179 #if (ECC_ASM == ecc_asm_thumb || ECC_ASM == ecc_asm_thumb2 || ECC_ASM == ecc_asm_arm) 00180 uint32_t l_counter = NUM_ECC_DIGITS; 00181 uint32_t l_carry = 0; /* carry = 0 initially */ 00182 uint32_t l_left; 00183 uint32_t l_right; 00184 00185 asm volatile ( 00186 ".syntax unified \n\t" 00187 "1: \n\t" 00188 "ldmia %[lptr]!, {%[left]} \n\t" /* Load left word. */ 00189 "ldmia %[rptr]!, {%[right]} \n\t" /* Load right word. */ 00190 "lsrs %[carry], #1 \n\t" /* Set up carry flag (l_carry = 0 after this). */ 00191 "adcs %[left], %[right] \n\t" /* Add with carry. */ 00192 "adcs %[carry], %[carry] \n\t" /* Store carry bit in l_carry. */ 00193 "stmia %[dptr]!, {%[left]} \n\t" /* Store result word. */ 00194 "subs %[ctr], #1 \n\t" /* Decrement index. */ 00195 "bne 1b \n\t" /* Loop until index == 0. 
*/ 00196 #if (ECC_ASM != ecc_asm_thumb2) 00197 ".syntax divided \n\t" 00198 #endif 00199 #if (ECC_ASM == ecc_asm_thumb) 00200 : [dptr] "+l" (p_result), [lptr] "+l" (p_left), [rptr] "+l" (p_right), [ctr] "+l" (l_counter), [carry] "+l" (l_carry), [left] "=l" (l_left), [right] "=l" (l_right) 00201 #else 00202 : [dptr] "+r" (p_result), [lptr] "+r" (p_left), [rptr] "+r" (p_right), [ctr] "+r" (l_counter), [carry] "+r" (l_carry), [left] "=r" (l_left), [right] "=r" (l_right) 00203 #endif 00204 : 00205 : "cc", "memory" 00206 ); 00207 return l_carry; 00208 00209 #else 00210 00211 uint32_t l_carry = 0; 00212 uint i; 00213 for(i=0; i<NUM_ECC_DIGITS; ++i) 00214 { 00215 uint32_t l_sum = p_left[i] + p_right[i] + l_carry; 00216 if(l_sum != p_left[i]) 00217 { 00218 l_carry = (l_sum < p_left[i]); 00219 } 00220 p_result[i] = l_sum; 00221 } 00222 return l_carry; 00223 #endif 00224 } 00225 00226 /* Computes p_result = p_left - p_right, returning borrow. Can modify in place. */ 00227 static uint32_t vli_sub(uint32_t *p_result, const uint32_t *p_left, const uint32_t *p_right) 00228 { 00229 #if (ECC_ASM == ecc_asm_thumb || ECC_ASM == ecc_asm_thumb2 || ECC_ASM == ecc_asm_arm) 00230 uint32_t l_counter = NUM_ECC_DIGITS; 00231 uint32_t l_carry = 1; /* carry = 1 initially (means don't borrow) */ 00232 uint32_t l_left; 00233 uint32_t l_right; 00234 00235 asm volatile ( 00236 ".syntax unified \n\t" 00237 "1: \n\t" 00238 "ldmia %[lptr]!, {%[left]} \n\t" /* Load left word. */ 00239 "ldmia %[rptr]!, {%[right]} \n\t" /* Load right word. */ 00240 "lsrs %[carry], #1 \n\t" /* Set up carry flag (l_carry = 0 after this). */ 00241 "sbcs %[left], %[right] \n\t" /* Subtract with borrow. */ 00242 "adcs %[carry], %[carry] \n\t" /* Store carry bit in l_carry. */ 00243 "stmia %[dptr]!, {%[left]} \n\t" /* Store result word. */ 00244 "subs %[ctr], #1 \n\t" /* Decrement index. */ 00245 "bne 1b \n\t" /* Loop until index == 0. 
*/ 00246 #if (ECC_ASM != ecc_asm_thumb2) 00247 ".syntax divided \n\t" 00248 #endif 00249 #if (ECC_ASM == ecc_asm_thumb) 00250 : [dptr] "+l" (p_result), [lptr] "+l" (p_left), [rptr] "+l" (p_right), [ctr] "+l" (l_counter), [carry] "+l" (l_carry), [left] "=l" (l_left), [right] "=l" (l_right) 00251 #else 00252 : [dptr] "+r" (p_result), [lptr] "+r" (p_left), [rptr] "+r" (p_right), [ctr] "+r" (l_counter), [carry] "+r" (l_carry), [left] "=r" (l_left), [right] "=r" (l_right) 00253 #endif 00254 : 00255 : "cc", "memory" 00256 ); 00257 return !l_carry; 00258 00259 #else 00260 00261 uint32_t l_borrow = 0; 00262 uint i; 00263 for(i=0; i<NUM_ECC_DIGITS; ++i) 00264 { 00265 uint32_t l_diff = p_left[i] - p_right[i] - l_borrow; 00266 if(l_diff != p_left[i]) 00267 { 00268 l_borrow = (l_diff > p_left[i]); 00269 } 00270 p_result[i] = l_diff; 00271 } 00272 return l_borrow; 00273 #endif 00274 } 00275 00276 /* Computes p_result = p_left * p_right. */ 00277 static void vli_mult(uint32_t *p_result, const uint32_t *p_left, const uint32_t *p_right) 00278 { 00279 #if (ECC_ASM == ecc_asm_thumb2 || ECC_ASM == ecc_asm_arm) 00280 uint32_t c0 = 0; 00281 uint32_t c1 = 0; 00282 uint32_t c2 = 0; 00283 uint32_t k = 0; 00284 uint32_t i; 00285 uint32_t t0, t1; 00286 00287 asm volatile ( 00288 ".syntax unified \n\t" 00289 00290 "1: \n\t" /* outer loop (k < NUM_ECC_DIGITS) */ 00291 "movs %[i], #0 \n\t" /* i = 0 */ 00292 "b 3f \n\t" 00293 00294 "2: \n\t" /* outer loop (k >= NUM_ECC_DIGITS) */ 00295 "movs %[i], %[k] \n\t" /* i = k */ 00296 "subs %[i], %[eccdm1] \n\t" /* i = k - (NUM_ECC_DIGITS - 1) (times 4) */ 00297 00298 "3: \n\t" /* inner loop */ 00299 "subs %[t0], %[k], %[i] \n\t" /* t0 = k-i */ 00300 00301 "ldr %[t1], [%[right], %[t0]] \n\t" /* t1 = p_right[k-i] */ 00302 "ldr %[t0], [%[left], %[i]] \n\t" /* t0 = p_left[i] */ 00303 00304 "umull %[t0], %[t1], %[t0], %[t1] \n\t" /* (t0, t1) = p_left[i] * p_right[k-i] */ 00305 00306 "adds %[c0], %[t0] \n\t" /* add low word to c0 */ 00307 "adcs %[c1], %[t1] 
\n\t" /* add high word to c1, including carry */ 00308 "adcs %[c2], #0 \n\t" /* add carry to c2 */ 00309 00310 "adds %[i], #4 \n\t" /* i += 4 */ 00311 "cmp %[i], %[eccd] \n\t" /* i < NUM_ECC_DIGITS (times 4)? */ 00312 "bge 4f \n\t" /* if not, exit the loop */ 00313 "cmp %[i], %[k] \n\t" /* i <= k? */ 00314 "ble 3b \n\t" /* if so, continue looping */ 00315 00316 "4: \n\t" /* end inner loop */ 00317 00318 "str %[c0], [%[result], %[k]] \n\t" /* p_result[k] = c0 */ 00319 "mov %[c0], %[c1] \n\t" /* c0 = c1 */ 00320 "mov %[c1], %[c2] \n\t" /* c1 = c2 */ 00321 "movs %[c2], #0 \n\t" /* c2 = 0 */ 00322 "adds %[k], #4 \n\t" /* k += 4 */ 00323 "cmp %[k], %[eccd] \n\t" /* k < NUM_ECC_DIGITS (times 4) ? */ 00324 "blt 1b \n\t" /* if not, loop back, start with i = 0 */ 00325 "cmp %[k], %[eccd2m1] \n\t" /* k < NUM_ECC_DIGITS * 2 - 1 (times 4) ? */ 00326 "blt 2b \n\t" /* if not, loop back, start with i = (k+1) - NUM_ECC_DIGITS */ 00327 /* end outer loop */ 00328 00329 "str %[c0], [%[result], %[k]] \n\t" /* p_result[NUM_ECC_DIGITS * 2 - 1] = c0 */ 00330 #if (ECC_ASM != ecc_asm_thumb2) 00331 ".syntax divided \n\t" 00332 #endif 00333 : [c0] "+r" (c0), [c1] "+r" (c1), [c2] "+r" (c2), [k] "+r" (k), [i] "=&r" (i), [t0] "=&r" (t0), [t1] "=&r" (t1) 00334 : [result] "r" (p_result), [left] "r" (p_left), [right] "r" (p_right), 00335 [eccd] "I" (NUM_ECC_DIGITS * 4), [eccdm1] "I" ((NUM_ECC_DIGITS-1) * 4), [eccd2m1] "I" ((NUM_ECC_DIGITS * 2 - 1) * 4) 00336 : "cc", "memory" 00337 ); 00338 00339 #elif (ECC_ASM == ecc_asm_thumb) 00340 00341 register uint32_t *r0 asm("r0") = p_result; 00342 register uint32_t *r1 asm("r1") = p_left; 00343 register uint32_t *r2 asm("r2") = p_right; 00344 00345 asm volatile ( 00346 ".syntax unified \n\t" 00347 "movs r3, #0 \n\t" /* c0 = 0 */ 00348 "movs r4, #0 \n\t" /* c1 = 0 */ 00349 "movs r5, #0 \n\t" /* c2 = 0 */ 00350 "movs r6, #0 \n\t" /* k = 0 */ 00351 00352 "push {r0} \n\t" /* keep p_result on the stack */ 00353 00354 "1: \n\t" /* outer loop (k < NUM_ECC_DIGITS) 
*/ 00355 "movs r7, #0 \n\t" /* r7 = i = 0 */ 00356 "b 3f \n\t" 00357 00358 "2: \n\t" /* outer loop (k >= NUM_ECC_DIGITS) */ 00359 "movs r7, r6 \n\t" /* r7 = k */ 00360 "subs r7, %[eccdm1] \n\t" /* r7 = i = k - (NUM_ECC_DIGITS - 1) (times 4) */ 00361 00362 "3: \n\t" /* inner loop */ 00363 "push {r3, r4, r5, r6} \n\t" /* push things, r3 (c0) is at the top of stack. */ 00364 "subs r0, r6, r7 \n\t" /* r0 = k-i */ 00365 00366 "ldr r4, [r2, r0] \n\t" /* r4 = p_right[k-i] */ 00367 "ldr r0, [r1, r7] \n\t" /* r0 = p_left[i] */ 00368 00369 "lsrs r3, r0, #16 \n\t" /* r3 = a1 */ 00370 "uxth r0, r0 \n\t" /* r0 = a0 */ 00371 00372 "lsrs r5, r4, #16 \n\t" /* r5 = b1 */ 00373 "uxth r4, r4 \n\t" /* r4 = b0 */ 00374 00375 "movs r6, r3 \n\t" /* r6 = a1 */ 00376 "muls r6, r5, r6 \n\t" /* r6 = a1*b1 */ 00377 "muls r3, r4, r3 \n\t" /* r3 = b0*a1 */ 00378 "muls r5, r0, r5 \n\t" /* r5 = a0*b1 */ 00379 "muls r0, r4, r0 \n\t" /* r0 = a0*b0 */ 00380 00381 "movs r4, #0 \n\t" /* r4 = 0 */ 00382 "adds r3, r5 \n\t" /* r3 = b0*a1 + a0*b1 */ 00383 "adcs r4, r4 \n\t" /* r4 = carry */ 00384 "lsls r4, #16 \n\t" /* r4 = carry << 16 */ 00385 "adds r6, r4 \n\t" /* r6 = a1*b1 + carry */ 00386 00387 "lsls r4, r3, #16 \n\t" /* r4 = (b0*a1 + a0*b1) << 16 */ 00388 "lsrs r3, #16 \n\t" /* r3 = (b0*a1 + a0*b1) >> 16 */ 00389 "adds r0, r4 \n\t" /* r0 = low word = a0*b0 + ((b0*a1 + a0*b1) << 16) */ 00390 "adcs r6, r3 \n\t" /* r6 = high word = a1*b1 + carry + ((b0*a1 + a0*b1) >> 16) */ 00391 00392 "pop {r3, r4, r5} \n\t" /* r3 = c0, r4 = c1, r5 = c2 */ 00393 "adds r3, r0 \n\t" /* add low word to c0 */ 00394 "adcs r4, r6 \n\t" /* add high word to c1, including carry */ 00395 "movs r0, #0 \n\t" /* r0 = 0 (does not affect carry bit) */ 00396 "adcs r5, r0 \n\t" /* add carry to c2 */ 00397 00398 "pop {r6} \n\t" /* r6 = k */ 00399 00400 "adds r7, #4 \n\t" /* i += 4 */ 00401 "cmp r7, %[eccd] \n\t" /* i < NUM_ECC_DIGITS (times 4)? */ 00402 "bge 4f \n\t" /* if not, exit the loop */ 00403 "cmp r7, r6 \n\t" /* i <= k? 
*/ 00404 "ble 3b \n\t" /* if so, continue looping */ 00405 00406 "4: \n\t" /* end inner loop */ 00407 00408 "ldr r0, [sp, #0] \n\t" /* r0 = p_result */ 00409 00410 "str r3, [r0, r6] \n\t" /* p_result[k] = c0 */ 00411 "mov r3, r4 \n\t" /* c0 = c1 */ 00412 "mov r4, r5 \n\t" /* c1 = c2 */ 00413 "movs r5, #0 \n\t" /* c2 = 0 */ 00414 "adds r6, #4 \n\t" /* k += 4 */ 00415 "cmp r6, %[eccd] \n\t" /* k < NUM_ECC_DIGITS (times 4) ? */ 00416 "blt 1b \n\t" /* if not, loop back, start with i = 0 */ 00417 "cmp r6, %[eccd2m1] \n\t" /* k < NUM_ECC_DIGITS * 2 - 1 (times 4) ? */ 00418 "blt 2b \n\t" /* if not, loop back, start with i = (k+1) - NUM_ECC_DIGITS */ 00419 /* end outer loop */ 00420 00421 "str r3, [r0, r6] \n\t" /* p_result[NUM_ECC_DIGITS * 2 - 1] = c0 */ 00422 "pop {r0} \n\t" /* pop p_result off the stack */ 00423 00424 ".syntax divided \n\t" 00425 : 00426 : [r0] "l" (r0), [r1] "l" (r1), [r2] "l" (r2), [eccd] "I" (NUM_ECC_DIGITS * 4), [eccdm1] "I" ((NUM_ECC_DIGITS-1) * 4), [eccd2m1] "I" ((NUM_ECC_DIGITS * 2 - 1) * 4) 00427 : "r3", "r4", "r5", "r6", "r7", "cc", "memory" 00428 ); 00429 00430 #else 00431 00432 uint64_t r01 = 0; 00433 uint32_t r2 = 0; 00434 00435 uint i, k; 00436 00437 /* Compute each digit of p_result in sequence, maintaining the carries. */ 00438 for(k=0; k < NUM_ECC_DIGITS*2 - 1; ++k) 00439 { 00440 uint l_min = (k < NUM_ECC_DIGITS ? 0 : (k + 1) - NUM_ECC_DIGITS); 00441 for(i=l_min; i<=k && i<NUM_ECC_DIGITS; ++i) 00442 { 00443 uint64_t l_product = (uint64_t)p_left[i] * p_right[k-i]; 00444 r01 += l_product; 00445 r2 += (r01 < l_product); 00446 } 00447 p_result[k] = (uint32_t)r01; 00448 r01 = (r01 >> 32) | (((uint64_t)r2) << 32); 00449 r2 = 0; 00450 } 00451 00452 p_result[NUM_ECC_DIGITS*2 - 1] = (uint32_t)r01; 00453 #endif 00454 } 00455 00456 /* Computes p_result = (p_left + p_right) % p_mod. 00457 Assumes that p_left < p_mod and p_right < p_mod, p_result != p_mod. 
*/ 00458 static void vli_modAdd(uint32_t *p_result, const uint32_t *p_left, 00459 const uint32_t *p_right, const uint32_t *p_mod) 00460 { 00461 uint32_t l_carry = vli_add(p_result, p_left, p_right); 00462 if(l_carry || vli_cmp(p_result, p_mod) >= 0) 00463 { /* p_result > p_mod (p_result = p_mod + remainder), so subtract p_mod to get remainder. */ 00464 vli_sub(p_result, p_result, p_mod); 00465 } 00466 } 00467 00468 /* Computes p_result = (p_left - p_right) % p_mod. 00469 Assumes that p_left < p_mod and p_right < p_mod, p_result != p_mod. */ 00470 static void vli_modSub(uint32_t *p_result, const uint32_t *p_left, 00471 const uint32_t *p_right, const uint32_t *p_mod) 00472 { 00473 uint32_t l_borrow = vli_sub(p_result, p_left, p_right); 00474 if(l_borrow) 00475 { /* In this case, p_result == -diff == (max int) - diff. 00476 Since -x % d == d - x, we can get the correct result from p_result + p_mod (with overflow). */ 00477 vli_add(p_result, p_result, p_mod); 00478 } 00479 } 00480 00481 #if (ECC_CURVE == secp128r1) 00482 00483 /* Computes p_result = p_product % curve_p. 
00484 See algorithm 5 and 6 from http://www.isys.uni-klu.ac.at/PDF/2001-0126-MT.pdf */ 00485 static void vli_mmod_fast(uint32_t *p_result, const uint32_t *p_product) 00486 { 00487 uint32_t l_tmp[NUM_ECC_DIGITS]; 00488 int l_carry; 00489 00490 vli_set(p_result, p_product); 00491 00492 l_tmp[0] = p_product[4]; 00493 l_tmp[1] = p_product[5]; 00494 l_tmp[2] = p_product[6]; 00495 l_tmp[3] = (p_product[7] & 0x00000001) | (p_product[4] << 1); 00496 l_carry = vli_add(p_result, p_result, l_tmp); 00497 00498 l_tmp[0] = (p_product[4] >> 31) | (p_product[5] << 1); 00499 l_tmp[1] = (p_product[5] >> 31) | (p_product[6] << 1); 00500 l_tmp[2] = (p_product[6] >> 31) | (p_product[7] << 1); 00501 l_tmp[3] = (p_product[7] >> 31) | ((p_product[4] & 0x80000000) >> 30) | (p_product[5] << 2); 00502 l_carry += vli_add(p_result, p_result, l_tmp); 00503 00504 l_tmp[0] = (p_product[5] >> 30) | (p_product[6] << 2); 00505 l_tmp[1] = (p_product[6] >> 30) | (p_product[7] << 2); 00506 l_tmp[2] = (p_product[7] >> 30); 00507 l_tmp[3] = ((p_product[5] & 0xC0000000) >> 29) | (p_product[6] << 3); 00508 l_carry += vli_add(p_result, p_result, l_tmp); 00509 00510 l_tmp[0] = (p_product[6] >> 29) | (p_product[7] << 3); 00511 l_tmp[1] = (p_product[7] >> 29); 00512 l_tmp[2] = 0; 00513 l_tmp[3] = ((p_product[6] & 0xE0000000) >> 28) | (p_product[7] << 4); 00514 l_carry += vli_add(p_result, p_result, l_tmp); 00515 00516 l_tmp[0] = (p_product[7] >> 28); 00517 l_tmp[1] = 0; 00518 l_tmp[2] = 0; 00519 l_tmp[3] = (p_product[7] & 0xFFFFFFFE); 00520 l_carry += vli_add(p_result, p_result, l_tmp); 00521 00522 l_tmp[0] = 0; 00523 l_tmp[1] = 0; 00524 l_tmp[2] = 0; 00525 l_tmp[3] = ((p_product[7] & 0xF0000000) >> 27); 00526 l_carry += vli_add(p_result, p_result, l_tmp); 00527 00528 while(l_carry || vli_cmp(curve_p, p_result) != 1) 00529 { 00530 l_carry -= vli_sub(p_result, p_result, curve_p); 00531 } 00532 } 00533 00534 #elif (ECC_CURVE == secp192r1) 00535 00536 /* Computes p_result = p_product % curve_p. 
00537 See algorithm 5 and 6 from http://www.isys.uni-klu.ac.at/PDF/2001-0126-MT.pdf */ 00538 static void vli_mmod_fast(uint32_t *p_result, const uint32_t *p_product) 00539 { 00540 uint32_t l_tmp[NUM_ECC_DIGITS]; 00541 int l_carry; 00542 00543 vli_set(p_result, p_product); 00544 00545 vli_set(l_tmp, &p_product[6]); 00546 l_carry = vli_add(p_result, p_result, l_tmp); 00547 00548 l_tmp[0] = l_tmp[1] = 0; 00549 l_tmp[2] = p_product[6]; 00550 l_tmp[3] = p_product[7]; 00551 l_tmp[4] = p_product[8]; 00552 l_tmp[5] = p_product[9]; 00553 l_carry += vli_add(p_result, p_result, l_tmp); 00554 00555 l_tmp[0] = l_tmp[2] = p_product[10]; 00556 l_tmp[1] = l_tmp[3] = p_product[11]; 00557 l_tmp[4] = l_tmp[5] = 0; 00558 l_carry += vli_add(p_result, p_result, l_tmp); 00559 00560 while(l_carry || vli_cmp(curve_p, p_result) != 1) 00561 { 00562 l_carry -= vli_sub(p_result, p_result, curve_p); 00563 } 00564 } 00565 00566 #elif (ECC_CURVE == secp256r1) 00567 00568 /* Computes p_result = p_product % curve_p 00569 from http://www.nsa.gov/ia/_files/nist-routines.pdf */ 00570 static void vli_mmod_fast(uint32_t *p_result, const uint32_t *p_product) 00571 { 00572 uint32_t l_tmp[NUM_ECC_DIGITS]; 00573 int l_carry; 00574 00575 /* t */ 00576 vli_set(p_result, p_product); 00577 00578 /* s1 */ 00579 l_tmp[0] = l_tmp[1] = l_tmp[2] = 0; 00580 l_tmp[3] = p_product[11]; 00581 l_tmp[4] = p_product[12]; 00582 l_tmp[5] = p_product[13]; 00583 l_tmp[6] = p_product[14]; 00584 l_tmp[7] = p_product[15]; 00585 l_carry = vli_lshift(l_tmp, l_tmp, 1); 00586 l_carry += vli_add(p_result, p_result, l_tmp); 00587 00588 /* s2 */ 00589 l_tmp[3] = p_product[12]; 00590 l_tmp[4] = p_product[13]; 00591 l_tmp[5] = p_product[14]; 00592 l_tmp[6] = p_product[15]; 00593 l_tmp[7] = 0; 00594 l_carry += vli_lshift(l_tmp, l_tmp, 1); 00595 l_carry += vli_add(p_result, p_result, l_tmp); 00596 00597 /* s3 */ 00598 l_tmp[0] = p_product[8]; 00599 l_tmp[1] = p_product[9]; 00600 l_tmp[2] = p_product[10]; 00601 l_tmp[3] = l_tmp[4] = l_tmp[5] 
= 0; 00602 l_tmp[6] = p_product[14]; 00603 l_tmp[7] = p_product[15]; 00604 l_carry += vli_add(p_result, p_result, l_tmp); 00605 00606 /* s4 */ 00607 l_tmp[0] = p_product[9]; 00608 l_tmp[1] = p_product[10]; 00609 l_tmp[2] = p_product[11]; 00610 l_tmp[3] = p_product[13]; 00611 l_tmp[4] = p_product[14]; 00612 l_tmp[5] = p_product[15]; 00613 l_tmp[6] = p_product[13]; 00614 l_tmp[7] = p_product[8]; 00615 l_carry += vli_add(p_result, p_result, l_tmp); 00616 00617 /* d1 */ 00618 l_tmp[0] = p_product[11]; 00619 l_tmp[1] = p_product[12]; 00620 l_tmp[2] = p_product[13]; 00621 l_tmp[3] = l_tmp[4] = l_tmp[5] = 0; 00622 l_tmp[6] = p_product[8]; 00623 l_tmp[7] = p_product[10]; 00624 l_carry -= vli_sub(p_result, p_result, l_tmp); 00625 00626 /* d2 */ 00627 l_tmp[0] = p_product[12]; 00628 l_tmp[1] = p_product[13]; 00629 l_tmp[2] = p_product[14]; 00630 l_tmp[3] = p_product[15]; 00631 l_tmp[4] = l_tmp[5] = 0; 00632 l_tmp[6] = p_product[9]; 00633 l_tmp[7] = p_product[11]; 00634 l_carry -= vli_sub(p_result, p_result, l_tmp); 00635 00636 /* d3 */ 00637 l_tmp[0] = p_product[13]; 00638 l_tmp[1] = p_product[14]; 00639 l_tmp[2] = p_product[15]; 00640 l_tmp[3] = p_product[8]; 00641 l_tmp[4] = p_product[9]; 00642 l_tmp[5] = p_product[10]; 00643 l_tmp[6] = 0; 00644 l_tmp[7] = p_product[12]; 00645 l_carry -= vli_sub(p_result, p_result, l_tmp); 00646 00647 /* d4 */ 00648 l_tmp[0] = p_product[14]; 00649 l_tmp[1] = p_product[15]; 00650 l_tmp[2] = 0; 00651 l_tmp[3] = p_product[9]; 00652 l_tmp[4] = p_product[10]; 00653 l_tmp[5] = p_product[11]; 00654 l_tmp[6] = 0; 00655 l_tmp[7] = p_product[13]; 00656 l_carry -= vli_sub(p_result, p_result, l_tmp); 00657 00658 if(l_carry < 0) 00659 { 00660 do 00661 { 00662 l_carry += vli_add(p_result, p_result, curve_p); 00663 } while(l_carry < 0); 00664 } 00665 else 00666 { 00667 while(l_carry || vli_cmp(curve_p, p_result) != 1) 00668 { 00669 l_carry -= vli_sub(p_result, p_result, curve_p); 00670 } 00671 } 00672 } 00673 00674 #elif (ECC_CURVE == secp384r1) 00675 
00676 static void omega_mult(uint32_t *p_result, const uint32_t *p_right) 00677 { 00678 /* Multiply by (2^128 + 2^96 - 2^32 + 1). */ 00679 vli_set(p_result, p_right); /* 1 */ 00680 p_result[3 + NUM_ECC_DIGITS] = vli_add(p_result + 3, p_result + 3, p_right); /* 2^96 + 1 */ 00681 p_result[4 + NUM_ECC_DIGITS] = vli_add(p_result + 4, p_result + 4, p_right); /* 2^128 + 2^96 + 1 */ 00682 if(vli_sub(p_result + 1, p_result + 1, p_right)) /* 2^128 + 2^96 - 2^32 + 1 */ 00683 { /* Propagate borrow if necessary. */ 00684 uint i; 00685 for(i = 1 + NUM_ECC_DIGITS; ; ++i) 00686 { 00687 --p_result[i]; 00688 if(p_result[i] != 0xffffffff) 00689 { 00690 break; 00691 } 00692 } 00693 } 00694 } 00695 00696 /* Computes p_result = p_product % curve_p 00697 see PDF "Comparing Elliptic Curve Cryptography and RSA on 8-bit CPUs" 00698 section "Curve-Specific Optimizations" */ 00699 static void vli_mmod_fast(uint32_t *p_result, const uint32_t *p_product) 00700 { 00701 uint32_t l_tmp[2*NUM_ECC_DIGITS]; 00702 00703 while(!vli_isZero(p_product + NUM_ECC_DIGITS)) /* While c1 != 0 */ 00704 { 00705 uint32_t l_carry = 0; 00706 uint i; 00707 00708 vli_clear(l_tmp); 00709 vli_clear(l_tmp + NUM_ECC_DIGITS); 00710 omega_mult(l_tmp, p_product + NUM_ECC_DIGITS); /* tmp = w * c1 */ 00711 vli_clear(p_product + NUM_ECC_DIGITS); /* p = c0 */ 00712 00713 /* (c1, c0) = c0 + w * c1 */ 00714 for(i=0; i<NUM_ECC_DIGITS+5; ++i) 00715 { 00716 uint32_t l_sum = p_product[i] + l_tmp[i] + l_carry; 00717 if(l_sum != p_product[i]) 00718 { 00719 l_carry = (l_sum < p_product[i]); 00720 } 00721 p_product[i] = l_sum; 00722 } 00723 } 00724 00725 while(vli_cmp(p_product, curve_p) > 0) 00726 { 00727 vli_sub(p_product, p_product, curve_p); 00728 } 00729 vli_set(p_result, p_product); 00730 } 00731 00732 #elif (ECC_CURVE == secp256k1) 00733 00734 static void omega_mult(uint32_t *p_result, const uint32_t *p_right) 00735 { 00736 /* Multiply by (2^32 + 2^9 + 2^8 + 2^7 + 2^6 + 2^4 + 1). 
*/ 00737 uint64_t l_mult = 0x3D1; /* everything except 2^32 */ 00738 uint32_t l_carry = 0; 00739 uint i; 00740 for(i=0; i<NUM_ECC_DIGITS; ++i) 00741 { 00742 uint64_t p = l_mult * p_right[i] + l_carry; 00743 p_result[i] = (p & 0xffffffff); 00744 l_carry = p >> 32; 00745 } 00746 p_result[NUM_ECC_DIGITS] = l_carry; 00747 00748 p_result[1 + NUM_ECC_DIGITS] = vli_add(p_result + 1, p_result + 1, p_right); /* add the 2^32 multiple */ 00749 } 00750 00751 /* Computes p_result = p_product % curve_p */ 00752 static void vli_mmod_fast(uint32_t *p_result, const uint32_t *p_product) 00753 { 00754 uint32_t l_tmp[2*NUM_ECC_DIGITS]; 00755 00756 while(!vli_isZero(p_product + NUM_ECC_DIGITS)) /* While c1 != 0 */ 00757 { 00758 uint32_t l_carry = 0; 00759 uint i; 00760 00761 vli_clear(l_tmp); 00762 vli_clear(l_tmp + NUM_ECC_DIGITS); 00763 omega_mult(l_tmp, p_product + NUM_ECC_DIGITS); /* tmp = w * c1 */ 00764 vli_clear(p_product + NUM_ECC_DIGITS); /* p = c0 */ 00765 00766 /* (c1, c0) = c0 + w * c1 */ 00767 for(i=0; i<NUM_ECC_DIGITS+2; ++i) 00768 { 00769 uint32_t l_sum = p_product[i] + l_tmp[i] + l_carry; 00770 if(l_sum != p_product[i]) 00771 { 00772 l_carry = (l_sum < p_product[i]); 00773 } 00774 p_product[i] = l_sum; 00775 } 00776 } 00777 00778 while(vli_cmp(p_product, curve_p) > 0) 00779 { 00780 vli_sub(p_product, p_product, curve_p); 00781 } 00782 vli_set(p_result, p_product); 00783 } 00784 00785 #endif 00786 00787 /* Computes p_result = (p_left * p_right) % curve_p. */ 00788 static void vli_modMult_fast(uint32_t *p_result, const uint32_t *p_left, const uint32_t *p_right) 00789 { 00790 uint32_t l_product[2 * NUM_ECC_DIGITS]; 00791 vli_mult(l_product, p_left, p_right); 00792 vli_mmod_fast(p_result, l_product); 00793 } 00794 00795 #if ECC_SQUARE_FUNC 00796 00797 /* Computes p_result = p_left^2. 
*/ 00798 static void vli_square(uint32_t *p_result, const uint32_t *p_left) 00799 { 00800 #if (ECC_ASM == ecc_asm_thumb2 || ECC_ASM == ecc_asm_arm) 00801 uint32_t c0 = 0; 00802 uint32_t c1 = 0; 00803 uint32_t c2 = 0; 00804 uint32_t k = 0; 00805 uint32_t i, tt; 00806 uint32_t t0, t1; 00807 00808 asm volatile ( 00809 ".syntax unified \n\t" 00810 00811 "1: \n\t" /* outer loop (k < NUM_ECC_DIGITS) */ 00812 "movs %[i], #0 \n\t" /* i = 0 */ 00813 "b 3f \n\t" 00814 00815 "2: \n\t" /* outer loop (k >= NUM_ECC_DIGITS) */ 00816 "movs %[i], %[k] \n\t" /* i = k */ 00817 "subs %[i], %[eccdm1] \n\t" /* i = k - (NUM_ECC_DIGITS - 1) (times 4) */ 00818 00819 "3: \n\t" /* inner loop */ 00820 "subs %[tt], %[k], %[i] \n\t" /* tt = k-i */ 00821 00822 "ldr %[t1], [%[left], %[tt]] \n\t" /* t1 = p_left[k-i] */ 00823 "ldr %[t0], [%[left], %[i]] \n\t" /* t0 = p_left[i] */ 00824 00825 "umull %[t0], %[t1], %[t0], %[t1] \n\t" /* (t0, t1) = p_left[i] * p_right[k-i] */ 00826 00827 "cmp %[i], %[tt] \n\t" /* (i < k-i) ? */ 00828 "bge 4f \n\t" /* if i >= k-i, skip */ 00829 "lsls %[t1], #1 \n\t" /* high word << 1 */ 00830 "adc %[c2], #0 \n\t" /* add carry bit to c2 */ 00831 "lsls %[t0], #1 \n\t" /* low word << 1 */ 00832 "adc %[t1], #0 \n\t" /* add carry bit to high word */ 00833 00834 "4: \n\t" 00835 00836 "adds %[c0], %[t0] \n\t" /* add low word to c0 */ 00837 "adcs %[c1], %[t1] \n\t" /* add high word to c1, including carry */ 00838 "adc %[c2], #0 \n\t" /* add carry to c2 */ 00839 00840 "adds %[i], #4 \n\t" /* i += 4 */ 00841 "cmp %[i], %[k] \n\t" /* i <= k? */ 00842 "bge 5f \n\t" /* if not, exit the loop */ 00843 "subs %[tt], %[k], %[i] \n\t" /* tt = k-i */ 00844 "cmp %[i], %[tt] \n\t" /* i <= k-i? 
*/ 00845 "ble 3b \n\t" /* if so, continue looping */ 00846 00847 "5: \n\t" /* end inner loop */ 00848 00849 "str %[c0], [%[result], %[k]] \n\t" /* p_result[k] = c0 */ 00850 "mov %[c0], %[c1] \n\t" /* c0 = c1 */ 00851 "mov %[c1], %[c2] \n\t" /* c1 = c2 */ 00852 "movs %[c2], #0 \n\t" /* c2 = 0 */ 00853 "adds %[k], #4 \n\t" /* k += 4 */ 00854 "cmp %[k], %[eccd] \n\t" /* k < NUM_ECC_DIGITS (times 4) ? */ 00855 "blt 1b \n\t" /* if not, loop back, start with i = 0 */ 00856 "cmp %[k], %[eccd2m1] \n\t" /* k < NUM_ECC_DIGITS * 2 - 1 (times 4) ? */ 00857 "blt 2b \n\t" /* if not, loop back, start with i = (k+1) - NUM_ECC_DIGITS */ 00858 /* end outer loop */ 00859 00860 "str %[c0], [%[result], %[k]] \n\t" /* p_result[NUM_ECC_DIGITS * 2 - 1] = c0 */ 00861 #if (ECC_ASM != ecc_asm_thumb2) 00862 ".syntax divided \n\t" 00863 #endif 00864 : [c0] "+r" (c0), [c1] "+r" (c1), [c2] "+r" (c2), [k] "+r" (k), [i] "=&r" (i), [tt] "=&r" (tt), [t0] "=&r" (t0), [t1] "=&r" (t1) 00865 : [result] "r" (p_result), [left] "r" (p_left), 00866 [eccd] "I" (NUM_ECC_DIGITS * 4), [eccdm1] "I" ((NUM_ECC_DIGITS-1) * 4), [eccd2m1] "I" ((NUM_ECC_DIGITS * 2 - 1) * 4) 00867 : "cc", "memory" 00868 ); 00869 00870 #elif (ECC_ASM == ecc_asm_thumb) 00871 00872 register uint32_t *r0 asm("r0") = p_result; 00873 register uint32_t *r1 asm("r1") = p_left; 00874 00875 asm volatile ( 00876 ".syntax unified \n\t" 00877 "movs r2, #0 \n\t" /* c0 = 0 */ 00878 "movs r3, #0 \n\t" /* c1 = 0 */ 00879 "movs r4, #0 \n\t" /* c2 = 0 */ 00880 "movs r5, #0 \n\t" /* k = 0 */ 00881 00882 "push {r0} \n\t" /* keep p_result on the stack */ 00883 00884 "1: \n\t" /* outer loop (k < NUM_ECC_DIGITS) */ 00885 "movs r6, #0 \n\t" /* r6 = i = 0 */ 00886 "b 3f \n\t" 00887 00888 "2: \n\t" /* outer loop (k >= NUM_ECC_DIGITS) */ 00889 "movs r6, r5 \n\t" /* r6 = k */ 00890 "subs r6, %[eccdm1] \n\t" /* r6 = i = k - (NUM_ECC_DIGITS - 1) (times 4) */ 00891 00892 "3: \n\t" /* inner loop */ 00893 "push {r2, r3, r4, r5} \n\t" /* push things, r2 (c0) is at the 
top of stack. */ 00894 "subs r7, r5, r6 \n\t" /* r7 = k-i */ 00895 00896 "ldr r3, [r1, r7] \n\t" /* r3 = p_left[k-i] */ 00897 "ldr r0, [r1, r6] \n\t" /* r0 = p_left[i] */ 00898 00899 "lsrs r2, r0, #16 \n\t" /* r2 = a1 */ 00900 "uxth r0, r0 \n\t" /* r0 = a0 */ 00901 00902 "lsrs r4, r3, #16 \n\t" /* r4 = b1 */ 00903 "uxth r3, r3 \n\t" /* r3 = b0 */ 00904 00905 "movs r5, r2 \n\t" /* r5 = a1 */ 00906 "muls r5, r4, r5 \n\t" /* r5 = a1*b1 */ 00907 "muls r2, r3, r2 \n\t" /* r2 = b0*a1 */ 00908 "muls r4, r0, r4 \n\t" /* r4 = a0*b1 */ 00909 "muls r0, r3, r0 \n\t" /* r0 = a0*b0 */ 00910 00911 "movs r3, #0 \n\t" /* r3 = 0 */ 00912 "adds r2, r4 \n\t" /* r2 = b0*a1 + a0*b1 */ 00913 "adcs r3, r3 \n\t" /* r3 = carry */ 00914 "lsls r3, #16 \n\t" /* r3 = carry << 16 */ 00915 "adds r5, r3 \n\t" /* r5 = a1*b1 + carry */ 00916 00917 "lsls r3, r2, #16 \n\t" /* r3 = (b0*a1 + a0*b1) << 16 */ 00918 "lsrs r2, #16 \n\t" /* r2 = (b0*a1 + a0*b1) >> 16 */ 00919 "adds r0, r3 \n\t" /* r0 = low word = a0*b0 + ((b0*a1 + a0*b1) << 16) */ 00920 "adcs r5, r2 \n\t" /* r5 = high word = a1*b1 + carry + ((b0*a1 + a0*b1) >> 16) */ 00921 00922 "movs r3, #0 \n\t" /* r3 = 0 */ 00923 "cmp r6, r7 \n\t" /* (i < k-i) ? 
*/ 00924 "mov r7, r3 \n\t" /* r7 = 0 (does not affect condition)*/ 00925 "bge 4f \n\t" /* if i >= k-i, skip */ 00926 "lsls r5, #1 \n\t" /* high word << 1 */ 00927 "adcs r7, r3 \n\t" /* r7 = carry bit for c2 */ 00928 "lsls r0, #1 \n\t" /* low word << 1 */ 00929 "adcs r5, r3 \n\t" /* add carry from shift to high word */ 00930 00931 "4: \n\t" 00932 "pop {r2, r3, r4} \n\t" /* r2 = c0, r3 = c1, r4 = c2 */ 00933 "adds r2, r0 \n\t" /* add low word to c0 */ 00934 "adcs r3, r5 \n\t" /* add high word to c1, including carry */ 00935 "movs r0, #0 \n\t" /* r0 = 0 (does not affect carry bit) */ 00936 "adcs r4, r0 \n\t" /* add carry to c2 */ 00937 "adds r4, r7 \n\t" /* add carry from doubling (if any) */ 00938 00939 "pop {r5} \n\t" /* r5 = k */ 00940 00941 "adds r6, #4 \n\t" /* i += 4 */ 00942 "cmp r6, r5 \n\t" /* i <= k? */ 00943 "bge 5f \n\t" /* if not, exit the loop */ 00944 "subs r7, r5, r6 \n\t" /* r7 = k-i */ 00945 "cmp r6, r7 \n\t" /* i <= k-i? */ 00946 "ble 3b \n\t" /* if so, continue looping */ 00947 00948 "5: \n\t" /* end inner loop */ 00949 00950 "ldr r0, [sp, #0] \n\t" /* r0 = p_result */ 00951 00952 "str r2, [r0, r5] \n\t" /* p_result[k] = c0 */ 00953 "mov r2, r3 \n\t" /* c0 = c1 */ 00954 "mov r3, r4 \n\t" /* c1 = c2 */ 00955 "movs r4, #0 \n\t" /* c2 = 0 */ 00956 "adds r5, #4 \n\t" /* k += 4 */ 00957 "cmp r5, %[eccd] \n\t" /* k < NUM_ECC_DIGITS (times 4) ? */ 00958 "blt 1b \n\t" /* if not, loop back, start with i = 0 */ 00959 "cmp r5, %[eccd2m1] \n\t" /* k < NUM_ECC_DIGITS * 2 - 1 (times 4) ? 
*/ 00960 "blt 2b \n\t" /* if not, loop back, start with i = (k+1) - NUM_ECC_DIGITS */ 00961 /* end outer loop */ 00962 00963 "str r2, [r0, r5] \n\t" /* p_result[NUM_ECC_DIGITS * 2 - 1] = c0 */ 00964 "pop {r0} \n\t" /* pop p_result off the stack */ 00965 00966 ".syntax divided \n\t" 00967 : [r0] "+l" (r0), [r1] "+l" (r1) 00968 : [eccd] "I" (NUM_ECC_DIGITS * 4), [eccdm1] "I" ((NUM_ECC_DIGITS-1) * 4), [eccd2m1] "I" ((NUM_ECC_DIGITS * 2 - 1) * 4) 00969 : "r2", "r3", "r4", "r5", "r6", "r7", "cc", "memory" 00970 ); 00971 00972 #else 00973 00974 uint64_t r01 = 0; 00975 uint32_t r2 = 0; 00976 00977 uint i, k; 00978 for(k=0; k < NUM_ECC_DIGITS*2 - 1; ++k) 00979 { 00980 uint l_min = (k < NUM_ECC_DIGITS ? 0 : (k + 1) - NUM_ECC_DIGITS); 00981 for(i=l_min; i<=k && i<=k-i; ++i) 00982 { 00983 uint64_t l_product = (uint64_t)p_left[i] * p_left[k-i]; 00984 if(i < k-i) 00985 { 00986 r2 += l_product >> 63; 00987 l_product *= 2; 00988 } 00989 r01 += l_product; 00990 r2 += (r01 < l_product); 00991 } 00992 p_result[k] = (uint32_t)r01; 00993 r01 = (r01 >> 32) | (((uint64_t)r2) << 32); 00994 r2 = 0; 00995 } 00996 00997 p_result[NUM_ECC_DIGITS*2 - 1] = (uint32_t)r01; 00998 #endif 00999 } 01000 01001 /* Computes p_result = p_left^2 % curve_p. */ 01002 static void vli_modSquare_fast(uint32_t *p_result, const uint32_t *p_left) 01003 { 01004 uint32_t l_product[2 * NUM_ECC_DIGITS]; 01005 vli_square(l_product, p_left); 01006 vli_mmod_fast(p_result, l_product); 01007 } 01008 01009 #else /* ECC_SQUARE_FUNC */ 01010 01011 #define vli_square(result, left, size) vli_mult((result), (left), (left), (size)) 01012 #define vli_modSquare_fast(result, left) vli_modMult_fast((result), (left), (left)) 01013 01014 #endif /* ECC_SQUARE_FUNC */ 01015 01016 #define EVEN(vli) (!(vli[0] & 1)) 01017 /* Computes p_result = (1 / p_input) % p_mod. All VLIs are the same size. 
   See "From Euclid's GCD to Montgomery Multiplication to the Great Divide"
   https://labs.oracle.com/techrep/2001/smli_tr-2001-95.pdf */
static void vli_modInv(uint32_t *p_result, const uint32_t *p_input, const uint32_t *p_mod)
{
    /* Binary extended-Euclid (the algorithm from the paper cited above).
       a/b converge on gcd(p_input, p_mod) while u/v track the cofactors;
       when a == b, u holds the modular inverse. All the halving steps
       assume p_mod is odd, which holds for every supported curve prime. */
    uint32_t a[NUM_ECC_DIGITS], b[NUM_ECC_DIGITS], u[NUM_ECC_DIGITS], v[NUM_ECC_DIGITS];
    uint32_t l_carry;

    vli_set(a, p_input);
    vli_set(b, p_mod);
    vli_clear(u);
    u[0] = 1;
    vli_clear(v);

    int l_cmpResult;
    while((l_cmpResult = vli_cmp(a, b)) != 0)
    {
        l_carry = 0;
        if(EVEN(a))
        {
            /* a even: halve a and halve u (mod p_mod). If u is odd, add
               p_mod first so the shift is exact; a carry out of the add
               becomes the shifted-in top bit. */
            vli_rshift1(a);
            if(!EVEN(u))
            {
                l_carry = vli_add(u, u, p_mod);
            }
            vli_rshift1(u);
            if(l_carry)
            {
                u[NUM_ECC_DIGITS-1] |= 0x80000000;
            }
        }
        else if(EVEN(b))
        {
            /* Same halving step, applied to b/v. */
            vli_rshift1(b);
            if(!EVEN(v))
            {
                l_carry = vli_add(v, v, p_mod);
            }
            vli_rshift1(v);
            if(l_carry)
            {
                v[NUM_ECC_DIGITS-1] |= 0x80000000;
            }
        }
        else if(l_cmpResult > 0)
        {
            /* Both odd, a > b: a = (a - b)/2 and u = (u - v)/2 (mod p_mod). */
            vli_sub(a, a, b);
            vli_rshift1(a);
            if(vli_cmp(u, v) < 0)
            {
                vli_add(u, u, p_mod); /* avoid underflow in u - v */
            }
            vli_sub(u, u, v);
            if(!EVEN(u))
            {
                l_carry = vli_add(u, u, p_mod);
            }
            vli_rshift1(u);
            if(l_carry)
            {
                u[NUM_ECC_DIGITS-1] |= 0x80000000;
            }
        }
        else
        {
            /* Both odd, b > a: mirrored update of b/v. */
            vli_sub(b, b, a);
            vli_rshift1(b);
            if(vli_cmp(v, u) < 0)
            {
                vli_add(v, v, p_mod);
            }
            vli_sub(v, v, u);
            if(!EVEN(v))
            {
                l_carry = vli_add(v, v, p_mod);
            }
            vli_rshift1(v);
            if(l_carry)
            {
                v[NUM_ECC_DIGITS-1] |= 0x80000000;
            }
        }
    }

    vli_set(p_result, u);
}

/* ------ Point operations ------ */

/* Returns 1 if p_point is the point at infinity, 0 otherwise.
*/
static int EccPoint_isZero(const EccPoint *p_point)
{
    return (vli_isZero(p_point->x) && vli_isZero(p_point->y));
}

/* Point multiplication algorithm using Montgomery's ladder with co-Z coordinates.
From http://eprint.iacr.org/2011/338.pdf
*/

/* Double in place, Jacobian coordinates: (X1, Y1, Z1) <- 2*(X1, Y1, Z1).
   Temporary naming follows the paper: t1 = X, t2 = Y, t3 = Z. */
#if (ECC_CURVE == secp256k1)
/* secp256k1 variant: the curve coefficient a is 0 (see curve_x_side),
   so the tangent-slope numerator is simply 3*x1^2. */
static void EccPoint_double_jacobian(uint32_t *X1, uint32_t *Y1, uint32_t *Z1)
{
    /* t1 = X, t2 = Y, t3 = Z */
    uint32_t t4[NUM_ECC_DIGITS];
    uint32_t t5[NUM_ECC_DIGITS];

    if(vli_isZero(Z1))
    {
        return; /* the point at infinity doubles to itself */
    }

    vli_modSquare_fast(t5, Y1);      /* t5 = y1^2 */
    vli_modMult_fast(t4, X1, t5);    /* t4 = x1*y1^2 = A */
    vli_modSquare_fast(X1, X1);      /* t1 = x1^2 */
    vli_modSquare_fast(t5, t5);      /* t5 = y1^4 */
    vli_modMult_fast(Z1, Y1, Z1);    /* t3 = y1*z1 = z3 */

    vli_modAdd(Y1, X1, X1, curve_p); /* t2 = 2*x1^2 */
    vli_modAdd(Y1, Y1, X1, curve_p); /* t2 = 3*x1^2 */
    if(vli_testBit(Y1, 0))
    {
        /* Odd: add curve_p (making the value even) before halving; the
           add's carry becomes the shifted-in top bit. */
        uint32_t l_carry = vli_add(Y1, Y1, curve_p);
        vli_rshift1(Y1);
        Y1[NUM_ECC_DIGITS-1] |= l_carry << 31;
    }
    else
    {
        vli_rshift1(Y1);
    }
    /* t2 = 3/2*(x1^2) = B */

    vli_modSquare_fast(X1, Y1);      /* t1 = B^2 */
    vli_modSub(X1, X1, t4, curve_p); /* t1 = B^2 - A */
    vli_modSub(X1, X1, t4, curve_p); /* t1 = B^2 - 2A = x3 */

    vli_modSub(t4, t4, X1, curve_p); /* t4 = A - x3 */
    vli_modMult_fast(Y1, Y1, t4);    /* t2 = B * (A - x3) */
    vli_modSub(Y1, Y1, t5, curve_p); /* t2 = B * (A - x3) - y1^4 = y3 */
}
#else
/* Variant for curves with a = -3 (see curve_x_side): exploits
   3*(x1^2 - z1^4) = 3*(x1 - z1^2)*(x1 + z1^2). */
static void EccPoint_double_jacobian(uint32_t *X1, uint32_t *Y1, uint32_t *Z1)
{
    /* t1 = X, t2 = Y, t3 = Z */
    uint32_t t4[NUM_ECC_DIGITS];
    uint32_t t5[NUM_ECC_DIGITS];

    if(vli_isZero(Z1))
    {
        return; /* the point at infinity doubles to itself */
    }

    vli_modSquare_fast(t4, Y1); /* t4 = y1^2 */
    vli_modMult_fast(t5,
X1, t4);                    /* t5 = x1*y1^2 = A */
    vli_modSquare_fast(t4, t4);      /* t4 = y1^4 */
    vli_modMult_fast(Y1, Y1, Z1);    /* t2 = y1*z1 = z3 */
    vli_modSquare_fast(Z1, Z1);      /* t3 = z1^2 */

    vli_modAdd(X1, X1, Z1, curve_p); /* t1 = x1 + z1^2 */
    vli_modAdd(Z1, Z1, Z1, curve_p); /* t3 = 2*z1^2 */
    vli_modSub(Z1, X1, Z1, curve_p); /* t3 = x1 - z1^2 */
    vli_modMult_fast(X1, X1, Z1);    /* t1 = x1^2 - z1^4 */

    vli_modAdd(Z1, X1, X1, curve_p); /* t3 = 2*(x1^2 - z1^4) */
    vli_modAdd(X1, X1, Z1, curve_p); /* t1 = 3*(x1^2 - z1^4) */
    if(vli_testBit(X1, 0))
    {
        /* Odd: add curve_p before halving; the add's carry becomes the
           shifted-in top bit. */
        uint32_t l_carry = vli_add(X1, X1, curve_p);
        vli_rshift1(X1);
        X1[NUM_ECC_DIGITS-1] |= l_carry << 31;
    }
    else
    {
        vli_rshift1(X1);
    }
    /* t1 = 3/2*(x1^2 - z1^4) = B */

    vli_modSquare_fast(Z1, X1);      /* t3 = B^2 */
    vli_modSub(Z1, Z1, t5, curve_p); /* t3 = B^2 - A */
    vli_modSub(Z1, Z1, t5, curve_p); /* t3 = B^2 - 2A = x3 */
    vli_modSub(t5, t5, Z1, curve_p); /* t5 = A - x3 */
    vli_modMult_fast(X1, X1, t5);    /* t1 = B * (A - x3) */
    vli_modSub(t4, X1, t4, curve_p); /* t4 = B * (A - x3) - y1^4 = y3 */

    /* Rotate the results back into the X/Y/Z output slots. */
    vli_set(X1, Z1);
    vli_set(Z1, Y1);
    vli_set(Y1, t4);
}
#endif

/* Modify (x1, y1) => (x1 * z^2, y1 * z^3).
   Re-expresses an affine (or co-Z) point with the given Z value. */
static void apply_z(uint32_t *X1, uint32_t *Y1, uint32_t *Z)
{
    uint32_t t1[NUM_ECC_DIGITS];

    vli_modSquare_fast(t1, Z);    /* z^2 */
    vli_modMult_fast(X1, X1, t1); /* x1 * z^2 */
    vli_modMult_fast(t1, t1, Z);  /* z^3 */
    vli_modMult_fast(Y1, Y1, t1); /* y1 * z^3 */
}

/* P = (x1, y1) => 2P, (x2, y2) => P'
   Initializes the two Montgomery-ladder registers: on return
   (X1, Y1) holds 2P and (X2, Y2) holds P, sharing a common Z.
   p_initialZ, when non-NULL, seeds Z with a caller-chosen value
   (ecdh_shared_secret passes its random input here). */
static void XYcZ_initial_double(uint32_t *X1, uint32_t *Y1, uint32_t *X2, uint32_t *Y2, const uint32_t *p_initialZ)
{
    uint32_t z[NUM_ECC_DIGITS];

    vli_set(X2, X1);
    vli_set(Y2, Y1);

    vli_clear(z);
    z[0] = 1; /* default Z = 1 (affine input) */
    if(p_initialZ)
    {
vli_set(z, p_initialZ); /* use the caller-supplied Z instead of 1 */
    }
    apply_z(X1, Y1, z);

    EccPoint_double_jacobian(X1, Y1, z);

    apply_z(X2, Y2, z);
}

/* Input P = (x1, y1, Z), Q = (x2, y2, Z)
   Output P' = (x1', y1', Z3), P + Q = (x3, y3, Z3)
   or P => P', Q => P + Q
*/
static void XYcZ_add(uint32_t *X1, uint32_t *Y1, uint32_t *X2, uint32_t *Y2)
{
    /* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
    uint32_t t5[NUM_ECC_DIGITS];

    vli_modSub(t5, X2, X1, curve_p); /* t5 = x2 - x1 */
    vli_modSquare_fast(t5, t5);      /* t5 = (x2 - x1)^2 = A */
    vli_modMult_fast(X1, X1, t5);    /* t1 = x1*A = B */
    vli_modMult_fast(X2, X2, t5);    /* t3 = x2*A = C */
    vli_modSub(Y2, Y2, Y1, curve_p); /* t4 = y2 - y1 */
    vli_modSquare_fast(t5, Y2);      /* t5 = (y2 - y1)^2 = D */

    vli_modSub(t5, t5, X1, curve_p); /* t5 = D - B */
    vli_modSub(t5, t5, X2, curve_p); /* t5 = D - B - C = x3 */
    vli_modSub(X2, X2, X1, curve_p); /* t3 = C - B */
    vli_modMult_fast(Y1, Y1, X2);    /* t2 = y1*(C - B) */
    vli_modSub(X2, X1, t5, curve_p); /* t3 = B - x3 */
    vli_modMult_fast(Y2, Y2, X2);    /* t4 = (y2 - y1)*(B - x3) */
    vli_modSub(Y2, Y2, Y1, curve_p); /* t4 = y3 */

    vli_set(X2, t5);
}

/* Input P = (x1, y1, Z), Q = (x2, y2, Z)
   Output P + Q = (x3, y3, Z3), P - Q = (x3', y3', Z3)
   or P => P - Q, Q => P + Q
*/
static void XYcZ_addC(uint32_t *X1, uint32_t *Y1, uint32_t *X2, uint32_t *Y2)
{
    /* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
    uint32_t t5[NUM_ECC_DIGITS];
    uint32_t t6[NUM_ECC_DIGITS];
    uint32_t t7[NUM_ECC_DIGITS];

    vli_modSub(t5, X2, X1, curve_p); /* t5 = x2 - x1 */
    vli_modSquare_fast(t5, t5);      /* t5 = (x2 - x1)^2 = A */
    vli_modMult_fast(X1, X1, t5);    /* t1 = x1*A = B */
    vli_modMult_fast(X2, X2, t5);    /* t3 = x2*A = C */
    vli_modAdd(t5, Y2, Y1, curve_p); /* t4 = y2 + y1 */
    vli_modSub(Y2, Y2,
Y1, curve_p);       /* t4 = y2 - y1 */

    vli_modSub(t6, X2, X1, curve_p); /* t6 = C - B */
    vli_modMult_fast(Y1, Y1, t6);    /* t2 = y1 * (C - B) */
    vli_modAdd(t6, X1, X2, curve_p); /* t6 = B + C */
    vli_modSquare_fast(X2, Y2);      /* t3 = (y2 - y1)^2 */
    vli_modSub(X2, X2, t6, curve_p); /* t3 = x3 */

    vli_modSub(t7, X1, X2, curve_p); /* t7 = B - x3 */
    vli_modMult_fast(Y2, Y2, t7);    /* t4 = (y2 - y1)*(B - x3) */
    vli_modSub(Y2, Y2, Y1, curve_p); /* t4 = y3 */

    vli_modSquare_fast(t7, t5);      /* t7 = (y2 + y1)^2 = F */
    vli_modSub(t7, t7, t6, curve_p); /* t7 = x3' */
    vli_modSub(t6, t7, X1, curve_p); /* t6 = x3' - B */
    vli_modMult_fast(t6, t6, t5);    /* t6 = (y2 + y1)*(x3' - B) */
    vli_modSub(Y1, t6, Y1, curve_p); /* t2 = y3' */

    vli_set(X1, t7);
}

/* Scalar multiplication: p_result = p_scalar * p_point.
   Montgomery ladder over the co-Z primitives above; every scalar bit
   performs the same XYcZ_addC/XYcZ_add pair, only the register indices
   depend on the bit value. p_initialZ optionally seeds the ladder's Z
   (see XYcZ_initial_double). */
static void EccPoint_mult(EccPoint *p_result, const EccPoint *p_point,
    const uint32_t *p_scalar, const uint32_t *p_initialZ)
{
    /* R0 and R1 */
    uint32_t Rx[2][NUM_ECC_DIGITS];
    uint32_t Ry[2][NUM_ECC_DIGITS];
    uint32_t z[NUM_ECC_DIGITS];

    int i, nb;

    vli_set(Rx[1], p_point->x);
    vli_set(Ry[1], p_point->y);

    XYcZ_initial_double(Rx[1], Ry[1], Rx[0], Ry[0], p_initialZ);

    for(i = vli_numBits(p_scalar) - 2; i > 0; --i)
    {
        nb = !vli_testBit(p_scalar, i);
        XYcZ_addC(Rx[1-nb], Ry[1-nb], Rx[nb], Ry[nb]);
        XYcZ_add(Rx[nb], Ry[nb], Rx[1-nb], Ry[1-nb]);
    }

    /* Final (bit 0) step, fused with the Z-recovery below. */
    nb = !vli_testBit(p_scalar, 0);
    XYcZ_addC(Rx[1-nb], Ry[1-nb], Rx[nb], Ry[nb]);

    /* Find final 1/Z value.
*/
    vli_modSub(z, Rx[1], Rx[0], curve_p); /* X1 - X0 */
    vli_modMult_fast(z, z, Ry[1-nb]);     /* Yb * (X1 - X0) */
    vli_modMult_fast(z, z, p_point->x);   /* xP * Yb * (X1 - X0) */
    vli_modInv(z, z, curve_p);            /* 1 / (xP * Yb * (X1 - X0)) */
    vli_modMult_fast(z, z, p_point->y);   /* yP / (xP * Yb * (X1 - X0)) */
    vli_modMult_fast(z, z, Rx[1-nb]);     /* Xb * yP / (xP * Yb * (X1 - X0)) */
    /* End 1/Z calculation */

    XYcZ_add(Rx[nb], Ry[nb], Rx[1-nb], Ry[1-nb]);

    apply_z(Rx[0], Ry[0], z); /* convert the ladder result back to affine */

    vli_set(p_result->x, Rx[0]);
    vli_set(p_result->y, Ry[0]);
}

/* Derives a key pair from the supplied random value.
   Returns 1 on success; 0 if p_random reduces to 0 mod n, in which
   case the caller must retry with fresh randomness. */
int ecc_make_key(EccPoint *p_publicKey, uint32_t p_privateKey[NUM_ECC_DIGITS],
    const uint32_t p_random[NUM_ECC_DIGITS])
{
    /* Make sure the private key is in the range [1, n-1].
    For the supported curves, n is always large enough that we only need to subtract once at most. */
    vli_set(p_privateKey, p_random);
    if(vli_cmp(curve_n, p_privateKey) != 1)
    {
        vli_sub(p_privateKey, p_privateKey, curve_n);
    }

    if(vli_isZero(p_privateKey))
    {
        return 0; /* The private key cannot be 0 (mod p).
*/
    }

    /* Public key = d * G. */
    EccPoint_mult(p_publicKey, &curve_G, p_privateKey, NULL);
    return 1;
}

#if (ECC_CURVE == secp256k1)
/* Compute p_result = x^3 + b (right-hand side of the curve equation;
   secp256k1 has coefficient a = 0). */
static void curve_x_side(uint32_t p_result[NUM_ECC_DIGITS], const uint32_t x[NUM_ECC_DIGITS])
{
    vli_modSquare_fast(p_result, x);                  /* r = x^2 */
    vli_modMult_fast(p_result, p_result, x);          /* r = x^3 */
    vli_modAdd(p_result, p_result, curve_b, curve_p); /* r = x^3 + b */
}
#else
/* Compute p_result = x^3 - 3x + b (right-hand side of the curve
   equation for the a = -3 curves). */
static void curve_x_side(uint32_t p_result[NUM_ECC_DIGITS], const uint32_t x[NUM_ECC_DIGITS])
{
    uint32_t _3[NUM_ECC_DIGITS] = {3}; /* -a = 3 */

    vli_modSquare_fast(p_result, x);                  /* r = x^2 */
    vli_modSub(p_result, p_result, _3, curve_p);      /* r = x^2 - 3 */
    vli_modMult_fast(p_result, p_result, x);          /* r = x^3 - 3x */
    vli_modAdd(p_result, p_result, curve_b, curve_p); /* r = x^3 - 3x + b */
}
#endif

/* Validates a public key: rejects the point at infinity, coordinates
   outside [0, p-1], and points not on the curve. Returns 1 if valid. */
int ecc_valid_public_key(const EccPoint *p_publicKey)
{
    uint32_t l_tmp1[NUM_ECC_DIGITS];
    uint32_t l_tmp2[NUM_ECC_DIGITS];

    if(EccPoint_isZero(p_publicKey))
    {
        return 0;
    }

    if(vli_cmp(curve_p, p_publicKey->x) != 1 || vli_cmp(curve_p, p_publicKey->y) != 1)
    {
        return 0;
    }

    vli_modSquare_fast(l_tmp1, p_publicKey->y); /* tmp1 = y^2 */

    curve_x_side(l_tmp2, p_publicKey->x); /* tmp2 = x^3 - 3x + b */

    /* Make sure that y^2 == x^3 + ax + b */
    if(vli_cmp(l_tmp1, l_tmp2) != 0)
    {
        return 0;
    }

    return 1;
}

/* ECDH: p_secret = x-coordinate of p_privateKey * p_publicKey.
   p_random is forwarded as the ladder's initial Z value (see
   XYcZ_initial_double). Returns 1 on success, 0 if the product is the
   point at infinity. */
int ecdh_shared_secret(uint32_t p_secret[NUM_ECC_DIGITS], const EccPoint *p_publicKey,
    const uint32_t p_privateKey[NUM_ECC_DIGITS], const uint32_t p_random[NUM_ECC_DIGITS])
{
    EccPoint l_product;

    EccPoint_mult(&l_product, p_publicKey, p_privateKey, p_random);
    if(EccPoint_isZero(&l_product))
{
        return 0;
    }

    vli_set(p_secret, l_product.x); /* shared secret = x-coordinate only */

    return 1;
}

/* -------- ECDSA code -------- */

/* Computes p_result = (p_left * p_right) % p_mod.
   Generic shift-and-subtract reduction for an arbitrary odd modulus;
   used for arithmetic mod the group order n, where the curve-specific
   fast reduction (vli_mmod_fast, mod p only) does not apply. */
static void vli_modMult(uint32_t *p_result, const uint32_t *p_left,
    const uint32_t *p_right, const uint32_t *p_mod)
{
    uint32_t l_product[2 * NUM_ECC_DIGITS];
    uint32_t l_modMultiple[2 * NUM_ECC_DIGITS];
    uint l_digitShift, l_bitShift;
    uint l_productBits;
    uint l_modBits = vli_numBits(p_mod);

    vli_mult(l_product, p_left, p_right);
    l_productBits = vli_numBits(l_product + NUM_ECC_DIGITS);
    if(l_productBits)
    {
        l_productBits += NUM_ECC_DIGITS * 32; /* high half is non-zero */
    }
    else
    {
        l_productBits = vli_numBits(l_product);
    }

    if(l_productBits < l_modBits)
    { /* l_product < p_mod. */
        vli_set(p_result, l_product);
        return;
    }

    /* Shift p_mod by (l_leftBits - l_modBits). This multiplies p_mod by the largest
    power of two possible while still resulting in a number less than p_left. */
    vli_clear(l_modMultiple);
    vli_clear(l_modMultiple + NUM_ECC_DIGITS);
    l_digitShift = (l_productBits - l_modBits) / 32;
    l_bitShift = (l_productBits - l_modBits) % 32;
    if(l_bitShift)
    {
        l_modMultiple[l_digitShift + NUM_ECC_DIGITS] = vli_lshift(l_modMultiple + l_digitShift, p_mod, l_bitShift);
    }
    else
    {
        vli_set(l_modMultiple + l_digitShift, p_mod);
    }

    /* Subtract all multiples of p_mod to get the remainder.
*/ 01475 vli_clear(p_result); 01476 p_result[0] = 1; /* Use p_result as a temp var to store 1 (for subtraction) */ 01477 while(l_productBits > NUM_ECC_DIGITS * 32 || vli_cmp(l_modMultiple, p_mod) >= 0) 01478 { 01479 int l_cmp = vli_cmp(l_modMultiple + NUM_ECC_DIGITS, l_product + NUM_ECC_DIGITS); 01480 if(l_cmp < 0 || (l_cmp == 0 && vli_cmp(l_modMultiple, l_product) <= 0)) 01481 { 01482 if(vli_sub(l_product, l_product, l_modMultiple)) 01483 { /* borrow */ 01484 vli_sub(l_product + NUM_ECC_DIGITS, l_product + NUM_ECC_DIGITS, p_result); 01485 } 01486 vli_sub(l_product + NUM_ECC_DIGITS, l_product + NUM_ECC_DIGITS, l_modMultiple + NUM_ECC_DIGITS); 01487 } 01488 uint32_t l_carry = (l_modMultiple[NUM_ECC_DIGITS] & 0x01) << 31; 01489 vli_rshift1(l_modMultiple + NUM_ECC_DIGITS); 01490 vli_rshift1(l_modMultiple); 01491 l_modMultiple[NUM_ECC_DIGITS-1] |= l_carry; 01492 01493 --l_productBits; 01494 } 01495 vli_set(p_result, l_product); 01496 } 01497 01498 static inline unsigned int max(const unsigned int a, const unsigned int b) 01499 { 01500 return (a > b ? a : b); 01501 } 01502 01503 int ecdsa_sign(uint32_t r[NUM_ECC_DIGITS], uint32_t s[NUM_ECC_DIGITS], 01504 const uint32_t p_privateKey[NUM_ECC_DIGITS], const uint32_t p_random[NUM_ECC_DIGITS], 01505 const uint32_t p_hash[NUM_ECC_DIGITS]) 01506 { 01507 uint32_t k[NUM_ECC_DIGITS]; 01508 EccPoint p; 01509 01510 if(vli_isZero(p_random)) 01511 { /* The random number must not be 0. */ 01512 return 0; 01513 } 01514 01515 vli_set(k, p_random); 01516 if(vli_cmp(curve_n, k) != 1) 01517 { 01518 vli_sub(k, k, curve_n); 01519 } 01520 01521 /* tmp = k * G */ 01522 EccPoint_mult(&p, &curve_G, k, NULL); 01523 01524 /* r = x1 (mod n) */ 01525 vli_set(r, p.x); 01526 if(vli_cmp(curve_n, r) != 1) 01527 { 01528 vli_sub(r, r, curve_n); 01529 } 01530 if(vli_isZero(r)) 01531 { /* If r == 0, fail (need a different random number). 
*/
        return 0;
    }

    vli_modMult(s, r, p_privateKey, curve_n); /* s = r*d */
    vli_modAdd(s, p_hash, s, curve_n);        /* s = e + r*d */
    vli_modInv(k, k, curve_n);                /* k = 1 / k */
    vli_modMult(s, s, k, curve_n);            /* s = (e + r*d) / k */

    return 1;
}

/* ECDSA signature verification.
   Returns 1 iff (r, s) is a valid signature of p_hash under
   p_publicKey, 0 otherwise. */
int ecdsa_verify(const EccPoint *p_publicKey, const uint32_t p_hash[NUM_ECC_DIGITS],
    const uint32_t r[NUM_ECC_DIGITS], const uint32_t s[NUM_ECC_DIGITS])
{
    uint32_t u1[NUM_ECC_DIGITS], u2[NUM_ECC_DIGITS];
    uint32_t z[NUM_ECC_DIGITS];
    EccPoint l_sum;
    uint32_t rx[NUM_ECC_DIGITS];
    uint32_t ry[NUM_ECC_DIGITS];
    uint32_t tx[NUM_ECC_DIGITS];
    uint32_t ty[NUM_ECC_DIGITS];
    uint32_t tz[NUM_ECC_DIGITS];

    if(vli_isZero(r) || vli_isZero(s))
    { /* r, s must not be 0. */
        return 0;
    }

    if(vli_cmp(curve_n, r) != 1 || vli_cmp(curve_n, s) != 1)
    { /* r, s must be < n. */
        return 0;
    }

    /* Calculate u1 and u2. */
    vli_modInv(z, s, curve_n);           /* Z = s^-1 */
    vli_modMult(u1, p_hash, z, curve_n); /* u1 = e/s */
    vli_modMult(u2, r, z, curve_n);      /* u2 = r/s */

    /* Calculate l_sum = G + Q.
*/
    vli_set(l_sum.x, p_publicKey->x);
    vli_set(l_sum.y, p_publicKey->y);
    vli_set(tx, curve_G.x);
    vli_set(ty, curve_G.y);
    vli_modSub(z, l_sum.x, tx, curve_p); /* Z = x2 - x1 */
    XYcZ_add(tx, ty, l_sum.x, l_sum.y);
    vli_modInv(z, z, curve_p); /* Z = 1/Z */
    apply_z(l_sum.x, l_sum.y, z);

    /* Use Shamir's trick to calculate u1*G + u2*Q: a single
       double-and-add pass over both scalars, selecting
       NULL / G / Q / G+Q from the table by the pair of scalar bits. */
    const EccPoint *l_points[4] = {NULL, &curve_G, p_publicKey, &l_sum};
    uint l_numBits = max(vli_numBits(u1), vli_numBits(u2));

    /* Initialize with the point selected by the top bit pair (at least
       one of the two bits is set by construction of l_numBits). */
    const EccPoint *l_point = l_points[(!!vli_testBit(u1, l_numBits-1)) | ((!!vli_testBit(u2, l_numBits-1)) << 1)];
    vli_set(rx, l_point->x);
    vli_set(ry, l_point->y);
    vli_clear(z);
    z[0] = 1;

    int i;
    for(i = l_numBits - 2; i >= 0; --i)
    {
        EccPoint_double_jacobian(rx, ry, z);

        int l_index = (!!vli_testBit(u1, i)) | ((!!vli_testBit(u2, i)) << 1);
        l_point = l_points[l_index];
        if(l_point)
        {
            /* Bring the table point to the accumulator's Z, add, and
               fold the addition's Z factor into z. */
            vli_set(tx, l_point->x);
            vli_set(ty, l_point->y);
            apply_z(tx, ty, z);
            vli_modSub(tz, rx, tx, curve_p); /* Z = x2 - x1 */
            XYcZ_add(tx, ty, rx, ry);
            vli_modMult_fast(z, z, tz);
        }
    }

    vli_modInv(z, z, curve_p); /* Z = 1/Z */
    apply_z(rx, ry, z);

    /* v = x1 (mod n) */
    if(vli_cmp(curve_n, rx) != 1)
    {
        vli_sub(rx, rx, curve_n);
    }

    /* Accept only if v == r.
*/ 01618 return (vli_cmp(rx, r) == 0); 01619 } 01620 01621 void ecc_bytes2native(uint32_t p_native[NUM_ECC_DIGITS], const uint8_t p_bytes[NUM_ECC_DIGITS*4]) 01622 { 01623 unsigned i; 01624 for(i=0; i<NUM_ECC_DIGITS; ++i) 01625 { 01626 uint8_t *p_digit = (uint8_t *)(p_bytes + 4 * (NUM_ECC_DIGITS - 1 - i)); 01627 p_native[i] = ((uint32_t)p_digit[0] << 24) | ((uint32_t)p_digit[1] << 16) | ((uint32_t)p_digit[2] << 8) | (uint32_t)p_digit[3]; 01628 } 01629 } 01630 01631 void ecc_native2bytes(uint8_t p_bytes[NUM_ECC_DIGITS*4], const uint32_t p_native[NUM_ECC_DIGITS]) 01632 { 01633 unsigned i; 01634 for(i=0; i<NUM_ECC_DIGITS; ++i) 01635 { 01636 uint8_t *p_digit = p_bytes + 4 * (NUM_ECC_DIGITS - 1 - i); 01637 p_digit[0] = p_native[i] >> 24; 01638 p_digit[1] = p_native[i] >> 16; 01639 p_digit[2] = p_native[i] >> 8; 01640 p_digit[3] = p_native[i]; 01641 } 01642 } 01643 01644 /* Compute a = sqrt(a) (mod curve_p). */ 01645 static void mod_sqrt(uint32_t a[NUM_ECC_DIGITS]) 01646 { 01647 unsigned i; 01648 uint32_t p1[NUM_ECC_DIGITS] = {1}; 01649 uint32_t l_result[NUM_ECC_DIGITS] = {1}; 01650 01651 /* Since curve_p == 3 (mod 4) for all supported curves, we can 01652 compute sqrt(a) = a^((curve_p + 1) / 4) (mod curve_p). 
*/
    vli_add(p1, curve_p, p1); /* p1 = curve_p + 1 */
    /* Left-to-right square-and-multiply; stopping at i == 2 skips the
       two lowest bits of p1, i.e. raises to p1 >> 2 = (curve_p + 1)/4. */
    for(i = vli_numBits(p1) - 1; i > 1; --i)
    {
        vli_modSquare_fast(l_result, l_result);
        if(vli_testBit(p1, i))
        {
            vli_modMult_fast(l_result, l_result, a);
        }
    }
    vli_set(a, l_result);
}

/* Compress a point to 1 + NUM_ECC_DIGITS*4 bytes: prefix 02 or 03
   carrying the parity of y, followed by the big-endian x-coordinate. */
void ecc_point_compress(uint8_t p_compressed[NUM_ECC_DIGITS*4 + 1], const EccPoint *p_point)
{
    p_compressed[0] = 2 + (p_point->y[0] & 0x01);
    ecc_native2bytes(p_compressed + 1, p_point->x);
}

/* Recover a full point from its compressed form: y = sqrt(curve RHS at x),
   choosing the square root whose parity matches the prefix byte. */
void ecc_point_decompress(EccPoint *p_point, const uint8_t p_compressed[NUM_ECC_DIGITS*4 + 1])
{
    ecc_bytes2native(p_point->x, p_compressed + 1);
    curve_x_side(p_point->y, p_point->x);
    mod_sqrt(p_point->y);
    if((p_point->y[0] & 0x01) != (p_compressed[0] & 0x01))
    {
        vli_sub(p_point->y, curve_p, p_point->y); /* take the other root, p - y */
    }
}
Generated on Tue Jul 19 2022 15:58:40 by Doxygen 1.7.2