micro-ECC for mbed, ported from the GCC version on GitHub.
Dependents: mbed_microECC Wallet_v1
asm_arm.h
00001 /* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. */ 00002 00003 #ifndef _UECC_ASM_ARM_H_ 00004 #define _UECC_ASM_ARM_H_ 00005 00006 #if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1) 00007 #define uECC_MIN_WORDS 8 00008 #endif 00009 #if uECC_SUPPORTS_secp224r1 00010 #undef uECC_MIN_WORDS 00011 #define uECC_MIN_WORDS 7 00012 #endif 00013 #if uECC_SUPPORTS_secp192r1 00014 #undef uECC_MIN_WORDS 00015 #define uECC_MIN_WORDS 6 00016 #endif 00017 #if uECC_SUPPORTS_secp160r1 00018 #undef uECC_MIN_WORDS 00019 #define uECC_MIN_WORDS 5 00020 #endif 00021 00022 #if (uECC_PLATFORM == uECC_arm_thumb) 00023 #define REG_RW "+l" 00024 #define REG_WRITE "=l" 00025 #else 00026 #define REG_RW "+r" 00027 #define REG_WRITE "=r" 00028 #endif 00029 00030 #if (uECC_PLATFORM == uECC_arm_thumb || uECC_PLATFORM == uECC_arm_thumb2) 00031 #define REG_RW_LO "+l" 00032 #define REG_WRITE_LO "=l" 00033 #else 00034 #define REG_RW_LO "+r" 00035 #define REG_WRITE_LO "=r" 00036 #endif 00037 00038 #if (uECC_PLATFORM == uECC_arm_thumb2) 00039 #define RESUME_SYNTAX 00040 #else 00041 #define RESUME_SYNTAX ".syntax divided \n\t" 00042 #endif 00043 00044 #if (uECC_OPTIMIZATION_LEVEL >= 2) 00045 00046 uECC_VLI_API uECC_word_t uECC_vli_add(uECC_word_t *result, 00047 const uECC_word_t *left, 00048 const uECC_word_t *right, 00049 wordcount_t num_words) { 00050 #if (uECC_MAX_WORDS != uECC_MIN_WORDS) 00051 #if (uECC_PLATFORM == uECC_arm_thumb) || (uECC_PLATFORM == uECC_arm_thumb2) 00052 uint32_t jump = (uECC_MAX_WORDS - num_words) * 4 * 2 + 1; 00053 #else /* ARM */ 00054 uint32_t jump = (uECC_MAX_WORDS - num_words) * 4 * 4; 00055 #endif 00056 #endif 00057 uint32_t carry; 00058 uint32_t left_word; 00059 uint32_t right_word; 00060 00061 __asm__ volatile ( 00062 ".syntax unified \n\t" 00063 "movs %[carry], #0 \n\t" 00064 #if (uECC_MAX_WORDS != uECC_MIN_WORDS) 00065 "adr %[left], 1f \n\t" 00066 ".align 4 \n\t" 00067 "adds %[jump], %[left] \n\t" 00068 #endif 00069 00070 "ldmia 
%[lptr]!, {%[left]} \n\t" 00071 "ldmia %[rptr]!, {%[right]} \n\t" 00072 "adds %[left], %[right] \n\t" 00073 "stmia %[dptr]!, {%[left]} \n\t" 00074 00075 #if (uECC_MAX_WORDS != uECC_MIN_WORDS) 00076 "bx %[jump] \n\t" 00077 #endif 00078 "1: \n\t" 00079 REPEAT(DEC(uECC_MAX_WORDS), 00080 "ldmia %[lptr]!, {%[left]} \n\t" 00081 "ldmia %[rptr]!, {%[right]} \n\t" 00082 "adcs %[left], %[right] \n\t" 00083 "stmia %[dptr]!, {%[left]} \n\t") 00084 00085 "adcs %[carry], %[carry] \n\t" 00086 RESUME_SYNTAX 00087 : [dptr] REG_RW_LO (result), [lptr] REG_RW_LO (left), [rptr] REG_RW_LO (right), 00088 #if (uECC_MAX_WORDS != uECC_MIN_WORDS) 00089 [jump] REG_RW_LO (jump), 00090 #endif 00091 [carry] REG_WRITE_LO (carry), [left] REG_WRITE_LO (left_word), 00092 [right] REG_WRITE_LO (right_word) 00093 : 00094 : "cc", "memory" 00095 ); 00096 return carry; 00097 } 00098 #define asm_add 1 00099 00100 uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result, 00101 const uECC_word_t *left, 00102 const uECC_word_t *right, 00103 wordcount_t num_words) { 00104 #if (uECC_MAX_WORDS != uECC_MIN_WORDS) 00105 #if (uECC_PLATFORM == uECC_arm_thumb) || (uECC_PLATFORM == uECC_arm_thumb2) 00106 uint32_t jump = (uECC_MAX_WORDS - num_words) * 4 * 2 + 1; 00107 #else /* ARM */ 00108 uint32_t jump = (uECC_MAX_WORDS - num_words) * 4 * 4; 00109 #endif 00110 #endif 00111 uint32_t carry; 00112 uint32_t left_word; 00113 uint32_t right_word; 00114 00115 __asm__ volatile ( 00116 ".syntax unified \n\t" 00117 "movs %[carry], #0 \n\t" 00118 #if (uECC_MAX_WORDS != uECC_MIN_WORDS) 00119 "adr %[left], 1f \n\t" 00120 ".align 4 \n\t" 00121 "adds %[jump], %[left] \n\t" 00122 #endif 00123 00124 "ldmia %[lptr]!, {%[left]} \n\t" 00125 "ldmia %[rptr]!, {%[right]} \n\t" 00126 "subs %[left], %[right] \n\t" 00127 "stmia %[dptr]!, {%[left]} \n\t" 00128 00129 #if (uECC_MAX_WORDS != uECC_MIN_WORDS) 00130 "bx %[jump] \n\t" 00131 #endif 00132 "1: \n\t" 00133 REPEAT(DEC(uECC_MAX_WORDS), 00134 "ldmia %[lptr]!, {%[left]} \n\t" 00135 "ldmia 
%[rptr]!, {%[right]} \n\t" 00136 "sbcs %[left], %[right] \n\t" 00137 "stmia %[dptr]!, {%[left]} \n\t") 00138 00139 "adcs %[carry], %[carry] \n\t" 00140 RESUME_SYNTAX 00141 : [dptr] REG_RW_LO (result), [lptr] REG_RW_LO (left), [rptr] REG_RW_LO (right), 00142 #if (uECC_MAX_WORDS != uECC_MIN_WORDS) 00143 [jump] REG_RW_LO (jump), 00144 #endif 00145 [carry] REG_WRITE_LO (carry), [left] REG_WRITE_LO (left_word), 00146 [right] REG_WRITE_LO (right_word) 00147 : 00148 : "cc", "memory" 00149 ); 00150 return !carry; /* Note that on ARM, carry flag set means "no borrow" when subtracting 00151 (for some reason...) */ 00152 } 00153 #define asm_sub 1 00154 00155 #endif /* (uECC_OPTIMIZATION_LEVEL >= 2) */ 00156 00157 #if (uECC_OPTIMIZATION_LEVEL >= 3) 00158 00159 #if (uECC_PLATFORM != uECC_arm_thumb) 00160 00161 #if uECC_ARM_USE_UMAAL 00162 #include "asm_arm_mult_square_umaal.inc" 00163 #else 00164 #include "asm_arm_mult_square.inc" 00165 #endif 00166 00167 #if (uECC_OPTIMIZATION_LEVEL == 3) 00168 00169 uECC_VLI_API void uECC_vli_mult(uint32_t *result, 00170 const uint32_t *left, 00171 const uint32_t *right, 00172 wordcount_t num_words) { 00173 register uint32_t *r0 __asm__("r0") = result; 00174 register const uint32_t *r1 __asm__("r1") = left; 00175 register const uint32_t *r2 __asm__("r2") = right; 00176 register uint32_t r3 __asm__("r3") = num_words; 00177 00178 __asm__ volatile ( 00179 ".syntax unified \n\t" 00180 #if (uECC_MIN_WORDS == 5) 00181 FAST_MULT_ASM_5 00182 #if (uECC_MAX_WORDS > 5) 00183 FAST_MULT_ASM_5_TO_6 00184 #endif 00185 #if (uECC_MAX_WORDS > 6) 00186 FAST_MULT_ASM_6_TO_7 00187 #endif 00188 #if (uECC_MAX_WORDS > 7) 00189 FAST_MULT_ASM_7_TO_8 00190 #endif 00191 #elif (uECC_MIN_WORDS == 6) 00192 FAST_MULT_ASM_6 00193 #if (uECC_MAX_WORDS > 6) 00194 FAST_MULT_ASM_6_TO_7 00195 #endif 00196 #if (uECC_MAX_WORDS > 7) 00197 FAST_MULT_ASM_7_TO_8 00198 #endif 00199 #elif (uECC_MIN_WORDS == 7) 00200 FAST_MULT_ASM_7 00201 #if (uECC_MAX_WORDS > 7) 00202 FAST_MULT_ASM_7_TO_8 
00203 #endif 00204 #elif (uECC_MIN_WORDS == 8) 00205 FAST_MULT_ASM_8 00206 #endif 00207 "1: \n\t" 00208 RESUME_SYNTAX 00209 : "+r" (r0), "+r" (r1), "+r" (r2) 00210 : "r" (r3) 00211 : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory" 00212 ); 00213 } 00214 #define asm_mult 1 00215 00216 #if uECC_SQUARE_FUNC 00217 uECC_VLI_API void uECC_vli_square(uECC_word_t *result, 00218 const uECC_word_t *left, 00219 wordcount_t num_words) { 00220 register uint32_t *r0 __asm__("r0") = result; 00221 register const uint32_t *r1 __asm__("r1") = left; 00222 register uint32_t r2 __asm__("r2") = num_words; 00223 00224 __asm__ volatile ( 00225 ".syntax unified \n\t" 00226 #if (uECC_MIN_WORDS == 5) 00227 FAST_SQUARE_ASM_5 00228 #if (uECC_MAX_WORDS > 5) 00229 FAST_SQUARE_ASM_5_TO_6 00230 #endif 00231 #if (uECC_MAX_WORDS > 6) 00232 FAST_SQUARE_ASM_6_TO_7 00233 #endif 00234 #if (uECC_MAX_WORDS > 7) 00235 FAST_SQUARE_ASM_7_TO_8 00236 #endif 00237 #elif (uECC_MIN_WORDS == 6) 00238 FAST_SQUARE_ASM_6 00239 #if (uECC_MAX_WORDS > 6) 00240 FAST_SQUARE_ASM_6_TO_7 00241 #endif 00242 #if (uECC_MAX_WORDS > 7) 00243 FAST_SQUARE_ASM_7_TO_8 00244 #endif 00245 #elif (uECC_MIN_WORDS == 7) 00246 FAST_SQUARE_ASM_7 00247 #if (uECC_MAX_WORDS > 7) 00248 FAST_SQUARE_ASM_7_TO_8 00249 #endif 00250 #elif (uECC_MIN_WORDS == 8) 00251 FAST_SQUARE_ASM_8 00252 #endif 00253 00254 "1: \n\t" 00255 RESUME_SYNTAX 00256 : "+r" (r0), "+r" (r1) 00257 : "r" (r2) 00258 : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory" 00259 ); 00260 } 00261 #define asm_square 1 00262 #endif /* uECC_SQUARE_FUNC */ 00263 00264 #else /* (uECC_OPTIMIZATION_LEVEL > 3) */ 00265 00266 uECC_VLI_API void uECC_vli_mult(uint32_t *result, 00267 const uint32_t *left, 00268 const uint32_t *right, 00269 wordcount_t num_words) { 00270 register uint32_t *r0 __asm__("r0") = result; 00271 register const uint32_t *r1 __asm__("r1") = left; 00272 register const uint32_t *r2 __asm__("r2") = right; 00273 
register uint32_t r3 __asm__("r3") = num_words; 00274 00275 #if uECC_SUPPORTS_secp160r1 00276 if (num_words == 5) { 00277 __asm__ volatile ( 00278 ".syntax unified \n\t" 00279 FAST_MULT_ASM_5 00280 RESUME_SYNTAX 00281 : "+r" (r0), "+r" (r1), "+r" (r2) 00282 : "r" (r3) 00283 : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory" 00284 ); 00285 return; 00286 } 00287 #endif 00288 #if uECC_SUPPORTS_secp192r1 00289 if (num_words == 6) { 00290 __asm__ volatile ( 00291 ".syntax unified \n\t" 00292 FAST_MULT_ASM_6 00293 RESUME_SYNTAX 00294 : "+r" (r0), "+r" (r1), "+r" (r2) 00295 : "r" (r3) 00296 : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory" 00297 ); 00298 return; 00299 } 00300 #endif 00301 #if uECC_SUPPORTS_secp224r1 00302 if (num_words == 7) { 00303 __asm__ volatile ( 00304 ".syntax unified \n\t" 00305 FAST_MULT_ASM_7 00306 RESUME_SYNTAX 00307 : "+r" (r0), "+r" (r1), "+r" (r2) 00308 : "r" (r3) 00309 : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory" 00310 ); 00311 return; 00312 } 00313 #endif 00314 #if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1) 00315 if (num_words == 8) { 00316 __asm__ volatile ( 00317 ".syntax unified \n\t" 00318 FAST_MULT_ASM_8 00319 RESUME_SYNTAX 00320 : "+r" (r0), "+r" (r1), "+r" (r2) 00321 : "r" (r3) 00322 : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory" 00323 ); 00324 return; 00325 } 00326 #endif 00327 } 00328 #define asm_mult 1 00329 00330 #if uECC_SQUARE_FUNC 00331 uECC_VLI_API void uECC_vli_square(uECC_word_t *result, 00332 const uECC_word_t *left, 00333 wordcount_t num_words) { 00334 register uint32_t *r0 __asm__("r0") = result; 00335 register const uint32_t *r1 __asm__("r1") = left; 00336 register uint32_t r2 __asm__("r2") = num_words; 00337 00338 #if uECC_SUPPORTS_secp160r1 00339 if (num_words == 5) { 00340 __asm__ volatile ( 00341 ".syntax unified \n\t" 00342 FAST_SQUARE_ASM_5 00343 RESUME_SYNTAX 00344 : "+r" 
(r0), "+r" (r1) 00345 : "r" (r2) 00346 : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory" 00347 ); 00348 return; 00349 } 00350 #endif 00351 #if uECC_SUPPORTS_secp192r1 00352 if (num_words == 6) { 00353 __asm__ volatile ( 00354 ".syntax unified \n\t" 00355 FAST_SQUARE_ASM_6 00356 RESUME_SYNTAX 00357 : "+r" (r0), "+r" (r1) 00358 : "r" (r2) 00359 : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory" 00360 ); 00361 return; 00362 } 00363 #endif 00364 #if uECC_SUPPORTS_secp224r1 00365 if (num_words == 7) { 00366 __asm__ volatile ( 00367 ".syntax unified \n\t" 00368 FAST_SQUARE_ASM_7 00369 RESUME_SYNTAX 00370 : "+r" (r0), "+r" (r1) 00371 : "r" (r2) 00372 : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory" 00373 ); 00374 return; 00375 } 00376 #endif 00377 #if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1) 00378 if (num_words == 8) { 00379 __asm__ volatile ( 00380 ".syntax unified \n\t" 00381 FAST_SQUARE_ASM_8 00382 RESUME_SYNTAX 00383 : "+r" (r0), "+r" (r1) 00384 : "r" (r2) 00385 : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory" 00386 ); 00387 return; 00388 } 00389 #endif 00390 } 00391 #define asm_square 1 00392 #endif /* uECC_SQUARE_FUNC */ 00393 00394 #endif /* (uECC_OPTIMIZATION_LEVEL > 3) */ 00395 00396 #endif /* uECC_PLATFORM != uECC_arm_thumb */ 00397 00398 #endif /* (uECC_OPTIMIZATION_LEVEL >= 3) */ 00399 00400 /* ---- "Small" implementations ---- */ 00401 00402 #if !asm_add 00403 uECC_VLI_API uECC_word_t uECC_vli_add(uECC_word_t *result, 00404 const uECC_word_t *left, 00405 const uECC_word_t *right, 00406 wordcount_t num_words) { 00407 uint32_t carry = 0; 00408 uint32_t left_word; 00409 uint32_t right_word; 00410 00411 __asm__ volatile ( 00412 ".syntax unified \n\t" 00413 "1: \n\t" 00414 "ldmia %[lptr]!, {%[left]} \n\t" /* Load left word. */ 00415 "ldmia %[rptr]!, {%[right]} \n\t" /* Load right word. 
*/ 00416 "lsrs %[carry], #1 \n\t" /* Set up carry flag (carry = 0 after this). */ 00417 "adcs %[left], %[left], %[right] \n\t" /* Add with carry. */ 00418 "adcs %[carry], %[carry], %[carry] \n\t" /* Store carry bit. */ 00419 "stmia %[dptr]!, {%[left]} \n\t" /* Store result word. */ 00420 "subs %[ctr], #1 \n\t" /* Decrement counter. */ 00421 "bne 1b \n\t" /* Loop until counter == 0. */ 00422 RESUME_SYNTAX 00423 : [dptr] REG_RW (result), [lptr] REG_RW (left), [rptr] REG_RW (right), 00424 [ctr] REG_RW (num_words), [carry] REG_RW (carry), 00425 [left] REG_WRITE (left_word), [right] REG_WRITE (right_word) 00426 : 00427 : "cc", "memory" 00428 ); 00429 return carry; 00430 } 00431 #define asm_add 1 00432 #endif 00433 00434 #if !asm_sub 00435 uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result, 00436 const uECC_word_t *left, 00437 const uECC_word_t *right, 00438 wordcount_t num_words) { 00439 uint32_t carry = 1; /* carry = 1 initially (means don't borrow) */ 00440 uint32_t left_word; 00441 uint32_t right_word; 00442 00443 __asm__ volatile ( 00444 ".syntax unified \n\t" 00445 "1: \n\t" 00446 "ldmia %[lptr]!, {%[left]} \n\t" /* Load left word. */ 00447 "ldmia %[rptr]!, {%[right]} \n\t" /* Load right word. */ 00448 "lsrs %[carry], #1 \n\t" /* Set up carry flag (carry = 0 after this). */ 00449 "sbcs %[left], %[left], %[right] \n\t" /* Subtract with borrow. */ 00450 "adcs %[carry], %[carry], %[carry] \n\t" /* Store carry bit. */ 00451 "stmia %[dptr]!, {%[left]} \n\t" /* Store result word. */ 00452 "subs %[ctr], #1 \n\t" /* Decrement counter. */ 00453 "bne 1b \n\t" /* Loop until counter == 0. 
*/ 00454 RESUME_SYNTAX 00455 : [dptr] REG_RW (result), [lptr] REG_RW (left), [rptr] REG_RW (right), 00456 [ctr] REG_RW (num_words), [carry] REG_RW (carry), 00457 [left] REG_WRITE (left_word), [right] REG_WRITE (right_word) 00458 : 00459 : "cc", "memory" 00460 ); 00461 return !carry; 00462 } 00463 #define asm_sub 1 00464 #endif 00465 00466 #if !asm_mult 00467 uECC_VLI_API void uECC_vli_mult(uECC_word_t *result, 00468 const uECC_word_t *left, 00469 const uECC_word_t *right, 00470 wordcount_t num_words) { 00471 #if (uECC_PLATFORM != uECC_arm_thumb) 00472 uint32_t c0 = 0; 00473 uint32_t c1 = 0; 00474 uint32_t c2 = 0; 00475 uint32_t k = 0; 00476 uint32_t i; 00477 uint32_t t0, t1; 00478 00479 __asm__ volatile ( 00480 ".syntax unified \n\t" 00481 00482 "1: \n\t" /* outer loop (k < num_words) */ 00483 "movs %[i], #0 \n\t" /* i = 0 */ 00484 "b 3f \n\t" 00485 00486 "2: \n\t" /* outer loop (k >= num_words) */ 00487 "movs %[i], %[k] \n\t" /* i = k */ 00488 "subs %[i], %[last_word] \n\t" /* i = k - (num_words - 1) (times 4) */ 00489 00490 "3: \n\t" /* inner loop */ 00491 "subs %[t0], %[k], %[i] \n\t" /* t0 = k-i */ 00492 00493 "ldr %[t1], [%[right], %[t0]] \n\t" /* t1 = right[k - i] */ 00494 "ldr %[t0], [%[left], %[i]] \n\t" /* t0 = left[i] */ 00495 00496 "umull %[t0], %[t1], %[t0], %[t1] \n\t" /* (t0, t1) = left[i] * right[k - i] */ 00497 00498 "adds %[c0], %[c0], %[t0] \n\t" /* add low word to c0 */ 00499 "adcs %[c1], %[c1], %[t1] \n\t" /* add high word to c1, including carry */ 00500 "adcs %[c2], %[c2], #0 \n\t" /* add carry to c2 */ 00501 00502 "adds %[i], #4 \n\t" /* i += 4 */ 00503 "cmp %[i], %[last_word] \n\t" /* i > (num_words - 1) (times 4)? */ 00504 "bgt 4f \n\t" /* if so, exit the loop */ 00505 "cmp %[i], %[k] \n\t" /* i <= k? 
*/ 00506 "ble 3b \n\t" /* if so, continue looping */ 00507 00508 "4: \n\t" /* end inner loop */ 00509 00510 "str %[c0], [%[result], %[k]] \n\t" /* result[k] = c0 */ 00511 "mov %[c0], %[c1] \n\t" /* c0 = c1 */ 00512 "mov %[c1], %[c2] \n\t" /* c1 = c2 */ 00513 "movs %[c2], #0 \n\t" /* c2 = 0 */ 00514 "adds %[k], #4 \n\t" /* k += 4 */ 00515 "cmp %[k], %[last_word] \n\t" /* k <= (num_words - 1) (times 4) ? */ 00516 "ble 1b \n\t" /* if so, loop back, start with i = 0 */ 00517 "cmp %[k], %[last_word], lsl #1 \n\t" /* k <= (num_words * 2 - 2) (times 4) ? */ 00518 "ble 2b \n\t" /* if so, loop back, start with i = (k + 1) - num_words */ 00519 /* end outer loop */ 00520 00521 "str %[c0], [%[result], %[k]] \n\t" /* result[num_words * 2 - 1] = c0 */ 00522 RESUME_SYNTAX 00523 : [c0] "+r" (c0), [c1] "+r" (c1), [c2] "+r" (c2), 00524 [k] "+r" (k), [i] "=&r" (i), [t0] "=&r" (t0), [t1] "=&r" (t1) 00525 : [result] "r" (result), [left] "r" (left), [right] "r" (right), 00526 [last_word] "r" ((num_words - 1) * 4) 00527 : "cc", "memory" 00528 ); 00529 00530 #else /* Thumb-1 */ 00531 uint32_t r4, r5, r6, r7; 00532 00533 __asm__ volatile ( 00534 ".syntax unified \n\t" 00535 "subs %[r3], #1 \n\t" /* r3 = num_words - 1 */ 00536 "lsls %[r3], #2 \n\t" /* r3 = (num_words - 1) * 4 */ 00537 "mov r8, %[r3] \n\t" /* r8 = (num_words - 1) * 4 */ 00538 "lsls %[r3], #1 \n\t" /* r3 = (num_words - 1) * 8 */ 00539 "mov r9, %[r3] \n\t" /* r9 = (num_words - 1) * 8 */ 00540 "movs %[r3], #0 \n\t" /* c0 = 0 */ 00541 "movs %[r4], #0 \n\t" /* c1 = 0 */ 00542 "movs %[r5], #0 \n\t" /* c2 = 0 */ 00543 "movs %[r6], #0 \n\t" /* k = 0 */ 00544 00545 "push {%[r0]} \n\t" /* keep result on the stack */ 00546 00547 "1: \n\t" /* outer loop (k < num_words) */ 00548 "movs %[r7], #0 \n\t" /* r7 = i = 0 */ 00549 "b 3f \n\t" 00550 00551 "2: \n\t" /* outer loop (k >= num_words) */ 00552 "movs %[r7], %[r6] \n\t" /* r7 = k */ 00553 "mov %[r0], r8 \n\t" /* r0 = (num_words - 1) * 4 */ 00554 "subs %[r7], %[r0] \n\t" /* r7 = i = k - 
(num_words - 1) (times 4) */ 00555 00556 "3: \n\t" /* inner loop */ 00557 "mov r10, %[r3] \n\t" 00558 "mov r11, %[r4] \n\t" 00559 "mov r12, %[r5] \n\t" 00560 "mov r14, %[r6] \n\t" 00561 "subs %[r0], %[r6], %[r7] \n\t" /* r0 = k - i */ 00562 00563 "ldr %[r4], [%[r2], %[r0]] \n\t" /* r4 = right[k - i] */ 00564 "ldr %[r0], [%[r1], %[r7]] \n\t" /* r0 = left[i] */ 00565 00566 "lsrs %[r3], %[r0], #16 \n\t" /* r3 = a1 */ 00567 "uxth %[r0], %[r0] \n\t" /* r0 = a0 */ 00568 00569 "lsrs %[r5], %[r4], #16 \n\t" /* r5 = b1 */ 00570 "uxth %[r4], %[r4] \n\t" /* r4 = b0 */ 00571 00572 "movs %[r6], %[r3] \n\t" /* r6 = a1 */ 00573 "muls %[r6], %[r5], %[r6] \n\t" /* r6 = a1 * b1 */ 00574 "muls %[r3], %[r4], %[r3] \n\t" /* r3 = b0 * a1 */ 00575 "muls %[r5], %[r0], %[r5] \n\t" /* r5 = a0 * b1 */ 00576 "muls %[r0], %[r4], %[r0] \n\t" /* r0 = a0 * b0 */ 00577 00578 /* Add middle terms */ 00579 "lsls %[r4], %[r3], #16 \n\t" 00580 "lsrs %[r3], %[r3], #16 \n\t" 00581 "adds %[r0], %[r4] \n\t" 00582 "adcs %[r6], %[r3] \n\t" 00583 00584 "lsls %[r4], %[r5], #16 \n\t" 00585 "lsrs %[r5], %[r5], #16 \n\t" 00586 "adds %[r0], %[r4] \n\t" 00587 "adcs %[r6], %[r5] \n\t" 00588 00589 "mov %[r3], r10\n\t" 00590 "mov %[r4], r11\n\t" 00591 "mov %[r5], r12\n\t" 00592 "adds %[r3], %[r0] \n\t" /* add low word to c0 */ 00593 "adcs %[r4], %[r6] \n\t" /* add high word to c1, including carry */ 00594 "movs %[r0], #0 \n\t" /* r0 = 0 (does not affect carry bit) */ 00595 "adcs %[r5], %[r0] \n\t" /* add carry to c2 */ 00596 00597 "mov %[r6], r14\n\t" /* r6 = k */ 00598 00599 "adds %[r7], #4 \n\t" /* i += 4 */ 00600 "cmp %[r7], r8 \n\t" /* i > (num_words - 1) (times 4)? */ 00601 "bgt 4f \n\t" /* if so, exit the loop */ 00602 "cmp %[r7], %[r6] \n\t" /* i <= k? 
*/ 00603 "ble 3b \n\t" /* if so, continue looping */ 00604 00605 "4: \n\t" /* end inner loop */ 00606 00607 "ldr %[r0], [sp, #0] \n\t" /* r0 = result */ 00608 00609 "str %[r3], [%[r0], %[r6]] \n\t" /* result[k] = c0 */ 00610 "mov %[r3], %[r4] \n\t" /* c0 = c1 */ 00611 "mov %[r4], %[r5] \n\t" /* c1 = c2 */ 00612 "movs %[r5], #0 \n\t" /* c2 = 0 */ 00613 "adds %[r6], #4 \n\t" /* k += 4 */ 00614 "cmp %[r6], r8 \n\t" /* k <= (num_words - 1) (times 4) ? */ 00615 "ble 1b \n\t" /* if so, loop back, start with i = 0 */ 00616 "cmp %[r6], r9 \n\t" /* k <= (num_words * 2 - 2) (times 4) ? */ 00617 "ble 2b \n\t" /* if so, loop back, with i = (k + 1) - num_words */ 00618 /* end outer loop */ 00619 00620 "str %[r3], [%[r0], %[r6]] \n\t" /* result[num_words * 2 - 1] = c0 */ 00621 "pop {%[r0]} \n\t" /* pop result off the stack */ 00622 00623 ".syntax divided \n\t" 00624 : [r3] "+l" (num_words), [r4] "=&l" (r4), 00625 [r5] "=&l" (r5), [r6] "=&l" (r6), [r7] "=&l" (r7) 00626 : [r0] "l" (result), [r1] "l" (left), [r2] "l" (right) 00627 : "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory" 00628 ); 00629 #endif 00630 } 00631 #define asm_mult 1 00632 #endif 00633 00634 #if uECC_SQUARE_FUNC 00635 #if !asm_square 00636 uECC_VLI_API void uECC_vli_square(uECC_word_t *result, 00637 const uECC_word_t *left, 00638 wordcount_t num_words) { 00639 #if (uECC_PLATFORM != uECC_arm_thumb) 00640 uint32_t c0 = 0; 00641 uint32_t c1 = 0; 00642 uint32_t c2 = 0; 00643 uint32_t k = 0; 00644 uint32_t i, tt; 00645 uint32_t t0, t1; 00646 00647 __asm__ volatile ( 00648 ".syntax unified \n\t" 00649 00650 "1: \n\t" /* outer loop (k < num_words) */ 00651 "movs %[i], #0 \n\t" /* i = 0 */ 00652 "b 3f \n\t" 00653 00654 "2: \n\t" /* outer loop (k >= num_words) */ 00655 "movs %[i], %[k] \n\t" /* i = k */ 00656 "subs %[i], %[last_word] \n\t" /* i = k - (num_words - 1) (times 4) */ 00657 00658 "3: \n\t" /* inner loop */ 00659 "subs %[tt], %[k], %[i] \n\t" /* tt = k-i */ 00660 00661 "ldr %[t1], [%[left], %[tt]] \n\t" /* 
t1 = left[k - i] */ 00662 "ldr %[t0], [%[left], %[i]] \n\t" /* t0 = left[i] */ 00663 00664 "umull %[t0], %[t1], %[t0], %[t1] \n\t" /* (t0, t1) = left[i] * right[k - i] */ 00665 00666 "cmp %[i], %[tt] \n\t" /* (i < k - i) ? */ 00667 "bge 4f \n\t" /* if i >= k - i, skip */ 00668 "adds %[c0], %[c0], %[t0] \n\t" /* add low word to c0 */ 00669 "adcs %[c1], %[c1], %[t1] \n\t" /* add high word to c1, including carry */ 00670 "adcs %[c2], %[c2], #0 \n\t" /* add carry to c2 */ 00671 00672 "4: \n\t" 00673 "adds %[c0], %[c0], %[t0] \n\t" /* add low word to c0 */ 00674 "adcs %[c1], %[c1], %[t1] \n\t" /* add high word to c1, including carry */ 00675 "adcs %[c2], %[c2], #0 \n\t" /* add carry to c2 */ 00676 00677 "adds %[i], #4 \n\t" /* i += 4 */ 00678 "cmp %[i], %[k] \n\t" /* i >= k? */ 00679 "bge 5f \n\t" /* if so, exit the loop */ 00680 "subs %[tt], %[k], %[i] \n\t" /* tt = k - i */ 00681 "cmp %[i], %[tt] \n\t" /* i <= k - i? */ 00682 "ble 3b \n\t" /* if so, continue looping */ 00683 00684 "5: \n\t" /* end inner loop */ 00685 00686 "str %[c0], [%[result], %[k]] \n\t" /* result[k] = c0 */ 00687 "mov %[c0], %[c1] \n\t" /* c0 = c1 */ 00688 "mov %[c1], %[c2] \n\t" /* c1 = c2 */ 00689 "movs %[c2], #0 \n\t" /* c2 = 0 */ 00690 "adds %[k], #4 \n\t" /* k += 4 */ 00691 "cmp %[k], %[last_word] \n\t" /* k <= (num_words - 1) (times 4) ? */ 00692 "ble 1b \n\t" /* if so, loop back, start with i = 0 */ 00693 "cmp %[k], %[last_word], lsl #1 \n\t" /* k <= (num_words * 2 - 2) (times 4) ? 
*/ 00694 "ble 2b \n\t" /* if so, loop back, start with i = (k + 1) - num_words */ 00695 /* end outer loop */ 00696 00697 "str %[c0], [%[result], %[k]] \n\t" /* result[num_words * 2 - 1] = c0 */ 00698 RESUME_SYNTAX 00699 : [c0] "+r" (c0), [c1] "+r" (c1), [c2] "+r" (c2), 00700 [k] "+r" (k), [i] "=&r" (i), [tt] "=&r" (tt), [t0] "=&r" (t0), [t1] "=&r" (t1) 00701 : [result] "r" (result), [left] "r" (left), [last_word] "r" ((num_words - 1) * 4) 00702 : "cc", "memory" 00703 ); 00704 00705 #else 00706 uint32_t r3, r4, r5, r6, r7; 00707 00708 __asm__ volatile ( 00709 ".syntax unified \n\t" 00710 "subs %[r2], #1 \n\t" /* r2 = num_words - 1 */ 00711 "lsls %[r2], #2 \n\t" /* r2 = (num_words - 1) * 4 */ 00712 "mov r8, %[r2] \n\t" /* r8 = (num_words - 1) * 4 */ 00713 "lsls %[r2], #1 \n\t" /* r2 = (num_words - 1) * 8 */ 00714 "mov r9, %[r2] \n\t" /* r9 = (num_words - 1) * 8 */ 00715 "movs %[r2], #0 \n\t" /* c0 = 0 */ 00716 "movs %[r3], #0 \n\t" /* c1 = 0 */ 00717 "movs %[r4], #0 \n\t" /* c2 = 0 */ 00718 "movs %[r5], #0 \n\t" /* k = 0 */ 00719 00720 "push {%[r0]} \n\t" /* keep result on the stack */ 00721 00722 "1: \n\t" /* outer loop (k < num_words) */ 00723 "movs %[r6], #0 \n\t" /* r6 = i = 0 */ 00724 "b 3f \n\t" 00725 00726 "2: \n\t" /* outer loop (k >= num_words) */ 00727 "movs %[r6], %[r5] \n\t" /* r6 = k */ 00728 "mov %[r0], r8 \n\t" /* r0 = (num_words - 1) * 4 */ 00729 "subs %[r6], %[r0] \n\t" /* r6 = i = k - (num_words - 1) (times 4) */ 00730 00731 "3: \n\t" /* inner loop */ 00732 "mov r10, %[r2] \n\t" 00733 "mov r11, %[r3] \n\t" 00734 "mov r12, %[r4] \n\t" 00735 "mov r14, %[r5] \n\t" 00736 "subs %[r7], %[r5], %[r6] \n\t" /* r7 = k - i */ 00737 00738 "ldr %[r3], [%[r1], %[r7]] \n\t" /* r3 = left[k - i] */ 00739 "ldr %[r0], [%[r1], %[r6]] \n\t" /* r0 = left[i] */ 00740 00741 "lsrs %[r2], %[r0], #16 \n\t" /* r2 = a1 */ 00742 "uxth %[r0], %[r0] \n\t" /* r0 = a0 */ 00743 00744 "lsrs %[r4], %[r3], #16 \n\t" /* r4 = b1 */ 00745 "uxth %[r3], %[r3] \n\t" /* r3 = b0 */ 00746 00747 
"movs %[r5], %[r2] \n\t" /* r5 = a1 */ 00748 "muls %[r5], %[r4], %[r5] \n\t" /* r5 = a1 * b1 */ 00749 "muls %[r2], %[r3], %[r2] \n\t" /* r2 = b0 * a1 */ 00750 "muls %[r4], %[r0], %[r4] \n\t" /* r4 = a0 * b1 */ 00751 "muls %[r0], %[r3], %[r0] \n\t" /* r0 = a0 * b0 */ 00752 00753 /* Add middle terms */ 00754 "lsls %[r3], %[r2], #16 \n\t" 00755 "lsrs %[r2], %[r2], #16 \n\t" 00756 "adds %[r0], %[r3] \n\t" 00757 "adcs %[r5], %[r2] \n\t" 00758 00759 "lsls %[r3], %[r4], #16 \n\t" 00760 "lsrs %[r4], %[r4], #16 \n\t" 00761 "adds %[r0], %[r3] \n\t" 00762 "adcs %[r5], %[r4] \n\t" 00763 00764 /* Add to acc, doubling if necessary */ 00765 "mov %[r2], r10\n\t" 00766 "mov %[r3], r11\n\t" 00767 "mov %[r4], r12\n\t" 00768 00769 "cmp %[r6], %[r7] \n\t" /* (i < k - i) ? */ 00770 "bge 4f \n\t" /* if i >= k - i, skip */ 00771 "movs %[r7], #0 \n\t" /* r7 = 0 */ 00772 "adds %[r2], %[r0] \n\t" /* add low word to c0 */ 00773 "adcs %[r3], %[r5] \n\t" /* add high word to c1, including carry */ 00774 "adcs %[r4], %[r7] \n\t" /* add carry to c2 */ 00775 "4: \n\t" 00776 "movs %[r7], #0 \n\t" /* r7 = 0 */ 00777 "adds %[r2], %[r0] \n\t" /* add low word to c0 */ 00778 "adcs %[r3], %[r5] \n\t" /* add high word to c1, including carry */ 00779 "adcs %[r4], %[r7] \n\t" /* add carry to c2 */ 00780 00781 "mov %[r5], r14\n\t" /* r5 = k */ 00782 00783 "adds %[r6], #4 \n\t" /* i += 4 */ 00784 "cmp %[r6], %[r5] \n\t" /* i >= k? */ 00785 "bge 5f \n\t" /* if so, exit the loop */ 00786 "subs %[r7], %[r5], %[r6] \n\t" /* r7 = k - i */ 00787 "cmp %[r6], %[r7] \n\t" /* i <= k - i? 
*/ 00788 "ble 3b \n\t" /* if so, continue looping */ 00789 00790 "5: \n\t" /* end inner loop */ 00791 00792 "ldr %[r0], [sp, #0] \n\t" /* r0 = result */ 00793 00794 "str %[r2], [%[r0], %[r5]] \n\t" /* result[k] = c0 */ 00795 "mov %[r2], %[r3] \n\t" /* c0 = c1 */ 00796 "mov %[r3], %[r4] \n\t" /* c1 = c2 */ 00797 "movs %[r4], #0 \n\t" /* c2 = 0 */ 00798 "adds %[r5], #4 \n\t" /* k += 4 */ 00799 "cmp %[r5], r8 \n\t" /* k <= (num_words - 1) (times 4) ? */ 00800 "ble 1b \n\t" /* if so, loop back, start with i = 0 */ 00801 "cmp %[r5], r9 \n\t" /* k <= (num_words * 2 - 2) (times 4) ? */ 00802 "ble 2b \n\t" /* if so, loop back, with i = (k + 1) - num_words */ 00803 /* end outer loop */ 00804 00805 "str %[r2], [%[r0], %[r5]] \n\t" /* result[num_words * 2 - 1] = c0 */ 00806 "pop {%[r0]} \n\t" /* pop result off the stack */ 00807 00808 ".syntax divided \n\t" 00809 : [r2] "+l" (num_words), [r3] "=&l" (r3), [r4] "=&l" (r4), 00810 [r5] "=&l" (r5), [r6] "=&l" (r6), [r7] "=&l" (r7) 00811 : [r0] "l" (result), [r1] "l" (left) 00812 : "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory" 00813 ); 00814 #endif 00815 } 00816 #define asm_square 1 00817 #endif 00818 #endif /* uECC_SQUARE_FUNC */ 00819 00820 #endif /* _UECC_ASM_ARM_H_ */
Generated on Wed Jul 13 2022 03:48:20 by Doxygen 1.7.2