micro-ECC for mbed, ported from the GCC version on GitHub.

Dependents:   mbed_microECC Wallet_v1

Committer:
allankliu
Date:
Thu Sep 07 12:10:11 2017 +0000
Revision:
0:b6fdeddc0bc9
Initial version, ported from the GCC version of uECC on GitHub. Assembly optimization for Thumb-2 is disabled.

Who changed what in which revision?

User | Revision | Line number | New contents of line
allankliu 0:b6fdeddc0bc9 1 /* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. */
allankliu 0:b6fdeddc0bc9 2
allankliu 0:b6fdeddc0bc9 3 #ifndef _UECC_ASM_ARM_H_
allankliu 0:b6fdeddc0bc9 4 #define _UECC_ASM_ARM_H_
allankliu 0:b6fdeddc0bc9 5
/* uECC_MIN_WORDS is the word count of the smallest curve that is compiled in.
 * The checks run from the largest size to the smallest and every later match
 * re-defines the macro, so the smallest enabled curve wins:
 *   secp256r1 / secp256k1 -> 8, secp224r1 -> 7, secp192r1 -> 6, secp160r1 -> 5.
 * If none of the uECC_SUPPORTS_* macros is nonzero, this section leaves
 * uECC_MIN_WORDS undefined. */
allankliu 0:b6fdeddc0bc9 6 #if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1)
allankliu 0:b6fdeddc0bc9 7 #define uECC_MIN_WORDS 8
allankliu 0:b6fdeddc0bc9 8 #endif
allankliu 0:b6fdeddc0bc9 9 #if uECC_SUPPORTS_secp224r1
allankliu 0:b6fdeddc0bc9 10 #undef uECC_MIN_WORDS
allankliu 0:b6fdeddc0bc9 11 #define uECC_MIN_WORDS 7
allankliu 0:b6fdeddc0bc9 12 #endif
allankliu 0:b6fdeddc0bc9 13 #if uECC_SUPPORTS_secp192r1
allankliu 0:b6fdeddc0bc9 14 #undef uECC_MIN_WORDS
allankliu 0:b6fdeddc0bc9 15 #define uECC_MIN_WORDS 6
allankliu 0:b6fdeddc0bc9 16 #endif
allankliu 0:b6fdeddc0bc9 17 #if uECC_SUPPORTS_secp160r1
allankliu 0:b6fdeddc0bc9 18 #undef uECC_MIN_WORDS
allankliu 0:b6fdeddc0bc9 19 #define uECC_MIN_WORDS 5
allankliu 0:b6fdeddc0bc9 20 #endif
allankliu 0:b6fdeddc0bc9 21
/* Operand constraints used by the "small" loop implementations of add/sub.
 * '+' marks a read-write operand and '=' a write-only one. On Thumb-1 the 'l'
 * constraint replaces 'r'; in GCC's ARM machine constraints 'l' limits the
 * operand to the low registers (r0-r7) when compiling Thumb code. */
allankliu 0:b6fdeddc0bc9 22 #if (uECC_PLATFORM == uECC_arm_thumb)
allankliu 0:b6fdeddc0bc9 23 #define REG_RW "+l"
allankliu 0:b6fdeddc0bc9 24 #define REG_WRITE "=l"
allankliu 0:b6fdeddc0bc9 25 #else
allankliu 0:b6fdeddc0bc9 26 #define REG_RW "+r"
allankliu 0:b6fdeddc0bc9 27 #define REG_WRITE "=r"
allankliu 0:b6fdeddc0bc9 28 #endif
allankliu 0:b6fdeddc0bc9 29
/* Constraints used by the unrolled add/sub routines. Unlike REG_RW/REG_WRITE,
 * low registers are requested on Thumb-2 as well as on Thumb-1.
 * NOTE(review): presumably this keeps every instruction of the unrolled run at
 * 2 bytes, which the computed jump offset in those routines ("* 4 * 2") depends
 * on -- confirm. */
allankliu 0:b6fdeddc0bc9 30 #if (uECC_PLATFORM == uECC_arm_thumb || uECC_PLATFORM == uECC_arm_thumb2)
allankliu 0:b6fdeddc0bc9 31 #define REG_RW_LO "+l"
allankliu 0:b6fdeddc0bc9 32 #define REG_WRITE_LO "=l"
allankliu 0:b6fdeddc0bc9 33 #else
allankliu 0:b6fdeddc0bc9 34 #define REG_RW_LO "+r"
allankliu 0:b6fdeddc0bc9 35 #define REG_WRITE_LO "=r"
allankliu 0:b6fdeddc0bc9 36 #endif
allankliu 0:b6fdeddc0bc9 37
/* The asm blocks below all begin with ".syntax unified". RESUME_SYNTAX is the
 * last line of each block: it switches the assembler back to ".syntax divided"
 * on every platform except Thumb-2, where it expands to nothing. */
allankliu 0:b6fdeddc0bc9 38 #if (uECC_PLATFORM == uECC_arm_thumb2)
allankliu 0:b6fdeddc0bc9 39 #define RESUME_SYNTAX
allankliu 0:b6fdeddc0bc9 40 #else
allankliu 0:b6fdeddc0bc9 41 #define RESUME_SYNTAX ".syntax divided \n\t"
allankliu 0:b6fdeddc0bc9 42 #endif
allankliu 0:b6fdeddc0bc9 43
allankliu 0:b6fdeddc0bc9 44 #if (uECC_OPTIMIZATION_LEVEL >= 2)
allankliu 0:b6fdeddc0bc9 45
/* Unrolled multi-word addition: result = left + right. Returns the carry out of
 * the last word processed (0 or 1).
 *
 * The first word pair is added with "adds"; the remaining words are handled by a
 * straight-line run of DEC(uECC_MAX_WORDS) load/load/adcs/store groups generated
 * by REPEAT (REPEAT and DEC are defined outside this section).
 *
 * When more than one curve size is compiled in (uECC_MAX_WORDS != uECC_MIN_WORDS)
 * the routine jumps into the middle of that run so that only num_words words are
 * processed. */
allankliu 0:b6fdeddc0bc9 46 uECC_VLI_API uECC_word_t uECC_vli_add(uECC_word_t *result,
allankliu 0:b6fdeddc0bc9 47 const uECC_word_t *left,
allankliu 0:b6fdeddc0bc9 48 const uECC_word_t *right,
allankliu 0:b6fdeddc0bc9 49 wordcount_t num_words) {
/* jump = byte offset, from label 1, of the first group to execute:
 * (groups to skip) * 4 instructions per group * bytes per instruction
 * (2 for Thumb, 4 for ARM). The Thumb "+ 1" sets bit 0 of the final address,
 * which "bx" uses to select Thumb state. */
allankliu 0:b6fdeddc0bc9 50 #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
allankliu 0:b6fdeddc0bc9 51 #if (uECC_PLATFORM == uECC_arm_thumb) || (uECC_PLATFORM == uECC_arm_thumb2)
allankliu 0:b6fdeddc0bc9 52 uint32_t jump = (uECC_MAX_WORDS - num_words) * 4 * 2 + 1;
allankliu 0:b6fdeddc0bc9 53 #else /* ARM */
allankliu 0:b6fdeddc0bc9 54 uint32_t jump = (uECC_MAX_WORDS - num_words) * 4 * 4;
allankliu 0:b6fdeddc0bc9 55 #endif
allankliu 0:b6fdeddc0bc9 56 #endif
allankliu 0:b6fdeddc0bc9 57 uint32_t carry;
allankliu 0:b6fdeddc0bc9 58 uint32_t left_word;
allankliu 0:b6fdeddc0bc9 59 uint32_t right_word;
allankliu 0:b6fdeddc0bc9 60
allankliu 0:b6fdeddc0bc9 61 __asm__ volatile (
allankliu 0:b6fdeddc0bc9 62 ".syntax unified \n\t"
allankliu 0:b6fdeddc0bc9 63 "movs %[carry], #0 \n\t"
/* Turn the offset into an absolute address: jump += address of label 1.
 * %[left] is only a scratch register at this point.
 * NOTE(review): the intent of the ".align 4" placed between the adr and the adds
 * is not documented here -- confirm before touching it. */
allankliu 0:b6fdeddc0bc9 64 #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
allankliu 0:b6fdeddc0bc9 65 "adr %[left], 1f \n\t"
allankliu 0:b6fdeddc0bc9 66 ".align 4 \n\t"
allankliu 0:b6fdeddc0bc9 67 "adds %[jump], %[left] \n\t"
allankliu 0:b6fdeddc0bc9 68 #endif
allankliu 0:b6fdeddc0bc9 69
/* Word 0: plain add, there is no incoming carry yet. */
allankliu 0:b6fdeddc0bc9 70 "ldmia %[lptr]!, {%[left]} \n\t"
allankliu 0:b6fdeddc0bc9 71 "ldmia %[rptr]!, {%[right]} \n\t"
allankliu 0:b6fdeddc0bc9 72 "adds %[left], %[right] \n\t"
allankliu 0:b6fdeddc0bc9 73 "stmia %[dptr]!, {%[left]} \n\t"
allankliu 0:b6fdeddc0bc9 74
/* Enter the unrolled run at the computed position. */
allankliu 0:b6fdeddc0bc9 75 #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
allankliu 0:b6fdeddc0bc9 76 "bx %[jump] \n\t"
allankliu 0:b6fdeddc0bc9 77 #endif
allankliu 0:b6fdeddc0bc9 78 "1: \n\t"
allankliu 0:b6fdeddc0bc9 79 REPEAT(DEC(uECC_MAX_WORDS),
allankliu 0:b6fdeddc0bc9 80 "ldmia %[lptr]!, {%[left]} \n\t"
allankliu 0:b6fdeddc0bc9 81 "ldmia %[rptr]!, {%[right]} \n\t"
allankliu 0:b6fdeddc0bc9 82 "adcs %[left], %[right] \n\t"
allankliu 0:b6fdeddc0bc9 83 "stmia %[dptr]!, {%[left]} \n\t")
allankliu 0:b6fdeddc0bc9 84
/* carry was zeroed at the top, so carry + carry + C leaves carry = C flag. */
allankliu 0:b6fdeddc0bc9 85 "adcs %[carry], %[carry] \n\t"
allankliu 0:b6fdeddc0bc9 86 RESUME_SYNTAX
allankliu 0:b6fdeddc0bc9 87 : [dptr] REG_RW_LO (result), [lptr] REG_RW_LO (left), [rptr] REG_RW_LO (right),
allankliu 0:b6fdeddc0bc9 88 #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
allankliu 0:b6fdeddc0bc9 89 [jump] REG_RW_LO (jump),
allankliu 0:b6fdeddc0bc9 90 #endif
allankliu 0:b6fdeddc0bc9 91 [carry] REG_WRITE_LO (carry), [left] REG_WRITE_LO (left_word),
allankliu 0:b6fdeddc0bc9 92 [right] REG_WRITE_LO (right_word)
allankliu 0:b6fdeddc0bc9 93 :
allankliu 0:b6fdeddc0bc9 94 : "cc", "memory"
allankliu 0:b6fdeddc0bc9 95 );
allankliu 0:b6fdeddc0bc9 96 return carry;
allankliu 0:b6fdeddc0bc9 97 }
/* Marks add as provided, so the "small" fallback further down is skipped. */
allankliu 0:b6fdeddc0bc9 98 #define asm_add 1
allankliu 0:b6fdeddc0bc9 99
/* Unrolled multi-word subtraction: result = left - right. Returns 1 if the
 * subtraction borrowed out of the last word processed, otherwise 0.
 *
 * Structure mirrors the unrolled add: the first word pair uses "subs", the rest
 * is a straight-line run of DEC(uECC_MAX_WORDS) load/load/sbcs/store groups
 * generated by REPEAT (REPEAT and DEC are defined outside this section). When
 * several curve sizes are compiled in, execution jumps into the middle of the
 * run so that only num_words words are processed. */
allankliu 0:b6fdeddc0bc9 100 uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result,
allankliu 0:b6fdeddc0bc9 101 const uECC_word_t *left,
allankliu 0:b6fdeddc0bc9 102 const uECC_word_t *right,
allankliu 0:b6fdeddc0bc9 103 wordcount_t num_words) {
/* jump = byte offset, from label 1, of the first group to execute:
 * (groups to skip) * 4 instructions per group * bytes per instruction
 * (2 for Thumb, 4 for ARM). The Thumb "+ 1" sets bit 0 of the final address,
 * which "bx" uses to select Thumb state. */
allankliu 0:b6fdeddc0bc9 104 #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
allankliu 0:b6fdeddc0bc9 105 #if (uECC_PLATFORM == uECC_arm_thumb) || (uECC_PLATFORM == uECC_arm_thumb2)
allankliu 0:b6fdeddc0bc9 106 uint32_t jump = (uECC_MAX_WORDS - num_words) * 4 * 2 + 1;
allankliu 0:b6fdeddc0bc9 107 #else /* ARM */
allankliu 0:b6fdeddc0bc9 108 uint32_t jump = (uECC_MAX_WORDS - num_words) * 4 * 4;
allankliu 0:b6fdeddc0bc9 109 #endif
allankliu 0:b6fdeddc0bc9 110 #endif
allankliu 0:b6fdeddc0bc9 111 uint32_t carry;
allankliu 0:b6fdeddc0bc9 112 uint32_t left_word;
allankliu 0:b6fdeddc0bc9 113 uint32_t right_word;
allankliu 0:b6fdeddc0bc9 114
allankliu 0:b6fdeddc0bc9 115 __asm__ volatile (
allankliu 0:b6fdeddc0bc9 116 ".syntax unified \n\t"
allankliu 0:b6fdeddc0bc9 117 "movs %[carry], #0 \n\t"
/* Turn the offset into an absolute address: jump += address of label 1.
 * %[left] is only a scratch register at this point.
 * NOTE(review): the intent of the ".align 4" placed between the adr and the adds
 * is not documented here -- confirm before touching it. */
allankliu 0:b6fdeddc0bc9 118 #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
allankliu 0:b6fdeddc0bc9 119 "adr %[left], 1f \n\t"
allankliu 0:b6fdeddc0bc9 120 ".align 4 \n\t"
allankliu 0:b6fdeddc0bc9 121 "adds %[jump], %[left] \n\t"
allankliu 0:b6fdeddc0bc9 122 #endif
allankliu 0:b6fdeddc0bc9 123
/* Word 0: plain subtract, there is no incoming borrow yet. */
allankliu 0:b6fdeddc0bc9 124 "ldmia %[lptr]!, {%[left]} \n\t"
allankliu 0:b6fdeddc0bc9 125 "ldmia %[rptr]!, {%[right]} \n\t"
allankliu 0:b6fdeddc0bc9 126 "subs %[left], %[right] \n\t"
allankliu 0:b6fdeddc0bc9 127 "stmia %[dptr]!, {%[left]} \n\t"
allankliu 0:b6fdeddc0bc9 128
/* Enter the unrolled run at the computed position. */
allankliu 0:b6fdeddc0bc9 129 #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
allankliu 0:b6fdeddc0bc9 130 "bx %[jump] \n\t"
allankliu 0:b6fdeddc0bc9 131 #endif
allankliu 0:b6fdeddc0bc9 132 "1: \n\t"
allankliu 0:b6fdeddc0bc9 133 REPEAT(DEC(uECC_MAX_WORDS),
allankliu 0:b6fdeddc0bc9 134 "ldmia %[lptr]!, {%[left]} \n\t"
allankliu 0:b6fdeddc0bc9 135 "ldmia %[rptr]!, {%[right]} \n\t"
allankliu 0:b6fdeddc0bc9 136 "sbcs %[left], %[right] \n\t"
allankliu 0:b6fdeddc0bc9 137 "stmia %[dptr]!, {%[left]} \n\t")
allankliu 0:b6fdeddc0bc9 138
/* carry was zeroed at the top, so carry + carry + C leaves carry = C flag. */
allankliu 0:b6fdeddc0bc9 139 "adcs %[carry], %[carry] \n\t"
allankliu 0:b6fdeddc0bc9 140 RESUME_SYNTAX
allankliu 0:b6fdeddc0bc9 141 : [dptr] REG_RW_LO (result), [lptr] REG_RW_LO (left), [rptr] REG_RW_LO (right),
allankliu 0:b6fdeddc0bc9 142 #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
allankliu 0:b6fdeddc0bc9 143 [jump] REG_RW_LO (jump),
allankliu 0:b6fdeddc0bc9 144 #endif
allankliu 0:b6fdeddc0bc9 145 [carry] REG_WRITE_LO (carry), [left] REG_WRITE_LO (left_word),
allankliu 0:b6fdeddc0bc9 146 [right] REG_WRITE_LO (right_word)
allankliu 0:b6fdeddc0bc9 147 :
allankliu 0:b6fdeddc0bc9 148 : "cc", "memory"
allankliu 0:b6fdeddc0bc9 149 );
/* The captured C flag is inverted so that the return value is the borrow. */
allankliu 0:b6fdeddc0bc9 150 return !carry; /* Note that on ARM, carry flag set means "no borrow" when subtracting
allankliu 0:b6fdeddc0bc9 151 (for some reason...) */
allankliu 0:b6fdeddc0bc9 152 }
/* Marks sub as provided, so the "small" fallback further down is skipped. */
allankliu 0:b6fdeddc0bc9 153 #define asm_sub 1
allankliu 0:b6fdeddc0bc9 154
allankliu 0:b6fdeddc0bc9 155 #endif /* (uECC_OPTIMIZATION_LEVEL >= 2) */
allankliu 0:b6fdeddc0bc9 156
allankliu 0:b6fdeddc0bc9 157 #if (uECC_OPTIMIZATION_LEVEL >= 3)
allankliu 0:b6fdeddc0bc9 158
allankliu 0:b6fdeddc0bc9 159 #if (uECC_PLATFORM != uECC_arm_thumb)
allankliu 0:b6fdeddc0bc9 160
allankliu 0:b6fdeddc0bc9 161 #if uECC_ARM_USE_UMAAL
allankliu 0:b6fdeddc0bc9 162 #include "asm_arm_mult_square_umaal.inc"
allankliu 0:b6fdeddc0bc9 163 #else
allankliu 0:b6fdeddc0bc9 164 #include "asm_arm_mult_square.inc"
allankliu 0:b6fdeddc0bc9 165 #endif
allankliu 0:b6fdeddc0bc9 166
allankliu 0:b6fdeddc0bc9 167 #if (uECC_OPTIMIZATION_LEVEL == 3)
allankliu 0:b6fdeddc0bc9 168
/* Multiplication for uECC_OPTIMIZATION_LEVEL == 3 (not built for Thumb-1): one
 * asm block assembled from the FAST_MULT_ASM_* macros of the included
 * asm_arm_mult_square*.inc file.
 *
 * The arguments are pinned to r0-r3 through register variables. The block starts
 * with the sequence for uECC_MIN_WORDS and appends one *_TO_* step for each
 * larger word count up to uECC_MAX_WORDS.
 * NOTE(review): the macro bodies are not visible in this section; presumably they
 * read num_words from r3 and branch to the trailing "1:" label once enough words
 * have been handled -- confirm against the .inc file. */
allankliu 0:b6fdeddc0bc9 169 uECC_VLI_API void uECC_vli_mult(uint32_t *result,
allankliu 0:b6fdeddc0bc9 170 const uint32_t *left,
allankliu 0:b6fdeddc0bc9 171 const uint32_t *right,
allankliu 0:b6fdeddc0bc9 172 wordcount_t num_words) {
allankliu 0:b6fdeddc0bc9 173 register uint32_t *r0 __asm__("r0") = result;
allankliu 0:b6fdeddc0bc9 174 register const uint32_t *r1 __asm__("r1") = left;
allankliu 0:b6fdeddc0bc9 175 register const uint32_t *r2 __asm__("r2") = right;
allankliu 0:b6fdeddc0bc9 176 register uint32_t r3 __asm__("r3") = num_words;
allankliu 0:b6fdeddc0bc9 177
allankliu 0:b6fdeddc0bc9 178 __asm__ volatile (
allankliu 0:b6fdeddc0bc9 179 ".syntax unified \n\t"
/* Smallest supported size first, then grow one word at a time. */
allankliu 0:b6fdeddc0bc9 180 #if (uECC_MIN_WORDS == 5)
allankliu 0:b6fdeddc0bc9 181 FAST_MULT_ASM_5
allankliu 0:b6fdeddc0bc9 182 #if (uECC_MAX_WORDS > 5)
allankliu 0:b6fdeddc0bc9 183 FAST_MULT_ASM_5_TO_6
allankliu 0:b6fdeddc0bc9 184 #endif
allankliu 0:b6fdeddc0bc9 185 #if (uECC_MAX_WORDS > 6)
allankliu 0:b6fdeddc0bc9 186 FAST_MULT_ASM_6_TO_7
allankliu 0:b6fdeddc0bc9 187 #endif
allankliu 0:b6fdeddc0bc9 188 #if (uECC_MAX_WORDS > 7)
allankliu 0:b6fdeddc0bc9 189 FAST_MULT_ASM_7_TO_8
allankliu 0:b6fdeddc0bc9 190 #endif
allankliu 0:b6fdeddc0bc9 191 #elif (uECC_MIN_WORDS == 6)
allankliu 0:b6fdeddc0bc9 192 FAST_MULT_ASM_6
allankliu 0:b6fdeddc0bc9 193 #if (uECC_MAX_WORDS > 6)
allankliu 0:b6fdeddc0bc9 194 FAST_MULT_ASM_6_TO_7
allankliu 0:b6fdeddc0bc9 195 #endif
allankliu 0:b6fdeddc0bc9 196 #if (uECC_MAX_WORDS > 7)
allankliu 0:b6fdeddc0bc9 197 FAST_MULT_ASM_7_TO_8
allankliu 0:b6fdeddc0bc9 198 #endif
allankliu 0:b6fdeddc0bc9 199 #elif (uECC_MIN_WORDS == 7)
allankliu 0:b6fdeddc0bc9 200 FAST_MULT_ASM_7
allankliu 0:b6fdeddc0bc9 201 #if (uECC_MAX_WORDS > 7)
allankliu 0:b6fdeddc0bc9 202 FAST_MULT_ASM_7_TO_8
allankliu 0:b6fdeddc0bc9 203 #endif
allankliu 0:b6fdeddc0bc9 204 #elif (uECC_MIN_WORDS == 8)
allankliu 0:b6fdeddc0bc9 205 FAST_MULT_ASM_8
allankliu 0:b6fdeddc0bc9 206 #endif
allankliu 0:b6fdeddc0bc9 207 "1: \n\t"
allankliu 0:b6fdeddc0bc9 208 RESUME_SYNTAX
/* r0-r2 are declared read-write, r3 read-only; r4-r12 and r14 (lr) are declared
 * clobbered, along with the flags and memory. */
allankliu 0:b6fdeddc0bc9 209 : "+r" (r0), "+r" (r1), "+r" (r2)
allankliu 0:b6fdeddc0bc9 210 : "r" (r3)
allankliu 0:b6fdeddc0bc9 211 : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
allankliu 0:b6fdeddc0bc9 212 );
allankliu 0:b6fdeddc0bc9 213 }
/* Marks mult as provided, so the "small" fallback further down is skipped. */
allankliu 0:b6fdeddc0bc9 214 #define asm_mult 1
allankliu 0:b6fdeddc0bc9 215
allankliu 0:b6fdeddc0bc9 216 #if uECC_SQUARE_FUNC
/* Squaring for uECC_OPTIMIZATION_LEVEL == 3 (not built for Thumb-1), compiled
 * only when uECC_SQUARE_FUNC is enabled: one asm block assembled from the
 * FAST_SQUARE_ASM_* macros of the included asm_arm_mult_square*.inc file.
 *
 * result, left and num_words are pinned to r0, r1 and r2 through register
 * variables. The block starts with the sequence for uECC_MIN_WORDS and appends
 * one *_TO_* step for each larger word count up to uECC_MAX_WORDS.
 * NOTE(review): the macro bodies are not visible in this section; presumably they
 * read num_words from r2 and branch to the trailing "1:" label once enough words
 * have been handled -- confirm against the .inc file. */
allankliu 0:b6fdeddc0bc9 217 uECC_VLI_API void uECC_vli_square(uECC_word_t *result,
allankliu 0:b6fdeddc0bc9 218 const uECC_word_t *left,
allankliu 0:b6fdeddc0bc9 219 wordcount_t num_words) {
allankliu 0:b6fdeddc0bc9 220 register uint32_t *r0 __asm__("r0") = result;
allankliu 0:b6fdeddc0bc9 221 register const uint32_t *r1 __asm__("r1") = left;
allankliu 0:b6fdeddc0bc9 222 register uint32_t r2 __asm__("r2") = num_words;
allankliu 0:b6fdeddc0bc9 223
allankliu 0:b6fdeddc0bc9 224 __asm__ volatile (
allankliu 0:b6fdeddc0bc9 225 ".syntax unified \n\t"
/* Smallest supported size first, then grow one word at a time. */
allankliu 0:b6fdeddc0bc9 226 #if (uECC_MIN_WORDS == 5)
allankliu 0:b6fdeddc0bc9 227 FAST_SQUARE_ASM_5
allankliu 0:b6fdeddc0bc9 228 #if (uECC_MAX_WORDS > 5)
allankliu 0:b6fdeddc0bc9 229 FAST_SQUARE_ASM_5_TO_6
allankliu 0:b6fdeddc0bc9 230 #endif
allankliu 0:b6fdeddc0bc9 231 #if (uECC_MAX_WORDS > 6)
allankliu 0:b6fdeddc0bc9 232 FAST_SQUARE_ASM_6_TO_7
allankliu 0:b6fdeddc0bc9 233 #endif
allankliu 0:b6fdeddc0bc9 234 #if (uECC_MAX_WORDS > 7)
allankliu 0:b6fdeddc0bc9 235 FAST_SQUARE_ASM_7_TO_8
allankliu 0:b6fdeddc0bc9 236 #endif
allankliu 0:b6fdeddc0bc9 237 #elif (uECC_MIN_WORDS == 6)
allankliu 0:b6fdeddc0bc9 238 FAST_SQUARE_ASM_6
allankliu 0:b6fdeddc0bc9 239 #if (uECC_MAX_WORDS > 6)
allankliu 0:b6fdeddc0bc9 240 FAST_SQUARE_ASM_6_TO_7
allankliu 0:b6fdeddc0bc9 241 #endif
allankliu 0:b6fdeddc0bc9 242 #if (uECC_MAX_WORDS > 7)
allankliu 0:b6fdeddc0bc9 243 FAST_SQUARE_ASM_7_TO_8
allankliu 0:b6fdeddc0bc9 244 #endif
allankliu 0:b6fdeddc0bc9 245 #elif (uECC_MIN_WORDS == 7)
allankliu 0:b6fdeddc0bc9 246 FAST_SQUARE_ASM_7
allankliu 0:b6fdeddc0bc9 247 #if (uECC_MAX_WORDS > 7)
allankliu 0:b6fdeddc0bc9 248 FAST_SQUARE_ASM_7_TO_8
allankliu 0:b6fdeddc0bc9 249 #endif
allankliu 0:b6fdeddc0bc9 250 #elif (uECC_MIN_WORDS == 8)
allankliu 0:b6fdeddc0bc9 251 FAST_SQUARE_ASM_8
allankliu 0:b6fdeddc0bc9 252 #endif
allankliu 0:b6fdeddc0bc9 253
allankliu 0:b6fdeddc0bc9 254 "1: \n\t"
allankliu 0:b6fdeddc0bc9 255 RESUME_SYNTAX
/* r0 and r1 are declared read-write, r2 read-only; r3-r12 and r14 (lr) are
 * declared clobbered, along with the flags and memory. */
allankliu 0:b6fdeddc0bc9 256 : "+r" (r0), "+r" (r1)
allankliu 0:b6fdeddc0bc9 257 : "r" (r2)
allankliu 0:b6fdeddc0bc9 258 : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
allankliu 0:b6fdeddc0bc9 259 );
allankliu 0:b6fdeddc0bc9 260 }
/* Marks square as provided by this file. */
allankliu 0:b6fdeddc0bc9 261 #define asm_square 1
allankliu 0:b6fdeddc0bc9 262 #endif /* uECC_SQUARE_FUNC */
allankliu 0:b6fdeddc0bc9 263
allankliu 0:b6fdeddc0bc9 264 #else /* (uECC_OPTIMIZATION_LEVEL > 3) */
allankliu 0:b6fdeddc0bc9 265
/* Multiplication for uECC_OPTIMIZATION_LEVEL > 3 (not built for Thumb-1).
 *
 * Instead of one chained asm block, this variant picks at run time a dedicated
 * fixed-size sequence (FAST_MULT_ASM_5/6/7/8, from the included
 * asm_arm_mult_square*.inc file) according to num_words. Only the sizes whose
 * curves are compiled in get a branch. If num_words matches none of them, the
 * function returns without writing to result.
 *
 * The arguments are pinned to r0-r3 through register variables; every asm block
 * declares r4-r12 and r14 (lr), the flags and memory as clobbered. */
allankliu 0:b6fdeddc0bc9 266 uECC_VLI_API void uECC_vli_mult(uint32_t *result,
allankliu 0:b6fdeddc0bc9 267 const uint32_t *left,
allankliu 0:b6fdeddc0bc9 268 const uint32_t *right,
allankliu 0:b6fdeddc0bc9 269 wordcount_t num_words) {
allankliu 0:b6fdeddc0bc9 270 register uint32_t *r0 __asm__("r0") = result;
allankliu 0:b6fdeddc0bc9 271 register const uint32_t *r1 __asm__("r1") = left;
allankliu 0:b6fdeddc0bc9 272 register const uint32_t *r2 __asm__("r2") = right;
allankliu 0:b6fdeddc0bc9 273 register uint32_t r3 __asm__("r3") = num_words;
allankliu 0:b6fdeddc0bc9 274
/* 5 words: secp160r1. */
allankliu 0:b6fdeddc0bc9 275 #if uECC_SUPPORTS_secp160r1
allankliu 0:b6fdeddc0bc9 276 if (num_words == 5) {
allankliu 0:b6fdeddc0bc9 277 __asm__ volatile (
allankliu 0:b6fdeddc0bc9 278 ".syntax unified \n\t"
allankliu 0:b6fdeddc0bc9 279 FAST_MULT_ASM_5
allankliu 0:b6fdeddc0bc9 280 RESUME_SYNTAX
allankliu 0:b6fdeddc0bc9 281 : "+r" (r0), "+r" (r1), "+r" (r2)
allankliu 0:b6fdeddc0bc9 282 : "r" (r3)
allankliu 0:b6fdeddc0bc9 283 : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
allankliu 0:b6fdeddc0bc9 284 );
allankliu 0:b6fdeddc0bc9 285 return;
allankliu 0:b6fdeddc0bc9 286 }
allankliu 0:b6fdeddc0bc9 287 #endif
/* 6 words: secp192r1. */
allankliu 0:b6fdeddc0bc9 288 #if uECC_SUPPORTS_secp192r1
allankliu 0:b6fdeddc0bc9 289 if (num_words == 6) {
allankliu 0:b6fdeddc0bc9 290 __asm__ volatile (
allankliu 0:b6fdeddc0bc9 291 ".syntax unified \n\t"
allankliu 0:b6fdeddc0bc9 292 FAST_MULT_ASM_6
allankliu 0:b6fdeddc0bc9 293 RESUME_SYNTAX
allankliu 0:b6fdeddc0bc9 294 : "+r" (r0), "+r" (r1), "+r" (r2)
allankliu 0:b6fdeddc0bc9 295 : "r" (r3)
allankliu 0:b6fdeddc0bc9 296 : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
allankliu 0:b6fdeddc0bc9 297 );
allankliu 0:b6fdeddc0bc9 298 return;
allankliu 0:b6fdeddc0bc9 299 }
allankliu 0:b6fdeddc0bc9 300 #endif
/* 7 words: secp224r1. */
allankliu 0:b6fdeddc0bc9 301 #if uECC_SUPPORTS_secp224r1
allankliu 0:b6fdeddc0bc9 302 if (num_words == 7) {
allankliu 0:b6fdeddc0bc9 303 __asm__ volatile (
allankliu 0:b6fdeddc0bc9 304 ".syntax unified \n\t"
allankliu 0:b6fdeddc0bc9 305 FAST_MULT_ASM_7
allankliu 0:b6fdeddc0bc9 306 RESUME_SYNTAX
allankliu 0:b6fdeddc0bc9 307 : "+r" (r0), "+r" (r1), "+r" (r2)
allankliu 0:b6fdeddc0bc9 308 : "r" (r3)
allankliu 0:b6fdeddc0bc9 309 : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
allankliu 0:b6fdeddc0bc9 310 );
allankliu 0:b6fdeddc0bc9 311 return;
allankliu 0:b6fdeddc0bc9 312 }
allankliu 0:b6fdeddc0bc9 313 #endif
/* 8 words: secp256r1 / secp256k1. */
allankliu 0:b6fdeddc0bc9 314 #if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1)
allankliu 0:b6fdeddc0bc9 315 if (num_words == 8) {
allankliu 0:b6fdeddc0bc9 316 __asm__ volatile (
allankliu 0:b6fdeddc0bc9 317 ".syntax unified \n\t"
allankliu 0:b6fdeddc0bc9 318 FAST_MULT_ASM_8
allankliu 0:b6fdeddc0bc9 319 RESUME_SYNTAX
allankliu 0:b6fdeddc0bc9 320 : "+r" (r0), "+r" (r1), "+r" (r2)
allankliu 0:b6fdeddc0bc9 321 : "r" (r3)
allankliu 0:b6fdeddc0bc9 322 : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
allankliu 0:b6fdeddc0bc9 323 );
allankliu 0:b6fdeddc0bc9 324 return;
allankliu 0:b6fdeddc0bc9 325 }
allankliu 0:b6fdeddc0bc9 326 #endif
allankliu 0:b6fdeddc0bc9 327 }
/* Marks mult as provided, so the "small" fallback further down is skipped. */
allankliu 0:b6fdeddc0bc9 328 #define asm_mult 1
allankliu 0:b6fdeddc0bc9 329
allankliu 0:b6fdeddc0bc9 330 #if uECC_SQUARE_FUNC
/* Squaring for uECC_OPTIMIZATION_LEVEL > 3 (not built for Thumb-1), compiled only
 * when uECC_SQUARE_FUNC is enabled.
 *
 * Picks at run time a dedicated fixed-size sequence (FAST_SQUARE_ASM_5/6/7/8,
 * from the included asm_arm_mult_square*.inc file) according to num_words. Only
 * the sizes whose curves are compiled in get a branch. If num_words matches none
 * of them, the function returns without writing to result.
 *
 * result, left and num_words are pinned to r0, r1 and r2 through register
 * variables; every asm block declares r3-r12 and r14 (lr), the flags and memory
 * as clobbered. */
allankliu 0:b6fdeddc0bc9 331 uECC_VLI_API void uECC_vli_square(uECC_word_t *result,
allankliu 0:b6fdeddc0bc9 332 const uECC_word_t *left,
allankliu 0:b6fdeddc0bc9 333 wordcount_t num_words) {
allankliu 0:b6fdeddc0bc9 334 register uint32_t *r0 __asm__("r0") = result;
allankliu 0:b6fdeddc0bc9 335 register const uint32_t *r1 __asm__("r1") = left;
allankliu 0:b6fdeddc0bc9 336 register uint32_t r2 __asm__("r2") = num_words;
allankliu 0:b6fdeddc0bc9 337
/* 5 words: secp160r1. */
allankliu 0:b6fdeddc0bc9 338 #if uECC_SUPPORTS_secp160r1
allankliu 0:b6fdeddc0bc9 339 if (num_words == 5) {
allankliu 0:b6fdeddc0bc9 340 __asm__ volatile (
allankliu 0:b6fdeddc0bc9 341 ".syntax unified \n\t"
allankliu 0:b6fdeddc0bc9 342 FAST_SQUARE_ASM_5
allankliu 0:b6fdeddc0bc9 343 RESUME_SYNTAX
allankliu 0:b6fdeddc0bc9 344 : "+r" (r0), "+r" (r1)
allankliu 0:b6fdeddc0bc9 345 : "r" (r2)
allankliu 0:b6fdeddc0bc9 346 : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
allankliu 0:b6fdeddc0bc9 347 );
allankliu 0:b6fdeddc0bc9 348 return;
allankliu 0:b6fdeddc0bc9 349 }
allankliu 0:b6fdeddc0bc9 350 #endif
/* 6 words: secp192r1. */
allankliu 0:b6fdeddc0bc9 351 #if uECC_SUPPORTS_secp192r1
allankliu 0:b6fdeddc0bc9 352 if (num_words == 6) {
allankliu 0:b6fdeddc0bc9 353 __asm__ volatile (
allankliu 0:b6fdeddc0bc9 354 ".syntax unified \n\t"
allankliu 0:b6fdeddc0bc9 355 FAST_SQUARE_ASM_6
allankliu 0:b6fdeddc0bc9 356 RESUME_SYNTAX
allankliu 0:b6fdeddc0bc9 357 : "+r" (r0), "+r" (r1)
allankliu 0:b6fdeddc0bc9 358 : "r" (r2)
allankliu 0:b6fdeddc0bc9 359 : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
allankliu 0:b6fdeddc0bc9 360 );
allankliu 0:b6fdeddc0bc9 361 return;
allankliu 0:b6fdeddc0bc9 362 }
allankliu 0:b6fdeddc0bc9 363 #endif
/* 7 words: secp224r1. */
allankliu 0:b6fdeddc0bc9 364 #if uECC_SUPPORTS_secp224r1
allankliu 0:b6fdeddc0bc9 365 if (num_words == 7) {
allankliu 0:b6fdeddc0bc9 366 __asm__ volatile (
allankliu 0:b6fdeddc0bc9 367 ".syntax unified \n\t"
allankliu 0:b6fdeddc0bc9 368 FAST_SQUARE_ASM_7
allankliu 0:b6fdeddc0bc9 369 RESUME_SYNTAX
allankliu 0:b6fdeddc0bc9 370 : "+r" (r0), "+r" (r1)
allankliu 0:b6fdeddc0bc9 371 : "r" (r2)
allankliu 0:b6fdeddc0bc9 372 : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
allankliu 0:b6fdeddc0bc9 373 );
allankliu 0:b6fdeddc0bc9 374 return;
allankliu 0:b6fdeddc0bc9 375 }
allankliu 0:b6fdeddc0bc9 376 #endif
/* 8 words: secp256r1 / secp256k1. */
allankliu 0:b6fdeddc0bc9 377 #if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1)
allankliu 0:b6fdeddc0bc9 378 if (num_words == 8) {
allankliu 0:b6fdeddc0bc9 379 __asm__ volatile (
allankliu 0:b6fdeddc0bc9 380 ".syntax unified \n\t"
allankliu 0:b6fdeddc0bc9 381 FAST_SQUARE_ASM_8
allankliu 0:b6fdeddc0bc9 382 RESUME_SYNTAX
allankliu 0:b6fdeddc0bc9 383 : "+r" (r0), "+r" (r1)
allankliu 0:b6fdeddc0bc9 384 : "r" (r2)
allankliu 0:b6fdeddc0bc9 385 : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
allankliu 0:b6fdeddc0bc9 386 );
allankliu 0:b6fdeddc0bc9 387 return;
allankliu 0:b6fdeddc0bc9 388 }
allankliu 0:b6fdeddc0bc9 389 #endif
allankliu 0:b6fdeddc0bc9 390 }
/* Marks square as provided by this file. */
allankliu 0:b6fdeddc0bc9 391 #define asm_square 1
allankliu 0:b6fdeddc0bc9 392 #endif /* uECC_SQUARE_FUNC */
allankliu 0:b6fdeddc0bc9 393
allankliu 0:b6fdeddc0bc9 394 #endif /* (uECC_OPTIMIZATION_LEVEL > 3) */
allankliu 0:b6fdeddc0bc9 395
allankliu 0:b6fdeddc0bc9 396 #endif /* uECC_PLATFORM != uECC_arm_thumb */
allankliu 0:b6fdeddc0bc9 397
allankliu 0:b6fdeddc0bc9 398 #endif /* (uECC_OPTIMIZATION_LEVEL >= 3) */
allankliu 0:b6fdeddc0bc9 399
allankliu 0:b6fdeddc0bc9 400 /* ---- "Small" implementations ---- */
allankliu 0:b6fdeddc0bc9 401
allankliu 0:b6fdeddc0bc9 402 #if !asm_add
/* Compact (looping) multi-word addition, used when no unrolled version was
 * defined above: result = left + right over num_words words. Returns the carry
 * out of the last word (0 or 1).
 *
 * The C flag does not survive the loop-counter "subs", so the running carry is
 * parked in the "carry" variable between iterations: "lsrs" shifts its bit 0 into
 * the C flag (leaving the register 0) and, after the add, "adcs carry, carry,
 * carry" writes the new C flag back into it.
 *
 * The loop body runs before the counter is tested, so num_words must be at
 * least 1. */
allankliu 0:b6fdeddc0bc9 403 uECC_VLI_API uECC_word_t uECC_vli_add(uECC_word_t *result,
allankliu 0:b6fdeddc0bc9 404 const uECC_word_t *left,
allankliu 0:b6fdeddc0bc9 405 const uECC_word_t *right,
allankliu 0:b6fdeddc0bc9 406 wordcount_t num_words) {
allankliu 0:b6fdeddc0bc9 407 uint32_t carry = 0;
allankliu 0:b6fdeddc0bc9 408 uint32_t left_word;
allankliu 0:b6fdeddc0bc9 409 uint32_t right_word;
allankliu 0:b6fdeddc0bc9 410
allankliu 0:b6fdeddc0bc9 411 __asm__ volatile (
allankliu 0:b6fdeddc0bc9 412 ".syntax unified \n\t"
allankliu 0:b6fdeddc0bc9 413 "1: \n\t"
allankliu 0:b6fdeddc0bc9 414 "ldmia %[lptr]!, {%[left]} \n\t" /* Load left word. */
allankliu 0:b6fdeddc0bc9 415 "ldmia %[rptr]!, {%[right]} \n\t" /* Load right word. */
allankliu 0:b6fdeddc0bc9 416 "lsrs %[carry], #1 \n\t" /* Set up carry flag (carry = 0 after this). */
allankliu 0:b6fdeddc0bc9 417 "adcs %[left], %[left], %[right] \n\t" /* Add with carry. */
allankliu 0:b6fdeddc0bc9 418 "adcs %[carry], %[carry], %[carry] \n\t" /* Store carry bit. */
allankliu 0:b6fdeddc0bc9 419 "stmia %[dptr]!, {%[left]} \n\t" /* Store result word. */
allankliu 0:b6fdeddc0bc9 420 "subs %[ctr], #1 \n\t" /* Decrement counter. */
allankliu 0:b6fdeddc0bc9 421 "bne 1b \n\t" /* Loop until counter == 0. */
allankliu 0:b6fdeddc0bc9 422 RESUME_SYNTAX
/* Pointers, counter and carry are read-write; the two word temporaries are
 * write-only scratch registers. */
allankliu 0:b6fdeddc0bc9 423 : [dptr] REG_RW (result), [lptr] REG_RW (left), [rptr] REG_RW (right),
allankliu 0:b6fdeddc0bc9 424 [ctr] REG_RW (num_words), [carry] REG_RW (carry),
allankliu 0:b6fdeddc0bc9 425 [left] REG_WRITE (left_word), [right] REG_WRITE (right_word)
allankliu 0:b6fdeddc0bc9 426 :
allankliu 0:b6fdeddc0bc9 427 : "cc", "memory"
allankliu 0:b6fdeddc0bc9 428 );
allankliu 0:b6fdeddc0bc9 429 return carry;
allankliu 0:b6fdeddc0bc9 430 }
allankliu 0:b6fdeddc0bc9 431 #define asm_add 1
allankliu 0:b6fdeddc0bc9 432 #endif
allankliu 0:b6fdeddc0bc9 433
allankliu 0:b6fdeddc0bc9 434 #if !asm_sub
/* Compact (looping) multi-word subtraction, used when no unrolled version was
 * defined above: result = left - right over num_words words. Returns 1 if the
 * subtraction borrowed out of the last word, otherwise 0.
 *
 * The C flag does not survive the loop-counter "subs", so it is parked in the
 * "carry" variable between iterations: "lsrs" shifts its bit 0 into the C flag
 * (leaving the register 0) and, after the subtract, "adcs carry, carry, carry"
 * writes the new C flag back into it. carry starts at 1 because a set C flag
 * means "no borrow" for sbcs; the final value is inverted for the same reason.
 *
 * The loop body runs before the counter is tested, so num_words must be at
 * least 1. */
allankliu 0:b6fdeddc0bc9 435 uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result,
allankliu 0:b6fdeddc0bc9 436 const uECC_word_t *left,
allankliu 0:b6fdeddc0bc9 437 const uECC_word_t *right,
allankliu 0:b6fdeddc0bc9 438 wordcount_t num_words) {
allankliu 0:b6fdeddc0bc9 439 uint32_t carry = 1; /* carry = 1 initially (means don't borrow) */
allankliu 0:b6fdeddc0bc9 440 uint32_t left_word;
allankliu 0:b6fdeddc0bc9 441 uint32_t right_word;
allankliu 0:b6fdeddc0bc9 442
allankliu 0:b6fdeddc0bc9 443 __asm__ volatile (
allankliu 0:b6fdeddc0bc9 444 ".syntax unified \n\t"
allankliu 0:b6fdeddc0bc9 445 "1: \n\t"
allankliu 0:b6fdeddc0bc9 446 "ldmia %[lptr]!, {%[left]} \n\t" /* Load left word. */
allankliu 0:b6fdeddc0bc9 447 "ldmia %[rptr]!, {%[right]} \n\t" /* Load right word. */
allankliu 0:b6fdeddc0bc9 448 "lsrs %[carry], #1 \n\t" /* Set up carry flag (carry = 0 after this). */
allankliu 0:b6fdeddc0bc9 449 "sbcs %[left], %[left], %[right] \n\t" /* Subtract with borrow. */
allankliu 0:b6fdeddc0bc9 450 "adcs %[carry], %[carry], %[carry] \n\t" /* Store carry bit. */
allankliu 0:b6fdeddc0bc9 451 "stmia %[dptr]!, {%[left]} \n\t" /* Store result word. */
allankliu 0:b6fdeddc0bc9 452 "subs %[ctr], #1 \n\t" /* Decrement counter. */
allankliu 0:b6fdeddc0bc9 453 "bne 1b \n\t" /* Loop until counter == 0. */
allankliu 0:b6fdeddc0bc9 454 RESUME_SYNTAX
/* Pointers, counter and carry are read-write; the two word temporaries are
 * write-only scratch registers. */
allankliu 0:b6fdeddc0bc9 455 : [dptr] REG_RW (result), [lptr] REG_RW (left), [rptr] REG_RW (right),
allankliu 0:b6fdeddc0bc9 456 [ctr] REG_RW (num_words), [carry] REG_RW (carry),
allankliu 0:b6fdeddc0bc9 457 [left] REG_WRITE (left_word), [right] REG_WRITE (right_word)
allankliu 0:b6fdeddc0bc9 458 :
allankliu 0:b6fdeddc0bc9 459 : "cc", "memory"
allankliu 0:b6fdeddc0bc9 460 );
allankliu 0:b6fdeddc0bc9 461 return !carry;
allankliu 0:b6fdeddc0bc9 462 }
allankliu 0:b6fdeddc0bc9 463 #define asm_sub 1
allankliu 0:b6fdeddc0bc9 464 #endif
allankliu 0:b6fdeddc0bc9 465
allankliu 0:b6fdeddc0bc9 466 #if !asm_mult
allankliu 0:b6fdeddc0bc9 467 uECC_VLI_API void uECC_vli_mult(uECC_word_t *result,
allankliu 0:b6fdeddc0bc9 468 const uECC_word_t *left,
allankliu 0:b6fdeddc0bc9 469 const uECC_word_t *right,
allankliu 0:b6fdeddc0bc9 470 wordcount_t num_words) {
allankliu 0:b6fdeddc0bc9 471 #if (uECC_PLATFORM != uECC_arm_thumb)
allankliu 0:b6fdeddc0bc9 472 uint32_t c0 = 0;
allankliu 0:b6fdeddc0bc9 473 uint32_t c1 = 0;
allankliu 0:b6fdeddc0bc9 474 uint32_t c2 = 0;
allankliu 0:b6fdeddc0bc9 475 uint32_t k = 0;
allankliu 0:b6fdeddc0bc9 476 uint32_t i;
allankliu 0:b6fdeddc0bc9 477 uint32_t t0, t1;
allankliu 0:b6fdeddc0bc9 478
allankliu 0:b6fdeddc0bc9 479 __asm__ volatile (
allankliu 0:b6fdeddc0bc9 480 ".syntax unified \n\t"
allankliu 0:b6fdeddc0bc9 481
allankliu 0:b6fdeddc0bc9 482 "1: \n\t" /* outer loop (k < num_words) */
allankliu 0:b6fdeddc0bc9 483 "movs %[i], #0 \n\t" /* i = 0 */
allankliu 0:b6fdeddc0bc9 484 "b 3f \n\t"
allankliu 0:b6fdeddc0bc9 485
allankliu 0:b6fdeddc0bc9 486 "2: \n\t" /* outer loop (k >= num_words) */
allankliu 0:b6fdeddc0bc9 487 "movs %[i], %[k] \n\t" /* i = k */
allankliu 0:b6fdeddc0bc9 488 "subs %[i], %[last_word] \n\t" /* i = k - (num_words - 1) (times 4) */
allankliu 0:b6fdeddc0bc9 489
allankliu 0:b6fdeddc0bc9 490 "3: \n\t" /* inner loop */
allankliu 0:b6fdeddc0bc9 491 "subs %[t0], %[k], %[i] \n\t" /* t0 = k-i */
allankliu 0:b6fdeddc0bc9 492
allankliu 0:b6fdeddc0bc9 493 "ldr %[t1], [%[right], %[t0]] \n\t" /* t1 = right[k - i] */
allankliu 0:b6fdeddc0bc9 494 "ldr %[t0], [%[left], %[i]] \n\t" /* t0 = left[i] */
allankliu 0:b6fdeddc0bc9 495
allankliu 0:b6fdeddc0bc9 496 "umull %[t0], %[t1], %[t0], %[t1] \n\t" /* (t0, t1) = left[i] * right[k - i] */
allankliu 0:b6fdeddc0bc9 497
allankliu 0:b6fdeddc0bc9 498 "adds %[c0], %[c0], %[t0] \n\t" /* add low word to c0 */
allankliu 0:b6fdeddc0bc9 499 "adcs %[c1], %[c1], %[t1] \n\t" /* add high word to c1, including carry */
allankliu 0:b6fdeddc0bc9 500 "adcs %[c2], %[c2], #0 \n\t" /* add carry to c2 */
allankliu 0:b6fdeddc0bc9 501
allankliu 0:b6fdeddc0bc9 502 "adds %[i], #4 \n\t" /* i += 4 */
allankliu 0:b6fdeddc0bc9 503 "cmp %[i], %[last_word] \n\t" /* i > (num_words - 1) (times 4)? */
allankliu 0:b6fdeddc0bc9 504 "bgt 4f \n\t" /* if so, exit the loop */
allankliu 0:b6fdeddc0bc9 505 "cmp %[i], %[k] \n\t" /* i <= k? */
allankliu 0:b6fdeddc0bc9 506 "ble 3b \n\t" /* if so, continue looping */
allankliu 0:b6fdeddc0bc9 507
allankliu 0:b6fdeddc0bc9 508 "4: \n\t" /* end inner loop */
allankliu 0:b6fdeddc0bc9 509
allankliu 0:b6fdeddc0bc9 510 "str %[c0], [%[result], %[k]] \n\t" /* result[k] = c0 */
allankliu 0:b6fdeddc0bc9 511 "mov %[c0], %[c1] \n\t" /* c0 = c1 */
allankliu 0:b6fdeddc0bc9 512 "mov %[c1], %[c2] \n\t" /* c1 = c2 */
allankliu 0:b6fdeddc0bc9 513 "movs %[c2], #0 \n\t" /* c2 = 0 */
allankliu 0:b6fdeddc0bc9 514 "adds %[k], #4 \n\t" /* k += 4 */
allankliu 0:b6fdeddc0bc9 515 "cmp %[k], %[last_word] \n\t" /* k <= (num_words - 1) (times 4) ? */
allankliu 0:b6fdeddc0bc9 516 "ble 1b \n\t" /* if so, loop back, start with i = 0 */
allankliu 0:b6fdeddc0bc9 517 "cmp %[k], %[last_word], lsl #1 \n\t" /* k <= (num_words * 2 - 2) (times 4) ? */
allankliu 0:b6fdeddc0bc9 518 "ble 2b \n\t" /* if so, loop back, start with i = (k + 1) - num_words */
allankliu 0:b6fdeddc0bc9 519 /* end outer loop */
allankliu 0:b6fdeddc0bc9 520
allankliu 0:b6fdeddc0bc9 521 "str %[c0], [%[result], %[k]] \n\t" /* result[num_words * 2 - 1] = c0 */
allankliu 0:b6fdeddc0bc9 522 RESUME_SYNTAX
allankliu 0:b6fdeddc0bc9 523 : [c0] "+r" (c0), [c1] "+r" (c1), [c2] "+r" (c2),
allankliu 0:b6fdeddc0bc9 524 [k] "+r" (k), [i] "=&r" (i), [t0] "=&r" (t0), [t1] "=&r" (t1)
allankliu 0:b6fdeddc0bc9 525 : [result] "r" (result), [left] "r" (left), [right] "r" (right),
allankliu 0:b6fdeddc0bc9 526 [last_word] "r" ((num_words - 1) * 4)
allankliu 0:b6fdeddc0bc9 527 : "cc", "memory"
allankliu 0:b6fdeddc0bc9 528 );
allankliu 0:b6fdeddc0bc9 529
allankliu 0:b6fdeddc0bc9 530 #else /* Thumb-1 */
allankliu 0:b6fdeddc0bc9 531 uint32_t r4, r5, r6, r7;
allankliu 0:b6fdeddc0bc9 532
allankliu 0:b6fdeddc0bc9 533 __asm__ volatile (
allankliu 0:b6fdeddc0bc9 534 ".syntax unified \n\t"
allankliu 0:b6fdeddc0bc9 535 "subs %[r3], #1 \n\t" /* r3 = num_words - 1 */
allankliu 0:b6fdeddc0bc9 536 "lsls %[r3], #2 \n\t" /* r3 = (num_words - 1) * 4 */
allankliu 0:b6fdeddc0bc9 537 "mov r8, %[r3] \n\t" /* r8 = (num_words - 1) * 4 */
allankliu 0:b6fdeddc0bc9 538 "lsls %[r3], #1 \n\t" /* r3 = (num_words - 1) * 8 */
allankliu 0:b6fdeddc0bc9 539 "mov r9, %[r3] \n\t" /* r9 = (num_words - 1) * 8 */
allankliu 0:b6fdeddc0bc9 540 "movs %[r3], #0 \n\t" /* c0 = 0 */
allankliu 0:b6fdeddc0bc9 541 "movs %[r4], #0 \n\t" /* c1 = 0 */
allankliu 0:b6fdeddc0bc9 542 "movs %[r5], #0 \n\t" /* c2 = 0 */
allankliu 0:b6fdeddc0bc9 543 "movs %[r6], #0 \n\t" /* k = 0 */
allankliu 0:b6fdeddc0bc9 544
allankliu 0:b6fdeddc0bc9 545 "push {%[r0]} \n\t" /* keep result on the stack */
allankliu 0:b6fdeddc0bc9 546
allankliu 0:b6fdeddc0bc9 547 "1: \n\t" /* outer loop (k < num_words) */
allankliu 0:b6fdeddc0bc9 548 "movs %[r7], #0 \n\t" /* r7 = i = 0 */
allankliu 0:b6fdeddc0bc9 549 "b 3f \n\t"
allankliu 0:b6fdeddc0bc9 550
allankliu 0:b6fdeddc0bc9 551 "2: \n\t" /* outer loop (k >= num_words) */
allankliu 0:b6fdeddc0bc9 552 "movs %[r7], %[r6] \n\t" /* r7 = k */
allankliu 0:b6fdeddc0bc9 553 "mov %[r0], r8 \n\t" /* r0 = (num_words - 1) * 4 */
allankliu 0:b6fdeddc0bc9 554 "subs %[r7], %[r0] \n\t" /* r7 = i = k - (num_words - 1) (times 4) */
allankliu 0:b6fdeddc0bc9 555
allankliu 0:b6fdeddc0bc9 556 "3: \n\t" /* inner loop */
allankliu 0:b6fdeddc0bc9 557 "mov r10, %[r3] \n\t"
allankliu 0:b6fdeddc0bc9 558 "mov r11, %[r4] \n\t"
allankliu 0:b6fdeddc0bc9 559 "mov r12, %[r5] \n\t"
allankliu 0:b6fdeddc0bc9 560 "mov r14, %[r6] \n\t"
allankliu 0:b6fdeddc0bc9 561 "subs %[r0], %[r6], %[r7] \n\t" /* r0 = k - i */
allankliu 0:b6fdeddc0bc9 562
allankliu 0:b6fdeddc0bc9 563 "ldr %[r4], [%[r2], %[r0]] \n\t" /* r4 = right[k - i] */
allankliu 0:b6fdeddc0bc9 564 "ldr %[r0], [%[r1], %[r7]] \n\t" /* r0 = left[i] */
allankliu 0:b6fdeddc0bc9 565
allankliu 0:b6fdeddc0bc9 566 "lsrs %[r3], %[r0], #16 \n\t" /* r3 = a1 */
allankliu 0:b6fdeddc0bc9 567 "uxth %[r0], %[r0] \n\t" /* r0 = a0 */
allankliu 0:b6fdeddc0bc9 568
allankliu 0:b6fdeddc0bc9 569 "lsrs %[r5], %[r4], #16 \n\t" /* r5 = b1 */
allankliu 0:b6fdeddc0bc9 570 "uxth %[r4], %[r4] \n\t" /* r4 = b0 */
allankliu 0:b6fdeddc0bc9 571
allankliu 0:b6fdeddc0bc9 572 "movs %[r6], %[r3] \n\t" /* r6 = a1 */
allankliu 0:b6fdeddc0bc9 573 "muls %[r6], %[r5], %[r6] \n\t" /* r6 = a1 * b1 */
allankliu 0:b6fdeddc0bc9 574 "muls %[r3], %[r4], %[r3] \n\t" /* r3 = b0 * a1 */
allankliu 0:b6fdeddc0bc9 575 "muls %[r5], %[r0], %[r5] \n\t" /* r5 = a0 * b1 */
allankliu 0:b6fdeddc0bc9 576 "muls %[r0], %[r4], %[r0] \n\t" /* r0 = a0 * b0 */
allankliu 0:b6fdeddc0bc9 577
allankliu 0:b6fdeddc0bc9 578 /* Add middle terms */
allankliu 0:b6fdeddc0bc9 579 "lsls %[r4], %[r3], #16 \n\t"
allankliu 0:b6fdeddc0bc9 580 "lsrs %[r3], %[r3], #16 \n\t"
allankliu 0:b6fdeddc0bc9 581 "adds %[r0], %[r4] \n\t"
allankliu 0:b6fdeddc0bc9 582 "adcs %[r6], %[r3] \n\t"
allankliu 0:b6fdeddc0bc9 583
allankliu 0:b6fdeddc0bc9 584 "lsls %[r4], %[r5], #16 \n\t"
allankliu 0:b6fdeddc0bc9 585 "lsrs %[r5], %[r5], #16 \n\t"
allankliu 0:b6fdeddc0bc9 586 "adds %[r0], %[r4] \n\t"
allankliu 0:b6fdeddc0bc9 587 "adcs %[r6], %[r5] \n\t"
allankliu 0:b6fdeddc0bc9 588
allankliu 0:b6fdeddc0bc9 589 "mov %[r3], r10\n\t"
allankliu 0:b6fdeddc0bc9 590 "mov %[r4], r11\n\t"
allankliu 0:b6fdeddc0bc9 591 "mov %[r5], r12\n\t"
allankliu 0:b6fdeddc0bc9 592 "adds %[r3], %[r0] \n\t" /* add low word to c0 */
allankliu 0:b6fdeddc0bc9 593 "adcs %[r4], %[r6] \n\t" /* add high word to c1, including carry */
allankliu 0:b6fdeddc0bc9 594 "movs %[r0], #0 \n\t" /* r0 = 0 (does not affect carry bit) */
allankliu 0:b6fdeddc0bc9 595 "adcs %[r5], %[r0] \n\t" /* add carry to c2 */
allankliu 0:b6fdeddc0bc9 596
allankliu 0:b6fdeddc0bc9 597 "mov %[r6], r14\n\t" /* r6 = k */
allankliu 0:b6fdeddc0bc9 598
allankliu 0:b6fdeddc0bc9 599 "adds %[r7], #4 \n\t" /* i += 4 */
allankliu 0:b6fdeddc0bc9 600 "cmp %[r7], r8 \n\t" /* i > (num_words - 1) (times 4)? */
allankliu 0:b6fdeddc0bc9 601 "bgt 4f \n\t" /* if so, exit the loop */
allankliu 0:b6fdeddc0bc9 602 "cmp %[r7], %[r6] \n\t" /* i <= k? */
allankliu 0:b6fdeddc0bc9 603 "ble 3b \n\t" /* if so, continue looping */
allankliu 0:b6fdeddc0bc9 604
allankliu 0:b6fdeddc0bc9 605 "4: \n\t" /* end inner loop */
allankliu 0:b6fdeddc0bc9 606
allankliu 0:b6fdeddc0bc9 607 "ldr %[r0], [sp, #0] \n\t" /* r0 = result */
allankliu 0:b6fdeddc0bc9 608
allankliu 0:b6fdeddc0bc9 609 "str %[r3], [%[r0], %[r6]] \n\t" /* result[k] = c0 */
allankliu 0:b6fdeddc0bc9 610 "mov %[r3], %[r4] \n\t" /* c0 = c1 */
allankliu 0:b6fdeddc0bc9 611 "mov %[r4], %[r5] \n\t" /* c1 = c2 */
allankliu 0:b6fdeddc0bc9 612 "movs %[r5], #0 \n\t" /* c2 = 0 */
allankliu 0:b6fdeddc0bc9 613 "adds %[r6], #4 \n\t" /* k += 4 */
allankliu 0:b6fdeddc0bc9 614 "cmp %[r6], r8 \n\t" /* k <= (num_words - 1) (times 4) ? */
allankliu 0:b6fdeddc0bc9 615 "ble 1b \n\t" /* if so, loop back, start with i = 0 */
allankliu 0:b6fdeddc0bc9 616 "cmp %[r6], r9 \n\t" /* k <= (num_words * 2 - 2) (times 4) ? */
allankliu 0:b6fdeddc0bc9 617 "ble 2b \n\t" /* if so, loop back, with i = (k + 1) - num_words */
allankliu 0:b6fdeddc0bc9 618 /* end outer loop */
allankliu 0:b6fdeddc0bc9 619
allankliu 0:b6fdeddc0bc9 620 "str %[r3], [%[r0], %[r6]] \n\t" /* result[num_words * 2 - 1] = c0 */
allankliu 0:b6fdeddc0bc9 621 "pop {%[r0]} \n\t" /* pop result off the stack */
allankliu 0:b6fdeddc0bc9 622
allankliu 0:b6fdeddc0bc9 623 ".syntax divided \n\t"
allankliu 0:b6fdeddc0bc9 624 : [r3] "+l" (num_words), [r4] "=&l" (r4),
allankliu 0:b6fdeddc0bc9 625 [r5] "=&l" (r5), [r6] "=&l" (r6), [r7] "=&l" (r7)
allankliu 0:b6fdeddc0bc9 626 : [r0] "l" (result), [r1] "l" (left), [r2] "l" (right)
allankliu 0:b6fdeddc0bc9 627 : "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
allankliu 0:b6fdeddc0bc9 628 );
allankliu 0:b6fdeddc0bc9 629 #endif
allankliu 0:b6fdeddc0bc9 630 }
allankliu 0:b6fdeddc0bc9 631 #define asm_mult 1
allankliu 0:b6fdeddc0bc9 632 #endif
allankliu 0:b6fdeddc0bc9 633
allankliu 0:b6fdeddc0bc9 634 #if uECC_SQUARE_FUNC
allankliu 0:b6fdeddc0bc9 635 #if !asm_square
/*
 * Squares a multi-word integer: result = left * left.
 *
 * result    - destination; words at byte offsets 0 .. (num_words * 2 - 1) * 4 are written.
 * left      - source; words at byte offsets 0 .. (num_words - 1) * 4 are read.
 * num_words - number of words in left (the asm accesses them as 32-bit words, stride 4).
 *
 * Both variants walk the result column by column: k is the byte offset of the
 * current result word and i is a byte offset into left. Each product
 * left[i] * left[k - i] is added into the three-word accumulator c2:c1:c0, and is
 * added twice when i < k - i, because the inner loop stops once i > k - i and so
 * never visits the mirrored product. After a column, c0 is stored to result[k]
 * and the accumulator shifts down one word (c0 = c1, c1 = c2, c2 = 0).
 *
 * The first variant multiplies with umull. The uECC_arm_thumb variant builds each
 * 32x32 product from four 16x16 muls and keeps its operands in low registers.
 */
allankliu 0:b6fdeddc0bc9 636 uECC_VLI_API void uECC_vli_square(uECC_word_t *result,
allankliu 0:b6fdeddc0bc9 637 const uECC_word_t *left,
allankliu 0:b6fdeddc0bc9 638 wordcount_t num_words) {
allankliu 0:b6fdeddc0bc9 639 #if (uECC_PLATFORM != uECC_arm_thumb)
allankliu 0:b6fdeddc0bc9 640 uint32_t c0 = 0;
allankliu 0:b6fdeddc0bc9 641 uint32_t c1 = 0;
allankliu 0:b6fdeddc0bc9 642 uint32_t c2 = 0;
allankliu 0:b6fdeddc0bc9 643 uint32_t k = 0;
allankliu 0:b6fdeddc0bc9 644 uint32_t i, tt;
allankliu 0:b6fdeddc0bc9 645 uint32_t t0, t1;
allankliu 0:b6fdeddc0bc9 646 /* c2:c1:c0 = column accumulator; k, i, tt = byte offsets (word index * 4); t1:t0 = current product */
allankliu 0:b6fdeddc0bc9 647 __asm__ volatile (
allankliu 0:b6fdeddc0bc9 648 ".syntax unified \n\t"
allankliu 0:b6fdeddc0bc9 649
allankliu 0:b6fdeddc0bc9 650 "1: \n\t" /* outer loop (k < num_words) */
allankliu 0:b6fdeddc0bc9 651 "movs %[i], #0 \n\t" /* i = 0 */
allankliu 0:b6fdeddc0bc9 652 "b 3f \n\t"
allankliu 0:b6fdeddc0bc9 653
allankliu 0:b6fdeddc0bc9 654 "2: \n\t" /* outer loop (k >= num_words) */
allankliu 0:b6fdeddc0bc9 655 "movs %[i], %[k] \n\t" /* i = k */
allankliu 0:b6fdeddc0bc9 656 "subs %[i], %[last_word] \n\t" /* i = k - (num_words - 1) (times 4) */
allankliu 0:b6fdeddc0bc9 657
allankliu 0:b6fdeddc0bc9 658 "3: \n\t" /* inner loop */
allankliu 0:b6fdeddc0bc9 659 "subs %[tt], %[k], %[i] \n\t" /* tt = k-i */
allankliu 0:b6fdeddc0bc9 660
allankliu 0:b6fdeddc0bc9 661 "ldr %[t1], [%[left], %[tt]] \n\t" /* t1 = left[k - i] */
allankliu 0:b6fdeddc0bc9 662 "ldr %[t0], [%[left], %[i]] \n\t" /* t0 = left[i] */
allankliu 0:b6fdeddc0bc9 663
allankliu 0:b6fdeddc0bc9 664 "umull %[t0], %[t1], %[t0], %[t1] \n\t" /* (t0, t1) = left[i] * left[k - i]; t0 = low word, t1 = high word */
allankliu 0:b6fdeddc0bc9 665
allankliu 0:b6fdeddc0bc9 666 "cmp %[i], %[tt] \n\t" /* (i < k - i) ? */
allankliu 0:b6fdeddc0bc9 667 "bge 4f \n\t" /* if i >= k - i, skip */
allankliu 0:b6fdeddc0bc9 668 "adds %[c0], %[c0], %[t0] \n\t" /* add low word to c0 */
allankliu 0:b6fdeddc0bc9 669 "adcs %[c1], %[c1], %[t1] \n\t" /* add high word to c1, including carry */
allankliu 0:b6fdeddc0bc9 670 "adcs %[c2], %[c2], #0 \n\t" /* add carry to c2 */
allankliu 0:b6fdeddc0bc9 671 /* falls through: an off-diagonal product (i < k - i) is added a second time at 4: */
allankliu 0:b6fdeddc0bc9 672 "4: \n\t"
allankliu 0:b6fdeddc0bc9 673 "adds %[c0], %[c0], %[t0] \n\t" /* add low word to c0 */
allankliu 0:b6fdeddc0bc9 674 "adcs %[c1], %[c1], %[t1] \n\t" /* add high word to c1, including carry */
allankliu 0:b6fdeddc0bc9 675 "adcs %[c2], %[c2], #0 \n\t" /* add carry to c2 */
allankliu 0:b6fdeddc0bc9 676
allankliu 0:b6fdeddc0bc9 677 "adds %[i], #4 \n\t" /* i += 4 */
allankliu 0:b6fdeddc0bc9 678 "cmp %[i], %[k] \n\t" /* i >= k? */
allankliu 0:b6fdeddc0bc9 679 "bge 5f \n\t" /* if so, exit the loop */
allankliu 0:b6fdeddc0bc9 680 "subs %[tt], %[k], %[i] \n\t" /* tt = k - i */
allankliu 0:b6fdeddc0bc9 681 "cmp %[i], %[tt] \n\t" /* i <= k - i? */
allankliu 0:b6fdeddc0bc9 682 "ble 3b \n\t" /* if so, continue looping */
allankliu 0:b6fdeddc0bc9 683
allankliu 0:b6fdeddc0bc9 684 "5: \n\t" /* end inner loop */
allankliu 0:b6fdeddc0bc9 685 /* column k is complete: emit its low word and shift the accumulator down one word */
allankliu 0:b6fdeddc0bc9 686 "str %[c0], [%[result], %[k]] \n\t" /* result[k] = c0 */
allankliu 0:b6fdeddc0bc9 687 "mov %[c0], %[c1] \n\t" /* c0 = c1 */
allankliu 0:b6fdeddc0bc9 688 "mov %[c1], %[c2] \n\t" /* c1 = c2 */
allankliu 0:b6fdeddc0bc9 689 "movs %[c2], #0 \n\t" /* c2 = 0 */
allankliu 0:b6fdeddc0bc9 690 "adds %[k], #4 \n\t" /* k += 4 */
allankliu 0:b6fdeddc0bc9 691 "cmp %[k], %[last_word] \n\t" /* k <= (num_words - 1) (times 4) ? */
allankliu 0:b6fdeddc0bc9 692 "ble 1b \n\t" /* if so, loop back, start with i = 0 */
allankliu 0:b6fdeddc0bc9 693 "cmp %[k], %[last_word], lsl #1 \n\t" /* k <= (num_words * 2 - 2) (times 4) ? */
allankliu 0:b6fdeddc0bc9 694 "ble 2b \n\t" /* if so, loop back, start with i = (k + 1) - num_words */
allankliu 0:b6fdeddc0bc9 695 /* end outer loop */
allankliu 0:b6fdeddc0bc9 696
allankliu 0:b6fdeddc0bc9 697 "str %[c0], [%[result], %[k]] \n\t" /* result[num_words * 2 - 1] = c0 */
allankliu 0:b6fdeddc0bc9 698 RESUME_SYNTAX
allankliu 0:b6fdeddc0bc9 699 : [c0] "+r" (c0), [c1] "+r" (c1), [c2] "+r" (c2),
allankliu 0:b6fdeddc0bc9 700 [k] "+r" (k), [i] "=&r" (i), [tt] "=&r" (tt), [t0] "=&r" (t0), [t1] "=&r" (t1)
allankliu 0:b6fdeddc0bc9 701 : [result] "r" (result), [left] "r" (left), [last_word] "r" ((num_words - 1) * 4)
allankliu 0:b6fdeddc0bc9 702 : "cc", "memory"
allankliu 0:b6fdeddc0bc9 703 );
allankliu 0:b6fdeddc0bc9 704 /* uECC_arm_thumb variant follows: every operand is constrained to a low register ("l"); r8-r12 and r14 are used as extra storage */
allankliu 0:b6fdeddc0bc9 705 #else
allankliu 0:b6fdeddc0bc9 706 uint32_t r3, r4, r5, r6, r7;
allankliu 0:b6fdeddc0bc9 707 /* Roles: r0 = result (also scratch), r1 = left, r2 = num_words on entry then c0, r3 = c1, r4 = c2, r5 = k, r6 = i, r7 = k - i or zero */
allankliu 0:b6fdeddc0bc9 708 __asm__ volatile (
allankliu 0:b6fdeddc0bc9 709 ".syntax unified \n\t"
allankliu 0:b6fdeddc0bc9 710 "subs %[r2], #1 \n\t" /* r2 = num_words - 1 */
allankliu 0:b6fdeddc0bc9 711 "lsls %[r2], #2 \n\t" /* r2 = (num_words - 1) * 4 */
allankliu 0:b6fdeddc0bc9 712 "mov r8, %[r2] \n\t" /* r8 = (num_words - 1) * 4 */
allankliu 0:b6fdeddc0bc9 713 "lsls %[r2], #1 \n\t" /* r2 = (num_words - 1) * 8 */
allankliu 0:b6fdeddc0bc9 714 "mov r9, %[r2] \n\t" /* r9 = (num_words - 1) * 8 */
allankliu 0:b6fdeddc0bc9 715 "movs %[r2], #0 \n\t" /* c0 = 0 */
allankliu 0:b6fdeddc0bc9 716 "movs %[r3], #0 \n\t" /* c1 = 0 */
allankliu 0:b6fdeddc0bc9 717 "movs %[r4], #0 \n\t" /* c2 = 0 */
allankliu 0:b6fdeddc0bc9 718 "movs %[r5], #0 \n\t" /* k = 0 */
allankliu 0:b6fdeddc0bc9 719 /* r0 is reused as scratch inside the loops, so the result pointer is parked on the stack */
allankliu 0:b6fdeddc0bc9 720 "push {%[r0]} \n\t" /* keep result on the stack */
allankliu 0:b6fdeddc0bc9 721
allankliu 0:b6fdeddc0bc9 722 "1: \n\t" /* outer loop (k < num_words) */
allankliu 0:b6fdeddc0bc9 723 "movs %[r6], #0 \n\t" /* r6 = i = 0 */
allankliu 0:b6fdeddc0bc9 724 "b 3f \n\t"
allankliu 0:b6fdeddc0bc9 725
allankliu 0:b6fdeddc0bc9 726 "2: \n\t" /* outer loop (k >= num_words) */
allankliu 0:b6fdeddc0bc9 727 "movs %[r6], %[r5] \n\t" /* r6 = k */
allankliu 0:b6fdeddc0bc9 728 "mov %[r0], r8 \n\t" /* r0 = (num_words - 1) * 4 */
allankliu 0:b6fdeddc0bc9 729 "subs %[r6], %[r0] \n\t" /* r6 = i = k - (num_words - 1) (times 4) */
allankliu 0:b6fdeddc0bc9 730 /* spill c0, c1, c2 and k to r10, r11, r12, r14: r2-r5 are needed as scratch for the multiply */
allankliu 0:b6fdeddc0bc9 731 "3: \n\t" /* inner loop */
allankliu 0:b6fdeddc0bc9 732 "mov r10, %[r2] \n\t"
allankliu 0:b6fdeddc0bc9 733 "mov r11, %[r3] \n\t"
allankliu 0:b6fdeddc0bc9 734 "mov r12, %[r4] \n\t"
allankliu 0:b6fdeddc0bc9 735 "mov r14, %[r5] \n\t"
allankliu 0:b6fdeddc0bc9 736 "subs %[r7], %[r5], %[r6] \n\t" /* r7 = k - i */
allankliu 0:b6fdeddc0bc9 737
allankliu 0:b6fdeddc0bc9 738 "ldr %[r3], [%[r1], %[r7]] \n\t" /* r3 = left[k - i] */
allankliu 0:b6fdeddc0bc9 739 "ldr %[r0], [%[r1], %[r6]] \n\t" /* r0 = left[i] */
allankliu 0:b6fdeddc0bc9 740 /* split both operands into 16-bit halves: a1:a0 = left[i], b1:b0 = left[k - i] */
allankliu 0:b6fdeddc0bc9 741 "lsrs %[r2], %[r0], #16 \n\t" /* r2 = a1 */
allankliu 0:b6fdeddc0bc9 742 "uxth %[r0], %[r0] \n\t" /* r0 = a0 */
allankliu 0:b6fdeddc0bc9 743
allankliu 0:b6fdeddc0bc9 744 "lsrs %[r4], %[r3], #16 \n\t" /* r4 = b1 */
allankliu 0:b6fdeddc0bc9 745 "uxth %[r3], %[r3] \n\t" /* r3 = b0 */
allankliu 0:b6fdeddc0bc9 746 /* four 16x16 partial products; r5 (high) : r0 (low) accumulates the 64-bit product */
allankliu 0:b6fdeddc0bc9 747 "movs %[r5], %[r2] \n\t" /* r5 = a1 */
allankliu 0:b6fdeddc0bc9 748 "muls %[r5], %[r4], %[r5] \n\t" /* r5 = a1 * b1 */
allankliu 0:b6fdeddc0bc9 749 "muls %[r2], %[r3], %[r2] \n\t" /* r2 = b0 * a1 */
allankliu 0:b6fdeddc0bc9 750 "muls %[r4], %[r0], %[r4] \n\t" /* r4 = a0 * b1 */
allankliu 0:b6fdeddc0bc9 751 "muls %[r0], %[r3], %[r0] \n\t" /* r0 = a0 * b0 */
allankliu 0:b6fdeddc0bc9 752
allankliu 0:b6fdeddc0bc9 753 /* Add middle terms: (b0 * a1) << 16, then (a0 * b1) << 16, each with its carry into r5 */
allankliu 0:b6fdeddc0bc9 754 "lsls %[r3], %[r2], #16 \n\t"
allankliu 0:b6fdeddc0bc9 755 "lsrs %[r2], %[r2], #16 \n\t"
allankliu 0:b6fdeddc0bc9 756 "adds %[r0], %[r3] \n\t"
allankliu 0:b6fdeddc0bc9 757 "adcs %[r5], %[r2] \n\t"
allankliu 0:b6fdeddc0bc9 758
allankliu 0:b6fdeddc0bc9 759 "lsls %[r3], %[r4], #16 \n\t"
allankliu 0:b6fdeddc0bc9 760 "lsrs %[r4], %[r4], #16 \n\t"
allankliu 0:b6fdeddc0bc9 761 "adds %[r0], %[r3] \n\t"
allankliu 0:b6fdeddc0bc9 762 "adcs %[r5], %[r4] \n\t"
allankliu 0:b6fdeddc0bc9 763
allankliu 0:b6fdeddc0bc9 764 /* Add to acc, doubling if necessary */
allankliu 0:b6fdeddc0bc9 765 "mov %[r2], r10\n\t"
allankliu 0:b6fdeddc0bc9 766 "mov %[r3], r11\n\t"
allankliu 0:b6fdeddc0bc9 767 "mov %[r4], r12\n\t"
allankliu 0:b6fdeddc0bc9 768 /* r2, r3, r4 hold c0, c1, c2 again; the product to add is r5 (high) : r0 (low) */
allankliu 0:b6fdeddc0bc9 769 "cmp %[r6], %[r7] \n\t" /* (i < k - i) ? */
allankliu 0:b6fdeddc0bc9 770 "bge 4f \n\t" /* if i >= k - i, skip */
allankliu 0:b6fdeddc0bc9 771 "movs %[r7], #0 \n\t" /* r7 = 0 */
allankliu 0:b6fdeddc0bc9 772 "adds %[r2], %[r0] \n\t" /* add low word to c0 */
allankliu 0:b6fdeddc0bc9 773 "adcs %[r3], %[r5] \n\t" /* add high word to c1, including carry */
allankliu 0:b6fdeddc0bc9 774 "adcs %[r4], %[r7] \n\t" /* add carry to c2 */
allankliu 0:b6fdeddc0bc9 775 "4: \n\t" /* reached by fall-through (second add of an off-diagonal product) or by the branch (single add) */
allankliu 0:b6fdeddc0bc9 776 "movs %[r7], #0 \n\t" /* r7 = 0 */
allankliu 0:b6fdeddc0bc9 777 "adds %[r2], %[r0] \n\t" /* add low word to c0 */
allankliu 0:b6fdeddc0bc9 778 "adcs %[r3], %[r5] \n\t" /* add high word to c1, including carry */
allankliu 0:b6fdeddc0bc9 779 "adcs %[r4], %[r7] \n\t" /* add carry to c2 */
allankliu 0:b6fdeddc0bc9 780 /* r5 held the product's high word until here; reload k from its spill register */
allankliu 0:b6fdeddc0bc9 781 "mov %[r5], r14\n\t" /* r5 = k */
allankliu 0:b6fdeddc0bc9 782
allankliu 0:b6fdeddc0bc9 783 "adds %[r6], #4 \n\t" /* i += 4 */
allankliu 0:b6fdeddc0bc9 784 "cmp %[r6], %[r5] \n\t" /* i >= k? */
allankliu 0:b6fdeddc0bc9 785 "bge 5f \n\t" /* if so, exit the loop */
allankliu 0:b6fdeddc0bc9 786 "subs %[r7], %[r5], %[r6] \n\t" /* r7 = k - i */
allankliu 0:b6fdeddc0bc9 787 "cmp %[r6], %[r7] \n\t" /* i <= k - i? */
allankliu 0:b6fdeddc0bc9 788 "ble 3b \n\t" /* if so, continue looping */
allankliu 0:b6fdeddc0bc9 789
allankliu 0:b6fdeddc0bc9 790 "5: \n\t" /* end inner loop */
allankliu 0:b6fdeddc0bc9 791 /* r0 was used as scratch above, so re-read the result pointer without popping it */
allankliu 0:b6fdeddc0bc9 792 "ldr %[r0], [sp, #0] \n\t" /* r0 = result */
allankliu 0:b6fdeddc0bc9 793
allankliu 0:b6fdeddc0bc9 794 "str %[r2], [%[r0], %[r5]] \n\t" /* result[k] = c0 */
allankliu 0:b6fdeddc0bc9 795 "mov %[r2], %[r3] \n\t" /* c0 = c1 */
allankliu 0:b6fdeddc0bc9 796 "mov %[r3], %[r4] \n\t" /* c1 = c2 */
allankliu 0:b6fdeddc0bc9 797 "movs %[r4], #0 \n\t" /* c2 = 0 */
allankliu 0:b6fdeddc0bc9 798 "adds %[r5], #4 \n\t" /* k += 4 */
allankliu 0:b6fdeddc0bc9 799 "cmp %[r5], r8 \n\t" /* k <= (num_words - 1) (times 4) ? */
allankliu 0:b6fdeddc0bc9 800 "ble 1b \n\t" /* if so, loop back, start with i = 0 */
allankliu 0:b6fdeddc0bc9 801 "cmp %[r5], r9 \n\t" /* k <= (num_words * 2 - 2) (times 4) ? */
allankliu 0:b6fdeddc0bc9 802 "ble 2b \n\t" /* if so, loop back, with i = (k + 1) - num_words */
allankliu 0:b6fdeddc0bc9 803 /* end outer loop */
allankliu 0:b6fdeddc0bc9 804 /* r0 still holds the result pointer loaded at the end of the last column */
allankliu 0:b6fdeddc0bc9 805 "str %[r2], [%[r0], %[r5]] \n\t" /* result[num_words * 2 - 1] = c0 */
allankliu 0:b6fdeddc0bc9 806 "pop {%[r0]} \n\t" /* pop result off the stack */
allankliu 0:b6fdeddc0bc9 807 /* NOTE(review): %[r0] is declared input-only below but is overwritten inside the asm; the push/pop pair rebalances sp and reloads the saved result pointer into it */
allankliu 0:b6fdeddc0bc9 808 ".syntax divided \n\t"
allankliu 0:b6fdeddc0bc9 809 : [r2] "+l" (num_words), [r3] "=&l" (r3), [r4] "=&l" (r4),
allankliu 0:b6fdeddc0bc9 810 [r5] "=&l" (r5), [r6] "=&l" (r6), [r7] "=&l" (r7)
allankliu 0:b6fdeddc0bc9 811 : [r0] "l" (result), [r1] "l" (left)
allankliu 0:b6fdeddc0bc9 812 : "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
allankliu 0:b6fdeddc0bc9 813 );
allankliu 0:b6fdeddc0bc9 814 #endif
allankliu 0:b6fdeddc0bc9 815 }
allankliu 0:b6fdeddc0bc9 816 #define asm_square 1
allankliu 0:b6fdeddc0bc9 817 #endif
allankliu 0:b6fdeddc0bc9 818 #endif /* uECC_SQUARE_FUNC */
allankliu 0:b6fdeddc0bc9 819
allankliu 0:b6fdeddc0bc9 820 #endif /* _UECC_ASM_ARM_H_ */