Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository Zip archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of MiniTLS-GPL by
fp_mul_comba.c
00001 /* TomsFastMath, a fast ISO C bignum library. 00002 * 00003 * This project is meant to fill in where LibTomMath 00004 * falls short. That is speed ;-) 00005 * 00006 * This project is public domain and free for all purposes. 00007 * 00008 * Tom St Denis, tomstdenis@gmail.com 00009 */ 00010 00011 /* About this file... 00012 00013 */ 00014 00015 #include <tfm.h> 00016 00017 #if defined(TFM_PRESCOTT) && defined(TFM_SSE2) 00018 #undef TFM_SSE2 00019 #define TFM_X86 00020 #endif 00021 00022 /* these are the combas. Worship them. */ 00023 #if defined(TFM_X86) 00024 /* Generic x86 optimized code */ 00025 00026 /* anything you need at the start */ 00027 #define COMBA_START 00028 00029 /* clear the chaining variables */ 00030 #define COMBA_CLEAR \ 00031 c0 = c1 = c2 = 0; 00032 00033 /* forward the carry to the next digit */ 00034 #define COMBA_FORWARD \ 00035 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00036 00037 /* store the first sum */ 00038 #define COMBA_STORE(x) \ 00039 x = c0; 00040 00041 /* store the second sum [carry] */ 00042 #define COMBA_STORE2(x) \ 00043 x = c1; 00044 00045 /* anything you need at the end */ 00046 #define COMBA_FINI 00047 00048 /* this should multiply i and j */ 00049 #define MULADD(i, j) \ 00050 asm( \ 00051 "movl %6,%%eax \n\t" \ 00052 "mull %7 \n\t" \ 00053 "addl %%eax,%0 \n\t" \ 00054 "adcl %%edx,%1 \n\t" \ 00055 "adcl $0,%2 \n\t" \ 00056 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc"); 00057 00058 #elif defined(TFM_X86_64) 00059 /* x86-64 optimized */ 00060 00061 /* anything you need at the start */ 00062 #define COMBA_START 00063 00064 /* clear the chaining variables */ 00065 #define COMBA_CLEAR \ 00066 c0 = c1 = c2 = 0; 00067 00068 /* forward the carry to the next digit */ 00069 #define COMBA_FORWARD \ 00070 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00071 00072 /* store the first sum */ 00073 #define COMBA_STORE(x) \ 00074 x = c0; 00075 00076 /* store the second sum [carry] */ 00077 
#define COMBA_STORE2(x) \ 00078 x = c1; 00079 00080 /* anything you need at the end */ 00081 #define COMBA_FINI 00082 00083 /* this should multiply i and j */ 00084 #define MULADD(i, j) \ 00085 asm ( \ 00086 "movq %6,%%rax \n\t" \ 00087 "mulq %7 \n\t" \ 00088 "addq %%rax,%0 \n\t" \ 00089 "adcq %%rdx,%1 \n\t" \ 00090 "adcq $0,%2 \n\t" \ 00091 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","%cc"); 00092 00093 #elif defined(TFM_SSE2) 00094 /* use SSE2 optimizations */ 00095 00096 /* anything you need at the start */ 00097 #define COMBA_START 00098 00099 /* clear the chaining variables */ 00100 #define COMBA_CLEAR \ 00101 c0 = c1 = c2 = 0; 00102 00103 /* forward the carry to the next digit */ 00104 #define COMBA_FORWARD \ 00105 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00106 00107 /* store the first sum */ 00108 #define COMBA_STORE(x) \ 00109 x = c0; 00110 00111 /* store the second sum [carry] */ 00112 #define COMBA_STORE2(x) \ 00113 x = c1; 00114 00115 /* anything you need at the end */ 00116 #define COMBA_FINI \ 00117 asm("emms"); 00118 00119 /* this should multiply i and j */ 00120 #define MULADD(i, j) \ 00121 asm( \ 00122 "movd %6,%%mm0 \n\t" \ 00123 "movd %7,%%mm1 \n\t" \ 00124 "pmuludq %%mm1,%%mm0\n\t" \ 00125 "movd %%mm0,%%eax \n\t" \ 00126 "psrlq $32,%%mm0 \n\t" \ 00127 "addl %%eax,%0 \n\t" \ 00128 "movd %%mm0,%%eax \n\t" \ 00129 "adcl %%eax,%1 \n\t" \ 00130 "adcl $0,%2 \n\t" \ 00131 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%cc"); 00132 00133 #elif defined(TFM_ARM) 00134 /* ARM code */ 00135 00136 #define COMBA_START 00137 00138 #define COMBA_CLEAR \ 00139 c0 = c1 = c2 = 0; 00140 00141 #define COMBA_FORWARD \ 00142 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00143 00144 #define COMBA_STORE(x) \ 00145 x = c0; 00146 00147 #define COMBA_STORE2(x) \ 00148 x = c1; 00149 00150 #define COMBA_FINI 00151 00152 #define MULADD(i, j) \ 00153 asm( \ 00154 " UMULL r0,r1,%6,%7 \n\t" \ 00155 " 
ADDS %0,%0,r0 \n\t" \ 00156 " ADCS %1,%1,r1 \n\t" \ 00157 " ADC %2,%2,#0 \n\t" \ 00158 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "%cc"); 00159 00160 #elif defined(TFM_PPC32) 00161 /* For 32-bit PPC */ 00162 00163 #define COMBA_START 00164 00165 #define COMBA_CLEAR \ 00166 c0 = c1 = c2 = 0; 00167 00168 #define COMBA_FORWARD \ 00169 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00170 00171 #define COMBA_STORE(x) \ 00172 x = c0; 00173 00174 #define COMBA_STORE2(x) \ 00175 x = c1; 00176 00177 #define COMBA_FINI 00178 00179 /* untested: will mulhwu change the flags? Docs say no */ 00180 #define MULADD(i, j) \ 00181 asm( \ 00182 " mullw 16,%6,%7 \n\t" \ 00183 " addc %0,%0,16 \n\t" \ 00184 " mulhwu 16,%6,%7 \n\t" \ 00185 " adde %1,%1,16 \n\t" \ 00186 " addze %2,%2 \n\t" \ 00187 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16"); 00188 00189 #elif defined(TFM_PPC64) 00190 /* For 64-bit PPC */ 00191 00192 #define COMBA_START 00193 00194 #define COMBA_CLEAR \ 00195 c0 = c1 = c2 = 0; 00196 00197 #define COMBA_FORWARD \ 00198 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00199 00200 #define COMBA_STORE(x) \ 00201 x = c0; 00202 00203 #define COMBA_STORE2(x) \ 00204 x = c1; 00205 00206 #define COMBA_FINI 00207 00208 /* untested: will mulhdu change the flags? 
Docs say no */ 00209 #define MULADD(i, j) \ 00210 asm( \ 00211 " mulld r16,%6,%7 \n\t" \ 00212 " addc %0,%0,16 \n\t" \ 00213 " mulhdu r16,%6,%7 \n\t" \ 00214 " adde %1,%1,16 \n\t" \ 00215 " addze %2,%2 \n\t" \ 00216 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16"); 00217 00218 #elif defined(TFM_AVR32) 00219 00220 /* ISO C code */ 00221 00222 #define COMBA_START 00223 00224 #define COMBA_CLEAR \ 00225 c0 = c1 = c2 = 0; 00226 00227 #define COMBA_FORWARD \ 00228 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00229 00230 #define COMBA_STORE(x) \ 00231 x = c0; 00232 00233 #define COMBA_STORE2(x) \ 00234 x = c1; 00235 00236 #define COMBA_FINI 00237 00238 #define MULADD(i, j) \ 00239 asm( \ 00240 " mulu.d r2,%6,%7 \n\t"\ 00241 " add %0,r2 \n\t"\ 00242 " adc %1,%1,r3 \n\t"\ 00243 " acr %2 \n\t"\ 00244 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2","r3"); 00245 00246 #elif defined(TFM_MIPS) 00247 00248 #define COMBA_START 00249 00250 #define COMBA_CLEAR \ 00251 c0 = c1 = c2 = 0; 00252 00253 #define COMBA_FORWARD \ 00254 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00255 00256 #define COMBA_STORE(x) \ 00257 x = c0; 00258 00259 #define COMBA_STORE2(x) \ 00260 x = c1; 00261 00262 #define COMBA_FINI 00263 00264 #define MULADD(i, j) \ 00265 asm( \ 00266 " multu %6,%7 \n\t" \ 00267 " mflo $12 \n\t" \ 00268 " mfhi $13 \n\t" \ 00269 " addu %0,%0,$12 \n\t" \ 00270 " sltu $12,%0,$12 \n\t" \ 00271 " addu %1,%1,$13 \n\t" \ 00272 " sltu $13,%1,$13 \n\t" \ 00273 " addu %1,%1,$12 \n\t" \ 00274 " sltu $12,%1,$12 \n\t" \ 00275 " addu %2,%2,$13 \n\t" \ 00276 " addu %2,%2,$12 \n\t" \ 00277 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12","$13"); 00278 00279 #else 00280 /* ISO C code */ 00281 00282 #define COMBA_START 00283 00284 #define COMBA_CLEAR \ 00285 c0 = c1 = c2 = 0; 00286 00287 #define COMBA_FORWARD \ 00288 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00289 00290 #define COMBA_STORE(x) \ 00291 x = c0; 
00292 00293 #define COMBA_STORE2(x) \ 00294 x = c1; 00295 00296 #define COMBA_FINI 00297 00298 #define MULADD(i, j) \ 00299 do { fp_word t; \ 00300 t = (fp_word)c0 + ((fp_word)i) * ((fp_word)j); c0 = t; \ 00301 t = (fp_word)c1 + (t >> DIGIT_BIT); c1 = t; c2 += t >> DIGIT_BIT; \ 00302 } while (0); 00303 00304 #endif 00305 00306 #ifndef TFM_DEFINES 00307 00308 /* generic PxQ multiplier */ 00309 void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C) 00310 { 00311 int ix, iy, iz, tx, ty, pa; 00312 fp_digit c0, c1, c2, *tmpx, *tmpy; 00313 fp_int tmp, *dst; 00314 00315 COMBA_START; 00316 COMBA_CLEAR; 00317 00318 /* get size of output and trim */ 00319 pa = A->used + B->used; 00320 if (pa >= FP_SIZE) { 00321 pa = FP_SIZE-1; 00322 } 00323 00324 if (A == C || B == C) { 00325 fp_zero(&tmp); 00326 dst = &tmp; 00327 } else { 00328 fp_zero(C); 00329 dst = C; 00330 } 00331 00332 for (ix = 0; ix < pa; ix++) { 00333 /* get offsets into the two bignums */ 00334 ty = MIN(ix, B->used-1); 00335 tx = ix - ty; 00336 00337 /* setup temp aliases */ 00338 tmpx = A->dp + tx; 00339 tmpy = B->dp + ty; 00340 00341 /* this is the number of times the loop will iterrate, essentially its 00342 while (tx++ < a->used && ty-- >= 0) { ... } 00343 */ 00344 iy = MIN(A->used-tx, ty+1); 00345 00346 /* execute loop */ 00347 COMBA_FORWARD; 00348 for (iz = 0; iz < iy; ++iz) { 00349 MULADD(*tmpx++, *tmpy--); 00350 } 00351 00352 /* store term */ 00353 COMBA_STORE(dst->dp[ix]); 00354 } 00355 COMBA_FINI; 00356 00357 dst->used = pa; 00358 dst->sign = A->sign ^ B->sign; 00359 fp_clamp(dst); 00360 fp_copy(dst, C); 00361 } 00362 00363 #endif 00364 00365 /* $Source: /cvs/libtom/tomsfastmath/src/mul/fp_mul_comba.c,v $ */ 00366 /* $Revision: 1.4 $ */ 00367 /* $Date: 2007/03/14 23:47:42 $ */ 00368
Generated on Tue Jul 12 2022 19:20:10 by Doxygen 1.7.2
