Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository ZIP archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Dependents: MiniTLS-HTTPS-Example
fp_mul_comba.c
00001 /* TomsFastMath, a fast ISO C bignum library. 00002 * 00003 * This project is meant to fill in where LibTomMath 00004 * falls short. That is speed ;-) 00005 * 00006 * This project is public domain and free for all purposes. 00007 * 00008 * Tom St Denis, tomstdenis@gmail.com 00009 */ 00010 00011 /* About this file... 00012 00013 */ 00014 00015 #include <tfm.h> 00016 00017 #if defined(TFM_PRESCOTT) && defined(TFM_SSE2) 00018 #undef TFM_SSE2 00019 #define TFM_X86 00020 #endif 00021 00022 /* these are the combas. Worship them. */ 00023 #if defined(TFM_X86) 00024 /* Generic x86 optimized code */ 00025 00026 /* anything you need at the start */ 00027 #define COMBA_START 00028 00029 /* clear the chaining variables */ 00030 #define COMBA_CLEAR \ 00031 c0 = c1 = c2 = 0; 00032 00033 /* forward the carry to the next digit */ 00034 #define COMBA_FORWARD \ 00035 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00036 00037 /* store the first sum */ 00038 #define COMBA_STORE(x) \ 00039 x = c0; 00040 00041 /* store the second sum [carry] */ 00042 #define COMBA_STORE2(x) \ 00043 x = c1; 00044 00045 /* anything you need at the end */ 00046 #define COMBA_FINI 00047 00048 /* this should multiply i and j */ 00049 #define MULADD(i, j) \ 00050 asm( \ 00051 "movl %6,%%eax \n\t" \ 00052 "mull %7 \n\t" \ 00053 "addl %%eax,%0 \n\t" \ 00054 "adcl %%edx,%1 \n\t" \ 00055 "adcl $0,%2 \n\t" \ 00056 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc"); 00057 00058 #elif defined(TFM_X86_64) 00059 /* x86-64 optimized */ 00060 00061 /* anything you need at the start */ 00062 #define COMBA_START 00063 00064 /* clear the chaining variables */ 00065 #define COMBA_CLEAR \ 00066 c0 = c1 = c2 = 0; 00067 00068 /* forward the carry to the next digit */ 00069 #define COMBA_FORWARD \ 00070 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00071 00072 /* store the first sum */ 00073 #define COMBA_STORE(x) \ 00074 x = c0; 00075 00076 /* store the second sum [carry] */ 00077 
#define COMBA_STORE2(x) \ 00078 x = c1; 00079 00080 /* anything you need at the end */ 00081 #define COMBA_FINI 00082 00083 /* this should multiply i and j */ 00084 #define MULADD(i, j) \ 00085 asm ( \ 00086 "movq %6,%%rax \n\t" \ 00087 "mulq %7 \n\t" \ 00088 "addq %%rax,%0 \n\t" \ 00089 "adcq %%rdx,%1 \n\t" \ 00090 "adcq $0,%2 \n\t" \ 00091 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","%cc"); 00092 00093 #elif defined(TFM_SSE2) 00094 /* use SSE2 optimizations */ 00095 00096 /* anything you need at the start */ 00097 #define COMBA_START 00098 00099 /* clear the chaining variables */ 00100 #define COMBA_CLEAR \ 00101 c0 = c1 = c2 = 0; 00102 00103 /* forward the carry to the next digit */ 00104 #define COMBA_FORWARD \ 00105 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00106 00107 /* store the first sum */ 00108 #define COMBA_STORE(x) \ 00109 x = c0; 00110 00111 /* store the second sum [carry] */ 00112 #define COMBA_STORE2(x) \ 00113 x = c1; 00114 00115 /* anything you need at the end */ 00116 #define COMBA_FINI \ 00117 asm("emms"); 00118 00119 /* this should multiply i and j */ 00120 #define MULADD(i, j) \ 00121 asm( \ 00122 "movd %6,%%mm0 \n\t" \ 00123 "movd %7,%%mm1 \n\t" \ 00124 "pmuludq %%mm1,%%mm0\n\t" \ 00125 "movd %%mm0,%%eax \n\t" \ 00126 "psrlq $32,%%mm0 \n\t" \ 00127 "addl %%eax,%0 \n\t" \ 00128 "movd %%mm0,%%eax \n\t" \ 00129 "adcl %%eax,%1 \n\t" \ 00130 "adcl $0,%2 \n\t" \ 00131 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%cc"); 00132 00133 #elif defined(TFM_ARM) 00134 /* ARM code */ 00135 00136 #define COMBA_START 00137 00138 #define COMBA_CLEAR \ 00139 c0 = c1 = c2 = 0; 00140 00141 #define COMBA_FORWARD \ 00142 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00143 00144 #define COMBA_STORE(x) \ 00145 x = c0; 00146 00147 #define COMBA_STORE2(x) \ 00148 x = c1; 00149 00150 #define COMBA_FINI 00151 00152 #define MULADD(i, j) \ 00153 asm( \ 00154 " UMULL r0,r1,%6,%7 \n\t" \ 00155 " 
ADDS %0,%0,r0 \n\t" \ 00156 " ADCS %1,%1,r1 \n\t" \ 00157 " ADC %2,%2,#0 \n\t" \ 00158 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "%cc"); 00159 00160 #elif defined(TFM_PPC32) 00161 /* For 32-bit PPC */ 00162 00163 #define COMBA_START 00164 00165 #define COMBA_CLEAR \ 00166 c0 = c1 = c2 = 0; 00167 00168 #define COMBA_FORWARD \ 00169 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00170 00171 #define COMBA_STORE(x) \ 00172 x = c0; 00173 00174 #define COMBA_STORE2(x) \ 00175 x = c1; 00176 00177 #define COMBA_FINI 00178 00179 /* untested: will mulhwu change the flags? Docs say no */ 00180 #define MULADD(i, j) \ 00181 asm( \ 00182 " mullw 16,%6,%7 \n\t" \ 00183 " addc %0,%0,16 \n\t" \ 00184 " mulhwu 16,%6,%7 \n\t" \ 00185 " adde %1,%1,16 \n\t" \ 00186 " addze %2,%2 \n\t" \ 00187 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16"); 00188 00189 #elif defined(TFM_PPC64) 00190 /* For 64-bit PPC */ 00191 00192 #define COMBA_START 00193 00194 #define COMBA_CLEAR \ 00195 c0 = c1 = c2 = 0; 00196 00197 #define COMBA_FORWARD \ 00198 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00199 00200 #define COMBA_STORE(x) \ 00201 x = c0; 00202 00203 #define COMBA_STORE2(x) \ 00204 x = c1; 00205 00206 #define COMBA_FINI 00207 00208 /* untested: will mulhdu change the flags? 
Docs say no */ 00209 #define MULADD(i, j) \ 00210 asm( \ 00211 " mulld r16,%6,%7 \n\t" \ 00212 " addc %0,%0,16 \n\t" \ 00213 " mulhdu r16,%6,%7 \n\t" \ 00214 " adde %1,%1,16 \n\t" \ 00215 " addze %2,%2 \n\t" \ 00216 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16"); 00217 00218 #elif defined(TFM_AVR32) 00219 00220 /* ISO C code */ 00221 00222 #define COMBA_START 00223 00224 #define COMBA_CLEAR \ 00225 c0 = c1 = c2 = 0; 00226 00227 #define COMBA_FORWARD \ 00228 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00229 00230 #define COMBA_STORE(x) \ 00231 x = c0; 00232 00233 #define COMBA_STORE2(x) \ 00234 x = c1; 00235 00236 #define COMBA_FINI 00237 00238 #define MULADD(i, j) \ 00239 asm( \ 00240 " mulu.d r2,%6,%7 \n\t"\ 00241 " add %0,r2 \n\t"\ 00242 " adc %1,%1,r3 \n\t"\ 00243 " acr %2 \n\t"\ 00244 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2","r3"); 00245 00246 #elif defined(TFM_MIPS) 00247 00248 #define COMBA_START 00249 00250 #define COMBA_CLEAR \ 00251 c0 = c1 = c2 = 0; 00252 00253 #define COMBA_FORWARD \ 00254 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00255 00256 #define COMBA_STORE(x) \ 00257 x = c0; 00258 00259 #define COMBA_STORE2(x) \ 00260 x = c1; 00261 00262 #define COMBA_FINI 00263 00264 #define MULADD(i, j) \ 00265 asm( \ 00266 " multu %6,%7 \n\t" \ 00267 " mflo $12 \n\t" \ 00268 " mfhi $13 \n\t" \ 00269 " addu %0,%0,$12 \n\t" \ 00270 " sltu $12,%0,$12 \n\t" \ 00271 " addu %1,%1,$13 \n\t" \ 00272 " sltu $13,%1,$13 \n\t" \ 00273 " addu %1,%1,$12 \n\t" \ 00274 " sltu $12,%1,$12 \n\t" \ 00275 " addu %2,%2,$13 \n\t" \ 00276 " addu %2,%2,$12 \n\t" \ 00277 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12","$13"); 00278 00279 #else 00280 /* ISO C code */ 00281 00282 #define COMBA_START 00283 00284 #define COMBA_CLEAR \ 00285 c0 = c1 = c2 = 0; 00286 00287 #define COMBA_FORWARD \ 00288 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 00289 00290 #define COMBA_STORE(x) \ 00291 x = c0; 
00292 00293 #define COMBA_STORE2(x) \ 00294 x = c1; 00295 00296 #define COMBA_FINI 00297 00298 #define MULADD(i, j) \ 00299 do { fp_word t; \ 00300 t = (fp_word)c0 + ((fp_word)i) * ((fp_word)j); c0 = t; \ 00301 t = (fp_word)c1 + (t >> DIGIT_BIT); c1 = t; c2 += t >> DIGIT_BIT; \ 00302 } while (0); 00303 00304 #endif 00305 00306 #ifndef TFM_DEFINES 00307 00308 /* generic PxQ multiplier */ 00309 void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C) 00310 { 00311 int ix, iy, iz, tx, ty, pa; 00312 fp_digit c0, c1, c2, *tmpx, *tmpy; 00313 fp_int tmp, *dst; 00314 00315 COMBA_START; 00316 COMBA_CLEAR; 00317 00318 /* get size of output and trim */ 00319 pa = A->used + B->used; 00320 if (pa >= FP_SIZE) { 00321 pa = FP_SIZE-1; 00322 } 00323 00324 if (A == C || B == C) { 00325 fp_zero(&tmp); 00326 dst = &tmp; 00327 } else { 00328 fp_zero(C); 00329 dst = C; 00330 } 00331 00332 for (ix = 0; ix < pa; ix++) { 00333 /* get offsets into the two bignums */ 00334 ty = MIN(ix, B->used-1); 00335 tx = ix - ty; 00336 00337 /* setup temp aliases */ 00338 tmpx = A->dp + tx; 00339 tmpy = B->dp + ty; 00340 00341 /* this is the number of times the loop will iterrate, essentially its 00342 while (tx++ < a->used && ty-- >= 0) { ... } 00343 */ 00344 iy = MIN(A->used-tx, ty+1); 00345 00346 /* execute loop */ 00347 COMBA_FORWARD; 00348 for (iz = 0; iz < iy; ++iz) { 00349 MULADD(*tmpx++, *tmpy--); 00350 } 00351 00352 /* store term */ 00353 COMBA_STORE(dst->dp[ix]); 00354 } 00355 COMBA_FINI; 00356 00357 dst->used = pa; 00358 dst->sign = A->sign ^ B->sign; 00359 fp_clamp(dst); 00360 fp_copy(dst, C); 00361 } 00362 00363 #endif 00364 00365 /* $Source: /cvs/libtom/tomsfastmath/src/mul/fp_mul_comba.c,v $ */ 00366 /* $Revision: 1.4 $ */ 00367 /* $Date: 2007/03/14 23:47:42 $ */ 00368
Generated on Wed Jul 13 2022 00:22:54 by
1.7.2