ARM Shanghai IoT Team (Internal) / newMiniTLS-GPL

Fork of MiniTLS-GPL by Donatien Garnier

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers fp_mul_comba.c Source File

fp_mul_comba.c

00001 /* TomsFastMath, a fast ISO C bignum library.
00002  * 
00003  * This project is meant to fill in where LibTomMath
00004  * falls short.  That is speed ;-)
00005  *
00006  * This project is public domain and free for all purposes.
00007  * 
00008  * Tom St Denis, tomstdenis@gmail.com
00009  */
00010 
00011 /* About this file...
00012 
00013 */
00014 
00015 #include <tfm.h>
00016 /* if both TFM_PRESCOTT and TFM_SSE2 are set, drop TFM_SSE2 and define TFM_X86 so the generic x86 branch below is chosen */
00017 #if defined(TFM_PRESCOTT) && defined(TFM_SSE2)
00018    #undef TFM_SSE2
00019    #define TFM_X86
00020 #endif
00021 
00022 /* these are the combas.  Worship them. */
00023 #if defined(TFM_X86)
00024 /* Generic x86 (32-bit) inline-assembly versions of the comba macros; they operate on caller variables c0, c1, c2 */
00025 
00026 /* anything you need at the start (expands to nothing for this target) */
00027 #define COMBA_START
00028 
00029 /* clear the chaining variables: zero the three-word accumulator c2:c1:c0 */
00030 #define COMBA_CLEAR \
00031    c0 = c1 = c2 = 0;
00032 
00033 /* forward the carry to the next digit: c0 <- c1, c1 <- c2, c2 <- 0 */
00034 #define COMBA_FORWARD \
00035    do { c0 = c1; c1 = c2; c2 = 0; } while (0);
00036 
00037 /* store the first sum: copy the low accumulator word c0 into x */
00038 #define COMBA_STORE(x) \
00039    x = c0;
00040 
00041 /* store the second sum [carry]: copy the middle accumulator word c1 into x */
00042 #define COMBA_STORE2(x) \
00043    x = c1;
00044 
00045 /* anything you need at the end (expands to nothing for this target) */
00046 #define COMBA_FINI
00047 
00048 /* c2:c1:c0 += i * j -- i is loaded into eax, mull by j leaves the product in edx:eax, then add/adc fold it into c0, c1 and the carry into c2 */
00049 #define MULADD(i, j)                                      \
00050 asm(                                                      \
00051      "movl  %6,%%eax     \n\t"                            \
00052      "mull  %7           \n\t"                            \
00053      "addl  %%eax,%0     \n\t"                            \
00054      "adcl  %%edx,%1     \n\t"                            \
00055      "adcl  $0,%2        \n\t"                            \
00056      :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j)  :"%eax","%edx","%cc");
00057 
00058 #elif defined(TFM_X86_64)
00059 /* x86-64 optimized inline-assembly versions of the comba macros; they operate on caller variables c0, c1, c2 */
00060 
00061 /* anything you need at the start (expands to nothing for this target) */
00062 #define COMBA_START
00063 
00064 /* clear the chaining variables: zero the three-word accumulator c2:c1:c0 */
00065 #define COMBA_CLEAR \
00066    c0 = c1 = c2 = 0;
00067 
00068 /* forward the carry to the next digit: c0 <- c1, c1 <- c2, c2 <- 0 */
00069 #define COMBA_FORWARD \
00070    do { c0 = c1; c1 = c2; c2 = 0; } while (0);
00071 
00072 /* store the first sum: copy the low accumulator word c0 into x */
00073 #define COMBA_STORE(x) \
00074    x = c0;
00075 
00076 /* store the second sum [carry]: copy the middle accumulator word c1 into x */
00077 #define COMBA_STORE2(x) \
00078    x = c1;
00079 
00080 /* anything you need at the end (expands to nothing for this target) */
00081 #define COMBA_FINI
00082 
00083 /* c2:c1:c0 += i * j -- i is loaded into rax, mulq by j leaves the product in rdx:rax, then add/adc fold it into c0, c1 and the carry into c2 */
00084 #define MULADD(i, j)                                      \
00085 asm  (                                                    \
00086      "movq  %6,%%rax     \n\t"                            \
00087      "mulq  %7           \n\t"                            \
00088      "addq  %%rax,%0     \n\t"                            \
00089      "adcq  %%rdx,%1     \n\t"                            \
00090      "adcq  $0,%2        \n\t"                            \
00091      :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j)  :"%rax","%rdx","%cc");
00092 
00093 #elif defined(TFM_SSE2)
00094 /* use SSE2 optimizations: the multiply runs in the MMX registers mm0/mm1, the accumulation in general registers */
00095 
00096 /* anything you need at the start (expands to nothing for this target) */
00097 #define COMBA_START
00098 
00099 /* clear the chaining variables: zero the three-word accumulator c2:c1:c0 */
00100 #define COMBA_CLEAR \
00101    c0 = c1 = c2 = 0;
00102 
00103 /* forward the carry to the next digit: c0 <- c1, c1 <- c2, c2 <- 0 */
00104 #define COMBA_FORWARD \
00105    do { c0 = c1; c1 = c2; c2 = 0; } while (0);
00106 
00107 /* store the first sum: copy the low accumulator word c0 into x */
00108 #define COMBA_STORE(x) \
00109    x = c0;
00110 
00111 /* store the second sum [carry]: copy the middle accumulator word c1 into x */
00112 #define COMBA_STORE2(x) \
00113    x = c1;
00114 
00115 /* anything you need at the end: issues emms once MULADD's use of mm0/mm1 is finished */
00116 #define COMBA_FINI \
00117    asm("emms");
00118 
00119 /* c2:c1:c0 += i * j -- pmuludq forms the product in mm0; its low half is added to c0, its high half (after psrlq 32) to c1 with carry, and the final carry to c2.  NOTE(review): mm0/mm1 are not in the clobber list */
00120 #define MULADD(i, j)                                     \
00121 asm(                                                     \
00122     "movd  %6,%%mm0     \n\t"                            \
00123     "movd  %7,%%mm1     \n\t"                            \
00124     "pmuludq %%mm1,%%mm0\n\t"                            \
00125     "movd  %%mm0,%%eax  \n\t"                            \
00126     "psrlq $32,%%mm0    \n\t"                            \
00127     "addl  %%eax,%0     \n\t"                            \
00128     "movd  %%mm0,%%eax  \n\t"                            \
00129     "adcl  %%eax,%1     \n\t"                            \
00130     "adcl  $0,%2        \n\t"                            \
00131     :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j)  :"%eax","%cc");
00132 
00133 #elif defined(TFM_ARM)
00134 /* ARM code: inline-assembly versions of the comba macros; they operate on caller variables c0, c1, c2 */
00135 /* start hook (expands to nothing for this target) */
00136 #define COMBA_START 
00137 /* zero the three-word accumulator c2:c1:c0 */
00138 #define COMBA_CLEAR \
00139    c0 = c1 = c2 = 0;
00140 /* forward the carry to the next digit: c0 <- c1, c1 <- c2, c2 <- 0 */
00141 #define COMBA_FORWARD \
00142    do { c0 = c1; c1 = c2; c2 = 0; } while (0);
00143 /* copy the low accumulator word c0 into x */
00144 #define COMBA_STORE(x) \
00145    x = c0;
00146 /* copy the middle accumulator word c1 (the carry) into x */
00147 #define COMBA_STORE2(x) \
00148    x = c1;
00149 /* end hook (expands to nothing for this target) */
00150 #define COMBA_FINI
00151 /* c2:c1:c0 += i * j -- UMULL puts the product's low/high words in r0/r1; ADDS/ADCS/ADC then add them into c0, c1 and the carry into c2 */
00152 #define MULADD(i, j)                                          \
00153 asm(                                                          \
00154 "  UMULL  r0,r1,%6,%7           \n\t"                         \
00155 "  ADDS   %0,%0,r0              \n\t"                         \
00156 "  ADCS   %1,%1,r1              \n\t"                         \
00157 "  ADC    %2,%2,#0              \n\t"                         \
00158 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "%cc");
00159 
00160 #elif defined(TFM_PPC32)
00161 /* For 32-bit PPC: inline-assembly versions of the comba macros; they operate on caller variables c0, c1, c2 */
00162 /* start hook (expands to nothing for this target) */
00163 #define COMBA_START
00164 /* zero the three-word accumulator c2:c1:c0 */
00165 #define COMBA_CLEAR \
00166    c0 = c1 = c2 = 0;
00167 /* forward the carry to the next digit: c0 <- c1, c1 <- c2, c2 <- 0 */
00168 #define COMBA_FORWARD \
00169    do { c0 = c1; c1 = c2; c2 = 0; } while (0);
00170 /* copy the low accumulator word c0 into x */
00171 #define COMBA_STORE(x) \
00172    x = c0;
00173 /* copy the middle accumulator word c1 (the carry) into x */
00174 #define COMBA_STORE2(x) \
00175    x = c1;
00176 /* end hook (expands to nothing for this target) */
00177 #define COMBA_FINI 
00178 /* c2:c1:c0 += i * j -- register 16 is scratch: mullw gives the low product word (addc into c0), mulhwu the high word (adde into c1), addze adds the carry into c2 */
00179 /* untested: will mulhwu change the flags?  Docs say no (the carry from addc must survive mulhwu for adde to be correct) */
00180 #define MULADD(i, j)              \
00181 asm(                              \
00182    " mullw  16,%6,%7       \n\t" \
00183    " addc   %0,%0,16       \n\t" \
00184    " mulhwu 16,%6,%7       \n\t" \
00185    " adde   %1,%1,16       \n\t" \
00186    " addze  %2,%2          \n\t" \
00187 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16");
00188 
00189 #elif defined(TFM_PPC64)
/* For 64-bit PPC: inline-assembly versions of the comba macros.
 * All of them operate on the caller's variables c0, c1 and c2, which together
 * form a three-word column accumulator (c0 lowest).
 */

/* start hook (expands to nothing for this target) */
#define COMBA_START

/* zero the three-word accumulator c2:c1:c0 */
#define COMBA_CLEAR \
   c0 = c1 = c2 = 0;

/* forward the carry to the next digit: c0 <- c1, c1 <- c2, c2 <- 0 */
#define COMBA_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* copy the low accumulator word c0 into x */
#define COMBA_STORE(x) \
   x = c0;

/* copy the middle accumulator word c1 (the carry) into x */
#define COMBA_STORE2(x) \
   x = c1;

/* end hook (expands to nothing for this target) */
#define COMBA_FINI

/* c2:c1:c0 += i * j
 *
 * General-purpose register 16 is used as scratch: mulld yields the low
 * doubleword of the product (addc into c0), mulhdu the high doubleword
 * (adde into c1), and addze adds the remaining carry into c2.
 *
 * The scratch register is spelled as the bare number 16 in every instruction
 * and in the clobber list, the same spelling the 32-bit PPC variant uses, so
 * the template does not depend on the assembler accepting symbolic register
 * names such as "r16".
 *
 * untested: will mulhdu change the flags?  Docs say no (the carry produced by
 * addc must survive mulhdu for adde to be correct).
 */
#define MULADD(i, j)              \
asm(                              \
   " mulld  16,%6,%7       \n\t" \
   " addc   %0,%0,16       \n\t" \
   " mulhdu 16,%6,%7       \n\t" \
   " adde   %1,%1,16       \n\t" \
   " addze  %2,%2          \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16");
00217 
00218 #elif defined(TFM_AVR32)
00219 
00220 /* AVR32 code: inline-assembly versions of the comba macros; they operate on caller variables c0, c1, c2 */
00221 /* start hook (expands to nothing for this target) */
00222 #define COMBA_START
00223 /* zero the three-word accumulator c2:c1:c0 */
00224 #define COMBA_CLEAR \
00225    c0 = c1 = c2 = 0;
00226 /* forward the carry to the next digit: c0 <- c1, c1 <- c2, c2 <- 0 */
00227 #define COMBA_FORWARD \
00228    do { c0 = c1; c1 = c2; c2 = 0; } while (0);
00229 /* copy the low accumulator word c0 into x */
00230 #define COMBA_STORE(x) \
00231    x = c0;
00232 /* copy the middle accumulator word c1 (the carry) into x */
00233 #define COMBA_STORE2(x) \
00234    x = c1;
00235 /* end hook (expands to nothing for this target) */
00236 #define COMBA_FINI 
00237 /* c2:c1:c0 += i * j -- mulu.d writes the product to the r3:r2 pair (both clobbered); add/adc fold it into c0 and c1, acr adds the carry into c2 */
00238 #define MULADD(i, j)             \
00239 asm(                             \
00240    " mulu.d r2,%6,%7        \n\t"\
00241    " add    %0,r2           \n\t"\
00242    " adc    %1,%1,r3        \n\t"\
00243    " acr    %2              \n\t"\
00244 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2","r3");
00245 
00246 #elif defined(TFM_MIPS)
00247 /* MIPS code: inline-assembly versions of the comba macros; they operate on caller variables c0, c1, c2 */
00248 #define COMBA_START
00249 /* zero the three-word accumulator c2:c1:c0 */
00250 #define COMBA_CLEAR \
00251    c0 = c1 = c2 = 0;
00252 /* forward the carry to the next digit: c0 <- c1, c1 <- c2, c2 <- 0 */
00253 #define COMBA_FORWARD \
00254    do { c0 = c1; c1 = c2; c2 = 0; } while (0);
00255 /* copy the low accumulator word c0 into x */
00256 #define COMBA_STORE(x) \
00257    x = c0;
00258 /* copy the middle accumulator word c1 (the carry) into x */
00259 #define COMBA_STORE2(x) \
00260    x = c1;
00261 /* end hook (expands to nothing for this target) */
00262 #define COMBA_FINI 
00263 /* c2:c1:c0 += i * j -- multu/mflo/mfhi fetch the product into $12 (low) and $13 (high); each addu is followed by an sltu that recovers the carry (sum < addend), and both carries out of c1 are added to c2 */
00264 #define MULADD(i, j)              \
00265 asm(                              \
00266    " multu  %6,%7          \n\t"  \
00267    " mflo   $12            \n\t"  \
00268    " mfhi   $13            \n\t"  \
00269    " addu    %0,%0,$12     \n\t"  \
00270    " sltu   $12,%0,$12     \n\t"  \
00271    " addu    %1,%1,$13     \n\t"  \
00272    " sltu   $13,%1,$13     \n\t"  \
00273    " addu    %1,%1,$12     \n\t"  \
00274    " sltu   $12,%1,$12     \n\t"  \
00275    " addu    %2,%2,$13     \n\t"  \
00276    " addu    %2,%2,$12     \n\t"  \
00277 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12","$13");
00278 
00279 #else
/* ISO C code: portable versions of the comba macros.
 * All of them operate on the caller's variables c0, c1 and c2, which together
 * form a three-word column accumulator (c0 lowest).  Each statement-like macro
 * keeps its trailing ';' so existing call sites expand exactly as before.
 */

/* start hook (expands to nothing for this target) */
#define COMBA_START

/* zero the three-word accumulator c2:c1:c0 */
#define COMBA_CLEAR \
   c0 = c1 = c2 = 0;

/* forward the carry to the next digit: c0 <- c1, c1 <- c2, c2 <- 0 */
#define COMBA_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* copy the low accumulator word c0 into x */
#define COMBA_STORE(x) \
   x = c0;

/* copy the middle accumulator word c1 (the carry) into x */
#define COMBA_STORE2(x) \
   x = c1;

/* end hook (expands to nothing for this target) */
#define COMBA_FINI

/* c2:c1:c0 += i * j
 *
 * The digits i and j are widened to fp_word and multiplied; the product plus
 * c0 is formed in an fp_word, its low part becomes the new c0, and its upper
 * DIGIT_BIT-shifted part is added to c1, whose own overflow goes to c2.
 *
 * Each argument is evaluated exactly once.  Both are parenthesized before the
 * widening cast, so a compound argument (e.g. "x << 16") is evaluated in its
 * own type first, the same value an inline-assembly backend would receive as
 * an operand.  The temporary has a macro-private name so that an argument
 * expression mentioning a caller variable such as "t" is not captured by it.
 */
#define MULADD(i, j)                                                              \
   do { fp_word muladd_acc_;                                                      \
   muladd_acc_ = (fp_word)c0 + ((fp_word)(i)) * ((fp_word)(j));                   \
   c0 = muladd_acc_;                                                              \
   muladd_acc_ = (fp_word)c1 + (muladd_acc_ >> DIGIT_BIT);                        \
   c1 = muladd_acc_;                                                              \
   c2 += muladd_acc_ >> DIGIT_BIT;                                                \
   } while (0);
00303 
00304 #endif
00305 
00306 #ifndef TFM_DEFINES
00307 
00308 /* generic PxQ multiplier */
00309 void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
00310 {
00311    int       ix, iy, iz, tx, ty, pa;
00312    fp_digit  c0, c1, c2, *tmpx, *tmpy;
00313    fp_int    tmp, *dst;
00314 
00315    COMBA_START;
00316    COMBA_CLEAR;
00317    
00318    /* get size of output and trim */
00319    pa = A->used + B->used;
00320    if (pa >= FP_SIZE) {
00321       pa = FP_SIZE-1;
00322    }
00323 
00324    if (A == C || B == C) {
00325       fp_zero(&tmp);
00326       dst = &tmp;
00327    } else {
00328       fp_zero(C);
00329       dst = C;
00330    }
00331 
00332    for (ix = 0; ix < pa; ix++) {
00333       /* get offsets into the two bignums */
00334       ty = MIN(ix, B->used-1);
00335       tx = ix - ty;
00336 
00337       /* setup temp aliases */
00338       tmpx = A->dp + tx;
00339       tmpy = B->dp + ty;
00340 
00341       /* this is the number of times the loop will iterrate, essentially its 
00342          while (tx++ < a->used && ty-- >= 0) { ... }
00343        */
00344       iy = MIN(A->used-tx, ty+1);
00345 
00346       /* execute loop */
00347       COMBA_FORWARD;
00348       for (iz = 0; iz < iy; ++iz) {
00349           MULADD(*tmpx++, *tmpy--);
00350       }
00351 
00352       /* store term */
00353       COMBA_STORE(dst->dp[ix]);
00354   }
00355   COMBA_FINI;
00356 
00357   dst->used = pa;
00358   dst->sign = A->sign ^ B->sign;
00359   fp_clamp(dst);
00360   fp_copy(dst, C);
00361 }
00362 
00363 #endif
00364 
00365 /* $Source: /cvs/libtom/tomsfastmath/src/mul/fp_mul_comba.c,v $ */
00366 /* $Revision: 1.4 $ */
00367 /* $Date: 2007/03/14 23:47:42 $ */
00368