Library for big numbers from http://www.ttmath.org/

Dependents:   PIDHeater82 Conceptcontroller_v_1_0 AlarmClockApp COG4050_adxl355_tilt ... more

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers ttmathuint_x86_64.h Source File

ttmathuint_x86_64.h

Go to the documentation of this file.
00001 /*
00002  * This file is a part of TTMath Bignum Library
00003  * and is distributed under the (new) BSD licence.
00004  * Author: Tomasz Sowa <t.sowa@ttmath.org>
00005  */
00006 
00007 /* 
00008  * Copyright (c) 2006-2010, Tomasz Sowa
00009  * All rights reserved.
00010  * 
00011  * Redistribution and use in source and binary forms, with or without
00012  * modification, are permitted provided that the following conditions are met:
00013  * 
00014  *  * Redistributions of source code must retain the above copyright notice,
00015  *    this list of conditions and the following disclaimer.
00016  *    
00017  *  * Redistributions in binary form must reproduce the above copyright
00018  *    notice, this list of conditions and the following disclaimer in the
00019  *    documentation and/or other materials provided with the distribution.
00020  *    
00021  *  * Neither the name Tomasz Sowa nor the names of contributors to this
00022  *    project may be used to endorse or promote products derived
00023  *    from this software without specific prior written permission.
00024  *
00025  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
00026  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00028  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
00029  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00030  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00031  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
00032  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
00033  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00034  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
00035  * THE POSSIBILITY OF SUCH DAMAGE.
00036  */
00037 
00038 
00039 #ifndef headerfilettmathuint_x86_64
00040 #define headerfilettmathuint_x86_64
00041 
00042 
00043 #ifndef TTMATH_NOASM
00044 #ifdef TTMATH_PLATFORM64
00045 
00046 
00047 /*!
00048     \file ttmathuint_x86_64.h
00049     \brief template class UInt<uint> with assembler code for 64bit x86_64 processors
00050 
00051     this file is included at the end of ttmathuint.h
00052 */
00053 
00054 #ifndef __GNUC__
00055 #include <intrin.h>
00056 #endif
00057 
00058 
00059 namespace ttmath
00060 {
00061 
00062     #ifndef __GNUC__
00063 
00064         extern "C"
00065             {
00066             uint  __fastcall ttmath_adc_x64(uint * p1, const uint * p2, uint  nSize, uint  c);
00067             uint  __fastcall ttmath_addindexed_x64(uint * p1, uint  nSize, uint  nPos, uint  nValue);
00068             uint  __fastcall ttmath_addindexed2_x64(uint * p1, uint  nSize, uint  nPos, uint  nValue1, uint  nValue2);
00069             uint  __fastcall ttmath_addvector_x64(const uint  * ss1, const uint  * ss2, uint  ss1_size, uint  ss2_size, uint  * result);
00070             uint  __fastcall ttmath_sbb_x64(uint * p1, const uint * p2, uint  nSize, uint  c);
00071             uint  __fastcall ttmath_subindexed_x64(uint * p1, uint  nSize, uint  nPos, uint  nValue);
00072             uint  __fastcall ttmath_subvector_x64(const uint  * ss1, const uint  * ss2, uint  ss1_size, uint  ss2_size, uint  * result);
00073             uint  __fastcall ttmath_rcl_x64(uint * p1, uint  nSize, uint  nLowestBit);
00074             uint  __fastcall ttmath_rcr_x64(uint * p1, uint  nSize, uint  nLowestBit);
00075             uint  __fastcall ttmath_div_x64(uint * pnValHi, uint * pnValLo, uint  nDiv);
00076             uint  __fastcall ttmath_rcl2_x64(uint * p1, uint  nSize, uint  nBits, uint  c);
00077             uint  __fastcall ttmath_rcr2_x64(uint * p1, uint  nSize, uint  nBits, uint  c);
00078             };
00079     #endif
00080 
00081 
00082     /*!
00083         returning the string represents the currect type of the library
00084         we have following types:
00085           asm_vc_32   - with asm code designed for Microsoft Visual C++ (32 bits)
00086           asm_gcc_32  - with asm code designed for GCC (32 bits)
00087           asm_vc_64   - with asm for VC (64 bit)
00088           asm_gcc_64  - with asm for GCC (64 bit)
00089           no_asm_32   - pure C++ version (32 bit) - without any asm code
00090           no_asm_64   - pure C++ version (64 bit) - without any asm code
00091     */
00092     template<uint value_size>
00093     const char * UInt<value_size>::LibTypeStr ()
00094     {
00095         #ifndef __GNUC__
00096             static const char info[] = "asm_vc_64";
00097         #endif        
00098 
00099         #ifdef __GNUC__
00100             static const char info[] = "asm_gcc_64";
00101         #endif
00102 
00103     return info;
00104     }
00105 
00106 
00107     /*!
00108         returning the currect type of the library
00109     */
00110     template<uint value_size>
00111     LibTypeCode  UInt<value_size>::LibType ()
00112     {
00113         #ifndef __GNUC__
00114             LibTypeCode  info = asm_vc_64;
00115         #endif        
00116 
00117         #ifdef __GNUC__
00118             LibTypeCode  info = asm_gcc_64;
00119         #endif
00120 
00121     return info;
00122     }
00123 
00124 
00125     /*!
00126     *
00127     *    basic mathematic functions
00128     *
00129     */
00130 
00131 
00132 
00133     /*!
00134         this method adding ss2 to the this and adding carry if it's defined
00135         (this = this + ss2 + c)
00136 
00137         ***this method is created only on a 64bit platform***
00138 
00139         c must be zero or one (might be a bigger value than 1)
00140         function returns carry (1) (if it was)
00141     */
00142     template<uint value_size>
00143     uint  UInt<value_size>::Add (const UInt<value_size> & ss2, uint  c)
00144     {
00145     uint  b = value_size;
00146     uint  * p1 = table;
00147     const uint  * p2 = ss2.table;
00148 
00149         // we don't have to use TTMATH_REFERENCE_ASSERT here
00150         // this algorithm doesn't require it
00151 
00152         #ifndef __GNUC__
00153             c = ttmath_adc_x64(p1,p2,b,c);
00154         #endif
00155 
00156         #ifdef __GNUC__
00157         uint  dummy, dummy2;
00158 
00159             /*
00160                 this part should be compiled with gcc
00161             */
00162             __asm__ __volatile__(
00163     
00164                 "xorq %%rdx, %%rdx                \n"
00165                 "negq %%rax                        \n"     // CF=1 if rax!=0 , CF=0 if rax==0
00166 
00167             "1:                                    \n"
00168                 "movq (%%rsi,%%rdx,8), %%rax    \n"
00169                 "adcq %%rax, (%%rbx,%%rdx,8)    \n"
00170             
00171                 "incq %%rdx                        \n"
00172                 "decq %%rcx                        \n"
00173             "jnz 1b                                \n"
00174 
00175                 "adcq %%rcx, %%rcx                \n"
00176 
00177                 : "=c" (c), "=a" (dummy), "=d" (dummy2)
00178                 : "0" (b),  "1" (c), "b" (p1), "S" (p2)
00179                 : "cc", "memory" );
00180 
00181         #endif
00182 
00183         TTMATH_LOGC("UInt::Add", c)
00184     
00185     return c;
00186     }
00187 
00188 
00189 
00190     /*!
00191         this method adds one word (at a specific position)
00192         and returns a carry (if it was)
00193 
00194         ***this method is created only on a 64bit platform***
00195 
00196 
00197         if we've got (value_size=3):
00198             table[0] = 10;
00199             table[1] = 30;
00200             table[2] = 5;    
00201         and we call:
00202             AddInt(2,1)
00203         then it'll be:
00204             table[0] = 10;
00205             table[1] = 30 + 2;
00206             table[2] = 5;
00207 
00208         of course if there was a carry from table[2] it would be returned
00209     */
00210     template<uint  value_size>
00211     uint  UInt<value_size>::AddInt(uint  value, uint  index)
00212     {
00213     uint  b = value_size;
00214     uint  * p1 = table;
00215     uint  c;
00216 
00217         TTMATH_ASSERT( index < value_size )
00218 
00219         #ifndef __GNUC__
00220             c = ttmath_addindexed_x64(p1,b,index,value);
00221         #endif
00222 
00223 
00224         #ifdef __GNUC__
00225         uint  dummy, dummy2;
00226     
00227             __asm__ __volatile__(
00228 
00229                 "subq %%rdx, %%rcx                 \n"
00230 
00231             "1:                                    \n"
00232                 "addq %%rax, (%%rbx,%%rdx,8)    \n"
00233             "jnc 2f                                \n"
00234                 
00235                 "movq $1, %%rax                    \n"
00236                 "incq %%rdx                        \n"
00237                 "decq %%rcx                        \n"
00238             "jnz 1b                                \n"
00239 
00240             "2:                                    \n"
00241                 "setc %%al                        \n"
00242                 "movzx %%al, %%rdx                \n"
00243 
00244                 : "=d" (c),    "=a" (dummy), "=c" (dummy2)
00245                 : "0" (index), "1" (value),  "2" (b), "b" (p1)
00246                 : "cc", "memory" );
00247 
00248         #endif
00249 
00250         TTMATH_LOGC("UInt::AddInt", c)
00251     
00252     return c;
00253     }
00254 
00255 
00256 
00257     /*!
00258         this method adds only two unsigned words to the existing value
00259         and these words begin on the 'index' position
00260         (it's used in the multiplication algorithm 2)
00261 
00262         ***this method is created only on a 64bit platform***
00263 
00264         index should be equal or smaller than value_size-2 (index <= value_size-2)
00265         x1 - lower word, x2 - higher word
00266 
00267         for example if we've got value_size equal 4 and:
00268             table[0] = 3
00269             table[1] = 4
00270             table[2] = 5
00271             table[3] = 6
00272         then let
00273             x1 = 10
00274             x2 = 20
00275         and
00276             index = 1
00277 
00278         the result of this method will be:
00279             table[0] = 3
00280             table[1] = 4 + x1 = 14
00281             table[2] = 5 + x2 = 25
00282             table[3] = 6
00283         
00284         and no carry at the end of table[3]
00285 
00286         (of course if there was a carry in table[2](5+20) then 
00287         this carry would be passed to the table[3] etc.)
00288     */
00289     template<uint  value_size>
00290     uint  UInt<value_size>::AddTwoInts(uint  x2, uint  x1, uint  index)
00291     {
00292     uint  b = value_size;
00293     uint  * p1 = table;
00294     uint  c;
00295 
00296         TTMATH_ASSERT( index < value_size - 1 )
00297 
00298         #ifndef __GNUC__
00299             c = ttmath_addindexed2_x64(p1,b,index,x1,x2);
00300         #endif
00301 
00302 
00303         #ifdef __GNUC__
00304         uint  dummy, dummy2;
00305 
00306             __asm__ __volatile__(
00307             
00308                 "subq %%rdx, %%rcx                 \n"
00309                 
00310                 "addq %%rsi, (%%rbx,%%rdx,8)     \n"
00311                 "incq %%rdx                        \n"
00312                 "decq %%rcx                        \n"
00313 
00314             "1:                                    \n"
00315                 "adcq %%rax, (%%rbx,%%rdx,8)    \n"
00316             "jnc 2f                                \n"
00317 
00318                 "mov $0, %%rax                    \n"
00319                 "incq %%rdx                        \n"
00320                 "decq %%rcx                        \n"
00321             "jnz 1b                                \n"
00322 
00323             "2:                                    \n"
00324                 "setc %%al                        \n"
00325                 "movzx %%al, %%rax                \n"
00326 
00327                 : "=a" (c), "=c" (dummy), "=d" (dummy2)
00328                 : "0" (x2), "1" (b),      "2" (index), "b" (p1), "S" (x1)
00329                 : "cc", "memory" );
00330 
00331         #endif
00332 
00333         TTMATH_LOGC("UInt::AddTwoInts", c)
00334 
00335     return c;
00336     }
00337 
00338 
00339 
00340     /*!
00341         this static method addes one vector to the other
00342         'ss1' is larger in size or equal to 'ss2'
00343 
00344         ss1 points to the first (larger) vector
00345         ss2 points to the second vector
00346         ss1_size - size of the ss1 (and size of the result too)
00347         ss2_size - size of the ss2
00348         result - is the result vector (which has size the same as ss1: ss1_size)
00349 
00350         Example:  ss1_size is 5, ss2_size is 3
00351         ss1:      ss2:   result (output):
00352           5        1         5+1
00353           4        3         4+3
00354           2        7         2+7
00355           6                  6
00356           9                  9
00357       of course the carry is propagated and will be returned from the last item
00358       (this method is used by the Karatsuba multiplication algorithm)
00359     */
00360     template<uint  value_size>
00361     uint  UInt<value_size>::AddVector(const uint  * ss1, const uint  * ss2, uint  ss1_size, uint  ss2_size, uint  * result)
00362     {
00363         TTMATH_ASSERT( ss1_size >= ss2_size )
00364 
00365         uint  c;
00366 
00367         #ifndef __GNUC__
00368              c = ttmath_addvector_x64(ss1, ss2, ss1_size, ss2_size, result);
00369         #endif
00370 
00371 
00372         #ifdef __GNUC__
00373         uint  dummy1, dummy2, dummy3;    
00374         uint  rest = ss1_size - ss2_size;
00375             
00376             //    this part should be compiled with gcc
00377         
00378             __asm__ __volatile__(
00379                 "mov %%rdx, %%r8                    \n"
00380                 "xor %%rdx, %%rdx                    \n"   // rdx = 0, cf = 0
00381             "1:                                        \n"
00382                 "mov (%%rsi,%%rdx,8), %%rax            \n"
00383                 "adc (%%rbx,%%rdx,8), %%rax            \n"
00384                 "mov %%rax, (%%rdi,%%rdx,8)            \n"
00385 
00386                 "inc %%rdx                            \n"
00387                 "dec %%rcx                            \n"
00388             "jnz 1b                                    \n"
00389 
00390                 "adc %%rcx, %%rcx                    \n"   // rcx has the cf state
00391 
00392                 "or %%r8, %%r8                        \n"
00393                 "jz 3f                                \n"
00394                 
00395                 "xor %%rbx, %%rbx                    \n"   // ebx = 0
00396                 "neg %%rcx                            \n"   // setting cf from rcx
00397                 "mov %%r8, %%rcx                    \n"   // rcx=rest and is != 0
00398             "2:                                        \n"
00399                 "mov (%%rsi, %%rdx, 8), %%rax        \n"
00400                 "adc %%rbx, %%rax                     \n"
00401                 "mov %%rax, (%%rdi, %%rdx, 8)        \n"
00402 
00403                 "inc %%rdx                            \n"
00404                 "dec %%rcx                            \n"
00405             "jnz 2b                                    \n"
00406 
00407                 "adc %%rcx, %%rcx                    \n"
00408             "3:                                        \n"
00409 
00410                 : "=a" (dummy1), "=b" (dummy2), "=c" (c),       "=d" (dummy3)
00411                 :                "1" (ss2),     "2" (ss2_size), "3" (rest),   "S" (ss1),  "D" (result)
00412                 : "%r8", "cc", "memory" );
00413 
00414         #endif
00415 
00416         TTMATH_VECTOR_LOGC("UInt::AddVector", c, result, ss1_size)
00417 
00418     return c;
00419     }
00420 
00421 
00422 
00423     /*!
00424         this method's subtracting ss2 from the 'this' and subtracting
00425         carry if it has been defined
00426         (this = this - ss2 - c)
00427 
00428         ***this method is created only on a 64bit platform***
00429 
00430         c must be zero or one (might be a bigger value than 1)
00431         function returns carry (1) (if it was)
00432     */
00433     template<uint  value_size>
00434     uint  UInt<value_size>::Sub(const UInt<value_size> & ss2, uint  c)
00435     {
00436     uint  b = value_size;
00437     uint  * p1 = table;
00438     const uint  * p2 = ss2.table;
00439     
00440         // we don't have to use TTMATH_REFERENCE_ASSERT here
00441         // this algorithm doesn't require it
00442 
00443         #ifndef __GNUC__
00444             c = ttmath_sbb_x64(p1,p2,b,c);
00445         #endif
00446 
00447 
00448         #ifdef __GNUC__
00449         uint  dummy, dummy2;
00450 
00451             __asm__  __volatile__(
00452     
00453                 "xorq %%rdx, %%rdx                \n"
00454                 "negq %%rax                        \n"     // CF=1 if rax!=0 , CF=0 if rax==0
00455 
00456             "1:                                    \n"
00457                 "movq (%%rsi,%%rdx,8), %%rax    \n"
00458                 "sbbq %%rax, (%%rbx,%%rdx,8)    \n"
00459             
00460                 "incq %%rdx                        \n"
00461                 "decq %%rcx                        \n"
00462             "jnz 1b                                \n"
00463 
00464                 "adcq %%rcx, %%rcx                \n"
00465 
00466                 : "=c" (c), "=a" (dummy), "=d" (dummy2)
00467                 : "0" (b),  "1" (c), "b" (p1), "S" (p2)
00468                 : "cc", "memory" );
00469 
00470         #endif
00471 
00472         TTMATH_LOGC("UInt::Sub", c)
00473 
00474     return c;
00475     }
00476 
00477 
00478 
00479     /*!
00480         this method subtracts one word (at a specific position)
00481         and returns a carry (if it was)
00482 
00483         ***this method is created only on a 64bit platform***
00484 
00485         if we've got (value_size=3):
00486             table[0] = 10;
00487             table[1] = 30;
00488             table[2] = 5;    
00489         and we call:
00490             SubInt(2,1)
00491         then it'll be:
00492             table[0] = 10;
00493             table[1] = 30 - 2;
00494             table[2] = 5;
00495 
00496         of course if there was a carry from table[2] it would be returned
00497     */
00498     template<uint  value_size>
00499     uint  UInt<value_size>::SubInt(uint  value, uint  index)
00500     {
00501     uint  b = value_size;
00502     uint  * p1 = table;
00503     uint  c;
00504 
00505         TTMATH_ASSERT( index < value_size )
00506 
00507         #ifndef __GNUC__
00508             c = ttmath_subindexed_x64(p1,b,index,value);
00509         #endif
00510 
00511 
00512         #ifdef __GNUC__
00513             uint  dummy, dummy2;
00514 
00515             __asm__ __volatile__(
00516             
00517                 "subq %%rdx, %%rcx                 \n"
00518 
00519             "1:                                    \n"
00520                 "subq %%rax, (%%rbx,%%rdx,8)    \n"
00521             "jnc 2f                                \n"
00522                 
00523                 "movq $1, %%rax                    \n"
00524                 "incq %%rdx                        \n"
00525                 "decq %%rcx                        \n"
00526             "jnz 1b                                \n"
00527 
00528             "2:                                    \n"
00529                 "setc %%al                        \n"
00530                 "movzx %%al, %%rdx                \n"
00531 
00532                 : "=d" (c),    "=a" (dummy), "=c" (dummy2)
00533                 : "0" (index), "1" (value),  "2" (b), "b" (p1)
00534                 : "cc", "memory" );
00535 
00536         #endif
00537 
00538         TTMATH_LOGC("UInt::SubInt", c)
00539 
00540     return c;
00541     }
00542 
00543 
00544     /*!
00545         this static method subtractes one vector from the other
00546         'ss1' is larger in size or equal to 'ss2'
00547 
00548         ss1 points to the first (larger) vector
00549         ss2 points to the second vector
00550         ss1_size - size of the ss1 (and size of the result too)
00551         ss2_size - size of the ss2
00552         result - is the result vector (which has size the same as ss1: ss1_size)
00553 
00554         Example:  ss1_size is 5, ss2_size is 3
00555         ss1:      ss2:   result (output):
00556           5        1         5-1
00557           4        3         4-3
00558           2        7         2-7
00559           6                  6-1  (the borrow from previous item)
00560           9                  9
00561                        return (carry): 0
00562       of course the carry (borrow) is propagated and will be returned from the last item
00563       (this method is used by the Karatsuba multiplication algorithm)
00564     */
00565     template<uint  value_size>
00566     uint  UInt<value_size>::SubVector(const uint  * ss1, const uint  * ss2, uint  ss1_size, uint  ss2_size, uint  * result)
00567     {
00568         TTMATH_ASSERT( ss1_size >= ss2_size )
00569 
00570         uint  c;
00571 
00572         #ifndef __GNUC__
00573             c = ttmath_subvector_x64(ss1, ss2, ss1_size, ss2_size, result);
00574         #endif
00575 
00576 
00577         #ifdef __GNUC__
00578         
00579         //    the asm code is nearly the same as in AddVector
00580         //    only two instructions 'adc' are changed to 'sbb'
00581         
00582         uint  dummy1, dummy2, dummy3;
00583         uint  rest = ss1_size - ss2_size;
00584 
00585             __asm__ __volatile__(
00586                 "mov %%rdx, %%r8                    \n"
00587                 "xor %%rdx, %%rdx                    \n"   // rdx = 0, cf = 0
00588             "1:                                        \n"
00589                 "mov (%%rsi,%%rdx,8), %%rax            \n"
00590                 "sbb (%%rbx,%%rdx,8), %%rax            \n"
00591                 "mov %%rax, (%%rdi,%%rdx,8)            \n"
00592 
00593                 "inc %%rdx                            \n"
00594                 "dec %%rcx                            \n"
00595             "jnz 1b                                    \n"
00596 
00597                 "adc %%rcx, %%rcx                    \n"   // rcx has the cf state
00598 
00599                 "or %%r8, %%r8                        \n"
00600                 "jz 3f                                \n"
00601                 
00602                 "xor %%rbx, %%rbx                    \n"   // ebx = 0
00603                 "neg %%rcx                            \n"   // setting cf from rcx
00604                 "mov %%r8, %%rcx                    \n"   // rcx=rest and is != 0
00605             "2:                                        \n"
00606                 "mov (%%rsi, %%rdx, 8), %%rax        \n"
00607                 "sbb %%rbx, %%rax                     \n"
00608                 "mov %%rax, (%%rdi, %%rdx, 8)        \n"
00609 
00610                 "inc %%rdx                            \n"
00611                 "dec %%rcx                            \n"
00612             "jnz 2b                                    \n"
00613 
00614                 "adc %%rcx, %%rcx                    \n"
00615             "3:                                        \n"
00616 
00617                 : "=a" (dummy1), "=b" (dummy2), "=c" (c),       "=d" (dummy3)
00618                 :                "1" (ss2),     "2" (ss2_size), "3" (rest),   "S" (ss1),  "D" (result)
00619                 : "%r8", "cc", "memory" );
00620 
00621         #endif
00622 
00623         TTMATH_VECTOR_LOGC("UInt::SubVector", c, result, ss1_size)
00624 
00625     return c;
00626     }
00627 
00628 
00629     /*!
00630         this method moves all bits into the left hand side
00631         return value <- this <- c
00632 
00633         the lowest *bit* will be held the 'c' and
00634         the state of one additional bit (on the left hand side)
00635         will be returned
00636 
00637         for example:
00638         let this is 001010000
00639         after Rcl2_one(1) there'll be 010100001 and Rcl2_one returns 0
00640     
00641         ***this method is created only on a 64bit platform***
00642     */
00643     template<uint  value_size>
00644     uint  UInt<value_size>::Rcl2_one(uint  c)
00645     {
00646     sint b = value_size;
00647     uint  * p1 = table;
00648 
00649 
00650         #ifndef __GNUC__
00651             c = ttmath_rcl_x64(p1,b,c);
00652         #endif
00653 
00654 
00655         #ifdef __GNUC__
00656         uint  dummy, dummy2;
00657 
00658         __asm__  __volatile__(
00659         
00660             "xorq %%rdx, %%rdx            \n"   // rdx=0
00661             "negq %%rax                    \n"   // CF=1 if rax!=0 , CF=0 if rax==0
00662 
00663         "1:                                \n"
00664             "rclq $1, (%%rbx, %%rdx, 8)    \n"
00665 
00666             "incq %%rdx                    \n"
00667             "decq %%rcx                    \n"
00668         "jnz 1b                            \n"
00669 
00670             "adcq %%rcx, %%rcx            \n"
00671 
00672             : "=c" (c), "=a" (dummy), "=d" (dummy2)
00673             : "0" (b),  "1" (c), "b" (p1)
00674             : "cc", "memory" );
00675     
00676         #endif
00677 
00678         TTMATH_LOGC("UInt::Rcl2_one", c)
00679 
00680     return c;
00681     }
00682 
00683 
00684     /*!
00685         this method moves all bits into the right hand side
00686         c -> this -> return value
00687 
00688         the highest *bit* will be held the 'c' and
00689         the state of one additional bit (on the right hand side)
00690         will be returned
00691 
00692         for example:
00693         let this is 000000010
00694         after Rcr2_one(1) there'll be 100000001 and Rcr2_one returns 0
00695 
00696         ***this method is created only on a 64bit platform***
00697     */
00698     template<uint  value_size>
00699     uint  UInt<value_size>::Rcr2_one(uint  c)
00700     {
00701     sint b = value_size;
00702     uint  * p1 = table;
00703     
00704 
00705         #ifndef __GNUC__
00706             c = ttmath_rcr_x64(p1,b,c);
00707         #endif
00708 
00709 
00710         #ifdef __GNUC__
00711         uint  dummy;
00712 
00713         __asm__  __volatile__(
00714 
00715             "negq %%rax                        \n"   // CF=1 if rax!=0 , CF=0 if rax==0
00716 
00717         "1:                                    \n"
00718             "rcrq $1, -8(%%rbx, %%rcx, 8)    \n"
00719 
00720             "decq %%rcx                        \n"
00721         "jnz 1b                                \n"
00722 
00723             "adcq %%rcx, %%rcx                \n"
00724 
00725             : "=c" (c), "=a" (dummy)
00726             : "0" (b),  "1" (c), "b" (p1)
00727             : "cc", "memory" );
00728 
00729         #endif
00730 
00731         TTMATH_LOGC("UInt::Rcr2_one", c)
00732 
00733     return c;
00734     }
00735 
00736 
00737 
00738     /*!
00739         this method moves all bits into the left hand side
00740         return value <- this <- c
00741 
00742         the lowest *bits* will be held the 'c' and
00743         the state of one additional bit (on the left hand side)
00744         will be returned
00745 
00746         for example:
00747         let this is 001010000
00748         after Rcl2(3, 1) there'll be 010000111 and Rcl2 returns 1
00749     
00750         ***this method is created only on a 64bit platform***
00751     */
00752     template<uint  value_size>
00753     uint  UInt<value_size>::Rcl2(uint  bits, uint  c)
00754     {
00755     TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT )
00756 
00757     uint  b = value_size;
00758     uint  * p1 = table;
00759 
00760 
00761         #ifndef __GNUC__
00762             c = ttmath_rcl2_x64(p1,b,bits,c);
00763         #endif
00764 
00765 
00766         #ifdef __GNUC__
00767         uint  dummy, dummy2, dummy3;
00768 
00769         __asm__  __volatile__(
00770         
00771             "movq %%rcx, %%rsi                \n"
00772             "movq $64, %%rcx                \n"
00773             "subq %%rsi, %%rcx                \n"
00774             "movq $-1, %%rdx                \n"
00775             "shrq %%cl, %%rdx                \n"
00776             "movq %%rdx, %%r8                 \n"
00777             "movq %%rsi, %%rcx                \n"
00778 
00779             "xorq %%rdx, %%rdx                \n"
00780             "movq %%rdx, %%rsi                \n"
00781             "orq %%rax, %%rax                \n"
00782             "cmovnz %%r8, %%rsi                \n"
00783 
00784         "1:                                    \n"
00785             "rolq %%cl, (%%rbx,%%rdx,8)        \n"
00786 
00787             "movq (%%rbx,%%rdx,8), %%rax    \n"
00788             "andq %%r8, %%rax                \n"
00789             "xorq %%rax, (%%rbx,%%rdx,8)    \n"
00790             "orq  %%rsi, (%%rbx,%%rdx,8)    \n"
00791             "movq %%rax, %%rsi                \n"
00792             
00793             "incq %%rdx                        \n"
00794             "decq %%rdi                        \n"
00795         "jnz 1b                                \n"
00796             
00797             "and $1, %%rax                    \n"
00798 
00799             : "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3)
00800             : "0" (c),  "1" (b), "b" (p1), "c" (bits)
00801             : "%r8", "cc", "memory" );
00802 
00803         #endif
00804 
00805         TTMATH_LOGC("UInt::Rcl2", c)
00806 
00807     return c;
00808     }
00809 
00810 
00811     /*!
00812         this method moves all bits into the right hand side
00813         C -> this -> return value
00814 
00815         the highest *bits* will be held the 'c' and
00816         the state of one additional bit (on the right hand side)
00817         will be returned
00818 
00819         for example:
00820         let this is 000000010
00821         after Rcr2(2, 1) there'll be 110000000 and Rcr2 returns 1
00822 
00823         ***this method is created only on a 64bit platform***
00824     */
00825     template<uint  value_size>
00826     uint  UInt<value_size>::Rcr2(uint  bits, uint  c)
00827     {
00828     TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT )
00829 
00830     sint b = value_size;
00831     uint  * p1 = table;
00832 
00833 
00834         #ifndef __GNUC__
00835             c = ttmath_rcr2_x64(p1,b,bits,c);
00836         #endif
00837 
00838 
00839         #ifdef __GNUC__
00840             uint  dummy, dummy2, dummy3;
00841 
00842             __asm__  __volatile__(
00843 
00844             "movq %%rcx, %%rsi                \n"
00845             "movq $64, %%rcx                \n"
00846             "subq %%rsi, %%rcx                \n"
00847             "movq $-1, %%rdx                \n"
00848             "shlq %%cl, %%rdx                \n"
00849             "movq %%rdx, %%R8                \n"
00850             "movq %%rsi, %%rcx                \n"
00851 
00852             "xorq %%rdx, %%rdx                \n"
00853             "movq %%rdx, %%rsi                \n"
00854             "addq %%rdi, %%rdx                \n"
00855             "decq %%rdx                        \n"
00856             "orq %%rax, %%rax                \n"
00857             "cmovnz %%R8, %%rsi                \n"
00858 
00859         "1:                                    \n"
00860             "rorq %%cl, (%%rbx,%%rdx,8)        \n"
00861 
00862             "movq (%%rbx,%%rdx,8), %%rax    \n"
00863             "andq %%R8, %%rax                \n"
00864             "xorq %%rax, (%%rbx,%%rdx,8)    \n"
00865             "orq  %%rsi, (%%rbx,%%rdx,8)    \n"
00866             "movq %%rax, %%rsi                \n"
00867             
00868             "decq %%rdx                        \n"
00869             "decq %%rdi                        \n"
00870         "jnz 1b                                \n"
00871             
00872             "rolq $1, %%rax                    \n"
00873             "andq $1, %%rax                    \n"
00874 
00875             : "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3)
00876             : "0" (c), "1" (b), "b" (p1), "c" (bits)
00877             : "%r8", "cc", "memory" );
00878 
00879         #endif
00880 
00881         TTMATH_LOGC("UInt::Rcr2", c)
00882 
00883     return c;
00884     }
00885 
00886 
00887     /*
00888         this method returns the number of the highest set bit in one 64-bit word
00889         if the 'x' is zero this method returns '-1'
00890 
00891         ***this method is created only on a 64bit platform***
00892     */
00893     template<uint  value_size>
00894     sint UInt<value_size>::FindLeadingBitInWord(uint  x)
00895     {
00896     sint result;
00897 
00898     
00899         #ifndef __GNUC__
00900 
00901             unsigned long nIndex = 0;
00902 
00903             if( _BitScanReverse64(&nIndex,x) == 0 )
00904                 result = -1;
00905             else
00906                 result = nIndex;
00907 
00908         #endif
00909 
00910 
00911         #ifdef __GNUC__
00912         uint  dummy;
00913 
00914                 __asm__ (
00915 
00916                 "movq $-1, %1          \n"
00917                 "bsrq %2, %0           \n"
00918                 "cmovz %1, %0          \n"
00919 
00920                 : "=r" (result), "=&r" (dummy)
00921                 : "r" (x)
00922                 : "cc" );
00923 
00924         #endif
00925 
00926 
00927     return result;
00928     }
00929 
00930 
00931     /*
00932         this method returns the number of the highest set bit in one 64-bit word
00933         if the 'x' is zero this method returns '-1'
00934 
00935         ***this method is created only on a 64bit platform***
00936     */
00937     template<uint value_size>
00938     sint UInt<value_size>::FindLowestBitInWord (uint  x)
00939     {
00940     sint result;
00941 
00942     
00943         #ifndef __GNUC__
00944 
00945             unsigned long nIndex = 0;
00946 
00947             if( _BitScanForward64(&nIndex,x) == 0 )
00948                 result = -1;
00949             else
00950                 result = nIndex;
00951 
00952         #endif
00953 
00954 
00955         #ifdef __GNUC__
00956         uint  dummy;
00957 
00958                 __asm__ (
00959 
00960                 "movq $-1, %1          \n"
00961                 "bsfq %2, %0           \n"
00962                 "cmovz %1, %0          \n"
00963 
00964                 : "=r" (result), "=&r" (dummy)
00965                 : "r" (x)
00966                 : "cc" );
00967 
00968         #endif
00969 
00970 
00971     return result;
00972     }
00973 
00974 
00975     /*!
00976         this method sets a special bit in the 'value'
00977         and returns the last state of the bit (zero or one)
00978 
00979         ***this method is created only on a 64bit platform***
00980 
00981         bit is from <0,63>
00982 
00983         e.g.
00984          uint x = 100;
00985          uint bit = SetBitInWord(x, 3);
00986          now: x = 108 and bit = 0
00987     */
00988     template<uint value_size>
00989     uint  UInt<value_size>::SetBitInWord (uint  & value, uint  bit)
00990     {
00991         TTMATH_ASSERT( bit < TTMATH_BITS_PER_UINT )
00992         
00993         uint  old_bit;
00994         uint  v = value;
00995 
00996 
00997         #ifndef __GNUC__
00998             old_bit = _bittestandset64((__int64*)&value,bit) != 0;
00999         #endif
01000 
01001 
01002         #ifdef __GNUC__
01003 
01004             __asm__ (
01005 
01006             "btsq %%rbx, %%rax        \n"
01007             "setc %%bl                \n"
01008             "movzx %%bl, %%rbx        \n"
01009             
01010             : "=a" (v), "=b" (old_bit)
01011             : "0" (v),  "1" (bit)
01012             : "cc" );
01013 
01014         #endif
01015 
01016         value = v;
01017 
01018     return old_bit;
01019     }
01020 
01021 
01022     /*!
01023      *
01024      * Multiplication
01025      *
01026      *
01027     */
01028 
01029 
01030     /*!
01031         multiplication: result_high:result_low = a * b
01032         result_high - higher word of the result
01033         result_low  - lower word of the result
01034     
01035         this methos never returns a carry
01036         this method is used in the second version of the multiplication algorithms
01037 
01038         ***this method is created only on a 64bit platform***
01039     */
01040     template<uint value_size>
01041     void UInt<value_size>::MulTwoWords (uint  a, uint  b, uint  * result_high, uint  * result_low)
01042     {
01043     /*
01044         we must use these temporary variables in order to inform the compilator
01045         that value pointed with result1 and result2 has changed
01046 
01047         this has no effect in visual studio but it's usefull when
01048         using gcc and options like -O
01049     */
01050     uint  result1_;
01051     uint  result2_;
01052 
01053 
01054         #ifndef __GNUC__
01055             result1_ = _umul128(a,b,&result2_);
01056         #endif
01057 
01058 
01059         #ifdef __GNUC__
01060 
01061         __asm__ (
01062         
01063             "mulq %%rdx            \n"
01064 
01065             : "=a" (result1_), "=d" (result2_)
01066             : "0" (a),         "1" (b)
01067             : "cc" );
01068 
01069         #endif
01070 
01071 
01072         *result_low  = result1_;
01073         *result_high = result2_;
01074     }
01075 
01076 
01077 
01078 
01079     /*!
01080      *
01081      * Division
01082      *
01083      *
01084     */
01085     
01086 
01087     /*!
01088         this method calculates 64bits word a:b / 32bits c (a higher, b lower word)
01089         r = a:b / c and rest - remainder
01090         
01091         ***this method is created only on a 64bit platform***
01092 
01093         *
01094         * WARNING:
01095         * if r (one word) is too small for the result or c is equal zero
01096         * there'll be a hardware interruption (0)
01097         * and probably the end of your program
01098         *
01099     */
01100     template<uint value_size>
01101     void UInt<value_size>::DivTwoWords (uint  a,uint  b, uint  c, uint  * r, uint  * rest)
01102     {
01103         uint  r_;
01104         uint  rest_;
01105         /*
01106             these variables have similar meaning like those in
01107             the multiplication algorithm MulTwoWords
01108         */
01109 
01110         TTMATH_ASSERT( c != 0 )
01111 
01112 
01113         #ifndef __GNUC__
01114 
01115             ttmath_div_x64(&a,&b,c);
01116             r_    = a;
01117             rest_ = b;
01118             
01119         #endif
01120 
01121 
01122         #ifdef __GNUC__
01123         
01124             __asm__ (
01125 
01126             "divq %%rcx                \n"
01127 
01128             : "=a" (r_), "=d" (rest_)
01129             : "d" (a), "a" (b), "c" (c)
01130             : "cc" );
01131 
01132         #endif
01133 
01134 
01135         *r = r_;
01136         *rest = rest_;
01137     }
01138 
01139 } //namespace
01140 
01141 
01142 #endif //ifdef TTMATH_PLATFORM64
01143 #endif //ifndef TTMATH_NOASM
01144 #endif
01145 
01146 
01147