Library for big numbers from http://www.ttmath.org/

Dependents:   PIDHeater82 Conceptcontroller_v_1_0 AlarmClockApp COG4050_adxl355_tilt ... more

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers ttmathuint_x86.h Source File

ttmathuint_x86.h

Go to the documentation of this file.
00001 /*
00002  * This file is a part of TTMath Bignum Library
00003  * and is distributed under the (new) BSD licence.
00004  * Author: Tomasz Sowa <t.sowa@ttmath.org>
00005  */
00006 
00007 /* 
00008  * Copyright (c) 2006-2009, Tomasz Sowa
00009  * All rights reserved.
00010  * 
00011  * Redistribution and use in source and binary forms, with or without
00012  * modification, are permitted provided that the following conditions are met:
00013  * 
00014  *  * Redistributions of source code must retain the above copyright notice,
00015  *    this list of conditions and the following disclaimer.
00016  *    
00017  *  * Redistributions in binary form must reproduce the above copyright
00018  *    notice, this list of conditions and the following disclaimer in the
00019  *    documentation and/or other materials provided with the distribution.
00020  *    
00021  *  * Neither the name Tomasz Sowa nor the names of contributors to this
00022  *    project may be used to endorse or promote products derived
00023  *    from this software without specific prior written permission.
00024  *
00025  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
00026  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00028  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
00029  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00030  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00031  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
00032  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
00033  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00034  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
00035  * THE POSSIBILITY OF SUCH DAMAGE.
00036  */
00037 
00038 
00039 
00040 #ifndef headerfilettmathuint_x86
00041 #define headerfilettmathuint_x86
00042 
00043 
00044 #ifndef TTMATH_NOASM
00045 #ifdef TTMATH_PLATFORM32
00046 
00047 
00048 /*!
00049     \file ttmathuint_x86.h
00050     \brief template class UInt<uint> with assembler code for 32bit x86 processors
00051 
00052     this file is included at the end of ttmathuint.h
00053 */
00054 
00055 
00056 
00057 /*!
00058     \brief a namespace for the TTMath library
00059 */
00060 namespace ttmath
00061 {
00062 
00063     /*!
00064         returning the string represents the currect type of the library
00065         we have following types:
00066           asm_vc_32   - with asm code designed for Microsoft Visual C++ (32 bits)
00067           asm_gcc_32  - with asm code designed for GCC (32 bits)
00068           asm_vc_64   - with asm for VC (64 bit)
00069           asm_gcc_64  - with asm for GCC (64 bit)
00070           no_asm_32   - pure C++ version (32 bit) - without any asm code
00071           no_asm_64   - pure C++ version (64 bit) - without any asm code
00072     */
00073     template<uint value_size>
00074     const char * UInt<value_size>::LibTypeStr ()
00075     {
00076         #ifndef __GNUC__
00077             static const char info[] = "asm_vc_32";
00078         #endif        
00079 
00080         #ifdef __GNUC__
00081             static const char info[] = "asm_gcc_32";
00082         #endif
00083 
00084     return info;
00085     }
00086 
00087 
00088     /*!
00089         returning the currect type of the library
00090     */
00091     template<uint value_size>
00092     LibTypeCode  UInt<value_size>::LibType ()
00093     {
00094         #ifndef __GNUC__
00095             LibTypeCode  info = asm_vc_32;
00096         #endif        
00097 
00098         #ifdef __GNUC__
00099             LibTypeCode  info = asm_gcc_32;
00100         #endif
00101 
00102     return info;
00103     }
00104 
00105 
00106 
00107     /*!
00108     *
00109     *    basic mathematic functions
00110     *
00111     */
00112 
00113 
00114     /*!
00115         adding ss2 to the this and adding carry if it's defined
00116         (this = this + ss2 + c)
00117 
00118         c must be zero or one (might be a bigger value than 1)
00119         function returns carry (1) (if it has been)
00120     */
00121     template<uint value_size>
00122     uint  UInt<value_size>::Add (const UInt<value_size> & ss2, uint  c)
00123     {
00124     uint  b = value_size;
00125     uint  * p1 = table;
00126     uint  * p2 = const_cast<uint *>(ss2.table);
00127 
00128         // we don't have to use TTMATH_REFERENCE_ASSERT here
00129         // this algorithm doesn't require it
00130 
00131         #ifndef __GNUC__
00132             
00133             //    this part might be compiled with for example visual c
00134 
00135             __asm
00136             {
00137                 push eax
00138                 push ebx
00139                 push ecx
00140                 push edx
00141                 push esi
00142 
00143                 mov ecx,[b]
00144                 
00145                 mov ebx,[p1]
00146                 mov esi,[p2]
00147 
00148                 xor edx,edx          // edx=0
00149                 mov eax,[c]
00150                 neg eax              // CF=1 if rax!=0 , CF=0 if rax==0
00151 
00152             ttmath_loop:
00153                 mov eax,[esi+edx*4]
00154                 adc [ebx+edx*4],eax
00155 
00156                 inc edx
00157                 dec ecx
00158             jnz ttmath_loop
00159 
00160                 adc ecx, ecx
00161                 mov [c], ecx
00162 
00163                 pop esi
00164                 pop edx
00165                 pop ecx
00166                 pop ebx
00167                 pop eax
00168             }
00169 
00170 
00171 
00172         #endif        
00173             
00174 
00175         #ifdef __GNUC__
00176         uint  dummy, dummy2;
00177             //    this part should be compiled with gcc
00178             
00179             __asm__ __volatile__(
00180 
00181                 "xorl %%edx, %%edx                \n"
00182                 "negl %%eax                        \n"  // CF=1 if rax!=0 , CF=0 if rax==0
00183 
00184             "1:                                    \n"
00185                 "movl (%%esi,%%edx,4), %%eax    \n"
00186                 "adcl %%eax, (%%ebx,%%edx,4)    \n"
00187             
00188                 "incl %%edx                        \n"
00189                 "decl %%ecx                        \n"
00190             "jnz 1b                                \n"
00191 
00192                 "adc %%ecx, %%ecx                \n"
00193 
00194                 : "=c" (c), "=a" (dummy), "=d" (dummy2)
00195                 : "0" (b),  "1" (c), "b" (p1), "S" (p2)
00196                 : "cc", "memory" );
00197         #endif
00198 
00199         TTMATH_LOGC("UInt::Add", c)
00200 
00201     return c;
00202     }
00203 
00204 
00205 
00206     /*!
00207         adding one word (at a specific position)
00208         and returning a carry (if it has been)
00209 
00210         e.g.
00211 
00212         if we've got (value_size=3):
00213             table[0] = 10;
00214             table[1] = 30;
00215             table[2] = 5;    
00216         and we call:
00217             AddInt(2,1)
00218         then it'll be:
00219             table[0] = 10;
00220             table[1] = 30 + 2;
00221             table[2] = 5;
00222 
00223         of course if there was a carry from table[2] it would be returned
00224     */
00225     template<uint  value_size>
00226     uint  UInt<value_size>::AddInt(uint  value, uint  index)
00227     {
00228     uint  b = value_size;
00229     uint  * p1 = table;
00230     uint  c;
00231 
00232         TTMATH_ASSERT( index < value_size )
00233 
00234         #ifndef __GNUC__
00235 
00236             __asm
00237             {
00238                 push eax
00239                 push ebx
00240                 push ecx
00241                 push edx
00242 
00243                 mov ecx, [b]
00244                 sub ecx, [index]                
00245 
00246                 mov edx, [index]
00247                 mov ebx, [p1]
00248 
00249                 mov eax, [value]
00250 
00251             ttmath_loop:
00252                 add [ebx+edx*4], eax
00253             jnc ttmath_end
00254 
00255                 mov eax, 1
00256                 inc edx
00257                 dec ecx
00258             jnz ttmath_loop
00259 
00260             ttmath_end:
00261                 setc al
00262                 movzx edx, al
00263                 mov [c], edx
00264 
00265                 pop edx
00266                 pop ecx
00267                 pop ebx
00268                 pop eax
00269             }
00270 
00271         #endif        
00272             
00273 
00274         #ifdef __GNUC__
00275         uint  dummy, dummy2;
00276 
00277             __asm__ __volatile__(
00278             
00279                 "subl %%edx, %%ecx                 \n"
00280 
00281             "1:                                    \n"
00282                 "addl %%eax, (%%ebx,%%edx,4)    \n"
00283             "jnc 2f                                \n"
00284                 
00285                 "movl $1, %%eax                    \n"
00286                 "incl %%edx                        \n"
00287                 "decl %%ecx                        \n"
00288             "jnz 1b                                \n"
00289 
00290             "2:                                    \n"
00291                 "setc %%al                        \n"
00292                 "movzx %%al, %%edx                \n"
00293 
00294                 : "=d" (c),    "=a" (dummy), "=c" (dummy2)
00295                 : "0" (index), "1" (value),  "2" (b), "b" (p1)
00296                 : "cc", "memory" );
00297 
00298         #endif
00299     
00300         TTMATH_LOGC("UInt::AddInt", c)
00301 
00302     return c;
00303     }
00304 
00305 
00306 
00307 
00308     /*!
00309         adding only two unsigned words to the existing value
00310         and these words begin on the 'index' position
00311         (it's used in the multiplication algorithm 2)
00312 
00313         index should be equal or smaller than value_size-2 (index <= value_size-2)
00314         x1 - lower word, x2 - higher word
00315 
00316         for example if we've got value_size equal 4 and:
00317             table[0] = 3
00318             table[1] = 4
00319             table[2] = 5
00320             table[3] = 6
00321         then let
00322             x1 = 10
00323             x2 = 20
00324         and
00325             index = 1
00326 
00327         the result of this method will be:
00328             table[0] = 3
00329             table[1] = 4 + x1 = 14
00330             table[2] = 5 + x2 = 25
00331             table[3] = 6
00332         
00333         and no carry at the end of table[3]
00334 
00335         (of course if there was a carry in table[2](5+20) then 
00336         this carry would be passed to the table[3] etc.)
00337     */
00338     template<uint  value_size>
00339     uint  UInt<value_size>::AddTwoInts(uint  x2, uint  x1, uint  index)
00340     {
00341     uint  b = value_size;
00342     uint  * p1 = table;
00343     uint  c;
00344 
00345         TTMATH_ASSERT( index < value_size - 1 )
00346 
00347         #ifndef __GNUC__
00348             __asm
00349             {
00350                 push eax
00351                 push ebx
00352                 push ecx
00353                 push edx
00354 
00355                 mov ecx, [b]
00356                 sub ecx, [index]                
00357 
00358                 mov ebx, [p1]
00359                 mov edx, [index]
00360 
00361                 mov eax, [x1]
00362                 add [ebx+edx*4], eax
00363                 inc edx
00364                 dec ecx
00365 
00366                 mov eax, [x2]
00367             
00368             ttmath_loop:
00369                 adc [ebx+edx*4], eax
00370             jnc ttmath_end
00371 
00372                 mov eax, 0
00373                 inc edx
00374                 dec ecx
00375             jnz ttmath_loop
00376 
00377             ttmath_end:
00378                 setc al
00379                 movzx edx, al
00380                 mov [c], edx
00381                 
00382                 pop edx
00383                 pop ecx
00384                 pop ebx
00385                 pop eax
00386 
00387             }
00388         #endif        
00389             
00390 
00391         #ifdef __GNUC__
00392         uint  dummy, dummy2;
00393 
00394             __asm__ __volatile__(
00395             
00396                 "subl %%edx, %%ecx                 \n"
00397                 
00398                 "addl %%esi, (%%ebx,%%edx,4)     \n"
00399                 "incl %%edx                        \n"
00400                 "decl %%ecx                        \n"
00401 
00402             "1:                                    \n"
00403                 "adcl %%eax, (%%ebx,%%edx,4)    \n"
00404             "jnc 2f                                \n"
00405 
00406                 "mov $0, %%eax                    \n"
00407                 "incl %%edx                        \n"
00408                 "decl %%ecx                        \n"
00409             "jnz 1b                                \n"
00410 
00411             "2:                                    \n"
00412                 "setc %%al                        \n"
00413                 "movzx %%al, %%eax                \n"
00414 
00415                 : "=a" (c), "=c" (dummy), "=d" (dummy2)
00416                 : "0" (x2), "1" (b),      "2" (index), "b" (p1), "S" (x1)
00417                 : "cc", "memory" );
00418 
00419         #endif
00420 
00421         TTMATH_LOGC("UInt::AddTwoInts", c)
00422     
00423     return c;
00424     }
00425 
00426 
00427 
00428     /*!
00429         this static method addes one vector to the other
00430         'ss1' is larger in size or equal to 'ss2'
00431 
00432         ss1 points to the first (larger) vector
00433         ss2 points to the second vector
00434         ss1_size - size of the ss1 (and size of the result too)
00435         ss2_size - size of the ss2
00436         result - is the result vector (which has size the same as ss1: ss1_size)
00437 
00438         Example:  ss1_size is 5, ss2_size is 3
00439         ss1:      ss2:   result (output):
00440           5        1         5+1
00441           4        3         4+3
00442           2        7         2+7
00443           6                  6
00444           9                  9
00445       of course the carry is propagated and will be returned from the last item
00446       (this method is used by the Karatsuba multiplication algorithm)
00447     */
00448     template<uint  value_size>
00449     uint  UInt<value_size>::AddVector(const uint  * ss1, const uint  * ss2, uint  ss1_size, uint  ss2_size, uint  * result)
00450     {
00451         TTMATH_ASSERT( ss1_size >= ss2_size )
00452 
00453         uint  rest = ss1_size - ss2_size;
00454         uint  c;
00455 
00456         #ifndef __GNUC__
00457 
00458             //    this part might be compiled with for example visual c
00459             __asm
00460             {
00461                 pushad
00462 
00463                 mov ecx, [ss2_size]
00464                 xor edx, edx               // edx = 0, cf = 0
00465 
00466                 mov esi, [ss1]
00467                 mov ebx, [ss2]
00468                 mov edi, [result]
00469 
00470             ttmath_loop:
00471                 mov eax, [esi+edx*4]
00472                 adc eax, [ebx+edx*4]
00473                 mov [edi+edx*4], eax
00474 
00475                 inc edx
00476                 dec ecx
00477             jnz ttmath_loop
00478 
00479                 adc ecx, ecx             // ecx has the cf state
00480 
00481                 mov ebx, [rest]
00482                 or ebx, ebx
00483                 jz ttmath_end
00484                 
00485                 xor ebx, ebx             // ebx = 0
00486                 neg ecx                  // setting cf from ecx
00487                 mov ecx, [rest]          // ecx is != 0
00488             
00489             ttmath_loop2:
00490                 mov eax, [esi+edx*4]
00491                 adc eax, ebx 
00492                 mov [edi+edx*4], eax
00493 
00494                 inc edx
00495                 dec ecx
00496             jnz ttmath_loop2
00497 
00498                 adc ecx, ecx
00499 
00500             ttmath_end:
00501                 mov [c], ecx
00502 
00503                 popad
00504             }
00505 
00506         #endif        
00507             
00508 
00509         #ifdef __GNUC__
00510             
00511         //    this part should be compiled with gcc
00512         uint  dummy1, dummy2, dummy3;
00513 
00514             __asm__ __volatile__(
00515                 "push %%edx                            \n"
00516                 "xor %%edx, %%edx                    \n"   // edx = 0, cf = 0
00517             "1:                                        \n"
00518                 "mov (%%esi,%%edx,4), %%eax            \n"
00519                 "adc (%%ebx,%%edx,4), %%eax            \n"
00520                 "mov %%eax, (%%edi,%%edx,4)            \n"
00521 
00522                 "inc %%edx                            \n"
00523                 "dec %%ecx                            \n"
00524             "jnz 1b                                    \n"
00525 
00526                 "adc %%ecx, %%ecx                    \n"   // ecx has the cf state
00527                 "pop %%eax                            \n"   // eax = rest
00528 
00529                 "or %%eax, %%eax                    \n"
00530                 "jz 3f                                \n"
00531                 
00532                 "xor %%ebx, %%ebx                    \n"   // ebx = 0
00533                 "neg %%ecx                            \n"   // setting cf from ecx
00534                 "mov %%eax, %%ecx                    \n"   // ecx=rest and is != 0
00535             "2:                                        \n"
00536                 "mov (%%esi, %%edx, 4), %%eax        \n"
00537                 "adc %%ebx, %%eax                     \n"
00538                 "mov %%eax, (%%edi, %%edx, 4)        \n"
00539 
00540                 "inc %%edx                            \n"
00541                 "dec %%ecx                            \n"
00542             "jnz 2b                                    \n"
00543 
00544                 "adc %%ecx, %%ecx                    \n"
00545             "3:                                        \n"
00546 
00547                 : "=a" (dummy1), "=b" (dummy2), "=c" (c),       "=d" (dummy3)
00548                 :                "1" (ss2),     "2" (ss2_size), "3" (rest),   "S" (ss1),  "D" (result)
00549                 : "cc", "memory" );
00550 
00551         #endif
00552 
00553         TTMATH_VECTOR_LOGC("UInt::AddVector", c, result, ss1_size)
00554 
00555     return c;
00556     }
00557 
00558 
00559     /*!
00560         subtracting ss2 from the 'this' and subtracting
00561         carry if it has been defined
00562         (this = this - ss2 - c)
00563 
00564         c must be zero or one (might be a bigger value than 1)
00565         function returns carry (1) (if it has been)
00566     */
00567     template<uint  value_size>
00568     uint  UInt<value_size>::Sub(const UInt<value_size> & ss2, uint  c)
00569     {
00570     uint  b = value_size;
00571     uint  * p1 = table;
00572     uint  * p2 = const_cast<uint *>(ss2.table);
00573 
00574         // we don't have to use TTMATH_REFERENCE_ASSERT here
00575         // this algorithm doesn't require it
00576 
00577         #ifndef __GNUC__
00578 
00579             __asm
00580             {
00581                 push eax
00582                 push ebx
00583                 push ecx
00584                 push edx
00585                 push esi
00586 
00587                 mov ecx,[b]
00588                 
00589                 mov ebx,[p1]
00590                 mov esi,[p2]
00591 
00592                 xor edx,edx          // edx=0
00593                 mov eax,[c]
00594                 neg eax              // CF=1 if rax!=0 , CF=0 if rax==0
00595 
00596             ttmath_loop:
00597                 mov eax,[esi+edx*4]
00598                 sbb [ebx+edx*4],eax
00599 
00600                 inc edx
00601                 dec ecx
00602             jnz ttmath_loop
00603 
00604                 adc ecx, ecx
00605                 mov [c], ecx
00606 
00607                 pop esi
00608                 pop edx
00609                 pop ecx
00610                 pop ebx
00611                 pop eax
00612             }
00613 
00614         #endif
00615 
00616 
00617         #ifdef __GNUC__
00618         uint  dummy, dummy2;
00619 
00620             __asm__  __volatile__(
00621 
00622                 "xorl %%edx, %%edx                \n"
00623                 "negl %%eax                        \n"  // CF=1 if rax!=0 , CF=0 if rax==0
00624 
00625             "1:                                    \n"
00626                 "movl (%%esi,%%edx,4), %%eax    \n"
00627                 "sbbl %%eax, (%%ebx,%%edx,4)    \n"
00628             
00629                 "incl %%edx                        \n"
00630                 "decl %%ecx                        \n"
00631             "jnz 1b                                \n"
00632 
00633                 "adc %%ecx, %%ecx                \n"
00634 
00635                 : "=c" (c), "=a" (dummy), "=d" (dummy2)
00636                 : "0" (b),  "1" (c), "b" (p1), "S" (p2)
00637                 : "cc", "memory" );
00638 
00639         #endif
00640 
00641         TTMATH_LOGC("UInt::Sub", c)
00642 
00643     return c;
00644     }
00645 
00646 
00647 
00648 
00649     /*!
00650         this method subtracts one word (at a specific position)
00651         and returns a carry (if it was)
00652 
00653         e.g.
00654 
00655         if we've got (value_size=3):
00656             table[0] = 10;
00657             table[1] = 30;
00658             table[2] = 5;    
00659         and we call:
00660             SubInt(2,1)
00661         then it'll be:
00662             table[0] = 10;
00663             table[1] = 30 - 2;
00664             table[2] = 5;
00665 
00666         of course if there was a carry from table[2] it would be returned
00667     */
00668     template<uint  value_size>
00669     uint  UInt<value_size>::SubInt(uint  value, uint  index)
00670     {
00671     uint  b = value_size;
00672     uint  * p1 = table;
00673     uint  c;
00674 
00675         TTMATH_ASSERT( index < value_size )
00676 
00677         #ifndef __GNUC__
00678 
00679             __asm
00680             {
00681                 push eax
00682                 push ebx
00683                 push ecx
00684                 push edx
00685 
00686                 mov ecx, [b]
00687                 sub ecx, [index]                
00688 
00689                 mov edx, [index]
00690                 mov ebx, [p1]
00691 
00692                 mov eax, [value]
00693 
00694             ttmath_loop:
00695                 sub [ebx+edx*4], eax
00696             jnc ttmath_end
00697 
00698                 mov eax, 1
00699                 inc edx
00700                 dec ecx
00701             jnz ttmath_loop
00702 
00703             ttmath_end:
00704                 setc al
00705                 movzx edx, al
00706                 mov [c], edx
00707 
00708                 pop edx
00709                 pop ecx
00710                 pop ebx
00711                 pop eax
00712             }
00713 
00714         #endif        
00715             
00716 
00717         #ifdef __GNUC__
00718         uint  dummy, dummy2;
00719 
00720             __asm__ __volatile__(
00721             
00722                 "subl %%edx, %%ecx                 \n"
00723 
00724             "1:                                    \n"
00725                 "subl %%eax, (%%ebx,%%edx,4)    \n"
00726             "jnc 2f                                \n"
00727                 
00728                 "movl $1, %%eax                    \n"
00729                 "incl %%edx                        \n"
00730                 "decl %%ecx                        \n"
00731             "jnz 1b                                \n"
00732 
00733             "2:                                    \n"
00734                 "setc %%al                        \n"
00735                 "movzx %%al, %%edx                \n"
00736 
00737                 : "=d" (c),    "=a" (dummy), "=c" (dummy2)
00738                 : "0" (index), "1" (value),  "2" (b), "b" (p1)
00739                 : "cc", "memory" );
00740 
00741         #endif
00742         
00743         TTMATH_LOGC("UInt::SubInt", c)
00744     
00745     return c;
00746     }
00747 
00748 
00749 
00750     /*!
00751         this static method subtractes one vector from the other
00752         'ss1' is larger in size or equal to 'ss2'
00753 
00754         ss1 points to the first (larger) vector
00755         ss2 points to the second vector
00756         ss1_size - size of the ss1 (and size of the result too)
00757         ss2_size - size of the ss2
00758         result - is the result vector (which has size the same as ss1: ss1_size)
00759 
00760         Example:  ss1_size is 5, ss2_size is 3
00761         ss1:      ss2:   result (output):
00762           5        1         5-1
00763           4        3         4-3
00764           2        7         2-7
00765           6                  6-1  (the borrow from previous item)
00766           9                  9
00767                       return (carry): 0
00768       of course the carry (borrow) is propagated and will be returned from the last item
00769       (this method is used by the Karatsuba multiplication algorithm)
00770     */
00771     template<uint  value_size>
00772     uint  UInt<value_size>::SubVector(const uint  * ss1, const uint  * ss2, uint  ss1_size, uint  ss2_size, uint  * result)
00773     {
00774         TTMATH_ASSERT( ss1_size >= ss2_size )
00775 
00776         uint  rest = ss1_size - ss2_size;
00777         uint  c;
00778 
00779         #ifndef __GNUC__
00780             
00781             //    this part might be compiled with for example visual c
00782 
00783             /*
00784                 the asm code is nearly the same as in AddVector
00785                 only two instructions 'adc' are changed to 'sbb'
00786             */
00787             __asm
00788             {
00789                 pushad
00790 
00791                 mov ecx, [ss2_size]
00792                 xor edx, edx               // edx = 0, cf = 0
00793 
00794                 mov esi, [ss1]
00795                 mov ebx, [ss2]
00796                 mov edi, [result]
00797 
00798             ttmath_loop:
00799                 mov eax, [esi+edx*4]
00800                 sbb eax, [ebx+edx*4]
00801                 mov [edi+edx*4], eax
00802 
00803                 inc edx
00804                 dec ecx
00805             jnz ttmath_loop
00806 
00807                 adc ecx, ecx             // ecx has the cf state
00808 
00809                 mov ebx, [rest]
00810                 or ebx, ebx
00811                 jz ttmath_end
00812                 
00813                 xor ebx, ebx             // ebx = 0
00814                 neg ecx                  // setting cf from ecx
00815                 mov ecx, [rest]          // ecx is != 0
00816 
00817             ttmath_loop2:
00818                 mov eax, [esi+edx*4]
00819                 sbb eax, ebx 
00820                 mov [edi+edx*4], eax
00821 
00822                 inc edx
00823                 dec ecx
00824             jnz ttmath_loop2
00825 
00826                 adc ecx, ecx
00827 
00828             ttmath_end:
00829                 mov [c], ecx
00830 
00831                 popad
00832             }
00833 
00834         #endif        
00835             
00836 
00837         #ifdef __GNUC__
00838             
00839         //    this part should be compiled with gcc
00840         uint  dummy1, dummy2, dummy3;
00841 
00842             __asm__ __volatile__(
00843                 "push %%edx                            \n"
00844                 "xor %%edx, %%edx                    \n"   // edx = 0, cf = 0
00845             "1:                                        \n"
00846                 "mov (%%esi,%%edx,4), %%eax            \n"
00847                 "sbb (%%ebx,%%edx,4), %%eax            \n"
00848                 "mov %%eax, (%%edi,%%edx,4)            \n"
00849 
00850                 "inc %%edx                            \n"
00851                 "dec %%ecx                            \n"
00852             "jnz 1b                                    \n"
00853 
00854                 "adc %%ecx, %%ecx                    \n"   // ecx has the cf state
00855                 "pop %%eax                            \n"   // eax = rest
00856 
00857                 "or %%eax, %%eax                    \n"
00858                 "jz 3f                                \n"
00859                 
00860                 "xor %%ebx, %%ebx                    \n"   // ebx = 0
00861                 "neg %%ecx                            \n"   // setting cf from ecx
00862                 "mov %%eax, %%ecx                    \n"   // ecx=rest and is != 0
00863             "2:                                        \n"
00864                 "mov (%%esi, %%edx, 4), %%eax        \n"
00865                 "sbb %%ebx, %%eax                     \n"
00866                 "mov %%eax, (%%edi, %%edx, 4)        \n"
00867 
00868                 "inc %%edx                            \n"
00869                 "dec %%ecx                            \n"
00870             "jnz 2b                                    \n"
00871 
00872                 "adc %%ecx, %%ecx                    \n"
00873             "3:                                        \n"
00874 
00875                 : "=a" (dummy1), "=b" (dummy2), "=c" (c),       "=d" (dummy3)
00876                 :                "1" (ss2),     "2" (ss2_size), "3" (rest),   "S" (ss1),  "D" (result)
00877                 : "cc", "memory" );
00878 
00879         #endif
00880 
00881         TTMATH_VECTOR_LOGC("UInt::SubVector", c, result, ss1_size)
00882 
00883     return c;
00884     }
00885 
00886 
00887 
00888     /*!
00889         this method moves all bits into the left hand side
00890         return value <- this <- c
00891 
00892         the lowest *bit* will be held the 'c' and
00893         the state of one additional bit (on the left hand side)
00894         will be returned
00895 
00896         for example:
00897         let this is 001010000
00898         after Rcl2_one(1) there'll be 010100001 and Rcl2_one returns 0
00899     */
00900     template<uint  value_size>
00901     uint  UInt<value_size>::Rcl2_one(uint  c)
00902     {
00903     uint  b = value_size;
00904     uint  * p1 = table;
00905 
00906         #ifndef __GNUC__
00907             __asm
00908             {
00909                 push ebx
00910                 push ecx
00911                 push edx
00912 
00913                 mov ebx, [p1]
00914                 xor edx, edx
00915                 mov ecx, [c]
00916                 neg ecx
00917                 mov ecx, [b]
00918 
00919             ttmath_loop:
00920                 rcl dword ptr [ebx+edx*4], 1
00921                 
00922                 inc edx
00923                 dec ecx
00924             jnz ttmath_loop
00925 
00926                 adc ecx, ecx
00927                 mov [c], ecx
00928                 
00929                 pop edx
00930                 pop ecx
00931                 pop ebx
00932             }
00933         #endif
00934 
00935 
00936         #ifdef __GNUC__
00937         uint  dummy, dummy2;
00938 
00939         __asm__  __volatile__(
00940 
00941             "xorl %%edx, %%edx            \n"   // edx=0
00942             "negl %%eax                    \n"   // CF=1 if eax!=0 , CF=0 if eax==0
00943 
00944         "1:                                \n"
00945             "rcll $1, (%%ebx, %%edx, 4)    \n"
00946 
00947             "incl %%edx                    \n"
00948             "decl %%ecx                    \n"
00949         "jnz 1b                            \n"
00950 
00951             "adcl %%ecx, %%ecx            \n"
00952 
00953             : "=c" (c), "=a" (dummy), "=d" (dummy2)
00954             : "0" (b),  "1" (c), "b" (p1)
00955             : "cc", "memory" );
00956 
00957         #endif
00958 
00959         TTMATH_LOGC("UInt::Rcl2_one", c)
00960 
00961     return c;
00962     }
00963 
00964 
00965 
00966     /*!
00967         this method moves all bits into the right hand side
00968         c -> this -> return value
00969 
00970         the highest *bit* will be held the 'c' and
00971         the state of one additional bit (on the right hand side)
00972         will be returned
00973 
00974         for example:
00975         let this is 000000010
00976         after Rcr2_one(1) there'll be 100000001 and Rcr2_one returns 0
00977     */
00978     template<uint  value_size>
00979     uint  UInt<value_size>::Rcr2_one(uint  c)
00980     {
00981     uint  b = value_size;
00982     uint  * p1 = table;
00983 
00984         #ifndef __GNUC__
00985             __asm
00986             {
00987                 push ebx
00988                 push ecx
00989 
00990                 mov ebx, [p1]
00991                 mov ecx, [c]
00992                 neg ecx
00993                 mov ecx, [b]
00994 
00995             ttmath_loop:
00996                 rcr dword ptr [ebx+ecx*4-4], 1
00997                 
00998                 dec ecx
00999             jnz ttmath_loop
01000 
01001                 adc ecx, ecx
01002                 mov [c], ecx
01003 
01004                 pop ecx
01005                 pop ebx
01006             }
01007         #endif
01008 
01009 
01010         #ifdef __GNUC__
01011         uint  dummy;
01012 
01013         __asm__  __volatile__(
01014 
01015             "negl %%eax                        \n"   // CF=1 if eax!=0 , CF=0 if eax==0
01016 
01017         "1:                                    \n"
01018             "rcrl $1, -4(%%ebx, %%ecx, 4)    \n"
01019 
01020             "decl %%ecx                        \n"
01021         "jnz 1b                                \n"
01022 
01023             "adcl %%ecx, %%ecx                \n"
01024 
01025             : "=c" (c), "=a" (dummy)
01026             : "0" (b),  "1" (c), "b" (p1)
01027             : "cc", "memory" );
01028 
01029         #endif
01030 
01031         TTMATH_LOGC("UInt::Rcr2_one", c)
01032 
01033     return c;
01034     }
01035 
01036 
01037 
01038 #ifdef _MSC_VER
01039 #pragma warning (disable : 4731)
01040 //warning C4731: frame pointer register 'ebp' modified by inline assembly code
01041 #endif
01042     
01043 
01044 
01045     /*!
01046         this method moves all bits into the left hand side
01047         return value <- this <- c
01048 
01049         the lowest *bits* will be held the 'c' and
01050         the state of one additional bit (on the left hand side)
01051         will be returned
01052 
01053         for example:
01054         let this is 001010000
01055         after Rcl2(3, 1) there'll be 010000111 and Rcl2 returns 1
01056     */
01057     template<uint value_size>
01058     uint  UInt<value_size>::Rcl2(uint  bits, uint  c)
01059     {
01060     TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT )
01061         
01062     uint  b = value_size;
01063     uint  * p1 = table;
01064 
01065         #ifndef __GNUC__
01066             __asm
01067             {
01068                 push eax
01069                 push ebx
01070                 push ecx
01071                 push edx
01072                 push esi
01073                 push edi
01074                 push ebp
01075 
01076                 mov edi, [b]
01077 
01078                 mov ecx, 32
01079                 sub ecx, [bits]
01080                 mov edx, -1
01081                 shr edx, cl
01082 
01083                 mov ecx, [bits]
01084                 mov ebx, [p1]
01085                 mov eax, [c]
01086 
01087                 mov ebp, edx         // ebp = mask (modified ebp - don't read/write to variables)
01088 
01089                 xor edx, edx         // edx = 0
01090                 mov esi, edx
01091                 or eax, eax
01092                 cmovnz esi, ebp      // if(c) esi=mask else esi=0
01093 
01094             ttmath_loop:
01095                 rol dword ptr [ebx+edx*4], cl
01096                 
01097                 mov eax, [ebx+edx*4]
01098                 and eax, ebp
01099                 xor [ebx+edx*4], eax // clearing bits
01100                 or [ebx+edx*4], esi  // saving old value
01101                 mov esi, eax
01102 
01103                 inc edx
01104                 dec edi
01105             jnz ttmath_loop
01106 
01107                 pop ebp              // restoring ebp
01108 
01109                 and eax, 1
01110                 mov [c], eax
01111 
01112                 pop edi
01113                 pop esi
01114                 pop edx
01115                 pop ecx
01116                 pop ebx
01117                 pop eax
01118             }
01119         #endif
01120 
01121 
01122         #ifdef __GNUC__
01123         uint  dummy, dummy2, dummy3;
01124 
01125         __asm__  __volatile__(
01126 
01127             "push %%ebp                        \n"
01128             
01129             "movl %%ecx, %%esi                \n"
01130             "movl $32, %%ecx                \n"
01131             "subl %%esi, %%ecx                \n"    // ecx = 32 - bits
01132             "movl $-1, %%edx                \n"    // edx = -1 (all bits set to one)
01133             "shrl %%cl, %%edx                \n"    // shifting (0 -> edx -> cf)  (cl times)
01134             "movl %%edx, %%ebp                \n"    // ebp = edx = mask
01135             "movl %%esi, %%ecx                \n"
01136 
01137             "xorl %%edx, %%edx                \n"
01138             "movl %%edx, %%esi                \n"
01139             "orl %%eax, %%eax                \n"
01140             "cmovnz %%ebp, %%esi            \n"    // if(c) esi=mask else esi=0
01141 
01142         "1:                                    \n"
01143             "roll %%cl, (%%ebx,%%edx,4)        \n"
01144 
01145             "movl (%%ebx,%%edx,4), %%eax    \n"
01146             "andl %%ebp, %%eax                \n"
01147             "xorl %%eax, (%%ebx,%%edx,4)    \n"
01148             "orl  %%esi, (%%ebx,%%edx,4)    \n"
01149             "movl %%eax, %%esi                \n"
01150             
01151             "incl %%edx                        \n"
01152             "decl %%edi                        \n"
01153         "jnz 1b                                \n"
01154             
01155             "and $1, %%eax                    \n"
01156 
01157             "pop %%ebp                        \n"
01158 
01159             : "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3)
01160             : "0" (c),  "1" (b), "b" (p1), "c" (bits)
01161             : "cc", "memory" );
01162 
01163         #endif
01164 
01165         TTMATH_LOGC("UInt::Rcl2", c)
01166 
01167     return c;
01168     }
01169 
01170 
01171 
01172 
01173     /*!
01174         this method moves all bits into the right hand side
01175         C -> this -> return value
01176 
01177         the highest *bits* will be held the 'c' and
01178         the state of one additional bit (on the right hand side)
01179         will be returned
01180 
01181         for example:
01182         let this is 000000010
01183         after Rcr2(2, 1) there'll be 110000000 and Rcr2 returns 1
01184     */
01185     template<uint  value_size>
01186     uint  UInt<value_size>::Rcr2(uint  bits, uint  c)
01187     {
01188     TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT )
01189 
01190     uint  b = value_size;
01191     uint  * p1 = table;
01192 
01193         #ifndef __GNUC__
01194             __asm
01195             {
01196                 push eax
01197                 push ebx
01198                 push ecx
01199                 push edx
01200                 push esi
01201                 push edi
01202                 push ebp
01203 
01204                 mov edi, [b]
01205 
01206                 mov ecx, 32
01207                 sub ecx, [bits]
01208                 mov edx, -1
01209                 shl edx, cl
01210 
01211                 mov ecx, [bits]
01212                 mov ebx, [p1]
01213                 mov eax, [c]
01214 
01215                 mov ebp, edx         // ebp = mask (modified ebp - don't read/write to variables)
01216 
01217                 xor edx, edx         // edx = 0
01218                 mov esi, edx
01219                 add edx, edi
01220                 dec edx              // edx is pointing at the end of the table (on last word)
01221                 or eax, eax
01222                 cmovnz esi, ebp      // if(c) esi=mask else esi=0
01223 
01224             ttmath_loop:
01225                 ror dword ptr [ebx+edx*4], cl
01226                 
01227                 mov eax, [ebx+edx*4]
01228                 and eax, ebp 
01229                 xor [ebx+edx*4], eax // clearing bits
01230                 or [ebx+edx*4], esi  // saving old value
01231                 mov esi, eax
01232 
01233                 dec edx
01234                 dec edi
01235             jnz ttmath_loop
01236 
01237                 pop ebp              // restoring ebp
01238 
01239                 rol eax, 1           // 31bit will be first
01240                 and eax, 1  
01241                 mov [c], eax
01242 
01243                 pop edi
01244                 pop esi
01245                 pop edx
01246                 pop ecx
01247                 pop ebx
01248                 pop eax
01249             }
01250         #endif
01251 
01252 
01253         #ifdef __GNUC__
01254         uint  dummy, dummy2, dummy3;
01255 
01256             __asm__  __volatile__(
01257 
01258             "push %%ebp                        \n"
01259             
01260             "movl %%ecx, %%esi                \n"
01261             "movl $32, %%ecx                \n"
01262             "subl %%esi, %%ecx                \n"    // ecx = 32 - bits
01263             "movl $-1, %%edx                \n"    // edx = -1 (all bits set to one)
01264             "shll %%cl, %%edx                \n"    // shifting (cf <- edx <- 0)  (cl times)
01265             "movl %%edx, %%ebp                \n"    // ebp = edx = mask
01266             "movl %%esi, %%ecx                \n"
01267 
01268             "xorl %%edx, %%edx                \n"
01269             "movl %%edx, %%esi                \n"
01270             "addl %%edi, %%edx                \n"
01271             "decl %%edx                        \n"    // edx is pointing at the end of the table (on last word)
01272             "orl %%eax, %%eax                \n"
01273             "cmovnz %%ebp, %%esi            \n"    // if(c) esi=mask else esi=0
01274 
01275         "1:                                    \n"
01276             "rorl %%cl, (%%ebx,%%edx,4)        \n"
01277 
01278             "movl (%%ebx,%%edx,4), %%eax    \n"
01279             "andl %%ebp, %%eax                \n"
01280             "xorl %%eax, (%%ebx,%%edx,4)    \n"
01281             "orl  %%esi, (%%ebx,%%edx,4)    \n"
01282             "movl %%eax, %%esi                \n"
01283             
01284             "decl %%edx                        \n"
01285             "decl %%edi                        \n"
01286         "jnz 1b                                \n"
01287             
01288             "roll $1, %%eax                    \n"
01289             "andl $1, %%eax                    \n"
01290 
01291             "pop %%ebp                        \n"
01292 
01293             : "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3)
01294             : "0" (c),  "1" (b), "b" (p1), "c" (bits)
01295             : "cc", "memory" );
01296 
01297         #endif
01298 
01299         TTMATH_LOGC("UInt::Rcr2", c)
01300 
01301     return c;
01302     }
01303 
01304 
01305 #ifdef _MSC_VER
01306 #pragma warning (default : 4731)
01307 #endif
01308 
01309 
01310     /*
01311         this method returns the number of the highest set bit in one 32-bit word
01312         if the 'x' is zero this method returns '-1'
01313     */
01314     template<uint value_size>
01315     sint UInt<value_size>::FindLeadingBitInWord (uint  x)
01316     {
01317     sint result;
01318 
01319         #ifndef __GNUC__
01320             __asm
01321             {
01322                 push eax
01323                 push edx
01324 
01325                 mov edx,-1
01326                 bsr eax,[x]
01327                 cmovz eax,edx
01328                 mov [result], eax
01329 
01330                 pop edx
01331                 pop eax
01332             }
01333         #endif
01334 
01335 
01336         #ifdef __GNUC__
01337         uint  dummy;
01338 
01339                 __asm__ (
01340 
01341                 "movl $-1, %1          \n"
01342                 "bsrl %2, %0           \n"
01343                 "cmovz %1, %0          \n"
01344 
01345                 : "=r" (result), "=&r" (dummy)
01346                 : "r" (x)
01347                 : "cc" );
01348 
01349         #endif
01350 
01351     return result;
01352     }
01353 
01354 
01355 
01356     /*
01357         this method returns the number of the smallest set bit in one 32-bit word
01358         if the 'x' is zero this method returns '-1'
01359     */
01360     template<uint value_size>
01361     sint UInt<value_size>::FindLowestBitInWord (uint  x)
01362     {
01363     sint result;
01364 
01365         #ifndef __GNUC__
01366             __asm
01367             {
01368                 push eax
01369                 push edx
01370 
01371                 mov edx,-1
01372                 bsf eax,[x]
01373                 cmovz eax,edx
01374                 mov [result], eax
01375 
01376                 pop edx
01377                 pop eax
01378             }
01379         #endif
01380 
01381 
01382         #ifdef __GNUC__
01383         uint  dummy;
01384 
01385                 __asm__ (
01386 
01387                 "movl $-1, %1          \n"
01388                 "bsfl %2, %0           \n"
01389                 "cmovz %1, %0          \n"
01390 
01391                 : "=r" (result), "=&r" (dummy)
01392                 : "r" (x)
01393                 : "cc" );
01394 
01395         #endif
01396 
01397     return result;
01398     }
01399 
01400 
01401 
01402     /*!
01403         this method sets a special bit in the 'value'
01404         and returns the last state of the bit (zero or one)
01405 
01406         bit is from <0,31>
01407         e.g.
01408          uint x = 100;
01409          uint bit = SetBitInWord(x, 3);
01410          now: x = 108 and bit = 0
01411     */
01412     template<uint value_size>
01413     uint  UInt<value_size>::SetBitInWord (uint  & value, uint  bit)
01414     {
01415         TTMATH_ASSERT( bit < TTMATH_BITS_PER_UINT )
01416 
01417         uint  old_bit;
01418         uint  v = value;
01419 
01420         #ifndef __GNUC__
01421             __asm
01422             {
01423             push ebx
01424             push eax
01425 
01426             mov eax, [v]
01427             mov ebx, [bit]
01428             bts eax, ebx
01429             mov [v], eax
01430 
01431             setc bl
01432             movzx ebx, bl
01433             mov [old_bit], ebx
01434 
01435             pop eax
01436             pop ebx
01437             }
01438         #endif
01439 
01440 
01441         #ifdef __GNUC__
01442             __asm__ (
01443 
01444             "btsl %%ebx, %%eax        \n"
01445             "setc %%bl                \n"
01446             "movzx %%bl, %%ebx        \n"
01447             
01448             : "=a" (v), "=b" (old_bit)
01449             : "0" (v),  "1" (bit)
01450             : "cc" );
01451 
01452         #endif
01453 
01454         value = v;
01455 
01456     return old_bit;
01457     }
01458 
01459 
01460 
01461 
01462     /*!
01463         multiplication: result_high:result_low = a * b
01464         result_high - higher word of the result
01465         result_low  - lower word of the result
01466     
01467         this methos never returns a carry
01468         this method is used in the second version of the multiplication algorithms
01469     */
01470     template<uint value_size>
01471     void UInt<value_size>::MulTwoWords (uint  a, uint  b, uint  * result_high, uint  * result_low)
01472     {
01473     /*
01474         we must use these temporary variables in order to inform the compilator
01475         that value pointed with result1 and result2 has changed
01476 
01477         this has no effect in visual studio but it's useful when
01478         using gcc and options like -Ox
01479     */
01480     uint  result1_;
01481     uint  result2_;
01482 
01483         #ifndef __GNUC__
01484 
01485             __asm
01486             {
01487             push eax
01488             push edx
01489 
01490             mov eax, [a]
01491             mul dword ptr [b]
01492 
01493             mov [result2_], edx
01494             mov [result1_], eax
01495 
01496             pop edx
01497             pop eax
01498             }
01499 
01500         #endif
01501 
01502 
01503         #ifdef __GNUC__
01504 
01505         __asm__ (
01506         
01507             "mull %%edx            \n"
01508 
01509             : "=a" (result1_), "=d" (result2_)
01510             : "0" (a),         "1" (b)
01511             : "cc" );
01512 
01513         #endif
01514 
01515 
01516         *result_low  = result1_;
01517         *result_high = result2_;
01518     }
01519 
01520 
01521 
01522 
01523 
01524     /*!
01525      *
01526      * Division
01527      *
01528      *
01529     */
01530     
01531 
01532 
01533 
01534     /*!
01535         this method calculates 64bits word a:b / 32bits c (a higher, b lower word)
01536         r = a:b / c and rest - remainder
01537 
01538         *
01539         * WARNING:
01540         * if r (one word) is too small for the result or c is equal zero
01541         * there'll be a hardware interruption (0)
01542         * and probably the end of your program
01543         *
01544     */
01545     template<uint value_size>
01546     void UInt<value_size>::DivTwoWords (uint  a, uint  b, uint  c, uint  * r, uint  * rest)
01547     {
01548         uint  r_;
01549         uint  rest_;
01550         /*
01551             these variables have similar meaning like those in
01552             the multiplication algorithm MulTwoWords
01553         */
01554 
01555         TTMATH_ASSERT( c != 0 )
01556 
01557         #ifndef __GNUC__
01558             __asm
01559             {
01560                 push eax
01561                 push edx
01562 
01563                 mov edx, [a]
01564                 mov eax, [b]
01565                 div dword ptr [c]
01566 
01567                 mov [r_], eax
01568                 mov [rest_], edx
01569 
01570                 pop edx
01571                 pop eax
01572             }
01573         #endif
01574 
01575 
01576         #ifdef __GNUC__
01577         
01578             __asm__ (
01579 
01580             "divl %%ecx                \n"
01581 
01582             : "=a" (r_), "=d" (rest_)
01583             : "0" (b),   "1" (a), "c" (c)
01584             : "cc" );
01585 
01586         #endif
01587 
01588 
01589         *r = r_;
01590         *rest = rest_;
01591 
01592     }
01593 
01594 
01595 
01596 } //namespace
01597 
01598 
01599 
01600 #endif //ifdef TTMATH_PLATFORM32
01601 #endif //ifndef TTMATH_NOASM
01602 #endif
01603