Library for big numbers from http://www.ttmath.org/
Dependents: PIDHeater82 Conceptcontroller_v_1_0 AlarmClockApp COG4050_adxl355_tilt ... more
ttmathuint_x86.h
00001 /* 00002 * This file is a part of TTMath Bignum Library 00003 * and is distributed under the (new) BSD licence. 00004 * Author: Tomasz Sowa <t.sowa@ttmath.org> 00005 */ 00006 00007 /* 00008 * Copyright (c) 2006-2009, Tomasz Sowa 00009 * All rights reserved. 00010 * 00011 * Redistribution and use in source and binary forms, with or without 00012 * modification, are permitted provided that the following conditions are met: 00013 * 00014 * * Redistributions of source code must retain the above copyright notice, 00015 * this list of conditions and the following disclaimer. 00016 * 00017 * * Redistributions in binary form must reproduce the above copyright 00018 * notice, this list of conditions and the following disclaimer in the 00019 * documentation and/or other materials provided with the distribution. 00020 * 00021 * * Neither the name Tomasz Sowa nor the names of contributors to this 00022 * project may be used to endorse or promote products derived 00023 * from this software without specific prior written permission. 00024 * 00025 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 00026 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 00028 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 00029 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 00030 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 00031 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 00032 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 00033 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 00034 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 00035 * THE POSSIBILITY OF SUCH DAMAGE. 00036 */ 00037 00038 00039 00040 #ifndef headerfilettmathuint_x86 00041 #define headerfilettmathuint_x86 00042 00043 00044 #ifndef TTMATH_NOASM 00045 #ifdef TTMATH_PLATFORM32 00046 00047 00048 /*! 00049 \file ttmathuint_x86.h 00050 \brief template class UInt<uint> with assembler code for 32bit x86 processors 00051 00052 this file is included at the end of ttmathuint.h 00053 */ 00054 00055 00056 00057 /*! 00058 \brief a namespace for the TTMath library 00059 */ 00060 namespace ttmath 00061 { 00062 00063 /*! 00064 returning the string represents the currect type of the library 00065 we have following types: 00066 asm_vc_32 - with asm code designed for Microsoft Visual C++ (32 bits) 00067 asm_gcc_32 - with asm code designed for GCC (32 bits) 00068 asm_vc_64 - with asm for VC (64 bit) 00069 asm_gcc_64 - with asm for GCC (64 bit) 00070 no_asm_32 - pure C++ version (32 bit) - without any asm code 00071 no_asm_64 - pure C++ version (64 bit) - without any asm code 00072 */ 00073 template<uint value_size> 00074 const char * UInt<value_size>::LibTypeStr () 00075 { 00076 #ifndef __GNUC__ 00077 static const char info[] = "asm_vc_32"; 00078 #endif 00079 00080 #ifdef __GNUC__ 00081 static const char info[] = "asm_gcc_32"; 00082 #endif 00083 00084 return info; 00085 } 00086 00087 00088 /*! 00089 returning the currect type of the library 00090 */ 00091 template<uint value_size> 00092 LibTypeCode UInt<value_size>::LibType () 00093 { 00094 #ifndef __GNUC__ 00095 LibTypeCode info = asm_vc_32; 00096 #endif 00097 00098 #ifdef __GNUC__ 00099 LibTypeCode info = asm_gcc_32; 00100 #endif 00101 00102 return info; 00103 } 00104 00105 00106 00107 /*! 00108 * 00109 * basic mathematic functions 00110 * 00111 */ 00112 00113 00114 /*! 00115 adding ss2 to the this and adding carry if it's defined 00116 (this = this + ss2 + c) 00117 00118 c must be zero or one (might be a bigger value than 1) 00119 function returns carry (1) (if it has been) 00120 */ 00121 template<uint value_size> 00122 uint UInt<value_size>::Add (const UInt<value_size> & ss2, uint c) 00123 { 00124 uint b = value_size; 00125 uint * p1 = table; 00126 uint * p2 = const_cast<uint *>(ss2.table); 00127 00128 // we don't have to use TTMATH_REFERENCE_ASSERT here 00129 // this algorithm doesn't require it 00130 00131 #ifndef __GNUC__ 00132 00133 // this part might be compiled with for example visual c 00134 00135 __asm 00136 { 00137 push eax 00138 push ebx 00139 push ecx 00140 push edx 00141 push esi 00142 00143 mov ecx,[b] 00144 00145 mov ebx,[p1] 00146 mov esi,[p2] 00147 00148 xor edx,edx // edx=0 00149 mov eax,[c] 00150 neg eax // CF=1 if rax!=0 , CF=0 if rax==0 00151 00152 ttmath_loop: 00153 mov eax,[esi+edx*4] 00154 adc [ebx+edx*4],eax 00155 00156 inc edx 00157 dec ecx 00158 jnz ttmath_loop 00159 00160 adc ecx, ecx 00161 mov [c], ecx 00162 00163 pop esi 00164 pop edx 00165 pop ecx 00166 pop ebx 00167 pop eax 00168 } 00169 00170 00171 00172 #endif 00173 00174 00175 #ifdef __GNUC__ 00176 uint dummy, dummy2; 00177 // this part should be compiled with gcc 00178 00179 __asm__ __volatile__( 00180 00181 "xorl %%edx, %%edx \n" 00182 "negl %%eax \n" // CF=1 if rax!=0 , CF=0 if rax==0 00183 00184 "1: \n" 00185 "movl (%%esi,%%edx,4), %%eax \n" 00186 "adcl %%eax, (%%ebx,%%edx,4) \n" 00187 00188 "incl %%edx \n" 00189 "decl %%ecx \n" 00190 "jnz 1b \n" 00191 00192 "adc %%ecx, %%ecx \n" 00193 00194 : "=c" (c), "=a" (dummy), "=d" (dummy2) 00195 : "0" (b), "1" (c), "b" (p1), "S" (p2) 00196 : "cc", "memory" ); 00197 #endif 00198 00199 TTMATH_LOGC("UInt::Add", c) 00200 00201 return c; 00202 } 00203 00204 00205 00206 /*! 00207 adding one word (at a specific position) 00208 and returning a carry (if it has been) 00209 00210 e.g. 00211 00212 if we've got (value_size=3): 00213 table[0] = 10; 00214 table[1] = 30; 00215 table[2] = 5; 00216 and we call: 00217 AddInt(2,1) 00218 then it'll be: 00219 table[0] = 10; 00220 table[1] = 30 + 2; 00221 table[2] = 5; 00222 00223 of course if there was a carry from table[2] it would be returned 00224 */ 00225 template<uint value_size> 00226 uint UInt<value_size>::AddInt(uint value, uint index) 00227 { 00228 uint b = value_size; 00229 uint * p1 = table; 00230 uint c; 00231 00232 TTMATH_ASSERT( index < value_size ) 00233 00234 #ifndef __GNUC__ 00235 00236 __asm 00237 { 00238 push eax 00239 push ebx 00240 push ecx 00241 push edx 00242 00243 mov ecx, [b] 00244 sub ecx, [index] 00245 00246 mov edx, [index] 00247 mov ebx, [p1] 00248 00249 mov eax, [value] 00250 00251 ttmath_loop: 00252 add [ebx+edx*4], eax 00253 jnc ttmath_end 00254 00255 mov eax, 1 00256 inc edx 00257 dec ecx 00258 jnz ttmath_loop 00259 00260 ttmath_end: 00261 setc al 00262 movzx edx, al 00263 mov [c], edx 00264 00265 pop edx 00266 pop ecx 00267 pop ebx 00268 pop eax 00269 } 00270 00271 #endif 00272 00273 00274 #ifdef __GNUC__ 00275 uint dummy, dummy2; 00276 00277 __asm__ __volatile__( 00278 00279 "subl %%edx, %%ecx \n" 00280 00281 "1: \n" 00282 "addl %%eax, (%%ebx,%%edx,4) \n" 00283 "jnc 2f \n" 00284 00285 "movl $1, %%eax \n" 00286 "incl %%edx \n" 00287 "decl %%ecx \n" 00288 "jnz 1b \n" 00289 00290 "2: \n" 00291 "setc %%al \n" 00292 "movzx %%al, %%edx \n" 00293 00294 : "=d" (c), "=a" (dummy), "=c" (dummy2) 00295 : "0" (index), "1" (value), "2" (b), "b" (p1) 00296 : "cc", "memory" ); 00297 00298 #endif 00299 00300 TTMATH_LOGC("UInt::AddInt", c) 00301 00302 return c; 00303 } 00304 00305 00306 00307 00308 /*! 00309 adding only two unsigned words to the existing value 00310 and these words begin on the 'index' position 00311 (it's used in the multiplication algorithm 2) 00312 00313 index should be equal or smaller than value_size-2 (index <= value_size-2) 00314 x1 - lower word, x2 - higher word 00315 00316 for example if we've got value_size equal 4 and: 00317 table[0] = 3 00318 table[1] = 4 00319 table[2] = 5 00320 table[3] = 6 00321 then let 00322 x1 = 10 00323 x2 = 20 00324 and 00325 index = 1 00326 00327 the result of this method will be: 00328 table[0] = 3 00329 table[1] = 4 + x1 = 14 00330 table[2] = 5 + x2 = 25 00331 table[3] = 6 00332 00333 and no carry at the end of table[3] 00334 00335 (of course if there was a carry in table[2](5+20) then 00336 this carry would be passed to the table[3] etc.) 00337 */ 00338 template<uint value_size> 00339 uint UInt<value_size>::AddTwoInts(uint x2, uint x1, uint index) 00340 { 00341 uint b = value_size; 00342 uint * p1 = table; 00343 uint c; 00344 00345 TTMATH_ASSERT( index < value_size - 1 ) 00346 00347 #ifndef __GNUC__ 00348 __asm 00349 { 00350 push eax 00351 push ebx 00352 push ecx 00353 push edx 00354 00355 mov ecx, [b] 00356 sub ecx, [index] 00357 00358 mov ebx, [p1] 00359 mov edx, [index] 00360 00361 mov eax, [x1] 00362 add [ebx+edx*4], eax 00363 inc edx 00364 dec ecx 00365 00366 mov eax, [x2] 00367 00368 ttmath_loop: 00369 adc [ebx+edx*4], eax 00370 jnc ttmath_end 00371 00372 mov eax, 0 00373 inc edx 00374 dec ecx 00375 jnz ttmath_loop 00376 00377 ttmath_end: 00378 setc al 00379 movzx edx, al 00380 mov [c], edx 00381 00382 pop edx 00383 pop ecx 00384 pop ebx 00385 pop eax 00386 00387 } 00388 #endif 00389 00390 00391 #ifdef __GNUC__ 00392 uint dummy, dummy2; 00393 00394 __asm__ __volatile__( 00395 00396 "subl %%edx, %%ecx \n" 00397 00398 "addl %%esi, (%%ebx,%%edx,4) \n" 00399 "incl %%edx \n" 00400 "decl %%ecx \n" 00401 00402 "1: \n" 00403 "adcl %%eax, (%%ebx,%%edx,4) \n" 00404 "jnc 2f \n" 00405 00406 "mov $0, %%eax \n" 00407 "incl %%edx \n" 00408 "decl %%ecx \n" 00409 "jnz 1b \n" 00410 00411 "2: \n" 00412 "setc %%al \n" 00413 "movzx %%al, %%eax \n" 00414 00415 : "=a" (c), "=c" (dummy), "=d" (dummy2) 00416 : "0" (x2), "1" (b), "2" (index), "b" (p1), "S" (x1) 00417 : "cc", "memory" ); 00418 00419 #endif 00420 00421 TTMATH_LOGC("UInt::AddTwoInts", c) 00422 00423 return c; 00424 } 00425 00426 00427 00428 /*! 00429 this static method addes one vector to the other 00430 'ss1' is larger in size or equal to 'ss2' 00431 00432 ss1 points to the first (larger) vector 00433 ss2 points to the second vector 00434 ss1_size - size of the ss1 (and size of the result too) 00435 ss2_size - size of the ss2 00436 result - is the result vector (which has size the same as ss1: ss1_size) 00437 00438 Example: ss1_size is 5, ss2_size is 3 00439 ss1: ss2: result (output): 00440 5 1 5+1 00441 4 3 4+3 00442 2 7 2+7 00443 6 6 00444 9 9 00445 of course the carry is propagated and will be returned from the last item 00446 (this method is used by the Karatsuba multiplication algorithm) 00447 */ 00448 template<uint value_size> 00449 uint UInt<value_size>::AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result) 00450 { 00451 TTMATH_ASSERT( ss1_size >= ss2_size ) 00452 00453 uint rest = ss1_size - ss2_size; 00454 uint c; 00455 00456 #ifndef __GNUC__ 00457 00458 // this part might be compiled with for example visual c 00459 __asm 00460 { 00461 pushad 00462 00463 mov ecx, [ss2_size] 00464 xor edx, edx // edx = 0, cf = 0 00465 00466 mov esi, [ss1] 00467 mov ebx, [ss2] 00468 mov edi, [result] 00469 00470 ttmath_loop: 00471 mov eax, [esi+edx*4] 00472 adc eax, [ebx+edx*4] 00473 mov [edi+edx*4], eax 00474 00475 inc edx 00476 dec ecx 00477 jnz ttmath_loop 00478 00479 adc ecx, ecx // ecx has the cf state 00480 00481 mov ebx, [rest] 00482 or ebx, ebx 00483 jz ttmath_end 00484 00485 xor ebx, ebx // ebx = 0 00486 neg ecx // setting cf from ecx 00487 mov ecx, [rest] // ecx is != 0 00488 00489 ttmath_loop2: 00490 mov eax, [esi+edx*4] 00491 adc eax, ebx 00492 mov [edi+edx*4], eax 00493 00494 inc edx 00495 dec ecx 00496 jnz ttmath_loop2 00497 00498 adc ecx, ecx 00499 00500 ttmath_end: 00501 mov [c], ecx 00502 00503 popad 00504 } 00505 00506 #endif 00507 00508 00509 #ifdef __GNUC__ 00510 00511 // this part should be compiled with gcc 00512 uint dummy1, dummy2, dummy3; 00513 00514 __asm__ __volatile__( 00515 "push %%edx \n" 00516 "xor %%edx, %%edx \n" // edx = 0, cf = 0 00517 "1: \n" 00518 "mov (%%esi,%%edx,4), %%eax \n" 00519 "adc (%%ebx,%%edx,4), %%eax \n" 00520 "mov %%eax, (%%edi,%%edx,4) \n" 00521 00522 "inc %%edx \n" 00523 "dec %%ecx \n" 00524 "jnz 1b \n" 00525 00526 "adc %%ecx, %%ecx \n" // ecx has the cf state 00527 "pop %%eax \n" // eax = rest 00528 00529 "or %%eax, %%eax \n" 00530 "jz 3f \n" 00531 00532 "xor %%ebx, %%ebx \n" // ebx = 0 00533 "neg %%ecx \n" // setting cf from ecx 00534 "mov %%eax, %%ecx \n" // ecx=rest and is != 0 00535 "2: \n" 00536 "mov (%%esi, %%edx, 4), %%eax \n" 00537 "adc %%ebx, %%eax \n" 00538 "mov %%eax, (%%edi, %%edx, 4) \n" 00539 00540 "inc %%edx \n" 00541 "dec %%ecx \n" 00542 "jnz 2b \n" 00543 00544 "adc %%ecx, %%ecx \n" 00545 "3: \n" 00546 00547 : "=a" (dummy1), "=b" (dummy2), "=c" (c), "=d" (dummy3) 00548 : "1" (ss2), "2" (ss2_size), "3" (rest), "S" (ss1), "D" (result) 00549 : "cc", "memory" ); 00550 00551 #endif 00552 00553 TTMATH_VECTOR_LOGC("UInt::AddVector", c, result, ss1_size) 00554 00555 return c; 00556 } 00557 00558 00559 /*! 00560 subtracting ss2 from the 'this' and subtracting 00561 carry if it has been defined 00562 (this = this - ss2 - c) 00563 00564 c must be zero or one (might be a bigger value than 1) 00565 function returns carry (1) (if it has been) 00566 */ 00567 template<uint value_size> 00568 uint UInt<value_size>::Sub(const UInt<value_size> & ss2, uint c) 00569 { 00570 uint b = value_size; 00571 uint * p1 = table; 00572 uint * p2 = const_cast<uint *>(ss2.table); 00573 00574 // we don't have to use TTMATH_REFERENCE_ASSERT here 00575 // this algorithm doesn't require it 00576 00577 #ifndef __GNUC__ 00578 00579 __asm 00580 { 00581 push eax 00582 push ebx 00583 push ecx 00584 push edx 00585 push esi 00586 00587 mov ecx,[b] 00588 00589 mov ebx,[p1] 00590 mov esi,[p2] 00591 00592 xor edx,edx // edx=0 00593 mov eax,[c] 00594 neg eax // CF=1 if rax!=0 , CF=0 if rax==0 00595 00596 ttmath_loop: 00597 mov eax,[esi+edx*4] 00598 sbb [ebx+edx*4],eax 00599 00600 inc edx 00601 dec ecx 00602 jnz ttmath_loop 00603 00604 adc ecx, ecx 00605 mov [c], ecx 00606 00607 pop esi 00608 pop edx 00609 pop ecx 00610 pop ebx 00611 pop eax 00612 } 00613 00614 #endif 00615 00616 00617 #ifdef __GNUC__ 00618 uint dummy, dummy2; 00619 00620 __asm__ __volatile__( 00621 00622 "xorl %%edx, %%edx \n" 00623 "negl %%eax \n" // CF=1 if rax!=0 , CF=0 if rax==0 00624 00625 "1: \n" 00626 "movl (%%esi,%%edx,4), %%eax \n" 00627 "sbbl %%eax, (%%ebx,%%edx,4) \n" 00628 00629 "incl %%edx \n" 00630 "decl %%ecx \n" 00631 "jnz 1b \n" 00632 00633 "adc %%ecx, %%ecx \n" 00634 00635 : "=c" (c), "=a" (dummy), "=d" (dummy2) 00636 : "0" (b), "1" (c), "b" (p1), "S" (p2) 00637 : "cc", "memory" ); 00638 00639 #endif 00640 00641 TTMATH_LOGC("UInt::Sub", c) 00642 00643 return c; 00644 } 00645 00646 00647 00648 00649 /*! 00650 this method subtracts one word (at a specific position) 00651 and returns a carry (if it was) 00652 00653 e.g. 00654 00655 if we've got (value_size=3): 00656 table[0] = 10; 00657 table[1] = 30; 00658 table[2] = 5; 00659 and we call: 00660 SubInt(2,1) 00661 then it'll be: 00662 table[0] = 10; 00663 table[1] = 30 - 2; 00664 table[2] = 5; 00665 00666 of course if there was a carry from table[2] it would be returned 00667 */ 00668 template<uint value_size> 00669 uint UInt<value_size>::SubInt(uint value, uint index) 00670 { 00671 uint b = value_size; 00672 uint * p1 = table; 00673 uint c; 00674 00675 TTMATH_ASSERT( index < value_size ) 00676 00677 #ifndef __GNUC__ 00678 00679 __asm 00680 { 00681 push eax 00682 push ebx 00683 push ecx 00684 push edx 00685 00686 mov ecx, [b] 00687 sub ecx, [index] 00688 00689 mov edx, [index] 00690 mov ebx, [p1] 00691 00692 mov eax, [value] 00693 00694 ttmath_loop: 00695 sub [ebx+edx*4], eax 00696 jnc ttmath_end 00697 00698 mov eax, 1 00699 inc edx 00700 dec ecx 00701 jnz ttmath_loop 00702 00703 ttmath_end: 00704 setc al 00705 movzx edx, al 00706 mov [c], edx 00707 00708 pop edx 00709 pop ecx 00710 pop ebx 00711 pop eax 00712 } 00713 00714 #endif 00715 00716 00717 #ifdef __GNUC__ 00718 uint dummy, dummy2; 00719 00720 __asm__ __volatile__( 00721 00722 "subl %%edx, %%ecx \n" 00723 00724 "1: \n" 00725 "subl %%eax, (%%ebx,%%edx,4) \n" 00726 "jnc 2f \n" 00727 00728 "movl $1, %%eax \n" 00729 "incl %%edx \n" 00730 "decl %%ecx \n" 00731 "jnz 1b \n" 00732 00733 "2: \n" 00734 "setc %%al \n" 00735 "movzx %%al, %%edx \n" 00736 00737 : "=d" (c), "=a" (dummy), "=c" (dummy2) 00738 : "0" (index), "1" (value), "2" (b), "b" (p1) 00739 : "cc", "memory" ); 00740 00741 #endif 00742 00743 TTMATH_LOGC("UInt::SubInt", c) 00744 00745 return c; 00746 } 00747 00748 00749 00750 /*! 00751 this static method subtractes one vector from the other 00752 'ss1' is larger in size or equal to 'ss2' 00753 00754 ss1 points to the first (larger) vector 00755 ss2 points to the second vector 00756 ss1_size - size of the ss1 (and size of the result too) 00757 ss2_size - size of the ss2 00758 result - is the result vector (which has size the same as ss1: ss1_size) 00759 00760 Example: ss1_size is 5, ss2_size is 3 00761 ss1: ss2: result (output): 00762 5 1 5-1 00763 4 3 4-3 00764 2 7 2-7 00765 6 6-1 (the borrow from previous item) 00766 9 9 00767 return (carry): 0 00768 of course the carry (borrow) is propagated and will be returned from the last item 00769 (this method is used by the Karatsuba multiplication algorithm) 00770 */ 00771 template<uint value_size> 00772 uint UInt<value_size>::SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result) 00773 { 00774 TTMATH_ASSERT( ss1_size >= ss2_size ) 00775 00776 uint rest = ss1_size - ss2_size; 00777 uint c; 00778 00779 #ifndef __GNUC__ 00780 00781 // this part might be compiled with for example visual c 00782 00783 /* 00784 the asm code is nearly the same as in AddVector 00785 only two instructions 'adc' are changed to 'sbb' 00786 */ 00787 __asm 00788 { 00789 pushad 00790 00791 mov ecx, [ss2_size] 00792 xor edx, edx // edx = 0, cf = 0 00793 00794 mov esi, [ss1] 00795 mov ebx, [ss2] 00796 mov edi, [result] 00797 00798 ttmath_loop: 00799 mov eax, [esi+edx*4] 00800 sbb eax, [ebx+edx*4] 00801 mov [edi+edx*4], eax 00802 00803 inc edx 00804 dec ecx 00805 jnz ttmath_loop 00806 00807 adc ecx, ecx // ecx has the cf state 00808 00809 mov ebx, [rest] 00810 or ebx, ebx 00811 jz ttmath_end 00812 00813 xor ebx, ebx // ebx = 0 00814 neg ecx // setting cf from ecx 00815 mov ecx, [rest] // ecx is != 0 00816 00817 ttmath_loop2: 00818 mov eax, [esi+edx*4] 00819 sbb eax, ebx 00820 mov [edi+edx*4], eax 00821 00822 inc edx 00823 dec ecx 00824 jnz ttmath_loop2 00825 00826 adc ecx, ecx 00827 00828 ttmath_end: 00829 mov [c], ecx 00830 00831 popad 00832 } 00833 00834 #endif 00835 00836 00837 #ifdef __GNUC__ 00838 00839 // this part should be compiled with gcc 00840 uint dummy1, dummy2, dummy3; 00841 00842 __asm__ __volatile__( 00843 "push %%edx \n" 00844 "xor %%edx, %%edx \n" // edx = 0, cf = 0 00845 "1: \n" 00846 "mov (%%esi,%%edx,4), %%eax \n" 00847 "sbb (%%ebx,%%edx,4), %%eax \n" 00848 "mov %%eax, (%%edi,%%edx,4) \n" 00849 00850 "inc %%edx \n" 00851 "dec %%ecx \n" 00852 "jnz 1b \n" 00853 00854 "adc %%ecx, %%ecx \n" // ecx has the cf state 00855 "pop %%eax \n" // eax = rest 00856 00857 "or %%eax, %%eax \n" 00858 "jz 3f \n" 00859 00860 "xor %%ebx, %%ebx \n" // ebx = 0 00861 "neg %%ecx \n" // setting cf from ecx 00862 "mov %%eax, %%ecx \n" // ecx=rest and is != 0 00863 "2: \n" 00864 "mov (%%esi, %%edx, 4), %%eax \n" 00865 "sbb %%ebx, %%eax \n" 00866 "mov %%eax, (%%edi, %%edx, 4) \n" 00867 00868 "inc %%edx \n" 00869 "dec %%ecx \n" 00870 "jnz 2b \n" 00871 00872 "adc %%ecx, %%ecx \n" 00873 "3: \n" 00874 00875 : "=a" (dummy1), "=b" (dummy2), "=c" (c), "=d" (dummy3) 00876 : "1" (ss2), "2" (ss2_size), "3" (rest), "S" (ss1), "D" (result) 00877 : "cc", "memory" ); 00878 00879 #endif 00880 00881 TTMATH_VECTOR_LOGC("UInt::SubVector", c, result, ss1_size) 00882 00883 return c; 00884 } 00885 00886 00887 00888 /*! 00889 this method moves all bits into the left hand side 00890 return value <- this <- c 00891 00892 the lowest *bit* will be held the 'c' and 00893 the state of one additional bit (on the left hand side) 00894 will be returned 00895 00896 for example: 00897 let this is 001010000 00898 after Rcl2_one(1) there'll be 010100001 and Rcl2_one returns 0 00899 */ 00900 template<uint value_size> 00901 uint UInt<value_size>::Rcl2_one(uint c) 00902 { 00903 uint b = value_size; 00904 uint * p1 = table; 00905 00906 #ifndef __GNUC__ 00907 __asm 00908 { 00909 push ebx 00910 push ecx 00911 push edx 00912 00913 mov ebx, [p1] 00914 xor edx, edx 00915 mov ecx, [c] 00916 neg ecx 00917 mov ecx, [b] 00918 00919 ttmath_loop: 00920 rcl dword ptr [ebx+edx*4], 1 00921 00922 inc edx 00923 dec ecx 00924 jnz ttmath_loop 00925 00926 adc ecx, ecx 00927 mov [c], ecx 00928 00929 pop edx 00930 pop ecx 00931 pop ebx 00932 } 00933 #endif 00934 00935 00936 #ifdef __GNUC__ 00937 uint dummy, dummy2; 00938 00939 __asm__ __volatile__( 00940 00941 "xorl %%edx, %%edx \n" // edx=0 00942 "negl %%eax \n" // CF=1 if eax!=0 , CF=0 if eax==0 00943 00944 "1: \n" 00945 "rcll $1, (%%ebx, %%edx, 4) \n" 00946 00947 "incl %%edx \n" 00948 "decl %%ecx \n" 00949 "jnz 1b \n" 00950 00951 "adcl %%ecx, %%ecx \n" 00952 00953 : "=c" (c), "=a" (dummy), "=d" (dummy2) 00954 : "0" (b), "1" (c), "b" (p1) 00955 : "cc", "memory" ); 00956 00957 #endif 00958 00959 TTMATH_LOGC("UInt::Rcl2_one", c) 00960 00961 return c; 00962 } 00963 00964 00965 00966 /*! 00967 this method moves all bits into the right hand side 00968 c -> this -> return value 00969 00970 the highest *bit* will be held the 'c' and 00971 the state of one additional bit (on the right hand side) 00972 will be returned 00973 00974 for example: 00975 let this is 000000010 00976 after Rcr2_one(1) there'll be 100000001 and Rcr2_one returns 0 00977 */ 00978 template<uint value_size> 00979 uint UInt<value_size>::Rcr2_one(uint c) 00980 { 00981 uint b = value_size; 00982 uint * p1 = table; 00983 00984 #ifndef __GNUC__ 00985 __asm 00986 { 00987 push ebx 00988 push ecx 00989 00990 mov ebx, [p1] 00991 mov ecx, [c] 00992 neg ecx 00993 mov ecx, [b] 00994 00995 ttmath_loop: 00996 rcr dword ptr [ebx+ecx*4-4], 1 00997 00998 dec ecx 00999 jnz ttmath_loop 01000 01001 adc ecx, ecx 01002 mov [c], ecx 01003 01004 pop ecx 01005 pop ebx 01006 } 01007 #endif 01008 01009 01010 #ifdef __GNUC__ 01011 uint dummy; 01012 01013 __asm__ __volatile__( 01014 01015 "negl %%eax \n" // CF=1 if eax!=0 , CF=0 if eax==0 01016 01017 "1: \n" 01018 "rcrl $1, -4(%%ebx, %%ecx, 4) \n" 01019 01020 "decl %%ecx \n" 01021 "jnz 1b \n" 01022 01023 "adcl %%ecx, %%ecx \n" 01024 01025 : "=c" (c), "=a" (dummy) 01026 : "0" (b), "1" (c), "b" (p1) 01027 : "cc", "memory" ); 01028 01029 #endif 01030 01031 TTMATH_LOGC("UInt::Rcr2_one", c) 01032 01033 return c; 01034 } 01035 01036 01037 01038 #ifdef _MSC_VER 01039 #pragma warning (disable : 4731) 01040 //warning C4731: frame pointer register 'ebp' modified by inline assembly code 01041 #endif 01042 01043 01044 01045 /*! 01046 this method moves all bits into the left hand side 01047 return value <- this <- c 01048 01049 the lowest *bits* will be held the 'c' and 01050 the state of one additional bit (on the left hand side) 01051 will be returned 01052 01053 for example: 01054 let this is 001010000 01055 after Rcl2(3, 1) there'll be 010000111 and Rcl2 returns 1 01056 */ 01057 template<uint value_size> 01058 uint UInt<value_size>::Rcl2(uint bits, uint c) 01059 { 01060 TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT ) 01061 01062 uint b = value_size; 01063 uint * p1 = table; 01064 01065 #ifndef __GNUC__ 01066 __asm 01067 { 01068 push eax 01069 push ebx 01070 push ecx 01071 push edx 01072 push esi 01073 push edi 01074 push ebp 01075 01076 mov edi, [b] 01077 01078 mov ecx, 32 01079 sub ecx, [bits] 01080 mov edx, -1 01081 shr edx, cl 01082 01083 mov ecx, [bits] 01084 mov ebx, [p1] 01085 mov eax, [c] 01086 01087 mov ebp, edx // ebp = mask (modified ebp - don't read/write to variables) 01088 01089 xor edx, edx // edx = 0 01090 mov esi, edx 01091 or eax, eax 01092 cmovnz esi, ebp // if(c) esi=mask else esi=0 01093 01094 ttmath_loop: 01095 rol dword ptr [ebx+edx*4], cl 01096 01097 mov eax, [ebx+edx*4] 01098 and eax, ebp 01099 xor [ebx+edx*4], eax // clearing bits 01100 or [ebx+edx*4], esi // saving old value 01101 mov esi, eax 01102 01103 inc edx 01104 dec edi 01105 jnz ttmath_loop 01106 01107 pop ebp // restoring ebp 01108 01109 and eax, 1 01110 mov [c], eax 01111 01112 pop edi 01113 pop esi 01114 pop edx 01115 pop ecx 01116 pop ebx 01117 pop eax 01118 } 01119 #endif 01120 01121 01122 #ifdef __GNUC__ 01123 uint dummy, dummy2, dummy3; 01124 01125 __asm__ __volatile__( 01126 01127 "push %%ebp \n" 01128 01129 "movl %%ecx, %%esi \n" 01130 "movl $32, %%ecx \n" 01131 "subl %%esi, %%ecx \n" // ecx = 32 - bits 01132 "movl $-1, %%edx \n" // edx = -1 (all bits set to one) 01133 "shrl %%cl, %%edx \n" // shifting (0 -> edx -> cf) (cl times) 01134 "movl %%edx, %%ebp \n" // ebp = edx = mask 01135 "movl %%esi, %%ecx \n" 01136 01137 "xorl %%edx, %%edx \n" 01138 "movl %%edx, %%esi \n" 01139 "orl %%eax, %%eax \n" 01140 "cmovnz %%ebp, %%esi \n" // if(c) esi=mask else esi=0 01141 01142 "1: \n" 01143 "roll %%cl, (%%ebx,%%edx,4) \n" 01144 01145 "movl (%%ebx,%%edx,4), %%eax \n" 01146 "andl %%ebp, %%eax \n" 01147 "xorl %%eax, (%%ebx,%%edx,4) \n" 01148 "orl %%esi, (%%ebx,%%edx,4) \n" 01149 "movl %%eax, %%esi \n" 01150 01151 "incl %%edx \n" 01152 "decl %%edi \n" 01153 "jnz 1b \n" 01154 01155 "and $1, %%eax \n" 01156 01157 "pop %%ebp \n" 01158 01159 : "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3) 01160 : "0" (c), "1" (b), "b" (p1), "c" (bits) 01161 : "cc", "memory" ); 01162 01163 #endif 01164 01165 TTMATH_LOGC("UInt::Rcl2", c) 01166 01167 return c; 01168 } 01169 01170 01171 01172 01173 /*! 01174 this method moves all bits into the right hand side 01175 C -> this -> return value 01176 01177 the highest *bits* will be held the 'c' and 01178 the state of one additional bit (on the right hand side) 01179 will be returned 01180 01181 for example: 01182 let this is 000000010 01183 after Rcr2(2, 1) there'll be 110000000 and Rcr2 returns 1 01184 */ 01185 template<uint value_size> 01186 uint UInt<value_size>::Rcr2(uint bits, uint c) 01187 { 01188 TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT ) 01189 01190 uint b = value_size; 01191 uint * p1 = table; 01192 01193 #ifndef __GNUC__ 01194 __asm 01195 { 01196 push eax 01197 push ebx 01198 push ecx 01199 push edx 01200 push esi 01201 push edi 01202 push ebp 01203 01204 mov edi, [b] 01205 01206 mov ecx, 32 01207 sub ecx, [bits] 01208 mov edx, -1 01209 shl edx, cl 01210 01211 mov ecx, [bits] 01212 mov ebx, [p1] 01213 mov eax, [c] 01214 01215 mov ebp, edx // ebp = mask (modified ebp - don't read/write to variables) 01216 01217 xor edx, edx // edx = 0 01218 mov esi, edx 01219 add edx, edi 01220 dec edx // edx is pointing at the end of the table (on last word) 01221 or eax, eax 01222 cmovnz esi, ebp // if(c) esi=mask else esi=0 01223 01224 ttmath_loop: 01225 ror dword ptr [ebx+edx*4], cl 01226 01227 mov eax, [ebx+edx*4] 01228 and eax, ebp 01229 xor [ebx+edx*4], eax // clearing bits 01230 or [ebx+edx*4], esi // saving old value 01231 mov esi, eax 01232 01233 dec edx 01234 dec edi 01235 jnz ttmath_loop 01236 01237 pop ebp // restoring ebp 01238 01239 rol eax, 1 // 31bit will be first 01240 and eax, 1 01241 mov [c], eax 01242 01243 pop edi 01244 pop esi 01245 pop edx 01246 pop ecx 01247 pop ebx 01248 pop eax 01249 } 01250 #endif 01251 01252 01253 #ifdef __GNUC__ 01254 uint dummy, dummy2, dummy3; 01255 01256 __asm__ __volatile__( 01257 01258 "push %%ebp \n" 01259 01260 "movl %%ecx, %%esi \n" 01261 "movl $32, %%ecx \n" 01262 "subl %%esi, %%ecx \n" // ecx = 32 - bits 01263 "movl $-1, %%edx \n" // edx = -1 (all bits set to one) 01264 "shll %%cl, %%edx \n" // shifting (cf <- edx <- 0) (cl times) 01265 "movl %%edx, %%ebp \n" // ebp = edx = mask 01266 "movl %%esi, %%ecx \n" 01267 01268 "xorl %%edx, %%edx \n" 01269 "movl %%edx, %%esi \n" 01270 "addl %%edi, %%edx \n" 01271 "decl %%edx \n" // edx is pointing at the end of the table (on last word) 01272 "orl %%eax, %%eax \n" 01273 "cmovnz %%ebp, %%esi \n" // if(c) esi=mask else esi=0 01274 01275 "1: \n" 01276 "rorl %%cl, (%%ebx,%%edx,4) \n" 01277 01278 "movl (%%ebx,%%edx,4), %%eax \n" 01279 "andl %%ebp, %%eax \n" 01280 "xorl %%eax, (%%ebx,%%edx,4) \n" 01281 "orl %%esi, (%%ebx,%%edx,4) \n" 01282 "movl %%eax, %%esi \n" 01283 01284 "decl %%edx \n" 01285 "decl %%edi \n" 01286 "jnz 1b \n" 01287 01288 "roll $1, %%eax \n" 01289 "andl $1, %%eax \n" 01290 01291 "pop %%ebp \n" 01292 01293 : "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3) 01294 : "0" (c), "1" (b), "b" (p1), "c" (bits) 01295 : "cc", "memory" ); 01296 01297 #endif 01298 01299 TTMATH_LOGC("UInt::Rcr2", c) 01300 01301 return c; 01302 } 01303 01304 01305 #ifdef _MSC_VER 01306 #pragma warning (default : 4731) 01307 #endif 01308 01309 01310 /* 01311 this method returns the number of the highest set bit in one 32-bit word 01312 if the 'x' is zero this method returns '-1' 01313 */ 01314 template<uint value_size> 01315 sint UInt<value_size>::FindLeadingBitInWord (uint x) 01316 { 01317 sint result; 01318 01319 #ifndef __GNUC__ 01320 __asm 01321 { 01322 push eax 01323 push edx 01324 01325 mov edx,-1 01326 bsr eax,[x] 01327 cmovz eax,edx 01328 mov [result], eax 01329 01330 pop edx 01331 pop eax 01332 } 01333 #endif 01334 01335 01336 #ifdef __GNUC__ 01337 uint dummy; 01338 01339 __asm__ ( 01340 01341 "movl $-1, %1 \n" 01342 "bsrl %2, %0 \n" 01343 "cmovz %1, %0 \n" 01344 01345 : "=r" (result), "=&r" (dummy) 01346 : "r" (x) 01347 : "cc" ); 01348 01349 #endif 01350 01351 return result; 01352 } 01353 01354 01355 01356 /* 01357 this method returns the number of the smallest set bit in one 32-bit word 01358 if the 'x' is zero this method returns '-1' 01359 */ 01360 template<uint value_size> 01361 sint UInt<value_size>::FindLowestBitInWord (uint x) 01362 { 01363 sint result; 01364 01365 #ifndef __GNUC__ 01366 __asm 01367 { 01368 push eax 01369 push edx 01370 01371 mov edx,-1 01372 bsf eax,[x] 01373 cmovz eax,edx 01374 mov [result], eax 01375 01376 pop edx 01377 pop eax 01378 } 01379 #endif 01380 01381 01382 #ifdef __GNUC__ 01383 uint dummy; 01384 01385 __asm__ ( 01386 01387 "movl $-1, %1 \n" 01388 "bsfl %2, %0 \n" 01389 "cmovz %1, %0 \n" 01390 01391 : "=r" (result), "=&r" (dummy) 01392 : "r" (x) 01393 : "cc" ); 01394 01395 #endif 01396 01397 return result; 01398 } 01399 01400 01401 01402 /*! 01403 this method sets a special bit in the 'value' 01404 and returns the last state of the bit (zero or one) 01405 01406 bit is from <0,31> 01407 e.g. 01408 uint x = 100; 01409 uint bit = SetBitInWord(x, 3); 01410 now: x = 108 and bit = 0 01411 */ 01412 template<uint value_size> 01413 uint UInt<value_size>::SetBitInWord (uint & value, uint bit) 01414 { 01415 TTMATH_ASSERT( bit < TTMATH_BITS_PER_UINT ) 01416 01417 uint old_bit; 01418 uint v = value; 01419 01420 #ifndef __GNUC__ 01421 __asm 01422 { 01423 push ebx 01424 push eax 01425 01426 mov eax, [v] 01427 mov ebx, [bit] 01428 bts eax, ebx 01429 mov [v], eax 01430 01431 setc bl 01432 movzx ebx, bl 01433 mov [old_bit], ebx 01434 01435 pop eax 01436 pop ebx 01437 } 01438 #endif 01439 01440 01441 #ifdef __GNUC__ 01442 __asm__ ( 01443 01444 "btsl %%ebx, %%eax \n" 01445 "setc %%bl \n" 01446 "movzx %%bl, %%ebx \n" 01447 01448 : "=a" (v), "=b" (old_bit) 01449 : "0" (v), "1" (bit) 01450 : "cc" ); 01451 01452 #endif 01453 01454 value = v; 01455 01456 return old_bit; 01457 } 01458 01459 01460 01461 01462 /*! 01463 multiplication: result_high:result_low = a * b 01464 result_high - higher word of the result 01465 result_low - lower word of the result 01466 01467 this methos never returns a carry 01468 this method is used in the second version of the multiplication algorithms 01469 */ 01470 template<uint value_size> 01471 void UInt<value_size>::MulTwoWords (uint a, uint b, uint * result_high, uint * result_low) 01472 { 01473 /* 01474 we must use these temporary variables in order to inform the compilator 01475 that value pointed with result1 and result2 has changed 01476 01477 this has no effect in visual studio but it's useful when 01478 using gcc and options like -Ox 01479 */ 01480 uint result1_; 01481 uint result2_; 01482 01483 #ifndef __GNUC__ 01484 01485 __asm 01486 { 01487 push eax 01488 push edx 01489 01490 mov eax, [a] 01491 mul dword ptr [b] 01492 01493 mov [result2_], edx 01494 mov [result1_], eax 01495 01496 pop edx 01497 pop eax 01498 } 01499 01500 #endif 01501 01502 01503 #ifdef __GNUC__ 01504 01505 __asm__ ( 01506 01507 "mull %%edx \n" 01508 01509 : "=a" (result1_), "=d" (result2_) 01510 : "0" (a), "1" (b) 01511 : "cc" ); 01512 01513 #endif 01514 01515 01516 *result_low = result1_; 01517 *result_high = result2_; 01518 } 01519 01520 01521 01522 01523 01524 /*! 01525 * 01526 * Division 01527 * 01528 * 01529 */ 01530 01531 01532 01533 01534 /*! 01535 this method calculates 64bits word a:b / 32bits c (a higher, b lower word) 01536 r = a:b / c and rest - remainder 01537 01538 * 01539 * WARNING: 01540 * if r (one word) is too small for the result or c is equal zero 01541 * there'll be a hardware interruption (0) 01542 * and probably the end of your program 01543 * 01544 */ 01545 template<uint value_size> 01546 void UInt<value_size>::DivTwoWords (uint a, uint b, uint c, uint * r, uint * rest) 01547 { 01548 uint r_; 01549 uint rest_; 01550 /* 01551 these variables have similar meaning like those in 01552 the multiplication algorithm MulTwoWords 01553 */ 01554 01555 TTMATH_ASSERT( c != 0 ) 01556 01557 #ifndef __GNUC__ 01558 __asm 01559 { 01560 push eax 01561 push edx 01562 01563 mov edx, [a] 01564 mov eax, [b] 01565 div dword ptr [c] 01566 01567 mov [r_], eax 01568 mov [rest_], edx 01569 01570 pop edx 01571 pop eax 01572 } 01573 #endif 01574 01575 01576 #ifdef __GNUC__ 01577 01578 __asm__ ( 01579 01580 "divl %%ecx \n" 01581 01582 : "=a" (r_), "=d" (rest_) 01583 : "0" (b), "1" (a), "c" (c) 01584 : "cc" ); 01585 01586 #endif 01587 01588 01589 *r = r_; 01590 *rest = rest_; 01591 01592 } 01593 01594 01595 01596 } //namespace 01597 01598 01599 01600 #endif //ifdef TTMATH_PLATFORM32 01601 #endif //ifndef TTMATH_NOASM 01602 #endif 01603
Generated on Tue Jul 12 2022 14:03:18 by 1.7.2