Library for big numbers from http://www.ttmath.org/
Dependents: PIDHeater82 Conceptcontroller_v_1_0 AlarmClockApp COG4050_adxl355_tilt ... more
ttmathuint_x86_64.h
00001 /* 00002 * This file is a part of TTMath Bignum Library 00003 * and is distributed under the (new) BSD licence. 00004 * Author: Tomasz Sowa <t.sowa@ttmath.org> 00005 */ 00006 00007 /* 00008 * Copyright (c) 2006-2010, Tomasz Sowa 00009 * All rights reserved. 00010 * 00011 * Redistribution and use in source and binary forms, with or without 00012 * modification, are permitted provided that the following conditions are met: 00013 * 00014 * * Redistributions of source code must retain the above copyright notice, 00015 * this list of conditions and the following disclaimer. 00016 * 00017 * * Redistributions in binary form must reproduce the above copyright 00018 * notice, this list of conditions and the following disclaimer in the 00019 * documentation and/or other materials provided with the distribution. 00020 * 00021 * * Neither the name Tomasz Sowa nor the names of contributors to this 00022 * project may be used to endorse or promote products derived 00023 * from this software without specific prior written permission. 00024 * 00025 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 00026 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 00028 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 00029 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 00030 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 00031 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 00032 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 00033 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 00034 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 00035 * THE POSSIBILITY OF SUCH DAMAGE. 00036 */ 00037 00038 00039 #ifndef headerfilettmathuint_x86_64 00040 #define headerfilettmathuint_x86_64 00041 00042 00043 #ifndef TTMATH_NOASM 00044 #ifdef TTMATH_PLATFORM64 00045 00046 00047 /*! 00048 \file ttmathuint_x86_64.h 00049 \brief template class UInt<uint> with assembler code for 64bit x86_64 processors 00050 00051 this file is included at the end of ttmathuint.h 00052 */ 00053 00054 #ifndef __GNUC__ 00055 #include <intrin.h> 00056 #endif 00057 00058 00059 namespace ttmath 00060 { 00061 00062 #ifndef __GNUC__ 00063 00064 extern "C" 00065 { 00066 uint __fastcall ttmath_adc_x64(uint * p1, const uint * p2, uint nSize, uint c); 00067 uint __fastcall ttmath_addindexed_x64(uint * p1, uint nSize, uint nPos, uint nValue); 00068 uint __fastcall ttmath_addindexed2_x64(uint * p1, uint nSize, uint nPos, uint nValue1, uint nValue2); 00069 uint __fastcall ttmath_addvector_x64(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result); 00070 uint __fastcall ttmath_sbb_x64(uint * p1, const uint * p2, uint nSize, uint c); 00071 uint __fastcall ttmath_subindexed_x64(uint * p1, uint nSize, uint nPos, uint nValue); 00072 uint __fastcall ttmath_subvector_x64(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result); 00073 uint __fastcall ttmath_rcl_x64(uint * p1, uint nSize, uint nLowestBit); 00074 uint __fastcall ttmath_rcr_x64(uint * p1, uint nSize, uint nLowestBit); 00075 uint __fastcall ttmath_div_x64(uint * pnValHi, uint * pnValLo, uint nDiv); 00076 uint __fastcall ttmath_rcl2_x64(uint * p1, uint nSize, uint nBits, uint c); 00077 uint __fastcall ttmath_rcr2_x64(uint * p1, uint nSize, uint nBits, uint c); 00078 }; 00079 #endif 00080 00081 00082 /*! 00083 returning the string represents the currect type of the library 00084 we have following types: 00085 asm_vc_32 - with asm code designed for Microsoft Visual C++ (32 bits) 00086 asm_gcc_32 - with asm code designed for GCC (32 bits) 00087 asm_vc_64 - with asm for VC (64 bit) 00088 asm_gcc_64 - with asm for GCC (64 bit) 00089 no_asm_32 - pure C++ version (32 bit) - without any asm code 00090 no_asm_64 - pure C++ version (64 bit) - without any asm code 00091 */ 00092 template<uint value_size> 00093 const char * UInt<value_size>::LibTypeStr () 00094 { 00095 #ifndef __GNUC__ 00096 static const char info[] = "asm_vc_64"; 00097 #endif 00098 00099 #ifdef __GNUC__ 00100 static const char info[] = "asm_gcc_64"; 00101 #endif 00102 00103 return info; 00104 } 00105 00106 00107 /*! 00108 returning the currect type of the library 00109 */ 00110 template<uint value_size> 00111 LibTypeCode UInt<value_size>::LibType () 00112 { 00113 #ifndef __GNUC__ 00114 LibTypeCode info = asm_vc_64; 00115 #endif 00116 00117 #ifdef __GNUC__ 00118 LibTypeCode info = asm_gcc_64; 00119 #endif 00120 00121 return info; 00122 } 00123 00124 00125 /*! 00126 * 00127 * basic mathematic functions 00128 * 00129 */ 00130 00131 00132 00133 /*! 00134 this method adding ss2 to the this and adding carry if it's defined 00135 (this = this + ss2 + c) 00136 00137 ***this method is created only on a 64bit platform*** 00138 00139 c must be zero or one (might be a bigger value than 1) 00140 function returns carry (1) (if it was) 00141 */ 00142 template<uint value_size> 00143 uint UInt<value_size>::Add (const UInt<value_size> & ss2, uint c) 00144 { 00145 uint b = value_size; 00146 uint * p1 = table; 00147 const uint * p2 = ss2.table; 00148 00149 // we don't have to use TTMATH_REFERENCE_ASSERT here 00150 // this algorithm doesn't require it 00151 00152 #ifndef __GNUC__ 00153 c = ttmath_adc_x64(p1,p2,b,c); 00154 #endif 00155 00156 #ifdef __GNUC__ 00157 uint dummy, dummy2; 00158 00159 /* 00160 this part should be compiled with gcc 00161 */ 00162 __asm__ __volatile__( 00163 00164 "xorq %%rdx, %%rdx \n" 00165 "negq %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0 00166 00167 "1: \n" 00168 "movq (%%rsi,%%rdx,8), %%rax \n" 00169 "adcq %%rax, (%%rbx,%%rdx,8) \n" 00170 00171 "incq %%rdx \n" 00172 "decq %%rcx \n" 00173 "jnz 1b \n" 00174 00175 "adcq %%rcx, %%rcx \n" 00176 00177 : "=c" (c), "=a" (dummy), "=d" (dummy2) 00178 : "0" (b), "1" (c), "b" (p1), "S" (p2) 00179 : "cc", "memory" ); 00180 00181 #endif 00182 00183 TTMATH_LOGC("UInt::Add", c) 00184 00185 return c; 00186 } 00187 00188 00189 00190 /*! 00191 this method adds one word (at a specific position) 00192 and returns a carry (if it was) 00193 00194 ***this method is created only on a 64bit platform*** 00195 00196 00197 if we've got (value_size=3): 00198 table[0] = 10; 00199 table[1] = 30; 00200 table[2] = 5; 00201 and we call: 00202 AddInt(2,1) 00203 then it'll be: 00204 table[0] = 10; 00205 table[1] = 30 + 2; 00206 table[2] = 5; 00207 00208 of course if there was a carry from table[2] it would be returned 00209 */ 00210 template<uint value_size> 00211 uint UInt<value_size>::AddInt(uint value, uint index) 00212 { 00213 uint b = value_size; 00214 uint * p1 = table; 00215 uint c; 00216 00217 TTMATH_ASSERT( index < value_size ) 00218 00219 #ifndef __GNUC__ 00220 c = ttmath_addindexed_x64(p1,b,index,value); 00221 #endif 00222 00223 00224 #ifdef __GNUC__ 00225 uint dummy, dummy2; 00226 00227 __asm__ __volatile__( 00228 00229 "subq %%rdx, %%rcx \n" 00230 00231 "1: \n" 00232 "addq %%rax, (%%rbx,%%rdx,8) \n" 00233 "jnc 2f \n" 00234 00235 "movq $1, %%rax \n" 00236 "incq %%rdx \n" 00237 "decq %%rcx \n" 00238 "jnz 1b \n" 00239 00240 "2: \n" 00241 "setc %%al \n" 00242 "movzx %%al, %%rdx \n" 00243 00244 : "=d" (c), "=a" (dummy), "=c" (dummy2) 00245 : "0" (index), "1" (value), "2" (b), "b" (p1) 00246 : "cc", "memory" ); 00247 00248 #endif 00249 00250 TTMATH_LOGC("UInt::AddInt", c) 00251 00252 return c; 00253 } 00254 00255 00256 00257 /*! 00258 this method adds only two unsigned words to the existing value 00259 and these words begin on the 'index' position 00260 (it's used in the multiplication algorithm 2) 00261 00262 ***this method is created only on a 64bit platform*** 00263 00264 index should be equal or smaller than value_size-2 (index <= value_size-2) 00265 x1 - lower word, x2 - higher word 00266 00267 for example if we've got value_size equal 4 and: 00268 table[0] = 3 00269 table[1] = 4 00270 table[2] = 5 00271 table[3] = 6 00272 then let 00273 x1 = 10 00274 x2 = 20 00275 and 00276 index = 1 00277 00278 the result of this method will be: 00279 table[0] = 3 00280 table[1] = 4 + x1 = 14 00281 table[2] = 5 + x2 = 25 00282 table[3] = 6 00283 00284 and no carry at the end of table[3] 00285 00286 (of course if there was a carry in table[2](5+20) then 00287 this carry would be passed to the table[3] etc.) 00288 */ 00289 template<uint value_size> 00290 uint UInt<value_size>::AddTwoInts(uint x2, uint x1, uint index) 00291 { 00292 uint b = value_size; 00293 uint * p1 = table; 00294 uint c; 00295 00296 TTMATH_ASSERT( index < value_size - 1 ) 00297 00298 #ifndef __GNUC__ 00299 c = ttmath_addindexed2_x64(p1,b,index,x1,x2); 00300 #endif 00301 00302 00303 #ifdef __GNUC__ 00304 uint dummy, dummy2; 00305 00306 __asm__ __volatile__( 00307 00308 "subq %%rdx, %%rcx \n" 00309 00310 "addq %%rsi, (%%rbx,%%rdx,8) \n" 00311 "incq %%rdx \n" 00312 "decq %%rcx \n" 00313 00314 "1: \n" 00315 "adcq %%rax, (%%rbx,%%rdx,8) \n" 00316 "jnc 2f \n" 00317 00318 "mov $0, %%rax \n" 00319 "incq %%rdx \n" 00320 "decq %%rcx \n" 00321 "jnz 1b \n" 00322 00323 "2: \n" 00324 "setc %%al \n" 00325 "movzx %%al, %%rax \n" 00326 00327 : "=a" (c), "=c" (dummy), "=d" (dummy2) 00328 : "0" (x2), "1" (b), "2" (index), "b" (p1), "S" (x1) 00329 : "cc", "memory" ); 00330 00331 #endif 00332 00333 TTMATH_LOGC("UInt::AddTwoInts", c) 00334 00335 return c; 00336 } 00337 00338 00339 00340 /*! 00341 this static method addes one vector to the other 00342 'ss1' is larger in size or equal to 'ss2' 00343 00344 ss1 points to the first (larger) vector 00345 ss2 points to the second vector 00346 ss1_size - size of the ss1 (and size of the result too) 00347 ss2_size - size of the ss2 00348 result - is the result vector (which has size the same as ss1: ss1_size) 00349 00350 Example: ss1_size is 5, ss2_size is 3 00351 ss1: ss2: result (output): 00352 5 1 5+1 00353 4 3 4+3 00354 2 7 2+7 00355 6 6 00356 9 9 00357 of course the carry is propagated and will be returned from the last item 00358 (this method is used by the Karatsuba multiplication algorithm) 00359 */ 00360 template<uint value_size> 00361 uint UInt<value_size>::AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result) 00362 { 00363 TTMATH_ASSERT( ss1_size >= ss2_size ) 00364 00365 uint c; 00366 00367 #ifndef __GNUC__ 00368 c = ttmath_addvector_x64(ss1, ss2, ss1_size, ss2_size, result); 00369 #endif 00370 00371 00372 #ifdef __GNUC__ 00373 uint dummy1, dummy2, dummy3; 00374 uint rest = ss1_size - ss2_size; 00375 00376 // this part should be compiled with gcc 00377 00378 __asm__ __volatile__( 00379 "mov %%rdx, %%r8 \n" 00380 "xor %%rdx, %%rdx \n" // rdx = 0, cf = 0 00381 "1: \n" 00382 "mov (%%rsi,%%rdx,8), %%rax \n" 00383 "adc (%%rbx,%%rdx,8), %%rax \n" 00384 "mov %%rax, (%%rdi,%%rdx,8) \n" 00385 00386 "inc %%rdx \n" 00387 "dec %%rcx \n" 00388 "jnz 1b \n" 00389 00390 "adc %%rcx, %%rcx \n" // rcx has the cf state 00391 00392 "or %%r8, %%r8 \n" 00393 "jz 3f \n" 00394 00395 "xor %%rbx, %%rbx \n" // ebx = 0 00396 "neg %%rcx \n" // setting cf from rcx 00397 "mov %%r8, %%rcx \n" // rcx=rest and is != 0 00398 "2: \n" 00399 "mov (%%rsi, %%rdx, 8), %%rax \n" 00400 "adc %%rbx, %%rax \n" 00401 "mov %%rax, (%%rdi, %%rdx, 8) \n" 00402 00403 "inc %%rdx \n" 00404 "dec %%rcx \n" 00405 "jnz 2b \n" 00406 00407 "adc %%rcx, %%rcx \n" 00408 "3: \n" 00409 00410 : "=a" (dummy1), "=b" (dummy2), "=c" (c), "=d" (dummy3) 00411 : "1" (ss2), "2" (ss2_size), "3" (rest), "S" (ss1), "D" (result) 00412 : "%r8", "cc", "memory" ); 00413 00414 #endif 00415 00416 TTMATH_VECTOR_LOGC("UInt::AddVector", c, result, ss1_size) 00417 00418 return c; 00419 } 00420 00421 00422 00423 /*! 00424 this method's subtracting ss2 from the 'this' and subtracting 00425 carry if it has been defined 00426 (this = this - ss2 - c) 00427 00428 ***this method is created only on a 64bit platform*** 00429 00430 c must be zero or one (might be a bigger value than 1) 00431 function returns carry (1) (if it was) 00432 */ 00433 template<uint value_size> 00434 uint UInt<value_size>::Sub(const UInt<value_size> & ss2, uint c) 00435 { 00436 uint b = value_size; 00437 uint * p1 = table; 00438 const uint * p2 = ss2.table; 00439 00440 // we don't have to use TTMATH_REFERENCE_ASSERT here 00441 // this algorithm doesn't require it 00442 00443 #ifndef __GNUC__ 00444 c = ttmath_sbb_x64(p1,p2,b,c); 00445 #endif 00446 00447 00448 #ifdef __GNUC__ 00449 uint dummy, dummy2; 00450 00451 __asm__ __volatile__( 00452 00453 "xorq %%rdx, %%rdx \n" 00454 "negq %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0 00455 00456 "1: \n" 00457 "movq (%%rsi,%%rdx,8), %%rax \n" 00458 "sbbq %%rax, (%%rbx,%%rdx,8) \n" 00459 00460 "incq %%rdx \n" 00461 "decq %%rcx \n" 00462 "jnz 1b \n" 00463 00464 "adcq %%rcx, %%rcx \n" 00465 00466 : "=c" (c), "=a" (dummy), "=d" (dummy2) 00467 : "0" (b), "1" (c), "b" (p1), "S" (p2) 00468 : "cc", "memory" ); 00469 00470 #endif 00471 00472 TTMATH_LOGC("UInt::Sub", c) 00473 00474 return c; 00475 } 00476 00477 00478 00479 /*! 00480 this method subtracts one word (at a specific position) 00481 and returns a carry (if it was) 00482 00483 ***this method is created only on a 64bit platform*** 00484 00485 if we've got (value_size=3): 00486 table[0] = 10; 00487 table[1] = 30; 00488 table[2] = 5; 00489 and we call: 00490 SubInt(2,1) 00491 then it'll be: 00492 table[0] = 10; 00493 table[1] = 30 - 2; 00494 table[2] = 5; 00495 00496 of course if there was a carry from table[2] it would be returned 00497 */ 00498 template<uint value_size> 00499 uint UInt<value_size>::SubInt(uint value, uint index) 00500 { 00501 uint b = value_size; 00502 uint * p1 = table; 00503 uint c; 00504 00505 TTMATH_ASSERT( index < value_size ) 00506 00507 #ifndef __GNUC__ 00508 c = ttmath_subindexed_x64(p1,b,index,value); 00509 #endif 00510 00511 00512 #ifdef __GNUC__ 00513 uint dummy, dummy2; 00514 00515 __asm__ __volatile__( 00516 00517 "subq %%rdx, %%rcx \n" 00518 00519 "1: \n" 00520 "subq %%rax, (%%rbx,%%rdx,8) \n" 00521 "jnc 2f \n" 00522 00523 "movq $1, %%rax \n" 00524 "incq %%rdx \n" 00525 "decq %%rcx \n" 00526 "jnz 1b \n" 00527 00528 "2: \n" 00529 "setc %%al \n" 00530 "movzx %%al, %%rdx \n" 00531 00532 : "=d" (c), "=a" (dummy), "=c" (dummy2) 00533 : "0" (index), "1" (value), "2" (b), "b" (p1) 00534 : "cc", "memory" ); 00535 00536 #endif 00537 00538 TTMATH_LOGC("UInt::SubInt", c) 00539 00540 return c; 00541 } 00542 00543 00544 /*! 00545 this static method subtractes one vector from the other 00546 'ss1' is larger in size or equal to 'ss2' 00547 00548 ss1 points to the first (larger) vector 00549 ss2 points to the second vector 00550 ss1_size - size of the ss1 (and size of the result too) 00551 ss2_size - size of the ss2 00552 result - is the result vector (which has size the same as ss1: ss1_size) 00553 00554 Example: ss1_size is 5, ss2_size is 3 00555 ss1: ss2: result (output): 00556 5 1 5-1 00557 4 3 4-3 00558 2 7 2-7 00559 6 6-1 (the borrow from previous item) 00560 9 9 00561 return (carry): 0 00562 of course the carry (borrow) is propagated and will be returned from the last item 00563 (this method is used by the Karatsuba multiplication algorithm) 00564 */ 00565 template<uint value_size> 00566 uint UInt<value_size>::SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result) 00567 { 00568 TTMATH_ASSERT( ss1_size >= ss2_size ) 00569 00570 uint c; 00571 00572 #ifndef __GNUC__ 00573 c = ttmath_subvector_x64(ss1, ss2, ss1_size, ss2_size, result); 00574 #endif 00575 00576 00577 #ifdef __GNUC__ 00578 00579 // the asm code is nearly the same as in AddVector 00580 // only two instructions 'adc' are changed to 'sbb' 00581 00582 uint dummy1, dummy2, dummy3; 00583 uint rest = ss1_size - ss2_size; 00584 00585 __asm__ __volatile__( 00586 "mov %%rdx, %%r8 \n" 00587 "xor %%rdx, %%rdx \n" // rdx = 0, cf = 0 00588 "1: \n" 00589 "mov (%%rsi,%%rdx,8), %%rax \n" 00590 "sbb (%%rbx,%%rdx,8), %%rax \n" 00591 "mov %%rax, (%%rdi,%%rdx,8) \n" 00592 00593 "inc %%rdx \n" 00594 "dec %%rcx \n" 00595 "jnz 1b \n" 00596 00597 "adc %%rcx, %%rcx \n" // rcx has the cf state 00598 00599 "or %%r8, %%r8 \n" 00600 "jz 3f \n" 00601 00602 "xor %%rbx, %%rbx \n" // ebx = 0 00603 "neg %%rcx \n" // setting cf from rcx 00604 "mov %%r8, %%rcx \n" // rcx=rest and is != 0 00605 "2: \n" 00606 "mov (%%rsi, %%rdx, 8), %%rax \n" 00607 "sbb %%rbx, %%rax \n" 00608 "mov %%rax, (%%rdi, %%rdx, 8) \n" 00609 00610 "inc %%rdx \n" 00611 "dec %%rcx \n" 00612 "jnz 2b \n" 00613 00614 "adc %%rcx, %%rcx \n" 00615 "3: \n" 00616 00617 : "=a" (dummy1), "=b" (dummy2), "=c" (c), "=d" (dummy3) 00618 : "1" (ss2), "2" (ss2_size), "3" (rest), "S" (ss1), "D" (result) 00619 : "%r8", "cc", "memory" ); 00620 00621 #endif 00622 00623 TTMATH_VECTOR_LOGC("UInt::SubVector", c, result, ss1_size) 00624 00625 return c; 00626 } 00627 00628 00629 /*! 00630 this method moves all bits into the left hand side 00631 return value <- this <- c 00632 00633 the lowest *bit* will be held the 'c' and 00634 the state of one additional bit (on the left hand side) 00635 will be returned 00636 00637 for example: 00638 let this is 001010000 00639 after Rcl2_one(1) there'll be 010100001 and Rcl2_one returns 0 00640 00641 ***this method is created only on a 64bit platform*** 00642 */ 00643 template<uint value_size> 00644 uint UInt<value_size>::Rcl2_one(uint c) 00645 { 00646 sint b = value_size; 00647 uint * p1 = table; 00648 00649 00650 #ifndef __GNUC__ 00651 c = ttmath_rcl_x64(p1,b,c); 00652 #endif 00653 00654 00655 #ifdef __GNUC__ 00656 uint dummy, dummy2; 00657 00658 __asm__ __volatile__( 00659 00660 "xorq %%rdx, %%rdx \n" // rdx=0 00661 "negq %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0 00662 00663 "1: \n" 00664 "rclq $1, (%%rbx, %%rdx, 8) \n" 00665 00666 "incq %%rdx \n" 00667 "decq %%rcx \n" 00668 "jnz 1b \n" 00669 00670 "adcq %%rcx, %%rcx \n" 00671 00672 : "=c" (c), "=a" (dummy), "=d" (dummy2) 00673 : "0" (b), "1" (c), "b" (p1) 00674 : "cc", "memory" ); 00675 00676 #endif 00677 00678 TTMATH_LOGC("UInt::Rcl2_one", c) 00679 00680 return c; 00681 } 00682 00683 00684 /*! 00685 this method moves all bits into the right hand side 00686 c -> this -> return value 00687 00688 the highest *bit* will be held the 'c' and 00689 the state of one additional bit (on the right hand side) 00690 will be returned 00691 00692 for example: 00693 let this is 000000010 00694 after Rcr2_one(1) there'll be 100000001 and Rcr2_one returns 0 00695 00696 ***this method is created only on a 64bit platform*** 00697 */ 00698 template<uint value_size> 00699 uint UInt<value_size>::Rcr2_one(uint c) 00700 { 00701 sint b = value_size; 00702 uint * p1 = table; 00703 00704 00705 #ifndef __GNUC__ 00706 c = ttmath_rcr_x64(p1,b,c); 00707 #endif 00708 00709 00710 #ifdef __GNUC__ 00711 uint dummy; 00712 00713 __asm__ __volatile__( 00714 00715 "negq %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0 00716 00717 "1: \n" 00718 "rcrq $1, -8(%%rbx, %%rcx, 8) \n" 00719 00720 "decq %%rcx \n" 00721 "jnz 1b \n" 00722 00723 "adcq %%rcx, %%rcx \n" 00724 00725 : "=c" (c), "=a" (dummy) 00726 : "0" (b), "1" (c), "b" (p1) 00727 : "cc", "memory" ); 00728 00729 #endif 00730 00731 TTMATH_LOGC("UInt::Rcr2_one", c) 00732 00733 return c; 00734 } 00735 00736 00737 00738 /*! 00739 this method moves all bits into the left hand side 00740 return value <- this <- c 00741 00742 the lowest *bits* will be held the 'c' and 00743 the state of one additional bit (on the left hand side) 00744 will be returned 00745 00746 for example: 00747 let this is 001010000 00748 after Rcl2(3, 1) there'll be 010000111 and Rcl2 returns 1 00749 00750 ***this method is created only on a 64bit platform*** 00751 */ 00752 template<uint value_size> 00753 uint UInt<value_size>::Rcl2(uint bits, uint c) 00754 { 00755 TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT ) 00756 00757 uint b = value_size; 00758 uint * p1 = table; 00759 00760 00761 #ifndef __GNUC__ 00762 c = ttmath_rcl2_x64(p1,b,bits,c); 00763 #endif 00764 00765 00766 #ifdef __GNUC__ 00767 uint dummy, dummy2, dummy3; 00768 00769 __asm__ __volatile__( 00770 00771 "movq %%rcx, %%rsi \n" 00772 "movq $64, %%rcx \n" 00773 "subq %%rsi, %%rcx \n" 00774 "movq $-1, %%rdx \n" 00775 "shrq %%cl, %%rdx \n" 00776 "movq %%rdx, %%r8 \n" 00777 "movq %%rsi, %%rcx \n" 00778 00779 "xorq %%rdx, %%rdx \n" 00780 "movq %%rdx, %%rsi \n" 00781 "orq %%rax, %%rax \n" 00782 "cmovnz %%r8, %%rsi \n" 00783 00784 "1: \n" 00785 "rolq %%cl, (%%rbx,%%rdx,8) \n" 00786 00787 "movq (%%rbx,%%rdx,8), %%rax \n" 00788 "andq %%r8, %%rax \n" 00789 "xorq %%rax, (%%rbx,%%rdx,8) \n" 00790 "orq %%rsi, (%%rbx,%%rdx,8) \n" 00791 "movq %%rax, %%rsi \n" 00792 00793 "incq %%rdx \n" 00794 "decq %%rdi \n" 00795 "jnz 1b \n" 00796 00797 "and $1, %%rax \n" 00798 00799 : "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3) 00800 : "0" (c), "1" (b), "b" (p1), "c" (bits) 00801 : "%r8", "cc", "memory" ); 00802 00803 #endif 00804 00805 TTMATH_LOGC("UInt::Rcl2", c) 00806 00807 return c; 00808 } 00809 00810 00811 /*! 00812 this method moves all bits into the right hand side 00813 C -> this -> return value 00814 00815 the highest *bits* will be held the 'c' and 00816 the state of one additional bit (on the right hand side) 00817 will be returned 00818 00819 for example: 00820 let this is 000000010 00821 after Rcr2(2, 1) there'll be 110000000 and Rcr2 returns 1 00822 00823 ***this method is created only on a 64bit platform*** 00824 */ 00825 template<uint value_size> 00826 uint UInt<value_size>::Rcr2(uint bits, uint c) 00827 { 00828 TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT ) 00829 00830 sint b = value_size; 00831 uint * p1 = table; 00832 00833 00834 #ifndef __GNUC__ 00835 c = ttmath_rcr2_x64(p1,b,bits,c); 00836 #endif 00837 00838 00839 #ifdef __GNUC__ 00840 uint dummy, dummy2, dummy3; 00841 00842 __asm__ __volatile__( 00843 00844 "movq %%rcx, %%rsi \n" 00845 "movq $64, %%rcx \n" 00846 "subq %%rsi, %%rcx \n" 00847 "movq $-1, %%rdx \n" 00848 "shlq %%cl, %%rdx \n" 00849 "movq %%rdx, %%R8 \n" 00850 "movq %%rsi, %%rcx \n" 00851 00852 "xorq %%rdx, %%rdx \n" 00853 "movq %%rdx, %%rsi \n" 00854 "addq %%rdi, %%rdx \n" 00855 "decq %%rdx \n" 00856 "orq %%rax, %%rax \n" 00857 "cmovnz %%R8, %%rsi \n" 00858 00859 "1: \n" 00860 "rorq %%cl, (%%rbx,%%rdx,8) \n" 00861 00862 "movq (%%rbx,%%rdx,8), %%rax \n" 00863 "andq %%R8, %%rax \n" 00864 "xorq %%rax, (%%rbx,%%rdx,8) \n" 00865 "orq %%rsi, (%%rbx,%%rdx,8) \n" 00866 "movq %%rax, %%rsi \n" 00867 00868 "decq %%rdx \n" 00869 "decq %%rdi \n" 00870 "jnz 1b \n" 00871 00872 "rolq $1, %%rax \n" 00873 "andq $1, %%rax \n" 00874 00875 : "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3) 00876 : "0" (c), "1" (b), "b" (p1), "c" (bits) 00877 : "%r8", "cc", "memory" ); 00878 00879 #endif 00880 00881 TTMATH_LOGC("UInt::Rcr2", c) 00882 00883 return c; 00884 } 00885 00886 00887 /* 00888 this method returns the number of the highest set bit in one 64-bit word 00889 if the 'x' is zero this method returns '-1' 00890 00891 ***this method is created only on a 64bit platform*** 00892 */ 00893 template<uint value_size> 00894 sint UInt<value_size>::FindLeadingBitInWord(uint x) 00895 { 00896 sint result; 00897 00898 00899 #ifndef __GNUC__ 00900 00901 unsigned long nIndex = 0; 00902 00903 if( _BitScanReverse64(&nIndex,x) == 0 ) 00904 result = -1; 00905 else 00906 result = nIndex; 00907 00908 #endif 00909 00910 00911 #ifdef __GNUC__ 00912 uint dummy; 00913 00914 __asm__ ( 00915 00916 "movq $-1, %1 \n" 00917 "bsrq %2, %0 \n" 00918 "cmovz %1, %0 \n" 00919 00920 : "=r" (result), "=&r" (dummy) 00921 : "r" (x) 00922 : "cc" ); 00923 00924 #endif 00925 00926 00927 return result; 00928 } 00929 00930 00931 /* 00932 this method returns the number of the highest set bit in one 64-bit word 00933 if the 'x' is zero this method returns '-1' 00934 00935 ***this method is created only on a 64bit platform*** 00936 */ 00937 template<uint value_size> 00938 sint UInt<value_size>::FindLowestBitInWord (uint x) 00939 { 00940 sint result; 00941 00942 00943 #ifndef __GNUC__ 00944 00945 unsigned long nIndex = 0; 00946 00947 if( _BitScanForward64(&nIndex,x) == 0 ) 00948 result = -1; 00949 else 00950 result = nIndex; 00951 00952 #endif 00953 00954 00955 #ifdef __GNUC__ 00956 uint dummy; 00957 00958 __asm__ ( 00959 00960 "movq $-1, %1 \n" 00961 "bsfq %2, %0 \n" 00962 "cmovz %1, %0 \n" 00963 00964 : "=r" (result), "=&r" (dummy) 00965 : "r" (x) 00966 : "cc" ); 00967 00968 #endif 00969 00970 00971 return result; 00972 } 00973 00974 00975 /*! 00976 this method sets a special bit in the 'value' 00977 and returns the last state of the bit (zero or one) 00978 00979 ***this method is created only on a 64bit platform*** 00980 00981 bit is from <0,63> 00982 00983 e.g. 00984 uint x = 100; 00985 uint bit = SetBitInWord(x, 3); 00986 now: x = 108 and bit = 0 00987 */ 00988 template<uint value_size> 00989 uint UInt<value_size>::SetBitInWord (uint & value, uint bit) 00990 { 00991 TTMATH_ASSERT( bit < TTMATH_BITS_PER_UINT ) 00992 00993 uint old_bit; 00994 uint v = value; 00995 00996 00997 #ifndef __GNUC__ 00998 old_bit = _bittestandset64((__int64*)&value,bit) != 0; 00999 #endif 01000 01001 01002 #ifdef __GNUC__ 01003 01004 __asm__ ( 01005 01006 "btsq %%rbx, %%rax \n" 01007 "setc %%bl \n" 01008 "movzx %%bl, %%rbx \n" 01009 01010 : "=a" (v), "=b" (old_bit) 01011 : "0" (v), "1" (bit) 01012 : "cc" ); 01013 01014 #endif 01015 01016 value = v; 01017 01018 return old_bit; 01019 } 01020 01021 01022 /*! 01023 * 01024 * Multiplication 01025 * 01026 * 01027 */ 01028 01029 01030 /*! 01031 multiplication: result_high:result_low = a * b 01032 result_high - higher word of the result 01033 result_low - lower word of the result 01034 01035 this methos never returns a carry 01036 this method is used in the second version of the multiplication algorithms 01037 01038 ***this method is created only on a 64bit platform*** 01039 */ 01040 template<uint value_size> 01041 void UInt<value_size>::MulTwoWords (uint a, uint b, uint * result_high, uint * result_low) 01042 { 01043 /* 01044 we must use these temporary variables in order to inform the compilator 01045 that value pointed with result1 and result2 has changed 01046 01047 this has no effect in visual studio but it's usefull when 01048 using gcc and options like -O 01049 */ 01050 uint result1_; 01051 uint result2_; 01052 01053 01054 #ifndef __GNUC__ 01055 result1_ = _umul128(a,b,&result2_); 01056 #endif 01057 01058 01059 #ifdef __GNUC__ 01060 01061 __asm__ ( 01062 01063 "mulq %%rdx \n" 01064 01065 : "=a" (result1_), "=d" (result2_) 01066 : "0" (a), "1" (b) 01067 : "cc" ); 01068 01069 #endif 01070 01071 01072 *result_low = result1_; 01073 *result_high = result2_; 01074 } 01075 01076 01077 01078 01079 /*! 01080 * 01081 * Division 01082 * 01083 * 01084 */ 01085 01086 01087 /*! 01088 this method calculates 64bits word a:b / 32bits c (a higher, b lower word) 01089 r = a:b / c and rest - remainder 01090 01091 ***this method is created only on a 64bit platform*** 01092 01093 * 01094 * WARNING: 01095 * if r (one word) is too small for the result or c is equal zero 01096 * there'll be a hardware interruption (0) 01097 * and probably the end of your program 01098 * 01099 */ 01100 template<uint value_size> 01101 void UInt<value_size>::DivTwoWords (uint a,uint b, uint c, uint * r, uint * rest) 01102 { 01103 uint r_; 01104 uint rest_; 01105 /* 01106 these variables have similar meaning like those in 01107 the multiplication algorithm MulTwoWords 01108 */ 01109 01110 TTMATH_ASSERT( c != 0 ) 01111 01112 01113 #ifndef __GNUC__ 01114 01115 ttmath_div_x64(&a,&b,c); 01116 r_ = a; 01117 rest_ = b; 01118 01119 #endif 01120 01121 01122 #ifdef __GNUC__ 01123 01124 __asm__ ( 01125 01126 "divq %%rcx \n" 01127 01128 : "=a" (r_), "=d" (rest_) 01129 : "d" (a), "a" (b), "c" (c) 01130 : "cc" ); 01131 01132 #endif 01133 01134 01135 *r = r_; 01136 *rest = rest_; 01137 } 01138 01139 } //namespace 01140 01141 01142 #endif //ifdef TTMATH_PLATFORM64 01143 #endif //ifndef TTMATH_NOASM 01144 #endif 01145 01146 01147
Generated on Tue Jul 12 2022 14:03:18 by 1.7.2