Library for big numbers from http://www.ttmath.org/

Dependents:   PIDHeater82 Conceptcontroller_v_1_0 AlarmClockApp COG4050_adxl355_tilt ... more

TTMath is a small library which allows one to perform arithmetic operations on big unsigned integers, big signed integers and big floating point numbers. It provides standard mathematical operations such as adding, subtracting, multiplying and dividing.

TTMath is BSD Licensed (new/modified BSD)

For more information about ttmath see http://www.ttmath.org/

Committer:
stevep
Date:
Tue Jul 30 18:43:48 2013 +0000
Revision:
0:04a9f72bbca7
v0.9.3 of ttmath

Who changed what in which revision?

User | Revision | Line number | New contents of line
stevep 0:04a9f72bbca7 1 /*
stevep 0:04a9f72bbca7 2 * This file is a part of TTMath Bignum Library
stevep 0:04a9f72bbca7 3 * and is distributed under the (new) BSD licence.
stevep 0:04a9f72bbca7 4 * Author: Tomasz Sowa <t.sowa@ttmath.org>
stevep 0:04a9f72bbca7 5 */
stevep 0:04a9f72bbca7 6
stevep 0:04a9f72bbca7 7 /*
stevep 0:04a9f72bbca7 8 * Copyright (c) 2006-2010, Tomasz Sowa
stevep 0:04a9f72bbca7 9 * All rights reserved.
stevep 0:04a9f72bbca7 10 *
stevep 0:04a9f72bbca7 11 * Redistribution and use in source and binary forms, with or without
stevep 0:04a9f72bbca7 12 * modification, are permitted provided that the following conditions are met:
stevep 0:04a9f72bbca7 13 *
stevep 0:04a9f72bbca7 14 * * Redistributions of source code must retain the above copyright notice,
stevep 0:04a9f72bbca7 15 * this list of conditions and the following disclaimer.
stevep 0:04a9f72bbca7 16 *
stevep 0:04a9f72bbca7 17 * * Redistributions in binary form must reproduce the above copyright
stevep 0:04a9f72bbca7 18 * notice, this list of conditions and the following disclaimer in the
stevep 0:04a9f72bbca7 19 * documentation and/or other materials provided with the distribution.
stevep 0:04a9f72bbca7 20 *
stevep 0:04a9f72bbca7 21 * * Neither the name Tomasz Sowa nor the names of contributors to this
stevep 0:04a9f72bbca7 22 * project may be used to endorse or promote products derived
stevep 0:04a9f72bbca7 23 * from this software without specific prior written permission.
stevep 0:04a9f72bbca7 24 *
stevep 0:04a9f72bbca7 25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
stevep 0:04a9f72bbca7 26 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
stevep 0:04a9f72bbca7 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
stevep 0:04a9f72bbca7 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
stevep 0:04a9f72bbca7 29 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
stevep 0:04a9f72bbca7 30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
stevep 0:04a9f72bbca7 31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
stevep 0:04a9f72bbca7 32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
stevep 0:04a9f72bbca7 33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
stevep 0:04a9f72bbca7 34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
stevep 0:04a9f72bbca7 35 * THE POSSIBILITY OF SUCH DAMAGE.
stevep 0:04a9f72bbca7 36 */
stevep 0:04a9f72bbca7 37
stevep 0:04a9f72bbca7 38
stevep 0:04a9f72bbca7 39 #ifndef headerfilettmathuint_x86_64
stevep 0:04a9f72bbca7 40 #define headerfilettmathuint_x86_64
stevep 0:04a9f72bbca7 41
stevep 0:04a9f72bbca7 42
stevep 0:04a9f72bbca7 43 #ifndef TTMATH_NOASM
stevep 0:04a9f72bbca7 44 #ifdef TTMATH_PLATFORM64
stevep 0:04a9f72bbca7 45
stevep 0:04a9f72bbca7 46
stevep 0:04a9f72bbca7 47 /*!
stevep 0:04a9f72bbca7 48 \file ttmathuint_x86_64.h
stevep 0:04a9f72bbca7 49 \brief template class UInt<uint> with assembler code for 64bit x86_64 processors
stevep 0:04a9f72bbca7 50
stevep 0:04a9f72bbca7 51 this file is included at the end of ttmathuint.h
stevep 0:04a9f72bbca7 52 */
stevep 0:04a9f72bbca7 53
stevep 0:04a9f72bbca7 54 #ifndef __GNUC__
stevep 0:04a9f72bbca7 55 #include <intrin.h>
stevep 0:04a9f72bbca7 56 #endif
stevep 0:04a9f72bbca7 57
stevep 0:04a9f72bbca7 58
stevep 0:04a9f72bbca7 59 namespace ttmath
stevep 0:04a9f72bbca7 60 {
stevep 0:04a9f72bbca7 61
stevep 0:04a9f72bbca7 62 #ifndef __GNUC__
stevep 0:04a9f72bbca7 63
stevep 0:04a9f72bbca7 64 extern "C" // prototypes only: these routines are declared here and not defined in this file; the non-GCC code paths below call them
stevep 0:04a9f72bbca7 65 {
stevep 0:04a9f72bbca7 66 uint __fastcall ttmath_adc_x64(uint* p1, const uint* p2, uint nSize, uint c); // called by UInt::Add
stevep 0:04a9f72bbca7 67 uint __fastcall ttmath_addindexed_x64(uint* p1, uint nSize, uint nPos, uint nValue); // called by UInt::AddInt
stevep 0:04a9f72bbca7 68 uint __fastcall ttmath_addindexed2_x64(uint* p1, uint nSize, uint nPos, uint nValue1, uint nValue2); // called by UInt::AddTwoInts
stevep 0:04a9f72bbca7 69 uint __fastcall ttmath_addvector_x64(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result); // called by UInt::AddVector
stevep 0:04a9f72bbca7 70 uint __fastcall ttmath_sbb_x64(uint* p1, const uint* p2, uint nSize, uint c); // called by UInt::Sub
stevep 0:04a9f72bbca7 71 uint __fastcall ttmath_subindexed_x64(uint* p1, uint nSize, uint nPos, uint nValue); // called by UInt::SubInt
stevep 0:04a9f72bbca7 72 uint __fastcall ttmath_subvector_x64(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result); // called by UInt::SubVector
stevep 0:04a9f72bbca7 73 uint __fastcall ttmath_rcl_x64(uint* p1, uint nSize, uint nLowestBit); // called by UInt::Rcl2_one
stevep 0:04a9f72bbca7 74 uint __fastcall ttmath_rcr_x64(uint* p1, uint nSize, uint nLowestBit); // called by UInt::Rcr2_one
stevep 0:04a9f72bbca7 75 uint __fastcall ttmath_div_x64(uint* pnValHi, uint* pnValLo, uint nDiv); // NOTE(review): caller is not in the part of the file shown here
stevep 0:04a9f72bbca7 76 uint __fastcall ttmath_rcl2_x64(uint* p1, uint nSize, uint nBits, uint c); // NOTE(review): caller is not in the part of the file shown here
stevep 0:04a9f72bbca7 77 uint __fastcall ttmath_rcr2_x64(uint* p1, uint nSize, uint nBits, uint c); // NOTE(review): caller is not in the part of the file shown here
stevep 0:04a9f72bbca7 78 };
stevep 0:04a9f72bbca7 79 #endif
stevep 0:04a9f72bbca7 80
stevep 0:04a9f72bbca7 81
stevep 0:04a9f72bbca7 82 /*!
stevep 0:04a9f72bbca7 83 returns a string which represents the current type of the library
stevep 0:04a9f72bbca7 84 we have following types:
stevep 0:04a9f72bbca7 85 asm_vc_32 - with asm code designed for Microsoft Visual C++ (32 bits)
stevep 0:04a9f72bbca7 86 asm_gcc_32 - with asm code designed for GCC (32 bits)
stevep 0:04a9f72bbca7 87 asm_vc_64 - with asm for VC (64 bit)
stevep 0:04a9f72bbca7 88 asm_gcc_64 - with asm for GCC (64 bit)
stevep 0:04a9f72bbca7 89 no_asm_32 - pure C++ version (32 bit) - without any asm code
stevep 0:04a9f72bbca7 90 no_asm_64 - pure C++ version (64 bit) - without any asm code
stevep 0:04a9f72bbca7 91 */
stevep 0:04a9f72bbca7 92 template<uint value_size>
stevep 0:04a9f72bbca7 93 const char * UInt<value_size>::LibTypeStr() // returns "asm_vc_64" or "asm_gcc_64", selected at compile time by __GNUC__
stevep 0:04a9f72bbca7 94 {
stevep 0:04a9f72bbca7 95 #ifndef __GNUC__
stevep 0:04a9f72bbca7 96 static const char info[] = "asm_vc_64"; // build without __GNUC__ (the VC variant listed in the comment above)
stevep 0:04a9f72bbca7 97 #endif
stevep 0:04a9f72bbca7 98
stevep 0:04a9f72bbca7 99 #ifdef __GNUC__
stevep 0:04a9f72bbca7 100 static const char info[] = "asm_gcc_64"; // build with __GNUC__ defined
stevep 0:04a9f72bbca7 101 #endif
stevep 0:04a9f72bbca7 102
stevep 0:04a9f72bbca7 103 return info; // 'info' is a static array, so the returned pointer stays valid after the call
stevep 0:04a9f72bbca7 104 }
stevep 0:04a9f72bbca7 105
stevep 0:04a9f72bbca7 106
stevep 0:04a9f72bbca7 107 /*!
stevep 0:04a9f72bbca7 108 returns the current type of the library
stevep 0:04a9f72bbca7 109 */
stevep 0:04a9f72bbca7 110 template<uint value_size>
stevep 0:04a9f72bbca7 111 LibTypeCode UInt<value_size>::LibType() // returns asm_vc_64 or asm_gcc_64, selected at compile time by __GNUC__
stevep 0:04a9f72bbca7 112 {
stevep 0:04a9f72bbca7 113 #ifndef __GNUC__
stevep 0:04a9f72bbca7 114 LibTypeCode info = asm_vc_64; // build without __GNUC__
stevep 0:04a9f72bbca7 115 #endif
stevep 0:04a9f72bbca7 116
stevep 0:04a9f72bbca7 117 #ifdef __GNUC__
stevep 0:04a9f72bbca7 118 LibTypeCode info = asm_gcc_64; // build with __GNUC__ defined
stevep 0:04a9f72bbca7 119 #endif
stevep 0:04a9f72bbca7 120
stevep 0:04a9f72bbca7 121 return info;
stevep 0:04a9f72bbca7 122 }
stevep 0:04a9f72bbca7 123
stevep 0:04a9f72bbca7 124
stevep 0:04a9f72bbca7 125 /*!
stevep 0:04a9f72bbca7 126 *
stevep 0:04a9f72bbca7 127 * basic mathematic functions
stevep 0:04a9f72bbca7 128 *
stevep 0:04a9f72bbca7 129 */
stevep 0:04a9f72bbca7 130
stevep 0:04a9f72bbca7 131
stevep 0:04a9f72bbca7 132
stevep 0:04a9f72bbca7 133 /*!
stevep 0:04a9f72bbca7 134 this method adds ss2 to this object and also adds the carry c if it is set
stevep 0:04a9f72bbca7 135 (this = this + ss2 + c)
stevep 0:04a9f72bbca7 136
stevep 0:04a9f72bbca7 137 ***this method is created only on a 64bit platform***
stevep 0:04a9f72bbca7 138
stevep 0:04a9f72bbca7 139 c should be zero or one (a bigger value is tolerated: the GCC code treats any non-zero c as a carry)
stevep 0:04a9f72bbca7 140 the function returns the carry (1) if there was one
stevep 0:04a9f72bbca7 141 */
stevep 0:04a9f72bbca7 142 template<uint value_size>
stevep 0:04a9f72bbca7 143 uint UInt<value_size>::Add(const UInt<value_size> & ss2, uint c) // this = this + ss2 + c; returns the carry out of the last word
stevep 0:04a9f72bbca7 144 {
stevep 0:04a9f72bbca7 145 uint b = value_size; // loop counter: number of words to add
stevep 0:04a9f72bbca7 146 uint * p1 = table; // destination words, modified in place
stevep 0:04a9f72bbca7 147 const uint * p2 = ss2.table; // source words, read only
stevep 0:04a9f72bbca7 148
stevep 0:04a9f72bbca7 149 // we don't have to use TTMATH_REFERENCE_ASSERT here
stevep 0:04a9f72bbca7 150 // this algorithm doesn't require it
stevep 0:04a9f72bbca7 151
stevep 0:04a9f72bbca7 152 #ifndef __GNUC__
stevep 0:04a9f72bbca7 153 c = ttmath_adc_x64(p1,p2,b,c); // non-GCC path: the external routine receives both tables, the word count and the incoming carry
stevep 0:04a9f72bbca7 154 #endif
stevep 0:04a9f72bbca7 155
stevep 0:04a9f72bbca7 156 #ifdef __GNUC__
stevep 0:04a9f72bbca7 157 uint dummy, dummy2; // receive rax/rdx, which the asm overwrites; the values are not used afterwards
stevep 0:04a9f72bbca7 158
stevep 0:04a9f72bbca7 159 /*
stevep 0:04a9f72bbca7 160 this part should be compiled with gcc
stevep 0:04a9f72bbca7 161 */
stevep 0:04a9f72bbca7 162 __asm__ __volatile__(
stevep 0:04a9f72bbca7 163
stevep 0:04a9f72bbca7 164 "xorq %%rdx, %%rdx \n" // rdx = 0: word index
stevep 0:04a9f72bbca7 165 "negq %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
stevep 0:04a9f72bbca7 166
stevep 0:04a9f72bbca7 167 "1: \n" // loop: one word per iteration
stevep 0:04a9f72bbca7 168 "movq (%%rsi,%%rdx,8), %%rax \n" // rax = ss2.table[rdx]
stevep 0:04a9f72bbca7 169 "adcq %%rax, (%%rbx,%%rdx,8) \n" // table[rdx] += rax + CF
stevep 0:04a9f72bbca7 170
stevep 0:04a9f72bbca7 171 "incq %%rdx \n" // next word (inc does not change CF)
stevep 0:04a9f72bbca7 172 "decq %%rcx \n" // one word fewer to go (dec does not change CF)
stevep 0:04a9f72bbca7 173 "jnz 1b \n"
stevep 0:04a9f72bbca7 174
stevep 0:04a9f72bbca7 175 "adcq %%rcx, %%rcx \n" // rcx is 0 here, so rcx = CF: the final carry
stevep 0:04a9f72bbca7 176
stevep 0:04a9f72bbca7 177 : "=c" (c), "=a" (dummy), "=d" (dummy2) // outputs: rcx -> c, rax and rdx discarded
stevep 0:04a9f72bbca7 178 : "0" (b), "1" (c), "b" (p1), "S" (p2) // inputs: rcx = b, rax = c, rbx = p1, rsi = p2
stevep 0:04a9f72bbca7 179 : "cc", "memory" ); // the asm changes the flags and writes to memory
stevep 0:04a9f72bbca7 180
stevep 0:04a9f72bbca7 181 #endif
stevep 0:04a9f72bbca7 182
stevep 0:04a9f72bbca7 183 TTMATH_LOGC("UInt::Add", c)
stevep 0:04a9f72bbca7 184
stevep 0:04a9f72bbca7 185 return c;
stevep 0:04a9f72bbca7 186 }
stevep 0:04a9f72bbca7 187
stevep 0:04a9f72bbca7 188
stevep 0:04a9f72bbca7 189
stevep 0:04a9f72bbca7 190 /*!
stevep 0:04a9f72bbca7 191 this method adds one word (at a specific position)
stevep 0:04a9f72bbca7 192 and returns a carry (if there was one)
stevep 0:04a9f72bbca7 193
stevep 0:04a9f72bbca7 194 ***this method is created only on a 64bit platform***
stevep 0:04a9f72bbca7 195
stevep 0:04a9f72bbca7 196
stevep 0:04a9f72bbca7 197 if we've got (value_size=3):
stevep 0:04a9f72bbca7 198 table[0] = 10;
stevep 0:04a9f72bbca7 199 table[1] = 30;
stevep 0:04a9f72bbca7 200 table[2] = 5;
stevep 0:04a9f72bbca7 201 and we call:
stevep 0:04a9f72bbca7 202 AddInt(2,1)
stevep 0:04a9f72bbca7 203 then it'll be:
stevep 0:04a9f72bbca7 204 table[0] = 10;
stevep 0:04a9f72bbca7 205 table[1] = 30 + 2;
stevep 0:04a9f72bbca7 206 table[2] = 5;
stevep 0:04a9f72bbca7 207
stevep 0:04a9f72bbca7 208 of course if there was a carry from table[2] it would be returned
stevep 0:04a9f72bbca7 209 */
stevep 0:04a9f72bbca7 210 template<uint value_size>
stevep 0:04a9f72bbca7 211 uint UInt<value_size>::AddInt(uint value, uint index) // table[index] += value, carry propagated upwards; returns the final carry
stevep 0:04a9f72bbca7 212 {
stevep 0:04a9f72bbca7 213 uint b = value_size; // total number of words in the table
stevep 0:04a9f72bbca7 214 uint * p1 = table; // words modified in place
stevep 0:04a9f72bbca7 215 uint c;
stevep 0:04a9f72bbca7 216
stevep 0:04a9f72bbca7 217 TTMATH_ASSERT( index < value_size )
stevep 0:04a9f72bbca7 218
stevep 0:04a9f72bbca7 219 #ifndef __GNUC__
stevep 0:04a9f72bbca7 220 c = ttmath_addindexed_x64(p1,b,index,value); // non-GCC path: external routine
stevep 0:04a9f72bbca7 221 #endif
stevep 0:04a9f72bbca7 222
stevep 0:04a9f72bbca7 223
stevep 0:04a9f72bbca7 224 #ifdef __GNUC__
stevep 0:04a9f72bbca7 225 uint dummy, dummy2; // receive rax/rcx, which the asm overwrites; the values are not used afterwards
stevep 0:04a9f72bbca7 226
stevep 0:04a9f72bbca7 227 __asm__ __volatile__(
stevep 0:04a9f72bbca7 228
stevep 0:04a9f72bbca7 229 "subq %%rdx, %%rcx \n" // rcx = value_size - index: words from 'index' to the end of the table
stevep 0:04a9f72bbca7 230
stevep 0:04a9f72bbca7 231 "1: \n"
stevep 0:04a9f72bbca7 232 "addq %%rax, (%%rbx,%%rdx,8) \n" // table[rdx] += rax; CF = carry out of this word
stevep 0:04a9f72bbca7 233 "jnc 2f \n" // no carry: nothing more to propagate
stevep 0:04a9f72bbca7 234
stevep 0:04a9f72bbca7 235 "movq $1, %%rax \n" // the next word receives a carry of 1
stevep 0:04a9f72bbca7 236 "incq %%rdx \n" // next word (inc does not change CF)
stevep 0:04a9f72bbca7 237 "decq %%rcx \n" // dec does not change CF, so CF is still 1 if the loop runs out of words
stevep 0:04a9f72bbca7 238 "jnz 1b \n"
stevep 0:04a9f72bbca7 239
stevep 0:04a9f72bbca7 240 "2: \n"
stevep 0:04a9f72bbca7 241 "setc %%al \n" // al = CF
stevep 0:04a9f72bbca7 242 "movzx %%al, %%rdx \n" // rdx = carry zero-extended; this becomes c
stevep 0:04a9f72bbca7 243
stevep 0:04a9f72bbca7 244 : "=d" (c), "=a" (dummy), "=c" (dummy2) // outputs: rdx -> c, rax and rcx discarded
stevep 0:04a9f72bbca7 245 : "0" (index), "1" (value), "2" (b), "b" (p1) // inputs: rdx = index, rax = value, rcx = b, rbx = p1
stevep 0:04a9f72bbca7 246 : "cc", "memory" ); // the asm changes the flags and writes to memory
stevep 0:04a9f72bbca7 247
stevep 0:04a9f72bbca7 248 #endif
stevep 0:04a9f72bbca7 249
stevep 0:04a9f72bbca7 250 TTMATH_LOGC("UInt::AddInt", c)
stevep 0:04a9f72bbca7 251
stevep 0:04a9f72bbca7 252 return c;
stevep 0:04a9f72bbca7 253 }
stevep 0:04a9f72bbca7 254
stevep 0:04a9f72bbca7 255
stevep 0:04a9f72bbca7 256
stevep 0:04a9f72bbca7 257 /*!
stevep 0:04a9f72bbca7 258 this method adds only two unsigned words to the existing value
stevep 0:04a9f72bbca7 259 and these words begin on the 'index' position
stevep 0:04a9f72bbca7 260 (it's used in the multiplication algorithm 2)
stevep 0:04a9f72bbca7 261
stevep 0:04a9f72bbca7 262 ***this method is created only on a 64bit platform***
stevep 0:04a9f72bbca7 263
stevep 0:04a9f72bbca7 264 index should be equal or smaller than value_size-2 (index <= value_size-2)
stevep 0:04a9f72bbca7 265 x1 - lower word, x2 - higher word
stevep 0:04a9f72bbca7 266
stevep 0:04a9f72bbca7 267 for example if we've got value_size equal 4 and:
stevep 0:04a9f72bbca7 268 table[0] = 3
stevep 0:04a9f72bbca7 269 table[1] = 4
stevep 0:04a9f72bbca7 270 table[2] = 5
stevep 0:04a9f72bbca7 271 table[3] = 6
stevep 0:04a9f72bbca7 272 then let
stevep 0:04a9f72bbca7 273 x1 = 10
stevep 0:04a9f72bbca7 274 x2 = 20
stevep 0:04a9f72bbca7 275 and
stevep 0:04a9f72bbca7 276 index = 1
stevep 0:04a9f72bbca7 277
stevep 0:04a9f72bbca7 278 the result of this method will be:
stevep 0:04a9f72bbca7 279 table[0] = 3
stevep 0:04a9f72bbca7 280 table[1] = 4 + x1 = 14
stevep 0:04a9f72bbca7 281 table[2] = 5 + x2 = 25
stevep 0:04a9f72bbca7 282 table[3] = 6
stevep 0:04a9f72bbca7 283
stevep 0:04a9f72bbca7 284 and no carry at the end of table[3]
stevep 0:04a9f72bbca7 285
stevep 0:04a9f72bbca7 286 (of course if there was a carry in table[2](5+20) then
stevep 0:04a9f72bbca7 287 this carry would be passed to the table[3] etc.)
stevep 0:04a9f72bbca7 288 */
stevep 0:04a9f72bbca7 289 template<uint value_size>
stevep 0:04a9f72bbca7 290 uint UInt<value_size>::AddTwoInts(uint x2, uint x1, uint index) // table[index] += x1, table[index+1] += x2, carry propagated upwards; note that x2 (higher word) is the first parameter
stevep 0:04a9f72bbca7 291 {
stevep 0:04a9f72bbca7 292 uint b = value_size; // total number of words in the table
stevep 0:04a9f72bbca7 293 uint * p1 = table; // words modified in place
stevep 0:04a9f72bbca7 294 uint c;
stevep 0:04a9f72bbca7 295
stevep 0:04a9f72bbca7 296 TTMATH_ASSERT( index < value_size - 1 )
stevep 0:04a9f72bbca7 297
stevep 0:04a9f72bbca7 298 #ifndef __GNUC__
stevep 0:04a9f72bbca7 299 c = ttmath_addindexed2_x64(p1,b,index,x1,x2); // non-GCC path: external routine (x1 is passed before x2 here)
stevep 0:04a9f72bbca7 300 #endif
stevep 0:04a9f72bbca7 301
stevep 0:04a9f72bbca7 302
stevep 0:04a9f72bbca7 303 #ifdef __GNUC__
stevep 0:04a9f72bbca7 304 uint dummy, dummy2; // receive rcx/rdx, which the asm overwrites; the values are not used afterwards
stevep 0:04a9f72bbca7 305
stevep 0:04a9f72bbca7 306 __asm__ __volatile__(
stevep 0:04a9f72bbca7 307
stevep 0:04a9f72bbca7 308 "subq %%rdx, %%rcx \n" // rcx = value_size - index: words from 'index' to the end of the table
stevep 0:04a9f72bbca7 309
stevep 0:04a9f72bbca7 310 "addq %%rsi, (%%rbx,%%rdx,8) \n" // table[index] += x1; CF = carry out
stevep 0:04a9f72bbca7 311 "incq %%rdx \n" // move to table[index+1] (inc does not change CF)
stevep 0:04a9f72bbca7 312 "decq %%rcx \n" // dec does not change CF either
stevep 0:04a9f72bbca7 313
stevep 0:04a9f72bbca7 314 "1: \n"
stevep 0:04a9f72bbca7 315 "adcq %%rax, (%%rbx,%%rdx,8) \n" // first pass: table[index+1] += x2 + CF; later passes: rax is 0, so only the carry is added
stevep 0:04a9f72bbca7 316 "jnc 2f \n" // no carry: nothing more to propagate
stevep 0:04a9f72bbca7 317
stevep 0:04a9f72bbca7 318 "mov $0, %%rax \n" // x2 has been added; from now on only the carry moves upwards (mov does not change CF)
stevep 0:04a9f72bbca7 319 "incq %%rdx \n" // next word
stevep 0:04a9f72bbca7 320 "decq %%rcx \n"
stevep 0:04a9f72bbca7 321 "jnz 1b \n"
stevep 0:04a9f72bbca7 322
stevep 0:04a9f72bbca7 323 "2: \n"
stevep 0:04a9f72bbca7 324 "setc %%al \n" // al = CF
stevep 0:04a9f72bbca7 325 "movzx %%al, %%rax \n" // rax = carry zero-extended; this becomes c
stevep 0:04a9f72bbca7 326
stevep 0:04a9f72bbca7 327 : "=a" (c), "=c" (dummy), "=d" (dummy2) // outputs: rax -> c, rcx and rdx discarded
stevep 0:04a9f72bbca7 328 : "0" (x2), "1" (b), "2" (index), "b" (p1), "S" (x1) // inputs: rax = x2, rcx = b, rdx = index, rbx = p1, rsi = x1
stevep 0:04a9f72bbca7 329 : "cc", "memory" ); // the asm changes the flags and writes to memory
stevep 0:04a9f72bbca7 330
stevep 0:04a9f72bbca7 331 #endif
stevep 0:04a9f72bbca7 332
stevep 0:04a9f72bbca7 333 TTMATH_LOGC("UInt::AddTwoInts", c)
stevep 0:04a9f72bbca7 334
stevep 0:04a9f72bbca7 335 return c;
stevep 0:04a9f72bbca7 336 }
stevep 0:04a9f72bbca7 337
stevep 0:04a9f72bbca7 338
stevep 0:04a9f72bbca7 339
stevep 0:04a9f72bbca7 340 /*!
stevep 0:04a9f72bbca7 341 this static method adds one vector to the other
stevep 0:04a9f72bbca7 342 'ss1' is larger in size or equal to 'ss2'
stevep 0:04a9f72bbca7 343
stevep 0:04a9f72bbca7 344 ss1 points to the first (larger) vector
stevep 0:04a9f72bbca7 345 ss2 points to the second vector
stevep 0:04a9f72bbca7 346 ss1_size - size of the ss1 (and size of the result too)
stevep 0:04a9f72bbca7 347 ss2_size - size of the ss2
stevep 0:04a9f72bbca7 348 result - is the result vector (which has size the same as ss1: ss1_size)
stevep 0:04a9f72bbca7 349
stevep 0:04a9f72bbca7 350 Example: ss1_size is 5, ss2_size is 3
stevep 0:04a9f72bbca7 351 ss1: ss2: result (output):
stevep 0:04a9f72bbca7 352 5 1 5+1
stevep 0:04a9f72bbca7 353 4 3 4+3
stevep 0:04a9f72bbca7 354 2 7 2+7
stevep 0:04a9f72bbca7 355 6 6
stevep 0:04a9f72bbca7 356 9 9
stevep 0:04a9f72bbca7 357 of course the carry is propagated and will be returned from the last item
stevep 0:04a9f72bbca7 358 (this method is used by the Karatsuba multiplication algorithm)
stevep 0:04a9f72bbca7 359 */
stevep 0:04a9f72bbca7 360 template<uint value_size>
stevep 0:04a9f72bbca7 361 uint UInt<value_size>::AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result) // result = ss1 + ss2 (word vectors); returns the final carry
stevep 0:04a9f72bbca7 362 {
stevep 0:04a9f72bbca7 363 TTMATH_ASSERT( ss1_size >= ss2_size )
stevep 0:04a9f72bbca7 364
stevep 0:04a9f72bbca7 365 uint c;
stevep 0:04a9f72bbca7 366
stevep 0:04a9f72bbca7 367 #ifndef __GNUC__
stevep 0:04a9f72bbca7 368 c = ttmath_addvector_x64(ss1, ss2, ss1_size, ss2_size, result); // non-GCC path: external routine
stevep 0:04a9f72bbca7 369 #endif
stevep 0:04a9f72bbca7 370
stevep 0:04a9f72bbca7 371
stevep 0:04a9f72bbca7 372 #ifdef __GNUC__
stevep 0:04a9f72bbca7 373 uint dummy1, dummy2, dummy3; // receive rax/rbx/rdx, which the asm overwrites; the values are not used afterwards
stevep 0:04a9f72bbca7 374 uint rest = ss1_size - ss2_size; // number of words ss1 has beyond the end of ss2
stevep 0:04a9f72bbca7 375
stevep 0:04a9f72bbca7 376 // this part should be compiled with gcc
stevep 0:04a9f72bbca7 377
stevep 0:04a9f72bbca7 378 __asm__ __volatile__(
stevep 0:04a9f72bbca7 379 "mov %%rdx, %%r8 \n" // r8 = rest (rdx is reused as the word index)
stevep 0:04a9f72bbca7 380 "xor %%rdx, %%rdx \n" // rdx = 0, cf = 0
stevep 0:04a9f72bbca7 381 "1: \n" // loop 1: the ss2_size words that both vectors have
stevep 0:04a9f72bbca7 382 "mov (%%rsi,%%rdx,8), %%rax \n" // rax = ss1[rdx]
stevep 0:04a9f72bbca7 383 "adc (%%rbx,%%rdx,8), %%rax \n" // rax += ss2[rdx] + CF
stevep 0:04a9f72bbca7 384 "mov %%rax, (%%rdi,%%rdx,8) \n" // result[rdx] = rax
stevep 0:04a9f72bbca7 385
stevep 0:04a9f72bbca7 386 "inc %%rdx \n" // next word (inc does not change CF)
stevep 0:04a9f72bbca7 387 "dec %%rcx \n" // NOTE(review): the count is tested only after this decrement, so ss2_size == 0 would wrap rcx instead of skipping the loop - confirm callers never pass 0
stevep 0:04a9f72bbca7 388 "jnz 1b \n"
stevep 0:04a9f72bbca7 389
stevep 0:04a9f72bbca7 390 "adc %%rcx, %%rcx \n" // rcx has the cf state
stevep 0:04a9f72bbca7 391
stevep 0:04a9f72bbca7 392 "or %%r8, %%r8 \n" // test 'rest' for zero; 'or' clears CF, which is why the carry was saved in rcx first
stevep 0:04a9f72bbca7 393 "jz 3f \n" // rest == 0: finished, rcx already holds the carry
stevep 0:04a9f72bbca7 394
stevep 0:04a9f72bbca7 395 "xor %%rbx, %%rbx \n" // rbx = 0
stevep 0:04a9f72bbca7 396 "neg %%rcx \n" // setting cf from rcx
stevep 0:04a9f72bbca7 397 "mov %%r8, %%rcx \n" // rcx=rest and is != 0
stevep 0:04a9f72bbca7 398 "2: \n" // loop 2: remaining words of ss1, only the carry is added
stevep 0:04a9f72bbca7 399 "mov (%%rsi, %%rdx, 8), %%rax \n" // rax = ss1[rdx]
stevep 0:04a9f72bbca7 400 "adc %%rbx, %%rax \n" // rax += 0 + CF
stevep 0:04a9f72bbca7 401 "mov %%rax, (%%rdi, %%rdx, 8) \n" // result[rdx] = rax
stevep 0:04a9f72bbca7 402
stevep 0:04a9f72bbca7 403 "inc %%rdx \n"
stevep 0:04a9f72bbca7 404 "dec %%rcx \n"
stevep 0:04a9f72bbca7 405 "jnz 2b \n"
stevep 0:04a9f72bbca7 406
stevep 0:04a9f72bbca7 407 "adc %%rcx, %%rcx \n" // rcx is 0 here, so rcx = CF: the final carry
stevep 0:04a9f72bbca7 408 "3: \n"
stevep 0:04a9f72bbca7 409
stevep 0:04a9f72bbca7 410 : "=a" (dummy1), "=b" (dummy2), "=c" (c), "=d" (dummy3) // outputs: rcx -> c, rax/rbx/rdx discarded
stevep 0:04a9f72bbca7 411 : "1" (ss2), "2" (ss2_size), "3" (rest), "S" (ss1), "D" (result) // inputs: rbx = ss2, rcx = ss2_size, rdx = rest, rsi = ss1, rdi = result
stevep 0:04a9f72bbca7 412 : "%r8", "cc", "memory" ); // r8 is used as scratch; the asm changes the flags and writes to memory
stevep 0:04a9f72bbca7 413
stevep 0:04a9f72bbca7 414 #endif
stevep 0:04a9f72bbca7 415
stevep 0:04a9f72bbca7 416 TTMATH_VECTOR_LOGC("UInt::AddVector", c, result, ss1_size)
stevep 0:04a9f72bbca7 417
stevep 0:04a9f72bbca7 418 return c;
stevep 0:04a9f72bbca7 419 }
stevep 0:04a9f72bbca7 420
stevep 0:04a9f72bbca7 421
stevep 0:04a9f72bbca7 422
stevep 0:04a9f72bbca7 423 /*!
stevep 0:04a9f72bbca7 424 this method subtracts ss2 from 'this' and also subtracts
stevep 0:04a9f72bbca7 425 the carry c if it is set
stevep 0:04a9f72bbca7 426 (this = this - ss2 - c)
stevep 0:04a9f72bbca7 427
stevep 0:04a9f72bbca7 428 ***this method is created only on a 64bit platform***
stevep 0:04a9f72bbca7 429
stevep 0:04a9f72bbca7 430 c should be zero or one (a bigger value is tolerated: the GCC code treats any non-zero c as a carry)
stevep 0:04a9f72bbca7 431 the function returns the carry (1) if there was one
stevep 0:04a9f72bbca7 432 */
stevep 0:04a9f72bbca7 433 template<uint value_size>
stevep 0:04a9f72bbca7 434 uint UInt<value_size>::Sub(const UInt<value_size> & ss2, uint c) // this = this - ss2 - c; returns the borrow out of the last word
stevep 0:04a9f72bbca7 435 {
stevep 0:04a9f72bbca7 436 uint b = value_size; // loop counter: number of words to subtract
stevep 0:04a9f72bbca7 437 uint * p1 = table; // destination words, modified in place
stevep 0:04a9f72bbca7 438 const uint * p2 = ss2.table; // source words, read only
stevep 0:04a9f72bbca7 439
stevep 0:04a9f72bbca7 440 // we don't have to use TTMATH_REFERENCE_ASSERT here
stevep 0:04a9f72bbca7 441 // this algorithm doesn't require it
stevep 0:04a9f72bbca7 442
stevep 0:04a9f72bbca7 443 #ifndef __GNUC__
stevep 0:04a9f72bbca7 444 c = ttmath_sbb_x64(p1,p2,b,c); // non-GCC path: external routine
stevep 0:04a9f72bbca7 445 #endif
stevep 0:04a9f72bbca7 446
stevep 0:04a9f72bbca7 447
stevep 0:04a9f72bbca7 448 #ifdef __GNUC__
stevep 0:04a9f72bbca7 449 uint dummy, dummy2; // receive rax/rdx, which the asm overwrites; the values are not used afterwards
stevep 0:04a9f72bbca7 450
stevep 0:04a9f72bbca7 451 __asm__ __volatile__(
stevep 0:04a9f72bbca7 452
stevep 0:04a9f72bbca7 453 "xorq %%rdx, %%rdx \n" // rdx = 0: word index
stevep 0:04a9f72bbca7 454 "negq %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
stevep 0:04a9f72bbca7 455
stevep 0:04a9f72bbca7 456 "1: \n" // loop: one word per iteration
stevep 0:04a9f72bbca7 457 "movq (%%rsi,%%rdx,8), %%rax \n" // rax = ss2.table[rdx]
stevep 0:04a9f72bbca7 458 "sbbq %%rax, (%%rbx,%%rdx,8) \n" // table[rdx] -= rax + CF
stevep 0:04a9f72bbca7 459
stevep 0:04a9f72bbca7 460 "incq %%rdx \n" // next word (inc does not change CF)
stevep 0:04a9f72bbca7 461 "decq %%rcx \n" // one word fewer to go (dec does not change CF)
stevep 0:04a9f72bbca7 462 "jnz 1b \n"
stevep 0:04a9f72bbca7 463
stevep 0:04a9f72bbca7 464 "adcq %%rcx, %%rcx \n" // rcx is 0 here, so rcx = CF: the final borrow
stevep 0:04a9f72bbca7 465
stevep 0:04a9f72bbca7 466 : "=c" (c), "=a" (dummy), "=d" (dummy2) // outputs: rcx -> c, rax and rdx discarded
stevep 0:04a9f72bbca7 467 : "0" (b), "1" (c), "b" (p1), "S" (p2) // inputs: rcx = b, rax = c, rbx = p1, rsi = p2
stevep 0:04a9f72bbca7 468 : "cc", "memory" ); // the asm changes the flags and writes to memory
stevep 0:04a9f72bbca7 469
stevep 0:04a9f72bbca7 470 #endif
stevep 0:04a9f72bbca7 471
stevep 0:04a9f72bbca7 472 TTMATH_LOGC("UInt::Sub", c)
stevep 0:04a9f72bbca7 473
stevep 0:04a9f72bbca7 474 return c;
stevep 0:04a9f72bbca7 475 }
stevep 0:04a9f72bbca7 476
stevep 0:04a9f72bbca7 477
stevep 0:04a9f72bbca7 478
stevep 0:04a9f72bbca7 479 /*!
stevep 0:04a9f72bbca7 480 this method subtracts one word (at a specific position)
stevep 0:04a9f72bbca7 481 and returns a carry (if there was one)
stevep 0:04a9f72bbca7 482
stevep 0:04a9f72bbca7 483 ***this method is created only on a 64bit platform***
stevep 0:04a9f72bbca7 484
stevep 0:04a9f72bbca7 485 if we've got (value_size=3):
stevep 0:04a9f72bbca7 486 table[0] = 10;
stevep 0:04a9f72bbca7 487 table[1] = 30;
stevep 0:04a9f72bbca7 488 table[2] = 5;
stevep 0:04a9f72bbca7 489 and we call:
stevep 0:04a9f72bbca7 490 SubInt(2,1)
stevep 0:04a9f72bbca7 491 then it'll be:
stevep 0:04a9f72bbca7 492 table[0] = 10;
stevep 0:04a9f72bbca7 493 table[1] = 30 - 2;
stevep 0:04a9f72bbca7 494 table[2] = 5;
stevep 0:04a9f72bbca7 495
stevep 0:04a9f72bbca7 496 of course if there was a carry from table[2] it would be returned
stevep 0:04a9f72bbca7 497 */
stevep 0:04a9f72bbca7 498 template<uint value_size>
stevep 0:04a9f72bbca7 499 uint UInt<value_size>::SubInt(uint value, uint index) // table[index] -= value, borrow propagated upwards; returns the final borrow
stevep 0:04a9f72bbca7 500 {
stevep 0:04a9f72bbca7 501 uint b = value_size; // total number of words in the table
stevep 0:04a9f72bbca7 502 uint * p1 = table; // words modified in place
stevep 0:04a9f72bbca7 503 uint c;
stevep 0:04a9f72bbca7 504
stevep 0:04a9f72bbca7 505 TTMATH_ASSERT( index < value_size )
stevep 0:04a9f72bbca7 506
stevep 0:04a9f72bbca7 507 #ifndef __GNUC__
stevep 0:04a9f72bbca7 508 c = ttmath_subindexed_x64(p1,b,index,value); // non-GCC path: external routine
stevep 0:04a9f72bbca7 509 #endif
stevep 0:04a9f72bbca7 510
stevep 0:04a9f72bbca7 511
stevep 0:04a9f72bbca7 512 #ifdef __GNUC__
stevep 0:04a9f72bbca7 513 uint dummy, dummy2; // receive rax/rcx, which the asm overwrites; the values are not used afterwards
stevep 0:04a9f72bbca7 514
stevep 0:04a9f72bbca7 515 __asm__ __volatile__(
stevep 0:04a9f72bbca7 516
stevep 0:04a9f72bbca7 517 "subq %%rdx, %%rcx \n" // rcx = value_size - index: words from 'index' to the end of the table
stevep 0:04a9f72bbca7 518
stevep 0:04a9f72bbca7 519 "1: \n"
stevep 0:04a9f72bbca7 520 "subq %%rax, (%%rbx,%%rdx,8) \n" // table[rdx] -= rax; CF = borrow out of this word
stevep 0:04a9f72bbca7 521 "jnc 2f \n" // no borrow: nothing more to propagate
stevep 0:04a9f72bbca7 522
stevep 0:04a9f72bbca7 523 "movq $1, %%rax \n" // the next word loses a borrow of 1
stevep 0:04a9f72bbca7 524 "incq %%rdx \n" // next word (inc does not change CF)
stevep 0:04a9f72bbca7 525 "decq %%rcx \n" // dec does not change CF, so CF is still 1 if the loop runs out of words
stevep 0:04a9f72bbca7 526 "jnz 1b \n"
stevep 0:04a9f72bbca7 527
stevep 0:04a9f72bbca7 528 "2: \n"
stevep 0:04a9f72bbca7 529 "setc %%al \n" // al = CF
stevep 0:04a9f72bbca7 530 "movzx %%al, %%rdx \n" // rdx = borrow zero-extended; this becomes c
stevep 0:04a9f72bbca7 531
stevep 0:04a9f72bbca7 532 : "=d" (c), "=a" (dummy), "=c" (dummy2) // outputs: rdx -> c, rax and rcx discarded
stevep 0:04a9f72bbca7 533 : "0" (index), "1" (value), "2" (b), "b" (p1) // inputs: rdx = index, rax = value, rcx = b, rbx = p1
stevep 0:04a9f72bbca7 534 : "cc", "memory" ); // the asm changes the flags and writes to memory
stevep 0:04a9f72bbca7 535
stevep 0:04a9f72bbca7 536 #endif
stevep 0:04a9f72bbca7 537
stevep 0:04a9f72bbca7 538 TTMATH_LOGC("UInt::SubInt", c)
stevep 0:04a9f72bbca7 539
stevep 0:04a9f72bbca7 540 return c;
stevep 0:04a9f72bbca7 541 }
stevep 0:04a9f72bbca7 542
stevep 0:04a9f72bbca7 543
stevep 0:04a9f72bbca7 544 /*!
stevep 0:04a9f72bbca7 545 this static method subtracts one vector from the other
stevep 0:04a9f72bbca7 546 'ss1' is larger in size or equal to 'ss2'
stevep 0:04a9f72bbca7 547
stevep 0:04a9f72bbca7 548 ss1 points to the first (larger) vector
stevep 0:04a9f72bbca7 549 ss2 points to the second vector
stevep 0:04a9f72bbca7 550 ss1_size - size of the ss1 (and size of the result too)
stevep 0:04a9f72bbca7 551 ss2_size - size of the ss2
stevep 0:04a9f72bbca7 552 result - is the result vector (which has size the same as ss1: ss1_size)
stevep 0:04a9f72bbca7 553
stevep 0:04a9f72bbca7 554 Example: ss1_size is 5, ss2_size is 3
stevep 0:04a9f72bbca7 555 ss1: ss2: result (output):
stevep 0:04a9f72bbca7 556 5 1 5-1
stevep 0:04a9f72bbca7 557 4 3 4-3
stevep 0:04a9f72bbca7 558 2 7 2-7
stevep 0:04a9f72bbca7 559 6 6-1 (the borrow from previous item)
stevep 0:04a9f72bbca7 560 9 9
stevep 0:04a9f72bbca7 561 return (carry): 0
stevep 0:04a9f72bbca7 562 of course the carry (borrow) is propagated and will be returned from the last item
stevep 0:04a9f72bbca7 563 (this method is used by the Karatsuba multiplication algorithm)
stevep 0:04a9f72bbca7 564 */
stevep 0:04a9f72bbca7 565 template<uint value_size>
stevep 0:04a9f72bbca7 566 uint UInt<value_size>::SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result) // result = ss1 - ss2 (word vectors); returns the final borrow
stevep 0:04a9f72bbca7 567 {
stevep 0:04a9f72bbca7 568 TTMATH_ASSERT( ss1_size >= ss2_size )
stevep 0:04a9f72bbca7 569
stevep 0:04a9f72bbca7 570 uint c;
stevep 0:04a9f72bbca7 571
stevep 0:04a9f72bbca7 572 #ifndef __GNUC__
stevep 0:04a9f72bbca7 573 c = ttmath_subvector_x64(ss1, ss2, ss1_size, ss2_size, result); // non-GCC path: external routine
stevep 0:04a9f72bbca7 574 #endif
stevep 0:04a9f72bbca7 575
stevep 0:04a9f72bbca7 576
stevep 0:04a9f72bbca7 577 #ifdef __GNUC__
stevep 0:04a9f72bbca7 578
stevep 0:04a9f72bbca7 579 // the asm code is nearly the same as in AddVector
stevep 0:04a9f72bbca7 580 // only two instructions 'adc' are changed to 'sbb'
stevep 0:04a9f72bbca7 581
stevep 0:04a9f72bbca7 582 uint dummy1, dummy2, dummy3; // receive rax/rbx/rdx, which the asm overwrites; the values are not used afterwards
stevep 0:04a9f72bbca7 583 uint rest = ss1_size - ss2_size; // number of words ss1 has beyond the end of ss2
stevep 0:04a9f72bbca7 584
stevep 0:04a9f72bbca7 585 __asm__ __volatile__(
stevep 0:04a9f72bbca7 586 "mov %%rdx, %%r8 \n" // r8 = rest (rdx is reused as the word index)
stevep 0:04a9f72bbca7 587 "xor %%rdx, %%rdx \n" // rdx = 0, cf = 0
stevep 0:04a9f72bbca7 588 "1: \n" // loop 1: the ss2_size words that both vectors have
stevep 0:04a9f72bbca7 589 "mov (%%rsi,%%rdx,8), %%rax \n" // rax = ss1[rdx]
stevep 0:04a9f72bbca7 590 "sbb (%%rbx,%%rdx,8), %%rax \n" // rax -= ss2[rdx] + CF
stevep 0:04a9f72bbca7 591 "mov %%rax, (%%rdi,%%rdx,8) \n" // result[rdx] = rax
stevep 0:04a9f72bbca7 592
stevep 0:04a9f72bbca7 593 "inc %%rdx \n" // next word (inc does not change CF)
stevep 0:04a9f72bbca7 594 "dec %%rcx \n" // NOTE(review): the count is tested only after this decrement, so ss2_size == 0 would wrap rcx instead of skipping the loop - confirm callers never pass 0
stevep 0:04a9f72bbca7 595 "jnz 1b \n"
stevep 0:04a9f72bbca7 596
stevep 0:04a9f72bbca7 597 "adc %%rcx, %%rcx \n" // rcx has the cf state
stevep 0:04a9f72bbca7 598
stevep 0:04a9f72bbca7 599 "or %%r8, %%r8 \n" // test 'rest' for zero; 'or' clears CF, which is why the borrow was saved in rcx first
stevep 0:04a9f72bbca7 600 "jz 3f \n" // rest == 0: finished, rcx already holds the borrow
stevep 0:04a9f72bbca7 601
stevep 0:04a9f72bbca7 602 "xor %%rbx, %%rbx \n" // rbx = 0
stevep 0:04a9f72bbca7 603 "neg %%rcx \n" // setting cf from rcx
stevep 0:04a9f72bbca7 604 "mov %%r8, %%rcx \n" // rcx=rest and is != 0
stevep 0:04a9f72bbca7 605 "2: \n" // loop 2: remaining words of ss1, only the borrow is subtracted
stevep 0:04a9f72bbca7 606 "mov (%%rsi, %%rdx, 8), %%rax \n" // rax = ss1[rdx]
stevep 0:04a9f72bbca7 607 "sbb %%rbx, %%rax \n" // rax -= 0 + CF
stevep 0:04a9f72bbca7 608 "mov %%rax, (%%rdi, %%rdx, 8) \n" // result[rdx] = rax
stevep 0:04a9f72bbca7 609
stevep 0:04a9f72bbca7 610 "inc %%rdx \n"
stevep 0:04a9f72bbca7 611 "dec %%rcx \n"
stevep 0:04a9f72bbca7 612 "jnz 2b \n"
stevep 0:04a9f72bbca7 613
stevep 0:04a9f72bbca7 614 "adc %%rcx, %%rcx \n" // rcx is 0 here, so rcx = CF: the final borrow
stevep 0:04a9f72bbca7 615 "3: \n"
stevep 0:04a9f72bbca7 616
stevep 0:04a9f72bbca7 617 : "=a" (dummy1), "=b" (dummy2), "=c" (c), "=d" (dummy3) // outputs: rcx -> c, rax/rbx/rdx discarded
stevep 0:04a9f72bbca7 618 : "1" (ss2), "2" (ss2_size), "3" (rest), "S" (ss1), "D" (result) // inputs: rbx = ss2, rcx = ss2_size, rdx = rest, rsi = ss1, rdi = result
stevep 0:04a9f72bbca7 619 : "%r8", "cc", "memory" ); // r8 is used as scratch; the asm changes the flags and writes to memory
stevep 0:04a9f72bbca7 620
stevep 0:04a9f72bbca7 621 #endif
stevep 0:04a9f72bbca7 622
stevep 0:04a9f72bbca7 623 TTMATH_VECTOR_LOGC("UInt::SubVector", c, result, ss1_size)
stevep 0:04a9f72bbca7 624
stevep 0:04a9f72bbca7 625 return c;
stevep 0:04a9f72bbca7 626 }
stevep 0:04a9f72bbca7 627
stevep 0:04a9f72bbca7 628
stevep 0:04a9f72bbca7 629 /*!
stevep 0:04a9f72bbca7 630 this method moves all bits into the left hand side
stevep 0:04a9f72bbca7 631 return value <- this <- c
stevep 0:04a9f72bbca7 632
stevep 0:04a9f72bbca7 633 the lowest *bit* will be set to 'c' and
stevep 0:04a9f72bbca7 634 the state of one additional bit (on the left hand side)
stevep 0:04a9f72bbca7 635 will be returned
stevep 0:04a9f72bbca7 636
stevep 0:04a9f72bbca7 637 for example:
stevep 0:04a9f72bbca7 638 let this is 001010000
stevep 0:04a9f72bbca7 639 after Rcl2_one(1) there'll be 010100001 and Rcl2_one returns 0
stevep 0:04a9f72bbca7 640
stevep 0:04a9f72bbca7 641 ***this method is created only on a 64bit platform***
stevep 0:04a9f72bbca7 642 */
stevep 0:04a9f72bbca7 643 template<uint value_size>
stevep 0:04a9f72bbca7 644 uint UInt<value_size>::Rcl2_one(uint c) // shifts the whole table left by one bit, c enters at the bottom; returns the bit shifted out at the top
stevep 0:04a9f72bbca7 645 {
stevep 0:04a9f72bbca7 646 sint b = value_size; // loop counter: number of words to rotate
stevep 0:04a9f72bbca7 647 uint * p1 = table; // words modified in place
stevep 0:04a9f72bbca7 648
stevep 0:04a9f72bbca7 649
stevep 0:04a9f72bbca7 650 #ifndef __GNUC__
stevep 0:04a9f72bbca7 651 c = ttmath_rcl_x64(p1,b,c); // non-GCC path: external routine
stevep 0:04a9f72bbca7 652 #endif
stevep 0:04a9f72bbca7 653
stevep 0:04a9f72bbca7 654
stevep 0:04a9f72bbca7 655 #ifdef __GNUC__
stevep 0:04a9f72bbca7 656 uint dummy, dummy2; // receive rax/rdx, which the asm overwrites; the values are not used afterwards
stevep 0:04a9f72bbca7 657
stevep 0:04a9f72bbca7 658 __asm__ __volatile__(
stevep 0:04a9f72bbca7 659
stevep 0:04a9f72bbca7 660 "xorq %%rdx, %%rdx \n" // rdx=0
stevep 0:04a9f72bbca7 661 "negq %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
stevep 0:04a9f72bbca7 662
stevep 0:04a9f72bbca7 663 "1: \n" // loop from the lowest word (index 0) upwards
stevep 0:04a9f72bbca7 664 "rclq $1, (%%rbx, %%rdx, 8) \n" // rotate table[rdx] left through CF: old CF enters bit 0, the top bit becomes the new CF
stevep 0:04a9f72bbca7 665
stevep 0:04a9f72bbca7 666 "incq %%rdx \n" // next word (inc does not change CF)
stevep 0:04a9f72bbca7 667 "decq %%rcx \n" // one word fewer to go (dec does not change CF)
stevep 0:04a9f72bbca7 668 "jnz 1b \n"
stevep 0:04a9f72bbca7 669
stevep 0:04a9f72bbca7 670 "adcq %%rcx, %%rcx \n" // rcx is 0 here, so rcx = CF: the bit shifted out of the highest word
stevep 0:04a9f72bbca7 671
stevep 0:04a9f72bbca7 672 : "=c" (c), "=a" (dummy), "=d" (dummy2) // outputs: rcx -> c, rax and rdx discarded
stevep 0:04a9f72bbca7 673 : "0" (b), "1" (c), "b" (p1) // inputs: rcx = b, rax = c, rbx = p1
stevep 0:04a9f72bbca7 674 : "cc", "memory" ); // the asm changes the flags and writes to memory
stevep 0:04a9f72bbca7 675
stevep 0:04a9f72bbca7 676 #endif
stevep 0:04a9f72bbca7 677
stevep 0:04a9f72bbca7 678 TTMATH_LOGC("UInt::Rcl2_one", c)
stevep 0:04a9f72bbca7 679
stevep 0:04a9f72bbca7 680 return c;
stevep 0:04a9f72bbca7 681 }
stevep 0:04a9f72bbca7 682
stevep 0:04a9f72bbca7 683
stevep 0:04a9f72bbca7 684 /*!
stevep 0:04a9f72bbca7 685 this method moves all bits into the right hand side
stevep 0:04a9f72bbca7 686 c -> this -> return value
stevep 0:04a9f72bbca7 687
stevep 0:04a9f72bbca7 688 the highest *bit* will be held the 'c' and
stevep 0:04a9f72bbca7 689 the state of one additional bit (on the right hand side)
stevep 0:04a9f72bbca7 690 will be returned
stevep 0:04a9f72bbca7 691
stevep 0:04a9f72bbca7 692 for example:
stevep 0:04a9f72bbca7 693 let this is 000000010
stevep 0:04a9f72bbca7 694 after Rcr2_one(1) there'll be 100000001 and Rcr2_one returns 0
stevep 0:04a9f72bbca7 695
stevep 0:04a9f72bbca7 696 ***this method is created only on a 64bit platform***
stevep 0:04a9f72bbca7 697 */
stevep 0:04a9f72bbca7 698 template<uint value_size>
stevep 0:04a9f72bbca7 699 uint UInt<value_size>::Rcr2_one(uint c)
stevep 0:04a9f72bbca7 700 {
stevep 0:04a9f72bbca7 701 sint b = value_size;
stevep 0:04a9f72bbca7 702 uint * p1 = table;
stevep 0:04a9f72bbca7 703
stevep 0:04a9f72bbca7 704
stevep 0:04a9f72bbca7 705 #ifndef __GNUC__
stevep 0:04a9f72bbca7 706 c = ttmath_rcr_x64(p1,b,c);
stevep 0:04a9f72bbca7 707 #endif
stevep 0:04a9f72bbca7 708
stevep 0:04a9f72bbca7 709
stevep 0:04a9f72bbca7 710 #ifdef __GNUC__
stevep 0:04a9f72bbca7 711 uint dummy;
stevep 0:04a9f72bbca7 712
stevep 0:04a9f72bbca7 713 __asm__ __volatile__(
stevep 0:04a9f72bbca7 714
stevep 0:04a9f72bbca7 715 "negq %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
stevep 0:04a9f72bbca7 716
stevep 0:04a9f72bbca7 717 "1: \n"
stevep 0:04a9f72bbca7 718 "rcrq $1, -8(%%rbx, %%rcx, 8) \n"
stevep 0:04a9f72bbca7 719
stevep 0:04a9f72bbca7 720 "decq %%rcx \n"
stevep 0:04a9f72bbca7 721 "jnz 1b \n"
stevep 0:04a9f72bbca7 722
stevep 0:04a9f72bbca7 723 "adcq %%rcx, %%rcx \n"
stevep 0:04a9f72bbca7 724
stevep 0:04a9f72bbca7 725 : "=c" (c), "=a" (dummy)
stevep 0:04a9f72bbca7 726 : "0" (b), "1" (c), "b" (p1)
stevep 0:04a9f72bbca7 727 : "cc", "memory" );
stevep 0:04a9f72bbca7 728
stevep 0:04a9f72bbca7 729 #endif
stevep 0:04a9f72bbca7 730
stevep 0:04a9f72bbca7 731 TTMATH_LOGC("UInt::Rcr2_one", c)
stevep 0:04a9f72bbca7 732
stevep 0:04a9f72bbca7 733 return c;
stevep 0:04a9f72bbca7 734 }
stevep 0:04a9f72bbca7 735
stevep 0:04a9f72bbca7 736
stevep 0:04a9f72bbca7 737
stevep 0:04a9f72bbca7 738 /*!
stevep 0:04a9f72bbca7 739 this method moves all bits into the left hand side
stevep 0:04a9f72bbca7 740 return value <- this <- c
stevep 0:04a9f72bbca7 741
stevep 0:04a9f72bbca7 742 the lowest *bits* will be held the 'c' and
stevep 0:04a9f72bbca7 743 the state of one additional bit (on the left hand side)
stevep 0:04a9f72bbca7 744 will be returned
stevep 0:04a9f72bbca7 745
stevep 0:04a9f72bbca7 746 for example:
stevep 0:04a9f72bbca7 747 let this is 001010000
stevep 0:04a9f72bbca7 748 after Rcl2(3, 1) there'll be 010000111 and Rcl2 returns 1
stevep 0:04a9f72bbca7 749
stevep 0:04a9f72bbca7 750 ***this method is created only on a 64bit platform***
stevep 0:04a9f72bbca7 751 */
stevep 0:04a9f72bbca7 752 template<uint value_size>
stevep 0:04a9f72bbca7 753 uint UInt<value_size>::Rcl2(uint bits, uint c)
stevep 0:04a9f72bbca7 754 {
stevep 0:04a9f72bbca7 755 TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT )
stevep 0:04a9f72bbca7 756
stevep 0:04a9f72bbca7 757 uint b = value_size;
stevep 0:04a9f72bbca7 758 uint * p1 = table;
stevep 0:04a9f72bbca7 759
stevep 0:04a9f72bbca7 760
stevep 0:04a9f72bbca7 761 #ifndef __GNUC__
stevep 0:04a9f72bbca7 762 c = ttmath_rcl2_x64(p1,b,bits,c);
stevep 0:04a9f72bbca7 763 #endif
stevep 0:04a9f72bbca7 764
stevep 0:04a9f72bbca7 765
stevep 0:04a9f72bbca7 766 #ifdef __GNUC__
stevep 0:04a9f72bbca7 767 uint dummy, dummy2, dummy3;
stevep 0:04a9f72bbca7 768
stevep 0:04a9f72bbca7 769 __asm__ __volatile__(
stevep 0:04a9f72bbca7 770
stevep 0:04a9f72bbca7 771 "movq %%rcx, %%rsi \n"
stevep 0:04a9f72bbca7 772 "movq $64, %%rcx \n"
stevep 0:04a9f72bbca7 773 "subq %%rsi, %%rcx \n"
stevep 0:04a9f72bbca7 774 "movq $-1, %%rdx \n"
stevep 0:04a9f72bbca7 775 "shrq %%cl, %%rdx \n"
stevep 0:04a9f72bbca7 776 "movq %%rdx, %%r8 \n"
stevep 0:04a9f72bbca7 777 "movq %%rsi, %%rcx \n"
stevep 0:04a9f72bbca7 778
stevep 0:04a9f72bbca7 779 "xorq %%rdx, %%rdx \n"
stevep 0:04a9f72bbca7 780 "movq %%rdx, %%rsi \n"
stevep 0:04a9f72bbca7 781 "orq %%rax, %%rax \n"
stevep 0:04a9f72bbca7 782 "cmovnz %%r8, %%rsi \n"
stevep 0:04a9f72bbca7 783
stevep 0:04a9f72bbca7 784 "1: \n"
stevep 0:04a9f72bbca7 785 "rolq %%cl, (%%rbx,%%rdx,8) \n"
stevep 0:04a9f72bbca7 786
stevep 0:04a9f72bbca7 787 "movq (%%rbx,%%rdx,8), %%rax \n"
stevep 0:04a9f72bbca7 788 "andq %%r8, %%rax \n"
stevep 0:04a9f72bbca7 789 "xorq %%rax, (%%rbx,%%rdx,8) \n"
stevep 0:04a9f72bbca7 790 "orq %%rsi, (%%rbx,%%rdx,8) \n"
stevep 0:04a9f72bbca7 791 "movq %%rax, %%rsi \n"
stevep 0:04a9f72bbca7 792
stevep 0:04a9f72bbca7 793 "incq %%rdx \n"
stevep 0:04a9f72bbca7 794 "decq %%rdi \n"
stevep 0:04a9f72bbca7 795 "jnz 1b \n"
stevep 0:04a9f72bbca7 796
stevep 0:04a9f72bbca7 797 "and $1, %%rax \n"
stevep 0:04a9f72bbca7 798
stevep 0:04a9f72bbca7 799 : "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3)
stevep 0:04a9f72bbca7 800 : "0" (c), "1" (b), "b" (p1), "c" (bits)
stevep 0:04a9f72bbca7 801 : "%r8", "cc", "memory" );
stevep 0:04a9f72bbca7 802
stevep 0:04a9f72bbca7 803 #endif
stevep 0:04a9f72bbca7 804
stevep 0:04a9f72bbca7 805 TTMATH_LOGC("UInt::Rcl2", c)
stevep 0:04a9f72bbca7 806
stevep 0:04a9f72bbca7 807 return c;
stevep 0:04a9f72bbca7 808 }
stevep 0:04a9f72bbca7 809
stevep 0:04a9f72bbca7 810
stevep 0:04a9f72bbca7 811 /*!
stevep 0:04a9f72bbca7 812 this method moves all bits into the right hand side
stevep 0:04a9f72bbca7 813 C -> this -> return value
stevep 0:04a9f72bbca7 814
stevep 0:04a9f72bbca7 815 the highest *bits* will be held the 'c' and
stevep 0:04a9f72bbca7 816 the state of one additional bit (on the right hand side)
stevep 0:04a9f72bbca7 817 will be returned
stevep 0:04a9f72bbca7 818
stevep 0:04a9f72bbca7 819 for example:
stevep 0:04a9f72bbca7 820 let this is 000000010
stevep 0:04a9f72bbca7 821 after Rcr2(2, 1) there'll be 110000000 and Rcr2 returns 1
stevep 0:04a9f72bbca7 822
stevep 0:04a9f72bbca7 823 ***this method is created only on a 64bit platform***
stevep 0:04a9f72bbca7 824 */
stevep 0:04a9f72bbca7 825 template<uint value_size>
stevep 0:04a9f72bbca7 826 uint UInt<value_size>::Rcr2(uint bits, uint c)
stevep 0:04a9f72bbca7 827 {
stevep 0:04a9f72bbca7 828 TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT )
stevep 0:04a9f72bbca7 829
stevep 0:04a9f72bbca7 830 sint b = value_size;
stevep 0:04a9f72bbca7 831 uint * p1 = table;
stevep 0:04a9f72bbca7 832
stevep 0:04a9f72bbca7 833
stevep 0:04a9f72bbca7 834 #ifndef __GNUC__
stevep 0:04a9f72bbca7 835 c = ttmath_rcr2_x64(p1,b,bits,c);
stevep 0:04a9f72bbca7 836 #endif
stevep 0:04a9f72bbca7 837
stevep 0:04a9f72bbca7 838
stevep 0:04a9f72bbca7 839 #ifdef __GNUC__
stevep 0:04a9f72bbca7 840 uint dummy, dummy2, dummy3;
stevep 0:04a9f72bbca7 841
stevep 0:04a9f72bbca7 842 __asm__ __volatile__(
stevep 0:04a9f72bbca7 843
stevep 0:04a9f72bbca7 844 "movq %%rcx, %%rsi \n"
stevep 0:04a9f72bbca7 845 "movq $64, %%rcx \n"
stevep 0:04a9f72bbca7 846 "subq %%rsi, %%rcx \n"
stevep 0:04a9f72bbca7 847 "movq $-1, %%rdx \n"
stevep 0:04a9f72bbca7 848 "shlq %%cl, %%rdx \n"
stevep 0:04a9f72bbca7 849 "movq %%rdx, %%R8 \n"
stevep 0:04a9f72bbca7 850 "movq %%rsi, %%rcx \n"
stevep 0:04a9f72bbca7 851
stevep 0:04a9f72bbca7 852 "xorq %%rdx, %%rdx \n"
stevep 0:04a9f72bbca7 853 "movq %%rdx, %%rsi \n"
stevep 0:04a9f72bbca7 854 "addq %%rdi, %%rdx \n"
stevep 0:04a9f72bbca7 855 "decq %%rdx \n"
stevep 0:04a9f72bbca7 856 "orq %%rax, %%rax \n"
stevep 0:04a9f72bbca7 857 "cmovnz %%R8, %%rsi \n"
stevep 0:04a9f72bbca7 858
stevep 0:04a9f72bbca7 859 "1: \n"
stevep 0:04a9f72bbca7 860 "rorq %%cl, (%%rbx,%%rdx,8) \n"
stevep 0:04a9f72bbca7 861
stevep 0:04a9f72bbca7 862 "movq (%%rbx,%%rdx,8), %%rax \n"
stevep 0:04a9f72bbca7 863 "andq %%R8, %%rax \n"
stevep 0:04a9f72bbca7 864 "xorq %%rax, (%%rbx,%%rdx,8) \n"
stevep 0:04a9f72bbca7 865 "orq %%rsi, (%%rbx,%%rdx,8) \n"
stevep 0:04a9f72bbca7 866 "movq %%rax, %%rsi \n"
stevep 0:04a9f72bbca7 867
stevep 0:04a9f72bbca7 868 "decq %%rdx \n"
stevep 0:04a9f72bbca7 869 "decq %%rdi \n"
stevep 0:04a9f72bbca7 870 "jnz 1b \n"
stevep 0:04a9f72bbca7 871
stevep 0:04a9f72bbca7 872 "rolq $1, %%rax \n"
stevep 0:04a9f72bbca7 873 "andq $1, %%rax \n"
stevep 0:04a9f72bbca7 874
stevep 0:04a9f72bbca7 875 : "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3)
stevep 0:04a9f72bbca7 876 : "0" (c), "1" (b), "b" (p1), "c" (bits)
stevep 0:04a9f72bbca7 877 : "%r8", "cc", "memory" );
stevep 0:04a9f72bbca7 878
stevep 0:04a9f72bbca7 879 #endif
stevep 0:04a9f72bbca7 880
stevep 0:04a9f72bbca7 881 TTMATH_LOGC("UInt::Rcr2", c)
stevep 0:04a9f72bbca7 882
stevep 0:04a9f72bbca7 883 return c;
stevep 0:04a9f72bbca7 884 }
stevep 0:04a9f72bbca7 885
stevep 0:04a9f72bbca7 886
stevep 0:04a9f72bbca7 887 /*
stevep 0:04a9f72bbca7 888 this method returns the number of the highest set bit in one 64-bit word
stevep 0:04a9f72bbca7 889 if the 'x' is zero this method returns '-1'
stevep 0:04a9f72bbca7 890
stevep 0:04a9f72bbca7 891 ***this method is created only on a 64bit platform***
stevep 0:04a9f72bbca7 892 */
stevep 0:04a9f72bbca7 893 template<uint value_size>
stevep 0:04a9f72bbca7 894 sint UInt<value_size>::FindLeadingBitInWord(uint x)
stevep 0:04a9f72bbca7 895 {
stevep 0:04a9f72bbca7 896 sint result;
stevep 0:04a9f72bbca7 897
stevep 0:04a9f72bbca7 898
stevep 0:04a9f72bbca7 899 #ifndef __GNUC__
stevep 0:04a9f72bbca7 900
stevep 0:04a9f72bbca7 901 unsigned long nIndex = 0;
stevep 0:04a9f72bbca7 902
stevep 0:04a9f72bbca7 903 if( _BitScanReverse64(&nIndex,x) == 0 )
stevep 0:04a9f72bbca7 904 result = -1;
stevep 0:04a9f72bbca7 905 else
stevep 0:04a9f72bbca7 906 result = nIndex;
stevep 0:04a9f72bbca7 907
stevep 0:04a9f72bbca7 908 #endif
stevep 0:04a9f72bbca7 909
stevep 0:04a9f72bbca7 910
stevep 0:04a9f72bbca7 911 #ifdef __GNUC__
stevep 0:04a9f72bbca7 912 uint dummy;
stevep 0:04a9f72bbca7 913
stevep 0:04a9f72bbca7 914 __asm__ (
stevep 0:04a9f72bbca7 915
stevep 0:04a9f72bbca7 916 "movq $-1, %1 \n"
stevep 0:04a9f72bbca7 917 "bsrq %2, %0 \n"
stevep 0:04a9f72bbca7 918 "cmovz %1, %0 \n"
stevep 0:04a9f72bbca7 919
stevep 0:04a9f72bbca7 920 : "=r" (result), "=&r" (dummy)
stevep 0:04a9f72bbca7 921 : "r" (x)
stevep 0:04a9f72bbca7 922 : "cc" );
stevep 0:04a9f72bbca7 923
stevep 0:04a9f72bbca7 924 #endif
stevep 0:04a9f72bbca7 925
stevep 0:04a9f72bbca7 926
stevep 0:04a9f72bbca7 927 return result;
stevep 0:04a9f72bbca7 928 }
stevep 0:04a9f72bbca7 929
stevep 0:04a9f72bbca7 930
stevep 0:04a9f72bbca7 931 /*
stevep 0:04a9f72bbca7 932 this method returns the number of the highest set bit in one 64-bit word
stevep 0:04a9f72bbca7 933 if the 'x' is zero this method returns '-1'
stevep 0:04a9f72bbca7 934
stevep 0:04a9f72bbca7 935 ***this method is created only on a 64bit platform***
stevep 0:04a9f72bbca7 936 */
stevep 0:04a9f72bbca7 937 template<uint value_size>
stevep 0:04a9f72bbca7 938 sint UInt<value_size>::FindLowestBitInWord(uint x)
stevep 0:04a9f72bbca7 939 {
stevep 0:04a9f72bbca7 940 sint result;
stevep 0:04a9f72bbca7 941
stevep 0:04a9f72bbca7 942
stevep 0:04a9f72bbca7 943 #ifndef __GNUC__
stevep 0:04a9f72bbca7 944
stevep 0:04a9f72bbca7 945 unsigned long nIndex = 0;
stevep 0:04a9f72bbca7 946
stevep 0:04a9f72bbca7 947 if( _BitScanForward64(&nIndex,x) == 0 )
stevep 0:04a9f72bbca7 948 result = -1;
stevep 0:04a9f72bbca7 949 else
stevep 0:04a9f72bbca7 950 result = nIndex;
stevep 0:04a9f72bbca7 951
stevep 0:04a9f72bbca7 952 #endif
stevep 0:04a9f72bbca7 953
stevep 0:04a9f72bbca7 954
stevep 0:04a9f72bbca7 955 #ifdef __GNUC__
stevep 0:04a9f72bbca7 956 uint dummy;
stevep 0:04a9f72bbca7 957
stevep 0:04a9f72bbca7 958 __asm__ (
stevep 0:04a9f72bbca7 959
stevep 0:04a9f72bbca7 960 "movq $-1, %1 \n"
stevep 0:04a9f72bbca7 961 "bsfq %2, %0 \n"
stevep 0:04a9f72bbca7 962 "cmovz %1, %0 \n"
stevep 0:04a9f72bbca7 963
stevep 0:04a9f72bbca7 964 : "=r" (result), "=&r" (dummy)
stevep 0:04a9f72bbca7 965 : "r" (x)
stevep 0:04a9f72bbca7 966 : "cc" );
stevep 0:04a9f72bbca7 967
stevep 0:04a9f72bbca7 968 #endif
stevep 0:04a9f72bbca7 969
stevep 0:04a9f72bbca7 970
stevep 0:04a9f72bbca7 971 return result;
stevep 0:04a9f72bbca7 972 }
stevep 0:04a9f72bbca7 973
stevep 0:04a9f72bbca7 974
stevep 0:04a9f72bbca7 975 /*!
stevep 0:04a9f72bbca7 976 this method sets a special bit in the 'value'
stevep 0:04a9f72bbca7 977 and returns the last state of the bit (zero or one)
stevep 0:04a9f72bbca7 978
stevep 0:04a9f72bbca7 979 ***this method is created only on a 64bit platform***
stevep 0:04a9f72bbca7 980
stevep 0:04a9f72bbca7 981 bit is from <0,63>
stevep 0:04a9f72bbca7 982
stevep 0:04a9f72bbca7 983 e.g.
stevep 0:04a9f72bbca7 984 uint x = 100;
stevep 0:04a9f72bbca7 985 uint bit = SetBitInWord(x, 3);
stevep 0:04a9f72bbca7 986 now: x = 108 and bit = 0
stevep 0:04a9f72bbca7 987 */
stevep 0:04a9f72bbca7 988 template<uint value_size>
stevep 0:04a9f72bbca7 989 uint UInt<value_size>::SetBitInWord(uint & value, uint bit)
stevep 0:04a9f72bbca7 990 {
stevep 0:04a9f72bbca7 991 TTMATH_ASSERT( bit < TTMATH_BITS_PER_UINT )
stevep 0:04a9f72bbca7 992
stevep 0:04a9f72bbca7 993 uint old_bit;
stevep 0:04a9f72bbca7 994 uint v = value;
stevep 0:04a9f72bbca7 995
stevep 0:04a9f72bbca7 996
stevep 0:04a9f72bbca7 997 #ifndef __GNUC__
stevep 0:04a9f72bbca7 998 old_bit = _bittestandset64((__int64*)&value,bit) != 0;
stevep 0:04a9f72bbca7 999 #endif
stevep 0:04a9f72bbca7 1000
stevep 0:04a9f72bbca7 1001
stevep 0:04a9f72bbca7 1002 #ifdef __GNUC__
stevep 0:04a9f72bbca7 1003
stevep 0:04a9f72bbca7 1004 __asm__ (
stevep 0:04a9f72bbca7 1005
stevep 0:04a9f72bbca7 1006 "btsq %%rbx, %%rax \n"
stevep 0:04a9f72bbca7 1007 "setc %%bl \n"
stevep 0:04a9f72bbca7 1008 "movzx %%bl, %%rbx \n"
stevep 0:04a9f72bbca7 1009
stevep 0:04a9f72bbca7 1010 : "=a" (v), "=b" (old_bit)
stevep 0:04a9f72bbca7 1011 : "0" (v), "1" (bit)
stevep 0:04a9f72bbca7 1012 : "cc" );
stevep 0:04a9f72bbca7 1013
stevep 0:04a9f72bbca7 1014 #endif
stevep 0:04a9f72bbca7 1015
stevep 0:04a9f72bbca7 1016 value = v;
stevep 0:04a9f72bbca7 1017
stevep 0:04a9f72bbca7 1018 return old_bit;
stevep 0:04a9f72bbca7 1019 }
stevep 0:04a9f72bbca7 1020
stevep 0:04a9f72bbca7 1021
stevep 0:04a9f72bbca7 1022 /*!
stevep 0:04a9f72bbca7 1023 *
stevep 0:04a9f72bbca7 1024 * Multiplication
stevep 0:04a9f72bbca7 1025 *
stevep 0:04a9f72bbca7 1026 *
stevep 0:04a9f72bbca7 1027 */
stevep 0:04a9f72bbca7 1028
stevep 0:04a9f72bbca7 1029
stevep 0:04a9f72bbca7 1030 /*!
stevep 0:04a9f72bbca7 1031 multiplication: result_high:result_low = a * b
stevep 0:04a9f72bbca7 1032 result_high - higher word of the result
stevep 0:04a9f72bbca7 1033 result_low - lower word of the result
stevep 0:04a9f72bbca7 1034
stevep 0:04a9f72bbca7 1035 this methos never returns a carry
stevep 0:04a9f72bbca7 1036 this method is used in the second version of the multiplication algorithms
stevep 0:04a9f72bbca7 1037
stevep 0:04a9f72bbca7 1038 ***this method is created only on a 64bit platform***
stevep 0:04a9f72bbca7 1039 */
stevep 0:04a9f72bbca7 1040 template<uint value_size>
stevep 0:04a9f72bbca7 1041 void UInt<value_size>::MulTwoWords(uint a, uint b, uint * result_high, uint * result_low)
stevep 0:04a9f72bbca7 1042 {
stevep 0:04a9f72bbca7 1043 /*
stevep 0:04a9f72bbca7 1044 we must use these temporary variables in order to inform the compilator
stevep 0:04a9f72bbca7 1045 that value pointed with result1 and result2 has changed
stevep 0:04a9f72bbca7 1046
stevep 0:04a9f72bbca7 1047 this has no effect in visual studio but it's usefull when
stevep 0:04a9f72bbca7 1048 using gcc and options like -O
stevep 0:04a9f72bbca7 1049 */
stevep 0:04a9f72bbca7 1050 uint result1_;
stevep 0:04a9f72bbca7 1051 uint result2_;
stevep 0:04a9f72bbca7 1052
stevep 0:04a9f72bbca7 1053
stevep 0:04a9f72bbca7 1054 #ifndef __GNUC__
stevep 0:04a9f72bbca7 1055 result1_ = _umul128(a,b,&result2_);
stevep 0:04a9f72bbca7 1056 #endif
stevep 0:04a9f72bbca7 1057
stevep 0:04a9f72bbca7 1058
stevep 0:04a9f72bbca7 1059 #ifdef __GNUC__
stevep 0:04a9f72bbca7 1060
stevep 0:04a9f72bbca7 1061 __asm__ (
stevep 0:04a9f72bbca7 1062
stevep 0:04a9f72bbca7 1063 "mulq %%rdx \n"
stevep 0:04a9f72bbca7 1064
stevep 0:04a9f72bbca7 1065 : "=a" (result1_), "=d" (result2_)
stevep 0:04a9f72bbca7 1066 : "0" (a), "1" (b)
stevep 0:04a9f72bbca7 1067 : "cc" );
stevep 0:04a9f72bbca7 1068
stevep 0:04a9f72bbca7 1069 #endif
stevep 0:04a9f72bbca7 1070
stevep 0:04a9f72bbca7 1071
stevep 0:04a9f72bbca7 1072 *result_low = result1_;
stevep 0:04a9f72bbca7 1073 *result_high = result2_;
stevep 0:04a9f72bbca7 1074 }
stevep 0:04a9f72bbca7 1075
stevep 0:04a9f72bbca7 1076
stevep 0:04a9f72bbca7 1077
stevep 0:04a9f72bbca7 1078
stevep 0:04a9f72bbca7 1079 /*!
stevep 0:04a9f72bbca7 1080 *
stevep 0:04a9f72bbca7 1081 * Division
stevep 0:04a9f72bbca7 1082 *
stevep 0:04a9f72bbca7 1083 *
stevep 0:04a9f72bbca7 1084 */
stevep 0:04a9f72bbca7 1085
stevep 0:04a9f72bbca7 1086
stevep 0:04a9f72bbca7 1087 /*!
stevep 0:04a9f72bbca7 1088 this method calculates 64bits word a:b / 32bits c (a higher, b lower word)
stevep 0:04a9f72bbca7 1089 r = a:b / c and rest - remainder
stevep 0:04a9f72bbca7 1090
stevep 0:04a9f72bbca7 1091 ***this method is created only on a 64bit platform***
stevep 0:04a9f72bbca7 1092
stevep 0:04a9f72bbca7 1093 *
stevep 0:04a9f72bbca7 1094 * WARNING:
stevep 0:04a9f72bbca7 1095 * if r (one word) is too small for the result or c is equal zero
stevep 0:04a9f72bbca7 1096 * there'll be a hardware interruption (0)
stevep 0:04a9f72bbca7 1097 * and probably the end of your program
stevep 0:04a9f72bbca7 1098 *
stevep 0:04a9f72bbca7 1099 */
stevep 0:04a9f72bbca7 1100 template<uint value_size>
stevep 0:04a9f72bbca7 1101 void UInt<value_size>::DivTwoWords(uint a,uint b, uint c, uint * r, uint * rest)
stevep 0:04a9f72bbca7 1102 {
stevep 0:04a9f72bbca7 1103 uint r_;
stevep 0:04a9f72bbca7 1104 uint rest_;
stevep 0:04a9f72bbca7 1105 /*
stevep 0:04a9f72bbca7 1106 these variables have similar meaning like those in
stevep 0:04a9f72bbca7 1107 the multiplication algorithm MulTwoWords
stevep 0:04a9f72bbca7 1108 */
stevep 0:04a9f72bbca7 1109
stevep 0:04a9f72bbca7 1110 TTMATH_ASSERT( c != 0 )
stevep 0:04a9f72bbca7 1111
stevep 0:04a9f72bbca7 1112
stevep 0:04a9f72bbca7 1113 #ifndef __GNUC__
stevep 0:04a9f72bbca7 1114
stevep 0:04a9f72bbca7 1115 ttmath_div_x64(&a,&b,c);
stevep 0:04a9f72bbca7 1116 r_ = a;
stevep 0:04a9f72bbca7 1117 rest_ = b;
stevep 0:04a9f72bbca7 1118
stevep 0:04a9f72bbca7 1119 #endif
stevep 0:04a9f72bbca7 1120
stevep 0:04a9f72bbca7 1121
stevep 0:04a9f72bbca7 1122 #ifdef __GNUC__
stevep 0:04a9f72bbca7 1123
stevep 0:04a9f72bbca7 1124 __asm__ (
stevep 0:04a9f72bbca7 1125
stevep 0:04a9f72bbca7 1126 "divq %%rcx \n"
stevep 0:04a9f72bbca7 1127
stevep 0:04a9f72bbca7 1128 : "=a" (r_), "=d" (rest_)
stevep 0:04a9f72bbca7 1129 : "d" (a), "a" (b), "c" (c)
stevep 0:04a9f72bbca7 1130 : "cc" );
stevep 0:04a9f72bbca7 1131
stevep 0:04a9f72bbca7 1132 #endif
stevep 0:04a9f72bbca7 1133
stevep 0:04a9f72bbca7 1134
stevep 0:04a9f72bbca7 1135 *r = r_;
stevep 0:04a9f72bbca7 1136 *rest = rest_;
stevep 0:04a9f72bbca7 1137 }
stevep 0:04a9f72bbca7 1138
stevep 0:04a9f72bbca7 1139 } //namespace
stevep 0:04a9f72bbca7 1140
stevep 0:04a9f72bbca7 1141
stevep 0:04a9f72bbca7 1142 #endif //ifdef TTMATH_PLATFORM64
stevep 0:04a9f72bbca7 1143 #endif //ifndef TTMATH_NOASM
stevep 0:04a9f72bbca7 1144 #endif
stevep 0:04a9f72bbca7 1145
stevep 0:04a9f72bbca7 1146
stevep 0:04a9f72bbca7 1147