Library for big numbers from http://www.ttmath.org/

Dependents:   PIDHeater82 Conceptcontroller_v_1_0 AlarmClockApp COG4050_adxl355_tilt ... more

TTMath is a small library which allows one to perform arithmetic operations with big unsigned integer, big signed integer and big floating point numbers. It provides standard mathematical operations like adding, subtracting, multiplying, dividing.

TTMath is BSD Licensed (new/modified BSD)

For more information about ttmath see http://www.ttmath.org/

Committer:
stevep
Date:
Tue Jul 30 18:43:48 2013 +0000
Revision:
0:04a9f72bbca7
v0.9.3 of ttmath

Who changed what in which revision?

User | Revision | Line number | New contents of line
stevep 0:04a9f72bbca7 1 /*
stevep 0:04a9f72bbca7 2 * This file is a part of TTMath Bignum Library
stevep 0:04a9f72bbca7 3 * and is distributed under the (new) BSD licence.
stevep 0:04a9f72bbca7 4 * Author: Tomasz Sowa <t.sowa@ttmath.org>
stevep 0:04a9f72bbca7 5 */
stevep 0:04a9f72bbca7 6
stevep 0:04a9f72bbca7 7 /*
stevep 0:04a9f72bbca7 8 * Copyright (c) 2006-2009, Tomasz Sowa
stevep 0:04a9f72bbca7 9 * All rights reserved.
stevep 0:04a9f72bbca7 10 *
stevep 0:04a9f72bbca7 11 * Redistribution and use in source and binary forms, with or without
stevep 0:04a9f72bbca7 12 * modification, are permitted provided that the following conditions are met:
stevep 0:04a9f72bbca7 13 *
stevep 0:04a9f72bbca7 14 * * Redistributions of source code must retain the above copyright notice,
stevep 0:04a9f72bbca7 15 * this list of conditions and the following disclaimer.
stevep 0:04a9f72bbca7 16 *
stevep 0:04a9f72bbca7 17 * * Redistributions in binary form must reproduce the above copyright
stevep 0:04a9f72bbca7 18 * notice, this list of conditions and the following disclaimer in the
stevep 0:04a9f72bbca7 19 * documentation and/or other materials provided with the distribution.
stevep 0:04a9f72bbca7 20 *
stevep 0:04a9f72bbca7 21 * * Neither the name Tomasz Sowa nor the names of contributors to this
stevep 0:04a9f72bbca7 22 * project may be used to endorse or promote products derived
stevep 0:04a9f72bbca7 23 * from this software without specific prior written permission.
stevep 0:04a9f72bbca7 24 *
stevep 0:04a9f72bbca7 25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
stevep 0:04a9f72bbca7 26 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
stevep 0:04a9f72bbca7 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
stevep 0:04a9f72bbca7 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
stevep 0:04a9f72bbca7 29 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
stevep 0:04a9f72bbca7 30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
stevep 0:04a9f72bbca7 31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
stevep 0:04a9f72bbca7 32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
stevep 0:04a9f72bbca7 33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
stevep 0:04a9f72bbca7 34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
stevep 0:04a9f72bbca7 35 * THE POSSIBILITY OF SUCH DAMAGE.
stevep 0:04a9f72bbca7 36 */
stevep 0:04a9f72bbca7 37
stevep 0:04a9f72bbca7 38
stevep 0:04a9f72bbca7 39
stevep 0:04a9f72bbca7 40 #ifndef headerfilettmathuint_x86
stevep 0:04a9f72bbca7 41 #define headerfilettmathuint_x86
stevep 0:04a9f72bbca7 42
stevep 0:04a9f72bbca7 43
stevep 0:04a9f72bbca7 44 #ifndef TTMATH_NOASM
stevep 0:04a9f72bbca7 45 #ifdef TTMATH_PLATFORM32
stevep 0:04a9f72bbca7 46
stevep 0:04a9f72bbca7 47
stevep 0:04a9f72bbca7 48 /*!
stevep 0:04a9f72bbca7 49 \file ttmathuint_x86.h
stevep 0:04a9f72bbca7 50 \brief template class UInt<uint> with assembler code for 32bit x86 processors
stevep 0:04a9f72bbca7 51
stevep 0:04a9f72bbca7 52 this file is included at the end of ttmathuint.h
stevep 0:04a9f72bbca7 53 */
stevep 0:04a9f72bbca7 54
stevep 0:04a9f72bbca7 55
stevep 0:04a9f72bbca7 56
stevep 0:04a9f72bbca7 57 /*!
stevep 0:04a9f72bbca7 58 \brief a namespace for the TTMath library
stevep 0:04a9f72bbca7 59 */
stevep 0:04a9f72bbca7 60 namespace ttmath
stevep 0:04a9f72bbca7 61 {
stevep 0:04a9f72bbca7 62
stevep 0:04a9f72bbca7 63 /*!
stevep 0:04a9f72bbca7 64 returns a string that names the current type (build flavour) of the library
stevep 0:04a9f72bbca7 65 the library has the following types:
stevep 0:04a9f72bbca7 66 asm_vc_32 - with asm code designed for Microsoft Visual C++ (32 bits)
stevep 0:04a9f72bbca7 67 asm_gcc_32 - with asm code designed for GCC (32 bits)
stevep 0:04a9f72bbca7 68 asm_vc_64 - with asm for VC (64 bit)
stevep 0:04a9f72bbca7 69 asm_gcc_64 - with asm for GCC (64 bit)
stevep 0:04a9f72bbca7 70 no_asm_32 - pure C++ version (32 bit) - without any asm code
stevep 0:04a9f72bbca7 71 no_asm_64 - pure C++ version (64 bit) - without any asm code
stevep 0:04a9f72bbca7 72 (this definition returns only "asm_vc_32" or "asm_gcc_32") */
stevep 0:04a9f72bbca7 73 template<uint value_size>
stevep 0:04a9f72bbca7 74 const char * UInt<value_size>::LibTypeStr()
stevep 0:04a9f72bbca7 75 {
stevep 0:04a9f72bbca7 76 #ifndef __GNUC__ // every compiler that does not define __GNUC__ is reported as the VC flavour
stevep 0:04a9f72bbca7 77 static const char info[] = "asm_vc_32";
stevep 0:04a9f72bbca7 78 #endif
stevep 0:04a9f72bbca7 79
stevep 0:04a9f72bbca7 80 #ifdef __GNUC__ // compilers that define __GNUC__ are reported as the GCC flavour
stevep 0:04a9f72bbca7 81 static const char info[] = "asm_gcc_32";
stevep 0:04a9f72bbca7 82 #endif
stevep 0:04a9f72bbca7 83
stevep 0:04a9f72bbca7 84 return info; // 'info' is static, so the returned pointer stays valid after the call
stevep 0:04a9f72bbca7 85 }
stevep 0:04a9f72bbca7 86
stevep 0:04a9f72bbca7 87
stevep 0:04a9f72bbca7 88 /*!
stevep 0:04a9f72bbca7 89 returns the current type of the library as a LibTypeCode value (asm_vc_32 or asm_gcc_32 in this definition)
stevep 0:04a9f72bbca7 90 */
stevep 0:04a9f72bbca7 91 template<uint value_size>
stevep 0:04a9f72bbca7 92 LibTypeCode UInt<value_size>::LibType()
stevep 0:04a9f72bbca7 93 {
stevep 0:04a9f72bbca7 94 #ifndef __GNUC__ // every compiler that does not define __GNUC__ is reported as the VC flavour
stevep 0:04a9f72bbca7 95 LibTypeCode info = asm_vc_32;
stevep 0:04a9f72bbca7 96 #endif
stevep 0:04a9f72bbca7 97
stevep 0:04a9f72bbca7 98 #ifdef __GNUC__ // compilers that define __GNUC__ are reported as the GCC flavour
stevep 0:04a9f72bbca7 99 LibTypeCode info = asm_gcc_32;
stevep 0:04a9f72bbca7 100 #endif
stevep 0:04a9f72bbca7 101
stevep 0:04a9f72bbca7 102 return info;
stevep 0:04a9f72bbca7 103 }
stevep 0:04a9f72bbca7 104
stevep 0:04a9f72bbca7 105
stevep 0:04a9f72bbca7 106
stevep 0:04a9f72bbca7 107 /*!
stevep 0:04a9f72bbca7 108 *
stevep 0:04a9f72bbca7 109 * basic mathematical functions
stevep 0:04a9f72bbca7 110 *
stevep 0:04a9f72bbca7 111 */
stevep 0:04a9f72bbca7 112
stevep 0:04a9f72bbca7 113
stevep 0:04a9f72bbca7 114 /*!
stevep 0:04a9f72bbca7 115 adds ss2 to this object and also adds an incoming carry when c is non-zero
stevep 0:04a9f72bbca7 116 (this = this + ss2 + c)
stevep 0:04a9f72bbca7 117
stevep 0:04a9f72bbca7 118 c is used as a flag: zero means no carry, any non-zero value counts as a carry of one
stevep 0:04a9f72bbca7 119 the function returns 1 if a carry came out of the last word (table[value_size-1]), otherwise 0
stevep 0:04a9f72bbca7 120 */
stevep 0:04a9f72bbca7 121 template<uint value_size>
stevep 0:04a9f72bbca7 122 uint UInt<value_size>::Add(const UInt<value_size> & ss2, uint c)
stevep 0:04a9f72bbca7 123 {
stevep 0:04a9f72bbca7 124 uint b = value_size; // number of words; loaded into ecx as the loop counter
stevep 0:04a9f72bbca7 125 uint * p1 = table; // words of this object (read and written by the asm)
stevep 0:04a9f72bbca7 126 uint * p2 = const_cast<uint*>(ss2.table); // words of ss2; the asm below only reads through p2
stevep 0:04a9f72bbca7 127
stevep 0:04a9f72bbca7 128 // we don't have to use TTMATH_REFERENCE_ASSERT here
stevep 0:04a9f72bbca7 129 // this algorithm doesn't require it
stevep 0:04a9f72bbca7 130
stevep 0:04a9f72bbca7 131 #ifndef __GNUC__
stevep 0:04a9f72bbca7 132
stevep 0:04a9f72bbca7 133 // this part is compiled by compilers other than gcc, for example visual c
stevep 0:04a9f72bbca7 134
stevep 0:04a9f72bbca7 135 __asm
stevep 0:04a9f72bbca7 136 {
stevep 0:04a9f72bbca7 137 push eax // save every register used below; restored by the pops at the end
stevep 0:04a9f72bbca7 138 push ebx
stevep 0:04a9f72bbca7 139 push ecx
stevep 0:04a9f72bbca7 140 push edx
stevep 0:04a9f72bbca7 141 push esi
stevep 0:04a9f72bbca7 142
stevep 0:04a9f72bbca7 143 mov ecx,[b] // ecx = words left to process
stevep 0:04a9f72bbca7 144
stevep 0:04a9f72bbca7 145 mov ebx,[p1] // ebx = table of this object
stevep 0:04a9f72bbca7 146 mov esi,[p2] // esi = table of ss2
stevep 0:04a9f72bbca7 147
stevep 0:04a9f72bbca7 148 xor edx,edx // edx=0 (word index)
stevep 0:04a9f72bbca7 149 mov eax,[c]
stevep 0:04a9f72bbca7 150 neg eax // CF=1 if eax!=0 , CF=0 if eax==0
stevep 0:04a9f72bbca7 151
stevep 0:04a9f72bbca7 152 ttmath_loop:
stevep 0:04a9f72bbca7 153 mov eax,[esi+edx*4] // eax = ss2.table[edx]
stevep 0:04a9f72bbca7 154 adc [ebx+edx*4],eax // table[edx] += eax + CF
stevep 0:04a9f72bbca7 155
stevep 0:04a9f72bbca7 156 inc edx // inc/dec do not change CF, so the carry survives until the next adc
stevep 0:04a9f72bbca7 157 dec ecx
stevep 0:04a9f72bbca7 158 jnz ttmath_loop
stevep 0:04a9f72bbca7 159
stevep 0:04a9f72bbca7 160 adc ecx, ecx // ecx is 0 here, so ecx = CF (the final carry)
stevep 0:04a9f72bbca7 161 mov [c], ecx
stevep 0:04a9f72bbca7 162
stevep 0:04a9f72bbca7 163 pop esi
stevep 0:04a9f72bbca7 164 pop edx
stevep 0:04a9f72bbca7 165 pop ecx
stevep 0:04a9f72bbca7 166 pop ebx
stevep 0:04a9f72bbca7 167 pop eax
stevep 0:04a9f72bbca7 168 }
stevep 0:04a9f72bbca7 169
stevep 0:04a9f72bbca7 170
stevep 0:04a9f72bbca7 171
stevep 0:04a9f72bbca7 172 #endif
stevep 0:04a9f72bbca7 173
stevep 0:04a9f72bbca7 174
stevep 0:04a9f72bbca7 175 #ifdef __GNUC__
stevep 0:04a9f72bbca7 176 uint dummy, dummy2; // receive the final eax/edx values; not read afterwards in this function
stevep 0:04a9f72bbca7 177 // this part should be compiled with gcc
stevep 0:04a9f72bbca7 178
stevep 0:04a9f72bbca7 179 __asm__ __volatile__(
stevep 0:04a9f72bbca7 180
stevep 0:04a9f72bbca7 181 "xorl %%edx, %%edx \n" // edx = 0 (word index)
stevep 0:04a9f72bbca7 182 "negl %%eax \n" // CF=1 if eax!=0 , CF=0 if eax==0
stevep 0:04a9f72bbca7 183
stevep 0:04a9f72bbca7 184 "1: \n"
stevep 0:04a9f72bbca7 185 "movl (%%esi,%%edx,4), %%eax \n" // eax = ss2.table[edx]
stevep 0:04a9f72bbca7 186 "adcl %%eax, (%%ebx,%%edx,4) \n" // table[edx] += eax + CF
stevep 0:04a9f72bbca7 187
stevep 0:04a9f72bbca7 188 "incl %%edx \n" // inc/dec do not change CF, so the carry survives until the next adc
stevep 0:04a9f72bbca7 189 "decl %%ecx \n"
stevep 0:04a9f72bbca7 190 "jnz 1b \n"
stevep 0:04a9f72bbca7 191
stevep 0:04a9f72bbca7 192 "adc %%ecx, %%ecx \n" // ecx is 0 here, so ecx = CF (the final carry)
stevep 0:04a9f72bbca7 193
stevep 0:04a9f72bbca7 194 : "=c" (c), "=a" (dummy), "=d" (dummy2) // outputs: ecx -> c, eax -> dummy, edx -> dummy2
stevep 0:04a9f72bbca7 195 : "0" (b), "1" (c), "b" (p1), "S" (p2) // inputs: ecx = b, eax = c, ebx = p1, esi = p2
stevep 0:04a9f72bbca7 196 : "cc", "memory" ); // the flags and memory are modified
stevep 0:04a9f72bbca7 197 #endif
stevep 0:04a9f72bbca7 198
stevep 0:04a9f72bbca7 199 TTMATH_LOGC("UInt::Add", c)
stevep 0:04a9f72bbca7 200
stevep 0:04a9f72bbca7 201 return c;
stevep 0:04a9f72bbca7 202 }
stevep 0:04a9f72bbca7 203
stevep 0:04a9f72bbca7 204
stevep 0:04a9f72bbca7 205
stevep 0:04a9f72bbca7 206 /*!
stevep 0:04a9f72bbca7 207 adds one word 'value' at the position 'index', passing any carry on to the following words,
stevep 0:04a9f72bbca7 208 and returns the carry out of the last word (1 if there was one, 0 otherwise)
stevep 0:04a9f72bbca7 209
stevep 0:04a9f72bbca7 210 e.g.
stevep 0:04a9f72bbca7 211
stevep 0:04a9f72bbca7 212 if we've got (value_size=3):
stevep 0:04a9f72bbca7 213 table[0] = 10;
stevep 0:04a9f72bbca7 214 table[1] = 30;
stevep 0:04a9f72bbca7 215 table[2] = 5;
stevep 0:04a9f72bbca7 216 and we call:
stevep 0:04a9f72bbca7 217 AddInt(2,1)
stevep 0:04a9f72bbca7 218 then it'll be:
stevep 0:04a9f72bbca7 219 table[0] = 10;
stevep 0:04a9f72bbca7 220 table[1] = 30 + 2;
stevep 0:04a9f72bbca7 221 table[2] = 5;
stevep 0:04a9f72bbca7 222
stevep 0:04a9f72bbca7 223 of course if there was a carry from table[2] it would be returned
stevep 0:04a9f72bbca7 224 */
stevep 0:04a9f72bbca7 225 template<uint value_size>
stevep 0:04a9f72bbca7 226 uint UInt<value_size>::AddInt(uint value, uint index)
stevep 0:04a9f72bbca7 227 {
stevep 0:04a9f72bbca7 228 uint b = value_size; // number of words in the table
stevep 0:04a9f72bbca7 229 uint * p1 = table; // words of this object (read and written by the asm)
stevep 0:04a9f72bbca7 230 uint c; // set by the asm below: the resulting carry
stevep 0:04a9f72bbca7 231
stevep 0:04a9f72bbca7 232 TTMATH_ASSERT( index < value_size )
stevep 0:04a9f72bbca7 233
stevep 0:04a9f72bbca7 234 #ifndef __GNUC__
stevep 0:04a9f72bbca7 235
stevep 0:04a9f72bbca7 236 __asm
stevep 0:04a9f72bbca7 237 {
stevep 0:04a9f72bbca7 238 push eax // save every register used below; restored by the pops at the end
stevep 0:04a9f72bbca7 239 push ebx
stevep 0:04a9f72bbca7 240 push ecx
stevep 0:04a9f72bbca7 241 push edx
stevep 0:04a9f72bbca7 242
stevep 0:04a9f72bbca7 243 mov ecx, [b]
stevep 0:04a9f72bbca7 244 sub ecx, [index] // ecx = value_size - index = words from 'index' to the end
stevep 0:04a9f72bbca7 245
stevep 0:04a9f72bbca7 246 mov edx, [index] // edx = current word index
stevep 0:04a9f72bbca7 247 mov ebx, [p1] // ebx = table of this object
stevep 0:04a9f72bbca7 248
stevep 0:04a9f72bbca7 249 mov eax, [value] // eax = amount to add to the current word
stevep 0:04a9f72bbca7 250
stevep 0:04a9f72bbca7 251 ttmath_loop:
stevep 0:04a9f72bbca7 252 add [ebx+edx*4], eax // table[edx] += eax
stevep 0:04a9f72bbca7 253 jnc ttmath_end // no carry: nothing more to propagate
stevep 0:04a9f72bbca7 254
stevep 0:04a9f72bbca7 255 mov eax, 1 // the carry becomes the amount added to the next word
stevep 0:04a9f72bbca7 256 inc edx // inc/dec do not change CF
stevep 0:04a9f72bbca7 257 dec ecx
stevep 0:04a9f72bbca7 258 jnz ttmath_loop
stevep 0:04a9f72bbca7 259
stevep 0:04a9f72bbca7 260 ttmath_end:
stevep 0:04a9f72bbca7 261 setc al // al = CF: 1 only if the carry left the last word
stevep 0:04a9f72bbca7 262 movzx edx, al
stevep 0:04a9f72bbca7 263 mov [c], edx
stevep 0:04a9f72bbca7 264
stevep 0:04a9f72bbca7 265 pop edx
stevep 0:04a9f72bbca7 266 pop ecx
stevep 0:04a9f72bbca7 267 pop ebx
stevep 0:04a9f72bbca7 268 pop eax
stevep 0:04a9f72bbca7 269 }
stevep 0:04a9f72bbca7 270
stevep 0:04a9f72bbca7 271 #endif
stevep 0:04a9f72bbca7 272
stevep 0:04a9f72bbca7 273
stevep 0:04a9f72bbca7 274 #ifdef __GNUC__
stevep 0:04a9f72bbca7 275 uint dummy, dummy2; // receive the final eax/ecx values; not read afterwards in this function
stevep 0:04a9f72bbca7 276
stevep 0:04a9f72bbca7 277 __asm__ __volatile__(
stevep 0:04a9f72bbca7 278
stevep 0:04a9f72bbca7 279 "subl %%edx, %%ecx \n" // ecx = value_size - index = words from 'index' to the end
stevep 0:04a9f72bbca7 280
stevep 0:04a9f72bbca7 281 "1: \n"
stevep 0:04a9f72bbca7 282 "addl %%eax, (%%ebx,%%edx,4) \n" // table[edx] += eax
stevep 0:04a9f72bbca7 283 "jnc 2f \n" // no carry: nothing more to propagate
stevep 0:04a9f72bbca7 284
stevep 0:04a9f72bbca7 285 "movl $1, %%eax \n" // the carry becomes the amount added to the next word
stevep 0:04a9f72bbca7 286 "incl %%edx \n" // inc/dec do not change CF
stevep 0:04a9f72bbca7 287 "decl %%ecx \n"
stevep 0:04a9f72bbca7 288 "jnz 1b \n"
stevep 0:04a9f72bbca7 289
stevep 0:04a9f72bbca7 290 "2: \n"
stevep 0:04a9f72bbca7 291 "setc %%al \n" // al = CF: 1 only if the carry left the last word
stevep 0:04a9f72bbca7 292 "movzx %%al, %%edx \n"
stevep 0:04a9f72bbca7 293
stevep 0:04a9f72bbca7 294 : "=d" (c), "=a" (dummy), "=c" (dummy2) // outputs: edx -> c, eax -> dummy, ecx -> dummy2
stevep 0:04a9f72bbca7 295 : "0" (index), "1" (value), "2" (b), "b" (p1) // inputs: edx = index, eax = value, ecx = b, ebx = p1
stevep 0:04a9f72bbca7 296 : "cc", "memory" ); // the flags and memory are modified
stevep 0:04a9f72bbca7 297
stevep 0:04a9f72bbca7 298 #endif
stevep 0:04a9f72bbca7 299
stevep 0:04a9f72bbca7 300 TTMATH_LOGC("UInt::AddInt", c)
stevep 0:04a9f72bbca7 301
stevep 0:04a9f72bbca7 302 return c;
stevep 0:04a9f72bbca7 303 }
stevep 0:04a9f72bbca7 304
stevep 0:04a9f72bbca7 305
stevep 0:04a9f72bbca7 306
stevep 0:04a9f72bbca7 307
stevep 0:04a9f72bbca7 308 /*!
stevep 0:04a9f72bbca7 309 adds two unsigned words to the existing value;
stevep 0:04a9f72bbca7 310 the words are placed starting at the 'index' position
stevep 0:04a9f72bbca7 311 (it's used in the multiplication algorithm 2)
stevep 0:04a9f72bbca7 312
stevep 0:04a9f72bbca7 313 index must be less than or equal to value_size-2 (index <= value_size-2)
stevep 0:04a9f72bbca7 314 x1 - lower word, x2 - higher word (note the parameter order: x2 is passed first)
stevep 0:04a9f72bbca7 315
stevep 0:04a9f72bbca7 316 for example if we've got value_size equal 4 and:
stevep 0:04a9f72bbca7 317 table[0] = 3
stevep 0:04a9f72bbca7 318 table[1] = 4
stevep 0:04a9f72bbca7 319 table[2] = 5
stevep 0:04a9f72bbca7 320 table[3] = 6
stevep 0:04a9f72bbca7 321 then let
stevep 0:04a9f72bbca7 322 x1 = 10
stevep 0:04a9f72bbca7 323 x2 = 20
stevep 0:04a9f72bbca7 324 and
stevep 0:04a9f72bbca7 325 index = 1
stevep 0:04a9f72bbca7 326
stevep 0:04a9f72bbca7 327 the result of this method will be:
stevep 0:04a9f72bbca7 328 table[0] = 3
stevep 0:04a9f72bbca7 329 table[1] = 4 + x1 = 14
stevep 0:04a9f72bbca7 330 table[2] = 5 + x2 = 25
stevep 0:04a9f72bbca7 331 table[3] = 6
stevep 0:04a9f72bbca7 332
stevep 0:04a9f72bbca7 333 and no carry at the end of table[3] (so the method returns 0)
stevep 0:04a9f72bbca7 334
stevep 0:04a9f72bbca7 335 (of course if there was a carry in table[2](5+20) then
stevep 0:04a9f72bbca7 336 this carry would be passed to the table[3] etc.)
stevep 0:04a9f72bbca7 337 */
stevep 0:04a9f72bbca7 338 template<uint value_size>
stevep 0:04a9f72bbca7 339 uint UInt<value_size>::AddTwoInts(uint x2, uint x1, uint index)
stevep 0:04a9f72bbca7 340 {
stevep 0:04a9f72bbca7 341 uint b = value_size; // number of words in the table
stevep 0:04a9f72bbca7 342 uint * p1 = table; // words of this object (read and written by the asm)
stevep 0:04a9f72bbca7 343 uint c; // set by the asm below: the resulting carry
stevep 0:04a9f72bbca7 344
stevep 0:04a9f72bbca7 345 TTMATH_ASSERT( index < value_size - 1 )
stevep 0:04a9f72bbca7 346
stevep 0:04a9f72bbca7 347 #ifndef __GNUC__
stevep 0:04a9f72bbca7 348 __asm
stevep 0:04a9f72bbca7 349 {
stevep 0:04a9f72bbca7 350 push eax // save every register used below; restored by the pops at the end
stevep 0:04a9f72bbca7 351 push ebx
stevep 0:04a9f72bbca7 352 push ecx
stevep 0:04a9f72bbca7 353 push edx
stevep 0:04a9f72bbca7 354
stevep 0:04a9f72bbca7 355 mov ecx, [b]
stevep 0:04a9f72bbca7 356 sub ecx, [index] // ecx = value_size - index = words from 'index' to the end
stevep 0:04a9f72bbca7 357
stevep 0:04a9f72bbca7 358 mov ebx, [p1] // ebx = table of this object
stevep 0:04a9f72bbca7 359 mov edx, [index] // edx = current word index
stevep 0:04a9f72bbca7 360
stevep 0:04a9f72bbca7 361 mov eax, [x1]
stevep 0:04a9f72bbca7 362 add [ebx+edx*4], eax // table[index] += x1 (the lower word)
stevep 0:04a9f72bbca7 363 inc edx // inc/dec do not change CF, so the carry of the add above is kept
stevep 0:04a9f72bbca7 364 dec ecx
stevep 0:04a9f72bbca7 365
stevep 0:04a9f72bbca7 366 mov eax, [x2] // the higher word goes to table[index+1]
stevep 0:04a9f72bbca7 367
stevep 0:04a9f72bbca7 368 ttmath_loop:
stevep 0:04a9f72bbca7 369 adc [ebx+edx*4], eax // table[edx] += eax + CF
stevep 0:04a9f72bbca7 370 jnc ttmath_end // no carry: nothing more to propagate
stevep 0:04a9f72bbca7 371
stevep 0:04a9f72bbca7 372 mov eax, 0 // from now on only the carry is added; mov leaves CF intact
stevep 0:04a9f72bbca7 373 inc edx
stevep 0:04a9f72bbca7 374 dec ecx
stevep 0:04a9f72bbca7 375 jnz ttmath_loop
stevep 0:04a9f72bbca7 376
stevep 0:04a9f72bbca7 377 ttmath_end:
stevep 0:04a9f72bbca7 378 setc al // al = CF: 1 only if the carry left the last word
stevep 0:04a9f72bbca7 379 movzx edx, al
stevep 0:04a9f72bbca7 380 mov [c], edx
stevep 0:04a9f72bbca7 381
stevep 0:04a9f72bbca7 382 pop edx
stevep 0:04a9f72bbca7 383 pop ecx
stevep 0:04a9f72bbca7 384 pop ebx
stevep 0:04a9f72bbca7 385 pop eax
stevep 0:04a9f72bbca7 386
stevep 0:04a9f72bbca7 387 }
stevep 0:04a9f72bbca7 388 #endif
stevep 0:04a9f72bbca7 389
stevep 0:04a9f72bbca7 390
stevep 0:04a9f72bbca7 391 #ifdef __GNUC__
stevep 0:04a9f72bbca7 392 uint dummy, dummy2; // receive the final ecx/edx values; not read afterwards in this function
stevep 0:04a9f72bbca7 393
stevep 0:04a9f72bbca7 394 __asm__ __volatile__(
stevep 0:04a9f72bbca7 395
stevep 0:04a9f72bbca7 396 "subl %%edx, %%ecx \n" // ecx = value_size - index = words from 'index' to the end
stevep 0:04a9f72bbca7 397
stevep 0:04a9f72bbca7 398 "addl %%esi, (%%ebx,%%edx,4) \n" // table[index] += x1 (the lower word)
stevep 0:04a9f72bbca7 399 "incl %%edx \n" // inc/dec do not change CF, so the carry of the add above is kept
stevep 0:04a9f72bbca7 400 "decl %%ecx \n"
stevep 0:04a9f72bbca7 401
stevep 0:04a9f72bbca7 402 "1: \n"
stevep 0:04a9f72bbca7 403 "adcl %%eax, (%%ebx,%%edx,4) \n" // table[edx] += eax + CF (eax is x2 on the first pass)
stevep 0:04a9f72bbca7 404 "jnc 2f \n" // no carry: nothing more to propagate
stevep 0:04a9f72bbca7 405
stevep 0:04a9f72bbca7 406 "mov $0, %%eax \n" // from now on only the carry is added; mov leaves CF intact
stevep 0:04a9f72bbca7 407 "incl %%edx \n"
stevep 0:04a9f72bbca7 408 "decl %%ecx \n"
stevep 0:04a9f72bbca7 409 "jnz 1b \n"
stevep 0:04a9f72bbca7 410
stevep 0:04a9f72bbca7 411 "2: \n"
stevep 0:04a9f72bbca7 412 "setc %%al \n" // al = CF: 1 only if the carry left the last word
stevep 0:04a9f72bbca7 413 "movzx %%al, %%eax \n"
stevep 0:04a9f72bbca7 414
stevep 0:04a9f72bbca7 415 : "=a" (c), "=c" (dummy), "=d" (dummy2) // outputs: eax -> c, ecx -> dummy, edx -> dummy2
stevep 0:04a9f72bbca7 416 : "0" (x2), "1" (b), "2" (index), "b" (p1), "S" (x1) // inputs: eax = x2, ecx = b, edx = index, ebx = p1, esi = x1
stevep 0:04a9f72bbca7 417 : "cc", "memory" ); // the flags and memory are modified
stevep 0:04a9f72bbca7 418
stevep 0:04a9f72bbca7 419 #endif
stevep 0:04a9f72bbca7 420
stevep 0:04a9f72bbca7 421 TTMATH_LOGC("UInt::AddTwoInts", c)
stevep 0:04a9f72bbca7 422
stevep 0:04a9f72bbca7 423 return c;
stevep 0:04a9f72bbca7 424 }
stevep 0:04a9f72bbca7 425
stevep 0:04a9f72bbca7 426
stevep 0:04a9f72bbca7 427
stevep 0:04a9f72bbca7 428 /*!
stevep 0:04a9f72bbca7 429 this static method adds one vector to another
stevep 0:04a9f72bbca7 430 'ss1' is larger in size or equal to 'ss2'
stevep 0:04a9f72bbca7 431
stevep 0:04a9f72bbca7 432 ss1 points to the first (larger) vector
stevep 0:04a9f72bbca7 433 ss2 points to the second vector
stevep 0:04a9f72bbca7 434 ss1_size - size of the ss1 (and size of the result too)
stevep 0:04a9f72bbca7 435 ss2_size - size of the ss2
stevep 0:04a9f72bbca7 436 result - is the result vector (which has size the same as ss1: ss1_size)
stevep 0:04a9f72bbca7 437
stevep 0:04a9f72bbca7 438 Example: ss1_size is 5, ss2_size is 3
stevep 0:04a9f72bbca7 439 ss1: ss2: result (output):
stevep 0:04a9f72bbca7 440 5 1 5+1
stevep 0:04a9f72bbca7 441 4 3 4+3
stevep 0:04a9f72bbca7 442 2 7 2+7
stevep 0:04a9f72bbca7 443 6 6
stevep 0:04a9f72bbca7 444 9 9
stevep 0:04a9f72bbca7 445 of course the carry is propagated and the carry out of the last item is returned (1 or 0)
stevep 0:04a9f72bbca7 446 (this method is used by the Karatsuba multiplication algorithm)
stevep 0:04a9f72bbca7 447 */
stevep 0:04a9f72bbca7 448 template<uint value_size>
stevep 0:04a9f72bbca7 449 uint UInt<value_size>::AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
stevep 0:04a9f72bbca7 450 {
stevep 0:04a9f72bbca7 451 TTMATH_ASSERT( ss1_size >= ss2_size )
stevep 0:04a9f72bbca7 452
stevep 0:04a9f72bbca7 453 uint rest = ss1_size - ss2_size; // words of ss1 that have no counterpart in ss2
stevep 0:04a9f72bbca7 454 uint c; // set by the asm below: the resulting carry
stevep 0:04a9f72bbca7 455
stevep 0:04a9f72bbca7 456 #ifndef __GNUC__
stevep 0:04a9f72bbca7 457
stevep 0:04a9f72bbca7 458 // this part is compiled by compilers other than gcc, for example visual c
stevep 0:04a9f72bbca7 459 __asm
stevep 0:04a9f72bbca7 460 {
stevep 0:04a9f72bbca7 461 pushad // save all general registers; restored by popad at the end
stevep 0:04a9f72bbca7 462
stevep 0:04a9f72bbca7 463 mov ecx, [ss2_size] // NOTE(review): the loop below runs at least once, so ss2_size == 0 is not handled - presumably callers never pass it; confirm
stevep 0:04a9f72bbca7 464 xor edx, edx // edx = 0, cf = 0
stevep 0:04a9f72bbca7 465
stevep 0:04a9f72bbca7 466 mov esi, [ss1]
stevep 0:04a9f72bbca7 467 mov ebx, [ss2]
stevep 0:04a9f72bbca7 468 mov edi, [result]
stevep 0:04a9f72bbca7 469
stevep 0:04a9f72bbca7 470 ttmath_loop: // first part: the words present in both vectors
stevep 0:04a9f72bbca7 471 mov eax, [esi+edx*4] // eax = ss1[edx]
stevep 0:04a9f72bbca7 472 adc eax, [ebx+edx*4] // eax += ss2[edx] + CF
stevep 0:04a9f72bbca7 473 mov [edi+edx*4], eax // result[edx] = eax
stevep 0:04a9f72bbca7 474
stevep 0:04a9f72bbca7 475 inc edx // inc/dec do not change CF
stevep 0:04a9f72bbca7 476 dec ecx
stevep 0:04a9f72bbca7 477 jnz ttmath_loop
stevep 0:04a9f72bbca7 478
stevep 0:04a9f72bbca7 479 adc ecx, ecx // ecx has the cf state
stevep 0:04a9f72bbca7 480
stevep 0:04a9f72bbca7 481 mov ebx, [rest]
stevep 0:04a9f72bbca7 482 or ebx, ebx // sets ZF when rest == 0
stevep 0:04a9f72bbca7 483 jz ttmath_end // equal sizes: ecx already holds the carry
stevep 0:04a9f72bbca7 484
stevep 0:04a9f72bbca7 485 xor ebx, ebx // ebx = 0 (this clears CF, which is why the carry was kept in ecx)
stevep 0:04a9f72bbca7 486 neg ecx // setting cf from ecx
stevep 0:04a9f72bbca7 487 mov ecx, [rest] // ecx is != 0
stevep 0:04a9f72bbca7 488
stevep 0:04a9f72bbca7 489 ttmath_loop2: // second part: the remaining words of ss1, only the carry is added
stevep 0:04a9f72bbca7 490 mov eax, [esi+edx*4] // eax = ss1[edx]
stevep 0:04a9f72bbca7 491 adc eax, ebx // eax += 0 + CF
stevep 0:04a9f72bbca7 492 mov [edi+edx*4], eax // result[edx] = eax
stevep 0:04a9f72bbca7 493
stevep 0:04a9f72bbca7 494 inc edx
stevep 0:04a9f72bbca7 495 dec ecx
stevep 0:04a9f72bbca7 496 jnz ttmath_loop2
stevep 0:04a9f72bbca7 497
stevep 0:04a9f72bbca7 498 adc ecx, ecx // ecx is 0 here, so ecx = CF (the final carry)
stevep 0:04a9f72bbca7 499
stevep 0:04a9f72bbca7 500 ttmath_end:
stevep 0:04a9f72bbca7 501 mov [c], ecx
stevep 0:04a9f72bbca7 502
stevep 0:04a9f72bbca7 503 popad
stevep 0:04a9f72bbca7 504 }
stevep 0:04a9f72bbca7 505
stevep 0:04a9f72bbca7 506 #endif
stevep 0:04a9f72bbca7 507
stevep 0:04a9f72bbca7 508
stevep 0:04a9f72bbca7 509 #ifdef __GNUC__
stevep 0:04a9f72bbca7 510
stevep 0:04a9f72bbca7 511 // this part should be compiled with gcc
stevep 0:04a9f72bbca7 512 uint dummy1, dummy2, dummy3; // receive the final eax/ebx/edx values; not read afterwards in this function
stevep 0:04a9f72bbca7 513
stevep 0:04a9f72bbca7 514 __asm__ __volatile__(
stevep 0:04a9f72bbca7 515 "push %%edx \n" // keep 'rest' on the stack because edx is reused as the word index
stevep 0:04a9f72bbca7 516 "xor %%edx, %%edx \n" // edx = 0, cf = 0
stevep 0:04a9f72bbca7 517 "1: \n" // first part: the words present in both vectors (runs at least once)
stevep 0:04a9f72bbca7 518 "mov (%%esi,%%edx,4), %%eax \n" // eax = ss1[edx]
stevep 0:04a9f72bbca7 519 "adc (%%ebx,%%edx,4), %%eax \n" // eax += ss2[edx] + CF
stevep 0:04a9f72bbca7 520 "mov %%eax, (%%edi,%%edx,4) \n" // result[edx] = eax
stevep 0:04a9f72bbca7 521
stevep 0:04a9f72bbca7 522 "inc %%edx \n" // inc/dec do not change CF
stevep 0:04a9f72bbca7 523 "dec %%ecx \n"
stevep 0:04a9f72bbca7 524 "jnz 1b \n"
stevep 0:04a9f72bbca7 525
stevep 0:04a9f72bbca7 526 "adc %%ecx, %%ecx \n" // ecx has the cf state
stevep 0:04a9f72bbca7 527 "pop %%eax \n" // eax = rest
stevep 0:04a9f72bbca7 528
stevep 0:04a9f72bbca7 529 "or %%eax, %%eax \n" // sets ZF when rest == 0
stevep 0:04a9f72bbca7 530 "jz 3f \n" // equal sizes: ecx already holds the carry
stevep 0:04a9f72bbca7 531
stevep 0:04a9f72bbca7 532 "xor %%ebx, %%ebx \n" // ebx = 0 (this clears CF, which is why the carry was kept in ecx)
stevep 0:04a9f72bbca7 533 "neg %%ecx \n" // setting cf from ecx
stevep 0:04a9f72bbca7 534 "mov %%eax, %%ecx \n" // ecx=rest and is != 0
stevep 0:04a9f72bbca7 535 "2: \n" // second part: the remaining words of ss1, only the carry is added
stevep 0:04a9f72bbca7 536 "mov (%%esi, %%edx, 4), %%eax \n" // eax = ss1[edx]
stevep 0:04a9f72bbca7 537 "adc %%ebx, %%eax \n" // eax += 0 + CF
stevep 0:04a9f72bbca7 538 "mov %%eax, (%%edi, %%edx, 4) \n" // result[edx] = eax
stevep 0:04a9f72bbca7 539
stevep 0:04a9f72bbca7 540 "inc %%edx \n"
stevep 0:04a9f72bbca7 541 "dec %%ecx \n"
stevep 0:04a9f72bbca7 542 "jnz 2b \n"
stevep 0:04a9f72bbca7 543
stevep 0:04a9f72bbca7 544 "adc %%ecx, %%ecx \n" // ecx is 0 here, so ecx = CF (the final carry)
stevep 0:04a9f72bbca7 545 "3: \n"
stevep 0:04a9f72bbca7 546
stevep 0:04a9f72bbca7 547 : "=a" (dummy1), "=b" (dummy2), "=c" (c), "=d" (dummy3) // outputs: ecx -> c, the other registers are scratch
stevep 0:04a9f72bbca7 548 : "1" (ss2), "2" (ss2_size), "3" (rest), "S" (ss1), "D" (result) // inputs: ebx = ss2, ecx = ss2_size, edx = rest, esi = ss1, edi = result
stevep 0:04a9f72bbca7 549 : "cc", "memory" ); // the flags and memory are modified
stevep 0:04a9f72bbca7 550
stevep 0:04a9f72bbca7 551 #endif
stevep 0:04a9f72bbca7 552
stevep 0:04a9f72bbca7 553 TTMATH_VECTOR_LOGC("UInt::AddVector", c, result, ss1_size)
stevep 0:04a9f72bbca7 554
stevep 0:04a9f72bbca7 555 return c;
stevep 0:04a9f72bbca7 556 }
stevep 0:04a9f72bbca7 557
stevep 0:04a9f72bbca7 558
stevep 0:04a9f72bbca7 559 /*!
stevep 0:04a9f72bbca7 560 subtracts ss2 from this object and also subtracts
stevep 0:04a9f72bbca7 561 an incoming carry (borrow) when c is non-zero
stevep 0:04a9f72bbca7 562 (this = this - ss2 - c)
stevep 0:04a9f72bbca7 563
stevep 0:04a9f72bbca7 564 c is used as a flag: zero means no borrow, any non-zero value counts as a borrow of one
stevep 0:04a9f72bbca7 565 the function returns 1 if a borrow came out of the last word (table[value_size-1]), otherwise 0
stevep 0:04a9f72bbca7 566 */
stevep 0:04a9f72bbca7 567 template<uint value_size>
stevep 0:04a9f72bbca7 568 uint UInt<value_size>::Sub(const UInt<value_size> & ss2, uint c)
stevep 0:04a9f72bbca7 569 {
stevep 0:04a9f72bbca7 570 uint b = value_size; // number of words; loaded into ecx as the loop counter
stevep 0:04a9f72bbca7 571 uint * p1 = table; // words of this object (read and written by the asm)
stevep 0:04a9f72bbca7 572 uint * p2 = const_cast<uint*>(ss2.table); // words of ss2; the asm below only reads through p2
stevep 0:04a9f72bbca7 573
stevep 0:04a9f72bbca7 574 // we don't have to use TTMATH_REFERENCE_ASSERT here
stevep 0:04a9f72bbca7 575 // this algorithm doesn't require it
stevep 0:04a9f72bbca7 576
stevep 0:04a9f72bbca7 577 #ifndef __GNUC__
stevep 0:04a9f72bbca7 578
stevep 0:04a9f72bbca7 579 __asm
stevep 0:04a9f72bbca7 580 {
stevep 0:04a9f72bbca7 581 push eax // save every register used below; restored by the pops at the end
stevep 0:04a9f72bbca7 582 push ebx
stevep 0:04a9f72bbca7 583 push ecx
stevep 0:04a9f72bbca7 584 push edx
stevep 0:04a9f72bbca7 585 push esi
stevep 0:04a9f72bbca7 586
stevep 0:04a9f72bbca7 587 mov ecx,[b] // ecx = words left to process
stevep 0:04a9f72bbca7 588
stevep 0:04a9f72bbca7 589 mov ebx,[p1] // ebx = table of this object
stevep 0:04a9f72bbca7 590 mov esi,[p2] // esi = table of ss2
stevep 0:04a9f72bbca7 591
stevep 0:04a9f72bbca7 592 xor edx,edx // edx=0 (word index)
stevep 0:04a9f72bbca7 593 mov eax,[c]
stevep 0:04a9f72bbca7 594 neg eax // CF=1 if eax!=0 , CF=0 if eax==0
stevep 0:04a9f72bbca7 595
stevep 0:04a9f72bbca7 596 ttmath_loop:
stevep 0:04a9f72bbca7 597 mov eax,[esi+edx*4] // eax = ss2.table[edx]
stevep 0:04a9f72bbca7 598 sbb [ebx+edx*4],eax // table[edx] -= eax + CF
stevep 0:04a9f72bbca7 599
stevep 0:04a9f72bbca7 600 inc edx // inc/dec do not change CF, so the borrow survives until the next sbb
stevep 0:04a9f72bbca7 601 dec ecx
stevep 0:04a9f72bbca7 602 jnz ttmath_loop
stevep 0:04a9f72bbca7 603
stevep 0:04a9f72bbca7 604 adc ecx, ecx // ecx is 0 here, so ecx = CF (the final borrow)
stevep 0:04a9f72bbca7 605 mov [c], ecx
stevep 0:04a9f72bbca7 606
stevep 0:04a9f72bbca7 607 pop esi
stevep 0:04a9f72bbca7 608 pop edx
stevep 0:04a9f72bbca7 609 pop ecx
stevep 0:04a9f72bbca7 610 pop ebx
stevep 0:04a9f72bbca7 611 pop eax
stevep 0:04a9f72bbca7 612 }
stevep 0:04a9f72bbca7 613
stevep 0:04a9f72bbca7 614 #endif
stevep 0:04a9f72bbca7 615
stevep 0:04a9f72bbca7 616
stevep 0:04a9f72bbca7 617 #ifdef __GNUC__
stevep 0:04a9f72bbca7 618 uint dummy, dummy2; // receive the final eax/edx values; not read afterwards in this function
stevep 0:04a9f72bbca7 619
stevep 0:04a9f72bbca7 620 __asm__ __volatile__(
stevep 0:04a9f72bbca7 621
stevep 0:04a9f72bbca7 622 "xorl %%edx, %%edx \n" // edx = 0 (word index)
stevep 0:04a9f72bbca7 623 "negl %%eax \n" // CF=1 if eax!=0 , CF=0 if eax==0
stevep 0:04a9f72bbca7 624
stevep 0:04a9f72bbca7 625 "1: \n"
stevep 0:04a9f72bbca7 626 "movl (%%esi,%%edx,4), %%eax \n" // eax = ss2.table[edx]
stevep 0:04a9f72bbca7 627 "sbbl %%eax, (%%ebx,%%edx,4) \n" // table[edx] -= eax + CF
stevep 0:04a9f72bbca7 628
stevep 0:04a9f72bbca7 629 "incl %%edx \n" // inc/dec do not change CF, so the borrow survives until the next sbb
stevep 0:04a9f72bbca7 630 "decl %%ecx \n"
stevep 0:04a9f72bbca7 631 "jnz 1b \n"
stevep 0:04a9f72bbca7 632
stevep 0:04a9f72bbca7 633 "adc %%ecx, %%ecx \n" // ecx is 0 here, so ecx = CF (the final borrow)
stevep 0:04a9f72bbca7 634
stevep 0:04a9f72bbca7 635 : "=c" (c), "=a" (dummy), "=d" (dummy2) // outputs: ecx -> c, eax -> dummy, edx -> dummy2
stevep 0:04a9f72bbca7 636 : "0" (b), "1" (c), "b" (p1), "S" (p2) // inputs: ecx = b, eax = c, ebx = p1, esi = p2
stevep 0:04a9f72bbca7 637 : "cc", "memory" ); // the flags and memory are modified
stevep 0:04a9f72bbca7 638
stevep 0:04a9f72bbca7 639 #endif
stevep 0:04a9f72bbca7 640
stevep 0:04a9f72bbca7 641 TTMATH_LOGC("UInt::Sub", c)
stevep 0:04a9f72bbca7 642
stevep 0:04a9f72bbca7 643 return c;
stevep 0:04a9f72bbca7 644 }
stevep 0:04a9f72bbca7 645
stevep 0:04a9f72bbca7 646
stevep 0:04a9f72bbca7 647
stevep 0:04a9f72bbca7 648
stevep 0:04a9f72bbca7 649 /*!
stevep 0:04a9f72bbca7 650 this method subtracts one word 'value' at the position 'index', passing any borrow on to the following words,
stevep 0:04a9f72bbca7 651 and returns the carry (borrow) out of the last word (1 if there was one, 0 otherwise)
stevep 0:04a9f72bbca7 652
stevep 0:04a9f72bbca7 653 e.g.
stevep 0:04a9f72bbca7 654
stevep 0:04a9f72bbca7 655 if we've got (value_size=3):
stevep 0:04a9f72bbca7 656 table[0] = 10;
stevep 0:04a9f72bbca7 657 table[1] = 30;
stevep 0:04a9f72bbca7 658 table[2] = 5;
stevep 0:04a9f72bbca7 659 and we call:
stevep 0:04a9f72bbca7 660 SubInt(2,1)
stevep 0:04a9f72bbca7 661 then it'll be:
stevep 0:04a9f72bbca7 662 table[0] = 10;
stevep 0:04a9f72bbca7 663 table[1] = 30 - 2;
stevep 0:04a9f72bbca7 664 table[2] = 5;
stevep 0:04a9f72bbca7 665
stevep 0:04a9f72bbca7 666 of course if there was a carry (borrow) from table[2] it would be returned
stevep 0:04a9f72bbca7 667 */
stevep 0:04a9f72bbca7 668 template<uint value_size>
stevep 0:04a9f72bbca7 669 uint UInt<value_size>::SubInt(uint value, uint index)
stevep 0:04a9f72bbca7 670 {
stevep 0:04a9f72bbca7 671 uint b = value_size; // number of words in the table
stevep 0:04a9f72bbca7 672 uint * p1 = table; // words of this object (read and written by the asm)
stevep 0:04a9f72bbca7 673 uint c; // set by the asm below: the resulting borrow
stevep 0:04a9f72bbca7 674
stevep 0:04a9f72bbca7 675 TTMATH_ASSERT( index < value_size )
stevep 0:04a9f72bbca7 676
stevep 0:04a9f72bbca7 677 #ifndef __GNUC__
stevep 0:04a9f72bbca7 678
stevep 0:04a9f72bbca7 679 __asm
stevep 0:04a9f72bbca7 680 {
stevep 0:04a9f72bbca7 681 push eax // save every register used below; restored by the pops at the end
stevep 0:04a9f72bbca7 682 push ebx
stevep 0:04a9f72bbca7 683 push ecx
stevep 0:04a9f72bbca7 684 push edx
stevep 0:04a9f72bbca7 685
stevep 0:04a9f72bbca7 686 mov ecx, [b]
stevep 0:04a9f72bbca7 687 sub ecx, [index] // ecx = value_size - index = words from 'index' to the end
stevep 0:04a9f72bbca7 688
stevep 0:04a9f72bbca7 689 mov edx, [index] // edx = current word index
stevep 0:04a9f72bbca7 690 mov ebx, [p1] // ebx = table of this object
stevep 0:04a9f72bbca7 691
stevep 0:04a9f72bbca7 692 mov eax, [value] // eax = amount to subtract from the current word
stevep 0:04a9f72bbca7 693
stevep 0:04a9f72bbca7 694 ttmath_loop:
stevep 0:04a9f72bbca7 695 sub [ebx+edx*4], eax // table[edx] -= eax
stevep 0:04a9f72bbca7 696 jnc ttmath_end // no borrow: nothing more to propagate
stevep 0:04a9f72bbca7 697
stevep 0:04a9f72bbca7 698 mov eax, 1 // the borrow becomes the amount subtracted from the next word
stevep 0:04a9f72bbca7 699 inc edx // inc/dec do not change CF
stevep 0:04a9f72bbca7 700 dec ecx
stevep 0:04a9f72bbca7 701 jnz ttmath_loop
stevep 0:04a9f72bbca7 702
stevep 0:04a9f72bbca7 703 ttmath_end:
stevep 0:04a9f72bbca7 704 setc al // al = CF: 1 only if the borrow left the last word
stevep 0:04a9f72bbca7 705 movzx edx, al
stevep 0:04a9f72bbca7 706 mov [c], edx
stevep 0:04a9f72bbca7 707
stevep 0:04a9f72bbca7 708 pop edx
stevep 0:04a9f72bbca7 709 pop ecx
stevep 0:04a9f72bbca7 710 pop ebx
stevep 0:04a9f72bbca7 711 pop eax
stevep 0:04a9f72bbca7 712 }
stevep 0:04a9f72bbca7 713
stevep 0:04a9f72bbca7 714 #endif
stevep 0:04a9f72bbca7 715
stevep 0:04a9f72bbca7 716
stevep 0:04a9f72bbca7 717 #ifdef __GNUC__
stevep 0:04a9f72bbca7 718 uint dummy, dummy2; // receive the final eax/ecx values; not read afterwards in this function
stevep 0:04a9f72bbca7 719
stevep 0:04a9f72bbca7 720 __asm__ __volatile__(
stevep 0:04a9f72bbca7 721
stevep 0:04a9f72bbca7 722 "subl %%edx, %%ecx \n" // ecx = value_size - index = words from 'index' to the end
stevep 0:04a9f72bbca7 723
stevep 0:04a9f72bbca7 724 "1: \n"
stevep 0:04a9f72bbca7 725 "subl %%eax, (%%ebx,%%edx,4) \n" // table[edx] -= eax
stevep 0:04a9f72bbca7 726 "jnc 2f \n" // no borrow: nothing more to propagate
stevep 0:04a9f72bbca7 727
stevep 0:04a9f72bbca7 728 "movl $1, %%eax \n" // the borrow becomes the amount subtracted from the next word
stevep 0:04a9f72bbca7 729 "incl %%edx \n" // inc/dec do not change CF
stevep 0:04a9f72bbca7 730 "decl %%ecx \n"
stevep 0:04a9f72bbca7 731 "jnz 1b \n"
stevep 0:04a9f72bbca7 732
stevep 0:04a9f72bbca7 733 "2: \n"
stevep 0:04a9f72bbca7 734 "setc %%al \n" // al = CF: 1 only if the borrow left the last word
stevep 0:04a9f72bbca7 735 "movzx %%al, %%edx \n"
stevep 0:04a9f72bbca7 736
stevep 0:04a9f72bbca7 737 : "=d" (c), "=a" (dummy), "=c" (dummy2) // outputs: edx -> c, eax -> dummy, ecx -> dummy2
stevep 0:04a9f72bbca7 738 : "0" (index), "1" (value), "2" (b), "b" (p1) // inputs: edx = index, eax = value, ecx = b, ebx = p1
stevep 0:04a9f72bbca7 739 : "cc", "memory" ); // the flags and memory are modified
stevep 0:04a9f72bbca7 740
stevep 0:04a9f72bbca7 741 #endif
stevep 0:04a9f72bbca7 742
stevep 0:04a9f72bbca7 743 TTMATH_LOGC("UInt::SubInt", c)
stevep 0:04a9f72bbca7 744
stevep 0:04a9f72bbca7 745 return c;
stevep 0:04a9f72bbca7 746 }
stevep 0:04a9f72bbca7 747
stevep 0:04a9f72bbca7 748
stevep 0:04a9f72bbca7 749
stevep 0:04a9f72bbca7 750 /*!
stevep 0:04a9f72bbca7 751 this static method subtracts one vector from another
stevep 0:04a9f72bbca7 752 'ss1' is larger in size or equal to 'ss2'
stevep 0:04a9f72bbca7 753
stevep 0:04a9f72bbca7 754 ss1 points to the first (larger) vector
stevep 0:04a9f72bbca7 755 ss2 points to the second vector
stevep 0:04a9f72bbca7 756 ss1_size - size of the ss1 (and size of the result too)
stevep 0:04a9f72bbca7 757 ss2_size - size of the ss2
stevep 0:04a9f72bbca7 758 result - is the result vector (which has size the same as ss1: ss1_size)
stevep 0:04a9f72bbca7 759
stevep 0:04a9f72bbca7 760 Example: ss1_size is 5, ss2_size is 3
stevep 0:04a9f72bbca7 761 ss1: ss2: result (output):
stevep 0:04a9f72bbca7 762 5 1 5-1
stevep 0:04a9f72bbca7 763 4 3 4-3
stevep 0:04a9f72bbca7 764 2 7 2-7
stevep 0:04a9f72bbca7 765 6 6-1 (the borrow from previous item)
stevep 0:04a9f72bbca7 766 9 9
stevep 0:04a9f72bbca7 767 return (carry): 0
stevep 0:04a9f72bbca7 768 of course the carry (borrow) is propagated and will be returned from the last item
stevep 0:04a9f72bbca7 769 (this method is used by the Karatsuba multiplication algorithm)
stevep 0:04a9f72bbca7 770 */
stevep 0:04a9f72bbca7 771 template<uint value_size>
stevep 0:04a9f72bbca7 772 uint UInt<value_size>::SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
stevep 0:04a9f72bbca7 773 {
stevep 0:04a9f72bbca7 774 TTMATH_ASSERT( ss1_size >= ss2_size )
stevep 0:04a9f72bbca7 775
stevep 0:04a9f72bbca7 776 uint rest = ss1_size - ss2_size;
stevep 0:04a9f72bbca7 777 uint c;
stevep 0:04a9f72bbca7 778
stevep 0:04a9f72bbca7 779 #ifndef __GNUC__
stevep 0:04a9f72bbca7 780
stevep 0:04a9f72bbca7 781 // this part might be compiled with for example visual c
stevep 0:04a9f72bbca7 782
stevep 0:04a9f72bbca7 783 /*
stevep 0:04a9f72bbca7 784 the asm code is nearly the same as in AddVector
stevep 0:04a9f72bbca7 785 only two instructions 'adc' are changed to 'sbb'
stevep 0:04a9f72bbca7 786 */
stevep 0:04a9f72bbca7 787 __asm
stevep 0:04a9f72bbca7 788 {
stevep 0:04a9f72bbca7 789 pushad
stevep 0:04a9f72bbca7 790
stevep 0:04a9f72bbca7 791 mov ecx, [ss2_size]
stevep 0:04a9f72bbca7 792 xor edx, edx // edx = 0, cf = 0
stevep 0:04a9f72bbca7 793
stevep 0:04a9f72bbca7 794 mov esi, [ss1]
stevep 0:04a9f72bbca7 795 mov ebx, [ss2]
stevep 0:04a9f72bbca7 796 mov edi, [result]
stevep 0:04a9f72bbca7 797
stevep 0:04a9f72bbca7 798 ttmath_loop:
stevep 0:04a9f72bbca7 799 mov eax, [esi+edx*4]
stevep 0:04a9f72bbca7 800 sbb eax, [ebx+edx*4]
stevep 0:04a9f72bbca7 801 mov [edi+edx*4], eax
stevep 0:04a9f72bbca7 802
stevep 0:04a9f72bbca7 803 inc edx
stevep 0:04a9f72bbca7 804 dec ecx
stevep 0:04a9f72bbca7 805 jnz ttmath_loop
stevep 0:04a9f72bbca7 806
stevep 0:04a9f72bbca7 807 adc ecx, ecx // ecx has the cf state
stevep 0:04a9f72bbca7 808
stevep 0:04a9f72bbca7 809 mov ebx, [rest]
stevep 0:04a9f72bbca7 810 or ebx, ebx
stevep 0:04a9f72bbca7 811 jz ttmath_end
stevep 0:04a9f72bbca7 812
stevep 0:04a9f72bbca7 813 xor ebx, ebx // ebx = 0
stevep 0:04a9f72bbca7 814 neg ecx // setting cf from ecx
stevep 0:04a9f72bbca7 815 mov ecx, [rest] // ecx is != 0
stevep 0:04a9f72bbca7 816
stevep 0:04a9f72bbca7 817 ttmath_loop2:
stevep 0:04a9f72bbca7 818 mov eax, [esi+edx*4]
stevep 0:04a9f72bbca7 819 sbb eax, ebx
stevep 0:04a9f72bbca7 820 mov [edi+edx*4], eax
stevep 0:04a9f72bbca7 821
stevep 0:04a9f72bbca7 822 inc edx
stevep 0:04a9f72bbca7 823 dec ecx
stevep 0:04a9f72bbca7 824 jnz ttmath_loop2
stevep 0:04a9f72bbca7 825
stevep 0:04a9f72bbca7 826 adc ecx, ecx
stevep 0:04a9f72bbca7 827
stevep 0:04a9f72bbca7 828 ttmath_end:
stevep 0:04a9f72bbca7 829 mov [c], ecx
stevep 0:04a9f72bbca7 830
stevep 0:04a9f72bbca7 831 popad
stevep 0:04a9f72bbca7 832 }
stevep 0:04a9f72bbca7 833
stevep 0:04a9f72bbca7 834 #endif
stevep 0:04a9f72bbca7 835
stevep 0:04a9f72bbca7 836
stevep 0:04a9f72bbca7 837 #ifdef __GNUC__
stevep 0:04a9f72bbca7 838
stevep 0:04a9f72bbca7 839 // this part should be compiled with gcc
stevep 0:04a9f72bbca7 840 uint dummy1, dummy2, dummy3;
stevep 0:04a9f72bbca7 841
stevep 0:04a9f72bbca7 842 __asm__ __volatile__(
stevep 0:04a9f72bbca7 843 "push %%edx \n"
stevep 0:04a9f72bbca7 844 "xor %%edx, %%edx \n" // edx = 0, cf = 0
stevep 0:04a9f72bbca7 845 "1: \n"
stevep 0:04a9f72bbca7 846 "mov (%%esi,%%edx,4), %%eax \n"
stevep 0:04a9f72bbca7 847 "sbb (%%ebx,%%edx,4), %%eax \n"
stevep 0:04a9f72bbca7 848 "mov %%eax, (%%edi,%%edx,4) \n"
stevep 0:04a9f72bbca7 849
stevep 0:04a9f72bbca7 850 "inc %%edx \n"
stevep 0:04a9f72bbca7 851 "dec %%ecx \n"
stevep 0:04a9f72bbca7 852 "jnz 1b \n"
stevep 0:04a9f72bbca7 853
stevep 0:04a9f72bbca7 854 "adc %%ecx, %%ecx \n" // ecx has the cf state
stevep 0:04a9f72bbca7 855 "pop %%eax \n" // eax = rest
stevep 0:04a9f72bbca7 856
stevep 0:04a9f72bbca7 857 "or %%eax, %%eax \n"
stevep 0:04a9f72bbca7 858 "jz 3f \n"
stevep 0:04a9f72bbca7 859
stevep 0:04a9f72bbca7 860 "xor %%ebx, %%ebx \n" // ebx = 0
stevep 0:04a9f72bbca7 861 "neg %%ecx \n" // setting cf from ecx
stevep 0:04a9f72bbca7 862 "mov %%eax, %%ecx \n" // ecx=rest and is != 0
stevep 0:04a9f72bbca7 863 "2: \n"
stevep 0:04a9f72bbca7 864 "mov (%%esi, %%edx, 4), %%eax \n"
stevep 0:04a9f72bbca7 865 "sbb %%ebx, %%eax \n"
stevep 0:04a9f72bbca7 866 "mov %%eax, (%%edi, %%edx, 4) \n"
stevep 0:04a9f72bbca7 867
stevep 0:04a9f72bbca7 868 "inc %%edx \n"
stevep 0:04a9f72bbca7 869 "dec %%ecx \n"
stevep 0:04a9f72bbca7 870 "jnz 2b \n"
stevep 0:04a9f72bbca7 871
stevep 0:04a9f72bbca7 872 "adc %%ecx, %%ecx \n"
stevep 0:04a9f72bbca7 873 "3: \n"
stevep 0:04a9f72bbca7 874
stevep 0:04a9f72bbca7 875 : "=a" (dummy1), "=b" (dummy2), "=c" (c), "=d" (dummy3)
stevep 0:04a9f72bbca7 876 : "1" (ss2), "2" (ss2_size), "3" (rest), "S" (ss1), "D" (result)
stevep 0:04a9f72bbca7 877 : "cc", "memory" );
stevep 0:04a9f72bbca7 878
stevep 0:04a9f72bbca7 879 #endif
stevep 0:04a9f72bbca7 880
stevep 0:04a9f72bbca7 881 TTMATH_VECTOR_LOGC("UInt::SubVector", c, result, ss1_size)
stevep 0:04a9f72bbca7 882
stevep 0:04a9f72bbca7 883 return c;
stevep 0:04a9f72bbca7 884 }
stevep 0:04a9f72bbca7 885
stevep 0:04a9f72bbca7 886
stevep 0:04a9f72bbca7 887
stevep 0:04a9f72bbca7 888 /*!
stevep 0:04a9f72bbca7 889 this method moves all bits into the left hand side (by one bit)
stevep 0:04a9f72bbca7 890 return value <- this <- c
stevep 0:04a9f72bbca7 891
stevep 0:04a9f72bbca7 892 the lowest *bit* will be set from 'c' (any non-zero 'c' means one) and
stevep 0:04a9f72bbca7 893 the state of one additional bit (the one moved out on the left hand side)
stevep 0:04a9f72bbca7 894 will be returned
stevep 0:04a9f72bbca7 895
stevep 0:04a9f72bbca7 896 for example:
stevep 0:04a9f72bbca7 897 let this is 001010000
stevep 0:04a9f72bbca7 898 after Rcl2_one(1) there'll be 010100001 and Rcl2_one returns 0
stevep 0:04a9f72bbca7 899 */
stevep 0:04a9f72bbca7 900 template<uint value_size>
stevep 0:04a9f72bbca7 901 uint UInt<value_size>::Rcl2_one(uint c)
stevep 0:04a9f72bbca7 902 {
stevep 0:04a9f72bbca7 903 uint b = value_size; // the number of words in the table (loop counter)
stevep 0:04a9f72bbca7 904 uint * p1 = table;
stevep 0:04a9f72bbca7 905
stevep 0:04a9f72bbca7 906 #ifndef __GNUC__
stevep 0:04a9f72bbca7 907 __asm
stevep 0:04a9f72bbca7 908 {
stevep 0:04a9f72bbca7 909 push ebx
stevep 0:04a9f72bbca7 910 push ecx
stevep 0:04a9f72bbca7 911 push edx
stevep 0:04a9f72bbca7 912
stevep 0:04a9f72bbca7 913 mov ebx, [p1]
stevep 0:04a9f72bbca7 914 xor edx, edx // edx = 0 (index of the current word), must be done before 'neg' because it clears cf
stevep 0:04a9f72bbca7 915 mov ecx, [c]
stevep 0:04a9f72bbca7 916 neg ecx // CF=1 if c!=0 , CF=0 if c==0
stevep 0:04a9f72bbca7 917 mov ecx, [b] // ecx = loop counter ('mov' doesn't change cf)
stevep 0:04a9f72bbca7 918
stevep 0:04a9f72bbca7 919 ttmath_loop:
stevep 0:04a9f72bbca7 920 rcl dword ptr [ebx+edx*4], 1 // cf goes into the lowest bit of the word, the highest bit goes into cf
stevep 0:04a9f72bbca7 921
stevep 0:04a9f72bbca7 922 inc edx // going from the lowest word to the highest one ('inc' and 'dec' don't change cf)
stevep 0:04a9f72bbca7 923 dec ecx
stevep 0:04a9f72bbca7 924 jnz ttmath_loop
stevep 0:04a9f72bbca7 925
stevep 0:04a9f72bbca7 926 adc ecx, ecx // ecx was zero so now ecx = cf (the bit moved out from the highest word)
stevep 0:04a9f72bbca7 927 mov [c], ecx
stevep 0:04a9f72bbca7 928
stevep 0:04a9f72bbca7 929 pop edx
stevep 0:04a9f72bbca7 930 pop ecx
stevep 0:04a9f72bbca7 931 pop ebx
stevep 0:04a9f72bbca7 932 }
stevep 0:04a9f72bbca7 933 #endif
stevep 0:04a9f72bbca7 934
stevep 0:04a9f72bbca7 935
stevep 0:04a9f72bbca7 936 #ifdef __GNUC__
// registers on entry (see the constraints): eax = c, ebx = p1, ecx = b
stevep 0:04a9f72bbca7 937 uint dummy, dummy2;
stevep 0:04a9f72bbca7 938
stevep 0:04a9f72bbca7 939 __asm__ __volatile__(
stevep 0:04a9f72bbca7 940
stevep 0:04a9f72bbca7 941 "xorl %%edx, %%edx \n" // edx=0
stevep 0:04a9f72bbca7 942 "negl %%eax \n" // CF=1 if eax!=0 , CF=0 if eax==0
stevep 0:04a9f72bbca7 943
stevep 0:04a9f72bbca7 944 "1: \n"
stevep 0:04a9f72bbca7 945 "rcll $1, (%%ebx, %%edx, 4) \n" // cf goes into the lowest bit of the word, the highest bit goes into cf
stevep 0:04a9f72bbca7 946
stevep 0:04a9f72bbca7 947 "incl %%edx \n"
stevep 0:04a9f72bbca7 948 "decl %%ecx \n"
stevep 0:04a9f72bbca7 949 "jnz 1b \n"
stevep 0:04a9f72bbca7 950
stevep 0:04a9f72bbca7 951 "adcl %%ecx, %%ecx \n" // ecx was zero so now ecx = cf
stevep 0:04a9f72bbca7 952
stevep 0:04a9f72bbca7 953 : "=c" (c), "=a" (dummy), "=d" (dummy2)
stevep 0:04a9f72bbca7 954 : "0" (b), "1" (c), "b" (p1)
stevep 0:04a9f72bbca7 955 : "cc", "memory" );
stevep 0:04a9f72bbca7 956
stevep 0:04a9f72bbca7 957 #endif
stevep 0:04a9f72bbca7 958
stevep 0:04a9f72bbca7 959 TTMATH_LOGC("UInt::Rcl2_one", c)
stevep 0:04a9f72bbca7 960
stevep 0:04a9f72bbca7 961 return c;
stevep 0:04a9f72bbca7 962 }
stevep 0:04a9f72bbca7 963
stevep 0:04a9f72bbca7 964
stevep 0:04a9f72bbca7 965
stevep 0:04a9f72bbca7 966 /*!
stevep 0:04a9f72bbca7 967 this method moves all bits into the right hand side (by one bit)
stevep 0:04a9f72bbca7 968 c -> this -> return value
stevep 0:04a9f72bbca7 969
stevep 0:04a9f72bbca7 970 the highest *bit* will be set from 'c' (any non-zero 'c' means one) and
stevep 0:04a9f72bbca7 971 the state of one additional bit (the one moved out on the right hand side)
stevep 0:04a9f72bbca7 972 will be returned
stevep 0:04a9f72bbca7 973
stevep 0:04a9f72bbca7 974 for example:
stevep 0:04a9f72bbca7 975 let this is 000000010
stevep 0:04a9f72bbca7 976 after Rcr2_one(1) there'll be 100000001 and Rcr2_one returns 0
stevep 0:04a9f72bbca7 977 */
stevep 0:04a9f72bbca7 978 template<uint value_size>
stevep 0:04a9f72bbca7 979 uint UInt<value_size>::Rcr2_one(uint c)
stevep 0:04a9f72bbca7 980 {
stevep 0:04a9f72bbca7 981 uint b = value_size; // the number of words in the table (loop counter and index)
stevep 0:04a9f72bbca7 982 uint * p1 = table;
stevep 0:04a9f72bbca7 983
stevep 0:04a9f72bbca7 984 #ifndef __GNUC__
stevep 0:04a9f72bbca7 985 __asm
stevep 0:04a9f72bbca7 986 {
stevep 0:04a9f72bbca7 987 push ebx
stevep 0:04a9f72bbca7 988 push ecx
stevep 0:04a9f72bbca7 989
stevep 0:04a9f72bbca7 990 mov ebx, [p1]
stevep 0:04a9f72bbca7 991 mov ecx, [c]
stevep 0:04a9f72bbca7 992 neg ecx // CF=1 if c!=0 , CF=0 if c==0
stevep 0:04a9f72bbca7 993 mov ecx, [b] // ecx = loop counter ('mov' doesn't change cf)
stevep 0:04a9f72bbca7 994
stevep 0:04a9f72bbca7 995 ttmath_loop:
stevep 0:04a9f72bbca7 996 rcr dword ptr [ebx+ecx*4-4], 1 // the word with index ecx-1: cf goes into its highest bit, its lowest bit goes into cf
stevep 0:04a9f72bbca7 997
stevep 0:04a9f72bbca7 998 dec ecx // going from the highest word to the lowest one ('dec' doesn't change cf)
stevep 0:04a9f72bbca7 999 jnz ttmath_loop
stevep 0:04a9f72bbca7 1000
stevep 0:04a9f72bbca7 1001 adc ecx, ecx // ecx was zero so now ecx = cf (the bit moved out from the lowest word)
stevep 0:04a9f72bbca7 1002 mov [c], ecx
stevep 0:04a9f72bbca7 1003
stevep 0:04a9f72bbca7 1004 pop ecx
stevep 0:04a9f72bbca7 1005 pop ebx
stevep 0:04a9f72bbca7 1006 }
stevep 0:04a9f72bbca7 1007 #endif
stevep 0:04a9f72bbca7 1008
stevep 0:04a9f72bbca7 1009
stevep 0:04a9f72bbca7 1010 #ifdef __GNUC__
// registers on entry (see the constraints): eax = c, ebx = p1, ecx = b
stevep 0:04a9f72bbca7 1011 uint dummy;
stevep 0:04a9f72bbca7 1012
stevep 0:04a9f72bbca7 1013 __asm__ __volatile__(
stevep 0:04a9f72bbca7 1014
stevep 0:04a9f72bbca7 1015 "negl %%eax \n" // CF=1 if eax!=0 , CF=0 if eax==0
stevep 0:04a9f72bbca7 1016
stevep 0:04a9f72bbca7 1017 "1: \n"
stevep 0:04a9f72bbca7 1018 "rcrl $1, -4(%%ebx, %%ecx, 4) \n" // the word with index ecx-1: cf goes into its highest bit, its lowest bit goes into cf
stevep 0:04a9f72bbca7 1019
stevep 0:04a9f72bbca7 1020 "decl %%ecx \n"
stevep 0:04a9f72bbca7 1021 "jnz 1b \n"
stevep 0:04a9f72bbca7 1022
stevep 0:04a9f72bbca7 1023 "adcl %%ecx, %%ecx \n" // ecx was zero so now ecx = cf
stevep 0:04a9f72bbca7 1024
stevep 0:04a9f72bbca7 1025 : "=c" (c), "=a" (dummy)
stevep 0:04a9f72bbca7 1026 : "0" (b), "1" (c), "b" (p1)
stevep 0:04a9f72bbca7 1027 : "cc", "memory" );
stevep 0:04a9f72bbca7 1028
stevep 0:04a9f72bbca7 1029 #endif
stevep 0:04a9f72bbca7 1030
stevep 0:04a9f72bbca7 1031 TTMATH_LOGC("UInt::Rcr2_one", c)
stevep 0:04a9f72bbca7 1032
stevep 0:04a9f72bbca7 1033 return c;
stevep 0:04a9f72bbca7 1034 }
stevep 0:04a9f72bbca7 1035
stevep 0:04a9f72bbca7 1036
stevep 0:04a9f72bbca7 1037
stevep 0:04a9f72bbca7 1038 #ifdef _MSC_VER
stevep 0:04a9f72bbca7 1039 #pragma warning (disable : 4731)
stevep 0:04a9f72bbca7 1040 //warning C4731: frame pointer register 'ebp' modified by inline assembly code
// (the inline assembly of Rcl2 and Rcr2 below keeps a bit mask in ebp
// and restores the register with 'pop ebp' before leaving the asm block)
stevep 0:04a9f72bbca7 1041 #endif
stevep 0:04a9f72bbca7 1042
stevep 0:04a9f72bbca7 1043
stevep 0:04a9f72bbca7 1044
stevep 0:04a9f72bbca7 1045 /*!
stevep 0:04a9f72bbca7 1046 this method moves all bits into the left hand side (by 'bits' bits)
stevep 0:04a9f72bbca7 1047 return value <- this <- c
stevep 0:04a9f72bbca7 1048
stevep 0:04a9f72bbca7 1049 the lowest *bits* will be filled with 'c' (ones if 'c' is non-zero, zeroes otherwise) and
stevep 0:04a9f72bbca7 1050 the state of one additional bit (the last one moved out on the left hand side)
stevep 0:04a9f72bbca7 1051 will be returned
stevep 0:04a9f72bbca7 1052
stevep 0:04a9f72bbca7 1053 for example:
stevep 0:04a9f72bbca7 1054 let this is 001010000
stevep 0:04a9f72bbca7 1055 after Rcl2(3, 1) there'll be 010000111 and Rcl2 returns 1

'bits' must be greater than zero and less than TTMATH_BITS_PER_UINT (see the assertion below)

how it works:
each word is rotated left by 'bits', the bits which wrapped around to the bottom
of the word are cut off with a mask and put into the bottom of the next (higher) word
stevep 0:04a9f72bbca7 1056 */
stevep 0:04a9f72bbca7 1057 template<uint value_size>
stevep 0:04a9f72bbca7 1058 uint UInt<value_size>::Rcl2(uint bits, uint c)
stevep 0:04a9f72bbca7 1059 {
stevep 0:04a9f72bbca7 1060 TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT )
stevep 0:04a9f72bbca7 1061
stevep 0:04a9f72bbca7 1062 uint b = value_size; // the number of words in the table (loop counter)
stevep 0:04a9f72bbca7 1063 uint * p1 = table;
stevep 0:04a9f72bbca7 1064
stevep 0:04a9f72bbca7 1065 #ifndef __GNUC__
stevep 0:04a9f72bbca7 1066 __asm
stevep 0:04a9f72bbca7 1067 {
stevep 0:04a9f72bbca7 1068 push eax
stevep 0:04a9f72bbca7 1069 push ebx
stevep 0:04a9f72bbca7 1070 push ecx
stevep 0:04a9f72bbca7 1071 push edx
stevep 0:04a9f72bbca7 1072 push esi
stevep 0:04a9f72bbca7 1073 push edi
stevep 0:04a9f72bbca7 1074 push ebp
stevep 0:04a9f72bbca7 1075
stevep 0:04a9f72bbca7 1076 mov edi, [b] // edi = loop counter
stevep 0:04a9f72bbca7 1077
stevep 0:04a9f72bbca7 1078 mov ecx, 32
stevep 0:04a9f72bbca7 1079 sub ecx, [bits] // ecx = 32 - bits
stevep 0:04a9f72bbca7 1080 mov edx, -1 // edx = -1 (all bits set to one)
stevep 0:04a9f72bbca7 1081 shr edx, cl // edx = mask: only the lowest 'bits' bits are set
stevep 0:04a9f72bbca7 1082
stevep 0:04a9f72bbca7 1083 mov ecx, [bits] // cl = rotation count used in the loop
stevep 0:04a9f72bbca7 1084 mov ebx, [p1]
stevep 0:04a9f72bbca7 1085 mov eax, [c]
stevep 0:04a9f72bbca7 1086
stevep 0:04a9f72bbca7 1087 mov ebp, edx // ebp = mask (modified ebp - don't read/write to variables)
stevep 0:04a9f72bbca7 1088
stevep 0:04a9f72bbca7 1089 xor edx, edx // edx = 0 (index of the current word)
stevep 0:04a9f72bbca7 1090 mov esi, edx
stevep 0:04a9f72bbca7 1091 or eax, eax
stevep 0:04a9f72bbca7 1092 cmovnz esi, ebp // if(c) esi=mask else esi=0
stevep 0:04a9f72bbca7 1093
stevep 0:04a9f72bbca7 1094 ttmath_loop:
stevep 0:04a9f72bbca7 1095 rol dword ptr [ebx+edx*4], cl // the highest 'bits' bits of the word are now at its bottom
stevep 0:04a9f72bbca7 1096
stevep 0:04a9f72bbca7 1097 mov eax, [ebx+edx*4]
stevep 0:04a9f72bbca7 1098 and eax, ebp // eax = the bits moved out from this word
stevep 0:04a9f72bbca7 1099 xor [ebx+edx*4], eax // clearing bits
stevep 0:04a9f72bbca7 1100 or [ebx+edx*4], esi // saving old value
stevep 0:04a9f72bbca7 1101 mov esi, eax // they will be put into the next (higher) word
stevep 0:04a9f72bbca7 1102
stevep 0:04a9f72bbca7 1103 inc edx
stevep 0:04a9f72bbca7 1104 dec edi
stevep 0:04a9f72bbca7 1105 jnz ttmath_loop
stevep 0:04a9f72bbca7 1106
stevep 0:04a9f72bbca7 1107 pop ebp // restoring ebp
stevep 0:04a9f72bbca7 1108
stevep 0:04a9f72bbca7 1109 and eax, 1 // the lowest of the bits moved out from the highest word (it was moved out as the last one)
stevep 0:04a9f72bbca7 1110 mov [c], eax
stevep 0:04a9f72bbca7 1111
stevep 0:04a9f72bbca7 1112 pop edi
stevep 0:04a9f72bbca7 1113 pop esi
stevep 0:04a9f72bbca7 1114 pop edx
stevep 0:04a9f72bbca7 1115 pop ecx
stevep 0:04a9f72bbca7 1116 pop ebx
stevep 0:04a9f72bbca7 1117 pop eax
stevep 0:04a9f72bbca7 1118 }
stevep 0:04a9f72bbca7 1119 #endif
stevep 0:04a9f72bbca7 1120
stevep 0:04a9f72bbca7 1121
stevep 0:04a9f72bbca7 1122 #ifdef __GNUC__
// registers on entry (see the constraints): eax = c, ebx = p1, ecx = bits, edi = b
// esi and edx are used as temporary registers, ebp is saved on the stack and restored
stevep 0:04a9f72bbca7 1123 uint dummy, dummy2, dummy3;
stevep 0:04a9f72bbca7 1124
stevep 0:04a9f72bbca7 1125 __asm__ __volatile__(
stevep 0:04a9f72bbca7 1126
stevep 0:04a9f72bbca7 1127 "push %%ebp \n"
stevep 0:04a9f72bbca7 1128
stevep 0:04a9f72bbca7 1129 "movl %%ecx, %%esi \n" // esi = bits (kept for a while)
stevep 0:04a9f72bbca7 1130 "movl $32, %%ecx \n"
stevep 0:04a9f72bbca7 1131 "subl %%esi, %%ecx \n" // ecx = 32 - bits
stevep 0:04a9f72bbca7 1132 "movl $-1, %%edx \n" // edx = -1 (all bits set to one)
stevep 0:04a9f72bbca7 1133 "shrl %%cl, %%edx \n" // shifting (0 -> edx -> cf) (cl times)
stevep 0:04a9f72bbca7 1134 "movl %%edx, %%ebp \n" // ebp = edx = mask
stevep 0:04a9f72bbca7 1135 "movl %%esi, %%ecx \n" // ecx = bits again
stevep 0:04a9f72bbca7 1136
stevep 0:04a9f72bbca7 1137 "xorl %%edx, %%edx \n" // edx = 0 (index of the current word)
stevep 0:04a9f72bbca7 1138 "movl %%edx, %%esi \n"
stevep 0:04a9f72bbca7 1139 "orl %%eax, %%eax \n"
stevep 0:04a9f72bbca7 1140 "cmovnz %%ebp, %%esi \n" // if(c) esi=mask else esi=0
stevep 0:04a9f72bbca7 1141
stevep 0:04a9f72bbca7 1142 "1: \n"
stevep 0:04a9f72bbca7 1143 "roll %%cl, (%%ebx,%%edx,4) \n" // the highest 'bits' bits of the word are now at its bottom
stevep 0:04a9f72bbca7 1144
stevep 0:04a9f72bbca7 1145 "movl (%%ebx,%%edx,4), %%eax \n"
stevep 0:04a9f72bbca7 1146 "andl %%ebp, %%eax \n" // eax = the bits moved out from this word
stevep 0:04a9f72bbca7 1147 "xorl %%eax, (%%ebx,%%edx,4) \n" // clearing bits
stevep 0:04a9f72bbca7 1148 "orl %%esi, (%%ebx,%%edx,4) \n" // putting the bits moved out from the previous word (or made from 'c')
stevep 0:04a9f72bbca7 1149 "movl %%eax, %%esi \n"
stevep 0:04a9f72bbca7 1150
stevep 0:04a9f72bbca7 1151 "incl %%edx \n"
stevep 0:04a9f72bbca7 1152 "decl %%edi \n"
stevep 0:04a9f72bbca7 1153 "jnz 1b \n"
stevep 0:04a9f72bbca7 1154
stevep 0:04a9f72bbca7 1155 "and $1, %%eax \n" // the lowest of the bits moved out from the highest word
stevep 0:04a9f72bbca7 1156
stevep 0:04a9f72bbca7 1157 "pop %%ebp \n"
stevep 0:04a9f72bbca7 1158
stevep 0:04a9f72bbca7 1159 : "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3)
stevep 0:04a9f72bbca7 1160 : "0" (c), "1" (b), "b" (p1), "c" (bits)
stevep 0:04a9f72bbca7 1161 : "cc", "memory" );
stevep 0:04a9f72bbca7 1162
stevep 0:04a9f72bbca7 1163 #endif
stevep 0:04a9f72bbca7 1164
stevep 0:04a9f72bbca7 1165 TTMATH_LOGC("UInt::Rcl2", c)
stevep 0:04a9f72bbca7 1166
stevep 0:04a9f72bbca7 1167 return c;
stevep 0:04a9f72bbca7 1168 }
stevep 0:04a9f72bbca7 1169
stevep 0:04a9f72bbca7 1170
stevep 0:04a9f72bbca7 1171
stevep 0:04a9f72bbca7 1172
stevep 0:04a9f72bbca7 1173 /*!
stevep 0:04a9f72bbca7 1174 this method moves all bits into the right hand side (by 'bits' bits)
stevep 0:04a9f72bbca7 1175 C -> this -> return value
stevep 0:04a9f72bbca7 1176
stevep 0:04a9f72bbca7 1177 the highest *bits* will be filled with 'c' (ones if 'c' is non-zero, zeroes otherwise) and
stevep 0:04a9f72bbca7 1178 the state of one additional bit (the last one moved out on the right hand side)
stevep 0:04a9f72bbca7 1179 will be returned
stevep 0:04a9f72bbca7 1180
stevep 0:04a9f72bbca7 1181 for example:
stevep 0:04a9f72bbca7 1182 let this is 000000010
stevep 0:04a9f72bbca7 1183 after Rcr2(2, 1) there'll be 110000000 and Rcr2 returns 1

'bits' must be greater than zero and less than TTMATH_BITS_PER_UINT (see the assertion below)

how it works:
each word (starting from the highest one) is rotated right by 'bits', the bits which
wrapped around to the top of the word are cut off with a mask and put into the top
of the next (lower) word
stevep 0:04a9f72bbca7 1184 */
stevep 0:04a9f72bbca7 1185 template<uint value_size>
stevep 0:04a9f72bbca7 1186 uint UInt<value_size>::Rcr2(uint bits, uint c)
stevep 0:04a9f72bbca7 1187 {
stevep 0:04a9f72bbca7 1188 TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT )
stevep 0:04a9f72bbca7 1189
stevep 0:04a9f72bbca7 1190 uint b = value_size; // the number of words in the table (loop counter)
stevep 0:04a9f72bbca7 1191 uint * p1 = table;
stevep 0:04a9f72bbca7 1192
stevep 0:04a9f72bbca7 1193 #ifndef __GNUC__
stevep 0:04a9f72bbca7 1194 __asm
stevep 0:04a9f72bbca7 1195 {
stevep 0:04a9f72bbca7 1196 push eax
stevep 0:04a9f72bbca7 1197 push ebx
stevep 0:04a9f72bbca7 1198 push ecx
stevep 0:04a9f72bbca7 1199 push edx
stevep 0:04a9f72bbca7 1200 push esi
stevep 0:04a9f72bbca7 1201 push edi
stevep 0:04a9f72bbca7 1202 push ebp
stevep 0:04a9f72bbca7 1203
stevep 0:04a9f72bbca7 1204 mov edi, [b] // edi = loop counter
stevep 0:04a9f72bbca7 1205
stevep 0:04a9f72bbca7 1206 mov ecx, 32
stevep 0:04a9f72bbca7 1207 sub ecx, [bits] // ecx = 32 - bits
stevep 0:04a9f72bbca7 1208 mov edx, -1 // edx = -1 (all bits set to one)
stevep 0:04a9f72bbca7 1209 shl edx, cl // edx = mask: only the highest 'bits' bits are set
stevep 0:04a9f72bbca7 1210
stevep 0:04a9f72bbca7 1211 mov ecx, [bits] // cl = rotation count used in the loop
stevep 0:04a9f72bbca7 1212 mov ebx, [p1]
stevep 0:04a9f72bbca7 1213 mov eax, [c]
stevep 0:04a9f72bbca7 1214
stevep 0:04a9f72bbca7 1215 mov ebp, edx // ebp = mask (modified ebp - don't read/write to variables)
stevep 0:04a9f72bbca7 1216
stevep 0:04a9f72bbca7 1217 xor edx, edx // edx = 0
stevep 0:04a9f72bbca7 1218 mov esi, edx
stevep 0:04a9f72bbca7 1219 add edx, edi
stevep 0:04a9f72bbca7 1220 dec edx // edx is pointing at the end of the table (on last word)
stevep 0:04a9f72bbca7 1221 or eax, eax
stevep 0:04a9f72bbca7 1222 cmovnz esi, ebp // if(c) esi=mask else esi=0
stevep 0:04a9f72bbca7 1223
stevep 0:04a9f72bbca7 1224 ttmath_loop:
stevep 0:04a9f72bbca7 1225 ror dword ptr [ebx+edx*4], cl // the lowest 'bits' bits of the word are now at its top
stevep 0:04a9f72bbca7 1226
stevep 0:04a9f72bbca7 1227 mov eax, [ebx+edx*4]
stevep 0:04a9f72bbca7 1228 and eax, ebp // eax = the bits moved out from this word
stevep 0:04a9f72bbca7 1229 xor [ebx+edx*4], eax // clearing bits
stevep 0:04a9f72bbca7 1230 or [ebx+edx*4], esi // saving old value
stevep 0:04a9f72bbca7 1231 mov esi, eax // they will be put into the next (lower) word
stevep 0:04a9f72bbca7 1232
stevep 0:04a9f72bbca7 1233 dec edx
stevep 0:04a9f72bbca7 1234 dec edi
stevep 0:04a9f72bbca7 1235 jnz ttmath_loop
stevep 0:04a9f72bbca7 1236
stevep 0:04a9f72bbca7 1237 pop ebp // restoring ebp
stevep 0:04a9f72bbca7 1238
stevep 0:04a9f72bbca7 1239 rol eax, 1 // 31bit will be first
stevep 0:04a9f72bbca7 1240 and eax, 1 // the highest of the bits moved out from the lowest word (it was moved out as the last one)
stevep 0:04a9f72bbca7 1241 mov [c], eax
stevep 0:04a9f72bbca7 1242
stevep 0:04a9f72bbca7 1243 pop edi
stevep 0:04a9f72bbca7 1244 pop esi
stevep 0:04a9f72bbca7 1245 pop edx
stevep 0:04a9f72bbca7 1246 pop ecx
stevep 0:04a9f72bbca7 1247 pop ebx
stevep 0:04a9f72bbca7 1248 pop eax
stevep 0:04a9f72bbca7 1249 }
stevep 0:04a9f72bbca7 1250 #endif
stevep 0:04a9f72bbca7 1251
stevep 0:04a9f72bbca7 1252
stevep 0:04a9f72bbca7 1253 #ifdef __GNUC__
// registers on entry (see the constraints): eax = c, ebx = p1, ecx = bits, edi = b
// esi and edx are used as temporary registers, ebp is saved on the stack and restored
stevep 0:04a9f72bbca7 1254 uint dummy, dummy2, dummy3;
stevep 0:04a9f72bbca7 1255
stevep 0:04a9f72bbca7 1256 __asm__ __volatile__(
stevep 0:04a9f72bbca7 1257
stevep 0:04a9f72bbca7 1258 "push %%ebp \n"
stevep 0:04a9f72bbca7 1259
stevep 0:04a9f72bbca7 1260 "movl %%ecx, %%esi \n" // esi = bits (kept for a while)
stevep 0:04a9f72bbca7 1261 "movl $32, %%ecx \n"
stevep 0:04a9f72bbca7 1262 "subl %%esi, %%ecx \n" // ecx = 32 - bits
stevep 0:04a9f72bbca7 1263 "movl $-1, %%edx \n" // edx = -1 (all bits set to one)
stevep 0:04a9f72bbca7 1264 "shll %%cl, %%edx \n" // shifting (cf <- edx <- 0) (cl times)
stevep 0:04a9f72bbca7 1265 "movl %%edx, %%ebp \n" // ebp = edx = mask
stevep 0:04a9f72bbca7 1266 "movl %%esi, %%ecx \n" // ecx = bits again
stevep 0:04a9f72bbca7 1267
stevep 0:04a9f72bbca7 1268 "xorl %%edx, %%edx \n"
stevep 0:04a9f72bbca7 1269 "movl %%edx, %%esi \n"
stevep 0:04a9f72bbca7 1270 "addl %%edi, %%edx \n"
stevep 0:04a9f72bbca7 1271 "decl %%edx \n" // edx is pointing at the end of the table (on last word)
stevep 0:04a9f72bbca7 1272 "orl %%eax, %%eax \n"
stevep 0:04a9f72bbca7 1273 "cmovnz %%ebp, %%esi \n" // if(c) esi=mask else esi=0
stevep 0:04a9f72bbca7 1274
stevep 0:04a9f72bbca7 1275 "1: \n"
stevep 0:04a9f72bbca7 1276 "rorl %%cl, (%%ebx,%%edx,4) \n" // the lowest 'bits' bits of the word are now at its top
stevep 0:04a9f72bbca7 1277
stevep 0:04a9f72bbca7 1278 "movl (%%ebx,%%edx,4), %%eax \n"
stevep 0:04a9f72bbca7 1279 "andl %%ebp, %%eax \n" // eax = the bits moved out from this word
stevep 0:04a9f72bbca7 1280 "xorl %%eax, (%%ebx,%%edx,4) \n" // clearing bits
stevep 0:04a9f72bbca7 1281 "orl %%esi, (%%ebx,%%edx,4) \n" // putting the bits moved out from the previous word (or made from 'c')
stevep 0:04a9f72bbca7 1282 "movl %%eax, %%esi \n"
stevep 0:04a9f72bbca7 1283
stevep 0:04a9f72bbca7 1284 "decl %%edx \n"
stevep 0:04a9f72bbca7 1285 "decl %%edi \n"
stevep 0:04a9f72bbca7 1286 "jnz 1b \n"
stevep 0:04a9f72bbca7 1287
stevep 0:04a9f72bbca7 1288 "roll $1, %%eax \n" // 31bit will be first
stevep 0:04a9f72bbca7 1289 "andl $1, %%eax \n"
stevep 0:04a9f72bbca7 1290
stevep 0:04a9f72bbca7 1291 "pop %%ebp \n"
stevep 0:04a9f72bbca7 1292
stevep 0:04a9f72bbca7 1293 : "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3)
stevep 0:04a9f72bbca7 1294 : "0" (c), "1" (b), "b" (p1), "c" (bits)
stevep 0:04a9f72bbca7 1295 : "cc", "memory" );
stevep 0:04a9f72bbca7 1296
stevep 0:04a9f72bbca7 1297 #endif
stevep 0:04a9f72bbca7 1298
stevep 0:04a9f72bbca7 1299 TTMATH_LOGC("UInt::Rcr2", c)
stevep 0:04a9f72bbca7 1300
stevep 0:04a9f72bbca7 1301 return c;
stevep 0:04a9f72bbca7 1302 }
stevep 0:04a9f72bbca7 1303
stevep 0:04a9f72bbca7 1304
stevep 0:04a9f72bbca7 1305 #ifdef _MSC_VER
// warning C4731 (ebp modified by inline assembly) was disabled for Rcl2 and Rcr2 above,
// here it is set back to its default behaviour
stevep 0:04a9f72bbca7 1306 #pragma warning (default : 4731)
stevep 0:04a9f72bbca7 1307 #endif
stevep 0:04a9f72bbca7 1308
stevep 0:04a9f72bbca7 1309
stevep 0:04a9f72bbca7 1310 /*
stevep 0:04a9f72bbca7 1311 this method returns the number of the highest set bit in one 32-bit word
stevep 0:04a9f72bbca7 1312 if the 'x' is zero this method returns '-1'

(bits are numbered from zero, the 'bsr' instruction is used for the scan)
stevep 0:04a9f72bbca7 1313 */
stevep 0:04a9f72bbca7 1314 template<uint value_size>
stevep 0:04a9f72bbca7 1315 sint UInt<value_size>::FindLeadingBitInWord(uint x)
stevep 0:04a9f72bbca7 1316 {
stevep 0:04a9f72bbca7 1317 sint result;
stevep 0:04a9f72bbca7 1318
stevep 0:04a9f72bbca7 1319 #ifndef __GNUC__
stevep 0:04a9f72bbca7 1320 __asm
stevep 0:04a9f72bbca7 1321 {
stevep 0:04a9f72bbca7 1322 push eax
stevep 0:04a9f72bbca7 1323 push edx
stevep 0:04a9f72bbca7 1324
stevep 0:04a9f72bbca7 1325 mov edx,-1 // the value returned when x is zero
stevep 0:04a9f72bbca7 1326 bsr eax,[x] // eax = index of the highest set bit, zf is set if x is zero
stevep 0:04a9f72bbca7 1327 cmovz eax,edx // if x was zero: eax = -1
stevep 0:04a9f72bbca7 1328 mov [result], eax
stevep 0:04a9f72bbca7 1329
stevep 0:04a9f72bbca7 1330 pop edx
stevep 0:04a9f72bbca7 1331 pop eax
stevep 0:04a9f72bbca7 1332 }
stevep 0:04a9f72bbca7 1333 #endif
stevep 0:04a9f72bbca7 1334
stevep 0:04a9f72bbca7 1335
stevep 0:04a9f72bbca7 1336 #ifdef __GNUC__
// operands: %0 = result, %1 = dummy (temporary, holds -1), %2 = x
// dummy is declared as early-clobber ("=&r") because it is written before x is read
stevep 0:04a9f72bbca7 1337 uint dummy;
stevep 0:04a9f72bbca7 1338
stevep 0:04a9f72bbca7 1339 __asm__ (
stevep 0:04a9f72bbca7 1340
stevep 0:04a9f72bbca7 1341 "movl $-1, %1 \n" // the value returned when x is zero
stevep 0:04a9f72bbca7 1342 "bsrl %2, %0 \n" // result = index of the highest set bit, zf is set if x is zero
stevep 0:04a9f72bbca7 1343 "cmovz %1, %0 \n" // if x was zero: result = -1
stevep 0:04a9f72bbca7 1344
stevep 0:04a9f72bbca7 1345 : "=r" (result), "=&r" (dummy)
stevep 0:04a9f72bbca7 1346 : "r" (x)
stevep 0:04a9f72bbca7 1347 : "cc" );
stevep 0:04a9f72bbca7 1348
stevep 0:04a9f72bbca7 1349 #endif
stevep 0:04a9f72bbca7 1350
stevep 0:04a9f72bbca7 1351 return result;
stevep 0:04a9f72bbca7 1352 }
stevep 0:04a9f72bbca7 1353
stevep 0:04a9f72bbca7 1354
stevep 0:04a9f72bbca7 1355
stevep 0:04a9f72bbca7 1356 /*
stevep 0:04a9f72bbca7 1357 this method returns the number of the smallest set bit in one 32-bit word
stevep 0:04a9f72bbca7 1358 if the 'x' is zero this method returns '-1'

(bits are numbered from zero, the 'bsf' instruction is used for the scan)
stevep 0:04a9f72bbca7 1359 */
stevep 0:04a9f72bbca7 1360 template<uint value_size>
stevep 0:04a9f72bbca7 1361 sint UInt<value_size>::FindLowestBitInWord(uint x)
stevep 0:04a9f72bbca7 1362 {
stevep 0:04a9f72bbca7 1363 sint result;
stevep 0:04a9f72bbca7 1364
stevep 0:04a9f72bbca7 1365 #ifndef __GNUC__
stevep 0:04a9f72bbca7 1366 __asm
stevep 0:04a9f72bbca7 1367 {
stevep 0:04a9f72bbca7 1368 push eax
stevep 0:04a9f72bbca7 1369 push edx
stevep 0:04a9f72bbca7 1370
stevep 0:04a9f72bbca7 1371 mov edx,-1 // the value returned when x is zero
stevep 0:04a9f72bbca7 1372 bsf eax,[x] // eax = index of the lowest set bit, zf is set if x is zero
stevep 0:04a9f72bbca7 1373 cmovz eax,edx // if x was zero: eax = -1
stevep 0:04a9f72bbca7 1374 mov [result], eax
stevep 0:04a9f72bbca7 1375
stevep 0:04a9f72bbca7 1376 pop edx
stevep 0:04a9f72bbca7 1377 pop eax
stevep 0:04a9f72bbca7 1378 }
stevep 0:04a9f72bbca7 1379 #endif
stevep 0:04a9f72bbca7 1380
stevep 0:04a9f72bbca7 1381
stevep 0:04a9f72bbca7 1382 #ifdef __GNUC__
// operands: %0 = result, %1 = dummy (temporary, holds -1), %2 = x
// dummy is declared as early-clobber ("=&r") because it is written before x is read
stevep 0:04a9f72bbca7 1383 uint dummy;
stevep 0:04a9f72bbca7 1384
stevep 0:04a9f72bbca7 1385 __asm__ (
stevep 0:04a9f72bbca7 1386
stevep 0:04a9f72bbca7 1387 "movl $-1, %1 \n" // the value returned when x is zero
stevep 0:04a9f72bbca7 1388 "bsfl %2, %0 \n" // result = index of the lowest set bit, zf is set if x is zero
stevep 0:04a9f72bbca7 1389 "cmovz %1, %0 \n" // if x was zero: result = -1
stevep 0:04a9f72bbca7 1390
stevep 0:04a9f72bbca7 1391 : "=r" (result), "=&r" (dummy)
stevep 0:04a9f72bbca7 1392 : "r" (x)
stevep 0:04a9f72bbca7 1393 : "cc" );
stevep 0:04a9f72bbca7 1394
stevep 0:04a9f72bbca7 1395 #endif
stevep 0:04a9f72bbca7 1396
stevep 0:04a9f72bbca7 1397 return result;
stevep 0:04a9f72bbca7 1398 }
stevep 0:04a9f72bbca7 1399
stevep 0:04a9f72bbca7 1400
stevep 0:04a9f72bbca7 1401
stevep 0:04a9f72bbca7 1402 /*!
stevep 0:04a9f72bbca7 1403 this method sets a special bit in the 'value'
stevep 0:04a9f72bbca7 1404 and returns the last state of the bit (zero or one)
stevep 0:04a9f72bbca7 1405
stevep 0:04a9f72bbca7 1406 bit is from <0,31>
stevep 0:04a9f72bbca7 1407 e.g.
stevep 0:04a9f72bbca7 1408 uint x = 100;
stevep 0:04a9f72bbca7 1409 uint bit = SetBitInWord(x, 3);
stevep 0:04a9f72bbca7 1410 now: x = 108 and bit = 0

value - the word to change (passed by reference, it is updated in place)
bit - the index of the bit to set (it must be less than TTMATH_BITS_PER_UINT)
stevep 0:04a9f72bbca7 1411 */
stevep 0:04a9f72bbca7 1412 template<uint value_size>
stevep 0:04a9f72bbca7 1413 uint UInt<value_size>::SetBitInWord(uint & value, uint bit)
stevep 0:04a9f72bbca7 1414 {
stevep 0:04a9f72bbca7 1415 TTMATH_ASSERT( bit < TTMATH_BITS_PER_UINT )
stevep 0:04a9f72bbca7 1416
stevep 0:04a9f72bbca7 1417 uint old_bit;
stevep 0:04a9f72bbca7 1418 uint v = value; // the asm code works on this local copy, it is written back to 'value' at the end
stevep 0:04a9f72bbca7 1419
stevep 0:04a9f72bbca7 1420 #ifndef __GNUC__
stevep 0:04a9f72bbca7 1421 __asm
stevep 0:04a9f72bbca7 1422 {
stevep 0:04a9f72bbca7 1423 push ebx
stevep 0:04a9f72bbca7 1424 push eax
stevep 0:04a9f72bbca7 1425
stevep 0:04a9f72bbca7 1426 mov eax, [v]
stevep 0:04a9f72bbca7 1427 mov ebx, [bit]
stevep 0:04a9f72bbca7 1428 bts eax, ebx // cf = the old state of the bit, then the bit is set
stevep 0:04a9f72bbca7 1429 mov [v], eax
stevep 0:04a9f72bbca7 1430
stevep 0:04a9f72bbca7 1431 setc bl // bl = cf ('mov' above doesn't change cf)
stevep 0:04a9f72bbca7 1432 movzx ebx, bl
stevep 0:04a9f72bbca7 1433 mov [old_bit], ebx
stevep 0:04a9f72bbca7 1434
stevep 0:04a9f72bbca7 1435 pop eax
stevep 0:04a9f72bbca7 1436 pop ebx
stevep 0:04a9f72bbca7 1437 }
stevep 0:04a9f72bbca7 1438 #endif
stevep 0:04a9f72bbca7 1439
stevep 0:04a9f72bbca7 1440
stevep 0:04a9f72bbca7 1441 #ifdef __GNUC__
// registers (see the constraints): eax = v (input and output), ebx = bit on entry and old_bit on exit
stevep 0:04a9f72bbca7 1442 __asm__ (
stevep 0:04a9f72bbca7 1443
stevep 0:04a9f72bbca7 1444 "btsl %%ebx, %%eax \n" // cf = the old state of the bit, then the bit is set
stevep 0:04a9f72bbca7 1445 "setc %%bl \n" // bl = cf
stevep 0:04a9f72bbca7 1446 "movzx %%bl, %%ebx \n" // ebx = 0 or 1
stevep 0:04a9f72bbca7 1447
stevep 0:04a9f72bbca7 1448 : "=a" (v), "=b" (old_bit)
stevep 0:04a9f72bbca7 1449 : "0" (v), "1" (bit)
stevep 0:04a9f72bbca7 1450 : "cc" );
stevep 0:04a9f72bbca7 1451
stevep 0:04a9f72bbca7 1452 #endif
stevep 0:04a9f72bbca7 1453
stevep 0:04a9f72bbca7 1454 value = v;
stevep 0:04a9f72bbca7 1455
stevep 0:04a9f72bbca7 1456 return old_bit;
stevep 0:04a9f72bbca7 1457 }
stevep 0:04a9f72bbca7 1458
stevep 0:04a9f72bbca7 1459
stevep 0:04a9f72bbca7 1460
stevep 0:04a9f72bbca7 1461
stevep 0:04a9f72bbca7 1462 /*!
stevep 0:04a9f72bbca7 1463 multiplication: result_high:result_low = a * b
stevep 0:04a9f72bbca7 1464 result_high - higher word of the result
stevep 0:04a9f72bbca7 1465 result_low - lower word of the result
stevep 0:04a9f72bbca7 1466
stevep 0:04a9f72bbca7 1467 this method never returns a carry
stevep 0:04a9f72bbca7 1468 this method is used in the second version of the multiplication algorithms

a, b - the two words to multiply
both result_high and result_low are written unconditionally (they must not be null)
stevep 0:04a9f72bbca7 1469 */
stevep 0:04a9f72bbca7 1470 template<uint value_size>
stevep 0:04a9f72bbca7 1471 void UInt<value_size>::MulTwoWords(uint a, uint b, uint * result_high, uint * result_low)
stevep 0:04a9f72bbca7 1472 {
stevep 0:04a9f72bbca7 1473 /*
stevep 0:04a9f72bbca7 1474 we must use these temporary variables in order to inform the compiler
stevep 0:04a9f72bbca7 1475 that the values pointed to by result_low and result_high have changed
stevep 0:04a9f72bbca7 1476
stevep 0:04a9f72bbca7 1477 this has no effect in visual studio but it's useful when
stevep 0:04a9f72bbca7 1478 using gcc and options like -Ox
stevep 0:04a9f72bbca7 1479 */
stevep 0:04a9f72bbca7 1480 uint result1_; // the lower word of the product
stevep 0:04a9f72bbca7 1481 uint result2_; // the higher word of the product
stevep 0:04a9f72bbca7 1482
stevep 0:04a9f72bbca7 1483 #ifndef __GNUC__
stevep 0:04a9f72bbca7 1484
stevep 0:04a9f72bbca7 1485 __asm
stevep 0:04a9f72bbca7 1486 {
stevep 0:04a9f72bbca7 1487 push eax
stevep 0:04a9f72bbca7 1488 push edx
stevep 0:04a9f72bbca7 1489
stevep 0:04a9f72bbca7 1490 mov eax, [a]
stevep 0:04a9f72bbca7 1491 mul dword ptr [b] // edx:eax = eax * b
stevep 0:04a9f72bbca7 1492
stevep 0:04a9f72bbca7 1493 mov [result2_], edx
stevep 0:04a9f72bbca7 1494 mov [result1_], eax
stevep 0:04a9f72bbca7 1495
stevep 0:04a9f72bbca7 1496 pop edx
stevep 0:04a9f72bbca7 1497 pop eax
stevep 0:04a9f72bbca7 1498 }
stevep 0:04a9f72bbca7 1499
stevep 0:04a9f72bbca7 1500 #endif
stevep 0:04a9f72bbca7 1501
stevep 0:04a9f72bbca7 1502
stevep 0:04a9f72bbca7 1503 #ifdef __GNUC__
// registers (see the constraints): eax = a, edx = b on entry
// eax = result1_ (lower word), edx = result2_ (higher word) on exit
stevep 0:04a9f72bbca7 1504
stevep 0:04a9f72bbca7 1505 __asm__ (
stevep 0:04a9f72bbca7 1506
stevep 0:04a9f72bbca7 1507 "mull %%edx \n" // edx:eax = eax * edx
stevep 0:04a9f72bbca7 1508
stevep 0:04a9f72bbca7 1509 : "=a" (result1_), "=d" (result2_)
stevep 0:04a9f72bbca7 1510 : "0" (a), "1" (b)
stevep 0:04a9f72bbca7 1511 : "cc" );
stevep 0:04a9f72bbca7 1512
stevep 0:04a9f72bbca7 1513 #endif
stevep 0:04a9f72bbca7 1514
stevep 0:04a9f72bbca7 1515
stevep 0:04a9f72bbca7 1516 *result_low = result1_;
stevep 0:04a9f72bbca7 1517 *result_high = result2_;
stevep 0:04a9f72bbca7 1518 }
stevep 0:04a9f72bbca7 1519
stevep 0:04a9f72bbca7 1520
stevep 0:04a9f72bbca7 1521
stevep 0:04a9f72bbca7 1522
stevep 0:04a9f72bbca7 1523
stevep 0:04a9f72bbca7 1524 /*!
stevep 0:04a9f72bbca7 1525 *
stevep 0:04a9f72bbca7 1526 * Division
stevep 0:04a9f72bbca7 1527 *
stevep 0:04a9f72bbca7 1528 *
stevep 0:04a9f72bbca7 1529 */
stevep 0:04a9f72bbca7 1530
stevep 0:04a9f72bbca7 1531
stevep 0:04a9f72bbca7 1532
stevep 0:04a9f72bbca7 1533
stevep 0:04a9f72bbca7 1534 /*!
stevep 0:04a9f72bbca7 1535 this method divides the 64-bit value a:b (a - higher word, b - lower word) by the 32-bit value c
stevep 0:04a9f72bbca7 1536 *r = a:b / c (quotient) and *rest = a:b % c (remainder)
stevep 0:04a9f72bbca7 1537
stevep 0:04a9f72bbca7 1538 *
stevep 0:04a9f72bbca7 1539 * WARNING:
stevep 0:04a9f72bbca7 1540 * if r (one word) is too small for the quotient (that is when a >= c) or c is equal to zero
stevep 0:04a9f72bbca7 1541 * there'll be a hardware interrupt (0)
stevep 0:04a9f72bbca7 1542 * and probably the end of your program
stevep 0:04a9f72bbca7 1543 *
stevep 0:04a9f72bbca7 1544 */
stevep 0:04a9f72bbca7 1545 template<uint value_size>
stevep 0:04a9f72bbca7 1546 void UInt<value_size>::DivTwoWords(uint a, uint b, uint c, uint * r, uint * rest)
stevep 0:04a9f72bbca7 1547 {
stevep 0:04a9f72bbca7 1548 uint r_; // quotient (taken from eax)
stevep 0:04a9f72bbca7 1549 uint rest_; // remainder (taken from edx)
stevep 0:04a9f72bbca7 1550 /*
stevep 0:04a9f72bbca7 1551 these temporary variables serve the same purpose as those in MulTwoWords:
stevep 0:04a9f72bbca7 1552 the assembly code writes to them and they are copied to *r and *rest at the end
stevep 0:04a9f72bbca7 1553 */
stevep 0:04a9f72bbca7 1554
stevep 0:04a9f72bbca7 1555 TTMATH_ASSERT( c != 0 ) // checked before 'div' is executed; the macro is defined elsewhere (presumably it reports an error when c is zero - TODO confirm)
stevep 0:04a9f72bbca7 1556
stevep 0:04a9f72bbca7 1557 #ifndef __GNUC__ // compilers which do not define __GNUC__: __asm block, Intel syntax
stevep 0:04a9f72bbca7 1558 __asm
stevep 0:04a9f72bbca7 1559 {
stevep 0:04a9f72bbca7 1560 push eax // eax and edx are saved here and restored by the pops below
stevep 0:04a9f72bbca7 1561 push edx
stevep 0:04a9f72bbca7 1562
stevep 0:04a9f72bbca7 1563 mov edx, [a] // edx = higher word of the dividend
stevep 0:04a9f72bbca7 1564 mov eax, [b] // eax = lower word of the dividend
stevep 0:04a9f72bbca7 1565 div dword ptr [c] // eax = edx:eax / c, edx = edx:eax % c
stevep 0:04a9f72bbca7 1566
stevep 0:04a9f72bbca7 1567 mov [r_], eax // quotient
stevep 0:04a9f72bbca7 1568 mov [rest_], edx // remainder
stevep 0:04a9f72bbca7 1569
stevep 0:04a9f72bbca7 1570 pop edx // restored in the reverse order of the pushes
stevep 0:04a9f72bbca7 1571 pop eax
stevep 0:04a9f72bbca7 1572 }
stevep 0:04a9f72bbca7 1573 #endif
stevep 0:04a9f72bbca7 1574
stevep 0:04a9f72bbca7 1575
stevep 0:04a9f72bbca7 1576 #ifdef __GNUC__ // GCC-compatible compilers: extended asm, AT&T syntax
stevep 0:04a9f72bbca7 1577
stevep 0:04a9f72bbca7 1578 __asm__ (
stevep 0:04a9f72bbca7 1579
stevep 0:04a9f72bbca7 1580 "divl %%ecx \n" // eax = edx:eax / ecx, edx = edx:eax % ecx
stevep 0:04a9f72bbca7 1581
stevep 0:04a9f72bbca7 1582 : "=a" (r_), "=d" (rest_) // outputs: eax -> r_ (operand 0), edx -> rest_ (operand 1)
stevep 0:04a9f72bbca7 1583 : "0" (b), "1" (a), "c" (c) // inputs: b in eax (lower word), a in edx (higher word), c in ecx
stevep 0:04a9f72bbca7 1584 : "cc" ); // the instruction modifies the condition flags
stevep 0:04a9f72bbca7 1585
stevep 0:04a9f72bbca7 1586 #endif
stevep 0:04a9f72bbca7 1587
stevep 0:04a9f72bbca7 1588
stevep 0:04a9f72bbca7 1589 *r = r_; // hand the results over to the caller
stevep 0:04a9f72bbca7 1590 *rest = rest_;
stevep 0:04a9f72bbca7 1591
stevep 0:04a9f72bbca7 1592 }
stevep 0:04a9f72bbca7 1593
stevep 0:04a9f72bbca7 1594
stevep 0:04a9f72bbca7 1595
stevep 0:04a9f72bbca7 1596 } //namespace
stevep 0:04a9f72bbca7 1597
stevep 0:04a9f72bbca7 1598
stevep 0:04a9f72bbca7 1599
stevep 0:04a9f72bbca7 1600 #endif //ifdef TTMATH_PLATFORM32
stevep 0:04a9f72bbca7 1601 #endif //ifndef TTMATH_NOASM
stevep 0:04a9f72bbca7 1602 #endif
stevep 0:04a9f72bbca7 1603