| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602 |
- /*
- * This file is a part of TTMath Bignum Library
- * and is distributed under the (new) BSD licence.
- * Author: Tomasz Sowa <[email protected]>
- */
- /*
- * Copyright (c) 2006-2009, Tomasz Sowa
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * * Neither the name Tomasz Sowa nor the names of contributors to this
- * project may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- */
- #ifndef headerfilettmathuint_x86
- #define headerfilettmathuint_x86
- #ifndef TTMATH_NOASM
- #ifdef TTMATH_PLATFORM32
- /*!
- \file ttmathuint_x86.h
- \brief template class UInt<uint> with assembler code for 32bit x86 processors
- this file is included at the end of ttmathuint.h
- */
- /*!
- \brief a namespace for the TTMath library
- */
- namespace ttmath
- {
- /*!
- returning the string represents the currect type of the library
- we have following types:
- asm_vc_32 - with asm code designed for Microsoft Visual C++ (32 bits)
- asm_gcc_32 - with asm code designed for GCC (32 bits)
- asm_vc_64 - with asm for VC (64 bit)
- asm_gcc_64 - with asm for GCC (64 bit)
- no_asm_32 - pure C++ version (32 bit) - without any asm code
- no_asm_64 - pure C++ version (64 bit) - without any asm code
- */
- template<uint value_size>
- const char * UInt<value_size>::LibTypeStr()
- {
- #ifndef __GNUC__
- static const char info[] = "asm_vc_32";
- #endif
- #ifdef __GNUC__
- static const char info[] = "asm_gcc_32";
- #endif
- return info;
- }
- /*!
- returning the currect type of the library
- */
- template<uint value_size>
- LibTypeCode UInt<value_size>::LibType()
- {
- #ifndef __GNUC__
- LibTypeCode info = asm_vc_32;
- #endif
- #ifdef __GNUC__
- LibTypeCode info = asm_gcc_32;
- #endif
- return info;
- }
- /*!
- *
- * basic mathematic functions
- *
- */
- /*!
- adding ss2 to the this and adding carry if it's defined
- (this = this + ss2 + c)
- c must be zero or one (might be a bigger value than 1)
- function returns carry (1) (if it has been)
- */
- template<uint value_size>
- uint UInt<value_size>::Add(const UInt<value_size> & ss2, uint c)
- {
- uint b = value_size;
- uint * p1 = table;
- uint * p2 = const_cast<uint*>(ss2.table);
- // we don't have to use TTMATH_REFERENCE_ASSERT here
- // this algorithm doesn't require it
- #ifndef __GNUC__
-
- // this part might be compiled with for example visual c
- __asm
- {
- push eax
- push ebx
- push ecx
- push edx
- push esi
- mov ecx,[b]
-
- mov ebx,[p1]
- mov esi,[p2]
- xor edx,edx // edx=0
- mov eax,[c]
- neg eax // CF=1 if rax!=0 , CF=0 if rax==0
- ttmath_loop:
- mov eax,[esi+edx*4]
- adc [ebx+edx*4],eax
- inc edx
- dec ecx
- jnz ttmath_loop
- adc ecx, ecx
- mov [c], ecx
- pop esi
- pop edx
- pop ecx
- pop ebx
- pop eax
- }
- #endif
-
- #ifdef __GNUC__
- uint dummy, dummy2;
- // this part should be compiled with gcc
-
- __asm__ __volatile__(
- "xorl %%edx, %%edx \n"
- "negl %%eax \n" // CF=1 if rax!=0 , CF=0 if rax==0
- "1: \n"
- "movl (%%esi,%%edx,4), %%eax \n"
- "adcl %%eax, (%%ebx,%%edx,4) \n"
-
- "incl %%edx \n"
- "decl %%ecx \n"
- "jnz 1b \n"
- "adc %%ecx, %%ecx \n"
- : "=c" (c), "=a" (dummy), "=d" (dummy2)
- : "0" (b), "1" (c), "b" (p1), "S" (p2)
- : "cc", "memory" );
- #endif
- TTMATH_LOGC("UInt::Add", c)
- return c;
- }
- /*!
- adding one word (at a specific position)
- and returning a carry (if it has been)
- e.g.
- if we've got (value_size=3):
- table[0] = 10;
- table[1] = 30;
- table[2] = 5;
- and we call:
- AddInt(2,1)
- then it'll be:
- table[0] = 10;
- table[1] = 30 + 2;
- table[2] = 5;
- of course if there was a carry from table[2] it would be returned
- */
- template<uint value_size>
- uint UInt<value_size>::AddInt(uint value, uint index)
- {
- uint b = value_size;
- uint * p1 = table;
- uint c;
- TTMATH_ASSERT( index < value_size )
- #ifndef __GNUC__
- __asm
- {
- push eax
- push ebx
- push ecx
- push edx
- mov ecx, [b]
- sub ecx, [index]
- mov edx, [index]
- mov ebx, [p1]
- mov eax, [value]
- ttmath_loop:
- add [ebx+edx*4], eax
- jnc ttmath_end
- mov eax, 1
- inc edx
- dec ecx
- jnz ttmath_loop
- ttmath_end:
- setc al
- movzx edx, al
- mov [c], edx
- pop edx
- pop ecx
- pop ebx
- pop eax
- }
- #endif
-
- #ifdef __GNUC__
- uint dummy, dummy2;
- __asm__ __volatile__(
-
- "subl %%edx, %%ecx \n"
- "1: \n"
- "addl %%eax, (%%ebx,%%edx,4) \n"
- "jnc 2f \n"
-
- "movl $1, %%eax \n"
- "incl %%edx \n"
- "decl %%ecx \n"
- "jnz 1b \n"
- "2: \n"
- "setc %%al \n"
- "movzx %%al, %%edx \n"
- : "=d" (c), "=a" (dummy), "=c" (dummy2)
- : "0" (index), "1" (value), "2" (b), "b" (p1)
- : "cc", "memory" );
- #endif
-
- TTMATH_LOGC("UInt::AddInt", c)
- return c;
- }
- /*!
- adding only two unsigned words to the existing value
- and these words begin on the 'index' position
- (it's used in the multiplication algorithm 2)
- index should be equal or smaller than value_size-2 (index <= value_size-2)
- x1 - lower word, x2 - higher word
- for example if we've got value_size equal 4 and:
- table[0] = 3
- table[1] = 4
- table[2] = 5
- table[3] = 6
- then let
- x1 = 10
- x2 = 20
- and
- index = 1
- the result of this method will be:
- table[0] = 3
- table[1] = 4 + x1 = 14
- table[2] = 5 + x2 = 25
- table[3] = 6
-
- and no carry at the end of table[3]
- (of course if there was a carry in table[2](5+20) then
- this carry would be passed to the table[3] etc.)
- */
- template<uint value_size>
- uint UInt<value_size>::AddTwoInts(uint x2, uint x1, uint index)
- {
- uint b = value_size;
- uint * p1 = table;
- uint c;
- TTMATH_ASSERT( index < value_size - 1 )
- #ifndef __GNUC__
- __asm
- {
- push eax
- push ebx
- push ecx
- push edx
- mov ecx, [b]
- sub ecx, [index]
- mov ebx, [p1]
- mov edx, [index]
- mov eax, [x1]
- add [ebx+edx*4], eax
- inc edx
- dec ecx
- mov eax, [x2]
-
- ttmath_loop:
- adc [ebx+edx*4], eax
- jnc ttmath_end
- mov eax, 0
- inc edx
- dec ecx
- jnz ttmath_loop
- ttmath_end:
- setc al
- movzx edx, al
- mov [c], edx
-
- pop edx
- pop ecx
- pop ebx
- pop eax
- }
- #endif
-
- #ifdef __GNUC__
- uint dummy, dummy2;
- __asm__ __volatile__(
-
- "subl %%edx, %%ecx \n"
-
- "addl %%esi, (%%ebx,%%edx,4) \n"
- "incl %%edx \n"
- "decl %%ecx \n"
- "1: \n"
- "adcl %%eax, (%%ebx,%%edx,4) \n"
- "jnc 2f \n"
- "mov $0, %%eax \n"
- "incl %%edx \n"
- "decl %%ecx \n"
- "jnz 1b \n"
- "2: \n"
- "setc %%al \n"
- "movzx %%al, %%eax \n"
- : "=a" (c), "=c" (dummy), "=d" (dummy2)
- : "0" (x2), "1" (b), "2" (index), "b" (p1), "S" (x1)
- : "cc", "memory" );
- #endif
- TTMATH_LOGC("UInt::AddTwoInts", c)
-
- return c;
- }
- /*!
- this static method addes one vector to the other
- 'ss1' is larger in size or equal to 'ss2'
- ss1 points to the first (larger) vector
- ss2 points to the second vector
- ss1_size - size of the ss1 (and size of the result too)
- ss2_size - size of the ss2
- result - is the result vector (which has size the same as ss1: ss1_size)
- Example: ss1_size is 5, ss2_size is 3
- ss1: ss2: result (output):
- 5 1 5+1
- 4 3 4+3
- 2 7 2+7
- 6 6
- 9 9
- of course the carry is propagated and will be returned from the last item
- (this method is used by the Karatsuba multiplication algorithm)
- */
- template<uint value_size>
- uint UInt<value_size>::AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
- {
- TTMATH_ASSERT( ss1_size >= ss2_size )
- uint rest = ss1_size - ss2_size;
- uint c;
- #ifndef __GNUC__
- // this part might be compiled with for example visual c
- __asm
- {
- pushad
- mov ecx, [ss2_size]
- xor edx, edx // edx = 0, cf = 0
- mov esi, [ss1]
- mov ebx, [ss2]
- mov edi, [result]
- ttmath_loop:
- mov eax, [esi+edx*4]
- adc eax, [ebx+edx*4]
- mov [edi+edx*4], eax
- inc edx
- dec ecx
- jnz ttmath_loop
- adc ecx, ecx // ecx has the cf state
- mov ebx, [rest]
- or ebx, ebx
- jz ttmath_end
-
- xor ebx, ebx // ebx = 0
- neg ecx // setting cf from ecx
- mov ecx, [rest] // ecx is != 0
-
- ttmath_loop2:
- mov eax, [esi+edx*4]
- adc eax, ebx
- mov [edi+edx*4], eax
- inc edx
- dec ecx
- jnz ttmath_loop2
- adc ecx, ecx
- ttmath_end:
- mov [c], ecx
- popad
- }
- #endif
-
- #ifdef __GNUC__
-
- // this part should be compiled with gcc
- uint dummy1, dummy2, dummy3;
- __asm__ __volatile__(
- "push %%edx \n"
- "xor %%edx, %%edx \n" // edx = 0, cf = 0
- "1: \n"
- "mov (%%esi,%%edx,4), %%eax \n"
- "adc (%%ebx,%%edx,4), %%eax \n"
- "mov %%eax, (%%edi,%%edx,4) \n"
- "inc %%edx \n"
- "dec %%ecx \n"
- "jnz 1b \n"
- "adc %%ecx, %%ecx \n" // ecx has the cf state
- "pop %%eax \n" // eax = rest
- "or %%eax, %%eax \n"
- "jz 3f \n"
-
- "xor %%ebx, %%ebx \n" // ebx = 0
- "neg %%ecx \n" // setting cf from ecx
- "mov %%eax, %%ecx \n" // ecx=rest and is != 0
- "2: \n"
- "mov (%%esi, %%edx, 4), %%eax \n"
- "adc %%ebx, %%eax \n"
- "mov %%eax, (%%edi, %%edx, 4) \n"
- "inc %%edx \n"
- "dec %%ecx \n"
- "jnz 2b \n"
- "adc %%ecx, %%ecx \n"
- "3: \n"
- : "=a" (dummy1), "=b" (dummy2), "=c" (c), "=d" (dummy3)
- : "1" (ss2), "2" (ss2_size), "3" (rest), "S" (ss1), "D" (result)
- : "cc", "memory" );
- #endif
- TTMATH_VECTOR_LOGC("UInt::AddVector", c, result, ss1_size)
- return c;
- }
- /*!
- subtracting ss2 from the 'this' and subtracting
- carry if it has been defined
- (this = this - ss2 - c)
- c must be zero or one (might be a bigger value than 1)
- function returns carry (1) (if it has been)
- */
- template<uint value_size>
- uint UInt<value_size>::Sub(const UInt<value_size> & ss2, uint c)
- {
- uint b = value_size;
- uint * p1 = table;
- uint * p2 = const_cast<uint*>(ss2.table);
- // we don't have to use TTMATH_REFERENCE_ASSERT here
- // this algorithm doesn't require it
- #ifndef __GNUC__
- __asm
- {
- push eax
- push ebx
- push ecx
- push edx
- push esi
- mov ecx,[b]
-
- mov ebx,[p1]
- mov esi,[p2]
- xor edx,edx // edx=0
- mov eax,[c]
- neg eax // CF=1 if rax!=0 , CF=0 if rax==0
- ttmath_loop:
- mov eax,[esi+edx*4]
- sbb [ebx+edx*4],eax
- inc edx
- dec ecx
- jnz ttmath_loop
- adc ecx, ecx
- mov [c], ecx
- pop esi
- pop edx
- pop ecx
- pop ebx
- pop eax
- }
- #endif
- #ifdef __GNUC__
- uint dummy, dummy2;
- __asm__ __volatile__(
- "xorl %%edx, %%edx \n"
- "negl %%eax \n" // CF=1 if rax!=0 , CF=0 if rax==0
- "1: \n"
- "movl (%%esi,%%edx,4), %%eax \n"
- "sbbl %%eax, (%%ebx,%%edx,4) \n"
-
- "incl %%edx \n"
- "decl %%ecx \n"
- "jnz 1b \n"
- "adc %%ecx, %%ecx \n"
- : "=c" (c), "=a" (dummy), "=d" (dummy2)
- : "0" (b), "1" (c), "b" (p1), "S" (p2)
- : "cc", "memory" );
- #endif
- TTMATH_LOGC("UInt::Sub", c)
- return c;
- }
- /*!
- this method subtracts one word (at a specific position)
- and returns a carry (if it was)
- e.g.
- if we've got (value_size=3):
- table[0] = 10;
- table[1] = 30;
- table[2] = 5;
- and we call:
- SubInt(2,1)
- then it'll be:
- table[0] = 10;
- table[1] = 30 - 2;
- table[2] = 5;
- of course if there was a carry from table[2] it would be returned
- */
- template<uint value_size>
- uint UInt<value_size>::SubInt(uint value, uint index)
- {
- uint b = value_size;
- uint * p1 = table;
- uint c;
- TTMATH_ASSERT( index < value_size )
- #ifndef __GNUC__
- __asm
- {
- push eax
- push ebx
- push ecx
- push edx
- mov ecx, [b]
- sub ecx, [index]
- mov edx, [index]
- mov ebx, [p1]
- mov eax, [value]
- ttmath_loop:
- sub [ebx+edx*4], eax
- jnc ttmath_end
- mov eax, 1
- inc edx
- dec ecx
- jnz ttmath_loop
- ttmath_end:
- setc al
- movzx edx, al
- mov [c], edx
- pop edx
- pop ecx
- pop ebx
- pop eax
- }
- #endif
-
- #ifdef __GNUC__
- uint dummy, dummy2;
- __asm__ __volatile__(
-
- "subl %%edx, %%ecx \n"
- "1: \n"
- "subl %%eax, (%%ebx,%%edx,4) \n"
- "jnc 2f \n"
-
- "movl $1, %%eax \n"
- "incl %%edx \n"
- "decl %%ecx \n"
- "jnz 1b \n"
- "2: \n"
- "setc %%al \n"
- "movzx %%al, %%edx \n"
- : "=d" (c), "=a" (dummy), "=c" (dummy2)
- : "0" (index), "1" (value), "2" (b), "b" (p1)
- : "cc", "memory" );
- #endif
-
- TTMATH_LOGC("UInt::SubInt", c)
-
- return c;
- }
- /*!
- this static method subtractes one vector from the other
- 'ss1' is larger in size or equal to 'ss2'
- ss1 points to the first (larger) vector
- ss2 points to the second vector
- ss1_size - size of the ss1 (and size of the result too)
- ss2_size - size of the ss2
- result - is the result vector (which has size the same as ss1: ss1_size)
- Example: ss1_size is 5, ss2_size is 3
- ss1: ss2: result (output):
- 5 1 5-1
- 4 3 4-3
- 2 7 2-7
- 6 6-1 (the borrow from previous item)
- 9 9
- return (carry): 0
- of course the carry (borrow) is propagated and will be returned from the last item
- (this method is used by the Karatsuba multiplication algorithm)
- */
- template<uint value_size>
- uint UInt<value_size>::SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
- {
- TTMATH_ASSERT( ss1_size >= ss2_size )
- uint rest = ss1_size - ss2_size;
- uint c;
- #ifndef __GNUC__
-
- // this part might be compiled with for example visual c
- /*
- the asm code is nearly the same as in AddVector
- only two instructions 'adc' are changed to 'sbb'
- */
- __asm
- {
- pushad
- mov ecx, [ss2_size]
- xor edx, edx // edx = 0, cf = 0
- mov esi, [ss1]
- mov ebx, [ss2]
- mov edi, [result]
- ttmath_loop:
- mov eax, [esi+edx*4]
- sbb eax, [ebx+edx*4]
- mov [edi+edx*4], eax
- inc edx
- dec ecx
- jnz ttmath_loop
- adc ecx, ecx // ecx has the cf state
- mov ebx, [rest]
- or ebx, ebx
- jz ttmath_end
-
- xor ebx, ebx // ebx = 0
- neg ecx // setting cf from ecx
- mov ecx, [rest] // ecx is != 0
- ttmath_loop2:
- mov eax, [esi+edx*4]
- sbb eax, ebx
- mov [edi+edx*4], eax
- inc edx
- dec ecx
- jnz ttmath_loop2
- adc ecx, ecx
- ttmath_end:
- mov [c], ecx
- popad
- }
- #endif
-
- #ifdef __GNUC__
-
- // this part should be compiled with gcc
- uint dummy1, dummy2, dummy3;
- __asm__ __volatile__(
- "push %%edx \n"
- "xor %%edx, %%edx \n" // edx = 0, cf = 0
- "1: \n"
- "mov (%%esi,%%edx,4), %%eax \n"
- "sbb (%%ebx,%%edx,4), %%eax \n"
- "mov %%eax, (%%edi,%%edx,4) \n"
- "inc %%edx \n"
- "dec %%ecx \n"
- "jnz 1b \n"
- "adc %%ecx, %%ecx \n" // ecx has the cf state
- "pop %%eax \n" // eax = rest
- "or %%eax, %%eax \n"
- "jz 3f \n"
-
- "xor %%ebx, %%ebx \n" // ebx = 0
- "neg %%ecx \n" // setting cf from ecx
- "mov %%eax, %%ecx \n" // ecx=rest and is != 0
- "2: \n"
- "mov (%%esi, %%edx, 4), %%eax \n"
- "sbb %%ebx, %%eax \n"
- "mov %%eax, (%%edi, %%edx, 4) \n"
- "inc %%edx \n"
- "dec %%ecx \n"
- "jnz 2b \n"
- "adc %%ecx, %%ecx \n"
- "3: \n"
- : "=a" (dummy1), "=b" (dummy2), "=c" (c), "=d" (dummy3)
- : "1" (ss2), "2" (ss2_size), "3" (rest), "S" (ss1), "D" (result)
- : "cc", "memory" );
- #endif
- TTMATH_VECTOR_LOGC("UInt::SubVector", c, result, ss1_size)
- return c;
- }
- /*!
- this method moves all bits into the left hand side
- return value <- this <- c
- the lowest *bit* will be held the 'c' and
- the state of one additional bit (on the left hand side)
- will be returned
- for example:
- let this is 001010000
- after Rcl2_one(1) there'll be 010100001 and Rcl2_one returns 0
- */
- template<uint value_size>
- uint UInt<value_size>::Rcl2_one(uint c)
- {
- uint b = value_size;
- uint * p1 = table;
- #ifndef __GNUC__
- __asm
- {
- push ebx
- push ecx
- push edx
- mov ebx, [p1]
- xor edx, edx
- mov ecx, [c]
- neg ecx
- mov ecx, [b]
- ttmath_loop:
- rcl dword ptr [ebx+edx*4], 1
-
- inc edx
- dec ecx
- jnz ttmath_loop
- adc ecx, ecx
- mov [c], ecx
-
- pop edx
- pop ecx
- pop ebx
- }
- #endif
- #ifdef __GNUC__
- uint dummy, dummy2;
- __asm__ __volatile__(
- "xorl %%edx, %%edx \n" // edx=0
- "negl %%eax \n" // CF=1 if eax!=0 , CF=0 if eax==0
- "1: \n"
- "rcll $1, (%%ebx, %%edx, 4) \n"
- "incl %%edx \n"
- "decl %%ecx \n"
- "jnz 1b \n"
- "adcl %%ecx, %%ecx \n"
- : "=c" (c), "=a" (dummy), "=d" (dummy2)
- : "0" (b), "1" (c), "b" (p1)
- : "cc", "memory" );
- #endif
- TTMATH_LOGC("UInt::Rcl2_one", c)
- return c;
- }
- /*!
- this method moves all bits into the right hand side
- c -> this -> return value
- the highest *bit* will be held the 'c' and
- the state of one additional bit (on the right hand side)
- will be returned
- for example:
- let this is 000000010
- after Rcr2_one(1) there'll be 100000001 and Rcr2_one returns 0
- */
- template<uint value_size>
- uint UInt<value_size>::Rcr2_one(uint c)
- {
- uint b = value_size;
- uint * p1 = table;
- #ifndef __GNUC__
- __asm
- {
- push ebx
- push ecx
- mov ebx, [p1]
- mov ecx, [c]
- neg ecx
- mov ecx, [b]
- ttmath_loop:
- rcr dword ptr [ebx+ecx*4-4], 1
-
- dec ecx
- jnz ttmath_loop
- adc ecx, ecx
- mov [c], ecx
- pop ecx
- pop ebx
- }
- #endif
- #ifdef __GNUC__
- uint dummy;
- __asm__ __volatile__(
- "negl %%eax \n" // CF=1 if eax!=0 , CF=0 if eax==0
- "1: \n"
- "rcrl $1, -4(%%ebx, %%ecx, 4) \n"
- "decl %%ecx \n"
- "jnz 1b \n"
- "adcl %%ecx, %%ecx \n"
- : "=c" (c), "=a" (dummy)
- : "0" (b), "1" (c), "b" (p1)
- : "cc", "memory" );
- #endif
- TTMATH_LOGC("UInt::Rcr2_one", c)
- return c;
- }
- #ifdef _MSC_VER
- #pragma warning (disable : 4731)
- //warning C4731: frame pointer register 'ebp' modified by inline assembly code
- #endif
-
- /*!
- this method moves all bits into the left hand side
- return value <- this <- c
- the lowest *bits* will be held the 'c' and
- the state of one additional bit (on the left hand side)
- will be returned
- for example:
- let this is 001010000
- after Rcl2(3, 1) there'll be 010000111 and Rcl2 returns 1
- */
- template<uint value_size>
- uint UInt<value_size>::Rcl2(uint bits, uint c)
- {
- TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT )
-
- uint b = value_size;
- uint * p1 = table;
- #ifndef __GNUC__
- __asm
- {
- push eax
- push ebx
- push ecx
- push edx
- push esi
- push edi
- push ebp
- mov edi, [b]
- mov ecx, 32
- sub ecx, [bits]
- mov edx, -1
- shr edx, cl
- mov ecx, [bits]
- mov ebx, [p1]
- mov eax, [c]
- mov ebp, edx // ebp = mask (modified ebp - don't read/write to variables)
- xor edx, edx // edx = 0
- mov esi, edx
- or eax, eax
- cmovnz esi, ebp // if(c) esi=mask else esi=0
- ttmath_loop:
- rol dword ptr [ebx+edx*4], cl
-
- mov eax, [ebx+edx*4]
- and eax, ebp
- xor [ebx+edx*4], eax // clearing bits
- or [ebx+edx*4], esi // saving old value
- mov esi, eax
- inc edx
- dec edi
- jnz ttmath_loop
- pop ebp // restoring ebp
- and eax, 1
- mov [c], eax
- pop edi
- pop esi
- pop edx
- pop ecx
- pop ebx
- pop eax
- }
- #endif
- #ifdef __GNUC__
- uint dummy, dummy2, dummy3;
- __asm__ __volatile__(
- "push %%ebp \n"
-
- "movl %%ecx, %%esi \n"
- "movl $32, %%ecx \n"
- "subl %%esi, %%ecx \n" // ecx = 32 - bits
- "movl $-1, %%edx \n" // edx = -1 (all bits set to one)
- "shrl %%cl, %%edx \n" // shifting (0 -> edx -> cf) (cl times)
- "movl %%edx, %%ebp \n" // ebp = edx = mask
- "movl %%esi, %%ecx \n"
- "xorl %%edx, %%edx \n"
- "movl %%edx, %%esi \n"
- "orl %%eax, %%eax \n"
- "cmovnz %%ebp, %%esi \n" // if(c) esi=mask else esi=0
- "1: \n"
- "roll %%cl, (%%ebx,%%edx,4) \n"
- "movl (%%ebx,%%edx,4), %%eax \n"
- "andl %%ebp, %%eax \n"
- "xorl %%eax, (%%ebx,%%edx,4) \n"
- "orl %%esi, (%%ebx,%%edx,4) \n"
- "movl %%eax, %%esi \n"
-
- "incl %%edx \n"
- "decl %%edi \n"
- "jnz 1b \n"
-
- "and $1, %%eax \n"
- "pop %%ebp \n"
- : "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3)
- : "0" (c), "1" (b), "b" (p1), "c" (bits)
- : "cc", "memory" );
- #endif
- TTMATH_LOGC("UInt::Rcl2", c)
- return c;
- }
- /*!
- this method moves all bits into the right hand side
- C -> this -> return value
- the highest *bits* will be held the 'c' and
- the state of one additional bit (on the right hand side)
- will be returned
- for example:
- let this is 000000010
- after Rcr2(2, 1) there'll be 110000000 and Rcr2 returns 1
- */
- template<uint value_size>
- uint UInt<value_size>::Rcr2(uint bits, uint c)
- {
- TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT )
- uint b = value_size;
- uint * p1 = table;
- #ifndef __GNUC__
- __asm
- {
- push eax
- push ebx
- push ecx
- push edx
- push esi
- push edi
- push ebp
- mov edi, [b]
- mov ecx, 32
- sub ecx, [bits]
- mov edx, -1
- shl edx, cl
- mov ecx, [bits]
- mov ebx, [p1]
- mov eax, [c]
- mov ebp, edx // ebp = mask (modified ebp - don't read/write to variables)
- xor edx, edx // edx = 0
- mov esi, edx
- add edx, edi
- dec edx // edx is pointing at the end of the table (on last word)
- or eax, eax
- cmovnz esi, ebp // if(c) esi=mask else esi=0
- ttmath_loop:
- ror dword ptr [ebx+edx*4], cl
-
- mov eax, [ebx+edx*4]
- and eax, ebp
- xor [ebx+edx*4], eax // clearing bits
- or [ebx+edx*4], esi // saving old value
- mov esi, eax
- dec edx
- dec edi
- jnz ttmath_loop
- pop ebp // restoring ebp
- rol eax, 1 // 31bit will be first
- and eax, 1
- mov [c], eax
- pop edi
- pop esi
- pop edx
- pop ecx
- pop ebx
- pop eax
- }
- #endif
- #ifdef __GNUC__
- uint dummy, dummy2, dummy3;
- __asm__ __volatile__(
- "push %%ebp \n"
-
- "movl %%ecx, %%esi \n"
- "movl $32, %%ecx \n"
- "subl %%esi, %%ecx \n" // ecx = 32 - bits
- "movl $-1, %%edx \n" // edx = -1 (all bits set to one)
- "shll %%cl, %%edx \n" // shifting (cf <- edx <- 0) (cl times)
- "movl %%edx, %%ebp \n" // ebp = edx = mask
- "movl %%esi, %%ecx \n"
- "xorl %%edx, %%edx \n"
- "movl %%edx, %%esi \n"
- "addl %%edi, %%edx \n"
- "decl %%edx \n" // edx is pointing at the end of the table (on last word)
- "orl %%eax, %%eax \n"
- "cmovnz %%ebp, %%esi \n" // if(c) esi=mask else esi=0
- "1: \n"
- "rorl %%cl, (%%ebx,%%edx,4) \n"
- "movl (%%ebx,%%edx,4), %%eax \n"
- "andl %%ebp, %%eax \n"
- "xorl %%eax, (%%ebx,%%edx,4) \n"
- "orl %%esi, (%%ebx,%%edx,4) \n"
- "movl %%eax, %%esi \n"
-
- "decl %%edx \n"
- "decl %%edi \n"
- "jnz 1b \n"
-
- "roll $1, %%eax \n"
- "andl $1, %%eax \n"
- "pop %%ebp \n"
- : "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3)
- : "0" (c), "1" (b), "b" (p1), "c" (bits)
- : "cc", "memory" );
- #endif
- TTMATH_LOGC("UInt::Rcr2", c)
- return c;
- }
- #ifdef _MSC_VER
- #pragma warning (default : 4731)
- #endif
- /*
- this method returns the number of the highest set bit in one 32-bit word
- if the 'x' is zero this method returns '-1'
- */
- template<uint value_size>
- sint UInt<value_size>::FindLeadingBitInWord(uint x)
- {
- sint result;
- #ifndef __GNUC__
- __asm
- {
- push eax
- push edx
- mov edx,-1
- bsr eax,[x]
- cmovz eax,edx
- mov [result], eax
- pop edx
- pop eax
- }
- #endif
- #ifdef __GNUC__
- uint dummy;
- __asm__ (
- "movl $-1, %1 \n"
- "bsrl %2, %0 \n"
- "cmovz %1, %0 \n"
- : "=r" (result), "=&r" (dummy)
- : "r" (x)
- : "cc" );
- #endif
- return result;
- }
- /*
- this method returns the number of the smallest set bit in one 32-bit word
- if the 'x' is zero this method returns '-1'
- */
- template<uint value_size>
- sint UInt<value_size>::FindLowestBitInWord(uint x)
- {
- sint result;
- #ifndef __GNUC__
- __asm
- {
- push eax
- push edx
- mov edx,-1
- bsf eax,[x]
- cmovz eax,edx
- mov [result], eax
- pop edx
- pop eax
- }
- #endif
- #ifdef __GNUC__
- uint dummy;
- __asm__ (
- "movl $-1, %1 \n"
- "bsfl %2, %0 \n"
- "cmovz %1, %0 \n"
- : "=r" (result), "=&r" (dummy)
- : "r" (x)
- : "cc" );
- #endif
- return result;
- }
- /*!
- this method sets a special bit in the 'value'
- and returns the last state of the bit (zero or one)
- bit is from <0,31>
- e.g.
- uint x = 100;
- uint bit = SetBitInWord(x, 3);
- now: x = 108 and bit = 0
- */
- template<uint value_size>
- uint UInt<value_size>::SetBitInWord(uint & value, uint bit)
- {
- TTMATH_ASSERT( bit < TTMATH_BITS_PER_UINT )
- uint old_bit;
- uint v = value;
- #ifndef __GNUC__
- __asm
- {
- push ebx
- push eax
- mov eax, [v]
- mov ebx, [bit]
- bts eax, ebx
- mov [v], eax
- setc bl
- movzx ebx, bl
- mov [old_bit], ebx
- pop eax
- pop ebx
- }
- #endif
- #ifdef __GNUC__
- __asm__ (
- "btsl %%ebx, %%eax \n"
- "setc %%bl \n"
- "movzx %%bl, %%ebx \n"
-
- : "=a" (v), "=b" (old_bit)
- : "0" (v), "1" (bit)
- : "cc" );
- #endif
- value = v;
- return old_bit;
- }
- /*!
- multiplication: result_high:result_low = a * b
- result_high - higher word of the result
- result_low - lower word of the result
-
- this methos never returns a carry
- this method is used in the second version of the multiplication algorithms
- */
- template<uint value_size>
- void UInt<value_size>::MulTwoWords(uint a, uint b, uint * result_high, uint * result_low)
- {
- /*
- we must use these temporary variables in order to inform the compilator
- that value pointed with result1 and result2 has changed
- this has no effect in visual studio but it's useful when
- using gcc and options like -Ox
- */
- uint result1_;
- uint result2_;
- #ifndef __GNUC__
- __asm
- {
- push eax
- push edx
- mov eax, [a]
- mul dword ptr [b]
- mov [result2_], edx
- mov [result1_], eax
- pop edx
- pop eax
- }
- #endif
- #ifdef __GNUC__
- __asm__ (
-
- "mull %%edx \n"
- : "=a" (result1_), "=d" (result2_)
- : "0" (a), "1" (b)
- : "cc" );
- #endif
- *result_low = result1_;
- *result_high = result2_;
- }
- /*!
- *
- * Division
- *
- *
- */
-
- /*!
- this method calculates 64bits word a:b / 32bits c (a higher, b lower word)
- r = a:b / c and rest - remainder
- *
- * WARNING:
- * if r (one word) is too small for the result or c is equal zero
- * there'll be a hardware interruption (0)
- * and probably the end of your program
- *
- */
- template<uint value_size>
- void UInt<value_size>::DivTwoWords(uint a, uint b, uint c, uint * r, uint * rest)
- {
- uint r_;
- uint rest_;
- /*
- these variables have similar meaning like those in
- the multiplication algorithm MulTwoWords
- */
- TTMATH_ASSERT( c != 0 )
- #ifndef __GNUC__
- __asm
- {
- push eax
- push edx
- mov edx, [a]
- mov eax, [b]
- div dword ptr [c]
- mov [r_], eax
- mov [rest_], edx
- pop edx
- pop eax
- }
- #endif
- #ifdef __GNUC__
-
- __asm__ (
- "divl %%ecx \n"
- : "=a" (r_), "=d" (rest_)
- : "0" (b), "1" (a), "c" (c)
- : "cc" );
- #endif
- *r = r_;
- *rest = rest_;
- }
- } //namespace
- #endif //ifdef TTMATH_PLATFORM32
- #endif //ifndef TTMATH_NOASM
- #endif
|