| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669 |
- /*
- * Copyright (c) 2008-2016 Stefan Krah. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
- #ifndef TYPEARITH_H
- #define TYPEARITH_H
- #include "mpdecimal.h"
- /*****************************************************************************/
- /* Low level native arithmetic on basic types */
- /*****************************************************************************/
- /** ------------------------------------------------------------
- ** Double width multiplication and division
- ** ------------------------------------------------------------
- */
- #if defined(CONFIG_64)
- #if defined(ANSI)
- #if defined(HAVE_UINT128_T)
- static inline void
- _mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
- {
- __uint128_t hl;
- hl = (__uint128_t)a * b;
- *hi = hl >> 64;
- *lo = (mpd_uint_t)hl;
- }
- static inline void
- _mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
- mpd_uint_t d)
- {
- __uint128_t hl;
- hl = ((__uint128_t)hi<<64) + lo;
- *q = (mpd_uint_t)(hl / d); /* quotient is known to fit */
- *r = (mpd_uint_t)(hl - (__uint128_t)(*q) * d);
- }
- #else
- static inline void
- _mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
- {
- uint32_t w[4], carry;
- uint32_t ah, al, bh, bl;
- uint64_t hl;
- ah = (uint32_t)(a>>32); al = (uint32_t)a;
- bh = (uint32_t)(b>>32); bl = (uint32_t)b;
- hl = (uint64_t)al * bl;
- w[0] = (uint32_t)hl;
- carry = (uint32_t)(hl>>32);
- hl = (uint64_t)ah * bl + carry;
- w[1] = (uint32_t)hl;
- w[2] = (uint32_t)(hl>>32);
- hl = (uint64_t)al * bh + w[1];
- w[1] = (uint32_t)hl;
- carry = (uint32_t)(hl>>32);
- hl = ((uint64_t)ah * bh + w[2]) + carry;
- w[2] = (uint32_t)hl;
- w[3] = (uint32_t)(hl>>32);
- *hi = ((uint64_t)w[3]<<32) + w[2];
- *lo = ((uint64_t)w[1]<<32) + w[0];
- }
- /*
- * By Henry S. Warren: http://www.hackersdelight.org/HDcode/divlu.c.txt
- * http://www.hackersdelight.org/permissions.htm:
- * "You are free to use, copy, and distribute any of the code on this web
- * site, whether modified by you or not. You need not give attribution."
- *
- * Slightly modified, comments are mine.
- */
- static inline int
- nlz(uint64_t x)
- {
- int n;
- if (x == 0) return(64);
- n = 0;
- if (x <= 0x00000000FFFFFFFF) {n = n +32; x = x <<32;}
- if (x <= 0x0000FFFFFFFFFFFF) {n = n +16; x = x <<16;}
- if (x <= 0x00FFFFFFFFFFFFFF) {n = n + 8; x = x << 8;}
- if (x <= 0x0FFFFFFFFFFFFFFF) {n = n + 4; x = x << 4;}
- if (x <= 0x3FFFFFFFFFFFFFFF) {n = n + 2; x = x << 2;}
- if (x <= 0x7FFFFFFFFFFFFFFF) {n = n + 1;}
- return n;
- }
- static inline void
- _mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t u1, mpd_uint_t u0,
- mpd_uint_t v)
- {
- const mpd_uint_t b = 4294967296;
- mpd_uint_t un1, un0,
- vn1, vn0,
- q1, q0,
- un32, un21, un10,
- rhat, t;
- int s;
- assert(u1 < v);
- s = nlz(v);
- v = v << s;
- vn1 = v >> 32;
- vn0 = v & 0xFFFFFFFF;
- t = (s == 0) ? 0 : u0 >> (64 - s);
- un32 = (u1 << s) | t;
- un10 = u0 << s;
- un1 = un10 >> 32;
- un0 = un10 & 0xFFFFFFFF;
- q1 = un32 / vn1;
- rhat = un32 - q1*vn1;
- again1:
- if (q1 >= b || q1*vn0 > b*rhat + un1) {
- q1 = q1 - 1;
- rhat = rhat + vn1;
- if (rhat < b) goto again1;
- }
- /*
- * Before again1 we had:
- * (1) q1*vn1 + rhat = un32
- * (2) q1*vn1*b + rhat*b + un1 = un32*b + un1
- *
- * The statements inside the if-clause do not change the value
- * of the left-hand side of (2), and the loop is only exited
- * if q1*vn0 <= rhat*b + un1, so:
- *
- * (3) q1*vn1*b + q1*vn0 <= un32*b + un1
- * (4) q1*v <= un32*b + un1
- * (5) 0 <= un32*b + un1 - q1*v
- *
- * By (5) we are certain that the possible add-back step from
- * Knuth's algorithm D is never required.
- *
- * Since the final quotient is less than 2**64, the following
- * must be true:
- *
- * (6) un32*b + un1 - q1*v <= UINT64_MAX
- *
- * This means that in the following line, the high words
- * of un32*b and q1*v can be discarded without any effect
- * on the result.
- */
- un21 = un32*b + un1 - q1*v;
- q0 = un21 / vn1;
- rhat = un21 - q0*vn1;
- again2:
- if (q0 >= b || q0*vn0 > b*rhat + un0) {
- q0 = q0 - 1;
- rhat = rhat + vn1;
- if (rhat < b) goto again2;
- }
- *q = q1*b + q0;
- *r = (un21*b + un0 - q0*v) >> s;
- }
- #endif
- /* END ANSI */
- #elif defined(ASM)
- static inline void
- _mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
- {
- mpd_uint_t h, l;
- __asm__ ( "mulq %3\n\t"
- : "=d" (h), "=a" (l)
- : "%a" (a), "rm" (b)
- : "cc"
- );
- *hi = h;
- *lo = l;
- }
- static inline void
- _mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
- mpd_uint_t d)
- {
- mpd_uint_t qq, rr;
- __asm__ ( "divq %4\n\t"
- : "=a" (qq), "=d" (rr)
- : "a" (lo), "d" (hi), "rm" (d)
- : "cc"
- );
- *q = qq;
- *r = rr;
- }
- /* END GCC ASM */
- #elif defined(MASM)
- #include <intrin.h>
- #pragma intrinsic(_umul128)
- static inline void
- _mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
- {
- *lo = _umul128(a, b, hi);
- }
- void _mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
- mpd_uint_t d);
- /* END MASM (_MSC_VER) */
- #else
- #error "need platform specific 128 bit multiplication and division"
- #endif
- #define DIVMOD(q, r, v, d) *q = v / d; *r = v - *q * d
- static inline void
- _mpd_divmod_pow10(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t v, mpd_uint_t exp)
- {
- assert(exp <= 19);
- if (exp <= 9) {
- if (exp <= 4) {
- switch (exp) {
- case 0: *q = v; *r = 0; break;
- case 1: DIVMOD(q, r, v, 10UL); break;
- case 2: DIVMOD(q, r, v, 100UL); break;
- case 3: DIVMOD(q, r, v, 1000UL); break;
- case 4: DIVMOD(q, r, v, 10000UL); break;
- }
- }
- else {
- switch (exp) {
- case 5: DIVMOD(q, r, v, 100000UL); break;
- case 6: DIVMOD(q, r, v, 1000000UL); break;
- case 7: DIVMOD(q, r, v, 10000000UL); break;
- case 8: DIVMOD(q, r, v, 100000000UL); break;
- case 9: DIVMOD(q, r, v, 1000000000UL); break;
- }
- }
- }
- else {
- if (exp <= 14) {
- switch (exp) {
- case 10: DIVMOD(q, r, v, 10000000000ULL); break;
- case 11: DIVMOD(q, r, v, 100000000000ULL); break;
- case 12: DIVMOD(q, r, v, 1000000000000ULL); break;
- case 13: DIVMOD(q, r, v, 10000000000000ULL); break;
- case 14: DIVMOD(q, r, v, 100000000000000ULL); break;
- }
- }
- else {
- switch (exp) {
- case 15: DIVMOD(q, r, v, 1000000000000000ULL); break;
- case 16: DIVMOD(q, r, v, 10000000000000000ULL); break;
- case 17: DIVMOD(q, r, v, 100000000000000000ULL); break;
- case 18: DIVMOD(q, r, v, 1000000000000000000ULL); break;
- case 19: DIVMOD(q, r, v, 10000000000000000000ULL); break; /* GCOV_NOT_REACHED */
- }
- }
- }
- }
- /* END CONFIG_64 */
- #elif defined(CONFIG_32)
- #if defined(ANSI)
- #if !defined(LEGACY_COMPILER)
- static inline void
- _mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
- {
- mpd_uuint_t hl;
- hl = (mpd_uuint_t)a * b;
- *hi = hl >> 32;
- *lo = (mpd_uint_t)hl;
- }
- static inline void
- _mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
- mpd_uint_t d)
- {
- mpd_uuint_t hl;
- hl = ((mpd_uuint_t)hi<<32) + lo;
- *q = (mpd_uint_t)(hl / d); /* quotient is known to fit */
- *r = (mpd_uint_t)(hl - (mpd_uuint_t)(*q) * d);
- }
- /* END ANSI + uint64_t */
- #else
- static inline void
- _mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
- {
- uint16_t w[4], carry;
- uint16_t ah, al, bh, bl;
- uint32_t hl;
- ah = (uint16_t)(a>>16); al = (uint16_t)a;
- bh = (uint16_t)(b>>16); bl = (uint16_t)b;
- hl = (uint32_t)al * bl;
- w[0] = (uint16_t)hl;
- carry = (uint16_t)(hl>>16);
- hl = (uint32_t)ah * bl + carry;
- w[1] = (uint16_t)hl;
- w[2] = (uint16_t)(hl>>16);
- hl = (uint32_t)al * bh + w[1];
- w[1] = (uint16_t)hl;
- carry = (uint16_t)(hl>>16);
- hl = ((uint32_t)ah * bh + w[2]) + carry;
- w[2] = (uint16_t)hl;
- w[3] = (uint16_t)(hl>>16);
- *hi = ((uint32_t)w[3]<<16) + w[2];
- *lo = ((uint32_t)w[1]<<16) + w[0];
- }
- /*
- * By Henry S. Warren: http://www.hackersdelight.org/HDcode/divlu.c.txt
- * http://www.hackersdelight.org/permissions.htm:
- * "You are free to use, copy, and distribute any of the code on this web
- * site, whether modified by you or not. You need not give attribution."
- *
- * Slightly modified, comments are mine.
- */
- static inline int
- nlz(uint32_t x)
- {
- int n;
- if (x == 0) return(32);
- n = 0;
- if (x <= 0x0000FFFF) {n = n +16; x = x <<16;}
- if (x <= 0x00FFFFFF) {n = n + 8; x = x << 8;}
- if (x <= 0x0FFFFFFF) {n = n + 4; x = x << 4;}
- if (x <= 0x3FFFFFFF) {n = n + 2; x = x << 2;}
- if (x <= 0x7FFFFFFF) {n = n + 1;}
- return n;
- }
- static inline void
- _mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t u1, mpd_uint_t u0,
- mpd_uint_t v)
- {
- const mpd_uint_t b = 65536;
- mpd_uint_t un1, un0,
- vn1, vn0,
- q1, q0,
- un32, un21, un10,
- rhat, t;
- int s;
- assert(u1 < v);
- s = nlz(v);
- v = v << s;
- vn1 = v >> 16;
- vn0 = v & 0xFFFF;
- t = (s == 0) ? 0 : u0 >> (32 - s);
- un32 = (u1 << s) | t;
- un10 = u0 << s;
- un1 = un10 >> 16;
- un0 = un10 & 0xFFFF;
- q1 = un32 / vn1;
- rhat = un32 - q1*vn1;
- again1:
- if (q1 >= b || q1*vn0 > b*rhat + un1) {
- q1 = q1 - 1;
- rhat = rhat + vn1;
- if (rhat < b) goto again1;
- }
- /*
- * Before again1 we had:
- * (1) q1*vn1 + rhat = un32
- * (2) q1*vn1*b + rhat*b + un1 = un32*b + un1
- *
- * The statements inside the if-clause do not change the value
- * of the left-hand side of (2), and the loop is only exited
- * if q1*vn0 <= rhat*b + un1, so:
- *
- * (3) q1*vn1*b + q1*vn0 <= un32*b + un1
- * (4) q1*v <= un32*b + un1
- * (5) 0 <= un32*b + un1 - q1*v
- *
- * By (5) we are certain that the possible add-back step from
- * Knuth's algorithm D is never required.
- *
- * Since the final quotient is less than 2**32, the following
- * must be true:
- *
- * (6) un32*b + un1 - q1*v <= UINT32_MAX
- *
- * This means that in the following line, the high words
- * of un32*b and q1*v can be discarded without any effect
- * on the result.
- */
- un21 = un32*b + un1 - q1*v;
- q0 = un21 / vn1;
- rhat = un21 - q0*vn1;
- again2:
- if (q0 >= b || q0*vn0 > b*rhat + un0) {
- q0 = q0 - 1;
- rhat = rhat + vn1;
- if (rhat < b) goto again2;
- }
- *q = q1*b + q0;
- *r = (un21*b + un0 - q0*v) >> s;
- }
- #endif /* END ANSI + LEGACY_COMPILER */
- /* END ANSI */
- #elif defined(ASM)
- static inline void
- _mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
- {
- mpd_uint_t h, l;
- __asm__ ( "mull %3\n\t"
- : "=d" (h), "=a" (l)
- : "%a" (a), "rm" (b)
- : "cc"
- );
- *hi = h;
- *lo = l;
- }
- static inline void
- _mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
- mpd_uint_t d)
- {
- mpd_uint_t qq, rr;
- __asm__ ( "divl %4\n\t"
- : "=a" (qq), "=d" (rr)
- : "a" (lo), "d" (hi), "rm" (d)
- : "cc"
- );
- *q = qq;
- *r = rr;
- }
- /* END GCC ASM */
- #elif defined(MASM)
- static inline void __cdecl
- _mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
- {
- mpd_uint_t h, l;
- __asm {
- mov eax, a
- mul b
- mov h, edx
- mov l, eax
- }
- *hi = h;
- *lo = l;
- }
- static inline void __cdecl
- _mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
- mpd_uint_t d)
- {
- mpd_uint_t qq, rr;
- __asm {
- mov eax, lo
- mov edx, hi
- div d
- mov qq, eax
- mov rr, edx
- }
- *q = qq;
- *r = rr;
- }
- /* END MASM (_MSC_VER) */
- #else
- #error "need platform specific 64 bit multiplication and division"
- #endif
- #define DIVMOD(q, r, v, d) *q = v / d; *r = v - *q * d
- static inline void
- _mpd_divmod_pow10(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t v, mpd_uint_t exp)
- {
- assert(exp <= 9);
- if (exp <= 4) {
- switch (exp) {
- case 0: *q = v; *r = 0; break;
- case 1: DIVMOD(q, r, v, 10UL); break;
- case 2: DIVMOD(q, r, v, 100UL); break;
- case 3: DIVMOD(q, r, v, 1000UL); break;
- case 4: DIVMOD(q, r, v, 10000UL); break;
- }
- }
- else {
- switch (exp) {
- case 5: DIVMOD(q, r, v, 100000UL); break;
- case 6: DIVMOD(q, r, v, 1000000UL); break;
- case 7: DIVMOD(q, r, v, 10000000UL); break;
- case 8: DIVMOD(q, r, v, 100000000UL); break;
- case 9: DIVMOD(q, r, v, 1000000000UL); break; /* GCOV_NOT_REACHED */
- }
- }
- }
- /* END CONFIG_32 */
- /* NO CONFIG */
- #else
- #error "define CONFIG_64 or CONFIG_32"
- #endif /* CONFIG */
- static inline void
- _mpd_div_word(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t v, mpd_uint_t d)
- {
- *q = v / d;
- *r = v - *q * d;
- }
- static inline void
- _mpd_idiv_word(mpd_ssize_t *q, mpd_ssize_t *r, mpd_ssize_t v, mpd_ssize_t d)
- {
- *q = v / d;
- *r = v - *q * d;
- }
- /** ------------------------------------------------------------
- ** Arithmetic with overflow checking
- ** ------------------------------------------------------------
- */
- /* The following macros do call exit() in case of an overflow.
- If the library is used correctly (i.e. with valid context
- parameters), such overflows cannot occur. The macros are used
- as sanity checks in a couple of strategic places and should
- be viewed as a handwritten version of gcc's -ftrapv option. */
- static inline mpd_size_t
- add_size_t(mpd_size_t a, mpd_size_t b)
- {
- if (a > MPD_SIZE_MAX - b) {
- mpd_err_fatal("add_size_t(): overflow: check the context"); /* GCOV_NOT_REACHED */
- }
- return a + b;
- }
- static inline mpd_size_t
- sub_size_t(mpd_size_t a, mpd_size_t b)
- {
- if (b > a) {
- mpd_err_fatal("sub_size_t(): overflow: check the context"); /* GCOV_NOT_REACHED */
- }
- return a - b;
- }
- #if MPD_SIZE_MAX != MPD_UINT_MAX
- #error "adapt mul_size_t() and mulmod_size_t()"
- #endif
- static inline mpd_size_t
- mul_size_t(mpd_size_t a, mpd_size_t b)
- {
- mpd_uint_t hi, lo;
- _mpd_mul_words(&hi, &lo, (mpd_uint_t)a, (mpd_uint_t)b);
- if (hi) {
- mpd_err_fatal("mul_size_t(): overflow: check the context"); /* GCOV_NOT_REACHED */
- }
- return lo;
- }
- static inline mpd_size_t
- add_size_t_overflow(mpd_size_t a, mpd_size_t b, mpd_size_t *overflow)
- {
- mpd_size_t ret;
- *overflow = 0;
- ret = a + b;
- if (ret < a) *overflow = 1;
- return ret;
- }
- static inline mpd_size_t
- mul_size_t_overflow(mpd_size_t a, mpd_size_t b, mpd_size_t *overflow)
- {
- mpd_uint_t lo;
- _mpd_mul_words((mpd_uint_t *)overflow, &lo, (mpd_uint_t)a,
- (mpd_uint_t)b);
- return lo;
- }
- static inline mpd_ssize_t
- mod_mpd_ssize_t(mpd_ssize_t a, mpd_ssize_t m)
- {
- mpd_ssize_t r = a % m;
- return (r < 0) ? r + m : r;
- }
- static inline mpd_size_t
- mulmod_size_t(mpd_size_t a, mpd_size_t b, mpd_size_t m)
- {
- mpd_uint_t hi, lo;
- mpd_uint_t q, r;
- _mpd_mul_words(&hi, &lo, (mpd_uint_t)a, (mpd_uint_t)b);
- _mpd_div_words(&q, &r, hi, lo, (mpd_uint_t)m);
- return r;
- }
- #endif /* TYPEARITH_H */
|