r128.h 50 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123
  1. /*
  2. r128.h: 128-bit (64.64) signed fixed-point arithmetic. Version 1.4.4
  3. COMPILATION
  4. -----------
  5. Drop this header file somewhere in your project and include it wherever it is
  6. needed. There is no separate .c file for this library. To get the code, in ONE
  7. file in your project, put:
  8. #define R128_IMPLEMENTATION
  9. before you include this file. You may also provide a definition for R128_ASSERT
  10. to force the library to use a custom assert macro.
  11. COMPILER/LIBRARY SUPPORT
  12. ------------------------
  13. This library requires a C89 compiler with support for 64-bit integers. If your
  14. compiler does not support the long long data type, the R128_U64, etc. macros
  15. must be set appropriately. On x86 and x64 targets, Intel intrinsics are used
  16. for speed. If your compiler does not support these intrinsics, you can add
  17. #define R128_STDC_ONLY
  18. in your implementation file before including r128.h.
  19. The only C runtime library functionality used by this library is <assert.h>.
  20. This can be avoided by defining an R128_ASSERT macro in your implementation
  21. file. Since this library uses 64-bit arithmetic, this may implicitly add a
  22. runtime library dependency on 32-bit platforms.
  23. C++ SUPPORT
  24. -----------
  25. Operator overloads are supplied for C++ files that include this file. Since all
  26. C++ functions are declared inline (or static inline), the R128_IMPLEMENTATION
  27. file can be either C++ or C.
  28. LICENSE
  29. -------
  30. This is free and unencumbered software released into the public domain.
  31. Anyone is free to copy, modify, publish, use, compile, sell, or
  32. distribute this software, either in source code form or as a compiled
  33. binary, for any purpose, commercial or non-commercial, and by any
  34. means.
  35. In jurisdictions that recognize copyright laws, the author or authors
  36. of this software dedicate any and all copyright interest in the
  37. software to the public domain. We make this dedication for the benefit
  38. of the public at large and to the detriment of our heirs and
  39. successors. We intend this dedication to be an overt act of
  40. relinquishment in perpetuity of all present and future rights to this
  41. software under copyright law.
  42. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  43. EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  44. MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  45. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  46. OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  47. ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  48. OTHER DEALINGS IN THE SOFTWARE.
  49. */
  50. #ifndef H_R128_H
  51. #define H_R128_H
  52. #include <stddef.h>
  53. // 64-bit integer support
  54. // If your compiler does not have stdint.h, add appropriate defines for these macros.
  55. #if defined(_MSC_VER) && (_MSC_VER < 1600)
  56. # define R128_S32 __int32
  57. # define R128_U32 unsigned __int32
  58. # define R128_S64 __int64
  59. # define R128_U64 unsigned __int64
  60. # define R128_LIT_S64(x) x##i64
  61. # define R128_LIT_U64(x) x##ui64
  62. #else
  63. # include <stdint.h>
  64. # define R128_S32 int32_t
  65. # define R128_U32 uint32_t
  66. # define R128_S64 long long
  67. # define R128_U64 unsigned long long
  68. # define R128_LIT_S64(x) x##ll
  69. # define R128_LIT_U64(x) x##ull
  70. #endif
  71. #ifdef __cplusplus
  72. extern "C" {
  73. #endif
  74. typedef struct R128 {
  75. R128_U64 lo;
  76. R128_U64 hi;
  77. #ifdef __cplusplus
  78. R128();
  79. R128(double);
  80. R128(int);
  81. R128(R128_S64);
  82. R128(R128_U64 low, R128_U64 high);
  83. operator double() const;
  84. operator R128_S64() const;
  85. operator int() const;
  86. operator bool() const;
  87. bool operator!() const;
  88. R128 operator~() const;
  89. R128 operator-() const;
  90. R128 &operator|=(const R128 &rhs);
  91. R128 &operator&=(const R128 &rhs);
  92. R128 &operator^=(const R128 &rhs);
  93. R128 &operator+=(const R128 &rhs);
  94. R128 &operator-=(const R128 &rhs);
  95. R128 &operator*=(const R128 &rhs);
  96. R128 &operator/=(const R128 &rhs);
  97. R128 &operator%=(const R128 &rhs);
  98. R128 &operator<<=(int amount);
  99. R128 &operator>>=(int amount);
  100. #endif //__cplusplus
  101. } R128;
  102. // Type conversion
  103. extern void r128FromInt(R128 *dst, R128_S64 v);
  104. extern void r128FromFloat(R128 *dst, double v);
  105. extern R128_S64 r128ToInt(const R128 *v);
  106. extern double r128ToFloat(const R128 *v);
  107. // Copy
  108. extern void r128Copy(R128 *dst, const R128 *src);
  109. // Negate
  110. extern void r128Neg(R128 *dst, const R128 *src);
  111. // Bitwise operations
  112. extern void r128Not(R128 *dst, const R128 *src); // ~a
  113. extern void r128Or(R128 *dst, const R128 *a, const R128 *b); // a | b
  114. extern void r128And(R128 *dst, const R128 *a, const R128 *b); // a & b
  115. extern void r128Xor(R128 *dst, const R128 *a, const R128 *b); // a ^ b
  116. extern void r128Shl(R128 *dst, const R128 *src, int amount); // shift left by amount mod 128
  117. extern void r128Shr(R128 *dst, const R128 *src, int amount); // shift right logical by amount mod 128
  118. extern void r128Sar(R128 *dst, const R128 *src, int amount); // shift right arithmetic by amount mod 128
  119. // Arithmetic
  120. extern void r128Add(R128 *dst, const R128 *a, const R128 *b); // a + b
  121. extern void r128Sub(R128 *dst, const R128 *a, const R128 *b); // a - b
  122. extern void r128Mul(R128 *dst, const R128 *a, const R128 *b); // a * b
  123. extern void r128Div(R128 *dst, const R128 *a, const R128 *b); // a / b
  124. extern void r128Mod(R128 *dst, const R128 *a, const R128 *b); // a - toInt(a / b) * b
  125. extern void r128Sqrt(R128 *dst, const R128 *v); // sqrt(v)
  126. extern void r128Rsqrt(R128 *dst, const R128 *v); // 1 / sqrt(v)
  127. // Comparison
  128. extern int r128Cmp(const R128 *a, const R128 *b); // sign of a-b
  129. extern void r128Min(R128 *dst, const R128 *a, const R128 *b);
  130. extern void r128Max(R128 *dst, const R128 *a, const R128 *b);
  131. extern void r128Floor(R128 *dst, const R128 *v);
  132. extern void r128Ceil(R128 *dst, const R128 *v);
  133. extern int r128IsNeg(const R128 *v); // quick check for < 0
  134. // String conversion
  135. //
  136. typedef enum R128ToStringSign {
  137. R128ToStringSign_Default, // no sign character for positive values
  138. R128ToStringSign_Space, // leading space for positive values
  139. R128ToStringSign_Plus, // leading '+' for positive values
  140. } R128ToStringSign;
  141. // Formatting options for use with r128ToStringOpt. The "defaults" correspond
  142. // to a format string of "%f".
  143. //
  144. typedef struct R128ToStringFormat {
  145. // sign character for positive values. Default is R128ToStringSign_Default.
  146. R128ToStringSign sign;
  147. // minimum number of characters to write. Default is 0.
  148. int width;
  149. // place to the right of the decimal at which rounding is performed. If negative,
  150. // a maximum of 20 decimal places will be written, with no trailing zeroes.
  151. // (20 places is sufficient to ensure that r128FromString will convert back to the
  152. // original value.) Default is -1. NOTE: This is not the same default that the C
  153. // standard library uses for %f.
  154. int precision;
  155. // If non-zero, pads the output string with leading zeroes if the final result is
  156. // fewer than width characters. Otherwise, leading spaces are used. Default is 0.
  157. int zeroPad;
  158. // Always print a decimal point, even if the value is an integer. Default is 0.
  159. int decimal;
  160. // Left-align output if width specifier requires padding.
  161. // Default is 0 (right align).
  162. int leftAlign;
  163. } R128ToStringFormat;
  164. // r128ToStringOpt: convert R128 to a decimal string, with formatting.
  165. //
  166. // dst and dstSize: specify the buffer to write into. At most dstSize bytes will be written
  167. // (including null terminator). No additional rounding is performed if dstSize is not large
  168. // enough to hold the entire string.
  169. //
  170. // opt: an R128ToStringFormat struct (q.v.) with formatting options.
  171. //
  172. // Uses the R128_decimal global as the decimal point character.
  173. // Always writes a null terminator, even if the destination buffer is not large enough.
  174. //
  175. // Number of bytes that will be written (i.e. how big does dst need to be?):
  176. // If width is specified: width + 1 bytes.
  177. // If precision is specified: at most precision + 22 bytes.
  178. // If neither is specified: at most 42 bytes.
  179. //
  180. // Returns the number of bytes that would have been written if dst was sufficiently large,
  181. // not including the final null terminator.
  182. //
  183. extern int r128ToStringOpt(char *dst, size_t dstSize, const R128 *v, const R128ToStringFormat *opt);
  184. // r128ToStringf: convert R128 to a decimal string, with formatting.
  185. //
  186. // dst and dstSize: specify the buffer to write into. At most dstSize bytes will be written
  187. // (including null terminator).
  188. //
  189. // format: a printf-style format specifier, as one would use with floating point types.
  190. // e.g. "%+5.2f". (The leading % and trailing f are optional.)
  191. // NOTE: This is NOT a full replacement for sprintf. Any characters in the format string
  192. // that do not correspond to a format placeholder are ignored.
  193. //
  194. // Uses the R128_decimal global as the decimal point character.
  195. // Always writes a null terminator, even if the destination buffer is not large enough.
  196. //
  197. // Number of bytes that will be written (i.e. how big does dst need to be?):
  198. // If the precision field is specified: at most max(width, precision + 21) + 1 bytes
  199. // Otherwise: at most max(width, 41) + 1 bytes.
  200. //
  201. // Returns the number of bytes that would have been written if dst was sufficiently large,
  202. // not including the final null terminator.
  203. //
  204. extern int r128ToStringf(char *dst, size_t dstSize, const char *format, const R128 *v);
  205. // r128ToString: convert R128 to a decimal string, with default formatting.
  206. // Equivalent to r128ToStringf(dst, dstSize, "%f", v).
  207. //
  208. // Uses the R128_decimal global as the decimal point character.
  209. // Always writes a null terminator, even if the destination buffer is not large enough.
  210. //
  211. // Will write at most 42 bytes (including NUL) to dst.
  212. //
  213. // Returns the number of bytes that would have been written if dst was sufficiently large,
  214. // not including the final null terminator.
  215. //
  216. extern int r128ToString(char *dst, size_t dstSize, const R128 *v);
  217. // r128FromString: Convert string to R128.
  218. //
  219. // The string can be formatted either as a decimal number with optional sign
  220. // or as hexadecimal with a prefix of 0x or 0X.
  221. //
  222. // endptr, if not NULL, is set to the character following the last character
  223. // used in the conversion.
  224. //
  225. extern void r128FromString(R128 *dst, const char *s, char **endptr);
  226. // Constants
  227. extern const R128 R128_min; // minimum (most negative) value
  228. extern const R128 R128_max; // maximum (most positive) value
  229. extern const R128 R128_smallest; // smallest positive value
  230. extern const R128 R128_zero; // zero
  231. extern const R128 R128_one; // 1.0
  232. extern char R128_decimal; // decimal point character used by r128From/ToString. defaults to '.'
  233. #ifdef __cplusplus
  234. }
  235. #include <limits>
  236. namespace std {
  237. template<>
  238. struct numeric_limits<R128>
  239. {
  240. static const bool is_specialized = true;
  241. static R128 min() throw() { return R128_min; }
  242. static R128 max() throw() { return R128_max; }
  243. static const int digits = 127;
  244. static const int digits10 = 38;
  245. static const bool is_signed = true;
  246. static const bool is_integer = false;
  247. static const bool is_exact = false;
  248. static const int radix = 2;
  249. static R128 epsilon() throw() { return R128_smallest; }
  250. static R128 round_error() throw() { return R128_one; }
  251. static const int min_exponent = 0;
  252. static const int min_exponent10 = 0;
  253. static const int max_exponent = 0;
  254. static const int max_exponent10 = 0;
  255. static const bool has_infinity = false;
  256. static const bool has_quiet_NaN = false;
  257. static const bool has_signaling_NaN = false;
  258. static const float_denorm_style has_denorm = denorm_absent;
  259. static const bool has_denorm_loss = false;
  260. static R128 infinity() throw() { return R128_zero; }
  261. static R128 quiet_NaN() throw() { return R128_zero; }
  262. static R128 signaling_NaN() throw() { return R128_zero; }
  263. static R128 denorm_min() throw() { return R128_zero; }
  264. static const bool is_iec559 = false;
  265. static const bool is_bounded = true;
  266. static const bool is_modulo = true;
  267. static const bool traps = numeric_limits<R128_U64>::traps;
  268. static const bool tinyness_before = false;
  269. static const float_round_style round_style = round_toward_zero;
  270. };
  271. } //namespace std
  272. inline R128::R128() {}
  273. inline R128::R128(double v)
  274. {
  275. r128FromFloat(this, v);
  276. }
  277. inline R128::R128(int v)
  278. {
  279. r128FromInt(this, v);
  280. }
  281. inline R128::R128(R128_S64 v)
  282. {
  283. r128FromInt(this, v);
  284. }
  285. inline R128::R128(R128_U64 low, R128_U64 high)
  286. {
  287. lo = low;
  288. hi = high;
  289. }
  290. inline R128::operator double() const
  291. {
  292. return r128ToFloat(this);
  293. }
  294. inline R128::operator R128_S64() const
  295. {
  296. return r128ToInt(this);
  297. }
  298. inline R128::operator int() const
  299. {
  300. return (int) r128ToInt(this);
  301. }
  302. inline R128::operator bool() const
  303. {
  304. return lo || hi;
  305. }
  306. inline bool R128::operator!() const
  307. {
  308. return !lo && !hi;
  309. }
  310. inline R128 R128::operator~() const
  311. {
  312. R128 r;
  313. r128Not(&r, this);
  314. return r;
  315. }
  316. inline R128 R128::operator-() const
  317. {
  318. R128 r;
  319. r128Neg(&r, this);
  320. return r;
  321. }
  322. inline R128 &R128::operator|=(const R128 &rhs)
  323. {
  324. r128Or(this, this, &rhs);
  325. return *this;
  326. }
  327. inline R128 &R128::operator&=(const R128 &rhs)
  328. {
  329. r128And(this, this, &rhs);
  330. return *this;
  331. }
  332. inline R128 &R128::operator^=(const R128 &rhs)
  333. {
  334. r128Xor(this, this, &rhs);
  335. return *this;
  336. }
  337. inline R128 &R128::operator+=(const R128 &rhs)
  338. {
  339. r128Add(this, this, &rhs);
  340. return *this;
  341. }
  342. inline R128 &R128::operator-=(const R128 &rhs)
  343. {
  344. r128Sub(this, this, &rhs);
  345. return *this;
  346. }
  347. inline R128 &R128::operator*=(const R128 &rhs)
  348. {
  349. r128Mul(this, this, &rhs);
  350. return *this;
  351. }
  352. inline R128 &R128::operator/=(const R128 &rhs)
  353. {
  354. r128Div(this, this, &rhs);
  355. return *this;
  356. }
  357. inline R128 &R128::operator%=(const R128 &rhs)
  358. {
  359. r128Mod(this, this, &rhs);
  360. return *this;
  361. }
  362. inline R128 &R128::operator<<=(int amount)
  363. {
  364. r128Shl(this, this, amount);
  365. return *this;
  366. }
  367. inline R128 &R128::operator>>=(int amount)
  368. {
  369. r128Sar(this, this, amount);
  370. return *this;
  371. }
  372. static inline R128 operator|(const R128 &lhs, const R128 &rhs)
  373. {
  374. R128 r(lhs);
  375. return r |= rhs;
  376. }
  377. static inline R128 operator&(const R128 &lhs, const R128 &rhs)
  378. {
  379. R128 r(lhs);
  380. return r &= rhs;
  381. }
  382. static inline R128 operator^(const R128 &lhs, const R128 &rhs)
  383. {
  384. R128 r(lhs);
  385. return r ^= rhs;
  386. }
  387. static inline R128 operator+(const R128 &lhs, const R128 &rhs)
  388. {
  389. R128 r(lhs);
  390. return r += rhs;
  391. }
  392. static inline R128 operator-(const R128 &lhs, const R128 &rhs)
  393. {
  394. R128 r(lhs);
  395. return r -= rhs;
  396. }
  397. static inline R128 operator*(const R128 &lhs, const R128 &rhs)
  398. {
  399. R128 r(lhs);
  400. return r *= rhs;
  401. }
  402. static inline R128 operator/(const R128 &lhs, const R128 &rhs)
  403. {
  404. R128 r(lhs);
  405. return r /= rhs;
  406. }
  407. static inline R128 operator%(const R128 &lhs, const R128 &rhs)
  408. {
  409. R128 r(lhs);
  410. return r %= rhs;
  411. }
  412. static inline R128 operator<<(const R128 &lhs, int amount)
  413. {
  414. R128 r(lhs);
  415. return r <<= amount;
  416. }
  417. static inline R128 operator>>(const R128 &lhs, int amount)
  418. {
  419. R128 r(lhs);
  420. return r >>= amount;
  421. }
  422. static inline bool operator<(const R128 &lhs, const R128 &rhs)
  423. {
  424. return r128Cmp(&lhs, &rhs) < 0;
  425. }
  426. static inline bool operator>(const R128 &lhs, const R128 &rhs)
  427. {
  428. return r128Cmp(&lhs, &rhs) > 0;
  429. }
  430. static inline bool operator<=(const R128 &lhs, const R128 &rhs)
  431. {
  432. return r128Cmp(&lhs, &rhs) <= 0;
  433. }
  434. static inline bool operator>=(const R128 &lhs, const R128 &rhs)
  435. {
  436. return r128Cmp(&lhs, &rhs) >= 0;
  437. }
  438. static inline bool operator==(const R128 &lhs, const R128 &rhs)
  439. {
  440. return lhs.lo == rhs.lo && lhs.hi == rhs.hi;
  441. }
  442. static inline bool operator!=(const R128 &lhs, const R128 &rhs)
  443. {
  444. return lhs.lo != rhs.lo || lhs.hi != rhs.hi;
  445. }
  446. #endif //__cplusplus
  447. #endif //H_R128_H
  448. #ifdef R128_IMPLEMENTATION
  449. #ifdef R128_DEBUG_VIS
  450. # define R128_DEBUG_SET(x) r128ToString(R128_last, sizeof(R128_last), x)
  451. #else
  452. # define R128_DEBUG_SET(x)
  453. #endif
  454. #define R128_SET2(x, l, h) do { (x)->lo = (R128_U64)(l); (x)->hi = (R128_U64)(h); } while(0)
  455. #define R128_R0(x) ((R128_U32)(x)->lo)
  456. #define R128_R2(x) ((R128_U32)(x)->hi)
  457. #if defined(_M_IX86)
  458. // workaround: MSVC x86's handling of 64-bit values is not great
  459. # define R128_SET4(x, r0, r1, r2, r3) do { \
  460. ((R128_U32*)&(x)->lo)[0] = (R128_U32)(r0); \
  461. ((R128_U32*)&(x)->lo)[1] = (R128_U32)(r1); \
  462. ((R128_U32*)&(x)->hi)[0] = (R128_U32)(r2); \
  463. ((R128_U32*)&(x)->hi)[1] = (R128_U32)(r3); \
  464. } while(0)
  465. # define R128_R1(x) (((R128_U32*)&(x)->lo)[1])
  466. # define R128_R3(x) (((R128_U32*)&(x)->hi)[1])
  467. #else
  468. # define R128_SET4(x, r0, r1, r2, r3) do { (x)->lo = (R128_U64)(r0) | ((R128_U64)(r1) << 32); \
  469. (x)->hi = (R128_U64)(r2) | ((R128_U64)(r3) << 32); } while(0)
  470. # define R128_R1(x) ((R128_U32)((x)->lo >> 32))
  471. # define R128_R3(x) ((R128_U32)((x)->hi >> 32))
  472. #endif
  473. #if defined(_M_X64)
  474. # define R128_INTEL 1
  475. # define R128_64BIT 1
  476. # ifndef R128_STDC_ONLY
  477. # include <intrin.h>
  478. # endif
  479. #elif defined(__x86_64__)
  480. # define R128_INTEL 1
  481. # define R128_64BIT 1
  482. # ifndef R128_STDC_ONLY
  483. # include <x86intrin.h>
  484. # endif
  485. #elif defined(_M_IX86)
  486. # define R128_INTEL 1
  487. # ifndef R128_STDC_ONLY
  488. # include <intrin.h>
  489. # endif
  490. #elif defined(__i386__)
  491. # define R128_INTEL 1
  492. # ifndef R128_STDC_ONLY
  493. # include <x86intrin.h>
  494. # endif
  495. #elif defined(_M_ARM)
  496. # ifndef R128_STDC_ONLY
  497. # include <intrin.h>
  498. # endif
  499. #elif defined(_M_ARM64)
  500. # define R128_64BIT 1
  501. # ifndef R128_STDC_ONLY
  502. # include <intrin.h>
  503. # endif
  504. #elif defined(__aarch64__)
  505. # define R128_64BIT 1
  506. #endif
  507. #ifndef R128_INTEL
  508. # define R128_INTEL 0
  509. #endif
  510. #ifndef R128_64BIT
  511. # define R128_64BIT 0
  512. #endif
  513. #ifndef R128_ASSERT
  514. # include <assert.h>
  515. # define R128_ASSERT(x) assert(x)
  516. #endif
  517. #include <stdlib.h> // for NULL
  518. static const R128ToStringFormat R128__defaultFormat = {
  519. R128ToStringSign_Default,
  520. 0,
  521. -1,
  522. 0,
  523. 0,
  524. 0
  525. };
  526. const R128 R128_min = { 0, R128_LIT_U64(0x8000000000000000) };
  527. const R128 R128_max = { R128_LIT_U64(0xffffffffffffffff), R128_LIT_U64(0x7fffffffffffffff) };
  528. const R128 R128_smallest = { 1, 0 };
  529. const R128 R128_zero = { 0, 0 };
  530. const R128 R128_one = { 0, 1 };
  531. char R128_decimal = '.';
  532. #ifdef R128_DEBUG_VIS
  533. char R128_last[42];
  534. #endif
  535. static int r128__clz64(R128_U64 x)
  536. {
  537. #if defined(R128_STDC_ONLY)
  538. R128_U64 n = 64, y;
  539. y = x >> 32; if (y) { n -= 32; x = y; }
  540. y = x >> 16; if (y) { n -= 16; x = y; }
  541. y = x >> 8; if (y) { n -= 8; x = y; }
  542. y = x >> 4; if (y) { n -= 4; x = y; }
  543. y = x >> 2; if (y) { n -= 2; x = y; }
  544. y = x >> 1; if (y) { n -= 1; x = y; }
  545. return (int)(n - x);
  546. #elif defined(_M_X64) || defined(_M_ARM64)
  547. unsigned long idx;
  548. if (_BitScanReverse64(&idx, x)) {
  549. return 63 - (int)idx;
  550. } else {
  551. return 64;
  552. }
  553. #elif defined(_MSC_VER)
  554. unsigned long idx;
  555. if (_BitScanReverse(&idx, (R128_U32)(x >> 32))) {
  556. return 31 - (int)idx;
  557. } else if (_BitScanReverse(&idx, (R128_U32)x)) {
  558. return 63 - (int)idx;
  559. } else {
  560. return 64;
  561. }
  562. #else
  563. return x ? __builtin_clzll(x) : 64;
  564. #endif
  565. }
  566. #if !R128_64BIT
  567. // 32*32->64
  568. static R128_U64 r128__umul64(R128_U32 a, R128_U32 b)
  569. {
  570. # if defined(_M_IX86) && !defined(R128_STDC_ONLY) && !defined(__MINGW32__)
  571. return __emulu(a, b);
  572. # elif defined(_M_ARM) && !defined(R128_STDC_ONLY)
  573. return _arm_umull(a, b);
  574. # else
  575. return a * (R128_U64)b;
  576. # endif
  577. }
  578. // 64/32->32
  579. static R128_U32 r128__udiv64(R128_U32 nlo, R128_U32 nhi, R128_U32 d, R128_U32 *rem)
  580. {
  581. # if defined(_M_IX86) && (_MSC_VER >= 1920) && !defined(R128_STDC_ONLY)
  582. unsigned __int64 n = ((unsigned __int64)nhi << 32) | nlo;
  583. return _udiv64(n, d, rem);
  584. # elif defined(_M_IX86) && !defined(R128_STDC_ONLY) && !defined(__MINGW32__)
  585. __asm {
  586. mov eax, nlo
  587. mov edx, nhi
  588. div d
  589. mov ecx, rem
  590. mov dword ptr [ecx], edx
  591. }
  592. # elif defined(__i386__) && !defined(R128_STDC_ONLY)
  593. R128_U32 q, r;
  594. __asm("divl %4"
  595. : "=a"(q), "=d"(r)
  596. : "a"(nlo), "d"(nhi), "X"(d));
  597. *rem = r;
  598. return q;
  599. # else
  600. R128_U64 n64 = ((R128_U64)nhi << 32) | nlo;
  601. *rem = (R128_U32)(n64 % d);
  602. return (R128_U32)(n64 / d);
  603. # endif
  604. }
  605. #elif defined(R128_STDC_ONLY) || !R128_INTEL
  606. #define r128__umul64(a, b) ((a) * (R128_U64)(b))
  607. static R128_U32 r128__udiv64(R128_U32 nlo, R128_U32 nhi, R128_U32 d, R128_U32 *rem)
  608. {
  609. R128_U64 n64 = ((R128_U64)nhi << 32) | nlo;
  610. *rem = (R128_U32)(n64 % d);
  611. return (R128_U32)(n64 / d);
  612. }
  613. #endif //!R128_64BIT
  614. static void r128__neg(R128 *dst, const R128 *src)
  615. {
  616. R128_ASSERT(dst != NULL);
  617. R128_ASSERT(src != NULL);
  618. #if R128_INTEL && !defined(R128_STDC_ONLY)
  619. {
  620. unsigned char carry = 0;
  621. # if R128_64BIT
  622. carry = _addcarry_u64(carry, ~src->lo, 1, &dst->lo);
  623. carry = _addcarry_u64(carry, ~src->hi, 0, &dst->hi);
  624. # else
  625. R128_U32 r0, r1, r2, r3;
  626. carry = _addcarry_u32(carry, ~R128_R0(src), 1, &r0);
  627. carry = _addcarry_u32(carry, ~R128_R1(src), 0, &r1);
  628. carry = _addcarry_u32(carry, ~R128_R2(src), 0, &r2);
  629. carry = _addcarry_u32(carry, ~R128_R3(src), 0, &r3);
  630. R128_SET4(dst, r0, r1, r2, r3);
  631. # endif //R128_64BIT
  632. }
  633. #else
  634. if (src->lo) {
  635. dst->lo = ~src->lo + 1;
  636. dst->hi = ~src->hi;
  637. } else {
  638. dst->lo = 0;
  639. dst->hi = ~src->hi + 1;
  640. }
  641. #endif //R128_INTEL
  642. }
  643. // 64*64->128
  644. static void r128__umul128(R128 *dst, R128_U64 a, R128_U64 b)
  645. {
  646. #if defined(_M_X64) && !defined(R128_STDC_ONLY)
  647. dst->lo = _umul128(a, b, &dst->hi);
  648. #elif R128_64BIT && !defined(_MSC_VER) && !defined(R128_STDC_ONLY)
  649. unsigned __int128 p0 = a * (unsigned __int128)b;
  650. dst->hi = (R128_U64)(p0 >> 64);
  651. dst->lo = (R128_U64)p0;
  652. #else
  653. R128_U32 alo = (R128_U32)a;
  654. R128_U32 ahi = (R128_U32)(a >> 32);
  655. R128_U32 blo = (R128_U32)b;
  656. R128_U32 bhi = (R128_U32)(b >> 32);
  657. R128_U64 p0, p1, p2, p3;
  658. p0 = r128__umul64(alo, blo);
  659. p1 = r128__umul64(alo, bhi);
  660. p2 = r128__umul64(ahi, blo);
  661. p3 = r128__umul64(ahi, bhi);
  662. {
  663. #if R128_INTEL && !defined(R128_STDC_ONLY)
  664. R128_U32 r0, r1, r2, r3;
  665. unsigned char carry;
  666. r0 = (R128_U32)(p0);
  667. r1 = (R128_U32)(p0 >> 32);
  668. r2 = (R128_U32)(p1 >> 32);
  669. r3 = (R128_U32)(p3 >> 32);
  670. carry = _addcarry_u32(0, r1, (R128_U32)p1, &r1);
  671. carry = _addcarry_u32(carry, r2, (R128_U32)(p2 >> 32), &r2);
  672. _addcarry_u32(carry, r3, 0, &r3);
  673. carry = _addcarry_u32(0, r1, (R128_U32)p2, &r1);
  674. carry = _addcarry_u32(carry, r2, (R128_U32)p3, &r2);
  675. _addcarry_u32(carry, r3, 0, &r3);
  676. R128_SET4(dst, r0, r1, r2, r3);
  677. #else
  678. R128_U64 carry, lo, hi;
  679. carry = ((R128_U64)(R128_U32)p1 + (R128_U64)(R128_U32)p2 + (p0 >> 32)) >> 32;
  680. lo = p0 + ((p1 + p2) << 32);
  681. hi = p3 + ((R128_U32)(p1 >> 32) + (R128_U32)(p2 >> 32)) + carry;
  682. R128_SET2(dst, lo, hi);
  683. #endif
  684. }
  685. #endif
  686. }
  687. // 128/64->64
  688. #if defined(_M_X64) && (_MSC_VER < 1920) && !defined(R128_STDC_ONLY) && !defined(__MINGW32__)
  689. // MSVC x64 provides neither inline assembly nor (pre-2019) a div intrinsic, so we do fake
  690. // "inline assembly" to avoid long division or outline assembly.
  691. #pragma code_seg(".text")
  692. __declspec(allocate(".text") align(16)) static const unsigned char r128__udiv128Code[] = {
  693. 0x48, 0x8B, 0xC1, //mov rax, rcx
  694. 0x49, 0xF7, 0xF0, //div rax, r8
  695. 0x49, 0x89, 0x11, //mov qword ptr [r9], rdx
  696. 0xC3 //ret
  697. };
  698. typedef R128_U64 (*r128__udiv128Proc)(R128_U64 nlo, R128_U64 nhi, R128_U64 d, R128_U64 *rem);
  699. static const r128__udiv128Proc r128__udiv128 = (r128__udiv128Proc)(void*)r128__udiv128Code;
  700. #else
  701. static R128_U64 r128__udiv128(R128_U64 nlo, R128_U64 nhi, R128_U64 d, R128_U64 *rem)
  702. {
  703. #if defined(_M_X64) && !defined(R128_STDC_ONLY) && !defined(__MINGW32__)
  704. return _udiv128(nhi, nlo, d, rem);
  705. #elif defined(__x86_64__) && !defined(R128_STDC_ONLY)
  706. R128_U64 q, r;
  707. __asm("divq %4"
  708. : "=a"(q), "=d"(r)
  709. : "a"(nlo), "d"(nhi), "X"(d));
  710. *rem = r;
  711. return q;
  712. #else
  713. R128_U64 tmp;
  714. R128_U32 d0, d1;
  715. R128_U32 n3, n2, n1, n0;
  716. R128_U32 q0, q1;
  717. R128_U32 r;
  718. int shift;
  719. R128_ASSERT(d != 0); //division by zero
  720. R128_ASSERT(nhi < d); //overflow
  721. // normalize
  722. shift = r128__clz64(d);
  723. if (shift) {
  724. R128 tmp128;
  725. R128_SET2(&tmp128, nlo, nhi);
  726. r128Shl(&tmp128, &tmp128, shift);
  727. n3 = R128_R3(&tmp128);
  728. n2 = R128_R2(&tmp128);
  729. n1 = R128_R1(&tmp128);
  730. n0 = R128_R0(&tmp128);
  731. d <<= shift;
  732. } else {
  733. n3 = (R128_U32)(nhi >> 32);
  734. n2 = (R128_U32)nhi;
  735. n1 = (R128_U32)(nlo >> 32);
  736. n0 = (R128_U32)nlo;
  737. }
  738. d1 = (R128_U32)(d >> 32);
  739. d0 = (R128_U32)d;
  740. // first digit
  741. R128_ASSERT(n3 <= d1);
  742. if (n3 < d1) {
  743. q1 = r128__udiv64(n2, n3, d1, &r);
  744. } else {
  745. q1 = 0xffffffffu;
  746. r = n2 + d1;
  747. }
  748. refine1:
  749. if (r128__umul64(q1, d0) > ((R128_U64)r << 32) + n1) {
  750. --q1;
  751. if (r < ~d1 + 1) {
  752. r += d1;
  753. goto refine1;
  754. }
  755. }
  756. tmp = ((R128_U64)n2 << 32) + n1 - (r128__umul64(q1, d0) + (r128__umul64(q1, d1) << 32));
  757. n2 = (R128_U32)(tmp >> 32);
  758. n1 = (R128_U32)tmp;
  759. // second digit
  760. R128_ASSERT(n2 <= d1);
  761. if (n2 < d1) {
  762. q0 = r128__udiv64(n1, n2, d1, &r);
  763. } else {
  764. q0 = 0xffffffffu;
  765. r = n1 + d1;
  766. }
  767. refine0:
  768. if (r128__umul64(q0, d0) > ((R128_U64)r << 32) + n0) {
  769. --q0;
  770. if (r < ~d1 + 1) {
  771. r += d1;
  772. goto refine0;
  773. }
  774. }
  775. tmp = ((R128_U64)n1 << 32) + n0 - (r128__umul64(q0, d0) + (r128__umul64(q0, d1) << 32));
  776. n1 = (R128_U32)(tmp >> 32);
  777. n0 = (R128_U32)tmp;
  778. *rem = (((R128_U64)n1 << 32) + n0) >> shift;
  779. return ((R128_U64)q1 << 32) + q0;
  780. #endif
  781. }
  782. #endif
  783. static int r128__ucmp(const R128 *a, const R128 *b)
  784. {
  785. if (a->hi != b->hi) {
  786. if (a->hi > b->hi) {
  787. return 1;
  788. } else {
  789. return -1;
  790. }
  791. } else {
  792. if (a->lo == b->lo) {
  793. return 0;
  794. } else if (a->lo > b->lo) {
  795. return 1;
  796. } else {
  797. return -1;
  798. }
  799. }
  800. }
  801. static void r128__umul(R128 *dst, const R128 *a, const R128 *b)
  802. {
  803. #if defined(_M_X64) && !defined(R128_STDC_ONLY)
  804. R128_U64 t0, t1;
  805. R128_U64 lo, hi = 0;
  806. unsigned char carry;
  807. t0 = _umul128(a->lo, b->lo, &t1);
  808. carry = _addcarry_u64(0, t1, t0 >> 63, &lo);
  809. _addcarry_u64(carry, hi, hi, &hi);
  810. t0 = _umul128(a->lo, b->hi, &t1);
  811. carry = _addcarry_u64(0, lo, t0, &lo);
  812. _addcarry_u64(carry, hi, t1, &hi);
  813. t0 = _umul128(a->hi, b->lo, &t1);
  814. carry = _addcarry_u64(0, lo, t0, &lo);
  815. _addcarry_u64(carry, hi, t1, &hi);
  816. t0 = _umul128(a->hi, b->hi, &t1);
  817. hi += t0;
  818. R128_SET2(dst, lo, hi);
  819. #elif defined(__x86_64__) && !defined(R128_STDC_ONLY)
  820. unsigned __int128 p0, p1, p2, p3;
  821. p0 = a->lo * (unsigned __int128)b->lo;
  822. p1 = a->lo * (unsigned __int128)b->hi;
  823. p2 = a->hi * (unsigned __int128)b->lo;
  824. p3 = a->hi * (unsigned __int128)b->hi;
  825. p0 = (p3 << 64) + p2 + p1 + (p0 >> 64) + ((R128_U64)p0 >> 63);
  826. dst->lo = (R128_U64)p0;
  827. dst->hi = (R128_U64)(p0 >> 64);
  828. #else
  829. R128 p0, p1, p2, p3, round;
  830. r128__umul128(&p0, a->lo, b->lo);
  831. round.hi = 0; round.lo = p0.lo >> 63;
  832. p0.lo = p0.hi; p0.hi = 0; //r128Shr(&p0, &p0, 64);
  833. r128Add(&p0, &p0, &round);
  834. r128__umul128(&p1, a->hi, b->lo);
  835. r128Add(&p0, &p0, &p1);
  836. r128__umul128(&p2, a->lo, b->hi);
  837. r128Add(&p0, &p0, &p2);
  838. r128__umul128(&p3, a->hi, b->hi);
  839. p3.hi = p3.lo; p3.lo = 0; //r128Shl(&p3, &p3, 64);
  840. r128Add(&p0, &p0, &p3);
  841. R128_SET2(dst, p0.lo, p0.hi);
  842. #endif
  843. }
  844. // Shift d left until the high bit is set, and shift n left by the same amount.
  845. // returns non-zero on overflow.
  846. static int r128__norm(R128 *n, R128 *d, R128_U64 *n2)
  847. {
  848. R128_U64 d0, d1;
  849. R128_U64 n0, n1;
  850. int shift;
  851. d1 = d->hi;
  852. d0 = d->lo;
  853. n1 = n->hi;
  854. n0 = n->lo;
  855. if (d1) {
  856. shift = r128__clz64(d1);
  857. if (shift) {
  858. d1 = (d1 << shift) | (d0 >> (64 - shift));
  859. d0 = d0 << shift;
  860. *n2 = n1 >> (64 - shift);
  861. n1 = (n1 << shift) | (n0 >> (64 - shift));
  862. n0 = n0 << shift;
  863. } else {
  864. *n2 = 0;
  865. }
  866. } else {
  867. shift = r128__clz64(d0);
  868. if (r128__clz64(n1) <= shift) {
  869. return 1; // overflow
  870. }
  871. if (shift) {
  872. d1 = d0 << shift;
  873. d0 = 0;
  874. *n2 = (n1 << shift) | (n0 >> (64 - shift));
  875. n1 = n0 << shift;
  876. n0 = 0;
  877. } else {
  878. d1 = d0;
  879. d0 = 0;
  880. *n2 = n1;
  881. n1 = n0;
  882. n0 = 0;
  883. }
  884. }
  885. R128_SET2(n, n0, n1);
  886. R128_SET2(d, d0, d1);
  887. return 0;
  888. }
  889. static void r128__udiv(R128 *quotient, const R128 *dividend, const R128 *divisor)
  890. {
  891. R128 tmp;
  892. R128_U64 d0, d1;
  893. R128_U64 n1, n2, n3;
  894. R128 q;
  895. R128_ASSERT(dividend != NULL);
  896. R128_ASSERT(divisor != NULL);
  897. R128_ASSERT(quotient != NULL);
  898. R128_ASSERT(divisor->hi != 0 || divisor->lo != 0); // divide by zero
  899. // scale dividend and normalize
  900. {
  901. R128 n, d;
  902. R128_SET2(&n, dividend->lo, dividend->hi);
  903. R128_SET2(&d, divisor->lo, divisor->hi);
  904. if (r128__norm(&n, &d, &n3)) {
  905. R128_SET2(quotient, R128_max.lo, R128_max.hi);
  906. return;
  907. }
  908. d1 = d.hi;
  909. d0 = d.lo;
  910. n2 = n.hi;
  911. n1 = n.lo;
  912. }
  913. // first digit
  914. R128_ASSERT(n3 <= d1);
  915. {
  916. R128 t0, t1;
  917. t0.lo = n1;
  918. if (n3 < d1) {
  919. q.hi = r128__udiv128(n2, n3, d1, &t0.hi);
  920. } else {
  921. q.hi = R128_LIT_U64(0xffffffffffffffff);
  922. t0.hi = n2 + d1;
  923. }
  924. refine1:
  925. r128__umul128(&t1, q.hi, d0);
  926. if (r128__ucmp(&t1, &t0) > 0) {
  927. --q.hi;
  928. if (t0.hi < ~d1 + 1) {
  929. t0.hi += d1;
  930. goto refine1;
  931. }
  932. }
  933. }
  934. {
  935. R128 t0, t1, t2;
  936. t0.hi = n2;
  937. t0.lo = n1;
  938. r128__umul128(&t1, q.hi, d0);
  939. r128__umul128(&t2, q.hi, d1);
  940. t2.hi = t2.lo; t2.lo = 0; //r128Shl(&t2, &t2, 64);
  941. r128Add(&tmp, &t1, &t2);
  942. r128Sub(&tmp, &t0, &tmp);
  943. }
  944. n2 = tmp.hi;
  945. n1 = tmp.lo;
  946. // second digit
  947. R128_ASSERT(n2 <= d1);
  948. {
  949. R128 t0, t1;
  950. t0.lo = 0;
  951. if (n2 < d1) {
  952. q.lo = r128__udiv128(n1, n2, d1, &t0.hi);
  953. } else {
  954. q.lo = R128_LIT_U64(0xffffffffffffffff);
  955. t0.hi = n1 + d1;
  956. }
  957. refine0:
  958. r128__umul128(&t1, q.lo, d0);
  959. if (r128__ucmp(&t1, &t0) > 0) {
  960. --q.lo;
  961. if (t0.hi < ~d1 + 1) {
  962. t0.hi += d1;
  963. goto refine0;
  964. }
  965. }
  966. }
  967. R128_SET2(quotient, q.lo, q.hi);
  968. }
  969. static R128_U64 r128__umod(R128 *n, R128 *d)
  970. {
  971. R128_U64 d0, d1;
  972. R128_U64 n3, n2, n1;
  973. R128_U64 q;
  974. R128_ASSERT(d != NULL);
  975. R128_ASSERT(n != NULL);
  976. R128_ASSERT(d->hi != 0 || d->lo != 0); // divide by zero
  977. if (r128__norm(n, d, &n3)) {
  978. return R128_LIT_U64(0xffffffffffffffff);
  979. }
  980. d1 = d->hi;
  981. d0 = d->lo;
  982. n2 = n->hi;
  983. n1 = n->lo;
  984. R128_ASSERT(n3 < d1);
  985. {
  986. R128 t0, t1;
  987. t0.lo = n1;
  988. q = r128__udiv128(n2, n3, d1, &t0.hi);
  989. refine1:
  990. r128__umul128(&t1, q, d0);
  991. if (r128__ucmp(&t1, &t0) > 0) {
  992. --q;
  993. if (t0.hi < ~d1 + 1) {
  994. t0.hi += d1;
  995. goto refine1;
  996. }
  997. }
  998. }
  999. return q;
  1000. }
  1001. static int r128__format(char *dst, size_t dstSize, const R128 *v, const R128ToStringFormat *format)
  1002. {
  1003. char buf[128];
  1004. R128 tmp;
  1005. R128_U64 whole;
  1006. char *cursor, *decimal, *dstp = dst;
  1007. int sign = 0;
  1008. int fullPrecision = 1;
  1009. int width, precision;
  1010. int padCnt, trail = 0;
  1011. R128_ASSERT(dst != NULL && dstSize > 0);
  1012. R128_ASSERT(v != NULL);
  1013. R128_ASSERT(format != NULL);
  1014. --dstSize;
  1015. R128_SET2(&tmp, v->lo, v->hi);
  1016. if (r128IsNeg(&tmp)) {
  1017. r128__neg(&tmp, &tmp);
  1018. sign = 1;
  1019. }
  1020. width = format->width;
  1021. if (width < 0) {
  1022. width = 0;
  1023. }
  1024. precision = format->precision;
  1025. if (precision < 0) {
  1026. // print a maximum of 20 digits
  1027. fullPrecision = 0;
  1028. precision = 20;
  1029. } else if (precision > sizeof(buf) - 21) {
  1030. trail = precision - (sizeof(buf) - 21);
  1031. precision -= trail;
  1032. }
  1033. whole = tmp.hi;
  1034. decimal = cursor = buf;
  1035. // fractional part first in case a carry into the whole part is required
  1036. if (tmp.lo || format->decimal) {
  1037. while (tmp.lo || (fullPrecision && precision)) {
  1038. if ((int)(cursor - buf) == precision) {
  1039. if ((R128_S64)tmp.lo < 0) {
  1040. // round up, propagate carry backwards
  1041. char *c;
  1042. for (c = cursor - 1; c >= buf; --c) {
  1043. char d = ++*c;
  1044. if (d <= '9') {
  1045. goto endfrac;
  1046. } else {
  1047. *c = '0';
  1048. }
  1049. }
  1050. // carry out into the whole part
  1051. whole++;
  1052. }
  1053. break;
  1054. }
  1055. r128__umul128(&tmp, tmp.lo, 10);
  1056. *cursor++ = (char)tmp.hi + '0';
  1057. }
  1058. endfrac:
  1059. if (format->decimal || precision) {
  1060. decimal = cursor;
  1061. *cursor++ = R128_decimal;
  1062. }
  1063. }
  1064. // whole part
  1065. do {
  1066. char digit = (char)(whole % 10);
  1067. whole /= 10;
  1068. *cursor++ = digit + '0';
  1069. } while (whole);
  1070. #define R128__WRITE(c) do { if (dstp < dst + dstSize) *dstp = c; ++dstp; } while(0)
  1071. padCnt = width - (int)(cursor - buf) - 1;
  1072. // left padding
  1073. if (!format->leftAlign) {
  1074. char padChar = format->zeroPad ? '0' : ' ';
  1075. if (format->zeroPad) {
  1076. if (sign) {
  1077. R128__WRITE('-');
  1078. } else if (format->sign == R128ToStringSign_Plus) {
  1079. R128__WRITE('+');
  1080. } else if (format->sign == R128ToStringSign_Space) {
  1081. R128__WRITE(' ');
  1082. } else {
  1083. ++padCnt;
  1084. }
  1085. }
  1086. for (; padCnt > 0; --padCnt) {
  1087. R128__WRITE(padChar);
  1088. }
  1089. }
  1090. if (format->leftAlign || !format->zeroPad) {
  1091. if (sign) {
  1092. R128__WRITE('-');
  1093. } else if (format->sign == R128ToStringSign_Plus) {
  1094. R128__WRITE('+');
  1095. } else if (format->sign == R128ToStringSign_Space) {
  1096. R128__WRITE(' ');
  1097. } else {
  1098. ++padCnt;
  1099. }
  1100. }
  1101. {
  1102. char *i;
  1103. // reverse the whole part
  1104. for (i = cursor - 1; i >= decimal; --i) {
  1105. R128__WRITE(*i);
  1106. }
  1107. // copy the fractional part
  1108. for (i = buf; i < decimal; ++i) {
  1109. R128__WRITE(*i);
  1110. }
  1111. }
  1112. // right padding
  1113. if (format->leftAlign) {
  1114. char padChar = format->zeroPad ? '0' : ' ';
  1115. for (; padCnt > 0; --padCnt) {
  1116. R128__WRITE(padChar);
  1117. }
  1118. }
  1119. // trailing zeroes for very large precision
  1120. while (trail--) {
  1121. R128__WRITE('0');
  1122. }
  1123. #undef R128__WRITE
  1124. if (dstp <= dst + dstSize) {
  1125. *dstp = '\0';
  1126. } else {
  1127. dst[dstSize] = '\0';
  1128. }
  1129. return (int)(dstp - dst);
  1130. }
  1131. void r128FromInt(R128 *dst, R128_S64 v)
  1132. {
  1133. R128_ASSERT(dst != NULL);
  1134. dst->lo = 0;
  1135. dst->hi = (R128_U64)v;
  1136. R128_DEBUG_SET(dst);
  1137. }
  1138. void r128FromFloat(R128 *dst, double v)
  1139. {
  1140. R128_ASSERT(dst != NULL);
  1141. if (v < -9223372036854775808.0) {
  1142. r128Copy(dst, &R128_min);
  1143. } else if (v >= 9223372036854775808.0) {
  1144. r128Copy(dst, &R128_max);
  1145. } else {
  1146. R128 r;
  1147. int sign = 0;
  1148. if (v < 0) {
  1149. v = -v;
  1150. sign = 1;
  1151. }
  1152. r.hi = (R128_U64)(R128_S64)v;
  1153. v -= (R128_S64)v;
  1154. r.lo = (R128_U64)(v * 18446744073709551616.0);
  1155. if (sign) {
  1156. r128__neg(&r, &r);
  1157. }
  1158. r128Copy(dst, &r);
  1159. }
  1160. }
  1161. void r128FromString(R128 *dst, const char *s, char **endptr)
  1162. {
  1163. R128_U64 lo = 0, hi = 0;
  1164. R128_U64 base = 10;
  1165. int sign = 0;
  1166. R128_ASSERT(dst != NULL);
  1167. R128_ASSERT(s != NULL);
  1168. R128_SET2(dst, 0, 0);
  1169. // consume whitespace
  1170. for (;;) {
  1171. if (*s == ' ' || *s == '\t' || *s == '\r' || *s == '\n' || *s == '\v') {
  1172. ++s;
  1173. } else {
  1174. break;
  1175. }
  1176. }
  1177. // sign
  1178. if (*s == '-') {
  1179. sign = 1;
  1180. ++s;
  1181. } else if (*s == '+') {
  1182. ++s;
  1183. }
  1184. // parse base prefix
  1185. if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) {
  1186. base = 16;
  1187. s += 2;
  1188. }
  1189. // whole part
  1190. for (;; ++s) {
  1191. R128_U64 digit;
  1192. if ('0' <= *s && *s <= '9') {
  1193. digit = *s - '0';
  1194. } else if (base == 16 && 'a' <= *s && *s <= 'f') {
  1195. digit = *s - 'a' + 10;
  1196. } else if (base == 16 && 'A' <= *s && *s <= 'F') {
  1197. digit = *s - 'A' + 10;
  1198. } else {
  1199. break;
  1200. }
  1201. hi = hi * base + digit;
  1202. }
  1203. // fractional part
  1204. if (*s == R128_decimal) {
  1205. const char *exp = ++s;
  1206. // find the last digit and work backwards
  1207. for (;; ++s) {
  1208. if ('0' <= *s && *s <= '9') {
  1209. } else if (base == 16 && ('a' <= *s && *s <= 'f')) {
  1210. } else if (base == 16 && ('A' <= *s && *s <= 'F')) {
  1211. } else {
  1212. break;
  1213. }
  1214. }
  1215. for (--s; s >= exp; --s) {
  1216. R128_U64 digit, unused;
  1217. if ('0' <= *s && *s <= '9') {
  1218. digit = *s - '0';
  1219. } else if ('a' <= *s && *s <= 'f') {
  1220. digit = *s - 'a' + 10;
  1221. } else {
  1222. digit = *s - 'A' + 10;
  1223. }
  1224. lo = r128__udiv128(lo, digit, base, &unused);
  1225. }
  1226. }
  1227. R128_SET2(dst, lo, hi);
  1228. if (sign) {
  1229. r128__neg(dst, dst);
  1230. }
  1231. if (endptr) {
  1232. *endptr = (char *) s;
  1233. }
  1234. }
  1235. R128_S64 r128ToInt(const R128 *v)
  1236. {
  1237. R128_ASSERT(v != NULL);
  1238. return (R128_S64)v->hi;
  1239. }
  1240. double r128ToFloat(const R128 *v)
  1241. {
  1242. R128 tmp;
  1243. int sign = 0;
  1244. double d;
  1245. R128_ASSERT(v != NULL);
  1246. R128_SET2(&tmp, v->lo, v->hi);
  1247. if (r128IsNeg(&tmp)) {
  1248. r128__neg(&tmp, &tmp);
  1249. sign = 1;
  1250. }
  1251. d = tmp.hi + tmp.lo * (1 / 18446744073709551616.0);
  1252. if (sign) {
  1253. d = -d;
  1254. }
  1255. return d;
  1256. }
  1257. int r128ToStringOpt(char *dst, size_t dstSize, const R128 *v, const R128ToStringFormat *opt)
  1258. {
  1259. return r128__format(dst, dstSize, v, opt);
  1260. }
  1261. int r128ToStringf(char *dst, size_t dstSize, const char *format, const R128 *v)
  1262. {
  1263. R128ToStringFormat opts;
  1264. R128_ASSERT(dst != NULL && dstSize);
  1265. R128_ASSERT(format != NULL);
  1266. R128_ASSERT(v != NULL);
  1267. opts.sign = R128__defaultFormat.sign;
  1268. opts.precision = R128__defaultFormat.precision;
  1269. opts.zeroPad = R128__defaultFormat.zeroPad;
  1270. opts.decimal = R128__defaultFormat.decimal;
  1271. opts.leftAlign = R128__defaultFormat.leftAlign;
  1272. if (*format == '%') {
  1273. ++format;
  1274. }
  1275. // flags field
  1276. for (;; ++format) {
  1277. if (*format == ' ' && opts.sign != R128ToStringSign_Plus) {
  1278. opts.sign = R128ToStringSign_Space;
  1279. } else if (*format == '+') {
  1280. opts.sign = R128ToStringSign_Plus;
  1281. } else if (*format == '0') {
  1282. opts.zeroPad = 1;
  1283. } else if (*format == '-') {
  1284. opts.leftAlign = 1;
  1285. } else if (*format == '#') {
  1286. opts.decimal = 1;
  1287. } else {
  1288. break;
  1289. }
  1290. }
  1291. // width field
  1292. opts.width = 0;
  1293. for (;;) {
  1294. if ('0' <= *format && *format <= '9') {
  1295. opts.width = opts.width * 10 + *format++ - '0';
  1296. } else {
  1297. break;
  1298. }
  1299. }
  1300. // precision field
  1301. if (*format == '.') {
  1302. opts.precision = 0;
  1303. ++format;
  1304. for (;;) {
  1305. if ('0' <= *format && *format <= '9') {
  1306. opts.precision = opts.precision * 10 + *format++ - '0';
  1307. } else {
  1308. break;
  1309. }
  1310. }
  1311. }
  1312. return r128__format(dst, dstSize, v, &opts);
  1313. }
  1314. int r128ToString(char *dst, size_t dstSize, const R128 *v)
  1315. {
  1316. return r128__format(dst, dstSize, v, &R128__defaultFormat);
  1317. }
  1318. void r128Copy(R128 *dst, const R128 *src)
  1319. {
  1320. R128_ASSERT(dst != NULL);
  1321. R128_ASSERT(src != NULL);
  1322. dst->lo = src->lo;
  1323. dst->hi = src->hi;
  1324. R128_DEBUG_SET(dst);
  1325. }
  1326. void r128Neg(R128 *dst, const R128 *src)
  1327. {
  1328. r128__neg(dst, src);
  1329. R128_DEBUG_SET(dst);
  1330. }
  1331. void r128Not(R128 *dst, const R128 *src)
  1332. {
  1333. R128_ASSERT(dst != NULL);
  1334. R128_ASSERT(src != NULL);
  1335. dst->lo = ~src->lo;
  1336. dst->hi = ~src->hi;
  1337. R128_DEBUG_SET(dst);
  1338. }
  1339. void r128Or(R128 *dst, const R128 *a, const R128 *b)
  1340. {
  1341. R128_ASSERT(dst != NULL);
  1342. R128_ASSERT(a != NULL);
  1343. R128_ASSERT(b != NULL);
  1344. dst->lo = a->lo | b->lo;
  1345. dst->hi = a->hi | b->hi;
  1346. R128_DEBUG_SET(dst);
  1347. }
  1348. void r128And(R128 *dst, const R128 *a, const R128 *b)
  1349. {
  1350. R128_ASSERT(dst != NULL);
  1351. R128_ASSERT(a != NULL);
  1352. R128_ASSERT(b != NULL);
  1353. dst->lo = a->lo & b->lo;
  1354. dst->hi = a->hi & b->hi;
  1355. R128_DEBUG_SET(dst);
  1356. }
  1357. void r128Xor(R128 *dst, const R128 *a, const R128 *b)
  1358. {
  1359. R128_ASSERT(dst != NULL);
  1360. R128_ASSERT(a != NULL);
  1361. R128_ASSERT(b != NULL);
  1362. dst->lo = a->lo ^ b->lo;
  1363. dst->hi = a->hi ^ b->hi;
  1364. R128_DEBUG_SET(dst);
  1365. }
  1366. void r128Shl(R128 *dst, const R128 *src, int amount)
  1367. {
  1368. R128_U64 r[4];
  1369. R128_ASSERT(dst != NULL);
  1370. R128_ASSERT(src != NULL);
  1371. #if defined(_M_IX86) && !defined(R128_STDC_ONLY) && !defined(__MINGW32__)
  1372. __asm {
  1373. // load src
  1374. mov edx, dword ptr[src]
  1375. mov ecx, amount
  1376. mov edi, dword ptr[edx]
  1377. mov esi, dword ptr[edx + 4]
  1378. mov ebx, dword ptr[edx + 8]
  1379. mov eax, dword ptr[edx + 12]
  1380. // shift mod 32
  1381. shld eax, ebx, cl
  1382. shld ebx, esi, cl
  1383. shld esi, edi, cl
  1384. shl edi, cl
  1385. // clear out low 12 bytes of stack
  1386. xor edx, edx
  1387. mov dword ptr[r], edx
  1388. mov dword ptr[r + 4], edx
  1389. mov dword ptr[r + 8], edx
  1390. // store shifted amount offset by count/32 bits
  1391. shr ecx, 5
  1392. and ecx, 3
  1393. mov dword ptr[r + ecx * 4 + 0], edi
  1394. mov dword ptr[r + ecx * 4 + 4], esi
  1395. mov dword ptr[r + ecx * 4 + 8], ebx
  1396. mov dword ptr[r + ecx * 4 + 12], eax
  1397. }
  1398. #else
  1399. r[0] = src->lo;
  1400. r[1] = src->hi;
  1401. amount &= 127;
  1402. if (amount >= 64) {
  1403. r[1] = r[0] << (amount - 64);
  1404. r[0] = 0;
  1405. } else if (amount) {
  1406. # ifdef _M_X64
  1407. r[1] = __shiftleft128(r[0], r[1], (char) amount);
  1408. # else
  1409. r[1] = (r[1] << amount) | (r[0] >> (64 - amount));
  1410. # endif
  1411. r[0] = r[0] << amount;
  1412. }
  1413. #endif //_M_IX86
  1414. dst->lo = r[0];
  1415. dst->hi = r[1];
  1416. R128_DEBUG_SET(dst);
  1417. }
  1418. void r128Shr(R128 *dst, const R128 *src, int amount)
  1419. {
  1420. R128_U64 r[4];
  1421. R128_ASSERT(dst != NULL);
  1422. R128_ASSERT(src != NULL);
  1423. #if defined(_M_IX86) && !defined(R128_STDC_ONLY) && !defined(__MINGW32__)
  1424. __asm {
  1425. // load src
  1426. mov edx, dword ptr[src]
  1427. mov ecx, amount
  1428. mov edi, dword ptr[edx]
  1429. mov esi, dword ptr[edx + 4]
  1430. mov ebx, dword ptr[edx + 8]
  1431. mov eax, dword ptr[edx + 12]
  1432. // shift mod 32
  1433. shrd edi, esi, cl
  1434. shrd esi, ebx, cl
  1435. shrd ebx, eax, cl
  1436. shr eax, cl
  1437. // clear out high 12 bytes of stack
  1438. xor edx, edx
  1439. mov dword ptr[r + 20], edx
  1440. mov dword ptr[r + 24], edx
  1441. mov dword ptr[r + 28], edx
  1442. // store shifted amount offset by -count/32 bits
  1443. shr ecx, 5
  1444. and ecx, 3
  1445. neg ecx
  1446. mov dword ptr[r + ecx * 4 + 16], edi
  1447. mov dword ptr[r + ecx * 4 + 20], esi
  1448. mov dword ptr[r + ecx * 4 + 24], ebx
  1449. mov dword ptr[r + ecx * 4 + 28], eax
  1450. }
  1451. #else
  1452. r[2] = src->lo;
  1453. r[3] = src->hi;
  1454. amount &= 127;
  1455. if (amount >= 64) {
  1456. r[2] = r[3] >> (amount - 64);
  1457. r[3] = 0;
  1458. } else if (amount) {
  1459. #ifdef _M_X64
  1460. r[2] = __shiftright128(r[2], r[3], (char) amount);
  1461. #else
  1462. r[2] = (r[2] >> amount) | (r[3] << (64 - amount));
  1463. #endif
  1464. r[3] = r[3] >> amount;
  1465. }
  1466. #endif
  1467. dst->lo = r[2];
  1468. dst->hi = r[3];
  1469. R128_DEBUG_SET(dst);
  1470. }
  1471. void r128Sar(R128 *dst, const R128 *src, int amount)
  1472. {
  1473. R128_U64 r[4];
  1474. R128_ASSERT(dst != NULL);
  1475. R128_ASSERT(src != NULL);
  1476. #if defined(_M_IX86) && !defined(R128_STDC_ONLY) && !defined(__MINGW32__)
  1477. __asm {
  1478. // load src
  1479. mov edx, dword ptr[src]
  1480. mov ecx, amount
  1481. mov edi, dword ptr[edx]
  1482. mov esi, dword ptr[edx + 4]
  1483. mov ebx, dword ptr[edx + 8]
  1484. mov eax, dword ptr[edx + 12]
  1485. // shift mod 32
  1486. shrd edi, esi, cl
  1487. shrd esi, ebx, cl
  1488. shrd ebx, eax, cl
  1489. sar eax, cl
  1490. // copy sign to high 12 bytes of stack
  1491. cdq
  1492. mov dword ptr[r + 20], edx
  1493. mov dword ptr[r + 24], edx
  1494. mov dword ptr[r + 28], edx
  1495. // store shifted amount offset by -count/32 bits
  1496. shr ecx, 5
  1497. and ecx, 3
  1498. neg ecx
  1499. mov dword ptr[r + ecx * 4 + 16], edi
  1500. mov dword ptr[r + ecx * 4 + 20], esi
  1501. mov dword ptr[r + ecx * 4 + 24], ebx
  1502. mov dword ptr[r + ecx * 4 + 28], eax
  1503. }
  1504. #else
  1505. r[2] = src->lo;
  1506. r[3] = src->hi;
  1507. amount &= 127;
  1508. if (amount >= 64) {
  1509. r[2] = (R128_U64)((R128_S64)r[3] >> (amount - 64));
  1510. r[3] = (R128_U64)((R128_S64)r[3] >> 63);
  1511. } else if (amount) {
  1512. r[2] = (r[2] >> amount) | (R128_U64)((R128_S64)r[3] << (64 - amount));
  1513. r[3] = (R128_U64)((R128_S64)r[3] >> amount);
  1514. }
  1515. #endif
  1516. dst->lo = r[2];
  1517. dst->hi = r[3];
  1518. R128_DEBUG_SET(dst);
  1519. }
  1520. void r128Add(R128 *dst, const R128 *a, const R128 *b)
  1521. {
  1522. unsigned char carry = 0;
  1523. R128_ASSERT(dst != NULL);
  1524. R128_ASSERT(a != NULL);
  1525. R128_ASSERT(b != NULL);
  1526. #if R128_INTEL && !defined(R128_STDC_ONLY)
  1527. # if R128_64BIT
  1528. carry = _addcarry_u64(carry, a->lo, b->lo, &dst->lo);
  1529. carry = _addcarry_u64(carry, a->hi, b->hi, &dst->hi);
  1530. # else
  1531. R128_U32 r0, r1, r2, r3;
  1532. carry = _addcarry_u32(carry, R128_R0(a), R128_R0(b), &r0);
  1533. carry = _addcarry_u32(carry, R128_R1(a), R128_R1(b), &r1);
  1534. carry = _addcarry_u32(carry, R128_R2(a), R128_R2(b), &r2);
  1535. carry = _addcarry_u32(carry, R128_R3(a), R128_R3(b), &r3);
  1536. R128_SET4(dst, r0, r1, r2, r3);
  1537. # endif //R128_64BIT
  1538. #else
  1539. {
  1540. R128_U64 r = a->lo + b->lo;
  1541. carry = r < a->lo;
  1542. dst->lo = r;
  1543. dst->hi = a->hi + b->hi + carry;
  1544. }
  1545. #endif //R128_INTEL
  1546. R128_DEBUG_SET(dst);
  1547. }
  1548. void r128Sub(R128 *dst, const R128 *a, const R128 *b)
  1549. {
  1550. unsigned char borrow = 0;
  1551. R128_ASSERT(dst != NULL);
  1552. R128_ASSERT(a != NULL);
  1553. R128_ASSERT(b != NULL);
  1554. #if R128_INTEL && !defined(R128_STDC_ONLY)
  1555. # if R128_64BIT
  1556. borrow = _subborrow_u64(borrow, a->lo, b->lo, &dst->lo);
  1557. borrow = _subborrow_u64(borrow, a->hi, b->hi, &dst->hi);
  1558. # else
  1559. R128_U32 r0, r1, r2, r3;
  1560. borrow = _subborrow_u32(borrow, R128_R0(a), R128_R0(b), &r0);
  1561. borrow = _subborrow_u32(borrow, R128_R1(a), R128_R1(b), &r1);
  1562. borrow = _subborrow_u32(borrow, R128_R2(a), R128_R2(b), &r2);
  1563. borrow = _subborrow_u32(borrow, R128_R3(a), R128_R3(b), &r3);
  1564. R128_SET4(dst, r0, r1, r2, r3);
  1565. # endif //R128_64BIT
  1566. #else
  1567. {
  1568. R128_U64 r = a->lo - b->lo;
  1569. borrow = r > a->lo;
  1570. dst->lo = r;
  1571. dst->hi = a->hi - b->hi - borrow;
  1572. }
  1573. #endif //R128_INTEL
  1574. R128_DEBUG_SET(dst);
  1575. }
  1576. void r128Mul(R128 *dst, const R128 *a, const R128 *b)
  1577. {
  1578. int sign = 0;
  1579. R128 ta, tb, tc;
  1580. R128_ASSERT(dst != NULL);
  1581. R128_ASSERT(a != NULL);
  1582. R128_ASSERT(b != NULL);
  1583. R128_SET2(&ta, a->lo, a->hi);
  1584. R128_SET2(&tb, b->lo, b->hi);
  1585. if (r128IsNeg(&ta)) {
  1586. r128__neg(&ta, &ta);
  1587. sign = !sign;
  1588. }
  1589. if (r128IsNeg(&tb)) {
  1590. r128__neg(&tb, &tb);
  1591. sign = !sign;
  1592. }
  1593. r128__umul(&tc, &ta, &tb);
  1594. if (sign) {
  1595. r128__neg(&tc, &tc);
  1596. }
  1597. r128Copy(dst, &tc);
  1598. }
  1599. void r128Div(R128 *dst, const R128 *a, const R128 *b)
  1600. {
  1601. int sign = 0;
  1602. R128 tn, td, tq;
  1603. R128_ASSERT(dst != NULL);
  1604. R128_ASSERT(a != NULL);
  1605. R128_ASSERT(b != NULL);
  1606. R128_SET2(&tn, a->lo, a->hi);
  1607. R128_SET2(&td, b->lo, b->hi);
  1608. if (r128IsNeg(&tn)) {
  1609. r128__neg(&tn, &tn);
  1610. sign = !sign;
  1611. }
  1612. if (td.lo == 0 && td.hi == 0) {
  1613. // divide by zero
  1614. if (sign) {
  1615. r128Copy(dst, &R128_min);
  1616. } else {
  1617. r128Copy(dst, &R128_max);
  1618. }
  1619. return;
  1620. } else if (r128IsNeg(&td)) {
  1621. r128__neg(&td, &td);
  1622. sign = !sign;
  1623. }
  1624. r128__udiv(&tq, &tn, &td);
  1625. if (sign) {
  1626. r128__neg(&tq, &tq);
  1627. }
  1628. r128Copy(dst, &tq);
  1629. }
  1630. void r128Mod(R128 *dst, const R128 *a, const R128 *b)
  1631. {
  1632. int sign = 0;
  1633. R128 tn, td, tq;
  1634. R128_ASSERT(dst != NULL);
  1635. R128_ASSERT(a != NULL);
  1636. R128_ASSERT(b != NULL);
  1637. R128_SET2(&tn, a->lo, a->hi);
  1638. R128_SET2(&td, b->lo, b->hi);
  1639. if (r128IsNeg(&tn)) {
  1640. r128__neg(&tn, &tn);
  1641. sign = !sign;
  1642. }
  1643. if (td.lo == 0 && td.hi == 0) {
  1644. // divide by zero
  1645. if (sign) {
  1646. r128Copy(dst, &R128_min);
  1647. } else {
  1648. r128Copy(dst, &R128_max);
  1649. }
  1650. return;
  1651. } else if (r128IsNeg(&td)) {
  1652. r128__neg(&td, &td);
  1653. sign = !sign;
  1654. }
  1655. tq.hi = r128__umod(&tn, &td);
  1656. tq.lo = 0;
  1657. if (sign) {
  1658. tq.hi = ~tq.hi + 1;
  1659. }
  1660. r128Mul(&tq, &tq, b);
  1661. r128Sub(dst, a, &tq);
  1662. }
  1663. void r128Rsqrt(R128 *dst, const R128 *v)
  1664. {
  1665. static const R128 threeHalves = { R128_LIT_U64(0x8000000000000000), 1 };
  1666. R128 x, est;
  1667. int i;
  1668. if ((R128_S64)v->hi < 0) {
  1669. r128Copy(dst, &R128_min);
  1670. return;
  1671. }
  1672. R128_SET2(&x, v->lo, v->hi);
  1673. // get initial estimate
  1674. if (x.hi) {
  1675. int shift = (64 + r128__clz64(x.hi)) >> 1;
  1676. est.lo = R128_LIT_U64(1) << shift;
  1677. est.hi = 0;
  1678. } else if (x.lo) {
  1679. int shift = r128__clz64(x.lo) >> 1;
  1680. est.hi = R128_LIT_U64(1) << shift;
  1681. est.lo = 0;
  1682. } else {
  1683. R128_SET2(dst, 0, 0);
  1684. return;
  1685. }
  1686. // x /= 2
  1687. r128Shr(&x, &x, 1);
  1688. // Newton-Raphson iterate
  1689. for (i = 0; i < 7; ++i) {
  1690. R128 newEst;
  1691. // newEst = est * (threeHalves - (x / 2) * est * est);
  1692. r128__umul(&newEst, &est, &est);
  1693. r128__umul(&newEst, &newEst, &x);
  1694. r128Sub(&newEst, &threeHalves, &newEst);
  1695. r128__umul(&newEst, &est, &newEst);
  1696. if (newEst.lo == est.lo && newEst.hi == est.hi) {
  1697. break;
  1698. }
  1699. R128_SET2(&est, newEst.lo, newEst.hi);
  1700. }
  1701. r128Copy(dst, &est);
  1702. }
  1703. void r128Sqrt(R128 *dst, const R128 *v)
  1704. {
  1705. R128 x, est;
  1706. int i;
  1707. if ((R128_S64)v->hi < 0) {
  1708. r128Copy(dst, &R128_min);
  1709. return;
  1710. }
  1711. R128_SET2(&x, v->lo, v->hi);
  1712. // get initial estimate
  1713. if (x.hi) {
  1714. int shift = (63 - r128__clz64(x.hi)) >> 1;
  1715. r128Shr(&est, &x, shift);
  1716. } else if (x.lo) {
  1717. int shift = (1 + r128__clz64(x.lo)) >> 1;
  1718. r128Shl(&est, &x, shift);
  1719. } else {
  1720. R128_SET2(dst, 0, 0);
  1721. return;
  1722. }
  1723. // Newton-Raphson iterate
  1724. for (i = 0; i < 7; ++i) {
  1725. R128 newEst;
  1726. // newEst = (est + x / est) / 2
  1727. r128__udiv(&newEst, &x, &est);
  1728. r128Add(&newEst, &newEst, &est);
  1729. r128Shr(&newEst, &newEst, 1);
  1730. if (newEst.lo == est.lo && newEst.hi == est.hi) {
  1731. break;
  1732. }
  1733. R128_SET2(&est, newEst.lo, newEst.hi);
  1734. }
  1735. r128Copy(dst, &est);
  1736. }
  1737. int r128Cmp(const R128 *a, const R128 *b)
  1738. {
  1739. R128_ASSERT(a != NULL);
  1740. R128_ASSERT(b != NULL);
  1741. if (a->hi == b->hi) {
  1742. if (a->lo == b->lo) {
  1743. return 0;
  1744. } else if (a->lo > b->lo) {
  1745. return 1;
  1746. } else {
  1747. return -1;
  1748. }
  1749. } else if ((R128_S64)a->hi > (R128_S64)b->hi) {
  1750. return 1;
  1751. } else {
  1752. return -1;
  1753. }
  1754. }
  1755. int r128IsNeg(const R128 *v)
  1756. {
  1757. R128_ASSERT(v != NULL);
  1758. return (R128_S64)v->hi < 0;
  1759. }
  1760. void r128Min(R128 *dst, const R128 *a, const R128 *b)
  1761. {
  1762. R128_ASSERT(dst != NULL);
  1763. R128_ASSERT(a != NULL);
  1764. R128_ASSERT(b != NULL);
  1765. if (r128Cmp(a, b) < 0) {
  1766. r128Copy(dst, a);
  1767. } else {
  1768. r128Copy(dst, b);
  1769. }
  1770. }
  1771. void r128Max(R128 *dst, const R128 *a, const R128 *b)
  1772. {
  1773. R128_ASSERT(dst != NULL);
  1774. R128_ASSERT(a != NULL);
  1775. R128_ASSERT(b != NULL);
  1776. if (r128Cmp(a, b) > 0) {
  1777. r128Copy(dst, a);
  1778. } else {
  1779. r128Copy(dst, b);
  1780. }
  1781. }
  1782. void r128Floor(R128 *dst, const R128 *v)
  1783. {
  1784. R128_ASSERT(dst != NULL);
  1785. R128_ASSERT(v != NULL);
  1786. if ((R128_S64)v->hi < 0) {
  1787. dst->hi = v->hi - (v->lo != 0);
  1788. } else {
  1789. dst->hi = v->hi;
  1790. }
  1791. dst->lo = 0;
  1792. R128_DEBUG_SET(dst);
  1793. }
  1794. void r128Ceil(R128 *dst, const R128 *v)
  1795. {
  1796. R128_ASSERT(dst != NULL);
  1797. R128_ASSERT(v != NULL);
  1798. if ((R128_S64)v->hi > 0) {
  1799. dst->hi = v->hi + (v->lo != 0);
  1800. } else {
  1801. dst->hi = v->hi;
  1802. }
  1803. dst->lo = 0;
  1804. R128_DEBUG_SET(dst);
  1805. }
  1806. #endif //R128_IMPLEMENTATION