udivmod128.odin 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156
  1. package runtime
  2. import "core:intrinsics"
  3. udivmod128 :: proc "c" (a, b: u128, rem: ^u128) -> u128 {
  4. _ctz :: intrinsics.count_trailing_zeros;
  5. _clz :: intrinsics.count_leading_zeros;
  6. n := transmute([2]u64)a;
  7. d := transmute([2]u64)b;
  8. q, r: [2]u64 = ---, ---;
  9. sr: u32 = 0;
  10. low :: 1 when ODIN_ENDIAN == "big" else 0;
  11. high :: 1 - low;
  12. U64_BITS :: 8*size_of(u64);
  13. U128_BITS :: 8*size_of(u128);
  14. // Special Cases
  15. if n[high] == 0 {
  16. if d[high] == 0 {
  17. if rem != nil {
  18. res := n[low] % d[low];
  19. rem^ = u128(res);
  20. }
  21. return u128(n[low] / d[low]);
  22. }
  23. if rem != nil {
  24. rem^ = u128(n[low]);
  25. }
  26. return 0;
  27. }
  28. if d[low] == 0 {
  29. if d[high] == 0 {
  30. if rem != nil {
  31. rem^ = u128(n[high] % d[low]);
  32. }
  33. return u128(n[high] / d[low]);
  34. }
  35. if n[low] == 0 {
  36. if rem != nil {
  37. r[high] = n[high] % d[high];
  38. r[low] = 0;
  39. rem^ = transmute(u128)r;
  40. }
  41. return u128(n[high] / d[high]);
  42. }
  43. if d[high] & (d[high]-1) == 0 {
  44. if rem != nil {
  45. r[low] = n[low];
  46. r[high] = n[high] & (d[high] - 1);
  47. rem^ = transmute(u128)r;
  48. }
  49. return u128(n[high] >> _ctz(d[high]));
  50. }
  51. sr = transmute(u32)(i32(_clz(d[high])) - i32(_clz(n[high])));
  52. if sr > U64_BITS - 2 {
  53. if rem != nil {
  54. rem^ = a;
  55. }
  56. return 0;
  57. }
  58. sr += 1;
  59. q[low] = 0;
  60. q[high] = n[low] << u64(U64_BITS - sr);
  61. r[high] = n[high] >> sr;
  62. r[low] = (n[high] << (U64_BITS - sr)) | (n[low] >> sr);
  63. } else {
  64. if d[high] == 0 {
  65. if d[low] & (d[low] - 1) == 0 {
  66. if rem != nil {
  67. rem^ = u128(n[low] & (d[low] - 1));
  68. }
  69. if d[low] == 1 {
  70. return a;
  71. }
  72. sr = u32(_ctz(d[low]));
  73. q[high] = n[high] >> sr;
  74. q[low] = (n[high] << (U64_BITS-sr)) | (n[low] >> sr);
  75. return transmute(u128)q;
  76. }
  77. sr = 1 + U64_BITS + u32(_clz(d[low])) - u32(_clz(n[high]));
  78. switch {
  79. case sr == U64_BITS:
  80. q[low] = 0;
  81. q[high] = n[low];
  82. r[high] = 0;
  83. r[low] = n[high];
  84. case sr < U64_BITS:
  85. q[low] = 0;
  86. q[high] = n[low] << (U64_BITS - sr);
  87. r[high] = n[high] >> sr;
  88. r[low] = (n[high] << (U64_BITS - sr)) | (n[low] >> sr);
  89. case:
  90. q[low] = n[low] << (U128_BITS - sr);
  91. q[high] = (n[high] << (U128_BITS - sr)) | (n[low] >> (sr - U64_BITS));
  92. r[high] = 0;
  93. r[low] = n[high] >> (sr - U64_BITS);
  94. }
  95. } else {
  96. sr = transmute(u32)(i32(_clz(d[high])) - i32(_clz(n[high])));
  97. if sr > U64_BITS - 1 {
  98. if rem != nil {
  99. rem^ = a;
  100. }
  101. return 0;
  102. }
  103. sr += 1;
  104. q[low] = 0;
  105. if sr == U64_BITS {
  106. q[high] = n[low];
  107. r[high] = 0;
  108. r[low] = n[high];
  109. } else {
  110. r[high] = n[high] >> sr;
  111. r[low] = (n[high] << (U64_BITS - sr)) | (n[low] >> sr);
  112. q[high] = n[low] << (U64_BITS - sr);
  113. }
  114. }
  115. }
  116. carry: u32 = 0;
  117. r_all: u128 = ---;
  118. for ; sr > 0; sr -= 1 {
  119. r[high] = (r[high] << 1) | (r[low] >> (U64_BITS - 1));
  120. r[low] = (r[low] << 1) | (q[high] >> (U64_BITS - 1));
  121. q[high] = (q[high] << 1) | (q[low] >> (U64_BITS - 1));
  122. q[low] = (q[low] << 1) | u64(carry);
  123. r_all = transmute(u128)r;
  124. s := i128(b - r_all - 1) >> (U128_BITS - 1);
  125. carry = u32(s & 1);
  126. r_all -= b & transmute(u128)s;
  127. r = transmute([2]u64)r_all;
  128. }
  129. q_all := ((transmute(u128)q) << 1) | u128(carry);
  130. if rem != nil {
  131. rem^ = r_all;
  132. }
  133. return q_all;
  134. }