udivmod128.odin 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180
  1. package runtime
  2. @(default_calling_convention="none")
  3. foreign {
  4. @(link_name="llvm.cttz.i8") _ctz_u8 :: proc(i: u8, is_zero_undef := false) -> u8 ---
  5. @(link_name="llvm.cttz.i16") _ctz_u16 :: proc(i: u16, is_zero_undef := false) -> u16 ---
  6. @(link_name="llvm.cttz.i32") _ctz_u32 :: proc(i: u32, is_zero_undef := false) -> u32 ---
  7. @(link_name="llvm.cttz.i64") _ctz_u64 :: proc(i: u64, is_zero_undef := false) -> u64 ---
  8. }
  9. _ctz :: proc{
  10. _ctz_u8,
  11. _ctz_u16,
  12. _ctz_u32,
  13. _ctz_u64,
  14. };
  15. @(default_calling_convention="none")
  16. foreign {
  17. @(link_name="llvm.ctlz.i8") _clz_u8 :: proc(i: u8, is_zero_undef := false) -> u8 ---
  18. @(link_name="llvm.ctlz.i16") _clz_u16 :: proc(i: u16, is_zero_undef := false) -> u16 ---
  19. @(link_name="llvm.ctlz.i32") _clz_u32 :: proc(i: u32, is_zero_undef := false) -> u32 ---
  20. @(link_name="llvm.ctlz.i64") _clz_u64 :: proc(i: u64, is_zero_undef := false) -> u64 ---
  21. }
  22. _clz :: proc{
  23. _clz_u8,
  24. _clz_u16,
  25. _clz_u32,
  26. _clz_u64,
  27. };
  28. udivmod128 :: proc "c" (a, b: u128, rem: ^u128) -> u128 {
  29. n := transmute([2]u64)a;
  30. d := transmute([2]u64)b;
  31. q, r: [2]u64 = ---, ---;
  32. sr: u32 = 0;
  33. low :: ODIN_ENDIAN == "big" ? 1 : 0;
  34. high :: 1 - low;
  35. U64_BITS :: 8*size_of(u64);
  36. U128_BITS :: 8*size_of(u128);
  37. // Special Cases
  38. if n[high] == 0 {
  39. if d[high] == 0 {
  40. if rem != nil {
  41. res := n[low] % d[low];
  42. rem^ = u128(res);
  43. }
  44. return u128(n[low] / d[low]);
  45. }
  46. if rem != nil {
  47. rem^ = u128(n[low]);
  48. }
  49. return 0;
  50. }
  51. if d[low] == 0 {
  52. if d[high] == 0 {
  53. if rem != nil {
  54. rem^ = u128(n[high] % d[low]);
  55. }
  56. return u128(n[high] / d[low]);
  57. }
  58. if n[low] == 0 {
  59. if rem != nil {
  60. r[high] = n[high] % d[high];
  61. r[low] = 0;
  62. rem^ = transmute(u128)r;
  63. }
  64. return u128(n[high] / d[high]);
  65. }
  66. if d[high] & (d[high]-1) == 0 {
  67. if rem != nil {
  68. r[low] = n[low];
  69. r[high] = n[high] & (d[high] - 1);
  70. rem^ = transmute(u128)r;
  71. }
  72. return u128(n[high] >> _ctz(d[high]));
  73. }
  74. sr = transmute(u32)(i32(_clz(d[high])) - i32(_clz(n[high])));
  75. if sr > U64_BITS - 2 {
  76. if rem != nil {
  77. rem^ = a;
  78. }
  79. return 0;
  80. }
  81. sr += 1;
  82. q[low] = 0;
  83. q[high] = n[low] << u64(U64_BITS - sr);
  84. r[high] = n[high] >> sr;
  85. r[low] = (n[high] << (U64_BITS - sr)) | (n[low] >> sr);
  86. } else {
  87. if d[high] == 0 {
  88. if d[low] & (d[low] - 1) == 0 {
  89. if rem != nil {
  90. rem^ = u128(n[low] & (d[low] - 1));
  91. }
  92. if d[low] == 1 {
  93. return a;
  94. }
  95. sr = u32(_ctz(d[low]));
  96. q[high] = n[high] >> sr;
  97. q[low] = (n[high] << (U64_BITS-sr)) | (n[low] >> sr);
  98. return transmute(u128)q;
  99. }
  100. sr = 1 + U64_BITS + u32(_clz(d[low])) - u32(_clz(n[high]));
  101. switch {
  102. case sr == U64_BITS:
  103. q[low] = 0;
  104. q[high] = n[low];
  105. r[high] = 0;
  106. r[low] = n[high];
  107. case sr < U64_BITS:
  108. q[low] = 0;
  109. q[high] = n[low] << (U64_BITS - sr);
  110. r[high] = n[high] >> sr;
  111. r[low] = (n[high] << (U64_BITS - sr)) | (n[low] >> sr);
  112. case:
  113. q[low] = n[low] << (U128_BITS - sr);
  114. q[high] = (n[high] << (U128_BITS - sr)) | (n[low] >> (sr - U64_BITS));
  115. r[high] = 0;
  116. r[low] = n[high] >> (sr - U64_BITS);
  117. }
  118. } else {
  119. sr = transmute(u32)(i32(_clz(d[high])) - i32(_clz(n[high])));
  120. if sr > U64_BITS - 1 {
  121. if rem != nil {
  122. rem^ = a;
  123. }
  124. return 0;
  125. }
  126. sr += 1;
  127. q[low] = 0;
  128. if sr == U64_BITS {
  129. q[high] = n[low];
  130. r[high] = 0;
  131. r[low] = n[high];
  132. } else {
  133. r[high] = n[high] >> sr;
  134. r[low] = (n[high] << (U64_BITS - sr)) | (n[low] >> sr);
  135. q[high] = n[low] << (U64_BITS - sr);
  136. }
  137. }
  138. }
  139. carry: u32 = 0;
  140. r_all: u128 = ---;
  141. for ; sr > 0; sr -= 1 {
  142. r[high] = (r[high] << 1) | (r[low] >> (U64_BITS - 1));
  143. r[low] = (r[low] << 1) | (q[high] >> (U64_BITS - 1));
  144. q[high] = (q[high] << 1) | (q[low] >> (U64_BITS - 1));
  145. q[low] = (q[low] << 1) | u64(carry);
  146. r_all = transmute(u128)r;
  147. s := i128(b - r_all - 1) >> (U128_BITS - 1);
  148. carry = u32(s & 1);
  149. r_all -= b & transmute(u128)s;
  150. r = transmute([2]u64)r_all;
  151. }
  152. q_all := ((transmute(u128)q) << 1) | u128(carry);
  153. if rem != nil {
  154. rem^ = r_all;
  155. }
  156. return q_all;
  157. }