benchmark_runtime.odin 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. package benchmark_runtime
  2. import "base:runtime"
  3. import "core:fmt"
  4. import "core:log"
  5. import "core:testing"
  6. import "core:strings"
  7. import "core:text/table"
  8. import "core:time"
  9. RUNS_PER_SIZE :: 2500
  10. sizes := [?]int {
  11. 7, 8, 9,
  12. 15, 16, 17,
  13. 31, 32, 33,
  14. 63, 64, 65,
  15. 95, 96, 97,
  16. 128,
  17. 256,
  18. 512,
  19. 1024,
  20. 4096,
  21. 1024 * 1024,
  22. }
  23. // These are the normal, unoptimized algorithms.
  24. plain_memory_equal :: proc "contextless" (x, y: rawptr, n: int) -> bool {
  25. switch {
  26. case n == 0: return true
  27. case x == y: return true
  28. }
  29. a, b := ([^]byte)(x), ([^]byte)(y)
  30. length := uint(n)
  31. for i := uint(0); i < length; i += 1 {
  32. if a[i] != b[i] {
  33. return false
  34. }
  35. }
  36. return true
  37. }
  38. plain_memory_compare :: proc "contextless" (a, b: rawptr, n: int) -> int #no_bounds_check {
  39. switch {
  40. case a == b: return 0
  41. case a == nil: return -1
  42. case b == nil: return +1
  43. }
  44. x := uintptr(a)
  45. y := uintptr(b)
  46. n := uintptr(n)
  47. SU :: size_of(uintptr)
  48. fast := n/SU + 1
  49. offset := (fast-1)*SU
  50. curr_block := uintptr(0)
  51. if n < SU {
  52. fast = 0
  53. }
  54. for /**/; curr_block < fast; curr_block += 1 {
  55. va := (^uintptr)(x + curr_block * size_of(uintptr))^
  56. vb := (^uintptr)(y + curr_block * size_of(uintptr))^
  57. if va ~ vb != 0 {
  58. for pos := curr_block*SU; pos < n; pos += 1 {
  59. a := (^byte)(x+pos)^
  60. b := (^byte)(y+pos)^
  61. if a ~ b != 0 {
  62. return -1 if (int(a) - int(b)) < 0 else +1
  63. }
  64. }
  65. }
  66. }
  67. for /**/; offset < n; offset += 1 {
  68. a := (^byte)(x+offset)^
  69. b := (^byte)(y+offset)^
  70. if a ~ b != 0 {
  71. return -1 if (int(a) - int(b)) < 0 else +1
  72. }
  73. }
  74. return 0
  75. }
  76. plain_memory_compare_zero :: proc "contextless" (a: rawptr, n: int) -> int #no_bounds_check {
  77. x := uintptr(a)
  78. n := uintptr(n)
  79. SU :: size_of(uintptr)
  80. fast := n/SU + 1
  81. offset := (fast-1)*SU
  82. curr_block := uintptr(0)
  83. if n < SU {
  84. fast = 0
  85. }
  86. for /**/; curr_block < fast; curr_block += 1 {
  87. va := (^uintptr)(x + curr_block * size_of(uintptr))^
  88. if va ~ 0 != 0 {
  89. for pos := curr_block*SU; pos < n; pos += 1 {
  90. a := (^byte)(x+pos)^
  91. if a ~ 0 != 0 {
  92. return -1 if int(a) < 0 else +1
  93. }
  94. }
  95. }
  96. }
  97. for /**/; offset < n; offset += 1 {
  98. a := (^byte)(x+offset)^
  99. if a ~ 0 != 0 {
  100. return -1 if int(a) < 0 else +1
  101. }
  102. }
  103. return 0
  104. }
  105. run_trial_size_cmp :: proc(p: proc "contextless" (rawptr, rawptr, int) -> $R, size: int, idx: int, runs: int, loc := #caller_location) -> (timing: time.Duration) {
  106. left := make([]u8, size)
  107. right := make([]u8, size)
  108. defer {
  109. delete(left)
  110. delete(right)
  111. }
  112. right[idx] = 0x01
  113. accumulator: int
  114. watch: time.Stopwatch
  115. time.stopwatch_start(&watch)
  116. for _ in 0..<runs {
  117. result := p(&left[0], &right[0], size)
  118. when R == bool {
  119. assert(result == false, loc = loc)
  120. accumulator += 1
  121. } else when R == int {
  122. assert(result == -1, loc = loc)
  123. accumulator += result
  124. }
  125. }
  126. time.stopwatch_stop(&watch)
  127. timing = time.stopwatch_duration(watch)
  128. log.debug(accumulator)
  129. return
  130. }
  131. run_trial_size_zero :: proc(p: proc "contextless" (rawptr, int) -> int, size: int, idx: int, runs: int, loc := #caller_location) -> (timing: time.Duration) {
  132. data := make([]u8, size)
  133. defer delete(data)
  134. data[idx] = 0x01
  135. accumulator: int
  136. watch: time.Stopwatch
  137. time.stopwatch_start(&watch)
  138. for _ in 0..<runs {
  139. result := p(&data[0], size)
  140. assert(result == 1, loc = loc)
  141. accumulator += result
  142. }
  143. time.stopwatch_stop(&watch)
  144. timing = time.stopwatch_duration(watch)
  145. log.debug(accumulator)
  146. return
  147. }
  148. run_trial_size :: proc {
  149. run_trial_size_cmp,
  150. run_trial_size_zero,
  151. }
  152. bench_table :: proc(algo_name: string, plain, simd: $P) {
  153. string_buffer := strings.builder_make()
  154. defer strings.builder_destroy(&string_buffer)
  155. tbl: table.Table
  156. table.init(&tbl)
  157. defer table.destroy(&tbl)
  158. table.aligned_header_of_values(&tbl, .Right, "Algorithm", "Size", "Iterations", "Scalar", "SIMD", "SIMD Relative (%)", "SIMD Relative (x)")
  159. for size in sizes {
  160. // Place the non-zero byte somewhere in the middle.
  161. needle_index := size / 2
  162. plain_timing := run_trial_size(plain, size, needle_index, RUNS_PER_SIZE)
  163. simd_timing := run_trial_size(simd, size, needle_index, RUNS_PER_SIZE)
  164. _plain := fmt.tprintf("%8M", plain_timing)
  165. _simd := fmt.tprintf("%8M", simd_timing)
  166. _relp := fmt.tprintf("%.3f %%", f64(simd_timing) / f64(plain_timing) * 100.0)
  167. _relx := fmt.tprintf("%.3f x", 1 / (f64(simd_timing) / f64(plain_timing)))
  168. table.aligned_row_of_values(
  169. &tbl,
  170. .Right,
  171. algo_name,
  172. size, RUNS_PER_SIZE, _plain, _simd, _relp, _relx)
  173. }
  174. builder_writer := strings.to_writer(&string_buffer)
  175. fmt.sbprintln(&string_buffer)
  176. table.write_plain_table(builder_writer, &tbl)
  177. my_table_string := strings.to_string(string_buffer)
  178. log.info(my_table_string)
  179. }
  180. @test
  181. benchmark_memory_procs :: proc(t: ^testing.T) {
  182. bench_table("memory_equal", plain_memory_equal, runtime.memory_equal)
  183. bench_table("memory_compare", plain_memory_compare, runtime.memory_compare)
  184. bench_table("memory_compare_zero", plain_memory_compare_zero, runtime.memory_compare_zero)
  185. }