perf.odin 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150
  1. package time
  2. import "base:runtime"
  3. import "base:intrinsics"
  // Tick is one reading of the package's tick source.
  //
  // The stored value is a relative amount: a single Tick has no meaning on its
  // own and is only useful when subtracted from another Tick (see `tick_diff`).
  Tick :: struct {
      // Relative counter value. `tick_diff` casts the difference of two of these
      // directly to a Duration.
      // NOTE(review): presumably nanoseconds, going by the field name - confirm
      // against the platform `_tick_now` implementations.
      _nsec: i64,
  }
  // tick_now returns the current Tick by forwarding to the platform-specific
  // `_tick_now` implementation (defined outside this section of the file).
  tick_now :: proc "contextless" () -> Tick {
      now := _tick_now()
      return now
  }
  // tick_diff returns the Duration elapsed from `start` to `end`.
  //
  // The result is `end - start`, so it is negative when `end` was taken before
  // `start`.
  tick_diff :: proc "contextless" (start, end: Tick) -> Duration {
      return Duration(end._nsec - start._nsec)
  }
  // tick_lap_time returns the time elapsed since the Tick stored in `prev` and
  // then overwrites `prev^` with the current Tick, so repeated calls measure
  // consecutive laps.
  //
  // A `prev` whose counter is zero is treated as "no previous lap": the call
  // returns a zero Duration and only records the current Tick.
  tick_lap_time :: proc "contextless" (prev: ^Tick) -> Duration {
      now := tick_now()

      lap: Duration
      if prev._nsec != 0 {
          lap = tick_diff(prev^, now)
      }

      // Store the new reference point for the next lap.
      prev^ = now
      return lap
  }
  // tick_since returns the Duration elapsed between `start` and the moment of
  // the call.
  tick_since :: proc "contextless" (start: Tick) -> Duration {
      now := tick_now()
      return tick_diff(start, now)
  }
  // SCOPED_TICK_DURATION captures the current Tick and returns it.
  //
  // The `deferred_in_out` attribute registers `_tick_duration_end` as a deferred
  // call that receives this procedure's input (`d`) and its output (the
  // starting Tick); that procedure writes the elapsed time into `d^`.
  //
  // `d` is not touched here - it is only forwarded to the deferred procedure.
  @(deferred_in_out=_tick_duration_end)
  SCOPED_TICK_DURATION :: proc "contextless" (d: ^Duration) -> Tick {
      started := tick_now()
      return started
  }
  // _tick_duration_end is the deferred half of `SCOPED_TICK_DURATION`: it stores
  // into `d^` the time elapsed between the starting Tick `t` and now.
  _tick_duration_end :: proc "contextless" (d: ^Duration, t: Tick) {
      d^ = tick_diff(t, tick_now())
  }
  when ODIN_ARCH == .amd64 {
      // x86_has_invariant_tsc reports whether CPUID advertises an invariant TSC.
      //
      // It first queries CPUID leaf 0x8000_0000 and compares the returned EAX
      // against 0x8000_0007; processors reporting a smaller value are treated as
      // not having the feature. Otherwise bit 8 of EDX from leaf 0x8000_0007 is
      // the invariant-TSC flag.
      @(private)
      x86_has_invariant_tsc :: proc "contextless" () -> bool {
          EXTENDED_BASE_LEAF :: 0x80_000_000
          INVARIANT_TSC_LEAF :: 0x80_000_007
          INVARIANT_TSC_MASK :: 1 << 8

          highest_leaf, _, _, _ := intrinsics.x86_cpuid(EXTENDED_BASE_LEAF, 0)

          if highest_leaf >= INVARIANT_TSC_LEAF {
              // The leaf is available: test the invariant TSC bit.
              _, _, _, flags := intrinsics.x86_cpuid(INVARIANT_TSC_LEAF, 0)
              return (flags & INVARIANT_TSC_MASK) != 0
          }

          // Processor is too old to expose the leaf at all.
          return false
      }
  }
  when !(ODIN_OS == .Darwin || ODIN_OS == .Linux || ODIN_OS == .FreeBSD) {
      // Fallback `_get_tsc_frequency` for every target other than Darwin, Linux
      // and FreeBSD: it always reports failure (`0, false`), which makes
      // `tsc_frequency` fall back to measuring the TSC rate itself.
      _get_tsc_frequency :: proc "contextless" () -> (u64, bool) {
          return 0, false
      }
  }
  // has_invariant_tsc reports whether the processor advertises an invariant TSC.
  //
  // On amd64 this asks CPUID via `x86_has_invariant_tsc`; every other
  // architecture reports `false`.
  has_invariant_tsc :: proc "contextless" () -> bool {
      when ODIN_ARCH == .amd64 {
          return x86_has_invariant_tsc()
      } else {
          return false
      }
  }
  // tsc_frequency returns the frequency of the time stamp counter in Hz.
  //
  // Inputs:
  // - fallback_sleep: how long to sleep while measuring the counter when the
  //   platform cannot report the frequency directly (defaults to 2 seconds).
  //
  // Returns:
  // - the frequency in Hz, and `true` on success
  // - `0, false` when the processor has no invariant TSC, or when the fallback
  //   measurement did not observe a positive elapsed time and cycle count
  //
  // The platform query `_get_tsc_frequency` is tried first. Only if it fails is
  // the frequency approximated by counting cycles across a sleep of
  // `fallback_sleep`.
  tsc_frequency :: proc "contextless" (fallback_sleep := 2 * Second) -> (u64, bool) {
      if !has_invariant_tsc() {
          return 0, false
      }

      hz, ok := _get_tsc_frequency()
      if !ok {
          // fallback to approximate TSC
          tsc_begin := intrinsics.read_cycle_counter()
          tick_begin := tick_now()

          sleep(fallback_sleep)

          tsc_end := intrinsics.read_cycle_counter()
          tick_end := tick_now()

          elapsed_ns := duration_nanoseconds(tick_diff(tick_begin, tick_end))
          cycles := tsc_end - tsc_begin

          // A zero interval (e.g. `fallback_sleep` of 0 or a coarse tick source)
          // would divide by zero below, and a negative value would wrap to a huge
          // number once cast to u128. Report failure instead of a bogus frequency.
          if elapsed_ns <= 0 || cycles <= 0 {
              return 0, false
          }

          // cycles per nanosecond scaled to cycles per second; u128 keeps the
          // multiplication from overflowing.
          hz = u64((u128(cycles) * 1_000_000_000) / u128(elapsed_ns))
      }

      return hz, true
  }
  74. /*
  75. Benchmark helpers
  76. */
  // Benchmark_Error is the status returned by `benchmark` and by the
  // setup/bench/teardown callbacks of `Benchmark_Options`.
  Benchmark_Error :: enum {
      // Success. This is the zero value, which is what lets `benchmark`
      // propagate callback failures with `or_return`.
      Okay = 0,

      // NOTE(review): presumably reported by callbacks that fail to allocate;
      // nothing in this section of the file produces it.
      Allocation_Error,
  }
  // Benchmark_Options describes one benchmark run for `benchmark`: the callbacks
  // to invoke, the data they work on, and the performance figures written back
  // once the run has finished.
  Benchmark_Options :: struct {
      // Callbacks. `benchmark` requires `bench` to be non-nil; `setup` and
      // `teardown` are optional and skipped when nil. Each one receives this
      // struct and the allocator that was handed to `benchmark`.
      setup:    #type proc(options: ^Benchmark_Options, allocator: runtime.Allocator) -> (err: Benchmark_Error),
      bench:    #type proc(options: ^Benchmark_Options, allocator: runtime.Allocator) -> (err: Benchmark_Error),
      teardown: #type proc(options: ^Benchmark_Options, allocator: runtime.Allocator) -> (err: Benchmark_Error),

      // Workload description and results owned by the callbacks. `benchmark`
      // itself only reads `count` and `processed`.
      // NOTE(review): the meaning of the remaining fields is defined by the
      // callbacks, not by this file - confirm against their implementations.
      rounds:    int,
      bytes:     int,
      input:     []u8,
      count:     int, // multiplied by runs-per-second to give `rounds_per_second`
      processed: int, // byte count used to compute `megabytes_per_second`
      output:    []u8, // Unused for hash benchmarks
      hash:      u128,

      /*
          Performance
          Filled in by `benchmark` after `bench` returns successfully.
      */
      duration:             Duration, // measured run time of the `bench` callback
      rounds_per_second:    f64,
      megabytes_per_second: f64,      // uses 1024 * 1024 bytes per megabyte
  }
  // benchmark runs one benchmark described by `options` and stores the measured
  // figures back into it.
  //
  // Sequence:
  // 1. `options.setup` (if set) is called; a failure is returned immediately.
  // 2. `options.bench` is called and timed; a failure is returned immediately,
  //    without calling teardown or writing any performance fields.
  // 3. `duration`, `rounds_per_second` and `megabytes_per_second` are filled in
  //    from the elapsed time, `options.count` and `options.processed`.
  // 4. `options.teardown` (if set) is called and its status is returned.
  //
  // Inputs:
  // - options:   must be non-nil and have a non-nil `bench` callback (asserted)
  // - allocator: passed through to every callback
  //
  // Returns:
  // - err: `.Okay`, or the first non-`.Okay` status reported by a callback
  benchmark :: proc(options: ^Benchmark_Options, allocator := context.allocator) -> (err: Benchmark_Error) {
      BYTES_PER_MEGABYTE :: 1024 * 1024

      assert(options != nil)
      assert(options.bench != nil)

      if options.setup != nil {
          if setup_err := options->setup(allocator); setup_err != .Okay {
              return setup_err
          }
      }

      // Only the bench callback itself is timed.
      started := tick_now()
      bench_err := options->bench(allocator)
      elapsed := tick_since(started)
      if bench_err != .Okay {
          return bench_err
      }

      options.duration = elapsed

      // How many times the measured run fits into one second.
      runs_per_second := f64(Second) / f64(elapsed)
      options.rounds_per_second = runs_per_second * f64(options.count)
      options.megabytes_per_second = f64(options.processed) / f64(BYTES_PER_MEGABYTE) * runs_per_second

      if options.teardown != nil {
          return options->teardown(allocator)
      }
      return .Okay
  }