Explorar el Código

begin adding tsc frequency getters

Colin Davidson hace 2 años
padre
commit
051c9cb564

+ 2 - 2
core/intrinsics/intrinsics.odin

@@ -283,7 +283,7 @@ wasm_memory_atomic_wait32   :: proc(ptr: ^u32, expected: u32, timeout_ns: i64) -
 wasm_memory_atomic_notify32 :: proc(ptr: ^u32, waiters: u32) -> (waiters_woken_up: u32) ---
 
 // x86 Targets (i386, amd64)
-x86_cpuid  :: proc(ax, cx: u32) -> (eax, ebc, ecx, edx: u32) ---
+x86_cpuid  :: proc(ax, cx: u32) -> (eax, ebx, ecx, edx: u32) ---
 x86_xgetbv :: proc(cx: u32) -> (eax, edx: u32) ---
 
 
@@ -305,4 +305,4 @@ valgrind_client_request :: proc(default: uintptr, request: uintptr, a0, a1, a2,
 
 // Internal compiler use only
 
-__entry_point :: proc() ---
+__entry_point :: proc() ---

+ 154 - 0
core/sys/unix/syscalls_linux.odin

@@ -1537,6 +1537,7 @@ MAP_SHARED          :: 0x1
 MAP_PRIVATE         :: 0x2
 MAP_SHARED_VALIDATE :: 0x3
 MAP_ANONYMOUS       :: 0x20
+MAP_FAILED          :: -1
 
 // mremap flags
 MREMAP_MAYMOVE   :: 1
@@ -1563,6 +1564,155 @@ MADV_WIPEONFORK  :: 18
 MADV_KEEPONFORK  :: 19
 MADV_HWPOISON    :: 100
 
+// perf event data
+Perf_Sample :: struct #raw_union { // 8-byte overlay: both members alias the same u64; used as Perf_Event_Attr.sample
+	period:    u64, // NOTE(review): presumably the active member when Perf_Flag.Freq is clear — confirm
+	frequency: u64, // NOTE(review): presumably the active member when Perf_Flag.Freq is set — confirm
+}
+Perf_Wakeup :: struct #raw_union { // 4-byte overlay: both members alias the same u32; used as Perf_Event_Attr.wakeup
+	events:    u32, // NOTE(review): presumably the active member when Perf_Flag.Watermark is clear — confirm
+	watermark: u32, // NOTE(review): presumably the active member when Perf_Flag.Watermark is set — confirm
+}
+Perf_Field1 :: struct #raw_union { // four names for one 8-byte slot; used as Perf_Event_Attr.field1
+	breakpoint_addr: u64, // NOTE(review): which member is meaningful presumably depends on the event type — confirm
+	kprobe_func:     u64,
+	uprobe_path:     u64,
+	config1:         u64,
+}
+Perf_Field2 :: struct #raw_union { // four names for one 8-byte slot; used as Perf_Event_Attr.field2
+	breakpoint_len: u64, // NOTE(review): which member is meaningful presumably depends on the event type — confirm
+	kprobe_addr:    u64,
+	uprobe_offset:  u64,
+	config2:        u64,
+}
+Perf_Event_Attr :: struct #packed { // event description whose address is handed to sys_perf_event_open; 112 bytes with these field types
+	type:   u32, // stored as a raw u32; callers convert from Perf_Type_Id
+	size:   u32, // byte size of this struct, filled in by the caller (e.g. size_of the value)
+	config: u64, // event selector; for Hardware events callers convert from Perf_Hardware_Id
+	sample: Perf_Sample, // period/frequency overlay
+	sample_type: u64,
+	read_format: u64,
+	flags:       Perf_Flags, // 64-bit bit_set; see Perf_Flag for bit positions
+	wakeup: Perf_Wakeup, // events/watermark overlay
+	breakpoint_type: u32,
+	field1: Perf_Field1, // breakpoint_addr / kprobe_func / uprobe_path / config1 overlay
+	field2: Perf_Field2, // breakpoint_len / kprobe_addr / uprobe_offset / config2 overlay
+	branch_sample_type: u64,
+	sample_regs_user:   u64,
+	sample_stack_user:  u32,
+	clock_id:           i32,
+	sample_regs_intr:   u64,
+	aux_watermark:      u32,
+	sample_max_stack:   u16,
+	_padding:           u16, // explicit filler; brings the packed size to 112 bytes
+}
+
+Perf_Event_Flags :: distinct bit_set[Perf_Event_Flag; u64] // 64-bit set; overlays Perf_Capabilities.capabilities
+Perf_Event_Flag :: enum u64 { // each value is a bit index within Perf_Event_Flags
+	Bit0               = 0,
+	Bit0_Is_Deprecated = 1,
+	User_Rdpmc         = 2,
+	User_Time          = 3, // checked by the TSC frequency probe before it reads time_shift/time_mult
+	User_Time_Zero     = 4,
+	User_Time_Short    = 5,
+}
+Perf_Capabilities :: struct #raw_union { // one 64-bit word viewed either raw or as a flag set; used as Perf_Event_mmap_Page.cap
+	capabilities: u64, // raw 64-bit view
+	flags: Perf_Event_Flags, // same bits as a bit_set of Perf_Event_Flag
+}
+Perf_Event_mmap_Page :: struct #packed { // layout overlaid on the page obtained by mmap-ing a perf event fd
+	version:        u32,
+	compat_version: u32,
+	lock:           u32,
+	index:          u32,
+	offset:         i64,
+	time_enabled:   u64,
+	time_running:   u64,
+	cap: Perf_Capabilities, // capability bits; readers check .User_Time before using the time_* fields
+	pmc_width:      u16,
+	time_shift:     u16, // used with time_mult: frequency = (1_000_000_000 << time_shift) / time_mult
+	time_mult:      u32, // divisor in the frequency computation above
+	time_offset:    u64,
+	time_zero:      u64,
+	size:           u32,
+	reserved1:      u32,
+	time_cycles:    u64,
+	time_mask:      u64,
+	reserved2:      [116*8]u8, // 928 filler bytes; with the 96 bytes above, data_head starts at byte offset 1024
+	data_head:      u64,
+	data_tail:      u64,
+	data_offset:    u64,
+	data_size:      u64,
+	aux_head:       u64,
+	aux_tail:       u64,
+	aux_offset:     u64,
+	aux_size:       u64,
+}
+
+Perf_Type_Id :: enum u32 { // event class; callers store it in Perf_Event_Attr.type via a u32 conversion
+	Hardware   = 0, // pairs with a Perf_Hardware_Id in Perf_Event_Attr.config
+	Software   = 1,
+	Tracepoint = 2,
+	HW_Cache   = 3,
+	Raw        = 4,
+	Breakpoint = 5,
+}
+
+Perf_Hardware_Id :: enum u64 { // event selector; callers store it in Perf_Event_Attr.config via a u64 conversion
+	CPU_Cycles              = 0,
+	Instructions            = 1, // the event opened by the TSC frequency probe
+	Cache_References        = 2,
+	Cache_Misses            = 3,
+	Branch_Instructions     = 4,
+	Branch_Misses           = 5,
+	Bus_Cycles              = 6,
+	Stalled_Cycles_Frontend = 7,
+	Stalled_Cycles_Backend  = 8,
+	Ref_CPU_Cycles          = 9,
+}
+
+Perf_Flags :: distinct bit_set[Perf_Flag; u64] // 64-bit set; type of Perf_Event_Attr.flags
+Perf_Flag :: enum u64 { // each value is a bit index within Perf_Flags
+	Disabled       = 0,
+	Inherit        = 1,
+	Pinned         = 2,
+	Exclusive      = 3,
+	Exclude_User   = 4,
+	Exclude_Kernel = 5,
+	Exclude_HV     = 6,
+	Exclude_Idle   = 7,
+	mmap           = 8,
+	Comm           = 9,
+	Freq           = 10, // NOTE(review): presumably selects Perf_Sample.frequency over .period — confirm
+	Inherit_Stat   = 11,
+	Enable_On_Exec = 12,
+	Task           = 13,
+	Watermark      = 14, // NOTE(review): presumably selects Perf_Wakeup.watermark over .events — confirm
+	Precise_IP_0   = 15, // NOTE(review): Precise_IP_0/1 look like the low/high bits of one 2-bit field — confirm
+	Precise_IP_1   = 16,
+	mmap_Data      = 17,
+	Sample_Id_All  = 18,
+	Exclude_Host   = 19,
+	Exclude_Guest  = 20,
+	Exclude_Callchain_Kernel = 21,
+	Exclude_Callchain_User   = 22,
+	mmap2          = 23,
+	Comm_Exec      = 24,
+	Use_Clockid    = 25,
+	Context_Switch = 26,
+	Write_Backward = 27,
+	Namespaces     = 28,
+	KSymbol        = 29,
+	BPF_Event      = 30,
+	Aux_Output     = 31,
+	CGroup         = 32,
+	Text_Poke      = 33,
+	Build_Id       = 34,
+	Inherit_Thread = 35,
+	Remove_On_Exec = 36,
+	Sigtrap        = 37,
+}
+
 // Issues the SYS_gettid system call and returns its raw result converted to int.
 sys_gettid :: proc "contextless" () -> int {
 	raw_result := intrinsics.syscall(SYS_gettid)
 	return int(raw_result)
 }
@@ -1846,6 +1996,10 @@ sys_utimensat :: proc "contextless" (dfd: int, path: cstring, times: rawptr, fla
 	return int(intrinsics.syscall(SYS_utimensat, uintptr(dfd), uintptr(rawptr(path)), uintptr(times), uintptr(flags)))
 }
 
+// Issues the SYS_perf_event_open system call with the given arguments.
+// Every argument is converted to uintptr before the call; the raw syscall
+// result is returned unchanged as an int, so callers must interpret failures
+// themselves (e.g. with get_errno).
+sys_perf_event_open :: proc "contextless" (event_attr: rawptr, pid: i32, cpu: i32, group_fd: i32, flags: u32) -> int {
+	raw_result := intrinsics.syscall(
+		SYS_perf_event_open,
+		uintptr(event_attr),
+		uintptr(pid),
+		uintptr(cpu),
+		uintptr(group_fd),
+		uintptr(flags),
+	)
+	return int(raw_result)
+}
+
 get_errno :: proc "contextless" (res: int) -> i32 {
 	if res < 0 && res > -4096 {
 		return i32(-res)

+ 22 - 1
core/time/perf.odin

@@ -1,6 +1,7 @@
 package time
 
 import "core:runtime"
+import "core:intrinsics"
 
 Tick :: struct {
 	_nsec: i64, // relative amount
@@ -40,6 +41,26 @@ _tick_duration_end :: proc "contextless" (d: ^Duration, t: Tick) {
 	d^ = tick_since(t)
 }
 
+when ODIN_ARCH == .amd64 {
+	// Reports whether CPUID advertises an invariant TSC: bit 8 of EDX in
+	// extended leaf 0x8000_0007. Returns false when the processor does not
+	// implement that leaf at all.
+	_has_invariant_tsc :: proc "contextless" () -> bool {
+		MAX_EXTENDED_LEAF_QUERY :: 0x80_000_000
+		POWER_MANAGEMENT_LEAF   :: 0x80_000_007
+		INVARIANT_TSC_MASK      :: 1 << 8
+
+		// Leaf 0x8000_0000 reports the highest extended leaf in EAX.
+		highest_leaf, _, _, _ := intrinsics.x86_cpuid(MAX_EXTENDED_LEAF_QUERY, 0)
+		if highest_leaf >= POWER_MANAGEMENT_LEAF {
+			_, _, _, edx := intrinsics.x86_cpuid(POWER_MANAGEMENT_LEAF, 0)
+			return (edx & INVARIANT_TSC_MASK) != 0
+		}
+
+		// The processor is too old to expose the leaf that carries the bit.
+		return false
+	}
+} else {
+	// Targets other than amd64 never report an invariant TSC.
+	_has_invariant_tsc :: proc "contextless" () -> bool {
+		return false
+	}
+}
+
 /*
 	Benchmark helpers
 */
@@ -94,4 +115,4 @@ benchmark :: proc(options: ^Benchmark_Options, allocator := context.allocator) -
 		options->teardown(allocator) or_return
 	}
 	return
-}
+}

+ 43 - 0
core/time/tsc_linux.odin

@@ -0,0 +1,43 @@
+//+private
+//+build linux
+package time
+
+import "core:intrinsics"
+import "core:sys/unix"
+
+// _get_tsc_frequency derives a TSC frequency from the time conversion
+// parameters the kernel publishes in a perf event's mmap page:
+//
+//	frequency = (1_000_000_000 << time_shift) / time_mult
+//
+// Returns 0 when the frequency cannot be determined: the perf event cannot be
+// opened or mapped, the kernel does not advertise `.User_Time`, or `time_mult`
+// is zero. The outcome, success or failure, is cached after the first probe, so
+// every later call returns the same value without issuing more syscalls.
+//
+// The cache is a pair of unsynchronised statics; concurrent first calls may
+// each run the probe.
+_get_tsc_frequency :: proc "contextless" () -> u64 {
+	@(static) frequency : u64  = 0
+	@(static) probed    : bool = false
+	if probed {
+		return frequency
+	}
+	// Mark the probe as done on every exit path. `frequency` stays 0 on
+	// failure, so a failed probe keeps returning 0 rather than a sentinel.
+	defer probed = true
+
+	perf_attr := unix.Perf_Event_Attr{}
+	perf_attr.type = u32(unix.Perf_Type_Id.Hardware)
+	perf_attr.config = u64(unix.Perf_Hardware_Id.Instructions)
+	perf_attr.size = size_of(perf_attr)
+	perf_attr.flags = {.Disabled, .Exclude_Kernel, .Exclude_HV}
+
+	// The raw syscall wrapper returns a negated errno on failure, not -1,
+	// so any negative value means the event could not be opened.
+	fd := unix.sys_perf_event_open(&perf_attr, 0, -1, -1, 0)
+	if fd < 0 {
+		return 0
+	}
+	defer unix.sys_close(fd)
+
+	page_size : uint = 4096
+	ret := unix.sys_mmap(nil, page_size, unix.PROT_READ, unix.MAP_SHARED, fd, 0)
+	// Same convention as above: results in (-4096, 0) are negated errno
+	// values (the range unix.get_errno treats as errors). This also covers
+	// MAP_FAILED (-1).
+	if ret < 0 && ret > -4096 {
+		return 0
+	}
+	addr := rawptr(uintptr(ret))
+	defer unix.sys_munmap(addr, page_size)
+
+	event_page := (^unix.Perf_Event_mmap_Page)(addr)
+	if .User_Time not_in event_page.cap.flags {
+		return 0
+	}
+
+	// Guard the division below against a zero multiplier.
+	time_mult := event_page.time_mult
+	if time_mult == 0 {
+		return 0
+	}
+
+	// Widen to u128 so the shift cannot overflow before the division.
+	frequency = u64((u128(1_000_000_000) << u128(event_page.time_shift)) / u128(time_mult))
+	return frequency
+}