4 hónapja · 9814370659
--- a/base/intrinsics/intrinsics.odin
+++ b/base/intrinsics/intrinsics.odin
@@ -298,7 +298,7 @@ simd_masked_store :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U)
 
				 simd_masked_expand_load    :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U) -> #simd[N]T where type_is_integer(U) || type_is_boolean(U) ---
			
 
				 simd_masked_compress_store :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U)              where type_is_integer(U) || type_is_boolean(U) ---
			
 
				 
			
 
				-
			
 
				+simd_indices :: proc($T: typeid/#simd[$N]$E) -> T where type_is_numeric(E) ---
			
 
				 
			
 
				 simd_shuffle :: proc(a, b: #simd[N]T, indices: ..int) -> #simd[len(indices)]T ---
			
 
				 simd_select  :: proc(cond: #simd[N]boolean_or_integer, true, false: #simd[N]T) -> #simd[N]T ---
			
--- a/core/bufio/reader.odin
+++ b/core/bufio/reader.odin
@@ -257,7 +257,7 @@ reader_read_rune :: proc(b: ^Reader) -> (r: rune, size: int, err: io.Error) {
 
				 	for b.r+utf8.UTF_MAX > b.w &&
			
 
				 	    !utf8.full_rune(b.buf[b.r:b.w]) &&
			
 
				 	    b.err == nil &&
			
 
				-	    b.w-b.w < len(b.buf) {
			
 
				+	    b.w-b.r < len(b.buf) {
			
 
				 		_reader_read_new_chunk(b) or_return
			
 
				 	}
			
 
				 
			
--- a/core/os/os2/file_windows.odin
+++ b/core/os/os2/file_windows.odin
@@ -12,7 +12,30 @@ import win32 "core:sys/windows"
 
				 
			
 
				 INVALID_HANDLE :: ~uintptr(0)
			
 
				 
			
 
				-S_IWRITE :: 0o200
			
 
				+// NOTE(Jeroen): We don't translate mode flags for Linux when given to `chmod`.
			
 
				+//               Let's not do so for Windows for `chmod` or `read_directory_iterator` either.
			
 
				+//               They're *not* portable between Windows and non-Windows platforms.
			
 
				+//
			
 
				+//               It also leads to information loss as flags like Archive, Hidden and System have no equivalent there.
			
 
				+//               We can of course parse them so we can set the `.Symlink` and `.Directory` type, but we shouldn't pretend
			
 
				+//               that 0o644 is meaningful when returned as a mode.
			
 
				+//               `C:\bootmgr` as an example has attributes read only, hidden, system, archive. In no way is it sensible to replace that with 0o444.
			
 
				+FILE_ATTRIBUTE_READONLY            :: win32.FILE_ATTRIBUTE_READONLY            // 0x00000001
			
 
				+FILE_ATTRIBUTE_HIDDEN              :: win32.FILE_ATTRIBUTE_HIDDEN              // 0x00000002
			
 
				+FILE_ATTRIBUTE_SYSTEM              :: win32.FILE_ATTRIBUTE_SYSTEM              // 0x00000004
			
 
				+FILE_ATTRIBUTE_DIRECTORY           :: win32.FILE_ATTRIBUTE_DIRECTORY           // 0x00000010
			
 
				+FILE_ATTRIBUTE_ARCHIVE             :: win32.FILE_ATTRIBUTE_ARCHIVE             // 0x00000020
			
 
				+FILE_ATTRIBUTE_DEVICE              :: win32.FILE_ATTRIBUTE_DEVICE              // 0x00000040
			
 
				+FILE_ATTRIBUTE_NORMAL              :: win32.FILE_ATTRIBUTE_NORMAL              // 0x00000080
			
 
				+FILE_ATTRIBUTE_TEMPORARY           :: win32.FILE_ATTRIBUTE_TEMPORARY           // 0x00000100
			
 
				+FILE_ATTRIBUTE_SPARSE_FILE         :: win32.FILE_ATTRIBUTE_SPARSE_FILE         // 0x00000200
			
 
				+FILE_ATTRIBUTE_REPARSE_Point       :: win32.FILE_ATTRIBUTE_REPARSE_Point       // 0x00000400
			
 
				+FILE_ATTRIBUTE_REPARSE_POINT       :: win32.FILE_ATTRIBUTE_REPARSE_POINT       // 0x00000400
			
 
				+FILE_ATTRIBUTE_COMPRESSED          :: win32.FILE_ATTRIBUTE_COMPRESSED          // 0x00000800
			
 
				+FILE_ATTRIBUTE_OFFLINE             :: win32.FILE_ATTRIBUTE_OFFLINE             // 0x00001000
			
 
				+FILE_ATTRIBUTE_NOT_CONTENT_INDEXED :: win32.FILE_ATTRIBUTE_NOT_CONTENT_INDEXED // 0x00002000
			
 
				+FILE_ATTRIBUTE_ENCRYPTED           :: win32.FILE_ATTRIBUTE_ENCRYPTED           // 0x00004000
			
 
				+
			
 
				 _ERROR_BAD_NETPATH :: 53
			
 
				 MAX_RW :: 1<<30
			
 
				 
			
@@ -122,7 +145,7 @@ _open_internal :: proc(name: string, flags: File_Flags, perm: int) -> (handle: u
 
				 	}
			
 
				 
			
 
				 	attrs: u32 = win32.FILE_ATTRIBUTE_NORMAL|win32.FILE_FLAG_BACKUP_SEMANTICS
			
 
				-	if perm & S_IWRITE == 0 {
			
 
				+	if u32(perm) & FILE_ATTRIBUTE_NORMAL == 0 {
			
 
				 		attrs = win32.FILE_ATTRIBUTE_READONLY
			
 
				 		if create_mode == win32.CREATE_ALWAYS {
			
 
				 			// NOTE(bill): Open has just asked to create a file in read-only mode.
			
@@ -748,20 +771,10 @@ _fchmod :: proc(f: ^File, mode: int) -> Error {
 
				 	if f == nil || f.impl == nil {
			
 
				 		return nil
			
 
				 	}
			
 
				-	d: win32.BY_HANDLE_FILE_INFORMATION
			
 
				-	if !win32.GetFileInformationByHandle(_handle(f), &d) {
			
 
				-		return _get_platform_error()
			
 
				-	}
			
 
				-	attrs := d.dwFileAttributes
			
 
				-	if mode & S_IWRITE != 0 {
			
 
				-		attrs &~= win32.FILE_ATTRIBUTE_READONLY
			
 
				-	} else {
			
 
				-		attrs |= win32.FILE_ATTRIBUTE_READONLY
			
 
				-	}
			
 
				 
			
 
				 	info: win32.FILE_BASIC_INFO
			
 
				-	info.FileAttributes = attrs
			
 
				-	if !win32.SetFileInformationByHandle(_handle(f), .FileBasicInfo, &info, size_of(d)) {
			
 
				+	info.FileAttributes = win32.DWORD(mode)
			
 
				+	if !win32.SetFileInformationByHandle(_handle(f), .FileBasicInfo, &info, size_of(info)) {
			
 
				 		return _get_platform_error()
			
 
				 	}
			
 
				 	return nil
			
@@ -800,19 +813,11 @@ _chtimes :: proc(name: string, atime, mtime: time.Time) -> Error {
 
				 	defer close(f)
			
 
				 	return _fchtimes(f, atime, mtime)
			
 
				 }
			
 
				+
			
 
				 _fchtimes :: proc(f: ^File, atime, mtime: time.Time) -> Error {
			
 
				 	if f == nil || f.impl == nil {
			
 
				 		return nil
			
 
				 	}
			
 
				-	d: win32.BY_HANDLE_FILE_INFORMATION
			
 
				-	if !win32.GetFileInformationByHandle(_handle(f), &d) {
			
 
				-		return _get_platform_error()
			
 
				-	}
			
 
				-
			
 
				-	to_windows_time :: #force_inline proc(t: time.Time) -> win32.LARGE_INTEGER {
			
 
				-		// a 64-bit value representing the number of 100-nanosecond intervals since January 1, 1601 (UTC)
			
 
				-		return win32.LARGE_INTEGER(time.time_to_unix_nano(t) * 100 + 116444736000000000)
			
 
				-	}
			
 
				 
			
 
				 	atime, mtime := atime, mtime
			
 
				 	if time.time_to_unix_nano(atime) < time.time_to_unix_nano(mtime) {
			
@@ -820,9 +825,9 @@ _fchtimes :: proc(f: ^File, atime, mtime: time.Time) -> Error {
 
				 	}
			
 
				 
			
 
				 	info: win32.FILE_BASIC_INFO
			
 
				-	info.LastAccessTime = to_windows_time(atime)
			
 
				-	info.LastWriteTime  = to_windows_time(mtime)
			
 
				-	if !win32.SetFileInformationByHandle(_handle(f), .FileBasicInfo, &info, size_of(d)) {
			
 
				+	info.LastAccessTime = time_as_filetime(atime)
			
 
				+	info.LastWriteTime  = time_as_filetime(mtime)
			
 
				+	if !win32.SetFileInformationByHandle(_handle(f), .FileBasicInfo, &info, size_of(info)) {
			
 
				 		return _get_platform_error()
			
 
				 	}
			
 
				 	return nil
			
--- a/core/os/os2/process_linux.odin
+++ b/core/os/os2/process_linux.odin
@@ -162,7 +162,7 @@ _process_info_by_pid :: proc(pid: int, selection: Process_Info_Fields, allocator
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	cmdline_if: if selection & {.Working_Dir, .Command_Line, .Command_Args, .Executable_Path} != {} {
			
 
				+	cmdline_if: if selection & {.Working_Dir, .Command_Line, .Command_Args} != {} {
			
 
				 		strings.builder_reset(&path_builder)
			
 
				 		strings.write_string(&path_builder, "/proc/")
			
 
				 		strings.write_int(&path_builder, pid)
			
@@ -178,12 +178,12 @@ _process_info_by_pid :: proc(pid: int, selection: Process_Info_Fields, allocator
 
				 		terminator := strings.index_byte(cmdline, 0)
			
 
				 		assert(terminator > 0)
			
 
				 
			
 
				-		command_line_exec := cmdline[:terminator]
			
 
				+		// command_line_exec := cmdline[:terminator]
			
 
				 
			
 
				 		// Still need cwd if the execution on the command line is relative.
			
 
				 		cwd: string
			
 
				 		cwd_err: Error
			
 
				-		if .Working_Dir in selection || (.Executable_Path in selection && command_line_exec[0] != '/') {
			
 
				+		if .Working_Dir in selection {
			
 
				 			strings.builder_reset(&path_builder)
			
 
				 			strings.write_string(&path_builder, "/proc/")
			
 
				 			strings.write_int(&path_builder, pid)
			
@@ -199,18 +199,6 @@ _process_info_by_pid :: proc(pid: int, selection: Process_Info_Fields, allocator
 
				 			}
			
 
				 		}
			
 
				 
			
 
				-		if .Executable_Path in selection {
			
 
				-			if cmdline[0] == '/' {
			
 
				-				info.executable_path = strings.clone(cmdline[:terminator], allocator) or_return
			
 
				-				info.fields += {.Executable_Path}
			
 
				-			} else if cwd_err == nil {
			
 
				-				info.executable_path = join_path({ cwd, cmdline[:terminator] }, allocator) or_return
			
 
				-				info.fields += {.Executable_Path}
			
 
				-			} else {
			
 
				-				break cmdline_if
			
 
				-			}
			
 
				-		}
			
 
				-
			
 
				 		if selection & {.Command_Line, .Command_Args} != {} {
			
 
				 			// skip to first arg
			
 
				 			//cmdline = cmdline[terminator + 1:]
			
@@ -323,6 +311,30 @@ _process_info_by_pid :: proc(pid: int, selection: Process_Info_Fields, allocator
 
				 		}
			
 
				 	}
			
 
				 
			
 
				+	if .Executable_Path in selection {
			
 
				+		/*
			
 
				+		NOTE(Jeroen):
			
 
				+
			
 
				+		The old version returned the wrong executable path for things like `bash` or `sh`,
			
 
				+		for which `/proc/<pid>/cmdline` will just report "bash" or "sh",
			
 
				+		resulting in misleading paths like `$PWD/sh`, even though that executable doesn't exist there.
			
 
				+
			
 
				+		Thanks to Yawning for suggesting `/proc/self/exe`.
			
 
				+		*/
			
 
				+
			
 
				+		strings.builder_reset(&path_builder)
			
 
				+		strings.write_string(&path_builder, "/proc/")
			
 
				+		strings.write_int(&path_builder, pid)
			
 
				+		strings.write_string(&path_builder, "/exe")
			
 
				+
			
 
				+		if exe_bytes, exe_err := _read_link(strings.to_string(path_builder), temp_allocator()); exe_err == nil {
			
 
				+			info.executable_path = strings.clone(string(exe_bytes), allocator) or_return
			
 
				+			info.fields += {.Executable_Path}
			
 
				+		} else {
			
 
				+			err = exe_err
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				 	if .Environment in selection {
			
 
				 		strings.builder_reset(&path_builder)
			
 
				 		strings.write_string(&path_builder, "/proc/")
			
--- a/core/os/os2/stat_windows.odin
+++ b/core/os/os2/stat_windows.odin
@@ -212,11 +212,15 @@ _file_type_from_create_file :: proc(wname: win32.wstring, create_file_attributes
 
				 }
			
 
				 
			
 
				 _file_type_mode_from_file_attributes :: proc(file_attributes: win32.DWORD, h: win32.HANDLE, ReparseTag: win32.DWORD) -> (type: File_Type, mode: int) {
			
 
				-	if file_attributes & win32.FILE_ATTRIBUTE_READONLY != 0 {
			
 
				-		mode |= 0o444
			
 
				-	} else {
			
 
				-		mode |= 0o666
			
 
				-	}
			
 
				+	// NOTE(Jeroen): We don't translate mode flags for Linux when given to `chmod`.
			
 
				+	//               Let's not do so for Windows for `chmod` or `read_directory_iterator` either.
			
 
				+	//               They're *not* portable between Windows and non-Windows platforms.
			
 
				+	//
			
 
				+	//               It also leads to information loss as flags like Archive, Hidden and System have no equivalent there.
			
 
				+	//               We can of course parse them so we can set the `.Symlink` and `.Directory` type, but we shouldn't pretend
			
 
				+	//               that 0o644 is meaningful when returned as a mode.
			
 
				+	//               `C:\bootmgr` as an example has attributes read only, hidden, system, archive. In no way is it sensible to replace that with 0o444.
			
 
				+	mode = int(file_attributes)
			
 
				 
			
 
				 	is_sym := false
			
 
				 	if file_attributes & win32.FILE_ATTRIBUTE_REPARSE_POINT == 0 {
			
@@ -229,21 +233,36 @@ _file_type_mode_from_file_attributes :: proc(file_attributes: win32.DWORD, h: wi
 
				 		type = .Symlink
			
 
				 	} else if file_attributes & win32.FILE_ATTRIBUTE_DIRECTORY != 0 {
			
 
				 		type = .Directory
			
 
				-		mode |= 0o111
			
 
				 	} else if h != nil {
			
 
				 		type = file_type(h)
			
 
				 	}
			
 
				 	return
			
 
				 }
			
 
				 
			
 
				+// Converts `t` into a 64-bit count of 100-nanosecond intervals since January 1, 1601 (UTC).
			
 
				+time_as_filetime :: #force_inline proc(t: time.Time) -> (ft: win32.LARGE_INTEGER) {
			
 
				+	ft = win32.LARGE_INTEGER(u64(t._nsec / 100) + 116444736000000000)
			
 
				+	return
			
 
				+}
			
 
				+
			
 
				+// Converts a `win32.LARGE_INTEGER` count of 100-nanosecond intervals into a `time.Time`:
			
 
				+// the offset 116444736000000000 is subtracted and the remainder is scaled by 100 into `_nsec`.
			
 
				+filetime_as_time_li :: #force_inline proc(ft: win32.LARGE_INTEGER) -> (t: time.Time) {
			
 
				+	t._nsec = (i64(ft) - 116444736000000000) * 100
			
 
				+	return
			
 
				+}
			
 
				+
			
 
				+// Converts a `win32.FILETIME` into a `time.Time` by joining its low half with its
			
 
				+// high half (shifted left by 32 bits) and delegating to `filetime_as_time_li`.
			
 
				+filetime_as_time_ft :: #force_inline proc(ft: win32.FILETIME) -> (t: time.Time) {
			
 
				+	low_part  := win32.LARGE_INTEGER(ft.dwLowDateTime)
			
 
				+	high_part := win32.LARGE_INTEGER(ft.dwHighDateTime) << 32
			
 
				+	return filetime_as_time_li(low_part + high_part)
			
 
				+}
			
 
				+
			
 
				+filetime_as_time :: proc{filetime_as_time_ft, filetime_as_time_li}
			
 
				+
			
 
				 _file_info_from_win32_file_attribute_data :: proc(d: ^win32.WIN32_FILE_ATTRIBUTE_DATA, name: string, allocator: runtime.Allocator) -> (fi: File_Info, e: Error) {
			
 
				 	fi.size = i64(d.nFileSizeHigh)<<32 + i64(d.nFileSizeLow)
			
 
				 	type, mode := _file_type_mode_from_file_attributes(d.dwFileAttributes, nil, 0)
			
 
				 	fi.type = type
			
 
				 	fi.mode |= mode
			
 
				-	fi.creation_time     = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftCreationTime))
			
 
				-	fi.modification_time = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftLastWriteTime))
			
 
				-	fi.access_time       = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftLastAccessTime))
			
 
				+	fi.creation_time     = filetime_as_time(d.ftCreationTime)
			
 
				+	fi.modification_time = filetime_as_time(d.ftLastWriteTime)
			
 
				+	fi.access_time       = filetime_as_time(d.ftLastAccessTime)
			
 
				 	fi.fullpath, e = full_path_from_name(name, allocator)
			
 
				 	fi.name = basename(fi.fullpath)
			
 
				 	return
			
@@ -254,9 +273,9 @@ _file_info_from_win32_find_data :: proc(d: ^win32.WIN32_FIND_DATAW, name: string
 
				 	type, mode := _file_type_mode_from_file_attributes(d.dwFileAttributes, nil, 0)
			
 
				 	fi.type = type
			
 
				 	fi.mode |= mode
			
 
				-	fi.creation_time     = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftCreationTime))
			
 
				-	fi.modification_time = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftLastWriteTime))
			
 
				-	fi.access_time       = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftLastAccessTime))
			
 
				+	fi.creation_time     = filetime_as_time(d.ftCreationTime)
			
 
				+	fi.modification_time = filetime_as_time(d.ftLastWriteTime)
			
 
				+	fi.access_time       = filetime_as_time(d.ftLastAccessTime)
			
 
				 	fi.fullpath, e = full_path_from_name(name, allocator)
			
 
				 	fi.name = basename(fi.fullpath)
			
 
				 	return
			
@@ -286,9 +305,9 @@ _file_info_from_get_file_information_by_handle :: proc(path: string, h: win32.HA
 
				 	type, mode := _file_type_mode_from_file_attributes(d.dwFileAttributes, h, 0)
			
 
				 	fi.type = type
			
 
				 	fi.mode |= mode
			
 
				-	fi.creation_time     = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftCreationTime))
			
 
				-	fi.modification_time = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftLastWriteTime))
			
 
				-	fi.access_time       = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftLastAccessTime))
			
 
				+	fi.creation_time     = filetime_as_time(d.ftCreationTime)
			
 
				+	fi.modification_time = filetime_as_time(d.ftLastWriteTime)
			
 
				+	fi.access_time       = filetime_as_time(d.ftLastAccessTime)
			
 
				 	return fi, nil
			
 
				 }
			
 
				 
			
--- a/core/simd/simd.odin
+++ b/core/simd/simd.odin
@@ -1759,7 +1759,7 @@ Returns:
 
				 replace :: intrinsics.simd_replace
			
 
				 
			
 
				 /*
			
 
				-Reduce a vector to a scalar by adding up all the lanes in an ordered fashion.
			
 
				+Reduce a vector to a scalar by adding up all the lanes.
			
 
				 
			
 
				 This procedure returns a scalar that is the ordered sum of all lanes. The
			
 
				 ordered sum may be important for accounting for precision errors in
			
@@ -2511,460 +2511,16 @@ recip :: #force_inline proc "contextless" (v: $T/#simd[$LANES]$E) -> T where int
 
				 	return T(1) / v
			
 
				 }
			
 
				 
			
 
				+
			
 
				 /*
			
 
				 Create a vector where each lane contains the index of that lane.
			
 
				-
			
 
				 Inputs:
			
 
				 - `V`: The type of the vector to create.
			
 
				-
			
 
				 Result:
			
 
				 - A vector of the given type, where each lane contains the index of that lane.
			
 
				-
			
 
				 **Operation**:
			
 
				-
			
 
				 	for i in 0 ..< N {
			
 
				 		res[i] = i
			
 
				 	}
			
 
				 */
			
 
				-indices :: #force_inline proc "contextless" ($V: typeid/#simd[$N]$E) -> V where intrinsics.type_is_numeric(E) {
			
 
				-	when N == 1 {
			
 
				-		return {0}
			
 
				-	} else when N == 2 {
			
 
				-		return {0, 1}
			
 
				-	} else when N == 4 {
			
 
				-		return {0, 1, 2, 3}
			
 
				-	} else when N == 8 {
			
 
				-		return {0, 1, 2, 3, 4, 5, 6, 7}
			
 
				-	} else when N == 16 {
			
 
				-		return {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
			
 
				-	} else when N == 32 {
			
 
				-		return {
			
 
				-			0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
			
 
				-			16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
			
 
				-		}
			
 
				-	} else when N == 64 {
			
 
				-		return {
			
 
				-			0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
			
 
				-			16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
			
 
				-			32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
			
 
				-			48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
			
 
				-		}
			
 
				-	} else {
			
 
				-		#panic("Unsupported vector size!")
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				-Reduce a vector to a scalar by adding up all the lanes in a pairwise fashion.
			
 
				-
			
 
				-This procedure returns a scalar that is the sum of all lanes, calculated by
			
 
				-adding each even-indexed element with the following odd-indexed element to
			
 
				-produce N/2 values. This is repeated until only a single element remains. This
			
 
				-order is supported by hardware instructions for some types/architectures (e.g.
			
 
				-i16/i32/f32/f64 on x86 SSE, i8/i16/i32/f32 on ARM NEON).
			
 
				-
			
 
				-The order of the sum may be important for accounting for precision errors in
			
 
				-floating-point computation, as floating-point addition is not associative, that
			
 
				-is `(a+b)+c` may not be equal to `a+(b+c)`.
			
 
				-
			
 
				-Inputs:
			
 
				-- `v`: The vector to reduce.
			
 
				-
			
 
				-Result:
			
 
				-- Sum of all lanes, as a scalar.
			
 
				-
			
 
				-**Operation**:
			
 
				-
			
 
				-	for n > 1 {
			
 
				-		n = n / 2
			
 
				-		for i in 0 ..< n {
			
 
				-			a[i] = a[2*i+0] + a[2*i+1]
			
 
				-		}
			
 
				-	}
			
 
				-	res := a[0]
			
 
				-
			
 
				-Graphical representation of the operation for N=4:
			
 
				-
			
 
				-	   +-----------------------+
			
 
				-	v: | v0  | v1  | v2  | v3  |
			
 
				-	   +-----------------------+
			
 
				-	      |     |     |     |
			
 
				-	      `>[+]<'     `>[+]<'
			
 
				-	         |           |
			
 
				-	         `--->[+]<--'
			
 
				-	               |
			
 
				-	               v
			
 
				-	            +-----+
			
 
				-	    result: | y0  |
			
 
				-	            +-----+
			
 
				-*/
			
 
				-reduce_add_pairs :: #force_inline proc "contextless" (v: #simd[$N]$E) -> E
			
 
				-	where intrinsics.type_is_numeric(E) {
			
 
				-	when N == 64 { v64 := v }
			
 
				-	when N == 32 { v32 := v }
			
 
				-	when N == 16 { v16 := v }
			
 
				-	when N == 8  { v8 := v }
			
 
				-	when N == 4  { v4 := v }
			
 
				-	when N == 2  { v2 := v }
			
 
				-
			
 
				-	when N >= 64 {
			
 
				-		x32 := swizzle(v64,
			
 
				-			0,  2,  4,  6,  8,  10, 12, 14,
			
 
				-			16, 18, 20, 22, 24, 26, 28, 30,
			
 
				-			32, 34, 36, 38, 40, 42, 44, 46,
			
 
				-			48, 50, 52, 54, 56, 58, 60, 62)
			
 
				-		y32 := swizzle(v64,
			
 
				-			1,  3,  5,  7,  9,  11, 13, 15,
			
 
				-			17, 19, 21, 23, 25, 27, 29, 31,
			
 
				-			33, 35, 37, 39, 41, 43, 45, 47,
			
 
				-			49, 51, 53, 55, 57, 59, 61, 63)
			
 
				-		v32 := x32 + y32
			
 
				-	}
			
 
				-
			
 
				-	when N >= 32 {
			
 
				-		x16 := swizzle(v32,
			
 
				-			0,  2,  4,  6,  8,  10, 12, 14,
			
 
				-			16, 18, 20, 22, 24, 26, 28, 30)
			
 
				-		y16 := swizzle(v32,
			
 
				-			1,  3,  5,  7,  9,  11, 13, 15,
			
 
				-			17, 19, 21, 23, 25, 27, 29, 31)
			
 
				-		v16 := x16 + y16
			
 
				-	}
			
 
				-
			
 
				-	when N >= 16 {
			
 
				-		x8 := swizzle(v16, 0, 2, 4, 6, 8, 10, 12, 14)
			
 
				-		y8 := swizzle(v16, 1, 3, 5, 7, 9, 11, 13, 15)
			
 
				-		v8 := x8 + y8
			
 
				-	}
			
 
				-
			
 
				-	when N >= 8 {
			
 
				-		x4 := swizzle(v8, 0, 2, 4, 6)
			
 
				-		y4 := swizzle(v8, 1, 3, 5, 7)
			
 
				-		v4 := x4 + y4
			
 
				-	}
			
 
				-
			
 
				-	when N >= 4 {
			
 
				-		x2 := swizzle(v4, 0, 2)
			
 
				-		y2 := swizzle(v4, 1, 3)
			
 
				-		v2 := x2 + y2
			
 
				-	}
			
 
				-
			
 
				-	when N >= 2 {
			
 
				-		return extract(v2, 0) + extract(v2, 1)
			
 
				-	} else {
			
 
				-		return extract(v, 0)
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				-Reduce a vector to a scalar by adding up all the lanes in a bisecting fashion.
			
 
				-
			
 
				-This procedure returns a scalar that is the sum of all lanes, calculated by
			
 
				-bisecting the vector into two parts, where the first contains lanes [0, N/2)
			
 
				-and the second contains lanes [N/2, N), and adding the two halves element-wise
			
 
				-to produce N/2 values. This is repeated until only a single element remains.
			
 
				-This order may be faster to compute than the ordered sum for floats, as it can
			
 
				-often be better parallelized.
			
 
				-
			
 
				-The order of the sum may be important for accounting for precision errors in
			
 
				-floating-point computation, as floating-point addition is not associative, that
			
 
				-is `(a+b)+c` may not be equal to `a+(b+c)`.
			
 
				-
			
 
				-Inputs:
			
 
				-- `v`: The vector to reduce.
			
 
				-
			
 
				-Result:
			
 
				-- Sum of all lanes, as a scalar.
			
 
				-
			
 
				-**Operation**:
			
 
				-
			
 
				-	for n > 1 {
			
 
				-		n = n / 2
			
 
				-		for i in 0 ..< n {
			
 
				-			a[i] += a[i+n]
			
 
				-		}
			
 
				-	}
			
 
				-	res := a[0]
			
 
				-
			
 
				-Graphical representation of the operation for N=4:
			
 
				-
			
 
				-	     +-----------------------+
			
 
				-	     | v0  | v1  | v2  | v3  |
			
 
				-	     +-----------------------+
			
 
				-	        |     |     |     |
			
 
				-	       [+]<-- | ---'      |
			
 
				-	        |    [+]<--------'
			
 
				-	        |     |
			
 
				-	        `>[+]<'
			
 
				-	           |
			
 
				-	           v
			
 
				-	        +-----+
			
 
				-	result: | y0  |
			
 
				-	        +-----+
			
 
				-*/
			
 
				-reduce_add_bisect :: #force_inline proc "contextless" (v: #simd[$N]$E) -> E
			
 
				-	where intrinsics.type_is_numeric(E) {
			
 
				-	when N == 64 { v64 := v }
			
 
				-	when N == 32 { v32 := v }
			
 
				-	when N == 16 { v16 := v }
			
 
				-	when N == 8  { v8 := v }
			
 
				-	when N == 4  { v4 := v }
			
 
				-	when N == 2  { v2 := v }
			
 
				-
			
 
				-	when N >= 64 {
			
 
				-		x32 := swizzle(v64,
			
 
				-			0,  1,  2,  3,  4,  5,  6,  7,
			
 
				-			8,  9,  10, 11, 12, 13, 14, 15,
			
 
				-			16, 17, 18, 19, 20, 21, 22, 23,
			
 
				-			24, 25, 26, 27, 28, 29, 30, 31)
			
 
				-		y32 := swizzle(v64,
			
 
				-			32, 33, 34, 35, 36, 37, 38, 39,
			
 
				-			40, 41, 42, 43, 44, 45, 46, 47,
			
 
				-			48, 49, 50, 51, 52, 53, 54, 55,
			
 
				-			56, 57, 58, 59, 60, 61, 62, 63)
			
 
				-		v32 := x32 + y32
			
 
				-	}
			
 
				-
			
 
				-	when N >= 32 {
			
 
				-		x16 := swizzle(v32,
			
 
				-			0,  1,  2,  3,  4,  5,  6,  7,
			
 
				-			8,  9,  10, 11, 12, 13, 14, 15)
			
 
				-		y16 := swizzle(v32,
			
 
				-			16, 17, 18, 19, 20, 21, 22, 23,
			
 
				-			24, 25, 26, 27, 28, 29, 30, 31)
			
 
				-		v16 := x16 + y16
			
 
				-	}
			
 
				-
			
 
				-	when N >= 16 {
			
 
				-		x8 := swizzle(v16, 0, 1, 2,  3,  4,  5,  6,  7)
			
 
				-		y8 := swizzle(v16, 8, 9, 10, 11, 12, 13, 14, 15)
			
 
				-		v8 := x8 + y8
			
 
				-	}
			
 
				-
			
 
				-	when N >= 8 {
			
 
				-		x4 := swizzle(v8, 0, 1, 2, 3)
			
 
				-		y4 := swizzle(v8, 4, 5, 6, 7)
			
 
				-		v4 := x4 + y4
			
 
				-	}
			
 
				-
			
 
				-	when N >= 4 {
			
 
				-		x2 := swizzle(v4, 0, 1)
			
 
				-		y2 := swizzle(v4, 2, 3)
			
 
				-		v2 := x2 + y2
			
 
				-	}
			
 
				-
			
 
				-	when N >= 2 {
			
 
				-		return extract(v2, 0) + extract(v2, 1)
			
 
				-	} else {
			
 
				-		return extract(v, 0)
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				-Reduce a vector to a scalar by multiplying all the lanes in a pairwise fashion.
			
 
				-
			
 
				-This procedure returns a scalar that is the product of all lanes, calculated by
			
 
				-bisecting the vector into two parts, where the first contains lanes [0, N/2)
			
 
				-and the second contains lanes [N/2, N), and multiplying the two halves together
			
 
				-multiplying each even-indexed element with the following odd-indexed element to
			
 
				-produce N/2 values. This is repeated until only a single element remains. This
			
 
				-order may be faster to compute than the ordered product for floats, as it can
			
 
				-often be better parallelized.
			
 
				-
			
 
				-The order of the product may be important for accounting for precision errors
			
 
				-in floating-point computation, as floating-point multiplication is not
			
 
				-associative, that is `(a*b)*c` may not be equal to `a*(b*c)`.
			
 
				-
			
 
				-Inputs:
			
 
				-- `v`: The vector to reduce.
			
 
				-
			
 
				-Result:
			
 
				-- Product of all lanes, as a scalar.
			
 
				-
			
 
				-**Operation**:
			
 
				-
			
 
				-	for n > 1 {
			
 
				-		n = n / 2
			
 
				-		for i in 0 ..< n {
			
 
				-			a[i] = a[2*i+0] * a[2*i+1]
			
 
				-		}
			
 
				-	}
			
 
				-	res := a[0]
			
 
				-
			
 
				-Graphical representation of the operation for N=4:
			
 
				-
			
 
				-	   +-----------------------+
			
 
				-	v: | v0  | v1  | v2  | v3  |
			
 
				-	   +-----------------------+
			
 
				-	      |     |     |     |
			
 
				-	      `>[x]<'     `>[x]<'
			
 
				-	         |           |
			
 
				-	         `--->[x]<--'
			
 
				-	               |
			
 
				-	               v
			
 
				-	            +-----+
			
 
				-	    result: | y0  |
			
 
				-	            +-----+
			
 
				-*/
			
 
				-reduce_mul_pairs :: #force_inline proc "contextless" (v: #simd[$N]$E) -> E
			
 
				-	where intrinsics.type_is_numeric(E) {
			
 
				-	when N == 64 { v64 := v }
			
 
				-	when N == 32 { v32 := v }
			
 
				-	when N == 16 { v16 := v }
			
 
				-	when N == 8  { v8 := v }
			
 
				-	when N == 4  { v4 := v }
			
 
				-	when N == 2  { v2 := v }
			
 
				-
			
 
				-	when N >= 64 {
			
 
				-		x32 := swizzle(v64,
			
 
				-			0,  2,  4,  6,  8,  10, 12, 14,
			
 
				-			16, 18, 20, 22, 24, 26, 28, 30,
			
 
				-			32, 34, 36, 38, 40, 42, 44, 46,
			
 
				-			48, 50, 52, 54, 56, 58, 60, 62)
			
 
				-		y32 := swizzle(v64,
			
 
				-			1,  3,  5,  7,  9,  11, 13, 15,
			
 
				-			17, 19, 21, 23, 25, 27, 29, 31,
			
 
				-			33, 35, 37, 39, 41, 43, 45, 47,
			
 
				-			49, 51, 53, 55, 57, 59, 61, 63)
			
 
				-		v32 := x32 * y32
			
 
				-	}
			
 
				-
			
 
				-	when N >= 32 {
			
 
				-		x16 := swizzle(v32,
			
 
				-			0,  2,  4,  6,  8,  10, 12, 14,
			
 
				-			16, 18, 20, 22, 24, 26, 28, 30)
			
 
				-		y16 := swizzle(v32,
			
 
				-			1,  3,  5,  7,  9,  11, 13, 15,
			
 
				-			17, 19, 21, 23, 25, 27, 29, 31)
			
 
				-		v16 := x16 * y16
			
 
				-	}
			
 
				-
			
 
				-	when N >= 16 {
			
 
				-		x8 := swizzle(v16, 0, 2, 4, 6, 8, 10, 12, 14)
			
 
				-		y8 := swizzle(v16, 1, 3, 5, 7, 9, 11, 13, 15)
			
 
				-		v8 := x8 * y8
			
 
				-	}
			
 
				-
			
 
				-	when N >= 8 {
			
 
				-		x4 := swizzle(v8, 0, 2, 4, 6)
			
 
				-		y4 := swizzle(v8, 1, 3, 5, 7)
			
 
				-		v4 := x4 * y4
			
 
				-	}
			
 
				-
			
 
				-	when N >= 4 {
			
 
				-		x2 := swizzle(v4, 0, 2)
			
 
				-		y2 := swizzle(v4, 1, 3)
			
 
				-		v2 := x2 * y2
			
 
				-	}
			
 
				-
			
 
				-	when N >= 2 {
			
 
				-		return extract(v2, 0) * extract(v2, 1)
			
 
				-	} else {
			
 
				-		return extract(v, 0)
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				-Reduce a vector to a scalar by multiplying up all the lanes in a bisecting fashion.
			
 
				-
			
 
				-This procedure returns a scalar that is the product of all lanes, calculated by
			
 
				-bisecting the vector into two parts, where the first contains indices [0, N/2)
			
 
				-and the second contains indices [N/2, N), and multiplying the two halves
			
 
				-together element-wise to produce N/2 values. This is repeated until only a
			
 
				-single element remains. This order may be faster to compute than the ordered
			
 
				-product for floats, as it can often be better parallelized.
			
 
				-
			
 
				-The order of the product may be important for accounting for precision errors
			
 
				-in floating-point computation, as floating-point multiplication is not
			
 
				-associative, that is `(a*b)*c` may not be equal to `a*(b*c)`.
			
 
				-
			
 
				-Inputs:
			
 
				-- `v`: The vector to reduce.
			
 
				-
			
 
				-Result:
			
 
				-- Product of all lanes, as a scalar.
			
 
				-
			
 
				-**Operation**:
			
 
				-
			
 
				-	for n > 1 {
			
 
				-		n = n / 2
			
 
				-		for i in 0 ..< n {
			
 
				-			a[i] *= a[i+n]
			
 
				-		}
			
 
				-	}
			
 
				-	res := a[0]
			
 
				-
			
 
				-Graphical representation of the operation for N=4:
			
 
				-
			
 
				-	     +-----------------------+
			
 
				-	     | v0  | v1  | v2  | v3  |
			
 
				-	     +-----------------------+
			
 
				-	        |     |     |     |
			
 
				-	       [x]<-- | ---'      |
			
 
				-	        |    [x]<--------'
			
 
				-	        |     |
			
 
				-	        `>[x]<'
			
 
				-	           |
			
 
				-	           v
			
 
				-	        +-----+
			
 
				-	result: | y0  |
			
 
				-	        +-----+
			
 
				-*/
			
 
				-reduce_mul_bisect :: #force_inline proc "contextless" (v: #simd[$N]$E) -> E
			
 
				-	where intrinsics.type_is_numeric(E) {
			
 
				-	when N == 64 { v64 := v }
			
 
				-	when N == 32 { v32 := v }
			
 
				-	when N == 16 { v16 := v }
			
 
				-	when N == 8  { v8 := v }
			
 
				-	when N == 4  { v4 := v }
			
 
				-	when N == 2  { v2 := v }
			
 
				-
			
 
				-	when N >= 64 {
			
 
				-		x32 := swizzle(v64,
			
 
				-			0,  1,  2,  3,  4,  5,  6,  7,
			
 
				-			8,  9,  10, 11, 12, 13, 14, 15,
			
 
				-			16, 17, 18, 19, 20, 21, 22, 23,
			
 
				-			24, 25, 26, 27, 28, 29, 30, 31)
			
 
				-		y32 := swizzle(v64,
			
 
				-			32, 33, 34, 35, 36, 37, 38, 39,
			
 
				-			40, 41, 42, 43, 44, 45, 46, 47,
			
 
				-			48, 49, 50, 51, 52, 53, 54, 55,
			
 
				-			56, 57, 58, 59, 60, 61, 62, 63)
			
 
				-		v32 := x32 * y32
			
 
				-	}
			
 
				-
			
 
				-	when N >= 32 {
			
 
				-		x16 := swizzle(v32,
			
 
				-			0,  1,  2,  3,  4,  5,  6,  7,
			
 
				-			8,  9,  10, 11, 12, 13, 14, 15)
			
 
				-		y16 := swizzle(v32,
			
 
				-			16, 17, 18, 19, 20, 21, 22, 23,
			
 
				-			24, 25, 26, 27, 28, 29, 30, 31)
			
 
				-		v16 := x16 * y16
			
 
				-	}
			
 
				-
			
 
				-	when N >= 16 {
			
 
				-		x8 := swizzle(v16, 0, 1, 2,  3,  4,  5,  6,  7)
			
 
				-		y8 := swizzle(v16, 8, 9, 10, 11, 12, 13, 14, 15)
			
 
				-		v8 := x8 * y8
			
 
				-	}
			
 
				-
			
 
				-	when N >= 8 {
			
 
				-		x4 := swizzle(v8, 0, 1, 2, 3)
			
 
				-		y4 := swizzle(v8, 4, 5, 6, 7)
			
 
				-		v4 := x4 * y4
			
 
				-	}
			
 
				-
			
 
				-	when N >= 4 {
			
 
				-		x2 := swizzle(v4, 0, 1)
			
 
				-		y2 := swizzle(v4, 2, 3)
			
 
				-		v2 := x2 * y2
			
 
				-	}
			
 
				-
			
 
				-	when N >= 2 {
			
 
				-		return extract(v2, 0) * extract(v2, 1)
			
 
				-	} else {
			
 
				-		return extract(v, 0)
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				+indices :: intrinsics.simd_indices
			
--- a/core/sys/windows/user32.odin
+++ b/core/sys/windows/user32.odin
@@ -47,6 +47,8 @@ foreign user32 {
 
				 		lpParam: LPVOID,
			
 
				 	) -> HWND ---
			
 
				 
			
 
				+	GetWindowThreadProcessId :: proc(hwnd: HWND, lpdwProcessId: LPDWORD) -> DWORD ---
			
 
				+
			
 
				 	DestroyWindow :: proc(hWnd: HWND) -> BOOL ---
			
 
				 
			
 
				 	ShowWindow :: proc(hWnd: HWND, nCmdShow: INT) -> BOOL ---
			
--- a/src/check_builtin.cpp
+++ b/src/check_builtin.cpp
@@ -760,6 +760,36 @@ gb_internal bool check_builtin_simd_operation(CheckerContext *c, Operand *operan
 
				 			return true;
			
 
				 		}
			
 
				 
			
 
				+	case BuiltinProc_simd_indices:
			
 
				+		{
			
 
				+			Operand x = {};
			
 
				+			check_expr_or_type(c, &x, ce->args[0], nullptr);
			
 
				+			if (x.mode == Addressing_Invalid) return false;
			
 
				+			if (x.mode != Addressing_Type) {
			
 
				+				gbString s = expr_to_string(x.expr);
			
 
				+				error(x.expr, "'%.*s' expected a simd vector type, got '%s'", LIT(builtin_name), s);
			
 
				+				gb_string_free(s);
			
 
				+				return false;
			
 
				+			}
			
 
				+			if (!is_type_simd_vector(x.type)) {
			
 
				+				gbString s = type_to_string(x.type);
			
 
				+				error(x.expr, "'%.*s' expected a simd vector type, got '%s'", LIT(builtin_name), s);
			
 
				+				gb_string_free(s);
			
 
				+				return false;
			
 
				+			}
			
 
				+
			
 
				+			Type *elem = base_array_type(x.type);
			
 
				+			if (!is_type_numeric(elem)) {
			
 
				+				gbString s = type_to_string(x.type);
			
 
				+				error(x.expr, "'%.*s' expected a simd vector type with a numeric element type, got '%s'", LIT(builtin_name), s);
			
 
				+				gb_string_free(s);
			
 
				+			}
			
 
				+
			
 
				+			operand->mode = Addressing_Value;
			
 
				+			operand->type = x.type;
			
 
				+			return true;
			
 
				+		}
			
 
				+
			
 
				 	case BuiltinProc_simd_extract:
			
 
				 		{
			
 
				 			Operand x = {};
			
@@ -2059,6 +2089,7 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As
 
				 	case BuiltinProc_atomic_type_is_lock_free:
			
 
				 	case BuiltinProc_has_target_feature:
			
 
				 	case BuiltinProc_procedure_of:
			
 
				+	case BuiltinProc_simd_indices:
			
 
				 		// NOTE(bill): The first arg may be a Type, this will be checked case by case
			
 
				 		break;
			
 
				 
			
@@ -6001,12 +6032,13 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As
 
				 			
			
 
				 			// NOTE(jakubtomsu): forces calculation of variant_block_size
			
 
				 			type_size_of(u);
			
 
				-			i64 tag_offset = u->Union.variant_block_size;
			
 
				-			GB_ASSERT(tag_offset > 0);
			
 
				+			// NOTE(Jeroen): A tag offset of zero is perfectly fine if all members of the union are empty structs.
			
 
				+			//               What matters is that the tag size is > 0.
			
 
				+			GB_ASSERT(u->Union.tag_size > 0);
			
 
				 			
			
 
				 			operand->mode = Addressing_Constant;
			
 
				 			operand->type = t_untyped_integer;
			
 
				-			operand->value = exact_value_i64(tag_offset);
			
 
				+			operand->value = exact_value_i64(u->Union.variant_block_size);
			
 
				 		}
			
 
				 		break;
			
 
				 
			
--- a/src/check_expr.cpp
+++ b/src/check_expr.cpp
@@ -2910,9 +2910,20 @@ gb_internal void check_comparison(CheckerContext *c, Ast *node, Operand *x, Oper
 
				 		if (!defined) {
			
 
				 			gbString xs = type_to_string(x->type, temporary_allocator());
			
 
				 			gbString ys = type_to_string(y->type, temporary_allocator());
			
 
				-			err_str = gb_string_make(temporary_allocator(),
			
 
				-				gb_bprintf("operator '%.*s' not defined between the types '%s' and '%s'", LIT(token_strings[op]), xs, ys)
			
 
				-			);
			
 
				+
			
 
				+			if (!is_type_comparable(x->type)) {
			
 
				+				err_str = gb_string_make(temporary_allocator(),
			
 
				+					gb_bprintf("Type '%s' is not simply comparable, so operator '%.*s' is not defined for it", xs, LIT(token_strings[op]))
			
 
				+				);
			
 
				+			} else if (!is_type_comparable(y->type)) {
			
 
				+				err_str = gb_string_make(temporary_allocator(),
			
 
				+					gb_bprintf("Type '%s' is not simply comparable, so operator '%.*s' is not defined for it", ys, LIT(token_strings[op]))
			
 
				+				);
			
 
				+			} else {
			
 
				+				err_str = gb_string_make(temporary_allocator(),
			
 
				+					gb_bprintf("Operator '%.*s' not defined between the types '%s' and '%s'", LIT(token_strings[op]), xs, ys)
			
 
				+				);
			
 
				+			}
			
 
				 		} else {
			
 
				 			Type *comparison_type = x->type;
			
 
				 			if (x->type == err_type && is_operand_nil(*x)) {
			
@@ -2933,11 +2944,11 @@ gb_internal void check_comparison(CheckerContext *c, Ast *node, Operand *x, Oper
 
				 		} else {
			
 
				 			yt = type_to_string(y->type);
			
 
				 		}
			
 
				-		err_str = gb_string_make(temporary_allocator(), gb_bprintf("mismatched types '%s' and '%s'", xt, yt));
			
 
				+		err_str = gb_string_make(temporary_allocator(), gb_bprintf("Mismatched types '%s' and '%s'", xt, yt));
			
 
				 	}
			
 
				 
			
 
				 	if (err_str != nullptr) {
			
 
				-		error(node, "Cannot compare expression, %s", err_str);
			
 
				+		error(node, "Cannot compare expression. %s.", err_str);
			
 
				 		x->type = t_untyped_bool;
			
 
				 	} else {
			
 
				 		if (x->mode == Addressing_Constant &&
			
--- a/src/checker_builtin_procs.hpp
+++ b/src/checker_builtin_procs.hpp
@@ -205,6 +205,9 @@ BuiltinProc__simd_begin,
 
				 	BuiltinProc_simd_masked_expand_load,
			
 
				 	BuiltinProc_simd_masked_compress_store,
			
 
				 
			
 
				+	BuiltinProc_simd_indices,
			
 
				+
			
 
				+
			
 
				 	// Platform specific SIMD intrinsics
			
 
				 	BuiltinProc_simd_x86__MM_SHUFFLE,
			
 
				 BuiltinProc__simd_end,
			
@@ -551,6 +554,8 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = {
 
				 	{STR_LIT("simd_masked_expand_load"),    3, false, Expr_Expr, BuiltinProcPkg_intrinsics},
			
 
				 	{STR_LIT("simd_masked_compress_store"), 3, false, Expr_Stmt, BuiltinProcPkg_intrinsics},
			
 
				 
			
 
				+	{STR_LIT("simd_indices"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
			
 
				+
			
 
				 	{STR_LIT("simd_x86__MM_SHUFFLE"), 4, false, Expr_Expr, BuiltinProcPkg_intrinsics},
			
 
				 
			
 
				 	{STR_LIT(""), 0, false, Expr_Stmt, BuiltinProcPkg_intrinsics},
			
--- a/src/llvm_backend_proc.cpp
+++ b/src/llvm_backend_proc.cpp
@@ -1293,6 +1293,23 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn
 
				 	lbValue res = {};
			
 
				 	res.type = tv.type;
			
 
				 
			
 
				+	switch (builtin_id) {
			
 
				+	case BuiltinProc_simd_indices: {
			
 
				+		Type *type = base_type(res.type);
			
 
				+		GB_ASSERT(type->kind == Type_SimdVector);
			
 
				+		Type *elem = type->SimdVector.elem;
			
 
				+
			
 
				+		i64 count = type->SimdVector.count;
			
 
				+		LLVMValueRef *scalars = gb_alloc_array(temporary_allocator(), LLVMValueRef, count);
			
 
				+		for (i64 i = 0; i < count; i++) {
			
 
				+			scalars[i] = lb_const_value(m, elem, exact_value_i64(i)).value;
			
 
				+		}
			
 
				+
			
 
				+		res.value = LLVMConstVector(scalars, cast(unsigned)count);
			
 
				+		return res;
			
 
				+	}
			
 
				+	}
			
 
				+
			
 
				 	lbValue arg0 = {}; if (ce->args.count > 0) arg0 = lb_build_expr(p, ce->args[0]);
			
 
				 	lbValue arg1 = {}; if (ce->args.count > 1) arg1 = lb_build_expr(p, ce->args[1]);
			
 
				 	lbValue arg2 = {}; if (ce->args.count > 2) arg2 = lb_build_expr(p, ce->args[2]);
			
--- a/src/types.cpp
+++ b/src/types.cpp
@@ -4108,10 +4108,10 @@ gb_internal i64 type_size_of_internal(Type *t, TypePath *path) {
 
				 		}
			
 
				 
			
 
				 		i64 max = 0;
			
 
				-		i64 field_size = 0;
			
 
				 
			
 
				 		for_array(i, t->Union.variants) {
			
 
				 			Type *variant_type = t->Union.variants[i];
			
 
				+
			
 
				 			i64 size = type_size_of_internal(variant_type, path);
			
 
				 			if (max < size) {
			
 
				 				max = size;
			
@@ -4130,7 +4130,7 @@ gb_internal i64 type_size_of_internal(Type *t, TypePath *path) {
 
				 			size = align_formula(max, tag_size);
			
 
				 			// NOTE(bill): Calculate the padding between the common fields and the tag
			
 
				 			t->Union.tag_size = cast(i16)tag_size;
			
 
				-			t->Union.variant_block_size = size - field_size;
			
 
				+			t->Union.variant_block_size = size;
			
 
				 
			
 
				 			size += tag_size;
			
 
				 		}